From 18bde9a6b2953946b3feb5e3febaf4e7e9da3af9 Mon Sep 17 00:00:00 2001
From: Zhou Peng
Date: Mon, 20 Jan 2020 19:52:27 +0800
Subject: [PATCH 0001/2522] lite:micro:tools:make: Move the default target (all) up before any others.

The default Makefile target `all` is the target that gets built when make
is invoked with no command-line arguments.
---
 tensorflow/lite/micro/tools/make/Makefile | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tensorflow/lite/micro/tools/make/Makefile b/tensorflow/lite/micro/tools/make/Makefile
index b341bcbf0c5..cec22269eb3 100644
--- a/tensorflow/lite/micro/tools/make/Makefile
+++ b/tensorflow/lite/micro/tools/make/Makefile
@@ -239,6 +239,10 @@ CXX := $(CC_PREFIX)${TARGET_TOOLCHAIN_PREFIX}${CXX_TOOL}
 CC := $(CC_PREFIX)${TARGET_TOOLCHAIN_PREFIX}${CC_TOOL}
 AR := $(CC_PREFIX)${TARGET_TOOLCHAIN_PREFIX}${AR_TOOL}

+# The default Makefile target(all) must appear before any target,
+# which is compiled if there's no command-line arguments.
+all: $(MICROLITE_LIB_PATH)
+
 # Load the examples.
 include $(wildcard tensorflow/lite/micro/examples/*/Makefile.inc)

@@ -268,9 +272,6 @@ $(OBJDIR)%.o: %.S $(THIRD_PARTY_TARGETS)
 	@mkdir -p $(dir $@)
 	$(CC) $(CCFLAGS) $(INCLUDES) -c $< -o $@

-# The target that's compiled if there's no command-line arguments.
-all: $(MICROLITE_LIB_PATH)
-
 microlite: $(MICROLITE_LIB_PATH)

 # Hack for generating schema file bypassing flatbuffer parsing

From 84b639d8582a0375db1b39edd6a50d39e21240ff Mon Sep 17 00:00:00 2001
From: Zhou Peng
Date: Tue, 21 Jan 2020 14:13:59 +0800
Subject: [PATCH 0002/2522] lite:micro:riscv32_mcu: Fix build failure due to undefined references.

Fix the ld error "undefined references to `__wrap_puts`" for build commands
such as
`make -f tensorflow/lite/micro/tools/make/Makefile TARGET=riscv32_mcu hello_world_bin`

The related issue is tensorflow#32041.

Refactoring, suggested by Nick Kreeger (nick.kreeger@gmail.com):
targets/mcu_riscv_makefile.inc should only include the bare-bones parts needed
to build for this platform. So move the platform-specific items from
targets/mcu_riscv_makefile.inc into the actual example folders: create a
riscv32_mcu folder in each example directory, and in those directories create
a new Makefile.inc that adds the rules that were moved out.

The original cause of the bug: the Makefile variables XXX_TEST_SRCS/XXX_SRCS
in targets/mcu_riscv_makefile.inc are overridden by each example's own
Makefile.inc (e.g. hello_world/Makefile.inc), so the architecture-specific
__wrap_* functions are not included correctly.
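For context, GNU ld's `--wrap=sym` option redirects every reference to `sym`
to `__wrap_sym` (and references to `__real_sym` back to the original), so the
link only succeeds when a `__wrap_sym` definition, here provided by the SiFive
libwrap sources, is actually pulled into the build. A minimal standalone
sketch of the mechanism, illustrative only and not part of this patch (it
assumes a GCC/binutils toolchain; the file name and build command are
hypothetical):

// wrap_demo.cc (hypothetical file, for illustration only).
// Build: g++ wrap_demo.cc -Wl,--wrap=puts
#include <stdio.h>

// With -Wl,--wrap=puts the linker rewrites every call to puts() into a call
// to __wrap_puts(). If no __wrap_puts definition is linked in (for example
// because the libwrap sources were dropped from the *_SRCS variables), the
// link fails with "undefined reference to `__wrap_puts'".
extern "C" int __real_puts(const char* s);  // resolved to the real libc puts

extern "C" int __wrap_puts(const char* s) {
  // A real wrapper would retarget output to the board's UART; this sketch
  // just forwards to the original implementation.
  return __real_puts(s);
}

int main() {
  puts("hello");  // becomes a call to __wrap_puts at link time
  return 0;
}

With the wrap flags present but the wrapper sources missing, the link stops
with exactly the error quoted above, which is what the per-example
Makefile.inc files below fix.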
--- .../hello_world/riscv32_mcu/Makefile.inc | 25 +++++++++++++++++++ .../magic_wand/riscv32_mcu/Makefile.inc | 25 +++++++++++++++++++ .../micro_speech/riscv32_mcu/Makefile.inc | 24 ++++++++++++++++++ .../person_detection/riscv32_mcu/Makefile.inc | 24 ++++++++++++++++++ .../micro/tools/make/helper_functions.inc | 2 +- .../tools/make/targets/mcu_riscv_makefile.inc | 17 +------------ 6 files changed, 100 insertions(+), 17 deletions(-) create mode 100644 tensorflow/lite/micro/examples/hello_world/riscv32_mcu/Makefile.inc create mode 100644 tensorflow/lite/micro/examples/magic_wand/riscv32_mcu/Makefile.inc create mode 100644 tensorflow/lite/micro/examples/micro_speech/riscv32_mcu/Makefile.inc create mode 100644 tensorflow/lite/micro/examples/person_detection/riscv32_mcu/Makefile.inc diff --git a/tensorflow/lite/micro/examples/hello_world/riscv32_mcu/Makefile.inc b/tensorflow/lite/micro/examples/hello_world/riscv32_mcu/Makefile.inc new file mode 100644 index 00000000000..61d683286dd --- /dev/null +++ b/tensorflow/lite/micro/examples/hello_world/riscv32_mcu/Makefile.inc @@ -0,0 +1,25 @@ +ifeq ($(TARGET), riscv32_mcu) + # Wrap functions + MICRO_FE310_LIBWRAP_SRCS := \ + $(wildcard $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/libwrap/sys/*.c) \ + $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/libwrap/misc/write_hex.c \ + $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/libwrap/stdlib/malloc.c + + MICRO_FE310_BSP_ENV_SRCS := \ + $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/env/start.S \ + $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/env/entry.S \ + $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/env/freedom-e300-hifive1/init.c + + HELLO_WORLD_TEST_SRCS += $(MICRO_FE310_LIBWRAP_SRCS) $(MICRO_FE310_BSP_ENV_SRCS) + HELLO_WORLD_SRCS += $(MICRO_FE310_LIBWRAP_SRCS) $(MICRO_FE310_BSP_ENV_SRCS) + + LIBWRAP_SYMS := malloc free \ + open lseek read write fstat stat close link unlink \ + execve fork getpid kill wait \ + isatty times sbrk _exit puts + + LDFLAGS += $(foreach s,$(LIBWRAP_SYMS),-Wl,--wrap=$(s)) + LDFLAGS += $(foreach s,$(LIBWRAP_SYMS),-Wl,--wrap=_$(s)) + LDFLAGS += -L. -Wl,--start-group -lc -Wl,--end-group +endif + diff --git a/tensorflow/lite/micro/examples/magic_wand/riscv32_mcu/Makefile.inc b/tensorflow/lite/micro/examples/magic_wand/riscv32_mcu/Makefile.inc new file mode 100644 index 00000000000..e8f8b15a279 --- /dev/null +++ b/tensorflow/lite/micro/examples/magic_wand/riscv32_mcu/Makefile.inc @@ -0,0 +1,25 @@ +ifeq ($(TARGET), riscv32_mcu) + # Wrap functions + MICRO_FE310_LIBWRAP_SRCS := \ + $(wildcard $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/libwrap/sys/*.c) \ + $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/libwrap/misc/write_hex.c \ + $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/libwrap/stdlib/malloc.c + + MICRO_FE310_BSP_ENV_SRCS := \ + $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/env/start.S \ + $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/env/entry.S \ + $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/env/freedom-e300-hifive1/init.c + + magic_wand_TEST_SRCS += $(MICRO_FE310_LIBWRAP_SRCS) $(MICRO_FE310_BSP_ENV_SRCS) + magic_wand_SRCS += $(MICRO_FE310_LIBWRAP_SRCS) $(MICRO_FE310_BSP_ENV_SRCS) + + LIBWRAP_SYMS := malloc free \ + open lseek read write fstat stat close link unlink \ + execve fork getpid kill wait \ + isatty times sbrk _exit puts + + LDFLAGS += $(foreach s,$(LIBWRAP_SYMS),-Wl,--wrap=$(s)) + LDFLAGS += $(foreach s,$(LIBWRAP_SYMS),-Wl,--wrap=_$(s)) + LDFLAGS += -L. 
-Wl,--start-group -lc -Wl,--end-group +endif + diff --git a/tensorflow/lite/micro/examples/micro_speech/riscv32_mcu/Makefile.inc b/tensorflow/lite/micro/examples/micro_speech/riscv32_mcu/Makefile.inc new file mode 100644 index 00000000000..3c9960d1510 --- /dev/null +++ b/tensorflow/lite/micro/examples/micro_speech/riscv32_mcu/Makefile.inc @@ -0,0 +1,24 @@ +ifeq ($(TARGET), riscv32_mcu) + # Wrap functions + MICRO_FE310_LIBWRAP_SRCS := \ + $(wildcard $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/libwrap/sys/*.c) \ + $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/libwrap/misc/write_hex.c \ + $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/libwrap/stdlib/malloc.c + + MICRO_FE310_BSP_ENV_SRCS := \ + $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/env/start.S \ + $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/env/entry.S \ + $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/env/freedom-e300-hifive1/init.c + + MICRO_SPEECH_TEST_SRCS += $(MICRO_FE310_LIBWRAP_SRCS) $(MICRO_FE310_BSP_ENV_SRCS) + MICRO_SPEECH_SRCS += $(MICRO_FE310_LIBWRAP_SRCS) $(MICRO_FE310_BSP_ENV_SRCS) + + LIBWRAP_SYMS := malloc free \ + open lseek read write fstat stat close link unlink \ + execve fork getpid kill wait \ + isatty times sbrk _exit puts + + LDFLAGS += $(foreach s,$(LIBWRAP_SYMS),-Wl,--wrap=$(s)) + LDFLAGS += $(foreach s,$(LIBWRAP_SYMS),-Wl,--wrap=_$(s)) + LDFLAGS += -L. -Wl,--start-group -lc -Wl,--end-group +endif diff --git a/tensorflow/lite/micro/examples/person_detection/riscv32_mcu/Makefile.inc b/tensorflow/lite/micro/examples/person_detection/riscv32_mcu/Makefile.inc new file mode 100644 index 00000000000..54c180c3baa --- /dev/null +++ b/tensorflow/lite/micro/examples/person_detection/riscv32_mcu/Makefile.inc @@ -0,0 +1,24 @@ +ifeq ($(TARGET), riscv32_mcu) + # Wrap functions + MICRO_FE310_LIBWRAP_SRCS := \ + $(wildcard $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/libwrap/sys/*.c) \ + $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/libwrap/misc/write_hex.c \ + $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/libwrap/stdlib/malloc.c + + MICRO_FE310_BSP_ENV_SRCS := \ + $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/env/start.S \ + $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/env/entry.S \ + $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/env/freedom-e300-hifive1/init.c + + person_detection_TEST_HDRS += $(MICRO_FE310_LIBWRAP_SRCS) $(MICRO_FE310_BSP_ENV_SRCS) + person_detection_SRCS += $(MICRO_FE310_LIBWRAP_SRCS) $(MICRO_FE310_BSP_ENV_SRCS) + + LIBWRAP_SYMS := malloc free \ + open lseek read write fstat stat close link unlink \ + execve fork getpid kill wait \ + isatty times sbrk _exit puts + + LDFLAGS += $(foreach s,$(LIBWRAP_SYMS),-Wl,--wrap=$(s)) + LDFLAGS += $(foreach s,$(LIBWRAP_SYMS),-Wl,--wrap=_$(s)) + LDFLAGS += -L. 
-Wl,--start-group -lc -Wl,--end-group
+endif
diff --git a/tensorflow/lite/micro/tools/make/helper_functions.inc b/tensorflow/lite/micro/tools/make/helper_functions.inc
index d07a119945f..cf8065c7cab 100644
--- a/tensorflow/lite/micro/tools/make/helper_functions.inc
+++ b/tensorflow/lite/micro/tools/make/helper_functions.inc
@@ -357,7 +357,7 @@ $(1)_LOCAL_SRCS := $$(call specialize,$$($(1)_LOCAL_SRCS))
 ALL_SRCS += $$($(1)_LOCAL_SRCS)
 $(1)_LOCAL_HDRS := $(3)
 $(1)_LOCAL_OBJS := $$(addprefix $$(OBJDIR), \
-$$(patsubst %.cc,%.o,$$(patsubst %.c,%.o,$$($(1)_LOCAL_SRCS))))
+$$(patsubst %.S,%.o,$$(patsubst %.cc,%.o,$$(patsubst %.c,%.o,$$($(1)_LOCAL_SRCS)))))
 $(1)_BINARY := $$(BINDIR)$(1)
 $$($(1)_BINARY): $$($(1)_LOCAL_OBJS) $$(MICROLITE_LIB_PATH)
 	@mkdir -p $$(dir $$@)
diff --git a/tensorflow/lite/micro/tools/make/targets/mcu_riscv_makefile.inc b/tensorflow/lite/micro/tools/make/targets/mcu_riscv_makefile.inc
index 1ec91cdca82..5d1a729dbaf 100644
--- a/tensorflow/lite/micro/tools/make/targets/mcu_riscv_makefile.inc
+++ b/tensorflow/lite/micro/tools/make/targets/mcu_riscv_makefile.inc
@@ -48,22 +48,7 @@ ifeq ($(TARGET), riscv32_mcu)
 MICROLITE_CC_SRCS += \
 $(wildcard tensorflow/lite/micro/riscv32_mcu/*.cc)
- MICRO_SPEECH_TEST_SRCS += \
- $(wildcard $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/libwrap/sys/*.c) \
- $(wildcard $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/libwrap/sys/*.cc) \
- $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/libwrap/misc/write_hex.c \
- $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/libwrap/stdlib/malloc.c \
- $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/env/start.S \
- $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/env/entry.S \
- $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/env/freedom-e300-hifive1/init.c
- LIBWRAP_SYMS := malloc free \
- open lseek read write fstat stat close link unlink \
- execve fork getpid kill wait \
- isatty times sbrk _exit puts
- LDFLAGS += $(foreach s,$(LIBWRAP_SYMS),-Wl,--wrap=$(s))
- LDFLAGS += $(foreach s,$(LIBWRAP_SYMS),-Wl,--wrap=_$(s))
- LDFLAGS += -L. -Wl,--start-group -lc -Wl,--end-group
 LDFLAGS += \
 -T$(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/env/freedom-e300-hifive1/flash.lds \
 -nostartfiles \
@@ -76,4 +61,4 @@ ifeq ($(TARGET), riscv32_mcu)
 $(BINDIR)/%.bin: $(BINDIR)/%
 	@mkdir -p $(dir $@)
 	$(OBJCOPY) $< $@ -O binary
-endif
\ No newline at end of file
+endif

From 823317cbceeae07b26961533752770d4073d7e98 Mon Sep 17 00:00:00 2001
From: Zhou Peng
Date: Tue, 21 Jan 2020 14:58:02 +0800
Subject: [PATCH 0003/2522] lite:micro:riscv32_mcu: Fix hidden symbol `__dso_handle' isn't defined.

For the Arduino sketches in the riscv32_mcu examples, this patch fixes the
error by declaring the global variable `void* __dso_handle;`.
---
 tensorflow/lite/micro/arduino/abi.cc | 16 ++++++++++++++++
 .../hello_world/riscv32_mcu/Makefile.inc | 3 ++-
 .../examples/magic_wand/riscv32_mcu/Makefile.inc | 6 ++++--
 .../micro_speech/riscv32_mcu/Makefile.inc | 6 ++++--
 .../person_detection/riscv32_mcu/Makefile.inc | 6 ++++--
 5 files changed, 30 insertions(+), 7 deletions(-)
 create mode 100644 tensorflow/lite/micro/arduino/abi.cc

diff --git a/tensorflow/lite/micro/arduino/abi.cc b/tensorflow/lite/micro/arduino/abi.cc
new file mode 100644
index 00000000000..6e58671f9a6
--- /dev/null
+++ b/tensorflow/lite/micro/arduino/abi.cc
@@ -0,0 +1,16 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +void* __dso_handle; diff --git a/tensorflow/lite/micro/examples/hello_world/riscv32_mcu/Makefile.inc b/tensorflow/lite/micro/examples/hello_world/riscv32_mcu/Makefile.inc index 61d683286dd..f24610a5c14 100644 --- a/tensorflow/lite/micro/examples/hello_world/riscv32_mcu/Makefile.inc +++ b/tensorflow/lite/micro/examples/hello_world/riscv32_mcu/Makefile.inc @@ -11,7 +11,8 @@ ifeq ($(TARGET), riscv32_mcu) $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/env/freedom-e300-hifive1/init.c HELLO_WORLD_TEST_SRCS += $(MICRO_FE310_LIBWRAP_SRCS) $(MICRO_FE310_BSP_ENV_SRCS) - HELLO_WORLD_SRCS += $(MICRO_FE310_LIBWRAP_SRCS) $(MICRO_FE310_BSP_ENV_SRCS) + HELLO_WORLD_SRCS += $(MICRO_FE310_LIBWRAP_SRCS) $(MICRO_FE310_BSP_ENV_SRCS) \ + tensorflow/lite/micro/arduino/abi.cc LIBWRAP_SYMS := malloc free \ open lseek read write fstat stat close link unlink \ diff --git a/tensorflow/lite/micro/examples/magic_wand/riscv32_mcu/Makefile.inc b/tensorflow/lite/micro/examples/magic_wand/riscv32_mcu/Makefile.inc index e8f8b15a279..545ed1ad1f5 100644 --- a/tensorflow/lite/micro/examples/magic_wand/riscv32_mcu/Makefile.inc +++ b/tensorflow/lite/micro/examples/magic_wand/riscv32_mcu/Makefile.inc @@ -10,8 +10,10 @@ ifeq ($(TARGET), riscv32_mcu) $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/env/entry.S \ $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/env/freedom-e300-hifive1/init.c - magic_wand_TEST_SRCS += $(MICRO_FE310_LIBWRAP_SRCS) $(MICRO_FE310_BSP_ENV_SRCS) - magic_wand_SRCS += $(MICRO_FE310_LIBWRAP_SRCS) $(MICRO_FE310_BSP_ENV_SRCS) + magic_wand_TEST_SRCS += $(MICRO_FE310_LIBWRAP_SRCS) $(MICRO_FE310_BSP_ENV_SRCS) \ + tensorflow/lite/micro/arduino/abi.cc + magic_wand_SRCS += $(MICRO_FE310_LIBWRAP_SRCS) $(MICRO_FE310_BSP_ENV_SRCS) \ + tensorflow/lite/micro/arduino/abi.cc LIBWRAP_SYMS := malloc free \ open lseek read write fstat stat close link unlink \ diff --git a/tensorflow/lite/micro/examples/micro_speech/riscv32_mcu/Makefile.inc b/tensorflow/lite/micro/examples/micro_speech/riscv32_mcu/Makefile.inc index 3c9960d1510..480c6fbb219 100644 --- a/tensorflow/lite/micro/examples/micro_speech/riscv32_mcu/Makefile.inc +++ b/tensorflow/lite/micro/examples/micro_speech/riscv32_mcu/Makefile.inc @@ -10,8 +10,10 @@ ifeq ($(TARGET), riscv32_mcu) $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/env/entry.S \ $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/env/freedom-e300-hifive1/init.c - MICRO_SPEECH_TEST_SRCS += $(MICRO_FE310_LIBWRAP_SRCS) $(MICRO_FE310_BSP_ENV_SRCS) - MICRO_SPEECH_SRCS += $(MICRO_FE310_LIBWRAP_SRCS) $(MICRO_FE310_BSP_ENV_SRCS) + MICRO_SPEECH_TEST_SRCS += $(MICRO_FE310_LIBWRAP_SRCS) $(MICRO_FE310_BSP_ENV_SRCS) \ + tensorflow/lite/micro/arduino/abi.cc + MICRO_SPEECH_SRCS += $(MICRO_FE310_LIBWRAP_SRCS) $(MICRO_FE310_BSP_ENV_SRCS) \ + tensorflow/lite/micro/arduino/abi.cc LIBWRAP_SYMS := malloc free \ open lseek read write fstat stat close link unlink \ diff --git a/tensorflow/lite/micro/examples/person_detection/riscv32_mcu/Makefile.inc b/tensorflow/lite/micro/examples/person_detection/riscv32_mcu/Makefile.inc index 54c180c3baa..e138efd0954 
100644 --- a/tensorflow/lite/micro/examples/person_detection/riscv32_mcu/Makefile.inc +++ b/tensorflow/lite/micro/examples/person_detection/riscv32_mcu/Makefile.inc @@ -10,8 +10,10 @@ ifeq ($(TARGET), riscv32_mcu) $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/env/entry.S \ $(MAKEFILE_DIR)/downloads/sifive_fe310_lib/bsp/env/freedom-e300-hifive1/init.c - person_detection_TEST_HDRS += $(MICRO_FE310_LIBWRAP_SRCS) $(MICRO_FE310_BSP_ENV_SRCS) - person_detection_SRCS += $(MICRO_FE310_LIBWRAP_SRCS) $(MICRO_FE310_BSP_ENV_SRCS) + person_detection_TEST_HDRS += $(MICRO_FE310_LIBWRAP_SRCS) $(MICRO_FE310_BSP_ENV_SRCS) \ + tensorflow/lite/micro/arduino/abi.cc + person_detection_SRCS += $(MICRO_FE310_LIBWRAP_SRCS) $(MICRO_FE310_BSP_ENV_SRCS) \ + tensorflow/lite/micro/arduino/abi.cc LIBWRAP_SYMS := malloc free \ open lseek read write fstat stat close link unlink \ From 2f9335a19722165cb3c3b26319e34ddecd9305e0 Mon Sep 17 00:00:00 2001 From: bbbboom Date: Sat, 7 Mar 2020 16:38:08 +0800 Subject: [PATCH 0004/2522] Fix a go binding install error on windows platform If running *go generate github.com/tensorflow/tensorflow/tensorflow/go/op* on windows platform, it well show > \go_path/src/github.com/tensorflow/tensorflow: warning: directory does not exist. Cannot convert path "\go_path/src/github.com/tensorflow/tensorflow/tensorflow/core/framework/*.proto" to or from Windows style ..\genop\main.go:17: running "bash": exit status 1 ..\github.com\tensorflow\tensorflow\tensorflow\go\op\generate.go:17: running "go": exit status 1 So this commit will automatically convert windows style slashes. --- tensorflow/go/genop/generate.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tensorflow/go/genop/generate.sh b/tensorflow/go/genop/generate.sh index 18bee11da5a..9df20c47a5e 100644 --- a/tensorflow/go/genop/generate.sh +++ b/tensorflow/go/genop/generate.sh @@ -24,6 +24,15 @@ then GOPATH=$(go env GOPATH) fi +# Check if it is running in git-bash +ls -la / | grep git-bash.exe +if [ $? -e "0" ] +then + GOPATH=${GOPATH//\\/\/} + GOPATH=/${GOPATH//:/} +fi + + cd $(dirname $0) for g in $(echo "${GOPATH//:/ }"); do TF_DIR="${g}/src/github.com/tensorflow/tensorflow" From d61528e84f5627cc8ecea151ddfe55321f3e303c Mon Sep 17 00:00:00 2001 From: bbbboom Date: Thu, 12 Mar 2020 20:39:46 +0800 Subject: [PATCH 0005/2522] C needs to distinguish between platforms Command needs to distinguish between platforms. Maybe it would be better. --- tensorflow/go/genop/generate.go | 5 + tensorflow/go/genop/generate.sh | 139 +++++++++++++-------------- tensorflow/go/genop/generate.win.go | 5 + tensorflow/go/genop/main.go | 142 ++++++++++++++-------------- 4 files changed, 148 insertions(+), 143 deletions(-) create mode 100644 tensorflow/go/genop/generate.go create mode 100644 tensorflow/go/genop/generate.win.go diff --git a/tensorflow/go/genop/generate.go b/tensorflow/go/genop/generate.go new file mode 100644 index 00000000000..72ec6f552c2 --- /dev/null +++ b/tensorflow/go/genop/generate.go @@ -0,0 +1,5 @@ +// +build !windows + +//go:generate bash generate.sh + +package main diff --git a/tensorflow/go/genop/generate.sh b/tensorflow/go/genop/generate.sh index 9df20c47a5e..6346f12687d 100644 --- a/tensorflow/go/genop/generate.sh +++ b/tensorflow/go/genop/generate.sh @@ -1,71 +1,68 @@ -#!/usr/bin/env bash -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -set -e - -go get github.com/golang/protobuf/proto -go get github.com/golang/protobuf/protoc-gen-go - -if [ -z "${GOPATH}" ] -then - GOPATH=$(go env GOPATH) -fi - -# Check if it is running in git-bash -ls -la / | grep git-bash.exe -if [ $? -e "0" ] -then - GOPATH=${GOPATH//\\/\/} - GOPATH=/${GOPATH//:/} -fi - - -cd $(dirname $0) -for g in $(echo "${GOPATH//:/ }"); do - TF_DIR="${g}/src/github.com/tensorflow/tensorflow" - PROTOC="${TF_DIR}/bazel-out/host/bin/external/protobuf/protoc" - if [ -x "${PROTOC}" ]; then - break - fi -done - -if [ ! -x "${PROTOC}" ] -then - set +e - PATH_PROTOC=$(which protoc) - if [ ! -x "${PATH_PROTOC}" ] - then - echo "Protocol buffer compiler protoc not found in PATH or in ${PROTOC}" - echo "Perhaps build it using:" - echo "bazel build --config opt @com_google_protobuf//:protoc" - exit 1 - fi - PROTOC=$PATH_PROTOC - set -e -fi - -# Ensure that protoc-gen-go is available in $PATH -# Since ${PROTOC} will require it. -export PATH=$PATH:${GOPATH}/bin -mkdir -p ../vendor -for FILE in ${TF_DIR}/tensorflow/core/framework/*.proto \ - ${TF_DIR}/tensorflow/core/protobuf/*.proto \ - ${TF_DIR}/tensorflow/stream_executor/*.proto; do - ${PROTOC} \ - -I ${TF_DIR} \ - --go_out=../vendor \ - $FILE -done +#!/usr/bin/env bash +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +set -e + +go get github.com/golang/protobuf/proto +go get github.com/golang/protobuf/protoc-gen-go + +if [ -z "${GOPATH}" ] +then + GOPATH=$(go env GOPATH) +fi + +# change GOPATH style +if [ $1 == "win" ]; then + GOPATH=${GOPATH//\\/\/} + GOPATH=/${GOPATH//:/} +fi + +cd $(dirname $0) +for g in $(echo "${GOPATH//:/ }"); do + TF_DIR="${g}/src/github.com/tensorflow/tensorflow" + PROTOC="${TF_DIR}/bazel-out/host/bin/external/protobuf/protoc" + if [ -x "${PROTOC}" ]; then + break + fi +done + +if [ ! -x "${PROTOC}" ] +then + set +e + PATH_PROTOC=$(which protoc) + if [ ! -x "${PATH_PROTOC}" ] + then + echo "Protocol buffer compiler protoc not found in PATH or in ${PROTOC}" + echo "Perhaps build it using:" + echo "bazel build --config opt @com_google_protobuf//:protoc" + exit 1 + fi + PROTOC=$PATH_PROTOC + set -e +fi + +# Ensure that protoc-gen-go is available in $PATH +# Since ${PROTOC} will require it. 
+export PATH=$PATH:${GOPATH}/bin +mkdir -p ../vendor +for FILE in ${TF_DIR}/tensorflow/core/framework/*.proto \ + ${TF_DIR}/tensorflow/core/protobuf/*.proto \ + ${TF_DIR}/tensorflow/stream_executor/*.proto; do + ${PROTOC} \ + -I ${TF_DIR} \ + --go_out=../vendor \ + $FILE +done diff --git a/tensorflow/go/genop/generate.win.go b/tensorflow/go/genop/generate.win.go new file mode 100644 index 00000000000..23b27fffc2a --- /dev/null +++ b/tensorflow/go/genop/generate.win.go @@ -0,0 +1,5 @@ +// +build windows + +//go:generate bash generate.sh win + +package main diff --git a/tensorflow/go/genop/main.go b/tensorflow/go/genop/main.go index 4a53084ed13..83a361176f0 100644 --- a/tensorflow/go/genop/main.go +++ b/tensorflow/go/genop/main.go @@ -1,72 +1,70 @@ -/* -Copyright 2016 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -//go:generate bash generate.sh - -// Command genop generates a Go source file with functions for TensorFlow ops. -package main - -import ( - "bytes" - "flag" - "go/format" - "io/ioutil" - "log" - "os" - "path/filepath" - "strings" - - "github.com/tensorflow/tensorflow/tensorflow/go/genop/internal" -) - -func main() { - var ( - filename = flag.String("outfile", "", "File to write generated source code to.") - header = flag.String("header", "", "Path to a file whose contents will be copied into the generated file. Can be empty") - apiDefDirs = flag.String("api_def_dirs", "", "Comma-separated directories containing api_def_*.pbtxt files.") - buf bytes.Buffer - ) - flag.Parse() - if *filename == "" { - log.Fatal("-outfile must be set") - } - if *header != "" { - hdr, err := ioutil.ReadFile(*header) - if err != nil { - log.Fatalf("Unable to read %s: %v", *header, err) - } - buf.Write(hdr) - buf.WriteString("\n\n") - } - os.MkdirAll(filepath.Dir(*filename), 0755) - - apiDefDirsList := []string{} - if len(*apiDefDirs) > 0 { - apiDefDirsList = strings.Split(*apiDefDirs, ",") - } - - if err := internal.GenerateFunctionsForRegisteredOps( - &buf, apiDefDirsList); err != nil { - log.Fatal(err) - } - formatted, err := format.Source(buf.Bytes()) - if err != nil { - log.Fatalf("Failed to generate valid source? 'go fmt' failed: %v", err) - } - if err := ioutil.WriteFile(*filename, formatted, 0644); err != nil { - log.Fatalf("Failed to write to %q: %v", *filename, err) - } -} +/* +Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Command genop generates a Go source file with functions for TensorFlow ops. 
+package main + +import ( + "bytes" + "flag" + "go/format" + "io/ioutil" + "log" + "os" + "path/filepath" + "strings" + + "github.com/tensorflow/tensorflow/tensorflow/go/genop/internal" +) + +func main() { + var ( + filename = flag.String("outfile", "", "File to write generated source code to.") + header = flag.String("header", "", "Path to a file whose contents will be copied into the generated file. Can be empty") + apiDefDirs = flag.String("api_def_dirs", "", "Comma-separated directories containing api_def_*.pbtxt files.") + buf bytes.Buffer + ) + flag.Parse() + if *filename == "" { + log.Fatal("-outfile must be set") + } + if *header != "" { + hdr, err := ioutil.ReadFile(*header) + if err != nil { + log.Fatalf("Unable to read %s: %v", *header, err) + } + buf.Write(hdr) + buf.WriteString("\n\n") + } + os.MkdirAll(filepath.Dir(*filename), 0755) + + apiDefDirsList := []string{} + if len(*apiDefDirs) > 0 { + apiDefDirsList = strings.Split(*apiDefDirs, ",") + } + + if err := internal.GenerateFunctionsForRegisteredOps( + &buf, apiDefDirsList); err != nil { + log.Fatal(err) + } + formatted, err := format.Source(buf.Bytes()) + if err != nil { + log.Fatalf("Failed to generate valid source? 'go fmt' failed: %v", err) + } + if err := ioutil.WriteFile(*filename, formatted, 0644); err != nil { + log.Fatalf("Failed to write to %q: %v", *filename, err) + } +} From e75080ce47a30f0c331446f674551e098fa09f58 Mon Sep 17 00:00:00 2001 From: bbbboom Date: Thu, 12 Mar 2020 20:45:26 +0800 Subject: [PATCH 0006/2522] change CR-LF to LF --- tensorflow/go/genop/generate.go | 10 +- tensorflow/go/genop/generate.sh | 136 +++++++++++++-------------- tensorflow/go/genop/generate.win.go | 10 +- tensorflow/go/genop/main.go | 140 ++++++++++++++-------------- 4 files changed, 148 insertions(+), 148 deletions(-) diff --git a/tensorflow/go/genop/generate.go b/tensorflow/go/genop/generate.go index 72ec6f552c2..fcf1d65d594 100644 --- a/tensorflow/go/genop/generate.go +++ b/tensorflow/go/genop/generate.go @@ -1,5 +1,5 @@ -// +build !windows - -//go:generate bash generate.sh - -package main +// +build !windows + +//go:generate bash generate.sh + +package main diff --git a/tensorflow/go/genop/generate.sh b/tensorflow/go/genop/generate.sh index 6346f12687d..54541106f13 100644 --- a/tensorflow/go/genop/generate.sh +++ b/tensorflow/go/genop/generate.sh @@ -1,68 +1,68 @@ -#!/usr/bin/env bash -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -set -e - -go get github.com/golang/protobuf/proto -go get github.com/golang/protobuf/protoc-gen-go - -if [ -z "${GOPATH}" ] -then - GOPATH=$(go env GOPATH) -fi - -# change GOPATH style -if [ $1 == "win" ]; then - GOPATH=${GOPATH//\\/\/} - GOPATH=/${GOPATH//:/} -fi - -cd $(dirname $0) -for g in $(echo "${GOPATH//:/ }"); do - TF_DIR="${g}/src/github.com/tensorflow/tensorflow" - PROTOC="${TF_DIR}/bazel-out/host/bin/external/protobuf/protoc" - if [ -x "${PROTOC}" ]; then - break - fi -done - -if [ ! -x "${PROTOC}" ] -then - set +e - PATH_PROTOC=$(which protoc) - if [ ! -x "${PATH_PROTOC}" ] - then - echo "Protocol buffer compiler protoc not found in PATH or in ${PROTOC}" - echo "Perhaps build it using:" - echo "bazel build --config opt @com_google_protobuf//:protoc" - exit 1 - fi - PROTOC=$PATH_PROTOC - set -e -fi - -# Ensure that protoc-gen-go is available in $PATH -# Since ${PROTOC} will require it. -export PATH=$PATH:${GOPATH}/bin -mkdir -p ../vendor -for FILE in ${TF_DIR}/tensorflow/core/framework/*.proto \ - ${TF_DIR}/tensorflow/core/protobuf/*.proto \ - ${TF_DIR}/tensorflow/stream_executor/*.proto; do - ${PROTOC} \ - -I ${TF_DIR} \ - --go_out=../vendor \ - $FILE -done +#!/usr/bin/env bash +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +set -e + +go get github.com/golang/protobuf/proto +go get github.com/golang/protobuf/protoc-gen-go + +if [ -z "${GOPATH}" ] +then + GOPATH=$(go env GOPATH) +fi + +# change GOPATH style +if [ $1 == "win" ]; then + GOPATH=${GOPATH//\\/\/} + GOPATH=/${GOPATH//:/} +fi + +cd $(dirname $0) +for g in $(echo "${GOPATH//:/ }"); do + TF_DIR="${g}/src/github.com/tensorflow/tensorflow" + PROTOC="${TF_DIR}/bazel-out/host/bin/external/protobuf/protoc" + if [ -x "${PROTOC}" ]; then + break + fi +done + +if [ ! -x "${PROTOC}" ] +then + set +e + PATH_PROTOC=$(which protoc) + if [ ! -x "${PATH_PROTOC}" ] + then + echo "Protocol buffer compiler protoc not found in PATH or in ${PROTOC}" + echo "Perhaps build it using:" + echo "bazel build --config opt @com_google_protobuf//:protoc" + exit 1 + fi + PROTOC=$PATH_PROTOC + set -e +fi + +# Ensure that protoc-gen-go is available in $PATH +# Since ${PROTOC} will require it. 
+export PATH=$PATH:${GOPATH}/bin +mkdir -p ../vendor +for FILE in ${TF_DIR}/tensorflow/core/framework/*.proto \ + ${TF_DIR}/tensorflow/core/protobuf/*.proto \ + ${TF_DIR}/tensorflow/stream_executor/*.proto; do + ${PROTOC} \ + -I ${TF_DIR} \ + --go_out=../vendor \ + $FILE +done diff --git a/tensorflow/go/genop/generate.win.go b/tensorflow/go/genop/generate.win.go index 23b27fffc2a..3eff6ab5e7e 100644 --- a/tensorflow/go/genop/generate.win.go +++ b/tensorflow/go/genop/generate.win.go @@ -1,5 +1,5 @@ -// +build windows - -//go:generate bash generate.sh win - -package main +// +build windows + +//go:generate bash generate.sh win + +package main diff --git a/tensorflow/go/genop/main.go b/tensorflow/go/genop/main.go index 83a361176f0..87c1d27c3b5 100644 --- a/tensorflow/go/genop/main.go +++ b/tensorflow/go/genop/main.go @@ -1,70 +1,70 @@ -/* -Copyright 2016 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -// Command genop generates a Go source file with functions for TensorFlow ops. -package main - -import ( - "bytes" - "flag" - "go/format" - "io/ioutil" - "log" - "os" - "path/filepath" - "strings" - - "github.com/tensorflow/tensorflow/tensorflow/go/genop/internal" -) - -func main() { - var ( - filename = flag.String("outfile", "", "File to write generated source code to.") - header = flag.String("header", "", "Path to a file whose contents will be copied into the generated file. Can be empty") - apiDefDirs = flag.String("api_def_dirs", "", "Comma-separated directories containing api_def_*.pbtxt files.") - buf bytes.Buffer - ) - flag.Parse() - if *filename == "" { - log.Fatal("-outfile must be set") - } - if *header != "" { - hdr, err := ioutil.ReadFile(*header) - if err != nil { - log.Fatalf("Unable to read %s: %v", *header, err) - } - buf.Write(hdr) - buf.WriteString("\n\n") - } - os.MkdirAll(filepath.Dir(*filename), 0755) - - apiDefDirsList := []string{} - if len(*apiDefDirs) > 0 { - apiDefDirsList = strings.Split(*apiDefDirs, ",") - } - - if err := internal.GenerateFunctionsForRegisteredOps( - &buf, apiDefDirsList); err != nil { - log.Fatal(err) - } - formatted, err := format.Source(buf.Bytes()) - if err != nil { - log.Fatalf("Failed to generate valid source? 'go fmt' failed: %v", err) - } - if err := ioutil.WriteFile(*filename, formatted, 0644); err != nil { - log.Fatalf("Failed to write to %q: %v", *filename, err) - } -} +/* +Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +// Command genop generates a Go source file with functions for TensorFlow ops. +package main + +import ( + "bytes" + "flag" + "go/format" + "io/ioutil" + "log" + "os" + "path/filepath" + "strings" + + "github.com/tensorflow/tensorflow/tensorflow/go/genop/internal" +) + +func main() { + var ( + filename = flag.String("outfile", "", "File to write generated source code to.") + header = flag.String("header", "", "Path to a file whose contents will be copied into the generated file. Can be empty") + apiDefDirs = flag.String("api_def_dirs", "", "Comma-separated directories containing api_def_*.pbtxt files.") + buf bytes.Buffer + ) + flag.Parse() + if *filename == "" { + log.Fatal("-outfile must be set") + } + if *header != "" { + hdr, err := ioutil.ReadFile(*header) + if err != nil { + log.Fatalf("Unable to read %s: %v", *header, err) + } + buf.Write(hdr) + buf.WriteString("\n\n") + } + os.MkdirAll(filepath.Dir(*filename), 0755) + + apiDefDirsList := []string{} + if len(*apiDefDirs) > 0 { + apiDefDirsList = strings.Split(*apiDefDirs, ",") + } + + if err := internal.GenerateFunctionsForRegisteredOps( + &buf, apiDefDirsList); err != nil { + log.Fatal(err) + } + formatted, err := format.Source(buf.Bytes()) + if err != nil { + log.Fatalf("Failed to generate valid source? 'go fmt' failed: %v", err) + } + if err := ioutil.WriteFile(*filename, formatted, 0644); err != nil { + log.Fatalf("Failed to write to %q: %v", *filename, err) + } +} From 2328b196ba64072f18dd5b093c1455a56168b506 Mon Sep 17 00:00:00 2001 From: Chris Tessum Date: Fri, 1 May 2020 11:12:54 -0500 Subject: [PATCH 0007/2522] go: Add input mapping option when importing Graph --- tensorflow/go/graph.go | 30 +++++++++++++++++ tensorflow/go/graph_test.go | 67 +++++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+) diff --git a/tensorflow/go/graph.go b/tensorflow/go/graph.go index b3b2c9cc20a..ac28c3ac5bd 100644 --- a/tensorflow/go/graph.go +++ b/tensorflow/go/graph.go @@ -61,9 +61,33 @@ type GraphImportOptions struct { // Execution device Device string + // inputMapping defines a mapping between Outputs in the graph + // and Outputs they should be replaced with. + inputMapping map[struct { + Name string + Index int + }]Output + // TODO: extend this structure to support more options from TF_ImportGraphDefOptions } +// AddInputMapping adds a mapping between an Output in the imported graph +// and an Ouput in the destination graph that it should be replaced with, +// where src:srcIndex is the name of the Operation and Output index to +// replace and dst is the output to replace it with. +func (o *GraphImportOptions) AddInputMapping(src string, srcIndex int, dst Output) { + if o.inputMapping == nil { + o.inputMapping = make(map[struct { + Name string + Index int + }]Output) + } + o.inputMapping[struct { + Name string + Index int + }{src, srcIndex}] = dst +} + // NewGraph returns a new Graph. 
func NewGraph() *Graph { g := &Graph{C.TF_NewGraph()} @@ -122,6 +146,12 @@ func (g *Graph) ImportWithOptions(def []byte, options GraphImportOptions) error C.TF_ImportGraphDefOptionsSetDefaultDevice(opts, cdev) } + for src, dst := range options.inputMapping { + cSrcName := C.CString(src.Name) + C.TF_ImportGraphDefOptionsAddInputMapping(opts, cSrcName, C.int(src.Index), dst.c()) + C.free(unsafe.Pointer(cSrcName)) + } + buf := C.TF_NewBuffer() defer C.TF_DeleteBuffer(buf) buf.length = C.size_t(len(def)) diff --git a/tensorflow/go/graph_test.go b/tensorflow/go/graph_test.go index 067c7db5c3c..bb112303807 100644 --- a/tensorflow/go/graph_test.go +++ b/tensorflow/go/graph_test.go @@ -82,6 +82,73 @@ func TestGraphWriteToAndImport(t *testing.T) { } } +func TestGraphInputMapping(t *testing.T) { + // Construct a graph + g := NewGraph() + v, err := NewTensor(int64(1)) + if err != nil { + t.Fatal(err) + } + input, err := Placeholder(g, "input", v.DataType()) + if err != nil { + t.Fatal(err) + } + neg, err := Neg(g, "neg", input) + if err != nil { + t.Fatal(err) + } + + // Serialize the graph + buf := new(bytes.Buffer) + if _, err := g.WriteTo(buf); err != nil { + t.Fatal(err) + } + + g = NewGraph() + v, err = NewTensor(int64(1)) + if err != nil { + t.Fatal(err) + } + + replacement, err := Placeholder(g, "replacement", v.DataType()) + if err != nil { + t.Fatal(err) + } + + options := GraphImportOptions{ + Prefix: "imported", + } + options.AddInputMapping("input", 0, replacement) + // Import it into the same graph, with a prefix and replacement + if err := g.ImportWithOptions(buf.Bytes(), options); err != nil { + t.Error(err) + } + if err := hasOperations(g, "replacement", "imported/neg"); err != nil { + t.Error(err) + } + + sess, err := NewSession(g, nil) + if err != nil { + t.Fatal(err) + } + + neg = g.Operation("imported/neg").Output(0) + + outputs, err := sess.Run( + map[Output]*Tensor{replacement: v}, + []Output{neg}, + nil) + if err != nil { + t.Fatal(err) + } + if len(outputs) != 1 { + t.Fatal(len(outputs)) + } + if outputs[0].Value().(int64) != -1 { + t.Fatalf("Got %v, wanted int64 -1", outputs[0].Value()) + } +} + func TestGraphAddGradients(t *testing.T) { g := NewGraph() x1, err := Placeholder(g, "x1", Float) From c13a0bf808a4145fb2ffc894f4af1c95f21dbc79 Mon Sep 17 00:00:00 2001 From: ngc92 <7938269+ngc92@users.noreply.github.com> Date: Mon, 11 May 2020 13:08:35 +0300 Subject: [PATCH 0008/2522] add a with_values method to SparseTensor --- tensorflow/python/framework/sparse_tensor.py | 15 +++++++++++++++ tensorflow/python/framework/sparse_tensor_test.py | 8 ++++++++ 2 files changed, 23 insertions(+) diff --git a/tensorflow/python/framework/sparse_tensor.py b/tensorflow/python/framework/sparse_tensor.py index 76cb24f2cc6..ab7afabeae5 100644 --- a/tensorflow/python/framework/sparse_tensor.py +++ b/tensorflow/python/framework/sparse_tensor.py @@ -178,6 +178,21 @@ class SparseTensor(internal.NativeObject, composite_tensor.CompositeTensor): """ return self._values + def with_values(self, new_values): + """Returns a copy of `self` with `values` replaced by `new_values`. + + This method produces a new `SparseTensor` that has the same nonzero + indices, but updated values. + + Args: + new_values: The values of the new `SparseTensor. Needs to have the same + shape as the current `.values` `Tensor`. + + Returns: + A `SparseTensor` with identical indices but updated values. 
+ """ + return SparseTensor(self._indices, new_values, self._dense_shape) + @property def op(self): """The `Operation` that produces `values` as an output.""" diff --git a/tensorflow/python/framework/sparse_tensor_test.py b/tensorflow/python/framework/sparse_tensor_test.py index 0d18af1fe2f..4db32065960 100644 --- a/tensorflow/python/framework/sparse_tensor_test.py +++ b/tensorflow/python/framework/sparse_tensor_test.py @@ -97,6 +97,14 @@ class SparseTensorTest(test_util.TensorFlowTestCase): self.assertIn(dense.op, sp.consumers()) self.assertIn(out.op, sp.consumers()) + def testWithValues(self): + source = sparse_tensor.SparseTensor( + indices=[[0, 0], [1, 2]], values=[1., 2], dense_shape=[3, 4]) + new_tensor = tensor.with_values([5.0, 1.0]) + self.assertAllEqual(new_tensor.indices, source.indices) + self.assertAllEqual(new_tensor.values, [5.0, 1.0]) + self.assertAllEqual(new_tensor.dense_shape, source.dense_shape) + class ConvertToTensorOrSparseTensorTest(test_util.TensorFlowTestCase): From b3d9b905e15df29817907ebe0c4f0ebe4819468c Mon Sep 17 00:00:00 2001 From: ngc92 <7938269+ngc92@users.noreply.github.com> Date: Tue, 12 May 2020 22:06:58 +0300 Subject: [PATCH 0009/2522] fixed typo --- tensorflow/python/framework/sparse_tensor_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/framework/sparse_tensor_test.py b/tensorflow/python/framework/sparse_tensor_test.py index 4db32065960..71693087c28 100644 --- a/tensorflow/python/framework/sparse_tensor_test.py +++ b/tensorflow/python/framework/sparse_tensor_test.py @@ -100,7 +100,7 @@ class SparseTensorTest(test_util.TensorFlowTestCase): def testWithValues(self): source = sparse_tensor.SparseTensor( indices=[[0, 0], [1, 2]], values=[1., 2], dense_shape=[3, 4]) - new_tensor = tensor.with_values([5.0, 1.0]) + new_tensor = source.with_values([5.0, 1.0]) self.assertAllEqual(new_tensor.indices, source.indices) self.assertAllEqual(new_tensor.values, [5.0, 1.0]) self.assertAllEqual(new_tensor.dense_shape, source.dense_shape) From 1c73e5f20a7b69169e69e14d835c257b6c4e355b Mon Sep 17 00:00:00 2001 From: "aaa.jq" <895521320@qq.com> Date: Tue, 19 May 2020 19:30:06 +0800 Subject: [PATCH 0010/2522] Add judgment for whether the sp_input is ordered Fix a bug that the output will not be ordered when the sp_input is unordered and all_rows_full is true. 
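For context, the existing fast path hands the input tensors straight through
whenever every row already has at least one entry, but that is only safe if
the COO indices are also in row order; the patch below adds an
`indices_is_order` flag that tracks whether the row indices are
non-decreasing. A minimal standalone sketch of that check, illustrative only
and not TensorFlow code (the helper name and sample values are made up):

// order_check_demo.cc (hypothetical file, for illustration only).
#include <cstdint>
#include <iostream>
#include <vector>

// Mirrors the check added to SparseFillEmptyRowsOp: the row component of the
// indices must be non-decreasing for the pass-through path to be valid.
bool RowsAreOrdered(const std::vector<int64_t>& rows) {
  bool ordered = true;
  int64_t last_row = 0;
  for (int64_t row : rows) {
    ordered = ordered && (row >= last_row);
    last_row = row;
  }
  return ordered;
}

int main() {
  // Every row of a 3-row sparse input is occupied ("all_rows_full" is true),
  // but the entries arrive out of row order, so returning the input unchanged
  // would produce an unordered SparseTensor.
  std::vector<int64_t> rows = {2, 0, 1};
  std::cout << std::boolalpha << RowsAreOrdered(rows) << "\n";  // prints false
  return 0;
}

With both conditions required, the kernel only skips the rebuild when the
output is guaranteed to stay ordered.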
--- tensorflow/core/kernels/sparse_fill_empty_rows_op.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/sparse_fill_empty_rows_op.cc b/tensorflow/core/kernels/sparse_fill_empty_rows_op.cc index 8de93cf9b30..77bae318977 100644 --- a/tensorflow/core/kernels/sparse_fill_empty_rows_op.cc +++ b/tensorflow/core/kernels/sparse_fill_empty_rows_op.cc @@ -118,13 +118,17 @@ class SparseFillEmptyRowsOp : public OpKernel { return; } + bool indices_is_order = true; + int64 last_indices_row = 0; std::vector csr_offset(dense_rows, 0); for (int i = 0; i < N; ++i) { const int64 row = indices(i, 0); OP_REQUIRES(context, row >= 0 && row < dense_rows, errors::InvalidArgument("indices(", i, ", 0) is invalid: ", row, " >= ", dense_rows)); - ++csr_offset[indices(i, 0)]; + ++csr_offset[row]; + indices_is_order = indices_is_order & (row >= last_indices_row); + last_indices_row = row; } bool all_rows_full = true; for (int row = 0; row < dense_rows; ++row) { @@ -147,7 +151,7 @@ class SparseFillEmptyRowsOp : public OpKernel { } } - if (all_rows_full) { + if (all_rows_full && indices_is_order) { context->set_output(kOutputIndicesOutput, indices_t); context->set_output(kOutputValuesOutput, values_t); if (reverse_index_map) { From 43d8a825abba5b3f4efe663fc7d62487c377d42c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A5ns=20Nilsson?= Date: Mon, 25 May 2020 14:28:11 +0200 Subject: [PATCH 0011/2522] TFLu: Update RELU Current RELU was not bit exact to Convnet Change-Id: Ib7ffdc97a893d4133ce355e3a06f1b66e6793325 --- tensorflow/lite/micro/kernels/activations.cc | 48 ++++++++++++++++---- 1 file changed, 38 insertions(+), 10 deletions(-) diff --git a/tensorflow/lite/micro/kernels/activations.cc b/tensorflow/lite/micro/kernels/activations.cc index 4a9b8ce5d8e..629144a54ce 100644 --- a/tensorflow/lite/micro/kernels/activations.cc +++ b/tensorflow/lite/micro/kernels/activations.cc @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/op_macros.h" #include "tensorflow/lite/micro/micro_utils.h" +#include "tensorflow/lite/kernels/internal/types.h" namespace tflite { namespace ops { @@ -31,14 +32,43 @@ constexpr int kInputTensor = 0; constexpr int kOutputTensor = 0; template -inline void ReluQuantized(int32_t lower, const RuntimeShape& input_shape, - const Q* input_data, const RuntimeShape& output_shape, - Q* output_data) { +inline void ReluQuantized(const TfLiteTensor* input, TfLiteTensor* output, + const Q* input_data, Q* output_data) { + ReluParams params; + float act_min = 0.0; + float act_max = std::numeric_limits::infinity(); + double real_multiplier = input->params.scale / output->params.scale; + + const RuntimeShape input_shape = GetTensorShape(input); + const RuntimeShape output_shape = GetTensorShape(output); + + QuantizeMultiplier(real_multiplier, ¶ms.output_multiplier, + ¶ms.output_shift); + + params.quantized_activation_min = + std::max(static_cast(std::numeric_limits::min()), + output->params.zero_point + + static_cast(roundf(act_min / output->params.scale))); + params.quantized_activation_max = + act_max == std::numeric_limits::infinity() + ? 
static_cast(std::numeric_limits::max()) + : std::min( + static_cast(std::numeric_limits::max()), + output->params.zero_point + + static_cast(roundf(act_max / output->params.scale))); + params.input_offset = input->params.zero_point; + params.output_offset = output->params.zero_point; + const int flat_size = MatchingFlatSize(input_shape, output_shape); for (int i = 0; i < flat_size; ++i) { - const Q val = input_data[i]; - const Q clamped = val < lower ? lower : val; - output_data[i] = clamped; + const int32 val = static_cast(input_data[i]); + int32 clamped = params.output_offset + + MultiplyByQuantizedMultiplier(val - params.input_offset, + params.output_multiplier, + params.output_shift); + clamped = std::max(params.quantized_activation_min, clamped); + clamped = std::min(params.quantized_activation_max, clamped); + output_data[i] = static_cast(clamped); } } @@ -93,16 +123,14 @@ TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } case kTfLiteInt8: { - ReluQuantized(input->params.zero_point, GetTensorShape(input), + ReluQuantized(input, output, GetTensorData(input), - GetTensorShape(output), GetTensorData(output)); return kTfLiteOk; } case kTfLiteUInt8: { - ReluQuantized(input->params.zero_point, GetTensorShape(input), + ReluQuantized(input, output, GetTensorData(input), - GetTensorShape(output), GetTensorData(output)); return kTfLiteOk; } From 8817f8ef0ed4440b68849cc7f42d93f6752fcce8 Mon Sep 17 00:00:00 2001 From: Elena Zhelezina Date: Wed, 27 May 2020 17:06:00 +0100 Subject: [PATCH 0012/2522] Added 16x8 reference kernel for MEAN operator. Tests are extended for 16x8 case. --- .../delegates/nnapi/acceleration_test_list.cc | 4 +- .../internal/reference/integer_ops/mean.h | 13 +- .../lite/kernels/internal/reference/reduce.h | 3 + tensorflow/lite/kernels/reduce.cc | 106 +++++++++------- tensorflow/lite/kernels/reduce_test.cc | 117 ++++++++++++++---- tensorflow/lite/kernels/register.cc | 2 +- tensorflow/lite/toco/tflite/op_version.cc | 1 + .../lite/tools/versioning/op_version.cc | 2 +- 8 files changed, 169 insertions(+), 79 deletions(-) diff --git a/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc b/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc index 46a6a720d1e..acacfc91c64 100644 --- a/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc +++ b/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc @@ -287,7 +287,9 @@ QuantizeOpTest/UINT8,29 -ConstUint8(Mean|Sum)OpTest/.+ ConstUint8(Max|Min)OpTest/.+,29 ConstUint8(Mean)OpTest/.+ -Constint8(Mean|Max|Min)OpTest/.+ +Constint8(Max|Min)OpTest/.+ +ConstMeanOpTest.+/.+Int8 +MeanOpTest.+/.+Int8 ConstFloat(Sum|Prod|Max|Min)OpTest/NotKeepDims,29 ConstFloat(Sum|Prod|Max|Min)OpTest/KeepDims,29 ConstFloat(Mean|Any)OpTest/NotKeepDims diff --git a/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h b/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h index 1a00130cd8d..738e1306db2 100644 --- a/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h +++ b/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h @@ -20,11 +20,12 @@ limitations under the License. 
namespace tflite { namespace reference_integer_ops { +template inline void Mean(const tflite::MeanParams& op_params, int32_t multiplier, int32_t shift, const RuntimeShape& unextended_input_shape, - const int8_t* input_data, int32 input_zero_point, + const integer_type* input_data, int32 input_zero_point, const RuntimeShape& unextended_output_shape, - int8_t* output_data, int32 output_zero_point) { + integer_type* output_data, int32 output_zero_point) { // Current implementation only supports dimension equals 4 and simultaneous // reduction over width and height. TFLITE_CHECK_EQ(unextended_input_shape.DimensionsCount(), 4); @@ -47,8 +48,8 @@ inline void Mean(const tflite::MeanParams& op_params, int32_t multiplier, TFLITE_CHECK_EQ(output_height, 1); TFLITE_CHECK_EQ(output_width, 1); - static constexpr int32_t kMinInt8 = std::numeric_limits::min(); - static constexpr int32_t kMaxInt8 = std::numeric_limits::max(); + static constexpr int32_t kMinInt = std::numeric_limits::min(); + static constexpr int32_t kMaxInt = std::numeric_limits::max(); for (int out_b = 0; out_b < output_batch; ++out_b) { for (int out_d = 0; out_d < output_depth; ++out_d) { @@ -63,9 +64,9 @@ inline void Mean(const tflite::MeanParams& op_params, int32_t multiplier, acc = acc > 0 ? (acc + num_elements_in_axis / 2) / num_elements_in_axis : (acc - num_elements_in_axis / 2) / num_elements_in_axis; acc += output_zero_point; - acc = std::min(std::max(acc, kMinInt8), kMaxInt8); + acc = std::min(std::max(acc, kMinInt), kMaxInt); output_data[Offset(output_shape, out_b, 0, 0, out_d)] = - static_cast(acc); + static_cast(acc); } } } diff --git a/tensorflow/lite/kernels/internal/reference/reduce.h b/tensorflow/lite/kernels/internal/reference/reduce.h index 17dfd8557ae..4f44a2b7764 100644 --- a/tensorflow/lite/kernels/internal/reference/reduce.h +++ b/tensorflow/lite/kernels/internal/reference/reduce.h @@ -320,8 +320,11 @@ inline bool QuantizedMeanOrSum(const T* input_data, int32 input_zero_point, int* temp_index, int* resolved_axis, U* temp_sum, bool compute_sum) { const bool uint8_case = std::is_same::value; + const bool int16_case = std::is_same::value; if (uint8_case) { ruy::profiler::ScopeLabel label(compute_sum ? "Sum/Uint8" : "Mean/Uint8"); + } else if (int16_case) { + ruy::profiler::ScopeLabel label(compute_sum ? "Sum/Int16" : "Mean/Int16"); } else { ruy::profiler::ScopeLabel label(compute_sum ? "Sum/Int8" : "Mean/Int8"); } diff --git a/tensorflow/lite/kernels/reduce.cc b/tensorflow/lite/kernels/reduce.cc index f0222a08fe3..31022b1cd5b 100644 --- a/tensorflow/lite/kernels/reduce.cc +++ b/tensorflow/lite/kernels/reduce.cc @@ -198,6 +198,9 @@ TfLiteStatus InitializeTemporaries(TfLiteContext* context, TfLiteNode* node, case kTfLiteInt8: temp_sum->type = kTfLiteInt32; break; + case kTfLiteInt16: + temp_sum->type = kTfLiteInt32; + break; case kTfLiteBool: temp_sum->type = kTfLiteBool; break; @@ -242,7 +245,8 @@ TfLiteStatus PrepareMeanOrSum(TfLiteContext* context, TfLiteNode* node) { // reduce_mean requires a buffer to store intermediate sum result. 
OpContext op_context(context, node); - if (op_context.input->type == kTfLiteInt8) { + if (op_context.input->type == kTfLiteInt8 || + op_context.input->type == kTfLiteInt16) { const double real_multiplier = static_cast(op_context.input->params.scale) / static_cast(op_context.output->params.scale); @@ -270,6 +274,56 @@ void ResolveAxis(const int* axis_data, int axis_count, } } +template +TfLiteStatus EvalMeanReferenceOps(TfLiteContext* context, + const OpContext& op_context, int num_axis, + OpData* data, TfLiteTensor* temp_index, + TfLiteTensor* resolved_axis, + TfLiteTensor* temp_sum) { + tflite::MeanParams op_params; + op_params.axis_count = num_axis; + ResolveAxis(GetTensorData(op_context.axis), num_axis, &op_params); + const TfLiteTensor* input = op_context.input; + // TODO(b/139102329): Handle all the cases in the combined reference + // method. + if (op_context.params->keep_dims && NumDimensions(input) == 4 && + op_params.axis_count == 2 && + ((op_params.axis[0] == 1 && op_params.axis[1] == 2) || + (op_params.axis[0] == 2 && op_params.axis[1] == 1))) { + reference_integer_ops::Mean( + op_params, data->multiplier, data->shift, GetTensorShape(input), + GetTensorData(input), op_context.input->params.zero_point, + GetTensorShape(op_context.output), + GetTensorData(op_context.output), + op_context.output->params.zero_point); + } else if (input->params.zero_point == op_context.output->params.zero_point && + input->params.scale == op_context.output->params.scale) { + TF_LITE_ENSURE( + context, + reference_ops::Mean( + GetTensorData(input), input->dims->data, + input->dims->size, GetTensorData(op_context.output), + op_context.output->dims->data, op_context.output->dims->size, + GetTensorData(op_context.axis), num_axis, + op_context.params->keep_dims, GetTensorData(temp_index), + GetTensorData(resolved_axis), GetTensorData(temp_sum))); + } else { + TF_LITE_ENSURE( + context, + reference_ops::QuantizedMeanOrSum<>( + GetTensorData(input), input->params.zero_point, + input->params.scale, input->dims->data, input->dims->size, + GetTensorData(op_context.output), + op_context.output->params.zero_point, + op_context.output->params.scale, op_context.output->dims->data, + op_context.output->dims->size, GetTensorData(op_context.axis), + num_axis, op_context.params->keep_dims, + GetTensorData(temp_index), GetTensorData(resolved_axis), + GetTensorData(temp_sum), + /*compute_sum=*/false)); + } +} + template TfLiteStatus EvalMean(TfLiteContext* context, TfLiteNode* node) { OpContext op_context(context, node); @@ -397,50 +451,12 @@ TfLiteStatus EvalMean(TfLiteContext* context, TfLiteNode* node) { GetTensorData(temp_sum))); break; case kTfLiteInt8: { - tflite::MeanParams op_params; - op_params.axis_count = num_axis; - ResolveAxis(GetTensorData(op_context.axis), num_axis, &op_params); - const TfLiteTensor* input = op_context.input; - // TODO(b/139102329): Handle all the cases in the combined reference - // method. 
- if (op_context.params->keep_dims && NumDimensions(input) == 4 && - op_params.axis_count == 2 && - ((op_params.axis[0] == 1 && op_params.axis[1] == 2) || - (op_params.axis[0] == 2 && op_params.axis[1] == 1))) { - reference_integer_ops::Mean( - op_params, data->multiplier, data->shift, GetTensorShape(input), - GetTensorData(input), op_context.input->params.zero_point, - GetTensorShape(op_context.output), - GetTensorData(op_context.output), - op_context.output->params.zero_point); - } else if (input->params.zero_point == - op_context.output->params.zero_point && - input->params.scale == op_context.output->params.scale) { - TF_LITE_ENSURE( - context, - reference_ops::Mean( - GetTensorData(input), input->dims->data, - input->dims->size, GetTensorData(op_context.output), - op_context.output->dims->data, op_context.output->dims->size, - GetTensorData(op_context.axis), num_axis, - op_context.params->keep_dims, GetTensorData(temp_index), - GetTensorData(resolved_axis), - GetTensorData(temp_sum))); - } else { - TF_LITE_ENSURE( - context, - reference_ops::QuantizedMeanOrSum<>( - GetTensorData(input), input->params.zero_point, - input->params.scale, input->dims->data, input->dims->size, - GetTensorData(op_context.output), - op_context.output->params.zero_point, - op_context.output->params.scale, op_context.output->dims->data, - op_context.output->dims->size, - GetTensorData(op_context.axis), num_axis, - op_context.params->keep_dims, GetTensorData(temp_index), - GetTensorData(resolved_axis), GetTensorData(temp_sum), - /*compute_sum=*/false)); - } + EvalMeanReferenceOps(context, op_context, num_axis, data, + temp_index, resolved_axis, temp_sum); + } break; + case kTfLiteInt16: { + EvalMeanReferenceOps(context, op_context, num_axis, data, + temp_index, resolved_axis, temp_sum); } break; case kTfLiteUInt8: { // TODO(b/139102329): Handle all the cases in the combined reference diff --git a/tensorflow/lite/kernels/reduce_test.cc b/tensorflow/lite/kernels/reduce_test.cc index ddbd5106063..068333a0e99 100644 --- a/tensorflow/lite/kernels/reduce_test.cc +++ b/tensorflow/lite/kernels/reduce_test.cc @@ -229,7 +229,14 @@ class AnyOpDynamicModel : public BaseOpModel { }; // for quantized Add, the error shouldn't exceed step -float GetTolerance(int min, int max) { return (max - min) / 255.0; } +template +float GetTolerance(int min, int max) { + if (std::is_same::value) { + return (max - min) / 65536.0; + } else { + return (max - min) / 255.0; + } +} // Tests for reduce_mean TEST(ConstFloatMeanOpTest, NotKeepDims) { @@ -426,65 +433,125 @@ TEST(ConstUint8MeanOpTest, KeepDims) { ElementsAreArray(ArrayFloatNear({0.3, 0.35, 0.55}, kQuantizedTolerance))); } -TEST(ConstInt8MeanOpTest, NonSpecialAxisSameScale) { - float kQuantizedTolerance = GetTolerance(-5.0, 5.0); +template +void MeanOpConstModelTest() { + float kQuantizedTolerance = GetTolerance(-5.0, 5.0); std::vector data = {105.0, 71.0, 233.0, 92.0, 227.0, 11.0, 14.0, 43.0}; - MeanOpConstModel m({TensorType_INT8, {1, 1, 2, 4}, 0.0, 255.0}, - {TensorType_INT8, {1, 2, 4}, 0.0, 255.0}, {1}, {1}, false); - m.QuantizeAndPopulate(m.Input(), data); + + float scale = tensor_dtype == TensorType_INT16 ? 
255 / 32767.0f : 0.0f; + + MeanOpConstModel m({tensor_dtype, {1, 1, 2, 4}, 0.0, 255.0, scale, 0}, + {tensor_dtype, {1, 2, 4}, 0.0, 255.0, scale, 0}, {1}, {1}, + false); + m.QuantizeAndPopulate(m.Input(), data); m.Invoke(); EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2, 4})); - EXPECT_THAT(m.GetDequantizedOutput(), + EXPECT_THAT(m.GetDequantizedOutput(), ElementsAreArray(ArrayFloatNear(data, kQuantizedTolerance))); } -TEST(ConstInt8MeanOpTest, NonSpecialAxisNonSameScale) { - float kQuantizedTolerance = GetTolerance(-5.0, 5.0); +class ConstMeanOpTestSameScale : public ::testing::Test {}; + +TEST_F(ConstMeanOpTestSameScale, NonSpecialAxisSameScaleInt8) { + MeanOpConstModelTest(); +} + +TEST_F(ConstMeanOpTestSameScale, NonSpecialAxisSameScaleInt16) { + MeanOpConstModelTest(); +} + +template +void ConstMeanOpTestNonSameScale() { + float kQuantizedTolerance = GetTolerance(-5.0, 5.0); std::vector data = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8}; - MeanOpConstModel m({TensorType_INT8, {1, 1, 2, 4}, -1.0, 1.0}, - {TensorType_INT8, {1, 2}, -5.0, 5.0}, {2}, {1, 3}, false); - m.QuantizeAndPopulate(m.Input(), data); + + float scale = tensor_dtype == TensorType_INT16 ? 1 / 32767.f : 0.0f; + + MeanOpConstModel m({tensor_dtype, {1, 1, 2, 4}, -1.0, 1.0, scale, 0}, + {tensor_dtype, {1, 2}, -5.0, 5.0, scale, 0}, {2}, {1, 3}, + false); + m.QuantizeAndPopulate(m.Input(), data); m.Invoke(); EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 2})); EXPECT_THAT( - m.GetDequantizedOutput(), + m.GetDequantizedOutput(), ElementsAreArray(ArrayFloatNear({0.25, 0.65}, kQuantizedTolerance))); } -TEST(ConstInt8MeanOpTest, QuantizedSameScale) { - float kQuantizedTolerance = GetTolerance(-5.0, 5.0); +class ConstMeanOpTestNonSameScale : public ::testing::Test {}; + +TEST_F(ConstMeanOpTestNonSameScale, NonSpecialAxisNonSameScaleInt8) { + MeanOpConstModelTest(); +} + +TEST_F(ConstMeanOpTestNonSameScale, NonSpecialAxisNonSameScaleInt16) { + MeanOpConstModelTest(); +} + +template +void MeanOpTestQuantizedSameScale() { + float kQuantizedTolerance = GetTolerance(-5.0, 5.0); + + float scale = tensor_dtype == TensorType_INT16 ? 1 / 32767.f : 0.0f; + std::vector data = {0.1, 0.2, 0.3, 0.4, 0.2, 0.3, 0.4, 0.5, 0.1, 0.1, 0.1, 0.1, 0.4, 0.2, 0.2, 0.2, 0.9, 0.9, 0.9, 0.9, 0.2, 0.3, 0.7, 0.7, 0.1, 0.1, 0.3, 0.3, 0.1, 0.2, 0.3, 0.4, 0.1, 0.2, 0.3, 0.4}; - MeanOpConstModel m({TensorType_INT8, {1, 2, 2, 9}, -1.0, 1.0}, - {TensorType_INT8, {2}, -1.0, 1.0}, {2}, {1, 2}, true); - m.QuantizeAndPopulate(m.Input(), data); + MeanOpConstModel m({tensor_dtype, {1, 2, 2, 9}, -1.0, 1.0, scale, 0}, + {tensor_dtype, {2}, -1.0, 1.0, scale, 0}, {2}, {1, 2}, + true); + m.QuantizeAndPopulate(m.Input(), data); m.Invoke(); EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 1, 1, 9})); - EXPECT_THAT(m.GetDequantizedOutput(), + EXPECT_THAT(m.GetDequantizedOutput(), ElementsAreArray(ArrayFloatNear( {0.35, 0.325, 0.2, 0.35, 0.375, 0.325, 0.225, 0.45, 0.425}, kQuantizedTolerance))); } -TEST(ConstInt8MeanOpTest, QuantizedDifferentScale) { - float kQuantizedTolerance = GetTolerance(-5.0, 5.0); +class MeanOpTestQuantizedSameScale : public ::testing::Test {}; + +TEST_F(MeanOpTestQuantizedSameScale, QuantizedSameScaleInt8) { + MeanOpConstModelTest(); +} + +TEST_F(MeanOpTestQuantizedSameScale, QuantizedSameScaleInt16) { + MeanOpConstModelTest(); +} + +template +void MeanOpTestQuantizedDifferentScale() { + float kQuantizedTolerance = GetTolerance(-5.0, 5.0); + + float scale = tensor_dtype == TensorType_INT16 ? 
1 / 32767.f : 0.0f; + std::vector data = {0.1, 0.2, 0.3, 0.4, 0.2, 0.3, 0.4, 0.5, 0.1, 0.1, 0.1, 0.1, 0.4, 0.2, 0.2, 0.2, 0.9, 0.9, 0.9, 0.9, 0.2, 0.3, 0.7, 0.7, 0.1, 0.1, 0.3, 0.3, 0.1, 0.2, 0.3, 0.4, 0.1, 0.2, 0.3, 0.4}; - MeanOpConstModel m({TensorType_INT8, {1, 2, 2, 9}, -1.0, 1.0}, - {TensorType_INT8, {2}, -4.0, 4.0}, {2}, {1, 2}, true); - m.QuantizeAndPopulate(m.Input(), data); + MeanOpConstModel m({tensor_dtype, {1, 2, 2, 9}, -1.0, 1.0, scale, 0}, + {tensor_dtype, {2}, -4.0, 4.0, scale, 0}, {2}, {1, 2}, + true); + m.QuantizeAndPopulate(m.Input(), data); m.Invoke(); EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 1, 1, 9})); - EXPECT_THAT(m.GetDequantizedOutput(), + EXPECT_THAT(m.GetDequantizedOutput(), ElementsAreArray(ArrayFloatNear( {0.35, 0.325, 0.2, 0.35, 0.375, 0.325, 0.225, 0.45, 0.425}, kQuantizedTolerance))); } +class MeanOpTestQuantizedDifferentScale : public ::testing::Test {}; + +TEST_F(MeanOpTestQuantizedDifferentScale, QuantizedDifferentScaleInt8) { + MeanOpConstModelTest(); +} + +TEST_F(MeanOpTestQuantizedDifferentScale, QuantizedDifferentScaleInt16) { + MeanOpConstModelTest(); +} + TEST(ConstFloatMeanOpTest, KeepDims4DMeanLargeDepthInt8) { float kQuantizedTolerance = GetTolerance(-5.0, 5.0); std::vector data = { diff --git a/tensorflow/lite/kernels/register.cc b/tensorflow/lite/kernels/register.cc index 8ca58e6a309..67069fe663c 100644 --- a/tensorflow/lite/kernels/register.cc +++ b/tensorflow/lite/kernels/register.cc @@ -135,7 +135,7 @@ BuiltinOpResolver::BuiltinOpResolver() { /* max_version = */ 4); AddBuiltin(BuiltinOperator_MEAN, Register_MEAN(), /* min_version = */ 1, - /* max_version = */ 2); + /* max_version = */ 3); AddBuiltin(BuiltinOperator_DIV, Register_DIV(), /* min_version */ 1, /* max_version */ 2); diff --git a/tensorflow/lite/toco/tflite/op_version.cc b/tensorflow/lite/toco/tflite/op_version.cc index cf127a9f459..17ff92b3a5d 100644 --- a/tensorflow/lite/toco/tflite/op_version.cc +++ b/tensorflow/lite/toco/tflite/op_version.cc @@ -126,6 +126,7 @@ string GetMinimumRuntimeVersionForModel(const Model& model) { {{OperatorType::kBidirectionalSequenceRnn, 1}, "1.14.0"}, {{OperatorType::kMean, 1}, "1.6.0"}, {{OperatorType::kMean, 2}, "1.14.0"}, + {{OperatorType::kMean, 3}, kPendingReleaseOpVersion}, {{OperatorType::kSum, 1}, "1.10.0"}, {{OperatorType::kSum, 2}, "1.15.0"}, {{OperatorType::kReduceMax, 1}, "1.11.0"}, diff --git a/tensorflow/lite/tools/versioning/op_version.cc b/tensorflow/lite/tools/versioning/op_version.cc index 118e2d420f8..cc7da3625da 100644 --- a/tensorflow/lite/tools/versioning/op_version.cc +++ b/tensorflow/lite/tools/versioning/op_version.cc @@ -490,6 +490,7 @@ int GetBuiltinOperatorVersion(const OpSignature& op_sig) { case BuiltinOperator_CONCATENATION: case BuiltinOperator_SOFTMAX: + case BuiltinOperator_MEAN: // In case of int16 inputs, the version is 3. 
if (op_sig.input_types.at(0) == TensorType_INT16) { return 3; @@ -504,7 +505,6 @@ int GetBuiltinOperatorVersion(const OpSignature& op_sig) { case BuiltinOperator_PADV2: case BuiltinOperator_SPACE_TO_DEPTH: case BuiltinOperator_SPLIT_V: - case BuiltinOperator_MEAN: case BuiltinOperator_SUM: case BuiltinOperator_REDUCE_MAX: case BuiltinOperator_REDUCE_MIN: From c08dae6b51816a9f9a4d9d3ca7fd228b32a6f8d7 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Mon, 1 Jun 2020 00:26:21 +0300 Subject: [PATCH 0013/2522] Update resnet doc - added lost caution - fixed reference according to https://github.com/tensorflow/tensorflow/commit/71964116c54e3f1ad9686b9fb4987d526a15d8a7 --- tensorflow/python/keras/applications/resnet.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/keras/applications/resnet.py b/tensorflow/python/keras/applications/resnet.py index 91562d91e47..61310399180 100644 --- a/tensorflow/python/keras/applications/resnet.py +++ b/tensorflow/python/keras/applications/resnet.py @@ -538,13 +538,16 @@ decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__ DOC = """ - Reference paper: - - [Deep Residual Learning for Image Recognition] - (https://arxiv.org/abs/1512.03385) (CVPR 2015) + Reference: + - [Deep Residual Learning for Image Recognition]( + https://arxiv.org/abs/1512.03385) (CVPR 2015) Optionally loads weights pre-trained on ImageNet. Note that the data format convention used by the model is the one specified in your Keras config at `~/.keras/keras.json`. + + Caution: Be sure to properly pre-process your inputs to the application. + Please see `applications.resnet.preprocess_input` for an example. Arguments: include_top: whether to include the fully-connected From 9f740b0119c390567325c6fe248e922c061c428b Mon Sep 17 00:00:00 2001 From: wondertx Date: Mon, 1 Jun 2020 15:30:22 +0800 Subject: [PATCH 0014/2522] Fix package name --- tensorflow/python/training/checkpoint_management.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/training/checkpoint_management.py b/tensorflow/python/training/checkpoint_management.py index e54ed9907c1..b65cce7ae5c 100644 --- a/tensorflow/python/training/checkpoint_management.py +++ b/tensorflow/python/training/checkpoint_management.py @@ -331,7 +331,7 @@ def latest_checkpoint(checkpoint_dir, latest_filename=None): Gets the checkpoint state given the provided checkpoint_dir and looks for a corresponding TensorFlow 2 (preferred) or TensorFlow 1.x checkpoint path. The latest_filename argument is only applicable if you are saving checkpoint - using `v1.Saver.save` + using `v1.train.Saver.save` See the [Training Checkpoints @@ -342,7 +342,7 @@ def latest_checkpoint(checkpoint_dir, latest_filename=None): checkpoint_dir: Directory where the variables were saved. latest_filename: Optional name for the protocol buffer file that contains the list of most recent checkpoint filenames. - See the corresponding argument to `v1.Saver.save`. + See the corresponding argument to `v1.train.Saver.save`. Returns: The full path to the latest checkpoint or `None` if no checkpoint was found. 
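[Editorial note between patches] As an aside to the `latest_checkpoint` docstring fix above, here is a minimal, hedged usage sketch of `tf.train.latest_checkpoint` together with `v1.train.Saver.save`. It is not part of the patch; the checkpoint directory, variable name, and global step are illustrative placeholders only.

```py
# Illustrative only: save a TF1-style checkpoint, then query the newest one.
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

v = tf.get_variable("v", shape=[], initializer=tf.zeros_initializer())
saver = tf.train.Saver()
with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  saver.save(sess, "/tmp/ckpt/model", global_step=0)

# Prints the full path of the most recent checkpoint, e.g. "/tmp/ckpt/model-0",
# or None if nothing has been saved under /tmp/ckpt.
print(tf.train.latest_checkpoint("/tmp/ckpt"))
```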
From 270196bc62558920bfe7fc3c279355d77fe71495 Mon Sep 17 00:00:00 2001 From: Hugh Ku Date: Wed, 3 Jun 2020 12:39:25 +0800 Subject: [PATCH 0015/2522] Fix HessiansV2's docstring and its default argument colocate_gradients_with_ops forwarding to HessiansV1 --- tensorflow/python/ops/gradients_impl.py | 28 ++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/ops/gradients_impl.py b/tensorflow/python/ops/gradients_impl.py index 8575ea807e4..8169b574eb2 100644 --- a/tensorflow/python/ops/gradients_impl.py +++ b/tensorflow/python/ops/gradients_impl.py @@ -446,8 +446,30 @@ def HessiansV2(ys, gate_gradients=False, aggregation_method=None, name="hessians"): - return hessians(ys, xs, name=name, gate_gradients=gate_gradients, - aggregation_method=aggregation_method) + """Constructs the Hessian of sum of `ys` with respect to `x` in `xs`. + `hessians()` adds ops to the graph to output the Hessian matrix of `ys` + with respect to `xs`. It returns a list of `Tensor` of length `len(xs)` + where each tensor is the Hessian of `sum(ys)`. -HessiansV2.__doc__ = hessians.__doc__ + The Hessian is a matrix of second-order partial derivatives of a scalar + tensor (see https://en.wikipedia.org/wiki/Hessian_matrix for more details). + + Args: + ys: A `Tensor` or list of tensors to be differentiated. + xs: A `Tensor` or list of tensors to be used for differentiation. + name: Optional name to use for grouping all the gradient ops together. + defaults to 'hessians'. + gate_gradients: See `gradients()` documentation for details. + aggregation_method: See `gradients()` documentation for details. + + Returns: + A list of Hessian matrices of `sum(ys)` for each `x` in `xs`. + + Raises: + LookupError: if one of the operations between `xs` and `ys` does not + have a registered gradient function. 
+ """ + return hessians(ys, xs, name=name, + colocate_gradients_with_ops=True, + gate_gradients=gate_gradients, aggregation_method=aggregation_method) \ No newline at end of file From 177b9a417a4316ec55cb2079c012403266019989 Mon Sep 17 00:00:00 2001 From: Hugh Ku Date: Wed, 3 Jun 2020 12:47:24 +0800 Subject: [PATCH 0016/2522] Update tf.hessains function argument warnings --- tensorflow/tools/compatibility/tf_upgrade_v2.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tensorflow/tools/compatibility/tf_upgrade_v2.py b/tensorflow/tools/compatibility/tf_upgrade_v2.py index d27c75fb44e..6a4143757fb 100644 --- a/tensorflow/tools/compatibility/tf_upgrade_v2.py +++ b/tensorflow/tools/compatibility/tf_upgrade_v2.py @@ -1312,6 +1312,13 @@ class TFAPIChangeSpec(ast_edits.NoUpdateSpec): "'colocate_gradients_with_ops' argument, it behaves as if it " "was set to True."), }, + "tf.hessians": { + ("colocate_gradients_with_ops", 3): ( + ast_edits.INFO, + "tf.hessians no longer takes " + "'colocate_gradients_with_ops' argument, it behaves as if it " + "was set to True."), + }, "*.minimize": { ("colocate_gradients_with_ops", 5): ( ast_edits.INFO, From 1f0eabb2d0d3f6e2bb3109f89694f06a20e55be9 Mon Sep 17 00:00:00 2001 From: Hugh Ku Date: Wed, 3 Jun 2020 12:57:13 +0800 Subject: [PATCH 0017/2522] Add test case of Hessians on tf 2.0 upgration --- tensorflow/tools/compatibility/tf_upgrade_v2_test.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tensorflow/tools/compatibility/tf_upgrade_v2_test.py b/tensorflow/tools/compatibility/tf_upgrade_v2_test.py index 47b9899a6b7..f54edbe3215 100644 --- a/tensorflow/tools/compatibility/tf_upgrade_v2_test.py +++ b/tensorflow/tools/compatibility/tf_upgrade_v2_test.py @@ -1118,6 +1118,12 @@ bazel-bin/tensorflow/tools/compatibility/update/generate_v2_reorders_map self.assertEqual("optimizer.compute_gradients(a)\n", new_text) self.assertIn("Optimizer.compute_gradients no longer takes", report) + def testColocateGradientsWithHessians(self): + text = "tf.hessians(ys=a, xs=b, colocate_gradients_with_ops=False)\n" + _, report, unused_errors, new_text = self._upgrade(text) + self.assertEqual("tf.hessians(ys=a, xs=b)\n", new_text) + self.assertIn("tf.hessians no longer takes", report) + def testExportSavedModelRename(self): text = "self.est.export_savedmodel(path)" _, report, unused_errors, unused_new_text = self._upgrade(text) From ed27477845dac063d85791e96b022ce744225799 Mon Sep 17 00:00:00 2001 From: Tongzhou Wang Date: Wed, 3 Jun 2020 20:12:25 -0400 Subject: [PATCH 0018/2522] Fix missing tick in BatchNorm doc --- tensorflow/python/keras/layers/normalization_v2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/keras/layers/normalization_v2.py b/tensorflow/python/keras/layers/normalization_v2.py index 48af6b97ce1..57f641b39ab 100644 --- a/tensorflow/python/keras/layers/normalization_v2.py +++ b/tensorflow/python/keras/layers/normalization_v2.py @@ -210,7 +210,7 @@ class BatchNormalization(normalization.BatchNormalizationBase): __doc__ = normalization.replace_in_base_docstring([ ('{{TRAINABLE_ATTRIBUTE_NOTE}}', ''' - **About setting `layer.trainable = False` on a `BatchNormalization layer:** + **About setting `layer.trainable = False` on a `BatchNormalization` layer:** The meaning of setting `layer.trainable = False` is to freeze the layer, i.e. 
its internal state will not change during training: From f9328b7003ead40380bc25f68dfa5d68397b2655 Mon Sep 17 00:00:00 2001 From: Elena Zhelezina Date: Thu, 4 Jun 2020 11:37:40 +0100 Subject: [PATCH 0019/2522] Addressed reviewer's comments. Change-Id: I09295395ef0686320d9a7738d99fadc6f5dabcde --- tensorflow/lite/kernels/reduce.cc | 77 +++++++++---------------------- 1 file changed, 21 insertions(+), 56 deletions(-) diff --git a/tensorflow/lite/kernels/reduce.cc b/tensorflow/lite/kernels/reduce.cc index 31022b1cd5b..f1045442aad 100644 --- a/tensorflow/lite/kernels/reduce.cc +++ b/tensorflow/lite/kernels/reduce.cc @@ -246,7 +246,7 @@ TfLiteStatus PrepareMeanOrSum(TfLiteContext* context, TfLiteNode* node) { // reduce_mean requires a buffer to store intermediate sum result. OpContext op_context(context, node); if (op_context.input->type == kTfLiteInt8 || - op_context.input->type == kTfLiteInt16) { + op_context.input->type == kTfLiteInt16) { const double real_multiplier = static_cast(op_context.input->params.scale) / static_cast(op_context.output->params.scale); @@ -290,12 +290,24 @@ TfLiteStatus EvalMeanReferenceOps(TfLiteContext* context, op_params.axis_count == 2 && ((op_params.axis[0] == 1 && op_params.axis[1] == 2) || (op_params.axis[0] == 2 && op_params.axis[1] == 1))) { - reference_integer_ops::Mean( - op_params, data->multiplier, data->shift, GetTensorShape(input), - GetTensorData(input), op_context.input->params.zero_point, - GetTensorShape(op_context.output), - GetTensorData(op_context.output), - op_context.output->params.zero_point); + if (std::is_same::value) { + reference_ops::Mean(op_params, GetTensorShape(op_context.input), + GetTensorData(op_context.input), + op_context.input->params.zero_point, + op_context.input->params.scale, + GetTensorShape(op_context.output), + GetTensorData(op_context.output), + op_context.output->params.zero_point, + op_context.output->params.scale); + } else { + reference_integer_ops::Mean( + op_params, data->multiplier, data->shift, GetTensorShape(input), + GetTensorData(input), + op_context.input->params.zero_point, + GetTensorShape(op_context.output), + GetTensorData(op_context.output), + op_context.output->params.zero_point); + } } else if (input->params.zero_point == op_context.output->params.zero_point && input->params.scale == op_context.output->params.scale) { TF_LITE_ENSURE( @@ -459,55 +471,8 @@ TfLiteStatus EvalMean(TfLiteContext* context, TfLiteNode* node) { temp_index, resolved_axis, temp_sum); } break; case kTfLiteUInt8: { - // TODO(b/139102329): Handle all the cases in the combined reference - // method. 
- tflite::MeanParams op_params; - op_params.axis_count = num_axis; - ResolveAxis(GetTensorData(op_context.axis), num_axis, &op_params); - if (op_context.params->keep_dims && - NumDimensions(op_context.input) == 4 && op_params.axis_count == 2 && - ((op_params.axis[0] == 1 && op_params.axis[1] == 2) || - (op_params.axis[0] == 2 && op_params.axis[1] == 1))) { - reference_ops::Mean(op_params, GetTensorShape(op_context.input), - GetTensorData(op_context.input), - op_context.input->params.zero_point, - op_context.input->params.scale, - GetTensorShape(op_context.output), - GetTensorData(op_context.output), - op_context.output->params.zero_point, - op_context.output->params.scale); - } else if (op_context.input->params.zero_point == - op_context.output->params.zero_point && - op_context.input->params.scale == - op_context.output->params.scale) { - TF_LITE_ENSURE( - context, - reference_ops::Mean( - GetTensorData(op_context.input), - op_context.input->dims->data, op_context.input->dims->size, - GetTensorData(op_context.output), - op_context.output->dims->data, op_context.output->dims->size, - GetTensorData(op_context.axis), num_axis, - op_context.params->keep_dims, GetTensorData(temp_index), - GetTensorData(resolved_axis), - GetTensorData(temp_sum))); - } else { - TF_LITE_ENSURE( - context, - reference_ops::QuantizedMeanOrSum<>( - GetTensorData(op_context.input), - op_context.input->params.zero_point, - op_context.input->params.scale, op_context.input->dims->data, - op_context.input->dims->size, - GetTensorData(op_context.output), - op_context.output->params.zero_point, - op_context.output->params.scale, op_context.output->dims->data, - op_context.output->dims->size, - GetTensorData(op_context.axis), num_axis, - op_context.params->keep_dims, GetTensorData(temp_index), - GetTensorData(resolved_axis), GetTensorData(temp_sum), - /*compute_sum=*/false)); - } + EvalMeanReferenceOps(context, op_context, num_axis, data, + temp_index, resolved_axis, temp_sum); } break; default: return kTfLiteError; From c8ae3bb52dbb4b804b7824e979fb7a391a966bcf Mon Sep 17 00:00:00 2001 From: Tongzhou Wang Date: Thu, 4 Jun 2020 11:49:40 -0400 Subject: [PATCH 0020/2522] fix a couple more doc issues --- tensorflow/python/keras/layers/recurrent.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/keras/layers/recurrent.py b/tensorflow/python/keras/layers/recurrent.py index 0ce17c6101e..8fd8556145c 100644 --- a/tensorflow/python/keras/layers/recurrent.py +++ b/tensorflow/python/keras/layers/recurrent.py @@ -323,7 +323,7 @@ class RNN(Layer): This is the expected shape of your inputs *including the batch size*. It should be a tuple of integers, e.g. `(32, 10, 100)`. - - Specify `shuffle=False` when calling fit(). + - Specify `shuffle=False` when calling `fit()`. To reset the states of your model, call `.reset_states()` on either a specific layer, or on your entire model. @@ -1114,7 +1114,7 @@ class DropoutRNNCellMixin(object): is used every time. Also the caches are created without tracking. Since they are not picklable - by python when deepcopy, we don't want layer._obj_reference_counts_dict + by python when deepcopy, we don't want `layer._obj_reference_counts_dict` to track it by default. """ self._dropout_mask_cache = K.ContextValueCache(self._create_dropout_mask) @@ -1124,8 +1124,8 @@ class DropoutRNNCellMixin(object): def reset_dropout_mask(self): """Reset the cached dropout masks if any. 
- This is important for the RNN layer to invoke this in it call() method so - that the cached mask is cleared before calling the cell.call(). The mask + This is important for the RNN layer to invoke this in it `call()` method so + that the cached mask is cleared before calling the `cell.call()`. The mask should be cached across the timestep within the same batch, but shouldn't be cached between batches. Otherwise it will introduce unreasonable bias against certain index of data within the batch. @@ -2634,7 +2634,7 @@ class LSTM(RNN): the `recurrent_kernel` weights matrix. bias_regularizer: Regularizer function applied to the bias vector. activity_regularizer: Regularizer function applied to - the output of the layer (its "activation").. + the output of the layer (its "activation"). kernel_constraint: Constraint function applied to the `kernel` weights matrix. recurrent_constraint: Constraint function applied to From 8265d5f8ebb24450b65db42324fbe0ef6845da90 Mon Sep 17 00:00:00 2001 From: Hugh Ku Date: Fri, 5 Jun 2020 13:50:00 +0800 Subject: [PATCH 0021/2522] Fix pylint error --- tensorflow/python/ops/gradients_impl.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/ops/gradients_impl.py b/tensorflow/python/ops/gradients_impl.py index 8169b574eb2..baa781cfd87 100644 --- a/tensorflow/python/ops/gradients_impl.py +++ b/tensorflow/python/ops/gradients_impl.py @@ -472,4 +472,5 @@ def HessiansV2(ys, """ return hessians(ys, xs, name=name, colocate_gradients_with_ops=True, - gate_gradients=gate_gradients, aggregation_method=aggregation_method) \ No newline at end of file + gate_gradients=gate_gradients, + aggregation_method=aggregation_method) \ No newline at end of file From 5a0c9c796cd257d851c7a5320aaa064d4132614d Mon Sep 17 00:00:00 2001 From: Elena Zhelezina Date: Fri, 5 Jun 2020 10:28:39 +0100 Subject: [PATCH 0022/2522] Addressed reviewer's comments. Change-Id: I9c8f1246eb86365761fa444315b844e9a13b7970 --- tensorflow/lite/kernels/internal/reference/reduce.h | 2 +- tensorflow/lite/kernels/reduce.cc | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/lite/kernels/internal/reference/reduce.h b/tensorflow/lite/kernels/internal/reference/reduce.h index 4f44a2b7764..68c1fa42f90 100644 --- a/tensorflow/lite/kernels/internal/reference/reduce.h +++ b/tensorflow/lite/kernels/internal/reference/reduce.h @@ -319,7 +319,7 @@ inline bool QuantizedMeanOrSum(const T* input_data, int32 input_zero_point, const int num_axis_dimensions, bool keep_dims, int* temp_index, int* resolved_axis, U* temp_sum, bool compute_sum) { - const bool uint8_case = std::is_same::value; + const bool uint8_case = std::is_same::value; const bool int16_case = std::is_same::value; if (uint8_case) { ruy::profiler::ScopeLabel label(compute_sum ? "Sum/Uint8" : "Mean/Uint8"); diff --git a/tensorflow/lite/kernels/reduce.cc b/tensorflow/lite/kernels/reduce.cc index f1045442aad..4b69402f15b 100644 --- a/tensorflow/lite/kernels/reduce.cc +++ b/tensorflow/lite/kernels/reduce.cc @@ -246,6 +246,7 @@ TfLiteStatus PrepareMeanOrSum(TfLiteContext* context, TfLiteNode* node) { // reduce_mean requires a buffer to store intermediate sum result. 
OpContext op_context(context, node); if (op_context.input->type == kTfLiteInt8 || + op_context.input->type == kTfLiteUInt8 || op_context.input->type == kTfLiteInt16) { const double real_multiplier = static_cast(op_context.input->params.scale) / From 25fdc55b8de73fe5df03b59afe011d05c48a2d54 Mon Sep 17 00:00:00 2001 From: Hugh Ku Date: Fri, 5 Jun 2020 17:46:00 +0800 Subject: [PATCH 0023/2522] Fix pylint error - new line break at the end --- tensorflow/python/ops/gradients_impl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/gradients_impl.py b/tensorflow/python/ops/gradients_impl.py index baa781cfd87..eb60c6ea4ea 100644 --- a/tensorflow/python/ops/gradients_impl.py +++ b/tensorflow/python/ops/gradients_impl.py @@ -473,4 +473,4 @@ def HessiansV2(ys, return hessians(ys, xs, name=name, colocate_gradients_with_ops=True, gate_gradients=gate_gradients, - aggregation_method=aggregation_method) \ No newline at end of file + aggregation_method=aggregation_method) From 32176933097225bd1579b2039a6929029409e32f Mon Sep 17 00:00:00 2001 From: Tamas Nyiri Date: Sun, 31 May 2020 17:36:54 +0100 Subject: [PATCH 0024/2522] Add INT16 support in modify_model_interface * Added support for 16bit quantized models in modify_model_interface.cc * Added extra arguments for inputing 16bit quantized models in modify_model_interface_main.cc * Modified 2 of the unit tests to be parametrized and covered 16bit models in modify_model_interface_test.cc Signed-off-by: Tamas Nyiri --- .../tools/optimize/modify_model_interface.cc | 107 ++++++--- .../tools/optimize/modify_model_interface.h | 2 +- .../optimize/modify_model_interface_main.cc | 26 ++- .../optimize/modify_model_interface_test.cc | 203 +++++++++--------- 4 files changed, 183 insertions(+), 155 deletions(-) diff --git a/tensorflow/lite/tools/optimize/modify_model_interface.cc b/tensorflow/lite/tools/optimize/modify_model_interface.cc index 9451483b79d..cadb0708cf4 100644 --- a/tensorflow/lite/tools/optimize/modify_model_interface.cc +++ b/tensorflow/lite/tools/optimize/modify_model_interface.cc @@ -46,7 +46,8 @@ struct TensorOpTensor { // Finds float tensors that are model inputs and is consumed by a quantize Op. // The returned TensorOpTensor should have reverse order. std::vector GetInputTensors(ModelT* model, - ErrorReporter* error_reporter) { + ErrorReporter* error_reporter, + const TensorType& input_type) { std::vector result; // Get all input tensors. for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs.size(); @@ -71,7 +72,7 @@ std::vector GetInputTensors(ModelT* model, continue; } if (op_code != BuiltinOperator_QUANTIZE) { - // Current only support INT8 quantized models. + // Currently only supports INT8 and INT16 quantized models. 
TF_LITE_REPORT_ERROR( error_reporter, "modify_model_interface called on a model without quant/dequant."); @@ -85,10 +86,16 @@ std::vector GetInputTensors(ModelT* model, } const int model_input_index = input_tensors[input_tensor]; TensorT* quant_output = subgraph->tensors[op->outputs[0]].get(); - if (quant_output->type != TensorType_INT8) { + if (quant_output->type != TensorType_INT8 && quant_output->type != TensorType_INT16) { TF_LITE_REPORT_ERROR(error_reporter, - "modify_model_interface currently only support " - "int8 quantized models."); + "modify_model_interface currently only supports " + "int8 and int16 quantized models."); + } + if (quant_output->type != input_type) { + if (!(quant_output->type == TensorType_INT8 && input_type == TensorType_UINT8)) { + TF_LITE_REPORT_ERROR(error_reporter, + "Model's type incompatible with output type argument."); + } } if (quant_output->quantization == nullptr) { continue; @@ -103,7 +110,8 @@ std::vector GetInputTensors(ModelT* model, // Finds float tensors that are model output and is consumed by a dequantize Op. // The returned TensorOpTensor should have reverse order. std::vector GetOutputTensors(ModelT* model, - ErrorReporter* error_reporter) { + ErrorReporter* error_reporter, + const TensorType& output_type) { std::vector result; // Get all output tensors. for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs.size(); @@ -128,7 +136,7 @@ std::vector GetOutputTensors(ModelT* model, continue; } if (op_code != BuiltinOperator_DEQUANTIZE) { - // Current only support INT8 quantized models. + // Currently only supports INT8 and INT16 quantized models. TF_LITE_REPORT_ERROR( error_reporter, "modify_model_interface called on a model without quant/dequant."); @@ -142,13 +150,19 @@ std::vector GetOutputTensors(ModelT* model, } const int model_output_index = output_tensors[output_tensor]; TensorT* dequant_input = subgraph->tensors[op->inputs[0]].get(); - if (dequant_input->type != TensorType_INT8) { - // Current only support INT8 quantized models. + if (dequant_input->type != TensorType_INT8 && dequant_input->type != TensorType_INT16) { + // Currently only supports INT8 and INT16 quantized models. TF_LITE_REPORT_ERROR(error_reporter, - "modify_model_interface currently only support " - "int8 quantized models."); + "modify_model_interface currently only supports " + "int8 and int16 quantized models."); return {}; } + if (dequant_input->type != output_type) { + if (!(dequant_input->type == TensorType_INT8 && output_type == TensorType_UINT8)) { + TF_LITE_REPORT_ERROR(error_reporter, + "Model's type incompatible with output type argument."); + } + } if (dequant_input->quantization == nullptr) { continue; } @@ -302,28 +316,36 @@ TfLiteStatus ModifyModelInterface(flatbuffers::FlatBufferBuilder* builder, tflite::StderrReporter error_reporter; const int original_number_tensors = GetOriginalNumberOfTensors(model, &error_reporter); - // Find float tensors that are model output and is consumed by a float to int8 + // Finds float tensors that are model output and are consumed by a float to int8/int16 // quantize Op. 
// Do output first since the tensors are added into input first., std::vector outputs = - GetOutputTensors(model, &error_reporter); - if (output_type == TensorType_UINT8) { - SetOutputTypeToUINT8(model, outputs); - } else if (output_type == TensorType_INT8) { - RemoveOutputTensor(model, outputs, original_number_tensors); - } else { - return kTfLiteError; + GetOutputTensors(model, &error_reporter, output_type); + switch (output_type) { + case TensorType_UINT8: + SetOutputTypeToUINT8(model, outputs); + break; + case TensorType_INT8: + case TensorType_INT16: + RemoveOutputTensor(model, outputs); + break; + default: + return kTfLiteError; } - // Find float tensors that are model input and is consumed by a float to int8 + // Find float tensors that are model input and is consumed by a float to int8/int16 // quantize Op. - std::vector inputs = GetInputTensors(model, &error_reporter); - if (input_type == TensorType_UINT8) { - SetInputTypeToUINT8(model, inputs); - } else if (input_type == TensorType_INT8) { - RemoveInputTensor(model, inputs, original_number_tensors); - } else { - return kTfLiteError; + std::vector inputs = GetInputTensors(model, &error_reporter, input_type); + switch (input_type) { + case TensorType_UINT8: + SetInputTypeToUINT8(model, inputs); + break; + case TensorType_INT8: + case TensorType_INT16: + RemoveInputTensor(model, inputs); + break; + default: + return kTfLiteError; } // Write to builder. @@ -340,11 +362,13 @@ TfLiteStatus ModifyModelInterface(const string& input_file, const TensorType& output_type) { // Sanity Check if (input_type != tflite::TensorType_INT8 && - input_type != tflite::TensorType_UINT8) { + input_type != tflite::TensorType_UINT8 && + input_type != tflite::TensorType_INT16) { return kTfLiteError; } if (output_type != tflite::TensorType_INT8 && - output_type != tflite::TensorType_UINT8) { + output_type != tflite::TensorType_UINT8 && + output_type != tflite::TensorType_INT16) { return kTfLiteError; } @@ -357,13 +381,26 @@ TfLiteStatus ModifyModelInterface(const string& input_file, absl::make_unique(); flatbuffers::FlatBufferBuilder builder; - tflite::TensorType input_override_type = tflite::TensorType_INT8; - if (input_type == tflite::TensorType_UINT8) { - input_override_type = tflite::TensorType_UINT8; + tflite::TensorType input_override_type; + tflite::TensorType output_override_type; + + switch (input_type) { + case tflite::TensorType_UINT8: + case tflite::TensorType_INT8: + case tflite::TensorType_INT16: + input_override_type = input_type; + break; + default: + return kTfLiteError; } - tflite::TensorType output_override_type = tflite::TensorType_INT8; - if (output_type == tflite::TensorType_UINT8) { - output_override_type = tflite::TensorType_UINT8; + switch (output_type) { + case tflite::TensorType_UINT8: + case tflite::TensorType_INT8: + case tflite::TensorType_INT16: + output_override_type = output_type; + break; + default: + return kTfLiteError; } auto status = ModifyModelInterface(&builder, tflite_model.get(), diff --git a/tensorflow/lite/tools/optimize/modify_model_interface.h b/tensorflow/lite/tools/optimize/modify_model_interface.h index 170e0e73a67..b3a39d63801 100644 --- a/tensorflow/lite/tools/optimize/modify_model_interface.h +++ b/tensorflow/lite/tools/optimize/modify_model_interface.h @@ -24,7 +24,7 @@ namespace optimize { // Changes the interface of a quantized model. This method allows the users to // replace float interface with other types. // This populates the builder with the new model. 
-// Currently only int8 and unit8 are supported. +// Currently only int8, int16 and unit8 are supported. // // Note: This is a private API, subject to change. TfLiteStatus ModifyModelInterface(flatbuffers::FlatBufferBuilder* builder, diff --git a/tensorflow/lite/tools/optimize/modify_model_interface_main.cc b/tensorflow/lite/tools/optimize/modify_model_interface_main.cc index 24674a1b341..18e210332f0 100644 --- a/tensorflow/lite/tools/optimize/modify_model_interface_main.cc +++ b/tensorflow/lite/tools/optimize/modify_model_interface_main.cc @@ -25,24 +25,22 @@ int main(int argc, char** argv) { return 1; } - if (strcmp(argv[3], "uint8") && strcmp(argv[3], "int8")) { - printf("Only support uint8 and int8 for input interface"); - return 1; - } - - if (strcmp(argv[4], "uint8") && strcmp(argv[4], "int8")) { - printf("Only support uint8 and int8 for output interface"); - return 1; - } + const std::unordered_map supported_types + { + { "uint8", tflite::TensorType_UINT8 }, + { "int8", tflite::TensorType_INT8 }, + { "int16", tflite::TensorType_INT16 } + }; tflite::TensorType input = tflite::TensorType_INT8; tflite::TensorType output = tflite::TensorType_INT8; - if (!strcmp(argv[3], "uint8")) { - input = tflite::TensorType_UINT8; - } - if (!strcmp(argv[4], "uint8")) { - output = tflite::TensorType_UINT8; + try { + input = supported_types.at(argv[3]); + output = supported_types.at(argv[4]); + } catch (const std::out_of_range&) { + printf("Only supports uint8, int8 and int16 for input and output interfaces"); + return 1; } tflite::optimize::ModifyModelInterface(argv[1], argv[2], input, output); diff --git a/tensorflow/lite/tools/optimize/modify_model_interface_test.cc b/tensorflow/lite/tools/optimize/modify_model_interface_test.cc index 5a04f28f638..ca0699bcea2 100644 --- a/tensorflow/lite/tools/optimize/modify_model_interface_test.cc +++ b/tensorflow/lite/tools/optimize/modify_model_interface_test.cc @@ -18,6 +18,7 @@ limitations under the License. #include #include + #include "absl/memory/memory.h" #include "tensorflow/lite/model.h" #include "tensorflow/lite/schema/schema_generated.h" @@ -29,7 +30,7 @@ namespace { using ::testing::ElementsAreArray; // Create a model with 1 quant, 1 FC, 1 dequant -std::unique_ptr CreateModelSingleInputOutput() { +std::unique_ptr CreateModelSingleInputOutput(const TensorType& quantization_type) { auto model = absl::make_unique(); auto subgraph = absl::make_unique(); auto buffer = absl::make_unique(); @@ -85,7 +86,7 @@ std::unique_ptr CreateModelSingleInputOutput() { tensor_0->quantization->zero_point.push_back(28); tensor_0->name = "tensor_0"; tensor_0->shape = {}; - tensor_0->type = TensorType_INT8; + tensor_0->type = quantization_type; auto tensor_1 = absl::make_unique(); tensor_1->quantization = absl::make_unique(); @@ -93,7 +94,7 @@ std::unique_ptr CreateModelSingleInputOutput() { tensor_1->quantization->zero_point.push_back(50); tensor_1->name = "tensor_1"; tensor_1->shape = {}; - tensor_1->type = TensorType_INT8; + tensor_1->type = quantization_type; auto tensor_2 = absl::make_unique(); tensor_2->name = "tensor_2"; @@ -118,7 +119,7 @@ std::unique_ptr CreateModelSingleInputOutput() { // Create a model with 2 quant, 1 FC, 2 dequant // The model mimics the behavior of the quantize_model.cc. 
-std::unique_ptr CreateModelMultipleInputOutput() { +std::unique_ptr CreateModelMultipleInputOutput(const TensorType& quantization_type) { auto model = absl::make_unique(); auto subgraph = absl::make_unique(); auto buffer = absl::make_unique(); @@ -183,7 +184,7 @@ std::unique_ptr CreateModelMultipleInputOutput() { tensor_0->quantization->zero_point.push_back(28); tensor_0->name = "tensor_0"; tensor_0->shape = {}; - tensor_0->type = TensorType_INT8; + tensor_0->type = quantization_type; auto tensor_1 = absl::make_unique(); tensor_1->quantization = absl::make_unique(); @@ -191,7 +192,7 @@ std::unique_ptr CreateModelMultipleInputOutput() { tensor_1->quantization->zero_point.push_back(50); tensor_1->name = "tensor_1"; tensor_1->shape = {}; - tensor_1->type = TensorType_INT8; + tensor_1->type = quantization_type; auto tensor_2 = absl::make_unique(); tensor_2->quantization = absl::make_unique(); @@ -199,7 +200,7 @@ std::unique_ptr CreateModelMultipleInputOutput() { tensor_2->quantization->zero_point.push_back(28); tensor_2->name = "tensor_2"; tensor_2->shape = {}; - tensor_2->type = TensorType_INT8; + tensor_2->type = quantization_type; auto tensor_3 = absl::make_unique(); tensor_3->quantization = absl::make_unique(); @@ -207,7 +208,7 @@ std::unique_ptr CreateModelMultipleInputOutput() { tensor_3->quantization->zero_point.push_back(50); tensor_3->name = "tensor_3"; tensor_3->shape = {}; - tensor_3->type = TensorType_INT8; + tensor_3->type = quantization_type; auto tensor_4 = absl::make_unique(); tensor_4->name = "tensor_4"; @@ -290,8 +291,95 @@ std::unique_ptr CreateFloatModel() { return model; } +struct ModelInterface: + ::testing::TestWithParam {}; + +TEST_P(ModelInterface, SingleInputOutput) { + TensorType quantization_type = GetParam(); + + auto model = CreateModelSingleInputOutput(quantization_type); + + // Change model type. + flatbuffers::FlatBufferBuilder builder; + EXPECT_EQ(ModifyModelInterface(&builder, model.get(), quantization_type, + quantization_type), + kTfLiteOk); + + // Verify results. + EXPECT_EQ(model->operator_codes.size(), 3); + EXPECT_EQ(model->subgraphs.size(), 1); + EXPECT_EQ(model->subgraphs[0]->operators.size(), 1); + EXPECT_EQ(model->subgraphs[0]->tensors.size(), 2); + EXPECT_EQ(model->buffers.size(), 1); + + EXPECT_EQ(model->subgraphs[0]->inputs.size(), 1); + EXPECT_EQ(model->subgraphs[0]->inputs[0], 0); + EXPECT_EQ(model->subgraphs[0]->outputs.size(), 1); + EXPECT_EQ(model->subgraphs[0]->outputs[0], 1); +} + +TEST_P(ModelInterface, MutipleInputOutput) { + + TensorType quantization_type = GetParam(); + + auto model = CreateModelMultipleInputOutput(quantization_type); + + // Change model type. + flatbuffers::FlatBufferBuilder builder; + EXPECT_EQ(ModifyModelInterface(&builder, model.get(), quantization_type, + quantization_type), + kTfLiteOk); + + // Verify results. 
+ EXPECT_EQ(model->operator_codes.size(), 3); + EXPECT_EQ(model->subgraphs.size(), 1); + EXPECT_EQ(model->subgraphs[0]->operators.size(), 1); + EXPECT_EQ(model->subgraphs[0]->tensors.size(), 4); + EXPECT_EQ(model->subgraphs[0]->inputs.size(), 2); + EXPECT_EQ(model->subgraphs[0]->inputs[0], 0); + EXPECT_EQ(model->subgraphs[0]->inputs[1], 1); + EXPECT_EQ(model->subgraphs[0]->outputs.size(), 2); + EXPECT_EQ(model->subgraphs[0]->outputs[0], 2); + EXPECT_EQ(model->subgraphs[0]->outputs[1], 3); + EXPECT_EQ(model->buffers.size(), 1); + + // Tensors, + EXPECT_EQ(model->subgraphs[0]->tensors[0]->name, "tensor_0"); + EXPECT_EQ(model->subgraphs[0]->tensors[0]->type, quantization_type); + EXPECT_FLOAT_EQ(model->subgraphs[0]->tensors[0]->quantization->scale[0], + 0.35); + EXPECT_EQ(model->subgraphs[0]->tensors[0]->quantization->zero_point[0], 28); + + EXPECT_EQ(model->subgraphs[0]->tensors[1]->name, "tensor_1"); + EXPECT_EQ(model->subgraphs[0]->tensors[1]->type, quantization_type); + EXPECT_FLOAT_EQ(model->subgraphs[0]->tensors[1]->quantization->scale[0], + 0.12); + EXPECT_EQ(model->subgraphs[0]->tensors[1]->quantization->zero_point[0], 50); + + EXPECT_EQ(model->subgraphs[0]->tensors[2]->name, "tensor_2"); + EXPECT_EQ(model->subgraphs[0]->tensors[2]->type, quantization_type); + EXPECT_FLOAT_EQ(model->subgraphs[0]->tensors[2]->quantization->scale[0], + 0.45); + EXPECT_EQ(model->subgraphs[0]->tensors[2]->quantization->zero_point[0], 28); + + EXPECT_EQ(model->subgraphs[0]->tensors[3]->name, "tensor_3"); + EXPECT_EQ(model->subgraphs[0]->tensors[3]->type, quantization_type); + EXPECT_FLOAT_EQ(model->subgraphs[0]->tensors[3]->quantization->scale[0], + 0.22); + EXPECT_EQ(model->subgraphs[0]->tensors[3]->quantization->zero_point[0], 50); + + // Ops. + EXPECT_EQ(model->subgraphs[0]->operators[0]->opcode_index, 1); +} + +INSTANTIATE_TEST_SUITE_P( + MultipleInputOutputTests, + ModelInterface, + ::testing::Values(TensorType_INT8, TensorType_INT16) +); + TEST(ModelInterface, Uint8SingleInputOutput) { - auto model = CreateModelSingleInputOutput(); + auto model = CreateModelSingleInputOutput(TensorType_INT8); // Ops. EXPECT_EQ(model->subgraphs[0]->operators[0]->opcode_index, 0); @@ -329,52 +417,8 @@ TEST(ModelInterface, Uint8SingleInputOutput) { EXPECT_EQ(model->subgraphs[0]->operators[2]->opcode_index, 0); } -TEST(ModelInterface, Int8SingleInputOutput) { - auto model = CreateModelSingleInputOutput(); - - // Change model type. - flatbuffers::FlatBufferBuilder builder; - EXPECT_EQ(ModifyModelInterface(&builder, model.get(), TensorType_INT8, - TensorType_INT8), - kTfLiteOk); - - // Verify results. - EXPECT_EQ(model->operator_codes.size(), 3); - EXPECT_EQ(model->subgraphs.size(), 1); - EXPECT_EQ(model->subgraphs[0]->operators.size(), 1); - EXPECT_EQ(model->subgraphs[0]->tensors.size(), 2); - EXPECT_EQ(model->buffers.size(), 1); - - EXPECT_EQ(model->subgraphs[0]->inputs.size(), 1); - EXPECT_EQ(model->subgraphs[0]->inputs[0], 0); - EXPECT_EQ(model->subgraphs[0]->outputs.size(), 1); - EXPECT_EQ(model->subgraphs[0]->outputs[0], 1); -} - -TEST(ModelInterface, MixedTypeSingleInputOutput) { - auto model = CreateModelSingleInputOutput(); - - // Change model type. - flatbuffers::FlatBufferBuilder builder; - EXPECT_EQ(ModifyModelInterface(&builder, model.get(), TensorType_UINT8, - TensorType_INT8), - kTfLiteOk); - - // Verify results. 
- EXPECT_EQ(model->operator_codes.size(), 3); - EXPECT_EQ(model->subgraphs.size(), 1); - EXPECT_EQ(model->subgraphs[0]->operators.size(), 2); - EXPECT_EQ(model->subgraphs[0]->tensors.size(), 3); - EXPECT_EQ(model->buffers.size(), 1); - - EXPECT_EQ(model->subgraphs[0]->inputs.size(), 1); - EXPECT_EQ(model->subgraphs[0]->inputs[0], 2); - EXPECT_EQ(model->subgraphs[0]->outputs.size(), 1); - EXPECT_EQ(model->subgraphs[0]->outputs[0], 1); -} - TEST(ModelInterface, Uint8MutipleInputOutput) { - auto model = CreateModelMultipleInputOutput(); + auto model = CreateModelMultipleInputOutput(TensorType_INT8); // Ops. EXPECT_EQ(model->subgraphs[0]->operators[0]->opcode_index, 0); @@ -436,57 +480,6 @@ TEST(ModelInterface, Uint8MutipleInputOutput) { EXPECT_EQ(model->subgraphs[0]->operators[4]->opcode_index, 0); } -TEST(ModelInterface, Int8MutipleInputOutput) { - auto model = CreateModelMultipleInputOutput(); - - // Change model type. - flatbuffers::FlatBufferBuilder builder; - EXPECT_EQ(ModifyModelInterface(&builder, model.get(), TensorType_INT8, - TensorType_INT8), - kTfLiteOk); - - // Verify results. - EXPECT_EQ(model->operator_codes.size(), 3); - EXPECT_EQ(model->subgraphs.size(), 1); - EXPECT_EQ(model->subgraphs[0]->operators.size(), 1); - EXPECT_EQ(model->subgraphs[0]->tensors.size(), 4); - EXPECT_EQ(model->subgraphs[0]->inputs.size(), 2); - EXPECT_EQ(model->subgraphs[0]->inputs[0], 0); - EXPECT_EQ(model->subgraphs[0]->inputs[1], 1); - EXPECT_EQ(model->subgraphs[0]->outputs.size(), 2); - EXPECT_EQ(model->subgraphs[0]->outputs[0], 2); - EXPECT_EQ(model->subgraphs[0]->outputs[1], 3); - EXPECT_EQ(model->buffers.size(), 1); - - // Tensors, - EXPECT_EQ(model->subgraphs[0]->tensors[0]->name, "tensor_0"); - EXPECT_EQ(model->subgraphs[0]->tensors[0]->type, TensorType_INT8); - EXPECT_FLOAT_EQ(model->subgraphs[0]->tensors[0]->quantization->scale[0], - 0.35); - EXPECT_EQ(model->subgraphs[0]->tensors[0]->quantization->zero_point[0], 28); - - EXPECT_EQ(model->subgraphs[0]->tensors[1]->name, "tensor_1"); - EXPECT_EQ(model->subgraphs[0]->tensors[1]->type, TensorType_INT8); - EXPECT_FLOAT_EQ(model->subgraphs[0]->tensors[1]->quantization->scale[0], - 0.12); - EXPECT_EQ(model->subgraphs[0]->tensors[1]->quantization->zero_point[0], 50); - - EXPECT_EQ(model->subgraphs[0]->tensors[2]->name, "tensor_2"); - EXPECT_EQ(model->subgraphs[0]->tensors[2]->type, TensorType_INT8); - EXPECT_FLOAT_EQ(model->subgraphs[0]->tensors[2]->quantization->scale[0], - 0.45); - EXPECT_EQ(model->subgraphs[0]->tensors[2]->quantization->zero_point[0], 28); - - EXPECT_EQ(model->subgraphs[0]->tensors[3]->name, "tensor_3"); - EXPECT_EQ(model->subgraphs[0]->tensors[3]->type, TensorType_INT8); - EXPECT_FLOAT_EQ(model->subgraphs[0]->tensors[3]->quantization->scale[0], - 0.22); - EXPECT_EQ(model->subgraphs[0]->tensors[3]->quantization->zero_point[0], 50); - - // Ops. - EXPECT_EQ(model->subgraphs[0]->operators[0]->opcode_index, 1); -} - TEST(ModelInterface, Float) { // Create the model. 
std::unique_ptr input_model_t = CreateFloatModel(); From 996b28e8341ff5698535ea05ee1b33d82e9d7bb0 Mon Sep 17 00:00:00 2001 From: ngc92 <7938269+ngc92@users.noreply.github.com> Date: Sat, 6 Jun 2020 20:45:54 +0300 Subject: [PATCH 0025/2522] addressing the review issues --- tensorflow/python/framework/sparse_tensor.py | 9 +++++---- tensorflow/python/framework/sparse_tensor_test.py | 5 +++++ .../api/golden/v1/tensorflow.sparse.-sparse-tensor.pbtxt | 4 ++++ .../api/golden/v2/tensorflow.sparse.-sparse-tensor.pbtxt | 4 ++++ 4 files changed, 18 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/framework/sparse_tensor.py b/tensorflow/python/framework/sparse_tensor.py index ab7afabeae5..1592f7d6604 100644 --- a/tensorflow/python/framework/sparse_tensor.py +++ b/tensorflow/python/framework/sparse_tensor.py @@ -182,14 +182,15 @@ class SparseTensor(internal.NativeObject, composite_tensor.CompositeTensor): """Returns a copy of `self` with `values` replaced by `new_values`. This method produces a new `SparseTensor` that has the same nonzero - indices, but updated values. + `indices` and same `dense_shape`, but updated values. Args: - new_values: The values of the new `SparseTensor. Needs to have the same - shape as the current `.values` `Tensor`. + new_values: The values of the new `SparseTensor`. Needs to have the same + shape as the current `.values` `Tensor`. May have a different type + than the current `values`. Returns: - A `SparseTensor` with identical indices but updated values. + A `SparseTensor` with identical indices and shape but updated values. """ return SparseTensor(self._indices, new_values, self._dense_shape) diff --git a/tensorflow/python/framework/sparse_tensor_test.py b/tensorflow/python/framework/sparse_tensor_test.py index 71693087c28..736543f669b 100644 --- a/tensorflow/python/framework/sparse_tensor_test.py +++ b/tensorflow/python/framework/sparse_tensor_test.py @@ -24,6 +24,7 @@ import numpy as np from tensorflow.python.eager import context from tensorflow.python.eager import def_function from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_shape @@ -105,6 +106,10 @@ class SparseTensorTest(test_util.TensorFlowTestCase): self.assertAllEqual(new_tensor.values, [5.0, 1.0]) self.assertAllEqual(new_tensor.dense_shape, source.dense_shape) + # ensure new value's shape is checked + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + source.with_values([[5.0, 1.0]]) + class ConvertToTensorOrSparseTensorTest(test_util.TensorFlowTestCase): diff --git a/tensorflow/tools/api/golden/v1/tensorflow.sparse.-sparse-tensor.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.sparse.-sparse-tensor.pbtxt index a49cd1ccc4d..e13dad8be69 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.sparse.-sparse-tensor.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.sparse.-sparse-tensor.pbtxt @@ -48,6 +48,10 @@ tf_class { name: "from_value" argspec: "args=[\'cls\', \'sparse_tensor_value\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "with_values" + argspec: "args=[\'self\', \'new_values\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_shape" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.sparse.-sparse-tensor.pbtxt 
b/tensorflow/tools/api/golden/v2/tensorflow.sparse.-sparse-tensor.pbtxt index a49cd1ccc4d..e13dad8be69 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.sparse.-sparse-tensor.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.sparse.-sparse-tensor.pbtxt @@ -48,6 +48,10 @@ tf_class { name: "from_value" argspec: "args=[\'cls\', \'sparse_tensor_value\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "with_values" + argspec: "args=[\'self\', \'new_values\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_shape" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" From efe0812a463aa3dcbac53d2d52c623d60d607634 Mon Sep 17 00:00:00 2001 From: Shawn Presser Date: Tue, 9 Jun 2020 05:18:52 -0700 Subject: [PATCH 0026/2522] Make http://metadata.google.internal configurable The TPU client library has a hardcoded dependency on `http://metadata.google.internal`. We happen to need to redirect this URL to a different VM. Since the URL is hardcoded, we're forced to use a fragile code patch against our version of Tensorflow, which isn't ideal, or rely on `/etc/hosts` to forward `metadata.google.internal`, which causes unexpected global side effects to the user's VM. (For example, GCE uses `metadata.google.internal` to distribute SSH keys to GCE VMs, which breaks when we reroute `metadata.google.internal` using `/etc/hosts`.) oauth2client solves this by making `http://metadata.google.internal` configurable via the `GCE_METADATA_IP` environment variable. The final url becomes `'http://' + os.getenv('GCE_METADATA_IP', '169.254.169.254')`: https://github.com/googleapis/oauth2client/blob/50d20532a748f18e53f7d24ccbe6647132c979a9/oauth2client/client.py#L111 Following oauth2client's lead, this PR makes `http://metadata.google.internal` configurable for Tensorflow users via `GCE_METADATA_IP`: ```py _GCE_METADATA_URL_ENV_VARIABLE = 'GCE_METADATA_IP' # ... def _gce_metadata_endpoint(): return 'http://' + os.environ.get( _GCE_METADATA_URL_ENV_VARIABLE, 'metadata.google.internal') ``` `GCE_METADATA_IP` might seem like an awkward name. After all, `metadata.google.internal` is a URL, not an IP address. But it's probably best to match oauth2client's naming convention. That way users won't need to worry about setting two slightly-different variable names to configure both oauth2client and Tensorflow. 
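A short usage sketch for reviewers (an illustration added here, not part of the change itself): the override only requires the environment variable to be set before the TPU client performs its metadata lookups. The IP below is an arbitrary placeholder, and using `Client` from this module as the entry point is an assumption for the example.

```py
# Illustrative only: route GCE metadata lookups to a custom host.
import os
os.environ["GCE_METADATA_IP"] = "10.128.0.2"  # placeholder address

# With the proposed change, metadata URLs are built as
# "http://" + os.environ.get("GCE_METADATA_IP", "metadata.google.internal") + "/computeMetadata/v1/..."
from tensorflow.python.tpu.client import client
c = client.Client(tpu="my-tpu")  # subsequent project/zone/name lookups use the override
```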
--- tensorflow/python/tpu/client/client.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/tpu/client/client.py b/tensorflow/python/tpu/client/client.py index bc693cbef68..8f48298345e 100644 --- a/tensorflow/python/tpu/client/client.py +++ b/tensorflow/python/tpu/client/client.py @@ -38,7 +38,7 @@ _GKE_ENV_VARIABLE = 'KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS' _ENDPOINTS_SEPARATOR = ',' _DEFAULT_ENV_VARIABLE = 'TPU_NAME' _DISCOVERY_SERVICE_URL_ENV_VARIABLE = 'TPU_API_DISCOVERY_URL' -_GCE_METADATA_ENDPOINT = 'http://metadata.google.internal' +_GCE_METADATA_URL_ENV_VARIABLE = 'GCE_METADATA_IP' _DEFAULT_ENDPOINT_PORT = '8470' @@ -46,9 +46,15 @@ def _environment_discovery_url(): return os.environ.get(_DISCOVERY_SERVICE_URL_ENV_VARIABLE) +def _gce_metadata_endpoint(): + return 'http://' + os.environ.get( + _GCE_METADATA_URL_ENV_VARIABLE, + 'metadata.google.internal') + + def _request_compute_metadata(path): req = request.Request( - '%s/computeMetadata/v1/%s' % (_GCE_METADATA_ENDPOINT, path), + '%s/computeMetadata/v1/%s' % (_gce_metadata_endpoint(), path), headers={'Metadata-Flavor': 'Google'}) resp = request.urlopen(req) return _as_text(resp.read()) From 2e4184f8294110748787c635a8a545bbef95725e Mon Sep 17 00:00:00 2001 From: Tamas Nyiri Date: Wed, 10 Jun 2020 00:20:34 +0100 Subject: [PATCH 0027/2522] Refactored to reflect changes in upstream --- .../tools/optimize/modify_model_interface.cc | 12 +- .../optimize/modify_model_interface_test.cc | 198 +++++++++--------- 2 files changed, 108 insertions(+), 102 deletions(-) diff --git a/tensorflow/lite/tools/optimize/modify_model_interface.cc b/tensorflow/lite/tools/optimize/modify_model_interface.cc index cadb0708cf4..eead4999ca6 100644 --- a/tensorflow/lite/tools/optimize/modify_model_interface.cc +++ b/tensorflow/lite/tools/optimize/modify_model_interface.cc @@ -302,9 +302,9 @@ std::unique_ptr CreateMutableModelFromFile( return copied_model; } -int GetOriginalNumberOfTensors(ModelT* model, ErrorReporter* error_reporter) { - std::vector outputs = GetOutputTensors(model, error_reporter); - std::vector inputs = GetInputTensors(model, error_reporter); +int GetOriginalNumberOfTensors(ModelT* model, ErrorReporter* error_reporter, const TensorType& input_type, const TensorType& output_type) { + std::vector outputs = GetOutputTensors(model, error_reporter, output_type); + std::vector inputs = GetInputTensors(model, error_reporter, input_type); return model->subgraphs[0]->tensors.size() - outputs.size() - inputs.size(); } @@ -315,7 +315,7 @@ TfLiteStatus ModifyModelInterface(flatbuffers::FlatBufferBuilder* builder, const TensorType& output_type) { tflite::StderrReporter error_reporter; const int original_number_tensors = - GetOriginalNumberOfTensors(model, &error_reporter); + GetOriginalNumberOfTensors(model, &error_reporter, input_type, output_type); // Finds float tensors that are model output and are consumed by a float to int8/int16 // quantize Op. 
// Do output first since the tensors are added into input first., @@ -327,7 +327,7 @@ TfLiteStatus ModifyModelInterface(flatbuffers::FlatBufferBuilder* builder, break; case TensorType_INT8: case TensorType_INT16: - RemoveOutputTensor(model, outputs); + RemoveOutputTensor(model, outputs, original_number_tensors); break; default: return kTfLiteError; @@ -342,7 +342,7 @@ TfLiteStatus ModifyModelInterface(flatbuffers::FlatBufferBuilder* builder, break; case TensorType_INT8: case TensorType_INT16: - RemoveInputTensor(model, inputs); + RemoveInputTensor(model, inputs, original_number_tensors); break; default: return kTfLiteError; diff --git a/tensorflow/lite/tools/optimize/modify_model_interface_test.cc b/tensorflow/lite/tools/optimize/modify_model_interface_test.cc index 1df0a36c727..acf9bf1a02d 100644 --- a/tensorflow/lite/tools/optimize/modify_model_interface_test.cc +++ b/tensorflow/lite/tools/optimize/modify_model_interface_test.cc @@ -298,46 +298,6 @@ TEST_P(ModelInterface, SingleInputOutput) { auto model = CreateQuantizedModelSingleInputOutput(quantization_type); - // Change model type. - flatbuffers::FlatBufferBuilder builder; - EXPECT_EQ(ModifyModelInterface(&builder, model.get(), quantization_type, - quantization_type), - kTfLiteOk); - - // Verify results. - EXPECT_EQ(model->subgraphs.size(), 1); - EXPECT_EQ(model->subgraphs[0]->tensors.size(), 4); - EXPECT_EQ(model->subgraphs[0]->inputs.size(), 1); - EXPECT_EQ(model->subgraphs[0]->inputs[0], 0); - EXPECT_EQ(model->subgraphs[0]->outputs.size(), 1); - EXPECT_EQ(model->subgraphs[0]->outputs[0], 3); - EXPECT_EQ(model->operator_codes.size(), 3); - EXPECT_EQ(model->subgraphs[0]->operators.size(), 3); - EXPECT_EQ(model->subgraphs[0]->operators[0]->opcode_index, 0); - EXPECT_EQ(model->subgraphs[0]->operators[1]->opcode_index, 1); - EXPECT_EQ(model->subgraphs[0]->operators[2]->opcode_index, 0); - - auto input_quant_op = model->subgraphs[0]->operators[0].get(); - auto input = model->subgraphs[0]->tensors[input_quant_op->inputs[0]].get(); - EXPECT_EQ(input->name, "tensor_0"); - EXPECT_EQ(input->type, TensorType_UINT8); - EXPECT_FLOAT_EQ(input->quantization->scale[0], 0.35); - EXPECT_EQ(input->quantization->zero_point[0], 156); - - auto output_quant_op = model->subgraphs[0]->operators[2].get(); - auto output = model->subgraphs[0]->tensors[output_quant_op->outputs[0]].get(); - EXPECT_EQ(output->name, "tensor_3"); - EXPECT_EQ(output->type, TensorType_UINT8); - EXPECT_FLOAT_EQ(output->quantization->scale[0], 0.12); - EXPECT_EQ(output->quantization->zero_point[0], 178); -} - -TEST_P(ModelInterface, MutipleInputOutput) { - - TensorType quantization_type = GetParam(); - - auto model = CreateModelMultipleInputOutput(quantization_type); - // Change model type. 
flatbuffers::FlatBufferBuilder builder; EXPECT_EQ(ModifyModelInterface(&builder, model.get(), quantization_type, @@ -361,24 +321,84 @@ TEST_P(ModelInterface, MutipleInputOutput) { auto input = model->subgraphs[0]->tensors[fc_op->inputs[0]].get(); EXPECT_EQ(input->name, "tensor_1"); - EXPECT_EQ(input->type, TensorType_INT8); + EXPECT_EQ(input->type, quantization_type); EXPECT_FLOAT_EQ(input->quantization->scale[0], 0.35); EXPECT_EQ(input->quantization->zero_point[0], 28); auto output = model->subgraphs[0]->tensors[fc_op->outputs[0]].get(); EXPECT_EQ(output->name, "tensor_2"); - EXPECT_EQ(output->type, TensorType_INT8); + EXPECT_EQ(output->type, quantization_type); EXPECT_FLOAT_EQ(output->quantization->scale[0], 0.12); EXPECT_EQ(output->quantization->zero_point[0], 50); } +TEST_P(ModelInterface, MutipleInputOutput) { + + TensorType quantization_type = GetParam(); + + auto model = CreateQuantizedModelMultipleInputOutput(quantization_type); + + // Change model type. + flatbuffers::FlatBufferBuilder builder; + EXPECT_EQ(ModifyModelInterface(&builder, model.get(), quantization_type, + quantization_type), + kTfLiteOk); + + // Verify results. + EXPECT_EQ(model->subgraphs.size(), 1); + // TODO (b/158254056): Remove unused inputs and outputs from tensor list + // EXPECT_EQ(model->subgraphs[0]->tensors.size(), 4); + EXPECT_EQ(model->subgraphs[0]->tensors.size(), 6); + EXPECT_EQ(model->subgraphs[0]->inputs.size(), 2); + EXPECT_EQ(model->subgraphs[0]->inputs[0], 2); + EXPECT_EQ(model->subgraphs[0]->inputs[1], 3); + EXPECT_EQ(model->subgraphs[0]->outputs.size(), 2); + EXPECT_EQ(model->subgraphs[0]->outputs[0], 4); + EXPECT_EQ(model->subgraphs[0]->outputs[1], 5); + EXPECT_EQ(model->operator_codes.size(), 3); + EXPECT_EQ(model->subgraphs[0]->operators.size(), 1); + EXPECT_EQ(model->subgraphs[0]->operators[0]->opcode_index, 1); + + auto fc_op = model->subgraphs[0]->operators[0].get(); + + auto input_1 = model->subgraphs[0]->tensors[fc_op->inputs[0]].get(); + EXPECT_EQ(input_1->name, "tensor_2"); + EXPECT_EQ(input_1->type, quantization_type); + EXPECT_FLOAT_EQ(input_1->quantization->scale[0], 0.35); + EXPECT_EQ(input_1->quantization->zero_point[0], 28); + + auto input_2 = model->subgraphs[0]->tensors[fc_op->inputs[1]].get(); + EXPECT_EQ(input_2->name, "tensor_3"); + EXPECT_EQ(input_2->type, quantization_type); + EXPECT_FLOAT_EQ(input_2->quantization->scale[0], 0.12); + EXPECT_EQ(input_2->quantization->zero_point[0], 50); + + auto output_1 = model->subgraphs[0]->tensors[fc_op->outputs[0]].get(); + EXPECT_EQ(output_1->name, "tensor_4"); + EXPECT_EQ(output_1->type, quantization_type); + EXPECT_FLOAT_EQ(output_1->quantization->scale[0], 0.45); + EXPECT_EQ(output_1->quantization->zero_point[0], 28); + + auto output_2 = model->subgraphs[0]->tensors[fc_op->outputs[1]].get(); + EXPECT_EQ(output_2->name, "tensor_5"); + EXPECT_EQ(output_2->type, quantization_type); + EXPECT_FLOAT_EQ(output_2->quantization->scale[0], 0.22); + EXPECT_EQ(output_2->quantization->zero_point[0], 50); +} + +INSTANTIATE_TEST_SUITE_P( + MultipleInputOutputTests, + ModelInterface, + ::testing::Values(TensorType_INT8, TensorType_INT16) +); + TEST(ModelInterface, MixedTypeSingleInputOutput) { - auto model = CreateQuantizedModelSingleInputOutput(); + auto model = CreateQuantizedModelSingleInputOutput(TensorType_INT8); // Change model type. flatbuffers::FlatBufferBuilder builder; EXPECT_EQ(ModifyModelInterface(&builder, model.get(), TensorType_UINT8, - TensorType_UINT8), + TensorType_INT8), kTfLiteOk); // Verify results. 
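The test refactor above relies on googletest's value-parameterized tests: the ModelInterface fixture derives from ::testing::TestWithParam, each TEST_P body reads the current value through GetParam(), and INSTANTIATE_TEST_SUITE_P runs every TEST_P once per listed TensorType. Stripped of the TFLite specifics, the pattern reduces to the following minimal sketch (fixture and values here are illustrative only, not part of the TensorFlow tree):

#include "gtest/gtest.h"

class ParityTest : public ::testing::TestWithParam<int> {};

TEST_P(ParityTest, IsEven) {
  // Runs once for every value supplied by the instantiation below.
  EXPECT_EQ(GetParam() % 2, 0);
}

INSTANTIATE_TEST_SUITE_P(SmallEvens, ParityTest, ::testing::Values(2, 4, 6));
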
@@ -408,8 +428,45 @@ TEST(ModelInterface, MixedTypeSingleInputOutput) { EXPECT_EQ(output->quantization->zero_point[0], 50); } +TEST(ModelInterface, Uint8SingleInputOutput) { + auto model = CreateQuantizedModelSingleInputOutput(TensorType_INT8); + + // Change model type. + flatbuffers::FlatBufferBuilder builder; + EXPECT_EQ(ModifyModelInterface(&builder, model.get(), TensorType_UINT8, + TensorType_UINT8), + kTfLiteOk); + + // Verify results. + EXPECT_EQ(model->subgraphs.size(), 1); + EXPECT_EQ(model->subgraphs[0]->tensors.size(), 4); + EXPECT_EQ(model->subgraphs[0]->inputs.size(), 1); + EXPECT_EQ(model->subgraphs[0]->inputs[0], 0); + EXPECT_EQ(model->subgraphs[0]->outputs.size(), 1); + EXPECT_EQ(model->subgraphs[0]->outputs[0], 3); + EXPECT_EQ(model->operator_codes.size(), 3); + EXPECT_EQ(model->subgraphs[0]->operators.size(), 3); + EXPECT_EQ(model->subgraphs[0]->operators[0]->opcode_index, 0); + EXPECT_EQ(model->subgraphs[0]->operators[1]->opcode_index, 1); + EXPECT_EQ(model->subgraphs[0]->operators[2]->opcode_index, 0); + + auto input_quant_op = model->subgraphs[0]->operators[0].get(); + auto input = model->subgraphs[0]->tensors[input_quant_op->inputs[0]].get(); + EXPECT_EQ(input->name, "tensor_0"); + EXPECT_EQ(input->type, TensorType_UINT8); + EXPECT_FLOAT_EQ(input->quantization->scale[0], 0.35); + EXPECT_EQ(input->quantization->zero_point[0], 156); + + auto output_quant_op = model->subgraphs[0]->operators[2].get(); + auto output = model->subgraphs[0]->tensors[output_quant_op->outputs[0]].get(); + EXPECT_EQ(output->name, "tensor_3"); + EXPECT_EQ(output->type, TensorType_UINT8); + EXPECT_FLOAT_EQ(output->quantization->scale[0], 0.12); + EXPECT_EQ(output->quantization->zero_point[0], 178); +} + TEST(ModelInterface, Uint8MutipleInputOutput) { - auto model = CreateQuantizedModelMultipleInputOutput(); + auto model = CreateQuantizedModelMultipleInputOutput(TensorType_INT8); // Change model type. flatbuffers::FlatBufferBuilder builder; @@ -465,57 +522,6 @@ TEST(ModelInterface, Uint8MutipleInputOutput) { EXPECT_EQ(output_2->quantization->zero_point[0], 178); } -TEST(ModelInterface, Int8MutipleInputOutput) { - auto model = CreateQuantizedModelMultipleInputOutput(); - - // Change model type. - flatbuffers::FlatBufferBuilder builder; - EXPECT_EQ(ModifyModelInterface(&builder, model.get(), TensorType_INT8, - TensorType_INT8), - kTfLiteOk); - - // Verify results. 
- EXPECT_EQ(model->subgraphs.size(), 1); - // TODO (b/158254056): Remove unused inputs and outputs from tensor list - // EXPECT_EQ(model->subgraphs[0]->tensors.size(), 4); - EXPECT_EQ(model->subgraphs[0]->tensors.size(), 6); - EXPECT_EQ(model->subgraphs[0]->inputs.size(), 2); - EXPECT_EQ(model->subgraphs[0]->inputs[0], 2); - EXPECT_EQ(model->subgraphs[0]->inputs[1], 3); - EXPECT_EQ(model->subgraphs[0]->outputs.size(), 2); - EXPECT_EQ(model->subgraphs[0]->outputs[0], 4); - EXPECT_EQ(model->subgraphs[0]->outputs[1], 5); - EXPECT_EQ(model->operator_codes.size(), 3); - EXPECT_EQ(model->subgraphs[0]->operators.size(), 1); - EXPECT_EQ(model->subgraphs[0]->operators[0]->opcode_index, 1); - - auto fc_op = model->subgraphs[0]->operators[0].get(); - - auto input_1 = model->subgraphs[0]->tensors[fc_op->inputs[0]].get(); - EXPECT_EQ(input_1->name, "tensor_2"); - EXPECT_EQ(input_1->type, TensorType_INT8); - EXPECT_FLOAT_EQ(input_1->quantization->scale[0], 0.35); - EXPECT_EQ(input_1->quantization->zero_point[0], 28); - - auto input_2 = model->subgraphs[0]->tensors[fc_op->inputs[1]].get(); - EXPECT_EQ(input_2->name, "tensor_3"); - EXPECT_EQ(input_2->type, TensorType_INT8); - EXPECT_FLOAT_EQ(input_2->quantization->scale[0], 0.12); - EXPECT_EQ(input_2->quantization->zero_point[0], 50); - - auto output_1 = model->subgraphs[0]->tensors[fc_op->outputs[0]].get(); - EXPECT_EQ(output_1->name, "tensor_4"); - EXPECT_EQ(output_1->type, TensorType_INT8); - EXPECT_FLOAT_EQ(output_1->quantization->scale[0], 0.45); - EXPECT_EQ(output_1->quantization->zero_point[0], 28); - - auto output_2 = model->subgraphs[0]->tensors[fc_op->outputs[1]].get(); - EXPECT_EQ(output_2->name, "tensor_5"); - EXPECT_EQ(output_2->type, TensorType_INT8); - EXPECT_FLOAT_EQ(output_2->quantization->scale[0], 0.22); - EXPECT_EQ(output_2->quantization->zero_point[0], 50); -} - TEST(ModelInterface, Float) { // Create the model. std::unique_ptr input_model_t = CreateFloatModel(); From 426c9839aafcccccb390842407da3465711e3fa5 Mon Sep 17 00:00:00 2001 From: Tamas Nyiri Date: Wed, 10 Jun 2020 10:27:54 +0100 Subject: [PATCH 0028/2522] added comment and wqminor stylistic changes --- .../tools/optimize/modify_model_interface.cc | 27 ++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/tensorflow/lite/tools/optimize/modify_model_interface.cc b/tensorflow/lite/tools/optimize/modify_model_interface.cc index eead4999ca6..9e76257296e 100644 --- a/tensorflow/lite/tools/optimize/modify_model_interface.cc +++ b/tensorflow/lite/tools/optimize/modify_model_interface.cc @@ -45,9 +45,9 @@ struct TensorOpTensor { // Finds float tensors that are model inputs and is consumed by a quantize Op. // The returned TensorOpTensor should have reverse order. -std::vector GetInputTensors(ModelT* model, - ErrorReporter* error_reporter, - const TensorType& input_type) { +std::vector GetInputTensors(const TensorType& input_type, + ModelT* model, + ErrorReporter* error_reporter) { std::vector result; // Get all input tensors. for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs.size(); @@ -91,7 +91,10 @@ std::vector GetInputTensors(ModelT* model, "modify_model_interface currently only supports " "int8 and int16 quantized models."); } + // Usually the input model has to have the same quantization layers as the ones + // we're trying to remove. if (quant_output->type != input_type) { + //An exception from this is when we are setting the input or output type to UINT8. 
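Spelled out as a standalone predicate, the rule that this comment and the check below implement is: the requested interface type must match the quantized tensor's type, with one exception, a uint8 interface may be requested on an int8-quantized tensor. A hedged restatement (not code from the patch; the real code reports an error instead of returning a bool):

#include "tensorflow/lite/schema/schema_generated.h"

// True when `requested` is an acceptable interface type for a tensor of
// type `actual` under the rule used by GetInputTensors/GetOutputTensors.
bool InterfaceTypeAllowed(tflite::TensorType actual,
                          tflite::TensorType requested) {
  if (actual == requested) return true;
  // The one exception: a uint8 interface layered on an int8 tensor.
  return actual == tflite::TensorType_INT8 &&
         requested == tflite::TensorType_UINT8;
}
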
if (!(quant_output->type == TensorType_INT8 && input_type == TensorType_UINT8)) { TF_LITE_REPORT_ERROR(error_reporter, "Model's type incompatible with output type argument."); @@ -109,9 +112,9 @@ std::vector GetInputTensors(ModelT* model, // Finds float tensors that are model output and is consumed by a dequantize Op. // The returned TensorOpTensor should have reverse order. -std::vector GetOutputTensors(ModelT* model, - ErrorReporter* error_reporter, - const TensorType& output_type) { +std::vector GetOutputTensors(const TensorType& output_type, + ModelT* model, + ErrorReporter* error_reporter) { std::vector result; // Get all output tensors. for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs.size(); @@ -302,9 +305,9 @@ std::unique_ptr CreateMutableModelFromFile( return copied_model; } -int GetOriginalNumberOfTensors(ModelT* model, ErrorReporter* error_reporter, const TensorType& input_type, const TensorType& output_type) { - std::vector outputs = GetOutputTensors(model, error_reporter, output_type); - std::vector inputs = GetInputTensors(model, error_reporter, input_type); +int GetOriginalNumberOfTensors(const TensorType& input_type, const TensorType& output_type, ModelT* model, ErrorReporter* error_reporter) { + std::vector outputs = GetOutputTensors(output_type, model, error_reporter); + std::vector inputs = GetInputTensors(input_type, model, error_reporter); return model->subgraphs[0]->tensors.size() - outputs.size() - inputs.size(); } @@ -315,12 +318,12 @@ TfLiteStatus ModifyModelInterface(flatbuffers::FlatBufferBuilder* builder, const TensorType& output_type) { tflite::StderrReporter error_reporter; const int original_number_tensors = - GetOriginalNumberOfTensors(model, &error_reporter, input_type, output_type); + GetOriginalNumberOfTensors(input_type, output_type, model, &error_reporter); // Finds float tensors that are model output and are consumed by a float to int8/int16 // quantize Op. // Do output first since the tensors are added into input first., std::vector outputs = - GetOutputTensors(model, &error_reporter, output_type); + GetOutputTensors(output_type, model, &error_reporter); switch (output_type) { case TensorType_UINT8: SetOutputTypeToUINT8(model, outputs); @@ -335,7 +338,7 @@ TfLiteStatus ModifyModelInterface(flatbuffers::FlatBufferBuilder* builder, // Find float tensors that are model input and is consumed by a float to int8/int16 // quantize Op. - std::vector inputs = GetInputTensors(model, &error_reporter, input_type); + std::vector inputs = GetInputTensors(input_type, model, &error_reporter); switch (input_type) { case TensorType_UINT8: SetInputTypeToUINT8(model, inputs); From 1efc6cc1353cd97780a726f6ea37ba6d2f2753fd Mon Sep 17 00:00:00 2001 From: Tamas Nyiri Date: Wed, 10 Jun 2020 10:27:54 +0100 Subject: [PATCH 0029/2522] Added comment and minor stylistic changes --- .../tools/optimize/modify_model_interface.cc | 27 ++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/tensorflow/lite/tools/optimize/modify_model_interface.cc b/tensorflow/lite/tools/optimize/modify_model_interface.cc index eead4999ca6..9e76257296e 100644 --- a/tensorflow/lite/tools/optimize/modify_model_interface.cc +++ b/tensorflow/lite/tools/optimize/modify_model_interface.cc @@ -45,9 +45,9 @@ struct TensorOpTensor { // Finds float tensors that are model inputs and is consumed by a quantize Op. // The returned TensorOpTensor should have reverse order. 
-std::vector GetInputTensors(ModelT* model, - ErrorReporter* error_reporter, - const TensorType& input_type) { +std::vector GetInputTensors(const TensorType& input_type, + ModelT* model, + ErrorReporter* error_reporter) { std::vector result; // Get all input tensors. for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs.size(); @@ -91,7 +91,10 @@ std::vector GetInputTensors(ModelT* model, "modify_model_interface currently only supports " "int8 and int16 quantized models."); } + // Usually the input model has to have the same quantization layers as the ones + // we're trying to remove. if (quant_output->type != input_type) { + //An exception from this is when we are setting the input or output type to UINT8. if (!(quant_output->type == TensorType_INT8 && input_type == TensorType_UINT8)) { TF_LITE_REPORT_ERROR(error_reporter, "Model's type incompatible with output type argument."); @@ -109,9 +112,9 @@ std::vector GetInputTensors(ModelT* model, // Finds float tensors that are model output and is consumed by a dequantize Op. // The returned TensorOpTensor should have reverse order. -std::vector GetOutputTensors(ModelT* model, - ErrorReporter* error_reporter, - const TensorType& output_type) { +std::vector GetOutputTensors(const TensorType& output_type, + ModelT* model, + ErrorReporter* error_reporter) { std::vector result; // Get all output tensors. for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs.size(); @@ -302,9 +305,9 @@ std::unique_ptr CreateMutableModelFromFile( return copied_model; } -int GetOriginalNumberOfTensors(ModelT* model, ErrorReporter* error_reporter, const TensorType& input_type, const TensorType& output_type) { - std::vector outputs = GetOutputTensors(model, error_reporter, output_type); - std::vector inputs = GetInputTensors(model, error_reporter, input_type); +int GetOriginalNumberOfTensors(const TensorType& input_type, const TensorType& output_type, ModelT* model, ErrorReporter* error_reporter) { + std::vector outputs = GetOutputTensors(output_type, model, error_reporter); + std::vector inputs = GetInputTensors(input_type, model, error_reporter); return model->subgraphs[0]->tensors.size() - outputs.size() - inputs.size(); } @@ -315,12 +318,12 @@ TfLiteStatus ModifyModelInterface(flatbuffers::FlatBufferBuilder* builder, const TensorType& output_type) { tflite::StderrReporter error_reporter; const int original_number_tensors = - GetOriginalNumberOfTensors(model, &error_reporter, input_type, output_type); + GetOriginalNumberOfTensors(input_type, output_type, model, &error_reporter); // Finds float tensors that are model output and are consumed by a float to int8/int16 // quantize Op. // Do output first since the tensors are added into input first., std::vector outputs = - GetOutputTensors(model, &error_reporter, output_type); + GetOutputTensors(output_type, model, &error_reporter); switch (output_type) { case TensorType_UINT8: SetOutputTypeToUINT8(model, outputs); @@ -335,7 +338,7 @@ TfLiteStatus ModifyModelInterface(flatbuffers::FlatBufferBuilder* builder, // Find float tensors that are model input and is consumed by a float to int8/int16 // quantize Op. 
- std::vector inputs = GetInputTensors(model, &error_reporter, input_type); + std::vector inputs = GetInputTensors(input_type, model, &error_reporter); switch (input_type) { case TensorType_UINT8: SetInputTypeToUINT8(model, inputs); From 5f1c1ae431f6b230a4f7c680589ce4ad79cf6cf9 Mon Sep 17 00:00:00 2001 From: Tamas Nyiri Date: Wed, 10 Jun 2020 13:05:21 +0100 Subject: [PATCH 0030/2522] applied clang-format --- .../tools/optimize/modify_model_interface.cc | 58 +++++++++++-------- .../optimize/modify_model_interface_main.cc | 13 ++--- .../optimize/modify_model_interface_test.cc | 25 ++++---- 3 files changed, 52 insertions(+), 44 deletions(-) diff --git a/tensorflow/lite/tools/optimize/modify_model_interface.cc b/tensorflow/lite/tools/optimize/modify_model_interface.cc index 9e76257296e..e8e9229b1bb 100644 --- a/tensorflow/lite/tools/optimize/modify_model_interface.cc +++ b/tensorflow/lite/tools/optimize/modify_model_interface.cc @@ -19,8 +19,8 @@ limitations under the License. #include #include -#include "flatbuffers/flexbuffers.h" #include "absl/memory/memory.h" +#include "flatbuffers/flexbuffers.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/error_reporter.h" #include "tensorflow/lite/kernels/internal/compatibility.h" @@ -86,18 +86,22 @@ std::vector GetInputTensors(const TensorType& input_type, } const int model_input_index = input_tensors[input_tensor]; TensorT* quant_output = subgraph->tensors[op->outputs[0]].get(); - if (quant_output->type != TensorType_INT8 && quant_output->type != TensorType_INT16) { + if (quant_output->type != TensorType_INT8 && + quant_output->type != TensorType_INT16) { TF_LITE_REPORT_ERROR(error_reporter, "modify_model_interface currently only supports " "int8 and int16 quantized models."); } - // Usually the input model has to have the same quantization layers as the ones - // we're trying to remove. + // Usually the input model has to have the same quantization layers as the + // ones we're trying to remove. if (quant_output->type != input_type) { - //An exception from this is when we are setting the input or output type to UINT8. - if (!(quant_output->type == TensorType_INT8 && input_type == TensorType_UINT8)) { - TF_LITE_REPORT_ERROR(error_reporter, - "Model's type incompatible with output type argument."); + // An exception from this is when we are setting the input or output + // type to UINT8. + if (!(quant_output->type == TensorType_INT8 && + input_type == TensorType_UINT8)) { + TF_LITE_REPORT_ERROR( + error_reporter, + "Model's type incompatible with output type argument."); } } if (quant_output->quantization == nullptr) { @@ -153,7 +157,8 @@ std::vector GetOutputTensors(const TensorType& output_type, } const int model_output_index = output_tensors[output_tensor]; TensorT* dequant_input = subgraph->tensors[op->inputs[0]].get(); - if (dequant_input->type != TensorType_INT8 && dequant_input->type != TensorType_INT16) { + if (dequant_input->type != TensorType_INT8 && + dequant_input->type != TensorType_INT16) { // Currently only supports INT8 and INT16 quantized models. 
TF_LITE_REPORT_ERROR(error_reporter, "modify_model_interface currently only supports " @@ -161,9 +166,11 @@ std::vector GetOutputTensors(const TensorType& output_type, return {}; } if (dequant_input->type != output_type) { - if (!(dequant_input->type == TensorType_INT8 && output_type == TensorType_UINT8)) { - TF_LITE_REPORT_ERROR(error_reporter, - "Model's type incompatible with output type argument."); + if (!(dequant_input->type == TensorType_INT8 && + output_type == TensorType_UINT8)) { + TF_LITE_REPORT_ERROR( + error_reporter, + "Model's type incompatible with output type argument."); } } if (dequant_input->quantization == nullptr) { @@ -305,9 +312,13 @@ std::unique_ptr CreateMutableModelFromFile( return copied_model; } -int GetOriginalNumberOfTensors(const TensorType& input_type, const TensorType& output_type, ModelT* model, ErrorReporter* error_reporter) { - std::vector outputs = GetOutputTensors(output_type, model, error_reporter); - std::vector inputs = GetInputTensors(input_type, model, error_reporter); +int GetOriginalNumberOfTensors(const TensorType& input_type, + const TensorType& output_type, ModelT* model, + ErrorReporter* error_reporter) { + std::vector outputs = + GetOutputTensors(output_type, model, error_reporter); + std::vector inputs = + GetInputTensors(input_type, model, error_reporter); return model->subgraphs[0]->tensors.size() - outputs.size() - inputs.size(); } @@ -317,11 +328,11 @@ TfLiteStatus ModifyModelInterface(flatbuffers::FlatBufferBuilder* builder, ModelT* model, const TensorType& input_type, const TensorType& output_type) { tflite::StderrReporter error_reporter; - const int original_number_tensors = - GetOriginalNumberOfTensors(input_type, output_type, model, &error_reporter); - // Finds float tensors that are model output and are consumed by a float to int8/int16 - // quantize Op. - // Do output first since the tensors are added into input first., + const int original_number_tensors = GetOriginalNumberOfTensors( + input_type, output_type, model, &error_reporter); + // Finds float tensors that are model output and are consumed by a float to + // int8/int16 quantize Op. Do output first since the tensors are added into + // input first., std::vector outputs = GetOutputTensors(output_type, model, &error_reporter); switch (output_type) { @@ -336,9 +347,10 @@ TfLiteStatus ModifyModelInterface(flatbuffers::FlatBufferBuilder* builder, return kTfLiteError; } - // Find float tensors that are model input and is consumed by a float to int8/int16 - // quantize Op. - std::vector inputs = GetInputTensors(input_type, model, &error_reporter); + // Find float tensors that are model input and is consumed by a float to + // int8/int16 quantize Op. 
+ std::vector inputs = + GetInputTensors(input_type, model, &error_reporter); switch (input_type) { case TensorType_UINT8: SetInputTypeToUINT8(model, inputs); diff --git a/tensorflow/lite/tools/optimize/modify_model_interface_main.cc b/tensorflow/lite/tools/optimize/modify_model_interface_main.cc index 18e210332f0..b22c2baccaa 100644 --- a/tensorflow/lite/tools/optimize/modify_model_interface_main.cc +++ b/tensorflow/lite/tools/optimize/modify_model_interface_main.cc @@ -25,12 +25,10 @@ int main(int argc, char** argv) { return 1; } - const std::unordered_map supported_types - { - { "uint8", tflite::TensorType_UINT8 }, - { "int8", tflite::TensorType_INT8 }, - { "int16", tflite::TensorType_INT16 } - }; + const std::unordered_map supported_types{ + {"uint8", tflite::TensorType_UINT8}, + {"int8", tflite::TensorType_INT8}, + {"int16", tflite::TensorType_INT16}}; tflite::TensorType input = tflite::TensorType_INT8; tflite::TensorType output = tflite::TensorType_INT8; @@ -39,7 +37,8 @@ int main(int argc, char** argv) { input = supported_types.at(argv[3]); output = supported_types.at(argv[4]); } catch (const std::out_of_range&) { - printf("Only supports uint8, int8 and int16 for input and output interfaces"); + printf( + "Only supports uint8, int8 and int16 for input and output interfaces"); return 1; } diff --git a/tensorflow/lite/tools/optimize/modify_model_interface_test.cc b/tensorflow/lite/tools/optimize/modify_model_interface_test.cc index acf9bf1a02d..88613c3c2be 100644 --- a/tensorflow/lite/tools/optimize/modify_model_interface_test.cc +++ b/tensorflow/lite/tools/optimize/modify_model_interface_test.cc @@ -14,11 +14,11 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/lite/tools/optimize/modify_model_interface.h" -#include - #include #include +#include + #include "absl/memory/memory.h" #include "tensorflow/lite/model.h" #include "tensorflow/lite/schema/schema_generated.h" @@ -30,7 +30,8 @@ namespace { using ::testing::ElementsAreArray; // Create a model with 1 quant, 1 FC, 1 dequant -std::unique_ptr CreateQuantizedModelSingleInputOutput(const TensorType& quantization_type) { +std::unique_ptr CreateQuantizedModelSingleInputOutput( + const TensorType& quantization_type) { auto model = absl::make_unique(); auto subgraph = absl::make_unique(); auto buffer = absl::make_unique(); @@ -118,7 +119,8 @@ std::unique_ptr CreateQuantizedModelSingleInputOutput(const TensorType& // Create a model with 2 quant, 1 FC, 2 dequant // The model mimics the behavior of the quantize_model.cc. -std::unique_ptr CreateQuantizedModelMultipleInputOutput(const TensorType& quantization_type) { +std::unique_ptr CreateQuantizedModelMultipleInputOutput( + const TensorType& quantization_type) { auto model = absl::make_unique(); auto subgraph = absl::make_unique(); auto buffer = absl::make_unique(); @@ -290,8 +292,7 @@ std::unique_ptr CreateFloatModel() { return model; } -struct ModelInterface: - ::testing::TestWithParam {}; +struct ModelInterface : ::testing::TestWithParam {}; TEST_P(ModelInterface, SingleInputOutput) { TensorType quantization_type = GetParam(); @@ -333,15 +334,14 @@ TEST_P(ModelInterface, SingleInputOutput) { } TEST_P(ModelInterface, MutipleInputOutput) { - TensorType quantization_type = GetParam(); auto model = CreateQuantizedModelMultipleInputOutput(quantization_type); // Change model type. 
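The command-line driver touched above resolves the user-supplied type names through an unordered_map and relies on .at() throwing std::out_of_range for anything unsupported, which the tool converts into the "Only supports uint8, int8 and int16" message. A compact restatement of that lookup (the helper name is hypothetical):

#include <string>
#include <unordered_map>

#include "tensorflow/lite/schema/schema_generated.h"

// Maps a type name from the command line to the corresponding TensorType.
// Throws std::out_of_range for any name other than uint8, int8 or int16.
tflite::TensorType ParseTensorType(const std::string& name) {
  static const std::unordered_map<std::string, tflite::TensorType> kTypes{
      {"uint8", tflite::TensorType_UINT8},
      {"int8", tflite::TensorType_INT8},
      {"int16", tflite::TensorType_INT16}};
  return kTypes.at(name);
}
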
- flatbuffers::FlatBufferBuilder builder; + flatbuffers::FlatBufferBuilder builder; EXPECT_EQ(ModifyModelInterface(&builder, model.get(), quantization_type, - quantization_type), + quantization_type), kTfLiteOk); // Verify results. @@ -386,11 +386,8 @@ TEST_P(ModelInterface, MutipleInputOutput) { EXPECT_EQ(output_2->quantization->zero_point[0], 50); } -INSTANTIATE_TEST_SUITE_P( - MultipleInputOutputTests, - ModelInterface, - ::testing::Values(TensorType_INT8, TensorType_INT16) -); +INSTANTIATE_TEST_SUITE_P(MultipleInputOutputTests, ModelInterface, + ::testing::Values(TensorType_INT8, TensorType_INT16)); TEST(ModelInterface, MixedTypeSingleInputOutput) { auto model = CreateQuantizedModelSingleInputOutput(TensorType_INT8); From 01dd066031448709b14cbce727fda4fa9195b74e Mon Sep 17 00:00:00 2001 From: Elena Zhelezina Date: Wed, 10 Jun 2020 21:23:36 +0100 Subject: [PATCH 0031/2522] Addressed reviewer's comments. Change-Id: I4e5e73d43d35c3168c4f98355b4bbfd2d4aea471 --- tensorflow/lite/kernels/reduce.cc | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tensorflow/lite/kernels/reduce.cc b/tensorflow/lite/kernels/reduce.cc index 4b69402f15b..96c923c5146 100644 --- a/tensorflow/lite/kernels/reduce.cc +++ b/tensorflow/lite/kernels/reduce.cc @@ -193,11 +193,7 @@ TfLiteStatus InitializeTemporaries(TfLiteContext* context, TfLiteNode* node, temp_sum->type = kTfLiteInt64; break; case kTfLiteUInt8: - temp_sum->type = kTfLiteInt32; - break; case kTfLiteInt8: - temp_sum->type = kTfLiteInt32; - break; case kTfLiteInt16: temp_sum->type = kTfLiteInt32; break; From 8c10f56c9272a9d8246aee6382ca7dbe126b54d4 Mon Sep 17 00:00:00 2001 From: Tamas Nyiri Date: Thu, 11 Jun 2020 01:27:54 +0100 Subject: [PATCH 0032/2522] Stylistic changes --- tensorflow/lite/tools/optimize/modify_model_interface.cc | 8 ++++---- tensorflow/lite/tools/optimize/modify_model_interface.h | 2 +- .../lite/tools/optimize/modify_model_interface_main.cc | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/lite/tools/optimize/modify_model_interface.cc b/tensorflow/lite/tools/optimize/modify_model_interface.cc index e8e9229b1bb..e7cd09b0006 100644 --- a/tensorflow/lite/tools/optimize/modify_model_interface.cc +++ b/tensorflow/lite/tools/optimize/modify_model_interface.cc @@ -72,7 +72,7 @@ std::vector GetInputTensors(const TensorType& input_type, continue; } if (op_code != BuiltinOperator_QUANTIZE) { - // Currently only supports INT8 and INT16 quantized models. + // Currently only supports int8 and int16 quantized models. TF_LITE_REPORT_ERROR( error_reporter, "modify_model_interface called on a model without quant/dequant."); @@ -96,7 +96,7 @@ std::vector GetInputTensors(const TensorType& input_type, // ones we're trying to remove. if (quant_output->type != input_type) { // An exception from this is when we are setting the input or output - // type to UINT8. + // type to uint8. if (!(quant_output->type == TensorType_INT8 && input_type == TensorType_UINT8)) { TF_LITE_REPORT_ERROR( @@ -143,7 +143,7 @@ std::vector GetOutputTensors(const TensorType& output_type, continue; } if (op_code != BuiltinOperator_DEQUANTIZE) { - // Currently only supports INT8 and INT16 quantized models. + // Currently only supports int8 and int16 quantized models. 
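The reduce.cc change in the patch above collapses three duplicated case bodies into one fall-through group: any 8-bit or 16-bit integer input selects an int32 temporary sum, while int64 keeps an int64 accumulator. Restated as a small helper (a simplified sketch with a hypothetical name, not the actual reduce.cc code, which assigns temp_sum->type in place):

#include "tensorflow/lite/c/common.h"

// Simplified restatement of the temp_sum type selection in reduce.cc.
TfLiteType AccumulatorTypeFor(TfLiteType input_type) {
  switch (input_type) {
    case kTfLiteUInt8:
    case kTfLiteInt8:
    case kTfLiteInt16:
      return kTfLiteInt32;  // quantized integer inputs share an int32 sum
    case kTfLiteInt64:
      return kTfLiteInt64;
    default:
      return input_type;    // other types keep their own accumulator type
  }
}
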
TF_LITE_REPORT_ERROR( error_reporter, "modify_model_interface called on a model without quant/dequant."); @@ -159,7 +159,7 @@ std::vector GetOutputTensors(const TensorType& output_type, TensorT* dequant_input = subgraph->tensors[op->inputs[0]].get(); if (dequant_input->type != TensorType_INT8 && dequant_input->type != TensorType_INT16) { - // Currently only supports INT8 and INT16 quantized models. + // Currently only supports int8 and int16 quantized models. TF_LITE_REPORT_ERROR(error_reporter, "modify_model_interface currently only supports " "int8 and int16 quantized models."); diff --git a/tensorflow/lite/tools/optimize/modify_model_interface.h b/tensorflow/lite/tools/optimize/modify_model_interface.h index b3a39d63801..5711a615812 100644 --- a/tensorflow/lite/tools/optimize/modify_model_interface.h +++ b/tensorflow/lite/tools/optimize/modify_model_interface.h @@ -24,7 +24,7 @@ namespace optimize { // Changes the interface of a quantized model. This method allows the users to // replace float interface with other types. // This populates the builder with the new model. -// Currently only int8, int16 and unit8 are supported. +// Currently only int8, int16 and uint8 are supported. // // Note: This is a private API, subject to change. TfLiteStatus ModifyModelInterface(flatbuffers::FlatBufferBuilder* builder, diff --git a/tensorflow/lite/tools/optimize/modify_model_interface_main.cc b/tensorflow/lite/tools/optimize/modify_model_interface_main.cc index b22c2baccaa..6272ef30777 100644 --- a/tensorflow/lite/tools/optimize/modify_model_interface_main.cc +++ b/tensorflow/lite/tools/optimize/modify_model_interface_main.cc @@ -38,7 +38,7 @@ int main(int argc, char** argv) { output = supported_types.at(argv[4]); } catch (const std::out_of_range&) { printf( - "Only supports uint8, int8 and int16 for input and output interfaces"); + "Only supports uint8, int8 and int16 for input and output types"); return 1; } From 9123bfa78d112e13179812bc515918c6ba8952f3 Mon Sep 17 00:00:00 2001 From: zilinzhu Date: Fri, 12 Jun 2020 18:18:51 +0800 Subject: [PATCH 0033/2522] add hoist data-discarding ops pass for tf.data --- .../core/grappler/optimizers/data/BUILD | 39 ++++++ .../optimizers/data/graph_test_utils.cc | 44 +++++++ .../optimizers/data/graph_test_utils.h | 13 ++ .../data/hoist_data_discarding_ops.cc | 120 ++++++++++++++++++ .../data/hoist_data_discarding_ops.h | 52 ++++++++ .../data/hoist_data_discarding_ops_test.cc | 95 ++++++++++++++ .../optimizers/data/meta_optimizer.cc | 3 +- .../experimental/ops/optimization_options.py | 8 ++ ...a.experimental.-optimization-options.pbtxt | 4 + ...a.experimental.-optimization-options.pbtxt | 4 + 10 files changed, 381 insertions(+), 1 deletion(-) create mode 100644 tensorflow/core/grappler/optimizers/data/hoist_data_discarding_ops.cc create mode 100644 tensorflow/core/grappler/optimizers/data/hoist_data_discarding_ops.h create mode 100644 tensorflow/core/grappler/optimizers/data/hoist_data_discarding_ops_test.cc diff --git a/tensorflow/core/grappler/optimizers/data/BUILD b/tensorflow/core/grappler/optimizers/data/BUILD index a927afc5b30..f4411f02f86 100644 --- a/tensorflow/core/grappler/optimizers/data/BUILD +++ b/tensorflow/core/grappler/optimizers/data/BUILD @@ -16,6 +16,7 @@ cc_library( deps = [ ":filter_fusion", ":filter_with_random_uniform_fusion", + ":hoist_data_discarding_ops", ":hoist_random_uniform", ":inject_prefetch", ":latency_all_edges", @@ -252,6 +253,44 @@ cc_library( ] + tf_protos_all(), ) +cc_library( + name = "hoist_data_discarding_ops", + srcs = 
["hoist_data_discarding_ops.cc"], + hdrs = [ + "hoist_data_discarding_ops.h", + ], + deps = [ + ":function_utils", + ":graph_utils", + ":optimizer_base", + "@com_google_absl//absl/container:flat_hash_set", + "//tensorflow/core:lib", + "//tensorflow/core/grappler:mutable_graph_view", + "//tensorflow/core/grappler:grappler_item", + "//tensorflow/core/grappler:op_types", + "//tensorflow/core/grappler:utils", + "//tensorflow/core/grappler/clusters:cluster", + "//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry", + "//tensorflow/core:lib_internal", + ] + tf_protos_all(), + alwayslink = 1, +) + +tf_cc_test( + name = "hoist_data_discarding_ops_test", + srcs = ["hoist_data_discarding_ops_test.cc"], + deps = [ + ":graph_test_utils", + ":graph_utils", + ":hoist_data_discarding_ops", + "//tensorflow/core:framework", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/grappler:grappler_item", + ] + tf_protos_all(), +) + cc_library( name = "hoist_random_uniform", srcs = ["hoist_random_uniform.cc"], diff --git a/tensorflow/core/grappler/optimizers/data/graph_test_utils.cc b/tensorflow/core/grappler/optimizers/data/graph_test_utils.cc index 323e3c2c6d8..5aeeb977e73 100644 --- a/tensorflow/core/grappler/optimizers/data/graph_test_utils.cc +++ b/tensorflow/core/grappler/optimizers/data/graph_test_utils.cc @@ -139,6 +139,50 @@ NodeDef MakeShuffleV2Node(StringPiece name, StringPiece input_node_name, }); } +NodeDef MakeTakeNode(StringPiece name, StringPiece input_node_name, + StringPiece count_node_name) { + return test::function::NDef( + name, "TakeDataset", + { + string(input_node_name), + string(count_node_name), + }, + { + {"output_shapes", gtl::ArraySlice{}}, + {"output_types", gtl::ArraySlice{}}, + }); +} + +NodeDef MakeSkipNode(StringPiece name, StringPiece input_node_name, + StringPiece count_node_name) { + return test::function::NDef( + name, "SkipDataset", + { + string(input_node_name), + string(count_node_name), + }, + { + {"output_shapes", gtl::ArraySlice{}}, + {"output_types", gtl::ArraySlice{}}, + }); +} + +NodeDef MakeShardNode(StringPiece name, StringPiece input_node_name, + StringPiece num_shards_node_name, + StringPiece index_node_name) { + return test::function::NDef( + name, "ShardDataset", + { + string(input_node_name), + string(num_shards_node_name), + string(index_node_name), + }, + { + {"output_shapes", gtl::ArraySlice{}}, + {"output_types", gtl::ArraySlice{}}, + }); +} + } // namespace graph_tests_utils } // namespace grappler } // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/data/graph_test_utils.h b/tensorflow/core/grappler/optimizers/data/graph_test_utils.h index 0dcfe656b89..268d2120ab0 100644 --- a/tensorflow/core/grappler/optimizers/data/graph_test_utils.h +++ b/tensorflow/core/grappler/optimizers/data/graph_test_utils.h @@ -66,6 +66,19 @@ NodeDef MakeShuffleV2Node(StringPiece name, StringPiece input_node_name, StringPiece buffer_size_node_name, StringPiece seed_generator_node_name); +// Creates a test NodeDef for TakeDataset. +NodeDef MakeTakeNode(StringPiece name, StringPiece input_node_name, + StringPiece count_node_name); + +// Creates a test NodeDef for SkipDataset. +NodeDef MakeSkipNode(StringPiece name, StringPiece input_node_name, + StringPiece count_node_name); + +// Creates a test NodeDef for ShardDataset. 
+NodeDef MakeShardNode(StringPiece name, StringPiece input_node_name, + StringPiece num_shards_node_name, + StringPiece index_node_name); + } // namespace graph_tests_utils } // namespace grappler } // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/data/hoist_data_discarding_ops.cc b/tensorflow/core/grappler/optimizers/data/hoist_data_discarding_ops.cc new file mode 100644 index 00000000000..a99d73f5943 --- /dev/null +++ b/tensorflow/core/grappler/optimizers/data/hoist_data_discarding_ops.cc @@ -0,0 +1,120 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/grappler/optimizers/data/hoist_data_discarding_ops.h" + +#include "absl/container/flat_hash_set.h" +#include "tensorflow/core/framework/attr_value.pb.h" +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/grappler/clusters/cluster.h" +#include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/grappler/mutable_graph_view.h" +#include "tensorflow/core/grappler/op_types.h" +#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h" +#include "tensorflow/core/grappler/optimizers/data/function_utils.h" +#include "tensorflow/core/grappler/optimizers/data/graph_utils.h" +#include "tensorflow/core/grappler/utils.h" +#include "tensorflow/core/platform/protobuf.h" + +namespace tensorflow { +namespace grappler { +namespace { + +constexpr std::array kDataDiscarding = { + "ShardDataset", "SkipDataset", "TakeDataset", +}; + +constexpr std::array kCardinalityPreserving = { + "CacheDataset", "CacheDatasetV2", "PrefetchDataset", + "MapDataset", "ParallelMapDataset", "ParallelMapDatasetV2", +}; + +bool IsDataDiscarding(const NodeDef& node) { + for (const auto& data_discarding_op : kDataDiscarding) { + if (node.op() == data_discarding_op) { + return true; + } + } + return false; +} + +bool IsCardinalityPreserving(const NodeDef& node) { + for (const auto& cardinality_preserving_op : kCardinalityPreserving) { + if (node.op() == cardinality_preserving_op) { + return true; + } + } + return false; +} + +} // namepsace + +Status HoistDataDiscardingOps::OptimizeAndCollectStats(Cluster* cluster, + const GrapplerItem& item, + GraphDef* output, + OptimizationStats* stats) { + *output = item.graph; + MutableGraphView graph(output); + bool updated; + do { + updated = false; + for (NodeDef node : graph.graph()->node()) { + if (IsDataDiscarding(node)) { + NodeDef* start = &node; + NodeDef* start_parent = graph_utils::GetInputNode(*start, graph); + while (IsCardinalityPreserving(*start_parent) && + NumOutputs(*start_parent, graph.graph()) == 1) { + start = start_parent; + start_parent = graph_utils::GetInputNode(*start, graph); + } + // no cardinality preserving op with indegree 1. 
+ if (start->name() == node.name()) { + continue; + } + NodeDef hoisted_node = node; + if (!absl::StartsWith(node.name(), "hoist_data_dsicarding_op/")) { + graph_utils::SetUniqueGraphNodeName( + strings::StrCat("hoist_data_discarding_ops/", node.name()), + graph.graph(), &hoisted_node + ); + } + for (const auto& attr_name : {"output_types", "output_shapes"}) { + graph_utils::CopyAttribute(attr_name, *start_parent, + &hoisted_node); + } + *hoisted_node.mutable_input(0) = start_parent->name(); + *start->mutable_input(0) = hoisted_node.name(); + + auto parent = graph_utils::GetInputNode(node, graph); + TF_RETURN_IF_ERROR(graph.UpdateFanouts(node.name(), parent->name())); + graph.DeleteNodes({node.name()}); + graph.AddNode(std::move(hoisted_node)); + updated = true; + } + } + } while (updated); + return Status::OK(); +} + +void HoistDataDiscardingOps::Feedback(Cluster* cluster, const GrapplerItem& item, + const GraphDef& optimize_output, + double result) { + // no-op +} + +REGISTER_GRAPH_OPTIMIZER_AS(HoistDataDiscardingOps, "hoist_data_discarding_ops"); + +} // namespace grappler +} // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/data/hoist_data_discarding_ops.h b/tensorflow/core/grappler/optimizers/data/hoist_data_discarding_ops.h new file mode 100644 index 00000000000..6168474cff8 --- /dev/null +++ b/tensorflow/core/grappler/optimizers/data/hoist_data_discarding_ops.h @@ -0,0 +1,52 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_HOIST_DATA_DISCARDING_OPS_H_ +#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_HOIST_DATA_DISCARDING_OPS_H_ + +#include "tensorflow/core/grappler/optimizers/data/optimizer_base.h" + +namespace tensorflow { +namespace grappler { + +// This optimization hoists the data discarding ops (such as `skip`, `take` and +// `shard`) to avoid unnecessary computation. 
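The rewrite is safe because skip/take/shard commute with transformations that keep a one-to-one element correspondence: discarding elements before a cardinality-preserving map yields the same elements as discarding afterwards, only with less work. A standalone illustration with plain containers (not TensorFlow code) of why hoisting a take past a map changes cost but not results:

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<int> data{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
  auto f = [](int x) { return 2 * x; };

  // map(f) then take(5): f runs on all 10 elements.
  std::vector<int> mapped(data.size());
  std::transform(data.begin(), data.end(), mapped.begin(), f);
  std::vector<int> a(mapped.begin(), mapped.begin() + 5);

  // take(5) then map(f): f runs on only 5 elements.
  std::vector<int> b(data.begin(), data.begin() + 5);
  std::transform(b.begin(), b.end(), b.begin(), f);

  assert(a == b);  // identical results either way
  return 0;
}
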
+class HoistDataDiscardingOps : public TFDataOptimizerBase { + public: + HoistDataDiscardingOps() = default; + ~HoistDataDiscardingOps() override = default; + + string name() const override { return "hoist_data_discarding_ops"; }; + + bool UsesFunctionLibrary() const override { return false; } + + Status Init( + const tensorflow::RewriterConfig_CustomGraphOptimizer* config) override { + return Status::OK(); + } + + Status OptimizeAndCollectStats(Cluster* cluster, const GrapplerItem& item, + GraphDef* output, + OptimizationStats* stats) override; + + void Feedback(Cluster* cluster, const GrapplerItem& item, + const GraphDef& optimize_output, double result) override; +}; + +} // namespace grappler +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_HOIST_DATA_DISCARDING_OPS_H_ + diff --git a/tensorflow/core/grappler/optimizers/data/hoist_data_discarding_ops_test.cc b/tensorflow/core/grappler/optimizers/data/hoist_data_discarding_ops_test.cc new file mode 100644 index 00000000000..f09dc3ebf5c --- /dev/null +++ b/tensorflow/core/grappler/optimizers/data/hoist_data_discarding_ops_test.cc @@ -0,0 +1,95 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/grappler/optimizers/data/hoist_data_discarding_ops.h" + +#include "tensorflow/core/framework/attr_value_util.h" +#include "tensorflow/core/framework/function_testlib.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/grappler/optimizers/data/graph_test_utils.h" +#include "tensorflow/core/grappler/optimizers/data/graph_utils.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace grappler { +namespace { + +TEST(HoistDataDiscardingOpsTest, ExampleOps) { + using test::function::NDef; + GrapplerItem item; + item.graph = test::function::GDef( + {NDef("start", "Const", {}, {{"value", 0}, {"dtype", DT_INT32}}), + NDef("stop", "Const", {}, {{"value", 10}, {"dtype", DT_INT32}}), + NDef("step", "Const", {}, {{"value", 1}, {"dtype", DT_INT32}}), + NDef("range", "RangeDataset", {"start", "stop", "step"}, { + {"output_shapes", gtl::ArraySlice{}}, + {"output_types", gtl::ArraySlice{}}, + }), + NDef("num_parallel_calls", "Const", {}, + {{"value", 1}, {"dtype", DT_INT32}}), + graph_tests_utils::MakeParallelMapNode("map", "range", + "num_parallel_calls", "XTimesTwo", + /*sloppy=*/false), + NDef("dummy_memory_cache", "DummyMemoryCache", {}, {}), + graph_tests_utils::MakeCacheV2Node("cache", "map", "", "dummy_memory_cache"), + NDef("take_count", "Const", {}, + {{"value", 5}, {"dtype", DT_INT32}}), + graph_tests_utils::MakeTakeNode("take", "cache", "take_count"), + NDef("skip_count", "Const", {}, + {{"value", 1}, {"dtype", DT_INT32}}), + graph_tests_utils::MakeSkipNode("skip", "take", "skip_count"), + NDef("batch_size", 
"Const", {}, + {{"value", 2}, {"dtype", DT_INT32}}), + NDef("drop_remainder", "Const", {}, + {{"value", true}, {"dtype", DT_BOOL}}), + graph_tests_utils::MakeMapAndBatchNode("map_and_batch", "skip", + "batch_size", "drop_remainder", + "XTimesTwo"), + NDef("num_shards", "Const", {}, + {{"value", 2}, {"dtype", DT_INT32}}), + NDef("index", "Const", {}, + {{"value", 0}, {"dtype", DT_INT32}}), + graph_tests_utils::MakeShardNode("shard", "map_and_batch", + "num_shards", "index")}, + // FunctionLib + { + test::function::XTimesTwo(), + }); + + HoistDataDiscardingOps optimizer; + GraphDef output; + TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output)); + + EXPECT_TRUE(graph_utils::ContainsGraphNodeWithName("hoist_data_discarding_ops/take", output)); + EXPECT_TRUE(graph_utils::ContainsGraphNodeWithName("hoist_data_discarding_ops/skip", output)); + EXPECT_FALSE(graph_utils::ContainsGraphNodeWithName("hoist_data_discarding_ops/shard", output)); + + EXPECT_FALSE(graph_utils::ContainsGraphNodeWithName("take", output)); + EXPECT_FALSE(graph_utils::ContainsGraphNodeWithName("skip", output)); + EXPECT_TRUE(graph_utils::ContainsGraphNodeWithName("shard", output)); + + MutableGraphView graph(&output); + EXPECT_TRUE(graph_utils::GetInputNode(*graph.GetNode("hoist_data_discarding_ops/take"), + graph)->name() == "range"); + EXPECT_TRUE(graph_utils::GetInputNode(*graph.GetNode("hoist_data_discarding_ops/skip"), + graph)->name() == "hoist_data_discarding_ops/take"); + EXPECT_TRUE(graph_utils::GetInputNode(*graph.GetNode("map_and_batch"), graph)->name() == "cache"); +} + +} // namespace +} // namsepace grappler +} // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/data/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/data/meta_optimizer.cc index 5804c3ee01a..18745351dd0 100644 --- a/tensorflow/core/grappler/optimizers/data/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/data/meta_optimizer.cc @@ -35,13 +35,14 @@ using ConfigMap = std::map; // tf.data optimizations, in the order we want to perform them. -constexpr std::array kTFDataOptimizations = { +constexpr std::array kTFDataOptimizations = { "noop_elimination", "shuffle_and_repeat_fusion", "map_fusion", "filter_fusion", "filter_with_random_uniform_fusion", "map_and_filter_fusion", + "hoist_data_discarding_ops", "hoist_random_uniform", "map_parallelization", "map_and_batch_fusion", diff --git a/tensorflow/python/data/experimental/ops/optimization_options.py b/tensorflow/python/data/experimental/ops/optimization_options.py index 5db4db91c17..5f6797a6a80 100644 --- a/tensorflow/python/data/experimental/ops/optimization_options.py +++ b/tensorflow/python/data/experimental/ops/optimization_options.py @@ -126,6 +126,13 @@ class OptimizationOptions(options.OptionsBase): "Whether to fuse filter dataset that predicts random_uniform < rate into " "a sampling dataset. If None, defaults to False.") + hoist_data_discarding_ops = options.create_option( + name="hoist_data_discarding_ops", + ty=bool, + docstring= + "Whether to hoist ops that will discard data (such as skip, take, shard)" + "out of map transformations. 
If None, defaults to False.") + hoist_random_uniform = options.create_option( name="hoist_random_uniform", ty=bool, @@ -218,6 +225,7 @@ class OptimizationOptions(options.OptionsBase): all_optimizations = [ "filter_fusion", "filter_with_random_uniform_fusion", + "hoist_data_discarding_ops", "hoist_random_uniform", "map_and_batch_fusion", "map_and_filter_fusion", diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-optimization-options.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-optimization-options.pbtxt index a79d205cf0b..9aa1c76afd1 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-optimization-options.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-optimization-options.pbtxt @@ -27,6 +27,10 @@ tf_class { name: "filter_with_random_uniform_fusion" mtype: "" } + member { + name: "hoist_data_discarding_ops" + mtype: "" + } member { name: "hoist_random_uniform" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-optimization-options.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-optimization-options.pbtxt index a79d205cf0b..9aa1c76afd1 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-optimization-options.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-optimization-options.pbtxt @@ -27,6 +27,10 @@ tf_class { name: "filter_with_random_uniform_fusion" mtype: "" } + member { + name: "hoist_data_discarding_ops" + mtype: "" + } member { name: "hoist_random_uniform" mtype: "" From 0c296013f624e35232570b5ffd1575b77a6a0d93 Mon Sep 17 00:00:00 2001 From: zilinzhu Date: Sat, 13 Jun 2020 10:05:23 +0800 Subject: [PATCH 0034/2522] rename the pass the hoist_discard --- .../core/grappler/optimizers/data/BUILD | 14 ++++++------ ...ata_discarding_ops.cc => hoist_discard.cc} | 22 +++++++++---------- ..._data_discarding_ops.h => hoist_discard.h} | 8 +++---- ...ding_ops_test.cc => hoist_discard_test.cc} | 18 +++++++-------- .../optimizers/data/meta_optimizer.cc | 2 +- .../experimental/ops/optimization_options.py | 6 ++--- ...a.experimental.-optimization-options.pbtxt | 2 +- ...a.experimental.-optimization-options.pbtxt | 2 +- 8 files changed, 37 insertions(+), 37 deletions(-) rename tensorflow/core/grappler/optimizers/data/{hoist_data_discarding_ops.cc => hoist_discard.cc} (81%) rename tensorflow/core/grappler/optimizers/data/{hoist_data_discarding_ops.h => hoist_discard.h} (88%) rename tensorflow/core/grappler/optimizers/data/{hoist_data_discarding_ops_test.cc => hoist_discard_test.cc} (85%) diff --git a/tensorflow/core/grappler/optimizers/data/BUILD b/tensorflow/core/grappler/optimizers/data/BUILD index f4411f02f86..2b98d679e0f 100644 --- a/tensorflow/core/grappler/optimizers/data/BUILD +++ b/tensorflow/core/grappler/optimizers/data/BUILD @@ -16,7 +16,7 @@ cc_library( deps = [ ":filter_fusion", ":filter_with_random_uniform_fusion", - ":hoist_data_discarding_ops", + ":hoist_discard", ":hoist_random_uniform", ":inject_prefetch", ":latency_all_edges", @@ -254,10 +254,10 @@ cc_library( ) cc_library( - name = "hoist_data_discarding_ops", - srcs = ["hoist_data_discarding_ops.cc"], + name = "hoist_discard", + srcs = ["hoist_discard.cc"], hdrs = [ - "hoist_data_discarding_ops.h", + "hoist_discard.h", ], deps = [ ":function_utils", @@ -277,12 +277,12 @@ cc_library( ) tf_cc_test( - name = "hoist_data_discarding_ops_test", - srcs = ["hoist_data_discarding_ops_test.cc"], + name = "hoist_discard_test", + srcs = 
["hoist_discard_test.cc"], deps = [ ":graph_test_utils", ":graph_utils", - ":hoist_data_discarding_ops", + ":hoist_discard", "//tensorflow/core:framework", "//tensorflow/core:test", "//tensorflow/core:test_main", diff --git a/tensorflow/core/grappler/optimizers/data/hoist_data_discarding_ops.cc b/tensorflow/core/grappler/optimizers/data/hoist_discard.cc similarity index 81% rename from tensorflow/core/grappler/optimizers/data/hoist_data_discarding_ops.cc rename to tensorflow/core/grappler/optimizers/data/hoist_discard.cc index a99d73f5943..d2d8cedfa66 100644 --- a/tensorflow/core/grappler/optimizers/data/hoist_data_discarding_ops.cc +++ b/tensorflow/core/grappler/optimizers/data/hoist_discard.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/grappler/optimizers/data/hoist_data_discarding_ops.h" +#include "tensorflow/core/grappler/optimizers/data/hoist_discard.h" #include "absl/container/flat_hash_set.h" #include "tensorflow/core/framework/attr_value.pb.h" @@ -61,10 +61,10 @@ bool IsCardinalityPreserving(const NodeDef& node) { } // namepsace -Status HoistDataDiscardingOps::OptimizeAndCollectStats(Cluster* cluster, - const GrapplerItem& item, - GraphDef* output, - OptimizationStats* stats) { +Status HoistDiscard::OptimizeAndCollectStats(Cluster* cluster, + const GrapplerItem& item, + GraphDef* output, + OptimizationStats* stats) { *output = item.graph; MutableGraphView graph(output); bool updated; @@ -84,9 +84,9 @@ Status HoistDataDiscardingOps::OptimizeAndCollectStats(Cluster* cluster, continue; } NodeDef hoisted_node = node; - if (!absl::StartsWith(node.name(), "hoist_data_dsicarding_op/")) { + if (!absl::StartsWith(node.name(), "hoist_discard/")) { graph_utils::SetUniqueGraphNodeName( - strings::StrCat("hoist_data_discarding_ops/", node.name()), + strings::StrCat("hoist_discard/", node.name()), graph.graph(), &hoisted_node ); } @@ -108,13 +108,13 @@ Status HoistDataDiscardingOps::OptimizeAndCollectStats(Cluster* cluster, return Status::OK(); } -void HoistDataDiscardingOps::Feedback(Cluster* cluster, const GrapplerItem& item, - const GraphDef& optimize_output, - double result) { +void HoistDiscard::Feedback(Cluster* cluster, const GrapplerItem& item, + const GraphDef& optimize_output, + double result) { // no-op } -REGISTER_GRAPH_OPTIMIZER_AS(HoistDataDiscardingOps, "hoist_data_discarding_ops"); +REGISTER_GRAPH_OPTIMIZER_AS(HoistDiscard, "hoist_discard"); } // namespace grappler } // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/data/hoist_data_discarding_ops.h b/tensorflow/core/grappler/optimizers/data/hoist_discard.h similarity index 88% rename from tensorflow/core/grappler/optimizers/data/hoist_data_discarding_ops.h rename to tensorflow/core/grappler/optimizers/data/hoist_discard.h index 6168474cff8..324d80100a1 100644 --- a/tensorflow/core/grappler/optimizers/data/hoist_data_discarding_ops.h +++ b/tensorflow/core/grappler/optimizers/data/hoist_discard.h @@ -23,12 +23,12 @@ namespace grappler { // This optimization hoists the data discarding ops (such as `skip`, `take` and // `shard`) to avoid unnecessary computation. 
-class HoistDataDiscardingOps : public TFDataOptimizerBase { +class HoistDiscard : public TFDataOptimizerBase { public: - HoistDataDiscardingOps() = default; - ~HoistDataDiscardingOps() override = default; + HoistDiscard() = default; + ~HoistDiscard() override = default; - string name() const override { return "hoist_data_discarding_ops"; }; + string name() const override { return "hoist_discard"; }; bool UsesFunctionLibrary() const override { return false; } diff --git a/tensorflow/core/grappler/optimizers/data/hoist_data_discarding_ops_test.cc b/tensorflow/core/grappler/optimizers/data/hoist_discard_test.cc similarity index 85% rename from tensorflow/core/grappler/optimizers/data/hoist_data_discarding_ops_test.cc rename to tensorflow/core/grappler/optimizers/data/hoist_discard_test.cc index f09dc3ebf5c..d8b56df97b1 100644 --- a/tensorflow/core/grappler/optimizers/data/hoist_data_discarding_ops_test.cc +++ b/tensorflow/core/grappler/optimizers/data/hoist_discard_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/grappler/optimizers/data/hoist_data_discarding_ops.h" +#include "tensorflow/core/grappler/optimizers/data/hoist_discard.h" #include "tensorflow/core/framework/attr_value_util.h" #include "tensorflow/core/framework/function_testlib.h" @@ -28,7 +28,7 @@ namespace tensorflow { namespace grappler { namespace { -TEST(HoistDataDiscardingOpsTest, ExampleOps) { +TEST(HoistDiscardTest, ExampleOps) { using test::function::NDef; GrapplerItem item; item.graph = test::function::GDef( @@ -70,23 +70,23 @@ TEST(HoistDataDiscardingOpsTest, ExampleOps) { test::function::XTimesTwo(), }); - HoistDataDiscardingOps optimizer; + HoistDiscard optimizer; GraphDef output; TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output)); - EXPECT_TRUE(graph_utils::ContainsGraphNodeWithName("hoist_data_discarding_ops/take", output)); - EXPECT_TRUE(graph_utils::ContainsGraphNodeWithName("hoist_data_discarding_ops/skip", output)); - EXPECT_FALSE(graph_utils::ContainsGraphNodeWithName("hoist_data_discarding_ops/shard", output)); + EXPECT_TRUE(graph_utils::ContainsGraphNodeWithName("hoist_discard/take", output)); + EXPECT_TRUE(graph_utils::ContainsGraphNodeWithName("hoist_discard/skip", output)); + EXPECT_FALSE(graph_utils::ContainsGraphNodeWithName("hoist_discard/shard", output)); EXPECT_FALSE(graph_utils::ContainsGraphNodeWithName("take", output)); EXPECT_FALSE(graph_utils::ContainsGraphNodeWithName("skip", output)); EXPECT_TRUE(graph_utils::ContainsGraphNodeWithName("shard", output)); MutableGraphView graph(&output); - EXPECT_TRUE(graph_utils::GetInputNode(*graph.GetNode("hoist_data_discarding_ops/take"), + EXPECT_TRUE(graph_utils::GetInputNode(*graph.GetNode("hoist_discard/take"), graph)->name() == "range"); - EXPECT_TRUE(graph_utils::GetInputNode(*graph.GetNode("hoist_data_discarding_ops/skip"), - graph)->name() == "hoist_data_discarding_ops/take"); + EXPECT_TRUE(graph_utils::GetInputNode(*graph.GetNode("hoist_discard/skip"), + graph)->name() == "hoist_discard/take"); EXPECT_TRUE(graph_utils::GetInputNode(*graph.GetNode("map_and_batch"), graph)->name() == "cache"); } diff --git a/tensorflow/core/grappler/optimizers/data/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/data/meta_optimizer.cc index 18745351dd0..f5f285c8c4d 100644 --- a/tensorflow/core/grappler/optimizers/data/meta_optimizer.cc +++ 
b/tensorflow/core/grappler/optimizers/data/meta_optimizer.cc @@ -42,7 +42,7 @@ constexpr std::array kTFDataOptimizations = { "filter_fusion", "filter_with_random_uniform_fusion", "map_and_filter_fusion", - "hoist_data_discarding_ops", + "hoist_discard", "hoist_random_uniform", "map_parallelization", "map_and_batch_fusion", diff --git a/tensorflow/python/data/experimental/ops/optimization_options.py b/tensorflow/python/data/experimental/ops/optimization_options.py index 5f6797a6a80..b8fc317e222 100644 --- a/tensorflow/python/data/experimental/ops/optimization_options.py +++ b/tensorflow/python/data/experimental/ops/optimization_options.py @@ -126,8 +126,8 @@ class OptimizationOptions(options.OptionsBase): "Whether to fuse filter dataset that predicts random_uniform < rate into " "a sampling dataset. If None, defaults to False.") - hoist_data_discarding_ops = options.create_option( - name="hoist_data_discarding_ops", + hoist_discard = options.create_option( + name="hoist_discard", ty=bool, docstring= "Whether to hoist ops that will discard data (such as skip, take, shard)" @@ -225,7 +225,7 @@ class OptimizationOptions(options.OptionsBase): all_optimizations = [ "filter_fusion", "filter_with_random_uniform_fusion", - "hoist_data_discarding_ops", + "hoist_discard", "hoist_random_uniform", "map_and_batch_fusion", "map_and_filter_fusion", diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-optimization-options.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-optimization-options.pbtxt index 9aa1c76afd1..2a59682df2e 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-optimization-options.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-optimization-options.pbtxt @@ -28,7 +28,7 @@ tf_class { mtype: "" } member { - name: "hoist_data_discarding_ops" + name: "hoist_discard" mtype: "" } member { diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-optimization-options.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-optimization-options.pbtxt index 9aa1c76afd1..2a59682df2e 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-optimization-options.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-optimization-options.pbtxt @@ -28,7 +28,7 @@ tf_class { mtype: "" } member { - name: "hoist_data_discarding_ops" + name: "hoist_discard" mtype: "" } member { From f86b97ab3d3055772594053377d541562e1be95d Mon Sep 17 00:00:00 2001 From: zilinzhu Date: Sat, 13 Jun 2020 22:26:00 +0800 Subject: [PATCH 0035/2522] updates based on reviews --- .../grappler/optimizers/data/hoist_discard.cc | 62 +++++++++---------- 1 file changed, 29 insertions(+), 33 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/data/hoist_discard.cc b/tensorflow/core/grappler/optimizers/data/hoist_discard.cc index d2d8cedfa66..b44a2a1b677 100644 --- a/tensorflow/core/grappler/optimizers/data/hoist_discard.cc +++ b/tensorflow/core/grappler/optimizers/data/hoist_discard.cc @@ -32,31 +32,33 @@ namespace tensorflow { namespace grappler { namespace { -constexpr std::array kDataDiscarding = { +const std::unordered_set kDataDiscarding = { "ShardDataset", "SkipDataset", "TakeDataset", }; -constexpr std::array kCardinalityPreserving = { +const std::unordered_set kCardinalityPreserving = { "CacheDataset", "CacheDatasetV2", "PrefetchDataset", "MapDataset", "ParallelMapDataset", "ParallelMapDatasetV2", }; bool IsDataDiscarding(const NodeDef& node) { - for (const auto& 
data_discarding_op : kDataDiscarding) { - if (node.op() == data_discarding_op) { - return true; - } + auto iter = kDataDiscarding.find(node.op()); + if (iter == kDataDiscarding.end()) { + return false; } - return false; + return true; } bool IsCardinalityPreserving(const NodeDef& node) { - for (const auto& cardinality_preserving_op : kCardinalityPreserving) { - if (node.op() == cardinality_preserving_op) { - return true; - } + auto iter = kCardinalityPreserving.find(node.op()); + if (iter == kCardinalityPreserving.end()) { + return false; } - return false; + auto attr_iter = node.attr().find("preserve_cardinality"); + if (attr_iter != node.attr().end() && !attr_iter->second.b()) { + return false; + } + return true; } } // namepsace @@ -70,38 +72,32 @@ Status HoistDiscard::OptimizeAndCollectStats(Cluster* cluster, bool updated; do { updated = false; - for (NodeDef node : graph.graph()->node()) { - if (IsDataDiscarding(node)) { - NodeDef* start = &node; + for (int i = 0; i < graph.graph()->node_size(); i++) { + auto node = graph.graph()->mutable_node(i); + if (IsDataDiscarding(*node)) { + NodeDef* start = node; NodeDef* start_parent = graph_utils::GetInputNode(*start, graph); - while (IsCardinalityPreserving(*start_parent) && - NumOutputs(*start_parent, graph.graph()) == 1) { + while (IsCardinalityPreserving(*start_parent)) { start = start_parent; start_parent = graph_utils::GetInputNode(*start, graph); } - // no cardinality preserving op with indegree 1. - if (start->name() == node.name()) { + if (start->name() == node->name()) { continue; } - NodeDef hoisted_node = node; - if (!absl::StartsWith(node.name(), "hoist_discard/")) { - graph_utils::SetUniqueGraphNodeName( - strings::StrCat("hoist_discard/", node.name()), - graph.graph(), &hoisted_node - ); + auto parent = graph_utils::GetInputNode(*node, graph); + TF_RETURN_IF_ERROR(graph.UpdateFanouts(node->name(), parent->name())); + if (!absl::StartsWith(node->name(), "hoist_discard/")) { + TF_RETURN_IF_ERROR(graph.UpdateNodeName(node->name(), + strings::StrCat("hoist_discard/", node->name()), false)); } for (const auto& attr_name : {"output_types", "output_shapes"}) { graph_utils::CopyAttribute(attr_name, *start_parent, - &hoisted_node); + node); } - *hoisted_node.mutable_input(0) = start_parent->name(); - *start->mutable_input(0) = hoisted_node.name(); - - auto parent = graph_utils::GetInputNode(node, graph); - TF_RETURN_IF_ERROR(graph.UpdateFanouts(node.name(), parent->name())); - graph.DeleteNodes({node.name()}); - graph.AddNode(std::move(hoisted_node)); + *node->mutable_input(0) = start_parent->name(); + *start->mutable_input(0) = node->name(); updated = true; + break; } } } while (updated); From eca4d50cadefa3e3621b82efe14d1f83708676d9 Mon Sep 17 00:00:00 2001 From: zilinzhu Date: Sat, 13 Jun 2020 22:29:55 +0800 Subject: [PATCH 0036/2522] update misleading doc --- .../python/data/experimental/ops/optimization_options.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/data/experimental/ops/optimization_options.py b/tensorflow/python/data/experimental/ops/optimization_options.py index b8fc317e222..d4c19a7ccf4 100644 --- a/tensorflow/python/data/experimental/ops/optimization_options.py +++ b/tensorflow/python/data/experimental/ops/optimization_options.py @@ -130,8 +130,9 @@ class OptimizationOptions(options.OptionsBase): name="hoist_discard", ty=bool, docstring= - "Whether to hoist ops that will discard data (such as skip, take, shard)" - "out of map transformations. 
If None, defaults to False.") + "Whether to hoist ops that will discard data (such as skip, take, shard) " + "out of unary cardinality preserved transformations. " + "If None, defaults to False.") hoist_random_uniform = options.create_option( name="hoist_random_uniform", From 31827dbc7c3ec8c3a45f3c8813d1a7cfd35774dd Mon Sep 17 00:00:00 2001 From: zilinzhu Date: Sat, 13 Jun 2020 23:30:31 +0800 Subject: [PATCH 0037/2522] add python test --- .../kernel_tests/optimization/BUILD | 18 +++++ .../optimization/hoist_discard_test.py | 71 +++++++++++++++++++ 2 files changed, 89 insertions(+) create mode 100644 tensorflow/python/data/experimental/kernel_tests/optimization/hoist_discard_test.py diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD b/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD index 1411481f0ac..3ef1b86fd6a 100644 --- a/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD +++ b/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD @@ -97,6 +97,24 @@ cuda_py_test( ], ) +tf_py_test( + name = "hoist_discard_test", + size = "small", + srcs = ["hoist_discard_test.py"], + tags = [ + "no_oss", + "no_pip", + "no_windows", + ], + deps = [ + "//tensorflow/python:client_testlib", + "//tensorflow/python:errors", + "//tensorflow/python/data/experimental/ops:testing", + "//tensorflow/python/data/kernel_tests:test_base", + "//tensorflow/python/data/ops:dataset_ops", + ], +) + tf_py_test( name = "hoist_random_uniform_test", size = "small", diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/hoist_discard_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/hoist_discard_test.py new file mode 100644 index 00000000000..06801cb46db --- /dev/null +++ b/tensorflow/python/data/experimental/kernel_tests/optimization/hoist_discard_test.py @@ -0,0 +1,71 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for the `HoistDiscard` rewrite.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl.testing import parameterized + +from tensorflow.python.data.experimental.ops import testing +from tensorflow.python.data.kernel_tests import test_base +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.framework import combinations +from tensorflow.python.platform import test + + +class HoistDiscardTest(test_base.DatasetTestBase, parameterized.TestCase): + + @combinations.generate(combinations.combine(tf_api_version=2, + mode=["eager", "graph"])) + def testSimpleHoistingV2(self): + dataset = dataset_ops.Dataset.range(100) + dataset = dataset.apply( + testing.assert_next(["FiniteSkip", "FiniteTake", "Shard", + "ParallelMap", "MemoryCacheImpl"])) + dataset = dataset.map( + lambda x: x + 1, num_parallel_calls=10) + dataset = dataset.skip(10) + dataset = dataset.cache() + dataset = dataset.take(50) + dataset = dataset.shard(2, 0) + options = dataset_ops.Options() + options.experimental_optimization.apply_default_optimizations = False + options.experimental_optimization.hoist_discard = True + dataset = dataset.with_options(options) + self.assertDatasetProduces(dataset, range(11, 61, 2)) + + @combinations.generate(combinations.combine(tf_api_version=1, + mode=["eager", "graph"])) + def testSimpleHoistingV1(self): + dataset = dataset_ops.Dataset.range(100) + dataset = dataset.apply( + testing.assert_next(["ParallelMap", "FiniteSkip", "FiniteTake", + "Shard", "MemoryCacheImpl"])) + dataset = dataset.map( + lambda x: x + 1, num_parallel_calls=10) + dataset = dataset.skip(10) + dataset = dataset.cache() + dataset = dataset.take(50) + dataset = dataset.shard(2, 0) + options = dataset_ops.Options() + options.experimental_optimization.apply_default_optimizations = False + options.experimental_optimization.hoist_discard = True + dataset = dataset.with_options(options) + self.assertDatasetProduces(dataset, range(11, 61, 2)) + + +if __name__ == "__main__": + test.main() From bb027421700c1fed19c7951ce38bd6d68e4d99c3 Mon Sep 17 00:00:00 2001 From: Tamas Nyiri Date: Mon, 15 Jun 2020 09:57:38 +0100 Subject: [PATCH 0038/2522] upgraded error message to show more information --- .../tools/optimize/modify_model_interface.cc | 37 ++++++++++++------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/tensorflow/lite/tools/optimize/modify_model_interface.cc b/tensorflow/lite/tools/optimize/modify_model_interface.cc index e7cd09b0006..785abc2ae97 100644 --- a/tensorflow/lite/tools/optimize/modify_model_interface.cc +++ b/tensorflow/lite/tools/optimize/modify_model_interface.cc @@ -18,6 +18,7 @@ limitations under the License. #include #include #include +#include #include "absl/memory/memory.h" #include "flatbuffers/flexbuffers.h" @@ -92,16 +93,19 @@ std::vector GetInputTensors(const TensorType& input_type, "modify_model_interface currently only supports " "int8 and int16 quantized models."); } - // Usually the input model has to have the same quantization layers as the - // ones we're trying to remove. - if (quant_output->type != input_type) { - // An exception from this is when we are setting the input or output - // type to uint8. 
- if (!(quant_output->type == TensorType_INT8 && - input_type == TensorType_UINT8)) { + + // The input type must be the same as the model quantization type + if (input_type != quant_output->type) { + // An exception, allow for UINT8 input type for INT8 quantized model. + if (!(input_type == TensorType_UINT8 && + quant_output->type == TensorType_INT8)) { TF_LITE_REPORT_ERROR( - error_reporter, - "Model's type incompatible with output type argument."); + error_reporter, + "The %s input type is incompatible with %s quantized models. " + "To resolve this error, change the input_type to a compatible one. " + "See: modify_model_interface.cc", + EnumNameTensorType(input_type), + EnumNameTensorType(quant_output->type)); } } if (quant_output->quantization == nullptr) { @@ -165,12 +169,17 @@ std::vector GetOutputTensors(const TensorType& output_type, "int8 and int16 quantized models."); return {}; } - if (dequant_input->type != output_type) { - if (!(dequant_input->type == TensorType_INT8 && - output_type == TensorType_UINT8)) { + if (output_type != dequant_input->type) { + // An exception, allow for UINT8 input type for INT8 quantized model. + if (!(output_type == TensorType_UINT8 && + dequant_input->type == TensorType_INT8)) { TF_LITE_REPORT_ERROR( - error_reporter, - "Model's type incompatible with output type argument."); + error_reporter, + "The %s output type is incompatible with %s quantized models. " + "To resolve this error, change the output_type to a compatible one. " + "See: modify_model_interface.cc", + EnumNameTensorType(output_type), + EnumNameTensorType(dequant_input->type)); } } if (dequant_input->quantization == nullptr) { From 240fd79798f82184aaf60d88e94caf360c7ca56d Mon Sep 17 00:00:00 2001 From: Tamas Nyiri Date: Mon, 15 Jun 2020 10:23:18 +0100 Subject: [PATCH 0039/2522] removed unnecessary override type --- .../tools/optimize/modify_model_interface.cc | 24 +------------------ 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/tensorflow/lite/tools/optimize/modify_model_interface.cc b/tensorflow/lite/tools/optimize/modify_model_interface.cc index 785abc2ae97..643e9493ce7 100644 --- a/tensorflow/lite/tools/optimize/modify_model_interface.cc +++ b/tensorflow/lite/tools/optimize/modify_model_interface.cc @@ -405,30 +405,8 @@ TfLiteStatus ModifyModelInterface(const string& input_file, absl::make_unique(); flatbuffers::FlatBufferBuilder builder; - tflite::TensorType input_override_type; - tflite::TensorType output_override_type; - - switch (input_type) { - case tflite::TensorType_UINT8: - case tflite::TensorType_INT8: - case tflite::TensorType_INT16: - input_override_type = input_type; - break; - default: - return kTfLiteError; - } - switch (output_type) { - case tflite::TensorType_UINT8: - case tflite::TensorType_INT8: - case tflite::TensorType_INT16: - output_override_type = output_type; - break; - default: - return kTfLiteError; - } - auto status = ModifyModelInterface(&builder, tflite_model.get(), - input_override_type, output_override_type); + input_type, output_type); TFLITE_DCHECK_EQ(status, kTfLiteOk); WriteFile(output_file, builder.GetBufferPointer(), builder.GetSize()); From 9134fbb13794865a45288d2e722ad47c362e0ae4 Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Thu, 18 Jun 2020 23:13:11 +0000 Subject: [PATCH 0040/2522] summary_op needs tstring C API sync --- tensorflow/c/kernels/diff.patch | 0 tensorflow/c/kernels/ops/summary.cc | 70 ++++++++++ tensorflow/c/kernels/summary_op.cc | 171 ++++++++++++++++++++++++ tensorflow/c/kernels/summary_op_test.cc | 96 
+++++++++++++ 4 files changed, 337 insertions(+) create mode 100644 tensorflow/c/kernels/diff.patch create mode 100644 tensorflow/c/kernels/ops/summary.cc create mode 100644 tensorflow/c/kernels/summary_op.cc create mode 100644 tensorflow/c/kernels/summary_op_test.cc diff --git a/tensorflow/c/kernels/diff.patch b/tensorflow/c/kernels/diff.patch new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tensorflow/c/kernels/ops/summary.cc b/tensorflow/c/kernels/ops/summary.cc new file mode 100644 index 00000000000..550a663d006 --- /dev/null +++ b/tensorflow/c/kernels/ops/summary.cc @@ -0,0 +1,70 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include + +#include "tensorflow/c/ops.h" +#include "tensorflow/core/framework/selective_registration.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/macros.h" + + +static void TF_ScalarSummary_shape_inference_fn(TF_ShapeInferenceContext* ctx, + TF_Status* status) { + TF_ShapeHandle* result = TF_NewShapeHandle(); + // TODO: what to do in the case of unknown input shape? + if (TF_GetCode(status) == TF_OK && + !TF_ShapeInferenceContextRankKnown(ctx, result)) { + TF_ShapeInferenceContextSetUnknownShape(ctx, status); + CHECK_EQ(TF_OK, TF_GetCode(status)) + << "Error while setting unknown shape function"; + TF_DeleteShapeHandle(result); + return; + } + // make shape handle a scalar value (empty shape) + if (TF_GetCode(status) == TF_OK) { + TF_ShapeInferenceContextSetOutput(ctx, 0, result, status); + CHECK_EQ(TF_OK, TF_GetCode(status)) + << "Error while setting shape function"; + } + TF_DeleteShapeHandle(result); +} + +void Register_ScalarSummaryOp() { + TF_Status* status = TF_NewStatus(); + + TF_OpDefinitionBuilder* op_builder = TF_NewOpDefinitionBuilder("SummaryScalar"); + TF_OpDefinitionBuilderAddInput(op_builder, "tags: string"); + TF_OpDefinitionBuilderAddInput(op_builder, "values: T"); + TF_OpDefinitionBuilderAddOutput(op_builder, "summary: string"); + TF_OpDefinitionBuilderAddAttr( + op_builder, + "T: realnumbertype"); + TF_OpDefinitionBuilderSetShapeInferenceFunction(op_builder, + &TF_ScalarSummary_shape_inference_fn); + + TF_RegisterOpDefinition(op_builder, status); + CHECK_EQ(TF_GetCode(status), TF_OK) + << "TF_ScalarSummary op registration failed: " << TF_Message(status); + TF_DeleteStatus(status); +} + +TF_ATTRIBUTE_UNUSED static bool SummaryScalarOpRegistered = []() { + if (SHOULD_REGISTER_OP("SummaryScalar")) { + Register_ScalarSummaryOp(); + } + return true; +}(); diff --git a/tensorflow/c/kernels/summary_op.cc b/tensorflow/c/kernels/summary_op.cc new file mode 100644 index 00000000000..3a78d321d75 --- /dev/null +++ b/tensorflow/c/kernels/summary_op.cc @@ -0,0 +1,171 @@ + +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include + +#include "tensorflow/c/kernels.h" +#include "tensorflow/c/ops.h" +#include "tensorflow/c/tf_tensor.h" +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/selective_registration.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/framework/summary.pb.h" +#include "tensorflow/core/platform/protobuf.h" +#include "tensorflow/core/framework/register_types.h" + +#include "tensorflow/core/framework/types.h" + +// BitcastOp implements a bitcast kernel, creating an output tensor that shares +// the same data buffer as the input but with a different shape and/or data +// type. Its inputs are: +// +// * the input tensor +// * an attribute named "T" containing the TF_DataType of the input tensor +// * an attribute named "type" containing the TF_DataType of the output tensor +// +// Given an input tensor of shape [...], if the input DataType "T" is larger +// than the output DataType "type", then the shape changes from [...] +// to [..., sizeof(T)/sizeof(type)]. +// +// If "T" is smaller than "type", the operator requires that the rightmost +// dimension be equal to sizeof(type)/sizeof(T). The shape then goes from +// [..., sizeof(type)/sizeof(T)] to [...]. +// +// Bitcast is implemented as a low-level cast, so machines with different endian +// orderings will give different results. 
+ +static void* SummaryScalarOp_Create(TF_OpKernelConstruction* ctx) { + // TODO: replace with a void* pointer type later + int a = 4; + return static_cast(&a); +} + +static void SummaryScalarOp_Delete(void* kernel) { + return; +} + +bool IsSameSize(TF_Tensor* tensor1, TF_Tensor* tensor2){ + if (TF_NumDims(tensor1) != TF_NumDims(tensor2)){ + return false; + } + for(int d = 0; d < TF_NumDims(tensor1); d++){ + if (TF_Dim(tensor1, d) != TF_Dim(tensor2, d)){ + return false; + } + } + return true; +} + +template +static void SummaryScalarOp_Compute(void* kernel, TF_OpKernelContext* ctx) { + TF_Tensor* tags; + TF_Tensor* values; + TF_Status* status = TF_NewStatus(); + TF_GetInput(ctx, 0, &tags, status); + CHECK_EQ(TF_OK, TF_GetCode(status)) + << "Error while getting input"; + if (TF_GetCode(status) == TF_OK){ + TF_GetInput(ctx, 1, &values, status); + } + CHECK_EQ(TF_OK, TF_GetCode(status)) + << "Error while getting input"; + if (TF_GetCode(status) == TF_OK) { + if (!IsSameSize(tags, values)) { + std::ostringstream err; + err << "tags and values not the same shape: "; + TF_SetStatus(status, TF_INVALID_ARGUMENT, err.str().c_str()); + } + } + + tensorflow::Summary s; + if (TF_GetCode(status) == TF_OK) { + auto Ttags_array = static_cast(TF_TensorData(tags)); + auto values_array = static_cast(TF_TensorData(values)); + for (int i = 0; i < TF_TensorElementCount(tags); ++i){ + tensorflow::Summary::Value* v = s.add_value(); + TF_TString_Init(Ttags_array[i]); + v->set_tag(TF_TString_GetDataPointer(Ttags_array[i]), TF_TString_GetSize(Ttags_array[i])); + v->set_simple_value(float(values_array[i])); + } + + + // TF_Tensor* summary_tensor = TF_AllocateOutput(ctx, 0, TF_ExpectedOutputDataType(ctx, 0), 0, 0) + + // TF_Tensor* output = TF_AllocateTensor(k->output_data_type, dims, 0, + // TF_DataTypeSize(k->output_data_type)); + // if (TF_GetCode(status) == TF_OK) { + // TF_SetOutput(ctx, 0, output, status); + // } + // TF_DeleteTensor(output); + } + + // if (TF_GetCode(status) != TF_OK) { + // TF_OpKernelContext_Failure(ctx, status); + // } + // TF_DeleteStatus(status); + // TF_DeleteTensor(tags); +} + +template +void RegisterSummaryScalarOpKernel() { + TF_Status* status = TF_NewStatus(); + { + auto* builder = TF_NewKernelBuilder("SummaryScalar", tensorflow::DEVICE_CPU, + &SummaryScalarOp_Create, &SummaryScalarOp_Compute, + &SummaryScalarOp_Delete); + TF_KernelBuilder_TypeConstraint(builder, "T", static_cast(tensorflow::DataTypeToEnum::v()), status); + CHECK_EQ(TF_OK, TF_GetCode(status)) + << "Error while adding type constraint"; + TF_RegisterKernelBuilder("SummaryScalar", builder, status); + CHECK_EQ(TF_OK, TF_GetCode(status)) + << "Error while registering Summary Scalar kernel"; + } +// template +// #if GOOGLE_CUDA +// { +// auto* builder = TF_NewKernelBuilder("SummaryScalar", tensorflow::DEVICE_GPU, +// &SummaryScalarOp_Create, &SummaryScalarOp_Compute, +// &SummaryScalarOp_Delete); +// TF_RegisterKernelBuilder("SummaryScalar", builder, status); +// CHECK_EQ(TF_OK, TF_GetCode(status)) +// << "Error while registering CUDA SummaryScalar kernel"; +// } +// #endif + + TF_DeleteStatus(status); +} + +// A dummy static variable initialized by a lambda whose side-effect is to +// register the bitcast kernel. 
+ + +TF_ATTRIBUTE_UNUSED static bool IsSummaryScalarOpKernelRegistered = []() { + if (SHOULD_REGISTER_OP_KERNEL("SummaryScalar")) { + RegisterSummaryScalarOpKernel(); + RegisterSummaryScalarOpKernel(); + RegisterSummaryScalarOpKernel(); + RegisterSummaryScalarOpKernel(); + RegisterSummaryScalarOpKernel(); + RegisterSummaryScalarOpKernel(); + RegisterSummaryScalarOpKernel(); + RegisterSummaryScalarOpKernel(); + RegisterSummaryScalarOpKernel(); + } + return true; +}(); + diff --git a/tensorflow/c/kernels/summary_op_test.cc b/tensorflow/c/kernels/summary_op_test.cc new file mode 100644 index 00000000000..fd6199abd6c --- /dev/null +++ b/tensorflow/c/kernels/summary_op_test.cc @@ -0,0 +1,96 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/attr_value.pb.h" +#include "tensorflow/core/framework/attr_value_util.h" +#include "tensorflow/core/framework/fake_input.h" +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/platform/test.h" + +#include +#include +#include +namespace tensorflow { +namespace { + +class DummyDevice : public DeviceBase { + public: + explicit DummyDevice(Env* env) : DeviceBase(env) {} + Allocator* GetAllocator(AllocatorAttributes /*attr*/) override { + return cpu_allocator(); + } +}; + +void TestScalarSummaryOp(Tensor* tags, Tensor* values, error::Code expected_code) { + Status status; + NodeDef def; + def.set_op("SummaryScalar"); + + def.set_device(DEVICE_CPU); + + AttrValue valuesTypeAttr; + SetAttrValue(values->dtype(), &valuesTypeAttr); + (*def.mutable_attr())["T"] = valuesTypeAttr; + + def.add_input( + strings::StrCat("input1: ", DataTypeString(tags->dtype()))); + def.add_input( + strings::StrCat("input2: ", DataTypeString(values->dtype()))); + + std::unique_ptr kernel = + CreateOpKernel(DeviceType(DEVICE_CPU), nullptr, nullptr, def, 1, &status); + ASSERT_TRUE(status.ok()) << status.ToString(); + OpKernelContext::Params params; + DummyDevice dummy_device(nullptr); + params.device = &dummy_device; + params.op_kernel = kernel.get(); + gtl::InlinedVector inputs; + inputs.emplace_back(tags); + inputs.emplace_back(values); + params.inputs = &inputs; + OpKernelContext ctx(¶ms, 1); + kernel->Compute(&ctx); + + ASSERT_EQ(expected_code, ctx.status().code()); + if (expected_code == error::OK) { + ASSERT_EQ(true, false) + << ctx.mutable_output(0)->shape().DebugString(); + } +} + +TEST(ScalarSummaryOpTest, Test) { + int vectorSize = 2; + Tensor tags(DT_STRING, {vectorSize}); + Tensor values(DT_FLOAT, {vectorSize}); + for (int i = 0; i < vectorSize; ++i){ + values.vec()(i) = static_cast(i); + } + tags.vec()(0) = "tag 1"; + tags.vec()(1) = "tag 2"; + TestScalarSummaryOp(&tags, &values, error::INVALID_ARGUMENT); +} + + +PartialTensorShape 
S(std::initializer_list dims) { + return PartialTensorShape(dims); +} + + + +} // namespace +} // namespace tensorflow From 6c82e7b9ab9b9f21508e2c0c1efe1209b16d0b83 Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Thu, 18 Jun 2020 23:21:30 +0000 Subject: [PATCH 0041/2522] fixed comments --- ...-summary_op-needs-tstring-C-API-sync.patch | 377 ++++++++++++++++++ tensorflow/c/kernels/summary_op.cc | 21 +- 2 files changed, 379 insertions(+), 19 deletions(-) create mode 100644 tensorflow/c/kernels/0001-summary_op-needs-tstring-C-API-sync.patch diff --git a/tensorflow/c/kernels/0001-summary_op-needs-tstring-C-API-sync.patch b/tensorflow/c/kernels/0001-summary_op-needs-tstring-C-API-sync.patch new file mode 100644 index 00000000000..856f4a554c3 --- /dev/null +++ b/tensorflow/c/kernels/0001-summary_op-needs-tstring-C-API-sync.patch @@ -0,0 +1,377 @@ +From 9134fbb13794865a45288d2e722ad47c362e0ae4 Mon Sep 17 00:00:00 2001 +From: Daniel Nguyen +Date: Thu, 18 Jun 2020 23:13:11 +0000 +Subject: [PATCH] summary_op needs tstring C API sync + +--- + tensorflow/c/kernels/diff.patch | 0 + tensorflow/c/kernels/ops/summary.cc | 70 ++++++++++ + tensorflow/c/kernels/summary_op.cc | 171 ++++++++++++++++++++++++ + tensorflow/c/kernels/summary_op_test.cc | 96 +++++++++++++ + 4 files changed, 337 insertions(+) + create mode 100644 tensorflow/c/kernels/diff.patch + create mode 100644 tensorflow/c/kernels/ops/summary.cc + create mode 100644 tensorflow/c/kernels/summary_op.cc + create mode 100644 tensorflow/c/kernels/summary_op_test.cc + +diff --git a/tensorflow/c/kernels/diff.patch b/tensorflow/c/kernels/diff.patch +new file mode 100644 +index 0000000000..e69de29bb2 +diff --git a/tensorflow/c/kernels/ops/summary.cc b/tensorflow/c/kernels/ops/summary.cc +new file mode 100644 +index 0000000000..550a663d00 +--- /dev/null ++++ b/tensorflow/c/kernels/ops/summary.cc +@@ -0,0 +1,70 @@ ++/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. ++ ++Licensed under the Apache License, Version 2.0 (the "License"); ++you may not use this file except in compliance with the License. ++You may obtain a copy of the License at ++ ++ http://www.apache.org/licenses/LICENSE-2.0 ++ ++Unless required by applicable law or agreed to in writing, software ++distributed under the License is distributed on an "AS IS" BASIS, ++WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++See the License for the specific language governing permissions and ++limitations under the License. ++==============================================================================*/ ++ ++#include ++#include ++ ++#include "tensorflow/c/ops.h" ++#include "tensorflow/core/framework/selective_registration.h" ++#include "tensorflow/core/platform/logging.h" ++#include "tensorflow/core/platform/macros.h" ++ ++ ++static void TF_ScalarSummary_shape_inference_fn(TF_ShapeInferenceContext* ctx, ++ TF_Status* status) { ++ TF_ShapeHandle* result = TF_NewShapeHandle(); ++ // TODO: what to do in the case of unknown input shape? 
++ if (TF_GetCode(status) == TF_OK && ++ !TF_ShapeInferenceContextRankKnown(ctx, result)) { ++ TF_ShapeInferenceContextSetUnknownShape(ctx, status); ++ CHECK_EQ(TF_OK, TF_GetCode(status)) ++ << "Error while setting unknown shape function"; ++ TF_DeleteShapeHandle(result); ++ return; ++ } ++ // make shape handle a scalar value (empty shape) ++ if (TF_GetCode(status) == TF_OK) { ++ TF_ShapeInferenceContextSetOutput(ctx, 0, result, status); ++ CHECK_EQ(TF_OK, TF_GetCode(status)) ++ << "Error while setting shape function"; ++ } ++ TF_DeleteShapeHandle(result); ++} ++ ++void Register_ScalarSummaryOp() { ++ TF_Status* status = TF_NewStatus(); ++ ++ TF_OpDefinitionBuilder* op_builder = TF_NewOpDefinitionBuilder("SummaryScalar"); ++ TF_OpDefinitionBuilderAddInput(op_builder, "tags: string"); ++ TF_OpDefinitionBuilderAddInput(op_builder, "values: T"); ++ TF_OpDefinitionBuilderAddOutput(op_builder, "summary: string"); ++ TF_OpDefinitionBuilderAddAttr( ++ op_builder, ++ "T: realnumbertype"); ++ TF_OpDefinitionBuilderSetShapeInferenceFunction(op_builder, ++ &TF_ScalarSummary_shape_inference_fn); ++ ++ TF_RegisterOpDefinition(op_builder, status); ++ CHECK_EQ(TF_GetCode(status), TF_OK) ++ << "TF_ScalarSummary op registration failed: " << TF_Message(status); ++ TF_DeleteStatus(status); ++} ++ ++TF_ATTRIBUTE_UNUSED static bool SummaryScalarOpRegistered = []() { ++ if (SHOULD_REGISTER_OP("SummaryScalar")) { ++ Register_ScalarSummaryOp(); ++ } ++ return true; ++}(); +diff --git a/tensorflow/c/kernels/summary_op.cc b/tensorflow/c/kernels/summary_op.cc +new file mode 100644 +index 0000000000..3a78d321d7 +--- /dev/null ++++ b/tensorflow/c/kernels/summary_op.cc +@@ -0,0 +1,171 @@ ++ ++/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. ++ ++Licensed under the Apache License, Version 2.0 (the "License"); ++you may not use this file except in compliance with the License. ++You may obtain a copy of the License at ++ ++ http://www.apache.org/licenses/LICENSE-2.0 ++ ++Unless required by applicable law or agreed to in writing, software ++distributed under the License is distributed on an "AS IS" BASIS, ++WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++See the License for the specific language governing permissions and ++limitations under the License. ++==============================================================================*/ ++ ++#include ++ ++#include "tensorflow/c/kernels.h" ++#include "tensorflow/c/ops.h" ++#include "tensorflow/c/tf_tensor.h" ++#include "tensorflow/core/framework/common_shape_fns.h" ++#include "tensorflow/core/framework/op.h" ++#include "tensorflow/core/framework/selective_registration.h" ++#include "tensorflow/core/framework/shape_inference.h" ++#include "tensorflow/core/platform/macros.h" ++#include "tensorflow/core/framework/summary.pb.h" ++#include "tensorflow/core/platform/protobuf.h" ++#include "tensorflow/core/framework/register_types.h" ++ ++#include "tensorflow/core/framework/types.h" ++ ++// BitcastOp implements a bitcast kernel, creating an output tensor that shares ++// the same data buffer as the input but with a different shape and/or data ++// type. Its inputs are: ++// ++// * the input tensor ++// * an attribute named "T" containing the TF_DataType of the input tensor ++// * an attribute named "type" containing the TF_DataType of the output tensor ++// ++// Given an input tensor of shape [...], if the input DataType "T" is larger ++// than the output DataType "type", then the shape changes from [...] 
++// to [..., sizeof(T)/sizeof(type)]. ++// ++// If "T" is smaller than "type", the operator requires that the rightmost ++// dimension be equal to sizeof(type)/sizeof(T). The shape then goes from ++// [..., sizeof(type)/sizeof(T)] to [...]. ++// ++// Bitcast is implemented as a low-level cast, so machines with different endian ++// orderings will give different results. ++ ++static void* SummaryScalarOp_Create(TF_OpKernelConstruction* ctx) { ++ // TODO: replace with a void* pointer type later ++ int a = 4; ++ return static_cast(&a); ++} ++ ++static void SummaryScalarOp_Delete(void* kernel) { ++ return; ++} ++ ++bool IsSameSize(TF_Tensor* tensor1, TF_Tensor* tensor2){ ++ if (TF_NumDims(tensor1) != TF_NumDims(tensor2)){ ++ return false; ++ } ++ for(int d = 0; d < TF_NumDims(tensor1); d++){ ++ if (TF_Dim(tensor1, d) != TF_Dim(tensor2, d)){ ++ return false; ++ } ++ } ++ return true; ++} ++ ++template ++static void SummaryScalarOp_Compute(void* kernel, TF_OpKernelContext* ctx) { ++ TF_Tensor* tags; ++ TF_Tensor* values; ++ TF_Status* status = TF_NewStatus(); ++ TF_GetInput(ctx, 0, &tags, status); ++ CHECK_EQ(TF_OK, TF_GetCode(status)) ++ << "Error while getting input"; ++ if (TF_GetCode(status) == TF_OK){ ++ TF_GetInput(ctx, 1, &values, status); ++ } ++ CHECK_EQ(TF_OK, TF_GetCode(status)) ++ << "Error while getting input"; ++ if (TF_GetCode(status) == TF_OK) { ++ if (!IsSameSize(tags, values)) { ++ std::ostringstream err; ++ err << "tags and values not the same shape: "; ++ TF_SetStatus(status, TF_INVALID_ARGUMENT, err.str().c_str()); ++ } ++ } ++ ++ tensorflow::Summary s; ++ if (TF_GetCode(status) == TF_OK) { ++ auto Ttags_array = static_cast(TF_TensorData(tags)); ++ auto values_array = static_cast(TF_TensorData(values)); ++ for (int i = 0; i < TF_TensorElementCount(tags); ++i){ ++ tensorflow::Summary::Value* v = s.add_value(); ++ TF_TString_Init(Ttags_array[i]); ++ v->set_tag(TF_TString_GetDataPointer(Ttags_array[i]), TF_TString_GetSize(Ttags_array[i])); ++ v->set_simple_value(float(values_array[i])); ++ } ++ ++ ++ // TF_Tensor* summary_tensor = TF_AllocateOutput(ctx, 0, TF_ExpectedOutputDataType(ctx, 0), 0, 0) ++ ++ // TF_Tensor* output = TF_AllocateTensor(k->output_data_type, dims, 0, ++ // TF_DataTypeSize(k->output_data_type)); ++ // if (TF_GetCode(status) == TF_OK) { ++ // TF_SetOutput(ctx, 0, output, status); ++ // } ++ // TF_DeleteTensor(output); ++ } ++ ++ // if (TF_GetCode(status) != TF_OK) { ++ // TF_OpKernelContext_Failure(ctx, status); ++ // } ++ // TF_DeleteStatus(status); ++ // TF_DeleteTensor(tags); ++} ++ ++template ++void RegisterSummaryScalarOpKernel() { ++ TF_Status* status = TF_NewStatus(); ++ { ++ auto* builder = TF_NewKernelBuilder("SummaryScalar", tensorflow::DEVICE_CPU, ++ &SummaryScalarOp_Create, &SummaryScalarOp_Compute, ++ &SummaryScalarOp_Delete); ++ TF_KernelBuilder_TypeConstraint(builder, "T", static_cast(tensorflow::DataTypeToEnum::v()), status); ++ CHECK_EQ(TF_OK, TF_GetCode(status)) ++ << "Error while adding type constraint"; ++ TF_RegisterKernelBuilder("SummaryScalar", builder, status); ++ CHECK_EQ(TF_OK, TF_GetCode(status)) ++ << "Error while registering Summary Scalar kernel"; ++ } ++// template ++// #if GOOGLE_CUDA ++// { ++// auto* builder = TF_NewKernelBuilder("SummaryScalar", tensorflow::DEVICE_GPU, ++// &SummaryScalarOp_Create, &SummaryScalarOp_Compute, ++// &SummaryScalarOp_Delete); ++// TF_RegisterKernelBuilder("SummaryScalar", builder, status); ++// CHECK_EQ(TF_OK, TF_GetCode(status)) ++// << "Error while registering CUDA SummaryScalar kernel"; ++// 
} ++// #endif ++ ++ TF_DeleteStatus(status); ++} ++ ++// A dummy static variable initialized by a lambda whose side-effect is to ++// register the bitcast kernel. ++ ++ ++TF_ATTRIBUTE_UNUSED static bool IsSummaryScalarOpKernelRegistered = []() { ++ if (SHOULD_REGISTER_OP_KERNEL("SummaryScalar")) { ++ RegisterSummaryScalarOpKernel(); ++ RegisterSummaryScalarOpKernel(); ++ RegisterSummaryScalarOpKernel(); ++ RegisterSummaryScalarOpKernel(); ++ RegisterSummaryScalarOpKernel(); ++ RegisterSummaryScalarOpKernel(); ++ RegisterSummaryScalarOpKernel(); ++ RegisterSummaryScalarOpKernel(); ++ RegisterSummaryScalarOpKernel(); ++ } ++ return true; ++}(); ++ +diff --git a/tensorflow/c/kernels/summary_op_test.cc b/tensorflow/c/kernels/summary_op_test.cc +new file mode 100644 +index 0000000000..fd6199abd6 +--- /dev/null ++++ b/tensorflow/c/kernels/summary_op_test.cc +@@ -0,0 +1,96 @@ ++/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. ++ ++Licensed under the Apache License, Version 2.0 (the "License"); ++you may not use this file except in compliance with the License. ++You may obtain a copy of the License at ++ ++ http://www.apache.org/licenses/LICENSE-2.0 ++ ++Unless required by applicable law or agreed to in writing, software ++distributed under the License is distributed on an "AS IS" BASIS, ++WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++See the License for the specific language governing permissions and ++limitations under the License. ++==============================================================================*/ ++ ++#include "tensorflow/core/framework/attr_value.pb.h" ++#include "tensorflow/core/framework/attr_value_util.h" ++#include "tensorflow/core/framework/fake_input.h" ++#include "tensorflow/core/framework/node_def.pb.h" ++#include "tensorflow/core/framework/node_def_builder.h" ++#include "tensorflow/core/framework/op_kernel.h" ++#include "tensorflow/core/framework/shape_inference.h" ++#include "tensorflow/core/platform/test.h" ++ ++#include ++#include ++#include ++namespace tensorflow { ++namespace { ++ ++class DummyDevice : public DeviceBase { ++ public: ++ explicit DummyDevice(Env* env) : DeviceBase(env) {} ++ Allocator* GetAllocator(AllocatorAttributes /*attr*/) override { ++ return cpu_allocator(); ++ } ++}; ++ ++void TestScalarSummaryOp(Tensor* tags, Tensor* values, error::Code expected_code) { ++ Status status; ++ NodeDef def; ++ def.set_op("SummaryScalar"); ++ ++ def.set_device(DEVICE_CPU); ++ ++ AttrValue valuesTypeAttr; ++ SetAttrValue(values->dtype(), &valuesTypeAttr); ++ (*def.mutable_attr())["T"] = valuesTypeAttr; ++ ++ def.add_input( ++ strings::StrCat("input1: ", DataTypeString(tags->dtype()))); ++ def.add_input( ++ strings::StrCat("input2: ", DataTypeString(values->dtype()))); ++ ++ std::unique_ptr kernel = ++ CreateOpKernel(DeviceType(DEVICE_CPU), nullptr, nullptr, def, 1, &status); ++ ASSERT_TRUE(status.ok()) << status.ToString(); ++ OpKernelContext::Params params; ++ DummyDevice dummy_device(nullptr); ++ params.device = &dummy_device; ++ params.op_kernel = kernel.get(); ++ gtl::InlinedVector inputs; ++ inputs.emplace_back(tags); ++ inputs.emplace_back(values); ++ params.inputs = &inputs; ++ OpKernelContext ctx(¶ms, 1); ++ kernel->Compute(&ctx); ++ ++ ASSERT_EQ(expected_code, ctx.status().code()); ++ if (expected_code == error::OK) { ++ ASSERT_EQ(true, false) ++ << ctx.mutable_output(0)->shape().DebugString(); ++ } ++} ++ ++TEST(ScalarSummaryOpTest, Test) { ++ int vectorSize = 2; ++ Tensor tags(DT_STRING, {vectorSize}); ++ 
Tensor values(DT_FLOAT, {vectorSize}); ++ for (int i = 0; i < vectorSize; ++i){ ++ values.vec()(i) = static_cast(i); ++ } ++ tags.vec()(0) = "tag 1"; ++ tags.vec()(1) = "tag 2"; ++ TestScalarSummaryOp(&tags, &values, error::INVALID_ARGUMENT); ++} ++ ++ ++PartialTensorShape S(std::initializer_list dims) { ++ return PartialTensorShape(dims); ++} ++ ++ ++ ++} // namespace ++} // namespace tensorflow +-- +2.27.0.111.gc72c7da667-goog + diff --git a/tensorflow/c/kernels/summary_op.cc b/tensorflow/c/kernels/summary_op.cc index 3a78d321d75..6921eb4fdaa 100644 --- a/tensorflow/c/kernels/summary_op.cc +++ b/tensorflow/c/kernels/summary_op.cc @@ -30,24 +30,7 @@ limitations under the License. #include "tensorflow/core/framework/types.h" -// BitcastOp implements a bitcast kernel, creating an output tensor that shares -// the same data buffer as the input but with a different shape and/or data -// type. Its inputs are: -// -// * the input tensor -// * an attribute named "T" containing the TF_DataType of the input tensor -// * an attribute named "type" containing the TF_DataType of the output tensor -// -// Given an input tensor of shape [...], if the input DataType "T" is larger -// than the output DataType "type", then the shape changes from [...] -// to [..., sizeof(T)/sizeof(type)]. -// -// If "T" is smaller than "type", the operator requires that the rightmost -// dimension be equal to sizeof(type)/sizeof(T). The shape then goes from -// [..., sizeof(type)/sizeof(T)] to [...]. -// -// Bitcast is implemented as a low-level cast, so machines with different endian -// orderings will give different results. +// TODO: Copy over Summary Scalar Op Doc static void* SummaryScalarOp_Create(TF_OpKernelConstruction* ctx) { // TODO: replace with a void* pointer type later @@ -91,7 +74,7 @@ static void SummaryScalarOp_Compute(void* kernel, TF_OpKernelContext* ctx) { TF_SetStatus(status, TF_INVALID_ARGUMENT, err.str().c_str()); } } - + // Copy tag and string data into summary protobuf tensorflow::Summary s; if (TF_GetCode(status) == TF_OK) { auto Ttags_array = static_cast(TF_TensorData(tags)); From 2d613a9c700f1748ba2ad50c16de05cc9b9d794b Mon Sep 17 00:00:00 2001 From: Yixing Fu Date: Thu, 18 Jun 2020 22:01:13 -0400 Subject: [PATCH 0042/2522] add test case for save/load dir as pathlib --- tensorflow/python/keras/saving/save_test.py | 28 +++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tensorflow/python/keras/saving/save_test.py b/tensorflow/python/keras/saving/save_test.py index 5c5846fe738..5b5da8c5047 100644 --- a/tensorflow/python/keras/saving/save_test.py +++ b/tensorflow/python/keras/saving/save_test.py @@ -71,6 +71,13 @@ class TestSaveModel(test.TestCase, parameterized.TestCase): save.save_model(self.model, path) self.assert_saved_model(path) + @test_util.run_v2_only + def test_save_format_defaults_pathlib(self): + if sys.version_info >= (3, 6): + path = pathlib.Path(self.get_temp_dir()) / 'model_path' + save.save_model(self.model, path) + self.assert_saved_model(path) + @test_util.run_v2_only def test_save_hdf5(self): path = os.path.join(self.get_temp_dir(), 'model') @@ -81,6 +88,13 @@ class TestSaveModel(test.TestCase, parameterized.TestCase): 'requires the model to be a Functional model or a Sequential model.'): save.save_model(self.subclassed_model, path, save_format='h5') + @test_util.run_v2_only + def test_save_load_hdf5_pathlib(self): + if sys.version_info >= (3, 6): + path = pathlib.Path(self.get_temp_dir()) / 'model' + save.save_model(self.model, path, save_format='h5') + 
save.load_model(path) + @test_util.run_v2_only def test_save_tf(self): path = os.path.join(self.get_temp_dir(), 'model') @@ -105,6 +119,20 @@ class TestSaveModel(test.TestCase, parameterized.TestCase): save.save_model(self.model, path, save_format='tf') save.load_model(path) + @test_util.run_v2_only + def test_save_load_weights_tf_pathlib(self): + if sys.version_info >= (3, 6): + path = pathlib.Path(self.get_temp_dir()) / 'model' + self.model.save_weights(path, save_format='tf') + self.model.load_weights(path) + + @test_util.run_v2_only + def test_save_load_weights_hdf5_pathlib(self): + if sys.version_info >= (3, 6): + path = pathlib.Path(self.get_temp_dir()) / 'model' + self.model.save_weights(path, save_format='h5') + self.model.load_weights(path) + @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def test_saving_with_dense_features(self): cols = [ From 5135cf8f6395956551dd2cd1fed79c4ee6ef0251 Mon Sep 17 00:00:00 2001 From: Yixing Fu Date: Thu, 18 Jun 2020 22:02:06 -0400 Subject: [PATCH 0043/2522] cast pathlike objects to string --- tensorflow/python/saved_model/loader_impl.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/saved_model/loader_impl.py b/tensorflow/python/saved_model/loader_impl.py index 2df2bea428e..06cd988130d 100644 --- a/tensorflow/python/saved_model/loader_impl.py +++ b/tensorflow/python/saved_model/loader_impl.py @@ -73,7 +73,7 @@ def parse_saved_model(export_dir): """Reads the savedmodel.pb or savedmodel.pbtxt file containing `SavedModel`. Args: - export_dir: Directory containing the SavedModel file. + export_dir: String or Pathlike, path to the directory containing the SavedModel file. Returns: A `SavedModel` protocol buffer. @@ -83,11 +83,11 @@ def parse_saved_model(export_dir): """ # Build the path to the SavedModel in pbtxt format. path_to_pbtxt = os.path.join( - compat.as_bytes(export_dir), + compat.as_bytes(compat.path_to_str(export_dir)), compat.as_bytes(constants.SAVED_MODEL_FILENAME_PBTXT)) # Build the path to the SavedModel in pb format. path_to_pb = os.path.join( - compat.as_bytes(export_dir), + compat.as_bytes(compat.path_to_str(export_dir)), compat.as_bytes(constants.SAVED_MODEL_FILENAME_PB)) # Parse the SavedModel protocol buffer. 
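
Note on patches 0042 and 0043: together they let the Keras save/load entry points accept pathlib.Path objects — the tests above exercise pathlib paths for both the SavedModel and HDF5 formats, and parse_saved_model() now coerces the directory with compat.path_to_str() before joining file names. A minimal usage sketch of the intended behaviour (illustrative only; the toy model and the /tmp directory name are assumptions, not taken from the patches):

    import pathlib
    import tensorflow as tf

    # A trivial Keras model to save; any model would do.
    model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
    model.compile(optimizer="sgd", loss="mse")

    # A pathlib.Path (not a str) should now be accepted for saving and loading,
    # because the path is converted with compat.path_to_str() internally.
    path = pathlib.Path("/tmp/example_model")  # hypothetical location
    model.save(path, save_format="tf")
    restored = tf.keras.models.load_model(path)
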
From 0fd550c5e6949402775381c551b93596770911f1 Mon Sep 17 00:00:00 2001 From: zilinzhu Date: Fri, 19 Jun 2020 11:33:10 +0800 Subject: [PATCH 0044/2522] modification based on reviews --- .../grappler/optimizers/data/hoist_discard.cc | 89 ++++++++++--------- .../grappler/optimizers/data/hoist_discard.h | 9 +- .../optimization/hoist_discard_test.py | 7 +- .../experimental/ops/optimization_options.py | 5 +- 4 files changed, 61 insertions(+), 49 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/data/hoist_discard.cc b/tensorflow/core/grappler/optimizers/data/hoist_discard.cc index b44a2a1b677..4b04b3882c8 100644 --- a/tensorflow/core/grappler/optimizers/data/hoist_discard.cc +++ b/tensorflow/core/grappler/optimizers/data/hoist_discard.cc @@ -32,33 +32,39 @@ namespace tensorflow { namespace grappler { namespace { -const std::unordered_set kDataDiscarding = { +constexpr char kHoistDiscardOpPrefix[] = "hoist_discard/"; + +constexpr std::array kDataDiscarding = { "ShardDataset", "SkipDataset", "TakeDataset", }; -const std::unordered_set kCardinalityPreserving = { +constexpr std::array kCardinalityPreserving = { "CacheDataset", "CacheDatasetV2", "PrefetchDataset", "MapDataset", "ParallelMapDataset", "ParallelMapDatasetV2", }; bool IsDataDiscarding(const NodeDef& node) { - auto iter = kDataDiscarding.find(node.op()); - if (iter == kDataDiscarding.end()) { - return false; + for (const auto& discard_op : kDataDiscarding) { + if (node.op() == discard_op) { + return true; + } } - return true; + return false; } bool IsCardinalityPreserving(const NodeDef& node) { - auto iter = kCardinalityPreserving.find(node.op()); - if (iter == kCardinalityPreserving.end()) { - return false; + for (const auto& cardinality_preserving_op : kCardinalityPreserving) { + if (node.op() != cardinality_preserving_op) { + continue; + } + // Map ops with preserve_cardinality=false do not qualify. 
+ auto attr_iter = node.attr().find("preserve_cardinality"); + if (attr_iter != node.attr().end() && !attr_iter->second.b()) { + return false; + } + return true; } - auto attr_iter = node.attr().find("preserve_cardinality"); - if (attr_iter != node.attr().end() && !attr_iter->second.b()) { - return false; - } - return true; + return false; } } // namepsace @@ -72,33 +78,36 @@ Status HoistDiscard::OptimizeAndCollectStats(Cluster* cluster, bool updated; do { updated = false; - for (int i = 0; i < graph.graph()->node_size(); i++) { - auto node = graph.graph()->mutable_node(i); - if (IsDataDiscarding(*node)) { - NodeDef* start = node; - NodeDef* start_parent = graph_utils::GetInputNode(*start, graph); - while (IsCardinalityPreserving(*start_parent)) { - start = start_parent; - start_parent = graph_utils::GetInputNode(*start, graph); - } - if (start->name() == node->name()) { - continue; - } - auto parent = graph_utils::GetInputNode(*node, graph); - TF_RETURN_IF_ERROR(graph.UpdateFanouts(node->name(), parent->name())); - if (!absl::StartsWith(node->name(), "hoist_discard/")) { - TF_RETURN_IF_ERROR(graph.UpdateNodeName(node->name(), - strings::StrCat("hoist_discard/", node->name()), false)); - } - for (const auto& attr_name : {"output_types", "output_shapes"}) { - graph_utils::CopyAttribute(attr_name, *start_parent, - node); - } - *node->mutable_input(0) = start_parent->name(); - *start->mutable_input(0) = node->name(); - updated = true; - break; + for (int i = 0; i < graph.graph()->node_size(); ++i) { + NodeDef* discard_node = graph.graph()->mutable_node(i); + if (!IsDataDiscarding(*discard_node)) { + continue; } + NodeDef* start = discard_node; + NodeDef* start_parent = graph_utils::GetInputNode(*start, graph); + while (IsCardinalityPreserving(*start_parent)) { + start = start_parent; + start_parent = graph_utils::GetInputNode(*start, graph); + } + if (start->name() == discard_node->name()) { + continue; + } + NodeDef* parent = graph_utils::GetInputNode(*discard_node, graph); + TF_RETURN_IF_ERROR( + graph.UpdateFanouts(discard_node->name(), parent->name())); + if (!absl::StartsWith(discard_node->name(), kHoistDiscardOpPrefix)) { + TF_RETURN_IF_ERROR(graph.UpdateNodeName(discard_node->name(), + strings::StrCat(kHoistDiscardOpPrefix, discard_node->name()), + false)); + } + for (const auto& attr_name : {"output_types", "output_shapes"}) { + graph_utils::CopyAttribute(attr_name, *start_parent, + discard_node); + } + *discard_node->mutable_input(0) = start_parent->name(); + *start->mutable_input(0) = discard_node->name(); + updated = true; + break; } } while (updated); return Status::OK(); diff --git a/tensorflow/core/grappler/optimizers/data/hoist_discard.h b/tensorflow/core/grappler/optimizers/data/hoist_discard.h index 324d80100a1..e56bde1d804 100644 --- a/tensorflow/core/grappler/optimizers/data/hoist_discard.h +++ b/tensorflow/core/grappler/optimizers/data/hoist_discard.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_HOIST_DATA_DISCARDING_OPS_H_ -#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_HOIST_DATA_DISCARDING_OPS_H_ +#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_HOIST_DISCARD_H_ +#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_HOIST_DISCARD_H_ #include "tensorflow/core/grappler/optimizers/data/optimizer_base.h" @@ -22,7 +22,7 @@ namespace tensorflow { namespace grappler { // This optimization hoists the data discarding ops (such as `skip`, `take` and -// `shard`) to avoid unnecessary computation. +// `shard`) to avoid unnecessary computation. class HoistDiscard : public TFDataOptimizerBase { public: HoistDiscard() = default; @@ -48,5 +48,4 @@ class HoistDiscard : public TFDataOptimizerBase { } // namespace grappler } // namespace tensorflow -#endif // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_HOIST_DATA_DISCARDING_OPS_H_ - +#endif // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_HOIST_DISCARD_H_ diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/hoist_discard_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/hoist_discard_test.py index 06801cb46db..3a7e078b804 100644 --- a/tensorflow/python/data/experimental/kernel_tests/optimization/hoist_discard_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/optimization/hoist_discard_test.py @@ -34,11 +34,12 @@ class HoistDiscardTest(test_base.DatasetTestBase, parameterized.TestCase): dataset = dataset_ops.Dataset.range(100) dataset = dataset.apply( testing.assert_next(["FiniteSkip", "FiniteTake", "Shard", - "ParallelMap", "MemoryCacheImpl"])) + "ParallelMap", "MemoryCacheImpl", "Prefetch"])) dataset = dataset.map( lambda x: x + 1, num_parallel_calls=10) dataset = dataset.skip(10) dataset = dataset.cache() + dataset = dataset.prefetch(1) dataset = dataset.take(50) dataset = dataset.shard(2, 0) options = dataset_ops.Options() @@ -51,13 +52,15 @@ class HoistDiscardTest(test_base.DatasetTestBase, parameterized.TestCase): mode=["eager", "graph"])) def testSimpleHoistingV1(self): dataset = dataset_ops.Dataset.range(100) + # Map ops have preserve_cardinality=false in tensorflow v1. dataset = dataset.apply( testing.assert_next(["ParallelMap", "FiniteSkip", "FiniteTake", - "Shard", "MemoryCacheImpl"])) + "Shard", "MemoryCacheImpl", "Prefetch"])) dataset = dataset.map( lambda x: x + 1, num_parallel_calls=10) dataset = dataset.skip(10) dataset = dataset.cache() + dataset = dataset.prefetch(1) dataset = dataset.take(50) dataset = dataset.shard(2, 0) options = dataset_ops.Options() diff --git a/tensorflow/python/data/experimental/ops/optimization_options.py b/tensorflow/python/data/experimental/ops/optimization_options.py index d4c19a7ccf4..5ed5bf2ee7e 100644 --- a/tensorflow/python/data/experimental/ops/optimization_options.py +++ b/tensorflow/python/data/experimental/ops/optimization_options.py @@ -130,8 +130,9 @@ class OptimizationOptions(options.OptionsBase): name="hoist_discard", ty=bool, docstring= - "Whether to hoist ops that will discard data (such as skip, take, shard) " - "out of unary cardinality preserved transformations. " + "Whether to hoist ops that will discard data (such as skip, take, shard)" + "out of unary cardinality preserved transformations, e.g. " + "dataset.map(...).take(3) gets optimized to dataset.take(3).map()." 
"If None, defaults to False.") hoist_random_uniform = options.create_option( From 43442a2c7f449fb979c26e543976a087fddad96c Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Mon, 22 Jun 2020 16:43:58 +0000 Subject: [PATCH 0045/2522] initial map ops set up --- tensorflow/core/BUILD | 2 + tensorflow/core/kernels/map_kernels.cc | 52 ++++++++++++++++++ tensorflow/core/ops/map_ops.cc | 33 ++++++++++++ tensorflow/python/BUILD | 14 +++++ tensorflow/python/kernel_tests/BUILD | 24 +++++++++ .../python/kernel_tests/map_ops_test.py | 39 ++++++++++++++ tensorflow/python/ops/map_ops.py | 53 +++++++++++++++++++ 7 files changed, 217 insertions(+) create mode 100644 tensorflow/core/kernels/map_kernels.cc create mode 100644 tensorflow/core/ops/map_ops.cc create mode 100644 tensorflow/python/kernel_tests/map_ops_test.py create mode 100644 tensorflow/python/ops/map_ops.py diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 2b16801f6ed..66e7061642c 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -628,6 +628,7 @@ tf_gen_op_libs( "io_ops", "linalg_ops", "list_ops", + "map_ops", "lookup_ops", "logging_ops", "manip_ops", @@ -859,6 +860,7 @@ cc_library( ":io_ops_op_lib", ":linalg_ops_op_lib", ":list_ops_op_lib", + ":map_ops_op_lib", ":logging_ops_op_lib", ":lookup_ops_op_lib", ":manip_ops_op_lib", diff --git a/tensorflow/core/kernels/map_kernels.cc b/tensorflow/core/kernels/map_kernels.cc new file mode 100644 index 00000000000..bd6d880aec5 --- /dev/null +++ b/tensorflow/core/kernels/map_kernels.cc @@ -0,0 +1,52 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/op_kernel.h" +#include +using namespace std; + +namespace tensorflow { + +class ZeroOutOp : public OpKernel { + public: + explicit ZeroOutOp(OpKernelConstruction* c) : OpKernel(c) {} + + void Compute(OpKernelContext* c) override { + cout << "Hello World - Op" << endl; + // Grab the input tensor + const Tensor& input_tensor = c->input(0); + auto input = input_tensor.flat(); + + // Create an output tensor + Tensor* output_tensor = NULL; + OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(), + &output_tensor)); + auto output_flat = output_tensor->flat(); + + // Set all but the first element of the output tensor to 0 + const int N = input.size(); + for (int i=1; i 0) output_flat(0) = input(0); + } +}; + +REGISTER_KERNEL_BUILDER(Name("ZeroOut").Device(DEVICE_CPU), + ZeroOutOp); + +} // namespace tensorflow diff --git a/tensorflow/core/ops/map_ops.cc b/tensorflow/core/ops/map_ops.cc new file mode 100644 index 00000000000..bdad6d389b0 --- /dev/null +++ b/tensorflow/core/ops/map_ops.cc @@ -0,0 +1,33 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" + +namespace tensorflow { +namespace { + +REGISTER_OP("ZeroOut") + .Input("to_zero: int32") + .Output("zeroed: int32") + .SetShapeFn([](shape_inference::InferenceContext* c) { + //c->set_output(0, c->Scalar()); + c->set_output(0, c->input(0)); + return Status::OK(); + }); + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index e71900b430f..3e59e61ae88 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -3000,6 +3000,10 @@ tf_gen_op_wrapper_private_py( name = "list_ops_gen", ) +tf_gen_op_wrapper_private_py( + name = "map_ops_gen", +) + tf_gen_op_wrapper_private_py( name = "script_ops_gen", ) @@ -4175,6 +4179,16 @@ py_library( ], ) +py_library( + name = "map_ops", + srcs = ["ops/map_ops.py"], + srcs_version = "PY2AND3", + deps = [ + ":array_ops", + ":map_ops_gen", + ], +) + py_library( name = "nn", srcs = [ diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index a04c874c9d6..d8c8e3dc2a8 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -142,6 +142,30 @@ cuda_py_test( ], ) +cuda_py_test( + name = "map_ops_test", + size = "small", + srcs = ["map_ops_test.py"], + grpc_enabled = True, + tags = [ + "noasan", # TODO(b/155406705): flaky + ], + deps = [ + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:gradients_impl", + "//tensorflow/python:map_ops", + "//tensorflow/python:math_ops", + "//tensorflow/python:tensor_shape", + "//tensorflow/python/eager:context", + "//tensorflow/python/eager:def_function", + "//third_party/py/numpy", + "@absl_py//absl/testing:parameterized", + ], +) + cuda_py_test( name = "benchmark_test", size = "small", diff --git a/tensorflow/python/kernel_tests/map_ops_test.py b/tensorflow/python/kernel_tests/map_ops_test.py new file mode 100644 index 00000000000..28cadf0a6df --- /dev/null +++ b/tensorflow/python/kernel_tests/map_ops_test.py @@ -0,0 +1,39 @@ +# Copyright 2018 The Sonnet Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Tests for zero_out ops.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.platform import test +#try: +# from tensorflow_zero_out.python.ops.zero_out_ops import zero_out +#except ImportError: +# from zero_out_ops import zero_out +from tensorflow.python.ops import map_ops + +class ZeroOutTest(test.TestCase): + + def testZeroOut(self): + print("Hello World - Test") + with self.test_session(): + self.assertAllClose( + zero_out([[1, 2], [3, 4]]), np.array([[1, 0], [0, 0]])) + + +if __name__ == '__main__': + test.main() \ No newline at end of file diff --git a/tensorflow/python/ops/map_ops.py b/tensorflow/python/ops/map_ops.py new file mode 100644 index 00000000000..93f4cc0bdc8 --- /dev/null +++ b/tensorflow/python/ops/map_ops.py @@ -0,0 +1,53 @@ +# Copyright 2018 The Sonnet Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Use zero_out ops in python.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import load_library +from tensorflow.python.platform import resource_loader +# go/tf-wildcard-import +# pylint: disable=wildcard-import +from tensorflow.python.ops import gen_map_ops +from tensorflow.python.ops.gen_map_ops import * + +#zero_out_ops = load_library.load_op_library( +# resource_loader.get_path_to_datafile('_zero_out_ops.so')) +#zero_out = zero_out_ops.zero_out + +def zero_out(to_zero): + print("Hello World - PythonS Op") + return gen_map_ops.zero_out(to_zero) + +@ops.RegisterGradient("ZeroOut") +def _zero_out_grad(op, grad): + """The gradients for `zero_out`. + + Args: + op: The `zero_out` `Operation` that we are differentiating, which we can use + to find the inputs and outputs of the original op. + grad: Gradient with respect to the output of the `zero_out` op. + + Returns: + Gradients with respect to the input of `zero_out`. + """ + to_zero = op.inputs[0] + shape = array_ops.shape(to_zero) + index = array_ops.zeros_like(shape) + first_grad = array_ops.reshape(grad, [-1])[0] + to_zero_grad = sparse_ops.sparse_to_dense([index], shape, first_grad, 0) + return [to_zero_grad] # List of one Tensor, since we have one input From c6f06c2f1b17508670601894378bb89b71fbe37e Mon Sep 17 00:00:00 2001 From: bzhaoopenstack Date: Tue, 23 Jun 2020 12:04:12 +0800 Subject: [PATCH 0046/2522] Add arm source file into aws-checksums According to https://github.com/tensorflow/tensorflow/issues/40463#issuecomment-647640030 , seem the aws libs need to add the arm related libs during build tensorflow package. 
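Note: as written, `testZeroOut` above calls `zero_out(...)` directly but only imports the `map_ops` module, so it would fail with a NameError before reaching the kernel. A minimal self-contained version of the same check, assuming the `ZeroOut` op and its generated `gen_map_ops` wrapper from this patch are linked into the build, could look like:

import numpy as np

from tensorflow.python.ops import map_ops
from tensorflow.python.platform import test


class ZeroOutTest(test.TestCase):

  def testZeroOut(self):
    with self.test_session():
      # ZeroOut keeps only the first element of the input and zeroes the rest.
      result = map_ops.zero_out([[1, 2], [3, 4]])
      self.assertAllClose(result, np.array([[1, 0], [0, 0]]))


if __name__ == "__main__":
  test.main()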
--- third_party/aws/aws-checksums.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/third_party/aws/aws-checksums.bazel b/third_party/aws/aws-checksums.bazel index 759cb2e6fcf..f620a96d2c8 100644 --- a/third_party/aws/aws-checksums.bazel +++ b/third_party/aws/aws-checksums.bazel @@ -16,6 +16,7 @@ cc_library( "//conditions:default": [], }) + glob([ "source/intel/*.c", + "source/arm/*.c", "source/*.c", ]), hdrs = glob([ From f8943f369b079fbb833ebbfefcf66a5454c98a32 Mon Sep 17 00:00:00 2001 From: "902449@58880@bigcat_chen@ASIC" Date: Tue, 23 Jun 2020 14:06:45 +0800 Subject: [PATCH 0047/2522] remove unuse comment --- tensorflow/lite/micro/himax_we1_evb/debug_log.cc | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tensorflow/lite/micro/himax_we1_evb/debug_log.cc b/tensorflow/lite/micro/himax_we1_evb/debug_log.cc index 36ac3f3fa03..0cd26874646 100644 --- a/tensorflow/lite/micro/himax_we1_evb/debug_log.cc +++ b/tensorflow/lite/micro/himax_we1_evb/debug_log.cc @@ -13,10 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -// Implementation for the DebugLog() function that prints to the UART on the -// SparkFun Edge microcontroller. The same should work for other targets using -// the Ambiq Apollo 3. - #include "tensorflow/lite/micro/debug_log.h" #include "hx_drv_tflm.h" From 7eed107f52cde76a44a0e1f66896d259af1051bf Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Fri, 19 Jun 2020 22:55:27 +0000 Subject: [PATCH 0048/2522] Fix error caused by incorrect rank assumption in tf.image.flip_left_right This PR tries to address the issue raised in 40580 where an error was thrown out when tf.image.flip_left_right process a tensor of unknown rank. The reason was that tf.image.flip_left_right assumes rank == 3 in case of unknown rank. This PR adjust to use array_ops.rank(image) to obtain the true rank when unknown rank is present. This PR fixes 40580. 
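Note: the unknown-rank situation described above typically shows up when an image tensor comes out of `tf.data.Dataset.from_generator` without an `output_shapes` hint, which is exactly what the new `testFlipImageUnknownShape` test below exercises. A user-level reproduction sketch (TF 2.x API, values taken from that test) is:

import tensorflow as tf

# A batch of one 2x2 RGB image, shape [1, 2, 2, 3].
image = tf.constant([[[[0, 1, 2], [3, 4, 5]],
                      [[6, 7, 8], [9, 10, 11]]]])

def generator():
  yield image

# Without output_shapes, the element seen inside map() has unknown rank, so
# flip_left_right used to fall into its rank-3 code path; this patch makes it
# dispatch on the runtime rank instead.
dataset = tf.data.Dataset.from_generator(generator, output_types=tf.int32)
dataset = dataset.map(tf.image.flip_left_right)

flipped = next(iter(dataset))
# Each row's pixels are reversed left to right:
# [[[[3, 4, 5], [0, 1, 2]], [[9, 10, 11], [6, 7, 8]]]]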
Signed-off-by: Yong Tang --- tensorflow/python/ops/image_ops_impl.py | 49 ++++++++++++++++++++++--- tensorflow/python/ops/image_ops_test.py | 19 +++++++++- 2 files changed, 61 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index bbce25724e7..d4f3ed0d9c9 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -431,7 +431,8 @@ def _random_flip(image, flip_index, seed, scope_name): image = ops.convert_to_tensor(image, name='image') image = _AssertAtLeast3DImage(image) shape = image.get_shape() - if shape.ndims == 3 or shape.ndims is None: + + def f_rank3(): uniform_random = random_ops.random_uniform([], 0, 1.0, seed=seed) mirror_cond = math_ops.less(uniform_random, .5) result = control_flow_ops.cond( @@ -440,7 +441,8 @@ def _random_flip(image, flip_index, seed, scope_name): lambda: image, name=scope) return fix_image_flip_shape(image, result) - elif shape.ndims == 4: + + def f_rank4(): batch_size = array_ops.shape(image)[0] uniform_random = random_ops.random_uniform([batch_size], 0, @@ -451,6 +453,15 @@ def _random_flip(image, flip_index, seed, scope_name): flips = math_ops.cast(flips, image.dtype) flipped_input = array_ops.reverse(image, [flip_index + 1]) return flips * flipped_input + (1 - flips) * image + + if shape.ndims is None: + rank = array_ops.rank(image) + return control_flow_ops.cond( + math_ops.equal(rank, 3), f_rank3, f_rank4) + if shape.ndims == 3: + return f_rank3() + elif shape.ndims == 4: + return f_rank4() else: raise ValueError( '\'image\' (shape %s) must have either 3 or 4 dimensions.' % shape) @@ -549,10 +560,20 @@ def _flip(image, flip_index, scope_name): image = ops.convert_to_tensor(image, name='image') image = _AssertAtLeast3DImage(image) shape = image.get_shape() - if shape.ndims == 3 or shape.ndims is None: + + def f_rank3(): return fix_image_flip_shape(image, array_ops.reverse(image, [flip_index])) - elif shape.ndims == 4: + def f_rank4(): return array_ops.reverse(image, [flip_index + 1]) + + if shape.ndims is None: + rank = array_ops.rank(image) + return control_flow_ops.cond( + math_ops.equal(rank, 3), f_rank3, f_rank4) + elif shape.ndims == 3: + return f_rank3() + elif shape.ndims == 4: + return f_rank4() else: raise ValueError( '\'image\' (shape %s)must have either 3 or 4 dimensions.' 
% shape) @@ -599,7 +620,15 @@ def rot90(image, k=1, name=None): k = math_ops.mod(k, 4) shape = image.get_shape() - if shape.ndims == 3 or shape.ndims is None: + if shape.ndims is None: + rank = array_ops.rank(image) + def f_rank3(): + return _rot90_3D(image, k, scope) + def f_rank4(): + return _rot90_4D(image, k, scope) + return control_flow_ops.cond( + math_ops.equal(rank, 3), f_rank3, f_rank4) + elif shape.ndims == 3: return _rot90_3D(image, k, scope) elif shape.ndims == 4: return _rot90_4D(image, k, scope) @@ -722,7 +751,15 @@ def transpose(image, name=None): image = ops.convert_to_tensor(image, name='image') image = _AssertAtLeast3DImage(image) shape = image.get_shape() - if shape.ndims == 3 or shape.ndims is None: + if shape.ndims is None: + rank = array_ops.rank(image) + def f_rank3(): + return array_ops.transpose(image, [1, 0, 2], name=name) + def f_rank4(): + return array_ops.transpose(image, [0, 2, 1, 3], name=name) + return control_flow_ops.cond( + math_ops.equal(rank, 3), f_rank3, f_rank4) + elif shape.ndims == 3: return array_ops.transpose(image, [1, 0, 2], name=name) elif shape.ndims == 4: return array_ops.transpose(image, [0, 2, 1, 3], name=name) diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py index a05209c2038..14c19193fd9 100644 --- a/tensorflow/python/ops/image_ops_test.py +++ b/tensorflow/python/ops/image_ops_test.py @@ -31,6 +31,7 @@ from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.core.protobuf import config_pb2 from tensorflow.python.client import session from tensorflow.python.compat import compat +from tensorflow.python.data.ops import dataset_ops from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors @@ -1305,7 +1306,7 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): image_ops.transpose, image_ops.rot90 ]: transformed_unknown_rank = op(p_unknown_rank) - self.assertEqual(3, transformed_unknown_rank.get_shape().ndims) + self.assertEqual(None, transformed_unknown_rank.get_shape().ndims) transformed_unknown_dims_3 = op(p_unknown_dims_3) self.assertEqual(3, transformed_unknown_dims_3.get_shape().ndims) transformed_unknown_width = op(p_unknown_width) @@ -1364,6 +1365,22 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): y_np = np.rot90(image, k=k, axes=(1, 2)) self.assertAllEqual(y_np, y_tf.eval({k_placeholder: k})) + def testFlipImageUnknownShape(self): + image_input = constant_op.constant( + [[[[0, 1, 2], [3, 4, 5]], [[6, 7, 8], [9, 10, 11]]]]) + + expected_output = constant_op.constant( + [[[[3, 4, 5], [0, 1, 2]], [[9, 10, 11], [6, 7, 8]]]]) + + def generator(): yield image_input + + dataset = dataset_ops.Dataset.from_generator( + generator, output_types=dtypes.int32) + dataset = dataset.map(image_ops.flip_left_right) + + image_flipped_via_dataset_map = next(iter(dataset)) + self.assertAllEqual(image_flipped_via_dataset_map, expected_output) + class AdjustContrastTest(test_util.TensorFlowTestCase): def _testContrast(self, x_np, y_np, contrast_factor): From 5e261399de5d638844d2e0ec54ff005184bb20ad Mon Sep 17 00:00:00 2001 From: ngc92 <7938269+ngc92@users.noreply.github.com> Date: Tue, 9 Jun 2020 18:51:16 +0300 Subject: [PATCH 0049/2522] added sparse.map_values --- tensorflow/python/ops/sparse_ops.py | 94 +++++++++++++++++++ tensorflow/python/ops/sparse_ops_test.py | 41 ++++++++ .../api/golden/v1/tensorflow.sparse.pbtxt | 4 + .../api/golden/v2/tensorflow.sparse.pbtxt | 4 + 4 
files changed, 143 insertions(+) diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py index cee1dc23aa0..f8f7872448e 100644 --- a/tensorflow/python/ops/sparse_ops.py +++ b/tensorflow/python/ops/sparse_ops.py @@ -47,6 +47,7 @@ from tensorflow.python.ops.gen_sparse_ops import * from tensorflow.python.util import compat from tensorflow.python.util import deprecation from tensorflow.python.util import dispatch +from tensorflow.python.util import nest from tensorflow.python.util import tf_inspect from tensorflow.python.util.compat import collections_abc from tensorflow.python.util.tf_export import get_canonical_name_for_symbol @@ -2733,6 +2734,99 @@ def sparse_transpose(sp_input, perm=None, name=None): return transposed_st +@tf_export("sparse.map_values") +@dispatch.add_dispatch_support +def map_values(op, *args, **kwargs): + """Applies `op` to the values of one or more `SparseTensor`s. + + Replaces any `SparseTensor` in `args` or `kwargs` with its `values` + tensor, and then calls `op`. Returns a `SparseTensor` that is constructed + from the input `SparseTensor`s' `indices` and the value returned by + the `op`. + + If the input arguments contain multiple `SparseTensor`s, then they must have + identical `indices`. + + Examples: + + >>> st = tf.sparse.from_dense([[1, 2, 0], [0, 4, 0], [1, 0, 0]]) + >>> tf.sparse.to_dense(tf.sparse.map_values(tf.ones_like, st)).numpy().to_list() + [[1, 1, 0], [0, 1, 0], [1, 0, 0]] + >>> tf.sparse.to_dense(tf.sparse.map_values(tf.multiply, st, st)).numpy().to_list() + [[1, 4, 0], [0, 16, 0], [1, 0, 0]] + >>> tf.sparse.to_dense(tf.sparse.map_values(tf.add, st, 5)).numpy().to_list() + [[5, 7, 0], [0, 9, 0], [6, 0, 0]] + + Note in particular that even though `tf.add(0, 5) != 0`, implicit zeros + will remain unchanged. However, if the sparse tensor contains any explict + zeros, these will be affected by the mapping! + + Args: + op: The operation that should be applied to the SparseTensor `values`. + `op` is typically an element-wise operation (such as math_ops.add), but + any operation that preserves the shape can be used. + *args: Arguments for `op`. + **kwargs: Keyword arguments for `op`. + + Returns: + A `SparseTensor` whose `indices` matches the `indices` of all + input `SparseTensor`s. + Raises: + ValueError: If args contains no `SparseTensor`, or if the `indices` + of the input `SparseTensor`s are not identical. + """ + sparse_list = [] + inner_args = _replace_sparse_with_values(args, sparse_list) + inner_kwargs = _replace_sparse_with_values(kwargs, sparse_list) + if not sparse_list: + raise ValueError("No SparseTensor in argument list of map_values") + + with ops.control_dependencies(_assert_sparse_compatible(sparse_list)): + # Delegate to op, and then compose the result from the transformed values + # and the known indices/dense shape. Since we ensure that indices and shape + # are identical, we can just use the firs tone. + return sparse_tensor.SparseTensor(sparse_list[0].indices, + op(*inner_args, **inner_kwargs), sparse_list[0].dense_shape) + + +def _assert_sparse_compatible(sparse_tensors): + """Check that all of `sparse_tensors` have same `indices` and `dense_shape` + + Returns: An op to be used as a control dependency. 
+ """ + checks = [] + first = sparse_tensors[0] + for t in sparse_tensors[1:]: + checks.append(check_ops.assert_equal(first.dense_shape, t.dense_shape, + message="Mismatched shapes!")) + checks.append(check_ops.assert_equal(first.indices, t.indices, + message="Mismatched indices!")) + return checks + + +def _replace_sparse_with_values(value, sparse_list): + """Replace `SparseTensor`s with their values in `value` + + Each `SparseTensor` in `value` is replaced by its `values` tensor, and + collects all `SparseTensor`s in `sparse_list`. + + Args: + value: A structure of `Tensor`s and `SparseTensor`s + sparse_list: A list. Output parameter that collects all `SparseTensor`s + in `value`. + """ + flat_vals = nest.flatten(value, expand_composites=False) + new_vals = [] + for v in flat_vals: + if isinstance(v, sparse_tensor.SparseTensor): + sparse_list.append(v) + new_vals.append(v.values) + else: + new_vals.append(v) + return nest.pack_sequence_as(value, new_vals, expand_composites=False) + + + def _add_sparse_to_tensors_map(sp_input, container=None, shared_name=None, diff --git a/tensorflow/python/ops/sparse_ops_test.py b/tensorflow/python/ops/sparse_ops_test.py index d321f41a85a..92ed563e97f 100644 --- a/tensorflow/python/ops/sparse_ops_test.py +++ b/tensorflow/python/ops/sparse_ops_test.py @@ -23,6 +23,7 @@ import numpy as np from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import test_util @@ -180,6 +181,46 @@ class SparseOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): array_ops.transpose(dense_of_sparse)) self.assertAllEqual(expected, result) + def testMapValues(self): + # supplying no sparse tensor should result in ValueError + with self.assertRaises(ValueError): + sparse_ops.map_values(math_ops.abs, 0.0) + + sp = sparse_ops.from_dense([[0.0, 1.0, 0.0], [-2.0, 1.0, 0.0]]) + + # helper function to check equality of sparse tensor + def assert_sparse_equal(expected, result): + self.assertAllEqual(expected.values, result.values, msg="Values differ") + self.assertAllEqual(expected.indices, result.indices, msg="Indices differ") + self.assertAllEqual(expected.dense_shape, result.dense_shape, msg="Shapes differ") + + # check for a single sparse argument + expected = sparse_ops.from_dense([[0.0, 1.0, 0.0], [2.0, 1.0, 0.0]]) + result = sparse_ops.map_values(math_ops.abs, sp) + assert_sparse_equal(expected, result) + + # check correct passing of keyword argument, and handling of two sparse + # arguments at the same time + def mapping(arg1, arg2, kwarg): + self.assertTrue(kwarg == "kwarg") + return arg1 + arg2 + result = sparse_ops.map_values(mapping, sp, sp, kwarg="kwarg") + expected = sparse_ops.from_dense([[0.0, 2.0, 0.0], [-4.0, 2.0, 0.0]]) + assert_sparse_equal(expected, result) + + # check that index mismatches are correctly detected even if the `value`s + # have compatible shape + sp_incomp = sparse_ops.from_dense([[0.0, 1.0, 0.0], [-2.0, 0.0, 1.0]]) + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + result = sparse_ops.map_values(mapping, sp, sp_incomp, kwarg="kwarg") + self.evaluate(result) + + # check that shape mismatches are correctly detected + sp_incomp = sparse_tensor.SparseTensor(sp.indices, sp.values, (25, 25)) + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + result = sparse_ops.map_values(mapping, sp, 
sp_incomp, kwarg="kwarg") + self.evaluate(result) + def testConstantStringToSparse(self): # Test case for GitHub issue 40633. tensor = constant_op.constant(list('ababa')) diff --git a/tensorflow/tools/api/golden/v1/tensorflow.sparse.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.sparse.pbtxt index 9550418c2a6..ea94adae57c 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.sparse.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.sparse.pbtxt @@ -44,6 +44,10 @@ tf_module { name: "from_dense" argspec: "args=[\'tensor\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "map_values" + argspec: "args=[\'op\'], varargs=args, keywords=kwargs, defaults=None" + } member_method { name: "mask" argspec: "args=[\'a\', \'mask_indices\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/tensorflow/tools/api/golden/v2/tensorflow.sparse.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.sparse.pbtxt index 0028b7d8953..4bd63ab5243 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.sparse.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.sparse.pbtxt @@ -40,6 +40,10 @@ tf_module { name: "from_dense" argspec: "args=[\'tensor\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "map_values" + argspec: "args=[\'op\'], varargs=args, keywords=kwargs, defaults=None" + } member_method { name: "mask" argspec: "args=[\'a\', \'mask_indices\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " From 8d631691bfaccc7220eeac0bdddfe438ac76b9d7 Mon Sep 17 00:00:00 2001 From: ngc92 <7938269+ngc92@users.noreply.github.com> Date: Wed, 24 Jun 2020 00:58:50 +0300 Subject: [PATCH 0050/2522] v2 only --- tensorflow/python/ops/sparse_ops.py | 2 +- tensorflow/tools/api/golden/v1/tensorflow.sparse.pbtxt | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py index f8f7872448e..f9b49430752 100644 --- a/tensorflow/python/ops/sparse_ops.py +++ b/tensorflow/python/ops/sparse_ops.py @@ -2734,7 +2734,7 @@ def sparse_transpose(sp_input, perm=None, name=None): return transposed_st -@tf_export("sparse.map_values") +@tf_export("sparse.map_values", v1=[]) @dispatch.add_dispatch_support def map_values(op, *args, **kwargs): """Applies `op` to the values of one or more `SparseTensor`s. 
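Note: two small nits in the `map_values` docstring examples above: `.numpy()` returns an `ndarray`, whose method is `tolist()` rather than `to_list()`, and the `tf.add` example's expected output should start with 6 rather than 5, since the explicit value 1 stored at position (0, 0) also has 5 added to it. A corrected usage sketch (TF 2.x, where this patch exports the symbol as `tf.sparse.map_values`) is:

import tensorflow as tf

st = tf.sparse.from_dense([[1, 2, 0], [0, 4, 0], [1, 0, 0]])

# Only the stored (non-default) values are transformed; implicit zeros stay zero.
ones = tf.sparse.map_values(tf.ones_like, st)
print(tf.sparse.to_dense(ones).numpy().tolist())       # [[1, 1, 0], [0, 1, 0], [1, 0, 0]]

squared = tf.sparse.map_values(tf.multiply, st, st)
print(tf.sparse.to_dense(squared).numpy().tolist())    # [[1, 4, 0], [0, 16, 0], [1, 0, 0]]

plus_five = tf.sparse.map_values(tf.add, st, 5)
print(tf.sparse.to_dense(plus_five).numpy().tolist())  # [[6, 7, 0], [0, 9, 0], [6, 0, 0]]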
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.sparse.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.sparse.pbtxt index ea94adae57c..9550418c2a6 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.sparse.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.sparse.pbtxt @@ -44,10 +44,6 @@ tf_module { name: "from_dense" argspec: "args=[\'tensor\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } - member_method { - name: "map_values" - argspec: "args=[\'op\'], varargs=args, keywords=kwargs, defaults=None" - } member_method { name: "mask" argspec: "args=[\'a\', \'mask_indices\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " From f702f4969b421531a12f599caff9bc8b0b3bddf9 Mon Sep 17 00:00:00 2001 From: ngc92 <7938269+ngc92@users.noreply.github.com> Date: Wed, 24 Jun 2020 01:03:43 +0300 Subject: [PATCH 0051/2522] updated docs to address review comments --- tensorflow/python/ops/sparse_ops.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py index f9b49430752..a7ec64994cb 100644 --- a/tensorflow/python/ops/sparse_ops.py +++ b/tensorflow/python/ops/sparse_ops.py @@ -2737,15 +2737,16 @@ def sparse_transpose(sp_input, perm=None, name=None): @tf_export("sparse.map_values", v1=[]) @dispatch.add_dispatch_support def map_values(op, *args, **kwargs): - """Applies `op` to the values of one or more `SparseTensor`s. + """Applies `op` to the `.values` tensor of one or more `SparseTensor`s. Replaces any `SparseTensor` in `args` or `kwargs` with its `values` - tensor, and then calls `op`. Returns a `SparseTensor` that is constructed - from the input `SparseTensor`s' `indices` and the value returned by - the `op`. + tensor (which contains the non-default values for the SparseTensor), + and then calls `op`. Returns a `SparseTensor` that is constructed + from the input `SparseTensor`s' `indices`, `dense_shape`, and the + value returned by the `op`. If the input arguments contain multiple `SparseTensor`s, then they must have - identical `indices`. + equal `indices` and dense shapes. Examples: @@ -2769,11 +2770,11 @@ def map_values(op, *args, **kwargs): **kwargs: Keyword arguments for `op`. Returns: - A `SparseTensor` whose `indices` matches the `indices` of all - input `SparseTensor`s. + A `SparseTensor` whose `indices` and `dense_shape` matches the `indices` + and `dense_shape` of all input `SparseTensor`s. Raises: ValueError: If args contains no `SparseTensor`, or if the `indices` - of the input `SparseTensor`s are not identical. + or `dense_shape`s of the input `SparseTensor`s are not equal. """ sparse_list = [] inner_args = _replace_sparse_with_values(args, sparse_list) @@ -2784,13 +2785,13 @@ def map_values(op, *args, **kwargs): with ops.control_dependencies(_assert_sparse_compatible(sparse_list)): # Delegate to op, and then compose the result from the transformed values # and the known indices/dense shape. Since we ensure that indices and shape - # are identical, we can just use the firs tone. + # are identical, we can just use the first one. return sparse_tensor.SparseTensor(sparse_list[0].indices, op(*inner_args, **inner_kwargs), sparse_list[0].dense_shape) def _assert_sparse_compatible(sparse_tensors): - """Check that all of `sparse_tensors` have same `indices` and `dense_shape` + """Check that all of `sparse_tensors` have same `indices` and `dense_shape`. Returns: An op to be used as a control dependency. 
""" From 6acbd6b91236a9b914155a0473158f58fa8a39bd Mon Sep 17 00:00:00 2001 From: zilinzhu Date: Wed, 24 Jun 2020 10:00:26 +0800 Subject: [PATCH 0052/2522] rename to reorder_data_discarding_ops --- .../core/grappler/optimizers/data/BUILD | 78 +++++++++---------- .../optimizers/data/meta_optimizer.cc | 2 +- ...card.cc => reorder_data_discarding_ops.cc} | 14 ++-- ...iscard.h => reorder_data_discarding_ops.h} | 14 ++-- ...cc => reorder_data_discarding_ops_test.cc} | 18 ++--- .../kernel_tests/optimization/BUILD | 36 ++++----- ...py => reorder_data_discarding_ops_test.py} | 8 +- .../experimental/ops/optimization_options.py | 20 ++--- ...a.experimental.-optimization-options.pbtxt | 8 +- ...a.experimental.-optimization-options.pbtxt | 8 +- 10 files changed, 103 insertions(+), 103 deletions(-) rename tensorflow/core/grappler/optimizers/data/{hoist_discard.cc => reorder_data_discarding_ops.cc} (87%) rename tensorflow/core/grappler/optimizers/data/{hoist_discard.h => reorder_data_discarding_ops.h} (76%) rename tensorflow/core/grappler/optimizers/data/{hoist_discard_test.cc => reorder_data_discarding_ops_test.cc} (83%) rename tensorflow/python/data/experimental/kernel_tests/optimization/{hoist_discard_test.py => reorder_data_discarding_ops_test.py} (90%) diff --git a/tensorflow/core/grappler/optimizers/data/BUILD b/tensorflow/core/grappler/optimizers/data/BUILD index 2b98d679e0f..88cb11b83ce 100644 --- a/tensorflow/core/grappler/optimizers/data/BUILD +++ b/tensorflow/core/grappler/optimizers/data/BUILD @@ -16,7 +16,6 @@ cc_library( deps = [ ":filter_fusion", ":filter_with_random_uniform_fusion", - ":hoist_discard", ":hoist_random_uniform", ":inject_prefetch", ":latency_all_edges", @@ -29,6 +28,7 @@ cc_library( ":meta_optimizer", ":noop_elimination", ":parallel_batch", + ":reorder_data_discarding_ops", ":shuffle_and_repeat_fusion", ":slack", ], @@ -253,44 +253,6 @@ cc_library( ] + tf_protos_all(), ) -cc_library( - name = "hoist_discard", - srcs = ["hoist_discard.cc"], - hdrs = [ - "hoist_discard.h", - ], - deps = [ - ":function_utils", - ":graph_utils", - ":optimizer_base", - "@com_google_absl//absl/container:flat_hash_set", - "//tensorflow/core:lib", - "//tensorflow/core/grappler:mutable_graph_view", - "//tensorflow/core/grappler:grappler_item", - "//tensorflow/core/grappler:op_types", - "//tensorflow/core/grappler:utils", - "//tensorflow/core/grappler/clusters:cluster", - "//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry", - "//tensorflow/core:lib_internal", - ] + tf_protos_all(), - alwayslink = 1, -) - -tf_cc_test( - name = "hoist_discard_test", - srcs = ["hoist_discard_test.cc"], - deps = [ - ":graph_test_utils", - ":graph_utils", - ":hoist_discard", - "//tensorflow/core:framework", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "//tensorflow/core/grappler:grappler_item", - ] + tf_protos_all(), -) - cc_library( name = "hoist_random_uniform", srcs = ["hoist_random_uniform.cc"], @@ -730,6 +692,44 @@ tf_cc_test( ], ) +cc_library( + name = "reorder_data_discarding_ops", + srcs = ["reorder_data_discarding_ops.cc"], + hdrs = [ + "reorder_data_discarding_ops.h", + ], + deps = [ + ":function_utils", + ":graph_utils", + ":optimizer_base", + "@com_google_absl//absl/container:flat_hash_set", + "//tensorflow/core:lib", + "//tensorflow/core/grappler:mutable_graph_view", + "//tensorflow/core/grappler:grappler_item", + "//tensorflow/core/grappler:op_types", + "//tensorflow/core/grappler:utils", + "//tensorflow/core/grappler/clusters:cluster", + 
"//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry", + "//tensorflow/core:lib_internal", + ] + tf_protos_all(), + alwayslink = 1, +) + +tf_cc_test( + name = "reorder_data_discarding_ops_test", + srcs = ["reorder_data_discarding_ops_test.cc"], + deps = [ + ":graph_test_utils", + ":graph_utils", + ":reorder_data_discarding_ops", + "//tensorflow/core:framework", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/grappler:grappler_item", + ] + tf_protos_all(), +) + cc_library( name = "shuffle_and_repeat_fusion", srcs = ["shuffle_and_repeat_fusion.cc"], diff --git a/tensorflow/core/grappler/optimizers/data/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/data/meta_optimizer.cc index f5f285c8c4d..bd7e18b807c 100644 --- a/tensorflow/core/grappler/optimizers/data/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/data/meta_optimizer.cc @@ -42,7 +42,6 @@ constexpr std::array kTFDataOptimizations = { "filter_fusion", "filter_with_random_uniform_fusion", "map_and_filter_fusion", - "hoist_discard", "hoist_random_uniform", "map_parallelization", "map_and_batch_fusion", @@ -50,6 +49,7 @@ constexpr std::array kTFDataOptimizations = { "latency_all_edges", "make_sloppy", "parallel_batch", + "reorder_data_discarding_ops", "slack", "inject_prefetch"}; diff --git a/tensorflow/core/grappler/optimizers/data/hoist_discard.cc b/tensorflow/core/grappler/optimizers/data/reorder_data_discarding_ops.cc similarity index 87% rename from tensorflow/core/grappler/optimizers/data/hoist_discard.cc rename to tensorflow/core/grappler/optimizers/data/reorder_data_discarding_ops.cc index 4b04b3882c8..3a2bdcfc974 100644 --- a/tensorflow/core/grappler/optimizers/data/hoist_discard.cc +++ b/tensorflow/core/grappler/optimizers/data/reorder_data_discarding_ops.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/grappler/optimizers/data/hoist_discard.h" +#include "tensorflow/core/grappler/optimizers/data/reorder_data_discarding_ops.h" #include "absl/container/flat_hash_set.h" #include "tensorflow/core/framework/attr_value.pb.h" @@ -32,7 +32,7 @@ namespace tensorflow { namespace grappler { namespace { -constexpr char kHoistDiscardOpPrefix[] = "hoist_discard/"; +constexpr char kReorderDataDiscardingOpsOpPrefix[] = "reorder_data_discarding_ops/"; constexpr std::array kDataDiscarding = { "ShardDataset", "SkipDataset", "TakeDataset", @@ -69,7 +69,7 @@ bool IsCardinalityPreserving(const NodeDef& node) { } // namepsace -Status HoistDiscard::OptimizeAndCollectStats(Cluster* cluster, +Status ReorderDataDiscardingOps::OptimizeAndCollectStats(Cluster* cluster, const GrapplerItem& item, GraphDef* output, OptimizationStats* stats) { @@ -95,9 +95,9 @@ Status HoistDiscard::OptimizeAndCollectStats(Cluster* cluster, NodeDef* parent = graph_utils::GetInputNode(*discard_node, graph); TF_RETURN_IF_ERROR( graph.UpdateFanouts(discard_node->name(), parent->name())); - if (!absl::StartsWith(discard_node->name(), kHoistDiscardOpPrefix)) { + if (!absl::StartsWith(discard_node->name(), kReorderDataDiscardingOpsOpPrefix)) { TF_RETURN_IF_ERROR(graph.UpdateNodeName(discard_node->name(), - strings::StrCat(kHoistDiscardOpPrefix, discard_node->name()), + strings::StrCat(kReorderDataDiscardingOpsOpPrefix, discard_node->name()), false)); } for (const auto& attr_name : {"output_types", "output_shapes"}) { @@ -113,13 +113,13 @@ Status HoistDiscard::OptimizeAndCollectStats(Cluster* cluster, return Status::OK(); } -void HoistDiscard::Feedback(Cluster* cluster, const GrapplerItem& item, +void ReorderDataDiscardingOps::Feedback(Cluster* cluster, const GrapplerItem& item, const GraphDef& optimize_output, double result) { // no-op } -REGISTER_GRAPH_OPTIMIZER_AS(HoistDiscard, "hoist_discard"); +REGISTER_GRAPH_OPTIMIZER_AS(ReorderDataDiscardingOps, "reorder_data_discarding_ops"); } // namespace grappler } // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/data/hoist_discard.h b/tensorflow/core/grappler/optimizers/data/reorder_data_discarding_ops.h similarity index 76% rename from tensorflow/core/grappler/optimizers/data/hoist_discard.h rename to tensorflow/core/grappler/optimizers/data/reorder_data_discarding_ops.h index e56bde1d804..03ac7301525 100644 --- a/tensorflow/core/grappler/optimizers/data/hoist_discard.h +++ b/tensorflow/core/grappler/optimizers/data/reorder_data_discarding_ops.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_HOIST_DISCARD_H_ -#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_HOIST_DISCARD_H_ +#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_REORDER_DATA_DISCARDING_OPS_H_ +#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_REORDER_DATA_DISCARDING_OPS_H_ #include "tensorflow/core/grappler/optimizers/data/optimizer_base.h" @@ -23,12 +23,12 @@ namespace grappler { // This optimization hoists the data discarding ops (such as `skip`, `take` and // `shard`) to avoid unnecessary computation. 
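Note: from the user's side, the rewrite this optimizer performs is opt-in through `tf.data` options. A minimal sketch (TF 2.x, using the `reorder_data_discarding_ops` option name introduced by this rename; default optimizations are switched off here only to isolate the effect) is:

import tensorflow as tf

dataset = tf.data.Dataset.range(100)
dataset = dataset.map(lambda x: x + 1)  # cardinality-preserving, so it can be reordered past
dataset = dataset.cache()
dataset = dataset.take(5)               # data-discarding op, hoisted in front of map/cache

options = tf.data.Options()
options.experimental_optimization.apply_default_optimizations = False
options.experimental_optimization.reorder_data_discarding_ops = True
dataset = dataset.with_options(options)

# The output is unchanged ([1, 2, 3, 4, 5]); the benefit is that only the first
# five elements are ever mapped and cached.
print(list(dataset.as_numpy_iterator()))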
-class HoistDiscard : public TFDataOptimizerBase { +class ReorderDataDiscardingOps : public TFDataOptimizerBase { public: - HoistDiscard() = default; - ~HoistDiscard() override = default; + ReorderDataDiscardingOps() = default; + ~ReorderDataDiscardingOps() override = default; - string name() const override { return "hoist_discard"; }; + string name() const override { return "reorder_data_discarding_ops"; }; bool UsesFunctionLibrary() const override { return false; } @@ -48,4 +48,4 @@ class HoistDiscard : public TFDataOptimizerBase { } // namespace grappler } // namespace tensorflow -#endif // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_HOIST_DISCARD_H_ +#endif // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_REORDER_DATA_DISCARDING_OPS_H_ diff --git a/tensorflow/core/grappler/optimizers/data/hoist_discard_test.cc b/tensorflow/core/grappler/optimizers/data/reorder_data_discarding_ops_test.cc similarity index 83% rename from tensorflow/core/grappler/optimizers/data/hoist_discard_test.cc rename to tensorflow/core/grappler/optimizers/data/reorder_data_discarding_ops_test.cc index d8b56df97b1..3f5a03c7528 100644 --- a/tensorflow/core/grappler/optimizers/data/hoist_discard_test.cc +++ b/tensorflow/core/grappler/optimizers/data/reorder_data_discarding_ops_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/grappler/optimizers/data/hoist_discard.h" +#include "tensorflow/core/grappler/optimizers/data/reorder_data_discarding_ops.h" #include "tensorflow/core/framework/attr_value_util.h" #include "tensorflow/core/framework/function_testlib.h" @@ -28,7 +28,7 @@ namespace tensorflow { namespace grappler { namespace { -TEST(HoistDiscardTest, ExampleOps) { +TEST(ReorderDataDiscardingOpsTest, ExampleOps) { using test::function::NDef; GrapplerItem item; item.graph = test::function::GDef( @@ -70,23 +70,23 @@ TEST(HoistDiscardTest, ExampleOps) { test::function::XTimesTwo(), }); - HoistDiscard optimizer; + ReorderDataDiscardingOps optimizer; GraphDef output; TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output)); - EXPECT_TRUE(graph_utils::ContainsGraphNodeWithName("hoist_discard/take", output)); - EXPECT_TRUE(graph_utils::ContainsGraphNodeWithName("hoist_discard/skip", output)); - EXPECT_FALSE(graph_utils::ContainsGraphNodeWithName("hoist_discard/shard", output)); + EXPECT_TRUE(graph_utils::ContainsGraphNodeWithName("reorder_data_discarding_ops/take", output)); + EXPECT_TRUE(graph_utils::ContainsGraphNodeWithName("reorder_data_discarding_ops/skip", output)); + EXPECT_FALSE(graph_utils::ContainsGraphNodeWithName("reorder_data_discarding_ops/shard", output)); EXPECT_FALSE(graph_utils::ContainsGraphNodeWithName("take", output)); EXPECT_FALSE(graph_utils::ContainsGraphNodeWithName("skip", output)); EXPECT_TRUE(graph_utils::ContainsGraphNodeWithName("shard", output)); MutableGraphView graph(&output); - EXPECT_TRUE(graph_utils::GetInputNode(*graph.GetNode("hoist_discard/take"), + EXPECT_TRUE(graph_utils::GetInputNode(*graph.GetNode("reorder_data_discarding_ops/take"), graph)->name() == "range"); - EXPECT_TRUE(graph_utils::GetInputNode(*graph.GetNode("hoist_discard/skip"), - graph)->name() == "hoist_discard/take"); + EXPECT_TRUE(graph_utils::GetInputNode(*graph.GetNode("reorder_data_discarding_ops/skip"), + graph)->name() == "reorder_data_discarding_ops/take"); EXPECT_TRUE(graph_utils::GetInputNode(*graph.GetNode("map_and_batch"), 
graph)->name() == "cache"); } diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD b/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD index 3ef1b86fd6a..f3e871a903e 100644 --- a/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD +++ b/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD @@ -97,24 +97,6 @@ cuda_py_test( ], ) -tf_py_test( - name = "hoist_discard_test", - size = "small", - srcs = ["hoist_discard_test.py"], - tags = [ - "no_oss", - "no_pip", - "no_windows", - ], - deps = [ - "//tensorflow/python:client_testlib", - "//tensorflow/python:errors", - "//tensorflow/python/data/experimental/ops:testing", - "//tensorflow/python/data/kernel_tests:test_base", - "//tensorflow/python/data/ops:dataset_ops", - ], -) - tf_py_test( name = "hoist_random_uniform_test", size = "small", @@ -322,6 +304,24 @@ tf_py_test( ], ) +tf_py_test( + name = "reorder_data_discarding_ops_test", + size = "small", + srcs = ["reorder_data_discarding_ops_test.py"], + tags = [ + "no_oss", + "no_pip", + "no_windows", + ], + deps = [ + "//tensorflow/python:client_testlib", + "//tensorflow/python:errors", + "//tensorflow/python/data/experimental/ops:testing", + "//tensorflow/python/data/kernel_tests:test_base", + "//tensorflow/python/data/ops:dataset_ops", + ], +) + tf_py_test( name = "shuffle_and_repeat_fusion_test", srcs = ["shuffle_and_repeat_fusion_test.py"], diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/hoist_discard_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/reorder_data_discarding_ops_test.py similarity index 90% rename from tensorflow/python/data/experimental/kernel_tests/optimization/hoist_discard_test.py rename to tensorflow/python/data/experimental/kernel_tests/optimization/reorder_data_discarding_ops_test.py index 3a7e078b804..be0a180ef59 100644 --- a/tensorflow/python/data/experimental/kernel_tests/optimization/hoist_discard_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/optimization/reorder_data_discarding_ops_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Tests for the `HoistDiscard` rewrite.""" +"""Tests for the `ReorderDataDiscardingOps` rewrite.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -26,7 +26,7 @@ from tensorflow.python.framework import combinations from tensorflow.python.platform import test -class HoistDiscardTest(test_base.DatasetTestBase, parameterized.TestCase): +class ReorderDataDiscardingOpsTest(test_base.DatasetTestBase, parameterized.TestCase): @combinations.generate(combinations.combine(tf_api_version=2, mode=["eager", "graph"])) @@ -44,7 +44,7 @@ class HoistDiscardTest(test_base.DatasetTestBase, parameterized.TestCase): dataset = dataset.shard(2, 0) options = dataset_ops.Options() options.experimental_optimization.apply_default_optimizations = False - options.experimental_optimization.hoist_discard = True + options.experimental_optimization.reorder_data_discarding_ops = True dataset = dataset.with_options(options) self.assertDatasetProduces(dataset, range(11, 61, 2)) @@ -65,7 +65,7 @@ class HoistDiscardTest(test_base.DatasetTestBase, parameterized.TestCase): dataset = dataset.shard(2, 0) options = dataset_ops.Options() options.experimental_optimization.apply_default_optimizations = False - options.experimental_optimization.hoist_discard = True + options.experimental_optimization.reorder_data_discarding_ops = True dataset = dataset.with_options(options) self.assertDatasetProduces(dataset, range(11, 61, 2)) diff --git a/tensorflow/python/data/experimental/ops/optimization_options.py b/tensorflow/python/data/experimental/ops/optimization_options.py index 5ed5bf2ee7e..05a4dd504d7 100644 --- a/tensorflow/python/data/experimental/ops/optimization_options.py +++ b/tensorflow/python/data/experimental/ops/optimization_options.py @@ -126,15 +126,6 @@ class OptimizationOptions(options.OptionsBase): "Whether to fuse filter dataset that predicts random_uniform < rate into " "a sampling dataset. If None, defaults to False.") - hoist_discard = options.create_option( - name="hoist_discard", - ty=bool, - docstring= - "Whether to hoist ops that will discard data (such as skip, take, shard)" - "out of unary cardinality preserved transformations, e.g. " - "dataset.map(...).take(3) gets optimized to dataset.take(3).map()." - "If None, defaults to False.") - hoist_random_uniform = options.create_option( name="hoist_random_uniform", ty=bool, @@ -189,6 +180,15 @@ class OptimizationOptions(options.OptionsBase): docstring="Whether to parallelize copying of batch elements. If None, " "defaults to False.") + reorder_data_discarding_ops = options.create_option( + name="reorder_data_discarding_ops", + ty=bool, + docstring= + "Whether to hoist ops that will discard data (such as skip, take, shard)" + "out of unary cardinality preserved transformations, e.g. " + "dataset.map(...).take(3) gets optimized to dataset.take(3).map()." 
+ "If None, defaults to False.") + shuffle_and_repeat_fusion = options.create_option( name="shuffle_and_repeat_fusion", ty=bool, @@ -227,7 +227,6 @@ class OptimizationOptions(options.OptionsBase): all_optimizations = [ "filter_fusion", "filter_with_random_uniform_fusion", - "hoist_discard", "hoist_random_uniform", "map_and_batch_fusion", "map_and_filter_fusion", @@ -235,6 +234,7 @@ class OptimizationOptions(options.OptionsBase): "map_fusion", "noop_elimination", "parallel_batch", + "reorder_data_discarding_ops", "shuffle_and_repeat_fusion", ] for optimization in all_optimizations: diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-optimization-options.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-optimization-options.pbtxt index 2a59682df2e..e33265430ea 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-optimization-options.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-optimization-options.pbtxt @@ -27,10 +27,6 @@ tf_class { name: "filter_with_random_uniform_fusion" mtype: "" } - member { - name: "hoist_discard" - mtype: "" - } member { name: "hoist_random_uniform" mtype: "" @@ -63,6 +59,10 @@ tf_class { name: "parallel_batch" mtype: "" } + member { + name: "reorder_data_discarding_ops" + mtype: "" + } member { name: "shuffle_and_repeat_fusion" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-optimization-options.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-optimization-options.pbtxt index 2a59682df2e..e33265430ea 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-optimization-options.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-optimization-options.pbtxt @@ -27,10 +27,6 @@ tf_class { name: "filter_with_random_uniform_fusion" mtype: "" } - member { - name: "hoist_discard" - mtype: "" - } member { name: "hoist_random_uniform" mtype: "" @@ -63,6 +59,10 @@ tf_class { name: "parallel_batch" mtype: "" } + member { + name: "reorder_data_discarding_ops" + mtype: "" + } member { name: "shuffle_and_repeat_fusion" mtype: "" From bbf639859f43feec75081b9e9c9c739f07f65feb Mon Sep 17 00:00:00 2001 From: zilinzhu Date: Wed, 24 Jun 2020 10:17:15 +0800 Subject: [PATCH 0053/2522] add details to doc --- .../optimizers/data/reorder_data_discarding_ops.cc | 6 +++--- .../optimizers/data/reorder_data_discarding_ops.h | 5 +++-- .../optimization/reorder_data_discarding_ops_test.py | 4 ++-- .../data/experimental/ops/optimization_options.py | 11 ++++++++--- 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/data/reorder_data_discarding_ops.cc b/tensorflow/core/grappler/optimizers/data/reorder_data_discarding_ops.cc index 3a2bdcfc974..e7de3759594 100644 --- a/tensorflow/core/grappler/optimizers/data/reorder_data_discarding_ops.cc +++ b/tensorflow/core/grappler/optimizers/data/reorder_data_discarding_ops.cc @@ -32,7 +32,7 @@ namespace tensorflow { namespace grappler { namespace { -constexpr char kReorderDataDiscardingOpsOpPrefix[] = "reorder_data_discarding_ops/"; +constexpr char kReorderDataDiscardingOpPrefix[] = "reorder_data_discarding_ops/"; constexpr std::array kDataDiscarding = { "ShardDataset", "SkipDataset", "TakeDataset", @@ -95,9 +95,9 @@ Status ReorderDataDiscardingOps::OptimizeAndCollectStats(Cluster* cluster, NodeDef* parent = graph_utils::GetInputNode(*discard_node, graph); TF_RETURN_IF_ERROR( graph.UpdateFanouts(discard_node->name(), parent->name())); - 
if (!absl::StartsWith(discard_node->name(), kReorderDataDiscardingOpsOpPrefix)) { + if (!absl::StartsWith(discard_node->name(), kReorderDataDiscardingOpPrefix)) { TF_RETURN_IF_ERROR(graph.UpdateNodeName(discard_node->name(), - strings::StrCat(kReorderDataDiscardingOpsOpPrefix, discard_node->name()), + strings::StrCat(kReorderDataDiscardingOpPrefix, discard_node->name()), false)); } for (const auto& attr_name : {"output_types", "output_shapes"}) { diff --git a/tensorflow/core/grappler/optimizers/data/reorder_data_discarding_ops.h b/tensorflow/core/grappler/optimizers/data/reorder_data_discarding_ops.h index 03ac7301525..6079d1de7f5 100644 --- a/tensorflow/core/grappler/optimizers/data/reorder_data_discarding_ops.h +++ b/tensorflow/core/grappler/optimizers/data/reorder_data_discarding_ops.h @@ -21,8 +21,9 @@ limitations under the License. namespace tensorflow { namespace grappler { -// This optimization hoists the data discarding ops (such as `skip`, `take` and -// `shard`) to avoid unnecessary computation. +// This optimization reorders the data discarding ops (such as `skip`, `take` +// and `shard`) to avoid unnecessary computation, +// e.g. reordering ds.map(...).cache().take(5) to ds.take(5).map(...).cache(). class ReorderDataDiscardingOps : public TFDataOptimizerBase { public: ReorderDataDiscardingOps() = default; diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/reorder_data_discarding_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/reorder_data_discarding_ops_test.py index be0a180ef59..0e7d0fafbb5 100644 --- a/tensorflow/python/data/experimental/kernel_tests/optimization/reorder_data_discarding_ops_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/optimization/reorder_data_discarding_ops_test.py @@ -30,7 +30,7 @@ class ReorderDataDiscardingOpsTest(test_base.DatasetTestBase, parameterized.Test @combinations.generate(combinations.combine(tf_api_version=2, mode=["eager", "graph"])) - def testSimpleHoistingV2(self): + def testSimpleReorderingV2(self): dataset = dataset_ops.Dataset.range(100) dataset = dataset.apply( testing.assert_next(["FiniteSkip", "FiniteTake", "Shard", @@ -50,7 +50,7 @@ class ReorderDataDiscardingOpsTest(test_base.DatasetTestBase, parameterized.Test @combinations.generate(combinations.combine(tf_api_version=1, mode=["eager", "graph"])) - def testSimpleHoistingV1(self): + def testSimpleReorderingV1(self): dataset = dataset_ops.Dataset.range(100) # Map ops have preserve_cardinality=false in tensorflow v1. dataset = dataset.apply( diff --git a/tensorflow/python/data/experimental/ops/optimization_options.py b/tensorflow/python/data/experimental/ops/optimization_options.py index 05a4dd504d7..12377f49930 100644 --- a/tensorflow/python/data/experimental/ops/optimization_options.py +++ b/tensorflow/python/data/experimental/ops/optimization_options.py @@ -184,9 +184,14 @@ class OptimizationOptions(options.OptionsBase): name="reorder_data_discarding_ops", ty=bool, docstring= - "Whether to hoist ops that will discard data (such as skip, take, shard)" - "out of unary cardinality preserved transformations, e.g. " - "dataset.map(...).take(3) gets optimized to dataset.take(3).map()." + "Whether to reorder ops that will discard data to the front of unary" + "cardinality preserved transformations, e.g. dataset.map(...).take(3)" + "will be optimized to dataset.take(3).map(...). For now this" + "optimization will move `skip`, `shard` and `take` to the front of" + "`cache`, `map` and `prefetch`. 
And notice this optimization is only" + "for performance, it will not affect the output of the dataset." + "However, it will influence the cache to the file, for the unused" + "data will no longer be saved after this optimization." "If None, defaults to False.") shuffle_and_repeat_fusion = options.create_option( From fda56a4c619d093108cd2778e74073d65fb0e407 Mon Sep 17 00:00:00 2001 From: Balint Cristian Date: Wed, 24 Jun 2020 12:10:23 +0300 Subject: [PATCH 0054/2522] [EXT-SYSLIB] Add absl_py logging submodule to build flow. --- tensorflow/opensource_only.files | 1 + tensorflow/workspace.bzl | 1 + third_party/systemlibs/absl_py.absl.logging.BUILD | 11 +++++++++++ 3 files changed, 13 insertions(+) create mode 100644 third_party/systemlibs/absl_py.absl.logging.BUILD diff --git a/tensorflow/opensource_only.files b/tensorflow/opensource_only.files index 3d57e5f2089..cddf018bb21 100644 --- a/tensorflow/opensource_only.files +++ b/tensorflow/opensource_only.files @@ -170,6 +170,7 @@ tensorflow/third_party/systemlibs/BUILD.tpl tensorflow/third_party/systemlibs/absl_py.BUILD tensorflow/third_party/systemlibs/absl_py.absl.flags.BUILD tensorflow/third_party/systemlibs/absl_py.absl.testing.BUILD +tensorflow/third_party/systemlibs/absl_py.absl.logging.BUILD tensorflow/third_party/systemlibs/astor.BUILD tensorflow/third_party/systemlibs/boringssl.BUILD tensorflow/third_party/systemlibs/build_defs.bzl.tpl diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index f2d0c028c5f..d142910619c 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -535,6 +535,7 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): "//third_party/systemlibs:absl_py.absl.BUILD": "absl/BUILD", "//third_party/systemlibs:absl_py.absl.flags.BUILD": "absl/flags/BUILD", "//third_party/systemlibs:absl_py.absl.testing.BUILD": "absl/testing/BUILD", + "//third_party/systemlibs:absl_py.absl.logging.BUILD": "absl/logging/BUILD", }, urls = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/abseil/abseil-py/archive/pypi-v0.9.0.tar.gz", diff --git a/third_party/systemlibs/absl_py.absl.logging.BUILD b/third_party/systemlibs/absl_py.absl.logging.BUILD new file mode 100644 index 00000000000..71cfc7a247c --- /dev/null +++ b/third_party/systemlibs/absl_py.absl.logging.BUILD @@ -0,0 +1,11 @@ +licenses(["notice"]) # Apache 2.0 + +package(default_visibility = ["//visibility:public"]) + +filegroup( + name = "LICENSE", +) + +py_library( + name = "logging", +) From b619d2cfdcb2a1875333243bb7b8bb49ec95ad10 Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Wed, 24 Jun 2020 14:51:28 +0000 Subject: [PATCH 0055/2522] cc build file --- tensorflow/cc/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD index e1fad8e697a..8602bfafff8 100644 --- a/tensorflow/cc/BUILD +++ b/tensorflow/cc/BUILD @@ -558,6 +558,7 @@ tf_gen_op_wrappers_cc( "io_ops", "linalg_ops", "list_ops", + "map_ops", "logging_ops", "lookup_ops", "manip_ops", From fc15ab8a358a2b14d683671fbb1de403ed77c6b8 Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Wed, 24 Jun 2020 16:58:44 +0000 Subject: [PATCH 0056/2522] initial TensorMap class --- tensorflow/core/kernels/tensor_map.cc | 146 +++++++++++++++++++++++ tensorflow/core/kernels/tensor_map.h | 159 ++++++++++++++++++++++++++ 2 files changed, 305 insertions(+) create mode 100644 tensorflow/core/kernels/tensor_map.cc create mode 100644 tensorflow/core/kernels/tensor_map.h diff --git a/tensorflow/core/kernels/tensor_map.cc 
b/tensorflow/core/kernels/tensor_map.cc new file mode 100644 index 00000000000..a0b2c7a9f7a --- /dev/null +++ b/tensorflow/core/kernels/tensor_map.cc @@ -0,0 +1,146 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/kernels/tensor_map.h" + +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/tensor_shape.pb.h" +#include "tensorflow/core/framework/variant_op_registry.h" +#include "tensorflow/core/lib/core/coding.h" + +namespace tensorflow { + +TensorMap::~TensorMap() { + if (tensors_) tensors_->Unref(); +} + +void TensorMap::Encode(VariantTensorData* data) const { + data->set_type_name(TypeName()); + + std::map::iterator map_it = tensors().begin(); + size_t i = 0; + std::vector invalid_indices; + while (map_it != tensors().end()) { + Tensor k = map_it->first; + Tensor v = map_it->second; + // k should also not be DT_RESOURCE or DT_VARIANT + if(k.dtype != DT_INVALID && v.dtype != DT_INVALID) { + *data->add_tensors() = k; + *data->add_tensors() = v; + // not sure if this is the correct order + } + else { + invalid_indices.push_back(i); + } + } + /* + for (size_t i = 0; i < tensors().size(); i++) { + if (tensors().at(i).dtype() != DT_INVALID) { + *data->add_tensors() = tensors().at(i); + } else { + invalid_indices.push_back(i); + } + }*/ + string metadata; + // TODO(b/118838800): Add a proto for storing the metadata. + // Metadata format: + // + core::PutVarint64(&metadata, static_cast(invalid_indices.size())); + for (size_t i : invalid_indices) { + core::PutVarint64(&metadata, static_cast(i)); + } + core::PutVarint64(&metadata, static_cast(element_dtype)); + core::PutVarint64(&metadata, static_cast(max_num_elements)); + TensorShapeProto element_shape_proto; + element_shape.AsProto(&element_shape_proto); + element_shape_proto.AppendToString(&metadata); + data->set_metadata(metadata); +} + +static Status TensorMapDeviceCopy( + const TensorMap& from, TensorMap* to, + const UnaryVariantOpRegistry::AsyncTensorDeviceCopyFn& copy) { + to->element_shape = from.element_shape; + to->element_dtype = from.element_dtype; + to->max_num_elements = from.max_num_elements; + //to->tensors().reserve(from.tensors().size()); + for (const std::pair& p : from.tensors()) { + to->tensors().emplace(p); //why was it emplace t.dtype? 
+ if (t.dtype() != DT_INVALID) { + //TF_RETURN_IF_ERROR(copy(p, &to->tensors().back())); + } + } + return Status::OK(); +} + +#define REGISTER_LIST_COPY(DIRECTION) \ + INTERNAL_REGISTER_UNARY_VARIANT_DEVICE_COPY_FUNCTION(TensorMap, DIRECTION, \ + TensorMapDeviceCopy) + +REGISTER_LIST_COPY(VariantDeviceCopyDirection::HOST_TO_DEVICE); +REGISTER_LIST_COPY(VariantDeviceCopyDirection::DEVICE_TO_HOST); +REGISTER_LIST_COPY(VariantDeviceCopyDirection::DEVICE_TO_DEVICE); + +REGISTER_UNARY_VARIANT_DECODE_FUNCTION(TensorMap, TensorMap::kTypeName); + +bool TensorMap::Decode(const VariantTensorData& data) { + // TODO(srbs): Change the signature to Decode(VariantTensorData data) so + // that we do not have to copy each tensor individually below. This would + // require changing VariantTensorData::tensors() as well. + string metadata; + data.get_metadata(&metadata); + uint64 scratch; + StringPiece iter(metadata); + std::vector invalid_indices; + core::GetVarint64(&iter, &scratch); + size_t num_invalid_tensors = static_cast(scratch); + invalid_indices.resize(num_invalid_tensors); + for (size_t i = 0; i < num_invalid_tensors; i++) { + core::GetVarint64(&iter, &scratch); + invalid_indices[i] = static_cast(scratch); + } + + size_t total_num_tensors = data.tensors().size()/2 + num_invalid_tensors; + //tensors().reserve(total_num_tensors); + std::vector::iterator invalid_indices_it = invalid_indices.begin(); + std::vector::const_iterator tensors_it = data.tensors().begin(); + for (size_t i = 0; i < total_num_tensors; i++) { + if (invalid_indices_it != invalid_indices.end() && + *invalid_indices_it == i) { + //no need to do invalid indices for a map + //tensors().emplace(Tensor(DT_INVALID),Tensor(DT_INVALID)); + invalid_indices_it++; + } else if (tensors_it != data.tensors().end()) { + // should assert that tensors_it + 1 is also not the end + tensors().emplace(*tensors_it,*++tensors_it); + tensors_it++; + } else { + // VariantTensorData is corrupted. + return false; + } + } + + core::GetVarint64(&iter, &scratch); + element_dtype = static_cast(scratch); + core::GetVarint64(&iter, &scratch); + max_num_elements = static_cast(scratch); + TensorShapeProto element_shape_proto; + element_shape_proto.ParseFromString(string(iter.data(), iter.size())); + element_shape = PartialTensorShape(element_shape_proto); + return true; +} + +const char TensorMap::kTypeName[] = "tensorflow::TensorMap"; + +} // namespace tensorflow diff --git a/tensorflow/core/kernels/tensor_map.h b/tensorflow/core/kernels/tensor_map.h new file mode 100644 index 00000000000..d8726bbecb4 --- /dev/null +++ b/tensorflow/core/kernels/tensor_map.h @@ -0,0 +1,159 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_CORE_KERNELS_TENSOR_LIST_H_ +#define TENSORFLOW_CORE_KERNELS_TENSOR_LIST_H_ + +#include + +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/variant.h" +#include "tensorflow/core/framework/variant_tensor_data.h" +#include "tensorflow/core/lib/core/refcount.h" + +namespace tensorflow { + +// Variant compatible type for a map of tensors. This is mutable but instances +// should never be mutated after stored in a variant tensor. +// +// **NOTE**: TensorMap stores a refcounted container of tf::Tensor objects, +// which are accessible via TensorMap::tensors(). Because it is refcounted, +// straight copies of the form: +// +// TensorMap b = a; +// b.tensors().insert(k,v); // WARNING: This modifies a.tensors(). +// +// Do not create a true copy of the underlying container - but instead increment +// a reference count. Modifying b.tensors() modifies a.tensors(). In this way, +// TensorList should be considered similar to the tf::Tensor object. +// +// In order to get a copy of the underlying list, use the Copy method: +// +// TensorList b = a.Copy(); +// b.tensors().push_back(t); // This does not modify a.tensors(). +// +// Note that this is not a deep copy: the memory locations of the underlying +// tensors will still point to the same locations of the corresponding tensors +// in the original. To truly perform a deep copy, Device and Type-specific +// code needs to be applied to the underlying tensors as usual. +// +// The most important implication of RefCounted TLs is that OpKernels +// wishing to reuse TensorList inputs as outputs via context->forward_input() +// need to perform an additional check on the refcount of the TensorList, +// to ensure aliasing can be performed safely. 
For example: +// +// bool can_alias = false; +// auto fw = c->forward_input(..., DT_VARIANT, {}, ...); +// if (fw && fw->dtype() == DT_VARIANT && fw->NumElements() == 1) { +// auto* tl = fw->scalar()().get(); +// if (tl && tl->RefCountIsOne()) { +// can_alias = true; +// } +// } +// +class TensorMap { + public: + TensorMap() : tensors_(new Tensors) {} + ~TensorMap(); + + TensorMap(const TensorMap& other) + : element_shape(other.element_shape), + element_dtype(other.element_dtype), + max_num_elements(other.max_num_elements), + tensors_(other.tensors_) { + tensors_->Ref(); + } + + TensorMap(TensorMap&& rhs) + : element_shape(std::move(rhs.element_shape)), + element_dtype(rhs.element_dtype), + max_num_elements(rhs.max_num_elements), + tensors_(rhs.tensors_) { + rhs.tensors_ = nullptr; + } + + TensorMap& operator=(const TensorMap& rhs) { + if (this == &rhs) return *this; + element_shape = rhs.element_shape; + element_dtype = rhs.element_dtype; + max_num_elements = rhs.max_num_elements; + tensors_->Unref(); + tensors_ = rhs.tensors_; + tensors_->Ref(); + return *this; + } + + TensorMap& operator=(TensorMap&& rhs) { + if (this == &rhs) return *this; + element_shape = rhs.element_shape; + element_dtype = rhs.element_dtype; + max_num_elements = rhs.max_num_elements; + std::swap(tensors_, rhs.tensors_); + return *this; + } + + static const char kTypeName[]; + + string TypeName() const { return kTypeName; } + + void Encode(VariantTensorData* data) const; + + bool Decode(const VariantTensorData& data); + + // TODO(apassos) fill this out + string DebugString() const { return "TensorMap"; } + + PartialTensorShape element_shape; + + DataType element_dtype; + + // The maximum allowed size of `tensors`. Defaults to -1 meaning that the size + // of `tensors` is unbounded. + int max_num_elements = -1; + + // Access to the underlying tensor container. + std::map& tensors() { return tensors_->values_; } + const std::map& tensors() const { return tensors_->values_; } + + // Get a new TensorList containing a copy of the underlying tensor container. + TensorMap Copy() const { + TensorMap out; + out.element_shape = element_shape; + out.element_dtype = element_dtype; + out.max_num_elements = max_num_elements; + // This performs a copy of the std::map. + out.tensors_->values_ = tensors_->values_; + return out; + } + + // Is this TensorMap the only one with a reference to the underlying + // container? + bool RefCountIsOne() const { return tensors_->RefCountIsOne(); } + + private: + class Tensors : public core::RefCounted { + public: + std::map values_; + }; + Tensors* tensors_; +}; + +#if defined(PLATFORM_GOOGLE) +// TODO(ebrevdo): Identify why Variant inline size is smaller on mobile devices. 
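+// A minimal usage sketch (illustrative only, mirroring how TensorList is
+// handled; not an API guarantee): a TensorMap is normally stored in a
+// DT_VARIANT scalar tensor and read back through Variant::get:
+//
+//   Tensor variant_t(DT_VARIANT, TensorShape({}));
+//   variant_t.scalar<Variant>()() = std::move(my_map);
+//   const TensorMap* read = variant_t.scalar<Variant>()().get<TensorMap>();
+//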
+static_assert(Variant::CanInlineType(), + "Must be able to inline TensorMap into a Variant"); +#endif +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_KERNELS_TENSOR_LIST_H_ From 26612305bb8c8af70f8845d2d3bb65e96bf3dbe8 Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Wed, 24 Jun 2020 17:01:49 +0000 Subject: [PATCH 0057/2522] build and test updates --- tensorflow/core/BUILD | 1 + tensorflow/core/kernels/BUILD | 39 +++++++++++++++++++ tensorflow/core/kernels/map_kernels.cc | 36 ++++++++++++++++- tensorflow/core/ops/map_ops.cc | 20 ++++++++++ .../python/kernel_tests/map_ops_test.py | 21 +++++++++- tensorflow/python/ops/map_ops.py | 16 +++++++- 6 files changed, 129 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 66e7061642c..d6e44bb36aa 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1120,6 +1120,7 @@ cc_library( # these also dynamically loading. "//tensorflow/core/kernels:dataset_ops", # Depends on grappler "//tensorflow/core/kernels:list_kernels", # Depends on variant_op_registry.h + "//tensorflow/core/kernels:map_kernels", ], ) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index fd6a8ab1cf6..5139dd95e5d 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -2937,6 +2937,45 @@ tf_kernel_library( "//third_party/eigen3", ], ) +cc_library( + name = "tensor_map", + srcs = ["tensor_map.cc"], + hdrs = ["tensor_map.h"], + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core/framework:tensor_shape_proto_cc", + "//tensorflow/core/lib/core:refcount", + ], +) + +tf_kernel_library( + name = "map_kernels", + srcs = ["map_kernels.cc"], + deps = [ + ":concat_lib", + ":fill_functor", + ":tensor_map", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//third_party/eigen3", + ], +) + +tf_cc_tests( + name = "tensor_map_test", + size = "small", + srcs = [ + "tensor_map_test.cc" + ], + deps = [ + ":tensor_map", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "@com_google_absl//absl/strings", + ], +) tf_kernel_library( name = "fact_op", diff --git a/tensorflow/core/kernels/map_kernels.cc b/tensorflow/core/kernels/map_kernels.cc index bd6d880aec5..17793cdc0aa 100644 --- a/tensorflow/core/kernels/map_kernels.cc +++ b/tensorflow/core/kernels/map_kernels.cc @@ -14,11 +14,45 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/kernels/tensor_map.h" #include using namespace std; namespace tensorflow { +/*class EmptyTensorMap : public OpKernel { + public: + explicit EmptyTensorMap(OpKernelConstruction* ctx) : OpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("element_dtype", &element_dtype_)); + } + + void Compute(OpKernelContext* ctx) override { + const Tensor& max_num_elements_t = ctx->input(1); + OP_REQUIRES( + ctx, TensorShapeUtils::IsScalar(max_num_elements_t.shape()), + errors::InvalidArgument( + "max_num_elements expected to be a scalar ", + "but got shape: ", max_num_elements_t.shape().DebugString())); + Tensor* result; + AllocatorAttributes attr; + attr.set_on_host(true); + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape{}, &result, attr)); + TensorMap empty; + empty.element_dtype = element_dtype_; + empty.max_num_elements = max_num_elements_t.scalar()(); + PartialTensorShape element_shape; + OP_REQUIRES_OK(ctx, TensorShapeFromTensor(ctx->input(0), &element_shape)); + empty.element_shape = element_shape; + result->scalar()() = std::move(empty); + } + + private: + DataType element_dtype_; +}; + +REGISTER_KERNEL_BUILDER(Name("EmptyTensorMap").Device(DEVICE_CPU), + EmptyTensorMap);*/ + class ZeroOutOp : public OpKernel { public: explicit ZeroOutOp(OpKernelConstruction* c) : OpKernel(c) {} @@ -31,7 +65,7 @@ class ZeroOutOp : public OpKernel { // Create an output tensor Tensor* output_tensor = NULL; - OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(), + OP_REQUIRES_OK(c, c->allocate_output(0, input_tensor.shape(), &output_tensor)); auto output_flat = output_tensor->flat(); diff --git a/tensorflow/core/ops/map_ops.cc b/tensorflow/core/ops/map_ops.cc index bdad6d389b0..59c20c6d75f 100644 --- a/tensorflow/core/ops/map_ops.cc +++ b/tensorflow/core/ops/map_ops.cc @@ -20,6 +20,26 @@ limitations under the License. 
namespace tensorflow { namespace { + +REGISTER_OP("EmptyTensorMap") + .Input("element_shape: shape_type") + .Input("max_num_elements: int32") + .Output("handle: variant") + .Attr("element_dtype: type") + .Attr("shape_type: {int32, int64}") + .SetShapeFn([](shape_inference::InferenceContext* c) { + c->set_output(0, c->Scalar()); + DataType element_dtype; + TF_RETURN_IF_ERROR(c->GetAttr("element_dtype", &element_dtype)); + shape_inference::ShapeHandle element_shape; + TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensorTreatScalarAsUnknownShape( + 0, &element_shape)); + c->set_output_handle_shapes_and_types( + 0, std::vector{ + {element_shape, element_dtype}}); + return Status::OK(); + }); + REGISTER_OP("ZeroOut") .Input("to_zero: int32") .Output("zeroed: int32") diff --git a/tensorflow/python/kernel_tests/map_ops_test.py b/tensorflow/python/kernel_tests/map_ops_test.py index 28cadf0a6df..7b9654886f3 100644 --- a/tensorflow/python/kernel_tests/map_ops_test.py +++ b/tensorflow/python/kernel_tests/map_ops_test.py @@ -20,19 +20,36 @@ from __future__ import print_function import numpy as np from tensorflow.python.platform import test +from absl.testing import parameterized +from tensorflow.python.framework import test_util + #try: # from tensorflow_zero_out.python.ops.zero_out_ops import zero_out #except ImportError: # from zero_out_ops import zero_out from tensorflow.python.ops import map_ops -class ZeroOutTest(test.TestCase): +class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): + """ + @parameterized.named_parameters(("NoMaxNumElements", None), + ("WithMaxNumElements", 2)) + @test_util.run_deprecated_v1 + def testEraseFromEmptyTensorMapFails(self, max_num_elements): + m = map_ops.empty_tensor_map( + element_dtype=dtypes.float32, + element_shape=[], + max_num_elements=max_num_elements) + with self.assertRaisesRegexp(errors.InvalidArgumentError, + "Trying to erase from an empty map"): + m = map_ops.tensor_map_erase(l, element_dtype=dtypes.float32) + self.evaluate(l) + """ def testZeroOut(self): print("Hello World - Test") with self.test_session(): self.assertAllClose( - zero_out([[1, 2], [3, 4]]), np.array([[1, 0], [0, 0]])) + map_ops.zero_out([[1, 2], [3, 4]]), np.array([[1, 0], [0, 0]])) if __name__ == '__main__': diff --git a/tensorflow/python/ops/map_ops.py b/tensorflow/python/ops/map_ops.py index 93f4cc0bdc8..4abd7f3f998 100644 --- a/tensorflow/python/ops/map_ops.py +++ b/tensorflow/python/ops/map_ops.py @@ -22,6 +22,7 @@ from tensorflow.python.framework import load_library from tensorflow.python.platform import resource_loader # go/tf-wildcard-import # pylint: disable=wildcard-import +from tensorflow.python.framework import ops from tensorflow.python.ops import gen_map_ops from tensorflow.python.ops.gen_map_ops import * @@ -29,8 +30,21 @@ from tensorflow.python.ops.gen_map_ops import * # resource_loader.get_path_to_datafile('_zero_out_ops.so')) #zero_out = zero_out_ops.zero_out +def empty_tensor_map(element_shape, + element_dtype, + max_num_elements=None, + name=None): + if max_num_elements is None: + max_num_elements = -1 + + return gen_map_ops.empty_tensor_map( + element_shape=_build_element_shape(element_shape), + element_dtype=element_dtype, + max_num_elements=max_num_elements, + name=name) + def zero_out(to_zero): - print("Hello World - PythonS Op") + print("Hello World - Python Op") return gen_map_ops.zero_out(to_zero) @ops.RegisterGradient("ZeroOut") From 6dac692f13b42f29f8e9a03ee250453b56911e57 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Tue, 23 Jun 2020 18:32:55 
+0000 Subject: [PATCH 0058/2522] Convert from next(iter(dataset)) to get_single_element(dataset.take(1)) to avoid graph mode failure Also specify the full shape to try to fix internal failure Signed-off-by: Yong Tang --- tensorflow/python/ops/image_ops_test.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py index 14c19193fd9..a254ce0d8d3 100644 --- a/tensorflow/python/ops/image_ops_test.py +++ b/tensorflow/python/ops/image_ops_test.py @@ -31,11 +31,13 @@ from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.core.protobuf import config_pb2 from tensorflow.python.client import session from tensorflow.python.compat import compat +from tensorflow.python.data.experimental.ops import get_single_element from tensorflow.python.data.ops import dataset_ops from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops @@ -1375,10 +1377,13 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): def generator(): yield image_input dataset = dataset_ops.Dataset.from_generator( - generator, output_types=dtypes.int32) + generator, + output_types=dtypes.int32, + output_shapes=tensor_shape.TensorShape([1, 2, 2, 3])) dataset = dataset.map(image_ops.flip_left_right) - image_flipped_via_dataset_map = next(iter(dataset)) + image_flipped_via_dataset_map = get_single_element.get_single_element( + dataset.take(1)) self.assertAllEqual(image_flipped_via_dataset_map, expected_output) class AdjustContrastTest(test_util.TensorFlowTestCase): From 477470d094b2e96eec8aecae91f9af699946ecb1 Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Wed, 24 Jun 2020 18:35:02 +0000 Subject: [PATCH 0059/2522] finished test file --- tensorflow/c/kernels.cc | 27 ++++++ tensorflow/c/kernels.h | 4 + tensorflow/c/kernels/BUILD | 39 ++++++++ tensorflow/c/kernels/summary_op.cc | 56 ++++++------ tensorflow/c/kernels/summary_op_test.cc | 113 ++++++++++++++++++++---- tensorflow/c/tf_tensor.cc | 99 ++++----------------- tensorflow/c/tf_tensor.h | 5 ++ tensorflow/c/tf_tensor_internal.h | 3 + 8 files changed, 220 insertions(+), 126 deletions(-) diff --git a/tensorflow/c/kernels.cc b/tensorflow/c/kernels.cc index a0ed0d9f245..e1ece820ab7 100644 --- a/tensorflow/c/kernels.cc +++ b/tensorflow/c/kernels.cc @@ -26,6 +26,9 @@ limitations under the License. #include "tensorflow/core/framework/types.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/lib/gtl/array_slice.h" + // This file forms the basis of a stable ABI for third-party kernel // implementations. It is crucial that changes to this file are made cautiously // and with a focus on maintaining both source and binary compatibility. 
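// The hunk below adds TF_AllocateTemp to the kernel C API. A rough usage
// sketch from a kernel's Compute function, assuming the five-argument form
// declared in kernels.h (context, dtype, dims, num_dims, status) is the
// signature that is ultimately kept (the definition in kernels.cc is still in
// flux); the shape values here are hypothetical:
//
//   int64_t dims[] = {batch_size, num_channels};
//   TF_Status* s = TF_NewStatus();
//   TF_Tensor* scratch = TF_AllocateTemp(ctx, TF_FLOAT, dims, 2, s);
//   if (TF_GetCode(s) == TF_OK) {
//     // ... use TF_TensorData(scratch) as temporary workspace ...
//   }
//   TF_DeleteTensor(scratch);
//   TF_DeleteStatus(s);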
@@ -260,3 +263,27 @@ TF_Tensor* TF_AllocateOutput(TF_OpKernelContext* context, int index, } return result; } + + +TF_Tensor* TF_AllocateTemp(TF_OpKernelContext* context, TF_DataType dtype, + int64_t* dims, int num_dims, TF_Status* Status, TF_Tensor* tf_tensor_temp){ + auto* cc_ctx = reinterpret_cast<::tensorflow::OpKernelContext*>(context); + // convert inputs to compatible types for API call + // tensorflow::DataType enum_of_dtype = tensorflow::EnumToDataType::v(); + // temp_tensor = Tensor(dtype, shape); + // tensorflow::TensorShape s(dimensions); + tensorflow::TensorShape shape; + for(int i = 0; i < num_dims; ++i){ + shape.AddDim(dims[i]); + } + tensorflow::Status allocation_status; + tensorflow::Tensor tensor_temp; + TF_TensorToTensor(tf_tensor_temp, &tensor_temp); + allocation_status = cc_ctx->allocate_temp(static_cast(dtype), shape, &tensor_temp); + tf_tensor_temp = TF_TensorFromTensor(tensor_temp, &allocation_status); + + + + + // Status allocation_status = cc_ctx->allocate_temp() +} diff --git a/tensorflow/c/kernels.h b/tensorflow/c/kernels.h index 084717c1d9e..9fcfdbeddc2 100644 --- a/tensorflow/c/kernels.h +++ b/tensorflow/c/kernels.h @@ -190,6 +190,10 @@ TF_CAPI_EXPORT TF_Tensor* TF_AllocateOutput(TF_OpKernelContext* context, int64_t* dims, int num_dims, size_t len, TF_Status* status); +TF_CAPI_EXPORT extern TF_Tensor* TF_AllocateTemp(TF_OpKernelContext* context, TF_DataType dtype, + int64_t* dims, int num_dims, TF_Status* Status); + + #ifdef __cplusplus } /* end extern "C" */ #endif diff --git a/tensorflow/c/kernels/BUILD b/tensorflow/c/kernels/BUILD index 770352c62c1..3ce53309841 100644 --- a/tensorflow/c/kernels/BUILD +++ b/tensorflow/c/kernels/BUILD @@ -24,6 +24,21 @@ tf_kernel_library( ], ) +tf_kernel_library( + name = "summary_op", + prefix = "summary_op", + deps = [ + "//tensorflow/c:kernels", + "//tensorflow/c:ops", + "//tensorflow/c:tf_datatype", + "//tensorflow/c:tf_status", + "//tensorflow/c:tf_tensor", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + ], +) + + tf_gen_op_libs( op_lib_names = ["bitcast"], deps = [ @@ -35,6 +50,17 @@ tf_gen_op_libs( ], ) +tf_gen_op_libs( + op_lib_names = ["summary"], + deps = [ + "//tensorflow/c:ops", + "//tensorflow/c:tf_datatype", + "//tensorflow/c:tf_status", + "//tensorflow/c:tf_tensor", + "//tensorflow/core:lib", + ], +) + tf_cc_test( name = "bitcast_op_test", srcs = ["bitcast_op_test.cc"], @@ -48,6 +74,19 @@ tf_cc_test( ], ) +tf_cc_test( + name = "summary_op_test", + srcs = ["summary_op_test.cc"], + deps = [ + ":summary_op", + ":summary_op_lib", + "//tensorflow/core:framework", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + ], +) # Changes to the Android srcs here should be replicated in # tensorflow/contrib/makefile/tf_op_files.txt. # diff --git a/tensorflow/c/kernels/summary_op.cc b/tensorflow/c/kernels/summary_op.cc index 6921eb4fdaa..002de6fb6e8 100644 --- a/tensorflow/c/kernels/summary_op.cc +++ b/tensorflow/c/kernels/summary_op.cc @@ -29,13 +29,14 @@ limitations under the License. 
#include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/types.h" +#include // TODO: Copy over Summary Scalar Op Doc static void* SummaryScalarOp_Create(TF_OpKernelConstruction* ctx) { // TODO: replace with a void* pointer type later - int a = 4; - return static_cast(&a); + void* ptr; + return ptr; } static void SummaryScalarOp_Delete(void* kernel) { @@ -60,48 +61,46 @@ static void SummaryScalarOp_Compute(void* kernel, TF_OpKernelContext* ctx) { TF_Tensor* values; TF_Status* status = TF_NewStatus(); TF_GetInput(ctx, 0, &tags, status); - CHECK_EQ(TF_OK, TF_GetCode(status)) - << "Error while getting input"; if (TF_GetCode(status) == TF_OK){ TF_GetInput(ctx, 1, &values, status); } - CHECK_EQ(TF_OK, TF_GetCode(status)) - << "Error while getting input"; + if (TF_GetCode(status) == TF_OK) { if (!IsSameSize(tags, values)) { std::ostringstream err; - err << "tags and values not the same shape: "; + err << "tags and values not the same shape: " << TF_ShapeDebugString(tags) + << " != " << TF_ShapeDebugString(values); TF_SetStatus(status, TF_INVALID_ARGUMENT, err.str().c_str()); } } + // Copy tag and string data into summary protobuf tensorflow::Summary s; if (TF_GetCode(status) == TF_OK) { - auto Ttags_array = static_cast(TF_TensorData(tags)); + // Convert tags and values tensor to array to access elements by index + auto tags_array = static_cast(TF_TensorData(tags)); auto values_array = static_cast(TF_TensorData(values)); for (int i = 0; i < TF_TensorElementCount(tags); ++i){ tensorflow::Summary::Value* v = s.add_value(); - TF_TString_Init(Ttags_array[i]); - v->set_tag(TF_TString_GetDataPointer(Ttags_array[i]), TF_TString_GetSize(Ttags_array[i])); + v->set_tag(TF_TString_GetDataPointer(&tags_array[i]), + TF_TString_GetSize(&tags_array[i])); v->set_simple_value(float(values_array[i])); } - - - // TF_Tensor* summary_tensor = TF_AllocateOutput(ctx, 0, TF_ExpectedOutputDataType(ctx, 0), 0, 0) - - // TF_Tensor* output = TF_AllocateTensor(k->output_data_type, dims, 0, - // TF_DataTypeSize(k->output_data_type)); - // if (TF_GetCode(status) == TF_OK) { - // TF_SetOutput(ctx, 0, output, status); - // } - // TF_DeleteTensor(output); + TF_Tensor* summary_tensor = TF_AllocateOutput(ctx, 0, + TF_ExpectedOutputDataType(ctx, 0), nullptr, 0, + sizeof(TF_TString), status); + if (TF_GetCode(status) == TF_OK){ + SerializeToTString(s, static_cast + (TF_TensorData(summary_tensor))); + } + TF_DeleteTensor(summary_tensor); } - // if (TF_GetCode(status) != TF_OK) { - // TF_OpKernelContext_Failure(ctx, status); - // } - // TF_DeleteStatus(status); - // TF_DeleteTensor(tags); + if (TF_GetCode(status) != TF_OK) { + TF_OpKernelContext_Failure(ctx, status); + } + TF_DeleteStatus(status); + TF_DeleteTensor(tags); } template @@ -114,15 +113,14 @@ void RegisterSummaryScalarOpKernel() { TF_KernelBuilder_TypeConstraint(builder, "T", static_cast(tensorflow::DataTypeToEnum::v()), status); CHECK_EQ(TF_OK, TF_GetCode(status)) << "Error while adding type constraint"; - TF_RegisterKernelBuilder("SummaryScalar", builder, status); + TF_RegisterKernelBuilder("SummaryScalarOp", builder, status); CHECK_EQ(TF_OK, TF_GetCode(status)) << "Error while registering Summary Scalar kernel"; } -// template // #if GOOGLE_CUDA // { // auto* builder = TF_NewKernelBuilder("SummaryScalar", tensorflow::DEVICE_GPU, -// &SummaryScalarOp_Create, &SummaryScalarOp_Compute, +// &SummaryScalarOp_Create, &SummaryScalarOp_Compute, // &SummaryScalarOp_Delete); // TF_RegisterKernelBuilder("SummaryScalar", builder, status); // 
CHECK_EQ(TF_OK, TF_GetCode(status)) @@ -138,7 +136,7 @@ void RegisterSummaryScalarOpKernel() { TF_ATTRIBUTE_UNUSED static bool IsSummaryScalarOpKernelRegistered = []() { - if (SHOULD_REGISTER_OP_KERNEL("SummaryScalar")) { + if (SHOULD_REGISTER_OP_KERNEL("SummaryScalarOp")) { RegisterSummaryScalarOpKernel(); RegisterSummaryScalarOpKernel(); RegisterSummaryScalarOpKernel(); diff --git a/tensorflow/c/kernels/summary_op_test.cc b/tensorflow/c/kernels/summary_op_test.cc index fd6199abd6c..afc818fb7b5 100644 --- a/tensorflow/c/kernels/summary_op_test.cc +++ b/tensorflow/c/kernels/summary_op_test.cc @@ -22,6 +22,11 @@ limitations under the License. #include "tensorflow/core/framework/shape_inference.h" #include "tensorflow/core/platform/test.h" +#include "tensorflow/core/framework/summary.pb.h" +#include "tensorflow/core/platform/protobuf.h" +#include "tensorflow/c/tf_tensor.h" +#include "tensorflow/c/tf_tensor_internal.h" + #include #include #include @@ -36,17 +41,25 @@ class DummyDevice : public DeviceBase { } }; -void TestScalarSummaryOp(Tensor* tags, Tensor* values, error::Code expected_code) { +// Helper for comparing ouput and expected output +static void EXPECT_SummaryMatches(const Summary& actual, + const string& expected_str) { + Summary expected; + (protobuf::TextFormat::ParseFromString(expected_str, &expected)); + EXPECT_EQ(expected.DebugString(), actual.DebugString()); +} + + +void TestScalarSummaryOp(Tensor* tags, Tensor* values, string expected_summary, + error::Code expected_code) { + // initialize node used to fetch OpKernel Status status; NodeDef def; def.set_op("SummaryScalar"); - def.set_device(DEVICE_CPU); - AttrValue valuesTypeAttr; SetAttrValue(values->dtype(), &valuesTypeAttr); (*def.mutable_attr())["T"] = valuesTypeAttr; - def.add_input( strings::StrCat("input1: ", DataTypeString(tags->dtype()))); def.add_input( @@ -55,6 +68,8 @@ void TestScalarSummaryOp(Tensor* tags, Tensor* values, error::Code expected_code std::unique_ptr kernel = CreateOpKernel(DeviceType(DEVICE_CPU), nullptr, nullptr, def, 1, &status); ASSERT_TRUE(status.ok()) << status.ToString(); + + // initialize OpKernel parameters OpKernelContext::Params params; DummyDevice dummy_device(nullptr); params.device = &dummy_device; @@ -64,27 +79,93 @@ void TestScalarSummaryOp(Tensor* tags, Tensor* values, error::Code expected_code inputs.emplace_back(values); params.inputs = &inputs; OpKernelContext ctx(¶ms, 1); + AllocatorAttributes alloc_attrs; + std::vector output_alloc_attrs({alloc_attrs}); + params.output_attr_array = output_alloc_attrs.data(); kernel->Compute(&ctx); - ASSERT_EQ(expected_code, ctx.status().code()); - if (expected_code == error::OK) { - ASSERT_EQ(true, false) - << ctx.mutable_output(0)->shape().DebugString(); + if (expected_code == error::OK){ + Summary summary; + ParseProtoUnlimited(&summary, ctx.mutable_output(0)->scalar()()); + EXPECT_SummaryMatches(summary, expected_summary); } } -TEST(ScalarSummaryOpTest, Test) { - int vectorSize = 2; +TEST(ScalarSummaryOpTest, SimpleFloat) { + int vectorSize = 3; Tensor tags(DT_STRING, {vectorSize}); Tensor values(DT_FLOAT, {vectorSize}); - for (int i = 0; i < vectorSize; ++i){ - values.vec()(i) = static_cast(i); - } - tags.vec()(0) = "tag 1"; - tags.vec()(1) = "tag 2"; - TestScalarSummaryOp(&tags, &values, error::INVALID_ARGUMENT); + tags.vec()(0) = "tag1"; + tags.vec()(1) = "tag2"; + tags.vec()(2) = "tag3"; + values.vec()(0) = 1.0f; + values.vec()(1) = -0.73f; + values.vec()(2) = 10000.0f; + TestScalarSummaryOp(&tags, &values, R"( + value { tag: 'tag1' 
simple_value: 1.0 } + value { tag: 'tag2' simple_value: -0.73} + value { tag: 'tag3' simple_value: 10000.0})", error::OK); } +TEST(ScalarSummaryOpTest, SimpleDouble) { + int vectorSize = 3; + Tensor tags(DT_STRING, {vectorSize}); + Tensor values(DT_DOUBLE, {vectorSize}); + tags.vec()(0) = "tag1"; + tags.vec()(1) = "tag2"; + tags.vec()(2) = "tag3"; + values.vec()(0) = 1.0; + values.vec()(1) = -0.73; + values.vec()(2) = 10000.0; + TestScalarSummaryOp(&tags, &values, R"( + value { tag: 'tag1' simple_value: 1.0 } + value { tag: 'tag2' simple_value: -0.73} + value { tag: 'tag3' simple_value: 10000.0})", error::OK); +} + +TEST(ScalarSummaryOpTest, SimpleHalf) { + int vectorSize = 3; + Tensor tags(DT_STRING, {vectorSize}); + Tensor values(DT_HALF, {vectorSize}); + tags.vec()(0) = "tag1"; + tags.vec()(1) = "tag2"; + tags.vec()(2) = "tag3"; + values.vec()(0) = static_cast(1.0); + values.vec()(1) = static_cast(-2.0); + values.vec()(2) = static_cast(10000.0); + TestScalarSummaryOp(&tags, &values, R"( + value { tag: 'tag1' simple_value: 1.0 } + value { tag: 'tag2' simple_value: -2.0} + value { tag: 'tag3' simple_value: 10000.0})", error::OK); +} + +TEST(ScalarSummaryOpTest, Error_WrongDimsTags) { + int vectorSize = 3; + Tensor tags(DT_STRING, {2, 1}); + Tensor values(DT_FLOAT, {2}); + tags.matrix()(0, 0) = "tag1"; + tags.matrix()(1, 0) = "tag2"; + values.vec()(0) = 1.0f; + values.vec()(1) = -2.0f; + TestScalarSummaryOp(&tags, &values, R"()", error::INVALID_ARGUMENT); +} + +TEST(ScalarSummaryOpTest, Error_WrongValuesTags) { + Tensor tags(DT_STRING, {2}); + Tensor values(DT_FLOAT, {2, 1}); + tags.vec()(0) = "tag1"; + tags.vec()(1) = "tag2"; + values.matrix()(0, 0) = 1.0f; + values.matrix()(1, 0) = -2.0f; + TestScalarSummaryOp(&tags, &values, R"()", error::INVALID_ARGUMENT); +} + +TEST(ScalarSummaryOpTest, IsRegistered){ + const OpRegistrationData* reg; + TF_CHECK_OK(OpRegistry::Global()->LookUp("SummaryScalar", ®)); +} + + PartialTensorShape S(std::initializer_list dims) { return PartialTensorShape(dims); diff --git a/tensorflow/c/tf_tensor.cc b/tensorflow/c/tf_tensor.cc index 7e4d3bb4932..948a8700690 100644 --- a/tensorflow/c/tf_tensor.cc +++ b/tensorflow/c/tf_tensor.cc @@ -28,6 +28,8 @@ limitations under the License. #include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/lib/core/coding.h" #include "tensorflow/core/platform/casts.h" +#include +#include using tensorflow::Status; using tensorflow::Tensor; @@ -180,6 +182,11 @@ void TF_TensorBitcastFrom(const TF_Tensor* from, TF_DataType type, Set_TF_Status_from_Status(status, cc_status); } +std::string TF_ShapeDebugString(const TF_Tensor* t){ + return tensorflow::down_cast(t->tensor) + ->ShapeDebugString(); +} + namespace tensorflow { void TensorInterface::Release() { delete this; } @@ -225,6 +232,10 @@ Status TensorInterface::BitcastFrom(const TensorInterface& from, DataType type, return tensor_.BitcastFrom(from.tensor_, type, s); } +std::string TensorInterface::ShapeDebugString() const { + return tensor_.shape().DebugString(); +} + } // namespace tensorflow // -------------------------------------------------------------------------- @@ -307,6 +318,7 @@ static TF_Tensor* EmptyTensor(TF_DataType dtype, namespace tensorflow { // Non-static for testing. 
+ TF_Tensor* TF_TensorFromTensor(const tensorflow::Tensor& src, Status* status) { *status = tensorflow::Status::OK(); if (!src.IsInitialized()) { @@ -334,58 +346,13 @@ TF_Tensor* TF_TensorFromTensor(const tensorflow::Tensor& src, Status* status) { std::memcpy(TF_TensorData(t), str.c_str(), str.size()); return t; } - if (src.dtype() != tensorflow::DT_STRING) { - Tensor tensor; - if (!tensor.CopyFrom(src, src.shape())) { - return nullptr; - } - return new TF_Tensor{new tensorflow::TensorInterface(tensor)}; - } - // DT_STRING tensors require a copying since TF_Tensor.buffer expects a flatly - // encoded sequence of strings. - // Compute bytes needed for encoding. - size_t size = 0; - const auto& srcarray = src.flat(); - for (int i = 0; i < srcarray.size(); ++i) { - const string& s = srcarray(i); - // uint64 starting_offset, TF_StringEncode-d string. - size += sizeof(tensorflow::uint64) + TF_StringEncodedSize(s.size()); - } - - // Encode all strings. - char* base = new char[size]; - char* data_start = base + sizeof(tensorflow::uint64) * srcarray.size(); - char* dst = data_start; // Where next string is encoded. - size_t dst_len = size - static_cast(data_start - base); - tensorflow::uint64* offsets = reinterpret_cast(base); - for (int i = 0; i < srcarray.size(); ++i) { - *offsets = (dst - data_start); - offsets++; - const string& s = srcarray(i); - const size_t consumed = TF_StringEncodedSize(s.size()); - StringEncode(s.data(), s.size(), dst); - dst += consumed; - dst_len -= consumed; - } - if (dst != base + size) { - *status = InvalidArgument( - "invalid string tensor encoding (decoded ", (dst - base), - " bytes, but the tensor is encoded in ", size, " bytes"); - delete[] base; + Tensor tensor; + if (!tensor.CopyFrom(src, src.shape())) { return nullptr; } + return new TF_Tensor{new tensorflow::TensorInterface(tensor)}; - auto dims = src.shape().dim_sizes(); - std::vector dimvec(dims.size()); - for (size_t i = 0; i < dims.size(); ++i) { - dimvec[i] = dims[i]; - } - static_assert(sizeof(int64_t) == sizeof(tensorflow::int64), - "64-bit int types should match in size"); - return TF_NewTensor(TF_STRING, - reinterpret_cast(dimvec.data()), - dimvec.size(), base, size, DeleteArray, base); } Status TF_TensorToTensor(const TF_Tensor* src, Tensor* dst) { @@ -409,44 +376,14 @@ Status TensorInterface::ToTensor(tensorflow::Tensor* dst) const { } return Status::OK(); } - if (tensor_.dtype() != DT_STRING) { - *dst = tensor_; - return Status::OK(); - } - // TF_STRING tensors require copying since Tensor class expects a sequence of - // string objects. 
- const tensorflow::int64 num_elements = tensor_.NumElements(); - const char* input = reinterpret_cast(Data()); - const size_t src_size = ByteSize(); - if (static_cast(src_size / sizeof(tensorflow::uint64)) < - num_elements) { - return InvalidArgument( - "Malformed TF_STRING tensor; too short to hold number of elements"); - } - const char* data_start = input + sizeof(tensorflow::uint64) * num_elements; - const char* limit = input + src_size; - - *dst = tensorflow::Tensor(tensor_.dtype(), tensor_.shape()); - auto dstarray = dst->flat(); - for (tensorflow::int64 i = 0; i < num_elements; ++i) { - tensorflow::uint64 offset = - reinterpret_cast(input)[i]; - if (static_cast(offset) >= (limit - data_start)) { - return InvalidArgument("Malformed TF_STRING tensor; element ", i, - " out of range"); - } - size_t len; - const char* p; - const char* srcp = data_start + offset; - Status status = TF_StringDecode_Impl(srcp, limit - srcp, &p, &len); - if (!status.ok()) return status; - dstarray(i).assign(p, len); - } + *dst = tensor_; return Status::OK(); } + bool TensorInterface::IsAligned() const { return tensor_.IsAligned(); } } // namespace tensorflow bool TF_TensorIsAligned(const TF_Tensor* t) { return t->tensor->IsAligned(); } + diff --git a/tensorflow/c/tf_tensor.h b/tensorflow/c/tf_tensor.h index 7ed4a9f754e..3da4fef0f13 100644 --- a/tensorflow/c/tf_tensor.h +++ b/tensorflow/c/tf_tensor.h @@ -22,6 +22,9 @@ limitations under the License. #include "tensorflow/c/tf_datatype.h" #include "tensorflow/c/tf_status.h" +#include +#include + // Macro to control visibility of exported symbols in the shared library (.so, // .dylib, .dll). // This duplicates the TF_EXPORT macro definition in @@ -179,6 +182,8 @@ TF_CAPI_EXPORT extern size_t TF_StringEncodedSize(size_t len); // Returns bool iff this tensor is aligned. TF_CAPI_EXPORT extern bool TF_TensorIsAligned(const TF_Tensor*); +TF_CAPI_EXPORT extern std::string TF_ShapeDebugString(const TF_Tensor*); + #ifdef __cplusplus } /* end extern "C" */ #endif diff --git a/tensorflow/c/tf_tensor_internal.h b/tensorflow/c/tf_tensor_internal.h index 7a896dc5d11..b3f44c71245 100644 --- a/tensorflow/c/tf_tensor_internal.h +++ b/tensorflow/c/tf_tensor_internal.h @@ -24,6 +24,8 @@ limitations under the License. #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/platform/casts.h" +#include +#include // Internal structures used by the C API. These are likely to change and should // not be depended on. 
@@ -104,6 +106,7 @@ class TensorInterface : public AbstractTensorInterface { void* Data() const override; bool IsAligned() const override; bool CanMove() const override; + std::string ShapeDebugString() const; Status ToTensor(tensorflow::Tensor* dst) const; Status BitcastFrom(const TensorInterface& from, DataType type, From 7b11290ba0927e63ea9002ecd907a2e63ce021e7 Mon Sep 17 00:00:00 2001 From: zilinzhu Date: Fri, 26 Jun 2020 11:44:03 +0800 Subject: [PATCH 0060/2522] remove support for cache op --- .../optimizers/data/reorder_data_discarding_ops.cc | 12 +++++++----- .../optimizers/data/reorder_data_discarding_ops.h | 2 +- .../data/reorder_data_discarding_ops_test.cc | 12 +++--------- .../optimization/reorder_data_discarding_ops_test.py | 6 ++---- .../data/experimental/ops/optimization_options.py | 8 +++----- 5 files changed, 16 insertions(+), 24 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/data/reorder_data_discarding_ops.cc b/tensorflow/core/grappler/optimizers/data/reorder_data_discarding_ops.cc index e7de3759594..72de5ed8d70 100644 --- a/tensorflow/core/grappler/optimizers/data/reorder_data_discarding_ops.cc +++ b/tensorflow/core/grappler/optimizers/data/reorder_data_discarding_ops.cc @@ -38,9 +38,11 @@ constexpr std::array kDataDiscarding = { "ShardDataset", "SkipDataset", "TakeDataset", }; -constexpr std::array kCardinalityPreserving = { - "CacheDataset", "CacheDatasetV2", "PrefetchDataset", - "MapDataset", "ParallelMapDataset", "ParallelMapDatasetV2", +// TODO(zilinzhu): Support memory cache op when file cache op and +// memory cache op are separated. +const std::array kCardinalityPreserving = { + "PrefetchDataset", "MapDataset", + "ParallelMapDataset", "ParallelMapDatasetV2", }; bool IsDataDiscarding(const NodeDef& node) { @@ -58,8 +60,8 @@ bool IsCardinalityPreserving(const NodeDef& node) { continue; } // Map ops with preserve_cardinality=false do not qualify. - auto attr_iter = node.attr().find("preserve_cardinality"); - if (attr_iter != node.attr().end() && !attr_iter->second.b()) { + auto attr = node.attr().find("preserve_cardinality"); + if (attr != node.attr().end() && !attr->second.b()) { return false; } return true; diff --git a/tensorflow/core/grappler/optimizers/data/reorder_data_discarding_ops.h b/tensorflow/core/grappler/optimizers/data/reorder_data_discarding_ops.h index 6079d1de7f5..1cf0b0861dc 100644 --- a/tensorflow/core/grappler/optimizers/data/reorder_data_discarding_ops.h +++ b/tensorflow/core/grappler/optimizers/data/reorder_data_discarding_ops.h @@ -23,7 +23,7 @@ namespace grappler { // This optimization reorders the data discarding ops (such as `skip`, `take` // and `shard`) to avoid unnecessary computation, -// e.g. reordering ds.map(...).cache().take(5) to ds.take(5).map(...).cache(). +// e.g. reordering ds.map(...).take(5) to ds.take(5).map(...). 
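+// From the tf.data user's side the rewrite is controlled through the options
+// plumbing; a minimal sketch, assuming the usual Python API
+// (tf.data.Options / Dataset.with_options):
+//
+//   options = tf.data.Options()
+//   options.experimental_optimization.reorder_data_discarding_ops = True
+//   dataset = dataset.with_options(options)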
class ReorderDataDiscardingOps : public TFDataOptimizerBase { public: ReorderDataDiscardingOps() = default; diff --git a/tensorflow/core/grappler/optimizers/data/reorder_data_discarding_ops_test.cc b/tensorflow/core/grappler/optimizers/data/reorder_data_discarding_ops_test.cc index 3f5a03c7528..f1d8a863936 100644 --- a/tensorflow/core/grappler/optimizers/data/reorder_data_discarding_ops_test.cc +++ b/tensorflow/core/grappler/optimizers/data/reorder_data_discarding_ops_test.cc @@ -39,16 +39,10 @@ TEST(ReorderDataDiscardingOpsTest, ExampleOps) { {"output_shapes", gtl::ArraySlice{}}, {"output_types", gtl::ArraySlice{}}, }), - NDef("num_parallel_calls", "Const", {}, - {{"value", 1}, {"dtype", DT_INT32}}), - graph_tests_utils::MakeParallelMapNode("map", "range", - "num_parallel_calls", "XTimesTwo", - /*sloppy=*/false), - NDef("dummy_memory_cache", "DummyMemoryCache", {}, {}), - graph_tests_utils::MakeCacheV2Node("cache", "map", "", "dummy_memory_cache"), + graph_tests_utils::MakeMapNode("map", "range", "XTimesTwo"), NDef("take_count", "Const", {}, {{"value", 5}, {"dtype", DT_INT32}}), - graph_tests_utils::MakeTakeNode("take", "cache", "take_count"), + graph_tests_utils::MakeTakeNode("take", "map", "take_count"), NDef("skip_count", "Const", {}, {{"value", 1}, {"dtype", DT_INT32}}), graph_tests_utils::MakeSkipNode("skip", "take", "skip_count"), @@ -87,7 +81,7 @@ TEST(ReorderDataDiscardingOpsTest, ExampleOps) { graph)->name() == "range"); EXPECT_TRUE(graph_utils::GetInputNode(*graph.GetNode("reorder_data_discarding_ops/skip"), graph)->name() == "reorder_data_discarding_ops/take"); - EXPECT_TRUE(graph_utils::GetInputNode(*graph.GetNode("map_and_batch"), graph)->name() == "cache"); + EXPECT_TRUE(graph_utils::GetInputNode(*graph.GetNode("map_and_batch"), graph)->name() == "map"); } } // namespace diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/reorder_data_discarding_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/reorder_data_discarding_ops_test.py index 0e7d0fafbb5..260d6a37495 100644 --- a/tensorflow/python/data/experimental/kernel_tests/optimization/reorder_data_discarding_ops_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/optimization/reorder_data_discarding_ops_test.py @@ -34,11 +34,10 @@ class ReorderDataDiscardingOpsTest(test_base.DatasetTestBase, parameterized.Test dataset = dataset_ops.Dataset.range(100) dataset = dataset.apply( testing.assert_next(["FiniteSkip", "FiniteTake", "Shard", - "ParallelMap", "MemoryCacheImpl", "Prefetch"])) + "ParallelMap", "Prefetch"])) dataset = dataset.map( lambda x: x + 1, num_parallel_calls=10) dataset = dataset.skip(10) - dataset = dataset.cache() dataset = dataset.prefetch(1) dataset = dataset.take(50) dataset = dataset.shard(2, 0) @@ -55,11 +54,10 @@ class ReorderDataDiscardingOpsTest(test_base.DatasetTestBase, parameterized.Test # Map ops have preserve_cardinality=false in tensorflow v1. 
dataset = dataset.apply( testing.assert_next(["ParallelMap", "FiniteSkip", "FiniteTake", - "Shard", "MemoryCacheImpl", "Prefetch"])) + "Shard", "Prefetch"])) dataset = dataset.map( lambda x: x + 1, num_parallel_calls=10) dataset = dataset.skip(10) - dataset = dataset.cache() dataset = dataset.prefetch(1) dataset = dataset.take(50) dataset = dataset.shard(2, 0) diff --git a/tensorflow/python/data/experimental/ops/optimization_options.py b/tensorflow/python/data/experimental/ops/optimization_options.py index 12377f49930..60dc87ccc96 100644 --- a/tensorflow/python/data/experimental/ops/optimization_options.py +++ b/tensorflow/python/data/experimental/ops/optimization_options.py @@ -188,11 +188,9 @@ class OptimizationOptions(options.OptionsBase): "cardinality preserved transformations, e.g. dataset.map(...).take(3)" "will be optimized to dataset.take(3).map(...). For now this" "optimization will move `skip`, `shard` and `take` to the front of" - "`cache`, `map` and `prefetch`. And notice this optimization is only" - "for performance, it will not affect the output of the dataset." - "However, it will influence the cache to the file, for the unused" - "data will no longer be saved after this optimization." - "If None, defaults to False.") + "`map` and `prefetch`. And notice this optimization is only for" + "performance, it will not affect the output of the dataset." + "If None, defaults to True.") shuffle_and_repeat_fusion = options.create_option( name="shuffle_and_repeat_fusion", From 07c5adfcfb7b7f62e8aeed9ec40535e9dda10976 Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Fri, 26 Jun 2020 06:24:53 +0000 Subject: [PATCH 0061/2522] working absl hashable tensor --- tensorflow/core/framework/tensor.h | 30 ++++++++++++- tensorflow/core/kernels/tensor_map.cc | 65 +++++++-------------------- tensorflow/core/kernels/tensor_map.h | 14 ++++-- 3 files changed, 54 insertions(+), 55 deletions(-) diff --git a/tensorflow/core/framework/tensor.h b/tensorflow/core/framework/tensor.h index 28eab3ab1e0..33c2338aeb4 100644 --- a/tensorflow/core/framework/tensor.h +++ b/tensorflow/core/framework/tensor.h @@ -319,6 +319,34 @@ class Tensor { return true; } + /// Hashable + // Equality operator. Needed for absl hashing. + friend bool operator==(const Tensor& lhs, const Tensor& rhs) { + return lhs.shape() == rhs.shape() && lhs.dtype() == rhs.dtype(); + } + + // Comparison operator. Needed for absl hashing. + /*friend bool operator<(const Tensor& lhs, const Tensor& rhs) { + return lhs.NumElements() < rhs.NumElements(); + }*/ + + // AbslHashValue() function, needed for absl hashing. + template + friend H AbslHashValue(H h, const Tensor& k) { + //int temp = k.NumElements(); + + uint8* d = (uint8*)(k.buf_->data()); + std::cout << "buffer " << d << std::endl; + size_t s = k.buf_->size(); + std::vector vec; + + for (int i=0; i < s; i++) { + vec.push_back(d[i]); + } + + return H::combine(std::move(h), vec); + } + /// \brief Slice this tensor along the 1st dimension. /// I.e., the returned tensor satisfies @@ -648,7 +676,7 @@ class Tensor { // buffer is one. 
bool RefCountIsOne() const; - private: + protected: void CheckType(DataType expected_dtype) const; void CheckTypeAndIsAligned(DataType expected_dtype) const; void CheckIsAlignedAndSingleElement() const; diff --git a/tensorflow/core/kernels/tensor_map.cc b/tensorflow/core/kernels/tensor_map.cc index a0b2c7a9f7a..3a20708933e 100644 --- a/tensorflow/core/kernels/tensor_map.cc +++ b/tensorflow/core/kernels/tensor_map.cc @@ -28,38 +28,20 @@ TensorMap::~TensorMap() { void TensorMap::Encode(VariantTensorData* data) const { data->set_type_name(TypeName()); - std::map::iterator map_it = tensors().begin(); - size_t i = 0; - std::vector invalid_indices; + absl::flat_hash_map::const_iterator map_it = tensors().begin(); while (map_it != tensors().end()) { Tensor k = map_it->first; Tensor v = map_it->second; - // k should also not be DT_RESOURCE or DT_VARIANT - if(k.dtype != DT_INVALID && v.dtype != DT_INVALID) { + // TODO: k should also not be DT_RESOURCE or DT_VARIANT + if(k.dtype() != DT_INVALID && v.dtype() != DT_INVALID) { *data->add_tensors() = k; *data->add_tensors() = v; - // not sure if this is the correct order - } - else { - invalid_indices.push_back(i); } } - /* - for (size_t i = 0; i < tensors().size(); i++) { - if (tensors().at(i).dtype() != DT_INVALID) { - *data->add_tensors() = tensors().at(i); - } else { - invalid_indices.push_back(i); - } - }*/ string metadata; // TODO(b/118838800): Add a proto for storing the metadata. // Metadata format: - // - core::PutVarint64(&metadata, static_cast(invalid_indices.size())); - for (size_t i : invalid_indices) { - core::PutVarint64(&metadata, static_cast(i)); - } + // core::PutVarint64(&metadata, static_cast(element_dtype)); core::PutVarint64(&metadata, static_cast(max_num_elements)); TensorShapeProto element_shape_proto; @@ -74,12 +56,11 @@ static Status TensorMapDeviceCopy( to->element_shape = from.element_shape; to->element_dtype = from.element_dtype; to->max_num_elements = from.max_num_elements; - //to->tensors().reserve(from.tensors().size()); for (const std::pair& p : from.tensors()) { - to->tensors().emplace(p); //why was it emplace t.dtype? - if (t.dtype() != DT_INVALID) { + to->tensors().emplace(p); //TODO: check valid dtype + //if (t.dtype() != DT_INVALID) { //TF_RETURN_IF_ERROR(copy(p, &to->tensors().back())); - } + //} } return Status::OK(); } @@ -102,33 +83,17 @@ bool TensorMap::Decode(const VariantTensorData& data) { data.get_metadata(&metadata); uint64 scratch; StringPiece iter(metadata); - std::vector invalid_indices; - core::GetVarint64(&iter, &scratch); - size_t num_invalid_tensors = static_cast(scratch); - invalid_indices.resize(num_invalid_tensors); - for (size_t i = 0; i < num_invalid_tensors; i++) { - core::GetVarint64(&iter, &scratch); - invalid_indices[i] = static_cast(scratch); - } - size_t total_num_tensors = data.tensors().size()/2 + num_invalid_tensors; - //tensors().reserve(total_num_tensors); - std::vector::iterator invalid_indices_it = invalid_indices.begin(); std::vector::const_iterator tensors_it = data.tensors().begin(); - for (size_t i = 0; i < total_num_tensors; i++) { - if (invalid_indices_it != invalid_indices.end() && - *invalid_indices_it == i) { - //no need to do invalid indices for a map - //tensors().emplace(Tensor(DT_INVALID),Tensor(DT_INVALID)); - invalid_indices_it++; - } else if (tensors_it != data.tensors().end()) { - // should assert that tensors_it + 1 is also not the end - tensors().emplace(*tensors_it,*++tensors_it); - tensors_it++; - } else { - // VariantTensorData is corrupted. 
+ while (tensors_it != data.tensors().end()) + { + // should assert that tensors_it + 1 is also not the end + /*if (*tensors_it + 1 == data.tensors().end()) { return false; - } + }*/ + + tensors().emplace(*tensors_it,*++tensors_it); + tensors_it++; } core::GetVarint64(&iter, &scratch); diff --git a/tensorflow/core/kernels/tensor_map.h b/tensorflow/core/kernels/tensor_map.h index d8726bbecb4..17b2f63f2c9 100644 --- a/tensorflow/core/kernels/tensor_map.h +++ b/tensorflow/core/kernels/tensor_map.h @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/core/framework/variant.h" #include "tensorflow/core/framework/variant_tensor_data.h" #include "tensorflow/core/lib/core/refcount.h" +#include "absl/container/flat_hash_map.h" namespace tensorflow { @@ -123,8 +124,12 @@ class TensorMap { int max_num_elements = -1; // Access to the underlying tensor container. - std::map& tensors() { return tensors_->values_; } - const std::map& tensors() const { return tensors_->values_; } + absl::flat_hash_map& tensors() { return tensors_->values_; } + const absl::flat_hash_map& tensors() const { return tensors_->values_; } + + // Access to shape and element dtype + PartialTensorShape& shape() { return element_shape; } + DataType dtype() { return element_dtype; } // Get a new TensorList containing a copy of the underlying tensor container. TensorMap Copy() const { @@ -132,7 +137,7 @@ class TensorMap { out.element_shape = element_shape; out.element_dtype = element_dtype; out.max_num_elements = max_num_elements; - // This performs a copy of the std::map. + // This performs a copy of the absl::hashmap. out.tensors_->values_ = tensors_->values_; return out; } @@ -144,7 +149,8 @@ class TensorMap { private: class Tensors : public core::RefCounted { public: - std::map values_; + //std::unordered_map values_; + absl::flat_hash_map values_; }; Tensors* tensors_; }; From 5d8e3a41c57497588e8b22941cd480a5300c15d2 Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Fri, 26 Jun 2020 07:16:28 +0000 Subject: [PATCH 0062/2522] tensor map tests --- tensorflow/core/kernels/tensor_map.h | 13 ++++++ tensorflow/core/kernels/tensor_map_test.cc | 54 ++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 tensorflow/core/kernels/tensor_map_test.cc diff --git a/tensorflow/core/kernels/tensor_map.h b/tensorflow/core/kernels/tensor_map.h index 17b2f63f2c9..2e8ebcd219d 100644 --- a/tensorflow/core/kernels/tensor_map.h +++ b/tensorflow/core/kernels/tensor_map.h @@ -142,6 +142,19 @@ class TensorMap { return out; } + bool insert(Tensor key, Tensor value) { + tensors_->values_.try_emplace(key, value); + return true; + } + + /*Tensor& lookup(Tensor key) { + return tensors_->values_.find(key); + }*/ + + bool erase(Tensor key) { + return tensors_->values_.erase(key); + } + // Is this TensorMap the only one with a reference to the underlying // container? bool RefCountIsOne() const { return tensors_->RefCountIsOne(); } diff --git a/tensorflow/core/kernels/tensor_map_test.cc b/tensorflow/core/kernels/tensor_map_test.cc new file mode 100644 index 00000000000..49f963bf950 --- /dev/null +++ b/tensorflow/core/kernels/tensor_map_test.cc @@ -0,0 +1,54 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/kernels/tensor_map.h" +#include "tensorflow/core/framework/tensor.h" +#include "absl/container/flat_hash_map.h" + +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/test_benchmark.h" + +namespace tensorflow { + +namespace { + +TEST(TensorMapTest, Empty) { + TensorMap tm; + EXPECT_EQ(tm.tensors().size(), 0); + EXPECT_EQ(tm.tensors().begin(), tm.tensors().end()); +} + +TEST(TensorMap, Copy) { + TensorMap tm; + TensorMap tmc = tm.Copy(); + EXPECT_EQ(tm.dtype(),tmc.dtype()); + EXPECT_EQ(tm.tensors(),tmc.tensors()); +} + +TEST(TensorMap, Insert) { + EXPECT_EQ(1,1); + TensorMap tm; + Tensor k = Tensor(DT_INT64, TensorShape({1,1})); + Tensor v = Tensor(DT_INT64, TensorShape({2,3})); + tm.insert(k,v); + absl::flat_hash_map am; + am.try_emplace(k,v); + EXPECT_EQ(tm.tensors(), am); +} + +//TODO(kattian): test Lookup, Erase + +} // namespace + +} // namespace tensorflow From 6b01b02274a696a2c4c391e2aef9555f1fcdc0a8 Mon Sep 17 00:00:00 2001 From: zilinzhu Date: Fri, 26 Jun 2020 16:55:34 +0800 Subject: [PATCH 0063/2522] modify docstring --- .../data/experimental/ops/optimization_options.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/data/experimental/ops/optimization_options.py b/tensorflow/python/data/experimental/ops/optimization_options.py index 60dc87ccc96..d9fd3f62daa 100644 --- a/tensorflow/python/data/experimental/ops/optimization_options.py +++ b/tensorflow/python/data/experimental/ops/optimization_options.py @@ -184,12 +184,12 @@ class OptimizationOptions(options.OptionsBase): name="reorder_data_discarding_ops", ty=bool, docstring= - "Whether to reorder ops that will discard data to the front of unary" - "cardinality preserved transformations, e.g. dataset.map(...).take(3)" - "will be optimized to dataset.take(3).map(...). For now this" - "optimization will move `skip`, `shard` and `take` to the front of" - "`map` and `prefetch`. And notice this optimization is only for" - "performance, it will not affect the output of the dataset." + "Whether to reorder ops that will discard data to the front of unary " + "cardinality preserving transformations, e.g. dataset.map(...).take(3) " + "will be optimized to dataset.take(3).map(...). For now this " + "optimization will move `skip`, `shard` and `take` to the front of " + "`map` and `prefetch`. This optimization is only for performance; " + "it will not affect the output of the dataset." 
"If None, defaults to True.") shuffle_and_repeat_fusion = options.create_option( From 580dc945a65406b24673d8eb2be48dbcf54d8fcc Mon Sep 17 00:00:00 2001 From: Eugene Kuznetsov Date: Wed, 22 Jan 2020 12:24:24 -0800 Subject: [PATCH 0064/2522] Adding ROCm support to OpsTestBase (necessary for some unit tests) --- tensorflow/core/kernels/ops_testutil.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/kernels/ops_testutil.cc b/tensorflow/core/kernels/ops_testutil.cc index 87f70d3a3b3..c6f751d196c 100644 --- a/tensorflow/core/kernels/ops_testutil.cc +++ b/tensorflow/core/kernels/ops_testutil.cc @@ -14,7 +14,7 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/core/framework/node_properties.h" -#ifdef GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #define EIGEN_USE_GPU #include "tensorflow/core/common_runtime/gpu/gpu_managed_allocator.h" #endif @@ -112,7 +112,7 @@ void OpsTestBase::SetDevice(const DeviceType& device_type, thread_pool_.get()); device_type_ = device_type; -#ifdef GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM if (device_type == DEVICE_GPU) { managed_allocator_.reset(new GpuManagedAllocator()); allocator_ = managed_allocator_.get(); @@ -122,7 +122,7 @@ void OpsTestBase::SetDevice(const DeviceType& device_type, } #else CHECK_NE(device_type, DEVICE_GPU) - << "Requesting GPU on binary compiled without GOOGLE_CUDA."; + << "Requesting GPU on binary compiled without GOOGLE_CUDA or TENSORFLOW_USE_ROCM."; allocator_ = device_->GetAllocator(AllocatorAttributes()); #endif } @@ -195,7 +195,7 @@ TensorValue OpsTestBase::mutable_input(int input_index) { Tensor* OpsTestBase::GetOutput(int output_index) { CHECK_LT(output_index, context_->num_outputs()); Tensor* output = context_->mutable_output(output_index); -#ifdef GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM if (device_type_ == DEVICE_GPU) { managed_outputs_.resize(context_->num_outputs()); // Copy the output tensor to managed memory if we haven't done so. From 577579c575c30aac34e428bdc561133cb9482635 Mon Sep 17 00:00:00 2001 From: Eugene Kuznetsov Date: Wed, 22 Jan 2020 12:22:56 -0800 Subject: [PATCH 0065/2522] Fixing and enabling NonMaxSuppression for ROCm --- .../core/kernels/non_max_suppression_op.cu.cc | 66 ++++++++++--------- .../core/kernels/non_max_suppression_op.h | 2 +- .../non_max_suppression_op_gpu_test.cc | 2 +- tensorflow/core/kernels/ops_testutil.cc | 3 +- 4 files changed, 40 insertions(+), 33 deletions(-) diff --git a/tensorflow/core/kernels/non_max_suppression_op.cu.cc b/tensorflow/core/kernels/non_max_suppression_op.cu.cc index c2cae2ab212..d19f6eb676c 100644 --- a/tensorflow/core/kernels/non_max_suppression_op.cu.cc +++ b/tensorflow/core/kernels/non_max_suppression_op.cu.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #define EIGEN_USE_GPU #include @@ -28,7 +28,12 @@ limitations under the License. 
#include "tensorflow/core/util/gpu_launch_config.h" #include "tensorflow/stream_executor/stream_executor.h" -struct __align__(16) Box { + +struct +#if GOOGLE_CUDA + __align__(16) +#endif + Box { float x1, y1, x2, y2; }; @@ -114,7 +119,7 @@ __global__ void NMSReduce(const int* bitmask, const int bit_mask_len, char* result_mask) { extern __shared__ int local[]; // set global mask to accept all boxes - for (int box : CudaGridRangeX(bit_mask_len)) { + for (int box : GpuGridRangeX(bit_mask_len)) { local[box] = 0xFFFFFFFF; } __syncthreads(); @@ -127,7 +132,7 @@ __global__ void NMSReduce(const int* bitmask, const int bit_mask_len, accepted_boxes += 1; int offset = box * bit_mask_len; // update global mask with current box's mask - for (int b : CudaGridRangeX(bit_mask_len)) { + for (int b : GpuGridRangeX(bit_mask_len)) { local[b] &= ~bitmask[offset + b]; } __syncthreads(); @@ -135,7 +140,7 @@ __global__ void NMSReduce(const int* bitmask, const int bit_mask_len, } // copy global mask to result_max char array. char array is needed for // cub::DeviceSelect later. - for (int box : CudaGridRangeX(num_boxes)) { + for (int box : GpuGridRangeX(num_boxes)) { result_mask[box] = CheckBit(local, box); } } @@ -232,14 +237,14 @@ __device__ EIGEN_STRONG_INLINE void SelectHelper(const Index i_selected, template __global__ void IndexMultiSelect(const int num_elements, const Index* indices, const T* original, T* selected, Args... args) { - for (const int idx : CudaGridRangeX(num_elements)) { + for (const int idx : GpuGridRangeX(num_elements)) { SelectHelper(idx, indices[idx], original, selected, args...); } } template __global__ void Iota(const int num_elements, const T offset, T* to_fill) { - for (int idx : CudaGridRangeX(num_elements)) { + for (int idx : GpuGridRangeX(num_elements)) { to_fill[idx] = static_cast(idx) + offset; } } @@ -322,13 +327,13 @@ Status NmsGpu(const float* d_sorted_boxes_float_ptr, const int num_boxes, TF_RETURN_IF_CUDA_ERROR(cudaGetLastError()); // do Cub::deviceSelect::flagged size_t flagged_buffer_size = 0; - cub::DeviceSelect::Flagged(static_cast(nullptr), // temp_storage - flagged_buffer_size, - static_cast(nullptr), // input - static_cast(nullptr), // selection flag - static_cast(nullptr), // selected items - static_cast(nullptr), // num_selected - num_boxes, device.stream()); + gpuprim::DeviceSelect::Flagged(static_cast(nullptr), // temp_storage + flagged_buffer_size, + static_cast(nullptr), // input + static_cast(nullptr), // selection flag + static_cast(nullptr), // selected items + static_cast(nullptr), // num_selected + num_boxes, device.stream()); Tensor cub_scratch; TF_RETURN_IF_ERROR(context->allocate_temp( DataType::DT_INT8, TensorShape({(int64)flagged_buffer_size}), @@ -337,22 +342,22 @@ Status NmsGpu(const float* d_sorted_boxes_float_ptr, const int num_boxes, TF_RETURN_IF_ERROR(context->allocate_temp(DataType::DT_INT32, TensorShape({1}), &d_num_selected)); - cub::DeviceSelect::Flagged( + gpuprim::DeviceSelect::Flagged( (void*)cub_scratch.flat().data(), // temp_storage flagged_buffer_size, d_indices.flat().data(), // input selected, // selection flag d_selected_indices, // selected items d_num_selected.flat().data(), num_boxes, device.stream()); - cudaEvent_t copy_done; + gpuEvent_t copy_done; TF_RETURN_IF_CUDA_ERROR( - cudaEventCreateWithFlags(©_done, cudaEventDisableTiming)); + gpuEventCreateWithFlags(©_done, gpuEventDisableTiming)); device.memcpyDeviceToHost(h_selected_count, d_num_selected.flat().data(), sizeof(int)); - TF_RETURN_IF_CUDA_ERROR(cudaEventRecord(copy_done, 
device.stream())); - TF_RETURN_IF_CUDA_ERROR(cudaEventSynchronize(copy_done)); + TF_RETURN_IF_CUDA_ERROR(gpuEventRecord(copy_done, device.stream())); + TF_RETURN_IF_CUDA_ERROR(gpuEventSynchronize(copy_done)); *h_nkeep = *h_selected_count; - cudaEventDestroy(copy_done); + gpuEventDestroy(copy_done); return Status::OK(); } @@ -375,9 +380,10 @@ Status CountIf(OpKernelContext* context, const float* dev_array, const Op& op, size_t workspace_size = 0; auto cuda_stream = tensorflow::GetGpuStream(context); auto device = context->eigen_gpu_device(); - cub::DeviceSelect::If(nullptr, workspace_size, static_cast(nullptr), - static_cast(nullptr), - static_cast(nullptr), num_elements, op); + gpuprim::DeviceSelect::If(nullptr, workspace_size, + static_cast(nullptr), + static_cast(nullptr), + static_cast(nullptr), num_elements, op); TF_RETURN_IF_ERROR(context->allocate_temp( DataType::DT_FLOAT, TensorShape({num_elements}), &scratch_output)); @@ -385,17 +391,17 @@ Status CountIf(OpKernelContext* context, const float* dev_array, const Op& op, DataType::DT_INT8, TensorShape({(int64)workspace_size}), &workspace)); TF_RETURN_IF_ERROR(context->allocate_temp(DataType::DT_INT32, TensorShape({1}), &element_count)); - cudaEvent_t copy_done; + gpuEvent_t copy_done; TF_RETURN_IF_CUDA_ERROR( - cudaEventCreateWithFlags(©_done, cudaEventDisableTiming)); - TF_RETURN_IF_CUDA_ERROR(cub::DeviceSelect::If( + gpuEventCreateWithFlags(©_done, gpuEventDisableTiming)); + TF_RETURN_IF_CUDA_ERROR(gpuprim::DeviceSelect::If( workspace.flat().data(), workspace_size, dev_array, scratch_output.flat().data(), element_count.flat().data(), num_elements, op, cuda_stream)); device.memcpyDeviceToHost(result, element_count.flat().data(), sizeof(int)); - TF_RETURN_IF_CUDA_ERROR(cudaEventRecord(copy_done, device.stream())); - TF_RETURN_IF_CUDA_ERROR(cudaEventSynchronize(copy_done)); + TF_RETURN_IF_CUDA_ERROR(gpuEventRecord(copy_done, device.stream())); + TF_RETURN_IF_CUDA_ERROR(gpuEventSynchronize(copy_done)); return Status::OK(); } @@ -418,7 +424,7 @@ Status DoNMS(OpKernelContext* context, const Tensor& boxes, return Status::OK(); } - cudaError_t cuda_ret = cub::DeviceRadixSort::SortPairsDescending( + cudaError_t cuda_ret = gpuprim::DeviceRadixSort::SortPairsDescending( nullptr, cub_sort_temp_storage_bytes, static_cast(nullptr), // scores static_cast(nullptr), // sorted scores @@ -458,7 +464,7 @@ Status DoNMS(OpKernelContext* context, const Tensor& boxes, config.virtual_thread_count, 0, d_indices.flat().data())); TF_RETURN_IF_CUDA_ERROR(cudaGetLastError()); - cuda_ret = cub::DeviceRadixSort::SortPairsDescending( + cuda_ret = gpuprim::DeviceRadixSort::SortPairsDescending( d_cub_sort_buffer.flat().data(), cub_sort_temp_storage_bytes, scores.flat().data(), d_sorted_scores.flat().data(), d_indices.flat().data(), d_sorted_indices.flat().data(), diff --git a/tensorflow/core/kernels/non_max_suppression_op.h b/tensorflow/core/kernels/non_max_suppression_op.h index eaa1b28ad4b..24957c2bbed 100644 --- a/tensorflow/core/kernels/non_max_suppression_op.h +++ b/tensorflow/core/kernels/non_max_suppression_op.h @@ -35,7 +35,7 @@ struct NonMaxSuppression { } // namespace functor -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM extern const int kNmsBoxesPerTread; // Given descending sorted box list, apply non-maximal-suppression with given diff --git a/tensorflow/core/kernels/non_max_suppression_op_gpu_test.cc b/tensorflow/core/kernels/non_max_suppression_op_gpu_test.cc index 8dcb9c77a41..57f812d410e 100644 --- 
a/tensorflow/core/kernels/non_max_suppression_op_gpu_test.cc +++ b/tensorflow/core/kernels/non_max_suppression_op_gpu_test.cc @@ -35,7 +35,7 @@ limitations under the License. namespace tensorflow { -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM // These tests are copied from non_max_suppression_op_test.cc file and modified // to use GPU ops. See other file for test details. diff --git a/tensorflow/core/kernels/ops_testutil.cc b/tensorflow/core/kernels/ops_testutil.cc index c6f751d196c..c535fe66601 100644 --- a/tensorflow/core/kernels/ops_testutil.cc +++ b/tensorflow/core/kernels/ops_testutil.cc @@ -122,7 +122,8 @@ void OpsTestBase::SetDevice(const DeviceType& device_type, } #else CHECK_NE(device_type, DEVICE_GPU) - << "Requesting GPU on binary compiled without GOOGLE_CUDA or TENSORFLOW_USE_ROCM."; + << "Requesting GPU on binary compiled without GOOGLE_CUDA or " + "TENSORFLOW_USE_ROCM."; allocator_ = device_->GetAllocator(AllocatorAttributes()); #endif } From 352526451c4d9860e833ec556a04a554d8db0c6a Mon Sep 17 00:00:00 2001 From: amturati Date: Fri, 26 Jun 2020 20:30:03 +0000 Subject: [PATCH 0066/2522] initial test for matmul with abstract tensors --- tensorflow/c/eager/c_api_test_util.cc | 14 ++++ tensorflow/c/eager/c_api_test_util.h | 3 + .../eager/c_api_unified_experimental_test.cc | 67 +++++++++++++++++++ 3 files changed, 84 insertions(+) diff --git a/tensorflow/c/eager/c_api_test_util.cc b/tensorflow/c/eager/c_api_test_util.cc index 4b5ad8f50f7..4002dcbc5c1 100644 --- a/tensorflow/c/eager/c_api_test_util.cc +++ b/tensorflow/c/eager/c_api_test_util.cc @@ -88,6 +88,20 @@ TFE_TensorHandle* TestMatrixTensorHandle(TFE_Context* ctx) { return th; } +TFE_TensorHandle* TestMatrixTensorHandleWithInput(TFE_Context* ctx, float data[]) { + int64_t dims[] = {2, 2}; + //float data[] = vals; + TF_Status* status = TF_NewStatus(); + TF_Tensor* t = TFE_AllocateHostTensor(ctx, TF_FLOAT, &dims[0], + sizeof(dims) / sizeof(int64_t), status); + memcpy(TF_TensorData(t), &data[0], TF_TensorByteSize(t)); + TFE_TensorHandle* th = TFE_NewTensorHandleFromTensor(ctx, t, status); + CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TF_DeleteTensor(t); + TF_DeleteStatus(status); + return th; +} + TFE_TensorHandle* TestMatrixTensorHandle100x100(TFE_Context* ctx) { constexpr int64_t dims[] = {100, 100}; constexpr int num_elements = dims[0] * dims[1]; diff --git a/tensorflow/c/eager/c_api_test_util.h b/tensorflow/c/eager/c_api_test_util.h index fcf62223f14..497264e7c01 100644 --- a/tensorflow/c/eager/c_api_test_util.h +++ b/tensorflow/c/eager/c_api_test_util.h @@ -34,6 +34,9 @@ TFE_TensorHandle* DoubleTestMatrixTensorHandle(TFE_Context* ctx); // Return a tensor handle containing a 2x2 matrix of floats TFE_TensorHandle* TestMatrixTensorHandle(TFE_Context* ctx); +//Return a tensor handle containing 2x2 matrix containing given data +TFE_TensorHandle* TestMatrixTensorHandleWithInput(TFE_Context* ctx, float data[]); + // Return a tensor handle containing a 100x100 matrix of floats TFE_TensorHandle* TestMatrixTensorHandle100x100(TFE_Context* ctx); diff --git a/tensorflow/c/eager/c_api_unified_experimental_test.cc b/tensorflow/c/eager/c_api_unified_experimental_test.cc index 24d170f2f99..5c431794747 100644 --- a/tensorflow/c/eager/c_api_unified_experimental_test.cc +++ b/tensorflow/c/eager/c_api_unified_experimental_test.cc @@ -86,6 +86,73 @@ TEST_P(UnifiedCAPI, TestBasicEager) { TF_DeleteExecutionContext(ctx); } + +//MatMul Test +TEST_P(UnifiedCAPI, TestBasicEagerMatMul) { + std::unique_ptr status( + 
TF_NewStatus(), TF_DeleteStatus); + TFE_ContextOptions* opts = TFE_NewContextOptions(); + TF_ExecutionContext* ctx = TF_NewEagerExecutionContext(opts, status.get()); + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + TFE_DeleteContextOptions(opts); + + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + + /* Want to test simple MatMul example: + + [ [0,0] , * [ [0,0] , = [ [0,0], + [0,0] ] [0,0] ] [0,0]] + + */ + // Build an abstract input tensor. + float vals [] = {0.0f,0.0f,0.0f,0.0f}; + TFE_Context* eager_ctx = TF_ExecutionContextGetTFEContext(ctx); + TFE_TensorHandle* t = TestMatrixTensorHandleWithInput(eager_ctx, vals); + + TF_AbstractTensor* at = + TF_CreateAbstractTensorFromEagerTensor(t, status.get()); + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + + // Build an abstract operation. + auto* op = TF_NewAbstractOp(ctx); + TF_AbstractOpSetOpType(op, "MatMul", status.get()); //correct syntax to specify matrix multiply for tensors? + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + + // Build inputs and outputs. + TF_AbstractTensor* inputs[2] = {at, at}; + TF_OutputList* o = TF_NewOutputList(); + TF_OutputListSetNumOutputs(o, 1, status.get()); + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + + // Execute. + TF_ExecuteOperation(op, 2, inputs, o, ctx, status.get()); + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + + // Clean up operation and inputs. + TF_DeleteAbstractOp(op); + TF_DeleteAbstractTensor(at); + + // Verify the results. + ASSERT_EQ(1, TF_OutputListNumOutputs(o)); + TF_AbstractTensor* result = TF_OutputListGet(o, 0); + TFE_TensorHandle* result_t = + TF_AbstractTensorGetEagerTensor(result, status.get()); + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + TF_Tensor* result_tensor = TFE_TensorHandleResolve(result_t, status.get()); + TF_Tensor* t_tensor = TFE_TensorHandleResolve(t, status.get()); //Is this the best way to get the TF_Tensor from t? + float* result_value = static_cast(TF_TensorData(result_tensor)); + float* t_value = static_cast(TF_TensorData(t_tensor)); + EXPECT_EQ(*result_value, *t_value); + + TF_DeleteTensor(result_tensor); + TF_DeleteTensor(t_tensor); + TF_DeleteAbstractTensor(result); + TF_DeleteOutputList(o); + TF_DeleteExecutionContext(ctx); +} + + + TEST_P(UnifiedCAPI, TestBasicGraph) { std::unique_ptr status( TF_NewStatus(), TF_DeleteStatus); From f40cf089f0ecd0316c0150918b9bbfc930fba955 Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Fri, 26 Jun 2020 22:05:12 +0000 Subject: [PATCH 0067/2522] completed allocate_temp --- tensorflow/c/kernels.cc | 24 +++++ tensorflow/c/kernels.h | 9 ++ tensorflow/c/kernels_test.cc | 166 +++++++++++++++++++++++++++-------- 3 files changed, 163 insertions(+), 36 deletions(-) diff --git a/tensorflow/c/kernels.cc b/tensorflow/c/kernels.cc index a0ed0d9f245..80b5234b52d 100644 --- a/tensorflow/c/kernels.cc +++ b/tensorflow/c/kernels.cc @@ -26,6 +26,9 @@ limitations under the License. #include "tensorflow/core/framework/types.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/lib/gtl/array_slice.h" + // This file forms the basis of a stable ABI for third-party kernel // implementations. It is crucial that changes to this file are made cautiously // and with a focus on maintaining both source and binary compatibility. 
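// A rough usage sketch for the TF_AllocateTemp API added in the hunk below;
// the kernel callback name, dimensions, and fill logic are illustrative only:
//
//   static void MyComputeFunc(void* kernel, TF_OpKernelContext* ctx) {
//     TF_Status* s = TF_NewStatus();
//     int64_t dims[2] = {2, 3};
//     TF_Tensor* tmp = TF_AllocateTemp(ctx, TF_FLOAT, dims, 2, s);
//     if (TF_GetCode(s) == TF_OK) {
//       // ... fill TF_TensorData(tmp), then e.g. TF_SetOutput(ctx, 0, tmp, s) ...
//     }
//     TF_DeleteTensor(tmp);
//     TF_DeleteStatus(s);
//   }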
@@ -260,3 +263,24 @@ TF_Tensor* TF_AllocateOutput(TF_OpKernelContext* context, int index, } return result; } + +TF_Tensor* TF_AllocateTemp(TF_OpKernelContext* context, TF_DataType dtype, + int64_t* dims, int num_dims, TF_Status* status){ + auto* cc_ctx = reinterpret_cast<::tensorflow::OpKernelContext*>(context); + TF_SetStatus(status, TF_OK, ""); + tensorflow::TensorShape shape; + for(int i = 0; i < num_dims; ++i){ + shape.AddDim(dims[i]); + } + tensorflow::Status s; + tensorflow::Tensor tensor_temp; + TF_Tensor* tf_tensor_temp; + s = cc_ctx->allocate_temp(static_cast(dtype), shape, &tensor_temp); + if (s.ok()){ + tf_tensor_temp = TF_TensorFromTensor(tensor_temp, &s); + } + if (s.ok()){ + ::tensorflow::Set_TF_Status_from_Status(status, s); + return tf_tensor_temp; + } +} diff --git a/tensorflow/c/kernels.h b/tensorflow/c/kernels.h index 084717c1d9e..e450511da3a 100644 --- a/tensorflow/c/kernels.h +++ b/tensorflow/c/kernels.h @@ -190,6 +190,15 @@ TF_CAPI_EXPORT TF_Tensor* TF_AllocateOutput(TF_OpKernelContext* context, int64_t* dims, int num_dims, size_t len, TF_Status* status); +// Allocates a temporary Tensor of the specified type and shape. The +// Tensor must not be used after kernel construction is +// complete. + +// num_dims must equal the size of array dims +TF_CAPI_EXPORT extern TF_Tensor* TF_AllocateTemp(TF_OpKernelContext* context, TF_DataType dtype, + int64_t* dims, int num_dims, TF_Status* status); + + #ifdef __cplusplus } /* end extern "C" */ #endif diff --git a/tensorflow/c/kernels_test.cc b/tensorflow/c/kernels_test.cc index 423302741de..738c1e12c80 100644 --- a/tensorflow/c/kernels_test.cc +++ b/tensorflow/c/kernels_test.cc @@ -360,6 +360,17 @@ class DeviceKernelOpTest : public OpsTestBase { #endif }; +// Helper function for tests that validates that the tensor has +// shape and type corresponding to dims and dtype. 
+void validate_tensor(TF_Tensor* tensor, int64_t* dims, int64_t num_dims, + TF_DataType dtype); + +// Helper function for tests that copies data of length +// tensor_size_bytes from values to tensor +template +void set_tensor_data(TF_Tensor* tensor, T* values, size_t tensor_size_bytes, + TF_OpKernelContext* ctx); + REGISTER_OP("AllocateOutputOp1").Output("output1: float"); TEST_F(DeviceKernelOpTest, TestAllocateOutputSizeOne) { @@ -371,22 +382,11 @@ TEST_F(DeviceKernelOpTest, TestAllocateOutputSizeOne) { TF_Tensor* output = TF_AllocateOutput( /*context=*/ctx, /*index=*/0, /*dtype=*/TF_FLOAT, /*dims=*/&dim, /*num_dims=*/1, /*len=*/tensor_size_bytes, s); - EXPECT_EQ(TF_OK, TF_GetCode(s)); - EXPECT_EQ(TF_FLOAT, TF_TensorType(output)); - EXPECT_EQ(1, TF_NumDims(output)); - EXPECT_EQ(1, TF_Dim(output, 0)); - + validate_tensor(output, &dim, 1, TF_FLOAT); + // Set output to 3 - float* data = reinterpret_cast(TF_TensorData(output)); - float value = 3.0f; -#if GOOGLE_CUDA - OpKernelContext* cc_ctx = reinterpret_cast(ctx); - cc_ctx->eigen_gpu_device().memcpyHostToDevice(data, &value, - tensor_size_bytes); -#else - *data = value; -#endif - + float values[1] = {3.0f}; + set_tensor_data(output, values, tensor_size_bytes, ctx); TF_DeleteStatus(s); TF_DeleteTensor(output); }; @@ -409,12 +409,8 @@ TEST_F(DeviceKernelOpTest, TestAllocateEmptyOutput) { TF_Tensor* output = TF_AllocateOutput( /*context=*/ctx, /*index=*/0, /*dtype=*/TF_FLOAT, /*dims=*/&dim, /*num_dims=*/1, /*len=*/0, s); - EXPECT_EQ(TF_OK, TF_GetCode(s)); - EXPECT_EQ(TF_FLOAT, TF_TensorType(output)); - EXPECT_EQ(1, TF_NumDims(output)); - EXPECT_EQ(0, TF_Dim(output, 0)); - + validate_tensor(output, &dim, 1, TF_FLOAT); TF_DeleteStatus(s); TF_DeleteTensor(output); }; @@ -434,27 +430,16 @@ TEST_F(DeviceKernelOpTest, TestAllocateOutputSize2x3) { TF_Status* s = TF_NewStatus(); // Allocate 2x3 output int64_t dim[2] = {2, 3}; - size_t tensor_size_bytes = 6 * TF_DataTypeSize(TF_FLOAT); + size_t tensor_size_bytes = TF_DataTypeSize(TF_FLOAT) * 6; TF_Tensor* output = TF_AllocateOutput( /*context=*/ctx, /*index=*/0, /*dtype=*/TF_FLOAT, /*dims=*/dim, /*num_dims=*/2, /*len=*/tensor_size_bytes, s); EXPECT_EQ(TF_OK, TF_GetCode(s)); - EXPECT_EQ(TF_FLOAT, TF_TensorType(output)); - EXPECT_EQ(2, TF_NumDims(output)); - EXPECT_EQ(2, TF_Dim(output, 0)); - EXPECT_EQ(3, TF_Dim(output, 1)); + validate_tensor(output, dim, 2, TF_FLOAT); // Set output to [1 2 3 4 5 6] - void* data = TF_TensorData(output); - float value[6] = {1, 2, 3, 4, 5, 6}; -#if GOOGLE_CUDA - OpKernelContext* cc_ctx = reinterpret_cast(ctx); - cc_ctx->eigen_gpu_device().memcpyHostToDevice(data, value, - tensor_size_bytes); -#else - memcpy(data, value, tensor_size_bytes); -#endif - + float values[6] = {1, 2, 3, 4, 5, 6}; + set_tensor_data(output, values, tensor_size_bytes, ctx); TF_DeleteStatus(s); TF_DeleteTensor(output); }; @@ -466,4 +451,113 @@ TEST_F(DeviceKernelOpTest, TestAllocateOutputSize2x3) { EXPECT_EQ("Tensor", output->DebugString(100)); } -} // namespace tensorflow + +REGISTER_OP("AllocateTempOp1").Output("output1: float"); + +TEST_F(DeviceKernelOpTest, TestAllocateTempSizeOne) { + auto my_compute_func = [](void* kernel, TF_OpKernelContext* ctx) { + // Allocate output + TF_Status* s = TF_NewStatus(); + int64_t dim = 1; + TF_Tensor* output = TF_AllocateTemp( + /*context=*/ctx, /*dtype=*/TF_FLOAT, /*dims=*/&dim, + /*num_dims=*/1, s); + size_t tensor_size_bytes = TF_DataTypeSize(TF_FLOAT); + EXPECT_EQ(TF_OK, TF_GetCode(s)); + validate_tensor(output, &dim, 1, TF_FLOAT); + + // Set output to 3 + 
float values[1] = {3.0f}; + set_tensor_data(output, values, tensor_size_bytes, ctx); + TF_SetOutput(ctx, 0, output, s); + TF_DeleteStatus(s); + TF_DeleteTensor(output); + }; + + SetupOp("AllocateTempOp1", "AllocateTemp1", my_compute_func); + + TF_ASSERT_OK(RunOpKernel()); + Tensor* output = GetOutput(0); + EXPECT_EQ("Tensor", + output->DebugString(100)); +} + +REGISTER_OP("AllocateTempOp0").Output("output1: float"); + +TEST_F(DeviceKernelOpTest, TestAllocateTempEmpty) { + auto my_compute_func = [](void* kernel, TF_OpKernelContext* ctx) { + TF_Status* s = TF_NewStatus(); + // Allocate empty output + int64_t dim = 0; + TF_Tensor* output = TF_AllocateTemp( + /*context=*/ctx, /*dtype=*/TF_FLOAT, /*dims=*/&dim, + /*num_dims=*/1, s); + EXPECT_EQ(TF_OK, TF_GetCode(s)); + validate_tensor(output, &dim, 1, TF_FLOAT); + TF_SetOutput(ctx, 0, output, s); + TF_DeleteStatus(s); + TF_DeleteTensor(output); + }; + + SetupOp("AllocateTempOp0", "AllocateTemp0", my_compute_func); + + TF_ASSERT_OK(RunOpKernel()); + Tensor* output = GetOutput(0); + EXPECT_EQ("Tensor", + output->DebugString(100)); +} + +REGISTER_OP("AllocateTempOp2x3").Output("output1: float"); + +TEST_F(DeviceKernelOpTest, TestAllocateTempSize2x3) { + auto my_compute_func = [](void* kernel, TF_OpKernelContext* ctx) { + TF_Status* s = TF_NewStatus(); + size_t tensor_size_bytes = 6 * TF_DataTypeSize(TF_FLOAT); + // Allocate 2x3 output + int64_t dim[2] = {2, 3}; + TF_Tensor* output = TF_AllocateTemp( + /*context=*/ctx, /*dtype=*/TF_FLOAT, /*dims=*/dim, + /*num_dims=*/2, s); + EXPECT_EQ(TF_OK, TF_GetCode(s)); + validate_tensor(output, dim, 2, TF_FLOAT); + + // Set output to [1 2 3 4 5 6] + void* data = TF_TensorData(output); + float values[6] = {1, 2, 3, 4, 5, 6}; + set_tensor_data(output, values, tensor_size_bytes, ctx); + TF_SetOutput(ctx, 0, output, s); + TF_DeleteStatus(s); + TF_DeleteTensor(output); + }; + + SetupOp("AllocateTempOp2x3", "AllocateTempOp2x3", my_compute_func); + + TF_ASSERT_OK(RunOpKernel()); + Tensor* output = GetOutput(0); + EXPECT_EQ("Tensor", + output->DebugString(100)); +} + +void validate_tensor(TF_Tensor* tensor, int64_t* dims, int64_t num_dims, + TF_DataType dtype){ + EXPECT_EQ(TF_FLOAT, TF_TensorType(tensor)); + EXPECT_EQ(num_dims, TF_NumDims(tensor)); + for(int i = 0; i < num_dims; ++i){ + EXPECT_EQ(dims[i], TF_Dim(tensor, i)); + } +} + +template +void set_tensor_data(TF_Tensor* tensor, T* values, size_t tensor_size_bytes, + TF_OpKernelContext* ctx){ + T* data = reinterpret_cast(TF_TensorData(tensor)); +#if GOOGLE_CUDA + OpKernelContext* cc_ctx = reinterpret_cast(ctx); + cc_ctx->eigen_gpu_device().memcpyHostToDevice(data, values, + tensor_size_bytes); +#else + memcpy(data, values, tensor_size_bytes); +#endif +} + +} // namespace tensorflow \ No newline at end of file From 8b07609b2ee977a8a97120dbaad4c3fabc151b0f Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Fri, 26 Jun 2020 22:33:19 +0000 Subject: [PATCH 0068/2522] added priority function to TF_KernelDefBuilder --- tensorflow/c/kernels.cc | 5 +++++ tensorflow/c/kernels.h | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/tensorflow/c/kernels.cc b/tensorflow/c/kernels.cc index 80b5234b52d..905219c6e16 100644 --- a/tensorflow/c/kernels.cc +++ b/tensorflow/c/kernels.cc @@ -100,6 +100,11 @@ void TF_KernelBuilder_HostMemory(TF_KernelBuilder* kernel_builder, kernel_builder->cc_builder->HostMemory(arg_name); } +void TF_KernelBuilder_Priority(TF_KernelBuilder* kernel_builder, + int32_t priority_number){ + kernel_builder->cc_builder->Priority(priority_number); +} + 
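// A rough registration sketch for the priority setter added above; the op
// name, device type, callbacks, and priority value are illustrative only:
//
//   TF_KernelBuilder* builder = TF_NewKernelBuilder(
//       "MyOp", "GPU", /*create_func=*/nullptr, &MyComputeFunc,
//       /*delete_func=*/nullptr);
//   TF_KernelBuilder_Priority(builder, 1);
//   TF_RegisterKernelBuilder("MyOp_GPU", builder, status);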
namespace tensorflow { namespace { diff --git a/tensorflow/c/kernels.h b/tensorflow/c/kernels.h index e450511da3a..b245dd8a7fc 100644 --- a/tensorflow/c/kernels.h +++ b/tensorflow/c/kernels.h @@ -107,6 +107,10 @@ TF_CAPI_EXPORT extern void TF_KernelBuilder_TypeConstraint( TF_CAPI_EXPORT extern void TF_KernelBuilder_HostMemory( TF_KernelBuilder* kernel_builder, const char* arg_name); +// Specify a priority number for this kernel. +TF_CAPI_EXPORT extern void TF_KernelBuilder_Priority( + TF_KernelBuilder* kernel_builder, int32_t priority_number); + // Register the given kernel builder with the TensorFlow runtime. If // registration fails, the given status will be populated. // From db2b7a6677f87bdfa37557b1872dd7ef2b6315f3 Mon Sep 17 00:00:00 2001 From: zilinzhu Date: Sat, 27 Jun 2020 06:58:26 +0800 Subject: [PATCH 0069/2522] fix lint error --- .../optimization/reorder_data_discarding_ops_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/reorder_data_discarding_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/reorder_data_discarding_ops_test.py index 260d6a37495..10f28dc4722 100644 --- a/tensorflow/python/data/experimental/kernel_tests/optimization/reorder_data_discarding_ops_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/optimization/reorder_data_discarding_ops_test.py @@ -26,7 +26,8 @@ from tensorflow.python.framework import combinations from tensorflow.python.platform import test -class ReorderDataDiscardingOpsTest(test_base.DatasetTestBase, parameterized.TestCase): +class ReorderDataDiscardingOpsTest(test_base.DatasetTestBase, + parameterized.TestCase): @combinations.generate(combinations.combine(tf_api_version=2, mode=["eager", "graph"])) From 57d698854a16b6b7db011069f04e6b1c27b069cb Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Sun, 28 Jun 2020 03:48:59 +0200 Subject: [PATCH 0070/2522] Don't call batch hooks in tb and checkpoint callbacks if possible --- tensorflow/python/keras/callbacks.py | 7 +++++++ tensorflow/python/keras/callbacks_test.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/keras/callbacks.py b/tensorflow/python/keras/callbacks.py index 9e7575a232b..f19f03125d5 100644 --- a/tensorflow/python/keras/callbacks.py +++ b/tensorflow/python/keras/callbacks.py @@ -1235,6 +1235,10 @@ class ModelCheckpoint(Callback): raise ValueError('Error loading file from {}. 
Reason: {}'.format( filepath_to_load, e)) + def _implements_train_batch_hooks(self): + # Only call batch hooks when saving on batch + return self.save_freq != 'epoch' + def on_train_batch_end(self, batch, logs=None): if self._should_save_on_batch(batch): self._save_model(epoch=self._current_epoch, logs=logs) @@ -2155,6 +2159,9 @@ class TensorBoard(Callback, version_utils.TensorBoardVersionSelector): def on_test_end(self, logs=None): self._pop_writer() + def _implements_train_batch_hooks(self): + return self._should_trace # Only call batch hooks when tracing is enabled + def on_train_batch_begin(self, batch, logs=None): self._global_train_batch += 1 if not self._should_trace: diff --git a/tensorflow/python/keras/callbacks_test.py b/tensorflow/python/keras/callbacks_test.py index d180e85a1d9..f3d19d949d0 100644 --- a/tensorflow/python/keras/callbacks_test.py +++ b/tensorflow/python/keras/callbacks_test.py @@ -2203,7 +2203,7 @@ class TestTensorBoardV2NonParameterizedTest(keras_parameterized.TestCase): model.compile('sgd', 'mse', run_eagerly=False) self.fitModelAndAssertKerasModelWritten(model) - def test_TensoriBoard_writeModel(self): + def test_TensorBoard_writeModel(self): inputs = keras.layers.Input([10, 10, 1]) x = keras.layers.Conv2D(8, (3, 3), activation='relu')(inputs) x = keras.layers.Flatten()(x) From 5d08cdbd3e00534ec734e7945f64dc8665af9b08 Mon Sep 17 00:00:00 2001 From: rahul-kamat Date: Sun, 28 Jun 2020 04:42:30 +0000 Subject: [PATCH 0071/2522] Create boolean flag to enable using type annotations to improve tracing --- tensorflow/python/eager/def_function.py | 22 +++++++++++++++++----- tensorflow/python/eager/function.py | 24 ++++++++++++++++++++---- 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py index d38870f2fe2..6cadb5a021c 100644 --- a/tensorflow/python/eager/def_function.py +++ b/tensorflow/python/eager/def_function.py @@ -456,7 +456,8 @@ class Function(object): experimental_implements=None, experimental_autograph_options=None, experimental_relax_shapes=False, - experimental_compile=None): + experimental_compile=None, + experimental_type_tracing=False): """Initializes a `Function`. Args: @@ -512,6 +513,9 @@ class Function(object): executor). Set this value to `False` when directly running a multi-device function on TPUs (e.g. two TPU cores, one TPU core and its host CPU). + experimental_type_tracing: When true, arguments type annotated with + tf.TensorLike will be treated as if they were a tensor. + Raises: ValueError: if `input_signature` is not None and the `python_function`'s argspec has keyword arguments. @@ -519,7 +523,8 @@ class Function(object): self._lock = threading.Lock() self._python_function = python_function self._function_spec = function_lib.FunctionSpec.from_function_and_signature( - python_function, input_signature) + python_function, input_signature, + experimental_type_tracing=experimental_type_tracing) self._implements = experimental_implements # If `True`, the function uses the rendezvous of the parent. 
This is only # needed to support code where raw send/recv operations are inserted and @@ -529,6 +534,7 @@ class Function(object): self._experimental_autograph_options = experimental_autograph_options self._experimental_relax_shapes = experimental_relax_shapes self._experimental_compile = experimental_compile + self._experimental_type_tracing = experimental_type_tracing self._created_variables = None # GUARDED_BY(self._lock) self._stateful_fn = None # GUARDED_BY(self._lock) self._stateless_fn = None # GUARDED_BY(self._lock) @@ -658,6 +664,7 @@ class Function(object): autograph=self._autograph, experimental_autograph_options=self._experimental_autograph_options, experimental_compile=self._experimental_compile, + experimental_type_tracing=self._experimental_type_tracing, experimental_relax_shapes=self._experimental_relax_shapes) def _initialize(self, args, kwds, add_initializers_to=None): @@ -716,7 +723,8 @@ class Function(object): experimental_implements=self._implements, experimental_autograph_options=self._experimental_autograph_options, experimental_relax_shapes=self._experimental_relax_shapes, - experimental_compile=self._experimental_compile) + experimental_compile=self._experimental_compile, + experimental_type_tracing=self._experimental_type_tracing) if self._shared_rendezvous: f._shared_rendezvous = self._shared_rendezvous # pylint: disable=protected-access @@ -1203,7 +1211,8 @@ def function(func=None, experimental_implements=None, experimental_autograph_options=None, experimental_relax_shapes=False, - experimental_compile=None): + experimental_compile=None, + experimental_type_tracing=False): """Compiles a function into a callable TensorFlow graph. `tf.function` constructs a callable that executes a TensorFlow graph @@ -1406,6 +1415,8 @@ def function(func=None, experimental_compile: If True, the function is always compiled by [XLA](https://www.tensorflow.org/xla). XLA may be more efficient in some cases (e.g. TPU, XLA_GPU, dense tensor computations). + experimental_type_tracing: When true, arguments type annotated with + tf.TensorLike will be treated as if they were a tensor. Returns: If `func` is not None, returns a callable that will execute the compiled @@ -1436,7 +1447,8 @@ def function(func=None, experimental_autograph_options=experimental_autograph_options, experimental_relax_shapes=experimental_relax_shapes, experimental_compile=experimental_compile, - experimental_implements=experimental_implements)) + experimental_implements=experimental_implements, + experimental_type_tracing=experimental_type_tracing)) # This code path is for the `foo = tf.function(foo, ...)` use case if func is not None: diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index dc75ca13645..750c000afbd 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -2313,7 +2313,8 @@ class FunctionSpec(object): @staticmethod def from_function_and_signature(python_function, input_signature, - is_pure=False): + is_pure=False, + experimental_type_tracing=False): """Create a FunctionSpec instance given a python function and signature. 
Args: @@ -2398,13 +2399,16 @@ class FunctionSpec(object): name = getattr(python_function, "__name__", "f") return FunctionSpec( - fullargspec, is_method, input_signature, is_pure=is_pure, name=name) + fullargspec, is_method, input_signature, + is_pure=is_pure, experimental_type_tracing=experimental_type_tracing, + name=name) def __init__(self, fullargspec, is_method, input_signature, is_pure=False, + experimental_type_tracing=False, name=None): """Constructs a FunctionSpec describing a python function. @@ -2419,6 +2423,7 @@ class FunctionSpec(object): self._fullargspec = fullargspec self._is_method = is_method self._is_pure = is_pure + self._experimental_type_tracing = experimental_type_tracing # TODO(edloper): Include name when serializing for SavedModel? self._name = name or "f" @@ -2487,6 +2492,10 @@ class FunctionSpec(object): def is_pure(self): return self._is_pure + @property + def experimental_type_tracing(self): + return self._experimental_type_tracing + @property def arg_names(self): return self._arg_names @@ -2788,7 +2797,8 @@ class Function(object): autograph_options=None, experimental_relax_shapes=False, capture_by_value=None, - experimental_compile=None): + experimental_compile=None, + experimental_type_tracing=False): """Initializes a `Function`. Args: @@ -2812,6 +2822,8 @@ class Function(object): default to False. experimental_compile: Force-compile the function with XLA, cf. def_function.Function doc on experimental_compile. + experimental_type_tracing: When true, arguments type annotated with + tf.TensorLike will be treated as if they were a tensor. Raises: ValueError: if `input_signature` is not None and the `python_function`'s @@ -2820,7 +2832,8 @@ class Function(object): self._python_function = python_function pure_function = attributes and IMPLEMENTS_ATTRIBUTE_NAME in attributes self._function_spec = FunctionSpec.from_function_and_signature( - python_function, input_signature, is_pure=pure_function) + python_function, input_signature, is_pure=pure_function, + experimental_type_tracing=experimental_type_tracing) self._name = name self._autograph = autograph self._autograph_options = autograph_options @@ -2836,6 +2849,7 @@ class Function(object): # functions for each instance. self._descriptor_cache = weakref.WeakKeyDictionary() self._experimental_compile = experimental_compile + self._experimental_type_tracing = experimental_type_tracing def __call__(self, *args, **kwargs): """Calls a graph function specialized to the inputs.""" @@ -3612,6 +3626,7 @@ def defun_with_attributes(func=None, autograph=True, experimental_autograph_options=None, experimental_compile=None, + experimental_type_tracing=False, experimental_relax_shapes=False): """Compiles a Python function into a callable TensorFlow graph. 
@@ -3661,6 +3676,7 @@ def defun_with_attributes(func=None, autograph=autograph, autograph_options=experimental_autograph_options, experimental_compile=experimental_compile, + experimental_type_tracing=experimental_type_tracing, experimental_relax_shapes=experimental_relax_shapes)) # This code path is for the `foo = tfe.defun(foo, ...)` use case From 030fae94de1a0dbf8c410b840124ce33e74d7e1a Mon Sep 17 00:00:00 2001 From: rahul-kamat Date: Sun, 28 Jun 2020 04:46:17 +0000 Subject: [PATCH 0072/2522] Create TensorLike to be a Union of function input types --- tensorflow/python/types/core.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tensorflow/python/types/core.py b/tensorflow/python/types/core.py index 20da83e562d..f2c5d965a2d 100644 --- a/tensorflow/python/types/core.py +++ b/tensorflow/python/types/core.py @@ -18,6 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from typing import Union +from tensorflow.python.util.tf_export import tf_export # TODO(mdan): Consider adding ABC once the dependence on isinstance is reduced. # TODO(mdan): Add type annotations. @@ -58,3 +60,6 @@ class Value(Tensor): def numpy(self): pass + +# TODO(rahulkamat): add complete set of types to TensorLike and add tf.export +TensorLike = Union[Tensor, int, float, bool, str, tuple] From b26422c68be777c62febeeec38f2b4015c208ed2 Mon Sep 17 00:00:00 2001 From: rahul-kamat Date: Sun, 28 Jun 2020 05:07:13 +0000 Subject: [PATCH 0073/2522] Add method to convert arguments annotated with TensorLike to tensors --- tensorflow/python/eager/function.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 750c000afbd..cb3cc6d76d0 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -74,6 +74,7 @@ from tensorflow.python.util import nest from tensorflow.python.util import object_identity from tensorflow.python.util import tf_decorator from tensorflow.python.util import tf_inspect +from tensorflow.python.types import core as core_tf_types # Loaded lazily due to a circular dependency (roughly # tf.function->autograph->->dataset->tf.function). @@ -2534,6 +2535,31 @@ class FunctionSpec(object): kwargs = {kw: ops.convert_to_tensor(x) for kw, x in kwargs.items()} return tuple(args), kwargs + def _convert_typed_variables_to_tensors(self, args, kwargs): + if self.input_signature is not None: + return + + args = list(args) + for i, arg in enumerate(args): + if i < len(self._fullargspec.args): + arg_annotation = self._fullargspec.annotations.get( + self._fullargspec.args[i]) + if arg_annotation == core_tf_types.TensorLike: + args[i] = ops.convert_to_tensor(arg) + else: + varargs_annotation = self._fullargspec.annotations.get( + self._fullargspec.varargs) + if varargs_annotation == core_tf_types.TensorLike: + args[i] = ops.convert_to_tensor(arg) + + if self._fullargspec.varkw is not None: + varkw_annotation = self._fullargspec.annotations.get( + self._fullargspec.varkw) + if varkw_annotation == core_tf_types.TensorLike: + kwargs = {kw: ops.convert_to_tensor(x) for kw, x in kwargs.items()} + + return tuple(args), kwargs + def canonicalize_function_inputs(self, *args, **kwargs): """Canonicalizes `args` and `kwargs`. 
@@ -2566,6 +2592,8 @@ class FunctionSpec(object): """ if self._is_pure: args, kwargs = self._convert_variables_to_tensors(args, kwargs) + if self._experimental_type_tracing: + args, kwargs = self._convert_typed_variables_to_tensors(args, kwargs) if self._input_signature is not None: if len(args) > len(self._input_signature): raise TypeError("{} takes {} positional arguments (as specified by the " From bf79497b694a81269516a1cfbcc99260a5b4344f Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Sun, 28 Jun 2020 12:33:31 +0200 Subject: [PATCH 0074/2522] Make ProgBarLogger callback not use batch hooks --- tensorflow/python/keras/callbacks.py | 48 +++++++++++++++++------ tensorflow/python/keras/callbacks_test.py | 13 ++++++ 2 files changed, 48 insertions(+), 13 deletions(-) diff --git a/tensorflow/python/keras/callbacks.py b/tensorflow/python/keras/callbacks.py index f19f03125d5..8077861552e 100644 --- a/tensorflow/python/keras/callbacks.py +++ b/tensorflow/python/keras/callbacks.py @@ -424,8 +424,6 @@ class CallbackList(object): the values of the `Model`'s metrics are returned. Example: `{'loss': 0.2, 'accuracy': 0.7}`. """ - # TODO(b/150629188): Make ProgBarLogger callback not use batch hooks - # when verbose != 1 if self._should_call_train_batch_hooks: self._call_batch_hook(ModeKeys.TRAIN, 'begin', batch, logs=logs) @@ -929,6 +927,9 @@ class ProgbarLogger(Callback): self.verbose = 1 self.epochs = 1 + self._train_step, self._test_step, self._predict_step = None, None, None + self._call_batch_hooks = True + self._called_in_fit = False def set_params(self, params): @@ -941,6 +942,16 @@ class ProgbarLogger(Callback): else: self.target = None # Will be inferred at the end of the first epoch. + self._call_batch_hooks = self.verbose == 1 + if self.target is None: + try: + self._train_step = self.model._train_counter # pylint: disable=protected-access + self._test_step = self.model._test_counter # pylint: disable=protected-access + self._predict_step = self.model._predict_counter # pylint: disable=protected-access + except AttributeError: + self._call_batch_hooks = True + + def on_train_begin(self, logs=None): # When this logger is called inside `fit`, validation is silent. 
self._called_in_fit = True @@ -969,14 +980,14 @@ class ProgbarLogger(Callback): self._batch_update_progbar(batch, None) def on_epoch_end(self, epoch, logs=None): - self._finalize_progbar(logs) + self._finalize_progbar(logs, self._train_step) def on_test_end(self, logs=None): if not self._called_in_fit: - self._finalize_progbar(logs) + self._finalize_progbar(logs, self._test_step) def on_predict_end(self, logs=None): - self._finalize_progbar(logs) + self._finalize_progbar(logs, self._predict_step) def _reset_progbar(self): self.seen = 0 @@ -985,7 +996,7 @@ class ProgbarLogger(Callback): def _maybe_init_progbar(self): if self.stateful_metrics is None: if self.model: - self.stateful_metrics = (set(m.name for m in self.model.metrics)) + self.stateful_metrics = set(m.name for m in self.model.metrics) else: self.stateful_metrics = set() @@ -996,6 +1007,15 @@ class ProgbarLogger(Callback): stateful_metrics=self.stateful_metrics, unit_name='step' if self.use_steps else 'sample') + def _implements_train_batch_hooks(self): + return self._call_batch_hooks + + def _implements_test_batch_hooks(self): + return self._call_batch_hooks + + def _implements_predict_batch_hooks(self): + return self._call_batch_hooks + def _batch_update_progbar(self, batch, logs=None): """Updates the progbar.""" logs = logs or {} @@ -1016,14 +1036,16 @@ class ProgbarLogger(Callback): logs = tf_utils.to_numpy_or_python_type(logs) self.progbar.update(self.seen, list(logs.items()), finalize=False) - def _finalize_progbar(self, logs): - logs = logs or {} - self._maybe_init_progbar() + def _finalize_progbar(self, logs, counter): + logs = tf_utils.to_numpy_or_python_type(logs or {}) if self.target is None: - self.target = self.seen - self.progbar.target = self.seen - logs = tf_utils.to_numpy_or_python_type(logs) - self.progbar.update(self.seen, list(logs.items()), finalize=True) + if counter is not None: + counter = counter.numpy() + if not self.use_steps: + counter *= logs.get('size', 1) + self.target = counter or self.seen + self._maybe_init_progbar() + self.progbar.update(self.target, list(logs.items()), finalize=True) @keras_export('keras.callbacks.History') diff --git a/tensorflow/python/keras/callbacks_test.py b/tensorflow/python/keras/callbacks_test.py index f3d19d949d0..60b8b69b642 100644 --- a/tensorflow/python/keras/callbacks_test.py +++ b/tensorflow/python/keras/callbacks_test.py @@ -1704,6 +1704,19 @@ class KerasCallbacksTest(keras_parameterized.TestCase): self.assertEqual(my_cb.test_batches, 0) self.assertEqual(my_cb.predict_batches, 0) + @keras_parameterized.run_all_keras_modes(always_skip_v1=True) + def test_default_callbacks_do_not_call_batch_hooks(self): + model = keras.Sequential([keras.layers.Dense(1)]) + log_dir = self.get_temp_dir() + cb_list = keras.callbacks.CallbackList([ + keras.callbacks.TensorBoard(log_dir, profile_batch=0), + keras.callbacks.ModelCheckpoint(log_dir), + ], add_progbar=True, model=model, verbose=2, epochs=3) + self.assertLen(cb_list.callbacks, 3) + self.assertFalse(cb_list._should_call_train_batch_hooks) + self.assertFalse(cb_list._should_call_test_batch_hooks) + self.assertFalse(cb_list._should_call_predict_batch_hooks) + # A summary that was emitted during a test. Fields: # logdir: str. 
The logdir of the FileWriter to which the summary was From b31f069d3abc8446a5d370c6fa65f9df0a7720b6 Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Sun, 28 Jun 2020 13:31:19 +0200 Subject: [PATCH 0075/2522] Fix epoch timing --- tensorflow/python/keras/callbacks.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/keras/callbacks.py b/tensorflow/python/keras/callbacks.py index 8077861552e..a10a3641baa 100644 --- a/tensorflow/python/keras/callbacks.py +++ b/tensorflow/python/keras/callbacks.py @@ -959,12 +959,15 @@ class ProgbarLogger(Callback): def on_test_begin(self, logs=None): if not self._called_in_fit: self._reset_progbar() + self._maybe_init_progbar() def on_predict_begin(self, logs=None): self._reset_progbar() + self._maybe_init_progbar() def on_epoch_begin(self, epoch, logs=None): self._reset_progbar() + self._maybe_init_progbar() if self.verbose and self.epochs > 1: print('Epoch %d/%d' % (epoch + 1, self.epochs)) @@ -1044,7 +1047,7 @@ class ProgbarLogger(Callback): if not self.use_steps: counter *= logs.get('size', 1) self.target = counter or self.seen - self._maybe_init_progbar() + self.progbar.target = self.target self.progbar.update(self.target, list(logs.items()), finalize=True) From 70feb59205f75ed0642eb091bd63960f7358039e Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Sun, 28 Jun 2020 20:55:41 +0000 Subject: [PATCH 0076/2522] fixed indentation and comments for allocate_temp --- tensorflow/c/kernels.cc | 11 +++++++---- tensorflow/c/kernels.h | 10 +++++----- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/tensorflow/c/kernels.cc b/tensorflow/c/kernels.cc index 905219c6e16..02703d97bbe 100644 --- a/tensorflow/c/kernels.cc +++ b/tensorflow/c/kernels.cc @@ -281,11 +281,14 @@ TF_Tensor* TF_AllocateTemp(TF_OpKernelContext* context, TF_DataType dtype, tensorflow::Tensor tensor_temp; TF_Tensor* tf_tensor_temp; s = cc_ctx->allocate_temp(static_cast(dtype), shape, &tensor_temp); - if (s.ok()){ - tf_tensor_temp = TF_TensorFromTensor(tensor_temp, &s); + if (!s.ok()){ + ::tensorflow::Set_TF_Status_from_Status(status, s); + return nullptr; } - if (s.ok()){ + tf_tensor_temp = TF_TensorFromTensor(tensor_temp, &s); + if (!s.ok()){ ::tensorflow::Set_TF_Status_from_Status(status, s); - return tf_tensor_temp; + return nullptr; } + return tf_tensor_temp; } diff --git a/tensorflow/c/kernels.h b/tensorflow/c/kernels.h index b245dd8a7fc..8ed3488988d 100644 --- a/tensorflow/c/kernels.h +++ b/tensorflow/c/kernels.h @@ -194,13 +194,13 @@ TF_CAPI_EXPORT TF_Tensor* TF_AllocateOutput(TF_OpKernelContext* context, int64_t* dims, int num_dims, size_t len, TF_Status* status); -// Allocates a temporary Tensor of the specified type and shape. The -// Tensor must not be used after kernel construction is -// complete. +// Allocates a temporary Tensor of the specified type and shape. Devices +// such as GPUs that enqueue Ops for lazy execution may retain references +// to the temporary tensors after the Op's Compute method has run. 
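// Judging from the kernels.cc implementation above, the caller takes ownership
// of the returned TF_Tensor and should release it with TF_DeleteTensor; on
// failure the status is set and nullptr is returned.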
// num_dims must equal the size of array dims -TF_CAPI_EXPORT extern TF_Tensor* TF_AllocateTemp(TF_OpKernelContext* context, TF_DataType dtype, - int64_t* dims, int num_dims, TF_Status* status); +TF_CAPI_EXPORT extern TF_Tensor* TF_AllocateTemp(TF_OpKernelContext* context, + TF_DataType dtype, int64_t* dims, int num_dims, TF_Status* status); #ifdef __cplusplus From b58635dca87103807acd14b67545122d4d112ee0 Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Sun, 28 Jun 2020 20:55:41 +0000 Subject: [PATCH 0077/2522] took out array_slice.h --- tensorflow/c/kernels.cc | 13 +++++++------ tensorflow/c/kernels.h | 10 +++++----- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/tensorflow/c/kernels.cc b/tensorflow/c/kernels.cc index 905219c6e16..3a8170575ad 100644 --- a/tensorflow/c/kernels.cc +++ b/tensorflow/c/kernels.cc @@ -25,9 +25,7 @@ limitations under the License. #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/platform/types.h" - #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/lib/gtl/array_slice.h" // This file forms the basis of a stable ABI for third-party kernel // implementations. It is crucial that changes to this file are made cautiously @@ -281,11 +279,14 @@ TF_Tensor* TF_AllocateTemp(TF_OpKernelContext* context, TF_DataType dtype, tensorflow::Tensor tensor_temp; TF_Tensor* tf_tensor_temp; s = cc_ctx->allocate_temp(static_cast(dtype), shape, &tensor_temp); - if (s.ok()){ - tf_tensor_temp = TF_TensorFromTensor(tensor_temp, &s); + if (!s.ok()){ + ::tensorflow::Set_TF_Status_from_Status(status, s); + return nullptr; } - if (s.ok()){ + tf_tensor_temp = TF_TensorFromTensor(tensor_temp, &s); + if (!s.ok()){ ::tensorflow::Set_TF_Status_from_Status(status, s); - return tf_tensor_temp; + return nullptr; } + return tf_tensor_temp; } diff --git a/tensorflow/c/kernels.h b/tensorflow/c/kernels.h index b245dd8a7fc..8ed3488988d 100644 --- a/tensorflow/c/kernels.h +++ b/tensorflow/c/kernels.h @@ -194,13 +194,13 @@ TF_CAPI_EXPORT TF_Tensor* TF_AllocateOutput(TF_OpKernelContext* context, int64_t* dims, int num_dims, size_t len, TF_Status* status); -// Allocates a temporary Tensor of the specified type and shape. The -// Tensor must not be used after kernel construction is -// complete. +// Allocates a temporary Tensor of the specified type and shape. Devices +// such as GPUs that enqueue Ops for lazy execution may retain references +// to the temporary tensors after the Op's Compute method has run. 
// num_dims must equal the size of array dims -TF_CAPI_EXPORT extern TF_Tensor* TF_AllocateTemp(TF_OpKernelContext* context, TF_DataType dtype, - int64_t* dims, int num_dims, TF_Status* status); +TF_CAPI_EXPORT extern TF_Tensor* TF_AllocateTemp(TF_OpKernelContext* context, + TF_DataType dtype, int64_t* dims, int num_dims, TF_Status* status); #ifdef __cplusplus From 34955fa78c93a0525f8cfc275c5d2aa71b8745a5 Mon Sep 17 00:00:00 2001 From: rahul-kamat Date: Sun, 28 Jun 2020 21:46:36 +0000 Subject: [PATCH 0078/2522] Add tests to verify tracing with annotations --- tensorflow/python/eager/function_test.py | 78 ++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index b70b1bc5c1f..3fda7630d73 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -80,6 +80,7 @@ from tensorflow.python.ops.ragged import ragged_tensor from tensorflow.python.ops.structured import structured_tensor from tensorflow.python.platform import test from tensorflow.python.training import training_ops +from tensorflow.python.types import core as core_tf_types from tensorflow.python.util import compat from tensorflow.python.util import nest from tensorflow.python.util import tf_inspect @@ -3931,6 +3932,83 @@ class FunctionTest(test.TestCase, parameterized.TestCase): gradients(constant_op.constant([[[1.0], [2.0]]])) # No error is raised + def testTraceWithAnnotationsBasic(self): + trace_count = [0] + def func(x: core_tf_types.TensorLike): + trace_count[0] += 1 + return x + + enabled = def_function.function(func, experimental_type_tracing=True) + disabled = def_function.function(func, experimental_type_tracing=False) + + enabled(1) # Initial call gets traced + enabled(2) + enabled(3) + self.assertEqual(trace_count[0], 1) + + trace_count = [0] + disabled(1) + disabled(2) # Retrace + disabled(3) # Retrace + self.assertEqual(trace_count[0], 3) + + def testTraceWithAnnotationsWithArgs(self): + trace_count = [0] + def func(*args: core_tf_types.TensorLike): + trace_count[0] += 1 + return args + + enabled = def_function.function(func, experimental_type_tracing=True) + disabled = def_function.function(func, experimental_type_tracing=False) + + args = ("abc", "def",) * 20 + args2 = ("def", "abc",) * 20 + + enabled(args) + enabled(args2) + self.assertEqual(trace_count[0], 1) + + trace_count = [0] + disabled(args) + disabled(args2) # Retrace + self.assertEqual(trace_count[0], 2) + + def testTraceWithAnnotationsWithKwargs(self): + trace_count = [0] + def func(t: core_tf_types.TensorLike, **kwargs: core_tf_types.TensorLike): + trace_count[0] += 1 + return t + + enabled = def_function.function(func, experimental_type_tracing=True) + disabled = def_function.function(func, experimental_type_tracing=False) + + enabled(1, x=1, y=1.0, z="one") + enabled(2, x=2, y=2.0, z="two") + self.assertEqual(trace_count[0], 1) + + trace_count = [0] + disabled(1, x=1, y=1.0, z="one") + disabled(2, x=2, y=2.0, z="two") # Retrace + self.assertEqual(trace_count[0], 2) + + def testTraceWithAnnotationsWithMultipleInputTypes(self): + trace_count = [0] + def func(t: core_tf_types.TensorLike, *args: core_tf_types.TensorLike, + **kwargs: core_tf_types.TensorLike): + trace_count[0] += 1 + return t + + enabled = def_function.function(func, experimental_type_tracing=True) + disabled = def_function.function(func, experimental_type_tracing=False) + + enabled(1, constant_op.constant(1), "str", x=4.0) + enabled(2, 
constant_op.constant(2), "str2", x=5.0) + self.assertEqual(trace_count[0], 1) + + trace_count = [0] + disabled(1, constant_op.constant(1), "str", x=4.0) + disabled(2, constant_op.constant(2), "str2", x=5.0) # Retrace + self.assertEqual(trace_count[0], 2) class MultiDeviceTest(test.TestCase, parameterized.TestCase): From 44573a2d3a22ad4a2ae6353876a1f1e60e7aceaf Mon Sep 17 00:00:00 2001 From: 8bitmp3 <19637339+8bitmp3@users.noreply.github.com> Date: Mon, 29 Jun 2020 00:28:47 +0100 Subject: [PATCH 0079/2522] Update docs in control_flow_ops.py for consistent Markdown rendering --- .../python/ops/parallel_for/control_flow_ops.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/ops/parallel_for/control_flow_ops.py b/tensorflow/python/ops/parallel_for/control_flow_ops.py index a7649778161..d145a7beda5 100644 --- a/tensorflow/python/ops/parallel_for/control_flow_ops.py +++ b/tensorflow/python/ops/parallel_for/control_flow_ops.py @@ -51,8 +51,8 @@ def for_loop(loop_fn, loop_fn_dtypes, iters, parallel_iterations=None): loop_fn: A function that takes an int32 scalar tf.Tensor object representing the iteration number, and returns a possibly nested structure of tensor objects. The shape of these outputs should not depend on the input. - loop_fn_dtypes: dtypes for the outputs of loop_fn. - iters: Number of iterations for which to run loop_fn. + loop_fn_dtypes: dtypes for the outputs of `loop_fn`. + iters: Number of iterations for which to run `loop_fn`. parallel_iterations: The number of iterations that can be dispatched in parallel. This knob can be used to control the total memory usage. @@ -137,7 +137,7 @@ def pfor(loop_fn, iters, fallback_to_while_loop=True, parallel_iterations=None): `pfor` has functionality similar to `for_loop`, i.e. running `loop_fn` `iters` times, with input from 0 to `iters - 1`, and stacking corresponding output of - each iteration. However the implementation does not use a tf.while_loop. + each iteration. However the implementation does not use a `tf.while_loop`. Instead it adds new operations to the graph that collectively compute the same value as what running `loop_fn` in a loop would compute. @@ -152,7 +152,7 @@ def pfor(loop_fn, iters, fallback_to_while_loop=True, parallel_iterations=None): reads, etc). - Conversion works only on a limited set of kernels for which a converter has been registered. - - loop_fn has limited support for control flow operations. tf.cond in + - `loop_fn` has limited support for control flow operations. `tf.cond` in particular is not supported. - `loop_fn` should return nested structure of Tensors or Operations. However if an Operation is returned, it should have zero outputs. @@ -166,9 +166,9 @@ def pfor(loop_fn, iters, fallback_to_while_loop=True, parallel_iterations=None): or Operation objects. Note that if setting `parallel_iterations` argument to something other than None, `loop_fn` may be called more than once during graph construction. So it may need to avoid mutating global state. - iters: Number of iterations for which to run loop_fn. + iters: Number of iterations for which to run `loop_fn`. fallback_to_while_loop: If true, on failing to vectorize an operation, pfor - fallbacks to using a tf.while_loop to dispatch the iterations. + fallbacks to using a `tf.while_loop` to dispatch the iterations. parallel_iterations: A knob to control how many iterations are vectorized and dispatched in parallel. The default value of None corresponds to vectorizing all the iterations. 
If `parallel_iterations` is smaller than @@ -337,7 +337,7 @@ def vectorized_map(fn, elems, fallback_to_while_loop=True): """Parallel map on the list of tensors unpacked from `elems` on dimension 0. - This method works similar to tf.map_fn but is optimized to run much faster, + This method works similar to `tf.map_fn` but is optimized to run much faster, possibly with a much larger memory footprint. The speedups are obtained by vectorization (see https://arxiv.org/pdf/1903.04243.pdf). The idea behind vectorization is to semantically launch all the invocations of `fn` in From 133fa6a1277c4c08596ad19ffac6d863afc421f5 Mon Sep 17 00:00:00 2001 From: 8bitmp3 <19637339+8bitmp3@users.noreply.github.com> Date: Mon, 29 Jun 2020 00:43:43 +0100 Subject: [PATCH 0080/2522] Add the Auto-Vectorizing TF Graphs research paper title to tf.vectorized_map --- tensorflow/python/ops/parallel_for/control_flow_ops.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/ops/parallel_for/control_flow_ops.py b/tensorflow/python/ops/parallel_for/control_flow_ops.py index d145a7beda5..9bdf81a16b1 100644 --- a/tensorflow/python/ops/parallel_for/control_flow_ops.py +++ b/tensorflow/python/ops/parallel_for/control_flow_ops.py @@ -339,7 +339,8 @@ def vectorized_map(fn, elems, fallback_to_while_loop=True): This method works similar to `tf.map_fn` but is optimized to run much faster, possibly with a much larger memory footprint. The speedups are obtained by - vectorization (see https://arxiv.org/pdf/1903.04243.pdf). The idea behind + vectorization (see [Auto-Vectorizing TensorFlow Graphs: Jacobians, Auto-Batching + and Beyond](https://arxiv.org/pdf/1903.04243.pdf)). The idea behind vectorization is to semantically launch all the invocations of `fn` in parallel and fuse corresponding operations across all these invocations. 
This fusion is done statically at graph generation time and the generated code is From 6162dbe55e8d354623818f6ffe780dacd1c152bd Mon Sep 17 00:00:00 2001 From: Tare Gaskin Date: Mon, 29 Jun 2020 00:13:20 +0000 Subject: [PATCH 0081/2522] [-Wsign-compare] warning fixes batch 6 --- tensorflow/compiler/tf2xla/ops/xla_ops.cc | 3 +- tensorflow/core/framework/shape_inference.cc | 34 +++++++++++-------- .../grappler/costs/op_level_cost_estimator.cc | 11 +++--- .../optimizers/common_subgraph_elimination.cc | 2 +- .../grappler/optimizers/debug_stripper.cc | 2 +- .../core/grappler/optimizers/model_pruner.cc | 3 +- tensorflow/core/grappler/utils.cc | 2 +- .../core/grappler/utils/topological_sort.cc | 6 ++-- .../kernels/initializable_lookup_table.cc | 2 +- tensorflow/core/kernels/lookup_util.cc | 2 +- .../core/profiler/utils/derived_timeline.cc | 2 +- .../core/profiler/utils/xplane_utils.cc | 2 +- tensorflow/core/util/bcast.h | 6 ++-- tensorflow/python/framework/python_op_gen.cc | 12 +++---- .../framework/python_op_gen_internal.cc | 4 +-- 15 files changed, 53 insertions(+), 40 deletions(-) diff --git a/tensorflow/compiler/tf2xla/ops/xla_ops.cc b/tensorflow/compiler/tf2xla/ops/xla_ops.cc index 862da1f3f95..f4b9e9654d2 100644 --- a/tensorflow/compiler/tf2xla/ops/xla_ops.cc +++ b/tensorflow/compiler/tf2xla/ops/xla_ops.cc @@ -441,7 +441,8 @@ REGISTER_OP("XlaReduce") auto dim_in_range = [rank](int64 dim) { return dim >= 0 && dim < rank; }; - if (rank < dimensions_to_reduce.size() || + const int dimensions_to_reduce_size = dimensions_to_reduce.size(); + if (rank < dimensions_to_reduce_size || dims_set.size() != dimensions_to_reduce.size() || !absl::c_all_of(dimensions_to_reduce, dim_in_range)) { return errors::InvalidArgument( diff --git a/tensorflow/core/framework/shape_inference.cc b/tensorflow/core/framework/shape_inference.cc index 92e98b3fed4..0861188ba4e 100644 --- a/tensorflow/core/framework/shape_inference.cc +++ b/tensorflow/core/framework/shape_inference.cc @@ -62,14 +62,14 @@ InferenceContext::InferenceContext( } std::vector>> handle_data( input_shapes.size()); - for (int i = 0; i < input_handle_shapes_and_types.size(); ++i) { + for (int i = 0, iter_limit = input_handle_shapes_and_types.size(); i < iter_limit; ++i) { const auto& v = input_handle_shapes_and_types[i]; if (v == nullptr) { continue; } handle_data[i].reset(new std::vector(v->size())); auto& new_v = *handle_data[i]; - for (int j = 0; j < v->size(); ++j) { + for (int j = 0, iter_limit = v->size(); j < iter_limit; ++j) { const auto& p = (*v)[j]; construction_status_.Update( MakeShapeFromPartialTensorShape(p.first, &new_v[j].shape)); @@ -123,7 +123,8 @@ Status InferenceContext::set_output(StringPiece output_name, } else { const int start = result->second.first; const int size = result->second.second - start; - if (size != shapes.size()) { + const int shapes_size = shapes.size(); + if (size != shapes_size) { return errors::InvalidArgument("Must have exactly ", shapes.size(), " shapes."); } @@ -181,7 +182,8 @@ void InferenceContext::PreInputInit( } Status InferenceContext::ExpandOutputs(int new_output_size) { - if (new_output_size < outputs_.size()) { + int outputs_size_ = outputs_.size(); + if (new_output_size < outputs_size_) { return errors::InvalidArgument("Trying to reduce number of outputs of op."); } outputs_.resize(new_output_size, nullptr); @@ -209,8 +211,8 @@ void InferenceContext::PostInputInit( } input_handle_shapes_and_types_ = std::move(input_handle_data); } - - if (inputs_.size() != num_inputs_from_node_def) { + int 
inputs_size_ = inputs_.size(); + if (inputs_size_ != num_inputs_from_node_def) { construction_status_ = errors::InvalidArgument( "Wrong number of inputs passed: ", inputs_.size(), " while ", num_inputs_from_node_def, " expected based on NodeDef"); @@ -718,7 +720,8 @@ Status InferenceContext::MakeShapeFromShapeTensorTreatScalarAsUnknownShape( TF_RETURN_IF_ERROR(WithRankAtMost(input(input_idx), 1, &input_shape)); requested_input_tensor_as_partial_shape_[input_idx] = true; - if (input_idx < input_tensors_as_shapes_.size() && + int input_tensors_as_shapes_size_ = input_tensors_as_shapes_.size(); + if (input_idx < input_tensors_as_shapes_size_ && input_tensors_as_shapes_[input_idx].IsSet() && RankKnown(input_tensors_as_shapes_[input_idx])) { *out = input_tensors_as_shapes_[input_idx]; @@ -736,7 +739,8 @@ Status InferenceContext::MakeShapeFromShapeTensor(int input_idx, TF_RETURN_IF_ERROR(WithRank(input(input_idx), 1, &input_shape)); requested_input_tensor_as_partial_shape_[input_idx] = true; - if (input_idx < input_tensors_as_shapes_.size() && + int input_tensors_as_shapes_size_ = input_tensors_as_shapes_.size(); + if (input_idx < input_tensors_as_shapes_size_ && input_tensors_as_shapes_[input_idx].IsSet() && RankKnown(input_tensors_as_shapes_[input_idx])) { *out = input_tensors_as_shapes_[input_idx]; @@ -1099,14 +1103,16 @@ Status InferenceContext::AttachContext(const Status& status) { std::vector input_from_tensors_str; std::vector input_from_tensors_as_shape_str; input_from_tensors_as_shape_str.reserve(inputs_.size()); - for (int i = 0; i < inputs_.size(); ++i) { + for (int i = 0, iter_limit = inputs_.size(); i < iter_limit; ++i) { + int input_tensors_size_ = input_tensors_.size(); + int input_tensors_as_shapes_size_ = input_tensors_as_shapes_.size(); if (requested_input_tensor_as_partial_shape_[i] && - i < input_tensors_as_shapes_.size() && + i < input_tensors_as_shapes_size_ && input_tensors_as_shapes_[i].IsSet() && RankKnown(input_tensors_as_shapes_[i])) { input_from_tensors_as_shape_str.push_back(strings::StrCat( "input[", i, "] = ", DebugString(input_tensors_as_shapes_[i]))); - } else if (requested_input_tensor_[i] && i < input_tensors_.size() && + } else if (requested_input_tensor_[i] && i < input_tensors_size_ && input_tensors_[i] != nullptr) { input_from_tensors_str.push_back(strings::StrCat( "input[", i, "] = <", @@ -1140,7 +1146,7 @@ bool InferenceContext::MergeHandleShapesAndTypes( } std::vector new_values(shapes_and_types.size()); bool refined = false; - for (int i = 0; i < shapes_and_types.size(); ++i) { + for (int i = 0, iter_limit = shapes_and_types.size(); i < iter_limit; ++i) { const ShapeAndType& existing = (*to_update)[i]; if (shapes_and_types[i].dtype == existing.dtype) { new_values[i].dtype = existing.dtype; @@ -1164,7 +1170,7 @@ bool InferenceContext::MergeHandleShapesAndTypes( if (!refined) { return false; } - for (int i = 0; i < new_values.size(); ++i) { + for (int i = 0, iter_limit = new_values.size(); i < iter_limit; ++i) { (*to_update)[i] = new_values[i]; } return true; @@ -1199,7 +1205,7 @@ bool InferenceContext::RelaxHandleShapesAndMergeTypes( return false; } std::vector new_values(shapes_and_types.size()); - for (int i = 0; i < shapes_and_types.size(); ++i) { + for (int i = 0, iter_limit = shapes_and_types.size(); i < iter_limit; ++i) { const ShapeAndType& existing = (*to_update)[i]; if (shapes_and_types[i].dtype == existing.dtype) { new_values[i].dtype = existing.dtype; diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc 
b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc index b8b62cbd6e5..a1f29e1c63c 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc @@ -1469,9 +1469,10 @@ Costs OpLevelCostEstimator::PredictEinsum(const OpContext& op_context) const { found_unknown_shapes = a_input_shape_unknown || b_input_shape_unknown || (a_input.shape().dim_size() < matrix_rank) || (b_input.shape().dim_size() < matrix_rank); - - if (a_input_str.size() != a_input_shape.dim_size() || - b_input_str.size() != b_input_shape.dim_size()) { + int a_input_str_size = a_input_str.size(); + int b_input_str_size = b_input_str.size(); + if (a_input_str_size != a_input_shape.dim_size() || + b_input_str_size != b_input_shape.dim_size()) { VLOG(1) << "Missing accurate estimator for op: " << op_info.op() << ", equation subscripts don't match tensor rank."; return PredictCostOfAnUnknownOp(op_context); @@ -1513,7 +1514,7 @@ Costs OpLevelCostEstimator::PredictEinsum(const OpContext& op_context) const { n_dim.set_size(1); k_dim.set_size(1); - for (int i_idx = 0; i_idx < a_input_str.size(); ++i_idx) { + for (int i_idx = 0, iter_limit = a_input_str.size(); i_idx < iter_limit; ++i_idx) { if (b_input_str.find(a_input_str[i_idx]) == std::string::npos) { if (rhs_str.find(a_input_str[i_idx]) == std::string::npos) { VLOG(1) << "Missing accurate estimator for op: " << op_info.op(); @@ -1533,7 +1534,7 @@ Costs OpLevelCostEstimator::PredictEinsum(const OpContext& op_context) const { *(a_matrix_shape->add_dim()) = a_input_shape.dim(i_idx); *(b_matrix_shape->add_dim()) = a_input_shape.dim(i_idx); } - for (int i_idx = 0; i_idx < b_input_str.size(); ++i_idx) { + for (int i_idx = 0, iter_limit = b_input_str.size(); i_idx < iter_limit; ++i_idx) { if (a_input_str.find(b_input_str[i_idx]) == std::string::npos) { if (rhs_str.find(b_input_str[i_idx]) == std::string::npos) { VLOG(1) << "Missing accurate estimator for op: " << op_info.op(); diff --git a/tensorflow/core/grappler/optimizers/common_subgraph_elimination.cc b/tensorflow/core/grappler/optimizers/common_subgraph_elimination.cc index af323e913a7..ad0cbce0b1d 100644 --- a/tensorflow/core/grappler/optimizers/common_subgraph_elimination.cc +++ b/tensorflow/core/grappler/optimizers/common_subgraph_elimination.cc @@ -73,7 +73,7 @@ class UniqueNodes { if (it == memoized_signatures_.end()) return; std::vector& candidates = rep_[it->second]; - for (int i = 0; i < candidates.size(); ++i) { + for (int i = 0, iter_limit = candidates.size(); i < iter_limit; ++i) { if (candidates[i] == node) { std::swap(candidates[i], candidates[candidates.size() - 1]); candidates.resize(candidates.size() - 1); diff --git a/tensorflow/core/grappler/optimizers/debug_stripper.cc b/tensorflow/core/grappler/optimizers/debug_stripper.cc index d4b3bf395c3..b9e14df9261 100644 --- a/tensorflow/core/grappler/optimizers/debug_stripper.cc +++ b/tensorflow/core/grappler/optimizers/debug_stripper.cc @@ -63,7 +63,7 @@ Status DebugStripper::Optimize(Cluster* cluster, const GrapplerItem& item, node.mutable_attr()->swap(new_attr); // As Identity op only takes one input, mark redundant inputs as control // input. 
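All of the hunks in this "[-Wsign-compare] warning fixes" batch apply the same mechanical pattern: the unsigned value returned by size() is captured in a signed int (or hoisted into an iter_limit loop bound) before it is compared against a signed index. A minimal standalone sketch of that pattern, not taken from this patch, looks like:

    #include <vector>

    // Before: `i < v.size()` compares a signed int against an unsigned size_t,
    // which triggers -Wsign-compare. Hoisting the size into a signed bound keeps
    // the comparison signed-vs-signed and also evaluates size() only once.
    void Walk(const std::vector<int>& v) {
      for (int i = 0, iter_limit = static_cast<int>(v.size()); i < iter_limit; ++i) {
        // ... use v[i] ...
      }
    }
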
- for (size_t i = 1; i < node.input_size(); ++i) { + for (int i = 1, iter_limit = node.input_size(); i < iter_limit; ++i) { if (!IsControlInput(node.input(i))) { *node.mutable_input(i) = AsControlDependency(NodeName(node.input(i))); } diff --git a/tensorflow/core/grappler/optimizers/model_pruner.cc b/tensorflow/core/grappler/optimizers/model_pruner.cc index 20db4360f73..5956fea4695 100644 --- a/tensorflow/core/grappler/optimizers/model_pruner.cc +++ b/tensorflow/core/grappler/optimizers/model_pruner.cc @@ -401,9 +401,10 @@ Status SplitIdentityNInputs(GraphDef* graph, } const int num_non_control_inputs = NumNonControlInputs(*node); + const int terminal_second_size = terminal.second.size(); if (node->attr().count("T") == 0 || node->attr().at("T").list().type_size() != num_non_control_inputs || - terminal.second.size() >= num_non_control_inputs) { + terminal_second_size >= num_non_control_inputs) { continue; } diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc index cd6b4855583..e46910172aa 100644 --- a/tensorflow/core/grappler/utils.cc +++ b/tensorflow/core/grappler/utils.cc @@ -357,7 +357,7 @@ void PermuteNodesInPlace(GraphDef* graph, std::vector* permutation, } permutation->swap(inv_perm); } - for (std::size_t n = 0; n + 1 < permutation->size(); ++n) { + for (int n = 0, iter_limit = permutation->size(); n + 1 < iter_limit; ++n) { while (n != (*permutation)[n]) { std::size_t r = (*permutation)[n]; graph->mutable_node()->SwapElements(n, r); diff --git a/tensorflow/core/grappler/utils/topological_sort.cc b/tensorflow/core/grappler/utils/topological_sort.cc index e24a457593a..932276edeb8 100644 --- a/tensorflow/core/grappler/utils/topological_sort.cc +++ b/tensorflow/core/grappler/utils/topological_sort.cc @@ -81,7 +81,8 @@ Status ComputeTopologicalOrder( int ready_node = (*ready_nodes)[front]; for (int fanout : graph_view.GetFanout(ready_node)) { ++num_ready_inputs[fanout]; - if (num_ready_inputs[fanout] == graph_view.GetFanin(fanout).size()) { + int graph_view_GetFanin_fanout_size = graph_view.GetFanin(fanout).size(); + if (num_ready_inputs[fanout] == graph_view_GetFanin_fanout_size) { ready_nodes->push_back(fanout); ++back; } @@ -95,7 +96,8 @@ Status ComputeTopologicalOrder( "at node = " << graph.node(back).DebugString(); for (int i = 0; i < graph_view.num_nodes(); ++i) { - if (num_ready_inputs[i] != graph_view.GetFanin(i).size()) { + int graph_view_GetFanin_i_size = graph_view.GetFanin(i).size(); + if (num_ready_inputs[i] != graph_view_GetFanin_i_size) { VLOG(1) << "Node not ready: " << graph.node(i).DebugString(); } } diff --git a/tensorflow/core/kernels/initializable_lookup_table.cc b/tensorflow/core/kernels/initializable_lookup_table.cc index 196c2fe95a3..48041526022 100644 --- a/tensorflow/core/kernels/initializable_lookup_table.cc +++ b/tensorflow/core/kernels/initializable_lookup_table.cc @@ -74,7 +74,7 @@ Status InitializableLookupTable::Initialize(InitTableIterator& iter) { Status InitializableLookupTable::AreEntriesSame(const InitTableIterator& iter, bool* result) { - *result = iter.total_size() == size(); + *result = static_cast(iter.total_size()) == size(); return Status::OK(); } diff --git a/tensorflow/core/kernels/lookup_util.cc b/tensorflow/core/kernels/lookup_util.cc index 142878d8fb0..9adcedd6b1a 100644 --- a/tensorflow/core/kernels/lookup_util.cc +++ b/tensorflow/core/kernels/lookup_util.cc @@ -132,7 +132,7 @@ class TextFileLineIterator std::vector tokens; if (!ignore_split_) { tokens = str_util::Split(line, delimiter_); - if 
(std::max(key_index_, value_index_) >= tokens.size()) { + if ( static_cast(std::max(key_index_, value_index_)) >= tokens.size()) { status_ = errors::InvalidArgument( "Invalid number of columns in ", filename_, " line ", next_id_, " (", line, ") : expected ", std::max(key_index_, value_index_), diff --git a/tensorflow/core/profiler/utils/derived_timeline.cc b/tensorflow/core/profiler/utils/derived_timeline.cc index f63a8e5c2d9..bcadf51c110 100644 --- a/tensorflow/core/profiler/utils/derived_timeline.cc +++ b/tensorflow/core/profiler/utils/derived_timeline.cc @@ -130,7 +130,7 @@ void DerivedXLineBuilder::ExpandOrAddLevelEvent(const XEvent& event, } void DerivedXLineBuilder::ResetLastEvents(int level) { - for (int i = level; i < last_event_by_level_.size(); ++i) { + for (int i = level, iter_limit = last_event_by_level_.size(); i < iter_limit; ++i) { last_event_by_level_[i] = absl::nullopt; } if (level == 0) ResetDependentLines(); diff --git a/tensorflow/core/profiler/utils/xplane_utils.cc b/tensorflow/core/profiler/utils/xplane_utils.cc index 3fa421c3459..f59c071784f 100644 --- a/tensorflow/core/profiler/utils/xplane_utils.cc +++ b/tensorflow/core/profiler/utils/xplane_utils.cc @@ -155,7 +155,7 @@ void SortXSpace(XSpace* space) { // smaller than these value. void NormalizeTimestamps(XPlane* plane, uint64 start_time_ns) { for (XLine& line : *plane->mutable_lines()) { - if (line.timestamp_ns() >= start_time_ns) { + if (line.timestamp_ns() >= static_cast(start_time_ns)) { line.set_timestamp_ns(line.timestamp_ns() - start_time_ns); } } diff --git a/tensorflow/core/util/bcast.h b/tensorflow/core/util/bcast.h index 7bb8ea18ad3..7b969f72475 100644 --- a/tensorflow/core/util/bcast.h +++ b/tensorflow/core/util/bcast.h @@ -139,7 +139,8 @@ BCastList::BCastList(const BCastList::Vec (&x)[N], if (x[i] != x[0]) { all_equal = false; } - if (x[i].size() > largest_rank) { + int x_i_size = x[i].size(); + if (x_i_size > largest_rank) { largest_rank = x[i].size(); } } @@ -176,7 +177,8 @@ BCastList::BCastList(const BCastList::Vec (&x)[N], // 1-extend and align all vectors. for (int i = 0; i < N; ++i) { - if (copy[i].size() < largest_rank) { + int copy_i_size = copy[i].size(); + if (copy_i_size < largest_rank) { copy[i].resize(largest_rank, 1); } } diff --git a/tensorflow/python/framework/python_op_gen.cc b/tensorflow/python/framework/python_op_gen.cc index ca0c5d9ef1a..12aebb6a671 100644 --- a/tensorflow/python/framework/python_op_gen.cc +++ b/tensorflow/python/framework/python_op_gen.cc @@ -63,7 +63,7 @@ void AddInferredAttr(const string& indentation, const string& attr_name, string VectorToTuple(const std::vector& l) { if (l.size() == 1) return strings::StrCat("(", l.front(), ",)"); string ret = "("; - for (int i = 0; i < l.size(); ++i) { + for (int i = 0, iter_limit = l.size(); i < iter_limit; ++i) { if (i > 0) { strings::StrAppend(&ret, ", "); } @@ -75,11 +75,11 @@ string VectorToTuple(const std::vector& l) { void Unflatten(const string& prefix, const std::vector& output_sizes, const string& var, string* result) { - for (int i = 0; i < output_sizes.size(); ++i) { + for (int i = 0, iter_limit = output_sizes.size(); i < iter_limit; ++i) { if (!output_sizes[i].empty()) { strings::StrAppend(result, prefix, var, " = "); if (i > 0) strings::StrAppend(result, var, "[:", i, "] + "); - if (i + 1 < output_sizes.size()) { + if (i + 1 < iter_limit) { // Special case i == 0 to avoid "0 +" in the generated code. 
if (i == 0) { strings::StrAppend(result, "[", var, "[:", output_sizes[i], "]] + ", @@ -295,7 +295,7 @@ string GenEagerPythonOp::Code() { // from the end of params_no_default_, and adding params_no_default_. attrs_.reserve(params_no_default_.size() - op_def_.input_arg_size() + params_with_default_.size()); - for (int i = op_def_.input_arg_size(); i < params_no_default_.size(); ++i) { + for (int i = op_def_.input_arg_size(), iter_limit = params_no_default_.size(); i < iter_limit; ++i) { attrs_.push_back(params_no_default_[i].GetName()); } for (const auto& p : params_with_default_) { @@ -331,7 +331,7 @@ string GenEagerPythonOp::Code() { parameters_with_defaults.empty() ? "" : ", ", "name=None"); // Add attr_expressions_ for attrs that are params. - for (int i = 0; i < attrs_.size(); ++i) { + for (int i = 0, iter_limit = attrs_.size(); i < iter_limit; ++i) { const string& attr_name = attrs_[i]; const string& attr_api_name = param_names_[i + op_def_.input_arg_size()].GetRenameTo(); @@ -522,7 +522,7 @@ bool GenEagerPythonOp::GetEagerFunctionSetup(const string& indentation, } } - for (int i = 0; i < attrs_.size(); ++i) { + for (int i = 0, iter_limit = attrs_.size(); i < iter_limit; ++i) { const string& attr_name = attrs_[i]; const auto& param = param_names_[i + op_def_.input_arg_size()]; const auto& attr = *FindAttr(attr_name, op_def_); diff --git a/tensorflow/python/framework/python_op_gen_internal.cc b/tensorflow/python/framework/python_op_gen_internal.cc index 05102db0189..d2e25e368b4 100644 --- a/tensorflow/python/framework/python_op_gen_internal.cc +++ b/tensorflow/python/framework/python_op_gen_internal.cc @@ -561,10 +561,10 @@ string GenPythonOp::Code() { // from the end of args_no_default, and adding args_no_default. attrs_.reserve(params_no_default.size() - op_def_.input_arg_size() + params_with_default.size()); - for (int i = op_def_.input_arg_size(); i < params_no_default.size(); ++i) { + for (int i = op_def_.input_arg_size(), iter_limit = params_no_default.size(); i < iter_limit; ++i) { attrs_.push_back(params_no_default[i].GetName()); } - for (int i = 0; i < params_with_default.size(); ++i) { + for (int i = 0, iter_limit = params_with_default.size(); i < iter_limit; ++i) { attrs_.push_back(params_with_default[i].GetName()); } From 412da53d657511bc5fafee4d6cfa34e61fc2d069 Mon Sep 17 00:00:00 2001 From: Patrik Laurell Date: Wed, 17 Jun 2020 16:43:22 +0200 Subject: [PATCH 0082/2522] Support float32->int16 and int16->int16 quantization in TFLu --- tensorflow/lite/micro/kernels/quantize.cc | 19 +++++- .../lite/micro/kernels/quantize_test.cc | 60 +++++++++++++++++++ 2 files changed, 76 insertions(+), 3 deletions(-) diff --git a/tensorflow/lite/micro/kernels/quantize.cc b/tensorflow/lite/micro/kernels/quantize.cc index b58a1cb368e..efaf2e583cd 100644 --- a/tensorflow/lite/micro/kernels/quantize.cc +++ b/tensorflow/lite/micro/kernels/quantize.cc @@ -66,11 +66,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE(context, input->type == kTfLiteFloat32 || input->type == kTfLiteInt16 || input->type == kTfLiteInt8); - TF_LITE_ENSURE(context, - output->type == kTfLiteUInt8 || output->type == kTfLiteInt8); + TF_LITE_ENSURE(context, output->type == kTfLiteUInt8 || + output->type == kTfLiteInt8 || + output->type == kTfLiteInt16); if ((input->type == kTfLiteInt16 || input->type == kTfLiteInt8) && - output->type == kTfLiteInt8) { + output->type == kTfLiteInt8 || + (input->type == kTfLiteInt16 && output->type == kTfLiteInt16)) { double effective_scale = 
static_cast(input->params.scale / output->params.scale); @@ -103,6 +105,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { op_params, GetTensorShape(input), GetTensorData(input), GetTensorShape(output), GetTensorData(output)); break; + case kTfLiteInt16: + reference_ops::AffineQuantize( + op_params, GetTensorShape(input), GetTensorData(input), + GetTensorShape(output), GetTensorData(output)); + return kTfLiteOk; default: TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.", TfLiteTypeGetName(input->type), @@ -118,6 +125,12 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { data->output_shift, input->params.zero_point, output->params.zero_point, GetTensorData(output)); break; + case kTfLiteInt16: + reference_ops::Requantize( + GetTensorData(input), size, data->output_multiplier, + data->output_shift, input->params.zero_point, + output->params.zero_point, GetTensorData(output)); + return kTfLiteOk; default: TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.", TfLiteTypeGetName(input->type), diff --git a/tensorflow/lite/micro/kernels/quantize_test.cc b/tensorflow/lite/micro/kernels/quantize_test.cc index b6f885d09e7..8e097429ca0 100644 --- a/tensorflow/lite/micro/kernels/quantize_test.cc +++ b/tensorflow/lite/micro/kernels/quantize_test.cc @@ -198,6 +198,32 @@ TF_LITE_MICRO_TEST(QuantizeOpTestInt8NoScale) { dims, values, dims, values, values_quantized, scale, zero_point, output); } +TF_LITE_MICRO_TEST(QuantizeOpTestInt16) { + const int length = 10; + const int dims[] = {2, 2, 5}; + const float values[] = {-63.5, -63, -62.5, -62, -61.5, + 62, 62.5, 63, 63.5, 64}; + const float scale = 0.5; + const int zero_point = -1; + int16_t output[length]; + int16_t values_quantized[length]; + tflite::testing::TestQuantizeFloat( + dims, values, dims, values, values_quantized, scale, zero_point, output); +} + +TF_LITE_MICRO_TEST(QuantizeOpTestInt16NoScale) { + const int length = 10; + const int dims[] = {2, 2, 5}; + const float values[] = {-128, -127, -126, -125, -124, + 123, 124, 125, 126, 127}; + const float scale = 1.0; + const int zero_point = 0; + int16_t output[length]; + int16_t values_quantized[length]; + tflite::testing::TestQuantizeFloat( + dims, values, dims, values, values_quantized, scale, zero_point, output); +} + TF_LITE_MICRO_TEST(QuantizeOpTestInt16toInt8) { const int length = 10; const int dims[] = {2, 2, 5}; @@ -215,6 +241,40 @@ TF_LITE_MICRO_TEST(QuantizeOpTestInt16toInt8) { output_zero_point, output_quantized); } +TF_LITE_MICRO_TEST(QuantizeOpTestInt16toInt16) { + const int length = 10; + const int dims[] = {2, 2, 5}; + const float values[] = {-64, -62, -60, -58, -56, 54, 56, 58, 60, 62}; + const float input_scale = 2.f; + const int input_zero_point = 0; + const float output_scale = 0.5; + const int output_zero_point = 32; + int16_t output_quantized[length]; + int16_t values_quantized[length]; + int16_t input_quantized[length]; + tflite::testing::TestRequantize(dims, values, input_quantized, input_scale, + input_zero_point, dims, values, + values_quantized, output_scale, + output_zero_point, output_quantized); +} + +TF_LITE_MICRO_TEST(QuantizeOpTestInt16toInt16NoZeroPoint) { + const int length = 10; + const int dims[] = {2, 2, 5}; + const float values[] = {-32, -31, -30, -29, -28, 27, 28, 29, 30, 31}; + const float input_scale = 1.f; + const int input_zero_point = 0; + const float output_scale = 0.5; + const int output_zero_point = 0; + int16_t output_quantized[length]; + int16_t values_quantized[length]; + int16_t 
input_quantized[length]; + tflite::testing::TestRequantize(dims, values, input_quantized, input_scale, + input_zero_point, dims, values, + values_quantized, output_scale, + output_zero_point, output_quantized); +} + TF_LITE_MICRO_TEST(QuantizeOpTestInt8toInt8) { const int length = 10; const int dims[] = {2, 2, 5}; From 05ea0a60728706e9963357d0f70adb9a1a89454a Mon Sep 17 00:00:00 2001 From: amturati Date: Mon, 29 Jun 2020 16:16:13 +0000 Subject: [PATCH 0083/2522] Fixed MatMul Abstract Tensor test, now passing --- .../c/eager/c_api_unified_experimental_test.cc | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/tensorflow/c/eager/c_api_unified_experimental_test.cc b/tensorflow/c/eager/c_api_unified_experimental_test.cc index 5c431794747..23cb39b3fab 100644 --- a/tensorflow/c/eager/c_api_unified_experimental_test.cc +++ b/tensorflow/c/eager/c_api_unified_experimental_test.cc @@ -101,21 +101,22 @@ TEST_P(UnifiedCAPI, TestBasicEagerMatMul) { /* Want to test simple MatMul example: [ [0,0] , * [ [0,0] , = [ [0,0], - [0,0] ] [0,0] ] [0,0]] + [0,0] ] [0,0] ] [0,0] ] */ // Build an abstract input tensor. float vals [] = {0.0f,0.0f,0.0f,0.0f}; TFE_Context* eager_ctx = TF_ExecutionContextGetTFEContext(ctx); TFE_TensorHandle* t = TestMatrixTensorHandleWithInput(eager_ctx, vals); - + TFE_TensorHandle* expected_tensor = TestMatrixTensorHandleWithInput(eager_ctx, vals); // 2x2 matrix of zeros as expected result + TF_AbstractTensor* at = - TF_CreateAbstractTensorFromEagerTensor(t, status.get()); + TF_CreateAbstractTensorFromEagerTensor(t, status.get()); //get abstract tensor ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); // Build an abstract operation. auto* op = TF_NewAbstractOp(ctx); - TF_AbstractOpSetOpType(op, "MatMul", status.get()); //correct syntax to specify matrix multiply for tensors? + TF_AbstractOpSetOpType(op, "MatMul", status.get()); ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); // Build inputs and outputs. @@ -139,13 +140,14 @@ TEST_P(UnifiedCAPI, TestBasicEagerMatMul) { TF_AbstractTensorGetEagerTensor(result, status.get()); ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); TF_Tensor* result_tensor = TFE_TensorHandleResolve(result_t, status.get()); - TF_Tensor* t_tensor = TFE_TensorHandleResolve(t, status.get()); //Is this the best way to get the TF_Tensor from t? 
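Note that comparing a single float through *result_value only checks element (0,0) of the 2x2 product; the cleanup later in this series (patch 0086) copies the whole result buffer out and compares every element, which is the stronger check. A sketch of that idea, assuming a resolved TF_Tensor* named result_tensor that holds four floats:

    // Sketch only: copy the full 2x2 result out of the TF_Tensor and check each value.
    float got[4] = {0};
    memcpy(got, TF_TensorData(result_tensor), TF_TensorByteSize(result_tensor));
    for (int i = 0; i < 4; ++i) {
      EXPECT_EQ(got[i], 0.0f);  // the product of two zero matrices is all zeros
    }
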
+ + TF_Tensor* zero_tensor = TFE_TensorHandleResolve(expected_tensor, status.get()); float* result_value = static_cast(TF_TensorData(result_tensor)); - float* t_value = static_cast(TF_TensorData(t_tensor)); - EXPECT_EQ(*result_value, *t_value); + float* zero_value = static_cast(TF_TensorData(zero_tensor)); + EXPECT_EQ(*result_value, *zero_value); TF_DeleteTensor(result_tensor); - TF_DeleteTensor(t_tensor); + TF_DeleteTensor(zero_tensor); TF_DeleteAbstractTensor(result); TF_DeleteOutputList(o); TF_DeleteExecutionContext(ctx); From 35bd17b6717384cf587aa7cc271d693a111d4a11 Mon Sep 17 00:00:00 2001 From: rahul-kamat Date: Mon, 29 Jun 2020 16:24:23 +0000 Subject: [PATCH 0084/2522] Remove TensorLike and changes to types/core.py --- tensorflow/python/types/core.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tensorflow/python/types/core.py b/tensorflow/python/types/core.py index f2c5d965a2d..20da83e562d 100644 --- a/tensorflow/python/types/core.py +++ b/tensorflow/python/types/core.py @@ -18,8 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from typing import Union -from tensorflow.python.util.tf_export import tf_export # TODO(mdan): Consider adding ABC once the dependence on isinstance is reduced. # TODO(mdan): Add type annotations. @@ -60,6 +58,3 @@ class Value(Tensor): def numpy(self): pass - -# TODO(rahulkamat): add complete set of types to TensorLike and add tf.export -TensorLike = Union[Tensor, int, float, bool, str, tuple] From 91f55543ef494c510b2f6cc54f6c9c90e530fb4c Mon Sep 17 00:00:00 2001 From: amturati Date: Mon, 29 Jun 2020 16:38:25 +0000 Subject: [PATCH 0085/2522] added non-trivial matmul example with abstract tensors --- .../eager/c_api_unified_experimental_test.cc | 80 +++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/tensorflow/c/eager/c_api_unified_experimental_test.cc b/tensorflow/c/eager/c_api_unified_experimental_test.cc index 23cb39b3fab..796e0cc0d2f 100644 --- a/tensorflow/c/eager/c_api_unified_experimental_test.cc +++ b/tensorflow/c/eager/c_api_unified_experimental_test.cc @@ -154,6 +154,86 @@ TEST_P(UnifiedCAPI, TestBasicEagerMatMul) { } +//MatMul Test 2 +TEST_P(UnifiedCAPI, TestBasicEagerMatMul2) { + std::unique_ptr status( + TF_NewStatus(), TF_DeleteStatus); + TFE_ContextOptions* opts = TFE_NewContextOptions(); + TF_ExecutionContext* ctx = TF_NewEagerExecutionContext(opts, status.get()); + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + TFE_DeleteContextOptions(opts); + + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + + /* Want to test simple MatMul example with abstract tensors: + + [ [1,2] , * [ [5,6] , = [ [19,22], + [3,4] ] [7,8] ] [43,50] ] + + */ + + // Build 1st Matrix. + float vals1 [] = {1.0f,2.0f,3.0f,4.0f}; + TFE_Context* eager_ctx = TF_ExecutionContextGetTFEContext(ctx); + TFE_TensorHandle* t1 = TestMatrixTensorHandleWithInput(eager_ctx, vals1); + + TF_AbstractTensor* at1 = + TF_CreateAbstractTensorFromEagerTensor(t1, status.get()); //get abstract tensor + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + + // Build 2nd Matrix. 
+ float vals2 [] = {5.0f,6.0f,7.0f,8.0f}; + //TFE_Context* eager_ctx2 = TF_ExecutionContextGetTFEContext(ctx); + TFE_TensorHandle* t2 = TestMatrixTensorHandleWithInput(eager_ctx, vals2); + + TF_AbstractTensor* at2 = + TF_CreateAbstractTensorFromEagerTensor(t2, status.get()); //get abstract tensor + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + + // Build expected result + float e_vals [] = {19.0f,22.0f,43.0f,50.0f}; + TFE_TensorHandle* expected_tensor = TestMatrixTensorHandleWithInput(eager_ctx, e_vals); // 2x2 matrix of expected result + + // Build an abstract operation. + auto* op = TF_NewAbstractOp(ctx); + TF_AbstractOpSetOpType(op, "MatMul", status.get()); + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + + // Build inputs and outputs. + TF_AbstractTensor* inputs[2] = {at1, at2}; + TF_OutputList* o = TF_NewOutputList(); + TF_OutputListSetNumOutputs(o, 1, status.get()); + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + + // Execute. + TF_ExecuteOperation(op, 2, inputs, o, ctx, status.get()); + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + + // Clean up operation and inputs. + TF_DeleteAbstractOp(op); + TF_DeleteAbstractTensor(at1); + TF_DeleteAbstractTensor(at2); + + // Verify the results. + ASSERT_EQ(1, TF_OutputListNumOutputs(o)); + TF_AbstractTensor* result = TF_OutputListGet(o, 0); + TFE_TensorHandle* result_t = + TF_AbstractTensorGetEagerTensor(result, status.get()); + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + TF_Tensor* result_tensor = TFE_TensorHandleResolve(result_t, status.get()); + + TF_Tensor* expected_val_tensor = TFE_TensorHandleResolve(expected_tensor, status.get()); + float* result_value = static_cast(TF_TensorData(result_tensor)); + float* expected_value = static_cast(TF_TensorData(expected_val_tensor)); + EXPECT_EQ(*result_value, *expected_value); + + TF_DeleteTensor(result_tensor); + TF_DeleteTensor(expected_val_tensor); + TF_DeleteAbstractTensor(result); + TF_DeleteOutputList(o); + TF_DeleteExecutionContext(ctx); +} + TEST_P(UnifiedCAPI, TestBasicGraph) { std::unique_ptr status( From 5c6557c34674f6a9adce741dcaec536d3bcec663 Mon Sep 17 00:00:00 2001 From: amturati Date: Mon, 29 Jun 2020 21:30:43 +0000 Subject: [PATCH 0086/2522] cleaned up syntax and style for matmul tests with abstract tensors --- tensorflow/c/eager/c_api_test_util.cc | 2 +- .../eager/c_api_unified_experimental_test.cc | 40 +++++++++++-------- 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/tensorflow/c/eager/c_api_test_util.cc b/tensorflow/c/eager/c_api_test_util.cc index 4002dcbc5c1..aae4ad2c6eb 100644 --- a/tensorflow/c/eager/c_api_test_util.cc +++ b/tensorflow/c/eager/c_api_test_util.cc @@ -90,7 +90,7 @@ TFE_TensorHandle* TestMatrixTensorHandle(TFE_Context* ctx) { TFE_TensorHandle* TestMatrixTensorHandleWithInput(TFE_Context* ctx, float data[]) { int64_t dims[] = {2, 2}; - //float data[] = vals; + TF_Status* status = TF_NewStatus(); TF_Tensor* t = TFE_AllocateHostTensor(ctx, TF_FLOAT, &dims[0], sizeof(dims) / sizeof(int64_t), status); diff --git a/tensorflow/c/eager/c_api_unified_experimental_test.cc b/tensorflow/c/eager/c_api_unified_experimental_test.cc index 796e0cc0d2f..8ebfa569f10 100644 --- a/tensorflow/c/eager/c_api_unified_experimental_test.cc +++ b/tensorflow/c/eager/c_api_unified_experimental_test.cc @@ -87,7 +87,7 @@ TEST_P(UnifiedCAPI, TestBasicEager) { } -//MatMul Test +// MatMul Test TEST_P(UnifiedCAPI, TestBasicEagerMatMul) { 
std::unique_ptr status( TF_NewStatus(), TF_DeleteStatus); @@ -104,6 +104,7 @@ TEST_P(UnifiedCAPI, TestBasicEagerMatMul) { [0,0] ] [0,0] ] [0,0] ] */ + // Build an abstract input tensor. float vals [] = {0.0f,0.0f,0.0f,0.0f}; TFE_Context* eager_ctx = TF_ExecutionContextGetTFEContext(ctx); @@ -141,20 +142,23 @@ TEST_P(UnifiedCAPI, TestBasicEagerMatMul) { ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); TF_Tensor* result_tensor = TFE_TensorHandleResolve(result_t, status.get()); - TF_Tensor* zero_tensor = TFE_TensorHandleResolve(expected_tensor, status.get()); - float* result_value = static_cast(TF_TensorData(result_tensor)); - float* zero_value = static_cast(TF_TensorData(zero_tensor)); - EXPECT_EQ(*result_value, *zero_value); + // Copy Tensor data into an array. + float result_data[4] = {0}; + memcpy(&result_data[0], TF_TensorData(result_tensor), TF_TensorByteSize(result_tensor)); + + int data_len = 4; //length of result_data + for(int i = 0; i < data_len; i++){ + EXPECT_EQ(result_data[i], 0); + } TF_DeleteTensor(result_tensor); - TF_DeleteTensor(zero_tensor); TF_DeleteAbstractTensor(result); TF_DeleteOutputList(o); TF_DeleteExecutionContext(ctx); } -//MatMul Test 2 +// MatMul Test 2 TEST_P(UnifiedCAPI, TestBasicEagerMatMul2) { std::unique_ptr status( TF_NewStatus(), TF_DeleteStatus); @@ -171,7 +175,7 @@ TEST_P(UnifiedCAPI, TestBasicEagerMatMul2) { [3,4] ] [7,8] ] [43,50] ] */ - + // Build 1st Matrix. float vals1 [] = {1.0f,2.0f,3.0f,4.0f}; TFE_Context* eager_ctx = TF_ExecutionContextGetTFEContext(ctx); @@ -183,7 +187,6 @@ TEST_P(UnifiedCAPI, TestBasicEagerMatMul2) { // Build 2nd Matrix. float vals2 [] = {5.0f,6.0f,7.0f,8.0f}; - //TFE_Context* eager_ctx2 = TF_ExecutionContextGetTFEContext(ctx); TFE_TensorHandle* t2 = TestMatrixTensorHandleWithInput(eager_ctx, vals2); TF_AbstractTensor* at2 = @@ -192,8 +195,7 @@ TEST_P(UnifiedCAPI, TestBasicEagerMatMul2) { // Build expected result float e_vals [] = {19.0f,22.0f,43.0f,50.0f}; - TFE_TensorHandle* expected_tensor = TestMatrixTensorHandleWithInput(eager_ctx, e_vals); // 2x2 matrix of expected result - + // Build an abstract operation. auto* op = TF_NewAbstractOp(ctx); TF_AbstractOpSetOpType(op, "MatMul", status.get()); @@ -220,15 +222,19 @@ TEST_P(UnifiedCAPI, TestBasicEagerMatMul2) { TFE_TensorHandle* result_t = TF_AbstractTensorGetEagerTensor(result, status.get()); ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); - TF_Tensor* result_tensor = TFE_TensorHandleResolve(result_t, status.get()); - TF_Tensor* expected_val_tensor = TFE_TensorHandleResolve(expected_tensor, status.get()); - float* result_value = static_cast(TF_TensorData(result_tensor)); - float* expected_value = static_cast(TF_TensorData(expected_val_tensor)); - EXPECT_EQ(*result_value, *expected_value); + TF_Tensor* result_tensor = TFE_TensorHandleResolve(result_t, status.get()); + + // Copy Tensor data into array. 
+ float result_data[4] = {0}; + memcpy(&result_data[0], TF_TensorData(result_tensor), TF_TensorByteSize(result_tensor)); + + int data_len = 4; //length of e_vals + for(int i = 0; i < data_len; i++){ + EXPECT_EQ(result_data[i], e_vals[i]); + } TF_DeleteTensor(result_tensor); - TF_DeleteTensor(expected_val_tensor); TF_DeleteAbstractTensor(result); TF_DeleteOutputList(o); TF_DeleteExecutionContext(ctx); From dc6a18b6bf9ed77d009b5114ad294b6cf8db7019 Mon Sep 17 00:00:00 2001 From: amturati Date: Mon, 29 Jun 2020 22:23:37 +0000 Subject: [PATCH 0087/2522] resolving nit style issues --- .../c/eager/c_api_unified_experimental_test.cc | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/tensorflow/c/eager/c_api_unified_experimental_test.cc b/tensorflow/c/eager/c_api_unified_experimental_test.cc index 8ebfa569f10..9c738038e49 100644 --- a/tensorflow/c/eager/c_api_unified_experimental_test.cc +++ b/tensorflow/c/eager/c_api_unified_experimental_test.cc @@ -109,10 +109,9 @@ TEST_P(UnifiedCAPI, TestBasicEagerMatMul) { float vals [] = {0.0f,0.0f,0.0f,0.0f}; TFE_Context* eager_ctx = TF_ExecutionContextGetTFEContext(ctx); TFE_TensorHandle* t = TestMatrixTensorHandleWithInput(eager_ctx, vals); - TFE_TensorHandle* expected_tensor = TestMatrixTensorHandleWithInput(eager_ctx, vals); // 2x2 matrix of zeros as expected result TF_AbstractTensor* at = - TF_CreateAbstractTensorFromEagerTensor(t, status.get()); //get abstract tensor + TF_CreateAbstractTensorFromEagerTensor(t, status.get()); // get abstract tensor ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); // Build an abstract operation. @@ -146,7 +145,7 @@ TEST_P(UnifiedCAPI, TestBasicEagerMatMul) { float result_data[4] = {0}; memcpy(&result_data[0], TF_TensorData(result_tensor), TF_TensorByteSize(result_tensor)); - int data_len = 4; //length of result_data + int data_len = 4; // length of result_data for(int i = 0; i < data_len; i++){ EXPECT_EQ(result_data[i], 0); } @@ -182,7 +181,7 @@ TEST_P(UnifiedCAPI, TestBasicEagerMatMul2) { TFE_TensorHandle* t1 = TestMatrixTensorHandleWithInput(eager_ctx, vals1); TF_AbstractTensor* at1 = - TF_CreateAbstractTensorFromEagerTensor(t1, status.get()); //get abstract tensor + TF_CreateAbstractTensorFromEagerTensor(t1, status.get()); // get abstract tensor ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); // Build 2nd Matrix. @@ -190,12 +189,9 @@ TEST_P(UnifiedCAPI, TestBasicEagerMatMul2) { TFE_TensorHandle* t2 = TestMatrixTensorHandleWithInput(eager_ctx, vals2); TF_AbstractTensor* at2 = - TF_CreateAbstractTensorFromEagerTensor(t2, status.get()); //get abstract tensor + TF_CreateAbstractTensorFromEagerTensor(t2, status.get()); // get abstract tensor ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); - // Build expected result - float e_vals [] = {19.0f,22.0f,43.0f,50.0f}; - // Build an abstract operation. auto* op = TF_NewAbstractOp(ctx); TF_AbstractOpSetOpType(op, "MatMul", status.get()); @@ -229,7 +225,10 @@ TEST_P(UnifiedCAPI, TestBasicEagerMatMul2) { float result_data[4] = {0}; memcpy(&result_data[0], TF_TensorData(result_tensor), TF_TensorByteSize(result_tensor)); - int data_len = 4; //length of e_vals + // Build expected result & verify. 
+ float e_vals [] = {19.0f,22.0f,43.0f,50.0f}; + + int data_len = 4; // length of e_vals for(int i = 0; i < data_len; i++){ EXPECT_EQ(result_data[i], e_vals[i]); } From 2296107175c8c07ba1ca6f59d2ff2a77ca437cba Mon Sep 17 00:00:00 2001 From: rahul-kamat Date: Tue, 30 Jun 2020 00:28:12 +0000 Subject: [PATCH 0088/2522] Add TensorLike with tf_export --- tensorflow/python/types/core.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tensorflow/python/types/core.py b/tensorflow/python/types/core.py index 20da83e562d..44bfa8fb517 100644 --- a/tensorflow/python/types/core.py +++ b/tensorflow/python/types/core.py @@ -18,6 +18,10 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from typing import Union +import numpy as np + +from tensorflow.python.util.tf_export import tf_export # TODO(mdan): Consider adding ABC once the dependence on isinstance is reduced. # TODO(mdan): Add type annotations. @@ -58,3 +62,7 @@ class Value(Tensor): def numpy(self): pass + + +TensorLike = Union[Tensor, int, float, bool, str, tuple, list, np.ndarray] +tf_export("TensorLike").export_constant(__name__, "TensorLike") From ce47a396ff795bdb6cf48eb53dbcba46cb51fa7d Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Tue, 30 Jun 2020 04:12:11 +0000 Subject: [PATCH 0089/2522] TensorKey class and TensorMap tests --- tensorflow/core/BUILD | 1 + tensorflow/core/framework/BUILD | 70 ++++++++++++++ tensorflow/core/framework/tensor.h | 30 +----- tensorflow/core/framework/tensor_key.h | 64 ++++++++++++ tensorflow/core/kernels/BUILD | 1 + tensorflow/core/kernels/tensor_map.cc | 13 +-- tensorflow/core/kernels/tensor_map.h | 31 ++++-- tensorflow/core/kernels/tensor_map_test.cc | 107 ++++++++++++++++++--- 8 files changed, 262 insertions(+), 55 deletions(-) create mode 100644 tensorflow/core/framework/tensor_key.h diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index d6e44bb36aa..cf5c1c4faed 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -494,6 +494,7 @@ tf_cuda_library( "//tensorflow/core/framework:shared_ptr_variant.h", "//tensorflow/core/framework:stats_aggregator.h", "//tensorflow/core/framework:tensor.h", + "//tensorflow/core/framework:tensor_key.h", "//tensorflow/core/framework:tensor_shape.h", "//tensorflow/core/framework:tensor_slice.h", "//tensorflow/core/framework:tensor_types.h", diff --git a/tensorflow/core/framework/BUILD b/tensorflow/core/framework/BUILD index 52f15dcb5c2..40f355cc082 100644 --- a/tensorflow/core/framework/BUILD +++ b/tensorflow/core/framework/BUILD @@ -211,6 +211,7 @@ filegroup( "shared_ptr_variant.h", "stats_aggregator.h", "tensor.h", + "tensor_key.h", "tensor_reference.h", "tensor_shape.h", "tensor_slice.h", @@ -762,6 +763,75 @@ tf_cuda_library( alwayslink = 1, ) +tf_cuda_library( + name = "tensor_key", + srcs = [ + "log_memory.cc", + "tensor.cc", + "typed_allocator.cc", + "types.cc", + "variant.cc", + "variant_op_registry.cc", + "variant_tensor_data.cc", + ], + hdrs = [ + "log_memory.h", + "register_types.h", + "tensor.h", + "tensor_key.h", + "typed_allocator.h", + "types.h", + "variant.h", + "variant_encode_decode.h", + "variant_op_registry.h", + "variant_tensor_data.h", + ], + visibility = [ + "//tensorflow/core:__pkg__", + "//tensorflow/core/util:__pkg__", + ], + deps = [ + ":allocation_description_proto_cc", + ":allocator", + ":bfloat16", + ":log_memory_proto_cc", + ":numeric_types", + ":resource_handle", + ":resource_handle_proto_cc", + ":tensor_description_proto_cc", + ":tensor_proto_cc", + 
":tensor_shape", + ":tensor_types", + ":type_index", + ":type_traits", + ":types_proto_cc", + "//tensorflow/core/lib/core:coding", + "//tensorflow/core/lib/core:errors", + "//tensorflow/core/lib/core:refcount", + "//tensorflow/core/lib/core:status", + "//tensorflow/core/lib/core:stringpiece", + "//tensorflow/core/lib/gtl:array_slice", + "//tensorflow/core/lib/gtl:flatmap", + "//tensorflow/core/lib/gtl:inlined_vector", + "//tensorflow/core/lib/hash", + "//tensorflow/core/lib/strings:str_util", + "//tensorflow/core/lib/strings:strcat", + "//tensorflow/core/platform:abi", + "//tensorflow/core/platform:logging", + "//tensorflow/core/platform:macros", + "//tensorflow/core/platform:platform_port", + "//tensorflow/core/platform:protobuf", + "//tensorflow/core/platform:strcat", + "//tensorflow/core/platform:tensor_coding", + "//tensorflow/core/platform:types", + "//tensorflow/core/public:version", + "//third_party/eigen3", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + ], + alwayslink = 1, +) + cc_library( name = "shape_inference", srcs = ["shape_inference.cc"], diff --git a/tensorflow/core/framework/tensor.h b/tensorflow/core/framework/tensor.h index 33c2338aeb4..28eab3ab1e0 100644 --- a/tensorflow/core/framework/tensor.h +++ b/tensorflow/core/framework/tensor.h @@ -319,34 +319,6 @@ class Tensor { return true; } - /// Hashable - // Equality operator. Needed for absl hashing. - friend bool operator==(const Tensor& lhs, const Tensor& rhs) { - return lhs.shape() == rhs.shape() && lhs.dtype() == rhs.dtype(); - } - - // Comparison operator. Needed for absl hashing. - /*friend bool operator<(const Tensor& lhs, const Tensor& rhs) { - return lhs.NumElements() < rhs.NumElements(); - }*/ - - // AbslHashValue() function, needed for absl hashing. - template - friend H AbslHashValue(H h, const Tensor& k) { - //int temp = k.NumElements(); - - uint8* d = (uint8*)(k.buf_->data()); - std::cout << "buffer " << d << std::endl; - size_t s = k.buf_->size(); - std::vector vec; - - for (int i=0; i < s; i++) { - vec.push_back(d[i]); - } - - return H::combine(std::move(h), vec); - } - /// \brief Slice this tensor along the 1st dimension. /// I.e., the returned tensor satisfies @@ -676,7 +648,7 @@ class Tensor { // buffer is one. bool RefCountIsOne() const; - protected: + private: void CheckType(DataType expected_dtype) const; void CheckTypeAndIsAligned(DataType expected_dtype) const; void CheckIsAlignedAndSingleElement() const; diff --git a/tensorflow/core/framework/tensor_key.h b/tensorflow/core/framework/tensor_key.h new file mode 100644 index 00000000000..8eff58b2dda --- /dev/null +++ b/tensorflow/core/framework/tensor_key.h @@ -0,0 +1,64 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/framework/tensor.h" + +namespace tensorflow { + +class TensorKey : public Tensor { + public: + using Tensor::Tensor; + + TensorKey(const Tensor& t) : Tensor(t) {} + + // Equality operator. Needed for absl hashing. + friend bool operator==(const TensorKey& t1, const TensorKey& t2) { + if (t1.dtype() != t2.dtype() || t1.shape() != t2.shape()) { + return false; + } + if (DataTypeCanUseMemcpy(t1.dtype())) { + return t1.tensor_data() == t2.tensor_data(); + } + if (t1.dtype() == DT_STRING) { + const auto s1 = t1.unaligned_flat(); + const auto s2 = t2.unaligned_flat(); + for (int64 i = 0, n = t1.NumElements(); i < n; ++i) { + if (TF_PREDICT_FALSE(s1(i) != s2(i))) { + return false; + } + } + return true; + } + return false; + } + + friend bool operator!=(const TensorKey& t1, const TensorKey& t2) { + return !(t1==t2); + } + + // AbslHashValue() function, needed for absl hashing. + template + friend H AbslHashValue(H h, const TensorKey& k) { + uint8* d = (uint8*)(k.data()); + size_t s = k.AllocatedBytes(); + std::vector vec; + for (int i=0; i < s; i++) { + vec.push_back(d[i]); + } + return H::combine(std::move(h), s); + } +}; + +} //namespace tensorflow \ No newline at end of file diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 5139dd95e5d..eba435c6b25 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -2971,6 +2971,7 @@ tf_cc_tests( ], deps = [ ":tensor_map", + "//tensorflow/core/framework:tensor_testutil", "//tensorflow/core:test", "//tensorflow/core:test_main", "@com_google_absl//absl/strings", diff --git a/tensorflow/core/kernels/tensor_map.cc b/tensorflow/core/kernels/tensor_map.cc index 3a20708933e..6245b01cf13 100644 --- a/tensorflow/core/kernels/tensor_map.cc +++ b/tensorflow/core/kernels/tensor_map.cc @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/tensor_map.h" +#include "tensorflow/core/kernels/tensor_map.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_shape.pb.h" #include "tensorflow/core/framework/variant_op_registry.h" @@ -28,7 +28,7 @@ TensorMap::~TensorMap() { void TensorMap::Encode(VariantTensorData* data) const { data->set_type_name(TypeName()); - absl::flat_hash_map::const_iterator map_it = tensors().begin(); + absl::flat_hash_map::const_iterator map_it = tensors().begin(); while (map_it != tensors().end()) { Tensor k = map_it->first; Tensor v = map_it->second; @@ -56,7 +56,7 @@ static Status TensorMapDeviceCopy( to->element_shape = from.element_shape; to->element_dtype = from.element_dtype; to->max_num_elements = from.max_num_elements; - for (const std::pair& p : from.tensors()) { + for (const std::pair& p : from.tensors()) { to->tensors().emplace(p); //TODO: check valid dtype //if (t.dtype() != DT_INVALID) { //TF_RETURN_IF_ERROR(copy(p, &to->tensors().back())); @@ -85,14 +85,15 @@ bool TensorMap::Decode(const VariantTensorData& data) { StringPiece iter(metadata); std::vector::const_iterator tensors_it = data.tensors().begin(); + while (tensors_it != data.tensors().end()) { // should assert that tensors_it + 1 is also not the end - /*if (*tensors_it + 1 == data.tensors().end()) { + /*if (*std::next(tensors_it) == data.tensors().end()) { return false; }*/ - - tensors().emplace(*tensors_it,*++tensors_it); + TensorKey k = TensorKey(*tensors_it); // copy inefficient? + tensors().emplace(k,*++tensors_it); tensors_it++; } diff --git a/tensorflow/core/kernels/tensor_map.h b/tensorflow/core/kernels/tensor_map.h index 2e8ebcd219d..4e23fd59e51 100644 --- a/tensorflow/core/kernels/tensor_map.h +++ b/tensorflow/core/kernels/tensor_map.h @@ -18,6 +18,7 @@ limitations under the License. #include #include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_key.h" #include "tensorflow/core/framework/variant.h" #include "tensorflow/core/framework/variant_tensor_data.h" #include "tensorflow/core/lib/core/refcount.h" @@ -124,8 +125,8 @@ class TensorMap { int max_num_elements = -1; // Access to the underlying tensor container. - absl::flat_hash_map& tensors() { return tensors_->values_; } - const absl::flat_hash_map& tensors() const { return tensors_->values_; } + absl::flat_hash_map& tensors() { return tensors_->values_; } + const absl::flat_hash_map& tensors() const { return tensors_->values_; } // Access to shape and element dtype PartialTensorShape& shape() { return element_shape; } @@ -142,19 +143,31 @@ class TensorMap { return out; } - bool insert(Tensor key, Tensor value) { - tensors_->values_.try_emplace(key, value); - return true; + // Insert key and value if the key does not already exist. + // Returns true if the insertion happens. + bool insert(TensorKey key, Tensor value) { + auto r = tensors_->values_.try_emplace(key, value); + return r.second; } - /*Tensor& lookup(Tensor key) { + // Lookup given key. Returns iterator to found key or end. + absl::flat_hash_map::iterator find(TensorKey key) { return tensors_->values_.find(key); - }*/ + } - bool erase(Tensor key) { + Tensor& operator[](TensorKey& k) { + return tensors_->values_[k]; + } + // Removes element with given key. Return size of removed element. 
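Because TensorKey supplies operator== and an AbslHashValue overload, it meets the key requirements of absl::flat_hash_map, which is what the values_ container in tensor_map.h relies on. The same Abseil pattern, shown with an ordinary struct so the sketch stays self-contained (illustrative only, not TensorFlow code):

    #include <string>
    #include <utility>
    #include "absl/container/flat_hash_map.h"

    // A hashable key type: equality plus AbslHashValue is all flat_hash_map needs.
    struct Key {
      int id;
      friend bool operator==(const Key& a, const Key& b) { return a.id == b.id; }
      template <typename H>
      friend H AbslHashValue(H h, const Key& k) {
        return H::combine(std::move(h), k.id);
      }
    };

    int main() {
      absl::flat_hash_map<Key, std::string> m;
      m.try_emplace(Key{11}, "value");          // insert only if the key is absent
      bool found = m.find(Key{11}) != m.end();  // lookup returns an iterator or end()
      size_t removed = m.erase(Key{11});        // number of elements removed (0 or 1)
      return (found && removed == 1) ? 0 : 1;
    }
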
+ size_t erase(TensorKey key) { return tensors_->values_.erase(key); } + // Size returns the number of elements in the map + size_t size() { + return tensors_->values_.size(); + } + // Is this TensorMap the only one with a reference to the underlying // container? bool RefCountIsOne() const { return tensors_->RefCountIsOne(); } @@ -163,7 +176,7 @@ class TensorMap { class Tensors : public core::RefCounted { public: //std::unordered_map values_; - absl::flat_hash_map values_; + absl::flat_hash_map values_; }; Tensors* tensors_; }; diff --git a/tensorflow/core/kernels/tensor_map_test.cc b/tensorflow/core/kernels/tensor_map_test.cc index 49f963bf950..16726b780de 100644 --- a/tensorflow/core/kernels/tensor_map_test.cc +++ b/tensorflow/core/kernels/tensor_map_test.cc @@ -12,9 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ + #include "tensorflow/core/kernels/tensor_map.h" #include "tensorflow/core/framework/tensor.h" #include "absl/container/flat_hash_map.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/framework/variant.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/test_benchmark.h" @@ -29,26 +32,108 @@ TEST(TensorMapTest, Empty) { EXPECT_EQ(tm.tensors().begin(), tm.tensors().end()); } -TEST(TensorMap, Copy) { - TensorMap tm; - TensorMap tmc = tm.Copy(); - EXPECT_EQ(tm.dtype(),tmc.dtype()); - EXPECT_EQ(tm.tensors(),tmc.tensors()); +TEST(TensorKeyTest, Equal) { + TensorKey k1 = Tensor(15); + TensorKey k2 = Tensor(15); + EXPECT_EQ(k1,k2); + + TensorKey k3 = Tensor(15); + TensorKey k4 = Tensor(37); + EXPECT_NE(k3,k4); } -TEST(TensorMap, Insert) { +TEST(TensorMapTest, Insert) { EXPECT_EQ(1,1); TensorMap tm; - Tensor k = Tensor(DT_INT64, TensorShape({1,1})); - Tensor v = Tensor(DT_INT64, TensorShape({2,3})); + TensorKey k = Tensor(11); + Tensor v = Tensor(22); tm.insert(k,v); - absl::flat_hash_map am; + absl::flat_hash_map am; am.try_emplace(k,v); - EXPECT_EQ(tm.tensors(), am); + + absl::flat_hash_map::iterator map_it = tm.tensors().begin(); + EXPECT_EQ(map_it->first, k); + test::ExpectTensorEqual(map_it->second, v); + map_it++; + EXPECT_EQ(map_it, tm.tensors().end()); } -//TODO(kattian): test Lookup, Erase +TEST(TensorMapTest, Lookup) { + TensorMap tm; + TensorKey k = Tensor(11); + Tensor v = Tensor(22); + tm.insert(k,v); + absl::flat_hash_map::iterator map_it = tm.find(k); + Tensor f = map_it->second; + EXPECT_EQ(map_it->first, k); + test::ExpectTensorEqual(f, v); +} + +TEST(TensorMapTest, Erase) { + TensorMap tm; + TensorKey k = Tensor(11); + Tensor v = Tensor(22); + tm.insert(k,v); + tm.erase(k); + EXPECT_EQ(tm.find(k), tm.tensors().end()); +} + +TEST(TensorMapTest, SameKeyInsert) { + TensorMap tm; + TensorKey k = Tensor(11); + Tensor v1 = Tensor(22); + Tensor v2 = Tensor(23); + bool b1 = tm.insert(k,v1); + bool b2 = tm.insert(k,v2); + EXPECT_EQ(b1, true); + EXPECT_EQ(b2, false); + absl::flat_hash_map::iterator map_it = tm.find(k); + EXPECT_EQ(map_it->first, k); + test::ExpectTensorEqual(map_it->second, v1); +} + +TEST(TensorMapTest, Replace) { + TensorMap tm; + TensorKey k = Tensor(11); + Tensor v1 = Tensor(22); + Tensor v2 = Tensor(23); + tm[k] = v2; + + absl::flat_hash_map::iterator map_it = tm.find(k); + EXPECT_EQ(map_it->first, k); + test::ExpectTensorEqual(map_it->second, v2); +} + +TEST(TensorMapTest, Copy) 
{ + TensorMap tm; + TensorKey k = Tensor(11); + Tensor v = Tensor(22); + tm.insert(k,v); + TensorMap tmc = tm.Copy(); + EXPECT_EQ(tm.dtype(), tmc.dtype()); + EXPECT_EQ(tm.size(), tmc.size()); + EXPECT_NE(tm.find(k), tm.tensors().end()); + EXPECT_NE(tmc.find(k), tmc.tensors().end()); + EXPECT_EQ(tm.find(k)->first, tmc.find(k)->first); + test::ExpectTensorEqual(tm.find(k)->second, tmc.find(k)->second); +} + +/*TEST(TensorMapTest, EncodeDecode) { + TensorMap tm; + TensorKey k = Tensor(11); + Tensor v = Tensor(22); + tm.insert(k,v); + VariantTensorData* data; + TensorMap tmc = Decode(Encode(data)); + + EXPECT_EQ(tm.dtype(), tmc.dtype()); + EXPECT_EQ(tm.size(), tmc.size()); + EXPECT_NE(tm.find(k), tm.tensors().end()); + EXPECT_NE(tmc.find(k), tmc.tensors().end()); + EXPECT_EQ(tm.find(k)->first, tmc.find(k)->first); + test::ExpectTensorEqual(tm.find(k)->second, tmc.find(k)->second); +}*/ } // namespace } // namespace tensorflow From 0d5104e550d306259d09b920fddba76a70814f68 Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Tue, 30 Jun 2020 07:22:28 +0000 Subject: [PATCH 0090/2522] EncodeDecode Test working --- tensorflow/core/kernels/tensor_map.cc | 1 + tensorflow/core/kernels/tensor_map_test.cc | 12 +++++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/kernels/tensor_map.cc b/tensorflow/core/kernels/tensor_map.cc index 6245b01cf13..abeaf92390e 100644 --- a/tensorflow/core/kernels/tensor_map.cc +++ b/tensorflow/core/kernels/tensor_map.cc @@ -37,6 +37,7 @@ void TensorMap::Encode(VariantTensorData* data) const { *data->add_tensors() = k; *data->add_tensors() = v; } + map_it++; } string metadata; // TODO(b/118838800): Add a proto for storing the metadata. diff --git a/tensorflow/core/kernels/tensor_map_test.cc b/tensorflow/core/kernels/tensor_map_test.cc index 16726b780de..b93171b4f70 100644 --- a/tensorflow/core/kernels/tensor_map_test.cc +++ b/tensorflow/core/kernels/tensor_map_test.cc @@ -119,21 +119,23 @@ TEST(TensorMapTest, Copy) { test::ExpectTensorEqual(tm.find(k)->second, tmc.find(k)->second); } -/*TEST(TensorMapTest, EncodeDecode) { +TEST(TensorMapTest, EncodeDecode) { TensorMap tm; TensorKey k = Tensor(11); Tensor v = Tensor(22); tm.insert(k,v); - VariantTensorData* data; - TensorMap tmc = Decode(Encode(data)); - + VariantTensorData data; + tm.Encode(&data); + TensorMap tmc; + tmc.Decode(data); + EXPECT_EQ(tm.dtype(), tmc.dtype()); EXPECT_EQ(tm.size(), tmc.size()); EXPECT_NE(tm.find(k), tm.tensors().end()); EXPECT_NE(tmc.find(k), tmc.tensors().end()); EXPECT_EQ(tm.find(k)->first, tmc.find(k)->first); test::ExpectTensorEqual(tm.find(k)->second, tmc.find(k)->second); -}*/ +} } // namespace } // namespace tensorflow From 5817699a27210a317a198a829cea634e547f3515 Mon Sep 17 00:00:00 2001 From: rahul-kamat Date: Tue, 30 Jun 2020 18:21:07 +0000 Subject: [PATCH 0091/2522] Add complex type, Update the goldens for api_test --- tensorflow/python/types/core.py | 4 ++-- tensorflow/tools/api/golden/v1/tensorflow.experimental.pbtxt | 4 ++++ tensorflow/tools/api/golden/v2/tensorflow.experimental.pbtxt | 4 ++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/types/core.py b/tensorflow/python/types/core.py index 44bfa8fb517..0ac9d83983d 100644 --- a/tensorflow/python/types/core.py +++ b/tensorflow/python/types/core.py @@ -64,5 +64,5 @@ class Value(Tensor): pass -TensorLike = Union[Tensor, int, float, bool, str, tuple, list, np.ndarray] -tf_export("TensorLike").export_constant(__name__, "TensorLike") +TensorLike = Union[Tensor, int, 
float, bool, str, complex, tuple, list, np.ndarray] +tf_export("experimental.TensorLike").export_constant(__name__, "TensorLike") diff --git a/tensorflow/tools/api/golden/v1/tensorflow.experimental.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.experimental.pbtxt index c3a84b15dd6..4ce622ecd12 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.experimental.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.experimental.pbtxt @@ -4,6 +4,10 @@ tf_module { name: "Optional" mtype: "" } + member { + name: "TensorLike" + mtype: "" + } member_method { name: "async_clear_error" argspec: "args=[], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.experimental.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.experimental.pbtxt index 95e06075952..0ecda215eac 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.experimental.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.experimental.pbtxt @@ -4,6 +4,10 @@ tf_module { name: "Optional" mtype: "" } + member { + name: "TensorLike" + mtype: "" + } member { name: "dlpack" mtype: "" From 9c424e45882e4fed1097c518053c3e721db6af1d Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Tue, 30 Jun 2020 18:40:25 +0000 Subject: [PATCH 0092/2522] build updates --- tensorflow/core/BUILD | 4 +- tensorflow/core/framework/BUILD | 69 --------------------------------- tensorflow/python/BUILD | 3 ++ 3 files changed, 5 insertions(+), 71 deletions(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index cf5c1c4faed..5b8eac9759c 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -494,7 +494,6 @@ tf_cuda_library( "//tensorflow/core/framework:shared_ptr_variant.h", "//tensorflow/core/framework:stats_aggregator.h", "//tensorflow/core/framework:tensor.h", - "//tensorflow/core/framework:tensor_key.h", "//tensorflow/core/framework:tensor_shape.h", "//tensorflow/core/framework:tensor_slice.h", "//tensorflow/core/framework:tensor_types.h", @@ -1122,7 +1121,8 @@ cc_library( "//tensorflow/core/kernels:dataset_ops", # Depends on grappler "//tensorflow/core/kernels:list_kernels", # Depends on variant_op_registry.h "//tensorflow/core/kernels:map_kernels", - ], + "//tensorflow/core/kernels:tensor_map", + ], ) tf_cuda_library( diff --git a/tensorflow/core/framework/BUILD b/tensorflow/core/framework/BUILD index 40f355cc082..e09022d5235 100644 --- a/tensorflow/core/framework/BUILD +++ b/tensorflow/core/framework/BUILD @@ -763,75 +763,6 @@ tf_cuda_library( alwayslink = 1, ) -tf_cuda_library( - name = "tensor_key", - srcs = [ - "log_memory.cc", - "tensor.cc", - "typed_allocator.cc", - "types.cc", - "variant.cc", - "variant_op_registry.cc", - "variant_tensor_data.cc", - ], - hdrs = [ - "log_memory.h", - "register_types.h", - "tensor.h", - "tensor_key.h", - "typed_allocator.h", - "types.h", - "variant.h", - "variant_encode_decode.h", - "variant_op_registry.h", - "variant_tensor_data.h", - ], - visibility = [ - "//tensorflow/core:__pkg__", - "//tensorflow/core/util:__pkg__", - ], - deps = [ - ":allocation_description_proto_cc", - ":allocator", - ":bfloat16", - ":log_memory_proto_cc", - ":numeric_types", - ":resource_handle", - ":resource_handle_proto_cc", - ":tensor_description_proto_cc", - ":tensor_proto_cc", - ":tensor_shape", - ":tensor_types", - ":type_index", - ":type_traits", - ":types_proto_cc", - "//tensorflow/core/lib/core:coding", - "//tensorflow/core/lib/core:errors", - "//tensorflow/core/lib/core:refcount", - "//tensorflow/core/lib/core:status", - "//tensorflow/core/lib/core:stringpiece", - 
"//tensorflow/core/lib/gtl:array_slice", - "//tensorflow/core/lib/gtl:flatmap", - "//tensorflow/core/lib/gtl:inlined_vector", - "//tensorflow/core/lib/hash", - "//tensorflow/core/lib/strings:str_util", - "//tensorflow/core/lib/strings:strcat", - "//tensorflow/core/platform:abi", - "//tensorflow/core/platform:logging", - "//tensorflow/core/platform:macros", - "//tensorflow/core/platform:platform_port", - "//tensorflow/core/platform:protobuf", - "//tensorflow/core/platform:strcat", - "//tensorflow/core/platform:tensor_coding", - "//tensorflow/core/platform:types", - "//tensorflow/core/public:version", - "//third_party/eigen3", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/strings", - ], - alwayslink = 1, -) - cc_library( name = "shape_inference", srcs = ["shape_inference.cc"], diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 3e59e61ae88..3c465252007 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -168,6 +168,7 @@ py_library( ":kernels", ":lib", ":list_ops", + ":map_ops", ":manip_ops", ":map_fn", ":math_ops", @@ -3521,6 +3522,7 @@ py_library( ":functional_ops_gen", ":gradients_util", ":list_ops", + ":map_ops", ":pywrap_tf_session", ":tensor_array_ops", ":tensor_shape", @@ -5002,6 +5004,7 @@ cuda_py_test( ":gradients", ":init_ops", ":list_ops", + ":map_ops", ":math_grad", ":math_ops", ":nn_grad", From 19632ba23d5478680c7951d0d20da270f0fa4162 Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Tue, 30 Jun 2020 18:56:30 +0000 Subject: [PATCH 0093/2522] import guards --- tensorflow/core/BUILD | 2 +- tensorflow/core/framework/tensor_key.h | 6 +++++- tensorflow/core/kernels/tensor_map.h | 4 ++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 5b8eac9759c..87d71d01935 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1122,7 +1122,7 @@ cc_library( "//tensorflow/core/kernels:list_kernels", # Depends on variant_op_registry.h "//tensorflow/core/kernels:map_kernels", "//tensorflow/core/kernels:tensor_map", - ], + ], ) tf_cuda_library( diff --git a/tensorflow/core/framework/tensor_key.h b/tensorflow/core/framework/tensor_key.h index 8eff58b2dda..14875de5918 100644 --- a/tensorflow/core/framework/tensor_key.h +++ b/tensorflow/core/framework/tensor_key.h @@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#ifndef TENSORFLOW_CORE_FRAMEWORK_TENSOR_KEY_H_ +#define TENSORFLOW_CORE_FRAMEWORK_TENSOR_KEY_H_ #include "tensorflow/core/framework/tensor.h" @@ -61,4 +63,6 @@ class TensorKey : public Tensor { } }; -} //namespace tensorflow \ No newline at end of file +} //namespace tensorflow + +#endif \ No newline at end of file diff --git a/tensorflow/core/kernels/tensor_map.h b/tensorflow/core/kernels/tensor_map.h index 4e23fd59e51..7da8283c655 100644 --- a/tensorflow/core/kernels/tensor_map.h +++ b/tensorflow/core/kernels/tensor_map.h @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_TENSOR_LIST_H_ -#define TENSORFLOW_CORE_KERNELS_TENSOR_LIST_H_ +#ifndef TENSORFLOW_CORE_KERNELS_TENSOR_MAP_H_ +#define TENSORFLOW_CORE_KERNELS_TENSOR_MAP_H_ #include From 880b51a36700277e9231d6ee73843de3b5bee65b Mon Sep 17 00:00:00 2001 From: amturati Date: Tue, 30 Jun 2020 19:37:26 +0000 Subject: [PATCH 0094/2522] fixed signature for util function to accept dimensions as a parameter --- tensorflow/c/eager/c_api_test_util.cc | 6 ++---- tensorflow/c/eager/c_api_test_util.h | 4 ++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/tensorflow/c/eager/c_api_test_util.cc b/tensorflow/c/eager/c_api_test_util.cc index aae4ad2c6eb..0bd69c9e927 100644 --- a/tensorflow/c/eager/c_api_test_util.cc +++ b/tensorflow/c/eager/c_api_test_util.cc @@ -88,12 +88,10 @@ TFE_TensorHandle* TestMatrixTensorHandle(TFE_Context* ctx) { return th; } -TFE_TensorHandle* TestMatrixTensorHandleWithInput(TFE_Context* ctx, float data[]) { - int64_t dims[] = {2, 2}; - +TFE_TensorHandle* TestMatrixTensorHandleWithInput(TFE_Context* ctx, float data[], int64_t dims [], int num_dims) { TF_Status* status = TF_NewStatus(); TF_Tensor* t = TFE_AllocateHostTensor(ctx, TF_FLOAT, &dims[0], - sizeof(dims) / sizeof(int64_t), status); + num_dims, status); memcpy(TF_TensorData(t), &data[0], TF_TensorByteSize(t)); TFE_TensorHandle* th = TFE_NewTensorHandleFromTensor(ctx, t, status); CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); diff --git a/tensorflow/c/eager/c_api_test_util.h b/tensorflow/c/eager/c_api_test_util.h index 497264e7c01..eea4784cdac 100644 --- a/tensorflow/c/eager/c_api_test_util.h +++ b/tensorflow/c/eager/c_api_test_util.h @@ -34,8 +34,8 @@ TFE_TensorHandle* DoubleTestMatrixTensorHandle(TFE_Context* ctx); // Return a tensor handle containing a 2x2 matrix of floats TFE_TensorHandle* TestMatrixTensorHandle(TFE_Context* ctx); -//Return a tensor handle containing 2x2 matrix containing given data -TFE_TensorHandle* TestMatrixTensorHandleWithInput(TFE_Context* ctx, float data[]); +//Return a tensor handle containing 2D matrix containing given data and dimensions +TFE_TensorHandle* TestMatrixTensorHandleWithInput(TFE_Context* ctx, float data[], int64_t dims [], int num_dims); // Return a tensor handle containing a 100x100 matrix of floats TFE_TensorHandle* TestMatrixTensorHandle100x100(TFE_Context* ctx); From fc8f6095c4f0290b9bcec63a3190a0ab13beed62 Mon Sep 17 00:00:00 2001 From: amturati Date: Tue, 30 Jun 2020 19:38:40 +0000 Subject: [PATCH 0095/2522] updated tests to take in dims as parameters, and added graph tracing test for MatMul --- .../eager/c_api_unified_experimental_test.cc | 124 +++++++++++++++++- 1 file changed, 119 insertions(+), 5 deletions(-) diff --git a/tensorflow/c/eager/c_api_unified_experimental_test.cc b/tensorflow/c/eager/c_api_unified_experimental_test.cc index 9c738038e49..398830c35fe 100644 --- a/tensorflow/c/eager/c_api_unified_experimental_test.cc +++ b/tensorflow/c/eager/c_api_unified_experimental_test.cc @@ -105,10 +105,13 @@ TEST_P(UnifiedCAPI, TestBasicEagerMatMul) { */ - // Build an abstract input tensor. + // Build an abstract input tensor. 
+ int64_t dims [] = {2,2}; // Matrices will be 2 x 2 + int num_dims = sizeof(dims)/sizeof(dims[0]); + float vals [] = {0.0f,0.0f,0.0f,0.0f}; TFE_Context* eager_ctx = TF_ExecutionContextGetTFEContext(ctx); - TFE_TensorHandle* t = TestMatrixTensorHandleWithInput(eager_ctx, vals); + TFE_TensorHandle* t = TestMatrixTensorHandleWithInput(eager_ctx, vals, dims,num_dims); //, dims[0],dims[1]); TF_AbstractTensor* at = TF_CreateAbstractTensorFromEagerTensor(t, status.get()); // get abstract tensor @@ -175,10 +178,13 @@ TEST_P(UnifiedCAPI, TestBasicEagerMatMul2) { */ - // Build 1st Matrix. + // Build 1st Matrix. + int64_t dims [] = {2,2}; // Matrices will be 2 x 2 + int num_dims = sizeof(dims)/sizeof(dims[0]); + float vals1 [] = {1.0f,2.0f,3.0f,4.0f}; TFE_Context* eager_ctx = TF_ExecutionContextGetTFEContext(ctx); - TFE_TensorHandle* t1 = TestMatrixTensorHandleWithInput(eager_ctx, vals1); + TFE_TensorHandle* t1 = TestMatrixTensorHandleWithInput(eager_ctx, vals1, dims, num_dims); TF_AbstractTensor* at1 = TF_CreateAbstractTensorFromEagerTensor(t1, status.get()); // get abstract tensor @@ -186,7 +192,7 @@ TEST_P(UnifiedCAPI, TestBasicEagerMatMul2) { // Build 2nd Matrix. float vals2 [] = {5.0f,6.0f,7.0f,8.0f}; - TFE_TensorHandle* t2 = TestMatrixTensorHandleWithInput(eager_ctx, vals2); + TFE_TensorHandle* t2 = TestMatrixTensorHandleWithInput(eager_ctx, vals2, dims, num_dims); TF_AbstractTensor* at2 = TF_CreateAbstractTensorFromEagerTensor(t2, status.get()); // get abstract tensor @@ -243,6 +249,7 @@ TEST_P(UnifiedCAPI, TestBasicEagerMatMul2) { TEST_P(UnifiedCAPI, TestBasicGraph) { std::unique_ptr status( TF_NewStatus(), TF_DeleteStatus); + // Start a new function / execution context. string fn_name = "double"; TF_ExecutionContext* graph_ctx = @@ -284,6 +291,7 @@ TEST_P(UnifiedCAPI, TestBasicGraph) { TF_ExecutionContextRegisterFunction(eager_execution_ctx, func, status.get()); ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + // Build the abstract op to run the function. TF_AbstractOp* fn_op = TF_NewAbstractOp(eager_execution_ctx); TF_AbstractOpSetOpType(fn_op, fn_name.c_str(), status.get()); @@ -323,6 +331,112 @@ TEST_P(UnifiedCAPI, TestBasicGraph) { TF_DeleteExecutionContext(eager_execution_ctx); } +// Graph Tracing for MatMul +TEST_P(UnifiedCAPI, TestBasicGraphMatMul) { + std::unique_ptr status( + TF_NewStatus(), TF_DeleteStatus); + + // Start a new function / execution context. + string fn_name = "matrix_multiply"; + TF_ExecutionContext* graph_ctx = + TF_CreateFunction(fn_name.c_str(), status.get()); + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + + auto* placeholder_t = + TF_AddFunctionParameter(graph_ctx, TF_FLOAT, status.get()); + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + + // Build an abstract operation. + auto* matmul_op = TF_NewAbstractOp(graph_ctx); + TF_AbstractOpSetOpType(matmul_op, "MatMul", status.get()); + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + TF_AbstractOpSetOpName(matmul_op, "my_matmul", status.get()); + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + + // Build inputs and outputs. + TF_AbstractTensor* inputs[2] = {placeholder_t, placeholder_t}; + TF_OutputList* mm_outputs = TF_NewOutputList(); + + // Execute. + TF_ExecuteOperation(matmul_op, 2, inputs, mm_outputs, graph_ctx, status.get()); + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + + // Clean up operation and inputs. 
+ TF_DeleteAbstractOp(matmul_op); + + TF_AbstractFunction* func = + TF_FinalizeFunction(graph_ctx, mm_outputs, status.get()); + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + + /* Now that the graph is built, test graph implementation on matmul example: + + [ [1,1] , * [ [1,1] , = [ [2,2], + [1,1] ] [1,1] ] [2,2] ] + + */ + + // Build eager context. + TFE_ContextOptions* opts = TFE_NewContextOptions(); + TF_ExecutionContext* eager_execution_ctx = + TF_NewEagerExecutionContext(opts, status.get()); + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + TFE_DeleteContextOptions(opts); + + TF_ExecutionContextRegisterFunction(eager_execution_ctx, func, status.get()); + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + + // Build the abstract op to run the function. + TF_AbstractOp* fn_op = TF_NewAbstractOp(eager_execution_ctx); + TF_AbstractOpSetOpType(fn_op, fn_name.c_str(), status.get()); + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + + + // Build an abstract input tensor. + TFE_Context* eager_ctx = + TF_ExecutionContextGetTFEContext(eager_execution_ctx); + + float vals [] = {1.0f,1.0f,1.0f,1.0f}; + int64_t dims [] = {2,2}; // Matrices will be 2 x 2 + int num_dims = sizeof(dims)/sizeof(dims[0]); + + TFE_TensorHandle* input_eager = TestMatrixTensorHandleWithInput(eager_ctx, vals, dims, num_dims); + TF_AbstractTensor* input_t = + TF_CreateAbstractTensorFromEagerTensor(input_eager, status.get()); + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + + TF_OutputListSetNumOutputs(mm_outputs, 1, status.get()); + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + TF_ExecuteOperation(fn_op, 1, &input_t, mm_outputs, eager_execution_ctx, + status.get()); + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + + ASSERT_EQ(1, TF_OutputListNumOutputs(mm_outputs)); + TF_AbstractTensor* final_result = TF_OutputListGet(mm_outputs, 0); + TFE_TensorHandle* final = + TF_AbstractTensorGetEagerTensor(final_result, status.get()); + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + TF_Tensor* f_t = TFE_TensorHandleResolve(final, status.get()); + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + + float result_data[4] = {0}; + memcpy(&result_data[0], TF_TensorData(f_t), TF_TensorByteSize(f_t)); + + int data_len = 4; + for(int i = 0; i < data_len; i++){ + ASSERT_EQ(result_data[i], 2.0f); + } + + TF_DeleteOutputList(mm_outputs); + TF_DeleteAbstractOp(fn_op); + TF_DeleteAbstractTensor(input_t); + TF_DeleteAbstractTensor(final_result); + TF_DeleteTensor(f_t); + TF_DeleteAbstractFunction(func); + + TF_DeleteExecutionContext(eager_execution_ctx); +} + + TEST_P(UnifiedCAPI, TestMultiOutputGraph) { std::unique_ptr status( TF_NewStatus(), TF_DeleteStatus); From 13a63576819626c58d73429f88d98bed7de48774 Mon Sep 17 00:00:00 2001 From: Reed Date: Tue, 30 Jun 2020 13:46:31 -0700 Subject: [PATCH 0096/2522] Fix tf32. Before, allowing TF32 would have no effect. The issue was the tf32_util.cc file was linked in twice, so there were two copies of the `tf32_allowed` global variable. Also add a test. 
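For reference, a small numeric sketch (plain Python, not TensorFlow code) of why the
new test expects a result of all 8s once TF32 is allowed: TF32 keeps only about 10
mantissa bits, so an input of 1 + 2**-20 rounds to exactly 1.0 before the 8x8 matmul
sums eight copies of it. The round_to_tf32 helper below is a hypothetical
approximation of that rounding, not a TensorFlow API:

    from math import frexp, ldexp

    def round_to_tf32(x):
        # Keep ~11 significand bits (10 explicit mantissa bits plus the implicit
        # leading bit), roughly what TF32 tensor cores retain for matmul inputs.
        m, e = frexp(x)                # x == m * 2**e, with 0.5 <= m < 1
        return ldexp(round(m * 2048) / 2048, e)

    x = 1 + 2 ** -20
    print(8 * x)                  # 8.00000762939453 -> expected with TF32 disabled
    print(8 * round_to_tf32(x))   # 8.0              -> expected with TF32 enabled

With TF32 disabled the full fp32 value 8 * (1 + 2**-20) survives, which is what
test_tf32_disabled checks.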
--- tensorflow/core/platform/BUILD | 5 +++ tensorflow/python/BUILD | 2 +- tensorflow/python/framework/config_test.py | 38 ++++++++++++++++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/platform/BUILD b/tensorflow/core/platform/BUILD index 33a1e7cfe0a..8b7c64e35d7 100644 --- a/tensorflow/core/platform/BUILD +++ b/tensorflow/core/platform/BUILD @@ -945,6 +945,11 @@ cc_library( copts = tf_copts(), ) +filegroup( + name = "tf32_hdr", + srcs = ["tf32_utils.h"], +) + tf_cc_tests( name = "low_level_library_tests", size = "small", diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 1aae054387b..77acca8c250 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -791,9 +791,9 @@ tf_python_pybind_extension( tf_python_pybind_extension( name = "_pywrap_tf32_execution", srcs = ["util/tf32.cc"], + hdrs = ["//tensorflow/core/platform:tf32_hdr"], module_name = "_pywrap_tf32_execution", deps = [ - "//tensorflow/core/platform:tf32_utils", "@pybind11", ], ) diff --git a/tensorflow/python/framework/config_test.py b/tensorflow/python/framework/config_test.py index 65845535ea7..81f8df69e99 100644 --- a/tensorflow/python/framework/config_test.py +++ b/tensorflow/python/framework/config_test.py @@ -756,6 +756,44 @@ class DeviceTest(test.TestCase): new_rewrite_options.scoped_allocator_opts.enable_op) +class TensorFloat32Test(test.TestCase): + + def setUp(self): + if not test_util.is_gpu_available(cuda_only=True, + min_cuda_compute_capability=(8, 0)): + self.skipTest('TensorFloat-32 requires an NVIDIA GPU with compute ' + 'capability of at least 8.0') + + def tearDown(self): + config.allow_tensor_float_32_execution(False) + + def test_tf32_enabled(self): + self.assertFalse(config.tensor_float_32_execution_allowed()) + config.allow_tensor_float_32_execution(True) + self.assertTrue(config.tensor_float_32_execution_allowed()) + + x = array_ops.fill((8, 8), 1 + 2 ** -20) + y = array_ops.ones((8, 8)) + out = math_ops.matmul(x, y) + # In tf32, each element of x is rounded to 1, so the output will be 8s. 
+ expected = array_ops.fill((8, 8), 8) + self.assertAllEqual(out, expected) + + def test_tf32_disabled(self): + x = array_ops.fill((8, 8), 1 + 2 ** -20) + y = array_ops.ones((8, 8)) + out = math_ops.matmul(x, y) + expected = array_ops.fill((8, 8), 8 * (1 + 2 ** -20)) + self.assertAllEqual(out, expected) + + # Test disabling tf32 after enabling it works correctly + config.allow_tensor_float_32_execution(True) + config.allow_tensor_float_32_execution(False) + self.assertFalse(config.tensor_float_32_execution_allowed()) + out = math_ops.matmul(x, y) + self.assertAllEqual(out, expected) + + if __name__ == '__main__': ops.enable_eager_execution() test.main() From ae28e95da12e04e613478c3e74f777be90ba009b Mon Sep 17 00:00:00 2001 From: rahul-kamat Date: Tue, 30 Jun 2020 20:55:38 +0000 Subject: [PATCH 0097/2522] Change annotation check from TensorLike to Tensor --- tensorflow/python/eager/function.py | 9 +++++---- tensorflow/python/eager/function_test.py | 11 +++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index cb3cc6d76d0..6ef6557b58a 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -74,7 +74,6 @@ from tensorflow.python.util import nest from tensorflow.python.util import object_identity from tensorflow.python.util import tf_decorator from tensorflow.python.util import tf_inspect -from tensorflow.python.types import core as core_tf_types # Loaded lazily due to a circular dependency (roughly # tf.function->autograph->->dataset->tf.function). @@ -2544,18 +2543,20 @@ class FunctionSpec(object): if i < len(self._fullargspec.args): arg_annotation = self._fullargspec.annotations.get( self._fullargspec.args[i]) - if arg_annotation == core_tf_types.TensorLike: + # TODO(rahulkamat): Once TensorLike is ready, change the following conditional statements + # to check if the input arg is annotated with TensorLike + if arg_annotation == ops.Tensor: args[i] = ops.convert_to_tensor(arg) else: varargs_annotation = self._fullargspec.annotations.get( self._fullargspec.varargs) - if varargs_annotation == core_tf_types.TensorLike: + if varargs_annotation == ops.Tensor: args[i] = ops.convert_to_tensor(arg) if self._fullargspec.varkw is not None: varkw_annotation = self._fullargspec.annotations.get( self._fullargspec.varkw) - if varkw_annotation == core_tf_types.TensorLike: + if varkw_annotation == ops.Tensor: kwargs = {kw: ops.convert_to_tensor(x) for kw, x in kwargs.items()} return tuple(args), kwargs diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index 3fda7630d73..6965024c4b0 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -80,7 +80,6 @@ from tensorflow.python.ops.ragged import ragged_tensor from tensorflow.python.ops.structured import structured_tensor from tensorflow.python.platform import test from tensorflow.python.training import training_ops -from tensorflow.python.types import core as core_tf_types from tensorflow.python.util import compat from tensorflow.python.util import nest from tensorflow.python.util import tf_inspect @@ -3934,7 +3933,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase): def testTraceWithAnnotationsBasic(self): trace_count = [0] - def func(x: core_tf_types.TensorLike): + def func(x: ops.Tensor): trace_count[0] += 1 return x @@ -3954,7 +3953,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase): def 
testTraceWithAnnotationsWithArgs(self): trace_count = [0] - def func(*args: core_tf_types.TensorLike): + def func(*args: ops.Tensor): trace_count[0] += 1 return args @@ -3975,7 +3974,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase): def testTraceWithAnnotationsWithKwargs(self): trace_count = [0] - def func(t: core_tf_types.TensorLike, **kwargs: core_tf_types.TensorLike): + def func(t: ops.Tensor, **kwargs: ops.Tensor): trace_count[0] += 1 return t @@ -3993,8 +3992,8 @@ class FunctionTest(test.TestCase, parameterized.TestCase): def testTraceWithAnnotationsWithMultipleInputTypes(self): trace_count = [0] - def func(t: core_tf_types.TensorLike, *args: core_tf_types.TensorLike, - **kwargs: core_tf_types.TensorLike): + def func(t: ops.Tensor, *args: ops.Tensor, + **kwargs: ops.Tensor): trace_count[0] += 1 return t From fe5adc988123182ddabbfd46e9726de0acbdbae9 Mon Sep 17 00:00:00 2001 From: Reed Date: Tue, 30 Jun 2020 14:23:42 -0700 Subject: [PATCH 0098/2522] Update symbols_pybind.txt --- tensorflow/tools/def_file_filter/symbols_pybind.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/tools/def_file_filter/symbols_pybind.txt b/tensorflow/tools/def_file_filter/symbols_pybind.txt index 07f5906aa08..a95740eaa97 100644 --- a/tensorflow/tools/def_file_filter/symbols_pybind.txt +++ b/tensorflow/tools/def_file_filter/symbols_pybind.txt @@ -369,3 +369,7 @@ tensorflow::grappler::CostAnalyzer::GenerateReport [flags] # tfe tensorflow::IsXlaEnabled tensorflow::GetMlirCommonFlags + +[tf32_util] # tf32 +tensorflow::allow_tf32_execution +tensorflow::tf32_execution_allowed From 433215c623785db7261fc2bb0196af99735970ae Mon Sep 17 00:00:00 2001 From: Reed Date: Tue, 30 Jun 2020 14:27:42 -0700 Subject: [PATCH 0099/2522] Add tf32_utils target to win_lib_files_for_exported_symbols --- tensorflow/python/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 77acca8c250..bbcdf55a31c 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -6145,6 +6145,7 @@ filegroup( "//tensorflow/core/grappler/graph_analyzer:graph_analyzer_tool", # graph_analyzer "//tensorflow/core/grappler/optimizers:meta_optimizer", # tf_optimizer "//tensorflow/core/grappler/utils:topological_sort", # tf_item + "//tensorflow/core/platform:tf32_utils", # tf32 "//tensorflow/core/profiler/internal:annotation_stack_impl", # profiler "//tensorflow/core/profiler/internal:print_model_analysis", # tfprof "//tensorflow/core/profiler/internal:traceme_recorder_impl", # profiler From 8d0a27ba809c9441a0a152d740cdf67368a8d221 Mon Sep 17 00:00:00 2001 From: rahul-kamat Date: Tue, 30 Jun 2020 21:33:17 +0000 Subject: [PATCH 0100/2522] Update boolean flag name and docstrings --- tensorflow/python/eager/def_function.py | 23 ++++++++++---------- tensorflow/python/eager/function.py | 27 ++++++++++++------------ tensorflow/python/eager/function_test.py | 17 ++++++++------- 3 files changed, 34 insertions(+), 33 deletions(-) diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py index 6cadb5a021c..46be11de2f5 100644 --- a/tensorflow/python/eager/def_function.py +++ b/tensorflow/python/eager/def_function.py @@ -457,7 +457,7 @@ class Function(object): experimental_autograph_options=None, experimental_relax_shapes=False, experimental_compile=None, - experimental_type_tracing=False): + experimental_follow_type_hints=False): """Initializes a `Function`. Args: @@ -513,8 +513,7 @@ class Function(object): executor). 
Set this value to `False` when directly running a multi-device function on TPUs (e.g. two TPU cores, one TPU core and its host CPU). - experimental_type_tracing: When true, arguments type annotated with - tf.TensorLike will be treated as if they were a tensor. + experimental_follow_type_hints: See the documentation for `tf.function`. Raises: ValueError: if `input_signature` is not None and the `python_function`'s @@ -524,7 +523,7 @@ class Function(object): self._python_function = python_function self._function_spec = function_lib.FunctionSpec.from_function_and_signature( python_function, input_signature, - experimental_type_tracing=experimental_type_tracing) + experimental_follow_type_hints=experimental_follow_type_hints) self._implements = experimental_implements # If `True`, the function uses the rendezvous of the parent. This is only # needed to support code where raw send/recv operations are inserted and @@ -534,7 +533,7 @@ class Function(object): self._experimental_autograph_options = experimental_autograph_options self._experimental_relax_shapes = experimental_relax_shapes self._experimental_compile = experimental_compile - self._experimental_type_tracing = experimental_type_tracing + self._experimental_follow_type_hints = experimental_follow_type_hints self._created_variables = None # GUARDED_BY(self._lock) self._stateful_fn = None # GUARDED_BY(self._lock) self._stateless_fn = None # GUARDED_BY(self._lock) @@ -664,7 +663,7 @@ class Function(object): autograph=self._autograph, experimental_autograph_options=self._experimental_autograph_options, experimental_compile=self._experimental_compile, - experimental_type_tracing=self._experimental_type_tracing, + experimental_follow_type_hints=self._experimental_follow_type_hints, experimental_relax_shapes=self._experimental_relax_shapes) def _initialize(self, args, kwds, add_initializers_to=None): @@ -724,7 +723,7 @@ class Function(object): experimental_autograph_options=self._experimental_autograph_options, experimental_relax_shapes=self._experimental_relax_shapes, experimental_compile=self._experimental_compile, - experimental_type_tracing=self._experimental_type_tracing) + experimental_follow_type_hints=self._experimental_follow_type_hints) if self._shared_rendezvous: f._shared_rendezvous = self._shared_rendezvous # pylint: disable=protected-access @@ -1212,7 +1211,7 @@ def function(func=None, experimental_autograph_options=None, experimental_relax_shapes=False, experimental_compile=None, - experimental_type_tracing=False): + experimental_follow_type_hints=False): """Compiles a function into a callable TensorFlow graph. `tf.function` constructs a callable that executes a TensorFlow graph @@ -1415,8 +1414,10 @@ def function(func=None, experimental_compile: If True, the function is always compiled by [XLA](https://www.tensorflow.org/xla). XLA may be more efficient in some cases (e.g. TPU, XLA_GPU, dense tensor computations). - experimental_type_tracing: When true, arguments type annotated with - tf.TensorLike will be treated as if they were a tensor. + experimental_follow_type_hints: When true, arguments type annotated with + tf.Tensor will be treated as if they were a tensor. This will avoid + unnecessary retracing and give a boost to the usability and performance + of `tf.function`. 
Returns: If `func` is not None, returns a callable that will execute the compiled @@ -1448,7 +1449,7 @@ def function(func=None, experimental_relax_shapes=experimental_relax_shapes, experimental_compile=experimental_compile, experimental_implements=experimental_implements, - experimental_type_tracing=experimental_type_tracing)) + experimental_follow_type_hints=experimental_follow_type_hints)) # This code path is for the `foo = tf.function(foo, ...)` use case if func is not None: diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 6ef6557b58a..c39754be274 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -2314,7 +2314,7 @@ class FunctionSpec(object): @staticmethod def from_function_and_signature(python_function, input_signature, is_pure=False, - experimental_type_tracing=False): + experimental_follow_type_hints=False): """Create a FunctionSpec instance given a python function and signature. Args: @@ -2400,7 +2400,7 @@ class FunctionSpec(object): return FunctionSpec( fullargspec, is_method, input_signature, - is_pure=is_pure, experimental_type_tracing=experimental_type_tracing, + is_pure=is_pure, experimental_follow_type_hints=experimental_follow_type_hints, name=name) def __init__(self, @@ -2408,7 +2408,7 @@ class FunctionSpec(object): is_method, input_signature, is_pure=False, - experimental_type_tracing=False, + experimental_follow_type_hints=False, name=None): """Constructs a FunctionSpec describing a python function. @@ -2423,7 +2423,7 @@ class FunctionSpec(object): self._fullargspec = fullargspec self._is_method = is_method self._is_pure = is_pure - self._experimental_type_tracing = experimental_type_tracing + self._experimental_follow_type_hints = experimental_follow_type_hints # TODO(edloper): Include name when serializing for SavedModel? self._name = name or "f" @@ -2493,8 +2493,8 @@ class FunctionSpec(object): return self._is_pure @property - def experimental_type_tracing(self): - return self._experimental_type_tracing + def experimental_follow_type_hints(self): + return self._experimental_follow_type_hints @property def arg_names(self): @@ -2593,7 +2593,7 @@ class FunctionSpec(object): """ if self._is_pure: args, kwargs = self._convert_variables_to_tensors(args, kwargs) - if self._experimental_type_tracing: + if self._experimental_follow_type_hints: args, kwargs = self._convert_typed_variables_to_tensors(args, kwargs) if self._input_signature is not None: if len(args) > len(self._input_signature): @@ -2827,7 +2827,7 @@ class Function(object): experimental_relax_shapes=False, capture_by_value=None, experimental_compile=None, - experimental_type_tracing=False): + experimental_follow_type_hints=False): """Initializes a `Function`. Args: @@ -2851,8 +2851,7 @@ class Function(object): default to False. experimental_compile: Force-compile the function with XLA, cf. def_function.Function doc on experimental_compile. - experimental_type_tracing: When true, arguments type annotated with - tf.TensorLike will be treated as if they were a tensor. + experimental_follow_type_hints: See the documentation for `tf.function`. 
Raises: ValueError: if `input_signature` is not None and the `python_function`'s @@ -2862,7 +2861,7 @@ class Function(object): pure_function = attributes and IMPLEMENTS_ATTRIBUTE_NAME in attributes self._function_spec = FunctionSpec.from_function_and_signature( python_function, input_signature, is_pure=pure_function, - experimental_type_tracing=experimental_type_tracing) + experimental_follow_type_hints=experimental_follow_type_hints) self._name = name self._autograph = autograph self._autograph_options = autograph_options @@ -2878,7 +2877,7 @@ class Function(object): # functions for each instance. self._descriptor_cache = weakref.WeakKeyDictionary() self._experimental_compile = experimental_compile - self._experimental_type_tracing = experimental_type_tracing + self._experimental_follow_type_hints = experimental_follow_type_hints def __call__(self, *args, **kwargs): """Calls a graph function specialized to the inputs.""" @@ -3655,7 +3654,7 @@ def defun_with_attributes(func=None, autograph=True, experimental_autograph_options=None, experimental_compile=None, - experimental_type_tracing=False, + experimental_follow_type_hints=False, experimental_relax_shapes=False): """Compiles a Python function into a callable TensorFlow graph. @@ -3705,7 +3704,7 @@ def defun_with_attributes(func=None, autograph=autograph, autograph_options=experimental_autograph_options, experimental_compile=experimental_compile, - experimental_type_tracing=experimental_type_tracing, + experimental_follow_type_hints=experimental_follow_type_hints, experimental_relax_shapes=experimental_relax_shapes)) # This code path is for the `foo = tfe.defun(foo, ...)` use case diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index 6965024c4b0..c0235a14a07 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -118,6 +118,7 @@ def _spec_for_value(value): return value + class FunctionTest(test.TestCase, parameterized.TestCase): def setUp(self): @@ -3937,8 +3938,8 @@ class FunctionTest(test.TestCase, parameterized.TestCase): trace_count[0] += 1 return x - enabled = def_function.function(func, experimental_type_tracing=True) - disabled = def_function.function(func, experimental_type_tracing=False) + enabled = def_function.function(func, experimental_follow_type_hints=True) + disabled = def_function.function(func, experimental_follow_type_hints=False) enabled(1) # Initial call gets traced enabled(2) @@ -3957,8 +3958,8 @@ class FunctionTest(test.TestCase, parameterized.TestCase): trace_count[0] += 1 return args - enabled = def_function.function(func, experimental_type_tracing=True) - disabled = def_function.function(func, experimental_type_tracing=False) + enabled = def_function.function(func, experimental_follow_type_hints=True) + disabled = def_function.function(func, experimental_follow_type_hints=False) args = ("abc", "def",) * 20 args2 = ("def", "abc",) * 20 @@ -3978,8 +3979,8 @@ class FunctionTest(test.TestCase, parameterized.TestCase): trace_count[0] += 1 return t - enabled = def_function.function(func, experimental_type_tracing=True) - disabled = def_function.function(func, experimental_type_tracing=False) + enabled = def_function.function(func, experimental_follow_type_hints=True) + disabled = def_function.function(func, experimental_follow_type_hints=False) enabled(1, x=1, y=1.0, z="one") enabled(2, x=2, y=2.0, z="two") @@ -3997,8 +3998,8 @@ class FunctionTest(test.TestCase, parameterized.TestCase): trace_count[0] += 1 return t - 
enabled = def_function.function(func, experimental_type_tracing=True) - disabled = def_function.function(func, experimental_type_tracing=False) + enabled = def_function.function(func, experimental_follow_type_hints=True) + disabled = def_function.function(func, experimental_follow_type_hints=False) enabled(1, constant_op.constant(1), "str", x=4.0) enabled(2, constant_op.constant(2), "str2", x=5.0) From b1f5d9e26125b4ad62c4566e4c2ddd784ce625bc Mon Sep 17 00:00:00 2001 From: rahul-kamat Date: Tue, 30 Jun 2020 22:44:18 +0000 Subject: [PATCH 0101/2522] Add tests to validate only parameters typed with ops.Tensor are converted to Tensors --- tensorflow/python/eager/function_test.py | 64 ++++++++++++++++++++++-- 1 file changed, 60 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index c0235a14a07..7963ed7b773 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -3932,7 +3932,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase): gradients(constant_op.constant([[[1.0], [2.0]]])) # No error is raised - def testTraceWithAnnotationsBasic(self): + def testFollowTypeHintsTraceBasic(self): trace_count = [0] def func(x: ops.Tensor): trace_count[0] += 1 @@ -3952,7 +3952,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase): disabled(3) # Retrace self.assertEqual(trace_count[0], 3) - def testTraceWithAnnotationsWithArgs(self): + def testFollowTypeHintsTraceWithArgs(self): trace_count = [0] def func(*args: ops.Tensor): trace_count[0] += 1 @@ -3973,7 +3973,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase): disabled(args2) # Retrace self.assertEqual(trace_count[0], 2) - def testTraceWithAnnotationsWithKwargs(self): + def testFollowTypeHintsTraceWithKwargs(self): trace_count = [0] def func(t: ops.Tensor, **kwargs: ops.Tensor): trace_count[0] += 1 @@ -3991,7 +3991,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase): disabled(2, x=2, y=2.0, z="two") # Retrace self.assertEqual(trace_count[0], 2) - def testTraceWithAnnotationsWithMultipleInputTypes(self): + def testFollowTypeHintsTraceWithMultipleInputTypes(self): trace_count = [0] def func(t: ops.Tensor, *args: ops.Tensor, **kwargs: ops.Tensor): @@ -4010,6 +4010,62 @@ class FunctionTest(test.TestCase, parameterized.TestCase): disabled(2, constant_op.constant(2), "str2", x=5.0) # Retrace self.assertEqual(trace_count[0], 2) + def testFollowTypeHintsTraceWithOnlyArgNamed(self): + trace_count = [0] + def func(t: ops.Tensor, i: int = 1, **kwargs): + trace_count[0] += 1 + return t + + enabled = def_function.function(func, experimental_follow_type_hints=True) + + trace_count = [0] + enabled(1, 3, x=4.0, y="str") + enabled(2, 4, x=4.0, y="str") # Retrace + self.assertEqual(trace_count[0], 2) + + def testFollowTypeHintsTraceWithNotAllNamed(self): + trace_count = [0] + def func(x, y: ops.Tensor, z: int): + trace_count[0] += 1 + return x + + enabled = def_function.function(func, experimental_follow_type_hints=True) + + enabled(1, 2, 3) + enabled(1, 20, 3) # No retrace - change in ops.Tensor typed arg + enabled(2, 2, 3) # Retrace - change in untyped arg + enabled(2, 2, 4) # Retrace - change in typed arg + self.assertEqual(trace_count[0], 3) + + def testFollowTypeHintsTraceWithOnlyArgsNamed(self): + trace_count = [0] + def func(x, y, *args: ops.Tensor): + trace_count[0] += 1 + return x + + enabled = def_function.function(func, experimental_follow_type_hints=True) + + trace_count = [0] + enabled(1, 20, 3, 4, 
5, 6) + enabled(1, 20, 3, 4, 5, 60) # No retrace - change in *args + enabled(1, 30, 7, 8, 9, 10) # Retrace - change in args + self.assertEqual(trace_count[0], 2) + + def testFollowTypeHintsTraceWithOnlyKwargsNamed(self): + trace_count = [0] + def func(x, y, *args, **kwargs: ops.Tensor): + trace_count[0] += 1 + return x + + enabled = def_function.function(func, experimental_follow_type_hints=True) + + trace_count = [0] + enabled(1, 2, 3, 4, 5, 6, a=1.0, b=2.0, c=3.0) + enabled(1, 2, 3, 4, 5, 6, a=1.5, b=2.5, c=3.5) # No retrace - change in **kwargs + enabled(100, 2, 3, 4, 5, 6, a=1.0, b=2.0, c=3.0) # Retrace - change in args + enabled(1, 2, 3, 4, 5, 100, a=1.0, b=2.0, c=3.0) # Retrace - change in *args + self.assertEqual(trace_count[0], 3) + class MultiDeviceTest(test.TestCase, parameterized.TestCase): @test_util.run_gpu_only From 3bacebdf416863a4071245f78ea9e7eaaf5ee19b Mon Sep 17 00:00:00 2001 From: rahul-kamat Date: Tue, 30 Jun 2020 23:15:53 +0000 Subject: [PATCH 0102/2522] Update docstring description, Link docs for getfullargspec --- tensorflow/python/eager/def_function.py | 7 +++---- tensorflow/python/eager/function.py | 4 +++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py index 46be11de2f5..4bc62ce24e2 100644 --- a/tensorflow/python/eager/def_function.py +++ b/tensorflow/python/eager/def_function.py @@ -1414,10 +1414,9 @@ def function(func=None, experimental_compile: If True, the function is always compiled by [XLA](https://www.tensorflow.org/xla). XLA may be more efficient in some cases (e.g. TPU, XLA_GPU, dense tensor computations). - experimental_follow_type_hints: When true, arguments type annotated with - tf.Tensor will be treated as if they were a tensor. This will avoid - unnecessary retracing and give a boost to the usability and performance - of `tf.function`. + experimental_follow_type_hints: When True, the function may use type annotations + to optimize the tracing performance. For example, arguments annotated with + `tf.Tensor` will automatically be converted to a Tensor. 
Returns: If `func` is not None, returns a callable that will execute the compiled diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index c39754be274..75260393027 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -2400,7 +2400,8 @@ class FunctionSpec(object): return FunctionSpec( fullargspec, is_method, input_signature, - is_pure=is_pure, experimental_follow_type_hints=experimental_follow_type_hints, + is_pure=is_pure, + experimental_follow_type_hints=experimental_follow_type_hints, name=name) def __init__(self, @@ -2540,6 +2541,7 @@ class FunctionSpec(object): args = list(args) for i, arg in enumerate(args): + # See https://docs.python.org/3/library/inspect.html#inspect.getfullargspec for details on fullargspec if i < len(self._fullargspec.args): arg_annotation = self._fullargspec.annotations.get( self._fullargspec.args[i]) From b5bafaf4e77267085d1040ffbf78bf01ea31b6e1 Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Tue, 30 Jun 2020 23:20:45 +0000 Subject: [PATCH 0103/2522] moved from tf_tstring to tstring for readability and fixed formatting --- ...-summary_op-needs-tstring-C-API-sync.patch | 377 ------------------ tensorflow/c/kernels/diff.patch | 0 tensorflow/c/kernels/ops/summary.cc | 30 +- tensorflow/c/kernels/summary_op.cc | 100 +++-- tensorflow/c/kernels/summary_op_test.cc | 30 +- tensorflow/core/kernels/summary_op.cc | 2 +- 6 files changed, 84 insertions(+), 455 deletions(-) delete mode 100644 tensorflow/c/kernels/0001-summary_op-needs-tstring-C-API-sync.patch delete mode 100644 tensorflow/c/kernels/diff.patch diff --git a/tensorflow/c/kernels/0001-summary_op-needs-tstring-C-API-sync.patch b/tensorflow/c/kernels/0001-summary_op-needs-tstring-C-API-sync.patch deleted file mode 100644 index 856f4a554c3..00000000000 --- a/tensorflow/c/kernels/0001-summary_op-needs-tstring-C-API-sync.patch +++ /dev/null @@ -1,377 +0,0 @@ -From 9134fbb13794865a45288d2e722ad47c362e0ae4 Mon Sep 17 00:00:00 2001 -From: Daniel Nguyen -Date: Thu, 18 Jun 2020 23:13:11 +0000 -Subject: [PATCH] summary_op needs tstring C API sync - ---- - tensorflow/c/kernels/diff.patch | 0 - tensorflow/c/kernels/ops/summary.cc | 70 ++++++++++ - tensorflow/c/kernels/summary_op.cc | 171 ++++++++++++++++++++++++ - tensorflow/c/kernels/summary_op_test.cc | 96 +++++++++++++ - 4 files changed, 337 insertions(+) - create mode 100644 tensorflow/c/kernels/diff.patch - create mode 100644 tensorflow/c/kernels/ops/summary.cc - create mode 100644 tensorflow/c/kernels/summary_op.cc - create mode 100644 tensorflow/c/kernels/summary_op_test.cc - -diff --git a/tensorflow/c/kernels/diff.patch b/tensorflow/c/kernels/diff.patch -new file mode 100644 -index 0000000000..e69de29bb2 -diff --git a/tensorflow/c/kernels/ops/summary.cc b/tensorflow/c/kernels/ops/summary.cc -new file mode 100644 -index 0000000000..550a663d00 ---- /dev/null -+++ b/tensorflow/c/kernels/ops/summary.cc -@@ -0,0 +1,70 @@ -+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. -+ -+Licensed under the Apache License, Version 2.0 (the "License"); -+you may not use this file except in compliance with the License. -+You may obtain a copy of the License at -+ -+ http://www.apache.org/licenses/LICENSE-2.0 -+ -+Unless required by applicable law or agreed to in writing, software -+distributed under the License is distributed on an "AS IS" BASIS, -+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-+See the License for the specific language governing permissions and -+limitations under the License. -+==============================================================================*/ -+ -+#include -+#include -+ -+#include "tensorflow/c/ops.h" -+#include "tensorflow/core/framework/selective_registration.h" -+#include "tensorflow/core/platform/logging.h" -+#include "tensorflow/core/platform/macros.h" -+ -+ -+static void TF_ScalarSummary_shape_inference_fn(TF_ShapeInferenceContext* ctx, -+ TF_Status* status) { -+ TF_ShapeHandle* result = TF_NewShapeHandle(); -+ // TODO: what to do in the case of unknown input shape? -+ if (TF_GetCode(status) == TF_OK && -+ !TF_ShapeInferenceContextRankKnown(ctx, result)) { -+ TF_ShapeInferenceContextSetUnknownShape(ctx, status); -+ CHECK_EQ(TF_OK, TF_GetCode(status)) -+ << "Error while setting unknown shape function"; -+ TF_DeleteShapeHandle(result); -+ return; -+ } -+ // make shape handle a scalar value (empty shape) -+ if (TF_GetCode(status) == TF_OK) { -+ TF_ShapeInferenceContextSetOutput(ctx, 0, result, status); -+ CHECK_EQ(TF_OK, TF_GetCode(status)) -+ << "Error while setting shape function"; -+ } -+ TF_DeleteShapeHandle(result); -+} -+ -+void Register_ScalarSummaryOp() { -+ TF_Status* status = TF_NewStatus(); -+ -+ TF_OpDefinitionBuilder* op_builder = TF_NewOpDefinitionBuilder("SummaryScalar"); -+ TF_OpDefinitionBuilderAddInput(op_builder, "tags: string"); -+ TF_OpDefinitionBuilderAddInput(op_builder, "values: T"); -+ TF_OpDefinitionBuilderAddOutput(op_builder, "summary: string"); -+ TF_OpDefinitionBuilderAddAttr( -+ op_builder, -+ "T: realnumbertype"); -+ TF_OpDefinitionBuilderSetShapeInferenceFunction(op_builder, -+ &TF_ScalarSummary_shape_inference_fn); -+ -+ TF_RegisterOpDefinition(op_builder, status); -+ CHECK_EQ(TF_GetCode(status), TF_OK) -+ << "TF_ScalarSummary op registration failed: " << TF_Message(status); -+ TF_DeleteStatus(status); -+} -+ -+TF_ATTRIBUTE_UNUSED static bool SummaryScalarOpRegistered = []() { -+ if (SHOULD_REGISTER_OP("SummaryScalar")) { -+ Register_ScalarSummaryOp(); -+ } -+ return true; -+}(); -diff --git a/tensorflow/c/kernels/summary_op.cc b/tensorflow/c/kernels/summary_op.cc -new file mode 100644 -index 0000000000..3a78d321d7 ---- /dev/null -+++ b/tensorflow/c/kernels/summary_op.cc -@@ -0,0 +1,171 @@ -+ -+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. -+ -+Licensed under the Apache License, Version 2.0 (the "License"); -+you may not use this file except in compliance with the License. -+You may obtain a copy of the License at -+ -+ http://www.apache.org/licenses/LICENSE-2.0 -+ -+Unless required by applicable law or agreed to in writing, software -+distributed under the License is distributed on an "AS IS" BASIS, -+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+See the License for the specific language governing permissions and -+limitations under the License. 
-+==============================================================================*/ -+ -+#include -+ -+#include "tensorflow/c/kernels.h" -+#include "tensorflow/c/ops.h" -+#include "tensorflow/c/tf_tensor.h" -+#include "tensorflow/core/framework/common_shape_fns.h" -+#include "tensorflow/core/framework/op.h" -+#include "tensorflow/core/framework/selective_registration.h" -+#include "tensorflow/core/framework/shape_inference.h" -+#include "tensorflow/core/platform/macros.h" -+#include "tensorflow/core/framework/summary.pb.h" -+#include "tensorflow/core/platform/protobuf.h" -+#include "tensorflow/core/framework/register_types.h" -+ -+#include "tensorflow/core/framework/types.h" -+ -+// BitcastOp implements a bitcast kernel, creating an output tensor that shares -+// the same data buffer as the input but with a different shape and/or data -+// type. Its inputs are: -+// -+// * the input tensor -+// * an attribute named "T" containing the TF_DataType of the input tensor -+// * an attribute named "type" containing the TF_DataType of the output tensor -+// -+// Given an input tensor of shape [...], if the input DataType "T" is larger -+// than the output DataType "type", then the shape changes from [...] -+// to [..., sizeof(T)/sizeof(type)]. -+// -+// If "T" is smaller than "type", the operator requires that the rightmost -+// dimension be equal to sizeof(type)/sizeof(T). The shape then goes from -+// [..., sizeof(type)/sizeof(T)] to [...]. -+// -+// Bitcast is implemented as a low-level cast, so machines with different endian -+// orderings will give different results. -+ -+static void* SummaryScalarOp_Create(TF_OpKernelConstruction* ctx) { -+ // TODO: replace with a void* pointer type later -+ int a = 4; -+ return static_cast(&a); -+} -+ -+static void SummaryScalarOp_Delete(void* kernel) { -+ return; -+} -+ -+bool IsSameSize(TF_Tensor* tensor1, TF_Tensor* tensor2){ -+ if (TF_NumDims(tensor1) != TF_NumDims(tensor2)){ -+ return false; -+ } -+ for(int d = 0; d < TF_NumDims(tensor1); d++){ -+ if (TF_Dim(tensor1, d) != TF_Dim(tensor2, d)){ -+ return false; -+ } -+ } -+ return true; -+} -+ -+template -+static void SummaryScalarOp_Compute(void* kernel, TF_OpKernelContext* ctx) { -+ TF_Tensor* tags; -+ TF_Tensor* values; -+ TF_Status* status = TF_NewStatus(); -+ TF_GetInput(ctx, 0, &tags, status); -+ CHECK_EQ(TF_OK, TF_GetCode(status)) -+ << "Error while getting input"; -+ if (TF_GetCode(status) == TF_OK){ -+ TF_GetInput(ctx, 1, &values, status); -+ } -+ CHECK_EQ(TF_OK, TF_GetCode(status)) -+ << "Error while getting input"; -+ if (TF_GetCode(status) == TF_OK) { -+ if (!IsSameSize(tags, values)) { -+ std::ostringstream err; -+ err << "tags and values not the same shape: "; -+ TF_SetStatus(status, TF_INVALID_ARGUMENT, err.str().c_str()); -+ } -+ } -+ -+ tensorflow::Summary s; -+ if (TF_GetCode(status) == TF_OK) { -+ auto Ttags_array = static_cast(TF_TensorData(tags)); -+ auto values_array = static_cast(TF_TensorData(values)); -+ for (int i = 0; i < TF_TensorElementCount(tags); ++i){ -+ tensorflow::Summary::Value* v = s.add_value(); -+ TF_TString_Init(Ttags_array[i]); -+ v->set_tag(TF_TString_GetDataPointer(Ttags_array[i]), TF_TString_GetSize(Ttags_array[i])); -+ v->set_simple_value(float(values_array[i])); -+ } -+ -+ -+ // TF_Tensor* summary_tensor = TF_AllocateOutput(ctx, 0, TF_ExpectedOutputDataType(ctx, 0), 0, 0) -+ -+ // TF_Tensor* output = TF_AllocateTensor(k->output_data_type, dims, 0, -+ // TF_DataTypeSize(k->output_data_type)); -+ // if (TF_GetCode(status) == TF_OK) { -+ // TF_SetOutput(ctx, 0, 
output, status); -+ // } -+ // TF_DeleteTensor(output); -+ } -+ -+ // if (TF_GetCode(status) != TF_OK) { -+ // TF_OpKernelContext_Failure(ctx, status); -+ // } -+ // TF_DeleteStatus(status); -+ // TF_DeleteTensor(tags); -+} -+ -+template -+void RegisterSummaryScalarOpKernel() { -+ TF_Status* status = TF_NewStatus(); -+ { -+ auto* builder = TF_NewKernelBuilder("SummaryScalar", tensorflow::DEVICE_CPU, -+ &SummaryScalarOp_Create, &SummaryScalarOp_Compute, -+ &SummaryScalarOp_Delete); -+ TF_KernelBuilder_TypeConstraint(builder, "T", static_cast(tensorflow::DataTypeToEnum::v()), status); -+ CHECK_EQ(TF_OK, TF_GetCode(status)) -+ << "Error while adding type constraint"; -+ TF_RegisterKernelBuilder("SummaryScalar", builder, status); -+ CHECK_EQ(TF_OK, TF_GetCode(status)) -+ << "Error while registering Summary Scalar kernel"; -+ } -+// template -+// #if GOOGLE_CUDA -+// { -+// auto* builder = TF_NewKernelBuilder("SummaryScalar", tensorflow::DEVICE_GPU, -+// &SummaryScalarOp_Create, &SummaryScalarOp_Compute, -+// &SummaryScalarOp_Delete); -+// TF_RegisterKernelBuilder("SummaryScalar", builder, status); -+// CHECK_EQ(TF_OK, TF_GetCode(status)) -+// << "Error while registering CUDA SummaryScalar kernel"; -+// } -+// #endif -+ -+ TF_DeleteStatus(status); -+} -+ -+// A dummy static variable initialized by a lambda whose side-effect is to -+// register the bitcast kernel. -+ -+ -+TF_ATTRIBUTE_UNUSED static bool IsSummaryScalarOpKernelRegistered = []() { -+ if (SHOULD_REGISTER_OP_KERNEL("SummaryScalar")) { -+ RegisterSummaryScalarOpKernel(); -+ RegisterSummaryScalarOpKernel(); -+ RegisterSummaryScalarOpKernel(); -+ RegisterSummaryScalarOpKernel(); -+ RegisterSummaryScalarOpKernel(); -+ RegisterSummaryScalarOpKernel(); -+ RegisterSummaryScalarOpKernel(); -+ RegisterSummaryScalarOpKernel(); -+ RegisterSummaryScalarOpKernel(); -+ } -+ return true; -+}(); -+ -diff --git a/tensorflow/c/kernels/summary_op_test.cc b/tensorflow/c/kernels/summary_op_test.cc -new file mode 100644 -index 0000000000..fd6199abd6 ---- /dev/null -+++ b/tensorflow/c/kernels/summary_op_test.cc -@@ -0,0 +1,96 @@ -+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. -+ -+Licensed under the Apache License, Version 2.0 (the "License"); -+you may not use this file except in compliance with the License. -+You may obtain a copy of the License at -+ -+ http://www.apache.org/licenses/LICENSE-2.0 -+ -+Unless required by applicable law or agreed to in writing, software -+distributed under the License is distributed on an "AS IS" BASIS, -+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -+See the License for the specific language governing permissions and -+limitations under the License. 
-+==============================================================================*/ -+ -+#include "tensorflow/core/framework/attr_value.pb.h" -+#include "tensorflow/core/framework/attr_value_util.h" -+#include "tensorflow/core/framework/fake_input.h" -+#include "tensorflow/core/framework/node_def.pb.h" -+#include "tensorflow/core/framework/node_def_builder.h" -+#include "tensorflow/core/framework/op_kernel.h" -+#include "tensorflow/core/framework/shape_inference.h" -+#include "tensorflow/core/platform/test.h" -+ -+#include -+#include -+#include -+namespace tensorflow { -+namespace { -+ -+class DummyDevice : public DeviceBase { -+ public: -+ explicit DummyDevice(Env* env) : DeviceBase(env) {} -+ Allocator* GetAllocator(AllocatorAttributes /*attr*/) override { -+ return cpu_allocator(); -+ } -+}; -+ -+void TestScalarSummaryOp(Tensor* tags, Tensor* values, error::Code expected_code) { -+ Status status; -+ NodeDef def; -+ def.set_op("SummaryScalar"); -+ -+ def.set_device(DEVICE_CPU); -+ -+ AttrValue valuesTypeAttr; -+ SetAttrValue(values->dtype(), &valuesTypeAttr); -+ (*def.mutable_attr())["T"] = valuesTypeAttr; -+ -+ def.add_input( -+ strings::StrCat("input1: ", DataTypeString(tags->dtype()))); -+ def.add_input( -+ strings::StrCat("input2: ", DataTypeString(values->dtype()))); -+ -+ std::unique_ptr kernel = -+ CreateOpKernel(DeviceType(DEVICE_CPU), nullptr, nullptr, def, 1, &status); -+ ASSERT_TRUE(status.ok()) << status.ToString(); -+ OpKernelContext::Params params; -+ DummyDevice dummy_device(nullptr); -+ params.device = &dummy_device; -+ params.op_kernel = kernel.get(); -+ gtl::InlinedVector inputs; -+ inputs.emplace_back(tags); -+ inputs.emplace_back(values); -+ params.inputs = &inputs; -+ OpKernelContext ctx(¶ms, 1); -+ kernel->Compute(&ctx); -+ -+ ASSERT_EQ(expected_code, ctx.status().code()); -+ if (expected_code == error::OK) { -+ ASSERT_EQ(true, false) -+ << ctx.mutable_output(0)->shape().DebugString(); -+ } -+} -+ -+TEST(ScalarSummaryOpTest, Test) { -+ int vectorSize = 2; -+ Tensor tags(DT_STRING, {vectorSize}); -+ Tensor values(DT_FLOAT, {vectorSize}); -+ for (int i = 0; i < vectorSize; ++i){ -+ values.vec()(i) = static_cast(i); -+ } -+ tags.vec()(0) = "tag 1"; -+ tags.vec()(1) = "tag 2"; -+ TestScalarSummaryOp(&tags, &values, error::INVALID_ARGUMENT); -+} -+ -+ -+PartialTensorShape S(std::initializer_list dims) { -+ return PartialTensorShape(dims); -+} -+ -+ -+ -+} // namespace -+} // namespace tensorflow --- -2.27.0.111.gc72c7da667-goog - diff --git a/tensorflow/c/kernels/diff.patch b/tensorflow/c/kernels/diff.patch deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tensorflow/c/kernels/ops/summary.cc b/tensorflow/c/kernels/ops/summary.cc index 550a663d006..857ff6f29fa 100644 --- a/tensorflow/c/kernels/ops/summary.cc +++ b/tensorflow/c/kernels/ops/summary.cc @@ -13,32 +13,27 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include -#include - #include "tensorflow/c/ops.h" #include "tensorflow/core/framework/selective_registration.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" - -static void TF_ScalarSummary_shape_inference_fn(TF_ShapeInferenceContext* ctx, - TF_Status* status) { +static void scalar_summary_shape_inference_fn(TF_ShapeInferenceContext* ctx, + TF_Status* status) { TF_ShapeHandle* result = TF_NewShapeHandle(); - // TODO: what to do in the case of unknown input shape? 
if (TF_GetCode(status) == TF_OK && !TF_ShapeInferenceContextRankKnown(ctx, result)) { TF_ShapeInferenceContextSetUnknownShape(ctx, status); CHECK_EQ(TF_OK, TF_GetCode(status)) - << "Error while setting unknown shape function"; + << "Error while setting unknown shape function"; TF_DeleteShapeHandle(result); return; } - // make shape handle a scalar value (empty shape) + // Make shape handle a scalar value (empty shape) if (TF_GetCode(status) == TF_OK) { TF_ShapeInferenceContextSetOutput(ctx, 0, result, status); - CHECK_EQ(TF_OK, TF_GetCode(status)) - << "Error while setting shape function"; + CHECK_EQ(TF_OK, TF_GetCode(status)) + << "Error while setting shape function"; } TF_DeleteShapeHandle(result); } @@ -46,19 +41,18 @@ static void TF_ScalarSummary_shape_inference_fn(TF_ShapeInferenceContext* ctx, void Register_ScalarSummaryOp() { TF_Status* status = TF_NewStatus(); - TF_OpDefinitionBuilder* op_builder = TF_NewOpDefinitionBuilder("SummaryScalar"); + TF_OpDefinitionBuilder* op_builder = + TF_NewOpDefinitionBuilder("SummaryScalar"); TF_OpDefinitionBuilderAddInput(op_builder, "tags: string"); - TF_OpDefinitionBuilderAddInput(op_builder, "values: T"); + TF_OpDefinitionBuilderAddInput(op_builder, "values: T"); TF_OpDefinitionBuilderAddOutput(op_builder, "summary: string"); - TF_OpDefinitionBuilderAddAttr( - op_builder, - "T: realnumbertype"); + TF_OpDefinitionBuilderAddAttr(op_builder, "T: realnumbertype"); TF_OpDefinitionBuilderSetShapeInferenceFunction(op_builder, - &TF_ScalarSummary_shape_inference_fn); + &scalar_summary_shape_inference_fn); TF_RegisterOpDefinition(op_builder, status); CHECK_EQ(TF_GetCode(status), TF_OK) - << "TF_ScalarSummary op registration failed: " << TF_Message(status); + << "ScalarSummary op registration failed: " << TF_Message(status); TF_DeleteStatus(status); } diff --git a/tensorflow/c/kernels/summary_op.cc b/tensorflow/c/kernels/summary_op.cc index 002de6fb6e8..d2220670d74 100644 --- a/tensorflow/c/kernels/summary_op.cc +++ b/tensorflow/c/kernels/summary_op.cc @@ -27,14 +27,9 @@ limitations under the License. 
#include "tensorflow/core/framework/summary.pb.h" #include "tensorflow/core/platform/protobuf.h" #include "tensorflow/core/framework/register_types.h" - #include "tensorflow/core/framework/types.h" -#include - -// TODO: Copy over Summary Scalar Op Doc static void* SummaryScalarOp_Create(TF_OpKernelConstruction* ctx) { - // TODO: replace with a void* pointer type later void* ptr; return ptr; } @@ -43,17 +38,9 @@ static void SummaryScalarOp_Delete(void* kernel) { return; } -bool IsSameSize(TF_Tensor* tensor1, TF_Tensor* tensor2){ - if (TF_NumDims(tensor1) != TF_NumDims(tensor2)){ - return false; - } - for(int d = 0; d < TF_NumDims(tensor1); d++){ - if (TF_Dim(tensor1, d) != TF_Dim(tensor2, d)){ - return false; - } - } - return true; -} +// Helper functions for compute method +bool IsSameSize(TF_Tensor* tensor1, TF_Tensor* tensor2); +static tensorflow::string SingleTag(TF_Tensor* tags); template static void SummaryScalarOp_Compute(void* kernel, TF_OpKernelContext* ctx) { @@ -61,35 +48,34 @@ static void SummaryScalarOp_Compute(void* kernel, TF_OpKernelContext* ctx) { TF_Tensor* values; TF_Status* status = TF_NewStatus(); TF_GetInput(ctx, 0, &tags, status); - if (TF_GetCode(status) == TF_OK){ + if (TF_GetCode(status) == TF_OK) { TF_GetInput(ctx, 1, &values, status); } - if (TF_GetCode(status) == TF_OK) { if (!IsSameSize(tags, values)) { std::ostringstream err; - err << "tags and values not the same shape: " << TF_ShapeDebugString(tags) - << " != " << TF_ShapeDebugString(values); + err << "tags and values not the same shape: " + << TF_ShapeDebugString(tags) << " != " << TF_ShapeDebugString(values) + << SingleTag(tags); TF_SetStatus(status, TF_INVALID_ARGUMENT, err.str().c_str()); } } - - // Copy tag and string data into summary protobuf + // Copy tag and string data into summary protobuf tensorflow::Summary s; if (TF_GetCode(status) == TF_OK) { // Convert tags and values tensor to array to access elements by index - auto tags_array = static_cast(TF_TensorData(tags)); + auto tags_array = static_cast(TF_TensorData(tags)); auto values_array = static_cast(TF_TensorData(values)); - for (int i = 0; i < TF_TensorElementCount(tags); ++i){ + // Copy tags and values into summary protobuf + for (int i = 0; i < TF_TensorElementCount(tags); ++i) { tensorflow::Summary::Value* v = s.add_value(); - v->set_tag(TF_TString_GetDataPointer(&tags_array[i]), - TF_TString_GetSize(&tags_array[i])); + v->set_tag(tags_array[i].data(), tags_array[i].size()); v->set_simple_value(float(values_array[i])); } TF_Tensor* summary_tensor = TF_AllocateOutput(ctx, 0, TF_ExpectedOutputDataType(ctx, 0), nullptr, 0, sizeof(TF_TString), status); - if (TF_GetCode(status) == TF_OK){ + if (TF_GetCode(status) == TF_OK) { SerializeToTString(s, static_cast (TF_TensorData(summary_tensor))); } @@ -101,40 +87,68 @@ static void SummaryScalarOp_Compute(void* kernel, TF_OpKernelContext* ctx) { } TF_DeleteStatus(status); TF_DeleteTensor(tags); + TF_DeleteTensor(values); +} + +bool IsSameSize(TF_Tensor* tensor1, TF_Tensor* tensor2){ + if (TF_NumDims(tensor1) != TF_NumDims(tensor2)) { + return false; + } + for (int d = 0; d < TF_NumDims(tensor1); d++) { + if (TF_Dim(tensor1, d) != TF_Dim(tensor2, d)) { + return false; + } + } + return true; +} + +static tensorflow::string SingleTag(TF_Tensor* tags){ + if (TF_TensorElementCount(tags) == 1) { + const char* single_tag = static_cast( + TF_TensorData(tags))->c_str(); + return tensorflow::strings::StrCat(" (tag '", single_tag, "')"); + } + else { + return ""; + } } template void 
RegisterSummaryScalarOpKernel() { TF_Status* status = TF_NewStatus(); { - auto* builder = TF_NewKernelBuilder("SummaryScalar", tensorflow::DEVICE_CPU, - &SummaryScalarOp_Create, &SummaryScalarOp_Compute, + auto* builder = TF_NewKernelBuilder("SummaryScalar", + tensorflow::DEVICE_CPU, + &SummaryScalarOp_Create, + &SummaryScalarOp_Compute, &SummaryScalarOp_Delete); - TF_KernelBuilder_TypeConstraint(builder, "T", static_cast(tensorflow::DataTypeToEnum::v()), status); + TF_KernelBuilder_TypeConstraint(builder, "T", + static_cast(tensorflow::DataTypeToEnum::v()), status); CHECK_EQ(TF_OK, TF_GetCode(status)) << "Error while adding type constraint"; TF_RegisterKernelBuilder("SummaryScalarOp", builder, status); CHECK_EQ(TF_OK, TF_GetCode(status)) << "Error while registering Summary Scalar kernel"; } -// #if GOOGLE_CUDA -// { -// auto* builder = TF_NewKernelBuilder("SummaryScalar", tensorflow::DEVICE_GPU, -// &SummaryScalarOp_Create, &SummaryScalarOp_Compute, -// &SummaryScalarOp_Delete); -// TF_RegisterKernelBuilder("SummaryScalar", builder, status); -// CHECK_EQ(TF_OK, TF_GetCode(status)) -// << "Error while registering CUDA SummaryScalar kernel"; -// } -// #endif + +#if GOOGLE_CUDA + { + auto* builder = TF_NewKernelBuilder("SummaryScalar", + tensorflow::DEVICE_GPU, + &SummaryScalarOp_Create, + &SummaryScalarOp_Compute, + &SummaryScalarOp_Delete); + TF_RegisterKernelBuilder("SummaryScalar", builder, status); + CHECK_EQ(TF_OK, TF_GetCode(status)) + << "Error while registering CUDA SummaryScalar kernel"; + } +#endif TF_DeleteStatus(status); } // A dummy static variable initialized by a lambda whose side-effect is to -// register the bitcast kernel. - - +// register the bitcast kernel. TF_ATTRIBUTE_UNUSED static bool IsSummaryScalarOpKernelRegistered = []() { if (SHOULD_REGISTER_OP_KERNEL("SummaryScalarOp")) { RegisterSummaryScalarOpKernel(); diff --git a/tensorflow/c/kernels/summary_op_test.cc b/tensorflow/c/kernels/summary_op_test.cc index afc818fb7b5..5cf84453f80 100644 --- a/tensorflow/c/kernels/summary_op_test.cc +++ b/tensorflow/c/kernels/summary_op_test.cc @@ -21,15 +21,11 @@ limitations under the License. 
#include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/shape_inference.h" #include "tensorflow/core/platform/test.h" - #include "tensorflow/core/framework/summary.pb.h" #include "tensorflow/core/platform/protobuf.h" #include "tensorflow/c/tf_tensor.h" #include "tensorflow/c/tf_tensor_internal.h" -#include -#include -#include namespace tensorflow { namespace { @@ -52,7 +48,7 @@ static void EXPECT_SummaryMatches(const Summary& actual, void TestScalarSummaryOp(Tensor* tags, Tensor* values, string expected_summary, error::Code expected_code) { - // initialize node used to fetch OpKernel + // Initialize node used to fetch OpKernel Status status; NodeDef def; def.set_op("SummaryScalar"); @@ -66,10 +62,11 @@ void TestScalarSummaryOp(Tensor* tags, Tensor* values, string expected_summary, strings::StrCat("input2: ", DataTypeString(values->dtype()))); std::unique_ptr kernel = - CreateOpKernel(DeviceType(DEVICE_CPU), nullptr, nullptr, def, 1, &status); + CreateOpKernel(DeviceType(DEVICE_CPU), nullptr, + nullptr, def, 1, &status); ASSERT_TRUE(status.ok()) << status.ToString(); - // initialize OpKernel parameters + // Initialize OpKernel parameters OpKernelContext::Params params; DummyDevice dummy_device(nullptr); params.device = &dummy_device; @@ -88,7 +85,7 @@ void TestScalarSummaryOp(Tensor* tags, Tensor* values, string expected_summary, Summary summary; ParseProtoUnlimited(&summary, ctx.mutable_output(0)->scalar()()); EXPECT_SummaryMatches(summary, expected_summary); - } + } } TEST(ScalarSummaryOpTest, SimpleFloat) { @@ -160,18 +157,19 @@ TEST(ScalarSummaryOpTest, Error_WrongValuesTags) { TestScalarSummaryOp(&tags, &values, R"()", error::INVALID_ARGUMENT); } +TEST(ScalarSummaryOpTest, Error_WrongWithSingleTag) { + Tensor tags(DT_STRING, {1}); + Tensor values(DT_FLOAT, {2, 1}); + tags.vec()(0) = "tag1"; + values.matrix()(0, 0) = 1.0f; + values.matrix()(1, 0) = -2.0f; + TestScalarSummaryOp(&tags, &values, R"()", error::INVALID_ARGUMENT); +} + TEST(ScalarSummaryOpTest, IsRegistered){ const OpRegistrationData* reg; TF_CHECK_OK(OpRegistry::Global()->LookUp("SummaryScalar", ®)); } - - -PartialTensorShape S(std::initializer_list dims) { - return PartialTensorShape(dims); -} - - - } // namespace } // namespace tensorflow diff --git a/tensorflow/core/kernels/summary_op.cc b/tensorflow/core/kernels/summary_op.cc index f4c91fc9ff1..64e8347dfc4 100644 --- a/tensorflow/core/kernels/summary_op.cc +++ b/tensorflow/core/kernels/summary_op.cc @@ -39,7 +39,7 @@ class SummaryScalarOp : public OpKernel { void Compute(OpKernelContext* c) override { const Tensor& tags = c->input(0); const Tensor& values = c->input(1); - + string tag = SingleTag(tags); OP_REQUIRES( c, tags.IsSameSize(values) || (TensorShapeUtils::IsScalar(tags.shape()) && From 2f861a79758545f3a6357814c79d7dcace920a3b Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Tue, 30 Jun 2020 23:51:04 +0000 Subject: [PATCH 0104/2522] merge with master --- tensorflow/c/kernels.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tensorflow/c/kernels.h b/tensorflow/c/kernels.h index da4d3414842..dd685583a4f 100644 --- a/tensorflow/c/kernels.h +++ b/tensorflow/c/kernels.h @@ -194,10 +194,6 @@ TF_CAPI_EXPORT TF_Tensor* TF_AllocateOutput(TF_OpKernelContext* context, int64_t* dims, int num_dims, size_t len, TF_Status* status); -<<<<<<< HEAD -TF_CAPI_EXPORT extern TF_Tensor* TF_AllocateTemp(TF_OpKernelContext* context, TF_DataType dtype, - int64_t* dims, int num_dims, TF_Status* Status); -======= // Allocates a temporary Tensor of the 
specified type and shape. Devices // such as GPUs that enqueue Ops for lazy execution may retain references // to the temporary tensors after the Op's Compute method has run. @@ -205,8 +201,6 @@ TF_CAPI_EXPORT extern TF_Tensor* TF_AllocateTemp(TF_OpKernelContext* context, TF // num_dims must equal the size of array dims TF_CAPI_EXPORT extern TF_Tensor* TF_AllocateTemp(TF_OpKernelContext* context, TF_DataType dtype, int64_t* dims, int num_dims, TF_Status* status); ->>>>>>> e2187cd137f929eee06bd82a067564c4cdac2fa3 - #ifdef __cplusplus } /* end extern "C" */ From fb70daadc449ffc50d6d0cdce17375bd72b6ab07 Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Wed, 1 Jul 2020 00:15:09 +0000 Subject: [PATCH 0105/2522] fixed allocate_temp indent errors --- tensorflow/c/kernels.cc | 2 +- tensorflow/c/kernels.h | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/c/kernels.cc b/tensorflow/c/kernels.cc index 3a8170575ad..749e6e89b8b 100644 --- a/tensorflow/c/kernels.cc +++ b/tensorflow/c/kernels.cc @@ -268,7 +268,7 @@ TF_Tensor* TF_AllocateOutput(TF_OpKernelContext* context, int index, } TF_Tensor* TF_AllocateTemp(TF_OpKernelContext* context, TF_DataType dtype, - int64_t* dims, int num_dims, TF_Status* status){ + int64_t* dims, int num_dims, TF_Status* status){ auto* cc_ctx = reinterpret_cast<::tensorflow::OpKernelContext*>(context); TF_SetStatus(status, TF_OK, ""); tensorflow::TensorShape shape; diff --git a/tensorflow/c/kernels.h b/tensorflow/c/kernels.h index 8ed3488988d..1891ce31a23 100644 --- a/tensorflow/c/kernels.h +++ b/tensorflow/c/kernels.h @@ -200,7 +200,9 @@ TF_CAPI_EXPORT TF_Tensor* TF_AllocateOutput(TF_OpKernelContext* context, // num_dims must equal the size of array dims TF_CAPI_EXPORT extern TF_Tensor* TF_AllocateTemp(TF_OpKernelContext* context, - TF_DataType dtype, int64_t* dims, int num_dims, TF_Status* status); + TF_DataType dtype, + int64_t* dims, int num_dims, + TF_Status* status); #ifdef __cplusplus From 18b3aa5c4186b0fcb0236b4ac81335007af10501 Mon Sep 17 00:00:00 2001 From: rahul-kamat Date: Wed, 1 Jul 2020 02:48:58 +0000 Subject: [PATCH 0106/2522] Update the goldens for api_test --- tensorflow/python/eager/function_test.py | 3 --- tensorflow/tools/api/golden/v1/tensorflow.pbtxt | 2 +- tensorflow/tools/api/golden/v2/tensorflow.pbtxt | 2 +- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index 7963ed7b773..4557eb45393 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -4018,7 +4018,6 @@ class FunctionTest(test.TestCase, parameterized.TestCase): enabled = def_function.function(func, experimental_follow_type_hints=True) - trace_count = [0] enabled(1, 3, x=4.0, y="str") enabled(2, 4, x=4.0, y="str") # Retrace self.assertEqual(trace_count[0], 2) @@ -4045,7 +4044,6 @@ class FunctionTest(test.TestCase, parameterized.TestCase): enabled = def_function.function(func, experimental_follow_type_hints=True) - trace_count = [0] enabled(1, 20, 3, 4, 5, 6) enabled(1, 20, 3, 4, 5, 60) # No retrace - change in *args enabled(1, 30, 7, 8, 9, 10) # Retrace - change in args @@ -4059,7 +4057,6 @@ class FunctionTest(test.TestCase, parameterized.TestCase): enabled = def_function.function(func, experimental_follow_type_hints=True) - trace_count = [0] enabled(1, 2, 3, 4, 5, 6, a=1.0, b=2.0, c=3.0) enabled(1, 2, 3, 4, 5, 6, a=1.5, b=2.5, c=3.5) # No retrace - change in **kwargs enabled(100, 2, 3, 4, 5, 6, a=1.0, b=2.0, c=3.0) # 
Retrace - change in args diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt index 1fe3c40d7e4..a11a496ff4f 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt @@ -1338,7 +1338,7 @@ tf_module { } member_method { name: "function" - argspec: "args=[\'func\', \'input_signature\', \'autograph\', \'experimental_implements\', \'experimental_autograph_options\', \'experimental_relax_shapes\', \'experimental_compile\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'True\', \'None\', \'None\', \'False\', \'None\'], " + argspec: "args=[\'func\', \'input_signature\', \'autograph\', \'experimental_implements\', \'experimental_autograph_options\', \'experimental_relax_shapes\', \'experimental_compile\', \'experimental_follow_type_hints\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'True\', \'None\', \'None\', \'False\', \'None\', \'False\'], " } member_method { name: "gather" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt index 1d56969bd15..7a6fd430983 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt @@ -670,7 +670,7 @@ tf_module { } member_method { name: "function" - argspec: "args=[\'func\', \'input_signature\', \'autograph\', \'experimental_implements\', \'experimental_autograph_options\', \'experimental_relax_shapes\', \'experimental_compile\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'True\', \'None\', \'None\', \'False\', \'None\'], " + argspec: "args=[\'func\', \'input_signature\', \'autograph\', \'experimental_implements\', \'experimental_autograph_options\', \'experimental_relax_shapes\', \'experimental_compile\', \'experimental_follow_type_hints\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'True\', \'None\', \'None\', \'False\', \'None\', \'False\'], " } member_method { name: "gather" From 5f33491c674aefcca541e9e808bb54ddf9cd7589 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Tue, 30 Jun 2020 22:06:54 -0700 Subject: [PATCH 0107/2522] Fix doctests. --- tensorflow/python/ops/sparse_ops.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py index a7ec64994cb..d67b6d13b65 100644 --- a/tensorflow/python/ops/sparse_ops.py +++ b/tensorflow/python/ops/sparse_ops.py @@ -2750,13 +2750,21 @@ def map_values(op, *args, **kwargs): Examples: - >>> st = tf.sparse.from_dense([[1, 2, 0], [0, 4, 0], [1, 0, 0]]) - >>> tf.sparse.to_dense(tf.sparse.map_values(tf.ones_like, st)).numpy().to_list() - [[1, 1, 0], [0, 1, 0], [1, 0, 0]] - >>> tf.sparse.to_dense(tf.sparse.map_values(tf.multiply, st, st)).numpy().to_list() - [[1, 4, 0], [0, 16, 0], [1, 0, 0]] - >>> tf.sparse.to_dense(tf.sparse.map_values(tf.add, st, 5)).numpy().to_list() - [[5, 7, 0], [0, 9, 0], [6, 0, 0]] + >>> st = tf.sparse.from_dense([[1, 2, 0], + ... [0, 4, 0], + ... 
[1, 0, 0]]) + >>> print(tf.sparse.to_dense(tf.sparse.map_values(tf.ones_like, st)).numpy()) + [[1, 1, 0], + [0, 1, 0], + [1, 0, 0]] + >>> print(tf.sparse.to_dense(tf.sparse.map_values(tf.multiply, st, st)).numpy()) + [[1, 4, 0], + [0, 16, 0], + [1, 0, 0]] + >>> print(tf.sparse.to_dense(tf.sparse.map_values(tf.add, st, 5)).numpy()) + [[5, 7, 0], + [0, 9, 0], + [6, 0, 0]] Note in particular that even though `tf.add(0, 5) != 0`, implicit zeros will remain unchanged. However, if the sparse tensor contains any explict From 15068b90e5709ae8a9d6fd0aba8ab9e78205f479 Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Wed, 1 Jul 2020 06:16:13 +0000 Subject: [PATCH 0108/2522] latest --- tensorflow/core/framework/tensor_key.h | 2 +- tensorflow/core/kernels/BUILD | 1 + tensorflow/core/kernels/map_kernels.cc | 83 ++----------------------- tensorflow/core/kernels/map_kernels.h | 84 ++++++++++++++++++++++++++ tensorflow/core/kernels/tensor_map.h | 6 +- tensorflow/core/ops/map_ops.cc | 13 ++-- tensorflow/python/ops/map_ops.py | 9 ++- 7 files changed, 103 insertions(+), 95 deletions(-) create mode 100644 tensorflow/core/kernels/map_kernels.h diff --git a/tensorflow/core/framework/tensor_key.h b/tensorflow/core/framework/tensor_key.h index 14875de5918..aa6fe35181a 100644 --- a/tensorflow/core/framework/tensor_key.h +++ b/tensorflow/core/framework/tensor_key.h @@ -65,4 +65,4 @@ class TensorKey : public Tensor { } //namespace tensorflow -#endif \ No newline at end of file +#endif diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index eba435c6b25..85c2b9d175b 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -2953,6 +2953,7 @@ cc_library( tf_kernel_library( name = "map_kernels", srcs = ["map_kernels.cc"], + hdrs = ["map_kernels.h"], deps = [ ":concat_lib", ":fill_functor", diff --git a/tensorflow/core/kernels/map_kernels.cc b/tensorflow/core/kernels/map_kernels.cc index 17793cdc0aa..91dfbe8c384 100644 --- a/tensorflow/core/kernels/map_kernels.cc +++ b/tensorflow/core/kernels/map_kernels.cc @@ -1,86 +1,11 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - +#include "tensorflow/core/kernels/map_kernels.h" #include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/kernels/tensor_map.h" -#include -using namespace std; +#include "tensorflow/core/framework/types.h" namespace tensorflow { - -/*class EmptyTensorMap : public OpKernel { - public: - explicit EmptyTensorMap(OpKernelConstruction* ctx) : OpKernel(ctx) { - OP_REQUIRES_OK(ctx, ctx->GetAttr("element_dtype", &element_dtype_)); - } - - void Compute(OpKernelContext* ctx) override { - const Tensor& max_num_elements_t = ctx->input(1); - OP_REQUIRES( - ctx, TensorShapeUtils::IsScalar(max_num_elements_t.shape()), - errors::InvalidArgument( - "max_num_elements expected to be a scalar ", - "but got shape: ", max_num_elements_t.shape().DebugString())); - Tensor* result; - AllocatorAttributes attr; - attr.set_on_host(true); - OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape{}, &result, attr)); - TensorMap empty; - empty.element_dtype = element_dtype_; - empty.max_num_elements = max_num_elements_t.scalar()(); - PartialTensorShape element_shape; - OP_REQUIRES_OK(ctx, TensorShapeFromTensor(ctx->input(0), &element_shape)); - empty.element_shape = element_shape; - result->scalar()() = std::move(empty); - } - - private: - DataType element_dtype_; -}; - REGISTER_KERNEL_BUILDER(Name("EmptyTensorMap").Device(DEVICE_CPU), - EmptyTensorMap);*/ - -class ZeroOutOp : public OpKernel { - public: - explicit ZeroOutOp(OpKernelConstruction* c) : OpKernel(c) {} - - void Compute(OpKernelContext* c) override { - cout << "Hello World - Op" << endl; - // Grab the input tensor - const Tensor& input_tensor = c->input(0); - auto input = input_tensor.flat(); - - // Create an output tensor - Tensor* output_tensor = NULL; - OP_REQUIRES_OK(c, c->allocate_output(0, input_tensor.shape(), - &output_tensor)); - auto output_flat = output_tensor->flat(); - - // Set all but the first element of the output tensor to 0 - const int N = input.size(); - for (int i=1; i 0) output_flat(0) = input(0); - } -}; + EmptyTensorMap); REGISTER_KERNEL_BUILDER(Name("ZeroOut").Device(DEVICE_CPU), ZeroOutOp); - -} // namespace tensorflow +} \ No newline at end of file diff --git a/tensorflow/core/kernels/map_kernels.h b/tensorflow/core/kernels/map_kernels.h new file mode 100644 index 00000000000..b10eb8dedfb --- /dev/null +++ b/tensorflow/core/kernels/map_kernels.h @@ -0,0 +1,84 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/kernels/tensor_map.h" +#include +using namespace std; + +namespace tensorflow { + +class EmptyTensorMap : public OpKernel { + public: + explicit EmptyTensorMap(OpKernelConstruction* ctx) : OpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("element_dtype", &element_dtype_)); + } + + void Compute(OpKernelContext* ctx) override { + const Tensor& max_num_elements_t = ctx->input(1); + OP_REQUIRES( + ctx, TensorShapeUtils::IsScalar(max_num_elements_t.shape()), + errors::InvalidArgument( + "max_num_elements expected to be a scalar ", + "but got shape: ", max_num_elements_t.shape().DebugString())); + Tensor* result; + AllocatorAttributes attr; + attr.set_on_host(true); + g(ctx, ctx->allocate_output(0, TensorShape{}, &result, attr)); + TensorMap empty; + empty.element_dtype = element_dtype_; + empty.max_num_elements = max_num_elements_t.scalar()(); + PartialTensorShape element_shape; + //OP_REQUIRES_OK(ctx, TensorShapeFromTensor(ctx->input(0), &element_shape)); + empty.element_shape = element_shape; + result->scalar()() = std::move(empty); + } + + private: + DataType element_dtype_; +}; + + + +class ZeroOutOp : public OpKernel { + public: + explicit ZeroOutOp(OpKernelConstruction* c) : OpKernel(c) {} + + void Compute(OpKernelContext* c) override { + cout << "Hello World - Op" << endl; + // Grab the input tensor + const Tensor& input_tensor = c->input(0); + auto input = input_tensor.flat(); + + // Create an output tensor + Tensor* output_tensor = NULL; + OP_REQUIRES_OK(c, c->allocate_output(0, input_tensor.shape(), + &output_tensor)); + auto output_flat = output_tensor->flat(); + + // Set all but the first element of the output tensor to 0 + const int N = input.size(); + for (int i=1; i 0) output_flat(0) = input(0); + } +}; + + + +} // namespace tensorflow diff --git a/tensorflow/core/kernels/tensor_map.h b/tensorflow/core/kernels/tensor_map.h index 7da8283c655..c5993ec9300 100644 --- a/tensorflow/core/kernels/tensor_map.h +++ b/tensorflow/core/kernels/tensor_map.h @@ -40,10 +40,10 @@ namespace tensorflow { // a reference count. Modifying b.tensors() modifies a.tensors(). In this way, // TensorList should be considered similar to the tf::Tensor object. // -// In order to get a copy of the underlying list, use the Copy method: +// In order to get a copy of the underlying map, use the Copy method: // // TensorList b = a.Copy(); -// b.tensors().push_back(t); // This does not modify a.tensors(). +// b.tensors().insert(k, v); // This does not modify a.tensors(). // // Note that this is not a deep copy: the memory locations of the underlying // tensors will still point to the same locations of the corresponding tensors @@ -188,4 +188,4 @@ static_assert(Variant::CanInlineType(), #endif } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_TENSOR_LIST_H_ +#endif // TENSORFLOW_CORE_KERNELS_TENSOR_MAP_H_ diff --git a/tensorflow/core/ops/map_ops.cc b/tensorflow/core/ops/map_ops.cc index 59c20c6d75f..f1d7b291a70 100644 --- a/tensorflow/core/ops/map_ops.cc +++ b/tensorflow/core/ops/map_ops.cc @@ -20,23 +20,22 @@ limitations under the License. 
namespace tensorflow { namespace { - REGISTER_OP("EmptyTensorMap") - .Input("element_shape: shape_type") - .Input("max_num_elements: int32") +// .Input("element_shape: shape_type") +// .Input("max_num_elements: int32") .Output("handle: variant") - .Attr("element_dtype: type") - .Attr("shape_type: {int32, int64}") +// .Attr("element_dtype: type") +// .Attr("shape_type: {int32, int64}") .SetShapeFn([](shape_inference::InferenceContext* c) { c->set_output(0, c->Scalar()); - DataType element_dtype; + /*DataType element_dtype; TF_RETURN_IF_ERROR(c->GetAttr("element_dtype", &element_dtype)); shape_inference::ShapeHandle element_shape; TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensorTreatScalarAsUnknownShape( 0, &element_shape)); c->set_output_handle_shapes_and_types( 0, std::vector{ - {element_shape, element_dtype}}); + {element_shape, element_dtype}});*/ return Status::OK(); }); diff --git a/tensorflow/python/ops/map_ops.py b/tensorflow/python/ops/map_ops.py index 4abd7f3f998..03acaa8fb72 100644 --- a/tensorflow/python/ops/map_ops.py +++ b/tensorflow/python/ops/map_ops.py @@ -37,11 +37,10 @@ def empty_tensor_map(element_shape, if max_num_elements is None: max_num_elements = -1 - return gen_map_ops.empty_tensor_map( - element_shape=_build_element_shape(element_shape), - element_dtype=element_dtype, - max_num_elements=max_num_elements, - name=name) + return gen_map_ops.empty_tensor_map(element_shape, + element_dtype, + max_num_elements, + name) def zero_out(to_zero): print("Hello World - Python Op") From f8933ade5959aa8d63cdfea31fb068323e564a7c Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Wed, 1 Jul 2020 06:32:58 +0000 Subject: [PATCH 0109/2522] latest --- tensorflow/core/kernels/map_kernels.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/map_kernels.h b/tensorflow/core/kernels/map_kernels.h index b10eb8dedfb..1ed7c663a57 100644 --- a/tensorflow/core/kernels/map_kernels.h +++ b/tensorflow/core/kernels/map_kernels.h @@ -36,7 +36,7 @@ class EmptyTensorMap : public OpKernel { Tensor* result; AllocatorAttributes attr; attr.set_on_host(true); - g(ctx, ctx->allocate_output(0, TensorShape{}, &result, attr)); + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape{}, &result, attr)); TensorMap empty; empty.element_dtype = element_dtype_; empty.max_num_elements = max_num_elements_t.scalar()(); From 8f3b2ae4e273968e2f36b6ae8bc4cb720d83d0d9 Mon Sep 17 00:00:00 2001 From: CuiYifeng Date: Wed, 1 Jul 2020 15:05:55 +0800 Subject: [PATCH 0110/2522] [Intel MKL] Factor out the macros for handling test registeration --- .../common_runtime/mkl_layout_pass_test.cc | 22 +------------------ .../grappler/optimizers/mkl_remapper_test.cc | 14 +----------- tensorflow/core/util/mkl_util.h | 22 +++++++++++++++++++ 3 files changed, 24 insertions(+), 34 deletions(-) diff --git a/tensorflow/core/common_runtime/mkl_layout_pass_test.cc b/tensorflow/core/common_runtime/mkl_layout_pass_test.cc index d480c0a49ce..bcc470cefd4 100644 --- a/tensorflow/core/common_runtime/mkl_layout_pass_test.cc +++ b/tensorflow/core/common_runtime/mkl_layout_pass_test.cc @@ -34,6 +34,7 @@ limitations under the License. 
#include "tensorflow/core/platform/protobuf.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/test_benchmark.h" +#include "tensorflow/core/util/mkl_util.h" namespace tensorflow { @@ -188,27 +189,6 @@ REGISTER_OP("BFloat16Output2") .SetIsStateful(); #endif // ENABLE_INTEL_MKL_BFLOAT16 -///////////////////////////////////////////////////////////////////// -// Macros for handling registeration for various types -///////////////////////////////////////////////////////////////////// - -#define REGISTER_TEST_FLOAT32(TEST) REGISTER_TEST(TEST, DT_FLOAT, Float32Input); - -#ifdef ENABLE_INTEL_MKL_BFLOAT16 -#define REGISTER_TEST_BFLOAT16(TEST) \ - REGISTER_TEST(TEST, DT_BFLOAT16, BFloat16Input); - -#define REGISTER_TEST_ALL_TYPES(TEST) \ - REGISTER_TEST_FLOAT32(TEST); \ - REGISTER_TEST_BFLOAT16(TEST); -#else -#define REGISTER_TEST_ALL_TYPES(TEST) REGISTER_TEST_FLOAT32(TEST); -#endif // ENABLE_INTEL_MKL_BFLOAT16 - -///////////////////////////////////////////////////////////////////// -// Unit tests related to node merge optimization -///////////////////////////////////////////////////////////////////// - // clang-format off TEST_F(MklLayoutPassTest, Basic) { InitGraph( diff --git a/tensorflow/core/grappler/optimizers/mkl_remapper_test.cc b/tensorflow/core/grappler/optimizers/mkl_remapper_test.cc index 77929bea257..f534d3ed34f 100644 --- a/tensorflow/core/grappler/optimizers/mkl_remapper_test.cc +++ b/tensorflow/core/grappler/optimizers/mkl_remapper_test.cc @@ -22,19 +22,7 @@ limitations under the License. #include "tensorflow/core/grappler/utils/grappler_test.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/platform/test.h" - -#define REGISTER_TEST_FLOAT32(TEST) REGISTER_TEST(TEST, DT_FLOAT, Float32Input); - -#ifdef ENABLE_INTEL_MKL_BFLOAT16 -#define REGISTER_TEST_BFLOAT16(TEST) \ - REGISTER_TEST(TEST, DT_BFLOAT16, BFloat16Input); - -#define REGISTER_TEST_ALL_TYPES(TEST) \ - REGISTER_TEST_FLOAT32(TEST); \ - REGISTER_TEST_BFLOAT16(TEST); -#else -#define REGISTER_TEST_ALL_TYPES(TEST) REGISTER_TEST_FLOAT32(TEST); -#endif // ENABLE_INTEL_MKL_BFLOAT16 +#include "tensorflow/core/util/mkl_util.h" namespace tensorflow { namespace grappler { diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index a4a3f5ff778..d94c6848ad7 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -2281,5 +2281,27 @@ inline bool IsConv1x1StrideNot1(memory::dims filter_dims, } } // namespace tensorflow + +///////////////////////////////////////////////////////////////////// +// Macros for handling registeration for various types +///////////////////////////////////////////////////////////////////// + +#define REGISTER_TEST_FLOAT32(TEST) REGISTER_TEST(TEST, DT_FLOAT, Float32Input); + +#ifdef ENABLE_INTEL_MKL_BFLOAT16 +#define REGISTER_TEST_BFLOAT16(TEST) \ + REGISTER_TEST(TEST, DT_BFLOAT16, BFloat16Input); + +#define REGISTER_TEST_ALL_TYPES(TEST) \ + REGISTER_TEST_FLOAT32(TEST); \ + REGISTER_TEST_BFLOAT16(TEST); +#else +#define REGISTER_TEST_ALL_TYPES(TEST) REGISTER_TEST_FLOAT32(TEST); +#endif // ENABLE_INTEL_MKL_BFLOAT16 + +///////////////////////////////////////////////////////////////////// +// Unit tests related to node merge optimization +///////////////////////////////////////////////////////////////////// + #endif // INTEL_MKL #endif // TENSORFLOW_CORE_UTIL_MKL_UTIL_H_ From ed94f8da21118af331687cb7b7b8b02c043d0a68 Mon Sep 17 00:00:00 2001 From: Tamas Nyiri Date: Wed, 1 Jul 2020 14:19:20 +0100 Subject: 
[PATCH 0111/2522] removed unnecessary import --- tensorflow/lite/tools/optimize/modify_model_interface.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/lite/tools/optimize/modify_model_interface.cc b/tensorflow/lite/tools/optimize/modify_model_interface.cc index efd062642f8..b9779ee7adf 100644 --- a/tensorflow/lite/tools/optimize/modify_model_interface.cc +++ b/tensorflow/lite/tools/optimize/modify_model_interface.cc @@ -18,7 +18,6 @@ limitations under the License. #include #include #include -#include #include "absl/memory/memory.h" #include "flatbuffers/flexbuffers.h" From 1b4085d652870db166197c66a16c817ecec8e2e4 Mon Sep 17 00:00:00 2001 From: amturati Date: Wed, 1 Jul 2020 16:01:57 +0000 Subject: [PATCH 0112/2522] fixed memory leaks and added matrix addition test --- .../eager/c_api_unified_experimental_test.cc | 88 ++++++++++++++++++- 1 file changed, 87 insertions(+), 1 deletion(-) diff --git a/tensorflow/c/eager/c_api_unified_experimental_test.cc b/tensorflow/c/eager/c_api_unified_experimental_test.cc index 398830c35fe..10e8907ec96 100644 --- a/tensorflow/c/eager/c_api_unified_experimental_test.cc +++ b/tensorflow/c/eager/c_api_unified_experimental_test.cc @@ -245,6 +245,90 @@ TEST_P(UnifiedCAPI, TestBasicEagerMatMul2) { TF_DeleteExecutionContext(ctx); } +// MatAdd +TEST_P(UnifiedCAPI, TestBasicEagerMatAdd) { + std::unique_ptr status( + TF_NewStatus(), TF_DeleteStatus); + TFE_ContextOptions* opts = TFE_NewContextOptions(); + TF_ExecutionContext* ctx = TF_NewEagerExecutionContext(opts, status.get()); + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + TFE_DeleteContextOptions(opts); + + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + + /* Want to test simple MatMul example with abstract tensors: + + [ [1,2] , + [ [5,6] , = [ [6,8], + [3,4] ] [7,8] ] [10,12] ] + + */ + + // Build 1st Matrix. + int64_t dims [] = {2,2}; // Matrices will be 2 x 2 + int num_dims = sizeof(dims)/sizeof(dims[0]); + + float vals1 [] = {1.0f,2.0f,3.0f,4.0f}; + TFE_Context* eager_ctx = TF_ExecutionContextGetTFEContext(ctx); + TFE_TensorHandle* t1 = TestMatrixTensorHandleWithInput(eager_ctx, vals1, dims, num_dims); + + TF_AbstractTensor* at1 = + TF_CreateAbstractTensorFromEagerTensor(t1, status.get()); // get abstract tensor + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + + // Build 2nd Matrix. + float vals2 [] = {5.0f,6.0f,7.0f,8.0f}; + TFE_TensorHandle* t2 = TestMatrixTensorHandleWithInput(eager_ctx, vals2, dims, num_dims); + + TF_AbstractTensor* at2 = + TF_CreateAbstractTensorFromEagerTensor(t2, status.get()); // get abstract tensor + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + + // Build an abstract operation. + auto* op = TF_NewAbstractOp(ctx); + TF_AbstractOpSetOpType(op, "Add", status.get()); + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + + // Build inputs and outputs. + TF_AbstractTensor* inputs[2] = {at1, at2}; + TF_OutputList* o = TF_NewOutputList(); + TF_OutputListSetNumOutputs(o, 1, status.get()); + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + + // Execute. + TF_ExecuteOperation(op, 2, inputs, o, ctx, status.get()); + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + + // Clean up operation and inputs. + TF_DeleteAbstractOp(op); + TF_DeleteAbstractTensor(at1); + TF_DeleteAbstractTensor(at2); + + // Verify the results. 
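+  // Expected result is the element-wise sum of the two input matrices:
+  //   [[1, 2], [3, 4]] + [[5, 6], [7, 8]] = [[6, 8], [10, 12]]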
+ ASSERT_EQ(1, TF_OutputListNumOutputs(o)); + TF_AbstractTensor* result = TF_OutputListGet(o, 0); + TFE_TensorHandle* result_t = + TF_AbstractTensorGetEagerTensor(result, status.get()); + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + + TF_Tensor* result_tensor = TFE_TensorHandleResolve(result_t, status.get()); + + // Copy Tensor data into array. + float result_data[4] = {0}; + memcpy(&result_data[0], TF_TensorData(result_tensor), TF_TensorByteSize(result_tensor)); + + // Build expected result & verify. + float e_vals [] = {6.0f,8.0f,10.0f,12.0f};//{19.0f,22.0f,43.0f,50.0f}; + + int data_len = 4; // length of e_vals + for(int i = 0; i < data_len; i++){ + EXPECT_EQ(result_data[i], e_vals[i]); + } + + TF_DeleteTensor(result_tensor); + TF_DeleteAbstractTensor(result); + TF_DeleteOutputList(o); + TF_DeleteExecutionContext(ctx); +} TEST_P(UnifiedCAPI, TestBasicGraph) { std::unique_ptr status( @@ -324,6 +408,7 @@ TEST_P(UnifiedCAPI, TestBasicGraph) { TF_DeleteOutputList(add_outputs); TF_DeleteAbstractOp(fn_op); TF_DeleteAbstractTensor(input_t); + TF_DeleteAbstractTensor(placeholder_t); TF_DeleteAbstractTensor(final_result); TF_DeleteTensor(f_t); TF_DeleteAbstractFunction(func); @@ -426,10 +511,11 @@ TEST_P(UnifiedCAPI, TestBasicGraphMatMul) { ASSERT_EQ(result_data[i], 2.0f); } + TF_DeleteAbstractTensor(final_result); TF_DeleteOutputList(mm_outputs); + TF_DeleteAbstractTensor(placeholder_t); TF_DeleteAbstractOp(fn_op); TF_DeleteAbstractTensor(input_t); - TF_DeleteAbstractTensor(final_result); TF_DeleteTensor(f_t); TF_DeleteAbstractFunction(func); From 55f67d71d4b3127445e11b87c2f11d3e9f3f4c59 Mon Sep 17 00:00:00 2001 From: amturati Date: Wed, 1 Jul 2020 16:28:45 +0000 Subject: [PATCH 0113/2522] added robust tracing test for multi-output graph with adds and matmul --- .../eager/c_api_unified_experimental_test.cc | 199 ++++++++++++++++++ 1 file changed, 199 insertions(+) diff --git a/tensorflow/c/eager/c_api_unified_experimental_test.cc b/tensorflow/c/eager/c_api_unified_experimental_test.cc index 10e8907ec96..932014d4898 100644 --- a/tensorflow/c/eager/c_api_unified_experimental_test.cc +++ b/tensorflow/c/eager/c_api_unified_experimental_test.cc @@ -668,6 +668,205 @@ TEST_P(UnifiedCAPI, TestMultiOutputGraph) { TF_DeleteAbstractFunction(func); } +TEST_P(UnifiedCAPI, TestMultiOutputGraphMatMul) { + std::unique_ptr status( + TF_NewStatus(), TF_DeleteStatus); + TF_Status* s = status.get(); + + // Start a new function / execution context. + string fn_name = "two_adds_and_matmul"; + TF_ExecutionContext* graph_ctx = TF_CreateFunction(fn_name.c_str(), s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + + auto* arg0 = TF_AddFunctionParameter(graph_ctx, TF_FLOAT, s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + auto* arg1 = TF_AddFunctionParameter(graph_ctx, TF_FLOAT, s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + + // Create a first "Add" computing `arg0 + arg1`. + TF_AbstractTensor* add_output1; + { + // Build an abstract operation, inputs and output. + auto* add_op = TF_NewAbstractOp(graph_ctx); + TF_AbstractOpSetOpType(add_op, "Add", s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + TF_AbstractOpSetOpName(add_op, "my_add1", s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + TF_AbstractTensor* inputs[2] = {arg0, arg1}; + TF_OutputList* add_outputs = TF_NewOutputList(); + + // Trace the operation now (create a node in the graph). 
+ TF_ExecuteOperation(add_op, 2, inputs, add_outputs, graph_ctx, s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + TF_DeleteAbstractOp(add_op); + // Extract the resulting tensor. + add_output1 = TF_OutputListGet(add_outputs, 0); + TF_DeleteOutputList(add_outputs); + } + + // Same with a second "Add" computing `arg1 + arg1`. + TF_AbstractTensor* add_output2; + { + // Build an abstract operation, inputs and output. + auto* add_op = TF_NewAbstractOp(graph_ctx); + TF_AbstractOpSetOpType(add_op, "Add", s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + TF_AbstractOpSetOpName(add_op, "my_add2", s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + TF_AbstractTensor* inputs[2] = {arg1, arg1}; + TF_OutputList* add_outputs = TF_NewOutputList(); + // Trace the operation now (create a node in the graph). + TF_ExecuteOperation(add_op, 2, inputs, add_outputs, graph_ctx, s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + TF_DeleteAbstractOp(add_op); + // Extract the resulting tensor. + add_output2 = TF_OutputListGet(add_outputs, 0); + TF_DeleteOutputList(add_outputs); + } + + // 3rd Output will be Matrix Multiplication of add_output1 and add_output2 + TF_AbstractTensor* mm_output; + { + // Build an abstract operation, inputs and output. + auto* mm_op = TF_NewAbstractOp(graph_ctx); + TF_AbstractOpSetOpType(mm_op, "MatMul", s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + TF_AbstractOpSetOpName(mm_op, "mm", s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + TF_AbstractTensor* inputs[2] = {add_output1, add_output2}; + TF_OutputList* mm_outputs = TF_NewOutputList(); + // Trace the operation now (create a node in the graph). + TF_ExecuteOperation(mm_op, 2, inputs, mm_outputs, graph_ctx, s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + TF_DeleteAbstractOp(mm_op); + // Extract the resulting tensor. + mm_output = TF_OutputListGet(mm_outputs, 0); + TF_DeleteOutputList(mm_outputs); + } + + // Finalize the function by providing the returned values. + TF_AbstractFunction* func; + { + // We want to return the output of both add operations and MatMul operation, create a new list + // and populate it. + TF_OutputList* func_outputs = TF_NewOutputList(); + TF_OutputListPushBack(func_outputs, add_output1, s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + TF_OutputListPushBack(func_outputs, add_output2, s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + TF_OutputListPushBack(func_outputs, mm_output, s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + func = TF_FinalizeFunction(graph_ctx, func_outputs, s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + TF_DeleteOutputList(func_outputs); + } + + /** + * We traced so far this function: + * + * def two_adds_and_mm(A, B): + * my_add1 = A + B + * my_add2 = B + B + * mm = tf.MatMul(my_add1,my_add2) + * return my_add1, my_add2, mm + * + * Now we will execute this function with an eager context: + * + * A = [[0, 0], [0, 0]] + * B = [[1, 0], [0, 1]] + * + * output1, output2, output3 = two_adds_and_mm(A, B) + * + * We expect outputs: + * + * output1 = [[1, 0], [0, 1]] + * output2 = [[2, 0], [0, 2]] + * output3 = [[2, 0], [0, 2]] + * + */ + + // Build eager context. 
+ TFE_ContextOptions* opts = TFE_NewContextOptions(); + TF_ExecutionContext* eager_execution_ctx = + TF_NewEagerExecutionContext(opts, s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + TFE_DeleteContextOptions(opts); + + TF_ExecutionContextRegisterFunction(eager_execution_ctx, func, s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + + // Build the abstract op to run the function. + TF_AbstractOp* fn_op = TF_NewAbstractOp(eager_execution_ctx); + TF_AbstractOpSetOpType(fn_op, fn_name.c_str(), s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + + // Build two abstract input tensors as function arguments. + std::vector func_args; + { + TFE_Context* eager_ctx = + TF_ExecutionContextGetTFEContext(eager_execution_ctx); + + // 1st Arg + float vals1 [] = {0.0f,0.0f,0.0f,0.0f}; + int64_t dims [] = {2,2}; // Matrices will be 2 x 2 + int num_dims = sizeof(dims)/sizeof(dims[0]); + + TFE_TensorHandle* input_eager = TestMatrixTensorHandleWithInput(eager_ctx, vals1, dims, num_dims); + func_args.push_back(TF_CreateAbstractTensorFromEagerTensor(input_eager, s)); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + + // 2nd Arg + float vals2 [] = {1.0f,0.0f,0.0f,1.0f}; + input_eager = TestMatrixTensorHandleWithInput(eager_ctx, vals2, dims, num_dims); + func_args.push_back(TF_CreateAbstractTensorFromEagerTensor(input_eager, s)); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + } + + TF_OutputList* func_outputs = TF_NewOutputList(); + TF_OutputListSetNumOutputs(func_outputs, 3, s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + TF_ExecuteOperation(fn_op, func_args.size(), func_args.data(), func_outputs, + eager_execution_ctx, s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + TF_DeleteAbstractOp(fn_op); + for (TF_AbstractTensor* t : func_args) TF_DeleteAbstractTensor(t); + + ASSERT_EQ(3, TF_OutputListNumOutputs(func_outputs)); + + float expected_outputs [3][4] = {{1.0f,0.0f,0.0f,1.0f}, + {2.0f,0.0f,0.0f,2.0f}, + {2.0f,0.0f,0.0f,2.0f}}; + + float result_data[4]; + for (int idx = 0; idx < 3; ++idx) { + TF_AbstractTensor* result = TF_OutputListGet(func_outputs, idx); + TFE_TensorHandle* handle = TF_AbstractTensorGetEagerTensor(result, s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + TF_Tensor* f_t = TFE_TensorHandleResolve(handle, s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + + memcpy(&result_data[0], TF_TensorData(f_t), TF_TensorByteSize(f_t)); + + // Verify results for each output + for(int j = 0; j < 4; j++){ + ASSERT_EQ(result_data[j], expected_outputs[idx][j]); + } + + TF_DeleteTensor(f_t); + } + + // Free memory associated with add and MatMul outputs + for (int idx = 0; idx < 3; ++idx) { + TF_AbstractTensor* result = TF_OutputListGet(func_outputs, idx); + TF_DeleteAbstractTensor(result); + } + + TF_DeleteOutputList(func_outputs); + TF_DeleteExecutionContext(eager_execution_ctx); + TF_DeleteAbstractFunction(func); +} + + TEST(UnifiedCAPI, TF_ExecutionContextToFunctionWithEagerContextRaises) { std::unique_ptr status( TF_NewStatus(), TF_DeleteStatus); From 0f456453410f4a2953aece6b8e44b6dab9e756ed Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Wed, 1 Jul 2020 09:47:51 -0700 Subject: [PATCH 0114/2522] doctest --- tensorflow/python/ops/sparse_ops.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py index d67b6d13b65..264783d73ed 100644 --- a/tensorflow/python/ops/sparse_ops.py +++ b/tensorflow/python/ops/sparse_ops.py @@ -2754,17 +2754,17 @@ 
def map_values(op, *args, **kwargs): ... [0, 4, 0], ... [1, 0, 0]]) >>> print(tf.sparse.to_dense(tf.sparse.map_values(tf.ones_like, st)).numpy()) - [[1, 1, 0], - [0, 1, 0], - [1, 0, 0]] + [[1 1 0] + [0 1 0] + [1 0 0]] >>> print(tf.sparse.to_dense(tf.sparse.map_values(tf.multiply, st, st)).numpy()) - [[1, 4, 0], - [0, 16, 0], - [1, 0, 0]] + [[ 1 4 0] + [ 0 16 0] + [ 1 0 0]] >>> print(tf.sparse.to_dense(tf.sparse.map_values(tf.add, st, 5)).numpy()) - [[5, 7, 0], - [0, 9, 0], - [6, 0, 0]] + [[6 7 0] + [0 9 0] + [6 0 0]] Note in particular that even though `tf.add(0, 5) != 0`, implicit zeros will remain unchanged. However, if the sparse tensor contains any explict From a1c859ab601485b04b7a1d5615e55df05d8bbd5b Mon Sep 17 00:00:00 2001 From: rahul-kamat Date: Wed, 1 Jul 2020 17:03:37 +0000 Subject: [PATCH 0115/2522] fix pylint errors --- tensorflow/python/types/core.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/types/core.py b/tensorflow/python/types/core.py index 0ac9d83983d..f1519d4c6aa 100644 --- a/tensorflow/python/types/core.py +++ b/tensorflow/python/types/core.py @@ -64,5 +64,6 @@ class Value(Tensor): pass -TensorLike = Union[Tensor, int, float, bool, str, complex, tuple, list, np.ndarray] +TensorLike = Union[Tensor, int, float, bool, str, complex, tuple, list, + np.ndarray] tf_export("experimental.TensorLike").export_constant(__name__, "TensorLike") From 31aeac6e3fc13d08ad62dea0da5e9be316a0f0ab Mon Sep 17 00:00:00 2001 From: Trent Lo Date: Tue, 23 Jun 2020 12:32:05 -0700 Subject: [PATCH 0116/2522] [XLA/GPU] Declares that rsqrt is cheap and should be fused. --- .../xla/service/gpu/instruction_fusion.cc | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc index a0580e2ab04..5a67f2510dc 100644 --- a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc @@ -29,12 +29,23 @@ limitations under the License. namespace xla { namespace gpu { +bool ElementIsF32OrF16(const Shape& shape) { + PrimitiveType type = shape.element_type(); + return type == F32 || type == F16; +} + /*static*/ bool GpuInstructionFusion::IsExpensive( const HloInstruction& instruction) { - // We say that floating-point division is cheap on the GPU. - if (instruction.opcode() == HloOpcode::kDivide && - ShapeUtil::ElementIsFloating(instruction.shape())) { - return false; + + // We say that some floating-point math ops are cheap on the GPU. + switch (instruction.opcode()) { + case HloOpcode::kDivide: + case HloOpcode::kRsqrt: + if (ElementIsF32OrF16(instruction.shape())) { + return false; + } + default: + break; } return InstructionFusion::IsExpensive(instruction); } From 0e79a26d9a86329031bf460968bc6e52e88f2d44 Mon Sep 17 00:00:00 2001 From: rahul-kamat Date: Wed, 1 Jul 2020 17:40:43 +0000 Subject: [PATCH 0117/2522] fix pylint errors --- tensorflow/python/eager/def_function.py | 7 ++++--- tensorflow/python/eager/function_test.py | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py index 4bc62ce24e2..855122b166c 100644 --- a/tensorflow/python/eager/def_function.py +++ b/tensorflow/python/eager/def_function.py @@ -1414,9 +1414,10 @@ def function(func=None, experimental_compile: If True, the function is always compiled by [XLA](https://www.tensorflow.org/xla). 
XLA may be more efficient in some cases (e.g. TPU, XLA_GPU, dense tensor computations). - experimental_follow_type_hints: When True, the function may use type annotations - to optimize the tracing performance. For example, arguments annotated with - `tf.Tensor` will automatically be converted to a Tensor. + experimental_follow_type_hints: When True, the function may use type + annotations to optimize the tracing performance. For example, + arguments annotated with tf.Tensor` will automatically be converted + to a Tensor. Returns: If `func` is not None, returns a callable that will execute the compiled diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index 4557eb45393..039620f6836 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -4008,7 +4008,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase): trace_count = [0] disabled(1, constant_op.constant(1), "str", x=4.0) disabled(2, constant_op.constant(2), "str2", x=5.0) # Retrace - self.assertEqual(trace_count[0], 2) + self.assertEqual(trace_count[0], 2) # pylint: disable=bad-whitespace def testFollowTypeHintsTraceWithOnlyArgNamed(self): trace_count = [0] From 2a4e26dc4a2a37f5d8d86eee9a7d6176afe5329e Mon Sep 17 00:00:00 2001 From: rahul-kamat Date: Wed, 1 Jul 2020 18:11:34 +0000 Subject: [PATCH 0118/2522] Add TODO about missing types in TensorLike --- tensorflow/python/types/core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/types/core.py b/tensorflow/python/types/core.py index f1519d4c6aa..d7b670fafdc 100644 --- a/tensorflow/python/types/core.py +++ b/tensorflow/python/types/core.py @@ -64,6 +64,7 @@ class Value(Tensor): pass +# TODO(rahulkamat): Add missing types that are convertible to tensor TensorLike = Union[Tensor, int, float, bool, str, complex, tuple, list, np.ndarray] tf_export("experimental.TensorLike").export_constant(__name__, "TensorLike") From 3d103cdf6082b565ff7359f36ee2e8a503fcfef5 Mon Sep 17 00:00:00 2001 From: ngc92 <7938269+ngc92@users.noreply.github.com> Date: Wed, 1 Jul 2020 21:19:26 +0300 Subject: [PATCH 0119/2522] fixing linter issues --- tensorflow/python/ops/sparse_ops.py | 15 ++++++++------- tensorflow/python/ops/sparse_ops_test.py | 12 +++++++----- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py index 264783d73ed..69f1c510360 100644 --- a/tensorflow/python/ops/sparse_ops.py +++ b/tensorflow/python/ops/sparse_ops.py @@ -2750,18 +2750,18 @@ def map_values(op, *args, **kwargs): Examples: - >>> st = tf.sparse.from_dense([[1, 2, 0], - ... [0, 4, 0], - ... [1, 0, 0]]) - >>> print(tf.sparse.to_dense(tf.sparse.map_values(tf.ones_like, st)).numpy()) + >>> s = tf.sparse.from_dense([[1, 2, 0], + ... [0, 4, 0], + ... [1, 0, 0]]) + >>> print(tf.sparse.to_dense(tf.sparse.map_values(tf.ones_like, s)).numpy()) [[1 1 0] [0 1 0] [1 0 0]] - >>> print(tf.sparse.to_dense(tf.sparse.map_values(tf.multiply, st, st)).numpy()) + >>> print(tf.sparse.to_dense(tf.sparse.map_values(tf.multiply, s, s)).numpy()) [[ 1 4 0] [ 0 16 0] [ 1 0 0]] - >>> print(tf.sparse.to_dense(tf.sparse.map_values(tf.add, st, 5)).numpy()) + >>> print(tf.sparse.to_dense(tf.sparse.map_values(tf.add, s, 5)).numpy()) [[6 7 0] [0 9 0] [6 0 0]] @@ -2795,7 +2795,8 @@ def map_values(op, *args, **kwargs): # and the known indices/dense shape. Since we ensure that indices and shape # are identical, we can just use the first one. 
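  # inner_args/inner_kwargs mirror args/kwargs with each SparseTensor replaced
  # by its .values tensor, so op only transforms the non-zero values.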
return sparse_tensor.SparseTensor(sparse_list[0].indices, - op(*inner_args, **inner_kwargs), sparse_list[0].dense_shape) + op(*inner_args, **inner_kwargs), + sparse_list[0].dense_shape) def _assert_sparse_compatible(sparse_tensors): diff --git a/tensorflow/python/ops/sparse_ops_test.py b/tensorflow/python/ops/sparse_ops_test.py index 92ed563e97f..69ccb5ac5fc 100644 --- a/tensorflow/python/ops/sparse_ops_test.py +++ b/tensorflow/python/ops/sparse_ops_test.py @@ -190,9 +190,11 @@ class SparseOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): # helper function to check equality of sparse tensor def assert_sparse_equal(expected, result): - self.assertAllEqual(expected.values, result.values, msg="Values differ") - self.assertAllEqual(expected.indices, result.indices, msg="Indices differ") - self.assertAllEqual(expected.dense_shape, result.dense_shape, msg="Shapes differ") + self.assertAllEqual(expected.values, result.values, msg="Values differ") + self.assertAllEqual(expected.indices, result.indices, + msg="Indices differ") + self.assertAllEqual(expected.dense_shape, result.dense_shape, + msg="Shapes differ") # check for a single sparse argument expected = sparse_ops.from_dense([[0.0, 1.0, 0.0], [2.0, 1.0, 0.0]]) @@ -202,8 +204,8 @@ class SparseOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): # check correct passing of keyword argument, and handling of two sparse # arguments at the same time def mapping(arg1, arg2, kwarg): - self.assertTrue(kwarg == "kwarg") - return arg1 + arg2 + self.assertTrue(kwarg == "kwarg") + return arg1 + arg2 result = sparse_ops.map_values(mapping, sp, sp, kwarg="kwarg") expected = sparse_ops.from_dense([[0.0, 2.0, 0.0], [-4.0, 2.0, 0.0]]) assert_sparse_equal(expected, result) From db92e49669e54d4549cbb09d43a7abf4f617578b Mon Sep 17 00:00:00 2001 From: Abolfazl Shahbazi Date: Wed, 1 Jul 2020 15:30:09 -0700 Subject: [PATCH 0120/2522] Adding OneDNN partials for Ubuntu versions 16.04. 
18.04, 20.04 --- ...untu-16.04-devel-onednn-jupyter.Dockerfile | 98 +++++++++++++ .../ubuntu-16.04-devel-onednn.Dockerfile | 84 +++++++++++ .../ubuntu-16.04-onednn-jupyter.Dockerfile | 66 +++++++++ .../onednn/ubuntu-16.04-onednn.Dockerfile | 52 +++++++ ...untu-18.04-devel-onednn-jupyter.Dockerfile | 98 +++++++++++++ .../ubuntu-18.04-devel-onednn.Dockerfile | 84 +++++++++++ .../ubuntu-18.04-onednn-jupyter.Dockerfile | 66 +++++++++ .../onednn/ubuntu-18.04-onednn.Dockerfile | 52 +++++++ ...untu-20.04-devel-onednn-jupyter.Dockerfile | 98 +++++++++++++ .../ubuntu-20.04-devel-onednn.Dockerfile | 84 +++++++++++ .../ubuntu-20.04-onednn-jupyter.Dockerfile | 76 ++++++++++ .../onednn/ubuntu-20.04-onednn.Dockerfile | 62 +++++++++ .../onednn/ubuntu/bazel.partial.Dockerfile | 10 ++ .../onednn/ubuntu/cpu.partial.Dockerfile | 1 + .../onednn/ubuntu/devel.partial.Dockerfile | 33 +++++ .../onednn/ubuntu/jupyter.partial.Dockerfile | 13 ++ .../onednn/ubuntu/python.partial.Dockerfile | 13 ++ .../onednn/ubuntu/python37.partial.Dockerfile | 23 +++ .../ubuntu/test-devel.partial.Dockerfile | 0 .../onednn/ubuntu/version.partial.Dockerfile | 1 + tensorflow/tools/dockerfiles/spec.yml | 131 ++++++++++++++++++ .../tools/dockerfiles/tests/import-onednn.sh | 28 ++++ 22 files changed, 1173 insertions(+) create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-devel-onednn-jupyter.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-devel-onednn.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-onednn-jupyter.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-onednn.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-devel-onednn-jupyter.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-devel-onednn.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-onednn-jupyter.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-onednn.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn-jupyter.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-onednn-jupyter.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-onednn.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/onednn/ubuntu/bazel.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/onednn/ubuntu/cpu.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/onednn/ubuntu/devel.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/onednn/ubuntu/jupyter.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/onednn/ubuntu/python.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/onednn/ubuntu/python37.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/onednn/ubuntu/test-devel.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/onednn/ubuntu/version.partial.Dockerfile create mode 100755 tensorflow/tools/dockerfiles/tests/import-onednn.sh diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-devel-onednn-jupyter.Dockerfile 
b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-devel-onednn-jupyter.Dockerfile new file mode 100644 index 00000000000..ffc951f3fc3 --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-devel-onednn-jupyter.Dockerfile @@ -0,0 +1,98 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. + +ARG UBUNTU_VERSION=20.04 + +FROM ubuntu:${UBUNTU_VERSION} AS base + +ARG DEBIAN_FRONTEND="noninteractive" + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + curl \ + git \ + libcurl3-dev \ + libfreetype6-dev \ + libhdf5-serial-dev \ + libzmq3-dev \ + pkg-config \ + rsync \ + software-properties-common \ + sudo \ + unzip \ + zip \ + zlib1g-dev \ + openjdk-8-jdk \ + openjdk-8-jre-headless \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +ENV CI_BUILD_PYTHON python + +# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1 +ARG CHECKOUT_TF_SRC=0 +ARG TF_BRANCH=master +RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git --branch "${TF_BRANCH}" --single-branch /tensorflow_src || true + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + python3 \ + python3-pip + +RUN python3 -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -s $(which python3) /usr/local/bin/python + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + curl + +# Install bazel +ARG BAZEL_VERSION=3.1.0 +RUN mkdir /bazel && \ + curl -fSsL -o /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ + curl -fSsL -o /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \ + bash /bazel/installer.sh && \ + rm -f /bazel/installer.sh + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc + +RUN python3 -m pip install --no-cache-dir jupyter matplotlib +# Pin ipykernel and nbformat; see https://github.com/ipython/ipykernel/issues/422 +RUN python3 -m pip install --no-cache-dir jupyter_http_over_ws ipykernel==5.1.1 nbformat==4.4.0 +RUN jupyter serverextension enable --py jupyter_http_over_ws + +RUN mkdir -p /tf/ && chmod -R a+rwx /tf/ +RUN mkdir /.local && chmod a+rwx /.local +WORKDIR /tf +EXPOSE 8888 + +RUN python3 -m ipykernel.kernelspec + +CMD ["bash", "-c", "source /etc/bash.bashrc && 
jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"] diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-devel-onednn.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-devel-onednn.Dockerfile new file mode 100644 index 00000000000..10ae251d7ae --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-devel-onednn.Dockerfile @@ -0,0 +1,84 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. + +ARG UBUNTU_VERSION=20.04 + +FROM ubuntu:${UBUNTU_VERSION} AS base + +ARG DEBIAN_FRONTEND="noninteractive" + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + curl \ + git \ + libcurl3-dev \ + libfreetype6-dev \ + libhdf5-serial-dev \ + libzmq3-dev \ + pkg-config \ + rsync \ + software-properties-common \ + sudo \ + unzip \ + zip \ + zlib1g-dev \ + openjdk-8-jdk \ + openjdk-8-jre-headless \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +ENV CI_BUILD_PYTHON python + +# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1 +ARG CHECKOUT_TF_SRC=0 +ARG TF_BRANCH=master +RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git --branch "${TF_BRANCH}" --single-branch /tensorflow_src || true + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + python3 \ + python3-pip + +RUN python3 -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -s $(which python3) /usr/local/bin/python + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + curl + +# Install bazel +ARG BAZEL_VERSION=3.1.0 +RUN mkdir /bazel && \ + curl -fSsL -o /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ + curl -fSsL -o /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \ + bash /bazel/installer.sh && \ + rm -f /bazel/installer.sh + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-onednn-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-onednn-jupyter.Dockerfile new file mode 100644 index 00000000000..30729f9a6e3 --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-onednn-jupyter.Dockerfile 
@@ -0,0 +1,66 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. + +ARG UBUNTU_VERSION=20.04 + +FROM ubuntu:${UBUNTU_VERSION} as base + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + python3 \ + python3-pip + +RUN python3 -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -s $(which python3) /usr/local/bin/python + +# Options: +# tensorflow +# tensorflow-gpu +# tf-nightly +# tf-nightly-gpu +# Set --build-arg TF_PACKAGE_VERSION=1.11.0rc0 to install a specific version. +# Installs the latest version by default. +ARG TF_PACKAGE=tensorflow +ARG TF_PACKAGE_VERSION= +RUN python3 -m pip install --no-cache-dir ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}} + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc + +RUN python3 -m pip install --no-cache-dir jupyter matplotlib +# Pin ipykernel and nbformat; see https://github.com/ipython/ipykernel/issues/422 +RUN python3 -m pip install --no-cache-dir jupyter_http_over_ws ipykernel==5.1.1 nbformat==4.4.0 +RUN jupyter serverextension enable --py jupyter_http_over_ws + +RUN mkdir -p /tf/ && chmod -R a+rwx /tf/ +RUN mkdir /.local && chmod a+rwx /.local +WORKDIR /tf +EXPOSE 8888 + +RUN python3 -m ipykernel.kernelspec + +CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"] diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-onednn.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-onednn.Dockerfile new file mode 100644 index 00000000000..6a6cdf52a55 --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-onednn.Dockerfile @@ -0,0 +1,52 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. 
Please refer to the TensorFlow dockerfiles documentation +# for more information. + +ARG UBUNTU_VERSION=20.04 + +FROM ubuntu:${UBUNTU_VERSION} as base + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + python3 \ + python3-pip + +RUN python3 -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -s $(which python3) /usr/local/bin/python + +# Options: +# tensorflow +# tensorflow-gpu +# tf-nightly +# tf-nightly-gpu +# Set --build-arg TF_PACKAGE_VERSION=1.11.0rc0 to install a specific version. +# Installs the latest version by default. +ARG TF_PACKAGE=tensorflow +ARG TF_PACKAGE_VERSION= +RUN python3 -m pip install --no-cache-dir ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}} + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-devel-onednn-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-devel-onednn-jupyter.Dockerfile new file mode 100644 index 00000000000..ffc951f3fc3 --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-devel-onednn-jupyter.Dockerfile @@ -0,0 +1,98 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. 
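+# Build sketch (all values below are illustrative examples, not requirements of
+# this file): the devel images are driven by the build args declared further
+# down, e.g.
+#   docker build -f ubuntu-18.04-devel-onednn-jupyter.Dockerfile \
+#     --build-arg UBUNTU_VERSION=18.04 --build-arg CHECKOUT_TF_SRC=1 \
+#     -t tf-onednn-devel-jupyter:18.04 .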
+ +ARG UBUNTU_VERSION=20.04 + +FROM ubuntu:${UBUNTU_VERSION} AS base + +ARG DEBIAN_FRONTEND="noninteractive" + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + curl \ + git \ + libcurl3-dev \ + libfreetype6-dev \ + libhdf5-serial-dev \ + libzmq3-dev \ + pkg-config \ + rsync \ + software-properties-common \ + sudo \ + unzip \ + zip \ + zlib1g-dev \ + openjdk-8-jdk \ + openjdk-8-jre-headless \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +ENV CI_BUILD_PYTHON python + +# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1 +ARG CHECKOUT_TF_SRC=0 +ARG TF_BRANCH=master +RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git --branch "${TF_BRANCH}" --single-branch /tensorflow_src || true + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + python3 \ + python3-pip + +RUN python3 -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -s $(which python3) /usr/local/bin/python + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + curl + +# Install bazel +ARG BAZEL_VERSION=3.1.0 +RUN mkdir /bazel && \ + curl -fSsL -o /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ + curl -fSsL -o /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \ + bash /bazel/installer.sh && \ + rm -f /bazel/installer.sh + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc + +RUN python3 -m pip install --no-cache-dir jupyter matplotlib +# Pin ipykernel and nbformat; see https://github.com/ipython/ipykernel/issues/422 +RUN python3 -m pip install --no-cache-dir jupyter_http_over_ws ipykernel==5.1.1 nbformat==4.4.0 +RUN jupyter serverextension enable --py jupyter_http_over_ws + +RUN mkdir -p /tf/ && chmod -R a+rwx /tf/ +RUN mkdir /.local && chmod a+rwx /.local +WORKDIR /tf +EXPOSE 8888 + +RUN python3 -m ipykernel.kernelspec + +CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"] diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-devel-onednn.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-devel-onednn.Dockerfile new file mode 100644 index 00000000000..10ae251d7ae --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-devel-onednn.Dockerfile @@ -0,0 +1,84 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. 
+# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. + +ARG UBUNTU_VERSION=20.04 + +FROM ubuntu:${UBUNTU_VERSION} AS base + +ARG DEBIAN_FRONTEND="noninteractive" + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + curl \ + git \ + libcurl3-dev \ + libfreetype6-dev \ + libhdf5-serial-dev \ + libzmq3-dev \ + pkg-config \ + rsync \ + software-properties-common \ + sudo \ + unzip \ + zip \ + zlib1g-dev \ + openjdk-8-jdk \ + openjdk-8-jre-headless \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +ENV CI_BUILD_PYTHON python + +# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1 +ARG CHECKOUT_TF_SRC=0 +ARG TF_BRANCH=master +RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git --branch "${TF_BRANCH}" --single-branch /tensorflow_src || true + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + python3 \ + python3-pip + +RUN python3 -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -s $(which python3) /usr/local/bin/python + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + curl + +# Install bazel +ARG BAZEL_VERSION=3.1.0 +RUN mkdir /bazel && \ + curl -fSsL -o /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ + curl -fSsL -o /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \ + bash /bazel/installer.sh && \ + rm -f /bazel/installer.sh + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-onednn-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-onednn-jupyter.Dockerfile new file mode 100644 index 00000000000..30729f9a6e3 --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-onednn-jupyter.Dockerfile @@ -0,0 +1,66 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. 
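+# Usage sketch (the image tag is only an example): the Jupyter images expose
+# port 8888 and start a notebook server, so a built image is normally run with
+# that port published:
+#   docker run -it -p 8888:8888 tf-onednn-jupyter:18.04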
+ +ARG UBUNTU_VERSION=20.04 + +FROM ubuntu:${UBUNTU_VERSION} as base + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + python3 \ + python3-pip + +RUN python3 -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -s $(which python3) /usr/local/bin/python + +# Options: +# tensorflow +# tensorflow-gpu +# tf-nightly +# tf-nightly-gpu +# Set --build-arg TF_PACKAGE_VERSION=1.11.0rc0 to install a specific version. +# Installs the latest version by default. +ARG TF_PACKAGE=tensorflow +ARG TF_PACKAGE_VERSION= +RUN python3 -m pip install --no-cache-dir ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}} + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc + +RUN python3 -m pip install --no-cache-dir jupyter matplotlib +# Pin ipykernel and nbformat; see https://github.com/ipython/ipykernel/issues/422 +RUN python3 -m pip install --no-cache-dir jupyter_http_over_ws ipykernel==5.1.1 nbformat==4.4.0 +RUN jupyter serverextension enable --py jupyter_http_over_ws + +RUN mkdir -p /tf/ && chmod -R a+rwx /tf/ +RUN mkdir /.local && chmod a+rwx /.local +WORKDIR /tf +EXPOSE 8888 + +RUN python3 -m ipykernel.kernelspec + +CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"] diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-onednn.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-onednn.Dockerfile new file mode 100644 index 00000000000..6a6cdf52a55 --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-onednn.Dockerfile @@ -0,0 +1,52 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. + +ARG UBUNTU_VERSION=20.04 + +FROM ubuntu:${UBUNTU_VERSION} as base + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + python3 \ + python3-pip + +RUN python3 -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -s $(which python3) /usr/local/bin/python + +# Options: +# tensorflow +# tensorflow-gpu +# tf-nightly +# tf-nightly-gpu +# Set --build-arg TF_PACKAGE_VERSION=1.11.0rc0 to install a specific version. +# Installs the latest version by default. 
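+# Note on the pip install line below: the ${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}}
+# shell expansion appends "==<version>" only when TF_PACKAGE_VERSION is non-empty,
+# so leaving that build arg unset installs the latest release of ${TF_PACKAGE}.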
+ARG TF_PACKAGE=tensorflow +ARG TF_PACKAGE_VERSION= +RUN python3 -m pip install --no-cache-dir ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}} + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn-jupyter.Dockerfile new file mode 100644 index 00000000000..ffc951f3fc3 --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn-jupyter.Dockerfile @@ -0,0 +1,98 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. + +ARG UBUNTU_VERSION=20.04 + +FROM ubuntu:${UBUNTU_VERSION} AS base + +ARG DEBIAN_FRONTEND="noninteractive" + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + curl \ + git \ + libcurl3-dev \ + libfreetype6-dev \ + libhdf5-serial-dev \ + libzmq3-dev \ + pkg-config \ + rsync \ + software-properties-common \ + sudo \ + unzip \ + zip \ + zlib1g-dev \ + openjdk-8-jdk \ + openjdk-8-jre-headless \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +ENV CI_BUILD_PYTHON python + +# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1 +ARG CHECKOUT_TF_SRC=0 +ARG TF_BRANCH=master +RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git --branch "${TF_BRANCH}" --single-branch /tensorflow_src || true + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + python3 \ + python3-pip + +RUN python3 -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -s $(which python3) /usr/local/bin/python + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + curl + +# Install bazel +ARG BAZEL_VERSION=3.1.0 +RUN mkdir /bazel && \ + curl -fSsL -o /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ + curl -fSsL -o /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \ + bash /bazel/installer.sh && \ + rm -f /bazel/installer.sh + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc + +RUN python3 -m pip install --no-cache-dir jupyter matplotlib +# Pin ipykernel and nbformat; see https://github.com/ipython/ipykernel/issues/422 +RUN python3 -m pip install 
--no-cache-dir jupyter_http_over_ws ipykernel==5.1.1 nbformat==4.4.0 +RUN jupyter serverextension enable --py jupyter_http_over_ws + +RUN mkdir -p /tf/ && chmod -R a+rwx /tf/ +RUN mkdir /.local && chmod a+rwx /.local +WORKDIR /tf +EXPOSE 8888 + +RUN python3 -m ipykernel.kernelspec + +CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"] diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn.Dockerfile new file mode 100644 index 00000000000..10ae251d7ae --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn.Dockerfile @@ -0,0 +1,84 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. + +ARG UBUNTU_VERSION=20.04 + +FROM ubuntu:${UBUNTU_VERSION} AS base + +ARG DEBIAN_FRONTEND="noninteractive" + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + curl \ + git \ + libcurl3-dev \ + libfreetype6-dev \ + libhdf5-serial-dev \ + libzmq3-dev \ + pkg-config \ + rsync \ + software-properties-common \ + sudo \ + unzip \ + zip \ + zlib1g-dev \ + openjdk-8-jdk \ + openjdk-8-jre-headless \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +ENV CI_BUILD_PYTHON python + +# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1 +ARG CHECKOUT_TF_SRC=0 +ARG TF_BRANCH=master +RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git --branch "${TF_BRANCH}" --single-branch /tensorflow_src || true + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + python3 \ + python3-pip + +RUN python3 -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -s $(which python3) /usr/local/bin/python + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + curl + +# Install bazel +ARG BAZEL_VERSION=3.1.0 +RUN mkdir /bazel && \ + curl -fSsL -o /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ + curl -fSsL -o /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \ + bash /bazel/installer.sh && \ + rm -f /bazel/installer.sh + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc diff --git 
a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-onednn-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-onednn-jupyter.Dockerfile new file mode 100644 index 00000000000..2b145259c52 --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-onednn-jupyter.Dockerfile @@ -0,0 +1,76 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. + +ARG UBUNTU_VERSION=20.04 + +FROM ubuntu:${UBUNTU_VERSION} as base + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 +ARG PYTHON=python3 + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + curl \ + software-properties-common + +RUN add-apt-repository ppa:deadsnakes/ppa + +RUN apt-get install -y --no-install-recommends --fix-missing \ + ${PYTHON} + +RUN curl -fSsL https://bootstrap.pypa.io/get-pip.py | python3.7 +RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -sf $(which ${PYTHON}) /usr/local/bin/python && \ + ln -sf $(which ${PYTHON}) /usr/local/bin/python3 && \ + ln -sf $(which ${PYTHON}) /usr/bin/python && \ + ln -sf $(which ${PYTHON}) /usr/bin/python3 + +# Options: +# tensorflow +# tensorflow-gpu +# tf-nightly +# tf-nightly-gpu +# Set --build-arg TF_PACKAGE_VERSION=1.11.0rc0 to install a specific version. +# Installs the latest version by default. +ARG TF_PACKAGE=tensorflow +ARG TF_PACKAGE_VERSION= +RUN python3 -m pip install --no-cache-dir ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}} + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc + +RUN python3 -m pip install --no-cache-dir jupyter matplotlib +# Pin ipykernel and nbformat; see https://github.com/ipython/ipykernel/issues/422 +RUN python3 -m pip install --no-cache-dir jupyter_http_over_ws ipykernel==5.1.1 nbformat==4.4.0 +RUN jupyter serverextension enable --py jupyter_http_over_ws + +RUN mkdir -p /tf/ && chmod -R a+rwx /tf/ +RUN mkdir /.local && chmod a+rwx /.local +WORKDIR /tf +EXPOSE 8888 + +RUN python3 -m ipykernel.kernelspec + +CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"] diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-onednn.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-onednn.Dockerfile new file mode 100644 index 00000000000..666e0839d39 --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-onednn.Dockerfile @@ -0,0 +1,62 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. + +ARG UBUNTU_VERSION=20.04 + +FROM ubuntu:${UBUNTU_VERSION} as base + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 +ARG PYTHON=python3 + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + curl \ + software-properties-common + +RUN add-apt-repository ppa:deadsnakes/ppa + +RUN apt-get install -y --no-install-recommends --fix-missing \ + ${PYTHON} + +RUN curl -fSsL https://bootstrap.pypa.io/get-pip.py | python3.7 +RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -sf $(which ${PYTHON}) /usr/local/bin/python && \ + ln -sf $(which ${PYTHON}) /usr/local/bin/python3 && \ + ln -sf $(which ${PYTHON}) /usr/bin/python && \ + ln -sf $(which ${PYTHON}) /usr/bin/python3 + +# Options: +# tensorflow +# tensorflow-gpu +# tf-nightly +# tf-nightly-gpu +# Set --build-arg TF_PACKAGE_VERSION=1.11.0rc0 to install a specific version. +# Installs the latest version by default. 
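+# To check that the installed package is an oneDNN (MKL) build, the same probe
+# used by tests/import-onednn.sh can be run inside the image:
+#   python -c "from tensorflow.python import _pywrap_util_port; print(_pywrap_util_port.IsMklEnabled())"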
+ARG TF_PACKAGE=tensorflow +ARG TF_PACKAGE_VERSION= +RUN python3 -m pip install --no-cache-dir ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}} + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc diff --git a/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/bazel.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/bazel.partial.Dockerfile new file mode 100644 index 00000000000..2feb75a8185 --- /dev/null +++ b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/bazel.partial.Dockerfile @@ -0,0 +1,10 @@ +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + curl + +# Install bazel +ARG BAZEL_VERSION=3.1.0 +RUN mkdir /bazel && \ + curl -fSsL -o /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ + curl -fSsL -o /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \ + bash /bazel/installer.sh && \ + rm -f /bazel/installer.sh diff --git a/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/cpu.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/cpu.partial.Dockerfile new file mode 100644 index 00000000000..d01b26e27f6 --- /dev/null +++ b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/cpu.partial.Dockerfile @@ -0,0 +1 @@ +FROM ubuntu:${UBUNTU_VERSION} as base diff --git a/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/devel.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/devel.partial.Dockerfile new file mode 100644 index 00000000000..8466c30cf13 --- /dev/null +++ b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/devel.partial.Dockerfile @@ -0,0 +1,33 @@ +FROM ubuntu:${UBUNTU_VERSION} AS base + +ARG DEBIAN_FRONTEND="noninteractive" + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + curl \ + git \ + libcurl3-dev \ + libfreetype6-dev \ + libhdf5-serial-dev \ + libzmq3-dev \ + pkg-config \ + rsync \ + software-properties-common \ + sudo \ + unzip \ + zip \ + zlib1g-dev \ + openjdk-8-jdk \ + openjdk-8-jre-headless \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +ENV CI_BUILD_PYTHON python + +# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1 +ARG CHECKOUT_TF_SRC=0 +ARG TF_BRANCH=master +RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git --branch "${TF_BRANCH}" --single-branch /tensorflow_src || true diff --git a/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/jupyter.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/jupyter.partial.Dockerfile new file mode 100644 index 00000000000..d01a945e5b6 --- /dev/null +++ b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/jupyter.partial.Dockerfile @@ -0,0 +1,13 @@ +RUN python3 -m pip install --no-cache-dir jupyter matplotlib +# Pin ipykernel and nbformat; see https://github.com/ipython/ipykernel/issues/422 +RUN python3 -m pip install --no-cache-dir jupyter_http_over_ws ipykernel==5.1.1 nbformat==4.4.0 +RUN jupyter serverextension enable --py jupyter_http_over_ws + +RUN mkdir -p /tf/ && chmod -R a+rwx /tf/ +RUN mkdir /.local && chmod a+rwx /.local +WORKDIR /tf +EXPOSE 8888 + +RUN python3 -m ipykernel.kernelspec + +CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook 
--notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"] diff --git a/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/python.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/python.partial.Dockerfile new file mode 100644 index 00000000000..be4c4a08c03 --- /dev/null +++ b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/python.partial.Dockerfile @@ -0,0 +1,13 @@ +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + python3 \ + python3-pip + +RUN python3 -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -s $(which python3) /usr/local/bin/python diff --git a/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/python37.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/python37.partial.Dockerfile new file mode 100644 index 00000000000..85e7f51309f --- /dev/null +++ b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/python37.partial.Dockerfile @@ -0,0 +1,23 @@ +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 +ARG PYTHON=python3 + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + curl \ + software-properties-common + +RUN add-apt-repository ppa:deadsnakes/ppa + +RUN apt-get install -y --no-install-recommends --fix-missing \ + ${PYTHON} + +RUN curl -fSsL https://bootstrap.pypa.io/get-pip.py | python3.7 +RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -sf $(which ${PYTHON}) /usr/local/bin/python && \ + ln -sf $(which ${PYTHON}) /usr/local/bin/python3 && \ + ln -sf $(which ${PYTHON}) /usr/bin/python && \ + ln -sf $(which ${PYTHON}) /usr/bin/python3 diff --git a/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/test-devel.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/test-devel.partial.Dockerfile new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/version.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/version.partial.Dockerfile new file mode 100644 index 00000000000..4b1dee24baf --- /dev/null +++ b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/version.partial.Dockerfile @@ -0,0 +1 @@ +ARG UBUNTU_VERSION=20.04 diff --git a/tensorflow/tools/dockerfiles/spec.yml b/tensorflow/tools/dockerfiles/spec.yml index ea05d77d001..ec20f63f3d8 100644 --- a/tensorflow/tools/dockerfiles/spec.yml +++ b/tensorflow/tools/dockerfiles/spec.yml @@ -38,6 +38,18 @@ releases: versioned: tag_specs: - "{_TAG_PREFIX}{ubuntu}{jupyter}" + - "{_TAG_PREFIX}{ubuntu-1604-onednn}" + - "{_TAG_PREFIX}{ubuntu-1804-onednn}" + - "{_TAG_PREFIX}{ubuntu-2004-onednn}" + - "{_TAG_PREFIX}{ubuntu-1604-onednn}{onednn-jupyter}" + - "{_TAG_PREFIX}{ubuntu-1804-onednn}{onednn-jupyter}" + - "{_TAG_PREFIX}{ubuntu-2004-onednn}{onednn-jupyter}" + - "{_TAG_PREFIX}{ubuntu-1604-devel-onednn}" + - "{_TAG_PREFIX}{ubuntu-1804-devel-onednn}" + - "{_TAG_PREFIX}{ubuntu-2004-devel-onednn}" + - "{_TAG_PREFIX}{ubuntu-1604-devel-onednn}{onednn-jupyter}" + - "{_TAG_PREFIX}{ubuntu-1804-devel-onednn}{onednn-jupyter}" + - "{_TAG_PREFIX}{ubuntu-2004-devel-onednn}{onednn-jupyter}" # Dockerfiles stored in the TF repo; not pushed anywhere dockerfiles: @@ -50,6 +62,18 @@ releases: - "{ubuntu-devel-ppc64le}{jupyter}" - "{ubuntu-horovod}{jupyter}" - "{ubuntu-devel-horovod}{jupyter}" + - "{ubuntu-1604-onednn}" + - 
"{ubuntu-1804-onednn}" + - "{ubuntu-2004-onednn}" + - "{ubuntu-1604-onednn}{onednn-jupyter}" + - "{ubuntu-1804-onednn}{onednn-jupyter}" + - "{ubuntu-2004-onednn}{onednn-jupyter}" + - "{ubuntu-1604-devel-onednn}" + - "{ubuntu-1804-devel-onednn}" + - "{ubuntu-2004-devel-onednn}" + - "{ubuntu-1604-devel-onednn}{onednn-jupyter}" + - "{ubuntu-1804-devel-onednn}{onednn-jupyter}" + - "{ubuntu-2004-devel-onednn}{onednn-jupyter}" - "{ubuntu-devel-arm64v8}{jupyter}" slice_sets: @@ -60,6 +84,12 @@ slice_sets: partials: - jupyter + onednn-jupyter: + - add_to_name: "" + - add_to_name: "-jupyter" + partials: + - onednn/ubuntu/jupyter + ubuntu: - add_to_name: "" dockerfile_exclusive_name: "cpu" @@ -161,6 +191,107 @@ slice_sets: - CHECKOUT_TF_SRC=1 - CHECKOUT_HOROVOD_SRC=1 + ubuntu-1604-onednn: + - add_to_name: "-16.04-onednn" + dockerfile_exclusive_name: "ubuntu-16.04-onednn" + dockerfile_subdirectory: "onednn" + partials: + - onednn/ubuntu/version + - onednn/ubuntu/cpu + - onednn/ubuntu/python + - tensorflow + - shell + tests: + - import-onednn.sh + args: + - TF_PACKAGE=intel-tensorflow + - UBUNTU_VERSION=16.04 + + ubuntu-1804-onednn: + - add_to_name: "-18.04-onednn" + dockerfile_exclusive_name: "ubuntu-18.04-onednn" + dockerfile_subdirectory: "onednn" + partials: + - onednn/ubuntu/version + - onednn/ubuntu/cpu + - onednn/ubuntu/python + - tensorflow + - shell + tests: + - import-onednn.sh + args: + - TF_PACKAGE=intel-tensorflow + - UBUNTU_VERSION=18.04 + + ubuntu-2004-onednn: + - add_to_name: "-20.04-onednn" + dockerfile_exclusive_name: "ubuntu-20.04-onednn" + dockerfile_subdirectory: "onednn" + partials: + - onednn/ubuntu/version + - onednn/ubuntu/cpu + - onednn/ubuntu/python37 + - tensorflow + - shell + tests: + - import-onednn.sh + args: + - TF_PACKAGE=intel-tensorflow + - UBUNTU_VERSION=20.04 + - PYTHON=python3.7 + + ubuntu-1604-devel-onednn: + - add_to_name: "-16.04-devel-onednn" + dockerfile_exclusive_name: "ubuntu-16.04-devel-onednn" + dockerfile_subdirectory: "onednn" + partials: + - onednn/ubuntu/version + - onednn/ubuntu/devel + - onednn/ubuntu/python + - onednn/ubuntu/bazel + - shell + tests: + - "" + args: + - UBUNTU_VERSION=16.04 + - CHECKOUT_TF_SRC=1 + - TF_BRANCH=master + + ubuntu-1804-devel-onednn: + - add_to_name: "-18.04-devel-onednn" + dockerfile_exclusive_name: "ubuntu-18.04-devel-onednn" + dockerfile_subdirectory: "onednn" + partials: + - onednn/ubuntu/version + - onednn/ubuntu/devel + - onednn/ubuntu/python + - onednn/ubuntu/bazel + - shell + tests: + - "" + args: + - UBUNTU_VERSION=18.04 + - CHECKOUT_TF_SRC=1 + - TF_BRANCH=master + + ubuntu-2004-devel-onednn: + - add_to_name: "-20.04-devel-onednn" + dockerfile_exclusive_name: "ubuntu-20.04-devel-onednn" + dockerfile_subdirectory: "onednn" + partials: + - onednn/ubuntu/version + - onednn/ubuntu/devel + - onednn/ubuntu/python + - onednn/ubuntu/bazel + - shell + tests: + - "" + args: + - UBUNTU_VERSION=20.04 + - PYTHON=python3.7 + - CHECKOUT_TF_SRC=1 + - TF_BRANCH=master + ubuntu-ppc64le: - add_to_name: "-ppc64le" dockerfile_exclusive_name: "cpu-ppc64le" diff --git a/tensorflow/tools/dockerfiles/tests/import-onednn.sh b/tensorflow/tools/dockerfiles/tests/import-onednn.sh new file mode 100755 index 00000000000..b3876c30bc1 --- /dev/null +++ b/tensorflow/tools/dockerfiles/tests/import-onednn.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +#!/bin/bash + +{ # try + echo `python -c 'from tensorflow.python import _pywrap_util_port; print(_pywrap_util_port.IsMklEnabled())'` + echo "PASS: MKL is enabled" +} || { # catch + echo `python -c 'from tensorflow.python import pywrap_tensorflow; print(pywrap_tensorflow.IsMklEnabled())'` + echo "PASS: Old MKL is detected" +} || { # finally + die "FAIL: MKL is not enabled" +} From 618ab4c18524f1b683155f30331172747c924e17 Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Wed, 1 Jul 2020 23:51:24 +0000 Subject: [PATCH 0121/2522] rever to old --- tensorflow/c/kernels.cc | 4 +- tensorflow/c/kernels.h | 1 + tensorflow/c/kernels/summary_op.cc | 10 +-- tensorflow/c/kernels/summary_op_test.cc | 13 ++- tensorflow/c/tf_tensor.cc | 104 ++++++++++++++++++++---- tensorflow/c/tf_tensor.h | 5 -- tensorflow/c/tf_tensor_internal.h | 3 - 7 files changed, 96 insertions(+), 44 deletions(-) diff --git a/tensorflow/c/kernels.cc b/tensorflow/c/kernels.cc index fc65895c8d5..505cb40f13c 100644 --- a/tensorflow/c/kernels.cc +++ b/tensorflow/c/kernels.cc @@ -27,9 +27,6 @@ limitations under the License. #include "tensorflow/core/platform/types.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/lib/gtl/array_slice.h" - // This file forms the basis of a stable ABI for third-party kernel // implementations. It is crucial that changes to this file are made cautiously // and with a focus on maintaining both source and binary compatibility. 
@@ -293,3 +290,4 @@ TF_Tensor* TF_AllocateTemp(TF_OpKernelContext* context, TF_DataType dtype, } return tf_tensor_temp; } + diff --git a/tensorflow/c/kernels.h b/tensorflow/c/kernels.h index dd685583a4f..8ed3488988d 100644 --- a/tensorflow/c/kernels.h +++ b/tensorflow/c/kernels.h @@ -202,6 +202,7 @@ TF_CAPI_EXPORT TF_Tensor* TF_AllocateOutput(TF_OpKernelContext* context, TF_CAPI_EXPORT extern TF_Tensor* TF_AllocateTemp(TF_OpKernelContext* context, TF_DataType dtype, int64_t* dims, int num_dims, TF_Status* status); + #ifdef __cplusplus } /* end extern "C" */ #endif diff --git a/tensorflow/c/kernels/summary_op.cc b/tensorflow/c/kernels/summary_op.cc index d2220670d74..23fd437af78 100644 --- a/tensorflow/c/kernels/summary_op.cc +++ b/tensorflow/c/kernels/summary_op.cc @@ -81,10 +81,6 @@ static void SummaryScalarOp_Compute(void* kernel, TF_OpKernelContext* ctx) { } TF_DeleteTensor(summary_tensor); } - - if (TF_GetCode(status) != TF_OK) { - TF_OpKernelContext_Failure(ctx, status); - } TF_DeleteStatus(status); TF_DeleteTensor(tags); TF_DeleteTensor(values); @@ -126,11 +122,10 @@ void RegisterSummaryScalarOpKernel() { static_cast(tensorflow::DataTypeToEnum::v()), status); CHECK_EQ(TF_OK, TF_GetCode(status)) << "Error while adding type constraint"; - TF_RegisterKernelBuilder("SummaryScalarOp", builder, status); + TF_RegisterKernelBuilder("SummaryScalar", builder, status); CHECK_EQ(TF_OK, TF_GetCode(status)) << "Error while registering Summary Scalar kernel"; } - #if GOOGLE_CUDA { auto* builder = TF_NewKernelBuilder("SummaryScalar", @@ -143,14 +138,13 @@ void RegisterSummaryScalarOpKernel() { << "Error while registering CUDA SummaryScalar kernel"; } #endif - TF_DeleteStatus(status); } // A dummy static variable initialized by a lambda whose side-effect is to // register the bitcast kernel. 
TF_ATTRIBUTE_UNUSED static bool IsSummaryScalarOpKernelRegistered = []() { - if (SHOULD_REGISTER_OP_KERNEL("SummaryScalarOp")) { + if (SHOULD_REGISTER_OP_KERNEL("SummaryScalar")) { RegisterSummaryScalarOpKernel(); RegisterSummaryScalarOpKernel(); RegisterSummaryScalarOpKernel(); diff --git a/tensorflow/c/kernels/summary_op_test.cc b/tensorflow/c/kernels/summary_op_test.cc index 5cf84453f80..d8dbf622a55 100644 --- a/tensorflow/c/kernels/summary_op_test.cc +++ b/tensorflow/c/kernels/summary_op_test.cc @@ -52,10 +52,13 @@ void TestScalarSummaryOp(Tensor* tags, Tensor* values, string expected_summary, Status status; NodeDef def; def.set_op("SummaryScalar"); + def.set_device(DEVICE_CPU); + AttrValue valuesTypeAttr; SetAttrValue(values->dtype(), &valuesTypeAttr); (*def.mutable_attr())["T"] = valuesTypeAttr; + def.add_input( strings::StrCat("input1: ", DataTypeString(tags->dtype()))); def.add_input( @@ -65,8 +68,6 @@ void TestScalarSummaryOp(Tensor* tags, Tensor* values, string expected_summary, CreateOpKernel(DeviceType(DEVICE_CPU), nullptr, nullptr, def, 1, &status); ASSERT_TRUE(status.ok()) << status.ToString(); - - // Initialize OpKernel parameters OpKernelContext::Params params; DummyDevice dummy_device(nullptr); params.device = &dummy_device; @@ -76,10 +77,8 @@ void TestScalarSummaryOp(Tensor* tags, Tensor* values, string expected_summary, inputs.emplace_back(values); params.inputs = &inputs; OpKernelContext ctx(¶ms, 1); - AllocatorAttributes alloc_attrs; - std::vector output_alloc_attrs({alloc_attrs}); - params.output_attr_array = output_alloc_attrs.data(); kernel->Compute(&ctx); + ASSERT_EQ(expected_code, ctx.status().code()); if (expected_code == error::OK){ Summary summary; @@ -88,8 +87,8 @@ void TestScalarSummaryOp(Tensor* tags, Tensor* values, string expected_summary, } } -TEST(ScalarSummaryOpTest, SimpleFloat) { - int vectorSize = 3; +TEST(ScalarSummaryOpTest, Test) { + int vectorSize = 2; Tensor tags(DT_STRING, {vectorSize}); Tensor values(DT_FLOAT, {vectorSize}); tags.vec()(0) = "tag1"; diff --git a/tensorflow/c/tf_tensor.cc b/tensorflow/c/tf_tensor.cc index 39f0176c0bf..34c91fc23dc 100644 --- a/tensorflow/c/tf_tensor.cc +++ b/tensorflow/c/tf_tensor.cc @@ -28,8 +28,6 @@ limitations under the License. #include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/lib/core/coding.h" #include "tensorflow/core/platform/casts.h" -#include -#include using tensorflow::Status; using tensorflow::Tensor; @@ -182,11 +180,6 @@ void TF_TensorBitcastFrom(const TF_Tensor* from, TF_DataType type, Set_TF_Status_from_Status(status, cc_status); } -std::string TF_ShapeDebugString(const TF_Tensor* t){ - return tensorflow::down_cast(t->tensor) - ->ShapeDebugString(); -} - namespace tensorflow { void TensorInterface::Release() { delete this; } @@ -232,10 +225,6 @@ Status TensorInterface::BitcastFrom(const TensorInterface& from, DataType type, return tensor_.BitcastFrom(from.tensor_, type, s); } -std::string TensorInterface::ShapeDebugString() const { - return tensor_.shape().DebugString(); -} - } // namespace tensorflow // -------------------------------------------------------------------------- @@ -267,7 +256,6 @@ static TF_Tensor* EmptyTensor(TF_DataType dtype, namespace tensorflow { // Non-static for testing. 
- TF_Tensor* TF_TensorFromTensor(const tensorflow::Tensor& src, Status* status) { *status = tensorflow::Status::OK(); if (!src.IsInitialized()) { @@ -295,12 +283,62 @@ TF_Tensor* TF_TensorFromTensor(const tensorflow::Tensor& src, Status* status) { std::memcpy(TF_TensorData(t), str.c_str(), str.size()); return t; } + if (src.dtype() != tensorflow::DT_STRING) { + Tensor tensor; + if (!tensor.CopyFrom(src, src.shape())) { + return nullptr; + } + return new TF_Tensor{new tensorflow::TensorInterface(tensor)}; + } + // DT_STRING tensors require a copying since TF_Tensor.buffer expects a flatly + // encoded sequence of strings. - Tensor tensor; - if (!tensor.CopyFrom(src, src.shape())) { + // Compute bytes needed for encoding. + size_t size = 0; + const auto& srcarray = src.flat(); + for (int i = 0; i < srcarray.size(); ++i) { + const string& s = srcarray(i); + // uint64 starting_offset, TF_StringEncode-d string. + size += sizeof(tensorflow::uint64) + TF_StringEncodedSize(s.size()); + } + + // Encode all strings. + char* base = new char[size]; + char* data_start = base + sizeof(tensorflow::uint64) * srcarray.size(); + char* dst = data_start; // Where next string is encoded. + size_t dst_len = size - static_cast(data_start - base); + tensorflow::uint64* offsets = reinterpret_cast(base); + for (int i = 0; i < srcarray.size(); ++i) { + *offsets = (dst - data_start); + offsets++; + const string& s = srcarray(i); + const size_t consumed = TF_StringEncodedSize(s.size()); + StringEncode(s.data(), s.size(), dst); + dst += consumed; + dst_len -= consumed; + } + if (dst != base + size) { + *status = InvalidArgument( + "invalid string tensor encoding (decoded ", (dst - base), + " bytes, but the tensor is encoded in ", size, " bytes"); + delete[] base; return nullptr; } - return new TF_Tensor{new tensorflow::TensorInterface(tensor)}; +// <<<<<<< HEAD +// return new TF_Tensor{new tensorflow::TensorInterface(tensor)}; +// ======= + + auto dims = src.shape().dim_sizes(); + std::vector dimvec(dims.size()); + for (size_t i = 0; i < dims.size(); ++i) { + dimvec[i] = dims[i]; + } + static_assert(sizeof(int64_t) == sizeof(tensorflow::int64), + "64-bit int types should match in size"); + return TF_NewTensor(TF_STRING, + reinterpret_cast(dimvec.data()), + dimvec.size(), base, size, DeleteArray, base); +// >>>>>>> parent of 477470d094... finished test file } Status TF_TensorToTensor(const TF_Tensor* src, Tensor* dst) { @@ -324,14 +362,44 @@ Status TensorInterface::ToTensor(tensorflow::Tensor* dst) const { } return Status::OK(); } - *dst = tensor_; + if (tensor_.dtype() != DT_STRING) { + *dst = tensor_; + return Status::OK(); + } + // TF_STRING tensors require copying since Tensor class expects a sequence of + // string objects. 
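+  // Buffer layout decoded below (illustrative example, not from the original
+  // sources): for a TF_STRING tensor holding {"ab", "c"}, the flat buffer is
+  // one uint64 offset per element, measured from the start of the data area,
+  // followed by the TF_StringEncode-d strings:
+  //   [ offset("ab")=0 | offset("c")=encoded_size("ab") ][ enc("ab") | enc("c") ]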
+ const tensorflow::int64 num_elements = tensor_.NumElements(); + const char* input = reinterpret_cast(Data()); + const size_t src_size = ByteSize(); + if (static_cast(src_size / sizeof(tensorflow::uint64)) < + num_elements) { + return InvalidArgument( + "Malformed TF_STRING tensor; too short to hold number of elements"); + } + const char* data_start = input + sizeof(tensorflow::uint64) * num_elements; + const char* limit = input + src_size; + + *dst = tensorflow::Tensor(tensor_.dtype(), tensor_.shape()); + auto dstarray = dst->flat(); + for (tensorflow::int64 i = 0; i < num_elements; ++i) { + tensorflow::uint64 offset = + reinterpret_cast(input)[i]; + if (static_cast(offset) >= (limit - data_start)) { + return InvalidArgument("Malformed TF_STRING tensor; element ", i, + " out of range"); + } + size_t len; + const char* p; + const char* srcp = data_start + offset; + Status status = TF_StringDecode_Impl(srcp, limit - srcp, &p, &len); + if (!status.ok()) return status; + dstarray(i).assign(p, len); + } return Status::OK(); } - bool TensorInterface::IsAligned() const { return tensor_.IsAligned(); } } // namespace tensorflow bool TF_TensorIsAligned(const TF_Tensor* t) { return t->tensor->IsAligned(); } - diff --git a/tensorflow/c/tf_tensor.h b/tensorflow/c/tf_tensor.h index f788c0828a8..acdf053e63a 100644 --- a/tensorflow/c/tf_tensor.h +++ b/tensorflow/c/tf_tensor.h @@ -22,9 +22,6 @@ limitations under the License. #include "tensorflow/c/tf_datatype.h" #include "tensorflow/c/tf_status.h" -#include -#include - // Macro to control visibility of exported symbols in the shared library (.so, // .dylib, .dll). // This duplicates the TF_EXPORT macro definition in @@ -154,8 +151,6 @@ TF_CAPI_EXPORT extern void TF_TensorBitcastFrom(const TF_Tensor* from, // Returns bool iff this tensor is aligned. TF_CAPI_EXPORT extern bool TF_TensorIsAligned(const TF_Tensor*); -TF_CAPI_EXPORT extern std::string TF_ShapeDebugString(const TF_Tensor*); - #ifdef __cplusplus } /* end extern "C" */ #endif diff --git a/tensorflow/c/tf_tensor_internal.h b/tensorflow/c/tf_tensor_internal.h index b3f44c71245..7a896dc5d11 100644 --- a/tensorflow/c/tf_tensor_internal.h +++ b/tensorflow/c/tf_tensor_internal.h @@ -24,8 +24,6 @@ limitations under the License. #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/platform/casts.h" -#include -#include // Internal structures used by the C API. These are likely to change and should // not be depended on. 
@@ -106,7 +104,6 @@ class TensorInterface : public AbstractTensorInterface { void* Data() const override; bool IsAligned() const override; bool CanMove() const override; - std::string ShapeDebugString() const; Status ToTensor(tensorflow::Tensor* dst) const; Status BitcastFrom(const TensorInterface& from, DataType type, From 9a326299119bb12b177960f9c1a407663a8d7223 Mon Sep 17 00:00:00 2001 From: qhduan Date: Thu, 2 Jul 2020 05:07:19 +0000 Subject: [PATCH 0122/2522] Fix the function name in debugging.md --- tensorflow/python/autograph/g3doc/reference/debugging.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/autograph/g3doc/reference/debugging.md b/tensorflow/python/autograph/g3doc/reference/debugging.md index 2c2a96cec86..fb75846b14b 100644 --- a/tensorflow/python/autograph/g3doc/reference/debugging.md +++ b/tensorflow/python/autograph/g3doc/reference/debugging.md @@ -21,10 +21,10 @@ Note: Python debugging can only be used to step through the code during graph construction time (or tracing time in the case of `tf.function`). To debug TensorFlow execution, use Eager execution. -### Debugging `tf.function`: `tf.config.experimental_execute_functions_eagerly` +### Debugging `tf.function`: `tf.config.experimental_run_functions_eagerly` When using `@tf.function`, you can temporarily toggle graph execution -by using `tf.config.experimental_execute_functions_eagerly`. This will +by using `tf.config.experimental_run_functions_eagerly`. This will effectively run the annotated code eagerly, without transformation. Since AutoGraph has semantics consistent with Eager, it's an effective way to debug the code step-by-step. @@ -58,7 +58,7 @@ f(1) 14 ... ``` -Adding a call to `tf.config.experimental_execute_functions_eagerly` before +Adding a call to `tf.config.experimental_run_functions_eagerly` before executing the function will land the debugger in the original code instead: ``` From d69076f6e6575886055845125003472e18882916 Mon Sep 17 00:00:00 2001 From: qhduan Date: Thu, 2 Jul 2020 05:07:19 +0000 Subject: [PATCH 0123/2522] Fix the function name in debugging.md --- tensorflow/python/autograph/g3doc/reference/debugging.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/autograph/g3doc/reference/debugging.md b/tensorflow/python/autograph/g3doc/reference/debugging.md index 2c2a96cec86..fb75846b14b 100644 --- a/tensorflow/python/autograph/g3doc/reference/debugging.md +++ b/tensorflow/python/autograph/g3doc/reference/debugging.md @@ -21,10 +21,10 @@ Note: Python debugging can only be used to step through the code during graph construction time (or tracing time in the case of `tf.function`). To debug TensorFlow execution, use Eager execution. -### Debugging `tf.function`: `tf.config.experimental_execute_functions_eagerly` +### Debugging `tf.function`: `tf.config.experimental_run_functions_eagerly` When using `@tf.function`, you can temporarily toggle graph execution -by using `tf.config.experimental_execute_functions_eagerly`. This will +by using `tf.config.experimental_run_functions_eagerly`. This will effectively run the annotated code eagerly, without transformation. Since AutoGraph has semantics consistent with Eager, it's an effective way to debug the code step-by-step. @@ -58,7 +58,7 @@ f(1) 14 ... 
``` -Adding a call to `tf.config.experimental_execute_functions_eagerly` before +Adding a call to `tf.config.experimental_run_functions_eagerly` before executing the function will land the debugger in the original code instead: ``` From 8af368971e050e1a92f04c08cc2c92effffb4007 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A5ns=20Nilsson?= Date: Thu, 2 Jul 2020 11:02:32 +0200 Subject: [PATCH 0124/2522] TFlu: Fix implicit conversion error in relu op --- tensorflow/lite/micro/kernels/activations.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/lite/micro/kernels/activations.cc b/tensorflow/lite/micro/kernels/activations.cc index 629144a54ce..c3cfc5e8ac3 100644 --- a/tensorflow/lite/micro/kernels/activations.cc +++ b/tensorflow/lite/micro/kernels/activations.cc @@ -37,7 +37,7 @@ inline void ReluQuantized(const TfLiteTensor* input, TfLiteTensor* output, ReluParams params; float act_min = 0.0; float act_max = std::numeric_limits::infinity(); - double real_multiplier = input->params.scale / output->params.scale; + double real_multiplier = static_cast(input->params.scale / output->params.scale); const RuntimeShape input_shape = GetTensorShape(input); const RuntimeShape output_shape = GetTensorShape(output); From e0d147ca99b4719e461547cf057e49163972a858 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Thu, 2 Jul 2020 18:24:51 +0000 Subject: [PATCH 0125/2522] Update tensorflow/python/ops/sparse_ops.py --- tensorflow/python/ops/sparse_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py index 69f1c510360..f671d768d35 100644 --- a/tensorflow/python/ops/sparse_ops.py +++ b/tensorflow/python/ops/sparse_ops.py @@ -2753,7 +2753,7 @@ def map_values(op, *args, **kwargs): >>> s = tf.sparse.from_dense([[1, 2, 0], ... [0, 4, 0], ... 
[1, 0, 0]]) - >>> print(tf.sparse.to_dense(tf.sparse.map_values(tf.ones_like, s)).numpy()) + >>> tf.sparse.to_dense(tf.sparse.map_values(tf.ones_like, s)).numpy() [[1 1 0] [0 1 0] [1 0 0]] From 167cea0048bd8154c0e397b3c457bc47f1cd6089 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Thu, 2 Jul 2020 18:24:59 +0000 Subject: [PATCH 0126/2522] Update tensorflow/python/ops/sparse_ops.py --- tensorflow/python/ops/sparse_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py index f671d768d35..fb74b8772d4 100644 --- a/tensorflow/python/ops/sparse_ops.py +++ b/tensorflow/python/ops/sparse_ops.py @@ -2761,7 +2761,7 @@ def map_values(op, *args, **kwargs): [[ 1 4 0] [ 0 16 0] [ 1 0 0]] - >>> print(tf.sparse.to_dense(tf.sparse.map_values(tf.add, s, 5)).numpy()) + >>> tf.sparse.to_dense(tf.sparse.map_values(tf.add, s, 5)).numpy() [[6 7 0] [0 9 0] [6 0 0]] From e5540f1c89e0432ccfb25d994f5021beab4e3333 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Thu, 2 Jul 2020 18:25:08 +0000 Subject: [PATCH 0127/2522] Update tensorflow/python/ops/sparse_ops.py --- tensorflow/python/ops/sparse_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py index fb74b8772d4..00dc4c8df95 100644 --- a/tensorflow/python/ops/sparse_ops.py +++ b/tensorflow/python/ops/sparse_ops.py @@ -2757,7 +2757,7 @@ def map_values(op, *args, **kwargs): [[1 1 0] [0 1 0] [1 0 0]] - >>> print(tf.sparse.to_dense(tf.sparse.map_values(tf.multiply, s, s)).numpy()) + >>> tf.sparse.to_dense(tf.sparse.map_values(tf.multiply, s, s)).numpy() [[ 1 4 0] [ 0 16 0] [ 1 0 0]] From 671515abda718d4c221216d1fbb2fc2cb415ddf4 Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Thu, 2 Jul 2020 17:22:15 -0400 Subject: [PATCH 0128/2522] Update shape_inference.cc --- tensorflow/core/framework/shape_inference.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/framework/shape_inference.cc b/tensorflow/core/framework/shape_inference.cc index 0861188ba4e..845d64eaf35 100644 --- a/tensorflow/core/framework/shape_inference.cc +++ b/tensorflow/core/framework/shape_inference.cc @@ -182,8 +182,8 @@ void InferenceContext::PreInputInit( } Status InferenceContext::ExpandOutputs(int new_output_size) { - int outputs_size_ = outputs_.size(); - if (new_output_size < outputs_size_) { + const int outputs_size = outputs_.size(); + if (new_output_size < outputs_size) { return errors::InvalidArgument("Trying to reduce number of outputs of op."); } outputs_.resize(new_output_size, nullptr); From 26ed7143871799b139760a27c2b9a70b57c34427 Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Thu, 2 Jul 2020 17:23:32 -0400 Subject: [PATCH 0129/2522] Update shape_inference.cc --- tensorflow/core/framework/shape_inference.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/framework/shape_inference.cc b/tensorflow/core/framework/shape_inference.cc index 845d64eaf35..556629b48f7 100644 --- a/tensorflow/core/framework/shape_inference.cc +++ b/tensorflow/core/framework/shape_inference.cc @@ -211,8 +211,8 @@ void InferenceContext::PostInputInit( } input_handle_shapes_and_types_ = std::move(input_handle_data); } - int inputs_size_ = inputs_.size(); - if (inputs_size_ != num_inputs_from_node_def) { + const int inputs_size = inputs_.size(); + if (inputs_size != num_inputs_from_node_def) { construction_status_ = errors::InvalidArgument( "Wrong number 
of inputs passed: ", inputs_.size(), " while ", num_inputs_from_node_def, " expected based on NodeDef"); From 0b501041f648289277e9e04529f7f1a8bb36c03f Mon Sep 17 00:00:00 2001 From: ShengYang1 Date: Fri, 3 Jul 2020 08:53:53 +0800 Subject: [PATCH 0130/2522] Cmp with cast --- tensorflow/core/framework/tensor_testutil.cc | 3 + tensorflow/core/grappler/op_types.cc | 6 ++ tensorflow/core/grappler/op_types.h | 1 + .../core/grappler/optimizers/remapper.cc | 87 +++++++++++++++++++ .../core/grappler/optimizers/remapper_test.cc | 59 +++++++++++++ .../core/grappler/utils/grappler_test.h | 2 +- .../core/kernels/cwise_op_equal_to_1.cc | 2 + tensorflow/core/kernels/cwise_op_greater.cc | 2 + .../core/kernels/cwise_op_greater_equal.cc | 2 + tensorflow/core/kernels/cwise_op_less.cc | 2 + .../core/kernels/cwise_op_less_equal.cc | 2 + .../core/kernels/cwise_op_not_equal_to_1.cc | 2 + tensorflow/core/kernels/cwise_ops.h | 28 +++++- tensorflow/core/kernels/cwise_ops_test.cc | 74 ++++++++-------- tensorflow/core/ops/math_ops.cc | 43 +++++++++ 15 files changed, 278 insertions(+), 37 deletions(-) diff --git a/tensorflow/core/framework/tensor_testutil.cc b/tensorflow/core/framework/tensor_testutil.cc index 313451d6b83..bee7beccb13 100644 --- a/tensorflow/core/framework/tensor_testutil.cc +++ b/tensorflow/core/framework/tensor_testutil.cc @@ -60,6 +60,9 @@ void ExpectClose(const Tensor& x, const Tensor& y, double atol, double rtol) { case DT_HALF: ExpectClose(x, y, atol, rtol); break; + case DT_BFLOAT16: + ExpectClose(x, y, atol, rtol); + break; case DT_FLOAT: ExpectClose(x, y, atol, rtol); break; diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc index efd23b6005e..f0207d24063 100644 --- a/tensorflow/core/grappler/op_types.cc +++ b/tensorflow/core/grappler/op_types.cc @@ -142,6 +142,12 @@ bool IsCollective(const NodeDef& node) { node.op() == "CollectiveBcastRecv"; } +bool IsComparison(const NodeDef& node) { + return node.op() == "Equal" || node.op() == "NotEqual" || + node.op() == "GreaterEqual" || node.op() == "Greater" || + node.op() == "LessEqual" || node.op() == "Less"; +} + bool IsComplex(const NodeDef& node) { return node.op() == "Complex"; } bool IsComplexAbs(const NodeDef& node) { return node.op() == "ComplexAbs"; } diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h index 59fc68daba5..c1d738d6714 100644 --- a/tensorflow/core/grappler/op_types.h +++ b/tensorflow/core/grappler/op_types.h @@ -51,6 +51,7 @@ bool IsBroadcastTo(const NodeDef& node); bool IsCast(const NodeDef& node); bool IsCheckNumerics(const NodeDef& node); bool IsCollective(const NodeDef& node); +bool IsComparison(const NodeDef& node); bool IsComplex(const NodeDef& node); bool IsComplexAbs(const NodeDef& node); bool IsConcat(const NodeDef& node); diff --git a/tensorflow/core/grappler/optimizers/remapper.cc b/tensorflow/core/grappler/optimizers/remapper.cc index 44e6174970e..86a855792b2 100644 --- a/tensorflow/core/grappler/optimizers/remapper.cc +++ b/tensorflow/core/grappler/optimizers/remapper.cc @@ -87,6 +87,15 @@ struct FusedBatchNorm { int fused_batch_norm = kMissingIndex; }; +// Comparison op with cast +struct ComparisonWithCast { + ComparisonWithCast() = default; + + int comparison = kMissingIndex; + int cast = kMissingIndex; + string fused_op = "_"; +}; + // FusedBatchNorm[$is_training] with fused side input and/or activation. 
struct FusedBatchNormEx { FusedBatchNormEx() = default; @@ -914,6 +923,41 @@ bool FindFusedBatchNormEx(const RemapperContext& ctx, int node_index, return false; } +bool FindComparisonWithCast(const RemapperContext& ctx, int node_index, + ComparisonWithCast* matched) { + const auto* node_view = ctx.graph_view.GetNode(node_index); + const auto* node_def = node_view->node(); + + if (!IsCast(*node_def) || HasControlFaninOrFanout(*node_view)) return false; + + if (node_view->NumRegularFanins() != 1) return false; + const auto& regular_fanin_0 = node_view->GetRegularFanin(0); + const auto* comparison = regular_fanin_0.node_view(); + const auto* comparison_node_def = comparison->node(); + if (!IsComparison(*comparison_node_def) || + HasControlFaninOrFanout(*comparison)) + return false; + + DataType comparator_dtype = GetDataTypeFromAttr(*comparison_node_def, "T"); + DataType src_dtype = GetDataTypeFromAttr(*node_def, "SrcT"); + DataType dst_dtype = GetDataTypeFromAttr(*node_def, "DstT"); + + if ((comparator_dtype != DT_FLOAT) && (comparator_dtype != DT_BFLOAT16)) + return false; + if ((comparator_dtype != dst_dtype) || (src_dtype != DT_BOOL)) return false; + + // Check that only one node consumes the 0-th output of a comparison. + if (!HasAtMostOneDataFanoutAtPort0(*comparison) || + IsInPreserveSet(ctx, comparison_node_def)) + return false; + + matched->cast = node_index; + matched->comparison = regular_fanin_0.node_index(); + matched->fused_op = + matched->fused_op + comparison_node_def->op() + "WithCast"; + return true; +} + void CopyConv2DAttributes(const NodeDef& conv2d, NodeDef* fused_conv2d) { DCHECK(IsConv2D(conv2d)) << "Input node must be a Conv2D"; @@ -1365,6 +1409,40 @@ Status AddFusedBatchNormExNode(RemapperContext* ctx, return Status::OK(); } +Status AddComparisonWithCastNode(RemapperContext* ctx, + const ComparisonWithCast& matched, + std::vector* invalidated_nodes, + std::vector* nodes_to_delete) { + const GraphDef* graph = ctx->graph_view.graph(); + const NodeDef& comparison = graph->node(matched.comparison); + const NodeDef& cast = graph->node(matched.cast); + + VLOG(2) << "Fuse " << cast.op() << " with comparison:" + << " cast=" << cast.name() << " invalidated=" + << " comparison=" << comparison.name(); + + // Replace Comparison and Cast with ComparisonWithCast. + NodeDef fused_op; + fused_op.set_op(matched.fused_op); + fused_op.set_name(cast.name()); + fused_op.set_device(comparison.device()); + + fused_op.add_input(comparison.input(0)); + fused_op.add_input(comparison.input(1)); + (*fused_op.mutable_attr())["T"] = comparison.attr().at("T"); + + utils::Mutation* mutation = ctx->graph_view.GetMutationBuilder(); + Status status; + mutation->AddNode(std::move(fused_op), &status); + TF_RETURN_IF_ERROR(status); + TF_RETURN_IF_ERROR(mutation->Apply()); + + (*nodes_to_delete)[matched.comparison] = true; + (*invalidated_nodes)[matched.cast] = true; + + return Status::OK(); +} + Status AddBatchNormNodes(RemapperContext* ctx, const FusedBatchNorm& matched) { const GraphDef* graph = ctx->graph_view.graph(); const NodeDef& fused_node = graph->node(matched.fused_batch_norm); @@ -1829,6 +1907,15 @@ Status Remapper::Optimize(Cluster* cluster, const GrapplerItem& item, TF_RETURN_IF_ERROR(AddBatchNormNodes(&ctx, fused_batch_norm)); continue; } + + // Remap Comparison+Cast into the ComparisonWithCast. 
+ ComparisonWithCast comparison_with_cast; + if (allow_non_differentiable_rewrites && + FindComparisonWithCast(ctx, i, &comparison_with_cast)) { + TF_RETURN_IF_ERROR(AddComparisonWithCastNode( + &ctx, comparison_with_cast, &invalidated_nodes, &nodes_to_delete)); + continue; + } } // Remove invalidated nodes. diff --git a/tensorflow/core/grappler/optimizers/remapper_test.cc b/tensorflow/core/grappler/optimizers/remapper_test.cc index 9d734801916..eac6b291af4 100644 --- a/tensorflow/core/grappler/optimizers/remapper_test.cc +++ b/tensorflow/core/grappler/optimizers/remapper_test.cc @@ -925,5 +925,64 @@ TEST_F(RemapperTest, FuseConv2DWithSqueezeAndBias) { } #endif +#define REGISTER_TEST_ALL_TYPES(TEST) \ + REGISTER_TEST(TEST, DT_FLOAT); \ + REGISTER_TEST(TEST, DT_BFLOAT16); + +#define REGISTER_TEST(CMP, TYPE) \ + TEST_F(RemapperTest, Fuse##CMP##WithCast_##TYPE) { \ + using ::tensorflow::ops::Placeholder; \ + for (bool is_training : {true, false}) { \ + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); \ + const int num_channels = 24; \ + TensorShape channel_shape({num_channels}); \ + TensorShape empty_shape({0}); \ + auto x = Placeholder(s.WithOpName("x"), TYPE, \ + ops::Placeholder::Shape({2, 8, 8, num_channels})); \ + auto y = Placeholder(s.WithOpName("y"), TYPE, \ + ops::Placeholder::Shape({2, 8, 8, num_channels})); \ + float epsilon = 0.1f; \ + auto comparator = ops::CMP(s.WithOpName("cmp_op"), x, y); \ + auto cast = ops::Cast(s.WithOpName("cast"), comparator.z, TYPE); \ + auto fetch = ops::Identity(s.WithOpName("fetch"), cast); \ + auto input1_t = GenerateRandomTensor({2, 8, 8, num_channels}); \ + auto input2_t = GenerateRandomTensor({2, 8, 8, num_channels}); \ + GrapplerItem item; \ + item.fetch = {"fetch"}; \ + item.feed = {{"x", input1_t}, {"y", input2_t}}; \ + TF_ASSERT_OK(s.ToGraphDef(&item.graph)); \ + for (int i = 0; i < item.graph.node_size(); ++i) { \ + item.graph.mutable_node(i)->set_device("/device:CPU:0"); \ + } \ + Remapper optimizer(RewriterConfig::AGGRESSIVE); \ + GraphDef output; \ + TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output)); \ + int found = 0; \ + for (const NodeDef& node : output.node()) { \ + if (node.name() == "cast") { \ + EXPECT_EQ(node.op(), "_" #CMP "WithCast"); \ + ASSERT_EQ(node.input_size(), 2); \ + EXPECT_EQ(node.input(0), "x"); \ + EXPECT_EQ(node.input(1), "y"); \ + found++; \ + } \ + } \ + EXPECT_EQ(found, 1); \ + auto tensors_expected = \ + EvaluateNodes(item.graph, item.fetch, item.feed); \ + ASSERT_EQ(tensors_expected.size(), 1); \ + auto tensors = EvaluateNodes(output, item.fetch, item.feed); \ + ASSERT_EQ(tensors.size(), 1); \ + test::ExpectClose(tensors[0], tensors_expected[0], 1e-2, 1e-2); \ + } \ + } +REGISTER_TEST_ALL_TYPES(GreaterEqual) +REGISTER_TEST_ALL_TYPES(Greater) +REGISTER_TEST_ALL_TYPES(LessEqual) +REGISTER_TEST_ALL_TYPES(Less) +REGISTER_TEST_ALL_TYPES(Equal) +REGISTER_TEST_ALL_TYPES(NotEqual) +#undef REGISTER_TEST + } // namespace grappler } // namespace tensorflow diff --git a/tensorflow/core/grappler/utils/grappler_test.h b/tensorflow/core/grappler/utils/grappler_test.h index 7ac70356f2c..c996c8bbe3a 100644 --- a/tensorflow/core/grappler/utils/grappler_test.h +++ b/tensorflow/core/grappler/utils/grappler_test.h @@ -85,7 +85,7 @@ class GrapplerTest : public ::testing::Test { typedef typename EnumToDataType::Type T; Tensor tensor(DTYPE, shape); for (auto i = 0; i < tensor.NumElements(); i++) - tensor.flat()(i) = i + random::New64() % 10; + tensor.flat()(i) = static_cast(i + random::New64() % 10); return tensor; } diff 
--git a/tensorflow/core/kernels/cwise_op_equal_to_1.cc b/tensorflow/core/kernels/cwise_op_equal_to_1.cc index 64cd784af73..86da7525685 100644 --- a/tensorflow/core/kernels/cwise_op_equal_to_1.cc +++ b/tensorflow/core/kernels/cwise_op_equal_to_1.cc @@ -19,6 +19,8 @@ namespace tensorflow { REGISTER7(BinaryOp, CPU, "Equal", functor::equal_to, float, Eigen::half, double, uint8, int8, int16, bfloat16); REGISTER3(BinaryOp, CPU, "Equal", functor::equal_to, uint16, uint32, uint64); +REGISTER2(BinaryOp, CPU, "_EqualWithCast", functor::equal_to_with_cast, float, + bfloat16); REGISTER_KERNEL_BUILDER( Name("ApproximateEqual").Device(DEVICE_CPU).TypeConstraint("T"), ApproximateEqualOp); diff --git a/tensorflow/core/kernels/cwise_op_greater.cc b/tensorflow/core/kernels/cwise_op_greater.cc index d70233dc55c..e905f13f6c6 100644 --- a/tensorflow/core/kernels/cwise_op_greater.cc +++ b/tensorflow/core/kernels/cwise_op_greater.cc @@ -18,6 +18,8 @@ limitations under the License. namespace tensorflow { REGISTER9(BinaryOp, CPU, "Greater", functor::greater, float, Eigen::half, double, int32, int64, uint8, int8, int16, bfloat16); +REGISTER2(BinaryOp, CPU, "_GreaterWithCast", functor::greater_with_cast, float, + bfloat16); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER7(BinaryOp, GPU, "Greater", functor::greater, float, Eigen::half, double, int64, uint8, int8, int16); diff --git a/tensorflow/core/kernels/cwise_op_greater_equal.cc b/tensorflow/core/kernels/cwise_op_greater_equal.cc index 7f6b788eb2e..8390035b86b 100644 --- a/tensorflow/core/kernels/cwise_op_greater_equal.cc +++ b/tensorflow/core/kernels/cwise_op_greater_equal.cc @@ -18,6 +18,8 @@ limitations under the License. namespace tensorflow { REGISTER9(BinaryOp, CPU, "GreaterEqual", functor::greater_equal, float, Eigen::half, double, int32, int64, uint8, int8, int16, bfloat16); +REGISTER2(BinaryOp, CPU, "_GreaterEqualWithCast", + functor::greater_equal_with_cast, float, bfloat16); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER7(BinaryOp, GPU, "GreaterEqual", functor::greater_equal, float, Eigen::half, double, int64, uint8, int8, int16); diff --git a/tensorflow/core/kernels/cwise_op_less.cc b/tensorflow/core/kernels/cwise_op_less.cc index 062a029f069..55f165128d8 100644 --- a/tensorflow/core/kernels/cwise_op_less.cc +++ b/tensorflow/core/kernels/cwise_op_less.cc @@ -19,6 +19,8 @@ namespace tensorflow { REGISTER5(BinaryOp, CPU, "Less", functor::less, float, Eigen::half, double, bfloat16, int32); REGISTER4(BinaryOp, CPU, "Less", functor::less, int64, uint8, int8, int16); +REGISTER2(BinaryOp, CPU, "_LessWithCast", functor::less_with_cast, float, + bfloat16); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER7(BinaryOp, GPU, "Less", functor::less, float, Eigen::half, double, diff --git a/tensorflow/core/kernels/cwise_op_less_equal.cc b/tensorflow/core/kernels/cwise_op_less_equal.cc index 43af03878e9..2961742f5f4 100644 --- a/tensorflow/core/kernels/cwise_op_less_equal.cc +++ b/tensorflow/core/kernels/cwise_op_less_equal.cc @@ -20,6 +20,8 @@ REGISTER5(BinaryOp, CPU, "LessEqual", functor::less_equal, float, Eigen::half, bfloat16, double, int32); REGISTER4(BinaryOp, CPU, "LessEqual", functor::less_equal, int64, uint8, int8, int16); +REGISTER2(BinaryOp, CPU, "_LessEqualWithCast", functor::less_equal_with_cast, + float, bfloat16); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER7(BinaryOp, GPU, "LessEqual", functor::less_equal, float, Eigen::half, diff --git a/tensorflow/core/kernels/cwise_op_not_equal_to_1.cc b/tensorflow/core/kernels/cwise_op_not_equal_to_1.cc index 
4de69edd21d..68a996c97b6 100644 --- a/tensorflow/core/kernels/cwise_op_not_equal_to_1.cc +++ b/tensorflow/core/kernels/cwise_op_not_equal_to_1.cc @@ -20,6 +20,8 @@ REGISTER7(BinaryOp, CPU, "NotEqual", functor::not_equal_to, float, Eigen::half, double, uint8, int8, int16, bfloat16); REGISTER3(BinaryOp, CPU, "NotEqual", functor::not_equal_to, uint16, uint32, uint64); +REGISTER2(BinaryOp, CPU, "_NotEqualWithCast", functor::not_equal_to_with_cast, + float, bfloat16); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER4(BinaryOp, GPU, "NotEqual", functor::not_equal_to, float, Eigen::half, double, uint8); diff --git a/tensorflow/core/kernels/cwise_ops.h b/tensorflow/core/kernels/cwise_ops.h index 88651d7bfdc..58c2323999b 100644 --- a/tensorflow/core/kernels/cwise_ops.h +++ b/tensorflow/core/kernels/cwise_ops.h @@ -21,10 +21,10 @@ limitations under the License. #include #include -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/bounds_check.h" #include "tensorflow/core/framework/numeric_types.h" #include "tensorflow/core/framework/tensor_types.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" namespace Eigen { namespace internal { @@ -1141,6 +1141,32 @@ struct equal_to : base, bool> {}; template struct not_equal_to : base, bool> {}; +template +struct less_with_cast : base> {}; + +template +struct less_equal_with_cast : base> {}; + +template +struct greater_with_cast : base> {}; + +template +struct greater_equal_with_cast + : base> {}; + +template +struct equal_to_with_cast : base> {}; + +template +struct not_equal_to_with_cast + : base> {}; + struct logical_and : base {}; struct logical_or : base {}; diff --git a/tensorflow/core/kernels/cwise_ops_test.cc b/tensorflow/core/kernels/cwise_ops_test.cc index bc77a119f0a..4fee16fa759 100644 --- a/tensorflow/core/kernels/cwise_ops_test.cc +++ b/tensorflow/core/kernels/cwise_ops_test.cc @@ -96,62 +96,66 @@ BM_UNARY(gpu, Round, float, DT_FLOAT); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM // data func scalar. 
-Graph* BinaryScalar(int num, const string& func) { +template +Graph* BinaryScalar(int num, const string& func, DataType dtype) { Graph* g = new Graph(OpRegistry::Global()); - Tensor lhs(DT_FLOAT, TensorShape({64, 64, num / (64 * 64)})); - lhs.flat().setRandom(); - Tensor rhs(DT_FLOAT, TensorShape({})); - rhs.flat().setRandom(); + Tensor lhs(dtype, TensorShape({64, 64, num / (64 * 64)})); + lhs.flat().setRandom(); + Tensor rhs(dtype, TensorShape({})); + rhs.flat().setRandom(); test::graph::Binary(g, func, test::graph::Constant(g, lhs), test::graph::Constant(g, rhs)); return g; } -#define BM_BINARY_SCALAR(DEVICE, FUNC) \ - void BM_##DEVICE##_##FUNC##_scalar(int iters, int num) { \ - const int64 tot = static_cast(iters) * num; \ - testing::UseRealTime(); \ - testing::ItemsProcessed(tot); \ - testing::BytesProcessed(tot * sizeof(float)); \ - test::Benchmark(#DEVICE, BinaryScalar(num, #FUNC)).Run(iters); \ - } \ - BENCHMARK(BM_##DEVICE##_##FUNC##_scalar) \ - ->Arg(1 << 12) /* must >= 4096 */ \ - ->Arg(1 << 13) \ - ->Arg(1 << 14) \ - ->Arg((1 << 15) - (1 << 13)) \ - ->Arg(1 << 15) \ - ->Arg((1 << 15) + (1 << 14)) \ - ->Arg(1 << 16) \ - ->Arg((1 << 17) - (1 << 15)) \ - ->Arg(1 << 17) \ - ->Arg((1 << 17) + (1 << 16)) \ - ->Arg(1 << 18) \ - ->Arg(1 << 19) \ +#define BM_BINARY_SCALAR(DEVICE, FUNC, T, TYPE) \ + void BM_##DEVICE##_##FUNC##_scalar##_##TYPE(int iters, int num) { \ + const int64 tot = static_cast(iters) * num; \ + testing::UseRealTime(); \ + testing::ItemsProcessed(tot); \ + testing::BytesProcessed(tot * sizeof(T)); \ + test::Benchmark(#DEVICE, BinaryScalar(num, #FUNC, TYPE)).Run(iters); \ + } \ + BENCHMARK(BM_##DEVICE##_##FUNC##_scalar##_##TYPE) \ + ->Arg(1 << 12) /* must >= 4096 */ \ + ->Arg(1 << 13) \ + ->Arg(1 << 14) \ + ->Arg((1 << 15) - (1 << 13)) \ + ->Arg(1 << 15) \ + ->Arg((1 << 15) + (1 << 14)) \ + ->Arg(1 << 16) \ + ->Arg((1 << 17) - (1 << 15)) \ + ->Arg(1 << 17) \ + ->Arg((1 << 17) + (1 << 16)) \ + ->Arg(1 << 18) \ + ->Arg(1 << 19) \ ->Arg(1 << 20); -BM_BINARY_SCALAR(cpu, Less); +BM_BINARY_SCALAR(cpu, Less, float, DT_FLOAT); +BM_BINARY_SCALAR(cpu, Less, bfloat16, DT_BFLOAT16); +BM_BINARY_SCALAR(cpu, _LessWithCast, float, DT_FLOAT); +BM_BINARY_SCALAR(cpu, _LessWithCast, bfloat16, DT_BFLOAT16); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -BM_BINARY_SCALAR(gpu, Less); +BM_BINARY_SCALAR(gpu, Less, float, DT_FLOAT); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM #ifdef TENSORFLOW_USE_SYCL -BM_BINARY_SCALAR(sycl, Less); +BM_BINARY_SCALAR(sycl, Less, float, DT_FLOAT); #endif // TENSORFLOW_USE_SYCL -BM_BINARY_SCALAR(cpu, Add); +BM_BINARY_SCALAR(cpu, Add, float, DT_FLOAT); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -BM_BINARY_SCALAR(gpu, Add); +BM_BINARY_SCALAR(gpu, Add, float, DT_FLOAT); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM #ifdef TENSORFLOW_USE_SYCL -BM_BINARY_SCALAR(sycl, Add); +BM_BINARY_SCALAR(sycl, Add, float, DT_FLOAT); #endif // TENSORFLOW_USE_SYCL -BM_BINARY_SCALAR(cpu, DivNoNan); +BM_BINARY_SCALAR(cpu, DivNoNan, float, DT_FLOAT); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -BM_BINARY_SCALAR(gpu, DivNoNan); +BM_BINARY_SCALAR(gpu, DivNoNan, float, DT_FLOAT); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM #ifdef TENSORFLOW_USE_SYCL -BM_BINARY_SCALAR(sycl, DivNoNan); +BM_BINARY_SCALAR(sycl, DivNoNan, float, DT_FLOAT); #endif // TENSORFLOW_USE_SYCL #undef BM_BINARY_SCALAR diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index 2a70f420260..2817e0a50eb 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -700,6 +700,23 @@ 
REGISTER_OP("GreaterEqual").COMPARISON(); #undef COMPARISON +#define COMPARISON_WITH_CAST() \ + Input("x: T") \ + .Input("y: T") \ + .Output("z: T") \ + .Attr("T: {float, bfloat16}") \ + .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn) + +REGISTER_OP("_LessWithCast").COMPARISON_WITH_CAST(); + +REGISTER_OP("_LessEqualWithCast").COMPARISON_WITH_CAST(); + +REGISTER_OP("_GreaterWithCast").COMPARISON_WITH_CAST(); + +REGISTER_OP("_GreaterEqualWithCast").COMPARISON_WITH_CAST(); + +#undef COMPARISON_WITH_CAST + // -------------------------------------------------------------------------- #define EQUALITY_COMPARISON() \ @@ -731,6 +748,32 @@ REGISTER_OP("NotEqual").EQUALITY_COMPARISON(); #undef EQUALITY_COMPARISON +#define EQUALITY_COMPARISON_WITH_CAST() \ + Input("x: T") \ + .Input("y: T") \ + .Output("z: T") \ + .SetIsCommutative() \ + .Attr("T: {bfloat16, float}") \ + .Attr("incompatible_shape_error: bool = true") \ + .SetShapeFn([](InferenceContext* c) { \ + ShapeHandle x = c->input(0); \ + ShapeHandle y = c->input(1); \ + ShapeHandle output; \ + bool incompatible_shape_error; \ + TF_RETURN_IF_ERROR(c->GetAttr("incompatible_shape_error", \ + &incompatible_shape_error)); \ + TF_RETURN_IF_ERROR(BroadcastBinaryOpOutputShapeFnHelper( \ + c, x, y, incompatible_shape_error, &output)); \ + c->set_output(0, output); \ + return Status::OK(); \ + }) + +REGISTER_OP("_EqualWithCast").EQUALITY_COMPARISON_WITH_CAST(); + +REGISTER_OP("_NotEqualWithCast").EQUALITY_COMPARISON_WITH_CAST(); + +#undef EQUALITY_COMPARISON_WITH_CAST + REGISTER_OP("ApproximateEqual") .Input("x: T") .Input("y: T") From 265535e4af39b6e5a726bd84e03f31b9152d48c3 Mon Sep 17 00:00:00 2001 From: rahul-kamat Date: Fri, 3 Jul 2020 02:24:44 +0000 Subject: [PATCH 0131/2522] Split kwargs annotation check into args and kwonlyargs --- tensorflow/python/eager/function.py | 9 +++++++-- tensorflow/python/eager/function_test.py | 14 ++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 75260393027..52418a831d1 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -2558,8 +2558,13 @@ class FunctionSpec(object): if self._fullargspec.varkw is not None: varkw_annotation = self._fullargspec.annotations.get( self._fullargspec.varkw) - if varkw_annotation == ops.Tensor: - kwargs = {kw: ops.convert_to_tensor(x) for kw, x in kwargs.items()} + for kw, v in kwargs.items(): + if kw in self._fullargspec.args: + arg_annotation = self._fullargspec.annotations.get(kw) + if arg_annotation == ops.Tensor: + kwargs[kw] = ops.convert_to_tensor(v) + elif varkw_annotation == ops.Tensor: + kwargs[kw] = ops.convert_to_tensor(v) return tuple(args), kwargs diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index 039620f6836..cbea1fcd8d6 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -4063,6 +4063,20 @@ class FunctionTest(test.TestCase, parameterized.TestCase): enabled(1, 2, 3, 4, 5, 100, a=1.0, b=2.0, c=3.0) # Retrace - change in *args self.assertEqual(trace_count[0], 3) + def testFollowTypeHintsTraceWithKwOnlyArgs(self): + trace_count = [0] + def func(x: ops.Tensor = 0, y: int = 1, **kwargs: ops.Tensor): + trace_count[0] += 1 + return x + + enabled = def_function.function(func, experimental_follow_type_hints=True) + + enabled(x=1, y=2, z=3) + enabled(x=1, y=3, z=3) # Retrace - change in args + enabled(x=2, y=2, z=4) # No retrace 
- change in args and **kwargs + enabled(x=2, y=2, z=4, u=5) # Retrace - change in **kwargs + self.assertEqual(trace_count[0], 3) + class MultiDeviceTest(test.TestCase, parameterized.TestCase): @test_util.run_gpu_only From b7c729404bf3a4d6fa39cad21560b65da617cd4d Mon Sep 17 00:00:00 2001 From: Elena Zhelezina Date: Thu, 2 Jul 2020 18:37:45 +0100 Subject: [PATCH 0132/2522] Fix for inference_input(output)_type for 16x8. Change-Id: I5791454da3c0ebb812c31e1ba304b745acc006e3 --- tensorflow/lite/python/lite.py | 6 ++++-- tensorflow/lite/python/lite_v2_test.py | 26 ++++++++++++++++++-------- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/tensorflow/lite/python/lite.py b/tensorflow/lite/python/lite.py index 4c6bd362efa..a679cdc72dd 100644 --- a/tensorflow/lite/python/lite.py +++ b/tensorflow/lite/python/lite.py @@ -202,7 +202,9 @@ class QuantizationMode(object): def is_post_training_integer_quantize(self): """Post training integer quantization.""" return (self.post_training_int8_no_float() or - self.post_training_int8_allow_float()) + self.post_training_int8_allow_float() or + self.post_training_int16x8_no_float() or + self.post_training_int16x8_allow_float()) def training_time_int8_allow_float(self): """Training-time int8 quantize, allow float fallback.""" @@ -556,7 +558,7 @@ class TFLiteConverterBaseV2(TFLiteConverterBase): # We only support integer types for post training integer quantization # as we have statistical information to quantize the input and output. if quant_mode.is_post_training_integer_quantize(): - all_types = default_types + [constants.INT8, constants.QUANTIZED_UINT8] + all_types = default_types + [constants.INT8, constants.INT16, constants.QUANTIZED_UINT8] if self.inference_input_type not in all_types or \ self.inference_output_type not in all_types: all_types_names = ["tf." + t.name for t in all_types] diff --git a/tensorflow/lite/python/lite_v2_test.py b/tensorflow/lite/python/lite_v2_test.py index 3b51991d674..2462ea24ca8 100644 --- a/tensorflow/lite/python/lite_v2_test.py +++ b/tensorflow/lite/python/lite_v2_test.py @@ -256,13 +256,16 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): @parameterized.named_parameters( ('_DefaultFLOAT32InputOutput_UseTargetTypesFlag', lite.constants.FLOAT, - False), ('_DefaultFLOAT32InputOutput', lite.constants.FLOAT, True), - ('_INT8InputOutput', lite.constants.INT8, True), - ('_UINT8InputOutput', lite.constants.QUANTIZED_UINT8, True)) + False, False), + ('_DefaultFLOAT32InputOutput', lite.constants.FLOAT, True, False), + ('_INT8InputOutput', lite.constants.INT8, True, False), + ('_UINT8InputOutput', lite.constants.QUANTIZED_UINT8, True, False), + ('_INT16InputOutput', lite.constants.INT16, True, True)) @test_util.run_v2_only def testPostTrainingIntegerNoFloatQuantization(self, inference_input_output_type, - use_target_ops_flag): + use_target_ops_flag, + quantization_16x8): func, calibration_gen = self._getCalibrationQuantizeModel() # Convert float model. 
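The parameterized test above drives this path end to end. As a rough sketch of the converter setup the 16x8 mode expects, assuming a concrete function `func` and a representative-dataset generator `calibration_gen` like the ones used in the test:

import tensorflow as tf

converter = tf.lite.TFLiteConverter.from_concrete_functions([func])
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = calibration_gen
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8
]
# With this change, int16 is also accepted for the inference I/O types.
converter.inference_input_type = tf.int16
converter.inference_output_type = tf.int16
tflite_model = converter.convert()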
@@ -276,9 +279,15 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): quantized_converter.optimizations = [lite.Optimize.DEFAULT] quantized_converter.representative_dataset = calibration_gen if use_target_ops_flag: - quantized_converter.target_spec.supported_ops = [ - lite.OpsSet.TFLITE_BUILTINS_INT8 - ] + if quantization_16x8: + quantized_converter.target_spec.supported_ops = [ + lite.OpsSet.\ + EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8 + ] + else: + quantized_converter.target_spec.supported_ops = [ + lite.OpsSet.TFLITE_BUILTINS_INT8 + ] else: quantized_converter.target_spec.supported_types = [lite.constants.INT8] quantized_converter.inference_input_type = inference_input_output_type @@ -393,7 +402,8 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): @parameterized.named_parameters( ('_INT8InputOutput', lite.constants.INT8), - ('_UINT8InputOutput', lite.constants.QUANTIZED_UINT8)) + ('_UINT8InputOutput', lite.constants.QUANTIZED_UINT8), + ('_INT16InputOutput', lite.constants.INT16)) def testInvalidTrainingTimeQuantization(self, inference_input_output_type): # We currently don't support integer inference_input_type and # inference_output_type flags for training time quantization. From dec97493b3ea7075fc87e5148836ed0ac15c3f07 Mon Sep 17 00:00:00 2001 From: rahul-kamat Date: Fri, 3 Jul 2020 17:49:46 +0000 Subject: [PATCH 0133/2522] Update annotation check to handle kwonlyargs and kwargs, Add tests --- tensorflow/python/eager/def_function.py | 2 +- tensorflow/python/eager/function.py | 16 ++-- tensorflow/python/eager/function_test.py | 109 ++++++++++++++++++++++- 3 files changed, 119 insertions(+), 8 deletions(-) diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py index 855122b166c..5c65baa0d83 100644 --- a/tensorflow/python/eager/def_function.py +++ b/tensorflow/python/eager/def_function.py @@ -1416,7 +1416,7 @@ def function(func=None, cases (e.g. TPU, XLA_GPU, dense tensor computations). experimental_follow_type_hints: When True, the function may use type annotations to optimize the tracing performance. For example, - arguments annotated with tf.Tensor` will automatically be converted + arguments annotated with `tf.Tensor` will automatically be converted to a Tensor. 
Returns: diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 52418a831d1..5b6dbd4002c 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -2535,7 +2535,7 @@ class FunctionSpec(object): kwargs = {kw: ops.convert_to_tensor(x) for kw, x in kwargs.items()} return tuple(args), kwargs - def _convert_typed_variables_to_tensors(self, args, kwargs): + def _convert_annotated_args_to_tensors(self, args, kwargs): if self.input_signature is not None: return @@ -2555,10 +2555,14 @@ class FunctionSpec(object): if varargs_annotation == ops.Tensor: args[i] = ops.convert_to_tensor(arg) - if self._fullargspec.varkw is not None: - varkw_annotation = self._fullargspec.annotations.get( - self._fullargspec.varkw) - for kw, v in kwargs.items(): + for kw, v in kwargs.items(): + if kw in self._fullargspec.kwonlyargs: + kwonlyarg_annotation = self._fullargspec.annotations.get(kw) + if kwonlyarg_annotation == ops.Tensor: + kwargs[kw] = ops.convert_to_tensor(v) + elif self._fullargspec.varkw is not None: + varkw_annotation = self._fullargspec.annotations.get( + self._fullargspec.varkw) if kw in self._fullargspec.args: arg_annotation = self._fullargspec.annotations.get(kw) if arg_annotation == ops.Tensor: @@ -2601,7 +2605,7 @@ class FunctionSpec(object): if self._is_pure: args, kwargs = self._convert_variables_to_tensors(args, kwargs) if self._experimental_follow_type_hints: - args, kwargs = self._convert_typed_variables_to_tensors(args, kwargs) + args, kwargs = self._convert_annotated_args_to_tensors(args, kwargs) if self._input_signature is not None: if len(args) > len(self._input_signature): raise TypeError("{} takes {} positional arguments (as specified by the " diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index cbea1fcd8d6..55ec285079f 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -4063,7 +4063,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase): enabled(1, 2, 3, 4, 5, 100, a=1.0, b=2.0, c=3.0) # Retrace - change in *args self.assertEqual(trace_count[0], 3) - def testFollowTypeHintsTraceWithKwOnlyArgs(self): + def testFollowTypeHintsTraceWithArgsEquals(self): trace_count = [0] def func(x: ops.Tensor = 0, y: int = 1, **kwargs: ops.Tensor): trace_count[0] += 1 @@ -4077,6 +4077,113 @@ class FunctionTest(test.TestCase, parameterized.TestCase): enabled(x=2, y=2, z=4, u=5) # Retrace - change in **kwargs self.assertEqual(trace_count[0], 3) + def testFollowTypeHintsTraceWithArgsEqualsTypedKwargs(self): + trace_count = [0] + def func(x, y, **kwargs: ops.Tensor): + trace_count[0] += 1 + return x + + enabled = def_function.function(func, experimental_follow_type_hints=True) + + enabled(x=1, y=2, z=3) + enabled(x=1, y=3, z=3) # Retrace + enabled(x=1, y=2, z=4) # No retrace + enabled(x=2, y=2, z=4) # Retrace + enabled(x=2, y=2, z=4, u=5) # Retrace + self.assertEqual(trace_count[0], 4) + + def testFollowTypeHintsTraceWithArgsEqualsTypedArgs(self): + trace_count = [0] + def func(x: ops.Tensor, y: int, **kwargs): + trace_count[0] += 1 + return x + + enabled = def_function.function(func, experimental_follow_type_hints=True) + + enabled(x=1, y=2, z=3) + enabled(x=1, y=3, z=3) # Retrace + enabled(x=1, y=2, z=4) # Retrace + enabled(x=2, y=2, z=3) # No retrace + enabled(x=2, y=2, z=4, u=5) # Retrace + self.assertEqual(trace_count[0], 4) + + def testFollowTypeHintsTraceWithKwOnlyArgsBasic(self): + trace_count = [0] + def func(*, a: 
ops.Tensor = None, b=1): + trace_count[0] += 1 + return a + + enabled = def_function.function(func, experimental_follow_type_hints=True) + + enabled(a=1, b=2) + enabled(a=2, b=2) # No retrace + enabled(a=1, b=1) # Retrace + self.assertEqual(trace_count[0], 2) + + def testFollowTypeHintsTraceWithArgsKwOnlyArgsKwargsAndTypedArg(self): + trace_count = [0] + def func(arg: ops.Tensor, *args, kwonly, **kwargs): + trace_count[0] += 1 + return arg + + enabled = def_function.function(func, experimental_follow_type_hints=True) + + enabled(1, 2, 3, 4, kwonly=5, kwarg1=6, kwarg2=7) + enabled(100, 2, 3, 4, kwonly=5, kwarg1=6, kwarg2=7) # No retrace + enabled(1000, 2, 3, 4, kwonly=5, kwarg1=6, kwarg2=7) # No retrace + enabled(1, 20, 30, 40, kwonly=5, kwarg1=6, kwarg2=7) # Retrace + enabled(1, 2, 3, 4, kwonly=50, kwarg1=6, kwarg2=7) # Retrace + enabled(1, 2, 3, 4, kwonly=5, kwarg1=60, kwarg2=70) # Retrace + self.assertEqual(trace_count[0], 4) + + def testFollowTypeHintsTraceWithArgsKwOnlyArgsKwargsAndTypedArgs(self): + trace_count = [0] + def func(arg, *args: ops.Tensor, kwonly, **kwargs): + trace_count[0] += 1 + return arg + + enabled = def_function.function(func, experimental_follow_type_hints=True) + + enabled(1, 2, 3, 4, kwonly=5, kwarg1=6, kwarg2=7) + enabled(100, 2, 3, 4, kwonly=5, kwarg1=6, kwarg2=7) # Retrace + enabled(1, 20, 30, 40, kwonly=5, kwarg1=6, kwarg2=7) # No retrace + enabled(1, 200, 300, 400, kwonly=5, kwarg1=6, kwarg2=7) # No retrace + enabled(1, 2, 3, 4, kwonly=50, kwarg1=6, kwarg2=7) # Retrace + enabled(1, 2, 3, 4, kwonly=5, kwarg1=60, kwarg2=70) # Retrace + self.assertEqual(trace_count[0], 4) + + def testFollowTypeHintsTraceWithArgsKwOnlyArgsKwargsAndTypedKwOnlyArg(self): + trace_count = [0] + def func(arg, *args, kwonly: ops.Tensor, **kwargs): + trace_count[0] += 1 + return arg + + enabled = def_function.function(func, experimental_follow_type_hints=True) + + enabled(1, 2, 3, 4, kwonly=5, kwarg1=6, kwarg2=7) + enabled(100, 2, 3, 4, kwonly=5, kwarg1=6, kwarg2=7) # Retrace + enabled(1, 20, 30, 40, kwonly=5, kwarg1=6, kwarg2=7) # Retrace + enabled(1, 2, 3, 4, kwonly=50, kwarg1=6, kwarg2=7) # No retrace + enabled(1, 2, 3, 4, kwonly=500, kwarg1=6, kwarg2=7) # No retrace + enabled(1, 2, 3, 4, kwonly=5, kwarg1=60, kwarg2=70) # Retrace + self.assertEqual(trace_count[0], 4) + + def testFollowTypeHintsTraceWithArgsKwOnlyArgsKwargsAndTypedKwargs(self): + trace_count = [0] + def func(arg, *args, kwonly, **kwargs: ops.Tensor): + trace_count[0] += 1 + return arg + + enabled = def_function.function(func, experimental_follow_type_hints=True) + + enabled(1, 2, 3, 4, kwonly=5, kwarg1=6, kwarg2=7) + enabled(100, 2, 3, 4, kwonly=5, kwarg1=6, kwarg2=7) # Retrace + enabled(1, 20, 30, 40, kwonly=5, kwarg1=6, kwarg2=7) # Retrace + enabled(1, 2, 3, 4, kwonly=50, kwarg1=6, kwarg2=7) # Retrace + enabled(1, 2, 3, 4, kwonly=5, kwarg1=60, kwarg2=70) # No retrace + enabled(1, 2, 3, 4, kwonly=5, kwarg1=600, kwarg2=700) # No retrace + self.assertEqual(trace_count[0], 4) + class MultiDeviceTest(test.TestCase, parameterized.TestCase): @test_util.run_gpu_only From bf661902cf6f8b6e763e7888596699b32bd00627 Mon Sep 17 00:00:00 2001 From: rahul-kamat Date: Fri, 3 Jul 2020 18:12:20 +0000 Subject: [PATCH 0134/2522] delete newline --- tensorflow/python/eager/function_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index 55ec285079f..a474329a646 100644 --- a/tensorflow/python/eager/function_test.py +++ 
b/tensorflow/python/eager/function_test.py @@ -118,7 +118,6 @@ def _spec_for_value(value): return value - class FunctionTest(test.TestCase, parameterized.TestCase): def setUp(self): From 277cb88cf60115a13d9ede314cbe1ab48a1a19d8 Mon Sep 17 00:00:00 2001 From: "aaa.jq" <895521320@qq.com> Date: Sat, 4 Jul 2020 03:28:21 +0800 Subject: [PATCH 0135/2522] Rename `indices_is_order` to `rows_are_ordered ` --- tensorflow/core/kernels/sparse_fill_empty_rows_op.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/kernels/sparse_fill_empty_rows_op.cc b/tensorflow/core/kernels/sparse_fill_empty_rows_op.cc index 77bae318977..e71257037f1 100644 --- a/tensorflow/core/kernels/sparse_fill_empty_rows_op.cc +++ b/tensorflow/core/kernels/sparse_fill_empty_rows_op.cc @@ -118,7 +118,7 @@ class SparseFillEmptyRowsOp : public OpKernel { return; } - bool indices_is_order = true; + bool rows_are_ordered = true; int64 last_indices_row = 0; std::vector csr_offset(dense_rows, 0); for (int i = 0; i < N; ++i) { @@ -127,7 +127,7 @@ class SparseFillEmptyRowsOp : public OpKernel { errors::InvalidArgument("indices(", i, ", 0) is invalid: ", row, " >= ", dense_rows)); ++csr_offset[row]; - indices_is_order = indices_is_order & (row >= last_indices_row); + rows_are_ordered = rows_are_ordered & (row >= last_indices_row); last_indices_row = row; } bool all_rows_full = true; @@ -151,7 +151,7 @@ class SparseFillEmptyRowsOp : public OpKernel { } } - if (all_rows_full && indices_is_order) { + if (all_rows_full && rows_are_ordered) { context->set_output(kOutputIndicesOutput, indices_t); context->set_output(kOutputValuesOutput, values_t); if (reverse_index_map) { From 77d442ff41d12d6a1f1c8b985c709ef8378dad2d Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Fri, 3 Jul 2020 18:20:33 -0400 Subject: [PATCH 0136/2522] Update shape_inference.cc --- tensorflow/core/framework/shape_inference.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/framework/shape_inference.cc b/tensorflow/core/framework/shape_inference.cc index 556629b48f7..f1a6ea83e82 100644 --- a/tensorflow/core/framework/shape_inference.cc +++ b/tensorflow/core/framework/shape_inference.cc @@ -720,8 +720,8 @@ Status InferenceContext::MakeShapeFromShapeTensorTreatScalarAsUnknownShape( TF_RETURN_IF_ERROR(WithRankAtMost(input(input_idx), 1, &input_shape)); requested_input_tensor_as_partial_shape_[input_idx] = true; - int input_tensors_as_shapes_size_ = input_tensors_as_shapes_.size(); - if (input_idx < input_tensors_as_shapes_size_ && + const int input_tensors_as_shapes_size = input_tensors_as_shapes_.size(); + if (input_idx < input_tensors_as_shapes_size && input_tensors_as_shapes_[input_idx].IsSet() && RankKnown(input_tensors_as_shapes_[input_idx])) { *out = input_tensors_as_shapes_[input_idx]; @@ -739,8 +739,8 @@ Status InferenceContext::MakeShapeFromShapeTensor(int input_idx, TF_RETURN_IF_ERROR(WithRank(input(input_idx), 1, &input_shape)); requested_input_tensor_as_partial_shape_[input_idx] = true; - int input_tensors_as_shapes_size_ = input_tensors_as_shapes_.size(); - if (input_idx < input_tensors_as_shapes_size_ && + int input_tensors_as_shapes_size = input_tensors_as_shapes_.size(); + if (input_idx < input_tensors_as_shapes_size && input_tensors_as_shapes_[input_idx].IsSet() && RankKnown(input_tensors_as_shapes_[input_idx])) { *out = input_tensors_as_shapes_[input_idx]; From 9ae97e566ddbd7ae34ace98717294e3580f503dd Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Sat, 4 Jul 2020 11:22:37 -0400 
Subject: [PATCH 0137/2522] Update shape_inference.cc --- tensorflow/core/framework/shape_inference.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/framework/shape_inference.cc b/tensorflow/core/framework/shape_inference.cc index f1a6ea83e82..f450de55602 100644 --- a/tensorflow/core/framework/shape_inference.cc +++ b/tensorflow/core/framework/shape_inference.cc @@ -739,7 +739,7 @@ Status InferenceContext::MakeShapeFromShapeTensor(int input_idx, TF_RETURN_IF_ERROR(WithRank(input(input_idx), 1, &input_shape)); requested_input_tensor_as_partial_shape_[input_idx] = true; - int input_tensors_as_shapes_size = input_tensors_as_shapes_.size(); + const int input_tensors_as_shapes_size = input_tensors_as_shapes_.size(); if (input_idx < input_tensors_as_shapes_size && input_tensors_as_shapes_[input_idx].IsSet() && RankKnown(input_tensors_as_shapes_[input_idx])) { @@ -1104,15 +1104,15 @@ Status InferenceContext::AttachContext(const Status& status) { std::vector input_from_tensors_as_shape_str; input_from_tensors_as_shape_str.reserve(inputs_.size()); for (int i = 0, iter_limit = inputs_.size(); i < iter_limit; ++i) { - int input_tensors_size_ = input_tensors_.size(); - int input_tensors_as_shapes_size_ = input_tensors_as_shapes_.size(); + const int input_tensors_size = input_tensors_.size(); + const int input_tensors_as_shapes_size = input_tensors_as_shapes_.size(); if (requested_input_tensor_as_partial_shape_[i] && - i < input_tensors_as_shapes_size_ && + i < input_tensors_as_shapes_size && input_tensors_as_shapes_[i].IsSet() && RankKnown(input_tensors_as_shapes_[i])) { input_from_tensors_as_shape_str.push_back(strings::StrCat( "input[", i, "] = ", DebugString(input_tensors_as_shapes_[i]))); - } else if (requested_input_tensor_[i] && i < input_tensors_size_ && + } else if (requested_input_tensor_[i] && i < input_tensors_size && input_tensors_[i] != nullptr) { input_from_tensors_str.push_back(strings::StrCat( "input[", i, "] = <", From 71be44917ac675ecb951fa3e5be115dae0e5aef8 Mon Sep 17 00:00:00 2001 From: MichelBr Date: Sat, 4 Jul 2020 23:48:44 +0200 Subject: [PATCH 0138/2522] Update output_handler.cc fix incorrectly documented led-color order --- .../micro/examples/hello_world/sparkfun_edge/output_handler.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/lite/micro/examples/hello_world/sparkfun_edge/output_handler.cc b/tensorflow/lite/micro/examples/hello_world/sparkfun_edge/output_handler.cc index c9f23dc2de0..2e727095a5c 100644 --- a/tensorflow/lite/micro/examples/hello_world/sparkfun_edge/output_handler.cc +++ b/tensorflow/lite/micro/examples/hello_world/sparkfun_edge/output_handler.cc @@ -22,7 +22,7 @@ This function uses the device's LEDs to visually indicate the current y value. The y value is in the range -1 <= y <= 1. 
The LEDs (red, green, blue, and yellow) are physically lined up in the following order: - [ R G B Y ] + [ R B G Y ] The following table represents how we will light the LEDs for different values: From 81a475d6ffa7b3fe19afa29b1ea8ab9d1c92e25a Mon Sep 17 00:00:00 2001 From: ShengYang1 Date: Thu, 2 Jul 2020 08:26:51 +0800 Subject: [PATCH 0139/2522] Pad+Conv fusion --- .../core/common_runtime/mkl_layout_pass.cc | 5 + .../common_runtime/mkl_layout_pass_test.cc | 12 +- tensorflow/core/kernels/mkl_fused_ops_test.cc | 110 +++++++++--------- 3 files changed, 63 insertions(+), 64 deletions(-) diff --git a/tensorflow/core/common_runtime/mkl_layout_pass.cc b/tensorflow/core/common_runtime/mkl_layout_pass.cc index 778d5445cb2..1fcdc7507b4 100644 --- a/tensorflow/core/common_runtime/mkl_layout_pass.cc +++ b/tensorflow/core/common_runtime/mkl_layout_pass.cc @@ -1164,8 +1164,13 @@ class MklLayoutRewritePass : public GraphOptimizationPass { DataType T_m; TF_CHECK_OK(GetNodeAttr(m->def(), "T", &T_m)); +#ifndef ENABLE_INTEL_MKL_BFLOAT16 // Don't try to merge if datatype is not DT_FLOAT if (T_m != DT_FLOAT) return n; +#else + // Don't try to merge if datatype is not DT_FLOAT or DT_BFLOAT16 + if (T_m != DT_FLOAT && T_m != DT_BFLOAT16) return n; +#endif const Node* conv_node; if (m->type_string() == csinfo_.pad) { diff --git a/tensorflow/core/common_runtime/mkl_layout_pass_test.cc b/tensorflow/core/common_runtime/mkl_layout_pass_test.cc index d480c0a49ce..ead37936c44 100644 --- a/tensorflow/core/common_runtime/mkl_layout_pass_test.cc +++ b/tensorflow/core/common_runtime/mkl_layout_pass_test.cc @@ -620,8 +620,7 @@ REGISTER_TEST_FLOAT32(NodeMerge_Conv2DWithBias_ConvBpropInput_FilterFwd); "A:control->DMT/_2:control;B->E:2;D->E:1;DMT/_0->E:3;DMT/_1->E:4;" \ "DMT/_2->E:5;E->Z;Y->Z:1"); \ } -// TODO(nhasabni): Enable bfloat16 test when we enable the operator. -REGISTER_TEST_FLOAT32(NodeMerge_PadWithConv2D_Positive); +REGISTER_TEST_ALL_TYPES(NodeMerge_PadWithConv2D_Positive); #undef REGISTER_TEST // Test if input control edges do not duplicate after merge. @@ -679,8 +678,7 @@ REGISTER_TEST_FLOAT32(NodeMerge_PadWithConv2D_Positive); "DMT/_2:control;B->E:2;D->E:1;DMT/_0->E:3;DMT/_1->E:4;" \ "DMT/_2->E:5;E->Z;Y->Z:1"); \ } -// TODO(nhasabni): Enable bfloat16 test when we enable the operator. -REGISTER_TEST_FLOAT32(Input_ControlEdge_PadWithConv2D_Positive); +REGISTER_TEST_ALL_TYPES(Input_ControlEdge_PadWithConv2D_Positive); #undef REGISTER_TEST // Test if output control edges does not duplicate after merge. @@ -737,8 +735,7 @@ REGISTER_TEST_FLOAT32(Input_ControlEdge_PadWithConv2D_Positive); "DMT/_0->E:3;DMT/_1->E:4;DMT/_2->E:5;E->Z;E:control->A1:control;" \ "Y->Z:1"); \ } -// TODO(nhasabni): Enable bfloat16 test when we enable the operator. -REGISTER_TEST_FLOAT32(Output_ControlEdge_PadWithConv2D_Positive); +REGISTER_TEST_ALL_TYPES(Output_ControlEdge_PadWithConv2D_Positive); #undef REGISTER_TEST // Pad + Conv2D fusion with padding is VALID, @@ -778,8 +775,7 @@ REGISTER_TEST_FLOAT32(Output_ControlEdge_PadWithConv2D_Positive); "DMT/_1:control;A:control->DMT/_2:control;B->E:2;DMT/_0->E:3;"\ "DMT/_1->E:4;DMT/_2->E:5;E->Z;Y->Z:1"); \ } -// TODO(nhasabni): Enable bfloat16 test when we enable the operator. 
-REGISTER_TEST_FLOAT32(NodeMerge_PadWithConv2D_Common_Input); +REGISTER_TEST_ALL_TYPES(NodeMerge_PadWithConv2D_Common_Input); #undef REGISTER_TEST // Pad + Conv2D with padding is VALID, diff --git a/tensorflow/core/kernels/mkl_fused_ops_test.cc b/tensorflow/core/kernels/mkl_fused_ops_test.cc index edd1201a09c..b1751ed07b6 100644 --- a/tensorflow/core/kernels/mkl_fused_ops_test.cc +++ b/tensorflow/core/kernels/mkl_fused_ops_test.cc @@ -668,14 +668,52 @@ INSTANTIATE_TYPED_TEST_SUITE_P(Test, MklFusedDepthwiseConv2DWithBiasOpTest, MklFusedBiasAddDataTypes); // Testing fusion of pad and convolution - +template class FusedPadConvOpTest : public OpsTestBase { public: - template - void Run(DataType dtype, Tensor& image, Tensor& filter, Tensor& padding, - Tensor& expected, const string data_format) { + void Run(const string data_format) { + DataType dtype = DataTypeToEnum::v(); + const int depth = 1; + const int image_width = 4; + const int image_height = 3; + const int image_batch_count = 1; const int stride = 1; + Tensor image, expected; + if (data_format == "NHWC") { + image = + Tensor(dtype, {image_batch_count, image_height, image_width, depth}); + } else { + image = + Tensor(dtype, {image_batch_count, depth, image_height, image_width}); + } + test::FillValues(&image, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); + + const int kFilterSize = 3; + const int kFilterCount = 1; + Tensor filter(dtype, {kFilterSize, kFilterSize, depth, kFilterCount}); + test::FillValues(&filter, {1, 4, 7, 2, 5, 8, 3, 6, 9}); + + const int padding_height = 4; + const int padding_width = 2; + Tensor padding(DT_INT32, {padding_height, padding_width}); + if (data_format == "NHWC") { + test::FillValues(&padding, {0, 0, 3, 4, 1, 2, 0, 0}); + } else { + test::FillValues(&padding, {0, 0, 0, 0, 3, 4, 1, 2}); + } + + if (data_format == "NHWC") { + expected = Tensor(dtype, TensorShape({1, 8, 5, 1})); + } else { + expected = Tensor(dtype, TensorShape({1, 1, 8, 5})); + } + test::FillValues( + &expected, + {0, 0, 0, 0, 0, 24, 42, 60, 33, 12, 105, 150, 183, 95, + 32, 235, 312, 357, 178, 56, 187, 234, 261, 121, 32, 106, 126, 138, + 59, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}); + // Create a fused pad+conv2d node TF_EXPECT_OK(NodeDefBuilder("fused_pad_conv_op", "_MklPadWithConv2D") .Input(FakeInput(dtype)) // Input @@ -705,65 +743,25 @@ class FusedPadConvOpTest : public OpsTestBase { const Tensor& first = *GetOutput(0); const Tensor& second = *GetOutput(2); CommonTestUtilities test_util; - test_util.ConvertAndCompare(dtype, first, second, expected); + test_util.ConvertAndCompareIntegral(dtype, first, second, expected); } }; -TEST_F(FusedPadConvOpTest, PaddingConvTest) { - const int depth = 1; - const int image_width = 4; - const int image_height = 3; - const int image_batch_count = 1; - Tensor image(DT_FLOAT, {image_batch_count, image_height, image_width, depth}); - test::FillValues(&image, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); +TYPED_TEST_CASE_P(FusedPadConvOpTest); - const int kFilterSize = 3; - const int kFilterCount = 1; - Tensor filter(DT_FLOAT, {kFilterSize, kFilterSize, depth, kFilterCount}); - test::FillValues(&filter, {1, 4, 7, 2, 5, 8, 3, 6, 9}); +TYPED_TEST_P(FusedPadConvOpTest, PaddingConvTest) { this->Run("NHWC"); } - const int padding_height = 4; - const int padding_width = 2; - Tensor padding(DT_INT32, {padding_height, padding_width}); - test::FillValues(&padding, {0, 0, 3, 4, 1, 2, 0, 0}); +TYPED_TEST_P(FusedPadConvOpTest, PaddingConvTestNchw) { this->Run("NCHW"); } - Tensor expected(DT_FLOAT, TensorShape({1, 8, 5, 1})); - 
test::FillValues( - &expected, - {0, 0, 0, 0, 0, 24, 42, 60, 33, 12, 105, 150, 183, 95, - 32, 235, 312, 357, 178, 56, 187, 234, 261, 121, 32, 106, 126, 138, - 59, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}); +REGISTER_TYPED_TEST_CASE_P(FusedPadConvOpTest, PaddingConvTest, + PaddingConvTestNchw); - Run(DT_FLOAT, image, filter, padding, expected, "NHWC"); -} - -TEST_F(FusedPadConvOpTest, PaddingConvTestNchw) { - const int depth = 1; - const int image_width = 4; - const int image_height = 3; - const int image_batch_count = 1; - Tensor image(DT_FLOAT, {image_batch_count, depth, image_height, image_width}); - test::FillValues(&image, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); - - const int kFilterSize = 3; - const int kFilterCount = 1; - Tensor filter(DT_FLOAT, {kFilterSize, kFilterSize, depth, kFilterCount}); - test::FillValues(&filter, {1, 4, 7, 2, 5, 8, 3, 6, 9}); - - const int padding_height = 4; - const int padding_width = 2; - Tensor padding(DT_INT32, {padding_height, padding_width}); - test::FillValues(&padding, {0, 0, 0, 0, 3, 4, 1, 2}); - - Tensor expected(DT_FLOAT, TensorShape({1, 1, 8, 5})); - test::FillValues( - &expected, - {0, 0, 0, 0, 0, 24, 42, 60, 33, 12, 105, 150, 183, 95, - 32, 235, 312, 357, 178, 56, 187, 234, 261, 121, 32, 106, 126, 138, - 59, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}); - - Run(DT_FLOAT, image, filter, padding, expected, "NCHW"); -} +#ifdef ENABLE_INTEL_MKL_BFLOAT16 +using FusedPadConvDataTypes = ::testing::Types; +#else +using FusedPadConvDataTypes = ::testing::Types; +#endif +INSTANTIATE_TYPED_TEST_CASE_P(Test, FusedPadConvOpTest, FusedPadConvDataTypes); class FilterCacheTest : public OpsTestBase { public: From 5cfb7593f2141d5885734104bc2891995532ea18 Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Mon, 6 Jul 2020 14:51:56 -0400 Subject: [PATCH 0140/2522] Update bcast.h --- tensorflow/core/util/bcast.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/util/bcast.h b/tensorflow/core/util/bcast.h index 7b969f72475..4b17e1961ad 100644 --- a/tensorflow/core/util/bcast.h +++ b/tensorflow/core/util/bcast.h @@ -139,7 +139,7 @@ BCastList::BCastList(const BCastList::Vec (&x)[N], if (x[i] != x[0]) { all_equal = false; } - int x_i_size = x[i].size(); + const int x_i_size = x[i].size(); if (x_i_size > largest_rank) { largest_rank = x[i].size(); } From aab83bb851e932c322856eccb1a3be6ed728f239 Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Mon, 6 Jul 2020 14:52:21 -0400 Subject: [PATCH 0141/2522] Update topological_sort.cc --- tensorflow/core/grappler/utils/topological_sort.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/utils/topological_sort.cc b/tensorflow/core/grappler/utils/topological_sort.cc index 932276edeb8..e45419b22c6 100644 --- a/tensorflow/core/grappler/utils/topological_sort.cc +++ b/tensorflow/core/grappler/utils/topological_sort.cc @@ -81,7 +81,7 @@ Status ComputeTopologicalOrder( int ready_node = (*ready_nodes)[front]; for (int fanout : graph_view.GetFanout(ready_node)) { ++num_ready_inputs[fanout]; - int graph_view_GetFanin_fanout_size = graph_view.GetFanin(fanout).size(); + const int graph_view_GetFanin_fanout_size = graph_view.GetFanin(fanout).size(); if (num_ready_inputs[fanout] == graph_view_GetFanin_fanout_size) { ready_nodes->push_back(fanout); ++back; From f206eedcf843e9176923a24de5aab9577b25d510 Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Mon, 6 Jul 2020 20:40:18 +0000 Subject: [PATCH 0142/2522] summary --- tensorflow/c/kernels/summary_op.cc | 31 +++++-- 
tensorflow/c/kernels/summary_op_test.cc | 11 ++- tensorflow/c/tf_tensor.cc | 103 ++++-------------------- tensorflow/c/tf_tensor.h | 3 + tensorflow/c/tf_tensor_internal.h | 2 + 5 files changed, 52 insertions(+), 98 deletions(-) diff --git a/tensorflow/c/kernels/summary_op.cc b/tensorflow/c/kernels/summary_op.cc index 23fd437af78..c3d7fa84aaa 100644 --- a/tensorflow/c/kernels/summary_op.cc +++ b/tensorflow/c/kernels/summary_op.cc @@ -28,6 +28,7 @@ limitations under the License. #include "tensorflow/core/platform/protobuf.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/types.h" +#include static void* SummaryScalarOp_Create(TF_OpKernelConstruction* ctx) { void* ptr; @@ -42,6 +43,16 @@ static void SummaryScalarOp_Delete(void* kernel) { bool IsSameSize(TF_Tensor* tensor1, TF_Tensor* tensor2); static tensorflow::string SingleTag(TF_Tensor* tags); +template +float get_float_value(T* element){ + return static_cast(*element); +} + +template<> +float get_float_value(Eigen::half* element){ + return Eigen::half_impl::half_to_float(*element); +} + template static void SummaryScalarOp_Compute(void* kernel, TF_OpKernelContext* ctx) { TF_Tensor* tags; @@ -68,19 +79,25 @@ static void SummaryScalarOp_Compute(void* kernel, TF_OpKernelContext* ctx) { auto values_array = static_cast(TF_TensorData(values)); // Copy tags and values into summary protobuf for (int i = 0; i < TF_TensorElementCount(tags); ++i) { - tensorflow::Summary::Value* v = s.add_value(); - v->set_tag(tags_array[i].data(), tags_array[i].size()); - v->set_simple_value(float(values_array[i])); + tensorflow::Summary::Value* v = s.add_value(); + const tensorflow::tstring& Ttags_i = tags_array[i]; + v->set_tag(Ttags_i.data(), Ttags_i.size()); + v->set_simple_value(get_float_value(&values_array[i])); } - TF_Tensor* summary_tensor = TF_AllocateOutput(ctx, 0, + TF_Tensor* summary_tensor = TF_AllocateOutput(ctx, 0 TF_ExpectedOutputDataType(ctx, 0), nullptr, 0, - sizeof(TF_TString), status); + sizeof(tensorflow::tstring), status); if (TF_GetCode(status) == TF_OK) { - SerializeToTString(s, static_cast - (TF_TensorData(summary_tensor))); + tensorflow::tstring summary_tstring; + SerializeToTString(s, &summary_tstring); + *(TF_TensorData(summary_tensor)) = &summary_tstring; + TF_SetOutput(ctx, 0, summary_tensor, status); } TF_DeleteTensor(summary_tensor); } + if (TF_GetCode(status) != TF_OK) { + TF_OpKernelContext_Failure(ctx, status); + } TF_DeleteStatus(status); TF_DeleteTensor(tags); TF_DeleteTensor(values); diff --git a/tensorflow/c/kernels/summary_op_test.cc b/tensorflow/c/kernels/summary_op_test.cc index d8dbf622a55..7438693b430 100644 --- a/tensorflow/c/kernels/summary_op_test.cc +++ b/tensorflow/c/kernels/summary_op_test.cc @@ -72,6 +72,8 @@ void TestScalarSummaryOp(Tensor* tags, Tensor* values, string expected_summary, DummyDevice dummy_device(nullptr); params.device = &dummy_device; params.op_kernel = kernel.get(); + AllocatorAttributes alloc_attrs; + params.output_attr_array = &alloc_attrs; gtl::InlinedVector inputs; inputs.emplace_back(tags); inputs.emplace_back(values); @@ -84,11 +86,12 @@ void TestScalarSummaryOp(Tensor* tags, Tensor* values, string expected_summary, Summary summary; ParseProtoUnlimited(&summary, ctx.mutable_output(0)->scalar()()); EXPECT_SummaryMatches(summary, expected_summary); + } } TEST(ScalarSummaryOpTest, Test) { - int vectorSize = 2; + int vectorSize = 3; Tensor tags(DT_STRING, {vectorSize}); Tensor values(DT_FLOAT, {vectorSize}); tags.vec()(0) = "tag1"; @@ -126,9 +129,9 @@ 
TEST(ScalarSummaryOpTest, SimpleHalf) { tags.vec()(0) = "tag1"; tags.vec()(1) = "tag2"; tags.vec()(2) = "tag3"; - values.vec()(0) = static_cast(1.0); - values.vec()(1) = static_cast(-2.0); - values.vec()(2) = static_cast(10000.0); + values.vec()(0) = Eigen::half(1.0); + values.vec()(1) = Eigen::half(-2.0); + values.vec()(2) = Eigen::half(10000.0); TestScalarSummaryOp(&tags, &values, R"( value { tag: 'tag1' simple_value: 1.0 } value { tag: 'tag2' simple_value: -2.0} diff --git a/tensorflow/c/tf_tensor.cc b/tensorflow/c/tf_tensor.cc index 34c91fc23dc..5cfd495933c 100644 --- a/tensorflow/c/tf_tensor.cc +++ b/tensorflow/c/tf_tensor.cc @@ -28,6 +28,7 @@ limitations under the License. #include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/lib/core/coding.h" #include "tensorflow/core/platform/casts.h" +#include using tensorflow::Status; using tensorflow::Tensor; @@ -180,6 +181,11 @@ void TF_TensorBitcastFrom(const TF_Tensor* from, TF_DataType type, Set_TF_Status_from_Status(status, cc_status); } +std::string TF_ShapeDebugString(const TF_Tensor* t){ + return tensorflow::down_cast(t->tensor) + ->ShapeDebugString(); +} + namespace tensorflow { void TensorInterface::Release() { delete this; } @@ -225,6 +231,10 @@ Status TensorInterface::BitcastFrom(const TensorInterface& from, DataType type, return tensor_.BitcastFrom(from.tensor_, type, s); } +std::string TensorInterface::ShapeDebugString() const { + return tensor_.shape().DebugString(); +} + } // namespace tensorflow // -------------------------------------------------------------------------- @@ -283,62 +293,11 @@ TF_Tensor* TF_TensorFromTensor(const tensorflow::Tensor& src, Status* status) { std::memcpy(TF_TensorData(t), str.c_str(), str.size()); return t; } - if (src.dtype() != tensorflow::DT_STRING) { - Tensor tensor; - if (!tensor.CopyFrom(src, src.shape())) { - return nullptr; - } - return new TF_Tensor{new tensorflow::TensorInterface(tensor)}; - } - // DT_STRING tensors require a copying since TF_Tensor.buffer expects a flatly - // encoded sequence of strings. - - // Compute bytes needed for encoding. - size_t size = 0; - const auto& srcarray = src.flat(); - for (int i = 0; i < srcarray.size(); ++i) { - const string& s = srcarray(i); - // uint64 starting_offset, TF_StringEncode-d string. - size += sizeof(tensorflow::uint64) + TF_StringEncodedSize(s.size()); - } - - // Encode all strings. - char* base = new char[size]; - char* data_start = base + sizeof(tensorflow::uint64) * srcarray.size(); - char* dst = data_start; // Where next string is encoded. 
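// Annotation for the block being removed in this hunk: it implemented the
// legacy DT_STRING layout for TF_Tensor, a uint64 offset table followed by
// TF_StringEncode()-d bytes. With string tensors now held as
// tensorflow::tstring, that flat encoding appears to be unnecessary, so
// TF_TensorFromTensor (and TensorInterface::ToTensor further down) can fall
// through to a plain CopyFrom / assignment for every dtype, as the replacement
// lines below show.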
- size_t dst_len = size - static_cast(data_start - base); - tensorflow::uint64* offsets = reinterpret_cast(base); - for (int i = 0; i < srcarray.size(); ++i) { - *offsets = (dst - data_start); - offsets++; - const string& s = srcarray(i); - const size_t consumed = TF_StringEncodedSize(s.size()); - StringEncode(s.data(), s.size(), dst); - dst += consumed; - dst_len -= consumed; - } - if (dst != base + size) { - *status = InvalidArgument( - "invalid string tensor encoding (decoded ", (dst - base), - " bytes, but the tensor is encoded in ", size, " bytes"); - delete[] base; + Tensor tensor; + if (!tensor.CopyFrom(src, src.shape())) { return nullptr; } -// <<<<<<< HEAD -// return new TF_Tensor{new tensorflow::TensorInterface(tensor)}; -// ======= - - auto dims = src.shape().dim_sizes(); - std::vector dimvec(dims.size()); - for (size_t i = 0; i < dims.size(); ++i) { - dimvec[i] = dims[i]; - } - static_assert(sizeof(int64_t) == sizeof(tensorflow::int64), - "64-bit int types should match in size"); - return TF_NewTensor(TF_STRING, - reinterpret_cast(dimvec.data()), - dimvec.size(), base, size, DeleteArray, base); -// >>>>>>> parent of 477470d094... finished test file + return new TF_Tensor{new tensorflow::TensorInterface(tensor)}; } Status TF_TensorToTensor(const TF_Tensor* src, Tensor* dst) { @@ -362,39 +321,7 @@ Status TensorInterface::ToTensor(tensorflow::Tensor* dst) const { } return Status::OK(); } - if (tensor_.dtype() != DT_STRING) { - *dst = tensor_; - return Status::OK(); - } - // TF_STRING tensors require copying since Tensor class expects a sequence of - // string objects. - const tensorflow::int64 num_elements = tensor_.NumElements(); - const char* input = reinterpret_cast(Data()); - const size_t src_size = ByteSize(); - if (static_cast(src_size / sizeof(tensorflow::uint64)) < - num_elements) { - return InvalidArgument( - "Malformed TF_STRING tensor; too short to hold number of elements"); - } - const char* data_start = input + sizeof(tensorflow::uint64) * num_elements; - const char* limit = input + src_size; - - *dst = tensorflow::Tensor(tensor_.dtype(), tensor_.shape()); - auto dstarray = dst->flat(); - for (tensorflow::int64 i = 0; i < num_elements; ++i) { - tensorflow::uint64 offset = - reinterpret_cast(input)[i]; - if (static_cast(offset) >= (limit - data_start)) { - return InvalidArgument("Malformed TF_STRING tensor; element ", i, - " out of range"); - } - size_t len; - const char* p; - const char* srcp = data_start + offset; - Status status = TF_StringDecode_Impl(srcp, limit - srcp, &p, &len); - if (!status.ok()) return status; - dstarray(i).assign(p, len); - } + *dst = tensor_; return Status::OK(); } @@ -403,3 +330,5 @@ bool TensorInterface::IsAligned() const { return tensor_.IsAligned(); } } // namespace tensorflow bool TF_TensorIsAligned(const TF_Tensor* t) { return t->tensor->IsAligned(); } + + diff --git a/tensorflow/c/tf_tensor.h b/tensorflow/c/tf_tensor.h index acdf053e63a..e4953b53e43 100644 --- a/tensorflow/c/tf_tensor.h +++ b/tensorflow/c/tf_tensor.h @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/c/tf_datatype.h" #include "tensorflow/c/tf_status.h" +#include // Macro to control visibility of exported symbols in the shared library (.so, // .dylib, .dll). @@ -151,6 +152,8 @@ TF_CAPI_EXPORT extern void TF_TensorBitcastFrom(const TF_Tensor* from, // Returns bool iff this tensor is aligned. 
TF_CAPI_EXPORT extern bool TF_TensorIsAligned(const TF_Tensor*); +TF_CAPI_EXPORT extern std::string TF_ShapeDebugString(const TF_Tensor*); + #ifdef __cplusplus } /* end extern "C" */ #endif diff --git a/tensorflow/c/tf_tensor_internal.h b/tensorflow/c/tf_tensor_internal.h index 7a896dc5d11..036559da838 100644 --- a/tensorflow/c/tf_tensor_internal.h +++ b/tensorflow/c/tf_tensor_internal.h @@ -17,6 +17,7 @@ limitations under the License. #define TENSORFLOW_C_TF_TENSOR_INTERNAL_H_ #include +#include #include "tensorflow/c/tensor_interface.h" #include "tensorflow/c/tf_datatype.h" @@ -104,6 +105,7 @@ class TensorInterface : public AbstractTensorInterface { void* Data() const override; bool IsAligned() const override; bool CanMove() const override; + std::string ShapeDebugString() const; Status ToTensor(tensorflow::Tensor* dst) const; Status BitcastFrom(const TensorInterface& from, DataType type, From a0c18345d6629baf476bbf900fc8e8295f5040c0 Mon Sep 17 00:00:00 2001 From: rahul-kamat Date: Mon, 6 Jul 2020 21:02:56 +0000 Subject: [PATCH 0143/2522] Add 2 spaces before inline comments --- tensorflow/python/eager/function_test.py | 98 ++++++++++++------------ 1 file changed, 49 insertions(+), 49 deletions(-) diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index a474329a646..9769cd50a9c 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -3940,15 +3940,15 @@ class FunctionTest(test.TestCase, parameterized.TestCase): enabled = def_function.function(func, experimental_follow_type_hints=True) disabled = def_function.function(func, experimental_follow_type_hints=False) - enabled(1) # Initial call gets traced + enabled(1) # Initial call gets traced enabled(2) enabled(3) self.assertEqual(trace_count[0], 1) trace_count = [0] disabled(1) - disabled(2) # Retrace - disabled(3) # Retrace + disabled(2) # Retrace + disabled(3) # Retrace self.assertEqual(trace_count[0], 3) def testFollowTypeHintsTraceWithArgs(self): @@ -3969,7 +3969,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase): trace_count = [0] disabled(args) - disabled(args2) # Retrace + disabled(args2) # Retrace self.assertEqual(trace_count[0], 2) def testFollowTypeHintsTraceWithKwargs(self): @@ -3987,7 +3987,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase): trace_count = [0] disabled(1, x=1, y=1.0, z="one") - disabled(2, x=2, y=2.0, z="two") # Retrace + disabled(2, x=2, y=2.0, z="two") # Retrace self.assertEqual(trace_count[0], 2) def testFollowTypeHintsTraceWithMultipleInputTypes(self): @@ -4006,8 +4006,8 @@ class FunctionTest(test.TestCase, parameterized.TestCase): trace_count = [0] disabled(1, constant_op.constant(1), "str", x=4.0) - disabled(2, constant_op.constant(2), "str2", x=5.0) # Retrace - self.assertEqual(trace_count[0], 2) # pylint: disable=bad-whitespace + disabled(2, constant_op.constant(2), "str2", x=5.0) # Retrace + self.assertEqual(trace_count[0], 2) def testFollowTypeHintsTraceWithOnlyArgNamed(self): trace_count = [0] @@ -4018,7 +4018,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase): enabled = def_function.function(func, experimental_follow_type_hints=True) enabled(1, 3, x=4.0, y="str") - enabled(2, 4, x=4.0, y="str") # Retrace + enabled(2, 4, x=4.0, y="str") # Retrace self.assertEqual(trace_count[0], 2) def testFollowTypeHintsTraceWithNotAllNamed(self): @@ -4030,9 +4030,9 @@ class FunctionTest(test.TestCase, parameterized.TestCase): enabled = def_function.function(func, 
experimental_follow_type_hints=True) enabled(1, 2, 3) - enabled(1, 20, 3) # No retrace - change in ops.Tensor typed arg - enabled(2, 2, 3) # Retrace - change in untyped arg - enabled(2, 2, 4) # Retrace - change in typed arg + enabled(1, 20, 3) # No retrace - change in ops.Tensor typed arg + enabled(2, 2, 3) # Retrace - change in untyped arg + enabled(2, 2, 4) # Retrace - change in typed arg self.assertEqual(trace_count[0], 3) def testFollowTypeHintsTraceWithOnlyArgsNamed(self): @@ -4044,8 +4044,8 @@ class FunctionTest(test.TestCase, parameterized.TestCase): enabled = def_function.function(func, experimental_follow_type_hints=True) enabled(1, 20, 3, 4, 5, 6) - enabled(1, 20, 3, 4, 5, 60) # No retrace - change in *args - enabled(1, 30, 7, 8, 9, 10) # Retrace - change in args + enabled(1, 20, 3, 4, 5, 60) # No retrace - change in *args + enabled(1, 30, 7, 8, 9, 10) # Retrace - change in args self.assertEqual(trace_count[0], 2) def testFollowTypeHintsTraceWithOnlyKwargsNamed(self): @@ -4057,9 +4057,9 @@ class FunctionTest(test.TestCase, parameterized.TestCase): enabled = def_function.function(func, experimental_follow_type_hints=True) enabled(1, 2, 3, 4, 5, 6, a=1.0, b=2.0, c=3.0) - enabled(1, 2, 3, 4, 5, 6, a=1.5, b=2.5, c=3.5) # No retrace - change in **kwargs - enabled(100, 2, 3, 4, 5, 6, a=1.0, b=2.0, c=3.0) # Retrace - change in args - enabled(1, 2, 3, 4, 5, 100, a=1.0, b=2.0, c=3.0) # Retrace - change in *args + enabled(1, 2, 3, 4, 5, 6, a=1.5, b=2.5, c=3.5) # No retrace - change in **kwargs + enabled(100, 2, 3, 4, 5, 6, a=1.0, b=2.0, c=3.0) # Retrace - change in args + enabled(1, 2, 3, 4, 5, 100, a=1.0, b=2.0, c=3.0) # Retrace - change in *args self.assertEqual(trace_count[0], 3) def testFollowTypeHintsTraceWithArgsEquals(self): @@ -4071,9 +4071,9 @@ class FunctionTest(test.TestCase, parameterized.TestCase): enabled = def_function.function(func, experimental_follow_type_hints=True) enabled(x=1, y=2, z=3) - enabled(x=1, y=3, z=3) # Retrace - change in args - enabled(x=2, y=2, z=4) # No retrace - change in args and **kwargs - enabled(x=2, y=2, z=4, u=5) # Retrace - change in **kwargs + enabled(x=1, y=3, z=3) # Retrace - change in args + enabled(x=2, y=2, z=4) # No retrace - change in args and **kwargs + enabled(x=2, y=2, z=4, u=5) # Retrace - change in **kwargs self.assertEqual(trace_count[0], 3) def testFollowTypeHintsTraceWithArgsEqualsTypedKwargs(self): @@ -4085,10 +4085,10 @@ class FunctionTest(test.TestCase, parameterized.TestCase): enabled = def_function.function(func, experimental_follow_type_hints=True) enabled(x=1, y=2, z=3) - enabled(x=1, y=3, z=3) # Retrace - enabled(x=1, y=2, z=4) # No retrace - enabled(x=2, y=2, z=4) # Retrace - enabled(x=2, y=2, z=4, u=5) # Retrace + enabled(x=1, y=3, z=3) # Retrace + enabled(x=1, y=2, z=4) # No retrace + enabled(x=2, y=2, z=4) # Retrace + enabled(x=2, y=2, z=4, u=5) # Retrace self.assertEqual(trace_count[0], 4) def testFollowTypeHintsTraceWithArgsEqualsTypedArgs(self): @@ -4100,10 +4100,10 @@ class FunctionTest(test.TestCase, parameterized.TestCase): enabled = def_function.function(func, experimental_follow_type_hints=True) enabled(x=1, y=2, z=3) - enabled(x=1, y=3, z=3) # Retrace - enabled(x=1, y=2, z=4) # Retrace - enabled(x=2, y=2, z=3) # No retrace - enabled(x=2, y=2, z=4, u=5) # Retrace + enabled(x=1, y=3, z=3) # Retrace + enabled(x=1, y=2, z=4) # Retrace + enabled(x=2, y=2, z=3) # No retrace + enabled(x=2, y=2, z=4, u=5) # Retrace self.assertEqual(trace_count[0], 4) def testFollowTypeHintsTraceWithKwOnlyArgsBasic(self): @@ -4115,8 
+4115,8 @@ class FunctionTest(test.TestCase, parameterized.TestCase): enabled = def_function.function(func, experimental_follow_type_hints=True) enabled(a=1, b=2) - enabled(a=2, b=2) # No retrace - enabled(a=1, b=1) # Retrace + enabled(a=2, b=2) # No retrace + enabled(a=1, b=1) # Retrace self.assertEqual(trace_count[0], 2) def testFollowTypeHintsTraceWithArgsKwOnlyArgsKwargsAndTypedArg(self): @@ -4128,11 +4128,11 @@ class FunctionTest(test.TestCase, parameterized.TestCase): enabled = def_function.function(func, experimental_follow_type_hints=True) enabled(1, 2, 3, 4, kwonly=5, kwarg1=6, kwarg2=7) - enabled(100, 2, 3, 4, kwonly=5, kwarg1=6, kwarg2=7) # No retrace - enabled(1000, 2, 3, 4, kwonly=5, kwarg1=6, kwarg2=7) # No retrace - enabled(1, 20, 30, 40, kwonly=5, kwarg1=6, kwarg2=7) # Retrace - enabled(1, 2, 3, 4, kwonly=50, kwarg1=6, kwarg2=7) # Retrace - enabled(1, 2, 3, 4, kwonly=5, kwarg1=60, kwarg2=70) # Retrace + enabled(100, 2, 3, 4, kwonly=5, kwarg1=6, kwarg2=7) # No retrace + enabled(1000, 2, 3, 4, kwonly=5, kwarg1=6, kwarg2=7) # No retrace + enabled(1, 20, 30, 40, kwonly=5, kwarg1=6, kwarg2=7) # Retrace + enabled(1, 2, 3, 4, kwonly=50, kwarg1=6, kwarg2=7) # Retrace + enabled(1, 2, 3, 4, kwonly=5, kwarg1=60, kwarg2=70) # Retrace self.assertEqual(trace_count[0], 4) def testFollowTypeHintsTraceWithArgsKwOnlyArgsKwargsAndTypedArgs(self): @@ -4144,11 +4144,11 @@ class FunctionTest(test.TestCase, parameterized.TestCase): enabled = def_function.function(func, experimental_follow_type_hints=True) enabled(1, 2, 3, 4, kwonly=5, kwarg1=6, kwarg2=7) - enabled(100, 2, 3, 4, kwonly=5, kwarg1=6, kwarg2=7) # Retrace - enabled(1, 20, 30, 40, kwonly=5, kwarg1=6, kwarg2=7) # No retrace - enabled(1, 200, 300, 400, kwonly=5, kwarg1=6, kwarg2=7) # No retrace - enabled(1, 2, 3, 4, kwonly=50, kwarg1=6, kwarg2=7) # Retrace - enabled(1, 2, 3, 4, kwonly=5, kwarg1=60, kwarg2=70) # Retrace + enabled(100, 2, 3, 4, kwonly=5, kwarg1=6, kwarg2=7) # Retrace + enabled(1, 20, 30, 40, kwonly=5, kwarg1=6, kwarg2=7) # No retrace + enabled(1, 200, 300, 400, kwonly=5, kwarg1=6, kwarg2=7) # No retrace + enabled(1, 2, 3, 4, kwonly=50, kwarg1=6, kwarg2=7) # Retrace + enabled(1, 2, 3, 4, kwonly=5, kwarg1=60, kwarg2=70) # Retrace self.assertEqual(trace_count[0], 4) def testFollowTypeHintsTraceWithArgsKwOnlyArgsKwargsAndTypedKwOnlyArg(self): @@ -4160,11 +4160,11 @@ class FunctionTest(test.TestCase, parameterized.TestCase): enabled = def_function.function(func, experimental_follow_type_hints=True) enabled(1, 2, 3, 4, kwonly=5, kwarg1=6, kwarg2=7) - enabled(100, 2, 3, 4, kwonly=5, kwarg1=6, kwarg2=7) # Retrace - enabled(1, 20, 30, 40, kwonly=5, kwarg1=6, kwarg2=7) # Retrace - enabled(1, 2, 3, 4, kwonly=50, kwarg1=6, kwarg2=7) # No retrace - enabled(1, 2, 3, 4, kwonly=500, kwarg1=6, kwarg2=7) # No retrace - enabled(1, 2, 3, 4, kwonly=5, kwarg1=60, kwarg2=70) # Retrace + enabled(100, 2, 3, 4, kwonly=5, kwarg1=6, kwarg2=7) # Retrace + enabled(1, 20, 30, 40, kwonly=5, kwarg1=6, kwarg2=7) # Retrace + enabled(1, 2, 3, 4, kwonly=50, kwarg1=6, kwarg2=7) # No retrace + enabled(1, 2, 3, 4, kwonly=500, kwarg1=6, kwarg2=7) # No retrace + enabled(1, 2, 3, 4, kwonly=5, kwarg1=60, kwarg2=70) # Retrace self.assertEqual(trace_count[0], 4) def testFollowTypeHintsTraceWithArgsKwOnlyArgsKwargsAndTypedKwargs(self): @@ -4176,11 +4176,11 @@ class FunctionTest(test.TestCase, parameterized.TestCase): enabled = def_function.function(func, experimental_follow_type_hints=True) enabled(1, 2, 3, 4, kwonly=5, kwarg1=6, kwarg2=7) - enabled(100, 2, 3, 4, 
kwonly=5, kwarg1=6, kwarg2=7) # Retrace - enabled(1, 20, 30, 40, kwonly=5, kwarg1=6, kwarg2=7) # Retrace - enabled(1, 2, 3, 4, kwonly=50, kwarg1=6, kwarg2=7) # Retrace - enabled(1, 2, 3, 4, kwonly=5, kwarg1=60, kwarg2=70) # No retrace - enabled(1, 2, 3, 4, kwonly=5, kwarg1=600, kwarg2=700) # No retrace + enabled(100, 2, 3, 4, kwonly=5, kwarg1=6, kwarg2=7) # Retrace + enabled(1, 20, 30, 40, kwonly=5, kwarg1=6, kwarg2=7) # Retrace + enabled(1, 2, 3, 4, kwonly=50, kwarg1=6, kwarg2=7) # Retrace + enabled(1, 2, 3, 4, kwonly=5, kwarg1=60, kwarg2=70) # No retrace + enabled(1, 2, 3, 4, kwonly=5, kwarg1=600, kwarg2=700) # No retrace self.assertEqual(trace_count[0], 4) class MultiDeviceTest(test.TestCase, parameterized.TestCase): From 4661a4fa34205aad469e5197ac145ab9e272d386 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Fri, 5 Jun 2020 11:06:17 +0200 Subject: [PATCH 0144/2522] Improve test coverage of TF-TRT pool op converters --- .../tf2tensorrt/convert/convert_nodes.cc | 9 +- .../tf2tensorrt/convert/convert_nodes_test.cc | 294 +++++++++--------- 2 files changed, 149 insertions(+), 154 deletions(-) diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc index 28b27959afc..66c6e29cf5e 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc @@ -3508,8 +3508,13 @@ Status ConvertPool(OpConverterParams* params) { const auto& inputs = params->inputs; const auto& node_def = params->node_def; TF_RETURN_IF_ERROR(CheckInputsWeights(*params, {{"input", false}})); - TF_RETURN_IF_ERROR( - AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF})); +#if IS_TRT_VERSION_GE(5, 1, 0, 0) + std::set allowed_types{DataType::DT_FLOAT, DataType::DT_HALF, + DataType::DT_INT8}; +#else + std::set allowed_types{DataType::DT_FLOAT, DataType::DT_HALF}; +#endif + TF_RETURN_IF_ERROR(AllowDataTypes(*params, allowed_types)); nvinfer1::PoolingType type; if (node_def.op() == "MaxPool") { type = nvinfer1::PoolingType::kMAX; diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc index c24b169f651..ec714887936 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc @@ -4602,41 +4602,72 @@ TEST_F(OpConverterTest, ConvertConv3D) { ElementsAreArray(ok_params[i].expected_output)); } } +#endif -TEST_F(OpConverterTest, ConvertPool3D) { - // Get nodedef for MaxPool3D and AvgPool3D layers. - auto get_pool3d_nodedef = [](std::vector ksize = {1, 1, 1, 1, 1}, - std::vector strides = {1, 1, 1, 1, 1}, - string padding = "SAME", - string data_format = "NCDHW", - const bool is_max_pooling = true) -> NodeDef { - Scope s = Scope::NewRootScope(); - auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT); - +template +NodeDef CreatePoolOp(DataType tf_type, std::vector ksize, + std::vector strides, string padding, + string data_format) { + Scope s = Scope::NewRootScope(); + auto input = ops::Placeholder(s.WithOpName("input"), tf_type); + typename T::Attrs attrs; + attrs.data_format_ = data_format; + return T(s.WithOpName("my_pool"), input, ksize, strides, padding, attrs) + .operation.node() + ->def(); +} +TEST_P(OpConverterTest1, ConvertPool) { + // Get nodedef for MaxPool and AvgPool layers (2D or 3D). 
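// For illustration, a hypothetical call to the CreatePoolOp<> helper above
// (which the get_pool_nodedef lambda below wraps for the 2D/3D and max/avg
// combinations):
//
//   NodeDef def = CreatePoolOp<ops::MaxPool3D>(
//       DT_FLOAT, /*ksize=*/{1, 1, 3, 3, 3}, /*strides=*/{1, 1, 1, 1, 1},
//       /*padding=*/"VALID", /*data_format=*/"NCDHW");
//
// would be expected to yield a MaxPool3D node named "my_pool" that reads a
// DT_FLOAT Placeholder named "input", with data_format carried in the op attrs.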
+ auto get_pool_nodedef = + [](DataType tf_type, int nDim, std::vector ksize = {}, + std::vector strides = {}, string padding = "SAME", + string data_format = "", const bool is_max_pooling = true) -> NodeDef { + if (ksize.empty()) { + ksize = nDim == 2 ? std::vector{1, 1, 1, 1} + : std::vector{1, 1, 1, 1, 1}; + } + if (strides.empty()) { + strides = nDim == 2 ? std::vector{1, 1, 1, 1} + : std::vector{1, 1, 1, 1, 1}; + } + if (data_format == "") { + data_format = nDim == 2 ? "NCHW" : "NCDHW"; + } if (is_max_pooling) { - ops::MaxPool3D::Attrs attrs = - ops::MaxPool3D::Attrs().DataFormat(data_format); - auto pool3d = ops::MaxPool3D(s.WithOpName("my_maxpool3d"), input, ksize, - strides, padding, attrs); - return pool3d.operation.node()->def(); + if (nDim == 3) { + return CreatePoolOp(tf_type, ksize, strides, padding, + data_format); + } else { + return CreatePoolOp(tf_type, ksize, strides, padding, + data_format); + } } else { - ops::AvgPool3D::Attrs attrs = - ops::AvgPool3D::Attrs().DataFormat(data_format); - auto pool3d = ops::AvgPool3D(s.WithOpName("my_avgpool3d"), input, ksize, - strides, padding, attrs); - return pool3d.operation.node()->def(); + if (nDim == 3) { + return CreatePoolOp(tf_type, ksize, strides, padding, + data_format); + } else { + return CreatePoolOp(tf_type, ksize, strides, padding, + data_format); + } } }; - { +#if IS_TRT_VERSION_GE(6, 0, 0, 0) + std::vector test_nDims{2, 3}; +#else + std::vector test_nDims{2}; +#endif + + for (int nDim : test_nDims) { // Input is weights, should fail. Reset(); - NodeDef node_def = get_pool3d_nodedef(); + NodeDef node_def = get_pool_nodedef(tf_type, nDim); - AddTestWeights("input", {1, 2, 3}, {1, 2, 3, 4, 5, 6}); - RunValidationAndConversion( - node_def, error::UNIMPLEMENTED, - "The input \"input\" for MaxPool3D must be a tensor, at my_maxpool3d"); + AddTestWeights("input", {1, 1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}); + RunValidationAndConversion(node_def, error::UNIMPLEMENTED, + StrCat("The input \"input\" for ", node_def.op(), + " must be a tensor, at my_pool") + .c_str()); } struct TestParams { @@ -4646,150 +4677,109 @@ TEST_F(OpConverterTest, ConvertPool3D) { std::vector strides; string padding; string data_format; - bool is_max_pooling; std::vector expected_output_dims; - std::vector expected_output; + // The expected outputs for the following operations: MaxPool2D, AvgPool2D, + // MaxPool3D, AvgPool3D + std::vector> expected_outputs; }; - // Start here - const std::vector common_array{-4, 2, 15, 3, 6, -3, 22, 1, 88, + // We use common_input as the input to test both 2D and 3D pooling operations, + // to simplify TestParams. For 2D operations, only the first 1/3 of the values + // are used from in common_input. + const std::vector common_input{-4, 2, 15, 3, 6, -3, 22, 1, 88, 56, 36, 1, 1, 105, 1, 16, -28, 1, 42, 9, 3, 1, 7, 1, 11, 61, 5}; + // Output of 2D ops for the case when the op equivalent with the identity op. 
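// (Hand-check of the full-window 3x3 cases in ok_params below: over the first
// nine values of common_input the max is 88 and the mean is 130 / 9 = 14.444445,
// matching the MaxPool2D / AvgPool2D expectations, while over all 27 values the
// max is 105 and the mean is 459 / 27 = 17, matching MaxPool3D / AvgPool3D.)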
+ const std::vector common_2d_output{-4, 2, 15, 3, 6, -3, 22, 1, 88}; std::vector ok_params = { // Basic - just 1x1 max pooling - input = output - TestParams{/*input_dims=*/{1, 3, 3, 3}, - /*input=*/common_array, - /*ksize=*/{1, 1, 1, 1, 1}, - /*strides=*/{1, 1, 1, 1, 1}, - /*padding=*/"VALID", - /*data_format=*/"NCDHW", - /*is_max_pooling=*/true, - /*expected_output_dims=*/{1, 3, 3, 3}, - /*expected_output=*/common_array}, - // Basic - just 1x1 avg pooling - input = output - TestParams{/*input_dims=*/{1, 3, 3, 3}, - /*input=*/common_array, - /*ksize=*/{1, 1, 1, 1, 1}, - /*strides=*/{1, 1, 1, 1, 1}, - /*padding=*/"VALID", - /*data_format=*/"NCDHW", - /*is_max_pooling=*/false, - /*expected_output_dims=*/{1, 3, 3, 3}, - /*expected_output=*/common_array}, + TestParams{ + /*input_dims=*/{1, 1, 3, 3, 3}, + /*input=*/common_input, + /*ksize=*/{1, 1, 1, 1, 1}, + /*strides=*/{1, 1, 1, 1, 1}, + /*padding=*/"VALID", + /*data_format=*/"NCDHW", + /*expected_output_dims=*/{1, 1, 3, 3, 3}, + /*expected_outputs=*/ + {common_2d_output, common_2d_output, common_input, common_input}}, // Basic - just 1x1 max pooling - input = output, SAME padding - TestParams{/*input_dims=*/{1, 3, 3, 3}, - /*input=*/common_array, - /*ksize=*/{1, 1, 1, 1, 1}, - /*strides=*/{1, 1, 1, 1, 1}, - /*padding=*/"SAME", - /*data_format=*/"NCDHW", - /*is_max_pooling=*/true, - /*expected_output_dims=*/{1, 3, 3, 3}, - /*expected_output=*/common_array}, - // Basic - just 1x1 avg pooling - input = output, SAME padding - TestParams{/*input_dims=*/{1, 3, 3, 3}, - /*input=*/common_array, - /*ksize=*/{1, 1, 1, 1, 1}, - /*strides=*/{1, 1, 1, 1, 1}, - /*padding=*/"VALID", - /*data_format=*/"NCDHW", - /*is_max_pooling=*/false, - /*expected_output_dims=*/{1, 3, 3, 3}, - /*expected_output=*/common_array}, - // 3x3 max pooling - TestParams{/*input_dims=*/{1, 3, 3, 3}, - /*input=*/common_array, + TestParams{ + /*input_dims=*/{1, 1, 3, 3, 3}, + /*input=*/common_input, + /*ksize=*/{1, 1, 1, 1, 1}, + /*strides=*/{1, 1, 1, 1, 1}, + /*padding=*/"SAME", + /*data_format=*/"NCDHW", + /*expected_output_dims=*/{1, 1, 3, 3, 3}, + /*expected_outputs=*/ + {common_2d_output, common_2d_output, common_input, common_input}}, + // 3x3 pooling NCDHW + TestParams{/*input_dims=*/{1, 1, 3, 3, 3}, + /*input=*/common_input, /*ksize=*/{1, 1, 3, 3, 3}, /*strides=*/{1, 1, 1, 1, 1}, /*padding=*/"VALID", /*data_format=*/"NCDHW", - /*is_max_pooling=*/true, - /*expected_output_dims=*/{1, 1, 1, 1}, - /*expected_output=*/{105}}, - // 3x3 avg pooling - TestParams{/*input_dims=*/{1, 3, 3, 3}, - /*input=*/common_array, - /*ksize=*/{1, 1, 3, 3, 3}, + /*expected_output_dims=*/{1, 1, 1, 1, 1}, + /*expected_outputs=*/{{88}, {14.444445}, {105}, {17}}}, + // 3x3 pooling, NDHWC + TestParams{/*input_dims=*/{1, 3, 3, 3, 1}, + /*input=*/common_input, + /*ksize=*/{1, 3, 3, 3, 1}, /*strides=*/{1, 1, 1, 1, 1}, /*padding=*/"VALID", + /*data_format=*/"NDHWC", + /*expected_output_dims=*/{1, 1, 1, 1, 1}, + /*expected_outputs=*/{{88}, {14.444445}, {105}, {17}}}, + // Strided + TestParams{/*input_dims=*/{1, 1, 3, 3, 3}, + /*input=*/{1, 0, 2, 0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 5, 0, 6, 0, 0, 0, 7, 0, 8}, + /*ksize=*/{1, 1, 1, 1, 1}, + /*strides=*/{1, 1, 2, 2, 2}, + /*padding=*/"VALID", /*data_format=*/"NCDHW", - /*is_max_pooling=*/false, - /*expected_output_dims=*/{1, 1, 1, 1}, - /*expected_output=*/{17}}, - // 3x3 max pooling, NDHWC - TestParams{/*input_dims=*/{3, 3, 3, 1}, - /*input=*/common_array, - /*ksize=*/{1, 3, 3, 3, 1}, - /*strides=*/{1, 1, 1, 1, 1}, - /*padding=*/"VALID", - 
/*data_format=*/"NDHWC", - /*is_max_pooling=*/true, - /*expected_output_dims=*/{1, 1, 1, 1}, - /*expected_output=*/{105}}, - // 3x3 avg pooling, NDHWC - TestParams{/*input_dims=*/{3, 3, 3, 1}, - /*input=*/common_array, - /*ksize=*/{1, 3, 3, 3, 1}, - /*strides=*/{1, 1, 1, 1, 1}, - /*padding=*/"VALID", - /*data_format=*/"NDHWC", - /*is_max_pooling=*/false, - /*expected_output_dims=*/{1, 1, 1, 1}, - /*expected_output=*/{17}}, - // Strided max - TestParams{ - /*input_dims=*/{1, 3, 3, 3}, - /*input=*/{1, 0, 2, 0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 5, 0, 6, 0, 0, 0, 7, 0, 8}, - /*ksize=*/{1, 1, 1, 1, 1}, - /*strides=*/{1, 1, 2, 2, 2}, - /*padding=*/"VALID", - /*data_format=*/"NCDHW", - /*is_max_pooling=*/true, - /*expected_output_dims=*/{1, 2, 2, 2}, - /*expected_output=*/{1, 2, 3, 4, 5, 6, 7, 8} // Should only pick up - // the corners - }, - // Strided avg - TestParams{ - /*input_dims=*/{1, 3, 3, 3}, - /*input=*/{1, 0, 2, 0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 5, 0, 6, 0, 0, 0, 7, 0, 8}, - /*ksize=*/{1, 1, 1, 1, 1}, - /*strides=*/{1, 1, 2, 2, 2}, - /*padding=*/"VALID", - /*data_format=*/"NCDHW", - /*is_max_pooling=*/false, - /*expected_output_dims=*/{1, 2, 2, 2}, - /*expected_output=*/{1, 2, 3, 4, 5, 6, 7, 8} // Should only pick up - // the corners - }}; + /*expected_output_dims=*/{1, 1, 2, 2, 2}, + /*expected_outputs=*/ + {{1, 2, 3, 4}, // Should only pick up the corners + {1, 2, 3, 4}, + {1, 2, 3, 4, 5, 6, 7, 8}, + {1, 2, 3, 4, 5, 6, 7, 8}}}, + }; - for (int i = 0; i < ok_params.size(); i++) { - Reset(); - NodeDef node_def = get_pool3d_nodedef( - ok_params[i].ksize, ok_params[i].strides, ok_params[i].padding, - ok_params[i].data_format, ok_params[i].is_max_pooling); - AddTestTensor("input", ok_params[i].input_dims); - RunValidationAndConversion(node_def); - TRT_TensorOrWeights output; - string expected_node_name = - ok_params[i].is_max_pooling ? "my_maxpool3d" : "my_avgpool3d"; - TF_EXPECT_OK(GetTensorOrWeights(expected_node_name, &output)); - ASSERT_TRUE(output.is_tensor()); - ExpectTrtDimsEqualsArray(ok_params[i].expected_output_dims, - output.tensor()->getDimensions()); - - const DataVec input_data{{"input", AsTensor(ok_params[i].input)}}; - DataVec output_data{ - {expected_node_name, - ConstructTensor(ok_params[i].expected_output.size())}}; - TF_EXPECT_OK(BuildAndRun(input_data, &output_data)); - EXPECT_THAT(GetSpanForData(output_data[0]), - ElementsAreArray(ok_params[i].expected_output)); + for (auto p : ok_params) { + int test_counter = 0; + for (int nDim : test_nDims) { + auto input = p.input; + auto input_dims = p.input_dims; + auto ksize = p.ksize; + auto strides = p.strides; + auto expected_output_dims = p.expected_output_dims; + std::string data_format = p.data_format; + if (nDim == 2) { + input.resize(9); + data_format = p.data_format == "NDHWC" ? 
"NHWC" : "NCHW"; + // Remove one of the spatial dimensions + input_dims.erase(input_dims.begin() + 2); + ksize.erase(ksize.begin() + 2); + strides.erase(strides.begin() + 2); + expected_output_dims.erase(expected_output_dims.begin() + 2); + } + for (bool is_max_pooling : {true, false}) { + Reset(); + NodeDef node_def = + get_pool_nodedef(tf_type, nDim, ksize, strides, p.padding, + data_format, is_max_pooling); + AddTestTensor("input", input_dims, input); + TestOpConverter("my_pool", node_def, expected_output_dims, Status::OK(), + Status::OK(), + ElementsAreArray(p.expected_outputs.at(test_counter))); + test_counter++; + } + } } } -#endif // IS_TRT_VERSION_GE(6, 0, 0, 0) TEST_F(OpConverterTest, ConvertTopK) { // TODO(tmorris): This test isn't setting the input dtype properly. TopK with From ea6b83192106bdc145624c73193802cc872ed5d8 Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Mon, 6 Jul 2020 22:30:56 +0000 Subject: [PATCH 0145/2522] tests passing for summary op, added shape debug string --- tensorflow/c/kernels/summary_op.cc | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/tensorflow/c/kernels/summary_op.cc b/tensorflow/c/kernels/summary_op.cc index c3d7fa84aaa..cad7bd646ed 100644 --- a/tensorflow/c/kernels/summary_op.cc +++ b/tensorflow/c/kernels/summary_op.cc @@ -43,16 +43,6 @@ static void SummaryScalarOp_Delete(void* kernel) { bool IsSameSize(TF_Tensor* tensor1, TF_Tensor* tensor2); static tensorflow::string SingleTag(TF_Tensor* tags); -template -float get_float_value(T* element){ - return static_cast(*element); -} - -template<> -float get_float_value(Eigen::half* element){ - return Eigen::half_impl::half_to_float(*element); -} - template static void SummaryScalarOp_Compute(void* kernel, TF_OpKernelContext* ctx) { TF_Tensor* tags; @@ -82,18 +72,21 @@ static void SummaryScalarOp_Compute(void* kernel, TF_OpKernelContext* ctx) { tensorflow::Summary::Value* v = s.add_value(); const tensorflow::tstring& Ttags_i = tags_array[i]; v->set_tag(Ttags_i.data(), Ttags_i.size()); - v->set_simple_value(get_float_value(&values_array[i])); + v->set_simple_value(float(values_array[i])); } - TF_Tensor* summary_tensor = TF_AllocateOutput(ctx, 0 + TF_Tensor* summary_tensor = TF_AllocateOutput(ctx, 0, TF_ExpectedOutputDataType(ctx, 0), nullptr, 0, sizeof(tensorflow::tstring), status); if (TF_GetCode(status) == TF_OK) { tensorflow::tstring summary_tstring; SerializeToTString(s, &summary_tstring); - *(TF_TensorData(summary_tensor)) = &summary_tstring; - TF_SetOutput(ctx, 0, summary_tensor, status); + TF_TString* output_tf_tstring = reinterpret_cast(TF_TensorData(summary_tensor)); + TF_TString_Init(output_tf_tstring); + tensorflow::tstring* output_tstring = reinterpret_cast(output_tf_tstring); + *output_tstring = summary_tstring; // may want to use std::move } TF_DeleteTensor(summary_tensor); + } if (TF_GetCode(status) != TF_OK) { TF_OpKernelContext_Failure(ctx, status); From e52df6b5f34a76f3974132a5b3bf2653bed1e660 Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Mon, 6 Jul 2020 23:27:50 +0000 Subject: [PATCH 0146/2522] fixed naming for variables --- tensorflow/c/kernels.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/c/kernels.cc b/tensorflow/c/kernels.cc index 749e6e89b8b..5f5bd9779b1 100644 --- a/tensorflow/c/kernels.cc +++ b/tensorflow/c/kernels.cc @@ -276,17 +276,17 @@ TF_Tensor* TF_AllocateTemp(TF_OpKernelContext* context, TF_DataType dtype, shape.AddDim(dims[i]); } tensorflow::Status s; - tensorflow::Tensor 
tensor_temp; - TF_Tensor* tf_tensor_temp; - s = cc_ctx->allocate_temp(static_cast(dtype), shape, &tensor_temp); + tensorflow::Tensor tensor; + TF_Tensor* tf_tensor; + s = cc_ctx->allocate_temp(static_cast(dtype), shape, &tensor); if (!s.ok()){ ::tensorflow::Set_TF_Status_from_Status(status, s); return nullptr; } - tf_tensor_temp = TF_TensorFromTensor(tensor_temp, &s); + tf_tensor = TF_TensorFromTensor(tensor, &s); if (!s.ok()){ ::tensorflow::Set_TF_Status_from_Status(status, s); return nullptr; } - return tf_tensor_temp; + return tf_tensor; } From c79fbeed1a2ceba2f270d621cf656f466c59f414 Mon Sep 17 00:00:00 2001 From: "aaa.jq" <895521320@qq.com> Date: Tue, 7 Jul 2020 10:42:21 +0800 Subject: [PATCH 0147/2522] Add unit test: `testNoEmptyRowsAndUnordered` Add unit test for `SparseFillEmptyRows`: When the sp_input is unordered and all rows of the sp_input are not empty, the output of `tf.sparse.fill_empty_rows` is still ordered. --- .../python/kernel_tests/sparse_ops_test.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tensorflow/python/kernel_tests/sparse_ops_test.py b/tensorflow/python/kernel_tests/sparse_ops_test.py index e4cc2046c64..7d097b79659 100644 --- a/tensorflow/python/kernel_tests/sparse_ops_test.py +++ b/tensorflow/python/kernel_tests/sparse_ops_test.py @@ -585,6 +585,22 @@ class SparseFillEmptyRowsTest(test_util.TensorFlowTestCase): self.assertAllEqual(output.dense_shape, [2, 6]) self.assertAllEqual(empty_row_indicator_out, np.zeros(2).astype(np.bool)) + def testNoEmptyRowsAndUnordered(self): + with test_util.force_cpu(): + sp_input = sparse_tensor.SparseTensor( + indices=np.array([[1, 2], [1, 3], [0, 1], [0, 3]]), + values=np.array([1, 3, 2, 4]), + dense_shape=np.array([2, 5])) + sp_output, empty_row_indicator = (sparse_ops.sparse_fill_empty_rows( + sp_input, -1)) + + output, empty_row_indicator_out = self.evaluate( + [sp_output, empty_row_indicator]) + + self.assertAllEqual(output.indices, [[0, 1], [0, 3], [1, 2], [1, 3]]) + self.assertAllEqual(output.values, [2, 4, 1, 3]) + self.assertAllEqual(output.dense_shape, [2, 5]) + self.assertAllEqual(empty_row_indicator_out, np.zeros(2).astype(np.bool)) class SparseAddTest(test_util.TensorFlowTestCase): From e0170c9a25948857bbfe69555ac7d06d06586d7f Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Tue, 7 Jul 2020 03:42:07 +0000 Subject: [PATCH 0148/2522] working op: include variant_encode_decode.h in map_kernels.h --- tensorflow/core/kernels/map_kernels.cc | 16 ++++++++++++++++ tensorflow/core/kernels/map_kernels.h | 12 +++++++++--- tensorflow/core/ops/map_ops.cc | 12 ++++++------ tensorflow/python/kernel_tests/map_ops_test.py | 16 +++++++++++++--- 4 files changed, 44 insertions(+), 12 deletions(-) diff --git a/tensorflow/core/kernels/map_kernels.cc b/tensorflow/core/kernels/map_kernels.cc index 91dfbe8c384..7d5d2732bb6 100644 --- a/tensorflow/core/kernels/map_kernels.cc +++ b/tensorflow/core/kernels/map_kernels.cc @@ -1,6 +1,22 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ #include "tensorflow/core/kernels/map_kernels.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/types.h" +//#include "tensorflow/core/framework/variant.h" +//#include "tensorflow/core/framework/variant_op_registry.h" namespace tensorflow { REGISTER_KERNEL_BUILDER(Name("EmptyTensorMap").Device(DEVICE_CPU), diff --git a/tensorflow/core/kernels/map_kernels.h b/tensorflow/core/kernels/map_kernels.h index 1ed7c663a57..88028b13dea 100644 --- a/tensorflow/core/kernels/map_kernels.h +++ b/tensorflow/core/kernels/map_kernels.h @@ -1,4 +1,4 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -12,9 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#ifndef TENSORFLOW_CORE_KERNELS_MAP_KERNELS_H_ +#define TENSORFLOW_CORE_KERNELS_MAP_KERNELS_H_ #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/kernels/tensor_map.h" +//#include "tensorflow/core/framework/variant.h" +//#include "tensorflow/core/framework/variant_op_registry.h" +#include "tensorflow/core/framework/variant_encode_decode.h" + #include using namespace std; @@ -79,6 +85,6 @@ class ZeroOutOp : public OpKernel { } }; - - } // namespace tensorflow + +#endif // TENSORFLOW_CORE_KERNELS_MAP_KERNELS_H_ diff --git a/tensorflow/core/ops/map_ops.cc b/tensorflow/core/ops/map_ops.cc index f1d7b291a70..d8ecb5ff0ed 100644 --- a/tensorflow/core/ops/map_ops.cc +++ b/tensorflow/core/ops/map_ops.cc @@ -21,21 +21,21 @@ namespace tensorflow { namespace { REGISTER_OP("EmptyTensorMap") -// .Input("element_shape: shape_type") -// .Input("max_num_elements: int32") + .Input("element_shape: shape_type") + .Input("max_num_elements: int32") .Output("handle: variant") -// .Attr("element_dtype: type") -// .Attr("shape_type: {int32, int64}") + .Attr("element_dtype: type") + .Attr("shape_type: {int32, int64}") .SetShapeFn([](shape_inference::InferenceContext* c) { c->set_output(0, c->Scalar()); - /*DataType element_dtype; + DataType element_dtype; TF_RETURN_IF_ERROR(c->GetAttr("element_dtype", &element_dtype)); shape_inference::ShapeHandle element_shape; TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensorTreatScalarAsUnknownShape( 0, &element_shape)); c->set_output_handle_shapes_and_types( 0, std::vector{ - {element_shape, element_dtype}});*/ + {element_shape, element_dtype}}); return Status::OK(); }); diff --git a/tensorflow/python/kernel_tests/map_ops_test.py b/tensorflow/python/kernel_tests/map_ops_test.py index 7b9654886f3..a8364c19d8d 100644 --- a/tensorflow/python/kernel_tests/map_ops_test.py +++ b/tensorflow/python/kernel_tests/map_ops_test.py @@ -23,18 +23,27 @@ from tensorflow.python.platform import test from absl.testing import parameterized from tensorflow.python.framework import test_util +from tensorflow.python.client import session +from tensorflow.python.eager import backprop +from tensorflow.python.eager import context +from tensorflow.python.eager import def_function +from tensorflow.python.eager import 
function +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes + #try: # from tensorflow_zero_out.python.ops.zero_out_ops import zero_out #except ImportError: # from zero_out_ops import zero_out from tensorflow.python.ops import map_ops +@test_util.run_all_in_graph_and_eager_modes class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): - """ @parameterized.named_parameters(("NoMaxNumElements", None), ("WithMaxNumElements", 2)) @test_util.run_deprecated_v1 def testEraseFromEmptyTensorMapFails(self, max_num_elements): + print("hello world testErase") m = map_ops.empty_tensor_map( element_dtype=dtypes.float32, element_shape=[], @@ -43,14 +52,15 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): "Trying to erase from an empty map"): m = map_ops.tensor_map_erase(l, element_dtype=dtypes.float32) self.evaluate(l) - """ + def testZeroOut(self): - print("Hello World - Test") + print("hello world testZeroOut") with self.test_session(): self.assertAllClose( map_ops.zero_out([[1, 2], [3, 4]]), np.array([[1, 0], [0, 0]])) if __name__ == '__main__': + print("hihihi") test.main() \ No newline at end of file From 38071c95b282ecfa5737a99fcb7256043074ff2c Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Tue, 7 Jul 2020 07:04:31 +0000 Subject: [PATCH 0149/2522] working EmptyTensorMap op --- tensorflow/core/kernels/map_kernels.cc | 2 - tensorflow/core/kernels/map_kernels.h | 29 +++---- tensorflow/core/ops/map_ops.cc | 81 +++++++++++++++++-- .../python/kernel_tests/map_ops_test.py | 12 +-- tensorflow/python/ops/map_ops.py | 13 +-- 5 files changed, 97 insertions(+), 40 deletions(-) diff --git a/tensorflow/core/kernels/map_kernels.cc b/tensorflow/core/kernels/map_kernels.cc index 7d5d2732bb6..12c932eb83e 100644 --- a/tensorflow/core/kernels/map_kernels.cc +++ b/tensorflow/core/kernels/map_kernels.cc @@ -15,8 +15,6 @@ limitations under the License. #include "tensorflow/core/kernels/map_kernels.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/types.h" -//#include "tensorflow/core/framework/variant.h" -//#include "tensorflow/core/framework/variant_op_registry.h" namespace tensorflow { REGISTER_KERNEL_BUILDER(Name("EmptyTensorMap").Device(DEVICE_CPU), diff --git a/tensorflow/core/kernels/map_kernels.h b/tensorflow/core/kernels/map_kernels.h index 88028b13dea..1b344718261 100644 --- a/tensorflow/core/kernels/map_kernels.h +++ b/tensorflow/core/kernels/map_kernels.h @@ -17,8 +17,6 @@ limitations under the License. 
#include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/kernels/tensor_map.h" -//#include "tensorflow/core/framework/variant.h" -//#include "tensorflow/core/framework/variant_op_registry.h" #include "tensorflow/core/framework/variant_encode_decode.h" #include @@ -28,27 +26,14 @@ namespace tensorflow { class EmptyTensorMap : public OpKernel { public: - explicit EmptyTensorMap(OpKernelConstruction* ctx) : OpKernel(ctx) { - OP_REQUIRES_OK(ctx, ctx->GetAttr("element_dtype", &element_dtype_)); - } + explicit EmptyTensorMap(OpKernelConstruction* ctx) : OpKernel(ctx) {} void Compute(OpKernelContext* ctx) override { - const Tensor& max_num_elements_t = ctx->input(1); - OP_REQUIRES( - ctx, TensorShapeUtils::IsScalar(max_num_elements_t.shape()), - errors::InvalidArgument( - "max_num_elements expected to be a scalar ", - "but got shape: ", max_num_elements_t.shape().DebugString())); Tensor* result; AllocatorAttributes attr; attr.set_on_host(true); OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape{}, &result, attr)); TensorMap empty; - empty.element_dtype = element_dtype_; - empty.max_num_elements = max_num_elements_t.scalar()(); - PartialTensorShape element_shape; - //OP_REQUIRES_OK(ctx, TensorShapeFromTensor(ctx->input(0), &element_shape)); - empty.element_shape = element_shape; result->scalar()() = std::move(empty); } @@ -56,7 +41,19 @@ class EmptyTensorMap : public OpKernel { DataType element_dtype_; }; +class TensorMapSize : public OpKernel { + public: + explicit TensorMapSize(OpKernelConstruction* c) : OpKernel(c) {} + ~TEnsorMapSize() override {} + void Compute(OpKernelContext* c) override { + const TensorMap* m = nullptr; + OP_REQUIRES_OK(c, GetInputList(c, 0, &m)); + Tensor* result; + OP_REQUIRES_OK(c, c->allocate_output(0, TensorShape{}, &result)); + result->scalar()() = m->tensors().size(); + } +}; class ZeroOutOp : public OpKernel { public: diff --git a/tensorflow/core/ops/map_ops.cc b/tensorflow/core/ops/map_ops.cc index d8ecb5ff0ed..ab2fdef9127 100644 --- a/tensorflow/core/ops/map_ops.cc +++ b/tensorflow/core/ops/map_ops.cc @@ -21,24 +21,93 @@ namespace tensorflow { namespace { REGISTER_OP("EmptyTensorMap") - .Input("element_shape: shape_type") - .Input("max_num_elements: int32") .Output("handle: variant") + .SetShapeFn([](shape_inference::InferenceContext* c) { + c->set_output(0, c->Scalar()); + return Status::OK(); + }); + +REGISTER_OP("TensorMapInsert") + .Input("input_handle: variant") + .Input("key: element_dtype") + .Input("value: element_dtype") + .Output("output_handle: variant") .Attr("element_dtype: type") - .Attr("shape_type: {int32, int64}") .SetShapeFn([](shape_inference::InferenceContext* c) { c->set_output(0, c->Scalar()); DataType element_dtype; TF_RETURN_IF_ERROR(c->GetAttr("element_dtype", &element_dtype)); - shape_inference::ShapeHandle element_shape; - TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensorTreatScalarAsUnknownShape( - 0, &element_shape)); + shape_inference::ShapeHandle element_shape = c->UnknownShape(); + + auto* handle_data = c->input_handle_shapes_and_types(0); + if (handle_data != nullptr && handle_data->size() > 1) { + return errors::InvalidArgument( + "Trying to push to list with wrong variant data."); + } + if (IsValidTensorListHandleData(handle_data)) { + const shape_inference::ShapeAndType& list_shape_type = + (*handle_data)[0]; + if (list_shape_type.dtype != element_dtype) { + return errors::InvalidArgument( + "Trying to push to list with wrong element dtype. 
List has type ", + DataTypeString(list_shape_type.dtype), + " but trying to push element with type ", + DataTypeString(element_dtype)); + } + shape_inference::ShapeHandle ignored; + TF_RETURN_IF_ERROR( + c->Merge(element_shape, list_shape_type.shape, &ignored)); + element_shape = list_shape_type.shape; + } c->set_output_handle_shapes_and_types( 0, std::vector{ {element_shape, element_dtype}}); return Status::OK(); }); +REGISTER_OP("TensorMapSize") + .Input("input_handle: variant") + .Output("size: int32") + .SetShapeFn(shape_inference::ScalarShape); + +/*REGISTER_OP("TensorMapErase") + .Input("input_handle: variant") + .Input("element_shape: int32") + .Output("output_handle: variant") + .Output("tensor: element_dtype") + .Attr("element_dtype: type") + .SetShapeFn([](shape_inference::InferenceContext* c) { + DataType element_dtype; + TF_RETURN_IF_ERROR(c->GetAttr("element_dtype", &element_dtype)); + shape_inference::ShapeHandle tensor_shape = c->UnknownShape(); + auto* handle_data = c->input_handle_shapes_and_types(0); + if (handle_data != nullptr && handle_data->size() > 1) { + return errors::InvalidArgument( + "Trying to read from list with invalid variant data."); + } + if (IsValidTensorListHandleData(handle_data)) { + const shape_inference::ShapeAndType& list_shape_type = + (*handle_data)[0]; + if (list_shape_type.dtype != element_dtype) { + return errors::InvalidArgument( + "Trying to read from list with wrong element dtype. List has " + "type ", + DataTypeString(list_shape_type.dtype), + " but trying to push element with type ", + DataTypeString(element_dtype)); + } + shape_inference::ShapeHandle ignored; + TF_RETURN_IF_ERROR( + c->Merge(tensor_shape, list_shape_type.shape, &ignored)); + c->set_output_handle_shapes_and_types(0, *handle_data); + tensor_shape = list_shape_type.shape; + } + c->set_output(1, tensor_shape); + c->set_output(0, c->Scalar()); + return Status::OK(); + });*/ + + REGISTER_OP("ZeroOut") .Input("to_zero: int32") .Output("zeroed: int32") diff --git a/tensorflow/python/kernel_tests/map_ops_test.py b/tensorflow/python/kernel_tests/map_ops_test.py index a8364c19d8d..726c97a639b 100644 --- a/tensorflow/python/kernel_tests/map_ops_test.py +++ b/tensorflow/python/kernel_tests/map_ops_test.py @@ -39,20 +39,22 @@ from tensorflow.python.ops import map_ops @test_util.run_all_in_graph_and_eager_modes class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): + def testEmptyTensorMap(self): + m = map_ops.empty_tensor_map() + print("empty tensor map created") + + ''' @parameterized.named_parameters(("NoMaxNumElements", None), ("WithMaxNumElements", 2)) @test_util.run_deprecated_v1 def testEraseFromEmptyTensorMapFails(self, max_num_elements): print("hello world testErase") - m = map_ops.empty_tensor_map( - element_dtype=dtypes.float32, - element_shape=[], - max_num_elements=max_num_elements) + m = map_ops.empty_tensor_map() with self.assertRaisesRegexp(errors.InvalidArgumentError, "Trying to erase from an empty map"): m = map_ops.tensor_map_erase(l, element_dtype=dtypes.float32) self.evaluate(l) - + ''' def testZeroOut(self): print("hello world testZeroOut") diff --git a/tensorflow/python/ops/map_ops.py b/tensorflow/python/ops/map_ops.py index 03acaa8fb72..21c58fb773d 100644 --- a/tensorflow/python/ops/map_ops.py +++ b/tensorflow/python/ops/map_ops.py @@ -30,17 +30,8 @@ from tensorflow.python.ops.gen_map_ops import * # resource_loader.get_path_to_datafile('_zero_out_ops.so')) #zero_out = zero_out_ops.zero_out -def empty_tensor_map(element_shape, - element_dtype, - 
max_num_elements=None, - name=None): - if max_num_elements is None: - max_num_elements = -1 - - return gen_map_ops.empty_tensor_map(element_shape, - element_dtype, - max_num_elements, - name) +def empty_tensor_map(): + return gen_map_ops.empty_tensor_map() def zero_out(to_zero): print("Hello World - Python Op") From 93baf2ca3508524e1dedbe24bd38a025d6b9eaf8 Mon Sep 17 00:00:00 2001 From: Vladimir Menshakov Date: Tue, 19 May 2020 16:09:17 +0100 Subject: [PATCH 0150/2522] Add explicit block_size to TriangularSolveExpander constructor This small patch allows passing block_size explicitly, removing hardcoded value of 128. Provide test for triangular solve expander using different block_size values --- tensorflow/compiler/xla/service/BUILD | 21 ++++ .../xla/service/triangular_solve_expander.cc | 5 +- .../xla/service/triangular_solve_expander.h | 4 + .../service/triangular_solve_expander_test.cc | 108 ++++++++++++++++++ 4 files changed, 137 insertions(+), 1 deletion(-) create mode 100644 tensorflow/compiler/xla/service/triangular_solve_expander_test.cc diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 126b62a8eb2..f5e267b874c 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -1808,6 +1808,27 @@ cc_library( ], ) +tf_cc_test( + name = "triangular_solve_expander_test", + size = "medium", + srcs = ["triangular_solve_expander_test.cc"], + shard_count = 3, + deps = [ + ":hlo", + ":triangular_solve_expander", + "//tensorflow/compiler/jit:xla_cpu_jit", + "//tensorflow/compiler/xla:literal", + "//tensorflow/compiler/xla:reference_util", + "//tensorflow/compiler/xla:test", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:verified_hlo_module", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/core:test", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/memory", + ], +) + cc_library( name = "cholesky_expander", srcs = ["cholesky_expander.cc"], diff --git a/tensorflow/compiler/xla/service/triangular_solve_expander.cc b/tensorflow/compiler/xla/service/triangular_solve_expander.cc index cc483c310e8..d54eb9e78c3 100644 --- a/tensorflow/compiler/xla/service/triangular_solve_expander.cc +++ b/tensorflow/compiler/xla/service/triangular_solve_expander.cc @@ -454,6 +454,9 @@ XlaOp BuildTriangularSolve(XlaOp a, XlaOp b, bool left_side, bool lower, } // namespace +TriangularSolveExpander::TriangularSolveExpander(int64 block_size) + : block_size_(block_size) {} + bool TriangularSolveExpander::InstructionMatchesPattern( HloInstruction* instruction) { return instruction->opcode() == HloOpcode::kTriangularSolve; @@ -496,7 +499,7 @@ StatusOr TriangularSolveExpander::ExpandInstruction( BuildTriangularSolve(a, b, options.left_side(), options.lower(), transpose_a, conjugate_a, options.unit_diagonal(), - /*block_size=*/128, + /*block_size=*/block_size_, /*precision=*/PrecisionConfig::HIGHEST); TF_ASSIGN_OR_RETURN(XlaComputation xla_computation, builder.Build()); diff --git a/tensorflow/compiler/xla/service/triangular_solve_expander.h b/tensorflow/compiler/xla/service/triangular_solve_expander.h index be2374ef8c8..362e8557229 100644 --- a/tensorflow/compiler/xla/service/triangular_solve_expander.h +++ b/tensorflow/compiler/xla/service/triangular_solve_expander.h @@ -23,6 +23,8 @@ namespace xla { class TriangularSolveExpander : public OpExpanderPass { public: + explicit TriangularSolveExpander(int64 block_size = 128); + absl::string_view name() const 
override { return "triangular_solve_expander"; } @@ -34,6 +36,8 @@ class TriangularSolveExpander : public OpExpanderPass { HloInstruction* instruction) override; private: + // Block size for BuildTriangularSolve + const int64 block_size_; // Mapping from op signatures to existing computations. absl::flat_hash_map computation_cache_; }; diff --git a/tensorflow/compiler/xla/service/triangular_solve_expander_test.cc b/tensorflow/compiler/xla/service/triangular_solve_expander_test.cc new file mode 100644 index 00000000000..6cc95aba5d5 --- /dev/null +++ b/tensorflow/compiler/xla/service/triangular_solve_expander_test.cc @@ -0,0 +1,108 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/triangular_solve_expander.h" + +#include +#include + +#include "tensorflow/compiler/xla/literal.h" +#include "tensorflow/compiler/xla/reference_util.h" +#include "tensorflow/compiler/xla/test.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/core/lib/core/status_test_util.h" + +namespace xla { +namespace { + +class TriangularExpanderTest : public HloTestBase, + public ::testing::WithParamInterface {}; + +TEST_P(TriangularExpanderTest, TestBlockSize) { + auto block_size = GetParam(); + std::string hlo_string = R"( + HloModule TensorFlowTriangularSolve + + ENTRY main { + a = f32[256,256]{1,0} parameter(0) + b = f32[256,192]{1,0} parameter(1) + ROOT triangular-solve = f32[256,192]{1,0} triangular-solve(a, b), + left_side=true, unit_diagonal=true, + lower=true, transpose_a=NO_TRANSPOSE + } + )"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + + { + TriangularSolveExpander triangular_solve_expander(block_size); + + TF_ASSERT_OK_AND_ASSIGN( + bool result, RunHloPass(&triangular_solve_expander, module.get())); + EXPECT_TRUE(result); + } + + // To test triangular solver expander we generate simple bidiagonal matrix: + // Solve a * x = b. + // Check that shape is still valid. + // Use reference matrix multiplication to test validity of result. 
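The verification strategy sketched in the comment above can also be checked outside of XLA. A minimal NumPy sketch, assuming the same unit-diagonal bidiagonal matrix and fill values as the surrounding test (it uses a dense general solve purely for illustration, not the blocked substitution the expander emits):

import numpy as np

# Unit diagonal with 0.01 on the first subdiagonal, as in the test.
n, m = 256, 192
a = np.eye(n, dtype=np.float32) + 0.01 * np.eye(n, k=-1, dtype=np.float32)

# Right-hand side filled with small distinct values, mirroring kMax in the test.
k_max = n * m + 1
b = np.fromfunction(lambda r, c: (r + c + 1) / k_max, (n, m), dtype=np.float32)

# Solve a @ x = b, then multiply back as the reference check.
x = np.linalg.solve(a, b)
assert np.allclose(a @ x, b, atol=1e-3)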
+ + Array2D a(256, 256); + for (int64 row = 0; row < a.dim(0); ++row) { + a(row, row) = 1; + if (row > 0) { + a(row, row - 1) = 0.01; + } + } + + Array2D b(256, 192); + const float kMax = (b.dim(0) * b.dim(1) + 1); + for (int64 row = 0; row < b.dim(0); ++row) { + for (int64 col = 0; col < b.dim(1); ++col) { + b(row, col) = (row + col + 1) / kMax; + } + } + auto la = LiteralUtil::CreateR2FromArray2D(a); + auto lb = LiteralUtil::CreateR2FromArray2D(b); + + TF_ASSERT_OK_AND_ASSIGN(Literal lx, Execute(std::move(module), {&la, &lb})); + + auto x_shape = lx.shape(); + EXPECT_EQ(x_shape.dimensions_size(), 2); + EXPECT_EQ(x_shape.dimensions(0), b.dim(0)); + EXPECT_EQ(x_shape.dimensions(1), b.dim(1)); + + Array2D x(x_shape.dimensions(0), x_shape.dimensions(1)); + x.SetValues(lx.data()); + + auto ref_b = ReferenceUtil::MatmulArray2D(a, x); + auto ref_lb = LiteralUtil::CreateR2FromArray2D(*ref_b); + + EXPECT_TRUE( + LiteralTestUtil::NearOrEqual(ref_lb, lb, ErrorSpec{0.001, 0.001})); +} + +// block_size test limits based on the following considerations: +// - test at least twice the range of original value +// - try to test odd values unaligned with matrix dims +// - full 1-256 range test takes too long to run + +INSTANTIATE_TEST_CASE_P(TriangularExpanderTestInstances, TriangularExpanderTest, + ::testing::Range(2, 256, 7)); + +} // namespace +} // namespace xla From 5f59b9b88b70198ef17645b096ab3321a78c53db Mon Sep 17 00:00:00 2001 From: rahul-kamat Date: Tue, 7 Jul 2020 16:38:02 +0000 Subject: [PATCH 0151/2522] add pylint override comments --- tensorflow/python/eager/function_test.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index 9769cd50a9c..a42648b7cb3 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -4011,7 +4011,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase): def testFollowTypeHintsTraceWithOnlyArgNamed(self): trace_count = [0] - def func(t: ops.Tensor, i: int = 1, **kwargs): + def func(t: ops.Tensor, i: int = 1, **kwargs): # pylint: disable=bad-whitespace trace_count[0] += 1 return t @@ -4064,7 +4064,8 @@ class FunctionTest(test.TestCase, parameterized.TestCase): def testFollowTypeHintsTraceWithArgsEquals(self): trace_count = [0] - def func(x: ops.Tensor = 0, y: int = 1, **kwargs: ops.Tensor): + def func(x: ops.Tensor = 0, y: int = 1, # pylint: disable=bad-whitespace + **kwargs: ops.Tensor): trace_count[0] += 1 return x @@ -4108,7 +4109,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase): def testFollowTypeHintsTraceWithKwOnlyArgsBasic(self): trace_count = [0] - def func(*, a: ops.Tensor = None, b=1): + def func(*, a: ops.Tensor = None, b=1): # pylint: disable=bad-whitespace trace_count[0] += 1 return a From d1a654b65ee2a7514758db8bd4e99404ea038518 Mon Sep 17 00:00:00 2001 From: amturati Date: Tue, 7 Jul 2020 16:40:10 +0000 Subject: [PATCH 0152/2522] fixed nit by adding non-trivial tensors to multioutput graph example --- .../c/eager/c_api_unified_experimental_test.cc | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tensorflow/c/eager/c_api_unified_experimental_test.cc b/tensorflow/c/eager/c_api_unified_experimental_test.cc index e104da0be93..73f35ca5ce5 100644 --- a/tensorflow/c/eager/c_api_unified_experimental_test.cc +++ b/tensorflow/c/eager/c_api_unified_experimental_test.cc @@ -807,16 +807,16 @@ TEST_P(UnifiedCAPI, TestMultiOutputGraphMatMul) { * * Now we will 
execute this function with an eager context: * - * A = [[0, 0], [0, 0]] + * A = [[0, 1], [1, 0]] * B = [[1, 0], [0, 1]] * * output1, output2, output3 = two_adds_and_mm(A, B) * * We expect outputs: * - * output1 = [[1, 0], [0, 1]] + * output1 = [[1, 1], [1, 1]] * output2 = [[2, 0], [0, 2]] - * output3 = [[2, 0], [0, 2]] + * output3 = [[2, 2], [2, 2]] * */ @@ -842,7 +842,7 @@ TEST_P(UnifiedCAPI, TestMultiOutputGraphMatMul) { TF_ExecutionContextGetTFEContext(eager_execution_ctx,s); // 1st Arg - float vals1 [] = {0.0f,0.0f,0.0f,0.0f}; + float vals1 [] = {0.0f,1.0f,1.0f,0.0f}; int64_t dims [] = {2,2}; // Matrices will be 2 x 2 int num_dims = sizeof(dims)/sizeof(dims[0]); @@ -867,9 +867,9 @@ TEST_P(UnifiedCAPI, TestMultiOutputGraphMatMul) { ASSERT_EQ(3, TF_OutputListNumOutputs(func_outputs)); - float expected_outputs [3][4] = {{1.0f,0.0f,0.0f,1.0f}, + float expected_outputs [3][4] = {{1.0f,1.0f,1.0f,1.0f}, {2.0f,0.0f,0.0f,2.0f}, - {2.0f,0.0f,0.0f,2.0f}}; + {2.0f,2.0f,2.0f,2.0f}}; float result_data[4]; for (int idx = 0; idx < 3; ++idx) { @@ -897,6 +897,7 @@ TEST_P(UnifiedCAPI, TestMultiOutputGraphMatMul) { TF_DeleteOutputList(func_outputs); TF_DeleteExecutionContext(eager_execution_ctx); + //TF_DeleteExecutionContext(graph_ctx); TF_DeleteAbstractFunction(func); } From f38c6c1bc6b27a4cb9e2460c2be96c0b4bd1d9b5 Mon Sep 17 00:00:00 2001 From: amturati <36869454+amturati@users.noreply.github.com> Date: Tue, 7 Jul 2020 11:53:03 -0500 Subject: [PATCH 0153/2522] uncommented call to free graph_ctx --- tensorflow/c/eager/c_api_unified_experimental_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/c/eager/c_api_unified_experimental_test.cc b/tensorflow/c/eager/c_api_unified_experimental_test.cc index 73f35ca5ce5..07f4e57c705 100644 --- a/tensorflow/c/eager/c_api_unified_experimental_test.cc +++ b/tensorflow/c/eager/c_api_unified_experimental_test.cc @@ -897,7 +897,7 @@ TEST_P(UnifiedCAPI, TestMultiOutputGraphMatMul) { TF_DeleteOutputList(func_outputs); TF_DeleteExecutionContext(eager_execution_ctx); - //TF_DeleteExecutionContext(graph_ctx); + TF_DeleteExecutionContext(graph_ctx); TF_DeleteAbstractFunction(func); } From 8a917da9fad79e74772c8f3bef4ff5f825b88d5c Mon Sep 17 00:00:00 2001 From: WindQAQ Date: Tue, 7 Jul 2020 18:38:31 -0700 Subject: [PATCH 0154/2522] Migrate python implementation of gelu --- tensorflow/python/ops/nn_ops.py | 30 ++++++++++++++++++++++++++++++ tensorflow/python/ops/nn_test.py | 23 +++++++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 5a9a63637f6..7058a0d474f 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -3493,6 +3493,36 @@ def leaky_relu(features, alpha=0.2, name=None): return gen_nn_ops.leaky_relu(features, alpha=alpha, name=name) +@tf_export("nn.gelu") +@dispatch.add_dispatch_support +def gelu(features, approximate=True, name=None): + """Compute the Gaussian Error Linear Unit (GELU) activation function. + + Args: + features: A `Tensor` representing preactivation values. + approximate: An optional `bool`. Defaults to `True`. + Whether to enable approximation. + name: A name for the operation (optional). + + Returns: + A `Tensor` with the same type as `features`. + + References: + [Gaussian Error Linear Units (GELUs)](https://arxiv.org/abs/1606.08415). 
+ """ + with ops.name_scope(name, "Gelu", [features]): + features = ops.convert_to_tensor(features, name="features") + if approximate: + pi = math_ops.cast(np.pi, features.dtype) + coeff = math_ops.cast(0.044715, features.dtype) + return 0.5 * features * (1.0 + math_ops.tanh( + math_ops.sqrt(2.0 / pi) * + (features + coeff * math_ops.pow(features, 3)))) + else: + return 0.5 * features * (1.0 + math_ops.erf( + features / math_ops.cast(1.4142135623730951, features.dtype))) + + def _flatten_outer_dims(logits): """Flattens logits' outer dimensions and keep its last dimension.""" rank = array_ops.rank(logits) diff --git a/tensorflow/python/ops/nn_test.py b/tensorflow/python/ops/nn_test.py index c6433db610d..009fa9f804e 100644 --- a/tensorflow/python/ops/nn_test.py +++ b/tensorflow/python/ops/nn_test.py @@ -1059,6 +1059,29 @@ class LeakyReluTest(test_lib.TestCase): self.assertEqual(outputs_without_name_set.name, 'LeakyRelu:0') +class GeluTest(test_lib.TestCase): + + def test(self): + + def gelu(x, approximate=True): + if approximate: + return 0.5 * x * (1.0 + np.tanh(np.sqrt(2.0 / np.pi) * + (x + 0.044715 * np.power(x, 3)))) + else: + from scipy.stats import norm # pylint: disable=g-import-not-at-top + return x * norm.cdf(x) + + np.random.seed(1) # Make it reproducible. + x = np.random.randn(3, 4).astype(np.float32) + y = gelu(x) + z = self.evaluate(nn_ops.gelu(x)) + self.assertAllClose(y, z) + + y = gelu(x, False) + z = self.evaluate(nn_ops.gelu(x, False)) + self.assertAllClose(y, z) + + class SwishTest(test_lib.TestCase): @test_util.run_deprecated_v1 From 9d483364d19cd7740c6e93378236f1f9fefb994e Mon Sep 17 00:00:00 2001 From: WindQAQ Date: Tue, 7 Jul 2020 18:38:42 -0700 Subject: [PATCH 0155/2522] Port to keras --- tensorflow/python/keras/activations.py | 21 +++++++++++++++++++ tensorflow/python/keras/activations_test.py | 23 ++++++++++++++++++++- 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/keras/activations.py b/tensorflow/python/keras/activations.py index 9e30e3610ec..affc37beef4 100644 --- a/tensorflow/python/keras/activations.py +++ b/tensorflow/python/keras/activations.py @@ -302,6 +302,27 @@ def relu(x, alpha=0., max_value=None, threshold=0): return K.relu(x, alpha=alpha, max_value=max_value, threshold=threshold) +@keras_export('keras.activations.gelu') +@dispatch.add_dispatch_support +def gelu(x, approximate=True): + """Gaussian Error Linear Unit. + + Arguments: + x: Input tensor. + + Returns: + The gaussian error linear activation: + `0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x^3)))` + if `approximate` is `True` or + `x * P(X <= x) = 0.5 * x * (1 + erf(x / sqrt(2)))`, where P(X) ~ N(0, 1), + if `approximate` is `False`. 
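To get a feel for how close the two expressions above are, here is a small NumPy/SciPy sketch mirroring the reference implementation used in the accompanying tests; it is illustrative only:

import numpy as np
from scipy.stats import norm

def gelu_exact(x):
  # x * P(X <= x) with X ~ N(0, 1), i.e. 0.5 * x * (1 + erf(x / sqrt(2))).
  return x * norm.cdf(x)

def gelu_tanh(x):
  # Tanh-based approximation from the GELU paper.
  return 0.5 * x * (1.0 + np.tanh(np.sqrt(2.0 / np.pi) *
                                  (x + 0.044715 * np.power(x, 3))))

x = np.linspace(-3.0, 3.0, 7)
print(np.max(np.abs(gelu_exact(x) - gelu_tanh(x))))  # stays below ~1e-3 here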
+ + Reference: + - [Gaussian Error Linear Units (GELUs)](https://arxiv.org/abs/1606.08415) + """ + return nn.gelu(x, approximate) + + @keras_export('keras.activations.tanh') @dispatch.add_dispatch_support def tanh(x): diff --git a/tensorflow/python/keras/activations_test.py b/tensorflow/python/keras/activations_test.py index f951076efbb..4a6bb7ce271 100644 --- a/tensorflow/python/keras/activations_test.py +++ b/tensorflow/python/keras/activations_test.py @@ -43,7 +43,7 @@ class KerasActivationsTest(test.TestCase, parameterized.TestCase): def test_serialization(self): all_activations = ['softmax', 'relu', 'elu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear', - 'softplus', 'softsign', 'selu'] + 'softplus', 'softsign', 'selu', 'gelu'] for name in all_activations: fn = activations.get(name) ref_fn = getattr(activations, name) @@ -170,6 +170,27 @@ class KerasActivationsTest(test.TestCase, parameterized.TestCase): expected = np.zeros((2, 5)) self.assertAllClose(result, expected, rtol=1e-05) + def test_gelu(self): + def gelu(x, approximate=True): + if approximate: + return 0.5 * x * (1.0 + np.tanh(np.sqrt(2.0 / np.pi) * + (x + 0.044715 * np.power(x, 3)))) + else: + from scipy.stats import norm # pylint: disable=g-import-not-at-top + return x * norm.cdf(x) + x = backend.placeholder(ndim=2) + f = backend.function([x], [activations.gelu(x)]) + test_values = np.random.random((2, 5)) + result = f([test_values])[0] + expected = gelu(test_values) + self.assertAllClose(result, expected, rtol=1e-05) + + f = backend.function([x], [activations.gelu(x, False)]) + test_values = np.random.random((2, 5)) + result = f([test_values])[0] + expected = gelu(test_values, False) + self.assertAllClose(result, expected, rtol=1e-05) + def test_elu(self): x = backend.placeholder(ndim=2) f = backend.function([x], [activations.elu(x, 0.5)]) From 75ad9db9f39709b5ac5dc80a93777df8c4362b55 Mon Sep 17 00:00:00 2001 From: WindQAQ Date: Tue, 7 Jul 2020 18:55:33 -0700 Subject: [PATCH 0156/2522] run pylint --- tensorflow/python/ops/nn_ops.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 7058a0d474f..df5303eadf1 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -3515,12 +3515,13 @@ def gelu(features, approximate=True, name=None): if approximate: pi = math_ops.cast(np.pi, features.dtype) coeff = math_ops.cast(0.044715, features.dtype) - return 0.5 * features * (1.0 + math_ops.tanh( - math_ops.sqrt(2.0 / pi) * - (features + coeff * math_ops.pow(features, 3)))) + return 0.5 * features * ( + 1.0 + math_ops.tanh(math_ops.sqrt(2.0 / pi) * ( + features + coeff * math_ops.pow(features, 3)))) else: - return 0.5 * features * (1.0 + math_ops.erf( - features / math_ops.cast(1.4142135623730951, features.dtype))) + return 0.5 * features * ( + 1.0 + math_ops.erf(features / math_ops.cast( + 1.4142135623730951, features.dtype))) def _flatten_outer_dims(logits): From a3b21999360628de7fc1e553eea4a96c44fd33be Mon Sep 17 00:00:00 2001 From: WindQAQ Date: Tue, 7 Jul 2020 20:23:32 -0700 Subject: [PATCH 0157/2522] Update golden api --- .../tools/api/golden/v1/tensorflow.keras.activations.pbtxt | 4 ++++ tensorflow/tools/api/golden/v1/tensorflow.nn.pbtxt | 4 ++++ .../tools/api/golden/v2/tensorflow.keras.activations.pbtxt | 4 ++++ tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt | 4 ++++ 4 files changed, 16 insertions(+) diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.activations.pbtxt 
b/tensorflow/tools/api/golden/v1/tensorflow.keras.activations.pbtxt index ee3d1f3d4a2..b0a638eae73 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.activations.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.activations.pbtxt @@ -12,6 +12,10 @@ tf_module { name: "exponential" argspec: "args=[\'x\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "gelu" + argspec: "args=[\'x\', \'approximate\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], " + } member_method { name: "get" argspec: "args=[\'identifier\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.nn.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.nn.pbtxt index 932e5037d99..2ab4259a207 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.nn.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.nn.pbtxt @@ -212,6 +212,10 @@ tf_module { name: "fused_batch_norm" argspec: "args=[\'x\', \'scale\', \'offset\', \'mean\', \'variance\', \'epsilon\', \'data_format\', \'is_training\', \'name\', \'exponential_avg_factor\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'0.001\', \'NHWC\', \'True\', \'None\', \'1.0\'], " } + member_method { + name: "gelu" + argspec: "args=[\'features\', \'approximate\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], " + } member_method { name: "in_top_k" argspec: "args=[\'predictions\', \'targets\', \'k\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.activations.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.activations.pbtxt index ee3d1f3d4a2..b0a638eae73 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.activations.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.activations.pbtxt @@ -12,6 +12,10 @@ tf_module { name: "exponential" argspec: "args=[\'x\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "gelu" + argspec: "args=[\'x\', \'approximate\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], " + } member_method { name: "get" argspec: "args=[\'identifier\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt index 6e8e88a3598..18123d80298 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt @@ -168,6 +168,10 @@ tf_module { name: "fractional_max_pool" argspec: "args=[\'value\', \'pooling_ratio\', \'pseudo_random\', \'overlapping\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'0\', \'None\'], " } + member_method { + name: "gelu" + argspec: "args=[\'features\', \'approximate\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], " + } member_method { name: "in_top_k" argspec: "args=[\'targets\', \'predictions\', \'k\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " From 8adc98973ec85f27d9f362abeb302027f0a86164 Mon Sep 17 00:00:00 2001 From: WindQAQ Date: Tue, 7 Jul 2020 20:52:42 -0700 Subject: [PATCH 0158/2522] Update golden api --- .../tools/api/golden/v1/tensorflow.keras.activations.pbtxt | 2 +- .../tools/api/golden/v2/tensorflow.keras.activations.pbtxt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.activations.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.activations.pbtxt index 
b0a638eae73..93daa37930c 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.activations.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.activations.pbtxt @@ -14,7 +14,7 @@ tf_module { } member_method { name: "gelu" - argspec: "args=[\'x\', \'approximate\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], " + argspec: "args=[\'x\', \'approximate\'], varargs=None, keywords=None, defaults=[\'True\'], " } member_method { name: "get" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.activations.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.activations.pbtxt index b0a638eae73..93daa37930c 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.activations.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.activations.pbtxt @@ -14,7 +14,7 @@ tf_module { } member_method { name: "gelu" - argspec: "args=[\'x\', \'approximate\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], " + argspec: "args=[\'x\', \'approximate\'], varargs=None, keywords=None, defaults=[\'True\'], " } member_method { name: "get" From 28bbd123e891f09c494d08ba0559978881d60ac9 Mon Sep 17 00:00:00 2001 From: Abolfazl Shahbazi Date: Tue, 7 Jul 2020 21:04:51 -0700 Subject: [PATCH 0159/2522] A small fix to test scritps --- tensorflow/tools/dockerfiles/tests/import-onednn.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/tensorflow/tools/dockerfiles/tests/import-onednn.sh b/tensorflow/tools/dockerfiles/tests/import-onednn.sh index b3876c30bc1..c890d402453 100755 --- a/tensorflow/tools/dockerfiles/tests/import-onednn.sh +++ b/tensorflow/tools/dockerfiles/tests/import-onednn.sh @@ -15,8 +15,6 @@ # limitations under the License. # ============================================================================ -#!/bin/bash - { # try echo `python -c 'from tensorflow.python import _pywrap_util_port; print(_pywrap_util_port.IsMklEnabled())'` echo "PASS: MKL is enabled" From bed91800b9a1fb7aaef05232576749addb9b0e5d Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Wed, 8 Jul 2020 06:10:53 +0000 Subject: [PATCH 0160/2522] working TensorMapInsert op --- tensorflow/core/kernels/map_kernels.cc | 8 ++ tensorflow/core/kernels/map_kernels.h | 115 +++++++++++++++++- tensorflow/core/ops/map_ops.cc | 34 +++--- .../python/kernel_tests/map_ops_test.py | 16 ++- tensorflow/python/ops/map_ops.py | 10 +- 5 files changed, 164 insertions(+), 19 deletions(-) diff --git a/tensorflow/core/kernels/map_kernels.cc b/tensorflow/core/kernels/map_kernels.cc index 12c932eb83e..cb749c72f7b 100644 --- a/tensorflow/core/kernels/map_kernels.cc +++ b/tensorflow/core/kernels/map_kernels.cc @@ -15,11 +15,19 @@ limitations under the License. 
#include "tensorflow/core/kernels/map_kernels.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/types.h" +#include "tensorflow/core/framework/variant_encode_decode.h" namespace tensorflow { + + REGISTER_KERNEL_BUILDER(Name("EmptyTensorMap").Device(DEVICE_CPU), EmptyTensorMap); +REGISTER_KERNEL_BUILDER(Name("TensorMapSize").Device(DEVICE_CPU), + TensorMapSize); + + + REGISTER_KERNEL_BUILDER(Name("ZeroOut").Device(DEVICE_CPU), ZeroOutOp); } \ No newline at end of file diff --git a/tensorflow/core/kernels/map_kernels.h b/tensorflow/core/kernels/map_kernels.h index 1b344718261..41c1a18a728 100644 --- a/tensorflow/core/kernels/map_kernels.h +++ b/tensorflow/core/kernels/map_kernels.h @@ -24,11 +24,65 @@ using namespace std; namespace tensorflow { +Status GetInputMap(OpKernelContext* c, int index, const TensorMap** map) { + if (!TensorShapeUtils::IsScalar(c->input(index).shape())) { + return errors::InvalidArgument("Input list must be a scalar saw: ", + c->input(index).shape().DebugString()); + } + const TensorMap* m = c->input(index).scalar()().get(); + if (m == nullptr) { + return errors::InvalidArgument( + "Input handle is not a map. Saw: '", + c->input(index).scalar()().DebugString(), "'"); + } + *map = m; + return Status::OK(); +} + +Status ForwardInputOrCreateNewMap(OpKernelContext* c, int32 input_index, + int32 output_index, + const TensorMap& input_map, + TensorMap** output_map) { + // Attempt to forward the input tensor to the output if possible. + std::unique_ptr maybe_output = c->forward_input( + input_index, output_index, DT_VARIANT, TensorShape{}, + c->input_memory_type(input_index), AllocatorAttributes()); + Tensor* output_tensor; + if (maybe_output != nullptr && maybe_output->dtype() == DT_VARIANT && + maybe_output->NumElements() == 1) { + output_tensor = maybe_output.get(); + TensorMap* tmp_out = output_tensor->scalar()().get(); + if (tmp_out == nullptr) { + return errors::InvalidArgument( + "Expected input ", input_index, " to be a TensorMap but saw ", + output_tensor->scalar()().TypeName()); + } + if (tmp_out->RefCountIsOne()) { + // Woohoo, forwarding succeeded! + c->set_output(output_index, *output_tensor); + *output_map = tmp_out; + return Status::OK(); + } + } + + // If forwarding is not possible allocate a new output tensor and copy + // the `input_list` to it. 
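Viewed from the Python side, the intended end-to-end flow of these kernels (as exercised by the map_ops_test.py changes later in this patch) is roughly the sketch below; it assumes the patch exactly as written, with the ops still work-in-progress:

from tensorflow.python.framework import constant_op
from tensorflow.python.ops import map_ops

m = map_ops.empty_tensor_map()                    # variant handle to an empty map
m = map_ops.tensor_map_insert(m,
                              constant_op.constant(1.0),   # key
                              constant_op.constant(2.0))   # value
s = map_ops.tensor_map_size(m)                    # scalar int32, expected to be 1 here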
+ AllocatorAttributes attr; + attr.set_on_host(true); + TF_RETURN_IF_ERROR( + c->allocate_output(output_index, {}, &output_tensor, attr)); + output_tensor->scalar()() = input_map.Copy(); + + *output_map = output_tensor->scalar()().get(); + return Status::OK(); +} + class EmptyTensorMap : public OpKernel { public: explicit EmptyTensorMap(OpKernelConstruction* ctx) : OpKernel(ctx) {} void Compute(OpKernelContext* ctx) override { + std::cout << "hello EmptyTensorMap map_kernels.h" << std::endl; Tensor* result; AllocatorAttributes attr; attr.set_on_host(true); @@ -44,17 +98,74 @@ class EmptyTensorMap : public OpKernel { class TensorMapSize : public OpKernel { public: explicit TensorMapSize(OpKernelConstruction* c) : OpKernel(c) {} - ~TEnsorMapSize() override {} + ~TensorMapSize() override {} void Compute(OpKernelContext* c) override { const TensorMap* m = nullptr; - OP_REQUIRES_OK(c, GetInputList(c, 0, &m)); + OP_REQUIRES_OK(c, GetInputMap(c, 0, &m)); Tensor* result; OP_REQUIRES_OK(c, c->allocate_output(0, TensorShape{}, &result)); result->scalar()() = m->tensors().size(); } }; +class TensorMapInsert : public OpKernel { + public: + explicit TensorMapInsert(OpKernelConstruction* c) : OpKernel(c) { + //OP_REQUIRES_OK(c, c->GetAttr("element_dtype", &element_dtype_)); + } + ~TensorMapInsert() override {} + + void Compute(OpKernelContext* c) override { + std::cout << "hello TensorMapInsert kernel" << std::endl; + const Tensor& temp_key = c->input(1); + const TensorKey key = TensorKey(temp_key); + std::cout << "got key" << std::endl; + const Tensor& value = c->input(2); + std::cout << "got value" << std::endl; + /*OP_REQUIRES(c, element_dtype_ == value.dtype(), + errors::InvalidArgument("Invalid data types; list elements ", + DataTypeString(element_dtype_), + " but tried to append ", + DataTypeString(value.dtype())));*/ + + const TensorMap* m = nullptr; + OP_REQUIRES_OK(c, GetInputMap(c, 0, &m)); + std::cout << "got map" << std::endl; + /*OP_REQUIRES(c, m->element_shape.IsCompatibleWith(input.shape()), + errors::InvalidArgument( + "Tried to append a map with incompatible shape to a " + "list. Op element shape: ", + input.shape().DebugString(), + " list shape: ", m->element_shape.DebugString()));*/ + /*OP_REQUIRES(c, element_dtype_ == m->element_dtype, + errors::InvalidArgument("Invalid data types; op elements ", + DataTypeString(element_dtype_), + " but list elements ", + DataTypeString(l->element_dtype))); + + if (l->max_num_elements != -1) { + OP_REQUIRES( + c, l->tensors().size() < l->max_num_elements, + errors::InvalidArgument("Tried to push item into a full list", + " list size: ", l->tensors().size(), + " max_num_elements: ", l->max_num_elements)); + }*/ + + TensorMap* output_map = nullptr; + OP_REQUIRES_OK(c, ForwardInputOrCreateNewMap(c, 0, 0, *m, &output_map)); + std::cout << "create output" << std::endl; + output_map->insert(key, value); + std::cout << "inserted" << std::endl; + } + + private: + DataType element_dtype_; +}; + +REGISTER_KERNEL_BUILDER(Name("TensorMapInsert").Device(DEVICE_CPU), + TensorMapInsert); + class ZeroOutOp : public OpKernel { public: explicit ZeroOutOp(OpKernelConstruction* c) : OpKernel(c) {} diff --git a/tensorflow/core/ops/map_ops.cc b/tensorflow/core/ops/map_ops.cc index ab2fdef9127..463a2ea102b 100644 --- a/tensorflow/core/ops/map_ops.cc +++ b/tensorflow/core/ops/map_ops.cc @@ -20,6 +20,13 @@ limitations under the License. 
namespace tensorflow { namespace { +bool IsValidTensorMapHandleData( + const std::vector* handle_data) { + std::cout << "is valid tensor map handle data " << handle_data->size() << std::endl; + return true; + //return handle_data != nullptr && handle_data->size() == 1; +} + REGISTER_OP("EmptyTensorMap") .Output("handle: variant") .SetShapeFn([](shape_inference::InferenceContext* c) { @@ -27,6 +34,11 @@ REGISTER_OP("EmptyTensorMap") return Status::OK(); }); +REGISTER_OP("TensorMapSize") + .Input("input_handle: variant") + .Output("size: int32") + .SetShapeFn(shape_inference::ScalarShape); + REGISTER_OP("TensorMapInsert") .Input("input_handle: variant") .Input("key: element_dtype") @@ -35,18 +47,17 @@ REGISTER_OP("TensorMapInsert") .Attr("element_dtype: type") .SetShapeFn([](shape_inference::InferenceContext* c) { c->set_output(0, c->Scalar()); - DataType element_dtype; + /*DataType element_dtype; TF_RETURN_IF_ERROR(c->GetAttr("element_dtype", &element_dtype)); - shape_inference::ShapeHandle element_shape = c->UnknownShape(); + shape_inference::ShapeHandle element_shape = c->UnknownShape();*/ - auto* handle_data = c->input_handle_shapes_and_types(0); + /*auto* handle_data = c->input_handle_shapes_and_types(0); if (handle_data != nullptr && handle_data->size() > 1) { return errors::InvalidArgument( "Trying to push to list with wrong variant data."); } - if (IsValidTensorListHandleData(handle_data)) { - const shape_inference::ShapeAndType& list_shape_type = - (*handle_data)[0]; + if (IsValidTensorMapHandleData(handle_data)) { + const shape_inference::ShapeAndType& map_shape_type = (*handle_data)[0]; if (list_shape_type.dtype != element_dtype) { return errors::InvalidArgument( "Trying to push to list with wrong element dtype. List has type ", @@ -56,20 +67,15 @@ REGISTER_OP("TensorMapInsert") } shape_inference::ShapeHandle ignored; TF_RETURN_IF_ERROR( - c->Merge(element_shape, list_shape_type.shape, &ignored)); - element_shape = list_shape_type.shape; + c->Merge(element_shape, map_shape_type.shape, &ignored)); + element_shape = map_shape_type.shape; } c->set_output_handle_shapes_and_types( 0, std::vector{ - {element_shape, element_dtype}}); + {element_shape, element_dtype}});*/ return Status::OK(); }); -REGISTER_OP("TensorMapSize") - .Input("input_handle: variant") - .Output("size: int32") - .SetShapeFn(shape_inference::ScalarShape); - /*REGISTER_OP("TensorMapErase") .Input("input_handle: variant") .Input("element_shape: int32") diff --git a/tensorflow/python/kernel_tests/map_ops_test.py b/tensorflow/python/kernel_tests/map_ops_test.py index 726c97a639b..6cd6d7d611d 100644 --- a/tensorflow/python/kernel_tests/map_ops_test.py +++ b/tensorflow/python/kernel_tests/map_ops_test.py @@ -41,8 +41,20 @@ from tensorflow.python.ops import map_ops class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testEmptyTensorMap(self): m = map_ops.empty_tensor_map() - print("empty tensor map created") - + print("test EmptyTensorMap") + + def testTensorMapSize(self): + m = map_ops.empty_tensor_map() + s = map_ops.tensor_map_size(m) + print("size: ", s) + + def testTensorMapInsert(self): + m = map_ops.empty_tensor_map() + k = constant_op.constant(1.0) + v = constant_op.constant(2.0) + m = map_ops.tensor_map_insert(m, k, v) + print("test TensorMapInsert") + ''' @parameterized.named_parameters(("NoMaxNumElements", None), ("WithMaxNumElements", 2)) diff --git a/tensorflow/python/ops/map_ops.py b/tensorflow/python/ops/map_ops.py index 21c58fb773d..61493be6b71 100644 --- 
a/tensorflow/python/ops/map_ops.py +++ b/tensorflow/python/ops/map_ops.py @@ -31,10 +31,18 @@ from tensorflow.python.ops.gen_map_ops import * #zero_out = zero_out_ops.zero_out def empty_tensor_map(): + print("hello gen_map_ops.empty_tensor_map") return gen_map_ops.empty_tensor_map() +def tensor_map_size(input_handle): + print("hello gen_map_ops.tensor_map_size") + return gen_map_ops.tensor_map_size(input_handle) + +def tensor_map_insert(input_handle, key, value): + print("hello gen_map_ops.tensor_map_insert") + return gen_map_ops.tensor_map_insert(input_handle, key, value) + def zero_out(to_zero): - print("Hello World - Python Op") return gen_map_ops.zero_out(to_zero) @ops.RegisterGradient("ZeroOut") From 05b4262f6e1a2edbd0b013fcc0f9f44091a9476a Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Wed, 8 Jul 2020 18:45:39 +0000 Subject: [PATCH 0161/2522] refactored inputs to kernel as its own struct --- tensorflow/c/kernels/summary_op.cc | 110 ++++++++++++++---------- tensorflow/c/kernels/summary_op_test.cc | 7 +- 2 files changed, 69 insertions(+), 48 deletions(-) diff --git a/tensorflow/c/kernels/summary_op.cc b/tensorflow/c/kernels/summary_op.cc index cad7bd646ed..ab44a8baae9 100644 --- a/tensorflow/c/kernels/summary_op.cc +++ b/tensorflow/c/kernels/summary_op.cc @@ -30,6 +30,33 @@ limitations under the License. #include "tensorflow/core/framework/types.h" #include +// Struct that stores the status and TF_Tensor inputs to the opkernel. +// Used to delete tensor and status in its destructor upon kernel return. +typedef struct Params{ + TF_Tensor* tags; + TF_Tensor* values; + TF_Status* status; + Params(TF_OpKernelContext* ctx) { + status = TF_NewStatus(); + TF_GetInput(ctx, 0, &tags, status); + if (TF_GetCode(status) == TF_OK){ + TF_GetInput(ctx, 1, &values, status); + } + else{ + values = nullptr; + } + }; + ~Params(){ + TF_DeleteStatus(status); + TF_DeleteTensor(tags); + // edge case if params fails to initialize + if (values != nullptr){ + TF_DeleteTensor(values); + } + } +}; + +// dummy functions used for kernel registration static void* SummaryScalarOp_Create(TF_OpKernelConstruction* ctx) { void* ptr; return ptr; @@ -45,55 +72,48 @@ static tensorflow::string SingleTag(TF_Tensor* tags); template static void SummaryScalarOp_Compute(void* kernel, TF_OpKernelContext* ctx) { - TF_Tensor* tags; - TF_Tensor* values; - TF_Status* status = TF_NewStatus(); - TF_GetInput(ctx, 0, &tags, status); - if (TF_GetCode(status) == TF_OK) { - TF_GetInput(ctx, 1, &values, status); - } - if (TF_GetCode(status) == TF_OK) { - if (!IsSameSize(tags, values)) { - std::ostringstream err; - err << "tags and values not the same shape: " - << TF_ShapeDebugString(tags) << " != " << TF_ShapeDebugString(values) - << SingleTag(tags); - TF_SetStatus(status, TF_INVALID_ARGUMENT, err.str().c_str()); - } + Params params(ctx); + if (TF_GetCode(params.status) != TF_OK){ + TF_OpKernelContext_Failure(ctx, params.status); + return; + } + if (!IsSameSize(params.tags, params.values)) { + std::ostringstream err; + err << "tags and values not the same shape: " + << TF_ShapeDebugString(params.tags) << " != " + << TF_ShapeDebugString(params.values) + << SingleTag(params.tags); + TF_SetStatus(params.status, TF_INVALID_ARGUMENT, err.str().c_str()); + } + if (TF_GetCode(params.status) != TF_OK){ + TF_OpKernelContext_Failure(ctx, params.status); + return; } - // Copy tag and string data into summary protobuf - tensorflow::Summary s; - if (TF_GetCode(status) == TF_OK) { - // Convert tags and values tensor to array to access elements by 
index - auto tags_array = static_cast(TF_TensorData(tags)); - auto values_array = static_cast(TF_TensorData(values)); - // Copy tags and values into summary protobuf - for (int i = 0; i < TF_TensorElementCount(tags); ++i) { - tensorflow::Summary::Value* v = s.add_value(); - const tensorflow::tstring& Ttags_i = tags_array[i]; - v->set_tag(Ttags_i.data(), Ttags_i.size()); - v->set_simple_value(float(values_array[i])); - } - TF_Tensor* summary_tensor = TF_AllocateOutput(ctx, 0, - TF_ExpectedOutputDataType(ctx, 0), nullptr, 0, - sizeof(tensorflow::tstring), status); - if (TF_GetCode(status) == TF_OK) { - tensorflow::tstring summary_tstring; - SerializeToTString(s, &summary_tstring); - TF_TString* output_tf_tstring = reinterpret_cast(TF_TensorData(summary_tensor)); - TF_TString_Init(output_tf_tstring); - tensorflow::tstring* output_tstring = reinterpret_cast(output_tf_tstring); - *output_tstring = summary_tstring; // may want to use std::move - } - TF_DeleteTensor(summary_tensor); + // Convert tags and values tensor to array to access elements by index + tensorflow::Summary s; + auto tags_array = static_cast( + TF_TensorData(params.tags)); + auto values_array = static_cast(TF_TensorData(params.values)); + // Copy tags and values into summary protobuf + for (int i = 0; i < TF_TensorElementCount(params.tags); ++i) { + tensorflow::Summary::Value* v = s.add_value(); + const tensorflow::tstring& Ttags_i = tags_array[i]; + v->set_tag(Ttags_i.data(), Ttags_i.size()); + v->set_simple_value(float(values_array[i])); } - if (TF_GetCode(status) != TF_OK) { - TF_OpKernelContext_Failure(ctx, status); + TF_Tensor* summary_tensor = TF_AllocateOutput(ctx, 0, + TF_ExpectedOutputDataType(ctx, 0), nullptr, 0, + sizeof(tensorflow::tstring), params.status); + if (TF_GetCode(params.status) != TF_OK){ + TF_DeleteTensor(summary_tensor); + TF_OpKernelContext_Failure(ctx, params.status); + return; } - TF_DeleteStatus(status); - TF_DeleteTensor(tags); - TF_DeleteTensor(values); + tensorflow::tstring* output_tstring = reinterpret_cast( + TF_TensorData(summary_tensor)); + SerializeToTString(s, output_tstring); + TF_DeleteTensor(summary_tensor); } bool IsSameSize(TF_Tensor* tensor1, TF_Tensor* tensor2){ diff --git a/tensorflow/c/kernels/summary_op_test.cc b/tensorflow/c/kernels/summary_op_test.cc index 7438693b430..42f7a7ff3b3 100644 --- a/tensorflow/c/kernels/summary_op_test.cc +++ b/tensorflow/c/kernels/summary_op_test.cc @@ -80,7 +80,6 @@ void TestScalarSummaryOp(Tensor* tags, Tensor* values, string expected_summary, params.inputs = &inputs; OpKernelContext ctx(¶ms, 1); kernel->Compute(&ctx); - ASSERT_EQ(expected_code, ctx.status().code()); if (expected_code == error::OK){ Summary summary; @@ -90,7 +89,7 @@ void TestScalarSummaryOp(Tensor* tags, Tensor* values, string expected_summary, } } -TEST(ScalarSummaryOpTest, Test) { +TEST(ScalarSummaryOpTest, SimpleFloat) { int vectorSize = 3; Tensor tags(DT_STRING, {vectorSize}); Tensor values(DT_FLOAT, {vectorSize}); @@ -104,7 +103,7 @@ TEST(ScalarSummaryOpTest, Test) { value { tag: 'tag1' simple_value: 1.0 } value { tag: 'tag2' simple_value: -0.73} value { tag: 'tag3' simple_value: 10000.0})", error::OK); -} +} TEST(ScalarSummaryOpTest, SimpleDouble) { int vectorSize = 3; @@ -122,6 +121,7 @@ TEST(ScalarSummaryOpTest, SimpleDouble) { value { tag: 'tag3' simple_value: 10000.0})", error::OK); } + TEST(ScalarSummaryOpTest, SimpleHalf) { int vectorSize = 3; Tensor tags(DT_STRING, {vectorSize}); @@ -168,6 +168,7 @@ TEST(ScalarSummaryOpTest, Error_WrongWithSingleTag) { 
TestScalarSummaryOp(&tags, &values, R"()", error::INVALID_ARGUMENT); } + TEST(ScalarSummaryOpTest, IsRegistered){ const OpRegistrationData* reg; TF_CHECK_OK(OpRegistry::Global()->LookUp("SummaryScalar", ®)); From f863e79fa8cb3d6856620ca34aeb6118e3bc9726 Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Wed, 8 Jul 2020 19:57:33 +0000 Subject: [PATCH 0162/2522] added priority function --- tensorflow/c/kernels.cc | 5 +++++ tensorflow/c/kernels.h | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/tensorflow/c/kernels.cc b/tensorflow/c/kernels.cc index 3021a38e888..c9868ef2b67 100644 --- a/tensorflow/c/kernels.cc +++ b/tensorflow/c/kernels.cc @@ -97,6 +97,11 @@ void TF_KernelBuilder_HostMemory(TF_KernelBuilder* kernel_builder, kernel_builder->cc_builder->HostMemory(arg_name); } +void TF_KernelBuilder_Priority(TF_KernelBuilder* kernel_builder, + int32_t priority_number){ + kernel_builder->cc_builder->Priority(priority_number); +} + namespace tensorflow { namespace { diff --git a/tensorflow/c/kernels.h b/tensorflow/c/kernels.h index 084717c1d9e..dc65a514e20 100644 --- a/tensorflow/c/kernels.h +++ b/tensorflow/c/kernels.h @@ -107,6 +107,10 @@ TF_CAPI_EXPORT extern void TF_KernelBuilder_TypeConstraint( TF_CAPI_EXPORT extern void TF_KernelBuilder_HostMemory( TF_KernelBuilder* kernel_builder, const char* arg_name); +// Specify a priority number for this kernel. +TF_CAPI_EXPORT extern void TF_KernelBuilder_Priority( + TF_KernelBuilder* kernel_builder, int32_t priority_number); + // Register the given kernel builder with the TensorFlow runtime. If // registration fails, the given status will be populated. // From 9660f6708e7349f73213e2a742a987eb68ca7cb4 Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Wed, 8 Jul 2020 20:06:53 +0000 Subject: [PATCH 0163/2522] removed diffs for clean PR --- tensorflow/c/kernels.cc | 31 ----- tensorflow/c/kernels.h | 13 -- tensorflow/c/kernels/summary_op.cc | 1 - tensorflow/c/kernels_test.cc | 166 ++++++-------------------- tensorflow/c/tf_tensor.cc | 5 +- tensorflow/core/kernels/summary_op.cc | 2 +- 6 files changed, 39 insertions(+), 179 deletions(-) diff --git a/tensorflow/c/kernels.cc b/tensorflow/c/kernels.cc index 1bd12353031..3021a38e888 100644 --- a/tensorflow/c/kernels.cc +++ b/tensorflow/c/kernels.cc @@ -25,7 +25,6 @@ limitations under the License. #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/platform/types.h" -#include "tensorflow/core/framework/tensor_shape.h" // This file forms the basis of a stable ABI for third-party kernel // implementations. 
It is crucial that changes to this file are made cautiously @@ -98,11 +97,6 @@ void TF_KernelBuilder_HostMemory(TF_KernelBuilder* kernel_builder, kernel_builder->cc_builder->HostMemory(arg_name); } -void TF_KernelBuilder_Priority(TF_KernelBuilder* kernel_builder, - int32_t priority_number){ - kernel_builder->cc_builder->Priority(priority_number); -} - namespace tensorflow { namespace { @@ -273,28 +267,3 @@ TF_Tensor* TF_AllocateOutput(TF_OpKernelContext* context, int index, } return tf_tensor; } - -TF_Tensor* TF_AllocateTemp(TF_OpKernelContext* context, TF_DataType dtype, - int64_t* dims, int num_dims, TF_Status* status){ - auto* cc_ctx = reinterpret_cast<::tensorflow::OpKernelContext*>(context); - TF_SetStatus(status, TF_OK, ""); - tensorflow::TensorShape shape; - for(int i = 0; i < num_dims; ++i){ - shape.AddDim(dims[i]); - } - tensorflow::Status s; - tensorflow::Tensor tensor_temp; - TF_Tensor* tf_tensor_temp; - s = cc_ctx->allocate_temp(static_cast(dtype), shape, &tensor_temp); - if (!s.ok()){ - ::tensorflow::Set_TF_Status_from_Status(status, s); - return nullptr; - } - tf_tensor_temp = TF_TensorFromTensor(tensor_temp, &s); - if (!s.ok()){ - ::tensorflow::Set_TF_Status_from_Status(status, s); - return nullptr; - } - return tf_tensor_temp; -} - diff --git a/tensorflow/c/kernels.h b/tensorflow/c/kernels.h index 8ed3488988d..084717c1d9e 100644 --- a/tensorflow/c/kernels.h +++ b/tensorflow/c/kernels.h @@ -107,10 +107,6 @@ TF_CAPI_EXPORT extern void TF_KernelBuilder_TypeConstraint( TF_CAPI_EXPORT extern void TF_KernelBuilder_HostMemory( TF_KernelBuilder* kernel_builder, const char* arg_name); -// Specify a priority number for this kernel. -TF_CAPI_EXPORT extern void TF_KernelBuilder_Priority( - TF_KernelBuilder* kernel_builder, int32_t priority_number); - // Register the given kernel builder with the TensorFlow runtime. If // registration fails, the given status will be populated. // @@ -194,15 +190,6 @@ TF_CAPI_EXPORT TF_Tensor* TF_AllocateOutput(TF_OpKernelContext* context, int64_t* dims, int num_dims, size_t len, TF_Status* status); -// Allocates a temporary Tensor of the specified type and shape. Devices -// such as GPUs that enqueue Ops for lazy execution may retain references -// to the temporary tensors after the Op's Compute method has run. - -// num_dims must equal the size of array dims -TF_CAPI_EXPORT extern TF_Tensor* TF_AllocateTemp(TF_OpKernelContext* context, - TF_DataType dtype, int64_t* dims, int num_dims, TF_Status* status); - - #ifdef __cplusplus } /* end extern "C" */ #endif diff --git a/tensorflow/c/kernels/summary_op.cc b/tensorflow/c/kernels/summary_op.cc index ab44a8baae9..9d28c0797ff 100644 --- a/tensorflow/c/kernels/summary_op.cc +++ b/tensorflow/c/kernels/summary_op.cc @@ -28,7 +28,6 @@ limitations under the License. #include "tensorflow/core/platform/protobuf.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/types.h" -#include // Struct that stores the status and TF_Tensor inputs to the opkernel. // Used to delete tensor and status in its destructor upon kernel return. diff --git a/tensorflow/c/kernels_test.cc b/tensorflow/c/kernels_test.cc index 738c1e12c80..423302741de 100644 --- a/tensorflow/c/kernels_test.cc +++ b/tensorflow/c/kernels_test.cc @@ -360,17 +360,6 @@ class DeviceKernelOpTest : public OpsTestBase { #endif }; -// Helper function for tests that validates that the tensor has -// shape and type corresponding to dims and dtype. 
-void validate_tensor(TF_Tensor* tensor, int64_t* dims, int64_t num_dims, - TF_DataType dtype); - -// Helper function for tests that copies data of length -// tensor_size_bytes from values to tensor -template -void set_tensor_data(TF_Tensor* tensor, T* values, size_t tensor_size_bytes, - TF_OpKernelContext* ctx); - REGISTER_OP("AllocateOutputOp1").Output("output1: float"); TEST_F(DeviceKernelOpTest, TestAllocateOutputSizeOne) { @@ -382,11 +371,22 @@ TEST_F(DeviceKernelOpTest, TestAllocateOutputSizeOne) { TF_Tensor* output = TF_AllocateOutput( /*context=*/ctx, /*index=*/0, /*dtype=*/TF_FLOAT, /*dims=*/&dim, /*num_dims=*/1, /*len=*/tensor_size_bytes, s); - validate_tensor(output, &dim, 1, TF_FLOAT); - + EXPECT_EQ(TF_OK, TF_GetCode(s)); + EXPECT_EQ(TF_FLOAT, TF_TensorType(output)); + EXPECT_EQ(1, TF_NumDims(output)); + EXPECT_EQ(1, TF_Dim(output, 0)); + // Set output to 3 - float values[1] = {3.0f}; - set_tensor_data(output, values, tensor_size_bytes, ctx); + float* data = reinterpret_cast(TF_TensorData(output)); + float value = 3.0f; +#if GOOGLE_CUDA + OpKernelContext* cc_ctx = reinterpret_cast(ctx); + cc_ctx->eigen_gpu_device().memcpyHostToDevice(data, &value, + tensor_size_bytes); +#else + *data = value; +#endif + TF_DeleteStatus(s); TF_DeleteTensor(output); }; @@ -409,8 +409,12 @@ TEST_F(DeviceKernelOpTest, TestAllocateEmptyOutput) { TF_Tensor* output = TF_AllocateOutput( /*context=*/ctx, /*index=*/0, /*dtype=*/TF_FLOAT, /*dims=*/&dim, /*num_dims=*/1, /*len=*/0, s); + EXPECT_EQ(TF_OK, TF_GetCode(s)); - validate_tensor(output, &dim, 1, TF_FLOAT); + EXPECT_EQ(TF_FLOAT, TF_TensorType(output)); + EXPECT_EQ(1, TF_NumDims(output)); + EXPECT_EQ(0, TF_Dim(output, 0)); + TF_DeleteStatus(s); TF_DeleteTensor(output); }; @@ -430,16 +434,27 @@ TEST_F(DeviceKernelOpTest, TestAllocateOutputSize2x3) { TF_Status* s = TF_NewStatus(); // Allocate 2x3 output int64_t dim[2] = {2, 3}; - size_t tensor_size_bytes = TF_DataTypeSize(TF_FLOAT) * 6; + size_t tensor_size_bytes = 6 * TF_DataTypeSize(TF_FLOAT); TF_Tensor* output = TF_AllocateOutput( /*context=*/ctx, /*index=*/0, /*dtype=*/TF_FLOAT, /*dims=*/dim, /*num_dims=*/2, /*len=*/tensor_size_bytes, s); EXPECT_EQ(TF_OK, TF_GetCode(s)); - validate_tensor(output, dim, 2, TF_FLOAT); + EXPECT_EQ(TF_FLOAT, TF_TensorType(output)); + EXPECT_EQ(2, TF_NumDims(output)); + EXPECT_EQ(2, TF_Dim(output, 0)); + EXPECT_EQ(3, TF_Dim(output, 1)); // Set output to [1 2 3 4 5 6] - float values[6] = {1, 2, 3, 4, 5, 6}; - set_tensor_data(output, values, tensor_size_bytes, ctx); + void* data = TF_TensorData(output); + float value[6] = {1, 2, 3, 4, 5, 6}; +#if GOOGLE_CUDA + OpKernelContext* cc_ctx = reinterpret_cast(ctx); + cc_ctx->eigen_gpu_device().memcpyHostToDevice(data, value, + tensor_size_bytes); +#else + memcpy(data, value, tensor_size_bytes); +#endif + TF_DeleteStatus(s); TF_DeleteTensor(output); }; @@ -451,113 +466,4 @@ TEST_F(DeviceKernelOpTest, TestAllocateOutputSize2x3) { EXPECT_EQ("Tensor", output->DebugString(100)); } - -REGISTER_OP("AllocateTempOp1").Output("output1: float"); - -TEST_F(DeviceKernelOpTest, TestAllocateTempSizeOne) { - auto my_compute_func = [](void* kernel, TF_OpKernelContext* ctx) { - // Allocate output - TF_Status* s = TF_NewStatus(); - int64_t dim = 1; - TF_Tensor* output = TF_AllocateTemp( - /*context=*/ctx, /*dtype=*/TF_FLOAT, /*dims=*/&dim, - /*num_dims=*/1, s); - size_t tensor_size_bytes = TF_DataTypeSize(TF_FLOAT); - EXPECT_EQ(TF_OK, TF_GetCode(s)); - validate_tensor(output, &dim, 1, TF_FLOAT); - - // Set output to 3 - float values[1] = {3.0f}; - 
set_tensor_data(output, values, tensor_size_bytes, ctx); - TF_SetOutput(ctx, 0, output, s); - TF_DeleteStatus(s); - TF_DeleteTensor(output); - }; - - SetupOp("AllocateTempOp1", "AllocateTemp1", my_compute_func); - - TF_ASSERT_OK(RunOpKernel()); - Tensor* output = GetOutput(0); - EXPECT_EQ("Tensor", - output->DebugString(100)); -} - -REGISTER_OP("AllocateTempOp0").Output("output1: float"); - -TEST_F(DeviceKernelOpTest, TestAllocateTempEmpty) { - auto my_compute_func = [](void* kernel, TF_OpKernelContext* ctx) { - TF_Status* s = TF_NewStatus(); - // Allocate empty output - int64_t dim = 0; - TF_Tensor* output = TF_AllocateTemp( - /*context=*/ctx, /*dtype=*/TF_FLOAT, /*dims=*/&dim, - /*num_dims=*/1, s); - EXPECT_EQ(TF_OK, TF_GetCode(s)); - validate_tensor(output, &dim, 1, TF_FLOAT); - TF_SetOutput(ctx, 0, output, s); - TF_DeleteStatus(s); - TF_DeleteTensor(output); - }; - - SetupOp("AllocateTempOp0", "AllocateTemp0", my_compute_func); - - TF_ASSERT_OK(RunOpKernel()); - Tensor* output = GetOutput(0); - EXPECT_EQ("Tensor", - output->DebugString(100)); -} - -REGISTER_OP("AllocateTempOp2x3").Output("output1: float"); - -TEST_F(DeviceKernelOpTest, TestAllocateTempSize2x3) { - auto my_compute_func = [](void* kernel, TF_OpKernelContext* ctx) { - TF_Status* s = TF_NewStatus(); - size_t tensor_size_bytes = 6 * TF_DataTypeSize(TF_FLOAT); - // Allocate 2x3 output - int64_t dim[2] = {2, 3}; - TF_Tensor* output = TF_AllocateTemp( - /*context=*/ctx, /*dtype=*/TF_FLOAT, /*dims=*/dim, - /*num_dims=*/2, s); - EXPECT_EQ(TF_OK, TF_GetCode(s)); - validate_tensor(output, dim, 2, TF_FLOAT); - - // Set output to [1 2 3 4 5 6] - void* data = TF_TensorData(output); - float values[6] = {1, 2, 3, 4, 5, 6}; - set_tensor_data(output, values, tensor_size_bytes, ctx); - TF_SetOutput(ctx, 0, output, s); - TF_DeleteStatus(s); - TF_DeleteTensor(output); - }; - - SetupOp("AllocateTempOp2x3", "AllocateTempOp2x3", my_compute_func); - - TF_ASSERT_OK(RunOpKernel()); - Tensor* output = GetOutput(0); - EXPECT_EQ("Tensor", - output->DebugString(100)); -} - -void validate_tensor(TF_Tensor* tensor, int64_t* dims, int64_t num_dims, - TF_DataType dtype){ - EXPECT_EQ(TF_FLOAT, TF_TensorType(tensor)); - EXPECT_EQ(num_dims, TF_NumDims(tensor)); - for(int i = 0; i < num_dims; ++i){ - EXPECT_EQ(dims[i], TF_Dim(tensor, i)); - } -} - -template -void set_tensor_data(TF_Tensor* tensor, T* values, size_t tensor_size_bytes, - TF_OpKernelContext* ctx){ - T* data = reinterpret_cast(TF_TensorData(tensor)); -#if GOOGLE_CUDA - OpKernelContext* cc_ctx = reinterpret_cast(ctx); - cc_ctx->eigen_gpu_device().memcpyHostToDevice(data, values, - tensor_size_bytes); -#else - memcpy(data, values, tensor_size_bytes); -#endif -} - -} // namespace tensorflow \ No newline at end of file +} // namespace tensorflow diff --git a/tensorflow/c/tf_tensor.cc b/tensorflow/c/tf_tensor.cc index 5cfd495933c..aa65cb7c927 100644 --- a/tensorflow/c/tf_tensor.cc +++ b/tensorflow/c/tf_tensor.cc @@ -293,6 +293,7 @@ TF_Tensor* TF_TensorFromTensor(const tensorflow::Tensor& src, Status* status) { std::memcpy(TF_TensorData(t), str.c_str(), str.size()); return t; } + Tensor tensor; if (!tensor.CopyFrom(src, src.shape())) { return nullptr; @@ -329,6 +330,4 @@ bool TensorInterface::IsAligned() const { return tensor_.IsAligned(); } } // namespace tensorflow -bool TF_TensorIsAligned(const TF_Tensor* t) { return t->tensor->IsAligned(); } - - +bool TF_TensorIsAligned(const TF_Tensor* t) { return t->tensor->IsAligned(); } \ No newline at end of file diff --git 
a/tensorflow/core/kernels/summary_op.cc b/tensorflow/core/kernels/summary_op.cc index 64e8347dfc4..f4c91fc9ff1 100644 --- a/tensorflow/core/kernels/summary_op.cc +++ b/tensorflow/core/kernels/summary_op.cc @@ -39,7 +39,7 @@ class SummaryScalarOp : public OpKernel { void Compute(OpKernelContext* c) override { const Tensor& tags = c->input(0); const Tensor& values = c->input(1); - string tag = SingleTag(tags); + OP_REQUIRES( c, tags.IsSameSize(values) || (TensorShapeUtils::IsScalar(tags.shape()) && From a02b0c8282f8bfd07574e111550bbb0d69bc48f9 Mon Sep 17 00:00:00 2001 From: Abolfazl Shahbazi Date: Wed, 8 Jul 2020 17:33:46 -0700 Subject: [PATCH 0164/2522] Seperate OneDNN from versioned releases --- tensorflow/tools/dockerfiles/spec.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/tools/dockerfiles/spec.yml b/tensorflow/tools/dockerfiles/spec.yml index ec20f63f3d8..6d70faaaf03 100644 --- a/tensorflow/tools/dockerfiles/spec.yml +++ b/tensorflow/tools/dockerfiles/spec.yml @@ -38,6 +38,8 @@ releases: versioned: tag_specs: - "{_TAG_PREFIX}{ubuntu}{jupyter}" + onednn: + tag_specs: - "{_TAG_PREFIX}{ubuntu-1604-onednn}" - "{_TAG_PREFIX}{ubuntu-1804-onednn}" - "{_TAG_PREFIX}{ubuntu-2004-onednn}" @@ -191,6 +193,7 @@ slice_sets: - CHECKOUT_TF_SRC=1 - CHECKOUT_HOROVOD_SRC=1 + ubuntu-1604-onednn: - add_to_name: "-16.04-onednn" dockerfile_exclusive_name: "ubuntu-16.04-onednn" From 6879366d6336355ccd52c60b7de662b18bebdba5 Mon Sep 17 00:00:00 2001 From: Abolfazl Shahbazi Date: Wed, 8 Jul 2020 17:55:34 -0700 Subject: [PATCH 0165/2522] Combining Ubuntu OneDNN slice sets and cleaning up tag specs --- tensorflow/tools/dockerfiles/spec.yml | 44 ++++++--------------------- 1 file changed, 10 insertions(+), 34 deletions(-) diff --git a/tensorflow/tools/dockerfiles/spec.yml b/tensorflow/tools/dockerfiles/spec.yml index 6d70faaaf03..fbe5ed2c238 100644 --- a/tensorflow/tools/dockerfiles/spec.yml +++ b/tensorflow/tools/dockerfiles/spec.yml @@ -40,18 +40,10 @@ releases: - "{_TAG_PREFIX}{ubuntu}{jupyter}" onednn: tag_specs: - - "{_TAG_PREFIX}{ubuntu-1604-onednn}" - - "{_TAG_PREFIX}{ubuntu-1804-onednn}" - - "{_TAG_PREFIX}{ubuntu-2004-onednn}" - - "{_TAG_PREFIX}{ubuntu-1604-onednn}{onednn-jupyter}" - - "{_TAG_PREFIX}{ubuntu-1804-onednn}{onednn-jupyter}" - - "{_TAG_PREFIX}{ubuntu-2004-onednn}{onednn-jupyter}" - - "{_TAG_PREFIX}{ubuntu-1604-devel-onednn}" - - "{_TAG_PREFIX}{ubuntu-1804-devel-onednn}" - - "{_TAG_PREFIX}{ubuntu-2004-devel-onednn}" - - "{_TAG_PREFIX}{ubuntu-1604-devel-onednn}{onednn-jupyter}" - - "{_TAG_PREFIX}{ubuntu-1804-devel-onednn}{onednn-jupyter}" - - "{_TAG_PREFIX}{ubuntu-2004-devel-onednn}{onednn-jupyter}" + - "{_TAG_PREFIX}{ubuntu-onednn}" + - "{_TAG_PREFIX}{ubuntu-onednn}{onednn-jupyter}" + - "{_TAG_PREFIX}{ubuntu-devel-onednn}" + - "{_TAG_PREFIX}{ubuntu-devel-onednn}{onednn-jupyter}" # Dockerfiles stored in the TF repo; not pushed anywhere dockerfiles: @@ -64,18 +56,10 @@ releases: - "{ubuntu-devel-ppc64le}{jupyter}" - "{ubuntu-horovod}{jupyter}" - "{ubuntu-devel-horovod}{jupyter}" - - "{ubuntu-1604-onednn}" - - "{ubuntu-1804-onednn}" - - "{ubuntu-2004-onednn}" - - "{ubuntu-1604-onednn}{onednn-jupyter}" - - "{ubuntu-1804-onednn}{onednn-jupyter}" - - "{ubuntu-2004-onednn}{onednn-jupyter}" - - "{ubuntu-1604-devel-onednn}" - - "{ubuntu-1804-devel-onednn}" - - "{ubuntu-2004-devel-onednn}" - - "{ubuntu-1604-devel-onednn}{onednn-jupyter}" - - "{ubuntu-1804-devel-onednn}{onednn-jupyter}" - - "{ubuntu-2004-devel-onednn}{onednn-jupyter}" + - "{ubuntu-onednn}" + - 
"{ubuntu-onednn}{onednn-jupyter}" + - "{ubuntu-devel-onednn}" + - "{ubuntu-devel-onednn}{onednn-jupyter}" - "{ubuntu-devel-arm64v8}{jupyter}" slice_sets: @@ -194,7 +178,7 @@ slice_sets: - CHECKOUT_HOROVOD_SRC=1 - ubuntu-1604-onednn: + ubuntu-onednn: - add_to_name: "-16.04-onednn" dockerfile_exclusive_name: "ubuntu-16.04-onednn" dockerfile_subdirectory: "onednn" @@ -209,8 +193,6 @@ slice_sets: args: - TF_PACKAGE=intel-tensorflow - UBUNTU_VERSION=16.04 - - ubuntu-1804-onednn: - add_to_name: "-18.04-onednn" dockerfile_exclusive_name: "ubuntu-18.04-onednn" dockerfile_subdirectory: "onednn" @@ -225,8 +207,6 @@ slice_sets: args: - TF_PACKAGE=intel-tensorflow - UBUNTU_VERSION=18.04 - - ubuntu-2004-onednn: - add_to_name: "-20.04-onednn" dockerfile_exclusive_name: "ubuntu-20.04-onednn" dockerfile_subdirectory: "onednn" @@ -243,7 +223,7 @@ slice_sets: - UBUNTU_VERSION=20.04 - PYTHON=python3.7 - ubuntu-1604-devel-onednn: + ubuntu-devel-onednn: - add_to_name: "-16.04-devel-onednn" dockerfile_exclusive_name: "ubuntu-16.04-devel-onednn" dockerfile_subdirectory: "onednn" @@ -259,8 +239,6 @@ slice_sets: - UBUNTU_VERSION=16.04 - CHECKOUT_TF_SRC=1 - TF_BRANCH=master - - ubuntu-1804-devel-onednn: - add_to_name: "-18.04-devel-onednn" dockerfile_exclusive_name: "ubuntu-18.04-devel-onednn" dockerfile_subdirectory: "onednn" @@ -276,8 +254,6 @@ slice_sets: - UBUNTU_VERSION=18.04 - CHECKOUT_TF_SRC=1 - TF_BRANCH=master - - ubuntu-2004-devel-onednn: - add_to_name: "-20.04-devel-onednn" dockerfile_exclusive_name: "ubuntu-20.04-devel-onednn" dockerfile_subdirectory: "onednn" From 6e98b7f78d8d57bf7da256ae92a65f6924715100 Mon Sep 17 00:00:00 2001 From: Abolfazl Shahbazi Date: Wed, 8 Jul 2020 18:01:34 -0700 Subject: [PATCH 0166/2522] Update 'import-onednn.sh' test case per code review --- .../tools/dockerfiles/tests/import-onednn.sh | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/tensorflow/tools/dockerfiles/tests/import-onednn.sh b/tensorflow/tools/dockerfiles/tests/import-onednn.sh index c890d402453..7cc24f01bf7 100755 --- a/tensorflow/tools/dockerfiles/tests/import-onednn.sh +++ b/tensorflow/tools/dockerfiles/tests/import-onednn.sh @@ -15,12 +15,16 @@ # limitations under the License. # ============================================================================ -{ # try - echo `python -c 'from tensorflow.python import _pywrap_util_port; print(_pywrap_util_port.IsMklEnabled())'` - echo "PASS: MKL is enabled" -} || { # catch - echo `python -c 'from tensorflow.python import pywrap_tensorflow; print(pywrap_tensorflow.IsMklEnabled())'` - echo "PASS: Old MKL is detected" -} || { # finally - die "FAIL: MKL is not enabled" -} +python -c 'from tensorflow.python import _pywrap_util_port; print(_pywrap_util_port.IsMklEnabled())' +new_mkl_enabled=$? + +python -c 'from tensorflow.python import pywrap_tensorflow; print(pywrap_tensorflow.IsMklEnabled())' +old_mkl_enabled=$? 
+ +if [[ $new_mkl_enabled -eq 0 ]]; then + echo "PASS: MKL is enabled" +elif [[ $old_mkl_enabled -eq 0]]; then + echo "PASS: Old MKL is detected" +else + die "FAIL: MKL is not enabled" +fi From 1c685ebca2f41951ca59a9f2028670405b5a528b Mon Sep 17 00:00:00 2001 From: WindQAQ Date: Wed, 8 Jul 2020 18:18:20 -0700 Subject: [PATCH 0167/2522] Support nearest fill mode --- tensorflow/core/kernels/image_ops.cc | 5 ++++- tensorflow/core/kernels/image_ops.h | 17 ++++++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/image_ops.cc b/tensorflow/core/kernels/image_ops.cc index 5d879661f12..4742f9ca6ff 100644 --- a/tensorflow/core/kernels/image_ops.cc +++ b/tensorflow/core/kernels/image_ops.cc @@ -75,9 +75,11 @@ class ImageProjectiveTransformV2 : public OpKernel { fill_mode_ = Mode::WRAP; } else if (mode_str == "CONSTANT") { fill_mode_ = Mode::CONSTANT; + } else if (mode_str == "NEAREST") { + fill_mode_ = Mode::NEAREST; } else { LOG(ERROR) << "Invalid mode " << mode_str - << ". Supported types: REFLECT, WRAP, CONSTANT"; + << ". Supported types: REFLECT, WRAP, CONSTANT, NEAREST"; } } @@ -182,6 +184,7 @@ namespace generator { DECLARE_MAP_FUNCTOR(Mode::REFLECT); DECLARE_MAP_FUNCTOR(Mode::WRAP); DECLARE_MAP_FUNCTOR(Mode::CONSTANT); +DECLARE_MAP_FUNCTOR(Mode::NEAREST); } // end namespace generator diff --git a/tensorflow/core/kernels/image_ops.h b/tensorflow/core/kernels/image_ops.h index 300c65921bd..5f167833e0e 100644 --- a/tensorflow/core/kernels/image_ops.h +++ b/tensorflow/core/kernels/image_ops.h @@ -30,7 +30,7 @@ namespace tensorflow { namespace generator { enum Interpolation { NEAREST, BILINEAR }; -enum Mode { REFLECT, WRAP, CONSTANT }; +enum Mode { REFLECT, WRAP, CONSTANT, NEAREST }; using Eigen::array; using Eigen::DenseIndex; @@ -89,6 +89,16 @@ struct MapCoordinate { } }; +template +struct MapCoordinate { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float operator()(const float out_coord, + const DenseIndex len) { + if (out_coord < 0) return 0; + else if (out_coord >= len) return len - 1; + return out_coord; + } +}; + template class ProjectiveGenerator { private: @@ -230,6 +240,11 @@ struct FillProjectiveTransform { output->generate(ProjectiveGenerator( images, transform, interpolation)); break; + case Mode::NEAREST: + output->device(device) = + output->generate(ProjectiveGenerator( + images, transform, interpolation)); + break; } } }; From c4e7cf7a5458d6ba2746f5a37de3a6449dafe4fa Mon Sep 17 00:00:00 2001 From: WindQAQ Date: Wed, 8 Jul 2020 18:18:43 -0700 Subject: [PATCH 0168/2522] Add nearest fill mode and tests --- .../preprocessing/image_preprocessing.py | 25 ++++++--- .../preprocessing/image_preprocessing_test.py | 55 +++++++++++++++++++ 2 files changed, 73 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/keras/layers/preprocessing/image_preprocessing.py b/tensorflow/python/keras/layers/preprocessing/image_preprocessing.py index dd741c8c72c..25558b7c0fb 100644 --- a/tensorflow/python/keras/layers/preprocessing/image_preprocessing.py +++ b/tensorflow/python/keras/layers/preprocessing/image_preprocessing.py @@ -57,10 +57,10 @@ W_AXIS = 2 def check_fill_mode_and_interpolation(fill_mode, interpolation): - if fill_mode not in {'reflect', 'wrap', 'constant'}: + if fill_mode not in {'reflect', 'wrap', 'constant', 'nearest'}: raise NotImplementedError( - 'Unknown `fill_mode` {}. Only `reflect`, `wrap` and ' - '`constant` are supported.'.format(fill_mode)) + 'Unknown `fill_mode` {}. 
Only `reflect`, `wrap`, ' + '`constant` and `nearest` are supported.'.format(fill_mode)) if interpolation not in {'nearest', 'bilinear'}: raise NotImplementedError('Unknown `interpolation` {}. Only `nearest` and ' '`bilinear` are supported.'.format(interpolation)) @@ -449,7 +449,7 @@ class RandomTranslation(Layer): `width_factor=0.2` results in an output height shifted left or right by 20%. fill_mode: Points outside the boundaries of the input are filled according - to the given mode (one of `{'constant', 'reflect', 'wrap'}`). + to the given mode (one of `{'constant', 'reflect', 'wrap', 'nearest'}`). - *reflect*: `(d c b a | a b c d | d c b a)` The input is extended by reflecting about the edge of the last pixel. - *constant*: `(k k k k | a b c d | k k k k)` @@ -457,6 +457,8 @@ class RandomTranslation(Layer): same constant value k = 0. - *wrap*: `(a b c d | a b c d | a b c d)` The input is extended by wrapping around to the opposite edge. + - *nearest*: `(a a a a | a b c d | d d d d)` + The input is extended by the nearest pixel. interpolation: Interpolation mode. Supported values: "nearest", "bilinear". seed: Integer. Used to create a random seed. name: A string, the name of the layer. @@ -625,7 +627,7 @@ def transform(images, transform mapping input points to output points. Note that gradients are not backpropagated into transformation parameters. fill_mode: Points outside the boundaries of the input are filled according - to the given mode (one of `{'constant', 'reflect', 'wrap'}`). + to the given mode (one of `{'constant', 'reflect', 'wrap', 'nearest'}`). interpolation: Interpolation mode. Supported values: "nearest", "bilinear". output_shape: Output dimesion after the transform, [height, width]. If None, output is the same size as input image. @@ -644,6 +646,9 @@ def transform(images, wrap (a b c d | a b c d | a b c d) The input is extended by wrapping around to the opposite edge. + nearest (a a a a | a b c d | d d d d) + The input is extended by the nearest pixel. + Input shape: 4D tensor with shape: `(samples, height, width, channels)`, data_format='channels_last'. @@ -751,13 +756,16 @@ class RandomRotation(Layer): `factor=0.2` results in an output rotating by a random amount in the range `[-20% * 2pi, 20% * 2pi]`. fill_mode: Points outside the boundaries of the input are filled according - to the given mode (one of `{'constant', 'reflect', 'wrap'}`). + to the given mode (one of `{'constant', 'reflect', 'wrap', 'nearest'}`). - *reflect*: `(d c b a | a b c d | d c b a)` The input is extended by reflecting about the edge of the last pixel. - *constant*: `(k k k k | a b c d | k k k k)` The input is extended by filling all values beyond the edge with the same constant value k = 0. - *wrap*: `(a b c d | a b c d | a b c d)` + The input is extended by wrapping around to the opposite edge. + - *nearest*: `(a a a a | a b c d | d d d d)` + The input is extended by the nearest pixel. interpolation: Interpolation mode. Supported values: "nearest", "bilinear". seed: Integer. Used to create a random seed. name: A string, the name of the layer. @@ -862,13 +870,16 @@ class RandomZoom(Layer): to 30%. Defaults to `None`, i.e., zooming vertical and horizontal directions by preserving the aspect ratio. fill_mode: Points outside the boundaries of the input are filled according - to the given mode (one of `{'constant', 'reflect', 'wrap'}`). + to the given mode (one of `{'constant', 'reflect', 'wrap', 'nearest'}`). 
- *reflect*: `(d c b a | a b c d | d c b a)` The input is extended by reflecting about the edge of the last pixel. - *constant*: `(k k k k | a b c d | k k k k)` The input is extended by filling all values beyond the edge with the same constant value k = 0. - *wrap*: `(a b c d | a b c d | a b c d)` + The input is extended by wrapping around to the opposite edge. + - *nearest*: `(a a a a | a b c d | d d d d)` + The input is extended by the nearest pixel. interpolation: Interpolation mode. Supported values: "nearest", "bilinear". seed: Integer. Used to create a random seed. name: A string, the name of the layer. diff --git a/tensorflow/python/keras/layers/preprocessing/image_preprocessing_test.py b/tensorflow/python/keras/layers/preprocessing/image_preprocessing_test.py index a3540fca6df..a039ec644e3 100644 --- a/tensorflow/python/keras/layers/preprocessing/image_preprocessing_test.py +++ b/tensorflow/python/keras/layers/preprocessing/image_preprocessing_test.py @@ -816,6 +816,61 @@ class RandomTransformTest(keras_parameterized.TestCase): self._run_random_transform_with_mock(transform_matrix, expected_output, 'wrap') + def test_random_translation_nearest(self): + # nearest output is (aaaa|abcd|dddd) + + # Test down shift by 1. + # pyformat: disable + expected_output = np.asarray( + [[0., 1., 2.], + [0., 1., 2.], + [3., 4., 5.], + [6., 7., 8], + [9., 10., 11]]).reshape((1, 5, 3, 1)).astype(np.float32) + # pyformat: enable + transform_matrix = np.asarray([[1., 0., 0., 0., 1., -1., 0., 0.]]) + self._run_random_transform_with_mock(transform_matrix, expected_output, + 'nearest') + + # Test up shift by 1. + # pyformat: disable + expected_output = np.asarray( + [[3., 4., 5.], + [6., 7., 8], + [9., 10., 11.], + [12., 13., 14.], + [12., 13., 14.]]).reshape((1, 5, 3, 1)).astype(np.float32) + # pyformat: enable + transform_matrix = np.asarray([[1., 0., 0., 0., 1., 1., 0., 0.]]) + self._run_random_transform_with_mock(transform_matrix, expected_output, + 'nearest') + + # Test left shift by 1. + # pyformat: disable + expected_output = np.asarray( + [[1., 2., 2.], + [4., 5., 5.], + [7., 8., 8.], + [10., 11., 11.], + [13., 14., 14.]]).reshape((1, 5, 3, 1)).astype(np.float32) + # pyformat: enable + transform_matrix = np.asarray([[1., 0., 1., 0., 1., 0., 0., 0.]]) + self._run_random_transform_with_mock(transform_matrix, expected_output, + 'nearest') + + # Test right shift by 1. 
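    # (Assumed convention, per ImageProjectiveTransformV2's documentation: a
    #  transform [a0, a1, a2, b0, b1, b2, c0, c1] makes output pixel (x, y)
    #  sample the input at (a0*x + a1*y + a2, b0*x + b1*y + b2), so a2 = -1
    #  below shifts the image right by one column and the out-of-range input
    #  column -1 is clamped to column 0 by the 'nearest' fill mode.)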
+ # pyformat: disable + expected_output = np.asarray( + [[0., 0., 1.], + [3., 3., 4], + [6., 6., 7.], + [9., 9., 10.], + [12., 12., 13.]]).reshape((1, 5, 3, 1)).astype(np.float32) + # pyformat: enable + transform_matrix = np.asarray([[1., 0., -1., 0., 1., 0., 0., 0.]]) + self._run_random_transform_with_mock(transform_matrix, expected_output, + 'nearest') + def test_random_translation_constant(self): # constant output is (0000|abcd|0000) From f51a56d5d6c199bcb1b588c7fc2c95ded6d2e051 Mon Sep 17 00:00:00 2001 From: WindQAQ Date: Wed, 8 Jul 2020 18:44:56 -0700 Subject: [PATCH 0169/2522] Change enum name to avoid conflict --- tensorflow/core/kernels/image_ops.cc | 16 ++++++++-------- tensorflow/core/kernels/image_ops.h | 26 +++++++++++++------------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/tensorflow/core/kernels/image_ops.cc b/tensorflow/core/kernels/image_ops.cc index 4742f9ca6ff..8792372b6ff 100644 --- a/tensorflow/core/kernels/image_ops.cc +++ b/tensorflow/core/kernels/image_ops.cc @@ -70,13 +70,13 @@ class ImageProjectiveTransformV2 : public OpKernel { string mode_str; OP_REQUIRES_OK(ctx, ctx->GetAttr("fill_mode", &mode_str)); if (mode_str == "REFLECT") { - fill_mode_ = Mode::REFLECT; + fill_mode_ = Mode::FILL_REFLECT; } else if (mode_str == "WRAP") { - fill_mode_ = Mode::WRAP; + fill_mode_ = Mode::FILL_WRAP; } else if (mode_str == "CONSTANT") { - fill_mode_ = Mode::CONSTANT; + fill_mode_ = Mode::FILL_CONSTANT; } else if (mode_str == "NEAREST") { - fill_mode_ = Mode::NEAREST; + fill_mode_ = Mode::FILL_NEAREST; } else { LOG(ERROR) << "Invalid mode " << mode_str << ". Supported types: REFLECT, WRAP, CONSTANT, NEAREST"; @@ -181,10 +181,10 @@ namespace generator { const DenseIndex len); \ extern template struct MapCoordinate -DECLARE_MAP_FUNCTOR(Mode::REFLECT); -DECLARE_MAP_FUNCTOR(Mode::WRAP); -DECLARE_MAP_FUNCTOR(Mode::CONSTANT); -DECLARE_MAP_FUNCTOR(Mode::NEAREST); +DECLARE_MAP_FUNCTOR(Mode::FILL_REFLECT); +DECLARE_MAP_FUNCTOR(Mode::FILL_WRAP); +DECLARE_MAP_FUNCTOR(Mode::FILL_CONSTANT); +DECLARE_MAP_FUNCTOR(Mode::FILL_NEAREST); } // end namespace generator diff --git a/tensorflow/core/kernels/image_ops.h b/tensorflow/core/kernels/image_ops.h index 5f167833e0e..f1edafddd2c 100644 --- a/tensorflow/core/kernels/image_ops.h +++ b/tensorflow/core/kernels/image_ops.h @@ -30,7 +30,7 @@ namespace tensorflow { namespace generator { enum Interpolation { NEAREST, BILINEAR }; -enum Mode { REFLECT, WRAP, CONSTANT, NEAREST }; +enum Mode { FILL_REFLECT, FILL_WRAP, FILL_CONSTANT, FILL_NEAREST }; using Eigen::array; using Eigen::DenseIndex; @@ -41,7 +41,7 @@ struct MapCoordinate { }; template -struct MapCoordinate { +struct MapCoordinate { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float operator()(const float out_coord, const DenseIndex len) { float in_coord = out_coord; @@ -64,7 +64,7 @@ struct MapCoordinate { }; template -struct MapCoordinate { +struct MapCoordinate { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float operator()(const float out_coord, const DenseIndex len) { float in_coord = out_coord; @@ -82,7 +82,7 @@ struct MapCoordinate { }; template -struct MapCoordinate { +struct MapCoordinate { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float operator()(const float out_coord, const DenseIndex len) { return out_coord; @@ -90,7 +90,7 @@ struct MapCoordinate { }; template -struct MapCoordinate { +struct MapCoordinate { EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float operator()(const float out_coord, const DenseIndex len) { if (out_coord < 0) return 0; @@ -225,24 +225,24 @@ struct 
FillProjectiveTransform { const InputType& images, const TransformsType& transform, const Mode fill_mode) const { switch (fill_mode) { - case Mode::REFLECT: + case Mode::FILL_REFLECT: output->device(device) = - output->generate(ProjectiveGenerator( + output->generate(ProjectiveGenerator( images, transform, interpolation)); break; - case Mode::WRAP: + case Mode::FILL_WRAP: output->device(device) = - output->generate(ProjectiveGenerator( + output->generate(ProjectiveGenerator( images, transform, interpolation)); break; - case Mode::CONSTANT: + case Mode::FILL_CONSTANT: output->device(device) = - output->generate(ProjectiveGenerator( + output->generate(ProjectiveGenerator( images, transform, interpolation)); break; - case Mode::NEAREST: + case Mode::FILL_NEAREST: output->device(device) = - output->generate(ProjectiveGenerator( + output->generate(ProjectiveGenerator( images, transform, interpolation)); break; } From 0195f0bad662aa594bea0cc4137854396a909a0c Mon Sep 17 00:00:00 2001 From: rahul-kamat Date: Thu, 9 Jul 2020 04:20:27 +0000 Subject: [PATCH 0170/2522] Add benchmark for tf.function with retrace --- tensorflow/python/eager/benchmarks_test.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tensorflow/python/eager/benchmarks_test.py b/tensorflow/python/eager/benchmarks_test.py index 24e86c77a14..72eebd87706 100644 --- a/tensorflow/python/eager/benchmarks_test.py +++ b/tensorflow/python/eager/benchmarks_test.py @@ -1346,6 +1346,12 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): values.append(array_ops.zeros(shape=(1000,))) self._run(lambda: np.array([x.numpy() for x in values]), 1000) + def benchmark_function_trace(self): + @def_function.function + def func(x): + return x + self._run(lambda: (func(x) for x in range(1000)), 30000) + def _benchmarkFunctionWithResourceInputs(self, num_resources, num_iters): @def_function.function def add_all(*args): From b12541ec949df7b37caf7be22c4ebe7cbc376533 Mon Sep 17 00:00:00 2001 From: Abolfazl Shahbazi Date: Wed, 8 Jul 2020 22:04:27 -0700 Subject: [PATCH 0171/2522] Rename python37 partial to python3 --- ...python37.partial.Dockerfile => python3.partial.Dockerfile} | 0 tensorflow/tools/dockerfiles/spec.yml | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) rename tensorflow/tools/dockerfiles/partials/onednn/ubuntu/{python37.partial.Dockerfile => python3.partial.Dockerfile} (100%) diff --git a/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/python37.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/python3.partial.Dockerfile similarity index 100% rename from tensorflow/tools/dockerfiles/partials/onednn/ubuntu/python37.partial.Dockerfile rename to tensorflow/tools/dockerfiles/partials/onednn/ubuntu/python3.partial.Dockerfile diff --git a/tensorflow/tools/dockerfiles/spec.yml b/tensorflow/tools/dockerfiles/spec.yml index fbe5ed2c238..4b52d0553ba 100644 --- a/tensorflow/tools/dockerfiles/spec.yml +++ b/tensorflow/tools/dockerfiles/spec.yml @@ -213,7 +213,7 @@ slice_sets: partials: - onednn/ubuntu/version - onednn/ubuntu/cpu - - onednn/ubuntu/python37 + - onednn/ubuntu/python3 - tensorflow - shell tests: @@ -260,7 +260,7 @@ slice_sets: partials: - onednn/ubuntu/version - onednn/ubuntu/devel - - onednn/ubuntu/python + - onednn/ubuntu/python3 - onednn/ubuntu/bazel - shell tests: From a6d7749d24ff0a994f063d0c7b270a8dd347c1ee Mon Sep 17 00:00:00 2001 From: zilinzhu Date: Thu, 9 Jul 2020 15:00:49 +0800 Subject: [PATCH 0172/2522] add SkipRecords to RecordReader --- 
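Usage sketch for the new API (not part of the change itself; it simply mirrors
the TestSkip case added below and assumes an existing TFRecord file at fname):

    Env* env = Env::Default();
    std::unique_ptr<RandomAccessFile> read_file;
    TF_CHECK_OK(env->NewRandomAccessFile(fname, &read_file));
    io::RecordReader reader(read_file.get(), io::RecordReaderOptions());
    uint64 offset = 0;
    tstring record;
    TF_CHECK_OK(reader.SkipRecords(&offset, 2));       // skip records 0 and 1
    TF_CHECK_OK(reader.ReadRecord(&offset, &record));  // reads record 2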
tensorflow/core/lib/io/record_reader.cc | 28 +++++++++++++-- tensorflow/core/lib/io/record_reader.h | 17 ++++++++-- .../core/lib/io/record_reader_writer_test.cc | 34 +++++++++++++++++++ 3 files changed, 74 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/lib/io/record_reader.cc b/tensorflow/core/lib/io/record_reader.cc index 40e516f5ef9..f4fc2089065 100644 --- a/tensorflow/core/lib/io/record_reader.cc +++ b/tensorflow/core/lib/io/record_reader.cc @@ -167,10 +167,9 @@ Status RecordReader::GetMetadata(Metadata* md) { return Status::OK(); } -Status RecordReader::ReadRecord(uint64* offset, tstring* record) { - // Position the input stream. +Status RecordReader::PositionInputStream(uint64 offset) { int64 curr_pos = input_stream_->Tell(); - int64 desired_pos = static_cast(*offset); + int64 desired_pos = static_cast(offset); if (curr_pos > desired_pos || curr_pos < 0 /* EOF */ || (curr_pos == desired_pos && last_read_failed_)) { last_read_failed_ = false; @@ -180,6 +179,11 @@ Status RecordReader::ReadRecord(uint64* offset, tstring* record) { TF_RETURN_IF_ERROR(input_stream_->SkipNBytes(desired_pos - curr_pos)); } DCHECK_EQ(desired_pos, input_stream_->Tell()); + return Status::OK(); +} + +Status RecordReader::ReadRecord(uint64* offset, tstring* record) { + TF_RETURN_IF_ERROR(PositionInputStream(*offset)); // Read header data. Status s = ReadChecksummed(*offset, sizeof(uint64), record); @@ -204,6 +208,24 @@ Status RecordReader::ReadRecord(uint64* offset, tstring* record) { return Status::OK(); } +Status RecordReader::SkipRecords(uint64* offset, int num_to_skip) { + TF_RETURN_IF_ERROR(PositionInputStream(*offset)); + + Status s; + tstring record; + for (int i = 0; i < num_to_skip; ++i) { + s = ReadChecksummed(*offset, sizeof(uint64), &record); + if (!s.ok()) { + last_read_failed_ = true; + return s; + } + const uint64 length = core::DecodeFixed64(record.data()); + input_stream_->SkipNBytes(length + kFooterSize); + *offset += kHeaderSize + length + kFooterSize; + DCHECK_EQ(*offset, input_stream_->Tell()); + } +} + SequentialRecordReader::SequentialRecordReader( RandomAccessFile* file, const RecordReaderOptions& options) : underlying_(file, options), offset_(0) {} diff --git a/tensorflow/core/lib/io/record_reader.h b/tensorflow/core/lib/io/record_reader.h index 07709990a64..3d27d7d26a4 100644 --- a/tensorflow/core/lib/io/record_reader.h +++ b/tensorflow/core/lib/io/record_reader.h @@ -97,6 +97,12 @@ class RecordReader { // OUT_OF_RANGE for end of file, or something else for an error. Status ReadRecord(uint64* offset, tstring* record); + // Skip num_to_skip record starting at "*offset" and update *offset + // to point to the offset of the next num_to_skip + 1 record. + // Return OK on success, OUT_OF_RANGE for end of file, or something + // else for an error. + Status SkipRecords(uint64* offset, int num_to_skip); + // Return the metadata of the Record file. // // The current implementation scans the file to completion, @@ -110,6 +116,7 @@ class RecordReader { private: Status ReadChecksummed(uint64 offset, size_t n, tstring* result); + Status PositionInputStream(uint64 offset); RecordReaderOptions options_; std::unique_ptr input_stream_; @@ -133,13 +140,19 @@ class SequentialRecordReader { virtual ~SequentialRecordReader() = default; - // Reads the next record in the file into *record. Returns OK on success, + // Read the next record in the file into *record. Returns OK on success, // OUT_OF_RANGE for end of file, or something else for an error. 
Status ReadRecord(tstring* record) { return underlying_.ReadRecord(&offset_, record); } - // Returns the current offset in the file. + // Skip the next num_to_skip record in the file. Return OK on success, + // OUT_OF_RANGE for end of file, or something else for an error. + Status SkipRecords(int num_to_skip) { + return underlying_.SkipRecords(&offset_, num_to_skip); + } + + // Return the current offset in the file. uint64 TellOffset() { return offset_; } // Seek to this offset within the file and set this offset as the current diff --git a/tensorflow/core/lib/io/record_reader_writer_test.cc b/tensorflow/core/lib/io/record_reader_writer_test.cc index 486b238bd29..6a314388690 100644 --- a/tensorflow/core/lib/io/record_reader_writer_test.cc +++ b/tensorflow/core/lib/io/record_reader_writer_test.cc @@ -158,6 +158,40 @@ TEST(RecordReaderWriterTest, TestBasics) { } } +TEST(RecordReaderWriterTest, TestSkip) { + Env* env = Env::Default(); + string fname = testing::TmpDir() + "/record_reader_writer_skip_est"; + + for (auto buf_size : BufferSizes()) { + { + std::unique_ptr file; + TF_CHECK_OK(env->NewWritableFile(fname, &file)); + + io::RecordWriterOptions options; + options.zlib_options.output_buffer_size = buf_size; + io::RecordWriter writer(file.get(), options); + TF_EXPECT_OK(writer.WriteRecord("abc")); + TF_EXPECT_OK(writer.WriteRecord("defg")); + TF_EXPECT_OK(writer.WriteRecord("hij")); + TF_CHECK_OK(writer.Flush()); + } + + { + std::unique_ptr read_file; + // Read it back with the RecordReader. + TF_CHECK_OK(env->NewRandomAccessFile(fname, &read_file)); + io::RecordReaderOptions options; + options.zlib_options.input_buffer_size = buf_size; + io::RecordReader reader(read_file.get(), options); + uint64 offset = 0; + tstring record; + TF_CHECK_OK(reader.SkipRecords(&offset, 2)); + TF_CHECK_OK(reader.ReadRecord(&offset, &record)); + EXPECT_EQ("hij", record); + } + } +} + TEST(RecordReaderWriterTest, TestSnappy) { Env* env = Env::Default(); string fname = testing::TmpDir() + "/record_reader_writer_snappy_test"; From 7958d97ba4f1aeebc63608f9fe33b3ad56a1e112 Mon Sep 17 00:00:00 2001 From: Elena Zhelezina Date: Thu, 9 Jul 2020 11:00:03 +0100 Subject: [PATCH 0173/2522] Fix for quantize_model error reporting. Change-Id: Icffdece7804a60beb4b59eb347738457c81b9986 --- tensorflow/lite/tools/optimize/quantize_model.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/tools/optimize/quantize_model.cc b/tensorflow/lite/tools/optimize/quantize_model.cc index bb1deb695b9..5cca49ede28 100644 --- a/tensorflow/lite/tools/optimize/quantize_model.cc +++ b/tensorflow/lite/tools/optimize/quantize_model.cc @@ -950,12 +950,12 @@ TfLiteStatus QuantizeWeightsInputOutput( !allow_float) { TF_LITE_REPORT_ERROR( error_reporter, - "Quantization to 16x8-bit not yet supported for op: %", + "Quantization to 16x8-bit not yet supported for op: '%s'.\n", EnumNameBuiltinOperator(op_code)); return kTfLiteError; } else if (!property.quantizable && !allow_float) { TF_LITE_REPORT_ERROR(error_reporter, - "Quantization not yet supported for op: %", + "Quantization not yet supported for op: '%s'.\n", EnumNameBuiltinOperator(op_code)); return kTfLiteError; } From 372cc81974676059175137698752be6be28b9bd2 Mon Sep 17 00:00:00 2001 From: Trent Lo Date: Thu, 9 Jul 2020 10:58:56 -0700 Subject: [PATCH 0174/2522] [XLA] Make utility functions anonymous Also, polish comments in instruction_fusion.cc. 
--- tensorflow/compiler/xla/service/gpu/instruction_fusion.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc index 5a67f2510dc..3e9869a9ec9 100644 --- a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc @@ -29,15 +29,19 @@ limitations under the License. namespace xla { namespace gpu { +namespace { bool ElementIsF32OrF16(const Shape& shape) { PrimitiveType type = shape.element_type(); return type == F32 || type == F16; } +} // namespace /*static*/ bool GpuInstructionFusion::IsExpensive( const HloInstruction& instruction) { - // We say that some floating-point math ops are cheap on the GPU. + // We say that some floating-point math ops are cheap on the GPU. Unlike other + // intrinsics that can be expanded into many instructions, Div and Rsqrt are + // lowered into single hardware instructions. switch (instruction.opcode()) { case HloOpcode::kDivide: case HloOpcode::kRsqrt: From bed05f4d45face787ee8a0dc97ae5eb7f5b67318 Mon Sep 17 00:00:00 2001 From: rahul-kamat Date: Thu, 9 Jul 2020 18:02:14 +0000 Subject: [PATCH 0175/2522] Add a code example for tf.function, Update RELEASE.md --- RELEASE.md | 3 ++- tensorflow/python/eager/def_function.py | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/RELEASE.md b/RELEASE.md index 804126a9402..e5c549fde9a 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -30,7 +30,8 @@ * `tf.keras`: * * `tf.function`/AutoGraph: - * + * Added `experimental_follow_type_hints` argument. When True, the function may use type + annotations to optimize the tracing performance. * `tf.lite`: * * `tf.random`: diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py index 5c65baa0d83..a4f9a6c0497 100644 --- a/tensorflow/python/eager/def_function.py +++ b/tensorflow/python/eager/def_function.py @@ -1374,6 +1374,24 @@ def function(func=None, In general, it is recommended to create stateful objects like `tf.Variable` outside of `tf.function` and passing them as arguments. + _Using type annotations to improve performance_ + + 'experimental_follow_type_hints` can be used along with type annotations to + improve performance by reducing the number of expensive graph retracings. + For example, an argument annotated with `tf.Tensor` is converted to Tensor + even when the input is a non-Tensor value. + + >>> @tf.function( + ... experimental_follow_type_hints=True) + ... def f(x: tf.Tensor): + ... print('Tracing!') + ... tf.print('Executing') + >>> f(1) + Tracing! + Executing + >>> f(2) + Executing + Args: func: the function to be compiled. If `func` is None, `tf.function` returns a decorator that can be invoked with a single argument - `func`. 
In other From ab230eded9f439ed0ea99881d35170f2723b24ce Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Thu, 9 Jul 2020 14:54:28 -0400 Subject: [PATCH 0176/2522] Update topological_sort.cc --- tensorflow/core/grappler/utils/topological_sort.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/utils/topological_sort.cc b/tensorflow/core/grappler/utils/topological_sort.cc index e45419b22c6..8e373488e2a 100644 --- a/tensorflow/core/grappler/utils/topological_sort.cc +++ b/tensorflow/core/grappler/utils/topological_sort.cc @@ -96,7 +96,7 @@ Status ComputeTopologicalOrder( "at node = " << graph.node(back).DebugString(); for (int i = 0; i < graph_view.num_nodes(); ++i) { - int graph_view_GetFanin_i_size = graph_view.GetFanin(i).size(); + const int graph_view_GetFanin_i_size = graph_view.GetFanin(i).size(); if (num_ready_inputs[i] != graph_view_GetFanin_i_size) { VLOG(1) << "Node not ready: " << graph.node(i).DebugString(); } From c7705a8cd19e19ba75fc7e444ebbbade2bfa2b21 Mon Sep 17 00:00:00 2001 From: rahul-kamat Date: Thu, 9 Jul 2020 18:55:11 +0000 Subject: [PATCH 0177/2522] Create a new tf.function object on each iteration --- tensorflow/python/eager/benchmarks_test.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/python/eager/benchmarks_test.py b/tensorflow/python/eager/benchmarks_test.py index 72eebd87706..fae18355e2d 100644 --- a/tensorflow/python/eager/benchmarks_test.py +++ b/tensorflow/python/eager/benchmarks_test.py @@ -1347,10 +1347,9 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): self._run(lambda: np.array([x.numpy() for x in values]), 1000) def benchmark_function_trace(self): - @def_function.function def func(x): return x - self._run(lambda: (func(x) for x in range(1000)), 30000) + self._run(lambda: (def_function.function(func)(x) for x in range(1000)), 30000) def _benchmarkFunctionWithResourceInputs(self, num_resources, num_iters): @def_function.function From 9f6da28f784e7a7e2eafdbea7954ffa167090419 Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Thu, 9 Jul 2020 14:55:28 -0400 Subject: [PATCH 0178/2522] Update bcast.h --- tensorflow/core/util/bcast.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/util/bcast.h b/tensorflow/core/util/bcast.h index 4b17e1961ad..db431db77fb 100644 --- a/tensorflow/core/util/bcast.h +++ b/tensorflow/core/util/bcast.h @@ -177,7 +177,7 @@ BCastList::BCastList(const BCastList::Vec (&x)[N], // 1-extend and align all vectors. for (int i = 0; i < N; ++i) { - int copy_i_size = copy[i].size(); + const int copy_i_size = copy[i].size(); if (copy_i_size < largest_rank) { copy[i].resize(largest_rank, 1); } From 4a6e831426c991d53fe1fea8b44dfec2dfe6f70a Mon Sep 17 00:00:00 2001 From: Xingyu Long Date: Thu, 9 Jul 2020 15:43:51 -0400 Subject: [PATCH 0179/2522] Create keras_io_examples folder and upload text with transformer. 
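For reference, both files added here follow the same measurement pattern; a
minimal sketch (the `measure_performance` call, its arguments and the reported
`wall_time` key come from benchmark_util.py, while the tiny Dense model and the
synthetic data are stand-ins, not part of this change):

    import numpy as np
    import six
    import tensorflow as tf

    from tensorflow.python.platform import benchmark
    from tensorflow.python.keras.benchmarks.keras_io_examples_benchmark \
        import benchmark_util

    class ExampleBenchmark(
        six.with_metaclass(benchmark.ParameterizedBenchmark, tf.test.Benchmark)):
      # One benchmark per (name_suffix, batch_size, run_iters) tuple.
      _benchmark_parameters = [('bs_32', 32, 2)]

      def _build_model(self):
        return tf.keras.Sequential([tf.keras.layers.Dense(1)])

      def benchmark_example(self, batch_size, run_iters):
        results = benchmark_util.measure_performance(
            self._build_model,
            x=np.zeros((256, 8), dtype='float32'),
            y=np.zeros((256, 1), dtype='float32'),
            batch_size=batch_size,
            run_iters=run_iters,
            optimizer='adam',
            loss='mse',
            metrics=['accuracy'])
        self.report_benchmark(
            iters=run_iters, wall_time=results['wall_time'], extras=results)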
--- tensorflow/python/keras/benchmarks/BUILD | 22 -- .../keras_io_examples_benchmark/BUILD | 61 ++++++ .../keras_io_examples_benchmark/__init__.py | 18 ++ .../benchmark_util.py | 0 .../bidirectional_lstm_benchmark_test.py} | 42 ++-- ...assification_transformer_benchmark_test.py | 206 ++++++++++++++++++ 6 files changed, 309 insertions(+), 40 deletions(-) create mode 100644 tensorflow/python/keras/benchmarks/keras_io_examples_benchmark/BUILD create mode 100644 tensorflow/python/keras/benchmarks/keras_io_examples_benchmark/__init__.py rename tensorflow/python/keras/benchmarks/{ => keras_io_examples_benchmark}/benchmark_util.py (100%) rename tensorflow/python/keras/benchmarks/{keras_examples_benchmark_test.py => keras_io_examples_benchmark/bidirectional_lstm_benchmark_test.py} (75%) create mode 100644 tensorflow/python/keras/benchmarks/keras_io_examples_benchmark/text_classification_transformer_benchmark_test.py diff --git a/tensorflow/python/keras/benchmarks/BUILD b/tensorflow/python/keras/benchmarks/BUILD index 9e0ae9194a5..bed25c208a2 100755 --- a/tensorflow/python/keras/benchmarks/BUILD +++ b/tensorflow/python/keras/benchmarks/BUILD @@ -69,25 +69,3 @@ cuda_py_test( ], ) -py_test( - name = "keras_examples_benchmark_test", - size = "medium", - srcs = ["keras_examples_benchmark_test.py"], - python_version = "PY3", - tags = [ - "no_windows", # b/160628318 - ], - deps = [ - ":benchmark_util", - "//tensorflow:tensorflow_py", - ], -) - -py_library( - name = "benchmark_util", - srcs = ["benchmark_util.py"], - deps = [ - "//tensorflow:tensorflow_py", - "//third_party/py/numpy", - ], -) diff --git a/tensorflow/python/keras/benchmarks/keras_io_examples_benchmark/BUILD b/tensorflow/python/keras/benchmarks/keras_io_examples_benchmark/BUILD new file mode 100644 index 00000000000..c9d551735b1 --- /dev/null +++ b/tensorflow/python/keras/benchmarks/keras_io_examples_benchmark/BUILD @@ -0,0 +1,61 @@ +# Description: +# Implementation of benchmarks for keras.io examples. + +load("//tensorflow:tensorflow.bzl", "cuda_py_test") + +package( + default_visibility = ["//visibility:public"], + licenses = ["notice"], # Apache 2.0 +) + +exports_files(["LICENSE"]) + +# To run CPU benchmarks: +# bazel run -c opt benchmarks_test -- --benchmarks=. + +# To run GPU benchmarks: +# bazel run --config=cuda -c opt --copt="-mavx" benchmarks_test -- \ +# --benchmarks=. + +# To run a subset of benchmarks using --benchmarks flag. +# --benchmarks: the list of benchmarks to run. The specified value is interpreted +# as a regular expression and any benchmark whose name contains a partial match +# to the regular expression is executed. +# e.g. --benchmarks=".*lstm*." will run all lstm layer related benchmarks. 
+ +py_test( + name = "bidirectional_lstm_benchmark_test", + size = "medium", + srcs = ["bidirectional_lstm_benchmark_test.py"], + python_version = "PY3", + tags = [ + "no_windows", # b/160628318 + ], + deps = [ + ":benchmark_util", + "//tensorflow:tensorflow_py", + ], +) + +py_test( + name = "text_classification_transformer_benchmark_test", + size = "medium", + srcs = ["text_classification_transformer_benchmark_test.py"], + python_version = "PY3", + tags = [ + "no_windows", # b/160628318 + ], + deps = [ + ":benchmark_util", + "//tensorflow:tensorflow_py", + ], +) + +py_library( + name = "benchmark_util", + srcs = ["benchmark_util.py"], + deps = [ + "//tensorflow:tensorflow_py", + "//third_party/py/numpy", + ], +) diff --git a/tensorflow/python/keras/benchmarks/keras_io_examples_benchmark/__init__.py b/tensorflow/python/keras/benchmarks/keras_io_examples_benchmark/__init__.py new file mode 100644 index 00000000000..5d71aa15b5e --- /dev/null +++ b/tensorflow/python/keras/benchmarks/keras_io_examples_benchmark/__init__.py @@ -0,0 +1,18 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Benchmark for keras.io examples.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function \ No newline at end of file diff --git a/tensorflow/python/keras/benchmarks/benchmark_util.py b/tensorflow/python/keras/benchmarks/keras_io_examples_benchmark/benchmark_util.py similarity index 100% rename from tensorflow/python/keras/benchmarks/benchmark_util.py rename to tensorflow/python/keras/benchmarks/keras_io_examples_benchmark/benchmark_util.py diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_io_examples_benchmark/bidirectional_lstm_benchmark_test.py similarity index 75% rename from tensorflow/python/keras/benchmarks/keras_examples_benchmark_test.py rename to tensorflow/python/keras/benchmarks/keras_io_examples_benchmark/bidirectional_lstm_benchmark_test.py index 9781fb9bd8f..af4494e9ce3 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_io_examples_benchmark/bidirectional_lstm_benchmark_test.py @@ -12,7 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Benchmark for examples on https://keras.io/examples.""" +"""Bidirectional LSTM on IMDB. 
+ https://keras.io/examples/nlp/bidirectional_lstm_imdb/ +""" from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -21,14 +23,15 @@ import six import tensorflow as tf -from tensorflow.python.keras.benchmarks import benchmark_util from tensorflow.python.platform import benchmark +from tensorflow.python.keras.benchmarks.keras_io_examples_benchmark \ + import benchmark_util _MAX_FEATURE = 20000 _MAX_LEN = 200 -class KerasExamplesBenchmark( +class BidirectionalLSTMBenchmark( six.with_metaclass(benchmark.ParameterizedBenchmark, tf.test.Benchmark)): """Required Arguments for measure_performance: @@ -40,6 +43,14 @@ class KerasExamplesBenchmark( Other details can see in `measure_performance()` method of benchmark_util. """ + + def __init__(self): + super(BidirectionalLSTMBenchmark, self).__init__() + (self.imdb_x, self.imdb_y), _ = tf.keras.datasets.imdb.load_data( + num_words=_MAX_FEATURE) + self.imdb_x = tf.keras.preprocessing.sequence.pad_sequences( + self.imdb_x, maxlen=_MAX_LEN) + """The parameters of each benchmark is a tuple: (benchmark_name_suffix, batch_size, run_iters). @@ -49,17 +60,17 @@ class KerasExamplesBenchmark( run_iters: Integer. Number of iterations to run the performance measurement. """ - _benchmark_parameters = [('bs_32', 32, 2), ('bs_64', 64, 2), - ('bs_128', 128, 1), ('bs_256', 256, 1), - ('bs_512', 512, 3)] + _benchmark_parameters = [ + ('bs_32', 32, 2), ('bs_64', 64, 2), + ('bs_128', 128, 1), ('bs_256', 256, 1), + ('bs_512', 512, 3)] - def _lstm_imdb_model(self): - """LSTM model from https://keras.io/examples/nlp/bidirectional_lstm_imdb/.""" + def _build_model(self): inputs = tf.keras.Input(shape=(None,), dtype='int32') x = tf.keras.layers.Embedding(_MAX_FEATURE, 128)(inputs) x = tf.keras.layers.Bidirectional( - tf.keras.layers.LSTM(64, return_sequences=True))( - x) + tf.keras.layers.LSTM( + 64, return_sequences=True))(x) x = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64))(x) outputs = tf.keras.layers.Dense(1, activation='sigmoid')(x) model = tf.keras.Model(inputs, outputs) @@ -67,15 +78,10 @@ class KerasExamplesBenchmark( def benchmark_bidirect_lstm_imdb(self, batch_size, run_iters): """Benchmark for Bidirectional LSTM on IMDB.""" - # Load dataset. - (x_train, - y_train), _ = tf.keras.datasets.imdb.load_data(num_words=_MAX_FEATURE) - x_train = tf.keras.preprocessing.sequence.pad_sequences( - x_train, maxlen=_MAX_LEN) results = benchmark_util.measure_performance( - self._lstm_imdb_model, - x=x_train, - y=y_train, + self._build_model, + x=self.imdb_x, + y=self.imdb_y, batch_size=batch_size, run_iters=run_iters, optimizer='adam', diff --git a/tensorflow/python/keras/benchmarks/keras_io_examples_benchmark/text_classification_transformer_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_io_examples_benchmark/text_classification_transformer_benchmark_test.py new file mode 100644 index 00000000000..fc6e2150766 --- /dev/null +++ b/tensorflow/python/keras/benchmarks/keras_io_examples_benchmark/text_classification_transformer_benchmark_test.py @@ -0,0 +1,206 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Text classification with Transformer. + https://keras.io/examples/nlp/text_classification_with_transformer/ +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import six + +import tensorflow as tf + +from tensorflow.python.platform import benchmark +from tensorflow.python.keras.benchmarks.keras_io_examples_benchmark \ + import benchmark_util + + +class TextWithTransformerBenchmark( + six.with_metaclass(benchmark.ParameterizedBenchmark, tf.test.Benchmark)): + """Required Arguments for measure_performance: + + x: Input data, it could be Numpy or load from tfds. + y: Target data. If `x` is a dataset, generator instance, + `y` should not be specified. + loss: Loss function for model. + optimizer: Optimizer for model. + Other details can see in `measure_performance()` method of + benchmark_util. + """ + + def __init__(self): + super(TextWithTransformerBenchmark, self).__init__() + max_feature = 20000 + max_len = 200 + (self.imdb_x, self.imdb_y), _ = tf.keras.datasets.imdb.load_data( + num_words=max_feature) + self.imdb_x = tf.keras.preprocessing.sequence.pad_sequences( + self.imdb_x, maxlen=max_len) + + """The parameters of each benchmark is a tuple: + + (benchmark_name_suffix, batch_size, run_iters). + benchmark_name_suffix: The suffix of the benchmark test name with + convention `{bs}_{batch_size}`. + batch_size: Integer. Number of samples per gradient update. + run_iters: Integer. Number of iterations to run the + performance measurement. 
+ """ + _benchmark_parameters = [ + ('bs_64', 64, 2), ('bs_128', 128, 1), + ('bs_256', 256, 1), ('bs_512', 512, 3)] + + def _build_model(self): + vocab_size = 20000 + max_len = 200 + embed_dim = 32 + num_heads = 2 + ff_dim = 32 + inputs = tf.keras.layers.Input(shape=(max_len,)) + embedding_layer = TokenAndPositionEmbedding( + max_len, + vocab_size, + embed_dim) + x = embedding_layer(inputs) + transformer_block = TransformerBlock( + embed_dim, + num_heads, + ff_dim) + x = transformer_block(x) + x = tf.keras.layers.GlobalAvgPool1D()(x) + x = tf.keras.layers.Dropout(0.1)(x) + x = tf.keras.layers.Dense(20, activation="relu")(x) + x = tf.keras.layers.Dropout(0.1)(x) + outputs = tf.keras.layers.Dense(2, activation="softmax")(x) + + model = tf.keras.Model(inputs=inputs, outputs=outputs) + return model + + def benchmark_text_classification(self, batch_size, run_iters): + """Benchmark for Text classification with Transformer.""" + results = benchmark_util.measure_performance( + self._build_model, + x=self.imdb_x, + y=self.imdb_y, + batch_size=batch_size, + run_iters=run_iters, + optimizer='adam', + loss='sparse_categorical_crossentropy', + metrics=['accuracy']) + + self.report_benchmark( + iters=run_iters, wall_time=results['wall_time'], extras=results) + + +class MultiHeadSelfAttention(tf.keras.layers.Layer): + """Implement multi head self attention as a Keras layer.""" + def __init__(self, embed_dim, num_heads=8): + super(MultiHeadSelfAttention, self).__init__() + self.embed_dim = embed_dim + self.num_heads = num_heads + if embed_dim % num_heads != 0: + raise ValueError( + f"embedding dimension = {embed_dim} should be divisible " + f"by number of heads = {num_heads}" + ) + self.projection_dim = embed_dim // num_heads + self.query_dense = tf.keras.layers.Dense(embed_dim) + self.key_dense = tf.keras.layers.Dense(embed_dim) + self.value_dense = tf.keras.layers.Dense(embed_dim) + self.combine_heads = tf.keras.layers.Dense(embed_dim) + + def attention(self, query, key, value): + score = tf.matmul(query, key, transpose_b=True) + dim_key = tf.cast(tf.shape(key)[-1], tf.float32) + scaled_score = score / tf.math.sqrt(dim_key) + weights = tf.nn.softmax(scaled_score, axis=-1) + output = tf.matmul(weights, value) + return output, weights + + def separate_heads(self, x, batch_size): + x = tf.reshape(x, (batch_size, -1, self.num_heads, self.projection_dim)) + return tf.transpose(x, perm=[0, 2, 1, 3]) + + def call(self, inputs): + # x.shape = [batch_size, seq_len, embedding_dim] + batch_size = tf.shape(inputs)[0] + query = self.query_dense(inputs) # (batch_size, seq_len, embed_dim) + key = self.key_dense(inputs) # (batch_size, seq_len, embed_dim) + value = self.value_dense(inputs) # (batch_size, seq_len, embed_dim) + query = self.separate_heads( + query, batch_size + ) # (batch_size, num_heads, seq_len, projection_dim) + key = self.separate_heads( + key, batch_size + ) # (batch_size, num_heads, seq_len, projection_dim) + value = self.separate_heads( + value, batch_size + ) # (batch_size, num_heads, seq_len, projection_dim) + attention, weights = self.attention(query, key, value) + attention = tf.transpose( + attention, perm=[0, 2, 1, 3] + ) # (batch_size, seq_len, num_heads, projection_dim) + concat_attention = tf.reshape( + attention, (batch_size, -1, self.embed_dim) + ) # (batch_size, seq_len, embed_dim) + output = self.combine_heads( + concat_attention + ) # (batch_size, seq_len, embed_dim) + return output + + +class TransformerBlock(tf.keras.layers.Layer): + """Implement a Transformer block as a layer.""" + 
def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1): + super(TransformerBlock, self).__init__() + self.att = MultiHeadSelfAttention(embed_dim, num_heads) + self.ffn = tf.keras.Sequential( + [tf.keras.layers.Dense(ff_dim, activation="relu"), + tf.keras.layers.Dense(embed_dim)] + ) + self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6) + self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6) + self.dropout1 = tf.keras.layers.Dropout(rate) + self.dropout2 = tf.keras.layers.Dropout(rate) + + def call(self, inputs, training): + attn_output = self.att(inputs) + attn_output = self.dropout1(attn_output, training=training) + out1 = self.layernorm1(inputs + attn_output) + ffn_output = self.ffn(out1) + ffn_output = self.dropout2(ffn_output, training=training) + return self.layernorm2(out1 + ffn_output) + + +class TokenAndPositionEmbedding(tf.keras.layers.Layer): + """Implement embedding layer.""" + def __init__(self, maxlen, vocab_size, embed_dim): + super(TokenAndPositionEmbedding, self).__init__() + self.token_emb = tf.keras.layers.Embedding(input_dim=vocab_size, + output_dim=embed_dim) + self.pos_emb = tf.keras.layers.Embedding(input_dim=maxlen, + output_dim=embed_dim) + + def call(self, x): + maxlen = tf.shape(x)[-1] + positions = tf.range(start=0, limit=maxlen, delta=1) + positions = self.pos_emb(positions) + x = self.token_emb(x) + return x + positions + + +if __name__ == '__main__': + tf.test.main() From 25907169a9b72953e28a766c8c322c89d8ec39b9 Mon Sep 17 00:00:00 2001 From: Xingyu Long Date: Thu, 9 Jul 2020 16:20:54 -0400 Subject: [PATCH 0180/2522] Solve the benchmark_util dep problem. --- tensorflow/python/keras/benchmarks/BUILD | 2 +- .../python/keras/benchmarks/keras_cpu_benchmark_test.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/keras/benchmarks/BUILD b/tensorflow/python/keras/benchmarks/BUILD index ea17c4b0b82..9ba59911dc3 100644 --- a/tensorflow/python/keras/benchmarks/BUILD +++ b/tensorflow/python/keras/benchmarks/BUILD @@ -29,8 +29,8 @@ py_test( srcs = ["keras_cpu_benchmark_test.py"], python_version = "PY3", deps = [ - ":benchmark_util", "//tensorflow:tensorflow_py", + "//tensorflow/python/keras/benchmarks/keras_io_examples_benchmark:benchmark_util", "//third_party/py/numpy", ], ) diff --git a/tensorflow/python/keras/benchmarks/keras_cpu_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_cpu_benchmark_test.py index 44f25033801..ba3035adab7 100644 --- a/tensorflow/python/keras/benchmarks/keras_cpu_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_cpu_benchmark_test.py @@ -23,8 +23,9 @@ import six import tensorflow as tf from tensorflow.python.platform import benchmark -from tensorflow.python.keras.benchmarks import benchmark_util - +from tensorflow.python.keras.benchmarks.keras_io_examples_benchmark \ + import benchmark_util + # Loss function and optimizer. 
_LOSS = 'binary_crossentropy' _OPTIMIZER = 'rmsprop' From 0d7c20446047dfcebd44a2f143a32b27c652e9b7 Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Thu, 9 Jul 2020 20:36:38 +0000 Subject: [PATCH 0181/2522] removed GPU support --- tensorflow/c/kernels/BUILD | 15 +++++++++++++++ tensorflow/c/kernels/summary_op.cc | 12 ------------ 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/tensorflow/c/kernels/BUILD b/tensorflow/c/kernels/BUILD index e34e3a55e79..309dc7e221b 100644 --- a/tensorflow/c/kernels/BUILD +++ b/tensorflow/c/kernels/BUILD @@ -87,6 +87,21 @@ tf_cc_test( "//tensorflow/core:testlib", ], ) + +tf_cc_test( + name = "summary_op_benchmark_test", + srcs = ["summary_op_benchmark_test.cc"], + deps = [ + "summary_op", + "summary_op_lib", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + ], +) # Changes to the Android srcs here should be replicated in # tensorflow/contrib/makefile/tf_op_files.txt. # diff --git a/tensorflow/c/kernels/summary_op.cc b/tensorflow/c/kernels/summary_op.cc index 9d28c0797ff..cd2509247da 100644 --- a/tensorflow/c/kernels/summary_op.cc +++ b/tensorflow/c/kernels/summary_op.cc @@ -155,18 +155,6 @@ void RegisterSummaryScalarOpKernel() { CHECK_EQ(TF_OK, TF_GetCode(status)) << "Error while registering Summary Scalar kernel"; } -#if GOOGLE_CUDA - { - auto* builder = TF_NewKernelBuilder("SummaryScalar", - tensorflow::DEVICE_GPU, - &SummaryScalarOp_Create, - &SummaryScalarOp_Compute, - &SummaryScalarOp_Delete); - TF_RegisterKernelBuilder("SummaryScalar", builder, status); - CHECK_EQ(TF_OK, TF_GetCode(status)) - << "Error while registering CUDA SummaryScalar kernel"; - } -#endif TF_DeleteStatus(status); } From 1ae3ed3e696f20f1d419caa7e0846e6cd9977915 Mon Sep 17 00:00:00 2001 From: Xingyu Long Date: Thu, 9 Jul 2020 18:05:08 -0400 Subject: [PATCH 0182/2522] Organize folder and code. 
--- tensorflow/python/keras/benchmarks/BUILD | 36 +++++++++++ .../benchmark_util.py | 0 .../benchmarks/keras_cpu_benchmark_test.py | 3 +- .../bidirectional_lstm_benchmark_test.py | 3 +- ...assification_transformer_benchmark_test.py | 3 +- .../keras_io_examples_benchmark/BUILD | 61 ------------------- .../keras_io_examples_benchmark/__init__.py | 18 ------ 7 files changed, 39 insertions(+), 85 deletions(-) rename tensorflow/python/keras/benchmarks/{keras_io_examples_benchmark => }/benchmark_util.py (100%) rename tensorflow/python/keras/benchmarks/{keras_io_examples_benchmark => keras_examples_benchmarks}/bidirectional_lstm_benchmark_test.py (97%) rename tensorflow/python/keras/benchmarks/{keras_io_examples_benchmark => keras_examples_benchmarks}/text_classification_transformer_benchmark_test.py (98%) delete mode 100644 tensorflow/python/keras/benchmarks/keras_io_examples_benchmark/BUILD delete mode 100644 tensorflow/python/keras/benchmarks/keras_io_examples_benchmark/__init__.py diff --git a/tensorflow/python/keras/benchmarks/BUILD b/tensorflow/python/keras/benchmarks/BUILD index 9ba59911dc3..414fc4a9134 100644 --- a/tensorflow/python/keras/benchmarks/BUILD +++ b/tensorflow/python/keras/benchmarks/BUILD @@ -70,3 +70,39 @@ cuda_py_test( ], ) +py_library( + name = "benchmark_util", + srcs = ["benchmark_util.py"], + deps = [ + "//tensorflow:tensorflow_py", + "//third_party/py/numpy", + ], +) + +py_test( + name = "bidirectional_lstm_benchmark_test", + size = "medium", + srcs = ["keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py"], + python_version = "PY3", + tags = [ + "no_windows", # b/160628318 + ], + deps = [ + ":benchmark_util", + "//tensorflow:tensorflow_py", + ], +) + +py_test( + name = "text_classification_transformer_benchmark_test", + size = "medium", + srcs = ["keras_examples_benchmarks/text_classification_transformer_benchmark_test.py"], + python_version = "PY3", + tags = [ + "no_windows", # b/160628318 + ], + deps = [ + ":benchmark_util", + "//tensorflow:tensorflow_py", + ], +) diff --git a/tensorflow/python/keras/benchmarks/keras_io_examples_benchmark/benchmark_util.py b/tensorflow/python/keras/benchmarks/benchmark_util.py similarity index 100% rename from tensorflow/python/keras/benchmarks/keras_io_examples_benchmark/benchmark_util.py rename to tensorflow/python/keras/benchmarks/benchmark_util.py diff --git a/tensorflow/python/keras/benchmarks/keras_cpu_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_cpu_benchmark_test.py index ba3035adab7..1929275a246 100644 --- a/tensorflow/python/keras/benchmarks/keras_cpu_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_cpu_benchmark_test.py @@ -23,8 +23,7 @@ import six import tensorflow as tf from tensorflow.python.platform import benchmark -from tensorflow.python.keras.benchmarks.keras_io_examples_benchmark \ - import benchmark_util +from tensorflow.python.keras.benchmarks import benchmark_util # Loss function and optimizer. 
_LOSS = 'binary_crossentropy' diff --git a/tensorflow/python/keras/benchmarks/keras_io_examples_benchmark/bidirectional_lstm_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py similarity index 97% rename from tensorflow/python/keras/benchmarks/keras_io_examples_benchmark/bidirectional_lstm_benchmark_test.py rename to tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py index af4494e9ce3..b0db0f4f6c6 100644 --- a/tensorflow/python/keras/benchmarks/keras_io_examples_benchmark/bidirectional_lstm_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py @@ -24,8 +24,7 @@ import six import tensorflow as tf from tensorflow.python.platform import benchmark -from tensorflow.python.keras.benchmarks.keras_io_examples_benchmark \ - import benchmark_util +from tensorflow.python.keras.benchmarks import benchmark_util _MAX_FEATURE = 20000 _MAX_LEN = 200 diff --git a/tensorflow/python/keras/benchmarks/keras_io_examples_benchmark/text_classification_transformer_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py similarity index 98% rename from tensorflow/python/keras/benchmarks/keras_io_examples_benchmark/text_classification_transformer_benchmark_test.py rename to tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py index fc6e2150766..41407dd91b3 100644 --- a/tensorflow/python/keras/benchmarks/keras_io_examples_benchmark/text_classification_transformer_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py @@ -24,8 +24,7 @@ import six import tensorflow as tf from tensorflow.python.platform import benchmark -from tensorflow.python.keras.benchmarks.keras_io_examples_benchmark \ - import benchmark_util +from tensorflow.python.keras.benchmarks import benchmark_util class TextWithTransformerBenchmark( diff --git a/tensorflow/python/keras/benchmarks/keras_io_examples_benchmark/BUILD b/tensorflow/python/keras/benchmarks/keras_io_examples_benchmark/BUILD deleted file mode 100644 index c9d551735b1..00000000000 --- a/tensorflow/python/keras/benchmarks/keras_io_examples_benchmark/BUILD +++ /dev/null @@ -1,61 +0,0 @@ -# Description: -# Implementation of benchmarks for keras.io examples. - -load("//tensorflow:tensorflow.bzl", "cuda_py_test") - -package( - default_visibility = ["//visibility:public"], - licenses = ["notice"], # Apache 2.0 -) - -exports_files(["LICENSE"]) - -# To run CPU benchmarks: -# bazel run -c opt benchmarks_test -- --benchmarks=. - -# To run GPU benchmarks: -# bazel run --config=cuda -c opt --copt="-mavx" benchmarks_test -- \ -# --benchmarks=. - -# To run a subset of benchmarks using --benchmarks flag. -# --benchmarks: the list of benchmarks to run. The specified value is interpreted -# as a regular expression and any benchmark whose name contains a partial match -# to the regular expression is executed. -# e.g. --benchmarks=".*lstm*." will run all lstm layer related benchmarks. 
- -py_test( - name = "bidirectional_lstm_benchmark_test", - size = "medium", - srcs = ["bidirectional_lstm_benchmark_test.py"], - python_version = "PY3", - tags = [ - "no_windows", # b/160628318 - ], - deps = [ - ":benchmark_util", - "//tensorflow:tensorflow_py", - ], -) - -py_test( - name = "text_classification_transformer_benchmark_test", - size = "medium", - srcs = ["text_classification_transformer_benchmark_test.py"], - python_version = "PY3", - tags = [ - "no_windows", # b/160628318 - ], - deps = [ - ":benchmark_util", - "//tensorflow:tensorflow_py", - ], -) - -py_library( - name = "benchmark_util", - srcs = ["benchmark_util.py"], - deps = [ - "//tensorflow:tensorflow_py", - "//third_party/py/numpy", - ], -) diff --git a/tensorflow/python/keras/benchmarks/keras_io_examples_benchmark/__init__.py b/tensorflow/python/keras/benchmarks/keras_io_examples_benchmark/__init__.py deleted file mode 100644 index 5d71aa15b5e..00000000000 --- a/tensorflow/python/keras/benchmarks/keras_io_examples_benchmark/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Benchmark for keras.io examples.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function \ No newline at end of file From 01bc970004870144463d381c67a5696ff391a7fb Mon Sep 17 00:00:00 2001 From: Xingyu Long Date: Thu, 9 Jul 2020 18:35:26 -0400 Subject: [PATCH 0183/2522] Fix invalid test in zero_padding_2d. 
--- .../python/keras/layers/convolutional_test.py | 139 +++++++++--------- 1 file changed, 71 insertions(+), 68 deletions(-) diff --git a/tensorflow/python/keras/layers/convolutional_test.py b/tensorflow/python/keras/layers/convolutional_test.py index 1661f843dc9..cd78966292f 100644 --- a/tensorflow/python/keras/layers/convolutional_test.py +++ b/tensorflow/python/keras/layers/convolutional_test.py @@ -645,83 +645,86 @@ class ZeroPaddingTest(keras_parameterized.TestCase): with self.assertRaises(ValueError): keras.layers.ZeroPadding1D(padding=None) - def test_zero_padding_2d(self): + @parameterized.named_parameters(('channels_first', 'channels_first'), + ('channels_last', 'channels_last')) + def test_zero_padding_2d(self, data_format): num_samples = 2 stack_size = 2 input_num_row = 4 input_num_col = 5 - for data_format in ['channels_first', 'channels_last']: - inputs = np.ones((num_samples, input_num_row, input_num_col, stack_size)) + if data_format == 'channels_first': inputs = np.ones((num_samples, stack_size, input_num_row, input_num_col)) + elif data_format == 'channels_last': + inputs = np.ones((num_samples, input_num_row, input_num_col, stack_size)) - # basic test - with self.cached_session(use_gpu=True): - testing_utils.layer_test( - keras.layers.ZeroPadding2D, - kwargs={'padding': (2, 2), - 'data_format': data_format}, - input_shape=inputs.shape) - testing_utils.layer_test( - keras.layers.ZeroPadding2D, - kwargs={'padding': ((1, 2), (3, 4)), - 'data_format': data_format}, - input_shape=inputs.shape) + # basic test + with self.cached_session(use_gpu=True): + testing_utils.layer_test( + keras.layers.ZeroPadding2D, + kwargs={'padding': (2, 2), + 'data_format': data_format}, + input_shape=inputs.shape) + testing_utils.layer_test( + keras.layers.ZeroPadding2D, + kwargs={'padding': ((1, 2), (3, 4)), + 'data_format': data_format}, + input_shape=inputs.shape) - # correctness test - with self.cached_session(use_gpu=True): - layer = keras.layers.ZeroPadding2D( - padding=(2, 2), data_format=data_format) - layer.build(inputs.shape) - output = layer(keras.backend.variable(inputs)) - if context.executing_eagerly(): - np_output = output.numpy() - else: - np_output = keras.backend.eval(output) - if data_format == 'channels_last': - for offset in [0, 1, -1, -2]: - np.testing.assert_allclose(np_output[:, offset, :, :], 0.) - np.testing.assert_allclose(np_output[:, :, offset, :], 0.) - np.testing.assert_allclose(np_output[:, 2:-2, 2:-2, :], 1.) - elif data_format == 'channels_first': - for offset in [0, 1, -1, -2]: - np.testing.assert_allclose(np_output[:, :, offset, :], 0.) - np.testing.assert_allclose(np_output[:, :, :, offset], 0.) - np.testing.assert_allclose(np_output[:, 2:-2, 2:-2, :], 1.) + # correctness test + with self.cached_session(use_gpu=True): + layer = keras.layers.ZeroPadding2D( + padding=(2, 2), data_format=data_format) + layer.build(inputs.shape) + output = layer(keras.backend.variable(inputs)) + if context.executing_eagerly(): + np_output = output.numpy() + else: + np_output = keras.backend.eval(output) + if data_format == 'channels_last': + for offset in [0, 1, -1, -2]: + np.testing.assert_allclose(np_output[:, offset, :, :], 0.) + np.testing.assert_allclose(np_output[:, :, offset, :], 0.) + np.testing.assert_allclose(np_output[:, 2:-2, 2:-2, :], 1.) + elif data_format == 'channels_first': + for offset in [0, 1, -1, -2]: + np.testing.assert_allclose(np_output[:, :, offset, :], 0.) + np.testing.assert_allclose(np_output[:, :, :, offset], 0.) 
+ np.testing.assert_allclose(np_output[:, 2:-2, 2:-2, :], 1.) - layer = keras.layers.ZeroPadding2D( - padding=((1, 2), (3, 4)), data_format=data_format) - layer.build(inputs.shape) - output = layer(keras.backend.variable(inputs)) - if context.executing_eagerly(): - np_output = output.numpy() - else: - np_output = keras.backend.eval(output) - if data_format == 'channels_last': - for top_offset in [0]: - np.testing.assert_allclose(np_output[:, top_offset, :, :], 0.) - for bottom_offset in [-1, -2]: - np.testing.assert_allclose(np_output[:, bottom_offset, :, :], 0.) - for left_offset in [0, 1, 2]: - np.testing.assert_allclose(np_output[:, :, left_offset, :], 0.) - for right_offset in [-1, -2, -3, -4]: - np.testing.assert_allclose(np_output[:, :, right_offset, :], 0.) - np.testing.assert_allclose(np_output[:, 1:-2, 3:-4, :], 1.) - elif data_format == 'channels_first': - for top_offset in [0]: - np.testing.assert_allclose(np_output[:, :, top_offset, :], 0.) - for bottom_offset in [-1, -2]: - np.testing.assert_allclose(np_output[:, :, bottom_offset, :], 0.) - for left_offset in [0, 1, 2]: - np.testing.assert_allclose(np_output[:, :, :, left_offset], 0.) - for right_offset in [-1, -2, -3, -4]: - np.testing.assert_allclose(np_output[:, :, :, right_offset], 0.) - np.testing.assert_allclose(np_output[:, :, 1:-2, 3:-4], 1.) + layer = keras.layers.ZeroPadding2D( + padding=((1, 2), (3, 4)), data_format=data_format) + layer.build(inputs.shape) + output = layer(keras.backend.variable(inputs)) + if context.executing_eagerly(): + np_output = output.numpy() + else: + np_output = keras.backend.eval(output) + if data_format == 'channels_last': + for top_offset in [0]: + np.testing.assert_allclose(np_output[:, top_offset, :, :], 0.) + for bottom_offset in [-1, -2]: + np.testing.assert_allclose(np_output[:, bottom_offset, :, :], 0.) + for left_offset in [0, 1, 2]: + np.testing.assert_allclose(np_output[:, :, left_offset, :], 0.) + for right_offset in [-1, -2, -3, -4]: + np.testing.assert_allclose(np_output[:, :, right_offset, :], 0.) + np.testing.assert_allclose(np_output[:, 1:-2, 3:-4, :], 1.) + elif data_format == 'channels_first': + for top_offset in [0]: + np.testing.assert_allclose(np_output[:, :, top_offset, :], 0.) + for bottom_offset in [-1, -2]: + np.testing.assert_allclose(np_output[:, :, bottom_offset, :], 0.) + for left_offset in [0, 1, 2]: + np.testing.assert_allclose(np_output[:, :, :, left_offset], 0.) + for right_offset in [-1, -2, -3, -4]: + np.testing.assert_allclose(np_output[:, :, :, right_offset], 0.) + np.testing.assert_allclose(np_output[:, :, 1:-2, 3:-4], 1.) - # test incorrect use - with self.assertRaises(ValueError): - keras.layers.ZeroPadding2D(padding=(1, 1, 1)) - with self.assertRaises(ValueError): - keras.layers.ZeroPadding2D(padding=None) + # test incorrect use + with self.assertRaises(ValueError): + keras.layers.ZeroPadding2D(padding=(1, 1, 1)) + with self.assertRaises(ValueError): + keras.layers.ZeroPadding2D(padding=None) @parameterized.named_parameters(('channels_first', 'channels_first'), ('channels_last', 'channels_last')) From aa56f9df01d6c958eafd6a4bb6b2490cb488607d Mon Sep 17 00:00:00 2001 From: Xingyu Long Date: Thu, 9 Jul 2020 20:09:59 -0400 Subject: [PATCH 0184/2522] Add essential docstring and update the BUILD file. 
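These benchmarks all funnel into the shared benchmark_util.measure_performance
helper, passing a model-building callable together with input data, target
data, loss and optimizer. A standalone sketch of that call pattern follows;
the batch_size and run_iters keyword names are inferred from how the benchmark
methods below invoke the helper and are assumptions about its exact signature,
and the toy model and random data are mine:

  import numpy as np
  import tensorflow as tf
  from tensorflow.python.keras.benchmarks import benchmark_util

  def build_model():
    return tf.keras.Sequential([
        tf.keras.layers.Dense(64, activation='relu', input_shape=(20,)),
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])

  x = np.random.rand(1024, 20).astype(np.float32)
  y = np.random.randint(0, 2, size=(1024, 1)).astype(np.float32)

  results = benchmark_util.measure_performance(
      build_model,
      x=x,
      y=y,
      batch_size=256,   # assumed keyword, mirrors the benchmark methods
      run_iters=2,      # assumed keyword, mirrors the benchmark methods
      optimizer='adam',
      loss='binary_crossentropy')
  print(results)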
--- tensorflow/python/keras/benchmarks/BUILD | 2 +- .../bidirectional_lstm_benchmark_test.py | 39 ++++++++-------- ...assification_transformer_benchmark_test.py | 45 +++++++++---------- 3 files changed, 40 insertions(+), 46 deletions(-) diff --git a/tensorflow/python/keras/benchmarks/BUILD b/tensorflow/python/keras/benchmarks/BUILD index 414fc4a9134..ce263be4f45 100644 --- a/tensorflow/python/keras/benchmarks/BUILD +++ b/tensorflow/python/keras/benchmarks/BUILD @@ -29,8 +29,8 @@ py_test( srcs = ["keras_cpu_benchmark_test.py"], python_version = "PY3", deps = [ + "benchmark_util", "//tensorflow:tensorflow_py", - "//tensorflow/python/keras/benchmarks/keras_io_examples_benchmark:benchmark_util", "//third_party/py/numpy", ], ) diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py index b0db0f4f6c6..b92785a9a70 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py @@ -12,9 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Bidirectional LSTM on IMDB. - https://keras.io/examples/nlp/bidirectional_lstm_imdb/ -""" +"""Benchmarks on Bidirectional LSTM on IMDB.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -26,29 +24,18 @@ import tensorflow as tf from tensorflow.python.platform import benchmark from tensorflow.python.keras.benchmarks import benchmark_util -_MAX_FEATURE = 20000 -_MAX_LEN = 200 - class BidirectionalLSTMBenchmark( six.with_metaclass(benchmark.ParameterizedBenchmark, tf.test.Benchmark)): - """Required Arguments for measure_performance: - - x: Input data, it could be Numpy or load from tfds. - y: Target data. If `x` is a dataset, generator instance, - `y` should not be specified. - loss: Loss function for model. - optimizer: Optimizer for model. - Other details can see in `measure_performance()` method of - benchmark_util. - """ - + """Benchmarks for Bidirectional LSTM using `ParameterizedBenchmark`.""" def __init__(self): super(BidirectionalLSTMBenchmark, self).__init__() + self.max_feature = 20000 + self.max_len = 200 (self.imdb_x, self.imdb_y), _ = tf.keras.datasets.imdb.load_data( - num_words=_MAX_FEATURE) + num_words=self.max_feature) self.imdb_x = tf.keras.preprocessing.sequence.pad_sequences( - self.imdb_x, maxlen=_MAX_LEN) + self.imdb_x, maxlen=self.max_len) """The parameters of each benchmark is a tuple: @@ -65,8 +52,9 @@ class BidirectionalLSTMBenchmark( ('bs_512', 512, 3)] def _build_model(self): + """model from https://keras.io/examples/nlp/bidirectional_lstm_imdb/""" inputs = tf.keras.Input(shape=(None,), dtype='int32') - x = tf.keras.layers.Embedding(_MAX_FEATURE, 128)(inputs) + x = tf.keras.layers.Embedding(self.max_feature, 128)(inputs) x = tf.keras.layers.Bidirectional( tf.keras.layers.LSTM( 64, return_sequences=True))(x) @@ -76,7 +64,16 @@ class BidirectionalLSTMBenchmark( return model def benchmark_bidirect_lstm_imdb(self, batch_size, run_iters): - """Benchmark for Bidirectional LSTM on IMDB.""" + """ Required Arguments for measure_performance: + + x: Input data, it could be Numpy or load from tfds. + y: Target data. 
If `x` is a dataset, generator instance, + `y` should not be specified. + loss: Loss function for model. + optimizer: Optimizer for model. + Other details can see in `measure_performance()` method of + benchmark_util. + """ results = benchmark_util.measure_performance( self._build_model, x=self.imdb_x, diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py index 41407dd91b3..ae10b47451b 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py @@ -12,9 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Text classification with Transformer. - https://keras.io/examples/nlp/text_classification_with_transformer/ -""" +"""Benchmarks on Text classification with Transformer.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -29,25 +27,16 @@ from tensorflow.python.keras.benchmarks import benchmark_util class TextWithTransformerBenchmark( six.with_metaclass(benchmark.ParameterizedBenchmark, tf.test.Benchmark)): - """Required Arguments for measure_performance: - - x: Input data, it could be Numpy or load from tfds. - y: Target data. If `x` is a dataset, generator instance, - `y` should not be specified. - loss: Loss function for model. - optimizer: Optimizer for model. - Other details can see in `measure_performance()` method of - benchmark_util. - """ - + """Benchmarks for Text classification with Transformer + using `ParameterizedBenchmark`.""" def __init__(self): super(TextWithTransformerBenchmark, self).__init__() - max_feature = 20000 - max_len = 200 + self.max_feature = 20000 + self.max_len = 200 (self.imdb_x, self.imdb_y), _ = tf.keras.datasets.imdb.load_data( - num_words=max_feature) + num_words=self.max_feature) self.imdb_x = tf.keras.preprocessing.sequence.pad_sequences( - self.imdb_x, maxlen=max_len) + self.imdb_x, maxlen=self.max_len) """The parameters of each benchmark is a tuple: @@ -63,15 +52,14 @@ class TextWithTransformerBenchmark( ('bs_256', 256, 1), ('bs_512', 512, 3)] def _build_model(self): - vocab_size = 20000 - max_len = 200 + """model from https://keras.io/examples/nlp/text_classification_with_transformer/""" embed_dim = 32 num_heads = 2 ff_dim = 32 - inputs = tf.keras.layers.Input(shape=(max_len,)) + inputs = tf.keras.layers.Input(shape=(self.max_len,)) embedding_layer = TokenAndPositionEmbedding( - max_len, - vocab_size, + self.max_len, + self.max_feature, embed_dim) x = embedding_layer(inputs) transformer_block = TransformerBlock( @@ -89,7 +77,16 @@ class TextWithTransformerBenchmark( return model def benchmark_text_classification(self, batch_size, run_iters): - """Benchmark for Text classification with Transformer.""" + """ Required Arguments for measure_performance: + + x: Input data, it could be Numpy or load from tfds. + y: Target data. If `x` is a dataset, generator instance, + `y` should not be specified. + loss: Loss function for model. + optimizer: Optimizer for model. + Other details can see in `measure_performance()` method of + benchmark_util. 
+ """ results = benchmark_util.measure_performance( self._build_model, x=self.imdb_x, From 609fe7990f8311191c6bffc1ee47034443ac319a Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Fri, 10 Jul 2020 01:25:03 +0000 Subject: [PATCH 0185/2522] insert, lookup, erase, replace, and initial gradients --- tensorflow/core/kernels/map_kernels.cc | 4 + tensorflow/core/kernels/map_kernels.h | 139 +++++++++++++++++- tensorflow/core/kernels/tensor_map.h | 12 +- tensorflow/core/ops/map_ops.cc | 88 ++++++++++- .../python/kernel_tests/list_ops_test.py | 12 +- .../python/kernel_tests/map_ops_test.py | 65 +++++++- tensorflow/python/ops/map_ops.py | 27 ++++ 7 files changed, 326 insertions(+), 21 deletions(-) diff --git a/tensorflow/core/kernels/map_kernels.cc b/tensorflow/core/kernels/map_kernels.cc index cb749c72f7b..eba2d99a75b 100644 --- a/tensorflow/core/kernels/map_kernels.cc +++ b/tensorflow/core/kernels/map_kernels.cc @@ -26,7 +26,11 @@ REGISTER_KERNEL_BUILDER(Name("EmptyTensorMap").Device(DEVICE_CPU), REGISTER_KERNEL_BUILDER(Name("TensorMapSize").Device(DEVICE_CPU), TensorMapSize); +REGISTER_KERNEL_BUILDER(Name("TensorMapLookup").Device(DEVICE_CPU), + TensorMapLookup); +REGISTER_KERNEL_BUILDER(Name("TensorMapErase").Device(DEVICE_CPU), + TensorMapErase); REGISTER_KERNEL_BUILDER(Name("ZeroOut").Device(DEVICE_CPU), ZeroOutOp); diff --git a/tensorflow/core/kernels/map_kernels.h b/tensorflow/core/kernels/map_kernels.h index 41c1a18a728..f67b8b6e10a 100644 --- a/tensorflow/core/kernels/map_kernels.h +++ b/tensorflow/core/kernels/map_kernels.h @@ -112,7 +112,7 @@ class TensorMapSize : public OpKernel { class TensorMapInsert : public OpKernel { public: explicit TensorMapInsert(OpKernelConstruction* c) : OpKernel(c) { - //OP_REQUIRES_OK(c, c->GetAttr("element_dtype", &element_dtype_)); + OP_REQUIRES_OK(c, c->GetAttr("element_dtype", &element_dtype_)); } ~TensorMapInsert() override {} @@ -151,7 +151,6 @@ class TensorMapInsert : public OpKernel { " list size: ", l->tensors().size(), " max_num_elements: ", l->max_num_elements)); }*/ - TensorMap* output_map = nullptr; OP_REQUIRES_OK(c, ForwardInputOrCreateNewMap(c, 0, 0, *m, &output_map)); std::cout << "create output" << std::endl; @@ -166,6 +165,142 @@ class TensorMapInsert : public OpKernel { REGISTER_KERNEL_BUILDER(Name("TensorMapInsert").Device(DEVICE_CPU), TensorMapInsert); +class TensorMapLookup : public OpKernel { + public: + explicit TensorMapLookup(OpKernelConstruction* c) : OpKernel(c) { + OP_REQUIRES_OK(c, c->GetAttr("element_dtype", &element_dtype_)); + } + ~TensorMapLookup() override {} + + void Compute(OpKernelContext* c) override { + std::cout << "hello TensorMapInsert kernel" << std::endl; + const Tensor& temp_key = c->input(1); + const TensorKey key = TensorKey(temp_key); + std::cout << "got key" << std::endl; + const TensorMap* m = nullptr; + OP_REQUIRES_OK(c, GetInputMap(c, 0, &m)); + std::cout << "got map" << std::endl; + c->set_output(0, m->tensors().find(key)->second); + std::cout << "finished" << std::endl; + } + + private: + DataType element_dtype_; +}; + +class TensorMapErase : public OpKernel { + public: + explicit TensorMapErase(OpKernelConstruction* c) : OpKernel(c) { + OP_REQUIRES_OK(c, c->GetAttr("element_dtype", &element_dtype_)); + } + + void Compute(OpKernelContext* c) override { + std::cout << "hello TensorMapErase op" << std::endl; + const TensorMap* m = nullptr; + OP_REQUIRES_OK(c, GetInputMap(c, 0, &m)); + const Tensor& temp_key = c->input(1); + const TensorKey key = TensorKey(temp_key); + /*OP_REQUIRES(c, element_dtype_ == 
l->element_dtype, + errors::InvalidArgument("Invalid data types; op elements ", + DataTypeString(element_dtype_), + " but list elements ", + DataTypeString(l->element_dtype)));*/ + + OP_REQUIRES(c, !m->tensors().empty(), + errors::InvalidArgument("Trying to erase from an empty map.")); + + OP_REQUIRES(c, m->tensors().find(key) != m->tensors().end(), + errors::InvalidArgument("Trying to erase non-existent item.")); + + const Tensor& t = m->tensors().find(key)->second; + c->set_output(1, t); + /*if (t.dtype() != DT_INVALID) { + c->set_output(1, t); + } else { + PartialTensorShape partial_element_shape; + OP_REQUIRES_OK( + c, GetElementShapeFromInput(c, *l, 1, &partial_element_shape)); + TensorShape element_shape; + OP_REQUIRES( + c, partial_element_shape.AsTensorShape(&element_shape), + errors::InvalidArgument("Trying to read an uninitialized tensor but ", + "element_shape is not fully defined.", + partial_element_shape.DebugString())); + Tensor* result; + AllocatorAttributes attr; + if (element_dtype_ == DT_VARIANT) { + attr.set_on_host(true); + } + OP_REQUIRES_OK(c, c->allocate_output(1, element_shape, &result, attr)); + functor::SetZeroFunctor()(c->eigen_device(), + result->flat()); + }*/ + + TensorMap* output_map = nullptr; + OP_REQUIRES_OK(c, ForwardInputOrCreateNewMap(c, 0, 0, *m, &output_map)); + output_map->tensors().erase(key); + } + + private: + DataType element_dtype_; +}; + +class TensorMapReplace : public OpKernel { + public: + explicit TensorMapReplace(OpKernelConstruction* c) : OpKernel(c) { + OP_REQUIRES_OK(c, c->GetAttr("element_dtype", &element_dtype_)); + } + ~TensorMapReplace() override {} + + void Compute(OpKernelContext* c) override { + std::cout << "hello TensorMapReplace kernel" << std::endl; + const Tensor& temp_key = c->input(1); + const TensorKey key = TensorKey(temp_key); + std::cout << "got key" << std::endl; + const Tensor& value = c->input(2); + std::cout << "got value" << std::endl; + /*OP_REQUIRES(c, element_dtype_ == value.dtype(), + errors::InvalidArgument("Invalid data types; list elements ", + DataTypeString(element_dtype_), + " but tried to append ", + DataTypeString(value.dtype())));*/ + + const TensorMap* m = nullptr; + OP_REQUIRES_OK(c, GetInputMap(c, 0, &m)); + std::cout << "got map" << std::endl; + /*OP_REQUIRES(c, m->element_shape.IsCompatibleWith(input.shape()), + errors::InvalidArgument( + "Tried to append a map with incompatible shape to a " + "list. 
Op element shape: ", + input.shape().DebugString(), + " list shape: ", m->element_shape.DebugString()));*/ + /*OP_REQUIRES(c, element_dtype_ == m->element_dtype, + errors::InvalidArgument("Invalid data types; op elements ", + DataTypeString(element_dtype_), + " but list elements ", + DataTypeString(l->element_dtype))); + + if (l->max_num_elements != -1) { + OP_REQUIRES( + c, l->tensors().size() < l->max_num_elements, + errors::InvalidArgument("Tried to push item into a full list", + " list size: ", l->tensors().size(), + " max_num_elements: ", l->max_num_elements)); + }*/ + TensorMap* output_map = nullptr; + OP_REQUIRES_OK(c, ForwardInputOrCreateNewMap(c, 0, 0, *m, &output_map)); + std::cout << "create output" << std::endl; + output_map->replace(key,value); + std::cout << "inserted" << std::endl; + } + + private: + DataType element_dtype_; +}; + +REGISTER_KERNEL_BUILDER(Name("TensorMapReplace").Device(DEVICE_CPU), + TensorMapReplace); + class ZeroOutOp : public OpKernel { public: explicit ZeroOutOp(OpKernelConstruction* c) : OpKernel(c) {} diff --git a/tensorflow/core/kernels/tensor_map.h b/tensorflow/core/kernels/tensor_map.h index c5993ec9300..a5d44550c98 100644 --- a/tensorflow/core/kernels/tensor_map.h +++ b/tensorflow/core/kernels/tensor_map.h @@ -145,7 +145,7 @@ class TensorMap { // Insert key and value if the key does not already exist. // Returns true if the insertion happens. - bool insert(TensorKey key, Tensor value) { + bool insert(const TensorKey& key, const Tensor& value) { auto r = tensors_->values_.try_emplace(key, value); return r.second; } @@ -155,9 +155,19 @@ class TensorMap { return tensors_->values_.find(key); } + Tensor& lookup(TensorKey key) { + return tensors_->values_.find(key)->second; + } + Tensor& operator[](TensorKey& k) { return tensors_->values_[k]; } + + bool replace(const TensorKey& k, const Tensor& v) { + tensors_->values_[k] = v; + return true; + } + // Removes element with given key. Return size of removed element. size_t erase(TensorKey key) { return tensors_->values_.erase(key); diff --git a/tensorflow/core/ops/map_ops.cc b/tensorflow/core/ops/map_ops.cc index 463a2ea102b..09183e715ea 100644 --- a/tensorflow/core/ops/map_ops.cc +++ b/tensorflow/core/ops/map_ops.cc @@ -76,14 +76,50 @@ REGISTER_OP("TensorMapInsert") return Status::OK(); }); -/*REGISTER_OP("TensorMapErase") +REGISTER_OP("TensorMapLookup") .Input("input_handle: variant") - .Input("element_shape: int32") + .Input("key: element_dtype") + .Output("value: element_dtype") + .Attr("element_dtype: type") + .SetShapeFn([](shape_inference::InferenceContext* c) { + c->set_output(0, c->Scalar()); + /*DataType element_dtype; + TF_RETURN_IF_ERROR(c->GetAttr("element_dtype", &element_dtype)); + shape_inference::ShapeHandle element_shape = c->UnknownShape();*/ + + /*auto* handle_data = c->input_handle_shapes_and_types(0); + if (handle_data != nullptr && handle_data->size() > 1) { + return errors::InvalidArgument( + "Trying to push to list with wrong variant data."); + } + if (IsValidTensorMapHandleData(handle_data)) { + const shape_inference::ShapeAndType& map_shape_type = (*handle_data)[0]; + if (list_shape_type.dtype != element_dtype) { + return errors::InvalidArgument( + "Trying to push to list with wrong element dtype. 
List has type ", + DataTypeString(list_shape_type.dtype), + " but trying to push element with type ", + DataTypeString(element_dtype)); + } + shape_inference::ShapeHandle ignored; + TF_RETURN_IF_ERROR( + c->Merge(element_shape, map_shape_type.shape, &ignored)); + element_shape = map_shape_type.shape; + } + c->set_output_handle_shapes_and_types( + 0, std::vector{ + {element_shape, element_dtype}});*/ + return Status::OK(); + }); + +REGISTER_OP("TensorMapErase") + .Input("input_handle: variant") + .Input("key: element_dtype") .Output("output_handle: variant") .Output("tensor: element_dtype") .Attr("element_dtype: type") .SetShapeFn([](shape_inference::InferenceContext* c) { - DataType element_dtype; + /*DataType element_dtype; TF_RETURN_IF_ERROR(c->GetAttr("element_dtype", &element_dtype)); shape_inference::ShapeHandle tensor_shape = c->UnknownShape(); auto* handle_data = c->input_handle_shapes_and_types(0); @@ -107,11 +143,49 @@ REGISTER_OP("TensorMapInsert") c->Merge(tensor_shape, list_shape_type.shape, &ignored)); c->set_output_handle_shapes_and_types(0, *handle_data); tensor_shape = list_shape_type.shape; - } - c->set_output(1, tensor_shape); - c->set_output(0, c->Scalar()); + }*/ + c->set_output(1, c->Scalar()); // removed element + c->set_output(0, c->Scalar()); // map return Status::OK(); - });*/ + }); + +REGISTER_OP("TensorMapReplace") + .Input("input_handle: variant") + .Input("key: element_dtype") + .Input("value: element_dtype") + .Output("output_handle: variant") + .Attr("element_dtype: type") + .SetShapeFn([](shape_inference::InferenceContext* c) { + c->set_output(0, c->Scalar()); + /*DataType element_dtype; + TF_RETURN_IF_ERROR(c->GetAttr("element_dtype", &element_dtype)); + shape_inference::ShapeHandle element_shape = c->UnknownShape();*/ + + /*auto* handle_data = c->input_handle_shapes_and_types(0); + if (handle_data != nullptr && handle_data->size() > 1) { + return errors::InvalidArgument( + "Trying to push to list with wrong variant data."); + } + if (IsValidTensorMapHandleData(handle_data)) { + const shape_inference::ShapeAndType& map_shape_type = (*handle_data)[0]; + if (list_shape_type.dtype != element_dtype) { + return errors::InvalidArgument( + "Trying to push to list with wrong element dtype. 
List has type ", + DataTypeString(list_shape_type.dtype), + " but trying to push element with type ", + DataTypeString(element_dtype)); + } + shape_inference::ShapeHandle ignored; + TF_RETURN_IF_ERROR( + c->Merge(element_shape, map_shape_type.shape, &ignored)); + element_shape = map_shape_type.shape; + } + c->set_output_handle_shapes_and_types( + 0, std::vector{ + {element_shape, element_dtype}});*/ + return Status::OK(); + }); + REGISTER_OP("ZeroOut") diff --git a/tensorflow/python/kernel_tests/list_ops_test.py b/tensorflow/python/kernel_tests/list_ops_test.py index 53ebdd3ab88..4d7f2beb00b 100644 --- a/tensorflow/python/kernel_tests/list_ops_test.py +++ b/tensorflow/python/kernel_tests/list_ops_test.py @@ -48,7 +48,7 @@ from tensorflow.python.platform import test @test_util.run_all_in_graph_and_eager_modes class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): - + ''' def _testPushPop(self, max_num_elements): l = list_ops.empty_tensor_list( element_dtype=dtypes.float32, @@ -130,7 +130,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): _, e = gen_list_ops.tensor_list_pop_back( l, element_dtype=dtypes.float32, element_shape=[1, 3]) self.evaluate(e) - + ''' def testPushGetGrad(self): with backprop.GradientTape() as tape: l = list_ops.empty_tensor_list( @@ -150,7 +150,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): dt0, dt1 = tape.gradient(t1, [c0, c1]) self.assertAllEqual(self.evaluate(dt1), [1.0, 1.0]) self.assertEqual(self.evaluate(dt0), 0.0) - + ''' def _testStack(self, max_num_elements): l = list_ops.empty_tensor_list( element_dtype=dtypes.float32, @@ -888,7 +888,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): l_worker = array_ops.identity(l_ps) l_worker = list_ops.tensor_list_push_back(l_worker, 3.0) self.evaluate(l_worker) - + ''' def testPushPopGradients(self): with backprop.GradientTape() as tape: l = list_ops.empty_tensor_list( @@ -925,7 +925,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): grad_c, grad_c2 = tape.gradient(y, [c, c2]) self.assertAllEqual(self.evaluate(grad_c), [0.0, 4.0]) self.assertAllEqual(self.evaluate(grad_c2), 6.0) - + ''' @test_util.run_deprecated_v1 def testSetOutOfBounds(self): c = constant_op.constant([1.0, 2.0]) @@ -1664,7 +1664,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): upper, constant_op.constant([0, 1, 2]), dtype=dtypes.string) self.assertAllEqual(f(), [b"A", b"B", b"C"]) - + ''' def testPopBackGrad(self): # https://github.com/tensorflow/tensorflow/issues/37230 diff --git a/tensorflow/python/kernel_tests/map_ops_test.py b/tensorflow/python/kernel_tests/map_ops_test.py index 6cd6d7d611d..85ca558af4f 100644 --- a/tensorflow/python/kernel_tests/map_ops_test.py +++ b/tensorflow/python/kernel_tests/map_ops_test.py @@ -17,7 +17,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import numpy as np +#import numpy as np from tensorflow.python.platform import test from absl.testing import parameterized @@ -39,7 +39,7 @@ from tensorflow.python.ops import map_ops @test_util.run_all_in_graph_and_eager_modes class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): - def testEmptyTensorMap(self): + '''def testEmptyTensorMap(self): m = map_ops.empty_tensor_map() print("test EmptyTensorMap") @@ -47,13 +47,68 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): m = map_ops.empty_tensor_map() s = 
map_ops.tensor_map_size(m) print("size: ", s) + self.assertAllClose(s, 0) def testTensorMapInsert(self): + #with self.test_session(): + m = map_ops.empty_tensor_map() + k = constant_op.constant(1.0) + v = constant_op.constant(2.0) + m = map_ops.tensor_map_insert(m, k, v) + s = map_ops.tensor_map_size(m) + self.assertAllClose(s, 1) + print("test TensorMapInsert") + + def testTensorMapLookup(self): m = map_ops.empty_tensor_map() k = constant_op.constant(1.0) v = constant_op.constant(2.0) m = map_ops.tensor_map_insert(m, k, v) - print("test TensorMapInsert") + l = map_ops.tensor_map_lookup(m, k) + print("lookup: ", l) + self.assertAllClose(l, v)''' + + def testTensorMapReplace(self): + #with self.test_session(): + m = map_ops.empty_tensor_map() + k = constant_op.constant(1.0) + v = constant_op.constant(2.0) + m = map_ops.tensor_map_insert(m, k, v) + s = map_ops.tensor_map_size(m) + self.assertAllClose(s, 1) + + v2 = constant_op.constant(3.0) + m = map_ops.tensor_map_replace(m, k, v2) + l = map_ops.tensor_map_lookup(m, k) + self.assertAllClose(l, v2) + print("test TensorMapReplace") + + def testTensorMapErase(self): + print("python erase") + m = map_ops.empty_tensor_map() + k = constant_op.constant(1.0) + v = constant_op.constant(2.0) + m = map_ops.tensor_map_insert(m, k, v) + s = map_ops.tensor_map_size(m) + self.assertAllClose(s, 1) + m, e = map_ops.tensor_map_erase(m, k) + s = map_ops.tensor_map_size(m) + print("erase: ", e) + self.assertAllClose(s, 0) + self.assertAllClose(e, v) + + def testInsertLookupGrad(self): + with backprop.GradientTape() as tape: + m = map_ops.empty_tensor_map() + k = constant_op.constant(1.0) + v = constant_op.constant(2.0) + tape.watch(v) + m = map_ops.tensor_map_insert(m, k, v) + l = map_ops.tensor_map_lookup(m, k) + l *= 5 + #print("gradient", tape.gradient(l, v), 2.0) + + ''' @parameterized.named_parameters(("NoMaxNumElements", None), @@ -68,11 +123,11 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): self.evaluate(l) ''' - def testZeroOut(self): + '''def testZeroOut(self): print("hello world testZeroOut") with self.test_session(): self.assertAllClose( - map_ops.zero_out([[1, 2], [3, 4]]), np.array([[1, 0], [0, 0]])) + map_ops.zero_out([[1, 2], [3, 4]]), np.array([[1, 0], [0, 0]]))''' if __name__ == '__main__': diff --git a/tensorflow/python/ops/map_ops.py b/tensorflow/python/ops/map_ops.py index 61493be6b71..5cb045b5406 100644 --- a/tensorflow/python/ops/map_ops.py +++ b/tensorflow/python/ops/map_ops.py @@ -30,6 +30,8 @@ from tensorflow.python.ops.gen_map_ops import * # resource_loader.get_path_to_datafile('_zero_out_ops.so')) #zero_out = zero_out_ops.zero_out +ops.NotDifferentiable("EmptyTensorMap") + def empty_tensor_map(): print("hello gen_map_ops.empty_tensor_map") return gen_map_ops.empty_tensor_map() @@ -42,6 +44,31 @@ def tensor_map_insert(input_handle, key, value): print("hello gen_map_ops.tensor_map_insert") return gen_map_ops.tensor_map_insert(input_handle, key, value) +def tensor_map_lookup(input_handle, key): + return gen_map_ops.tensor_map_lookup(input_handle, key) + +def tensor_map_erase(input_handle, key): + return gen_map_ops.tensor_map_erase(input_handle, key) + +def tensor_map_replace(input_handle, key, value): + return gen_map_ops.tensor_map_replace(input_handle, key, value) + +@ops.RegisterGradient("TensorMapLookup") +def LookupGrad(op, dval): + map_grad = None + key_grad = None + key = op.inputs[1] + value_grad = tensor_map_lookup(dmap, key) + return map_grad, key_grad + +@ops.RegisterGradient("TensorMapInsert") 
+def InsertGrad(op, dmap): + map_grad, _ = gen_map_ops.tensor_map_erase(dmap, key) + key_grad = None + key = op.inputs[1] + value_grad = tensor_map_lookup(dmap, key) + return map_grad, key_grad, value_grad + def zero_out(to_zero): return gen_map_ops.zero_out(to_zero) From ac2ff084a71da3dd80b4a6b77ccf7ef74305ffab Mon Sep 17 00:00:00 2001 From: Deven Desai Date: Wed, 24 Jun 2020 14:31:44 +0000 Subject: [PATCH 0186/2522] Prepping for switch to ROCm 3.5+ Starting with ROCm 3.5 the underlying compiler used by hipcc will change from HCC to hip-clang. There will be a corresponding change in the HIP Runtime as well. This commit is part of a series which are intended to make the transition to ROCm 3.5+ easier. This commit adds an alternative lookup path for `ld.lld` (since its location within the rocm install, will move in ROCm 3.5+). --- .../xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc index d2126a8d17d..a93810b53f7 100644 --- a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc +++ b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc @@ -630,8 +630,10 @@ StatusOr> EmitModuleToHsaco( // Locate lld. // TODO(whchung@gmail.com): change to tensorflow::ROCmRoot() after // ROCm-Device-Libs PR. - std::string lld_path = tensorflow::io::JoinPath("/opt/rocm", "hcc/bin"); - auto lld_program = llvm::sys::findProgramByName("ld.lld", {lld_path}); + std::string lld_path_1 = tensorflow::io::JoinPath("/opt/rocm", "hcc/bin"); + std::string lld_path_2 = tensorflow::io::JoinPath("/opt/rocm", "llvm/bin"); + auto lld_program = + llvm::sys::findProgramByName("ld.lld", {lld_path_1, lld_path_2}); if (!lld_program) { return xla::InternalError("unable to find ld.lld in PATH: %s", lld_program.getError().message()); From d6cd94634dc010b766402c25153ccd9fd5337e6e Mon Sep 17 00:00:00 2001 From: Deven Desai Date: Wed, 24 Jun 2020 14:48:16 +0000 Subject: [PATCH 0187/2522] Prepping for switch to ROCm 3.5+ Starting with ROCm 3.5 the underlying compiler used by hipcc will change from HCC to hip-clang. There will be a corresponding change in the HIP Runtime as well. This commit is part of a series which are intended to make the transition to ROCm 3.5+ easier. The path to the ROCDL files changes with ROCm 3.5, and hence this change. The macro TENSORFLOW_COMPILE_IS_HIP_CLANG is only true when compiling TF with ROCm 3.5 and higher. The macro is a temporary construct to aid with the transition. Once the transition is complete, it will removed and the code updated appropriately. 
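Note that the selection happens at build time through the
TENSORFLOW_COMPILER_IS_HIP_CLANG macro in the change below; nothing is probed
at runtime. Purely as an illustration of the two install layouts involved in
the transition, the fallback idea could be expressed as a runtime probe along
these lines (Python sketch, not part of this patch; the assumption that the
device-libs directory can be recognised by its .bc bitcode files is mine):

  import os

  def rocdl_root(rocm_root='/opt/rocm'):
    # ROCm 3.5+ (hip-clang) keeps the ROCm-Device-Libs bitcode under
    # <rocm>/lib, while older HCC-based releases used <rocm>/hcc/lib.
    for subdir in ('lib', 'hcc/lib'):
      candidate = os.path.join(rocm_root, subdir)
      if os.path.isdir(candidate) and any(
          name.endswith('.bc') for name in os.listdir(candidate)):
        return candidate
    raise FileNotFoundError('no ROCm device-libs found under ' + rocm_root)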
--- tensorflow/core/platform/default/rocm_rocdl_path.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/core/platform/default/rocm_rocdl_path.cc b/tensorflow/core/platform/default/rocm_rocdl_path.cc index 55075969cbd..9e9261d26c8 100644 --- a/tensorflow/core/platform/default/rocm_rocdl_path.cc +++ b/tensorflow/core/platform/default/rocm_rocdl_path.cc @@ -36,7 +36,11 @@ string RocmRoot() { } string RocdlRoot() { +#if TENSORFLOW_COMPILER_IS_HIP_CLANG + return tensorflow::io::JoinPath(tensorflow::RocmRoot(), "lib"); +#else return tensorflow::io::JoinPath(tensorflow::RocmRoot(), "hcc/lib"); +#endif } } // namespace tensorflow From 67e3ef7c1236b0117336040ade83a5834feac57c Mon Sep 17 00:00:00 2001 From: zilinzhu Date: Fri, 10 Jul 2020 11:36:17 +0800 Subject: [PATCH 0188/2522] remove repeated call --- tensorflow/core/framework/op_kernel.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/framework/op_kernel.cc b/tensorflow/core/framework/op_kernel.cc index abf73cb57df..5cc5e9a860a 100644 --- a/tensorflow/core/framework/op_kernel.cc +++ b/tensorflow/core/framework/op_kernel.cc @@ -1416,7 +1416,7 @@ Status FindKernelDef( std::string device_str = DeviceTypeString(device_type); Status s = errors::NotFound( "No registered '", node_op, "' OpKernel for ", - DeviceTypeString(device_type), " devices compatible with node ", + device_str, " devices compatible with node ", FormatNodeDefForError(node_name, has_experimental_debug_info, experimental_debug_info)); if (was_attr_mismatch) { From 358b0560fa64b2f0a2a150f02a7ec96c12db7fc5 Mon Sep 17 00:00:00 2001 From: ShengYang1 Date: Fri, 10 Jul 2020 13:48:45 +0800 Subject: [PATCH 0189/2522] exp --- tensorflow/core/kernels/cwise_op_exp.cc | 4 ++-- tensorflow/python/ops/math_ops_test.py | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/cwise_op_exp.cc b/tensorflow/core/kernels/cwise_op_exp.cc index 2b157f0e7a9..48b6823cbdc 100644 --- a/tensorflow/core/kernels/cwise_op_exp.cc +++ b/tensorflow/core/kernels/cwise_op_exp.cc @@ -16,8 +16,8 @@ limitations under the License. 
#include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER5(UnaryOp, CPU, "Exp", functor::exp, float, Eigen::half, double, - complex64, complex128); +REGISTER6(UnaryOp, CPU, "Exp", functor::exp, float, Eigen::half, double, + bfloat16, complex64, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER5(UnaryOp, GPU, "Exp", functor::exp, float, Eigen::half, double, diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py index bf15bf86ee2..dd4c47e0122 100644 --- a/tensorflow/python/ops/math_ops_test.py +++ b/tensorflow/python/ops/math_ops_test.py @@ -872,5 +872,29 @@ class RangeTest(test_util.TensorFlowTestCase): self.assertAllEqual(values, self.evaluate(tensor)) +@test_util.run_all_in_graph_and_eager_modes +class ExpTest(test_util.TensorFlowTestCase): + + def testExp(self): + x = np.random.randn(1000, 1000) + for dtype in [np.float32, np.float64, np.float16]: + x_np = np.array(x, dtype=dtype) + x_tf = constant_op.constant(x_np, shape=x_np.shape) + y_tf = math_ops.exp(x_tf) + y_tf_np = self.evaluate(y_tf) + y_np = np.exp(x_np) + self.assertAllClose(y_tf_np, y_np, atol=1e-5) + + def testExpExtendType(self): + in_bf16 = np.random.randn(1000, 1000).astype(dtypes.bfloat16.as_numpy_dtype) + out_bf16 = self.evaluate(math_ops.exp(in_bf16)) + + in_f32 = math_ops.cast(in_bf16, dtypes.float32) + out_f32 = self.evaluate(math_ops.exp(in_f32)) + expected = math_ops.cast(out_f32, dtypes.bfloat16) + + self.assertAllClose(out_bf16, expected, rtol=1e-5) + + if __name__ == "__main__": googletest.main() From 38e673228b8d619a0651e09c2dae52a4a695af58 Mon Sep 17 00:00:00 2001 From: Elena Zhelezina Date: Thu, 9 Jul 2020 19:07:14 +0100 Subject: [PATCH 0190/2522] Quantization of PAD operator for 16x8 quantization scheme. 
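In this scheme activations are quantized to 16 bits and weights to 8 bits.
For context, a rough end-to-end sketch of converting a small model containing
a PAD op under that scheme is given below; the experimental OpsSet name is the
one exposed by the converter around this time and is an assumption rather than
something introduced by this patch, and the toy model and representative
dataset are mine:

  import numpy as np
  import tensorflow as tf

  # Small model whose TFLite graph contains PAD (from ZeroPadding2D) plus a
  # layer with weights so there is something to quantize.
  inp = tf.keras.Input(shape=(8, 8, 3))
  x = tf.keras.layers.ZeroPadding2D(padding=((0, 1), (2, 3)))(inp)
  out = tf.keras.layers.Conv2D(4, 3)(x)
  model = tf.keras.Model(inp, out)

  def representative_dataset():
    for _ in range(10):
      yield [np.random.rand(1, 8, 8, 3).astype(np.float32)]

  converter = tf.lite.TFLiteConverter.from_keras_model(model)
  converter.optimizations = [tf.lite.Optimize.DEFAULT]
  converter.representative_dataset = representative_dataset
  # 16-bit activations / 8-bit weights.
  converter.target_spec.supported_ops = [
      tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8
  ]
  tflite_model = converter.convert()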
Change-Id: Ie76153f238933fca6267fd9c04627b5fccf10368 --- tensorflow/lite/testing/op_tests/pad.py | 44 ++++++++++++++++--- .../lite/tools/optimize/operator_property.cc | 1 - 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/tensorflow/lite/testing/op_tests/pad.py b/tensorflow/lite/testing/op_tests/pad.py index 0746a5a1601..e49a54d0557 100644 --- a/tensorflow/lite/testing/op_tests/pad.py +++ b/tensorflow/lite/testing/op_tests/pad.py @@ -37,7 +37,8 @@ def make_pad_tests(options): "paddings": [[[0, 0], [0, 1], [2, 3], [0, 0]], [[0, 1], [0, 0], [0, 0], [2, 3]]], "constant_paddings": [True, False], - "fully_quantize": [False] + "fully_quantize": [False], + "quantize_mode_16x8": [False] }, # 2D: { @@ -45,7 +46,8 @@ def make_pad_tests(options): "input_shape": [[1, 2]], "paddings": [[[0, 1], [2, 3]]], "constant_paddings": [True, False], - "fully_quantize": [False] + "fully_quantize": [False], + "quantize_mode_16x8": [False] }, # 1D: { @@ -53,7 +55,8 @@ def make_pad_tests(options): "input_shape": [[1]], "paddings": [[[1, 2]]], "constant_paddings": [False], - "fully_quantize": [False] + "fully_quantize": [False], + "quantize_mode_16x8": [False] }, # 4D: { @@ -63,7 +66,18 @@ def make_pad_tests(options): [[0, 1], [0, 0], [0, 0], [2, 3]], [[0, 0], [0, 0], [0, 0], [0, 0]]], "constant_paddings": [True], - "fully_quantize": [True] + "fully_quantize": [True], + "quantize_mode_16x8": [False] + }, + { + "dtype": [tf.float32], + "input_shape": [[1, 1, 2, 1], [2, 1, 1, 1]], + "paddings": [[[0, 0], [0, 1], [2, 3], [0, 0]], + [[0, 1], [0, 0], [0, 0], [2, 3]], + [[0, 0], [0, 0], [0, 0], [0, 0]]], + "constant_paddings": [True], + "fully_quantize": [True], + "quantize_mode_16x8": [True] }, # 2D: { @@ -71,7 +85,16 @@ def make_pad_tests(options): "input_shape": [[1, 2]], "paddings": [[[0, 1], [2, 3]]], "constant_paddings": [True], - "fully_quantize": [True] + "fully_quantize": [True], + "quantize_mode_16x8": [False], + }, + { + "dtype": [tf.float32], + "input_shape": [[1, 2]], + "paddings": [[[0, 1], [2, 3]]], + "constant_paddings": [True], + "fully_quantize": [True], + "quantize_mode_16x8": [True], }, # 1D: { @@ -79,7 +102,16 @@ def make_pad_tests(options): "input_shape": [[1]], "paddings": [[[1, 2]]], "constant_paddings": [True], - "fully_quantize": [True] + "fully_quantize": [True], + "quantize_mode_16x8": [False], + }, + { + "dtype": [tf.float32], + "input_shape": [[1]], + "paddings": [[[1, 2]]], + "constant_paddings": [True], + "fully_quantize": [True], + "quantize_mode_16x8": [True], }, ] diff --git a/tensorflow/lite/tools/optimize/operator_property.cc b/tensorflow/lite/tools/optimize/operator_property.cc index f2cb98ef31a..36843188d04 100644 --- a/tensorflow/lite/tools/optimize/operator_property.cc +++ b/tensorflow/lite/tools/optimize/operator_property.cc @@ -830,7 +830,6 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index, property.outputs = {{0, {}}}; property.restrict_same_input_output_scale = true; property.version = 2; - property.quantizable_int16 = false; break; case BuiltinOperator_QUANTIZE: property.inputs = {{0, {}}}; From 4b6a394e951090e8ffb3770badfef3ab0b293d23 Mon Sep 17 00:00:00 2001 From: Elena Zhelezina Date: Thu, 9 Jul 2020 17:21:24 +0100 Subject: [PATCH 0191/2522] Added GATHER operator for 16x8. Implementation, tests, versioning are added. 
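The kernel registration, the op versioning tables and the runtime-version map
are all bumped to version 4 for the int16 case. Restated as a plain sketch,
the version selection in tools/versioning/op_version.cc for GATHER now behaves
as follows (the int8 case is the pre-existing version-2 rule, not part of this
change):

  def gather_op_version(input_type):
    # Mirrors GetBuiltinOperatorVersion() for BuiltinOperator_GATHER.
    if input_type == 'INT16':   # added by this change
      return 4
    if input_type == 'BOOL':
      return 3
    if input_type == 'INT8':    # pre-existing quantized int8 case
      return 2
    return 1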
Change-Id: I87ffb816994b07770419979e45ce14a73b569bf9 --- .../delegates/nnapi/acceleration_test_list.cc | 1 + tensorflow/lite/kernels/gather.cc | 5 +++++ tensorflow/lite/kernels/gather_test.cc | 18 ++++++++++++++++++ tensorflow/lite/kernels/register.cc | 2 +- tensorflow/lite/toco/tflite/op_version.cc | 1 + .../lite/tools/optimize/operator_property.cc | 1 - tensorflow/lite/tools/versioning/op_version.cc | 3 +++ .../lite/tools/versioning/runtime_version.cc | 1 + 8 files changed, 30 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc b/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc index 56c1895ca4e..e08cdb763c0 100644 --- a/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc +++ b/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc @@ -214,6 +214,7 @@ TypesGatherOpTest/Float32Int32,29 TypesGatherOpTest/Int32Int32,29 TypesGatherOpTest/Uint8Int32,29 TypesGatherOpTest/Int8Int32,29 +-TypesGatherOpTest/.*Int16.* # hashtable_lookup_test # All test excepted the string one should be accelerated diff --git a/tensorflow/lite/kernels/gather.cc b/tensorflow/lite/kernels/gather.cc index 1de49f7c486..01a1e2a8a17 100644 --- a/tensorflow/lite/kernels/gather.cc +++ b/tensorflow/lite/kernels/gather.cc @@ -61,6 +61,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { case kTfLiteFloat32: case kTfLiteUInt8: case kTfLiteInt8: + case kTfLiteInt16: case kTfLiteInt64: case kTfLiteInt32: case kTfLiteBool: @@ -143,6 +144,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return Gather(*params, input, positions, output); case kTfLiteInt8: return Gather(*params, input, positions, output); + case kTfLiteInt16: + return Gather(*params, input, positions, output); case kTfLiteInt32: return Gather(*params, input, positions, output); case kTfLiteInt64: @@ -165,6 +168,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return Gather(*params, input, positions, output); case kTfLiteInt8: return Gather(*params, input, positions, output); + case kTfLiteInt16: + return Gather(*params, input, positions, output); case kTfLiteInt32: return Gather(*params, input, positions, output); case kTfLiteInt64: diff --git a/tensorflow/lite/kernels/gather_test.cc b/tensorflow/lite/kernels/gather_test.cc index 01be7f01935..3f987975856 100644 --- a/tensorflow/lite/kernels/gather_test.cc +++ b/tensorflow/lite/kernels/gather_test.cc @@ -272,6 +272,24 @@ TEST(TypesGatherOpTest, Int8Int64) { EXPECT_THAT(m.GetOutput(), ElementsAreArray({14, 15, -13, -120})); } +TEST(TypesGatherOpTest, Int16Int32) { + GatherOpModel m({TensorType_INT16, {2, 2}}, {TensorType_INT32, {2}}); + m.SetInput({-13, -32000, 0, 32500}); + m.SetPositions({1, 0}); + m.Invoke(); + + EXPECT_THAT(m.GetOutput(), ElementsAreArray({0, 32500, -13, -32000})); +} + +TEST(TypesGatherOpTest, Int16Int64) { + GatherOpModel m({TensorType_INT16, {2, 2}}, {TensorType_INT64, {2}}); + m.SetInput({-13, -32000, 0, 32500}); + m.SetPositions({1LL, 0LL}); + m.Invoke(); + + EXPECT_THAT(m.GetOutput(), ElementsAreArray({0, 32500, -13, -32000})); +} + TEST(TypesGatherOpTest, Int64Int32) { GatherOpModel m({TensorType_INT64, {2, 2}}, {TensorType_INT32, {2}}); m.SetInput({-(1LL << 34), 134LL, 14LL, 15LL}); diff --git a/tensorflow/lite/kernels/register.cc b/tensorflow/lite/kernels/register.cc index 333ffc12d7e..e735cb926f9 100644 --- a/tensorflow/lite/kernels/register.cc +++ b/tensorflow/lite/kernels/register.cc @@ -131,7 +131,7 @@ BuiltinOpResolver::BuiltinOpResolver() { 
AddBuiltin(BuiltinOperator_DEPTH_TO_SPACE, Register_DEPTH_TO_SPACE()); AddBuiltin(BuiltinOperator_GATHER, Register_GATHER(), /* min_version = */ 1, - /* max_version = */ 3); + /* max_version = */ 4); AddBuiltin(BuiltinOperator_TRANSPOSE, Register_TRANSPOSE(), /* min_version = */ 1, /* max_version = */ 4); diff --git a/tensorflow/lite/toco/tflite/op_version.cc b/tensorflow/lite/toco/tflite/op_version.cc index 02afc35de3b..d395dfb5a9b 100644 --- a/tensorflow/lite/toco/tflite/op_version.cc +++ b/tensorflow/lite/toco/tflite/op_version.cc @@ -80,6 +80,7 @@ std::string GetMinimumRuntimeVersionForModel(const Model& model) { {{OperatorType::kGather, 1}, "1.6.0"}, {{OperatorType::kGather, 2}, "1.14.0"}, {{OperatorType::kGather, 3}, "1.15.0"}, + {{OperatorType::kGather, 4}, kPendingReleaseOpVersion}, {{OperatorType::kGatherNd, 1}, "1.14.0"}, {{OperatorType::kGatherNd, 2}, kPendingReleaseOpVersion}, {{OperatorType::kSvdf, 1}, "1.5.0"}, diff --git a/tensorflow/lite/tools/optimize/operator_property.cc b/tensorflow/lite/tools/optimize/operator_property.cc index f2cb98ef31a..4c63929d588 100644 --- a/tensorflow/lite/tools/optimize/operator_property.cc +++ b/tensorflow/lite/tools/optimize/operator_property.cc @@ -191,7 +191,6 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index, property.outputs = {{0, {}}}; property.restrict_same_input_output_scale = true; property.version = 2; - property.quantizable_int16 = false; break; case BuiltinOperator_HARD_SWISH: { property.inputs = {{0, {}}}; diff --git a/tensorflow/lite/tools/versioning/op_version.cc b/tensorflow/lite/tools/versioning/op_version.cc index 2f62230f334..79cced17509 100644 --- a/tensorflow/lite/tools/versioning/op_version.cc +++ b/tensorflow/lite/tools/versioning/op_version.cc @@ -176,6 +176,9 @@ int GetBuiltinOperatorVersion(const OpSignature& op_sig) { return 1; case BuiltinOperator_GATHER: + if (op_sig.input_types.at(0) == TensorType_INT16) { + return 4; + } // If the op takes bool input, it is version 3. if (op_sig.input_types.at(0) == TensorType_BOOL) { return 3; diff --git a/tensorflow/lite/tools/versioning/runtime_version.cc b/tensorflow/lite/tools/versioning/runtime_version.cc index d345164f7e6..a1ae430e13c 100644 --- a/tensorflow/lite/tools/versioning/runtime_version.cc +++ b/tensorflow/lite/tools/versioning/runtime_version.cc @@ -109,6 +109,7 @@ std::string FindMinimumRuntimeVersionForOp(tflite::BuiltinOperator op_code, {{BuiltinOperator_GATHER, 1}, "1.6.0"}, {{BuiltinOperator_GATHER, 2}, "1.14.0"}, {{BuiltinOperator_GATHER, 3}, "1.15.0"}, + {{BuiltinOperator_GATHER, 4}, kPendingReleaseVersion}, {{BuiltinOperator_GATHER_ND, 1}, "1.14.0"}, {{BuiltinOperator_GATHER_ND, 2}, "2.3.0"}, {{BuiltinOperator_HASHTABLE_LOOKUP, 1}, "1.5.0"}, From 67323782413704ac44923de99b3ceb3229e971fa Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Fri, 10 Jul 2020 21:11:51 +0700 Subject: [PATCH 0192/2522] Add path exist, is directory, stat --- .../filesystem/plugins/gcs/gcs_filesystem.cc | 106 ++++++++++++++++++ 1 file changed, 106 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc index 7861a5708b5..9be93c4fbf4 100644 --- a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc @@ -18,6 +18,7 @@ limitations under the License. 
#include #include "absl/strings/numbers.h" +#include "absl/strings/str_cat.h" #include "google/cloud/storage/client.h" #include "tensorflow/c/env.h" #include "tensorflow/c/experimental/filesystem/plugins/gcs/gcs_helper.h" @@ -556,6 +557,111 @@ void CopyFile(const TF_Filesystem* filesystem, const char* src, const char* dst, TF_SetStatusFromGCSStatus(metadata.status(), status); } +// TODO(vnvo2409): This approach can cause a problem when our path is +// `path/to/dir` and there is an object with key `path/to/directory`. Will be +// fixed when refactoring. +void PathExists(const TF_Filesystem* filesystem, const char* path, + TF_Status* status) { + std::string bucket, object; + ParseGCSPath(path, true, &bucket, &object, status); + if (TF_GetCode(status) != TF_OK) return; + + auto gcs_file = static_cast(filesystem->plugin_filesystem); + for (auto&& metadata : + gcs_file->gcs_client.ListObjects(bucket, gcs::Prefix(object))) { + if (!metadata) { + TF_SetStatusFromGCSStatus(metadata.status(), status); + return; + } + // We consider a path exists if there is at least one object whose key + // contains the path. + return TF_SetStatus(status, TF_OK, ""); + } + return TF_SetStatus( + status, TF_NOT_FOUND, + absl::StrCat("The path ", path, " does not exist.").c_str()); +} + +bool IsDirectory(const TF_Filesystem* filesystem, const char* path, + TF_Status* status) { + std::string bucket, object; + ParseGCSPath(path, true, &bucket, &object, status); + if (TF_GetCode(status) != TF_OK) return false; + + auto gcs_file = static_cast(filesystem->plugin_filesystem); + if (object.empty()) { + auto bucket_metadata = gcs_file->gcs_client.GetBucketMetadata(bucket); + TF_SetStatusFromGCSStatus(bucket_metadata.status(), status); + if (TF_GetCode(status) == TF_OK) + return true; + else + return false; + } + + // We check if there is an object with this key on the GCS server. + auto metadata = gcs_file->gcs_client.GetObjectMetadata(bucket, object); + if (metadata) { + TF_SetStatus(status, TF_OK, ""); + if (metadata->name().back() == '/') + return true; + else + return false; + } + + // If there is no object with this key on the GCS server. We check if there is + // any object whose key contains that path. 
+ MaybeAppendSlash(&object); + for (auto&& metadata : + gcs_file->gcs_client.ListObjects(bucket, gcs::Prefix(object))) { + if (!metadata) { + TF_SetStatusFromGCSStatus(metadata.status(), status); + return false; + } + TF_SetStatus(status, TF_OK, ""); + return true; + } + TF_SetStatus(status, TF_NOT_FOUND, + absl::StrCat("The path ", path, " does not exist.").c_str()); + return false; +} + +void Stat(const TF_Filesystem* filesystem, const char* path, + TF_FileStatistics* stats, TF_Status* status) { + std::string bucket, object; + ParseGCSPath(path, true, &bucket, &object, status); + if (TF_GetCode(status) != TF_OK) return; + + auto gcs_file = static_cast(filesystem->plugin_filesystem); + if (object.empty()) { + auto bucket_metadata = gcs_file->gcs_client.GetBucketMetadata(bucket); + TF_SetStatusFromGCSStatus(bucket_metadata.status(), status); + if (TF_GetCode(status) == TF_OK) { + stats->is_directory = true; + stats->length = 0; + stats->mtime_nsec = 0; + } + return; + } + if (IsDirectory(filesystem, path, status)) { + stats->is_directory = true; + stats->length = 0; + stats->mtime_nsec = 0; + return TF_SetStatus(status, TF_OK, ""); + } + if (TF_GetCode(status) == TF_OK) { + auto metadata = gcs_file->gcs_client.GetObjectMetadata(bucket, object); + if (metadata) { + stats->is_directory = false; + stats->length = metadata.value().size(); + stats->mtime_nsec = metadata.value() + .time_storage_class_updated() + .time_since_epoch() + .count(); + } + TF_SetStatusFromGCSStatus(metadata.status(), status); + } +} + } // namespace tf_gcs_filesystem static void ProvideFilesystemSupportFor(TF_FilesystemPluginOps* ops, From 63fecc718dba35cd257a1f1ca4f4fa5688ab2f99 Mon Sep 17 00:00:00 2001 From: Tare Gaskin Date: Fri, 10 Jul 2020 15:03:43 +0000 Subject: [PATCH 0193/2522] replacing 'iter_limit' with 'end' --- tensorflow/core/framework/shape_inference.cc | 12 ++++++------ .../core/grappler/costs/op_level_cost_estimator.cc | 4 ++-- .../optimizers/common_subgraph_elimination.cc | 2 +- .../core/grappler/optimizers/debug_stripper.cc | 2 +- tensorflow/core/grappler/utils.cc | 2 +- tensorflow/core/profiler/utils/derived_timeline.cc | 2 +- tensorflow/python/framework/python_op_gen.cc | 12 ++++++------ .../python/framework/python_op_gen_internal.cc | 4 ++-- 8 files changed, 20 insertions(+), 20 deletions(-) diff --git a/tensorflow/core/framework/shape_inference.cc b/tensorflow/core/framework/shape_inference.cc index 0861188ba4e..4b071df88e5 100644 --- a/tensorflow/core/framework/shape_inference.cc +++ b/tensorflow/core/framework/shape_inference.cc @@ -62,14 +62,14 @@ InferenceContext::InferenceContext( } std::vector>> handle_data( input_shapes.size()); - for (int i = 0, iter_limit = input_handle_shapes_and_types.size(); i < iter_limit; ++i) { + for (int i = 0, end = input_handle_shapes_and_types.size(); i < end; ++i) { const auto& v = input_handle_shapes_and_types[i]; if (v == nullptr) { continue; } handle_data[i].reset(new std::vector(v->size())); auto& new_v = *handle_data[i]; - for (int j = 0, iter_limit = v->size(); j < iter_limit; ++j) { + for (int j = 0, end = v->size(); j < end; ++j) { const auto& p = (*v)[j]; construction_status_.Update( MakeShapeFromPartialTensorShape(p.first, &new_v[j].shape)); @@ -1103,7 +1103,7 @@ Status InferenceContext::AttachContext(const Status& status) { std::vector input_from_tensors_str; std::vector input_from_tensors_as_shape_str; input_from_tensors_as_shape_str.reserve(inputs_.size()); - for (int i = 0, iter_limit = inputs_.size(); i < iter_limit; ++i) { + for (int i = 0, end 
= inputs_.size(); i < end; ++i) { int input_tensors_size_ = input_tensors_.size(); int input_tensors_as_shapes_size_ = input_tensors_as_shapes_.size(); if (requested_input_tensor_as_partial_shape_[i] && @@ -1146,7 +1146,7 @@ bool InferenceContext::MergeHandleShapesAndTypes( } std::vector new_values(shapes_and_types.size()); bool refined = false; - for (int i = 0, iter_limit = shapes_and_types.size(); i < iter_limit; ++i) { + for (int i = 0, end = shapes_and_types.size(); i < end; ++i) { const ShapeAndType& existing = (*to_update)[i]; if (shapes_and_types[i].dtype == existing.dtype) { new_values[i].dtype = existing.dtype; @@ -1170,7 +1170,7 @@ bool InferenceContext::MergeHandleShapesAndTypes( if (!refined) { return false; } - for (int i = 0, iter_limit = new_values.size(); i < iter_limit; ++i) { + for (int i = 0, end = new_values.size(); i < end; ++i) { (*to_update)[i] = new_values[i]; } return true; @@ -1205,7 +1205,7 @@ bool InferenceContext::RelaxHandleShapesAndMergeTypes( return false; } std::vector new_values(shapes_and_types.size()); - for (int i = 0, iter_limit = shapes_and_types.size(); i < iter_limit; ++i) { + for (int i = 0, end = shapes_and_types.size(); i < end; ++i) { const ShapeAndType& existing = (*to_update)[i]; if (shapes_and_types[i].dtype == existing.dtype) { new_values[i].dtype = existing.dtype; diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc index a1f29e1c63c..84087f86055 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc @@ -1514,7 +1514,7 @@ Costs OpLevelCostEstimator::PredictEinsum(const OpContext& op_context) const { n_dim.set_size(1); k_dim.set_size(1); - for (int i_idx = 0, iter_limit = a_input_str.size(); i_idx < iter_limit; ++i_idx) { + for (int i_idx = 0, end = a_input_str.size(); i_idx < end; ++i_idx) { if (b_input_str.find(a_input_str[i_idx]) == std::string::npos) { if (rhs_str.find(a_input_str[i_idx]) == std::string::npos) { VLOG(1) << "Missing accurate estimator for op: " << op_info.op(); @@ -1534,7 +1534,7 @@ Costs OpLevelCostEstimator::PredictEinsum(const OpContext& op_context) const { *(a_matrix_shape->add_dim()) = a_input_shape.dim(i_idx); *(b_matrix_shape->add_dim()) = a_input_shape.dim(i_idx); } - for (int i_idx = 0, iter_limit = b_input_str.size(); i_idx < iter_limit; ++i_idx) { + for (int i_idx = 0, end = b_input_str.size(); i_idx < end; ++i_idx) { if (a_input_str.find(b_input_str[i_idx]) == std::string::npos) { if (rhs_str.find(b_input_str[i_idx]) == std::string::npos) { VLOG(1) << "Missing accurate estimator for op: " << op_info.op(); diff --git a/tensorflow/core/grappler/optimizers/common_subgraph_elimination.cc b/tensorflow/core/grappler/optimizers/common_subgraph_elimination.cc index ad0cbce0b1d..57f7e7c664b 100644 --- a/tensorflow/core/grappler/optimizers/common_subgraph_elimination.cc +++ b/tensorflow/core/grappler/optimizers/common_subgraph_elimination.cc @@ -73,7 +73,7 @@ class UniqueNodes { if (it == memoized_signatures_.end()) return; std::vector& candidates = rep_[it->second]; - for (int i = 0, iter_limit = candidates.size(); i < iter_limit; ++i) { + for (int i = 0, end = candidates.size(); i < end; ++i) { if (candidates[i] == node) { std::swap(candidates[i], candidates[candidates.size() - 1]); candidates.resize(candidates.size() - 1); diff --git a/tensorflow/core/grappler/optimizers/debug_stripper.cc b/tensorflow/core/grappler/optimizers/debug_stripper.cc index 
b9e14df9261..865871439a8 100644 --- a/tensorflow/core/grappler/optimizers/debug_stripper.cc +++ b/tensorflow/core/grappler/optimizers/debug_stripper.cc @@ -63,7 +63,7 @@ Status DebugStripper::Optimize(Cluster* cluster, const GrapplerItem& item, node.mutable_attr()->swap(new_attr); // As Identity op only takes one input, mark redundant inputs as control // input. - for (int i = 1, iter_limit = node.input_size(); i < iter_limit; ++i) { + for (int i = 1, end = node.input_size(); i < end; ++i) { if (!IsControlInput(node.input(i))) { *node.mutable_input(i) = AsControlDependency(NodeName(node.input(i))); } diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc index e46910172aa..64bc098525b 100644 --- a/tensorflow/core/grappler/utils.cc +++ b/tensorflow/core/grappler/utils.cc @@ -357,7 +357,7 @@ void PermuteNodesInPlace(GraphDef* graph, std::vector* permutation, } permutation->swap(inv_perm); } - for (int n = 0, iter_limit = permutation->size(); n + 1 < iter_limit; ++n) { + for (int n = 0, end = permutation->size(); n + 1 < end; ++n) { while (n != (*permutation)[n]) { std::size_t r = (*permutation)[n]; graph->mutable_node()->SwapElements(n, r); diff --git a/tensorflow/core/profiler/utils/derived_timeline.cc b/tensorflow/core/profiler/utils/derived_timeline.cc index bcadf51c110..b22532565f7 100644 --- a/tensorflow/core/profiler/utils/derived_timeline.cc +++ b/tensorflow/core/profiler/utils/derived_timeline.cc @@ -130,7 +130,7 @@ void DerivedXLineBuilder::ExpandOrAddLevelEvent(const XEvent& event, } void DerivedXLineBuilder::ResetLastEvents(int level) { - for (int i = level, iter_limit = last_event_by_level_.size(); i < iter_limit; ++i) { + for (int i = level, end = last_event_by_level_.size(); i < end; ++i) { last_event_by_level_[i] = absl::nullopt; } if (level == 0) ResetDependentLines(); diff --git a/tensorflow/python/framework/python_op_gen.cc b/tensorflow/python/framework/python_op_gen.cc index 12aebb6a671..0e393160b3b 100644 --- a/tensorflow/python/framework/python_op_gen.cc +++ b/tensorflow/python/framework/python_op_gen.cc @@ -63,7 +63,7 @@ void AddInferredAttr(const string& indentation, const string& attr_name, string VectorToTuple(const std::vector& l) { if (l.size() == 1) return strings::StrCat("(", l.front(), ",)"); string ret = "("; - for (int i = 0, iter_limit = l.size(); i < iter_limit; ++i) { + for (int i = 0, end = l.size(); i < end; ++i) { if (i > 0) { strings::StrAppend(&ret, ", "); } @@ -75,11 +75,11 @@ string VectorToTuple(const std::vector& l) { void Unflatten(const string& prefix, const std::vector& output_sizes, const string& var, string* result) { - for (int i = 0, iter_limit = output_sizes.size(); i < iter_limit; ++i) { + for (int i = 0, end = output_sizes.size(); i < end; ++i) { if (!output_sizes[i].empty()) { strings::StrAppend(result, prefix, var, " = "); if (i > 0) strings::StrAppend(result, var, "[:", i, "] + "); - if (i + 1 < iter_limit) { + if (i + 1 < end) { // Special case i == 0 to avoid "0 +" in the generated code. if (i == 0) { strings::StrAppend(result, "[", var, "[:", output_sizes[i], "]] + ", @@ -295,7 +295,7 @@ string GenEagerPythonOp::Code() { // from the end of params_no_default_, and adding params_no_default_. 
attrs_.reserve(params_no_default_.size() - op_def_.input_arg_size() + params_with_default_.size()); - for (int i = op_def_.input_arg_size(), iter_limit = params_no_default_.size(); i < iter_limit; ++i) { + for (int i = op_def_.input_arg_size(), end = params_no_default_.size(); i < end; ++i) { attrs_.push_back(params_no_default_[i].GetName()); } for (const auto& p : params_with_default_) { @@ -331,7 +331,7 @@ string GenEagerPythonOp::Code() { parameters_with_defaults.empty() ? "" : ", ", "name=None"); // Add attr_expressions_ for attrs that are params. - for (int i = 0, iter_limit = attrs_.size(); i < iter_limit; ++i) { + for (int i = 0, end = attrs_.size(); i < end; ++i) { const string& attr_name = attrs_[i]; const string& attr_api_name = param_names_[i + op_def_.input_arg_size()].GetRenameTo(); @@ -522,7 +522,7 @@ bool GenEagerPythonOp::GetEagerFunctionSetup(const string& indentation, } } - for (int i = 0, iter_limit = attrs_.size(); i < iter_limit; ++i) { + for (int i = 0, end = attrs_.size(); i < end; ++i) { const string& attr_name = attrs_[i]; const auto& param = param_names_[i + op_def_.input_arg_size()]; const auto& attr = *FindAttr(attr_name, op_def_); diff --git a/tensorflow/python/framework/python_op_gen_internal.cc b/tensorflow/python/framework/python_op_gen_internal.cc index d2e25e368b4..a8de8b4e621 100644 --- a/tensorflow/python/framework/python_op_gen_internal.cc +++ b/tensorflow/python/framework/python_op_gen_internal.cc @@ -561,10 +561,10 @@ string GenPythonOp::Code() { // from the end of args_no_default, and adding args_no_default. attrs_.reserve(params_no_default.size() - op_def_.input_arg_size() + params_with_default.size()); - for (int i = op_def_.input_arg_size(), iter_limit = params_no_default.size(); i < iter_limit; ++i) { + for (int i = op_def_.input_arg_size(), end = params_no_default.size(); i < end; ++i) { attrs_.push_back(params_no_default[i].GetName()); } - for (int i = 0, iter_limit = params_with_default.size(); i < iter_limit; ++i) { + for (int i = 0, end = params_with_default.size(); i < end; ++i) { attrs_.push_back(params_with_default[i].GetName()); } From dfd2589b366710b8316162b7507521ca3f779a01 Mon Sep 17 00:00:00 2001 From: Tare Gaskin Date: Fri, 10 Jul 2020 15:57:20 +0000 Subject: [PATCH 0194/2522] shape_inference.cc --- tensorflow/core/framework/shape_inference.cc | 26 ++++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tensorflow/core/framework/shape_inference.cc b/tensorflow/core/framework/shape_inference.cc index 4b071df88e5..456c1826572 100644 --- a/tensorflow/core/framework/shape_inference.cc +++ b/tensorflow/core/framework/shape_inference.cc @@ -128,7 +128,7 @@ Status InferenceContext::set_output(StringPiece output_name, return errors::InvalidArgument("Must have exactly ", shapes.size(), " shapes."); } - for (int i = 0; i < size; ++i) { + for (int i = 0; i < shapes_size; ++i) { outputs_[i + start] = shapes[i]; } } @@ -182,8 +182,8 @@ void InferenceContext::PreInputInit( } Status InferenceContext::ExpandOutputs(int new_output_size) { - int outputs_size_ = outputs_.size(); - if (new_output_size < outputs_size_) { + const int outputs_size = outputs_.size(); + if (new_output_size < outputs_size) { return errors::InvalidArgument("Trying to reduce number of outputs of op."); } outputs_.resize(new_output_size, nullptr); @@ -211,8 +211,8 @@ void InferenceContext::PostInputInit( } input_handle_shapes_and_types_ = std::move(input_handle_data); } - int inputs_size_ = inputs_.size(); - if (inputs_size_ != 
num_inputs_from_node_def) { + const int inputs_size = inputs_.size(); + if (inputs_size != num_inputs_from_node_def) { construction_status_ = errors::InvalidArgument( "Wrong number of inputs passed: ", inputs_.size(), " while ", num_inputs_from_node_def, " expected based on NodeDef"); @@ -720,8 +720,8 @@ Status InferenceContext::MakeShapeFromShapeTensorTreatScalarAsUnknownShape( TF_RETURN_IF_ERROR(WithRankAtMost(input(input_idx), 1, &input_shape)); requested_input_tensor_as_partial_shape_[input_idx] = true; - int input_tensors_as_shapes_size_ = input_tensors_as_shapes_.size(); - if (input_idx < input_tensors_as_shapes_size_ && + const int input_tensors_as_shapes_size = input_tensors_as_shapes_.size(); + if (input_idx < input_tensors_as_shapes_size && input_tensors_as_shapes_[input_idx].IsSet() && RankKnown(input_tensors_as_shapes_[input_idx])) { *out = input_tensors_as_shapes_[input_idx]; @@ -739,8 +739,8 @@ Status InferenceContext::MakeShapeFromShapeTensor(int input_idx, TF_RETURN_IF_ERROR(WithRank(input(input_idx), 1, &input_shape)); requested_input_tensor_as_partial_shape_[input_idx] = true; - int input_tensors_as_shapes_size_ = input_tensors_as_shapes_.size(); - if (input_idx < input_tensors_as_shapes_size_ && + const int input_tensors_as_shapes_size = input_tensors_as_shapes_.size(); + if (input_idx < input_tensors_as_shapes_size && input_tensors_as_shapes_[input_idx].IsSet() && RankKnown(input_tensors_as_shapes_[input_idx])) { *out = input_tensors_as_shapes_[input_idx]; @@ -1104,15 +1104,15 @@ Status InferenceContext::AttachContext(const Status& status) { std::vector input_from_tensors_as_shape_str; input_from_tensors_as_shape_str.reserve(inputs_.size()); for (int i = 0, end = inputs_.size(); i < end; ++i) { - int input_tensors_size_ = input_tensors_.size(); - int input_tensors_as_shapes_size_ = input_tensors_as_shapes_.size(); + const int input_tensors_as_shapes_size = input_tensors_as_shapes_.size(); + const int input_tensors_size = input_tensors_.size(); if (requested_input_tensor_as_partial_shape_[i] && - i < input_tensors_as_shapes_size_ && + i < input_tensors_as_shapes_size && input_tensors_as_shapes_[i].IsSet() && RankKnown(input_tensors_as_shapes_[i])) { input_from_tensors_as_shape_str.push_back(strings::StrCat( "input[", i, "] = ", DebugString(input_tensors_as_shapes_[i]))); - } else if (requested_input_tensor_[i] && i < input_tensors_size_ && + } else if (requested_input_tensor_[i] && i < input_tensors_size && input_tensors_[i] != nullptr) { input_from_tensors_str.push_back(strings::StrCat( "input[", i, "] = <", From a4fa810f1fccce4f70d4419a169b831eb7ff59e6 Mon Sep 17 00:00:00 2001 From: Tare Gaskin Date: Fri, 10 Jul 2020 16:03:33 +0000 Subject: [PATCH 0195/2522] Update bcast.h, topological_sort.cc --- tensorflow/core/grappler/utils/topological_sort.cc | 4 ++-- tensorflow/core/util/bcast.h | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/grappler/utils/topological_sort.cc b/tensorflow/core/grappler/utils/topological_sort.cc index 8e373488e2a..14a58753c9f 100644 --- a/tensorflow/core/grappler/utils/topological_sort.cc +++ b/tensorflow/core/grappler/utils/topological_sort.cc @@ -81,8 +81,8 @@ Status ComputeTopologicalOrder( int ready_node = (*ready_nodes)[front]; for (int fanout : graph_view.GetFanout(ready_node)) { ++num_ready_inputs[fanout]; - const int graph_view_GetFanin_fanout_size = graph_view.GetFanin(fanout).size(); - if (num_ready_inputs[fanout] == graph_view_GetFanin_fanout_size) { + const int graph_view_get_fanin_fanout_size = 
graph_view.GetFanin(fanout).size(); + if (num_ready_inputs[fanout] == graph_view_get_fanin_fanout_size) { ready_nodes->push_back(fanout); ++back; } diff --git a/tensorflow/core/util/bcast.h b/tensorflow/core/util/bcast.h index db431db77fb..47d6dc0884e 100644 --- a/tensorflow/core/util/bcast.h +++ b/tensorflow/core/util/bcast.h @@ -133,14 +133,13 @@ BCastList::BCastList(const BCastList::Vec (&x)[N], const bool return_flattened_batch_indices) { typedef BCastList::Vec Vec; bool all_equal = true; - int largest_rank = 0; + size_t largest_rank = 0; output_batch_size_ = 1; for (int i = 0; i < N; ++i) { if (x[i] != x[0]) { all_equal = false; } - const int x_i_size = x[i].size(); - if (x_i_size > largest_rank) { + if (x[i].size() > largest_rank) { largest_rank = x[i].size(); } } From f03a29b93d89f5cfd6828d13884677c8ac12bf87 Mon Sep 17 00:00:00 2001 From: Tamas Nyiri Date: Fri, 10 Jul 2020 17:40:01 +0100 Subject: [PATCH 0196/2522] added missing argument --- tensorflow/lite/tools/optimize/modify_model_interface_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/lite/tools/optimize/modify_model_interface_test.cc b/tensorflow/lite/tools/optimize/modify_model_interface_test.cc index a1153fd8c9b..fd267575d68 100644 --- a/tensorflow/lite/tools/optimize/modify_model_interface_test.cc +++ b/tensorflow/lite/tools/optimize/modify_model_interface_test.cc @@ -520,7 +520,7 @@ TEST(ModelInterface, Uint8MutipleInputOutput) { } TEST(ModelInterface, Int8MutipleInputOutput) { - auto model = CreateQuantizedModelMultipleInputOutput(); + auto model = CreateQuantizedModelMultipleInputOutput(TensorType_INT8); // Change model type. flatbuffers::FlatBufferBuilder builder; From 4412b6861f044d79d5a564ff4398823b5f2ac3e4 Mon Sep 17 00:00:00 2001 From: Sidong-Wei Date: Fri, 10 Jul 2020 14:11:57 -0400 Subject: [PATCH 0197/2522] Adopt existing cpu frequency implementation over abseil one --- tensorflow/core/platform/default/BUILD | 2 ++ tensorflow/core/platform/default/port.cc | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/platform/default/BUILD b/tensorflow/core/platform/default/BUILD index 89231b0f206..9226c042bd3 100644 --- a/tensorflow/core/platform/default/BUILD +++ b/tensorflow/core/platform/default/BUILD @@ -269,6 +269,8 @@ cc_library( "//tensorflow/core/platform:mem.h", "//tensorflow/core/platform:numa.h", "//tensorflow/core/platform:snappy.h", + "//tensorflow/core/platform:profile_utils/cpu_utils.h", + "//tensorflow/core/platform:profile_utils/i_cpu_utils_helper.h", ], copts = tf_copts(), defines = ["TF_USE_SNAPPY"] + select({ diff --git a/tensorflow/core/platform/default/port.cc b/tensorflow/core/platform/default/port.cc index 11b3cd7fd9a..abd8ca26af2 100644 --- a/tensorflow/core/platform/default/port.cc +++ b/tensorflow/core/platform/default/port.cc @@ -21,6 +21,7 @@ limitations under the License. 
#include "tensorflow/core/platform/numa.h" #include "tensorflow/core/platform/snappy.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/platform/profile_utils/cpu_utils.h" #if defined(__linux__) && !defined(__ANDROID__) #include @@ -345,7 +346,7 @@ bool Snappy_UncompressToIOVec(const char* compressed, size_t compressed_length, string Demangle(const char* mangled) { return mangled; } double NominalCPUFrequency() { - return absl::base_internal::NominalCPUFrequency(); + return tensorflow::profile_utils::CpuUtils::GetCycleCounterFrequency(); } MemoryInfo GetMemoryInfo() { From 6e89b83f8643943c38dd413d1ebcdab28a715be5 Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Fri, 10 Jul 2020 20:26:57 +0000 Subject: [PATCH 0198/2522] working gradient! --- tensorflow/core/kernels/map_kernels.h | 40 +++++++++++++++++++ tensorflow/core/kernels/tensor_map.h | 23 +++++++++++ tensorflow/core/kernels/tensor_map_test.cc | 24 +++++++++++ tensorflow/core/ops/map_ops.cc | 9 +++++ .../python/kernel_tests/map_ops_test.py | 4 +- tensorflow/python/ops/list_ops.py | 11 +++++ tensorflow/python/ops/map_ops.py | 14 +++++-- 7 files changed, 120 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/kernels/map_kernels.h b/tensorflow/core/kernels/map_kernels.h index f67b8b6e10a..78040359026 100644 --- a/tensorflow/core/kernels/map_kernels.h +++ b/tensorflow/core/kernels/map_kernels.h @@ -109,6 +109,46 @@ class TensorMapSize : public OpKernel { } }; +class TensorMapZeros : public OpKernel { + public: + explicit TensorMapZeros(OpKernelConstruction* c) : OpKernel(c) { + //OP_REQUIRES_OK(c, c->GetAttr("element_dtype", &element_dtype_)); + } + ~TensorMapZeros() override {} + + void Compute(OpKernelContext* c) override { + std::cout << "hello TensorMapInsert kernel" << std::endl; + const Tensor& temp_key = c->input(1); + const TensorKey key = TensorKey(temp_key); + std::cout << "got key" << std::endl; + const Tensor& value = c->input(2); + std::cout << "got value" << std::endl; + + const TensorMap* m = nullptr; + OP_REQUIRES_OK(c, GetInputMap(c, 0, &m)); + std::cout << "got map" << std::endl; + //TensorMap output_map; + //OP_REQUIRES_OK(c, ForwardInputOrCreateNewMap(c, 0, 0, *m, &output_map)); + //std::cout << "create output" << std::endl; + //output_map = m->Zeros(); + //c->set_output(0, &&output_map); + //std::cout << "inserted" << std::endl; + + Tensor* result; + AllocatorAttributes attr; + attr.set_on_host(true); + OP_REQUIRES_OK(c, c->allocate_output(0, TensorShape{}, &result, attr)); + TensorMap output_map = m->Zeros(); + result->scalar()() = std::move(output_map); + } + + private: + DataType element_dtype_; +}; + +REGISTER_KERNEL_BUILDER(Name("TensorMapZeros").Device(DEVICE_CPU), + TensorMapZeros); + class TensorMapInsert : public OpKernel { public: explicit TensorMapInsert(OpKernelConstruction* c) : OpKernel(c) { diff --git a/tensorflow/core/kernels/tensor_map.h b/tensorflow/core/kernels/tensor_map.h index a5d44550c98..7ab792b4813 100644 --- a/tensorflow/core/kernels/tensor_map.h +++ b/tensorflow/core/kernels/tensor_map.h @@ -143,6 +143,29 @@ class TensorMap { return out; } + TensorMap Zeros() const { + TensorMap out; + out.element_shape = element_shape; + out.element_dtype = element_dtype; + out.max_num_elements = max_num_elements; + // This performs a copy of the absl::hashmap. 
+ absl::flat_hash_map::iterator it = tensors_->values_.begin(); + while(it != tensors_->values_.end()) { + out.tensors_->values_.try_emplace(it->first, Tensor(0)); + it++; + } + return out; + } + std::vector keys() { + std::vector keys(tensors_->values_.size()); + absl::flat_hash_map::iterator it = tensors_->values_.begin(); + while(it != tensors_->values_.end()) { + keys.push_back((Tensor)it->first); + it++; + } + return keys; + } + // Insert key and value if the key does not already exist. // Returns true if the insertion happens. bool insert(const TensorKey& key, const Tensor& value) { diff --git a/tensorflow/core/kernels/tensor_map_test.cc b/tensorflow/core/kernels/tensor_map_test.cc index b93171b4f70..5774a605bbf 100644 --- a/tensorflow/core/kernels/tensor_map_test.cc +++ b/tensorflow/core/kernels/tensor_map_test.cc @@ -136,6 +136,30 @@ TEST(TensorMapTest, EncodeDecode) { EXPECT_EQ(tm.find(k)->first, tmc.find(k)->first); test::ExpectTensorEqual(tm.find(k)->second, tmc.find(k)->second); } + +TEST(TensorMapTest, Keys) { + TensorMap tm; + TensorKey k = Tensor(11); + TensorKey k2 = Tensor(12); + Tensor v = Tensor(22); + tm.insert(k,v); + tm.insert(k2,v); + std::vector keys = tm.keys(); + EXPECT_EQ(1,1); + Tensor t = Tensor(11); + //std::cout << "keys: " << keys[0] << std::endl; + //test::ExpectTensorEqual(keys[0], t); + //test::ExpectTensorEqual(keys[1], k2); +} + +TEST(TensorMapTest, Zeros) { + TensorMap tm; + TensorKey k = Tensor(11); + Tensor v = Tensor(22); + tm.insert(k,v); + TensorMap z = tm.Zeros(); + test::ExpectTensorEqual(z.find(k)->second,Tensor(0)); +} } // namespace } // namespace tensorflow diff --git a/tensorflow/core/ops/map_ops.cc b/tensorflow/core/ops/map_ops.cc index 09183e715ea..d3711755d9e 100644 --- a/tensorflow/core/ops/map_ops.cc +++ b/tensorflow/core/ops/map_ops.cc @@ -39,6 +39,15 @@ REGISTER_OP("TensorMapSize") .Output("size: int32") .SetShapeFn(shape_inference::ScalarShape); +REGISTER_OP("TensorMapZeros") + .Input("input_handle: variant") + .Output("output_handle: variant") + //.Attr("element_dtype: type") + .SetShapeFn([](shape_inference::InferenceContext* c) { + c->set_output(0, c->Scalar()); + return Status::OK(); + }); + REGISTER_OP("TensorMapInsert") .Input("input_handle: variant") .Input("key: element_dtype") diff --git a/tensorflow/python/kernel_tests/map_ops_test.py b/tensorflow/python/kernel_tests/map_ops_test.py index 85ca558af4f..9384571dc2b 100644 --- a/tensorflow/python/kernel_tests/map_ops_test.py +++ b/tensorflow/python/kernel_tests/map_ops_test.py @@ -106,7 +106,9 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): m = map_ops.tensor_map_insert(m, k, v) l = map_ops.tensor_map_lookup(m, k) l *= 5 - #print("gradient", tape.gradient(l, v), 2.0) + g= tape.gradient(l,v) + print("gradient",g) + self.assertAllClose(g, 5.0) diff --git a/tensorflow/python/ops/list_ops.py b/tensorflow/python/ops/list_ops.py index 3e7c116ec97..ccd4e6b0494 100644 --- a/tensorflow/python/ops/list_ops.py +++ b/tensorflow/python/ops/list_ops.py @@ -248,6 +248,7 @@ def _TensorListFromTensorGrad(op, dlist): @ops.RegisterGradient("TensorListGetItem") def _TensorListGetItemGrad(op, ditem): """Gradient for TensorListGetItem.""" + print("---GetItemGrad---") list_size = gen_list_ops.tensor_list_length(op.inputs[0]) list_grad = gen_list_ops.tensor_list_set_item( gen_list_ops.tensor_list_reserve( @@ -256,14 +257,21 @@ def _TensorListGetItemGrad(op, ditem): list_size, element_dtype=ditem.dtype), index=op.inputs[1], item=ditem) + print("op inputs", op.inputs) + 
print("ditem", ditem) + print("list_grad", list_grad) index_grad = None element_shape_grad = None + print("------") return list_grad, index_grad, element_shape_grad @ops.RegisterGradient("TensorListSetItem") def _TensorListSetItemGrad(op, dlist): """Gradient function for TensorListSetItem.""" + print("---SetItemGrad---") + print("op inputs", op.inputs) + print("dlist", dlist) _, index, item = op.inputs list_grad = gen_list_ops.tensor_list_set_item( dlist, index=index, item=array_ops.zeros_like(item)) @@ -273,6 +281,9 @@ def _TensorListSetItemGrad(op, dlist): index, element_shape=array_ops.shape(item), element_dtype=item.dtype) + print("list_grad", list_grad) + print("value_grad", element_grad) + print("------") return list_grad, index_grad, element_grad diff --git a/tensorflow/python/ops/map_ops.py b/tensorflow/python/ops/map_ops.py index 5cb045b5406..44894a8b6d9 100644 --- a/tensorflow/python/ops/map_ops.py +++ b/tensorflow/python/ops/map_ops.py @@ -25,6 +25,8 @@ from tensorflow.python.platform import resource_loader from tensorflow.python.framework import ops from tensorflow.python.ops import gen_map_ops from tensorflow.python.ops.gen_map_ops import * +from tensorflow.python.framework import constant_op + #zero_out_ops = load_library.load_op_library( # resource_loader.get_path_to_datafile('_zero_out_ops.so')) @@ -53,20 +55,24 @@ def tensor_map_erase(input_handle, key): def tensor_map_replace(input_handle, key, value): return gen_map_ops.tensor_map_replace(input_handle, key, value) + @ops.RegisterGradient("TensorMapLookup") def LookupGrad(op, dval): - map_grad = None - key_grad = None + # map grad should be a map that is 0 everywhere except 1 @key k + m, k = op.inputs + #m = gen_map_ops.tensor_map_zeros(m) + map_grad = tensor_map_replace(m, k, dval) key = op.inputs[1] - value_grad = tensor_map_lookup(dmap, key) + key_grad = None return map_grad, key_grad @ops.RegisterGradient("TensorMapInsert") def InsertGrad(op, dmap): + _, key, val = op.inputs map_grad, _ = gen_map_ops.tensor_map_erase(dmap, key) key_grad = None - key = op.inputs[1] value_grad = tensor_map_lookup(dmap, key) + #value_grad = constant_op.constant(1.0) return map_grad, key_grad, value_grad def zero_out(to_zero): From 9f7b49a56a08be1f161d3ac58fd45eb4353422d0 Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Sat, 11 Jul 2020 00:26:13 +0200 Subject: [PATCH 0199/2522] Fix deprecated usage of collections ABC --- tensorflow/python/compiler/xla/xla.py | 14 +++++++------- tensorflow/python/data/ops/dataset_ops.py | 4 ++-- tensorflow/python/data/ops/iterator_ops.py | 4 ++-- tensorflow/python/data/util/structure.py | 2 +- tensorflow/python/debug/wrappers/framework.py | 4 ++-- tensorflow/python/distribute/input_lib.py | 6 +++--- tensorflow/python/framework/indexed_slices.py | 3 ++- tensorflow/python/keras/engine/training_utils.py | 9 ++++----- .../preprocessing/preprocessing_test_utils.py | 5 ++--- tensorflow/python/keras/layers/recurrent.py | 6 +----- tensorflow/python/ops/math_ops.py | 9 ++++----- tensorflow/python/ops/nn_ops.py | 7 +++---- tensorflow/python/ops/variable_scope.py | 12 ++++++------ tensorflow/python/tools/saved_model_cli.py | 4 ++-- 14 files changed, 41 insertions(+), 48 deletions(-) diff --git a/tensorflow/python/compiler/xla/xla.py b/tensorflow/python/compiler/xla/xla.py index 5b19dc4ec5f..b68640f9b42 100644 --- a/tensorflow/python/compiler/xla/xla.py +++ b/tensorflow/python/compiler/xla/xla.py @@ -18,7 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import 
print_function -import collections import contextlib from six.moves import xrange # pylint: disable=redefined-builtin @@ -37,6 +36,7 @@ from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util import compat from tensorflow.python.util import nest from tensorflow.python.util import tf_inspect +from tensorflow.python.util.compat import collections_abc from tensorflow.python.util.tf_export import tf_export _XLA_COMPILE_ATTR = '_xla_compile_id' @@ -329,7 +329,7 @@ def _compile_internal(computation, inputs=None): if inputs is None: inputs = [] - if not isinstance(inputs, collections.Sequence): + if not isinstance(inputs, collections_abc.Sequence): raise TypeError('inputs must be a list') # Flatten inputs. @@ -428,15 +428,15 @@ def is_flat(outputs): """ # If outputs is a list or tuple, check if it has any nested structure. If # there is, then outputs is non-flat. - if isinstance(outputs, collections.Sequence): + if isinstance(outputs, collections_abc.Sequence): for o in outputs: - if (isinstance(o, collections.Sequence) or - isinstance(o, collections.Mapping) or + if (isinstance(o, collections_abc.Sequence) or + isinstance(o, collections_abc.Mapping) or hasattr(o.__class__, '__attrs_attrs__')): return False # If outputs is a dict, it is non-flat. - if isinstance(outputs, collections.Mapping): + if isinstance(outputs, collections_abc.Mapping): return False # If outputs is from the attrs library, it is non-flat. @@ -467,7 +467,7 @@ def _postprocess_flat_outputs(outputs): if outputs is None: outputs = tuple() # If the computation only returned one value, make it a tuple. - if not isinstance(outputs, collections.Sequence): + if not isinstance(outputs, collections_abc.Sequence): outputs = (outputs,) # Append `no_op` here so that return value of this function always contains diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 11b82933595..2aeacda42bd 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -18,7 +18,6 @@ from __future__ import division from __future__ import print_function import abc -import collections import functools import sys import threading @@ -72,6 +71,7 @@ from tensorflow.python.util import deprecation from tensorflow.python.util import function_utils from tensorflow.python.util import lazy_loader from tensorflow.python.util import nest as tf_nest +from tensorflow.python.util.compat import collections_abc from tensorflow.python.util.tf_export import tf_export # Loaded lazily due to a circular dependency (roughly @@ -103,7 +103,7 @@ tf_export("data.UNKNOWN_CARDINALITY").export_constant(__name__, "UNKNOWN") @tf_export("data.Dataset", v1=[]) @six.add_metaclass(abc.ABCMeta) -class DatasetV2(collections.Iterable, tracking_base.Trackable, +class DatasetV2(collections_abc.Iterable, tracking_base.Trackable, composite_tensor.CompositeTensor): """Represents a potentially large set of elements. 
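The motivation for this sweep: the abstract base classes (`Mapping`, `Sequence`, `Iterable`, ...) have lived in `collections.abc` since Python 3.3; the old aliases directly on `collections` emit deprecation warnings and are removed in Python 3.10. A minimal, self-contained sketch of the compat pattern the patch standardizes on (the fallback mirrors the try/except block removed from recurrent.py below; the real `tensorflow.python.util.compat` module may differ in detail):

    try:
        from collections import abc as collections_abc  # Python 3.3+ location
    except ImportError:  # Python 2 fallback
        import collections as collections_abc

    # isinstance checks keep working without touching the deprecated aliases
    # (collections.Mapping, collections.Sequence, ...) removed in Python 3.10.
    assert isinstance({}, collections_abc.Mapping)
    assert isinstance([], collections_abc.Sequence)
    assert not isinstance(42, collections_abc.Iterable)
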
diff --git a/tensorflow/python/data/ops/iterator_ops.py b/tensorflow/python/data/ops/iterator_ops.py index 462711eef1e..91dfa99e40b 100644 --- a/tensorflow/python/data/ops/iterator_ops.py +++ b/tensorflow/python/data/ops/iterator_ops.py @@ -18,7 +18,6 @@ from __future__ import division from __future__ import print_function import abc -import collections import threading import warnings @@ -41,6 +40,7 @@ from tensorflow.python.ops import gen_experimental_dataset_ops from tensorflow.python.training.saver import BaseSaverBuilder from tensorflow.python.training.tracking import base as trackable from tensorflow.python.util import deprecation +from tensorflow.python.util.compat import collections_abc from tensorflow.python.util.tf_export import tf_export @@ -545,7 +545,7 @@ class IteratorResourceDeleter(object): @tf_export("data.Iterator", v1=[]) @six.add_metaclass(abc.ABCMeta) -class IteratorBase(collections.Iterator, trackable.Trackable, +class IteratorBase(collections_abc.Iterator, trackable.Trackable, composite_tensor.CompositeTensor): """Represents an iterator of a `tf.data.Dataset`. diff --git a/tensorflow/python/data/util/structure.py b/tensorflow/python/data/util/structure.py index 87825005069..30e393c82de 100644 --- a/tensorflow/python/data/util/structure.py +++ b/tensorflow/python/data/util/structure.py @@ -440,7 +440,7 @@ def type_spec_from_value(element, use_fallback=True): if isinstance(element, tuple): if hasattr(element, "_fields") and isinstance( - element._fields, collections.Sequence) and all( + element._fields, collections_abc.Sequence) and all( isinstance(f, six.string_types) for f in element._fields): if isinstance(element, wrapt.ObjectProxy): element_type = type(element.__wrapped__) diff --git a/tensorflow/python/debug/wrappers/framework.py b/tensorflow/python/debug/wrappers/framework.py index 4fc1e33d130..e6767cca804 100644 --- a/tensorflow/python/debug/wrappers/framework.py +++ b/tensorflow/python/debug/wrappers/framework.py @@ -99,7 +99,6 @@ from __future__ import division from __future__ import print_function import abc -import collections import re import threading @@ -113,6 +112,7 @@ from tensorflow.python.framework import ops from tensorflow.python.platform import tf_logging from tensorflow.python.training import monitored_session from tensorflow.python.util import nest +from tensorflow.python.util.compat import collections_abc # Helper function. 
@@ -445,7 +445,7 @@ class BaseDebugWrapperSession(session.SessionInterface): """Check whether a possibly nested structure is empty.""" if not nest.is_nested(x): return False - if isinstance(x, collections.Mapping): + if isinstance(x, collections_abc.Mapping): return is_empty(list(x.values())) for item in x: if not is_empty(item): diff --git a/tensorflow/python/distribute/input_lib.py b/tensorflow/python/distribute/input_lib.py index dc1eeb38f8e..387836c0d77 100644 --- a/tensorflow/python/distribute/input_lib.py +++ b/tensorflow/python/distribute/input_lib.py @@ -18,7 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import collections import functools import sys @@ -53,6 +52,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops.ragged import ragged_tensor from tensorflow.python.types import distribute as distribute_types from tensorflow.python.util import nest +from tensorflow.python.util.compat import collections_abc from tensorflow.python.util.deprecation import deprecated from tensorflow.python.util.tf_export import tf_export from tensorflow.tools.docs import doc_controls @@ -143,7 +143,7 @@ def get_distributed_datasets_from_function(dataset_fn, @tf_export("distribute.DistributedIterator", v1=[]) -class DistributedIteratorInterface(collections.Iterator, +class DistributedIteratorInterface(collections_abc.Iterator, distribute_types.Iterator): """An iterator over `tf.distribute.DistributedDataset`. @@ -251,7 +251,7 @@ class DistributedIteratorInterface(collections.Iterator, @tf_export("distribute.DistributedDataset", v1=[]) -class DistributedDatasetInterface(collections.Iterable, +class DistributedDatasetInterface(collections_abc.Iterable, distribute_types.Iterable): # pylint: disable=line-too-long """Represents a dataset distributed among devices and machines. diff --git a/tensorflow/python/framework/indexed_slices.py b/tensorflow/python/framework/indexed_slices.py index 6ddf9410fd7..45f6e254b0e 100644 --- a/tensorflow/python/framework/indexed_slices.py +++ b/tensorflow/python/framework/indexed_slices.py @@ -32,6 +32,7 @@ from tensorflow.python.framework import tensor_conversion_registry from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import type_spec from tensorflow.python.types import internal +from tensorflow.python.util.compat import collections_abc from tensorflow.python.util.lazy_loader import LazyLoader from tensorflow.python.util.tf_export import tf_export @@ -344,7 +345,7 @@ def internal_convert_n_to_tensor_or_indexed_slices(values, RuntimeError: If a registered conversion function returns an invalid value. 
""" - if not isinstance(values, collections.Iterable): + if not isinstance(values, collections_abc.Iterable): raise TypeError("values must be iterable.") ret = [] for i, value in enumerate(values): diff --git a/tensorflow/python/keras/engine/training_utils.py b/tensorflow/python/keras/engine/training_utils.py index 3e0735ceec4..de686f2cb61 100644 --- a/tensorflow/python/keras/engine/training_utils.py +++ b/tensorflow/python/keras/engine/training_utils.py @@ -19,7 +19,6 @@ from __future__ import print_function import abc import atexit -import collections from collections import OrderedDict import functools import multiprocessing.pool @@ -616,7 +615,7 @@ def standardize_sample_or_class_weights(x_weight, output_names, weight_type): 'You should provide one `' + weight_type + '`' 'array per model output.') return x_weight - if isinstance(x_weight, collections.Mapping): + if isinstance(x_weight, collections_abc.Mapping): generic_utils.check_for_unexpected_keys(weight_type, x_weight, output_names) x_weights = [] for name in output_names: @@ -863,7 +862,7 @@ def collect_per_output_metric_info(metrics, [metrics_module.clone_metric(m) for m in metrics]) else: nested_metrics = [metrics] - elif isinstance(metrics, collections.Mapping): + elif isinstance(metrics, collections_abc.Mapping): generic_utils.check_for_unexpected_keys('metrics', metrics, output_names) nested_metrics = [] for name in output_names: @@ -1442,7 +1441,7 @@ def prepare_sample_weight_modes(training_endpoints, sample_weight_mode): ValueError: In case of invalid `sample_weight_mode` input. """ - if isinstance(sample_weight_mode, collections.Mapping): + if isinstance(sample_weight_mode, collections_abc.Mapping): generic_utils.check_for_unexpected_keys( 'sample_weight_mode', sample_weight_mode, [e.output_name for e in training_endpoints]) @@ -1535,7 +1534,7 @@ def prepare_loss_weights(training_endpoints, loss_weights=None): if loss_weights is None: for e in training_endpoints: e.loss_weight = 1. 
- elif isinstance(loss_weights, collections.Mapping): + elif isinstance(loss_weights, collections_abc.Mapping): generic_utils.check_for_unexpected_keys( 'loss_weights', loss_weights, [e.output_name for e in training_endpoints]) diff --git a/tensorflow/python/keras/layers/preprocessing/preprocessing_test_utils.py b/tensorflow/python/keras/layers/preprocessing/preprocessing_test_utils.py index 006cab1fb11..91545b8ee28 100644 --- a/tensorflow/python/keras/layers/preprocessing/preprocessing_test_utils.py +++ b/tensorflow/python/keras/layers/preprocessing/preprocessing_test_utils.py @@ -18,11 +18,10 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import collections - import numpy as np from tensorflow.python.platform import test +from tensorflow.python.util.compat import collections_abc class PreprocessingLayerTest(test.TestCase): @@ -38,7 +37,7 @@ class PreprocessingLayerTest(test.TestCase): self.assertEqual(len(a), len(b)) for a_value, b_value in zip(a, b): self.assertAllCloseOrEqual(a_value, b_value, msg=msg) - elif isinstance(a, collections.Mapping): + elif isinstance(a, collections_abc.Mapping): self.assertEqual(len(a), len(b)) for key, a_value in a.items(): b_value = b[key] diff --git a/tensorflow/python/keras/layers/recurrent.py b/tensorflow/python/keras/layers/recurrent.py index 2760509cd72..64bef8eaeac 100644 --- a/tensorflow/python/keras/layers/recurrent.py +++ b/tensorflow/python/keras/layers/recurrent.py @@ -45,14 +45,10 @@ from tensorflow.python.training.tracking import base as trackable from tensorflow.python.training.tracking import data_structures from tensorflow.python.util import deprecation from tensorflow.python.util import nest +from tensorflow.python.util.compat import collections_abc from tensorflow.python.util.tf_export import keras_export from tensorflow.tools.docs import doc_controls -try: - from collections import abc as collections_abc # pylint: disable=g-import-not-at-top -except ImportError: # For Python 2 - import collections as collections_abc # pylint: disable=g-import-not-at-top - RECURRENT_DROPOUT_WARNING_MSG = ( 'RNN `implementation=2` is not supported when `recurrent_dropout` is set. ' diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index d8f309a29a7..83986336c63 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -70,8 +70,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import collections - import numpy as np import six from six.moves import builtins @@ -100,6 +98,7 @@ from tensorflow.python.util import compat from tensorflow.python.util import deprecation from tensorflow.python.util import dispatch from tensorflow.python.util import nest +from tensorflow.python.util.compat import collections_abc from tensorflow.python.util.tf_export import tf_export # Aliases for some automatically-generated names. @@ -3490,7 +3489,7 @@ def add_n(inputs, name=None): ValueError: If `inputs` don't all have same shape and dtype or the shape cannot be inferred. """ - if not inputs or not isinstance(inputs, collections.Iterable): + if not inputs or not isinstance(inputs, collections_abc.Iterable): raise ValueError("inputs must be an iterable of at least one " "Tensor/IndexedSlices with the same dtype and shape") inputs = ops.convert_n_to_tensor_or_indexed_slices(inputs) @@ -3623,9 +3622,9 @@ def sigmoid(x, name=None): Returns: A Tensor with the same type as `x`. 
- + Usage Example: - + >>> x = tf.constant([-128.0, 0.0, 128.0], dtype=tf.float32) >>> tf.sigmoid(x) = 1. Specifies the output stride. Defaults to [1]*N. If any value of strides is > 1, then all values of @@ -3273,7 +3272,7 @@ def conv_transpose(input, # pylint: disable=redefined-builtin [input, filter, output_shape]) as name: if tensor_util.is_tensor(output_shape): n = output_shape.shape[0] - 2 - elif isinstance(output_shape, collections.Sized): + elif isinstance(output_shape, collections_abc.Sized): n = len(output_shape) - 2 else: raise ValueError("output_shape must be a tensor or sized collection.") diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py index a3c28112350..6e0e83f8564 100644 --- a/tensorflow/python/ops/variable_scope.py +++ b/tensorflow/python/ops/variable_scope.py @@ -18,7 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import collections as collections_lib import copy import enum # pylint: disable=g-bad-import-order import functools @@ -47,6 +46,7 @@ from tensorflow.python.util import deprecation from tensorflow.python.util import function_utils from tensorflow.python.util import tf_contextlib from tensorflow.python.util import tf_inspect +from tensorflow.python.util.compat import collections_abc from tensorflow.python.util.tf_export import tf_export __all__ = [ @@ -79,13 +79,13 @@ class _PartitionInfo(object): ValueError: If `full_shape` or `var_offset` differ in length. If `var_offset` exceeds `full_shape` in any dimension. """ - if not isinstance(full_shape, collections_lib.Sequence) or isinstance( + if not isinstance(full_shape, collections_abc.Sequence) or isinstance( full_shape, six.string_types): raise TypeError( "`full_shape` must be a sequence (like tuple or list) instead of " + type(full_shape).__name__) - if not isinstance(var_offset, collections_lib.Sequence) or isinstance( + if not isinstance(var_offset, collections_abc.Sequence) or isinstance( var_offset, six.string_types): raise TypeError( "`var_offset` must be a sequence (like tuple or list) instead of " + @@ -153,7 +153,7 @@ class _PartitionInfo(object): ValueError: If `shape` is not the same length as `self.full_shape`. If the variable is partitioned in more than one dimension. 
""" - if not isinstance(shape, collections_lib.Sequence) or isinstance( + if not isinstance(shape, collections_abc.Sequence) or isinstance( shape, six.string_types): raise TypeError( "`shape` must be a sequence (like tuple or list) instead of " + @@ -455,7 +455,7 @@ class _VariableStore(object): synchronization=VariableSynchronization.AUTO, aggregation=VariableAggregation.NONE): is_scalar = ( - shape is not None and isinstance(shape, collections_lib.Sequence) and + shape is not None and isinstance(shape, collections_abc.Sequence) and not shape) # Partitioned variable case if partitioner is not None and not is_scalar: @@ -2515,7 +2515,7 @@ def _call_partitioner(partitioner, shape, dtype): "shape: %s" % shape) slicing = partitioner(shape=shape, dtype=dtype) - if not isinstance(slicing, collections_lib.Sequence): + if not isinstance(slicing, collections_abc.Sequence): raise ValueError("Partitioner must return a sequence, but saw: %s" % slicing) if len(slicing) != shape.ndims: diff --git a/tensorflow/python/tools/saved_model_cli.py b/tensorflow/python/tools/saved_model_cli.py index 0f8f68436a3..415220ad14e 100644 --- a/tensorflow/python/tools/saved_model_cli.py +++ b/tensorflow/python/tools/saved_model_cli.py @@ -24,7 +24,6 @@ from __future__ import division from __future__ import print_function import argparse -import collections import os import re import sys @@ -51,6 +50,7 @@ from tensorflow.python.saved_model import signature_constants from tensorflow.python.tools import saved_model_aot_compile from tensorflow.python.tools import saved_model_utils from tensorflow.python.tpu import tpu +from tensorflow.python.util.compat import collections_abc _XLA_DEBUG_OPTIONS_URL = ( @@ -241,7 +241,7 @@ def _print_args(arguments, argument_type='Argument', indent=0): in_print(' %s' % element) elif isinstance(element, tensor_spec.TensorSpec): print((indent + 1) * ' ' + '%s: %s' % (element.name, repr(element))) - elif (isinstance(element, collections.Iterable) and + elif (isinstance(element, collections_abc.Iterable) and not isinstance(element, dict)): in_print(' DType: %s' % type(element).__name__) in_print(' Value: [', end='') From eebe5c6cf4849d749b2c7ced9d0fe529c166331b Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Fri, 10 Jul 2020 21:54:17 +0000 Subject: [PATCH 0200/2522] cleaned includes and BUILD file --- tensorflow/c/kernels/BUILD | 26 +------------------------ tensorflow/c/kernels/ops/summary.cc | 3 +-- tensorflow/c/kernels/summary_op.cc | 19 +++++++----------- tensorflow/c/kernels/summary_op_test.cc | 8 +------- tensorflow/c/tf_tensor.cc | 2 +- 5 files changed, 11 insertions(+), 47 deletions(-) diff --git a/tensorflow/c/kernels/BUILD b/tensorflow/c/kernels/BUILD index 309dc7e221b..b713b27f5dc 100644 --- a/tensorflow/c/kernels/BUILD +++ b/tensorflow/c/kernels/BUILD @@ -29,12 +29,8 @@ tf_kernel_library( prefix = "summary_op", deps = [ "//tensorflow/c:kernels", - "//tensorflow/c:ops", - "//tensorflow/c:tf_datatype", - "//tensorflow/c:tf_status", "//tensorflow/c:tf_tensor", "//tensorflow/core:framework", - "//tensorflow/core:lib", ], ) @@ -54,9 +50,6 @@ tf_gen_op_libs( op_lib_names = ["summary"], deps = [ "//tensorflow/c:ops", - "//tensorflow/c:tf_datatype", - "//tensorflow/c:tf_status", - "//tensorflow/c:tf_tensor", "//tensorflow/core:lib", ], ) @@ -80,28 +73,11 @@ tf_cc_test( deps = [ ":summary_op", ":summary_op_lib", - "//tensorflow/core:framework", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", "//tensorflow/core:test_main", - "//tensorflow/core:testlib", + 
"//tensorflow/core:testlib" ], ) -tf_cc_test( - name = "summary_op_benchmark_test", - srcs = ["summary_op_benchmark_test.cc"], - deps = [ - "summary_op", - "summary_op_lib", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - ], -) # Changes to the Android srcs here should be replicated in # tensorflow/contrib/makefile/tf_op_files.txt. # diff --git a/tensorflow/c/kernels/ops/summary.cc b/tensorflow/c/kernels/ops/summary.cc index 857ff6f29fa..355d73396b6 100644 --- a/tensorflow/c/kernels/ops/summary.cc +++ b/tensorflow/c/kernels/ops/summary.cc @@ -1,4 +1,4 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -16,7 +16,6 @@ limitations under the License. #include "tensorflow/c/ops.h" #include "tensorflow/core/framework/selective_registration.h" #include "tensorflow/core/platform/logging.h" -#include "tensorflow/core/platform/macros.h" static void scalar_summary_shape_inference_fn(TF_ShapeInferenceContext* ctx, TF_Status* status) { diff --git a/tensorflow/c/kernels/summary_op.cc b/tensorflow/c/kernels/summary_op.cc index cd2509247da..18aa897bfa9 100644 --- a/tensorflow/c/kernels/summary_op.cc +++ b/tensorflow/c/kernels/summary_op.cc @@ -1,5 +1,5 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -17,16 +17,9 @@ limitations under the License. #include #include "tensorflow/c/kernels.h" -#include "tensorflow/c/ops.h" #include "tensorflow/c/tf_tensor.h" -#include "tensorflow/core/framework/common_shape_fns.h" -#include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/selective_registration.h" -#include "tensorflow/core/framework/shape_inference.h" -#include "tensorflow/core/platform/macros.h" #include "tensorflow/core/framework/summary.pb.h" -#include "tensorflow/core/platform/protobuf.h" -#include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/types.h" // Struct that stores the status and TF_Tensor inputs to the opkernel. @@ -41,7 +34,7 @@ typedef struct Params{ if (TF_GetCode(status) == TF_OK){ TF_GetInput(ctx, 1, &values, status); } - else{ + else { values = nullptr; } }; @@ -57,8 +50,7 @@ typedef struct Params{ // dummy functions used for kernel registration static void* SummaryScalarOp_Create(TF_OpKernelConstruction* ctx) { - void* ptr; - return ptr; + return nullptr; } static void SummaryScalarOp_Delete(void* kernel) { @@ -162,10 +154,13 @@ void RegisterSummaryScalarOpKernel() { // register the bitcast kernel. 
TF_ATTRIBUTE_UNUSED static bool IsSummaryScalarOpKernelRegistered = []() { if (SHOULD_REGISTER_OP_KERNEL("SummaryScalar")) { - RegisterSummaryScalarOpKernel(); + RegisterSummaryScalarOpKernel(); + RegisterSummaryScalarOpKernel(); RegisterSummaryScalarOpKernel(); + RegisterSummaryScalarOpKernel(); RegisterSummaryScalarOpKernel(); RegisterSummaryScalarOpKernel(); + RegisterSummaryScalarOpKernel(); RegisterSummaryScalarOpKernel(); RegisterSummaryScalarOpKernel(); RegisterSummaryScalarOpKernel(); diff --git a/tensorflow/c/kernels/summary_op_test.cc b/tensorflow/c/kernels/summary_op_test.cc index 42f7a7ff3b3..722373d36ce 100644 --- a/tensorflow/c/kernels/summary_op_test.cc +++ b/tensorflow/c/kernels/summary_op_test.cc @@ -1,4 +1,4 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,16 +15,10 @@ limitations under the License. #include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/framework/attr_value_util.h" -#include "tensorflow/core/framework/fake_input.h" #include "tensorflow/core/framework/node_def.pb.h" -#include "tensorflow/core/framework/node_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/shape_inference.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/framework/summary.pb.h" -#include "tensorflow/core/platform/protobuf.h" -#include "tensorflow/c/tf_tensor.h" -#include "tensorflow/c/tf_tensor_internal.h" namespace tensorflow { namespace { diff --git a/tensorflow/c/tf_tensor.cc b/tensorflow/c/tf_tensor.cc index aa65cb7c927..b4b8c772341 100644 --- a/tensorflow/c/tf_tensor.cc +++ b/tensorflow/c/tf_tensor.cc @@ -17,6 +17,7 @@ limitations under the License. #include #include +#include #include "tensorflow/c/tf_status.h" #include "tensorflow/c/tf_status_helper.h" @@ -28,7 +29,6 @@ limitations under the License. 
#include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/lib/core/coding.h" #include "tensorflow/core/platform/casts.h" -#include using tensorflow::Status; using tensorflow::Tensor; From ca77c23bdaeb2d2ecd6b6a125a5520734b02d5bf Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Sat, 11 Jul 2020 00:49:50 +0200 Subject: [PATCH 0201/2522] Implement __next__ instead of next --- tensorflow/python/data/ops/dataset_ops.py | 6 +++--- tensorflow/python/data/ops/iterator_ops.py | 6 +++--- tensorflow/python/data/ops/multi_device_iterator_ops.py | 6 +++--- tensorflow/python/keras/callbacks.py | 2 +- tensorflow/python/keras/utils/data_utils.py | 6 +++--- tensorflow/python/lib/io/file_io.py | 6 +++--- 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 11b82933595..eac8e13c760 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -3778,11 +3778,11 @@ class _NumpyIterator(object): def __iter__(self): return self - def next(self): + def __next__(self): return nest.map_structure(lambda x: x.numpy(), next(self._iterator)) - def __next__(self): - return self.next() + def next(self): + return self.__next__() class _VariantTracker(tracking.CapturableResource): diff --git a/tensorflow/python/data/ops/iterator_ops.py b/tensorflow/python/data/ops/iterator_ops.py index 462711eef1e..eb2900f3299 100644 --- a/tensorflow/python/data/ops/iterator_ops.py +++ b/tensorflow/python/data/ops/iterator_ops.py @@ -734,8 +734,8 @@ class OwnedIterator(IteratorBase): def __iter__(self): return self - def __next__(self): # For Python 3 compatibility - return self.next() + def next(self): # For Python 2 compatibility + return self.__next__() def _next_internal(self): if not context.executing_eagerly(): @@ -769,7 +769,7 @@ class OwnedIterator(IteratorBase): def _type_spec(self): return IteratorSpec(self.element_spec) - def next(self): + def __next__(self): try: return self._next_internal() except errors.OutOfRangeError: diff --git a/tensorflow/python/data/ops/multi_device_iterator_ops.py b/tensorflow/python/data/ops/multi_device_iterator_ops.py index cb5329650e3..7fa49a13fe6 100644 --- a/tensorflow/python/data/ops/multi_device_iterator_ops.py +++ b/tensorflow/python/data/ops/multi_device_iterator_ops.py @@ -589,10 +589,10 @@ class OwnedMultiDeviceIterator(composite_tensor.CompositeTensor): def __iter__(self): return self - def __next__(self): - return self.next() - def next(self): + return self.__next__() + + def __next__(self): try: return self.get_next() except errors.OutOfRangeError: diff --git a/tensorflow/python/keras/callbacks.py b/tensorflow/python/keras/callbacks.py index 74b0300c648..131bc152e51 100644 --- a/tensorflow/python/keras/callbacks.py +++ b/tensorflow/python/keras/callbacks.py @@ -180,7 +180,7 @@ def set_callback_parameters(callback_list, def _is_generator_like(data): """Checks if data is a generator, Sequence, or Iterator.""" - return (hasattr(data, 'next') or hasattr(data, '__next__') or isinstance( + return (hasattr(data, '__next__') or hasattr(data, 'next') or isinstance( data, (Sequence, iterator_ops.Iterator, iterator_ops.OwnedIterator))) diff --git a/tensorflow/python/keras/utils/data_utils.py b/tensorflow/python/keras/utils/data_utils.py index 3456db013d3..7eb0b63aebd 100644 --- a/tensorflow/python/keras/utils/data_utils.py +++ b/tensorflow/python/keras/utils/data_utils.py @@ -384,10 +384,10 @@ class ThreadsafeIter(object): def 
__iter__(self): return self - def __next__(self): - return self.next() - def next(self): + return self.__next__() + + def __next__(self): with self.lock: if self._exception: raise self._exception # pylint: disable=raising-bad-type diff --git a/tensorflow/python/lib/io/file_io.py b/tensorflow/python/lib/io/file_io.py index dbf04097fce..fb4e19da902 100644 --- a/tensorflow/python/lib/io/file_io.py +++ b/tensorflow/python/lib/io/file_io.py @@ -202,14 +202,14 @@ class FileIO(object): def __iter__(self): return self - def next(self): + def __next__(self): retval = self.readline() if not retval: raise StopIteration() return retval - def __next__(self): - return self.next() + def next(self): + return self.__next__() def flush(self): """Flushes the Writable file. From 3ed2b8a9a61caf122f6182160a24e8ee0a6e2bdb Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Sat, 11 Jul 2020 00:50:10 +0200 Subject: [PATCH 0202/2522] Implement __bool__ instead of __nonzero__ --- tensorflow/python/ops/numpy_ops/np_arrays.py | 6 +++--- tensorflow/python/training/server_lib.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/ops/numpy_ops/np_arrays.py b/tensorflow/python/ops/numpy_ops/np_arrays.py index a8be2c9f47b..9a859047843 100644 --- a/tensorflow/python/ops/numpy_ops/np_arrays.py +++ b/tensorflow/python/ops/numpy_ops/np_arrays.py @@ -263,11 +263,11 @@ class ndarray(composite_tensor.CompositeTensor): def __float__(self): return float(self.data) - def __nonzero__(self): + def __bool__(self): return bool(self.data) - def __bool__(self): - return self.__nonzero__() + def __nonzero__(self): + return self.__bool__() def __iter__(self): if not isinstance(self.data, ops.EagerTensor): diff --git a/tensorflow/python/training/server_lib.py b/tensorflow/python/training/server_lib.py index be7a9f62d4f..b811319930e 100644 --- a/tensorflow/python/training/server_lib.py +++ b/tensorflow/python/training/server_lib.py @@ -319,11 +319,11 @@ class ClusterSpec(object): "job names to lists of network addresses, or a " "`ClusterDef` protocol buffer") - def __nonzero__(self): + def __bool__(self): return bool(self._cluster_spec) - # Python 3.x - __bool__ = __nonzero__ + # Python 2.x + __nonzero__ = __bool__ def __eq__(self, other): return self._cluster_spec == other From 98db6af79d723113e8edad839aa3f8aa163a7076 Mon Sep 17 00:00:00 2001 From: Marat Dukhan Date: Fri, 10 Jul 2020 17:09:34 -0700 Subject: [PATCH 0203/2522] Support 1x1 Max/Average Pooling with 1x1 stride in XNNPACK delegate PiperOrigin-RevId: 320703905 Change-Id: Id9787ba836b7d50f34dcad82033553d0e70b1ac9 --- tensorflow/lite/delegates/xnnpack/README.md | 4 +- .../delegates/xnnpack/average_pool_2d_test.cc | 54 +++++++++++++ .../delegates/xnnpack/max_pool_2d_test.cc | 54 +++++++++++++ .../delegates/xnnpack/xnnpack_delegate.cc | 78 ++++++++++++------- 4 files changed, 159 insertions(+), 31 deletions(-) diff --git a/tensorflow/lite/delegates/xnnpack/README.md b/tensorflow/lite/delegates/xnnpack/README.md index 88b0729ed62..47c5d7db907 100644 --- a/tensorflow/lite/delegates/xnnpack/README.md +++ b/tensorflow/lite/delegates/xnnpack/README.md @@ -100,7 +100,7 @@ Below is the list of current operators and limitations: ### `AVERAGE_POOL_2D` * Inputs and outputs must be in 32-bit floating-point format. -* 1x1 pooling is not supported. +* 1x1 pooling with non-unit stride is not supported. * Fused `NONE`, `RELU`, `RELU_N1_TO_1`, and `RELU6` activations are supported, but fused `TANH` and `SIGN_BIT` activations are not. 
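The reason 1x1 pooling with unit stride can now be delegated: each 1x1 window covers exactly one element, so max and average pooling both degenerate to the identity, and only the fused activation clamp has any effect; that is why the xnnpack_delegate.cc changes further down lower this case to xnn_define_clamp. A small NumPy sketch of that equivalence (illustration only, not delegate code):

    import numpy as np

    x = np.random.randn(2, 5, 5, 3).astype(np.float32)  # NHWC activations

    # A 1x1 pooling window with stride 1 reads exactly one element per output,
    # so both MAX_POOL_2D and AVERAGE_POOL_2D return the input unchanged.
    pooled = x.copy()

    # With a fused RELU6 activation the whole op is therefore just a clamp.
    relu6 = lambda t: np.clip(t, 0.0, 6.0)
    assert np.array_equal(relu6(pooled), np.clip(x, 0.0, 6.0))
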
@@ -157,7 +157,7 @@ Below is the list of current operators and limitations: ### `MAX_POOL_2D` * Inputs and outputs must be in 32-bit floating-point format. -* 1x1 pooling is not supported. +* 1x1 pooling with non-unit stride is not supported. * Fused `NONE`, `RELU`, `RELU_N1_TO_1`, and `RELU6` activations are supported, but fused `TANH` and `SIGN_BIT` activations are not. diff --git a/tensorflow/lite/delegates/xnnpack/average_pool_2d_test.cc b/tensorflow/lite/delegates/xnnpack/average_pool_2d_test.cc index 515fec8083f..9cfaf8e432e 100644 --- a/tensorflow/lite/delegates/xnnpack/average_pool_2d_test.cc +++ b/tensorflow/lite/delegates/xnnpack/average_pool_2d_test.cc @@ -25,6 +25,60 @@ limitations under the License. namespace tflite { namespace xnnpack { +TEST(AveragePool2D, UnitPoolSamePadding) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto batch_rng = + std::bind(std::uniform_int_distribution(2, 4), std::ref(rng)); + auto input_rng = + std::bind(std::uniform_int_distribution(10, 25), std::ref(rng)); + auto channel_rng = + std::bind(std::uniform_int_distribution(5, 16), std::ref(rng)); + + Pool2DTester() + .BatchSize(batch_rng()) + .InputHeight(input_rng()) + .InputWidth(input_rng()) + .Channels(channel_rng()) + .PoolingHeight(1) + .PoolingWidth(1) + .StrideHeight(1) + .StrideWidth(1) + .SamePadding() + .Test(BuiltinOperator_AVERAGE_POOL_2D, xnnpack_delegate.get()); +} + +TEST(AveragePool2D, UnitPoolValidPadding) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto batch_rng = + std::bind(std::uniform_int_distribution(2, 4), std::ref(rng)); + auto input_rng = + std::bind(std::uniform_int_distribution(10, 25), std::ref(rng)); + auto channel_rng = + std::bind(std::uniform_int_distribution(5, 16), std::ref(rng)); + + Pool2DTester() + .BatchSize(batch_rng()) + .InputHeight(input_rng()) + .InputWidth(input_rng()) + .Channels(channel_rng()) + .PoolingHeight(1) + .PoolingWidth(1) + .StrideHeight(1) + .StrideWidth(1) + .ValidPadding() + .Test(BuiltinOperator_AVERAGE_POOL_2D, xnnpack_delegate.get()); +} + TEST(AveragePool2D, EqualPoolAndStrideWithSamePadding) { std::unique_ptr xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), diff --git a/tensorflow/lite/delegates/xnnpack/max_pool_2d_test.cc b/tensorflow/lite/delegates/xnnpack/max_pool_2d_test.cc index aaf217800d8..a9651ad1fc5 100644 --- a/tensorflow/lite/delegates/xnnpack/max_pool_2d_test.cc +++ b/tensorflow/lite/delegates/xnnpack/max_pool_2d_test.cc @@ -25,6 +25,60 @@ limitations under the License. 
namespace tflite { namespace xnnpack { +TEST(MaxPool2D, UnitPoolSamePadding) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto batch_rng = + std::bind(std::uniform_int_distribution(2, 4), std::ref(rng)); + auto input_rng = + std::bind(std::uniform_int_distribution(10, 25), std::ref(rng)); + auto channel_rng = + std::bind(std::uniform_int_distribution(5, 16), std::ref(rng)); + + Pool2DTester() + .BatchSize(batch_rng()) + .InputHeight(input_rng()) + .InputWidth(input_rng()) + .Channels(channel_rng()) + .PoolingHeight(1) + .PoolingWidth(1) + .StrideHeight(1) + .StrideWidth(1) + .SamePadding() + .Test(BuiltinOperator_MAX_POOL_2D, xnnpack_delegate.get()); +} + +TEST(MaxPool2D, UnitPoolValidPadding) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto batch_rng = + std::bind(std::uniform_int_distribution(2, 4), std::ref(rng)); + auto input_rng = + std::bind(std::uniform_int_distribution(10, 25), std::ref(rng)); + auto channel_rng = + std::bind(std::uniform_int_distribution(5, 16), std::ref(rng)); + + Pool2DTester() + .BatchSize(batch_rng()) + .InputHeight(input_rng()) + .InputWidth(input_rng()) + .Channels(channel_rng()) + .PoolingHeight(1) + .PoolingWidth(1) + .StrideHeight(1) + .StrideWidth(1) + .ValidPadding() + .Test(BuiltinOperator_MAX_POOL_2D, xnnpack_delegate.get()); +} + TEST(MaxPool2D, EqualPoolAndStrideWithSamePadding) { std::unique_ptr xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), diff --git a/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc b/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc index 535c6a9ef1e..c7aea59b231 100644 --- a/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc +++ b/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc @@ -570,8 +570,13 @@ class Subgraph { params->filter_height, node_index); return kTfLiteError; } - if (params->filter_width == 1 && params->filter_height == 1) { - TF_LITE_MAYBE_KERNEL_LOG(context, "meaningless 1x1 pooling in node #%d", + + if (params->filter_width == 1 && params->filter_height == 1 && + std::max(params->stride_width, params->stride_height) > 1) { + TF_LITE_MAYBE_KERNEL_LOG(context, + "unsupported pooling with 1x1 filter " + "and %dx%d stride in node #%d", + params->stride_width, params->stride_height, node_index); return kTfLiteError; } @@ -1105,19 +1110,27 @@ class Subgraph { &output_max)); if (subgraph != nullptr) { - const xnn_status status = xnn_define_average_pooling_2d( - subgraph, - /*input_padding_top=*/0, - /*input_padding_right=*/0, - /*input_padding_bottom=*/0, - /*input_padding_left=*/0, - static_cast(pool_params->filter_height), - static_cast(pool_params->filter_width), - static_cast(pool_params->stride_height), - static_cast(pool_params->stride_width), output_min, - output_max, - /*input_id=*/xnnpack_tensors[node->inputs->data[0]], - /*output_id=*/xnnpack_tensors[node->outputs->data[0]], flags); + xnn_status status = xnn_status_success; + if (pool_params->filter_height == 1 && pool_params->filter_width == 1) { + status = xnn_define_clamp( + subgraph, output_min, output_max, + /*input_id=*/xnnpack_tensors[node->inputs->data[0]], + /*output_id=*/xnnpack_tensors[node->outputs->data[0]], /*flags=*/0); + } else { + status = xnn_define_average_pooling_2d( + subgraph, + /*input_padding_top=*/0, + 
/*input_padding_right=*/0, + /*input_padding_bottom=*/0, + /*input_padding_left=*/0, + static_cast(pool_params->filter_height), + static_cast(pool_params->filter_width), + static_cast(pool_params->stride_height), + static_cast(pool_params->stride_width), output_min, + output_max, + /*input_id=*/xnnpack_tensors[node->inputs->data[0]], + /*output_id=*/xnnpack_tensors[node->outputs->data[0]], flags); + } if (status != xnn_status_success) { TF_LITE_KERNEL_LOG(logging_context, "failed to delegate AVERAGE_POOL_2D node #%d", @@ -1710,20 +1723,27 @@ class Subgraph { &output_max)); if (subgraph != nullptr) { - const xnn_status status = xnn_define_max_pooling_2d( - subgraph, - /*input_padding_top=*/0, - /*input_padding_right=*/0, - /*input_padding_bottom=*/0, - /*input_padding_left=*/0, - static_cast(pool_params->filter_height), - static_cast(pool_params->filter_width), - static_cast(pool_params->stride_height), - static_cast(pool_params->stride_width), - /*dilation_height=*/1, - /*dilation_width=*/1, output_min, output_max, - /*input_id=*/xnnpack_tensors[node->inputs->data[0]], - /*output_id=*/xnnpack_tensors[node->outputs->data[0]], flags); + xnn_status status = xnn_status_success; + if (pool_params->filter_height == 1 && pool_params->filter_width == 1) { + status = xnn_define_clamp( + subgraph, output_min, output_max, + /*input_id=*/xnnpack_tensors[node->inputs->data[0]], + /*output_id=*/xnnpack_tensors[node->outputs->data[0]], /*flags=*/0); + } else { + status = xnn_define_max_pooling_2d( + subgraph, + /*input_padding_top=*/0, + /*input_padding_right=*/0, + /*input_padding_bottom=*/0, + /*input_padding_left=*/0, + static_cast(pool_params->filter_height), + static_cast(pool_params->filter_width), + static_cast(pool_params->stride_height), + static_cast(pool_params->stride_width), + /*dilation_height=*/1, /*dilation_width=*/1, output_min, output_max, + /*input_id=*/xnnpack_tensors[node->inputs->data[0]], + /*output_id=*/xnnpack_tensors[node->outputs->data[0]], flags); + } if (status != xnn_status_success) { TF_LITE_KERNEL_LOG(logging_context, "failed to delegate MAX_POOL_2D node #%d", From dea810d4e3fc516cdda193d94dac5c4a09a255ab Mon Sep 17 00:00:00 2001 From: Xiao Yu Date: Fri, 10 Jul 2020 17:12:58 -0700 Subject: [PATCH 0204/2522] Refactor SelectDevice method to allow us to reuse this method for TFRT device placement. This can allow TFRT to make the same decision as current TF. Remove a few unit test, since it is harder to write unit tests using the new SelectDevice method. However, device_placement_test should provide better coverage than the unit test. 
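To make the shape of this refactor concrete: callers no longer build a PrioritizedDeviceTypeVector of supported device types themselves and pass it (together with a dtype) into SelectDevice; instead they hand over the NodeDef and the helper derives the supported devices internally, which is what lets other runtimes reuse the same placement decision. The toy C++ program below models only that interface change; ToyNodeDef, KernelRegistry, and SelectDeviceForNode are invented names for illustration and do not exist in TensorFlow, whose real signature in this patch is EagerContext::SelectDevice(DeviceNameUtils::ParsedName preferred, const NodeDef& ndef, Device** out).

#include <iostream>
#include <map>
#include <string>
#include <vector>

// Minimal stand-in for a node definition: just the op name.
struct ToyNodeDef {
  std::string op;
};

// Hypothetical kernel registry: op name -> device types that have a kernel,
// listed in priority order.
const std::map<std::string, std::vector<std::string>>& KernelRegistry() {
  static const std::map<std::string, std::vector<std::string>> registry = {
      {"MatMul", {"GPU", "CPU"}},
      {"DecodeCsv", {"CPU"}},
  };
  return registry;
}

// SelectDevice-style helper: takes the preferred device and the node def,
// computes the supported devices itself, and picks one.
bool SelectDeviceForNode(const std::string& preferred, const ToyNodeDef& ndef,
                         std::string* out) {
  auto it = KernelRegistry().find(ndef.op);
  if (it == KernelRegistry().end()) return false;  // "Could not find device for node"
  const std::vector<std::string>& supported = it->second;
  // Honor the preference when a kernel exists for it; otherwise fall back to
  // the highest-priority supported device (soft placement).
  for (const std::string& d : supported) {
    if (d == preferred) {
      *out = d;
      return true;
    }
  }
  *out = supported.front();
  return true;
}

int main() {
  std::string device;
  if (SelectDeviceForNode("GPU", {"DecodeCsv"}, &device)) {
    std::cout << "DecodeCsv placed on " << device << "\n";  // falls back to CPU
  }
  if (SelectDeviceForNode("GPU", {"MatMul"}, &device)) {
    std::cout << "MatMul placed on " << device << "\n";  // honors the GPU request
  }
  return 0;
}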
PiperOrigin-RevId: 320704318 Change-Id: I9bee9b508d4eba714f68b8c7464d818091224fdc --- tensorflow/core/common_runtime/eager/BUILD | 15 ++ .../core/common_runtime/eager/context.cc | 24 ++- .../core/common_runtime/eager/context.h | 12 +- .../core/common_runtime/eager/context_test.cc | 89 --------- .../core/common_runtime/eager/execute.cc | 14 +- .../common_runtime/eager/placement_test.cc | 184 ++++++++++++++++++ .../python/eager/device_placement_test.py | 8 + 7 files changed, 226 insertions(+), 120 deletions(-) create mode 100644 tensorflow/core/common_runtime/eager/placement_test.cc diff --git a/tensorflow/core/common_runtime/eager/BUILD b/tensorflow/core/common_runtime/eager/BUILD index 911b59eed17..003a4e5996f 100644 --- a/tensorflow/core/common_runtime/eager/BUILD +++ b/tensorflow/core/common_runtime/eager/BUILD @@ -120,6 +120,21 @@ tf_cc_test( ], ) +tf_cc_test( + name = "placement_test", + srcs = ["placement_test.cc"], + deps = [ + ":context", + ":core", + ":eager_operation", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + ], +) + tf_cuda_library( name = "eager_operation", srcs = [ diff --git a/tensorflow/core/common_runtime/eager/context.cc b/tensorflow/core/common_runtime/eager/context.cc index ae992f9d6f1..106e6bb87db 100644 --- a/tensorflow/core/common_runtime/eager/context.cc +++ b/tensorflow/core/common_runtime/eager/context.cc @@ -275,13 +275,19 @@ Device* SelectBestMatchingDevice(const DeviceNameUtils::ParsedName& pattern, } // namespace Status EagerContext::SelectDevice(DeviceNameUtils::ParsedName preferred, - const PrioritizedDeviceTypeVector& supported, - const DataType dtype, Device** out) const { + const NodeDef& ndef, Device** out) const { DCHECK(out != nullptr); - // We always place string tensors on the CPU device if we're allowed to. - if (dtype == DT_STRING && AllowSoftPlacement()) { - preferred = HostCPU()->parsed_name(); + PrioritizedDeviceTypeVector supported_devs; + auto device_type_list = prioritized_device_type_list(); + TF_RETURN_IF_ERROR(SupportedDeviceTypesForNode( + *device_type_list, ndef, &supported_devs, &HostCPU()->parsed_name())); + if (supported_devs.empty()) { + return errors::NotFound("Could not find device for node: ", + errors::FormatNodeNameForError(ndef.name()), " = ", + ndef.op(), "[", SummarizeAttrs(ndef), "]", + "\nAll kernels registered for op ", ndef.op(), + ":\n", KernelsRegisteredForOp(ndef.op())); } // Select the first matching registered device from the supported device @@ -290,7 +296,7 @@ Status EagerContext::SelectDevice(DeviceNameUtils::ParsedName preferred, const auto pflr_device_set = pflr()->device_set(); const PrioritizedDeviceVector& existing = pflr_device_set->prioritized_devices(); - *out = SelectBestMatchingDevice(preferred, existing, supported); + *out = SelectBestMatchingDevice(preferred, existing, supported_devs); if (*out != nullptr) { return Status::OK(); } @@ -302,7 +308,7 @@ Status EagerContext::SelectDevice(DeviceNameUtils::ParsedName preferred, soft_device_name.has_id = false; // TODO(b/148213746): Soft placement logic picks up another task if the // requested does not exist. 
- *out = SelectBestMatchingDevice(soft_device_name, existing, supported); + *out = SelectBestMatchingDevice(soft_device_name, existing, supported_devs); if (*out != nullptr) { return Status::OK(); } @@ -313,7 +319,7 @@ Status EagerContext::SelectDevice(DeviceNameUtils::ParsedName preferred, "Could not satisfy device specification '", preferred, "'. enable_soft_placement=", AllowSoftPlacement(), ". Supported device types [", - absl::StrJoin(DeviceTypesToString(supported), ", "), + absl::StrJoin(DeviceTypesToString(supported_devs), ", "), "]. All available devices [", absl::StrJoin(DevicesToString(existing), ", "), "]."); } @@ -322,7 +328,7 @@ Status EagerContext::SelectDevice(DeviceNameUtils::ParsedName preferred, absl::StrJoin(DevicesToString(existing), ", "), "]. enable_soft_placement=", AllowSoftPlacement(), ". Supported devices types [", - absl::StrJoin(DeviceTypesToString(supported), ", "), "]."); + absl::StrJoin(DeviceTypesToString(supported_devs), ", "), "]."); } void EagerContext::ResetClusterFLR( diff --git a/tensorflow/core/common_runtime/eager/context.h b/tensorflow/core/common_runtime/eager/context.h index 5a14ebdfda7..36aa2f18292 100644 --- a/tensorflow/core/common_runtime/eager/context.h +++ b/tensorflow/core/common_runtime/eager/context.h @@ -222,24 +222,18 @@ class EagerContext : public ImmediateExecutionContext, public core::RefCounted { // Select an appropriate device for an operation. // - // Given the preferred device for the operation, and the list of devices the - // operation supports, finds the best suitable device for the operation in - // this context. + // Given the preferred device for the operation, and the node_def, finds the + // best suitable device for the operation in this context. // // The preferred device is specified as a `ParsedName` containing the elements // (details) that the resulting device should match. If there are no such // devices, and the context currently allows soft device placement, a suitable // device not matching `preferred` will be chosen. // - // The `dtype` parameter specifies the operation's result data type, if - // known. Setting it to DT_INVALID will make this method not use the data type - // for its decisions. - // // The chosen device is stored in the `device` argument. The argument is not // modified unless this method returns `Status::OK()`. Status SelectDevice(DeviceNameUtils::ParsedName preferred, - const PrioritizedDeviceTypeVector& supported, - const DataType dtype, Device** device) const; + const NodeDef& ndef, Device** out) const; // Sets the implicit copy policy for the current thread. void SetThreadLocalMirroringPolicy(ContextMirroringPolicy); diff --git a/tensorflow/core/common_runtime/eager/context_test.cc b/tensorflow/core/common_runtime/eager/context_test.cc index 7f34884b4db..e577b1d8152 100644 --- a/tensorflow/core/common_runtime/eager/context_test.cc +++ b/tensorflow/core/common_runtime/eager/context_test.cc @@ -81,95 +81,6 @@ class EagerContextTest : public ::testing::Test { EagerContext* context_; }; -TEST_F(EagerContextTest, SelectDeviceExplicitHardPlacement) { - SessionOptions options; - options.config.set_log_device_placement(true); - options.config.set_allow_soft_placement(false); - InitContext(options, DEVICE_PLACEMENT_EXPLICIT); - - Device* dev; - DeviceNameUtils::ParsedName requested; - const PrioritizedDeviceTypeVector supported{ - std::make_pair(DeviceType(DEVICE_GPU), 20), - std::make_pair(DeviceType(DEVICE_CPU), 10), - }; - - // No supported devices should result in an error. 
- requested.Clear(); - Status status = context()->SelectDevice( - requested, PrioritizedDeviceTypeVector{}, DT_INVALID, &dev); - EXPECT_TRUE(errors::IsInvalidArgument(status)); - EXPECT_TRUE( - absl::StrContains(status.error_message(), "No supported device found")) - << "unexpected error message " << status.error_message(); - - // An invalid requested device should also cause an error. - ASSERT_TRUE(DeviceNameUtils::ParseLocalName("GPU:99", &requested)); - status = context()->SelectDevice(requested, supported, DT_INVALID, &dev); - EXPECT_TRUE(errors::IsInvalidArgument(status)); - EXPECT_TRUE(absl::StrContains(status.error_message(), - "Could not satisfy device specification")) - << "unexpected error message " << status.error_message(); - - // Should pick the "best" supported device if given no constraints. - requested.Clear(); - TF_ASSERT_OK(context()->SelectDevice(requested, supported, DT_INVALID, &dev)); - EXPECT_EQ(dev->device_type(), DEVICE_GPU); - - // Should pick a CPU if asked to. - ASSERT_TRUE(DeviceNameUtils::ParseLocalName("CPU:1", &requested)); - TF_ASSERT_OK(context()->SelectDevice(requested, supported, DT_INVALID, &dev)); - EXPECT_EQ(dev->device_type(), DEVICE_CPU); - - // String tensors stay in GPU under hard device placement. - requested.Clear(); - TF_ASSERT_OK(context()->SelectDevice(requested, supported, DT_STRING, &dev)); - EXPECT_EQ(dev->device_type(), DEVICE_GPU); -} - -TEST_F(EagerContextTest, SelectDeviceExplicitSoftPlacement) { - SessionOptions options; - options.config.set_log_device_placement(true); - options.config.set_allow_soft_placement(true); - InitContext(options, DEVICE_PLACEMENT_EXPLICIT); - - Device* dev; - DeviceNameUtils::ParsedName requested; - const PrioritizedDeviceTypeVector supported{ - std::make_pair(DeviceType(DEVICE_GPU), 20), - std::make_pair(DeviceType(DEVICE_CPU), 10), - }; - - // No supported devices should result in an error. - requested.Clear(); - Status status = context()->SelectDevice( - requested, PrioritizedDeviceTypeVector{}, DT_INVALID, &dev); - EXPECT_TRUE(errors::IsInvalidArgument(status)); - EXPECT_TRUE( - absl::StrContains(status.error_message(), "No supported device found")) - << "unexpected error message " << status.error_message(); - - // An invalid requested device should be replaced by the "best" one. - ASSERT_TRUE(DeviceNameUtils::ParseLocalName("GPU:99", &requested)); - TF_ASSERT_OK(context()->SelectDevice(requested, supported, DT_INVALID, &dev)); - EXPECT_EQ(dev->device_type(), DEVICE_GPU); - - // Should pick the "best" supported device if given no constraints. - requested.Clear(); - TF_ASSERT_OK(context()->SelectDevice(requested, supported, DT_INVALID, &dev)); - EXPECT_EQ(dev->device_type(), DEVICE_GPU); - - // Should pick a CPU if asked to. - ASSERT_TRUE(DeviceNameUtils::ParseLocalName("CPU:1", &requested)); - TF_ASSERT_OK(context()->SelectDevice(requested, supported, DT_INVALID, &dev)); - EXPECT_EQ(dev->device_type(), DEVICE_CPU); - - // String tensors move to CPU under soft device placement. 
- requested.Clear(); - TF_ASSERT_OK(context()->SelectDevice(requested, supported, DT_STRING, &dev)); - EXPECT_EQ(dev->device_type(), DEVICE_CPU); -} - TEST_F(EagerContextTest, CompositeDevice) { InitContext(SessionOptions(), DEVICE_PLACEMENT_EXPLICIT); std::vector underlying_devices = { diff --git a/tensorflow/core/common_runtime/eager/execute.cc b/tensorflow/core/common_runtime/eager/execute.cc index a031974a969..507cb1390cd 100644 --- a/tensorflow/core/common_runtime/eager/execute.cc +++ b/tensorflow/core/common_runtime/eager/execute.cc @@ -470,20 +470,8 @@ Status GetOrCreateKernelAndDevice( const NodeDef& ndef = op->MutableAttrs()->BuildNodeDef(); if (device == nullptr) { - PrioritizedDeviceTypeVector supported_devs; - auto device_type_list = ctx.prioritized_device_type_list(); TF_RETURN_IF_ERROR( - SupportedDeviceTypesForNode(*device_type_list, ndef, &supported_devs, - &ctx.HostCPU()->parsed_name())); - if (supported_devs.empty()) { - return errors::NotFound( - "Could not find device for node: ", - errors::FormatNodeNameForError(ndef.name()), " = ", ndef.op(), "[", - SummarizeAttrs(ndef), "]", "\nAll kernels registered for op ", - ndef.op(), ":\n", KernelsRegisteredForOp(ndef.op())); - } - TF_RETURN_IF_ERROR(ctx.SelectDevice(op->GetDeviceParsedName(), - supported_devs, DT_INVALID, &device)); + ctx.SelectDevice(op->GetDeviceParsedName(), ndef, &device)); DVLOG(1) << "Placer place op [" << op->Name() << "] on device: " << device->name(); diff --git a/tensorflow/core/common_runtime/eager/placement_test.cc b/tensorflow/core/common_runtime/eager/placement_test.cc new file mode 100644 index 00000000000..4ea38d2f5f9 --- /dev/null +++ b/tensorflow/core/common_runtime/eager/placement_test.cc @@ -0,0 +1,184 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/common_runtime/eager/context.h" +#include "tensorflow/core/framework/function_testlib.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace { + +using ::tensorflow::test::function::NDef; + +constexpr char kFullCPU[] = "/job:a/replica:0/task:0/device:CPU:0"; +constexpr char kFullGPU[] = "/job:a/replica:0/task:0/device:FakeGPU:0"; + +//////////////////////////////////////////////////////////////////////////////// +// +// Op, kernel to set up the environment. +// +// The Placer uses information about the op (input types), +// kernel (device constraints). To avoid depending on the full runtime, we +// define dummy implementations of these, and register them with the +// runtime. +// +//////////////////////////////////////////////////////////////////////////////// + +// A dummy OpKernel that is used to register ops on different devices. 
+class DummyOp : public OpKernel { + public: + explicit DummyOp(OpKernelConstruction* context) : OpKernel(context) {} + void Compute(OpKernelContext* context) override {} +}; + +// Register the following ops so they can be added to a Graph, and +// kernels so that they can be placed on particular device types. +REGISTER_OP("InvalidOp").Output("o: Ref(float)"); + +REGISTER_OP("TestOp").Output("o: Ref(float)"); +REGISTER_KERNEL_BUILDER(Name("TestOp").Device(DEVICE_CPU).Priority(1), DummyOp); +REGISTER_KERNEL_BUILDER(Name("TestOp").Device("FakeGPU").Priority(2), DummyOp); + +static Device* CreateDevice(const char* type, const char* name) { + class FakeDevice : public Device { + public: + explicit FakeDevice(const DeviceAttributes& attr) : Device(nullptr, attr) {} + Status Sync() override { return Status::OK(); } + Allocator* GetAllocator(AllocatorAttributes) override { return nullptr; } + }; + DeviceAttributes attr; + attr.set_name(name); + attr.set_device_type(type); + return new FakeDevice(attr); +} + +class PlacementTest : public ::testing::Test { + public: + PlacementTest() : device_manager_(nullptr), context_(nullptr) {} + + ~PlacementTest() override { + delete device_manager_; + if (context_) { + context_->Unref(); + } + } + + EagerContext* context() { return context_; } + + void InitContext(const SessionOptions& opts, + ContextDevicePlacementPolicy policy) { + ASSERT_EQ(context_, nullptr); + InitDeviceManager(); + context_ = new EagerContext( + opts, policy, + /* default_mirroring_policy */ MIRRORING_NONE, + /* async */ false, + /* lazy_copy_function_remote_inputs */ false, device_manager_, + /* device_mgr_owned */ false, /* rendezvous */ nullptr, + /* custom_kernel_creator */ nullptr, + /* cluster_flr */ nullptr); + } + + protected: + void InitDeviceManager() { + ASSERT_EQ(device_manager_, nullptr); + device_manager_ = new DynamicDeviceMgr(); + std::vector> added_devices; + SessionOptions opts; + + // Have to use real CPU device. Other, ctx->HostCPU() will return invalid + // device. + added_devices.emplace_back(CreateDevice(DEVICE_CPU, kFullCPU)); + added_devices.emplace_back(CreateDevice("FakeGPU", kFullGPU)); + + TF_CHECK_OK(device_manager_->AddDevices(std::move(added_devices))); + } + + DynamicDeviceMgr* device_manager_; + EagerContext* context_; +}; + +TEST_F(PlacementTest, SelectDeviceExplicitHardPlacement) { + SessionOptions options; + options.config.set_log_device_placement(true); + options.config.set_allow_soft_placement(false); + InitContext(options, DEVICE_PLACEMENT_EXPLICIT); + + Device* dev; + DeviceNameUtils::ParsedName requested; + + // No supported devices should result in an error. + requested.Clear(); + NodeDef invalid_op = NDef("invalid_op", "InvalidOp", {}, {}); + + Status status = context()->SelectDevice(requested, invalid_op, &dev); + LOG(ERROR) << status.ToString(); + EXPECT_TRUE(errors::IsNotFound(status)); + EXPECT_TRUE(absl::StrContains(status.error_message(), + "Could not find device for node")) + << "unexpected error message " << status.error_message(); + + // An invalid requested device should also cause an error. 
+ ASSERT_TRUE(DeviceNameUtils::ParseLocalName("FakeGPU:99", &requested)); + NodeDef node = NDef("x", "TestOp", {}, {}); + status = context()->SelectDevice(requested, node, &dev); + + EXPECT_TRUE(errors::IsInvalidArgument(status)); + EXPECT_TRUE(absl::StrContains(status.error_message(), + "Could not satisfy device specification")) + << "unexpected error message " << status.error_message(); + + // Should pick the device with higher priority if given no constraints. + requested.Clear(); + TF_ASSERT_OK(context()->SelectDevice(requested, node, &dev)); + EXPECT_EQ(dev->device_type(), "FakeGPU"); + + // Should pick a CPU if asked to. + ASSERT_TRUE(DeviceNameUtils::ParseLocalName("CPU:0", &requested)); + TF_ASSERT_OK(context()->SelectDevice(requested, node, &dev)); + EXPECT_EQ(dev->device_type(), DEVICE_CPU); +} + +TEST_F(PlacementTest, SelectDeviceExplicitSoftPlacement) { + SessionOptions options; + options.config.set_log_device_placement(true); + options.config.set_allow_soft_placement(true); + InitContext(options, DEVICE_PLACEMENT_EXPLICIT); + + Device* dev; + DeviceNameUtils::ParsedName requested; + + // No supported devices should result in an error. + requested.Clear(); + NodeDef invalid_op = NDef("invalid_op", "InvalidOp", {}, {}); + + Status status = context()->SelectDevice(requested, invalid_op, &dev); + LOG(ERROR) << status.ToString(); + EXPECT_TRUE(errors::IsNotFound(status)); + EXPECT_TRUE(absl::StrContains(status.error_message(), + "Could not find device for node")) + << "unexpected error message " << status.error_message(); + + // An invalid requested device should be replaced by the "best" one. + ASSERT_TRUE(DeviceNameUtils::ParseLocalName("FakeGPU:99", &requested)); + NodeDef node = NDef("x", "TestOp", {}, {}); + status = context()->SelectDevice(requested, node, &dev); + EXPECT_EQ(dev->device_type(), "FakeGPU"); +} + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/python/eager/device_placement_test.py b/tensorflow/python/eager/device_placement_test.py index af6c68243b4..1ebe5e2ffef 100644 --- a/tensorflow/python/eager/device_placement_test.py +++ b/tensorflow/python/eager/device_placement_test.py @@ -82,6 +82,14 @@ class SoftDevicePlacementTest(test.TestCase, parameterized.TestCase): self.assertIn('CPU', c.device) self.assertIn('CPU', d.device) + @test_util.run_gpu_only + def testSoftPlacedGPU(self): + a = constant_op.constant(1) + b = constant_op.constant(2) + with ops.device('GPU:110'): + c = a + b + self.assertIn('GPU:0', c.device) + @test_util.run_gpu_only def testNestedDeviceScope(self): a = constant_op.constant(1) From 6ffe7a2ecd7f275723eebc406ac6498c0632d5e9 Mon Sep 17 00:00:00 2001 From: Kibeom Kim Date: Fri, 10 Jul 2020 17:32:11 -0700 Subject: [PATCH 0205/2522] Disable flaky test //third_party/tensorflow/core/grappler/optimizers:remapper_test_gpu FuseConv2DWithBiasAndActivationOnGPU PiperOrigin-RevId: 320706567 Change-Id: I7cd8c82204a07f84ca0f65dfa22568944b3c8e07 --- tensorflow/core/grappler/optimizers/remapper_test.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/optimizers/remapper_test.cc b/tensorflow/core/grappler/optimizers/remapper_test.cc index 9d734801916..da984a4fa19 100644 --- a/tensorflow/core/grappler/optimizers/remapper_test.cc +++ b/tensorflow/core/grappler/optimizers/remapper_test.cc @@ -449,7 +449,8 @@ TEST_F(RemapperTest, FuseMatMulWithBias) { test::ExpectTensorNear(tensors[0], tensors_expected[0], 1e-6); } -TEST_F(RemapperTest, FuseConv2DWithBiasAndActivationOnGPU) { +// TODO(b/161005848): 
Fix flaky test. +TEST_F(RemapperTest, DISABLED_FuseConv2DWithBiasAndActivationOnGPU) { #if !(GOOGLE_CUDA) GTEST_SKIP() << "No CUDA, skip FuseConv2DWithBiasAndActivation on GPU"; #endif // !GOOGLE_CUDA From 2d7b48cec74efbfa20de604a47a5b4195322245d Mon Sep 17 00:00:00 2001 From: Karim Nosir Date: Fri, 10 Jul 2020 17:38:29 -0700 Subject: [PATCH 0206/2522] Add section about supported ops in the inference / overview page that links to details about supported ops and ops limitations. PiperOrigin-RevId: 320707226 Change-Id: Icaadbefbe70b52054dcf48fcc4f04f3e23dbcebb --- tensorflow/lite/g3doc/guide/inference.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tensorflow/lite/g3doc/guide/inference.md b/tensorflow/lite/g3doc/guide/inference.md index 6e47d6d5190..fbf03ab84b5 100644 --- a/tensorflow/lite/g3doc/guide/inference.md +++ b/tensorflow/lite/g3doc/guide/inference.md @@ -57,6 +57,12 @@ explicit goal and some variance between languages is to be expected. Across all libraries, the TensorFlow Lite API enables you to load models, feed inputs, and retrieve inference outputs. +## Supported operations + +TensorFlow Lite supports a subset of TensorFlow operations with some +limitations. For full list of operations and limitations see +[TF Lite Ops page](https://www.tensorflow.org/mlir/tfl_ops). + ### Android On Android, TensorFlow Lite inference can be performed using either Java or C++ From ac6b4535c38d24fbdbee5c54bde91cf83bfd7ba6 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Fri, 10 Jul 2020 17:40:09 -0700 Subject: [PATCH 0207/2522] Rollback PR #35985 PiperOrigin-RevId: 320707414 Change-Id: I3261a877344d83ae62b86a6e30dbe93631d476d6 --- tensorflow/lite/c/builtin_op_data.h | 4 - .../lite/core/api/flatbuffer_conversions.cc | 2 - tensorflow/lite/kernels/add.cc | 85 ++++------------ tensorflow/lite/kernels/add_test.cc | 31 ++---- .../lite/kernels/internal/reference/add.h | 66 +++---------- tensorflow/lite/kernels/register.cc | 6 +- tensorflow/lite/kernels/sub.cc | 97 ++++--------------- tensorflow/lite/kernels/sub_test.cc | 12 --- tensorflow/lite/schema/schema.fbs | 4 - tensorflow/lite/schema/schema_generated.h | 22 +---- tensorflow/lite/toco/tflite/op_version.cc | 3 - tensorflow/lite/toco/tflite/operator.cc | 4 +- .../lite/tools/versioning/op_version.cc | 46 +-------- tensorflow/lite/tools/versioning/op_version.h | 5 - .../lite/tools/versioning/runtime_version.cc | 3 - 15 files changed, 73 insertions(+), 317 deletions(-) diff --git a/tensorflow/lite/c/builtin_op_data.h b/tensorflow/lite/c/builtin_op_data.h index e205f075b43..232f5f95928 100644 --- a/tensorflow/lite/c/builtin_op_data.h +++ b/tensorflow/lite/c/builtin_op_data.h @@ -199,8 +199,6 @@ typedef struct { typedef struct { TfLiteFusedActivation activation; - // Parameter added for the version 4. - bool pot_scale_int16; } TfLiteAddParams; typedef struct { @@ -222,8 +220,6 @@ typedef struct { typedef struct { TfLiteFusedActivation activation; - // Parameter added for the version 5. 
- bool pot_scale_int16; } TfLiteSubParams; typedef struct { diff --git a/tensorflow/lite/core/api/flatbuffer_conversions.cc b/tensorflow/lite/core/api/flatbuffer_conversions.cc index fff8d15491e..e5422697acc 100644 --- a/tensorflow/lite/core/api/flatbuffer_conversions.cc +++ b/tensorflow/lite/core/api/flatbuffer_conversions.cc @@ -893,7 +893,6 @@ TfLiteStatus ParseAdd(const Operator* op, ErrorReporter* error_reporter, if (schema_params != nullptr) { params->activation = ConvertActivation(schema_params->fused_activation_function()); - params->pot_scale_int16 = schema_params->pot_scale_int16(); } else { // TODO(b/157480169): We should either return kTfLiteError or fill in some // reasonable defaults in the params struct. We are not doing so until we @@ -1629,7 +1628,6 @@ TfLiteStatus ParseSub(const Operator* op, ErrorReporter* error_reporter, if (schema_params != nullptr) { params->activation = ConvertActivation(schema_params->fused_activation_function()); - params->pot_scale_int16 = schema_params->pot_scale_int16(); } else { // TODO(b/157480169): We should either return kTfLiteError or fill in some // reasonable defaults in the params struct. We are not doing so until we diff --git a/tensorflow/lite/kernels/add.cc b/tensorflow/lite/kernels/add.cc index 7692ae9e54b..bda475bdc35 100644 --- a/tensorflow/lite/kernels/add.cc +++ b/tensorflow/lite/kernels/add.cc @@ -68,11 +68,6 @@ struct OpData { int32 input1_offset; int32 input2_offset; int32 output_offset; - - // This parameter is used to indicate whether - // parameter scale is power of two. - // It is used in 16-bit -> 16-bit quantization. - bool pot_scale_int16; }; void* Init(TfLiteContext* context, const char* buffer, size_t length) { @@ -108,55 +103,12 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { output_size = TfLiteIntArrayCopy(input1->dims); } - // 8bit -> 8bit general quantized path, with general rescalings - // as well as, int16 -> int16 with general rescalings - bool pot_scale_int16 = true; - - bool input1_scale_is_pot = false; - bool input2_scale_is_pot = false; - bool output_scale_is_pot = false; - - int input1_scale_log2_rounded{0}; - int input2_scale_log2_rounded{0}; - int output_scale_log2_rounded{0}; - - if (input1->type == kTfLiteInt16 && input2->type == kTfLiteInt16 && - output->type == kTfLiteInt16) { - // In case of 16-bit, there are two implementation: - // the scale parameter is a general number - // the scale parameter is POT and - // zero_point is zero for inputs/output. 
- pot_scale_int16 = (input1->params.zero_point == 0) && - (input2->params.zero_point == 0) && - (output->params.zero_point == 0); - - input1_scale_is_pot = - CheckedLog2(input1->params.scale, &input1_scale_log2_rounded); - - input2_scale_is_pot = - CheckedLog2(input2->params.scale, &input2_scale_log2_rounded); - - output_scale_is_pot = - CheckedLog2(output->params.scale, &output_scale_log2_rounded); - - pot_scale_int16 &= - input1_scale_is_pot && input2_scale_is_pot && output_scale_is_pot; - } - - data->pot_scale_int16 = pot_scale_int16; - - if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 || - !pot_scale_int16) { + if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) { // 8bit -> 8bit general quantized path, with general rescalings - // as well as, 16bit -> 16bit with general rescalings data->input1_offset = -input1->params.zero_point; data->input2_offset = -input2->params.zero_point; data->output_offset = output->params.zero_point; - - // The shift is set to 15 for 16-bit and 20 in case of 8-bit, accordingly. - // In case of 16-bit we have 65535 << 15 which is less than 1 << 31, - // therefore the addition will still fit in a 32 bit accumulator. - data->left_shift = !pot_scale_int16 ? 15 : 20; + data->left_shift = 20; const double twice_max_input_scale = 2 * std::max(input1->params.scale, input2->params.scale); const double real_input1_multiplier = @@ -192,8 +144,19 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_EQ(context, input2->params.zero_point, 0); TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0); + int input1_scale_log2_rounded; + bool input1_scale_is_pot = + CheckedLog2(input1->params.scale, &input1_scale_log2_rounded); TF_LITE_ENSURE(context, input1_scale_is_pot); + + int input2_scale_log2_rounded; + bool input2_scale_is_pot = + CheckedLog2(input2->params.scale, &input2_scale_log2_rounded); TF_LITE_ENSURE(context, input2_scale_is_pot); + + int output_scale_log2_rounded; + bool output_scale_is_pot = + CheckedLog2(output->params.scale, &output_scale_log2_rounded); TF_LITE_ENSURE(context, output_scale_is_pot); data->input1_shift = input1_scale_log2_rounded - output_scale_log2_rounded; @@ -268,8 +231,7 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node, const TfLiteTensor* input1, const TfLiteTensor* input2, TfLiteTensor* output) { - if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 || - !data->pot_scale_int16) { + if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) { tflite::ArithmeticParams op_params; op_params.left_shift = data->left_shift; op_params.input1_offset = data->input1_offset; @@ -304,15 +266,6 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node, TF_LITE_ADD(optimized_integer_ops, Add, int8_t); } } - } else if (output->type == kTfLiteInt16) { - if (need_broadcast) { - TF_LITE_ADD(reference_ops, BroadcastAdd4DSlow, int16_t); - } else { - reference_ops::Add( - op_params, GetTensorShape(input1), GetTensorData(input1), - GetTensorShape(input2), GetTensorData(input2), - GetTensorShape(output), GetTensorData(output), false); - } } else { if (kernel_type == kReference) { if (need_broadcast) { @@ -330,12 +283,12 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node, } #undef TF_LITE_ADD } else if (output->type == kTfLiteInt16) { - tflite::ArithmeticParams op_params; - op_params.input1_shift = data->input1_shift; - op_params.input2_shift = data->input2_shift; - SetActivationParams(data->output_activation_min, - 
data->output_activation_max, &op_params); #define TF_LITE_ADD(type, opname) \ + tflite::ArithmeticParams op_params; \ + op_params.input1_shift = data->input1_shift; \ + op_params.input2_shift = data->input2_shift; \ + SetActivationParams(data->output_activation_min, \ + data->output_activation_max, &op_params); \ type::opname(op_params, GetTensorShape(input1), \ GetTensorData(input1), GetTensorShape(input2), \ GetTensorData(input2), GetTensorShape(output), \ diff --git a/tensorflow/lite/kernels/add_test.cc b/tensorflow/lite/kernels/add_test.cc index fc78f930897..bb883dd9b05 100644 --- a/tensorflow/lite/kernels/add_test.cc +++ b/tensorflow/lite/kernels/add_test.cc @@ -310,18 +310,15 @@ TEST(QuantizedAddOpModel, QuantizedTestsNoActivationInt16) { const float kMin = -1.f; const float kMax = 32767.f / 32768.f; float kQuantizedTolerance = GetToleranceInt16(kMin, kMax); - std::vector> inputs1 = {{0.1, 0.2, 0.3, 0.4, 0.9, 0.7}, - {-0.8, 0.2, 0.4, 0.7, 0.1, 0.0}, - {-0.8, 0.2, 0.7, 0.3, 0.9, 0.1}}; - std::vector> inputs2 = {{0.6, 0.4, 0.3, 0.1, -0.1, 0.3}, - {0.6, 0.4, 0.5, -0.8, 0.0, -1.0}, - {0.6, 0.4, -0.8, 0.5, -0.9, 0.1}}; - std::vector> results = {{0.7, 0.6, 0.6, 0.5, 0.8, 1.0}, - {-0.2, 0.6, 0.9, -0.1, 0.1, -1.0}, - {-0.2, 0.6, -0.1, 0.8, 0.0, 0.2}}; + std::vector> inputs1 = { + {0.1, 0.2, 0.3, 0.4}, {-0.8, 0.2, 0.4, 0.7}, {-0.8, 0.2, 0.7, 0.3}}; + std::vector> inputs2 = { + {0.6, 0.4, 0.3, 0.1}, {0.6, 0.4, 0.5, -0.8}, {0.6, 0.4, -0.8, 0.5}}; + std::vector> results = { + {0.7, 0.6, 0.6, 0.5}, {-0.2, 0.6, 0.9, -0.1}, {-0.2, 0.6, -0.1, 0.8}}; for (size_t i = 0; i < inputs1.size(); ++i) { - QuantizedAddOpModel m({TensorType_INT16, {1, 2, 3, 1}, kMin, kMax}, - {TensorType_INT16, {1, 2, 3, 1}, kMin, kMax}, + QuantizedAddOpModel m({TensorType_INT16, {1, 2, 2, 1}, kMin, kMax}, + {TensorType_INT16, {1, 2, 2, 1}, kMin, kMax}, {TensorType_INT16, {}, kMin, kMax}, ActivationFunctionType_NONE); m.QuantizeAndPopulate(m.input1(), inputs1[i]); @@ -442,10 +439,6 @@ TEST(QuantizedAddOpModel, QuantizedWithScalarBroadcastInt8) { QuantizedWithScalarBroadcast(); } -TEST(QuantizedAddOpModel, QuantizedWithScalarBroadcastInt16) { - QuantizedWithScalarBroadcast(); -} - template void QuantizedWithMixedBroadcast() { float kQuantizedTolerance = GetTolerance(-3.f, 3.f); @@ -508,10 +501,6 @@ TEST(QuantizedAddOpModel, QuantizedWithMixedBroadcastInt8) { QuantizedWithMixedBroadcast(); } -TEST(QuantizedAddOpModel, QuantizedWithMixedBroadcastInt16) { - QuantizedWithMixedBroadcast(); -} - template void QuantizedWithGenericBroadcast() { float kQuantizedTolerance = GetTolerance(-1.0, 1.0); @@ -538,9 +527,5 @@ TEST(QuantizedAddOpModel, QuantizedWithGenericdBroadcastInt8) { QuantizedWithGenericBroadcast(); } -TEST(QuantizedAddOpModel, QuantizedWithGenericdBroadcastInt16) { - QuantizedWithGenericBroadcast(); -} - } // namespace } // namespace tflite diff --git a/tensorflow/lite/kernels/internal/reference/add.h b/tensorflow/lite/kernels/internal/reference/add.h index 741f4e684c5..d0c40912091 100644 --- a/tensorflow/lite/kernels/internal/reference/add.h +++ b/tensorflow/lite/kernels/internal/reference/add.h @@ -51,18 +51,13 @@ inline void Add(const ArithmeticParams& params, // Element-wise add that can often be used for inner loop of broadcast add as // well as the non-broadcast add. - -// This function is used for 8-bit as well as for 16-bit, but the accumulator -// is 32-bit for both cases. The overflow does not happen due to the -// choice of the shift (20 or 15, accordingly - see add.cc for more comments). 
-template inline void AddElementwise(int size, const ArithmeticParams& params, - const T* input1_data, const T* input2_data, - T* output_data) { - TFLITE_DCHECK_GT(params.input1_offset, -std::numeric_limits::max()); - TFLITE_DCHECK_GT(params.input2_offset, -std::numeric_limits::max()); - TFLITE_DCHECK_LT(params.input1_offset, std::numeric_limits::max()); - TFLITE_DCHECK_LT(params.input2_offset, std::numeric_limits::max()); + const uint8* input1_data, const uint8* input2_data, + uint8* output_data) { + TFLITE_DCHECK_GT(params.input1_offset, -256); + TFLITE_DCHECK_GT(params.input2_offset, -256); + TFLITE_DCHECK_LT(params.input1_offset, 256); + TFLITE_DCHECK_LT(params.input2_offset, 256); for (int i = 0; i < size; ++i) { const int32 input1_val = params.input1_offset + input1_data[i]; @@ -83,7 +78,7 @@ inline void AddElementwise(int size, const ArithmeticParams& params, const int32 clamped_output = std::min(params.quantized_activation_max, std::max(params.quantized_activation_min, raw_output)); - output_data[i] = static_cast(clamped_output); + output_data[i] = static_cast(clamped_output); } } @@ -137,38 +132,10 @@ inline void Add(const ArithmeticParams& params, AddElementwise(flat_size, params, input1_data, input2_data, output_data); } -inline void AddGeneralParamScale(const ArithmeticParams& params, - const RuntimeShape& input1_shape, - const int16* input1_data, - const RuntimeShape& input2_shape, - const int16* input2_data, - const RuntimeShape& output_shape, - int16* output_data) { - TFLITE_DCHECK_LE(params.quantized_activation_min, - params.quantized_activation_max); - const int flat_size = - MatchingElementsSize(input1_shape, input2_shape, output_shape); - - int max_value = std::numeric_limits::max(); - - TFLITE_DCHECK_GT(params.input1_offset, -max_value); - TFLITE_DCHECK_GT(params.input2_offset, -max_value); - TFLITE_DCHECK_LT(params.input1_offset, max_value); - TFLITE_DCHECK_LT(params.input2_offset, max_value); - AddElementwise(flat_size, params, input1_data, input2_data, output_data); -} - inline void Add(const ArithmeticParams& params, const RuntimeShape& input1_shape, const int16* input1_data, const RuntimeShape& input2_shape, const int16* input2_data, - const RuntimeShape& output_shape, int16* output_data, - bool pot_scale = true) { - if (!pot_scale) { - AddGeneralParamScale(params, input1_shape, input1_data, input2_shape, - input2_data, output_shape, output_data); - return; - } - + const RuntimeShape& output_shape, int16* output_data) { TFLITE_DCHECK_LE(params.quantized_activation_min, params.quantized_activation_max); @@ -290,14 +257,13 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params, } } -// This function is used for 8-bit as well as for 16-bit, but the accumulator -// is 32-bit for both cases. The overflow does not happen due to the -// choice of the shift (20 or 15, accordingly - see add.cc for more comments). 
-template -inline void BroadcastAdd4DSlow( - const ArithmeticParams& params, const RuntimeShape& input1_shape, - const T* input1_data, const RuntimeShape& input2_shape, - const T* input2_data, const RuntimeShape& output_shape, T* output_data) { +inline void BroadcastAdd4DSlow(const ArithmeticParams& params, + const RuntimeShape& input1_shape, + const uint8* input1_data, + const RuntimeShape& input2_shape, + const uint8* input2_data, + const RuntimeShape& output_shape, + uint8* output_data) { NdArrayDesc<4> desc1; NdArrayDesc<4> desc2; NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, @@ -347,7 +313,7 @@ inline void BroadcastAdd4DSlow( std::min(params.quantized_activation_max, std::max(params.quantized_activation_min, raw_output)); output_data[Offset(extended_output_shape, b, y, x, c)] = - static_cast(clamped_output); + static_cast(clamped_output); } } } diff --git a/tensorflow/lite/kernels/register.cc b/tensorflow/lite/kernels/register.cc index 667d4a8e4f8..333ffc12d7e 100644 --- a/tensorflow/lite/kernels/register.cc +++ b/tensorflow/lite/kernels/register.cc @@ -89,8 +89,8 @@ BuiltinOpResolver::BuiltinOpResolver() { /* min_version = */ 1, /* max_version = */ 3); AddBuiltin(BuiltinOperator_ADD, Register_ADD(), - /* min_version */ 1, - /* max_version */ 4); + /* min_version = */ 1, + /* max_version = */ 2); AddBuiltin(BuiltinOperator_SPACE_TO_BATCH_ND, Register_SPACE_TO_BATCH_ND(), /* min_version = */ 1, /* max_version = */ 3); @@ -143,7 +143,7 @@ BuiltinOpResolver::BuiltinOpResolver() { /* max_version */ 2); AddBuiltin(BuiltinOperator_SUB, Register_SUB(), /* min_version = */ 1, - /* max_version = */ 5); + /* max_version = */ 4); AddBuiltin(BuiltinOperator_SPLIT, Register_SPLIT(), /* min_version = */ 1, /* max_version = */ 4); diff --git a/tensorflow/lite/kernels/sub.cc b/tensorflow/lite/kernels/sub.cc index f93ebecd46d..4cd9dd7ff60 100644 --- a/tensorflow/lite/kernels/sub.cc +++ b/tensorflow/lite/kernels/sub.cc @@ -71,11 +71,6 @@ struct OpData { int32 input1_offset; int32 input2_offset; int32 output_offset; - - // This parameter is used to indicate whether - // parameter scale is power of two. - // It is used in 16-bit -> 16-bit quantization. 
- bool pot_scale_int16; }; void* Init(TfLiteContext* context, const char* buffer, size_t length) { @@ -88,14 +83,13 @@ void Free(TfLiteContext* context, void* buffer) { delete reinterpret_cast(buffer); } -TfLiteStatus PrepareGeneralSubOp(TfLiteContext* context, - const TfLiteTensor* input_1, - const TfLiteTensor* input_2, - TfLiteTensor* output, TfLiteSubParams* params, - OpData* op_params, int op_sign) { - TF_LITE_ENSURE(context, output->type == kTfLiteUInt8 || - output->type == kTfLiteInt8 || - output->type == kTfLiteInt16); +TfLiteStatus Prepare8BitSubOp(TfLiteContext* context, + const TfLiteTensor* input_1, + const TfLiteTensor* input_2, TfLiteTensor* output, + TfLiteSubParams* params, OpData* op_params, + int op_sign) { + TF_LITE_ENSURE(context, + output->type == kTfLiteUInt8 || output->type == kTfLiteInt8); const auto& input1_quantization_params = input_1->params; const auto& input2_quantization_params = input_2->params; const auto& output_quantization_params = output->params; @@ -104,9 +98,6 @@ TfLiteStatus PrepareGeneralSubOp(TfLiteContext* context, if (output->type == kTfLiteUInt8) { integer_type_min = std::numeric_limits::min(); integer_type_max = std::numeric_limits::max(); - } else if (output->type == kTfLiteInt16) { - integer_type_min = std::numeric_limits::min(); - integer_type_max = std::numeric_limits::max(); } else { // output->type == kTfLiteInt8 integer_type_min = std::numeric_limits::min(); @@ -129,11 +120,7 @@ TfLiteStatus PrepareGeneralSubOp(TfLiteContext* context, op_params->input1_offset = -input1_quantization_params.zero_point; op_params->input2_offset = -input2_quantization_params.zero_point; op_params->output_offset = output_quantization_params.zero_point; - - // The shift is set to 15 in case of 16-bit and 20 in case of 8-bit, - // accordingly. In case of 16-bit we have 65535 << 15 which is less than 1 << - // 31, therefore the addition will still fit in a 32 bit accumulator. - op_params->left_shift = output->type == kTfLiteInt16 ? 
15 : 20; + op_params->left_shift = 20; const double twice_max_input_scale = 2 * std::max(input1_quantization_params.scale, input2_quantization_params.scale); @@ -159,15 +146,13 @@ TfLiteStatus PrepareGeneralSubOp(TfLiteContext* context, TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized( context, params->activation, output, &op_params->output_activation_min, &op_params->output_activation_max)); - return kTfLiteOk; } -TfLiteStatus PrepareInt16SubOpPOT(TfLiteContext* context, - const TfLiteTensor* input1, - const TfLiteTensor* input2, - TfLiteTensor* output, TfLiteSubParams* params, - OpData* data) { +TfLiteStatus PrepareInt16SubOp(TfLiteContext* context, + const TfLiteTensor* input1, + const TfLiteTensor* input2, TfLiteTensor* output, + TfLiteSubParams* params, OpData* data) { // 16bit -> 16bit special quantized path, supporting only a rather // narrow case of quantization parameters: zero_points must all be 0 // ("symmetric quantization") and scales must be power-of-two (which @@ -234,51 +219,12 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { output_size = TfLiteIntArrayCopy(input1->dims); } - // 8bit -> 8bit general quantized path, with general rescalings - // as well as, 16bit -> 16bit with general rescalings - bool pot_scale_int16 = true; - - bool input1_scale_is_pot = false; - bool input2_scale_is_pot = false; - bool output_scale_is_pot = false; - - int input1_scale_log2_rounded{0}; - int input2_scale_log2_rounded{0}; - int output_scale_log2_rounded{0}; - - if (input1->type == kTfLiteInt16 && input2->type == kTfLiteInt16 && - output->type == kTfLiteInt16) { - // In case of 16-bit, there are two implementation: - // the scale parameter is a general number - // the scale parameter is POT and - // zero_point is zero for inputs/output. 
- pot_scale_int16 = (input1->params.zero_point == 0) && - (input2->params.zero_point == 0) && - (output->params.zero_point == 0); - - input1_scale_is_pot = - CheckedLog2(input1->params.scale, &input1_scale_log2_rounded); - - input2_scale_is_pot = - CheckedLog2(input2->params.scale, &input2_scale_log2_rounded); - - output_scale_is_pot = - CheckedLog2(output->params.scale, &output_scale_log2_rounded); - - pot_scale_int16 &= - input1_scale_is_pot && input2_scale_is_pot && output_scale_is_pot; - } - - data->pot_scale_int16 = pot_scale_int16; - - if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 || - !pot_scale_int16) { - TF_LITE_ENSURE_OK(context, PrepareGeneralSubOp(context, input1, input2, - output, params, data, -1)); + if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) { + TF_LITE_ENSURE_OK(context, Prepare8BitSubOp(context, input1, input2, output, + params, data, -1)); } else if (output->type == kTfLiteInt16) { - // LSTM-special case with scale parameter of POT - TF_LITE_ENSURE_OK(context, PrepareInt16SubOpPOT(context, input1, input2, - output, params, data)); + TF_LITE_ENSURE_OK(context, PrepareInt16SubOp(context, input1, input2, + output, params, data)); } return context->ResizeTensor(context, output, output_size); @@ -386,15 +332,6 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node, } else { TF_LITE_SUB(reference_integer_ops, Add, int8_t); } - } else if (!data->pot_scale_int16) { - if (need_broadcast) { - TF_LITE_SUB(reference_ops, BroadcastAdd4DSlow, int16_t); - } else { - reference_ops::Add(op_params, GetTensorShape(input1), - GetTensorData(input1), GetTensorShape(input2), - GetTensorData(input2), GetTensorShape(output), - GetTensorData(output), false); - } } else if (output->type == kTfLiteUInt8) { if (kernel_type == kReference) { if (need_broadcast) { diff --git a/tensorflow/lite/kernels/sub_test.cc b/tensorflow/lite/kernels/sub_test.cc index 24d9c251afb..67054fe4903 100644 --- a/tensorflow/lite/kernels/sub_test.cc +++ b/tensorflow/lite/kernels/sub_test.cc @@ -304,10 +304,6 @@ TEST(QuantizedSubOpModel, QuantizedTestsNoActivationInt8) { QuantizedTestsNoActivation(); } -TEST(QuantizedSubOpModel, QuantizedTestsNoActivationGenericInt16) { - QuantizedTestsNoActivation(); -} - template void QuantizedTestsActivationRELU_N1_TO_1() { float kQuantizedTolerance = GetTolerance(-1.0, 1.0); @@ -369,10 +365,6 @@ TEST(QuantizedSubOpModel, QuantizedVariousInputShapesInt8) { QuantizedVariousInputShapes(); } -TEST(QuantizedSubOpModel, QuantizedVariousInputShapesInt16) { - QuantizedVariousInputShapes(); -} - template void QuantizedWithBroadcast() { float kQuantizedTolerance = GetTolerance(-3.0, 3.0); @@ -401,10 +393,6 @@ TEST(QuantizedSubOpModel, QuantizedWithBroadcastInt8) { QuantizedWithBroadcast(); } -TEST(QuantizedSubOpModel, QuantizedWithBroadcastInt16) { - QuantizedWithBroadcast(); -} - TEST(QuantizedSubOpModel, QuantizedTestsNoActivationInt16) { const float kMin = -1.f; const float kMax = diff --git a/tensorflow/lite/schema/schema.fbs b/tensorflow/lite/schema/schema.fbs index ff977a0db02..b7f41c756e4 100644 --- a/tensorflow/lite/schema/schema.fbs +++ b/tensorflow/lite/schema/schema.fbs @@ -582,8 +582,6 @@ table ConcatenationOptions { table AddOptions { fused_activation_function:ActivationFunctionType; - // Parameters supported by version 4. 
- pot_scale_int16:bool = true; } table MulOptions { @@ -705,8 +703,6 @@ table DepthToSpaceOptions { table SubOptions { fused_activation_function:ActivationFunctionType; - // Parameters supported by version 5 - pot_scale_int16:bool = true; } table DivOptions { diff --git a/tensorflow/lite/schema/schema_generated.h b/tensorflow/lite/schema/schema_generated.h index 3ac32f8ac8f..b044acb4033 100755 --- a/tensorflow/lite/schema/schema_generated.h +++ b/tensorflow/lite/schema/schema_generated.h @@ -4739,29 +4739,22 @@ flatbuffers::Offset CreateConcatenationOptions(flatbuffers struct AddOptionsT : public flatbuffers::NativeTable { typedef AddOptions TableType; - bool pot_scale_int16; tflite::ActivationFunctionType fused_activation_function; AddOptionsT() - : pot_scale_int16(true), - fused_activation_function(tflite::ActivationFunctionType_NONE) { + : fused_activation_function(tflite::ActivationFunctionType_NONE) { } }; struct AddOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef AddOptionsT NativeTableType; enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_FUSED_ACTIVATION_FUNCTION = 4, - VT_POT_SCALE_INT16 = 6 + VT_FUSED_ACTIVATION_FUNCTION = 4 }; - bool pot_scale_int16() const { - return GetField(VT_POT_SCALE_INT16, 0) != 0; - } tflite::ActivationFunctionType fused_activation_function() const { return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && - VerifyField(verifier, VT_POT_SCALE_INT16) && VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); } @@ -5911,29 +5904,22 @@ flatbuffers::Offset CreateDepthToSpaceOptions(flatbuffers:: struct SubOptionsT : public flatbuffers::NativeTable { typedef SubOptions TableType; - bool pot_scale_int16; tflite::ActivationFunctionType fused_activation_function; SubOptionsT() - : pot_scale_int16(true), - fused_activation_function(tflite::ActivationFunctionType_NONE) { + : fused_activation_function(tflite::ActivationFunctionType_NONE) { } }; struct SubOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef SubOptionsT NativeTableType; enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_FUSED_ACTIVATION_FUNCTION = 4, - VT_POT_SCALE_INT16 = 6 + VT_FUSED_ACTIVATION_FUNCTION = 4 }; - bool pot_scale_int16() const { - return GetField(VT_POT_SCALE_INT16, 0) != 0; - } tflite::ActivationFunctionType fused_activation_function() const { return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && - VerifyField(verifier, VT_POT_SCALE_INT16) && VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); } diff --git a/tensorflow/lite/toco/tflite/op_version.cc b/tensorflow/lite/toco/tflite/op_version.cc index c34cc8d1b24..02afc35de3b 100644 --- a/tensorflow/lite/toco/tflite/op_version.cc +++ b/tensorflow/lite/toco/tflite/op_version.cc @@ -53,15 +53,12 @@ std::string GetMinimumRuntimeVersionForModel(const Model& model) { {{OperatorType::kDepthwiseConv, 5}, kPendingReleaseOpVersion}, {{OperatorType::kAdd, 1}, "1.5.0"}, {{OperatorType::kAdd, 2}, "1.14.0"}, - {{OperatorType::kAdd, 3}, kPendingReleaseOpVersion}, {{OperatorType::kAddN, 1}, "1.14.0"}, {{OperatorType::kSpaceToBatchND, 1}, "1.6.0"}, {{OperatorType::kSpaceToBatchND, 2}, "1.14.0"}, {{OperatorType::kSub, 1}, "1.6.0"}, {{OperatorType::kSub, 2}, "1.14.0"}, - {{OperatorType::kSub, 3}, "1.15.0"}, {{OperatorType::kSub, 4}, 
kPendingReleaseOpVersion}, - {{OperatorType::kSub, 5}, kPendingReleaseOpVersion}, {{OperatorType::kDiv, 1}, "1.6.0"}, {{OperatorType::kBatchToSpaceND, 1}, "1.6.0"}, {{OperatorType::kBatchToSpaceND, 2}, "1.14.0"}, diff --git a/tensorflow/lite/toco/tflite/operator.cc b/tensorflow/lite/toco/tflite/operator.cc index 144884f054a..bc12d49a115 100644 --- a/tensorflow/lite/toco/tflite/operator.cc +++ b/tensorflow/lite/toco/tflite/operator.cc @@ -275,10 +275,10 @@ class Sub : public BuiltinOperator 4) { + if (op_sig.options.broadcast.need_broadcast && + op_sig.options.broadcast.num_dims > 4) { return 3; } if (op_sig.input_types.at(0) == TensorType_INT8) { @@ -556,7 +538,7 @@ int GetBuiltinOperatorVersion(const OpSignature& op_sig) { } } return 1; - + case BuiltinOperator_ADD: case BuiltinOperator_SPACE_TO_DEPTH: case BuiltinOperator_SPLIT_V: case BuiltinOperator_MEAN: @@ -684,26 +666,6 @@ OpSignature GetOpSignature(const OperatorCode* op_code, const Operator* op, } } break; - case BuiltinOperator_ADD: { - auto add_option = op->builtin_options_as_AddOptions(); - op_sig.options.addsub.pot_scale_int16 = true; - if (add_option) { - op_sig.options.addsub.pot_scale_int16 = add_option->pot_scale_int16(); - } - } break; - - case BuiltinOperator_SUB: { - auto sub_option = op->builtin_options_as_SubOptions(); - op_sig.options.addsub.need_broadcast = - !HaveSameShapes(subgraph, op, 0, 1); - op_sig.options.addsub.num_dims = - std::max(GetNumDims(subgraph, op, 0), GetNumDims(subgraph, op, 1)); - op_sig.options.addsub.pot_scale_int16 = true; - if (sub_option) { - op_sig.options.addsub.pot_scale_int16 = sub_option->pot_scale_int16(); - } - } break; - case BuiltinOperator_LSTM: { auto lstm_option = op->builtin_options_as_LSTMOptions(); if (lstm_option) { @@ -749,7 +711,7 @@ OpSignature GetOpSignature(const OperatorCode* op_code, const Operator* op, case BuiltinOperator_TRANSPOSE: { op_sig.options.single_input_op.num_dims = GetNumDims(subgraph, op, 0); } break; - + case BuiltinOperator_SUB: case BuiltinOperator_DIV: case BuiltinOperator_MAXIMUM: case BuiltinOperator_MINIMUM: { diff --git a/tensorflow/lite/tools/versioning/op_version.h b/tensorflow/lite/tools/versioning/op_version.h index 67a7b79fe38..71362001387 100644 --- a/tensorflow/lite/tools/versioning/op_version.h +++ b/tensorflow/lite/tools/versioning/op_version.h @@ -63,11 +63,6 @@ typedef struct { int32_t num_dims; bool need_broadcast; } broadcast; - struct { - bool pot_scale_int16; - int32_t num_dims; - bool need_broadcast; - } addsub; struct { bool is_per_channel_quantized; } conv_2d; diff --git a/tensorflow/lite/tools/versioning/runtime_version.cc b/tensorflow/lite/tools/versioning/runtime_version.cc index 2be02a6d41e..d345164f7e6 100644 --- a/tensorflow/lite/tools/versioning/runtime_version.cc +++ b/tensorflow/lite/tools/versioning/runtime_version.cc @@ -72,8 +72,6 @@ std::string FindMinimumRuntimeVersionForOp(tflite::BuiltinOperator op_code, {{BuiltinOperator_DEPTHWISE_CONV_2D, 6}, "2.3.0"}, {{BuiltinOperator_ADD, 1}, "1.5.0"}, {{BuiltinOperator_ADD, 2}, "1.14.0"}, - {{BuiltinOperator_ADD, 3}, kPendingReleaseVersion}, - {{BuiltinOperator_ADD, 4}, kPendingReleaseVersion}, {{BuiltinOperator_ADD_N, 1}, "1.14.0"}, {{BuiltinOperator_SPACE_TO_BATCH_ND, 1}, "1.6.0"}, {{BuiltinOperator_SPACE_TO_BATCH_ND, 2}, "1.14.0"}, @@ -82,7 +80,6 @@ std::string FindMinimumRuntimeVersionForOp(tflite::BuiltinOperator op_code, {{BuiltinOperator_SUB, 2}, "1.14.0"}, {{BuiltinOperator_SUB, 3}, "2.3.0"}, {{BuiltinOperator_SUB, 4}, kPendingReleaseVersion}, - 
{{BuiltinOperator_SUB, 5}, kPendingReleaseVersion}, {{BuiltinOperator_DENSIFY, 1}, "2.2.0"}, {{BuiltinOperator_DIV, 1}, "1.6.0"}, {{BuiltinOperator_DIV, 2}, "2.3.0"}, From 2a172d2081f60d8bbd281b4f4be68178557b05de Mon Sep 17 00:00:00 2001 From: Reed Date: Fri, 10 Jul 2020 17:57:39 -0700 Subject: [PATCH 0208/2522] Try to fix Windows build failure --- tensorflow/python/BUILD | 1 + tensorflow/tools/def_file_filter/symbols_pybind.txt | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 6ca2d44093b..83448150935 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -6076,6 +6076,7 @@ pywrap_tensorflow_macro( "//tensorflow/core/profiler/internal:print_model_analysis", "//tensorflow/core/profiler/internal/cpu:python_tracer", "//tensorflow/tools/graph_transforms:transform_graph_lib", + "//tensorflow/core/platform:tf32_utils", "//tensorflow/lite/toco/python:toco_python_api", "//tensorflow/python/eager:pywrap_tfe_lib", "//tensorflow/core/util/tensor_bundle", diff --git a/tensorflow/tools/def_file_filter/symbols_pybind.txt b/tensorflow/tools/def_file_filter/symbols_pybind.txt index a95740eaa97..69ccb99c333 100644 --- a/tensorflow/tools/def_file_filter/symbols_pybind.txt +++ b/tensorflow/tools/def_file_filter/symbols_pybind.txt @@ -370,6 +370,6 @@ tensorflow::grappler::CostAnalyzer::GenerateReport tensorflow::IsXlaEnabled tensorflow::GetMlirCommonFlags -[tf32_util] # tf32 +[tf32_utils] # tf32 tensorflow::allow_tf32_execution tensorflow::tf32_execution_allowed From 428be958c1c8857a7d31906540860c4c42249a71 Mon Sep 17 00:00:00 2001 From: Kibeom Kim Date: Fri, 10 Jul 2020 17:51:39 -0700 Subject: [PATCH 0209/2522] Internal change PiperOrigin-RevId: 320708573 Change-Id: I180e7122da55d564e32e9c8a4be87b05249dc19b --- RELEASE.md | 6 +----- tensorflow/python/framework/ops.py | 13 +------------ .../tools/api/golden/v1/tensorflow.-tensor.pbtxt | 2 +- tensorflow/tools/api/golden/v1/tensorflow.pbtxt | 2 +- .../tools/api/golden/v2/tensorflow.-tensor.pbtxt | 2 +- tensorflow/tools/api/golden/v2/tensorflow.pbtxt | 2 +- 6 files changed, 6 insertions(+), 21 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 69eca82c5f2..236428d4e83 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -31,11 +31,7 @@ * * * TF Core: - * - * `tf.Tensor` is now a subclass of `typing.Generic`, allowing type annotations - to be parameterized by dtype: `tf.Tensor[tf.Int32]`. This requires Python 3, - and will become fully compatible with static type checkers in the future. - + * * `tf.data`: * Added optional `exclude_cols` parameter to CsvDataset. This parameter is the complement of `select_cols`; at most one of these should be specified. diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 8a823e2e92c..4483dadecc0 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -24,7 +24,6 @@ import sys import threading import types -from typing import Generic, TypeVar import numpy as np import six from six.moves import map # pylint: disable=redefined-builtin @@ -255,19 +254,9 @@ def disable_tensor_equality(): Tensor._USE_EQUALITY = False # pylint: disable=protected-access -DataType = TypeVar("DataType", bound=dtypes.DType) - -# TODO(rahulkamat): Remove this and make Tensor a generic class -# once compatibility with Python 2 is dropped. 
-if sys.version_info[0] >= 3: - TensorTypeBase = Generic[DataType] -else: - TensorTypeBase = object - - # TODO(mdan): This object should subclass Symbol, not just Tensor. @tf_export("Tensor") -class Tensor(internal.NativeObject, core_tf_types.Tensor, TensorTypeBase): +class Tensor(internal.NativeObject, core_tf_types.Tensor): """A tensor is a multidimensional array of elements represented by a `tf.Tensor` object. All elements are of a single known data type. diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-tensor.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-tensor.pbtxt index c4c62860836..9315973e51d 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.-tensor.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.-tensor.pbtxt @@ -3,7 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" member { name: "OVERLOADABLE_OPERATORS" mtype: "" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt index dc09038dd5a..1fe3c40d7e4 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt @@ -258,7 +258,7 @@ tf_module { } member { name: "Tensor" - mtype: "" + mtype: "" } member { name: "TensorArray" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-tensor.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-tensor.pbtxt index c4c62860836..9315973e51d 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.-tensor.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.-tensor.pbtxt @@ -3,7 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" member { name: "OVERLOADABLE_OPERATORS" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt index d9795fde29c..1d56969bd15 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt @@ -66,7 +66,7 @@ tf_module { } member { name: "Tensor" - mtype: "" + mtype: "" } member { name: "TensorArray" From 1d6a0a0f7128f108bcb2da612e9a323909aee1d4 Mon Sep 17 00:00:00 2001 From: Xingyu Long Date: Fri, 10 Jul 2020 21:11:14 -0400 Subject: [PATCH 0210/2522] Fix BUILD file and add pylint comments. 
--- tensorflow/python/keras/benchmarks/BUILD | 2 +- .../text_classification_transformer_benchmark_test.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/keras/benchmarks/BUILD b/tensorflow/python/keras/benchmarks/BUILD index 8a836fe0389..b3f36ae5041 100644 --- a/tensorflow/python/keras/benchmarks/BUILD +++ b/tensorflow/python/keras/benchmarks/BUILD @@ -108,5 +108,5 @@ py_test( deps = [ ":benchmark_util", "//tensorflow:tensorflow_py", - ] + ], ) diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py index 12d7abda441..03fac618e99 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py @@ -61,12 +61,12 @@ class TextWithTransformerBenchmark( self.max_len, self.max_feature, embed_dim) - x = embedding_layer(inputs) + x = embedding_layer(inputs) #pylint: disable=not-callable transformer_block = TransformerBlock( embed_dim, num_heads, ff_dim) - x = transformer_block(x) + x = transformer_block(x) #pylint: disable=not-callable x = tf.keras.layers.GlobalAvgPool1D()(x) x = tf.keras.layers.Dropout(0.1)(x) x = tf.keras.layers.Dense(20, activation="relu")(x) @@ -173,7 +173,7 @@ class TransformerBlock(tf.keras.layers.Layer): self.dropout2 = tf.keras.layers.Dropout(rate) def call(self, inputs, training): #pylint: disable=arguments-differ - attn_output = self.att(inputs) + attn_output = self.att(inputs) #pylint: disable=not-callable attn_output = self.dropout1(attn_output, training=training) out1 = self.layernorm1(inputs + attn_output) ffn_output = self.ffn(out1) From 484fc676b33d38fd8b3eb8d93c16b21b7eecf256 Mon Sep 17 00:00:00 2001 From: Wenhao Jia Date: Fri, 10 Jul 2020 18:28:48 -0700 Subject: [PATCH 0211/2522] Fix a comment typo. PiperOrigin-RevId: 320712223 Change-Id: I0103a005fede57bfa3bfe3ee2ceb8d02e6d741a0 --- tensorflow/core/tpu/tpu_api_dlsym_initializer.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/tpu/tpu_api_dlsym_initializer.cc b/tensorflow/core/tpu/tpu_api_dlsym_initializer.cc index 383f7a445f7..320dd8c34d4 100644 --- a/tensorflow/core/tpu/tpu_api_dlsym_initializer.cc +++ b/tensorflow/core/tpu/tpu_api_dlsym_initializer.cc @@ -62,7 +62,7 @@ Status InitializeTpuLibrary(void* library_handle) { RegisterTpuPlatform(); RegisterTpuSystemDevice(); RegisterTpuNodeDevice( - /*tpu_autoclustering_flag=*/false, + /*tpu_autoclustering=*/false, /*tpu_xla_device_failure_closes_chips=*/true, /*tpu_use_substreams_for_cross_tpu_device_transfers=*/true); } From c51da68e1dbe80029b0ef93b86cf6fde4447aaa4 Mon Sep 17 00:00:00 2001 From: Xiao Yu Date: Fri, 10 Jul 2020 18:42:31 -0700 Subject: [PATCH 0212/2522] Pluggable device/op_handler support in c_api_tfrt. And it starts to reuse device name (e.g. /device:CPU:0) borrowed from TensorFlow. It also allows creating different op handler for different GPU devices. 
PiperOrigin-RevId: 320713554 Change-Id: Id554249713fe7571e29e8f2f36fc0986ee44e9ec --- tensorflow/c/eager/c_api.cc | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index 70acd710166..6804247794b 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -725,13 +725,7 @@ void TFE_DeleteContextOptions(TFE_ContextOptions* options) { delete options; } TFE_Context* TFE_NewContext(const TFE_ContextOptions* opts, TF_Status* status) { if (opts->use_tfrt) { #ifdef PLATFORM_GOOGLE - tfrt::SmallVector op_handler_chains; - tfrt::SmallVector device_attributes; - status->status = tfrt::ListOpHandlerChains( - opts->session_options.options, &op_handler_chains, &device_attributes); - if (!status->status.ok()) return nullptr; - return tensorflow::wrap(new tfrt::ContextInterface( - op_handler_chains, device_attributes, opts->async)); + return tensorflow::wrap(new tfrt::ContextInterface(opts->async)); #else status->status = tensorflow::errors::Unimplemented("TFRT is not supported"); return nullptr; From 35526c1ae5fc56162903bc05512297fa62ac4c8d Mon Sep 17 00:00:00 2001 From: Feng Liu Date: Fri, 10 Jul 2020 18:43:22 -0700 Subject: [PATCH 0213/2522] Migrate the fixed output range trait to op interface PiperOrigin-RevId: 320713636 Change-Id: I117b34e62b07d7eb46bbdd3caa662ced12b5126d --- tensorflow/compiler/mlir/lite/ir/tfl_ops.h | 2 +- tensorflow/compiler/mlir/lite/ir/tfl_ops.td | 121 ++++++++---------- .../lite/quantization/quantization_driver.cc | 16 ++- .../lite/quantization/quantization_utils.cc | 26 +++- .../lite/quantization/quantization_utils.h | 9 +- 5 files changed, 93 insertions(+), 81 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.h b/tensorflow/compiler/mlir/lite/ir/tfl_ops.h index c7a1504c3b7..5f619503e56 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.h +++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.h @@ -29,7 +29,7 @@ limitations under the License. #include "mlir/Interfaces/LoopLikeInterface.h" // from @llvm-project #include "mlir/Interfaces/SideEffectInterfaces.h" // from @llvm-project #include "mlir/Support/LLVM.h" // from @llvm-project -#include "tensorflow/compiler/mlir/lite/quantization/quantization_traits.h" +#include "tensorflow/compiler/mlir/lite/quantization/quantization_utils.h" #include "tensorflow/lite/schema/schema_generated.h" namespace mlir { diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td index 4a56d893b19..f462eee5622 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td +++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td @@ -1676,12 +1676,7 @@ def TFL_HardSwishOp: TFL_Op<"hard_swish", [ } def TFL_L2NormalizationOp : TFL_Op<"l2_normalization", [NoSideEffect, - FixedOutputRangeInterface, - // central_value = min_value / 2 + (max_value - 1) / 2 + 1 - // zero_point = central_value - // scale = 1. 
/ (central_value - min_value) - FixedResultScale>, - FixedResultScale>]> { + FixedOutputRangeInterface]> { let summary = "L2 Normalize Operator"; let description = [{ @@ -1703,29 +1698,12 @@ def TFL_L2NormalizationOp : TFL_Op<"l2_normalization", [NoSideEffect, // FixedOutputRangeInterface: quant::UniformQuantizedType GetFixedOutputRange( bool is_signed, int bit_width) { - auto result_type = output().getType().cast(); - if (!result_type.getElementType().isa()) return {}; - Builder builder(result_type.getContext()); - - // Only support 8-bits - if (bit_width != 8) return {}; - IntegerType storage_type = builder.getIntegerType(bit_width); - - double scale = 1.0 / 128; - int64_t zero_point, storage_min, storage_max; - if (is_signed) { - zero_point = 0; - storage_min = -128; - storage_max = 127; - } else { - zero_point = 128; - storage_min = 0; - storage_max = 255; - } - - return quant::UniformQuantizedType::getChecked( - is_signed, storage_type, result_type.getElementType(), scale, - zero_point, storage_min, storage_max, builder.getUnknownLoc()); + auto result_type = output().getType(); + // central_value = min_value / 2 + (max_value - 1) / 2 + 1 + // zero_point = central_value + // scale = 1. / (central_value - min_value) + return quant::GetFixedOutputRange(is_signed, bit_width, result_type, + /*scale=*/1.0 / 128, /*zero_point=*/0); } }]; } @@ -1834,10 +1812,6 @@ def TFL_LogisticOp: TFL_Op<"logistic", [ PredOpTrait<"x and y must have same element type", TFL_TCresVTEtIsSameAsOp<0, 0>>, SameOperandsAndResultShape, - // zero_point = 0 - // scale = 1. / (max_value + 1) - FixedResultScale>, - FixedResultScale>, FixedOutputRangeInterface, TFL_GpuTargetOp]> { let summary = "Logistic operator"; @@ -1854,29 +1828,11 @@ def TFL_LogisticOp: TFL_Op<"logistic", [ // FixedOutputRangeInterface: quant::UniformQuantizedType GetFixedOutputRange( bool is_signed, int bit_width) { - auto result_type = y().getType().cast(); - if (!result_type.getElementType().isa()) return {}; - Builder builder(result_type.getContext()); - - // Only support 8-bits - if (bit_width != 8) return {}; - IntegerType storage_type = builder.getIntegerType(bit_width); - - double scale = 1.0 / 256; - int64_t zero_point, storage_min, storage_max; - if (is_signed) { - zero_point = -128; - storage_min = -128; - storage_max = 127; - } else { - zero_point = 0; - storage_min = 0; - storage_max = 255; - } - - return quant::UniformQuantizedType::getChecked( - is_signed, storage_type, result_type.getElementType(), scale, - zero_point, storage_min, storage_max, builder.getUnknownLoc()); + auto result_type = y().getType(); + // zero_point = 0 + // scale = 1. 
/ (max_value + 1) + return quant::GetFixedOutputRange(is_signed, bit_width, result_type, + /*scale=*/1.0 / 256, /*zero_point=*/-128); } }]; } @@ -1905,10 +1861,7 @@ def TFL_LogSoftmaxOp : TFL_Op<"log_softmax", [ SameOperandsAndResultShape, PredOpTrait<"x and y must have same element type", TFL_TCresVTEtIsSameAsOp<0, 0>>, - // zero_point = max_value - // scale = -log_softmax_output_min / (max_value + 1) - FixedResultScale>, - FixedResultScale>]> { + FixedOutputRangeInterface]> { let summary = "Log softmax operator"; let description = [{ @@ -1922,6 +1875,18 @@ def TFL_LogSoftmaxOp : TFL_Op<"log_softmax", [ let results = (outs TFL_TensorOf<[F32, QUI8, QI8, TFL_Quint8]>:$output); let hasOptions = 1; + + let extraClassDeclaration = [{ + // FixedOutputRangeInterface: + quant::UniformQuantizedType GetFixedOutputRange( + bool is_signed, int bit_width) { + auto result_type = output().getType(); + // zero_point = max_value + // scale = -log_softmax_output_min / (max_value + 1) + return quant::GetFixedOutputRange(is_signed, bit_width, result_type, + /*scale=*/16.0 / 256, /*zero_point=*/127); + } + }]; } // TODO(ashwinm): Revisit the granularity of the PredOpTraits. We could @@ -2833,10 +2798,7 @@ def TFL_SoftmaxOp : TFL_Op<"softmax", [ TFL_TCresVTEtIsSameAsOp<0, 0>>, TFL_OperandHasRankRange<0, 1, 4>, SameOperandsAndResultShape, - // zero_point = 0 - // scale = 1. / (max_value + 1) - FixedResultScale>, - FixedResultScale>, + FixedOutputRangeInterface, TFL_GpuTargetOp]> { let summary = "Softmax operator"; @@ -2854,6 +2816,18 @@ def TFL_SoftmaxOp : TFL_Op<"softmax", [ let results = (outs TFL_TensorOf<[F32, QI8, QUI8, TFL_Quint8]>:$output); let hasOptions = 1; + + let extraClassDeclaration = [{ + // FixedOutputRangeInterface: + quant::UniformQuantizedType GetFixedOutputRange( + bool is_signed, int bit_width) { + auto result_type = output().getType(); + // zero_point = 0 + // scale = 1. / (max_value + 1) + return quant::GetFixedOutputRange(is_signed, bit_width, result_type, + /*scale=*/1.0 / 256, /*zero_point=*/-128); + } + }]; } def TFL_SqrtOp: TFL_Op<"sqrt", [ @@ -2959,11 +2933,7 @@ def TFL_TanhOp: TFL_Op<"tanh", [ SameOperandsAndResultShape, PredOpTrait<"input and output must have same element type", TFL_TCresVTEtIsSameAsOp<0, 0>>, - // central_value = min_value / 2 + (max_value - 1) / 2 + 1 - // zero_point = central_value - // scale = 1. / (central_value - min_value) - FixedResultScale>, - FixedResultScale>, + FixedOutputRangeInterface, TFL_GpuTargetOp]> { let summary = "Hyperbolic tangent operator"; @@ -2985,6 +2955,19 @@ def TFL_TanhOp: TFL_Op<"tanh", [ state.addTypes(input.getType()); }]> ]; + + let extraClassDeclaration = [{ + // FixedOutputRangeInterface: + quant::UniformQuantizedType GetFixedOutputRange( + bool is_signed, int bit_width) { + auto result_type = output().getType(); + // central_value = min_value / 2 + (max_value - 1) / 2 + 1 + // zero_point = central_value + // scale = 1. 
/ (central_value - min_value) + return quant::GetFixedOutputRange(is_signed, bit_width, result_type, + /*scale=*/1.0 / 128, /*zero_point=*/0); + } + }]; } def TFL_TileOp: TFL_Op<"tile", [ diff --git a/tensorflow/compiler/mlir/lite/quantization/quantization_driver.cc b/tensorflow/compiler/mlir/lite/quantization/quantization_driver.cc index 0c9ccf1a979..9e0ad990657 100644 --- a/tensorflow/compiler/mlir/lite/quantization/quantization_driver.cc +++ b/tensorflow/compiler/mlir/lite/quantization/quantization_driver.cc @@ -794,16 +794,18 @@ bool QuantizationDriver::PropagateParams() { } // TODO(fengliuai): make the bit width configurable. - auto spec = GetQuantSpec(op); - auto key = std::make_pair(8, is_signed_); - auto &restricted_outputs = spec->restricted_output_params[key]; - for (int i = 0, e = restricted_outputs.size(); i != e; ++i) { - // The restrict can be nullptr if the result has been quantized. - if (auto params = restricted_outputs[i]) { - changed |= SetResultParams(op, i, params); + if (auto restricted = llvm::dyn_cast(op)) { + // TODO(fengliuai): different result can have different fixed range. + auto params = restricted.GetFixedOutputRange(is_signed_, /*bit_width=*/8); + for (auto i = 0; i < op->getNumResults(); ++i) { + // The range is null if the result has been quantized. + if (params) { + changed |= SetResultParams(op, i, params); + } } } + auto spec = GetQuantSpec(op); for (auto &it : spec->biases_params) { auto params = GetBiasParams(op, it.first, it.second.first, it.second.second); diff --git a/tensorflow/compiler/mlir/lite/quantization/quantization_utils.cc b/tensorflow/compiler/mlir/lite/quantization/quantization_utils.cc index 8f6b63b3ee6..9991d103449 100644 --- a/tensorflow/compiler/mlir/lite/quantization/quantization_utils.cc +++ b/tensorflow/compiler/mlir/lite/quantization/quantization_utils.cc @@ -449,7 +449,7 @@ static bool PreferResultScale(Operation* op) { // only considers the ops with restricted output params. static bool IsStatsRedundant(Operation* op, OpQuantSpecGetter op_quant_spec_getter) { - return !op_quant_spec_getter(op)->restricted_output_params.empty(); + return llvm::isa(op); } bool RemoveRedundantStatsOps(mlir::FuncOp func, @@ -469,7 +469,7 @@ bool RemoveRedundantStatsOps(mlir::FuncOp func, // Step 1: forward pass: propagate any value scales which are not produces // by `SameOperandsAndResultsScale`. Additionally, remove the value scales - // which are produced by the `restricted_output_params`. + // which are produced by the ops with the `FixedOutputRangeInterface`. // Note that we don't propagate across the multiple-operands // `SameOperandsAndResultsScale` ops like `concatenation`. 
func.walk( @@ -594,5 +594,27 @@ LogicalResult VerifySameScales(Operation* op) { } return success(); } + +quant::UniformQuantizedType GetFixedOutputRange(bool is_signed, int bit_width, + Type tensor_type, double scale, + int64_t zero_point, + int64_t storage_min, + int64_t storage_max) { + auto result_type = tensor_type.cast(); + if (!result_type.getElementType().isa()) return {}; + Builder builder(result_type.getContext()); + + // Only support 8-bits + if (bit_width != 8) return {}; + IntegerType storage_type = builder.getIntegerType(bit_width); + if (!is_signed) { + zero_point += 128; + storage_min += 128; + storage_max += 128; + } + return quant::UniformQuantizedType::getChecked( + is_signed, storage_type, result_type.getElementType(), scale, zero_point, + storage_min, storage_max, builder.getUnknownLoc()); +} } // namespace quant } // namespace mlir diff --git a/tensorflow/compiler/mlir/lite/quantization/quantization_utils.h b/tensorflow/compiler/mlir/lite/quantization/quantization_utils.h index 4ced43014f5..07e5ba4e879 100644 --- a/tensorflow/compiler/mlir/lite/quantization/quantization_utils.h +++ b/tensorflow/compiler/mlir/lite/quantization/quantization_utils.h @@ -395,8 +395,6 @@ struct FoldTrivalRequantizeOp : public OpRewritePattern { llvm::SmallVector new_output_types; for (auto result : def->getResults()) { - result.getUsers().begin()->dump(); - op.dump(); if (result.hasOneUse() && *result.getUsers().begin() == op) { new_output_types.push_back(op.qtype()); } else { @@ -502,6 +500,13 @@ void ApplyQuantizationParamsPropagation(mlir::FuncOp func, bool is_signed, bool RemoveRedundantStatsOps(mlir::FuncOp func, OpQuantSpecGetter op_quant_spec_getter); +// Given quantization parameters for int8, compute the quantization parameters +// for uint if it is required, and wrap the result in an UniformQuantizedType. +quant::UniformQuantizedType GetFixedOutputRange(bool is_signed, int bit_width, + Type tensor_type, double scale, + int64_t zero_point, + int64_t storage_min = -128, + int64_t storage_max = 127); } // namespace quant } // namespace mlir From 08d78e5b92f4b2a731624592a88ce59ee4c9a1d3 Mon Sep 17 00:00:00 2001 From: Tomer Kaftan Date: Fri, 10 Jul 2020 18:56:32 -0700 Subject: [PATCH 0214/2522] Updates a number of Keras tests to pass when kerastensors are enabled. Most of these changes are just updates to the tests themselves when the exact errors being raised/internal checks the test should be making may have changed in a way that does not affect behavior. The one behavior fix is for metrics: In head, when calling a metric on a symbolic keras input the current metric value is eagerly returned and no update is applied. W/ kerastensors before this cl a messy stack trace was raised. This cl fixes KerasTensors so they act in the same way as symbolic inputs in head: return the current metric value w/o applying any updates. In the future we may want to revisit this & make the behavior of metrics more consistent w/ standard keras layers: If a metric sees a symbolic input, return a symbolic output that represents the value of the metric. If a functional model is built from those symbolic inputs/outputs, the metric will be included as a layer that applies the update & returns the metric result at the point when it's run. 
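A rough sketch of the behavior described above (hypothetical usage, not code from this change; it assumes a TensorFlow build at roughly this revision):

    import tensorflow as tf

    m = tf.keras.metrics.Mean()
    m.update_state([1.0, 3.0])        # running mean is now 2.0

    x = tf.keras.Input(shape=(1,))    # symbolic Keras input (a KerasTensor when enabled)
    _ = m(x)                          # current value is returned; no update is applied

    print(float(m.result()))          # still 2.0; the metric state is unchanged

With KerasTensors enabled, calling a metric on a symbolic input now follows this same path instead of raising.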
PiperOrigin-RevId: 320714741 Change-Id: Ie535d8b8dcb35e65ced20d28a0e1b6d59c4c9b83 --- .../keras/engine/base_layer_utils_test.py | 18 +++-- .../python/keras/engine/training_test.py | 44 ++++++----- .../keras/engine/training_utils_test.py | 79 +++++++++++++------ .../python/keras/keras_parameterized_test.py | 8 +- tensorflow/python/keras/metrics.py | 8 +- 5 files changed, 102 insertions(+), 55 deletions(-) diff --git a/tensorflow/python/keras/engine/base_layer_utils_test.py b/tensorflow/python/keras/engine/base_layer_utils_test.py index 72a4977f003..c59e518536d 100644 --- a/tensorflow/python/keras/engine/base_layer_utils_test.py +++ b/tensorflow/python/keras/engine/base_layer_utils_test.py @@ -90,16 +90,18 @@ class OpLayerTest(keras_parameterized.TestCase): self.assertAllClose(expected, output) def test_ragged_op_layer(self): - with self.assertRaisesRegex(ValueError, 'Keras automatic op wrapping'): - int_values = keras.Input(shape=(None,), dtype=dtypes.int32, ragged=True) - float_values = math_ops.cast(int_values, dtypes.float32) - _ = keras.Model(int_values, float_values) + with testing_utils.use_keras_tensors_scope(False): + with self.assertRaisesRegex(ValueError, 'Keras automatic op wrapping'): + int_values = keras.Input(shape=(None,), dtype=dtypes.int32, ragged=True) + float_values = math_ops.cast(int_values, dtypes.float32) + _ = keras.Model(int_values, float_values) def test_sparse_op_layer(self): - with self.assertRaisesRegex(ValueError, 'Keras automatic op wrapping'): - int_values = keras.Input(shape=(None,), dtype=dtypes.int32, sparse=True) - float_values = math_ops.cast(int_values, dtypes.float32) - _ = keras.Model(int_values, float_values) + with testing_utils.use_keras_tensors_scope(False): + with self.assertRaisesRegex(ValueError, 'Keras automatic op wrapping'): + int_values = keras.Input(shape=(None,), dtype=dtypes.int32, sparse=True) + float_values = math_ops.cast(int_values, dtypes.float32) + _ = keras.Model(int_values, float_values) def test_ragged_op_layer_keras_tensors(self): with testing_utils.use_keras_tensors_scope(True): diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py index 2885422ac42..93e9b66b196 100644 --- a/tensorflow/python/keras/engine/training_test.py +++ b/tensorflow/python/keras/engine/training_test.py @@ -1644,13 +1644,21 @@ class TestExceptionsAndWarnings(keras_parameterized.TestCase): run_eagerly=testing_utils.should_run_eagerly()) @keras_parameterized.run_with_all_model_types - @keras_parameterized.run_all_keras_modes(skip_keras_tensors=True) + @keras_parameterized.run_all_keras_modes def test_sparse_op_with_op_layer(self): - inputs = layers_module.Input(shape=(2,), sparse=True, name='sparse_tensor') - output = sparse_ops.sparse_minimum(inputs, inputs) - with self.assertRaisesRegex(ValueError, - 'not supported by Keras automatic op wrapping'): - training_module.Model([inputs], output) + with testing_utils.use_keras_tensors_scope(False): + # The meaningful error is only raised w/o KerasTensors. + # It's tricky to raise the exact same error w/ KerasTensors enabled. + # We may want to add dispatching to the sparse_ops and have dispatch + # trigger on attributeerror so that these ops fully work w/ KerasTensors. 
+ # This may need to wait until dispatch v2 + inputs = layers_module.Input( + shape=(2,), sparse=True, name='sparse_tensor') + output = sparse_ops.sparse_minimum(inputs, inputs) + with self.assertRaisesRegex( + ValueError, 'not supported by Keras automatic ' + 'op wrapping'): + training_module.Model([inputs], output) @keras_parameterized.run_all_keras_modes(always_skip_v1=True) def test_predict_error_with_empty_x(self): @@ -2906,7 +2914,7 @@ class TestTrainingWithMetrics(keras_parameterized.TestCase): scores = model.train_on_batch(x, y, sample_weight=w) self.assertArrayNear(scores, [0.3328, 0.8], 0.001) - @keras_parameterized.run_all_keras_modes(skip_keras_tensors=True) + @keras_parameterized.run_all_keras_modes def test_add_metric_with_tensor_on_model(self): x = layers_module.Input(shape=(1,)) y = layers_module.Dense(1, kernel_initializer='ones')(x) @@ -2920,11 +2928,11 @@ class TestTrainingWithMetrics(keras_parameterized.TestCase): with self.assertRaisesRegex( ValueError, 'Expected a symbolic Tensor for the metric value'): model.add_metric(mean_result, name='metric_2') - - with self.assertRaisesRegex( - ValueError, 'Using the result of calling a `Metric` object '): - with backend.get_graph().as_default(): - model.add_metric(metrics_module.Mean(name='metric_2')(y)) + else: + with self.assertRaisesRegex( + ValueError, 'Using the result of calling a `Metric` object '): + with backend.get_graph().as_default(): + model.add_metric(metrics_module.Mean(name='metric_2')(y)) model.compile( 'sgd', @@ -3021,8 +3029,7 @@ class TestTrainingWithMetrics(keras_parameterized.TestCase): self.assertEqual(history.history['metric_1'][-1], 5) self.assertAlmostEqual(history.history['val_metric_1'][-1], 5, 0) - @keras_parameterized.run_all_keras_modes(always_skip_v1=True, - skip_keras_tensors=True) + @keras_parameterized.run_all_keras_modes(always_skip_v1=True) def test_model_metrics_list(self): class LayerWithAddMetric(layers_module.Layer): @@ -3063,10 +3070,11 @@ class TestTrainingWithMetrics(keras_parameterized.TestCase): ValueError, 'Expected a symbolic Tensor for the metric value'): model.add_metric(mean_result, name='metric_4') - with self.assertRaisesRegex( - ValueError, 'Using the result of calling a `Metric` object '): - with backend.get_graph().as_default(): - model.add_metric(metrics_module.Mean(name='metric_4')(y)) + else: + with self.assertRaisesRegex( + ValueError, 'Using the result of calling a `Metric` object '): + with backend.get_graph().as_default(): + model.add_metric(metrics_module.Mean(name='metric_4')(y)) model.compile( 'sgd', diff --git a/tensorflow/python/keras/engine/training_utils_test.py b/tensorflow/python/keras/engine/training_utils_test.py index bc2c4c91268..06d26ef5088 100644 --- a/tensorflow/python/keras/engine/training_utils_test.py +++ b/tensorflow/python/keras/engine/training_utils_test.py @@ -34,6 +34,7 @@ from tensorflow.python.framework import tensor_util from tensorflow.python.keras import backend from tensorflow.python.keras import keras_parameterized from tensorflow.python.keras import testing_utils +from tensorflow.python.keras.engine import keras_tensor from tensorflow.python.keras.engine import training_utils from tensorflow.python.keras.utils import tf_utils from tensorflow.python.platform import test @@ -54,16 +55,28 @@ class ModelInputsTest(test.TestCase): self.assertEqual(backend.floatx(), vals[0].dtype) def test_single_thing_eager(self): - with context.eager_mode(): - a = np.ones(10, dtype=np.int32) - model_inputs = training_utils.ModelInputs(a) - 
self.assertEqual(['input_1'], model_inputs.get_input_names()) - val = model_inputs.get_symbolic_inputs() - self.assertTrue(tf_utils.is_symbolic_tensor(val)) - vals = model_inputs.get_symbolic_inputs(return_single_as_list=True) - self.assertEqual(1, len(vals)) - self.assertTrue(tf_utils.is_symbolic_tensor(vals[0])) - self.assertEqual(dtypes.int32, vals[0].dtype) + with testing_utils.use_keras_tensors_scope(False): + with context.eager_mode(): + a = np.ones(10, dtype=np.int32) + model_inputs = training_utils.ModelInputs(a) + self.assertEqual(['input_1'], model_inputs.get_input_names()) + val = model_inputs.get_symbolic_inputs() + self.assertTrue(tf_utils.is_symbolic_tensor(val)) + vals = model_inputs.get_symbolic_inputs(return_single_as_list=True) + self.assertEqual(1, len(vals)) + self.assertTrue(tf_utils.is_symbolic_tensor(vals[0])) + self.assertEqual(dtypes.int32, vals[0].dtype) + with testing_utils.use_keras_tensors_scope(True): + with context.eager_mode(): + a = np.ones(10, dtype=np.int32) + model_inputs = training_utils.ModelInputs(a) + self.assertEqual(['input_1'], model_inputs.get_input_names()) + val = model_inputs.get_symbolic_inputs() + self.assertIsInstance(val, keras_tensor.KerasTensor) + vals = model_inputs.get_symbolic_inputs(return_single_as_list=True) + self.assertEqual(1, len(vals)) + self.assertIsInstance(vals[0], keras_tensor.KerasTensor) + self.assertEqual(dtypes.int32, vals[0].dtype) def test_list(self): a = [np.ones(10), np.ones(20)] @@ -74,13 +87,22 @@ class ModelInputsTest(test.TestCase): self.assertTrue(tensor_util.is_tensor(vals[1])) def test_list_eager(self): - with context.eager_mode(): - a = [np.ones(10), np.ones(20)] - model_inputs = training_utils.ModelInputs(a) - self.assertEqual(['input_1', 'input_2'], model_inputs.get_input_names()) - vals = model_inputs.get_symbolic_inputs() - self.assertTrue(tf_utils.is_symbolic_tensor(vals[0])) - self.assertTrue(tf_utils.is_symbolic_tensor(vals[1])) + with testing_utils.use_keras_tensors_scope(False): + with context.eager_mode(): + a = [np.ones(10), np.ones(20)] + model_inputs = training_utils.ModelInputs(a) + self.assertEqual(['input_1', 'input_2'], model_inputs.get_input_names()) + vals = model_inputs.get_symbolic_inputs() + self.assertTrue(tf_utils.is_symbolic_tensor(vals[0])) + self.assertTrue(tf_utils.is_symbolic_tensor(vals[1])) + with testing_utils.use_keras_tensors_scope(True): + with context.eager_mode(): + a = [np.ones(10), np.ones(20)] + model_inputs = training_utils.ModelInputs(a) + self.assertEqual(['input_1', 'input_2'], model_inputs.get_input_names()) + vals = model_inputs.get_symbolic_inputs() + self.assertIsInstance(vals[0], keras_tensor.KerasTensor) + self.assertIsInstance(vals[1], keras_tensor.KerasTensor) def test_dict(self): a = {'b': np.ones(10), 'a': np.ones(20)} @@ -91,13 +113,22 @@ class ModelInputsTest(test.TestCase): self.assertTrue(tensor_util.is_tensor(vals['b'])) def test_dict_eager(self): - with context.eager_mode(): - a = {'b': np.ones(10), 'a': np.ones(20)} - model_inputs = training_utils.ModelInputs(a) - self.assertEqual(['a', 'b'], model_inputs.get_input_names()) - vals = model_inputs.get_symbolic_inputs() - self.assertTrue(tf_utils.is_symbolic_tensor(vals['a'])) - self.assertTrue(tf_utils.is_symbolic_tensor(vals['b'])) + with testing_utils.use_keras_tensors_scope(False): + with context.eager_mode(): + a = {'b': np.ones(10), 'a': np.ones(20)} + model_inputs = training_utils.ModelInputs(a) + self.assertEqual(['a', 'b'], model_inputs.get_input_names()) + vals = 
model_inputs.get_symbolic_inputs() + self.assertTrue(tf_utils.is_symbolic_tensor(vals['a'])) + self.assertTrue(tf_utils.is_symbolic_tensor(vals['b'])) + with testing_utils.use_keras_tensors_scope(True): + with context.eager_mode(): + a = {'b': np.ones(10), 'a': np.ones(20)} + model_inputs = training_utils.ModelInputs(a) + self.assertEqual(['a', 'b'], model_inputs.get_input_names()) + vals = model_inputs.get_symbolic_inputs() + self.assertIsInstance(vals['a'], keras_tensor.KerasTensor) + self.assertIsInstance(vals['b'], keras_tensor.KerasTensor) class DatasetUtilsTest(test.TestCase, parameterized.TestCase): diff --git a/tensorflow/python/keras/keras_parameterized_test.py b/tensorflow/python/keras/keras_parameterized_test.py index 9bddc6608ff..33c68df62c4 100644 --- a/tensorflow/python/keras/keras_parameterized_test.py +++ b/tensorflow/python/keras/keras_parameterized_test.py @@ -269,8 +269,8 @@ class KerasParameterizedTest(keras_parameterized.TestCase): self.assertLen(l, 4) self.assertAllEqual(l, [ ("graph", False, False), - ("eager", True, False), - ("eager", False, False), + ("eager", True, keras_tensor._KERAS_TENSORS_ENABLED), + ("eager", False, keras_tensor._KERAS_TENSORS_ENABLED), ("eager", False, True), ]) @@ -281,8 +281,8 @@ class KerasParameterizedTest(keras_parameterized.TestCase): else: self.assertLen(l, 3) self.assertAllEqual(l, [ - ("eager", True, False), - ("eager", False, False), + ("eager", True, keras_tensor._KERAS_TENSORS_ENABLED), + ("eager", False, keras_tensor._KERAS_TENSORS_ENABLED), ("eager", False, True), ]) diff --git a/tensorflow/python/keras/metrics.py b/tensorflow/python/keras/metrics.py index cb24e3f0fc8..0d154dd25d1 100644 --- a/tensorflow/python/keras/metrics.py +++ b/tensorflow/python/keras/metrics.py @@ -39,6 +39,7 @@ from tensorflow.python.framework import tensor_spec from tensorflow.python.keras import backend as K from tensorflow.python.keras.engine import base_layer from tensorflow.python.keras.engine import base_layer_utils +from tensorflow.python.keras.engine import keras_tensor from tensorflow.python.keras.losses import binary_crossentropy from tensorflow.python.keras.losses import categorical_crossentropy from tensorflow.python.keras.losses import categorical_hinge @@ -208,7 +209,12 @@ class Metric(base_layer.Layer): def replica_local_fn(*args, **kwargs): """Updates the state of the metric in a replica-local context.""" - update_op = self.update_state(*args, **kwargs) # pylint: disable=not-callable + if any( + isinstance(arg, keras_tensor.KerasTensor) + for arg in nest.flatten((args, kwargs))): + update_op = None + else: + update_op = self.update_state(*args, **kwargs) # pylint: disable=not-callable update_ops = [] if update_op is not None: update_ops.append(update_op) From 5ede3172764c08c2d1d8c80c0d621cc6ce87c8e1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 10 Jul 2020 19:22:27 -0700 Subject: [PATCH 0215/2522] Add legalization for FusedBatchNormV2. 
PiperOrigin-RevId: 320716791 Change-Id: I79c5592873ca0dd2a521bb7a549154d3df06a267 --- .../mlir/tensorflow/ir/tf_generated_ops.td | 32 +++-- .../compiler/mlir/tensorflow/ir/tf_ops.cc | 67 +++++++---- .../compiler/mlir/xla/tests/legalize-tf.mlir | 22 ++++ .../mlir/xla/transforms/legalize_tf.cc | 113 +++++++++++------- 4 files changed, 159 insertions(+), 75 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td index a0e73f116cf..7bbdce6b985 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td @@ -3540,7 +3540,7 @@ The size of 1D Tensors matches the dimension C of the 4D Tensors. }]; } -def TF_FusedBatchNormV3Op : TF_Op<"FusedBatchNormV3", [NoSideEffect, TF_FoldOperandsTransposeInterface, TF_LayoutSensitiveInterface]> { +class TF_FusedBatchNormOpBase : TF_Op { let summary = "Batch normalization."; let description = [{ @@ -3561,15 +3561,6 @@ The size of 1D Tensors matches the dimension C of the 4D Tensors. DefaultValuedAttr:$is_training ); - let results = (outs - TensorOf<[BF16, F16, F32]>:$y, - F32Tensor:$batch_mean, - F32Tensor:$batch_variance, - F32Tensor:$reserve_space_1, - F32Tensor:$reserve_space_2, - F32Tensor:$reserve_space_3 - ); - TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; TF_DerivedOperandTypeAttr U = TF_DerivedOperandTypeAttr<1>; @@ -3585,6 +3576,27 @@ The size of 1D Tensors matches the dimension C of the 4D Tensors. }]; } +def TF_FusedBatchNormV2Op : TF_FusedBatchNormOpBase<"FusedBatchNormV2"> { + let results = (outs + TensorOf<[BF16, F16, F32]>:$y, + F32Tensor:$batch_mean, + F32Tensor:$batch_variance, + F32Tensor:$reserve_space_1, + F32Tensor:$reserve_space_2 + ); +} + +def TF_FusedBatchNormV3Op : TF_FusedBatchNormOpBase<"FusedBatchNormV3"> { + let results = (outs + TensorOf<[BF16, F16, F32]>:$y, + F32Tensor:$batch_mean, + F32Tensor:$batch_variance, + F32Tensor:$reserve_space_1, + F32Tensor:$reserve_space_2, + F32Tensor:$reserve_space_3 + ); +} + def TF_GatherOp : TF_Op<"Gather", [NoSideEffect]> { let summary = "Gather slices from `params` according to `indices`."; diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc index de6ce2d313a..101de17122a 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc @@ -1977,13 +1977,55 @@ static LogicalResult Verify(FusedBatchNormOp op) { return success(); } -LogicalResult FusedBatchNormV3Op::FoldOperandsPermutation( - ArrayRef permutation) { +//===----------------------------------------------------------------------===// +// FusedBatchNormV2Op / FusedBatchNormV3Op +//===----------------------------------------------------------------------===// + +template +static LogicalResult InferenceFoldOperandsPermutation( + ArrayRef permutation, Op *op) { // FusedBatchNorm in training mode is a layout sentitive operation, and should // have already assigned an optimal data format. - if (is_training()) return failure(); + if (op->is_training()) return failure(); + return ::mlir::TF::FoldOperandsPermutation(permutation, op); +} - return ::mlir::TF::FoldOperandsPermutation(permutation, this); +template +static StringRef GetOptimalLayout(const RuntimeDevices &devices, Op *op) { + // In inference mode FusedBatchNorm is not sensitive to data layout. 
+ if (!op->is_training()) return op->data_format(); + + // Keep current data format if no GPUs are available or if explicit placement + // does not allow to use GPU for this operation. + if (!CanUseGpuDevice(devices) || !CanUseGpuDevice(op->getOperation())) + return op->data_format(); + + // For f16 data type on devices with Tensor Cores support NHWC data format + // is up to ~2x faster. + auto x_ty = op->x().getType().template cast(); + const bool is_f16 = x_ty.getElementType().isF16(); + if (is_f16 && CanUseTensorCores(devices)) return "NHWC"; + + // For all other data types prefer NCHW. + return "NCHW"; +} + +LogicalResult FusedBatchNormV2Op::FoldOperandsPermutation( + ArrayRef permutation) { + return ::mlir::TF::InferenceFoldOperandsPermutation(permutation, this); +} + +LogicalResult FusedBatchNormV2Op::UpdateDataFormat(StringRef data_format) { + return ::mlir::TF::UpdateDataFormat(data_format, this); +} + +StringRef FusedBatchNormV2Op::GetOptimalLayout(const RuntimeDevices &devices) { + return ::mlir::TF::GetOptimalLayout(devices, this); +} + +LogicalResult FusedBatchNormV3Op::FoldOperandsPermutation( + ArrayRef permutation) { + return ::mlir::TF::InferenceFoldOperandsPermutation(permutation, this); } LogicalResult FusedBatchNormV3Op::UpdateDataFormat(StringRef data_format) { @@ -1991,22 +2033,7 @@ LogicalResult FusedBatchNormV3Op::UpdateDataFormat(StringRef data_format) { } StringRef FusedBatchNormV3Op::GetOptimalLayout(const RuntimeDevices &devices) { - // In inference mode FusedBatchNorm is not sensitive to data layout. - if (!is_training()) return data_format(); - - // Keep current data format if no GPUs are available or if explicit placement - // does not allow to use GPU for this operation. - if (!CanUseGpuDevice(devices) || !CanUseGpuDevice(getOperation())) - return data_format(); - - // For f16 data type on devices with Tensor Cores support NHWC data format - // is up to ~2x faster. - auto x_ty = x().getType().cast(); - const bool is_f16 = x_ty.getElementType().isF16(); - if (is_f16 && CanUseTensorCores(devices)) return "NHWC"; - - // For all other data types prefer NCHW. - return "NCHW"; + return ::mlir::TF::GetOptimalLayout(devices, this); } //===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir b/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir index 221fa19f77c..28a31058e99 100644 --- a/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir +++ b/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir @@ -26,6 +26,28 @@ func @fusedBatchNorm_training(%arg0: tensor<8x8x8x8xf32>, %arg1: tensor<8xf32>, return %0#0 : tensor<8x8x8x8xf32> } +// fusedBatchNormV2 is almost identical to fusedBatchNormV3 (and uses the same +// code), so only do a couple of basic checks. 
+ +// CHECK-LABEL: fusedBatchNormV2_noTraining +func @fusedBatchNormV2_noTraining(%arg0: tensor<8x8x8x8xf32>, %arg1: tensor<8xf32>, %arg2: tensor<8xf32>, %arg3: tensor<8xf32>, %arg4: tensor<8xf32>) -> (tensor<8x8x8x8xf32>) { + // CHECK: "mhlo.batch_norm_inference"({{.*}}, %arg1, %arg2, %arg3, %arg4) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) -> tensor<8x8x8x8xf32> + %0:5 = "tf.FusedBatchNormV2"(%arg0, %arg1, %arg2, %arg3, %arg4) {T = "tfdtype$DT_FLOAT", data_format = "NHWC", epsilon = 0.001 : f32, is_training = false} : (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) -> (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) + return %0#0 : tensor<8x8x8x8xf32> +} + +// CHECK-LABEL: fusedBatchNormV2_training +func @fusedBatchNormV2_training(%arg0: tensor<8x8x8x8xf32>, %arg1: tensor<8xf32>, %arg2: tensor<8xf32>, %arg3: tensor<8xf32>, %arg4: tensor<8xf32>) -> (tensor<8x8x8x8xf32>) { + // CHECK: %[[RESULT0:.*]] = "mhlo.batch_norm_training"({{.*}}, %arg1, %arg2) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>) -> tuple, tensor<8xf32>, tensor<8xf32>> + %0:5 = "tf.FusedBatchNormV2"(%arg0, %arg1, %arg2, %arg3, %arg4) {T = "tfdtype$DT_FLOAT", data_format = "NHWC", epsilon = 0.001 : f32, exponential_avg_factor = 1.0 : f32, is_training = true} : (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) -> (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) + // CHECK: "mhlo.get_tuple_element"(%[[RESULT0]]) {index = 0 : i32} : (tuple, tensor<8xf32>, tensor<8xf32>>) -> tensor<8x8x8x8xf32> + // CHECK: "mhlo.get_tuple_element"(%[[RESULT0]]) {index = 1 : i32} : (tuple, tensor<8xf32>, tensor<8xf32>>) -> tensor<8xf32> + // CHECK: %[[VAR:.*]] = "mhlo.get_tuple_element"(%[[RESULT0]]) {index = 2 : i32} : (tuple, tensor<8xf32>, tensor<8xf32>>) -> tensor<8xf32> + // CHECK: mhlo.constant + // CHECK: chlo.broadcast_multiply %[[VAR]], {{.*}} : (tensor<8xf32>, tensor) -> tensor<8xf32> + return %0#0 : tensor<8x8x8x8xf32> +} + // CHECK-LABEL: fusedBatchNormV3_noTraining func @fusedBatchNormV3_noTraining(%arg0: tensor<8x8x8x8xf32>, %arg1: tensor<8xf32>, %arg2: tensor<8xf32>, %arg3: tensor<8xf32>, %arg4: tensor<8xf32>) -> (tensor<8x8x8x8xf32>) { // CHECK: "mhlo.batch_norm_inference"({{.*}}, %arg1, %arg2, %arg3, %arg4) {epsilon = 1.000000e-03 : f32, feature_index = 3 : i64} : (tensor<8x8x8x8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>, tensor<8xf32>) -> tensor<8x8x8x8xf32> diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc index 23f11cef4d9..e498980572b 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc @@ -1531,23 +1531,23 @@ using ConvertFusedBatchNormGradV3Op = // Converts TensorFlow FusedBatchNormV3Op to either HLO BatchNormTrainingOp or // HLO BatchNormInferenceOp, depending on the value of the 'is_training' // parameter. 
-class ConvertFusedBatchNormV3Op - : public OpRewritePattern { +template +class ConvertFusedBatchNormBase : public OpRewritePattern { public: - using OpRewritePattern::OpRewritePattern; + using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(TF::FusedBatchNormV3Op op, + LogicalResult matchAndRewrite(FusedBatchNormOpT op, PatternRewriter &rewriter) const override { auto feature_dim = getFeatureDimensionAttr(rewriter, op.data_formatAttr(), op.x()); - auto input_type_tensor = op.x().getType().cast(); + auto input_type_tensor = op.x().getType().template cast(); auto input_element_type = input_type_tensor.getElementType(); - auto scale_type_tensor = op.scale().getType().cast(); + auto scale_type_tensor = op.scale().getType().template cast(); auto scale_element_type = scale_type_tensor.getElementType(); - auto mean_type_tensor = op.mean().getType().cast(); + auto mean_type_tensor = op.mean().getType().template cast(); auto mean_element_type = mean_type_tensor.getElementType(); // In the training case, dimensions of input tensors must be static. if (op.is_training() && (!input_type_tensor.hasStaticShape() || @@ -1561,7 +1561,7 @@ class ConvertFusedBatchNormV3Op Value bn_train_input = rewriter.create(op.getLoc(), op.x(), scale_element_type); TensorType bn_train_input_type_tensor = - bn_train_input.getType().cast(); + bn_train_input.getType().template cast(); if (op.is_training()) { // Training case. @@ -1643,17 +1643,25 @@ class ConvertFusedBatchNormV3Op /*broadcast_dimensions=*/DenseIntElementsAttr()); } - // TF FusedBatchNormV3 op expects 5 outputs. Outputs 3 and 4 are - // currently marked as "reserved spaces 1 and 2". They are used to - // pass the per-batch mean and variance to the gradiant. Here we - // maintain the same behavior by setting them to the mean and variance - // calculated by BatchNormTraining. Output 5 is unused; it doesn't - // matter what we pass there. - rewriter.replaceOp(op, {y_out, /*batch_mean=*/batch_mean, - /*batch_variance=*/corrected_variance, - /*reserve_space_1=*/reserve_space_1, - /*reserve_space_2=*/batch_variance, - /*reserve_space_3=*/op.x()}); + if (std::is_same::value) { + // FusedBatchNormV2 expects 4 outputs. + // Outputs 3 and 4 are currently marked as "reserved spaces 1 and 2". + // They are used to pass the per-batch mean and variance to the + // gradiant. Here we maintain the same behavior by setting them to the + // mean and variance calculated by BatchNormTraining. + rewriter.replaceOp(op, {y_out, /*batch_mean=*/batch_mean, + /*batch_variance=*/corrected_variance, + /*reserve_space_1=*/reserve_space_1, + /*reserve_space_2=*/batch_variance}); + } else { // TF::FusedBatchNormV3Op + // FusedBatchNormV3 expects a 5th output, but the output is unused; it + // doesn't matter what we pass there. + rewriter.replaceOp(op, {y_out, /*batch_mean=*/batch_mean, + /*batch_variance=*/corrected_variance, + /*reserve_space_1=*/reserve_space_1, + /*reserve_space_2=*/batch_variance, + /*reserve_space_3=*/op.x()}); + } } else { // Inference case. auto bn_train_op = rewriter.create( op.getLoc(), @@ -1670,31 +1678,45 @@ class ConvertFusedBatchNormV3Op // not used for inference. It doesn't matter what values we provide for // the last 5 results as long as they are of the same type. Forward // input mean and variance to output mean, variance, reserved_space_1 and - // reserver_space_2. Create a constant tensor to forward to last - // reserve_space_3 output. 
- auto reserve_space_3_type = op.getResult(5).getType().cast(); - int num_elements = reserve_space_3_type.hasStaticShape() - ? reserve_space_3_type.getNumElements() - : 0; - auto const_attr_type = RankedTensorType::get( - {num_elements}, getElementTypeOrSelf(reserve_space_3_type)); - - Value dummy_const = rewriter.create( - op.getLoc(), DenseElementsAttr::get(const_attr_type, 0.0)); - if (const_attr_type != reserve_space_3_type) - dummy_const = rewriter.create( - op.getLoc(), reserve_space_3_type, dummy_const); - rewriter.replaceOp(op, {/*y=*/y_out, - /*batch_mean=*/op.mean(), - /*batch_variance=*/op.variance(), - /*reserve_space_1=*/op.mean(), - /*reserve_space_2=*/op.variance(), - /*reserve_space_3=*/dummy_const}); + // reserved_space_2. + if (std::is_same::value) { + rewriter.replaceOp(op, {/*y=*/y_out, + /*batch_mean=*/op.mean(), + /*batch_variance=*/op.variance(), + /*reserve_space_1=*/op.mean(), + /*reserve_space_2=*/op.variance()}); + } else { + // For FusedBatchNormV3Op, also create a constant tensor to forward to + // last reserve_space_3 output. + auto reserve_space_3_type = + op.getResult(5).getType().template cast(); + int num_elements = reserve_space_3_type.hasStaticShape() + ? reserve_space_3_type.getNumElements() + : 0; + auto const_attr_type = RankedTensorType::get( + {num_elements}, getElementTypeOrSelf(reserve_space_3_type)); + Value dummy_const = rewriter.create( + op.getLoc(), DenseElementsAttr::get(const_attr_type, 0.0)); + if (const_attr_type != reserve_space_3_type) + dummy_const = rewriter.create( + op.getLoc(), reserve_space_3_type, dummy_const); + rewriter.replaceOp(op, {/*y=*/y_out, + /*batch_mean=*/op.mean(), + /*batch_variance=*/op.variance(), + /*reserve_space_1=*/op.mean(), + /*reserve_space_2=*/op.variance(), + /*reserve_space_3=*/dummy_const}); + } } return success(); } }; +using ConvertFusedBatchNormV2Op = + ConvertFusedBatchNormBase; +using ConvertFusedBatchNormV3Op = + ConvertFusedBatchNormBase; + using PaddingArray = std::vector>; @@ -5481,12 +5503,13 @@ LogicalResult legalizeTF(Operation *op, bool allow_partial_conversion, ConvertConv2DBackpropInputOp, ConvertConv3DBackpropInputOp, ConvertCumsumOp, ConvertDiagPartOp, ConvertEinsumOp, ConvertFusedBatchNormGradOp, ConvertFusedBatchNormGradV2Op, - ConvertFusedBatchNormGradV3Op, ConvertFusedBatchNormV3Op, - ConvertInfeedDequeueTupleOp, ConvertInplaceUpdateOp, ConvertLinSpaceOp, - ConvertMaxOp, ConvertMinOp, ConvertAvgPoolOp, ConvertAvgPool2DGradOp, - ConvertAvgPool3DGradOp, ConvertMaxPool2DOp, ConvertMaxPool3DOp, - ConvertMaxPool2DGradOp, ConvertMaxPool3DGradOp, ConvertMeanOp, - ConvertOneHotOp, ConvertOutfeedEnqueueTupleOp, ConvertProdOp, ConvertQrOp, + ConvertFusedBatchNormGradV3Op, ConvertFusedBatchNormV2Op, + ConvertFusedBatchNormV3Op, ConvertInfeedDequeueTupleOp, + ConvertInplaceUpdateOp, ConvertLinSpaceOp, ConvertMaxOp, ConvertMinOp, + ConvertAvgPoolOp, ConvertAvgPool2DGradOp, ConvertAvgPool3DGradOp, + ConvertMaxPool2DOp, ConvertMaxPool3DOp, ConvertMaxPool2DGradOp, + ConvertMaxPool3DGradOp, ConvertMeanOp, ConvertOneHotOp, + ConvertOutfeedEnqueueTupleOp, ConvertProdOp, ConvertQrOp, ConvertDynamicRangeOp, ConvertRangeOp, ConvertSelectV2Op, ConvertSigmoidOp, ConvertShapeOp, ConvertSizeOp, ConvertSoftmaxOp, From dfae6ae7b7f533a6b063796e2a69ee5c0479dc75 Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Fri, 10 Jul 2020 19:25:10 -0700 Subject: [PATCH 0216/2522] Execute globals() and locals() in the correct context in converted code. 
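A minimal sketch of how the new helpers behave, mirroring the tests added below (the hand-written FunctionScope plumbing here is hypothetical; in converted code it is emitted by AutoGraph):

    from tensorflow.python.autograph.core import converter
    from tensorflow.python.autograph.core import function_wrappers
    from tensorflow.python.autograph.operators import py_builtins


    def user_fn():
      x = 1  # should be visible to locals() resolved in this frame
      with function_wrappers.FunctionScope(
          'user_fn', 'fn_scope', converter.ConversionOptions()) as fn_scope:
        # What converted code now does for `locals()` and `globals()` calls:
        return (py_builtins.locals_in_original_context(fn_scope),
                py_builtins.globals_in_original_context(fn_scope))


    locs, globs = user_fn()
    print(locs['x'])           # -> 1, resolved in user_fn's frame, not the wrapper's
    print('user_fn' in globs)  # -> True, the caller's module globals

This keeps globals()/locals() anchored to the user function's frame rather than the generated wrapper that actually executes the call.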
PiperOrigin-RevId: 320716956 Change-Id: If1eae7e65edbfb965edf73fa27d21bfc7bea8f18 --- tensorflow/python/autograph/impl/api.py | 4 + tensorflow/python/autograph/operators/BUILD | 20 +-- .../python/autograph/operators/py_builtins.py | 10 ++ .../operators/py_builtins_py3_test.py | 123 --------------- .../autograph/operators/py_builtins_test.py | 146 +++++++++++++++++- 5 files changed, 156 insertions(+), 147 deletions(-) delete mode 100644 tensorflow/python/autograph/operators/py_builtins_py3_test.py diff --git a/tensorflow/python/autograph/impl/api.py b/tensorflow/python/autograph/impl/api.py index 8c7093c864d..fb318e71cd9 100644 --- a/tensorflow/python/autograph/impl/api.py +++ b/tensorflow/python/autograph/impl/api.py @@ -377,6 +377,10 @@ def converted_call(f, return py_builtins.eval_in_original_context(f, args, caller_fn_scope) if f is super: return py_builtins.super_in_original_context(f, args, caller_fn_scope) + if f is globals: + return py_builtins.globals_in_original_context(caller_fn_scope) + if f is locals: + return py_builtins.locals_in_original_context(caller_fn_scope) if kwargs: return py_builtins.overload_of(f)(*args, **kwargs) else: diff --git a/tensorflow/python/autograph/operators/BUILD b/tensorflow/python/autograph/operators/BUILD index 5f644ea525d..77d02e69976 100644 --- a/tensorflow/python/autograph/operators/BUILD +++ b/tensorflow/python/autograph/operators/BUILD @@ -120,8 +120,7 @@ py_test( name = "py_builtins_test", srcs = ["py_builtins_test.py"], python_version = "PY3", - srcs_version = "PY2AND3", - tags = ["no_windows"], + srcs_version = "PY3", deps = [ ":operators", "//tensorflow/python:client_testlib", @@ -133,23 +132,6 @@ py_test( ], ) -py_test( - name = "py_builtins_py3_test", - srcs = ["py_builtins_py3_test.py"], - python_version = "PY3", - srcs_version = "PY3", - tags = [ - "no_windows", - # TODO(kkb): Temporay workaround since KokoroPresubmit was failing. - # cl/259400943 for more context. - "no_oss_py2", - ], - deps = [ - ":operators", - "//tensorflow/python:client_testlib", - ], -) - py_test( name = "slices_test", srcs = ["slices_test.py"], diff --git a/tensorflow/python/autograph/operators/py_builtins.py b/tensorflow/python/autograph/operators/py_builtins.py index 4dbe25aec6d..f86668c12f0 100644 --- a/tensorflow/python/autograph/operators/py_builtins.py +++ b/tensorflow/python/autograph/operators/py_builtins.py @@ -89,6 +89,16 @@ def _find_originating_frame(caller_fn_scope, innermost=True): return result +def locals_in_original_context(caller_fn_scope): + """Executes the locals function in the context of a specified function.""" + return _find_originating_frame(caller_fn_scope, innermost=True).f_locals + + +def globals_in_original_context(caller_fn_scope): + """Executes the locals function in the context of a specified function.""" + return _find_originating_frame(caller_fn_scope, innermost=True).f_globals + + def eval_in_original_context(f, args, caller_fn_scope): """Executes the eval function in the context of a specified function.""" # When control flow is rewritten using functions, eval should use the diff --git a/tensorflow/python/autograph/operators/py_builtins_py3_test.py b/tensorflow/python/autograph/operators/py_builtins_py3_test.py deleted file mode 100644 index 11a33b90b75..00000000000 --- a/tensorflow/python/autograph/operators/py_builtins_py3_test.py +++ /dev/null @@ -1,123 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for py_builtins_py3 module.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.autograph.core import converter -from tensorflow.python.autograph.core import function_wrappers -from tensorflow.python.autograph.operators import py_builtins -from tensorflow.python.platform import test - - -class TestBaseClass(object): - - def overridden_method(self, x): - return x + 20 - - -class PyBuiltinsTest(test.TestCase): - - def _basic_function_scope(self): - return function_wrappers.FunctionScope( - 'test_function_name', - 'test_scope', # Note: this must match the name in the `with` statement. - converter.ConversionOptions()) - - def test_super_in_original_context_niladic_call(self): - test_case_self = self - - class TestSubclass(TestBaseClass): - - def overridden_method(self, x): - test_case_self.fail('This should never be called.') - - def test_method(self): - with test_case_self._basic_function_scope() as test_scope: - b = py_builtins.super_in_original_context(super, (), test_scope) - return b.overridden_method(1) - - tc = TestSubclass() - self.assertEqual(tc.test_method(), 21) - - def test_super_in_original_context_caller_with_locals(self): - test_case_self = self - - class TestSubclass(TestBaseClass): - - def overridden_method(self, x): - test_case_self.fail('This should never be called.') - - def test_method(self, x): - y = 7 - with test_case_self._basic_function_scope() as test_scope: - z = 7 - return py_builtins.super_in_original_context( - super, (), test_scope).overridden_method(x + y - z) - - tc = TestSubclass() - self.assertEqual(tc.test_method(1), 21) - - def test_super_in_original_context_inner_function(self): - test_case_self = self - - class TestSubclass(TestBaseClass): - - def overridden_method(self, x): - test_case_self.fail('This should never be called.') - - def test_method(self, x): - with test_case_self._basic_function_scope() as test_scope: - # Oddly, it's sufficient to use `self` in an inner function - # to gain access to __class__ in this scope. - # TODO(mdan): Is this true across implementations? - # Note: normally, it's illegal to use super() in inner functions (it - # throws an error), but the generated code may create them. - def inner_fn(): - return py_builtins.super_in_original_context( - super, (), test_scope).overridden_method(x) - - return inner_fn() - - tc = TestSubclass() - self.assertEqual(tc.test_method(1), 21) - - def test_super_in_original_context_inner_lambda(self): - test_case_self = self - - class TestSubclass(TestBaseClass): - - def overridden_method(self, x): - test_case_self.fail('This should never be called.') - - def test_method(self, x): - with test_case_self._basic_function_scope() as test_scope: - # Oddly, it's sufficient to use `self` in an inner function - # to gain access to __class__ in this scope. 
- # TODO(mdan): Is this true across implementations? - # Note: normally, it's illegal to use super() in inner functions (it - # throws an error), but the generated code may create them. - l = lambda: py_builtins.super_in_original_context( # pylint:disable=g-long-lambda - super, (), test_scope).overridden_method(x) - return l() - - tc = TestSubclass() - self.assertEqual(tc.test_method(1), 21) - - -if __name__ == '__main__': - test.main() diff --git a/tensorflow/python/autograph/operators/py_builtins_test.py b/tensorflow/python/autograph/operators/py_builtins_test.py index 43feb0d9bc3..8dab5c2522c 100644 --- a/tensorflow/python/autograph/operators/py_builtins_test.py +++ b/tensorflow/python/autograph/operators/py_builtins_test.py @@ -40,10 +40,11 @@ from tensorflow.python.platform import test class TestBase(object): - def plus_twenty(self, x): + def overridden_method(self, x): return x + 20 +@test_util.run_all_in_graph_and_eager_modes class PyBuiltinsTest(test.TestCase): def test_abs(self): @@ -400,12 +401,67 @@ class PyBuiltinsTest(test.TestCase): self.assertEqual(test_fn(), 2) + def test_locals_in_original_context(self): + + def test_fn(): + l = 1 # pylint:disable=unused-variable + with self._basic_function_scope() as test_scope: + return py_builtins.locals_in_original_context(test_scope) + + locs = test_fn() + + self.assertEqual(locs['l'], 1) + + def test_locals_in_original_context_inner_function(self): + + def test_fn(): + l = 1 # pylint:disable=unused-variable + with self._basic_function_scope() as test_scope: + + def inner_fn(): + # Note: a user function without a top-level function scope should + # never be found in user code; it's only possible in generated code. + l = 2 # pylint:disable=unused-variable + return py_builtins.locals_in_original_context(test_scope) + + return inner_fn() + + locs = test_fn() + + self.assertEqual(locs['l'], 2) + + def test_globals_in_original_context(self): + + def test_fn(): + with self._basic_function_scope() as test_scope: + return py_builtins.globals_in_original_context(test_scope) + + globs = test_fn() + + self.assertIs(globs['TestBase'], TestBase) + + def test_globals_in_original_context_inner_function(self): + + def test_fn(): + with self._basic_function_scope() as test_scope: + + def inner_fn(): + # Note: a user function without a top-level function scope should + # never be found in user code; it's only possible in generated code. 
+ return py_builtins.globals_in_original_context(test_scope) + + return inner_fn() + + globs = test_fn() + + self.assertIs(globs['TestBase'], TestBase) + def test_super_in_original_context_unary_call(self): test_case_self = self class TestSubclass(TestBase): - def plus_twenty(self, x): + def overridden_method(self, x): test_case_self.fail('This should never be called.') def test_method(self): @@ -413,7 +469,7 @@ class PyBuiltinsTest(test.TestCase): test_base_unbound = py_builtins.super_in_original_context( super, (TestSubclass,), test_scope) test_base = test_base_unbound.__get__(self, TestSubclass) - return test_base.plus_twenty(1) + return test_base.overridden_method(1) tc = TestSubclass() self.assertEqual(tc.test_method(), 21) @@ -423,18 +479,98 @@ class PyBuiltinsTest(test.TestCase): class TestSubclass(TestBase): - def plus_twenty(self, x): + def overridden_method(self, x): test_case_self.fail('This should never be called.') def test_method(self): with test_case_self._basic_function_scope() as test_scope: test_base = py_builtins.super_in_original_context( super, (TestSubclass, self), test_scope) - return test_base.plus_twenty(1) + return test_base.overridden_method(1) tc = TestSubclass() self.assertEqual(tc.test_method(), 21) + def test_super_in_original_context_niladic_call(self): + test_case_self = self + + class TestSubclass(TestBase): + + def overridden_method(self, x): + test_case_self.fail('This should never be called.') + + def test_method(self): + with test_case_self._basic_function_scope() as test_scope: + b = py_builtins.super_in_original_context(super, (), test_scope) + return b.overridden_method(1) + + tc = TestSubclass() + self.assertEqual(tc.test_method(), 21) + + def test_super_in_original_context_caller_with_locals(self): + test_case_self = self + + class TestSubclass(TestBase): + + def overridden_method(self, x): + test_case_self.fail('This should never be called.') + + def test_method(self, x): + y = 7 + with test_case_self._basic_function_scope() as test_scope: + z = 7 + return py_builtins.super_in_original_context( + super, (), test_scope).overridden_method(x + y - z) + + tc = TestSubclass() + self.assertEqual(tc.test_method(1), 21) + + def test_super_in_original_context_inner_function(self): + test_case_self = self + + class TestSubclass(TestBase): + + def overridden_method(self, x): + test_case_self.fail('This should never be called.') + + def test_method(self, x): + with test_case_self._basic_function_scope() as test_scope: + # Oddly, it's sufficient to use `self` in an inner function + # to gain access to __class__ in this scope. + # TODO(mdan): Is this true across implementations? + # Note: normally, it's illegal to use super() in inner functions (it + # throws an error), but the generated code may create them. + def inner_fn(): + return py_builtins.super_in_original_context( + super, (), test_scope).overridden_method(x) + + return inner_fn() + + tc = TestSubclass() + self.assertEqual(tc.test_method(1), 21) + + def test_super_in_original_context_inner_lambda(self): + test_case_self = self + + class TestSubclass(TestBase): + + def overridden_method(self, x): + test_case_self.fail('This should never be called.') + + def test_method(self, x): + with test_case_self._basic_function_scope() as test_scope: + # Oddly, it's sufficient to use `self` in an inner function + # to gain access to __class__ in this scope. + # TODO(mdan): Is this true across implementations? 
+ # Note: normally, it's illegal to use super() in inner functions (it + # throws an error), but the generated code may create them. + l = lambda: py_builtins.super_in_original_context( # pylint:disable=g-long-lambda + super, (), test_scope).overridden_method(x) + return l() + + tc = TestSubclass() + self.assertEqual(tc.test_method(1), 21) + def test_filter(self): self.assertListEqual( list(py_builtins.filter_(lambda x: x == 'b', ['a', 'b', 'c'])), ['b']) From 63e31d95084cdc8ff04022afca61dc4bce414873 Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Fri, 10 Jul 2020 19:26:41 -0700 Subject: [PATCH 0217/2522] Load source code of lambdas in a way that works in interactive shells like Jupyter. PiperOrigin-RevId: 320717062 Change-Id: I0a33bd2da3e0acb2879bf6cc5784f1be11dd2619 --- tensorflow/python/autograph/pyct/parser.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/autograph/pyct/parser.py b/tensorflow/python/autograph/pyct/parser.py index 5b369270ac7..51523cbc642 100644 --- a/tensorflow/python/autograph/pyct/parser.py +++ b/tensorflow/python/autograph/pyct/parser.py @@ -22,6 +22,7 @@ from __future__ import division from __future__ import print_function import inspect +import linecache import re import sys import textwrap @@ -183,7 +184,6 @@ def _without_context(node, lines, minl, maxl): if end_col_offset is not None: # This is only available in 3.8. code_lines[-1] = code_lines[-1][:end_col_offset] - code_block = '\n'.join(lines[minl - 1:maxl]) col_offset = getattr(node, 'col_offset', None) if col_offset is None: @@ -195,7 +195,7 @@ def _without_context(node, lines, minl, maxl): if col_offset is not None: code_lines[0] = code_lines[0][col_offset:] - code_block = '\n'.join(code_lines) + code_block = '\n'.join([c.rstrip() for c in code_lines]) return node, code_block @@ -247,9 +247,15 @@ def _parse_lambda(lam): # potential multi-line definition. mod = inspect.getmodule(lam) + f = inspect.getsourcefile(lam) def_line = lam.__code__.co_firstlineno - source = inspect.getsource(mod) - lines = source.split('\n') + + # This method is more robust that just calling inspect.getsource(mod), as it + # works in interactive shells, where getsource would fail. This is the + # same procedure followed by inspect for non-modules: + # https://github.com/python/cpython/blob/3.8/Lib/inspect.py#L772 + lines = linecache.getlines(f, mod.__dict__) + source = ''.join(lines) # Narrow down to the last node starting before our definition node. 
all_nodes = parse(source, preamble_len=0, single_node=False) From fd8eb855c17fe8a76841feb469e3f849b9fc2432 Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Sat, 11 Jul 2020 02:59:50 +0000 Subject: [PATCH 0218/2522] clean working first gradient --- tensorflow/core/kernels/map_kernels.cc | 7 +- tensorflow/core/kernels/map_kernels.h | 187 ++---------------- tensorflow/core/kernels/tensor_map.h | 23 --- tensorflow/core/kernels/tensor_map_test.cc | 23 --- tensorflow/core/ops/map_ops.cc | 76 +------ .../python/kernel_tests/list_ops_test.py | 12 +- .../python/kernel_tests/map_ops_test.py | 84 +++----- tensorflow/python/ops/map_ops.py | 30 +-- 8 files changed, 51 insertions(+), 391 deletions(-) diff --git a/tensorflow/core/kernels/map_kernels.cc b/tensorflow/core/kernels/map_kernels.cc index eba2d99a75b..7d45d3942e1 100644 --- a/tensorflow/core/kernels/map_kernels.cc +++ b/tensorflow/core/kernels/map_kernels.cc @@ -29,9 +29,12 @@ REGISTER_KERNEL_BUILDER(Name("TensorMapSize").Device(DEVICE_CPU), REGISTER_KERNEL_BUILDER(Name("TensorMapLookup").Device(DEVICE_CPU), TensorMapLookup); +REGISTER_KERNEL_BUILDER(Name("TensorMapInsert").Device(DEVICE_CPU), + TensorMapInsert); + REGISTER_KERNEL_BUILDER(Name("TensorMapErase").Device(DEVICE_CPU), TensorMapErase); -REGISTER_KERNEL_BUILDER(Name("ZeroOut").Device(DEVICE_CPU), - ZeroOutOp); +REGISTER_KERNEL_BUILDER(Name("TensorMapReplace").Device(DEVICE_CPU), + TensorMapReplace); } \ No newline at end of file diff --git a/tensorflow/core/kernels/map_kernels.h b/tensorflow/core/kernels/map_kernels.h index 78040359026..33282a75e0a 100644 --- a/tensorflow/core/kernels/map_kernels.h +++ b/tensorflow/core/kernels/map_kernels.h @@ -24,9 +24,10 @@ using namespace std; namespace tensorflow { + Status GetInputMap(OpKernelContext* c, int index, const TensorMap** map) { if (!TensorShapeUtils::IsScalar(c->input(index).shape())) { - return errors::InvalidArgument("Input list must be a scalar saw: ", + return errors::InvalidArgument("Input map must be a scalar saw: ", c->input(index).shape().DebugString()); } const TensorMap* m = c->input(index).scalar()().get(); @@ -39,6 +40,7 @@ Status GetInputMap(OpKernelContext* c, int index, const TensorMap** map) { return Status::OK(); } + Status ForwardInputOrCreateNewMap(OpKernelContext* c, int32 input_index, int32 output_index, const TensorMap& input_map, @@ -77,24 +79,22 @@ Status ForwardInputOrCreateNewMap(OpKernelContext* c, int32 input_index, return Status::OK(); } + class EmptyTensorMap : public OpKernel { public: - explicit EmptyTensorMap(OpKernelConstruction* ctx) : OpKernel(ctx) {} + explicit EmptyTensorMap(OpKernelConstruction* c) : OpKernel(c) {} - void Compute(OpKernelContext* ctx) override { - std::cout << "hello EmptyTensorMap map_kernels.h" << std::endl; + void Compute(OpKernelContext* c) override { Tensor* result; AllocatorAttributes attr; attr.set_on_host(true); - OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape{}, &result, attr)); + OP_REQUIRES_OK(c, c->allocate_output(0, TensorShape{}, &result, attr)); TensorMap empty; result->scalar()() = std::move(empty); } - - private: - DataType element_dtype_; }; + class TensorMapSize : public OpKernel { public: explicit TensorMapSize(OpKernelConstruction* c) : OpKernel(c) {} @@ -109,45 +109,6 @@ class TensorMapSize : public OpKernel { } }; -class TensorMapZeros : public OpKernel { - public: - explicit TensorMapZeros(OpKernelConstruction* c) : OpKernel(c) { - //OP_REQUIRES_OK(c, c->GetAttr("element_dtype", &element_dtype_)); - } - ~TensorMapZeros() override {} - - void 
Compute(OpKernelContext* c) override { - std::cout << "hello TensorMapInsert kernel" << std::endl; - const Tensor& temp_key = c->input(1); - const TensorKey key = TensorKey(temp_key); - std::cout << "got key" << std::endl; - const Tensor& value = c->input(2); - std::cout << "got value" << std::endl; - - const TensorMap* m = nullptr; - OP_REQUIRES_OK(c, GetInputMap(c, 0, &m)); - std::cout << "got map" << std::endl; - //TensorMap output_map; - //OP_REQUIRES_OK(c, ForwardInputOrCreateNewMap(c, 0, 0, *m, &output_map)); - //std::cout << "create output" << std::endl; - //output_map = m->Zeros(); - //c->set_output(0, &&output_map); - //std::cout << "inserted" << std::endl; - - Tensor* result; - AllocatorAttributes attr; - attr.set_on_host(true); - OP_REQUIRES_OK(c, c->allocate_output(0, TensorShape{}, &result, attr)); - TensorMap output_map = m->Zeros(); - result->scalar()() = std::move(output_map); - } - - private: - DataType element_dtype_; -}; - -REGISTER_KERNEL_BUILDER(Name("TensorMapZeros").Device(DEVICE_CPU), - TensorMapZeros); class TensorMapInsert : public OpKernel { public: @@ -157,53 +118,20 @@ class TensorMapInsert : public OpKernel { ~TensorMapInsert() override {} void Compute(OpKernelContext* c) override { - std::cout << "hello TensorMapInsert kernel" << std::endl; - const Tensor& temp_key = c->input(1); - const TensorKey key = TensorKey(temp_key); - std::cout << "got key" << std::endl; + const TensorKey& key = c->input(1); const Tensor& value = c->input(2); - std::cout << "got value" << std::endl; - /*OP_REQUIRES(c, element_dtype_ == value.dtype(), - errors::InvalidArgument("Invalid data types; list elements ", - DataTypeString(element_dtype_), - " but tried to append ", - DataTypeString(value.dtype())));*/ - const TensorMap* m = nullptr; OP_REQUIRES_OK(c, GetInputMap(c, 0, &m)); - std::cout << "got map" << std::endl; - /*OP_REQUIRES(c, m->element_shape.IsCompatibleWith(input.shape()), - errors::InvalidArgument( - "Tried to append a map with incompatible shape to a " - "list. 
Op element shape: ", - input.shape().DebugString(), - " list shape: ", m->element_shape.DebugString()));*/ - /*OP_REQUIRES(c, element_dtype_ == m->element_dtype, - errors::InvalidArgument("Invalid data types; op elements ", - DataTypeString(element_dtype_), - " but list elements ", - DataTypeString(l->element_dtype))); - if (l->max_num_elements != -1) { - OP_REQUIRES( - c, l->tensors().size() < l->max_num_elements, - errors::InvalidArgument("Tried to push item into a full list", - " list size: ", l->tensors().size(), - " max_num_elements: ", l->max_num_elements)); - }*/ TensorMap* output_map = nullptr; OP_REQUIRES_OK(c, ForwardInputOrCreateNewMap(c, 0, 0, *m, &output_map)); - std::cout << "create output" << std::endl; output_map->insert(key, value); - std::cout << "inserted" << std::endl; } private: DataType element_dtype_; }; -REGISTER_KERNEL_BUILDER(Name("TensorMapInsert").Device(DEVICE_CPU), - TensorMapInsert); class TensorMapLookup : public OpKernel { public: @@ -213,21 +141,17 @@ class TensorMapLookup : public OpKernel { ~TensorMapLookup() override {} void Compute(OpKernelContext* c) override { - std::cout << "hello TensorMapInsert kernel" << std::endl; - const Tensor& temp_key = c->input(1); - const TensorKey key = TensorKey(temp_key); - std::cout << "got key" << std::endl; + const TensorKey& key = c->input(1); const TensorMap* m = nullptr; OP_REQUIRES_OK(c, GetInputMap(c, 0, &m)); - std::cout << "got map" << std::endl; c->set_output(0, m->tensors().find(key)->second); - std::cout << "finished" << std::endl; } private: DataType element_dtype_; }; + class TensorMapErase : public OpKernel { public: explicit TensorMapErase(OpKernelConstruction* c) : OpKernel(c) { @@ -240,11 +164,6 @@ class TensorMapErase : public OpKernel { OP_REQUIRES_OK(c, GetInputMap(c, 0, &m)); const Tensor& temp_key = c->input(1); const TensorKey key = TensorKey(temp_key); - /*OP_REQUIRES(c, element_dtype_ == l->element_dtype, - errors::InvalidArgument("Invalid data types; op elements ", - DataTypeString(element_dtype_), - " but list elements ", - DataTypeString(l->element_dtype)));*/ OP_REQUIRES(c, !m->tensors().empty(), errors::InvalidArgument("Trying to erase from an empty map.")); @@ -254,27 +173,6 @@ class TensorMapErase : public OpKernel { const Tensor& t = m->tensors().find(key)->second; c->set_output(1, t); - /*if (t.dtype() != DT_INVALID) { - c->set_output(1, t); - } else { - PartialTensorShape partial_element_shape; - OP_REQUIRES_OK( - c, GetElementShapeFromInput(c, *l, 1, &partial_element_shape)); - TensorShape element_shape; - OP_REQUIRES( - c, partial_element_shape.AsTensorShape(&element_shape), - errors::InvalidArgument("Trying to read an uninitialized tensor but ", - "element_shape is not fully defined.", - partial_element_shape.DebugString())); - Tensor* result; - AllocatorAttributes attr; - if (element_dtype_ == DT_VARIANT) { - attr.set_on_host(true); - } - OP_REQUIRES_OK(c, c->allocate_output(1, element_shape, &result, attr)); - functor::SetZeroFunctor()(c->eigen_device(), - result->flat()); - }*/ TensorMap* output_map = nullptr; OP_REQUIRES_OK(c, ForwardInputOrCreateNewMap(c, 0, 0, *m, &output_map)); @@ -293,81 +191,20 @@ class TensorMapReplace : public OpKernel { ~TensorMapReplace() override {} void Compute(OpKernelContext* c) override { - std::cout << "hello TensorMapReplace kernel" << std::endl; - const Tensor& temp_key = c->input(1); - const TensorKey key = TensorKey(temp_key); - std::cout << "got key" << std::endl; + const TensorKey& key = c->input(1); const Tensor& value = c->input(2); - 
std::cout << "got value" << std::endl; - /*OP_REQUIRES(c, element_dtype_ == value.dtype(), - errors::InvalidArgument("Invalid data types; list elements ", - DataTypeString(element_dtype_), - " but tried to append ", - DataTypeString(value.dtype())));*/ - const TensorMap* m = nullptr; OP_REQUIRES_OK(c, GetInputMap(c, 0, &m)); - std::cout << "got map" << std::endl; - /*OP_REQUIRES(c, m->element_shape.IsCompatibleWith(input.shape()), - errors::InvalidArgument( - "Tried to append a map with incompatible shape to a " - "list. Op element shape: ", - input.shape().DebugString(), - " list shape: ", m->element_shape.DebugString()));*/ - /*OP_REQUIRES(c, element_dtype_ == m->element_dtype, - errors::InvalidArgument("Invalid data types; op elements ", - DataTypeString(element_dtype_), - " but list elements ", - DataTypeString(l->element_dtype))); - if (l->max_num_elements != -1) { - OP_REQUIRES( - c, l->tensors().size() < l->max_num_elements, - errors::InvalidArgument("Tried to push item into a full list", - " list size: ", l->tensors().size(), - " max_num_elements: ", l->max_num_elements)); - }*/ TensorMap* output_map = nullptr; OP_REQUIRES_OK(c, ForwardInputOrCreateNewMap(c, 0, 0, *m, &output_map)); - std::cout << "create output" << std::endl; output_map->replace(key,value); - std::cout << "inserted" << std::endl; } private: DataType element_dtype_; }; -REGISTER_KERNEL_BUILDER(Name("TensorMapReplace").Device(DEVICE_CPU), - TensorMapReplace); - -class ZeroOutOp : public OpKernel { - public: - explicit ZeroOutOp(OpKernelConstruction* c) : OpKernel(c) {} - - void Compute(OpKernelContext* c) override { - cout << "Hello World - Op" << endl; - // Grab the input tensor - const Tensor& input_tensor = c->input(0); - auto input = input_tensor.flat(); - - // Create an output tensor - Tensor* output_tensor = NULL; - OP_REQUIRES_OK(c, c->allocate_output(0, input_tensor.shape(), - &output_tensor)); - auto output_flat = output_tensor->flat(); - - // Set all but the first element of the output tensor to 0 - const int N = input.size(); - for (int i=1; i 0) output_flat(0) = input(0); - } -}; - } // namespace tensorflow #endif // TENSORFLOW_CORE_KERNELS_MAP_KERNELS_H_ diff --git a/tensorflow/core/kernels/tensor_map.h b/tensorflow/core/kernels/tensor_map.h index 7ab792b4813..a5d44550c98 100644 --- a/tensorflow/core/kernels/tensor_map.h +++ b/tensorflow/core/kernels/tensor_map.h @@ -143,29 +143,6 @@ class TensorMap { return out; } - TensorMap Zeros() const { - TensorMap out; - out.element_shape = element_shape; - out.element_dtype = element_dtype; - out.max_num_elements = max_num_elements; - // This performs a copy of the absl::hashmap. - absl::flat_hash_map::iterator it = tensors_->values_.begin(); - while(it != tensors_->values_.end()) { - out.tensors_->values_.try_emplace(it->first, Tensor(0)); - it++; - } - return out; - } - std::vector keys() { - std::vector keys(tensors_->values_.size()); - absl::flat_hash_map::iterator it = tensors_->values_.begin(); - while(it != tensors_->values_.end()) { - keys.push_back((Tensor)it->first); - it++; - } - return keys; - } - // Insert key and value if the key does not already exist. // Returns true if the insertion happens. 
bool insert(const TensorKey& key, const Tensor& value) { diff --git a/tensorflow/core/kernels/tensor_map_test.cc b/tensorflow/core/kernels/tensor_map_test.cc index 5774a605bbf..294aa07c963 100644 --- a/tensorflow/core/kernels/tensor_map_test.cc +++ b/tensorflow/core/kernels/tensor_map_test.cc @@ -137,29 +137,6 @@ TEST(TensorMapTest, EncodeDecode) { test::ExpectTensorEqual(tm.find(k)->second, tmc.find(k)->second); } -TEST(TensorMapTest, Keys) { - TensorMap tm; - TensorKey k = Tensor(11); - TensorKey k2 = Tensor(12); - Tensor v = Tensor(22); - tm.insert(k,v); - tm.insert(k2,v); - std::vector keys = tm.keys(); - EXPECT_EQ(1,1); - Tensor t = Tensor(11); - //std::cout << "keys: " << keys[0] << std::endl; - //test::ExpectTensorEqual(keys[0], t); - //test::ExpectTensorEqual(keys[1], k2); -} - -TEST(TensorMapTest, Zeros) { - TensorMap tm; - TensorKey k = Tensor(11); - Tensor v = Tensor(22); - tm.insert(k,v); - TensorMap z = tm.Zeros(); - test::ExpectTensorEqual(z.find(k)->second,Tensor(0)); -} } // namespace } // namespace tensorflow diff --git a/tensorflow/core/ops/map_ops.cc b/tensorflow/core/ops/map_ops.cc index d3711755d9e..8949e3f1923 100644 --- a/tensorflow/core/ops/map_ops.cc +++ b/tensorflow/core/ops/map_ops.cc @@ -39,15 +39,6 @@ REGISTER_OP("TensorMapSize") .Output("size: int32") .SetShapeFn(shape_inference::ScalarShape); -REGISTER_OP("TensorMapZeros") - .Input("input_handle: variant") - .Output("output_handle: variant") - //.Attr("element_dtype: type") - .SetShapeFn([](shape_inference::InferenceContext* c) { - c->set_output(0, c->Scalar()); - return Status::OK(); - }); - REGISTER_OP("TensorMapInsert") .Input("input_handle: variant") .Input("key: element_dtype") @@ -56,32 +47,6 @@ REGISTER_OP("TensorMapInsert") .Attr("element_dtype: type") .SetShapeFn([](shape_inference::InferenceContext* c) { c->set_output(0, c->Scalar()); - /*DataType element_dtype; - TF_RETURN_IF_ERROR(c->GetAttr("element_dtype", &element_dtype)); - shape_inference::ShapeHandle element_shape = c->UnknownShape();*/ - - /*auto* handle_data = c->input_handle_shapes_and_types(0); - if (handle_data != nullptr && handle_data->size() > 1) { - return errors::InvalidArgument( - "Trying to push to list with wrong variant data."); - } - if (IsValidTensorMapHandleData(handle_data)) { - const shape_inference::ShapeAndType& map_shape_type = (*handle_data)[0]; - if (list_shape_type.dtype != element_dtype) { - return errors::InvalidArgument( - "Trying to push to list with wrong element dtype. 
List has type ", - DataTypeString(list_shape_type.dtype), - " but trying to push element with type ", - DataTypeString(element_dtype)); - } - shape_inference::ShapeHandle ignored; - TF_RETURN_IF_ERROR( - c->Merge(element_shape, map_shape_type.shape, &ignored)); - element_shape = map_shape_type.shape; - } - c->set_output_handle_shapes_and_types( - 0, std::vector{ - {element_shape, element_dtype}});*/ return Status::OK(); }); @@ -92,32 +57,6 @@ REGISTER_OP("TensorMapLookup") .Attr("element_dtype: type") .SetShapeFn([](shape_inference::InferenceContext* c) { c->set_output(0, c->Scalar()); - /*DataType element_dtype; - TF_RETURN_IF_ERROR(c->GetAttr("element_dtype", &element_dtype)); - shape_inference::ShapeHandle element_shape = c->UnknownShape();*/ - - /*auto* handle_data = c->input_handle_shapes_and_types(0); - if (handle_data != nullptr && handle_data->size() > 1) { - return errors::InvalidArgument( - "Trying to push to list with wrong variant data."); - } - if (IsValidTensorMapHandleData(handle_data)) { - const shape_inference::ShapeAndType& map_shape_type = (*handle_data)[0]; - if (list_shape_type.dtype != element_dtype) { - return errors::InvalidArgument( - "Trying to push to list with wrong element dtype. List has type ", - DataTypeString(list_shape_type.dtype), - " but trying to push element with type ", - DataTypeString(element_dtype)); - } - shape_inference::ShapeHandle ignored; - TF_RETURN_IF_ERROR( - c->Merge(element_shape, map_shape_type.shape, &ignored)); - element_shape = map_shape_type.shape; - } - c->set_output_handle_shapes_and_types( - 0, std::vector{ - {element_shape, element_dtype}});*/ return Status::OK(); }); @@ -128,9 +67,9 @@ REGISTER_OP("TensorMapErase") .Output("tensor: element_dtype") .Attr("element_dtype: type") .SetShapeFn([](shape_inference::InferenceContext* c) { - /*DataType element_dtype; + DataType element_dtype; TF_RETURN_IF_ERROR(c->GetAttr("element_dtype", &element_dtype)); - shape_inference::ShapeHandle tensor_shape = c->UnknownShape(); + /*shape_inference::ShapeHandle tensor_shape = c->UnknownShape(); auto* handle_data = c->input_handle_shapes_and_types(0); if (handle_data != nullptr && handle_data->size() > 1) { return errors::InvalidArgument( @@ -195,16 +134,5 @@ REGISTER_OP("TensorMapReplace") return Status::OK(); }); - - -REGISTER_OP("ZeroOut") - .Input("to_zero: int32") - .Output("zeroed: int32") - .SetShapeFn([](shape_inference::InferenceContext* c) { - //c->set_output(0, c->Scalar()); - c->set_output(0, c->input(0)); - return Status::OK(); - }); - } // namespace } // namespace tensorflow diff --git a/tensorflow/python/kernel_tests/list_ops_test.py b/tensorflow/python/kernel_tests/list_ops_test.py index 4d7f2beb00b..7ffc4d3889d 100644 --- a/tensorflow/python/kernel_tests/list_ops_test.py +++ b/tensorflow/python/kernel_tests/list_ops_test.py @@ -48,7 +48,7 @@ from tensorflow.python.platform import test @test_util.run_all_in_graph_and_eager_modes class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): - ''' + def _testPushPop(self, max_num_elements): l = list_ops.empty_tensor_list( element_dtype=dtypes.float32, @@ -130,7 +130,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): _, e = gen_list_ops.tensor_list_pop_back( l, element_dtype=dtypes.float32, element_shape=[1, 3]) self.evaluate(e) - ''' + def testPushGetGrad(self): with backprop.GradientTape() as tape: l = list_ops.empty_tensor_list( @@ -150,7 +150,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): dt0, dt1 = 
tape.gradient(t1, [c0, c1]) self.assertAllEqual(self.evaluate(dt1), [1.0, 1.0]) self.assertEqual(self.evaluate(dt0), 0.0) - ''' + def _testStack(self, max_num_elements): l = list_ops.empty_tensor_list( element_dtype=dtypes.float32, @@ -888,7 +888,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): l_worker = array_ops.identity(l_ps) l_worker = list_ops.tensor_list_push_back(l_worker, 3.0) self.evaluate(l_worker) - ''' + def testPushPopGradients(self): with backprop.GradientTape() as tape: l = list_ops.empty_tensor_list( @@ -925,7 +925,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): grad_c, grad_c2 = tape.gradient(y, [c, c2]) self.assertAllEqual(self.evaluate(grad_c), [0.0, 4.0]) self.assertAllEqual(self.evaluate(grad_c2), 6.0) - ''' + @test_util.run_deprecated_v1 def testSetOutOfBounds(self): c = constant_op.constant([1.0, 2.0]) @@ -1664,7 +1664,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): upper, constant_op.constant([0, 1, 2]), dtype=dtypes.string) self.assertAllEqual(f(), [b"A", b"B", b"C"]) - ''' + def testPopBackGrad(self): # https://github.com/tensorflow/tensorflow/issues/37230 diff --git a/tensorflow/python/kernel_tests/map_ops_test.py b/tensorflow/python/kernel_tests/map_ops_test.py index 9384571dc2b..dc4e5b97fc3 100644 --- a/tensorflow/python/kernel_tests/map_ops_test.py +++ b/tensorflow/python/kernel_tests/map_ops_test.py @@ -18,7 +18,6 @@ from __future__ import division from __future__ import print_function #import numpy as np - from tensorflow.python.platform import test from absl.testing import parameterized from tensorflow.python.framework import test_util @@ -30,34 +29,26 @@ from tensorflow.python.eager import def_function from tensorflow.python.eager import function from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes - -#try: -# from tensorflow_zero_out.python.ops.zero_out_ops import zero_out -#except ImportError: -# from zero_out_ops import zero_out from tensorflow.python.ops import map_ops @test_util.run_all_in_graph_and_eager_modes class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): - '''def testEmptyTensorMap(self): + + def testEmptyTensorMap(self): m = map_ops.empty_tensor_map() - print("test EmptyTensorMap") def testTensorMapSize(self): m = map_ops.empty_tensor_map() s = map_ops.tensor_map_size(m) - print("size: ", s) self.assertAllClose(s, 0) def testTensorMapInsert(self): - #with self.test_session(): - m = map_ops.empty_tensor_map() - k = constant_op.constant(1.0) - v = constant_op.constant(2.0) - m = map_ops.tensor_map_insert(m, k, v) - s = map_ops.tensor_map_size(m) - self.assertAllClose(s, 1) - print("test TensorMapInsert") + m = map_ops.empty_tensor_map() + k = constant_op.constant(1.0) + v = constant_op.constant(2.0) + m = map_ops.tensor_map_insert(m, k, v) + s = map_ops.tensor_map_size(m) + self.assertAllClose(s, 1) def testTensorMapLookup(self): m = map_ops.empty_tensor_map() @@ -65,35 +56,31 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): v = constant_op.constant(2.0) m = map_ops.tensor_map_insert(m, k, v) l = map_ops.tensor_map_lookup(m, k) - print("lookup: ", l) - self.assertAllClose(l, v)''' + self.assertAllClose(l, v) def testTensorMapReplace(self): - #with self.test_session(): - m = map_ops.empty_tensor_map() - k = constant_op.constant(1.0) - v = constant_op.constant(2.0) - m = map_ops.tensor_map_insert(m, k, v) - s = map_ops.tensor_map_size(m) - self.assertAllClose(s, 1) - 
- v2 = constant_op.constant(3.0) - m = map_ops.tensor_map_replace(m, k, v2) - l = map_ops.tensor_map_lookup(m, k) - self.assertAllClose(l, v2) - print("test TensorMapReplace") - - def testTensorMapErase(self): - print("python erase") m = map_ops.empty_tensor_map() k = constant_op.constant(1.0) v = constant_op.constant(2.0) m = map_ops.tensor_map_insert(m, k, v) s = map_ops.tensor_map_size(m) self.assertAllClose(s, 1) + + v2 = constant_op.constant(3.0) + m = map_ops.tensor_map_replace(m, k, v2) + l = map_ops.tensor_map_lookup(m, k) + self.assertAllClose(l, v2) + + def testTensorMapErase(self): + m = map_ops.empty_tensor_map() + k = constant_op.constant(1.0) + v = constant_op.constant(2.0) + m = map_ops.tensor_map_insert(m, k, v) + s = map_ops.tensor_map_size(m) + self.assertAllClose(s, 1) + m, e = map_ops.tensor_map_erase(m, k) s = map_ops.tensor_map_size(m) - print("erase: ", e) self.assertAllClose(s, 0) self.assertAllClose(e, v) @@ -106,32 +93,9 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): m = map_ops.tensor_map_insert(m, k, v) l = map_ops.tensor_map_lookup(m, k) l *= 5 - g= tape.gradient(l,v) - print("gradient",g) + g = tape.gradient(l,v) self.assertAllClose(g, 5.0) - - - ''' - @parameterized.named_parameters(("NoMaxNumElements", None), - ("WithMaxNumElements", 2)) - @test_util.run_deprecated_v1 - def testEraseFromEmptyTensorMapFails(self, max_num_elements): - print("hello world testErase") - m = map_ops.empty_tensor_map() - with self.assertRaisesRegexp(errors.InvalidArgumentError, - "Trying to erase from an empty map"): - m = map_ops.tensor_map_erase(l, element_dtype=dtypes.float32) - self.evaluate(l) - ''' - - '''def testZeroOut(self): - print("hello world testZeroOut") - with self.test_session(): - self.assertAllClose( - map_ops.zero_out([[1, 2], [3, 4]]), np.array([[1, 0], [0, 0]]))''' - if __name__ == '__main__': - print("hihihi") test.main() \ No newline at end of file diff --git a/tensorflow/python/ops/map_ops.py b/tensorflow/python/ops/map_ops.py index 44894a8b6d9..29f0751d91f 100644 --- a/tensorflow/python/ops/map_ops.py +++ b/tensorflow/python/ops/map_ops.py @@ -35,15 +35,12 @@ from tensorflow.python.framework import constant_op ops.NotDifferentiable("EmptyTensorMap") def empty_tensor_map(): - print("hello gen_map_ops.empty_tensor_map") return gen_map_ops.empty_tensor_map() def tensor_map_size(input_handle): - print("hello gen_map_ops.tensor_map_size") return gen_map_ops.tensor_map_size(input_handle) def tensor_map_insert(input_handle, key, value): - print("hello gen_map_ops.tensor_map_insert") return gen_map_ops.tensor_map_insert(input_handle, key, value) def tensor_map_lookup(input_handle, key): @@ -55,13 +52,13 @@ def tensor_map_erase(input_handle, key): def tensor_map_replace(input_handle, key, value): return gen_map_ops.tensor_map_replace(input_handle, key, value) - @ops.RegisterGradient("TensorMapLookup") def LookupGrad(op, dval): # map grad should be a map that is 0 everywhere except 1 @key k m, k = op.inputs #m = gen_map_ops.tensor_map_zeros(m) - map_grad = tensor_map_replace(m, k, dval) + map_grad = empty_tensor_map() + map_grad = tensor_map_insert(map_grad, k, dval) key = op.inputs[1] key_grad = None return map_grad, key_grad @@ -72,27 +69,4 @@ def InsertGrad(op, dmap): map_grad, _ = gen_map_ops.tensor_map_erase(dmap, key) key_grad = None value_grad = tensor_map_lookup(dmap, key) - #value_grad = constant_op.constant(1.0) return map_grad, key_grad, value_grad - -def zero_out(to_zero): - return gen_map_ops.zero_out(to_zero) - 
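# (Illustrative aside, not part of the diff.) A plain-dict analogy of the
# gradient rules registered above: the lookup gradient is a map that is zero
# everywhere except at the looked-up key, and the insert gradient drops the
# inserted key from the incoming map gradient while reading the value
# gradient out at that key. Function names here are made up for the sketch.

def dict_lookup_grad(key, dval):
    # d(lookup(m, key)) w.r.t. m: nonzero only at `key`; the key itself
    # receives no gradient.
    return {key: dval}, None

def dict_insert_grad(key, dmap):
    # d(insert(m, key, v)): the map gradient excludes `key`; the value
    # gradient is read out of the incoming map gradient at `key`.
    map_grad = {k: v for k, v in dmap.items() if k != key}
    return map_grad, None, dmap.get(key)

assert dict_lookup_grad('k', 5.0) == ({'k': 5.0}, None)
assert dict_insert_grad('k', {'k': 5.0, 'j': 2.0}) == ({'j': 2.0}, None, 5.0)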
-@ops.RegisterGradient("ZeroOut") -def _zero_out_grad(op, grad): - """The gradients for `zero_out`. - - Args: - op: The `zero_out` `Operation` that we are differentiating, which we can use - to find the inputs and outputs of the original op. - grad: Gradient with respect to the output of the `zero_out` op. - - Returns: - Gradients with respect to the input of `zero_out`. - """ - to_zero = op.inputs[0] - shape = array_ops.shape(to_zero) - index = array_ops.zeros_like(shape) - first_grad = array_ops.reshape(grad, [-1])[0] - to_zero_grad = sparse_ops.sparse_to_dense([index], shape, first_grad, 0) - return [to_zero_grad] # List of one Tensor, since we have one input From b5973195532a786343de6a4278322056574b207c Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Fri, 10 Jul 2020 20:21:51 -0700 Subject: [PATCH 0219/2522] [XLA:SPMD] Use subgroup AllToAll for resharding Reshard from tile [2,2,1] to [1,2,2] can be done by a subgroup all-to-all between dimensions 0 and 2. PiperOrigin-RevId: 320720720 Change-Id: I1b63ba731b830610596c77697c5577fa9e2e0f79 --- .../xla/service/spmd/spmd_partitioner.cc | 123 ++++++++++++------ .../xla/service/spmd/spmd_partitioner.h | 3 +- .../xla/service/spmd/spmd_partitioner_test.cc | 26 ++++ 3 files changed, 108 insertions(+), 44 deletions(-) diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc index 7e136be54e6..1b484e018ba 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc @@ -176,16 +176,45 @@ std::vector CreateReplicaGroups(int64 num_replicas) { return groups; } -bool CanReshardWithAllToAll(const HloSharding& source, - const HloSharding& target) { - return UniqueTiledDim(source) && UniqueTiledDim(target) && - UniqueTiledDim(source) != UniqueTiledDim(target); +absl::optional> GetReshardAllToAllSourceTargetDims( + const HloSharding& source, const HloSharding& target) { + if (source.IsTileMaximal() || target.IsTileMaximal() || + source.tile_assignment().num_dimensions() != + target.tile_assignment().num_dimensions()) { + return absl::nullopt; + } + int64 source_dim = -1; + int64 target_dim = -1; + for (int64 i = 0; i < source.tile_assignment().num_dimensions(); ++i) { + if (source.tile_assignment().dim(i) > 1 && + target.tile_assignment().dim(i) == 1) { + if (source_dim != -1) { + return absl::nullopt; + } + source_dim = i; + } else if (source.tile_assignment().dim(i) == 1 && + target.tile_assignment().dim(i) > 1) { + if (target_dim != -1) { + return absl::nullopt; + } + target_dim = i; + } else if (source.tile_assignment().dim(i) != + target.tile_assignment().dim(i)) { + return absl::nullopt; + } + } + if (source_dim == -1 || target_dim == -1 || source_dim == target_dim) { + return absl::nullopt; + } + return std::pair(source_dim, target_dim); } bool CanReshardWithCollectivePermute(const HloSharding& source, const HloSharding& target) { - return UniqueTiledDim(source) && UniqueTiledDim(target) && - UniqueTiledDim(source) == UniqueTiledDim(target) && source != target; + return !source.IsTileMaximal() && !target.IsTileMaximal() && + source.tile_assignment().dimensions() == + target.tile_assignment().dimensions() && + source.tile_assignment() != target.tile_assignment(); } // Clears all sharding attributes from instructions in the module. 
This must be @@ -278,8 +307,10 @@ PartitionedHlo PartitionedHlo::ReshardNoCache(const HloSharding& target) { return ReshardWithCollectivePermute(target); } - if (CanReshardWithAllToAll(sharding(), target)) { - return ReshardWithAllToAll(target); + if (auto src_tgt_dims = + GetReshardAllToAllSourceTargetDims(sharding(), target)) { + return ReshardWithAllToAll(target, src_tgt_dims->first, + src_tgt_dims->second); } // If not replicated yet, first replicate and then reshard to use one of the @@ -745,45 +776,53 @@ PartitionedHlo PartitionedHlo::Broadcast() const { return PartitionedHlo(result, base_shape_, state_); } -PartitionedHlo PartitionedHlo::ReshardWithAllToAll( - const HloSharding& target) const { - int64 partition_count = sharding().tile_assignment().num_elements(); - absl::optional input_partition_dim = UniqueTiledDim(sharding()); - absl::optional output_partition_dim = UniqueTiledDim(target); - CHECK(input_partition_dim.has_value()); - CHECK(output_partition_dim.has_value()); +PartitionedHlo PartitionedHlo::ReshardWithAllToAll(const HloSharding& target, + int64 source_dim, + int64 target_dim) const { + const int64 group_size = sharding().tile_assignment().dim(source_dim); // If the device order is different in the target, fix the order with // ReshardWithCollectivePermute. - auto input_tile_fixed_device_order = target.tile_assignment(); - input_tile_fixed_device_order.Reshape( - sharding().tile_assignment().dimensions()); + std::vector xpose_dims(target.tile_assignment().num_dimensions()); + std::iota(xpose_dims.begin(), xpose_dims.end(), 0); + xpose_dims[source_dim] = target_dim; + xpose_dims[target_dim] = source_dim; auto input_sharding_fixed_device_order = - HloSharding::Tile(input_tile_fixed_device_order); + hlo_sharding_util::TransposeSharding(target, xpose_dims); if (input_sharding_fixed_device_order != sharding()) { auto fixed_order = ReshardWithCollectivePermute(input_sharding_fixed_device_order); - return fixed_order.ReshardWithAllToAll(target); + return fixed_order.ReshardWithAllToAll(target, source_dim, target_dim); } auto padded_hlo = PadBaseShapeBeforeUnevenTiledSharding(hlo_, target, state_.b); // The order of ids in the group must follow the target sharding. - std::vector groups(1); - for (int64 device : target.tile_assignment()) { - groups[0].add_replica_ids(device); - } + std::vector groups(target.tile_assignment().num_elements() / + group_size); + target.tile_assignment().Each( + [&](absl::Span indices, int64 device) { + int64 group_id = 0; + for (int64 dim = 0; dim < indices.size(); ++dim) { + if (dim == target_dim) { + continue; + } + group_id *= target.tile_assignment().dim(dim); + group_id += indices[dim]; + } + groups[group_id].add_replica_ids(device); + }); HloInstruction* result = nullptr; - // Split along the split dimension (output_partition_dim) of the all-to-all + // Split along the split dimension (target_dim) of the all-to-all // output. std::vector dimensions; for (int64 i = 0; i < base_shape_.rank(); ++i) { - if (i == *output_partition_dim) { - dimensions.push_back(partition_count); - dimensions.push_back(padded_hlo->shape().dimensions(i) / partition_count); + if (i == target_dim) { + dimensions.push_back(group_size); + dimensions.push_back(padded_hlo->shape().dimensions(i) / group_size); } else { dimensions.push_back(padded_hlo->shape().dimensions(i)); } @@ -794,21 +833,19 @@ PartitionedHlo PartitionedHlo::ReshardWithAllToAll( // After the reshape, it is guaranteed to have at least 3 dimensions. 
auto all_to_all = state_.collective_ops_creator.create_cross_partition_all_to_all( - state_.b, {reshape}, groups, (*state_.next_channel_id)++, - output_partition_dim); + state_.b, {reshape}, groups, (*state_.next_channel_id)++, target_dim); // Reorder the split dimension of the reshape to be located in front of the // input partition dimension, so the two dimensions can be combined. - int64 new_input_partition_dim = (*output_partition_dim < *input_partition_dim) - ? *input_partition_dim + 1 - : *input_partition_dim; + int64 new_source_dim = + (target_dim < source_dim) ? source_dim + 1 : source_dim; std::vector permutation; for (int64 i = 0; i < all_to_all->shape().rank(); ++i) { - if (i == *output_partition_dim) { + if (i == target_dim) { continue; } - if (i == new_input_partition_dim) { - permutation.push_back(*output_partition_dim); + if (i == new_source_dim) { + permutation.push_back(target_dim); } permutation.push_back(i); } @@ -819,8 +856,7 @@ PartitionedHlo PartitionedHlo::ReshardWithAllToAll( // Combine the split dimension and the input partition dimension. auto new_shape = ShapeInference::InferAllToAllShape( - padded_hlo->shape(), *output_partition_dim, - *input_partition_dim, partition_count) + padded_hlo->shape(), target_dim, source_dim, group_size) .ValueOrDie(); result = state_.b->AddInstruction( HloInstruction::CreateReshape(new_shape, transpose)); @@ -837,7 +873,8 @@ PartitionedHlo PartitionedHlo::ReshardWithAllToAll( PartitionedHlo PartitionedHlo::ReshardWithCollectivePermute( const HloSharding& target) const { - CHECK(CanReshardWithCollectivePermute(sharding(), target)); + CHECK(CanReshardWithCollectivePermute(sharding(), target)) + << sharding().ToString() << " to " << target.ToString(); std::vector> src_dst_pairs; sharding().tile_assignment().Each( [&](absl::Span indices, int64 src_device) { @@ -3653,8 +3690,8 @@ Status SpmdPartitioningVisitor::HandleDotHelper( output_batch_partitions == num_partitions_ && lhs_sharding_transposed_to_match_output == hlo->sharding()) { if (!may_reshard_with_allreduce && - !CanReshardWithAllToAll(rhs.sharding(), - *lhs_sharding_transposed_to_match_rhs)) { + !GetReshardAllToAllSourceTargetDims( + rhs.sharding(), *lhs_sharding_transposed_to_match_rhs)) { return false; } auto resharded_rhs = rhs.Reshard(*lhs_sharding_transposed_to_match_rhs); @@ -3668,8 +3705,8 @@ Status SpmdPartitioningVisitor::HandleDotHelper( output_batch_partitions == num_partitions_ && rhs_sharding_transposed_to_match_output == hlo->sharding()) { if (!may_reshard_with_allreduce && - !CanReshardWithAllToAll(lhs.sharding(), - *rhs_sharding_transposed_to_match_lhs)) { + !GetReshardAllToAllSourceTargetDims( + lhs.sharding(), *rhs_sharding_transposed_to_match_lhs)) { return false; } auto resharded_lhs = lhs.Reshard(*rhs_sharding_transposed_to_match_lhs); diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h index 52e4c9021d8..40881b4b91c 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h @@ -284,7 +284,8 @@ class PartitionedHlo { // Helper function to reshard the tensor using AllToAll (instead of the // default of Replicate followed by Slice). - PartitionedHlo ReshardWithAllToAll(const HloSharding& target) const; + PartitionedHlo ReshardWithAllToAll(const HloSharding& target, + int64 source_dim, int64 target_dim) const; // Helper function to reshard the tensor using CollectivePermute. 
PartitionedHlo ReshardWithCollectivePermute(const HloSharding& target) const; diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc index 1f0b1d06c1f..9f3708fc12c 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc @@ -3766,6 +3766,32 @@ ENTRY entry { op::Parameter(0)))); } +TEST_F(SpmdPartitioningTest, SubgroupAllToAllReshard) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %param0 = f32[8,8,8,8] parameter(0), + sharding={devices=[2,2,1,2]0,1,2,3,4,5,6,7} + ROOT %copy = f32[8,8,8,8] copy(%param0), + sharding={devices=[1,2,2,2]0,1,4,5,2,3,6,7} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/2)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto reshape = + AllOf(op::Shape("f32[4,4,2,4,4]"), op::Reshape(op::Parameter(0))); + auto all_to_all = AllOf(op::Shape("f32[4,4,2,4,4]"), op::AllToAll(reshape)); + auto xpose = AllOf(op::Shape("f32[2,4,4,4,4]"), op::Transpose(all_to_all)); + EXPECT_THAT(root, + op::Copy(AllOf(op::Reshape(xpose), op::Shape("f32[8,4,4,4]")))); + EXPECT_EQ(root->operand(0)->operand(0)->operand(0)->replica_groups().size(), + 4); +} + } // namespace } // namespace spmd } // namespace xla From 5e28f0c5c83afc3ede7758b2b541777e719cb146 Mon Sep 17 00:00:00 2001 From: WindQAQ Date: Thu, 9 Jul 2020 23:05:09 -0700 Subject: [PATCH 0220/2522] Enable half for resource scatter Call each type Enable sparse adam test in eager mode Use self.evaluate() Update tests for adam Enable half for scatter Use assertAllCloseAccordingToType to pass float16 tests imake linter happy Run pylint Relax tolerance for half --- tensorflow/core/kernels/resource_variable_ops.cc | 4 ++-- tensorflow/core/kernels/scatter_functor.cc | 2 +- tensorflow/core/kernels/scatter_functor_gpu.cu.cc | 4 +--- tensorflow/core/kernels/scatter_op.cc | 4 ++-- tensorflow/python/kernel_tests/scatter_ops_test.py | 5 +++-- 5 files changed, 9 insertions(+), 10 deletions(-) diff --git a/tensorflow/core/kernels/resource_variable_ops.cc b/tensorflow/core/kernels/resource_variable_ops.cc index 510e95ca606..950a80b6b2d 100644 --- a/tensorflow/core/kernels/resource_variable_ops.cc +++ b/tensorflow/core/kernels/resource_variable_ops.cc @@ -998,8 +998,8 @@ REGISTER_SCATTER_KERNEL(Variant, CPU, "ResourceScatterUpdate", #define REGISTER_SCATTER_UPDATE_GPU(type) REGISTER_SCATTER_UPDATE(type, GPU); -TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ARITHMETIC_GPU); -TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_MINMAX_GPU); +TF_CALL_GPU_NUMBER_TYPES(REGISTER_SCATTER_ARITHMETIC_GPU); +TF_CALL_GPU_NUMBER_TYPES(REGISTER_SCATTER_MINMAX_GPU); REGISTER_KERNEL_BUILDER(Name("ResourceScatterUpdate") .Device(DEVICE_GPU) diff --git a/tensorflow/core/kernels/scatter_functor.cc b/tensorflow/core/kernels/scatter_functor.cc index f17d8759d20..5d4ff1f5f5a 100644 --- a/tensorflow/core/kernels/scatter_functor.cc +++ b/tensorflow/core/kernels/scatter_functor.cc @@ -55,7 +55,7 @@ namespace functor { DECLARE_GPU_SPECS_INDEX(T, int32); \ DECLARE_GPU_SPECS_INDEX(T, int64); -TF_CALL_GPU_NUMBER_TYPES_NO_HALF(DECLARE_GPU_SPECS); +TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPECS); #undef DECLARE_GPU_SPECS #undef DECLARE_GPU_SPECS_INDEX diff --git a/tensorflow/core/kernels/scatter_functor_gpu.cu.cc b/tensorflow/core/kernels/scatter_functor_gpu.cu.cc index 
7bfd0051de9..7083502da9e 100644 --- a/tensorflow/core/kernels/scatter_functor_gpu.cu.cc +++ b/tensorflow/core/kernels/scatter_functor_gpu.cu.cc @@ -40,13 +40,11 @@ typedef Eigen::GpuDevice GPUDevice; DEFINE_GPU_SPECS_INDEX(T, int32); \ DEFINE_GPU_SPECS_INDEX(T, int64); +DEFINE_GPU_SPECS(Eigen::half); DEFINE_GPU_SPECS(float); DEFINE_GPU_SPECS(double); DEFINE_GPU_SPECS_OP(bool, int32, scatter_op::UpdateOp::ASSIGN); DEFINE_GPU_SPECS_OP(bool, int64, scatter_op::UpdateOp::ASSIGN); -// TODO(b/27222123): The following fails to compile due to lack of support for -// fp16. -// TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_SPECS); #undef DEFINE_GPU_SPECS #undef DEFINE_GPU_SPECS_INDEX diff --git a/tensorflow/core/kernels/scatter_op.cc b/tensorflow/core/kernels/scatter_op.cc index 1c1f0d7e9e0..c7ea9def4fa 100644 --- a/tensorflow/core/kernels/scatter_op.cc +++ b/tensorflow/core/kernels/scatter_op.cc @@ -286,9 +286,9 @@ TF_CALL_ALL_TYPES(REGISTER_SCATTER_UPDATE_CPU); #define REGISTER_SCATTER_UPDATE_GPU(type) REGISTER_SCATTER_UPDATE(type, GPU); -TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ARITHMETIC_GPU); +TF_CALL_GPU_NUMBER_TYPES(REGISTER_SCATTER_ARITHMETIC_GPU); TF_CALL_GPU_NUMBER_TYPES(REGISTER_SCATTER_MINMAX_GPU); -TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_UPDATE_GPU); +TF_CALL_GPU_NUMBER_TYPES(REGISTER_SCATTER_UPDATE_GPU); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/tensorflow/python/kernel_tests/scatter_ops_test.py b/tensorflow/python/kernel_tests/scatter_ops_test.py index 8ed3595b904..e8b23d1b686 100644 --- a/tensorflow/python/kernel_tests/scatter_ops_test.py +++ b/tensorflow/python/kernel_tests/scatter_ops_test.py @@ -182,13 +182,14 @@ class ScatterTest(test.TestCase): ref = variables.Variable(old) self.evaluate(ref.initializer) self.evaluate(tf_scatter(ref, indices, updates)) - self.assertAllClose(self.evaluate(ref), new) + self.assertAllCloseAccordingToType( + self.evaluate(ref), new, half_rtol=5e-3, half_atol=5e-3) def _VariableRankTests(self, tf_scatter, repeat_indices=False, updates_are_scalar=False): - vtypes = [np.float32, np.float64] + vtypes = [np.float16, np.float32, np.float64] if tf_scatter != state_ops.scatter_div: vtypes.append(np.int32) From 154ad2abb7748624633b959eaab0b559aaeaf58e Mon Sep 17 00:00:00 2001 From: WindQAQ Date: Fri, 10 Jul 2020 03:02:45 -0700 Subject: [PATCH 0221/2522] Do not test float16 for div Test float and int for resource scatter add Fix indentation Test half for scatter add Remove tests --- tensorflow/python/kernel_tests/scatter_ops_test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/kernel_tests/scatter_ops_test.py b/tensorflow/python/kernel_tests/scatter_ops_test.py index e8b23d1b686..b9206bf3221 100644 --- a/tensorflow/python/kernel_tests/scatter_ops_test.py +++ b/tensorflow/python/kernel_tests/scatter_ops_test.py @@ -189,9 +189,11 @@ class ScatterTest(test.TestCase): tf_scatter, repeat_indices=False, updates_are_scalar=False): - vtypes = [np.float16, np.float32, np.float64] + vtypes = [np.float32, np.float64] if tf_scatter != state_ops.scatter_div: vtypes.append(np.int32) + # float16 is numerically unstable for div + vtypes.append(np.float16) for vtype in vtypes: for itype in (np.int32, np.int64): From 43b19c184e9ef9c5cef48efaea6c1ac94c7bbea1 Mon Sep 17 00:00:00 2001 From: Mingming Liu Date: Fri, 10 Jul 2020 21:28:52 -0700 Subject: [PATCH 0222/2522] In shared batch scheduler, rename 'max_batch_size' to 'input_batch_size_limit'. 
PiperOrigin-RevId: 320725389 Change-Id: I10979f6e225498b52ec3c20b6d5e91b3322b442d --- RELEASE.md | 4 -- tensorflow/core/kernels/batch_kernels.cc | 3 +- .../batching_util/basic_batch_scheduler.h | 3 +- .../batching_util/shared_batch_scheduler.h | 47 +++++++++---------- .../shared_batch_scheduler_test.cc | 18 +++---- 5 files changed, 34 insertions(+), 41 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 236428d4e83..22edbcd1f41 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -11,10 +11,6 @@ * C-API functions `TF_StringDecode`, `TF_StringEncode`, and `TF_StringEncodedSize` are no longer relevant and have been removed; see core/platform/ctstring.h for string access/modification in C. -* In batching library, rename parameter - SharedBatchScheduler::QueueOptions::max_batch_size to a more accurate name - (input_batch_size_limit) for a recent feature to enable split of large batch - sizes. ## Known Caveats diff --git a/tensorflow/core/kernels/batch_kernels.cc b/tensorflow/core/kernels/batch_kernels.cc index 3bd3cc5116c..818685a3fff 100644 --- a/tensorflow/core/kernels/batch_kernels.cc +++ b/tensorflow/core/kernels/batch_kernels.cc @@ -368,8 +368,7 @@ class BatchResource : public ResourceBase { TF_RETURN_IF_ERROR( Batcher::Create(batcher_options, &new_resource->batcher_)); - new_resource->batcher_queue_options_.input_batch_size_limit = - max_batch_size; + new_resource->batcher_queue_options_.max_batch_size = max_batch_size; new_resource->batcher_queue_options_.max_enqueued_batches = max_enqueued_batches; new_resource->batcher_queue_options_.batch_timeout_micros = diff --git a/tensorflow/core/kernels/batching_util/basic_batch_scheduler.h b/tensorflow/core/kernels/batching_util/basic_batch_scheduler.h index fab71209668..26df1f82920 100644 --- a/tensorflow/core/kernels/batching_util/basic_batch_scheduler.h +++ b/tensorflow/core/kernels/batching_util/basic_batch_scheduler.h @@ -226,8 +226,7 @@ Status BasicBatchScheduler::Create( typename SharedBatchScheduler::QueueOptions shared_scheduler_queue_options; - shared_scheduler_queue_options.input_batch_size_limit = - options.max_batch_size; + shared_scheduler_queue_options.max_batch_size = options.max_batch_size; shared_scheduler_queue_options.batch_timeout_micros = options.batch_timeout_micros; shared_scheduler_queue_options.max_enqueued_batches = diff --git a/tensorflow/core/kernels/batching_util/shared_batch_scheduler.h b/tensorflow/core/kernels/batching_util/shared_batch_scheduler.h index 71ab55ab006..6763e366c76 100644 --- a/tensorflow/core/kernels/batching_util/shared_batch_scheduler.h +++ b/tensorflow/core/kernels/batching_util/shared_batch_scheduler.h @@ -136,15 +136,17 @@ class SharedBatchScheduler struct QueueOptions { // The size limit of an input batch to the queue. // - // If `enable_large_batch_splitting` is True, 'input_batch_size_limit' - // should be greater or equal than `max_execution_batch_size`; otherwise - // `input_batch_size_limit` should be equal to `max_execution_batch_size`. - size_t input_batch_size_limit = 1000; + // If `enable_large_batch_splitting` is True, 'max_batch_size' should be + // greater or equal than `max_execution_batch_size`; otherwise + // `max_batch_size` should be equal to `max_execution_batch_size`. + // TODO(b/154140947): + // Rename it to 'input_batch_size_limit' here and in caller's code. 
+ size_t max_batch_size = 1000; // If a task has been enqueued for this amount of time (in microseconds), // and a thread is available, the scheduler will immediately form a batch // from enqueued tasks and assign the batch to the thread for processing, - // even if the batch's size is below 'input_batch_size_limit'. + // even if the batch's size is below 'max_batch_size'. // // This parameter offers a way to bound queue latency, so that a task isn't // stuck in the queue indefinitely waiting for enough tasks to arrive to @@ -171,7 +173,7 @@ class SharedBatchScheduler // `input_task`: a unit of task to be splitted (raw pointer not owned). // `first_output_task_size`: task size of first output. - // `max_execution_batch_size`: Maximum size of each batch. + // `max_batch_size`: Maximum size of each batch. // `output_tasks`: A list of output tasks after split. // // REQUIRED: @@ -182,7 +184,7 @@ class SharedBatchScheduler // Instantiations of `TaskType` may vary, so it's up to caller to define // how (e.g., which members to access) to split input tasks. std::function* input_task, - int first_output_task_size, int input_batch_size_limit, + int first_output_task_size, int max_batch_size, std::vector>* output_tasks)> split_input_task_func; @@ -267,7 +269,7 @@ class Queue { using SchedulableBatchCallback = std::function; using SplitInputTaskIntoSubtasksCallback = std::function* input_task, int open_batch_remaining_slot, - int max_execution_batch_size, + int max_batch_size, std::vector>* output_tasks)>; Queue(const typename SharedBatchScheduler::QueueOptions& options, Env* env, ProcessBatchCallback process_batch_callback, @@ -295,7 +297,7 @@ class Queue { size_t SchedulingCapacity() const; // Returns the maximum allowed size of tasks submitted to the queue. - size_t max_task_size() const { return options_.input_batch_size_limit; } + size_t max_task_size() const { return options_.max_batch_size; } // Returns the maximum allowed size of tasks to be enqueued. 
// Returned value would be less than or equal to the maximum allowed input @@ -304,7 +306,7 @@ class Queue { if (options_.enable_large_batch_splitting) { return options_.max_execution_batch_size; } else { - return options_.input_batch_size_limit; + return options_.max_batch_size; } } @@ -457,10 +459,9 @@ Status SharedBatchScheduler::AddQueue( std::function>)> process_batch_callback, std::unique_ptr>* queue) { - if (options.input_batch_size_limit == 0) { - return errors::InvalidArgument( - "input_batch_size_limit must be positive; was ", - options.input_batch_size_limit); + if (options.max_batch_size == 0) { + return errors::InvalidArgument("max_batch_size must be positive; was ", + options.max_batch_size); } if (options.batch_timeout_micros < 0) { return errors::InvalidArgument( @@ -482,12 +483,11 @@ Status SharedBatchScheduler::AddQueue( } if (options.enable_large_batch_splitting && - (options.input_batch_size_limit < options.max_execution_batch_size)) { + (options.max_batch_size < options.max_execution_batch_size)) { return errors::InvalidArgument( - "When enable_large_batch_splitting is true, input_batch_size_limit " - "must be " + "When enable_large_batch_splitting is true, max_batch_size must be " "greater than or equal to max_execution_batch_size.", - options.enable_large_batch_splitting, options.input_batch_size_limit, + options.enable_large_batch_splitting, options.max_batch_size, options.max_execution_batch_size); } @@ -616,10 +616,10 @@ Status Queue::Schedule(std::unique_ptr* task) { template Status Queue::ScheduleWithoutSplit(std::unique_ptr* task) { - if ((*task)->size() > options_.input_batch_size_limit) { + if ((*task)->size() > options_.max_batch_size) { return errors::InvalidArgument("Task size ", (*task)->size(), " is larger than maximum input batch size ", - options_.input_batch_size_limit); + options_.max_batch_size); } bool notify_of_schedulable_batch = false; @@ -628,8 +628,7 @@ Status Queue::ScheduleWithoutSplit(std::unique_ptr* task) { DCHECK(!closed_); - if (batches_.back()->size() + (*task)->size() > - options_.input_batch_size_limit) { + if (batches_.back()->size() + (*task)->size() > options_.max_batch_size) { if (batches_.size() >= options_.max_enqueued_batches) { return errors::Unavailable( "The batch scheduling queue to which this task was submitted is " @@ -670,10 +669,10 @@ Status Queue::ScheduleWithSplit(std::unique_ptr* task) { profiler::TraceMe trace_me([task] { return strings::StrCat("ScheduleWithSplit:", (*task)->size()); }); - if ((*task)->size() > options_.input_batch_size_limit) { + if ((*task)->size() > options_.max_batch_size) { return errors::InvalidArgument("Task size ", (*task)->size(), " is larger than maximum input batch size ", - options_.input_batch_size_limit); + options_.max_batch_size); } // The max size to be enqueued. diff --git a/tensorflow/core/kernels/batching_util/shared_batch_scheduler_test.cc b/tensorflow/core/kernels/batching_util/shared_batch_scheduler_test.cc index 10f34cf829b..a1958777a49 100644 --- a/tensorflow/core/kernels/batching_util/shared_batch_scheduler_test.cc +++ b/tensorflow/core/kernels/batching_util/shared_batch_scheduler_test.cc @@ -97,7 +97,7 @@ TEST(SharedBatchSchedulerTest, Basic) { // Create two queues. 
SharedBatchScheduler::QueueOptions queue_options; - queue_options.input_batch_size_limit = 10; + queue_options.max_batch_size = 10; queue_options.batch_timeout_micros = 10 * 1000 * 1000; // 10 seconds queue_options.max_enqueued_batches = 2; std::unique_ptr> queue_0; @@ -155,7 +155,7 @@ TEST(SharedBatchSchedulerTest, ObeyBatchSizeConstraint) { std::shared_ptr> scheduler; TF_ASSERT_OK(SharedBatchScheduler::Create(options, &scheduler)); SharedBatchScheduler::QueueOptions queue_options; - queue_options.input_batch_size_limit = 10; + queue_options.max_batch_size = 10; queue_options.batch_timeout_micros = 10 * 1000 * 1000; // 10 seconds queue_options.max_enqueued_batches = 2; std::unique_ptr> queue; @@ -217,7 +217,7 @@ TEST(SharedBatchSchedulerTest, ObeysTimeout) { std::shared_ptr> scheduler; TF_ASSERT_OK(SharedBatchScheduler::Create(options, &scheduler)); SharedBatchScheduler::QueueOptions queue_options; - queue_options.input_batch_size_limit = 4; + queue_options.max_batch_size = 4; queue_options.batch_timeout_micros = 10; queue_options.max_enqueued_batches = 2; std::unique_ptr> queue; @@ -273,7 +273,7 @@ TEST(SharedBatchSchedulerTest, ObeysTimeoutWithRealClock) { std::shared_ptr> scheduler; TF_ASSERT_OK(SharedBatchScheduler::Create(options, &scheduler)); SharedBatchScheduler::QueueOptions queue_options; - queue_options.input_batch_size_limit = 10; + queue_options.max_batch_size = 10; queue_options.batch_timeout_micros = 100 * 1000; // 100 milliseconds queue_options.max_enqueued_batches = 2; std::unique_ptr> queue; @@ -318,7 +318,7 @@ TEST(SharedBatchSchedulerTest, TF_ASSERT_OK(SharedBatchScheduler::Create(options, &scheduler)); SharedBatchScheduler::QueueOptions queue_options; // Set a large batch size, so that we don't hit the batch size limit. - queue_options.input_batch_size_limit = 100; + queue_options.max_batch_size = 100; // Process a batch as soon as a thread is available. 
queue_options.batch_timeout_micros = 0; queue_options.max_enqueued_batches = 2; @@ -371,7 +371,7 @@ TEST(SharedBatchSchedulerTest, Fairness) { std::shared_ptr> scheduler; TF_ASSERT_OK(SharedBatchScheduler::Create(options, &scheduler)); SharedBatchScheduler::QueueOptions queue_options; - queue_options.input_batch_size_limit = 10; + queue_options.max_batch_size = 10; queue_options.batch_timeout_micros = 1; queue_options.max_enqueued_batches = 100 /* give plenty of room */; std::vector>> queues(2); @@ -423,7 +423,7 @@ TEST(SharedBatchSchedulerTest, ConstMethods) { std::shared_ptr> scheduler; TF_ASSERT_OK(SharedBatchScheduler::Create(options, &scheduler)); SharedBatchScheduler::QueueOptions queue_options; - queue_options.input_batch_size_limit = 2; + queue_options.max_batch_size = 2; queue_options.batch_timeout_micros = 0; queue_options.max_enqueued_batches = max_enqueued_batches; std::unique_ptr> queue; @@ -494,7 +494,7 @@ TEST(SharedBatchSchedulerTest, OneFullQueueDoesntBlockOtherQueues) { std::shared_ptr> scheduler; TF_ASSERT_OK(SharedBatchScheduler::Create(options, &scheduler)); SharedBatchScheduler::QueueOptions queue_options; - queue_options.input_batch_size_limit = 10; + queue_options.max_batch_size = 10; queue_options.batch_timeout_micros = 0; queue_options.max_enqueued_batches = 2; std::unique_ptr> queue_0; @@ -550,7 +550,7 @@ TEST(SharedBatchSchedulerTest, QueueDestructorBlocksUntilAllTasksProcessed) { std::shared_ptr> scheduler; TF_ASSERT_OK(SharedBatchScheduler::Create(options, &scheduler)); SharedBatchScheduler::QueueOptions queue_options; - queue_options.input_batch_size_limit = 10; + queue_options.max_batch_size = 10; queue_options.batch_timeout_micros = 0; queue_options.max_enqueued_batches = 2; std::unique_ptr> queue; From f3b556a903716877af2ece31adee537e808624bc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 10 Jul 2020 21:42:18 -0700 Subject: [PATCH 0223/2522] [XLA:SPMD] Use subgroup AllToAll for resharding Reshard from tile [2,2,1] to [1,2,2] can be done by a subgroup all-to-all between dimensions 0 and 2. 
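(Illustrative aside, not part of this patch: a rough numpy-only sketch of the grouping idea described above, assuming 4 devices and a fixed device order; the partitioner's actual logic differs.)

import numpy as np

# Source tile assignment [2,2,1] and target tile assignment [1,2,2] over the
# same four devices: dimension 0 stops being partitioned, dimension 2 starts.
source = np.arange(4).reshape(2, 2, 1)   # tile assignment [2,2,1]
target = source.reshape(1, 2, 2)          # tile assignment [1,2,2], same devices

# Devices that share the same index along dimension 1 only exchange data among
# themselves, so the all-to-all can run in small subgroups rather than across
# all devices.
for j in range(source.shape[1]):
    print("subgroup for dim-1 index", j, "->",
          sorted(source[:, j, :].ravel().tolist()))
# Prints subgroup [0, 2] for j=0 and subgroup [1, 3] for j=1.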
PiperOrigin-RevId: 320726161 Change-Id: Ied51f29d67c2f29f6a5aaf03fc07c44ddaecb809 --- .../xla/service/spmd/spmd_partitioner.cc | 123 ++++++------------ .../xla/service/spmd/spmd_partitioner.h | 3 +- .../xla/service/spmd/spmd_partitioner_test.cc | 26 ---- 3 files changed, 44 insertions(+), 108 deletions(-) diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc index 1b484e018ba..7e136be54e6 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc @@ -176,45 +176,16 @@ std::vector CreateReplicaGroups(int64 num_replicas) { return groups; } -absl::optional> GetReshardAllToAllSourceTargetDims( - const HloSharding& source, const HloSharding& target) { - if (source.IsTileMaximal() || target.IsTileMaximal() || - source.tile_assignment().num_dimensions() != - target.tile_assignment().num_dimensions()) { - return absl::nullopt; - } - int64 source_dim = -1; - int64 target_dim = -1; - for (int64 i = 0; i < source.tile_assignment().num_dimensions(); ++i) { - if (source.tile_assignment().dim(i) > 1 && - target.tile_assignment().dim(i) == 1) { - if (source_dim != -1) { - return absl::nullopt; - } - source_dim = i; - } else if (source.tile_assignment().dim(i) == 1 && - target.tile_assignment().dim(i) > 1) { - if (target_dim != -1) { - return absl::nullopt; - } - target_dim = i; - } else if (source.tile_assignment().dim(i) != - target.tile_assignment().dim(i)) { - return absl::nullopt; - } - } - if (source_dim == -1 || target_dim == -1 || source_dim == target_dim) { - return absl::nullopt; - } - return std::pair(source_dim, target_dim); +bool CanReshardWithAllToAll(const HloSharding& source, + const HloSharding& target) { + return UniqueTiledDim(source) && UniqueTiledDim(target) && + UniqueTiledDim(source) != UniqueTiledDim(target); } bool CanReshardWithCollectivePermute(const HloSharding& source, const HloSharding& target) { - return !source.IsTileMaximal() && !target.IsTileMaximal() && - source.tile_assignment().dimensions() == - target.tile_assignment().dimensions() && - source.tile_assignment() != target.tile_assignment(); + return UniqueTiledDim(source) && UniqueTiledDim(target) && + UniqueTiledDim(source) == UniqueTiledDim(target) && source != target; } // Clears all sharding attributes from instructions in the module. 
This must be @@ -307,10 +278,8 @@ PartitionedHlo PartitionedHlo::ReshardNoCache(const HloSharding& target) { return ReshardWithCollectivePermute(target); } - if (auto src_tgt_dims = - GetReshardAllToAllSourceTargetDims(sharding(), target)) { - return ReshardWithAllToAll(target, src_tgt_dims->first, - src_tgt_dims->second); + if (CanReshardWithAllToAll(sharding(), target)) { + return ReshardWithAllToAll(target); } // If not replicated yet, first replicate and then reshard to use one of the @@ -776,53 +745,45 @@ PartitionedHlo PartitionedHlo::Broadcast() const { return PartitionedHlo(result, base_shape_, state_); } -PartitionedHlo PartitionedHlo::ReshardWithAllToAll(const HloSharding& target, - int64 source_dim, - int64 target_dim) const { - const int64 group_size = sharding().tile_assignment().dim(source_dim); +PartitionedHlo PartitionedHlo::ReshardWithAllToAll( + const HloSharding& target) const { + int64 partition_count = sharding().tile_assignment().num_elements(); + absl::optional input_partition_dim = UniqueTiledDim(sharding()); + absl::optional output_partition_dim = UniqueTiledDim(target); + CHECK(input_partition_dim.has_value()); + CHECK(output_partition_dim.has_value()); // If the device order is different in the target, fix the order with // ReshardWithCollectivePermute. - std::vector xpose_dims(target.tile_assignment().num_dimensions()); - std::iota(xpose_dims.begin(), xpose_dims.end(), 0); - xpose_dims[source_dim] = target_dim; - xpose_dims[target_dim] = source_dim; + auto input_tile_fixed_device_order = target.tile_assignment(); + input_tile_fixed_device_order.Reshape( + sharding().tile_assignment().dimensions()); auto input_sharding_fixed_device_order = - hlo_sharding_util::TransposeSharding(target, xpose_dims); + HloSharding::Tile(input_tile_fixed_device_order); if (input_sharding_fixed_device_order != sharding()) { auto fixed_order = ReshardWithCollectivePermute(input_sharding_fixed_device_order); - return fixed_order.ReshardWithAllToAll(target, source_dim, target_dim); + return fixed_order.ReshardWithAllToAll(target); } auto padded_hlo = PadBaseShapeBeforeUnevenTiledSharding(hlo_, target, state_.b); // The order of ids in the group must follow the target sharding. - std::vector groups(target.tile_assignment().num_elements() / - group_size); - target.tile_assignment().Each( - [&](absl::Span indices, int64 device) { - int64 group_id = 0; - for (int64 dim = 0; dim < indices.size(); ++dim) { - if (dim == target_dim) { - continue; - } - group_id *= target.tile_assignment().dim(dim); - group_id += indices[dim]; - } - groups[group_id].add_replica_ids(device); - }); + std::vector groups(1); + for (int64 device : target.tile_assignment()) { + groups[0].add_replica_ids(device); + } HloInstruction* result = nullptr; - // Split along the split dimension (target_dim) of the all-to-all + // Split along the split dimension (output_partition_dim) of the all-to-all // output. std::vector dimensions; for (int64 i = 0; i < base_shape_.rank(); ++i) { - if (i == target_dim) { - dimensions.push_back(group_size); - dimensions.push_back(padded_hlo->shape().dimensions(i) / group_size); + if (i == *output_partition_dim) { + dimensions.push_back(partition_count); + dimensions.push_back(padded_hlo->shape().dimensions(i) / partition_count); } else { dimensions.push_back(padded_hlo->shape().dimensions(i)); } @@ -833,19 +794,21 @@ PartitionedHlo PartitionedHlo::ReshardWithAllToAll(const HloSharding& target, // After the reshape, it is guaranteed to have at least 3 dimensions. 
auto all_to_all = state_.collective_ops_creator.create_cross_partition_all_to_all( - state_.b, {reshape}, groups, (*state_.next_channel_id)++, target_dim); + state_.b, {reshape}, groups, (*state_.next_channel_id)++, + output_partition_dim); // Reorder the split dimension of the reshape to be located in front of the // input partition dimension, so the two dimensions can be combined. - int64 new_source_dim = - (target_dim < source_dim) ? source_dim + 1 : source_dim; + int64 new_input_partition_dim = (*output_partition_dim < *input_partition_dim) + ? *input_partition_dim + 1 + : *input_partition_dim; std::vector permutation; for (int64 i = 0; i < all_to_all->shape().rank(); ++i) { - if (i == target_dim) { + if (i == *output_partition_dim) { continue; } - if (i == new_source_dim) { - permutation.push_back(target_dim); + if (i == new_input_partition_dim) { + permutation.push_back(*output_partition_dim); } permutation.push_back(i); } @@ -856,7 +819,8 @@ PartitionedHlo PartitionedHlo::ReshardWithAllToAll(const HloSharding& target, // Combine the split dimension and the input partition dimension. auto new_shape = ShapeInference::InferAllToAllShape( - padded_hlo->shape(), target_dim, source_dim, group_size) + padded_hlo->shape(), *output_partition_dim, + *input_partition_dim, partition_count) .ValueOrDie(); result = state_.b->AddInstruction( HloInstruction::CreateReshape(new_shape, transpose)); @@ -873,8 +837,7 @@ PartitionedHlo PartitionedHlo::ReshardWithAllToAll(const HloSharding& target, PartitionedHlo PartitionedHlo::ReshardWithCollectivePermute( const HloSharding& target) const { - CHECK(CanReshardWithCollectivePermute(sharding(), target)) - << sharding().ToString() << " to " << target.ToString(); + CHECK(CanReshardWithCollectivePermute(sharding(), target)); std::vector> src_dst_pairs; sharding().tile_assignment().Each( [&](absl::Span indices, int64 src_device) { @@ -3690,8 +3653,8 @@ Status SpmdPartitioningVisitor::HandleDotHelper( output_batch_partitions == num_partitions_ && lhs_sharding_transposed_to_match_output == hlo->sharding()) { if (!may_reshard_with_allreduce && - !GetReshardAllToAllSourceTargetDims( - rhs.sharding(), *lhs_sharding_transposed_to_match_rhs)) { + !CanReshardWithAllToAll(rhs.sharding(), + *lhs_sharding_transposed_to_match_rhs)) { return false; } auto resharded_rhs = rhs.Reshard(*lhs_sharding_transposed_to_match_rhs); @@ -3705,8 +3668,8 @@ Status SpmdPartitioningVisitor::HandleDotHelper( output_batch_partitions == num_partitions_ && rhs_sharding_transposed_to_match_output == hlo->sharding()) { if (!may_reshard_with_allreduce && - !GetReshardAllToAllSourceTargetDims( - lhs.sharding(), *rhs_sharding_transposed_to_match_lhs)) { + !CanReshardWithAllToAll(lhs.sharding(), + *rhs_sharding_transposed_to_match_lhs)) { return false; } auto resharded_lhs = lhs.Reshard(*rhs_sharding_transposed_to_match_lhs); diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h index 40881b4b91c..52e4c9021d8 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h @@ -284,8 +284,7 @@ class PartitionedHlo { // Helper function to reshard the tensor using AllToAll (instead of the // default of Replicate followed by Slice). 
- PartitionedHlo ReshardWithAllToAll(const HloSharding& target, - int64 source_dim, int64 target_dim) const; + PartitionedHlo ReshardWithAllToAll(const HloSharding& target) const; // Helper function to reshard the tensor using CollectivePermute. PartitionedHlo ReshardWithCollectivePermute(const HloSharding& target) const; diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc index 9f3708fc12c..1f0b1d06c1f 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc @@ -3766,32 +3766,6 @@ ENTRY entry { op::Parameter(0)))); } -TEST_F(SpmdPartitioningTest, SubgroupAllToAllReshard) { - const char* const hlo_string = R"( -HloModule module - -ENTRY entry { - %param0 = f32[8,8,8,8] parameter(0), - sharding={devices=[2,2,1,2]0,1,2,3,4,5,6,7} - ROOT %copy = f32[8,8,8,8] copy(%param0), - sharding={devices=[1,2,2,2]0,1,4,5,2,3,6,7} -})"; - - TF_ASSERT_OK_AND_ASSIGN(auto module, - PartitionComputation(hlo_string, /*num_devices=*/2)); - VLOG(1) << module->ToString(); - - auto root = module->entry_computation()->root_instruction(); - auto reshape = - AllOf(op::Shape("f32[4,4,2,4,4]"), op::Reshape(op::Parameter(0))); - auto all_to_all = AllOf(op::Shape("f32[4,4,2,4,4]"), op::AllToAll(reshape)); - auto xpose = AllOf(op::Shape("f32[2,4,4,4,4]"), op::Transpose(all_to_all)); - EXPECT_THAT(root, - op::Copy(AllOf(op::Reshape(xpose), op::Shape("f32[8,4,4,4]")))); - EXPECT_EQ(root->operand(0)->operand(0)->operand(0)->replica_groups().size(), - 4); -} - } // namespace } // namespace spmd } // namespace xla From 3750943228d27d16ef03b235c59cd79de7efd4a9 Mon Sep 17 00:00:00 2001 From: Gaurav Jain Date: Fri, 10 Jul 2020 22:31:50 -0700 Subject: [PATCH 0224/2522] Remove unnecessary eval() calls The assertAll* statements already evaluate the arguments. 
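(Illustrative aside, not part of this patch: the pattern being cleaned up, shown with a made-up test case; tf.test.TestCase's assertAll* helpers evaluate tensor arguments themselves, so the explicit .eval() was redundant.)

import tensorflow as tf

class RedundantEvalExample(tf.test.TestCase):

  def test_no_explicit_eval_needed(self):
    t = tf.constant([1, 2, 3])
    # Graph-mode tests previously wrote self.assertAllEqual(t.eval(), [1, 2, 3]);
    # the assertion already evaluates `t`, so the eval() call adds nothing.
    self.assertAllEqual(t, [1, 2, 3])

if __name__ == "__main__":
  tf.test.main()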
PiperOrigin-RevId: 320729457 Change-Id: Ie1564419eb5cf8f69d0e700c000074e248401dbc --- tensorflow/python/eager/backprop_test.py | 4 +- .../python/eager/function_gradients_test.py | 2 +- tensorflow/python/eager/function_test.py | 2 +- .../feature_column/feature_column_test.py | 5 +- .../framework/auto_control_deps_test.py | 6 +- tensorflow/python/framework/function_test.py | 4 +- tensorflow/python/framework/importer_test.py | 2 +- .../keras/legacy_tf_layers/core_test.py | 6 +- .../python/kernel_tests/array_ops_test.py | 12 +- .../python/kernel_tests/atrous_conv2d_test.py | 12 +- .../distributions/categorical_test.py | 20 +-- .../python/kernel_tests/embedding_ops_test.py | 65 ++++--- .../python/kernel_tests/init_ops_test.py | 6 +- .../linalg/linear_operator_addition_test.py | 24 ++- .../python/kernel_tests/linalg_ops_test.py | 3 +- .../kernel_tests/regex_replace_op_test.py | 10 +- .../kernel_tests/sparse_cross_op_test.py | 12 +- .../kernel_tests/sparse_slice_op_test.py | 169 +++++++++--------- tensorflow/python/ops/gradients_test.py | 2 +- .../python/training/checkpoint_ops_test.py | 19 +- 20 files changed, 180 insertions(+), 205 deletions(-) diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py index 4f53e45ba0a..6ae2a4c9a5e 100644 --- a/tensorflow/python/eager/backprop_test.py +++ b/tensorflow/python/eager/backprop_test.py @@ -288,8 +288,8 @@ class BackpropTest(test.TestCase, parameterized.TestCase): tf_opt = training.GradientDescentOptimizer(0.1) tf_embedding.initializer.run() - self.assertAllClose(tf_grad.indices.eval(), grad.indices) - self.assertAllClose(tf_grad.values.eval(), grad.values) + self.assertAllClose(tf_grad.indices, grad.indices) + self.assertAllClose(tf_grad.values, grad.values) tf_opt.apply_gradients([(tf_grad, tf_embedding)]).run() expected = self.evaluate(tf_embedding) diff --git a/tensorflow/python/eager/function_gradients_test.py b/tensorflow/python/eager/function_gradients_test.py index d0fc4f5e809..a19cd662083 100644 --- a/tensorflow/python/eager/function_gradients_test.py +++ b/tensorflow/python/eager/function_gradients_test.py @@ -825,7 +825,7 @@ class FunctionGradientsTest(test.TestCase, parameterized.TestCase): return middle_fn(x, v) x = constant_op.constant(5.0) - self.assertAllEqual(outer_fn(x).eval(), 5.0 * (5.0 + 3.0)) + self.assertAllEqual(outer_fn(x), 5.0 * (5.0 + 3.0)) grad, = gradients_impl.gradients(outer_fn(x), x) diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index 1bcf51e62c6..c40986f9478 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -1360,7 +1360,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase): def g(x): return f(x) + 1 - self.assertAllEqual(g(constant_op.constant(2.0)).eval(), 5.0) + self.assertAllEqual(g(constant_op.constant(2.0)), 5.0) def testDict(self): diff --git a/tensorflow/python/feature_column/feature_column_test.py b/tensorflow/python/feature_column/feature_column_test.py index e3dff5d1591..2ea7face467 100644 --- a/tensorflow/python/feature_column/feature_column_test.py +++ b/tensorflow/python/feature_column/feature_column_test.py @@ -752,9 +752,8 @@ class HashedCategoricalColumnTest(test.TestCase): with self.cached_session(): self.assertEqual(dtypes.int64, output.values.dtype) self.assertAllEqual(expected_values, output.values) - self.assertAllEqual(wire_tensor.indices.eval(), output.indices) - self.assertAllEqual(wire_tensor.dense_shape.eval(), - output.dense_shape.eval()) + 
self.assertAllEqual(wire_tensor.indices, output.indices) + self.assertAllEqual(wire_tensor.dense_shape, output.dense_shape.eval()) def test_tensor_dtype_should_be_string_or_integer(self): string_fc = fc._categorical_column_with_hash_bucket( diff --git a/tensorflow/python/framework/auto_control_deps_test.py b/tensorflow/python/framework/auto_control_deps_test.py index 07049b869e1..dc5d8986958 100644 --- a/tensorflow/python/framework/auto_control_deps_test.py +++ b/tensorflow/python/framework/auto_control_deps_test.py @@ -607,9 +607,9 @@ class AutomaticControlDependenciesTest(test.TestCase): one = constant_op.constant(1.0) one = c.mark_as_return(one) one.eval(feed_dict={p: False}) - self.assertAllEqual(v.read_value().eval(), 5.0) + self.assertAllEqual(v.read_value(), 5.0) one.eval(feed_dict={p: True}) - self.assertAllEqual(v.read_value().eval(), 6.0) + self.assertAllEqual(v.read_value(), 6.0) @test_util.run_v1_only("b/120545219") def testCondNested(self): @@ -737,7 +737,7 @@ class AutomaticControlDependenciesTest(test.TestCase): v.assign(2 * v) return v.read_value() - self.assertAllEqual(f().eval(), 4.0) + self.assertAllEqual(f(), 4.0) def testOptimizerInDefun(self): def loss(v): diff --git a/tensorflow/python/framework/function_test.py b/tensorflow/python/framework/function_test.py index 16b2c7c5048..596b93227bf 100644 --- a/tensorflow/python/framework/function_test.py +++ b/tensorflow/python/framework/function_test.py @@ -437,10 +437,10 @@ class FunctionTest(test.TestCase): self.assertEqual([("Assert", "Assert")], Foo.stateful_ops) g = ops.Graph() with g.as_default(), self.cached_session(): - self.assertAllEqual(Foo(constant_op.constant(3.0)).eval(), 6.0) + self.assertAllEqual(Foo(constant_op.constant(3.0)), 6.0) with self.assertRaisesRegex(errors_impl.InvalidArgumentError, "assertion failed.*-3"): - self.assertAllEqual(Foo(constant_op.constant(-3.0)).eval(), 6.0) + self.assertAllEqual(Foo(constant_op.constant(-3.0)), 6.0) @test_util.run_deprecated_v1 def testAssertWrapper(self): diff --git a/tensorflow/python/framework/importer_test.py b/tensorflow/python/framework/importer_test.py index de295955c78..8fd25a39bd4 100644 --- a/tensorflow/python/framework/importer_test.py +++ b/tensorflow/python/framework/importer_test.py @@ -945,7 +945,7 @@ class ImportGraphDefTest(test.TestCase): with self.cached_session(): pack, = importer.import_graph_def(gdef, return_elements=["pack"]) - self.assertAllEqual(pack.outputs[0].eval(), [5.0, 5.0]) + self.assertAllEqual(pack.outputs[0], [5.0, 5.0]) def testWithDevice(self): with ops.Graph().as_default() as g: diff --git a/tensorflow/python/keras/legacy_tf_layers/core_test.py b/tensorflow/python/keras/legacy_tf_layers/core_test.py index 46fb4bef620..88f9a1afa0a 100644 --- a/tensorflow/python/keras/legacy_tf_layers/core_test.py +++ b/tensorflow/python/keras/legacy_tf_layers/core_test.py @@ -284,11 +284,11 @@ class DenseTest(test.TestCase, parameterized.TestCase): weights = _get_variable_dict_from_varstore() self.assertEqual(len(weights), 2) # Check that the matrix weights got initialized to ones (from scope). - self.assertAllClose(weights['scope/dense/kernel'].read_value().eval(), + self.assertAllClose(weights['scope/dense/kernel'].read_value(), np.ones((3, 2))) # Check that the bias still got initialized to zeros. 
- self.assertAllClose(weights['scope/dense/bias'].read_value().eval(), - np.zeros((2))) + self.assertAllClose(weights['scope/dense/bias'].read_value(), np.zeros( + (2))) def testEagerExecution(self): with context.eager_mode(): diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py index 994a6a6cd9b..af2b28a1033 100644 --- a/tensorflow/python/kernel_tests/array_ops_test.py +++ b/tensorflow/python/kernel_tests/array_ops_test.py @@ -1277,7 +1277,7 @@ class SequenceMaskTest(test_util.TensorFlowTestCase): res = array_ops.sequence_mask(constant_op.constant([1, 3, 2]), 5) self.assertAllEqual(res.get_shape(), [3, 5]) self.assertAllEqual( - res.eval(), + res, [[True, False, False, False, False], [True, True, True, False, False], [True, True, False, False, False]]) @@ -1289,7 +1289,7 @@ class SequenceMaskTest(test_util.TensorFlowTestCase): constant_op.constant([0, 1, 4]), dtype=dtypes.float32) self.assertAllEqual(res.get_shape().as_list(), [3, 4]) self.assertAllEqual( - res.eval(), + res, [[0.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], [1.0, 1.0, 1.0, 1.0]]) @test_util.run_deprecated_v1 @@ -1298,8 +1298,8 @@ class SequenceMaskTest(test_util.TensorFlowTestCase): res = array_ops.sequence_mask(constant_op.constant([0, 1, 4])) self.assertAllEqual(res.get_shape().as_list(), [3, 4]) self.assertAllEqual( - res.eval(), [[False, False, False, False], - [True, False, False, False], [True, True, True, True]]) + res, [[False, False, False, False], [True, False, False, False], + [True, True, True, True]]) @test_util.run_deprecated_v1 def testTwoDimensional(self): @@ -1315,7 +1315,7 @@ class SequenceMaskTest(test_util.TensorFlowTestCase): constant_op.constant([[0, 1, 4], [1, 2, 3]]), dtype=dtypes.float32) self.assertAllEqual(res.get_shape().as_list(), [2, 3, 4]) self.assertAllEqual( - res.eval(), + res, [[[0.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], [1.0, 1.0, 1.0, 1.0]], [[1.0, 0.0, 0.0, 0.0], [1.0, 1.0, 0.0, 0.0], [1.0, 1.0, 1.0, 0.0]]]) @@ -1334,7 +1334,7 @@ class SequenceMaskTest(test_util.TensorFlowTestCase): constant_op.constant(5, dtype=maxlen_dtype)) self.assertAllEqual(res.get_shape(), [3, 5]) self.assertAllEqual( - res.eval(), + res, [[True, False, False, False, False], [True, True, True, False, False], [True, True, False, False, False]]) diff --git a/tensorflow/python/kernel_tests/atrous_conv2d_test.py b/tensorflow/python/kernel_tests/atrous_conv2d_test.py index b84e7647239..e0cf7c2cc50 100644 --- a/tensorflow/python/kernel_tests/atrous_conv2d_test.py +++ b/tensorflow/python/kernel_tests/atrous_conv2d_test.py @@ -81,8 +81,7 @@ class AtrousConv2DTest(test.TestCase): y1 = nn_ops.atrous_conv2d(x, f, rate, padding=padding) y2 = nn_ops.conv2d( x, f_up, strides=[1, 1, 1, 1], padding=padding) - self.assertAllClose( - y1.eval(), self.evaluate(y2), rtol=1e-3, atol=1e-3) + self.assertAllClose(y1, y2, rtol=1e-3, atol=1e-3) @test_util.run_deprecated_v1 def testAtrousSequence(self): @@ -135,8 +134,7 @@ class AtrousConv2DTest(test.TestCase): y2 = nn_ops.conv2d(y2, f, strides=[1, 1, 1, 1], padding=padding) y2 = nn_ops.conv2d(y2, f, strides=[1, 1, 1, 1], padding=padding) y2 = array_ops.batch_to_space(y2, crops=pad, block_size=rate) - self.assertAllClose( - y1.eval(), self.evaluate(y2), rtol=1e-2, atol=1e-2) + self.assertAllClose(y1, y2, rtol=1e-2, atol=1e-2) @test_util.run_deprecated_v1 def testGradient(self): @@ -200,8 +198,7 @@ class AtrousConv2DTransposeTest(test.TestCase): padding) y2 = nn_ops.conv2d_transpose( x, f_up, y_shape, strides=[1, 1, 1, 1], 
padding=padding) - self.assertAllClose( - y1.eval(), self.evaluate(y2), rtol=1e-3, atol=1e-3) + self.assertAllClose(y1, y2, rtol=1e-3, atol=1e-3) class AtrousDepthwiseConv2DTest(test.TestCase): @@ -229,8 +226,7 @@ class AtrousDepthwiseConv2DTest(test.TestCase): y1 = nn_impl.depthwise_conv2d( x, f, strides, padding, rate=[rate, rate]) y2 = nn_impl.depthwise_conv2d(x, f_up, strides, padding) - self.assertAllClose( - y1.eval(), self.evaluate(y2), rtol=1e-3, atol=1e-3) + self.assertAllClose(y1, y2, rtol=1e-3, atol=1e-3) if __name__ == "__main__": diff --git a/tensorflow/python/kernel_tests/distributions/categorical_test.py b/tensorflow/python/kernel_tests/distributions/categorical_test.py index fbde3abba45..7dd953e1cb4 100644 --- a/tensorflow/python/kernel_tests/distributions/categorical_test.py +++ b/tensorflow/python/kernel_tests/distributions/categorical_test.py @@ -61,8 +61,8 @@ class CategoricalTest(test.TestCase, parameterized.TestCase): with self.cached_session(): self.assertAllEqual([2], dist.probs.get_shape()) self.assertAllEqual([2], dist.logits.get_shape()) - self.assertAllClose(dist.probs.eval(), p) - self.assertAllClose(dist.logits.eval(), logits) + self.assertAllClose(dist.probs, p) + self.assertAllClose(dist.logits, logits) @test_util.run_deprecated_v1 def testShapes(self): @@ -131,14 +131,14 @@ class CategoricalTest(test.TestCase, parameterized.TestCase): histograms = [[0.2, 0.8], [0.6, 0.4]] dist = categorical.Categorical(math_ops.log(histograms) - 50.) with self.cached_session(): - self.assertAllClose(dist.prob([0, 1]).eval(), [0.2, 0.4]) + self.assertAllClose(dist.prob([0, 1]), [0.2, 0.4]) @test_util.run_deprecated_v1 def testPMFNoBatch(self): histograms = [0.2, 0.8] dist = categorical.Categorical(math_ops.log(histograms) - 50.) with self.cached_session(): - self.assertAllClose(dist.prob(0).eval(), 0.2) + self.assertAllClose(dist.prob(0), 0.2) @test_util.run_deprecated_v1 def testCDFWithDynamicEventShapeKnownNdims(self): @@ -240,7 +240,7 @@ class CategoricalTest(test.TestCase, parameterized.TestCase): expected_cdf_result[2, 1] = 0.75 with self.cached_session(): - self.assertAllClose(dist.cdf(devent).eval(), expected_cdf_result) + self.assertAllClose(dist.cdf(devent), expected_cdf_result) def testBroadcastWithBatchParamsAndBiggerEvent(self): ## The parameters have a single batch dimension, and the event has two. @@ -314,15 +314,15 @@ class CategoricalTest(test.TestCase, parameterized.TestCase): logits = np.log([[0.2, 0.8], [0.6, 0.4]]) - 50. dist = categorical.Categorical(logits) with self.cached_session(): - self.assertAllClose(dist.log_prob([0, 1]).eval(), np.log([0.2, 0.4])) - self.assertAllClose(dist.log_prob([0.0, 1.0]).eval(), np.log([0.2, 0.4])) + self.assertAllClose(dist.log_prob([0, 1]), np.log([0.2, 0.4])) + self.assertAllClose(dist.log_prob([0.0, 1.0]), np.log([0.2, 0.4])) @test_util.run_deprecated_v1 def testEntropyNoBatch(self): logits = np.log([0.2, 0.8]) - 50. dist = categorical.Categorical(logits) with self.cached_session(): - self.assertAllClose(dist.entropy().eval(), + self.assertAllClose(dist.entropy(), -(0.2 * np.log(0.2) + 0.8 * np.log(0.8))) @test_util.run_deprecated_v1 @@ -330,7 +330,7 @@ class CategoricalTest(test.TestCase, parameterized.TestCase): logits = np.log([[0.2, 0.8], [0.6, 0.4]]) - 50. 
dist = categorical.Categorical(logits) with self.cached_session(): - self.assertAllClose(dist.entropy().eval(), [ + self.assertAllClose(dist.entropy(), [ -(0.2 * np.log(0.2) + 0.8 * np.log(0.8)), -(0.6 * np.log(0.6) + 0.4 * np.log(0.4)) ]) @@ -460,7 +460,7 @@ class CategoricalTest(test.TestCase, parameterized.TestCase): with self.cached_session(): histograms = [[[0.2, 0.8], [0.6, 0.4]]] dist = categorical.Categorical(math_ops.log(histograms) - 50.) - self.assertAllEqual(dist.mode().eval(), [[1, 0]]) + self.assertAllEqual(dist.mode(), [[1, 0]]) @test_util.run_deprecated_v1 def testCategoricalCategoricalKL(self): diff --git a/tensorflow/python/kernel_tests/embedding_ops_test.py b/tensorflow/python/kernel_tests/embedding_ops_test.py index be8ff5f7d08..dce010b5c80 100644 --- a/tensorflow/python/kernel_tests/embedding_ops_test.py +++ b/tensorflow/python/kernel_tests/embedding_ops_test.py @@ -556,7 +556,7 @@ class EmbeddingLookupTest(test.TestCase): ids = np.random.randint( params.shape[0], size=np.prod(ids_shape)).reshape(ids_shape) # Compare nonsharded to gather - simple = embedding_ops.embedding_lookup(params, ids).eval() + simple = embedding_ops.embedding_lookup(params, ids) self.assertAllEqual(simple, array_ops.gather(params, ids)) # Run a few random sharded versions for procs in 1, 2, 3: @@ -564,7 +564,7 @@ class EmbeddingLookupTest(test.TestCase): split_params = [ array_ops.gather(params, stride + p) for p in xrange(procs) ] - sharded = embedding_ops.embedding_lookup(split_params, ids).eval() + sharded = embedding_ops.embedding_lookup(split_params, ids) self.assertAllEqual(simple, sharded) @test_util.run_deprecated_v1 @@ -583,8 +583,7 @@ class EmbeddingLookupTest(test.TestCase): params.shape[0], size=np.prod(ids_shape, dtype=np.int64)).reshape(ids_shape) # Compare nonsharded to gather - simple = embedding_ops.embedding_lookup( - params, ids, max_norm=1.0).eval() + simple = embedding_ops.embedding_lookup(params, ids, max_norm=1.0) # assertAllClose is used here as different implementations of sqrt may # be used to compute each of the values being compared. For example, # on AVX512 builds the embedding operation makes use of Eigen's fast @@ -599,7 +598,7 @@ class EmbeddingLookupTest(test.TestCase): array_ops.gather(params, stride + p) for p in xrange(procs) ] sharded = embedding_ops.embedding_lookup( - split_params, ids, max_norm=1.0).eval() + split_params, ids, max_norm=1.0) self.assertAllEqual(simple, sharded) @test_util.run_deprecated_v1 @@ -626,7 +625,7 @@ class EmbeddingLookupTest(test.TestCase): dtype=np.int64)).reshape(ids_shape) # Compare nonsharded to gather. simple = embedding_ops._embedding_lookup_and_transform( - params, ids, max_norm=l2_norm, transform_fn=transform).eval() + params, ids, max_norm=l2_norm, transform_fn=transform) self.assertAllClose(simple, array_ops.gather(params_norm, ids)) # Run a few different sharded versions. for procs in 1, 2, 3: @@ -635,8 +634,7 @@ class EmbeddingLookupTest(test.TestCase): array_ops.gather(params, stride + p) for p in xrange(procs) ] sharded = embedding_ops._embedding_lookup_and_transform( - split_params, ids, max_norm=l2_norm, - transform_fn=transform).eval() + split_params, ids, max_norm=l2_norm, transform_fn=transform) # assertAllClose is used here as different implementations of sqrt may # be used to compute each of the values being compared. 
For example, # on AVX512 builds the embedding operation makes use of Eigen's fast @@ -871,8 +869,9 @@ class SafeEmbeddingLookupSparseTest(test.TestCase): sparse_ids, sparse_weights = self._ids_and_weights_2d() embedding_lookup_result = ( - embedding_ops.safe_embedding_lookup_sparse_v2( - embedding_weights, sparse_ids, sparse_weights).eval()) + embedding_ops.safe_embedding_lookup_sparse_v2(embedding_weights, + sparse_ids, + sparse_weights)) self.assertAllClose( embedding_lookup_result, @@ -887,8 +886,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase): embedding_lookup_result = ( embedding_ops.safe_embedding_lookup_sparse_v2( - embedding_weights, sparse_ids, sparse_weights, - default_id=3).eval()) + embedding_weights, sparse_ids, sparse_weights, default_id=3)) self.assertAllClose( embedding_lookup_result, @@ -903,8 +901,8 @@ class SafeEmbeddingLookupSparseTest(test.TestCase): sparse_ids, _ = self._ids_and_weights_2d() embedding_lookup_result = ( - embedding_ops.safe_embedding_lookup_sparse_v2( - embedding_weights, sparse_ids, None).eval()) + embedding_ops.safe_embedding_lookup_sparse_v2(embedding_weights, + sparse_ids, None)) self.assertAllClose( embedding_lookup_result, @@ -919,8 +917,8 @@ class SafeEmbeddingLookupSparseTest(test.TestCase): sparse_ids, _ = self._ids_and_weights_2d() embedding_lookup_result = ( - embedding_ops.safe_embedding_lookup_sparse_v2( - embedding_weights, sparse_ids, None).eval()) + embedding_ops.safe_embedding_lookup_sparse_v2(embedding_weights, + sparse_ids, None)) embedding_weights = list(itertools.chain(*embedding_weights)) self.assertAllClose(embedding_lookup_result, @@ -951,8 +949,9 @@ class SafeEmbeddingLookupSparseTest(test.TestCase): sparse_ids, sparse_weights = self._ids_and_weights_3d() embedding_lookup_result = ( - embedding_ops.safe_embedding_lookup_sparse_v2( - embedding_weights, sparse_ids, sparse_weights).eval()) + embedding_ops.safe_embedding_lookup_sparse_v2(embedding_weights, + sparse_ids, + sparse_weights)) self.assertAllClose(embedding_lookup_result, [[ (1.0 * embedding_weights[0][0] + 2.0 * embedding_weights[0][1]) / 3.0, @@ -967,8 +966,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase): embedding_lookup_result = ( embedding_ops.safe_embedding_lookup_sparse_v2( - embedding_weights, sparse_ids, sparse_weights, - default_id=3).eval()) + embedding_weights, sparse_ids, sparse_weights, default_id=3)) self.assertAllClose( embedding_lookup_result, @@ -985,8 +983,8 @@ class SafeEmbeddingLookupSparseTest(test.TestCase): sparse_ids, _ = self._ids_and_weights_3d() embedding_lookup_result = ( - embedding_ops.safe_embedding_lookup_sparse_v2( - embedding_weights, sparse_ids, None).eval()) + embedding_ops.safe_embedding_lookup_sparse_v2(embedding_weights, + sparse_ids, None)) self.assertAllClose(embedding_lookup_result, [[( embedding_weights[0][0] + embedding_weights[0][1]) / 2.0, [0] * 4, [ @@ -1003,8 +1001,8 @@ class SafeEmbeddingLookupSparseTest(test.TestCase): sparse_ids, _ = self._ids_and_weights_3d() embedding_lookup_result = ( - embedding_ops.safe_embedding_lookup_sparse_v2( - embedding_weights, sparse_ids, None).eval()) + embedding_ops.safe_embedding_lookup_sparse_v2(embedding_weights, + sparse_ids, None)) embedding_weights = list(itertools.chain(*embedding_weights)) self.assertAllClose(embedding_lookup_result, [[ @@ -1046,7 +1044,7 @@ class DynamicStitchOpTest(test.TestCase): ops.convert_to_tensor([1, 2]) ] self.assertAllEqual( - data_flow_ops.dynamic_stitch(indices, values).eval(), [12, 23, 1, 2]) + data_flow_ops.dynamic_stitch(indices, 
values), [12, 23, 1, 2]) @test_util.run_deprecated_v1 def testCint32Gpu(self): @@ -1060,7 +1058,7 @@ class DynamicStitchOpTest(test.TestCase): ops.convert_to_tensor([1, 2]) ] self.assertAllEqual( - data_flow_ops.dynamic_stitch(indices, values).eval(), [12, 23, 1, 2]) + data_flow_ops.dynamic_stitch(indices, values), [12, 23, 1, 2]) @test_util.run_deprecated_v1 def testInt32Cpu(self): @@ -1074,7 +1072,7 @@ class DynamicStitchOpTest(test.TestCase): ops.convert_to_tensor([1, 2]) ] self.assertAllEqual( - data_flow_ops.dynamic_stitch(indices, values).eval(), [12, 23, 1, 2]) + data_flow_ops.dynamic_stitch(indices, values), [12, 23, 1, 2]) @test_util.run_deprecated_v1 def testInt32Gpu(self): @@ -1088,7 +1086,7 @@ class DynamicStitchOpTest(test.TestCase): ops.convert_to_tensor([1, 2]) ] self.assertAllEqual( - data_flow_ops.dynamic_stitch(indices, values).eval(), [12, 23, 1, 2]) + data_flow_ops.dynamic_stitch(indices, values), [12, 23, 1, 2]) @test_util.run_deprecated_v1 def testSumGradArgs(self): @@ -1102,7 +1100,7 @@ class DynamicStitchOpTest(test.TestCase): ops.convert_to_tensor([1, 1]) ] self.assertAllEqual( - data_flow_ops.dynamic_stitch(indices, values).eval(), [2, 3, 1, 1]) + data_flow_ops.dynamic_stitch(indices, values), [2, 3, 1, 1]) # We expect that the values are merged in order. @test_util.run_deprecated_v1 @@ -1115,7 +1113,7 @@ class DynamicStitchOpTest(test.TestCase): indices.extend([ops.convert_to_tensor(np.arange(100).astype(np.int32))]) np_values.extend([np.random.uniform(size=100)]) values.extend([ops.convert_to_tensor(np_values[-1])]) - stitched = data_flow_ops.dynamic_stitch(indices, values).eval() + stitched = data_flow_ops.dynamic_stitch(indices, values) self.assertAllEqual(np_values[-1], stitched) @@ -1133,7 +1131,7 @@ class ParallelDynamicStitchOpTest(test.TestCase): ops.convert_to_tensor([1, 2, 3]) ] self.assertAllEqual( - data_flow_ops.parallel_dynamic_stitch(indices, values).eval(), + data_flow_ops.parallel_dynamic_stitch(indices, values), [12, 23, 1, 2, 34, 3, 45]) @test_util.run_deprecated_v1 @@ -1148,7 +1146,7 @@ class ParallelDynamicStitchOpTest(test.TestCase): ops.convert_to_tensor([1, 3, 2]) ] self.assertAllEqual( - data_flow_ops.parallel_dynamic_stitch(indices, values).eval(), + data_flow_ops.parallel_dynamic_stitch(indices, values), [12, 23, 1, 2, 3, 34, 45, 56]) @test_util.run_deprecated_v1 @@ -1157,8 +1155,7 @@ class ParallelDynamicStitchOpTest(test.TestCase): indices = [ops.convert_to_tensor([0, 1]), ops.convert_to_tensor([2, 3])] values = [ops.convert_to_tensor([2, 3]), ops.convert_to_tensor([1, 1])] self.assertAllEqual( - data_flow_ops.parallel_dynamic_stitch(indices, values).eval(), - [2, 3, 1, 1]) + data_flow_ops.parallel_dynamic_stitch(indices, values), [2, 3, 1, 1]) if __name__ == "__main__": diff --git a/tensorflow/python/kernel_tests/init_ops_test.py b/tensorflow/python/kernel_tests/init_ops_test.py index 393d2cdfdcd..57dcec1f23f 100644 --- a/tensorflow/python/kernel_tests/init_ops_test.py +++ b/tensorflow/python/kernel_tests/init_ops_test.py @@ -1325,7 +1325,7 @@ class IdentityInitializerTest(test.TestCase): init = init_ops.identity_initializer() shape = (10, 5) with self.session(graph=ops.Graph(), use_gpu=True): - self.assertAllClose(init(shape).eval(), np.eye(*shape)) + self.assertAllClose(init(shape), np.eye(*shape)) @test_util.run_deprecated_v1 def testGain(self): @@ -1334,9 +1334,9 @@ class IdentityInitializerTest(test.TestCase): init_default = init_ops.identity_initializer(dtype=dtype) init_custom = init_ops.identity_initializer(gain=0.9, 
dtype=dtype) with self.session(graph=ops.Graph(), use_gpu=True): - self.assertAllClose(init_default(shape).eval(), np.eye(*shape)) + self.assertAllClose(init_default(shape), np.eye(*shape)) with self.session(graph=ops.Graph(), use_gpu=True): - self.assertAllClose(init_custom(shape).eval(), np.eye(*shape) * 0.9) + self.assertAllClose(init_custom(shape), np.eye(*shape) * 0.9) @test_util.run_deprecated_v1 def testPartitions(self): diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_addition_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_addition_test.py index 4e4a81d0647..fa45c27228b 100644 --- a/tensorflow/python/kernel_tests/linalg/linear_operator_addition_test.py +++ b/tensorflow/python/kernel_tests/linalg/linear_operator_addition_test.py @@ -279,9 +279,8 @@ class AddAndReturnScaledIdentityTest(test.TestCase): self.assertIsInstance(operator, linalg.LinearOperatorScaledIdentity) with self.cached_session(): - self.assertAllClose(2 * - linalg_ops.eye(num_rows=2, batch_shape=[3]).eval(), - operator.to_dense().eval()) + self.assertAllClose(2 * linalg_ops.eye(num_rows=2, batch_shape=[3]), + operator.to_dense()) self.assertTrue(operator.is_positive_definite) self.assertTrue(operator.is_non_singular) self.assertEqual("my_operator", operator.name) @@ -298,9 +297,8 @@ class AddAndReturnScaledIdentityTest(test.TestCase): self.assertIsInstance(operator, linalg.LinearOperatorScaledIdentity) with self.cached_session(): - self.assertAllClose(3.2 * - linalg_ops.eye(num_rows=2, batch_shape=[3]).eval(), - operator.to_dense().eval()) + self.assertAllClose(3.2 * linalg_ops.eye(num_rows=2, batch_shape=[3]), + operator.to_dense()) self.assertTrue(operator.is_positive_definite) self.assertTrue(operator.is_non_singular) self.assertEqual("my_operator", operator.name) @@ -318,9 +316,8 @@ class AddAndReturnScaledIdentityTest(test.TestCase): self.assertIsInstance(operator, linalg.LinearOperatorScaledIdentity) with self.cached_session(): - self.assertAllClose(1.2 * - linalg_ops.eye(num_rows=2, batch_shape=[3]).eval(), - operator.to_dense().eval()) + self.assertAllClose(1.2 * linalg_ops.eye(num_rows=2, batch_shape=[3]), + operator.to_dense()) self.assertTrue(operator.is_positive_definite) self.assertTrue(operator.is_non_singular) self.assertEqual("my_operator", operator.name) @@ -343,9 +340,8 @@ class AddAndReturnDiagTest(test.TestCase): self.assertIsInstance(operator, linalg.LinearOperatorDiag) with self.cached_session(): - self.assertAllClose(2 * - linalg_ops.eye(num_rows=2, batch_shape=[3]).eval(), - operator.to_dense().eval()) + self.assertAllClose(2 * linalg_ops.eye(num_rows=2, batch_shape=[3]), + operator.to_dense()) self.assertTrue(operator.is_positive_definite) self.assertTrue(operator.is_non_singular) self.assertEqual("my_operator", operator.name) @@ -365,8 +361,8 @@ class AddAndReturnDiagTest(test.TestCase): with self.cached_session(): self.assertAllClose( - linalg.LinearOperatorDiag(diag1 + diag2).to_dense().eval(), - operator.to_dense().eval()) + linalg.LinearOperatorDiag(diag1 + diag2).to_dense(), + operator.to_dense()) self.assertTrue(operator.is_positive_definite) self.assertTrue(operator.is_non_singular) self.assertEqual("my_operator", operator.name) diff --git a/tensorflow/python/kernel_tests/linalg_ops_test.py b/tensorflow/python/kernel_tests/linalg_ops_test.py index 916d9a4b8c8..2cddddae0dd 100644 --- a/tensorflow/python/kernel_tests/linalg_ops_test.py +++ b/tensorflow/python/kernel_tests/linalg_ops_test.py @@ -69,8 +69,7 @@ class CholeskySolveTest(test.TestCase): with 
self.subTest(n=n, np_type=np_type, atol=atol, k=k): rhs = self.rng.randn(2, n, k).astype(np_type) x = linalg_ops.cholesky_solve(chol, rhs) - self.assertAllClose( - rhs, math_ops.matmul(array, x).eval(), atol=atol) + self.assertAllClose(rhs, math_ops.matmul(array, x), atol=atol) class LogdetTest(test.TestCase): diff --git a/tensorflow/python/kernel_tests/regex_replace_op_test.py b/tensorflow/python/kernel_tests/regex_replace_op_test.py index 6c7dfee7b40..c0e5f9d76fb 100644 --- a/tensorflow/python/kernel_tests/regex_replace_op_test.py +++ b/tensorflow/python/kernel_tests/regex_replace_op_test.py @@ -44,7 +44,7 @@ class RegexReplaceOpVariantsTest(test.TestCase, parameterized.TestCase): "HiJkLmN"], dtypes.string), pos=0, len=5) - stripped = op(inp, "\\p{Ll}", ".").eval() + stripped = op(inp, "\\p{Ll}", ".") self.assertAllEqual([b"A.C.E", b"H.J.L"], stripped) @test_util.run_deprecated_v1 @@ -52,7 +52,7 @@ class RegexReplaceOpVariantsTest(test.TestCase, parameterized.TestCase): values = ["a:foo", "a:bar", "a:foo", "b:baz", "b:qux", "ca:b"] with self.cached_session(): input_vector = constant_op.constant(values, dtypes.string) - stripped = op(input_vector, "^(a:|b:)", "", replace_global=False).eval() + stripped = op(input_vector, "^(a:|b:)", "", replace_global=False) self.assertAllEqual([b"foo", b"bar", b"foo", b"baz", b"qux", b"ca:b"], stripped) @@ -61,7 +61,7 @@ class RegexReplaceOpVariantsTest(test.TestCase, parameterized.TestCase): values = ["aba\naba", "abcdabcde"] with self.cached_session(): input_vector = constant_op.constant(values, dtypes.string) - stripped = op(input_vector, "a.*a", "(\\0)").eval() + stripped = op(input_vector, "a.*a", "(\\0)") self.assertAllEqual([b"(aba)\n(aba)", b"(abcda)bcde"], stripped) @test_util.run_deprecated_v1 @@ -69,7 +69,7 @@ class RegexReplaceOpVariantsTest(test.TestCase, parameterized.TestCase): values = ["abc", "1"] with self.cached_session(): input_vector = constant_op.constant(values, dtypes.string) - stripped = op(input_vector, "", "x").eval() + stripped = op(input_vector, "", "x") self.assertAllEqual([b"xaxbxcx", b"x1x"], stripped) @test_util.run_deprecated_v1 @@ -87,7 +87,7 @@ class RegexReplaceOpVariantsTest(test.TestCase, parameterized.TestCase): values = ["ababababab", "abcabcabc", ""] with self.cached_session(): input_vector = constant_op.constant(values, dtypes.string) - stripped = op(input_vector, "ab", "abc", True).eval() + stripped = op(input_vector, "ab", "abc", True) self.assertAllEqual([b"abcabcabcabcabc", b"abccabccabcc", b""], stripped) diff --git a/tensorflow/python/kernel_tests/sparse_cross_op_test.py b/tensorflow/python/kernel_tests/sparse_cross_op_test.py index 48192551a18..9f36e7212e1 100644 --- a/tensorflow/python/kernel_tests/sparse_cross_op_test.py +++ b/tensorflow/python/kernel_tests/sparse_cross_op_test.py @@ -65,9 +65,9 @@ class BaseSparseCrossOpTest(test.TestCase): constant_op.constant(shape, dtypes.int64)) def _assert_sparse_tensor_equals(self, sp1, sp2): - self.assertAllEqual(sp1.indices.eval(), sp2.indices) - self.assertAllEqual(sp1.values.eval(), sp2.values) - self.assertAllEqual(sp1.dense_shape.eval(), sp2.dense_shape) + self.assertAllEqual(sp1.indices, sp2.indices) + self.assertAllEqual(sp1.values, sp2.values) + self.assertAllEqual(sp1.dense_shape, sp2.dense_shape) def _assert_sparse_tensor_empty(self, sp): self.assertEqual(0, sp.indices.size) @@ -424,9 +424,9 @@ class SparseCrossOpTest(test.TestCase): self.assertEqual(0, sp.dense_shape[1]) def _assert_sparse_tensor_equals(self, sp1, sp2): - 
self.assertAllEqual(sp1.indices.eval(), sp2.indices) - self.assertAllEqual(sp1.values.eval(), sp2.values) - self.assertAllEqual(sp1.dense_shape.eval(), sp2.dense_shape) + self.assertAllEqual(sp1.indices, sp2.indices) + self.assertAllEqual(sp1.values, sp2.values) + self.assertAllEqual(sp1.dense_shape, sp2.dense_shape) def _sparse_tensor(self, data, batch_size=-1): """Generates a SparseTensor. diff --git a/tensorflow/python/kernel_tests/sparse_slice_op_test.py b/tensorflow/python/kernel_tests/sparse_slice_op_test.py index 7f8c91bde67..721b64c3f5f 100644 --- a/tensorflow/python/kernel_tests/sparse_slice_op_test.py +++ b/tensorflow/python/kernel_tests/sparse_slice_op_test.py @@ -87,16 +87,15 @@ class SparseSliceOpTest(test.TestCase): sp_tensor0 = sparse_ops.sparse_slice(sp_input, [0, 0], [2, 6]) sp_tensor1 = sparse_ops.sparse_slice(sp_input, [2, 0], [3, 7]) self.assertAllEqual( - sp_tensor0.indices.eval(), + sp_tensor0.indices, [[0, 0], [0, 2], [0, 4], [0, 5], [1, 1], [1, 3], [1, 4]]) - self.assertAllEqual(sp_tensor0.values.eval(), [0, 2, 4, 5, 11, 13, 14]) - self.assertAllEqual(sp_tensor0.dense_shape.eval(), [2, 6]) + self.assertAllEqual(sp_tensor0.values, [0, 2, 4, 5, 11, 13, 14]) + self.assertAllEqual(sp_tensor0.dense_shape, [2, 6]) self.assertAllEqual( - sp_tensor1.indices.eval(), + sp_tensor1.indices, [[0, 0], [0, 3], [0, 5], [1, 0], [1, 2], [1, 3], [1, 5]]) - self.assertAllEqual(sp_tensor1.values.eval(), - [20, 23, 25, 30, 32, 33, 35]) - self.assertAllEqual(sp_tensor1.dense_shape.eval(), [2, 6]) + self.assertAllEqual(sp_tensor1.values, [20, 23, 25, 30, 32, 33, 35]) + self.assertAllEqual(sp_tensor1.dense_shape, [2, 6]) @test_util.run_deprecated_v1 def testSliceMatrixUnevenCols(self): @@ -107,38 +106,38 @@ class SparseSliceOpTest(test.TestCase): sp_tensor2 = sparse_ops.sparse_slice(sp_input, [0, 5], [5, 2]) self.assertAllEqual( - sp_tensor0.indices.eval(), + sp_tensor0.indices, [[0, 0], [0, 2], [1, 1], [2, 0], [3, 0], [3, 2], [4, 1]]) - self.assertAllEqual(sp_tensor0.values.eval(), [0, 2, 11, 20, 30, 32, 41]) - self.assertAllEqual(sp_tensor0.dense_shape.eval(), [5, 3]) - self.assertAllEqual(sp_tensor1.indices.eval(), + self.assertAllEqual(sp_tensor0.values, [0, 2, 11, 20, 30, 32, 41]) + self.assertAllEqual(sp_tensor0.dense_shape, [5, 3]) + self.assertAllEqual(sp_tensor1.indices, [[0, 1], [1, 0], [1, 1], [2, 0], [3, 0], [4, 1]]) - self.assertAllEqual(sp_tensor1.values.eval(), [4, 13, 14, 23, 33, 44]) - self.assertAllEqual(sp_tensor1.dense_shape.eval(), [5, 2]) - self.assertAllEqual(sp_tensor2.indices.eval(), + self.assertAllEqual(sp_tensor1.values, [4, 13, 14, 23, 33, 44]) + self.assertAllEqual(sp_tensor1.dense_shape, [5, 2]) + self.assertAllEqual(sp_tensor2.indices, [[0, 0], [1, 1], [2, 0], [3, 0], [4, 1]]) - self.assertAllEqual(sp_tensor2.values.eval(), [5, 16, 25, 35, 46]) - self.assertAllEqual(sp_tensor2.dense_shape.eval(), [5, 2]) + self.assertAllEqual(sp_tensor2.values, [5, 16, 25, 35, 46]) + self.assertAllEqual(sp_tensor2.dense_shape, [5, 2]) sp_tensor0 = sparse_ops.sparse_slice(sp_input, [0, 0], [5, 2]) sp_tensor1 = sparse_ops.sparse_slice(sp_input, [0, 2], [5, 2]) sp_tensor2 = sparse_ops.sparse_slice(sp_input, [0, 4], [5, 2]) sp_tensor3 = sparse_ops.sparse_slice(sp_input, [0, 6], [5, 2]) - self.assertAllEqual(sp_tensor0.indices.eval(), + self.assertAllEqual(sp_tensor0.indices, [[0, 0], [1, 1], [2, 0], [3, 0], [4, 1]]) - self.assertAllEqual(sp_tensor0.values.eval(), [0, 11, 20, 30, 41]) - self.assertAllEqual(sp_tensor0.dense_shape.eval(), [5, 2]) - 
self.assertAllEqual(sp_tensor1.indices.eval(), + self.assertAllEqual(sp_tensor0.values, [0, 11, 20, 30, 41]) + self.assertAllEqual(sp_tensor0.dense_shape, [5, 2]) + self.assertAllEqual(sp_tensor1.indices, [[0, 0], [1, 1], [2, 1], [3, 0], [3, 1]]) - self.assertAllEqual(sp_tensor1.values.eval(), [2, 13, 23, 32, 33]) - self.assertAllEqual(sp_tensor1.dense_shape.eval(), [5, 2]) - self.assertAllEqual(sp_tensor2.indices.eval(), + self.assertAllEqual(sp_tensor1.values, [2, 13, 23, 32, 33]) + self.assertAllEqual(sp_tensor1.dense_shape, [5, 2]) + self.assertAllEqual(sp_tensor2.indices, [[0, 0], [0, 1], [1, 0], [2, 1], [3, 1], [4, 0]]) - self.assertAllEqual(sp_tensor2.values.eval(), [4, 5, 14, 25, 35, 44]) - self.assertAllEqual(sp_tensor2.dense_shape.eval(), [5, 2]) - self.assertAllEqual(sp_tensor3.indices.eval(), [[1, 0], [4, 0]]) - self.assertAllEqual(sp_tensor3.values.eval(), [16, 46]) - self.assertAllEqual(sp_tensor3.dense_shape.eval(), [5, 1]) + self.assertAllEqual(sp_tensor2.values, [4, 5, 14, 25, 35, 44]) + self.assertAllEqual(sp_tensor2.dense_shape, [5, 2]) + self.assertAllEqual(sp_tensor3.indices, [[1, 0], [4, 0]]) + self.assertAllEqual(sp_tensor3.values, [16, 46]) + self.assertAllEqual(sp_tensor3.dense_shape, [5, 1]) @test_util.run_deprecated_v1 def testSliceMatrixUnevenRows(self): @@ -146,35 +145,32 @@ class SparseSliceOpTest(test.TestCase): sp_input = self._SparseTensor_5x7() sp_tensor0 = sparse_ops.sparse_slice(sp_input, [0, 0], [3, 7]) sp_tensor1 = sparse_ops.sparse_slice(sp_input, [3, 0], [3, 7]) - self.assertAllEqual(sp_tensor0.indices.eval(), + self.assertAllEqual(sp_tensor0.indices, [[0, 0], [0, 2], [0, 4], [0, 5], [1, 1], [1, 3], [1, 4], [1, 6], [2, 0], [2, 3], [2, 5]]) - self.assertAllEqual(sp_tensor0.values.eval(), + self.assertAllEqual(sp_tensor0.values, [0, 2, 4, 5, 11, 13, 14, 16, 20, 23, 25]) - self.assertAllEqual(sp_tensor0.dense_shape.eval(), [3, 7]) + self.assertAllEqual(sp_tensor0.dense_shape, [3, 7]) self.assertAllEqual( - sp_tensor1.indices.eval(), + sp_tensor1.indices, [[0, 0], [0, 2], [0, 3], [0, 5], [1, 1], [1, 4], [1, 6]]) - self.assertAllEqual(sp_tensor1.values.eval(), - [30, 32, 33, 35, 41, 44, 46]) - self.assertAllEqual(sp_tensor1.dense_shape.eval(), [2, 7]) + self.assertAllEqual(sp_tensor1.values, [30, 32, 33, 35, 41, 44, 46]) + self.assertAllEqual(sp_tensor1.dense_shape, [2, 7]) sp_tensor0 = sparse_ops.sparse_slice(sp_input, [0, 0], [2, 7]) sp_tensor1 = sparse_ops.sparse_slice(sp_input, [2, 0], [2, 7]) sp_tensor2 = sparse_ops.sparse_slice(sp_input, [4, 0], [2, 7]) self.assertAllEqual( - sp_tensor0.indices.eval(), + sp_tensor0.indices, [[0, 0], [0, 2], [0, 4], [0, 5], [1, 1], [1, 3], [1, 4], [1, 6]]) - self.assertAllEqual(sp_tensor0.values.eval(), - [0, 2, 4, 5, 11, 13, 14, 16]) - self.assertAllEqual(sp_tensor0.dense_shape.eval(), [2, 7]) + self.assertAllEqual(sp_tensor0.values, [0, 2, 4, 5, 11, 13, 14, 16]) + self.assertAllEqual(sp_tensor0.dense_shape, [2, 7]) - self.assertAllEqual(sp_tensor1.values.eval(), - [20, 23, 25, 30, 32, 33, 35]) - self.assertAllEqual(sp_tensor1.dense_shape.eval(), [2, 7]) - self.assertAllEqual(sp_tensor2.indices.eval(), [[0, 1], [0, 4], [0, 6]]) - self.assertAllEqual(sp_tensor2.values.eval(), [41, 44, 46]) - self.assertAllEqual(sp_tensor2.dense_shape.eval(), [1, 7]) + self.assertAllEqual(sp_tensor1.values, [20, 23, 25, 30, 32, 33, 35]) + self.assertAllEqual(sp_tensor1.dense_shape, [2, 7]) + self.assertAllEqual(sp_tensor2.indices, [[0, 1], [0, 4], [0, 6]]) + self.assertAllEqual(sp_tensor2.values, [41, 44, 46]) + 
self.assertAllEqual(sp_tensor2.dense_shape, [1, 7]) return @test_util.run_deprecated_v1 @@ -185,20 +181,18 @@ class SparseSliceOpTest(test.TestCase): sp_tensor1 = sparse_ops.sparse_slice(sp_input, [1, 0], [1, 6]) sp_tensor2 = sparse_ops.sparse_slice(sp_input, [2, 0], [1, 7]) sp_tensor3 = sparse_ops.sparse_slice(sp_input, [3, 0], [2, 7]) - self.assertAllEqual(sp_tensor0.indices.eval(), - [[0, 0], [0, 2], [0, 4], [0, 5]]) - self.assertAllEqual(sp_tensor0.values.eval(), [0, 2, 4, 5]) - self.assertAllEqual(sp_tensor0.dense_shape.eval(), [1, 6]) - self.assertAllEqual(sp_tensor1.indices.eval(), [[0, 1], [0, 3], [0, 4]]) - self.assertAllEqual(sp_tensor1.values.eval(), [11, 13, 14]) - self.assertAllEqual(sp_tensor1.dense_shape.eval(), [1, 6]) - self.assertAllEqual(sp_tensor2.indices.eval(), [[0, 0], [0, 3], [0, 5]]) - self.assertAllEqual(sp_tensor2.values.eval(), [20, 23, 25]) - self.assertAllEqual(sp_tensor2.dense_shape.eval(), [1, 6]) - self.assertAllEqual(sp_tensor3.indices.eval(), - [[0, 0], [0, 2], [0, 3], [0, 5]]) - self.assertAllEqual(sp_tensor3.values.eval(), [30, 32, 33, 35]) - self.assertAllEqual(sp_tensor3.dense_shape.eval(), [1, 6]) + self.assertAllEqual(sp_tensor0.indices, [[0, 0], [0, 2], [0, 4], [0, 5]]) + self.assertAllEqual(sp_tensor0.values, [0, 2, 4, 5]) + self.assertAllEqual(sp_tensor0.dense_shape, [1, 6]) + self.assertAllEqual(sp_tensor1.indices, [[0, 1], [0, 3], [0, 4]]) + self.assertAllEqual(sp_tensor1.values, [11, 13, 14]) + self.assertAllEqual(sp_tensor1.dense_shape, [1, 6]) + self.assertAllEqual(sp_tensor2.indices, [[0, 0], [0, 3], [0, 5]]) + self.assertAllEqual(sp_tensor2.values, [20, 23, 25]) + self.assertAllEqual(sp_tensor2.dense_shape, [1, 6]) + self.assertAllEqual(sp_tensor3.indices, [[0, 0], [0, 2], [0, 3], [0, 5]]) + self.assertAllEqual(sp_tensor3.values, [30, 32, 33, 35]) + self.assertAllEqual(sp_tensor3.dense_shape, [1, 6]) @test_util.run_deprecated_v1 def testSliceColumns(self): @@ -208,18 +202,18 @@ class SparseSliceOpTest(test.TestCase): sparse_tensor1 = sparse_ops.sparse_slice(sp_input, [0, 2], [5, 2]) sparse_tensor2 = sparse_ops.sparse_slice(sp_input, [0, 4], [5, 3]) - self.assertAllEqual(sparse_tensor0.indices.eval(), + self.assertAllEqual(sparse_tensor0.indices, [[0, 0], [1, 1], [2, 0], [3, 0]]) - self.assertAllEqual(sparse_tensor0.values.eval(), [0, 11, 20, 30]) - self.assertAllEqual(sparse_tensor0.dense_shape.eval(), [4, 2]) - self.assertAllEqual(sparse_tensor1.indices.eval(), + self.assertAllEqual(sparse_tensor0.values, [0, 11, 20, 30]) + self.assertAllEqual(sparse_tensor0.dense_shape, [4, 2]) + self.assertAllEqual(sparse_tensor1.indices, [[0, 0], [1, 1], [2, 1], [3, 0], [3, 1]]) - self.assertAllEqual(sparse_tensor1.values.eval(), [2, 13, 23, 32, 33]) - self.assertAllEqual(sparse_tensor1.dense_shape.eval(), [4, 2]) - self.assertAllEqual(sparse_tensor2.indices.eval(), + self.assertAllEqual(sparse_tensor1.values, [2, 13, 23, 32, 33]) + self.assertAllEqual(sparse_tensor1.dense_shape, [4, 2]) + self.assertAllEqual(sparse_tensor2.indices, [[0, 0], [0, 1], [1, 0], [2, 1], [3, 1]]) - self.assertAllEqual(sparse_tensor2.values.eval(), [4, 5, 14, 25, 35]) - self.assertAllEqual(sparse_tensor2.dense_shape.eval(), [4, 2]) + self.assertAllEqual(sparse_tensor2.values, [4, 5, 14, 25, 35]) + self.assertAllEqual(sparse_tensor2.dense_shape, [4, 2]) @test_util.run_deprecated_v1 def testSliceAllColumns(self): @@ -231,27 +225,24 @@ class SparseSliceOpTest(test.TestCase): sparse_tensor3 = sparse_ops.sparse_slice(sp_input, [0, 3], [4, 1]) sparse_tensor4 = 
sparse_ops.sparse_slice(sp_input, [0, 4], [5, 1]) sparse_tensor5 = sparse_ops.sparse_slice(sp_input, [0, 5], [6, 3]) - self.assertAllEqual(sparse_tensor0.indices.eval(), - [[0, 0], [2, 0], [3, 0]]) - self.assertAllEqual(sparse_tensor0.values.eval(), [0, 20, 30]) - self.assertAllEqual(sparse_tensor0.dense_shape.eval(), [4, 1]) - self.assertAllEqual(sparse_tensor1.indices.eval(), [[1, 0]]) - self.assertAllEqual(sparse_tensor1.values.eval(), [11]) - self.assertAllEqual(sparse_tensor1.dense_shape.eval(), [4, 1]) - self.assertAllEqual(sparse_tensor2.indices.eval(), [[0, 0], [3, 0]]) - self.assertAllEqual(sparse_tensor2.values.eval(), [2, 32]) - self.assertAllEqual(sparse_tensor2.dense_shape.eval(), [4, 1]) - self.assertAllEqual(sparse_tensor3.indices.eval(), - [[1, 0], [2, 0], [3, 0]]) - self.assertAllEqual(sparse_tensor3.dense_shape.eval(), [4, 1]) - self.assertAllEqual(sparse_tensor3.values.eval(), [13, 23, 33]) - self.assertAllEqual(sparse_tensor4.indices.eval(), [[0, 0], [1, 0]]) - self.assertAllEqual(sparse_tensor4.values.eval(), [4, 14]) - self.assertAllEqual(sparse_tensor4.dense_shape.eval(), [4, 1]) - self.assertAllEqual(sparse_tensor5.indices.eval(), - [[0, 0], [2, 0], [3, 0]]) - self.assertAllEqual(sparse_tensor5.values.eval(), [5, 25, 35]) - self.assertAllEqual(sparse_tensor5.dense_shape.eval(), [4, 1]) + self.assertAllEqual(sparse_tensor0.indices, [[0, 0], [2, 0], [3, 0]]) + self.assertAllEqual(sparse_tensor0.values, [0, 20, 30]) + self.assertAllEqual(sparse_tensor0.dense_shape, [4, 1]) + self.assertAllEqual(sparse_tensor1.indices, [[1, 0]]) + self.assertAllEqual(sparse_tensor1.values, [11]) + self.assertAllEqual(sparse_tensor1.dense_shape, [4, 1]) + self.assertAllEqual(sparse_tensor2.indices, [[0, 0], [3, 0]]) + self.assertAllEqual(sparse_tensor2.values, [2, 32]) + self.assertAllEqual(sparse_tensor2.dense_shape, [4, 1]) + self.assertAllEqual(sparse_tensor3.indices, [[1, 0], [2, 0], [3, 0]]) + self.assertAllEqual(sparse_tensor3.dense_shape, [4, 1]) + self.assertAllEqual(sparse_tensor3.values, [13, 23, 33]) + self.assertAllEqual(sparse_tensor4.indices, [[0, 0], [1, 0]]) + self.assertAllEqual(sparse_tensor4.values, [4, 14]) + self.assertAllEqual(sparse_tensor4.dense_shape, [4, 1]) + self.assertAllEqual(sparse_tensor5.indices, [[0, 0], [2, 0], [3, 0]]) + self.assertAllEqual(sparse_tensor5.values, [5, 25, 35]) + self.assertAllEqual(sparse_tensor5.dense_shape, [4, 1]) @test_util.run_deprecated_v1 def testGradients(self): diff --git a/tensorflow/python/ops/gradients_test.py b/tensorflow/python/ops/gradients_test.py index cc9b029e6d9..d179735b47a 100644 --- a/tensorflow/python/ops/gradients_test.py +++ b/tensorflow/python/ops/gradients_test.py @@ -1646,7 +1646,7 @@ class GradPassThroughTest(test_util.TensorFlowTestCase): with self.cached_session(): self.evaluate(variables.global_variables_initializer()) - self.assertAllClose(grads[0].eval(), 6.0) + self.assertAllClose(grads[0], 6.0) # Verify that variables involved in the wrapped op do not receive gradients. 
y = custom_gradient.grad_pass_through(lambda v: x * v)(z) diff --git a/tensorflow/python/training/checkpoint_ops_test.py b/tensorflow/python/training/checkpoint_ops_test.py index 5a6a66f1312..47ce8d14e88 100644 --- a/tensorflow/python/training/checkpoint_ops_test.py +++ b/tensorflow/python/training/checkpoint_ops_test.py @@ -156,8 +156,7 @@ class LoadAndRemapWrappersTest(test.TestCase): with self.cached_session(): self.evaluate(variables.global_variables_initializer()) - self.assertAllClose(expected_remapped_matrix, - remapped_matrix.as_tensor().eval()) + self.assertAllClose(expected_remapped_matrix, remapped_matrix.as_tensor()) def test_load_and_remap_output_layer_weight_initializer_dnn_output(self): """Tests for the output layer initializer in the DNN output case.""" @@ -190,8 +189,7 @@ class LoadAndRemapWrappersTest(test.TestCase): with self.cached_session(): self.evaluate(variables.global_variables_initializer()) - self.assertAllClose(expected_remapped_matrix, - remapped_matrix.as_tensor().eval()) + self.assertAllClose(expected_remapped_matrix, remapped_matrix.as_tensor()) def test_initializer_with_oov_only_partition(self): """Tests for the output layer initializer where one partition is all OOV.""" @@ -228,8 +226,7 @@ class LoadAndRemapWrappersTest(test.TestCase): with self.cached_session(): self.evaluate(variables.global_variables_initializer()) - self.assertAllClose(expected_remapped_matrix, - remapped_matrix.as_tensor().eval()) + self.assertAllClose(expected_remapped_matrix, remapped_matrix.as_tensor()) def test_load_and_remap_linear_multiclass_initializer_default_init(self): """Tests where the zeros_initializer default is used for linear.""" @@ -264,8 +261,7 @@ class LoadAndRemapWrappersTest(test.TestCase): with self.cached_session(): self.evaluate(variables.global_variables_initializer()) - self.assertAllClose(expected_remapped_matrix, - remapped_matrix.as_tensor().eval()) + self.assertAllClose(expected_remapped_matrix, remapped_matrix.as_tensor()) def test_load_embedding_initializer(self): """Tests for the load_embedding_initializer wrapper.""" @@ -299,7 +295,7 @@ class LoadAndRemapWrappersTest(test.TestCase): with self.cached_session(): self.evaluate(variables.global_variables_initializer()) self.assertAllClose(expected_remapped_embeddings, - remapped_embeddings.as_tensor().eval()) + remapped_embeddings.as_tensor()) def test_load_embedding_initializer_large_oov(self): """Tests for the large OOV case for load_embedding_initializer wrapper.""" @@ -345,7 +341,7 @@ class LoadAndRemapWrappersTest(test.TestCase): with self.cached_session(): self.evaluate(variables.global_variables_initializer()) self.assertAllClose(expected_remapped_embeddings, - remapped_embeddings.as_tensor().eval()) + remapped_embeddings.as_tensor()) def test_load_embedding_initializer_old_row_vocab(self): """Tests for load_embedding_initializer where we constrain old vocab.""" @@ -383,7 +379,8 @@ class LoadAndRemapWrappersTest(test.TestCase): with self.cached_session(): self.evaluate(variables.global_variables_initializer()) self.assertAllClose(expected_remapped_embeddings, - remapped_embeddings.as_tensor().eval()) + remapped_embeddings.as_tensor()) + if __name__ == '__main__': test.main() From 51be86b23b9dda1e11295f05b27ee188b767e6ce Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Fri, 10 Jul 2020 23:49:09 -0700 Subject: [PATCH 0225/2522] [XLA:SPMD] Resubmit change: use subgroup AllToAll for resharding Reshard from tile [2,2,1] to [1,2,2] can be done by a subgroup all-to-all between dimensions 0 and 2. 
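A minimal NumPy sketch of the data movement this enables, assuming a 4x4x4 tensor and a 4-device mesh (the shapes, shard keys, and the shard() helper are illustrative assumptions, not part of this change):

import numpy as np

full = np.arange(4 * 4 * 4).reshape(4, 4, 4)

def shard(tensor, tiles):
  # Splits `tensor` into per-device shards for a tile assignment such as [2,2,1].
  shards = {}
  sizes = [d // t for d, t in zip(tensor.shape, tiles)]
  for i in range(tiles[0]):
    for j in range(tiles[1]):
      for k in range(tiles[2]):
        shards[(i, j, k)] = tensor[i * sizes[0]:(i + 1) * sizes[0],
                                   j * sizes[1]:(j + 1) * sizes[1],
                                   k * sizes[2]:(k + 1) * sizes[2]]
  return shards

src = shard(full, [2, 2, 1])  # 4 shards, each of shape (2, 2, 4)

# Subgroup all-to-all between dim 0 (source) and dim 2 (target): the shards
# {(0, j, 0), (1, j, 0)} form one group for each j. Every member splits its
# shard along dim 2 and sends piece p to group member p, then concatenates
# what it receives along dim 0, which is no longer partitioned afterwards.
target_shards = {}
for j in range(2):
  group = [src[(0, j, 0)], src[(1, j, 0)]]            # each of shape (2, 2, 4)
  pieces = [np.split(s, 2, axis=2) for s in group]    # pieces[m][p]: (2, 2, 2)
  for p in range(2):
    received = [pieces[m][p] for m in range(2)]
    target_shards[(0, j, p)] = np.concatenate(received, axis=0)  # (4, 2, 2)

# The result matches sharding the full array directly with tiles [1, 2, 2].
expected = shard(full, [1, 2, 2])
assert all(np.array_equal(target_shards[key], expected[key]) for key in expected)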
PiperOrigin-RevId: 320735717 Change-Id: I7085293f02f7a9d60a2837eae34b444c0e89f5fe --- .../xla/service/spmd/spmd_partitioner.cc | 123 ++++++++++++------ .../xla/service/spmd/spmd_partitioner.h | 3 +- .../xla/service/spmd/spmd_partitioner_test.cc | 26 ++++ 3 files changed, 108 insertions(+), 44 deletions(-) diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc index 7e136be54e6..1b484e018ba 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc @@ -176,16 +176,45 @@ std::vector CreateReplicaGroups(int64 num_replicas) { return groups; } -bool CanReshardWithAllToAll(const HloSharding& source, - const HloSharding& target) { - return UniqueTiledDim(source) && UniqueTiledDim(target) && - UniqueTiledDim(source) != UniqueTiledDim(target); +absl::optional> GetReshardAllToAllSourceTargetDims( + const HloSharding& source, const HloSharding& target) { + if (source.IsTileMaximal() || target.IsTileMaximal() || + source.tile_assignment().num_dimensions() != + target.tile_assignment().num_dimensions()) { + return absl::nullopt; + } + int64 source_dim = -1; + int64 target_dim = -1; + for (int64 i = 0; i < source.tile_assignment().num_dimensions(); ++i) { + if (source.tile_assignment().dim(i) > 1 && + target.tile_assignment().dim(i) == 1) { + if (source_dim != -1) { + return absl::nullopt; + } + source_dim = i; + } else if (source.tile_assignment().dim(i) == 1 && + target.tile_assignment().dim(i) > 1) { + if (target_dim != -1) { + return absl::nullopt; + } + target_dim = i; + } else if (source.tile_assignment().dim(i) != + target.tile_assignment().dim(i)) { + return absl::nullopt; + } + } + if (source_dim == -1 || target_dim == -1 || source_dim == target_dim) { + return absl::nullopt; + } + return std::pair(source_dim, target_dim); } bool CanReshardWithCollectivePermute(const HloSharding& source, const HloSharding& target) { - return UniqueTiledDim(source) && UniqueTiledDim(target) && - UniqueTiledDim(source) == UniqueTiledDim(target) && source != target; + return !source.IsTileMaximal() && !target.IsTileMaximal() && + source.tile_assignment().dimensions() == + target.tile_assignment().dimensions() && + source.tile_assignment() != target.tile_assignment(); } // Clears all sharding attributes from instructions in the module. 
This must be @@ -278,8 +307,10 @@ PartitionedHlo PartitionedHlo::ReshardNoCache(const HloSharding& target) { return ReshardWithCollectivePermute(target); } - if (CanReshardWithAllToAll(sharding(), target)) { - return ReshardWithAllToAll(target); + if (auto src_tgt_dims = + GetReshardAllToAllSourceTargetDims(sharding(), target)) { + return ReshardWithAllToAll(target, src_tgt_dims->first, + src_tgt_dims->second); } // If not replicated yet, first replicate and then reshard to use one of the @@ -745,45 +776,53 @@ PartitionedHlo PartitionedHlo::Broadcast() const { return PartitionedHlo(result, base_shape_, state_); } -PartitionedHlo PartitionedHlo::ReshardWithAllToAll( - const HloSharding& target) const { - int64 partition_count = sharding().tile_assignment().num_elements(); - absl::optional input_partition_dim = UniqueTiledDim(sharding()); - absl::optional output_partition_dim = UniqueTiledDim(target); - CHECK(input_partition_dim.has_value()); - CHECK(output_partition_dim.has_value()); +PartitionedHlo PartitionedHlo::ReshardWithAllToAll(const HloSharding& target, + int64 source_dim, + int64 target_dim) const { + const int64 group_size = sharding().tile_assignment().dim(source_dim); // If the device order is different in the target, fix the order with // ReshardWithCollectivePermute. - auto input_tile_fixed_device_order = target.tile_assignment(); - input_tile_fixed_device_order.Reshape( - sharding().tile_assignment().dimensions()); + std::vector xpose_dims(target.tile_assignment().num_dimensions()); + std::iota(xpose_dims.begin(), xpose_dims.end(), 0); + xpose_dims[source_dim] = target_dim; + xpose_dims[target_dim] = source_dim; auto input_sharding_fixed_device_order = - HloSharding::Tile(input_tile_fixed_device_order); + hlo_sharding_util::TransposeSharding(target, xpose_dims); if (input_sharding_fixed_device_order != sharding()) { auto fixed_order = ReshardWithCollectivePermute(input_sharding_fixed_device_order); - return fixed_order.ReshardWithAllToAll(target); + return fixed_order.ReshardWithAllToAll(target, source_dim, target_dim); } auto padded_hlo = PadBaseShapeBeforeUnevenTiledSharding(hlo_, target, state_.b); // The order of ids in the group must follow the target sharding. - std::vector groups(1); - for (int64 device : target.tile_assignment()) { - groups[0].add_replica_ids(device); - } + std::vector groups(target.tile_assignment().num_elements() / + group_size); + target.tile_assignment().Each( + [&](absl::Span indices, int64 device) { + int64 group_id = 0; + for (int64 dim = 0; dim < indices.size(); ++dim) { + if (dim == target_dim) { + continue; + } + group_id *= target.tile_assignment().dim(dim); + group_id += indices[dim]; + } + groups[group_id].add_replica_ids(device); + }); HloInstruction* result = nullptr; - // Split along the split dimension (output_partition_dim) of the all-to-all + // Split along the split dimension (target_dim) of the all-to-all // output. std::vector dimensions; for (int64 i = 0; i < base_shape_.rank(); ++i) { - if (i == *output_partition_dim) { - dimensions.push_back(partition_count); - dimensions.push_back(padded_hlo->shape().dimensions(i) / partition_count); + if (i == target_dim) { + dimensions.push_back(group_size); + dimensions.push_back(padded_hlo->shape().dimensions(i) / group_size); } else { dimensions.push_back(padded_hlo->shape().dimensions(i)); } @@ -794,21 +833,19 @@ PartitionedHlo PartitionedHlo::ReshardWithAllToAll( // After the reshape, it is guaranteed to have at least 3 dimensions. 
auto all_to_all = state_.collective_ops_creator.create_cross_partition_all_to_all( - state_.b, {reshape}, groups, (*state_.next_channel_id)++, - output_partition_dim); + state_.b, {reshape}, groups, (*state_.next_channel_id)++, target_dim); // Reorder the split dimension of the reshape to be located in front of the // input partition dimension, so the two dimensions can be combined. - int64 new_input_partition_dim = (*output_partition_dim < *input_partition_dim) - ? *input_partition_dim + 1 - : *input_partition_dim; + int64 new_source_dim = + (target_dim < source_dim) ? source_dim + 1 : source_dim; std::vector permutation; for (int64 i = 0; i < all_to_all->shape().rank(); ++i) { - if (i == *output_partition_dim) { + if (i == target_dim) { continue; } - if (i == new_input_partition_dim) { - permutation.push_back(*output_partition_dim); + if (i == new_source_dim) { + permutation.push_back(target_dim); } permutation.push_back(i); } @@ -819,8 +856,7 @@ PartitionedHlo PartitionedHlo::ReshardWithAllToAll( // Combine the split dimension and the input partition dimension. auto new_shape = ShapeInference::InferAllToAllShape( - padded_hlo->shape(), *output_partition_dim, - *input_partition_dim, partition_count) + padded_hlo->shape(), target_dim, source_dim, group_size) .ValueOrDie(); result = state_.b->AddInstruction( HloInstruction::CreateReshape(new_shape, transpose)); @@ -837,7 +873,8 @@ PartitionedHlo PartitionedHlo::ReshardWithAllToAll( PartitionedHlo PartitionedHlo::ReshardWithCollectivePermute( const HloSharding& target) const { - CHECK(CanReshardWithCollectivePermute(sharding(), target)); + CHECK(CanReshardWithCollectivePermute(sharding(), target)) + << sharding().ToString() << " to " << target.ToString(); std::vector> src_dst_pairs; sharding().tile_assignment().Each( [&](absl::Span indices, int64 src_device) { @@ -3653,8 +3690,8 @@ Status SpmdPartitioningVisitor::HandleDotHelper( output_batch_partitions == num_partitions_ && lhs_sharding_transposed_to_match_output == hlo->sharding()) { if (!may_reshard_with_allreduce && - !CanReshardWithAllToAll(rhs.sharding(), - *lhs_sharding_transposed_to_match_rhs)) { + !GetReshardAllToAllSourceTargetDims( + rhs.sharding(), *lhs_sharding_transposed_to_match_rhs)) { return false; } auto resharded_rhs = rhs.Reshard(*lhs_sharding_transposed_to_match_rhs); @@ -3668,8 +3705,8 @@ Status SpmdPartitioningVisitor::HandleDotHelper( output_batch_partitions == num_partitions_ && rhs_sharding_transposed_to_match_output == hlo->sharding()) { if (!may_reshard_with_allreduce && - !CanReshardWithAllToAll(lhs.sharding(), - *rhs_sharding_transposed_to_match_lhs)) { + !GetReshardAllToAllSourceTargetDims( + lhs.sharding(), *rhs_sharding_transposed_to_match_lhs)) { return false; } auto resharded_lhs = lhs.Reshard(*rhs_sharding_transposed_to_match_lhs); diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h index 52e4c9021d8..40881b4b91c 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h @@ -284,7 +284,8 @@ class PartitionedHlo { // Helper function to reshard the tensor using AllToAll (instead of the // default of Replicate followed by Slice). - PartitionedHlo ReshardWithAllToAll(const HloSharding& target) const; + PartitionedHlo ReshardWithAllToAll(const HloSharding& target, + int64 source_dim, int64 target_dim) const; // Helper function to reshard the tensor using CollectivePermute. 
PartitionedHlo ReshardWithCollectivePermute(const HloSharding& target) const; diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc index 1f0b1d06c1f..5b6c869c5fa 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc @@ -3766,6 +3766,32 @@ ENTRY entry { op::Parameter(0)))); } +TEST_F(SpmdPartitioningTest, SubgroupAllToAllReshard) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %param0 = f32[8,8,8,8] parameter(0), + sharding={devices=[2,2,1,2]0,1,2,3,4,5,6,7} + ROOT %copy = f32[8,8,8,8] copy(%param0), + sharding={devices=[1,2,2,2]0,1,4,5,2,3,6,7} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/8)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto reshape = + AllOf(op::Shape("f32[4,4,2,4,4]"), op::Reshape(op::Parameter(0))); + auto all_to_all = AllOf(op::Shape("f32[4,4,2,4,4]"), op::AllToAll(reshape)); + auto xpose = AllOf(op::Shape("f32[2,4,4,4,4]"), op::Transpose(all_to_all)); + EXPECT_THAT(root, + op::Copy(AllOf(op::Reshape(xpose), op::Shape("f32[8,4,4,4]")))); + EXPECT_EQ(root->operand(0)->operand(0)->operand(0)->replica_groups().size(), + 4); +} + } // namespace } // namespace spmd } // namespace xla From 84206fa0dbe0957011dd4a02ccc34222df112777 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sat, 11 Jul 2020 02:01:31 -0700 Subject: [PATCH 0226/2522] Update GraphDef version to 459. PiperOrigin-RevId: 320743969 Change-Id: Ia6c24dc284a21fd6669c37b283463b15a077ec9a --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 8cf889cd868..228afc332bb 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 458 // Updated: 2020/7/10 +#define TF_GRAPH_DEF_VERSION 459 // Updated: 2020/7/11 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From 4dcb1d2f928a86f281f7327db5efd6d810c76ad4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sat, 11 Jul 2020 02:01:31 -0700 Subject: [PATCH 0227/2522] compat: Update forward compatibility horizon to 2020-07-11 PiperOrigin-RevId: 320743970 Change-Id: I4dbbc5af48496f474f572b75eecf33ed524fbfe1 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 447814b378d..83d08cfdec2 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 10) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 11) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From 0d7e40f0dba759a54a8ee4e4c2a96e05937e14c2 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Sat, 11 Jul 2020 08:55:26 -0700 Subject: [PATCH 0228/2522] Rollback of breaking [XLA:SPMD] Resubmit change: use subgroup AllToAll for resharding PiperOrigin-RevId: 320766295 Change-Id: Ia0b7901b5f582a631616c5ee5f5db4a82462cd30 --- .../xla/service/spmd/spmd_partitioner.cc | 123 ++++++------------ .../xla/service/spmd/spmd_partitioner.h | 3 +- .../xla/service/spmd/spmd_partitioner_test.cc | 26 ---- 3 files changed, 44 insertions(+), 108 deletions(-) diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc index 1b484e018ba..7e136be54e6 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc @@ -176,45 +176,16 @@ std::vector CreateReplicaGroups(int64 num_replicas) { return groups; } -absl::optional> GetReshardAllToAllSourceTargetDims( - const HloSharding& source, const HloSharding& target) { - if (source.IsTileMaximal() || target.IsTileMaximal() || - source.tile_assignment().num_dimensions() != - target.tile_assignment().num_dimensions()) { - return absl::nullopt; - } - int64 source_dim = -1; - int64 target_dim = -1; - for (int64 i = 0; i < source.tile_assignment().num_dimensions(); ++i) { - if (source.tile_assignment().dim(i) > 1 && - target.tile_assignment().dim(i) == 1) { - if (source_dim != -1) { - return absl::nullopt; - } - source_dim = i; - } else if (source.tile_assignment().dim(i) == 1 && - target.tile_assignment().dim(i) > 1) { - if (target_dim != -1) { - return absl::nullopt; - } - target_dim = i; - } else if (source.tile_assignment().dim(i) != - target.tile_assignment().dim(i)) { - return absl::nullopt; - } - } - if (source_dim == -1 || target_dim == -1 || source_dim == target_dim) { - return absl::nullopt; - } - return std::pair(source_dim, target_dim); +bool CanReshardWithAllToAll(const HloSharding& source, + const HloSharding& target) { + return UniqueTiledDim(source) && UniqueTiledDim(target) && + UniqueTiledDim(source) != UniqueTiledDim(target); } bool CanReshardWithCollectivePermute(const HloSharding& source, const HloSharding& target) { - return !source.IsTileMaximal() && !target.IsTileMaximal() && - source.tile_assignment().dimensions() == - target.tile_assignment().dimensions() && - source.tile_assignment() != target.tile_assignment(); + return UniqueTiledDim(source) && UniqueTiledDim(target) && + UniqueTiledDim(source) == UniqueTiledDim(target) && source != target; } // Clears all sharding attributes from instructions in the module. 
This must be @@ -307,10 +278,8 @@ PartitionedHlo PartitionedHlo::ReshardNoCache(const HloSharding& target) { return ReshardWithCollectivePermute(target); } - if (auto src_tgt_dims = - GetReshardAllToAllSourceTargetDims(sharding(), target)) { - return ReshardWithAllToAll(target, src_tgt_dims->first, - src_tgt_dims->second); + if (CanReshardWithAllToAll(sharding(), target)) { + return ReshardWithAllToAll(target); } // If not replicated yet, first replicate and then reshard to use one of the @@ -776,53 +745,45 @@ PartitionedHlo PartitionedHlo::Broadcast() const { return PartitionedHlo(result, base_shape_, state_); } -PartitionedHlo PartitionedHlo::ReshardWithAllToAll(const HloSharding& target, - int64 source_dim, - int64 target_dim) const { - const int64 group_size = sharding().tile_assignment().dim(source_dim); +PartitionedHlo PartitionedHlo::ReshardWithAllToAll( + const HloSharding& target) const { + int64 partition_count = sharding().tile_assignment().num_elements(); + absl::optional input_partition_dim = UniqueTiledDim(sharding()); + absl::optional output_partition_dim = UniqueTiledDim(target); + CHECK(input_partition_dim.has_value()); + CHECK(output_partition_dim.has_value()); // If the device order is different in the target, fix the order with // ReshardWithCollectivePermute. - std::vector xpose_dims(target.tile_assignment().num_dimensions()); - std::iota(xpose_dims.begin(), xpose_dims.end(), 0); - xpose_dims[source_dim] = target_dim; - xpose_dims[target_dim] = source_dim; + auto input_tile_fixed_device_order = target.tile_assignment(); + input_tile_fixed_device_order.Reshape( + sharding().tile_assignment().dimensions()); auto input_sharding_fixed_device_order = - hlo_sharding_util::TransposeSharding(target, xpose_dims); + HloSharding::Tile(input_tile_fixed_device_order); if (input_sharding_fixed_device_order != sharding()) { auto fixed_order = ReshardWithCollectivePermute(input_sharding_fixed_device_order); - return fixed_order.ReshardWithAllToAll(target, source_dim, target_dim); + return fixed_order.ReshardWithAllToAll(target); } auto padded_hlo = PadBaseShapeBeforeUnevenTiledSharding(hlo_, target, state_.b); // The order of ids in the group must follow the target sharding. - std::vector groups(target.tile_assignment().num_elements() / - group_size); - target.tile_assignment().Each( - [&](absl::Span indices, int64 device) { - int64 group_id = 0; - for (int64 dim = 0; dim < indices.size(); ++dim) { - if (dim == target_dim) { - continue; - } - group_id *= target.tile_assignment().dim(dim); - group_id += indices[dim]; - } - groups[group_id].add_replica_ids(device); - }); + std::vector groups(1); + for (int64 device : target.tile_assignment()) { + groups[0].add_replica_ids(device); + } HloInstruction* result = nullptr; - // Split along the split dimension (target_dim) of the all-to-all + // Split along the split dimension (output_partition_dim) of the all-to-all // output. std::vector dimensions; for (int64 i = 0; i < base_shape_.rank(); ++i) { - if (i == target_dim) { - dimensions.push_back(group_size); - dimensions.push_back(padded_hlo->shape().dimensions(i) / group_size); + if (i == *output_partition_dim) { + dimensions.push_back(partition_count); + dimensions.push_back(padded_hlo->shape().dimensions(i) / partition_count); } else { dimensions.push_back(padded_hlo->shape().dimensions(i)); } @@ -833,19 +794,21 @@ PartitionedHlo PartitionedHlo::ReshardWithAllToAll(const HloSharding& target, // After the reshape, it is guaranteed to have at least 3 dimensions. 
auto all_to_all = state_.collective_ops_creator.create_cross_partition_all_to_all( - state_.b, {reshape}, groups, (*state_.next_channel_id)++, target_dim); + state_.b, {reshape}, groups, (*state_.next_channel_id)++, + output_partition_dim); // Reorder the split dimension of the reshape to be located in front of the // input partition dimension, so the two dimensions can be combined. - int64 new_source_dim = - (target_dim < source_dim) ? source_dim + 1 : source_dim; + int64 new_input_partition_dim = (*output_partition_dim < *input_partition_dim) + ? *input_partition_dim + 1 + : *input_partition_dim; std::vector permutation; for (int64 i = 0; i < all_to_all->shape().rank(); ++i) { - if (i == target_dim) { + if (i == *output_partition_dim) { continue; } - if (i == new_source_dim) { - permutation.push_back(target_dim); + if (i == new_input_partition_dim) { + permutation.push_back(*output_partition_dim); } permutation.push_back(i); } @@ -856,7 +819,8 @@ PartitionedHlo PartitionedHlo::ReshardWithAllToAll(const HloSharding& target, // Combine the split dimension and the input partition dimension. auto new_shape = ShapeInference::InferAllToAllShape( - padded_hlo->shape(), target_dim, source_dim, group_size) + padded_hlo->shape(), *output_partition_dim, + *input_partition_dim, partition_count) .ValueOrDie(); result = state_.b->AddInstruction( HloInstruction::CreateReshape(new_shape, transpose)); @@ -873,8 +837,7 @@ PartitionedHlo PartitionedHlo::ReshardWithAllToAll(const HloSharding& target, PartitionedHlo PartitionedHlo::ReshardWithCollectivePermute( const HloSharding& target) const { - CHECK(CanReshardWithCollectivePermute(sharding(), target)) - << sharding().ToString() << " to " << target.ToString(); + CHECK(CanReshardWithCollectivePermute(sharding(), target)); std::vector> src_dst_pairs; sharding().tile_assignment().Each( [&](absl::Span indices, int64 src_device) { @@ -3690,8 +3653,8 @@ Status SpmdPartitioningVisitor::HandleDotHelper( output_batch_partitions == num_partitions_ && lhs_sharding_transposed_to_match_output == hlo->sharding()) { if (!may_reshard_with_allreduce && - !GetReshardAllToAllSourceTargetDims( - rhs.sharding(), *lhs_sharding_transposed_to_match_rhs)) { + !CanReshardWithAllToAll(rhs.sharding(), + *lhs_sharding_transposed_to_match_rhs)) { return false; } auto resharded_rhs = rhs.Reshard(*lhs_sharding_transposed_to_match_rhs); @@ -3705,8 +3668,8 @@ Status SpmdPartitioningVisitor::HandleDotHelper( output_batch_partitions == num_partitions_ && rhs_sharding_transposed_to_match_output == hlo->sharding()) { if (!may_reshard_with_allreduce && - !GetReshardAllToAllSourceTargetDims( - lhs.sharding(), *rhs_sharding_transposed_to_match_lhs)) { + !CanReshardWithAllToAll(lhs.sharding(), + *rhs_sharding_transposed_to_match_lhs)) { return false; } auto resharded_lhs = lhs.Reshard(*rhs_sharding_transposed_to_match_lhs); diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h index 40881b4b91c..52e4c9021d8 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h @@ -284,8 +284,7 @@ class PartitionedHlo { // Helper function to reshard the tensor using AllToAll (instead of the // default of Replicate followed by Slice). 
- PartitionedHlo ReshardWithAllToAll(const HloSharding& target, - int64 source_dim, int64 target_dim) const; + PartitionedHlo ReshardWithAllToAll(const HloSharding& target) const; // Helper function to reshard the tensor using CollectivePermute. PartitionedHlo ReshardWithCollectivePermute(const HloSharding& target) const; diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc index 5b6c869c5fa..1f0b1d06c1f 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc @@ -3766,32 +3766,6 @@ ENTRY entry { op::Parameter(0)))); } -TEST_F(SpmdPartitioningTest, SubgroupAllToAllReshard) { - const char* const hlo_string = R"( -HloModule module - -ENTRY entry { - %param0 = f32[8,8,8,8] parameter(0), - sharding={devices=[2,2,1,2]0,1,2,3,4,5,6,7} - ROOT %copy = f32[8,8,8,8] copy(%param0), - sharding={devices=[1,2,2,2]0,1,4,5,2,3,6,7} -})"; - - TF_ASSERT_OK_AND_ASSIGN(auto module, - PartitionComputation(hlo_string, /*num_devices=*/8)); - VLOG(1) << module->ToString(); - - auto root = module->entry_computation()->root_instruction(); - auto reshape = - AllOf(op::Shape("f32[4,4,2,4,4]"), op::Reshape(op::Parameter(0))); - auto all_to_all = AllOf(op::Shape("f32[4,4,2,4,4]"), op::AllToAll(reshape)); - auto xpose = AllOf(op::Shape("f32[2,4,4,4,4]"), op::Transpose(all_to_all)); - EXPECT_THAT(root, - op::Copy(AllOf(op::Reshape(xpose), op::Shape("f32[8,4,4,4]")))); - EXPECT_EQ(root->operand(0)->operand(0)->operand(0)->replica_groups().size(), - 4); -} - } // namespace } // namespace spmd } // namespace xla From 83bd5602e3f5eb9127351d2f5ef68a2d6990bac6 Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Sat, 11 Jul 2020 12:24:51 -0700 Subject: [PATCH 0229/2522] [XLA:SPMD] Resubmit change: use subgroup AllToAll for resharding PiperOrigin-RevId: 320777648 Change-Id: I5f0ea1a95e66d74a56f380e826e0e267c2600096 --- .../xla/service/spmd/spmd_partitioner.cc | 123 ++++++++++++------ .../xla/service/spmd/spmd_partitioner.h | 3 +- .../xla/service/spmd/spmd_partitioner_test.cc | 26 ++++ 3 files changed, 108 insertions(+), 44 deletions(-) diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc index 7e136be54e6..fa28b6f8de9 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc @@ -176,16 +176,45 @@ std::vector CreateReplicaGroups(int64 num_replicas) { return groups; } -bool CanReshardWithAllToAll(const HloSharding& source, - const HloSharding& target) { - return UniqueTiledDim(source) && UniqueTiledDim(target) && - UniqueTiledDim(source) != UniqueTiledDim(target); +absl::optional> GetReshardAllToAllSourceTargetDims( + const HloSharding& source, const HloSharding& target) { + if (source.IsTileMaximal() || target.IsTileMaximal() || + source.tile_assignment().num_dimensions() != + target.tile_assignment().num_dimensions()) { + return absl::nullopt; + } + int64 source_dim = -1; + int64 target_dim = -1; + for (int64 i = 0; i < source.tile_assignment().num_dimensions(); ++i) { + if (source.tile_assignment().dim(i) > 1 && + target.tile_assignment().dim(i) == 1) { + if (source_dim != -1) { + return absl::nullopt; + } + source_dim = i; + } else if (source.tile_assignment().dim(i) == 1 && + target.tile_assignment().dim(i) > 1) { + if (target_dim != -1) { + return absl::nullopt; + } + target_dim = i; + } else if 
(source.tile_assignment().dim(i) != + target.tile_assignment().dim(i)) { + return absl::nullopt; + } + } + if (source_dim == -1 || target_dim == -1 || source_dim == target_dim) { + return absl::nullopt; + } + return std::pair(source_dim, target_dim); } bool CanReshardWithCollectivePermute(const HloSharding& source, const HloSharding& target) { - return UniqueTiledDim(source) && UniqueTiledDim(target) && - UniqueTiledDim(source) == UniqueTiledDim(target) && source != target; + return !source.IsTileMaximal() && !target.IsTileMaximal() && + source.tile_assignment().dimensions() == + target.tile_assignment().dimensions() && + source.tile_assignment() != target.tile_assignment(); } // Clears all sharding attributes from instructions in the module. This must be @@ -278,8 +307,10 @@ PartitionedHlo PartitionedHlo::ReshardNoCache(const HloSharding& target) { return ReshardWithCollectivePermute(target); } - if (CanReshardWithAllToAll(sharding(), target)) { - return ReshardWithAllToAll(target); + if (auto src_tgt_dims = + GetReshardAllToAllSourceTargetDims(sharding(), target)) { + return ReshardWithAllToAll(target, src_tgt_dims->first, + src_tgt_dims->second); } // If not replicated yet, first replicate and then reshard to use one of the @@ -745,45 +776,53 @@ PartitionedHlo PartitionedHlo::Broadcast() const { return PartitionedHlo(result, base_shape_, state_); } -PartitionedHlo PartitionedHlo::ReshardWithAllToAll( - const HloSharding& target) const { - int64 partition_count = sharding().tile_assignment().num_elements(); - absl::optional input_partition_dim = UniqueTiledDim(sharding()); - absl::optional output_partition_dim = UniqueTiledDim(target); - CHECK(input_partition_dim.has_value()); - CHECK(output_partition_dim.has_value()); +PartitionedHlo PartitionedHlo::ReshardWithAllToAll(const HloSharding& target, + int64 source_dim, + int64 target_dim) const { + const int64 group_size = sharding().tile_assignment().dim(source_dim); // If the device order is different in the target, fix the order with // ReshardWithCollectivePermute. - auto input_tile_fixed_device_order = target.tile_assignment(); - input_tile_fixed_device_order.Reshape( - sharding().tile_assignment().dimensions()); + std::vector xpose_dims(target.tile_assignment().num_dimensions()); + std::iota(xpose_dims.begin(), xpose_dims.end(), 0); + xpose_dims[source_dim] = target_dim; + xpose_dims[target_dim] = source_dim; auto input_sharding_fixed_device_order = - HloSharding::Tile(input_tile_fixed_device_order); + hlo_sharding_util::TransposeSharding(target, xpose_dims); if (input_sharding_fixed_device_order != sharding()) { auto fixed_order = ReshardWithCollectivePermute(input_sharding_fixed_device_order); - return fixed_order.ReshardWithAllToAll(target); + return fixed_order.ReshardWithAllToAll(target, source_dim, target_dim); } auto padded_hlo = PadBaseShapeBeforeUnevenTiledSharding(hlo_, target, state_.b); // The order of ids in the group must follow the target sharding. 
- std::vector groups(1); - for (int64 device : target.tile_assignment()) { - groups[0].add_replica_ids(device); - } + std::vector groups(target.tile_assignment().num_elements() / + group_size); + target.tile_assignment().Each( + [&](absl::Span indices, int64 device) { + int64 group_id = 0; + for (int64 dim = 0; dim < indices.size(); ++dim) { + if (dim == target_dim) { + continue; + } + group_id *= target.tile_assignment().dim(dim); + group_id += indices[dim]; + } + groups[group_id].add_replica_ids(device); + }); HloInstruction* result = nullptr; - // Split along the split dimension (output_partition_dim) of the all-to-all + // Split along the split dimension (target_dim) of the all-to-all // output. std::vector dimensions; for (int64 i = 0; i < base_shape_.rank(); ++i) { - if (i == *output_partition_dim) { - dimensions.push_back(partition_count); - dimensions.push_back(padded_hlo->shape().dimensions(i) / partition_count); + if (i == target_dim) { + dimensions.push_back(group_size); + dimensions.push_back(padded_hlo->shape().dimensions(i) / group_size); } else { dimensions.push_back(padded_hlo->shape().dimensions(i)); } @@ -794,21 +833,19 @@ PartitionedHlo PartitionedHlo::ReshardWithAllToAll( // After the reshape, it is guaranteed to have at least 3 dimensions. auto all_to_all = state_.collective_ops_creator.create_cross_partition_all_to_all( - state_.b, {reshape}, groups, (*state_.next_channel_id)++, - output_partition_dim); + state_.b, {reshape}, groups, (*state_.next_channel_id)++, target_dim); // Reorder the split dimension of the reshape to be located in front of the // input partition dimension, so the two dimensions can be combined. - int64 new_input_partition_dim = (*output_partition_dim < *input_partition_dim) - ? *input_partition_dim + 1 - : *input_partition_dim; + int64 new_source_dim = + (target_dim < source_dim) ? source_dim + 1 : source_dim; std::vector permutation; for (int64 i = 0; i < all_to_all->shape().rank(); ++i) { - if (i == *output_partition_dim) { + if (i == target_dim) { continue; } - if (i == new_input_partition_dim) { - permutation.push_back(*output_partition_dim); + if (i == new_source_dim) { + permutation.push_back(target_dim); } permutation.push_back(i); } @@ -819,8 +856,7 @@ PartitionedHlo PartitionedHlo::ReshardWithAllToAll( // Combine the split dimension and the input partition dimension. 
auto new_shape = ShapeInference::InferAllToAllShape( - padded_hlo->shape(), *output_partition_dim, - *input_partition_dim, partition_count) + padded_hlo->shape(), target_dim, source_dim, group_size) .ValueOrDie(); result = state_.b->AddInstruction( HloInstruction::CreateReshape(new_shape, transpose)); @@ -837,7 +873,8 @@ PartitionedHlo PartitionedHlo::ReshardWithAllToAll( PartitionedHlo PartitionedHlo::ReshardWithCollectivePermute( const HloSharding& target) const { - CHECK(CanReshardWithCollectivePermute(sharding(), target)); + CHECK(CanReshardWithCollectivePermute(sharding(), target)) + << sharding().ToString() << " to " << target.ToString(); std::vector> src_dst_pairs; sharding().tile_assignment().Each( [&](absl::Span indices, int64 src_device) { @@ -3653,8 +3690,8 @@ Status SpmdPartitioningVisitor::HandleDotHelper( output_batch_partitions == num_partitions_ && lhs_sharding_transposed_to_match_output == hlo->sharding()) { if (!may_reshard_with_allreduce && - !CanReshardWithAllToAll(rhs.sharding(), - *lhs_sharding_transposed_to_match_rhs)) { + !GetReshardAllToAllSourceTargetDims( + rhs.sharding(), *lhs_sharding_transposed_to_match_rhs)) { return false; } auto resharded_rhs = rhs.Reshard(*lhs_sharding_transposed_to_match_rhs); @@ -3668,8 +3705,8 @@ Status SpmdPartitioningVisitor::HandleDotHelper( output_batch_partitions == num_partitions_ && rhs_sharding_transposed_to_match_output == hlo->sharding()) { if (!may_reshard_with_allreduce && - !CanReshardWithAllToAll(lhs.sharding(), - *rhs_sharding_transposed_to_match_lhs)) { + !GetReshardAllToAllSourceTargetDims( + lhs.sharding(), *rhs_sharding_transposed_to_match_lhs)) { return false; } auto resharded_lhs = lhs.Reshard(*rhs_sharding_transposed_to_match_lhs); diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h index 52e4c9021d8..40881b4b91c 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h @@ -284,7 +284,8 @@ class PartitionedHlo { // Helper function to reshard the tensor using AllToAll (instead of the // default of Replicate followed by Slice). - PartitionedHlo ReshardWithAllToAll(const HloSharding& target) const; + PartitionedHlo ReshardWithAllToAll(const HloSharding& target, + int64 source_dim, int64 target_dim) const; // Helper function to reshard the tensor using CollectivePermute. 
PartitionedHlo ReshardWithCollectivePermute(const HloSharding& target) const; diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc index 1f0b1d06c1f..5b6c869c5fa 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc @@ -3766,6 +3766,32 @@ ENTRY entry { op::Parameter(0)))); } +TEST_F(SpmdPartitioningTest, SubgroupAllToAllReshard) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %param0 = f32[8,8,8,8] parameter(0), + sharding={devices=[2,2,1,2]0,1,2,3,4,5,6,7} + ROOT %copy = f32[8,8,8,8] copy(%param0), + sharding={devices=[1,2,2,2]0,1,4,5,2,3,6,7} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/8)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto reshape = + AllOf(op::Shape("f32[4,4,2,4,4]"), op::Reshape(op::Parameter(0))); + auto all_to_all = AllOf(op::Shape("f32[4,4,2,4,4]"), op::AllToAll(reshape)); + auto xpose = AllOf(op::Shape("f32[2,4,4,4,4]"), op::Transpose(all_to_all)); + EXPECT_THAT(root, + op::Copy(AllOf(op::Reshape(xpose), op::Shape("f32[8,4,4,4]")))); + EXPECT_EQ(root->operand(0)->operand(0)->operand(0)->replica_groups().size(), + 4); +} + } // namespace } // namespace spmd } // namespace xla From 9cfb2917745540cb7d46ec7a97d3712af9a9b089 Mon Sep 17 00:00:00 2001 From: 8bitmp3 <19637339+8bitmp3@users.noreply.github.com> Date: Sat, 11 Jul 2020 20:37:40 +0100 Subject: [PATCH 0230/2522] Update control_flow_ops.py to pass Ubuntu tests --- tensorflow/python/ops/parallel_for/control_flow_ops.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/ops/parallel_for/control_flow_ops.py b/tensorflow/python/ops/parallel_for/control_flow_ops.py index 9bdf81a16b1..8507d9e30e3 100644 --- a/tensorflow/python/ops/parallel_for/control_flow_ops.py +++ b/tensorflow/python/ops/parallel_for/control_flow_ops.py @@ -339,9 +339,9 @@ def vectorized_map(fn, elems, fallback_to_while_loop=True): This method works similar to `tf.map_fn` but is optimized to run much faster, possibly with a much larger memory footprint. The speedups are obtained by - vectorization (see [Auto-Vectorizing TensorFlow Graphs: Jacobians, Auto-Batching - and Beyond](https://arxiv.org/pdf/1903.04243.pdf)). The idea behind - vectorization is to semantically launch all the invocations of `fn` in + vectorization (see [Auto-Vectorizing TensorFlow Graphs: Jacobians, + Auto-Batching and Beyond](https://arxiv.org/pdf/1903.04243.pdf)). The idea + behind vectorization is to semantically launch all the invocations of `fn` in parallel and fuse corresponding operations across all these invocations. This fusion is done statically at graph generation time and the generated code is often similar in performance to a manually fused version. 
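For reference, a minimal usage sketch of the API whose docstring is reflowed above; the shapes and variable names are illustrative assumptions, not taken from the change:

import tensorflow as tf

x = tf.random.normal([128, 32])          # a batch of 128 independent rows
w = tf.random.normal([32, 8])

def per_row(row):
  # Written as if it handled a single row; vectorized_map semantically launches
  # all 128 invocations in parallel and fuses them into batched ops when the
  # graph is generated.
  return tf.nn.relu(tf.linalg.matvec(w, row, transpose_a=True))

batched = tf.vectorized_map(per_row, x)  # shape [128, 8]
reference = tf.map_fn(per_row, x)        # same values, usually much slower
assert batched.shape == [128, 8]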
From 17bf7027b14aaa75c0a75e7b652a81f3a975be28 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Sat, 11 Jul 2020 13:08:13 -0700 Subject: [PATCH 0231/2522] Some slight improvements for tfnp benchmarks PiperOrigin-RevId: 320780569 Change-Id: I4e456fdda3c46355872d039ad2ea5fb7599b06ff --- .../python/ops/numpy_ops/np_array_ops.py | 10 +++++----- tensorflow/python/ops/numpy_ops/np_dtypes.py | 18 +++++++++++------- tensorflow/python/ops/numpy_ops/np_math_ops.py | 2 +- 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/tensorflow/python/ops/numpy_ops/np_array_ops.py b/tensorflow/python/ops/numpy_ops/np_array_ops.py index d1b8135ca95..2b23dbbab4b 100644 --- a/tensorflow/python/ops/numpy_ops/np_array_ops.py +++ b/tensorflow/python/ops/numpy_ops/np_array_ops.py @@ -378,11 +378,11 @@ def _promote_dtype(*arrays): def _promote_dtype_binary(t1, t2): dtype = np_utils._result_type_binary(t1, t2) # pylint: disable=protected-access - def _fast_asarray(a): - if isinstance(a, np_arrays.ndarray) and dtype == a.dtype: - return a - return _array_internal(a, dtype=dtype, copy=False) - return _fast_asarray(t1), _fast_asarray(t2) + if not(isinstance(t1, np_arrays.ndarray) and dtype == t1.dtype): + t1 = _array_internal(t1, dtype=dtype, copy=False) + if not(isinstance(t2, np_arrays.ndarray) and dtype == t2.dtype): + t2 = _array_internal(t2, dtype=dtype, copy=False) + return t1, t2 @np_utils.np_doc('all') diff --git a/tensorflow/python/ops/numpy_ops/np_dtypes.py b/tensorflow/python/ops/numpy_ops/np_dtypes.py index 099ae3ed5b1..cde3883d3d9 100644 --- a/tensorflow/python/ops/numpy_ops/np_dtypes.py +++ b/tensorflow/python/ops/numpy_ops/np_dtypes.py @@ -77,9 +77,11 @@ def set_allow_float64(b): def canonicalize_dtype(dtype): if not _allow_float64: - return _to_float32.get(dtype, dtype) - else: - return dtype + try: + return _to_float32[dtype] + except KeyError: + pass + return dtype def _result_type(*arrays_and_dtypes): @@ -90,10 +92,12 @@ def _result_type(*arrays_and_dtypes): def _get_cached_dtype(dtype): """Returns an np.dtype for the TensorFlow DType.""" global _cached_np_dtypes - cached_dtype = _cached_np_dtypes.get(dtype, None) - if cached_dtype is None: - cached_dtype = np.dtype(dtype.as_numpy_dtype) - _cached_np_dtypes[dtype] = cached_dtype + try: + return _cached_np_dtypes[dtype] + except KeyError: + pass + cached_dtype = np.dtype(dtype.as_numpy_dtype) + _cached_np_dtypes[dtype] = cached_dtype return cached_dtype diff --git a/tensorflow/python/ops/numpy_ops/np_math_ops.py b/tensorflow/python/ops/numpy_ops/np_math_ops.py index 138fac3d294..690de58ea38 100644 --- a/tensorflow/python/ops/numpy_ops/np_math_ops.py +++ b/tensorflow/python/ops/numpy_ops/np_math_ops.py @@ -221,7 +221,7 @@ def clip(a, a_min, a_max): # pylint: disable=missing-docstring def matmul(x1, x2): # pylint: disable=missing-docstring def f(x1, x2): try: - if x1.shape.rank == 2 and x2.shape.rank == 2: + if x1._rank() == 2 and x2._rank() == 2: # pylint: disable=protected-access # Fast path for known ranks. return gen_math_ops.mat_mul(x1, x2) return np_utils.cond( From 8115adfea1d82fe1983eef983d8cfda04022eaff Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Sat, 11 Jul 2020 14:26:03 -0700 Subject: [PATCH 0232/2522] Integrate LLVM at https://github.com/llvm/llvm-project/commit/943660fd15f1 PiperOrigin-RevId: 320784584 Change-Id: Ie295cbeef7672975ce6812ea13038a6e9d7d44c6 --- tensorflow/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 09dfe270943..2535941d668 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -710,8 +710,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "c2a61ef3885019c5e0444d8789de63e1ce4d5003" - LLVM_SHA256 = "7e44c7970640da0a8b81e267252d9e0245390a832d1c23f20b32522f1473d12a" + LLVM_COMMIT = "943660fd15f193dc6961597c25541fee2e01ebbb" + LLVM_SHA256 = "72a3f845eb1839b32bccaffa317517cca910511896b68f5c18959a579d57d4f2" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From 9500aad42332574dcf1bb98a5f9d4467bb12c52c Mon Sep 17 00:00:00 2001 From: WindQAQ Date: Sat, 11 Jul 2020 18:17:49 -0700 Subject: [PATCH 0233/2522] Test scatter add --- .../kernel_tests/resource_variable_ops_test.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py index 953c616b0bc..fb69d857bd7 100644 --- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py +++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py @@ -285,17 +285,17 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase, resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32)) self.assertEqual(read, 2) + @parameterized.parameters( + dtypes.int32, dtypes.float16, dtypes.float32, dtypes.float64 + ) @test_util.run_in_graph_and_eager_modes - def testScatterAdd(self): - handle = resource_variable_ops.var_handle_op( - dtype=dtypes.int32, shape=[1, 1]) - self.evaluate( - resource_variable_ops.assign_variable_op( - handle, constant_op.constant([[1]], dtype=dtypes.int32))) + def testScatterAdd(self, dtype): + v = resource_variable_ops.ResourceVariable([[1]], dtype=dtype) + self.evaluate(variables.global_variables_initializer()) self.evaluate( resource_variable_ops.resource_scatter_add( - handle, [0], constant_op.constant([[2]], dtype=dtypes.int32))) - read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) + v.handle, [0], constant_op.constant([[2]], dtype=dtype))) + read = resource_variable_ops.read_variable_op(v.handle, dtype=dtype) self.assertEqual(self.evaluate(read), [[3]]) @test_util.run_in_graph_and_eager_modes From 8c87d2ba1b46234a38c03a3339c88a46db07a2cc Mon Sep 17 00:00:00 2001 From: WindQAQ Date: Sat, 11 Jul 2020 19:00:03 -0700 Subject: [PATCH 0234/2522] Revert "Test scatter add" This reverts commit 9500aad42332574dcf1bb98a5f9d4467bb12c52c. 
--- .../kernel_tests/resource_variable_ops_test.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py index fb69d857bd7..953c616b0bc 100644 --- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py +++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py @@ -285,17 +285,17 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase, resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32)) self.assertEqual(read, 2) - @parameterized.parameters( - dtypes.int32, dtypes.float16, dtypes.float32, dtypes.float64 - ) @test_util.run_in_graph_and_eager_modes - def testScatterAdd(self, dtype): - v = resource_variable_ops.ResourceVariable([[1]], dtype=dtype) - self.evaluate(variables.global_variables_initializer()) + def testScatterAdd(self): + handle = resource_variable_ops.var_handle_op( + dtype=dtypes.int32, shape=[1, 1]) + self.evaluate( + resource_variable_ops.assign_variable_op( + handle, constant_op.constant([[1]], dtype=dtypes.int32))) self.evaluate( resource_variable_ops.resource_scatter_add( - v.handle, [0], constant_op.constant([[2]], dtype=dtype))) - read = resource_variable_ops.read_variable_op(v.handle, dtype=dtype) + handle, [0], constant_op.constant([[2]], dtype=dtypes.int32))) + read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) self.assertEqual(self.evaluate(read), [[3]]) @test_util.run_in_graph_and_eager_modes From 3e778600766f507c51e1486dce3890f9366de92e Mon Sep 17 00:00:00 2001 From: WindQAQ Date: Sat, 11 Jul 2020 19:05:34 -0700 Subject: [PATCH 0235/2522] Test scatter add --- .../python/kernel_tests/resource_variable_ops_test.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py index 953c616b0bc..b70d294a093 100644 --- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py +++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py @@ -562,13 +562,16 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase, read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) self.assertEqual(self.evaluate(read), [[6]]) + @parameterized.parameters(dtypes.float16, dtypes.float32, dtypes.float64) @test_util.run_in_graph_and_eager_modes - def testScatterAddVariableMethod(self): - v = resource_variable_ops.ResourceVariable([0.0, 1.5], name="add") + def testScatterAddVariableMethod(self, dtype): + v = resource_variable_ops.ResourceVariable( + [0.0, 1.5], name="add", dtype=dtype) self.evaluate(variables.global_variables_initializer()) self.evaluate( - v.scatter_add(ops.IndexedSlices(indices=[1], values=[2.5]))) - self.assertAllEqual([0.0, 4.0], self.evaluate(v)) + v.scatter_add(ops.IndexedSlices( + indices=[1], values=constant_op.constant([2.5], dtype=dtype)))) + self.assertAllCloseAccordingToType([0.0, 4.0], self.evaluate(v)) @test_util.run_in_graph_and_eager_modes def testScatterSubVariableMethod(self): From 8d38e35a7db9a5cb225eed4669d388254b9ebbbe Mon Sep 17 00:00:00 2001 From: Tare Gaskin Date: Sun, 12 Jul 2020 02:10:38 +0000 Subject: [PATCH 0236/2522] update bcast.h --- tensorflow/core/util/bcast.h | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/core/util/bcast.h b/tensorflow/core/util/bcast.h index 47d6dc0884e..4337b0dc4fe 100644 --- a/tensorflow/core/util/bcast.h +++ 
b/tensorflow/core/util/bcast.h @@ -176,7 +176,6 @@ BCastList::BCastList(const BCastList::Vec (&x)[N], // 1-extend and align all vectors. for (int i = 0; i < N; ++i) { - const int copy_i_size = copy[i].size(); if (copy_i_size < largest_rank) { copy[i].resize(largest_rank, 1); } From 98ea4950912e146b357eac3ffcf89154b5983f87 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Sat, 11 Jul 2020 20:57:51 -0700 Subject: [PATCH 0237/2522] Add a space to format a log message properly PiperOrigin-RevId: 320806328 Change-Id: Ia12a62f66c2654aaa1c98162449d01050fae2a4c --- tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc b/tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc index 67002aa65bf..3a31f553b9a 100644 --- a/tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc +++ b/tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc @@ -188,12 +188,12 @@ Status MlirV1CompatGraphOptimizationPass::Run( if (!is_enabled) { VLOG(0) << "None of the MLIR optimization passes are enabled " - << "(registered" << registry_->passes().size() << " passes)"; + << "(registered " << registry_->passes().size() << " passes)"; return Status::OK(); } VLOG(0) << "Running MLIR Graph Optimization V1 Compat Passes " - << "(registered" << registry_->passes().size() << " passes)"; + << "(registered " << registry_->passes().size() << " passes)"; GraphDebugInfo debug_info; RegisterDialects(); From 98c24f4fd6c57b1e19e3a2abcbf3c2ebb043e098 Mon Sep 17 00:00:00 2001 From: WindQAQ Date: Sat, 11 Jul 2020 21:05:38 -0700 Subject: [PATCH 0238/2522] Test scatter sub --- .../python/kernel_tests/resource_variable_ops_test.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py index b70d294a093..b95cc931dda 100644 --- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py +++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py @@ -573,12 +573,15 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase, indices=[1], values=constant_op.constant([2.5], dtype=dtype)))) self.assertAllCloseAccordingToType([0.0, 4.0], self.evaluate(v)) + @parameterized.parameters(dtypes.float16, dtypes.float32, dtypes.float64) @test_util.run_in_graph_and_eager_modes - def testScatterSubVariableMethod(self): - v = resource_variable_ops.ResourceVariable([0.0, 2.5], name="sub") + def testScatterSubVariableMethod(self, dtype): + v = resource_variable_ops.ResourceVariable( + [0.0, 2.5], name="sub", dtype=dtype) self.evaluate(variables.global_variables_initializer()) self.evaluate( - v.scatter_sub(ops.IndexedSlices(indices=[1], values=[1.5]))) + v.scatter_sub(ops.IndexedSlices( + indices=[1], values=constant_op.constant([2.5, dtype=dtype])))) self.assertAllEqual([0.0, 1.0], self.evaluate(v)) @test_util.run_in_graph_and_eager_modes From a3b1575e5850510861ada653c66e8b9c887e2047 Mon Sep 17 00:00:00 2001 From: WindQAQ Date: Sat, 11 Jul 2020 21:16:12 -0700 Subject: [PATCH 0239/2522] Fix typo --- tensorflow/python/kernel_tests/resource_variable_ops_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py index b95cc931dda..721678ff77f 100644 --- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py +++ 
b/tensorflow/python/kernel_tests/resource_variable_ops_test.py @@ -581,7 +581,7 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase, self.evaluate(variables.global_variables_initializer()) self.evaluate( v.scatter_sub(ops.IndexedSlices( - indices=[1], values=constant_op.constant([2.5, dtype=dtype])))) + indices=[1], values=constant_op.constant([2.5], dtype=dtype)))) self.assertAllEqual([0.0, 1.0], self.evaluate(v)) @test_util.run_in_graph_and_eager_modes From 07790071531ce783af75f2820b29fbd3c449870b Mon Sep 17 00:00:00 2001 From: WindQAQ Date: Sat, 11 Jul 2020 21:46:57 -0700 Subject: [PATCH 0240/2522] Use assertAllCloseAccordingToType --- tensorflow/python/kernel_tests/resource_variable_ops_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py index 721678ff77f..669c7c63967 100644 --- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py +++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py @@ -581,8 +581,8 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase, self.evaluate(variables.global_variables_initializer()) self.evaluate( v.scatter_sub(ops.IndexedSlices( - indices=[1], values=constant_op.constant([2.5], dtype=dtype)))) - self.assertAllEqual([0.0, 1.0], self.evaluate(v)) + indices=[1], values=constant_op.constant([1.5], dtype=dtype)))) + self.assertAllCloseAccordingToType([0.0, 1.0], self.evaluate(v)) @test_util.run_in_graph_and_eager_modes def testScatterMaxVariableMethod(self): From 4abf3012f14b76aa0fd1ce987f9afdad7639d734 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sat, 11 Jul 2020 22:31:02 -0700 Subject: [PATCH 0241/2522] The original conditional code motion implementation has been extensively rewritten to allow for flexible combinations of instructions being moved out and to later enable instructions outside of a conditional to be moved inside the conditional. Currently the moving-in optimization is not yet supported but place holders have been put inside for later extension. Only instructions that are identical across all branches are allowed to move out, but independent instructions are separately considered in the code motion optimization. PiperOrigin-RevId: 320810794 Change-Id: Id03f628bf08b98b7e40cc4b60de2bf51484a5799 --- .../xla/service/conditional_code_motion.cc | 734 ++++++++++-------- .../xla/service/conditional_code_motion.h | 67 +- .../service/conditional_code_motion_test.cc | 117 ++- 3 files changed, 527 insertions(+), 391 deletions(-) diff --git a/tensorflow/compiler/xla/service/conditional_code_motion.cc b/tensorflow/compiler/xla/service/conditional_code_motion.cc index 6db4c3eb6d4..3287726b1a7 100644 --- a/tensorflow/compiler/xla/service/conditional_code_motion.cc +++ b/tensorflow/compiler/xla/service/conditional_code_motion.cc @@ -46,161 +46,63 @@ limitations under the License. namespace xla { -namespace { - -struct ConditionalBoundary { - ConditionalBoundary(HloInstruction* op, int64 op_index, HloInstruction* usr) - : operand(op), operand_index(op_index), user(usr) {} - // `operand` is one of `user`'s operand. - - // Instruction that remains in the conditional but one of its user - // is moved out of conditonal. - HloInstruction* operand; - // operand_index for `operand` in the `user`. - int64 operand_index; - // Instruction that moved out of conditional. 
- HloInstruction* user; -}; +namespace conditional_opt { // Visit the root instructions to its operands follow BFS. // Will visit an instructions after all its users have been visited. Parameters // are not visited. -class BranchVisitor { +class BoundaryVisitor { public: - explicit BranchVisitor(const HloComputation* branch_computation) { - HloInstruction* root_inst = branch_computation->root_instruction(); - worklist_.push_back(root_inst); - visited_.insert(root_inst); - for (auto parameter_inst : branch_computation->parameter_instructions()) { - parameter_instructions_.insert(parameter_inst); - } + // start with an existing conditional computation. + explicit BoundaryVisitor(HloInstruction* conditional) { + Boundary b(Boundary::Position::kInsideBranch); + b.Operands().push_back(conditional); + worklist_.push_back(b); } + // Start with an empty work list. + BoundaryVisitor() {} // Get next intruction to visit. - HloInstruction* GetNextInstruction() { - if (!worklist_.empty()) { - HloInstruction* inst = worklist_.front(); - worklist_.pop_front(); - return inst; - } - return nullptr; + Boundary PopNextBoundary() { + CHECK(!worklist_.empty()); + Boundary inst = worklist_.front(); + worklist_.pop_front(); + return inst; + } + void AddToWorkList(const Boundary& b) { + CHECK_GT(b.Operands().size(), 0); + worklist_.push_back(b); } - // Add operands of one instruction to worklist for further visit. - void AddInstructionOperands(HloInstruction* inst) { - int64 operand_count = inst->operand_count(); - for (int i = 0; i < operand_count; i++) { - HloInstruction* operand = inst->mutable_operand(i); - if (ContainsKey(visited_, operand)) { - continue; - } - bool all_user_visited = std::all_of( - operand->users().begin(), operand->users().end(), - [&](HloInstruction* user) { return ContainsKey(visited_, user); }); - - if (!all_user_visited) { - continue; - } - // Do not visit parameter_instructions. - if (ContainsKey(parameter_instructions_, operand)) { - // Add the operand and this instruction to the boundaries. - boundaries_.emplace_back(operand, i, inst); - continue; - } - worklist_.push_back(operand); - visited_.insert(operand); - } - } - - // Add instruction and its users to conditional boundaries. - void AddInstructionToBoundary(HloInstruction* inst) { - for (auto user : inst->users()) { - boundaries_.emplace_back(inst, user->operand_index(inst), user); - } - } - - // Add instruction to the to be removed instructions set and vector. - void AddInstructionToHoist(HloInstruction* inst) { - instructions_to_hoist_set_.insert(inst); - instructions_to_hoist_.emplace_back(inst); - } - - // If visitor has next instruction to visit. - bool HasNextInstruction() const { return !worklist_.empty(); } - - // If there is no hoist intruction. - int64 HoistInstructionSize() { return instructions_to_hoist_.size(); } - - // Get boundaries of this branch. - const std::vector& boundaries() const { - return boundaries_; - } - - // Get instructions to hoist in this branch. - const std::vector& instructions_to_hoist() const { - return instructions_to_hoist_; - } - - // Get hoist instruction set in this branch. - const std::unordered_set& instructions_to_hoist_set() const { - return instructions_to_hoist_set_; - } + bool HasNextBoundary() const { return !worklist_.empty(); } private: // worklist is the deque that contains instructions to be visited. - std::deque worklist_; - - // instructions that has been visited. - std::unordered_set visited_; - - // parameter instructions of the branch. 
- std::unordered_set parameter_instructions_; - - // Boundaries contains the set of instructions that its operand is within - // conditional but it can be hoist out of conditional. - std::vector boundaries_; - - // Instructions to hoist. - std::unordered_set instructions_to_hoist_set_; - - // Instructions to hoist, the order within this vector is BFS and - // an instruction's order will always be after its users. - std::vector instructions_to_hoist_; + std::deque worklist_; }; -// Returns true if `instruction` is worth hoisting out. -bool WorthHoisting(HloInstruction* instruction) { - for (const auto* operand : instruction->operands()) { - // Only move out instructions that won't share the same operand - // to avoid copy of the operand. - if (operand->user_count() > 1) { - return false; - } - } - switch (instruction->opcode()) { - case HloOpcode::kConvert: - // If Convert is after AllReduce, it is worth moving out AllReduce out - // of conditional for AR/CRS combine. If Convert is after other ops such - // as Dot or Convolutional, it is better to keep convert within - // conditional so that convert can be fused with Dot or Convolutional. - // - // TODO(b/154283721): figure out the scenario when convert can be fused - // with AllReduce out of conditional. - if (instruction->operand(0)->opcode() == HloOpcode::kAllReduce) { - return true; - } - return false; - case HloOpcode::kAllReduce: - case HloOpcode::kAdd: - case HloOpcode::kConstant: - case HloOpcode::kSubtract: - case HloOpcode::kMultiply: - case HloOpcode::kDivide: - case HloOpcode::kTuple: - case HloOpcode::kSqrt: +// Returns estimation of potential reuses carried by a given instruction. +// Use different integers to classify different levels of reuses +// This is used as a placeholder only, assuming all instructions can be +// fused to enable data reuses +int64 ReusesCarriedBy(HloInstruction* op, HloInstruction* user) { + VLOG(1) << "ConditionalCodeMotion: Add reuses carried by instr: " + << op->ToString() << "=>" << user->ToString() << "\n"; + switch (user->opcode()) { case HloOpcode::kGetTupleElement: - return true; + return 0; default: - return false; + break; + } + switch (op->opcode()) { + // These instructions are lightweight and easy to fuse. + case HloOpcode::kConstant: + return 0; + default: + // Assume fusion will not happen anyway if user count > 1) + if (op->user_count() > 1) { + return 0; + } + return 10; } } @@ -220,7 +122,7 @@ bool InstructionWithinBranchIdentical( return *a == *b; }; - if (instructions[0] == nullptr) { + if (instructions.empty()) { return false; } @@ -248,109 +150,27 @@ bool InstructionWithinBranchIdentical( }); } -// Returns if all the visitors/branches has next instruction to visit. -bool HasNextInstruction(const std::vector& visitors) { - bool has_next = true; - for (const auto& visitor : visitors) { - has_next &= visitor.HasNextInstruction(); - } - return has_next; -} - -// Create tuple element as the new root of the branch. The tuple will contain -// the operands that can't move out of conditional but its user will be moved -// out of conditional. 
-HloInstruction* CreateNewRoot( - const std::vector& boundaries, - const std::unordered_set& instructions_to_hoist_set, - HloComputation* computation) { - std::vector elements; - elements.reserve(boundaries.size()); - for (auto boundary : boundaries) { - if (ContainsKey(instructions_to_hoist_set, boundary.user)) { - elements.push_back(boundary.operand); - } - } - return computation->AddInstruction(HloInstruction::CreateTuple(elements)); -} - // Copy identical instructions within conditional outside of conditional. -void CopyIdenticalInstructionsOutOfConditional( - const std::vector& instructions_to_hoist, - HloComputation* conditional_parent, - absl::flat_hash_map* +Status CopyOutOfConditional( + Boundary& boundary, HloComputation* conditional_parent, + absl::flat_hash_map& hoisted_instructions) { - int64 instructions_size = instructions_to_hoist.size(); - // Visit the operands before its users and copy it, so that the copied - // user will point to the correct operand. - for (int64 i = instructions_size - 1; i >= 0; i--) { - HloInstruction* old_instruction = instructions_to_hoist[i]; - auto get_new_operand = [&](HloInstruction* old_operand) { - // If the operand can't be found in `instructions_to_hoist`, this - // operand will be in the `boundaries`, GetTupleElement instructions - // will be added later to replace this operand. - if (!ContainsKey(*hoisted_instructions, old_operand)) { - return old_operand; - } - return FindOrDie(*hoisted_instructions, old_operand); - }; - - absl::InlinedVector new_operands; - absl::c_transform(old_instruction->operands(), - std::back_inserter(new_operands), get_new_operand); - - HloInstruction* new_instruction = conditional_parent->AddInstruction( - old_instruction->CloneWithNewOperands(old_instruction->shape(), - new_operands)); - // Maps the instruction outside of conditional to the instruction - // inside of the conditional. - InsertOrDie(hoisted_instructions, old_instruction, new_instruction); + // Insert GetTupleElement before the instructions whose operands might still + // be within the conditional. + HloInstruction* op = boundary.Operands()[0]; + absl::InlinedVector new_operands; + for (int i = 0; i < op->operands().size(); ++i) { + auto op_i = op->operands()[i]; + VLOG(2) << "Looking for operand:" << op_i->ToString() << "\n"; + CHECK(ContainsKey(hoisted_instructions, op_i)); + new_operands.push_back(FindOrDie(hoisted_instructions, op_i)); } -} - -// If there are instructions to hoist, the root of the conditional must be -// moved out. Change the users of the conditional to the hoisted instruction -// of the new root. -Status ChangeConditionalUsers( - HloInstruction* conditional, HloInstruction* old_root, - const absl::flat_hash_map& - hoisted_instructions) { - HloInstruction* new_root = FindOrDie(hoisted_instructions, old_root); - TF_RETURN_IF_ERROR(conditional->ReplaceAllUsesWith(new_root)); - return Status::OK(); -} - -// Insert GetTupleElement before the instructions whose operands might still -// be within the conditional. -Status CreateGetTupleElementAfterConditional( - const std::vector& boundaries, - const std::unordered_set& instructions_to_hoist_set, - const absl::flat_hash_map& - hoisted_instructions, - HloInstruction* conditional, HloComputation* computation) { - int boundary_instruction_size = boundaries.size(); - - // Inserts GetTupleElement before the boundary instructions. 
- for (int i = 0; i < boundary_instruction_size; i++) { - HloInstruction* gte = - computation->AddInstruction(HloInstruction::CreateGetTupleElement( - boundaries[i].operand->shape(), conditional, i)); - - HloInstruction* new_instruction = - FindOrDie(hoisted_instructions, boundaries[i].user); - TF_RETURN_IF_ERROR( - new_instruction->ReplaceOperandWith(boundaries[i].operand_index, gte)); - } - return Status::OK(); -} - -// Remove instructions to be hoisted out of the branch computation. -Status RemoveInstructionFromComputation( - const std::vector& instructions_to_hoist, - HloComputation* branch) { - // Will visit the instructions after its users. - for (auto* instruction : instructions_to_hoist) { - TF_RETURN_IF_ERROR(branch->RemoveInstruction(instruction)); + HloInstruction* new_instruction = conditional_parent->AddInstruction( + op->CloneWithNewOperands(op->shape(), new_operands)); + // Maps the instruction outside of conditional to the instruction + // inside of the conditional. + for (HloInstruction* op : boundary.Operands()) { + hoisted_instructions[op] = new_instruction; } return Status::OK(); } @@ -574,128 +394,359 @@ StatusOr ConvertSpecialMove(HloInstruction* conditional, // are the shape of the operands are identical and their properties are // identical. Will start from the root instruction of each branch and get // the identical ops to hoist. -StatusOr MergeIdenticalElements(HloInstruction* conditional, - bool is_layout_sensitive) { - VLOG(1) << " visiting conditional:" << conditional->ToString(); - int branch_count = conditional->branch_count(); - if (branch_count <= 0) { +StatusOr ConditionalCodeMotion::MoveInstructionOut( + HloInstruction* conditional, std::vector& to_move_out, + std::vector& new_boundaries) { + if (to_move_out.empty()) { return false; } - - std::vector visitors; - visitors.reserve(branch_count); - // Visit instructions from the root instruction to the operands using BFS. - for (int i = 0; i < branch_count; i++) { - visitors.emplace_back(BranchVisitor(conditional->branch_computation(i))); - } - - // The instructions to be visited within each branch. - std::vector front_instructions(branch_count); - - while (HasNextInstruction(visitors)) { - for (int i = 0; i < branch_count; i++) { - front_instructions[i] = visitors[i].GetNextInstruction(); - } - // If two instructions has the same shape, opcode and its operands has the - // same shape, then this instruction can be moved out of conditional. - if (WorthHoisting(front_instructions[0]) && - InstructionWithinBranchIdentical(front_instructions, - is_layout_sensitive)) { - for (int i = 0; i < branch_count; i++) { - visitors[i].AddInstructionOperands(front_instructions[i]); - visitors[i].AddInstructionToHoist(front_instructions[i]); - } - } else { - for (int i = 0; i < branch_count; i++) { - // If the ops are not identical, these ops and its users will - // be in the boundaries` of the conditional. These ops will be stayed - // within the conditional, but one its only user will be moved out - // of conditional. 
- visitors[i].AddInstructionToBoundary(front_instructions[i]); - } - } - } - - if (visitors[0].HoistInstructionSize() < 1) { - return false; - } - - HloInstruction* old_root = - conditional->branch_computation(0)->root_instruction(); + VLOG(1) << "number of boundaries to move out:" << to_move_out.size() << "\n"; HloComputation* conditional_parent = conditional->parent(); + // save the old users before add new conditional user instructions + std::vector old_conditional_users = conditional->users(); + absl::flat_hash_map hoisted_instructions; // Maps instructions in the conditional body to instructions hoisted outside // the conditional that compute the same value. - absl::flat_hash_map hoisted_instructions; - // Copy identical instructions out of the conditional. - CopyIdenticalInstructionsOutOfConditional(visitors[0].instructions_to_hoist(), - conditional_parent, - &hoisted_instructions); - // If there are instructions to hoist, the root of the conditional must be - // moved out. Change the users of the conditional to the hoisted instruction - // of the new root. - TF_RETURN_IF_ERROR( - ChangeConditionalUsers(conditional, old_root, hoisted_instructions)); - + VLOG(2) << "before opt:" + << conditional_parent->ToString(HloPrintOptions::Fingerprint()) + << "\n"; + int64 op_index = 0; + for (Boundary b : new_boundaries) { + HloInstruction* op = b.Operands()[0]; + CHECK(op != nullptr); + VLOG(2) << "Mapping new boundary instr: " << op->ToString() << "\n"; + HloInstruction* gtr = conditional_parent->AddInstruction( + HloInstruction::CreateGetTupleElement(op->shape(), conditional, + op_index++)); + hoisted_instructions[op] = gtr; + } + // Copy boundary instructions out of the conditional. + // Visit the operands before its users and copy it, so that the copied + // user will point to the correct operand. + for (int64 i = to_move_out.size() - 1; i >= 0; i--) { + TF_RETURN_IF_ERROR(CopyOutOfConditional(to_move_out[i], conditional_parent, + hoisted_instructions)); + } + VLOG(2) << "Done copy branch instructions out\n" + << conditional_parent->ToString(HloPrintOptions::Fingerprint()) + << "\n"; + // Change original users of the conditional to use the correct operands. + HloInstruction* old_root = + conditional->branch_computation(0)->root_instruction(); + for (auto user_instr : old_conditional_users) { + CHECK(user_instr->opcode() == HloOpcode::kGetTupleElement); + auto tuple_opd = down_cast(user_instr); + int64 index = tuple_opd->tuple_index(); + HloInstruction* old_opd = old_root->operands()[index]; + HloInstruction* new_opd = hoisted_instructions[old_opd]; + CHECK(old_opd != nullptr); + CHECK(new_opd != nullptr); + TF_RETURN_IF_ERROR(user_instr->ReplaceAllUsesWith(new_opd)); + TF_RETURN_IF_ERROR(conditional_parent->RemoveInstruction(user_instr)); + } // Create tuple element within each branch and set it as root. + int64 branch_count = conditional->branch_count(); for (int i = 0; i < branch_count; i++) { - HloInstruction* tuple = CreateNewRoot( - visitors[i].boundaries(), visitors[i].instructions_to_hoist_set(), - conditional->branch_computation(i)); - conditional->branch_computation(i)->set_root_instruction(tuple, true); - } - // Changes conditional instruction shape to the shape of the new root. - *conditional->mutable_shape() = - conditional->branch_computation(0)->root_instruction()->shape(); - - // Insert GetTupleElement before the instructions whose operands might still - // be within the conditional. 
- TF_RETURN_IF_ERROR(CreateGetTupleElementAfterConditional( - visitors[0].boundaries(), visitors[0].instructions_to_hoist_set(), - hoisted_instructions, conditional, conditional_parent)); - - // Remove hoist instructions from the branches. - for (int i = 0; i < branch_count; i++) { - TF_RETURN_IF_ERROR( - RemoveInstructionFromComputation(visitors[i].instructions_to_hoist(), - conditional->branch_computation(i))); + auto computation = conditional->branch_computation(i); + std::vector elements; + for (auto b1 : new_boundaries) { + HloInstruction* op = b1.Operands()[i]; + VLOG(1) << "branch count=" << i << "\n"; + CHECK(op != nullptr); + VLOG(1) << "Adding to root " << i << " with " << op->ToString() << "\n"; + elements.push_back(op); + } + HloInstruction* tuple = + computation->AddInstruction(HloInstruction::CreateTuple(elements)); + computation->set_root_instruction(tuple, true); + VLOG(2) << "computation is :" << computation->ToString() << "\n"; + // Remove hoisted instructions from the branches. + for (auto b2 : to_move_out) { + VLOG(2) << "Removing boundary:" << b2.ToString() << "\n"; + TF_RETURN_IF_ERROR(computation->RemoveInstruction(b2.Operands()[i])); + } } + // Change conditional instruction shape to the shape of the new root. + HloInstruction* new_root = + conditional->branch_computation(0)->root_instruction(); + *conditional->mutable_shape() = new_root->shape(); + // + VLOG(2) << "done moving instructions out of branches\n" + << conditional_parent->ToString(HloPrintOptions::Fingerprint()) + << "\n"; return true; } -} // namespace +// Group single chains of operands or uses of boundaries into new boundaries +class GroupConnectedBoundaries { + private: + std::unordered_set visited_; + std::vector connected_boundaries_, new_boundaries_; + HloInstruction* conditional_; + bool is_layout_sensitive_; -StatusOr ConditionalCodeMotion::Run(HloModule* module) { - bool changed = false; - - if (pursue_full_conditional_code_motion_) { - std::vector conditional_ops; - for (auto* comp : module->MakeComputationPostOrder()) { - for (auto* instr : comp->MakeInstructionPostOrder()) { - if (instr->opcode() == HloOpcode::kConditional) { - conditional_ops.push_back(instr); + public: + explicit GroupConnectedBoundaries(HloInstruction* conditional, + bool is_layout_sensitive) + : conditional_(conditional), is_layout_sensitive_(is_layout_sensitive) {} + // Returns true if `instruction` is worth hoisting out. + bool WorthHoisting(HloInstruction* instruction) { + switch (instruction->opcode()) { + case HloOpcode::kConvert: + // If Convert is after AllReduce, it is worth moving out AllReduce out + // of conditional for AR/CRS combine. If Convert is after other ops such + // as Dot or Convolutional, it is better to keep convert within + // conditional so that convert can be fused with Dot or Convolutional. + // + // TODO(b/154283721): figure out the scenario when convert can be fused + // with AllReduce out of conditional. 
+ switch (instruction->operand(0)->opcode()) { + case HloOpcode::kAllReduce: + case HloOpcode::kReshape: + return true; + default: + VLOG(1) << "Instruction is convert and its operand is not know to " + "be worth hoisting\n"; + return false; } + case HloOpcode::kAllReduce: + case HloOpcode::kAdd: + case HloOpcode::kConstant: + case HloOpcode::kSubtract: + case HloOpcode::kMultiply: + case HloOpcode::kDivide: + case HloOpcode::kTuple: + case HloOpcode::kSqrt: + case HloOpcode::kReshape: + case HloOpcode::kGetTupleElement: + return true; + default: + VLOG(1) << "Instruction is not known to be worth hoisting\n"; + return false; + } + } + // Calculates the degree of reuses carried by a pair of conditional + // boundaries, if b1 is inside a conditional and b2 is outside. + int64 ReusesBeforeBoundary(HloInstruction* user) { + int64 reuses = 0; + for (auto op : user->operands()) { + // Only consider single-user cases as reuseable. + if (ContainsKey(visited_, op) && op->user_count() == 1) { + reuses += ReusesCarriedBy(op, user); } } + VLOG(1) << "cost to be paied after moving out" << user->ToString() << ":" + << reuses << "\n"; + return reuses; + } - for (HloInstruction* conditional_op : conditional_ops) { - TF_ASSIGN_OR_RETURN( - bool result, - MergeIdenticalElements(conditional_op, is_layout_sensitive_)); - changed |= result; + int64 ReusesAfterBoundary(HloInstruction* user) { + CHECK(user != nullptr); + auto all_users = user->users(); + // For now, assume that if an instruction has multiple-consumers, it will + // not be reused (the reuse currently requires duplication in fusion and so + // is expensive). + if (all_users.size() > 1) { + return 0; } + if (!all_users.empty()) { + auto op = all_users[0]; + int64 reuses = 0; + // Only count reuses that run through the conditional root. 
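// (Illustrative note, using the placeholder costs defined in ReusesCarriedBy
// above; the instruction names and index are hypothetical.) If the op being
// considered feeds only the branch root at, say, tuple index 2, the code
// below finds the conditional's get-tuple-element user with tuple index 2 in
// the parent computation and credits the reuse the op could have with that
// consumer: typically 10 under this placeholder model, or 0 when the op is a
// constant or the outside consumer is itself a get-tuple-element.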
+ if (op == conditional_->branch_computation(0)->root_instruction()) { + int64 index = op->operand_index(user); + for (auto op2 : conditional_->users()) { + CHECK(op2->opcode() == HloOpcode::kGetTupleElement); + auto tuple_opd = down_cast(op2); + if (index == tuple_opd->tuple_index()) { + all_users = op2->users(); + if (!all_users.empty()) { + reuses += ReusesCarriedBy(user, all_users[0]); + break; + } + } + } + } + VLOG(1) << "reuses to be gained after moving " << user->ToString() << ":" + << reuses << "\n"; + return reuses; + } + return 0; + } - if (changed) { - HloPassPipeline subpipeline("after_conditional_code_motion"); - subpipeline.AddPass(); - subpipeline.AddPass(); - subpipeline.AddPass(); - TF_ASSIGN_OR_RETURN(bool cleanup_changed, subpipeline.Run(module)); - changed |= cleanup_changed; + int64 BenefitForMovingBoundaries(const std::vector& boundaries) { + int64 reuses_before = 0, reuses_after = 0; + for (Boundary b : boundaries) { + auto op = b.Operands()[0]; + if (op == conditional_->branch_computation(0)->root_instruction()) { + continue; + } + reuses_before += ReusesBeforeBoundary(op); + VLOG(1) << "Cost of moving so far: " << reuses_before << "\n"; + reuses_after += ReusesAfterBoundary(op); + VLOG(1) << "Benefit from moving so far : " << reuses_after << "\n"; + } + if (reuses_after == 0 && reuses_before == 0) { + return -1; + } else if (boundaries[0].IsInsideBranch()) { + return reuses_after - reuses_before; + } else { + return reuses_before - reuses_after; } } + Boundary GetNextBoundary(const Boundary& b, int64 op_index) { + Boundary b2(b.GetPosition()); + CHECK(b.Operands().size() == conditional_->branch_count()); + for (int j = 0; j < b.Operands().size(); ++j) { + HloInstruction* inst = b.Operands()[j]; + CHECK(inst != nullptr); + HloInstruction* op = (b.IsInsideBranch()) ? inst->operands()[op_index] + : inst->users()[op_index]; + CHECK(op != nullptr); + b2.Operands().push_back(op); + } + return b2; + } + void AddBoundaries(const Boundary& boundary) { + BoundaryVisitor visitor; + visitor.AddToWorkList(boundary); + while (visitor.HasNextBoundary()) { + Boundary b = visitor.PopNextBoundary(); + // if b is already visited, it must have multiple users and is already in + // new boundaries. Skip it. + if (ContainsKey(visited_, b.Operands()[0])) { + continue; + } + VLOG(1) << "visiting boundary " << b.ToString() << "\n"; + if ((b.Operands().size() == 1 || + InstructionWithinBranchIdentical(b.Operands(), + is_layout_sensitive_)) && + WorthHoisting(b.Operands()[0])) { + connected_boundaries_.push_back(b); + VLOG(1) << "boundary can be moved\n"; + int64 operand_count = (b.IsInsideBranch()) + ? b.Operands()[0]->operand_count() + : b.Operands()[0]->users().size(); + for (int i = 0; i < operand_count; i++) { + Boundary b2 = GetNextBoundary(b, i); + int64 b2_count = (b2.IsInsideBranch()) + ? b2.Operands()[0]->user_count() + : b2.Operands()[0]->operand_count(); + // only consider adding an exclusive producor into the same group. 
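// (Illustrative note, with hypothetical instruction names.) If %reshape is
// consumed only by the %convert currently being grouped, its boundary is
// pushed onto the worklist below and may be hoisted together with the
// convert; if %reshape also feeds another instruction inside the branch, it
// is recorded as a new boundary instead and stays where it is for this round.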
+ if (b2_count == 1) { + VLOG(2) << "Add operand " << i << " to visit later\n"; + visitor.AddToWorkList(b2); + } else { + VLOG(2) << "Operand " << i << " has multiple uses\n"; + if (!ContainsKey(visited_, b2.Operands()[0])) { + visited_.insert(b2.Operands()[0]); + new_boundaries_.push_back(b2); + } + } + } + } else { + VLOG(1) << "boundary cannot be moved\n"; + visited_.insert(b.Operands()[0]); + new_boundaries_.push_back(b); + } + } + } + std::vector BoundariesToMoveOut(const Boundary& b) { + HloInstruction* inst = b.Operands()[0]; + if (inst->opcode() == HloOpcode::kConditional) { + int branch_count = inst->branch_count(); + // Visit instructions from the root instruction to the operands using BFS. + Boundary boundary_in(Boundary::Position::kInsideBranch); + for (int i = 0; i < branch_count; i++) { + HloComputation* branch_computation = inst->branch_computation(i); + HloInstruction* root_inst = branch_computation->root_instruction(); + CHECK(root_inst != nullptr); + boundary_in.Operands().push_back(root_inst); + } + AddBoundaries(boundary_in); + } + return connected_boundaries_; + } + std::vector BoundariesToMoveIn(const Boundary& b) { + if (b.IsInsideBranch()) { + return std::vector(); + } + AddBoundaries(b); + return connected_boundaries_; + } + std::vector GetNewBoundaries() { return new_boundaries_; } +}; + +ConditionalCodeMotion::Decision ConditionalCodeMotion::ConsiderCodeMotion( + HloInstruction* conditional, const Boundary& cur_boundary, + std::vector& to_move, std::vector& new_boundaries) { + GroupConnectedBoundaries connect(conditional, is_layout_sensitive_); + auto move_out = connect.BoundariesToMoveOut(cur_boundary); + if (!move_out.empty()) { + std::vector next_boundaries = connect.GetNewBoundaries(); + auto benefit = connect.BenefitForMovingBoundaries(move_out); + VLOG(1) << "benefit of moving " << cur_boundary.Operands()[0]->ToString() + << ":" << benefit << "\n"; + if (benefit >= 0) { + new_boundaries = next_boundaries; + to_move = move_out; + return Decision::kMoveOutOfBranch; + } + } + return ConditionalCodeMotion::Decision::kNoChange; +} + +StatusOr ConditionalCodeMotion::Run(HloModule* module) { + // Gather all the conditional ops in the module ahead of time, to avoid + // potential complications of modifying the code that affecting traversal. + std::vector conditional_ops; + for (auto* comp : module->MakeComputationPostOrder()) { + for (auto* instr : comp->MakeInstructionPostOrder()) { + if (instr->opcode() == HloOpcode::kConditional) { + conditional_ops.push_back(instr); + } + } + } + + bool changed = false; + std::vector to_move_out, to_move_in, new_boundaries; + for (HloInstruction* conditional : conditional_ops) { + BoundaryVisitor visitor(conditional); + VLOG(2) << "Analyzing conditional:" << conditional->ToString() << "\n"; + // Boundariess to move out of and to move into the branches. 
+ while (visitor.HasNextBoundary()) { + std::vector to_move, next_boundary; + Boundary boundary = visitor.PopNextBoundary(); + VLOG(2) << "Analyzing boundary:" << boundary.ToString() << "\n"; + ConditionalCodeMotion::Decision d = + ConsiderCodeMotion(conditional, boundary, to_move, next_boundary); + switch (d) { + case Decision::kMoveOutOfBranch: + VLOG(2) << "Decision is move out of branch\n"; + to_move_out.insert(to_move_out.end(), to_move.begin(), to_move.end()); + break; + case Decision::kMoveIntoBranch: + VLOG(2) << "Decision is move into branch\n"; + to_move_in.insert(to_move_in.end(), to_move.begin(), to_move.end()); + break; + case Decision::kNoChange: + VLOG(2) << "Decision is no change\n"; + new_boundaries.push_back(boundary); + break; + } + for (const Boundary& b : next_boundary) { + visitor.AddToWorkList(b); + } + } + TF_ASSIGN_OR_RETURN( + bool result, + MoveInstructionOut(conditional, to_move_out, new_boundaries)); + VLOG(2) << "moving out result:" << result << "\n"; + changed |= result; + } // handling convert rematerialization/hoisting - { + if (!changed && pursue_full_conditional_code_motion_) { std::vector conditional_ops; for (auto* comp : module->MakeComputationPostOrder()) { for (auto* instr : comp->MakeInstructionPostOrder()) { @@ -711,7 +762,6 @@ StatusOr ConditionalCodeMotion::Run(HloModule* module) { changed |= convert_result; } } - if (changed) { HloPassPipeline subpipeline( "after_conditional_code_motion_after_convert_hoisting"); @@ -721,8 +771,8 @@ StatusOr ConditionalCodeMotion::Run(HloModule* module) { TF_ASSIGN_OR_RETURN(bool cleanup_changed, subpipeline.Run(module)); changed |= cleanup_changed; } - return changed; } +} // namespace conditional_opt } // namespace xla diff --git a/tensorflow/compiler/xla/service/conditional_code_motion.h b/tensorflow/compiler/xla/service/conditional_code_motion.h index 95f02833e15..d7295058467 100644 --- a/tensorflow/compiler/xla/service/conditional_code_motion.h +++ b/tensorflow/compiler/xla/service/conditional_code_motion.h @@ -23,35 +23,80 @@ limitations under the License. namespace xla { -// ConditionalCodeMotion specializes in hoisting/rematerializing -// unconditional converts in the default mode. -// When pursue_full_conditional_code_motion_ is set to true, the -// full HLO pass moves identical ops out of a conditional in addition to moving -// converts. +namespace conditional_opt { +// At the conceptural level, a boundary can be thought of as representing a +// single virtual operation, except this virtual operation is conditionally +// instantiated into different concrete operations at each conditional branch. +// So a boundary is mapped to a single concrete operation if it is outside of +// conditional branches, and is mapped to a list of instructions if inside the +// branches. This data structure therefore allows a common data structure +// representation of the instructions to be moved, whether they are inside or +// outside of the branches. Subsequently, it allows a common implementation +// basis to be used for both moving instructions out of and for moving them +// inside branches. 
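// A hedged illustration of this abstraction (the HLO below is made up for
// exposition and not taken from any real module): suppose both branches of a
// conditional end in an identical convert,
//   on_true:  %c.t = bf16[...] convert(%x.t)   ROOT %r.t = tuple(%c.t)
//   on_false: %c.f = bf16[...] convert(%x.f)   ROOT %r.f = tuple(%c.f)
// While the convert is still inside the branches, one Boundary represents it
// and holds both concrete instructions {%c.t, %c.f}; once the pass hoists it,
// the same logical operation is represented by a Boundary outside the
// branches that holds a single instruction, the hoisted convert consuming a
// get-tuple-element of the conditional.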
+class Boundary { + public: + enum class Position { kInsideBranch, kOutsideBranch }; + explicit Boundary(Position p) : position_(p) {} + std::vector& Operands() { return operands_; } + const std::vector& Operands() const { return operands_; } + bool IsInsideBranch() const { return position_ == Position::kInsideBranch; } + bool IsOutsideBranch() const { return position_ == Position::kOutsideBranch; } + Position GetPosition() const { return position_; } + bool IsEmpty() const { return operands_.empty(); } + std::string ToString() const { + std::string res; + for (HloInstruction* op : operands_) { + res += op->ToString() + ";"; + } + return res; + } + + private: + // Boundary instructions in the conditional branches, one from each branch + // of the conditional. + std::vector operands_; + Position position_; +}; + +// HLO pass that moves identical ops in/out of conditional. // - The definition of identical are the shape of the operands are identical // and their properties are identical. -// - Currently, only some types of instructions is supported. -// TODO(b/154283721): relax non-sharable operand constraint and avoid copies in -// the new root. // - Only the identical ops that won't share operands with other ops will // be moved out of conditional. class ConditionalCodeMotion : public HloModulePass { public: // If is_layout_sensitive is true, then the hoist process preserves layout // during identical comparison. Otherwise, layout is ignored. - explicit ConditionalCodeMotion( - bool is_layout_sensitive = true, - bool pursue_full_conditional_code_motion = false) + explicit ConditionalCodeMotion(bool is_layout_sensitive, + bool pursue_full_conditional_code_motion) : is_layout_sensitive_(is_layout_sensitive), pursue_full_conditional_code_motion_( pursue_full_conditional_code_motion) {} absl::string_view name() const override { return "conditional-code-motion"; } StatusOr Run(HloModule* module) override; + // Optimization decision for each boundary of the conditional instruction. + enum class Decision { kMoveOutOfBranch, kMoveIntoBranch, kNoChange }; + // If the optimization decision is NO_CHANGE, new_boundary is set to nullptr; + // otherwise, it is set to the new boundary after proposed optimization. + virtual Decision ConsiderCodeMotion(HloInstruction* conditional, + const Boundary& cur_boundary, + std::vector& to_move, + std::vector& new_boundaries); + private: const bool is_layout_sensitive_; const bool pursue_full_conditional_code_motion_; + + StatusOr MoveInstructionOut(HloInstruction* conditional, + std::vector& to_move_out, + std::vector& new_boundaries); + StatusOr MoveInstructionIn(HloInstruction* conditional, + std::vector& to_move_in, + std::vector& new_boundaries); }; +} // namespace conditional_opt } // namespace xla diff --git a/tensorflow/compiler/xla/service/conditional_code_motion_test.cc b/tensorflow/compiler/xla/service/conditional_code_motion_test.cc index 38b2b515fa0..b3c5e17094a 100644 --- a/tensorflow/compiler/xla/service/conditional_code_motion_test.cc +++ b/tensorflow/compiler/xla/service/conditional_code_motion_test.cc @@ -33,7 +33,7 @@ limitations under the License. 
#include "tensorflow/core/platform/types.h" namespace xla { -namespace { +namespace conditional_opt { using ConditionalCodeMotionTest = HloTestBase; namespace op = xla::testing::opcode_matchers; @@ -117,6 +117,47 @@ ENTRY main { EXPECT_THAT(root, AllOf(op::Tuple(op::Convert()))); } +TEST_F(ConditionalCodeMotionTest, MoveConvertOutConditional) { + absl::string_view hlo_string = + R"( +HloModule RemoveDotOpOut + +on_true { + %arg_tuple.1 = (f32[93184,4]{1,0}) parameter(0) + %get-tuple-element.1 = f32[93184,4]{1,0} get-tuple-element(%arg_tuple.1), index=0 + %reshape.8493 = f32[2,512,364]{2,1,0} reshape(f32[93184,4]{1,0} %get-tuple-element.1) + %add.8493 = f32[2,512,364]{2,1,0} add(f32[2,512,364]{2,1,0} %reshape.8493, f32[2,512,364]{2,1,0} %reshape.8493) + %convert.2894 = bf16[2,512,364]{2,1,0} convert(f32[2,512,364]{2,1,0} %add.8493) + ROOT %tuple.1 = ( bf16[2,512,364]{2,1,0}) tuple(%convert.2894) +} + +on_false { + %arg_tuple.2 = (f32[93184,4]{1,0}) parameter(0) + %get-tuple-element.3 = f32[93184,4]{1,0} get-tuple-element(%arg_tuple.2), index=0 + %reshape.9717 = f32[2,512,364]{2,1,0} reshape(f32[93184,4]{1,0} %get-tuple-element.3) + %add.8493 = f32[2,512,364]{2,1,0} add(f32[2,512,364]{2,1,0} %reshape.9717, f32[2,512,364]{2,1,0} %reshape.9717) + %sub.8493 = f32[2,512,364]{2,1,0} subtract(f32[2,512,364]{2,1,0} %add.8493, f32[2,512,364]{2,1,0} %reshape.9717) + %convert.3604 = bf16[2,512,364]{2,1,0} convert(f32[2,512,364]{2,1,0} %reshape.9717), metadata={op_type="Cast" op_name="gradients/Cast_125_grad/Cast"} + ROOT %tuple.2 = (bf16[2,512,364]{2,1,0}) tuple(%convert.3604) +} + +ENTRY main { + pred.1 = pred[] parameter(0) + arg_tuple.11 = (f32[93184,4]{1,0}) parameter(1) + arg_tuple.22 = (f32[93184,4]{1,0}) parameter(2) + conditional = (bf16[2,512,364]{2,1,0}) conditional(pred.1, arg_tuple.11, arg_tuple.22), true_computation=on_true, false_computation=on_false + get-first-index = bf16[2,512,364]{2,1,0} get-tuple-element(conditional), index=0 + ROOT result = (bf16[2,512,364]{2,1,0}) tuple(get-first-index) +} +)"; + auto module = ParseAndReturnVerifiedModule(hlo_string).ValueOrDie(); + ConditionalCodeMotion pass(true, true); + ASSERT_TRUE(pass.Run(&*module).ValueOrDie()); + + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, AllOf(op::Tuple(op::Convert()))); +} + TEST_F(ConditionalCodeMotionTest, MoveConvertOut) { absl::string_view hlo_string = R"( @@ -152,8 +193,20 @@ ENTRY main { ConditionalCodeMotion pass(true, true); ASSERT_TRUE(pass.Run(&*module).ValueOrDie()); + const HloInstruction* conditional = + FindInstruction(module.get(), "conditional"); + const HloComputation* on_true = conditional->branch_computation(0); + ASSERT_EQ(on_true->instruction_count(), 2); + const HloComputation* on_false = conditional->branch_computation(1); + ASSERT_EQ(on_false->instruction_count(), 2); + HloInstruction* root = module->entry_computation()->root_instruction(); - EXPECT_THAT(root, AllOf(op::Tuple(op::Add(op::Convert(), op::Convert())))); + EXPECT_THAT( + root, + AllOf(op::Tuple(op::Add(op::Convert(op::Reshape(op::GetTupleElement( + op::GetTupleElement(op::Conditional())))), + op::Convert(op::Reshape(op::GetTupleElement( + op::GetTupleElement(op::Conditional())))))))); } TEST_F(ConditionalCodeMotionTest, UserShareOperandCannotBeMoved) { @@ -173,7 +226,7 @@ on_true { add.2 = f32[] add(add.1, constant.2) add.3 = f32[] add(add.1, constant.3) add.4 = f32[] add(add.3, constant.5) - multiply.1 = f32[] multiply(add.2, constant.4) + multiply.1 = f32[] multiply(add.4, 
constant.4) ROOT tuple.6 = (f32[], f32[]) tuple(multiply.1, add.4) } @@ -216,13 +269,11 @@ ENTRY main { const HloComputation* on_false = conditional->branch_computation(1); ASSERT_EQ(on_false->instruction_count(), 9); - // Check only one add and multiply is moved out. HloInstruction* root = module->entry_computation()->root_instruction(); EXPECT_THAT( - root, - AllOf(op::Tuple( - op::Multiply(op::GetTupleElement(op::Conditional()), op::Constant()), - op::Add(op::GetTupleElement(op::Conditional()), op::Constant())))); + root, AllOf(op::Tuple(op::Multiply(op::GetTupleElement(op::Conditional()), + op::Constant()), + op::GetTupleElement(op::Conditional())))); } TEST_F(ConditionalCodeMotionTest, ConditionalRootElementChanged) { @@ -269,16 +320,16 @@ ENTRY main { const HloInstruction* conditional = FindInstruction(module.get(), "conditional"); const HloComputation* on_true = conditional->branch_computation(0); - ASSERT_EQ(on_true->instruction_count(), 7); + ASSERT_EQ(on_true->instruction_count(), 1); const HloComputation* on_false = conditional->branch_computation(1); - ASSERT_EQ(on_false->instruction_count(), 7); + ASSERT_EQ(on_false->instruction_count(), 1); - // add.3 in on_true will be moved out, add.1 and add.2 will be in condtional - // root. - ASSERT_TRUE(ShapeUtil::Compatible( - conditional->shape(), - ShapeUtil::MakeTupleShape( - {ShapeUtil::MakeShape(F32, {}), ShapeUtil::MakeShape(F32, {})}))); + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT( + root, + AllOf(op::Tuple(op::Add( + op::Add(op::GetTupleElement(op::Conditional()), op::Constant()), + op::Add(op::GetTupleElement(op::Conditional()), op::Constant()))))); } TEST_F(ConditionalCodeMotionTest, ConditionalIsRootInstruction) { @@ -329,24 +380,9 @@ ENTRY main { )"; auto module = ParseAndReturnVerifiedModule(hlo_string).ValueOrDie(); ConditionalCodeMotion pass(true, true); - ASSERT_TRUE(pass.Run(&*module).ValueOrDie()); - - const HloInstruction* conditional = - FindInstruction(module.get(), "conditional"); - const HloComputation* on_true = conditional->branch_computation(0); - ASSERT_EQ(on_true->instruction_count(), 9); - const HloComputation* on_false = conditional->branch_computation(1); - ASSERT_EQ(on_false->instruction_count(), 9); - - // Check only one add and multiply is moved out. - // add.3 and add.5 can't be moved out because they share operands with - // other instructions. 
- HloInstruction* root = module->entry_computation()->root_instruction(); - EXPECT_THAT( - root, - AllOf(op::Tuple( - op::Multiply(op::GetTupleElement(op::Conditional()), op::Constant()), - op::Add(op::GetTupleElement(op::Conditional()), op::Constant())))); + // If there is no instruction after the conditional, there is no benefit to + // move + ASSERT_FALSE(pass.Run(&*module).ValueOrDie()); } TEST_F(ConditionalCodeMotionTest, LayoutMisMatchCannotMovedOut) { @@ -469,7 +505,8 @@ ENTRY main { false_computation=on_false get-first-index = f32[3,3,128,128] get-tuple-element(conditional), index=0 - ROOT result = (f32[3,3,128,128]) tuple(get-first-index) + add.1 = f32[3,3,128,128] add(f32[3,3,128,128] get-first-index, f32[3,3,128,128] get-first-index) + ROOT result = (f32[3,3,128,128]) tuple(add.1) } )"; auto module = ParseAndReturnVerifiedModule(hlo_string).ValueOrDie(); @@ -487,10 +524,14 @@ ENTRY main { conditional->shape(), ShapeUtil::MakeTupleShape({ShapeUtil::MakeShape( BF16, {3, 3, 128, 128})}))); HloInstruction* root = module->entry_computation()->root_instruction(); - EXPECT_THAT(root, AllOf(op::Tuple(op::Convert(op::AllReduce( - op::GetTupleElement(op::Conditional())))))); + EXPECT_THAT( + root, + AllOf(op::Tuple(op::Add( + op::Convert(op::AllReduce(op::GetTupleElement(op::Conditional()))), + op::Convert( + op::AllReduce(op::GetTupleElement(op::Conditional()))))))); } -} // namespace +} // namespace conditional_opt } // namespace xla From af7fee31703e3fb370ac963898dd7337e893a288 Mon Sep 17 00:00:00 2001 From: WindQAQ Date: Sun, 12 Jul 2020 00:08:53 -0700 Subject: [PATCH 0242/2522] Test scatter max --- .../kernel_tests/resource_variable_ops_test.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py index 669c7c63967..59a0a0abd7a 100644 --- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py +++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py @@ -584,18 +584,23 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase, indices=[1], values=constant_op.constant([1.5], dtype=dtype)))) self.assertAllCloseAccordingToType([0.0, 1.0], self.evaluate(v)) + @parameterized.parameters(dtypes.float16, dtypes.float32, dtypes.float64) @test_util.run_in_graph_and_eager_modes - def testScatterMaxVariableMethod(self): - v = resource_variable_ops.ResourceVariable([0.0, 4.0], name="max1") + def testScatterMaxVariableMethod(self, dtype): + v = resource_variable_ops.ResourceVariable( + [0.0, 4.0], name="max1", dtype=dtype) self.evaluate(variables.global_variables_initializer()) self.evaluate( - v.scatter_max(ops.IndexedSlices(indices=[1], values=[5.0]))) + v.scatter_max(ops.IndexedSlices( + indices=[1], values=constant_op.constant([5.0], dtype=dtype)))) self.assertAllEqual([0.0, 5.0], self.evaluate(v)) - v = resource_variable_ops.ResourceVariable([0.0, 3.5], name="max2") + v = resource_variable_ops.ResourceVariable( + [0.0, 3.5], name="max2", dtype=dtype) self.evaluate(variables.global_variables_initializer()) self.evaluate( - v.scatter_max(ops.IndexedSlices(indices=[1], values=[2.0]))) + v.scatter_max(ops.IndexedSlices( + indices=[1], values=constant_op.constant([2.0], dtype=dtype)))) self.assertAllEqual([0.0, 3.5], self.evaluate(v)) @test_util.run_in_graph_and_eager_modes From d7834424cbd81d32700c3465483926e919deb463 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Sun, 12 Jul 2020 02:01:55 -0700 Subject: [PATCH 0243/2522] Update GraphDef version to 460. PiperOrigin-RevId: 320822145 Change-Id: Ic6dce9b3581a7a885eb4ec1b2441d99d7f8fbc7e --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 228afc332bb..2b00930f8d5 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 459 // Updated: 2020/7/11 +#define TF_GRAPH_DEF_VERSION 460 // Updated: 2020/7/12 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From f356a508cfa22ef39704154f62e0812b879b7833 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 12 Jul 2020 02:01:56 -0700 Subject: [PATCH 0244/2522] compat: Update forward compatibility horizon to 2020-07-12 PiperOrigin-RevId: 320822147 Change-Id: Ia3a6b7c621fc2b2b29327f9ea96c0dcc1b08c6a2 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 83d08cfdec2..0a00ebcc2f9 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 11) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 12) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From 242168dd700e379f7ec9fbcf15cc6b46a5cc1c2a Mon Sep 17 00:00:00 2001 From: WindQAQ Date: Sun, 12 Jul 2020 02:36:48 -0700 Subject: [PATCH 0245/2522] Test scatter min --- .../resource_variable_ops_test.py | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py index 59a0a0abd7a..16b6cb9c21b 100644 --- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py +++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py @@ -593,7 +593,7 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase, self.evaluate( v.scatter_max(ops.IndexedSlices( indices=[1], values=constant_op.constant([5.0], dtype=dtype)))) - self.assertAllEqual([0.0, 5.0], self.evaluate(v)) + self.assertAllCloseAccordingToType([0.0, 5.0], self.evaluate(v)) v = resource_variable_ops.ResourceVariable( [0.0, 3.5], name="max2", dtype=dtype) @@ -601,21 +601,26 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase, self.evaluate( v.scatter_max(ops.IndexedSlices( indices=[1], values=constant_op.constant([2.0], dtype=dtype)))) - self.assertAllEqual([0.0, 3.5], self.evaluate(v)) + self.assertAllCloseAccordingToType([0.0, 3.5], self.evaluate(v)) + @parameterized.parameters(dtypes.float16, dtypes.float32, dtypes.float64) @test_util.run_in_graph_and_eager_modes - def testScatterMinVariableMethod(self): - v = resource_variable_ops.ResourceVariable([0.0, 4.0], name="min1") + def testScatterMinVariableMethod(self, dtype): + v = 
resource_variable_ops.ResourceVariable( + [0.0, 4.0], name="min1", dtype=dtype) self.evaluate(variables.global_variables_initializer()) self.evaluate( - v.scatter_min(ops.IndexedSlices(indices=[1], values=[5.0]))) - self.assertAllEqual([0.0, 4.0], self.evaluate(v)) + v.scatter_min(ops.IndexedSlices( + indices=[1], values=constant_op.constant([5.0], dtype=dtype)))) + self.assertAllCloseAccordingToType([0.0, 4.0], self.evaluate(v)) - v = resource_variable_ops.ResourceVariable([0.0, 3.5], name="min2") + v = resource_variable_ops.ResourceVariable( + [0.0, 3.5], name="min2", dtype=dtype) self.evaluate(variables.global_variables_initializer()) self.evaluate( - v.scatter_min(ops.IndexedSlices(indices=[1], values=[2.0]))) - self.assertAllEqual([0.0, 2.0], self.evaluate(v)) + v.scatter_min(ops.IndexedSlices( + indices=[1], values=constant_op.constant([2.0], dtype=dtype)))) + self.assertAllCloseAccordingToType([0.0, 2.0], self.evaluate(v)) @test_util.run_in_graph_and_eager_modes def testScatterMulVariableMethod(self): From f6984195e3ea91f05d329725086eb2af4043b07d Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Sun, 12 Jul 2020 07:39:22 -0700 Subject: [PATCH 0246/2522] Rollback of 4abf3012f14b76aa0fd1ce987f9afdad7639d734 which accidentally breaks compilation of `tensorflow/compiler/xla/service/conditional_code_motion.cc` PiperOrigin-RevId: 320840323 Change-Id: Ifaf6dd56f2d7f885a85213097d77da901b216677 --- .../xla/service/conditional_code_motion.cc | 744 ++++++++---------- .../xla/service/conditional_code_motion.h | 67 +- .../service/conditional_code_motion_test.cc | 117 +-- 3 files changed, 396 insertions(+), 532 deletions(-) diff --git a/tensorflow/compiler/xla/service/conditional_code_motion.cc b/tensorflow/compiler/xla/service/conditional_code_motion.cc index 3287726b1a7..6db4c3eb6d4 100644 --- a/tensorflow/compiler/xla/service/conditional_code_motion.cc +++ b/tensorflow/compiler/xla/service/conditional_code_motion.cc @@ -46,63 +46,161 @@ limitations under the License. namespace xla { -namespace conditional_opt { +namespace { + +struct ConditionalBoundary { + ConditionalBoundary(HloInstruction* op, int64 op_index, HloInstruction* usr) + : operand(op), operand_index(op_index), user(usr) {} + // `operand` is one of `user`'s operand. + + // Instruction that remains in the conditional but one of its user + // is moved out of conditonal. + HloInstruction* operand; + // operand_index for `operand` in the `user`. + int64 operand_index; + // Instruction that moved out of conditional. + HloInstruction* user; +}; // Visit the root instructions to its operands follow BFS. // Will visit an instructions after all its users have been visited. Parameters // are not visited. -class BoundaryVisitor { +class BranchVisitor { public: - // start with an existing conditional computation. - explicit BoundaryVisitor(HloInstruction* conditional) { - Boundary b(Boundary::Position::kInsideBranch); - b.Operands().push_back(conditional); - worklist_.push_back(b); + explicit BranchVisitor(const HloComputation* branch_computation) { + HloInstruction* root_inst = branch_computation->root_instruction(); + worklist_.push_back(root_inst); + visited_.insert(root_inst); + for (auto parameter_inst : branch_computation->parameter_instructions()) { + parameter_instructions_.insert(parameter_inst); + } } - // Start with an empty work list. - BoundaryVisitor() {} // Get next intruction to visit. 
- Boundary PopNextBoundary() { - CHECK(!worklist_.empty()); - Boundary inst = worklist_.front(); - worklist_.pop_front(); - return inst; - } - void AddToWorkList(const Boundary& b) { - CHECK_GT(b.Operands().size(), 0); - worklist_.push_back(b); + HloInstruction* GetNextInstruction() { + if (!worklist_.empty()) { + HloInstruction* inst = worklist_.front(); + worklist_.pop_front(); + return inst; + } + return nullptr; } - bool HasNextBoundary() const { return !worklist_.empty(); } + // Add operands of one instruction to worklist for further visit. + void AddInstructionOperands(HloInstruction* inst) { + int64 operand_count = inst->operand_count(); + for (int i = 0; i < operand_count; i++) { + HloInstruction* operand = inst->mutable_operand(i); + if (ContainsKey(visited_, operand)) { + continue; + } + bool all_user_visited = std::all_of( + operand->users().begin(), operand->users().end(), + [&](HloInstruction* user) { return ContainsKey(visited_, user); }); + + if (!all_user_visited) { + continue; + } + // Do not visit parameter_instructions. + if (ContainsKey(parameter_instructions_, operand)) { + // Add the operand and this instruction to the boundaries. + boundaries_.emplace_back(operand, i, inst); + continue; + } + worklist_.push_back(operand); + visited_.insert(operand); + } + } + + // Add instruction and its users to conditional boundaries. + void AddInstructionToBoundary(HloInstruction* inst) { + for (auto user : inst->users()) { + boundaries_.emplace_back(inst, user->operand_index(inst), user); + } + } + + // Add instruction to the to be removed instructions set and vector. + void AddInstructionToHoist(HloInstruction* inst) { + instructions_to_hoist_set_.insert(inst); + instructions_to_hoist_.emplace_back(inst); + } + + // If visitor has next instruction to visit. + bool HasNextInstruction() const { return !worklist_.empty(); } + + // If there is no hoist intruction. + int64 HoistInstructionSize() { return instructions_to_hoist_.size(); } + + // Get boundaries of this branch. + const std::vector& boundaries() const { + return boundaries_; + } + + // Get instructions to hoist in this branch. + const std::vector& instructions_to_hoist() const { + return instructions_to_hoist_; + } + + // Get hoist instruction set in this branch. + const std::unordered_set& instructions_to_hoist_set() const { + return instructions_to_hoist_set_; + } private: // worklist is the deque that contains instructions to be visited. - std::deque worklist_; + std::deque worklist_; + + // instructions that has been visited. + std::unordered_set visited_; + + // parameter instructions of the branch. + std::unordered_set parameter_instructions_; + + // Boundaries contains the set of instructions that its operand is within + // conditional but it can be hoist out of conditional. + std::vector boundaries_; + + // Instructions to hoist. + std::unordered_set instructions_to_hoist_set_; + + // Instructions to hoist, the order within this vector is BFS and + // an instruction's order will always be after its users. + std::vector instructions_to_hoist_; }; -// Returns estimation of potential reuses carried by a given instruction. 
-// Use different integers to classify different levels of reuses -// This is used as a placeholder only, assuming all instructions can be -// fused to enable data reuses -int64 ReusesCarriedBy(HloInstruction* op, HloInstruction* user) { - VLOG(1) << "ConditionalCodeMotion: Add reuses carried by instr: " - << op->ToString() << "=>" << user->ToString() << "\n"; - switch (user->opcode()) { - case HloOpcode::kGetTupleElement: - return 0; - default: - break; +// Returns true if `instruction` is worth hoisting out. +bool WorthHoisting(HloInstruction* instruction) { + for (const auto* operand : instruction->operands()) { + // Only move out instructions that won't share the same operand + // to avoid copy of the operand. + if (operand->user_count() > 1) { + return false; + } } - switch (op->opcode()) { - // These instructions are lightweight and easy to fuse. - case HloOpcode::kConstant: - return 0; - default: - // Assume fusion will not happen anyway if user count > 1) - if (op->user_count() > 1) { - return 0; + switch (instruction->opcode()) { + case HloOpcode::kConvert: + // If Convert is after AllReduce, it is worth moving out AllReduce out + // of conditional for AR/CRS combine. If Convert is after other ops such + // as Dot or Convolutional, it is better to keep convert within + // conditional so that convert can be fused with Dot or Convolutional. + // + // TODO(b/154283721): figure out the scenario when convert can be fused + // with AllReduce out of conditional. + if (instruction->operand(0)->opcode() == HloOpcode::kAllReduce) { + return true; } - return 10; + return false; + case HloOpcode::kAllReduce: + case HloOpcode::kAdd: + case HloOpcode::kConstant: + case HloOpcode::kSubtract: + case HloOpcode::kMultiply: + case HloOpcode::kDivide: + case HloOpcode::kTuple: + case HloOpcode::kSqrt: + case HloOpcode::kGetTupleElement: + return true; + default: + return false; } } @@ -122,7 +220,7 @@ bool InstructionWithinBranchIdentical( return *a == *b; }; - if (instructions.empty()) { + if (instructions[0] == nullptr) { return false; } @@ -150,27 +248,109 @@ bool InstructionWithinBranchIdentical( }); } -// Copy identical instructions within conditional outside of conditional. -Status CopyOutOfConditional( - Boundary& boundary, HloComputation* conditional_parent, - absl::flat_hash_map& - hoisted_instructions) { - // Insert GetTupleElement before the instructions whose operands might still - // be within the conditional. - HloInstruction* op = boundary.Operands()[0]; - absl::InlinedVector new_operands; - for (int i = 0; i < op->operands().size(); ++i) { - auto op_i = op->operands()[i]; - VLOG(2) << "Looking for operand:" << op_i->ToString() << "\n"; - CHECK(ContainsKey(hoisted_instructions, op_i)); - new_operands.push_back(FindOrDie(hoisted_instructions, op_i)); +// Returns if all the visitors/branches has next instruction to visit. +bool HasNextInstruction(const std::vector& visitors) { + bool has_next = true; + for (const auto& visitor : visitors) { + has_next &= visitor.HasNextInstruction(); } - HloInstruction* new_instruction = conditional_parent->AddInstruction( - op->CloneWithNewOperands(op->shape(), new_operands)); - // Maps the instruction outside of conditional to the instruction - // inside of the conditional. - for (HloInstruction* op : boundary.Operands()) { - hoisted_instructions[op] = new_instruction; + return has_next; +} + +// Create tuple element as the new root of the branch. 
The tuple will contain +// the operands that can't move out of conditional but its user will be moved +// out of conditional. +HloInstruction* CreateNewRoot( + const std::vector& boundaries, + const std::unordered_set& instructions_to_hoist_set, + HloComputation* computation) { + std::vector elements; + elements.reserve(boundaries.size()); + for (auto boundary : boundaries) { + if (ContainsKey(instructions_to_hoist_set, boundary.user)) { + elements.push_back(boundary.operand); + } + } + return computation->AddInstruction(HloInstruction::CreateTuple(elements)); +} + +// Copy identical instructions within conditional outside of conditional. +void CopyIdenticalInstructionsOutOfConditional( + const std::vector& instructions_to_hoist, + HloComputation* conditional_parent, + absl::flat_hash_map* + hoisted_instructions) { + int64 instructions_size = instructions_to_hoist.size(); + // Visit the operands before its users and copy it, so that the copied + // user will point to the correct operand. + for (int64 i = instructions_size - 1; i >= 0; i--) { + HloInstruction* old_instruction = instructions_to_hoist[i]; + auto get_new_operand = [&](HloInstruction* old_operand) { + // If the operand can't be found in `instructions_to_hoist`, this + // operand will be in the `boundaries`, GetTupleElement instructions + // will be added later to replace this operand. + if (!ContainsKey(*hoisted_instructions, old_operand)) { + return old_operand; + } + return FindOrDie(*hoisted_instructions, old_operand); + }; + + absl::InlinedVector new_operands; + absl::c_transform(old_instruction->operands(), + std::back_inserter(new_operands), get_new_operand); + + HloInstruction* new_instruction = conditional_parent->AddInstruction( + old_instruction->CloneWithNewOperands(old_instruction->shape(), + new_operands)); + // Maps the instruction outside of conditional to the instruction + // inside of the conditional. + InsertOrDie(hoisted_instructions, old_instruction, new_instruction); + } +} + +// If there are instructions to hoist, the root of the conditional must be +// moved out. Change the users of the conditional to the hoisted instruction +// of the new root. +Status ChangeConditionalUsers( + HloInstruction* conditional, HloInstruction* old_root, + const absl::flat_hash_map& + hoisted_instructions) { + HloInstruction* new_root = FindOrDie(hoisted_instructions, old_root); + TF_RETURN_IF_ERROR(conditional->ReplaceAllUsesWith(new_root)); + return Status::OK(); +} + +// Insert GetTupleElement before the instructions whose operands might still +// be within the conditional. +Status CreateGetTupleElementAfterConditional( + const std::vector& boundaries, + const std::unordered_set& instructions_to_hoist_set, + const absl::flat_hash_map& + hoisted_instructions, + HloInstruction* conditional, HloComputation* computation) { + int boundary_instruction_size = boundaries.size(); + + // Inserts GetTupleElement before the boundary instructions. + for (int i = 0; i < boundary_instruction_size; i++) { + HloInstruction* gte = + computation->AddInstruction(HloInstruction::CreateGetTupleElement( + boundaries[i].operand->shape(), conditional, i)); + + HloInstruction* new_instruction = + FindOrDie(hoisted_instructions, boundaries[i].user); + TF_RETURN_IF_ERROR( + new_instruction->ReplaceOperandWith(boundaries[i].operand_index, gte)); + } + return Status::OK(); +} + +// Remove instructions to be hoisted out of the branch computation. 
+Status RemoveInstructionFromComputation(
+    const std::vector<HloInstruction*>& instructions_to_hoist,
+    HloComputation* branch) {
+  // Will visit each instruction after its users.
+  for (auto* instruction : instructions_to_hoist) {
+    TF_RETURN_IF_ERROR(branch->RemoveInstruction(instruction));
   }
   return Status::OK();
 }
@@ -394,359 +574,128 @@ StatusOr<bool> ConvertSpecialMove(HloInstruction* conditional,
 // are the shape of the operands are identical and their properties are
 // identical. Will start from the root instruction of each branch and get
 // the identical ops to hoist.
-StatusOr<bool> ConditionalCodeMotion::MoveInstructionOut(
-    HloInstruction* conditional, std::vector<Boundary>& to_move_out,
-    std::vector<Boundary>& new_boundaries) {
-  if (to_move_out.empty()) {
+StatusOr<bool> MergeIdenticalElements(HloInstruction* conditional,
+                                      bool is_layout_sensitive) {
+  VLOG(1) << " visiting conditional:" << conditional->ToString();
+  int branch_count = conditional->branch_count();
+  if (branch_count <= 0) {
     return false;
   }
-  VLOG(1) << "number of boundaries to move out:" << to_move_out.size() << "\n";
-  HloComputation* conditional_parent = conditional->parent();
-  // save the old users before add new conditional user instructions
-  std::vector<HloInstruction*> old_conditional_users = conditional->users();
-  absl::flat_hash_map<HloInstruction*, HloInstruction*> hoisted_instructions;
-  // Maps instructions in the conditional body to instructions hoisted outside
-  // the conditional that compute the same value.
-  VLOG(2) << "before opt:"
-          << conditional_parent->ToString(HloPrintOptions::Fingerprint())
-          << "\n";
-  int64 op_index = 0;
-  for (Boundary b : new_boundaries) {
-    HloInstruction* op = b.Operands()[0];
-    CHECK(op != nullptr);
-    VLOG(2) << "Mapping new boundary instr: " << op->ToString() << "\n";
-    HloInstruction* gtr = conditional_parent->AddInstruction(
-        HloInstruction::CreateGetTupleElement(op->shape(), conditional,
-                                              op_index++));
-    hoisted_instructions[op] = gtr;
+
+  std::vector<BranchVisitor> visitors;
+  visitors.reserve(branch_count);
+  // Visit instructions from the root instruction to the operands using BFS.
+  for (int i = 0; i < branch_count; i++) {
+    visitors.emplace_back(BranchVisitor(conditional->branch_computation(i)));
   }
-  // Copy boundary instructions out of the conditional.
-  // Visit the operands before its users and copy it, so that the copied
-  // user will point to the correct operand.
-  for (int64 i = to_move_out.size() - 1; i >= 0; i--) {
-    TF_RETURN_IF_ERROR(CopyOutOfConditional(to_move_out[i], conditional_parent,
-                                            hoisted_instructions));
+
+  // The instructions to be visited within each branch.
+  std::vector<HloInstruction*> front_instructions(branch_count);
+
+  while (HasNextInstruction(visitors)) {
+    for (int i = 0; i < branch_count; i++) {
+      front_instructions[i] = visitors[i].GetNextInstruction();
+    }
+    // If two instructions have the same shape, opcode, and operand shapes,
+    // then the instruction can be moved out of the conditional.
+    if (WorthHoisting(front_instructions[0]) &&
+        InstructionWithinBranchIdentical(front_instructions,
+                                         is_layout_sensitive)) {
+      for (int i = 0; i < branch_count; i++) {
+        visitors[i].AddInstructionOperands(front_instructions[i]);
+        visitors[i].AddInstructionToHoist(front_instructions[i]);
+      }
+    } else {
+      for (int i = 0; i < branch_count; i++) {
+        // If the ops are not identical, these ops and their users will
+        // be in the `boundaries` of the conditional. These ops will stay
+        // within the conditional, but their only user will be moved out
+        // of the conditional.
+ visitors[i].AddInstructionToBoundary(front_instructions[i]); + } + } } - VLOG(2) << "Done copy branch instructions out\n" - << conditional_parent->ToString(HloPrintOptions::Fingerprint()) - << "\n"; - // Change original users of the conditional to use the correct operands. + + if (visitors[0].HoistInstructionSize() < 1) { + return false; + } + HloInstruction* old_root = conditional->branch_computation(0)->root_instruction(); - for (auto user_instr : old_conditional_users) { - CHECK(user_instr->opcode() == HloOpcode::kGetTupleElement); - auto tuple_opd = down_cast(user_instr); - int64 index = tuple_opd->tuple_index(); - HloInstruction* old_opd = old_root->operands()[index]; - HloInstruction* new_opd = hoisted_instructions[old_opd]; - CHECK(old_opd != nullptr); - CHECK(new_opd != nullptr); - TF_RETURN_IF_ERROR(user_instr->ReplaceAllUsesWith(new_opd)); - TF_RETURN_IF_ERROR(conditional_parent->RemoveInstruction(user_instr)); - } + HloComputation* conditional_parent = conditional->parent(); + // Maps instructions in the conditional body to instructions hoisted outside + // the conditional that compute the same value. + absl::flat_hash_map hoisted_instructions; + // Copy identical instructions out of the conditional. + CopyIdenticalInstructionsOutOfConditional(visitors[0].instructions_to_hoist(), + conditional_parent, + &hoisted_instructions); + // If there are instructions to hoist, the root of the conditional must be + // moved out. Change the users of the conditional to the hoisted instruction + // of the new root. + TF_RETURN_IF_ERROR( + ChangeConditionalUsers(conditional, old_root, hoisted_instructions)); + // Create tuple element within each branch and set it as root. - int64 branch_count = conditional->branch_count(); for (int i = 0; i < branch_count; i++) { - auto computation = conditional->branch_computation(i); - std::vector elements; - for (auto b1 : new_boundaries) { - HloInstruction* op = b1.Operands()[i]; - VLOG(1) << "branch count=" << i << "\n"; - CHECK(op != nullptr); - VLOG(1) << "Adding to root " << i << " with " << op->ToString() << "\n"; - elements.push_back(op); - } - HloInstruction* tuple = - computation->AddInstruction(HloInstruction::CreateTuple(elements)); - computation->set_root_instruction(tuple, true); - VLOG(2) << "computation is :" << computation->ToString() << "\n"; - // Remove hoisted instructions from the branches. - for (auto b2 : to_move_out) { - VLOG(2) << "Removing boundary:" << b2.ToString() << "\n"; - TF_RETURN_IF_ERROR(computation->RemoveInstruction(b2.Operands()[i])); - } + HloInstruction* tuple = CreateNewRoot( + visitors[i].boundaries(), visitors[i].instructions_to_hoist_set(), + conditional->branch_computation(i)); + conditional->branch_computation(i)->set_root_instruction(tuple, true); + } + // Changes conditional instruction shape to the shape of the new root. + *conditional->mutable_shape() = + conditional->branch_computation(0)->root_instruction()->shape(); + + // Insert GetTupleElement before the instructions whose operands might still + // be within the conditional. + TF_RETURN_IF_ERROR(CreateGetTupleElementAfterConditional( + visitors[0].boundaries(), visitors[0].instructions_to_hoist_set(), + hoisted_instructions, conditional, conditional_parent)); + + // Remove hoist instructions from the branches. 
+ for (int i = 0; i < branch_count; i++) { + TF_RETURN_IF_ERROR( + RemoveInstructionFromComputation(visitors[i].instructions_to_hoist(), + conditional->branch_computation(i))); } - // Change conditional instruction shape to the shape of the new root. - HloInstruction* new_root = - conditional->branch_computation(0)->root_instruction(); - *conditional->mutable_shape() = new_root->shape(); - // - VLOG(2) << "done moving instructions out of branches\n" - << conditional_parent->ToString(HloPrintOptions::Fingerprint()) - << "\n"; return true; } -// Group single chains of operands or uses of boundaries into new boundaries -class GroupConnectedBoundaries { - private: - std::unordered_set visited_; - std::vector connected_boundaries_, new_boundaries_; - HloInstruction* conditional_; - bool is_layout_sensitive_; - - public: - explicit GroupConnectedBoundaries(HloInstruction* conditional, - bool is_layout_sensitive) - : conditional_(conditional), is_layout_sensitive_(is_layout_sensitive) {} - // Returns true if `instruction` is worth hoisting out. - bool WorthHoisting(HloInstruction* instruction) { - switch (instruction->opcode()) { - case HloOpcode::kConvert: - // If Convert is after AllReduce, it is worth moving out AllReduce out - // of conditional for AR/CRS combine. If Convert is after other ops such - // as Dot or Convolutional, it is better to keep convert within - // conditional so that convert can be fused with Dot or Convolutional. - // - // TODO(b/154283721): figure out the scenario when convert can be fused - // with AllReduce out of conditional. - switch (instruction->operand(0)->opcode()) { - case HloOpcode::kAllReduce: - case HloOpcode::kReshape: - return true; - default: - VLOG(1) << "Instruction is convert and its operand is not know to " - "be worth hoisting\n"; - return false; - } - case HloOpcode::kAllReduce: - case HloOpcode::kAdd: - case HloOpcode::kConstant: - case HloOpcode::kSubtract: - case HloOpcode::kMultiply: - case HloOpcode::kDivide: - case HloOpcode::kTuple: - case HloOpcode::kSqrt: - case HloOpcode::kReshape: - case HloOpcode::kGetTupleElement: - return true; - default: - VLOG(1) << "Instruction is not known to be worth hoisting\n"; - return false; - } - } - // Calculates the degree of reuses carried by a pair of conditional - // boundaries, if b1 is inside a conditional and b2 is outside. - int64 ReusesBeforeBoundary(HloInstruction* user) { - int64 reuses = 0; - for (auto op : user->operands()) { - // Only consider single-user cases as reuseable. - if (ContainsKey(visited_, op) && op->user_count() == 1) { - reuses += ReusesCarriedBy(op, user); - } - } - VLOG(1) << "cost to be paied after moving out" << user->ToString() << ":" - << reuses << "\n"; - return reuses; - } - - int64 ReusesAfterBoundary(HloInstruction* user) { - CHECK(user != nullptr); - auto all_users = user->users(); - // For now, assume that if an instruction has multiple-consumers, it will - // not be reused (the reuse currently requires duplication in fusion and so - // is expensive). - if (all_users.size() > 1) { - return 0; - } - if (!all_users.empty()) { - auto op = all_users[0]; - int64 reuses = 0; - // Only count reuses that run through the conditional root. 
- if (op == conditional_->branch_computation(0)->root_instruction()) { - int64 index = op->operand_index(user); - for (auto op2 : conditional_->users()) { - CHECK(op2->opcode() == HloOpcode::kGetTupleElement); - auto tuple_opd = down_cast(op2); - if (index == tuple_opd->tuple_index()) { - all_users = op2->users(); - if (!all_users.empty()) { - reuses += ReusesCarriedBy(user, all_users[0]); - break; - } - } - } - } - VLOG(1) << "reuses to be gained after moving " << user->ToString() << ":" - << reuses << "\n"; - return reuses; - } - return 0; - } - - int64 BenefitForMovingBoundaries(const std::vector& boundaries) { - int64 reuses_before = 0, reuses_after = 0; - for (Boundary b : boundaries) { - auto op = b.Operands()[0]; - if (op == conditional_->branch_computation(0)->root_instruction()) { - continue; - } - reuses_before += ReusesBeforeBoundary(op); - VLOG(1) << "Cost of moving so far: " << reuses_before << "\n"; - reuses_after += ReusesAfterBoundary(op); - VLOG(1) << "Benefit from moving so far : " << reuses_after << "\n"; - } - if (reuses_after == 0 && reuses_before == 0) { - return -1; - } else if (boundaries[0].IsInsideBranch()) { - return reuses_after - reuses_before; - } else { - return reuses_before - reuses_after; - } - } - - Boundary GetNextBoundary(const Boundary& b, int64 op_index) { - Boundary b2(b.GetPosition()); - CHECK(b.Operands().size() == conditional_->branch_count()); - for (int j = 0; j < b.Operands().size(); ++j) { - HloInstruction* inst = b.Operands()[j]; - CHECK(inst != nullptr); - HloInstruction* op = (b.IsInsideBranch()) ? inst->operands()[op_index] - : inst->users()[op_index]; - CHECK(op != nullptr); - b2.Operands().push_back(op); - } - return b2; - } - void AddBoundaries(const Boundary& boundary) { - BoundaryVisitor visitor; - visitor.AddToWorkList(boundary); - while (visitor.HasNextBoundary()) { - Boundary b = visitor.PopNextBoundary(); - // if b is already visited, it must have multiple users and is already in - // new boundaries. Skip it. - if (ContainsKey(visited_, b.Operands()[0])) { - continue; - } - VLOG(1) << "visiting boundary " << b.ToString() << "\n"; - if ((b.Operands().size() == 1 || - InstructionWithinBranchIdentical(b.Operands(), - is_layout_sensitive_)) && - WorthHoisting(b.Operands()[0])) { - connected_boundaries_.push_back(b); - VLOG(1) << "boundary can be moved\n"; - int64 operand_count = (b.IsInsideBranch()) - ? b.Operands()[0]->operand_count() - : b.Operands()[0]->users().size(); - for (int i = 0; i < operand_count; i++) { - Boundary b2 = GetNextBoundary(b, i); - int64 b2_count = (b2.IsInsideBranch()) - ? b2.Operands()[0]->user_count() - : b2.Operands()[0]->operand_count(); - // only consider adding an exclusive producor into the same group. - if (b2_count == 1) { - VLOG(2) << "Add operand " << i << " to visit later\n"; - visitor.AddToWorkList(b2); - } else { - VLOG(2) << "Operand " << i << " has multiple uses\n"; - if (!ContainsKey(visited_, b2.Operands()[0])) { - visited_.insert(b2.Operands()[0]); - new_boundaries_.push_back(b2); - } - } - } - } else { - VLOG(1) << "boundary cannot be moved\n"; - visited_.insert(b.Operands()[0]); - new_boundaries_.push_back(b); - } - } - } - std::vector BoundariesToMoveOut(const Boundary& b) { - HloInstruction* inst = b.Operands()[0]; - if (inst->opcode() == HloOpcode::kConditional) { - int branch_count = inst->branch_count(); - // Visit instructions from the root instruction to the operands using BFS. 
- Boundary boundary_in(Boundary::Position::kInsideBranch); - for (int i = 0; i < branch_count; i++) { - HloComputation* branch_computation = inst->branch_computation(i); - HloInstruction* root_inst = branch_computation->root_instruction(); - CHECK(root_inst != nullptr); - boundary_in.Operands().push_back(root_inst); - } - AddBoundaries(boundary_in); - } - return connected_boundaries_; - } - std::vector BoundariesToMoveIn(const Boundary& b) { - if (b.IsInsideBranch()) { - return std::vector(); - } - AddBoundaries(b); - return connected_boundaries_; - } - std::vector GetNewBoundaries() { return new_boundaries_; } -}; - -ConditionalCodeMotion::Decision ConditionalCodeMotion::ConsiderCodeMotion( - HloInstruction* conditional, const Boundary& cur_boundary, - std::vector& to_move, std::vector& new_boundaries) { - GroupConnectedBoundaries connect(conditional, is_layout_sensitive_); - auto move_out = connect.BoundariesToMoveOut(cur_boundary); - if (!move_out.empty()) { - std::vector next_boundaries = connect.GetNewBoundaries(); - auto benefit = connect.BenefitForMovingBoundaries(move_out); - VLOG(1) << "benefit of moving " << cur_boundary.Operands()[0]->ToString() - << ":" << benefit << "\n"; - if (benefit >= 0) { - new_boundaries = next_boundaries; - to_move = move_out; - return Decision::kMoveOutOfBranch; - } - } - return ConditionalCodeMotion::Decision::kNoChange; -} +} // namespace StatusOr ConditionalCodeMotion::Run(HloModule* module) { - // Gather all the conditional ops in the module ahead of time, to avoid - // potential complications of modifying the code that affecting traversal. - std::vector conditional_ops; - for (auto* comp : module->MakeComputationPostOrder()) { - for (auto* instr : comp->MakeInstructionPostOrder()) { - if (instr->opcode() == HloOpcode::kConditional) { - conditional_ops.push_back(instr); + bool changed = false; + + if (pursue_full_conditional_code_motion_) { + std::vector conditional_ops; + for (auto* comp : module->MakeComputationPostOrder()) { + for (auto* instr : comp->MakeInstructionPostOrder()) { + if (instr->opcode() == HloOpcode::kConditional) { + conditional_ops.push_back(instr); + } } } + + for (HloInstruction* conditional_op : conditional_ops) { + TF_ASSIGN_OR_RETURN( + bool result, + MergeIdenticalElements(conditional_op, is_layout_sensitive_)); + changed |= result; + } + + if (changed) { + HloPassPipeline subpipeline("after_conditional_code_motion"); + subpipeline.AddPass(); + subpipeline.AddPass(); + subpipeline.AddPass(); + TF_ASSIGN_OR_RETURN(bool cleanup_changed, subpipeline.Run(module)); + changed |= cleanup_changed; + } } - bool changed = false; - std::vector to_move_out, to_move_in, new_boundaries; - for (HloInstruction* conditional : conditional_ops) { - BoundaryVisitor visitor(conditional); - VLOG(2) << "Analyzing conditional:" << conditional->ToString() << "\n"; - // Boundariess to move out of and to move into the branches. 
- while (visitor.HasNextBoundary()) { - std::vector to_move, next_boundary; - Boundary boundary = visitor.PopNextBoundary(); - VLOG(2) << "Analyzing boundary:" << boundary.ToString() << "\n"; - ConditionalCodeMotion::Decision d = - ConsiderCodeMotion(conditional, boundary, to_move, next_boundary); - switch (d) { - case Decision::kMoveOutOfBranch: - VLOG(2) << "Decision is move out of branch\n"; - to_move_out.insert(to_move_out.end(), to_move.begin(), to_move.end()); - break; - case Decision::kMoveIntoBranch: - VLOG(2) << "Decision is move into branch\n"; - to_move_in.insert(to_move_in.end(), to_move.begin(), to_move.end()); - break; - case Decision::kNoChange: - VLOG(2) << "Decision is no change\n"; - new_boundaries.push_back(boundary); - break; - } - for (const Boundary& b : next_boundary) { - visitor.AddToWorkList(b); - } - } - TF_ASSIGN_OR_RETURN( - bool result, - MoveInstructionOut(conditional, to_move_out, new_boundaries)); - VLOG(2) << "moving out result:" << result << "\n"; - changed |= result; - } // handling convert rematerialization/hoisting - if (!changed && pursue_full_conditional_code_motion_) { + { std::vector conditional_ops; for (auto* comp : module->MakeComputationPostOrder()) { for (auto* instr : comp->MakeInstructionPostOrder()) { @@ -762,6 +711,7 @@ StatusOr ConditionalCodeMotion::Run(HloModule* module) { changed |= convert_result; } } + if (changed) { HloPassPipeline subpipeline( "after_conditional_code_motion_after_convert_hoisting"); @@ -771,8 +721,8 @@ StatusOr ConditionalCodeMotion::Run(HloModule* module) { TF_ASSIGN_OR_RETURN(bool cleanup_changed, subpipeline.Run(module)); changed |= cleanup_changed; } + return changed; } -} // namespace conditional_opt } // namespace xla diff --git a/tensorflow/compiler/xla/service/conditional_code_motion.h b/tensorflow/compiler/xla/service/conditional_code_motion.h index d7295058467..95f02833e15 100644 --- a/tensorflow/compiler/xla/service/conditional_code_motion.h +++ b/tensorflow/compiler/xla/service/conditional_code_motion.h @@ -23,80 +23,35 @@ limitations under the License. namespace xla { -namespace conditional_opt { -// At the conceptural level, a boundary can be thought of as representing a -// single virtual operation, except this virtual operation is conditionally -// instantiated into different concrete operations at each conditional branch. -// So a boundary is mapped to a single concrete operation if it is outside of -// conditional branches, and is mapped to a list of instructions if inside the -// branches. This data structure therefore allows a common data structure -// representation of the instructions to be moved, whether they are inside or -// outside of the branches. Subsequently, it allows a common implementation -// basis to be used for both moving instructions out of and for moving them -// inside branches. 
-class Boundary { - public: - enum class Position { kInsideBranch, kOutsideBranch }; - explicit Boundary(Position p) : position_(p) {} - std::vector& Operands() { return operands_; } - const std::vector& Operands() const { return operands_; } - bool IsInsideBranch() const { return position_ == Position::kInsideBranch; } - bool IsOutsideBranch() const { return position_ == Position::kOutsideBranch; } - Position GetPosition() const { return position_; } - bool IsEmpty() const { return operands_.empty(); } - std::string ToString() const { - std::string res; - for (HloInstruction* op : operands_) { - res += op->ToString() + ";"; - } - return res; - } - - private: - // Boundary instructions in the conditional branches, one from each branch - // of the conditional. - std::vector operands_; - Position position_; -}; - -// HLO pass that moves identical ops in/out of conditional. +// ConditionalCodeMotion specializes in hoisting/rematerializing +// unconditional converts in the default mode. +// When pursue_full_conditional_code_motion_ is set to true, the +// full HLO pass moves identical ops out of a conditional in addition to moving +// converts. // - The definition of identical are the shape of the operands are identical // and their properties are identical. +// - Currently, only some types of instructions is supported. +// TODO(b/154283721): relax non-sharable operand constraint and avoid copies in +// the new root. // - Only the identical ops that won't share operands with other ops will // be moved out of conditional. class ConditionalCodeMotion : public HloModulePass { public: // If is_layout_sensitive is true, then the hoist process preserves layout // during identical comparison. Otherwise, layout is ignored. - explicit ConditionalCodeMotion(bool is_layout_sensitive, - bool pursue_full_conditional_code_motion) + explicit ConditionalCodeMotion( + bool is_layout_sensitive = true, + bool pursue_full_conditional_code_motion = false) : is_layout_sensitive_(is_layout_sensitive), pursue_full_conditional_code_motion_( pursue_full_conditional_code_motion) {} absl::string_view name() const override { return "conditional-code-motion"; } StatusOr Run(HloModule* module) override; - // Optimization decision for each boundary of the conditional instruction. - enum class Decision { kMoveOutOfBranch, kMoveIntoBranch, kNoChange }; - // If the optimization decision is NO_CHANGE, new_boundary is set to nullptr; - // otherwise, it is set to the new boundary after proposed optimization. - virtual Decision ConsiderCodeMotion(HloInstruction* conditional, - const Boundary& cur_boundary, - std::vector& to_move, - std::vector& new_boundaries); - private: const bool is_layout_sensitive_; const bool pursue_full_conditional_code_motion_; - - StatusOr MoveInstructionOut(HloInstruction* conditional, - std::vector& to_move_out, - std::vector& new_boundaries); - StatusOr MoveInstructionIn(HloInstruction* conditional, - std::vector& to_move_in, - std::vector& new_boundaries); }; -} // namespace conditional_opt } // namespace xla diff --git a/tensorflow/compiler/xla/service/conditional_code_motion_test.cc b/tensorflow/compiler/xla/service/conditional_code_motion_test.cc index b3c5e17094a..38b2b515fa0 100644 --- a/tensorflow/compiler/xla/service/conditional_code_motion_test.cc +++ b/tensorflow/compiler/xla/service/conditional_code_motion_test.cc @@ -33,7 +33,7 @@ limitations under the License. 
#include "tensorflow/core/platform/types.h" namespace xla { -namespace conditional_opt { +namespace { using ConditionalCodeMotionTest = HloTestBase; namespace op = xla::testing::opcode_matchers; @@ -117,47 +117,6 @@ ENTRY main { EXPECT_THAT(root, AllOf(op::Tuple(op::Convert()))); } -TEST_F(ConditionalCodeMotionTest, MoveConvertOutConditional) { - absl::string_view hlo_string = - R"( -HloModule RemoveDotOpOut - -on_true { - %arg_tuple.1 = (f32[93184,4]{1,0}) parameter(0) - %get-tuple-element.1 = f32[93184,4]{1,0} get-tuple-element(%arg_tuple.1), index=0 - %reshape.8493 = f32[2,512,364]{2,1,0} reshape(f32[93184,4]{1,0} %get-tuple-element.1) - %add.8493 = f32[2,512,364]{2,1,0} add(f32[2,512,364]{2,1,0} %reshape.8493, f32[2,512,364]{2,1,0} %reshape.8493) - %convert.2894 = bf16[2,512,364]{2,1,0} convert(f32[2,512,364]{2,1,0} %add.8493) - ROOT %tuple.1 = ( bf16[2,512,364]{2,1,0}) tuple(%convert.2894) -} - -on_false { - %arg_tuple.2 = (f32[93184,4]{1,0}) parameter(0) - %get-tuple-element.3 = f32[93184,4]{1,0} get-tuple-element(%arg_tuple.2), index=0 - %reshape.9717 = f32[2,512,364]{2,1,0} reshape(f32[93184,4]{1,0} %get-tuple-element.3) - %add.8493 = f32[2,512,364]{2,1,0} add(f32[2,512,364]{2,1,0} %reshape.9717, f32[2,512,364]{2,1,0} %reshape.9717) - %sub.8493 = f32[2,512,364]{2,1,0} subtract(f32[2,512,364]{2,1,0} %add.8493, f32[2,512,364]{2,1,0} %reshape.9717) - %convert.3604 = bf16[2,512,364]{2,1,0} convert(f32[2,512,364]{2,1,0} %reshape.9717), metadata={op_type="Cast" op_name="gradients/Cast_125_grad/Cast"} - ROOT %tuple.2 = (bf16[2,512,364]{2,1,0}) tuple(%convert.3604) -} - -ENTRY main { - pred.1 = pred[] parameter(0) - arg_tuple.11 = (f32[93184,4]{1,0}) parameter(1) - arg_tuple.22 = (f32[93184,4]{1,0}) parameter(2) - conditional = (bf16[2,512,364]{2,1,0}) conditional(pred.1, arg_tuple.11, arg_tuple.22), true_computation=on_true, false_computation=on_false - get-first-index = bf16[2,512,364]{2,1,0} get-tuple-element(conditional), index=0 - ROOT result = (bf16[2,512,364]{2,1,0}) tuple(get-first-index) -} -)"; - auto module = ParseAndReturnVerifiedModule(hlo_string).ValueOrDie(); - ConditionalCodeMotion pass(true, true); - ASSERT_TRUE(pass.Run(&*module).ValueOrDie()); - - HloInstruction* root = module->entry_computation()->root_instruction(); - EXPECT_THAT(root, AllOf(op::Tuple(op::Convert()))); -} - TEST_F(ConditionalCodeMotionTest, MoveConvertOut) { absl::string_view hlo_string = R"( @@ -193,20 +152,8 @@ ENTRY main { ConditionalCodeMotion pass(true, true); ASSERT_TRUE(pass.Run(&*module).ValueOrDie()); - const HloInstruction* conditional = - FindInstruction(module.get(), "conditional"); - const HloComputation* on_true = conditional->branch_computation(0); - ASSERT_EQ(on_true->instruction_count(), 2); - const HloComputation* on_false = conditional->branch_computation(1); - ASSERT_EQ(on_false->instruction_count(), 2); - HloInstruction* root = module->entry_computation()->root_instruction(); - EXPECT_THAT( - root, - AllOf(op::Tuple(op::Add(op::Convert(op::Reshape(op::GetTupleElement( - op::GetTupleElement(op::Conditional())))), - op::Convert(op::Reshape(op::GetTupleElement( - op::GetTupleElement(op::Conditional())))))))); + EXPECT_THAT(root, AllOf(op::Tuple(op::Add(op::Convert(), op::Convert())))); } TEST_F(ConditionalCodeMotionTest, UserShareOperandCannotBeMoved) { @@ -226,7 +173,7 @@ on_true { add.2 = f32[] add(add.1, constant.2) add.3 = f32[] add(add.1, constant.3) add.4 = f32[] add(add.3, constant.5) - multiply.1 = f32[] multiply(add.4, constant.4) + multiply.1 = f32[] multiply(add.2, 
constant.4)
   ROOT tuple.6 = (f32[], f32[]) tuple(multiply.1, add.4)
 }
@@ -269,11 +216,13 @@ ENTRY main {
   const HloComputation* on_false = conditional->branch_computation(1);
   ASSERT_EQ(on_false->instruction_count(), 9);
+  // Check that only one add and one multiply are moved out.
   HloInstruction* root = module->entry_computation()->root_instruction();
   EXPECT_THAT(
-      root, AllOf(op::Tuple(op::Multiply(op::GetTupleElement(op::Conditional()),
-                                         op::Constant()),
-                            op::GetTupleElement(op::Conditional()))));
+      root,
+      AllOf(op::Tuple(
+          op::Multiply(op::GetTupleElement(op::Conditional()), op::Constant()),
+          op::Add(op::GetTupleElement(op::Conditional()), op::Constant()))));
 }
 
 TEST_F(ConditionalCodeMotionTest, ConditionalRootElementChanged) {
@@ -320,16 +269,16 @@ ENTRY main {
   const HloInstruction* conditional =
       FindInstruction(module.get(), "conditional");
   const HloComputation* on_true = conditional->branch_computation(0);
-  ASSERT_EQ(on_true->instruction_count(), 1);
+  ASSERT_EQ(on_true->instruction_count(), 7);
   const HloComputation* on_false = conditional->branch_computation(1);
-  ASSERT_EQ(on_false->instruction_count(), 1);
-  HloInstruction* root = module->entry_computation()->root_instruction();
-  EXPECT_THAT(
-      root,
-      AllOf(op::Tuple(op::Add(
-          op::Add(op::GetTupleElement(op::Conditional()), op::Constant()),
-          op::Add(op::GetTupleElement(op::Conditional()), op::Constant())))));
+  ASSERT_EQ(on_false->instruction_count(), 7);
+  // add.3 in on_true will be moved out; add.1 and add.2 will be in the
+  // conditional root.
+  ASSERT_TRUE(ShapeUtil::Compatible(
+      conditional->shape(),
+      ShapeUtil::MakeTupleShape(
+          {ShapeUtil::MakeShape(F32, {}), ShapeUtil::MakeShape(F32, {})})));
 }
 
 TEST_F(ConditionalCodeMotionTest, ConditionalIsRootInstruction) {
@@ -380,9 +329,24 @@ ENTRY main {
 )";
   auto module = ParseAndReturnVerifiedModule(hlo_string).ValueOrDie();
   ConditionalCodeMotion pass(true, true);
-  // If there is no instruction after the conditional, there is no benefit to
-  // move
-  ASSERT_FALSE(pass.Run(&*module).ValueOrDie());
+  ASSERT_TRUE(pass.Run(&*module).ValueOrDie());
+
+  const HloInstruction* conditional =
+      FindInstruction(module.get(), "conditional");
+  const HloComputation* on_true = conditional->branch_computation(0);
+  ASSERT_EQ(on_true->instruction_count(), 9);
+  const HloComputation* on_false = conditional->branch_computation(1);
+  ASSERT_EQ(on_false->instruction_count(), 9);
+
+  // Check that only one add and one multiply are moved out.
+  // add.3 and add.5 can't be moved out because they share operands with
+  // other instructions. 
+ HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT( + root, + AllOf(op::Tuple( + op::Multiply(op::GetTupleElement(op::Conditional()), op::Constant()), + op::Add(op::GetTupleElement(op::Conditional()), op::Constant())))); } TEST_F(ConditionalCodeMotionTest, LayoutMisMatchCannotMovedOut) { @@ -505,8 +469,7 @@ ENTRY main { false_computation=on_false get-first-index = f32[3,3,128,128] get-tuple-element(conditional), index=0 - add.1 = f32[3,3,128,128] add(f32[3,3,128,128] get-first-index, f32[3,3,128,128] get-first-index) - ROOT result = (f32[3,3,128,128]) tuple(add.1) + ROOT result = (f32[3,3,128,128]) tuple(get-first-index) } )"; auto module = ParseAndReturnVerifiedModule(hlo_string).ValueOrDie(); @@ -524,14 +487,10 @@ ENTRY main { conditional->shape(), ShapeUtil::MakeTupleShape({ShapeUtil::MakeShape( BF16, {3, 3, 128, 128})}))); HloInstruction* root = module->entry_computation()->root_instruction(); - EXPECT_THAT( - root, - AllOf(op::Tuple(op::Add( - op::Convert(op::AllReduce(op::GetTupleElement(op::Conditional()))), - op::Convert( - op::AllReduce(op::GetTupleElement(op::Conditional()))))))); + EXPECT_THAT(root, AllOf(op::Tuple(op::Convert(op::AllReduce( + op::GetTupleElement(op::Conditional())))))); } -} // namespace conditional_opt +} // namespace } // namespace xla From 072225c268db1a42c4838f60711b39b22e95b834 Mon Sep 17 00:00:00 2001 From: WindQAQ Date: Sun, 12 Jul 2020 09:00:37 -0700 Subject: [PATCH 0247/2522] Test scatter mul --- .../python/kernel_tests/resource_variable_ops_test.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py index 16b6cb9c21b..e0146846d11 100644 --- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py +++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py @@ -622,12 +622,15 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase, indices=[1], values=constant_op.constant([2.0], dtype=dtype)))) self.assertAllCloseAccordingToType([0.0, 2.0], self.evaluate(v)) + @parameterized.parameters(dtypes.float16, dtypes.float32, dtypes.float64) @test_util.run_in_graph_and_eager_modes - def testScatterMulVariableMethod(self): - v = resource_variable_ops.ResourceVariable([0.0, 4.0], name="mul") + def testScatterMulVariableMethod(self, dtype): + v = resource_variable_ops.ResourceVariable( + [0.0, 4.0], name="mul", dtype=dtype) self.evaluate(variables.global_variables_initializer()) self.evaluate( - v.scatter_mul(ops.IndexedSlices(indices=[1], values=[3.0]))) + v.scatter_mul(ops.IndexedSlices( + indices=[1], values=constant_op.constant([3.0], dtype=dtype)))) self.assertAllEqual([0.0, 12.0], self.evaluate(v)) @test_util.run_in_graph_and_eager_modes From f0f92f755142780fc1701ab44c59188c2cf6c338 Mon Sep 17 00:00:00 2001 From: WindQAQ Date: Sun, 12 Jul 2020 09:46:56 -0700 Subject: [PATCH 0248/2522] Test scatter div --- .../kernel_tests/resource_variable_ops_test.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py index e0146846d11..d5bf02d56b2 100644 --- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py +++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py @@ -631,15 +631,18 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase, self.evaluate( v.scatter_mul(ops.IndexedSlices( indices=[1], 
values=constant_op.constant([3.0], dtype=dtype)))) - self.assertAllEqual([0.0, 12.0], self.evaluate(v)) + self.assertAllCloseAccordingToType([0.0, 12.0], self.evaluate(v)) + @parameterized.parameters(dtypes.float16, dtypes.float32, dtypes.float64) @test_util.run_in_graph_and_eager_modes - def testScatterDivVariableMethod(self): - v = resource_variable_ops.ResourceVariable([0.0, 6.0], name="div") + def testScatterDivVariableMethod(self, dtype): + v = resource_variable_ops.ResourceVariable( + [0.0, 6.0], name="div", dtype=dtype) self.evaluate(variables.global_variables_initializer()) self.evaluate( - v.scatter_div(ops.IndexedSlices(indices=[1], values=[2.0]))) - self.assertAllEqual([0.0, 3.0], self.evaluate(v)) + v.scatter_div(ops.IndexedSlices( + indices=[1], values=constant_op.constant([2.0], dtype=dtype)))) + self.assertAllCloseAccordingToType([0.0, 3.0], self.evaluate(v)) @test_util.run_in_graph_and_eager_modes def testScatterUpdateVariableMethod(self): From 6de1348dbb60ecfcede5283efb2faf253f08e4de Mon Sep 17 00:00:00 2001 From: WindQAQ Date: Sun, 12 Jul 2020 10:04:59 -0700 Subject: [PATCH 0249/2522] Test scatter update --- .../python/kernel_tests/resource_variable_ops_test.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py index d5bf02d56b2..b5409ac663f 100644 --- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py +++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py @@ -644,13 +644,16 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase, indices=[1], values=constant_op.constant([2.0], dtype=dtype)))) self.assertAllCloseAccordingToType([0.0, 3.0], self.evaluate(v)) + @parameterized.parameters(dtypes.float16, dtypes.float32, dtypes.float64) @test_util.run_in_graph_and_eager_modes - def testScatterUpdateVariableMethod(self): - v = resource_variable_ops.ResourceVariable([0.0, 6.0], name="update") + def testScatterUpdateVariableMethod(self, dtype): + v = resource_variable_ops.ResourceVariable( + [0.0, 6.0], name="update", dtype=dtype) self.evaluate(variables.global_variables_initializer()) self.evaluate( - v.scatter_update(ops.IndexedSlices(indices=[1], values=[3.0]))) - self.assertAllEqual([0.0, 3.0], self.evaluate(v)) + v.scatter_update(ops.IndexedSlices( + indices=[1], values=constant_op.constant([3.0], dtype=dtype)))) + self.assertAllCloseAccordingToType([0.0, 3.0], self.evaluate(v)) @test_util.run_deprecated_v1 def testScatterUpdateString(self): From 585de3969452255a1f8cd6d295b87a2e2d8298e9 Mon Sep 17 00:00:00 2001 From: bubblebooy Date: Sun, 12 Jul 2020 13:42:02 -0500 Subject: [PATCH 0250/2522] dense attention --- tensorflow/python/keras/layers/dense_attention.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/keras/layers/dense_attention.py b/tensorflow/python/keras/layers/dense_attention.py index fa9aea02372..705e5e75732 100644 --- a/tensorflow/python/keras/layers/dense_attention.py +++ b/tensorflow/python/keras/layers/dense_attention.py @@ -192,10 +192,10 @@ class BaseDenseAttention(Layer): raise ValueError( '{} layer mask must be a list, ' 'namely [query_mask, value_mask].'.format(class_name)) - if len(mask) != 2: + if len(mask) < 2 or len(mask) > 3: raise ValueError( - '{} layer mask must be a list of length 2, namely [query_mask, ' - 'value_mask]. 
Given length: {}'.format(class_name, len(mask))) + '{} layer mask must be a list of length 2 or 3, namely [query_mask, ' + 'value_mask] or [query_mask, value_mask, key_mask]. Given length: {}'.format(class_name, len(mask))) def get_config(self): config = { From d91193d641799988ba8f1c06148cf394b8d5157f Mon Sep 17 00:00:00 2001 From: Prakalp Srivastava Date: Sun, 12 Jul 2020 11:51:38 -0700 Subject: [PATCH 0251/2522] Increase constant fold policy size threshold. To accommodate some of the failing tests, increase the size threshold of constant fold policy from 128 KB to 256 KB. PiperOrigin-RevId: 320853763 Change-Id: I68f905a0236cc0923c28bd9bf182f35beb72f36b --- tensorflow/compiler/mlir/tensorflow/tests/constant-fold.mlir | 2 +- tensorflow/compiler/mlir/tensorflow/transforms/constant_fold.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/constant-fold.mlir b/tensorflow/compiler/mlir/tensorflow/tests/constant-fold.mlir index 7b8c998bcf1..b86815dbe57 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/constant-fold.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/constant-fold.mlir @@ -443,7 +443,7 @@ func @DontRemoveTrivialMul(%arg0: tensor<1x6x8x1xf32>) -> tensor<1x6x8x1xf32> { // CHECK: return %[[RESULT]] : tensor<1x6x8x1xf32> } -// Do not fold if total result size is large (>128 KB) and more than 2 times +// Do not fold if total result size is large (>256 KB) and more than 2 times // the size of operands. // LINT.IfChange(folding-policy-test) diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/constant_fold.cc b/tensorflow/compiler/mlir/tensorflow/transforms/constant_fold.cc index 007baaae433..1429e2b3fd4 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/constant_fold.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/constant_fold.cc @@ -40,7 +40,7 @@ namespace TF { // LINT.IfChange(folding-policy) static bool ShouldBeFolded(Operation* inst) { constexpr int kSizeFactor = 2; - constexpr int64_t kSizeThreshold = (1 << 20); // 128 KB + constexpr int64_t kSizeThreshold = (1 << 21); // 256 KB bool has_unknown_shape = false; auto get_size = [&](TypeRange types) { int64_t size = 0; From 7b14d48d8af2e063e9d5fbd309fa9f7c18b624c9 Mon Sep 17 00:00:00 2001 From: Taehee Jeong Date: Sun, 12 Jul 2020 19:16:08 -0700 Subject: [PATCH 0252/2522] Update README regarding Metal delegate instructions. PiperOrigin-RevId: 320880762 Change-Id: I6b36930f9b946b1f798c335326af182be49fedb2 --- tensorflow/lite/delegates/gpu/README.md | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/README.md b/tensorflow/lite/delegates/gpu/README.md index 552e1cdbec6..c37ee90b704 100644 --- a/tensorflow/lite/delegates/gpu/README.md +++ b/tensorflow/lite/delegates/gpu/README.md @@ -51,6 +51,10 @@ TFLite on GPU supports the following ops in 16-bit and 32-bit float precision: ## Basic Usage +**Note:** Following section describes the example usage for Android GPU delegate +with C++. For other languages and platforms, please see +[the documentation](https://www.tensorflow.org/lite/performance/gpu). + Using TFLite on GPU is as simple as getting the GPU delegate via `TfLiteGpuDelegateV2Create()` and then passing it to `Interpreter::ModifyGraphWithDelegate()` instead of calling @@ -99,13 +103,13 @@ Metal shaders are used for iOS, which were introduced with iOS 8. 
Thus, compilation flags should look like: ```sh -bazel build --config ios_arm64 //path/to/your:project +bazel build --config ios_fat //path/to/your:project ``` ## Advanced Usage: Delegate Options There are GPU options that can be set and passed on to -`TfLiteGpuDelegateCreate()`. When option is set to `nullptr` as shown in the +`TfLiteGpuDelegateV2Create()`. When option is set to `nullptr` as shown in the Basic Usage, it translates to: ```c++ @@ -113,12 +117,13 @@ const TfLiteGpuDelegateOptionsV2 kDefaultOptions = TfLiteGpuDelegateOptionsV2Default(); ``` -Similar for `NewTfLiteMetalDelegate()`: +Similar for `TFLGpuDelegateCreate()`: ```c++ -const TfLiteMetalDelegateOptions kDefaultOptions = { - .precision_loss_allowed = 0, // false - .wait_type = TFLITE_METAL_WAIT_TYPE_SLEEP, +const TFLGpuDelegateOptions kDefaultOptions = { + .allow_precision_loss = false, + .wait_type = TFLGpuDelegateWaitTypePassive, + .enable_quantization = false, }; ``` @@ -126,9 +131,10 @@ While it is convenient to just supply `nullptr`, it is recommended to explicitly set the options to avoid any unexpected artifacts in case default values are changed. -*IMPORTANT:* Note that the default option does not allow precision loss, and -thus may not be the fastest. For faster execution, you may want to set -`precision_loss_allowed` to `1` for FP16 execution. +*IMPORTANT:* Note that the default option may not be the fastest. For faster +execution, you may want to set `allow_precision_loss` to `true` so that the GPU +performs FP16 calculation internally, and set `wait_type` to +`TFLGpuDelegateWaitTypeAggressive` to avoid GPU sleep mode. ## Tips and Tricks From 7ae0db7a0522afd1a8e9eb0d6c64859b42b3b348 Mon Sep 17 00:00:00 2001 From: Chao Mei Date: Sun, 12 Jul 2020 20:22:44 -0700 Subject: [PATCH 0253/2522] Fixed the main coco object detection eval library to use the same delegate creation provider. PiperOrigin-RevId: 320884835 Change-Id: Id4aeb7838d1941382488bc9db33e83530f449280 --- .../tools/evaluation/tasks/coco_object_detection/run_eval.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/lite/tools/evaluation/tasks/coco_object_detection/run_eval.cc b/tensorflow/lite/tools/evaluation/tasks/coco_object_detection/run_eval.cc index 73491457f38..44a6ca36c98 100644 --- a/tensorflow/lite/tools/evaluation/tasks/coco_object_detection/run_eval.cc +++ b/tensorflow/lite/tools/evaluation/tasks/coco_object_detection/run_eval.cc @@ -67,7 +67,6 @@ class CocoObjectDetection : public TaskExecutor { bool debug_mode_; std::string delegate_; int num_interpreter_threads_; - DelegateProviders delegate_providers_; }; std::vector CocoObjectDetection::GetFlags() { From 525bfce301eb15d2903ee215e9da641e4d5f918d Mon Sep 17 00:00:00 2001 From: Thai Nguyen Date: Sun, 12 Jul 2020 20:28:00 -0700 Subject: [PATCH 0254/2522] Add RaggedTensorToTensor to flex delegate The ragged_to_dense_util should be put in core/util instead of core/ops. Then it can be added to mobile_srcs_only_runtime. 
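As a minimal, illustrative sketch (the function, shapes, and op-set choices
below are only an example, not part of this change), a graph that lowers to
RaggedTensorToTensor should now be convertible and runnable through the
Select TF ops (flex) path:

  import tensorflow as tf

  @tf.function(input_signature=[
      tf.TensorSpec([None], tf.int32),    # flat values
      tf.TensorSpec([None], tf.int64)])   # row_splits
  def to_dense(values, row_splits):
    ragged = tf.RaggedTensor.from_row_splits(values, row_splits)
    # RaggedTensor.to_tensor lowers to the RaggedTensorToTensor op.
    return ragged.to_tensor()

  converter = tf.lite.TFLiteConverter.from_concrete_functions(
      [to_dense.get_concrete_function()])
  converter.target_spec.supported_ops = [
      tf.lite.OpsSet.TFLITE_BUILTINS,  # regular builtin ops
      tf.lite.OpsSet.SELECT_TF_OPS,    # flex ops, incl. RaggedTensorToTensor
  ]
  tflite_model = converter.convert()

At runtime the op still executes through the TensorFlow kernel that the flex
delegate links in; the allowlist change here only lets the delegate claim it.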
PiperOrigin-RevId: 320885158 Change-Id: I1083c2ac57e87b4e29a6e5fef2998d7b7a951133 --- tensorflow/core/BUILD | 31 +------------------ tensorflow/core/kernels/BUILD | 3 +- .../kernels/ragged_tensor_to_tensor_op.cc | 2 +- tensorflow/core/ops/ragged_conversion_ops.cc | 2 +- tensorflow/core/util/BUILD | 31 +++++++++++++++++++ .../{ops => util}/ragged_to_dense_util.cc | 2 +- .../core/{ops => util}/ragged_to_dense_util.h | 0 .../ragged_to_dense_util_test.cc | 3 +- .../delegates/flex/allowlisted_flex_ops.cc | 1 + 9 files changed, 39 insertions(+), 36 deletions(-) rename tensorflow/core/{ops => util}/ragged_to_dense_util.cc (99%) rename tensorflow/core/{ops => util}/ragged_to_dense_util.h (100%) rename tensorflow/core/{ops => util}/ragged_to_dense_util_test.cc (99%) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 18341a81df4..9c6fd9f3632 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -798,36 +798,7 @@ tf_gen_op_libs( "ragged_conversion_ops", "ragged_math_ops", ], - deps = [":ragged_to_dense_util"], -) - -cc_library( - name = "ragged_to_dense_util", - srcs = [ - "ops/ragged_to_dense_util.cc", - ], - hdrs = [ - "ops/ragged_to_dense_util.h", - ], - deps = [ - ":framework", - ":protos_all_cc", - ], -) - -tf_cc_test( - name = "ragged_to_dense_util_test", - srcs = [ - "ops/ragged_to_dense_util_test.cc", - ], - deps = [ - ":framework", - ":protos_all_cc", - ":ragged_to_dense_util", - ":test", - ":testlib", - "@com_google_googletest//:gtest_main", - ], + deps = ["//tensorflow/core/util:ragged_to_dense_util"], ) cc_library( diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 9ace481a991..b4129e05f91 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -1508,7 +1508,7 @@ tf_kernel_library( "//tensorflow/core:framework_lite", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", - "//tensorflow/core:ragged_to_dense_util", + "//tensorflow/core/util:ragged_to_dense_util", ], ) @@ -6909,6 +6909,7 @@ filegroup( "queue_op.cc", "queue_ops.cc", "ragged_range_op.cc", + "ragged_tensor_to_tensor_op.cc", "random_op.cc", "random_op_cpu.h", "random_poisson_op.cc", diff --git a/tensorflow/core/kernels/ragged_tensor_to_tensor_op.cc b/tensorflow/core/kernels/ragged_tensor_to_tensor_op.cc index 9ae5d7ffbdc..88931292ef2 100644 --- a/tensorflow/core/kernels/ragged_tensor_to_tensor_op.cc +++ b/tensorflow/core/kernels/ragged_tensor_to_tensor_op.cc @@ -37,9 +37,9 @@ limitations under the License. #include "tensorflow/core/lib/bfloat16/bfloat16.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/ops/ragged_to_dense_util.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/util/bcast.h" +#include "tensorflow/core/util/ragged_to_dense_util.h" namespace tensorflow { diff --git a/tensorflow/core/ops/ragged_conversion_ops.cc b/tensorflow/core/ops/ragged_conversion_ops.cc index 6bee189c85e..44712bf7739 100644 --- a/tensorflow/core/ops/ragged_conversion_ops.cc +++ b/tensorflow/core/ops/ragged_conversion_ops.cc @@ -15,7 +15,7 @@ limitations under the License. 
#include "tensorflow/core/framework/common_shape_fns.h" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/shape_inference.h" -#include "tensorflow/core/ops/ragged_to_dense_util.h" +#include "tensorflow/core/util/ragged_to_dense_util.h" namespace tensorflow { diff --git a/tensorflow/core/util/BUILD b/tensorflow/core/util/BUILD index 8e878c2464d..962beb55e05 100644 --- a/tensorflow/core/util/BUILD +++ b/tensorflow/core/util/BUILD @@ -93,6 +93,8 @@ filegroup( "port.h", "presized_cuckoo_map.h", "ptr_util.h", + "ragged_to_dense_util.cc", + "ragged_to_dense_util.h", "reffed_status_callback.h", "saved_tensor_slice_util.cc", "saved_tensor_slice_util.h", @@ -368,6 +370,35 @@ cc_library( ], ) +cc_library( + name = "ragged_to_dense_util", + srcs = [ + "ragged_to_dense_util.cc", + ], + hdrs = [ + "ragged_to_dense_util.h", + ], + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:protos_all_cc", + ], +) + +tf_cc_test( + name = "ragged_to_dense_util_test", + srcs = [ + "ragged_to_dense_util_test.cc", + ], + deps = [ + ":ragged_to_dense_util", + "//tensorflow/core:framework", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:testlib", + "@com_google_googletest//:gtest_main", + ], +) + tf_cc_test( name = "stats_calculator_test", srcs = ["stats_calculator_test.cc"], diff --git a/tensorflow/core/ops/ragged_to_dense_util.cc b/tensorflow/core/util/ragged_to_dense_util.cc similarity index 99% rename from tensorflow/core/ops/ragged_to_dense_util.cc rename to tensorflow/core/util/ragged_to_dense_util.cc index ecb95e163ab..cd95b5ec75b 100644 --- a/tensorflow/core/ops/ragged_to_dense_util.cc +++ b/tensorflow/core/util/ragged_to_dense_util.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/ops/ragged_to_dense_util.h" +#include "tensorflow/core/util/ragged_to_dense_util.h" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/shape_inference.h" diff --git a/tensorflow/core/ops/ragged_to_dense_util.h b/tensorflow/core/util/ragged_to_dense_util.h similarity index 100% rename from tensorflow/core/ops/ragged_to_dense_util.h rename to tensorflow/core/util/ragged_to_dense_util.h diff --git a/tensorflow/core/ops/ragged_to_dense_util_test.cc b/tensorflow/core/util/ragged_to_dense_util_test.cc similarity index 99% rename from tensorflow/core/ops/ragged_to_dense_util_test.cc rename to tensorflow/core/util/ragged_to_dense_util_test.cc index d3d9e68ae2e..7b8f2c4d3b2 100644 --- a/tensorflow/core/ops/ragged_to_dense_util_test.cc +++ b/tensorflow/core/util/ragged_to_dense_util_test.cc @@ -13,10 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/ops/ragged_to_dense_util.h" +#include "tensorflow/core/util/ragged_to_dense_util.h" #include -#include #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/shape_inference.h" #include "tensorflow/core/framework/tensor_shape.h" diff --git a/tensorflow/lite/delegates/flex/allowlisted_flex_ops.cc b/tensorflow/lite/delegates/flex/allowlisted_flex_ops.cc index 885601e5333..fa29d81371d 100644 --- a/tensorflow/lite/delegates/flex/allowlisted_flex_ops.cc +++ b/tensorflow/lite/delegates/flex/allowlisted_flex_ops.cc @@ -299,6 +299,7 @@ bool IsAllowlistedFlexOp(const std::string& tensorflow_op_name) { "RFFT2D", "RFFT3D", "RaggedRange", + "RaggedTensorToTensor", "RandomGamma", "RandomStandardNormal", "RandomUniform", From 8effbbb15d7b3d3803b5bb450371e2d15cabcfee Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 12 Jul 2020 21:57:05 -0700 Subject: [PATCH 0255/2522] Use tf._implements to fuse ops instead of tf.api_implement PiperOrigin-RevId: 320894161 Change-Id: I76fced848cbbce7d994979af4b1ffa4daf10f154 --- tensorflow/compiler/mlir/lite/BUILD | 2 + .../compiler/mlir/lite/tests/fuse-tftext.mlir | 13 ++--- .../prepare_composite_functions_tf.cc | 50 +++++++++++++------ .../compiler/mlir/lite/utils/tftext_utils.cc | 21 ++++---- .../compiler/mlir/lite/utils/tftext_utils.h | 8 ++- .../mlir/lite/utils/tftext_utils_test.cc | 4 +- 6 files changed, 65 insertions(+), 33 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/BUILD b/tensorflow/compiler/mlir/lite/BUILD index 8d0c204f434..56dd0854ee8 100644 --- a/tensorflow/compiler/mlir/lite/BUILD +++ b/tensorflow/compiler/mlir/lite/BUILD @@ -273,6 +273,7 @@ cc_library( deps = [ ":tensorflow_lite", "//tensorflow/compiler/mlir/tensorflow", + "//tensorflow/compiler/mlir/tensorflow:tensorflow_attributes", "//tensorflow/core:framework", "@llvm-project//llvm:Support", "@llvm-project//mlir:IR", @@ -360,6 +361,7 @@ cc_library( "//tensorflow/compiler/mlir/tensorflow", "//tensorflow/compiler/mlir/tensorflow:convert_tensor", "//tensorflow/compiler/mlir/tensorflow:mangling_util", + "//tensorflow/compiler/mlir/tensorflow:tensorflow_attributes", "//tensorflow/compiler/mlir/tensorflow:tensorflow_types", "//tensorflow/compiler/mlir/tensorflow:unroll_batch_matmul_pass", "//tensorflow/compiler/xla:status", diff --git a/tensorflow/compiler/mlir/lite/tests/fuse-tftext.mlir b/tensorflow/compiler/mlir/lite/tests/fuse-tftext.mlir index 1a3ed0509c4..97c0c7358ca 100644 --- a/tensorflow/compiler/mlir/lite/tests/fuse-tftext.mlir +++ b/tensorflow/compiler/mlir/lite/tests/fuse-tftext.mlir @@ -1,7 +1,7 @@ // RUN: tf-opt -tfl-prepare-composite-funcs-tf -tfl-fuse-tftext=true %s -split-input-file | FileCheck %s module { - func @whitespace_tokenizer_rank1(%arg0: tensor<1x!tf.string> {tf._user_specified_name = "input"}) -> (tensor, tensor) attributes {tf._input_shapes = [#tf.shape<1>], tf.api_implements = "tftext:WhitespaceTokenizer", tf.signature.is_stateful} { + func @whitespace_tokenizer_rank1(%arg0: tensor<1x!tf.string> {tf._user_specified_name = "input"}) -> (tensor, tensor) attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<1>], tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf.signature.is_stateful} { %0 = "tf.Const"() {value = dense<[0, 1]> : tensor<2xi64>} : () -> tensor<2xi64> %1 = "tf.Const"() {value = dense<[]> : tensor<0xi64>} : () -> tensor<0xi64> %2 = "tf.Const"() {value = dense : tensor} : () -> 
tensor @@ -1027,11 +1027,11 @@ module { return %1 : tensor } - // CHECK: func @whitespace_tokenizer_rank1(%arg0: tensor<1x!tf.string> {tf._user_specified_name = "input"}) -> (tensor, tensor) attributes {tf._input_shapes = [#tf.shape<1>], tf.api_implements = "tftext:WhitespaceTokenizer", tf.signature.is_stateful} { + // CHECK: func @whitespace_tokenizer_rank1(%arg0: tensor<1x!tf.string> {tf._user_specified_name = "input"}) -> (tensor, tensor) attributes {sym_visibility = "private", tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf._input_shapes = [#tf.shape<1>], tf.signature.is_stateful} { // CHECK: %0:2 = "tfl.custom"(%arg0) {custom_code = "tftext:WhitespaceTokenizer", custom_option = opaque<"tfl", "0x"> : tensor<0xi8>} : (tensor<1x!tf.string>) -> (tensor, tensor) // CHECK: return %0#0, %0#1 : tensor, tensor - func @whitespace_tokenizer_rank2(%arg0: tensor {tf._user_specified_name = "input"}) -> (tensor, tensor, tensor) attributes {tf._input_shapes = [#tf.shape], tf.api_implements = "tftext:WhitespaceTokenizer", tf.signature.is_stateful} { + func @whitespace_tokenizer_rank2(%arg0: tensor {tf._user_specified_name = "input"}) -> (tensor, tensor, tensor) attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape], tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf.signature.is_stateful} { %0 = "tf.Const"() {value = dense<[]> : tensor<0xi64>} : () -> tensor<0xi64> %1 = "tf.Const"() {value = dense : tensor} : () -> tensor %2 = "tf.Const"() {value = dense<-1> : tensor} : () -> tensor @@ -2160,11 +2160,12 @@ module { } - // CHECK: func @whitespace_tokenizer_rank2(%arg0: tensor {tf._user_specified_name = "input"}) -> (tensor, tensor, tensor) attributes {tf._input_shapes = [#tf.shape], tf.api_implements = "tftext:WhitespaceTokenizer", tf.signature.is_stateful} { + + // CHECK: func @whitespace_tokenizer_rank2(%arg0: tensor {tf._user_specified_name = "input"}) -> (tensor, tensor, tensor) attributes {sym_visibility = "private", tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf._input_shapes = [#tf.shape], tf.signature.is_stateful} { // CHECK: %0:3 = "tfl.custom"(%arg0) {custom_code = "tftext:WhitespaceTokenizer", custom_option = opaque<"tfl", "0x"> : tensor<0xi8>} : (tensor) -> (tensor, tensor, tensor) // CHECK: return %0#0, %0#1, %0#2 : tensor, tensor, tensor - func @whitespace_tokenizer_rank0(%arg0: tensor {tf._user_specified_name = "input"}) -> tensor attributes {tf._input_shapes = [#tf.shape<>], tf.api_implements = "tftext:WhitespaceTokenizer", tf.signature.is_stateful} { + func @whitespace_tokenizer_rank0(%arg0: tensor {tf._user_specified_name = "input"}) -> tensor attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<>], tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf.signature.is_stateful} { %0 = "tf.Const"() {value = dense<[0, 1]> : tensor<2xi64>} : () -> tensor<2xi64> %1 = "tf.Const"() {value = dense<[]> : tensor<0xi64>} : () -> tensor<0xi64> %2 = "tf.Const"() {value = dense : tensor} : () -> tensor @@ -3190,7 +3191,7 @@ module { return %1 : tensor } - // CHECK: func @whitespace_tokenizer_rank0(%arg0: tensor {tf._user_specified_name = "input"}) -> tensor attributes {tf._input_shapes = [#tf.shape<>], tf.api_implements = "tftext:WhitespaceTokenizer", tf.signature.is_stateful} { + // CHECK: func @whitespace_tokenizer_rank0(%arg0: tensor {tf._user_specified_name = "input"}) -> tensor attributes {sym_visibility = "private", tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf._input_shapes = 
[#tf.shape<>], tf.signature.is_stateful} { // CHECK: %0 = "tfl.custom"(%arg0) {custom_code = "tftext:WhitespaceTokenizer", custom_option = opaque<"tfl", "0x"> : tensor<0xi8>} : (tensor) -> tensor // CHECK: return %0 : tensor } diff --git a/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc b/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc index 3d2ab662e6f..baedb783d83 100644 --- a/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc +++ b/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc @@ -42,6 +42,7 @@ limitations under the License. #include "tensorflow/compiler/mlir/lite/transforms/passes.h" #include "tensorflow/compiler/mlir/lite/utils/lstm_utils.h" #include "tensorflow/compiler/mlir/lite/utils/tftext_utils.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_attributes.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" // The cmd line flag to turn on/off Tf.Text API fusion. @@ -56,9 +57,11 @@ namespace TFL { namespace { constexpr char kTFAPIImplements[] = "tf.api_implements"; -constexpr char kTfTextAPIPRefix[] = "tftext:"; +constexpr char kTFTextAPIPrefix[] = "tftext:"; constexpr char kTfNMSPadded[] = "non_max_suppression_padded_v2"; +using mlir::TF::FuncAttr; + // Abstracts the conversion of the embedded lookup composite function. class ConvertEmbeddedLookupFunc { public: @@ -158,12 +161,18 @@ class PrepareCompositeFunctionsPass : public PassWrapper> { public: - explicit PrepareCompositeFunctionsPass() {} + explicit PrepareCompositeFunctionsPass() { + enable_fuse_tftext_ = fuse_tftext_flag || + IsTFTextRegistered(tensorflow::OpRegistry::Global()); + } private: + // TODO(b/160915525): Consolidate FuncAttr and StringAttr into one. void ConvertTFImplements(FuncOp func, StringAttr attr); + void ConvertTFImplementsWithAttributes(FuncOp func, FuncAttr attr); void ConvertTFAPIImplements(FuncOp func, StringAttr attr, ModuleOp module); void runOnOperation() override; + bool enable_fuse_tftext_; }; void PrepareCompositeFunctionsPass::ConvertTFImplements(FuncOp func, @@ -204,6 +213,18 @@ void PrepareCompositeFunctionsPass::ConvertTFImplements(FuncOp func, } } +void PrepareCompositeFunctionsPass::ConvertTFImplementsWithAttributes( + FuncOp func, FuncAttr attr) { + if (enable_fuse_tftext_) { + auto api_name = attr.GetName().getLeafReference(); + if (api_name.startswith(kTFTextAPIPrefix)) { + if (failed(ConvertTFTextAPI(func, api_name, attr))) { + return signalPassFailure(); + } + } + } +} + LogicalResult CheckOutputConsumer( Operation* call_op, int expected_num_outputs, llvm::DenseSet expected_consumer_indices) { @@ -256,26 +277,27 @@ void PrepareCompositeFunctionsPass::ConvertTFAPIImplements(FuncOp func, OpBuilder builder(func.getBody()); if (failed(ConvertKerasLSTMLayer(func, &builder))) return signalPassFailure(); - } else if (fuse_tftext_flag || - IsTfTextRegistered(tensorflow::OpRegistry::Global())) { - if (attr.getValue().startswith(kTfTextAPIPRefix)) { - if (failed(ConvertTFTextAPI(func, attr.getValue()))) { - return signalPassFailure(); - } - } } } void PrepareCompositeFunctionsPass::runOnOperation() { auto module = getOperation(); for (auto func : module.getOps()) { - // We have two kinds of implements: - // 1) tf._implements. - // 2) tf.api_implements. + // We have three kinds of implements: + // 1) tf._implements, with string attributes. + // 2) tf._implements, with proto attributes. + // 3) tf.api_implements. // We need to handle them separately. 
- auto tf_implements_attr = func.getAttrOfType(kTFImplements); + auto tf_implements_attr_str = func.getAttrOfType(kTFImplements); + if (tf_implements_attr_str) { + ConvertTFImplements(func, tf_implements_attr_str); + continue; + } + + auto tf_implements_attr = func.getAttrOfType(kTFImplements); if (tf_implements_attr) { - ConvertTFImplements(func, tf_implements_attr); + ConvertTFImplementsWithAttributes(func, tf_implements_attr); + continue; } auto tf_api_implements_attr = diff --git a/tensorflow/compiler/mlir/lite/utils/tftext_utils.cc b/tensorflow/compiler/mlir/lite/utils/tftext_utils.cc index 2ed0891dc59..1681f654b92 100644 --- a/tensorflow/compiler/mlir/lite/utils/tftext_utils.cc +++ b/tensorflow/compiler/mlir/lite/utils/tftext_utils.cc @@ -44,7 +44,9 @@ namespace TFL { namespace { constexpr char kWhitespaceTokenizer[] = "tftext:WhitespaceTokenizer"; -constexpr char kTFAPIImplements[] = "tf.api_implements"; +constexpr char kTFImplements[] = "tf._implements"; + +using mlir::TF::FuncAttr; inline OpaqueElementsAttr emptyCustomOption(OpBuilder* builder) { std::string content = ""; @@ -121,11 +123,11 @@ LogicalResult VerifyWhitespaceTokenizer(mlir::FuncOp func) { return success(); } -LogicalResult ConvertWhitespaceTokenizer(mlir::FuncOp func, - llvm::StringRef api) { +LogicalResult ConvertWhitespaceTokenizer(mlir::FuncOp func, llvm::StringRef api, + FuncAttr attr) { func.eraseBody(); func.addEntryBlock(); - func.setAttr(kTFAPIImplements, StringAttr::get(api, func.getContext())); + func.setAttr(kTFImplements, attr); Value text = func.getArgument(0); OpBuilder builder(func.getBody()); @@ -137,20 +139,21 @@ LogicalResult ConvertWhitespaceTokenizer(mlir::FuncOp func, } } // namespace -LogicalResult ConvertTFTextAPI(mlir::FuncOp func, llvm::StringRef api) { +LogicalResult ConvertTFTextAPI(mlir::FuncOp func, llvm::StringRef api, + FuncAttr attr) { if (api.str() == kWhitespaceTokenizer) { if (succeeded(VerifyWhitespaceTokenizer(func))) { - return ConvertWhitespaceTokenizer(func, api); + return ConvertWhitespaceTokenizer(func, api, attr); } } return failure(); } -bool IsTfTextRegistered(const tensorflow::OpRegistry* op_registery) { - const std::vector kTfTextOps = { +bool IsTFTextRegistered(const tensorflow::OpRegistry* op_registery) { + const std::vector kTFTextOps = { "WhitespaceTokenizeWithOffsets", }; - for (const auto& iter : kTfTextOps) { + for (const auto& iter : kTFTextOps) { if (op_registery->LookUp(iter)) { return true; } diff --git a/tensorflow/compiler/mlir/lite/utils/tftext_utils.h b/tensorflow/compiler/mlir/lite/utils/tftext_utils.h index c52ee019d8d..55e4680c3dd 100644 --- a/tensorflow/compiler/mlir/lite/utils/tftext_utils.h +++ b/tensorflow/compiler/mlir/lite/utils/tftext_utils.h @@ -27,14 +27,18 @@ limitations under the License. #include "mlir/IR/Value.h" // from @llvm-project #include "mlir/Support/LogicalResult.h" // from @llvm-project #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_attributes.h" #include "tensorflow/core/framework/op.h" namespace mlir { namespace TFL { -LogicalResult ConvertTFTextAPI(mlir::FuncOp func, llvm::StringRef api); +// Fuse TF.Text APIs annotated by tf.function to a TFLite custom op. +LogicalResult ConvertTFTextAPI(mlir::FuncOp func, llvm::StringRef api, + mlir::TF::FuncAttr attr); -bool IsTfTextRegistered(const tensorflow::OpRegistry* op_registery); +// Check if TF.Text Tensorflow ops are registered. 
+bool IsTFTextRegistered(const tensorflow::OpRegistry* op_registery); } // end namespace TFL } // end namespace mlir diff --git a/tensorflow/compiler/mlir/lite/utils/tftext_utils_test.cc b/tensorflow/compiler/mlir/lite/utils/tftext_utils_test.cc index 7d29264aaae..9bcfa89c544 100644 --- a/tensorflow/compiler/mlir/lite/utils/tftext_utils_test.cc +++ b/tensorflow/compiler/mlir/lite/utils/tftext_utils_test.cc @@ -41,13 +41,13 @@ void Register(const std::string& op_name, OpRegistry* registry) { TEST(TfTextUtilsTest, TestTfTextRegistered) { std::unique_ptr registry(new OpRegistry); Register("WhitespaceTokenizeWithOffsets", registry.get()); - EXPECT_TRUE(IsTfTextRegistered(registry.get())); + EXPECT_TRUE(IsTFTextRegistered(registry.get())); } TEST(TfTextUtilsTest, TestTfTextNotRegistered) { std::unique_ptr registry(new OpRegistry); Register("Test", registry.get()); - EXPECT_FALSE(IsTfTextRegistered(registry.get())); + EXPECT_FALSE(IsTFTextRegistered(registry.get())); } } // namespace TFL } // namespace mlir From fba773beadbd9459a556c2c9dbb3c7b60f13b9d2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 12 Jul 2020 22:21:09 -0700 Subject: [PATCH 0256/2522] Use tf._implements to fuse ops instead of tf.api_implement PiperOrigin-RevId: 320896230 Change-Id: I42b1c12331eb82eef2f83fcf9ce558daf6c75e72 --- tensorflow/compiler/mlir/lite/BUILD | 2 - .../compiler/mlir/lite/tests/fuse-tftext.mlir | 13 +++-- .../prepare_composite_functions_tf.cc | 50 ++++++------------- .../compiler/mlir/lite/utils/tftext_utils.cc | 21 ++++---- .../compiler/mlir/lite/utils/tftext_utils.h | 8 +-- .../mlir/lite/utils/tftext_utils_test.cc | 4 +- 6 files changed, 33 insertions(+), 65 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/BUILD b/tensorflow/compiler/mlir/lite/BUILD index 56dd0854ee8..8d0c204f434 100644 --- a/tensorflow/compiler/mlir/lite/BUILD +++ b/tensorflow/compiler/mlir/lite/BUILD @@ -273,7 +273,6 @@ cc_library( deps = [ ":tensorflow_lite", "//tensorflow/compiler/mlir/tensorflow", - "//tensorflow/compiler/mlir/tensorflow:tensorflow_attributes", "//tensorflow/core:framework", "@llvm-project//llvm:Support", "@llvm-project//mlir:IR", @@ -361,7 +360,6 @@ cc_library( "//tensorflow/compiler/mlir/tensorflow", "//tensorflow/compiler/mlir/tensorflow:convert_tensor", "//tensorflow/compiler/mlir/tensorflow:mangling_util", - "//tensorflow/compiler/mlir/tensorflow:tensorflow_attributes", "//tensorflow/compiler/mlir/tensorflow:tensorflow_types", "//tensorflow/compiler/mlir/tensorflow:unroll_batch_matmul_pass", "//tensorflow/compiler/xla:status", diff --git a/tensorflow/compiler/mlir/lite/tests/fuse-tftext.mlir b/tensorflow/compiler/mlir/lite/tests/fuse-tftext.mlir index 97c0c7358ca..1a3ed0509c4 100644 --- a/tensorflow/compiler/mlir/lite/tests/fuse-tftext.mlir +++ b/tensorflow/compiler/mlir/lite/tests/fuse-tftext.mlir @@ -1,7 +1,7 @@ // RUN: tf-opt -tfl-prepare-composite-funcs-tf -tfl-fuse-tftext=true %s -split-input-file | FileCheck %s module { - func @whitespace_tokenizer_rank1(%arg0: tensor<1x!tf.string> {tf._user_specified_name = "input"}) -> (tensor, tensor) attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<1>], tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf.signature.is_stateful} { + func @whitespace_tokenizer_rank1(%arg0: tensor<1x!tf.string> {tf._user_specified_name = "input"}) -> (tensor, tensor) attributes {tf._input_shapes = [#tf.shape<1>], tf.api_implements = "tftext:WhitespaceTokenizer", tf.signature.is_stateful} { %0 = "tf.Const"() {value = dense<[0, 1]> : 
tensor<2xi64>} : () -> tensor<2xi64> %1 = "tf.Const"() {value = dense<[]> : tensor<0xi64>} : () -> tensor<0xi64> %2 = "tf.Const"() {value = dense : tensor} : () -> tensor @@ -1027,11 +1027,11 @@ module { return %1 : tensor } - // CHECK: func @whitespace_tokenizer_rank1(%arg0: tensor<1x!tf.string> {tf._user_specified_name = "input"}) -> (tensor, tensor) attributes {sym_visibility = "private", tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf._input_shapes = [#tf.shape<1>], tf.signature.is_stateful} { + // CHECK: func @whitespace_tokenizer_rank1(%arg0: tensor<1x!tf.string> {tf._user_specified_name = "input"}) -> (tensor, tensor) attributes {tf._input_shapes = [#tf.shape<1>], tf.api_implements = "tftext:WhitespaceTokenizer", tf.signature.is_stateful} { // CHECK: %0:2 = "tfl.custom"(%arg0) {custom_code = "tftext:WhitespaceTokenizer", custom_option = opaque<"tfl", "0x"> : tensor<0xi8>} : (tensor<1x!tf.string>) -> (tensor, tensor) // CHECK: return %0#0, %0#1 : tensor, tensor - func @whitespace_tokenizer_rank2(%arg0: tensor {tf._user_specified_name = "input"}) -> (tensor, tensor, tensor) attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape], tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf.signature.is_stateful} { + func @whitespace_tokenizer_rank2(%arg0: tensor {tf._user_specified_name = "input"}) -> (tensor, tensor, tensor) attributes {tf._input_shapes = [#tf.shape], tf.api_implements = "tftext:WhitespaceTokenizer", tf.signature.is_stateful} { %0 = "tf.Const"() {value = dense<[]> : tensor<0xi64>} : () -> tensor<0xi64> %1 = "tf.Const"() {value = dense : tensor} : () -> tensor %2 = "tf.Const"() {value = dense<-1> : tensor} : () -> tensor @@ -2160,12 +2160,11 @@ module { } - - // CHECK: func @whitespace_tokenizer_rank2(%arg0: tensor {tf._user_specified_name = "input"}) -> (tensor, tensor, tensor) attributes {sym_visibility = "private", tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf._input_shapes = [#tf.shape], tf.signature.is_stateful} { + // CHECK: func @whitespace_tokenizer_rank2(%arg0: tensor {tf._user_specified_name = "input"}) -> (tensor, tensor, tensor) attributes {tf._input_shapes = [#tf.shape], tf.api_implements = "tftext:WhitespaceTokenizer", tf.signature.is_stateful} { // CHECK: %0:3 = "tfl.custom"(%arg0) {custom_code = "tftext:WhitespaceTokenizer", custom_option = opaque<"tfl", "0x"> : tensor<0xi8>} : (tensor) -> (tensor, tensor, tensor) // CHECK: return %0#0, %0#1, %0#2 : tensor, tensor, tensor - func @whitespace_tokenizer_rank0(%arg0: tensor {tf._user_specified_name = "input"}) -> tensor attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<>], tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf.signature.is_stateful} { + func @whitespace_tokenizer_rank0(%arg0: tensor {tf._user_specified_name = "input"}) -> tensor attributes {tf._input_shapes = [#tf.shape<>], tf.api_implements = "tftext:WhitespaceTokenizer", tf.signature.is_stateful} { %0 = "tf.Const"() {value = dense<[0, 1]> : tensor<2xi64>} : () -> tensor<2xi64> %1 = "tf.Const"() {value = dense<[]> : tensor<0xi64>} : () -> tensor<0xi64> %2 = "tf.Const"() {value = dense : tensor} : () -> tensor @@ -3191,7 +3190,7 @@ module { return %1 : tensor } - // CHECK: func @whitespace_tokenizer_rank0(%arg0: tensor {tf._user_specified_name = "input"}) -> tensor attributes {sym_visibility = "private", tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf._input_shapes = [#tf.shape<>], tf.signature.is_stateful} { + // CHECK: func 
@whitespace_tokenizer_rank0(%arg0: tensor {tf._user_specified_name = "input"}) -> tensor attributes {tf._input_shapes = [#tf.shape<>], tf.api_implements = "tftext:WhitespaceTokenizer", tf.signature.is_stateful} { // CHECK: %0 = "tfl.custom"(%arg0) {custom_code = "tftext:WhitespaceTokenizer", custom_option = opaque<"tfl", "0x"> : tensor<0xi8>} : (tensor) -> tensor // CHECK: return %0 : tensor } diff --git a/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc b/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc index baedb783d83..3d2ab662e6f 100644 --- a/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc +++ b/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc @@ -42,7 +42,6 @@ limitations under the License. #include "tensorflow/compiler/mlir/lite/transforms/passes.h" #include "tensorflow/compiler/mlir/lite/utils/lstm_utils.h" #include "tensorflow/compiler/mlir/lite/utils/tftext_utils.h" -#include "tensorflow/compiler/mlir/tensorflow/ir/tf_attributes.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" // The cmd line flag to turn on/off Tf.Text API fusion. @@ -57,11 +56,9 @@ namespace TFL { namespace { constexpr char kTFAPIImplements[] = "tf.api_implements"; -constexpr char kTFTextAPIPrefix[] = "tftext:"; +constexpr char kTfTextAPIPRefix[] = "tftext:"; constexpr char kTfNMSPadded[] = "non_max_suppression_padded_v2"; -using mlir::TF::FuncAttr; - // Abstracts the conversion of the embedded lookup composite function. class ConvertEmbeddedLookupFunc { public: @@ -161,18 +158,12 @@ class PrepareCompositeFunctionsPass : public PassWrapper> { public: - explicit PrepareCompositeFunctionsPass() { - enable_fuse_tftext_ = fuse_tftext_flag || - IsTFTextRegistered(tensorflow::OpRegistry::Global()); - } + explicit PrepareCompositeFunctionsPass() {} private: - // TODO(b/160915525): Consolidate FuncAttr and StringAttr into one. void ConvertTFImplements(FuncOp func, StringAttr attr); - void ConvertTFImplementsWithAttributes(FuncOp func, FuncAttr attr); void ConvertTFAPIImplements(FuncOp func, StringAttr attr, ModuleOp module); void runOnOperation() override; - bool enable_fuse_tftext_; }; void PrepareCompositeFunctionsPass::ConvertTFImplements(FuncOp func, @@ -213,18 +204,6 @@ void PrepareCompositeFunctionsPass::ConvertTFImplements(FuncOp func, } } -void PrepareCompositeFunctionsPass::ConvertTFImplementsWithAttributes( - FuncOp func, FuncAttr attr) { - if (enable_fuse_tftext_) { - auto api_name = attr.GetName().getLeafReference(); - if (api_name.startswith(kTFTextAPIPrefix)) { - if (failed(ConvertTFTextAPI(func, api_name, attr))) { - return signalPassFailure(); - } - } - } -} - LogicalResult CheckOutputConsumer( Operation* call_op, int expected_num_outputs, llvm::DenseSet expected_consumer_indices) { @@ -277,27 +256,26 @@ void PrepareCompositeFunctionsPass::ConvertTFAPIImplements(FuncOp func, OpBuilder builder(func.getBody()); if (failed(ConvertKerasLSTMLayer(func, &builder))) return signalPassFailure(); + } else if (fuse_tftext_flag || + IsTfTextRegistered(tensorflow::OpRegistry::Global())) { + if (attr.getValue().startswith(kTfTextAPIPRefix)) { + if (failed(ConvertTFTextAPI(func, attr.getValue()))) { + return signalPassFailure(); + } + } } } void PrepareCompositeFunctionsPass::runOnOperation() { auto module = getOperation(); for (auto func : module.getOps()) { - // We have three kinds of implements: - // 1) tf._implements, with string attributes. - // 2) tf._implements, with proto attributes. 
- // 3) tf.api_implements. + // We have two kinds of implements: + // 1) tf._implements. + // 2) tf.api_implements. // We need to handle them separately. - auto tf_implements_attr_str = func.getAttrOfType(kTFImplements); - if (tf_implements_attr_str) { - ConvertTFImplements(func, tf_implements_attr_str); - continue; - } - - auto tf_implements_attr = func.getAttrOfType(kTFImplements); + auto tf_implements_attr = func.getAttrOfType(kTFImplements); if (tf_implements_attr) { - ConvertTFImplementsWithAttributes(func, tf_implements_attr); - continue; + ConvertTFImplements(func, tf_implements_attr); } auto tf_api_implements_attr = diff --git a/tensorflow/compiler/mlir/lite/utils/tftext_utils.cc b/tensorflow/compiler/mlir/lite/utils/tftext_utils.cc index 1681f654b92..2ed0891dc59 100644 --- a/tensorflow/compiler/mlir/lite/utils/tftext_utils.cc +++ b/tensorflow/compiler/mlir/lite/utils/tftext_utils.cc @@ -44,9 +44,7 @@ namespace TFL { namespace { constexpr char kWhitespaceTokenizer[] = "tftext:WhitespaceTokenizer"; -constexpr char kTFImplements[] = "tf._implements"; - -using mlir::TF::FuncAttr; +constexpr char kTFAPIImplements[] = "tf.api_implements"; inline OpaqueElementsAttr emptyCustomOption(OpBuilder* builder) { std::string content = ""; @@ -123,11 +121,11 @@ LogicalResult VerifyWhitespaceTokenizer(mlir::FuncOp func) { return success(); } -LogicalResult ConvertWhitespaceTokenizer(mlir::FuncOp func, llvm::StringRef api, - FuncAttr attr) { +LogicalResult ConvertWhitespaceTokenizer(mlir::FuncOp func, + llvm::StringRef api) { func.eraseBody(); func.addEntryBlock(); - func.setAttr(kTFImplements, attr); + func.setAttr(kTFAPIImplements, StringAttr::get(api, func.getContext())); Value text = func.getArgument(0); OpBuilder builder(func.getBody()); @@ -139,21 +137,20 @@ LogicalResult ConvertWhitespaceTokenizer(mlir::FuncOp func, llvm::StringRef api, } } // namespace -LogicalResult ConvertTFTextAPI(mlir::FuncOp func, llvm::StringRef api, - FuncAttr attr) { +LogicalResult ConvertTFTextAPI(mlir::FuncOp func, llvm::StringRef api) { if (api.str() == kWhitespaceTokenizer) { if (succeeded(VerifyWhitespaceTokenizer(func))) { - return ConvertWhitespaceTokenizer(func, api, attr); + return ConvertWhitespaceTokenizer(func, api); } } return failure(); } -bool IsTFTextRegistered(const tensorflow::OpRegistry* op_registery) { - const std::vector kTFTextOps = { +bool IsTfTextRegistered(const tensorflow::OpRegistry* op_registery) { + const std::vector kTfTextOps = { "WhitespaceTokenizeWithOffsets", }; - for (const auto& iter : kTFTextOps) { + for (const auto& iter : kTfTextOps) { if (op_registery->LookUp(iter)) { return true; } diff --git a/tensorflow/compiler/mlir/lite/utils/tftext_utils.h b/tensorflow/compiler/mlir/lite/utils/tftext_utils.h index 55e4680c3dd..c52ee019d8d 100644 --- a/tensorflow/compiler/mlir/lite/utils/tftext_utils.h +++ b/tensorflow/compiler/mlir/lite/utils/tftext_utils.h @@ -27,18 +27,14 @@ limitations under the License. #include "mlir/IR/Value.h" // from @llvm-project #include "mlir/Support/LogicalResult.h" // from @llvm-project #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h" -#include "tensorflow/compiler/mlir/tensorflow/ir/tf_attributes.h" #include "tensorflow/core/framework/op.h" namespace mlir { namespace TFL { -// Fuse TF.Text APIs annotated by tf.function to a TFLite custom op. 
-LogicalResult ConvertTFTextAPI(mlir::FuncOp func, llvm::StringRef api, - mlir::TF::FuncAttr attr); +LogicalResult ConvertTFTextAPI(mlir::FuncOp func, llvm::StringRef api); -// Check if TF.Text Tensorflow ops are registered. -bool IsTFTextRegistered(const tensorflow::OpRegistry* op_registery); +bool IsTfTextRegistered(const tensorflow::OpRegistry* op_registery); } // end namespace TFL } // end namespace mlir diff --git a/tensorflow/compiler/mlir/lite/utils/tftext_utils_test.cc b/tensorflow/compiler/mlir/lite/utils/tftext_utils_test.cc index 9bcfa89c544..7d29264aaae 100644 --- a/tensorflow/compiler/mlir/lite/utils/tftext_utils_test.cc +++ b/tensorflow/compiler/mlir/lite/utils/tftext_utils_test.cc @@ -41,13 +41,13 @@ void Register(const std::string& op_name, OpRegistry* registry) { TEST(TfTextUtilsTest, TestTfTextRegistered) { std::unique_ptr registry(new OpRegistry); Register("WhitespaceTokenizeWithOffsets", registry.get()); - EXPECT_TRUE(IsTFTextRegistered(registry.get())); + EXPECT_TRUE(IsTfTextRegistered(registry.get())); } TEST(TfTextUtilsTest, TestTfTextNotRegistered) { std::unique_ptr registry(new OpRegistry); Register("Test", registry.get()); - EXPECT_FALSE(IsTFTextRegistered(registry.get())); + EXPECT_FALSE(IsTfTextRegistered(registry.get())); } } // namespace TFL } // namespace mlir From 605c697cbcc44fce0ddbc4828e43c3c812e3faea Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Jul 2020 02:01:31 -0700 Subject: [PATCH 0257/2522] Update GraphDef version to 461. PiperOrigin-RevId: 320915843 Change-Id: I2467ee13879e522f681ff402dbd7c049cf309c10 --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 2b00930f8d5..5119d0c098a 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 460 // Updated: 2020/7/12 +#define TF_GRAPH_DEF_VERSION 461 // Updated: 2020/7/13 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From 7693360b5eff50ca5342d8c82d0d1a6bfe197e55 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Jul 2020 02:01:32 -0700 Subject: [PATCH 0258/2522] compat: Update forward compatibility horizon to 2020-07-13 PiperOrigin-RevId: 320915844 Change-Id: I98a6a20c996cb5d0fe7cd257c6ca9b6f6a208dad --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 0a00ebcc2f9..147aba08d04 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 12) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 13) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From 3b8ef731b0f84f94a33b7a0f01439297c850965c Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Mon, 13 Jul 2020 04:03:19 -0700 Subject: [PATCH 0259/2522] Integrate LLVM at https://github.com/llvm/llvm-project/commit/e808cab82448 PiperOrigin-RevId: 320928033 Change-Id: I77491ef74af49aab773563edad486446dc91d461 --- tensorflow/workspace.bzl | 4 ++-- third_party/mlir/BUILD | 3 +++ third_party/mlir/test.BUILD | 12 ++++++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 2535941d668..5d83478e39a 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -710,8 +710,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "943660fd15f193dc6961597c25541fee2e01ebbb" - LLVM_SHA256 = "72a3f845eb1839b32bccaffa317517cca910511896b68f5c18959a579d57d4f2" + LLVM_COMMIT = "e808cab824488af137b62902e65dec3827b83b46" + LLVM_SHA256 = "99c30723f2b066bc9145bc43f762cdbde10fada818004dd0cc56abc5761606ac" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), diff --git a/third_party/mlir/BUILD b/third_party/mlir/BUILD index 5627067ac5e..18a7c4df7d7 100644 --- a/third_party/mlir/BUILD +++ b/third_party/mlir/BUILD @@ -545,6 +545,7 @@ cc_library( deps = [ ":Affine", ":AffinePassIncGen", + ":AffineUtils", ":Analysis", ":IR", ":Pass", @@ -2768,6 +2769,7 @@ cc_library( "@llvm-project//mlir/test:TestDialect", "@llvm-project//mlir/test:TestIR", "@llvm-project//mlir/test:TestPass", + "@llvm-project//mlir/test:TestReducer", "@llvm-project//mlir/test:TestSPIRV", "@llvm-project//mlir/test:TestTransforms", ], @@ -2916,6 +2918,7 @@ cc_binary( "@llvm-project//mlir/test:TestDialect", "@llvm-project//mlir/test:TestIR", "@llvm-project//mlir/test:TestPass", + "@llvm-project//mlir/test:TestReducer", "@llvm-project//mlir/test:TestSPIRV", "@llvm-project//mlir/test:TestTransforms", ], diff --git a/third_party/mlir/test.BUILD b/third_party/mlir/test.BUILD index 4b999bfa466..e0966054542 100644 --- a/third_party/mlir/test.BUILD +++ b/third_party/mlir/test.BUILD @@ -167,6 +167,18 @@ cc_library( ], ) +cc_library( + name = "TestReducer", + srcs = [ + "lib/Reducer/MLIRTestReducer.cpp", + ], + deps = [ + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:Support", + ], +) + cc_library( name = "TestTransforms", srcs = glob(["lib/Transforms/*.cpp"]), From 727a265ef53fc34456a1ba0aee4881257e2d394d Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Mon, 13 Jul 2020 18:08:34 +0700 Subject: [PATCH 0260/2522] S3 registration --- .../experimental/filesystem/plugins/s3/BUILD | 32 ++++++ .../filesystem/plugins/s3/s3_filesystem.cc | 100 ++++++++++++++++++ .../filesystem/plugins/s3/s3_filesystem.h | 24 +++++ 3 files changed, 156 insertions(+) create mode 100644 tensorflow/c/experimental/filesystem/plugins/s3/BUILD create mode 100644 tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc create mode 100644 tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/BUILD 
b/tensorflow/c/experimental/filesystem/plugins/s3/BUILD new file mode 100644 index 00000000000..d72db1a3f41 --- /dev/null +++ b/tensorflow/c/experimental/filesystem/plugins/s3/BUILD @@ -0,0 +1,32 @@ +# Experimental gcs filesystem plugin. +load("//tensorflow:tensorflow.bzl", "get_win_copts", "tf_cc_shared_object", "tf_cc_test") + +package( + licenses = ["notice"], # Apache 2.0 +) + +# Filesystem implementation for GCS environments +tf_cc_shared_object( + name = "s3_filesystem", + framework_so = [], + linkstatic = False, + per_os_targets = 1, + visibility = ["//visibility:public"], + deps = [":s3_filesystem_impl"], +) + +# The real implementation of the filesystem. +cc_library( + name = "s3_filesystem_impl", + srcs = ["s3_filesystem.cc"], + hdrs = ["s3_filesystem.h"], + copts = select({ + "//conditions:default": [], + "//tensorflow:windows": get_win_copts(), + }), + deps = [ + "//tensorflow/c:tf_status", + "//tensorflow/c/experimental/filesystem:filesystem_interface", + "@aws", + ], +) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc new file mode 100644 index 00000000000..45350565500 --- /dev/null +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc @@ -0,0 +1,100 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h" + +#include +#include + +#include "tensorflow/c/experimental/filesystem/filesystem_interface.h" +#include "tensorflow/c/tf_status.h" + +// Implementation of a filesystem for S3 environments. +// This filesystem will support `s3://` URI schemes. + +static void* plugin_memory_allocate(size_t size) { return calloc(1, size); } +static void plugin_memory_free(void* ptr) { free(ptr); } + +static void ParseS3Path(const Aws::String& fname, bool object_empty_ok, + Aws::String* bucket, Aws::String* object, + TF_Status* status) { + size_t scheme_end = fname.find("://") + 2; + if (fname.substr(0, scheme_end + 1) != "s3://") { + TF_SetStatus(status, TF_INVALID_ARGUMENT, + "S3 path doesn't start with 's3://'."); + return; + } + + size_t bucket_end = fname.find("/", scheme_end + 1); + if (bucket_end == std::string::npos) { + TF_SetStatus(status, TF_INVALID_ARGUMENT, + "S3 path doesn't contain a bucket name."); + return; + } + + *bucket = fname.substr(scheme_end + 1, bucket_end - scheme_end - 1); + *object = fname.substr(bucket_end + 1); + + if (object->empty() && !object_empty_ok) { + TF_SetStatus(status, TF_INVALID_ARGUMENT, + "S3 path doesn't contain an object name."); + } +} + +// SECTION 1. Implementation for `TF_RandomAccessFile` +// ---------------------------------------------------------------------------- +namespace tf_random_access_file { + +// TODO(vnvo2409): Implement later + +} // namespace tf_random_access_file + +// SECTION 2. 
Implementation for `TF_WritableFile` +// ---------------------------------------------------------------------------- +namespace tf_writable_file { + +// TODO(vnvo2409): Implement later + +} // namespace tf_writable_file + +// SECTION 3. Implementation for `TF_ReadOnlyMemoryRegion` +// ---------------------------------------------------------------------------- +namespace tf_read_only_memory_region { + +// TODO(vnvo2409): Implement later + +} // namespace tf_read_only_memory_region + +// SECTION 4. Implementation for `TF_Filesystem`, the actual filesystem +// ---------------------------------------------------------------------------- +namespace tf_s3_filesystem { + +// TODO(vnvo2409): Implement later + +} // namespace tf_s3_filesystem + +static void ProvideFilesystemSupportFor(TF_FilesystemPluginOps* ops, + const char* uri) { + TF_SetFilesystemVersionMetadata(ops); + ops->scheme = strdup(uri); +} + +void TF_InitPlugin(TF_FilesystemPluginInfo* info) { + info->plugin_memory_allocate = plugin_memory_allocate; + info->plugin_memory_free = plugin_memory_free; + info->num_schemes = 1; + info->ops = static_cast( + plugin_memory_allocate(info->num_schemes * sizeof(info->ops[0]))); + ProvideFilesystemSupportFor(&info->ops[0], "s3"); +} diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h new file mode 100644 index 00000000000..62e2a7e0c06 --- /dev/null +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h @@ -0,0 +1,24 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_C_EXPERIMENTAL_FILESYSTEM_PLUGINS_S3_S3_FILESYSTEM_H_ +#define TENSORFLOW_C_EXPERIMENTAL_FILESYSTEM_PLUGINS_S3_S3_FILESYSTEM_H_ + +#include +#include + +#include "tensorflow/c/experimental/filesystem/filesystem_interface.h" +#include "tensorflow/c/tf_status.h" + +#endif // TENSORFLOW_C_EXPERIMENTAL_FILESYSTEM_PLUGINS_S3_S3_FILESYSTEM_H_ From 03bc968d67c368928c7c3a193e5eb167c3ef9b97 Mon Sep 17 00:00:00 2001 From: Jaesung Chung Date: Mon, 13 Jul 2020 06:42:15 -0700 Subject: [PATCH 0261/2522] Add shard count to tflite_convert_test target This is for removing flakiness of the test target. PiperOrigin-RevId: 320945448 Change-Id: I06eb829ae2411e51202aad8231c8f87cdc194a55 --- tensorflow/lite/python/BUILD | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/lite/python/BUILD b/tensorflow/lite/python/BUILD index 453aa803515..63be9602bc2 100644 --- a/tensorflow/lite/python/BUILD +++ b/tensorflow/lite/python/BUILD @@ -96,6 +96,8 @@ py_test( "@tflite_mobilenet_ssd_quant_protobuf//:tflite_graph.pb", ], python_version = "PY3", + # Increased thread count for reducing timeout failures. 
+ shard_count = 4, srcs_version = "PY2AND3", tags = [ "no_oss", From 28164281be5ff1d1e460083844bfb544c32f1710 Mon Sep 17 00:00:00 2001 From: Elena Zhelezina Date: Mon, 13 Jul 2020 15:11:44 +0100 Subject: [PATCH 0262/2522] Addressed reviewer comments. Change-Id: I22d0ed35fa773087b1e5497ce19428a0c0f0ae61 --- tensorflow/lite/kernels/reduce.cc | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tensorflow/lite/kernels/reduce.cc b/tensorflow/lite/kernels/reduce.cc index 9e078b6bdb4..6d77cd0d78d 100644 --- a/tensorflow/lite/kernels/reduce.cc +++ b/tensorflow/lite/kernels/reduce.cc @@ -334,6 +334,7 @@ TfLiteStatus EvalMeanReferenceOps(TfLiteContext* context, GetTensorData(temp_sum), /*compute_sum=*/false)); } + return kTfLiteOk; } template @@ -463,16 +464,19 @@ TfLiteStatus EvalMean(TfLiteContext* context, TfLiteNode* node) { GetTensorData(temp_sum))); break; case kTfLiteInt8: { - EvalMeanReferenceOps(context, op_context, num_axis, data, - temp_index, resolved_axis, temp_sum); + TF_LITE_ENSURE_OK(context, EvalMeanReferenceOps( + context, op_context, num_axis, data, + temp_index, resolved_axis, temp_sum)); } break; case kTfLiteInt16: { - EvalMeanReferenceOps(context, op_context, num_axis, data, - temp_index, resolved_axis, temp_sum); + TF_LITE_ENSURE_OK(context, EvalMeanReferenceOps( + context, op_context, num_axis, data, + temp_index, resolved_axis, temp_sum)); } break; case kTfLiteUInt8: { - EvalMeanReferenceOps(context, op_context, num_axis, data, - temp_index, resolved_axis, temp_sum); + TF_LITE_ENSURE_OK(context, EvalMeanReferenceOps( + context, op_context, num_axis, data, + temp_index, resolved_axis, temp_sum)); } break; default: return kTfLiteError; From ae53d7c968bb6021bc9d268c1eb45fc3a3419b92 Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Mon, 13 Jul 2020 07:22:57 -0700 Subject: [PATCH 0263/2522] Integrate LLVM at https://github.com/llvm/llvm-project/commit/6bda276f9302 PiperOrigin-RevId: 320950832 Change-Id: Ibb0ea3f37bc571e51bfa5019807a81d3dcf51ca9 --- tensorflow/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 5d83478e39a..9af009075db 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -710,8 +710,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "e808cab824488af137b62902e65dec3827b83b46" - LLVM_SHA256 = "99c30723f2b066bc9145bc43f762cdbde10fada818004dd0cc56abc5761606ac" + LLVM_COMMIT = "6bda276f93023ae91937cb8a1f45bf27e5a3ced7" + LLVM_SHA256 = "661d02c6c56ec2e93e23c13d669b44c4506422a9d7af8323d9b368e1595af952" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From 261d74db28ad0c17b0f43c64c42cd3422fb6c9b2 Mon Sep 17 00:00:00 2001 From: Elena Zhelezina Date: Mon, 13 Jul 2020 15:33:10 +0100 Subject: [PATCH 0264/2522] Updated the name of the option 'quant_16x8'. 
Change-Id: I97ff4ef79cda4a2d22c2581892645ab0d631ac43 --- tensorflow/lite/testing/op_tests/pad.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tensorflow/lite/testing/op_tests/pad.py b/tensorflow/lite/testing/op_tests/pad.py index e49a54d0557..ab243d4edc6 100644 --- a/tensorflow/lite/testing/op_tests/pad.py +++ b/tensorflow/lite/testing/op_tests/pad.py @@ -38,7 +38,7 @@ def make_pad_tests(options): [[0, 1], [0, 0], [0, 0], [2, 3]]], "constant_paddings": [True, False], "fully_quantize": [False], - "quantize_mode_16x8": [False] + "quant_16x8": [False] }, # 2D: { @@ -47,7 +47,7 @@ def make_pad_tests(options): "paddings": [[[0, 1], [2, 3]]], "constant_paddings": [True, False], "fully_quantize": [False], - "quantize_mode_16x8": [False] + "quant_16x8": [False] }, # 1D: { @@ -56,7 +56,7 @@ def make_pad_tests(options): "paddings": [[[1, 2]]], "constant_paddings": [False], "fully_quantize": [False], - "quantize_mode_16x8": [False] + "quant_16x8": [False] }, # 4D: { @@ -67,7 +67,7 @@ def make_pad_tests(options): [[0, 0], [0, 0], [0, 0], [0, 0]]], "constant_paddings": [True], "fully_quantize": [True], - "quantize_mode_16x8": [False] + "quant_16x8": [False] }, { "dtype": [tf.float32], @@ -77,7 +77,7 @@ def make_pad_tests(options): [[0, 0], [0, 0], [0, 0], [0, 0]]], "constant_paddings": [True], "fully_quantize": [True], - "quantize_mode_16x8": [True] + "quant_16x8": [True] }, # 2D: { @@ -86,7 +86,7 @@ def make_pad_tests(options): "paddings": [[[0, 1], [2, 3]]], "constant_paddings": [True], "fully_quantize": [True], - "quantize_mode_16x8": [False], + "quant_16x8": [False], }, { "dtype": [tf.float32], @@ -94,7 +94,7 @@ def make_pad_tests(options): "paddings": [[[0, 1], [2, 3]]], "constant_paddings": [True], "fully_quantize": [True], - "quantize_mode_16x8": [True], + "quant_16x8": [True], }, # 1D: { @@ -103,7 +103,7 @@ def make_pad_tests(options): "paddings": [[[1, 2]]], "constant_paddings": [True], "fully_quantize": [True], - "quantize_mode_16x8": [False], + "quant_16x8": [False], }, { "dtype": [tf.float32], @@ -111,7 +111,7 @@ def make_pad_tests(options): "paddings": [[[1, 2]]], "constant_paddings": [True], "fully_quantize": [True], - "quantize_mode_16x8": [True], + "quant_16x8": [True], }, ] From 14b9a9d66154673cad0e674ec04a3ade6d61a708 Mon Sep 17 00:00:00 2001 From: Stephan Herhut Date: Mon, 13 Jul 2020 08:26:53 -0700 Subject: [PATCH 0265/2522] Implement lowering of lmhlo.reshape_memref_cast to LLVM for unknown length shape operand. 
PiperOrigin-RevId: 320959625 Change-Id: Ib7f3d58d8cad4bebceaf9d943ce0762d6282b548 --- .../mhlo/transforms/lhlo_legalize_to_llvm.cc | 145 ++++++++++++++++-- 1 file changed, 128 insertions(+), 17 deletions(-) diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_llvm.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_llvm.cc index 32606f068a8..dd3e2f8697d 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_llvm.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_llvm.cc @@ -133,8 +133,8 @@ struct ReshapeMemRefCastOpConverter Location loc = op->getLoc(); auto reshape_op = cast(op); - Type dst_type = reshape_op.getResult().getType(); - auto element_type = dst_type.cast().getElementType(); + auto dst_type = reshape_op.getResult().getType().cast(); + auto element_type = dst_type.getElementType(); auto shape = reshape_op.shape(); @@ -162,18 +162,17 @@ struct ReshapeMemRefCastOpConverter desc.setAlignedPtr(rewriter, loc, ptrs_n_offset.aligned_ptr); desc.setOffset(rewriter, loc, ptrs_n_offset.offset); - auto llvmIndexTy = typeConverter.convertType(rewriter.getIndexType()) - .cast(); - auto llvmIndexTyPtr = llvmIndexTy.getPointerTo(); + auto llvm_index_type = typeConverter.getIndexType(); + auto llvm_index_ptr_type = llvm_index_type.getPointerTo(); Value stride_carried = rewriter.create( - loc, llvmIndexTy, + loc, llvm_index_type, rewriter.getIntegerAttr(rewriter.getIndexType(), 1)); for (int i = shape_length - 1; i >= 0; --i) { Value pos = rewriter.create( - loc, llvmIndexTy, + loc, llvm_index_type, rewriter.getIntegerAttr(rewriter.getIndexType(), i)); Value ptr = rewriter.create( - loc, llvmIndexTyPtr, shape_desc.alignedPtr(rewriter, loc), + loc, llvm_index_ptr_type, shape_desc.alignedPtr(rewriter, loc), ValueRange{pos}); Value extracted_size = rewriter.create(loc, ptr); desc.setSize(rewriter, loc, i, extracted_size); @@ -188,7 +187,7 @@ struct ReshapeMemRefCastOpConverter rewriter.replaceOp(op, {desc}); } else { Value rank = rewriter.create( - loc, llvmIndexTy, + loc, llvm_index_type, rewriter.getIntegerAttr(rewriter.getIndexType(), shape_length)); Value alloca = typeConverter.promoteOneMemRefDescriptor(loc, desc, rewriter); @@ -199,15 +198,127 @@ struct ReshapeMemRefCastOpConverter {rank, void_ptr}); rewriter.replaceOp(op, {unranked_desc}); } - } else { - /* - * TODO(pifon, herhut): - * Compute strides with llvm.loop; - * Use UnrankedMemrefDescr::ComputeSize with Alloca; - * Set all the fields using getelementptr. - */ - return failure(); + return success(); } + + // The shape is a rank-1 tensor with unknown length. + Value result_rank = shape_desc.size(rewriter, loc, 0); + // TODO(herhut): Propely handle address spaces. + unsigned address_space = 0; + auto target_type = + typeConverter + .convertType(UnrankedMemRefType::get(element_type, address_space)) + .cast(); + // Create the unranked memref descriptor that holds the ranked one. The + // inner descriptor is allocated on stack. 
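// [Editor's note - standalone sketch, not part of the patch] The GEPs emitted
// below use struct indices 0, 1 and 2 because a ranked memref descriptor
// lowered to LLVM conventionally has the layout mirrored by the C++ structs
// in this sketch; the unranked descriptor only carries the rank plus a
// type-erased pointer to such a struct (here allocated on the stack). Field
// names are this editor's; only the field order matters.
#include <cstdint>

struct RankedMemRefDescriptorRank2 {   // example instance: rank 2, f32
  float* allocated;    // struct index 0: pointer returned by the allocator
  float* aligned;      // struct index 1: aligned pointer used for indexing
  int64_t offset;      // struct index 2: linear offset into `aligned`
  int64_t sizes[2];    // struct index 3: size of each dimension
  int64_t strides[2];  // struct index 4: stride of each dimension
};

struct UnrankedMemRefDescriptorSketch {
  int64_t rank;              // known only at runtime
  void* ranked_descriptor;   // points at a ranked descriptor of that rank
};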
+ UnrankedMemRefDescriptor target_desc = + UnrankedMemRefDescriptor::undef(rewriter, loc, target_type); + target_desc.setRank(rewriter, loc, result_rank); + SmallVector sizes; + UnrankedMemRefDescriptor::computeSizes(rewriter, loc, typeConverter, + {target_desc}, sizes); + auto void_ptr_type = + LLVM::LLVMType::getInt8PtrTy(typeConverter.getDialect()); + Value ranked_desc_mem = rewriter.create( + loc, void_ptr_type, sizes.front(), llvm::None); + target_desc.setMemRefDescPtr(rewriter, loc, ranked_desc_mem); + + // Fill the fixed parts. For this, we cast to a 0-D memref. + auto zero_d_memref_type = MemRefType::get({}, element_type); + Value as_zero_d = rewriter.create( + loc, + typeConverter.convertType(zero_d_memref_type) + .cast() + .getPointerTo(address_space), + ranked_desc_mem); + // Some common constants. Use 32 bit where required by gep struct indexes. + auto int32_type = typeConverter.convertType(rewriter.getI32Type()); + Value zero_index = rewriter.create( + loc, typeConverter.getIndexType(), rewriter.getIndexAttr(0)); + Value zero = rewriter.create( + loc, int32_type, rewriter.getI32IntegerAttr(0)); + Value one = rewriter.create( + loc, int32_type, rewriter.getI32IntegerAttr(1)); + Value two = rewriter.create( + loc, int32_type, rewriter.getI32IntegerAttr(2)); + // Set base_pointer and aligned pointer. + auto element_ptr_ptr_type = typeConverter.convertType(element_type) + .cast() + .getPointerTo(address_space) + .getPointerTo(address_space); + auto base_gep = rewriter.create( + loc, element_ptr_ptr_type, as_zero_d, ValueRange({zero_index, zero})); + rewriter.create(loc, ptrs_n_offset.allocated_ptr, base_gep); + auto aligned_gep = rewriter.create( + loc, element_ptr_ptr_type, as_zero_d, ValueRange({zero_index, one})); + rewriter.create(loc, ptrs_n_offset.aligned_ptr, aligned_gep); + // Set offset. + auto index_ptr_type = + typeConverter.getIndexType().getPointerTo(address_space); + auto offset_gep = rewriter.create( + loc, index_ptr_type, as_zero_d, ValueRange({zero_index, two})); + rewriter.create(loc, ptrs_n_offset.offset, offset_gep); + + // Use the offset pointer as base for further addressing. Copy over the + // new shape and compute strides. For this, we need to create a loop from + // rank - 1 to 0. + Value one_index = rewriter.create( + loc, typeConverter.getIndexType(), rewriter.getIndexAttr(1)); + auto target_shape_base = rewriter.create( + loc, index_ptr_type, offset_gep, ValueRange({one})); + auto target_strides_base = rewriter.create( + loc, index_ptr_type, target_shape_base, ValueRange({result_rank})); + auto shape_ptr = shape_desc.alignedPtr(rewriter, loc); + auto result_rank_minus_one = + rewriter.create(loc, result_rank, one_index); + + Block *init_block = rewriter.getInsertionBlock(); + Block *cond_block = + rewriter.splitBlock(init_block, rewriter.getInsertionPoint()); + rewriter.setInsertionPointToEnd(init_block); + rewriter.create( + loc, ValueRange({result_rank_minus_one, one_index}), cond_block); + rewriter.setInsertionPointToStart(cond_block); + auto index_arg = cond_block->addArgument(typeConverter.getIndexType()); + auto stride_arg = cond_block->addArgument(typeConverter.getIndexType()); + auto pred = rewriter.create( + loc, LLVM::LLVMType::getInt1Ty(typeConverter.getDialect()), + LLVM::ICmpPredicate::sge, index_arg, zero_index); + + Block *body_block = + rewriter.splitBlock(cond_block, rewriter.getInsertionPoint()); + rewriter.setInsertionPointToStart(body_block); + + // Copy size from shape to descriptor. 
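// [Editor's note - standalone sketch, not part of the patch] The loop being
// emitted here performs, in generated LLVM IR, the scalar computation below:
// walk the shape operand from the innermost dimension outwards, copy each
// size into the target descriptor, and carry a running product of sizes as
// the stride of the next outer dimension.
#include <cstdint>
#include <vector>

std::vector<int64_t> ComputeRowMajorStrides(const std::vector<int64_t>& shape) {
  std::vector<int64_t> strides(shape.size());
  int64_t running = 1;
  for (int64_t i = static_cast<int64_t>(shape.size()) - 1; i >= 0; --i) {
    strides[i] = running;  // stride of dimension i
    running *= shape[i];   // accumulated element count of inner dimensions
  }
  return strides;
}
// For example, a runtime shape {2, 3, 4} yields strides {12, 4, 1}.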
+ auto size_load_gep = rewriter.create( + loc, index_ptr_type, shape_ptr, ValueRange{index_arg}); + auto extracted_size = rewriter.create(loc, size_load_gep); + auto size_store_gep = rewriter.create( + loc, index_ptr_type, target_shape_base, ValueRange({index_arg})); + rewriter.create(loc, extracted_size, size_store_gep); + // Write stride value and compute next one. + auto stride_store_gep = rewriter.create( + loc, index_ptr_type, target_strides_base, ValueRange({index_arg})); + rewriter.create(loc, stride_arg, stride_store_gep); + auto next_stride = + rewriter.create(loc, stride_arg, extracted_size); + + // Decrement loop counter and branch back. + auto decrement = rewriter.create(loc, index_arg, one_index); + rewriter.create(loc, ValueRange({decrement, next_stride}), + cond_block); + + Block *remainder = + rewriter.splitBlock(body_block, rewriter.getInsertionPoint()); + + // Hook up the cond exit to the remainder. + rewriter.setInsertionPointToEnd(cond_block); + rewriter.create(loc, pred, body_block, ValueRange(), + remainder, ValueRange()); + + // Reset position to beginning of new remainder block. + rewriter.setInsertionPointToStart(remainder); + rewriter.replaceOp(op, {target_desc}); return success(); } From 20964790b55e6c477b0105f39eb2bc040130e809 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Jul 2020 08:38:23 -0700 Subject: [PATCH 0266/2522] [XLA] rollforward of cl/320810794: Rewriting conditional code motion to enable better fusion optimization. NEW: Changed down_cast (which is supported only in gogole3) to static_cast to address portability problem. PiperOrigin-RevId: 320961531 Change-Id: I878b853f8f3f010dae173cc79ee074d4b9d6474f --- .../xla/service/conditional_code_motion.cc | 734 ++++++++++-------- .../xla/service/conditional_code_motion.h | 67 +- .../service/conditional_code_motion_test.cc | 117 ++- 3 files changed, 527 insertions(+), 391 deletions(-) diff --git a/tensorflow/compiler/xla/service/conditional_code_motion.cc b/tensorflow/compiler/xla/service/conditional_code_motion.cc index 6db4c3eb6d4..5d858096679 100644 --- a/tensorflow/compiler/xla/service/conditional_code_motion.cc +++ b/tensorflow/compiler/xla/service/conditional_code_motion.cc @@ -46,161 +46,63 @@ limitations under the License. namespace xla { -namespace { - -struct ConditionalBoundary { - ConditionalBoundary(HloInstruction* op, int64 op_index, HloInstruction* usr) - : operand(op), operand_index(op_index), user(usr) {} - // `operand` is one of `user`'s operand. - - // Instruction that remains in the conditional but one of its user - // is moved out of conditonal. - HloInstruction* operand; - // operand_index for `operand` in the `user`. - int64 operand_index; - // Instruction that moved out of conditional. - HloInstruction* user; -}; +namespace conditional_opt { // Visit the root instructions to its operands follow BFS. // Will visit an instructions after all its users have been visited. Parameters // are not visited. -class BranchVisitor { +class BoundaryVisitor { public: - explicit BranchVisitor(const HloComputation* branch_computation) { - HloInstruction* root_inst = branch_computation->root_instruction(); - worklist_.push_back(root_inst); - visited_.insert(root_inst); - for (auto parameter_inst : branch_computation->parameter_instructions()) { - parameter_instructions_.insert(parameter_inst); - } + // start with an existing conditional computation. 
+ explicit BoundaryVisitor(HloInstruction* conditional) { + Boundary b(Boundary::Position::kInsideBranch); + b.Operands().push_back(conditional); + worklist_.push_back(b); } + // Start with an empty work list. + BoundaryVisitor() {} // Get next intruction to visit. - HloInstruction* GetNextInstruction() { - if (!worklist_.empty()) { - HloInstruction* inst = worklist_.front(); - worklist_.pop_front(); - return inst; - } - return nullptr; + Boundary PopNextBoundary() { + CHECK(!worklist_.empty()); + Boundary inst = worklist_.front(); + worklist_.pop_front(); + return inst; + } + void AddToWorkList(const Boundary& b) { + CHECK(!b.Operands().empty()); + worklist_.push_back(b); } - // Add operands of one instruction to worklist for further visit. - void AddInstructionOperands(HloInstruction* inst) { - int64 operand_count = inst->operand_count(); - for (int i = 0; i < operand_count; i++) { - HloInstruction* operand = inst->mutable_operand(i); - if (ContainsKey(visited_, operand)) { - continue; - } - bool all_user_visited = std::all_of( - operand->users().begin(), operand->users().end(), - [&](HloInstruction* user) { return ContainsKey(visited_, user); }); - - if (!all_user_visited) { - continue; - } - // Do not visit parameter_instructions. - if (ContainsKey(parameter_instructions_, operand)) { - // Add the operand and this instruction to the boundaries. - boundaries_.emplace_back(operand, i, inst); - continue; - } - worklist_.push_back(operand); - visited_.insert(operand); - } - } - - // Add instruction and its users to conditional boundaries. - void AddInstructionToBoundary(HloInstruction* inst) { - for (auto user : inst->users()) { - boundaries_.emplace_back(inst, user->operand_index(inst), user); - } - } - - // Add instruction to the to be removed instructions set and vector. - void AddInstructionToHoist(HloInstruction* inst) { - instructions_to_hoist_set_.insert(inst); - instructions_to_hoist_.emplace_back(inst); - } - - // If visitor has next instruction to visit. - bool HasNextInstruction() const { return !worklist_.empty(); } - - // If there is no hoist intruction. - int64 HoistInstructionSize() { return instructions_to_hoist_.size(); } - - // Get boundaries of this branch. - const std::vector& boundaries() const { - return boundaries_; - } - - // Get instructions to hoist in this branch. - const std::vector& instructions_to_hoist() const { - return instructions_to_hoist_; - } - - // Get hoist instruction set in this branch. - const std::unordered_set& instructions_to_hoist_set() const { - return instructions_to_hoist_set_; - } + bool HasNextBoundary() const { return !worklist_.empty(); } private: // worklist is the deque that contains instructions to be visited. - std::deque worklist_; - - // instructions that has been visited. - std::unordered_set visited_; - - // parameter instructions of the branch. - std::unordered_set parameter_instructions_; - - // Boundaries contains the set of instructions that its operand is within - // conditional but it can be hoist out of conditional. - std::vector boundaries_; - - // Instructions to hoist. - std::unordered_set instructions_to_hoist_set_; - - // Instructions to hoist, the order within this vector is BFS and - // an instruction's order will always be after its users. - std::vector instructions_to_hoist_; + std::deque worklist_; }; -// Returns true if `instruction` is worth hoisting out. 
-bool WorthHoisting(HloInstruction* instruction) { - for (const auto* operand : instruction->operands()) { - // Only move out instructions that won't share the same operand - // to avoid copy of the operand. - if (operand->user_count() > 1) { - return false; - } - } - switch (instruction->opcode()) { - case HloOpcode::kConvert: - // If Convert is after AllReduce, it is worth moving out AllReduce out - // of conditional for AR/CRS combine. If Convert is after other ops such - // as Dot or Convolutional, it is better to keep convert within - // conditional so that convert can be fused with Dot or Convolutional. - // - // TODO(b/154283721): figure out the scenario when convert can be fused - // with AllReduce out of conditional. - if (instruction->operand(0)->opcode() == HloOpcode::kAllReduce) { - return true; - } - return false; - case HloOpcode::kAllReduce: - case HloOpcode::kAdd: - case HloOpcode::kConstant: - case HloOpcode::kSubtract: - case HloOpcode::kMultiply: - case HloOpcode::kDivide: - case HloOpcode::kTuple: - case HloOpcode::kSqrt: +// Returns estimation of potential reuses carried by a given instruction. +// Use different integers to classify different levels of reuses +// This is used as a placeholder only, assuming all instructions can be +// fused to enable data reuses +int64 ReusesCarriedBy(HloInstruction* op, HloInstruction* user) { + VLOG(1) << "ConditionalCodeMotion: Add reuses carried by instr: " + << op->ToString() << "=>" << user->ToString() << "\n"; + switch (user->opcode()) { case HloOpcode::kGetTupleElement: - return true; + return 0; default: - return false; + break; + } + switch (op->opcode()) { + // These instructions are lightweight and easy to fuse. + case HloOpcode::kConstant: + return 0; + default: + // Assume fusion will not happen anyway if user count > 1) + if (op->user_count() > 1) { + return 0; + } + return 10; } } @@ -220,7 +122,7 @@ bool InstructionWithinBranchIdentical( return *a == *b; }; - if (instructions[0] == nullptr) { + if (instructions.empty()) { return false; } @@ -248,109 +150,27 @@ bool InstructionWithinBranchIdentical( }); } -// Returns if all the visitors/branches has next instruction to visit. -bool HasNextInstruction(const std::vector& visitors) { - bool has_next = true; - for (const auto& visitor : visitors) { - has_next &= visitor.HasNextInstruction(); - } - return has_next; -} - -// Create tuple element as the new root of the branch. The tuple will contain -// the operands that can't move out of conditional but its user will be moved -// out of conditional. -HloInstruction* CreateNewRoot( - const std::vector& boundaries, - const std::unordered_set& instructions_to_hoist_set, - HloComputation* computation) { - std::vector elements; - elements.reserve(boundaries.size()); - for (auto boundary : boundaries) { - if (ContainsKey(instructions_to_hoist_set, boundary.user)) { - elements.push_back(boundary.operand); - } - } - return computation->AddInstruction(HloInstruction::CreateTuple(elements)); -} - // Copy identical instructions within conditional outside of conditional. -void CopyIdenticalInstructionsOutOfConditional( - const std::vector& instructions_to_hoist, - HloComputation* conditional_parent, - absl::flat_hash_map* +Status CopyOutOfConditional( + Boundary& boundary, HloComputation* conditional_parent, + absl::flat_hash_map& hoisted_instructions) { - int64 instructions_size = instructions_to_hoist.size(); - // Visit the operands before its users and copy it, so that the copied - // user will point to the correct operand. 
- for (int64 i = instructions_size - 1; i >= 0; i--) { - HloInstruction* old_instruction = instructions_to_hoist[i]; - auto get_new_operand = [&](HloInstruction* old_operand) { - // If the operand can't be found in `instructions_to_hoist`, this - // operand will be in the `boundaries`, GetTupleElement instructions - // will be added later to replace this operand. - if (!ContainsKey(*hoisted_instructions, old_operand)) { - return old_operand; - } - return FindOrDie(*hoisted_instructions, old_operand); - }; - - absl::InlinedVector new_operands; - absl::c_transform(old_instruction->operands(), - std::back_inserter(new_operands), get_new_operand); - - HloInstruction* new_instruction = conditional_parent->AddInstruction( - old_instruction->CloneWithNewOperands(old_instruction->shape(), - new_operands)); - // Maps the instruction outside of conditional to the instruction - // inside of the conditional. - InsertOrDie(hoisted_instructions, old_instruction, new_instruction); + // Insert GetTupleElement before the instructions whose operands might still + // be within the conditional. + HloInstruction* op = boundary.Operands()[0]; + absl::InlinedVector new_operands; + for (int i = 0; i < op->operands().size(); ++i) { + auto op_i = op->operands()[i]; + VLOG(2) << "Looking for operand:" << op_i->ToString() << "\n"; + CHECK(ContainsKey(hoisted_instructions, op_i)); + new_operands.push_back(FindOrDie(hoisted_instructions, op_i)); } -} - -// If there are instructions to hoist, the root of the conditional must be -// moved out. Change the users of the conditional to the hoisted instruction -// of the new root. -Status ChangeConditionalUsers( - HloInstruction* conditional, HloInstruction* old_root, - const absl::flat_hash_map& - hoisted_instructions) { - HloInstruction* new_root = FindOrDie(hoisted_instructions, old_root); - TF_RETURN_IF_ERROR(conditional->ReplaceAllUsesWith(new_root)); - return Status::OK(); -} - -// Insert GetTupleElement before the instructions whose operands might still -// be within the conditional. -Status CreateGetTupleElementAfterConditional( - const std::vector& boundaries, - const std::unordered_set& instructions_to_hoist_set, - const absl::flat_hash_map& - hoisted_instructions, - HloInstruction* conditional, HloComputation* computation) { - int boundary_instruction_size = boundaries.size(); - - // Inserts GetTupleElement before the boundary instructions. - for (int i = 0; i < boundary_instruction_size; i++) { - HloInstruction* gte = - computation->AddInstruction(HloInstruction::CreateGetTupleElement( - boundaries[i].operand->shape(), conditional, i)); - - HloInstruction* new_instruction = - FindOrDie(hoisted_instructions, boundaries[i].user); - TF_RETURN_IF_ERROR( - new_instruction->ReplaceOperandWith(boundaries[i].operand_index, gte)); - } - return Status::OK(); -} - -// Remove instructions to be hoisted out of the branch computation. -Status RemoveInstructionFromComputation( - const std::vector& instructions_to_hoist, - HloComputation* branch) { - // Will visit the instructions after its users. - for (auto* instruction : instructions_to_hoist) { - TF_RETURN_IF_ERROR(branch->RemoveInstruction(instruction)); + HloInstruction* new_instruction = conditional_parent->AddInstruction( + op->CloneWithNewOperands(op->shape(), new_operands)); + // Maps the instruction outside of conditional to the instruction + // inside of the conditional. 
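// [Editor's note - standalone sketch, not part of the patch] The cloning
// order used by the pass can be illustrated with a plain DAG copy: the
// boundary list stores users before their operands, so walking it backwards
// guarantees that every operand has already been cloned (or pre-seeded, for
// values that stay inside the conditional and are read through
// get-tuple-element) by the time its user is cloned.
#include <memory>
#include <unordered_map>
#include <vector>

struct Node {
  std::vector<const Node*> operands;
};

// `users_before_operands` lists the nodes to copy, users first. `old_to_new`
// must already map every external operand (the "boundary" values) to its
// replacement; cloned nodes are added to the same map.
void CloneInReverseOrder(const std::vector<const Node*>& users_before_operands,
                         std::unordered_map<const Node*, Node*>& old_to_new,
                         std::vector<std::unique_ptr<Node>>& storage) {
  for (auto it = users_before_operands.rbegin();
       it != users_before_operands.rend(); ++it) {
    auto clone = std::make_unique<Node>();
    for (const Node* operand : (*it)->operands) {
      clone->operands.push_back(old_to_new.at(operand));  // already cloned
    }
    old_to_new[*it] = clone.get();
    storage.push_back(std::move(clone));
  }
}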
+ for (HloInstruction* op : boundary.Operands()) { + hoisted_instructions[op] = new_instruction; } return Status::OK(); } @@ -574,128 +394,359 @@ StatusOr ConvertSpecialMove(HloInstruction* conditional, // are the shape of the operands are identical and their properties are // identical. Will start from the root instruction of each branch and get // the identical ops to hoist. -StatusOr MergeIdenticalElements(HloInstruction* conditional, - bool is_layout_sensitive) { - VLOG(1) << " visiting conditional:" << conditional->ToString(); - int branch_count = conditional->branch_count(); - if (branch_count <= 0) { +StatusOr ConditionalCodeMotion::MoveInstructionOut( + HloInstruction* conditional, std::vector& to_move_out, + std::vector& new_boundaries) { + if (to_move_out.empty()) { return false; } - - std::vector visitors; - visitors.reserve(branch_count); - // Visit instructions from the root instruction to the operands using BFS. - for (int i = 0; i < branch_count; i++) { - visitors.emplace_back(BranchVisitor(conditional->branch_computation(i))); - } - - // The instructions to be visited within each branch. - std::vector front_instructions(branch_count); - - while (HasNextInstruction(visitors)) { - for (int i = 0; i < branch_count; i++) { - front_instructions[i] = visitors[i].GetNextInstruction(); - } - // If two instructions has the same shape, opcode and its operands has the - // same shape, then this instruction can be moved out of conditional. - if (WorthHoisting(front_instructions[0]) && - InstructionWithinBranchIdentical(front_instructions, - is_layout_sensitive)) { - for (int i = 0; i < branch_count; i++) { - visitors[i].AddInstructionOperands(front_instructions[i]); - visitors[i].AddInstructionToHoist(front_instructions[i]); - } - } else { - for (int i = 0; i < branch_count; i++) { - // If the ops are not identical, these ops and its users will - // be in the boundaries` of the conditional. These ops will be stayed - // within the conditional, but one its only user will be moved out - // of conditional. - visitors[i].AddInstructionToBoundary(front_instructions[i]); - } - } - } - - if (visitors[0].HoistInstructionSize() < 1) { - return false; - } - - HloInstruction* old_root = - conditional->branch_computation(0)->root_instruction(); + VLOG(1) << "number of boundaries to move out:" << to_move_out.size() << "\n"; HloComputation* conditional_parent = conditional->parent(); + // save the old users before add new conditional user instructions + std::vector old_conditional_users = conditional->users(); + absl::flat_hash_map hoisted_instructions; // Maps instructions in the conditional body to instructions hoisted outside // the conditional that compute the same value. - absl::flat_hash_map hoisted_instructions; - // Copy identical instructions out of the conditional. - CopyIdenticalInstructionsOutOfConditional(visitors[0].instructions_to_hoist(), - conditional_parent, - &hoisted_instructions); - // If there are instructions to hoist, the root of the conditional must be - // moved out. Change the users of the conditional to the hoisted instruction - // of the new root. 
- TF_RETURN_IF_ERROR( - ChangeConditionalUsers(conditional, old_root, hoisted_instructions)); - + VLOG(2) << "before opt:" + << conditional_parent->ToString(HloPrintOptions::Fingerprint()) + << "\n"; + int64 op_index = 0; + for (Boundary b : new_boundaries) { + HloInstruction* op = b.Operands()[0]; + CHECK(op != nullptr); + VLOG(2) << "Mapping new boundary instr: " << op->ToString() << "\n"; + HloInstruction* gtr = conditional_parent->AddInstruction( + HloInstruction::CreateGetTupleElement(op->shape(), conditional, + op_index++)); + hoisted_instructions[op] = gtr; + } + // Copy boundary instructions out of the conditional. + // Visit the operands before its users and copy it, so that the copied + // user will point to the correct operand. + for (int64 i = to_move_out.size() - 1; i >= 0; i--) { + TF_RETURN_IF_ERROR(CopyOutOfConditional(to_move_out[i], conditional_parent, + hoisted_instructions)); + } + VLOG(2) << "Done copy branch instructions out\n" + << conditional_parent->ToString(HloPrintOptions::Fingerprint()) + << "\n"; + // Change original users of the conditional to use the correct operands. + HloInstruction* old_root = + conditional->branch_computation(0)->root_instruction(); + for (auto user_instr : old_conditional_users) { + CHECK(user_instr->opcode() == HloOpcode::kGetTupleElement); + auto tuple_opd = static_cast(user_instr); + int64 index = tuple_opd->tuple_index(); + HloInstruction* old_opd = old_root->operands()[index]; + HloInstruction* new_opd = hoisted_instructions[old_opd]; + CHECK(old_opd != nullptr); + CHECK(new_opd != nullptr); + TF_RETURN_IF_ERROR(user_instr->ReplaceAllUsesWith(new_opd)); + TF_RETURN_IF_ERROR(conditional_parent->RemoveInstruction(user_instr)); + } // Create tuple element within each branch and set it as root. + int64 branch_count = conditional->branch_count(); for (int i = 0; i < branch_count; i++) { - HloInstruction* tuple = CreateNewRoot( - visitors[i].boundaries(), visitors[i].instructions_to_hoist_set(), - conditional->branch_computation(i)); - conditional->branch_computation(i)->set_root_instruction(tuple, true); - } - // Changes conditional instruction shape to the shape of the new root. - *conditional->mutable_shape() = - conditional->branch_computation(0)->root_instruction()->shape(); - - // Insert GetTupleElement before the instructions whose operands might still - // be within the conditional. - TF_RETURN_IF_ERROR(CreateGetTupleElementAfterConditional( - visitors[0].boundaries(), visitors[0].instructions_to_hoist_set(), - hoisted_instructions, conditional, conditional_parent)); - - // Remove hoist instructions from the branches. - for (int i = 0; i < branch_count; i++) { - TF_RETURN_IF_ERROR( - RemoveInstructionFromComputation(visitors[i].instructions_to_hoist(), - conditional->branch_computation(i))); + auto computation = conditional->branch_computation(i); + std::vector elements; + for (auto b1 : new_boundaries) { + HloInstruction* op = b1.Operands()[i]; + VLOG(1) << "branch count=" << i << "\n"; + CHECK(op != nullptr); + VLOG(1) << "Adding to root " << i << " with " << op->ToString() << "\n"; + elements.push_back(op); + } + HloInstruction* tuple = + computation->AddInstruction(HloInstruction::CreateTuple(elements)); + computation->set_root_instruction(tuple, true); + VLOG(2) << "computation is :" << computation->ToString() << "\n"; + // Remove hoisted instructions from the branches. 
+ for (auto b2 : to_move_out) { + VLOG(2) << "Removing boundary:" << b2.ToString() << "\n"; + TF_RETURN_IF_ERROR(computation->RemoveInstruction(b2.Operands()[i])); + } } + // Change conditional instruction shape to the shape of the new root. + HloInstruction* new_root = + conditional->branch_computation(0)->root_instruction(); + *conditional->mutable_shape() = new_root->shape(); + // + VLOG(2) << "done moving instructions out of branches\n" + << conditional_parent->ToString(HloPrintOptions::Fingerprint()) + << "\n"; return true; } -} // namespace +// Group single chains of operands or uses of boundaries into new boundaries +class GroupConnectedBoundaries { + private: + std::unordered_set visited_; + std::vector connected_boundaries_, new_boundaries_; + HloInstruction* conditional_; + bool is_layout_sensitive_; -StatusOr ConditionalCodeMotion::Run(HloModule* module) { - bool changed = false; - - if (pursue_full_conditional_code_motion_) { - std::vector conditional_ops; - for (auto* comp : module->MakeComputationPostOrder()) { - for (auto* instr : comp->MakeInstructionPostOrder()) { - if (instr->opcode() == HloOpcode::kConditional) { - conditional_ops.push_back(instr); + public: + explicit GroupConnectedBoundaries(HloInstruction* conditional, + bool is_layout_sensitive) + : conditional_(conditional), is_layout_sensitive_(is_layout_sensitive) {} + // Returns true if `instruction` is worth hoisting out. + bool WorthHoisting(HloInstruction* instruction) { + switch (instruction->opcode()) { + case HloOpcode::kConvert: + // If Convert is after AllReduce, it is worth moving out AllReduce out + // of conditional for AR/CRS combine. If Convert is after other ops such + // as Dot or Convolutional, it is better to keep convert within + // conditional so that convert can be fused with Dot or Convolutional. + // + // TODO(b/154283721): figure out the scenario when convert can be fused + // with AllReduce out of conditional. + switch (instruction->operand(0)->opcode()) { + case HloOpcode::kAllReduce: + case HloOpcode::kReshape: + return true; + default: + VLOG(1) << "Instruction is convert and its operand is not know to " + "be worth hoisting\n"; + return false; } + case HloOpcode::kAllReduce: + case HloOpcode::kAdd: + case HloOpcode::kConstant: + case HloOpcode::kSubtract: + case HloOpcode::kMultiply: + case HloOpcode::kDivide: + case HloOpcode::kTuple: + case HloOpcode::kSqrt: + case HloOpcode::kReshape: + case HloOpcode::kGetTupleElement: + return true; + default: + VLOG(1) << "Instruction is not known to be worth hoisting\n"; + return false; + } + } + // Calculates the degree of reuses carried by a pair of conditional + // boundaries, if b1 is inside a conditional and b2 is outside. + int64 ReusesBeforeBoundary(HloInstruction* user) { + int64 reuses = 0; + for (auto op : user->operands()) { + // Only consider single-user cases as reuseable. 
+ if (ContainsKey(visited_, op) && op->user_count() == 1) { + reuses += ReusesCarriedBy(op, user); } } + VLOG(1) << "cost to be paied after moving out" << user->ToString() << ":" + << reuses << "\n"; + return reuses; + } - for (HloInstruction* conditional_op : conditional_ops) { - TF_ASSIGN_OR_RETURN( - bool result, - MergeIdenticalElements(conditional_op, is_layout_sensitive_)); - changed |= result; + int64 ReusesAfterBoundary(HloInstruction* user) { + CHECK(user != nullptr); + auto all_users = user->users(); + // For now, assume that if an instruction has multiple-consumers, it will + // not be reused (the reuse currently requires duplication in fusion and so + // is expensive). + if (all_users.size() > 1) { + return 0; } + if (!all_users.empty()) { + auto op = all_users[0]; + int64 reuses = 0; + // Only count reuses that run through the conditional root. + if (op == conditional_->branch_computation(0)->root_instruction()) { + int64 index = op->operand_index(user); + for (auto op2 : conditional_->users()) { + CHECK(op2->opcode() == HloOpcode::kGetTupleElement); + auto tuple_opd = static_cast(op2); + if (index == tuple_opd->tuple_index()) { + all_users = op2->users(); + if (!all_users.empty()) { + reuses += ReusesCarriedBy(user, all_users[0]); + break; + } + } + } + } + VLOG(1) << "reuses to be gained after moving " << user->ToString() << ":" + << reuses << "\n"; + return reuses; + } + return 0; + } - if (changed) { - HloPassPipeline subpipeline("after_conditional_code_motion"); - subpipeline.AddPass(); - subpipeline.AddPass(); - subpipeline.AddPass(); - TF_ASSIGN_OR_RETURN(bool cleanup_changed, subpipeline.Run(module)); - changed |= cleanup_changed; + int64 BenefitForMovingBoundaries(const std::vector& boundaries) { + int64 reuses_before = 0, reuses_after = 0; + for (Boundary b : boundaries) { + auto op = b.Operands()[0]; + if (op == conditional_->branch_computation(0)->root_instruction()) { + continue; + } + reuses_before += ReusesBeforeBoundary(op); + VLOG(1) << "Cost of moving so far: " << reuses_before << "\n"; + reuses_after += ReusesAfterBoundary(op); + VLOG(1) << "Benefit from moving so far : " << reuses_after << "\n"; + } + if (reuses_after == 0 && reuses_before == 0) { + return -1; + } else if (boundaries[0].IsInsideBranch()) { + return reuses_after - reuses_before; + } else { + return reuses_before - reuses_after; } } + Boundary GetNextBoundary(const Boundary& b, int64 op_index) { + Boundary b2(b.GetPosition()); + CHECK(b.Operands().size() == conditional_->branch_count()); + for (int j = 0; j < b.Operands().size(); ++j) { + HloInstruction* inst = b.Operands()[j]; + CHECK(inst != nullptr); + HloInstruction* op = (b.IsInsideBranch()) ? inst->operands()[op_index] + : inst->users()[op_index]; + CHECK(op != nullptr); + b2.Operands().push_back(op); + } + return b2; + } + void AddBoundaries(const Boundary& boundary) { + BoundaryVisitor visitor; + visitor.AddToWorkList(boundary); + while (visitor.HasNextBoundary()) { + Boundary b = visitor.PopNextBoundary(); + // if b is already visited, it must have multiple users and is already in + // new boundaries. Skip it. + if (ContainsKey(visited_, b.Operands()[0])) { + continue; + } + VLOG(1) << "visiting boundary " << b.ToString() << "\n"; + if ((b.Operands().size() == 1 || + InstructionWithinBranchIdentical(b.Operands(), + is_layout_sensitive_)) && + WorthHoisting(b.Operands()[0])) { + connected_boundaries_.push_back(b); + VLOG(1) << "boundary can be moved\n"; + int64 operand_count = (b.IsInsideBranch()) + ? 
b.Operands()[0]->operand_count() + : b.Operands()[0]->users().size(); + for (int i = 0; i < operand_count; i++) { + Boundary b2 = GetNextBoundary(b, i); + int64 b2_count = (b2.IsInsideBranch()) + ? b2.Operands()[0]->user_count() + : b2.Operands()[0]->operand_count(); + // only consider adding an exclusive producor into the same group. + if (b2_count == 1) { + VLOG(2) << "Add operand " << i << " to visit later\n"; + visitor.AddToWorkList(b2); + } else { + VLOG(2) << "Operand " << i << " has multiple uses\n"; + if (!ContainsKey(visited_, b2.Operands()[0])) { + visited_.insert(b2.Operands()[0]); + new_boundaries_.push_back(b2); + } + } + } + } else { + VLOG(1) << "boundary cannot be moved\n"; + visited_.insert(b.Operands()[0]); + new_boundaries_.push_back(b); + } + } + } + std::vector BoundariesToMoveOut(const Boundary& b) { + HloInstruction* inst = b.Operands()[0]; + if (inst->opcode() == HloOpcode::kConditional) { + int branch_count = inst->branch_count(); + // Visit instructions from the root instruction to the operands using BFS. + Boundary boundary_in(Boundary::Position::kInsideBranch); + for (int i = 0; i < branch_count; i++) { + HloComputation* branch_computation = inst->branch_computation(i); + HloInstruction* root_inst = branch_computation->root_instruction(); + CHECK(root_inst != nullptr); + boundary_in.Operands().push_back(root_inst); + } + AddBoundaries(boundary_in); + } + return connected_boundaries_; + } + std::vector BoundariesToMoveIn(const Boundary& b) { + if (b.IsInsideBranch()) { + return std::vector(); + } + AddBoundaries(b); + return connected_boundaries_; + } + std::vector GetNewBoundaries() { return new_boundaries_; } +}; + +ConditionalCodeMotion::Decision ConditionalCodeMotion::ConsiderCodeMotion( + HloInstruction* conditional, const Boundary& cur_boundary, + std::vector& to_move, std::vector& new_boundaries) { + GroupConnectedBoundaries connect(conditional, is_layout_sensitive_); + auto move_out = connect.BoundariesToMoveOut(cur_boundary); + if (!move_out.empty()) { + std::vector next_boundaries = connect.GetNewBoundaries(); + auto benefit = connect.BenefitForMovingBoundaries(move_out); + VLOG(1) << "benefit of moving " << cur_boundary.Operands()[0]->ToString() + << ":" << benefit << "\n"; + if (benefit >= 0) { + new_boundaries = next_boundaries; + to_move = move_out; + return Decision::kMoveOutOfBranch; + } + } + return ConditionalCodeMotion::Decision::kNoChange; +} + +StatusOr ConditionalCodeMotion::Run(HloModule* module) { + // Gather all the conditional ops in the module ahead of time, to avoid + // potential complications of modifying the code that affecting traversal. + std::vector conditional_ops; + for (auto* comp : module->MakeComputationPostOrder()) { + for (auto* instr : comp->MakeInstructionPostOrder()) { + if (instr->opcode() == HloOpcode::kConditional) { + conditional_ops.push_back(instr); + } + } + } + + bool changed = false; + std::vector to_move_out, to_move_in, new_boundaries; + for (HloInstruction* conditional : conditional_ops) { + BoundaryVisitor visitor(conditional); + VLOG(2) << "Analyzing conditional:" << conditional->ToString() << "\n"; + // Boundariess to move out of and to move into the branches. 
+ while (visitor.HasNextBoundary()) { + std::vector to_move, next_boundary; + Boundary boundary = visitor.PopNextBoundary(); + VLOG(2) << "Analyzing boundary:" << boundary.ToString() << "\n"; + ConditionalCodeMotion::Decision d = + ConsiderCodeMotion(conditional, boundary, to_move, next_boundary); + switch (d) { + case Decision::kMoveOutOfBranch: + VLOG(2) << "Decision is move out of branch\n"; + to_move_out.insert(to_move_out.end(), to_move.begin(), to_move.end()); + break; + case Decision::kMoveIntoBranch: + VLOG(2) << "Decision is move into branch\n"; + to_move_in.insert(to_move_in.end(), to_move.begin(), to_move.end()); + break; + case Decision::kNoChange: + VLOG(2) << "Decision is no change\n"; + new_boundaries.push_back(boundary); + break; + } + for (const Boundary& b : next_boundary) { + visitor.AddToWorkList(b); + } + } + TF_ASSIGN_OR_RETURN( + bool result, + MoveInstructionOut(conditional, to_move_out, new_boundaries)); + VLOG(2) << "moving out result:" << result << "\n"; + changed |= result; + } // handling convert rematerialization/hoisting - { + if (!changed && pursue_full_conditional_code_motion_) { std::vector conditional_ops; for (auto* comp : module->MakeComputationPostOrder()) { for (auto* instr : comp->MakeInstructionPostOrder()) { @@ -711,7 +762,6 @@ StatusOr ConditionalCodeMotion::Run(HloModule* module) { changed |= convert_result; } } - if (changed) { HloPassPipeline subpipeline( "after_conditional_code_motion_after_convert_hoisting"); @@ -721,8 +771,8 @@ StatusOr ConditionalCodeMotion::Run(HloModule* module) { TF_ASSIGN_OR_RETURN(bool cleanup_changed, subpipeline.Run(module)); changed |= cleanup_changed; } - return changed; } +} // namespace conditional_opt } // namespace xla diff --git a/tensorflow/compiler/xla/service/conditional_code_motion.h b/tensorflow/compiler/xla/service/conditional_code_motion.h index 95f02833e15..d7295058467 100644 --- a/tensorflow/compiler/xla/service/conditional_code_motion.h +++ b/tensorflow/compiler/xla/service/conditional_code_motion.h @@ -23,35 +23,80 @@ limitations under the License. namespace xla { -// ConditionalCodeMotion specializes in hoisting/rematerializing -// unconditional converts in the default mode. -// When pursue_full_conditional_code_motion_ is set to true, the -// full HLO pass moves identical ops out of a conditional in addition to moving -// converts. +namespace conditional_opt { +// At the conceptural level, a boundary can be thought of as representing a +// single virtual operation, except this virtual operation is conditionally +// instantiated into different concrete operations at each conditional branch. +// So a boundary is mapped to a single concrete operation if it is outside of +// conditional branches, and is mapped to a list of instructions if inside the +// branches. This data structure therefore allows a common data structure +// representation of the instructions to be moved, whether they are inside or +// outside of the branches. Subsequently, it allows a common implementation +// basis to be used for both moving instructions out of and for moving them +// inside branches. 
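As a rough illustration of the abstraction described above (an illustrative fragment only, not part of the change itself; it assumes `conditional` is an HloInstruction* with opcode kConditional and mirrors how BoundariesToMoveOut in the .cc change seeds its work list):

  // One inside-branch boundary holding the root of every branch: the i-th
  // operand is the concrete instruction the virtual operation maps to in
  // branch i.
  Boundary seed(Boundary::Position::kInsideBranch);
  for (int i = 0; i < conditional->branch_count(); ++i) {
    seed.Operands().push_back(
        conditional->branch_computation(i)->root_instruction());
  }
  // Outside of the branches a boundary carries a single operand, e.g. a
  // get-tuple-element user of the conditional itself.
  Boundary user_boundary(Boundary::Position::kOutsideBranch);
  user_boundary.Operands().push_back(conditional->users()[0]);
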
+class Boundary { + public: + enum class Position { kInsideBranch, kOutsideBranch }; + explicit Boundary(Position p) : position_(p) {} + std::vector& Operands() { return operands_; } + const std::vector& Operands() const { return operands_; } + bool IsInsideBranch() const { return position_ == Position::kInsideBranch; } + bool IsOutsideBranch() const { return position_ == Position::kOutsideBranch; } + Position GetPosition() const { return position_; } + bool IsEmpty() const { return operands_.empty(); } + std::string ToString() const { + std::string res; + for (HloInstruction* op : operands_) { + res += op->ToString() + ";"; + } + return res; + } + + private: + // Boundary instructions in the conditional branches, one from each branch + // of the conditional. + std::vector operands_; + Position position_; +}; + +// HLO pass that moves identical ops in/out of conditional. // - The definition of identical are the shape of the operands are identical // and their properties are identical. -// - Currently, only some types of instructions is supported. -// TODO(b/154283721): relax non-sharable operand constraint and avoid copies in -// the new root. // - Only the identical ops that won't share operands with other ops will // be moved out of conditional. class ConditionalCodeMotion : public HloModulePass { public: // If is_layout_sensitive is true, then the hoist process preserves layout // during identical comparison. Otherwise, layout is ignored. - explicit ConditionalCodeMotion( - bool is_layout_sensitive = true, - bool pursue_full_conditional_code_motion = false) + explicit ConditionalCodeMotion(bool is_layout_sensitive, + bool pursue_full_conditional_code_motion) : is_layout_sensitive_(is_layout_sensitive), pursue_full_conditional_code_motion_( pursue_full_conditional_code_motion) {} absl::string_view name() const override { return "conditional-code-motion"; } StatusOr Run(HloModule* module) override; + // Optimization decision for each boundary of the conditional instruction. + enum class Decision { kMoveOutOfBranch, kMoveIntoBranch, kNoChange }; + // If the optimization decision is NO_CHANGE, new_boundary is set to nullptr; + // otherwise, it is set to the new boundary after proposed optimization. + virtual Decision ConsiderCodeMotion(HloInstruction* conditional, + const Boundary& cur_boundary, + std::vector& to_move, + std::vector& new_boundaries); + private: const bool is_layout_sensitive_; const bool pursue_full_conditional_code_motion_; + + StatusOr MoveInstructionOut(HloInstruction* conditional, + std::vector& to_move_out, + std::vector& new_boundaries); + StatusOr MoveInstructionIn(HloInstruction* conditional, + std::vector& to_move_in, + std::vector& new_boundaries); }; +} // namespace conditional_opt } // namespace xla diff --git a/tensorflow/compiler/xla/service/conditional_code_motion_test.cc b/tensorflow/compiler/xla/service/conditional_code_motion_test.cc index 38b2b515fa0..b3c5e17094a 100644 --- a/tensorflow/compiler/xla/service/conditional_code_motion_test.cc +++ b/tensorflow/compiler/xla/service/conditional_code_motion_test.cc @@ -33,7 +33,7 @@ limitations under the License. 
#include "tensorflow/core/platform/types.h" namespace xla { -namespace { +namespace conditional_opt { using ConditionalCodeMotionTest = HloTestBase; namespace op = xla::testing::opcode_matchers; @@ -117,6 +117,47 @@ ENTRY main { EXPECT_THAT(root, AllOf(op::Tuple(op::Convert()))); } +TEST_F(ConditionalCodeMotionTest, MoveConvertOutConditional) { + absl::string_view hlo_string = + R"( +HloModule RemoveDotOpOut + +on_true { + %arg_tuple.1 = (f32[93184,4]{1,0}) parameter(0) + %get-tuple-element.1 = f32[93184,4]{1,0} get-tuple-element(%arg_tuple.1), index=0 + %reshape.8493 = f32[2,512,364]{2,1,0} reshape(f32[93184,4]{1,0} %get-tuple-element.1) + %add.8493 = f32[2,512,364]{2,1,0} add(f32[2,512,364]{2,1,0} %reshape.8493, f32[2,512,364]{2,1,0} %reshape.8493) + %convert.2894 = bf16[2,512,364]{2,1,0} convert(f32[2,512,364]{2,1,0} %add.8493) + ROOT %tuple.1 = ( bf16[2,512,364]{2,1,0}) tuple(%convert.2894) +} + +on_false { + %arg_tuple.2 = (f32[93184,4]{1,0}) parameter(0) + %get-tuple-element.3 = f32[93184,4]{1,0} get-tuple-element(%arg_tuple.2), index=0 + %reshape.9717 = f32[2,512,364]{2,1,0} reshape(f32[93184,4]{1,0} %get-tuple-element.3) + %add.8493 = f32[2,512,364]{2,1,0} add(f32[2,512,364]{2,1,0} %reshape.9717, f32[2,512,364]{2,1,0} %reshape.9717) + %sub.8493 = f32[2,512,364]{2,1,0} subtract(f32[2,512,364]{2,1,0} %add.8493, f32[2,512,364]{2,1,0} %reshape.9717) + %convert.3604 = bf16[2,512,364]{2,1,0} convert(f32[2,512,364]{2,1,0} %reshape.9717), metadata={op_type="Cast" op_name="gradients/Cast_125_grad/Cast"} + ROOT %tuple.2 = (bf16[2,512,364]{2,1,0}) tuple(%convert.3604) +} + +ENTRY main { + pred.1 = pred[] parameter(0) + arg_tuple.11 = (f32[93184,4]{1,0}) parameter(1) + arg_tuple.22 = (f32[93184,4]{1,0}) parameter(2) + conditional = (bf16[2,512,364]{2,1,0}) conditional(pred.1, arg_tuple.11, arg_tuple.22), true_computation=on_true, false_computation=on_false + get-first-index = bf16[2,512,364]{2,1,0} get-tuple-element(conditional), index=0 + ROOT result = (bf16[2,512,364]{2,1,0}) tuple(get-first-index) +} +)"; + auto module = ParseAndReturnVerifiedModule(hlo_string).ValueOrDie(); + ConditionalCodeMotion pass(true, true); + ASSERT_TRUE(pass.Run(&*module).ValueOrDie()); + + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, AllOf(op::Tuple(op::Convert()))); +} + TEST_F(ConditionalCodeMotionTest, MoveConvertOut) { absl::string_view hlo_string = R"( @@ -152,8 +193,20 @@ ENTRY main { ConditionalCodeMotion pass(true, true); ASSERT_TRUE(pass.Run(&*module).ValueOrDie()); + const HloInstruction* conditional = + FindInstruction(module.get(), "conditional"); + const HloComputation* on_true = conditional->branch_computation(0); + ASSERT_EQ(on_true->instruction_count(), 2); + const HloComputation* on_false = conditional->branch_computation(1); + ASSERT_EQ(on_false->instruction_count(), 2); + HloInstruction* root = module->entry_computation()->root_instruction(); - EXPECT_THAT(root, AllOf(op::Tuple(op::Add(op::Convert(), op::Convert())))); + EXPECT_THAT( + root, + AllOf(op::Tuple(op::Add(op::Convert(op::Reshape(op::GetTupleElement( + op::GetTupleElement(op::Conditional())))), + op::Convert(op::Reshape(op::GetTupleElement( + op::GetTupleElement(op::Conditional())))))))); } TEST_F(ConditionalCodeMotionTest, UserShareOperandCannotBeMoved) { @@ -173,7 +226,7 @@ on_true { add.2 = f32[] add(add.1, constant.2) add.3 = f32[] add(add.1, constant.3) add.4 = f32[] add(add.3, constant.5) - multiply.1 = f32[] multiply(add.2, constant.4) + multiply.1 = f32[] multiply(add.4, 
constant.4) ROOT tuple.6 = (f32[], f32[]) tuple(multiply.1, add.4) } @@ -216,13 +269,11 @@ ENTRY main { const HloComputation* on_false = conditional->branch_computation(1); ASSERT_EQ(on_false->instruction_count(), 9); - // Check only one add and multiply is moved out. HloInstruction* root = module->entry_computation()->root_instruction(); EXPECT_THAT( - root, - AllOf(op::Tuple( - op::Multiply(op::GetTupleElement(op::Conditional()), op::Constant()), - op::Add(op::GetTupleElement(op::Conditional()), op::Constant())))); + root, AllOf(op::Tuple(op::Multiply(op::GetTupleElement(op::Conditional()), + op::Constant()), + op::GetTupleElement(op::Conditional())))); } TEST_F(ConditionalCodeMotionTest, ConditionalRootElementChanged) { @@ -269,16 +320,16 @@ ENTRY main { const HloInstruction* conditional = FindInstruction(module.get(), "conditional"); const HloComputation* on_true = conditional->branch_computation(0); - ASSERT_EQ(on_true->instruction_count(), 7); + ASSERT_EQ(on_true->instruction_count(), 1); const HloComputation* on_false = conditional->branch_computation(1); - ASSERT_EQ(on_false->instruction_count(), 7); + ASSERT_EQ(on_false->instruction_count(), 1); - // add.3 in on_true will be moved out, add.1 and add.2 will be in condtional - // root. - ASSERT_TRUE(ShapeUtil::Compatible( - conditional->shape(), - ShapeUtil::MakeTupleShape( - {ShapeUtil::MakeShape(F32, {}), ShapeUtil::MakeShape(F32, {})}))); + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT( + root, + AllOf(op::Tuple(op::Add( + op::Add(op::GetTupleElement(op::Conditional()), op::Constant()), + op::Add(op::GetTupleElement(op::Conditional()), op::Constant()))))); } TEST_F(ConditionalCodeMotionTest, ConditionalIsRootInstruction) { @@ -329,24 +380,9 @@ ENTRY main { )"; auto module = ParseAndReturnVerifiedModule(hlo_string).ValueOrDie(); ConditionalCodeMotion pass(true, true); - ASSERT_TRUE(pass.Run(&*module).ValueOrDie()); - - const HloInstruction* conditional = - FindInstruction(module.get(), "conditional"); - const HloComputation* on_true = conditional->branch_computation(0); - ASSERT_EQ(on_true->instruction_count(), 9); - const HloComputation* on_false = conditional->branch_computation(1); - ASSERT_EQ(on_false->instruction_count(), 9); - - // Check only one add and multiply is moved out. - // add.3 and add.5 can't be moved out because they share operands with - // other instructions. 
- HloInstruction* root = module->entry_computation()->root_instruction(); - EXPECT_THAT( - root, - AllOf(op::Tuple( - op::Multiply(op::GetTupleElement(op::Conditional()), op::Constant()), - op::Add(op::GetTupleElement(op::Conditional()), op::Constant())))); + // If there is no instruction after the conditional, there is no benefit to + // move + ASSERT_FALSE(pass.Run(&*module).ValueOrDie()); } TEST_F(ConditionalCodeMotionTest, LayoutMisMatchCannotMovedOut) { @@ -469,7 +505,8 @@ ENTRY main { false_computation=on_false get-first-index = f32[3,3,128,128] get-tuple-element(conditional), index=0 - ROOT result = (f32[3,3,128,128]) tuple(get-first-index) + add.1 = f32[3,3,128,128] add(f32[3,3,128,128] get-first-index, f32[3,3,128,128] get-first-index) + ROOT result = (f32[3,3,128,128]) tuple(add.1) } )"; auto module = ParseAndReturnVerifiedModule(hlo_string).ValueOrDie(); @@ -487,10 +524,14 @@ ENTRY main { conditional->shape(), ShapeUtil::MakeTupleShape({ShapeUtil::MakeShape( BF16, {3, 3, 128, 128})}))); HloInstruction* root = module->entry_computation()->root_instruction(); - EXPECT_THAT(root, AllOf(op::Tuple(op::Convert(op::AllReduce( - op::GetTupleElement(op::Conditional())))))); + EXPECT_THAT( + root, + AllOf(op::Tuple(op::Add( + op::Convert(op::AllReduce(op::GetTupleElement(op::Conditional()))), + op::Convert( + op::AllReduce(op::GetTupleElement(op::Conditional()))))))); } -} // namespace +} // namespace conditional_opt } // namespace xla From 348ef9008fa609bc9d4755ac787b88751e04f70f Mon Sep 17 00:00:00 2001 From: Jeremy Lau Date: Mon, 13 Jul 2020 08:59:27 -0700 Subject: [PATCH 0267/2522] Do not create entries in DirectSession::executors_ for all key permutations. These extra entries can use a lot of memory. If a user keeps calling Run() while shuffling inputs or outputs, it seems better to stay on the slow path that calls sort() rather than potentially using an exponential amount of memory. PiperOrigin-RevId: 320965262 Change-Id: If660b92dbb2bbd584656bc3ad74777671e7c7f55 --- tensorflow/core/common_runtime/direct_session.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc index 8ad6ae19afc..a1bbcde94bd 100644 --- a/tensorflow/core/common_runtime/direct_session.cc +++ b/tensorflow/core/common_runtime/direct_session.cc @@ -1526,8 +1526,6 @@ Status DirectSession::GetOrCreateExecutors( auto it = executors_.find(sorted_key); if (it != executors_.end()) { *executors_and_keys = it->second.get(); - // Insert this under the original key. - executors_.emplace(key, it->second); return Status::OK(); } } From 091c9b61386f9a11d20c17b728eeb339bb4fc397 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Mon, 13 Jul 2020 09:23:14 -0700 Subject: [PATCH 0268/2522] CLKernel kernel_ and int3 work_group_size_ moved to base GPUOperation class. 
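
Each operation previously declared its own CLKernel kernel_ and int3 work_group_size_ and had to move or swap them in its move constructor and move assignment; with the members in the base class that boilerplate disappears, and a derived operation only overrides the default work-group size where it needs one (e.g. DepthwiseConvolution switches to 8x8x1 in its constructor body). A much simplified sketch of the resulting pattern (the stand-in CLKernel/int3 definitions and the example derived class are illustrative only; everything except the two moved members is omitted):

    // Minimal stand-ins so the sketch is self-contained; the real types live
    // in the OpenCL delegate.
    struct CLKernel {};
    struct int3 { int x, y, z; };

    class GPUOperation {
     public:
      GPUOperation() = default;
      GPUOperation(GPUOperation&&) = default;
      GPUOperation& operator=(GPUOperation&&) = default;

     protected:
      CLKernel kernel_;                   // moved here from every subclass
      int3 work_group_size_ = {8, 4, 1};  // default used by many kernels
    };

    // Subclasses no longer declare the members or hand-write move support;
    // they only adjust the work-group size where needed.
    class DepthwiseConvolution : public GPUOperation {
     public:
      DepthwiseConvolution() { work_group_size_ = {8, 8, 1}; }
    };
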
PiperOrigin-RevId: 320969620 Change-Id: I35091866ed787de465da21a1a94e7cdb05845df2 --- .../lite/delegates/gpu/cl/kernels/concat_xy.cc | 6 +----- .../lite/delegates/gpu/cl/kernels/concat_xy.h | 2 -- .../lite/delegates/gpu/cl/kernels/concat_z.cc | 7 +------ .../lite/delegates/gpu/cl/kernels/concat_z.h | 3 --- .../lite/delegates/gpu/cl/kernels/conv_3d.cc | 4 +--- .../lite/delegates/gpu/cl/kernels/conv_3d.h | 2 -- .../delegates/gpu/cl/kernels/conv_buffer_1x1.cc | 4 +--- .../delegates/gpu/cl/kernels/conv_buffer_1x1.h | 1 - .../delegates/gpu/cl/kernels/conv_constants.cc | 6 +----- .../delegates/gpu/cl/kernels/conv_constants.h | 3 --- .../delegates/gpu/cl/kernels/conv_powervr.cc | 4 +--- .../lite/delegates/gpu/cl/kernels/conv_powervr.h | 2 -- .../delegates/gpu/cl/kernels/conv_texture.cc | 16 +++++++--------- .../lite/delegates/gpu/cl/kernels/conv_texture.h | 3 --- .../gpu/cl/kernels/conv_weights_converter.cc | 6 +----- .../gpu/cl/kernels/conv_weights_converter.h | 6 +----- .../gpu/cl/kernels/convolution_transposed.cc | 6 +----- .../gpu/cl/kernels/convolution_transposed.h | 3 --- .../gpu/cl/kernels/convolution_transposed_3d.cc | 6 +----- .../gpu/cl/kernels/convolution_transposed_3d.h | 3 --- .../gpu/cl/kernels/convolution_transposed_3x3.cc | 7 ++----- .../gpu/cl/kernels/convolution_transposed_3x3.h | 3 --- .../kernels/convolution_transposed_3x3_thin.cc | 6 +----- .../cl/kernels/convolution_transposed_3x3_thin.h | 3 --- .../gpu/cl/kernels/convolution_transposed_4x4.cc | 7 ++----- .../gpu/cl/kernels/convolution_transposed_4x4.h | 3 --- .../cl/kernels/convolution_transposed_thin.cc | 6 +----- .../gpu/cl/kernels/convolution_transposed_thin.h | 3 --- .../delegates/gpu/cl/kernels/depthwise_conv.cc | 16 +++++++--------- .../delegates/gpu/cl/kernels/depthwise_conv.h | 3 --- .../gpu/cl/kernels/depthwise_conv_3x3.cc | 10 ++++------ .../gpu/cl/kernels/depthwise_conv_3x3.h | 3 --- .../delegates/gpu/cl/kernels/fully_connected.cc | 6 +----- .../delegates/gpu/cl/kernels/fully_connected.h | 3 --- .../delegates/gpu/cl/kernels/gpu_operation.cc | 10 +++++----- .../delegates/gpu/cl/kernels/gpu_operation.h | 15 ++------------- tensorflow/lite/delegates/gpu/cl/kernels/lstm.cc | 7 +------ tensorflow/lite/delegates/gpu/cl/kernels/lstm.h | 3 --- .../delegates/gpu/cl/kernels/max_unpooling.cc | 6 +----- .../delegates/gpu/cl/kernels/max_unpooling.h | 3 --- tensorflow/lite/delegates/gpu/cl/kernels/mean.cc | 10 ++++------ tensorflow/lite/delegates/gpu/cl/kernels/mean.h | 5 ----- .../lite/delegates/gpu/cl/kernels/padding.cc | 7 +------ .../lite/delegates/gpu/cl/kernels/padding.h | 2 -- .../lite/delegates/gpu/cl/kernels/pooling.cc | 6 +----- .../lite/delegates/gpu/cl/kernels/pooling.h | 3 --- .../lite/delegates/gpu/cl/kernels/reshape.cc | 7 +------ .../lite/delegates/gpu/cl/kernels/reshape.h | 6 +----- .../lite/delegates/gpu/cl/kernels/reshapex4.cc | 6 +----- .../lite/delegates/gpu/cl/kernels/reshapex4.h | 5 +---- .../lite/delegates/gpu/cl/kernels/resize.cc | 14 ++------------ .../lite/delegates/gpu/cl/kernels/resize.h | 4 ---- .../lite/delegates/gpu/cl/kernels/softmax.cc | 7 +------ .../lite/delegates/gpu/cl/kernels/softmax.h | 2 -- .../lite/delegates/gpu/cl/kernels/softmax1x1.cc | 4 +--- .../lite/delegates/gpu/cl/kernels/softmax1x1.h | 3 --- .../delegates/gpu/cl/kernels/space_to_depth.cc | 7 +------ .../delegates/gpu/cl/kernels/space_to_depth.h | 4 +--- .../delegates/gpu/cl/kernels/strided_slice.cc | 11 ++++------- .../delegates/gpu/cl/kernels/strided_slice.h | 3 --- .../lite/delegates/gpu/cl/kernels/transpose.cc | 7 +------ 
.../lite/delegates/gpu/cl/kernels/transpose.h | 4 +--- .../lite/delegates/gpu/cl/kernels/winograd.cc | 14 ++------------ .../lite/delegates/gpu/cl/kernels/winograd.h | 14 ++++++-------- 64 files changed, 74 insertions(+), 297 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/concat_xy.cc b/tensorflow/lite/delegates/gpu/cl/kernels/concat_xy.cc index 5476cc22965..0a84d8a95b1 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/concat_xy.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/concat_xy.cc @@ -125,16 +125,12 @@ std::string GetConcatKernelCode(const OperationDef& op_def, ConcatXY::ConcatXY(ConcatXY&& operation) : GPUOperation(std::move(operation)), attr_(operation.attr_), - tensors_count_(operation.tensors_count_), - kernel_(std::move(operation.kernel_)), - work_group_size_(operation.work_group_size_) {} + tensors_count_(operation.tensors_count_) {} ConcatXY& ConcatXY::operator=(ConcatXY&& operation) { if (this != &operation) { attr_ = operation.attr_; tensors_count_ = operation.tensors_count_; - kernel_ = std::move(operation.kernel_); - std::swap(work_group_size_, operation.work_group_size_); GPUOperation::operator=(std::move(operation)); } return *this; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/concat_xy.h b/tensorflow/lite/delegates/gpu/cl/kernels/concat_xy.h index a170b593cf0..a82ffb22709 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/concat_xy.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/concat_xy.h @@ -48,8 +48,6 @@ class ConcatXY : public GPUOperation { ConcatAttributes attr_; int tensors_count_; - CLKernel kernel_; - int3 work_group_size_ = int3(8, 4, 1); }; ConcatXY CreateConcatXY(const OperationDef& definition, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.cc b/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.cc index 08c18907c78..93bc7b4a9dc 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.cc @@ -134,16 +134,11 @@ std::string GetConcatKernelCode(const OperationDef& op_def, } // namespace ConcatZ::ConcatZ(ConcatZ&& kernel) - : GPUOperation(std::move(kernel)), - channels_(std::move(kernel.channels_)), - kernel_(std::move(kernel.kernel_)), - work_group_size_(kernel.work_group_size_) {} + : GPUOperation(std::move(kernel)), channels_(std::move(kernel.channels_)) {} ConcatZ& ConcatZ::operator=(ConcatZ&& kernel) { if (this != &kernel) { channels_ = std::move(kernel.channels_); - kernel_ = std::move(kernel.kernel_); - std::swap(work_group_size_, kernel.work_group_size_); GPUOperation::operator=(std::move(kernel)); } return *this; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.h b/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.h index ec25f6e4ed9..6595432677c 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.h @@ -48,9 +48,6 @@ class ConcatZ : public GPUOperation { int3 GetGridSize() const; std::vector channels_; - - CLKernel kernel_; - int3 work_group_size_ = int3(8, 4, 1); }; ConcatZ CreateConcatZ(const OperationDef& definition, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.cc index 5e54faa378f..1d9eaef38c1 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.cc @@ -48,8 +48,7 @@ Conv3D::Conv3D(Conv3D&& operation) padding_(operation.padding_), kernel_size_(operation.kernel_size_), dilation_(operation.dilation_), - 
conv_params_(operation.conv_params_), - kernel_(std::move(operation.kernel_)) {} + conv_params_(operation.conv_params_) {} Conv3D& Conv3D::operator=(Conv3D&& operation) { if (this != &operation) { @@ -58,7 +57,6 @@ Conv3D& Conv3D::operator=(Conv3D&& operation) { std::swap(kernel_size_, operation.kernel_size_); std::swap(dilation_, operation.dilation_); std::swap(conv_params_, operation.conv_params_); - kernel_ = std::move(operation.kernel_); GPUOperation::operator=(std::move(operation)); } return *this; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.h index 7a00fabe6a0..501aa0578ed 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.h @@ -113,8 +113,6 @@ class Conv3D : public GPUOperation { int3 kernel_size_; int3 dilation_; ConvParams conv_params_; - - CLKernel kernel_; }; template diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc index 94a81ce3fa5..6fab26ac5da 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc @@ -281,13 +281,11 @@ ConvBuffer1x1::ConvBuffer1x1(const OperationDef& definition, ConvBuffer1x1::ConvBuffer1x1(ConvBuffer1x1&& operation) : GPUOperation(std::move(operation)), - conv_params_(std::move(operation.conv_params_)), - kernel_(std::move(operation.kernel_)) {} + conv_params_(std::move(operation.conv_params_)) {} ConvBuffer1x1& ConvBuffer1x1::operator=(ConvBuffer1x1&& operation) { if (this != &operation) { std::swap(conv_params_, operation.conv_params_); - kernel_ = std::move(operation.kernel_); GPUOperation::operator=(std::move(operation)); } return *this; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.h index 9e3f9711682..1be023fae82 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.h @@ -110,7 +110,6 @@ class ConvBuffer1x1 : public GPUOperation { int3 GetGridSize() const; ConvParams conv_params_; - CLKernel kernel_; }; template diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc index e6fc5da36a2..e2d0e821b5e 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc @@ -201,9 +201,7 @@ ConvConstants::ConvConstants(ConvConstants&& kernel) padding_(kernel.padding_), dilation_(kernel.dilation_), src_channels_(kernel.src_channels_), - dst_channels_(kernel.dst_channels_), - kernel_(std::move(kernel.kernel_)), - work_group_size_(kernel.work_group_size_) {} + dst_channels_(kernel.dst_channels_) {} ConvConstants& ConvConstants::operator=(ConvConstants&& kernel) { if (this != &kernel) { @@ -213,8 +211,6 @@ ConvConstants& ConvConstants::operator=(ConvConstants&& kernel) { std::swap(dilation_, kernel.dilation_); std::swap(src_channels_, kernel.src_channels_); std::swap(dst_channels_, kernel.dst_channels_); - kernel_ = std::move(kernel.kernel_); - std::swap(work_group_size_, kernel.work_group_size_); GPUOperation::operator=(std::move(kernel)); } return *this; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.h index b9cc52f7e94..f3f0025bf91 100644 --- 
a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.h @@ -77,9 +77,6 @@ class ConvConstants : public GPUOperation { int2 dilation_; int src_channels_; int dst_channels_; - - CLKernel kernel_; - int3 work_group_size_ = int3(8, 4, 1); }; template diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc index a34fa909267..551f5f33ff8 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc @@ -167,15 +167,13 @@ ConvPowerVR::ConvPowerVR(ConvPowerVR&& operation) : GPUOperation(std::move(operation)), stride_padding_(operation.stride_padding_), kernel_dilation_(operation.kernel_dilation_), - conv_params_(operation.conv_params_), - kernel_(std::move(operation.kernel_)) {} + conv_params_(operation.conv_params_) {} ConvPowerVR& ConvPowerVR::operator=(ConvPowerVR&& operation) { if (this != &operation) { std::swap(stride_padding_, operation.stride_padding_); std::swap(kernel_dilation_, operation.kernel_dilation_); std::swap(conv_params_, operation.conv_params_); - kernel_ = std::move(operation.kernel_); GPUOperation::operator=(std::move(operation)); } return *this; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h index cf82ff1e966..07bcf2c1f86 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h @@ -211,8 +211,6 @@ class ConvPowerVR : public GPUOperation { int4 stride_padding_; int4 kernel_dilation_; ConvParams conv_params_; - - CLKernel kernel_; }; template diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc index 12765b11fa5..d81c7e83b83 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc @@ -340,8 +340,9 @@ ConvTexture::ConvTexture(const OperationDef& definition, padding_(-attr.padding.prepended.w, -attr.padding.prepended.h), dilation_(attr.dilations.w, attr.dilations.h), different_weights_for_height_(false), - block_size_(2, 2, 2), - work_group_size_(4, 4, 2) {} + block_size_(2, 2, 2) { + work_group_size_ = int3(4, 4, 2); +} ConvTexture::ConvTexture(const OperationDef& definition) : GPUOperation(definition), @@ -350,8 +351,9 @@ ConvTexture::ConvTexture(const OperationDef& definition) padding_(0, 0), dilation_(1, 1), different_weights_for_height_(false), - block_size_(4, 1, 2), - work_group_size_(16, 1, 2) {} + block_size_(4, 1, 2) { + work_group_size_ = int3(16, 1, 2); +} ConvTexture::ConvTexture(ConvTexture&& operation) : GPUOperation(std::move(operation)), @@ -360,9 +362,7 @@ ConvTexture::ConvTexture(ConvTexture&& operation) padding_(operation.padding_), dilation_(operation.dilation_), different_weights_for_height_(operation.different_weights_for_height_), - block_size_(operation.block_size_), - kernel_(std::move(operation.kernel_)), - work_group_size_(operation.work_group_size_) {} + block_size_(operation.block_size_) {} ConvTexture& ConvTexture::operator=(ConvTexture&& operation) { if (this != &operation) { @@ -373,8 +373,6 @@ ConvTexture& ConvTexture::operator=(ConvTexture&& operation) { std::swap(different_weights_for_height_, operation.different_weights_for_height_); std::swap(block_size_, operation.block_size_); - kernel_ = std::move(operation.kernel_); - 
std::swap(work_group_size_, operation.work_group_size_); GPUOperation::operator=(std::move(operation)); } return *this; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.h index 31c2a72021e..c21d5b1deaa 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.h @@ -103,9 +103,6 @@ class ConvTexture : public GPUOperation { bool different_weights_for_height_; int3 block_size_ = int3(2, 2, 2); - - CLKernel kernel_; - int3 work_group_size_; }; template diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.cc index 18a6886dc89..063b20edd8a 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.cc @@ -98,16 +98,12 @@ std::string GetConverterToConvWeightsCode( ConverterToConvWeights::ConverterToConvWeights( ConverterToConvWeights&& operation) : GPUOperation(std::move(operation)), - conv_weights_desc_(operation.conv_weights_desc_), - kernel_(std::move(operation.kernel_)), - work_group_size_(operation.work_group_size_) {} + conv_weights_desc_(operation.conv_weights_desc_) {} ConverterToConvWeights& ConverterToConvWeights::operator=( ConverterToConvWeights&& operation) { if (this != &operation) { conv_weights_desc_ = operation.conv_weights_desc_; - kernel_ = std::move(operation.kernel_); - std::swap(work_group_size_, operation.work_group_size_); GPUOperation::operator=(std::move(operation)); } return *this; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.h index d79cfb8e3e0..3bf17fac939 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.h @@ -31,9 +31,7 @@ class ConverterToConvWeights : public GPUOperation { public: ConverterToConvWeights(const OperationDef& definition, const ConvWeightsDescription& conv_weights_desc) - : GPUOperation(definition), - conv_weights_desc_(conv_weights_desc), - work_group_size_(8, 4, 1) {} + : GPUOperation(definition), conv_weights_desc_(conv_weights_desc) {} absl::Status AddToQueue(CLCommandQueue* queue) override; absl::Status Tune(const TuningParameters& params) override; @@ -50,8 +48,6 @@ class ConverterToConvWeights : public GPUOperation { int3 GetGridSize() const; ConvWeightsDescription conv_weights_desc_; - CLKernel kernel_; - int3 work_group_size_; }; // We expect src BHWC tensor and we assume that B is O, H = H, W = W, C is I diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc index 6bf4d6a9aac..85456fc5140 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc @@ -320,9 +320,7 @@ ConvolutionTransposed::ConvolutionTransposed(ConvolutionTransposed&& operation) kernel_size_(operation.kernel_size_), stride_(operation.stride_), padding_(operation.padding_), - block_size_(operation.block_size_), - kernel_(std::move(operation.kernel_)), - work_group_size_(operation.work_group_size_) {} + block_size_(operation.block_size_) {} ConvolutionTransposed& ConvolutionTransposed::operator=( ConvolutionTransposed&& operation) { @@ -332,8 +330,6 @@ ConvolutionTransposed& 
ConvolutionTransposed::operator=( std::swap(stride_, operation.stride_); std::swap(padding_, operation.padding_); std::swap(block_size_, operation.block_size_); - kernel_ = std::move(operation.kernel_); - std::swap(work_group_size_, operation.work_group_size_); GPUOperation::operator=(std::move(operation)); } return *this; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.h index 4f4b7100f77..cf70799f5d4 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.h @@ -75,9 +75,6 @@ class ConvolutionTransposed : public GPUOperation { int2 padding_; int3 block_size_ = int3(1, 1, 1); - - CLKernel kernel_; - int3 work_group_size_ = int3(8, 4, 1); }; template diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.cc index 4e2f612f43f..53f24cb7a29 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.cc @@ -345,9 +345,7 @@ ConvolutionTransposed3D::ConvolutionTransposed3D( kernel_size_(operation.kernel_size_), stride_(operation.stride_), padding_(operation.padding_), - block_size_(operation.block_size_), - kernel_(std::move(operation.kernel_)), - work_group_size_(operation.work_group_size_) {} + block_size_(operation.block_size_) {} ConvolutionTransposed3D& ConvolutionTransposed3D::operator=( ConvolutionTransposed3D&& operation) { @@ -357,8 +355,6 @@ ConvolutionTransposed3D& ConvolutionTransposed3D::operator=( std::swap(stride_, operation.stride_); std::swap(padding_, operation.padding_); std::swap(block_size_, operation.block_size_); - kernel_ = std::move(operation.kernel_); - std::swap(work_group_size_, operation.work_group_size_); GPUOperation::operator=(std::move(operation)); } return *this; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.h index 30e22e6e725..4b76e617e08 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.h @@ -75,9 +75,6 @@ class ConvolutionTransposed3D : public GPUOperation { int3 padding_; int4 block_size_ = int4(1, 1, 1, 1); // WHDS - - CLKernel kernel_; - int3 work_group_size_ = int3(8, 4, 1); }; template diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.cc index 89eb75bfc68..0da4ca67a4f 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.cc @@ -271,6 +271,7 @@ ConvolutionTransposed3x3::ConvolutionTransposed3x3( : GPUOperation(definition), padding_(padding), work_group_launch_order_(2, 0, 1) { + work_group_size_ = int3(8, 4, 1); if (device.IsPowerVR()) { weights_upload_type_ = WeightsUploadType::LOCAL_MEM_ASYNC; } else if (device.IsNvidia() || device.IsIntel()) { @@ -287,9 +288,7 @@ ConvolutionTransposed3x3::ConvolutionTransposed3x3( : GPUOperation(std::move(operation)), padding_(operation.padding_), work_group_launch_order_(operation.work_group_launch_order_), - weights_upload_type_(operation.weights_upload_type_), - kernel_(std::move(operation.kernel_)), - 
work_group_size_(operation.work_group_size_) {} + weights_upload_type_(operation.weights_upload_type_) {} ConvolutionTransposed3x3& ConvolutionTransposed3x3::operator=( ConvolutionTransposed3x3&& operation) { @@ -297,8 +296,6 @@ ConvolutionTransposed3x3& ConvolutionTransposed3x3::operator=( std::swap(padding_, operation.padding_); std::swap(work_group_launch_order_, operation.work_group_launch_order_); std::swap(weights_upload_type_, operation.weights_upload_type_); - kernel_ = std::move(operation.kernel_); - std::swap(work_group_size_, operation.work_group_size_); GPUOperation::operator=(std::move(operation)); } return *this; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.h index b11c83dfd85..3792acd174e 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.h @@ -74,9 +74,6 @@ class ConvolutionTransposed3x3 : public GPUOperation { int2 padding_; int3 work_group_launch_order_; WeightsUploadType weights_upload_type_; - - CLKernel kernel_; - int3 work_group_size_ = int3(8, 4, 1); }; template diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.cc index 020a99852d7..934c7198fa8 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.cc @@ -177,17 +177,13 @@ ConvolutionTransposed3x3Thin::ConvolutionTransposed3x3Thin( ConvolutionTransposed3x3Thin&& operation) : GPUOperation(std::move(operation)), src_channels_(operation.src_channels_), - dst_channels_(operation.dst_channels_), - kernel_(std::move(operation.kernel_)), - work_group_size_(operation.work_group_size_) {} + dst_channels_(operation.dst_channels_) {} ConvolutionTransposed3x3Thin& ConvolutionTransposed3x3Thin::operator=( ConvolutionTransposed3x3Thin&& operation) { if (this != &operation) { std::swap(src_channels_, operation.src_channels_); std::swap(dst_channels_, operation.dst_channels_); - kernel_ = std::move(operation.kernel_); - std::swap(work_group_size_, operation.work_group_size_); GPUOperation::operator=(std::move(operation)); } return *this; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.h index e292f416796..2e272835818 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.h @@ -72,9 +72,6 @@ class ConvolutionTransposed3x3Thin : public GPUOperation { int src_channels_; int dst_channels_; - - CLKernel kernel_; - int3 work_group_size_ = int3(8, 4, 1); }; template diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.cc index 61882f29f15..6c81457cd8c 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.cc @@ -267,6 +267,7 @@ std::string GenerateConvolutionTransposedCode( ConvolutionTransposed4x4::ConvolutionTransposed4x4( const OperationDef& definition, const CLDevice& device) : GPUOperation(definition) { + work_group_size_ = int3(8, 4, 1); if (device.IsPowerVR()) { 
weights_upload_type_ = WeightsUploadType::LOCAL_MEM_ASYNC; } else if (device.IsNvidia() || device.IsIntel()) { @@ -281,16 +282,12 @@ ConvolutionTransposed4x4::ConvolutionTransposed4x4( ConvolutionTransposed4x4::ConvolutionTransposed4x4( ConvolutionTransposed4x4&& operation) : GPUOperation(std::move(operation)), - weights_upload_type_(operation.weights_upload_type_), - kernel_(std::move(operation.kernel_)), - work_group_size_(operation.work_group_size_) {} + weights_upload_type_(operation.weights_upload_type_) {} ConvolutionTransposed4x4& ConvolutionTransposed4x4::operator=( ConvolutionTransposed4x4&& operation) { if (this != &operation) { std::swap(weights_upload_type_, operation.weights_upload_type_); - kernel_ = std::move(operation.kernel_); - std::swap(work_group_size_, operation.work_group_size_); GPUOperation::operator=(std::move(operation)); } return *this; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.h index b7d52a8cf5a..1cf3b836d24 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.h @@ -72,9 +72,6 @@ class ConvolutionTransposed4x4 : public GPUOperation { int3 GetGridSize() const; WeightsUploadType weights_upload_type_; - - CLKernel kernel_; - int3 work_group_size_ = int3(8, 4, 1); }; template diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.cc index 9df9587663c..90b1a4c1da5 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.cc @@ -145,9 +145,7 @@ ConvolutionTransposedThin::ConvolutionTransposedThin( : GPUOperation(std::move(operation)), kernel_size_(operation.kernel_size_), src_channels_(operation.src_channels_), - dst_channels_(operation.dst_channels_), - kernel_(std::move(operation.kernel_)), - work_group_size_(operation.work_group_size_) {} + dst_channels_(operation.dst_channels_) {} ConvolutionTransposedThin& ConvolutionTransposedThin::operator=( ConvolutionTransposedThin&& operation) { @@ -155,8 +153,6 @@ ConvolutionTransposedThin& ConvolutionTransposedThin::operator=( std::swap(kernel_size_, operation.kernel_size_); std::swap(src_channels_, operation.src_channels_); std::swap(dst_channels_, operation.dst_channels_); - kernel_ = std::move(operation.kernel_); - std::swap(work_group_size_, operation.work_group_size_); GPUOperation::operator=(std::move(operation)); } return *this; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.h index 9a994d61e70..bb06202739d 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.h @@ -71,9 +71,6 @@ class ConvolutionTransposedThin : public GPUOperation { int2 kernel_size_; int src_channels_; int dst_channels_; - - CLKernel kernel_; - int3 work_group_size_ = int3(8, 4, 1); }; template diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.cc b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.cc index de1a04befa8..82658d62f10 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.cc @@ -226,8 +226,9 @@ 
DepthwiseConvolution::DepthwiseConvolution( stride_(attr.strides.w, attr.strides.h, 0, 0), padding_(-attr.padding.prepended.w, -attr.padding.prepended.h, 0, 0), dilation_(attr.dilations.w, attr.dilations.h, 0, 0), - channel_multiplier_(attr.weights.shape.o), - work_group_size_(8, 8, 1) {} + channel_multiplier_(attr.weights.shape.o) { + work_group_size_ = int3(8, 8, 1); +} DepthwiseConvolution::DepthwiseConvolution( const OperationDef& definition, @@ -240,8 +241,9 @@ DepthwiseConvolution::DepthwiseConvolution( padding_(-attr.padding.prepended.w, -attr.padding.prepended.h, -attr.padding.prepended.d, 0), dilation_(attr.dilations.w, attr.dilations.h, attr.dilations.d, 0), - channel_multiplier_(attr.weights.shape.o), - work_group_size_(8, 8, 1) {} + channel_multiplier_(attr.weights.shape.o) { + work_group_size_ = int3(8, 8, 1); +} DepthwiseConvolution::DepthwiseConvolution(DepthwiseConvolution&& operation) : GPUOperation(std::move(operation)), @@ -250,9 +252,7 @@ DepthwiseConvolution::DepthwiseConvolution(DepthwiseConvolution&& operation) stride_(operation.stride_), padding_(operation.padding_), dilation_(operation.dilation_), - channel_multiplier_(operation.channel_multiplier_), - kernel_(std::move(operation.kernel_)), - work_group_size_(operation.work_group_size_) {} + channel_multiplier_(operation.channel_multiplier_) {} DepthwiseConvolution& DepthwiseConvolution::operator=( DepthwiseConvolution&& operation) { @@ -263,8 +263,6 @@ DepthwiseConvolution& DepthwiseConvolution::operator=( std::swap(padding_, operation.padding_); std::swap(dilation_, operation.dilation_); std::swap(channel_multiplier_, operation.channel_multiplier_); - kernel_ = std::move(operation.kernel_); - std::swap(work_group_size_, operation.work_group_size_); GPUOperation::operator=(std::move(operation)); } return *this; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.h b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.h index 30cd3d06a5a..6433e8d0a3b 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.h @@ -91,9 +91,6 @@ class DepthwiseConvolution : public GPUOperation { int4 padding_; int4 dilation_; int channel_multiplier_; - - CLKernel kernel_; - int3 work_group_size_; }; template diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc index 309ce4a9d87..0494038e5b9 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc @@ -261,21 +261,19 @@ DepthwiseConv3x3::DepthwiseConv3x3(const OperationDef& definition, bool local_mem_uploads) : GPUOperation(definition), weights_are_buffer_(weights_are_buffer), - local_mem_uploads_(local_mem_uploads) {} + local_mem_uploads_(local_mem_uploads) { + work_group_size_ = int3(8, 4, 1); +} DepthwiseConv3x3::DepthwiseConv3x3(DepthwiseConv3x3&& operation) : GPUOperation(std::move(operation)), weights_are_buffer_(operation.weights_are_buffer_), - local_mem_uploads_(operation.local_mem_uploads_), - kernel_(std::move(operation.kernel_)), - work_group_size_(operation.work_group_size_) {} + local_mem_uploads_(operation.local_mem_uploads_) {} DepthwiseConv3x3& DepthwiseConv3x3::operator=(DepthwiseConv3x3&& operation) { if (this != &operation) { std::swap(weights_are_buffer_, operation.weights_are_buffer_); std::swap(local_mem_uploads_, operation.local_mem_uploads_); - kernel_ = std::move(operation.kernel_); - 
std::swap(work_group_size_, operation.work_group_size_); GPUOperation::operator=(std::move(operation)); } return *this; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.h b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.h index 9cb2ac41c87..fd1dca4ca98 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.h @@ -71,9 +71,6 @@ class DepthwiseConv3x3 : public GPUOperation { bool weights_are_buffer_; bool local_mem_uploads_; - - CLKernel kernel_; - int3 work_group_size_ = int3(8, 4, 1); }; template diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc index bc287ec2fee..1685d4f505f 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc @@ -90,14 +90,10 @@ FullyConnected::FullyConnected(const OperationDef& definition) : GPUOperation(definition) {} FullyConnected::FullyConnected(FullyConnected&& kernel) - : GPUOperation(std::move(kernel)), - kernel_(std::move(kernel.kernel_)), - work_group_size_(kernel.work_group_size_) {} + : GPUOperation(std::move(kernel)) {} FullyConnected& FullyConnected::operator=(FullyConnected&& kernel) { if (this != &kernel) { - kernel_ = std::move(kernel.kernel_); - std::swap(work_group_size_, kernel.work_group_size_); GPUOperation::operator=(std::move(kernel)); } return *this; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h index 83490b281ab..2adff4fb685 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h @@ -60,9 +60,6 @@ class FullyConnected : public GPUOperation { template void RearrangeWeights(const tflite::gpu::Tensor& weights, absl::Span dst); - - CLKernel kernel_; - int3 work_group_size_ = int3(0, 0, 0); }; template diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc index 669e4478bdd..2310ee5fb98 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc @@ -123,6 +123,8 @@ GPUOperation::GPUOperation(GPUOperation&& operation) src_(std::move(operation.src_)), dst_(std::move(operation.dst_)), args_(std::move(operation.args_)), + kernel_(std::move(operation.kernel_)), + work_group_size_(operation.work_group_size_), linked_operations_(std::move(operation.linked_operations_)) {} GPUOperation& GPUOperation::operator=(GPUOperation&& operation) { @@ -131,6 +133,8 @@ GPUOperation& GPUOperation::operator=(GPUOperation&& operation) { src_ = std::move(operation.src_); dst_ = std::move(operation.dst_); args_ = std::move(operation.args_); + kernel_ = std::move(operation.kernel_); + std::swap(work_group_size_, operation.work_group_size_); linked_operations_ = std::move(operation.linked_operations_); } return *this; @@ -143,17 +147,13 @@ void GPUOperation::AddOperation(ElementwiseOperation* operation) { ElementwiseOperation::ElementwiseOperation(ElementwiseOperation&& operation) : GPUOperation(std::move(operation)), check_src_channels_size_(operation.check_src_channels_size_), - code_(std::move(operation.code_)), - kernel_(std::move(operation.kernel_)), - work_group_size_(operation.work_group_size_) {} + code_(std::move(operation.code_)) {} ElementwiseOperation& 
ElementwiseOperation::operator=( ElementwiseOperation&& operation) { if (this != &operation) { check_src_channels_size_ = operation.check_src_channels_size_; code_ = std::move(operation.code_); - kernel_ = std::move(operation.kernel_); - std::swap(work_group_size_, operation.work_group_size_); GPUOperation::operator=(std::move(operation)); } return *this; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h index 1e101ef2849..34d6d8c2141 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h @@ -42,17 +42,6 @@ struct CreationContext { ProgramCache* cache; }; -struct LinkingContext { - // variable(FLT4) name to apply subsequent transformations - std::string var_name; - // x coordinate name (as it appears in kernel) for variable - std::string x_coord; - // y coordinate name (as it appears in kernel) for variable - std::string y_coord; - // s coordinate name (as it appears in kernel) for variable - std::string s_coord; -}; - struct OperationDef { CalculationsPrecision precision; std::vector src_tensors; @@ -116,6 +105,8 @@ class GPUOperation { std::vector src_; std::vector dst_; Arguments args_; + CLKernel kernel_; + int3 work_group_size_ = int3(8, 4, 1); std::vector linked_operations_; }; @@ -160,8 +151,6 @@ class ElementwiseOperation : public GPUOperation { std::string code_; absl::Status BindArguments(); int3 GetGridSize() const; - CLKernel kernel_; - int3 work_group_size_ = int3(8, 4, 1); }; absl::Status MergeOperations( diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/lstm.cc b/tensorflow/lite/delegates/gpu/cl/kernels/lstm.cc index 4732d35e987..66d6b3d51cb 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/lstm.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/lstm.cc @@ -102,15 +102,10 @@ std::string GetLSTMCode(const OperationDef& op_def, const CLDevice& device, LSTM::LSTM(const OperationDef& definition) : GPUOperation(definition) {} -LSTM::LSTM(LSTM&& kernel) - : GPUOperation(std::move(kernel)), - kernel_(std::move(kernel.kernel_)), - work_group_size_(kernel.work_group_size_) {} +LSTM::LSTM(LSTM&& kernel) : GPUOperation(std::move(kernel)) {} LSTM& LSTM::operator=(LSTM&& kernel) { if (this != &kernel) { - kernel_ = std::move(kernel.kernel_); - std::swap(work_group_size_, kernel.work_group_size_); GPUOperation::operator=(std::move(kernel)); } return *this; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/lstm.h b/tensorflow/lite/delegates/gpu/cl/kernels/lstm.h index 27b072ed001..5310e19951d 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/lstm.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/lstm.h @@ -41,9 +41,6 @@ class LSTM : public GPUOperation { private: absl::Status BindArguments(); int3 GetGridSize() const; - - CLKernel kernel_; - int3 work_group_size_ = int3(8, 4, 1); }; LSTM CreateLSTM(const OperationDef& definition); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.cc b/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.cc index dc16837102a..58ace7229ad 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.cc @@ -160,17 +160,13 @@ MaxUnpooling::MaxUnpooling(MaxUnpooling&& kernel) : GPUOperation(std::move(kernel)), stride_(kernel.stride_), padding_(kernel.padding_), - kernel_size_(kernel.kernel_size_), - kernel_(std::move(kernel.kernel_)), - work_group_size_(kernel.work_group_size_) {} + 
kernel_size_(kernel.kernel_size_) {} MaxUnpooling& MaxUnpooling::operator=(MaxUnpooling&& kernel) { if (this != &kernel) { std::swap(stride_, kernel.stride_); std::swap(padding_, kernel.padding_); std::swap(kernel_size_, kernel.kernel_size_); - kernel_ = std::move(kernel.kernel_); - std::swap(work_group_size_, kernel.work_group_size_); GPUOperation::operator=(std::move(kernel)); } return *this; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.h b/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.h index 24b8c4bbfe3..dae35e90604 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.h @@ -49,9 +49,6 @@ class MaxUnpooling : public GPUOperation { int4 stride_; int4 padding_; int4 kernel_size_; - - CLKernel kernel_; - int3 work_group_size_ = int3(8, 4, 1); }; MaxUnpooling CreateMaxUnpooling(const OperationDef& definition, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/mean.cc b/tensorflow/lite/delegates/gpu/cl/kernels/mean.cc index 3f8fb5ee648..334181b98d5 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/mean.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/mean.cc @@ -93,21 +93,19 @@ std::string GetMeanKernelCode(const OperationDef& op_def, } } // namespace -Mean::Mean(Mean&& operation) - : GPUOperation(std::move(operation)), - kernel_(std::move(operation.kernel_)), - work_group_size_(operation.work_group_size_) {} +Mean::Mean(Mean&& operation) : GPUOperation(std::move(operation)) {} Mean& Mean::operator=(Mean&& operation) { if (this != &operation) { - kernel_ = std::move(operation.kernel_); - std::swap(work_group_size_, operation.work_group_size_); GPUOperation::operator=(std::move(operation)); } return *this; } absl::Status Mean::Compile(const CreationContext& creation_context) { + // must be: (x * y) % 4 = 0; + // must be: z = 1; + work_group_size_ = int3(16, 16, 1); if (creation_context.device->IsAdreno3xx()) { work_group_size_ = int3(16, 8, 1); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/mean.h b/tensorflow/lite/delegates/gpu/cl/kernels/mean.h index 4525551b5f2..028e0013ed4 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/mean.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/mean.h @@ -43,11 +43,6 @@ class Mean : public GPUOperation { private: absl::Status BindArguments(); int3 GetGridSize() const; - CLKernel kernel_; - - // must be: (x * y) % 4 = 0; - // must be: z = 1; - int3 work_group_size_ = int3(16, 16, 1); }; Mean CreateMean(const OperationDef& definition); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/padding.cc b/tensorflow/lite/delegates/gpu/cl/kernels/padding.cc index 883067a2c2d..8576475462d 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/padding.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/padding.cc @@ -145,16 +145,11 @@ Padding::Padding(const OperationDef& definition, const PadAttributes& attr) : GPUOperation(definition), attributes_(attr) {} Padding::Padding(Padding&& kernel) - : GPUOperation(std::move(kernel)), - attributes_(kernel.attributes_), - kernel_(std::move(kernel.kernel_)), - work_group_size_(kernel.work_group_size_) {} + : GPUOperation(std::move(kernel)), attributes_(kernel.attributes_) {} Padding& Padding::operator=(Padding&& kernel) { if (this != &kernel) { std::swap(attributes_, kernel.attributes_); - kernel_ = std::move(kernel.kernel_); - std::swap(work_group_size_, kernel.work_group_size_); GPUOperation::operator=(std::move(kernel)); } return *this; diff --git 
a/tensorflow/lite/delegates/gpu/cl/kernels/padding.h b/tensorflow/lite/delegates/gpu/cl/kernels/padding.h index ddf9f9583be..d87a3a87be3 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/padding.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/padding.h @@ -44,8 +44,6 @@ class Padding : public GPUOperation { int3 GetGridSize() const; PadAttributes attributes_; - CLKernel kernel_; - int3 work_group_size_ = int3(8, 4, 1); }; Padding CreatePadding(const OperationDef& definition, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/pooling.cc b/tensorflow/lite/delegates/gpu/cl/kernels/pooling.cc index 922d484c57d..966c655b975 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/pooling.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/pooling.cc @@ -335,9 +335,7 @@ Pooling::Pooling(Pooling&& kernel) padding_(kernel.padding_), kernel_size_(kernel.kernel_size_), type_(kernel.type_), - output_indices_(kernel.output_indices_), - kernel_(std::move(kernel.kernel_)), - work_group_size_(kernel.work_group_size_) {} + output_indices_(kernel.output_indices_) {} Pooling& Pooling::operator=(Pooling&& kernel) { if (this != &kernel) { @@ -346,8 +344,6 @@ Pooling& Pooling::operator=(Pooling&& kernel) { std::swap(kernel_size_, kernel.kernel_size_); std::swap(type_, kernel.type_); std::swap(output_indices_, kernel.output_indices_); - kernel_ = std::move(kernel.kernel_); - std::swap(work_group_size_, kernel.work_group_size_); GPUOperation::operator=(std::move(kernel)); } return *this; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/pooling.h b/tensorflow/lite/delegates/gpu/cl/kernels/pooling.h index 20719c90ae3..67d290eccb3 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/pooling.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/pooling.h @@ -52,9 +52,6 @@ class Pooling : public GPUOperation { PoolingType type_; bool output_indices_; - - CLKernel kernel_; - int3 work_group_size_ = int3(8, 4, 1); }; Pooling CreatePooling(const OperationDef& definition, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/reshape.cc b/tensorflow/lite/delegates/gpu/cl/kernels/reshape.cc index 5abfad60c1b..4cc5b1278f8 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/reshape.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/reshape.cc @@ -89,15 +89,10 @@ std::string GetReshapeCode(const OperationDef& op_def, Arguments* args) { } } // namespace -Reshape::Reshape(Reshape&& operation) - : GPUOperation(std::move(operation)), - kernel_(std::move(operation.kernel_)), - work_group_size_(operation.work_group_size_) {} +Reshape::Reshape(Reshape&& operation) : GPUOperation(std::move(operation)) {} Reshape& Reshape::operator=(Reshape&& operation) { if (this != &operation) { - kernel_ = std::move(operation.kernel_); - std::swap(work_group_size_, operation.work_group_size_); GPUOperation::operator=(std::move(operation)); } return *this; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/reshape.h b/tensorflow/lite/delegates/gpu/cl/kernels/reshape.h index e11c066ebd3..8d95bbc86bc 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/reshape.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/reshape.h @@ -27,8 +27,7 @@ namespace cl { class Reshape : public GPUOperation { public: - explicit Reshape(const OperationDef& definition) - : GPUOperation(definition), work_group_size_(8, 4, 1) {} + explicit Reshape(const OperationDef& definition) : GPUOperation(definition) {} absl::Status AddToQueue(CLCommandQueue* queue) override; absl::Status Tune(const TuningParameters& params) override; @@ -43,9 +42,6 @@ class Reshape : public 
GPUOperation { private: absl::Status BindArguments(); int3 GetGridSize() const; - - CLKernel kernel_; - int3 work_group_size_; }; Reshape CreateReshape(const OperationDef& definition); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.cc b/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.cc index 3edbe637aa2..e4c47b70a2c 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.cc @@ -74,14 +74,10 @@ std::string GetReshapeCode(const OperationDef& op_def, Arguments* args) { } // namespace Reshapex4::Reshapex4(Reshapex4&& operation) - : GPUOperation(std::move(operation)), - kernel_(std::move(operation.kernel_)), - work_group_size_(operation.work_group_size_) {} + : GPUOperation(std::move(operation)) {} Reshapex4& Reshapex4::operator=(Reshapex4&& operation) { if (this != &operation) { - kernel_ = std::move(operation.kernel_); - std::swap(work_group_size_, operation.work_group_size_); GPUOperation::operator=(std::move(operation)); } return *this; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.h b/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.h index d61224a7367..f7c98ab63f6 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.h @@ -29,7 +29,7 @@ namespace cl { class Reshapex4 : public GPUOperation { public: explicit Reshapex4(const OperationDef& definition) - : GPUOperation(definition), work_group_size_(8, 4, 1) {} + : GPUOperation(definition) {} absl::Status AddToQueue(CLCommandQueue* queue) override; absl::Status Tune(const TuningParameters& params) override; @@ -44,9 +44,6 @@ class Reshapex4 : public GPUOperation { private: absl::Status BindArguments(); int3 GetGridSize() const; - - CLKernel kernel_; - int3 work_group_size_; }; // More optimized, but require src_channels % 4 == 0 and dst_channels % 4 == 0 diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/resize.cc b/tensorflow/lite/delegates/gpu/cl/kernels/resize.cc index 6aa2d1d2570..a47fff96d85 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/resize.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/resize.cc @@ -192,16 +192,11 @@ std::string GetResize3DCode(const OperationDef& op_def, } // namespace Resize::Resize(Resize&& operation) - : GPUOperation(std::move(operation)), - attr_(operation.attr_), - kernel_(std::move(operation.kernel_)), - work_group_size_(operation.work_group_size_) {} + : GPUOperation(std::move(operation)), attr_(operation.attr_) {} Resize& Resize::operator=(Resize&& operation) { if (this != &operation) { attr_ = operation.attr_; - kernel_ = std::move(operation.kernel_); - std::swap(work_group_size_, operation.work_group_size_); GPUOperation::operator=(std::move(operation)); } return *this; @@ -259,16 +254,11 @@ Resize CreateResize(const OperationDef& definition, } Resize3D::Resize3D(Resize3D&& operation) - : GPUOperation(std::move(operation)), - attr_(operation.attr_), - kernel_(std::move(operation.kernel_)), - work_group_size_(operation.work_group_size_) {} + : GPUOperation(std::move(operation)), attr_(operation.attr_) {} Resize3D& Resize3D::operator=(Resize3D&& operation) { if (this != &operation) { attr_ = operation.attr_; - kernel_ = std::move(operation.kernel_); - std::swap(work_group_size_, operation.work_group_size_); GPUOperation::operator=(std::move(operation)); } return *this; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/resize.h b/tensorflow/lite/delegates/gpu/cl/kernels/resize.h index 04459e12ff9..10fb414214b 100644 
--- a/tensorflow/lite/delegates/gpu/cl/kernels/resize.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/resize.h @@ -49,8 +49,6 @@ class Resize : public GPUOperation { int3 GetGridSize() const; Resize2DAttributes attr_; - CLKernel kernel_; - int3 work_group_size_ = int3(8, 4, 1); }; Resize CreateResize(const OperationDef& definition, @@ -80,8 +78,6 @@ class Resize3D : public GPUOperation { int3 GetGridSize() const; Resize3DAttributes attr_; - CLKernel kernel_; - int3 work_group_size_ = int3(8, 4, 1); }; Resize3D CreateResize3D(const OperationDef& definition, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/softmax.cc b/tensorflow/lite/delegates/gpu/cl/kernels/softmax.cc index fda7dbba6dd..ea8671bac68 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/softmax.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/softmax.cc @@ -66,15 +66,10 @@ std::string GetSoftmaxKernelCode( } } // namespace -Softmax::Softmax(Softmax&& kernel) - : GPUOperation(std::move(kernel)), - kernel_(std::move(kernel.kernel_)), - work_group_size_(kernel.work_group_size_) {} +Softmax::Softmax(Softmax&& kernel) : GPUOperation(std::move(kernel)) {} Softmax& Softmax::operator=(Softmax&& kernel) { if (this != &kernel) { - kernel_ = std::move(kernel.kernel_); - std::swap(work_group_size_, kernel.work_group_size_); GPUOperation::operator=(std::move(kernel)); } return *this; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/softmax.h b/tensorflow/lite/delegates/gpu/cl/kernels/softmax.h index 703a40a4e89..5f974ef7e6d 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/softmax.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/softmax.h @@ -46,8 +46,6 @@ class Softmax : public GPUOperation { private: absl::Status BindArguments(); int3 GetGridSize() const; - CLKernel kernel_; - int3 work_group_size_ = int3(8, 4, 1); }; Softmax CreateSoftmax(const OperationDef& definition); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.cc b/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.cc index fcfe4a1810c..28ebd8a2b13 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.cc @@ -100,12 +100,10 @@ std::string GetSoftmaxKernelCode(const OperationDef& op_def, Arguments* args) { } } // namespace -Softmax1x1::Softmax1x1(Softmax1x1&& kernel) - : GPUOperation(std::move(kernel)), kernel_(std::move(kernel.kernel_)) {} +Softmax1x1::Softmax1x1(Softmax1x1&& kernel) : GPUOperation(std::move(kernel)) {} Softmax1x1& Softmax1x1::operator=(Softmax1x1&& kernel) { if (this != &kernel) { - kernel_ = std::move(kernel.kernel_); GPUOperation::operator=(std::move(kernel)); } return *this; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.h b/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.h index 0d28145ca03..d5ae037a695 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.h @@ -41,9 +41,6 @@ class Softmax1x1 : public GPUOperation { Softmax1x1& operator=(const Softmax1x1&) = delete; friend Softmax1x1 CreateSoftmax1x1(); - - private: - CLKernel kernel_; }; Softmax1x1 CreateSoftmax1x1(const OperationDef& definition); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.cc b/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.cc index 439b7d0fc15..6b5cc9f484e 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.cc @@ -79,16 +79,11 @@ std::string GetSpaceToDepthCode(const 
OperationDef& op_def, Arguments* args) { } // namespace SpaceToDepth::SpaceToDepth(SpaceToDepth&& operation) - : GPUOperation(std::move(operation)), - attr_(operation.attr_), - kernel_(std::move(operation.kernel_)), - work_group_size_(operation.work_group_size_) {} + : GPUOperation(std::move(operation)), attr_(operation.attr_) {} SpaceToDepth& SpaceToDepth::operator=(SpaceToDepth&& operation) { if (this != &operation) { attr_ = operation.attr_; - kernel_ = std::move(operation.kernel_); - std::swap(work_group_size_, operation.work_group_size_); GPUOperation::operator=(std::move(operation)); } return *this; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.h b/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.h index 9dd257a4c4d..62689200643 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.h @@ -29,7 +29,7 @@ namespace cl { class SpaceToDepth : public GPUOperation { public: SpaceToDepth(const OperationDef& op_def, const SpaceToDepthAttributes& attr) - : GPUOperation(op_def), attr_(attr), work_group_size_(8, 4, 1) {} + : GPUOperation(op_def), attr_(attr) {} absl::Status AddToQueue(CLCommandQueue* queue) override; absl::Status Tune(const TuningParameters& params) override; absl::Status Compile(const CreationContext& creation_context) override; @@ -44,8 +44,6 @@ class SpaceToDepth : public GPUOperation { int3 GetGridSize() const; SpaceToDepthAttributes attr_; - CLKernel kernel_; - int3 work_group_size_; }; SpaceToDepth CreateSpaceToDepth(const OperationDef& op_def, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.cc b/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.cc index d0c4e432f3a..904e7fc08ce 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.cc @@ -143,19 +143,16 @@ int4 GetOffset(const SliceAttributes& attr, int src_width, int src_height, StridedSlice::StridedSlice(const OperationDef& definition, const SliceAttributes& attr) - : GPUOperation(definition), attributes_(attr), work_group_size_(8, 4, 1) {} + : GPUOperation(definition), attributes_(attr) { + work_group_size_ = int3(8, 4, 1); +} StridedSlice::StridedSlice(StridedSlice&& operation) - : GPUOperation(std::move(operation)), - attributes_(operation.attributes_), - kernel_(std::move(operation.kernel_)), - work_group_size_(operation.work_group_size_) {} + : GPUOperation(std::move(operation)), attributes_(operation.attributes_) {} StridedSlice& StridedSlice::operator=(StridedSlice&& operation) { if (this != &operation) { attributes_ = operation.attributes_; - kernel_ = std::move(operation.kernel_); - std::swap(work_group_size_, operation.work_group_size_); GPUOperation::operator=(std::move(operation)); } return *this; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.h b/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.h index ee6f18fdacb..3d88bd9e96b 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.h @@ -43,9 +43,6 @@ class StridedSlice : public GPUOperation { int3 GetGridSize() const; SliceAttributes attributes_; - - CLKernel kernel_; - int3 work_group_size_; }; StridedSlice CreateStridedSlice(const OperationDef& definition, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/transpose.cc b/tensorflow/lite/delegates/gpu/cl/kernels/transpose.cc index cacfd52542d..bd5df56f6ad 100644 --- 
a/tensorflow/lite/delegates/gpu/cl/kernels/transpose.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/transpose.cc @@ -104,16 +104,11 @@ std::string GetTransposeCode( } // namespace Transpose::Transpose(Transpose&& operation) - : GPUOperation(std::move(operation)), - attr_(operation.attr_), - kernel_(std::move(operation.kernel_)), - work_group_size_(operation.work_group_size_) {} + : GPUOperation(std::move(operation)), attr_(operation.attr_) {} Transpose& Transpose::operator=(Transpose&& operation) { if (this != &operation) { attr_ = operation.attr_; - kernel_ = std::move(operation.kernel_); - std::swap(work_group_size_, operation.work_group_size_); GPUOperation::operator=(std::move(operation)); } return *this; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/transpose.h b/tensorflow/lite/delegates/gpu/cl/kernels/transpose.h index 61038b1e0ca..2c32fc439d9 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/transpose.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/transpose.h @@ -27,7 +27,7 @@ namespace cl { class Transpose : public GPUOperation { public: Transpose(const OperationDef& definition, const TransposeAttributes& attr) - : GPUOperation(definition), attr_(attr), work_group_size_(8, 4, 1) {} + : GPUOperation(definition), attr_(attr) {} absl::Status AddToQueue(CLCommandQueue* queue) override; absl::Status Tune(const TuningParameters& params) override; absl::Status Compile(const CreationContext& creation_context) override; @@ -43,8 +43,6 @@ class Transpose : public GPUOperation { int3 GetGridSize() const; TransposeAttributes attr_; - CLKernel kernel_; - int3 work_group_size_; }; Transpose CreateTranspose(const OperationDef& definition, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc index d38b72e61a6..a0f923861fa 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc @@ -171,7 +171,6 @@ std::string GetWinograd4x4To36Code( } c += " }\n"; } - const LinkingContext context{"r0", "DST_X", "DST_Y", "DST_Z"}; c += " {\n"; c += " FLT4 r0 = TO_FLT4(I0 + Bt[2] * I2 + Bt[4] * I4);\n"; c += " args.dst_tensor.Write(r0, DST_X, DST_Y, DST_Z);\n"; @@ -326,16 +325,11 @@ std::string GetWinograd36To4x4Code(const OperationDef& op_def, } // namespace Winograd4x4To36::Winograd4x4To36(Winograd4x4To36&& operation) - : GPUOperation(std::move(operation)), - padding_(operation.padding_), - kernel_(std::move(operation.kernel_)), - work_group_size_(operation.work_group_size_) {} + : GPUOperation(std::move(operation)), padding_(operation.padding_) {} Winograd4x4To36& Winograd4x4To36::operator=(Winograd4x4To36&& operation) { if (this != &operation) { std::swap(padding_, operation.padding_); - kernel_ = std::move(operation.kernel_); - std::swap(work_group_size_, operation.work_group_size_); GPUOperation::operator=(std::move(operation)); } return *this; @@ -447,14 +441,10 @@ absl::Status CreateWinograd4x4To36(const CreationContext& creation_context, } Winograd36To4x4::Winograd36To4x4(Winograd36To4x4&& operation) - : GPUOperation(std::move(operation)), - kernel_(std::move(operation.kernel_)), - work_group_size_(operation.work_group_size_) {} + : GPUOperation(std::move(operation)) {} Winograd36To4x4& Winograd36To4x4::operator=(Winograd36To4x4&& operation) { if (this != &operation) { - kernel_ = std::move(operation.kernel_); - std::swap(work_group_size_, operation.work_group_size_); GPUOperation::operator=(std::move(operation)); } return *this; diff --git 
a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.h b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.h index 84ebd87042d..3f57342201b 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.h @@ -35,7 +35,9 @@ class Winograd4x4To36 : public GPUOperation { public: Winograd4x4To36() = default; Winograd4x4To36(const OperationDef& definition, const Padding2D& padding) - : GPUOperation(definition), padding_(padding) {} + : GPUOperation(definition), padding_(padding) { + work_group_size_ = int3(128, 1, 1); + } absl::Status AddToQueue(CLCommandQueue* queue) override; absl::Status Tune(const TuningParameters& params) override; absl::Status Compile(const CreationContext& creation_context) override; @@ -60,9 +62,6 @@ class Winograd4x4To36 : public GPUOperation { int3 GetGridSize() const; Padding2D padding_; - - CLKernel kernel_; - int3 work_group_size_ = int3(128, 1, 1); }; absl::Status CreateWinograd4x4To36(const CreationContext& creation_context, @@ -74,7 +73,9 @@ class Winograd36To4x4 : public GPUOperation { public: Winograd36To4x4() = default; explicit Winograd36To4x4(const OperationDef& definition) - : GPUOperation(definition) {} + : GPUOperation(definition) { + work_group_size_ = int3(128, 1, 1); + } absl::Status AddToQueue(CLCommandQueue* queue) override; absl::Status Tune(const TuningParameters& params) override; absl::Status Compile(const CreationContext& creation_context) override; @@ -98,9 +99,6 @@ class Winograd36To4x4 : public GPUOperation { absl::Status BindArguments(); int3 GetGridSize() const; - - CLKernel kernel_; - int3 work_group_size_ = int3(128, 1, 1); }; absl::Status CreateWinograd36To4x4( From cedf4f2c5baf34ea3aa60e5855615ca0720916df Mon Sep 17 00:00:00 2001 From: Henry Tan Date: Mon, 13 Jul 2020 09:25:56 -0700 Subject: [PATCH 0269/2522] TPU Op library refactor PiperOrigin-RevId: 320970123 Change-Id: I045407f6fba099a97d50f4bbad6e6f6c5ad3b5f2 --- tensorflow/core/tpu/kernels/BUILD | 2 +- .../core/tpu/kernels/tpu_compile_op_common.cc | 49 +++++++++++++++---- .../core/tpu/kernels/tpu_compile_op_common.h | 22 ++++----- .../core/tpu/kernels/tpu_compile_op_support.h | 23 ++++++++- 4 files changed, 72 insertions(+), 24 deletions(-) diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index 6fca7dda24a..745f54df5e1 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -158,7 +158,6 @@ cc_library( "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:xla_data_proto_cc", - "//tensorflow/compiler/xla/client:compile_only_client", "//tensorflow/compiler/xla/service:computation_layout", "//tensorflow/compiler/xla/service:dump", "//tensorflow/compiler/xla/service:hlo", @@ -168,6 +167,7 @@ cc_library( "//tensorflow/core/framework:protos_all_cc", "//tensorflow/core/protobuf/tpu:compile_metadata_proto_cc", "//tensorflow/stream_executor/tpu:proto_helper", + "@com_google_absl//absl/strings", "@com_google_absl//absl/types:optional", "@com_google_absl//absl/types:span", ], diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc b/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc index 5afa5c878be..bd332226bfd 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc @@ -163,12 +163,13 @@ Status TpuCompileOpKernelCommon::AssignReturnValueToCore( Status TpuCompileOpKernelCommon::BuildComputationArgumentDescriptions( const std::vector& 
arg_shapes, - const OpInputList& guaranteed_constants, const XlaCompiler& compiler, + const GuaranteedConsts& guaranteed_constants, const XlaCompiler& compiler, std::vector* args, std::vector* arg_core_mapping, std::vector>* per_core_arg_shapes) { // Builds a description of the computation's arguments. int constant_count = 0; + size_t guaranteed_constants_size = 0; for (int i = 0; i < metadata_.args_size(); ++i) { const tpu::TPUCompileMetadataProto::Arg& proto_arg = metadata_.args(i); args->push_back(XlaCompiler::Argument()); @@ -187,10 +188,26 @@ Status TpuCompileOpKernelCommon::BuildComputationArgumentDescriptions( break; case tpu::TPUCompileMetadataProto::Arg::GUARANTEED_CONSTANT: arg.kind = XlaCompiler::Argument::kConstant; - TF_RET_CHECK(constant_count < guaranteed_constants.size()) + guaranteed_constants_size = + guaranteed_constants.index() == 0 + ? std::get<0>(guaranteed_constants).size() + : std::get<1>(guaranteed_constants)->size(); + TF_RET_CHECK(constant_count < guaranteed_constants_size) << "More constant args in TPUCompileMetadataProto than constant " "tensors."; - arg.constant_value = guaranteed_constants[constant_count++]; + if (guaranteed_constants.index() == 0) { + // `guaranteed_constants` is of type `absl::Span`. + Tensor tensor; + CHECK(tensor.FromProto( + *std::get<0>(guaranteed_constants)[constant_count++])) + << "Failed to deserialize invalid `TensorProto` into `Tensor`."; + arg.constant_value = tensor; + } else { + // `guaranteed_constants` is of type `const OpInputList* const`. + arg.constant_value = + (*std::get<1>(guaranteed_constants))[constant_count++]; + } break; case tpu::TPUCompileMetadataProto::Arg::INVALID: default: @@ -213,7 +230,7 @@ Status TpuCompileOpKernelCommon::BuildComputationArgumentDescriptions( TF_RETURN_IF_ERROR(SetPerCoreArgShapes( proto_arg, i, &xla_arg_shape, arg_core_mapping, per_core_arg_shapes)); } - TF_RET_CHECK(constant_count == guaranteed_constants.size()) + TF_RET_CHECK(constant_count == guaranteed_constants_size) << "Not all of the constant tensors were consumed."; return Status::OK(); @@ -246,7 +263,7 @@ Status TpuCompileOpKernelCommon::CompileTFFunctionToHlo( const FunctionLibraryDefinition& flib_def, int graph_def_version, const XlaCompiler::ShapeRepresentationFn shape_representation_fn, const std::vector& arg_shapes, - const OpInputList& guaranteed_constants, const NameAttrList& function, + const GuaranteedConsts& guaranteed_constants, const NameAttrList& function, std::function populate_resource_manager_fn, xla::CompileOnlyClient* client, std::vector* arg_core_mapping, @@ -380,7 +397,8 @@ Status TpuCompileOpKernelCommon::CompileTFFunctionToHlo( return Status::OK(); } -/* static */ Status TpuCompileOpKernelCommon::ComputeArgumentShapes( +/* static */ +Status TpuCompileOpKernelCommon::ComputeArgumentShapes( const tpu::TPUCompileMetadataProto& metadata, const std::vector& dynamic_shapes, std::vector* arg_shapes) { @@ -573,10 +591,21 @@ Status TpuCompileOpKernelCommon::CompileLocallyAndFillHostCache( const OpInputList& guaranteed_constants, const TpuCompilationCacheKey& key, TpuProgramGroupInterface* tpu_program_group) { absl::Time start_time = absl::Now(); - Status compile_status = - Compile(*flib_runtime->GetFunctionLibraryDefinition(), - flib_runtime->graph_def_version(), mesh_state, dynamic_shapes, - guaranteed_constants, tpu_program_group); + std::vector arg_shapes; + TF_RETURN_IF_ERROR( + ComputeArgumentShapes(metadata_, dynamic_shapes, &arg_shapes)); + Status compile_status; + if (use_mlir_) { + compile_status = 
Compile(MlirToHloArgs{mlir_module_}, mesh_state->data(), + arg_shapes, tpu_program_group); + } else { + compile_status = + Compile(FunctionToHloArgs{&function_, + flib_runtime->GetFunctionLibraryDefinition(), + flib_runtime->graph_def_version(), + {&guaranteed_constants}}, + mesh_state->data(), arg_shapes, tpu_program_group); + } absl::Time end_time = absl::Now(); auto duration = end_time - start_time; diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_common.h b/tensorflow/core/tpu/kernels/tpu_compile_op_common.h index 43949c8e704..79be33f7233 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_common.h +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_common.h @@ -66,6 +66,14 @@ class TpuCompileOpKernelCommon { void Compute(OpKernelContext* ctx); + // Lowers Mlir or TF Function computation into HLO IR and using XLA compiler + // compiles into TPU programs ready for execution. + virtual Status Compile( + const std::variant& computation, + const XLA_TpuMeshState* mesh_state, + const std::vector& arg_shapes, + TpuProgramGroupInterface* tpu_program_group) = 0; + // Computes shapes for each argument. Uses both the static shape from the // metadata, and the dynamic shapes where the static shape is not // defined. There must be one dynamic_shape for each argument with a @@ -85,15 +93,6 @@ class TpuCompileOpKernelCommon { protected: Status ComputeInternal(OpKernelContext* ctx); - // Compile function that invokes the different helper functions to compile - // the given function. - virtual Status Compile(const FunctionLibraryDefinition& flib_def, - int graph_def_version, - const TpuMeshStateInterface* mesh_state, - const std::vector& dynamic_shapes, - const OpInputList& guaranteed_constants, - TpuProgramGroupInterface* tpu_program_group) = 0; - // Compile TPU program locally and populate the host compilation cache. Status CompileLocallyAndFillHostCache( FunctionLibraryRuntime* flib_runtime, @@ -154,7 +153,8 @@ class TpuCompileOpKernelCommon { const FunctionLibraryDefinition& flib_def, int graph_def_version, const XlaCompiler::ShapeRepresentationFn shape_representation_fn, const std::vector& arg_shapes, - const OpInputList& guaranteed_constants, const NameAttrList& function, + const GuaranteedConsts& guaranteed_constants, + const NameAttrList& function, std::function populate_resource_manager_fn, xla::CompileOnlyClient* client, std::vector* arg_core_mapping, @@ -177,7 +177,7 @@ class TpuCompileOpKernelCommon { // computation. Status BuildComputationArgumentDescriptions( const std::vector& arg_shapes, - const OpInputList& guaranteed_constants, const XlaCompiler& compiler, + const GuaranteedConsts& guaranteed_constants, const XlaCompiler& compiler, std::vector* args, std::vector* arg_core_mapping, std::vector>* per_core_arg_shapes); diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_support.h b/tensorflow/core/tpu/kernels/tpu_compile_op_support.h index 95c4131e6ed..881adbb057d 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_support.h +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_support.h @@ -18,8 +18,7 @@ limitations under the License. #include #include -#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_key.h" -#include "tensorflow/core/tpu/kernels/tpu_compile.pb.h" +#include "absl/strings/string_view.h" #include "absl/types/optional.h" #include "absl/types/span.h" #include "tensorflow/cc/framework/ops.h" @@ -31,16 +30,36 @@ limitations under the License. 
#include "tensorflow/compiler/xla/shape_tree.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/framework/function.h" #include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.pb.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/protobuf/tpu/compile_metadata.pb.h" +#include "tensorflow/core/tpu/kernels/tpu_compile.pb.h" namespace tensorflow { namespace tpu { namespace se = ::stream_executor; +// List of parameters for lowering Mlir to HLO IR. +struct MlirToHloArgs { + absl::string_view mlir_module; +}; + +// Variant of guaranteed constant tensors types. +using GuaranteedConsts = std::variant, + const OpInputList* const>; + +// List of parameters for lowering function library definition to HLO IR. +struct FunctionToHloArgs { + const NameAttrList* const function; + const FunctionLibraryDefinition* const flib_def; + int graph_def_version; + GuaranteedConsts guaranteed_constants; +}; + // Persistent cache for compiled TPU program and the related compiler metadata // intended for TPU inference. // TODO(henrytan): there is an opportunity to consolidate the interface with the From 8057a34b4b5ee96493467080918891dcb86fa5dd Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Mon, 13 Jul 2020 09:27:38 -0700 Subject: [PATCH 0270/2522] Fixed the memory leak in Resolve (on custom device). PiperOrigin-RevId: 320970410 Change-Id: I33a4127471aebec1193402db6f50da091c40b6a0 --- tensorflow/core/common_runtime/eager/core.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/common_runtime/eager/core.cc b/tensorflow/core/common_runtime/eager/core.cc index 0191527748b..28f25d643b6 100644 --- a/tensorflow/core/common_runtime/eager/core.cc +++ b/tensorflow/core/common_runtime/eager/core.cc @@ -45,7 +45,9 @@ AbstractTensorInterface* TensorHandle::Resolve(Status* status) { *status = custom_device->CopyTensorFromDevice( this, "/job:localhost/replica:0/task:0/device:CPU:0", ©); if (status->ok()) { - return copy->Resolve(status); + auto result = copy->Resolve(status); + copy->Unref(); + return result; } else { return nullptr; } From f1ee6a406c3ee63371c3ebb0072f8ea06614a8aa Mon Sep 17 00:00:00 2001 From: Robert David Date: Mon, 13 Jul 2020 09:36:06 -0700 Subject: [PATCH 0271/2522] Fix GetNumberOfRuntimeInputsForNode crashing on optional input tensors. Also use NumInputs/GetOptionalInputTensor from kernel_util.h instead of directly accessing TfLiteNode members. 
PiperOrigin-RevId: 320971976 Change-Id: Ieb7073dbfe644ad1f87289738ae6ff0d24e5ffad --- .../lite/delegates/gpu/common/model_builder_helper.cc | 8 +++++--- tensorflow/lite/kernels/kernel_util.h | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/common/model_builder_helper.cc b/tensorflow/lite/delegates/gpu/common/model_builder_helper.cc index 6ec910c8cee..453e33ec916 100644 --- a/tensorflow/lite/delegates/gpu/common/model_builder_helper.cc +++ b/tensorflow/lite/delegates/gpu/common/model_builder_helper.cc @@ -127,8 +127,10 @@ absl::Status PopulateQuantParams(const TfLiteTensor& tensor, int GetNumberOfRuntimeInputsForNode(const TfLiteContext* context, const TfLiteNode* tflite_node) { int number_of_runtime_inputs = 0; - for (int i = 0; i < tflite_node->inputs->size; i++) { - if (!IsConstantTensor(&context->tensors[tflite_node->inputs->data[i]])) { + for (int i = 0; i < NumInputs(tflite_node); i++) { + const TfLiteTensor* tensor = + GetOptionalInputTensor(context, tflite_node, i); + if (tensor != nullptr && !IsConstantTensor(tensor)) { number_of_runtime_inputs++; } } @@ -137,7 +139,7 @@ int GetNumberOfRuntimeInputsForNode(const TfLiteContext* context, int GetNumberOfConstInputsForNode(const TfLiteContext* context, const TfLiteNode* tflite_node) { - return tflite_node->inputs->size - + return NumInputs(tflite_node) - GetNumberOfRuntimeInputsForNode(context, tflite_node); } diff --git a/tensorflow/lite/kernels/kernel_util.h b/tensorflow/lite/kernels/kernel_util.h index 98418399561..4660631dded 100644 --- a/tensorflow/lite/kernels/kernel_util.h +++ b/tensorflow/lite/kernels/kernel_util.h @@ -72,7 +72,7 @@ inline int64_t NumElements(const TfLiteTensor* t) { return NumElements(t->dims); } -inline const TfLiteTensor* GetOptionalInputTensor(TfLiteContext* context, +inline const TfLiteTensor* GetOptionalInputTensor(const TfLiteContext* context, const TfLiteNode* node, int index) { const bool use_tensor = index < node->inputs->size && From 78026d6a66f7f0fc80c69b1a2f8843616f4cd2a7 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Mon, 13 Jul 2020 09:47:50 -0700 Subject: [PATCH 0272/2522] Remove `scipy` dependency. We have added it temporarily to prevent a segfault due to ABI breakage in `scipy==1.4.1` but never removed it after fixing. Fixes #40884. Fixes 35709. Closes #40789. PiperOrigin-RevId: 320974190 Change-Id: I72531796db26ee6634930b004092da8703585a78 --- tensorflow/tools/pip_package/setup.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 0c476336781..b1337b9070c 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -69,8 +69,6 @@ REQUIRED_PACKAGES = [ 'wrapt >= 1.11.1', 'wheel >= 0.26', 'six >= 1.12.0', - # scipy < 1.4.1 causes segfaults due to pybind11 - 'scipy == 1.4.1', ] if sys.byteorder == 'little': From 909c073034e9b185149717c22b83d52dbd7396c3 Mon Sep 17 00:00:00 2001 From: Prakalp Srivastava Date: Mon, 13 Jul 2020 09:53:43 -0700 Subject: [PATCH 0273/2522] Return error instead of crashing when resource does not alias input. 
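The resource outputs of the branch functions are expected to be forwarded block arguments. When a branch instead returns a resource produced by another op, the dyn_cast to BlockArgument yields a null value that the pass previously used without checking. Reduced to a sketch (variable names follow the pass, shown out of context):

  auto aliasing_arg = retval.dyn_cast<BlockArgument>();
  if (!aliasing_arg)
    return op.emitOpError("unsupported output: ")
           << "resource does not alias input";  // report instead of crashing
  // getArgNumber() is only reached once the cast is known to have succeeded.
  const int arg_number = aliasing_arg.getArgNumber();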
PiperOrigin-RevId: 320975373 Change-Id: If40b04f88bc189ae5a9ec04690b66cf8389b528f --- .../tensorflow/tests/resource_op_lifting.mlir | 29 +++++++++++++++++++ .../transforms/resource_op_lifting.cc | 6 ++-- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/resource_op_lifting.mlir b/tensorflow/compiler/mlir/tensorflow/tests/resource_op_lifting.mlir index 7c8e4382e2b..cd93e1423ea 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/resource_op_lifting.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/resource_op_lifting.mlir @@ -600,6 +600,35 @@ func @if_else(%arg0: tensor<*x!tf.resource>>, %arg1: tensor<*x!tf. // ----- +// Tests that the pass reports error if output does not alias input. + +func @cluster_with_if(%arg0: tensor) -> tensor<4xf32> { + %0 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource>> + %1 = "tf.VarHandleOp"() {container = "c", shared_name = "v2"} : () -> tensor<*x!tf.resource>> + %2 = "tf_device.cluster"() ( { + // expected-error @+1 {{unsupported output: resource does not alias input}} + %3 = "tf.If"(%arg0, %0, %1) {then_branch = @if_then, else_branch = @if_else, + is_stateless = false} + : (tensor, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) + -> (tensor<*x!tf.resource>>) + %4 = "tf.ReadVariableOp"(%3) : (tensor<*x!tf.resource>>) -> tensor<4xf32> + tf_device.return %4 : tensor<4xf32> + }) {cluster_attr = "cluster_attr"} : () -> tensor<4xf32> + return %2 : tensor<4xf32> +} +func @if_then(%arg0: tensor<*x!tf.resource>>, %arg1: tensor<*x!tf.resource>>) + -> (tensor<*x!tf.resource>>) { + %0 = "tf.foo"(%arg0) : (tensor<*x!tf.resource>>) -> tensor<*x!tf.resource>> + return %0 : tensor<*x!tf.resource>> +} +func @if_else(%arg0: tensor<*x!tf.resource>>, %arg1: tensor<*x!tf.resource>>) + -> (tensor<*x!tf.resource>>) { + %0 = "tf.bar"(%arg0) : (tensor<*x!tf.resource>>) -> tensor<*x!tf.resource>> + return %0 : tensor<*x!tf.resource>> +} + +// ----- + // Tests that the pass lifts resources on two partitioned call ops sharing the // same callee. The lifting should clone the callee then modify the clone. diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc b/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc index 6a67f0bea0a..3e70526b9d3 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc @@ -687,10 +687,12 @@ LogicalResult HandleCaseOrIfOp(CaseOrIfOp op, ArrayRef branches) { auto retval = func.front().getTerminator()->getOperand(result_index); assert(result.getType() == retval.getType()); auto aliasing_arg = retval.dyn_cast(); + if (!aliasing_arg) + return op.emitOpError("unsupported output: ") + << "resource does not alias input"; if (common_aliasing_arg_num == kUnassigned) common_aliasing_arg_num = aliasing_arg.getArgNumber(); - if (!aliasing_arg || - aliasing_arg.getArgNumber() != common_aliasing_arg_num) + if (aliasing_arg.getArgNumber() != common_aliasing_arg_num) return op.emitOpError("unsupported output: ") << "resource does not alias a single input"; } From 12b62d11b6f77ba6caeece562b54cf6f986f58e5 Mon Sep 17 00:00:00 2001 From: Henry Tan Date: Mon, 13 Jul 2020 10:17:04 -0700 Subject: [PATCH 0274/2522] Add `TpuCompilationRequestProto` and helper function to serialize Compile() arguments. 
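A hypothetical call site (not part of this change; the surrounding code and names are assumed) showing how the helper is intended to be used, where computation is the std::variant of MlirToHloArgs/FunctionToHloArgs, metadata the TPUCompileMetadataProto, and arg_shapes the computed argument shapes:

  auto request_or =
      CreateTpuCompilationRequest(computation, metadata, arg_shapes);
  if (!request_or.ok()) return request_or.status();
  // The request proto can then cross the compile C API boundary as a flat
  // serialized buffer.
  const std::string serialized =
      request_or.ValueOrDie().SerializeAsString();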
PiperOrigin-RevId: 320980526 Change-Id: Iacdb521e36cd2245c3eac6580efb72135f6b5ae5 --- tensorflow/core/tpu/kernels/tpu_compile.proto | 41 +++++++++++++++ .../tpu/kernels/tpu_compile_op_support.cc | 50 +++++++++++++++++++ .../core/tpu/kernels/tpu_compile_op_support.h | 6 +++ 3 files changed, 97 insertions(+) diff --git a/tensorflow/core/tpu/kernels/tpu_compile.proto b/tensorflow/core/tpu/kernels/tpu_compile.proto index 5b70de67a05..03c0a402337 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile.proto +++ b/tensorflow/core/tpu/kernels/tpu_compile.proto @@ -19,6 +19,8 @@ package tensorflow.tpu; import "tensorflow/compiler/tf2xla/host_compute_metadata.proto"; import "tensorflow/compiler/xla/service/hlo.proto"; import "tensorflow/compiler/xla/xla_data.proto"; +import "tensorflow/core/framework/attr_value.proto"; +import "tensorflow/core/framework/function.proto"; import "tensorflow/core/framework/tensor.proto"; import "tensorflow/core/framework/tensor_shape.proto"; import "tensorflow/core/framework/types.proto"; @@ -142,3 +144,42 @@ message TpuAotCompilationRequestProto { // XLA compiler compilation result. XlaCompilationResultProto compilation_result = 7; } + +// TPU compilation request for compiling computations into XLA HLO IR and build +// TPU programs. +message TpuCompilationRequestProto { + // A flag reserved for using experimental version of the compilation. By + // default the value should be false. + bool use_experimental = 1; + + // Use mlir to lower computation(s) to Hlo. + bool use_mlir = 2; + + // If true, returns hlo metadatas. + bool return_hlo_protos = 3; + + // If true, unloads cache on session close. + bool unload_cache_on_session_close = 4; + + // Compilation metadata. + TPUCompileMetadataProto metadata = 5; + + // Computation argument shapes. + repeated TensorShapeProto arg_shapes = 6; + + // Input tensor that gives const guarantee to the TF runtime. + repeated TensorProto guaranteed_constants = 7; + + // MLIR module definition. + optional string mlir_module = 8; + + // A set of named functions used as the input to lowering to Hlo when mlir is + // not used. + optional FunctionDefLibrary fdef_lib = 9; + + // The version of the graph definition used to lower TF function to Hlo. + optional int32 graph_def_version = 10; + + // Function containing the computation to compile. 
+ optional NameAttrList function = 11; +} diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_support.cc b/tensorflow/core/tpu/kernels/tpu_compile_op_support.cc index 6102dd50ff2..a8a66718118 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_support.cc +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_support.cc @@ -435,5 +435,55 @@ StatusOr CreateTpuAotCompilationRequest( VLOG(1) << "TpuAotCompilationRequest:\n" << aot_request.DebugString(); return aot_request; } + +StatusOr CreateTpuCompilationRequest( + const std::variant& computation, + const TPUCompileMetadataProto& metadata, + const std::vector& arg_shapes) { + VLOG(1) << "CreateTpuCompilationRequest."; + TpuCompilationRequestProto compilation_request; + bool use_mlir = computation.index() == 0; + compilation_request.set_use_mlir(use_mlir); + if (use_mlir) { + VLOG(1) << "Serializing MlirModule"; + const MlirToHloArgs& mlir_computation = std::get<0>(computation); + *compilation_request.mutable_mlir_module() = mlir_computation.mlir_module; + } else { + VLOG(1) << "Serializing FunctionDefinitionLibrary"; + const FunctionToHloArgs& function_computation = std::get<1>(computation); + *compilation_request.mutable_fdef_lib() = + function_computation.flib_def->ToProto(); + compilation_request.set_graph_def_version( + function_computation.graph_def_version); + *compilation_request.mutable_function() = *function_computation.function; + // TODO(b/160937500): serializing and copying large guaranteed_constants can + // be a perf hit. There is a future work to refactor the compilation layer + // to avoid passing guaranteed_constants over C_API. + if (function_computation.guaranteed_constants.index() == 0) { + absl::Span guaranteed_constants = + std::get<0>(function_computation.guaranteed_constants); + for (const TensorProto* constant : guaranteed_constants) { + *compilation_request.add_guaranteed_constants() = *constant; + } + } else { + CHECK_EQ(function_computation.guaranteed_constants.index(), 1); + const OpInputList& guaranteed_constants = + *std::get<1>(function_computation.guaranteed_constants); + for (const Tensor& constant : guaranteed_constants) { + constant.AsProtoTensorContent( + compilation_request.add_guaranteed_constants()); + } + } + } + + for (const TensorShape& shape : arg_shapes) { + shape.AsProto(compilation_request.add_arg_shapes()); + } + + *(compilation_request.mutable_metadata()) = metadata; + + VLOG(1) << "TpuCompilationRequest:\n" << compilation_request.DebugString(); + return compilation_request; +} } // namespace tpu } // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_support.h b/tensorflow/core/tpu/kernels/tpu_compile_op_support.h index 881adbb057d..bf1aff46578 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_support.h +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_support.h @@ -147,6 +147,12 @@ CreateTpuAotCompilationRequest( const std::vector>>& per_core_variable_indices, const absl::optional& device_assignment); + +se::port::StatusOr CreateTpuCompilationRequest( + const std::variant& computation, + const TPUCompileMetadataProto& metadata, + const std::vector& arg_shapes); + } // namespace tpu } // namespace tensorflow From 751a7ed21dbe1bfcf55314b59cfd08f1a00b013c Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Mon, 13 Jul 2020 10:19:48 -0700 Subject: [PATCH 0275/2522] Store OpData from Eval() into a stored persistent buffer in the comparisons kernels. 
The reference kernel currently calculates quantized data every eval which is slow on device and does not match upcoming changes for RAM savings. PiperOrigin-RevId: 320981085 Change-Id: I0e178b58fbde83607620b23c9a59d185cf97ce78 --- tensorflow/lite/micro/kernels/comparisons.cc | 528 ++++++++++++++---- .../lite/micro/kernels/comparisons_test.cc | 7 +- 2 files changed, 410 insertions(+), 125 deletions(-) diff --git a/tensorflow/lite/micro/kernels/comparisons.cc b/tensorflow/lite/micro/kernels/comparisons.cc index 0b3b9814fa4..e63a1f602e9 100644 --- a/tensorflow/lite/micro/kernels/comparisons.cc +++ b/tensorflow/lite/micro/kernels/comparisons.cc @@ -25,103 +25,94 @@ namespace micro { namespace comparisons { namespace { +struct OpData { + ComparisonParams params; +}; + constexpr int kInputTensor1 = 0; constexpr int kInputTensor2 = 1; constexpr int kOutputTensor = 0; -// TODO(ruic): optimize macros below to using template functions. -#define TF_LITE_QUANTIZE_COMPARISON(opname) \ - template \ - void EvalQuantized##opname(TfLiteContext* context, TfLiteNode* node, \ - const TfLiteTensor* input1, \ - const TfLiteTensor* input2, TfLiteTensor* output, \ - bool requires_broadcast) { \ - if (input1->type == kTfLiteUInt8 || input1->type == kTfLiteInt8) { \ - auto input1_offset = -input1->params.zero_point; \ - auto input2_offset = -input2->params.zero_point; \ - const int left_shift = 8; \ - \ - int32 input1_multiplier; \ - int input1_shift; \ - QuantizeMultiplierSmallerThanOneExp( \ - static_cast(input1->params.scale), &input1_multiplier, \ - &input1_shift); \ - int32 input2_multiplier; \ - int input2_shift; \ - QuantizeMultiplierSmallerThanOneExp( \ - static_cast(input2->params.scale), &input2_multiplier, \ - &input2_shift); \ - \ - ComparisonParams op_params; \ - op_params.left_shift = left_shift; \ - op_params.input1_offset = input1_offset; \ - op_params.input1_multiplier = input1_multiplier; \ - op_params.input1_shift = input1_shift; \ - op_params.input2_offset = input2_offset; \ - op_params.input2_multiplier = input2_multiplier; \ - op_params.input2_shift = input2_shift; \ - if (requires_broadcast) { \ - reference_ops::Broadcast4DSlow##opname##WithScaling( \ - op_params, GetTensorShape(input1), \ - GetTensorData(input1), GetTensorShape(input2), \ - GetTensorData(input2), GetTensorShape(output), \ - GetTensorData(output)); \ - } else { \ - reference_ops::opname##WithScaling( \ - op_params, GetTensorShape(input1), \ - GetTensorData(input1), GetTensorShape(input2), \ - GetTensorData(input2), GetTensorShape(output), \ - GetTensorData(output)); \ - } \ - } \ - } -TF_LITE_QUANTIZE_COMPARISON(Equal); -TF_LITE_QUANTIZE_COMPARISON(NotEqual); -TF_LITE_QUANTIZE_COMPARISON(Greater); -TF_LITE_QUANTIZE_COMPARISON(GreaterEqual); -TF_LITE_QUANTIZE_COMPARISON(Less); -TF_LITE_QUANTIZE_COMPARISON(LessEqual); -#undef TF_LITE_QUANTIZE_COMPARISON - -#define TF_LITE_COMPARISON(type, opname, requires_broadcast) \ - { \ - ComparisonParams op_params; \ - requires_broadcast \ - ? 
reference_ops::Broadcast4DSlow##opname##NoScaling( \ - op_params, GetTensorShape(input1), GetTensorData(input1), \ - GetTensorShape(input2), GetTensorData(input2), \ - GetTensorShape(output), GetTensorData(output)) \ - : reference_ops::opname##NoScaling( \ - op_params, GetTensorShape(input1), GetTensorData(input1), \ - GetTensorShape(input2), GetTensorData(input2), \ - GetTensorShape(output), GetTensorData(output)); \ - } - TfLiteStatus EqualEval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + const OpData* data = static_cast(node->user_data); + const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + RuntimeShape input1_shape = GetTensorShape(input1); + RuntimeShape input2_shape = GetTensorShape(input2); + RuntimeShape output_shape = GetTensorShape(output); + bool* output_data = GetTensorData(output); + bool requires_broadcast = !HaveSameShapes(input1, input2); switch (input1->type) { case kTfLiteBool: - TF_LITE_COMPARISON(bool, Equal, requires_broadcast); + requires_broadcast + ? reference_ops::Broadcast4DSlowEqualNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data) + : reference_ops::EqualNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data); break; case kTfLiteFloat32: - TF_LITE_COMPARISON(float, Equal, requires_broadcast); + requires_broadcast + ? reference_ops::Broadcast4DSlowEqualNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data) + : reference_ops::EqualNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data); break; case kTfLiteInt32: - TF_LITE_COMPARISON(int32_t, Equal, requires_broadcast); + requires_broadcast + ? reference_ops::Broadcast4DSlowEqualNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data) + : reference_ops::EqualNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data); break; case kTfLiteInt64: - TF_LITE_COMPARISON(int64_t, Equal, requires_broadcast); + requires_broadcast + ? reference_ops::Broadcast4DSlowEqualNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data) + : reference_ops::EqualNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data); break; case kTfLiteUInt8: - EvalQuantizedEqual(context, node, input1, input2, output, - requires_broadcast); + requires_broadcast + ? reference_ops::Broadcast4DSlowEqualWithScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data) + : reference_ops::EqualWithScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data); break; case kTfLiteInt8: - EvalQuantizedEqual(context, node, input1, input2, output, - requires_broadcast); + requires_broadcast + ? 
reference_ops::Broadcast4DSlowEqualWithScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data) + : reference_ops::EqualWithScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data); break; default: TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", @@ -133,30 +124,85 @@ TfLiteStatus EqualEval(TfLiteContext* context, TfLiteNode* node) { // TODO(renjieliu): Refactor the logic to avoid duplications. TfLiteStatus NotEqualEval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + const OpData* data = static_cast(node->user_data); + const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + RuntimeShape input1_shape = GetTensorShape(input1); + RuntimeShape input2_shape = GetTensorShape(input2); + RuntimeShape output_shape = GetTensorShape(output); + bool* output_data = GetTensorData(output); + bool requires_broadcast = !HaveSameShapes(input1, input2); switch (input1->type) { case kTfLiteBool: - TF_LITE_COMPARISON(bool, NotEqual, requires_broadcast); + requires_broadcast + ? reference_ops::Broadcast4DSlowNotEqualNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data) + : reference_ops::NotEqualNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data); break; case kTfLiteFloat32: - TF_LITE_COMPARISON(float, NotEqual, requires_broadcast); + requires_broadcast + ? reference_ops::Broadcast4DSlowNotEqualNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data) + : reference_ops::NotEqualNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data); break; case kTfLiteInt32: - TF_LITE_COMPARISON(int32_t, NotEqual, requires_broadcast); + requires_broadcast + ? reference_ops::Broadcast4DSlowNotEqualNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data) + : reference_ops::NotEqualNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data); break; case kTfLiteInt64: - TF_LITE_COMPARISON(int64_t, NotEqual, requires_broadcast); + requires_broadcast + ? reference_ops::Broadcast4DSlowNotEqualNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data) + : reference_ops::NotEqualNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data); break; case kTfLiteUInt8: - EvalQuantizedNotEqual(context, node, input1, input2, output, - requires_broadcast); + requires_broadcast + ? 
reference_ops::Broadcast4DSlowNotEqualWithScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data) + : reference_ops::NotEqualWithScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data); break; case kTfLiteInt8: - EvalQuantizedNotEqual(context, node, input1, input2, output, - requires_broadcast); + requires_broadcast + ? reference_ops::Broadcast4DSlowNotEqualWithScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data) + : reference_ops::NotEqualWithScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data); break; default: TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", @@ -167,27 +213,74 @@ TfLiteStatus NotEqualEval(TfLiteContext* context, TfLiteNode* node) { } TfLiteStatus GreaterEval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + const OpData* data = static_cast(node->user_data); + const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + RuntimeShape input1_shape = GetTensorShape(input1); + RuntimeShape input2_shape = GetTensorShape(input2); + RuntimeShape output_shape = GetTensorShape(output); + bool* output_data = GetTensorData(output); + bool requires_broadcast = !HaveSameShapes(input1, input2); switch (input1->type) { case kTfLiteFloat32: - TF_LITE_COMPARISON(float, Greater, requires_broadcast); + requires_broadcast + ? reference_ops::Broadcast4DSlowGreaterNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data) + : reference_ops::GreaterNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data); break; case kTfLiteInt32: - TF_LITE_COMPARISON(int32_t, Greater, requires_broadcast); + requires_broadcast + ? reference_ops::Broadcast4DSlowGreaterNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data) + : reference_ops::GreaterNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data); break; case kTfLiteInt64: - TF_LITE_COMPARISON(int64_t, Greater, requires_broadcast); + requires_broadcast + ? reference_ops::Broadcast4DSlowGreaterNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data) + : reference_ops::GreaterNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data); break; case kTfLiteUInt8: - EvalQuantizedGreater(context, node, input1, input2, output, - requires_broadcast); + requires_broadcast + ? reference_ops::Broadcast4DSlowGreaterWithScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data) + : reference_ops::GreaterWithScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data); break; case kTfLiteInt8: - EvalQuantizedGreater(context, node, input1, input2, output, - requires_broadcast); + requires_broadcast + ? 
reference_ops::Broadcast4DSlowGreaterWithScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data) + : reference_ops::GreaterWithScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data); break; default: TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", @@ -198,27 +291,74 @@ TfLiteStatus GreaterEval(TfLiteContext* context, TfLiteNode* node) { } TfLiteStatus GreaterEqualEval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + const OpData* data = static_cast(node->user_data); + const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + RuntimeShape input1_shape = GetTensorShape(input1); + RuntimeShape input2_shape = GetTensorShape(input2); + RuntimeShape output_shape = GetTensorShape(output); + bool* output_data = GetTensorData(output); + bool requires_broadcast = !HaveSameShapes(input1, input2); switch (input1->type) { case kTfLiteFloat32: - TF_LITE_COMPARISON(float, GreaterEqual, requires_broadcast); + requires_broadcast + ? reference_ops::Broadcast4DSlowGreaterEqualNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data) + : reference_ops::GreaterEqualNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data); break; case kTfLiteInt32: - TF_LITE_COMPARISON(int32_t, GreaterEqual, requires_broadcast); + requires_broadcast + ? reference_ops::Broadcast4DSlowGreaterEqualNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data) + : reference_ops::GreaterEqualNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data); break; case kTfLiteInt64: - TF_LITE_COMPARISON(int64_t, GreaterEqual, requires_broadcast); + requires_broadcast + ? reference_ops::Broadcast4DSlowGreaterEqualNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data) + : reference_ops::GreaterEqualNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data); break; case kTfLiteUInt8: - EvalQuantizedGreaterEqual(context, node, input1, input2, output, - requires_broadcast); + requires_broadcast + ? reference_ops::Broadcast4DSlowGreaterEqualWithScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data) + : reference_ops::GreaterEqualWithScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data); break; case kTfLiteInt8: - EvalQuantizedGreaterEqual(context, node, input1, input2, output, - requires_broadcast); + requires_broadcast + ? 
reference_ops::Broadcast4DSlowGreaterEqualWithScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data) + : reference_ops::GreaterEqualWithScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data); break; default: TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", @@ -229,27 +369,74 @@ TfLiteStatus GreaterEqualEval(TfLiteContext* context, TfLiteNode* node) { } TfLiteStatus LessEval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + const OpData* data = static_cast(node->user_data); + const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + RuntimeShape input1_shape = GetTensorShape(input1); + RuntimeShape input2_shape = GetTensorShape(input2); + RuntimeShape output_shape = GetTensorShape(output); + bool* output_data = GetTensorData(output); + bool requires_broadcast = !HaveSameShapes(input1, input2); switch (input1->type) { case kTfLiteFloat32: - TF_LITE_COMPARISON(float, Less, requires_broadcast); + requires_broadcast + ? reference_ops::Broadcast4DSlowLessNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data) + : reference_ops::LessNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data); break; case kTfLiteInt32: - TF_LITE_COMPARISON(int32_t, Less, requires_broadcast); + requires_broadcast + ? reference_ops::Broadcast4DSlowLessNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data) + : reference_ops::LessNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data); break; case kTfLiteInt64: - TF_LITE_COMPARISON(int64_t, Less, requires_broadcast); + requires_broadcast + ? reference_ops::Broadcast4DSlowLessNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data) + : reference_ops::LessNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data); break; case kTfLiteUInt8: - EvalQuantizedLess(context, node, input1, input2, output, - requires_broadcast); + requires_broadcast + ? reference_ops::Broadcast4DSlowLessWithScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data) + : reference_ops::LessWithScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data); break; case kTfLiteInt8: - EvalQuantizedLess(context, node, input1, input2, output, - requires_broadcast); + requires_broadcast + ? 
reference_ops::Broadcast4DSlowLessWithScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data) + : reference_ops::LessWithScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data); break; default: TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", @@ -260,27 +447,74 @@ TfLiteStatus LessEval(TfLiteContext* context, TfLiteNode* node) { } TfLiteStatus LessEqualEval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + const OpData* data = static_cast(node->user_data); + const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + RuntimeShape input1_shape = GetTensorShape(input1); + RuntimeShape input2_shape = GetTensorShape(input2); + RuntimeShape output_shape = GetTensorShape(output); + bool* output_data = GetTensorData(output); + bool requires_broadcast = !HaveSameShapes(input1, input2); switch (input1->type) { case kTfLiteFloat32: - TF_LITE_COMPARISON(float, LessEqual, requires_broadcast); + requires_broadcast + ? reference_ops::Broadcast4DSlowLessEqualNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data) + : reference_ops::LessEqualNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data); break; case kTfLiteInt32: - TF_LITE_COMPARISON(int32_t, LessEqual, requires_broadcast); + requires_broadcast + ? reference_ops::Broadcast4DSlowLessEqualNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data) + : reference_ops::LessEqualNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data); break; case kTfLiteInt64: - TF_LITE_COMPARISON(int64_t, LessEqual, requires_broadcast); + requires_broadcast + ? reference_ops::Broadcast4DSlowLessEqualNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data) + : reference_ops::LessEqualNoScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data); break; case kTfLiteUInt8: - EvalQuantizedLessEqual(context, node, input1, input2, output, - requires_broadcast); + requires_broadcast + ? reference_ops::Broadcast4DSlowLessEqualWithScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data) + : reference_ops::LessEqualWithScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data); break; case kTfLiteInt8: - EvalQuantizedLessEqual(context, node, input1, input2, output, - requires_broadcast); + requires_broadcast + ? 
reference_ops::Broadcast4DSlowLessEqualWithScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data) + : reference_ops::LessEqualWithScaling( + data->params, input1_shape, GetTensorData(input1), + input2_shape, GetTensorData(input2), output_shape, + output_data); break; default: TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", @@ -291,12 +525,58 @@ TfLiteStatus LessEqualEval(TfLiteContext* context, TfLiteNode* node) { } } // namespace + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + void* data = nullptr; + if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) == + kTfLiteError) { + return nullptr; + } + return data; +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + OpData* data = static_cast(node->user_data); + + const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); + const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); + + if (input1->type == kTfLiteUInt8 || input1->type == kTfLiteInt8) { + auto input1_offset = -input1->params.zero_point; + auto input2_offset = -input2->params.zero_point; + const int kLeftShift = 8; + + int32 input1_multiplier; + int input1_shift; + QuantizeMultiplierSmallerThanOneExp( + static_cast(input1->params.scale), &input1_multiplier, + &input1_shift); + int32 input2_multiplier; + int input2_shift; + QuantizeMultiplierSmallerThanOneExp( + static_cast(input2->params.scale), &input2_multiplier, + &input2_shift); + + data->params.left_shift = kLeftShift; + data->params.input1_offset = input1_offset; + data->params.input1_multiplier = input1_multiplier; + data->params.input1_shift = input1_shift; + data->params.input2_offset = input2_offset; + data->params.input2_multiplier = input2_multiplier; + data->params.input2_shift = input2_shift; + } + + return kTfLiteOk; +} + } // namespace comparisons TfLiteRegistration Register_EQUAL() { - return {/*init=*/nullptr, + return {/*init=*/comparisons::Init, /*free=*/nullptr, - /*prepare=*/nullptr, + /*prepare=*/comparisons::Prepare, /*invoke=*/comparisons::EqualEval, /*profiling_string=*/nullptr, /*builtin_code=*/0, @@ -305,9 +585,9 @@ TfLiteRegistration Register_EQUAL() { } TfLiteRegistration Register_NOT_EQUAL() { - return {/*init=*/nullptr, + return {/*init=*/comparisons::Init, /*free=*/nullptr, - /*prepare=*/nullptr, + /*prepare=*/comparisons::Prepare, /*invoke=*/comparisons::NotEqualEval, /*profiling_string=*/nullptr, /*builtin_code=*/0, @@ -316,9 +596,9 @@ TfLiteRegistration Register_NOT_EQUAL() { } TfLiteRegistration Register_GREATER() { - return {/*init=*/nullptr, + return {/*init=*/comparisons::Init, /*free=*/nullptr, - /*prepare=*/nullptr, + /*prepare=*/comparisons::Prepare, /*invoke=*/comparisons::GreaterEval, /*profiling_string=*/nullptr, /*builtin_code=*/0, @@ -327,9 +607,9 @@ TfLiteRegistration Register_GREATER() { } TfLiteRegistration Register_GREATER_EQUAL() { - return {/*init=*/nullptr, + return {/*init=*/comparisons::Init, /*free=*/nullptr, - /*prepare=*/nullptr, + /*prepare=*/comparisons::Prepare, /*invoke=*/comparisons::GreaterEqualEval, /*profiling_string=*/nullptr, /*builtin_code=*/0, @@ -338,9 +618,9 @@ TfLiteRegistration Register_GREATER_EQUAL() { } TfLiteRegistration Register_LESS() { - return {/*init=*/nullptr, + return {/*init=*/comparisons::Init, /*free=*/nullptr, - /*prepare=*/nullptr, + 
/*prepare=*/comparisons::Prepare, /*invoke=*/comparisons::LessEval, /*profiling_string=*/nullptr, /*builtin_code=*/0, @@ -349,9 +629,9 @@ TfLiteRegistration Register_LESS() { } TfLiteRegistration Register_LESS_EQUAL() { - return {/*init=*/nullptr, + return {/*init=*/comparisons::Init, /*free=*/nullptr, - /*prepare=*/nullptr, + /*prepare=*/comparisons::Prepare, /*invoke=*/comparisons::LessEqualEval, /*profiling_string=*/nullptr, /*builtin_code=*/0, diff --git a/tensorflow/lite/micro/kernels/comparisons_test.cc b/tensorflow/lite/micro/kernels/comparisons_test.cc index b19c2aa8f01..64c39c5d2c7 100644 --- a/tensorflow/lite/micro/kernels/comparisons_test.cc +++ b/tensorflow/lite/micro/kernels/comparisons_test.cc @@ -45,10 +45,15 @@ void TestComparison(tflite::BuiltinOperator op, TfLiteTensor* tensors, const int outputs_array_data[] = {1, 2}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); + void* user_data = nullptr; + if (registration->init) { + user_data = registration->init(&context, /*buffer=*/nullptr, /*length=*/0); + } + TfLiteNode node; node.inputs = inputs_array; node.outputs = outputs_array; - node.user_data = nullptr; + node.user_data = user_data; node.builtin_data = nullptr; node.custom_initial_data = nullptr; node.custom_initial_data_size = 0; From aef53d7fe63191dc3adb3efd417c2054e3addc3e Mon Sep 17 00:00:00 2001 From: amturati <36869454+amturati@users.noreply.github.com> Date: Mon, 13 Jul 2020 11:44:24 -0600 Subject: [PATCH 0276/2522] deleted extraneous comment --- tensorflow/c/eager/c_api_unified_experimental_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/c/eager/c_api_unified_experimental_test.cc b/tensorflow/c/eager/c_api_unified_experimental_test.cc index 07f4e57c705..265d99c6062 100644 --- a/tensorflow/c/eager/c_api_unified_experimental_test.cc +++ b/tensorflow/c/eager/c_api_unified_experimental_test.cc @@ -117,7 +117,7 @@ TEST_P(UnifiedCAPI, TestBasicEagerMatMul) { float vals [] = {0.0f,0.0f,0.0f,0.0f}; TFE_Context* eager_ctx = TF_ExecutionContextGetTFEContext(ctx,status.get()); - TFE_TensorHandle* t = TestMatrixTensorHandleWithInput(eager_ctx, vals, dims,num_dims); //, dims[0],dims[1]); + TFE_TensorHandle* t = TestMatrixTensorHandleWithInput(eager_ctx, vals, dims,num_dims); TF_AbstractTensor* at = TF_CreateAbstractTensorFromEagerTensor(t, status.get()); // get abstract tensor From 4904a46f623fce0b2e675a40fc4adfc98a3ad009 Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Mon, 13 Jul 2020 17:47:03 +0000 Subject: [PATCH 0277/2522] cleanup --- tensorflow/core/kernels/tensor_map.cc | 4 +- tensorflow/core/kernels/tensor_map.h | 5 +- tensorflow/core/ops/map_ops.cc | 52 +------------------ .../python/kernel_tests/list_ops_test.py | 12 ++--- .../python/kernel_tests/map_ops_test.py | 2 +- tensorflow/python/ops/map_ops.py | 2 - 6 files changed, 12 insertions(+), 65 deletions(-) diff --git a/tensorflow/core/kernels/tensor_map.cc b/tensorflow/core/kernels/tensor_map.cc index abeaf92390e..cfba3892650 100644 --- a/tensorflow/core/kernels/tensor_map.cc +++ b/tensorflow/core/kernels/tensor_map.cc @@ -90,9 +90,9 @@ bool TensorMap::Decode(const VariantTensorData& data) { while (tensors_it != data.tensors().end()) { // should assert that tensors_it + 1 is also not the end - /*if (*std::next(tensors_it) == data.tensors().end()) { + if (std::next(tensors_it) == data.tensors().end()) { return false; - }*/ + } TensorKey k = TensorKey(*tensors_it); // copy inefficient? 
tensors().emplace(k,*++tensors_it); tensors_it++; diff --git a/tensorflow/core/kernels/tensor_map.h b/tensorflow/core/kernels/tensor_map.h index a5d44550c98..633c7db8668 100644 --- a/tensorflow/core/kernels/tensor_map.h +++ b/tensorflow/core/kernels/tensor_map.h @@ -58,7 +58,7 @@ namespace tensorflow { // bool can_alias = false; // auto fw = c->forward_input(..., DT_VARIANT, {}, ...); // if (fw && fw->dtype() == DT_VARIANT && fw->NumElements() == 1) { -// auto* tl = fw->scalar()().get(); +// auto* tl = fw->scalar()().get(); // if (tl && tl->RefCountIsOne()) { // can_alias = true; // } @@ -132,7 +132,7 @@ class TensorMap { PartialTensorShape& shape() { return element_shape; } DataType dtype() { return element_dtype; } - // Get a new TensorList containing a copy of the underlying tensor container. + // Get a new TensorMap containing a copy of the underlying tensor container. TensorMap Copy() const { TensorMap out; out.element_shape = element_shape; @@ -185,7 +185,6 @@ class TensorMap { private: class Tensors : public core::RefCounted { public: - //std::unordered_map values_; absl::flat_hash_map values_; }; Tensors* tensors_; diff --git a/tensorflow/core/ops/map_ops.cc b/tensorflow/core/ops/map_ops.cc index 8949e3f1923..445180c34ef 100644 --- a/tensorflow/core/ops/map_ops.cc +++ b/tensorflow/core/ops/map_ops.cc @@ -23,8 +23,7 @@ namespace { bool IsValidTensorMapHandleData( const std::vector* handle_data) { std::cout << "is valid tensor map handle data " << handle_data->size() << std::endl; - return true; - //return handle_data != nullptr && handle_data->size() == 1; + return handle_data != nullptr && handle_data->size() == 1; } REGISTER_OP("EmptyTensorMap") @@ -69,29 +68,6 @@ REGISTER_OP("TensorMapErase") .SetShapeFn([](shape_inference::InferenceContext* c) { DataType element_dtype; TF_RETURN_IF_ERROR(c->GetAttr("element_dtype", &element_dtype)); - /*shape_inference::ShapeHandle tensor_shape = c->UnknownShape(); - auto* handle_data = c->input_handle_shapes_and_types(0); - if (handle_data != nullptr && handle_data->size() > 1) { - return errors::InvalidArgument( - "Trying to read from list with invalid variant data."); - } - if (IsValidTensorListHandleData(handle_data)) { - const shape_inference::ShapeAndType& list_shape_type = - (*handle_data)[0]; - if (list_shape_type.dtype != element_dtype) { - return errors::InvalidArgument( - "Trying to read from list with wrong element dtype. 
List has " - "type ", - DataTypeString(list_shape_type.dtype), - " but trying to push element with type ", - DataTypeString(element_dtype)); - } - shape_inference::ShapeHandle ignored; - TF_RETURN_IF_ERROR( - c->Merge(tensor_shape, list_shape_type.shape, &ignored)); - c->set_output_handle_shapes_and_types(0, *handle_data); - tensor_shape = list_shape_type.shape; - }*/ c->set_output(1, c->Scalar()); // removed element c->set_output(0, c->Scalar()); // map return Status::OK(); @@ -105,32 +81,6 @@ REGISTER_OP("TensorMapReplace") .Attr("element_dtype: type") .SetShapeFn([](shape_inference::InferenceContext* c) { c->set_output(0, c->Scalar()); - /*DataType element_dtype; - TF_RETURN_IF_ERROR(c->GetAttr("element_dtype", &element_dtype)); - shape_inference::ShapeHandle element_shape = c->UnknownShape();*/ - - /*auto* handle_data = c->input_handle_shapes_and_types(0); - if (handle_data != nullptr && handle_data->size() > 1) { - return errors::InvalidArgument( - "Trying to push to list with wrong variant data."); - } - if (IsValidTensorMapHandleData(handle_data)) { - const shape_inference::ShapeAndType& map_shape_type = (*handle_data)[0]; - if (list_shape_type.dtype != element_dtype) { - return errors::InvalidArgument( - "Trying to push to list with wrong element dtype. List has type ", - DataTypeString(list_shape_type.dtype), - " but trying to push element with type ", - DataTypeString(element_dtype)); - } - shape_inference::ShapeHandle ignored; - TF_RETURN_IF_ERROR( - c->Merge(element_shape, map_shape_type.shape, &ignored)); - element_shape = map_shape_type.shape; - } - c->set_output_handle_shapes_and_types( - 0, std::vector{ - {element_shape, element_dtype}});*/ return Status::OK(); }); diff --git a/tensorflow/python/kernel_tests/list_ops_test.py b/tensorflow/python/kernel_tests/list_ops_test.py index 7ffc4d3889d..53ebdd3ab88 100644 --- a/tensorflow/python/kernel_tests/list_ops_test.py +++ b/tensorflow/python/kernel_tests/list_ops_test.py @@ -48,7 +48,7 @@ from tensorflow.python.platform import test @test_util.run_all_in_graph_and_eager_modes class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): - + def _testPushPop(self, max_num_elements): l = list_ops.empty_tensor_list( element_dtype=dtypes.float32, @@ -130,7 +130,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): _, e = gen_list_ops.tensor_list_pop_back( l, element_dtype=dtypes.float32, element_shape=[1, 3]) self.evaluate(e) - + def testPushGetGrad(self): with backprop.GradientTape() as tape: l = list_ops.empty_tensor_list( @@ -150,7 +150,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): dt0, dt1 = tape.gradient(t1, [c0, c1]) self.assertAllEqual(self.evaluate(dt1), [1.0, 1.0]) self.assertEqual(self.evaluate(dt0), 0.0) - + def _testStack(self, max_num_elements): l = list_ops.empty_tensor_list( element_dtype=dtypes.float32, @@ -888,7 +888,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): l_worker = array_ops.identity(l_ps) l_worker = list_ops.tensor_list_push_back(l_worker, 3.0) self.evaluate(l_worker) - + def testPushPopGradients(self): with backprop.GradientTape() as tape: l = list_ops.empty_tensor_list( @@ -925,7 +925,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): grad_c, grad_c2 = tape.gradient(y, [c, c2]) self.assertAllEqual(self.evaluate(grad_c), [0.0, 4.0]) self.assertAllEqual(self.evaluate(grad_c2), 6.0) - + @test_util.run_deprecated_v1 def testSetOutOfBounds(self): c = constant_op.constant([1.0, 2.0]) @@ 
-1664,7 +1664,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): upper, constant_op.constant([0, 1, 2]), dtype=dtypes.string) self.assertAllEqual(f(), [b"A", b"B", b"C"]) - + def testPopBackGrad(self): # https://github.com/tensorflow/tensorflow/issues/37230 diff --git a/tensorflow/python/kernel_tests/map_ops_test.py b/tensorflow/python/kernel_tests/map_ops_test.py index dc4e5b97fc3..46f8f21d104 100644 --- a/tensorflow/python/kernel_tests/map_ops_test.py +++ b/tensorflow/python/kernel_tests/map_ops_test.py @@ -93,7 +93,7 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): m = map_ops.tensor_map_insert(m, k, v) l = map_ops.tensor_map_lookup(m, k) l *= 5 - g = tape.gradient(l,v) + g = tape.gradient(l, v) self.assertAllClose(g, 5.0) diff --git a/tensorflow/python/ops/map_ops.py b/tensorflow/python/ops/map_ops.py index 29f0751d91f..5d8d2b88f2f 100644 --- a/tensorflow/python/ops/map_ops.py +++ b/tensorflow/python/ops/map_ops.py @@ -54,9 +54,7 @@ def tensor_map_replace(input_handle, key, value): @ops.RegisterGradient("TensorMapLookup") def LookupGrad(op, dval): - # map grad should be a map that is 0 everywhere except 1 @key k m, k = op.inputs - #m = gen_map_ops.tensor_map_zeros(m) map_grad = empty_tensor_map() map_grad = tensor_map_insert(map_grad, k, dval) key = op.inputs[1] From 7fb762efdb51a2bdc4cc1107b15dcf372e77ed86 Mon Sep 17 00:00:00 2001 From: Yujing Zhang Date: Mon, 13 Jul 2020 10:33:02 -0700 Subject: [PATCH 0278/2522] Support packed inputs for functions which need to compile with XLA. PiperOrigin-RevId: 320983993 Change-Id: I52de47f726bca0fe81e7fadde99f04dfb8db6245 --- tensorflow/core/common_runtime/eager/execute.cc | 16 ++++++++++++++++ .../python/distribute/tpu_strategy_test.py | 12 ++++++++++++ 2 files changed, 28 insertions(+) diff --git a/tensorflow/core/common_runtime/eager/execute.cc b/tensorflow/core/common_runtime/eager/execute.cc index 507cb1390cd..71d781e5d3d 100644 --- a/tensorflow/core/common_runtime/eager/execute.cc +++ b/tensorflow/core/common_runtime/eager/execute.cc @@ -193,6 +193,22 @@ Status ValidateInputTypeAndPlacement( for (int i = 0; i < n_inputs; ++i) { TensorHandle* handle = handles[i]; Device* expected_device = kernel->InputDevice(i); + if (!kernel->IsFunction() && handle->Type() == TensorHandle::PACKED) { + // Extract a handle on the op device from a packed input. + // This happens when a function is marked for XLA compilation. + // MaybePackInputTensor guarantees that a primitive op has no packed + // input at this point. 
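+      // Scan the packed components for the one placed on this op's device and
+      // swap it in as the op's input, so the placement check below sees a
+      // concrete per-device handle.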
+ for (int j = 0; j < handle->NumPackedHandles(); ++j) { + TensorHandle* h = nullptr; + TF_RETURN_IF_ERROR(handle->ExtractPackedHandle(j, &h)); + if ((h->op_device() != nullptr) && + (h->op_device()->name() == op->DeviceName())) { + op->UpdateInput(i, h); + handle = h; + break; + } + } + } auto handle_device_variant = handle->DeviceOrHostCPU(*ctx); if (VariantDeviceIsCustom(handle_device_variant)) { return errors::Unimplemented( diff --git a/tensorflow/python/distribute/tpu_strategy_test.py b/tensorflow/python/distribute/tpu_strategy_test.py index 850981e073e..86b375aedf3 100644 --- a/tensorflow/python/distribute/tpu_strategy_test.py +++ b/tensorflow/python/distribute/tpu_strategy_test.py @@ -136,6 +136,18 @@ class TPUTest(test.TestCase): @parameterized.named_parameters([("PackedVar", True), ("", False)]) class TPUStrategyTest(test.TestCase, parameterized.TestCase): + def test_function_compile_with_xla(self, enable_packed_var): + strategy = get_tpu_strategy(enable_packed_var) + with strategy.scope(): + v = variables.Variable(1.0) + + @def_function.function + def func(): + return v.read_value() + 1.0 + + with ops.device("/device:TPU:0"): + self.assertAllEqual(func(), 2.0) + def test_sequential_experimental_runs(self, enable_packed_var): resolver = get_tpu_cluster_resolver() remote.connect_to_cluster(resolver) From 8f3cbfa0ad628624b3d2caf0203a03b2014d9b36 Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Mon, 13 Jul 2020 17:51:41 +0000 Subject: [PATCH 0279/2522] restore list_ops_test --- .../python/kernel_tests/list_ops_test.py | 132 +++++++++--------- 1 file changed, 66 insertions(+), 66 deletions(-) diff --git a/tensorflow/python/kernel_tests/list_ops_test.py b/tensorflow/python/kernel_tests/list_ops_test.py index 53ebdd3ab88..ce20cf489e6 100644 --- a/tensorflow/python/kernel_tests/list_ops_test.py +++ b/tensorflow/python/kernel_tests/list_ops_test.py @@ -78,8 +78,8 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): l = list_ops.empty_tensor_list( element_dtype=dtypes.float32, element_shape=[], max_num_elements=1) l = list_ops.tensor_list_push_back(l, constant_op.constant(1.0)) - with self.assertRaisesRegexp(errors.InvalidArgumentError, - "Tried to push item into a full list"): + with self.assertRaisesRegex(errors.InvalidArgumentError, + "Tried to push item into a full list"): l = list_ops.tensor_list_push_back(l, 2.) 
self.evaluate(l) @@ -91,8 +91,8 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): element_dtype=dtypes.float32, element_shape=[], max_num_elements=max_num_elements) - with self.assertRaisesRegexp(errors.InvalidArgumentError, - "Trying to pop from an empty list"): + with self.assertRaisesRegex(errors.InvalidArgumentError, + "Trying to pop from an empty list"): l = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32) self.evaluate(l) @@ -115,7 +115,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testPopUninitializedTensorWithInvalidElementShapeFails(self): l = list_ops.tensor_list_reserve( element_dtype=dtypes.float32, element_shape=None, num_elements=3) - with self.assertRaisesRegexp( + with self.assertRaisesRegex( errors.InvalidArgumentError, "Trying to read an uninitialized tensor but " "element_shape is not fully defined"): @@ -124,7 +124,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): l = list_ops.tensor_list_reserve( element_dtype=dtypes.float32, element_shape=[None, 2], num_elements=3) - with self.assertRaisesRegexp( + with self.assertRaisesRegex( errors.InvalidArgumentError, r"Incompatible shapes during merge: \[1,3\] vs. \[\?,2\]"): _, e = gen_list_ops.tensor_list_pop_back( @@ -191,8 +191,8 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): # Should raise an error when the element tensors do not all have the same # shape. - with self.assertRaisesRegexp(errors.InvalidArgumentError, - "Incompatible ranks during merge: 0 vs. 1"): + with self.assertRaisesRegex(errors.InvalidArgumentError, + "Incompatible ranks during merge: 0 vs. 1"): l = list_ops.tensor_list_push_back(l, constant_op.constant([3.0, 4.0])) t = list_ops.tensor_list_stack(l, element_dtype=dtypes.float32) self.evaluate(t) @@ -213,7 +213,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): # Should raise an error when the element tensors do not all have the same # shape. - with self.assertRaisesRegexp( + with self.assertRaisesRegex( errors.InvalidArgumentError, r"Incompatible shapes during merge: \[1\] vs. \[2\]"): l = list_ops.tensor_list_push_back(l, constant_op.constant([2.0, 3.0])) @@ -234,8 +234,8 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): # Should not be able to stack empty lists with partially defined # element_shape. - with self.assertRaisesRegexp(errors.InvalidArgumentError, - "non-fully-defined"): + with self.assertRaisesRegex(errors.InvalidArgumentError, + "non-fully-defined"): l = list_ops.empty_tensor_list( element_dtype=dtypes.float32, element_shape=[None, 2], @@ -244,8 +244,8 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): self.evaluate(t) # Should not be able to stack empty lists with undefined element_shape. 
- with self.assertRaisesRegexp(errors.InvalidArgumentError, - "non-fully-defined"): + with self.assertRaisesRegex(errors.InvalidArgumentError, + "non-fully-defined"): l = list_ops.empty_tensor_list( element_dtype=dtypes.float32, element_shape=None, @@ -285,10 +285,10 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testStackReservedListWithNoElementsAndPartialElementShapeFails(self): l = list_ops.tensor_list_reserve( element_dtype=dtypes.float32, element_shape=None, num_elements=3) - with self.assertRaisesRegexp(errors.InvalidArgumentError, - "Tried to stack list which only contains " - "uninitialized tensors and has a " - "non-fully-defined element_shape: "): + with self.assertRaisesRegex( + errors.InvalidArgumentError, "Tried to stack list which only contains " + "uninitialized tensors and has a " + "non-fully-defined element_shape: "): t = list_ops.tensor_list_stack(l, element_dtype=dtypes.float32) self.evaluate(t) @@ -341,8 +341,8 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): # Should raise an error when the requested tensors do not all have the same # shape. - with self.assertRaisesRegexp(errors.InvalidArgumentError, - "Incompatible ranks during merge: 0 vs. 1"): + with self.assertRaisesRegex(errors.InvalidArgumentError, + "Incompatible ranks during merge: 0 vs. 1"): t = list_ops.tensor_list_gather(l, [0, 2], element_dtype=dtypes.float32) self.evaluate(t) @@ -366,7 +366,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): # Should raise an error when the requested tensors do not all have the same # shape. - with self.assertRaisesRegexp( + with self.assertRaisesRegex( errors.InvalidArgumentError, r"Incompatible shapes during merge: \[1\] vs. \[2\]"): t = list_ops.tensor_list_gather(l, [0, 2], element_dtype=dtypes.float32) @@ -387,8 +387,8 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): # Should not be able to gather from empty lists with partially defined # element_shape. - with self.assertRaisesRegexp(errors.InvalidArgumentError, - "non-fully-defined"): + with self.assertRaisesRegex(errors.InvalidArgumentError, + "non-fully-defined"): l = list_ops.empty_tensor_list( element_dtype=dtypes.float32, element_shape=[None, 2], @@ -398,8 +398,8 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): # Should not be able to gather from empty lists with undefined # element_shape. 
- with self.assertRaisesRegexp(errors.InvalidArgumentError, - "non-fully-defined"): + with self.assertRaisesRegex(errors.InvalidArgumentError, + "non-fully-defined"): l = list_ops.empty_tensor_list( element_dtype=dtypes.float32, element_shape=None, @@ -455,7 +455,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testGatherReservedListWithNoElementsAndPartialElementShapeFails(self): l = list_ops.tensor_list_reserve( element_dtype=dtypes.float32, element_shape=None, num_elements=3) - with self.assertRaisesRegexp( + with self.assertRaisesRegex( errors.InvalidArgumentError, "Tried to gather uninitialized tensors from a" " list with non-fully-defined element_shape"): @@ -485,7 +485,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testScatterFailsWhenIndexLargerThanNumElements(self): c0 = constant_op.constant([1.0, 2.0]) - with self.assertRaisesRegexp( + with self.assertRaisesRegex( errors.InvalidArgumentError, "TensorListScatter: Trying to scatter at index 3 in list with size 3"): l = gen_list_ops.tensor_list_scatter_v2( @@ -494,7 +494,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testScatterFailsWithInvalidNumElements(self): c0 = constant_op.constant([1.0, 2.0]) - with self.assertRaisesRegexp( + with self.assertRaisesRegex( errors.InvalidArgumentError, "TensorListScatter expects num_elements >= -1, found: -2"): l = gen_list_ops.tensor_list_scatter_v2( @@ -503,7 +503,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testScatterWithInvalidRowsInInputTensorFails(self): c0 = constant_op.constant([1.0, 2.0]) - with self.assertRaisesRegexp( + with self.assertRaisesRegex( errors.InvalidArgumentError, "Invalid number of rows in input tensor. Expected: 3 Actual: 2"): l = list_ops.tensor_list_scatter(c0, [1, 0, 2], []) @@ -511,7 +511,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testScatterWithNegativeIndicesFails(self): c0 = constant_op.constant([1.0, 2.0]) - with self.assertRaisesRegexp( + with self.assertRaisesRegex( errors.InvalidArgumentError, "Indices in TensorListScatter must all be non-negative."): l = list_ops.tensor_list_scatter(c0, [-1, -2], element_shape=[]) @@ -658,7 +658,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testGetUninitializedTensorWithInvalidElementShapeFails(self): l = list_ops.tensor_list_reserve( element_dtype=dtypes.float32, element_shape=None, num_elements=3) - with self.assertRaisesRegexp( + with self.assertRaisesRegex( errors.InvalidArgumentError, "Trying to read an uninitialized tensor but " "element_shape is not fully defined"): @@ -676,7 +676,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): error_type = errors.InvalidArgumentError else: error_type = ValueError - with self.assertRaisesRegexp(error_type, r"shapes"): + with self.assertRaisesRegex(error_type, r"shapes"): e0 = gen_list_ops.tensor_list_get_item( l, 0, element_dtype=dtypes.float32, element_shape=[1, 3]) self.evaluate(e0) @@ -699,7 +699,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testSetOnEmptyListWithMaxNumElementsFails(self): l = list_ops.empty_tensor_list( element_dtype=dtypes.float32, element_shape=[], max_num_elements=3) - with self.assertRaisesRegexp( + with self.assertRaisesRegex( errors.InvalidArgumentError, "Trying to modify element 0 in a list with 0 elements."): l = list_ops.tensor_list_set_item(l, 0, 1.) 
@@ -882,8 +882,8 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): with ops.device("/job:ps"): l_ps = array_ops.identity(l) l_ps = list_ops.tensor_list_push_back(l_ps, 2.) - with self.assertRaisesRegexp(errors.InvalidArgumentError, - "Tried to push item into a full list"): + with self.assertRaisesRegex(errors.InvalidArgumentError, + "Tried to push item into a full list"): with ops.device("/job:worker"): l_worker = array_ops.identity(l_ps) l_worker = list_ops.tensor_list_push_back(l_worker, 3.0) @@ -943,8 +943,8 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): # at graph building time. l = list_ops.tensor_list_set_item(l, 0, ph) l_0 = list_ops.tensor_list_get_item(l, 0, element_dtype=dtypes.float32) - with self.assertRaisesRegexp(errors.InvalidArgumentError, - "incompatible shape"): + with self.assertRaisesRegex(errors.InvalidArgumentError, + "incompatible shape"): sess.run(l_0, {ph: [3.0]}) def testResourceVariableScatterGather(self): @@ -1021,7 +1021,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): "element shapes are not identical at index 0") else: expected_error = (ValueError, "Shapes must be equal rank") - with self.assertRaisesRegexp(*expected_error): + with self.assertRaisesRegex(*expected_error): l_batch_of_vec_tls = array_ops.stack( [list_ops.tensor_list_from_tensor([[1.0]], element_shape=[1])] * 2) self.evaluate( @@ -1033,7 +1033,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): r"input_b\[0\].dtype != element_dtype.") else: expected_error = (ValueError, "input_b.type != element_dtype") - with self.assertRaisesRegexp(*expected_error): + with self.assertRaisesRegex(*expected_error): l_batch_of_int_tls = array_ops.stack( [list_ops.tensor_list_from_tensor([1], element_shape=[])] * 2) self.evaluate( @@ -1073,8 +1073,8 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): with self.assertRaises((errors.InvalidArgumentError, ValueError)): self.evaluate(list_ops.tensor_list_push_back_batch(l_batch, [])) - with self.assertRaisesRegexp(errors.InvalidArgumentError, - "incompatible shape to a list at index 0"): + with self.assertRaisesRegex(errors.InvalidArgumentError, + "incompatible shape to a list at index 0"): self.evaluate( list_ops.tensor_list_push_back_batch(l_batch, [[3.0], [4.0]])) @@ -1082,7 +1082,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): expected_error = (errors.InvalidArgumentError, "Invalid data type") else: expected_error = (ValueError, "wrong element dtype") - with self.assertRaisesRegexp(*expected_error): + with self.assertRaisesRegex(*expected_error): self.evaluate(list_ops.tensor_list_push_back_batch(l_batch, [3, 4])) def testZerosLike(self): @@ -1246,7 +1246,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): element_shape=[], element_dtype=dtypes.float32, num_elements=2) l2 = list_ops.tensor_list_reserve( element_shape=[], element_dtype=dtypes.float32, num_elements=3) - with self.assertRaisesRegexp( + with self.assertRaisesRegex( errors.InvalidArgumentError, "Trying to add two lists of tensors with different lengths"): l = math_ops.add_n([l1, l2]) @@ -1268,7 +1268,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): element_dtype=dtypes.float32, num_elements=3) l = math_ops.add_n([l1, l2]) - with self.assertRaisesRegexp( + with self.assertRaisesRegex( errors.InvalidArgumentError, "Trying to add two lists of tensors with incompatible element shapes" ): @@ 
-1314,7 +1314,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): element_dtype=dtypes.float32, element_shape=None) l = list_ops.tensor_list_push_back(l, [[0., 1.]]) l = list_ops.tensor_list_push_back(l, [[2.], [4.]]) - with self.assertRaisesRegexp( + with self.assertRaisesRegex( errors.InvalidArgumentError, r"Incompatible shapes during merge: " r"\[2\] vs. \[1\]"): t = list_ops.tensor_list_concat(l, element_dtype=dtypes.float32) @@ -1333,7 +1333,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testConcatEmptyListWithUnknownElementShapeFails(self): l = list_ops.empty_tensor_list( element_dtype=dtypes.float32, element_shape=None) - with self.assertRaisesRegexp( + with self.assertRaisesRegex( errors.InvalidArgumentError, "All except the first dimension must be fully" " defined when concating an empty tensor list"): @@ -1343,7 +1343,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testConcatEmptyListWithPartiallyDefinedElementShapeFails(self): l = list_ops.empty_tensor_list( element_dtype=dtypes.float32, element_shape=[2, None]) - with self.assertRaisesRegexp( + with self.assertRaisesRegex( errors.InvalidArgumentError, "All except the first dimension must be fully" " defined when concating an empty tensor list"): @@ -1354,7 +1354,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): l = list_ops.empty_tensor_list( element_dtype=dtypes.float32, element_shape=tensor_shape.TensorShape([])) - with self.assertRaisesRegexp( + with self.assertRaisesRegex( errors.InvalidArgumentError, "Concat requires elements to be at least vectors, " "found scalars instead"): @@ -1365,14 +1365,14 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): l = list_ops.empty_tensor_list( element_dtype=dtypes.float32, element_shape=None) l1 = list_ops.tensor_list_push_back(l, 1.) - with self.assertRaisesRegexp( + with self.assertRaisesRegex( errors.InvalidArgumentError, "Concat saw a scalar shape at index 0" " but requires at least vectors"): t = list_ops.tensor_list_concat(l1, element_dtype=dtypes.float32) self.evaluate(t) l1 = list_ops.tensor_list_push_back(l, [1.]) l1 = list_ops.tensor_list_push_back(l1, 2.) 
- with self.assertRaisesRegexp( + with self.assertRaisesRegex( errors.InvalidArgumentError, "Concat saw a scalar shape at index 1" " but requires at least vectors"): t = list_ops.tensor_list_concat(l1, element_dtype=dtypes.float32) @@ -1420,7 +1420,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testConcatWithUninitializedTensorsFailsIfNoElementShape(self): l = list_ops.tensor_list_reserve( element_dtype=dtypes.float32, element_shape=None, num_elements=3) - with self.assertRaisesRegexp( + with self.assertRaisesRegex( errors.InvalidArgumentError, r"Trying to concat list with only uninitialized tensors " r"but element_shape_except_first_dim_ is not fully defined"): @@ -1430,7 +1430,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testConcatWithUninitializedTensorsFailsIfNoInputLengths(self): l = list_ops.tensor_list_reserve( element_dtype=dtypes.float32, element_shape=[None, 3], num_elements=3) - with self.assertRaisesRegexp( + with self.assertRaisesRegex( errors.InvalidArgumentError, r"List contains uninitialized tensor at index 0" r" but leading_dims has only 0 elements."): @@ -1467,7 +1467,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): with self.cached_session(): tensor = array_ops.placeholder(dtype=dtypes.float32) l = list_ops.tensor_list_split(tensor, element_shape=None, lengths=[1]) - with self.assertRaisesRegexp( + with self.assertRaisesRegex( errors.InvalidArgumentError, r"Tensor must be at least a vector, but saw shape: \[\]"): l.eval({tensor: 1}) @@ -1479,24 +1479,24 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): l = list_ops.tensor_list_split([1., 2.], element_shape=None, lengths=lengths) - with self.assertRaisesRegexp( + with self.assertRaisesRegex( errors.InvalidArgumentError, r"Expected lengths to be a vector, received shape: \[\]"): l.eval({lengths: 1}) def testSplitWithInvalidLengthsFails(self): - with self.assertRaisesRegexp(errors.InvalidArgumentError, - r"Invalid value in lengths: -1"): + with self.assertRaisesRegex(errors.InvalidArgumentError, + r"Invalid value in lengths: -1"): l = list_ops.tensor_list_split([1., 2.], element_shape=None, lengths=[1, -1]) self.evaluate(l) - with self.assertRaisesRegexp( + with self.assertRaisesRegex( errors.InvalidArgumentError, r"Attempting to slice \[0, 3\] from tensor with length 2"): l = list_ops.tensor_list_split([1., 2.], element_shape=None, lengths=[3]) self.evaluate(l) - with self.assertRaisesRegexp( + with self.assertRaisesRegex( errors.InvalidArgumentError, r"Unused values in tensor. 
Length of tensor: 2 Values used: 1"): l = list_ops.tensor_list_split([1., 2.], element_shape=None, lengths=[1]) @@ -1504,11 +1504,11 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): @test_util.run_deprecated_v1 def testSkipEagerSplitWithScalarElementShapeFails(self): - with self.assertRaisesRegexp(ValueError, - r"Shapes must be equal rank, but are 1 and 0"): + with self.assertRaisesRegex(ValueError, + r"Shapes must be equal rank, but are 1 and 0"): l = list_ops.tensor_list_split([1., 2.], element_shape=[], lengths=[1, 1]) with self.cached_session(): - with self.assertRaisesRegexp( + with self.assertRaisesRegex( errors.InvalidArgumentError, r"TensorListSplit requires element_shape to be at least of rank 1, " r"but saw: \[\]"): @@ -1520,7 +1520,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testEagerOnlySplitWithScalarElementShapeFails(self): if context.executing_eagerly(): - with self.assertRaisesRegexp( + with self.assertRaisesRegex( errors.InvalidArgumentError, r"TensorListSplit requires element_shape to be at least of rank 1, " r"but saw: \[\]"): @@ -1528,14 +1528,14 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): @test_util.run_deprecated_v1 def testSkipEagerSplitWithIncompatibleTensorShapeAndElementShapeFails(self): - with self.assertRaisesRegexp(ValueError, - r"Shapes must be equal rank, but are 2 and 1"): + with self.assertRaisesRegex(ValueError, + r"Shapes must be equal rank, but are 2 and 1"): l = list_ops.tensor_list_split([[1.], [2.]], element_shape=[1], lengths=[1, 1]) with self.cached_session(): - with self.assertRaisesRegexp( + with self.assertRaisesRegex( errors.InvalidArgumentError, r"tensor shape \[2,1\] is not compatible with element_shape \[1\]"): element_shape = array_ops.placeholder(dtype=dtypes.int32) @@ -1546,7 +1546,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testEagerOnlySplitWithIncompatibleTensorShapeAndElementShapeFails(self): if context.executing_eagerly(): - with self.assertRaisesRegexp( + with self.assertRaisesRegex( errors.InvalidArgumentError, r"tensor shape \[2,1\] is not compatible with element_shape \[1\]"): list_ops.tensor_list_split([[1.], [2.]], @@ -1576,7 +1576,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): [1., 2.]) def testResizeWithInvalidSizeFails(self): - with self.assertRaisesRegexp( + with self.assertRaisesRegex( errors.InvalidArgumentError, "TensorListSlice expects size to be non-negative"): l = list_ops.tensor_list_from_tensor([1., 2., 3.], element_shape=[]) From 4e7eb7f5ab6fb39b0804539f960c8bad12d7d106 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Mon, 13 Jul 2020 10:35:44 -0700 Subject: [PATCH 0280/2522] Migrate the Linux nightly release scripts to use the new bazelrc configs. 
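The per-script configure exports (`TF_NEED_*`, `TF_CUDA_*`, `CC_OPT_FLAGS`) and the explicit `--crosstool_top` flag move into named configs in `.bazelrc` (`release_common`, `release_cpu_linux`, `release_gpu_common`, `release_gpu_linux`), so each nightly script reduces to a single build invocation, for example:

```
bazel build --config=release_cpu_linux tensorflow/tools/pip_package:build_pip_package
bazel build --config=release_gpu_linux tensorflow/tools/pip_package:build_pip_package
```

(These are the commands used by the CPU and GPU nightly scripts updated below.)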
PiperOrigin-RevId: 320984610 Change-Id: I8ee4d32bfc8394c08e3afb299e93e00f4c54904d --- .bazelrc | 28 ++++++++++++++++++- .../cpu_py35_full/nightly_release.sh | 9 +----- .../cpu_py36_full/nightly_release.sh | 9 +----- .../cpu_py37_full/nightly_release.sh | 9 +----- .../cpu_py38_full/nightly_release.sh | 9 +----- .../gpu_py35_full/nightly_release.sh | 14 +--------- .../gpu_py36_full/nightly_release.sh | 14 +--------- .../gpu_py37_full/nightly_release.sh | 14 +--------- .../gpu_py38_full/nightly_release.sh | 14 +--------- 9 files changed, 35 insertions(+), 85 deletions(-) diff --git a/.bazelrc b/.bazelrc index 1dd928acdb4..f23fdb7b7e8 100644 --- a/.bazelrc +++ b/.bazelrc @@ -78,7 +78,12 @@ # elinux: General Embedded Linux options shared by all flavors. # elinux_aarch64: Embedded Linux options for aarch64 (ARM64) CPU support. # elinux_armhf: Embedded Linux options for armhf (ARMv7) CPU support. - +# +# Release build options (for all operating systems) +# release_common: Common options for all builds on all operating systems. +# release_gpu_common: Common options for GPU builds on Linux and Windows. +# release_cpu_linux: Toolchain and CUDA options for Linux CPU builds. +# release_gpu_linux: Toolchain and CUDA options for Linux PU builds. # Allow builds using libc++ as a linker library # This is mostly for OSSFuzz, so we also pass in the flags from environment to clean build file @@ -534,3 +539,24 @@ try-import %workspace%/.tf_configure.bazelrc # Put user-specific options in .bazelrc.user try-import %workspace%/.bazelrc.user + +# Here are bazelrc configs for release builds +build:release_common --config=opt +build:release_common --config=v2 + +build:release_cpu_linux --config=release_common +build:release_cpu_linux --action_env=TF_NEED_CUDA=0 +build:release_cpu_linux --action_env=CC_OPT_FLAGS="-mavx" +# We use the same toolchain for CPU/GPU packages. +# Did not add this to the defaults in case this changes. +build:release_cpu_linux --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain + +build:release_gpu_common --config=release_common +build:release_gpu_common --action_env=TF_NEED_CUDA=1 +build:release_gpu_common --action_env=TF_CUDA_VERSION=10.1 +build:release_gpu_common --action_env=TF_CUDNN_VERSION=7 +build:release_gpu_common --action_env=TF_CUDA_COMPUTE_CAPABILITIES="sm_35,sm_37,sm_52,sm_60,sm_61,compute_70" +build:release_gpu_common --action_env=TENSORRT_INSTALL_PATH="/usr/local/tensorrt" + +build:release_gpu_linux --config=release_gpu_common +build:release_gpu_linux --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/nightly_release.sh index ba1861b221e..690bfe219aa 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/nightly_release.sh @@ -27,18 +27,11 @@ install_bazelisk python2.7 tensorflow/tools/ci_build/update_version.py --nightly # Run configure. 
-export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=0 -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.5) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=opt --config=v2 \ - --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ - tensorflow/tools/pip_package:build_pip_package +bazel build --config=release_cpu_linux tensorflow/tools/pip_package:build_pip_package ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --cpu --nightly_flag diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/nightly_release.sh index 2b770867099..bd686959209 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/nightly_release.sh @@ -27,18 +27,11 @@ install_bazelisk python2.7 tensorflow/tools/ci_build/update_version.py --nightly # Run configure. -export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=0 -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.6) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=opt --config=v2 \ - --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ - tensorflow/tools/pip_package:build_pip_package +bazel build --config=release_cpu_linux tensorflow/tools/pip_package:build_pip_package ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --cpu --nightly_flag diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py37_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py37_full/nightly_release.sh index 25e59a5b096..62c0439e4b0 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py37_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py37_full/nightly_release.sh @@ -27,18 +27,11 @@ install_bazelisk python2.7 tensorflow/tools/ci_build/update_version.py --nightly # Run configure. -export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=0 -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.7) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=opt --config=v2 \ - --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ - tensorflow/tools/pip_package:build_pip_package +bazel build --config=release_cpu_linux tensorflow/tools/pip_package:build_pip_package ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --cpu --nightly_flag diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py38_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py38_full/nightly_release.sh index e82064f7221..86add0707ba 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py38_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py38_full/nightly_release.sh @@ -27,18 +27,11 @@ install_bazelisk python2.7 tensorflow/tools/ci_build/update_version.py --nightly # Run configure. 
-export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=0 -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.8) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=opt --config=v2 \ - --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ - tensorflow/tools/pip_package:build_pip_package +bazel build --config=release_cpu_linux tensorflow/tools/pip_package:build_pip_package ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --cpu --nightly_flag diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nightly_release.sh index 2ed5c014c65..addfc59818e 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nightly_release.sh @@ -25,23 +25,11 @@ install_bazelisk python2.7 tensorflow/tools/ci_build/update_version.py --nightly # Run configure. -export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 -export TF_CUDNN_VERSION=7 -export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 -export TF_NEED_TENSORRT=1 -export TENSORRT_INSTALL_PATH=/usr/local/tensorrt -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.5) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=opt --config=v2 \ - --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ - tensorflow/tools/pip_package:build_pip_package +bazel build --config=release_gpu_linux tensorflow/tools/pip_package:build_pip_package ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --nightly_flag ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --gpu --nightly_flag diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nightly_release.sh index 87b2e52d88a..c6fb6d469b1 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nightly_release.sh @@ -25,23 +25,11 @@ install_bazelisk python2.7 tensorflow/tools/ci_build/update_version.py --nightly # Run configure. 
-export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 -export TF_CUDNN_VERSION=7 -export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 -export TF_NEED_TENSORRT=1 -export TENSORRT_INSTALL_PATH=/usr/local/tensorrt -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.6) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=opt --config=v2 \ - --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ - tensorflow/tools/pip_package:build_pip_package +bazel build --config=release_gpu_linux tensorflow/tools/pip_package:build_pip_package ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --nightly_flag ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --gpu --nightly_flag diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nightly_release.sh index 0436ec32643..6e900d7dba8 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nightly_release.sh @@ -25,23 +25,11 @@ install_bazelisk python2.7 tensorflow/tools/ci_build/update_version.py --nightly # Run configure. -export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 -export TF_CUDNN_VERSION=7 -export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 -export TF_NEED_TENSORRT=1 -export TENSORRT_INSTALL_PATH=/usr/local/tensorrt -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.7) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=opt --config=v2 \ - --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ - tensorflow/tools/pip_package:build_pip_package +bazel build --config=release_gpu_linux tensorflow/tools/pip_package:build_pip_package ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --nightly_flag ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --gpu --nightly_flag diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/nightly_release.sh index 783785db7f7..9b968c4c3d6 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/nightly_release.sh @@ -27,23 +27,11 @@ update_bazel_linux python2.7 tensorflow/tools/ci_build/update_version.py --nightly # Run configure. 
-export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 -export TF_CUDNN_VERSION=7 -export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 -export TF_NEED_TENSORRT=1 -export TENSORRT_INSTALL_PATH=/usr/local/tensorrt -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.8) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=opt --config=v2 \ - --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ - tensorflow/tools/pip_package:build_pip_package +bazel build --config=release_gpu_linux tensorflow/tools/pip_package:build_pip_package ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --nightly_flag ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --gpu --nightly_flag From 3358c504493aedf3f7a3ef99839b45180770a3d2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Jul 2020 10:44:54 -0700 Subject: [PATCH 0281/2522] Fix build error caused by nonexplicit integer conversion. PiperOrigin-RevId: 320986742 Change-Id: I4f6ff1b8a6fb0a44cffb31f3ff62b3f59e658c8b --- .../swift/Sources/Interpreter.swift | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/tensorflow/lite/experimental/swift/Sources/Interpreter.swift b/tensorflow/lite/experimental/swift/Sources/Interpreter.swift index de7ab3f89ac..d15a0ad9b5d 100644 --- a/tensorflow/lite/experimental/swift/Sources/Interpreter.swift +++ b/tensorflow/lite/experimental/swift/Sources/Interpreter.swift @@ -12,15 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. -#if os(Linux) -import SwiftGlibc -#else -import Darwin -#endif - import Foundation import TensorFlowLiteC +#if os(Linux) + import SwiftGlibc +#else + import Darwin +#endif + /// A TensorFlow Lite interpreter that performs inference from a given model. public final class Interpreter { /// The configuration options for the `Interpreter`. @@ -337,19 +337,19 @@ extension String { /// - cFormat: The format C array as a template for substituting values. /// - arguments: A C pointer to a `va_list` of arguments to substitute into `cFormat`. init?(cFormat: UnsafePointer, arguments: CVaListPointer) { -#if os(Linux) - let length = vsnprintf(nil, 0, cFormat, arguments) + 1 // null terminator - guard length > 0 else { return nil } - var buffer = UnsafeMutablePointer.allocate(capacity: length) - defer { - buffer.deallocate() - } - guard vsnprintf(buffer, length, cFormat, arguments) == length - 1 else { return nil } - self.init(validatingUTF8: buffer) -#else - var buffer: UnsafeMutablePointer? - guard vasprintf(&buffer, cFormat, arguments) != 0, let cString = buffer else { return nil } - self.init(validatingUTF8: cString) -#endif + #if os(Linux) + let length = Int(vsnprintf(nil, 0, cFormat, arguments) + 1) // null terminator + guard length > 0 else { return nil } + let buffer = UnsafeMutablePointer.allocate(capacity: length) + defer { + buffer.deallocate() + } + guard vsnprintf(buffer, length, cFormat, arguments) == length - 1 else { return nil } + self.init(validatingUTF8: buffer) + #else + var buffer: UnsafeMutablePointer? 
+ guard vasprintf(&buffer, cFormat, arguments) != 0, let cString = buffer else { return nil } + self.init(validatingUTF8: cString) + #endif } } From db6c219b8a53a2f93edec07aa7933ffc5c942e01 Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: Mon, 13 Jul 2020 11:00:39 -0700 Subject: [PATCH 0282/2522] Prevent segmentation faults when datasets produce unexpected numbers of components. PiperOrigin-RevId: 320990394 Change-Id: Ibd117c7e75a08ed2857028061b97c1d6fd7a84c8 --- tensorflow/core/framework/dataset.cc | 1 + .../core/kernels/data/captured_function.cc | 22 +++++++++++++++++-- .../data/experimental/lmdb_dataset_op_test.cc | 3 ++- .../python/data/kernel_tests/dataset_test.py | 13 +++++++++++ 4 files changed, 36 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/framework/dataset.cc b/tensorflow/core/framework/dataset.cc index 12a16298f3c..65b94ad5ae1 100644 --- a/tensorflow/core/framework/dataset.cc +++ b/tensorflow/core/framework/dataset.cc @@ -491,6 +491,7 @@ Status DatasetBaseIterator::GetNext(IteratorContext* ctx, } Status s = GetNextInternal(ctx, out_tensors, end_of_sequence); if (TF_PREDICT_TRUE(s.ok() && !*end_of_sequence)) { + DCHECK_EQ(out_tensors->size(), dataset()->output_dtypes().size()); RecordElement(ctx, out_tensors); } if (model && model->collect_resource_usage() && node_) { diff --git a/tensorflow/core/kernels/data/captured_function.cc b/tensorflow/core/kernels/data/captured_function.cc index aac07eebfa1..dcc04a7a299 100644 --- a/tensorflow/core/kernels/data/captured_function.cc +++ b/tensorflow/core/kernels/data/captured_function.cc @@ -110,6 +110,18 @@ class SimpleStepStatsCollector : public StepStatsCollectorInterface { int64 processing_time_ TF_GUARDED_BY(mu_) = 0; }; +Status GetCapturedInput(const CapturedFunction* const func, int index, + const Tensor** out) { + if (TF_PREDICT_FALSE(index >= func->captured_inputs().size())) { + return errors::OutOfRange( + "Out of range access to captured inputs for function ", + func->func().name(), ". Index: ", index, + ". 
Num captured inputs: ", func->captured_inputs().size()); + } + *out = &func->captured_inputs()[index]; + return Status::OK(); +} + Status RunShortCircuit(const ShortCircuitInfo& info, const std::vector& args, const CapturedFunction* const func, @@ -121,7 +133,10 @@ Status RunShortCircuit(const ShortCircuitInfo& info, if (info.indices[i] < num_args) { rets->push_back(args[info.indices[i]]); } else { - rets->push_back(func->captured_inputs()[info.indices[i] - num_args]); + const Tensor* captured_input; + TF_RETURN_IF_ERROR( + GetCapturedInput(func, info.indices[i] - num_args, &captured_input)); + rets->push_back(*captured_input); } } return Status::OK(); @@ -141,7 +156,10 @@ Status RunShortCircuit(const ShortCircuitInfo& info, std::vector&& args, rets->push_back(args[info.indices[i]]); } } else { - rets->push_back(func->captured_inputs()[info.indices[i] - num_args]); + const Tensor* captured_input; + TF_RETURN_IF_ERROR( + GetCapturedInput(func, info.indices[i] - num_args, &captured_input)); + rets->push_back(*captured_input); } } return Status::OK(); diff --git a/tensorflow/core/kernels/data/experimental/lmdb_dataset_op_test.cc b/tensorflow/core/kernels/data/experimental/lmdb_dataset_op_test.cc index 80705229d2c..77a836e9f60 100644 --- a/tensorflow/core/kernels/data/experimental/lmdb_dataset_op_test.cc +++ b/tensorflow/core/kernels/data/experimental/lmdb_dataset_op_test.cc @@ -179,16 +179,17 @@ TEST_F(LMDBDatasetOpTest, InvalidPathInMiddle) { bool end_of_sequence = false; std::vector out_tensors; - std::vector next; // First 10 rows should be ok for (int i = 0; i < 10; ++i) { + std::vector next; TF_ASSERT_OK( iterator_->GetNext(iterator_ctx_.get(), &next, &end_of_sequence)); EXPECT_FALSE(end_of_sequence); } // Next read operation should raise an error + std::vector next; Status get_next_status = iterator_->GetNext(iterator_ctx_.get(), &next, &end_of_sequence); EXPECT_EQ(get_next_status.code(), error::INVALID_ARGUMENT); diff --git a/tensorflow/python/data/kernel_tests/dataset_test.py b/tensorflow/python/data/kernel_tests/dataset_test.py index 32184d1905f..1438ae70158 100644 --- a/tensorflow/python/data/kernel_tests/dataset_test.py +++ b/tensorflow/python/data/kernel_tests/dataset_test.py @@ -543,6 +543,19 @@ class DatasetTest(test_base.DatasetTestBase, parameterized.TestCase): 10, output_type=dtypes.int32).map(lambda x: (x, None)) self.assertEqual(self.evaluate(fn(dataset)), 45) + @combinations.generate(test_base.default_test_combinations()) + def testIncorrectPythonStructure(self): + # Tests that an exception is raised (as opposed to a segfault) when the + # Python structure assigned to a dataset is incorrect. + dataset = dataset_ops.Dataset.range(10) + spec = tensor_spec.TensorSpec([], dtypes.int64) + new_structure = (spec, spec) + dataset = dataset_ops._RestructuredDataset(dataset, new_structure) + dataset = dataset.map(lambda x, y: y) + + with self.assertRaisesOpError(""): + self.getDatasetOutput(dataset) + if __name__ == "__main__": test.main() From 3576b737431743d174144057d19a03944ae56cca Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Jul 2020 11:15:48 -0700 Subject: [PATCH 0283/2522] [XLA] Add documentation to explicitly state how the constructor parameters of CopyAllocation correspond to the insertion of CopyStart/CopyDone instructions. There are already similar comments in the private fields, but it is not immediately obvious how those fields correspond to the constructor. 
PiperOrigin-RevId: 320993994 Change-Id: Ieec4babf3c404983de1f142327d120e7e6bf5d4a --- tensorflow/compiler/xla/service/memory_space_assignment.cc | 2 +- tensorflow/compiler/xla/service/memory_space_assignment.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/memory_space_assignment.cc b/tensorflow/compiler/xla/service/memory_space_assignment.cc index 4b26fba3bab..66100358d97 100644 --- a/tensorflow/compiler/xla/service/memory_space_assignment.cc +++ b/tensorflow/compiler/xla/service/memory_space_assignment.cc @@ -774,7 +774,7 @@ void AlternateMemoryBestFitHeap::AppendBufferInfoDebugString( std::vector use_names; use_times.reserve(uses.size()); use_names.reserve(uses.size()); - for (auto use : uses) { + for (const auto& use : uses) { use_times.push_back(use.first); use_names.push_back(use.second); } diff --git a/tensorflow/compiler/xla/service/memory_space_assignment.h b/tensorflow/compiler/xla/service/memory_space_assignment.h index 5c5329033fd..8f2002009b2 100644 --- a/tensorflow/compiler/xla/service/memory_space_assignment.h +++ b/tensorflow/compiler/xla/service/memory_space_assignment.h @@ -501,6 +501,9 @@ class MemorySpaceAssignment { }; // This class represents an allocation as a result of an asynchronous copy. + // Note: CopyStart instructions are inserted after `start_time` or later, + // while CopyDone instructions are inserted before + // `copy_done_schedule_before_time` or earlier. class CopyAllocation : public Allocation { public: CopyAllocation(const Allocation& prev_allocation, MemorySpace memory_space, From f65c3d24a2e73026749fc3a8113230eeadfe4426 Mon Sep 17 00:00:00 2001 From: Xingyu Long Date: Mon, 13 Jul 2020 14:31:01 -0400 Subject: [PATCH 0284/2522] Update benchmark_test by using . 
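The benchmark classes drop the `ParameterizedBenchmark` metaclass and subclass `tf.test.Benchmark` directly, expanding each parameterized benchmark into one explicit method per batch size (`benchmark_*_bs_64`, `benchmark_*_bs_128`, ...). A minimal sketch of the resulting shape, with placeholder names only (the real methods build a Keras model and call `benchmark_util.measure_performance`):

```python
import tensorflow as tf


class ExampleBenchmark(tf.test.Benchmark):
  """Plain `tf.test.Benchmark`: one explicit method per batch size."""

  def _measure(self, batch_size, run_iters):
    # Stand-in for benchmark_util.measure_performance() in the real tests.
    return {'wall_time': 0.0}

  def benchmark_example_bs_128(self):
    run_iters = 1
    results = self._measure(batch_size=128, run_iters=run_iters)
    self.report_benchmark(
        iters=run_iters, wall_time=results['wall_time'], extras=results)

  def benchmark_example_bs_256(self):
    run_iters = 2
    results = self._measure(batch_size=256, run_iters=run_iters)
    self.report_benchmark(
        iters=run_iters, wall_time=results['wall_time'], extras=results)
```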
--- tensorflow/python/keras/benchmarks/BUILD | 6 -- .../bidirectional_lstm_benchmark_test.py | 77 ++++++++++++++----- ...assification_transformer_benchmark_test.py | 75 +++++++++++++----- 3 files changed, 113 insertions(+), 45 deletions(-) diff --git a/tensorflow/python/keras/benchmarks/BUILD b/tensorflow/python/keras/benchmarks/BUILD index b3f36ae5041..b41c99d87ff 100644 --- a/tensorflow/python/keras/benchmarks/BUILD +++ b/tensorflow/python/keras/benchmarks/BUILD @@ -88,9 +88,6 @@ py_test( size = "medium", srcs = ["keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py"], python_version = "PY3", - tags = [ - "no_windows", # b/160628318 - ], deps = [ ":benchmark_util", "//tensorflow:tensorflow_py", @@ -102,9 +99,6 @@ py_test( size = "medium", srcs = ["keras_examples_benchmarks/text_classification_transformer_benchmark_test.py"], python_version = "PY3", - tags = [ - "no_windows", # b/160628318 - ], deps = [ ":benchmark_util", "//tensorflow:tensorflow_py", diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py index 500b50f0081..11a1b2b2330 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py @@ -17,17 +17,14 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import six import tensorflow as tf -from tensorflow.python.platform import benchmark from tensorflow.python.keras.benchmarks import benchmark_util -class BidirectionalLSTMBenchmark( - six.with_metaclass(benchmark.ParameterizedBenchmark, tf.test.Benchmark)): - """Benchmarks for Bidirectional LSTM using `ParameterizedBenchmark`.""" +class BidirectionalLSTMBenchmark(tf.test.Benchmark): + """Benchmarks for Bidirectional LSTM using `tf.test.Benchmark`.""" def __init__(self): super(BidirectionalLSTMBenchmark, self).__init__() self.max_feature = 20000 @@ -37,20 +34,6 @@ class BidirectionalLSTMBenchmark( self.imdb_x = tf.keras.preprocessing.sequence.pad_sequences( self.imdb_x, maxlen=self.max_len) - # The parameters of each benchmark is a tuple: - - # (benchmark_name_suffix, batch_size, run_iters). - # benchmark_name_suffix: The suffix of the benchmark test name with - # convention `{bs}_{batch_size}`. - # batch_size: Integer. Number of samples per gradient update. - # run_iters: Integer. Number of iterations to run the - # performance measurement. - - _benchmark_parameters = [ - ('bs_32', 32, 2), ('bs_64', 64, 2), - ('bs_128', 128, 1), ('bs_256', 256, 1), - ('bs_512', 512, 3)] - def _build_model(self): """model from https://keras.io/examples/nlp/bidirectional_lstm_imdb/""" inputs = tf.keras.Input(shape=(None,), dtype='int32') @@ -63,7 +46,7 @@ class BidirectionalLSTMBenchmark( model = tf.keras.Model(inputs, outputs) return model - def benchmark_bidirect_lstm_imdb(self, batch_size, run_iters): + def benchmark_bidirect_lstm_imdb_bs_128(self): """ Required Arguments for measure_performance. x: Input data, it could be Numpy or load from tfds. @@ -74,6 +57,60 @@ class BidirectionalLSTMBenchmark( Other details can see in `measure_performance()` method of benchmark_util. 
""" + batch_size = 128 + run_iters = 1 + results = benchmark_util.measure_performance( + self._build_model, + x=self.imdb_x, + y=self.imdb_y, + batch_size=batch_size, + run_iters=run_iters, + optimizer='adam', + loss='binary_crossentropy', + metrics=['accuracy']) + + self.report_benchmark( + iters=run_iters, wall_time=results['wall_time'], extras=results) + + def benchmark_bidirect_lstm_imdb_bs_256(self): + """ Required Arguments for measure_performance. + + x: Input data, it could be Numpy or load from tfds. + y: Target data. If `x` is a dataset, generator instance, + `y` should not be specified. + loss: Loss function for model. + optimizer: Optimizer for model. + Other details can see in `measure_performance()` method of + benchmark_util. + """ + batch_size = 256 + run_iters = 2 + results = benchmark_util.measure_performance( + self._build_model, + x=self.imdb_x, + y=self.imdb_y, + batch_size=batch_size, + run_iters=run_iters, + optimizer='adam', + loss='binary_crossentropy', + metrics=['accuracy']) + + self.report_benchmark( + iters=run_iters, wall_time=results['wall_time'], extras=results) + + def benchmark_bidirect_lstm_imdb_bs_512(self): + """ Required Arguments for measure_performance. + + x: Input data, it could be Numpy or load from tfds. + y: Target data. If `x` is a dataset, generator instance, + `y` should not be specified. + loss: Loss function for model. + optimizer: Optimizer for model. + Other details can see in `measure_performance()` method of + benchmark_util. + """ + batch_size = 512 + run_iters = 1 results = benchmark_util.measure_performance( self._build_model, x=self.imdb_x, diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py index 03fac618e99..f070f2ddd29 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py @@ -17,18 +17,15 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import six import tensorflow as tf -from tensorflow.python.platform import benchmark from tensorflow.python.keras.benchmarks import benchmark_util -class TextWithTransformerBenchmark( - six.with_metaclass(benchmark.ParameterizedBenchmark, tf.test.Benchmark)): +class TextWithTransformerBenchmark(tf.test.Benchmark): """Benchmarks for Text classification with Transformer - using `ParameterizedBenchmark`.""" + using `tf.test.Benchmark`.""" def __init__(self): super(TextWithTransformerBenchmark, self).__init__() self.max_feature = 20000 @@ -38,19 +35,6 @@ class TextWithTransformerBenchmark( self.imdb_x = tf.keras.preprocessing.sequence.pad_sequences( self.imdb_x, maxlen=self.max_len) - # The parameters of each benchmark is a tuple: - - # (benchmark_name_suffix, batch_size, run_iters). - # benchmark_name_suffix: The suffix of the benchmark test name with - # convention `{bs}_{batch_size}`. - # batch_size: Integer. Number of samples per gradient update. - # run_iters: Integer. Number of iterations to run the - # performance measurement. 
- - _benchmark_parameters = [ - ('bs_64', 64, 2), ('bs_128', 128, 1), - ('bs_256', 256, 1), ('bs_512', 512, 3)] - def _build_model(self): """model from https://keras.io/examples/nlp/text_classification_with_transformer/""" embed_dim = 32 @@ -76,7 +60,7 @@ class TextWithTransformerBenchmark( model = tf.keras.Model(inputs=inputs, outputs=outputs) return model - def benchmark_text_classification(self, batch_size, run_iters): + def benchmark_text_classification_bs_64(self): """ Required Arguments for measure_performance. x: Input data, it could be Numpy or load from tfds. @@ -87,6 +71,8 @@ class TextWithTransformerBenchmark( Other details can see in `measure_performance()` method of benchmark_util. """ + batch_size = 64 + run_iters = 2 results = benchmark_util.measure_performance( self._build_model, x=self.imdb_x, @@ -100,6 +86,57 @@ class TextWithTransformerBenchmark( self.report_benchmark( iters=run_iters, wall_time=results['wall_time'], extras=results) + def benchmark_text_classification_bs_128(self): + """ Required Arguments for measure_performance. + + x: Input data, it could be Numpy or load from tfds. + y: Target data. If `x` is a dataset, generator instance, + `y` should not be specified. + loss: Loss function for model. + optimizer: Optimizer for model. + Other details can see in `measure_performance()` method of + benchmark_util. + """ + batch_size = 128 + run_iters = 3 + results = benchmark_util.measure_performance( + self._build_model, + x=self.imdb_x, + y=self.imdb_y, + batch_size=batch_size, + run_iters=run_iters, + optimizer='adam', + loss='sparse_categorical_crossentropy', + metrics=['accuracy']) + + self.report_benchmark( + iters=run_iters, wall_time=results['wall_time'], extras=results) + + def benchmark_text_classification_bs_256(self): + """ Required Arguments for measure_performance. + + x: Input data, it could be Numpy or load from tfds. + y: Target data. If `x` is a dataset, generator instance, + `y` should not be specified. + loss: Loss function for model. + optimizer: Optimizer for model. + Other details can see in `measure_performance()` method of + benchmark_util. + """ + batch_size = 256 + run_iters = 2 + results = benchmark_util.measure_performance( + self._build_model, + x=self.imdb_x, + y=self.imdb_y, + batch_size=batch_size, + run_iters=run_iters, + optimizer='adam', + loss='sparse_categorical_crossentropy', + metrics=['accuracy']) + + self.report_benchmark( + iters=run_iters, wall_time=results['wall_time'], extras=results) class MultiHeadSelfAttention(tf.keras.layers.Layer): """Implement multi head self attention as a Keras layer.""" From 264eb6ed1dbfb5e078c7dd977da8d7e633106fc5 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Mon, 13 Jul 2020 11:26:15 -0700 Subject: [PATCH 0285/2522] Fixed add bias transformation. Added check for convolution with dynamic weights. 
PiperOrigin-RevId: 320996352 Change-Id: Ie88eb026151c8ce49e9987867bc2807e13176cea --- .../lite/delegates/gpu/common/transformations/add_bias.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tensorflow/lite/delegates/gpu/common/transformations/add_bias.cc b/tensorflow/lite/delegates/gpu/common/transformations/add_bias.cc index ec2474138a3..29d70d8f4a9 100644 --- a/tensorflow/lite/delegates/gpu/common/transformations/add_bias.cc +++ b/tensorflow/lite/delegates/gpu/common/transformations/add_bias.cc @@ -48,6 +48,11 @@ class AddBias : public NodeTransformation { public: TransformResult ApplyToNode(Node* node, GraphFloat32* graph) final { if (node->operation.type == ToString(OperationType::CONVOLUTION_2D)) { + if (graph->FindInputs(node->id).size() != 1) { + return {TransformStatus::DECLINED, + "This transformation is only applicable to conv with one " + "runtime input."}; + } auto& attr = absl::any_cast(node->operation.attributes); return FillBias(attr.weights.shape.o, &attr.bias); From 1b9e0ecd96d9cbcb2eaad8f60ece9f4a5d4d757d Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Mon, 13 Jul 2020 11:35:03 -0700 Subject: [PATCH 0286/2522] [XLA:Python] Use a string tag for devices in a heap profile rather than a numeric tag. This makes filtering of heap profiles by device in pprof work reliably, e.g.: ``` pprof --tagfocus device=gpu:1 myprofile.prof ``` With a numeric filter, it was difficult to filter on device 0 (perhaps because proto3 conflates absent and default values for protobuf fields.) PiperOrigin-RevId: 320998422 Change-Id: I537eba48a102f0f875aebf8452592d132e9947f1 --- tensorflow/compiler/xla/python/py_client.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/python/py_client.cc b/tensorflow/compiler/xla/python/py_client.cc index bc7244cfc64..20f2b782fdb 100644 --- a/tensorflow/compiler/xla/python/py_client.cc +++ b/tensorflow/compiler/xla/python/py_client.cc @@ -275,7 +275,8 @@ py::bytes PyClient::HeapProfile() { kind_label->set_str(buffer_string_id); auto* device_label = sample->add_label(); device_label->set_key(device_string_id); - device_label->set_num(entry.first.device->id()); + device_label->set_str( + builder.StringId(entry.first.device->DebugString())); } else { kind_label->set_str(executable_string_id); } From 811976aa13d6375050484f463661c8cfa5ba8d78 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Mon, 13 Jul 2020 11:44:23 -0700 Subject: [PATCH 0287/2522] Disable failing test to have dashboard green PiperOrigin-RevId: 321000289 Change-Id: I7c4fce03a6ae620dadb2c94f9286124e968c22f9 --- tensorflow/python/keras/distribute/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/keras/distribute/BUILD b/tensorflow/python/keras/distribute/BUILD index 83c25562323..b089b9ad16b 100644 --- a/tensorflow/python/keras/distribute/BUILD +++ b/tensorflow/python/keras/distribute/BUILD @@ -412,6 +412,7 @@ distribute_py_test( tags = [ "multi_and_single_gpu", "no_windows_gpu", + "notap", # TODO(b/161144880): flaky "notsan", ], deps = [ From bbec020d318ad70a2b07f6d518ec82bb8f711f94 Mon Sep 17 00:00:00 2001 From: tenglu Date: Thu, 21 May 2020 04:06:35 +0800 Subject: [PATCH 0288/2522] Vectorize FP32/BF16 RandomUniform process. 
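The uniform distributions now produce at least an Eigen packet's worth of elements per call and apply the final `- 1.0` shift through an Eigen tensor expression over the whole result array, so the conversion from random bits to FP32/BF16 values in [0, 1) can be vectorized with SIMD. The following is only an illustrative Python sketch of the underlying scalar bit trick, not the implementation: 23 random mantissa bits with a fixed exponent of 127 yield a float in [1, 2), and subtracting 1.0 maps it to [0, 1); bfloat16 uses the same trick with a 7-bit mantissa.

```python
import struct


def uint32_to_unit_float(x):
  # sign = 0, exponent = 127, mantissa = low 23 bits of x  ->  value in [1, 2)
  bits = (127 << 23) | (x & 0x7FFFFF)
  return struct.unpack('<f', struct.pack('<I', bits))[0] - 1.0


print(uint32_to_unit_float(0x00000000))  # 0.0
print(uint32_to_unit_float(0x007FFFFF))  # ~0.99999988, just below 1.0
```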
--- tensorflow/core/kernels/BUILD | 1 + tensorflow/core/kernels/random_op_cpu.h | 6 +- tensorflow/core/kernels/random_op_test.cc | 38 +++--- tensorflow/core/lib/random/BUILD | 1 + .../core/lib/random/random_distributions.h | 110 ++++++++++++++---- .../kernel_tests/random/random_ops_test.py | 12 +- 6 files changed, 116 insertions(+), 52 deletions(-) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index b4129e05f91..e76ab6ac4ea 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -2424,6 +2424,7 @@ cc_library( deps = [ "//tensorflow/core:lib", "//tensorflow/core:lib_internal", + "//tensorflow/core:framework", ], ) diff --git a/tensorflow/core/kernels/random_op_cpu.h b/tensorflow/core/kernels/random_op_cpu.h index eac1faee2e4..85b406a4c79 100644 --- a/tensorflow/core/kernels/random_op_cpu.h +++ b/tensorflow/core/kernels/random_op_cpu.h @@ -166,9 +166,9 @@ void FillPhiloxRandom::operator()( int64 total_group_count = (size + kGroupSize - 1) / kGroupSize; - const int kGroupCost = - random::PhiloxRandom::kResultElementCount * - (random::PhiloxRandom::kElementCost + Distribution::kElementCost); + const int kGroupCost = random::PhiloxRandom::kResultElementCount * + random::PhiloxRandom::kElementCost + + kGroupSize * Distribution::kElementCost; Shard(worker_threads.num_threads, worker_threads.workers, total_group_count, kGroupCost, [&gen, data, size, dist](int64 start_group, int64 limit_group) { diff --git a/tensorflow/core/kernels/random_op_test.cc b/tensorflow/core/kernels/random_op_test.cc index 47d94ad9028..84a2c2b988a 100644 --- a/tensorflow/core/kernels/random_op_test.cc +++ b/tensorflow/core/kernels/random_op_test.cc @@ -37,41 +37,41 @@ Tensor VecShape(int64 v) { } } -Graph* RandomUniform(int64 n) { +Graph* RandomUniform(int64 n, DataType dtype) { Graph* g = new Graph(OpRegistry::Global()); - test::graph::RandomUniform(g, test::graph::Constant(g, VecShape(n)), - DT_FLOAT); + test::graph::RandomUniform(g, test::graph::Constant(g, VecShape(n)), dtype); return g; } -Graph* RandomNormal(int64 n) { +Graph* RandomNormal(int64 n, DataType dtype) { Graph* g = new Graph(OpRegistry::Global()); - test::graph::RandomGaussian(g, test::graph::Constant(g, VecShape(n)), - DT_FLOAT); + test::graph::RandomGaussian(g, test::graph::Constant(g, VecShape(n)), dtype); return g; } -Graph* TruncatedNormal(int64 n) { +Graph* TruncatedNormal(int64 n, DataType dtype) { Graph* g = new Graph(OpRegistry::Global()); - test::graph::TruncatedNormal(g, test::graph::Constant(g, VecShape(n)), - DT_FLOAT); + test::graph::TruncatedNormal(g, test::graph::Constant(g, VecShape(n)), dtype); return g; } -#define BM_RNG(DEVICE, RNG) \ - void BM_##DEVICE##_##RNG(int iters, int arg) { \ +#define BM_RNG(DEVICE, RNG, DTYPE) \ + void BM_##DEVICE##_##RNG##_##DTYPE(int iters, int arg) { \ testing::ItemsProcessed(static_cast(iters) * arg); \ - test::Benchmark(#DEVICE, RNG(arg)).Run(iters); \ + test::Benchmark(#DEVICE, RNG(arg, DTYPE)).Run(iters); \ } \ - BENCHMARK(BM_##DEVICE##_##RNG)->Range(1 << 20, 8 << 20); + BENCHMARK(BM_##DEVICE##_##RNG##_##DTYPE)->Range(1 << 20, 8 << 20); -BM_RNG(cpu, RandomUniform); -BM_RNG(cpu, RandomNormal); -BM_RNG(cpu, TruncatedNormal); +BM_RNG(cpu, RandomUniform, DT_FLOAT); +BM_RNG(cpu, RandomUniform, DT_BFLOAT16); +BM_RNG(cpu, RandomNormal, DT_FLOAT); +BM_RNG(cpu, TruncatedNormal, DT_FLOAT); -BM_RNG(gpu, RandomUniform); -BM_RNG(gpu, RandomNormal); -BM_RNG(gpu, TruncatedNormal); +#ifdef GOOGLE_CUDA +BM_RNG(gpu, RandomUniform, DT_FLOAT); +BM_RNG(gpu, 
RandomNormal, DT_FLOAT); +BM_RNG(gpu, TruncatedNormal, DT_FLOAT); +#endif Tensor VecAlphas(int64 n) { Tensor alphas(DT_DOUBLE, TensorShape({n})); diff --git a/tensorflow/core/lib/random/BUILD b/tensorflow/core/lib/random/BUILD index 1487a813149..f644eb913b6 100644 --- a/tensorflow/core/lib/random/BUILD +++ b/tensorflow/core/lib/random/BUILD @@ -45,6 +45,7 @@ cc_library( "//tensorflow/core/platform:logging", "//tensorflow/core/platform:macros", "//tensorflow/core/platform:types", + "//tensorflow/core:framework", "//third_party/eigen3", ], alwayslink = 1, diff --git a/tensorflow/core/lib/random/random_distributions.h b/tensorflow/core/lib/random/random_distributions.h index 386f13347d7..5c8b91fd982 100644 --- a/tensorflow/core/lib/random/random_distributions.h +++ b/tensorflow/core/lib/random/random_distributions.h @@ -23,22 +23,60 @@ limitations under the License. #include #include -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/lib/bfloat16/bfloat16.h" #include "tensorflow/core/lib/random/philox_random.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" namespace tensorflow { namespace random { // Helper function to convert a 16-bit integer to a half between [0..1). PHILOX_DEVICE_INLINE Eigen::half Uint16ToHalf(uint16 x); +// Helper function to convert a 16-bit integer to a bfloat16 between [1..2). +PHILOX_DEVICE_INLINE bfloat16 InternalUint16ToBfloat16(uint16 x); // Helper function to convert a 16-bit integer to a bfloat16 between [0..1). -PHILOX_DEVICE_INLINE bfloat16 Uint16ToGfloat16(uint16 x); +PHILOX_DEVICE_INLINE bfloat16 Uint16ToBfloat16(uint16 x); +// Helper function to convert a 32-bit integer to a float between [1..2). +PHILOX_DEVICE_INLINE float InternalUint32ToFloat(uint32 x); // Helper function to convert a 32-bit integer to a float between [0..1). PHILOX_DEVICE_INLINE float Uint32ToFloat(uint32 x); // Helper function to convert two 32-bit integers to a double between [0..1). PHILOX_DEVICE_INLINE double Uint64ToDouble(uint32 x0, uint32 x1); +// Helper function to format distribution result in vectorization path, +// it creates Eigen::Tensor and reuses packet feature with SIMD. +template +typename Distribution::ResultType VectorizedFormat( + Generator* gen, typename Distribution::FormatFunc functor) { + typename Generator::ResultType sample; + typename Distribution::ResultType result; + const int kResultElementCount = Distribution::kResultElementCount; + const int inner_count = Generator::kResultElementCount; + const int outer_count = kResultElementCount / inner_count; + int offset = 0; + + for (int k = 0; k < outer_count; k++) { + sample = (*gen)(); + for (int i = 0; i < inner_count; i++, offset++) { + result[offset] = (*functor)(sample[i]); + } + } + // Tail processing if any. + if (offset < kResultElementCount) { + sample = (*gen)(); + for (int i = 0; offset < kResultElementCount; i++, offset++) { + result[offset] = (*functor)(sample[i]); + } + } + + auto tensor_result = + typename TTypes::Tensor( + &result[0], kResultElementCount); + tensor_result = tensor_result - typename Distribution::ResultElementType(1.0); + return result; +} + // Computes a + b. Requires that the result is representable in the destination // type and that b is not maximal (i.e. b + 1 is not 0). Notably, the addend b // need *not* be representable in that type. 
(The condition on b excludes the @@ -95,7 +133,14 @@ template class UniformDistribution { public: // The number of elements that will be returned. - static constexpr int kResultElementCount = Generator::kResultElementCount; + // Set the number to be greater equal to Eigen packet size of type, + // so computations can be vectorized using SIMD. + static constexpr int kVectorLength = + Eigen::internal::packet_traits::size; + static constexpr int kResultElementCount = + (kVectorLength > Generator::kResultElementCount) + ? kVectorLength + : Generator::kResultElementCount; // Cost of generation of a single element (in cycles). static constexpr int kElementCost = 3; // Indicate that this distribution may take variable number of samples @@ -103,15 +148,13 @@ class UniformDistribution { static constexpr bool kVariableSamplesPerOutput = false; typedef Array ResultType; typedef bfloat16 ResultElementType; + // Helper definiation for the format function. + typedef bfloat16 (*FormatFunc)(uint16); PHILOX_DEVICE_INLINE ResultType operator()(Generator* gen) { - typename Generator::ResultType sample = (*gen)(); - ResultType result; - for (int i = 0; i < kResultElementCount; ++i) { - result[i] = Uint16ToGfloat16(sample[i]); - } - return result; + return VectorizedFormat, + Generator>(gen, InternalUint16ToBfloat16); } }; @@ -119,7 +162,14 @@ template class UniformDistribution { public: // The number of elements that will be returned. - static constexpr int kResultElementCount = Generator::kResultElementCount; + // Set the number to be greater equal to Eigen packet size of type, + // so computations can be vectorized using SIMD. + static constexpr int kVectorLength = + Eigen::internal::packet_traits::size; + static constexpr int kResultElementCount = + (kVectorLength > Generator::kResultElementCount) + ? kVectorLength + : Generator::kResultElementCount; // Cost of generation of a single element (in cycles). static constexpr int kElementCost = 3; // Indicate that this distribution may take variable number of samples @@ -127,15 +177,13 @@ class UniformDistribution { static constexpr bool kVariableSamplesPerOutput = false; typedef Array ResultType; typedef float ResultElementType; + // Helper definiation for the format function. + typedef float (*FormatFunc)(uint32); PHILOX_DEVICE_INLINE ResultType operator()(Generator* gen) { - typename Generator::ResultType sample = (*gen)(); - ResultType result; - for (int i = 0; i < kResultElementCount; ++i) { - result[i] = Uint32ToFloat(sample[i]); - } - return result; + return VectorizedFormat, Generator>( + gen, InternalUint32ToFloat); } }; @@ -764,9 +812,9 @@ PHILOX_DEVICE_INLINE Eigen::half Uint16ToHalf(uint16 x) { return result - Eigen::half(1.0); } -// Helper function to convert an 16-bit integer to a bfloat16 between [0..1). -// This can create a uniform distribution of values between [0..1). -PHILOX_DEVICE_INLINE bfloat16 Uint16ToGfloat16(uint16 x) { +// Helper function to convert an 16-bit integer to a bfloat16 between [1..2). +// This can create a uniform distribution of values between [1..2). +PHILOX_DEVICE_INLINE bfloat16 InternalUint16ToBfloat16(uint16 x) { // bfloat are formatted as follows (MSB first): // sign(1) exponent(8) mantissa(7) // Conceptually construct the following: @@ -780,13 +828,20 @@ PHILOX_DEVICE_INLINE bfloat16 Uint16ToGfloat16(uint16 x) { bfloat16 result; memcpy(&result, &val, sizeof(val)); // The mantissa has an implicit leading 1, so the above code creates a value - // in [1, 2). The minus will not cause a rounding that makes the result 1. 
- // Instead it will just be close to 1. - return result - bfloat16(1.0); + // in [1, 2). + return result; } -// Helper function to convert an 32-bit integer to a float between [0..1). -PHILOX_DEVICE_INLINE float Uint32ToFloat(uint32 x) { +// Helper function to convert an 16-bit integer to a bfloat16 between [0..1). +// This can create a uniform distribution of values between [0..1). +PHILOX_DEVICE_INLINE bfloat16 Uint16ToBfloat16(uint16 x) { + // The minus will not cause a rounding that makes the result 1. + // Instead it will just be close to 1. + return InternalUint16ToBfloat16(x) - bfloat16(1.0); +} + +// Helper function to convert an 32-bit integer to a float between [1..2). +PHILOX_DEVICE_INLINE float InternalUint32ToFloat(uint32 x) { // IEEE754 floats are formatted as follows (MSB first): // sign(1) exponent(8) mantissa(23) // Conceptually construct the following: @@ -800,7 +855,12 @@ PHILOX_DEVICE_INLINE float Uint32ToFloat(uint32 x) { // Assumes that endian-ness is same for float and uint32. float result; memcpy(&result, &val, sizeof(val)); - return result - 1.0f; + return result; +} + +// Helper function to convert an 32-bit integer to a float between [0..1). +PHILOX_DEVICE_INLINE float Uint32ToFloat(uint32 x) { + return InternalUint32ToFloat(x) - 1.0f; } // Helper function to convert two 32-bit integers to a double between [0..1). diff --git a/tensorflow/python/kernel_tests/random/random_ops_test.py b/tensorflow/python/kernel_tests/random/random_ops_test.py index c361f79fb1f..7bbb50db031 100644 --- a/tensorflow/python/kernel_tests/random/random_ops_test.py +++ b/tensorflow/python/kernel_tests/random/random_ops_test.py @@ -276,8 +276,9 @@ class RandomUniformTest(RandomOpTestCommon): def testRange(self): for dt in (dtypes.float16, dtypes.float32, dtypes.float64, dtypes.int32, - dtypes.int64): - sampler = self._Sampler(1000, minv=-2, maxv=8, dtype=dt, use_gpu=True) + dtypes.int64, dtypes.bfloat16): + use_gpu = (dt != dtypes.bfloat16) + sampler = self._Sampler(1000, minv=-2, maxv=8, dtype=dt, use_gpu=use_gpu) x = sampler() self.assertTrue(-2 <= np.min(x)) self.assertTrue(np.max(x) < 8) @@ -363,10 +364,11 @@ class RandomUniformTest(RandomOpTestCommon): @test_util.run_deprecated_v1 def testSeed(self): for dt in (dtypes.float16, dtypes.float32, dtypes.float64, dtypes.int32, - dtypes.int64): + dtypes.int64, dtypes.bfloat16): for seed in [345, 2**100, -2**100]: - sx = self._Sampler(1000, 0, 17, dtype=dt, use_gpu=True, seed=seed) - sy = self._Sampler(1000, 0, 17, dtype=dt, use_gpu=True, seed=seed) + use_gpu = (dt != dtypes.bfloat16) + sx = self._Sampler(1000, 0, 17, dtype=dt, use_gpu=use_gpu, seed=seed) + sy = self._Sampler(1000, 0, 17, dtype=dt, use_gpu=use_gpu, seed=seed) self.assertAllEqual(sx(), sy()) @test_util.run_deprecated_v1 From c7aec60edf959f74ad996e8c8f48c3f414361619 Mon Sep 17 00:00:00 2001 From: tenglu Date: Mon, 25 May 2020 04:41:38 +0800 Subject: [PATCH 0289/2522] Address code and simplify build dependence for RandomUniform. 
--- tensorflow/core/kernels/BUILD | 4 +--- .../core/lib/random/random_distributions.h | 16 ++++++---------- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index e76ab6ac4ea..10c75ddc34c 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -2411,7 +2411,6 @@ tf_kernel_library( prefix = "candidate_sampler_ops", deps = [ ":range_sampler", - "//tensorflow/core:framework", "//tensorflow/core:lib", ], ) @@ -2422,9 +2421,9 @@ cc_library( hdrs = ["range_sampler.h"], visibility = ["//visibility:private"], deps = [ + "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", - "//tensorflow/core:framework", ], ) @@ -2434,7 +2433,6 @@ tf_cc_test( srcs = ["range_sampler_test.cc"], deps = [ ":range_sampler", - "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:test", "//tensorflow/core:test_main", diff --git a/tensorflow/core/lib/random/random_distributions.h b/tensorflow/core/lib/random/random_distributions.h index 5c8b91fd982..fe006e72ece 100644 --- a/tensorflow/core/lib/random/random_distributions.h +++ b/tensorflow/core/lib/random/random_distributions.h @@ -133,14 +133,12 @@ template class UniformDistribution { public: // The number of elements that will be returned. - // Set the number to be greater equal to Eigen packet size of type, + // Set the number to be Eigen packet size of type at least, // so computations can be vectorized using SIMD. static constexpr int kVectorLength = Eigen::internal::packet_traits::size; static constexpr int kResultElementCount = - (kVectorLength > Generator::kResultElementCount) - ? kVectorLength - : Generator::kResultElementCount; + std::max(kVectorLength, Generator::kResultElementCount); // Cost of generation of a single element (in cycles). static constexpr int kElementCost = 3; // Indicate that this distribution may take variable number of samples @@ -148,7 +146,7 @@ class UniformDistribution { static constexpr bool kVariableSamplesPerOutput = false; typedef Array ResultType; typedef bfloat16 ResultElementType; - // Helper definiation for the format function. + // Helper definition for the format function. typedef bfloat16 (*FormatFunc)(uint16); PHILOX_DEVICE_INLINE @@ -162,14 +160,12 @@ template class UniformDistribution { public: // The number of elements that will be returned. - // Set the number to be greater equal to Eigen packet size of type, + // Set the number to be Eigen packet size of type at least, // so computations can be vectorized using SIMD. static constexpr int kVectorLength = Eigen::internal::packet_traits::size; static constexpr int kResultElementCount = - (kVectorLength > Generator::kResultElementCount) - ? kVectorLength - : Generator::kResultElementCount; + std::max(kVectorLength, Generator::kResultElementCount); // Cost of generation of a single element (in cycles). static constexpr int kElementCost = 3; // Indicate that this distribution may take variable number of samples @@ -177,7 +173,7 @@ class UniformDistribution { static constexpr bool kVariableSamplesPerOutput = false; typedef Array ResultType; typedef float ResultElementType; - // Helper definiation for the format function. + // Helper definition for the format function. 
typedef float (*FormatFunc)(uint32); PHILOX_DEVICE_INLINE From e9703b26eecc20b27f2750ff6cca11027e567c5f Mon Sep 17 00:00:00 2001 From: tenglu Date: Tue, 26 May 2020 23:01:57 +0800 Subject: [PATCH 0290/2522] Add comment to explain why need to put condition out of loop. --- tensorflow/core/lib/random/random_distributions.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/core/lib/random/random_distributions.h b/tensorflow/core/lib/random/random_distributions.h index fe006e72ece..5f4c6f17bd1 100644 --- a/tensorflow/core/lib/random/random_distributions.h +++ b/tensorflow/core/lib/random/random_distributions.h @@ -63,6 +63,8 @@ typename Distribution::ResultType VectorizedFormat( } } // Tail processing if any. + // Put the tail condition out of above loop to improve performance: + // it will be executed only once and save time on CPU. if (offset < kResultElementCount) { sample = (*gen)(); for (int i = 0; offset < kResultElementCount; i++, offset++) { From f1ea83d69a252d2b1890d7adf46e0beb2ea22269 Mon Sep 17 00:00:00 2001 From: tenglu Date: Mon, 1 Jun 2020 23:12:59 +0800 Subject: [PATCH 0291/2522] Address hard code *4* for Distribution and fix BUILD order. --- .../core/kernels/parameterized_truncated_normal_op.cc | 7 ++++--- tensorflow/core/kernels/random_binomial_op.cc | 4 ---- tensorflow/core/lib/random/BUILD | 2 +- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/tensorflow/core/kernels/parameterized_truncated_normal_op.cc b/tensorflow/core/kernels/parameterized_truncated_normal_op.cc index ba1fd280ce7..561f0134540 100644 --- a/tensorflow/core/kernels/parameterized_truncated_normal_op.cc +++ b/tensorflow/core/kernels/parameterized_truncated_normal_op.cc @@ -87,9 +87,10 @@ struct TruncatedNormalFunctor { Normal normal_dist; // Vectorized intermediate calculations for uniform rejection sampling. - // We always generate at most 4 samples. - Eigen::array z; - Eigen::array g; + const int length = + std::max(Uniform::kResultElementCount, Normal::kResultElementCount); + Eigen::array z; + Eigen::array g; for (int64 b = start_batch; b < limit_batch; ++b) { // We are passed a flat array for each of the parameter tensors. diff --git a/tensorflow/core/kernels/random_binomial_op.cc b/tensorflow/core/kernels/random_binomial_op.cc index 4647457ff6f..172d04940cd 100644 --- a/tensorflow/core/kernels/random_binomial_op.cc +++ b/tensorflow/core/kernels/random_binomial_op.cc @@ -183,10 +183,6 @@ struct RandomBinomialFunctor { // We have B1 * ... * Bk samples per batch member we need. auto DoWork = [num_batches, samples_per_batch, &bcast, &counts, &probs, &gen, &output](int start_output, int limit_output) { - // Vectorized intermediate calculations for uniform rejection sampling. - // We always generate at most 4 samples. 
- Eigen::array z; - Eigen::array g; const bool should_bcast = bcast.IsBroadcastingRequired(); const auto& counts_batch_indices = bcast.x_batch_indices(); const auto& probs_batch_indices = bcast.y_batch_indices(); diff --git a/tensorflow/core/lib/random/BUILD b/tensorflow/core/lib/random/BUILD index f644eb913b6..ba2c91e8b78 100644 --- a/tensorflow/core/lib/random/BUILD +++ b/tensorflow/core/lib/random/BUILD @@ -40,12 +40,12 @@ cc_library( deps = [ ":exact_uniform_int", ":philox_random", + "//tensorflow/core:framework", "//tensorflow/core/lib/bfloat16", "//tensorflow/core/lib/gtl:array_slice", "//tensorflow/core/platform:logging", "//tensorflow/core/platform:macros", "//tensorflow/core/platform:types", - "//tensorflow/core:framework", "//third_party/eigen3", ], alwayslink = 1, From 468a76d7d54417ab0e64c18c27a4221905d93bc2 Mon Sep 17 00:00:00 2001 From: Teng Lu Date: Fri, 12 Jun 2020 11:10:25 +0800 Subject: [PATCH 0292/2522] Change vars location and simplify the logic. --- .../kernels/parameterized_truncated_normal_op.cc | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/kernels/parameterized_truncated_normal_op.cc b/tensorflow/core/kernels/parameterized_truncated_normal_op.cc index 561f0134540..bf78e3b9a33 100644 --- a/tensorflow/core/kernels/parameterized_truncated_normal_op.cc +++ b/tensorflow/core/kernels/parameterized_truncated_normal_op.cc @@ -86,12 +86,6 @@ struct TruncatedNormalFunctor { using Normal = random::NormalDistribution; Normal normal_dist; - // Vectorized intermediate calculations for uniform rejection sampling. - const int length = - std::max(Uniform::kResultElementCount, Normal::kResultElementCount); - Eigen::array z; - Eigen::array g; - for (int64 b = start_batch; b < limit_batch; ++b) { // We are passed a flat array for each of the parameter tensors. // The input is either a scalar broadcasted to all batches or a vector @@ -191,6 +185,14 @@ struct TruncatedNormalFunctor { while (sample < limit_sample) { const auto rand = dist(&gen_copy); const int size = rand.size(); + + // Vectorized intermediate calculations for uniform rejection + // sampling. + // Simply allocate Uniform::kResultElementCount for the two arrays + // since they are only used by UniformDistribution. + Eigen::array z; + Eigen::array g; + // NOTE(ringwalt): These loops seem to only generate packed AVX // instructions for float32. for (int i = 0; i < size; i++) { From 92e08f139baa4aa7dbcddde967329fecf848580e Mon Sep 17 00:00:00 2001 From: Teng Lu Date: Sat, 13 Jun 2020 11:02:25 +0800 Subject: [PATCH 0293/2522] Update array size in parameterized_truncated_normal_op.cc Co-authored-by: Penporn Koanantakool <38085909+penpornk@users.noreply.github.com> --- tensorflow/core/kernels/parameterized_truncated_normal_op.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/parameterized_truncated_normal_op.cc b/tensorflow/core/kernels/parameterized_truncated_normal_op.cc index bf78e3b9a33..1486472f6b5 100644 --- a/tensorflow/core/kernels/parameterized_truncated_normal_op.cc +++ b/tensorflow/core/kernels/parameterized_truncated_normal_op.cc @@ -190,8 +190,8 @@ struct TruncatedNormalFunctor { // sampling. // Simply allocate Uniform::kResultElementCount for the two arrays // since they are only used by UniformDistribution. - Eigen::array z; - Eigen::array g; + Eigen::array z; + Eigen::array g; // NOTE(ringwalt): These loops seem to only generate packed AVX // instructions for float32. 
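The patches above size the distribution's per-call result count to at least one SIMD packet and size the rejection-sampling scratch arrays to match. A minimal standalone sketch of that sizing rule follows; the element counts used here are illustrative assumptions, not values taken from TensorFlow.

#include <algorithm>
#include <cstdio>

// Sketch: pick the per-call sample count so one call fills a whole SIMD packet.
constexpr int kGeneratorCount = 4;  // e.g. a Philox call yields 4 x uint32 (assumed)
constexpr int kPacketSize = 8;      // e.g. Eigen float packet size with AVX (assumed)
constexpr int kResultElementCount = std::max(kPacketSize, kGeneratorCount);

int main() {
  // Scratch arrays (z and g in the patches) are sized to this same count, so
  // the rejection-sampling loops can stay fully vectorized.
  std::printf("samples per distribution call: %d\n", kResultElementCount);
  return 0;
}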
From 639adee20333ba04ec7a029adb3d06456e707bd7 Mon Sep 17 00:00:00 2001 From: Teng Lu Date: Sun, 14 Jun 2020 15:08:28 +0800 Subject: [PATCH 0294/2522] Fix const expr error. --- .../core/kernels/parameterized_truncated_normal_op.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/parameterized_truncated_normal_op.cc b/tensorflow/core/kernels/parameterized_truncated_normal_op.cc index 1486472f6b5..01dabd56fa0 100644 --- a/tensorflow/core/kernels/parameterized_truncated_normal_op.cc +++ b/tensorflow/core/kernels/parameterized_truncated_normal_op.cc @@ -190,8 +190,11 @@ struct TruncatedNormalFunctor { // sampling. // Simply allocate Uniform::kResultElementCount for the two arrays // since they are only used by UniformDistribution. - Eigen::array z; - Eigen::array g; + OP_REQUIRES(ctx, size == Uniform::kResultElementCount, + errors::InvalidArgument( + "Incompatible UniformDistribution size.")); + Eigen::array z; + Eigen::array g; // NOTE(ringwalt): These loops seem to only generate packed AVX // instructions for float32. From d1e32e31d725e5bea074ba618ae650146d5c01d8 Mon Sep 17 00:00:00 2001 From: Teng Lu Date: Mon, 15 Jun 2020 14:25:18 +0800 Subject: [PATCH 0295/2522] Fix UniformDistribution build cycle issue. --- tensorflow/core/kernels/BUILD | 4 +++- tensorflow/core/lib/random/BUILD | 2 +- tensorflow/core/lib/random/random_distributions.h | 11 +++++++---- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 10c75ddc34c..baaf4c129fe 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -2411,6 +2411,7 @@ tf_kernel_library( prefix = "candidate_sampler_ops", deps = [ ":range_sampler", + "//tensorflow/core:framework", "//tensorflow/core:lib", ], ) @@ -2421,9 +2422,9 @@ cc_library( hdrs = ["range_sampler.h"], visibility = ["//visibility:private"], deps = [ - "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", + "//tensorflow/core/framework:numeric_types", ], ) @@ -2433,6 +2434,7 @@ tf_cc_test( srcs = ["range_sampler_test.cc"], deps = [ ":range_sampler", + "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:test", "//tensorflow/core:test_main", diff --git a/tensorflow/core/lib/random/BUILD b/tensorflow/core/lib/random/BUILD index ba2c91e8b78..02ead082245 100644 --- a/tensorflow/core/lib/random/BUILD +++ b/tensorflow/core/lib/random/BUILD @@ -40,7 +40,7 @@ cc_library( deps = [ ":exact_uniform_int", ":philox_random", - "//tensorflow/core:framework", + "//tensorflow/core/framework:numeric_types", "//tensorflow/core/lib/bfloat16", "//tensorflow/core/lib/gtl:array_slice", "//tensorflow/core/platform:logging", diff --git a/tensorflow/core/lib/random/random_distributions.h b/tensorflow/core/lib/random/random_distributions.h index 5f4c6f17bd1..08bcbe46076 100644 --- a/tensorflow/core/lib/random/random_distributions.h +++ b/tensorflow/core/lib/random/random_distributions.h @@ -23,7 +23,7 @@ limitations under the License. 
#include #include -#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/numeric_types.h" #include "tensorflow/core/lib/bfloat16/bfloat16.h" #include "tensorflow/core/lib/random/philox_random.h" #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" @@ -72,9 +72,12 @@ typename Distribution::ResultType VectorizedFormat( } } - auto tensor_result = - typename TTypes::Tensor( - &result[0], kResultElementCount); + typedef Eigen::TensorMap< + Eigen::Tensor, + Eigen::Aligned> + Tensor; + auto tensor_result = Tensor(&result[0], kResultElementCount); tensor_result = tensor_result - typename Distribution::ResultElementType(1.0); return result; } From 6bc331ae135d01c23fb738768825a1bf05b50f5d Mon Sep 17 00:00:00 2001 From: Teng Lu Date: Thu, 18 Jun 2020 18:20:21 +0800 Subject: [PATCH 0296/2522] Separate CPU/GPU Distribution with macro. --- .../core/lib/random/random_distributions.h | 49 ++++++++++++++----- 1 file changed, 37 insertions(+), 12 deletions(-) diff --git a/tensorflow/core/lib/random/random_distributions.h b/tensorflow/core/lib/random/random_distributions.h index 08bcbe46076..70713d31833 100644 --- a/tensorflow/core/lib/random/random_distributions.h +++ b/tensorflow/core/lib/random/random_distributions.h @@ -44,10 +44,25 @@ PHILOX_DEVICE_INLINE float Uint32ToFloat(uint32 x); // Helper function to convert two 32-bit integers to a double between [0..1). PHILOX_DEVICE_INLINE double Uint64ToDouble(uint32 x0, uint32 x1); +// Helper function to tell the suitable Distribution samples length +// on different platforms. +template +PHILOX_DEVICE_INLINE constexpr int PlatformResultElementCount() { +#ifdef __CUDA_ARCH__ + return Generator::kResultElementCount; +#else + // Set the number to be Eigen packet size of type at least, so computations + // can be vectorized using SIMD on CPU. + constexpr int kVectorLength = Eigen::internal::packet_traits::size; + return std::max(kVectorLength, Generator::kResultElementCount); +#endif // __CUDA_ARCH__ +} + // Helper function to format distribution result in vectorization path, // it creates Eigen::Tensor and reuses packet feature with SIMD. -template -typename Distribution::ResultType VectorizedFormat( +// This function can only work on CPU +template +PHILOX_DEVICE_INLINE typename Distribution::ResultType VectorizedFormat( Generator* gen, typename Distribution::FormatFunc functor) { typename Generator::ResultType sample; typename Distribution::ResultType result; @@ -138,12 +153,8 @@ template class UniformDistribution { public: // The number of elements that will be returned. - // Set the number to be Eigen packet size of type at least, - // so computations can be vectorized using SIMD. - static constexpr int kVectorLength = - Eigen::internal::packet_traits::size; static constexpr int kResultElementCount = - std::max(kVectorLength, Generator::kResultElementCount); + PlatformResultElementCount(); // Cost of generation of a single element (in cycles). 
static constexpr int kElementCost = 3; // Indicate that this distribution may take variable number of samples @@ -156,8 +167,17 @@ class UniformDistribution { PHILOX_DEVICE_INLINE ResultType operator()(Generator* gen) { +#ifdef __CUDA_ARCH__ + typename Generator::ResultType sample = (*gen)(); + ResultType result; + for (int i = 0; i < kResultElementCount; ++i) { + result[i] = Uint16ToBfloat16(sample[i]); + } + return result; +#else return VectorizedFormat, Generator>(gen, InternalUint16ToBfloat16); +#endif // __CUDA_ARCH__ } }; @@ -165,12 +185,8 @@ template class UniformDistribution { public: // The number of elements that will be returned. - // Set the number to be Eigen packet size of type at least, - // so computations can be vectorized using SIMD. - static constexpr int kVectorLength = - Eigen::internal::packet_traits::size; static constexpr int kResultElementCount = - std::max(kVectorLength, Generator::kResultElementCount); + PlatformResultElementCount(); // Cost of generation of a single element (in cycles). static constexpr int kElementCost = 3; // Indicate that this distribution may take variable number of samples @@ -183,8 +199,17 @@ class UniformDistribution { PHILOX_DEVICE_INLINE ResultType operator()(Generator* gen) { +#ifdef __CUDA_ARCH__ + typename Generator::ResultType sample = (*gen)(); + ResultType result; + for (int i = 0; i < kResultElementCount; ++i) { + result[i] = Uint32ToFloat(sample[i]); + } + return result; +#else return VectorizedFormat, Generator>( gen, InternalUint32ToFloat); +#endif // __CUDA_ARCH__ } }; From 725046ee1b0ee2d865430749bcfa88a6589d45c2 Mon Sep 17 00:00:00 2001 From: Teng Lu Date: Mon, 29 Jun 2020 17:58:36 +0800 Subject: [PATCH 0297/2522] Fix Skip issue and use "IsVec" flag to control optimizaiton scope. --- .../parameterized_truncated_normal_op.cc | 16 ++--- tensorflow/core/kernels/random_op.cc | 2 +- tensorflow/core/kernels/random_op_cpu.h | 7 +- .../core/lib/random/random_distributions.h | 64 +++++++++---------- 4 files changed, 41 insertions(+), 48 deletions(-) diff --git a/tensorflow/core/kernels/parameterized_truncated_normal_op.cc b/tensorflow/core/kernels/parameterized_truncated_normal_op.cc index 01dabd56fa0..ba1fd280ce7 100644 --- a/tensorflow/core/kernels/parameterized_truncated_normal_op.cc +++ b/tensorflow/core/kernels/parameterized_truncated_normal_op.cc @@ -86,6 +86,11 @@ struct TruncatedNormalFunctor { using Normal = random::NormalDistribution; Normal normal_dist; + // Vectorized intermediate calculations for uniform rejection sampling. + // We always generate at most 4 samples. + Eigen::array z; + Eigen::array g; + for (int64 b = start_batch; b < limit_batch; ++b) { // We are passed a flat array for each of the parameter tensors. // The input is either a scalar broadcasted to all batches or a vector @@ -185,17 +190,6 @@ struct TruncatedNormalFunctor { while (sample < limit_sample) { const auto rand = dist(&gen_copy); const int size = rand.size(); - - // Vectorized intermediate calculations for uniform rejection - // sampling. - // Simply allocate Uniform::kResultElementCount for the two arrays - // since they are only used by UniformDistribution. - OP_REQUIRES(ctx, size == Uniform::kResultElementCount, - errors::InvalidArgument( - "Incompatible UniformDistribution size.")); - Eigen::array z; - Eigen::array g; - // NOTE(ringwalt): These loops seem to only generate packed AVX // instructions for float32. 
for (int i = 0; i < size; i++) { diff --git a/tensorflow/core/kernels/random_op.cc b/tensorflow/core/kernels/random_op.cc index 152ab5f7d1e..3f55fc84b46 100644 --- a/tensorflow/core/kernels/random_op.cc +++ b/tensorflow/core/kernels/random_op.cc @@ -344,7 +344,7 @@ class RandomGammaOp : public OpKernel { .HostMemory("shape") \ .TypeConstraint("dtype"), \ PhiloxRandomOp>); \ + random::PhiloxRandom, TYPE, true>>); \ REGISTER_KERNEL_BUILDER( \ Name("RandomStandardNormal") \ .Device(DEVICE_CPU) \ diff --git a/tensorflow/core/kernels/random_op_cpu.h b/tensorflow/core/kernels/random_op_cpu.h index 85b406a4c79..83e27741a72 100644 --- a/tensorflow/core/kernels/random_op_cpu.h +++ b/tensorflow/core/kernels/random_op_cpu.h @@ -86,7 +86,7 @@ struct FillPhiloxRandomTask { int64 start_group, int64 limit_group, Distribution dist) { const int kGroupSize = Distribution::kResultElementCount; - gen.Skip(start_group); + gen.Skip(start_group * kGroupSize / gen.kResultElementCount); int64 offset = start_group * kGroupSize; // First fill all the full-size groups @@ -166,9 +166,8 @@ void FillPhiloxRandom::operator()( int64 total_group_count = (size + kGroupSize - 1) / kGroupSize; - const int kGroupCost = random::PhiloxRandom::kResultElementCount * - random::PhiloxRandom::kElementCost + - kGroupSize * Distribution::kElementCost; + const int kGroupCost = kGroupSize * (random::PhiloxRandom::kElementCost + + Distribution::kElementCost); Shard(worker_threads.num_threads, worker_threads.workers, total_group_count, kGroupCost, [&gen, data, size, dist](int64 start_group, int64 limit_group) { diff --git a/tensorflow/core/lib/random/random_distributions.h b/tensorflow/core/lib/random/random_distributions.h index 70713d31833..b0214f58559 100644 --- a/tensorflow/core/lib/random/random_distributions.h +++ b/tensorflow/core/lib/random/random_distributions.h @@ -44,20 +44,6 @@ PHILOX_DEVICE_INLINE float Uint32ToFloat(uint32 x); // Helper function to convert two 32-bit integers to a double between [0..1). PHILOX_DEVICE_INLINE double Uint64ToDouble(uint32 x0, uint32 x1); -// Helper function to tell the suitable Distribution samples length -// on different platforms. -template -PHILOX_DEVICE_INLINE constexpr int PlatformResultElementCount() { -#ifdef __CUDA_ARCH__ - return Generator::kResultElementCount; -#else - // Set the number to be Eigen packet size of type at least, so computations - // can be vectorized using SIMD on CPU. - constexpr int kVectorLength = Eigen::internal::packet_traits::size; - return std::max(kVectorLength, Generator::kResultElementCount); -#endif // __CUDA_ARCH__ -} - // Helper function to format distribution result in vectorization path, // it creates Eigen::Tensor and reuses packet feature with SIMD. // This function can only work on CPU @@ -120,13 +106,15 @@ PHILOX_DEVICE_INLINE Int SignedAdd(Int a, // actual returned sample type. // RealType: the data type of the real numbers that will be returned by the // distribution. This could be either float or double for now. +// IsVec: mark this UniformDistribution can be vectorized or not by SIMD on +// CPU. Note this should always be false on GPU. // This class is meant to be implemented through specialization. The default // is not defined by design. -template +template class UniformDistribution; -template -class UniformDistribution { +template +class UniformDistribution { public: // The number of elements that will be returned. 
static constexpr int kResultElementCount = Generator::kResultElementCount; @@ -149,12 +137,17 @@ class UniformDistribution { } }; -template -class UniformDistribution { +template +class UniformDistribution { public: // The number of elements that will be returned. + // Set the number to be Eigen packet size of type at least, so computations + // can be vectorized using SIMD on CPU. + static constexpr int kVectorLength = std::max( + static_cast(Eigen::internal::packet_traits::size), + Generator::kResultElementCount); static constexpr int kResultElementCount = - PlatformResultElementCount(); + IsVec ? kVectorLength : Generator::kResultElementCount; // Cost of generation of a single element (in cycles). static constexpr int kElementCost = 3; // Indicate that this distribution may take variable number of samples @@ -168,6 +161,7 @@ class UniformDistribution { PHILOX_DEVICE_INLINE ResultType operator()(Generator* gen) { #ifdef __CUDA_ARCH__ + static_assert(!IsVec, "Can't vectorize Distribution on GPU"); typename Generator::ResultType sample = (*gen)(); ResultType result; for (int i = 0; i < kResultElementCount; ++i) { @@ -175,18 +169,23 @@ class UniformDistribution { } return result; #else - return VectorizedFormat, + return VectorizedFormat, Generator>(gen, InternalUint16ToBfloat16); #endif // __CUDA_ARCH__ } }; -template -class UniformDistribution { +template +class UniformDistribution { public: // The number of elements that will be returned. + // Set the number to be Eigen packet size of type at least, so computations + // can be vectorized using SIMD on CPU. + static constexpr int kVectorLength = std::max( + static_cast(Eigen::internal::packet_traits::size), + Generator::kResultElementCount); static constexpr int kResultElementCount = - PlatformResultElementCount(); + IsVec ? kVectorLength : Generator::kResultElementCount; // Cost of generation of a single element (in cycles). static constexpr int kElementCost = 3; // Indicate that this distribution may take variable number of samples @@ -200,6 +199,7 @@ class UniformDistribution { PHILOX_DEVICE_INLINE ResultType operator()(Generator* gen) { #ifdef __CUDA_ARCH__ + static_assert(!IsVec, "Can't vectorize Distribution on GPU"); typename Generator::ResultType sample = (*gen)(); ResultType result; for (int i = 0; i < kResultElementCount; ++i) { @@ -207,14 +207,14 @@ class UniformDistribution { } return result; #else - return VectorizedFormat, Generator>( - gen, InternalUint32ToFloat); + return VectorizedFormat, + Generator>(gen, InternalUint32ToFloat); #endif // __CUDA_ARCH__ } }; -template -class UniformDistribution { +template +class UniformDistribution { public: // The number of elements that will be returned. static constexpr int kResultElementCount = Generator::kResultElementCount / 2; @@ -237,8 +237,8 @@ class UniformDistribution { } }; -template -class UniformDistribution { +template +class UniformDistribution { public: // The number of elements that will be returned. static constexpr int kResultElementCount = Generator::kResultElementCount; @@ -272,8 +272,8 @@ class UniformDistribution { uint32 range_; }; -template -class UniformDistribution { +template +class UniformDistribution { public: // The number of elements that will be returned. static constexpr int kResultElementCount = Generator::kResultElementCount / 2; From 1ed264d86be06ee7ac7cabfe5c3806666323f700 Mon Sep 17 00:00:00 2001 From: Teng Lu Date: Tue, 14 Jul 2020 01:44:45 +0800 Subject: [PATCH 0298/2522] Fix wrong skip issue of double/int64. 
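The fix below replaces the plain division used for generator skipping with a ceiling division, so distributions that emit fewer elements per call than the generator (double/int64) still advance the generator state. A small self-contained sketch of the arithmetic; the example counts mirror the comment added in the patch.

#include <cstdint>
#include <cstdio>

// Ceiling division: generator calls consumed by one output group.
constexpr int64_t SkipStrides(int64_t group_size, int64_t gen_count) {
  return (group_size + gen_count - 1) / gen_count;
}

int main() {
  std::printf("%lld\n", static_cast<long long>(SkipStrides(4, 4)));   // 1: normal distributions
  std::printf("%lld\n", static_cast<long long>(SkipStrides(2, 4)));   // 1: double/int64 (plain division would give 0)
  std::printf("%lld\n", static_cast<long long>(SkipStrides(16, 4)));  // 4: vectorized float/bfloat16
  return 0;
}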
--- tensorflow/core/kernels/random_op_cpu.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/random_op_cpu.h b/tensorflow/core/kernels/random_op_cpu.h index 83e27741a72..86dc7752c46 100644 --- a/tensorflow/core/kernels/random_op_cpu.h +++ b/tensorflow/core/kernels/random_op_cpu.h @@ -86,7 +86,13 @@ struct FillPhiloxRandomTask { int64 start_group, int64 limit_group, Distribution dist) { const int kGroupSize = Distribution::kResultElementCount; - gen.Skip(start_group * kGroupSize / gen.kResultElementCount); + // Decide skip strides according to different kResultElementCount: + // * `1 = (4 + 3) / 4` for normal Distribution. + // * `1 = (2 + 3) / 4` for double/int64 Distribution. + // * `4 = (16 + 3) / 4` for vecotorized float/bfloat16 Distribution. + const int skip_strides = + (kGroupSize + gen.kResultElementCount - 1) / gen.kResultElementCount; + gen.Skip(start_group * skip_strides); int64 offset = start_group * kGroupSize; // First fill all the full-size groups From 0c47b7b33e3e37b9f4fc3b79760358fdca615475 Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Mon, 13 Jul 2020 14:55:13 -0400 Subject: [PATCH 0299/2522] Update topological_sort.cc --- tensorflow/core/grappler/utils/topological_sort.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/grappler/utils/topological_sort.cc b/tensorflow/core/grappler/utils/topological_sort.cc index 14a58753c9f..49a67a3497e 100644 --- a/tensorflow/core/grappler/utils/topological_sort.cc +++ b/tensorflow/core/grappler/utils/topological_sort.cc @@ -96,8 +96,8 @@ Status ComputeTopologicalOrder( "at node = " << graph.node(back).DebugString(); for (int i = 0; i < graph_view.num_nodes(); ++i) { - const int graph_view_GetFanin_i_size = graph_view.GetFanin(i).size(); - if (num_ready_inputs[i] != graph_view_GetFanin_i_size) { + const int graph_view_Get_fanin_i_size = graph_view.GetFanin(i).size(); + if (num_ready_inputs[i] != graph_view_Get_fanin_i_size) { VLOG(1) << "Node not ready: " << graph.node(i).DebugString(); } } From edc5e7f646a76342843ff58a6f78fc8257a4bd45 Mon Sep 17 00:00:00 2001 From: Henry Tan Date: Mon, 13 Jul 2020 12:00:53 -0700 Subject: [PATCH 0300/2522] Replace std::variant with absl::variant to make it more universally build against different env. 
PiperOrigin-RevId: 321003679 Change-Id: Ic0e9e8af2bfb474133023958a6b90ee7f10f7bfd --- tensorflow/core/tpu/kernels/BUILD | 1 + tensorflow/core/tpu/kernels/tpu_compile_op_support.cc | 2 +- tensorflow/core/tpu/kernels/tpu_compile_op_support.h | 7 ++++--- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index 745f54df5e1..80c5aa71069 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -170,6 +170,7 @@ cc_library( "@com_google_absl//absl/strings", "@com_google_absl//absl/types:optional", "@com_google_absl//absl/types:span", + "@com_google_absl//absl/types:variant", ], ) diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_support.cc b/tensorflow/core/tpu/kernels/tpu_compile_op_support.cc index a8a66718118..cf5d6005653 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_support.cc +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_support.cc @@ -437,7 +437,7 @@ StatusOr CreateTpuAotCompilationRequest( } StatusOr CreateTpuCompilationRequest( - const std::variant& computation, + const absl::variant& computation, const TPUCompileMetadataProto& metadata, const std::vector& arg_shapes) { VLOG(1) << "CreateTpuCompilationRequest."; diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_support.h b/tensorflow/core/tpu/kernels/tpu_compile_op_support.h index bf1aff46578..ca936e4c213 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_support.h +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_support.h @@ -21,6 +21,7 @@ limitations under the License. #include "absl/strings/string_view.h" #include "absl/types/optional.h" #include "absl/types/span.h" +#include "absl/types/variant.h" #include "tensorflow/cc/framework/ops.h" #include "tensorflow/compiler/tf2xla/xla_compiler.h" #include "tensorflow/compiler/xla/service/hlo_module_config.h" @@ -49,8 +50,8 @@ struct MlirToHloArgs { }; // Variant of guaranteed constant tensors types. -using GuaranteedConsts = std::variant, - const OpInputList* const>; +using GuaranteedConsts = absl::variant, + const OpInputList* const>; // List of parameters for lowering function library definition to HLO IR. struct FunctionToHloArgs { @@ -149,7 +150,7 @@ CreateTpuAotCompilationRequest( const absl::optional& device_assignment); se::port::StatusOr CreateTpuCompilationRequest( - const std::variant& computation, + const absl::variant& computation, const TPUCompileMetadataProto& metadata, const std::vector& arg_shapes); From 367fc16ca08e4715d9530e78b2df13a840b3a40b Mon Sep 17 00:00:00 2001 From: Yiwen Li Date: Mon, 13 Jul 2020 12:24:01 -0700 Subject: [PATCH 0301/2522] change minor typos --- tensorflow/python/ops/math_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index d8f309a29a7..b14b392e550 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -3023,7 +3023,7 @@ def trace(x, name=None): in x. 
If x is of rank `k` with shape `[I, J, K, ..., L, M, N]`, then output is a tensor of rank `k-2` with dimensions `[I, J, K, ..., L]` where - `output[i, j, k, ..., l] = trace(x[i, j, i, ..., l, :, :])` + `output[i, j, k, ..., l] = trace(x[i, j, k, ..., l, :, :])` For example: From bcd09022dd6900fdb33a810c62ea5d14d8543045 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Mon, 13 Jul 2020 12:25:24 -0700 Subject: [PATCH 0302/2522] [MLIR:TF] Fold no-op reshape operations PiperOrigin-RevId: 321008797 Change-Id: I0b4bea67634fd8bcdfce590d1e2d2acd72458cd4 --- .../mlir/tensorflow/ir/tf_generated_ops.td | 1 + .../compiler/mlir/tensorflow/ir/tf_ops.cc | 95 +++++++++++++++++ .../mlir/tensorflow/tests/canonicalize.mlir | 67 ++++++++++++ .../mlir/tensorflow/tests/lower_tf.mlir | 16 +-- .../tensorflow/tests/unroll-batch-matmul.mlir | 100 ++++++++---------- 5 files changed, 211 insertions(+), 68 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td index 7bbdce6b985..0ef650487a8 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td @@ -7284,6 +7284,7 @@ reshape(t, []) ==> 7 }]; let hasCanonicalizer = 1; + let hasFolder = 1; } def TF_ResizeBilinearOp : TF_Op<"ResizeBilinear", [NoSideEffect]> { diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc index 101de17122a..83ad319002a 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc @@ -2917,6 +2917,101 @@ void ReshapeOp::getCanonicalizationPatterns(OwningRewritePatternList &results, results.insert(context); } +OpFoldResult ReshapeOp::fold(ArrayRef operands) { + Value tensor = this->tensor(); + Value shape = this->shape(); + + // Fold reshape if operand and result types are the same and all dimensions + // are statically known (no-op reshape). + // TODO(ezhulenev): Add the same folding for BroadcastToOp. + auto result_ty = getType().dyn_cast(); + if (result_ty && result_ty.hasStaticShape() && + result_ty == tensor.getType()) { + return tensor; + } + + // Fold reshape if the shape is computed from the input tensor: + // + // %shape = tf.Shape(%arg) // [? x ...] + // %dim0 = tf.StridedSlice(%shape, 0, 1, 1) // get unknown dim value + // %new_shape = tf.Pack(dim0, ...) { axis = 0 } // [? x ...] + // %reshape = tf.Reshape(%arg, %new_shape) // this is no-op + // + // Where `...` are some statically known dimensions. In this case reshape is + // a no-op and can be replaced by %arg (assuming `...` are equal). + auto pack_op = dyn_cast_or_null(shape.getDefiningOp()); + if (!pack_op || pack_op.values().size() < 2) return {}; + + // Dimensions packed along axis = 0 (pack scalars into vector). + if (pack_op.axis().getSExtValue() != 0) return {}; + + // First packed value is defined by a strided slice operation. + auto slice_op = + dyn_cast_or_null(pack_op.values()[0].getDefiningOp()); + if (!slice_op) return {}; + + // Input to the slice op is defined by shape operation. + auto shape_op = dyn_cast_or_null(slice_op.input().getDefiningOp()); + if (!shape_op || shape_op.input() != tensor) return {}; + + // All masks are `0` except `shrink_axis_mask` which is equal to `1` (slicing + // scalar value from input vector). 
+ if (slice_op.begin_mask().getSExtValue() != 0 || + slice_op.ellipsis_mask().getSExtValue() != 0 || + slice_op.end_mask().getSExtValue() != 0 || + slice_op.new_axis_mask().getSExtValue() != 0 || + slice_op.shrink_axis_mask().getSExtValue() != 1) + return {}; + + // Returns a value if the `value` is defined by a ConstOp with a single + // integer element in it and has an expected rank. + auto get_value = [](Value value, int expected_rank) -> Optional { + auto const_op = dyn_cast_or_null(value.getDefiningOp()); + if (!const_op) return None; + + auto value_attr = const_op.value().dyn_cast(); + if (!value_attr || value_attr.getNumElements() != 1) return None; + + auto value_ty = value_attr.getType(); + if (!value_ty.hasRank() || value_ty.getRank() != expected_rank) return None; + + auto splat = value_attr.getSplatValue(); + return splat.getValue().getSExtValue(); + }; + + // All other packed values are scalar constants. + SmallVector packed_dims; + packed_dims.reserve(pack_op.values().size() - 1); + for (Value operand : llvm::drop_begin(pack_op.values(), 1)) { + if (auto dim = get_value(operand, /*expected_rank=*/0)) { + packed_dims.push_back(*dim); + } else { + return {}; + } + } + + // Slice exactly the first shape dimension: + // begin = [0] end = [1], strides = [1] + auto begin = get_value(slice_op.begin(), /*expected_rank=*/1); + auto end = get_value(slice_op.end(), /*expected_rank=*/1); + auto strides = get_value(slice_op.strides(), /*expected_rank=*/1); + if (!begin.hasValue() || !end.hasValue() || !strides.hasValue() || + *begin != 0 || *end != 1 || *strides != 1) + return {}; + + // First tensor dimension is dynamic. + auto arg_ty = tensor.getType().dyn_cast(); + if (!arg_ty || arg_ty.getNumDynamicDims() != 1 || !arg_ty.isDynamicDim(0)) + return {}; + + // All other dimensions are statically known and equal to packed dims. + auto arg_dims = llvm::drop_begin(arg_ty.getShape(), 1); + if (!std::equal(arg_dims.begin(), arg_dims.end(), packed_dims.begin())) + return {}; + + return tensor; +} + //===----------------------------------------------------------------------===// // SelectOp //===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir b/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir index 8597740a4ae..3a948bdd2c3 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir @@ -377,6 +377,73 @@ func @testRedundantReshape(%arg0: tensor<4x4xi32>) -> tensor<2x8xi32> { // CHECK: return %1 : tensor<2x8xi32> } +// CHECK-LABEL: func @testReshapeNoOp +func @testReshapeNoOp(%arg0: tensor<2x4xf32>, %arg1: tensor<2xi32>) -> tensor<2x4xf32> { + %0 = "tf.Reshape"(%arg0, %arg1) : (tensor<2x4xf32>, tensor<2xi32>) -> tensor<2x4xf32> + + // CHECK: return %arg0 + return %0 : tensor<2x4xf32> +} + +// CHECK-LABEL: func @testReshapeNoOpShapeComputation +func @testReshapeNoOpShapeComputation(%arg0: tensor, %arg1: tensor) -> (tensor, tensor, tensor, tensor, tensor) { + // Test dimensions sizes. + %d1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %d2 = "tf.Const"() {value = dense<2> : tensor} : () -> tensor + + // Slice bounds. 
+ %0 = "tf.Const"() {value = dense<0> : tensor<1xi32>} : () -> tensor<1xi32> + %1 = "tf.Const"() {value = dense<1> : tensor<1xi32>} : () -> tensor<1xi32> + %2 = "tf.Const"() {value = dense<2> : tensor<1xi32>} : () -> tensor<1xi32> + + // Fold reshape if the shape is computed from the input tensor: + // + // %shape = tf.Shape(%arg) // [? x ...] + // %dim0 = tf.StridedSlice(%shape, 0, 1, 1) // get unknown dim value + // %new_shape = tf.Pack(dim0, ...) { axis = 0 } // [? x ...] + // %reshape = tf.Reshape(%arg, %new_shape) // this is no-op + // + // Where `...` are some statically known dimensions. In this case reshape is + // a no-op and can be replaced by %arg (assuming `...` are equal). + + // Test Rank 2 + %3 = "tf.Shape"(%arg0) : (tensor) -> tensor<2xi32> + %4 = "tf.StridedSlice"(%3, %0, %1, %1) {shrink_axis_mask = 1 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %5 = "tf.Pack"(%4, %d1) {axis = 0 : i64} : (tensor, tensor) -> tensor<2xi32> + %6 = "tf.Reshape"(%arg0, %5) : (tensor, tensor<2xi32>) -> tensor + + // Test Rank 3. + + %7 = "tf.Shape"(%arg1) : (tensor) -> tensor<3xi32> + %8 = "tf.StridedSlice"(%7, %0, %1, %1) {shrink_axis_mask = 1 : i64} : (tensor<3xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %9 = "tf.Pack"(%8, %d1, %d2) {axis = 0 : i64} : (tensor, tensor, tensor) -> tensor<3xi32> + %10 = "tf.Reshape"(%arg1, %9) : (tensor, tensor<3xi32>) -> tensor + + // Shape was taken from the op that is not reshaped in the end: + // Reshape(%arg1) vs Shape(%arg0) + %11 = "tf.StridedSlice"(%3, %0, %1, %1) {shrink_axis_mask = 1 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %12 = "tf.Pack"(%11, %d1, %d2) {axis = 0 : i64} : (tensor, tensor, tensor) -> tensor<3xi32> + // CHECK: %[[RESHAPE0:.*]] = "tf.Reshape" + %13 = "tf.Reshape"(%arg1, %12) : (tensor, tensor<3xi32>) -> tensor + + // Packed dimensions have different order from the reshape operand: + // [?, 1, 2] vs [?, 2, 1] + %14 = "tf.StridedSlice"(%7, %0, %1, %1) {shrink_axis_mask = 1 : i64} : (tensor<3xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %15 = "tf.Pack"(%14, %d2, %d1) {axis = 0 : i64} : (tensor, tensor, tensor) -> tensor<3xi32> + // CHECK: %[[RESHAPE1:.*]] = "tf.Reshape" + %16 = "tf.Reshape"(%arg1, %15) : (tensor, tensor<3xi32>) -> tensor + + // StridedSlice takes second dimension from the shape: + // begin = [1], end = [2], stride = [1] + %17 = "tf.StridedSlice"(%7, %1, %2, %1) {shrink_axis_mask = 1 : i64} : (tensor<3xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %18 = "tf.Pack"(%17, %d1, %d2) {axis = 0 : i64} : (tensor, tensor, tensor) -> tensor<3xi32> + // CHECK: %[[RESHAPE2:.*]] = "tf.Reshape" + %19 = "tf.Reshape"(%arg1, %18) : (tensor, tensor<3xi32>) -> tensor + + // CHECK: return %arg0, %arg1, %[[RESHAPE0]], %[[RESHAPE1]], %[[RESHAPE2]] + return %6, %10, %13, %16, %19 : tensor, tensor, tensor, tensor, tensor +} + // CHECK-LABEL: testSelectScalarPred func @testSelectScalarPred(%arg0: tensor, %arg1: tensor<4x2xf16>, %arg2: tensor<4x2xf16>) -> tensor<4x2xf16> { // CHECK-NEXT: "tf.SelectV2"(%arg0, %arg1, %arg2) : (tensor, tensor<4x2xf16>, tensor<4x2xf16>) -> tensor<4x2xf16> diff --git a/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir b/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir index 3215055a249..78e10fa797f 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir @@ -371,9 +371,7 @@ func @addN_variant(%arg0: 
tensor>>, %arg1: tensor) -> tensor<2x2xf32> { - // CHECK-DAG: %[[SHAPE:.*]] = "tf.Const"() {value = dense<[-1, 2]> : tensor<2xi64>} : () -> tensor<2xi64> - // CHECK-DAG: %[[INP:.*]] = "tf.Reshape"(%arg0, %[[SHAPE]]) : (tensor<2x2xf32>, tensor<2xi64>) -> tensor<2x2xf32> - // CHECK-DAG: %[[ITEMS:.*]]:2 = "tf.Unpack"(%[[INP]]) {axis = 0 : i64} : (tensor<2x2xf32>) -> (tensor<2xf32>, tensor<2xf32>) + // CHECK-DAG: %[[ITEMS:.*]]:2 = "tf.Unpack"(%arg0) {axis = 0 : i64} : (tensor<2x2xf32>) -> (tensor<2xf32>, tensor<2xf32>) // CHECK-DAG: %[[AXIS:.*]] = "tf.Const"() {value = dense<0> : tensor} : () -> tensor // CHECK-DAG: %[[RESULT:.*]] = "tf.ConcatV2"(%[[ITEMS]]#1, %[[ITEMS]]#0, %[[AXIS]]) : (tensor<2xf32>, tensor<2xf32>, tensor) -> tensor<2x2xf32> // CHECK: return %[[RESULT]] @@ -411,9 +409,7 @@ func @DynamicStitch_uint8(%arg0: tensor<2x2xui8>) -> tensor<2x2xui8> { // CHECK-LABEL: func @DynamicStitch_scalar_item func @DynamicStitch_scalar_item(%arg0: tensor<2xf32>) -> tensor<2xf32> { - // CHECK-DAG: %[[SHAPE:.*]] = "tf.Const"() {value = dense<-1> : tensor<1xi64>} : () -> tensor<1xi64> - // CHECK-DAG: %[[INP:.*]] = "tf.Reshape"(%arg0, %[[SHAPE]]) : (tensor<2xf32>, tensor<1xi64>) -> tensor<2xf32> - // CHECK-DAG: %[[ITEMS]]:2 = "tf.Unpack"(%[[INP]]) {axis = 0 : i64} : (tensor<2xf32>) -> (tensor, tensor) + // CHECK-DAG: %[[ITEMS]]:2 = "tf.Unpack"(%arg0) {axis = 0 : i64} : (tensor<2xf32>) -> (tensor, tensor) // CHECK-DAG: %[[AXIS:.*]] = "tf.Const"() {value = dense<0> : tensor} : () -> tensor // CHECK-DAG: %[[RESULT]] = "tf.ConcatV2"(%[[ITEMS]]#1, %[[ITEMS]]#0, %[[AXIS]]) : (tensor, tensor, tensor) -> tensor<2xf32> // CHECK: return %[[RESULT]] @@ -425,9 +421,7 @@ func @DynamicStitch_scalar_item(%arg0: tensor<2xf32>) -> tensor<2xf32> { // CHECK-LABEL: func @DynamicStitch_matrix_item func @DynamicStitch_matrix_item(%arg0: tensor<2x2x2xf32>) -> tensor<2x2x2xf32> { - // CHECK-DAG: %[[SHAPE:.*]] = "tf.Const"() {value = dense<[-1, 2, 2]> : tensor<3xi64>} : () -> tensor<3xi64> - // CHECK-DAG: %[[INP:.*]] = "tf.Reshape"(%arg0, %[[SHAPE]]) : (tensor<2x2x2xf32>, tensor<3xi64>) -> tensor<2x2x2xf32> - // CHECK-DAG: %[[ITEMS:.*]]:2 = "tf.Unpack"(%[[INP]]) {axis = 0 : i64} : (tensor<2x2x2xf32>) -> (tensor<2x2xf32>, tensor<2x2xf32>) + // CHECK-DAG: %[[ITEMS:.*]]:2 = "tf.Unpack"(%arg0) {axis = 0 : i64} : (tensor<2x2x2xf32>) -> (tensor<2x2xf32>, tensor<2x2xf32>) // CHECK-DAG: %[[AXIS:.*]] = "tf.Const"() {value = dense<0> : tensor} : () -> tensor // CHECK-DAG: %[[RESULT:.*]] = "tf.ConcatV2"(%[[ITEMS]]#1, %[[ITEMS]]#0, %[[AXIS]]) : (tensor<2x2xf32>, tensor<2x2xf32>, tensor) -> tensor<2x2x2xf32> // CHECK: return %[[RESULT]] @@ -446,9 +440,7 @@ func @DynamicStitch_dynamic(%arg0: tensor<*xi32>, %arg1: tensor<*xf32>) -> tenso // CHECK-LABEL: func @DynamicStitch_duplicates func @DynamicStitch_duplicates(%arg0: tensor<2x2xf32>) -> tensor<1x2xf32> { - // CHECK-DAG: %[[SHAPE:.*]] = "tf.Const"() {value = dense<[-1, 2]> : tensor<2xi64>} : () -> tensor<2xi64> - // CHECK-DAG: %[[INP:.*]] = "tf.Reshape"(%arg0, %[[SHAPE]]) : (tensor<2x2xf32>, tensor<2xi64>) -> tensor<2x2xf32> - // CHECK-DAG: %[[ITEMS:.*]]:2 = "tf.Unpack"(%[[INP]]) {axis = 0 : i64} : (tensor<2x2xf32>) -> (tensor<2xf32>, tensor<2xf32>) + // CHECK-DAG: %[[ITEMS:.*]]:2 = "tf.Unpack"(%arg0) {axis = 0 : i64} : (tensor<2x2xf32>) -> (tensor<2xf32>, tensor<2xf32>) // CHECK-DAG: %[[AXIS:.*]] = "tf.Const"() {value = dense<0> : tensor} : () -> tensor // CHECK-DAG: %[[RESULT:.*]] = "tf.ConcatV2"(%[[ITEMS]]#1, %[[AXIS]]) : (tensor<2xf32>, tensor) -> tensor<1x2xf32> // CHECK: return 
%[[RESULT]] diff --git a/tensorflow/compiler/mlir/tensorflow/tests/unroll-batch-matmul.mlir b/tensorflow/compiler/mlir/tensorflow/tests/unroll-batch-matmul.mlir index 5a3f0b6e997..7cf5f19523d 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/unroll-batch-matmul.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/unroll-batch-matmul.mlir @@ -67,41 +67,35 @@ func @batchMatMulV2FlatInput(%arg0: tensor<3x4x5xf32>, %arg1: tensor<3x5x6xf32>) return %0 : tensor<3x4x6xf32> // CHECK-LABEL: batchMatMulV2FlatInput - // CHECK: %[[cst:.*]] = "tf.Const"() {value = dense<[3, 4, 5]> : tensor<3xi64>} // CHECK: %[[cst_0:.*]] = "tf.Const"() {value = dense<[1, 4, 5]> : tensor<3xi64>} // CHECK: %[[cst_1:.*]] = "tf.Const"() {value = dense<[4, 5]> : tensor<2xi64>} - // CHECK: %[[cst_2:.*]] = "tf.Const"() {value = dense<[3, 5, 6]> : tensor<3xi64>} - // CHECK: %[[cst_3:.*]] = "tf.Const"() {value = dense<0> : tensor<3xi64>} - // CHECK: %[[cst_4:.*]] = "tf.Const"() {value = dense<[1, 0, 0]> : tensor<3xi64>} - // CHECK: %[[cst_5:.*]] = "tf.Const"() {value = dense<[2, 0, 0]> : tensor<3xi64>} - // CHECK: %[[cst_6:.*]] = "tf.Const"() {value = dense<[1, 5, 6]> : tensor<3xi64>} - // CHECK: %[[cst_7:.*]] = "tf.Const"() {value = dense<[5, 6]> : tensor<2xi64>} - // CHECK: %[[cst_8:.*]] = "tf.Const"() {value = dense<[3, 4, 6]> : tensor<3xi64>} + // CHECK: %[[cst_2:.*]] = "tf.Const"() {value = dense<0> : tensor<3xi64>} + // CHECK: %[[cst_3:.*]] = "tf.Const"() {value = dense<[1, 0, 0]> : tensor<3xi64>} + // CHECK: %[[cst_4:.*]] = "tf.Const"() {value = dense<[2, 0, 0]> : tensor<3xi64>} + // CHECK: %[[cst_5:.*]] = "tf.Const"() {value = dense<[1, 5, 6]> : tensor<3xi64>} + // CHECK: %[[cst_6:.*]] = "tf.Const"() {value = dense<[5, 6]> : tensor<2xi64>} - // CHECK: %[[v0:.*]] = "tf.Reshape"(%arg0, %[[cst]]) : (tensor<3x4x5xf32>, tensor<3xi64>) -> tensor<3x4x5xf32> - // CHECK: %[[v1:.*]] = "tf.Slice"(%[[v0]], %[[cst_3]], %[[cst_0]]) : (tensor<3x4x5xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x4x5xf32> - // CHECK: %[[v2:.*]] = "tf.Reshape"(%[[v1]], %[[cst_1]]) : (tensor<1x4x5xf32>, tensor<2xi64>) -> tensor<4x5xf32> - // CHECK: %[[v3:.*]] = "tf.Slice"(%[[v0]], %[[cst_4]], %[[cst_0]]) : (tensor<3x4x5xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x4x5xf32> - // CHECK: %[[v4:.*]] = "tf.Reshape"(%[[v3]], %[[cst_1]]) : (tensor<1x4x5xf32>, tensor<2xi64>) -> tensor<4x5xf32> - // CHECK: %[[v5:.*]] = "tf.Slice"(%[[v0]], %[[cst_5]], %[[cst_0]]) : (tensor<3x4x5xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x4x5xf32> - // CHECK: %[[v6:.*]] = "tf.Reshape"(%[[v5]], %[[cst_1]]) : (tensor<1x4x5xf32>, tensor<2xi64>) -> tensor<4x5xf32> + // CHECK: %[[v0:.*]] = "tf.Slice"(%arg0, %[[cst_2]], %[[cst_0]]) : (tensor<3x4x5xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x4x5xf32> + // CHECK: %[[v1:.*]] = "tf.Reshape"(%[[v0]], %[[cst_1]]) : (tensor<1x4x5xf32>, tensor<2xi64>) -> tensor<4x5xf32> + // CHECK: %[[v2:.*]] = "tf.Slice"(%arg0, %[[cst_3]], %[[cst_0]]) : (tensor<3x4x5xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x4x5xf32> + // CHECK: %[[v3:.*]] = "tf.Reshape"(%[[v2]], %[[cst_1]]) : (tensor<1x4x5xf32>, tensor<2xi64>) -> tensor<4x5xf32> + // CHECK: %[[v4:.*]] = "tf.Slice"(%arg0, %[[cst_4]], %[[cst_0]]) : (tensor<3x4x5xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x4x5xf32> + // CHECK: %[[v5:.*]] = "tf.Reshape"(%[[v4]], %[[cst_1]]) : (tensor<1x4x5xf32>, tensor<2xi64>) -> tensor<4x5xf32> - // CHECK: %[[v7:.*]] = "tf.Reshape"(%arg1, %[[cst_2]]) : (tensor<3x5x6xf32>, tensor<3xi64>) -> tensor<3x5x6xf32> - // CHECK: %[[v8:.*]] = "tf.Slice"(%[[v7]], 
%[[cst_3]], %[[cst_6]]) : (tensor<3x5x6xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x5x6xf32> - // CHECK: %[[v9:.*]] = "tf.Reshape"(%[[v8]], %[[cst_7]]) : (tensor<1x5x6xf32>, tensor<2xi64>) -> tensor<5x6xf32> - // CHECK: %[[v10:.*]] = "tf.Slice"(%[[v7]], %[[cst_4]], %[[cst_6]]) : (tensor<3x5x6xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x5x6xf32> - // CHECK: %[[v11:.*]] = "tf.Reshape"(%[[v10]], %[[cst_7]]) : (tensor<1x5x6xf32>, tensor<2xi64>) -> tensor<5x6xf32> - // CHECK: %[[v12:.*]] = "tf.Slice"(%[[v7]], %[[cst_5]], %[[cst_6]]) : (tensor<3x5x6xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x5x6xf32> - // CHECK: %[[v13:.*]] = "tf.Reshape"(%[[v12]], %[[cst_7]]) : (tensor<1x5x6xf32>, tensor<2xi64>) -> tensor<5x6xf32> + // CHECK: %[[v6:.*]] = "tf.Slice"(%arg1, %[[cst_2]], %[[cst_5]]) : (tensor<3x5x6xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x5x6xf32> + // CHECK: %[[v7:.*]] = "tf.Reshape"(%[[v6]], %[[cst_6]]) : (tensor<1x5x6xf32>, tensor<2xi64>) -> tensor<5x6xf32> + // CHECK: %[[v8:.*]] = "tf.Slice"(%arg1, %[[cst_3]], %[[cst_5]]) : (tensor<3x5x6xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x5x6xf32> + // CHECK: %[[v9:.*]] = "tf.Reshape"(%[[v8]], %[[cst_6]]) : (tensor<1x5x6xf32>, tensor<2xi64>) -> tensor<5x6xf32> + // CHECK: %[[v10:.*]] = "tf.Slice"(%arg1, %[[cst_4]], %[[cst_5]]) : (tensor<3x5x6xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x5x6xf32> + // CHECK: %[[v11:.*]] = "tf.Reshape"(%[[v10]], %[[cst_6]]) : (tensor<1x5x6xf32>, tensor<2xi64>) -> tensor<5x6xf32> - // CHECK: %[[v14:.*]] = "tf.MatMul"(%[[v2]], %[[v9]]) {transpose_a = false, transpose_b = false} : (tensor<4x5xf32>, tensor<5x6xf32>) -> tensor<4x6xf32> - // CHECK: %[[v15:.*]] = "tf.MatMul"(%[[v4]], %[[v11]]) {transpose_a = false, transpose_b = false} : (tensor<4x5xf32>, tensor<5x6xf32>) -> tensor<4x6xf32> - // CHECK: %[[v16:.*]] = "tf.MatMul"(%[[v6]], %[[v13]]) {transpose_a = false, transpose_b = false} : (tensor<4x5xf32>, tensor<5x6xf32>) -> tensor<4x6xf32> + // CHECK: %[[mm0:.*]] = "tf.MatMul"(%[[v1]], %[[v7]]) {transpose_a = false, transpose_b = false} : (tensor<4x5xf32>, tensor<5x6xf32>) -> tensor<4x6xf32> + // CHECK: %[[mm1:.*]] = "tf.MatMul"(%[[v3]], %[[v9]]) {transpose_a = false, transpose_b = false} : (tensor<4x5xf32>, tensor<5x6xf32>) -> tensor<4x6xf32> + // CHECK: %[[mm2:.*]] = "tf.MatMul"(%[[v5]], %[[v11]]) {transpose_a = false, transpose_b = false} : (tensor<4x5xf32>, tensor<5x6xf32>) -> tensor<4x6xf32> - // CHECK: %[[v17:.*]] = "tf.Pack"(%[[v14]], %[[v15]], %[[v16]]) {axis = 0 : i64} : (tensor<4x6xf32>, tensor<4x6xf32>, tensor<4x6xf32>) -> tensor<3x4x6xf32> - // CHECK: %[[v18:.*]] = "tf.Reshape"(%[[v17]], %[[cst_8]]) : (tensor<3x4x6xf32>, tensor<3xi64>) -> tensor<3x4x6xf32> + // CHECK: %[[v17:.*]] = "tf.Pack"(%[[mm0]], %[[mm1]], %[[mm2]]) {axis = 0 : i64} : (tensor<4x6xf32>, tensor<4x6xf32>, tensor<4x6xf32>) -> tensor<3x4x6xf32> - // CHECK: return %[[v18]] : tensor<3x4x6xf32> + // CHECK: return %[[v17]] : tensor<3x4x6xf32> } // ----- @@ -184,41 +178,35 @@ func @batchMatMulFlatInput(%arg0: tensor<3x4x5xf32>, %arg1: tensor<3x5x6xf32>) - return %0 : tensor<3x4x6xf32> // CHECK-LABEL: batchMatMulFlatInput - // CHECK: %[[cst:.*]] = "tf.Const"() {value = dense<[3, 4, 5]> : tensor<3xi64>} // CHECK: %[[cst_0:.*]] = "tf.Const"() {value = dense<[1, 4, 5]> : tensor<3xi64>} // CHECK: %[[cst_1:.*]] = "tf.Const"() {value = dense<[4, 5]> : tensor<2xi64>} - // CHECK: %[[cst_2:.*]] = "tf.Const"() {value = dense<[3, 5, 6]> : tensor<3xi64>} - // CHECK: %[[cst_3:.*]] = "tf.Const"() {value = dense<0> : tensor<3xi64>} - // 
CHECK: %[[cst_4:.*]] = "tf.Const"() {value = dense<[1, 0, 0]> : tensor<3xi64>} - // CHECK: %[[cst_5:.*]] = "tf.Const"() {value = dense<[2, 0, 0]> : tensor<3xi64>} - // CHECK: %[[cst_6:.*]] = "tf.Const"() {value = dense<[1, 5, 6]> : tensor<3xi64>} - // CHECK: %[[cst_7:.*]] = "tf.Const"() {value = dense<[5, 6]> : tensor<2xi64>} - // CHECK: %[[cst_8:.*]] = "tf.Const"() {value = dense<[3, 4, 6]> : tensor<3xi64>} + // CHECK: %[[cst_2:.*]] = "tf.Const"() {value = dense<0> : tensor<3xi64>} + // CHECK: %[[cst_3:.*]] = "tf.Const"() {value = dense<[1, 0, 0]> : tensor<3xi64>} + // CHECK: %[[cst_4:.*]] = "tf.Const"() {value = dense<[2, 0, 0]> : tensor<3xi64>} + // CHECK: %[[cst_5:.*]] = "tf.Const"() {value = dense<[1, 5, 6]> : tensor<3xi64>} + // CHECK: %[[cst_6:.*]] = "tf.Const"() {value = dense<[5, 6]> : tensor<2xi64>} - // CHECK: %[[v0:.*]] = "tf.Reshape"(%arg0, %[[cst]]) : (tensor<3x4x5xf32>, tensor<3xi64>) -> tensor<3x4x5xf32> - // CHECK: %[[v1:.*]] = "tf.Slice"(%[[v0]], %[[cst_3]], %[[cst_0]]) : (tensor<3x4x5xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x4x5xf32> - // CHECK: %[[v2:.*]] = "tf.Reshape"(%[[v1]], %[[cst_1]]) : (tensor<1x4x5xf32>, tensor<2xi64>) -> tensor<4x5xf32> - // CHECK: %[[v3:.*]] = "tf.Slice"(%[[v0]], %[[cst_4]], %[[cst_0]]) : (tensor<3x4x5xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x4x5xf32> - // CHECK: %[[v4:.*]] = "tf.Reshape"(%[[v3]], %[[cst_1]]) : (tensor<1x4x5xf32>, tensor<2xi64>) -> tensor<4x5xf32> - // CHECK: %[[v5:.*]] = "tf.Slice"(%[[v0]], %[[cst_5]], %[[cst_0]]) : (tensor<3x4x5xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x4x5xf32> - // CHECK: %[[v6:.*]] = "tf.Reshape"(%[[v5]], %[[cst_1]]) : (tensor<1x4x5xf32>, tensor<2xi64>) -> tensor<4x5xf32> + // CHECK: %[[v0:.*]] = "tf.Slice"(%arg0, %[[cst_2]], %[[cst_0]]) : (tensor<3x4x5xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x4x5xf32> + // CHECK: %[[v1:.*]] = "tf.Reshape"(%[[v0]], %[[cst_1]]) : (tensor<1x4x5xf32>, tensor<2xi64>) -> tensor<4x5xf32> + // CHECK: %[[v2:.*]] = "tf.Slice"(%arg0, %[[cst_3]], %[[cst_0]]) : (tensor<3x4x5xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x4x5xf32> + // CHECK: %[[v3:.*]] = "tf.Reshape"(%[[v2]], %[[cst_1]]) : (tensor<1x4x5xf32>, tensor<2xi64>) -> tensor<4x5xf32> + // CHECK: %[[v4:.*]] = "tf.Slice"(%arg0, %[[cst_4]], %[[cst_0]]) : (tensor<3x4x5xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x4x5xf32> + // CHECK: %[[v5:.*]] = "tf.Reshape"(%[[v4]], %[[cst_1]]) : (tensor<1x4x5xf32>, tensor<2xi64>) -> tensor<4x5xf32> - // CHECK: %[[v7:.*]] = "tf.Reshape"(%arg1, %[[cst_2]]) : (tensor<3x5x6xf32>, tensor<3xi64>) -> tensor<3x5x6xf32> - // CHECK: %[[v8:.*]] = "tf.Slice"(%[[v7]], %[[cst_3]], %[[cst_6]]) : (tensor<3x5x6xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x5x6xf32> - // CHECK: %[[v9:.*]] = "tf.Reshape"(%[[v8]], %[[cst_7]]) : (tensor<1x5x6xf32>, tensor<2xi64>) -> tensor<5x6xf32> - // CHECK: %[[v10:.*]] = "tf.Slice"(%[[v7]], %[[cst_4]], %[[cst_6]]) : (tensor<3x5x6xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x5x6xf32> - // CHECK: %[[v11:.*]] = "tf.Reshape"(%[[v10]], %[[cst_7]]) : (tensor<1x5x6xf32>, tensor<2xi64>) -> tensor<5x6xf32> - // CHECK: %[[v12:.*]] = "tf.Slice"(%[[v7]], %[[cst_5]], %[[cst_6]]) : (tensor<3x5x6xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x5x6xf32> - // CHECK: %[[v13:.*]] = "tf.Reshape"(%[[v12]], %[[cst_7]]) : (tensor<1x5x6xf32>, tensor<2xi64>) -> tensor<5x6xf32> + // CHECK: %[[v6:.*]] = "tf.Slice"(%arg1, %[[cst_2]], %[[cst_5]]) : (tensor<3x5x6xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x5x6xf32> + // CHECK: %[[v7:.*]] = "tf.Reshape"(%[[v6]], 
%[[cst_6]]) : (tensor<1x5x6xf32>, tensor<2xi64>) -> tensor<5x6xf32> + // CHECK: %[[v8:.*]] = "tf.Slice"(%arg1, %[[cst_3]], %[[cst_5]]) : (tensor<3x5x6xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x5x6xf32> + // CHECK: %[[v9:.*]] = "tf.Reshape"(%[[v8]], %[[cst_6]]) : (tensor<1x5x6xf32>, tensor<2xi64>) -> tensor<5x6xf32> + // CHECK: %[[v10:.*]] = "tf.Slice"(%arg1, %[[cst_4]], %[[cst_5]]) : (tensor<3x5x6xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x5x6xf32> + // CHECK: %[[v11:.*]] = "tf.Reshape"(%[[v10]], %[[cst_6]]) : (tensor<1x5x6xf32>, tensor<2xi64>) -> tensor<5x6xf32> - // CHECK: %[[v14:.*]] = "tf.MatMul"(%[[v2]], %[[v9]]) {transpose_a = false, transpose_b = false} : (tensor<4x5xf32>, tensor<5x6xf32>) -> tensor<4x6xf32> - // CHECK: %[[v15:.*]] = "tf.MatMul"(%[[v4]], %[[v11]]) {transpose_a = false, transpose_b = false} : (tensor<4x5xf32>, tensor<5x6xf32>) -> tensor<4x6xf32> - // CHECK: %[[v16:.*]] = "tf.MatMul"(%[[v6]], %[[v13]]) {transpose_a = false, transpose_b = false} : (tensor<4x5xf32>, tensor<5x6xf32>) -> tensor<4x6xf32> + // CHECK: %[[mm0:.*]] = "tf.MatMul"(%[[v1]], %[[v7]]) {transpose_a = false, transpose_b = false} : (tensor<4x5xf32>, tensor<5x6xf32>) -> tensor<4x6xf32> + // CHECK: %[[mm1:.*]] = "tf.MatMul"(%[[v3]], %[[v9]]) {transpose_a = false, transpose_b = false} : (tensor<4x5xf32>, tensor<5x6xf32>) -> tensor<4x6xf32> + // CHECK: %[[mm2:.*]] = "tf.MatMul"(%[[v5]], %[[v11]]) {transpose_a = false, transpose_b = false} : (tensor<4x5xf32>, tensor<5x6xf32>) -> tensor<4x6xf32> - // CHECK: %[[v17:.*]] = "tf.Pack"(%[[v14]], %[[v15]], %[[v16]]) {axis = 0 : i64} : (tensor<4x6xf32>, tensor<4x6xf32>, tensor<4x6xf32>) -> tensor<3x4x6xf32> - // CHECK: %[[v18:.*]] = "tf.Reshape"(%[[v17]], %[[cst_8]]) : (tensor<3x4x6xf32>, tensor<3xi64>) -> tensor<3x4x6xf32> + // CHECK: %[[v17:.*]] = "tf.Pack"(%[[mm0]], %[[mm1]], %[[mm2]]) {axis = 0 : i64} : (tensor<4x6xf32>, tensor<4x6xf32>, tensor<4x6xf32>) -> tensor<3x4x6xf32> - // CHECK: return %[[v18]] : tensor<3x4x6xf32> + // CHECK: return %[[v17]] : tensor<3x4x6xf32> } // ----- From 018526d2ba9c29dd31c8fa6e45b7f61812096db0 Mon Sep 17 00:00:00 2001 From: Xingyu Long Date: Mon, 13 Jul 2020 15:46:48 -0400 Subject: [PATCH 0303/2522] Update docstring. --- .../bidirectional_lstm_benchmark_test.py | 48 ++++-------- ...assification_transformer_benchmark_test.py | 78 +++++++------------ 2 files changed, 45 insertions(+), 81 deletions(-) diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py index 11a1b2b2330..f18c52cf882 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py @@ -25,6 +25,15 @@ from tensorflow.python.keras.benchmarks import benchmark_util class BidirectionalLSTMBenchmark(tf.test.Benchmark): """Benchmarks for Bidirectional LSTM using `tf.test.Benchmark`.""" + # Required Arguments for measure_performance. + # x: Input data, it could be Numpy or load from tfds. + # y: Target data. If `x` is a dataset, generator instance, + # `y` should not be specified. + # loss: Loss function for model. + # optimizer: Optimizer for model. + # Other details can see in `measure_performance()` method of + # benchmark_util. 
+ def __init__(self): super(BidirectionalLSTMBenchmark, self).__init__() self.max_feature = 20000 @@ -35,7 +44,7 @@ class BidirectionalLSTMBenchmark(tf.test.Benchmark): self.imdb_x, maxlen=self.max_len) def _build_model(self): - """model from https://keras.io/examples/nlp/bidirectional_lstm_imdb/""" + """Model from https://keras.io/examples/nlp/bidirectional_lstm_imdb/.""" inputs = tf.keras.Input(shape=(None,), dtype='int32') x = tf.keras.layers.Embedding(self.max_feature, 128)(inputs) x = tf.keras.layers.Bidirectional( @@ -47,18 +56,9 @@ class BidirectionalLSTMBenchmark(tf.test.Benchmark): return model def benchmark_bidirect_lstm_imdb_bs_128(self): - """ Required Arguments for measure_performance. - - x: Input data, it could be Numpy or load from tfds. - y: Target data. If `x` is a dataset, generator instance, - `y` should not be specified. - loss: Loss function for model. - optimizer: Optimizer for model. - Other details can see in `measure_performance()` method of - benchmark_util. - """ + """Measure performance with batch_size=128 and run_iters=3.""" batch_size = 128 - run_iters = 1 + run_iters = 3 results = benchmark_util.measure_performance( self._build_model, x=self.imdb_x, @@ -73,16 +73,7 @@ class BidirectionalLSTMBenchmark(tf.test.Benchmark): iters=run_iters, wall_time=results['wall_time'], extras=results) def benchmark_bidirect_lstm_imdb_bs_256(self): - """ Required Arguments for measure_performance. - - x: Input data, it could be Numpy or load from tfds. - y: Target data. If `x` is a dataset, generator instance, - `y` should not be specified. - loss: Loss function for model. - optimizer: Optimizer for model. - Other details can see in `measure_performance()` method of - benchmark_util. - """ + """Measure performance with batch_size=256 and run_iters=2.""" batch_size = 256 run_iters = 2 results = benchmark_util.measure_performance( @@ -99,18 +90,9 @@ class BidirectionalLSTMBenchmark(tf.test.Benchmark): iters=run_iters, wall_time=results['wall_time'], extras=results) def benchmark_bidirect_lstm_imdb_bs_512(self): - """ Required Arguments for measure_performance. - - x: Input data, it could be Numpy or load from tfds. - y: Target data. If `x` is a dataset, generator instance, - `y` should not be specified. - loss: Loss function for model. - optimizer: Optimizer for model. - Other details can see in `measure_performance()` method of - benchmark_util. - """ + """Measure performance with batch_size=512 and run_iters=4.""" batch_size = 512 - run_iters = 1 + run_iters = 4 results = benchmark_util.measure_performance( self._build_model, x=self.imdb_x, diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py index f070f2ddd29..c5894379382 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py @@ -26,6 +26,15 @@ from tensorflow.python.keras.benchmarks import benchmark_util class TextWithTransformerBenchmark(tf.test.Benchmark): """Benchmarks for Text classification with Transformer using `tf.test.Benchmark`.""" + # Required Arguments for measure_performance. + # x: Input data, it could be Numpy or load from tfds. + # y: Target data. If `x` is a dataset, generator instance, + # `y` should not be specified. 
+ # loss: Loss function for model. + # optimizer: Optimizer for model. + # Other details can see in `measure_performance()` method of + # benchmark_util. + def __init__(self): super(TextWithTransformerBenchmark, self).__init__() self.max_feature = 20000 @@ -36,7 +45,7 @@ class TextWithTransformerBenchmark(tf.test.Benchmark): self.imdb_x, maxlen=self.max_len) def _build_model(self): - """model from https://keras.io/examples/nlp/text_classification_with_transformer/""" + """Model from https://keras.io/examples/nlp/text_classification_with_transformer/.""" embed_dim = 32 num_heads = 2 ff_dim = 32 @@ -60,43 +69,8 @@ class TextWithTransformerBenchmark(tf.test.Benchmark): model = tf.keras.Model(inputs=inputs, outputs=outputs) return model - def benchmark_text_classification_bs_64(self): - """ Required Arguments for measure_performance. - - x: Input data, it could be Numpy or load from tfds. - y: Target data. If `x` is a dataset, generator instance, - `y` should not be specified. - loss: Loss function for model. - optimizer: Optimizer for model. - Other details can see in `measure_performance()` method of - benchmark_util. - """ - batch_size = 64 - run_iters = 2 - results = benchmark_util.measure_performance( - self._build_model, - x=self.imdb_x, - y=self.imdb_y, - batch_size=batch_size, - run_iters=run_iters, - optimizer='adam', - loss='sparse_categorical_crossentropy', - metrics=['accuracy']) - - self.report_benchmark( - iters=run_iters, wall_time=results['wall_time'], extras=results) - def benchmark_text_classification_bs_128(self): - """ Required Arguments for measure_performance. - - x: Input data, it could be Numpy or load from tfds. - y: Target data. If `x` is a dataset, generator instance, - `y` should not be specified. - loss: Loss function for model. - optimizer: Optimizer for model. - Other details can see in `measure_performance()` method of - benchmark_util. - """ + """Measure performance with batch_size=128 and run_iters=3.""" batch_size = 128 run_iters = 3 results = benchmark_util.measure_performance( @@ -112,19 +86,27 @@ class TextWithTransformerBenchmark(tf.test.Benchmark): self.report_benchmark( iters=run_iters, wall_time=results['wall_time'], extras=results) - def benchmark_text_classification_bs_256(self): - """ Required Arguments for measure_performance. + def benchmark_text_classification_bs_512(self): + """Measure performance with batch_size=512 and run_iters=4.""" + batch_size = 512 + run_iters = 4 + results = benchmark_util.measure_performance( + self._build_model, + x=self.imdb_x, + y=self.imdb_y, + batch_size=batch_size, + run_iters=run_iters, + optimizer='adam', + loss='sparse_categorical_crossentropy', + metrics=['accuracy']) - x: Input data, it could be Numpy or load from tfds. - y: Target data. If `x` is a dataset, generator instance, - `y` should not be specified. - loss: Loss function for model. - optimizer: Optimizer for model. - Other details can see in `measure_performance()` method of - benchmark_util. 
- """ + self.report_benchmark( + iters=run_iters, wall_time=results['wall_time'], extras=results) + + def benchmark_text_classification_bs_256(self): + """Measure performance with batch_size=256 and run_iters=3.""" batch_size = 256 - run_iters = 2 + run_iters = 3 results = benchmark_util.measure_performance( self._build_model, x=self.imdb_x, From 090f164810abf241b3e5de0df7a0f5c4f93db7f2 Mon Sep 17 00:00:00 2001 From: amturati Date: Mon, 13 Jul 2020 19:50:50 +0000 Subject: [PATCH 0304/2522] fixed style with clang format --- .../eager/c_api_unified_experimental_test.cc | 248 +++++++++--------- 1 file changed, 121 insertions(+), 127 deletions(-) diff --git a/tensorflow/c/eager/c_api_unified_experimental_test.cc b/tensorflow/c/eager/c_api_unified_experimental_test.cc index 73f35ca5ce5..257c4a75990 100644 --- a/tensorflow/c/eager/c_api_unified_experimental_test.cc +++ b/tensorflow/c/eager/c_api_unified_experimental_test.cc @@ -1,15 +1,12 @@ /* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and +See the License for the specific language governing permissions andgit limitations under the License. ==============================================================================*/ @@ -92,7 +89,6 @@ TEST_P(UnifiedCAPI, TestBasicEager) { TF_DeleteExecutionContext(ctx); } - // MatMul Test TEST_P(UnifiedCAPI, TestBasicEagerMatMul) { std::unique_ptr status( @@ -104,23 +100,22 @@ TEST_P(UnifiedCAPI, TestBasicEagerMatMul) { ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); - /* Want to test simple MatMul example: - - [ [0,0] , * [ [0,0] , = [ [0,0], - [0,0] ] [0,0] ] [0,0] ] - + /* Want to test simple MatMul example: + [[0,0], * [[0,0], = [[0,0], + [0,0]] [0,0]] [0,0]] */ // Build an abstract input tensor. - int64_t dims [] = {2,2}; // Matrices will be 2 x 2 - int num_dims = sizeof(dims)/sizeof(dims[0]); + int64_t dims[] = {2, 2}; // Matrices will be 2 x 2 + int num_dims = sizeof(dims) / sizeof(dims[0]); - float vals [] = {0.0f,0.0f,0.0f,0.0f}; - TFE_Context* eager_ctx = TF_ExecutionContextGetTFEContext(ctx,status.get()); - TFE_TensorHandle* t = TestMatrixTensorHandleWithInput(eager_ctx, vals, dims,num_dims); //, dims[0],dims[1]); - - TF_AbstractTensor* at = - TF_CreateAbstractTensorFromEagerTensor(t, status.get()); // get abstract tensor + float vals[] = {0.0f, 0.0f, 0.0f, 0.0f}; + TFE_Context* eager_ctx = TF_ExecutionContextGetTFEContext(ctx, status.get()); + TFE_TensorHandle* t = + TestMatrixTensorHandleWithInput(eager_ctx, vals, dims, num_dims); + + TF_AbstractTensor* at = TF_CreateAbstractTensorFromEagerTensor( + t, status.get()); // get abstract tensor ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); // Build an abstract operation. @@ -149,14 +144,15 @@ TEST_P(UnifiedCAPI, TestBasicEagerMatMul) { TF_AbstractTensorGetEagerTensor(result, status.get()); ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); TF_Tensor* result_tensor = TFE_TensorHandleResolve(result_t, status.get()); - - // Copy Tensor data into an array. 
- float result_data[4] = {0}; - memcpy(&result_data[0], TF_TensorData(result_tensor), TF_TensorByteSize(result_tensor)); - int data_len = 4; // length of result_data - for(int i = 0; i < data_len; i++){ - EXPECT_EQ(result_data[i], 0); + // Copy Tensor data into an array. + float result_data[4] = {0}; + memcpy(&result_data[0], TF_TensorData(result_tensor), + TF_TensorByteSize(result_tensor)); + + int data_len = 4; // length of result_data + for (int i = 0; i < data_len; i++) { + EXPECT_EQ(result_data[i], 0); } TF_DeleteTensor(result_tensor); @@ -165,8 +161,7 @@ TEST_P(UnifiedCAPI, TestBasicEagerMatMul) { TF_DeleteExecutionContext(ctx); } - -// MatMul Test 2 +// MatMul Test 2 TEST_P(UnifiedCAPI, TestBasicEagerMatMul2) { std::unique_ptr status( TF_NewStatus(), TF_DeleteStatus); @@ -177,31 +172,31 @@ TEST_P(UnifiedCAPI, TestBasicEagerMatMul2) { ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); - /* Want to test simple MatMul example with abstract tensors: - - [ [1,2] , * [ [5,6] , = [ [19,22], - [3,4] ] [7,8] ] [43,50] ] - + /* Want to test simple MatMul example with abstract tensors: + [[1,2], * [[5,6], = [[19,22], + [3,4]] [7,8]] [43,50]] */ // Build 1st Matrix. - int64_t dims [] = {2,2}; // Matrices will be 2 x 2 - int num_dims = sizeof(dims)/sizeof(dims[0]); + int64_t dims[] = {2, 2}; // Matrices will be 2 x 2 + int num_dims = sizeof(dims) / sizeof(dims[0]); - float vals1 [] = {1.0f,2.0f,3.0f,4.0f}; - TFE_Context* eager_ctx = TF_ExecutionContextGetTFEContext(ctx,status.get()); - TFE_TensorHandle* t1 = TestMatrixTensorHandleWithInput(eager_ctx, vals1, dims, num_dims); - - TF_AbstractTensor* at1 = - TF_CreateAbstractTensorFromEagerTensor(t1, status.get()); // get abstract tensor + float vals1[] = {1.0f, 2.0f, 3.0f, 4.0f}; + TFE_Context* eager_ctx = TF_ExecutionContextGetTFEContext(ctx, status.get()); + TFE_TensorHandle* t1 = + TestMatrixTensorHandleWithInput(eager_ctx, vals1, dims, num_dims); + + TF_AbstractTensor* at1 = TF_CreateAbstractTensorFromEagerTensor( + t1, status.get()); // get abstract tensor ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); // Build 2nd Matrix. - float vals2 [] = {5.0f,6.0f,7.0f,8.0f}; - TFE_TensorHandle* t2 = TestMatrixTensorHandleWithInput(eager_ctx, vals2, dims, num_dims); - - TF_AbstractTensor* at2 = - TF_CreateAbstractTensorFromEagerTensor(t2, status.get()); // get abstract tensor + float vals2[] = {5.0f, 6.0f, 7.0f, 8.0f}; + TFE_TensorHandle* t2 = + TestMatrixTensorHandleWithInput(eager_ctx, vals2, dims, num_dims); + + TF_AbstractTensor* at2 = TF_CreateAbstractTensorFromEagerTensor( + t2, status.get()); // get abstract tensor ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); // Build an abstract operation. @@ -230,18 +225,19 @@ TEST_P(UnifiedCAPI, TestBasicEagerMatMul2) { TFE_TensorHandle* result_t = TF_AbstractTensorGetEagerTensor(result, status.get()); ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); - + TF_Tensor* result_tensor = TFE_TensorHandleResolve(result_t, status.get()); // Copy Tensor data into array. float result_data[4] = {0}; - memcpy(&result_data[0], TF_TensorData(result_tensor), TF_TensorByteSize(result_tensor)); + memcpy(&result_data[0], TF_TensorData(result_tensor), + TF_TensorByteSize(result_tensor)); // Build expected result & verify. 
- float e_vals [] = {19.0f,22.0f,43.0f,50.0f}; + float e_vals[] = {19.0f, 22.0f, 43.0f, 50.0f}; - int data_len = 4; // length of e_vals - for(int i = 0; i < data_len; i++){ + int data_len = 4; // length of e_vals + for (int i = 0; i < data_len; i++) { EXPECT_EQ(result_data[i], e_vals[i]); } @@ -262,31 +258,31 @@ TEST_P(UnifiedCAPI, TestBasicEagerMatAdd) { ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); - /* Want to test simple MatAdd example with abstract tensors: - - [ [1,2] , + [ [5,6] , = [ [6,8], - [3,4] ] [7,8] ] [10,12] ] - + /* Want to test simple MatAdd example with abstract tensors: + [[1,2] , + [[5,6], = [[6,8], + [3,4] ] [7,8] ] [10,12]] */ // Build 1st Matrix. - int64_t dims [] = {2,2}; // Matrices will be 2 x 2 - int num_dims = sizeof(dims)/sizeof(dims[0]); + int64_t dims[] = {2, 2}; // Matrices will be 2 x 2 + int num_dims = sizeof(dims) / sizeof(dims[0]); - float vals1 [] = {1.0f,2.0f,3.0f,4.0f}; - TFE_Context* eager_ctx = TF_ExecutionContextGetTFEContext(ctx,status.get()); - TFE_TensorHandle* t1 = TestMatrixTensorHandleWithInput(eager_ctx, vals1, dims, num_dims); - - TF_AbstractTensor* at1 = - TF_CreateAbstractTensorFromEagerTensor(t1, status.get()); // get abstract tensor + float vals1[] = {1.0f, 2.0f, 3.0f, 4.0f}; + TFE_Context* eager_ctx = TF_ExecutionContextGetTFEContext(ctx, status.get()); + TFE_TensorHandle* t1 = + TestMatrixTensorHandleWithInput(eager_ctx, vals1, dims, num_dims); + + TF_AbstractTensor* at1 = TF_CreateAbstractTensorFromEagerTensor( + t1, status.get()); // get abstract tensor ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); // Build 2nd Matrix. - float vals2 [] = {5.0f,6.0f,7.0f,8.0f}; - TFE_TensorHandle* t2 = TestMatrixTensorHandleWithInput(eager_ctx, vals2, dims, num_dims); - - TF_AbstractTensor* at2 = - TF_CreateAbstractTensorFromEagerTensor(t2, status.get()); // get abstract tensor + float vals2[] = {5.0f, 6.0f, 7.0f, 8.0f}; + TFE_TensorHandle* t2 = + TestMatrixTensorHandleWithInput(eager_ctx, vals2, dims, num_dims); + + TF_AbstractTensor* at2 = TF_CreateAbstractTensorFromEagerTensor( + t2, status.get()); // get abstract tensor ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); // Build an abstract operation. @@ -315,18 +311,19 @@ TEST_P(UnifiedCAPI, TestBasicEagerMatAdd) { TFE_TensorHandle* result_t = TF_AbstractTensorGetEagerTensor(result, status.get()); ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); - + TF_Tensor* result_tensor = TFE_TensorHandleResolve(result_t, status.get()); // Copy Tensor data into array. float result_data[4] = {0}; - memcpy(&result_data[0], TF_TensorData(result_tensor), TF_TensorByteSize(result_tensor)); + memcpy(&result_data[0], TF_TensorData(result_tensor), + TF_TensorByteSize(result_tensor)); // Build expected result & verify. - float e_vals [] = {6.0f,8.0f,10.0f,12.0f}; + float e_vals[] = {6.0f, 8.0f, 10.0f, 12.0f}; - int data_len = 4; // length of e_vals - for(int i = 0; i < data_len; i++){ + int data_len = 4; // length of e_vals + for (int i = 0; i < data_len; i++) { EXPECT_EQ(result_data[i], e_vals[i]); } @@ -339,7 +336,7 @@ TEST_P(UnifiedCAPI, TestBasicEagerMatAdd) { TEST_P(UnifiedCAPI, TestBasicGraph) { std::unique_ptr status( TF_NewStatus(), TF_DeleteStatus); - + // Start a new function / execution context. 
string fn_name = "double"; TF_ExecutionContext* graph_ctx = @@ -387,7 +384,7 @@ TEST_P(UnifiedCAPI, TestBasicGraph) { TF_ExecutionContextRegisterFunction(eager_execution_ctx, func, status.get()); ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); - + // Build the abstract op to run the function. TF_AbstractOp* fn_op = TF_NewAbstractOp(eager_execution_ctx); TF_AbstractOpSetOpType(fn_op, fn_name.c_str(), status.get()); @@ -426,29 +423,28 @@ TEST_P(UnifiedCAPI, TestBasicGraph) { TF_DeleteExecutionContext(eager_execution_ctx); } - // Graph Tracing for MatMul TEST_P(UnifiedCAPI, TestBasicGraphMatMul) { std::unique_ptr status( TF_NewStatus(), TF_DeleteStatus); - + // Start a new function / execution context. string fn_name = "matrix_multiply"; TF_ExecutionContext* graph_ctx = TF_CreateFunction(fn_name.c_str(), status.get()); ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); - + auto* placeholder_t = TF_AddFunctionParameter(graph_ctx, TF_FLOAT, status.get()); ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); - + // Build an abstract operation. auto* matmul_op = TF_NewAbstractOp(graph_ctx); TF_AbstractOpSetOpType(matmul_op, "MatMul", status.get()); ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); TF_AbstractOpSetOpName(matmul_op, "my_matmul", status.get()); ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); - + // Build inputs and outputs. TF_AbstractTensor* inputs[2] = {placeholder_t, placeholder_t}; TF_OutputList* mm_outputs = TF_NewOutputList(); @@ -458,7 +454,7 @@ TEST_P(UnifiedCAPI, TestBasicGraphMatMul) { // Execute. TF_ExecuteOperation(matmul_op, 2, inputs, mm_outputs, status.get()); ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); - + // Clean up operation and inputs. TF_DeleteAbstractOp(matmul_op); @@ -467,13 +463,11 @@ TEST_P(UnifiedCAPI, TestBasicGraphMatMul) { ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); /* Now that the graph is built, test graph implementation on matmul example: - - [ [1,1] , * [ [1,1] , = [ [2,2], - [1,1] ] [1,1] ] [2,2] ] - + [[1,1] , * [[1,1] , = [[2,2], + [1,1]] [1,1]] [2,2]] */ - - // Build eager context. + + // Build eager context. TFE_ContextOptions* opts = TFE_NewContextOptions(); TF_ExecutionContext* eager_execution_ctx = TF_NewEagerExecutionContext(opts, status.get()); @@ -482,7 +476,7 @@ TEST_P(UnifiedCAPI, TestBasicGraphMatMul) { TF_ExecutionContextRegisterFunction(eager_execution_ctx, func, status.get()); ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); - + // Build the abstract op to run the function. TF_AbstractOp* fn_op = TF_NewAbstractOp(eager_execution_ctx); TF_AbstractOpSetOpType(fn_op, fn_name.c_str(), status.get()); @@ -490,21 +484,21 @@ TEST_P(UnifiedCAPI, TestBasicGraphMatMul) { // Build an abstract input tensor. 
TFE_Context* eager_ctx = - TF_ExecutionContextGetTFEContext(eager_execution_ctx,status.get()); + TF_ExecutionContextGetTFEContext(eager_execution_ctx, status.get()); - float vals [] = {1.0f,1.0f,1.0f,1.0f}; - int64_t dims [] = {2,2}; // Matrices will be 2 x 2 - int num_dims = sizeof(dims)/sizeof(dims[0]); + float vals[] = {1.0f, 1.0f, 1.0f, 1.0f}; + int64_t dims[] = {2, 2}; // Matrices will be 2 x 2 + int num_dims = sizeof(dims) / sizeof(dims[0]); - TFE_TensorHandle* input_eager = TestMatrixTensorHandleWithInput(eager_ctx, vals, dims, num_dims); + TFE_TensorHandle* input_eager = + TestMatrixTensorHandleWithInput(eager_ctx, vals, dims, num_dims); TF_AbstractTensor* input_t = TF_CreateAbstractTensorFromEagerTensor(input_eager, status.get()); ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); TF_OutputListSetNumOutputs(mm_outputs, 1, status.get()); ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); - TF_ExecuteOperation(fn_op, 1, &input_t, mm_outputs, - status.get()); + TF_ExecuteOperation(fn_op, 1, &input_t, mm_outputs, status.get()); ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); ASSERT_EQ(1, TF_OutputListNumOutputs(mm_outputs)); @@ -515,15 +509,14 @@ TEST_P(UnifiedCAPI, TestBasicGraphMatMul) { TF_Tensor* f_t = TFE_TensorHandleResolve(final, status.get()); ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); - float result_data [4] = {0}; + float result_data[4] = {0}; memcpy(&result_data[0], TF_TensorData(f_t), TF_TensorByteSize(f_t)); int data_len = 4; - for(int i = 0; i < data_len; i++){ + for (int i = 0; i < data_len; i++) { ASSERT_EQ(result_data[i], 2.0f); } - TF_DeleteAbstractTensor(final_result); TF_DeleteOutputList(mm_outputs); TF_DeleteAbstractTensor(placeholder_t); @@ -533,7 +526,6 @@ TEST_P(UnifiedCAPI, TestBasicGraphMatMul) { TF_DeleteAbstractFunction(func); TF_DeleteExecutionContext(eager_execution_ctx); - } TEST_P(UnifiedCAPI, TestMultiOutputGraph) { @@ -782,8 +774,8 @@ TEST_P(UnifiedCAPI, TestMultiOutputGraphMatMul) { // Finalize the function by providing the returned values. TF_AbstractFunction* func; { - // We want to return the output of both add operations and MatMul operation, create a new list - // and populate it. + // We want to return the output of both add operations and MatMul operation, + // create a new list and populate it. 
TF_OutputList* func_outputs = TF_NewOutputList(); TF_OutputListPushBack(func_outputs, add_output1, s); ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); @@ -807,16 +799,16 @@ TEST_P(UnifiedCAPI, TestMultiOutputGraphMatMul) { * * Now we will execute this function with an eager context: * - * A = [[0, 1], [1, 0]] - * B = [[1, 0], [0, 1]] + * A =[[0, 1],[1, 0]] + * B =[[1, 0],[0, 1]] * * output1, output2, output3 = two_adds_and_mm(A, B) * - * We expect outputs: - * - * output1 = [[1, 1], [1, 1]] - * output2 = [[2, 0], [0, 2]] - * output3 = [[2, 2], [2, 2]] + * We expect outputs: + * + * output1 = [[1, 1],[1, 1]] + * output2 = [[2, 0],[0, 2]] + * output3 = [[2, 2],[2, 2]] * */ @@ -839,20 +831,22 @@ TEST_P(UnifiedCAPI, TestMultiOutputGraphMatMul) { std::vector func_args; { TFE_Context* eager_ctx = - TF_ExecutionContextGetTFEContext(eager_execution_ctx,s); + TF_ExecutionContextGetTFEContext(eager_execution_ctx, s); // 1st Arg - float vals1 [] = {0.0f,1.0f,1.0f,0.0f}; - int64_t dims [] = {2,2}; // Matrices will be 2 x 2 - int num_dims = sizeof(dims)/sizeof(dims[0]); + float vals1[] = {0.0f, 1.0f, 1.0f, 0.0f}; + int64_t dims[] = {2, 2}; // Matrices will be 2 x 2 + int num_dims = sizeof(dims) / sizeof(dims[0]); - TFE_TensorHandle* input_eager = TestMatrixTensorHandleWithInput(eager_ctx, vals1, dims, num_dims); + TFE_TensorHandle* input_eager = + TestMatrixTensorHandleWithInput(eager_ctx, vals1, dims, num_dims); func_args.push_back(TF_CreateAbstractTensorFromEagerTensor(input_eager, s)); ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); // 2nd Arg - float vals2 [] = {1.0f,0.0f,0.0f,1.0f}; - input_eager = TestMatrixTensorHandleWithInput(eager_ctx, vals2, dims, num_dims); + float vals2[] = {1.0f, 0.0f, 0.0f, 1.0f}; + input_eager = + TestMatrixTensorHandleWithInput(eager_ctx, vals2, dims, num_dims); func_args.push_back(TF_CreateAbstractTensorFromEagerTensor(input_eager, s)); ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); } @@ -860,16 +854,17 @@ TEST_P(UnifiedCAPI, TestMultiOutputGraphMatMul) { TF_OutputList* func_outputs = TF_NewOutputList(); TF_OutputListSetNumOutputs(func_outputs, 3, s); ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); - TF_ExecuteOperation(fn_op, func_args.size(), func_args.data(), func_outputs, s); + TF_ExecuteOperation(fn_op, func_args.size(), func_args.data(), func_outputs, + s); ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); TF_DeleteAbstractOp(fn_op); for (TF_AbstractTensor* t : func_args) TF_DeleteAbstractTensor(t); ASSERT_EQ(3, TF_OutputListNumOutputs(func_outputs)); - - float expected_outputs [3][4] = {{1.0f,1.0f,1.0f,1.0f}, - {2.0f,0.0f,0.0f,2.0f}, - {2.0f,2.0f,2.0f,2.0f}}; + + float expected_outputs[3][4] = {{1.0f, 1.0f, 1.0f, 1.0f}, + {2.0f, 0.0f, 0.0f, 2.0f}, + {2.0f, 2.0f, 2.0f, 2.0f}}; float result_data[4]; for (int idx = 0; idx < 3; ++idx) { @@ -880,12 +875,12 @@ TEST_P(UnifiedCAPI, TestMultiOutputGraphMatMul) { ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); memcpy(&result_data[0], TF_TensorData(f_t), TF_TensorByteSize(f_t)); - - // Verify results for each output - for(int j = 0; j < 4; j++){ + + // Verify results for each output + for (int j = 0; j < 4; j++) { ASSERT_EQ(result_data[j], expected_outputs[idx][j]); } - + TF_DeleteTensor(f_t); } @@ -894,14 +889,12 @@ TEST_P(UnifiedCAPI, TestMultiOutputGraphMatMul) { TF_AbstractTensor* result = TF_OutputListGet(func_outputs, idx); TF_DeleteAbstractTensor(result); } - + TF_DeleteOutputList(func_outputs); TF_DeleteExecutionContext(eager_execution_ctx); - //TF_DeleteExecutionContext(graph_ctx); 
TF_DeleteAbstractFunction(func); } - TEST_P(UnifiedCAPI, TF_ExecutionContextToFunctionWithEagerContextRaises) { std::unique_ptr status( TF_NewStatus(), TF_DeleteStatus); @@ -1000,3 +993,4 @@ INSTANTIATE_TEST_SUITE_P(Tracing, UnifiedCAPI, } // namespace } // namespace tensorflow + From 5d928f34aba285fe14990a38ea8882a73af7de5f Mon Sep 17 00:00:00 2001 From: amturati <36869454+amturati@users.noreply.github.com> Date: Mon, 13 Jul 2020 14:07:26 -0600 Subject: [PATCH 0305/2522] fixed nit with a space --- tensorflow/c/eager/c_api_unified_experimental_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/c/eager/c_api_unified_experimental_test.cc b/tensorflow/c/eager/c_api_unified_experimental_test.cc index 96f045f720c..9d3f7ed0327 100644 --- a/tensorflow/c/eager/c_api_unified_experimental_test.cc +++ b/tensorflow/c/eager/c_api_unified_experimental_test.cc @@ -6,7 +6,7 @@ You may obtain a copy of the License at Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions andgit +See the License for the specific language governing permissions and git limitations under the License. ==============================================================================*/ From f4695663992ef6cc704280551862d7d2aa0ac3da Mon Sep 17 00:00:00 2001 From: amturati Date: Mon, 13 Jul 2020 20:16:33 +0000 Subject: [PATCH 0306/2522] fixed license spacing issue --- tensorflow/c/eager/c_api_unified_experimental_test.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tensorflow/c/eager/c_api_unified_experimental_test.cc b/tensorflow/c/eager/c_api_unified_experimental_test.cc index 96f045f720c..913f6388b56 100644 --- a/tensorflow/c/eager/c_api_unified_experimental_test.cc +++ b/tensorflow/c/eager/c_api_unified_experimental_test.cc @@ -1,12 +1,15 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions andgit +See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ From f24daf4d8710a1398369e2a789582173341e6f0e Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Mon, 13 Jul 2020 13:12:07 -0700 Subject: [PATCH 0307/2522] Skeleton implementation for type inference. 
PiperOrigin-RevId: 321018594 Change-Id: Ib202f114d5465c25f880aba3eecf4729969819a3 --- tensorflow/python/autograph/pyct/anno.py | 1 + .../autograph/pyct/static_analysis/BUILD | 14 + .../pyct/static_analysis/type_inference.py | 304 ++++++++++++++++++ .../static_analysis/type_inference_test.py | 135 ++++++++ 4 files changed, 454 insertions(+) create mode 100644 tensorflow/python/autograph/pyct/static_analysis/type_inference.py create mode 100644 tensorflow/python/autograph/pyct/static_analysis/type_inference_test.py diff --git a/tensorflow/python/autograph/pyct/anno.py b/tensorflow/python/autograph/pyct/anno.py index a5f3f5b33a4..bad937c8305 100644 --- a/tensorflow/python/autograph/pyct/anno.py +++ b/tensorflow/python/autograph/pyct/anno.py @@ -100,6 +100,7 @@ class Static(NoValue): 'Symbols defined when entering the node. See reaching_definitions.py.') LIVE_VARS_OUT = ('Symbols live when exiting the node. See liveness.py.') LIVE_VARS_IN = ('Symbols live when entering the node. See liveness.py.') + TYPES = 'Static type information. See type_inference.py.' FAIL = object() diff --git a/tensorflow/python/autograph/pyct/static_analysis/BUILD b/tensorflow/python/autograph/pyct/static_analysis/BUILD index 0764a3e64b4..1eaf3b3c177 100644 --- a/tensorflow/python/autograph/pyct/static_analysis/BUILD +++ b/tensorflow/python/autograph/pyct/static_analysis/BUILD @@ -24,6 +24,7 @@ py_library( "liveness.py", "reaching_definitions.py", "reaching_fndefs.py", + "type_inference.py", ], srcs_version = "PY2AND3", visibility = ["//visibility:public"], @@ -167,3 +168,16 @@ py_test( "//tensorflow/python/autograph/pyct", ], ) + +py_test( + name = "type_inference_test", + srcs = ["type_inference_test.py"], + python_version = "PY3", + srcs_version = "PY3", + deps = [ + ":static_analysis", + "//tensorflow/python:client_testlib", + "//tensorflow/python/autograph/pyct", + "@gast_archive//:gast", + ], +) diff --git a/tensorflow/python/autograph/pyct/static_analysis/type_inference.py b/tensorflow/python/autograph/pyct/static_analysis/type_inference.py new file mode 100644 index 00000000000..f684d041c90 --- /dev/null +++ b/tensorflow/python/autograph/pyct/static_analysis/type_inference.py @@ -0,0 +1,304 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Type inference. + +This analysis annotates all symbols nodes of an AST with type information +extracted from static sources: + * type annotations + * global and local symbols visible to the function at analysis time + * literals + +Requires activity analysis. 
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import gast + +from tensorflow.python.autograph.pyct import anno +from tensorflow.python.autograph.pyct import cfg +from tensorflow.python.autograph.pyct import transformer +from tensorflow.python.autograph.pyct.static_analysis import annos + + +class Resolver(object): + """Resolvers allow customizing the process of identifying types.""" + + def resolve_external_type(self, type_): + raise NotImplementedError('subclasses must implement') + + def resolve_external_value(self, value): + raise NotImplementedError('subclasses must implement') + + def resolve_external_function_call(self, fn): + # TODO(mdan)): This must accept value/types to arguments + raise NotImplementedError('subclasses must implement') + + # TODO(mdan): More resolvers as needed. + + +class _SymbolTable(object): + """Abstraction for the state of the CFG walk for type inference. + + This is a value type. Only implements the strictly necessary operators. + + Attributes: + value: Dict[qual_names.QN, Set[Type]], mapping symbols to the set of + possible types. + """ + + def __init__(self, init_from=None): + if init_from: + assert isinstance(init_from, _SymbolTable) + self.value = { + s: set(other_types) for s, other_types in init_from.value.items() + } + else: + self.value = {} + + def __eq__(self, other): + if frozenset(self.value.keys()) != frozenset(other.value.keys()): + return False + ret = all(self.value[s] == other.value[s] for s in self.value) + return ret + + def __ne__(self, other): + return not self.__eq__(other) + + def __or__(self, other): + assert isinstance(other, _SymbolTable) + result = _SymbolTable(self) + for s, other_types in other.value.items(): + if s not in result.value: + self_types = set() + result.value[s] = self_types + else: + self_types = result.value[s] + self_types.update(other_types) + return result + + def __repr__(self): + return 'SymbolTable {}'.format(self.value) + + +# These special names don't normally show up in globals. +SPECIAL_NAMES = { + 'int': int, + 'float': float, +} + + +class Analyzer(cfg.GraphVisitor): + """CFG visitor that performs type inference at statement level.""" + + def __init__(self, graph, resolver, namespace, scope): + """Creates a new analyzer. + + Args: + graph: cfg.Graph + resolver: Resolver + namespace: Dict[str, Any] + scope: activity.Scope + """ + super(Analyzer, self).__init__(graph) + self.resolver = resolver + self.namespace = namespace + self.scope = scope + + def init_state(self, _): + return _SymbolTable() + + def _static_value(self, qn): + """Looks up a name in the namespace.""" + # TODO(mdan): This needs to be updated to work for composite symbols. + name = str(qn) + value = self.namespace.get(name, None) + if value is None: + return SPECIAL_NAMES.get(name, None) + return value + + def _infer_type(self, node, types_in): + """Infers the return type of an expression.""" + if isinstance(node, gast.Name): + # Normal variables: carry over their existing type. + name = anno.getanno(node, anno.Basic.QN) + types = types_in.value.get(name, None) + if types is not None: + return types + # If type is unknown, attempt to look the symbol up in the namespace. + if name not in self.scope.bound: + # TODO(mdan): Might still be able to do something for bound symbols. 
+ static_value = self._static_value(name) + if static_value is not None: + return {self.resolver.resolve_external_value(static_value)} + return None + + if isinstance(node, gast.Call): + # Function calls: infer their return type. + f_name = anno.getanno(node.func, anno.Basic.QN) + static_value = self._static_value(f_name) + # TODO(mdan): This needs to be updated to work for composite symbols. + if static_value is None: + raise ValueError('cannot infer return type of {}'.format(f_name)) + return {self.resolver.resolve_external_function_call(static_value)} + + else: + raise NotImplementedError(node) + + def _assignment_types(self, node, types_in): + """Propagates types through an assignment operation.""" + targets = node.targets + if len(targets) != 1: + raise NotImplementedError('multiple assignment') + + target, = targets + qn = anno.getanno(target, anno.Basic.QN) + types = self._infer_type(node.value, types_in) + if types is None: + return () + + return (qn, types), + + def _arg_type(self, node): + """Looks up the type of an argument based on its annotation.""" + assert isinstance(node, gast.Name) + name = anno.getanno(node, anno.Basic.QN) + type_name = anno.getanno(node.annotation, anno.Basic.QN, None) + if type_name is None: + return () + + static_value = self._static_value(type_name) + if static_value is None: + raise ValueError('cannot resolve type {}'.format(type_name)) + + type_ = self.resolver.resolve_external_type(static_value) + return (name, {type_}), + + def _args_types(self, node): + """Propagates types through argument annotations.""" + types = {} + + for n in node.posonlyargs: + types.update(self._arg_type(n)) + for n in node.args: + types.update(self._arg_type(n)) + for n in node.kwonlyargs: + types.update(self._arg_type(n)) + + if node.vararg: + raise NotImplementedError('vararg') + if node.kwarg: + raise NotImplementedError('kwarg') + + # TODO(mdan): Use kw_defaults, defaults if available. + + return types + + def visit_node(self, node): + prev_types_out = self.out[node] + + types_in = _SymbolTable() + for n in node.prev: + types_in |= self.out[n] + + types_out = _SymbolTable(types_in) + ast_node = node.ast_node + if isinstance(ast_node, gast.Assign): + types_out.value.update(self._assignment_types(ast_node, types_in)) + elif isinstance(ast_node, gast.arguments): + types_out.value.update(self._args_types(ast_node)) + + self.in_[node] = types_in + self.out[node] = types_out + + return prev_types_out != types_out + + +class TreeAnnotator(transformer.Base): + """AST visitor that annotates each symbol with its possible types.""" + + def __init__(self, source_info, graphs, resolver): + super(TreeAnnotator, self).__init__(source_info) + self.graphs = graphs + self.resolver = resolver + self.current_analyzer = None + self.current_cfg_node = None + + def visit_FunctionDef(self, node): + parent_analyzer = self.current_analyzer + subgraph = self.graphs[node] + scope = anno.getanno(node, annos.NodeAnno.BODY_SCOPE) + + analyzer = Analyzer(subgraph, self.resolver, self.ctx.info.namespace, scope) + analyzer.visit_forward() + + # Recursively process any remaining subfunctions. + self.current_analyzer = analyzer + node.args = self.visit(node.args) + node.body = self.visit_block(node.body) + self.current_analyzer = parent_analyzer + + return node + + def visit_Name(self, node): + if self.current_analyzer is None: + # Names may appear outside function defs - for example in class + # definitions. 
+ return node + + analyzer = self.current_analyzer + cfg_node = self.current_cfg_node + + assert cfg_node is not None, ('name node, %s, outside of any statement?' + % node.id) + + qn = anno.getanno(node, anno.Basic.QN) + if isinstance(node.ctx, gast.Load): + anno.setanno(node, anno.Static.TYPES, + tuple(analyzer.in_[cfg_node].value.get(qn, ()))) + else: + anno.setanno(node, anno.Static.TYPES, + tuple(analyzer.out[cfg_node].value.get(qn, ()))) + + return node + + def visit(self, node): + parent = self.current_cfg_node + + if (self.current_analyzer is not None and + node in self.current_analyzer.graph.index): + self.current_cfg_node = self.current_analyzer.graph.index[node] + node = super(TreeAnnotator, self).visit(node) + + self.current_cfg_node = parent + return node + + +def resolve(node, source_info, graphs, resolver): + """Performs type inference. + + Args: + node: ast.AST + source_info: transformer.SourceInfo + graphs: Dict[ast.FunctionDef, cfg.Graph] + resolver: Resolver + Returns: + ast.AST + """ + visitor = TreeAnnotator(source_info, graphs, resolver) + node = visitor.visit(node) + return node diff --git a/tensorflow/python/autograph/pyct/static_analysis/type_inference_test.py b/tensorflow/python/autograph/pyct/static_analysis/type_inference_test.py new file mode 100644 index 00000000000..a8e956ef558 --- /dev/null +++ b/tensorflow/python/autograph/pyct/static_analysis/type_inference_test.py @@ -0,0 +1,135 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for type_inference module.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.autograph.pyct import anno +from tensorflow.python.autograph.pyct import cfg +from tensorflow.python.autograph.pyct import qual_names +from tensorflow.python.autograph.pyct import transpiler +from tensorflow.python.autograph.pyct.static_analysis import activity +from tensorflow.python.autograph.pyct.static_analysis import type_inference +from tensorflow.python.platform import test + + +class TestResolver(type_inference.Resolver): + + def resolve_external_type(self, t): + return t + + def resolve_external_value(self, value): + return type(value) + + def resolve_external_function_call(self, fn): + return fn.__annotations__['return'] + + +class TestTranspiler(transpiler.GenericTranspiler): + + def get_transformed_name(self, _): + return 'test_item' + + def transform_ast(self, node, ctx): + node = qual_names.resolve(node) + node = activity.resolve(node, ctx) + graphs = cfg.build(node) + node = type_inference.resolve(node, ctx, graphs, TestResolver()) + return node + + +class TypeInferenceAnalyzerTest(test.TestCase): + + def assertTypes(self, node, expected): + if not isinstance(expected, tuple): + expected = expected, + self.assertSetEqual( + set(anno.getanno(node, anno.Static.TYPES)), set(expected)) + + def test_argument(self): + + def test_fn(a: int, b): + return a, b + + node, _ = TestTranspiler().transform(test_fn, None) + fn_body = node.body + + self.assertTypes(fn_body[0].value.elts[0], int) + self.assertTypes(fn_body[0].value.elts[1], ()) + + def test_straightline_assignment(self): + + def test_fn(a: int, c): + b = a + return a, b, c + + node, _ = TestTranspiler().transform(test_fn, None) + fn_body = node.body + + self.assertTypes(fn_body[0].targets[0], int) + self.assertTypes(fn_body[0].value, int) + self.assertTypes(fn_body[1].value.elts[0], int) + self.assertTypes(fn_body[1].value.elts[1], int) + self.assertTypes(fn_body[1].value.elts[2], ()) + + def test_assignment_overwrite(self): + + def test_fn(a: int, b: float): + c = a + c = b + return c + + node, _ = TestTranspiler().transform(test_fn, None) + fn_body = node.body + + self.assertTypes(fn_body[0].targets[0], int) + self.assertTypes(fn_body[0].value, int) + self.assertTypes(fn_body[1].targets[0], float) + self.assertTypes(fn_body[1].value, float) + + def test_external_value(self): + + a = 'foo' + + def test_fn(): + b = a + return b + + node, _ = TestTranspiler().transform(test_fn, None) + fn_body = node.body + + self.assertTypes(fn_body[0].targets[0], str) + self.assertTypes(fn_body[1].value, str) + + def test_external_function(self): + + def g() -> float: + return 1.0 + + def test_fn(): + a = g() + return a + + node, _ = TestTranspiler().transform(test_fn, None) + fn_body = node.body + + self.assertTypes(fn_body[0].targets[0], float) + self.assertTypes(fn_body[1].value, float) + + +if __name__ == '__main__': + test.main() From fc64140dd5ac020e74a6c8d7fb5e0b086493a577 Mon Sep 17 00:00:00 2001 From: Ayush Dubey Date: Mon, 13 Jul 2020 13:24:35 -0700 Subject: [PATCH 0308/2522] Release mutex_lock before destroying mutex. 
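The callback in RemoteRecvAsyncMany updated `status` under `mu_` and then
called `counter.DecrementCount()` while still holding the lock. Once the
count reaches zero, the waiting test body can return and destroy `mu_`
before the `mutex_lock` destructor runs, so the unlock can touch an already
destroyed mutex. Scoping the lock so it is released before the counter is
signalled avoids that. A minimal standalone sketch of the pattern, using
std::mutex and std::unique_lock as stand-ins for the TF types in the test
(the names below are illustrative, not code from this change):

    #include <mutex>

    struct SharedState {
      std::mutex mu;   // owned by the waiting thread; destroyed after it wakes
      int value = 0;
    };

    // `signal_done` is whatever wakes the waiter
    // (BlockingCounter::DecrementCount in the real test).
    void Callback(SharedState* state, void (*signal_done)()) {
      {
        std::unique_lock<std::mutex> l(state->mu);
        state->value++;
      }               // lock released here, while *state is still alive
      signal_done();  // only now may the waiter proceed and destroy *state
    }
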
PiperOrigin-RevId: 321021232 Change-Id: Ide86ab571c36b2c3dd8b1ad32e1d081cf6f62964 --- .../core/distributed_runtime/rpc/rpc_rendezvous_mgr_test.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr_test.cc b/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr_test.cc index 7c5779246bd..412c902a04a 100644 --- a/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr_test.cc +++ b/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr_test.cc @@ -331,8 +331,10 @@ TEST_F(RpcRendezvousMgrTest, RemoteRecvAsyncMany) { [&mu_, &status, &counter](const Status& s, const Rendezvous::Args&, const Rendezvous::Args&, const Tensor&, const bool) { - mutex_lock l(mu_); - status.Update(s); + { + mutex_lock l(mu_); + status.Update(s); + } counter.DecrementCount(); }); } From 4bb39a45e07a4f938fea1b23e64ad1c4c77725d7 Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: Mon, 13 Jul 2020 13:26:11 -0700 Subject: [PATCH 0309/2522] Support iterator checkpointing with large caches. Previously there was a 2GB limit on cache size, due to protobuf serialization limits. Now we write each element to a new protobuf, so that we can checkpoint the cache as long as individual elements are less than 2GB. The utilities added to dataset_utils can be used to simplify checkpointing in many ops that store buffers of elements, such as prefetch, parallel interleave, parallel map, and shuffle. I will submit additional CLs to update those datasets. PiperOrigin-RevId: 321021511 Change-Id: Ib8cbb0979e8b1fa9b37b82837fc37b4c85f998c2 --- tensorflow/core/kernels/data/BUILD | 2 + .../core/kernels/data/cache_dataset_ops.cc | 65 ++----------------- tensorflow/core/kernels/data/cache_ops.cc | 5 ++ tensorflow/core/kernels/data/cache_ops.h | 4 ++ tensorflow/core/kernels/data/dataset_utils.cc | 46 +++++++++++++ tensorflow/core/kernels/data/dataset_utils.h | 13 ++++ .../core/kernels/data/dataset_utils_test.cc | 28 ++++++++ .../python/data/kernel_tests/cache_test.py | 21 ++++++ 8 files changed, 125 insertions(+), 59 deletions(-) diff --git a/tensorflow/core/kernels/data/BUILD b/tensorflow/core/kernels/data/BUILD index 7ac6547e5cf..efce4fb0cf5 100644 --- a/tensorflow/core/kernels/data/BUILD +++ b/tensorflow/core/kernels/data/BUILD @@ -63,6 +63,7 @@ tf_cc_test( name = "dataset_utils_test", srcs = ["dataset_utils_test.cc"], deps = [ + ":dataset_test_base", ":dataset_utils", "//tensorflow/cc:cc_ops", "//tensorflow/core:framework", @@ -1201,6 +1202,7 @@ tf_kernel_library( hdrs = ["cache_dataset_ops.h"], deps = [ ":cache_ops", + ":dataset_utils", ":name_utils", "//tensorflow/core:dataset_ops_op_lib", "//tensorflow/core:framework", diff --git a/tensorflow/core/kernels/data/cache_dataset_ops.cc b/tensorflow/core/kernels/data/cache_dataset_ops.cc index b4d0f9e5ab3..f60001b0055 100644 --- a/tensorflow/core/kernels/data/cache_dataset_ops.cc +++ b/tensorflow/core/kernels/data/cache_dataset_ops.cc @@ -18,6 +18,7 @@ limitations under the License. 
#include "tensorflow/core/framework/resource_mgr.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/kernels/data/cache_ops.h" +#include "tensorflow/core/kernels/data/dataset_utils.h" #include "tensorflow/core/kernels/data/name_utils.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/strings/stringprintf.h" @@ -641,57 +642,6 @@ class CacheDatasetOp::FileDatasetV2 : public CacheDatasetOp::FileDatasetBase { const Tensor resource_handle_; }; -namespace { -template -Status SaveCache(IteratorStateWriter* writer, T* cache, FullNameFn full_name) { - size_t cache_size = cache->size(); - TF_RETURN_IF_ERROR(writer->WriteScalar(full_name(kCacheSize), cache_size)); - for (size_t i = 0; i < cache_size; i++) { - auto& element = cache->at(i); - TF_RETURN_IF_ERROR(writer->WriteScalar( - full_name(strings::StrCat(kCache, "[", i, "]", kSizeSuffix)), - element.size())); - for (size_t j = 0; j < element.size(); ++j) { - TF_RETURN_IF_ERROR(writer->WriteTensor( - full_name(strings::StrCat(kCache, "[", i, "][", j, "]")), - element[j])); - } - } - return Status::OK(); -} - -template -Status RestoreCache(IteratorContext* ctx, IteratorStateReader* reader, T* cache, - FullNameFn full_name) { - size_t cache_size; - { - int64 temp; - TF_RETURN_IF_ERROR(reader->ReadScalar(full_name(kCacheSize), &temp)); - cache_size = static_cast(temp); - } - for (size_t i = 0; i < cache_size; ++i) { - std::vector element; - size_t element_size; - { - int64 temp; - TF_RETURN_IF_ERROR(reader->ReadScalar( - full_name(strings::StrCat(kCache, "[", i, "]", kSizeSuffix)), &temp)); - element_size = static_cast(temp); - } - element.reserve(element_size); - for (size_t j = 0; j < element_size; ++j) { - element.emplace_back(); - TF_RETURN_IF_ERROR(reader->ReadTensor( - full_name(strings::StrCat(kCache, "[", i, "][", j, "]")), - &element.back())); - } - cache->emplace_back(std::move(element)); - } - return Status::OK(); -} - -} // namespace - class CacheDatasetOp::MemoryDatasetBase : public DatasetBase { public: explicit MemoryDatasetBase(OpKernelContext* ctx, const DatasetBase* input, @@ -764,8 +714,8 @@ class CacheDatasetOp::MemoryDatasetBase : public DatasetBase { mutex_lock l(mu_); if (cache_->IsCompleted()) { TF_RETURN_IF_ERROR(writer->WriteScalar(full_name(kCacheCompleted), "")); - TF_RETURN_IF_ERROR(SaveCache( - writer, cache_, [this](const string& s) { return full_name(s); })); + TF_RETURN_IF_ERROR( + WriteElementsToCheckpoint(writer, prefix(), cache_->data())); } return SaveInput(ctx, writer, iterator_); } @@ -778,8 +728,7 @@ class CacheDatasetOp::MemoryDatasetBase : public DatasetBase { if (reader->Contains(full_name(kCacheCompleted))) { std::vector> temp_cache; TF_RETURN_IF_ERROR( - RestoreCache(ctx, reader, &temp_cache, - [this](const string& s) { return full_name(s); })); + ReadElementsFromCheckpoint(reader, prefix(), &temp_cache)); cache_->Complete(std::move(temp_cache)); } TF_RETURN_IF_ERROR(InitializeIterator(ctx)); @@ -846,8 +795,7 @@ class CacheDatasetOp::MemoryDatasetBase : public DatasetBase { mutex_lock l(mu_); if (!cache_->IsCompleted()) { TF_RETURN_IF_ERROR( - SaveCache(writer, &temp_cache_, - [this](const string& s) { return full_name(s); })); + WriteElementsToCheckpoint(writer, prefix(), temp_cache_)); } return SaveInput(ctx, writer, input_impl_); } @@ -857,8 +805,7 @@ class CacheDatasetOp::MemoryDatasetBase : public DatasetBase { mutex_lock l(mu_); if (!reader->Contains(full_name(kCacheCompleted))) { TF_RETURN_IF_ERROR( - RestoreCache(ctx, reader, &temp_cache_, - 
[this](const string& s) { return full_name(s); })); + ReadElementsFromCheckpoint(reader, prefix(), &temp_cache_)); } return RestoreInput(ctx, reader, input_impl_); } diff --git a/tensorflow/core/kernels/data/cache_ops.cc b/tensorflow/core/kernels/data/cache_ops.cc index 90c2e905c32..f6dce49a7c0 100644 --- a/tensorflow/core/kernels/data/cache_ops.cc +++ b/tensorflow/core/kernels/data/cache_ops.cc @@ -63,6 +63,11 @@ size_t MemoryCache::size() { return cache_.size(); } +const std::vector>& MemoryCache::data() { + tf_shared_lock l(mu_); + return cache_; +} + AnonymousMemoryCacheHandleOp::AnonymousMemoryCacheHandleOp( OpKernelConstruction* ctx) : AnonymousResourceOp(ctx) {} diff --git a/tensorflow/core/kernels/data/cache_ops.h b/tensorflow/core/kernels/data/cache_ops.h index c670d6f0e50..d95d9d22b01 100644 --- a/tensorflow/core/kernels/data/cache_ops.h +++ b/tensorflow/core/kernels/data/cache_ops.h @@ -46,6 +46,10 @@ class MemoryCache { // Returns the size of the cache. size_t size(); + // Returns a reference to the cache's data. The returned reference will be + // invalidated by any call to Reset(). + const std::vector>& data(); + private: mutex mu_; // Determines whether all elements of the dataset have been cached. diff --git a/tensorflow/core/kernels/data/dataset_utils.cc b/tensorflow/core/kernels/data/dataset_utils.cc index 3161004b7ab..5f0068445a9 100644 --- a/tensorflow/core/kernels/data/dataset_utils.cc +++ b/tensorflow/core/kernels/data/dataset_utils.cc @@ -51,6 +51,9 @@ constexpr std::array kOpsWithSeed = { constexpr char kSeedInputName[] = "seed"; constexpr char kSeed2InputName[] = "seed2"; +constexpr char kComponent[] = "component"; +constexpr char kNumElements[] = "num_elements"; +constexpr char kNumComponents[] = "num_components"; template bool IsNodeOfType(const NodeDef& node, @@ -428,6 +431,49 @@ Status HashGraph(const GraphDef& graph_def, uint64* hash) { return Status::OK(); } +Status WriteElementsToCheckpoint( + IteratorStateWriter* writer, StringPiece key_prefix, + const std::vector>& elements) { + TF_RETURN_IF_ERROR( + writer->WriteScalar(key_prefix, kNumElements, elements.size())); + for (int i = 0; i < elements.size(); ++i) { + const std::vector& element = elements[i]; + std::string element_prefix = absl::StrCat(key_prefix, "::", i); + TF_RETURN_IF_ERROR( + writer->WriteScalar(element_prefix, kNumComponents, element.size())); + for (int j = 0; j < elements[i].size(); ++j) { + TF_RETURN_IF_ERROR(writer->WriteTensor( + element_prefix, absl::StrCat(kComponent, "[", j, "]"), element[j])); + } + } + return Status::OK(); +} + +Status ReadElementsFromCheckpoint(IteratorStateReader* reader, + StringPiece key_prefix, + std::vector>* elements) { + int64 num_elements; + TF_RETURN_IF_ERROR( + reader->ReadScalar(key_prefix, kNumElements, &num_elements)); + elements->reserve(num_elements); + for (int i = 0; i < num_elements; ++i) { + std::string element_prefix = absl::StrCat(key_prefix, "::", i); + int64 num_components; + TF_RETURN_IF_ERROR( + reader->ReadScalar(element_prefix, kNumComponents, &num_components)); + elements->emplace_back(); + std::vector& element = elements->at(i); + element.reserve(num_components); + for (int j = 0; j < num_components; ++j) { + element.emplace_back(); + TF_RETURN_IF_ERROR(reader->ReadTensor( + element_prefix, absl::StrCat(kComponent, "[", j, "]"), + &element.back())); + } + } + return Status::OK(); +} + std::pair MaybeOverrideSeeds(std::pair seeds) { if (seeds.first == 0 && seeds.second == 0) { return {random::New64(), random::New64()}; diff --git 
a/tensorflow/core/kernels/data/dataset_utils.h b/tensorflow/core/kernels/data/dataset_utils.h index 0127fe68641..9a7e274714a 100644 --- a/tensorflow/core/kernels/data/dataset_utils.h +++ b/tensorflow/core/kernels/data/dataset_utils.h @@ -125,6 +125,19 @@ Status HashTensor(const Tensor& tensor, uint64* hash); // the same between TensorFlow builds. Status HashGraph(const GraphDef& graph, uint64* hash); +// Writes dataset elements to the checkpoint writer using the given key prefix. +// The elements can be read back by passing the same key prefix to +// ReadElementsFromCheckpoint. Only one list of elements can be written under +// the same key_prefix. +Status WriteElementsToCheckpoint( + IteratorStateWriter* writer, StringPiece key_prefix, + const std::vector>& elements); + +// Reads dataset elements from the checkpoint reader using the given key prefix. +Status ReadElementsFromCheckpoint(IteratorStateReader* reader, + StringPiece key_prefix, + std::vector>* elements); + // Dataset op level determinism policy. class DeterminismPolicy { public: diff --git a/tensorflow/core/kernels/data/dataset_utils_test.cc b/tensorflow/core/kernels/data/dataset_utils_test.cc index 2abea3ec796..588624a36cc 100644 --- a/tensorflow/core/kernels/data/dataset_utils_test.cc +++ b/tensorflow/core/kernels/data/dataset_utils_test.cc @@ -20,6 +20,7 @@ limitations under the License. #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/framework/variant.h" +#include "tensorflow/core/kernels/data/dataset_test_base.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/protobuf/error_codes.pb.h" @@ -149,6 +150,33 @@ TEST(DatasetUtilsTest, VariantTensorDataWriteAfterFlushing) { writer.WriteTensor(full_name("Tensor"), input_tensor).code()); } +TEST(DatasetUtilsTest, CheckpointElementsRoundTrip) { + std::vector> elements; + elements.push_back(CreateTensors(TensorShape({3}), {{1, 2, 3}})); + elements.push_back(CreateTensors(TensorShape({2}), {{4, 5}})); + VariantTensorDataWriter writer; + tstring test_prefix = full_name("test_prefix"); + TF_ASSERT_OK(WriteElementsToCheckpoint(&writer, test_prefix, elements)); + std::vector data; + writer.GetData(&data); + + VariantTensorDataReader reader(data); + std::vector> read_elements; + TF_ASSERT_OK( + ReadElementsFromCheckpoint(&reader, test_prefix, &read_elements)); + ASSERT_EQ(elements.size(), read_elements.size()); + for (int i = 0; i < elements.size(); ++i) { + std::vector& original = elements[i]; + std::vector& read = read_elements[i]; + + ASSERT_EQ(original.size(), read.size()); + for (int j = 0; j < original.size(); ++j) { + EXPECT_EQ(original[j].NumElements(), read[j].NumElements()); + EXPECT_EQ(original[j].flat()(0), read[j].flat()(0)); + } + } +} + TEST(DatasetUtilsTest, AddToFunctionLibrary) { auto make_fn_a = [](const string& fn_name) { return FunctionDefHelper::Create( diff --git a/tensorflow/python/data/kernel_tests/cache_test.py b/tensorflow/python/data/kernel_tests/cache_test.py index a95424b6843..1fa9c551106 100644 --- a/tensorflow/python/data/kernel_tests/cache_test.py +++ b/tensorflow/python/data/kernel_tests/cache_test.py @@ -33,8 +33,11 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops from tensorflow.python.ops import variables from 
tensorflow.python.platform import test +from tensorflow.python.training import checkpoint_management +from tensorflow.python.training.tracking import util as trackable_utils class FileCacheTest(test_base.DatasetTestBase, parameterized.TestCase): @@ -380,6 +383,24 @@ class MemoryCacheTest(test_base.DatasetTestBase, parameterized.TestCase): for i in range(10): self.assertEqual(next(it), results[i]) + @combinations.generate(test_base.eager_only_combinations()) + def testCheckpointLargeCache(self): + # Tensor of size 100M + dataset = dataset_ops.Dataset.from_tensors( + array_ops.ones((25, 1000, 1000), dtype=dtypes.float32)) + # Repeat 25 times to exceed the 2G proto limit + dataset = dataset.repeat(25) + dataset = dataset.cache() + + # Iterate to fill the cache. + iterator = iter(dataset) + for _ in range(23): + next(iterator) + ckpt = trackable_utils.Checkpoint(iterator=iterator) + manager = checkpoint_management.CheckpointManager( + ckpt, self.get_temp_dir(), max_to_keep=1) + manager.save() + if __name__ == "__main__": test.main() From 43e7dce1901128c79fd20dd9b52972a8926f125b Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Mon, 13 Jul 2020 13:48:23 -0700 Subject: [PATCH 0310/2522] Fix ROCM build by removing optional keyword from protobuf definition PiperOrigin-RevId: 321025792 Change-Id: I676c7672b9de7187bac9763cf8d742321bf8804f --- tensorflow/core/tpu/kernels/tpu_compile.proto | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/tpu/kernels/tpu_compile.proto b/tensorflow/core/tpu/kernels/tpu_compile.proto index 03c0a402337..bdf754493ce 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile.proto +++ b/tensorflow/core/tpu/kernels/tpu_compile.proto @@ -171,15 +171,15 @@ message TpuCompilationRequestProto { repeated TensorProto guaranteed_constants = 7; // MLIR module definition. - optional string mlir_module = 8; + string mlir_module = 8; // A set of named functions used as the input to lowering to Hlo when mlir is // not used. - optional FunctionDefLibrary fdef_lib = 9; + FunctionDefLibrary fdef_lib = 9; // The version of the graph definition used to lower TF function to Hlo. - optional int32 graph_def_version = 10; + int32 graph_def_version = 10; // Function containing the computation to compile. 
- optional NameAttrList function = 11; + NameAttrList function = 11; } From 86f7bc3b89d2f48054fa9eaf14bd53805b9ac584 Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: Mon, 13 Jul 2020 13:54:11 -0700 Subject: [PATCH 0311/2522] Support checkpointing large shuffle buffers PiperOrigin-RevId: 321026957 Change-Id: I4dbd29d3a44c73e2bec288d45cb6bda33fb6e63b --- .../core/kernels/data/shuffle_dataset_op.cc | 48 +++++-------------- .../python/data/kernel_tests/shuffle_test.py | 4 +- 2 files changed, 14 insertions(+), 38 deletions(-) diff --git a/tensorflow/core/kernels/data/shuffle_dataset_op.cc b/tensorflow/core/kernels/data/shuffle_dataset_op.cc index 7ac27ead6c2..7b696371049 100644 --- a/tensorflow/core/kernels/data/shuffle_dataset_op.cc +++ b/tensorflow/core/kernels/data/shuffle_dataset_op.cc @@ -143,7 +143,7 @@ class ShuffleDatasetOpBase::ShuffleDatasetBase : public DatasetBase { seed_generator_(seed_generator), parent_generator_(seed_generator->seed(), seed_generator->seed2()), generator_(&parent_generator_) { - buffer_ = absl::make_unique[]>( + buffer_ = absl::make_unique>>( params.dataset->buffer_size_); slices_.push_back(absl::make_unique(0, 0)); } @@ -201,7 +201,7 @@ class ShuffleDatasetOpBase::ShuffleDatasetBase : public DatasetBase { << this->dataset()->buffer_size_; } this->RecordBufferEnqueue(ctx, input_element); - buffer_[slices_.back()->end % this->dataset()->buffer_size_] = + buffer_->at(slices_.back()->end % this->dataset()->buffer_size_) = std::move(input_element); num_elements_++; slices_.back()->end++; @@ -239,11 +239,11 @@ class ShuffleDatasetOpBase::ShuffleDatasetBase : public DatasetBase { Random() % (slices_.front()->end - slices_.front()->start); int64 index = (slices_.front()->start + offset) % this->dataset()->buffer_size_; - *out_tensors = std::move(buffer_[index]); + *out_tensors = std::move(buffer_->at(index)); this->RecordBufferDequeue(ctx, *out_tensors); - std::swap( - buffer_[index], - buffer_[slices_.front()->start % this->dataset()->buffer_size_]); + std::swap(buffer_->at(index), + buffer_->at(slices_.front()->start % + this->dataset()->buffer_size_)); slices_.front()->start++; num_elements_--; } else { @@ -293,6 +293,7 @@ class ShuffleDatasetOpBase::ShuffleDatasetBase : public DatasetBase { TF_RETURN_IF_ERROR(writer->WriteScalar(this->full_name(kEpoch), epoch_)); TF_RETURN_IF_ERROR( writer->WriteScalar(this->full_name(kNumElements), num_elements_)); + TF_RETURN_IF_ERROR(WriteElementsToCheckpoint(writer, prefix(), *buffer_)); TF_RETURN_IF_ERROR( writer->WriteScalar(this->full_name(kSlicesSize), slices_.size())); for (size_t i = 0; i < slices_.size(); ++i) { @@ -303,19 +304,6 @@ class ShuffleDatasetOpBase::ShuffleDatasetBase : public DatasetBase { TF_RETURN_IF_ERROR(writer->WriteScalar( this->full_name(absl::StrJoin(std::make_tuple(kSlicesEnd, i), "_")), slices_[i]->end)); - for (size_t j = slices_[i]->start; j < slices_[i]->end; ++j) { - size_t index = j % this->dataset()->buffer_size_; - TF_RETURN_IF_ERROR(writer->WriteScalar( - this->full_name( - absl::StrJoin(std::make_tuple(kBuffer, index, kSize), "_")), - buffer_[index].size())); - for (size_t k = 0; k < buffer_[index].size(); ++k) { - TF_RETURN_IF_ERROR(writer->WriteTensor( - this->full_name( - absl::StrJoin(std::make_tuple(kBuffer, index, k), "_")), - buffer_[index][k])); - } - } } if (data_produced_) { TF_RETURN_IF_ERROR( @@ -360,8 +348,10 @@ class ShuffleDatasetOpBase::ShuffleDatasetBase : public DatasetBase { reader->ReadScalar(this->full_name(kSlicesSize), &temp)); slices_size = static_cast(temp); } - 
buffer_ = absl::make_unique[]>( + buffer_ = absl::make_unique>>( this->dataset()->buffer_size_); + TF_RETURN_IF_ERROR( + ReadElementsFromCheckpoint(reader, prefix(), buffer_.get())); slices_.clear(); for (size_t i = 0; i < slices_size; ++i) { int64 start; @@ -374,21 +364,6 @@ class ShuffleDatasetOpBase::ShuffleDatasetBase : public DatasetBase { this->full_name(absl::StrJoin(std::make_tuple(kSlicesEnd, i), "_")), &end)); slices_.push_back(absl::make_unique(start, end)); - for (size_t j = start; j < end; ++j) { - size_t index = j % this->dataset()->buffer_size_; - int64 list_size; - TF_RETURN_IF_ERROR(reader->ReadScalar( - this->full_name( - absl::StrJoin(std::make_tuple(kBuffer, index, kSize), "_")), - &list_size)); - buffer_[index] = std::vector(list_size); - for (int k = 0; k < list_size; ++k) { - TF_RETURN_IF_ERROR(reader->ReadTensor( - this->full_name( - absl::StrJoin(std::make_tuple(kBuffer, index, k), "_")), - &buffer_[index][k])); - } - } } data_produced_ = reader->Contains(this->full_name(kDataProduced)); @@ -421,7 +396,8 @@ class ShuffleDatasetOpBase::ShuffleDatasetBase : public DatasetBase { mutex mu_; SeedGenerator* const seed_generator_ TF_GUARDED_BY(mu_); // Not owned. - std::unique_ptr[]> buffer_ TF_GUARDED_BY(mu_); + std::unique_ptr>> buffer_ + TF_GUARDED_BY(mu_); std::unique_ptr input_impl_ TF_GUARDED_BY(mu_) = nullptr; int64 epoch_ TF_GUARDED_BY(mu_) = 0; int64 num_elements_ TF_GUARDED_BY(mu_) = 0; diff --git a/tensorflow/python/data/kernel_tests/shuffle_test.py b/tensorflow/python/data/kernel_tests/shuffle_test.py index ce30dcbb9a3..07ef600ffac 100644 --- a/tensorflow/python/data/kernel_tests/shuffle_test.py +++ b/tensorflow/python/data/kernel_tests/shuffle_test.py @@ -364,8 +364,8 @@ class ShuffleTest(test_base.DatasetTestBase, parameterized.TestCase): ckpt = trackable_utils.Checkpoint(iterator=iterator) manager = checkpoint_management.CheckpointManager( ckpt, self.get_temp_dir(), max_to_keep=1) - with self.assertRaisesRegex(errors.UnknownError, "Failed to serialize"): - manager.save() + manager.save() + ckpt.restore(manager.latest_checkpoint) if __name__ == "__main__": From df56513aa9b004313c2cff818e138d78cd667b67 Mon Sep 17 00:00:00 2001 From: Haoliang Zhang Date: Mon, 13 Jul 2020 14:20:48 -0700 Subject: [PATCH 0312/2522] Let Tensorlist pass recognize all types of integers (signed/unsigned/signless). 
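For context on the check being relaxed: Type::isSignlessInteger(w) matches
only the signless form (i8, i16, ...), so tensor list element types that
carry explicit signedness semantics (ui8, si32, ...) used to fall through to
the "requires element_dtype ..." error. Type::isInteger(w) matches an
integer of width w regardless of signedness. A minimal sketch of the relaxed
predicate as a standalone helper (the helper name is illustrative only, not
part of this change):

    #include "mlir/IR/Types.h"

    // Accept any 1/8/16/32/64-bit integer element type, whether it is
    // signless (i8), signed (si8) or unsigned (ui8).
    bool IsAcceptedIntegerElementType(mlir::Type dtype) {
      return dtype.isInteger(1) || dtype.isInteger(8) ||
             dtype.isInteger(16) || dtype.isInteger(32) ||
             dtype.isInteger(64);
    }
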
PiperOrigin-RevId: 321032559 Change-Id: I25b4afaa77eeb0479bf8e47937c086be5c48bd42 --- .../mlir/lite/transforms/lower_static_tensor_list.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/transforms/lower_static_tensor_list.cc b/tensorflow/compiler/mlir/lite/transforms/lower_static_tensor_list.cc index 2498a732a86..c76a6cfafab 100644 --- a/tensorflow/compiler/mlir/lite/transforms/lower_static_tensor_list.cc +++ b/tensorflow/compiler/mlir/lite/transforms/lower_static_tensor_list.cc @@ -332,9 +332,8 @@ struct ConvertTensorListInitOp : public OpConversionPattern { ConversionPatternRewriter &rewriter) const override { Type dtype = op.element_dtype(); if (!(dtype.isF16() || dtype.isF32() || dtype.isF64() || - dtype.isInteger(1) || dtype.isSignlessInteger(8) || - dtype.isSignlessInteger(16) || dtype.isSignlessInteger(32) || - dtype.isSignlessInteger(64))) { + dtype.isInteger(1) || dtype.isInteger(8) || dtype.isInteger(16) || + dtype.isInteger(32) || dtype.isInteger(64))) { op.emitError( "requires element_dtype to be 1-bit/8-bit/16-bit/32-bit/64-bit " "integer or 16-bit/32-bit/64-bit float type during TF Lite " From b24a9ce042e5b945cb3b3b81699c0e7a862dc729 Mon Sep 17 00:00:00 2001 From: Yixing Fu Date: Mon, 13 Jul 2020 17:25:52 -0400 Subject: [PATCH 0313/2522] show as skipped if not testing pathlib --- tensorflow/python/keras/saving/save_test.py | 45 ++++++++++++--------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/tensorflow/python/keras/saving/save_test.py b/tensorflow/python/keras/saving/save_test.py index 5b5da8c5047..63f2e279493 100644 --- a/tensorflow/python/keras/saving/save_test.py +++ b/tensorflow/python/keras/saving/save_test.py @@ -73,10 +73,11 @@ class TestSaveModel(test.TestCase, parameterized.TestCase): @test_util.run_v2_only def test_save_format_defaults_pathlib(self): - if sys.version_info >= (3, 6): - path = pathlib.Path(self.get_temp_dir()) / 'model_path' - save.save_model(self.model, path) - self.assert_saved_model(path) + if sys.version_info < (3, 6): + self.skipTest('pathlib is only available for python version >= 3.6') + path = pathlib.Path(self.get_temp_dir()) / 'model_path' + save.save_model(self.model, path) + self.assert_saved_model(path) @test_util.run_v2_only def test_save_hdf5(self): @@ -90,10 +91,11 @@ class TestSaveModel(test.TestCase, parameterized.TestCase): @test_util.run_v2_only def test_save_load_hdf5_pathlib(self): - if sys.version_info >= (3, 6): - path = pathlib.Path(self.get_temp_dir()) / 'model' - save.save_model(self.model, path, save_format='h5') - save.load_model(path) + if sys.version_info < (3, 6): + self.skipTest('pathlib is only available for python version >= 3.6') + path = pathlib.Path(self.get_temp_dir()) / 'model' + save.save_model(self.model, path, save_format='h5') + save.load_model(path) @test_util.run_v2_only def test_save_tf(self): @@ -114,24 +116,27 @@ class TestSaveModel(test.TestCase, parameterized.TestCase): @test_util.run_v2_only def test_save_load_tf_pathlib(self): - if sys.version_info >= (3, 6): - path = pathlib.Path(self.get_temp_dir()) / 'model' - save.save_model(self.model, path, save_format='tf') - save.load_model(path) + if sys.version_info < (3, 6): + self.skipTest('pathlib is only available for python version >= 3.6') + path = pathlib.Path(self.get_temp_dir()) / 'model' + save.save_model(self.model, path, save_format='tf') + save.load_model(path) @test_util.run_v2_only def test_save_load_weights_tf_pathlib(self): - if sys.version_info >= (3, 6): - path = 
pathlib.Path(self.get_temp_dir()) / 'model' - self.model.save_weights(path, save_format='tf') - self.model.load_weights(path) + if sys.version_info < (3, 6): + self.skipTest('pathlib is only available for python version >= 3.6') + path = pathlib.Path(self.get_temp_dir()) / 'model' + self.model.save_weights(path, save_format='tf') + self.model.load_weights(path) @test_util.run_v2_only def test_save_load_weights_hdf5_pathlib(self): - if sys.version_info >= (3, 6): - path = pathlib.Path(self.get_temp_dir()) / 'model' - self.model.save_weights(path, save_format='h5') - self.model.load_weights(path) + if sys.version_info < (3, 6): + self.skipTest('pathlib is only available for python version >= 3.6') + path = pathlib.Path(self.get_temp_dir()) / 'model' + self.model.save_weights(path, save_format='h5') + self.model.load_weights(path) @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def test_saving_with_dense_features(self): From 749d3eb240e941769864e345314acf46575c9569 Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Mon, 13 Jul 2020 21:42:04 +0000 Subject: [PATCH 0314/2522] changed naming to ScalarSummary and removed previous registration in core --- tensorflow/c/kernels/BUILD | 7 ++-- tensorflow/c/kernels/ops/summary.cc | 4 +-- tensorflow/c/kernels/summary_op.cc | 12 ++----- tensorflow/c/kernels/summary_op_test.cc | 4 +-- tensorflow/core/kernels/summary_op.cc | 44 ------------------------- tensorflow/core/ops/logging_ops.cc | 7 ---- 6 files changed, 12 insertions(+), 66 deletions(-) diff --git a/tensorflow/c/kernels/BUILD b/tensorflow/c/kernels/BUILD index b713b27f5dc..7e103514645 100644 --- a/tensorflow/c/kernels/BUILD +++ b/tensorflow/c/kernels/BUILD @@ -88,12 +88,15 @@ tf_cc_test( filegroup( name = "android_all_op_kernels", srcs = [ - "bitcast_op.cc", "summary_op.cc" + "bitcast_op.cc", + "summary_op.cc" ], ) # LINT.ThenChange(//tensorflow/contrib/makefile/tf_op_files.txt) filegroup( name = "android_all_ops", - srcs = ["ops/bitcast.cc", "ops/summary.cc"], + srcs = ["ops/bitcast.cc", + "ops/summary.cc" + ], ) diff --git a/tensorflow/c/kernels/ops/summary.cc b/tensorflow/c/kernels/ops/summary.cc index 355d73396b6..be39cd0f530 100644 --- a/tensorflow/c/kernels/ops/summary.cc +++ b/tensorflow/c/kernels/ops/summary.cc @@ -41,7 +41,7 @@ void Register_ScalarSummaryOp() { TF_Status* status = TF_NewStatus(); TF_OpDefinitionBuilder* op_builder = - TF_NewOpDefinitionBuilder("SummaryScalar"); + TF_NewOpDefinitionBuilder("ScalarSummary"); TF_OpDefinitionBuilderAddInput(op_builder, "tags: string"); TF_OpDefinitionBuilderAddInput(op_builder, "values: T"); TF_OpDefinitionBuilderAddOutput(op_builder, "summary: string"); @@ -56,7 +56,7 @@ void Register_ScalarSummaryOp() { } TF_ATTRIBUTE_UNUSED static bool SummaryScalarOpRegistered = []() { - if (SHOULD_REGISTER_OP("SummaryScalar")) { + if (SHOULD_REGISTER_OP("ScalarSummary")) { Register_ScalarSummaryOp(); } return true; diff --git a/tensorflow/c/kernels/summary_op.cc b/tensorflow/c/kernels/summary_op.cc index 18aa897bfa9..5db4a239905 100644 --- a/tensorflow/c/kernels/summary_op.cc +++ b/tensorflow/c/kernels/summary_op.cc @@ -34,17 +34,11 @@ typedef struct Params{ if (TF_GetCode(status) == TF_OK){ TF_GetInput(ctx, 1, &values, status); } - else { - values = nullptr; - } }; ~Params(){ TF_DeleteStatus(status); TF_DeleteTensor(tags); - // edge case if params fails to initialize - if (values != nullptr){ - TF_DeleteTensor(values); - } + TF_DeleteTensor(values); } }; @@ -134,7 +128,7 @@ template void RegisterSummaryScalarOpKernel() { 
TF_Status* status = TF_NewStatus(); { - auto* builder = TF_NewKernelBuilder("SummaryScalar", + auto* builder = TF_NewKernelBuilder("ScalarSummary", tensorflow::DEVICE_CPU, &SummaryScalarOp_Create, &SummaryScalarOp_Compute, @@ -143,7 +137,7 @@ void RegisterSummaryScalarOpKernel() { static_cast(tensorflow::DataTypeToEnum::v()), status); CHECK_EQ(TF_OK, TF_GetCode(status)) << "Error while adding type constraint"; - TF_RegisterKernelBuilder("SummaryScalar", builder, status); + TF_RegisterKernelBuilder("ScalarSummary", builder, status); CHECK_EQ(TF_OK, TF_GetCode(status)) << "Error while registering Summary Scalar kernel"; } diff --git a/tensorflow/c/kernels/summary_op_test.cc b/tensorflow/c/kernels/summary_op_test.cc index 722373d36ce..ad5fafe5530 100644 --- a/tensorflow/c/kernels/summary_op_test.cc +++ b/tensorflow/c/kernels/summary_op_test.cc @@ -45,7 +45,7 @@ void TestScalarSummaryOp(Tensor* tags, Tensor* values, string expected_summary, // Initialize node used to fetch OpKernel Status status; NodeDef def; - def.set_op("SummaryScalar"); + def.set_op("ScalarSummary"); def.set_device(DEVICE_CPU); @@ -165,7 +165,7 @@ TEST(ScalarSummaryOpTest, Error_WrongWithSingleTag) { TEST(ScalarSummaryOpTest, IsRegistered){ const OpRegistrationData* reg; - TF_CHECK_OK(OpRegistry::Global()->LookUp("SummaryScalar", ®)); + TF_CHECK_OK(OpRegistry::Global()->LookUp("ScalarSummary", ®)); } } // namespace diff --git a/tensorflow/core/kernels/summary_op.cc b/tensorflow/core/kernels/summary_op.cc index f4c91fc9ff1..22d1a21a889 100644 --- a/tensorflow/core/kernels/summary_op.cc +++ b/tensorflow/core/kernels/summary_op.cc @@ -31,47 +31,6 @@ limitations under the License. namespace tensorflow { -template -class SummaryScalarOp : public OpKernel { - public: - explicit SummaryScalarOp(OpKernelConstruction* context) : OpKernel(context) {} - - void Compute(OpKernelContext* c) override { - const Tensor& tags = c->input(0); - const Tensor& values = c->input(1); - - OP_REQUIRES( - c, - tags.IsSameSize(values) || (TensorShapeUtils::IsScalar(tags.shape()) && - TensorShapeUtils::IsScalar(values.shape())), - errors::InvalidArgument( - "tags and values not the same shape: ", tags.shape().DebugString(), - " != ", values.shape().DebugString(), SingleTag(tags))); - auto Ttags = tags.flat(); - auto Tvalues = values.flat(); - Summary s; - for (int i = 0; i < Ttags.size(); i++) { - Summary::Value* v = s.add_value(); - const tstring& Ttags_i = Ttags(i); - v->set_tag(Ttags_i.data(), Ttags_i.size()); - v->set_simple_value(float(Tvalues(i))); - } - - Tensor* summary_tensor = nullptr; - OP_REQUIRES_OK(c, c->allocate_output(0, TensorShape({}), &summary_tensor)); - CHECK(SerializeToTString(s, &summary_tensor->scalar()())); - } - - // If there's only one tag, include it in the error message - static string SingleTag(const Tensor& tags) { - if (tags.NumElements() == 1) { - return strings::StrCat(" (tag '", tags.flat()(0), "')"); - } else { - return ""; - } - } -}; - template class SummaryHistoOp : public OpKernel { public: @@ -114,9 +73,6 @@ class SummaryHistoOp : public OpKernel { }; #define REGISTER(T) \ - REGISTER_KERNEL_BUILDER( \ - Name("ScalarSummary").Device(DEVICE_CPU).TypeConstraint("T"), \ - SummaryScalarOp); \ REGISTER_KERNEL_BUILDER( \ Name("HistogramSummary").Device(DEVICE_CPU).TypeConstraint("T"), \ SummaryHistoOp); diff --git a/tensorflow/core/ops/logging_ops.cc b/tensorflow/core/ops/logging_ops.cc index 6489074b546..4d5ba6873a7 100644 --- a/tensorflow/core/ops/logging_ops.cc +++ b/tensorflow/core/ops/logging_ops.cc @@ -87,13 
+87,6 @@ REGISTER_OP("TensorSummary") .Attr("display_name: string = ''") .SetShapeFn(shape_inference::ScalarShape); -REGISTER_OP("ScalarSummary") - .Input("tags: string") - .Input("values: T") - .Output("summary: string") - .Attr("T: realnumbertype") - .SetShapeFn(shape_inference::ScalarShape); - REGISTER_OP("HistogramSummary") .Input("tag: string") .Input("values: T") From 8126887548fb4d88eada626da90fe4f0e6991a91 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Jul 2020 14:38:27 -0700 Subject: [PATCH 0315/2522] [MLIR:TF] Fold no-op reshape operations PiperOrigin-RevId: 321036337 Change-Id: Ia7af5a53d766eb6c4a574d9aa4127ad668f8e0ca --- .../mlir/tensorflow/ir/tf_generated_ops.td | 1 - .../compiler/mlir/tensorflow/ir/tf_ops.cc | 95 ----------------- .../mlir/tensorflow/tests/canonicalize.mlir | 67 ------------ .../mlir/tensorflow/tests/lower_tf.mlir | 16 ++- .../tensorflow/tests/unroll-batch-matmul.mlir | 100 ++++++++++-------- 5 files changed, 68 insertions(+), 211 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td index 0ef650487a8..7bbdce6b985 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td @@ -7284,7 +7284,6 @@ reshape(t, []) ==> 7 }]; let hasCanonicalizer = 1; - let hasFolder = 1; } def TF_ResizeBilinearOp : TF_Op<"ResizeBilinear", [NoSideEffect]> { diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc index 83ad319002a..101de17122a 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc @@ -2917,101 +2917,6 @@ void ReshapeOp::getCanonicalizationPatterns(OwningRewritePatternList &results, results.insert(context); } -OpFoldResult ReshapeOp::fold(ArrayRef operands) { - Value tensor = this->tensor(); - Value shape = this->shape(); - - // Fold reshape if operand and result types are the same and all dimensions - // are statically known (no-op reshape). - // TODO(ezhulenev): Add the same folding for BroadcastToOp. - auto result_ty = getType().dyn_cast(); - if (result_ty && result_ty.hasStaticShape() && - result_ty == tensor.getType()) { - return tensor; - } - - // Fold reshape if the shape is computed from the input tensor: - // - // %shape = tf.Shape(%arg) // [? x ...] - // %dim0 = tf.StridedSlice(%shape, 0, 1, 1) // get unknown dim value - // %new_shape = tf.Pack(dim0, ...) { axis = 0 } // [? x ...] - // %reshape = tf.Reshape(%arg, %new_shape) // this is no-op - // - // Where `...` are some statically known dimensions. In this case reshape is - // a no-op and can be replaced by %arg (assuming `...` are equal). - auto pack_op = dyn_cast_or_null(shape.getDefiningOp()); - if (!pack_op || pack_op.values().size() < 2) return {}; - - // Dimensions packed along axis = 0 (pack scalars into vector). - if (pack_op.axis().getSExtValue() != 0) return {}; - - // First packed value is defined by a strided slice operation. - auto slice_op = - dyn_cast_or_null(pack_op.values()[0].getDefiningOp()); - if (!slice_op) return {}; - - // Input to the slice op is defined by shape operation. - auto shape_op = dyn_cast_or_null(slice_op.input().getDefiningOp()); - if (!shape_op || shape_op.input() != tensor) return {}; - - // All masks are `0` except `shrink_axis_mask` which is equal to `1` (slicing - // scalar value from input vector). 
- if (slice_op.begin_mask().getSExtValue() != 0 || - slice_op.ellipsis_mask().getSExtValue() != 0 || - slice_op.end_mask().getSExtValue() != 0 || - slice_op.new_axis_mask().getSExtValue() != 0 || - slice_op.shrink_axis_mask().getSExtValue() != 1) - return {}; - - // Returns a value if the `value` is defined by a ConstOp with a single - // integer element in it and has an expected rank. - auto get_value = [](Value value, int expected_rank) -> Optional { - auto const_op = dyn_cast_or_null(value.getDefiningOp()); - if (!const_op) return None; - - auto value_attr = const_op.value().dyn_cast(); - if (!value_attr || value_attr.getNumElements() != 1) return None; - - auto value_ty = value_attr.getType(); - if (!value_ty.hasRank() || value_ty.getRank() != expected_rank) return None; - - auto splat = value_attr.getSplatValue(); - return splat.getValue().getSExtValue(); - }; - - // All other packed values are scalar constants. - SmallVector packed_dims; - packed_dims.reserve(pack_op.values().size() - 1); - for (Value operand : llvm::drop_begin(pack_op.values(), 1)) { - if (auto dim = get_value(operand, /*expected_rank=*/0)) { - packed_dims.push_back(*dim); - } else { - return {}; - } - } - - // Slice exactly the first shape dimension: - // begin = [0] end = [1], strides = [1] - auto begin = get_value(slice_op.begin(), /*expected_rank=*/1); - auto end = get_value(slice_op.end(), /*expected_rank=*/1); - auto strides = get_value(slice_op.strides(), /*expected_rank=*/1); - if (!begin.hasValue() || !end.hasValue() || !strides.hasValue() || - *begin != 0 || *end != 1 || *strides != 1) - return {}; - - // First tensor dimension is dynamic. - auto arg_ty = tensor.getType().dyn_cast(); - if (!arg_ty || arg_ty.getNumDynamicDims() != 1 || !arg_ty.isDynamicDim(0)) - return {}; - - // All other dimensions are statically known and equal to packed dims. - auto arg_dims = llvm::drop_begin(arg_ty.getShape(), 1); - if (!std::equal(arg_dims.begin(), arg_dims.end(), packed_dims.begin())) - return {}; - - return tensor; -} - //===----------------------------------------------------------------------===// // SelectOp //===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir b/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir index 3a948bdd2c3..8597740a4ae 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir @@ -377,73 +377,6 @@ func @testRedundantReshape(%arg0: tensor<4x4xi32>) -> tensor<2x8xi32> { // CHECK: return %1 : tensor<2x8xi32> } -// CHECK-LABEL: func @testReshapeNoOp -func @testReshapeNoOp(%arg0: tensor<2x4xf32>, %arg1: tensor<2xi32>) -> tensor<2x4xf32> { - %0 = "tf.Reshape"(%arg0, %arg1) : (tensor<2x4xf32>, tensor<2xi32>) -> tensor<2x4xf32> - - // CHECK: return %arg0 - return %0 : tensor<2x4xf32> -} - -// CHECK-LABEL: func @testReshapeNoOpShapeComputation -func @testReshapeNoOpShapeComputation(%arg0: tensor, %arg1: tensor) -> (tensor, tensor, tensor, tensor, tensor) { - // Test dimensions sizes. - %d1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor - %d2 = "tf.Const"() {value = dense<2> : tensor} : () -> tensor - - // Slice bounds. 
- %0 = "tf.Const"() {value = dense<0> : tensor<1xi32>} : () -> tensor<1xi32> - %1 = "tf.Const"() {value = dense<1> : tensor<1xi32>} : () -> tensor<1xi32> - %2 = "tf.Const"() {value = dense<2> : tensor<1xi32>} : () -> tensor<1xi32> - - // Fold reshape if the shape is computed from the input tensor: - // - // %shape = tf.Shape(%arg) // [? x ...] - // %dim0 = tf.StridedSlice(%shape, 0, 1, 1) // get unknown dim value - // %new_shape = tf.Pack(dim0, ...) { axis = 0 } // [? x ...] - // %reshape = tf.Reshape(%arg, %new_shape) // this is no-op - // - // Where `...` are some statically known dimensions. In this case reshape is - // a no-op and can be replaced by %arg (assuming `...` are equal). - - // Test Rank 2 - %3 = "tf.Shape"(%arg0) : (tensor) -> tensor<2xi32> - %4 = "tf.StridedSlice"(%3, %0, %1, %1) {shrink_axis_mask = 1 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %5 = "tf.Pack"(%4, %d1) {axis = 0 : i64} : (tensor, tensor) -> tensor<2xi32> - %6 = "tf.Reshape"(%arg0, %5) : (tensor, tensor<2xi32>) -> tensor - - // Test Rank 3. - - %7 = "tf.Shape"(%arg1) : (tensor) -> tensor<3xi32> - %8 = "tf.StridedSlice"(%7, %0, %1, %1) {shrink_axis_mask = 1 : i64} : (tensor<3xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %9 = "tf.Pack"(%8, %d1, %d2) {axis = 0 : i64} : (tensor, tensor, tensor) -> tensor<3xi32> - %10 = "tf.Reshape"(%arg1, %9) : (tensor, tensor<3xi32>) -> tensor - - // Shape was taken from the op that is not reshaped in the end: - // Reshape(%arg1) vs Shape(%arg0) - %11 = "tf.StridedSlice"(%3, %0, %1, %1) {shrink_axis_mask = 1 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %12 = "tf.Pack"(%11, %d1, %d2) {axis = 0 : i64} : (tensor, tensor, tensor) -> tensor<3xi32> - // CHECK: %[[RESHAPE0:.*]] = "tf.Reshape" - %13 = "tf.Reshape"(%arg1, %12) : (tensor, tensor<3xi32>) -> tensor - - // Packed dimensions have different order from the reshape operand: - // [?, 1, 2] vs [?, 2, 1] - %14 = "tf.StridedSlice"(%7, %0, %1, %1) {shrink_axis_mask = 1 : i64} : (tensor<3xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %15 = "tf.Pack"(%14, %d2, %d1) {axis = 0 : i64} : (tensor, tensor, tensor) -> tensor<3xi32> - // CHECK: %[[RESHAPE1:.*]] = "tf.Reshape" - %16 = "tf.Reshape"(%arg1, %15) : (tensor, tensor<3xi32>) -> tensor - - // StridedSlice takes second dimension from the shape: - // begin = [1], end = [2], stride = [1] - %17 = "tf.StridedSlice"(%7, %1, %2, %1) {shrink_axis_mask = 1 : i64} : (tensor<3xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %18 = "tf.Pack"(%17, %d1, %d2) {axis = 0 : i64} : (tensor, tensor, tensor) -> tensor<3xi32> - // CHECK: %[[RESHAPE2:.*]] = "tf.Reshape" - %19 = "tf.Reshape"(%arg1, %18) : (tensor, tensor<3xi32>) -> tensor - - // CHECK: return %arg0, %arg1, %[[RESHAPE0]], %[[RESHAPE1]], %[[RESHAPE2]] - return %6, %10, %13, %16, %19 : tensor, tensor, tensor, tensor, tensor -} - // CHECK-LABEL: testSelectScalarPred func @testSelectScalarPred(%arg0: tensor, %arg1: tensor<4x2xf16>, %arg2: tensor<4x2xf16>) -> tensor<4x2xf16> { // CHECK-NEXT: "tf.SelectV2"(%arg0, %arg1, %arg2) : (tensor, tensor<4x2xf16>, tensor<4x2xf16>) -> tensor<4x2xf16> diff --git a/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir b/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir index 78e10fa797f..3215055a249 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir @@ -371,7 +371,9 @@ func @addN_variant(%arg0: 
tensor>>, %arg1: tensor) -> tensor<2x2xf32> { - // CHECK-DAG: %[[ITEMS:.*]]:2 = "tf.Unpack"(%arg0) {axis = 0 : i64} : (tensor<2x2xf32>) -> (tensor<2xf32>, tensor<2xf32>) + // CHECK-DAG: %[[SHAPE:.*]] = "tf.Const"() {value = dense<[-1, 2]> : tensor<2xi64>} : () -> tensor<2xi64> + // CHECK-DAG: %[[INP:.*]] = "tf.Reshape"(%arg0, %[[SHAPE]]) : (tensor<2x2xf32>, tensor<2xi64>) -> tensor<2x2xf32> + // CHECK-DAG: %[[ITEMS:.*]]:2 = "tf.Unpack"(%[[INP]]) {axis = 0 : i64} : (tensor<2x2xf32>) -> (tensor<2xf32>, tensor<2xf32>) // CHECK-DAG: %[[AXIS:.*]] = "tf.Const"() {value = dense<0> : tensor} : () -> tensor // CHECK-DAG: %[[RESULT:.*]] = "tf.ConcatV2"(%[[ITEMS]]#1, %[[ITEMS]]#0, %[[AXIS]]) : (tensor<2xf32>, tensor<2xf32>, tensor) -> tensor<2x2xf32> // CHECK: return %[[RESULT]] @@ -409,7 +411,9 @@ func @DynamicStitch_uint8(%arg0: tensor<2x2xui8>) -> tensor<2x2xui8> { // CHECK-LABEL: func @DynamicStitch_scalar_item func @DynamicStitch_scalar_item(%arg0: tensor<2xf32>) -> tensor<2xf32> { - // CHECK-DAG: %[[ITEMS]]:2 = "tf.Unpack"(%arg0) {axis = 0 : i64} : (tensor<2xf32>) -> (tensor, tensor) + // CHECK-DAG: %[[SHAPE:.*]] = "tf.Const"() {value = dense<-1> : tensor<1xi64>} : () -> tensor<1xi64> + // CHECK-DAG: %[[INP:.*]] = "tf.Reshape"(%arg0, %[[SHAPE]]) : (tensor<2xf32>, tensor<1xi64>) -> tensor<2xf32> + // CHECK-DAG: %[[ITEMS]]:2 = "tf.Unpack"(%[[INP]]) {axis = 0 : i64} : (tensor<2xf32>) -> (tensor, tensor) // CHECK-DAG: %[[AXIS:.*]] = "tf.Const"() {value = dense<0> : tensor} : () -> tensor // CHECK-DAG: %[[RESULT]] = "tf.ConcatV2"(%[[ITEMS]]#1, %[[ITEMS]]#0, %[[AXIS]]) : (tensor, tensor, tensor) -> tensor<2xf32> // CHECK: return %[[RESULT]] @@ -421,7 +425,9 @@ func @DynamicStitch_scalar_item(%arg0: tensor<2xf32>) -> tensor<2xf32> { // CHECK-LABEL: func @DynamicStitch_matrix_item func @DynamicStitch_matrix_item(%arg0: tensor<2x2x2xf32>) -> tensor<2x2x2xf32> { - // CHECK-DAG: %[[ITEMS:.*]]:2 = "tf.Unpack"(%arg0) {axis = 0 : i64} : (tensor<2x2x2xf32>) -> (tensor<2x2xf32>, tensor<2x2xf32>) + // CHECK-DAG: %[[SHAPE:.*]] = "tf.Const"() {value = dense<[-1, 2, 2]> : tensor<3xi64>} : () -> tensor<3xi64> + // CHECK-DAG: %[[INP:.*]] = "tf.Reshape"(%arg0, %[[SHAPE]]) : (tensor<2x2x2xf32>, tensor<3xi64>) -> tensor<2x2x2xf32> + // CHECK-DAG: %[[ITEMS:.*]]:2 = "tf.Unpack"(%[[INP]]) {axis = 0 : i64} : (tensor<2x2x2xf32>) -> (tensor<2x2xf32>, tensor<2x2xf32>) // CHECK-DAG: %[[AXIS:.*]] = "tf.Const"() {value = dense<0> : tensor} : () -> tensor // CHECK-DAG: %[[RESULT:.*]] = "tf.ConcatV2"(%[[ITEMS]]#1, %[[ITEMS]]#0, %[[AXIS]]) : (tensor<2x2xf32>, tensor<2x2xf32>, tensor) -> tensor<2x2x2xf32> // CHECK: return %[[RESULT]] @@ -440,7 +446,9 @@ func @DynamicStitch_dynamic(%arg0: tensor<*xi32>, %arg1: tensor<*xf32>) -> tenso // CHECK-LABEL: func @DynamicStitch_duplicates func @DynamicStitch_duplicates(%arg0: tensor<2x2xf32>) -> tensor<1x2xf32> { - // CHECK-DAG: %[[ITEMS:.*]]:2 = "tf.Unpack"(%arg0) {axis = 0 : i64} : (tensor<2x2xf32>) -> (tensor<2xf32>, tensor<2xf32>) + // CHECK-DAG: %[[SHAPE:.*]] = "tf.Const"() {value = dense<[-1, 2]> : tensor<2xi64>} : () -> tensor<2xi64> + // CHECK-DAG: %[[INP:.*]] = "tf.Reshape"(%arg0, %[[SHAPE]]) : (tensor<2x2xf32>, tensor<2xi64>) -> tensor<2x2xf32> + // CHECK-DAG: %[[ITEMS:.*]]:2 = "tf.Unpack"(%[[INP]]) {axis = 0 : i64} : (tensor<2x2xf32>) -> (tensor<2xf32>, tensor<2xf32>) // CHECK-DAG: %[[AXIS:.*]] = "tf.Const"() {value = dense<0> : tensor} : () -> tensor // CHECK-DAG: %[[RESULT:.*]] = "tf.ConcatV2"(%[[ITEMS]]#1, %[[AXIS]]) : (tensor<2xf32>, tensor) -> tensor<1x2xf32> // CHECK: return 
%[[RESULT]] diff --git a/tensorflow/compiler/mlir/tensorflow/tests/unroll-batch-matmul.mlir b/tensorflow/compiler/mlir/tensorflow/tests/unroll-batch-matmul.mlir index 7cf5f19523d..5a3f0b6e997 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/unroll-batch-matmul.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/unroll-batch-matmul.mlir @@ -67,35 +67,41 @@ func @batchMatMulV2FlatInput(%arg0: tensor<3x4x5xf32>, %arg1: tensor<3x5x6xf32>) return %0 : tensor<3x4x6xf32> // CHECK-LABEL: batchMatMulV2FlatInput + // CHECK: %[[cst:.*]] = "tf.Const"() {value = dense<[3, 4, 5]> : tensor<3xi64>} // CHECK: %[[cst_0:.*]] = "tf.Const"() {value = dense<[1, 4, 5]> : tensor<3xi64>} // CHECK: %[[cst_1:.*]] = "tf.Const"() {value = dense<[4, 5]> : tensor<2xi64>} - // CHECK: %[[cst_2:.*]] = "tf.Const"() {value = dense<0> : tensor<3xi64>} - // CHECK: %[[cst_3:.*]] = "tf.Const"() {value = dense<[1, 0, 0]> : tensor<3xi64>} - // CHECK: %[[cst_4:.*]] = "tf.Const"() {value = dense<[2, 0, 0]> : tensor<3xi64>} - // CHECK: %[[cst_5:.*]] = "tf.Const"() {value = dense<[1, 5, 6]> : tensor<3xi64>} - // CHECK: %[[cst_6:.*]] = "tf.Const"() {value = dense<[5, 6]> : tensor<2xi64>} + // CHECK: %[[cst_2:.*]] = "tf.Const"() {value = dense<[3, 5, 6]> : tensor<3xi64>} + // CHECK: %[[cst_3:.*]] = "tf.Const"() {value = dense<0> : tensor<3xi64>} + // CHECK: %[[cst_4:.*]] = "tf.Const"() {value = dense<[1, 0, 0]> : tensor<3xi64>} + // CHECK: %[[cst_5:.*]] = "tf.Const"() {value = dense<[2, 0, 0]> : tensor<3xi64>} + // CHECK: %[[cst_6:.*]] = "tf.Const"() {value = dense<[1, 5, 6]> : tensor<3xi64>} + // CHECK: %[[cst_7:.*]] = "tf.Const"() {value = dense<[5, 6]> : tensor<2xi64>} + // CHECK: %[[cst_8:.*]] = "tf.Const"() {value = dense<[3, 4, 6]> : tensor<3xi64>} - // CHECK: %[[v0:.*]] = "tf.Slice"(%arg0, %[[cst_2]], %[[cst_0]]) : (tensor<3x4x5xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x4x5xf32> - // CHECK: %[[v1:.*]] = "tf.Reshape"(%[[v0]], %[[cst_1]]) : (tensor<1x4x5xf32>, tensor<2xi64>) -> tensor<4x5xf32> - // CHECK: %[[v2:.*]] = "tf.Slice"(%arg0, %[[cst_3]], %[[cst_0]]) : (tensor<3x4x5xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x4x5xf32> - // CHECK: %[[v3:.*]] = "tf.Reshape"(%[[v2]], %[[cst_1]]) : (tensor<1x4x5xf32>, tensor<2xi64>) -> tensor<4x5xf32> - // CHECK: %[[v4:.*]] = "tf.Slice"(%arg0, %[[cst_4]], %[[cst_0]]) : (tensor<3x4x5xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x4x5xf32> - // CHECK: %[[v5:.*]] = "tf.Reshape"(%[[v4]], %[[cst_1]]) : (tensor<1x4x5xf32>, tensor<2xi64>) -> tensor<4x5xf32> + // CHECK: %[[v0:.*]] = "tf.Reshape"(%arg0, %[[cst]]) : (tensor<3x4x5xf32>, tensor<3xi64>) -> tensor<3x4x5xf32> + // CHECK: %[[v1:.*]] = "tf.Slice"(%[[v0]], %[[cst_3]], %[[cst_0]]) : (tensor<3x4x5xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x4x5xf32> + // CHECK: %[[v2:.*]] = "tf.Reshape"(%[[v1]], %[[cst_1]]) : (tensor<1x4x5xf32>, tensor<2xi64>) -> tensor<4x5xf32> + // CHECK: %[[v3:.*]] = "tf.Slice"(%[[v0]], %[[cst_4]], %[[cst_0]]) : (tensor<3x4x5xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x4x5xf32> + // CHECK: %[[v4:.*]] = "tf.Reshape"(%[[v3]], %[[cst_1]]) : (tensor<1x4x5xf32>, tensor<2xi64>) -> tensor<4x5xf32> + // CHECK: %[[v5:.*]] = "tf.Slice"(%[[v0]], %[[cst_5]], %[[cst_0]]) : (tensor<3x4x5xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x4x5xf32> + // CHECK: %[[v6:.*]] = "tf.Reshape"(%[[v5]], %[[cst_1]]) : (tensor<1x4x5xf32>, tensor<2xi64>) -> tensor<4x5xf32> - // CHECK: %[[v6:.*]] = "tf.Slice"(%arg1, %[[cst_2]], %[[cst_5]]) : (tensor<3x5x6xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x5x6xf32> - // CHECK: 
%[[v7:.*]] = "tf.Reshape"(%[[v6]], %[[cst_6]]) : (tensor<1x5x6xf32>, tensor<2xi64>) -> tensor<5x6xf32> - // CHECK: %[[v8:.*]] = "tf.Slice"(%arg1, %[[cst_3]], %[[cst_5]]) : (tensor<3x5x6xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x5x6xf32> - // CHECK: %[[v9:.*]] = "tf.Reshape"(%[[v8]], %[[cst_6]]) : (tensor<1x5x6xf32>, tensor<2xi64>) -> tensor<5x6xf32> - // CHECK: %[[v10:.*]] = "tf.Slice"(%arg1, %[[cst_4]], %[[cst_5]]) : (tensor<3x5x6xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x5x6xf32> - // CHECK: %[[v11:.*]] = "tf.Reshape"(%[[v10]], %[[cst_6]]) : (tensor<1x5x6xf32>, tensor<2xi64>) -> tensor<5x6xf32> + // CHECK: %[[v7:.*]] = "tf.Reshape"(%arg1, %[[cst_2]]) : (tensor<3x5x6xf32>, tensor<3xi64>) -> tensor<3x5x6xf32> + // CHECK: %[[v8:.*]] = "tf.Slice"(%[[v7]], %[[cst_3]], %[[cst_6]]) : (tensor<3x5x6xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x5x6xf32> + // CHECK: %[[v9:.*]] = "tf.Reshape"(%[[v8]], %[[cst_7]]) : (tensor<1x5x6xf32>, tensor<2xi64>) -> tensor<5x6xf32> + // CHECK: %[[v10:.*]] = "tf.Slice"(%[[v7]], %[[cst_4]], %[[cst_6]]) : (tensor<3x5x6xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x5x6xf32> + // CHECK: %[[v11:.*]] = "tf.Reshape"(%[[v10]], %[[cst_7]]) : (tensor<1x5x6xf32>, tensor<2xi64>) -> tensor<5x6xf32> + // CHECK: %[[v12:.*]] = "tf.Slice"(%[[v7]], %[[cst_5]], %[[cst_6]]) : (tensor<3x5x6xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x5x6xf32> + // CHECK: %[[v13:.*]] = "tf.Reshape"(%[[v12]], %[[cst_7]]) : (tensor<1x5x6xf32>, tensor<2xi64>) -> tensor<5x6xf32> - // CHECK: %[[mm0:.*]] = "tf.MatMul"(%[[v1]], %[[v7]]) {transpose_a = false, transpose_b = false} : (tensor<4x5xf32>, tensor<5x6xf32>) -> tensor<4x6xf32> - // CHECK: %[[mm1:.*]] = "tf.MatMul"(%[[v3]], %[[v9]]) {transpose_a = false, transpose_b = false} : (tensor<4x5xf32>, tensor<5x6xf32>) -> tensor<4x6xf32> - // CHECK: %[[mm2:.*]] = "tf.MatMul"(%[[v5]], %[[v11]]) {transpose_a = false, transpose_b = false} : (tensor<4x5xf32>, tensor<5x6xf32>) -> tensor<4x6xf32> + // CHECK: %[[v14:.*]] = "tf.MatMul"(%[[v2]], %[[v9]]) {transpose_a = false, transpose_b = false} : (tensor<4x5xf32>, tensor<5x6xf32>) -> tensor<4x6xf32> + // CHECK: %[[v15:.*]] = "tf.MatMul"(%[[v4]], %[[v11]]) {transpose_a = false, transpose_b = false} : (tensor<4x5xf32>, tensor<5x6xf32>) -> tensor<4x6xf32> + // CHECK: %[[v16:.*]] = "tf.MatMul"(%[[v6]], %[[v13]]) {transpose_a = false, transpose_b = false} : (tensor<4x5xf32>, tensor<5x6xf32>) -> tensor<4x6xf32> - // CHECK: %[[v17:.*]] = "tf.Pack"(%[[mm0]], %[[mm1]], %[[mm2]]) {axis = 0 : i64} : (tensor<4x6xf32>, tensor<4x6xf32>, tensor<4x6xf32>) -> tensor<3x4x6xf32> + // CHECK: %[[v17:.*]] = "tf.Pack"(%[[v14]], %[[v15]], %[[v16]]) {axis = 0 : i64} : (tensor<4x6xf32>, tensor<4x6xf32>, tensor<4x6xf32>) -> tensor<3x4x6xf32> + // CHECK: %[[v18:.*]] = "tf.Reshape"(%[[v17]], %[[cst_8]]) : (tensor<3x4x6xf32>, tensor<3xi64>) -> tensor<3x4x6xf32> - // CHECK: return %[[v17]] : tensor<3x4x6xf32> + // CHECK: return %[[v18]] : tensor<3x4x6xf32> } // ----- @@ -178,35 +184,41 @@ func @batchMatMulFlatInput(%arg0: tensor<3x4x5xf32>, %arg1: tensor<3x5x6xf32>) - return %0 : tensor<3x4x6xf32> // CHECK-LABEL: batchMatMulFlatInput + // CHECK: %[[cst:.*]] = "tf.Const"() {value = dense<[3, 4, 5]> : tensor<3xi64>} // CHECK: %[[cst_0:.*]] = "tf.Const"() {value = dense<[1, 4, 5]> : tensor<3xi64>} // CHECK: %[[cst_1:.*]] = "tf.Const"() {value = dense<[4, 5]> : tensor<2xi64>} - // CHECK: %[[cst_2:.*]] = "tf.Const"() {value = dense<0> : tensor<3xi64>} - // CHECK: %[[cst_3:.*]] = "tf.Const"() {value = dense<[1, 0, 0]> : 
tensor<3xi64>} - // CHECK: %[[cst_4:.*]] = "tf.Const"() {value = dense<[2, 0, 0]> : tensor<3xi64>} - // CHECK: %[[cst_5:.*]] = "tf.Const"() {value = dense<[1, 5, 6]> : tensor<3xi64>} - // CHECK: %[[cst_6:.*]] = "tf.Const"() {value = dense<[5, 6]> : tensor<2xi64>} + // CHECK: %[[cst_2:.*]] = "tf.Const"() {value = dense<[3, 5, 6]> : tensor<3xi64>} + // CHECK: %[[cst_3:.*]] = "tf.Const"() {value = dense<0> : tensor<3xi64>} + // CHECK: %[[cst_4:.*]] = "tf.Const"() {value = dense<[1, 0, 0]> : tensor<3xi64>} + // CHECK: %[[cst_5:.*]] = "tf.Const"() {value = dense<[2, 0, 0]> : tensor<3xi64>} + // CHECK: %[[cst_6:.*]] = "tf.Const"() {value = dense<[1, 5, 6]> : tensor<3xi64>} + // CHECK: %[[cst_7:.*]] = "tf.Const"() {value = dense<[5, 6]> : tensor<2xi64>} + // CHECK: %[[cst_8:.*]] = "tf.Const"() {value = dense<[3, 4, 6]> : tensor<3xi64>} - // CHECK: %[[v0:.*]] = "tf.Slice"(%arg0, %[[cst_2]], %[[cst_0]]) : (tensor<3x4x5xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x4x5xf32> - // CHECK: %[[v1:.*]] = "tf.Reshape"(%[[v0]], %[[cst_1]]) : (tensor<1x4x5xf32>, tensor<2xi64>) -> tensor<4x5xf32> - // CHECK: %[[v2:.*]] = "tf.Slice"(%arg0, %[[cst_3]], %[[cst_0]]) : (tensor<3x4x5xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x4x5xf32> - // CHECK: %[[v3:.*]] = "tf.Reshape"(%[[v2]], %[[cst_1]]) : (tensor<1x4x5xf32>, tensor<2xi64>) -> tensor<4x5xf32> - // CHECK: %[[v4:.*]] = "tf.Slice"(%arg0, %[[cst_4]], %[[cst_0]]) : (tensor<3x4x5xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x4x5xf32> - // CHECK: %[[v5:.*]] = "tf.Reshape"(%[[v4]], %[[cst_1]]) : (tensor<1x4x5xf32>, tensor<2xi64>) -> tensor<4x5xf32> + // CHECK: %[[v0:.*]] = "tf.Reshape"(%arg0, %[[cst]]) : (tensor<3x4x5xf32>, tensor<3xi64>) -> tensor<3x4x5xf32> + // CHECK: %[[v1:.*]] = "tf.Slice"(%[[v0]], %[[cst_3]], %[[cst_0]]) : (tensor<3x4x5xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x4x5xf32> + // CHECK: %[[v2:.*]] = "tf.Reshape"(%[[v1]], %[[cst_1]]) : (tensor<1x4x5xf32>, tensor<2xi64>) -> tensor<4x5xf32> + // CHECK: %[[v3:.*]] = "tf.Slice"(%[[v0]], %[[cst_4]], %[[cst_0]]) : (tensor<3x4x5xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x4x5xf32> + // CHECK: %[[v4:.*]] = "tf.Reshape"(%[[v3]], %[[cst_1]]) : (tensor<1x4x5xf32>, tensor<2xi64>) -> tensor<4x5xf32> + // CHECK: %[[v5:.*]] = "tf.Slice"(%[[v0]], %[[cst_5]], %[[cst_0]]) : (tensor<3x4x5xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x4x5xf32> + // CHECK: %[[v6:.*]] = "tf.Reshape"(%[[v5]], %[[cst_1]]) : (tensor<1x4x5xf32>, tensor<2xi64>) -> tensor<4x5xf32> - // CHECK: %[[v6:.*]] = "tf.Slice"(%arg1, %[[cst_2]], %[[cst_5]]) : (tensor<3x5x6xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x5x6xf32> - // CHECK: %[[v7:.*]] = "tf.Reshape"(%[[v6]], %[[cst_6]]) : (tensor<1x5x6xf32>, tensor<2xi64>) -> tensor<5x6xf32> - // CHECK: %[[v8:.*]] = "tf.Slice"(%arg1, %[[cst_3]], %[[cst_5]]) : (tensor<3x5x6xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x5x6xf32> - // CHECK: %[[v9:.*]] = "tf.Reshape"(%[[v8]], %[[cst_6]]) : (tensor<1x5x6xf32>, tensor<2xi64>) -> tensor<5x6xf32> - // CHECK: %[[v10:.*]] = "tf.Slice"(%arg1, %[[cst_4]], %[[cst_5]]) : (tensor<3x5x6xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x5x6xf32> - // CHECK: %[[v11:.*]] = "tf.Reshape"(%[[v10]], %[[cst_6]]) : (tensor<1x5x6xf32>, tensor<2xi64>) -> tensor<5x6xf32> + // CHECK: %[[v7:.*]] = "tf.Reshape"(%arg1, %[[cst_2]]) : (tensor<3x5x6xf32>, tensor<3xi64>) -> tensor<3x5x6xf32> + // CHECK: %[[v8:.*]] = "tf.Slice"(%[[v7]], %[[cst_3]], %[[cst_6]]) : (tensor<3x5x6xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x5x6xf32> + // CHECK: %[[v9:.*]] = 
"tf.Reshape"(%[[v8]], %[[cst_7]]) : (tensor<1x5x6xf32>, tensor<2xi64>) -> tensor<5x6xf32> + // CHECK: %[[v10:.*]] = "tf.Slice"(%[[v7]], %[[cst_4]], %[[cst_6]]) : (tensor<3x5x6xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x5x6xf32> + // CHECK: %[[v11:.*]] = "tf.Reshape"(%[[v10]], %[[cst_7]]) : (tensor<1x5x6xf32>, tensor<2xi64>) -> tensor<5x6xf32> + // CHECK: %[[v12:.*]] = "tf.Slice"(%[[v7]], %[[cst_5]], %[[cst_6]]) : (tensor<3x5x6xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x5x6xf32> + // CHECK: %[[v13:.*]] = "tf.Reshape"(%[[v12]], %[[cst_7]]) : (tensor<1x5x6xf32>, tensor<2xi64>) -> tensor<5x6xf32> - // CHECK: %[[mm0:.*]] = "tf.MatMul"(%[[v1]], %[[v7]]) {transpose_a = false, transpose_b = false} : (tensor<4x5xf32>, tensor<5x6xf32>) -> tensor<4x6xf32> - // CHECK: %[[mm1:.*]] = "tf.MatMul"(%[[v3]], %[[v9]]) {transpose_a = false, transpose_b = false} : (tensor<4x5xf32>, tensor<5x6xf32>) -> tensor<4x6xf32> - // CHECK: %[[mm2:.*]] = "tf.MatMul"(%[[v5]], %[[v11]]) {transpose_a = false, transpose_b = false} : (tensor<4x5xf32>, tensor<5x6xf32>) -> tensor<4x6xf32> + // CHECK: %[[v14:.*]] = "tf.MatMul"(%[[v2]], %[[v9]]) {transpose_a = false, transpose_b = false} : (tensor<4x5xf32>, tensor<5x6xf32>) -> tensor<4x6xf32> + // CHECK: %[[v15:.*]] = "tf.MatMul"(%[[v4]], %[[v11]]) {transpose_a = false, transpose_b = false} : (tensor<4x5xf32>, tensor<5x6xf32>) -> tensor<4x6xf32> + // CHECK: %[[v16:.*]] = "tf.MatMul"(%[[v6]], %[[v13]]) {transpose_a = false, transpose_b = false} : (tensor<4x5xf32>, tensor<5x6xf32>) -> tensor<4x6xf32> - // CHECK: %[[v17:.*]] = "tf.Pack"(%[[mm0]], %[[mm1]], %[[mm2]]) {axis = 0 : i64} : (tensor<4x6xf32>, tensor<4x6xf32>, tensor<4x6xf32>) -> tensor<3x4x6xf32> + // CHECK: %[[v17:.*]] = "tf.Pack"(%[[v14]], %[[v15]], %[[v16]]) {axis = 0 : i64} : (tensor<4x6xf32>, tensor<4x6xf32>, tensor<4x6xf32>) -> tensor<3x4x6xf32> + // CHECK: %[[v18:.*]] = "tf.Reshape"(%[[v17]], %[[cst_8]]) : (tensor<3x4x6xf32>, tensor<3xi64>) -> tensor<3x4x6xf32> - // CHECK: return %[[v17]] : tensor<3x4x6xf32> + // CHECK: return %[[v18]] : tensor<3x4x6xf32> } // ----- From 106827a6d7be9ea7379875ecced6011227b2c3f4 Mon Sep 17 00:00:00 2001 From: Jiho Choi Date: Mon, 13 Jul 2020 14:43:34 -0700 Subject: [PATCH 0316/2522] Add GroupMetadata which contains a name and a model id. PiperOrigin-RevId: 321037503 Change-Id: I85d10d284fe65b82cc989a21b279bc6182fec728 --- .../core/profiler/lib/profiler_session.cc | 6 +- .../core/profiler/utils/derived_timeline.cc | 23 ++++---- .../core/profiler/utils/derived_timeline.h | 8 +-- .../profiler/utils/derived_timeline_test.cc | 21 +++---- .../core/profiler/utils/group_events.cc | 29 +++++++--- tensorflow/core/profiler/utils/group_events.h | 18 ++++-- .../core/profiler/utils/group_events_test.cc | 56 +++++++++---------- 7 files changed, 93 insertions(+), 68 deletions(-) diff --git a/tensorflow/core/profiler/lib/profiler_session.cc b/tensorflow/core/profiler/lib/profiler_session.cc index f7d97711da0..90857ea8b51 100644 --- a/tensorflow/core/profiler/lib/profiler_session.cc +++ b/tensorflow/core/profiler/lib/profiler_session.cc @@ -110,10 +110,10 @@ Status ProfilerSession::CollectData(profiler::XSpace* space) { // 3. Sort each plane of the XSpace profiler::SortXSpace(space); // 4. Grouping (i.e. marking step number) events in the XSpace. - profiler::EventGroupNameMap event_group_name_map; - profiler::GroupTfEvents(space, &event_group_name_map); + profiler::GroupMetadataMap group_metadata_map; + profiler::GroupTfEvents(space, &group_metadata_map); // 5. 
Generated miscellaneous derived time lines for device planes. - profiler::GenerateDerivedTimeLines(event_group_name_map, space); + profiler::GenerateDerivedTimeLines(group_metadata_map, space); #endif return Status::OK(); diff --git a/tensorflow/core/profiler/utils/derived_timeline.cc b/tensorflow/core/profiler/utils/derived_timeline.cc index 42e0718f8b6..43d8305f93c 100644 --- a/tensorflow/core/profiler/utils/derived_timeline.cc +++ b/tensorflow/core/profiler/utils/derived_timeline.cc @@ -138,7 +138,7 @@ void DerivedXLineBuilder::ResetLastEvents(int level) { } void DeriveEventsFromAnnotations(const SymbolResolver& symbol_resolver, - const EventGroupNameMap& event_group_name_map, + const GroupMetadataMap& group_metadata_map, XPlane* device_trace, bool step_info_only) { // Merge and sort events by Timespan as they come from different lines. std::vector events; @@ -198,10 +198,11 @@ void DeriveEventsFromAnnotations(const SymbolResolver& symbol_resolver, XEvent step_event = CreateXEvent( *plane.GetOrCreateEventMetadata(absl::StrCat(*group_id)), offset_ps, duration_ps, group_id_stat_metadata_id, group_id); - if (auto group_name = gtl::FindOrNull(event_group_name_map, *group_id)) { + if (auto group_metadata = + gtl::FindOrNull(group_metadata_map, *group_id)) { XStat* stat = step_event.add_stats(); stat->set_metadata_id(step_name_stat_metadata_id); - stat->set_str_value(*group_name); + stat->set_str_value(group_metadata->name); } steps.ExpandOrAddEvent(step_event); } @@ -242,7 +243,7 @@ void DeriveEventsFromAnnotations(const SymbolResolver& symbol_resolver, } void DeriveEventsFromHostTrace(const XPlane* host_trace, - const EventGroupNameMap& event_group_name_map, + const GroupMetadataMap& group_metadata_map, std::vector device_traces) { struct GroupLaunchInfo { // "Group" normally means step. Timespan timespan; @@ -311,10 +312,10 @@ void DeriveEventsFromHostTrace(const XPlane* host_trace, for (const auto& kv : per_device_launch_info[i]) { int64 group_id = kv.first; const GroupLaunchInfo& group_info = kv.second; - if (auto group_name = gtl::FindOrNull(event_group_name_map, group_id)) { + if (auto group_metadata = gtl::FindOrNull(group_metadata_map, group_id)) { XEventBuilder device_event = launch_line.AddEvent(*device_plane.GetOrCreateEventMetadata( - absl::StrCat("Launch Stats for ", *group_name))); + absl::StrCat("Launch Stats for ", group_metadata->name))); device_event.SetTimestampNs( host_plane_start + PicosToNanos(group_info.timespan.begin_ps())); device_event.SetDurationPs(group_info.timespan.duration_ps()); @@ -336,23 +337,23 @@ void DeriveEventsFromHostTrace(const XPlane* host_trace, } } -void GenerateDerivedTimeLines(const EventGroupNameMap& event_group_name_map, +void GenerateDerivedTimeLines(const GroupMetadataMap& group_metadata_map, XSpace* space, bool step_info_only) { for (XPlane& plane : *space->mutable_planes()) { // Derived timelines only generated for device traces. 
if (IsGpuPlaneName(plane.name())) { - DeriveEventsFromAnnotations(DummySymbolResolver, event_group_name_map, + DeriveEventsFromAnnotations(DummySymbolResolver, group_metadata_map, &plane, step_info_only); } } } -void GenerateDerivedTimeLines(const EventGroupNameMap& event_group_name_map, +void GenerateDerivedTimeLines(const GroupMetadataMap& group_metadata_map, const std::vector& device_traces, bool step_info_only) { for (XPlane* plane : device_traces) { - DeriveEventsFromAnnotations(DummySymbolResolver, event_group_name_map, - plane, step_info_only); + DeriveEventsFromAnnotations(DummySymbolResolver, group_metadata_map, plane, + step_info_only); } } diff --git a/tensorflow/core/profiler/utils/derived_timeline.h b/tensorflow/core/profiler/utils/derived_timeline.h index 92489399b8f..bf8280708fa 100644 --- a/tensorflow/core/profiler/utils/derived_timeline.h +++ b/tensorflow/core/profiler/utils/derived_timeline.h @@ -85,20 +85,20 @@ void ProcessTfOpEvent(absl::string_view tf_op_full_name, int64 offset_ps, // with the same value are merged into a single event except for XLA modules. // The device_trace is both input and output. void DeriveEventsFromAnnotations(const SymbolResolver& symbol_resolver, - const EventGroupNameMap& event_group_name_map, + const GroupMetadataMap& group_metadata_map, XPlane* device_trace, bool step_info_only = false); // Derives "Launch Activities Summary" line from host trace. void DeriveEventsFromHostTrace(const XPlane* host_trace, - const EventGroupNameMap& event_group_name_map, + const GroupMetadataMap& group_metadata_map, std::vector device_traces); // Loops through XPlanes of input XSpace, if it is "device" XPlane, generating // derived timelines for the plane by calling DeriveEventsFromAnnotations. -void GenerateDerivedTimeLines(const EventGroupNameMap& event_group_name_map, +void GenerateDerivedTimeLines(const GroupMetadataMap& group_metadata_map, XSpace* space, bool step_info_only = false); -void GenerateDerivedTimeLines(const EventGroupNameMap& event_group_name_map, +void GenerateDerivedTimeLines(const GroupMetadataMap& group_metadata_map, const std::vector& device_traces, bool step_info_only = false); diff --git a/tensorflow/core/profiler/utils/derived_timeline_test.cc b/tensorflow/core/profiler/utils/derived_timeline_test.cc index a75ba8ea085..5952382bd7f 100644 --- a/tensorflow/core/profiler/utils/derived_timeline_test.cc +++ b/tensorflow/core/profiler/utils/derived_timeline_test.cc @@ -33,8 +33,8 @@ namespace { TEST(DerivedTimelineTest, EmptySpaceTest) { XSpace space; - EventGroupNameMap event_group_name_map; - GenerateDerivedTimeLines(event_group_name_map, &space); + GroupMetadataMap group_metadata_map; + GenerateDerivedTimeLines(group_metadata_map, &space); EXPECT_EQ(space.planes_size(), 0); } @@ -43,7 +43,7 @@ TEST(DerivedTimelineTest, HloModuleNameTest) { const absl::string_view kHloModuleName = "hlo_module"; const absl::string_view kKernelDetails = "kernel_details"; XSpace space; - EventGroupNameMap event_group_name_map; + GroupMetadataMap group_metadata_map; XPlane* plane = GetOrCreateGpuXPlane(&space, /*device_ordinal=*/0); XPlaneBuilder plane_builder(plane); auto line_builder = plane_builder.GetOrCreateLine(0); @@ -53,7 +53,7 @@ TEST(DerivedTimelineTest, HloModuleNameTest) { CreateXEvent(&plane_builder, &line_builder, "op2", 200, 300, {{StatType::kHloModule, kHloModuleName}, {StatType::kKernelDetails, kKernelDetails}}); - GenerateDerivedTimeLines(event_group_name_map, &space); + GenerateDerivedTimeLines(group_metadata_map, &space); XPlaneVisitor 
plane_visitor = CreateTfXPlaneVisitor(plane); // Only the hlo module line is added and other empty lines are removed at the // end. @@ -73,7 +73,7 @@ TEST(DerivedTimelineTest, TfOpLineTest) { const absl::string_view kTfOpName = "mul:Mul"; const absl::string_view kKernelDetails = "kernel_details"; XSpace space; - EventGroupNameMap event_group_name_map; + GroupMetadataMap group_metadata_map; XPlane* plane = GetOrCreateGpuXPlane(&space, /*device_ordinal=*/0); XPlaneBuilder plane_builder(plane); auto line_builder = plane_builder.GetOrCreateLine(0); @@ -83,7 +83,7 @@ TEST(DerivedTimelineTest, TfOpLineTest) { CreateXEvent(&plane_builder, &line_builder, "op2", 200, 300, {{StatType::kLevel0, kTfOpName}, {StatType::kKernelDetails, kKernelDetails}}); - GenerateDerivedTimeLines(event_group_name_map, &space); + GenerateDerivedTimeLines(group_metadata_map, &space); XPlaneVisitor plane_visitor = CreateTfXPlaneVisitor(plane); // Only the tf op line is added and other empty lines are removed at the end. EXPECT_EQ(plane_visitor.NumLines(), 2); @@ -108,7 +108,8 @@ TEST(DerivedTimelineTest, DependencyTest) { const absl::string_view kTfOpName = "mul:Mul"; const absl::string_view kKernelDetails = "kernel_details"; XSpace space; - EventGroupNameMap event_group_name_map({{0, "train 0"}, {1, "train 1"}}); + GroupMetadataMap group_metadata_map( + {{0, {"train 0", ""}}, {1, {"train 1", ""}}}); XPlane* plane = GetOrCreateGpuXPlane(&space, /*device_ordinal=*/0); XPlaneBuilder plane_builder(plane); auto line_builder = plane_builder.GetOrCreateLine(0); @@ -120,7 +121,7 @@ TEST(DerivedTimelineTest, DependencyTest) { {{StatType::kGroupId, kSecondGroupId}, {StatType::kLevel0, kTfOpName}, {StatType::kKernelDetails, kKernelDetails}}); - GenerateDerivedTimeLines(event_group_name_map, &space); + GenerateDerivedTimeLines(group_metadata_map, &space); XPlaneVisitor plane_visitor = CreateTfXPlaneVisitor(plane); // The step line and the TF op line are added. EXPECT_EQ(plane_visitor.NumLines(), 3); @@ -137,7 +138,7 @@ TEST(DerivedTimelineTest, TfOpNameScopeTest) { const absl::string_view kTfOpName = "scope1/scope2/mul:Mul"; const absl::string_view kKernelDetails = "kernel_details"; XSpace space; - EventGroupNameMap event_group_name_map; + GroupMetadataMap group_metadata_map; XPlane* plane = GetOrCreateGpuXPlane(&space, /*device_ordinal=*/0); XPlaneBuilder plane_builder(plane); auto line_builder = plane_builder.GetOrCreateLine(0); @@ -147,7 +148,7 @@ TEST(DerivedTimelineTest, TfOpNameScopeTest) { CreateXEvent(&plane_builder, &line_builder, "op2", 200, 300, {{StatType::kLevel0, kTfOpName}, {StatType::kKernelDetails, kKernelDetails}}); - GenerateDerivedTimeLines(event_group_name_map, &space); + GenerateDerivedTimeLines(group_metadata_map, &space); XPlaneVisitor plane_visitor = CreateTfXPlaneVisitor(plane); // The TF name scope line and the TF op line are added. EXPECT_EQ(plane_visitor.NumLines(), 3); diff --git a/tensorflow/core/profiler/utils/group_events.cc b/tensorflow/core/profiler/utils/group_events.cc index 926dfe65156..ffad67b3413 100644 --- a/tensorflow/core/profiler/utils/group_events.cc +++ b/tensorflow/core/profiler/utils/group_events.cc @@ -148,7 +148,7 @@ bool IsImplicitRootEvent(const XEventVisitor& event) { } void ProcessRootEvent(int64 group_id, EventNode* root_event, - EventGroupNameMap* event_group_name_map) { + GroupMetadataMap* group_metadata_map) { root_event->PropagateGroupId(group_id); std::string group_name = root_event->GetGroupName(); // TODO(jihochoi): change event name instead. 
@@ -158,7 +158,7 @@ void ProcessRootEvent(int64 group_id, EventNode* root_event, // `step_name` stat's value if present. root_event->AddStepName(group_name); } - event_group_name_map->emplace(group_id, std::move(group_name)); + (*group_metadata_map)[group_id].name = std::move(group_name); } bool IsTfDataEvent(const EventNode& event_node) { @@ -502,7 +502,7 @@ void EventForest::CreateEventGroup() { if (!tf_loop_root_events_.empty()) { // If a TF loop is used, each TF loop iteration becomes a root. for (EventNode* root_event : tf_loop_root_events_) { - ProcessRootEvent(next_group_id_++, root_event, &event_group_name_map_); + ProcessRootEvent(next_group_id_++, root_event, &group_metadata_map_); } return; } @@ -510,7 +510,7 @@ void EventForest::CreateEventGroup() { SortEventList(&root_events_); for (EventNode* root_event : root_events_) { if (IsTopRoot(root_event)) { - ProcessRootEvent(next_group_id_++, root_event, &event_group_name_map_); + ProcessRootEvent(next_group_id_++, root_event, &group_metadata_map_); } } } @@ -618,6 +618,20 @@ void EventForest::ProcessWorker() { } } +void EventForest::ProcessModelIds() { + auto session_run_event_list = + gtl::FindOrNull(event_node_map_, HostEventType::kSessionRun); + if (!session_run_event_list) return; + for (const auto& session_run_event : *session_run_event_list) { + auto group_id = session_run_event->GetGroupId(); + if (!group_id.has_value()) continue; + absl::optional model_id = + session_run_event->GetEventVisitor().GetStat(StatType::kModelId); + if (!model_id.has_value()) continue; + group_metadata_map_[*group_id].model_id = model_id->ToString(); + } +} + EventForest::EventForest( const std::vector& connect_info_list, const std::vector& root_event_types, @@ -638,6 +652,7 @@ EventForest::EventForest( CreateEventGroup(); MarkEagerlyExecutedGpuKernels(); MarkEagerlyExecutedCpuTfOps(); + ProcessModelIds(); } std::vector CreateInterThreadConnectInfoList() { @@ -654,13 +669,13 @@ std::vector CreateInterThreadConnectInfoList() { return connect_info_list; } -void GroupTfEvents(XSpace* space, EventGroupNameMap* event_group_name_map) { +void GroupTfEvents(XSpace* space, GroupMetadataMap* group_metadata_map) { if (!space) return; std::vector connect_info_list = CreateInterThreadConnectInfoList(); EventForest event_forest(connect_info_list, {}, CreateTfXPlaneVisitor, space); - if (event_group_name_map) { - *event_group_name_map = event_forest.GetEventGroupNameMap(); + if (group_metadata_map) { + *group_metadata_map = event_forest.GetGroupMetadataMap(); } } diff --git a/tensorflow/core/profiler/utils/group_events.h b/tensorflow/core/profiler/utils/group_events.h index 568ebff6577..c3c156a85a5 100644 --- a/tensorflow/core/profiler/utils/group_events.h +++ b/tensorflow/core/profiler/utils/group_events.h @@ -126,7 +126,12 @@ using EventNodeMap = absl::flat_hash_map>>; -using EventGroupNameMap = absl::flat_hash_map; +struct GroupMetadata { + std::string name; + std::string model_id; // inference only. +}; + +using GroupMetadataMap = absl::flat_hash_map; using EventList = std::vector; @@ -153,8 +158,8 @@ class EventForest { const EventNodeMap& GetEventNodeMap() const { return event_node_map_; } - const EventGroupNameMap& GetEventGroupNameMap() const { - return event_group_name_map_; + const GroupMetadataMap& GetGroupMetadataMap() const { + return group_metadata_map_; } private: @@ -190,9 +195,12 @@ class EventForest { // eager ops (e.g., for Keras callback). void ProcessWorker(); + // Adds model ids to group_metadata_map_ for inference profiles. 
+ void ProcessModelIds(); + EventNodeMap event_node_map_; std::vector visitors_; - EventGroupNameMap event_group_name_map_; + GroupMetadataMap group_metadata_map_; EventList root_events_; EventList tf_loop_root_events_; int64 next_group_id_ = 0; @@ -202,7 +210,7 @@ std::vector CreateInterThreadConnectInfoList(); // Calls GroupEvents with connect_info_list and root_event_types specific to // TensorFlow. -void GroupTfEvents(XSpace* space, EventGroupNameMap* event_group_name_map); +void GroupTfEvents(XSpace* space, GroupMetadataMap* group_metadata_map); } // namespace profiler } // namespace tensorflow diff --git a/tensorflow/core/profiler/utils/group_events_test.cc b/tensorflow/core/profiler/utils/group_events_test.cc index d029dc6a03a..77dfb6cb7f0 100644 --- a/tensorflow/core/profiler/utils/group_events_test.cc +++ b/tensorflow/core/profiler/utils/group_events_test.cc @@ -61,15 +61,15 @@ TEST(GroupEventsTest, GroupGpuTraceLegacyRootTest) { CreateXEvent(&device_plane_builder, &stream, "matmul", 200, 300, {{StatType::kCorrelationId, kCorrelationId}}); - EventGroupNameMap event_group_name_map; - GroupTfEvents(&space, &event_group_name_map); + GroupMetadataMap group_metadata_map; + GroupTfEvents(&space, &group_metadata_map); XPlaneVisitor device_plane_visitor = CreateTfXPlaneVisitor(device_plane); EXPECT_EQ(device_plane->lines(0).events(0).stats_size(), 3); EXPECT_EQ(device_plane_visitor.GetStatType( device_plane->lines(0).events(0).stats(1)), StatType::kGroupId); - EXPECT_EQ(event_group_name_map.size(), 1); - EXPECT_EQ(event_group_name_map[0], "train 123"); + EXPECT_EQ(group_metadata_map.size(), 1); + EXPECT_EQ(group_metadata_map[0].name, "train 123"); } TEST(GroupEventsTest, GroupGpuTraceTest) { @@ -102,15 +102,15 @@ TEST(GroupEventsTest, GroupGpuTraceTest) { CreateXEvent(&device_plane_builder, &stream, "matmul", 200, 300, {{StatType::kCorrelationId, kCorrelationId}}); - EventGroupNameMap event_group_name_map; - GroupTfEvents(&space, &event_group_name_map); + GroupMetadataMap group_metadata_map; + GroupTfEvents(&space, &group_metadata_map); XPlaneVisitor device_plane_visitor = CreateTfXPlaneVisitor(device_plane); EXPECT_EQ(device_plane->lines(0).events(0).stats_size(), 3); EXPECT_EQ(device_plane_visitor.GetStatType( device_plane->lines(0).events(0).stats(1)), StatType::kGroupId); - EXPECT_EQ(event_group_name_map.size(), 1); - EXPECT_EQ(event_group_name_map[0], "train 123"); + EXPECT_EQ(group_metadata_map.size(), 1); + EXPECT_EQ(group_metadata_map[0].name, "train 123"); } TEST(GroupEventsTest, GroupTensorFlowLoopTest) { @@ -140,16 +140,16 @@ TEST(GroupEventsTest, GroupTensorFlowLoopTest) { CreateXEvent(&device_plane_builder, &stream, "matmul", 200, 300, {{StatType::kCorrelationId, kCorrelationId}}); - EventGroupNameMap event_group_name_map; - GroupTfEvents(&space, &event_group_name_map); + GroupMetadataMap group_metadata_map; + GroupTfEvents(&space, &group_metadata_map); XPlaneVisitor device_plane_visitor = CreateTfXPlaneVisitor(device_plane); EXPECT_EQ(device_plane->lines(0).events(0).stats_size(), 3); EXPECT_EQ(device_plane_visitor.GetStatType( device_plane->lines(0).events(0).stats(1)), StatType::kGroupId); EXPECT_EQ(device_plane->lines(0).events(0).stats(1).int64_value(), 10); - EXPECT_EQ(event_group_name_map.size(), 1); - EXPECT_EQ(event_group_name_map[10], "10"); + EXPECT_EQ(group_metadata_map.size(), 1); + EXPECT_EQ(group_metadata_map[10].name, "10"); } // When there are multiple TF loops, group_id is assigned in the order of TF @@ -187,13 +187,13 @@ TEST(GroupEventsTest, 
GroupMultipleTensorFlowLoopsTest) { {{StatType::kStepId, kFirstStepId}, {StatType::kIterNum, kFirstIterNumStart + 1}}); - EventGroupNameMap event_group_name_map; - GroupTfEvents(&space, &event_group_name_map); - EXPECT_EQ(event_group_name_map.size(), 4); - EXPECT_TRUE(event_group_name_map.count(10)); - EXPECT_TRUE(event_group_name_map.count(11)); - EXPECT_TRUE(event_group_name_map.count(12)); - EXPECT_TRUE(event_group_name_map.count(13)); + GroupMetadataMap group_metadata_map; + GroupTfEvents(&space, &group_metadata_map); + EXPECT_EQ(group_metadata_map.size(), 4); + EXPECT_TRUE(group_metadata_map.count(10)); + EXPECT_TRUE(group_metadata_map.count(11)); + EXPECT_TRUE(group_metadata_map.count(12)); + EXPECT_TRUE(group_metadata_map.count(13)); } TEST(GroupEventsTest, GroupFunctionalOp) { @@ -223,8 +223,8 @@ TEST(GroupEventsTest, GroupFunctionalOp) { HostEventType::kExecutorStateProcess, 100, 150, {{StatType::kStepId, kFunctionStepId}}); - EventGroupNameMap event_group_name_map; - GroupTfEvents(&space, &event_group_name_map); + GroupMetadataMap group_metadata_map; + GroupTfEvents(&space, &group_metadata_map); XPlaneVisitor host_plane_visitor = CreateTfXPlaneVisitor(host_plane); // Check that RemoteCallOp is grouped correctly so that all events belong // to the same group. @@ -271,7 +271,7 @@ TEST(GroupEventsTest, EagerOpTest) { CreateXEvent(&device_plane_builder, &stream, "matmul", 200, 300, {{StatType::kCorrelationId, kCorrelationId}}); - GroupTfEvents(&space, /*event_group_name_map=*/nullptr); + GroupTfEvents(&space, /*group_metadata_map=*/nullptr); XPlaneVisitor host_plane_visitor = CreateTfXPlaneVisitor(host_plane); const XEvent& eager_cpu_tf_op = host_plane->lines(0).events(3); EXPECT_EQ(eager_cpu_tf_op.stats_size(), 1); @@ -323,7 +323,7 @@ TEST(GroupEventsTest, FunctionOpTest) { CreateXEvent(&device_plane_builder, &stream, "matmul", 200, 300, {{StatType::kCorrelationId, kCorrelationId}}); - GroupTfEvents(&space, /*event_group_name_map=*/nullptr); + GroupTfEvents(&space, /*group_metadata_map=*/nullptr); XPlaneVisitor host_plane_visitor = CreateTfXPlaneVisitor(host_plane); const XEvent& cpu_tf_op = host_plane->lines(1).events(2); EXPECT_EQ(cpu_tf_op.stats_size(), 2); @@ -359,7 +359,7 @@ TEST(GroupEventsTest, SemanticArgTest) { {{StatType::kConsumerType, kContextType}, {StatType::kConsumerId, kContextId}}); - GroupTfEvents(&raw_space, /*event_group_name_map=*/nullptr); + GroupTfEvents(&raw_space, /*group_metadata_map=*/nullptr); int num_events = 0; CreateTfXPlaneVisitor(raw_plane).ForEachLine( [&](const tensorflow::profiler::XLineVisitor& line) { @@ -400,7 +400,7 @@ TEST(GroupEventsTest, SemanticIntArgNoMatchTest) { {{StatType::kConsumerType, kContextType}, {StatType::kConsumerId, kConsumerId}}); - GroupTfEvents(&raw_space, /*event_group_name_map=*/nullptr); + GroupTfEvents(&raw_space, /*group_metadata_map=*/nullptr); int num_events = 0; CreateTfXPlaneVisitor(raw_plane).ForEachLine( [&](const tensorflow::profiler::XLineVisitor& line) { @@ -445,7 +445,7 @@ TEST(GroupEventsTest, SemanticUintArgNoMatchTest) { {{StatType::kConsumerType, kContextType}, {StatType::kConsumerId, kConsumerId}}); - GroupTfEvents(&raw_space, /*event_group_name_map=*/nullptr); + GroupTfEvents(&raw_space, /*group_metadata_map=*/nullptr); int num_events = 0; CreateTfXPlaneVisitor(raw_plane).ForEachLine( [&](const tensorflow::profiler::XLineVisitor& line) { @@ -485,7 +485,7 @@ TEST(GroupEventsTest, AsyncEventTest) { {{StatType::kIsAsync, kIsAsync}}); CreateXEvent(&plane, &line, kChild, 20, 80); - GroupTfEvents(&raw_space, 
/*event_group_name_map=*/nullptr); + GroupTfEvents(&raw_space, /*group_metadata_map=*/nullptr); CreateTfXPlaneVisitor(raw_plane).ForEachLine( [&](const tensorflow::profiler::XLineVisitor& line) { EXPECT_EQ(line.NumEvents(), 3); @@ -538,7 +538,7 @@ TEST(GroupEventsTest, WorkerTest) { CreateXEvent(&plane, &line, HostEventType::kFunctionRun, kSecondFunctionRunStartTime, kFunctionRunDuration); - GroupTfEvents(&raw_space, /*event_group_name_map=*/nullptr); + GroupTfEvents(&raw_space, /*group_metadata_map=*/nullptr); CreateTfXPlaneVisitor(raw_plane).ForEachLine( [&](const tensorflow::profiler::XLineVisitor& line) { EXPECT_EQ(line.NumEvents(), 6); From a53b84a7b6b49a8e2fcd7a333a11d8ef19feba5f Mon Sep 17 00:00:00 2001 From: Kibeom Kim Date: Mon, 13 Jul 2020 15:07:59 -0700 Subject: [PATCH 0317/2522] Increase timeout of //third_party/tensorflow/python/data/kernel_tests:shuffle_test PiperOrigin-RevId: 321042534 Change-Id: I8012062cb200a04bb4ebd8387dca6e390145f5b2 --- tensorflow/python/data/kernel_tests/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD index eaee1184ff4..ecfb6668909 100644 --- a/tensorflow/python/data/kernel_tests/BUILD +++ b/tensorflow/python/data/kernel_tests/BUILD @@ -648,7 +648,7 @@ tf_py_test( tf_py_test( name = "shuffle_test", - size = "small", + size = "medium", srcs = ["shuffle_test.py"], deps = [ ":test_base", From 7d202aecec0ebfa3b448c30821422740fe8c01b8 Mon Sep 17 00:00:00 2001 From: Tim Shen Date: Mon, 13 Jul 2020 15:26:50 -0700 Subject: [PATCH 0318/2522] [XLA/GPU] Remove Thunk::hlo_instruction(). Some subclasses still keep it separately, but at least about half of them don't depend on it whatsoever. Also, instead of doing: emitter -> thunk -> annotate with hlo -> thunk with profile annotation just do emitter -> thunk with profile annotation PiperOrigin-RevId: 321045989 Change-Id: I244b75474b9f498013675adb401bdacfadd3b3ac --- .../service/gpu/collective_permute_thunk.cc | 7 +++- .../service/gpu/collective_permute_thunk.h | 1 + .../xla/service/gpu/conditional_thunk.cc | 14 ++----- .../xla/service/gpu/conditional_thunk.h | 2 +- .../xla/service/gpu/convolution_thunk.cc | 5 +-- .../xla/service/gpu/cudnn_batchnorm_thunk.cc | 15 +++++--- .../xla/service/gpu/cudnn_batchnorm_thunk.h | 3 ++ .../xla/service/gpu/custom_call_thunk.cc | 3 +- .../xla/service/gpu/custom_call_thunk.h | 1 + .../compiler/xla/service/gpu/for_thunk.cc | 10 ++--- .../compiler/xla/service/gpu/for_thunk.h | 2 +- .../compiler/xla/service/gpu/gemm_thunk.cc | 5 ++- .../compiler/xla/service/gpu/gemm_thunk.h | 1 + .../compiler/xla/service/gpu/gpu_compiler.cc | 4 +- .../xla/service/gpu/gpu_executable.cc | 11 +----- .../compiler/xla/service/gpu/gpu_executable.h | 3 -- .../compiler/xla/service/gpu/infeed_thunk.cc | 6 ++- .../compiler/xla/service/gpu/infeed_thunk.h | 1 + .../xla/service/gpu/nccl_all_reduce_thunk.cc | 12 +++--- .../xla/service/gpu/nccl_all_reduce_thunk.h | 1 + .../compiler/xla/service/gpu/outfeed_thunk.cc | 9 +++-- .../compiler/xla/service/gpu/outfeed_thunk.h | 1 + .../xla/service/gpu/sequential_thunk.cc | 6 --- .../xla/service/gpu/sequential_thunk.h | 1 - tensorflow/compiler/xla/service/gpu/thunk.h | 37 ++++--------------- .../compiler/xla/service/gpu/thunk_emitter.cc | 2 + .../compiler/xla/service/gpu/thunk_schedule.h | 4 +- .../compiler/xla/service/gpu/while_thunk.cc | 11 ++---- .../compiler/xla/service/gpu/while_thunk.h | 2 +- 29 files changed, 73 insertions(+), 107 deletions(-) diff --git 
a/tensorflow/compiler/xla/service/gpu/collective_permute_thunk.cc b/tensorflow/compiler/xla/service/gpu/collective_permute_thunk.cc index bb76bf02eba..b3b5cf7e048 100644 --- a/tensorflow/compiler/xla/service/gpu/collective_permute_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/collective_permute_thunk.cc @@ -220,10 +220,13 @@ RefcountingHashMap& GlobalRendezvousMap() { CollectivePermuteThunk::CollectivePermuteThunk( ThunkInfo thunk_info, const BufferAllocation::Slice& src, const BufferAllocation::Slice& dest) - : Thunk(kCollectivePermute, thunk_info), src_(src), dest_(dest) {} + : Thunk(kCollectivePermute, thunk_info), + hlo_instruction_(thunk_info.hlo_instruction), + src_(src), + dest_(dest) {} Status CollectivePermuteThunk::ExecuteOnStream(const ExecuteParams& params) { - auto* instr = Cast(hlo_instruction()); + auto* instr = Cast(hlo_instruction_); auto op_profiler = params.profiler->MakeScopedInstructionProfiler(profile_index()); diff --git a/tensorflow/compiler/xla/service/gpu/collective_permute_thunk.h b/tensorflow/compiler/xla/service/gpu/collective_permute_thunk.h index 329db00c66a..44cc6a1c64e 100644 --- a/tensorflow/compiler/xla/service/gpu/collective_permute_thunk.h +++ b/tensorflow/compiler/xla/service/gpu/collective_permute_thunk.h @@ -33,6 +33,7 @@ class CollectivePermuteThunk : public Thunk { Status ExecuteOnStream(const ExecuteParams& params) override; private: + const HloInstruction* hlo_instruction_; BufferAllocation::Slice src_; BufferAllocation::Slice dest_; }; diff --git a/tensorflow/compiler/xla/service/gpu/conditional_thunk.cc b/tensorflow/compiler/xla/service/gpu/conditional_thunk.cc index 041aa9b6fa3..4cff48a89da 100644 --- a/tensorflow/compiler/xla/service/gpu/conditional_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/conditional_thunk.cc @@ -29,6 +29,7 @@ ConditionalThunk::ConditionalThunk( absl::Span branch_operand_buffer_indexes, std::vector branch_thunk_sequences) : Thunk(Kind::kConditional, thunk_info), + hlo_instruction_(thunk_info.hlo_instruction), branch_index_is_bool_( thunk_info.hlo_instruction->operand(0)->shape().element_type() == PRED), @@ -45,13 +46,6 @@ ConditionalThunk::ConditionalThunk( } } -void ConditionalThunk::ComputeAnnotations() { - Thunk::ComputeAnnotations(); - for (auto& branch_thunk : branch_thunks_) { - branch_thunk->ComputeAnnotations(); - } -} - Status ConditionalThunk::Initialize(const GpuExecutable& executable, se::StreamExecutor* executor) { if (branch_index_is_bool_) { @@ -91,8 +85,8 @@ Status ConditionalThunk::ExecuteOnStream(const ExecuteParams& params) { branch_index = pred ? 0 : 1; } else { // Handle default scenario for branch_index not in [0, num_branches). 
- if (branch_index < 0 || branch_index >= hlo_instruction()->branch_count()) { - branch_index = hlo_instruction()->branch_count() - 1; + if (branch_index < 0 || branch_index >= hlo_instruction_->branch_count()) { + branch_index = hlo_instruction_->branch_count() - 1; } } @@ -100,7 +94,7 @@ Status ConditionalThunk::ExecuteOnStream(const ExecuteParams& params) { profiler.StartHloComputation(); TF_RETURN_IF_ERROR(branch_thunks_[branch_index]->ExecuteOnStream(params)); profiler.FinishHloComputation( - hlo_instruction()->branch_computation(branch_index)); + hlo_instruction_->branch_computation(branch_index)); return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/gpu/conditional_thunk.h b/tensorflow/compiler/xla/service/gpu/conditional_thunk.h index a00285efa7c..f91f1c52146 100644 --- a/tensorflow/compiler/xla/service/gpu/conditional_thunk.h +++ b/tensorflow/compiler/xla/service/gpu/conditional_thunk.h @@ -51,12 +51,12 @@ class ConditionalThunk : public Thunk { ConditionalThunk(const ConditionalThunk&) = delete; ConditionalThunk& operator=(const ConditionalThunk&) = delete; - void ComputeAnnotations() override; Status Initialize(const GpuExecutable& executable, se::StreamExecutor* executor) override; Status ExecuteOnStream(const ExecuteParams& params) override; private: + const HloInstruction* hlo_instruction_; const bool branch_index_is_bool_; BufferAllocation::Slice branch_index_buffer_index_; std::vector branch_operand_buffer_indexes_; diff --git a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc index df3dd6d4593..3048db95c39 100644 --- a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc @@ -35,12 +35,11 @@ ConvolutionThunk::ConvolutionThunk( BufferAllocation::Slice result_slice, BufferAllocation::Slice scratch_slice, BufferAllocation::Slice tuple_result_slice) : Thunk(Kind::kConvolution, thunk_info), + cudnn_call_(Cast(thunk_info.hlo_instruction)), operand_buffers_(std::move(operand_slices)), result_buffer_(result_slice), scratch_buffer_(scratch_slice), - tuple_result_buffer_(tuple_result_slice) { - cudnn_call_ = Cast(hlo_instruction()); -} + tuple_result_buffer_(tuple_result_slice) {} Status ConvolutionThunk::ExecuteOnStream(const ExecuteParams& params) { const auto& buffer_allocations = *params.buffer_allocations; diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_thunk.cc b/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_thunk.cc index 36f415d9d89..e91b2c4d0d2 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_thunk.cc @@ -98,6 +98,7 @@ CudnnBatchNormForwardInferenceThunk::CudnnBatchNormForwardInferenceThunk( const BufferAllocation::Slice& variance, float epsilon, int64 feature_index, const BufferAllocation::Slice& output) : Thunk(Thunk::Kind::kCudnnBatchNormForwardInference, thunk_info), + hlo_instruction_(thunk_info.hlo_instruction), operand_(operand), scale_(scale), offset_(offset), @@ -106,7 +107,7 @@ CudnnBatchNormForwardInferenceThunk::CudnnBatchNormForwardInferenceThunk( epsilon_(epsilon), feature_index_(feature_index), output_(output) { - const auto* hlo = hlo_instruction(); + const auto* hlo = hlo_instruction_; CHECK_EQ(hlo->opcode(), HloOpcode::kCustomCall); CHECK_EQ(hlo->custom_call_target(), kCudnnBatchNormForwardInferenceCallTarget); @@ -130,7 +131,7 @@ Status CudnnBatchNormForwardInferenceThunk::ExecuteOnStream( 
buffer_allocations.GetDeviceAddress(variance_)); auto& stream = *params.stream; TF_RETURN_IF_ERROR(RunCudnnBatchNormForwardInference( - hlo_instruction(), operand, output_base, scale, offset, mean, variance, + hlo_instruction_, operand, output_base, scale, offset, mean, variance, epsilon_, feature_index_, &stream)); if (!stream.ok()) { @@ -148,6 +149,7 @@ CudnnBatchNormForwardTrainingThunk::CudnnBatchNormForwardTrainingThunk( const BufferAllocation::Slice& output_inv_stddev, const BufferAllocation::Slice& output_tuple) : Thunk(Thunk::Kind::kCudnnBatchNormForwardTraining, thunk_info), + hlo_instruction_(thunk_info.hlo_instruction), operand_(operand), scale_(scale), offset_(offset), @@ -157,7 +159,7 @@ CudnnBatchNormForwardTrainingThunk::CudnnBatchNormForwardTrainingThunk( output_mean_(output_mean), output_inv_stddev_(output_inv_stddev), output_tuple_(output_tuple) { - const auto* hlo = hlo_instruction(); + const auto* hlo = hlo_instruction_; CHECK_EQ(hlo->opcode(), HloOpcode::kCustomCall); CHECK_EQ(hlo->custom_call_target(), kCudnnBatchNormForwardTrainingCallTarget); CHECK_EQ(hlo->shape().tuple_shapes_size(), 3); @@ -183,7 +185,7 @@ Status CudnnBatchNormForwardTrainingThunk::ExecuteOnStream( params.profiler->MakeScopedInstructionProfiler(profile_index()); auto& stream = *params.stream; TF_RETURN_IF_ERROR(RunCudnnBatchNormForwardTraining( - hlo_instruction(), operand, output_data, output_mean, output_inv_stddev, + hlo_instruction_, operand, output_data, output_mean, output_inv_stddev, se::DeviceMemory(buffer_allocations.GetDeviceAddress(scale_)), se::DeviceMemory(buffer_allocations.GetDeviceAddress(offset_)), epsilon_, feature_index_, &stream)); @@ -214,6 +216,7 @@ CudnnBatchNormBackwardThunk::CudnnBatchNormBackwardThunk( const BufferAllocation::Slice& output_grad_offset, const BufferAllocation::Slice& output_tuple) : Thunk(Thunk::Kind::kCudnnBatchNormBackward, thunk_info), + hlo_instruction_(thunk_info.hlo_instruction), operand_(operand), scale_(scale), mean_(mean), @@ -225,7 +228,7 @@ CudnnBatchNormBackwardThunk::CudnnBatchNormBackwardThunk( output_grad_scale_(output_grad_scale), output_grad_offset_(output_grad_offset), output_tuple_(output_tuple) { - const auto* hlo = hlo_instruction(); + const auto* hlo = hlo_instruction_; CHECK_EQ(hlo->opcode(), HloOpcode::kCustomCall); CHECK_EQ(hlo->custom_call_target(), kCudnnBatchNormBackwardCallTarget); CHECK_EQ(hlo->shape().tuple_shapes_size(), 3); @@ -253,7 +256,7 @@ Status CudnnBatchNormBackwardThunk::ExecuteOnStream( params.profiler->MakeScopedInstructionProfiler(profile_index()); se::Stream* stream = params.stream; TF_RETURN_IF_ERROR(RunCudnnBatchNormBackward( - hlo_instruction(), operand, output_grad_data, grad_output, + hlo_instruction_, operand, output_grad_data, grad_output, output_grad_scale, output_grad_offset, se::DeviceMemory(buffer_allocations.GetDeviceAddress(scale_)), se::DeviceMemory(buffer_allocations.GetDeviceAddress(mean_)), diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_thunk.h b/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_thunk.h index 5897435a58f..bb46017b8fb 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_thunk.h +++ b/tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_thunk.h @@ -63,6 +63,7 @@ class CudnnBatchNormForwardInferenceThunk : public Thunk { Status ExecuteOnStream(const ExecuteParams& params) override; private: + const HloInstruction* hlo_instruction_; BufferAllocation::Slice operand_; BufferAllocation::Slice scale_; BufferAllocation::Slice offset_; @@ -92,6 +93,7 @@ 
class CudnnBatchNormForwardTrainingThunk : public Thunk { Status ExecuteOnStream(const ExecuteParams& params) override; private: + const HloInstruction* hlo_instruction_; BufferAllocation::Slice operand_; BufferAllocation::Slice scale_; BufferAllocation::Slice offset_; @@ -124,6 +126,7 @@ class CudnnBatchNormBackwardThunk : public Thunk { Status ExecuteOnStream(const ExecuteParams& params) override; private: + const HloInstruction* hlo_instruction_; BufferAllocation::Slice operand_; BufferAllocation::Slice scale_; BufferAllocation::Slice mean_; diff --git a/tensorflow/compiler/xla/service/gpu/custom_call_thunk.cc b/tensorflow/compiler/xla/service/gpu/custom_call_thunk.cc index 16a1f923c91..dae15659402 100644 --- a/tensorflow/compiler/xla/service/gpu/custom_call_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/custom_call_thunk.cc @@ -26,11 +26,12 @@ CustomCallThunk::CustomCallThunk( std::vector> operand_slices, ShapeTree result_slices, std::string opaque) : Thunk(Thunk::kCustomCall, thunk_info), + hlo_instruction_(thunk_info.hlo_instruction), call_target_(call_target), operand_slices_(std::move(operand_slices)), result_slices_(std::move(result_slices)), opaque_(std::move(opaque)) { - const HloInstruction* instr = hlo_instruction(); + const HloInstruction* instr = hlo_instruction_; CHECK_EQ(instr->operand_count(), operand_slices_.size()); for (int64 i = 0; i < instr->operand_count(); ++i) { const auto& s1 = operand_slices_[i].shape(); diff --git a/tensorflow/compiler/xla/service/gpu/custom_call_thunk.h b/tensorflow/compiler/xla/service/gpu/custom_call_thunk.h index 72175daf3dd..31c03f5252f 100644 --- a/tensorflow/compiler/xla/service/gpu/custom_call_thunk.h +++ b/tensorflow/compiler/xla/service/gpu/custom_call_thunk.h @@ -46,6 +46,7 @@ class CustomCallThunk : public Thunk { Status ExecuteOnStream(const ExecuteParams& params) override; private: + const HloInstruction* hlo_instruction_; void* call_target_; std::vector> operand_slices_; ShapeTree result_slices_; diff --git a/tensorflow/compiler/xla/service/gpu/for_thunk.cc b/tensorflow/compiler/xla/service/gpu/for_thunk.cc index 7fc3bdd4436..ccd661d8ade 100644 --- a/tensorflow/compiler/xla/service/gpu/for_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/for_thunk.cc @@ -26,6 +26,7 @@ namespace gpu { ForThunk::ForThunk(ThunkInfo thunk_info, const int64 loop_limit, std::unique_ptr body_thunk_sequence) : Thunk(Kind::kWhile, thunk_info), + hlo_instruction_(thunk_info.hlo_instruction), loop_limit_(loop_limit), body_thunk_sequence_(absl::make_unique( // Pass nullptr as the HloInstruction* to the body_thunk_sequence_ @@ -33,11 +34,6 @@ ForThunk::ForThunk(ThunkInfo thunk_info, const int64 loop_limit, // this ForThunk, and shouldn't be profiled separately from it. ThunkInfo(), std::move(*body_thunk_sequence))) {} -void ForThunk::ComputeAnnotations() { - Thunk::ComputeAnnotations(); - body_thunk_sequence_->ComputeAnnotations(); -} - Status ForThunk::Initialize(const GpuExecutable& executable, se::StreamExecutor* executor) { TF_RETURN_IF_ERROR(body_thunk_sequence_->Initialize(executable, executor)); @@ -46,14 +42,14 @@ Status ForThunk::Initialize(const GpuExecutable& executable, Status ForThunk::ExecuteOnStream(const ExecuteParams& params) { VLOG(2) << "Executing ForThunk with " << loop_limit_ << " iters for " - << (hlo_instruction() ? hlo_instruction()->ToString() : ""); + << (hlo_instruction_ ? 
hlo_instruction_->ToString() : ""); auto op_profiler = params.profiler->MakeScopedInstructionProfiler(profile_index()); for (int64 i = 0; i < loop_limit_; ++i) { params.profiler->StartHloComputation(); // Invoke loop body thunk sequence. TF_RETURN_IF_ERROR(body_thunk_sequence_->ExecuteOnStream(params)); - params.profiler->FinishHloComputation(hlo_instruction()->while_body()); + params.profiler->FinishHloComputation(hlo_instruction_->while_body()); } return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/gpu/for_thunk.h b/tensorflow/compiler/xla/service/gpu/for_thunk.h index 77a89ea6023..b6ee950737e 100644 --- a/tensorflow/compiler/xla/service/gpu/for_thunk.h +++ b/tensorflow/compiler/xla/service/gpu/for_thunk.h @@ -36,12 +36,12 @@ class ForThunk : public Thunk { ForThunk(const ForThunk&) = delete; ForThunk& operator=(const ForThunk&) = delete; - void ComputeAnnotations() override; Status Initialize(const GpuExecutable& executable, se::StreamExecutor* executor) override; Status ExecuteOnStream(const ExecuteParams& params) override; private: + const HloInstruction* hlo_instruction_; const int64 loop_limit_; std::unique_ptr body_thunk_sequence_; }; diff --git a/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc b/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc index 561dfbe3137..35dad1c84ac 100644 --- a/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc @@ -40,6 +40,7 @@ GemmThunk::GemmThunk(ThunkInfo thunk_info, bool implements_whole_instruction, const GemmBackendConfig &backend_config) : Thunk(Kind::kGemm, thunk_info), + hlo_instruction_(thunk_info.hlo_instruction), lhs_buffer_(lhs_buffer), rhs_buffer_(rhs_buffer), output_buffer_(output_buffer), @@ -51,11 +52,11 @@ Status GemmThunk::ExecuteOnStream(const ExecuteParams ¶ms) { return params.buffer_allocations->GetDeviceAddress(slice); }; - VLOG(3) << "Running GEMM thunk on instruction: " << hlo_instruction(); + VLOG(3) << "Running GEMM thunk on instruction: " << hlo_instruction_; se::DeviceMemoryBase lhs_data = get_device_address(lhs_buffer_); se::DeviceMemoryBase rhs_data = get_device_address(rhs_buffer_); se::DeviceMemoryBase output_data = get_device_address(output_buffer_); - return RunGemm(hlo_instruction(), backend_config_, lhs_data, rhs_data, + return RunGemm(hlo_instruction_, backend_config_, lhs_data, rhs_data, output_data, params.stream, implements_whole_instruction_, profile_index(), params.profiler); } diff --git a/tensorflow/compiler/xla/service/gpu/gemm_thunk.h b/tensorflow/compiler/xla/service/gpu/gemm_thunk.h index 2bccb7b3572..1a51a7d4e0c 100644 --- a/tensorflow/compiler/xla/service/gpu/gemm_thunk.h +++ b/tensorflow/compiler/xla/service/gpu/gemm_thunk.h @@ -51,6 +51,7 @@ class GemmThunk : public Thunk { Status ExecuteOnStream(const ExecuteParams& params) override; private: + const HloInstruction* hlo_instruction_; const BufferAllocation::Slice lhs_buffer_; const BufferAllocation::Slice rhs_buffer_; const BufferAllocation::Slice output_buffer_; diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index 3dcdb4c90eb..3050e794f10 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -537,10 +537,10 @@ static Status CompileModuleToLlvmIrImpl( // computation. // * For each visit of these HloInstructions, either none or one Thunk // will be returned. 
- // * If there is a thunk returned, thunk->hlo_instruction() equals the + // * If there is a thunk returned, thunk->hlo_instruction_ equals the // input HloInstruction*. // * A returned thunk may contain other sub-thunks. A sub-thunk may or may - // not have an associated hlo_instruction(). + // not have an associated hlo_instruction_. TF_RET_CHECK(thunks->size() <= 1) << instruction->ToString(); if (!thunks->empty()) { auto thunk = std::move(thunks->front()); diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc index 89c5e123a48..469f2919fba 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc @@ -71,7 +71,6 @@ GpuExecutable::GpuExecutable( CHECK(has_module() && assignment_); GpuDebugInfoManager::Get()->RegisterModule(module().name(), shared_module(), assignment_); - ComputeThunkAnnotations(); } GpuExecutable::~GpuExecutable() { @@ -93,12 +92,6 @@ GpuExecutable::~GpuExecutable() { } } -void GpuExecutable::ComputeThunkAnnotations() { - for (Thunk* thunk : thunk_schedule_->TotalOrder()) { - thunk->ComputeAnnotations(); - } -} - Status GpuExecutable::CheckCompatibilityWithServiceExecutableRunOptions( const ServiceExecutableRunOptions* run_options) { se::Stream* main_stream = run_options->stream(); @@ -186,8 +179,8 @@ Status GpuExecutable::ExecuteThunks( stream->ThenWaitFor(FindOrDie(thunk_to_finish_event, dependency).get()); } - VLOG(2) << "Executing the thunk for " << thunk->name() << " on stream " - << stream_no; + VLOG(2) << "Executing the thunk for " << thunk->profile_annotation() + << " on stream " << stream_no; const GpuExecutableRunOptions* gpu_options = run_options->run_options().gpu_executable_run_options(); Thunk::ExecuteParams thunk_params{ diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.h b/tensorflow/compiler/xla/service/gpu/gpu_executable.h index 0da446c9739..516fa9b269a 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_executable.h +++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.h @@ -115,9 +115,6 @@ class GpuExecutable : public Executable { StatusOr ResolveConstantGlobals( stream_executor::Stream* stream); - // Computes annotations for each thunk and store them in thunk_annotations_. - void ComputeThunkAnnotations(); - // GpuExecutable check with either AMD's ISA version, or Nvidia's major minor // version for compute capability, depending on the hardware. 
Status CheckCompatibilityWithServiceExecutableRunOptions( diff --git a/tensorflow/compiler/xla/service/gpu/infeed_thunk.cc b/tensorflow/compiler/xla/service/gpu/infeed_thunk.cc index 43cc5f5a2ae..5fe459a70bc 100644 --- a/tensorflow/compiler/xla/service/gpu/infeed_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/infeed_thunk.cc @@ -25,13 +25,15 @@ namespace gpu { InfeedThunk::InfeedThunk( ThunkInfo thunk_info, const ShapeTree& infeed_slices) - : Thunk(Kind::kInfeed, thunk_info), infeed_slices_(infeed_slices) {} + : Thunk(Kind::kInfeed, thunk_info), + hlo_instruction_(thunk_info.hlo_instruction), + infeed_slices_(infeed_slices) {} Status InfeedThunk::ExecuteOnStream(const ExecuteParams& params) { auto& stream = *params.stream; auto& buffer_allocations = *params.buffer_allocations; - VLOG(2) << "Infeeding to GPU: " << hlo_instruction()->ToString(); + VLOG(2) << "Infeeding to GPU: " << hlo_instruction_->ToString(); auto op_profiler = params.profiler->MakeScopedInstructionProfiler(profile_index()); diff --git a/tensorflow/compiler/xla/service/gpu/infeed_thunk.h b/tensorflow/compiler/xla/service/gpu/infeed_thunk.h index ec33235c466..ab410661ba1 100644 --- a/tensorflow/compiler/xla/service/gpu/infeed_thunk.h +++ b/tensorflow/compiler/xla/service/gpu/infeed_thunk.h @@ -43,6 +43,7 @@ class InfeedThunk : public Thunk { Status ExecuteOnStream(const ExecuteParams& params) override; private: + const HloInstruction* hlo_instruction_; const ShapeTree infeed_slices_; }; diff --git a/tensorflow/compiler/xla/service/gpu/nccl_all_reduce_thunk.cc b/tensorflow/compiler/xla/service/gpu/nccl_all_reduce_thunk.cc index 755413beeee..25ab9a7ce6e 100644 --- a/tensorflow/compiler/xla/service/gpu/nccl_all_reduce_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/nccl_all_reduce_thunk.cc @@ -544,10 +544,11 @@ NcclAllReduceThunk::NcclAllReduceThunk( ThunkInfo thunk_info, int64 replica_count, std::vector buffers) : Thunk(Thunk::kNcclAllReduce, thunk_info), + hlo_instruction_(thunk_info.hlo_instruction), replica_count_(replica_count), buffers_(std::move(buffers)), aux_data_(absl::make_unique()) { - CHECK_EQ(hlo_instruction()->operand_count(), buffers_.size()); + CHECK_EQ(hlo_instruction_->operand_count(), buffers_.size()); } // Figures out which devices (named by their replica-ids) are participating in @@ -557,7 +558,7 @@ Status NcclAllReduceThunk::ExecuteOnStream(const ExecuteParams& params) { auto op_profiler = params.profiler->MakeScopedInstructionProfiler(profile_index()); - auto* instr = Cast(hlo_instruction()); + auto* instr = Cast(hlo_instruction_); int64 local_device_ordinal = params.stream->parent()->device_ordinal(); GlobalDeviceId global_device_id; if (params.gpu_global_device_ids) { @@ -606,7 +607,7 @@ Status NcclAllReduceThunk::ExecuteOnStream(const ExecuteParams& params) { // Find or create the rendezvous for this collective operation. 
RendezvousKey rendezvous_key = RendezvousKey::FromInstruction( - params.run_id, global_devices, local_devices.size(), hlo_instruction()); + params.run_id, global_devices, local_devices.size(), hlo_instruction_); if (VLOG_IS_ON(2)) { std::vector local_participants; @@ -633,13 +634,12 @@ Status NcclAllReduceThunk::ExecuteOnStream(const ExecuteParams& params) { pbuffer.destination_data = params.buffer_allocations->GetDeviceAddress(buffer.destination_buffer); pbuffer.primitive_type = - hlo_instruction()->operand(i)->shape().element_type(); + hlo_instruction_->operand(i)->shape().element_type(); participant.buffers.push_back(pbuffer); } participant.local_devices = std::move(local_devices); participant.nccl_unique_id_callback = params.nccl_unique_id_callback; - auto reduction_kind = - MatchReductionComputation(hlo_instruction()->to_apply()); + auto reduction_kind = MatchReductionComputation(hlo_instruction_->to_apply()); CHECK(reduction_kind.has_value()); participant.reduction_kind = *reduction_kind; diff --git a/tensorflow/compiler/xla/service/gpu/nccl_all_reduce_thunk.h b/tensorflow/compiler/xla/service/gpu/nccl_all_reduce_thunk.h index 1df4f0805a6..cbd4fd3aa51 100644 --- a/tensorflow/compiler/xla/service/gpu/nccl_all_reduce_thunk.h +++ b/tensorflow/compiler/xla/service/gpu/nccl_all_reduce_thunk.h @@ -73,6 +73,7 @@ class NcclAllReduceThunk : public Thunk { // build, and we don't want to expose *that* mess in the header.) struct AuxData; + const HloInstruction* hlo_instruction_; const int64 replica_count_; const std::vector buffers_; std::unique_ptr aux_data_; diff --git a/tensorflow/compiler/xla/service/gpu/outfeed_thunk.cc b/tensorflow/compiler/xla/service/gpu/outfeed_thunk.cc index 104366fd78c..83066a4addf 100644 --- a/tensorflow/compiler/xla/service/gpu/outfeed_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/outfeed_thunk.cc @@ -26,13 +26,14 @@ namespace gpu { OutfeedThunk::OutfeedThunk(ThunkInfo thunk_info, ShapeTree outfeed_slices) : Thunk(Kind::kOutfeed, thunk_info), + hlo_instruction_(thunk_info.hlo_instruction), outfeed_slices_(std::move(outfeed_slices)) {} Status OutfeedThunk::ExecuteOnStream(const ExecuteParams& params) { auto& stream = *params.stream; auto& buffer_allocations = *params.buffer_allocations; - VLOG(2) << "Outfeeding from GPU: " << hlo_instruction()->ToString(); + VLOG(2) << "Outfeeding from GPU: " << hlo_instruction_->ToString(); auto op_profiler = params.profiler->MakeScopedInstructionProfiler(profile_index()); @@ -41,13 +42,13 @@ Status OutfeedThunk::ExecuteOnStream(const ExecuteParams& params) { outfeed_manager->BlockingGetNextDestination(); // Nothing to be done for empty tuples. 
- if (ShapeUtil::IsEmptyTuple(hlo_instruction()->operand(0)->shape())) { + if (ShapeUtil::IsEmptyTuple(hlo_instruction_->operand(0)->shape())) { return Status::OK(); } - CHECK(ShapeUtil::Compatible(hlo_instruction()->operand(0)->shape(), + CHECK(ShapeUtil::Compatible(hlo_instruction_->operand(0)->shape(), outfeed_buffers->shape())) << "XLA program outfeed request of shape " - << hlo_instruction()->operand(0)->shape().ToString() + << hlo_instruction_->operand(0)->shape().ToString() << " did not match the runtime's outfeed buffer of shape " << outfeed_buffers->shape().ToString(); diff --git a/tensorflow/compiler/xla/service/gpu/outfeed_thunk.h b/tensorflow/compiler/xla/service/gpu/outfeed_thunk.h index e99174e3c6c..9174e605783 100644 --- a/tensorflow/compiler/xla/service/gpu/outfeed_thunk.h +++ b/tensorflow/compiler/xla/service/gpu/outfeed_thunk.h @@ -41,6 +41,7 @@ class OutfeedThunk : public Thunk { Status ExecuteOnStream(const ExecuteParams& params) override; private: + const HloInstruction* hlo_instruction_; const ShapeTree outfeed_slices_; }; diff --git a/tensorflow/compiler/xla/service/gpu/sequential_thunk.cc b/tensorflow/compiler/xla/service/gpu/sequential_thunk.cc index 15cf2493549..903acf4f57d 100644 --- a/tensorflow/compiler/xla/service/gpu/sequential_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/sequential_thunk.cc @@ -28,12 +28,6 @@ SequentialThunk::SequentialThunk(ThunkInfo thunk_info, std::vector> thunks) : Thunk(Kind::kSequential, thunk_info), thunks_(std::move(thunks)) {} -void SequentialThunk::ComputeAnnotations() { - for (const auto& thunk : thunks_) { - thunk->ComputeAnnotations(); - } -} - Status SequentialThunk::Initialize(const GpuExecutable& executable, se::StreamExecutor* executor) { for (auto& thunk : thunks_) { diff --git a/tensorflow/compiler/xla/service/gpu/sequential_thunk.h b/tensorflow/compiler/xla/service/gpu/sequential_thunk.h index 127c5bcf734..455ee60fa5c 100644 --- a/tensorflow/compiler/xla/service/gpu/sequential_thunk.h +++ b/tensorflow/compiler/xla/service/gpu/sequential_thunk.h @@ -39,7 +39,6 @@ class SequentialThunk : public Thunk { const std::vector>& thunks() const { return thunks_; } - void ComputeAnnotations() override; Status Initialize(const GpuExecutable& executable, se::StreamExecutor* executor) override; Status ExecuteOnStream(const ExecuteParams& params) override; diff --git a/tensorflow/compiler/xla/service/gpu/thunk.h b/tensorflow/compiler/xla/service/gpu/thunk.h index 0a5382291c9..7a9fedec629 100644 --- a/tensorflow/compiler/xla/service/gpu/thunk.h +++ b/tensorflow/compiler/xla/service/gpu/thunk.h @@ -69,10 +69,12 @@ class Thunk { }; struct ThunkInfo { + // Optional. It's only used by subclasses which haven't been migrated away + // from HloInstructions. Once the migration is done, Thunks should be fully + // serializable. const HloInstruction* hlo_instruction = nullptr; absl::optional profile_index; - // TODO(timshen): Remove hlo_instruction and add name(), - // profile_annotation() here. + std::string profile_annotation; }; // The hlo_instruction argument is meant to be the instruction this thunk was @@ -80,9 +82,8 @@ class Thunk { // to Thunk::hlo_instruction, so it can be null. explicit Thunk(Kind kind, ThunkInfo thunk_info) : kind_(kind), - hlo_instruction_(thunk_info.hlo_instruction), - name_(hlo_instruction_ ? 
hlo_instruction_->name() : ""), - profile_index_(thunk_info.profile_index) {} + profile_index_(thunk_info.profile_index), + profile_annotation_(thunk_info.profile_annotation) {} virtual ~Thunk() {} Thunk(const Thunk&) = delete; Thunk& operator=(const Thunk&) = delete; @@ -90,19 +91,6 @@ class Thunk { Kind kind() const { return kind_; } string profile_annotation() const { return profile_annotation_; } - absl::string_view name() const { return name_; } - - // Constructs and caches the profile annotation string for this thunk and - // any child thunks. - virtual void ComputeAnnotations() { - const HloInstruction* hlo = hlo_instruction(); - if (hlo) { - profile_annotation_ = - absl::StrFormat("Thunk:#hlo_op=%s,hlo_module=%s#", hlo->name(), - hlo->GetModule()->name()); - } - } - // Prepares the thunk for execution on the given StreamExecutor. // // This may be called multiple times. Its main purpose is to give us a chance @@ -134,14 +122,8 @@ class Thunk { virtual Status ExecuteOnStream(const ExecuteParams& params) = 0; protected: - const HloInstruction* hlo_instruction() const { return hlo_instruction_; } - absl::optional profile_index() const { return profile_index_; } - const HloModuleConfig& GetModuleConfig() const { - return hlo_instruction()->GetModule()->config(); - } - // Safely copies the given buffer to the GPU, deleting it on the host only // after the copy has completed. template @@ -156,13 +138,8 @@ class Thunk { private: Kind kind_; - - // Will be removed in the future, as Thunk is migrating away from the - // monolithic HloInstruction. - const HloInstruction* hlo_instruction_; - std::string name_; absl::optional profile_index_; - string profile_annotation_; + std::string profile_annotation_; }; // A sequence of thunks. diff --git a/tensorflow/compiler/xla/service/gpu/thunk_emitter.cc b/tensorflow/compiler/xla/service/gpu/thunk_emitter.cc index 089d70d658f..690d0c9de56 100644 --- a/tensorflow/compiler/xla/service/gpu/thunk_emitter.cc +++ b/tensorflow/compiler/xla/service/gpu/thunk_emitter.cc @@ -386,6 +386,8 @@ Thunk::ThunkInfo ThunkEmitter::EmissionContext::GetThunkInfo( CHECK(hlo); Thunk::ThunkInfo info; info.hlo_instruction = hlo; + info.profile_annotation = absl::StrFormat( + "Thunk:#hlo_op=%s,hlo_module=%s#", hlo->name(), hlo->GetModule()->name()); return info; } } // namespace gpu diff --git a/tensorflow/compiler/xla/service/gpu/thunk_schedule.h b/tensorflow/compiler/xla/service/gpu/thunk_schedule.h index 3801dc8aee8..ceae39583f2 100644 --- a/tensorflow/compiler/xla/service/gpu/thunk_schedule.h +++ b/tensorflow/compiler/xla/service/gpu/thunk_schedule.h @@ -80,8 +80,8 @@ class ThunkSchedule { // `thunk`. // // Precondition: `operand` is a non-trivial (i.e. excluding - // thunk.hlo_instruction() itself) transitive operand of - // thunk.hlo_instruction(). + // thunk.hlo_instruction_ itself) transitive operand of + // thunk.hlo_instruction_. 
void AddDependenciesOnTransitiveOperands( const Thunk& thunk, const HloInstruction& operand, const absl::flat_hash_map& hlo_to_thunk); diff --git a/tensorflow/compiler/xla/service/gpu/while_thunk.cc b/tensorflow/compiler/xla/service/gpu/while_thunk.cc index 47a24552b6c..792479df4ac 100644 --- a/tensorflow/compiler/xla/service/gpu/while_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/while_thunk.cc @@ -29,6 +29,7 @@ WhileThunk::WhileThunk( std::unique_ptr condition_thunk_sequence, std::unique_ptr body_thunk_sequence) : Thunk(Kind::kWhile, thunk_info), + hlo_instruction_(thunk_info.hlo_instruction), condition_result_buffer_index_(condition_result_buffer_index), // Pass nullptr as the HloInstruction* to the condition_thunk_sequence_ // and body_thunk_sequence_ constructors because these SequentialThunks @@ -39,12 +40,6 @@ WhileThunk::WhileThunk( body_thunk_sequence_(absl::make_unique( ThunkInfo(), std::move(*body_thunk_sequence))) {} -void WhileThunk::ComputeAnnotations() { - Thunk::ComputeAnnotations(); - condition_thunk_sequence_->ComputeAnnotations(); - body_thunk_sequence_->ComputeAnnotations(); -} - Status WhileThunk::Initialize(const GpuExecutable& executable, se::StreamExecutor* executor) { TF_RETURN_IF_ERROR( @@ -67,7 +62,7 @@ Status WhileThunk::ExecuteOnStream(const ExecuteParams& params) { profiler.StartHloComputation(); VLOG(3) << "Executing condition computation"; TF_RETURN_IF_ERROR(condition_thunk_sequence_->ExecuteOnStream(params)); - profiler.FinishHloComputation(hlo_instruction()->while_condition()); + profiler.FinishHloComputation(hlo_instruction_->while_condition()); // Copy the result of condition computation and break the loop if 'false'. bool condition_result; @@ -91,7 +86,7 @@ Status WhileThunk::ExecuteOnStream(const ExecuteParams& params) { // Invoke thunk sequence for while 'body' computation, and pass on // 'profiler' to measure the timing of the thunks in 'body_thunk_sequence_'. 
TF_RETURN_IF_ERROR(body_thunk_sequence_->ExecuteOnStream(params)); - profiler.FinishHloComputation(hlo_instruction()->while_body()); + profiler.FinishHloComputation(hlo_instruction_->while_body()); } return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/gpu/while_thunk.h b/tensorflow/compiler/xla/service/gpu/while_thunk.h index 72d9415b309..707bac15bb2 100644 --- a/tensorflow/compiler/xla/service/gpu/while_thunk.h +++ b/tensorflow/compiler/xla/service/gpu/while_thunk.h @@ -46,12 +46,12 @@ class WhileThunk : public Thunk { WhileThunk(const WhileThunk&) = delete; WhileThunk& operator=(const WhileThunk&) = delete; - void ComputeAnnotations() override; Status Initialize(const GpuExecutable& executable, se::StreamExecutor* executor) override; Status ExecuteOnStream(const ExecuteParams& params) override; private: + const HloInstruction* hlo_instruction_; const BufferAllocation::Slice condition_result_buffer_index_; std::unique_ptr condition_thunk_sequence_; std::unique_ptr body_thunk_sequence_; From af350043828eac4a47b97b9443b86f6c2aa53d92 Mon Sep 17 00:00:00 2001 From: Henry Tan Date: Mon, 13 Jul 2020 15:48:51 -0700 Subject: [PATCH 0319/2522] Updating std::variant to absl::variant PiperOrigin-RevId: 321049918 Change-Id: Ib871f228d17c3e5abe3c7ddba75a4ff928520e07 --- tensorflow/core/tpu/kernels/BUILD | 1 + tensorflow/core/tpu/kernels/tpu_compile_op_common.cc | 8 ++++---- tensorflow/core/tpu/kernels/tpu_compile_op_common.h | 3 ++- tensorflow/core/tpu/kernels/tpu_compile_op_support.cc | 8 ++++---- tensorflow/core/tpu/kernels/tpu_compile_op_support.h | 2 +- 5 files changed, 12 insertions(+), 10 deletions(-) diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index 80c5aa71069..77ac6031805 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -62,6 +62,7 @@ cc_library( "//tensorflow/stream_executor/tpu:tpu_platform_interface", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:span", + "@com_google_absl//absl/types:variant", ], alwayslink = 1, ) diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc b/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc index bd332226bfd..21cf6d68301 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc @@ -190,8 +190,8 @@ Status TpuCompileOpKernelCommon::BuildComputationArgumentDescriptions( arg.kind = XlaCompiler::Argument::kConstant; guaranteed_constants_size = guaranteed_constants.index() == 0 - ? std::get<0>(guaranteed_constants).size() - : std::get<1>(guaranteed_constants)->size(); + ? absl::get<0>(guaranteed_constants).size() + : absl::get<1>(guaranteed_constants)->size(); TF_RET_CHECK(constant_count < guaranteed_constants_size) << "More constant args in TPUCompileMetadataProto than constant " "tensors."; @@ -200,13 +200,13 @@ Status TpuCompileOpKernelCommon::BuildComputationArgumentDescriptions( // const>`. Tensor tensor; CHECK(tensor.FromProto( - *std::get<0>(guaranteed_constants)[constant_count++])) + *absl::get<0>(guaranteed_constants)[constant_count++])) << "Failed to deserialize invalid `TensorProto` into `Tensor`."; arg.constant_value = tensor; } else { // `guaranteed_constants` is of type `const OpInputList* const`. 
arg.constant_value = - (*std::get<1>(guaranteed_constants))[constant_count++]; + (*absl::get<1>(guaranteed_constants))[constant_count++]; } break; case tpu::TPUCompileMetadataProto::Arg::INVALID: diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_common.h b/tensorflow/core/tpu/kernels/tpu_compile_op_common.h index 79be33f7233..567d5973226 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_common.h +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_common.h @@ -19,6 +19,7 @@ limitations under the License. #include #include "absl/types/span.h" +#include "absl/types/variant.h" #include "tensorflow/compiler/jit/shape_inference.h" #include "tensorflow/compiler/tf2xla/xla_compiler.h" #include "tensorflow/compiler/xla/client/compile_only_client.h" @@ -69,7 +70,7 @@ class TpuCompileOpKernelCommon { // Lowers Mlir or TF Function computation into HLO IR and using XLA compiler // compiles into TPU programs ready for execution. virtual Status Compile( - const std::variant& computation, + const absl::variant& computation, const XLA_TpuMeshState* mesh_state, const std::vector& arg_shapes, TpuProgramGroupInterface* tpu_program_group) = 0; diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_support.cc b/tensorflow/core/tpu/kernels/tpu_compile_op_support.cc index cf5d6005653..41e81c6bca7 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_support.cc +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_support.cc @@ -446,11 +446,11 @@ StatusOr CreateTpuCompilationRequest( compilation_request.set_use_mlir(use_mlir); if (use_mlir) { VLOG(1) << "Serializing MlirModule"; - const MlirToHloArgs& mlir_computation = std::get<0>(computation); + const MlirToHloArgs& mlir_computation = absl::get<0>(computation); *compilation_request.mutable_mlir_module() = mlir_computation.mlir_module; } else { VLOG(1) << "Serializing FunctionDefinitionLibrary"; - const FunctionToHloArgs& function_computation = std::get<1>(computation); + const FunctionToHloArgs& function_computation = absl::get<1>(computation); *compilation_request.mutable_fdef_lib() = function_computation.flib_def->ToProto(); compilation_request.set_graph_def_version( @@ -461,14 +461,14 @@ StatusOr CreateTpuCompilationRequest( // to avoid passing guaranteed_constants over C_API. if (function_computation.guaranteed_constants.index() == 0) { absl::Span guaranteed_constants = - std::get<0>(function_computation.guaranteed_constants); + absl::get<0>(function_computation.guaranteed_constants); for (const TensorProto* constant : guaranteed_constants) { *compilation_request.add_guaranteed_constants() = *constant; } } else { CHECK_EQ(function_computation.guaranteed_constants.index(), 1); const OpInputList& guaranteed_constants = - *std::get<1>(function_computation.guaranteed_constants); + *absl::get<1>(function_computation.guaranteed_constants); for (const Tensor& constant : guaranteed_constants) { constant.AsProtoTensorContent( compilation_request.add_guaranteed_constants()); diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_support.h b/tensorflow/core/tpu/kernels/tpu_compile_op_support.h index ca936e4c213..d6d407cb28f 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_support.h +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_support.h @@ -46,7 +46,7 @@ namespace se = ::stream_executor; // List of parameters for lowering Mlir to HLO IR. struct MlirToHloArgs { - absl::string_view mlir_module; + const std::string& mlir_module; }; // Variant of guaranteed constant tensors types. 
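As background for the std::variant -> absl::variant change above, the sketch below shows the dispatch pattern the compile-op code relies on: inspect variant.index() and read the active alternative back with absl::get<N>(). It is a minimal, self-contained illustration rather than code from the patch; MlirArgs and FunctionArgs are hypothetical stand-ins for the real MlirToHloArgs and FunctionToHloArgs.

    #include <iostream>
    #include <string>

    #include "absl/types/variant.h"

    // Hypothetical stand-ins for the real MlirToHloArgs / FunctionToHloArgs.
    struct MlirArgs { std::string mlir_module; };
    struct FunctionArgs { std::string function_name; };

    // Dispatch on the active alternative the same way the patched code does:
    // check index(), then fetch the stored value with absl::get<N>().
    void Describe(const absl::variant<MlirArgs, FunctionArgs>& computation) {
      if (computation.index() == 0) {
        std::cout << "MLIR module, "
                  << absl::get<0>(computation).mlir_module.size() << " bytes\n";
      } else {
        std::cout << "Function computation: "
                  << absl::get<1>(computation).function_name << "\n";
      }
    }

    int main() {
      Describe(MlirArgs{"module { }"});
      Describe(FunctionArgs{"my_tpu_function"});
      return 0;
    }

The same dispatch could also be written with absl::holds_alternative or absl::visit; index()-based access is shown here only because it mirrors the existing guaranteed_constants handling most closely.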
From 10b90d92d5225b3735418b606f3072d0a1c3db55 Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Mon, 13 Jul 2020 15:51:42 -0700 Subject: [PATCH 0320/2522] Internal CI testing changes PiperOrigin-RevId: 321050423 Change-Id: Ia0c603ef2268355430caa781063f92db36d8bcd3 --- .../macos/cpu_libtensorflow_release.sh | 23 +++++++++ .../per_release/macos/cpu_py35_nonpip.sh | 51 +++++++++++++++++++ .../per_release/macos/cpu_py35_pip.sh | 51 +++++++++++++++++++ .../per_release/macos/cpu_py36_nonpip.sh | 51 +++++++++++++++++++ .../per_release/macos/cpu_py36_pip.sh | 51 +++++++++++++++++++ .../per_release/macos/cpu_py37_nonpip.sh | 51 +++++++++++++++++++ .../per_release/macos/cpu_py37_pip.sh | 51 +++++++++++++++++++ .../per_release/macos/cpu_py38_nonpip.sh | 51 +++++++++++++++++++ .../per_release/macos/cpu_py38_pip.sh | 51 +++++++++++++++++++ 9 files changed, 431 insertions(+) create mode 100644 tensorflow/tools/ci_build/per_release/macos/cpu_libtensorflow_release.sh create mode 100644 tensorflow/tools/ci_build/per_release/macos/cpu_py35_nonpip.sh create mode 100644 tensorflow/tools/ci_build/per_release/macos/cpu_py35_pip.sh create mode 100644 tensorflow/tools/ci_build/per_release/macos/cpu_py36_nonpip.sh create mode 100644 tensorflow/tools/ci_build/per_release/macos/cpu_py36_pip.sh create mode 100644 tensorflow/tools/ci_build/per_release/macos/cpu_py37_nonpip.sh create mode 100644 tensorflow/tools/ci_build/per_release/macos/cpu_py37_pip.sh create mode 100755 tensorflow/tools/ci_build/per_release/macos/cpu_py38_nonpip.sh create mode 100644 tensorflow/tools/ci_build/per_release/macos/cpu_py38_pip.sh diff --git a/tensorflow/tools/ci_build/per_release/macos/cpu_libtensorflow_release.sh b/tensorflow/tools/ci_build/per_release/macos/cpu_libtensorflow_release.sh new file mode 100644 index 00000000000..ccc80e1bafd --- /dev/null +++ b/tensorflow/tools/ci_build/per_release/macos/cpu_libtensorflow_release.sh @@ -0,0 +1,23 @@ +#!/bin/bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +set -e +set -x + +# Install latest bazel +source tensorflow/tools/ci_build/release/common.sh +install_bazelisk +tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh diff --git a/tensorflow/tools/ci_build/per_release/macos/cpu_py35_nonpip.sh b/tensorflow/tools/ci_build/per_release/macos/cpu_py35_nonpip.sh new file mode 100644 index 00000000000..06fabd7b1c7 --- /dev/null +++ b/tensorflow/tools/ci_build/per_release/macos/cpu_py35_nonpip.sh @@ -0,0 +1,51 @@ +#!/bin/bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +set -e +set -x + +source tensorflow/tools/ci_build/release/common.sh +install_bazelisk + +# Pick a more recent version of xcode +export DEVELOPER_DIR=/Applications/Xcode_10.3.app/Contents/Developer +sudo xcode-select -s "${DEVELOPER_DIR}" +python3.5 -m virtualenv tf_build_env --system-site-packages +source tf_build_env/bin/activate + +# Install macos pip dependencies +install_macos_pip_deps sudo pip3.5 + +# Run configure. +export TF_NEED_CUDA=0 +export CC_OPT_FLAGS='-mavx' +export TF2_BEHAVIOR=1 +export PYTHON_BIN_PATH=$(which python3.5) +yes "" | "$PYTHON_BIN_PATH" configure.py + +tag_filters="-no_oss,-oss_serial,-nomac,-no_mac,-no_oss_py35,-v1only,-gpu,-tpu,-benchmark-test" + +# Get the default test targets for bazel. +source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh + +# Run tests +set +e +bazel test --test_output=errors --config=opt \ + --action_env=TF2_BEHAVIOR="${TF2_BEHAVIOR}" \ + --build_tag_filters="${tag_filters}" \ + --test_tag_filters="${tag_filters}" -- \ + ${DEFAULT_BAZEL_TARGETS} \ + -//tensorflow/lite/... +test_xml_summary_exit diff --git a/tensorflow/tools/ci_build/per_release/macos/cpu_py35_pip.sh b/tensorflow/tools/ci_build/per_release/macos/cpu_py35_pip.sh new file mode 100644 index 00000000000..3f31033b2ac --- /dev/null +++ b/tensorflow/tools/ci_build/per_release/macos/cpu_py35_pip.sh @@ -0,0 +1,51 @@ +#!/bin/bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +set -e +set -x + +source tensorflow/tools/ci_build/release/common.sh +install_bazelisk + +# Pick a more recent version of xcode +export DEVELOPER_DIR=/Applications/Xcode_10.3.app/Contents/Developer +sudo xcode-select -s "${DEVELOPER_DIR}" + +# Install macos pip dependencies +install_macos_pip_deps sudo pip3.5 + +# Export required variables for running pip_new.sh +export OS_TYPE="MACOS" +export CONTAINER_TYPE="CPU" +export TF_PYTHON_VERSION='python3.5' +export TF_BUILD_BOTH_CPU_PACKAGES=1 + +# Run configure. 
+export TF_NEED_CUDA=0 +export CC_OPT_FLAGS='-mavx' +export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) +yes "" | "$PYTHON_BIN_PATH" configure.py + +# Export optional variables for running pip.sh +export TF_BUILD_FLAGS="--config=opt --config=v2" +export TF_TEST_FLAGS="--define=no_tensorflow_py_deps=true --test_lang_filters=py --test_output=errors --verbose_failures=true --keep_going --test_env=TF2_BEHAVIOR=1" +export TF_TEST_TARGETS="//tensorflow/python/..." +export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" +export TF_TEST_FILTER_TAGS='-nomac,-no_mac,-no_oss,-oss_serial,-no_oss_py35,-gpu,-tpu,-benchmark-test' +#export IS_NIGHTLY=0 # Not nightly; uncomment if building from tf repo. +export TF_PROJECT_NAME="tensorflow" +export TF_PIP_TEST_ROOT="pip_test" + +./tensorflow/tools/ci_build/builds/pip_new.sh diff --git a/tensorflow/tools/ci_build/per_release/macos/cpu_py36_nonpip.sh b/tensorflow/tools/ci_build/per_release/macos/cpu_py36_nonpip.sh new file mode 100644 index 00000000000..51cc3da62d6 --- /dev/null +++ b/tensorflow/tools/ci_build/per_release/macos/cpu_py36_nonpip.sh @@ -0,0 +1,51 @@ +#!/bin/bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +set -e +set -x + +source tensorflow/tools/ci_build/release/common.sh +install_bazelisk + +# Pick a more recent version of xcode +export DEVELOPER_DIR=/Applications/Xcode_10.3.app/Contents/Developer +sudo xcode-select -s "${DEVELOPER_DIR}" +python3.6 -m virtualenv tf_build_env --system-site-packages +source tf_build_env/bin/activate + +# Install macos pip dependencies +install_macos_pip_deps sudo pip3.6 + +# Run configure. +export TF_NEED_CUDA=0 +export CC_OPT_FLAGS='-mavx' +export TF2_BEHAVIOR=1 +export PYTHON_BIN_PATH=$(which python3.6) +yes "" | "$PYTHON_BIN_PATH" configure.py + +tag_filters="-no_oss,-oss_serial,-nomac,-no_mac,-no_oss_py36,-v1only,-gpu,-tpu,-benchmark-test" + +# Get the default test targets for bazel. +source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh + +# Run tests +set +e +bazel test --test_output=errors --config=opt \ + --action_env=TF2_BEHAVIOR="${TF2_BEHAVIOR}" \ + --build_tag_filters="${tag_filters}" \ + --test_tag_filters="${tag_filters}" -- \ + ${DEFAULT_BAZEL_TARGETS} \ + -//tensorflow/lite/... +test_xml_summary_exit diff --git a/tensorflow/tools/ci_build/per_release/macos/cpu_py36_pip.sh b/tensorflow/tools/ci_build/per_release/macos/cpu_py36_pip.sh new file mode 100644 index 00000000000..26ee4ea8edb --- /dev/null +++ b/tensorflow/tools/ci_build/per_release/macos/cpu_py36_pip.sh @@ -0,0 +1,51 @@ +#!/bin/bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +set -e +set -x + +source tensorflow/tools/ci_build/release/common.sh +install_bazelisk + +# Pick a more recent version of xcode +export DEVELOPER_DIR=/Applications/Xcode_10.3.app/Contents/Developer +sudo xcode-select -s "${DEVELOPER_DIR}" + +# Install macos pip dependencies +install_macos_pip_deps sudo pip3.6 + +# Export required variables for running pip_new.sh +export OS_TYPE="MACOS" +export CONTAINER_TYPE="CPU" +export TF_PYTHON_VERSION='python3.6' +export TF_BUILD_BOTH_CPU_PACKAGES=1 + +# Run configure. +export TF_NEED_CUDA=0 +export CC_OPT_FLAGS='-mavx' +export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) +yes "" | "$PYTHON_BIN_PATH" configure.py + +# Export optional variables for running pip.sh +export TF_BUILD_FLAGS="--config=opt --config=v2" +export TF_TEST_FLAGS="--define=no_tensorflow_py_deps=true --test_lang_filters=py --test_output=errors --verbose_failures=true --keep_going --test_env=TF2_BEHAVIOR=1" +export TF_TEST_TARGETS="//tensorflow/python/..." +export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" +export TF_TEST_FILTER_TAGS='-nomac,-no_mac,-no_oss,-oss_serial,-no_oss_py35,-v1only,-gpu,-tpu,-benchmark-test' +#export IS_NIGHTLY=0 # Not nightly; uncomment if building from tf repo. +export TF_PROJECT_NAME="tensorflow" +export TF_PIP_TEST_ROOT="pip_test" + +./tensorflow/tools/ci_build/builds/pip_new.sh diff --git a/tensorflow/tools/ci_build/per_release/macos/cpu_py37_nonpip.sh b/tensorflow/tools/ci_build/per_release/macos/cpu_py37_nonpip.sh new file mode 100644 index 00000000000..e0f2968b45a --- /dev/null +++ b/tensorflow/tools/ci_build/per_release/macos/cpu_py37_nonpip.sh @@ -0,0 +1,51 @@ +#!/bin/bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +set -e +set -x + +source tensorflow/tools/ci_build/release/common.sh +install_bazelisk + +# Pick a more recent version of xcode +export DEVELOPER_DIR=/Applications/Xcode_10.3.app/Contents/Developer +sudo xcode-select -s "${DEVELOPER_DIR}" +python -m virtualenv tf_build_env --system-site-packages +source tf_build_env/bin/activate + +# Install macos pip dependencies +install_macos_pip_deps sudo pip3.7 + +# Run configure. 
+export TF_NEED_CUDA=0 +export CC_OPT_FLAGS='-mavx' +export TF2_BEHAVIOR=1 +export PYTHON_BIN_PATH=$(which python3.7) +yes "" | "$PYTHON_BIN_PATH" configure.py + +tag_filters="-no_oss,-oss_serial,-nomac,-no_mac$(maybe_skip_v1),-gpu,-tpu,-benchmark-test" + +# Get the default test targets for bazel. +source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh + +# Run tests +set +e +bazel test --test_output=errors --config=opt \ + --action_env=TF2_BEHAVIOR="${TF2_BEHAVIOR}" \ + --build_tag_filters="${tag_filters}" \ + --test_tag_filters="${tag_filters}" -- \ + ${DEFAULT_BAZEL_TARGETS} \ + -//tensorflow/lite/... +test_xml_summary_exit diff --git a/tensorflow/tools/ci_build/per_release/macos/cpu_py37_pip.sh b/tensorflow/tools/ci_build/per_release/macos/cpu_py37_pip.sh new file mode 100644 index 00000000000..ed577db961a --- /dev/null +++ b/tensorflow/tools/ci_build/per_release/macos/cpu_py37_pip.sh @@ -0,0 +1,51 @@ +#!/bin/bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +set -e +set -x + +source tensorflow/tools/ci_build/release/common.sh +install_bazelisk + +# Pick a more recent version of xcode +export DEVELOPER_DIR=/Applications/Xcode_10.3.app/Contents/Developer +sudo xcode-select -s "${DEVELOPER_DIR}" + +# Install macos pip dependencies +install_macos_pip_deps sudo pip3.7 + +# Export required variables for running pip_new.sh +export OS_TYPE="MACOS" +export CONTAINER_TYPE="CPU" +export TF_PYTHON_VERSION='python3.7' +export TF_BUILD_BOTH_CPU_PACKAGES=1 + +# Run configure. +export TF_NEED_CUDA=0 +export CC_OPT_FLAGS='-mavx' +export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) +yes "" | "$PYTHON_BIN_PATH" configure.py + +# Export optional variables for running pip.sh +export TF_BUILD_FLAGS="--config=opt --config=v2" +export TF_TEST_FLAGS="--define=no_tensorflow_py_deps=true --test_lang_filters=py --test_output=errors --verbose_failures=true --keep_going --test_env=TF2_BEHAVIOR=1" +export TF_TEST_TARGETS="//tensorflow/python/..." +export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" +export TF_TEST_FILTER_TAGS='-nomac,-no_mac,-no_oss,-oss_serial,-no_oss_py37,-v1only,-gpu,-tpu,-benchmark-test' +#export IS_NIGHTLY=0 # Not nightly; uncomment if building from tf repo. +export TF_PROJECT_NAME="tensorflow" +export TF_PIP_TEST_ROOT="pip_test" + +./tensorflow/tools/ci_build/builds/pip_new.sh diff --git a/tensorflow/tools/ci_build/per_release/macos/cpu_py38_nonpip.sh b/tensorflow/tools/ci_build/per_release/macos/cpu_py38_nonpip.sh new file mode 100755 index 00000000000..22475f35491 --- /dev/null +++ b/tensorflow/tools/ci_build/per_release/macos/cpu_py38_nonpip.sh @@ -0,0 +1,51 @@ +#!/bin/bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +set -e +set -x + +source tensorflow/tools/ci_build/release/common.sh +install_bazelisk + +# Pick a more recent version of xcode +export DEVELOPER_DIR=/Applications/Xcode_10.3.app/Contents/Developer +sudo xcode-select -s "${DEVELOPER_DIR}" +python -m virtualenv tf_build_env --system-site-packages +source tf_build_env/bin/activate + +# Install macos pip dependencies +install_macos_pip_deps sudo pip3.8 + +# Run configure. +export TF_NEED_CUDA=0 +export CC_OPT_FLAGS='-mavx' +export TF2_BEHAVIOR=1 +export PYTHON_BIN_PATH=$(which python3.8) +yes "" | "$PYTHON_BIN_PATH" configure.py + +tag_filters="-no_oss,-oss_serial,-nomac,-no_mac$(maybe_skip_v1),-gpu,-tpu,-benchmark-test" + +# Get the default test targets for bazel. +source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh + +# Run tests +set +e +bazel test --test_output=errors --config=opt \ + --action_env=TF2_BEHAVIOR="${TF2_BEHAVIOR}" \ + --build_tag_filters="${tag_filters}" \ + --test_tag_filters="${tag_filters}" -- \ + ${DEFAULT_BAZEL_TARGETS} \ + -//tensorflow/lite/... +test_xml_summary_exit diff --git a/tensorflow/tools/ci_build/per_release/macos/cpu_py38_pip.sh b/tensorflow/tools/ci_build/per_release/macos/cpu_py38_pip.sh new file mode 100644 index 00000000000..f8eda5a7520 --- /dev/null +++ b/tensorflow/tools/ci_build/per_release/macos/cpu_py38_pip.sh @@ -0,0 +1,51 @@ +#!/bin/bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +set -e +set -x + +source tensorflow/tools/ci_build/release/common.sh +install_bazelisk + +# Pick a more recent version of xcode +export DEVELOPER_DIR=/Applications/Xcode_10.3.app/Contents/Developer +sudo xcode-select -s "${DEVELOPER_DIR}" + +# Install macos pip dependencies +install_macos_pip_deps sudo pip3.8 + +# Export required variables for running pip_new.sh +export OS_TYPE="MACOS" +export CONTAINER_TYPE="CPU" +export TF_PYTHON_VERSION='python3.8' +export TF_BUILD_BOTH_CPU_PACKAGES=1 + +# Run configure. +export TF_NEED_CUDA=0 +export CC_OPT_FLAGS='-mavx' +export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) +yes "" | "$PYTHON_BIN_PATH" configure.py + +# Export optional variables for running pip.sh +export TF_BUILD_FLAGS="--config=opt --config=v2" +export TF_TEST_FLAGS="--define=no_tensorflow_py_deps=true --test_lang_filters=py --test_output=errors --verbose_failures=true --keep_going --test_env=TF2_BEHAVIOR=1" +export TF_TEST_TARGETS="//tensorflow/python/..." 
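+# TF_PIP_TESTS selects which pip_new.sh scenarios run (clean and non-clean
+# virtualenv install tests).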
+export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" +export TF_TEST_FILTER_TAGS='-nomac,-no_mac,-no_oss,-oss_serial,-no_oss_py38,-v1only,-gpu,-tpu,-benchmark-test' +#export IS_NIGHTLY=0 # Not nightly; uncomment if building from tf repo. +export TF_PROJECT_NAME="tensorflow" +export TF_PIP_TEST_ROOT="pip_test" + +./tensorflow/tools/ci_build/builds/pip_new.sh From ef1b3e0233e243d446740198e20ad83bf8e4af9d Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Mon, 13 Jul 2020 16:13:33 -0700 Subject: [PATCH 0321/2522] Enable temporary allocation functionality to SimpleMemoryAllocator. This new API will allow TFLM to allocate a chain of temporary allocations from the current "head" section of the arena. This allocation happens on the "head" since it consists of non-persistent buffers and is not updated until a new model is added to the arena. This API also allows for all temp allocations to be "reset", which is needed to keep the temp allocations from growing directly into the tail portion of the arena. Additionally, the recording version of SimpleMemoryAllocator has been updated to track the "max" of requested/used/total allocations. This will be useful for auditing how expensive a chain of temp allocations is for a model. This number will help us and developers accurately pick an arena size. PiperOrigin-RevId: 321054446 Change-Id: Ib62d01deaea594638929137720019fa15961913b --- .../lite/micro/simple_memory_allocator.cc | 33 +++++++--- .../lite/micro/simple_memory_allocator.h | 21 ++++++- .../micro/simple_memory_allocator_test.cc | 61 +++++++++++++++++++ 3 files changed, 106 insertions(+), 9 deletions(-) diff --git a/tensorflow/lite/micro/simple_memory_allocator.cc b/tensorflow/lite/micro/simple_memory_allocator.cc index 7ca662f04d8..3abec015fe3 100644 --- a/tensorflow/lite/micro/simple_memory_allocator.cc +++ b/tensorflow/lite/micro/simple_memory_allocator.cc @@ -32,7 +32,8 @@ SimpleMemoryAllocator::SimpleMemoryAllocator(ErrorReporter* error_reporter, buffer_head_(buffer_head), buffer_tail_(buffer_tail), head_(buffer_head), - tail_(buffer_tail) {} + tail_(buffer_tail), + temp_(buffer_head_) {} SimpleMemoryAllocator::SimpleMemoryAllocator(ErrorReporter* error_reporter, uint8_t* buffer, @@ -60,17 +61,17 @@ SimpleMemoryAllocator::~SimpleMemoryAllocator() {} uint8_t* SimpleMemoryAllocator::AllocateFromHead(size_t size, size_t alignment) { - uint8_t* const aligned_result = AlignPointerUp(head_, alignment); - const size_t available_memory = tail_ - aligned_result; - if (available_memory < size) { + if (head_ != temp_) { TF_LITE_REPORT_ERROR( error_reporter_, - "Failed to allocate memory. Requested: %u, available %u, missing: %u", - size, available_memory, size - available_memory); + "Called AllocateFromHead() after AllocateTemp() without resetting temp " + "allocations with ResetTempAllocations()"); return nullptr; } - head_ = aligned_result + size; - return aligned_result; + + uint8_t* ret = AllocateTemp(size, alignment); + head_ = temp_; + return ret; } uint8_t* SimpleMemoryAllocator::AllocateFromTail(size_t size, @@ -88,6 +89,22 @@ uint8_t* SimpleMemoryAllocator::AllocateFromTail(size_t size, return aligned_result; } +uint8_t* SimpleMemoryAllocator::AllocateTemp(size_t size, size_t alignment) { + uint8_t* const aligned_result = AlignPointerUp(temp_, alignment); + const size_t available_memory = tail_ - aligned_result; + if (available_memory < size) { + TF_LITE_REPORT_ERROR( + error_reporter_, + "Failed to allocate memory. 
Requested: %u, available %u, missing: %u", + size, available_memory, size - available_memory); + return nullptr; + } + temp_ = aligned_result + size; + return aligned_result; +} + +void SimpleMemoryAllocator::ResetTempAllocations() { temp_ = head_; } + uint8_t* SimpleMemoryAllocator::GetHead() const { return head_; } uint8_t* SimpleMemoryAllocator::GetTail() const { return tail_; } diff --git a/tensorflow/lite/micro/simple_memory_allocator.h b/tensorflow/lite/micro/simple_memory_allocator.h index 426ced032f6..fd2363955f2 100644 --- a/tensorflow/lite/micro/simple_memory_allocator.h +++ b/tensorflow/lite/micro/simple_memory_allocator.h @@ -43,12 +43,30 @@ class SimpleMemoryAllocator { size_t buffer_size); // Allocates memory starting at the head of the arena (lowest address and - // moving upwards). + // moving upwards). Calls to this method will also invalidate all temporary + // allocation values. This call will fail if a chain allocation calls through + // AllocateTemp() have not been cleaned up with a call to + // ResetTempAllocations(). virtual uint8_t* AllocateFromHead(size_t size, size_t alignment); + // Allocates memory starting at the tail of the arena (highest address and // moving downwards). virtual uint8_t* AllocateFromTail(size_t size, size_t alignment); + // Allocates a temporary buffer from the head of the arena (lowest address and + // moving upwards) but does not update the actual head allocation size or + // position. The returned buffer is guaranteed until either + // ResetTempAllocations() is called or another call to AllocateFromHead(). + // Repeat calls to this function will create a chain of temp allocations. All + // calls to AllocateTemp() must end with a call to ResetTempAllocations(). If + // AllocateFromHead() is called before a call to ResetTempAllocations(), it + // will fail with an error message. + virtual uint8_t* AllocateTemp(size_t size, size_t alignment); + + // Resets a chain of temporary allocations back to the current head of the + // arena (lowest address). + virtual void ResetTempAllocations(); + uint8_t* GetHead() const; uint8_t* GetTail() const; @@ -66,6 +84,7 @@ class SimpleMemoryAllocator { uint8_t* buffer_tail_; uint8_t* head_; uint8_t* tail_; + uint8_t* temp_; TF_LITE_REMOVE_VIRTUAL_DELETE }; diff --git a/tensorflow/lite/micro/simple_memory_allocator_test.cc b/tensorflow/lite/micro/simple_memory_allocator_test.cc index f0ebf343b59..d9ee979d5b0 100644 --- a/tensorflow/lite/micro/simple_memory_allocator_test.cc +++ b/tensorflow/lite/micro/simple_memory_allocator_test.cc @@ -59,4 +59,65 @@ TF_LITE_MICRO_TEST(TestMultipleTooLarge) { TF_LITE_MICRO_EXPECT_EQ(nullptr, result); } +TF_LITE_MICRO_TEST(TestTempAllocations) { + constexpr size_t arena_size = 1024; + uint8_t arena[arena_size]; + tflite::SimpleMemoryAllocator allocator(micro_test::reporter, arena, + arena_size); + + uint8_t* temp1 = allocator.AllocateTemp(100, 1); + TF_LITE_MICRO_EXPECT_NE(nullptr, temp1); + + uint8_t* temp2 = allocator.AllocateTemp(100, 1); + TF_LITE_MICRO_EXPECT_NE(nullptr, temp2); + + // Expect that the next micro allocation is 100 bytes away from each other. 
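+  // AllocateTemp() only advances the allocator's internal temp pointer, so
+  // back-to-back temp buffers are laid out contiguously in the head region
+  // until ResetTempAllocations() rewinds them.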
+ TF_LITE_MICRO_EXPECT_EQ(temp2 - temp1, 100); +} + +TF_LITE_MICRO_TEST(TestResetTempAllocations) { + constexpr size_t arena_size = 1024; + uint8_t arena[arena_size]; + tflite::SimpleMemoryAllocator allocator(micro_test::reporter, arena, + arena_size); + + uint8_t* temp1 = allocator.AllocateTemp(100, 1); + TF_LITE_MICRO_EXPECT_NE(nullptr, temp1); + + allocator.ResetTempAllocations(); + + uint8_t* temp2 = allocator.AllocateTemp(100, 1); + TF_LITE_MICRO_EXPECT_NE(nullptr, temp2); + + // Reset temp allocations should have the same start address: + TF_LITE_MICRO_EXPECT_EQ(temp2 - temp1, 0); +} + +TF_LITE_MICRO_TEST(TestAllocateHeadWithoutResettingTemp) { + constexpr size_t arena_size = 1024; + uint8_t arena[arena_size]; + tflite::SimpleMemoryAllocator allocator(micro_test::reporter, arena, + arena_size); + + uint8_t* temp = allocator.AllocateTemp(100, 1); + TF_LITE_MICRO_EXPECT_NE(nullptr, temp); + + // Allocation should be null since temp allocation was not followed by a call + // to ResetTempAllocations(). + uint8_t* head = allocator.AllocateFromHead(100, 1); + TF_LITE_MICRO_EXPECT_EQ(nullptr, head); + + allocator.ResetTempAllocations(); + + head = allocator.AllocateFromHead(100, 1); + TF_LITE_MICRO_EXPECT_NE(nullptr, head); + + // The most recent head allocation should be in the same location as the + // original temp allocation pointer. + TF_LITE_MICRO_EXPECT_EQ(temp, head); +} + +// TODO(b/161171251): Add more coverage to this test - specifically around -1 +// alignments and other odd allocation requests. + TF_LITE_MICRO_TESTS_END From ed76a05890248ba9789655bd9896fa2e20069e7d Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Mon, 13 Jul 2020 16:31:04 -0700 Subject: [PATCH 0322/2522] Migrate the MacOS nightly release scripts to use the new bazelrc configs. PiperOrigin-RevId: 321057608 Change-Id: Ibc337541afcd08077f0ec88cbbd7602ae13d706a --- .bazelrc | 5 +++++ .../ci_build/release/macos/cpu_py2_full/nightly_release.sh | 4 +--- .../ci_build/release/macos/cpu_py35_full/nightly_release.sh | 4 +--- .../ci_build/release/macos/cpu_py36_full/nightly_release.sh | 4 +--- .../ci_build/release/macos/cpu_py37_full/nightly_release.sh | 4 +--- .../ci_build/release/macos/cpu_py38_full/nightly_release.sh | 4 +--- 6 files changed, 10 insertions(+), 15 deletions(-) diff --git a/.bazelrc b/.bazelrc index f23fdb7b7e8..801c5f0ec55 100644 --- a/.bazelrc +++ b/.bazelrc @@ -83,6 +83,7 @@ # release_common: Common options for all builds on all operating systems. # release_gpu_common: Common options for GPU builds on Linux and Windows. # release_cpu_linux: Toolchain and CUDA options for Linux CPU builds. +# release_cpu_macos: Toolchain and CUDA options for MacOS CPU builds. # release_gpu_linux: Toolchain and CUDA options for Linux PU builds. # Allow builds using libc++ as a linker library @@ -551,6 +552,10 @@ build:release_cpu_linux --action_env=CC_OPT_FLAGS="-mavx" # Did not add this to the defaults in case this changes. 
build:release_cpu_linux --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain +build:release_cpu_macos --config=release_common +build:release_cpu_macos --action_env=TF_NEED_CUDA=0 +build:release_cpu_macos --action_env=CC_OPT_FLAGS="-mavx" + build:release_gpu_common --config=release_common build:release_gpu_common --action_env=TF_NEED_CUDA=1 build:release_gpu_common --action_env=TF_CUDA_VERSION=10.1 diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py2_full/nightly_release.sh b/tensorflow/tools/ci_build/release/macos/cpu_py2_full/nightly_release.sh index 69c57179379..6dc3e3849ad 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py2_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py2_full/nightly_release.sh @@ -30,13 +30,11 @@ sudo pip install twine ./tensorflow/tools/ci_build/update_version.py --nightly # Run configure. -export TF_NEED_CUDA=0 -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python2) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=opt --config=v2 tensorflow/tools/pip_package:build_pip_package +bazel build --config=release_cpu_macos tensorflow/tools/pip_package:build_pip_package mkdir pip_pkg ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --cpu --nightly_flag diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py35_full/nightly_release.sh b/tensorflow/tools/ci_build/release/macos/cpu_py35_full/nightly_release.sh index 1f018136ef9..45a8f435988 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py35_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py35_full/nightly_release.sh @@ -35,13 +35,11 @@ sudo pip install twine ./tensorflow/tools/ci_build/update_version.py --nightly # Run configure. -export TF_NEED_CUDA=0 -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.5) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=opt --config=v2 tensorflow/tools/pip_package:build_pip_package +bazel build --config=release_cpu_macos tensorflow/tools/pip_package:build_pip_package mkdir pip_pkg ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --cpu --nightly_flag diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py36_full/nightly_release.sh b/tensorflow/tools/ci_build/release/macos/cpu_py36_full/nightly_release.sh index 3702ec97413..d4cc8d7afac 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py36_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py36_full/nightly_release.sh @@ -33,13 +33,11 @@ sudo pip install twine ./tensorflow/tools/ci_build/update_version.py --nightly # Run configure. 
-export TF_NEED_CUDA=0 -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.6) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=opt --config=v2 tensorflow/tools/pip_package:build_pip_package +bazel build --config=release_cpu_macos tensorflow/tools/pip_package:build_pip_package mkdir pip_pkg ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --cpu --nightly_flag diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py37_full/nightly_release.sh b/tensorflow/tools/ci_build/release/macos/cpu_py37_full/nightly_release.sh index eee97f6e2d2..cd0f8a58ae6 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py37_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py37_full/nightly_release.sh @@ -33,13 +33,11 @@ sudo pip install twine ./tensorflow/tools/ci_build/update_version.py --nightly # Run configure. -export TF_NEED_CUDA=0 -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.7) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=opt --config=v2 tensorflow/tools/pip_package:build_pip_package +bazel build --config=release_cpu_macos tensorflow/tools/pip_package:build_pip_package mkdir pip_pkg ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --cpu --nightly_flag diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py38_full/nightly_release.sh b/tensorflow/tools/ci_build/release/macos/cpu_py38_full/nightly_release.sh index 70773c1b597..11085b08a38 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py38_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py38_full/nightly_release.sh @@ -33,13 +33,11 @@ sudo pip install twine ./tensorflow/tools/ci_build/update_version.py --nightly # Run configure. -export TF_NEED_CUDA=0 -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.8) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=opt --config=v2 tensorflow/tools/pip_package:build_pip_package +bazel build --config=release_cpu_macos tensorflow/tools/pip_package:build_pip_package mkdir pip_pkg ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --cpu --nightly_flag From e63ebd92ef3a269e292b2c573c9c0f3bead06a9c Mon Sep 17 00:00:00 2001 From: Henry Tan Date: Mon, 13 Jul 2020 16:36:27 -0700 Subject: [PATCH 0323/2522] Implements TpuCompile_CompileAndBuild C_API, an API to compile computations using XLA compiler and produce TPU programs ready for execution. 
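Example call pattern (illustrative only; `request` stands for a TpuSerializedProto
produced with se_tpu::SerializeProto() and `mesh_state` for an XLA_TpuMeshState*,
as in tpu_program_group.cc):

    size_t count = 0;
    XLA_TpuProgram** tpu_programs = nullptr;
    StatusHelper status;
    CompileApiFn()->TpuCompile_CompileAndBuildFn(
        request, mesh_state, &tpu_programs, &count, status.c_status);
    if (status.status().ok()) {
      // Use tpu_programs[0..count-1], e.g. hand them to a TpuProgramGroup.
    }
    // Unless ownership was transferred, the caller releases both the programs
    // and the array that holds them.
    for (size_t i = 0; i < count; ++i) {
      TpuProgram_Free(tpu_programs[i]);
    }
    TpuProgram_FreeArray(tpu_programs);
    se_tpu::SerializedProto_Free(request);

TpuProgramGroup::CompileAndBuild wraps this sequence and hands the programs to a
TpuProgramGroupInterface before freeing the array.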
PiperOrigin-RevId: 321058636 Change-Id: I76f3cab6da413ecbaa2551cd0519695fbced6428 --- tensorflow/core/tpu/kernels/BUILD | 3 + .../core/tpu/kernels/tpu_compile_c_api.h | 26 ++-- tensorflow/core/tpu/kernels/tpu_compile_op.h | 15 ++- .../core/tpu/kernels/tpu_program_c_api.h | 10 ++ .../core/tpu/kernels/tpu_program_group.cc | 117 +++++++++++++----- .../core/tpu/kernels/tpu_program_group.h | 17 ++- tensorflow/core/tpu/tpu_library_init_fns.inc | 2 +- 7 files changed, 146 insertions(+), 44 deletions(-) diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index 77ac6031805..dfa37348c8d 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -59,6 +59,7 @@ cc_library( "//tensorflow/core/tpu:tpu_api", "//tensorflow/core/tpu:tpu_configuration", "//tensorflow/core/tpu:tpu_defs", + "//tensorflow/stream_executor/tpu:status_helper", "//tensorflow/stream_executor/tpu:tpu_platform_interface", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:span", @@ -284,6 +285,8 @@ cc_library( ":tpu_compile_op_support", ":tpu_compile_proto_cc", ":tpu_executable_info_proto_cc", + ":tpu_mesh_state_c_api_hdrs", + ":tpu_mesh_state_interface", ":tpu_program_c_api_hdrs", ":tpu_program_group_interface", "//tensorflow/compiler/tf2xla:xla_compiler", diff --git a/tensorflow/core/tpu/kernels/tpu_compile_c_api.h b/tensorflow/core/tpu/kernels/tpu_compile_c_api.h index 999300959bf..44607631e15 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_c_api.h +++ b/tensorflow/core/tpu/kernels/tpu_compile_c_api.h @@ -23,21 +23,31 @@ limitations under the License. extern "C" { -// Executes the computations using XLA TPU compiler and returns TPU programs -// ready for execution. +// Compiles HLO IR and returns `count` number of TPU programs ready for +// execution. +// The API allocates the `XLA_TpuProgram*[]` array `tpu_programs` and creates +// `XLA_TpuProgram` object(s) using the `TpuProgram_New` API. The caller is +// responsible to deallocate both the `XLA_TpuProgram*[]` array and the +// `XLA_TpuProgram` object(s) using `TpuProgram_FreeArray` and `TpuProgram_Free` +// API respectively. TFTPU_CAPI_EXPORT void TpuCompile_CompileAheadOfTime( TpuSerializedProto aot_compilation_request, XLA_TpuProgram** tpu_programs[], size_t* count, SE_Status* status); -// Builds `DeviceAssignment` from `TpuCompileMetadata` serialized proto. -TFTPU_CAPI_EXPORT void TpuCompile_BuildXLADeviceAssignment( - TpuSerializedProto serialized_tpu_compile_metadata, - const XLA_TpuMeshState* mesh_state, - TpuSerializedProto* serialized_device_assignment, SE_Status* status); +// Compiles Mlir or TF function computation by lowering into HLO IR and returns +// `count` number of TPU programs ready for execution. +// The API allocates the `XLA_TpuProgram*[]` array `tpu_programs` and creates +// `XLA_TpuProgram` object(s) using the `TpuProgram_New` API. The caller is +// responsible to deallocate both the `XLA_TpuProgram*[]` array and the +// `XLA_TpuProgram` object(s) using `TpuProgram_FreeArray` and `TpuProgram_Free` +// API respectively. 
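+// On success, `count` holds the number of programs produced: one per core, or
+// a single program when SPMD shares one across all partitions.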
+TFTPU_CAPI_EXPORT void TpuCompile_CompileAndBuild( + TpuSerializedProto compilation_request, const XLA_TpuMeshState* mesh_state, + XLA_TpuProgram** tpu_programs[], size_t* count, SE_Status* status); struct TfTpu_CompileApiFn { TFTPU_ADD_FN_IN_STRUCT(TpuCompile_CompileAheadOfTime); - TFTPU_ADD_FN_IN_STRUCT(TpuCompile_BuildXLADeviceAssignment); + TFTPU_ADD_FN_IN_STRUCT(TpuCompile_CompileAndBuild); }; } // extern "C" diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op.h b/tensorflow/core/tpu/kernels/tpu_compile_op.h index 8a1963dde5c..0bbf5695400 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op.h +++ b/tensorflow/core/tpu/kernels/tpu_compile_op.h @@ -20,10 +20,15 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" namespace tensorflow { - namespace tpu { // Forward declaration. +#if defined(LIBTFTPU) class TpuCompileOpKernelImpl; +#else +namespace internal { +class TpuCompileOpKernelImpl; +} +#endif } // namespace tpu // The TPUCompile operator compiles a Tensorflow function into a @@ -37,7 +42,11 @@ class TpuCompileOp : public OpKernel { void Compute(OpKernelContext* ctx) override; private: +#if defined(LIBTFTPU) std::unique_ptr impl_; +#else + std::unique_ptr impl_; +#endif DISALLOW_COPY_AND_ASSIGN(TpuCompileOp); }; @@ -53,7 +62,11 @@ class TpuCompileMlirOp : public OpKernel { void Compute(OpKernelContext* ctx) override; private: +#if defined(LIBTFTPU) std::unique_ptr impl_; +#else + std::unique_ptr impl_; +#endif DISALLOW_COPY_AND_ASSIGN(TpuCompileMlirOp); }; diff --git a/tensorflow/core/tpu/kernels/tpu_program_c_api.h b/tensorflow/core/tpu/kernels/tpu_program_c_api.h index 254527e7a2a..7e5ec3aeaf9 100644 --- a/tensorflow/core/tpu/kernels/tpu_program_c_api.h +++ b/tensorflow/core/tpu/kernels/tpu_program_c_api.h @@ -28,6 +28,12 @@ XLA_TpuProgram* TpuProgram_New(); // Destroys the `tpu_program`. void TpuProgram_Free(XLA_TpuProgram* tpu_program); +// Creates an array of `XLA_TpuProgram*`. +XLA_TpuProgram** TpuProgram_NewArray(size_t count); + +// Destroys an array of `XLA_TpuProgram*`. +void TpuProgram_FreeArray(XLA_TpuProgram* tpu_program[]); + // Unloads and destroys the `tpu_program`. Once the TPU program is unloaded and // destroyed, it is in an unusable state. void TpuProgram_UnloadAndDestroy(XLA_TpuProgram* tpu_program, @@ -51,6 +57,10 @@ void TpuProgram_GetHostTransferInfo(const XLA_TpuProgram* tpu_program, void TpuProgram_GetHloMetadata(const XLA_TpuProgram* tpu_program, TpuSerializedProto* hlo_metadata); +// Gets may modify variables boolean value. 
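+// (i.e. whether executing `tpu_program` may modify the model's variables).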
+void TpuProgram_GetMayModifyVariables(const XLA_TpuProgram* tpu_program, + bool* may_modify_variables); + } // extern "C" #endif // TENSORFLOW_CORE_TPU_KERNELS_TPU_PROGRAM_C_API_H_ diff --git a/tensorflow/core/tpu/kernels/tpu_program_group.cc b/tensorflow/core/tpu/kernels/tpu_program_group.cc index 653b999a67d..c96eb7974df 100644 --- a/tensorflow/core/tpu/kernels/tpu_program_group.cc +++ b/tensorflow/core/tpu/kernels/tpu_program_group.cc @@ -32,6 +32,7 @@ namespace { namespace se_tpu = ::stream_executor::tpu; +using stream_executor::port::Status; using stream_executor::port::StatusOr; using xla::Shape; @@ -70,9 +71,8 @@ StatusOr> CompileAheadOfTime( for (size_t i = 0; i < count; ++i) { tpu_programs[i] = xla_tpu_programs[i]; } - delete[] xla_tpu_programs; + TpuProgram_FreeArray(xla_tpu_programs); return tpu_programs; - return Status::OK(); } StatusOr> CompileAheadOfTime( @@ -98,11 +98,56 @@ StatusOr> CompileAheadOfTime( per_core_variable_indices, device_assignment); } +Status CreateTpuProgramGroup( + absl::Span xla_tpu_programs, + TpuProgramGroupInterface* tpu_program_group_interface) { + CHECK_GT(xla_tpu_programs.size(), 0); + TpuProgramGroup* tpu_program_group = + tensorflow::down_cast(tpu_program_group_interface); + CHECK_NE(tpu_program_group, nullptr); + tpu_program_group->set_tpu_programs(xla_tpu_programs); + + // TODO(jiawenhao): Handle the case of xla_tpu_programs.size() > 1. + bool may_modify_variables; + TpuProgram_GetMayModifyVariables(xla_tpu_programs[0], &may_modify_variables); + tpu_program_group->set_may_modify_variables( + std::vector(1, may_modify_variables)); + + TpuSerializedProto serialized_executable_info; + TpuProgram_GetExecutableInfo(xla_tpu_programs[0], + &serialized_executable_info); + TPUExecutableInfoProto executable_info = + se_tpu::DeserializeProto( + serialized_executable_info); + tpu_program_group->set_executable_info(executable_info); + StreamExecutor_Tpu_FreeSerializedProto(&serialized_executable_info); + + TPUHostTransferInfoProto host_transfer_info; + TpuSerializedProto serialized_host_transfer_info; + TpuProgram_GetHostTransferInfo(xla_tpu_programs[0], + &serialized_host_transfer_info); + if (serialized_host_transfer_info.size > 0) { + host_transfer_info = se_tpu::DeserializeProto( + serialized_host_transfer_info); + StreamExecutor_Tpu_FreeSerializedProto(&serialized_host_transfer_info); + } + tpu_program_group->set_host_transfer_info(host_transfer_info); + + TpuSerializedProto serialized_hlo_metadata; + TpuProgram_GetHloMetadata(xla_tpu_programs[0], &serialized_hlo_metadata); + xla::HloProto hlo_metadata = + se_tpu::DeserializeProto(serialized_hlo_metadata); + tpu_program_group->set_hlo_metadata(hlo_metadata); + StreamExecutor_Tpu_FreeSerializedProto(&serialized_hlo_metadata); + + return Status::OK(); +} + } // namespace int64_t TpuProgramGroup::program_size() const { int64_t total_size = 0; - for (XLA_TpuProgram* tpu_program : tpu_programs_) { + for (const XLA_TpuProgram* tpu_program : tpu_programs_) { total_size += TpuProgram_GetProgramSize(tpu_program); } return total_size; @@ -170,36 +215,9 @@ void TpuProgramGroup::UnloadAndDestroyPrograms() { // SPMD could return 1 result for all partitions. TF_RET_CHECK(xla_tpu_programs.size() == 1 || xla_tpu_programs.size() == metadata.num_cores_per_replica()); - tpu_program_group->set_tpu_programs(xla_tpu_programs); - - // TODO(jiawenhao): Handle the case of xla_tpu_programs.size() > 1. 
- TpuSerializedProto serialized_executable_info; - TpuProgram_GetExecutableInfo(xla_tpu_programs[0], - &serialized_executable_info); - TPUExecutableInfoProto executable_info = - se_tpu::DeserializeProto( - serialized_executable_info); - tpu_program_group->set_executable_info(executable_info); - StreamExecutor_Tpu_FreeSerializedProto(&serialized_executable_info); - - TPUHostTransferInfoProto host_transfer_info; - TpuSerializedProto serialized_host_transfer_info; - TpuProgram_GetHostTransferInfo(xla_tpu_programs[0], - &serialized_host_transfer_info); - if (serialized_host_transfer_info.size > 0) { - host_transfer_info = se_tpu::DeserializeProto( - serialized_host_transfer_info); - StreamExecutor_Tpu_FreeSerializedProto(&serialized_host_transfer_info); - } - tpu_program_group->set_host_transfer_info(host_transfer_info); - - TpuSerializedProto serialized_hlo_metadata; - TpuProgram_GetHloMetadata(xla_tpu_programs[0], &serialized_hlo_metadata); - xla::HloProto hlo_metadata = - se_tpu::DeserializeProto(serialized_hlo_metadata); - tpu_program_group->set_hlo_metadata(hlo_metadata); - StreamExecutor_Tpu_FreeSerializedProto(&serialized_hlo_metadata); + TF_RETURN_IF_ERROR( + CreateTpuProgramGroup(xla_tpu_programs, tpu_program_group)); return Status::OK(); } @@ -238,5 +256,40 @@ Status TpuProgramGroup::LogCompilationStats(const TpuCompilationCacheKey& key, // implementation can be pushing into some external storage for analytics. return Status::OK(); } + +/*static*/ +Status TpuProgramGroup::CompileAndBuild( + const TpuCompilationRequestProto& compilation_request, + const XLA_TpuMeshState* mesh_state, + TpuProgramGroupInterface* tpu_program_group_interface) { + se_tpu::SerializedProto serialized_compilation_request = + se_tpu::SerializeProto(compilation_request); + auto cleanup = gtl::MakeCleanup([serialized_compilation_request] { + se_tpu::SerializedProto_Free(serialized_compilation_request); + }); + size_t count = 0; + XLA_TpuProgram** xla_tpu_programs = nullptr; + StatusHelper status; + CompileApiFn()->TpuCompile_CompileAndBuildFn(serialized_compilation_request, + mesh_state, &xla_tpu_programs, + &count, status.c_status); + // SPMD could return 1 result for all partitions. + TF_RET_CHECK(count == 1 || + count == compilation_request.metadata().num_cores_per_replica()); + if (!status.status().ok()) { + VLOG(1) << "Run CompileAndBuild failed."; + return status.status(); + } + + VLOG(1) << "CreateTpuProgramGroup"; + Status serialize_status = + CreateTpuProgramGroup(absl::MakeConstSpan(&xla_tpu_programs[0], count), + tpu_program_group_interface); + VLOG(1) << absl::StrCat("Run CreateTpuProgramGroup completed. StatusCode: ", + serialize_status.code()); + TpuProgram_FreeArray(xla_tpu_programs); + return serialize_status; +} + } // namespace tpu } // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_program_group.h b/tensorflow/core/tpu/kernels/tpu_program_group.h index 19fbb7a21f0..4bc8cdd003a 100644 --- a/tensorflow/core/tpu/kernels/tpu_program_group.h +++ b/tensorflow/core/tpu/kernels/tpu_program_group.h @@ -25,6 +25,8 @@ limitations under the License. 
#include "tensorflow/core/platform/macros.h" #include "tensorflow/core/tpu/kernels/tpu_compile_op_support.h" #include "tensorflow/core/tpu/kernels/tpu_executable_info.pb.h" +#include "tensorflow/core/tpu/kernels/tpu_mesh_state_c_api.h" +#include "tensorflow/core/tpu/kernels/tpu_mesh_state_interface.h" #include "tensorflow/core/tpu/kernels/tpu_program_c_api.h" #include "tensorflow/core/tpu/kernels/tpu_program_group_interface.h" #include "tensorflow/stream_executor/tpu/tpu_platform_interface.h" @@ -84,6 +86,14 @@ class TpuProgramGroup : public TpuProgramGroupInterface { public: using Status = ::stream_executor::port::Status; + // Compiles Mlir or TF function computation by lowering into HLO IR and + // returns TPU programs ready for execution. + static Status CompileAndBuild( + const TpuCompilationRequestProto& compilation_request, + const XLA_TpuMeshState* mesh_state, + TpuProgramGroupInterface* tpu_program_group_interface); + + // Compiles HLO IR and returns TPU programs ready for execution. static Status Build( const TPUCompileMetadataProto& metadata, const tensorflow::XlaCompiler::CompilationResult& compilation_result, @@ -125,8 +135,11 @@ class TpuProgramGroup : public TpuProgramGroupInterface { const std::vector& tpu_programs() const { return tpu_programs_; } - void set_tpu_programs(std::vector tpu_programs) { - tpu_programs_ = tpu_programs; + void set_tpu_programs(absl::Span tpu_programs) { + tpu_programs_.resize(tpu_programs.size()); + for (size_t i = 0; i < tpu_programs.size(); ++i) { + tpu_programs_[i] = tpu_programs[i]; + } } const TPUExecutableInfoProto& executable_info() const { diff --git a/tensorflow/core/tpu/tpu_library_init_fns.inc b/tensorflow/core/tpu/tpu_library_init_fns.inc index 633afe610d1..2047085d121 100644 --- a/tensorflow/core/tpu/tpu_library_init_fns.inc +++ b/tensorflow/core/tpu/tpu_library_init_fns.inc @@ -32,7 +32,7 @@ tensorflow::Status SetCompileStructFn(void* library_handle) { auto* compile_fn = tensorflow::tpu::CompileApiFn(); TFTPU_SET_FN(compile_fn, TpuCompile_CompileAheadOfTime); - TFTPU_SET_FN(compile_fn, TpuCompile_BuildXLADeviceAssignment); + TFTPU_SET_FN(compile_fn, TpuCompile_CompileAndBuild); return tensorflow::Status::OK(); } From f95b8ddf6c7334f36a6586d85c4e25657cc240a8 Mon Sep 17 00:00:00 2001 From: Yanhui Liang Date: Mon, 13 Jul 2020 16:47:34 -0700 Subject: [PATCH 0324/2522] Undo LSTM refactoring. PiperOrigin-RevId: 321060640 Change-Id: Ibd2e5aa7481869ead6ed8d00de1f51c487fa760b --- .../python/keras/layers/recurrent_v2.py | 144 ++++++++++++------ 1 file changed, 97 insertions(+), 47 deletions(-) diff --git a/tensorflow/python/keras/layers/recurrent_v2.py b/tensorflow/python/keras/layers/recurrent_v2.py index 58eb0bb025b..33babb54357 100644 --- a/tensorflow/python/keras/layers/recurrent_v2.py +++ b/tensorflow/python/keras/layers/recurrent_v2.py @@ -385,17 +385,6 @@ class GRU(recurrent.DropoutRNNCellMixin, recurrent.GRU): else: logging.warn(_CUDNN_NOT_AVAILABLE_MSG % self.name) - # The first two attributes are added to support TFLite use case. 
- supportive_attributes = { - 'time_major': time_major, - 'go_backwards': go_backwards, - _FUNCTION_API_NAME_ATTRIBUTE: 'gru_' + str(uuid.uuid4()) - } - self.defun_gru_with_backend_selection = function.defun_with_attributes( - gru_with_backend_selection, - attributes=supportive_attributes, - autograph=False) - def build(self, input_shape): super(GRU, self).build(input_shape) @@ -478,7 +467,7 @@ class GRU(recurrent.DropoutRNNCellMixin, recurrent.GRU): if dropout_mask is not None: inputs = inputs * dropout_mask[0] - gru_kwargs = { + gpu_gru_kwargs = { 'inputs': inputs, 'init_h': _read_variable_value(initial_state[0]), 'kernel': _read_variable_value(self.cell.kernel), @@ -487,11 +476,29 @@ class GRU(recurrent.DropoutRNNCellMixin, recurrent.GRU): 'mask': mask, 'time_major': self.time_major, 'go_backwards': self.go_backwards, - 'sequence_lengths': sequence_lengths, - 'zero_output_for_mask': self.zero_output_for_mask + 'sequence_lengths': sequence_lengths } - (last_output, outputs, new_h, - runtime) = self.defun_gru_with_backend_selection(**gru_kwargs) + normal_gru_kwargs = gpu_gru_kwargs.copy() + normal_gru_kwargs.update({ + 'zero_output_for_mask': self.zero_output_for_mask, + }) + + if context.executing_eagerly(): + device_type = _get_context_device_type() + can_use_gpu = ( + # Either user specified GPU or unspecified but GPU is available. + (device_type == _GPU_DEVICE_NAME + or (device_type is None and context.num_gpus() > 0)) + and + (mask is None or is_sequence_right_padded(mask, self.time_major))) + # Under eager context, check the device placement and prefer the + if can_use_gpu: + last_output, outputs, new_h, runtime = gpu_gru(**gpu_gru_kwargs) + else: + last_output, outputs, new_h, runtime = standard_gru(**normal_gru_kwargs) + else: + last_output, outputs, new_h, runtime = gru_with_backend_selection( + **normal_gru_kwargs) states = [new_h] return last_output, outputs, runtime, states @@ -758,14 +765,24 @@ def gru_with_backend_selection(inputs, init_h, kernel, recurrent_kernel, bias, true_fn=input_right_padded, false_fn=input_not_right_padded) - # Chooses the implementation dynamicly based on the running device. - (last_output, outputs, new_h, - runtime) = control_flow_ops.execute_fn_for_device( - { - _CPU_DEVICE_NAME: lambda: standard_gru(**params), - _GPU_DEVICE_NAME: lambda: gpu_gru_with_fallback(**params) - }, lambda: standard_gru(**params)) + # Each time a `tf.function` is called, we will give it a unique + # identifiable API name, so that Grappler won't get confused when it + # sees multiple GRU layers added into same graph, and it will be able + # to pair up the different implementations across them. + api_name = 'gru_' + str(uuid.uuid4()) + supportive_attribute = { + 'time_major': time_major, + 'go_backwards': go_backwards, + } + defun_standard_gru = _generate_defun_backend( + api_name, _CPU_DEVICE_NAME, standard_gru, supportive_attribute) + defun_gpu_gru = _generate_defun_backend( + api_name, _GPU_DEVICE_NAME, gpu_gru_with_fallback, supportive_attribute) + # Call the normal GRU impl and register the CuDNN impl function. The + # grappler will kick in during session execution to optimize the graph. + last_output, outputs, new_h, runtime = defun_standard_gru(**params) + function.register(defun_gpu_gru, **params) return last_output, outputs, new_h, runtime @@ -1080,18 +1097,6 @@ class LSTM(recurrent.DropoutRNNCellMixin, recurrent.LSTM): else: logging.warn(_CUDNN_NOT_AVAILABLE_MSG % self.name) - # The first two attributes are added to support TFLite use case. 
- supportive_attributes = { - 'time_major': time_major, - 'go_backwards': go_backwards, - _FUNCTION_API_NAME_ATTRIBUTE: 'lstm_' + str(uuid.uuid4()) - } - - self.defun_lstm_with_backend_selection = function.defun_with_attributes( - lstm_with_backend_selection, - attributes=supportive_attributes, - autograph=False) - def call(self, inputs, mask=None, training=None, initial_state=None): # The input should be dense, padded with zeros. If a ragged input is fed # into the layer, it is padded and the row lengths are used for masking. @@ -1140,7 +1145,7 @@ class LSTM(recurrent.DropoutRNNCellMixin, recurrent.LSTM): dropout_mask = self.get_dropout_mask_for_cell(inputs, training, count=4) if dropout_mask is not None: inputs = inputs * dropout_mask[0] - lstm_kwargs = { + gpu_lstm_kwargs = { 'inputs': inputs, 'init_h': _read_variable_value(initial_state[0]), 'init_c': _read_variable_value(initial_state[1]), @@ -1150,11 +1155,32 @@ class LSTM(recurrent.DropoutRNNCellMixin, recurrent.LSTM): 'mask': mask, 'time_major': self.time_major, 'go_backwards': self.go_backwards, - 'sequence_lengths': row_lengths, - 'zero_output_for_mask': self.zero_output_for_mask, + 'sequence_lengths': row_lengths } - (last_output, outputs, new_h, new_c, - runtime) = self.defun_lstm_with_backend_selection(**lstm_kwargs) + normal_lstm_kwargs = gpu_lstm_kwargs.copy() + normal_lstm_kwargs.update({ + 'zero_output_for_mask': self.zero_output_for_mask, + }) + + if context.executing_eagerly(): + device_type = _get_context_device_type() + can_use_gpu = ( + # Either user specified GPU or unspecified but GPU is available. + (device_type == _GPU_DEVICE_NAME + or (device_type is None and context.num_gpus() > 0)) + and + (mask is None or is_sequence_right_padded(mask, self.time_major))) + # Under eager context, check the device placement and prefer the + # GPU implementation when GPU is available. + if can_use_gpu: + last_output, outputs, new_h, new_c, runtime = gpu_lstm( + **gpu_lstm_kwargs) + else: + last_output, outputs, new_h, new_c, runtime = standard_lstm( + **normal_lstm_kwargs) + else: + (last_output, outputs, new_h, new_c, + runtime) = lstm_with_backend_selection(**normal_lstm_kwargs) states = [new_h, new_c] @@ -1512,13 +1538,25 @@ def lstm_with_backend_selection(inputs, init_h, init_c, kernel, true_fn=input_right_padded, false_fn=input_not_right_padded) - # Chooses the implementation dynamicly based on the running device. - (last_output, outputs, new_h, new_c, - runtime) = control_flow_ops.execute_fn_for_device( - { - _CPU_DEVICE_NAME: lambda: standard_lstm(**params), - _GPU_DEVICE_NAME: lambda: gpu_lstm_with_fallback(**params) - }, lambda: standard_lstm(**params)) + # Each time a `tf.function` is called, we will give it a unique + # identifiable API name, so that Grappler won't get confused when it + # sees multiple LSTM layers added into same graph, and it will be able + # to pair up the different implementations across them. + api_name = 'lstm_' + str(uuid.uuid4()) + supportive_attribute = { + 'time_major': time_major, + 'go_backwards': go_backwards, + } + defun_standard_lstm = _generate_defun_backend( + api_name, _CPU_DEVICE_NAME, standard_lstm, supportive_attribute) + defun_gpu_lstm = _generate_defun_backend( + api_name, _GPU_DEVICE_NAME, gpu_lstm_with_fallback, supportive_attribute) + + # Call the normal LSTM impl and register the CuDNN impl function. The + # grappler will kick in during session execution to optimize the graph. 
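+  # Both defuns carry the same api_name attribute, so Grappler's
+  # implementation selector can substitute the registered GPU variant when
+  # the op is placed on a GPU device.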
+ last_output, outputs, new_h, new_c, runtime = defun_standard_lstm( + **params) + function.register(defun_gpu_lstm, **params) return last_output, outputs, new_h, new_c, runtime @@ -1581,6 +1619,18 @@ def calculate_sequence_by_mask(mask, time_major): axis=timestep_index) +def _generate_defun_backend(unique_api_name, preferred_device, func, + supportive_attributes): + function_attributes = { + _FUNCTION_API_NAME_ATTRIBUTE: unique_api_name, + _FUNCTION_DEVICE_ATTRIBUTE: preferred_device, + } + function_attributes.update(supportive_attributes) + return function.defun_with_attributes(func=func, + attributes=function_attributes, + autograph=False) + + def _get_context_device_type(): """Parse the current context and return the device type, eg CPU/GPU.""" current_device = context.context().device_name From 88ce07f6fcf12076242b2126f3d02388fe1f2174 Mon Sep 17 00:00:00 2001 From: Yanhui Liang Date: Mon, 13 Jul 2020 16:51:03 -0700 Subject: [PATCH 0325/2522] Split saved model benchmark tests into individual test to avoid timeout. PiperOrigin-RevId: 321061292 Change-Id: Iff5c7da73aec5f963b6cfb38cd16e05b09e4d2f9 --- tensorflow/python/keras/benchmarks/BUILD | 25 ++--- .../applications_saved_model_test.py | 93 ---------------- .../benchmarks/saved_model_benchmarks/BUILD | 104 ++++++++++++++++++ .../densenet_benchmark_test.py | 43 ++++++++ .../efficientnet_benchmark_test.py | 43 ++++++++ .../inception_resnet_v2_benchmark_test.py | 44 ++++++++ .../mobilenet_benchmark_test.py | 43 ++++++++ .../nasnet_large_benchmark_test.py | 43 ++++++++ .../resnet152_v2_benchmark_test.py | 44 ++++++++ .../saved_model_benchmark_util.py | 70 ++++++++++++ .../vgg_benchmark_test.py | 44 ++++++++ .../xception_benchmark_test.py | 44 ++++++++ tensorflow/tools/pip_package/BUILD | 2 +- 13 files changed, 532 insertions(+), 110 deletions(-) delete mode 100644 tensorflow/python/keras/benchmarks/applications_saved_model_test.py create mode 100644 tensorflow/python/keras/benchmarks/saved_model_benchmarks/BUILD create mode 100644 tensorflow/python/keras/benchmarks/saved_model_benchmarks/densenet_benchmark_test.py create mode 100644 tensorflow/python/keras/benchmarks/saved_model_benchmarks/efficientnet_benchmark_test.py create mode 100644 tensorflow/python/keras/benchmarks/saved_model_benchmarks/inception_resnet_v2_benchmark_test.py create mode 100644 tensorflow/python/keras/benchmarks/saved_model_benchmarks/mobilenet_benchmark_test.py create mode 100644 tensorflow/python/keras/benchmarks/saved_model_benchmarks/nasnet_large_benchmark_test.py create mode 100644 tensorflow/python/keras/benchmarks/saved_model_benchmarks/resnet152_v2_benchmark_test.py create mode 100644 tensorflow/python/keras/benchmarks/saved_model_benchmarks/saved_model_benchmark_util.py create mode 100644 tensorflow/python/keras/benchmarks/saved_model_benchmarks/vgg_benchmark_test.py create mode 100644 tensorflow/python/keras/benchmarks/saved_model_benchmarks/xception_benchmark_test.py diff --git a/tensorflow/python/keras/benchmarks/BUILD b/tensorflow/python/keras/benchmarks/BUILD index 2386b01c426..5e569bc5ad2 100644 --- a/tensorflow/python/keras/benchmarks/BUILD +++ b/tensorflow/python/keras/benchmarks/BUILD @@ -23,6 +23,15 @@ exports_files(["LICENSE"]) # to the regular expression is executed. # e.g. --benchmarks=".*lstm*." will run all lstm layer related benchmarks. +# Add all benchmarks related utils here for pip testing dependencis. 
+py_library( + name = "keras_benchmark_lib_pip", + deps = [ + ":benchmark_util", + "//tensorflow/python/keras/benchmarks/saved_model_benchmarks:saved_model_benchmark_util", + ], +) + py_test( name = "keras_cpu_benchmark_test", size = "large", @@ -49,22 +58,6 @@ cuda_py_test( ], ) -cuda_py_test( - name = "applications_saved_model_test", - size = "medium", - srcs = ["applications_saved_model_test.py"], - shard_count = 8, - tags = [ - "no_oss_py38", # b/160170347 - "no_windows", # b/160269052 - ], - deps = [ - "//tensorflow/python:client_testlib", - "//tensorflow/python/keras/applications", - "@absl_py//absl/testing:parameterized", - ], -) - cuda_py_test( name = "model_components_benchmarks_test", srcs = ["model_components_benchmarks_test.py"], diff --git a/tensorflow/python/keras/benchmarks/applications_saved_model_test.py b/tensorflow/python/keras/benchmarks/applications_saved_model_test.py deleted file mode 100644 index 0111c8f13b9..00000000000 --- a/tensorflow/python/keras/benchmarks/applications_saved_model_test.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Benchmarks for Keras applications.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tempfile -import time - -import six - -from tensorflow.python.keras.applications import densenet -from tensorflow.python.keras.applications import efficientnet -from tensorflow.python.keras.applications import inception_resnet_v2 -from tensorflow.python.keras.applications import mobilenet_v2 -from tensorflow.python.keras.applications import nasnet -from tensorflow.python.keras.applications import resnet_v2 -from tensorflow.python.keras.applications import vgg19 -from tensorflow.python.keras.applications import xception -from tensorflow.python.keras.saving.saved_model import load as keras_load -from tensorflow.python.platform import benchmark -from tensorflow.python.platform import gfile -from tensorflow.python.platform import googletest -from tensorflow.python.platform import test - - -class BenchmarkSaveApplications( - six.with_metaclass(benchmark.ParameterizedBenchmark, test.Benchmark)): - - _benchmark_parameters = [ - ('ResNet152V2', resnet_v2.ResNet152V2, 2048), - ('VGG19', vgg19.VGG19, 512), - ('Xception', xception.Xception, 2048), - ('InceptionResNetV2', inception_resnet_v2.InceptionResNetV2, 1536), - ('MobileNetV2', mobilenet_v2.MobileNetV2, 1280), - ('DenseNet201', densenet.DenseNet201, 1920), - ('EfficientNetB7', efficientnet.EfficientNetB7, 2560), - ('NASNetLarge', nasnet.NASNetLarge, 4032), - ] - - def benchmark_save_and_load_applications(self, app, _): - trials = 3 - - model = app(weights=None) - model_name = app.__name__ - - tmp_dir = googletest.GetTempDir() - gfile.MakeDirs(tmp_dir) - save_dir = tempfile.mkdtemp(dir=tmp_dir) - - total_save_time = 0 - total_load_time = 0 - - # Run one 
untimed iteration of saving/loading. - model.save(save_dir, save_format='tf') - keras_load.load(save_dir) - - for _ in range(trials): - start_time = time.time() - model.save(save_dir, save_format='tf') - total_save_time += time.time() - start_time - - start_time = time.time() - keras_load.load(save_dir) - total_load_time += time.time() - start_time - self.report_benchmark( - iters=trials, - wall_time=total_save_time / trials, - name='{}.save'.format(model_name)) - - self.report_benchmark( - iters=1, - wall_time=total_load_time / trials, - name='{}.load'.format(model_name)) - gfile.DeleteRecursively(save_dir) - - -if __name__ == '__main__': - test.main() diff --git a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/BUILD b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/BUILD new file mode 100644 index 00000000000..147576849a9 --- /dev/null +++ b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/BUILD @@ -0,0 +1,104 @@ +# Description: +# Implementation of Keras benchmarks. + +load("//tensorflow:tensorflow.bzl", "cuda_py_test") + +package( + default_visibility = ["//visibility:public"], + licenses = ["notice"], # Apache 2.0 +) + +exports_files(["LICENSE"]) + +# To run CPU benchmarks: +# bazel run -c opt benchmarks_test -- --benchmarks=. + +# To run GPU benchmarks: +# bazel run --config=cuda -c opt --copt="-mavx" benchmarks_test -- \ +# --benchmarks=. + +# To run a subset of benchmarks using --benchmarks flag. +# --benchmarks: the list of benchmarks to run. The specified value is interpreted +# as a regular expression and any benchmark whose name contains a partial match +# to the regular expression is executed. +# e.g. --benchmarks=".*lstm*." will run all lstm layer related benchmarks. + +py_library( + name = "saved_model_benchmark_util", + srcs = ["saved_model_benchmark_util.py"], + deps = [ + "//tensorflow:tensorflow_py", + ], +) + +cuda_py_test( + name = "densenet_benchmark_test", + srcs = ["densenet_benchmark_test.py"], + deps = [ + ":saved_model_benchmark_util", + "//tensorflow:tensorflow_py", + ], +) + +cuda_py_test( + name = "efficientnet_benchmark_test", + srcs = ["efficientnet_benchmark_test.py"], + deps = [ + ":saved_model_benchmark_util", + "//tensorflow:tensorflow_py", + ], +) + +cuda_py_test( + name = "inception_resnet_v2_benchmark_test", + srcs = ["inception_resnet_v2_benchmark_test.py"], + deps = [ + ":saved_model_benchmark_util", + "//tensorflow:tensorflow_py", + ], +) + +cuda_py_test( + name = "mobilenet_benchmark_test", + srcs = ["mobilenet_benchmark_test.py"], + deps = [ + ":saved_model_benchmark_util", + "//tensorflow:tensorflow_py", + ], +) + +cuda_py_test( + name = "nasnet_large_benchmark_test", + srcs = ["nasnet_large_benchmark_test.py"], + deps = [ + ":saved_model_benchmark_util", + "//tensorflow:tensorflow_py", + ], +) + +cuda_py_test( + name = "resnet152_v2_benchmark_test", + srcs = ["resnet152_v2_benchmark_test.py"], + deps = [ + ":saved_model_benchmark_util", + "//tensorflow:tensorflow_py", + ], +) + +cuda_py_test( + name = "vgg_benchmark_test", + srcs = ["vgg_benchmark_test.py"], + deps = [ + ":saved_model_benchmark_util", + "//tensorflow:tensorflow_py", + ], +) + +cuda_py_test( + name = "xception_benchmark_test", + srcs = ["xception_benchmark_test.py"], + deps = [ + ":saved_model_benchmark_util", + "//tensorflow:tensorflow_py", + ], +) diff --git a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/densenet_benchmark_test.py b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/densenet_benchmark_test.py new file mode 
100644 index 00000000000..3b8e9d632f5 --- /dev/null +++ b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/densenet_benchmark_test.py @@ -0,0 +1,43 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Benchmarks for saved model on DenseNet201.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +from tensorflow.python.keras.benchmarks.saved_model_benchmarks import saved_model_benchmark_util + + +class BenchmarkSaveApplications(tf.test.Benchmark): + + def benchmark_save_and_load_densenet_201(self): + app = tf.keras.applications.DenseNet201 + save_result, load_result = ( + saved_model_benchmark_util.save_and_load_benchmark(app)) + + self.report_benchmark( + iters=save_result['iters'], + wall_time=save_result['wall_time'], + name=save_result['name']) + + self.report_benchmark( + iters=load_result['iters'], + wall_time=load_result['wall_time'], + name=load_result['name']) + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/efficientnet_benchmark_test.py b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/efficientnet_benchmark_test.py new file mode 100644 index 00000000000..27316e2997a --- /dev/null +++ b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/efficientnet_benchmark_test.py @@ -0,0 +1,43 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Benchmarks for saved model on EfficientNetB7.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +from tensorflow.python.keras.benchmarks.saved_model_benchmarks import saved_model_benchmark_util + + +class BenchmarkSaveApplications(tf.test.Benchmark): + + def benchmark_save_and_load_efficient_net_b7(self): + app = tf.keras.applications.EfficientNetB7 + save_result, load_result = ( + saved_model_benchmark_util.save_and_load_benchmark(app)) + + self.report_benchmark( + iters=save_result['iters'], + wall_time=save_result['wall_time'], + name=save_result['name']) + + self.report_benchmark( + iters=load_result['iters'], + wall_time=load_result['wall_time'], + name=load_result['name']) + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/inception_resnet_v2_benchmark_test.py b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/inception_resnet_v2_benchmark_test.py new file mode 100644 index 00000000000..d2d5090e878 --- /dev/null +++ b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/inception_resnet_v2_benchmark_test.py @@ -0,0 +1,44 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Benchmarks for saved model on InceptionResNetV2.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +from tensorflow.python.keras.benchmarks.saved_model_benchmarks import saved_model_benchmark_util + + +class BenchmarkSaveApplications(tf.test.Benchmark): + + def benchmark_save_and_load_inception_resnet_v2(self): + app = tf.keras.applications.InceptionResNetV2 + save_result, load_result = ( + saved_model_benchmark_util.save_and_load_benchmark(app)) + + self.report_benchmark( + iters=save_result['iters'], + wall_time=save_result['wall_time'], + name=save_result['name']) + + self.report_benchmark( + iters=load_result['iters'], + wall_time=load_result['wall_time'], + name=load_result['name']) + + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/mobilenet_benchmark_test.py b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/mobilenet_benchmark_test.py new file mode 100644 index 00000000000..0d6b61f141e --- /dev/null +++ b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/mobilenet_benchmark_test.py @@ -0,0 +1,43 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Benchmarks for saved model on MobileNetV2.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +from tensorflow.python.keras.benchmarks.saved_model_benchmarks import saved_model_benchmark_util + + +class BenchmarkSaveApplications(tf.test.Benchmark): + + def benchmark_save_and_load_mobilenet_v2(self): + app = tf.keras.applications.MobileNetV2 + save_result, load_result = ( + saved_model_benchmark_util.save_and_load_benchmark(app)) + + self.report_benchmark( + iters=save_result['iters'], + wall_time=save_result['wall_time'], + name=save_result['name']) + + self.report_benchmark( + iters=load_result['iters'], + wall_time=load_result['wall_time'], + name=load_result['name']) + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/nasnet_large_benchmark_test.py b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/nasnet_large_benchmark_test.py new file mode 100644 index 00000000000..864ce1930ee --- /dev/null +++ b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/nasnet_large_benchmark_test.py @@ -0,0 +1,43 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Benchmarks for saved model on NASNetLarge.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +from tensorflow.python.keras.benchmarks.saved_model_benchmarks import saved_model_benchmark_util + + +class BenchmarkSaveApplications(tf.test.Benchmark): + + def benchmark_save_and_load_nasnet_large(self): + app = tf.keras.applications.NASNetLarge + save_result, load_result = ( + saved_model_benchmark_util.save_and_load_benchmark(app)) + + self.report_benchmark( + iters=save_result['iters'], + wall_time=save_result['wall_time'], + name=save_result['name']) + + self.report_benchmark( + iters=load_result['iters'], + wall_time=load_result['wall_time'], + name=load_result['name']) + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/resnet152_v2_benchmark_test.py b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/resnet152_v2_benchmark_test.py new file mode 100644 index 00000000000..a0603eb5136 --- /dev/null +++ b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/resnet152_v2_benchmark_test.py @@ -0,0 +1,44 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Benchmarks for saved model on ResNet152V2.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +from tensorflow.python.keras.benchmarks.saved_model_benchmarks import saved_model_benchmark_util + + +class BenchmarkSaveApplications(tf.test.Benchmark): + + def benchmark_save_and_load_resnet152_v2(self): + app = tf.keras.applications.ResNet152V2 + save_result, load_result = ( + saved_model_benchmark_util.save_and_load_benchmark(app)) + + self.report_benchmark( + iters=save_result['iters'], + wall_time=save_result['wall_time'], + name=save_result['name']) + + self.report_benchmark( + iters=load_result['iters'], + wall_time=load_result['wall_time'], + name=load_result['name']) + + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/saved_model_benchmark_util.py b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/saved_model_benchmark_util.py new file mode 100644 index 00000000000..a0760fa075c --- /dev/null +++ b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/saved_model_benchmark_util.py @@ -0,0 +1,70 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Utils for saved model benchmarks.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tempfile +import time + +import tensorflow as tf + +from tensorflow.python.platform import gfile +from tensorflow.python.platform import googletest + + +def save_and_load_benchmark(app): + """Util for saved model benchmarks.""" + trials = 3 + + model = app(weights=None) + model_name = app.__name__ + + tmp_dir = googletest.GetTempDir() + gfile.MakeDirs(tmp_dir) + save_dir = tempfile.mkdtemp(dir=tmp_dir) + + total_save_time = 0 + total_load_time = 0 + + # Run one untimed iteration of saving/loading. + model.save(save_dir, save_format='tf') + tf.keras.models.load_model(save_dir) + + for _ in range(trials): + start_time = time.time() + model.save(save_dir, save_format='tf') + total_save_time += time.time() - start_time + + start_time = time.time() + tf.keras.models.load_model(save_dir) + total_load_time += time.time() - start_time + + save_result = { + 'iters': trials, + 'wall_time': total_save_time / trials, + 'name': '{}.save'.format(model_name) + } + + load_result = { + 'iters': trials, + 'wall_time': total_load_time / trials, + 'name': '{}.load'.format(model_name) + } + gfile.DeleteRecursively(save_dir) + return save_result, load_result + diff --git a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/vgg_benchmark_test.py b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/vgg_benchmark_test.py new file mode 100644 index 00000000000..3ceebe4fcc4 --- /dev/null +++ b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/vgg_benchmark_test.py @@ -0,0 +1,44 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Benchmarks for saved model on VGG19.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +from tensorflow.python.keras.benchmarks.saved_model_benchmarks import saved_model_benchmark_util + + +class BenchmarkSaveApplications(tf.test.Benchmark): + + def benchmark_save_and_load_vgg19(self): + app = tf.keras.applications.VGG19 + save_result, load_result = ( + saved_model_benchmark_util.save_and_load_benchmark(app)) + + self.report_benchmark( + iters=save_result['iters'], + wall_time=save_result['wall_time'], + name=save_result['name']) + + self.report_benchmark( + iters=load_result['iters'], + wall_time=load_result['wall_time'], + name=load_result['name']) + + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/xception_benchmark_test.py b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/xception_benchmark_test.py new file mode 100644 index 00000000000..ddab2f68ffd --- /dev/null +++ b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/xception_benchmark_test.py @@ -0,0 +1,44 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Benchmarks for saved model on Xception.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +from tensorflow.python.keras.benchmarks.saved_model_benchmarks import saved_model_benchmark_util + + +class BenchmarkSaveApplications(tf.test.Benchmark): + + def benchmark_save_and_load_xception(self): + app = tf.keras.applications.Xception + save_result, load_result = ( + saved_model_benchmark_util.save_and_load_benchmark(app)) + + self.report_benchmark( + iters=save_result['iters'], + wall_time=save_result['wall_time'], + name=save_result['name']) + + self.report_benchmark( + iters=load_result['iters'], + wall_time=load_result['wall_time'], + name=load_result['name']) + + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index 4e608360f8b..38ff12b100e 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -132,7 +132,7 @@ COMMON_PIP_DEPS = [ "//tensorflow/python/keras/mixed_precision/experimental:test_util", "//tensorflow/python/keras/tests:model_subclassing_test_util", "//tensorflow/python/keras/tests:model_architectures", - "//tensorflow/python/keras/benchmarks:benchmark_util", + "//tensorflow/python/keras/benchmarks:keras_benchmark_lib_pip", "//tensorflow/python/kernel_tests:cudnn_deterministic_base", "//tensorflow/python/kernel_tests:bias_op_base", "//tensorflow/python/kernel_tests/random:util", From a007c24de69f49d2a35491c942dac4c9c6c285bd Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 13 Jul 2020 16:57:38 -0700 Subject: [PATCH 0326/2522] Use explicitly brace-initialized int64 values rather than LL suffixes to protect against situations where int64 is not long long. PiperOrigin-RevId: 321062446 Change-Id: Id4e8a82f0ad7e3fa7cd7db3a61fddfaa81af9ca4 --- .../core/common_runtime/function_test.cc | 24 ++++----- .../core/framework/kernel_def_builder_test.cc | 8 +-- .../data/experimental/snapshot_dataset_op.cc | 4 +- .../convert/xplane_to_memory_profile_test.cc | 54 +++++++++---------- .../core/profiler/utils/group_events_test.cc | 2 +- .../profiler/utils/xplane_builder_test.cc | 4 +- 6 files changed, 49 insertions(+), 47 deletions(-) diff --git a/tensorflow/core/common_runtime/function_test.cc b/tensorflow/core/common_runtime/function_test.cc index 1deafe31ae2..2d53b6a9db3 100644 --- a/tensorflow/core/common_runtime/function_test.cc +++ b/tensorflow/core/common_runtime/function_test.cc @@ -830,10 +830,10 @@ TEST_F(FunctionLibraryRuntimeTest, ExpandInlineFunctions) { { Scope s = Scope::NewRootScope(); auto x = ops::_Arg(s.WithOpName("x"), DT_FLOAT, 0); - auto x4_x2_two = ops::Const(s.WithOpName("x4/x2/two"), 2LL); - auto x4_y_two = ops::Const(s.WithOpName("x4/y/two"), 2LL); - auto y_x2_two = ops::Const(s.WithOpName("y/x2/two"), 2LL); - auto y_y_two = ops::Const(s.WithOpName("y/y/two"), 2LL); + auto x4_x2_two = ops::Const(s.WithOpName("x4/x2/two"), int64{2}); + auto x4_y_two = ops::Const(s.WithOpName("x4/y/two"), int64{2}); + auto y_x2_two = ops::Const(s.WithOpName("y/x2/two"), int64{2}); + auto y_y_two = ops::Const(s.WithOpName("y/y/two"), int64{2}); auto x4_x2_scale = ops::Cast(s.WithOpName("x4/x2/scale"), x4_x2_two, DT_FLOAT); auto x4_y_scale = ops::Cast(s.WithOpName("x4/y/scale"), x4_y_two, DT_FLOAT); @@ -876,10 +876,10 @@ TEST_F(FunctionLibraryRuntimeTest, ExpandInlineFunctions) { { Scope s = Scope::NewRootScope(); auto x = ops::_Arg(s.WithOpName("x"), DT_FLOAT, 0); - auto x4_x2_two = ops::Const(s.WithOpName("x4/x2/two"), 2LL); - auto x4_y_two = ops::Const(s.WithOpName("x4/y/two"), 2LL); - auto y_x2_two = ops::Const(s.WithOpName("y/x2/two"), 2LL); - auto y_y_two = ops::Const(s.WithOpName("y/y/two"), 2LL); + auto x4_x2_two = ops::Const(s.WithOpName("x4/x2/two"), int64{2}); + auto x4_y_two = ops::Const(s.WithOpName("x4/y/two"), int64{2}); + auto y_x2_two = ops::Const(s.WithOpName("y/x2/two"), int64{2}); + auto y_y_two = ops::Const(s.WithOpName("y/y/two"), int64{2}); auto x4_x2_scale = ops::Cast(s.WithOpName("x4/x2/scale"), x4_x2_two, DT_FLOAT); auto x4_y_scale = ops::Cast(s.WithOpName("x4/y/scale"), x4_y_two, DT_FLOAT); @@ -957,7 +957,7 @@ TEST_F(FunctionLibraryRuntimeTest, ExpandInlineFunctionsWithInputControlEdges) { s.WithOpName("Func/b/x2/input/_4").WithControlDependencies({func3}), func1); auto b_x2_two = ops::Const( - s.WithOpName("b/x2/two").WithControlDependencies({func3}), 2LL); + s.WithOpName("b/x2/two").WithControlDependencies({func3}), int64{2}); auto b_x2_scale = ops::Cast(s.WithOpName("b/x2/scale"), b_x2_two, DT_FLOAT); auto b_x2_y = ops::Mul(s.WithOpName("b/x2/y"), func4, b_x2_scale); auto func5 = ops::Identity(s.WithOpName("Func/b/x2/output/_5"), b_x2_y); @@ -968,7 +968,7 @@ TEST_F(FunctionLibraryRuntimeTest, ExpandInlineFunctionsWithInputControlEdges) { s.WithOpName("Func/b/y/input/_7").WithControlDependencies({func6}), func5); auto b_y_two = ops::Const( - s.WithOpName("b/y/two").WithControlDependencies({func6}), 2LL); + s.WithOpName("b/y/two").WithControlDependencies({func6}), int64{2}); auto b_y_scale = 
ops::Cast(s.WithOpName("b/y/scale"), b_y_two, DT_FLOAT); auto b_y_y = ops::Mul(s.WithOpName("b/y/y"), func7, b_y_scale); auto func8 = ops::Identity(s.WithOpName("Func/b/y/output/_8"), b_y_y); @@ -1589,7 +1589,7 @@ TEST_F(FunctionLibraryRuntimeTest, Gradient_XTimesTwo) { { Scope s = Scope::NewRootScope(); auto x = ops::_Arg(s.WithOpName("x"), DT_FLOAT, 0); - auto two = ops::Const(s.WithOpName("two"), 2LL); + auto two = ops::Const(s.WithOpName("two"), int64{2}); auto scale = ops::Cast(s.WithOpName("scale"), two, DT_FLOAT); auto y = ops::Mul(s.WithOpName("y"), x, scale); auto ret = ops::_Retval(s.WithOpName("y_RetVal"), y, 0); @@ -1607,7 +1607,7 @@ TEST_F(FunctionLibraryRuntimeTest, Gradient_XTimesTwo) { Scope s = Scope::NewRootScope(); auto x = ops::_Arg(s.WithOpName("x"), DT_FLOAT, 0); auto func0 = ops::_Arg(s.WithOpName("Func/_0"), DT_FLOAT, 1); - auto two = ops::Const(s.WithOpName("two"), 2LL); + auto two = ops::Const(s.WithOpName("two"), int64{2}); auto scale = ops::Cast(s.WithOpName("scale"), two, DT_FLOAT); auto y = ops::Mul(s.WithOpName("y"), x, scale); NameAttrList fn0; diff --git a/tensorflow/core/framework/kernel_def_builder_test.cc b/tensorflow/core/framework/kernel_def_builder_test.cc index 30bfd939ac3..48669c59641 100644 --- a/tensorflow/core/framework/kernel_def_builder_test.cc +++ b/tensorflow/core/framework/kernel_def_builder_test.cc @@ -74,8 +74,10 @@ TEST(KernelDefBuilderTest, TypeConstraint) { } TEST(KernelDefBuilderTest, Int64Constraint) { - const KernelDef* def = - KernelDefBuilder("B").Device(DEVICE_GPU).AttrConstraint("T", 5ll).Build(); + const KernelDef* def = KernelDefBuilder("B") + .Device(DEVICE_GPU) + .AttrConstraint("T", int64{5}) + .Build(); KernelDef expected; protobuf::TextFormat::ParseFromString(R"proto( op: 'B' @@ -91,7 +93,7 @@ TEST(KernelDefBuilderTest, Int64Constraint) { def = KernelDefBuilder("C") .Device(DEVICE_GPU) - .AttrConstraint("U", gtl::ArraySlice{5ll, 17ll}) + .AttrConstraint("U", gtl::ArraySlice{int64{5}, int64{17}}) .AttrConstraint("V", string("proto")) .Build(); diff --git a/tensorflow/core/kernels/data/experimental/snapshot_dataset_op.cc b/tensorflow/core/kernels/data/experimental/snapshot_dataset_op.cc index c029f2a50b8..ec6cf02e02e 100644 --- a/tensorflow/core/kernels/data/experimental/snapshot_dataset_op.cc +++ b/tensorflow/core/kernels/data/experimental/snapshot_dataset_op.cc @@ -596,8 +596,8 @@ Status SnapshotDatasetV2Op::Dataset::Iterator::Writer::WriteMetadataFile( experimental::SnapshotMetadataRecord metadata; metadata.set_creation_timestamp(EnvTime::NowMicros()); - metadata.set_graph_hash(strings::Printf("%llu", dataset()->hash_)); - metadata.set_run_id(strings::Printf("%llu", run_id_)); + metadata.set_graph_hash(strings::StrCat(dataset()->hash_)); + metadata.set_run_id(strings::StrCat(run_id_)); metadata.set_version(kFileFormatVersion); for (const auto& output_dtype : dataset()->output_dtypes()) { metadata.add_dtype(output_dtype); diff --git a/tensorflow/core/profiler/convert/xplane_to_memory_profile_test.cc b/tensorflow/core/profiler/convert/xplane_to_memory_profile_test.cc index c334318dcfe..7758b215753 100644 --- a/tensorflow/core/profiler/convert/xplane_to_memory_profile_test.cc +++ b/tensorflow/core/profiler/convert/xplane_to_memory_profile_test.cc @@ -40,15 +40,15 @@ TEST(ConvertXPlaneToMemoryProfile, OneAllocatorMultiActivitiesTest) { auto tf_executor_thread = host_plane_builder.GetOrCreateLine(0); CreateXEvent(&host_plane_builder, &tf_executor_thread, "MemoryAllocation", 40000, 1000, - {{StatType::kBytesReserved, 2000LL}, - 
{StatType::kBytesAllocated, 3000LL}, - {StatType::kBytesAvailable, 5000LL}, - {StatType::kPeakBytesInUse, 8500LL}, - {StatType::kRequestedBytes, 200LL}, - {StatType::kAllocationBytes, 256LL}, - {StatType::kAddress, 222333LL}, - {StatType::kStepId, -93746LL}, - {StatType::kDataType, 1LL}, + {{StatType::kBytesReserved, int64{2000}}, + {StatType::kBytesAllocated, int64{3000}}, + {StatType::kBytesAvailable, int64{5000}}, + {StatType::kPeakBytesInUse, int64{8500}}, + {StatType::kRequestedBytes, int64{200}}, + {StatType::kAllocationBytes, int64{256}}, + {StatType::kAddress, int64{222333}}, + {StatType::kStepId, int64{-93746}}, + {StatType::kDataType, int64{1}}, {StatType::kAllocatorName, "GPU_0_bfc"}, {StatType::kTfOp, "foo/bar"}, {StatType::kRegionType, "output"}, @@ -56,30 +56,30 @@ TEST(ConvertXPlaneToMemoryProfile, OneAllocatorMultiActivitiesTest) { CreateXEvent(&host_plane_builder, &tf_executor_thread, "MemoryDeallocation", 50000, 1000, - {{StatType::kBytesReserved, 2000LL}, - {StatType::kBytesAllocated, 2744LL}, - {StatType::kBytesAvailable, 5256LL}, - {StatType::kPeakBytesInUse, 8500LL}, - {StatType::kRequestedBytes, 200LL}, - {StatType::kAllocationBytes, 256LL}, - {StatType::kAddress, 222333LL}, - {StatType::kStepId, 0LL}, - {StatType::kDataType, 0LL}, + {{StatType::kBytesReserved, int64{2000}}, + {StatType::kBytesAllocated, int64{2744}}, + {StatType::kBytesAvailable, int64{5256}}, + {StatType::kPeakBytesInUse, int64{8500}}, + {StatType::kRequestedBytes, int64{200}}, + {StatType::kAllocationBytes, int64{256}}, + {StatType::kAddress, int64{222333}}, + {StatType::kStepId, int64{0}}, + {StatType::kDataType, int64{0}}, {StatType::kAllocatorName, "GPU_0_bfc"}, {StatType::kRegionType, ""}, {StatType::kTensorShapes, ""}}); CreateXEvent(&host_plane_builder, &tf_executor_thread, "MemoryAllocation", 70000, 1000, - {{StatType::kBytesReserved, 2000LL}, - {StatType::kBytesAllocated, 5000LL}, - {StatType::kBytesAvailable, 3000LL}, - {StatType::kPeakBytesInUse, 9500LL}, - {StatType::kRequestedBytes, 300LL}, - {StatType::kAllocationBytes, 300LL}, - {StatType::kAddress, 345678LL}, - {StatType::kStepId, -93746LL}, - {StatType::kDataType, 9LL}, + {{StatType::kBytesReserved, int64{2000}}, + {StatType::kBytesAllocated, int64{5000}}, + {StatType::kBytesAvailable, int64{3000}}, + {StatType::kPeakBytesInUse, int64{9500}}, + {StatType::kRequestedBytes, int64{300}}, + {StatType::kAllocationBytes, int64{300}}, + {StatType::kAddress, int64{345678}}, + {StatType::kStepId, int64{-93746}}, + {StatType::kDataType, int64{9}}, {StatType::kAllocatorName, "GPU_0_bfc"}, {StatType::kTfOp, "mul_grad/Sum"}, {StatType::kRegionType, "temp"}, diff --git a/tensorflow/core/profiler/utils/group_events_test.cc b/tensorflow/core/profiler/utils/group_events_test.cc index 77dfb6cb7f0..195f2adb9c4 100644 --- a/tensorflow/core/profiler/utils/group_events_test.cc +++ b/tensorflow/core/profiler/utils/group_events_test.cc @@ -83,7 +83,7 @@ TEST(GroupEventsTest, GroupGpuTraceTest) { auto main_thread = host_plane_builder.GetOrCreateLine(0); CreateXEvent(&host_plane_builder, &main_thread, "train", 0, 100, - {{StatType::kStepNum, kStepNum}, {StatType::kIsRoot, 1LL}}); + {{StatType::kStepNum, kStepNum}, {StatType::kIsRoot, int64{1}}}); CreateXEvent(&host_plane_builder, &main_thread, HostEventType::kFunctionRun, 10, 90, {{StatType::kStepId, kStepId}}); diff --git a/tensorflow/core/profiler/utils/xplane_builder_test.cc b/tensorflow/core/profiler/utils/xplane_builder_test.cc index e55e01d8233..044a86fe909 100644 --- 
a/tensorflow/core/profiler/utils/xplane_builder_test.cc +++ b/tensorflow/core/profiler/utils/xplane_builder_test.cc @@ -32,7 +32,7 @@ TEST(TimespanTests, NonInstantSpanIncludesSingleTimeTests) { XEventBuilder event_builder = xline_builder.AddEvent( *xplane_builder.GetOrCreateEventMetadata("1st event")); event_builder.AddStatValue( - *xplane_builder.GetOrCreateStatMetadata("int stat"), 1234LL); + *xplane_builder.GetOrCreateStatMetadata("int stat"), int64{1234}); event_builder.AddStatValue( *xplane_builder.GetOrCreateStatMetadata("string stat"), std::string("abc")); @@ -50,7 +50,7 @@ TEST(TimespanTests, NonInstantSpanIncludesSingleTimeTests) { EXPECT_EQ(xevent.Name(), "1st event"); xevent.ForEachStat([&](const XStatVisitor& stat) { if (stat.Name() == "int stat") { - EXPECT_EQ(stat.IntValue(), 1234LL); + EXPECT_EQ(stat.IntValue(), int64{1234}); num_stats++; } else if (stat.Name() == "string stat") { EXPECT_EQ(stat.StrOrRefValue(), "abc"); From 7b71c07139b7dcfc9f33e1da5eaeca6258ffa9d1 Mon Sep 17 00:00:00 2001 From: Ken Franko Date: Mon, 13 Jul 2020 17:01:11 -0700 Subject: [PATCH 0327/2522] Remove deprecated experimental_run_v2 method. PiperOrigin-RevId: 321062994 Change-Id: I749202fd68e7c63832ce946f88748aba98364397 --- RELEASE.md | 1 + tensorflow/python/distribute/distribute_lib.py | 6 ------ .../v1/tensorflow.distribute.-mirrored-strategy.pbtxt | 4 ---- .../v1/tensorflow.distribute.-one-device-strategy.pbtxt | 4 ---- .../api/golden/v1/tensorflow.distribute.-strategy.pbtxt | 4 ---- ....distribute.experimental.-central-storage-strategy.pbtxt | 4 ---- ...ibute.experimental.-multi-worker-mirrored-strategy.pbtxt | 4 ---- ...distribute.experimental.-parameter-server-strategy.pbtxt | 4 ---- ...tensorflow.distribute.experimental.-t-p-u-strategy.pbtxt | 4 ---- .../v2/tensorflow.distribute.-mirrored-strategy.pbtxt | 4 ---- .../v2/tensorflow.distribute.-one-device-strategy.pbtxt | 4 ---- .../api/golden/v2/tensorflow.distribute.-strategy.pbtxt | 4 ---- .../golden/v2/tensorflow.distribute.-t-p-u-strategy.pbtxt | 4 ---- ....distribute.experimental.-central-storage-strategy.pbtxt | 4 ---- ...ibute.experimental.-multi-worker-mirrored-strategy.pbtxt | 4 ---- ...distribute.experimental.-parameter-server-strategy.pbtxt | 4 ---- ...tensorflow.distribute.experimental.-t-p-u-strategy.pbtxt | 4 ---- 17 files changed, 1 insertion(+), 66 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 449014405eb..150c7077349 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -11,6 +11,7 @@ * C-API functions `TF_StringDecode`, `TF_StringEncode`, and `TF_StringEncodedSize` are no longer relevant and have been removed; see core/platform/ctstring.h for string access/modification in C. +* Removed `tf.distribute.Strategy.experimental_run_v2` method, which was deprecated in TF 2.2. ## Known Caveats diff --git a/tensorflow/python/distribute/distribute_lib.py b/tensorflow/python/distribute/distribute_lib.py index 6dc3c93a51d..c659ae6205c 100644 --- a/tensorflow/python/distribute/distribute_lib.py +++ b/tensorflow/python/distribute/distribute_lib.py @@ -1221,12 +1221,6 @@ class StrategyBase(object): fn, autograph_ctx.control_status_ctx(), convert_by_default=False) return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs) - # TODO(b/151224785): Remove deprecated alias. 
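For reference, a minimal migration sketch (not code from this patch): callers of the removed `experimental_run_v2` alias switch to `Strategy.run`, which takes the same `fn`, `args`, `kwargs`, and `options` arguments. The strategy choice and step function below are illustrative assumptions.

    # Minimal migration sketch; the strategy and step function are illustrative, not from the patch.
    import tensorflow as tf

    strategy = tf.distribute.MirroredStrategy()

    @tf.function
    def step_fn(x):
      return x * 2.0

    inputs = tf.constant([1.0, 2.0])
    # Before this patch: per_replica = strategy.experimental_run_v2(step_fn, args=(inputs,))
    per_replica = strategy.run(step_fn, args=(inputs,))
    total = strategy.reduce(tf.distribute.ReduceOp.SUM, per_replica, axis=None)
    print(total)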
- @doc_controls.do_not_doc_inheritable # DEPRECATED - @deprecation.deprecated(None, "renamed to `run`") - def experimental_run_v2(self, fn, args=(), kwargs=None, options=None): - return self.run(fn, args=args, kwargs=kwargs, options=options) - def reduce(self, reduce_op, value, axis): """Reduce `value` across replicas and return result on current device. diff --git a/tensorflow/tools/api/golden/v1/tensorflow.distribute.-mirrored-strategy.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.distribute.-mirrored-strategy.pbtxt index 0c5db602029..85dd7f5eaa6 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.distribute.-mirrored-strategy.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.distribute.-mirrored-strategy.pbtxt @@ -48,10 +48,6 @@ tf_class { name: "experimental_run" argspec: "args=[\'self\', \'fn\', \'input_iterator\'], varargs=None, keywords=None, defaults=[\'None\'], " } - member_method { - name: "experimental_run_v2" - argspec: "args=[\'self\', \'fn\', \'args\', \'kwargs\', \'options\'], varargs=None, keywords=None, defaults=[\'()\', \'None\', \'None\'], " - } member_method { name: "group" argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/tensorflow/tools/api/golden/v1/tensorflow.distribute.-one-device-strategy.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.distribute.-one-device-strategy.pbtxt index ae62acffa44..23e03ceab02 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.distribute.-one-device-strategy.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.distribute.-one-device-strategy.pbtxt @@ -48,10 +48,6 @@ tf_class { name: "experimental_run" argspec: "args=[\'self\', \'fn\', \'input_iterator\'], varargs=None, keywords=None, defaults=[\'None\'], " } - member_method { - name: "experimental_run_v2" - argspec: "args=[\'self\', \'fn\', \'args\', \'kwargs\', \'options\'], varargs=None, keywords=None, defaults=[\'()\', \'None\', \'None\'], " - } member_method { name: "group" argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/tensorflow/tools/api/golden/v1/tensorflow.distribute.-strategy.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.distribute.-strategy.pbtxt index 9285405ea4f..7fbd9dded22 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.distribute.-strategy.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.distribute.-strategy.pbtxt @@ -47,10 +47,6 @@ tf_class { name: "experimental_run" argspec: "args=[\'self\', \'fn\', \'input_iterator\'], varargs=None, keywords=None, defaults=[\'None\'], " } - member_method { - name: "experimental_run_v2" - argspec: "args=[\'self\', \'fn\', \'args\', \'kwargs\', \'options\'], varargs=None, keywords=None, defaults=[\'()\', \'None\', \'None\'], " - } member_method { name: "group" argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/tensorflow/tools/api/golden/v1/tensorflow.distribute.experimental.-central-storage-strategy.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.distribute.experimental.-central-storage-strategy.pbtxt index 3c3d785ac7c..2f7ba2db15c 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.distribute.experimental.-central-storage-strategy.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.distribute.experimental.-central-storage-strategy.pbtxt @@ -48,10 +48,6 @@ tf_class { name: "experimental_run" argspec: "args=[\'self\', \'fn\', \'input_iterator\'], varargs=None, keywords=None, defaults=[\'None\'], " } - 
member_method { - name: "experimental_run_v2" - argspec: "args=[\'self\', \'fn\', \'args\', \'kwargs\', \'options\'], varargs=None, keywords=None, defaults=[\'()\', \'None\', \'None\'], " - } member_method { name: "group" argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/tensorflow/tools/api/golden/v1/tensorflow.distribute.experimental.-multi-worker-mirrored-strategy.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.distribute.experimental.-multi-worker-mirrored-strategy.pbtxt index e1f8bea251b..dac5652c7fd 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.distribute.experimental.-multi-worker-mirrored-strategy.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.distribute.experimental.-multi-worker-mirrored-strategy.pbtxt @@ -48,10 +48,6 @@ tf_class { name: "experimental_run" argspec: "args=[\'self\', \'fn\', \'input_iterator\'], varargs=None, keywords=None, defaults=[\'None\'], " } - member_method { - name: "experimental_run_v2" - argspec: "args=[\'self\', \'fn\', \'args\', \'kwargs\', \'options\'], varargs=None, keywords=None, defaults=[\'()\', \'None\', \'None\'], " - } member_method { name: "group" argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/tensorflow/tools/api/golden/v1/tensorflow.distribute.experimental.-parameter-server-strategy.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.distribute.experimental.-parameter-server-strategy.pbtxt index 6ae83d18589..f63c16dec5a 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.distribute.experimental.-parameter-server-strategy.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.distribute.experimental.-parameter-server-strategy.pbtxt @@ -48,10 +48,6 @@ tf_class { name: "experimental_run" argspec: "args=[\'self\', \'fn\', \'input_iterator\'], varargs=None, keywords=None, defaults=[\'None\'], " } - member_method { - name: "experimental_run_v2" - argspec: "args=[\'self\', \'fn\', \'args\', \'kwargs\', \'options\'], varargs=None, keywords=None, defaults=[\'()\', \'None\', \'None\'], " - } member_method { name: "group" argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/tensorflow/tools/api/golden/v1/tensorflow.distribute.experimental.-t-p-u-strategy.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.distribute.experimental.-t-p-u-strategy.pbtxt index 0e548eca9b5..53d5b756568 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.distribute.experimental.-t-p-u-strategy.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.distribute.experimental.-t-p-u-strategy.pbtxt @@ -52,10 +52,6 @@ tf_class { name: "experimental_run" argspec: "args=[\'self\', \'fn\', \'input_iterator\'], varargs=None, keywords=None, defaults=[\'None\'], " } - member_method { - name: "experimental_run_v2" - argspec: "args=[\'self\', \'fn\', \'args\', \'kwargs\', \'options\'], varargs=None, keywords=None, defaults=[\'()\', \'None\', \'None\'], " - } member_method { name: "group" argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/tensorflow/tools/api/golden/v2/tensorflow.distribute.-mirrored-strategy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.-mirrored-strategy.pbtxt index 8817f16d808..148c8c9d71f 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.distribute.-mirrored-strategy.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.distribute.-mirrored-strategy.pbtxt @@ -60,10 +60,6 @@ tf_class { 
name: "experimental_run" argspec: "args=[\'self\', \'fn\', \'input_iterator\'], varargs=None, keywords=None, defaults=[\'None\'], " } - member_method { - name: "experimental_run_v2" - argspec: "args=[\'self\', \'fn\', \'args\', \'kwargs\', \'options\'], varargs=None, keywords=None, defaults=[\'()\', \'None\', \'None\'], " - } member_method { name: "experimental_split_to_logical_devices" argspec: "args=[\'self\', \'tensor\', \'partition_dimensions\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.distribute.-one-device-strategy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.-one-device-strategy.pbtxt index b6604408536..51e0d889489 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.distribute.-one-device-strategy.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.distribute.-one-device-strategy.pbtxt @@ -60,10 +60,6 @@ tf_class { name: "experimental_run" argspec: "args=[\'self\', \'fn\', \'input_iterator\'], varargs=None, keywords=None, defaults=[\'None\'], " } - member_method { - name: "experimental_run_v2" - argspec: "args=[\'self\', \'fn\', \'args\', \'kwargs\', \'options\'], varargs=None, keywords=None, defaults=[\'()\', \'None\', \'None\'], " - } member_method { name: "experimental_split_to_logical_devices" argspec: "args=[\'self\', \'tensor\', \'partition_dimensions\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.distribute.-strategy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.-strategy.pbtxt index 8140088e701..dbd329d6874 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.distribute.-strategy.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.distribute.-strategy.pbtxt @@ -59,10 +59,6 @@ tf_class { name: "experimental_run" argspec: "args=[\'self\', \'fn\', \'input_iterator\'], varargs=None, keywords=None, defaults=[\'None\'], " } - member_method { - name: "experimental_run_v2" - argspec: "args=[\'self\', \'fn\', \'args\', \'kwargs\', \'options\'], varargs=None, keywords=None, defaults=[\'()\', \'None\', \'None\'], " - } member_method { name: "experimental_split_to_logical_devices" argspec: "args=[\'self\', \'tensor\', \'partition_dimensions\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.distribute.-t-p-u-strategy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.-t-p-u-strategy.pbtxt index 29947a1c9c5..505c77be2e2 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.distribute.-t-p-u-strategy.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.distribute.-t-p-u-strategy.pbtxt @@ -60,10 +60,6 @@ tf_class { name: "experimental_run" argspec: "args=[\'self\', \'fn\', \'input_iterator\'], varargs=None, keywords=None, defaults=[\'None\'], " } - member_method { - name: "experimental_run_v2" - argspec: "args=[\'self\', \'fn\', \'args\', \'kwargs\', \'options\'], varargs=None, keywords=None, defaults=[\'()\', \'None\', \'None\'], " - } member_method { name: "experimental_split_to_logical_devices" argspec: "args=[\'self\', \'tensor\', \'partition_dimensions\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.distribute.experimental.-central-storage-strategy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.experimental.-central-storage-strategy.pbtxt index ab030edd731..963ad04f6ab 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.distribute.experimental.-central-storage-strategy.pbtxt +++ 
b/tensorflow/tools/api/golden/v2/tensorflow.distribute.experimental.-central-storage-strategy.pbtxt @@ -60,10 +60,6 @@ tf_class { name: "experimental_run" argspec: "args=[\'self\', \'fn\', \'input_iterator\'], varargs=None, keywords=None, defaults=[\'None\'], " } - member_method { - name: "experimental_run_v2" - argspec: "args=[\'self\', \'fn\', \'args\', \'kwargs\', \'options\'], varargs=None, keywords=None, defaults=[\'()\', \'None\', \'None\'], " - } member_method { name: "experimental_split_to_logical_devices" argspec: "args=[\'self\', \'tensor\', \'partition_dimensions\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.distribute.experimental.-multi-worker-mirrored-strategy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.experimental.-multi-worker-mirrored-strategy.pbtxt index 43632e17b6d..5a44eaf20b5 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.distribute.experimental.-multi-worker-mirrored-strategy.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.distribute.experimental.-multi-worker-mirrored-strategy.pbtxt @@ -60,10 +60,6 @@ tf_class { name: "experimental_run" argspec: "args=[\'self\', \'fn\', \'input_iterator\'], varargs=None, keywords=None, defaults=[\'None\'], " } - member_method { - name: "experimental_run_v2" - argspec: "args=[\'self\', \'fn\', \'args\', \'kwargs\', \'options\'], varargs=None, keywords=None, defaults=[\'()\', \'None\', \'None\'], " - } member_method { name: "experimental_split_to_logical_devices" argspec: "args=[\'self\', \'tensor\', \'partition_dimensions\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.distribute.experimental.-parameter-server-strategy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.experimental.-parameter-server-strategy.pbtxt index f792094bfdb..58bd5497817 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.distribute.experimental.-parameter-server-strategy.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.distribute.experimental.-parameter-server-strategy.pbtxt @@ -60,10 +60,6 @@ tf_class { name: "experimental_run" argspec: "args=[\'self\', \'fn\', \'input_iterator\'], varargs=None, keywords=None, defaults=[\'None\'], " } - member_method { - name: "experimental_run_v2" - argspec: "args=[\'self\', \'fn\', \'args\', \'kwargs\', \'options\'], varargs=None, keywords=None, defaults=[\'()\', \'None\', \'None\'], " - } member_method { name: "experimental_split_to_logical_devices" argspec: "args=[\'self\', \'tensor\', \'partition_dimensions\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.distribute.experimental.-t-p-u-strategy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.experimental.-t-p-u-strategy.pbtxt index 855cdbfb175..4bcd2277411 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.distribute.experimental.-t-p-u-strategy.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.distribute.experimental.-t-p-u-strategy.pbtxt @@ -60,10 +60,6 @@ tf_class { name: "experimental_run" argspec: "args=[\'self\', \'fn\', \'input_iterator\'], varargs=None, keywords=None, defaults=[\'None\'], " } - member_method { - name: "experimental_run_v2" - argspec: "args=[\'self\', \'fn\', \'args\', \'kwargs\', \'options\'], varargs=None, keywords=None, defaults=[\'()\', \'None\', \'None\'], " - } member_method { name: "experimental_split_to_logical_devices" argspec: "args=[\'self\', \'tensor\', \'partition_dimensions\'], varargs=None, 
keywords=None, defaults=None" From a389e77b52587cdb80c56d589f24f07cd538b939 Mon Sep 17 00:00:00 2001 From: Chuanhao Zhuge Date: Mon, 13 Jul 2020 17:04:39 -0700 Subject: [PATCH 0328/2522] [TFRT:Servo] Fix Reference Variable handling in MLIR pass for Servo use cases. PiperOrigin-RevId: 321063686 Change-Id: Icef026825123a8e090ba59c50a095138b7ebf012 --- .../tests/readonly_references_to_resources.mlir | 12 ++++++++++++ .../transforms/readonly_references_to_resources.cc | 11 +++++++++-- .../mlir/tensorflow/translate/import_model.cc | 3 +-- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/readonly_references_to_resources.mlir b/tensorflow/compiler/mlir/tensorflow/tests/readonly_references_to_resources.mlir index 2b8f47a407e..7d36e6f4319 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/readonly_references_to_resources.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/readonly_references_to_resources.mlir @@ -12,6 +12,18 @@ func @f() { // ----- +// Test case: Basic converting. '_class' attribute is at IdentityOp. + +func @f() { + // CHECK: "tf.VarHandleOp" + // CHECK: "tf.ReadVariableOp" + %val0 = "tf.VariableV2"() {container = "", device = "", shape = #tf.shape<96>, shared_name = ""} : () -> tensor<96x!tf.f32ref> + %val1 = "tf.Identity"(%val0) {_class = ["loc:@v"]} : (tensor<96x!tf.f32ref>) -> tensor<96xf32> + return +} + +// ----- + // Test case: Two ReadVariable ops. func @f() { diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/readonly_references_to_resources.cc b/tensorflow/compiler/mlir/tensorflow/transforms/readonly_references_to_resources.cc index 5fc35361bca..f8bbb8994f4 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/readonly_references_to_resources.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/readonly_references_to_resources.cc @@ -65,8 +65,15 @@ class ConvertReadonlyReferenceVariablesToResourceVariablesPass StringRef GetNodeNameFromClassAttr(Operation *op) { ArrayAttr classes_attr = op->getAttrOfType(kClassAttr); if (!classes_attr) { - op->emitOpError() << "has no '_class' attribute"; - return StringRef(); + // Attampt to parse "_class" from the IdentityOp that follows VariableV2. + // For read-only reference variables, IdentityOp should be the only user of + // VariableV2. 
+ auto identity_op = op->getUsers().begin(); + classes_attr = identity_op->getAttrOfType(kClassAttr); + if (!classes_attr) { + op->emitOpError() << "has no '_class' attribute"; + return StringRef(); + } } StringRef result; diff --git a/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc b/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc index c7d5339f93c..a12378b66ba 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc @@ -3591,8 +3591,7 @@ Status SavedModelSignatureDefImporter::LiftVariables() { pm.addPass( mlir::tf_saved_model::CreateLiftVariablesPass(bundle_.GetSession())); if (mlir::failed(pm.run(*module_))) - return diag_handler.Combine( - errors::Internal("failed to lifting variables.")); + return diag_handler.Combine(errors::Internal("Failed to lift variables.")); return Status::OK(); } From 9e5c6eec900d23f6bf1b4aae6c239c4d7d8456d0 Mon Sep 17 00:00:00 2001 From: Tomer Kaftan Date: Mon, 13 Jul 2020 17:16:29 -0700 Subject: [PATCH 0329/2522] Make server_lib_test explicitly enter a graph rather than using the run_v1_only decorator PiperOrigin-RevId: 321065548 Change-Id: Ie7ab9e60b2b11c2cad82d71af6f6fca5765e7d8a --- tensorflow/python/training/server_lib_test.py | 239 +++++++++--------- 1 file changed, 117 insertions(+), 122 deletions(-) diff --git a/tensorflow/python/training/server_lib_test.py b/tensorflow/python/training/server_lib_test.py index dc2adb7dee8..54ede81c9ea 100644 --- a/tensorflow/python/training/server_lib_test.py +++ b/tensorflow/python/training/server_lib_test.py @@ -29,7 +29,6 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors_impl from tensorflow.python.framework import ops -from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import data_flow_ops from tensorflow.python.ops import math_ops @@ -48,63 +47,62 @@ class GrpcServerTest(test.TestCase): def testRunStep(self): server = self._cached_server + with ops.Graph().as_default(): + with session.Session(server.target) as sess: + c = constant_op.constant([[2, 1]]) + d = constant_op.constant([[1], [2]]) + e = math_ops.matmul(c, d) + self.assertAllEqual([[4]], sess.run(e)) + # TODO(mrry): Add `server.stop()` and `server.join()` when these work. - with session.Session(server.target) as sess: + def testMultipleSessions(self): + server = self._cached_server + with ops.Graph().as_default(): c = constant_op.constant([[2, 1]]) d = constant_op.constant([[1], [2]]) e = math_ops.matmul(c, d) - self.assertAllEqual([[4]], sess.run(e)) - # TODO(mrry): Add `server.stop()` and `server.join()` when these work. - @test_util.run_v1_only("b/120545219") - def testMultipleSessions(self): - server = self._cached_server + sess_1 = session.Session(server.target) + sess_2 = session.Session(server.target) - c = constant_op.constant([[2, 1]]) - d = constant_op.constant([[1], [2]]) - e = math_ops.matmul(c, d) + self.assertAllEqual([[4]], sess_1.run(e)) + self.assertAllEqual([[4]], sess_2.run(e)) - sess_1 = session.Session(server.target) - sess_2 = session.Session(server.target) - - self.assertAllEqual([[4]], sess_1.run(e)) - self.assertAllEqual([[4]], sess_2.run(e)) - - sess_1.close() - sess_2.close() - # TODO(mrry): Add `server.stop()` and `server.join()` when these work. 
+ sess_1.close() + sess_2.close() + # TODO(mrry): Add `server.stop()` and `server.join()` when these work. # Verifies various reset failures. - @test_util.run_v1_only("b/120545219") def testResetFails(self): - # Creates variable with container name. - with ops.container("test0"): - v0 = variables.VariableV1(1.0, name="v0") - # Creates variable with default container. - v1 = variables.VariableV1(2.0, name="v1") - # Verifies resetting the non-existent target returns error. - with self.assertRaises(errors_impl.NotFoundError): - session.Session.reset("nonexistent", ["test0"]) + with ops.Graph().as_default(): + # Creates variable with container name. + with ops.container("test0"): + v0 = variables.VariableV1(1.0, name="v0") + # Creates variable with default container. + v1 = variables.VariableV1(2.0, name="v1") + # Verifies resetting the non-existent target returns error. + with self.assertRaises(errors_impl.NotFoundError): + session.Session.reset("nonexistent", ["test0"]) - # Verifies resetting with config. - # Verifies that resetting target with no server times out. - with self.assertRaises(errors_impl.DeadlineExceededError): - session.Session.reset( - "grpc://localhost:0", ["test0"], - config=config_pb2.ConfigProto(operation_timeout_in_ms=5)) + # Verifies resetting with config. + # Verifies that resetting target with no server times out. + with self.assertRaises(errors_impl.DeadlineExceededError): + session.Session.reset( + "grpc://localhost:0", ["test0"], + config=config_pb2.ConfigProto(operation_timeout_in_ms=5)) - # Verifies no containers are reset with non-existent container. - server = self._cached_server - sess = session.Session(server.target) - sess.run(variables.global_variables_initializer()) - self.assertAllEqual(1.0, sess.run(v0)) - self.assertAllEqual(2.0, sess.run(v1)) - # No container is reset, but the server is reset. - session.Session.reset(server.target, ["test1"]) - # Verifies that both variables are still valid. - sess = session.Session(server.target) - self.assertAllEqual(1.0, sess.run(v0)) - self.assertAllEqual(2.0, sess.run(v1)) + # Verifies no containers are reset with non-existent container. + server = self._cached_server + sess = session.Session(server.target) + sess.run(variables.global_variables_initializer()) + self.assertAllEqual(1.0, sess.run(v0)) + self.assertAllEqual(2.0, sess.run(v1)) + # No container is reset, but the server is reset. + session.Session.reset(server.target, ["test1"]) + # Verifies that both variables are still valid. + sess = session.Session(server.target) + self.assertAllEqual(1.0, sess.run(v0)) + self.assertAllEqual(2.0, sess.run(v1)) def _useRPCConfig(self): """Return a `tf.compat.v1.ConfigProto` that ensures we use the RPC stack for tests. 
@@ -149,27 +147,28 @@ class GrpcServerTest(test.TestCase): self.assertEqual(0.5, min_val) self.assertEqual(0.5, max_val) - @test_util.run_v1_only("b/120545219") def testCloseCancelsBlockingOperation(self): server = self._cached_server - sess = session.Session(server.target, config=self._useRPCConfig()) + with ops.Graph().as_default(): + sess = session.Session(server.target, config=self._useRPCConfig()) - q = data_flow_ops.FIFOQueue(10, [dtypes.float32]) - enqueue_op = q.enqueue(37.0) - dequeue_t = q.dequeue() + q = data_flow_ops.FIFOQueue(10, [dtypes.float32]) + enqueue_op = q.enqueue(37.0) + dequeue_t = q.dequeue() - sess.run(enqueue_op) - sess.run(dequeue_t) + sess.run(enqueue_op) + sess.run(dequeue_t) - def blocking_dequeue(): - with self.assertRaisesRegex(errors_impl.CancelledError, "Session::Close"): - sess.run(dequeue_t) + def blocking_dequeue(): + with self.assertRaisesRegex(errors_impl.CancelledError, + "Session::Close"): + sess.run(dequeue_t) - blocking_thread = self.checkedThread(blocking_dequeue) - blocking_thread.start() - time.sleep(0.5) - sess.close() - blocking_thread.join() + blocking_thread = self.checkedThread(blocking_dequeue) + blocking_thread.start() + time.sleep(0.5) + sess.close() + blocking_thread.join() def testInteractiveSession(self): server = self._cached_server @@ -210,19 +209,21 @@ class GrpcServerTest(test.TestCase): "local": ["localhost"] }, job_name="local", task_index=0) - @test_util.run_v1_only("b/120545219") def testTimeoutRaisesException(self): server = self._cached_server - q = data_flow_ops.FIFOQueue(1, [dtypes.float32]) - blocking_t = q.dequeue() + with ops.Graph().as_default(): + q = data_flow_ops.FIFOQueue(1, [dtypes.float32]) + blocking_t = q.dequeue() - with session.Session(server.target) as sess: - with self.assertRaises(errors_impl.DeadlineExceededError): - sess.run(blocking_t, options=config_pb2.RunOptions(timeout_in_ms=1000)) + with session.Session(server.target) as sess: + with self.assertRaises(errors_impl.DeadlineExceededError): + sess.run( + blocking_t, options=config_pb2.RunOptions(timeout_in_ms=1000)) - with session.Session(server.target, config=self._useRPCConfig()) as sess: - with self.assertRaises(errors_impl.DeadlineExceededError): - sess.run(blocking_t, options=config_pb2.RunOptions(timeout_in_ms=1000)) + with session.Session(server.target, config=self._useRPCConfig()) as sess: + with self.assertRaises(errors_impl.DeadlineExceededError): + sess.run( + blocking_t, options=config_pb2.RunOptions(timeout_in_ms=1000)) def testTwoServersSamePort(self): # Starting a server with the same target as the cached server should fail. 
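For reference, a minimal sketch of the pattern this patch applies throughout the test file (a hypothetical test, not code from the patch): instead of relying on the `run_v1_only` decorator, each test builds its ops inside an explicitly entered graph and opens a v1 session against that graph, so the test behaves the same whether or not eager execution is enabled.

    # Illustrative sketch only; the test class, test name, and ops are assumptions.
    import tensorflow.compat.v1 as tf


    class ExampleGraphTest(tf.test.TestCase):

      def test_matmul_in_graph(self):
        # Entering a graph explicitly replaces the @test_util.run_v1_only decorator.
        with tf.Graph().as_default():
          c = tf.constant([[2, 1]])
          d = tf.constant([[1], [2]])
          e = tf.matmul(c, d)
          with tf.Session() as sess:
            self.assertAllEqual([[4]], sess.run(e))


    if __name__ == "__main__":
      tf.test.main()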
@@ -245,63 +246,63 @@ class GrpcServerTest(test.TestCase): queue_runner_impl.start_queue_runners(sess) sess.run(var.assign(3.0)) - @test_util.run_v1_only("b/120545219") def testIsolateSessionState(self): server = self._cached_server - init_value = array_ops.placeholder(dtypes.int32) - v = variables.VariableV1(init_value, validate_shape=False, name="v") + with ops.Graph().as_default(): + init_value = array_ops.placeholder(dtypes.int32) + v = variables.VariableV1(init_value, validate_shape=False, name="v") - sharing_config = config_pb2.ConfigProto(isolate_session_state=False) - sharing_sess_0 = session.Session(server.target, config=sharing_config) - sharing_sess_1 = session.Session(server.target, config=sharing_config) + sharing_config = config_pb2.ConfigProto(isolate_session_state=False) + sharing_sess_0 = session.Session(server.target, config=sharing_config) + sharing_sess_1 = session.Session(server.target, config=sharing_config) - isolate_config = config_pb2.ConfigProto(isolate_session_state=True) - isolate_sess_0 = session.Session(server.target, config=isolate_config) - isolate_sess_1 = session.Session(server.target, config=isolate_config) + isolate_config = config_pb2.ConfigProto(isolate_session_state=True) + isolate_sess_0 = session.Session(server.target, config=isolate_config) + isolate_sess_1 = session.Session(server.target, config=isolate_config) - # Initially all variables are initialized. - for sess in [sharing_sess_0, sharing_sess_1, - isolate_sess_0, isolate_sess_1]: + # Initially all variables are initialized. + for sess in [ + sharing_sess_0, sharing_sess_1, isolate_sess_0, isolate_sess_1 + ]: + with self.assertRaises(errors_impl.FailedPreconditionError): + sess.run(v) + + # Shared sessions will see each other's updates, but isolated sessions + # will not. + sharing_sess_0.run(v.initializer, feed_dict={init_value: 86}) + self.assertAllEqual(86, sharing_sess_0.run(v)) + self.assertAllEqual(86, sharing_sess_1.run(v)) with self.assertRaises(errors_impl.FailedPreconditionError): - sess.run(v) + isolate_sess_0.run(v) + with self.assertRaises(errors_impl.FailedPreconditionError): + isolate_sess_1.run(v) - # Shared sessions will see each other's updates, but isolated sessions - # will not. - sharing_sess_0.run(v.initializer, feed_dict={init_value: 86}) - self.assertAllEqual(86, sharing_sess_0.run(v)) - self.assertAllEqual(86, sharing_sess_1.run(v)) - with self.assertRaises(errors_impl.FailedPreconditionError): - isolate_sess_0.run(v) - with self.assertRaises(errors_impl.FailedPreconditionError): - isolate_sess_1.run(v) + # Changing the shape works because `validate_shape` is False. + sharing_sess_1.run(v.initializer, feed_dict={init_value: [86, 99]}) + self.assertAllEqual([86, 99], sharing_sess_0.run(v)) + self.assertAllEqual([86, 99], sharing_sess_1.run(v)) + with self.assertRaises(errors_impl.FailedPreconditionError): + isolate_sess_0.run(v) + with self.assertRaises(errors_impl.FailedPreconditionError): + isolate_sess_1.run(v) - # Changing the shape works because `validate_shape` is False. - sharing_sess_1.run(v.initializer, feed_dict={init_value: [86, 99]}) - self.assertAllEqual([86, 99], sharing_sess_0.run(v)) - self.assertAllEqual([86, 99], sharing_sess_1.run(v)) - with self.assertRaises(errors_impl.FailedPreconditionError): - isolate_sess_0.run(v) - with self.assertRaises(errors_impl.FailedPreconditionError): - isolate_sess_1.run(v) + # Initializing in an isolated session will only affect the state in that + # session. 
+ isolate_sess_0.run(v.initializer, feed_dict={init_value: 37}) + self.assertAllEqual([86, 99], sharing_sess_0.run(v)) + self.assertAllEqual([86, 99], sharing_sess_1.run(v)) + self.assertAllEqual(37, isolate_sess_0.run(v)) + with self.assertRaises(errors_impl.FailedPreconditionError): + isolate_sess_1.run(v) - # Initializing in an isolated session will only affect the state in that - # session. - isolate_sess_0.run(v.initializer, feed_dict={init_value: 37}) - self.assertAllEqual([86, 99], sharing_sess_0.run(v)) - self.assertAllEqual([86, 99], sharing_sess_1.run(v)) - self.assertAllEqual(37, isolate_sess_0.run(v)) - with self.assertRaises(errors_impl.FailedPreconditionError): - isolate_sess_1.run(v) + # Isolated sessions can have different shapes for the same variable. + isolate_sess_1.run(v.initializer, feed_dict={init_value: [19, 86]}) + self.assertAllEqual([86, 99], sharing_sess_0.run(v)) + self.assertAllEqual([86, 99], sharing_sess_1.run(v)) + self.assertAllEqual(37, isolate_sess_0.run(v)) + self.assertAllEqual([19, 86], isolate_sess_1.run(v)) - # Isolated sessions can have different shapes for the same variable. - isolate_sess_1.run(v.initializer, feed_dict={init_value: [19, 86]}) - self.assertAllEqual([86, 99], sharing_sess_0.run(v)) - self.assertAllEqual([86, 99], sharing_sess_1.run(v)) - self.assertAllEqual(37, isolate_sess_0.run(v)) - self.assertAllEqual([19, 86], isolate_sess_1.run(v)) - - @test_util.run_v1_only("b/120545219") def testShapeChangingIsolateState(self): server = self._cached_server sharing_config = config_pb2.ConfigProto(isolate_session_state=False) @@ -322,12 +323,6 @@ class GrpcServerTest(test.TestCase): sess.run(w_vector.initializer) self.assertAllEqual([4, 5, 6], sess.run(w_vector)) - with ops.Graph().as_default(): - w_scalar = variables.VariableV1(86, name="w") - with session.Session(server.target, config=sharing_config) as sess: - with self.assertRaises(errors_impl.InvalidArgumentError): - sess.run(w_scalar.initializer) - with ops.Graph().as_default(): w_scalar = variables.VariableV1(37, name="w") with session.Session(server.target, config=isolate_config) as sess: From a7e6b483d3b14be2f2cb419693d16d0639be4822 Mon Sep 17 00:00:00 2001 From: Jaesung Chung Date: Mon, 13 Jul 2020 17:31:49 -0700 Subject: [PATCH 0330/2522] Use a fallback graphdef based conversion when saved model schema version is zero PiperOrigin-RevId: 321067895 Change-Id: I604657fdbd3c41a1ddc0b7bbfb21b919b3d8a187 --- tensorflow/lite/python/lite.py | 4 ++++ tensorflow/lite/python/lite_v2_test.py | 21 +++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/tensorflow/lite/python/lite.py b/tensorflow/lite/python/lite.py index 4c6bd362efa..e919aa4b00f 100644 --- a/tensorflow/lite/python/lite.py +++ b/tensorflow/lite/python/lite.py @@ -510,6 +510,10 @@ class TFLiteConverterBase(object): if not self._saved_model_exported_names: self._saved_model_exported_names = [] self._saved_model_version = saved_model_proto.saved_model_schema_version + if self._saved_model_version == 0: + self.saved_model_dir = None + logging.warning("SavedModel schema version is zero.") + return if self._saved_model_version not in [1, 2]: raise ValueError("SavedModel file format({0}) is not supported".format( self._saved_model_version)) diff --git a/tensorflow/lite/python/lite_v2_test.py b/tensorflow/lite/python/lite_v2_test.py index 3b51991d674..6fab4fd6086 100644 --- a/tensorflow/lite/python/lite_v2_test.py +++ b/tensorflow/lite/python/lite_v2_test.py @@ -36,9 +36,11 @@ from tensorflow.python.framework import ops from 
tensorflow.python.framework import test_util from tensorflow.python.keras.layers import recurrent from tensorflow.python.keras.layers import recurrent_v2 +from tensorflow.python.lib.io import file_io from tensorflow.python.platform import test from tensorflow.python.saved_model import save_options from tensorflow.python.saved_model import saved_model +from tensorflow.python.saved_model.loader_impl import parse_saved_model from tensorflow.python.saved_model.save import save from tensorflow.python.training.tracking import tracking @@ -548,6 +550,25 @@ class FromSavedModelTest(lite_v2_test_util.ModelTest): self.assertTrue(([1, 16, 16, 3] == output_details[0]['shape']).all()) self.assertEqual((0., 0.), output_details[0]['quantization']) + @test_util.run_v2_only + def testTF1HubFormattedModel(self): + """Test a TF1 hub formatted model.""" + saved_model_dir = self._createV1SavedModel(shape=[1, 16, 16, 3]) + + # TF1 hub model is based on V1 saved model and they omit the saved model + # schema version setting. + saved_model_proto = parse_saved_model(saved_model_dir) + saved_model_proto.saved_model_schema_version = 0 + + saved_model_pb_file_path = os.path.join(saved_model_dir, 'saved_model.pb') + with file_io.FileIO(saved_model_pb_file_path, 'wb') as writer: + writer.write(saved_model_proto.SerializeToString()) + + # Convert model and ensure model is not None. + converter = lite.TFLiteConverterV2.from_saved_model(saved_model_dir) + tflite_model = converter.convert() + self.assertTrue(tflite_model) + @test_util.run_v2_only def testConstModel(self): """Test a basic model with functions to make sure functions are inlined.""" From 1e1bcbbf803409fa05d14c6ca6bcd0f9e586959c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Jul 2020 17:33:04 -0700 Subject: [PATCH 0331/2522] Use tf._implements to fuse Tf.Text APIs PiperOrigin-RevId: 321068074 Change-Id: Ia43c6fe5b29f41a59461a458fb97e986602a2a63 --- tensorflow/compiler/mlir/lite/BUILD | 2 + .../compiler/mlir/lite/tests/fuse-tftext.mlir | 13 +++--- .../prepare_composite_functions_tf.cc | 44 +++++++++++++------ .../compiler/mlir/lite/utils/tftext_utils.cc | 21 +++++---- .../compiler/mlir/lite/utils/tftext_utils.h | 8 +++- .../mlir/lite/utils/tftext_utils_test.cc | 4 +- 6 files changed, 60 insertions(+), 32 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/BUILD b/tensorflow/compiler/mlir/lite/BUILD index 8d0c204f434..56dd0854ee8 100644 --- a/tensorflow/compiler/mlir/lite/BUILD +++ b/tensorflow/compiler/mlir/lite/BUILD @@ -273,6 +273,7 @@ cc_library( deps = [ ":tensorflow_lite", "//tensorflow/compiler/mlir/tensorflow", + "//tensorflow/compiler/mlir/tensorflow:tensorflow_attributes", "//tensorflow/core:framework", "@llvm-project//llvm:Support", "@llvm-project//mlir:IR", @@ -360,6 +361,7 @@ cc_library( "//tensorflow/compiler/mlir/tensorflow", "//tensorflow/compiler/mlir/tensorflow:convert_tensor", "//tensorflow/compiler/mlir/tensorflow:mangling_util", + "//tensorflow/compiler/mlir/tensorflow:tensorflow_attributes", "//tensorflow/compiler/mlir/tensorflow:tensorflow_types", "//tensorflow/compiler/mlir/tensorflow:unroll_batch_matmul_pass", "//tensorflow/compiler/xla:status", diff --git a/tensorflow/compiler/mlir/lite/tests/fuse-tftext.mlir b/tensorflow/compiler/mlir/lite/tests/fuse-tftext.mlir index 1a3ed0509c4..97c0c7358ca 100644 --- a/tensorflow/compiler/mlir/lite/tests/fuse-tftext.mlir +++ b/tensorflow/compiler/mlir/lite/tests/fuse-tftext.mlir @@ -1,7 +1,7 @@ // RUN: tf-opt -tfl-prepare-composite-funcs-tf -tfl-fuse-tftext=true %s 
-split-input-file | FileCheck %s module { - func @whitespace_tokenizer_rank1(%arg0: tensor<1x!tf.string> {tf._user_specified_name = "input"}) -> (tensor, tensor) attributes {tf._input_shapes = [#tf.shape<1>], tf.api_implements = "tftext:WhitespaceTokenizer", tf.signature.is_stateful} { + func @whitespace_tokenizer_rank1(%arg0: tensor<1x!tf.string> {tf._user_specified_name = "input"}) -> (tensor, tensor) attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<1>], tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf.signature.is_stateful} { %0 = "tf.Const"() {value = dense<[0, 1]> : tensor<2xi64>} : () -> tensor<2xi64> %1 = "tf.Const"() {value = dense<[]> : tensor<0xi64>} : () -> tensor<0xi64> %2 = "tf.Const"() {value = dense : tensor} : () -> tensor @@ -1027,11 +1027,11 @@ module { return %1 : tensor } - // CHECK: func @whitespace_tokenizer_rank1(%arg0: tensor<1x!tf.string> {tf._user_specified_name = "input"}) -> (tensor, tensor) attributes {tf._input_shapes = [#tf.shape<1>], tf.api_implements = "tftext:WhitespaceTokenizer", tf.signature.is_stateful} { + // CHECK: func @whitespace_tokenizer_rank1(%arg0: tensor<1x!tf.string> {tf._user_specified_name = "input"}) -> (tensor, tensor) attributes {sym_visibility = "private", tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf._input_shapes = [#tf.shape<1>], tf.signature.is_stateful} { // CHECK: %0:2 = "tfl.custom"(%arg0) {custom_code = "tftext:WhitespaceTokenizer", custom_option = opaque<"tfl", "0x"> : tensor<0xi8>} : (tensor<1x!tf.string>) -> (tensor, tensor) // CHECK: return %0#0, %0#1 : tensor, tensor - func @whitespace_tokenizer_rank2(%arg0: tensor {tf._user_specified_name = "input"}) -> (tensor, tensor, tensor) attributes {tf._input_shapes = [#tf.shape], tf.api_implements = "tftext:WhitespaceTokenizer", tf.signature.is_stateful} { + func @whitespace_tokenizer_rank2(%arg0: tensor {tf._user_specified_name = "input"}) -> (tensor, tensor, tensor) attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape], tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf.signature.is_stateful} { %0 = "tf.Const"() {value = dense<[]> : tensor<0xi64>} : () -> tensor<0xi64> %1 = "tf.Const"() {value = dense : tensor} : () -> tensor %2 = "tf.Const"() {value = dense<-1> : tensor} : () -> tensor @@ -2160,11 +2160,12 @@ module { } - // CHECK: func @whitespace_tokenizer_rank2(%arg0: tensor {tf._user_specified_name = "input"}) -> (tensor, tensor, tensor) attributes {tf._input_shapes = [#tf.shape], tf.api_implements = "tftext:WhitespaceTokenizer", tf.signature.is_stateful} { + + // CHECK: func @whitespace_tokenizer_rank2(%arg0: tensor {tf._user_specified_name = "input"}) -> (tensor, tensor, tensor) attributes {sym_visibility = "private", tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf._input_shapes = [#tf.shape], tf.signature.is_stateful} { // CHECK: %0:3 = "tfl.custom"(%arg0) {custom_code = "tftext:WhitespaceTokenizer", custom_option = opaque<"tfl", "0x"> : tensor<0xi8>} : (tensor) -> (tensor, tensor, tensor) // CHECK: return %0#0, %0#1, %0#2 : tensor, tensor, tensor - func @whitespace_tokenizer_rank0(%arg0: tensor {tf._user_specified_name = "input"}) -> tensor attributes {tf._input_shapes = [#tf.shape<>], tf.api_implements = "tftext:WhitespaceTokenizer", tf.signature.is_stateful} { + func @whitespace_tokenizer_rank0(%arg0: tensor {tf._user_specified_name = "input"}) -> tensor attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<>], tf._implements = 
#tf.func<@"tftext:WhitespaceTokenizer", {}>, tf.signature.is_stateful} { %0 = "tf.Const"() {value = dense<[0, 1]> : tensor<2xi64>} : () -> tensor<2xi64> %1 = "tf.Const"() {value = dense<[]> : tensor<0xi64>} : () -> tensor<0xi64> %2 = "tf.Const"() {value = dense : tensor} : () -> tensor @@ -3190,7 +3191,7 @@ module { return %1 : tensor } - // CHECK: func @whitespace_tokenizer_rank0(%arg0: tensor {tf._user_specified_name = "input"}) -> tensor attributes {tf._input_shapes = [#tf.shape<>], tf.api_implements = "tftext:WhitespaceTokenizer", tf.signature.is_stateful} { + // CHECK: func @whitespace_tokenizer_rank0(%arg0: tensor {tf._user_specified_name = "input"}) -> tensor attributes {sym_visibility = "private", tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf._input_shapes = [#tf.shape<>], tf.signature.is_stateful} { // CHECK: %0 = "tfl.custom"(%arg0) {custom_code = "tftext:WhitespaceTokenizer", custom_option = opaque<"tfl", "0x"> : tensor<0xi8>} : (tensor) -> tensor // CHECK: return %0 : tensor } diff --git a/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc b/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc index 3d2ab662e6f..20fc9fc0692 100644 --- a/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc +++ b/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc @@ -42,6 +42,7 @@ limitations under the License. #include "tensorflow/compiler/mlir/lite/transforms/passes.h" #include "tensorflow/compiler/mlir/lite/utils/lstm_utils.h" #include "tensorflow/compiler/mlir/lite/utils/tftext_utils.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_attributes.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" // The cmd line flag to turn on/off Tf.Text API fusion. @@ -56,9 +57,11 @@ namespace TFL { namespace { constexpr char kTFAPIImplements[] = "tf.api_implements"; -constexpr char kTfTextAPIPRefix[] = "tftext:"; +constexpr char kTFTextAPIPrefix[] = "tftext:"; constexpr char kTfNMSPadded[] = "non_max_suppression_padded_v2"; +using mlir::TF::FuncAttr; + // Abstracts the conversion of the embedded lookup composite function. class ConvertEmbeddedLookupFunc { public: @@ -161,7 +164,9 @@ class PrepareCompositeFunctionsPass explicit PrepareCompositeFunctionsPass() {} private: + // TODO(b/160915525): Consolidate FuncAttr and StringAttr into one. 
void ConvertTFImplements(FuncOp func, StringAttr attr); + void ConvertTFImplementsWithAttributes(FuncOp func, FuncAttr attr); void ConvertTFAPIImplements(FuncOp func, StringAttr attr, ModuleOp module); void runOnOperation() override; }; @@ -204,6 +209,18 @@ void PrepareCompositeFunctionsPass::ConvertTFImplements(FuncOp func, } } +void PrepareCompositeFunctionsPass::ConvertTFImplementsWithAttributes( + FuncOp func, FuncAttr attr) { + auto api_name = attr.GetName().getLeafReference(); + bool enable_fuse_tftext = + fuse_tftext_flag || IsTFTextRegistered(tensorflow::OpRegistry::Global()); + if (api_name.startswith(kTFTextAPIPrefix) && enable_fuse_tftext) { + if (failed(ConvertTFTextAPI(func, api_name, attr))) { + return signalPassFailure(); + } + } +} + LogicalResult CheckOutputConsumer( Operation* call_op, int expected_num_outputs, llvm::DenseSet expected_consumer_indices) { @@ -256,26 +273,27 @@ void PrepareCompositeFunctionsPass::ConvertTFAPIImplements(FuncOp func, OpBuilder builder(func.getBody()); if (failed(ConvertKerasLSTMLayer(func, &builder))) return signalPassFailure(); - } else if (fuse_tftext_flag || - IsTfTextRegistered(tensorflow::OpRegistry::Global())) { - if (attr.getValue().startswith(kTfTextAPIPRefix)) { - if (failed(ConvertTFTextAPI(func, attr.getValue()))) { - return signalPassFailure(); - } - } } } void PrepareCompositeFunctionsPass::runOnOperation() { auto module = getOperation(); for (auto func : module.getOps()) { - // We have two kinds of implements: - // 1) tf._implements. - // 2) tf.api_implements. + // We have three kinds of implements: + // 1) tf._implements, with string attributes. + // 2) tf._implements, with proto attributes. + // 3) tf.api_implements. // We need to handle them separately. - auto tf_implements_attr = func.getAttrOfType(kTFImplements); + auto tf_implements_attr_str = func.getAttrOfType(kTFImplements); + if (tf_implements_attr_str) { + ConvertTFImplements(func, tf_implements_attr_str); + continue; + } + + auto tf_implements_attr = func.getAttrOfType(kTFImplements); if (tf_implements_attr) { - ConvertTFImplements(func, tf_implements_attr); + ConvertTFImplementsWithAttributes(func, tf_implements_attr); + continue; } auto tf_api_implements_attr = diff --git a/tensorflow/compiler/mlir/lite/utils/tftext_utils.cc b/tensorflow/compiler/mlir/lite/utils/tftext_utils.cc index 2ed0891dc59..1681f654b92 100644 --- a/tensorflow/compiler/mlir/lite/utils/tftext_utils.cc +++ b/tensorflow/compiler/mlir/lite/utils/tftext_utils.cc @@ -44,7 +44,9 @@ namespace TFL { namespace { constexpr char kWhitespaceTokenizer[] = "tftext:WhitespaceTokenizer"; -constexpr char kTFAPIImplements[] = "tf.api_implements"; +constexpr char kTFImplements[] = "tf._implements"; + +using mlir::TF::FuncAttr; inline OpaqueElementsAttr emptyCustomOption(OpBuilder* builder) { std::string content = ""; @@ -121,11 +123,11 @@ LogicalResult VerifyWhitespaceTokenizer(mlir::FuncOp func) { return success(); } -LogicalResult ConvertWhitespaceTokenizer(mlir::FuncOp func, - llvm::StringRef api) { +LogicalResult ConvertWhitespaceTokenizer(mlir::FuncOp func, llvm::StringRef api, + FuncAttr attr) { func.eraseBody(); func.addEntryBlock(); - func.setAttr(kTFAPIImplements, StringAttr::get(api, func.getContext())); + func.setAttr(kTFImplements, attr); Value text = func.getArgument(0); OpBuilder builder(func.getBody()); @@ -137,20 +139,21 @@ LogicalResult ConvertWhitespaceTokenizer(mlir::FuncOp func, } } // namespace -LogicalResult ConvertTFTextAPI(mlir::FuncOp func, llvm::StringRef api) { +LogicalResult 
ConvertTFTextAPI(mlir::FuncOp func, llvm::StringRef api, + FuncAttr attr) { if (api.str() == kWhitespaceTokenizer) { if (succeeded(VerifyWhitespaceTokenizer(func))) { - return ConvertWhitespaceTokenizer(func, api); + return ConvertWhitespaceTokenizer(func, api, attr); } } return failure(); } -bool IsTfTextRegistered(const tensorflow::OpRegistry* op_registery) { - const std::vector kTfTextOps = { +bool IsTFTextRegistered(const tensorflow::OpRegistry* op_registery) { + const std::vector kTFTextOps = { "WhitespaceTokenizeWithOffsets", }; - for (const auto& iter : kTfTextOps) { + for (const auto& iter : kTFTextOps) { if (op_registery->LookUp(iter)) { return true; } diff --git a/tensorflow/compiler/mlir/lite/utils/tftext_utils.h b/tensorflow/compiler/mlir/lite/utils/tftext_utils.h index c52ee019d8d..55e4680c3dd 100644 --- a/tensorflow/compiler/mlir/lite/utils/tftext_utils.h +++ b/tensorflow/compiler/mlir/lite/utils/tftext_utils.h @@ -27,14 +27,18 @@ limitations under the License. #include "mlir/IR/Value.h" // from @llvm-project #include "mlir/Support/LogicalResult.h" // from @llvm-project #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_attributes.h" #include "tensorflow/core/framework/op.h" namespace mlir { namespace TFL { -LogicalResult ConvertTFTextAPI(mlir::FuncOp func, llvm::StringRef api); +// Fuse TF.Text APIs annotated by tf.function to a TFLite custom op. +LogicalResult ConvertTFTextAPI(mlir::FuncOp func, llvm::StringRef api, + mlir::TF::FuncAttr attr); -bool IsTfTextRegistered(const tensorflow::OpRegistry* op_registery); +// Check if TF.Text Tensorflow ops are registered. +bool IsTFTextRegistered(const tensorflow::OpRegistry* op_registery); } // end namespace TFL } // end namespace mlir diff --git a/tensorflow/compiler/mlir/lite/utils/tftext_utils_test.cc b/tensorflow/compiler/mlir/lite/utils/tftext_utils_test.cc index 7d29264aaae..9bcfa89c544 100644 --- a/tensorflow/compiler/mlir/lite/utils/tftext_utils_test.cc +++ b/tensorflow/compiler/mlir/lite/utils/tftext_utils_test.cc @@ -41,13 +41,13 @@ void Register(const std::string& op_name, OpRegistry* registry) { TEST(TfTextUtilsTest, TestTfTextRegistered) { std::unique_ptr registry(new OpRegistry); Register("WhitespaceTokenizeWithOffsets", registry.get()); - EXPECT_TRUE(IsTfTextRegistered(registry.get())); + EXPECT_TRUE(IsTFTextRegistered(registry.get())); } TEST(TfTextUtilsTest, TestTfTextNotRegistered) { std::unique_ptr registry(new OpRegistry); Register("Test", registry.get()); - EXPECT_FALSE(IsTfTextRegistered(registry.get())); + EXPECT_FALSE(IsTFTextRegistered(registry.get())); } } // namespace TFL } // namespace mlir From 8b52123ca3136cc54cae5b17a08e18f99e33ce1e Mon Sep 17 00:00:00 2001 From: Henry Tan Date: Mon, 13 Jul 2020 17:44:16 -0700 Subject: [PATCH 0332/2522] Add `TpuCompileOpImpl` to `core/tpu/kernels`. 
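The new kernel accepts either a serialized MLIR module or a NameAttrList function; in both cases Compile() builds a TpuCompilationRequestProto and hands it to TpuProgramGroup::CompileAndBuild. A minimal construction sketch against the MLIR-module constructor added below; the metadata and module values are placeholders, and this is not how the runtime normally instantiates the op:

    #include "tensorflow/core/tpu/kernels/tpu_compile_op_impl.h"

    // Illustrative values only.
    tensorflow::tpu::TPUCompileMetadataProto metadata;   // filled in by the caller
    const std::string mlir_module = "...";               // serialized MLIR module
    tensorflow::tpu::TpuCompileOpKernelImpl compile_op(
        mlir_module, metadata, /*num_computations=*/1,
        /*return_hlo_protos=*/false, /*unload_cache_on_session_close=*/false);

The NameAttrList overload takes the same trailing arguments and forwards a null persistent cache to the common base class.
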
PiperOrigin-RevId: 321069369 Change-Id: Ic276b3e5a8b9dd8c8708a0ba4a2142bd76a2a9e4 --- tensorflow/core/tpu/kernels/BUILD | 26 ++++++++ .../core/tpu/kernels/tpu_compile_op_impl.cc | 39 +++++++++++ .../core/tpu/kernels/tpu_compile_op_impl.h | 66 +++++++++++++++++++ 3 files changed, 131 insertions(+) create mode 100644 tensorflow/core/tpu/kernels/tpu_compile_op_impl.cc create mode 100644 tensorflow/core/tpu/kernels/tpu_compile_op_impl.h diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index dfa37348c8d..b256790a0fb 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -508,3 +508,29 @@ cc_library( "//tensorflow/stream_executor/tpu:tpu_executor_c_api_hdrs", ], ) + +cc_library( + name = "tpu_compile_op_impl", + srcs = ["tpu_compile_op_impl.cc"], + hdrs = ["tpu_compile_op_impl.h"], + deps = [ + "//tensorflow/compiler/jit:shape_inference", + "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/xla:status", + "//tensorflow/core:core_cpu_internal", + "//tensorflow/core:framework", + "//tensorflow/core/tpu/kernels:tpu_compilation_cache_key", + "//tensorflow/core/tpu/kernels:tpu_compile_c_api_hdrs", + "//tensorflow/core/tpu/kernels:tpu_compile_op_common", + "//tensorflow/core/tpu/kernels:tpu_compile_op_support", + "//tensorflow/core/tpu/kernels:tpu_compile_proto_cc", + "//tensorflow/core/tpu/kernels:tpu_mesh_state_c_api_hdrs", + "//tensorflow/core/tpu/kernels:tpu_program_group", + "//tensorflow/core/tpu/kernels:tpu_program_group_interface", + "//tensorflow/core/tpu/kernels:tpu_util", + "//tensorflow/stream_executor/tpu:tpu_executor", + "//tensorflow/stream_executor/tpu:tpu_executor_c_api_hdrs", + "@com_google_absl//absl/types:variant", + ], + alwayslink = 1, +) diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_impl.cc b/tensorflow/core/tpu/kernels/tpu_compile_op_impl.cc new file mode 100644 index 00000000000..0d514997142 --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_impl.cc @@ -0,0 +1,39 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tensorflow/core/tpu/kernels/tpu_compile_op_impl.h" + +#include "tensorflow/compiler/xla/status.h" +#include "tensorflow/core/tpu/kernels/tpu_compile.pb.h" +#include "tensorflow/core/tpu/kernels/tpu_compile_op_support.h" +#include "tensorflow/core/tpu/kernels/tpu_mesh_state_c_api.h" +#include "tensorflow/core/tpu/kernels/tpu_program_group.h" +#include "tensorflow/core/tpu/kernels/tpu_program_group_interface.h" + +namespace tensorflow { +namespace tpu { +Status TpuCompileOpKernelImpl::Compile( + const std::variant& computation, + const XLA_TpuMeshState* mesh_state, + const std::vector& arg_shapes, + TpuProgramGroupInterface* tpu_program_group) { + TF_ASSIGN_OR_RETURN( + TpuCompilationRequestProto compilation_request, + CreateTpuCompilationRequest(computation, metadata_, arg_shapes)); + + return TpuProgramGroup::CompileAndBuild(compilation_request, mesh_state, + tpu_program_group); +} +} // namespace tpu +} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_impl.h b/tensorflow/core/tpu/kernels/tpu_compile_op_impl.h new file mode 100644 index 00000000000..cd8ef78614a --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_impl.h @@ -0,0 +1,66 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILE_OP_IMPL_H_ +#define TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILE_OP_IMPL_H_ + +#include +#include + +#include "absl/types/variant.h" +#include "tensorflow/compiler/jit/shape_inference.h" +#include "tensorflow/core/framework/function.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_key.h" +#include "tensorflow/core/tpu/kernels/tpu_compile_op_common.h" +#include "tensorflow/core/tpu/kernels/tpu_mesh_state_c_api.h" +#include "tensorflow/core/tpu/kernels/tpu_program_group_interface.h" + +namespace tensorflow { +namespace tpu { + +// Base class for TpuCompileOp and TpuCompileMlirOp. +// Depends on whether it is given a computation in the form of serialized MLIR +// module or a Tensorflow function, TpuCompileOpKernelImpl converts computation +// into XLA HLO and then into a TPU execuable binary. 
+class TpuCompileOpKernelImpl : public TpuCompileOpKernelCommon { + public: + TpuCompileOpKernelImpl(const std::string& mlir_module, + const tpu::TPUCompileMetadataProto& metadata, + int num_computations, bool return_hlo_protos, + bool unload_cache_on_session_close) + : TpuCompileOpKernelCommon(mlir_module, metadata, num_computations, + return_hlo_protos, + unload_cache_on_session_close) {} + + TpuCompileOpKernelImpl(const NameAttrList& function, + const tpu::TPUCompileMetadataProto& metadata, + int num_computations, bool return_hlo_protos, + bool unload_cache_on_session_close) + : TpuCompileOpKernelCommon( + function, metadata, num_computations, return_hlo_protos, + unload_cache_on_session_close, /*persistent_cache=*/nullptr) {} + + private: + FRIEND_TEST(TpuCompileOpImplTest, Compile); + + Status Compile( + const absl::variant& computation, + const XLA_TpuMeshState* mesh_state, + const std::vector& arg_shapes, + TpuProgramGroupInterface* tpu_program_group) override; +}; +} // namespace tpu +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILE_OP_IMPL_H_ From a03c8173f7fefa6d1945e7718e7f8f8b0128c6aa Mon Sep 17 00:00:00 2001 From: ShengYang1 Date: Tue, 14 Jul 2020 09:07:17 +0800 Subject: [PATCH 0333/2522] update --- .../kernel_tests/cwise_ops_unary_test.py | 1 + tensorflow/python/ops/math_ops_test.py | 24 ------------------- 2 files changed, 1 insertion(+), 24 deletions(-) diff --git a/tensorflow/python/kernel_tests/cwise_ops_unary_test.py b/tensorflow/python/kernel_tests/cwise_ops_unary_test.py index df848a653d4..33b43f5a086 100644 --- a/tensorflow/python/kernel_tests/cwise_ops_unary_test.py +++ b/tensorflow/python/kernel_tests/cwise_ops_unary_test.py @@ -389,6 +389,7 @@ class UnaryOpTest(test.TestCase): 2).reshape(1, 3, 2).astype(dtypes_lib.bfloat16.as_numpy_dtype) self._compareCpu(x, np.abs, math_ops.abs) self._compareCpu(x, np.abs, _ABS) + self._compareBoth(x, np.exp, math_ops.exp) self._compareBoth(x, np.negative, math_ops.negative) self._compareBoth(x, np.negative, _NEG) diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py index dd4c47e0122..bf15bf86ee2 100644 --- a/tensorflow/python/ops/math_ops_test.py +++ b/tensorflow/python/ops/math_ops_test.py @@ -872,29 +872,5 @@ class RangeTest(test_util.TensorFlowTestCase): self.assertAllEqual(values, self.evaluate(tensor)) -@test_util.run_all_in_graph_and_eager_modes -class ExpTest(test_util.TensorFlowTestCase): - - def testExp(self): - x = np.random.randn(1000, 1000) - for dtype in [np.float32, np.float64, np.float16]: - x_np = np.array(x, dtype=dtype) - x_tf = constant_op.constant(x_np, shape=x_np.shape) - y_tf = math_ops.exp(x_tf) - y_tf_np = self.evaluate(y_tf) - y_np = np.exp(x_np) - self.assertAllClose(y_tf_np, y_np, atol=1e-5) - - def testExpExtendType(self): - in_bf16 = np.random.randn(1000, 1000).astype(dtypes.bfloat16.as_numpy_dtype) - out_bf16 = self.evaluate(math_ops.exp(in_bf16)) - - in_f32 = math_ops.cast(in_bf16, dtypes.float32) - out_f32 = self.evaluate(math_ops.exp(in_f32)) - expected = math_ops.cast(out_f32, dtypes.bfloat16) - - self.assertAllClose(out_bf16, expected, rtol=1e-5) - - if __name__ == "__main__": googletest.main() From afa9b26c2f1b33b02c84909452fb1bfa2aea41cc Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 13 Jul 2020 18:04:40 -0700 Subject: [PATCH 0334/2522] Integrate LLVM at https://github.com/llvm/llvm-project/commit/83080a294ad7 PiperOrigin-RevId: 321071933 Change-Id: I510044ec8631a951c064c6d2d865c978042bc36c --- tensorflow/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 9af009075db..e0e05a96323 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -710,8 +710,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "6bda276f93023ae91937cb8a1f45bf27e5a3ced7" - LLVM_SHA256 = "661d02c6c56ec2e93e23c13d669b44c4506422a9d7af8323d9b368e1595af952" + LLVM_COMMIT = "83080a294ad7d145d758821bcf4354ad0cb7d299" + LLVM_SHA256 = "fff6d3233b8ad5ebf3362a7dea0d7bb323f996e3182e6785772696337eed484f" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From 3749694080006f7b85862f6770b0aed117411785 Mon Sep 17 00:00:00 2001 From: Jaesung Chung Date: Mon, 13 Jul 2020 18:08:07 -0700 Subject: [PATCH 0335/2522] Add complex tensor support in TFLite Even though we do not support complex op kernels on mobile, it is inevitable to support complex tensors in order to enable TF complex ops via flex delegate. This CL enables the complex tensor type in MLIR converter only. PiperOrigin-RevId: 321072365 Change-Id: I5ecd631339b3d5e00b3d999b9f2c6102b554cea5 --- .../compiler/mlir/lite/flatbuffer_export.cc | 3 + .../lite/python/tf_tfl_flatbuffer_helpers.cc | 2 + .../lite/tests/flatbuffer2mlir/constants.mlir | 7 ++ .../flex_op_with_complex128.mlir | 66 +++++++++++++++++++ .../compiler/mlir/lite/utils/convert_type.cc | 4 ++ tensorflow/lite/c/common.c | 2 + tensorflow/lite/c/common.h | 8 +++ tensorflow/lite/c/common_test.cc | 2 + .../lite/core/api/flatbuffer_conversions.cc | 3 + tensorflow/lite/delegates/flex/util.cc | 6 ++ tensorflow/lite/delegates/flex/util_test.cc | 6 ++ .../objc/sources/TFLInterpreter.mm | 4 +- .../lite/experimental/writer/enum_mapping.h | 2 + tensorflow/lite/micro/memory_helpers.cc | 3 + tensorflow/lite/micro/memory_helpers_test.cc | 4 ++ tensorflow/lite/micro/micro_interpreter.cc | 3 + .../lite/micro/micro_optional_debug_tools.cc | 2 + tensorflow/lite/optional_debug_tools.cc | 2 + .../lite/python/interpreter_wrapper/numpy.cc | 2 + .../python/optimize/calibration_wrapper.cc | 2 + tensorflow/lite/python/util.py | 1 + tensorflow/lite/schema/schema.fbs | 1 + tensorflow/lite/schema/schema_generated.h | 13 ++-- tensorflow/lite/testing/split.h | 20 ++++++ tensorflow/lite/testing/tflite_driver.cc | 28 ++++++++ tensorflow/lite/toco/model.h | 1 + tensorflow/lite/toco/tflite/operator.cc | 1 + tensorflow/lite/toco/tooling_util.cc | 4 ++ tensorflow/lite/toco/types.proto | 3 + .../benchmark/experimental/c/c_api_types.h | 8 +++ tensorflow/lite/tools/verifier.cc | 3 + tensorflow/lite/type_to_tflitetype.h | 4 ++ tensorflow/lite/util.cc | 3 + 33 files changed, 217 insertions(+), 6 deletions(-) create mode 100644 tensorflow/compiler/mlir/lite/tests/mlir2flatbuffer/flex_op_with_complex128.mlir diff --git a/tensorflow/compiler/mlir/lite/flatbuffer_export.cc b/tensorflow/compiler/mlir/lite/flatbuffer_export.cc index fb20e842a75..09c79d90e26 100644 --- a/tensorflow/compiler/mlir/lite/flatbuffer_export.cc +++ 
b/tensorflow/compiler/mlir/lite/flatbuffer_export.cc @@ -149,6 +149,9 @@ static StatusOr GetTFLiteType(Type type, if (ftype && ftype.isF32()) { return tflite::TensorType_COMPLEX64; } + if (ftype && ftype.isF64()) { + return tflite::TensorType_COMPLEX128; + } return Status(error::INVALID_ARGUMENT, "Unsupported type"); } case mlir::StandardTypes::Integer: { diff --git a/tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc b/tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc index 4725eb1ac5f..a4e58123e05 100644 --- a/tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc +++ b/tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc @@ -123,6 +123,8 @@ DataType ConvertIODataTypeToDataType(toco::IODataType dtype) { return DT_BOOL; case toco::IODataType::COMPLEX64: return DT_COMPLEX64; + case toco::IODataType::COMPLEX128: + return DT_COMPLEX128; default: return DT_INVALID; } diff --git a/tensorflow/compiler/mlir/lite/tests/flatbuffer2mlir/constants.mlir b/tensorflow/compiler/mlir/lite/tests/flatbuffer2mlir/constants.mlir index 50fe804f86c..a622c43c2f2 100644 --- a/tensorflow/compiler/mlir/lite/tests/flatbuffer2mlir/constants.mlir +++ b/tensorflow/compiler/mlir/lite/tests/flatbuffer2mlir/constants.mlir @@ -15,6 +15,13 @@ func @complex64() -> tensor<4xcomplex> { return %0 : tensor<4xcomplex> } +func @complex128() -> tensor<4xcomplex> { + // CHECK-LABEL: @complex128 + // CHECK: value = opaque<"tf", "0x746674656E736F722464747970653A2044545F434F4D504C45583132382074656E736F725F7368617065207B2064696D207B2073697A653A2034207D207D2074656E736F725F636F6E74656E743A20225C3030305C3030305C3030305C3030305C3030305C3030305C3336303F5C3030305C3030305C3030305C3030305C3030305C3030305C303030405C3030305C3030305C3030305C3030305C3030305C3030305C303030405C3030305C3030305C3030305C3030305C3030305C3030305C303030405C3030305C3030305C3030305C3030305C3030305C3030305C303130405C3030305C3030305C3030305C3030305C3030305C3030305C303030405C3030305C3030305C3030305C3030305C3030305C3030305C303230405C3030305C3030305C3030305C3030305C3030305C3030305C3030304022"> : tensor<4xcomplex> + %0 = "tfl.pseudo_const"() { value = opaque<"tf", "0x746674656E736F722464747970653A2044545F434F4D504C45583132382074656E736F725F7368617065207B2064696D207B2073697A653A2034207D207D2074656E736F725F636F6E74656E743A20225C3030305C3030305C3030305C3030305C3030305C3030305C3336303F5C3030305C3030305C3030305C3030305C3030305C3030305C303030405C3030305C3030305C3030305C3030305C3030305C3030305C303030405C3030305C3030305C3030305C3030305C3030305C3030305C303030405C3030305C3030305C3030305C3030305C3030305C3030305C303130405C3030305C3030305C3030305C3030305C3030305C3030305C303030405C3030305C3030305C3030305C3030305C3030305C3030305C303230405C3030305C3030305C3030305C3030305C3030305C3030305C3030304022"> : tensor<4xcomplex> } : () -> tensor<4xcomplex> + return %0 : tensor<4xcomplex> +} + // TODO(b/138847107) this should work but doesn't // func @f16() -> tensor<4xf16> { // %0 = "tfl.pseudo_const"() { value = dense<[1.0, 2.0, 3.0, 4.0]> : tensor<4xf16> } : () -> tensor<4xf16> diff --git a/tensorflow/compiler/mlir/lite/tests/mlir2flatbuffer/flex_op_with_complex128.mlir b/tensorflow/compiler/mlir/lite/tests/mlir2flatbuffer/flex_op_with_complex128.mlir new file mode 100644 index 00000000000..a5e6d4aabb5 --- /dev/null +++ b/tensorflow/compiler/mlir/lite/tests/mlir2flatbuffer/flex_op_with_complex128.mlir @@ -0,0 +1,66 @@ +// RUN: flatbuffer_translate -mlir-to-tflite-flatbuffer %s -emit-select-tf-ops -o - | flatbuffer_to_string - | FileCheck %s + +func 
@main(tensor<4xcomplex>, tensor<4xcomplex>) -> tensor<4xcomplex> { +^bb0(%arg0: tensor<4xcomplex>, %arg1: tensor<4xcomplex>): +// CHECK: { +// CHECK-NEXT: version: 3, +// CHECK-NEXT: operator_codes: [ { +// CHECK-NEXT: builtin_code: CUSTOM, +// CHECK-NEXT: custom_code: "FlexAdd" +// CHECK-NEXT: } ], +// CHECK-NEXT: subgraphs: [ { +// CHECK-NEXT: tensors: [ { +// CHECK-NEXT: shape: [ 4 ], +// CHECK-NEXT: type: COMPLEX128, +// CHECK-NEXT: buffer: 1, +// CHECK-NEXT: name: "arg0", +// CHECK-NEXT: quantization: { +// CHECK-EMPTY: +// CHECK-NEXT: } +// CHECK-NEXT: }, { +// CHECK-NEXT: shape: [ 4 ], +// CHECK-NEXT: type: COMPLEX128, +// CHECK-NEXT: buffer: 2, +// CHECK-NEXT: name: "arg1", +// CHECK-NEXT: quantization: { +// CHECK-EMPTY: +// CHECK-NEXT: } +// CHECK-NEXT: }, { +// CHECK-NEXT: shape: [ 4 ], +// CHECK-NEXT: type: COMPLEX128, +// CHECK-NEXT: buffer: 3, +// CHECK-NEXT: name: "add", +// CHECK-NEXT: quantization: { +// CHECK-EMPTY: +// CHECK-NEXT: } +// CHECK-NEXT: } ], +// CHECK-NEXT: inputs: [ 0, 1 ], +// CHECK-NEXT: outputs: [ 2 ], +// CHECK-NEXT: operators: [ { +// CHECK-NEXT: inputs: [ 0, 1 ], +// CHECK-NEXT: outputs: [ 2 ], +// CHECK-NEXT: custom_options: [ 3, 65, 100, 100, 0, 20, 18, 3, 65, 100, 100, 26, 0, 26, 0, 42, 7, 10, 1, 84, 18, 2, 48, 18, 50, 0, 0, 2, 27, 23, 20, 20, 4, 40, 1 ] +// CHECK-NEXT: } ], +// CHECK-NEXT: name: "main" +// CHECK-NEXT: } ], +// CHECK-NEXT: description: "MLIR Converted.", +// CHECK-NEXT: buffers: [ { +// CHECK-EMPTY: +// CHECK-NEXT: }, { +// CHECK-EMPTY: +// CHECK-NEXT: }, { +// CHECK-EMPTY: +// CHECK-NEXT: }, { +// CHECK-EMPTY: +// CHECK-NEXT: }, { +// CHECK-NEXT: data: [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ] +// CHECK-NEXT: } ], +// CHECK-NEXT: metadata: [ { +// CHECK-NEXT: name: "min_runtime_version", +// CHECK-NEXT: buffer: 4 +// CHECK-NEXT: } ] +// CHECK-NEXT:} + + %0 = "tf.Add"(%arg0, %arg1) : (tensor<4xcomplex>, tensor<4xcomplex>) -> tensor<4xcomplex> loc("add") + return %0 : tensor<4xcomplex> +} diff --git a/tensorflow/compiler/mlir/lite/utils/convert_type.cc b/tensorflow/compiler/mlir/lite/utils/convert_type.cc index 22283d7eace..6b3ad78a830 100644 --- a/tensorflow/compiler/mlir/lite/utils/convert_type.cc +++ b/tensorflow/compiler/mlir/lite/utils/convert_type.cc @@ -53,6 +53,8 @@ mlir::Type ConvertElementType(tflite::TensorType type, mlir::Builder builder) { return builder.getIntegerType(16); case tflite::TensorType_COMPLEX64: return mlir::ComplexType::get(builder.getF32Type()); + case tflite::TensorType_COMPLEX128: + return mlir::ComplexType::get(builder.getF64Type()); case tflite::TensorType_INT8: return builder.getIntegerType(8); } @@ -64,6 +66,8 @@ tensorflow::DataType TflTypeToTfType(tflite::TensorType type) { return tensorflow::DT_BOOL; case tflite::TensorType_COMPLEX64: return tensorflow::DT_COMPLEX64; + case tflite::TensorType_COMPLEX128: + return tensorflow::DT_COMPLEX128; case tflite::TensorType_FLOAT16: return tensorflow::DT_HALF; case tflite::TensorType_FLOAT32: diff --git a/tensorflow/lite/c/common.c b/tensorflow/lite/c/common.c index e6b47896528..0264f420b12 100644 --- a/tensorflow/lite/c/common.c +++ b/tensorflow/lite/c/common.c @@ -207,6 +207,8 @@ const char* TfLiteTypeGetName(TfLiteType type) { return "BOOL"; case kTfLiteComplex64: return "COMPLEX64"; + case kTfLiteComplex128: + return "COMPLEX128"; case kTfLiteString: return "STRING"; case kTfLiteFloat16: diff --git a/tensorflow/lite/c/common.h b/tensorflow/lite/c/common.h index 13e846406e6..cd6eeec4da2 100644 --- a/tensorflow/lite/c/common.h +++ 
b/tensorflow/lite/c/common.h @@ -238,6 +238,11 @@ typedef struct TfLiteComplex64 { float re, im; // real and imaginary parts, respectively. } TfLiteComplex64; +// Double-precision complex data type compatible with the C99 definition. +typedef struct TfLiteComplex128 { + double re, im; // real and imaginary parts, respectively. +} TfLiteComplex128; + // Half precision data type compatible with the C99 definition. typedef struct TfLiteFloat16 { uint16_t data; @@ -257,6 +262,7 @@ typedef enum { kTfLiteInt8 = 9, kTfLiteFloat16 = 10, kTfLiteFloat64 = 11, + kTfLiteComplex128 = 12, } TfLiteType; // Return the name of a given type, for error reporting purposes. @@ -313,12 +319,14 @@ typedef union TfLitePtrUnion { int64_t* i64; float* f; TfLiteFloat16* f16; + double* f64; char* raw; const char* raw_const; uint8_t* uint8; bool* b; int16_t* i16; TfLiteComplex64* c64; + TfLiteComplex128* c128; int8_t* int8; /* Only use this member. */ void* data; diff --git a/tensorflow/lite/c/common_test.cc b/tensorflow/lite/c/common_test.cc index 0421b50c05e..235c9c1b2cc 100644 --- a/tensorflow/lite/c/common_test.cc +++ b/tensorflow/lite/c/common_test.cc @@ -78,6 +78,7 @@ TEST(Types, TestTypeNames) { return std::string(TfLiteTypeGetName(t)); }; EXPECT_EQ(type_name(kTfLiteNoType), "NOTYPE"); + EXPECT_EQ(type_name(kTfLiteFloat64), "FLOAT64"); EXPECT_EQ(type_name(kTfLiteFloat32), "FLOAT32"); EXPECT_EQ(type_name(kTfLiteFloat16), "FLOAT16"); EXPECT_EQ(type_name(kTfLiteInt16), "INT16"); @@ -87,6 +88,7 @@ TEST(Types, TestTypeNames) { EXPECT_EQ(type_name(kTfLiteInt64), "INT64"); EXPECT_EQ(type_name(kTfLiteBool), "BOOL"); EXPECT_EQ(type_name(kTfLiteComplex64), "COMPLEX64"); + EXPECT_EQ(type_name(kTfLiteComplex128), "COMPLEX128"); EXPECT_EQ(type_name(kTfLiteString), "STRING"); } diff --git a/tensorflow/lite/core/api/flatbuffer_conversions.cc b/tensorflow/lite/core/api/flatbuffer_conversions.cc index e5422697acc..0652c64f6c2 100644 --- a/tensorflow/lite/core/api/flatbuffer_conversions.cc +++ b/tensorflow/lite/core/api/flatbuffer_conversions.cc @@ -863,6 +863,9 @@ TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type, case TensorType_COMPLEX64: *type = kTfLiteComplex64; return kTfLiteOk; + case TensorType_COMPLEX128: + *type = kTfLiteComplex128; + return kTfLiteOk; default: *type = kTfLiteNoType; TF_LITE_REPORT_ERROR(error_reporter, diff --git a/tensorflow/lite/delegates/flex/util.cc b/tensorflow/lite/delegates/flex/util.cc index 750de7397fa..11cf28073fa 100644 --- a/tensorflow/lite/delegates/flex/util.cc +++ b/tensorflow/lite/delegates/flex/util.cc @@ -76,6 +76,8 @@ TF_DataType GetTensorFlowDataType(TfLiteType type) { return TF_INT64; case kTfLiteComplex64: return TF_COMPLEX64; + case kTfLiteComplex128: + return TF_COMPLEX128; case kTfLiteString: return TF_STRING; case kTfLiteBool: @@ -89,6 +91,8 @@ TfLiteType GetTensorFlowLiteType(TF_DataType type) { return kTfLiteFloat32; case TF_HALF: return kTfLiteFloat16; + case TF_DOUBLE: + return kTfLiteFloat64; case TF_INT16: return kTfLiteInt16; case TF_INT32: @@ -101,6 +105,8 @@ TfLiteType GetTensorFlowLiteType(TF_DataType type) { return kTfLiteInt64; case TF_COMPLEX64: return kTfLiteComplex64; + case TF_COMPLEX128: + return kTfLiteComplex128; case TF_STRING: return kTfLiteString; case TF_BOOL: diff --git a/tensorflow/lite/delegates/flex/util_test.cc b/tensorflow/lite/delegates/flex/util_test.cc index 751289ef28f..0d4b50256f0 100644 --- a/tensorflow/lite/delegates/flex/util_test.cc +++ b/tensorflow/lite/delegates/flex/util_test.cc @@ -109,22 +109,28 @@ 
TEST(UtilTest, CopyShapeAndType) { TEST(UtilTest, TypeConversionsFromTFLite) { EXPECT_EQ(TF_FLOAT, GetTensorFlowDataType(kTfLiteNoType)); EXPECT_EQ(TF_FLOAT, GetTensorFlowDataType(kTfLiteFloat32)); + EXPECT_EQ(TF_HALF, GetTensorFlowDataType(kTfLiteFloat16)); + EXPECT_EQ(TF_DOUBLE, GetTensorFlowDataType(kTfLiteFloat64)); EXPECT_EQ(TF_INT16, GetTensorFlowDataType(kTfLiteInt16)); EXPECT_EQ(TF_INT32, GetTensorFlowDataType(kTfLiteInt32)); EXPECT_EQ(TF_UINT8, GetTensorFlowDataType(kTfLiteUInt8)); EXPECT_EQ(TF_INT64, GetTensorFlowDataType(kTfLiteInt64)); EXPECT_EQ(TF_COMPLEX64, GetTensorFlowDataType(kTfLiteComplex64)); + EXPECT_EQ(TF_COMPLEX128, GetTensorFlowDataType(kTfLiteComplex128)); EXPECT_EQ(TF_STRING, GetTensorFlowDataType(kTfLiteString)); EXPECT_EQ(TF_BOOL, GetTensorFlowDataType(kTfLiteBool)); } TEST(UtilTest, TypeConversionsFromTensorFlow) { + EXPECT_EQ(kTfLiteFloat16, GetTensorFlowLiteType(TF_HALF)); EXPECT_EQ(kTfLiteFloat32, GetTensorFlowLiteType(TF_FLOAT)); + EXPECT_EQ(kTfLiteFloat64, GetTensorFlowLiteType(TF_DOUBLE)); EXPECT_EQ(kTfLiteInt16, GetTensorFlowLiteType(TF_INT16)); EXPECT_EQ(kTfLiteInt32, GetTensorFlowLiteType(TF_INT32)); EXPECT_EQ(kTfLiteUInt8, GetTensorFlowLiteType(TF_UINT8)); EXPECT_EQ(kTfLiteInt64, GetTensorFlowLiteType(TF_INT64)); EXPECT_EQ(kTfLiteComplex64, GetTensorFlowLiteType(TF_COMPLEX64)); + EXPECT_EQ(kTfLiteComplex128, GetTensorFlowLiteType(TF_COMPLEX128)); EXPECT_EQ(kTfLiteString, GetTensorFlowLiteType(TF_STRING)); EXPECT_EQ(kTfLiteBool, GetTensorFlowLiteType(TF_BOOL)); EXPECT_EQ(kTfLiteNoType, GetTensorFlowLiteType(TF_RESOURCE)); diff --git a/tensorflow/lite/experimental/objc/sources/TFLInterpreter.mm b/tensorflow/lite/experimental/objc/sources/TFLInterpreter.mm index 34dd119885d..0ccafd71d1b 100644 --- a/tensorflow/lite/experimental/objc/sources/TFLInterpreter.mm +++ b/tensorflow/lite/experimental/objc/sources/TFLInterpreter.mm @@ -405,7 +405,9 @@ static void TFLInterpreterErrorReporter(void *user_data, const char *format, va_ case kTfLiteNoType: case kTfLiteString: case kTfLiteComplex64: - // kTfLiteString and kTfLiteComplex64 are not supported in TensorFlow Lite Objc API. + case kTfLiteComplex128: + // kTfLiteString, kTfLiteComplex64 and kTfLiteComplex128 are not supported in TensorFlow Lite + // Objc API. 
return TFLTensorDataTypeNoType; } } diff --git a/tensorflow/lite/experimental/writer/enum_mapping.h b/tensorflow/lite/experimental/writer/enum_mapping.h index 5eabbcb2015..0847fb7893d 100644 --- a/tensorflow/lite/experimental/writer/enum_mapping.h +++ b/tensorflow/lite/experimental/writer/enum_mapping.h @@ -82,6 +82,8 @@ inline TensorType TfLiteTypeToSchemaType(TfLiteType type) { return TensorType_INT16; case kTfLiteComplex64: return TensorType_COMPLEX64; + case kTfLiteComplex128: + return TensorType_COMPLEX128; } // TODO(aselle): consider an error } diff --git a/tensorflow/lite/micro/memory_helpers.cc b/tensorflow/lite/micro/memory_helpers.cc index bded4d6895a..0e8f335c049 100644 --- a/tensorflow/lite/micro/memory_helpers.cc +++ b/tensorflow/lite/micro/memory_helpers.cc @@ -72,6 +72,9 @@ TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size) { case kTfLiteComplex64: *size = sizeof(float) * 2; break; + case kTfLiteComplex128: + *size = sizeof(double) * 2; + break; default: return kTfLiteError; } diff --git a/tensorflow/lite/micro/memory_helpers_test.cc b/tensorflow/lite/micro/memory_helpers_test.cc index 791e30c944e..82096c6890d 100644 --- a/tensorflow/lite/micro/memory_helpers_test.cc +++ b/tensorflow/lite/micro/memory_helpers_test.cc @@ -141,6 +141,10 @@ TF_LITE_MICRO_TEST(TestTypeSizeOf) { tflite::TfLiteTypeSizeOf(kTfLiteComplex64, &size)); TF_LITE_MICRO_EXPECT_EQ(sizeof(float) * 2, size); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, + tflite::TfLiteTypeSizeOf(kTfLiteComplex128, &size)); + TF_LITE_MICRO_EXPECT_EQ(sizeof(double) * 2, size); + TF_LITE_MICRO_EXPECT_NE( kTfLiteOk, tflite::TfLiteTypeSizeOf(static_cast(-1), &size)); } diff --git a/tensorflow/lite/micro/micro_interpreter.cc b/tensorflow/lite/micro/micro_interpreter.cc index 08556a56a54..c16ede174aa 100644 --- a/tensorflow/lite/micro/micro_interpreter.cc +++ b/tensorflow/lite/micro/micro_interpreter.cc @@ -162,6 +162,9 @@ void MicroInterpreter::CorrectTensorEndianness(TfLiteTensor* tensorCorr) { case TfLiteType::kTfLiteComplex64: CorrectTensorDataEndianness(tensorCorr->data.c64, tensorSize); break; + case TfLiteType::kTfLiteComplex128: + CorrectTensorDataEndianness(tensorCorr->data.c128, tensorSize); + break; default: // Do nothing for other data types. 
break; diff --git a/tensorflow/lite/micro/micro_optional_debug_tools.cc b/tensorflow/lite/micro/micro_optional_debug_tools.cc index f94d67b5ee5..516def3ebe4 100644 --- a/tensorflow/lite/micro/micro_optional_debug_tools.cc +++ b/tensorflow/lite/micro/micro_optional_debug_tools.cc @@ -85,6 +85,8 @@ const char* TensorTypeName(TfLiteType type) { return "kTfLiteInt16"; case kTfLiteComplex64: return "kTfLiteComplex64"; + case kTfLiteComplex128: + return "kTfLiteComplex128"; case kTfLiteFloat16: return "kTfLiteFloat16"; case kTfLiteFloat64: diff --git a/tensorflow/lite/optional_debug_tools.cc b/tensorflow/lite/optional_debug_tools.cc index 2e25b0a17f7..8ee5c3b3f56 100644 --- a/tensorflow/lite/optional_debug_tools.cc +++ b/tensorflow/lite/optional_debug_tools.cc @@ -57,6 +57,8 @@ const char* TensorTypeName(TfLiteType type) { return "kTfLiteInt16"; case kTfLiteComplex64: return "kTfLiteComplex64"; + case kTfLiteComplex128: + return "kTfLiteComplex128"; case kTfLiteFloat16: return "kTfLiteFloat16"; case kTfLiteFloat64: diff --git a/tensorflow/lite/python/interpreter_wrapper/numpy.cc b/tensorflow/lite/python/interpreter_wrapper/numpy.cc index 00e5064e620..d2f308a74a2 100644 --- a/tensorflow/lite/python/interpreter_wrapper/numpy.cc +++ b/tensorflow/lite/python/interpreter_wrapper/numpy.cc @@ -56,6 +56,8 @@ int TfLiteTypeToPyArrayType(TfLiteType tf_lite_type) { return NPY_BOOL; case kTfLiteComplex64: return NPY_COMPLEX64; + case kTfLiteComplex128: + return NPY_COMPLEX128; case kTfLiteNoType: return NPY_NOTYPE; // Avoid default so compiler errors created when new types are made. diff --git a/tensorflow/lite/python/optimize/calibration_wrapper.cc b/tensorflow/lite/python/optimize/calibration_wrapper.cc index 4e4584c0fd7..b608d529c85 100644 --- a/tensorflow/lite/python/optimize/calibration_wrapper.cc +++ b/tensorflow/lite/python/optimize/calibration_wrapper.cc @@ -86,6 +86,8 @@ inline TensorType TfLiteTypeToSchemaType(TfLiteType type) { return TensorType_INT16; case kTfLiteComplex64: return TensorType_COMPLEX64; + case kTfLiteComplex128: + return TensorType_COMPLEX128; } // No default to get compiler error when new type is introduced. 
} diff --git a/tensorflow/lite/python/util.py b/tensorflow/lite/python/util.py index a69f59b2837..ff7caad0f88 100644 --- a/tensorflow/lite/python/util.py +++ b/tensorflow/lite/python/util.py @@ -51,6 +51,7 @@ _MAP_TF_TO_TFLITE_TYPES = { dtypes.int8: _types_pb2.INT8, dtypes.int16: _types_pb2.QUANTIZED_INT16, dtypes.complex64: _types_pb2.COMPLEX64, + dtypes.complex128: _types_pb2.COMPLEX128, dtypes.bool: _types_pb2.BOOL, } diff --git a/tensorflow/lite/schema/schema.fbs b/tensorflow/lite/schema/schema.fbs index b7f41c756e4..878acde1e16 100644 --- a/tensorflow/lite/schema/schema.fbs +++ b/tensorflow/lite/schema/schema.fbs @@ -41,6 +41,7 @@ enum TensorType : byte { COMPLEX64 = 8, INT8 = 9, FLOAT64 = 10, + COMPLEX128 = 11, } // Custom quantization parameters for experimenting with new quantization diff --git a/tensorflow/lite/schema/schema_generated.h b/tensorflow/lite/schema/schema_generated.h index b044acb4033..a6117dc72ab 100755 --- a/tensorflow/lite/schema/schema_generated.h +++ b/tensorflow/lite/schema/schema_generated.h @@ -379,11 +379,12 @@ enum TensorType { TensorType_COMPLEX64 = 8, TensorType_INT8 = 9, TensorType_FLOAT64 = 10, + TensorType_COMPLEX128 = 11, TensorType_MIN = TensorType_FLOAT32, - TensorType_MAX = TensorType_FLOAT64 + TensorType_MAX = TensorType_COMPLEX128 }; -inline const TensorType (&EnumValuesTensorType())[11] { +inline const TensorType (&EnumValuesTensorType())[12] { static const TensorType values[] = { TensorType_FLOAT32, TensorType_FLOAT16, @@ -395,13 +396,14 @@ inline const TensorType (&EnumValuesTensorType())[11] { TensorType_INT16, TensorType_COMPLEX64, TensorType_INT8, - TensorType_FLOAT64 + TensorType_FLOAT64, + TensorType_COMPLEX128 }; return values; } inline const char * const *EnumNamesTensorType() { - static const char * const names[12] = { + static const char * const names[13] = { "FLOAT32", "FLOAT16", "INT32", @@ -413,13 +415,14 @@ inline const char * const *EnumNamesTensorType() { "COMPLEX64", "INT8", "FLOAT64", + "COMPLEX128", nullptr }; return names; } inline const char *EnumNameTensorType(TensorType e) { - if (flatbuffers::IsOutRange(e, TensorType_FLOAT32, TensorType_FLOAT64)) return ""; + if (flatbuffers::IsOutRange(e, TensorType_FLOAT32, TensorType_COMPLEX128)) return ""; const size_t index = static_cast(e); return EnumNamesTensorType()[index]; } diff --git a/tensorflow/lite/testing/split.h b/tensorflow/lite/testing/split.h index d4e762164a4..6f7b9a68484 100644 --- a/tensorflow/lite/testing/split.h +++ b/tensorflow/lite/testing/split.h @@ -132,6 +132,26 @@ inline std::vector> Split(const string& s, return fields; } +template <> +inline std::vector> Split(const string& s, + const string& delimiter) { + std::vector> fields; + for (const auto& p : SplitToPos(s, delimiter)) { + std::string sc = s.substr(p.first, p.second - p.first); + std::string::size_type sz_real, sz_img; + double real = std::stod(sc, &sz_real); + double img = std::stod(sc.substr(sz_real), &sz_img); + if (sz_real + sz_img + 1 != sc.length()) { + std::cerr << "There were errors in parsing string, " << sc + << ", to complex value." 
<< std::endl; + return fields; + } + std::complex c(real, img); + fields.push_back(c); + } + return fields; +} + } // namespace testing } // namespace tflite diff --git a/tensorflow/lite/testing/tflite_driver.cc b/tensorflow/lite/testing/tflite_driver.cc index ae53be09889..ae352ce04c4 100644 --- a/tensorflow/lite/testing/tflite_driver.cc +++ b/tensorflow/lite/testing/tflite_driver.cc @@ -127,15 +127,37 @@ class TfLiteDriver::DataExpectation { return error_is_large; } + bool CompareTwoValuesHelper(double v1, double v2) { + double diff = std::abs(v1 - v2); + bool error_is_large = false; + // For very small numbers, try absolute error, otherwise go with + // relative. + if (std::abs(v2) < relative_threshold_) { + error_is_large = (diff > absolute_threshold_); + } else { + error_is_large = (diff > relative_threshold_ * std::abs(v2)); + } + return error_is_large; + } + bool CompareTwoValues(std::complex v1, std::complex v2) { return CompareTwoValues(v1.real(), v2.real()) || CompareTwoValues(v1.imag(), v2.imag()); } + bool CompareTwoValues(std::complex v1, std::complex v2) { + return CompareTwoValues(v1.real(), v2.real()) || + CompareTwoValues(v1.imag(), v2.imag()); + } + bool CompareTwoValues(float v1, float v2) { return CompareTwoValuesHelper(v1, v2); } + bool CompareTwoValues(double v1, double v2) { + return CompareTwoValuesHelper(v1, v2); + } + template bool TypedCheck(bool verbose, const TfLiteTensor& tensor) { size_t tensor_size = tensor.bytes / sizeof(T); @@ -315,6 +337,9 @@ bool TfLiteDriver::DataExpectation::Check(bool verbose, case kTfLiteComplex64: return TypedCheck, std::complex>(verbose, tensor); + case kTfLiteComplex128: + return TypedCheck, std::complex>(verbose, + tensor); default: fprintf(stderr, "Unsupported type %d in Check\n", tensor.type); return false; @@ -527,6 +552,9 @@ void TfLiteDriver::SetExpectation(int id, const string& csv_values) { case kTfLiteComplex64: expected_output_[id]->SetData>(csv_values); break; + case kTfLiteComplex128: + expected_output_[id]->SetData>(csv_values); + break; default: Invalidate(absl::StrCat("Unsupported tensor type ", TfLiteTypeGetName(tensor->type), diff --git a/tensorflow/lite/toco/model.h b/tensorflow/lite/toco/model.h index 58397f5a3eb..b42fed6fbc1 100644 --- a/tensorflow/lite/toco/model.h +++ b/tensorflow/lite/toco/model.h @@ -236,6 +236,7 @@ enum class ArrayDataType : uint8 { kComplex64, kFloat16, kFloat64, + kComplex128, }; // Compile-time logic to map ArrayDataType to the corresponding C++ scalar type diff --git a/tensorflow/lite/toco/tflite/operator.cc b/tensorflow/lite/toco/tflite/operator.cc index bc12d49a115..794691f5724 100644 --- a/tensorflow/lite/toco/tflite/operator.cc +++ b/tensorflow/lite/toco/tflite/operator.cc @@ -51,6 +51,7 @@ namespace tflite { {ArrayDataType::kInt64, ::tflite::TensorType_INT64}, {ArrayDataType::kString, ::tflite::TensorType_STRING}, {ArrayDataType::kComplex64, ::tflite::TensorType_COMPLEX64}, + {ArrayDataType::kComplex128, ::tflite::TensorType_COMPLEX128}, {ArrayDataType::kFloat16, ::tflite::TensorType_FLOAT16}, {ArrayDataType::kFloat64, ::tflite::TensorType_FLOAT64}}; diff --git a/tensorflow/lite/toco/tooling_util.cc b/tensorflow/lite/toco/tooling_util.cc index be4cda8aa3d..d84763faee6 100644 --- a/tensorflow/lite/toco/tooling_util.cc +++ b/tensorflow/lite/toco/tooling_util.cc @@ -1769,6 +1769,8 @@ int ElementSize(ArrayDataType data_type) { return 8; case ArrayDataType::kComplex64: return 8; + case ArrayDataType::kComplex128: + return 16; case ArrayDataType::kFloat64: return 8; @@ -2313,6 +2315,8 @@ 
ArrayDataType ConvertIODataTypeToArrayDataType(IODataType type) { return ArrayDataType::kString; case COMPLEX64: return ArrayDataType::kComplex64; + case COMPLEX128: + return ArrayDataType::kComplex128; case FLOAT16: return ArrayDataType::kFloat16; case FLOAT64: diff --git a/tensorflow/lite/toco/types.proto b/tensorflow/lite/toco/types.proto index 029a159321e..009891c3bcb 100644 --- a/tensorflow/lite/toco/types.proto +++ b/tensorflow/lite/toco/types.proto @@ -52,4 +52,7 @@ enum IODataType { // Double precision float, not quantized. FLOAT64 = 11; + + // Complex128, not quantized + COMPLEX128 = 12; } diff --git a/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h b/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h index 13e846406e6..cd6eeec4da2 100644 --- a/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h +++ b/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h @@ -238,6 +238,11 @@ typedef struct TfLiteComplex64 { float re, im; // real and imaginary parts, respectively. } TfLiteComplex64; +// Double-precision complex data type compatible with the C99 definition. +typedef struct TfLiteComplex128 { + double re, im; // real and imaginary parts, respectively. +} TfLiteComplex128; + // Half precision data type compatible with the C99 definition. typedef struct TfLiteFloat16 { uint16_t data; @@ -257,6 +262,7 @@ typedef enum { kTfLiteInt8 = 9, kTfLiteFloat16 = 10, kTfLiteFloat64 = 11, + kTfLiteComplex128 = 12, } TfLiteType; // Return the name of a given type, for error reporting purposes. @@ -313,12 +319,14 @@ typedef union TfLitePtrUnion { int64_t* i64; float* f; TfLiteFloat16* f16; + double* f64; char* raw; const char* raw_const; uint8_t* uint8; bool* b; int16_t* i16; TfLiteComplex64* c64; + TfLiteComplex128* c128; int8_t* int8; /* Only use this member. */ void* data; diff --git a/tensorflow/lite/tools/verifier.cc b/tensorflow/lite/tools/verifier.cc index 9befa7fd6f1..12b24e6f2d8 100644 --- a/tensorflow/lite/tools/verifier.cc +++ b/tensorflow/lite/tools/verifier.cc @@ -384,6 +384,9 @@ bool VerifyNumericTensorBuffer(const Tensor& tensor, const Buffer& buffer, case TensorType_COMPLEX64: bytes_required *= sizeof(std::complex); break; + case TensorType_COMPLEX128: + bytes_required *= sizeof(std::complex); + break; default: ReportError(error_reporter, "Tensor %s invalid type: %d", NameOrEmptyString(tensor.name()), tensor.type()); diff --git a/tensorflow/lite/type_to_tflitetype.h b/tensorflow/lite/type_to_tflitetype.h index 84cd54b5718..4ad36688bee 100644 --- a/tensorflow/lite/type_to_tflitetype.h +++ b/tensorflow/lite/type_to_tflitetype.h @@ -67,6 +67,10 @@ constexpr TfLiteType typeToTfLiteType>() { return kTfLiteComplex64; } template <> +constexpr TfLiteType typeToTfLiteType>() { + return kTfLiteComplex128; +} +template <> constexpr TfLiteType typeToTfLiteType() { return kTfLiteString; } diff --git a/tensorflow/lite/util.cc b/tensorflow/lite/util.cc index 09efaa77f15..9cfdaf4d695 100644 --- a/tensorflow/lite/util.cc +++ b/tensorflow/lite/util.cc @@ -102,6 +102,9 @@ TfLiteStatus GetSizeOfType(TfLiteContext* context, const TfLiteType type, case kTfLiteComplex64: *bytes = sizeof(std::complex); break; + case kTfLiteComplex128: + *bytes = sizeof(std::complex); + break; case kTfLiteInt16: *bytes = sizeof(int16_t); break; From f9e0cbcef300f2f933e35287b2ff7b5608f367e9 Mon Sep 17 00:00:00 2001 From: Jiho Choi Date: Mon, 13 Jul 2020 18:08:13 -0700 Subject: [PATCH 0336/2522] Fix the overview page for JAX profiles. 
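The grouping logic now treats a LocalExecutable::ExecuteOnLocalDevices event as the signal for a JAX profile: per-TF-loop-iteration grouping is used only when no such event is present, and when one is present TF's legacy root events are ignored in favor of the top root events. A condensed restatement of that rule, written as an illustrative helper rather than the actual EventForest method:

    // Sketch: group by TF-loop iterations only for non-JAX profiles.
    // The two flags stand in for HasJaxEvent(event_node_map_) and
    // !tf_loop_root_events_.empty() in the real code.
    bool UseTfLoopGrouping(bool has_jax_event, bool has_tf_loop_roots) {
      return !has_jax_event && has_tf_loop_roots;
    }

When this is false, one group is created per top root event instead.
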
PiperOrigin-RevId: 321072378 Change-Id: Ie06d27eb18c24f781bde026fd15dc2a3dc556627 --- tensorflow/core/profiler/utils/group_events.cc | 17 +++++++++++++---- tensorflow/core/profiler/utils/xplane_schema.cc | 2 ++ tensorflow/core/profiler/utils/xplane_schema.h | 2 ++ 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/profiler/utils/group_events.cc b/tensorflow/core/profiler/utils/group_events.cc index ffad67b3413..38ad6cd43ee 100644 --- a/tensorflow/core/profiler/utils/group_events.cc +++ b/tensorflow/core/profiler/utils/group_events.cc @@ -269,6 +269,11 @@ void SortEventList(EventList* event_list) { }); } +// Returns true if it has JAX-related events. +bool HasJaxEvent(const EventNodeMap& event_node_map) { + return event_node_map.contains(HostEventType::kExecuteOnLocalDevices); +} + } // namespace EventNode::EventNode(const XPlaneVisitor* plane, XLine* raw_line, @@ -492,6 +497,7 @@ void EventForest::ProcessLegacyRootEvents( for (int64 root_event_type : root_event_types) { if (auto root_events = gtl::FindOrNull(event_node_map_, root_event_type)) { for (const auto& root_event : *root_events) { + root_event->SetIsRoot(true); root_events_.push_back(root_event.get()); } } @@ -499,17 +505,20 @@ void EventForest::ProcessLegacyRootEvents( } void EventForest::CreateEventGroup() { - if (!tf_loop_root_events_.empty()) { - // If a TF loop is used, each TF loop iteration becomes a root. + // Create a group for each TF loop iteration in non-JAX profiles. + if (!HasJaxEvent(event_node_map_) && !tf_loop_root_events_.empty()) { for (EventNode* root_event : tf_loop_root_events_) { ProcessRootEvent(next_group_id_++, root_event, &group_metadata_map_); } return; } - SortEventList(&root_events_); + // Create a group for each top root event while ignoring TF's legacy root + // events for JAX profiles. for (EventNode* root_event : root_events_) { - if (IsTopRoot(root_event)) { + if (IsTopRoot(root_event) && + (!HasJaxEvent(event_node_map_) || + !IsLegacyRootEvent(root_event->GetEventVisitor()))) { ProcessRootEvent(next_group_id_++, root_event, &group_metadata_map_); } } diff --git a/tensorflow/core/profiler/utils/xplane_schema.cc b/tensorflow/core/profiler/utils/xplane_schema.cc index 5ca8326d72c..0cc839681a7 100644 --- a/tensorflow/core/profiler/utils/xplane_schema.cc +++ b/tensorflow/core/profiler/utils/xplane_schema.cc @@ -94,6 +94,8 @@ const HostEventTypeMap& GetHostEventTypeMap() { // tf.data related. {"IteratorGetNextOp::DoCompute", kIteratorGetNextOp}, {"IteratorGetNextAsOptionalOp::DoCompute", kIteratorGetNextAsOptionalOp}, + // JAX related. + {"LocalExecutable::ExecuteOnLocalDevices", kExecuteOnLocalDevices}, // GPU related. {"KernelLaunch", kKernelLaunch}, {"KernelExecute", kKernelExecute}, diff --git a/tensorflow/core/profiler/utils/xplane_schema.h b/tensorflow/core/profiler/utils/xplane_schema.h index 41774deaa59..2f2fea880f6 100644 --- a/tensorflow/core/profiler/utils/xplane_schema.h +++ b/tensorflow/core/profiler/utils/xplane_schema.h @@ -84,6 +84,8 @@ enum HostEventType { // tf.data related. kIteratorGetNextOp, kIteratorGetNextAsOptionalOp, + // JAX related. + kExecuteOnLocalDevices, // GPU related. kKernelLaunch, kKernelExecute, From 7fcb46caa26983d22f08a2e75eedb3e6c159e608 Mon Sep 17 00:00:00 2001 From: Russell Power Date: Mon, 13 Jul 2020 18:16:16 -0700 Subject: [PATCH 0337/2522] Add variable merger pass. 
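The pass is registered at POST_REWRITE_FOR_EXEC, and merged stateful nodes are renamed deterministically from a fingerprint of the names they replace, so that kernel sharing by name stays stable across runs. A rough sketch of that naming idea, using the fingerprint and string helpers the new file already includes; the helper name, prefix, and separator below are assumptions, not the exact implementation:

    #include <string>

    #include "absl/strings/str_cat.h"
    #include "absl/strings/str_join.h"
    #include "absl/types/span.h"
    #include "tensorflow/core/platform/fingerprint.h"

    // Illustration only: derive one stable name from the merged nodes' names.
    std::string MergedOpName(absl::Span<const std::string> old_names) {
      return absl::StrCat(
          "merged_", tensorflow::Fingerprint64(absl::StrJoin(old_names, ",")));
    }
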
PiperOrigin-RevId: 321073288 Change-Id: I9ef7272554a16288596bb3919a82e06c508f4dea --- tensorflow/core/tpu/BUILD | 2 +- tensorflow/core/tpu/graph_rewrite/BUILD | 44 +++- ...on.cc => tpu_rewrite_pass_registration.cc} | 3 + .../tpu/graph_rewrite/variable_merger_pass.cc | 204 +++++++++++++++++ .../tpu/graph_rewrite/variable_merger_pass.h | 47 ++++ .../variable_merger_pass_test.cc | 205 ++++++++++++++++++ 6 files changed, 502 insertions(+), 3 deletions(-) rename tensorflow/core/tpu/graph_rewrite/{distributed_tpu_configuration_rewrite_registration.cc => tpu_rewrite_pass_registration.cc} (87%) create mode 100644 tensorflow/core/tpu/graph_rewrite/variable_merger_pass.cc create mode 100644 tensorflow/core/tpu/graph_rewrite/variable_merger_pass.h create mode 100644 tensorflow/core/tpu/graph_rewrite/variable_merger_pass_test.cc diff --git a/tensorflow/core/tpu/BUILD b/tensorflow/core/tpu/BUILD index c5c3ab8c059..8181f682d70 100644 --- a/tensorflow/core/tpu/BUILD +++ b/tensorflow/core/tpu/BUILD @@ -136,7 +136,7 @@ cc_library( ":tpu_library_init_fns", "//tensorflow/core/platform:errors", "//tensorflow/core/platform:status", - "//tensorflow/core/tpu/graph_rewrite:distributed_tpu_configuration_rewrite_registration", + "//tensorflow/core/tpu/graph_rewrite:tpu_rewrite_pass_registration", "//tensorflow/core/tpu/kernels:tpu_compile_c_api_hdrs", "//tensorflow/core/tpu/kernels:tpu_mesh_state_c_api_hdrs", "//tensorflow/core/tpu/kernels:tpu_util_c_api_hdrs", diff --git a/tensorflow/core/tpu/graph_rewrite/BUILD b/tensorflow/core/tpu/graph_rewrite/BUILD index ef9e4a0a41e..dcf6af69fce 100644 --- a/tensorflow/core/tpu/graph_rewrite/BUILD +++ b/tensorflow/core/tpu/graph_rewrite/BUILD @@ -1,5 +1,10 @@ # Contains graph rewrites for TPU runtimes and optimizations. +load( + "//tensorflow:tensorflow.bzl", + "tf_cc_test", +) + package( default_visibility = [ "//tensorflow/core/tpu:__subpackages__", @@ -9,10 +14,11 @@ package( ) cc_library( - name = "distributed_tpu_configuration_rewrite_registration", - srcs = ["distributed_tpu_configuration_rewrite_registration.cc"], + name = "tpu_rewrite_pass_registration", + srcs = ["tpu_rewrite_pass_registration.cc"], deps = [ ":distributed_tpu_configuration_rewrite_pass", + ":variable_merger_pass", "//tensorflow/core:core_cpu", ], alwayslink = 1, @@ -53,3 +59,37 @@ cc_library( "//tensorflow/core/tpu:tpu_defs", ], ) + +cc_library( + name = "variable_merger_pass", + srcs = ["variable_merger_pass.cc"], + hdrs = ["variable_merger_pass.h"], + deps = [ + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/core:core_cpu", + "//tensorflow/core:core_cpu_internal", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:span", + ], +) + +tf_cc_test( + name = "variable_merger_pass_test", + size = "small", + srcs = ["variable_merger_pass_test.cc"], + deps = [ + ":variable_merger_pass", + "//tensorflow/cc:cc_ops", + "//tensorflow/cc:ops", + "//tensorflow/cc:resource_variable_ops", + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:ops", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) diff --git a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_configuration_rewrite_registration.cc b/tensorflow/core/tpu/graph_rewrite/tpu_rewrite_pass_registration.cc similarity index 87% rename from 
tensorflow/core/tpu/graph_rewrite/distributed_tpu_configuration_rewrite_registration.cc rename to tensorflow/core/tpu/graph_rewrite/tpu_rewrite_pass_registration.cc index db2b3a53f20..92173c1e79c 100644 --- a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_configuration_rewrite_registration.cc +++ b/tensorflow/core/tpu/graph_rewrite/tpu_rewrite_pass_registration.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/core/common_runtime/optimization_registry.h" #include "tensorflow/core/tpu/graph_rewrite/distributed_tpu_configuration_rewrite_pass.h" +#include "tensorflow/core/tpu/graph_rewrite/variable_merger_pass.h" namespace tensorflow { namespace { @@ -24,6 +25,8 @@ REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 20, DistributedTPUConfigurationRewritePass); REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 20, DistributedTPUShutdownRewritePass); +REGISTER_OPTIMIZATION(OptimizationPassRegistry::POST_REWRITE_FOR_EXEC, 0, + VariableMergerPass); } // namespace } // namespace tensorflow diff --git a/tensorflow/core/tpu/graph_rewrite/variable_merger_pass.cc b/tensorflow/core/tpu/graph_rewrite/variable_merger_pass.cc new file mode 100644 index 00000000000..354acb32838 --- /dev/null +++ b/tensorflow/core/tpu/graph_rewrite/variable_merger_pass.cc @@ -0,0 +1,204 @@ + +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/tpu/graph_rewrite/variable_merger_pass.h" + +#include +#include +#include + +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" +#include "absl/strings/str_join.h" +#include "absl/strings/string_view.h" +#include "absl/types/span.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/platform/fingerprint.h" +#include "tensorflow/core/util/dump_graph.h" + +namespace tensorflow { + +namespace { + +// The name of a stateful op is semantically meaningful because ops with the +// same name will share the same kernel. We therefore form new op names using a +// deterministic function (a fingerprint) of the old names. 
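+// For example, merging handles named "v" and "w" yields a node named like
+// "VarHandles_<Fingerprint64("v,w")>" (plus any uniquifying suffix added by
+// graph->NewName), so the same set of variables always maps to the same name.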
+uint64 MergedOpFingerprint(absl::Span ops) { + std::vector op_names; + op_names.reserve(ops.size()); + for (const Node* node : ops) { + op_names.push_back(node->name()); + } + return Fingerprint64(absl::StrJoin(op_names, ",")); +} + +Status MergeVarHandleOps(const string& device, absl::Span nodes, + Graph* graph) { + int num_var_handles(nodes.size()); + if (num_var_handles <= 1) return Status::OK(); + + std::vector containers(num_var_handles); + std::vector names(num_var_handles); + DataTypeVector dtypes(num_var_handles); + std::vector shapes(num_var_handles); + for (int i = 0; i < num_var_handles; ++i) { + TF_RETURN_IF_ERROR( + GetNodeAttr(nodes[i]->attrs(), "container", &containers[i])); + TF_RETURN_IF_ERROR( + GetNodeAttr(nodes[i]->attrs(), "shared_name", &names[i])); + TF_RETURN_IF_ERROR(GetNodeAttr(nodes[i]->attrs(), "dtype", &dtypes[i])); + TF_RETURN_IF_ERROR(GetNodeAttr(nodes[i]->attrs(), "shape", &shapes[i])); + } + NodeDefBuilder builder(graph->NewName(strings::StrCat( + "VarHandles_", MergedOpFingerprint(nodes))), + "_VarHandlesOp"); + builder.Attr("N", num_var_handles); + builder.Attr("containers", containers); + builder.Attr("shared_names", names); + builder.Attr("dtypes", dtypes); + builder.Attr("shapes", shapes); + builder.Device(device); + NodeDef node_def; + TF_RETURN_IF_ERROR(builder.Finalize(&node_def)); + Status status; + Node* node = graph->AddNode(node_def, &status); + TF_RETURN_IF_ERROR(status); + node->set_assigned_device_name(device); + + graph->AddControlEdge(graph->source_node(), node); + for (int i = 0; i < num_var_handles; ++i) { + std::vector> consumers; + for (const Edge* e : nodes[i]->out_edges()) { + consumers.emplace_back(e->dst(), e->dst_input()); + } + graph->RemoveNode(nodes[i]); + for (const auto& t : consumers) { + graph->AddEdge(node, t.second < 0 ? -1 : i, t.first, t.second); + } + } + return Status::OK(); +} + +Status MergeReadVariableOps(Node* handle_op, Node* control_node, + absl::Span nodes, Graph* graph) { + int num_reads(nodes.size()); + if (num_reads <= 1) return Status::OK(); + + DataTypeVector dtypes(num_reads); + for (int i = 0; i < num_reads; ++i) { + TF_RETURN_IF_ERROR(GetNodeAttr(nodes[i]->attrs(), "dtype", &dtypes[i])); + } + NodeDef node_def; + node_def.set_name(graph->NewName( + strings::StrCat("ReadVariables_", MergedOpFingerprint(nodes)))); + node_def.set_op("_ReadVariablesOp"); + AddNodeAttr("N", num_reads, &node_def); + AddNodeAttr("dtypes", dtypes, &node_def); + node_def.set_device(handle_op->requested_device()); + Status status; + Node* node = graph->AddNode(node_def, &status); + TF_RETURN_IF_ERROR(status); + node->set_assigned_device_name(handle_op->assigned_device_name()); + if (control_node) graph->AddControlEdge(control_node, node); + for (int i = 0; i < num_reads; ++i) { + const Edge* handle_edge; + TF_RETURN_IF_ERROR(nodes[i]->input_edge(0, &handle_edge)); + graph->AddEdge(handle_edge->src(), handle_edge->src_output(), node, i); + + std::vector> consumers; + for (const Edge* e : nodes[i]->out_edges()) { + consumers.emplace_back(e->dst(), e->dst_input()); + } + graph->RemoveNode(nodes[i]); + for (const auto& t : consumers) { + graph->AddEdge(node, t.second < 0 ? -1 : i, t.first, t.second); + } + } + return Status::OK(); +} + +} // namespace + +Status VariableMergerPass::Run(const GraphOptimizationPassOptions& options) { + Graph* graph = options.graph->get(); + + VLOG(1) << DumpGraphToFile("variable_merger_pass_before", *graph); + + // Find VarHandleOps that are graph roots and group them by assigned device. 
+ // Also find any ReadVariableOps that are consumers of those handles. + absl::flat_hash_map> var_handle_ops_by_device; + absl::flat_hash_set read_variable_ops; + + for (Node* m : graph->source_node()->out_nodes()) { + // We check that the VarHandleOp has no control edges, other than the one we + // followed from the source node. + if (m->type_string() == "VarHandleOp" && m->in_edges().size() == 1) { + var_handle_ops_by_device[m->assigned_device_name()].push_back(m); + for (Node* n : m->out_nodes()) { + // ReadVariableOp could have control edges, we will group them by + // merged VarHandleOp and control dependency. + if (n->type_string() == "ReadVariableOp" && n->in_edges().size() <= 2) { + read_variable_ops.insert(n); + } + } + } + } + + auto node_name_comparator = [](Node* a, Node* b) { + return a->name() < b->name(); + }; + + // First merge the var handle ops. + for (auto& vh : var_handle_ops_by_device) { + // Sort the handles by name for determinism. + std::sort(vh.second.begin(), vh.second.end(), node_name_comparator); + TF_RETURN_IF_ERROR(MergeVarHandleOps(vh.first, vh.second, graph)); + } + + // ReadVariableOps by a pair of . + // ControlDependencyNode could be nullptr. + absl::flat_hash_map, std::vector> read_var_ops; + + for (Node* n : read_variable_ops) { + Node* control_node = nullptr; + Node* var_handle_op = nullptr; + // Each ReadVariableOp has at most one control input since we only choose + // ReadVariableOp with at most 2 input edges. + for (const Edge* e : n->in_edges()) { + if (e->IsControlEdge()) { + control_node = e->src(); + } else { + var_handle_op = e->src(); + } + } + TF_RET_CHECK(var_handle_op != nullptr); + read_var_ops[std::pair(var_handle_op, control_node)] + .push_back(n); + } + + for (auto& r : read_var_ops) { + // Sort the reads by name for determinism. + std::sort(r.second.begin(), r.second.end(), node_name_comparator); + TF_RETURN_IF_ERROR( + MergeReadVariableOps(r.first.first, r.first.second, r.second, graph)); + } + + VLOG(1) << DumpGraphToFile("variable_merger_pass_after", *graph); + return Status::OK(); +} + +} // namespace tensorflow diff --git a/tensorflow/core/tpu/graph_rewrite/variable_merger_pass.h b/tensorflow/core/tpu/graph_rewrite/variable_merger_pass.h new file mode 100644 index 00000000000..0f487da30e1 --- /dev/null +++ b/tensorflow/core/tpu/graph_rewrite/variable_merger_pass.h @@ -0,0 +1,47 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Optimization pass that merges VarHandleOps and ReadVariableOps into their +// fused forms. +// +// The goal of this pass is to fix a latency problem sometimes observed in +// inference benchmarks. Often a inference step starts by reading the value of +// many weights. Reading a resource variable requires a VarHandleOp and a +// ReadVariableOp per variable. Running hundreds of trivial ops can add hundreds +// of microseconds of latency to the critical path of an inference step. 
The +// inter-op latency of the executor can be easily hundreds of nanoseconds, which +// rapidly adds up over many inexpensive ops. +// +// This pass merges VarHandleOps that have only the graph source node as a +// predecessor into a single VarHandlesOp that reads all at once. +// It then merges ReadVariablesOp that have no control inputs and originate from +// the same handle op into a single large ReadVariablesOp. + +#ifndef TENSORFLOW_CORE_TPU_GRAPH_REWRITE_VARIABLE_MERGER_PASS_H_ +#define TENSORFLOW_CORE_TPU_GRAPH_REWRITE_VARIABLE_MERGER_PASS_H_ + +#include "tensorflow/core/common_runtime/optimization_registry.h" +#include "tensorflow/core/graph/graph.h" + +namespace tensorflow { + +class VariableMergerPass : public GraphOptimizationPass { + public: + Status Run(const GraphOptimizationPassOptions& options) override; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_TPU_GRAPH_REWRITE_VARIABLE_MERGER_PASS_H_ diff --git a/tensorflow/core/tpu/graph_rewrite/variable_merger_pass_test.cc b/tensorflow/core/tpu/graph_rewrite/variable_merger_pass_test.cc new file mode 100644 index 00000000000..4241a481d09 --- /dev/null +++ b/tensorflow/core/tpu/graph_rewrite/variable_merger_pass_test.cc @@ -0,0 +1,205 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tensorflow/core/tpu/graph_rewrite/variable_merger_pass.h" + +#include +#include + +#include "tensorflow/cc/framework/ops.h" +#include "tensorflow/cc/ops/resource_variable_ops.h" +#include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/core/framework/partial_tensor_shape.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/util/equal_graph_def.h" + +namespace tensorflow { +namespace { + +TEST(VarHandleMerger, SimpleMergesWork) { + std::unique_ptr graph(new Graph(OpRegistry::Global())); + { + Scope scope = Scope::NewRootScope().ExitOnError(); + auto v = ops::VarHandleOp( + scope.WithOpName("V"), DT_FLOAT, TensorShape({}), + ops::VarHandleOp::Attrs().Container("c1").SharedName("n1")); + auto w = ops::VarHandleOp( + scope.WithOpName("W"), DT_INT32, TensorShape({77}), + ops::VarHandleOp::Attrs().Container("c2").SharedName("n2")); + auto v_read = ops::ReadVariableOp(scope.WithOpName("VRead"), v, DT_FLOAT); + auto w_read = ops::ReadVariableOp(scope.WithOpName("WRead"), w, DT_INT32); + auto w_cast = ops::Cast(scope.WithOpName("Cast"), w_read, DT_FLOAT); + ops::Sub(scope.WithOpName("Sub"), v_read, w_cast); + TF_ASSERT_OK(scope.ToGraph(graph.get())); + } + + VariableMergerPass merger; + GraphOptimizationPassOptions options; + options.graph = &graph; + TF_ASSERT_OK(merger.Run(options)); + GraphDef actual; + graph->ToGraphDef(&actual); + + GraphDef expected; + { + Scope scope = Scope::NewRootScope().ExitOnError(); + auto handles = ops::_VarHandlesOp( + scope.WithOpName("VarHandles_10315266686041849873/_0"), + /*containers=*/{"c1", "c2"}, + /*shared_names=*/{"n1", "n2"}, /*N=*/2, /*dtypes=*/{DT_FLOAT, DT_INT32}, + /*shapes=*/{TensorShape({}), TensorShape({77})}); + auto read = ops::_ReadVariablesOp( + scope.WithOpName("ReadVariables_13269360303885824085/_1"), + /*resources=*/{handles[0], handles[1]}, + /*dtypes=*/{DT_FLOAT, DT_INT32}); + auto w_cast = ops::Cast(scope.WithOpName("Cast"), read[1], DT_FLOAT); + ops::Sub(scope.WithOpName("Sub"), read[0], w_cast); + TF_ASSERT_OK(scope.ToGraphDef(&expected)); + } + + TF_EXPECT_GRAPH_EQ(expected, actual); +} + +TEST(VarHandleMerger, VarHandlesWithControlDepsDontMerge) { + std::unique_ptr graph(new Graph(OpRegistry::Global())); + { + Scope scope = Scope::NewRootScope().ExitOnError(); + auto v = ops::VarHandleOp( + scope.WithOpName("V"), DT_FLOAT, TensorShape({}), + ops::VarHandleOp::Attrs().Container("c1").SharedName("n1")); + auto w = ops::VarHandleOp( + scope.WithOpName("W").WithControlDependencies(v), DT_INT32, + TensorShape({77}), + ops::VarHandleOp::Attrs().Container("c2").SharedName("n2")); + TF_ASSERT_OK(scope.ToGraph(graph.get())); + } + + GraphDef expected; + graph->ToGraphDef(&expected); + + VariableMergerPass merger; + GraphOptimizationPassOptions options; + options.graph = &graph; + TF_ASSERT_OK(merger.Run(options)); + GraphDef actual; + graph->ToGraphDef(&actual); + + TF_EXPECT_GRAPH_EQ(expected, actual); +} + +TEST(VarHandleMerger, ReadVariableOpsWithDifferentControlDepsDontMerge) { + std::unique_ptr graph(new Graph(OpRegistry::Global())); + { + Scope scope = Scope::NewRootScope().ExitOnError(); + auto w = ops::VarHandleOp( + scope.WithOpName("W"), DT_INT32, TensorShape({77}), + ops::VarHandleOp::Attrs().Container("c2").SharedName("n2")); + auto v = ops::VarHandleOp( + scope.WithOpName("V"), DT_FLOAT, TensorShape({}), + 
ops::VarHandleOp::Attrs().Container("c1").SharedName("n1")); + auto w_read = ops::ReadVariableOp(scope.WithOpName("WRead"), w, DT_INT32); + auto w_cast = ops::Cast(scope.WithOpName("Cast"), w_read, DT_FLOAT); + auto v_read = ops::ReadVariableOp( + scope.WithOpName("VRead").WithControlDependencies(w_cast), v, DT_FLOAT); + ops::Sub(scope.WithOpName("Sub"), v_read, w_cast); + TF_ASSERT_OK(scope.ToGraph(graph.get())); + } + + VariableMergerPass merger; + GraphOptimizationPassOptions options; + options.graph = &graph; + TF_ASSERT_OK(merger.Run(options)); + GraphDef actual; + graph->ToGraphDef(&actual); + + GraphDef expected; + { + Scope scope = Scope::NewRootScope().ExitOnError(); + auto handles = ops::_VarHandlesOp( + scope.WithOpName("VarHandles_10315266686041849873/_0"), + /*containers=*/{"c1", "c2"}, + /*shared_names=*/{"n1", "n2"}, /*N=*/2, /*dtypes=*/{DT_FLOAT, DT_INT32}, + /*shapes=*/{TensorShape({}), TensorShape({77})}); + auto w_read = + ops::ReadVariableOp(scope.WithOpName("WRead"), handles[1], DT_INT32); + auto w_cast = ops::Cast(scope.WithOpName("Cast"), w_read, DT_FLOAT); + auto v_read = ops::ReadVariableOp( + scope.WithOpName("VRead").WithControlDependencies(w_cast), handles[0], + DT_FLOAT); + ops::Sub(scope.WithOpName("Sub"), v_read, w_cast); + TF_ASSERT_OK(scope.ToGraphDef(&expected)); + } + + TF_EXPECT_GRAPH_EQ(expected, actual); +} + +TEST(VarHandleMerger, ReadVariableOpsWithSameControlDepsMerge) { + std::unique_ptr graph(new Graph(OpRegistry::Global())); + { + Scope scope = Scope::NewRootScope().ExitOnError(); + auto u = ops::VarHandleOp( + scope.WithOpName("U"), DT_FLOAT, TensorShape({}), + ops::VarHandleOp::Attrs().Container("c1").SharedName("n1")); + auto v = ops::VarHandleOp( + scope.WithOpName("V"), DT_FLOAT, TensorShape({}), + ops::VarHandleOp::Attrs().Container("c2").SharedName("n2")); + auto w = ops::VarHandleOp( + scope.WithOpName("W"), DT_INT32, TensorShape({77}), + ops::VarHandleOp::Attrs().Container("c3").SharedName("n3")); + + auto w_read = ops::ReadVariableOp(scope.WithOpName("WRead"), w, DT_INT32); + auto w_cast = ops::Cast(scope.WithOpName("Cast"), w_read, DT_FLOAT); + auto v_read = ops::ReadVariableOp( + scope.WithOpName("VRead").WithControlDependencies(w_cast), v, DT_FLOAT); + auto u_read = ops::ReadVariableOp( + scope.WithOpName("URead").WithControlDependencies(w_cast), u, DT_FLOAT); + auto d = ops::Sub(scope.WithOpName("Sub"), v_read, w_cast); + ops::Sub(scope.WithOpName("Add"), d, u_read); + TF_ASSERT_OK(scope.ToGraph(graph.get())); + } + + VariableMergerPass merger; + GraphOptimizationPassOptions options; + options.graph = &graph; + TF_ASSERT_OK(merger.Run(options)); + GraphDef actual; + graph->ToGraphDef(&actual); + + GraphDef expected; + { + Scope scope = Scope::NewRootScope().ExitOnError(); + auto handles = ops::_VarHandlesOp( + scope.WithOpName("VarHandles_15520412301618992443/_0"), + /*containers=*/{"c1", "c2", "c3"}, + /*shared_names=*/{"n1", "n2", "n3"}, /*N=*/3, + /*dtypes=*/{DT_FLOAT, DT_FLOAT, DT_INT32}, + /*shapes=*/{TensorShape({}), TensorShape({}), TensorShape({77})}); + auto w_read = + ops::ReadVariableOp(scope.WithOpName("WRead"), handles[2], DT_INT32); + auto w_cast = ops::Cast(scope.WithOpName("Cast"), w_read, DT_FLOAT); + auto read = ops::_ReadVariablesOp( + scope.WithOpName("ReadVariables_8281595736094071329/_1") + .WithControlDependencies(w_cast), + /*resources=*/{handles[0], handles[1]}, + /*dtypes=*/{DT_FLOAT, DT_FLOAT}); + auto d = ops::Sub(scope.WithOpName("Sub"), read[1], w_cast); + ops::Sub(scope.WithOpName("Add"), d, read[0]); 
+ TF_ASSERT_OK(scope.ToGraphDef(&expected)); + } + + TF_EXPECT_GRAPH_EQ(expected, actual); +} +} // namespace +} // namespace tensorflow From 3eecd923782d16826598fe50b1af03a0767f7050 Mon Sep 17 00:00:00 2001 From: CuiYifeng Date: Tue, 14 Jul 2020 09:51:06 +0800 Subject: [PATCH 0338/2522] Fix comments --- tensorflow/core/common_runtime/mkl_layout_pass_test.cc | 4 ++++ tensorflow/core/util/mkl_util.h | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/common_runtime/mkl_layout_pass_test.cc b/tensorflow/core/common_runtime/mkl_layout_pass_test.cc index bcc470cefd4..6997199a678 100644 --- a/tensorflow/core/common_runtime/mkl_layout_pass_test.cc +++ b/tensorflow/core/common_runtime/mkl_layout_pass_test.cc @@ -189,6 +189,10 @@ REGISTER_OP("BFloat16Output2") .SetIsStateful(); #endif // ENABLE_INTEL_MKL_BFLOAT16 +///////////////////////////////////////////////////////////////////// +// Unit tests related to node merge optimization +///////////////////////////////////////////////////////////////////// + // clang-format off TEST_F(MklLayoutPassTest, Basic) { InitGraph( diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index d94c6848ad7..56a14cca04a 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -2299,9 +2299,5 @@ inline bool IsConv1x1StrideNot1(memory::dims filter_dims, #define REGISTER_TEST_ALL_TYPES(TEST) REGISTER_TEST_FLOAT32(TEST); #endif // ENABLE_INTEL_MKL_BFLOAT16 -///////////////////////////////////////////////////////////////////// -// Unit tests related to node merge optimization -///////////////////////////////////////////////////////////////////// - #endif // INTEL_MKL #endif // TENSORFLOW_CORE_UTIL_MKL_UTIL_H_ From 07daafc869841646d154a79a5246f6f1ebc2c3ab Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Jul 2020 18:53:57 -0700 Subject: [PATCH 0339/2522] Work around an ODR violation. This template specialization is not always seen and the compiler/linker can choose methods and/or struct instantiations from either this specialization or the primary template. 1. The `blocks` method is marked static in one place and not in the other meaning the arguments passed were shifted by 8 bytes due to the implicit this-pointer. 2. The sizes of the structs in the specialization versus primary template were different and so reading the values were reading uninitialized memory passed the end of the struct. Note: THIS DOES NOT FIX THE UNDERLYING ODR VIOLATION. PiperOrigin-RevId: 321077691 Change-Id: I3998c19ed8983438001b6621719a18d032f4fafe --- .../core/kernels/eigen_contraction_kernel.h | 42 +++++++------------ 1 file changed, 14 insertions(+), 28 deletions(-) diff --git a/tensorflow/core/kernels/eigen_contraction_kernel.h b/tensorflow/core/kernels/eigen_contraction_kernel.h index ef4b9dbc012..2a4cfd1637c 100644 --- a/tensorflow/core/kernels/eigen_contraction_kernel.h +++ b/tensorflow/core/kernels/eigen_contraction_kernel.h @@ -524,14 +524,7 @@ struct GemmKernelProvider { \ TensorContractionKernel(StorageIndex m, StorageIndex k, StorageIndex n, \ StorageIndex bm, StorageIndex bk, StorageIndex bn) \ - : m(m), \ - k(k), \ - n(n), \ - bm(bm), \ - bk(bk), \ - bn(bn), \ - nm0(bm > 0 ? divup(m, bm) : 0), \ - nn0(bn > 0 ? 
divup(n, bn) : 0) {} \ + : m(m), k(k), n(n), bm(bm), bk(bk), bn(bn) {} \ \ enum { HasBeta = true }; \ \ @@ -616,7 +609,8 @@ struct GemmKernelProvider \ - EIGEN_DEVICE_FUNC void deallocate(Device& d, BlockMemHandle handle) { \ + EIGEN_DEVICE_FUNC static void deallocate(Device& d, \ + BlockMemHandle handle) { \ BlockMemAllocator::deallocate(d, handle); \ } \ \ @@ -626,7 +620,8 @@ struct GemmKernelProvider 0 ? divup(n, bn) : 0, lhsBlock); \ \ if (!is_direct_access) { \ lhsBlock->is_direct_access = false; \ @@ -645,7 +640,8 @@ struct GemmKernelProvider 0 ? divup(m, bm) : 0, rhsBlock); \ \ if (!is_direct_access) { \ rhsBlock->is_direct_access = false; \ @@ -723,9 +719,6 @@ struct GemmKernelProvider { \ TensorContractionKernel(StorageIndex m, StorageIndex k, StorageIndex n, \ StorageIndex bm, StorageIndex bk, StorageIndex bn) \ - : m(m), \ - k(k), \ - n(n), \ - bm(bm), \ - bk(bk), \ - bn(bn), \ - nm0(bm > 0 ? divup(m, bm) : 0), \ - nn0(bn > 0 ? divup(n, bn) : 0) {} \ + : m(m), k(k), n(n), bm(bm), bk(bk), bn(bn) {} \ \ enum { HasBeta = true }; \ \ @@ -818,7 +804,8 @@ struct GemmKernelProvider \ - EIGEN_DEVICE_FUNC void deallocate(Device& d, BlockMemHandle handle) { \ + EIGEN_DEVICE_FUNC static void deallocate(Device& d, \ + BlockMemHandle handle) { \ BlockMemAllocator::deallocate(d, handle); \ } \ \ @@ -827,7 +814,8 @@ struct GemmKernelProvider 0 ? divup(n, bn) : 0, lhsBlock); \ \ if (!is_direct_access) { \ lhsBlock->is_direct_access = false; \ @@ -840,7 +828,8 @@ struct GemmKernelProvider 0 ? divup(m, bm) : 0, rhsBlock); \ \ if (!is_direct_access) { \ rhsBlock->is_direct_access = false; \ @@ -890,9 +879,6 @@ struct GemmKernelProvider Date: Mon, 13 Jul 2020 19:39:05 -0700 Subject: [PATCH 0340/2522] Add example and more doc --- tensorflow/python/keras/activations.py | 22 ++++++++++++++++++++-- tensorflow/python/ops/nn_ops.py | 20 ++++++++++++++++++-- 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/keras/activations.py b/tensorflow/python/keras/activations.py index affc37beef4..9884aa24c10 100644 --- a/tensorflow/python/keras/activations.py +++ b/tensorflow/python/keras/activations.py @@ -305,16 +305,34 @@ def relu(x, alpha=0., max_value=None, threshold=0): @keras_export('keras.activations.gelu') @dispatch.add_dispatch_support def gelu(x, approximate=True): - """Gaussian Error Linear Unit. + """Applies the Gaussian error linear unit (GELU) activation function. + + Gaussian error linear unit (GELU) computes + `x * P(X <= x)`, where `P(X) ~ N(0, 1)`. + The (GELU) nonlinearity weights inputs by their value, rather than gates + inputs by their sign as in ReLU. + + For example: + + >>> x = tf.constant([-3.0, -1.0, 0.0, 1.0, 3.0], dtype=tf.float32) + >>> y = tf.keras.activations.gelu(x) + >>> y.numpy() + array([-0.00363752, -0.158808 , 0. , 0.841192 , 2.9963627 ], + dtype=float32) + >>> y = tf.keras.activations.gelu(x, approximate=False) + >>> y.numpy() + array([-0.00404951, -0.15865529, 0. , 0.8413447 , 2.9959507 ], + dtype=float32) Arguments: x: Input tensor. + approximate: A `bool`, whether to enable approximation. Returns: The gaussian error linear activation: `0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x^3)))` if `approximate` is `True` or - `x * P(X <= x) = 0.5 * x * (1 + erf(x / sqrt(2)))`, where P(X) ~ N(0, 1), + `x * P(X <= x) = 0.5 * x * (1 + erf(x / sqrt(2)))`, where `P(X) ~ N(0, 1)`, if `approximate` is `False`. 
Reference: diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index df5303eadf1..6de2a50e67e 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -3498,6 +3498,23 @@ def leaky_relu(features, alpha=0.2, name=None): def gelu(features, approximate=True, name=None): """Compute the Gaussian Error Linear Unit (GELU) activation function. + Gaussian error linear unit (GELU) computes + `x * P(X <= x)`, where `P(X) ~ N(0, 1)`. + The (GELU) nonlinearity weights inputs by their value, rather than gates + inputs by their sign as in ReLU. + + For example: + + >>> x = tf.constant([-3.0, -1.0, 0.0, 1.0, 3.0], dtype=tf.float32) + >>> y = tf.keras.activations.gelu(x) + >>> y.numpy() + array([-0.00363752, -0.158808 , 0. , 0.841192 , 2.9963627 ], + dtype=float32) + >>> y = tf.keras.activations.gelu(x, approximate=False) + >>> y.numpy() + array([-0.00404951, -0.15865529, 0. , 0.8413447 , 2.9959507 ], + dtype=float32) + Args: features: A `Tensor` representing preactivation values. approximate: An optional `bool`. Defaults to `True`. @@ -3513,10 +3530,9 @@ def gelu(features, approximate=True, name=None): with ops.name_scope(name, "Gelu", [features]): features = ops.convert_to_tensor(features, name="features") if approximate: - pi = math_ops.cast(np.pi, features.dtype) coeff = math_ops.cast(0.044715, features.dtype) return 0.5 * features * ( - 1.0 + math_ops.tanh(math_ops.sqrt(2.0 / pi) * ( + 1.0 + math_ops.tanh(0.7978845608028654 * ( features + coeff * math_ops.pow(features, 3)))) else: return 0.5 * features * ( From 14ae0a4e8713fdd92552d9e8ff4127092c4b7b81 Mon Sep 17 00:00:00 2001 From: WindQAQ Date: Mon, 13 Jul 2020 19:40:22 -0700 Subject: [PATCH 0341/2522] Fix wrong module --- tensorflow/python/ops/nn_ops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 6de2a50e67e..511a7a11ef4 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -3506,11 +3506,11 @@ def gelu(features, approximate=True, name=None): For example: >>> x = tf.constant([-3.0, -1.0, 0.0, 1.0, 3.0], dtype=tf.float32) - >>> y = tf.keras.activations.gelu(x) + >>> y = tf.nn.gelu(x) >>> y.numpy() array([-0.00363752, -0.158808 , 0. , 0.841192 , 2.9963627 ], dtype=float32) - >>> y = tf.keras.activations.gelu(x, approximate=False) + >>> y = tf.nn.gelu(x, approximate=False) >>> y.numpy() array([-0.00404951, -0.15865529, 0. , 0.8413447 , 2.9959507 ], dtype=float32) From 7d2b5cd83b8acd45a8c93309ac5d91164bce5c5d Mon Sep 17 00:00:00 2001 From: WindQAQ Date: Mon, 13 Jul 2020 19:42:44 -0700 Subject: [PATCH 0342/2522] Export only v2 api --- tensorflow/python/keras/activations.py | 2 +- tensorflow/python/ops/nn_ops.py | 2 +- .../tools/api/golden/v1/tensorflow.keras.activations.pbtxt | 4 ---- tensorflow/tools/api/golden/v1/tensorflow.nn.pbtxt | 4 ---- 4 files changed, 2 insertions(+), 10 deletions(-) diff --git a/tensorflow/python/keras/activations.py b/tensorflow/python/keras/activations.py index 9884aa24c10..0af46ea8f79 100644 --- a/tensorflow/python/keras/activations.py +++ b/tensorflow/python/keras/activations.py @@ -302,7 +302,7 @@ def relu(x, alpha=0., max_value=None, threshold=0): return K.relu(x, alpha=alpha, max_value=max_value, threshold=threshold) -@keras_export('keras.activations.gelu') +@keras_export('keras.activations.gelu', v1=[]) @dispatch.add_dispatch_support def gelu(x, approximate=True): """Applies the Gaussian error linear unit (GELU) activation function. 
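The literal 0.7978845608028654 introduced for the approximate GELU a few hunks up is simply sqrt(2/pi) precomputed, so the rewrite drops a cast and a sqrt per call without changing the math. A standalone check (illustrative, not part of any patch):

#include <cmath>
#include <cstdio>

// Approximate GELU as documented above: 0.5*x*(1 + tanh(c*(x + 0.044715*x^3))).
double GeluApprox(double x, double c) {
  return 0.5 * x * (1.0 + std::tanh(c * (x + 0.044715 * x * x * x)));
}

int main() {
  const double kPi = 3.14159265358979323846;
  const double kPrecomputed = 0.7978845608028654;
  std::printf("sqrt(2/pi) = %.16f\n", std::sqrt(2.0 / kPi));
  std::printf("gelu(1.0)  = %f (precomputed) vs %f (computed)\n",
              GeluApprox(1.0, kPrecomputed), GeluApprox(1.0, std::sqrt(2.0 / kPi)));
  return 0;
}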
diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 511a7a11ef4..0cb3fa12af3 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -3493,7 +3493,7 @@ def leaky_relu(features, alpha=0.2, name=None): return gen_nn_ops.leaky_relu(features, alpha=alpha, name=name) -@tf_export("nn.gelu") +@tf_export("nn.gelu", v1=[]) @dispatch.add_dispatch_support def gelu(features, approximate=True, name=None): """Compute the Gaussian Error Linear Unit (GELU) activation function. diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.activations.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.activations.pbtxt index 93daa37930c..ee3d1f3d4a2 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.activations.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.activations.pbtxt @@ -12,10 +12,6 @@ tf_module { name: "exponential" argspec: "args=[\'x\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "gelu" - argspec: "args=[\'x\', \'approximate\'], varargs=None, keywords=None, defaults=[\'True\'], " - } member_method { name: "get" argspec: "args=[\'identifier\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.nn.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.nn.pbtxt index 2ab4259a207..932e5037d99 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.nn.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.nn.pbtxt @@ -212,10 +212,6 @@ tf_module { name: "fused_batch_norm" argspec: "args=[\'x\', \'scale\', \'offset\', \'mean\', \'variance\', \'epsilon\', \'data_format\', \'is_training\', \'name\', \'exponential_avg_factor\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'0.001\', \'NHWC\', \'True\', \'None\', \'1.0\'], " } - member_method { - name: "gelu" - argspec: "args=[\'features\', \'approximate\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], " - } member_method { name: "in_top_k" argspec: "args=[\'predictions\', \'targets\', \'k\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " From 1a9b57d729ea4f7cc0da7d192d23872572ceb02d Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Mon, 13 Jul 2020 19:54:15 -0700 Subject: [PATCH 0343/2522] [XLA] Store host shape in ExecutionInput Simplify the APIs explicitly passing the host shape PiperOrigin-RevId: 321083080 Change-Id: I9e124dd4465ee4037f2d0cdbd33f04a43f35abc2 --- .../compiler/xla/client/local_client.cc | 4 +-- tensorflow/compiler/xla/client/local_client.h | 8 +++--- tensorflow/compiler/xla/pjrt/pjrt_client.cc | 19 ++++++-------- tensorflow/compiler/xla/service/executable.cc | 9 ++++--- tensorflow/compiler/xla/service/executable.h | 26 ++++++++++++++++--- tensorflow/compiler/xla/service/hlo_runner.cc | 3 ++- .../xla/tests/buffer_donation_test.cc | 3 ++- tensorflow/compiler/xrt/xrt_state.cc | 2 +- 8 files changed, 46 insertions(+), 28 deletions(-) diff --git a/tensorflow/compiler/xla/client/local_client.cc b/tensorflow/compiler/xla/client/local_client.cc index 5fc9909fa2a..fee92957096 100644 --- a/tensorflow/compiler/xla/client/local_client.cc +++ b/tensorflow/compiler/xla/client/local_client.cc @@ -187,7 +187,7 @@ StatusOr LocalExecutable::Run( std::vector argument_shapes; argument_shapes.reserve(arguments.size()); for (const ExecutionInput& arg : arguments) { - argument_shapes.push_back(&arg.shape()); + argument_shapes.push_back(&arg.host_shape()); } return AsyncCallAndBlockHostUntilDone( argument_shapes, run_options, [&](const 
ExecutableRunOptions& options) { @@ -325,7 +325,7 @@ StatusOr LocalExecutable::RunAsync( std::vector argument_shapes; argument_shapes.reserve(arguments.size()); for (const ExecutionInput& arg : arguments) { - argument_shapes.push_back(&arg.shape()); + argument_shapes.push_back(&arg.host_shape()); } return RunAsync(argument_shapes, std::move(arguments), run_options); } diff --git a/tensorflow/compiler/xla/client/local_client.h b/tensorflow/compiler/xla/client/local_client.h index 8b91f4a1739..bb072a0fe2c 100644 --- a/tensorflow/compiler/xla/client/local_client.h +++ b/tensorflow/compiler/xla/client/local_client.h @@ -64,10 +64,6 @@ class LocalExecutable { // Similar to RunAsync(), but allows for donating argument buffers to the // executable. - StatusOr RunAsync( - absl::Span argument_host_shapes, - std::vector arguments, ExecutableRunOptions run_options); - StatusOr RunAsync(std::vector arguments, ExecutableRunOptions run_options); @@ -78,6 +74,10 @@ class LocalExecutable { Executable* executable() const { return executable_.get(); } private: + StatusOr RunAsync( + absl::Span argument_host_shapes, + std::vector arguments, ExecutableRunOptions run_options); + // Validates that the given arguments and options satisfy various constraints // of the computation. // diff --git a/tensorflow/compiler/xla/pjrt/pjrt_client.cc b/tensorflow/compiler/xla/pjrt/pjrt_client.cc index 7e0d0159f4b..06ec69f44c1 100644 --- a/tensorflow/compiler/xla/pjrt/pjrt_client.cc +++ b/tensorflow/compiler/xla/pjrt/pjrt_client.cc @@ -1383,7 +1383,7 @@ StatusOr MakeTupleHelper( local_device->compute_stream()->parent(), root_table_memory.cref())); } - ExecutionInput execution_input(on_device_shape); + ExecutionInput execution_input(on_device_shape, on_host_shape); ShapeTree::iterator input_iterator = execution_input.MutableBuffers()->begin(); ShapeTree::iterator iterator_end = @@ -1521,7 +1521,6 @@ StatusOr PjRtExecutable::EnqueueExecution( << " mapped to device ordinal for execution: " << device_ordinal; absl::flat_hash_set events; - std::vector argument_host_shapes; std::vector execution_inputs; device_buffers->reserve(argument_handles.size()); const absl::flat_hash_set& parameters_that_must_be_donated = @@ -1570,24 +1569,22 @@ StatusOr PjRtExecutable::EnqueueExecution( } LocalDeviceState* device_state = &client_->device_state(device_ordinal); - TupleHandle tuple_handle; + absl::optional tuple_handle; if (parameter_is_tupled_arguments_ && !options.arguments_are_tupled) { TF_ASSIGN_OR_RETURN(tuple_handle, MakeTupleHelper(client_, device_state, argument_handles, *device_buffers, device_ordinal)); - events.insert(tuple_handle.event.get()); - execution_inputs.emplace_back(std::move(tuple_handle.execution_input)); - argument_host_shapes.push_back(&tuple_handle.on_host_shape); + events.insert(tuple_handle->event.get()); + execution_inputs.emplace_back(std::move(tuple_handle->execution_input)); } else { - argument_host_shapes.reserve(argument_handles.size()); execution_inputs.reserve(argument_handles.size()); for (int i = 0; i < argument_handles.size(); ++i) { PjRtBuffer* handle = argument_handles[i]; - argument_host_shapes.push_back(&handle->on_host_shape()); const PjRtBuffer::ScopedHold& device_buffer = (*device_buffers)[i]; // Make an ExecutionInput from the device buffer. 
- execution_inputs.emplace_back(handle->on_device_shape()); + execution_inputs.emplace_back(handle->on_device_shape(), + handle->on_host_shape()); ExecutionInput& execution_input = execution_inputs.back(); ShapeTree::iterator input_iterator = execution_input.MutableBuffers()->begin(); @@ -1623,8 +1620,8 @@ StatusOr PjRtExecutable::EnqueueExecution( device_state->compute_semaphore().ScopedAcquire(1)); StatusOr result_buffer_or_status = - executables_[executable_idx]->RunAsync( - argument_host_shapes, std::move(execution_inputs), run_options); + executables_[executable_idx]->RunAsync(std::move(execution_inputs), + run_options); VLOG(1) << "Replica " << replica << " partition " << partition << " completed; ok=" << result_buffer_or_status.ok(); diff --git a/tensorflow/compiler/xla/service/executable.cc b/tensorflow/compiler/xla/service/executable.cc index 61ce6200a28..d5cf2ee9ac0 100644 --- a/tensorflow/compiler/xla/service/executable.cc +++ b/tensorflow/compiler/xla/service/executable.cc @@ -93,7 +93,8 @@ StatusOr Executable::ExecuteOnStream( static ExecutionInput MakeMaybeOwningDeviceMemoryTree( const ShapedBuffer& shaped_buffer) { - ExecutionInput result(shaped_buffer.on_device_shape()); + ExecutionInput result(shaped_buffer.on_device_shape(), + shaped_buffer.on_host_shape()); shaped_buffer.buffers().ForEachElement( [&](const ShapeIndex& index, const se::DeviceMemoryBase& mem) { result.SetBuffer(index, MaybeOwningDeviceMemory(mem)); @@ -105,10 +106,10 @@ StatusOr Executable::ExecuteAsyncOnStream( const ServiceExecutableRunOptions* run_options, absl::Span arguments, HloExecutionProfile* hlo_execution_profile) { - std::vector args(arguments.size()); - auto out_it = args.begin(); + std::vector args; + args.reserve(arguments.size()); for (const ShapedBuffer* arg : arguments) { - *out_it++ = MakeMaybeOwningDeviceMemoryTree(*arg); + args.emplace_back(MakeMaybeOwningDeviceMemoryTree(*arg)); } TF_ASSIGN_OR_RETURN(ExecutionOutput out, ExecuteAsyncOnStream(run_options, std::move(args), diff --git a/tensorflow/compiler/xla/service/executable.h b/tensorflow/compiler/xla/service/executable.h index 6881f6dd68a..f002807431c 100644 --- a/tensorflow/compiler/xla/service/executable.h +++ b/tensorflow/compiler/xla/service/executable.h @@ -60,10 +60,17 @@ namespace xla { // with their indices absent from unowned_indices_. class ExecutionInput { public: - ExecutionInput() = default; - explicit ExecutionInput(xla::Shape shape) : buffers_(std::move(shape)) {} - explicit ExecutionInput(ShapeTree buffers) - : buffers_(std::move(buffers)) {} + explicit ExecutionInput(xla::Shape shape, xla::Shape host_shape) + : buffers_(std::move(shape)) { + SetHostShape(std::move(host_shape)); + } + + explicit ExecutionInput(ShapeTree buffers, + xla::Shape host_shape) + : buffers_(std::move(buffers)) { + SetHostShape(std::move(host_shape)); + } + ExecutionInput(ExecutionInput&&) = default; ~ExecutionInput(); @@ -74,6 +81,10 @@ class ExecutionInput { return dynamic_shape_ != nullptr ? *dynamic_shape_ : buffers_.shape(); } + const Shape& host_shape() const { + return host_shape_ != nullptr ? *host_shape_ : shape(); + } + Status SetDynamicShape(Shape dynamic_shape); xla::StatusOr ToShapedBuffer( @@ -107,11 +118,18 @@ class ExecutionInput { } private: + void SetHostShape(xla::Shape host_shape) { + if (shape() != host_shape) { + host_shape_ = absl::make_unique(std::move(host_shape)); + } + } + ShapeTree buffers_; // Set of indices of buffers that should be returned to the caller if an error // occurs when enqueuing the computation. 
std::set unowned_indices_; std::unique_ptr dynamic_shape_; + std::unique_ptr host_shape_; }; // ExecutionOutput encapsulates the output buffers of a execution and the diff --git a/tensorflow/compiler/xla/service/hlo_runner.cc b/tensorflow/compiler/xla/service/hlo_runner.cc index 30a7916c408..83130108dd7 100644 --- a/tensorflow/compiler/xla/service/hlo_runner.cc +++ b/tensorflow/compiler/xla/service/hlo_runner.cc @@ -211,7 +211,8 @@ static std::vector ExecutionInputsFromScopedShapedBuffers( *buffer_tree.mutable_element(index) = execution_input_buffer; } }); - execution_inputs.emplace_back(std::move(buffer_tree)); + execution_inputs.emplace_back(std::move(buffer_tree), + input_buffer.on_host_shape()); } return execution_inputs; } diff --git a/tensorflow/compiler/xla/tests/buffer_donation_test.cc b/tensorflow/compiler/xla/tests/buffer_donation_test.cc index 5f936870103..856ea7c9b44 100644 --- a/tensorflow/compiler/xla/tests/buffer_donation_test.cc +++ b/tensorflow/compiler/xla/tests/buffer_donation_test.cc @@ -119,7 +119,8 @@ class BufferDonationTest : public HloTestBase { } }); - args.emplace_back(ExecutionInput(std::move(owned_buffers))); + args.emplace_back( + ExecutionInput(std::move(owned_buffers), argument_literal.shape())); } TF_ASSERT_OK_AND_ASSIGN( diff --git a/tensorflow/compiler/xrt/xrt_state.cc b/tensorflow/compiler/xrt/xrt_state.cc index c2f9a1c62c9..c4094795a96 100644 --- a/tensorflow/compiler/xrt/xrt_state.cc +++ b/tensorflow/compiler/xrt/xrt_state.cc @@ -650,7 +650,7 @@ Status XRTTupleAllocation::AliasBufferFrom(const XRTTupleAllocation& source, xla::StatusOr XRTTupleAllocation::ToExecutionInput( const std::function(const xla::ShapeIndex&)>& alias_checker) { - xla::ExecutionInput result(on_device_shape()); + xla::ExecutionInput result(on_device_shape(), on_host_shape()); for (const auto& index_buffer : buffers_) { if (index_buffer.second == nullptr || (index_buffer.second->allocation().is_null() && From 5121c68bd8211cceb2c9c2a7f3a704ea7ba86c41 Mon Sep 17 00:00:00 2001 From: Henry Tan Date: Mon, 13 Jul 2020 19:56:55 -0700 Subject: [PATCH 0344/2522] Change Compile() visibility public to follow the base class. PiperOrigin-RevId: 321083287 Change-Id: I9ef48e60b324c9a24ebe404a83a3e3428a95a80e --- tensorflow/core/tpu/kernels/tpu_compile_op_impl.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_impl.h b/tensorflow/core/tpu/kernels/tpu_compile_op_impl.h index cd8ef78614a..3f058683223 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_impl.h +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_impl.h @@ -51,9 +51,6 @@ class TpuCompileOpKernelImpl : public TpuCompileOpKernelCommon { function, metadata, num_computations, return_hlo_protos, unload_cache_on_session_close, /*persistent_cache=*/nullptr) {} - private: - FRIEND_TEST(TpuCompileOpImplTest, Compile); - Status Compile( const absl::variant& computation, const XLA_TpuMeshState* mesh_state, From bbecc1f667623ff1fbf914cd07f34a0abff64e88 Mon Sep 17 00:00:00 2001 From: Teng Lu Date: Tue, 14 Jul 2020 11:15:37 +0800 Subject: [PATCH 0345/2522] Fix comment typo of RandomUniform. 
Co-authored-by: Penporn Koanantakool <38085909+penpornk@users.noreply.github.com> --- tensorflow/core/kernels/random_op_cpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/random_op_cpu.h b/tensorflow/core/kernels/random_op_cpu.h index 86dc7752c46..232651dc429 100644 --- a/tensorflow/core/kernels/random_op_cpu.h +++ b/tensorflow/core/kernels/random_op_cpu.h @@ -89,7 +89,7 @@ struct FillPhiloxRandomTask { // Decide skip strides according to different kResultElementCount: // * `1 = (4 + 3) / 4` for normal Distribution. // * `1 = (2 + 3) / 4` for double/int64 Distribution. - // * `4 = (16 + 3) / 4` for vecotorized float/bfloat16 Distribution. + // * `4 = (16 + 3) / 4` for vectorized float/bfloat16 Distribution. const int skip_strides = (kGroupSize + gen.kResultElementCount - 1) / gen.kResultElementCount; gen.Skip(start_group * skip_strides); From 963717729afd197d9df9d6a3d88b3bd08f5e6ca2 Mon Sep 17 00:00:00 2001 From: WindQAQ Date: Mon, 13 Jul 2020 20:29:20 -0700 Subject: [PATCH 0346/2522] Run pylint --- tensorflow/python/keras/activations.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/keras/activations.py b/tensorflow/python/keras/activations.py index 0af46ea8f79..5dba2be1536 100644 --- a/tensorflow/python/keras/activations.py +++ b/tensorflow/python/keras/activations.py @@ -332,7 +332,8 @@ def gelu(x, approximate=True): The gaussian error linear activation: `0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x^3)))` if `approximate` is `True` or - `x * P(X <= x) = 0.5 * x * (1 + erf(x / sqrt(2)))`, where `P(X) ~ N(0, 1)`, + `x * P(X <= x) = 0.5 * x * (1 + erf(x / sqrt(2)))`, + where `P(X) ~ N(0, 1)`, if `approximate` is `False`. Reference: From e3078a5fa84ba69a6923408f33e29376f81d6bd3 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Mon, 13 Jul 2020 20:26:47 -0700 Subject: [PATCH 0347/2522] [MLIR:TF] Fold no-op reshape operations PiperOrigin-RevId: 321086115 Change-Id: I8a084a9680e0238aa03fe685ca381e034d6f630b --- .../mlir/tensorflow/ir/tf_generated_ops.td | 1 + .../compiler/mlir/tensorflow/ir/tf_ops.cc | 98 +++++++++++++++++ .../mlir/tensorflow/tests/canonicalize.mlir | 74 +++++++++++++ .../mlir/tensorflow/tests/lower_tf.mlir | 16 +-- .../tensorflow/tests/unroll-batch-matmul.mlir | 100 ++++++++---------- 5 files changed, 221 insertions(+), 68 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td index 7bbdce6b985..0ef650487a8 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td @@ -7284,6 +7284,7 @@ reshape(t, []) ==> 7 }]; let hasCanonicalizer = 1; + let hasFolder = 1; } def TF_ResizeBilinearOp : TF_Op<"ResizeBilinear", [NoSideEffect]> { diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc index 101de17122a..cfd3b61b2c3 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc @@ -2917,6 +2917,104 @@ void ReshapeOp::getCanonicalizationPatterns(OwningRewritePatternList &results, results.insert(context); } +OpFoldResult ReshapeOp::fold(ArrayRef operands) { + Value tensor = this->tensor(); + Value shape = this->shape(); + + // Fold reshape if operand and result types are the same and all dimensions + // are statically known (no-op reshape). + // TODO(ezhulenev): Add the same folding for BroadcastToOp. 
+ auto result_ty = getType().dyn_cast(); + if (result_ty && result_ty.hasStaticShape() && + result_ty == tensor.getType()) { + return tensor; + } + + // Fold reshape if the shape is computed from the input tensor: + // + // %shape = tf.Shape(%arg) // [? x ...] + // %dim0 = tf.StridedSlice(%shape, 0, 1, 1) // get unknown dim value + // %new_shape = tf.Pack(dim0, ...) { axis = 0 } // [? x ...] + // %reshape = tf.Reshape(%arg, %new_shape) // this is no-op + // + // Where `...` are some statically known dimensions. In this case reshape is + // a no-op and can be replaced by %arg (assuming `...` are equal). + auto pack_op = dyn_cast_or_null(shape.getDefiningOp()); + if (!pack_op || pack_op.values().size() < 2) return {}; + + // Dimensions packed along axis = 0 (pack scalars into vector). + if (pack_op.axis().getSExtValue() != 0) return {}; + + // First packed value is defined by a strided slice operation. + auto slice_op = + dyn_cast_or_null(pack_op.values()[0].getDefiningOp()); + if (!slice_op) return {}; + + // Input to the slice op is defined by shape operation. + auto shape_op = dyn_cast_or_null(slice_op.input().getDefiningOp()); + if (!shape_op || shape_op.input() != tensor) return {}; + + // All masks are `0` except `shrink_axis_mask` which is equal to `1` (slicing + // scalar value from input vector). + if (slice_op.begin_mask().getSExtValue() != 0 || + slice_op.ellipsis_mask().getSExtValue() != 0 || + slice_op.end_mask().getSExtValue() != 0 || + slice_op.new_axis_mask().getSExtValue() != 0 || + slice_op.shrink_axis_mask().getSExtValue() != 1) + return {}; + + // Returns a value if the `value` is defined by a ConstOp with a single + // integer element in it and has an expected rank. + auto get_value = [](Value value, int expected_rank) -> Optional { + auto const_op = dyn_cast_or_null(value.getDefiningOp()); + if (!const_op) return None; + + auto value_attr = const_op.value().dyn_cast(); + if (!value_attr || value_attr.getNumElements() != 1) return None; + + auto value_ty = value_attr.getType(); + if (!value_ty.hasRank() || value_ty.getRank() != expected_rank) return None; + + auto splat = value_attr.getSplatValue(); + return splat.getValue().getSExtValue(); + }; + + // All other packed values are scalar constants. + SmallVector packed_dims; + packed_dims.reserve(pack_op.values().size() - 1); + for (Value operand : llvm::drop_begin(pack_op.values(), 1)) { + if (auto dim = get_value(operand, /*expected_rank=*/0)) { + packed_dims.push_back(*dim); + } else { + return {}; + } + } + + // Slice exactly the first shape dimension: + // begin = [0] end = [1], strides = [1] + auto begin = get_value(slice_op.begin(), /*expected_rank=*/1); + auto end = get_value(slice_op.end(), /*expected_rank=*/1); + auto strides = get_value(slice_op.strides(), /*expected_rank=*/1); + if (!begin.hasValue() || !end.hasValue() || !strides.hasValue() || + *begin != 0 || *end != 1 || *strides != 1) + return {}; + + // First tensor dimension is dynamic. + auto arg_ty = tensor.getType().dyn_cast(); + if (!arg_ty || arg_ty.getNumDynamicDims() != 1 || !arg_ty.isDynamicDim(0)) + return {}; + + // Argument tensor rank is equal to the number of packed dimensions. + if (arg_ty.getRank() != pack_op.values().size()) return {}; + + // All other dimensions are statically known and equal to packed dims. 
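+  // For example, %arg : tensor<?x1x2xf32> reshaped with Pack(dim0, 1, 2) keeps
+  // trailing dims [1, 2], so the reshape folds away to %arg.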
+ auto arg_dims = llvm::drop_begin(arg_ty.getShape(), 1); + if (!std::equal(arg_dims.begin(), arg_dims.end(), packed_dims.begin())) + return {}; + + return tensor; +} + //===----------------------------------------------------------------------===// // SelectOp //===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir b/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir index 8597740a4ae..d61fc66a5e6 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir @@ -377,6 +377,80 @@ func @testRedundantReshape(%arg0: tensor<4x4xi32>) -> tensor<2x8xi32> { // CHECK: return %1 : tensor<2x8xi32> } +// CHECK-LABEL: func @testReshapeNoOp +func @testReshapeNoOp(%arg0: tensor<2x4xf32>, %arg1: tensor<2xi32>) -> tensor<2x4xf32> { + %0 = "tf.Reshape"(%arg0, %arg1) : (tensor<2x4xf32>, tensor<2xi32>) -> tensor<2x4xf32> + + // CHECK: return %arg0 + return %0 : tensor<2x4xf32> +} + +// CHECK-LABEL: func @testReshapeNoOpShapeComputation +func @testReshapeNoOpShapeComputation(%arg0: tensor, %arg1: tensor) -> (tensor, tensor, tensor, tensor, tensor, tensor) { + // Test dimensions sizes. + %d1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %d2 = "tf.Const"() {value = dense<2> : tensor} : () -> tensor + + // Slice bounds. + %0 = "tf.Const"() {value = dense<0> : tensor<1xi32>} : () -> tensor<1xi32> + %1 = "tf.Const"() {value = dense<1> : tensor<1xi32>} : () -> tensor<1xi32> + %2 = "tf.Const"() {value = dense<2> : tensor<1xi32>} : () -> tensor<1xi32> + + // Fold reshape if the shape is computed from the input tensor: + // + // %shape = tf.Shape(%arg) // [? x ...] + // %dim0 = tf.StridedSlice(%shape, 0, 1, 1) // get unknown dim value + // %new_shape = tf.Pack(dim0, ...) { axis = 0 } // [? x ...] + // %reshape = tf.Reshape(%arg, %new_shape) // this is no-op + // + // Where `...` are some statically known dimensions. In this case reshape is + // a no-op and can be replaced by %arg (assuming `...` are equal). + + // Test Rank 2 + %3 = "tf.Shape"(%arg0) : (tensor) -> tensor<2xi32> + %4 = "tf.StridedSlice"(%3, %0, %1, %1) {shrink_axis_mask = 1 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %5 = "tf.Pack"(%4, %d1) {axis = 0 : i64} : (tensor, tensor) -> tensor<2xi32> + %6 = "tf.Reshape"(%arg0, %5) : (tensor, tensor<2xi32>) -> tensor + + // Test Rank 3. 
+ + %7 = "tf.Shape"(%arg1) : (tensor) -> tensor<3xi32> + %8 = "tf.StridedSlice"(%7, %0, %1, %1) {shrink_axis_mask = 1 : i64} : (tensor<3xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %9 = "tf.Pack"(%8, %d1, %d2) {axis = 0 : i64} : (tensor, tensor, tensor) -> tensor<3xi32> + %10 = "tf.Reshape"(%arg1, %9) : (tensor, tensor<3xi32>) -> tensor + + // Shape was taken from the op that is not reshaped in the end: + // Reshape(%arg1) vs Shape(%arg0) + %11 = "tf.StridedSlice"(%3, %0, %1, %1) {shrink_axis_mask = 1 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %12 = "tf.Pack"(%11, %d1, %d2) {axis = 0 : i64} : (tensor, tensor, tensor) -> tensor<3xi32> + // CHECK: %[[RESHAPE0:.*]] = "tf.Reshape" + %13 = "tf.Reshape"(%arg1, %12) : (tensor, tensor<3xi32>) -> tensor + + // Packed dimensions have different order from the reshape operand: + // [?, 1, 2] vs [?, 2, 1] + %14 = "tf.StridedSlice"(%7, %0, %1, %1) {shrink_axis_mask = 1 : i64} : (tensor<3xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %15 = "tf.Pack"(%14, %d2, %d1) {axis = 0 : i64} : (tensor, tensor, tensor) -> tensor<3xi32> + // CHECK: %[[RESHAPE1:.*]] = "tf.Reshape" + %16 = "tf.Reshape"(%arg1, %15) : (tensor, tensor<3xi32>) -> tensor + + // StridedSlice takes second dimension from the shape: + // begin = [1], end = [2], stride = [1] + %17 = "tf.StridedSlice"(%7, %1, %2, %1) {shrink_axis_mask = 1 : i64} : (tensor<3xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %18 = "tf.Pack"(%17, %d1, %d2) {axis = 0 : i64} : (tensor, tensor, tensor) -> tensor<3xi32> + // CHECK: %[[RESHAPE2:.*]] = "tf.Reshape" + %19 = "tf.Reshape"(%arg1, %18) : (tensor, tensor<3xi32>) -> tensor + + // Packed dimensions have higher rank than the reshape operand: + // [?, 1] vs [?, 1, 1] + %20 = "tf.StridedSlice"(%3, %0, %1, %1) {shrink_axis_mask = 1 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %21 = "tf.Pack"(%20, %d1, %d1) {axis = 0 : i64} : (tensor, tensor, tensor) -> tensor<3xi32> + // CHECK: %[[RESHAPE3:.*]] = "tf.Reshape" + %22 = "tf.Reshape"(%arg0, %21) : (tensor, tensor<3xi32>) -> tensor + + // CHECK: return %arg0, %arg1, %[[RESHAPE0]], %[[RESHAPE1]], %[[RESHAPE2]], %[[RESHAPE3]] + return %6, %10, %13, %16, %19, %22 : tensor, tensor, tensor, tensor, tensor, tensor +} + // CHECK-LABEL: testSelectScalarPred func @testSelectScalarPred(%arg0: tensor, %arg1: tensor<4x2xf16>, %arg2: tensor<4x2xf16>) -> tensor<4x2xf16> { // CHECK-NEXT: "tf.SelectV2"(%arg0, %arg1, %arg2) : (tensor, tensor<4x2xf16>, tensor<4x2xf16>) -> tensor<4x2xf16> diff --git a/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir b/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir index 3215055a249..78e10fa797f 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir @@ -371,9 +371,7 @@ func @addN_variant(%arg0: tensor>>, %arg1: tensor) -> tensor<2x2xf32> { - // CHECK-DAG: %[[SHAPE:.*]] = "tf.Const"() {value = dense<[-1, 2]> : tensor<2xi64>} : () -> tensor<2xi64> - // CHECK-DAG: %[[INP:.*]] = "tf.Reshape"(%arg0, %[[SHAPE]]) : (tensor<2x2xf32>, tensor<2xi64>) -> tensor<2x2xf32> - // CHECK-DAG: %[[ITEMS:.*]]:2 = "tf.Unpack"(%[[INP]]) {axis = 0 : i64} : (tensor<2x2xf32>) -> (tensor<2xf32>, tensor<2xf32>) + // CHECK-DAG: %[[ITEMS:.*]]:2 = "tf.Unpack"(%arg0) {axis = 0 : i64} : (tensor<2x2xf32>) -> (tensor<2xf32>, tensor<2xf32>) // CHECK-DAG: %[[AXIS:.*]] = "tf.Const"() {value = dense<0> : tensor} : () -> tensor // 
CHECK-DAG: %[[RESULT:.*]] = "tf.ConcatV2"(%[[ITEMS]]#1, %[[ITEMS]]#0, %[[AXIS]]) : (tensor<2xf32>, tensor<2xf32>, tensor) -> tensor<2x2xf32> // CHECK: return %[[RESULT]] @@ -411,9 +409,7 @@ func @DynamicStitch_uint8(%arg0: tensor<2x2xui8>) -> tensor<2x2xui8> { // CHECK-LABEL: func @DynamicStitch_scalar_item func @DynamicStitch_scalar_item(%arg0: tensor<2xf32>) -> tensor<2xf32> { - // CHECK-DAG: %[[SHAPE:.*]] = "tf.Const"() {value = dense<-1> : tensor<1xi64>} : () -> tensor<1xi64> - // CHECK-DAG: %[[INP:.*]] = "tf.Reshape"(%arg0, %[[SHAPE]]) : (tensor<2xf32>, tensor<1xi64>) -> tensor<2xf32> - // CHECK-DAG: %[[ITEMS]]:2 = "tf.Unpack"(%[[INP]]) {axis = 0 : i64} : (tensor<2xf32>) -> (tensor, tensor) + // CHECK-DAG: %[[ITEMS]]:2 = "tf.Unpack"(%arg0) {axis = 0 : i64} : (tensor<2xf32>) -> (tensor, tensor) // CHECK-DAG: %[[AXIS:.*]] = "tf.Const"() {value = dense<0> : tensor} : () -> tensor // CHECK-DAG: %[[RESULT]] = "tf.ConcatV2"(%[[ITEMS]]#1, %[[ITEMS]]#0, %[[AXIS]]) : (tensor, tensor, tensor) -> tensor<2xf32> // CHECK: return %[[RESULT]] @@ -425,9 +421,7 @@ func @DynamicStitch_scalar_item(%arg0: tensor<2xf32>) -> tensor<2xf32> { // CHECK-LABEL: func @DynamicStitch_matrix_item func @DynamicStitch_matrix_item(%arg0: tensor<2x2x2xf32>) -> tensor<2x2x2xf32> { - // CHECK-DAG: %[[SHAPE:.*]] = "tf.Const"() {value = dense<[-1, 2, 2]> : tensor<3xi64>} : () -> tensor<3xi64> - // CHECK-DAG: %[[INP:.*]] = "tf.Reshape"(%arg0, %[[SHAPE]]) : (tensor<2x2x2xf32>, tensor<3xi64>) -> tensor<2x2x2xf32> - // CHECK-DAG: %[[ITEMS:.*]]:2 = "tf.Unpack"(%[[INP]]) {axis = 0 : i64} : (tensor<2x2x2xf32>) -> (tensor<2x2xf32>, tensor<2x2xf32>) + // CHECK-DAG: %[[ITEMS:.*]]:2 = "tf.Unpack"(%arg0) {axis = 0 : i64} : (tensor<2x2x2xf32>) -> (tensor<2x2xf32>, tensor<2x2xf32>) // CHECK-DAG: %[[AXIS:.*]] = "tf.Const"() {value = dense<0> : tensor} : () -> tensor // CHECK-DAG: %[[RESULT:.*]] = "tf.ConcatV2"(%[[ITEMS]]#1, %[[ITEMS]]#0, %[[AXIS]]) : (tensor<2x2xf32>, tensor<2x2xf32>, tensor) -> tensor<2x2x2xf32> // CHECK: return %[[RESULT]] @@ -446,9 +440,7 @@ func @DynamicStitch_dynamic(%arg0: tensor<*xi32>, %arg1: tensor<*xf32>) -> tenso // CHECK-LABEL: func @DynamicStitch_duplicates func @DynamicStitch_duplicates(%arg0: tensor<2x2xf32>) -> tensor<1x2xf32> { - // CHECK-DAG: %[[SHAPE:.*]] = "tf.Const"() {value = dense<[-1, 2]> : tensor<2xi64>} : () -> tensor<2xi64> - // CHECK-DAG: %[[INP:.*]] = "tf.Reshape"(%arg0, %[[SHAPE]]) : (tensor<2x2xf32>, tensor<2xi64>) -> tensor<2x2xf32> - // CHECK-DAG: %[[ITEMS:.*]]:2 = "tf.Unpack"(%[[INP]]) {axis = 0 : i64} : (tensor<2x2xf32>) -> (tensor<2xf32>, tensor<2xf32>) + // CHECK-DAG: %[[ITEMS:.*]]:2 = "tf.Unpack"(%arg0) {axis = 0 : i64} : (tensor<2x2xf32>) -> (tensor<2xf32>, tensor<2xf32>) // CHECK-DAG: %[[AXIS:.*]] = "tf.Const"() {value = dense<0> : tensor} : () -> tensor // CHECK-DAG: %[[RESULT:.*]] = "tf.ConcatV2"(%[[ITEMS]]#1, %[[AXIS]]) : (tensor<2xf32>, tensor) -> tensor<1x2xf32> // CHECK: return %[[RESULT]] diff --git a/tensorflow/compiler/mlir/tensorflow/tests/unroll-batch-matmul.mlir b/tensorflow/compiler/mlir/tensorflow/tests/unroll-batch-matmul.mlir index 5a3f0b6e997..7cf5f19523d 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/unroll-batch-matmul.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/unroll-batch-matmul.mlir @@ -67,41 +67,35 @@ func @batchMatMulV2FlatInput(%arg0: tensor<3x4x5xf32>, %arg1: tensor<3x5x6xf32>) return %0 : tensor<3x4x6xf32> // CHECK-LABEL: batchMatMulV2FlatInput - // CHECK: %[[cst:.*]] = "tf.Const"() {value = dense<[3, 4, 5]> : tensor<3xi64>} // CHECK: 
%[[cst_0:.*]] = "tf.Const"() {value = dense<[1, 4, 5]> : tensor<3xi64>} // CHECK: %[[cst_1:.*]] = "tf.Const"() {value = dense<[4, 5]> : tensor<2xi64>} - // CHECK: %[[cst_2:.*]] = "tf.Const"() {value = dense<[3, 5, 6]> : tensor<3xi64>} - // CHECK: %[[cst_3:.*]] = "tf.Const"() {value = dense<0> : tensor<3xi64>} - // CHECK: %[[cst_4:.*]] = "tf.Const"() {value = dense<[1, 0, 0]> : tensor<3xi64>} - // CHECK: %[[cst_5:.*]] = "tf.Const"() {value = dense<[2, 0, 0]> : tensor<3xi64>} - // CHECK: %[[cst_6:.*]] = "tf.Const"() {value = dense<[1, 5, 6]> : tensor<3xi64>} - // CHECK: %[[cst_7:.*]] = "tf.Const"() {value = dense<[5, 6]> : tensor<2xi64>} - // CHECK: %[[cst_8:.*]] = "tf.Const"() {value = dense<[3, 4, 6]> : tensor<3xi64>} + // CHECK: %[[cst_2:.*]] = "tf.Const"() {value = dense<0> : tensor<3xi64>} + // CHECK: %[[cst_3:.*]] = "tf.Const"() {value = dense<[1, 0, 0]> : tensor<3xi64>} + // CHECK: %[[cst_4:.*]] = "tf.Const"() {value = dense<[2, 0, 0]> : tensor<3xi64>} + // CHECK: %[[cst_5:.*]] = "tf.Const"() {value = dense<[1, 5, 6]> : tensor<3xi64>} + // CHECK: %[[cst_6:.*]] = "tf.Const"() {value = dense<[5, 6]> : tensor<2xi64>} - // CHECK: %[[v0:.*]] = "tf.Reshape"(%arg0, %[[cst]]) : (tensor<3x4x5xf32>, tensor<3xi64>) -> tensor<3x4x5xf32> - // CHECK: %[[v1:.*]] = "tf.Slice"(%[[v0]], %[[cst_3]], %[[cst_0]]) : (tensor<3x4x5xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x4x5xf32> - // CHECK: %[[v2:.*]] = "tf.Reshape"(%[[v1]], %[[cst_1]]) : (tensor<1x4x5xf32>, tensor<2xi64>) -> tensor<4x5xf32> - // CHECK: %[[v3:.*]] = "tf.Slice"(%[[v0]], %[[cst_4]], %[[cst_0]]) : (tensor<3x4x5xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x4x5xf32> - // CHECK: %[[v4:.*]] = "tf.Reshape"(%[[v3]], %[[cst_1]]) : (tensor<1x4x5xf32>, tensor<2xi64>) -> tensor<4x5xf32> - // CHECK: %[[v5:.*]] = "tf.Slice"(%[[v0]], %[[cst_5]], %[[cst_0]]) : (tensor<3x4x5xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x4x5xf32> - // CHECK: %[[v6:.*]] = "tf.Reshape"(%[[v5]], %[[cst_1]]) : (tensor<1x4x5xf32>, tensor<2xi64>) -> tensor<4x5xf32> + // CHECK: %[[v0:.*]] = "tf.Slice"(%arg0, %[[cst_2]], %[[cst_0]]) : (tensor<3x4x5xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x4x5xf32> + // CHECK: %[[v1:.*]] = "tf.Reshape"(%[[v0]], %[[cst_1]]) : (tensor<1x4x5xf32>, tensor<2xi64>) -> tensor<4x5xf32> + // CHECK: %[[v2:.*]] = "tf.Slice"(%arg0, %[[cst_3]], %[[cst_0]]) : (tensor<3x4x5xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x4x5xf32> + // CHECK: %[[v3:.*]] = "tf.Reshape"(%[[v2]], %[[cst_1]]) : (tensor<1x4x5xf32>, tensor<2xi64>) -> tensor<4x5xf32> + // CHECK: %[[v4:.*]] = "tf.Slice"(%arg0, %[[cst_4]], %[[cst_0]]) : (tensor<3x4x5xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x4x5xf32> + // CHECK: %[[v5:.*]] = "tf.Reshape"(%[[v4]], %[[cst_1]]) : (tensor<1x4x5xf32>, tensor<2xi64>) -> tensor<4x5xf32> - // CHECK: %[[v7:.*]] = "tf.Reshape"(%arg1, %[[cst_2]]) : (tensor<3x5x6xf32>, tensor<3xi64>) -> tensor<3x5x6xf32> - // CHECK: %[[v8:.*]] = "tf.Slice"(%[[v7]], %[[cst_3]], %[[cst_6]]) : (tensor<3x5x6xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x5x6xf32> - // CHECK: %[[v9:.*]] = "tf.Reshape"(%[[v8]], %[[cst_7]]) : (tensor<1x5x6xf32>, tensor<2xi64>) -> tensor<5x6xf32> - // CHECK: %[[v10:.*]] = "tf.Slice"(%[[v7]], %[[cst_4]], %[[cst_6]]) : (tensor<3x5x6xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x5x6xf32> - // CHECK: %[[v11:.*]] = "tf.Reshape"(%[[v10]], %[[cst_7]]) : (tensor<1x5x6xf32>, tensor<2xi64>) -> tensor<5x6xf32> - // CHECK: %[[v12:.*]] = "tf.Slice"(%[[v7]], %[[cst_5]], %[[cst_6]]) : (tensor<3x5x6xf32>, tensor<3xi64>, tensor<3xi64>) -> 
tensor<1x5x6xf32> - // CHECK: %[[v13:.*]] = "tf.Reshape"(%[[v12]], %[[cst_7]]) : (tensor<1x5x6xf32>, tensor<2xi64>) -> tensor<5x6xf32> + // CHECK: %[[v6:.*]] = "tf.Slice"(%arg1, %[[cst_2]], %[[cst_5]]) : (tensor<3x5x6xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x5x6xf32> + // CHECK: %[[v7:.*]] = "tf.Reshape"(%[[v6]], %[[cst_6]]) : (tensor<1x5x6xf32>, tensor<2xi64>) -> tensor<5x6xf32> + // CHECK: %[[v8:.*]] = "tf.Slice"(%arg1, %[[cst_3]], %[[cst_5]]) : (tensor<3x5x6xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x5x6xf32> + // CHECK: %[[v9:.*]] = "tf.Reshape"(%[[v8]], %[[cst_6]]) : (tensor<1x5x6xf32>, tensor<2xi64>) -> tensor<5x6xf32> + // CHECK: %[[v10:.*]] = "tf.Slice"(%arg1, %[[cst_4]], %[[cst_5]]) : (tensor<3x5x6xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x5x6xf32> + // CHECK: %[[v11:.*]] = "tf.Reshape"(%[[v10]], %[[cst_6]]) : (tensor<1x5x6xf32>, tensor<2xi64>) -> tensor<5x6xf32> - // CHECK: %[[v14:.*]] = "tf.MatMul"(%[[v2]], %[[v9]]) {transpose_a = false, transpose_b = false} : (tensor<4x5xf32>, tensor<5x6xf32>) -> tensor<4x6xf32> - // CHECK: %[[v15:.*]] = "tf.MatMul"(%[[v4]], %[[v11]]) {transpose_a = false, transpose_b = false} : (tensor<4x5xf32>, tensor<5x6xf32>) -> tensor<4x6xf32> - // CHECK: %[[v16:.*]] = "tf.MatMul"(%[[v6]], %[[v13]]) {transpose_a = false, transpose_b = false} : (tensor<4x5xf32>, tensor<5x6xf32>) -> tensor<4x6xf32> + // CHECK: %[[mm0:.*]] = "tf.MatMul"(%[[v1]], %[[v7]]) {transpose_a = false, transpose_b = false} : (tensor<4x5xf32>, tensor<5x6xf32>) -> tensor<4x6xf32> + // CHECK: %[[mm1:.*]] = "tf.MatMul"(%[[v3]], %[[v9]]) {transpose_a = false, transpose_b = false} : (tensor<4x5xf32>, tensor<5x6xf32>) -> tensor<4x6xf32> + // CHECK: %[[mm2:.*]] = "tf.MatMul"(%[[v5]], %[[v11]]) {transpose_a = false, transpose_b = false} : (tensor<4x5xf32>, tensor<5x6xf32>) -> tensor<4x6xf32> - // CHECK: %[[v17:.*]] = "tf.Pack"(%[[v14]], %[[v15]], %[[v16]]) {axis = 0 : i64} : (tensor<4x6xf32>, tensor<4x6xf32>, tensor<4x6xf32>) -> tensor<3x4x6xf32> - // CHECK: %[[v18:.*]] = "tf.Reshape"(%[[v17]], %[[cst_8]]) : (tensor<3x4x6xf32>, tensor<3xi64>) -> tensor<3x4x6xf32> + // CHECK: %[[v17:.*]] = "tf.Pack"(%[[mm0]], %[[mm1]], %[[mm2]]) {axis = 0 : i64} : (tensor<4x6xf32>, tensor<4x6xf32>, tensor<4x6xf32>) -> tensor<3x4x6xf32> - // CHECK: return %[[v18]] : tensor<3x4x6xf32> + // CHECK: return %[[v17]] : tensor<3x4x6xf32> } // ----- @@ -184,41 +178,35 @@ func @batchMatMulFlatInput(%arg0: tensor<3x4x5xf32>, %arg1: tensor<3x5x6xf32>) - return %0 : tensor<3x4x6xf32> // CHECK-LABEL: batchMatMulFlatInput - // CHECK: %[[cst:.*]] = "tf.Const"() {value = dense<[3, 4, 5]> : tensor<3xi64>} // CHECK: %[[cst_0:.*]] = "tf.Const"() {value = dense<[1, 4, 5]> : tensor<3xi64>} // CHECK: %[[cst_1:.*]] = "tf.Const"() {value = dense<[4, 5]> : tensor<2xi64>} - // CHECK: %[[cst_2:.*]] = "tf.Const"() {value = dense<[3, 5, 6]> : tensor<3xi64>} - // CHECK: %[[cst_3:.*]] = "tf.Const"() {value = dense<0> : tensor<3xi64>} - // CHECK: %[[cst_4:.*]] = "tf.Const"() {value = dense<[1, 0, 0]> : tensor<3xi64>} - // CHECK: %[[cst_5:.*]] = "tf.Const"() {value = dense<[2, 0, 0]> : tensor<3xi64>} - // CHECK: %[[cst_6:.*]] = "tf.Const"() {value = dense<[1, 5, 6]> : tensor<3xi64>} - // CHECK: %[[cst_7:.*]] = "tf.Const"() {value = dense<[5, 6]> : tensor<2xi64>} - // CHECK: %[[cst_8:.*]] = "tf.Const"() {value = dense<[3, 4, 6]> : tensor<3xi64>} + // CHECK: %[[cst_2:.*]] = "tf.Const"() {value = dense<0> : tensor<3xi64>} + // CHECK: %[[cst_3:.*]] = "tf.Const"() {value = dense<[1, 0, 0]> : tensor<3xi64>} + // CHECK: %[[cst_4:.*]] 
= "tf.Const"() {value = dense<[2, 0, 0]> : tensor<3xi64>} + // CHECK: %[[cst_5:.*]] = "tf.Const"() {value = dense<[1, 5, 6]> : tensor<3xi64>} + // CHECK: %[[cst_6:.*]] = "tf.Const"() {value = dense<[5, 6]> : tensor<2xi64>} - // CHECK: %[[v0:.*]] = "tf.Reshape"(%arg0, %[[cst]]) : (tensor<3x4x5xf32>, tensor<3xi64>) -> tensor<3x4x5xf32> - // CHECK: %[[v1:.*]] = "tf.Slice"(%[[v0]], %[[cst_3]], %[[cst_0]]) : (tensor<3x4x5xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x4x5xf32> - // CHECK: %[[v2:.*]] = "tf.Reshape"(%[[v1]], %[[cst_1]]) : (tensor<1x4x5xf32>, tensor<2xi64>) -> tensor<4x5xf32> - // CHECK: %[[v3:.*]] = "tf.Slice"(%[[v0]], %[[cst_4]], %[[cst_0]]) : (tensor<3x4x5xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x4x5xf32> - // CHECK: %[[v4:.*]] = "tf.Reshape"(%[[v3]], %[[cst_1]]) : (tensor<1x4x5xf32>, tensor<2xi64>) -> tensor<4x5xf32> - // CHECK: %[[v5:.*]] = "tf.Slice"(%[[v0]], %[[cst_5]], %[[cst_0]]) : (tensor<3x4x5xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x4x5xf32> - // CHECK: %[[v6:.*]] = "tf.Reshape"(%[[v5]], %[[cst_1]]) : (tensor<1x4x5xf32>, tensor<2xi64>) -> tensor<4x5xf32> + // CHECK: %[[v0:.*]] = "tf.Slice"(%arg0, %[[cst_2]], %[[cst_0]]) : (tensor<3x4x5xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x4x5xf32> + // CHECK: %[[v1:.*]] = "tf.Reshape"(%[[v0]], %[[cst_1]]) : (tensor<1x4x5xf32>, tensor<2xi64>) -> tensor<4x5xf32> + // CHECK: %[[v2:.*]] = "tf.Slice"(%arg0, %[[cst_3]], %[[cst_0]]) : (tensor<3x4x5xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x4x5xf32> + // CHECK: %[[v3:.*]] = "tf.Reshape"(%[[v2]], %[[cst_1]]) : (tensor<1x4x5xf32>, tensor<2xi64>) -> tensor<4x5xf32> + // CHECK: %[[v4:.*]] = "tf.Slice"(%arg0, %[[cst_4]], %[[cst_0]]) : (tensor<3x4x5xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x4x5xf32> + // CHECK: %[[v5:.*]] = "tf.Reshape"(%[[v4]], %[[cst_1]]) : (tensor<1x4x5xf32>, tensor<2xi64>) -> tensor<4x5xf32> - // CHECK: %[[v7:.*]] = "tf.Reshape"(%arg1, %[[cst_2]]) : (tensor<3x5x6xf32>, tensor<3xi64>) -> tensor<3x5x6xf32> - // CHECK: %[[v8:.*]] = "tf.Slice"(%[[v7]], %[[cst_3]], %[[cst_6]]) : (tensor<3x5x6xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x5x6xf32> - // CHECK: %[[v9:.*]] = "tf.Reshape"(%[[v8]], %[[cst_7]]) : (tensor<1x5x6xf32>, tensor<2xi64>) -> tensor<5x6xf32> - // CHECK: %[[v10:.*]] = "tf.Slice"(%[[v7]], %[[cst_4]], %[[cst_6]]) : (tensor<3x5x6xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x5x6xf32> - // CHECK: %[[v11:.*]] = "tf.Reshape"(%[[v10]], %[[cst_7]]) : (tensor<1x5x6xf32>, tensor<2xi64>) -> tensor<5x6xf32> - // CHECK: %[[v12:.*]] = "tf.Slice"(%[[v7]], %[[cst_5]], %[[cst_6]]) : (tensor<3x5x6xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x5x6xf32> - // CHECK: %[[v13:.*]] = "tf.Reshape"(%[[v12]], %[[cst_7]]) : (tensor<1x5x6xf32>, tensor<2xi64>) -> tensor<5x6xf32> + // CHECK: %[[v6:.*]] = "tf.Slice"(%arg1, %[[cst_2]], %[[cst_5]]) : (tensor<3x5x6xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x5x6xf32> + // CHECK: %[[v7:.*]] = "tf.Reshape"(%[[v6]], %[[cst_6]]) : (tensor<1x5x6xf32>, tensor<2xi64>) -> tensor<5x6xf32> + // CHECK: %[[v8:.*]] = "tf.Slice"(%arg1, %[[cst_3]], %[[cst_5]]) : (tensor<3x5x6xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x5x6xf32> + // CHECK: %[[v9:.*]] = "tf.Reshape"(%[[v8]], %[[cst_6]]) : (tensor<1x5x6xf32>, tensor<2xi64>) -> tensor<5x6xf32> + // CHECK: %[[v10:.*]] = "tf.Slice"(%arg1, %[[cst_4]], %[[cst_5]]) : (tensor<3x5x6xf32>, tensor<3xi64>, tensor<3xi64>) -> tensor<1x5x6xf32> + // CHECK: %[[v11:.*]] = "tf.Reshape"(%[[v10]], %[[cst_6]]) : (tensor<1x5x6xf32>, tensor<2xi64>) -> tensor<5x6xf32> - // CHECK: 
%[[v14:.*]] = "tf.MatMul"(%[[v2]], %[[v9]]) {transpose_a = false, transpose_b = false} : (tensor<4x5xf32>, tensor<5x6xf32>) -> tensor<4x6xf32> - // CHECK: %[[v15:.*]] = "tf.MatMul"(%[[v4]], %[[v11]]) {transpose_a = false, transpose_b = false} : (tensor<4x5xf32>, tensor<5x6xf32>) -> tensor<4x6xf32> - // CHECK: %[[v16:.*]] = "tf.MatMul"(%[[v6]], %[[v13]]) {transpose_a = false, transpose_b = false} : (tensor<4x5xf32>, tensor<5x6xf32>) -> tensor<4x6xf32> + // CHECK: %[[mm0:.*]] = "tf.MatMul"(%[[v1]], %[[v7]]) {transpose_a = false, transpose_b = false} : (tensor<4x5xf32>, tensor<5x6xf32>) -> tensor<4x6xf32> + // CHECK: %[[mm1:.*]] = "tf.MatMul"(%[[v3]], %[[v9]]) {transpose_a = false, transpose_b = false} : (tensor<4x5xf32>, tensor<5x6xf32>) -> tensor<4x6xf32> + // CHECK: %[[mm2:.*]] = "tf.MatMul"(%[[v5]], %[[v11]]) {transpose_a = false, transpose_b = false} : (tensor<4x5xf32>, tensor<5x6xf32>) -> tensor<4x6xf32> - // CHECK: %[[v17:.*]] = "tf.Pack"(%[[v14]], %[[v15]], %[[v16]]) {axis = 0 : i64} : (tensor<4x6xf32>, tensor<4x6xf32>, tensor<4x6xf32>) -> tensor<3x4x6xf32> - // CHECK: %[[v18:.*]] = "tf.Reshape"(%[[v17]], %[[cst_8]]) : (tensor<3x4x6xf32>, tensor<3xi64>) -> tensor<3x4x6xf32> + // CHECK: %[[v17:.*]] = "tf.Pack"(%[[mm0]], %[[mm1]], %[[mm2]]) {axis = 0 : i64} : (tensor<4x6xf32>, tensor<4x6xf32>, tensor<4x6xf32>) -> tensor<3x4x6xf32> - // CHECK: return %[[v18]] : tensor<3x4x6xf32> + // CHECK: return %[[v17]] : tensor<3x4x6xf32> } // ----- From 29911204d0f558f17eb2440f6ce4c028838b55a9 Mon Sep 17 00:00:00 2001 From: Xingyu Long Date: Mon, 13 Jul 2020 23:32:47 -0400 Subject: [PATCH 0348/2522] Update benchmark_util and use metrics. --- .../python/keras/benchmarks/benchmark_util.py | 40 ++++++++++--------- .../bidirectional_lstm_benchmark_test.py | 12 +++--- ...assification_transformer_benchmark_test.py | 12 +++--- 3 files changed, 34 insertions(+), 30 deletions(-) diff --git a/tensorflow/python/keras/benchmarks/benchmark_util.py b/tensorflow/python/keras/benchmarks/benchmark_util.py index dcf1fa437d2..be973031150 100644 --- a/tensorflow/python/keras/benchmarks/benchmark_util.py +++ b/tensorflow/python/keras/benchmarks/benchmark_util.py @@ -47,7 +47,7 @@ class TimerCallBack(tf.keras.callbacks.Callback): def measure_performance(model_fn, x=None, y=None, - epoch=2, + epochs=2, batch_size=32, run_iters=4, optimizer=None, @@ -62,8 +62,8 @@ def measure_performance(model_fn, model_fn: Model function to be benchmarked. x: Input data. See `x` in the `fit()` method of `keras.Model`. y: Target data. See `y` in the `fit()` method of `keras.Model`. - epoch: Integer. Number of epochs to train the model. If unspecified, `epoch` - will default to 2. + epochs: Integer. Number of epochs to train the model. + If unspecified, `epoch` will default to 2. batch_size: Integer. Number of samples per gradient update. If unspecified, `batch_size` will default to 32. run_iters: Integer. Number of iterations to run the performance measurement. @@ -84,8 +84,7 @@ def measure_performance(model_fn, Returns: Performance summary, which contains build_time, compile_time, - startup_time, avg_epoch_time, wall_time, exp_per_sec, distribution_strategy, - epoch. + startup_time, avg_epoch_time, wall_time, exp_per_sec,epochs. 
Raise: ValueError: If `x` is none or if `optimizer` is not provided or @@ -106,7 +105,7 @@ def measure_performance(model_fn, build_time_list, compile_time_list, startup_time_list = [], [], [] avg_epoch_time_list, wall_time_list, exp_per_sec_list = [], [], [] - total_num_examples = epoch * num_examples + total_num_examples = epochs * num_examples for _ in range(run_iters): timer = timeit.default_timer @@ -129,7 +128,7 @@ def measure_performance(model_fn, x=x, y=y, batch_size=batch_size, - epochs=epoch, + epochs=epochs, callbacks=[cbk], verbose=verbose) end_time = timer() @@ -141,15 +140,20 @@ def measure_performance(model_fn, wall_time_list.append(end_time - t0) exp_per_sec_list.append(total_num_examples / (end_time - t2)) - results = { - 'build_time': np.mean(build_time_list), - 'compile_time': np.mean(compile_time_list), - 'startup_time': np.mean(startup_time_list), - 'avg_epoch_time': np.mean(avg_epoch_time_list), - 'wall_time': np.mean(wall_time_list), - 'exp_per_sec': np.mean(exp_per_sec_list), - 'distribution_strategy': distribution_strategy, - 'epoch': epoch - } + metrics = [] + metrics.append({'name': 'build_time', + 'value': np.mean(build_time_list)}) + metrics.append({'name': 'compile_time', + 'value': np.mean(compile_time_list)}) + metrics.append({'name': 'startup_time', + 'value': np.mean(startup_time_list)}) + metrics.append({'name': 'avg_epoch_time', + 'value': np.mean(avg_epoch_time_list)}) + metrics.append({'name': 'exp_per_sec', + 'value': np.mean(exp_per_sec_list)}) + metrics.append({'name': 'epochs', + 'value': epochs}) - return results + wall_time = np.mean(wall_time_list) + + return metrics, wall_time diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py index f18c52cf882..4985533e299 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py @@ -59,7 +59,7 @@ class BidirectionalLSTMBenchmark(tf.test.Benchmark): """Measure performance with batch_size=128 and run_iters=3.""" batch_size = 128 run_iters = 3 - results = benchmark_util.measure_performance( + metrics, wall_time = benchmark_util.measure_performance( self._build_model, x=self.imdb_x, y=self.imdb_y, @@ -70,13 +70,13 @@ class BidirectionalLSTMBenchmark(tf.test.Benchmark): metrics=['accuracy']) self.report_benchmark( - iters=run_iters, wall_time=results['wall_time'], extras=results) + iters=run_iters, wall_time=wall_time, metrics=metrics) def benchmark_bidirect_lstm_imdb_bs_256(self): """Measure performance with batch_size=256 and run_iters=2.""" batch_size = 256 run_iters = 2 - results = benchmark_util.measure_performance( + metrics, wall_time = benchmark_util.measure_performance( self._build_model, x=self.imdb_x, y=self.imdb_y, @@ -87,13 +87,13 @@ class BidirectionalLSTMBenchmark(tf.test.Benchmark): metrics=['accuracy']) self.report_benchmark( - iters=run_iters, wall_time=results['wall_time'], extras=results) + iters=run_iters, wall_time=wall_time, metrics=metrics) def benchmark_bidirect_lstm_imdb_bs_512(self): """Measure performance with batch_size=512 and run_iters=4.""" batch_size = 512 run_iters = 4 - results = benchmark_util.measure_performance( + metrics, wall_time = benchmark_util.measure_performance( self._build_model, x=self.imdb_x, y=self.imdb_y, @@ -104,7 +104,7 @@ class 
BidirectionalLSTMBenchmark(tf.test.Benchmark): metrics=['accuracy']) self.report_benchmark( - iters=run_iters, wall_time=results['wall_time'], extras=results) + iters=run_iters, wall_time=wall_time, metrics=metrics) if __name__ == '__main__': diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py index c5894379382..e77765e45a9 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py @@ -73,7 +73,7 @@ class TextWithTransformerBenchmark(tf.test.Benchmark): """Measure performance with batch_size=128 and run_iters=3.""" batch_size = 128 run_iters = 3 - results = benchmark_util.measure_performance( + metrics, wall_time = benchmark_util.measure_performance( self._build_model, x=self.imdb_x, y=self.imdb_y, @@ -84,13 +84,13 @@ class TextWithTransformerBenchmark(tf.test.Benchmark): metrics=['accuracy']) self.report_benchmark( - iters=run_iters, wall_time=results['wall_time'], extras=results) + iters=run_iters, wall_time=wall_time, metrics=metrics) def benchmark_text_classification_bs_512(self): """Measure performance with batch_size=512 and run_iters=4.""" batch_size = 512 run_iters = 4 - results = benchmark_util.measure_performance( + metrics, wall_time = benchmark_util.measure_performance( self._build_model, x=self.imdb_x, y=self.imdb_y, @@ -101,13 +101,13 @@ class TextWithTransformerBenchmark(tf.test.Benchmark): metrics=['accuracy']) self.report_benchmark( - iters=run_iters, wall_time=results['wall_time'], extras=results) + iters=run_iters, wall_time=wall_time, metrics=metrics) def benchmark_text_classification_bs_256(self): """Measure performance with batch_size=256 and run_iters=3.""" batch_size = 256 run_iters = 3 - results = benchmark_util.measure_performance( + metrics, wall_time = benchmark_util.measure_performance( self._build_model, x=self.imdb_x, y=self.imdb_y, @@ -118,7 +118,7 @@ class TextWithTransformerBenchmark(tf.test.Benchmark): metrics=['accuracy']) self.report_benchmark( - iters=run_iters, wall_time=results['wall_time'], extras=results) + iters=run_iters, wall_time=wall_time, metrics=metrics) class MultiHeadSelfAttention(tf.keras.layers.Layer): """Implement multi head self attention as a Keras layer.""" From 6a572639d38b0889ed0cca6c3ef18849db001d40 Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Tue, 14 Jul 2020 03:48:07 +0000 Subject: [PATCH 0349/2522] restore list_ops.py --- tensorflow/python/ops/list_ops.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/tensorflow/python/ops/list_ops.py b/tensorflow/python/ops/list_ops.py index ccd4e6b0494..3e7c116ec97 100644 --- a/tensorflow/python/ops/list_ops.py +++ b/tensorflow/python/ops/list_ops.py @@ -248,7 +248,6 @@ def _TensorListFromTensorGrad(op, dlist): @ops.RegisterGradient("TensorListGetItem") def _TensorListGetItemGrad(op, ditem): """Gradient for TensorListGetItem.""" - print("---GetItemGrad---") list_size = gen_list_ops.tensor_list_length(op.inputs[0]) list_grad = gen_list_ops.tensor_list_set_item( gen_list_ops.tensor_list_reserve( @@ -257,21 +256,14 @@ def _TensorListGetItemGrad(op, ditem): list_size, element_dtype=ditem.dtype), index=op.inputs[1], item=ditem) - print("op inputs", op.inputs) - print("ditem", ditem) - print("list_grad", list_grad) 
index_grad = None element_shape_grad = None - print("------") return list_grad, index_grad, element_shape_grad @ops.RegisterGradient("TensorListSetItem") def _TensorListSetItemGrad(op, dlist): """Gradient function for TensorListSetItem.""" - print("---SetItemGrad---") - print("op inputs", op.inputs) - print("dlist", dlist) _, index, item = op.inputs list_grad = gen_list_ops.tensor_list_set_item( dlist, index=index, item=array_ops.zeros_like(item)) @@ -281,9 +273,6 @@ def _TensorListSetItemGrad(op, dlist): index, element_shape=array_ops.shape(item), element_dtype=item.dtype) - print("list_grad", list_grad) - print("value_grad", element_grad) - print("------") return list_grad, index_grad, element_grad From 395380e82d2b49d20b8cb46eaef98fc640a2cb58 Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Tue, 14 Jul 2020 04:02:58 +0000 Subject: [PATCH 0350/2522] most pr edits --- tensorflow/core/framework/tensor_key.h | 6 ++--- tensorflow/core/kernels/map_kernels.cc | 2 +- tensorflow/core/kernels/map_kernels.h | 7 +++--- tensorflow/core/kernels/tensor_map.cc | 18 +++++---------- tensorflow/core/kernels/tensor_map.h | 17 ++++---------- tensorflow/core/kernels/tensor_map_test.cc | 22 +++++++++---------- tensorflow/core/ops/map_ops.cc | 7 +----- tensorflow/python/kernel_tests/BUILD | 3 --- .../python/kernel_tests/map_ops_test.py | 9 ++++---- tensorflow/python/ops/map_ops.py | 7 +----- 10 files changed, 35 insertions(+), 63 deletions(-) diff --git a/tensorflow/core/framework/tensor_key.h b/tensorflow/core/framework/tensor_key.h index aa6fe35181a..9a64969301f 100644 --- a/tensorflow/core/framework/tensor_key.h +++ b/tensorflow/core/framework/tensor_key.h @@ -47,13 +47,13 @@ class TensorKey : public Tensor { } friend bool operator!=(const TensorKey& t1, const TensorKey& t2) { - return !(t1==t2); + return !(t1 == t2); } - // AbslHashValue() function, needed for absl hashing. + // Needed for absl hash function. template friend H AbslHashValue(H h, const TensorKey& k) { - uint8* d = (uint8*)(k.data()); + const uint8* d = static_cast(k.data()); size_t s = k.AllocatedBytes(); std::vector vec; for (int i=0; i < s; i++) { diff --git a/tensorflow/core/kernels/map_kernels.cc b/tensorflow/core/kernels/map_kernels.cc index 7d45d3942e1..45fa86c2bf6 100644 --- a/tensorflow/core/kernels/map_kernels.cc +++ b/tensorflow/core/kernels/map_kernels.cc @@ -37,4 +37,4 @@ REGISTER_KERNEL_BUILDER(Name("TensorMapErase").Device(DEVICE_CPU), REGISTER_KERNEL_BUILDER(Name("TensorMapReplace").Device(DEVICE_CPU), TensorMapReplace); -} \ No newline at end of file +} diff --git a/tensorflow/core/kernels/map_kernels.h b/tensorflow/core/kernels/map_kernels.h index 33282a75e0a..98ce1bfac1b 100644 --- a/tensorflow/core/kernels/map_kernels.h +++ b/tensorflow/core/kernels/map_kernels.h @@ -20,14 +20,13 @@ limitations under the License. #include "tensorflow/core/framework/variant_encode_decode.h" #include -using namespace std; namespace tensorflow { Status GetInputMap(OpKernelContext* c, int index, const TensorMap** map) { if (!TensorShapeUtils::IsScalar(c->input(index).shape())) { - return errors::InvalidArgument("Input map must be a scalar saw: ", + return errors::InvalidArgument("Input map must be a scalar. 
Saw: ", c->input(index).shape().DebugString()); } const TensorMap* m = c->input(index).scalar()().get(); @@ -41,6 +40,7 @@ Status GetInputMap(OpKernelContext* c, int index, const TensorMap** map) { } +//TODO(kattian): change into templated function Status ForwardInputOrCreateNewMap(OpKernelContext* c, int32 input_index, int32 output_index, const TensorMap& input_map, @@ -68,7 +68,7 @@ Status ForwardInputOrCreateNewMap(OpKernelContext* c, int32 input_index, } // If forwarding is not possible allocate a new output tensor and copy - // the `input_list` to it. + // the `input_map` to it. AllocatorAttributes attr; attr.set_on_host(true); TF_RETURN_IF_ERROR( @@ -183,6 +183,7 @@ class TensorMapErase : public OpKernel { DataType element_dtype_; }; + class TensorMapReplace : public OpKernel { public: explicit TensorMapReplace(OpKernelConstruction* c) : OpKernel(c) { diff --git a/tensorflow/core/kernels/tensor_map.cc b/tensorflow/core/kernels/tensor_map.cc index cfba3892650..bcb2abebe01 100644 --- a/tensorflow/core/kernels/tensor_map.cc +++ b/tensorflow/core/kernels/tensor_map.cc @@ -44,7 +44,6 @@ void TensorMap::Encode(VariantTensorData* data) const { // Metadata format: // core::PutVarint64(&metadata, static_cast(element_dtype)); - core::PutVarint64(&metadata, static_cast(max_num_elements)); TensorShapeProto element_shape_proto; element_shape.AsProto(&element_shape_proto); element_shape_proto.AppendToString(&metadata); @@ -56,17 +55,15 @@ static Status TensorMapDeviceCopy( const UnaryVariantOpRegistry::AsyncTensorDeviceCopyFn& copy) { to->element_shape = from.element_shape; to->element_dtype = from.element_dtype; - to->max_num_elements = from.max_num_elements; for (const std::pair& p : from.tensors()) { - to->tensors().emplace(p); //TODO: check valid dtype - //if (t.dtype() != DT_INVALID) { - //TF_RETURN_IF_ERROR(copy(p, &to->tensors().back())); - //} + if (p.first.dtype() != DT_INVALID && p.second.dtype() != DT_INVALID) { + to->tensors().emplace(p.first, p.second); + } } return Status::OK(); } -#define REGISTER_LIST_COPY(DIRECTION) \ +#define REGISTER_LIST_COPY(DIRECTION) \ INTERNAL_REGISTER_UNARY_VARIANT_DEVICE_COPY_FUNCTION(TensorMap, DIRECTION, \ TensorMapDeviceCopy) @@ -89,19 +86,16 @@ bool TensorMap::Decode(const VariantTensorData& data) { while (tensors_it != data.tensors().end()) { - // should assert that tensors_it + 1 is also not the end if (std::next(tensors_it) == data.tensors().end()) { return false; } - TensorKey k = TensorKey(*tensors_it); // copy inefficient? - tensors().emplace(k,*++tensors_it); - tensors_it++; + tensors().emplace(tensors_it[0], tensors_it[1]); + tensors_it += 2; } core::GetVarint64(&iter, &scratch); element_dtype = static_cast(scratch); core::GetVarint64(&iter, &scratch); - max_num_elements = static_cast(scratch); TensorShapeProto element_shape_proto; element_shape_proto.ParseFromString(string(iter.data(), iter.size())); element_shape = PartialTensorShape(element_shape_proto); diff --git a/tensorflow/core/kernels/tensor_map.h b/tensorflow/core/kernels/tensor_map.h index 633c7db8668..f0fe4ae5f57 100644 --- a/tensorflow/core/kernels/tensor_map.h +++ b/tensorflow/core/kernels/tensor_map.h @@ -38,11 +38,11 @@ namespace tensorflow { // // Do not create a true copy of the underlying container - but instead increment // a reference count. Modifying b.tensors() modifies a.tensors(). In this way, -// TensorList should be considered similar to the tf::Tensor object. +// TensorMap should be considered similar to the tf::Tensor object. 
// // In order to get a copy of the underlying map, use the Copy method: // -// TensorList b = a.Copy(); +// TensorMap b = a.Copy(); // b.tensors().insert(k, v); // This does not modify a.tensors(). // // Note that this is not a deep copy: the memory locations of the underlying @@ -50,8 +50,8 @@ namespace tensorflow { // in the original. To truly perform a deep copy, Device and Type-specific // code needs to be applied to the underlying tensors as usual. // -// The most important implication of RefCounted TLs is that OpKernels -// wishing to reuse TensorList inputs as outputs via context->forward_input() +// The most important implication of RefCounted TensorMaps is that OpKernels +// wishing to reuse TensorMap inputs as outputs via context->forward_input() // need to perform an additional check on the refcount of the TensorList, // to ensure aliasing can be performed safely. For example: // @@ -72,7 +72,6 @@ class TensorMap { TensorMap(const TensorMap& other) : element_shape(other.element_shape), element_dtype(other.element_dtype), - max_num_elements(other.max_num_elements), tensors_(other.tensors_) { tensors_->Ref(); } @@ -80,7 +79,6 @@ class TensorMap { TensorMap(TensorMap&& rhs) : element_shape(std::move(rhs.element_shape)), element_dtype(rhs.element_dtype), - max_num_elements(rhs.max_num_elements), tensors_(rhs.tensors_) { rhs.tensors_ = nullptr; } @@ -89,7 +87,6 @@ class TensorMap { if (this == &rhs) return *this; element_shape = rhs.element_shape; element_dtype = rhs.element_dtype; - max_num_elements = rhs.max_num_elements; tensors_->Unref(); tensors_ = rhs.tensors_; tensors_->Ref(); @@ -100,7 +97,6 @@ class TensorMap { if (this == &rhs) return *this; element_shape = rhs.element_shape; element_dtype = rhs.element_dtype; - max_num_elements = rhs.max_num_elements; std::swap(tensors_, rhs.tensors_); return *this; } @@ -120,10 +116,6 @@ class TensorMap { DataType element_dtype; - // The maximum allowed size of `tensors`. Defaults to -1 meaning that the size - // of `tensors` is unbounded. - int max_num_elements = -1; - // Access to the underlying tensor container. absl::flat_hash_map& tensors() { return tensors_->values_; } const absl::flat_hash_map& tensors() const { return tensors_->values_; } @@ -137,7 +129,6 @@ class TensorMap { TensorMap out; out.element_shape = element_shape; out.element_dtype = element_dtype; - out.max_num_elements = max_num_elements; // This performs a copy of the absl::hashmap. 
out.tensors_->values_ = tensors_->values_; return out; diff --git a/tensorflow/core/kernels/tensor_map_test.cc b/tensorflow/core/kernels/tensor_map_test.cc index 294aa07c963..1ee175be34d 100644 --- a/tensorflow/core/kernels/tensor_map_test.cc +++ b/tensorflow/core/kernels/tensor_map_test.cc @@ -35,21 +35,21 @@ TEST(TensorMapTest, Empty) { TEST(TensorKeyTest, Equal) { TensorKey k1 = Tensor(15); TensorKey k2 = Tensor(15); - EXPECT_EQ(k1,k2); + EXPECT_EQ(k1, k2); TensorKey k3 = Tensor(15); TensorKey k4 = Tensor(37); - EXPECT_NE(k3,k4); + EXPECT_NE(k3, k4); } TEST(TensorMapTest, Insert) { - EXPECT_EQ(1,1); + EXPECT_EQ(1, 1); TensorMap tm; TensorKey k = Tensor(11); Tensor v = Tensor(22); - tm.insert(k,v); + tm.insert(k, v); absl::flat_hash_map am; - am.try_emplace(k,v); + am.try_emplace(k, v); absl::flat_hash_map::iterator map_it = tm.tensors().begin(); EXPECT_EQ(map_it->first, k); @@ -62,7 +62,7 @@ TEST(TensorMapTest, Lookup) { TensorMap tm; TensorKey k = Tensor(11); Tensor v = Tensor(22); - tm.insert(k,v); + tm.insert(k, v); absl::flat_hash_map::iterator map_it = tm.find(k); Tensor f = map_it->second; @@ -74,7 +74,7 @@ TEST(TensorMapTest, Erase) { TensorMap tm; TensorKey k = Tensor(11); Tensor v = Tensor(22); - tm.insert(k,v); + tm.insert(k, v); tm.erase(k); EXPECT_EQ(tm.find(k), tm.tensors().end()); } @@ -84,8 +84,8 @@ TEST(TensorMapTest, SameKeyInsert) { TensorKey k = Tensor(11); Tensor v1 = Tensor(22); Tensor v2 = Tensor(23); - bool b1 = tm.insert(k,v1); - bool b2 = tm.insert(k,v2); + bool b1 = tm.insert(k, v1); + bool b2 = tm.insert(k, v2); EXPECT_EQ(b1, true); EXPECT_EQ(b2, false); absl::flat_hash_map::iterator map_it = tm.find(k); @@ -109,7 +109,7 @@ TEST(TensorMapTest, Copy) { TensorMap tm; TensorKey k = Tensor(11); Tensor v = Tensor(22); - tm.insert(k,v); + tm.insert(k, v); TensorMap tmc = tm.Copy(); EXPECT_EQ(tm.dtype(), tmc.dtype()); EXPECT_EQ(tm.size(), tmc.size()); @@ -123,7 +123,7 @@ TEST(TensorMapTest, EncodeDecode) { TensorMap tm; TensorKey k = Tensor(11); Tensor v = Tensor(22); - tm.insert(k,v); + tm.insert(k, v); VariantTensorData data; tm.Encode(&data); TensorMap tmc; diff --git a/tensorflow/core/ops/map_ops.cc b/tensorflow/core/ops/map_ops.cc index 445180c34ef..e95dd2486be 100644 --- a/tensorflow/core/ops/map_ops.cc +++ b/tensorflow/core/ops/map_ops.cc @@ -20,12 +20,7 @@ limitations under the License. 
namespace tensorflow { namespace { -bool IsValidTensorMapHandleData( - const std::vector* handle_data) { - std::cout << "is valid tensor map handle data " << handle_data->size() << std::endl; - return handle_data != nullptr && handle_data->size() == 1; -} - +//TODO(kttian): Support non-scalar values REGISTER_OP("EmptyTensorMap") .Output("handle: variant") .SetShapeFn([](shape_inference::InferenceContext* c) { diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index d8c8e3dc2a8..55a8feeb053 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -147,9 +147,6 @@ cuda_py_test( size = "small", srcs = ["map_ops_test.py"], grpc_enabled = True, - tags = [ - "noasan", # TODO(b/155406705): flaky - ], deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", diff --git a/tensorflow/python/kernel_tests/map_ops_test.py b/tensorflow/python/kernel_tests/map_ops_test.py index 46f8f21d104..e95a1ab9bec 100644 --- a/tensorflow/python/kernel_tests/map_ops_test.py +++ b/tensorflow/python/kernel_tests/map_ops_test.py @@ -12,12 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ -"""Tests for zero_out ops.""" +"""Tests for TensorMap ops.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function -#import numpy as np from tensorflow.python.platform import test from absl.testing import parameterized from tensorflow.python.framework import test_util @@ -40,7 +39,7 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testTensorMapSize(self): m = map_ops.empty_tensor_map() s = map_ops.tensor_map_size(m) - self.assertAllClose(s, 0) + self.assertAllEqual(s, 0) def testTensorMapInsert(self): m = map_ops.empty_tensor_map() @@ -48,7 +47,7 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): v = constant_op.constant(2.0) m = map_ops.tensor_map_insert(m, k, v) s = map_ops.tensor_map_size(m) - self.assertAllClose(s, 1) + self.assertAllEqual(s, 1) def testTensorMapLookup(self): m = map_ops.empty_tensor_map() @@ -98,4 +97,4 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): if __name__ == '__main__': - test.main() \ No newline at end of file + test.main() diff --git a/tensorflow/python/ops/map_ops.py b/tensorflow/python/ops/map_ops.py index 5d8d2b88f2f..4ea50e114ac 100644 --- a/tensorflow/python/ops/map_ops.py +++ b/tensorflow/python/ops/map_ops.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================ -"""Use zero_out ops in python.""" +"""Ops to manipulate hashmap of tensors.""" from __future__ import absolute_import from __future__ import division @@ -27,11 +27,6 @@ from tensorflow.python.ops import gen_map_ops from tensorflow.python.ops.gen_map_ops import * from tensorflow.python.framework import constant_op - -#zero_out_ops = load_library.load_op_library( -# resource_loader.get_path_to_datafile('_zero_out_ops.so')) -#zero_out = zero_out_ops.zero_out - ops.NotDifferentiable("EmptyTensorMap") def empty_tensor_map(): From 865320a9a613d20190dc37ee205c0d2e5453e5fc Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Mon, 13 Jul 2020 21:13:24 -0700 Subject: [PATCH 0351/2522] Fix TF kokoro build: LLVM file introducing a dependency on TF itself needs to be excluded to avoid cyclic dependency PiperOrigin-RevId: 321091092 Change-Id: I075b1d4935b8636f4556583c9bea95564524e73e --- third_party/llvm/llvm.autogenerated.BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/third_party/llvm/llvm.autogenerated.BUILD b/third_party/llvm/llvm.autogenerated.BUILD index 6e53745166d..bade7ab37ff 100644 --- a/third_party/llvm/llvm.autogenerated.BUILD +++ b/third_party/llvm/llvm.autogenerated.BUILD @@ -723,6 +723,7 @@ cc_library( exclude = [ "lib/Analysis/MLInlineAdvisor.cpp", "lib/Analysis/ReleaseModeModelRunner.cpp", + "lib/Analysis/TFUtils.cpp", ], ), hdrs = glob([ From 052d45f39a62f4684801ca29ad6b6a593ce4a8fa Mon Sep 17 00:00:00 2001 From: Terry Heo Date: Mon, 13 Jul 2020 21:16:25 -0700 Subject: [PATCH 0352/2522] Update build commands of Android GPU delegate This PR fixes #41270. PiperOrigin-RevId: 321091336 Change-Id: Ib3f2697329a0b7fde8e0b092a311ba8942a19a4c --- tensorflow/lite/g3doc/performance/gpu_advanced.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/g3doc/performance/gpu_advanced.md b/tensorflow/lite/g3doc/performance/gpu_advanced.md index 8d498cd404b..eb6d6319db1 100644 --- a/tensorflow/lite/g3doc/performance/gpu_advanced.md +++ b/tensorflow/lite/g3doc/performance/gpu_advanced.md @@ -122,8 +122,8 @@ TFLite GPU for Android C/C++ uses the [Bazel](https://bazel.io) build system. The delegate can be built, for example, using the following command: ```sh -bazel build -c opt --config android_arm64 tensorflow/lite/delegates/gpu:gl_delegate # for static library -bazel build -c opt --config android_arm64 tensorflow/lite/delegates/gpu:libtensorflowlite_gpu_gl.so # for dynamic library +bazel build -c opt --config android_arm64 tensorflow/lite/delegates/gpu:delegate # for static library +bazel build -c opt --config android_arm64 tensorflow/lite/delegates/gpu:libtensorflowlite_gpu_delegate.so # for dynamic library ``` ### iOS (Swift) From 4d582a660b4e84fb283eba598127ae40fdd8d1ed Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Mon, 13 Jul 2020 21:17:33 -0700 Subject: [PATCH 0353/2522] Walk around the issue for fully masked input in cudnn kernel with LSTM/GRU. See https://github.com/tensorflow/tensorflow/issues/33148 for more details. This issue has been there for quite some time. We probably should walk around the issue for the moment until Nvidia fix it. For any fully masked inputs in a batch, it will fallback to generic kernel, which will have a performance dip, but better than error out to user. 
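As a rough illustration of the gating logic (a sketch only, not the exact helpers added in this change; it assumes `mask` is a boolean tensor of shape [batch, timestep]):

    import tensorflow as tf

    def can_use_cudnn(mask):
      # A row that is entirely False is a fully masked sequence; the cudnn
      # kernel errors out on these, so they must use the generic kernel.
      has_fully_masked = tf.reduce_any(
          tf.reduce_all(tf.logical_not(mask), axis=1))
      # cudnn also requires right-padded sequences: within a row, every True
      # must come before every False.
      lengths = tf.reduce_sum(tf.cast(mask, tf.int32), axis=1)
      right_padded = tf.sequence_mask(lengths, maxlen=tf.shape(mask)[1])
      is_right_padded = tf.reduce_all(tf.equal(mask, right_padded))
      return tf.logical_and(is_right_padded,
                            tf.logical_not(has_fully_masked))

When this predicate is False for a batch, the layer routes that batch to the standard (non-cudnn) implementation instead of raising an error.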
PiperOrigin-RevId: 321091439 Change-Id: Id26edd8c88fb7ea0f531c98d2eba8cca9f49473c --- tensorflow/python/keras/layers/gru_v2_test.py | 28 ++++++++++ .../python/keras/layers/lstm_v2_test.py | 28 ++++++++++ .../python/keras/layers/recurrent_v2.py | 54 ++++++++++++------- 3 files changed, 92 insertions(+), 18 deletions(-) diff --git a/tensorflow/python/keras/layers/gru_v2_test.py b/tensorflow/python/keras/layers/gru_v2_test.py index be71a4100bd..cc10f6fb4df 100644 --- a/tensorflow/python/keras/layers/gru_v2_test.py +++ b/tensorflow/python/keras/layers/gru_v2_test.py @@ -612,6 +612,34 @@ class GRUV2Test(keras_parameterized.TestCase): model.compile(loss='mse', optimizer='sgd') model.fit(dataset) + def test_with_fully_masked_inputs(self): + num_samples = 8 + timestep = 5 + embedding_dim = 4 + vocab_size = 20 + units = 2 + + inputs = np.random.randint(0, vocab_size, size=(num_samples, timestep)) + # Set the first inputs to be fully zero. + inputs[0, :] = 0.0 + + model = keras.models.Sequential() + model.add( + keras.layers.Embedding( + vocab_size, + embedding_dim, + mask_zero=True, + input_length=timestep, + batch_input_shape=(num_samples, timestep))) + layer = rnn.GRU(units) + model.add(layer) + model.compile( + optimizer=gradient_descent.GradientDescentOptimizer(0.01), + loss='mse', + run_eagerly=testing_utils.should_run_eagerly()) + # Make sure it doesn't crash with cudnn kernel. + model.predict(inputs) + class GRULayerGradientTapeTest(keras_parameterized.TestCase): diff --git a/tensorflow/python/keras/layers/lstm_v2_test.py b/tensorflow/python/keras/layers/lstm_v2_test.py index b60d8acb5f2..ca1b25465f3 100644 --- a/tensorflow/python/keras/layers/lstm_v2_test.py +++ b/tensorflow/python/keras/layers/lstm_v2_test.py @@ -813,6 +813,34 @@ class LSTMV2Test(keras_parameterized.TestCase): model.compile(loss='mse', optimizer='sgd') model.fit(dataset) + def test_with_fully_masked_inputs(self): + num_samples = 8 + timestep = 5 + embedding_dim = 4 + vocab_size = 20 + units = 2 + + inputs = np.random.randint(0, vocab_size, size=(num_samples, timestep)) + # Set the first inputs to be fully zero. + inputs[0, :] = 0.0 + + model = keras.models.Sequential() + model.add( + keras.layers.Embedding( + vocab_size, + embedding_dim, + mask_zero=True, + input_length=timestep, + batch_input_shape=(num_samples, timestep))) + layer = rnn.LSTM(units) + model.add(layer) + model.compile( + optimizer=gradient_descent.GradientDescentOptimizer(0.01), + loss='mse', + run_eagerly=testing_utils.should_run_eagerly()) + # Make sure it doesn't crash with cudnn kernel. 
+ model.predict(inputs) + @keras_parameterized.run_all_keras_modes(config=_config) class LSTMGraphRewriteTest(keras_parameterized.TestCase): diff --git a/tensorflow/python/keras/layers/recurrent_v2.py b/tensorflow/python/keras/layers/recurrent_v2.py index 33babb54357..bad9ecee7d6 100644 --- a/tensorflow/python/keras/layers/recurrent_v2.py +++ b/tensorflow/python/keras/layers/recurrent_v2.py @@ -490,7 +490,7 @@ class GRU(recurrent.DropoutRNNCellMixin, recurrent.GRU): (device_type == _GPU_DEVICE_NAME or (device_type is None and context.num_gpus() > 0)) and - (mask is None or is_sequence_right_padded(mask, self.time_major))) + (mask is None or is_cudnn_supported_inputs(mask, self.time_major))) # Under eager context, check the device placement and prefer the if can_use_gpu: last_output, outputs, new_h, runtime = gpu_gru(**gpu_gru_kwargs) @@ -735,7 +735,7 @@ def gru_with_backend_selection(inputs, init_h, kernel, recurrent_kernel, bias, go_backwards=go_backwards, sequence_lengths=sequence_lengths) - def input_right_padded(): + def cudnn_gru_fn(): return gpu_gru( inputs=inputs, init_h=init_h, @@ -747,7 +747,7 @@ def gru_with_backend_selection(inputs, init_h, kernel, recurrent_kernel, bias, go_backwards=go_backwards, sequence_lengths=sequence_lengths) - def input_not_right_padded(): + def standard_gru_fn(): return standard_gru( inputs=inputs, init_h=init_h, @@ -761,9 +761,9 @@ def gru_with_backend_selection(inputs, init_h, kernel, recurrent_kernel, bias, zero_output_for_mask=zero_output_for_mask) return control_flow_ops.cond( - is_sequence_right_padded(mask, time_major), - true_fn=input_right_padded, - false_fn=input_not_right_padded) + is_cudnn_supported_inputs(mask, time_major), + true_fn=cudnn_gru_fn, + false_fn=standard_gru_fn) # Each time a `tf.function` is called, we will give it a unique # identifiable API name, so that Grappler won't get confused when it @@ -1169,7 +1169,7 @@ class LSTM(recurrent.DropoutRNNCellMixin, recurrent.LSTM): (device_type == _GPU_DEVICE_NAME or (device_type is None and context.num_gpus() > 0)) and - (mask is None or is_sequence_right_padded(mask, self.time_major))) + (mask is None or is_cudnn_supported_inputs(mask, self.time_major))) # Under eager context, check the device placement and prefer the # GPU implementation when GPU is available. 
if can_use_gpu: @@ -1506,7 +1506,7 @@ def lstm_with_backend_selection(inputs, init_h, init_c, kernel, go_backwards=go_backwards, sequence_lengths=sequence_lengths) - def input_right_padded(): + def cudnn_lstm_fn(): return gpu_lstm( inputs=inputs, init_h=init_h, @@ -1519,7 +1519,7 @@ def lstm_with_backend_selection(inputs, init_h, init_c, kernel, go_backwards=go_backwards, sequence_lengths=sequence_lengths) - def input_not_right_padded(): + def stardard_lstm_fn(): return standard_lstm( inputs=inputs, init_h=init_h, @@ -1534,9 +1534,9 @@ def lstm_with_backend_selection(inputs, init_h, init_c, kernel, zero_output_for_mask=zero_output_for_mask) return control_flow_ops.cond( - is_sequence_right_padded(mask, time_major), - true_fn=input_right_padded, - false_fn=input_not_right_padded) + is_cudnn_supported_inputs(mask, time_major), + true_fn=cudnn_lstm_fn, + false_fn=stardard_lstm_fn) # Each time a `tf.function` is called, we will give it a unique # identifiable API name, so that Grappler won't get confused when it @@ -1561,7 +1561,7 @@ def lstm_with_backend_selection(inputs, init_h, init_c, kernel, return last_output, outputs, new_h, new_c, runtime -def is_sequence_right_padded(mask, time_major): +def is_sequence_right_padded(mask): """Check the mask tensor and see if it right padded. For CuDNN kernel, it uses the sequence length param to skip the tailing @@ -1578,15 +1578,11 @@ def is_sequence_right_padded(mask, time_major): pollute the internal states. Args: - mask: the Boolean tensor with shape [batch, timestep] or [timestep, batch] - when time_major is True. - time_major: Boolean, whether the input mask is time major or batch major. + mask: the Boolean tensor with shape [batch, timestep] Returns: boolean scalar tensor, whether the mask is strictly right padded. """ - if time_major: - mask = array_ops.transpose(mask) max_seq_length = array_ops.shape(mask)[1] count_of_true = math_ops.reduce_sum(math_ops.cast(mask, dtypes.int32), axis=1) right_padded_mask = array_ops.sequence_mask( @@ -1594,6 +1590,28 @@ def is_sequence_right_padded(mask, time_major): return math_ops.reduce_all(math_ops.equal(mask, right_padded_mask)) +def has_fully_masked_sequence(mask): + # See https://github.com/tensorflow/tensorflow/issues/33148 for more details. + # Cudnn kernel will error out if the input sequence contains any fully masked + # data. We walk around this issue by rerouting the computation to standard + # kernel, until the issue on cudnn side has been fixed. + # For a fully masked sequence, it will contain all Falses. To make it easy to + # check, we inverse the boolean, check if any of the seqence has all True. + return math_ops.reduce_any( + math_ops.reduce_all( + math_ops.logical_not(mask), + axis=1)) + + +def is_cudnn_supported_inputs(mask, time_major): + if time_major: + mask = array_ops.transpose(mask) + + return math_ops.logical_and( + is_sequence_right_padded(mask), + math_ops.logical_not(has_fully_masked_sequence(mask))) + + def calculate_sequence_by_mask(mask, time_major): """Calculate the sequence length tensor (1-D) based on the masking tensor. From ac3456f3ad9ab8af38d933f823aa665358f7c4fe Mon Sep 17 00:00:00 2001 From: Henry Tan Date: Mon, 13 Jul 2020 22:01:21 -0700 Subject: [PATCH 0354/2522] Updating std::variant with absl::variant. 
PiperOrigin-RevId: 321095741 Change-Id: Ib3be5422114c3e9b4581edcb1859bc5ed04bfa47 --- tensorflow/core/tpu/kernels/tpu_compile_op_impl.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_impl.cc b/tensorflow/core/tpu/kernels/tpu_compile_op_impl.cc index 0d514997142..3e684f97a88 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_impl.cc +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_impl.cc @@ -24,7 +24,7 @@ limitations under the License. namespace tensorflow { namespace tpu { Status TpuCompileOpKernelImpl::Compile( - const std::variant& computation, + const absl::variant& computation, const XLA_TpuMeshState* mesh_state, const std::vector& arg_shapes, TpuProgramGroupInterface* tpu_program_group) { From cf5f8c76f54be33a60c3d3f8df289641a846d3fa Mon Sep 17 00:00:00 2001 From: Thai Nguyen Date: Mon, 13 Jul 2020 22:10:37 -0700 Subject: [PATCH 0355/2522] Add tests to ensure flex ops have associated kernels This cl also add CombinedNonMaxSuppression to flex delegate PiperOrigin-RevId: 321096984 Change-Id: Ie1bc911faf2ba0fd7791b8620fb90e6f352bd0ec --- tensorflow/compiler/mlir/lite/BUILD | 2 +- tensorflow/lite/delegates/flex/BUILD | 26 +++++++- .../delegates/flex/allowlisted_flex_ops.cc | 15 +++-- .../flex/allowlisted_flex_ops_internal.h | 30 +++++++++ .../flex/allowlisted_flex_ops_test.cc | 61 +++++++++++++++++++ tensorflow/lite/toco/tflite/BUILD | 2 +- 6 files changed, 128 insertions(+), 8 deletions(-) create mode 100644 tensorflow/lite/delegates/flex/allowlisted_flex_ops_internal.h create mode 100644 tensorflow/lite/delegates/flex/allowlisted_flex_ops_test.cc diff --git a/tensorflow/compiler/mlir/lite/BUILD b/tensorflow/compiler/mlir/lite/BUILD index 56dd0854ee8..ab523e9cb8d 100644 --- a/tensorflow/compiler/mlir/lite/BUILD +++ b/tensorflow/compiler/mlir/lite/BUILD @@ -622,7 +622,7 @@ cc_library( "//tensorflow/core/platform:status", "//tensorflow/lite:schema_fbs_version", "//tensorflow/lite:string_util", - "//tensorflow/lite/delegates/flex:whitelisted_flex_ops_lib", + "//tensorflow/lite/delegates/flex:allowlisted_flex_ops_lib", "//tensorflow/lite/kernels/internal:kernel_utils", "//tensorflow/lite/schema:schema_fbs", "//tensorflow/lite/tools/versioning", diff --git a/tensorflow/lite/delegates/flex/BUILD b/tensorflow/lite/delegates/flex/BUILD index ae7408f8e30..8320ecebf9a 100644 --- a/tensorflow/lite/delegates/flex/BUILD +++ b/tensorflow/lite/delegates/flex/BUILD @@ -230,11 +230,35 @@ tf_cc_test( ) cc_library( - name = "whitelisted_flex_ops_lib", + name = "allowlisted_flex_ops_lib", srcs = [ "allowlisted_flex_ops.cc", ], hdrs = [ "allowlisted_flex_ops.h", + "allowlisted_flex_ops_internal.h", ], ) + +tf_cc_test( + name = "allowlisted_flex_ops_test", + size = "small", + srcs = [ + "allowlisted_flex_ops_test.cc", + ], + deps = [ + ":delegate", + ":allowlisted_flex_ops_lib", + "@com_google_googletest//:gtest", + ] + select({ + "//tensorflow:android": [ + "//tensorflow/core:portable_tensorflow_lib_lite", + ], + "//tensorflow:ios": [ + "//tensorflow/core:portable_tensorflow_lib_lite", + ], + "//conditions:default": [ + "//tensorflow/core:framework", + ], + }), +) diff --git a/tensorflow/lite/delegates/flex/allowlisted_flex_ops.cc b/tensorflow/lite/delegates/flex/allowlisted_flex_ops.cc index fa29d81371d..b8cc4ca56fe 100644 --- a/tensorflow/lite/delegates/flex/allowlisted_flex_ops.cc +++ b/tensorflow/lite/delegates/flex/allowlisted_flex_ops.cc @@ -12,14 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include "tensorflow/lite/delegates/flex/allowlisted_flex_ops.h" + #include -#include "tensorflow/lite/delegates/flex/allowlisted_flex_ops.h" +#include "tensorflow/lite/delegates/flex/allowlisted_flex_ops_internal.h" namespace tflite { namespace flex { -bool IsAllowlistedFlexOp(const std::string& tensorflow_op_name) { +const std::set& GetFlexAllowlist() { static const std::set* allowlisted_flex_ops = new std::set({ // go/keep-sorted start @@ -82,6 +84,7 @@ bool IsAllowlistedFlexOp(const std::string& tensorflow_op_name) { "Cast", "Ceil", "CheckNumerics", + "CombinedNonMaxSuppression", "Complex", "ComplexAbs", "Concat", @@ -108,7 +111,6 @@ bool IsAllowlistedFlexOp(const std::string& tensorflow_op_name) { "DebugGradientIdentity", "DebugGradientRefIdentity", "DecodeBase64", - "DecodeBmp", "DecodeWav", "DeepCopy", "DeleteSessionTensor", @@ -539,12 +541,15 @@ bool IsAllowlistedFlexOp(const std::string& tensorflow_op_name) { "_Send", // go/keep-sorted end }); - return allowlisted_flex_ops->find(tensorflow_op_name) != - allowlisted_flex_ops->end(); + return *allowlisted_flex_ops; // Prevent lint error about this function being too long. This function // is a set of ops, and making it shorter won't help readbility. // NOLINTNEXTLINE } +bool IsAllowlistedFlexOp(const std::string& tensorflow_op_name) { + return GetFlexAllowlist().count(tensorflow_op_name) != 0; +} + } // namespace flex } // namespace tflite diff --git a/tensorflow/lite/delegates/flex/allowlisted_flex_ops_internal.h b/tensorflow/lite/delegates/flex/allowlisted_flex_ops_internal.h new file mode 100644 index 00000000000..8ecb7e4dc99 --- /dev/null +++ b/tensorflow/lite/delegates/flex/allowlisted_flex_ops_internal.h @@ -0,0 +1,30 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_DELEGATES_FLEX_ALLOWLISTED_FLEX_OPS_INTERNAL_H_ +#define TENSORFLOW_LITE_DELEGATES_FLEX_ALLOWLISTED_FLEX_OPS_INTERNAL_H_ + +#include +#include + +namespace tflite { +namespace flex { + +// Return the list of allowlisted flex ops. +const std::set& GetFlexAllowlist(); + +} // namespace flex +} // namespace tflite + +#endif // TENSORFLOW_LITE_DELEGATES_FLEX_ALLOWLISTED_FLEX_OPS_INTERNAL_H_ diff --git a/tensorflow/lite/delegates/flex/allowlisted_flex_ops_test.cc b/tensorflow/lite/delegates/flex/allowlisted_flex_ops_test.cc new file mode 100644 index 00000000000..424013d33e0 --- /dev/null +++ b/tensorflow/lite/delegates/flex/allowlisted_flex_ops_test.cc @@ -0,0 +1,61 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/lite/delegates/flex/allowlisted_flex_ops.h" + +#include + +#include +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/lite/delegates/flex/allowlisted_flex_ops_internal.h" + +namespace tflite { +namespace flex { + +// Get all cpu kernels registered in Tensorflow. +std::set GetAllCpuKernels() { + auto is_cpu_kernel = [](const tensorflow::KernelDef& def) { + return (def.device_type() == "CPU" || def.device_type() == "DEFAULT"); + }; + + tensorflow::KernelList kernel_list = + tensorflow::GetFilteredRegisteredKernels(is_cpu_kernel); + std::set result; + + for (int i = 0; i < kernel_list.kernel_size(); ++i) { + tensorflow::KernelDef kernel_def = kernel_list.kernel(i); + result.insert(kernel_def.op()); + } + return result; +} + +// Test if every flex op has their kernel included in the flex delegate library. +// This test must be run on both Linux and Android. +TEST(AllowlistedFlexOpsTest, EveryOpHasKernel) { + const std::set& allowlist = GetFlexAllowlist(); + std::set all_kernels = GetAllCpuKernels(); + + for (const std::string& op_name : allowlist) { + EXPECT_EQ(all_kernels.count(op_name), 1) + << op_name << " op is added to flex allowlist " + << "but its kernel is not found."; + } +} +} // namespace flex +} // namespace tflite + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/lite/toco/tflite/BUILD b/tensorflow/lite/toco/tflite/BUILD index d34f38a6863..18b531fd5f1 100644 --- a/tensorflow/lite/toco/tflite/BUILD +++ b/tensorflow/lite/toco/tflite/BUILD @@ -28,7 +28,7 @@ cc_library( "//tensorflow/core:framework", "//tensorflow/core:protos_all_cc", "//tensorflow/core:ptr_util", - "//tensorflow/lite/delegates/flex:whitelisted_flex_ops_lib", + "//tensorflow/lite/delegates/flex:allowlisted_flex_ops_lib", "//tensorflow/lite/schema:schema_fbs", "//tensorflow/lite/toco:graph_transformations", "//tensorflow/lite/toco:model", From 6a22ad8f67f25a5a321ff6306ccbac4a3e33ee19 Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Mon, 13 Jul 2020 22:25:21 -0700 Subject: [PATCH 0356/2522] Store OpData in the concatenation kernel. The reference kernel currently calculates quantized data every eval which is slow on device and does not match upcoming changes for RAM savings. PiperOrigin-RevId: 321098375 Change-Id: I6eb82e135b865634d3147e6ef56415f3bfc3b5e7 --- .../lite/micro/kernels/concatenation.cc | 184 +++++++++++------- .../lite/micro/kernels/concatenation_test.cc | 14 +- 2 files changed, 122 insertions(+), 76 deletions(-) diff --git a/tensorflow/lite/micro/kernels/concatenation.cc b/tensorflow/lite/micro/kernels/concatenation.cc index abfeb5eae83..9b5515a3e14 100644 --- a/tensorflow/lite/micro/kernels/concatenation.cc +++ b/tensorflow/lite/micro/kernels/concatenation.cc @@ -31,46 +31,9 @@ namespace concatenation { constexpr int kMaxInputNum = 10; // Maximum number of input tensors constexpr int kOutputTensor = 0; -TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { - // This function only checks the types. 
Additional shape validations are - // performed in the reference implementation called during Eval(). - const TfLiteConcatenationParams* params = - reinterpret_cast(node->builtin_data); - - TfLiteType input_type = GetInput(context, node, 0)->type; - TfLiteType output_type = GetOutput(context, node, kOutputTensor)->type; - - // Check activation and input type - TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone); - TF_LITE_ENSURE(context, - input_type == kTfLiteFloat32 || input_type == kTfLiteUInt8 || - input_type == kTfLiteInt8 || input_type == kTfLiteInt32 || - input_type == kTfLiteInt64); - - // Output type must match input type - TF_LITE_ENSURE_EQ(context, output_type, input_type); - - // This implementation does not support large number of input tensors - const int num_inputs = NumInputs(node); - TF_LITE_ENSURE(context, num_inputs <= kMaxInputNum); - - // Shapes with dimensions >4 are not yet supported with static allocation. - for (int i = 0; i < num_inputs; ++i) { - const TfLiteTensor* input = GetInput(context, node, i); - int num_dimensions = NumDimensions(input); - - if (num_dimensions > 4) { - TF_LITE_KERNEL_LOG( - context, - "Op Concatenation does not currently support num dimensions >4 " - "Tensor has %d dimensions.", - num_dimensions); - return kTfLiteError; - } - } - - return kTfLiteOk; -} +struct OpData { + ConcatenationParams params; +}; // Handles negative axis index, coerces to positive index value. inline int CalculatePositiveAxis(int axis, const TfLiteTensor* output_tensor) { @@ -115,18 +78,6 @@ inline void GetAllTensorData(const TfLiteContext& context, } } -// Gets scale and zero point from a list of tensors -inline void GetAllQuantizationParam(const TfLiteContext& context, - const TfLiteIntArray& tensor_list, - float scales[kMaxInputNum], - int32 zero_points[kMaxInputNum]) { - for (int i = 0; i < tensor_list.size; ++i) { - const TfLiteTensor* t = &context.tensors[tensor_list.data[i]]; - scales[i] = t->params.scale; - zero_points[i] = t->params.zero_point; - } -} - template void EvalUnquantized(TfLiteContext* context, TfLiteNode* node) { // Collect the shapes and data pointer of input tensors @@ -139,14 +90,10 @@ void EvalUnquantized(TfLiteContext* context, TfLiteNode* node) { TfLiteTensor* output = GetOutput(context, node, kOutputTensor); - const TfLiteConcatenationParams* params = - reinterpret_cast(node->builtin_data); + TFLITE_DCHECK(node->user_data != nullptr); + const OpData* data = static_cast(node->user_data); - ConcatenationParams op_params; - op_params.axis = CalculatePositiveAxis(params->axis, output); - op_params.inputs_count = NumInputs(node); - - reference_ops::Concatenation(op_params, inputs_shape_ptr, inputs_data, + reference_ops::Concatenation(data->params, inputs_shape_ptr, inputs_data, GetTensorShape(output), GetTensorData(output)); } @@ -156,30 +103,119 @@ void EvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node) { RuntimeShape inputs_shape[kMaxInputNum]; const RuntimeShape* inputs_shape_ptr[kMaxInputNum]; const uint8_t* inputs_data[kMaxInputNum]; - float inputs_scale[kMaxInputNum]; - int32 inputs_zero_point[kMaxInputNum]; GetAllTensorShapes(*context, *node->inputs, inputs_shape); GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr); GetAllTensorData(*context, *node->inputs, inputs_data); - GetAllQuantizationParam(*context, *node->inputs, inputs_scale, - inputs_zero_point); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TFLITE_DCHECK(node->user_data != nullptr); + const OpData* data = 
static_cast(node->user_data); + + reference_ops::ConcatenationWithScaling(data->params, inputs_shape_ptr, + inputs_data, GetTensorShape(output), + GetTensorData(output)); +} + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + void* data = nullptr; + if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) == + kTfLiteError) { + return nullptr; + } + return data; +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + // This function only checks the types. Additional shape validations are + // performed in the reference implementation called during Eval(). const TfLiteConcatenationParams* params = reinterpret_cast(node->builtin_data); - ConcatenationParams op_params; - op_params.axis = CalculatePositiveAxis(params->axis, output); - op_params.inputs_count = NumInputs(node); - op_params.input_zeropoint = inputs_zero_point; - op_params.input_scale = inputs_scale; - op_params.output_zeropoint = output->params.zero_point; - op_params.output_scale = output->params.scale; + TfLiteType input_type = GetInput(context, node, 0)->type; + TfLiteType output_type = GetOutput(context, node, kOutputTensor)->type; - reference_ops::ConcatenationWithScaling(op_params, inputs_shape_ptr, - inputs_data, GetTensorShape(output), - GetTensorData(output)); + // Check activation and input type + TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone); + TF_LITE_ENSURE(context, + input_type == kTfLiteFloat32 || input_type == kTfLiteUInt8 || + input_type == kTfLiteInt8 || input_type == kTfLiteInt32 || + input_type == kTfLiteInt64); + + // Output type must match input type + TF_LITE_ENSURE_EQ(context, output_type, input_type); + + // This implementation does not support large number of input tensors + const int num_inputs = NumInputs(node); + TF_LITE_ENSURE(context, num_inputs <= kMaxInputNum); + + // Shapes with dimensions >4 are not yet supported with static allocation. + for (int i = 0; i < num_inputs; ++i) { + const TfLiteTensor* input = GetInput(context, node, i); + int num_dimensions = NumDimensions(input); + + if (num_dimensions > 4) { + TF_LITE_KERNEL_LOG( + context, + "Op Concatenation does not currently support num dimensions >4 " + "Tensor has %d dimensions.", + num_dimensions); + return kTfLiteError; + } + } + + // Calculate OpData. + TFLITE_DCHECK(node->user_data != nullptr); + OpData* data = static_cast(node->user_data); + + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + switch (output_type) { // Already know in/outtypes are same. + case kTfLiteFloat32: + case kTfLiteInt32: + case kTfLiteInt64: { + data->params.axis = CalculatePositiveAxis(params->axis, output); + data->params.inputs_count = node->inputs->size; + break; + } + case kTfLiteUInt8: + case kTfLiteInt8: { + data->params.axis = CalculatePositiveAxis(params->axis, output); + data->params.inputs_count = node->inputs->size; + + float* input_scales = nullptr; + TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer( + context, node->inputs->size * sizeof(float), + reinterpret_cast(&input_scales))); + + int32_t* input_zero_points = nullptr; + TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer( + context, node->inputs->size * sizeof(int32_t), + reinterpret_cast(&input_zero_points))); + + // Allocate persistent scale and zeropoint buffers. 
+ // Store input scale and zero point values in OpParams: + for (int i = 0; i < node->inputs->size; ++i) { + const TfLiteTensor* t = GetInput(context, node, i); + input_scales[i] = t->params.scale; + input_zero_points[i] = t->params.zero_point; + } + + data->params.input_scale = input_scales; + data->params.input_zeropoint = input_zero_points; + data->params.output_zeropoint = output->params.zero_point; + data->params.output_scale = output->params.scale; + break; + } + default: + TF_LITE_KERNEL_LOG( + context, "Op Concatenation does not currently support Type '%s'.", + TfLiteTypeGetName(output_type)); + return kTfLiteError; + } + + return kTfLiteOk; } TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { @@ -215,7 +251,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace concatenation TfLiteRegistration Register_CONCATENATION() { - return {/*init=*/nullptr, + return {/*init=*/concatenation::Init, /*free=*/nullptr, /*prepare=*/concatenation::Prepare, /*invoke=*/concatenation::Eval, diff --git a/tensorflow/lite/micro/kernels/concatenation_test.cc b/tensorflow/lite/micro/kernels/concatenation_test.cc index 8ac9e2ee2c8..e9ca6c93e81 100644 --- a/tensorflow/lite/micro/kernels/concatenation_test.cc +++ b/tensorflow/lite/micro/kernels/concatenation_test.cc @@ -62,10 +62,15 @@ void TestConcatenateTwoInputs(std::initializer_list input1_dims_data, int outputs_array_data[] = {1, 2}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); + void* user_data = nullptr; + if (registration->init) { + user_data = registration->init(&context, /*buffer=*/nullptr, /*length=*/0); + } + TfLiteNode node; node.inputs = inputs_array; node.outputs = outputs_array; - node.user_data = nullptr; + node.user_data = user_data; node.builtin_data = reinterpret_cast(&builtin_data); node.custom_initial_data = nullptr; node.custom_initial_data_size = 0; @@ -120,10 +125,15 @@ void TestConcatenateQuantizedTwoInputs( int outputs_array_data[] = {1, 2}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); + void* user_data = nullptr; + if (registration->init) { + user_data = registration->init(&context, /*buffer=*/nullptr, /*length=*/0); + } + TfLiteNode node; node.inputs = inputs_array; node.outputs = outputs_array; - node.user_data = nullptr; + node.user_data = user_data; node.builtin_data = reinterpret_cast(&builtin_data); node.custom_initial_data = nullptr; node.custom_initial_data_size = 0; From fa10b3fafcf3831004d67d5b9dbdc8090243e9b6 Mon Sep 17 00:00:00 2001 From: Meghna Natraj Date: Mon, 13 Jul 2020 22:52:25 -0700 Subject: [PATCH 0357/2522] Refactor flatbuffer_utils functions that parse model files. 
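
A minimal usage sketch of how the split-out helpers are intended to compose (illustrative only: the import path simply mirrors the file's location under tensorflow/lite/tools, the `.tflite` paths are hypothetical, and editing `description` is just an example of mutating the returned object):

```
from tensorflow.lite.tools import flatbuffer_utils

# File-based round trip: read a model as a mutable python object, edit it,
# and serialize it back out.
model = flatbuffer_utils.read_model_with_mutable_tensors('/tmp/model.tflite')
model.description = 'edited copy'
flatbuffer_utils.write_model(model, '/tmp/model_edited.tflite')

# Byte-level helpers, for when the serialized model is already in memory.
with open('/tmp/model.tflite', 'rb') as f:
  model_object = flatbuffer_utils.convert_bytearray_to_object(
      bytearray(f.read()))
model_bytearray = flatbuffer_utils.convert_object_to_bytearray(model_object)
```

With the bytearray/object conversions exposed as separate functions, callers that already hold a serialized model in memory can skip the file I/O entirely.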
PiperOrigin-RevId: 321101293 Change-Id: I51430786d0403a9a6735b1c8e4cee3f9596d4f00 --- tensorflow/lite/tools/flatbuffer_utils.py | 65 +++++++++++++++++------ 1 file changed, 48 insertions(+), 17 deletions(-) diff --git a/tensorflow/lite/tools/flatbuffer_utils.py b/tensorflow/lite/tools/flatbuffer_utils.py index f80daad2519..ce29a8e0e89 100644 --- a/tensorflow/lite/tools/flatbuffer_utils.py +++ b/tensorflow/lite/tools/flatbuffer_utils.py @@ -25,53 +25,84 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import copy import os import random from flatbuffers.python import flatbuffers from tensorflow.lite.python import schema_py_generated as schema_fb -TFLITE_FILE_IDENTIFIER = b'TFL3' +_TFLITE_FILE_IDENTIFIER = b'TFL3' + + +def convert_bytearray_to_object(model_bytearray): + """Converts a tflite model from a bytearray to an object for parsing.""" + model_object = schema_fb.Model.GetRootAsModel(model_bytearray, 0) + return schema_fb.ModelT.InitFromObj(model_object) def read_model(input_tflite_file): - """Reads and parses a tflite model. + """Reads a tflite model as a python object. Args: input_tflite_file: Full path name to the input tflite file Raises: - RuntimeError: If input_tflite_file is not found. + RuntimeError: If input_tflite_file path is invalid. IOError: If input_tflite_file cannot be opened. Returns: - A python flatbuffer object corresponding to the input tflite file. + A python object corresponding to the input tflite file. """ if not os.path.exists(input_tflite_file): raise RuntimeError('Input file not found at %r\n' % input_tflite_file) with open(input_tflite_file, 'rb') as file_handle: - file_data = bytearray(file_handle.read()) - model_obj = schema_fb.Model.GetRootAsModel(file_data, 0) - return schema_fb.ModelT.InitFromObj(model_obj) + model_bytearray = bytearray(file_handle.read()) + return convert_bytearray_to_object(model_bytearray) -def write_model(model, output_tflite_file): - """Writes the model, a python flatbuffer object, into the output tflite file. +def read_model_with_mutable_tensors(input_tflite_file): + """Reads a tflite model as a python object with mutable tensors. + + Similar to read_model() with the addition that the returned object has + mutable tensors (read_model() returns an object with immutable tensors). Args: - model: tflite model + input_tflite_file: Full path name to the input tflite file + + Raises: + RuntimeError: If input_tflite_file path is invalid. + IOError: If input_tflite_file cannot be opened. + + Returns: + A mutable python object corresponding to the input tflite file. + """ + return copy.deepcopy(read_model(input_tflite_file)) + + +def convert_object_to_bytearray(model_object): + """Converts a tflite model from an object to a bytearray.""" + # Initial size of the buffer, which will grow automatically if needed + builder = flatbuffers.Builder(1024) + model_offset = model_object.Pack(builder) + builder.Finish(model_offset, file_identifier=_TFLITE_FILE_IDENTIFIER) + model_bytearray = bytes(builder.Output()) + return model_bytearray + + +def write_model(model_object, output_tflite_file): + """Writes the tflite model, a python object, into the output file. + + Args: + model_object: A tflite model as a python object output_tflite_file: Full path name to the output tflite file. Raises: - IOError: If output_tflite_file cannot be opened. + IOError: If output_tflite_file path is invalid or cannot be opened. 
""" - # Initial size of the buffer, which will grow automatically if needed - builder = flatbuffers.Builder(1024) - model_offset = model.Pack(builder) - builder.Finish(model_offset, file_identifier=TFLITE_FILE_IDENTIFIER) - model_data = builder.Output() + model_bytearray = convert_object_to_bytearray(model_object) with open(output_tflite_file, 'wb') as out_file: - out_file.write(model_data) + out_file.write(model_bytearray) def strip_strings(model): From 1325bdd327cefadf0337eaeff8a0e177e1610e95 Mon Sep 17 00:00:00 2001 From: ShengYang1 Date: Tue, 14 Jul 2020 14:02:39 +0800 Subject: [PATCH 0358/2522] fix --- tensorflow/core/kernels/mkl_fused_ops_test.cc | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/tensorflow/core/kernels/mkl_fused_ops_test.cc b/tensorflow/core/kernels/mkl_fused_ops_test.cc index b1751ed07b6..1f1bbd158f9 100644 --- a/tensorflow/core/kernels/mkl_fused_ops_test.cc +++ b/tensorflow/core/kernels/mkl_fused_ops_test.cc @@ -331,7 +331,7 @@ class MklFusedConv2DOpTest : public OpsTestBase { template class MklFusedConv2DWithBiasOpTest : public MklFusedConv2DOpTest {}; -TYPED_TEST_CASE_P(MklFusedConv2DWithBiasOpTest); +TYPED_TEST_SUITE_P(MklFusedConv2DWithBiasOpTest); // -------------------------------------------------------------------------- // // Conv2D + BiasAdd + {Activation} // @@ -437,7 +437,7 @@ TYPED_TEST_P(MklFusedConv2DWithBiasOpTest, SpatialConvolutionAndAddElu) { this->VerifyFusedConv2D(kFilterSize, kFilterCount, {"BiasAdd", "Add", "Elu"}); } -REGISTER_TYPED_TEST_CASE_P( +REGISTER_TYPED_TEST_SUITE_P( MklFusedConv2DWithBiasOpTest, OneByOneConvolution, SpatialConvolution, OneByOneConvolutionAndRelu, SpatialConvolutionAndRelu, OneByOneConvolutionAndRelu6, SpatialConvolutionAndRelu6, @@ -448,8 +448,8 @@ REGISTER_TYPED_TEST_CASE_P( OneByOneConvolutionAndAddElu, SpatialConvolutionAndAddElu); using MklFusedBiasAddDataTypes = ::testing::Types; -INSTANTIATE_TYPED_TEST_CASE_P(Test, MklFusedConv2DWithBiasOpTest, - MklFusedBiasAddDataTypes); +INSTANTIATE_TYPED_TEST_SUITE_P(Test, MklFusedConv2DWithBiasOpTest, + MklFusedBiasAddDataTypes); // Testing MKL's fused depthwise convolution ops template @@ -747,21 +747,21 @@ class FusedPadConvOpTest : public OpsTestBase { } }; -TYPED_TEST_CASE_P(FusedPadConvOpTest); +TYPED_TEST_SUITE_P(FusedPadConvOpTest); TYPED_TEST_P(FusedPadConvOpTest, PaddingConvTest) { this->Run("NHWC"); } TYPED_TEST_P(FusedPadConvOpTest, PaddingConvTestNchw) { this->Run("NCHW"); } -REGISTER_TYPED_TEST_CASE_P(FusedPadConvOpTest, PaddingConvTest, - PaddingConvTestNchw); +REGISTER_TYPED_TEST_SUITE_P(FusedPadConvOpTest, PaddingConvTest, + PaddingConvTestNchw); #ifdef ENABLE_INTEL_MKL_BFLOAT16 using FusedPadConvDataTypes = ::testing::Types; #else using FusedPadConvDataTypes = ::testing::Types; #endif -INSTANTIATE_TYPED_TEST_CASE_P(Test, FusedPadConvOpTest, FusedPadConvDataTypes); +INSTANTIATE_TYPED_TEST_SUITE_P(Test, FusedPadConvOpTest, FusedPadConvDataTypes); class FilterCacheTest : public OpsTestBase { public: @@ -926,7 +926,7 @@ class MklFusedMatMulOpTest : public OpsTestBase { } }; -TYPED_TEST_CASE_P(MklFusedMatMulOpTest); +TYPED_TEST_SUITE_P(MklFusedMatMulOpTest); TYPED_TEST_P(MklFusedMatMulOpTest, WithBias) { const int batch = 3; @@ -963,15 +963,15 @@ TYPED_TEST_P(MklFusedMatMulOpTest, WithBiasAndElu) { {"BiasAdd", "Elu"}); } -REGISTER_TYPED_TEST_CASE_P(MklFusedMatMulOpTest, // - WithBias, // - WithBiasAndRelu, // - WithBiasAndRelu6, // - WithBiasAndElu); +REGISTER_TYPED_TEST_SUITE_P(MklFusedMatMulOpTest, // + WithBias, // + 
WithBiasAndRelu, // + WithBiasAndRelu6, // + WithBiasAndElu); using MklFusedMatMulDataTypes = ::testing::Types; -INSTANTIATE_TYPED_TEST_CASE_P(Test, MklFusedMatMulOpTest, - MklFusedMatMulDataTypes); +INSTANTIATE_TYPED_TEST_SUITE_P(Test, MklFusedMatMulOpTest, + MklFusedMatMulDataTypes); // Test the performance of MklFusedMatMul weight cache. // For the first time B matrix will be reordered and cached which will be @@ -1361,7 +1361,7 @@ class MklPadWithFusedConv2DOpTest : public OpsTestBase { } }; -TYPED_TEST_CASE_P(MklPadWithFusedConv2DOpTest); +TYPED_TEST_SUITE_P(MklPadWithFusedConv2DOpTest); TYPED_TEST_P(MklPadWithFusedConv2DOpTest, WithBiasAndRoundPad) { const int kFilterSize = 1; @@ -1391,15 +1391,15 @@ TYPED_TEST_P(MklPadWithFusedConv2DOpTest, WithBiasReluAndPartialPad) { this->VerifyPadAndConv2DWithBiasRelu(kFilterSize, kFilterCount); } -REGISTER_TYPED_TEST_CASE_P(MklPadWithFusedConv2DOpTest, // - WithBiasAndRoundPad, // - WithBiasAndPartialPad, // - WithBiasReluAndRoundPad, // - WithBiasReluAndPartialPad); +REGISTER_TYPED_TEST_SUITE_P(MklPadWithFusedConv2DOpTest, // + WithBiasAndRoundPad, // + WithBiasAndPartialPad, // + WithBiasReluAndRoundPad, // + WithBiasReluAndPartialPad); using MklPadWithFusedConv2DDataTypes = ::testing::Types; -INSTANTIATE_TYPED_TEST_CASE_P(Test, MklPadWithFusedConv2DOpTest, - MklPadWithFusedConv2DDataTypes); +INSTANTIATE_TYPED_TEST_SUITE_P(Test, MklPadWithFusedConv2DOpTest, + MklPadWithFusedConv2DDataTypes); } // namespace tensorflow #endif // INTEL_MKL From 45e2da1078b1fdef6c6eaa55cbdb1013f95511c2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 13 Jul 2020 23:51:40 -0700 Subject: [PATCH 0359/2522] Integrate LLVM at https://github.com/llvm/llvm-project/commit/869d05fb3e44 PiperOrigin-RevId: 321106397 Change-Id: I17b3b43a8612747b9538792dce624813dc6bb671 --- tensorflow/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index e0e05a96323..11e70d52d8e 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -710,8 +710,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "83080a294ad7d145d758821bcf4354ad0cb7d299" - LLVM_SHA256 = "fff6d3233b8ad5ebf3362a7dea0d7bb323f996e3182e6785772696337eed484f" + LLVM_COMMIT = "869d05fb3e449ec7ec835b8a61687f8df41b8651" + LLVM_SHA256 = "59b4eadf3b958a7787c287fa62f1b3f9936493fc8e7b1c2b769d37fd2998bd4e" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From 5064508fba3a56c595fa430d9dcdc0a9a4cd67b3 Mon Sep 17 00:00:00 2001 From: Terry Heo Date: Mon, 13 Jul 2020 23:58:41 -0700 Subject: [PATCH 0360/2522] Enable Flex delegate for Windows Since weak symbol overriding is not available on Windows, disable weak symbol overriding path but use TF_AcquireFlexDelegate() acquisition with DLL. 
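
The two acquisition paths, condensed into one illustrative C++ sketch (simplified stand-ins, not the actual sources: `DelegatePtr` and `CreateFlexDelegate()` abbreviate `tflite::TfLiteDelegateUniquePtr` and `FlexDelegate::Create()`):

```
#include <memory>

struct TfLiteDelegate;  // opaque in this sketch
using DelegatePtr = std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>;

DelegatePtr CreateFlexDelegate();  // provided by the flex delegate library

#if !defined(_WIN32)
// ELF/Mach-O path: this strong definition overrides the weak default
// declared by the interpreter builder, so linking the flex library in is
// enough to enable it.
DelegatePtr AcquireFlexDelegate() { return CreateFlexDelegate(); }
#endif

// Windows path: weak-symbol overriding is unavailable, so the factory is
// exported from the DLL under a C name and resolved at runtime instead.
extern "C" {
#if defined(_WIN32)
__declspec(dllexport)
#endif
DelegatePtr TF_AcquireFlexDelegate() { return CreateFlexDelegate(); }
}
```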
PiperOrigin-RevId: 321106995 Change-Id: I97398e6b473c78b6f68e418bcb1be3bad03be49a --- tensorflow/lite/delegates/flex/delegate.cc | 13 +++++++++++-- tensorflow/lite/python/BUILD | 1 - tensorflow/python/BUILD | 8 ++------ 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/tensorflow/lite/delegates/flex/delegate.cc b/tensorflow/lite/delegates/flex/delegate.cc index b8b0d4e6d01..f85b5e60f91 100644 --- a/tensorflow/lite/delegates/flex/delegate.cc +++ b/tensorflow/lite/delegates/flex/delegate.cc @@ -30,9 +30,13 @@ limitations under the License. namespace tflite { // Corresponding weak declaration found in lite/interpreter_builder.cc. +#if !defined(_WIN32) +// If weak symbol is not supported (Windows), it can use +// TF_AcquireFlexDelegate() path instead. TfLiteDelegateUniquePtr AcquireFlexDelegate() { return tflite::FlexDelegate::Create(); } +#endif TfLiteDelegateUniquePtr FlexDelegate::Create( std::unique_ptr base_delegate) { @@ -140,6 +144,11 @@ TfLiteStatus FlexDelegate::CopyFromBufferHandle( // Exported C interface function which is used by AcquireFlexDelegate() at // interpreter_build.cc. To export the function name globally, the function name // must be matched with patterns in tf_version_script.lds -extern "C" tflite::TfLiteDelegateUniquePtr TF_AcquireFlexDelegate() { - return tflite::AcquireFlexDelegate(); +extern "C" { +#if defined(_WIN32) +__declspec(dllexport) +#endif + tflite::TfLiteDelegateUniquePtr TF_AcquireFlexDelegate() { + return tflite::FlexDelegate::Create(); } +} // extern "C" diff --git a/tensorflow/lite/python/BUILD b/tensorflow/lite/python/BUILD index 63be9602bc2..dfcf46baa90 100644 --- a/tensorflow/lite/python/BUILD +++ b/tensorflow/lite/python/BUILD @@ -197,7 +197,6 @@ py_test( srcs_version = "PY2AND3", tags = [ "no_mac", # TODO(b/159077703): Enable Python API Flex support on MacOS. - "no_windows", # TODO(b/159077703): Enable Python API Flex support on Windows. 
], deps = [ ":lite", diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 83448150935..fbfa414791c 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -6074,6 +6074,7 @@ pywrap_tensorflow_macro( "//tensorflow/core/distributed_runtime:server_lib", "//tensorflow/core/platform:stacktrace_handler", "//tensorflow/core/profiler/internal:print_model_analysis", + "//tensorflow/lite/delegates/flex:delegate", "//tensorflow/core/profiler/internal/cpu:python_tracer", "//tensorflow/tools/graph_transforms:transform_graph_lib", "//tensorflow/core/platform:tf32_utils", @@ -6088,12 +6089,7 @@ pywrap_tensorflow_macro( "@ngraph_tf//:ngraph_tf", ]) + if_xla_available([ "//tensorflow/compiler/aot:tfcompile_lib", - ]) + select({ - "//tensorflow:windows": [], # TODO(b/159077703): Enable Flex on Windows - "//conditions:default": [ - "//tensorflow/lite/delegates/flex:delegate", - ], - }) + if_tpu(["//tensorflow/core/tpu:tpu_api_dlsym_initializer"]), + ]) + if_tpu(["//tensorflow/core/tpu:tpu_api_dlsym_initializer"]), ) # ** Targets for Windows build (start) ** From 85219d8bd6b38e08bff5465eec90535929c0c242 Mon Sep 17 00:00:00 2001 From: Tomer Kaftan Date: Tue, 14 Jul 2020 00:46:53 -0700 Subject: [PATCH 0361/2522] Made wrappers_test not as sensitive to the exact message of the raised error, to prevent failure when enabling KerasTensors causes the message to change `with layer` to `with the layer` PiperOrigin-RevId: 321112062 Change-Id: I808db0d25422f284649bdcf5b0c0bda6bc35fad5 --- tensorflow/python/keras/layers/wrappers_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/keras/layers/wrappers_test.py b/tensorflow/python/keras/layers/wrappers_test.py index efff254c688..671fe65d520 100644 --- a/tensorflow/python/keras/layers/wrappers_test.py +++ b/tensorflow/python/keras/layers/wrappers_test.py @@ -305,7 +305,7 @@ class TimeDistributedTest(keras_parameterized.TestCase): self.assertEqual(out_2.shape.as_list(), [None, 1, 5]) ph_3 = keras.backend.placeholder(shape=(None, 1, 18)) - with self.assertRaisesRegex(ValueError, 'is incompatible with layer'): + with self.assertRaisesRegex(ValueError, 'is incompatible with'): time_dist(ph_3) def test_TimeDistributed_with_invalid_dimensions(self): From 4fadd3e6c9f0432447d403b1cfbaed8e585c765f Mon Sep 17 00:00:00 2001 From: Gaurav Jain Date: Tue, 14 Jul 2020 01:07:53 -0700 Subject: [PATCH 0362/2522] Ensure pfor uses the dtype of the indices or axis The ops support both int32 & int64. Thus we should not hard code it to int32. 
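
A standalone reduction of the failure mode, written against the public TF API rather than pfor's internal helpers (illustrative only):

```
import tensorflow as tf

# pfor prepends a loop dimension, so a non-negative axis must be shifted by
# one. The shift has to be computed in the axis tensor's own dtype, otherwise
# int64 axes break.
axis = tf.constant(1, dtype=tf.int64)

# Hard-coding int32 produces a dtype mismatch when added to an int64 axis:
# offset = tf.cast(axis >= 0, tf.int32)   # axis + offset fails

# Deriving the dtype from the axis itself works for both int32 and int64:
offset = tf.cast(axis >= 0, axis.dtype)
shifted_axis = axis + offset  # stays int64
```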
PiperOrigin-RevId: 321114488 Change-Id: I8455b4b94c14454f81ff9d8097aaa0c5b6e260b2 --- .../python/ops/parallel_for/array_test.py | 17 ++++++++++++----- tensorflow/python/ops/parallel_for/math_test.py | 8 ++++---- tensorflow/python/ops/parallel_for/pfor.py | 6 +++--- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/tensorflow/python/ops/parallel_for/array_test.py b/tensorflow/python/ops/parallel_for/array_test.py index 85a2f6c191b..1e2ecdbea7b 100644 --- a/tensorflow/python/ops/parallel_for/array_test.py +++ b/tensorflow/python/ops/parallel_for/array_test.py @@ -148,9 +148,12 @@ class ArrayTest(PForTestCase): def loop_fn(i): x1 = array_ops.gather(x, i) - return array_ops.expand_dims( - x1, axis=-1), array_ops.expand_dims( - x1, axis=1) + return [ + array_ops.expand_dims(x1, axis=-1), + array_ops.expand_dims(x1, axis=1), + array_ops.expand_dims( + x1, axis=constant_op.constant(1, dtype=dtypes.int64)) + ] self._test_loop_fn(loop_fn, 3) @@ -319,8 +322,12 @@ class ArrayTest(PForTestCase): def loop_fn(i): x1 = array_ops.gather(x, i) - return array_ops.concat([x1, x1, y], - axis=0), array_ops.concat([x1, x1, y], axis=-1) + return [ + array_ops.concat([x1, x1, y], axis=0), + array_ops.concat([x1, x1, y], axis=-1), + array_ops.concat([x1, x1, y], + axis=constant_op.constant(0, dtype=dtypes.int64)) + ] self._test_loop_fn(loop_fn, 3) diff --git a/tensorflow/python/ops/parallel_for/math_test.py b/tensorflow/python/ops/parallel_for/math_test.py index 26bce86de73..85b58055d8f 100644 --- a/tensorflow/python/ops/parallel_for/math_test.py +++ b/tensorflow/python/ops/parallel_for/math_test.py @@ -341,7 +341,7 @@ class MathTest(PForTestCase, parameterized.TestCase): math_ops.reduce_min, math_ops.reduce_mean, ]: - for axis in ([1], None, [0, 2]): + for axis in ([1], None, [0, 2], constant_op.constant([1], dtypes.int64)): for keepdims in (True, False): # pylint: disable=cell-var-from-loop @@ -356,7 +356,7 @@ class MathTest(PForTestCase, parameterized.TestCase): def test_boolean_reduction(self): x = random_ops.random_uniform([2, 3, 4, 5]) > 0.5 for op in [math_ops.reduce_any, math_ops.reduce_all]: - for axis in ([1], None, [0, 2]): + for axis in ([1], None, [0, 2], constant_op.constant([1], dtypes.int64)): for keepdims in (True, False): # pylint: disable=cell-var-from-loop @@ -402,7 +402,7 @@ class MathTest(PForTestCase, parameterized.TestCase): def test_cum_sum(self): x = random_ops.random_uniform([2, 3, 4, 5]) - for axis in (1, -2): + for axis in (1, -2, constant_op.constant(1, dtypes.int64)): for exclusive in (True, False): for reverse in (True, False): @@ -418,7 +418,7 @@ class MathTest(PForTestCase, parameterized.TestCase): def test_cum_prod(self): x = random_ops.random_uniform([2, 3, 4, 5]) - for axis in (1, -2): + for axis in (1, -2, constant_op.constant(1, dtypes.int64)): for exclusive in (True, False): for reverse in (True, False): diff --git a/tensorflow/python/ops/parallel_for/pfor.py b/tensorflow/python/ops/parallel_for/pfor.py index 1bbfb65bb23..ef06ebe61ec 100644 --- a/tensorflow/python/ops/parallel_for/pfor.py +++ b/tensorflow/python/ops/parallel_for/pfor.py @@ -2029,7 +2029,7 @@ def _convert_broadcast_to(pfor_input): def _convert_expanddims(pfor_input): t = pfor_input.stacked_input(0) dim = pfor_input.unstacked_input(1) - dim += math_ops.cast(dim >= 0, dtypes.int32) + dim += math_ops.cast(dim >= 0, dim.dtype) return wrap(array_ops.expand_dims(t, axis=dim), True) @@ -2510,7 +2510,7 @@ def _convert_reduction(pfor_input, _, op_func): t = pfor_input.stacked_input(0) indices = 
pfor_input.unstacked_input(1) # Shift positive indices by one to account for the extra dimension. - indices += math_ops.cast(indices >= 0, dtypes.int32) + indices += math_ops.cast(indices >= 0, indices.dtype) keep_dims = pfor_input.get_attr("keep_dims") return wrap(op_func(t, indices, keepdims=keep_dims), True) @@ -2547,7 +2547,7 @@ def _convert_cumfoo(pfor_input, _, op_func): t = pfor_input.stacked_input(0) axis = pfor_input.unstacked_input(1) # Shift positive indices by one to account for the extra dimension. - axis += math_ops.cast(axis >= 0, dtypes.int32) + axis += math_ops.cast(axis >= 0, axis.dtype) exclusive = pfor_input.get_attr("exclusive") reverse = pfor_input.get_attr("reverse") return wrap(op_func(t, axis, exclusive=exclusive, reverse=reverse), True) From b29e1cb1e4d10b079bbf10d8557f751579a0a54e Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Tue, 14 Jul 2020 01:16:24 -0700 Subject: [PATCH 0363/2522] Integrate LLVM at https://github.com/llvm/llvm-project/commit/74c14202d90b PiperOrigin-RevId: 321115191 Change-Id: I8e2bd23aa64d1a78cecff1e60f65c4bf9b31d5f6 --- tensorflow/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 11e70d52d8e..5b77acb139a 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -710,8 +710,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "869d05fb3e449ec7ec835b8a61687f8df41b8651" - LLVM_SHA256 = "59b4eadf3b958a7787c287fa62f1b3f9936493fc8e7b1c2b769d37fd2998bd4e" + LLVM_COMMIT = "74c14202d90b46dda64a2542602855727b7d7f60" + LLVM_SHA256 = "4d066245a61d94dbab0d15e00e6dffec5754dae0f0914ec47f6ac02e92dffe36" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From 38d5874d64d0296238080bd46a073d4ae0c00e26 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 Jul 2020 02:01:32 -0700 Subject: [PATCH 0364/2522] compat: Update forward compatibility horizon to 2020-07-14 PiperOrigin-RevId: 321119326 Change-Id: I9494d52c6bdd3999694b5056366b342c9dc5d39a --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 147aba08d04..c2b66b48178 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 13) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 14) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From d999b19398ef2c57d61677d49b3a3f86240fa9c6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 Jul 2020 02:01:33 -0700 Subject: [PATCH 0365/2522] Update GraphDef version to 462. 
PiperOrigin-RevId: 321119328 Change-Id: Ic0f4e0015eb8561b93d60f5bf1336886fad0291e --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 5119d0c098a..1715c650b56 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 461 // Updated: 2020/7/13 +#define TF_GRAPH_DEF_VERSION 462 // Updated: 2020/7/14 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From 820519bcf32e13b8229b34816ed65662f939e81b Mon Sep 17 00:00:00 2001 From: "902449@58880@bigcat_chen@ASIC" Date: Tue, 14 Jul 2020 17:42:11 +0800 Subject: [PATCH 0366/2522] TFLM: add HIMAX WE1 EVB to support TFLM example(magic wand and micro speech) --- .../lite/micro/examples/magic_wand/README.md | 133 ++++++++++++++++++ .../himax_we1_evb/accelerometer_handler.cc | 89 ++++++++++++ .../micro/examples/micro_speech/README.md | 100 +++++++++++++ .../himax_we1_evb/audio_provider.cc | 58 ++++++++ .../himax_we1_evb/command_responder.cc | 59 ++++++++ .../images/animation_on_himax_we1_evb.gif | Bin 0 -> 1063997 bytes .../tools/make/third_party_downloads.inc | 4 +- 7 files changed, 441 insertions(+), 2 deletions(-) create mode 100644 tensorflow/lite/micro/examples/magic_wand/himax_we1_evb/accelerometer_handler.cc create mode 100644 tensorflow/lite/micro/examples/micro_speech/himax_we1_evb/audio_provider.cc create mode 100644 tensorflow/lite/micro/examples/micro_speech/himax_we1_evb/command_responder.cc create mode 100644 tensorflow/lite/micro/examples/micro_speech/images/animation_on_himax_we1_evb.gif diff --git a/tensorflow/lite/micro/examples/magic_wand/README.md b/tensorflow/lite/micro/examples/magic_wand/README.md index 0cf3b8e74c3..094e985586f 100644 --- a/tensorflow/lite/micro/examples/magic_wand/README.md +++ b/tensorflow/lite/micro/examples/magic_wand/README.md @@ -12,6 +12,7 @@ then outputs the gesture to the serial port. - [Getting started](#getting-started) - [Deploy to Arduino](#deploy-to-arduino) +- [Deploy to Himax WE1 EVB](#deploy-to-himax-we1-evb) - [Deploy to SparkFun Edge](#deploy-to-sparkfun-edge) - [Run the tests on a development machine](#run-the-tests-on-a-development-machine) - [Train your own model](#train-your-own-model) @@ -140,6 +141,138 @@ SLOPE: * * * * * * * * ``` +## Deploy to Himax WE1 EVB + +The following instructions will help you build and deploy this example to +[HIMAX WE1 EVB](https://github.com/HimaxWiseEyePlus/bsp_tflu/tree/master/HIMAX_WE1_EVB_board_brief) +board. To undstand more about using this board, please check +[HIMAX WE1 EVB user guide](https://github.com/HimaxWiseEyePlus/bsp_tflu/tree/master/HIMAX_WE1_EVB_user_guide). + +### Initial Setup + +To use the HIMAX WE1 EVB, please make sure following software are installed: + +#### MetaWare Development Toolkit + +See +[Install the Synopsys DesignWare ARC MetaWare Development Toolkit](/tensorflow/lite/micro/tools/make/targets/arc/README.md#install-the-synopsys-designware-arc-metaware-development-toolkit) +section for instructions on toolchain installation. + +#### Make Tool version + +A `'make'` tool is required for deploying Tensorflow Lite Micro applications on +HIMAX WE1 EVB, See +[Check make tool version](/tensorflow/lite/micro/tools/make/targets/arc/README.md#make-tool) +section for proper environment. 
+ +#### Serial Terminal Emulation Application + +There are 2 main purposes for HIMAX WE1 EVB Debug UART port + +- print application output +- burn application to flash by using xmodem send application binary + +You can use any terminal emulation program (like [PuTTY](https://www.putty.org/) +or [minicom](https://linux.die.net/man/1/minicom)). + +### Generate Example Project + +The example project for HIMAX WE1 EVB platform can be generated with the +following command: + +Download related third party data + +``` +make -f tensorflow/lite/micro/tools/make/Makefile TARGET=himax_we1_evb third_party_downloads +``` + +Generate magic wand project + +``` +make -f tensorflow/lite/micro/tools/make/Makefile generate_magic_wand_make_project TARGET=himax_we1_evb +``` + +### Build and Burn Example + +Following the Steps to run magic wand example at HIMAX WE1 EVB platform. + +1. Go to the generated example project directory. + + ``` + cd tensorflow/lite/micro/tools/make/gen/himax_we1_evb_arc/prj/magic_wand/make + ``` + +2. Build the example using + + ``` + make app + ``` + +3. After example build finish, copy ELF file and map file to image generate + tool directory. \ + image generate tool directory located at + `'tensorflow/lite/micro/tools/make/downloads/himax_we1_sdk/image_gen_linux_v3/'` + + ``` + cp magic_wand.elf himax_we1_evb.map ../../../../../downloads/himax_we1_sdk/image_gen_linux_v3/ + ``` + +4. Go to flash image generate tool directory. + + ``` + cd ../../../../../downloads/himax_we1_sdk/image_gen_linux_v3/ + ``` + +5. run image generate tool, generate flash image file. + + * Before running image generate tool, by typing `sudo chmod +x image_gen` + and `sudo chmod +x sign_tool` to make sure it is executable. + + ``` + image_gen -e magic_wand.elf -m himax_we1_evb.map -o out.img + ``` + +6. Download flash image file to HIMAX WE1 EVB by UART: + + * more detail about download image through UART can be found at + [HIMAX WE1 EVB update Flash image](https://github.com/HimaxWiseEyePlus/bsp_tflu/tree/master/HIMAX_WE1_EVB_user_guide#flash-image-update) + +After these steps, press reset button on the HIMAX WE1 EVB, you will see +application output in the serial terminal. Perform following gestures `'Wing'`,`'Ring'`,`'Slope'` and you can see the otuput in serial terminal. + +``` +WING: +* * * + * * * * + * * * * + * * * * + * * * * + * * +``` + +``` +RING: + * + * * + * * + * * + * * + * * + * +``` + +``` +SLOPE: + * + * + * + * + * + * + * + * * * * * * * * +``` + ## Deploy to SparkFun Edge The following instructions will help you build and deploy this sample on the diff --git a/tensorflow/lite/micro/examples/magic_wand/himax_we1_evb/accelerometer_handler.cc b/tensorflow/lite/micro/examples/magic_wand/himax_we1_evb/accelerometer_handler.cc new file mode 100644 index 00000000000..9d83b01be05 --- /dev/null +++ b/tensorflow/lite/micro/examples/magic_wand/himax_we1_evb/accelerometer_handler.cc @@ -0,0 +1,89 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/lite/micro/examples/magic_wand/accelerometer_handler.h" + +#include "hx_drv_tflm.h" + +int begin_index = 0; + +namespace { +// Ring buffer size +constexpr int ring_buffer_size = 600; +// Ring buffer +float save_data[ring_buffer_size] = {0.0}; +// Flag to start detect gesture +bool pending_initial_data = true; +// Available data count in accelerometer FIFO +int available_count = 0; + +} // namespace + +TfLiteStatus SetupAccelerometer(tflite::ErrorReporter* error_reporter) { + if (hx_drv_accelerometer_initial() != HX_DRV_LIB_PASS) { + TF_LITE_REPORT_ERROR(error_reporter, "setup fail"); + return kTfLiteError; + } + + TF_LITE_REPORT_ERROR(error_reporter, "setup done"); + + return kTfLiteOk; +} + +bool ReadAccelerometer(tflite::ErrorReporter* error_reporter, float* input, + int length) { + // Check how many accelerometer data + available_count = hx_drv_accelerometer_available_count(); + + if (available_count == 0) return false; + + for (int i = 0; i < available_count; i++) { + float x, y, z; + hx_drv_accelerometer_receive(&x, &y, &z); + + const float norm_x = -x; + const float norm_y = y; + const float norm_z = z; + + // Save data in milli-g unit + save_data[begin_index++] = norm_x * 1000; + save_data[begin_index++] = norm_y * 1000; + save_data[begin_index++] = norm_z * 1000; + + // If reach end of buffer, return to 0 position + if (begin_index >= ring_buffer_size) begin_index = 0; + } + + // Check if data enough for prediction + if (pending_initial_data && begin_index >= 200) { + pending_initial_data = false; + } + + // Return if we don't have enough data + if (pending_initial_data) { + return false; + } + + // Copy the requested number of bytes to the provided input tensor + for (int i = 0; i < length; ++i) { + int ring_array_index = begin_index + i - length; + if (ring_array_index < 0) { + ring_array_index += ring_buffer_size; + } + input[i] = save_data[ring_array_index]; + } + + return true; +} diff --git a/tensorflow/lite/micro/examples/micro_speech/README.md b/tensorflow/lite/micro/examples/micro_speech/README.md index a4a2f2d3be7..0ee367bd854 100644 --- a/tensorflow/lite/micro/examples/micro_speech/README.md +++ b/tensorflow/lite/micro/examples/micro_speech/README.md @@ -22,6 +22,7 @@ kilobytes of Flash. - [Deploy to SparkFun Edge](#deploy-to-sparkfun-edge) - [Deploy to STM32F746](#deploy-to-STM32F746) - [Deploy to NXP FRDM K66F](#deploy-to-nxp-frdm-k66f) +- [Deploy to HIMAX WE1 EVB](#deploy-to-himax-we1-evb) - [Run on macOS](#run-on-macos) - [Run the tests on a development machine](#run-the-tests-on-a-development-machine) - [Train your own model](#train-your-own-model) @@ -562,6 +563,105 @@ using [ARM Mbed](https://github.com/ARMmbed/mbed-cli). in black color. If there is no output on the serial port, you can connect headphone to headphone port to check if audio loopback path is working. +## Deploy to HIMAX WE1 EVB + +The following instructions will help you build and deploy this example to +[HIMAX WE1 EVB](https://github.com/HimaxWiseEyePlus/bsp_tflu/tree/master/HIMAX_WE1_EVB_board_brief) +board. To undstand more about using this board, please check +[HIMAX WE1 EVB user guide](https://github.com/HimaxWiseEyePlus/bsp_tflu/tree/master/HIMAX_WE1_EVB_user_guide). 
+ +### Initial Setup + +To use the HIMAX WE1 EVB, please make sure following software are installed: + +#### MetaWare Development Toolkit + +See +[Install the Synopsys DesignWare ARC MetaWare Development Toolkit](/tensorflow/lite/micro/tools/make/targets/arc/README.md#install-the-synopsys-designware-arc-metaware-development-toolkit) +section for instructions on toolchain installation. + +#### Make Tool version + +A `'make'` tool is required for deploying Tensorflow Lite Micro +applications on HIMAX WE1 EVB, See +[Check make tool version](/tensorflow/lite/micro/tools/make/targets/arc/README.md#make-tool) +section for proper environment. + +#### Serial Terminal Emulation Application + +There are 2 main purposes for HIMAX WE1 EVB Debug UART port + +- print application output +- burn application to flash by using xmodem send application binary + +You can use any terminal emulation program (like [PuTTY](https://www.putty.org/) or [minicom](https://linux.die.net/man/1/minicom)). + + +### Generate Example Project + +The example project for HIMAX WE1 EVB platform can be generated with the following +command: + +Download related third party data + +``` +make -f tensorflow/lite/micro/tools/make/Makefile TARGET=himax_we1_evb third_party_downloads +``` + +Generate micro speech project + +``` +make -f tensorflow/lite/micro/tools/make/Makefile generate_micro_speech_make_project TARGET=himax_we1_evb +``` + +### Build and Burn Example + +Following the Steps to run micro speech example at HIMAX WE1 EVB platform. + +1. Go to the generated example project directory. + + ``` + cd tensorflow/lite/micro/tools/make/gen/himax_we1_evb_arc/prj/micro_speech/make + ``` + +2. Build the example using + + ``` + make app + ``` + +3. After example build finish, copy ELF file and map file to image generate tool directory. + image generate tool directory located at `'tensorflow/lite/micro/tools/make/downloads/himax_we1_sdk/image_gen_linux_v3/'` + + ``` + cp micro_speech.elf himax_we1_evb.map ../../../../../downloads/himax_we1_sdk/image_gen_linux_v3/ + ``` + +4. Go to flash image generate tool directory. + + ``` + cd ../../../../../downloads/himax_we1_sdk/image_gen_linux_v3/ + ``` + +5. run image generate tool, generate flash image file. + + * Before running image generate tool, by typing `sudo chmod +x image_gen` + and `sudo chmod +x sign_tool` to make sure it is executable. + + ``` + image_gen -e micro_speech.elf -m himax_we1_evb.map -o out.img + ``` + + +6. Download flash image file to HIMAX WE1 EVB by UART: + + * more detail about download image through UART can be found at [HIMAX WE1 EVB update Flash image](https://github.com/HimaxWiseEyePlus/bsp_tflu/tree/master/HIMAX_WE1_EVB_user_guide#flash-image-update) + +After these steps, press reset button on the HIMAX WE1 EVB, you will see application output in the serial +terminal and lighting LED. + +![Animation on Himax WE1 EVB](images/animation_on_himax_we1_evb.gif) + ## Run on macOS The example contains an audio provider compatible with macOS. If you have access diff --git a/tensorflow/lite/micro/examples/micro_speech/himax_we1_evb/audio_provider.cc b/tensorflow/lite/micro/examples/micro_speech/himax_we1_evb/audio_provider.cc new file mode 100644 index 00000000000..a779b4c039d --- /dev/null +++ b/tensorflow/lite/micro/examples/micro_speech/himax_we1_evb/audio_provider.cc @@ -0,0 +1,58 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/micro/examples/micro_speech/audio_provider.h" + +#include "tensorflow/lite/micro/examples/micro_speech/micro_features/micro_model_settings.h" + +#include "hx_drv_tflm.h" + +namespace { +// Feedback silence buffer when beginning start_ms <= 0 +int16_t g_silence[kMaxAudioSampleSize] = {0}; +// Latest time-stamp +int32_t g_latest_audio_timestamp = 0; +// config about audio data size and address +hx_drv_mic_data_config_t mic_config; +// Flag for check if audio is initialize or not +bool g_is_audio_initialized = false; +} // namespace + +TfLiteStatus GetAudioSamples(tflite::ErrorReporter* error_reporter, + int start_ms, int duration_ms, + int* audio_samples_size, int16_t** audio_samples) { + if (!g_is_audio_initialized) { + if (hx_drv_mic_initial() != HX_DRV_LIB_PASS) return kTfLiteError; + + hx_drv_mic_on(); + g_is_audio_initialized = true; + } + + if (start_ms > 0) { + hx_drv_mic_capture(&mic_config); + } else { + mic_config.data_size = kMaxAudioSampleSize; + mic_config.data_address = (uint32_t)g_silence; + } + + *audio_samples_size = mic_config.data_size; + *audio_samples = (int16_t*)mic_config.data_address; + return kTfLiteOk; +} + +int32_t LatestAudioTimestamp() { + hx_drv_mic_timestamp_get(&g_latest_audio_timestamp); + return g_latest_audio_timestamp; +} diff --git a/tensorflow/lite/micro/examples/micro_speech/himax_we1_evb/command_responder.cc b/tensorflow/lite/micro/examples/micro_speech/himax_we1_evb/command_responder.cc new file mode 100644 index 00000000000..22e941df959 --- /dev/null +++ b/tensorflow/lite/micro/examples/micro_speech/himax_we1_evb/command_responder.cc @@ -0,0 +1,59 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/lite/micro/examples/micro_speech/command_responder.h" +#include "hx_drv_tflm.h" + +static int32_t last_command_time = 0; +static uint32_t loop = 0; +static bool all_on = 0; + +void RespondToCommand(tflite::ErrorReporter* error_reporter, + int32_t current_time, const char* found_command, + uint8_t score, bool is_new_command) { + loop++; + if (is_new_command) { + TF_LITE_REPORT_ERROR(error_reporter, "Heard %s (%d) @%dms", found_command, + score, current_time); + if (found_command[0] == 'y') { + last_command_time = current_time; + hx_drv_led_off(HX_DRV_LED_RED); + hx_drv_led_on(HX_DRV_LED_GREEN); + } else if (found_command[0] == 'n') { + last_command_time = current_time; + hx_drv_led_off(HX_DRV_LED_GREEN); + hx_drv_led_on(HX_DRV_LED_RED); + } + } + + if (last_command_time != 0) { + if (last_command_time < (current_time - 3000)) { + last_command_time = 0; + hx_drv_led_off(HX_DRV_LED_GREEN); + hx_drv_led_off(HX_DRV_LED_RED); + } + } else { + if ((loop % 10) == 0) { + if (all_on) { + hx_drv_led_on(HX_DRV_LED_RED); + hx_drv_led_on(HX_DRV_LED_GREEN); + } else { + hx_drv_led_off(HX_DRV_LED_RED); + hx_drv_led_off(HX_DRV_LED_GREEN); + } + all_on = !all_on; + } + } +} diff --git a/tensorflow/lite/micro/examples/micro_speech/images/animation_on_himax_we1_evb.gif b/tensorflow/lite/micro/examples/micro_speech/images/animation_on_himax_we1_evb.gif new file mode 100644 index 0000000000000000000000000000000000000000..5897c43d5a20c735f8f6fd086720fce083ffb30c GIT binary patch literal 1063997 zcmWh!XIN89*F7h_5C|m@dT0`=p%+mTYN&?ZF*E@wf)r8ldJ+-{2nY&D6GIgdH6SQp z3!x(_s3o-s9ovZtoCF27|!=0RTaUAcI9J z5(s1hMoEF7u1HW-fy>G&5M)&qWEE9pNwNxxvP!Coic0DvB@I<&l9HO5vYMuv8cCHb zE05PFV|A5DdTJy+4RvEp^-ZcOn^lSWI+`YW8pir6HdHk$6N0j`j=Gwjx~hS)5>-!A zm#V3^S<_fc$3$OG&%{8_%-Fy{&&1fk)Xc=hkV>^Q(6chtw=^?xFr`{qm~F8#b+9%x zGqp4{vavF^wl}x2wzAq{W3|=M#@5qdU-HIf_#Ibf&wGLLj3##62rXm zqnPo*feDd8snL;1kzuL9to;d|Y4K4h31J1Pk-14>xoIIG;Yq&1sY%iC+0n6)@%tm< z(o=UO=A8X<2zOv60)HO^YJUi<9h*rqeod ztV-h53sWPCaw3lH^=;qhU$if!FgLsKU^egI!6SzbmK?|}%T24_v-4t^U46CAM62Wa z5Au(H4cFK0?%Xl2=d(mdcMNp~locK<;q9vt?5`=^+g+VCa5`tK!RPilzuOlw?w?J# zd&lS7H{aDq8P~^bcct-Ca|F5hg4}{*Q7MJRe-&2!Rd}$l<{+oy2%leDTGUuAC@wkC zP*Yc3QPWXh(%DdZuKrk0YuUi*(w?^JbFGbMI!|0|tL{HrbFsZ~;9TSIx%%P0lb3rN z2Ky>&>O0$;TQ4@Zbhmd7ba!02*m1f4@`ZE#^_>G#7t2Nm&%V4~a%;T*&ApR-LsQM& zBa-Wqq0tA|ZoRxUdw2ff>XTRhtgih1=H2(-8ynx&|NZcJ{f%_#P@#{`4+O>%005o; z@P2MVfi4V|o25Ao4*uyf{nLQ|rwM@B0N!8t(}~q{dX) z?D|l0N85M>#U(`5v#-6TDzLjfXnJp=ceUB+sV0X+Pv3g4cOA zuAKF03_Jb4U2+*mG6{OngY~J~Of;q|n8-@81|P&3Fbbw9vZN4+eZxKpB*F#8P=0-o(vUVv8K&E*20gVXG%-g44>|kCK^Tu zG1CkU_PGa)-wzA(53V`jJw;jx_-AGO=?M?4O-MM&9W@V9EMEsD7+vX^vMKQ}K6)l(O8`{h-;y3F(5ZEoJ7P2S2Cw`e zwo*g&`oT))=J5IOYknRCHIF0a2h`FG_b2>)L;q2Jir68}@%SRPN`SWoyxDT-5x4P9icM1Eys|r5PsN0kbzWKN7VQt=_ z#*Vsf{YyUBcOB{^jNuN29dD+;ivHQ{zM?g*>9OBLAw6dN;rXm5KP{w#AJeZ*Mdl76 z6eGNP$6wqVxn&>t&(^Ee^2{$`K{26^+E0CdaNR+A+u`LAC8d5!)&~l@GWIE7ncBrk zo=EXM`|!R(;+4DBZ*{M|jBXcCFrKd)?4N4GreCeqqtwjocj>HAe0KRNU2Wo@fnMNQJSt#mG- z`penPMmp6m-!Yo0Sl+4k%fy2;38{=#{RY!vpBh(@<$l*8=5|RAwp%e?O`nJGnmU5; ze|)zt#C@*aqWtoe=#RAZR%ZCceK#BZJ7qRGJJk|sl=phRepDr0O@l3OT4v;XLYehE zgUp+EU ?RNS1p}RMnYF|3Yjwu=n!X41QM{J7q)}*@YBs9#YJ9Fx`}ET0U^>S;B&KuFyEzaS9yYDs&Vn1s zpe+8^4%>FF!O$StIVv9cIINHjSDT}wY`Q=~91*U!ScBR38<79{6C|+d2;DjeA;SiU z8gVMaQxfG$p;n*%U61itMZceRSnwmq?i7-Tqx4Jr;g8~?~GKElgD!L+_TUKO-GC>+)LwN|w8m4YeY zm~Y$1hU?6UEh@uDaQ2nl57(tgjrS+c^+@mGL%@UKnRA;^xoEU@l~QN& z{O@U|G6I|~&d;YE{CsO?0*t)vafqS(&3LUFmS;ETexY`Kt*H3@_sLI9mS(jwUwYF- 
zwWcyKl|d2TDx()YbeD}ZB0J;k+0W2nkFL7ZiQ${*fZ3i$Gl<16Fts@dh7kkm>^&Pw zVhYYbCr zz4+^mvskRY>>heUQMD(rq%2wYr`NeQ*_sPCfYm&H+!#LwCcWa@eo zt_Jzd@Y!FpcVdp#3}-}Zq6fF|VkZIY=~a}?FsCA;DGoRK>=S}aNrzcOu=tN(PTW{p zF}GvG5-#lpZRtFlY{{)XAIBO~EbtKm3Em&zpo@fWbn+qM6+k3?{ZGeGeh&42`&&1k zSr48+gqLhz;}nMMTYkW|sQ1yEax+b41bCZ`>EAx#7uxhT_@AuJ{Tt;<8*xe&FMhwW z_i?D$cfY?jWY^tOEo1uI(&q|`uje9-StB9?3Yet=0d%BMpzhG8kDtdthyY?@%BRaW70KDnCIHv0fT7g1iXh)Vm*JNgr+89`yfU(e zAbvdo`@W55KGs`oW<3)AapdlSkN=|?B%eS0;O7d-CRL0!sHDVveF0OHq{#WVTdQ-X zNPVLH#FJAvM*|D>y~lTlqI0`M+ujC;+m9{B9P{y)BoXrGLevM@NKGk(o=HuF11Dh! zVB0+3ufQCA2&N>pJfR7JCibI$`ySnJXlI~FOkgPiD!Y2W4;@wCn%z`hMFR&FomH;P?K|+$HW)6_0XRyUswkU76 z9iAmdMy(+a)aAXMz-4|%y0F0C50MA=VK$D~*hO$i>;Gu%K4$e>hpcB4Ag-n~@S|l^ zBI>d@ACFmwi&3h3cw%L;5clK1Rlj z+Qz(0OpMsXXa5iI$M6Xhz|B54>;y0PL_uh1ZtP{im0IrNQ7&5v+e0CEJ>>g(5Z=rY zZqqR|I*`EJ6`xmeH}6o43Ln1)sIcLwC-QPINtv54Ip2{xrFo}Vhu7`!zM>=w3tgPQ z{k@W#Ppd^jZ2p0H?b4)xT$uGAe`Mtg*?0{`o(dqIP?gh{KhBZY$8bB6RilMSy)&iR z#3K&zd*wTKq>w_l5JS^R8}L0OxF&Sa@!^q`h3vQM>CT2_qaM7V%e-W2wc8Xg=wVs# zVwtxf*ZT)Auo!d#u+JBBd?w4iR0w}j;0em*cd6yiFICK_*RahrJQsi^Bne&+aj@^oj@J3lmlb^xnk5glv--@^+4&=gX!%F^ z6>nfhY@LQF)yB`(H~VAPEYY=D)S7wZL7*Xe4!ORL^cNz7GZE|UScVX5D<<0t3$593 z3womy`44uYwT00(^#@14H-;(m#@2I}RcfCq^8-&DiZiTsJ>8@Pr7zkRM5k0l) ztA*pXJwis!A=yL}TU5MrE=KlSg6&+Z+1rrEZlQyojX5=qmsQ}GPc+&YfpjXUE3AmR zl;|ZWnEAnb^u26$y6H}GZfH`w2a`WNUEq_6do-7`FefX0M0f%bmgaJvun2GHgj6XI zXIOr3SCO=~d3mBa@$X$xB5bP&|22gWK~eUoB)QWO(G<)@(a}@V42lSh0HBQ_A=2?x z&^v!pgr#=wyz&(#dSP!}PHQ-EI=r5`-*iRwLmbiXq7H1iWpXcaC%yHZJANRu`~3vg z2nRFY-I#9EXy)kO=NM`NH4dJM)~L*09*^GYh>%WZzr*Iu{4afakoTmx$UUXnn^``4 zAa{RldJwu=mIAM!6PAVui*HXZNl!j|loR||Mgb~&Jq^4X23|Ytiv5vGVj=R@u~9;N zcBxUJjxw8p*h#~mTl;IzYlJ!xdGG{2Oz2TDO(j|U-VIA<6;T> zR5iX>H_2yVK*p(FbcN#1)Q7l%Is7$1MmMtW8Xcb#x=)MVcqwTEuJ{c$lmg!@?K5v| zG+9e{_O0>8kF(t1^wi<9>5@ZH)9ta~xXd>?o|d{tS88M=rLPyo?SM zA+@&P2iEX6*nPTNWUfKO*J$|M)7T!zHiuzYg9v7&e#JiZ;vg<2FeHtE3A>BhHD=6T zyj(MFnB#u)kjqWpLJI#uZI2T1oGb-QmC8J;>sZ+UnioWb=W8ABp>xmagxeIj>{?BN zdwK}7{NM5Hmk*F{l~GYN-1{}zArUfgZQH#pjp)SC|3%=xoz8GuLk_VC!*qxDN+v!u zgIEczZ2zEa3MFrE2wiVzMdF6j-%tA2`lJO_DT^AtHl#m;(|LmLqxI>&!x^MWbZ_Ef z#W57x71~+sW?U#f4%q16paJOlZ@8Z)ZwdA-1*8Gswtl8gg1HLXJvvg5oem4 zjy}%4@?_wMwGifbHChg6&+klpZhKu(3QR5m@^b)jt!Edmd>&i=@KTe{U^U}M53}*m zt5sMg1$+NV&Z4+H1~+j~2&#&~?P8QuW=y64G+Kk@@9Ht4fGTTX!QA9G5BwEC*rsG3 z{8_7X40}b2jUu92SqMLx5bk8@s6Y5}SyQ!8Wz)A{+`ndVBBrh0VF%b761^pSx9Prp)407!!#&;z3fufKa&)01+VV+=i8w@~vdx^;xJ7u- zLJZ&d3TO~#AF7OnD36W#j71LBq%*6hyP9tO81S3{7TwE2UIWNAK$C?mWaIA(3BkkT zJ0l1SM2t2Yj7`VhVq=+Nxb+%fl8J`qPyzqD8Z`;JQ!q_L)LYwq{%a<#kWw+izO5SJ zD@I-iZf5;L_|ovO9tFtTn;WV0yU65q)iDQ`Y2t1QyxmP9%^W-CD6BEpJ~@J0M#(Sx zc zXg2Cx`a`2JKku<;?$ytV(r3xl3q4JY>7De|1v%-03iQY0MAx#%7c(Ese8*@?A9w)R z`;~rm=ft`Lj5$R55S z#t#Vb?_VP$h)7=|^3%;f%$E$TA$x{l&TP1^-$ko;eJNGwLln5C5N0t4=TWdnA%r#& zJqTP6hQQDV*)LNP-=_Cj3JQB0+3Qb1-h@f zh6-*XJQfn>**FUbRAIpjX@r~Zga!6Ft4gqnNVp{(kmm6y(!bum#_hh0{Ny@VHe>&n z2cl_h@#BFxS0ZkhetN{sJ7_+vxI4c6%T#5-i=a7D?%t5Po@t^4W-6T~&*+>xnNnO%UiA^Tawx?AuR%5lkuh77lCMK|Q$IhorKng+lvSnxx1 z!ksy^HXVJ3_26jbvwI&BeF}x<1LO_|=yP5bP2-KVdKlyD8tYpec9SDaiL2o+Y77Gqz0g>T=7QHne zFJKdqa(^-ZiA97YB7D~n7uWDT!aVj|jOZ^yws_9Jk2FGhk@k}0baBwZ!V#BR5-~>J zCd_me&3{VMVUGb_y=zaSC;y(g%A1SiKf6)F9$K@kZr)-!d?%pxc zZ6VVg$bxTWAt7mb2Mew__wf)!SOl=z5HkLwOfCy>I*=$wLol3Tu8}jAYhjjBGNv8& z@+=Jf6@Wl(=MA?C1o}h_`kXeC08%?)|1~Ya|R_RL)rFs^WfT+&<@F zYn~(MjYF1eTJvTw&PaibtQ^KEQ`m~ua0>o>KWkaY*Yiv?WmkS)Rno%3)S9B{LdIhh zf+iN^n5z$D@4c0n{d$TZN7git$|o*&_(KpFiW>49jTDkhuA^h3Z!*KL<5+|ZB z`AmFfo^y#7pM9C?vc0aZu#i%w1}cu<#f1n@^X($JAcs zZ&Ht}6~!J6(5N4Q8E7!vNKx(b3M#?PSKrl}qNEg*1MAF$_jp>#GAvTo+khCXk&P-u 
zcSi|1CN3FLP>nMEnl?W3!}7)E%bYSp$M$u4kpZRTwP-K<8S3?9&&Kkuh9sf?7zon{ z13;wGA_Yeqq})FqA+w^3(<`f#6bX*yzrg$NdZ#aAJ_y7@axx?Q#;x0a_oIyt(4DbH z%o=qER!yC*qrrpx+m7$fzXLJa+|`d))T1&vR~=r^KV6Htb+GN>|0?-M!#E#r>e6Yu z>V1__hrp*N!x8=5>&BhZOC(-1U$>o}AMZOa2k{YjD>zC*+2@0oniRxmB z)PD!r*vPQ+6CGpS%cz+vnSTRtWBrt8^@XENEs%lx`-$2lts|ox*y9K`xAI=;3?WJM zxvw$(dYY=m*o;oGS|H~0sQm7}<)Zk!2UZRPZE8=16Gj_fDhqG=uwjHs=!Zaxq{+Nh zJSQE$FP`I5oftURRpGsB70kvLzl))y9|V-nB-DSmYdnO z<*L1p=%|XfqgKrvzJXKWUklW6?F|v;W$sJWy>%mcUGwDuIc#i~{j7JxXY(MYuA1JA z1Swr>Px(56fkarFd>SAFaMg}3lhFuv?L`^{JCOw;@+l=hb6={Px9zl3%AVd^H>$Dw zm-e0O{ZWePs=NTCPp<9jC?XgR&4&%_8Kx{9rNqT^FR#U?-)HL5kbJk^OYGiWjCwWZ41D5H} zxg_1_x>oLiWn73-j`F~quF6>vvItgP3LjU(L{SiGb@JH5f+DL8Hg9_+*ZgT+krl(y$wj|vgWy54EAqyo_G1+^NY8%Mz$za?IRaCA2N80znKKd9 z2b3>gWwbwEbqCrUZv;Kyo1ge~%wf2SkoGlAePw+j^v2t|T^h?E4uavh20-%}$|ne!NXBPr6}lz&#v$0Pn96qiBpex%<_E-B)H8uc;g!oI#2(kn~UAfCuIpo%5D9JnH8h#N#tU=0Q zUC51~5#OfSp)47bf19rVFbnajs*&se=cLAzG}DyHcy+AHb^7VOtv_bD>OBzOZfTZR5 zm0-u@U(H=jdgXBS>3SimbYC4knT|ZPe#|&it$Z&%4p6QG^@kbdZN_TEdze@9I104U zz1a*%rsFhP%6E%Dc`vaHg5|R7(lZN_@P1{Xa*G!o-FoDdMPk7GA4mzqfOTdZ0gc&( z=1w0{vY)!9+xC_)_#?dFf5j-A$dH_*qw-)8uCNX1qFQvk?+i?mcyM+!bi+S(%ab2E zRg5joj#nA7-nB~#NL}>*SPu9!_PGP7pvnkJ!X;z}-`rK6ZxYl(j~s+?9T`WS7geR% z3P3^>y->K;am!}vAK}^MGJ9GqHiOFdRsIax`Lt==p;hTzM+nej;BF=lM6G4k1$0Se z90Dg$#uZ>^Z_w5dGUBQ;46-WbaH@Cg0}QjZE*{H9WT zm#cgp;+qw;JVGP&Oq%Hks~HN_TmY^el7p+>hhz`>@a5UGLcOtN-}X9Cc_Or2zd$T6 zZ^Cc*DVl7K2V`H99X{7EPFRVBSfS8cyJ%Z+%3}M%BQg(d?gC)}%Ap{yEi!K@*zNy) zN{oM6WUq6jZxofoDw~)aXx1~u=Huj`!X#R8vVhmqb!@2Z%K7Ch*IvMIkFUtWK=`7( z>1W6U!%v|ivRt_Obf*_=uD?S?c3{o*WsA4@6`I{nBdQvjWL;g$)&pOz7TmL&KWn)& zw(zb6@$g%if0*W-zmX;BAz9&LASjbxxYZF!5v zp5>PvDu%;ruFJzI$4U94BYArbNm3_mQ?q;BeK%DlM(Wr8>OP79QT$9kZymXlh?-v% z4*p3st5aiu!b)b@3#!5-}i;c#>t^f3S<-rkjoR~2#@ zG-TD!?(i>;=i5%T?vSt{z)Vsn*cWS6@d)#EDOc8z4e zbsw3&Nm0<%jpJnTjuu=w{e$UY+$LvAmE3()TLdeN2|LMw75%a8lv2@|zt6_xPs+UY zP+xBm-y& z{^X3??+AFH5S5f)w96i8KX<`t3U0Jm1!IBsLKqHjAQM*Tm5FeWg0geR+s@p+L~DQz zLV9b3k#(qOa!WM5BWT!sx1bQmykk8Lx>1V9^6MjX(QaLLh?Ie?st}G0!(o8Frei1w zz>w=v&fZ3}InPy4j|GzoO$>lu#L!Up-xf0!FN*wgaRvP@e%zg zpxhemGzclJ7N)EY{ke<{DtR7k&c%&N$gT^9$|?O%S5{vN$yP#_*6aFBZuX_&v6})(B(JpXEvttuAaFjYfOZ{ z?sC#zpU}YQ>Ux3s#*_B~bZzqs4;lOaWcrv?S{|<9k||t2(S!W{c~0NF6lYF5NR%U% zs;v2IM}wq%kOcV3(P4qfMGh=bjk>6o&tr6fH^f{99flPHczdpzD0J)JKvyLopI^G; z7*v^63W)qRF3yR}4hfC0(dfBt>((88-wD#D9X#1`7we@ z%je0mY}wLiVjcWcEB?Y`RazB-s1IOh5Dh@Fr@~=jkg(hlwH6atVhvKu3{EiMI!vU}kkfFowRx9RJYX8hC^5}w*r5jK}f(g0- zfG6|AFh#U{m<#btnlvtL3MoehWL=)I^SMbh@qc>4;4kpza|+LpDA|9W3BNu2@HD2% z^OT9s0%{QkUn$bZ#3rXLXyreaGIuW2>1aLa`4J4W!1P^3=teR$#7O5!)dvM?&guRh zebI2ENI0woW>WXZAfPUlYiQqvEMZ?c$M=I^x@&@K3p|x6>Frmu5wB-&&q~RZ>!fwy zkGd{P4a>3ym1*GC-$zuoUDBxo5@pNMpBx{&87m%D_;uXTcm&W|FMOS%Ra>G+>uvL1 zgBuGt=)tE(8T?QIpSCVm8!Y@^Khj$O!nf(FD_MysH@XyRM&oN7ePbRjAFWX;GUa5T$M zKOWBZKU$M<^gDAh=e?og0QZli=y#rLgIPvnOAMvl$1nJ(jg*LFBy}p}@Yi{jRiC!C zC)&TArT<8T`5e+*B>OK=k;?gW43HZH2y=i8YnPE0S1b+#S7Nm$j`{OnpzML)0ieU$ znawHadS8QoTZ(qoA)Ho0QYO@^feM58w8}SImm!%;oSXJxax}=y9+A#0V zi=tLJST>}C35E;!p_NG4Ht562mm532y^jofhr)nJ8rNmLIC_eU*qAX}hpwhnKM&h! 
z1i+pYUBxa0cMww~^);Fpn9k}=6{_xlCNbJ15atpzf%?H1mFDk7#qkfKxdC(C|5*w5K(y{Js1pKIWIoRk10yCQ{~$C37)i=F^5X~u>w81o#8M?DNLmGUJ^=OiZ*APg_Sc@F+JGVO`$d57sZ;idy#K(J+HAC8IFPP?1tF+pHJ^ditdTK(Rm7pyBO_$Yb*r9R}rb@a+^ z|F7%h!@z(lLaJgkt+&WWtP z0b_C;>yWW)@1h4$scXp7a;R`(Q7jo1E?SEv2A?keiGL$9M*SNG{CDAhsqgIoRW!(k zey;uZGjuRhmza=ItWL>P|8XPu?JCDwYMNpCzmHv{GND7=T=8<*u66qS=SLdKXwar`6X&CyfB-nfLBD!cT@ zWjZfU`_!8Rb$yk_7S79Qm#JyK;1uGOH_^hnSZrn?R*_5;ZqPV%xG45wX}U;g3c_G0 zr7n4F=)m5E;oRd-($%Af^M`U@hNM5a6fNc=)jLfZ44k(*Ddn^2u*S$Myphs@cMZ}o z5%i%Ucmx9366w|!He0|6y^wIIlfLZP6WW_F0MqOY7R?O!7f$`&oqg7DN5qKnw)Z7< zS~FE&*>_#AL?e2q_X%e{j`;o_2Z5Db`&U+bO9p00XAOCzFdWx?_}<*|zeOQSVeNxi zddj~tYE2o=0lT~?S9uEFo5zVRKHIOgA7^eE7v95Jx=tPAxtyD0pglMMOx~6G86i(8 z0Jvz)E+Gs>cFGI}6%#sYK!lG-Tt-+NgvvMKS1`(9vKR^-H2%uL+dLxIKa9>dz>?Wd zG46}BhfQtews(9iC(#x`StWMIgF}wkykIHcq+NsnwdAfLTM$VaLzRK&Rq0E+caWuZ zAc^N6MN8%aGN!r8^UI2{$(!&PFWN(RHl22QEIWwWax(8p`Wu&h41wV@*_EL=2_0{w zaBZc@b(`%8#$G{YeGTsZK9sY#!$v zzZJEAd0d};Tpl%Rv#wTuYs307eJ|c-$w&D};)AG{-f0P_;!O=oG%*A=fd8oO9;Ina zq&!s%=9r2Q@u$JVZ@;?|Jjgmx8GTN@NVy~CYMR7K*gQP1Cgkl&xov=a&S|?Tsbs&Y z(9uIVP5AcfnJ&lv?jEJ!*i&v9QRfC!GwOO>iI&yY?{VR>$pI{MbNhGC&9A;v2-|)r z={&I@C%Ve}X5O7P?O%`fHtR2wV3igf$48bm#6lS8B7Bl@znpJ-0AbvGK_ZZ{g+U?#kA2|wukq}m!Igp?|fqO_20@e>6F4tq=`Kfo+HGO zv0Y+Cb>p32R&RgYM#D@mueVUiD;XwHgd|^h1P|Kq#kM=kJ&2r|h#AeF(kx>h-d?7? zpMg2^zh`HZdpYB-UxS1A>OX=xZlv>_^0w4iw1-~F6O=1lW$hIMJyXL2s>N|`RK_G1 zl&Le&H7wvL{`Hb?5;lU55wLj8Qs*zt$6&@{1}aHhh^16Q8YhpKZn;|(-rrg?6?-1g zu!m{sf0o&OAG!HG70vGc+|JH?Z7w?NZo{5Dk$PW}U;Fu{x!-Bkgej$Z!7G4f>RkH0(zmG{xQ8 z!a&`8&5p4{#{ZWB;rO8c++0V5VN(fb%6RHXvde40Tm7EA# zOXgoB@shx-iapwU@j36pMvlEzckVIxcE}jmq2LWMuh#Fv3C7f zvr7A)vlB|a`z6o;#7cl4QnI;JYht3(GofBDyQGFL-I!9a%ntQtO5uvrubKVjr%ZE7MI!X;We3Wcfj2A6L1D$fsUs zAq-t(727DWW?}Osx=JAF$RePx8w=vA7r-q`{#K7_YWtgRnjG&0qSQs~!9PhE6`B~Z zq@jbURToq$G>@&ul}D7NsPOW+G>l%{9C6R7m<{)BOCy?#bks3Q>BuZ9^GervQP%b4 zRQ4{Pi-Rzv-@EY=qX+Q`rT1X^H`A=Q>e#s)mLz=)SGAXqw6kTy6x*R|cP3znJKyyx zt7s>izooUs5hK!5uUuDOt*t7r-eBsl1fKetrm(&5*g{SGFwZJkUti8?uCFMuP>#9J z@GkPL^e}9s4ch?^Z#7WqoWV!@l4!nXPi{|>iPk5#U|C&!^DR2h?ZfD@-4Nrj*(CU% z7agXEc?gf$j=-*GM!HBLguU=z*{~@?D~LhZ4HW8=b+BPfuEq<9Z(q>TJ6^&kJtWJ- zMs?=pb<^)%8JwWko64kG(&_u&{B}tH{G;KF4{btDEp`eeV-p5btY8-kEdRwk`=>d3 zOg7=~v|?O}{qr=?_gG@&ev#bqUOIz`a1qX&1?Jp>Zn=kz4gL8$WHh3{V#o zb*D%Mzhuhvt`1n*z;Fl@Jj=9_PaSW38h}}lY%W7N2}Vixs|Y@vn`DNPz}bv9XIj8e zY#KpXkLZ4d@J$$>pgG_Bu7vl%(XD6&A5gu3r+?B*bB>`!lxPzr6eS786=pyZD-sS%O9$jzxQy=Yh>;piko7C-~AOy7NLRymD>dfey>j4x?0mjFIjpX4uMD+;&=KXW&W3oSo#2~5U@ z9FwDD8dY4tF#9So2V;r042qPaKE|;_!88GhHXxz=8lli7dUR)_EG6qWThrOlEXU#Q) zQtr^k8fb(5u73KoS?3fo00@h~^*>Y`-A-nx(m1AEz)3Kn_!UT9);0)nnNI_%M;F9B+})^Mbj`IBxpf-+TJcX={_UFQN700mh~`u#g!MU zrx+Z~Tm%$E4=y4V4BRLG0~xupxAs9E$Eu1mek4$j>3WnssrF>|M3o2F8XO4WDCv(Z z5Q$!_71J@pkh`r+jN887tvkki_c7*VC<1;LfYx_YntJrgznQi!F?StNcG8ny1k4X$ zyB*JVEHoB!jSE(_H(VvU2#Gd=tMPtBp=U((t5_8PAAEjNGDas% z0WZ>S!kqvlbrB8#KvYF(F30?pWLHPorl^taPK6HE5?ojUNCwJh8{Gclm(ca zgEGL(2bCe8hTgYb8$+y!sbJaHEWp;Eu31-TT>v3_nmP$1_$7eY12l#;q(;p)m;!JL z6q17knEq)4*QHKUF}d3fuu1=KABMflV$F%Mc3Uee9x1a zg%%5rG5gbILj*L(XN|YGn-{`Q&cg&uj{NNNNHtJCnZt{SH`Koagh5fCnc>!`qhmEI zOJ~EZ1EJ-2eFu>J5oWb7FmO5m2sjSbqcTc#1e-n>bobZ zO+KxQ9c%Uy7uetO@_%1INgO5_4_hQ2=sun83Ci_|&%WkPD^wiWPrq(aDRWZz`e6z+OwK3QG#XU<$_2;YIWsFG_mA9>ihsoVaK48?xkIEX(X0y)3!z1rod5b;t zTeOuco-HI1gG3oo7#q=Ift}yS0wBN;diLdkDme%9dB}(9%4qt{osaWIR=b5iohpku zZq2ArZ$lC~E@(W&RLo&u)&Sxa;NUtcPlZrR96cso%}xxmQkISZ&)&`v5e#)9A%f;R;A<0uLC2&$D{0UgP8w z7zGojsji%W*;`D+h^m<5co$Tk;~23vX)e`RHA&RFOuZLxp>r6wOIao&iD@P%Pi7GG zIXBZd<(5BO{sY-u(Wus5C+KU!)-(( 
zGH!In<+Do}NW`o_!Ftr`1=rf%#hU42?K*0-1YriBo!ACpzK+;i!#DXjZ?cE&h<(>?JmxdC^f!NmWbH%Yo{@6FQJ9 zb#4_wUuEi1AX~h(nS*TNST~L8>5Idje$aG*5CS^ZXMARv5gWNlN6OI!ob{btEPp{V ztB~w3=;H>Y%N*5Rbe%aMCcffqYJ;{jpg#Z!_~#vIX*Y37H^#CT)D!O)E59F!#&apNBcXT2vKk|5 zS6?c)ave6dxmb5`wZC$#$Xq>pIBAaHJxfnGII=%D81qfC`Mz_r8+d9z!|*kolrjU#c0Hf0*$aE3+wq}@E+to$Qi;lnmTG`0|qv$*w zn!5fren<8og8&Hv-h_>SVG4+N!!isdOc7DTMnnvXii&zOfQX6_(ITQv01=d0#8Diz zm!-II<7_}w9JOfGqU|qz^Zo_7CucwBc|KoY2%TmG9fp;&!l8irK1Kyf%Ef58IL|-A z_p`)VnlJzVm+EjoppRE(v>E}XwFiSUFlgXDnGLzi`=>=_CqDrWY@D=AIz2M@CUxNx z(kjuD|M|MLVU(`=#JaWa`4lyzu zXQWK*@6^Ih=%V?ynaZYLUb7z@|A-y@(|9rUd>4K9`pR8yaKLAfkpcxYnA;{f^Uu)B z@eTg`{4DOahMeBcA*IOV=sM zwc`P_Z71itlDG%5e~**z4J3TVZQWoevE;a`a#l!q00Htw!;v7^?s_3}0B2lBwHV47 z4(~o0PI2GF_#QqJxY_a56i(^2+15F9MUs(u@Y-1se18k=4{5~odrB{T z_jDsPYiLjKgLRbOzK4<5Cow%0A3m5Kj_w!<_-Ef2(_osXMvsDfr1uV8DIJf0 z?2g{*T7EZrLfx^rQ`9A#y2zae&Mse7E8X}i9Mn*l6a|ud_3>WY=%UYE z2KS*|4s*(%zAPR0wnXM&z-}lqM&2*MrT|rt^o4!+x~DxKn~8Lu$f+mmMgGpcH&+BL zG$g(BTYh`RKJC)(#+o|!xO%5VQ54PUtbcKj{=|W3#_v5jWqM%_i51))8&&i9$o}q> zo?6e|%D4CGY<;uscE50;TB7r7v}1!;_s;nA^7an}gGcWk5g`uzGTq56nDX^wM9-?- zb4~r*{`)T?4&7$$HmR(R_w8_^u#HzZMK1gM1UYNqmU{|w z+xAmV^zs%{N+UTglhRkbka2OPE~e=$@rQy5-RSOujfDV6aVf{u@$t9Y=+laakDWL_ zcJ-YW(e)f7J0Ew7kL!p2GWc9pm3wwyNtgDh!;Rl)ds)a7X!1r=4YS zsxuC%t}No*+awIaF!V z^6b5dg@RibzFc3tcW+wY21rA9D951PcKn#R|3gBK&Hayis$E2Kqop}*cQGmStz`f3 zGZG?TXybV{8T!HLjgkRyd%1^Xx1U_+cs8iMgHW67a-wy=Bt1|Hs$iyJFg|I30IcX%tc}e7gG1`=SnL9;-2@oO7lJy(+J^b6R8Y zwT_lbGyz@moSnCxXZHJhWIA{Scg5r{dCiG^;Q!wL zWxSM0=`dVb@J+-2`Q17=^!a2KoBJDFLt70=D3QuNAgZLsKsG)^t--W?;df?8Z*I^VbYEhp|O>t`x zq$xEQ&fHf&3T_tc4H|T>8{H9r3xj{#BcXZgAVHUqxK$A?K)NmnWAql)_@J;ze$H)? z`^-J3xN;!5!MCt8PARaxT!Od>dyExMev1yI<=fuVx-9U|d3ob~!Q6*YEmMbS%X=1oUfCNhziVNjZVc3!CPs8S)9Xe2EUtZ zaw-8LMNL6&L8HV(?HK(}P(t)pfDEaeIHLy}YcQLq+~^`nrgbzygP8ay=7wL7@NM^@ zxIDehJEB8P=X}fXH&3!2PB!3_8LqnSwp)5^1CM?W7G2IMSH4 zExA5CdL=O6?yttPS6{VT4y=YG&lmb>$lx-W*@Aamh=Fs{|n{ z>%ZpsAXPcrK8<2FZ~{VuT9{AnxDlLml@am%EU7k^HTJ3NT}&sr;_UXppf@oIC13W~sTmf1 zMf z4rBwcHS)?iMHD$C33vBMJm3=B$Ky|`|Bz3@LG`1Q5WTOH#|cf`(>`mTy`V!YSlunf zJuQa%oDTJ-5a!j9!_dhuV|8=lKg8!Or}U*+CTC3_m&Z@c-a|2tXEh9Ndwk-QrK!mM z0B|Y4Aggl+h@>Mhrv zTuHd-wY}XA_Lo~Xh;e0RQo?;++|2z|5C2JAHONWksfH5LFu9=n&U48+0OQj;# z5$&bBEd~`oSVVTgl$ZSgiZrN?tLV`&$;GxiGB^P8hx6)ToB^pWNt%Vo#A#=Kv` zjZ>aDs+kiLHl3pUQ+|2}C4ODw4~u%4cbE1zeSU3g`ncnx)cA-~KU@Mn7jol`5=K~8 z3C;VhSD-k2kI~2|K*KLpvm@P746Dk5V4LJg^j@wWav=N*Pe;OW^FfB+fD;4_666r4 zI^+H~ggAk^r<~BPmcdp5ZuqE0iqk0fo>2XKT13U#>7vweB7Yd?Hl5HG)I+2j3c_nP z0s=uO@wS4*;>l7>&0Fi!ISOoAbjr?%#bcZ!J+RP0!hL#-ICPv_L1vkMD+A|vXK5aw z78wH^I|=DKYTHS~BaQ7POq(VHX{wL34mcXw)#2T76f86V+C{CWL|?{z0)EVd=O zzrPLJo8Ly`&HVWF^**~l_iY5I`w;3zG|;H$KG2iS?4W4%)H9y6CNb@d{MxvYTW4U^ zUtxchv!>*>t49SEqXIi6>Fsi2_ROzx1n#sTWuSj2A@EH$3F;M?FqnakTZ|#KESv0` zx=COa5`GiO{>8431)p%rcqgLOq$lB=eR@{0ffYJ#(JbMtd+C&fx$VPTPyH-JOw6(m z{SnrnRjcH4W3RQ@K{SbT;thf#qeY{T`vJQ%^3J00D9i(BH@``z_qZ|E1YslPi;0%{~M)uUZH(LX=x}i z7d5w;nJWJ~-bmoDP7cAD1&Bp^FFOPkxgo#UA+Sl$Yli3)<&r8q4GD*$Az%wdVy%H3 znnfPX;~ryxc}N`XqcotlJ@3ewB@h7x#JF`;ENFTk0lMI~eWBML23nj%ApKdepqKO3 z$N~&3`A7E|24WdTW%ruAuF`hZ&8 ztNicc0^p}Uj#%&cxgR_#=avodTL>SDj2%wV*$s(tUswBlO(M>Yrc|K5B}Uu-yonhd zllfINIWPKWX7q~uz;&1GHprG^Msk`b^(oH2fY5N^-C=00U_YPS#4j^{{$Za$T*fA|PvuPHi z;V-fyX7P?TJ7qh(?+x=m7Fq9O`V{G?u3u^--pqX*=^4-&5Y4eX-m{auwQKPSg8goM z-hXmVGe8Ud%VzkCO_FnJd6s44DRz4=@41}zR8Jezug9~A4)chac!F3?%s2Pv{rFcM za1c%_5nh#nyvUzfX~zh*czironu||02;# z*YEE2xHJ<(B5iW|Y$I&v^P*CDq(tN1xAPWU{1{Li{qan|yel7^b7B|gkvK|ImE<#%hm|xHret*I278((6+nVg!vFZ0Y+<-odz+*(;$z24 zcI}O8@)7G+7B&X>t_J!u*~D0U!HCBKspmfaNuO2Q3bu6F{XIthmQC%8xpon=F3PeA 
zvT-mf?E@ve-1Zn|@Bb$Ctgm`n8%AE&QW}Jld`FR0k?gY zFpb&|Je;aJ&PM+{qT^`rg&7K)6-&uZArbRDve~QaG~xiU=2XlM$}Wt zO}hs1QxA#tM3ID(jxCHSP~F0)@l5txjQRi}#R5l+{!lm$78}_EqS(;G-cjt{9j9lW z(dEMB4A*@$KRby7A?T<15ZKu&$Il8TNB z6cVfW0oy4ryAoyhA)&n!Xfr&@8JacYF=*og+V>eOo=fa}vusTR>uoO>nt#gT{%f>; zg>+CPG#4fNE5^3SBGxsMj$<@`C4A~(_@DJ`N)|0v@)?l8fkiu_$xx&b&}!1_oIxIT zv0Scf9O9S&))th5ZEzj&g)Y35bc0v!fcB3Kf-*hVLvsCkGuRefJ4fN|NIhV=qgQ-I zyanHLs>Y}Ok7Lp+#AR{rX>0+Y{5Ghf=i6N8LI(U&j258$OhezH8y-bne5 zyj%`vIZdA&F%uZrwC$TJWR6!VDPoIA`HI%65=gDzYptthd0YXeyl6 zrcW69CxJFdDtdS}YaNmFui-Vu{(3lXm5^TZSYX#n2%1-xxu~Ve5(=vcvqCu@O5S78 z{@?HR&q4bMoFf_~@o;Fejj#^ohz$1c!2!urhqf$|$iOAJ5JU)xqU4o;-XX-ojdHT> z`G`$O_s0h_2aF%sj;l5!6+^E{3;d%_?9NR`O5f$4Jc0&xhaQjq$7gT;;4o+PedeKs zowQRJ?+ii zkeq~84oaR3vx4F|g_dtI{LK^686zzWA)P^6c9RHh$6)^~A}XiI>~ntn z=PdOp9422&d}f%dC)+(H<>Cpvd8B=th@JA?KGy^cA@OSSUc-vH4P-b8CnA}1lfg?_ zcJ}c{FZ!ckuAKP^v9QZx4}*^-{|eU0)2QcHg&5f`N*=?r?)d(PAyI^_`s{OO#b57S zv21yMg`AtjBez{3bf=UqesS{QqEqUic?++X{&t}ka--7>^h22C#;iF{|FujvScmPg z)}zzoK%1`sAq0!d+lRk-e*b`~CbO6o*SL zel^MM@%7K)hVhLn0YZb*gMh|v|8p@W0@dx>-6ixlaf_PdNSzv}&kAW2#v zeeMUk*|j?1WG5NtI(9s`;>b@DyMbQYPJrbhA&2zv6h&_wK$|k5-GIcVbJSj8jwZ7& zU+#oI_}et(dJE2ZS6gioJrUD#KF+#yWl*1^wsY&@SAOZ;lCLb*XMv-uevt6*dwKM7P z+}5%;SJ%3wcxR@Lr8EkSQooq>-}$j$u0I&e9C;r6Lg}_hv!CtK8RYN&u--Pb=-$Zf z95Txxe8Xq)WB3XSLF@gANhB@51FaCBe(Sj6!L;BKW1$izZ|BBdO-d9UUjFEGtj>S! z_SVb09Z{Wl+w31Mb6n=uPRth_F@sSf_1E9Ntoh;CpmxLUnxye~;bK*2k4za{b<1i- z=140eiP19y%mupXu2^_CTv~c++!}fj5 z=rhGgH<6%a-CK$Ap}wl>_xI=id*-9Cs{RjYaT$6Cgh_Pg^dSXDbF*bmN7Lc4(n&o| zvg9t%0!nJFKP+{=igr_`SKEoC>XSxM<+R-sa?s&MEBA_MtFe1$z}<;#=B(H0Nf8cL zv;ab|D4!hiI(vOIIbrq)epeRY@FjwUihEO15`BoRDPi%4_p&mdVdD!uyD(^R(do4d z>!l-k50@%#I6vg9{)i9AqMg1(Pd|cwlJE`&rNw2R{v00X)uQMg5L7)~t!4V0!UfFe z`*Jle1@|E`&!6prm^1WW``C6tGQb3~cyc8vI>?v8a_egrv* z{M#Cip^c{Ev~o+M)-9ABspJIao){#T6@GkV7oB;cnv?uqImpww?N$<;>*Xr?%n|vZ z)8_WE9bWNnI+=v(BbLCF8=7jrn{LUmcALv~U-Nu(^uTsbtY@eQ3cUmNhS z_%1W#meJ5>f1?%YC6G3uLtZ_@J`o9$5q1hY@*`5pTJ1$_O_n7F+)qtSec5jj{lU1G zp1m_QZAt9s;j~4i&xiw@(i@&OmNCoUEW8=Y?>)jv{Va@eKfFa)<91Nd7j7NKf?B*iZD24ee)f5ajXjUZM^EW3^*IyTT1@+8Cl?R^2<= z#MxB#QE9Cj86Ff~vCodQ(MYqVtQXnU4O&%R7ZQbQDs;r37v*yLt8G3HJA@smpZodvUcDyA4g z?UR_&{zexdA#nZ01Yz(WSpf-i7tN_p9DCREB6*tM{S&&#qP0$S-k#ean&PLY4{8<8 zYZtNtXGGa>Gu|FP>3C003O&}#HHSqD?0PB7D1#o6w+SLPm^wem!RclfIb?Y@asMd7 zEW%8u9b`(9kDHJjV(hfIfWa&ZWOX$SgE9eFJOMjBKLarsqo9NI0C_^8JwLyhz6lw0 zI3uQd0W%hE8d5Qj%a1)PFCaeuSZ~#lGdTaCc#a*WWH_-whUXiiw-oaDj?oMB;Z$n6 zHry7~Y21v2?PlkvwX!Bfr{bM&sf=2;o6hR&<(ggA9OQO2V*s?7m?Q=Ee3X`%6$)E% zgkOftlH-ZPf`LJYif*-SXmNH_a4hRP*Xc;Tg!;zuPHIiP`nb_Z@(dEt>zcdI8E@oK2FGUgL$nzm%O)}+rDBPBE(}F0Z*=yMv5%fM$QYmz_BPPhwaa#M^HTW}UTi^9^5T5U!fYxrGtR>E+5jBf_)AoHPQfIu| zo>zyfAUi#l7jrvt+qcJaw+r*!wosQPEUh5eU(X|jNL4(S-9*?|XmMB^xprL+?8Q^@ z(#2686XN-T&nA?%wgE2uc50gBwpP<1{J^#~y0Owv!D}cQPMH1{{leED?b4ND!P>{$ z*7@I9aj1Rcy-tVS77MFw_Tuy;@oipv^eumO2c-&)7T|h;QbzPXG)f}dIn`1YVeyuF zKrm9DS*j(t8)ws(49%j>(?$wn^#J`I&hS-e0{cGx{@+GNmRPJ5)a_R9H_NeWktlD< z=&v$Uu3H#?dTmp8nor^NoK^p8NSla#_%k*|I7bNI=2exwtoD|h*Yo{Li(1vi1qaB| zo<8AMvw^ZFi@K(}mU%{2?Ooir;i2ZU z>9oDBAGyeYt%7i{=1yTjl3L(hGe2PEE4O*~pC%5^=biOLOKA80dV1o2|NK64M9C3Ry2p zz7t$zqqo@8Gw7Jji|sx)Rg~G+@U31(GKb=&UH^7z2gmj;Gu6cn*FJiD_TpN5`2li> zTxBt_MdLn|#a#>#oThMc65oGzkWk|xQdUYYoF^R~?d63(Iqgs2Lyt#>9FDgVu~+ZkFt zZMANPuNz8W>j3bTLc5qZHqLpzcG2a97mswUc7HRC`{>aRa1a}lmqMa5>)OU`=5bcy zyo|t4*#XG<5IOw#fn~_>$})s1R**U{`bjxOmq_SKjMTBo>67Gklp@c8Q-I((?7Y1T z13au@-s=QAEYAYV^s6VRvET!hbGW*U>h1*|0BmQppKg@VofBQ}MQ~3qSgf)2QH1BO zbNke`_%}nsz{`Z0Xh?~SmN7|o+f9C$Y{^srzW0IOUrsjxz;ccrRLUvuaVD~wc}~on z`b=KQ1}`nNwA54Q{mt-hW+s2J*o?q$E?016z~p-RZn2VM9V>j19TXlG+r^tJE|w$} 
zD_+9Eawm{)?bhYw@nGR*;Yyk`Yt%8FHDaj>hka%9H=ZD^(b4P`gg7H{X&1E#p`FBu z35eaA+k`ABWws(b3CIXE3Xb#P?=D2|AlR=Vu)Q>oh=9{7!GKB1F1hWpQ=8V@oo0^v zHs)=Y=fmP*(3$5TPYZuj=y2GO@Mrr1T-&M`8AGdS;M6G;FHcrgM6M_DgzKV8EH`{ z+qGRb-wYsY8g}_rMn8?xn`DGrw+WjSbWNA!aDe0aQPr;o*?A>l$2?8@Xa?VCE2;!p zGBX}xhwpHy>7ILZA;AxTquH`_{D){cL2OK)E^hFX1(1ljFGs=TJK!)Ejz%f807{%3 z-eM#vPC~&(=-+a}VT`y*7p>Mog+Xh`i*&RrfJhH;jO6z&6f2d3jOW8noL``9VpN)$tUk zgCpCrlrV7!s<4Wu4P`H$#XDym8!{=G?!;?yR^InVrN+wnGQzSh>bXh!IU|2dB>jMl z5UC)P=xAFJc&6O#f)IXpui-+Lz#lTWw%XdH0+2khs09i{DV5m1X+PMeq`=9ThZ!px z$8I=%XV$gbgiS^;5T7$$Oii5xH;IF{h{=9>;+9D;03#kwBz_+#r@F#<4+Kdq1))Yt z2|_+>WYl6{2z&?V$#13IIsqwb?AtG7`eQwI@1aYEG3%!hxhK+(QoOQ zZ;TYNoVv2BQGO3d5mV2gv|SBr!-eTqf5D-U)59366s6UFui@2J*0U7Qx=N@|b7*1m z$~RXmXX{-*T!AOwSnfYJEdV2=%4qX}y)8!iMWe+<+{~k+otorFiQ!Od>a=lz>{Wop z<;Bx=_3N_pdsE#k#DH~IRDeJAcLU_0gAztSwVpVgO}Rp={;i+ft|D4$nwNbAmysMC zu&CrRO_E#f9qoR_8uyl{aH*J*tWUoIP+ib6Kk{rE2>!gL@!<S;G7MQi`T{N(RibJdq^=i5y&7tR(2Mv+D(9+euD7?!V4na3GB`;eoBP>PfEi~3#I;**h zdJ3UMP73r2&pElUUmx6dJMsNJLEG(9)YtiT!*KC8K%hwU>mt_;!)6A{KlcQ`egt1i z?1_1Vy9WD$f6UovaJrZ%^wL$=$;idJvl^qh&j~*$ z$j)NR_tk{pEPAMAtjP2Nv%LmywIyIR2OmH3DIxln-nW`H3=!P2U!OgHW7@%)2hQuX z^(p;}0v5xE*KDt1Qx+h5=g?Un^lO&k#FHpJ0VR|Gw8L`ZA=N6gmrC|z!MjtdDxf6} zSmuRTz$R5=h9EX9wv}Fi+p2#OPlA4^y#UBB+y0XP`{}WAngg#`9jW>@>b0VaQf?%zVv|?Mz0Fc# zt)Aj4PHw%%-k_&o7+GValuwfX%b=FhKo0>Zk)PlHUGnIX<;FWJY+4R34`@gGvK-^O zvl1~=^He*l;l#s#x<3SGa4sGCIVi*T{3>6xV@?-Ak#05u(>ujx+$3%3BysHjF|mY3 zF)?s2k@0xi*@gLeq%+BDe*L$qphE&%qGmwLrlTjwTL4NLM&5vt_p>uLZJ#DKS_&q6 zpIrq%2NP$=$kHxoQ9rl=CEMYo4Q#?HJ;fb|F|lneo8SwOH{f7kH5gk3-un{8)Im2d zxp~N_Hvrh2i8%aiHI%5MbOHYpf_amaT7a@L#~VWjYUN}u&NjbV2oWfFn6%zV`tt&z zq>B{aMaq{!_x~E;$l%m!`aH2R&CyqI>_jls9@^~F7kI%gr}+6Blhx3%3xQ+Z0o<%m zS;yJ94xib-PA6Qh6v5%Lkgg%vzLc`Q^XQK_F$*VNn4%t#(T}Kz+l_}ut_mu`;e>Lx z2FcY>9U&C|seL!3&_jDW9V(`gl`@AUF?oZWINJzG^^lo*Sw1XQDBK(ez$887JA8%J zMJmR@Sd_T-D#_9a#mb=HW?6a&?O+TyDI<$G7-#z!}4$ zTXJG^D$IYE{cAq2_Zlx=Y&EwpcBQE@?Ae>Fr9PKDWgWxET;B__Tz|RrL2%M{aY%_L z>@#`k(h@jEN34_)5)j&Hoc5#q*&)p3)Fg2+n_Q)*Z4yWM=(9CRc@0+<#Vb~&l0s%+ zdDbrz#4_?e1;I=JRn@uuaVo-^o#8TU4sq{f0>Crhhjsv9CkA=C8w8`*ate^&0K8%V zfGUqHuYiVAEp$K#0x}l>wP%d&M6a}N8B0?vAH$_g~(UrC@LewVkr zI|5)j#$FxGeA!YWOpb8Hdsom)w)Pf6ZXzmYvz4L4E9OqZncOF@Thb_AR{s?2kvGBe z+tBGUNc!lIsmUt#5BK#h>wo>ZykqIoUoUyNefa10;ojiY01ZL!T)^p_Mp`FI|J6v} zHA=#Cv@;4yJl)R1k>RMH1o;7*)Qh?Nz5rKlooI!KeacK%{ zExVF_#>kXcJP!x!p8^hHZ>gm~KxtLS2Y`8~im3pI9#hE>mPa;*&i=K+y!jX ziRssW(X}22LW~q{qSP8eYIX3`^@^iZutv`)kkJk+2y0s@@%loyX1Rxg(rRRsAQ}FO z^>JGQ=E~Oy97)}CUdqXT_PrX<)~$3p02BGvA3oN&tnaYeS$~%P2${}ox}woF?8fzKwuK&;JIn}q$AimkT&_aPhq@c#KWsmdv5$A^cT ze$BFYWxM2+@3Gqj$6u{H1mC7OtP>Ga#gr{)iAxY#*@D+;Z-@&_!-NqN3rHlrs_pd$wJn|tnxa;>E~;ZoR9cFQ#r*KR!&SAm{_7@;iFc}N9p-KXz)um3 zYrbgq&H6>r+ZqncTJqMozyCGWPLek~ENq!bY>C-^N0%6JDY-4-aO_XJbD}NyK4Has zeFr%Xo}1AQuJh4kSFudXUk=UN?VECVylleQ3zIEO9-mfE=ob=~SZ$r%IecKOS547OJ(81aPfp-3dM!f6|iQwWXyRlKCe%KZBTig7`Q);N? ziNB=|^Vbk95r4K3V4V(RR?K3DDa}IP#THfpV)uPlZb7_k#^Asr|AG=xC}sr02^_Lj zRL<&4!abI@_0H4md&80)gj3<1t5jE?>>?S%Wnjv%WBQg! 
z3B!J1ayi2u6UA29qrB3iM`~=3c62m19<3DQ>$0l&#Xv7?TRUBg zr_k+w-?ap*Or%#tVx{z~=(3nJ(TcmQn%~C}1<*+)*+Q-#+Zph@q1PuRqz6^?H*7*x zvmM?TSo1<&peoXY-5;mONe3{DsCu#HmNEO}eAyiya(riDirXK+t769qi=xSLL`CId zM(p(=wfx+Cfia~zcdQHDpKQ`s*{&A(^ePGOR~QlE2komJN%dLToXnQQ0VOvQLkUz1 z;mds?JC0^K)fI`i;nOoVy7x7?6uY|=PV1EQ9TeY7+hJ%JGemNe8MB`9bKQQ3vd$U4 zO7Z}<%BVp+Qi(i9l*=<4kG39r(h-woWi=+KS8-#-+iRfS0a zp8wLe90%z}?JEm1z-L0F#;Ti1afV|h$&}U7OFJE}mTIU`jb?BxS3mpD3!krQqxZ+& z3>sxbi4t2Y)wDVQo)>@O@Ge2^%TcE~)3jj}w#n4Bk!=X!;XV%W6(8+zX6eyOtN05& z__^&DR$p6!87i$0eMg_!w)Z%4XEA&RYi1>ZgQSba?PL4m3mbCSDTvSDt{KJYqe?aF zqP)^pXT;SGM=`iIq(L^une|HH!VL+-`>YGf6{s(@+kEM8b}H@HTGVRW$h3*plS&ii zUW!-19gMUwKwp}_*Wad*BU=6>cG?gqC^o<_=nsI*p?iup$J4~(STjsX;_R8Vt#HD; z%N9mBUzc}$UU?Lux`CAo6c91pd|W@pir(@ClRSlN%4F6|(;9BxH-utUbY&EpWuQE2 zWian#2`Yvnrjt%E@{w%8p-(+WdT^qzESu|kd4O*&uHu{COKS~^5}eYno-Ver8e?yB zCX89H(W*VrQkC_99*$`kKnlggkkCHPvz|f3EX`dgH-dblBPkmp&KSmEpYj|BL{dq| zQDRIN(Oae@I_myk-2+H==7iy7B5__1Om%PA$ zZsh`A1KL7C%z=Be-U}c_@{_itNG1g5J|?h{CQ6EolZOmBt1<9*EuLga0SPX~8AP-0 zbKeuZU-5EA9CysxbO4loZG)#%CRWXrg`ZqE72$lr?+{Cs#@<$*Q1Mu~E$K=x_wTc} zoQpoVzf>vd&FS8rX}4<*`aPuGJ537K-DJm@!-lb0s?aSs>Bf+TM@g)i!w#n}Y*#Wp zFl9g%23xsdv>h@ci-+#AYwvf|DU^&d6pnGraT2{@1ZUoXIOy`_awEmbbJY4K27`88 zK*+%(iGIgkrmRf9AN_JB*lAMb4a@s|lem{5Vi!vE&?}jl$Shw+JxK0p$|<^wQ1vU0 zW#qM|C3+DKH9u}k??O3+YY4Q-R7oTPpga65=0ED)`u)r-2miQk->iZKZYd;}>$&0K z1Fm-O71g&~#w^zBYQ~}Ox3+qh)*L%5vI;mNtMo`Brg}c)JYa8g^pw=;WKoE(OerkK z4T0%u-Y3+ofs1%+#HuAh3Z(_o+qy0WvhKN~W|@7D8fhP|NT+e(_b!>Q81Oft54eo` zP(<9049TU7Ng;BmMY+oKe#L>$hgpGih5@3n@iT4to1Z*EIp>U(Gs_M13~@N|!3>N{ zL64J1O!X@RE3u;Hgnpu-QPKHmAv z{YY40^+&Ve_4;ttt=hDq_5WWSYvR^j=<^Wyev@R`lW`9kk3`!EX36DGB*>__HzK`L zZWBcqK$tozarlH+&C`o)R#*%=6tmS2{t#L*O8OlBRcPbehq;06{p*~4v_T``+@n)! z4&!5J^Kx8-_}v(8Dq8ma!SajWw*NTOt7)#9u{Q6wiUmas)O>f>Q!9WU7u44NRv>S>5IzUt=fi_9-(Sp#z=qUX&>f_3oq58)-z5~jX$s5xEdd(; z^T+Q40K~%q2SxbNv*ws^&?x=w`jET-{6X}>fCqDaxu?*#%s-oPx4(z~EDMY>Uuk(0|CUz2cS|M70tPcgquP4}_L>5g zB+Xz=s^_$;JR2mX8`blAh#u{#Su&*nFbgh~C=QKxS1y-U4)Bjg9drKHEcoZ@*_bE6 z59Ixww)GEt#3OouG+_yeF`5?II(x3GLiKq&bYxA4Yfu1jg^DTfU=;D-w{w@gIQG-K zV_l9u6!vT?J8%aZT(%#hl&j$;!s$5ILvod1Q=nbr`Lq3==PcEM{N_O5r;-&K{2x5F zosiQ*a9+_}IwSPT;$B`^=(5aT@Zsk2I90H|*KHznnVAyyUXwUX^hQ*3rr;!`QutIo zN6v-h43-%3Ite-rK~6ew)U>)MY%#$qU7b{3H7^JX#t511q{WzW?;gOn0(f(B<^iAn zfD1FXwE7=B)^(p^rkY|j1WFvyw8s7RWkmo#Na-T&nvbbw8ezhw(}&;q6W{XMZVy@H z3>2NC#MvE)_w@8XSt*tKn2QXa`3p=ej-i1nJFaMY#-Q-rk=Z!xca0E>YhtCO94Sff zPjVGq@I2b&aYy7Fa)Fzow2@x0kzeI0dTq*Mt%g8LGiy`d<29vPB#mBgp7!O{i!jw9 ze&tM^8cipRXPnH`9+E{cCeC5>DQs4Q>U_G=1Babus#r|fvYI3v!B~Y*N&_jcK^gbG z;l0l;)2!TM$LeNwZalTuXXhurcH!BXqwNvzj`frH=e&)CnX;iwpwbLrV+NUzOvhX% zp~OkmmKQL41Oow!*A1tmK6hPzKi3SaX0w%S?Z7J9%@rZL0vf`TjN7aiYS#i3^P8E6%UsVnRfaCZy*FVLM=m#3KE|6Q+254E8o1 z3kk)#eTQ0;=Pr>~qg@zR%i^LpnEZgm^pLikj*KE(b=Pk@a|d>gpPS#&w(@FH*1P#} zjPvg0=5VpjY&jM-A(Puo21pNlSN^FBs(`Y zw|d5w?Gd$gHg}XIOnr`XFtdQPD(?p3t-mUl0BUb7^!cC4R0<(QMohw0b8waCqK)yITJ_8-N1>=yUM&*pokx z4AuW@xDDeMN#^6h6tqAqb=4EM7FIKHAl^>N70);G2}CBNW^2T05g(%rC$jFkbUu-q ziTS2~ZY&$U!-Zws#k^W@C6@jvl^d1oa=dM*wJ8^c`yVgXQD zG@(A3aW>|5ie<=t?C)-dY}3yB>4{r*0A&5`TziZOJlt1F-a18MnMoCR z+TZ|2{jN$xhvpZmAVmLGckJlh8zu`n$U5fYnfKK5F`E?)|y- zdmlb0SUZ#xGJ8yw&I-J2>|x^dPH6ubBtz|6sGe|Yo-KwW^@Qc=BUO3NHcPXepM$pT z&s?_99#_^pn$!L9`jQ!qvF?qr>lQbc9|PUR;mDiNh4RX@NkZCi?~+OLcqXhbBaPc? 
z4xUzdq{CjG#Ce$7JqU8wsXcXs0x>a8R+W##_Gsl7rY2}4rJ#W%9mY=D_J2v5X$Vg= z^kEDwW(Kg*r65a(CCvu~IOZIyw4bn>Za%^fc>0yAedVxom)fsTCD5jQ{)@O3BTL$y zET{N;V?mx9$Y{q>Co~BYzgUz5H&OuZuykH)(vtIcRwiXx)n=0LNKTh>ULk3*DP3J( zal&`v;no|TXHUAOUU&6FeDh`GO~YiTwCwdI*=_C5Mt0I3?WH|(PIz`&WHa%srFfP= z+dF4SW%d!@y4LIWK$5}V9=)1WAT#5+Vc|E-ln&49df|+59EaV^?$d$>f`<;CWo|&b zreH!nmlBaOLP`~@E@Umw>i}(&RRJ<+7N%~r$UuA4k}37va*b3$khT{}F-GIj-{>s9MDD3y5p8Jf2IkfIbC=@2|`NIiQ(?MKmw6!On^ zD-&feXwn@F;2n23&AJDesVBM_XE}CVCe#4jc9(=l$1XkNE$hGIS@}-ay3qQ=4Q{&9 z-M@0xhZTn=6p0r+ORYz4N4;NjMja}yD#yvym8N2w-$9ps#n(!*U2DzLc!~4X_d8OX zw>)-j89%b|iQ2XS3QMm{mETyzu1sYU!$x*HmXdmq%9tPx|0|3d-KAQBPa|o`aMj{r zC{uRXu^r@30KeT!Nt*cS?>~m;dl8cqYozTOuQE;S)bh8)S$o>xSvvLpXedvqd>ALO zlz|KigDO*UwLp)LlB>WN>3b*?C1V62*A6A7CrJv$;lHOvKLq3#0g0aIJ*DwR&cJ99vW2{;pr15M3gLR+gAjIO~s?7F3!h49x?dn-tLhzJ%ki$vo zgcP7Ms(}>TpuF`4d+|UvzBstK*O@*!^KKV#cNlxhZ@d%4oPBoBn0VK&Wyj8>%6nd_ z=Nk!bL#x~-%)9-Ju@Kv)LN%iTgR0S>qI5I zw$2AyNs4%{q(Yd4oVRs8$Vw%ZtfM4^5aPXcpd7LiLRb{>&ij*d$lkx#_xE47{nc*Q zb-Q+5kH_PAe{9)Vfk$;vWO6EDU}u7D^YlRNnv9#thmI2sI%8}y(B3?hNo8hmf40`9 z66KI0vytRk$7C%Qzk2u6ZAp7haZZW@exo-#YupJEjG_dU&`5RG@)>DR%&MSD>0%hO zJ};~Mq2zmM=x^O)+K44 zylKP%^_Qo2$d1UfmeShm!<{>4-k&(IDKhR=(c2d_+KfXhNf}w}xuna#cYJ$;kKVZu5TgbKH0rXvbkmtzui*#fjkTt_yUw>>3nw|h%Skzs)LE%n)jcftz$ zj502gd{C2gjm8`Xgc9EZydlrCd`2Y8#7A>P_n!fRtTQo|o)WcUVs+f zS)O=f3kag2?A^zElJ&|Wsslbf>q8=;L=$(GlWU0Bz&RqvCq==|LeXP6_ts0i+>&S< zpMx*wym}$E<+2mxZL6}Nz~*IFhw9$sl5Ma8T0jTIZc-Zkx>5OB`? z@BM=(d=C!hEUliusYOVj_Xbco3vWK0brJ{8$}j|D7S}hohvhW(Kj3qYx{Mt2cC_@3 zO{sn4sj>+4fJIK@Ym$>Zs2kE ze6A37DYU&d_qvcP^5Of@{P%J0I5mjJ{S92AV9CnZY>6^%Yqyc@krU<7Pm(S2c&OSv zpS`-FteUg|K`ioMM zGpT&Z6<^(nrd744UkTBa)(-J~IF|DqlYu zTJZ06XK2a4o$FS_EY(<|X+f}^j@Z*8({do_Of`p zV(?v?xS6_aoH%>Ov6L-?lg%mH3fhk)TZGLtC!d=15^O4k6KHW!vU|s&sS9Kw^yV^>Rw`NQZ*!S#mf`-ugnGW#+ozSui zw*mdVYL!XGS8*>gT%Jj^E3p~}ai;9}Ox?^d0ghyx!tSwTMY#}IEKj8hYp;G0h&9>= zCxFv8P0nH7SopkwL{jagUw^OpdAI!Tf&=BZt*yt#5nNs85XssuPoEFw_^Waa;^6UK z+lGwk!iE3hdj_3t4xS${2w^wmugWiE(OVfSbFnYW9x&B9c)rqi|eYG_j*E&>|1d=h>6Lzk4ppyBIeDR-B`CBc1t$~ zD5`4fl2E(bv)mZ`>Srv@g7xY{4Qze8HywOR@~@N@lTN2`_*iL9&|p}sb9r>*_jPh&H_4yed(WM#Jed_`=gI}JJ+N?U8zx%o_hLX znQ~vZJ3?k#$F{X5k~hfAn0l#XysldAtEX)Nbf~GQ^#qWkGB|nc#l2;(^-lcyuC+oj zirA|({*-eh3B91>#ZKtBXe21W;;GNhSh4$+TtBM~drns^9N!Rr zk|RZ~N`#G8>*fc|duC(M)KVoV@GO;OntoM0+@WbGTfC##^4dJ@NYnv)os~ibRsLb* z!liokxUMZqWpo(&(`pn28{97%T{eW6zu;W-gWC-Hoo=(K z5S$mOo)X~M>z*`(;j=6Z9xJmQUw$gaEYhG{xP7jn<2mgW0)kitXWX&|N6Y;imaGbe z=pubU*$I_%nI0iR^hQqs>iKVj=(p<)9hHywteuq^6y45+{Xncq5BNy z=lh-0L5u1b7+e3P&<7D=LsY_%3-1pNRWx(M$+yY3&iCy26mh9`%N<(f%)h(-5$3w5 zZ%$phXzT9Yil~*rD^5iz(40INa+W)UPS0Q*r>_@iEm+7C0h)3A1>-MbL+MxGKFh&^ zXkX9QjDr#>Q|yxOG_BH6sQsPhMESuP>|R42Ke)mO7dk31u1I-}E)x^jZGB*B>}7lk z3oIgsHT+6I**DhZfk?ttEc_n5r9&SRB?8buwtekfwzHDSSk*FB=BSWh9RNow<;Skg zcNbM1irQzzUAkfMo1i20Nf6gj`T&s?lcT!lS5NEyNm{kM*?)oOE?$pm+g;M`M1u#Y z8tAsmCww4P(6XmfVBCeB_hmGa4MqISNYp>w{23-}HO$hj2e4~qWu`NuKkXIlf;i=O z%0oqfMi{RXHP+3z+}Y>2OqCv|g-s1wgjjo$OQz*aJY^M}%>K^CnII5col>|fOq>h@G( z?O5(p;gNqr$+s?De-R2Cq*qo#{ca`s;jti|9@cv2!8)^V?til{d@kO#r7y?VaGX%D z#yF3GlwJ2wB%!2(eN9e`3PbBl^Lp4p(t^kuR7zOqR3i|^ab-YUq)%;?gm`D`nYjsU^9{H{b8m#AbXs4)8g9905!`9ozAY_HT6;T}XG zW`lrKSxlTzUA8lcojDFsfjA!DOjL}uc|LwgTYuq_iafI>(NiM#m*BYos**3nAG#_L zcyN12Jk={MiV6ehiTrnuf(J*Q|42DD=t&o!GNacAPyzrdEO0FoH1w|B{sOC29&0qj z_2tu<^8**J(tY@G-4gN%;eMh%WbEtO=-V79FrMIZtxTT5lzX?0_rZ)J)Vz#tTcyps z8G&<3uz_6Q5#>w#!?#zM{{AM_??R?fRc0jFUz37NeU#2I38oUpTB$g3Abp |VJ8 z!2gMObD;fPB%LZ_-2?KKg(Nl@XQf*~YLo!(5*=*^&~;B~!9}}Sx3~j&uA?dh)$gIzF~|l8F)i=MS1vIky|jK!#~i^V$XTg6y1UQOL%2rF7k!q@m=CqNH`p<9Ch9#vkW0<)A%uU^Ug8edZ% 
zU+)pATURC{PT;*SZlPzl{eT|rHly&+?1M#Pedo8O!{GD^CNsJ7^;+LT=>?VsxIY+^`p-Z&6@mBFB@bOy&r%4 zxBfrkZzKY8mK;^;!ri|&Jht&zIox5Hu4;G{Nwg5j+|}sCsYkbub&of1eOs{41j%sfgeeUwET z0rW?tt7+Y4VFIR7VjhOJQyQCR37il^zH9C_8%6%zZdRC- zkuLQ#lIo5?*Yob)6iMp$NOXjJU9mDtEV1cT5+SKh27vqo*gOPq3@G&6SDiKhCsraK zhEe!ScLBH-KyM)ZdgtBcBq_GJA5)o4;(`wIBm@V(TMf$Xo0KR3P^sEmo8RlL8Fj54 z`wSJqn1e63?hbd(o!XE(v!Mgde zJ>w<&1U^d2S<0tF8(L4p?epRnLb@{mwOwj$sKSY)ObJTO7jW?a;ufON5!H> zFC=NARb2z`mR_pK2*3HF=toh*hjM^e16vh2n`zbIgxib;?nPUVi?1Tb+BL zecMiYHIx0%E@{Y4_A~xj7NyWF+YO@L4WV>OEv;0@HG}YWcaG z=rwT4Y!Gil$7b=LG-YfGp&iZ-@nxkB8~SiQVrI2dW_|iN3-@m6lguR(Jv$e5FWIwF z_rGHUr{YtXQ2e}!60<}i+pb57gI>k?6bjQqUEkcS2=w6KUKb$ubma2r4e|H3hYNZ?Q2wYNU&w`@}#2z zb5;U&wMTVn+~2Rq~>ZqJFP&{5uBn0d#r`$+K%v{TJ3& z)Na+E8~p0EF?EK3a5*{y+%zhJd&t7UobE?vz5XF?{~R_=@mVyMx#VkRIQVEmK4;)? z&A_pc$(`Qul=pWn6ZiDrAvYd+##OPxQ1mjChhJuLW;v6}e^-k9Qb_HEX!{m{l}NRP zEVHW7@qe(vDaFufrq`-VwPSPw=gw!RN>dNQH{yVc>m&?+3AU!V(=)vz>s43zZHk7T$u#hVh_fFB%EL4Es$E@n4U2HCu zh7#bh9KW#HpwA6>x`khnL@4mfqFD+mJ;DA`N1wpnDK)3LsMs~3vhrtqbCFR~=+T$w z*S#Gy4N;Fgz5ee1G|Sajci#8oE*}ls(f{Ep%Br8taY|8({z*8q`s!8$r#{aYzxJ~0 z?6nqtwr(BHX96mC*qDFB!hgP>U1Cbf#f^g@M)u$GMl6#b{@ORg>%CRC`r($3L#`UVt zIWqh^)*j*2J~WI5@g_Wxg2%s&PbyP+E^j^Tx-t2e@VXwqPEf-34PzUj;IbI|x-2}N zHJ-)v1N`X6v-Svc%@2rPw-Ap-ekeMraP1*4P#6w~T^Hxz_qo zX3{KUK|-f8lk%NhENH7m&*L5NUxe7n1$< zhxwG8_I?`^L79Ix!v$}gRe3kgVB|)`i%TC?#h=;nPg zy(=WojHt+Qvh|?!^+u;HSF5~cmtVolzQlSFxQ~i6ja_#)>ms6&q6;&BGa!ZNd9c*7 z@fe4?ewk-|jmHQo{QEZr(e@3AQNVsvTL)NCf%`(;MYp#|UbdJZqqvYRM|-<8&7 zP7uw{-EnbeLJrEa6ZRrGDDR^Jim^#NQWi2IJ|m~ zCtmv5Mg)NE-T=e`(Cc~Qq*I>VgjO0(PjVkc%$KyyzA-O_MbN07uUmaxQIw~z5j!30 zh0MhVH+EC{hgZKkAk1N2Zs!IYu`VrUSnSo#`kKdX7>gbiVeEnNTt3y}T^!(SLEpA! z*Za7sfIw%XuOdQI3W|1(SFonD$6!X1>)fd%Ec2A&gY*16gb~EJGJ*c(Jjlvy93Kk5 zrXS4nW1Ge2@ks)*aX*S3){yO#!q*Alq3H7VEW30K_-b#|Y`rg6fRrjxDkWB?JZbe$ z(vinQjSnDy?u$y%eutU0jWuroQl1j^Y};URw6X*34xru>*bu7H3}eWpH*w#P+!dq+ zF)*#TQ+KBT1?Jo%9s}$_KcD_AOP}r7^)XrQHGGG-IhCz_5rPbu5H$FN=7bK(qgp-} zpPul|U6|-gap(9gtY?rJmhO{bxRzl=hqPq7HzcHaWnNPbdWvKWNh!>$jtcwkhc+2QCZIjT zQB(!cGKm8%JYf_|gZ6S#fkYsSyI+LGYm#NZREHtC(UHm`d%Dr65}2<5b!zHZcjTFN zHJ|WiiEh2W7`c|b|AktBHX%#hZV8TiNq_B^zOqY^-0cxgaHx|)aQK=f# ztrei@9W`jmQt57)^`*PELxG<4SkL{y&$Kt(X~w0Nhxx_o_KOey=g$uRxafbIZ*4;9 z&kdZiZr@6DQnNySlH9X+x2*x{C7~Ul>bOUVHGu(~au_L|0CqCi?lCurD~%4!J6mxo zGqTKr+yZC2wsLS-?{$>s46IeLa)E*K<@175!pPz@&Op^VvuF;@+vv9b{23AM#GmPu z1@FT5+|gU>JadxzVZ+&>oy7w`tUeQWQnQq^n;5k-y|#;07|HGgrna6d(6hQ@v&6H9 zDUU&!wz0G4-HWG02BFyak}TM<9GgDH$2llK-9!zgCB|3x!f%{mj0P2X#%j;*jU(UA zE+}2Rn!Im+lJxoPiUq$eIdhMV<=oRge{iiL& zz%?6q;nRU-CjV;a6+ySHvGf(L%=39WqZ*7q;(KY$v_p&bwKKdcrQvB*jy~33kdRV$ zxtu=Oe;~#5Qzs|?SBd_CuNlXq*B6!@N=+OpczoiU`-RefV#5u(fLEVx?z;ItIMXbO zLubSEd6)04kDjS#UZP&4O^9^jdO_?xkC{YQwREw=IGXG^h&I-+A{PFNIO$+*z3x`=lMsHVQEK>MHp;E@^(sa>0d+aQR zvNo`*jaSSn-D*mBcRD(sc^hViA85bdH-D1=KJ_V*9zjT{v@{)Uoi6@;o^tEVc3i7t z!u98ewvsi~FGFtrDEYK#kNROXI`CFaP4IcCMMgXWdg3}S=2PKc;%;n!TIT#2~im>tLvlA zt1g|iW!@zv+#^9J?UYMRdq39PIpa6Dblz+oxlxJzmH%aKK)Y7+e(DT*ne(fuL$&Z$ z%;IpOZ#cF{gAa*EBLxU7LOaDIP|r`75h-Q#CBWaVQino z!##Za7l$Gd^_R(OE7}PwG?+Htx5dui6zVVVIkPuICvqgUsQ|%(kNav*ByMD6@I_@4 zv)^HQ?YNNxNJxOm8x>e4hx$uG!H!cA<@hX{(5%2l!RROv;qpI7hf95j2(6tDVC4Wd zgMD2C<2GW^Iq)9@|L8pwkydJB`!Sh5FgF&C4gs{T0Q%q452qh!wTb`jOWMCvME2VBbSEi{05DvY=NuHoBn>Jx z-WUb$3W?v^u>pJLBlf=w=b?ApX*P|^F}hWU+kVg7=A1bbcl%2t>b0VS~&}nC1Z_$v(luRUuJ|Xfrs)sBC4d81DGw>sk=#-fR*z-A}($)5_w-LRz zwqFvv4mdrOMJ_1V@^=-{?k~-p7>$yxL-Al@)i~2r!!{!UbjewtY^H&HgJ?s=KGDYz z4aN$FB9+rNa)UnCN3wcYz^-%!oE+KB?wZVBY>^Xwd8S)>rfc7<_Y^I>sh5;CJbdq5xNI55n~i?^U;*{(oDKWm`LsD3 
zc+IUhZ0!-wFWV&ue#BC7@Ny)yR`YR{=H;N%^cD$>k3w0qq4ov}ljGzlB2l9!|jogs;qIkw+$Pmtk)bf#zoc|hHJUV@%16IC6vHj?(S}GZYL;i zr}P7VZo^5~6o8NYpsbIoqjtdhQFXjuu)gbu%rXvLqi85oVD~7nDWbKTHRuElS_EKS z71%Nb1r={BxTPZi5%Qi+RDttnCF%QvlB}M&Tl2i`}R;llLN^!z3BJjl4 z8;>?;7T|{qZ)A2ke+D94T9qdV8 zx?t^-IZp}quOAD%;hn$T?GN6GuoQVe&Lvi<5N4-+GGX}!-9-uC&&# zZT)`G3G-O_Axr`Pf@zfsiUX)$RmX5uQhzxeEsG~4va9Q(vAA$SLYu8NQcS!U)8`T! zn;!dR++I|MEt~$rd${1Neju%5voBJ%v_tZq7MT5Y0W!>6ro291iC-{ zGM<+~oB;IwQr0AENC6~9WS!|~yvf;d<8%(Ao{exvD|SdLRG@w{Fk7L?uIqKL(x43| z@eGi>N|e6+$&s|r5k5+AZY2D8!{2F6!iqN$aSQVE-{fvc^4_ztD_}d%lMQa5p&%vn z3?{c{0;*ZVbrC2Dz#qy;j#chyT7bW!(B)2(>^bB~#fldqY$V4RR?*7pj9NdfY(3+< zUN|#r6B8O`N0n}gWW=s4E4lm0{$DWQkb_j~IGJkqwDfs}g#WD(li|+4bmX8fCRKq& zfX&H(&UYpLS~~$@>XM`x8#N?m-Bvz=VT@-MTx(GQByv1c5YKG3HL$4rKLF>%lnbej zN*t$60ZoZYnpUYR_@Ub8S%rR7Qq?nJ#;(@8zhljTHbDtwSg`BEX!Uf)N2xIs)%OK zp+06uSBr>@G-?unoHrDkECRyuuQjYX{wDpt_eh23Dg2~csUbSa;H1U$j{Uy>r6nDy z8jOfm;6^1_V=Y#i-Yyv0)$^hc>En5zj%F^@8Z*tW=GuqG~OVEe;s|R z<&gGS#dQVM%#hCh7kzC8ovNT>RaotB5@YCQMNesE&)b`N`73|ySaCFOhwqtlu}5wO z?#t*|;IvKey?3%x2`-NPe=D{^lCuacFJ%*20ULfjcyk%*8Ngri@u7SRI{DAdVSwzw z$Au|2x{q9($3NLdi8OueuIcK{q6{YuTAn2$M+WC9P^l-WBW0AR@q0%Tmp+MNz*0Rw z&b@jKRaCI9gH1(A$rh!^brpsEgeTvJ>^GDmT|MS!Zd&#YS<}u8RaM^zOwqo)>s`|` zebeosW$H-4=B@-0u_~TE{1h=z2_Hn1x4`_v=cuhPIZbnJon1wof*$hh;q+;@F+v3p$@y+&yTCmGlyQl!TOj3|8R=7xC}eYtSte1`1>d0;sc! znFah$M&_io%c)gtU38s}K+3?!PtE~!h8cGLUP7b>AF0@@*3m2ckn4Q@Z`P^5yWH~8 znAV%`-fSj&Tb)T<@Tn_kXIC6Ri2_D2Fp#9lB9D8Jb{c1Cwfr=$WT` zo>WF9yJNCo!ju9Nm;G!0SA3PI?68Q|%F+KEW3;8cqwaCtUW@vR!7KU|TeZ#S?|9O&Z z=CkvJ_l~s4@t$KL3e+|fCWLD4teBO2w$Ll%My!EigA=%NMcmaOqkK+asfjLc>g!bz zKOYI3TKQy9FSSCaBTR#CNI3RSPOou^1LoCDkF?uIHRgGLlV+Zxne`sJBZbjqW{akd z(<5y4U2}nD;jx)wwFt%LE>^_&5gFDNfk_csS$00nGqOt^aT$Vil0qDZMc^%*4s$SS z`6Yb$MK|w+cZdUdnaoO9HmBwU+%_}~tIT`vc&o_B|IWmz>Rk^`{Jl^8d-9*a<9ik_ z;;&7y;mXhjjAB!Bhp7yO;3M`DWw(mQQRUHAKy?>#@nsQz`*^?l1{ItPH&LpwA=3Wd z+|Yeioxw6$#diNv+O&|b zm&dbQhFgvlFDeo&c#-~RrYC&y+;rv(ohj4AT0Ep1VaCVqKPgYbx+HG&rK(PLli|eA zy#^g~uXTL>EkgC#YImSy*!6z&V3`ZYP%l$9YE{70&k$x>O5N-Fm@7u)e0tMtE6GY; z<{D(=A`-twN1k6%Uc~ChH$amClTv8ini*xx=0`31*PK81SD*#;hEm9Nnuq7mscZln zXjPI$3rMN+r_bNlj-}XS?1fQOHcy4OD}s+*Sy#G2ptGqZn~$a?QdNX{m%2Od?>e^k z(gwMDdG>>2T9qI9%KhdqC;rYqU;O|sjBjxna(L3xAP=xV)p+zKPyLXU{;m>wwfjdU ztYDUH;Gev3dxsyc?_@qB%kWblw_!>UYAZ~CJ@C9iqL$K2%1~`={AU&6Jb94rk1^

EU474i}5RSe~BnWOd74A8XrPptS z?ASwYZY*gs`;=!xkkNY5QS>&?|7@lnWL131R2LZf77sn}GPY|9UD=AGRvJ%K!gTG- z2>)85H=Jcm5mZTtwzM=exxp#WCB8>9^mja7)1+G|#F-N(yLC4fO z8G2dVq)*vIL&O!G>Y=#A+btAtx<6e*1{oRZmpRc$t5`xQ}mCtsWS$Ht~mM(uYpfyI_sDNkvpfz+gpFXFC zsQcoXTi5kH&rs?7B~Cz_w(!lPBG^L1ZmQC?E;NcuAUEa1%qHCuegMD6KSMCVE*tTrPnpmCpaA4pFA-~vglyAx!l1l9a7_z(; z?LLzmk{)j4qT>fzP$gJD|K0r;0W36w^J%NY@+wuNR&_34QEwF34%|V-pS0}f&;xlg zw;GrGDcnrMas_0~s>Ay+^l7_-Hd?#n2fs1I(aI&a=3c8>e)$J(`1w50Y`9%^h9Fef$?XD^xNllLlmSgpKFvq2bbEZl%)Y0k)q`FVArN28W4C`Sb~IsO=Xw*ma1 zJR1fCiH8hgcEXt)Jb*3Mr~sBoy0BHUE3GAf#T@VVpMgP)5T+z409}W1a$pMLbezvJ zHmt*DB4NWgH($e<8Hg7&W>G1|MrmGCS`<01r6r_G)jF_M5JPMF5;y?+t*_p zeN;Lfo9pqChoG4@#v<20ak&k~mBUSvj{1%&h=$M5W$mIYqxKR){#}4RB?JbehXR}< zORm=DUfah6(oY>vj$M2G+TAnjck}Pi1hJ1u_9PfnBFsYl3*^xeD04w&qid36QIwS> zy(?Q~_e&%-c6d!PhD)s`)HjR`6X=76er}Z_P!~?11xln?*O^?s8l}gSZFU z!DxUN;P$%u{jy!p49hD5-1}*o<@KMvhNXjzF$0IgI^uCoFoi#V(he9-QzeyKuh#io zo7a_mW}c_aqTD1fhABZN(+&!p+xXw2|4XyT@?x_aTpj;OxZ+b)AX z>|W~@K!W0e`X!ycrWx|0q@V_hu`s}+jN|5eLIlW#ST;kM75SAz*dpn*el3Bl4C5(> zJF_i?D(EHh(BjF>NWa7~lIJk>{h=Ip1?NzmZ-Y_bl08j}?ypTLS~s%XdF@fM^S@E>fF^woJ52*ArBG)}4GZ_c%yL81&ue~6KHy98{@{*+a*ifA8-Bot@sld&X zq~vz6_%hz*O}2R&HjIkDkq!R!9T&<2Is$`k>`2Mr?JlJ1%0;Mmw*rvuwGV%Aa^->W zAL&EF1NHOsJ_T<4px_oou2_C$W7+4tISqcd8e2t$+|?kF=|QYS=}|puhE2~d*g2`% zl={ewaz2`(LT?e{E`qp0$u2vPm<$Aqg@lhIQt1p%dWD|dzCd<`Fuz2{#uLZWcy8k% zAWqCy7`IJ>^(v2aQG&>$%0|9kBiL#$v#5Wvd%Y_5>FA+X^_D-F-fKXo*DcOzF0QZ; zcOSr^8Z5z?@Y%>fv>YVLwP1b3+N4n&B7!}An{A9ow{Xy5RKhl8tf>Mxwo?B5iSI$S z89mw-&lN72Ub8rIM__h;skBXWWm#_&bx)wjWU^3W4_ zi4oQe;((EFOe2;v4IWc#P+^gN-geZbo7lcQ7}Eq387cU-h@3(y!OhZG3KJbTdKs4M zEIWy7VElSL+=~Th^BGXT9dwak?i(V{DSRkG{%J=C+IX)kIrOaZP-;Arn^x@CX}OZ` z{llgh6DYQ4gYHU@Au>QIBMLY^x7P*kOvj#7kjllV8@sTqmB+L}K?r1KKtu?V;wo39 zm23YGW(kGL3%3c=2B{lHgyauYQDYLhJ$NJ2tJ?5sRemM;J-c*p2w6kQ4LiHCG(u@V z)?q{bpOYR{Q+Bmeb{0VVi41!}o>SEq|g%JPP3jdV3PNu+YpTUQ!;{$p4BVwJsYTk`?czHWc%)^HPxDc_otzc_3zgWGP^DE#B(oP6*2cGO!Xvu6L(GhlMz@8p*kVdax^9;d5IL@c}_1lJgT`z0|AG zVpJFvSK=O=H38Tv(7QCG7I~~(jAg=cP4corfN&QV{7Af3TT+%6W-R(go(D2T!j08o z1=U}~A^+1*EQ3pnUst!jZMq^a-L1r~RHG|`(0^{5b)_Fk*b(}I7Ol%TIZ+bN5^mxT zfF9z7!3xl{9onmQa8pBRRNPW|WH8UIZ5wt!8~2xjx0H%mD#0w}okA$C2umnkRIy3| zH6lqYF*$1n3;=>VCAD%G?IK238La*_f9vrtt&8#gKbYV)yFKYp*j_t-Z&&fp*8o)w zxB}1>1>XFcH9><0cs_lZ076a2Q^DRh0H9udKil<@7<*$uL<@)?Moxss%K|0DrkTB_ zT;nDM$=8^W1``Lx;Zd%^Tjc%=8O*CDe>prI@HP7HK%z5Z57aD%Igv2 zyd5g5!lc8vO!<~*7+(;GpD#=aVB?D0lkL@@qsX=8;Zf*{SDocn>YlpS8D7Ev9NK!G zNts#YRlgp;!U?nl^w`)vBJ6Gv?(bHBZ4BZShIShR?W;0fh=d6t@kTD#-vAV{bsxdW z$#P=vB}6qsCyTMRueD5g89~Inx3(kFB)im?#Z0E7+FF(kIoz)M9&INrT#ecvb zDA=tCbjT z!1dIhI0NpoUF?>m6v$hB!TBNNA@6cZMhElo*+P5(wSxZnN{>E%u-|gmv7@hN9ltbo zn1_ht{6g=W3LG0{xZ7pr1Fsh z{onJ|H?M57%8i&5L$;c|PnYXXDoB*J$PxwVOG9UY?A&`{=l{GrSvA!w_cR^8-ehHW zRn5`8A|@P`6Lu?b$3iiiVGM5!9Vu$&!>jStfxF9^n}2&yKy&3+>Pi*Lhl&fG0ktMP zv|iN84j!>yoB$?3+vx;b1-R%6$P}WC<(@6l*fk}fu^4>#8Q&?PSdUf$7B$pHXEO$t#A4FPy)AIjxXPz#XLBGesbA7hG8UPR*B7~&%_$_*ggS5|!# zsf_liOnlD$k66l{?kqmtBsaNt{TStensiiNx z=c!lxImui&9XdIT3RExi6JJL<53g%~ZFF{YQd1oP0DGp-?uL!L1mZ3}Mtjdr|R-Lc=uDbmmBzY*p;6A&dt zZ8?t$NiY~B4&(;A+E}0n3Y0g3BvpWY%P%KW2?!~8QjE1$gm{u4J9B#D-UDi?hivXu+Fra)-g;N_>M7wAx#4* zRJ|Yk>{{@4f8nh@BQ+Qo5AA5j9nex^%@N>fG6=rLdagtVN(`14dPi1U%)HJz=_}ql z4bbo3wiY^V`ebQfywy--pvS@d?{_~zm`D}^v(*4fge?&gB<=VDHHu3-vp`5d6l_-o zdH}%L3JLY|&))c9lks@9tpdL!B<>EJT=B~@Iu&cSZqy669d)N))$GkRZ!wWRt3?klq?C5oWe!IrW?uiQ(ZO%rA zXgKu5LDO-hCJwb~OHYV@25^?b$`ELVg2hZk&czt`~uq4tO2jR(I1?F#o6o0@MJL4g9^o@+J|q zrs@`}QH~nyNeOQK-atpOn~kL2L5(`rhHiIJ76+hgm!pc45Xi)p#%(BxeC@g2A22|4#M#nLW2hK6J8ml;?n>EBX3t`REkCUgVkdo 
zZw4y?toN!N$#04dN|x8$M;lXDTYR`tchVwA%RIB@_N5ygd-Fl`?T_d6c@VGCNeJw8 z0YJ7E_;A(|u|U_H#Xnp}kOKpB;PNg8s9fV?<2q0@V6!SH=zq0f&3=^Mek^3Zd8B=2 zZ)ff#OrE5EqJ^KMn@>$GJy$UNve4v}`52iKd-pr91aN5XvAF8naRj5lI1thRhx4sN>geZgGClm0VR+LAD#egRc@JMb%C5@W$y=?+7Lu`-^KDv^NURn`y~xdZw2`$PQqKE9_t=C)z5KB7E&#j_P>hc0Y6#M~L@ zp7!QWt@V}S8n4SA##*;Oo_=fXy%gTUhhQG7HLjA|;;9 z!*5*V1MqMo!J=8$1YT9(N%Oz^FeSV#;l zDRo^=5y{Y9nYVo7(1|5mA_eY-DpBuy=hJ_k5UTl^IQluaULGHX&4qL_PI~UgU{FxT zoLx3izj1lUvQ-kcQigDRoJ>pXPD&Z=12)(+0hN#We-xd2Jd^$Z$FFNA8?(&}8^gx9 z5t?Hov^mc4og<^Em2 z|M%D)*W+{Tb6ua;`}KOhd`TrJ4bPN@t2^(XH799QtS!(_POntgNv=L>)k{39ujQTC z6gumz6S^F5ENdl({#*Z*!QHd9*WcamJ#_1xDvRL$j$|!Y7PoVup9EFWU!>tRD zAlR8uGYv0FsN_#nFfq8MJwQWoOZcLz@8feJjlMYji#OG*On{LO*nBS9=!*bRxQ=1j zM~j)2C_yx;3t5#CkXoqEHxe*JNR(93QcnmHpwJ8fLwZ>39;+3Ba5-HX*6QMgbgjK= zRuewsW!9G#-s`++`_*9IqX{=YY&e=EyH_&NK668Ad-Gk_w!22>D5WJz!s#`a^Y=uK z@K>y=1=H;P|E@Q|Suc;DmS}n`#RYEYqN7VpoEeS8w^gC$<=wG&?x6vBa{1+gM`#eeQJ;>zFEDJJgrvLkX=P7siKWp3Pdpal&|J zHN5Y9yBd$50!$Sr@1~-aGZPtyF`9s0W6O~KW;OhkIaBzm@8mRoB$X*f{MMW(Pw^*~ zq>MCM)(9DTDUqgVjq(9_NGX?^whGs;Rsu$KG6+Peu+(uJBHM|#`GXbDrBa5?jw72# z1nS&xEwoRPi8ztnwB{nIsYz(Dj%Oozb-Pr`eq=eoT&Kdr$Tz3p4QpIN0tI?Nh7Kg8eI@hmQ%QNZHIYFt?bE7&Ala!|ZZvUi_V zm{t)NYSkZApmnEZC98$8#K6d2as;Rz?}N~wEpgR^s+Gbpt_fKr^kg1CO#iXWBjiO& z@C?3V$kj({=XNTZZuA&7;~s`$KZl$q*@I#F-l=4P2o+NSP#2k`kfZla;)Eo0JCo)& z3~1NN@W#vPyo6OG6jh+q_{g##4J)+9sYPoCXHnUZw*%|5noq>&v2uabwB!0ie$LK< zN|{u%2iKGJ0v1cH`?Pk%miV`>^4py?eaLgojUKs@9Uf6i5uCYUDHgFV+!lA4Z`J}K#Uyg!F`Nr7u>2NZZrUSWi_30ymeu`CseUJo>op-6A538*A& z%oZoI=sk-G^)RbG<62a5>Y=rZ;Yx#l?-|77v0wJPuc&!*nHo`ijRYBB*k$7*;uJVc z2=A}0_QV82HI6D7ge~HVDXf>Mzw&t(PBx+X!V0qC_HWZjE}^*uWyHDb;x?v3$`g1S zw_o3Q=VPu`_0l$>B`il)U!m5>`)o#XubX{ISuH>r8jP^f`5;f*Prdtae#P@q7hm-= zoA9J#O*qyfmnhA|IJI4hetKb-{u7zN$$W!m#5_-=~(=zjq?pNc&BZx^Ix)*JO6x_rL`rIuDvkWxnat}FKa z8_Q2P(yV;VHMcJzEl1{Ftm)L%+fmDW!I#|9IEEYtU{Vx#){WghY_oIUBjCUuZ? 
z*Hos6jgXZh^@oPVMdDnGhcM)>rAn4FxM^Nvw-zOaQ zIN}C1ZFE+KI8s%|@GQlhMKhE)aQNyk9xclDt_fT{h9`~C%9nVp4;UD8*szjlA)!UQ zDs=W<$?MkH|IN^%G}e0p)GC_mIqyS{3mCW9T*vV?$4*@st|Ztw)drEoatUJ`dcIbk zRpwL~#hB@Oapp=o$?+1BrhvhGoXhE{23Ud2lOS+@t>p|zu2~@|QJ7fP5b3~znaGUB z?q>dvglo4-*4i1=M|RFMKv$eu`y>}a(**!GkcH4Bl?eKep9mr(AZB$Ap)yRiG(}0% zf0!ns2+i8~xhBwNfrp`Fl~7Iyf$i1>CPA6TnoV5J4goLJ$-??XmlRlb@fYk*tg4p+ zJTnVq`hy0%I=XRPO$x>~Iup)_GSoIc5)nI$-WiwbKiRtRD$OWe?Q%FwXTRKX_56-~ z+YBy^o^YEEyk&BFK6TYgjH6=DWNPNWw?}1|2T7e}PM4y#uiEt7ZSK2T-SC0zjqznW zBl>9N!>F!QRj~1b^YN#s-zFyZVDhTB%)DM;9uhCmn(IUv)(Ze$!PUxG68($K0}+N+ zH(T0^)x+e5LT3ip(_V}Ws}N(b5OC`^1HfoMS#Mr}!c~Aqd5~z`;PH&%z);H^q3E!x z!Ei3+u1hru!g&LrDTjw+bCI}q8OCY=2&J%h9=3Kx{qxtK_La>d^O=4`a)mSuo-GJK zdJ3ou7p}9A9xtmRIiTt`8ELSj0&lSh?ww z5VtmgdXXaC7?EC{NVim}lLP8ifOlSs=n2mHLTBcb< z-pd8eV$*mr2e>-1Q9YqarUMtvDfwdD6A^>Tk@{TuW<3DCBz85u2IP<4nFZE zT;K~5@|4Ux5tE7_u)&N&@Gy{Q_!UQA|;I4!Se30LEZe6C-3Gz~_ zD@HMJRpZsV3=e<=c%T5dR@_7C1(Iy=!d{f1H^>g_-V2yjK(9wZw+fI0NnuKuss!Z| ziQ+F@R%ZhSSk(U$J(nN^144C)5-bYVDKtD~l68D^@)Gjj#y*+=(Kw7-5l*$m`B#zx zDqoeLHp$Ui7NXnMBFfwYGSxSp(x2e1&{c>GI6-YaVEawbZU9W7OHv$>cCA_lN>qPb zqbt>LkI5}Y4oIborwkAj>O&?;#3T_%=?EO>?#>x#O8YLQ=&oKXPe%b!6l>h$?sFg% zKhH?K*OLkzej6|#jcD;|1z{R0aU9Ygm%Jf25wK0K`2(865DSv}Ln629@JI#FtP@c= z2yPLIzl4;quh+?NUK8bBk9*vkdN?R^^8us|ws&URIwS))K!u7_kOmdd=qlrI3(VvY zjF&}pk@$Hi?hCOyN$RCIPi+;MvPE^_D>w~!m@x;u&en|2D&z|=ehk+WB;9dETQ)~> zz4F%66pS`Wv^5B%@I_h+fiyGUxaGb$H$}YT;8};kzFtvJuPC_}yxTN*KmCk9*4Z3k zn9+M=M}krhdPs>3e(I=yl4%xJ1$`|@ay$bB+JixfE2*03uW0V+9X4$r5w^xNY+50 zPA1aP8tt-D7S4GC_!LR4i;(LU-~&BeDUbA|qPW9Q09N{{Q33-9H6z7okk5;rEI=If4()tstT(932jk}-CoDco=oM*GN0<*q zz3p1pGuP6mwC*(`f&uXkirDgryH*8I@@B;5P5Hr>BW16~Z4-<3od+2Tq~EyYw?$xf z0#a{ea|^ymV`mC|~DcsX?>%+uO6-Cl5-x3Nk^~f+YUL370v94b1GF zLwKqr?&HXHa?zRvnA|G9X)S$uPwdG+hw%$`a1Ia^VjflMpI7L=01>(%Cx#JcWRlNC zXmmlAt%cVa1)Ewy4S6tr?!txbP;6+;q&BdwbfpK$sF*z5O}gH;)J+!?h16)aG|XQT3MCy_v(T)no43Wd^ zX5z9UCv@Dk4r$qViQdk$H}uewMIP)MKNwi;!-8c3X$dmJqPSG)raTnv|I^?jj^$#) zNYr)uK+!FP4czs#QW5@20X;cL5HgQzoa~vrd9O~Uig%uI$$^l0kajJ?U;+Bx8sOz5 zcCSNdz^KCUmw!DhSVa}D?LxJ*0=xyW7Z&Ym0ry5}x6)5XEP99Hso(&eP_}rOz5F0T!NeEK9`NreNQMcv{ic0?8d}dczqgDcPRjae&FzB|y z&wciF=t$({C~w}=kLZ-7%l=S^i`beiwIHE3lhm@t(f(ULZNt9%qYmB$5KTkSX-$^t z@FXkpz4|a<+PcNHPHfE)JD>LyRZ85cHD*-CDi104gZOgqisZv4&gNp_@h%m<@NIm# z*D)@%sS-l-0X-Ey@PkHQB+^nYcAb0kJW6um`5iDb+0p`XA{B#Fz-CyysxJPdw9BIv zWu+_hazktbz0w329}e!c7jRkvXshg-7L2B&rK>VPJPG`3e*3E}DRHcz{q73r#9Wbw zugL5x0MlSLyoSZzJ`f->@09w7qC0^VeyMzZe zTjZR&1LuPZsH#%aE(kvdy;&aBmxmOTg0u<=Pgu^wO4rRTs(vCpgyk=yu9;Oxw&TKPnwF(B2=D_||>mct2sI8`PMQ?##?n#u6Xm z_~7ckkEK4|A49x9nj3Hi;UAT_3kqU}(Vod_Te&~{YZJoqp1Wo)UL}+h>%b!E0ROl5 z3-j>LJvuxEQRmhd&AVxgLuN~ekGI7{q0hC>q7y~iIuS>9TpNq>FT%L76?=U|1IePjhIxfX@pfytsIk2VZ9i(KfYWE`*Ls4w zt{_I9Ti-e!Hgi6*xO1=fUGlAvn3%;P#_&$84W06x6gV#8P6PEdX4I{@c$7g}VS{&NSYElAnh zwa+1fa9xZUu-8;L%64f+iV>wGeG6gL{NPcVsJibS_e_MN->#7+EfbVlNu711Oy=^{JVblVra`x? 
zuigi>IngAupZFu^H(j$0J~lI;)eyA%-L2U80`(dOFQq;6^R!P1#e`E-!HDgCj?>n6 zi+saotT%4wz-Mw=$&4#~mGg|G;k zIop2p3Q|+oC{ix)^g?L{s~7zIgHI^MYWc29S;g+&j3rPU$H!G`7XBuY9l4Bp@bljU z&Ju3s?jaIR6P1$^H{)7MrQYuf9r@=GgYTCXnLWCehxogP8OaWRH+X$%<-^bui*GAI zry{=B*o9Wu{n|`j48?hi>a)6W`}pKwhkk)4&ZKxwLE6Ve>9@kq={Z z=JI_j*mT%zaoXD@!1h_>&H+kzLS-MC$~8ZVVW)6slj~#UG|cHlp%UGE?<|B-r}F&t zHqT$`J*wr#;DOtHWG{X1fOnyW?&bqV*?81%T;wW_AKxs!31-GQFM&l&iFbDoG8wi- z{&8}M?PZqN^jk#+4Tj6%lJ-`YI8?d4#^VplM#N}j{fgY%tYQxC|HV<>p2A^I%9Xf^ zzAT*6F#ihv>R{wSf74=C_XyQ?Dzism+rg#~-jwq{cKI|L#5r7JnH94RyCK#`sxe^VRHj=J7nUby>okpY8$7l!xnvnLrB8}>>XqPwDf}zF$i?H;mH*o1Igm`x1TuPl!R2t} zm^I#NYg^@F6p8~WDe1R(1;%Wls^7$l&QxP$^$L8=4ldm_7~Qr_;A`nONAfd1I0ztF zXBPC8FIFyD3G&q!g3q9nQQ4mKGb`Q>@LVW-E#c~uxNd$YrE^9?s{3Ci@}CpeGJCpI zvlSn^_^;yB!-EaG8*k^%#TJx0e$O;jtdiD_MToF@&m;qRkIo|8h*1-x+l@aIZW&r7<@N&V^89moc$7wgPHb zqDZ_-rRJ~WC@~V??r@%V^m7TaB?15_{;g@+3EBZ45)1&X%C*)Yv7|_9U*CZJujwvG zeS6%`gj{@shaeB}`{m2=qEal&wCFx|qB={aZEZp`@9ME^iY?Dw5EyyH9B|9)D$Vu& z7+qIHD(Gcn?3SRFVwVAUFNDMB)$faczooPZiNysvzR?OTzHcE^aBUyJP6GXHrH=MVIet4Kxe3I;AK5}}nOmwNP0VI4T#M$?&A zUb05wDlZ!5si1frr(08>>&cqlCG}fUlI;}`BIfNGKlpfMW5LU34~xICgh#McVJ3Z6 zvMIvzX!!sVWx^;-Y89?wa|#SLSU@Q2r|STAME;fnE%>y*10m-E`tR5Lb=wNU3TxDV z?L_&oUK4)nl9(NYn{_N zcQA$OAi7B13}T~j;=h2F{+GoC)+X+O(SM(26`2+3=^RWEUs&PNIL#120&@JE(4g|0&k;id8Ekt;St-~2dH-tPREe}CbA0&tYPs; z=Ue=}CFKs(aNFeanb6xmG}pYh>e%!h@BSZ8A=}U#f|WAl>f>byQWNCSwCn0;+1(>= z!CAY5*9SjyzHWOq)s?0r$u1g3*6%xB=4XLL7(N zNOol-8M4sA^=7IIfB7p`>j4165lJAxvG+e270!SOlCZ=xu+5#>oZa;DEsv6q$~##G zkhqfkCo+4oA~(aqbQQhcz+#D#19mug9mq-9WUyv6H&# z+JuvUr~c@}GeQDMuGgwUQ-@EiU<*lK>wdGoX`yLd8=NWSa&eTc*n2p~VG&UQU>p_@ z)or0=o^Gakd)VTA{}~i>=Cia4VDl&fED39kV-1bppqFw>I4Lqj{ac7`eW}o4ITqtT zFk@0Kmxd4X_?|Z|U-{Hsxy}MfsBJ}ICO4}aVxz!*%aGB+PF-9_tfuC8IT57?Y6D6X zVF36WK)w0v($XbtCKXlQ49QTm_SULQ6sOX|PD5gjso{6h&bmJ9&#%6o z{5PrGhWNydkYe2vJ-C!-z6|~28kBf_yQ1XOf47j(*;{W4Lcpl@|M^}K{=qMQ`Y@eR zA%Yumb!7b9dn=g-MB0=oRPe&0ReNYrjN55aMv5nc(EhF6%Y?M~Os*BS&dS1z)%Aj0 zA!f38`1;>9*&s{iX(l(Z`0U1)TAhf_m>Kpuw$> z`D(2kpChDO&_)=^oXL%Z>2?DX{N}z>z@v(YqkqII@biz`w<|CyTwFDPse|eUVMF^u zy=_hpQ`PNA5$jTww0Y6QuZ`N{evHy!%-L=&Hjl+YKK)Qx+|*6BMp)97TGriAszPg- z$5;T)dUqSsmFl;v)Iw{KSP0Q8vhM=_q(HdHZbO;Q%lS?3at%;TT*pXp;x`_x8ba{B zjB~tM^`2(lTxXXTF0s;sS_fQb!?G6pHGYO|{xRaq)|}q1doE(b{KGA*)~C8JB<^F9 zb(0dVI?IF7#50=V6~=yYkJO3P64cy)?Lf`otdvf9qRFaME~;g;Irv0(=ypb%68v zyF28NYkHvxsmLVrv>VuMS&DSxBQYpaR>!2xdDH11Z>IL?c4@YpZolhn$JB7qAFhCO zJUty+d)zXobj1yxWR~X^^CWeazH5xzIE~3V@?Oq+y`JTpQ4rYrSpQqY$K|3!vOW^4 z^wB^l);jughk{HI@*U$)^}u~ME^D8H)-JY^_b{koOsO{}hl_CxOiBmv)`7!O5NmsX zV$dM7D~@=!@Unp#U5Et2$Ev@b@?AWv_fzKAi9-XSWr07?95RCI+j}eNcCfcQ(z*>{Tz}*~^a$-uK7&hks_0&X#JY}w*;S?Ya(t5p2JdI~ ztI@&zCqRo{US6ePlF+xh8&}_P@7R3lBMt!8aUZ{}5B3Km z5&SKB-K{>4WCiL8Cw{nqrnZ4eF&Oc(QT~2+!OTQuf``jHYUda3Et6k z6r$jSa;Oj7nc-WV?OWgRyUN921zWDZd~9E}*a0_hnNn%P>M`SZ5>b%&-S0>MBB(@! 
zNXYIOguuddKQFiEerEHSYP+%obkf?!*NqlsXYNEMTtCfaGtUk9;wrc&T1~7nTMrEN ztXp}_)n92U`Hp<_9aeegAAL`MH2W^`d)meQF-AdX7$K~a5^-rVww!UUp$r{`ddt5( z8#Vlxg!=ta5u~1y+}x~x(XJ7l{vN+qY5W#pn*cI}O7b9Zua29Pj!81& z(jI(LSu81;A8j0IO|SPPRAY<;NX$|{vBfN_OLbb@g>4DN5s2KP4K7KL!x- zV#|7oYp>+Me%es+DG`|G1njVQTd=v!rwi%38K@_oQPzY6SwBw9frxl4&}1(Ga&ONi-DtoYkx zW-H5EZ1bI`U4K7&I&S#f_UR%ERqxROWwgZsX5cx>E_QqZ~>DKr?Vz_SdT>LjkMJ!!jBxV07~Ev>@=7xmA59&XH9SLYed5tk+* zm@J!m_pSPMEwbG{a1!bY)UOd_HcqN%{}vjFg; zsweN|ysqEriY*_>X$a|8*N<=9X2U*JwVmy@#tlNo_1%sYNRI=bsef90y>aQ$g-eSc zkp^n#A59`GTL;3u6O1N4`+(qVYvo2cv=sL2(j8!9dV!|UQFB>iV=wdpkALtPx^SCP zJr)ssJHiD9PL3-K=ptPG@Bcz))MQ66y(@7nDj0e{!N%8fs9Juk9a2YK6`X+=jT>s9 z08k2m$^|J3pz9E-U1_{5vgE?t9HQ>rE2>DL0klitDyi5I=GJ( zH8t#}8ri^^l8mw^xtl*FZZ@7AKn!T<-v4{%b^5|(mcYxgLgGT*MK#?)d(d`%WhZ=9 z2^x+GUnN|m<2u;@ODGQKc$&^AkN?=fyWXpL3n-jx7~9an7*+VH@qDlDB6f6A#<@9X zcWJVeM!z1Lw|i#%_qcEQu`^W+E7JyaFd+xh5{eL=3HzLsWB_5=K#hPveF?4X(<0486oG8vNrCu&9 zXWG9zA4PX2{Qf=R+~;EJ0!tynJhL3g8%iF1ZZm~xR_wH5Fc!zk1r-*``(;kg%bjS%Tf5gU`l zTFano2hz0(2_uQ?QZPX!#qhST7NK<5`@=zsbY_1;27!!QJWEx#5jz*;D_N>;ms?&d zIzYFXG`H;ANR+K2${FechvRI5uj!N1Zh3a~10tz`{c!F7GEm%)!uT#n zmBWDtJ)ie46@9cG#yBQE%P$Vxb$%Vv!5ST&IdS|g7}&(SmiIuAfaPm{)5z&dQ{xc@ zyhAjQP?m3xNc7+kUC zT(?7Hk7Ljuj6pzmT!Uwku3kVoO1f7G7-93akMS*gQ4>bZ`I`XhPogjmdZZrP@2Y@@=b0Ue8n8Ev8wm z8?sz0t?n;^ERu8K>JXRAJeMXf*WsSHQ=i*MT`wH&I{!)H$d_D=5tpnHXP6=M+r3F0 zz0^95vOj$u0EyfA6^e0A#YALl*c zHUNVNl2_^9Mm`A3^kl}&-#1r8#uj}r1V0OjAonWh14>Gek6Py6ia>7OMcAewamFJI zBi1qpLG8RRhLkTaFu4kGuCiO>GxX8@CxeLk$?CYs{ICfL-!QiZLh`lu2@hgNoHNv zxveT?&jErios*J<390wkG2Ib0iYc#g-9Kj`mNdX#PMoeIXBbe5&PN*r4U_=ZQCDzn zEj~A&rXen`e`W%a^bi|wW^0e-uDxcv75k^@KnDbpSjmsu{Lksy=({A&4Ag|5$(VH8 z^Znzio&&$1y0ib(|2}KmhLJ^DSM`Hw%;a1|PGFmsV(rC2Pv{R>yS7 zw7d_Kt+tI1p8Z-)YVaB&8Xlu;cFleN2lnKW^?NB^c>BcrwB*ZqFRoi(dB{Q9bUYrIIpa9mBr2pOl~`hEzU1O7 z&Q2LOH4oV3?U^dnleRL87_Bms>npBb#!tJ~*J zBI+OXFdOQtxc3*XhxMLoyP=QefoM2C~<_4;r+8 z$O;ZlVq4BSxjeDhjkllg4KJ$M*wUb82}_|cIA&nx`keWRH@YWwzI7Ri*+t0L^&>uy zzo|WhmEH_Sf{81qV~#|2Yf`PbtC1RlSHSx4;5ZY4spGf|xtzrKh7BCy{ZpS@VZq>O z6x9gLg)XhJ<(o2M!4p|>&wq%tLKxn1= z86C*kcy9OZ_G{pyAJOmmIsXgzd{GNw5Cpm~Qd^E&p4*)l+uLaQTii~ImVfLAmmilp zl<{6ib~wHC#vJvouzbF|I<)`8P_1N+IeXOM>}i+!RVNb3vK?=umOtM+(m|wlX%D1c zgt@a-T>HAIB9Ec*g?EeTS6z|k%j!&VL|d7NNLOMrDthpksj{6iq~U(fhD$SrAdUn? z>U9AthH_Pf~Hs%V)dv2*!ivpU>7)`n~;yg0@1MYI{3>c z8&vTF|2JKNWN_tJsSIT@tUyjK6eS5vGmCapUD`le z8rNKZmTl@fqIKwx<&Q=yN{##INuVv4DI%A z-miP-&L&+{bFVR)1l2 zFe`8?B3?|2l%aGpC8im6H(!LtH{i|gb{qW^q}7EQ{3DBW-G6{Km6eot@&|rh-RG8B zC*q-m*5E~HMd3OGh)oxI>DC@Do{g}g91csc)ql=cM#bwN>L|3$yRYO2bYl%+g;6*3 zflqy`R;9v=0gFg{>uIdvFkltxQm2{dgrr&+7|Mm{RdjHj?@^FE&9Lwc0(nHJ7-hb& z!765&J|^?hy&x}e;VZ>7xhJ#!^#Rx~hqi{@RQp%5nL0w~IAU1ha4Z(*U*Aox%`9|y zA%k){wS91P^<@c7HSN6vz8-_mNPp+*FyDN&iHRa_wRi@ggR)A@ER;I0ghdeouK@h! zanEy?6%kYVg zWOvPar~UWC7ccxvn|tN6Co5K+)w^TAe`2>*ovP45Fon%n>M@?qJQ)!OoYQ41H@x;! 
z`}rBdop63OO(NP?EU&Vda@J{EBpZt6KT7O7fIk&DPiK2=Xp1d%rl??6~teU9*oRFubk7k4G9j?ZP;Bipq{$q2D%jj2^cafQu0i zqfxcB2)exC+{qWH^*2BKa%HSDql=V5d@S*T)d4?J9Kl(|z3@hNjhdqn?FIYq)E7il zek>ts4!{J>YqL@`qKHKxId9)hxVc(dC7a^O3J7eaG4bZ|5Hr>Ki& zX8t*EWw>YhyX=Dh`AwJ0&rH2$`Yk@p>~B5pnJp>bvid>dkmj4!fBtx;>VI(2u8a#C zpx*Lw3#}ZfV2)EwQ)i7Rbi31Hj1am0-2Qx9xL;U?GH48B-OcZyOGM+p^=2{#e7Oyzg_&e+Rg)gRE{MG-tTB+P)XUNX55Wn0iJlnj^i; zA9bJb*Ln16qCv|P$%6rPV4ehh0QnE2gSLCA-ydt6RS3-7?G@2Kar6svn?$}m|?-6F1C>mqAgdiZXCcMPy0JdaVs*) zN#(;Ko~ZpZU9R=5Bg^3IrEi0xE>?@3zC^!YMDJIYwzL1`gb+u#8rL)T+*$}7ct6uV zMT_{jbbt8A@UQ`_iq<|dz8vi90hrZfu#0o zCc$RA|3|D@=rzzAVLey|%$wP;W%v~a8D2~sr52-|JS{u>5r*J05Oe2Osl;0qZ#FM^ z+NMS+RWpMG5{+sM6d1aUWP-)S&(%x43JtlmecKzVZjQD>$ z*faK1#)NQJ5-z1TSu%g=_JF|Pe#MVFB7-|j`p-&(?pTMuScmIMeU=h4@8z{vLD<^( z&#g!QGa1>x%>FY-h7=@re^35cAzVRM0mK+ZCR>Olc;Af?V_>$edBgmykO=<)hd&~k zR2SxCgg6CeL8#f@hO`x`{{%o6ND;vx|5SlL`;XZ($N{mq;dl&c`dF1Un3laubsr39Tx@+ICh=c2{LvJVK;AK=C0N1yWNtH5e-t=~E1zB#9=; zwnqe0!be^GeEP2{yGEY=JCw#1(hMoMC4E+d7GBj$s_JJivcCSPzG(l~l!2=Q-B#Pd zJzR6$SKU&D*m0(9zJl;g{y!6;S7G&7$pzy1L(Eu(W@KMA2XI*{b2b#3It4|tAj~mS zjCGNhbqXeJMfD*ah;~&Wz01tED>UK2F;;Z(LI&F;melsfVtb;Nz>c)xF)cu;sq5D{ zZ3advv`m;7jw)^NJPL02OA}(e&r(7f(353bNs67r=-lcoYYlUD>)coxKr&$(yqF4q z)L=-@i7i_Es)1$?0iCuOZHP_hVUiT63K=E`hG;?rd_$wDMV~xslKq?vSz{6JN4|XtEB%@6)!g6}?-ab!I(&ia{kNU= zt<12Hh$^F_D#L1ePl33^6g{#_B1&B1=1Bv`e3KSZIMAHPXl2A3EukoYx0B7 z%XST+Y!m*uTr>7vEtLW49F-xfWW+Y^#so;w_HDIudvv9Pm$>_XgMiL4P$ykUG(x18 zLL>}3wnBxh6%zNOKLjaYDldWFr&|wVesXD6Jlb3bZJMz{+XYa+d3Jg;o=(Upj45Kd zN-bYTAWYH5l}4pPVn73C+UJX^uXthlK~kVlJxvw>2sM?o{O|dDY9sXcDH|W9+`D8~ z^?m(l!rncy;dcu5j$Cr~w`6^7dzU$qXnmxag9Nbmjdx5>%}IasG+n7Q7<_8{KP`ik zB7*PEe_Fml!5^Z}ttXx*J_!;2V=u&U8^|s1u=iVtX*djni;PbG+Ny)87HY!Kz^5wB z1%O}zkD^9DA(SK*lcac}ZmYnUC^6Oy%sWVp%~bz zp@O=fq)+8PmB_Thzq0hd1JLAm#(xs7|8~*U^-in3oor-3LlWUp8&3ES zK1rde8>_Vdr9md^2LZx;A+9d&C7>i?r{EI{jHkS8dybX~>eVZCGnqP*LNuHon^Q4O zcsq?R6X_z@6{*|OVBm|>nNV2UL0Qp7HdcP91%*~g7v-nw7z#$-0(vbC`lSrG*aVV7XcYVD9d(bF&Y>qshpuQ1v1S{DIvX^ioE)QFj=eU< zGB^H_TlwNWyvOd(8Z&m^>2NsL;m5V+A9{w>PaMvONFK6toh8iwPFEX4%!@S=|A*GW z=jWssemV;=AwcnK2C{;x^V{HD#52tVF4ENo?aD<5d|4@F5Cl_{V@jHUNdlOpASEp< z7Pmo&V}bA&s@Gy;Hb@Q&jVRfBd=iMe~!V`UGB}P0Bw3 zE;oL^&iMT(v$?_KQ}m^J`s1;76wFzFm*8$%~VL}BsBMoP*JeS$i-Gr#xM3L2r3Q5ImPHdn1b zSS?S8;nb>?GYTFEw*abBmI~XJ(#?fUMKG1u1OO>q+l+;8thi+4-GEK>b<~tsdDFJXKk?Lyr&|*|2;qtGX-xSo7u;{q2vRHV0x`9)34? 
z`c$8<#I%`3`yU+(cwT)a0J&i|vp|1adh;EbCdOHXosS?4N!lq-EGSB1Qn4B;lb{aj zb%{t7Dy>N&revHJHcndsTwJ(Yx69yi!?6Y6gDc}2YpMnV68L&q)B~R(*h;nx;Y_>>Clf6$_sKr>EDd_yTI?jY0*GR-nQH*|eyaZDYZnm%Z zMF-%t?(@}0c6B=C)0$4udLLBkq*k>veO~`jb=}Z!+oJtnzqT&py%NN1{I; zZ*VyA1e=o)7#w_K=g%9Dy|UU3wzbWsb((Gzm?2FNloef2!(&-Xmq(aU)R}?AbOv$* zfN2+!Jy(!YQoFU$5I~I~oZ0Dx800ayVv%eeh6Cm2lc2JHN7NJYx*q!eX ze6|8rrFa{354E2$>iPy1Bts{_M}NPVfpIGaJ%-F(Wx%k=5Nu`wHD6S4E8^V`b;(Z} z=uqv{P(vfrYwu_RT?RwinY6}N15XlYCTP6B>*ClJ`{!FUpVc}j8zwgAe|$Tc?sD|O zBWJptskh59TGT!G-^!=|kTstK57#u!$!8T46JyQ*W=N6hY(fZ|`U?RP=NS6`aWQo6 zZ_UC!e#1K>5g|f>pigQ|3o$6~6>nDsW>TTP;S|C4Kg}7&);BU@1oy5gDIgHI0AR+x z#Q}~0SrFS60bnR1+xf-j=RKXH~v zMRzMV8(RHby*hty^8ct2l3&fgejO1#4x`vN#MocoZ2$NE^wfxn_jmsLG}#P_H*qq$ z+GB=gO0j1FfubvyDLi}BbWI{6QXY$O1z0WE9>rs^ zV8^HgUJs01*HoKst0S8i9K@|+pQ*tTc zo^Ohb3zh~zNh|!Fqrbg48g_7+Gs+IU$gUP*_B_9obvbqQ&3(sdwlSmmo33;Q zzTC0xMX%0k+mQNenNzT{@n3rV;@h!voAeG4V!qFh9*w_J+uVcJcx~fmJXoyb3bFdm*|!d{duW1Y?up97VT{Vf&wj?3&d>t#wp*VIjrUL^i%skkG9z^S8J7^4 zRl_7NHRr(v9^QCd;Dy%{s+4$hK`Xoic{4; zZ}55F3m~xM+^txKjEfLC$&QrcEP&@kxN=&8aB6QrVF->8m`IqI8Zdia-(#Mfek^Gy zHM>o>%<=qf-A(qF4IN(Eb-mhvcewpF?xDj?(!ZOsr(=4`xRY+Kq@2MhztrZMY_;;8 ze|?w&`RX-)oZS78KdurAY6gIzCgL(r-hPV$ina!~sqihLjdvocjg&jYGpd#391 zDx>|2;DAZFxA*|g_x$5Qo2o@Y59=Z6i%9Emb(If(U)>i!P2)wdm~Q7SNPuuBH_$RwN|=Dc|F3P z;1;G!qnGnN%Uqf-S!UncdD;9SC(m;y-to`L{(nvlvcrRRya@VwJhPv6jlR?4cYVX= zI(5yEwWAGJPUH+;m25u!RJaE=YQ}Keb~pI;ZpmjV<=%|QCA|S-zdY8MC(bgbmzF%3 ziP6sXMid*c699?2Rh{6ESN7v45O{lP0jL&{GFoaDHTZeZ;o4rdn(g($*dCj%@h~6! z;fXH;WY~)ZL~`s&%8l51G*}h^YXztp#ic^~5)e^DVEdd#0r{71%B%70VHPritbzan zXFA9+F)|ZFtXvlTA4TUGmgM{QVfGZly%qO1+^AW&x8j}|?v;i)ODj~wt+;a54|AlY zMrw}ILQ^x-!rW$sP203>`G0s`aPaOvIBqyDuKRm^KIgd_g{uWj$k1tSW2!gr%S zC`r&J9{B>1>X1A!$B{}XfypdtSpcQoqe1h(;1I!7RrwSSSf3BbwjzO~oN`5G7lnMH zwo$_))2a-1fwP!$jO(f;%wC z_FVYG+vnj3L?Q^)PbjY%LRfQHQCUJ%hF z!IFw-01y|z5O;IQ21ZVF)dE1F&H#0mgCV{;_h2mu+~W3FRvd5T-NTSs#NYWLcge8s zgMnub=;`^r-8&KZ+c~doVJeK<7zgAF*=Jct{!WDt)*dXZejFa3t{Wtu9H=5`7>HP1 z_c_~R-wxQ)CG~=?mXL7Uh!m==j*_NGQ5+9~Uj)F*Bdk#td#;)zX(S+#0#?Q6!W5Vs zuyN~1_$Kf8{@Tfxi0xb*VjosvJ^uxh2J|{efy(WW;0(dO8^_Xpr+lwMYM>o9i#G(4 zNdO3KD(iA^usK9uj1H_Be6-~+OQsKV76IO52&Wj*YN-dmi2h*QDu{$it3x=m% znj2dUyp5zf}IM{^V<>&4S5JlLV?AL?LCx}lnwCGjR4u)RNXVhv=3b612d{TUBE6H zB*I_UN0Dw9)rn|`0|540JjfYMYdM#R9v}j*ED+fvvjD-z*W>SKg>1~n_%E(R3|_Ni z^g4R@;0LM)~O#g?&NnN)%_q^bxH{u#l4I@Cd}{9i`-7 zxrt;ApY(#dU5JX>mR@Ap0Q)SUHU}a_DjVoxa&ACnnHY!9E<)pTS{=gqQh>-`gt}K+ z9iZ7@V73>aHkc_Ps>y=v=`pP_AkD_R?@3M1onS{lrfn+1gJx|rdaJ5hVGgRw!bG1h z)#IPbSqAPM+AICk{69{*x0b8wP;k?#QRATNSY6uO)#LSN4t>m?Ms5sX9&8}d7xA43J|9TDvwns`QJ0Z58A zUd?@~rlWBktA``gh=4}8H0DwdEgdH7D-3O0x2`CYn!qWiq3p$_zC@GlNcn!hi zfGlRG)Y_|rViVU>px7oAb?Coq;g?}iM?)~hSu@A_u0|zisrWS7ycxaL z@=@34;?{}1%&3gXt08X!BV|$nxj=TKiC2NSUXTg^_|;vwpiAT6=Qp$DzizHp;7v_A z(iO}OwTP>D0Ua`80~KNwM{nYLgBtS2-mz`=^{~(ZPJ@_!X0cY+7Az#LRc_zsG95B) zK_zDbZO5hMs|f*zbY36nA7O)cc5W-4ZeWNzoI>o%u#?<=n{^vEnr`AnYbZf$H}U!zqIQORWMY7LZxtHpp8HLe%nVGw(p>fkOl+XI& z1c0Dv*J1~N@#6tvHXSx6`rEC)7`T4l_El#A?Y+dztd#`%C^<6pGy&1-h&W7!XS1Pu z@%9J-iBW-tbfAF60iY&wwnZ1x6aWGOK&Dh^009E#14G#G!@Rs~3M!S5YD)m_XM?nu zAT-X?MRT6JC%;F$~t^RH8VZA$SWeqnj{o`vGB9mnPSqJcur;UbJ@vOO!)=OgGD)L zR#Dw#=8YAn8!zSVM57e=@ex0O9(-ss8+o2mCE%#plY!W-Voa{S#G>H1Lh)1#5DWmf zjaW<2aml78id?V{8EnmiGk%2C@b?*}l@?GDH8|9O)X6;j$!z|7%sLD61dm$@kIu?u@<}e}+Ji@Ddz`}_9mIroYNCTPgS((pa(Ir_$3kE)n00;bCdyhB!QhjW|w)m9ilQpMid4cpwgJfCn2; z!0D8hnRwJF3vCIhwb80ABy`v|q9?o2$r>p&1mq}-Sjo>fWr3`?%|09$l?#629DZaJ zFw}NF>`h=;ak=|L$vEVHb`*#^3z~fidYCInkpmlc;~Bz0NfM%+Tt7t-f?n?`4Q5p) zu=w?e-7Z!fpdp?Bv*EF@zlFkrxQ{<0-mRVYytzQZ^!xXX3!m`}`89$W}TerF?)0KW@B- 
zZ0FUq;}LWMdVgkz{YGy)6J20vZfJMXz6+j8fm)HztrSD3BzPqi*)RaYk`l+3%F#6Y zu*UK?-RHAEOMbE|w&!2;U_m`_&?8*v5doN$4|OL&uz`RiU+L6tp*>{x6bT8veii0| zgkQ=?yjU4Wfk)E1Be|?7cK3I`xB?`zC`hiogxUGFwQ@tA$H9E^hacf0H4>`Y*r+)c zdWC?pFRdEZZj$eg@rSk<$|wpDsihnUIkAUD>v8A7vT?`}0^&Fq>`jIW*qo`X_Ci+K z0==U`6cvDy%QwqnJ%V^pN5y_Y!|-sL1wharui;eGky-7oNZIV`GWQU*97L~W z_m#_)R};H6U2oX1x|>VDA6IN}GOfi%a+}PCYVU#KxPg-zTDMXV=YJukkc00z*q6Mq zM||{k0;-vfYUV+%%LsOMF~yF;GGw3z6_SgG$BQC{^LiYq5C#SPltm1+KprI`M@Vv; zd}7&vY()(^@lQMBYzIB!%qm4dZZt_{qK60wO9S``J}dwS@uon7NC*K{&xs7K<_Z}Q zJb={lU}?|i(r1&kFS=J3>|2NA@S%HkH>&FIO*Xn#<=~)-1c0kDKs6J!SA6~AqwALM zuWw59A019Arohh>!y?Exh+I~42`JR!%HqTRap1K097Zz1`)2y3s;FouqeE^+^5Cq{ z0RrmaJLE0?*nRZ)BP!-SX#KkPLWoy+D^)+aL$~+AZ zH55Pt0bmgl@LC3e0s)9o0p;1}hidFGq{PaX=iQl;T5dhL)SLE(J@(|$FfO#cSgk@p zLnI?D=C7A=5#_W!L^%_&+>>N2H}%bIDuxEr)PZ4}rd-Ovl{92%@DaV`{(`ec9N7WS zn4%1`WX+Bc{%uUD&A8?qdWDOPp2z-~$FA_k!pD%IoT>@im^K@d;^Lho+hz>{*o|c7 zghQ_Ig%G|7$-{_5bnDz8+>{AfL!p%P9x5M3C>=&H1t4bvSb+~PBp`;Fn8!RJTF-1A z3AMzQS*Q2x$}r3>cVo_LVOZDgVJq(bjqph29XBBmVoFyL=E}=*We?#y|>*< zIL^B*BPmogI4<=4ObGOs+>g-l7IV}`9+uC3@PUna&&9kaY^h2Th2 z-EN%h3tt01;xTzq8iQ=*%?>j=^dasJ)I~FyhwT)E)L{gJf?%;8byFa6f(a1`aTYLk z?vh~bL=|%23?}+1AL|J&t)?JO@nKecK>wD#Q@Q=`1j6Y2W0(&Cga?T8COrVYrQCV> zW+CXNVhVLsd;{{IDWa2xVp3L@LKou4lQQsAHa)PTk5^+zuw(xgQuNl2pA#TIPlwL^ zr+Y)TU?b`M)_v~8nG($h%U+?4{g@k^#+Enhd%S(;x5th?8{2!&oEhXGo4G4r)k(*N zNVOW)eP3J@d59GPreawrA|&Z433TlvsUF;!bB3e@E=Uk~}!$N1Gw=u+qBG<&CYsJ0Mlj*XZiU2hLT zK}&k>G7o?H05z0Dbn}EtScr$AtLyVm6Ho2@55tSY^Bn#zIJb1Cl!G)`ltvPwk-GLO z=l(>HricXwfE>%8mGhoy<=bvEPRajoZ2cC0eGjnjBlp33Dz^6DSlrmm1@mGQ9{{bB z*ikwmou#M}4?075IZV21ar4%_T8Ih0(*dxDR9$$KgGbDrSiHXQsDl7E;>Gw=Y)x;< zUSgxh4kJc6*iSsn!AA4|tH#;`FdBNQ^5x?vk4Hvw?4cBhR_JIO7u&(Un$d@tBD@Bi zS&1c&woo8oHvAF=o!|Gg@H`@p|5R1y^I2_P3;}kC0JGq-%xE`Kt=@N_a9dHT(Z z`ZZnGhJs%=-XGBVluBbl4&DEZNsyZv3 z6`eP~nfMg*iH{kjBC6z2kM?Aa$C{^=X1@xGy2nc*l!XJwLg)FEeUc%s(>z`W^j_WY z8_nTC#5wR`3c7-ZD5n1Uqc>$cwyJs=7Q_D)Mfn^-nTn)>FiqfU{k59=Fr_PS)cF*C zDe|>jT-e*SWF5I8E$mRb7PEG{@e5`-WbhX4E1!p6W24pXW1sRdJA`+3?~ssxw0KkX z^X~srgx*U5ZKMJ4WhUl6{=4@fbUV8=n+K;65UWpSmE=)(PIW9%G2H;v=wU=VAMS&P zM(98lNst0cT&WO_1<9RE{~%BF&O81RrRGGIP=P^=ooM)pTnIo!m&oQR-+{^4n&K#} zcO^AE>?ZcU)?<7+TxyG^+f{3-l_OZ*r?-t*rI$kMaB415cRak>92RxuBV)@NFGjIS z77ZNW6>^c(qxQKV$F;tr>!FLbrb`=c4TkPb^fwRnMjo7hS#6orWHPoo7%zHJZ|mtT z9%LV^u#XTv^^8-D{`o=HGUM`r&%a;Z>(>uHd&)XSqvEjH?3k&bX+*gI2BG9MHCY@` zBK%5p2pcDx4;8`^rcN+EG-`W~Iz9Fl*Iv`}9-($9E1;HB$)lZbnlgIN>Dv~Nqz&!n zxT?$^DsGwl}h*U~<|25EIc zQz}rRtuu1k{I#9As+eC|w}YhLd7UVq4#te57AzXoNBAJN!imn8s6d} zlpH%BJ|mN47v6LAA-=T7;`9`xW{5G?Ast$h6Vr6!vv>7f>V0GxLF=N}duQtTEyg+B zm2Y%c&%o9m!|<(mDTU0_zou*4S?eqsRHk8o1|en$l-mAiY0s62CHGvoGRM6*CI?*6 z8d{>`G4f2VK&;jgV;|k>X8p$4(nDioId%uTp)dJvee@-6jC1!T)3%J1MB2L~!Bhb{ z&g}w#+C;4mFJ7)=eJBCiAr`6Y#*ng?=$?_MSZ`2@eaWq=Qwd)kgr|*tw|>}PjwnlU zxH2Ulj!6|gH0)@_{SB|VW00~o8#~xIb6oA(XV{Tbf4=|xI(>6v&%WN>ZsP@yt|c0i zUZ_;jAnAE9B_bcF9!LRJ%kQk`np^spdsw;;;iKVMY_;Zi-_hxj7RAHHVvZ@Nmwj9$ z{iYA>Jp3vMPoNti7Va3Y>86Y)ccJx}BL<2IT7v0$?xW|5xPX6M>f}Rx7=m(1t!YZ( zIgj~ghL>n|TFRg0{DkHuwQ}<$jkyrt+DYLR8DW{80*3KmtJda8;j$=5-<6yWDg_7< z@1jCei#v3qoh5^1gjMfioWmUY>t7ke8y5bd)9CT@b!poLOXu{%$9Jap`@<_&dXppm zV;+WrD3(C>jJtvgZT3~##i}~pD5V-}tu6yeTj@FCtG(9cwt2+34bvuWQ}{vZ!PnrWEc^ zOL0ETzAgX1=TLT&z|zoVljmTK(yEE5#T@L&-+z6MN9BMfrLhPSViGA{53^z`)tB44 zhHRR&#~gSrVgJzKrQvJLf`RjqCkQFd!JqXyLAH_}yLke|6goi6pm>rgH67(>DX2Dq zK7oTkS&p=d@yZg;WzkPILm=n4e3@>xv~=t1u{Y+92beSn9yD?8{ZS389G5?jsCY0d?|IVYizMO69JN*z8<8ROEC=k1kMaFtL|9~&-t zqv3O3IJV|^<|5kg_IAAnP`#&gTM#lz(b5x}K$7gv>4Yj76rU)LXcLI3V7!BppheESwzKK4mY>3-l!BaS&jhneP5GB`j-Kvd z`A=Dl-<EzBmSzt|?`Wt8Q8r*{s&pR=wxD1{#$D`CR_0tzP|Y 
z5|gZpJVN`m76^t_921Pive!9RdQK4hAr0biA0L;v%mDCIyt3Hhxn>Aiv6|(;PAgIV zWV{M=jIx#OB6>=9*LzwxJlhvD0>&#(cshmp541cxZ>@4;{G;wS`OJ8@+4+p3khEN| z+~wJ$$7Dm+VMT`uAl;-;>gs>A0rf!XL_rAoc!&M_5dn%wU)n9ZAS;s z=HVX_i^*cjVhrL{`JsqHOU(;O6}tif_f^CZNznRZqc~NBOG$j%bNQ}oGZB>og=ic} zMY6S*8EF{>l&?+{OfuJ+xQ|plVSTrF1@D)CCt-~700qDUl_#NMR;tr^y2t09QquAb zc+Fir?H0sn;wyQYB*G;<;aNufSw-__Dc05Dgz_|VtFO{Go7;%QhF_T9A~H8e8|WrtiqS~FtY{jLU!UF=D?SM%Sp4?u>B@DVU8Y#Ud#uC9o29$P*Q3_oz|TB3^4 zBRp@g-3EDuzuYf1Hg-taPDNtxY)nt4%wm>9#MRvL-I&`C#NfuP|JqYh0^6R8lZ-Ev zQnM+KZ~o{gUiw3=Nd0*#I1(gv@eld=%|^H)_yqBqk=P1UCC(Zh(r~-(j6cG@C>P=u zYk~Ek=Ba#Z#r;ngVm%sLWHrzG|K}+Q#)?(0xWAE^+jUHi`57U?Z0P^dE^{oKc@t z{%LTjFjG*<0Db!17c^IOaZWh(#K1znk>jpf46F2dSIfPP(%rYP)dAw)lU4&Zz544Q ztzAzE*^{OH#$Y_a_3NYnm}rm#5NgPeorS$lttG{Q zO6KzPN1z@lh;UKx#~WZfu8BQAKgUBT>d( z`V+_jgX|7b`T-f{fxBPz-Q%>i$u0A?t>Tx!kuMH>lM#ubj>xw#mYy3OL{FBA>Lha* zW9-uSE=DX{%xx0dKT+`23!7;G6>LYHvmj{)=+<~hMmspOC*Qmw-;A9n7!qsk>*(ObBbc;$9xg5SJgC3USP$+loPG%9>v^AvSS&bC+^eiBmfq zSkd(YXMVl~C0}q6Y!(2B1Gv9G1bTmHJ>f1e6ySOQ0Bzs^ximqY@W?My=RZR#taF-k zh!OlG-F-D~-tqAmvyLmcN`<$c@#i0;;0@#oQ>?T26s1O?- z_8<{rK3{S01=xbzCQZn*)ur1Vg{N@}1iq+KTlV4{8hrf%l0W`aNwtFL0qK%)wWGa& z09@T@wP_giEKSh5745o@+`9aS20*Zjz?ySW1V)4=%ni@*-y$Wx!7I+1i9jSJcFaWB z!jjb7*+;oy-=>XSwjTDvS<2S+B-b4xEbDJ9~^hkdJ8?zPGtFFVF#P^sh@ntBRV zD1}Ov0}0J=h$ugD8bv*RS*i37I(5CJXbyImRKXxZym|RL4WLiLnjg6)#+=-bviX*? z`ExQ5Tc*=T=IjSN-DafV7Xc*yGQUFizt|DF8y>R!7kr2&n8SguBJQz6!D_5LLl(%O zp+it7mX`u6;y?!1rn|RF1ne`>-GlPPT-vY!$nR!VW2yq^p4kzveNj8Vg|^b z8>Q?OvzGwB!*)enbG1(FRG%O}Ao`WwCSEUF35*sgO|7QxaC5H0LirWd>0GESQ?Tm<%uCm*N3FnpF6N4adscSdp? z_ZO-Vk*;nX;@1$(6;d;v;A|*N?#KN@Yy1kLHo#u2gZ1#idIXT;g=-(ECfozLdmnsO zMREBDnLY1m{$Gvg`m@-}Uujl5Cgc(5mlK6)WO#fWBYLdB$0@{zNgr;YyCTNY$04KV z9*vqXTz7ojc6=g9E|C<*O%p~W4K`80NC{+2$`?+^Z$5J57#IoQKG7f}65W2b=j(HY zg92uufKxn@r^mxt^T1Bbip2|s8AXiC*Xc1?rD*!2ba#7KLV-7XkM7NSKrmPU?A*6yqhXZK4X*TIem7Om z#HqJTZ%k*29HYR??)hCU(M;a)`_gc8pI{*)O!?xQZ22ZeT%Zv#-@Xgt6bO+L)gm-? zK_1vY5!C+SiWSww8xeBS3H}2!enBqaxX^*r(2zfR1tf(RuIo>4T!TbXcgD(hJ}*as zGhTohu-o4QTu#4m{kZ178WQzc0FtJ7>6ZkBc1(knt`vDnQeTQog^N_Yl*fT&8=~j3 zFG=Z2nz@b)F++MHV57KKmZgmOjZ(qN)pp)>FJ?~=*fvffm;L^Yj(JO_&K?eWT>r-= zB6e~m=T~^z4kLbDI+WvZc-PUpi0+mN{ompGadA&h`g^yV zQFE{2-0nvqJtV%`F+4vW6;?u>@abq(UXl^st~dQqFOj!A^uXwyeXw_UsuIx@KN8{g zc!1v59$FUG~8&oLs;4x!;W+Z$|U<5-chwpG91Z^=6nmCojL zdI_XCMFG~+z(UjPORY1WZcC#FwP_Z(e9CMnOp9YgWl8SVec~AH7(ICEgMV+W z5M}%1NXEjW3`}4dxMbhuy5O#z++~UPr-iWTBsoD3IE>C!Iuv&MQhYDYBm5{SQS< z?j${b`58N1dV`Jb4=U>a;qbC6-H(j@?|CD^rNF^_pSKfq$E%HulQC<0ceEigy8(WT z0t*?rbkpO}Ff-)%N~BL<>R!UFBmd4ma-jrFWvm??E{?K>MUWVgtdl9n-hXbVL^=P2 z!=p3LWQv_|k86TAV!KrsS@7@8K+7E*}Z$_MUQ^;8n z$ysfyA5yuJa$mO3v}P&G+4w68Jy?8o=1t3;Z*M2#;3)(@SW=MgRhX`DY>N83H~g94 zE8ounQ7?%5PMwCOHNexe;Mq=vo2JlYci7LME|=5%$vYZ0aFc1oiamLqB^1_U_M6gev&$#3} zkb!CLi@)Aum7P)CIF$0R$s}@heqAr-Vgtr1{cZJfRK43d!}OJhYOZ1X-7l#kmsI*I z<*IsL%zCN0BtI5WVYnr4U7votIqh1u80Ii5V&I}0$UbM6AhK~af9AfFal{NsVIUG- zKBwc)9qBnyz!cx_KQjA%vVe&g_cRy;>NzE>!p!O3YvFEi4@&gxeH674sK5!hL3&reM;)eB$e-TZ+!p zrN4}dPFJdGc)#tM{{K$@6aRik18eqR%ce+WxNJ$t*hQ_z&BxE#M9kmzu}z&;@Ch|! 
zpsa_ovsz{CHHqK~vQ0!t33;+ed6vC?q4ebTw6e-+r98KC8YU(%B4K?(MJAH)qd}@R z`fN~a^Qj-w)%KnKLDA=s*0&Sc@zso^k?_W5g|Y7va_avhEY+qwu(MTfoR5$6Xj)8y zJT~VhUF13(Kl$~XHOk9w(ONg&_q^FwmZ*pR@cV7JVm9u$lTszk5UEvJeF3F4+*MuL zQGc)vt-l)hZSbD)=F!v|;yQI;!E|XWs@!BG>NwJPp{TglJKW}bt#2p2amCl?aMre( ztiShN)#!637gVZhoscETS~fz;_xHqmOPA(|0p+H9b&(ZT%0ufB<<=5{a>6eN!Vmxe z=n7r{DWET~5Gy1kior-@P+}q&Suu>P6atGC!D3}Z_K8SiMfVAd3Ess-#bm|B+#a$ru_7%gd_Di4#=hl(pqm)a5jd6_gc}bX8SUbktOIwbj&AbhOoU^mKI8 zwbTvORE%_04fS*^^mNR%)Qk-DERA%{jnuWYjE(e-h@}sG%%r`HMG}pr>@0O1%(VP1G;K|c?5&Jl4j9{-TH0AzxL8@bm>T<<8@Xtz zClSmF=z-GbHK91OA4~akrO$U;Ri;bzh^#NBqOAlu&cV`oCS1Uh9b1!Fu7$?ol zK&@Cm^Q>Ug1P{WAD8r;gA)>j1zNx#4qNBZ~t)rE_hn1tHt)H0<*~mH8&CbTn#n##0 z$=BT`z{Sy@Y!~X|7U<*T=o}E@@9XXE8y)Bp72+Ek9B|Cv{X~@W(a4basDPZrkc`;C zv?PC5-?)IF75*|`P(EZPhP#B~q z*!?h*4K8LG*MvzH#5mCt0vTx`b!i@*8QzS{IC^$k;faijjH6vA;#mH!YsX?{g7p*s z6ASH?>Mt{8HR0!bh%*iQS~6!oqd)O z-%)>}uabJLGj*ujbGp&(K@a(U^|1$i$*)_f-@cMxZpI942PdQzWn>j*oT`aGUL2B; zUwkU>MOJ=xe%;Bu(h_>X*^+|h+QPz;hQ_)^CbOZVvG`(hU2j9><+jqn&XS(C`hm8( z!OOJ+?akMEo3HmaT<>ogx!O41UtV3`(RThqcYEixOC2``E)EY4_w@}nb`H%Al-(Zf zd3&z*)k;VI=-qR@lcQ71SOsp+!24JmCiWk-k+Le5|AHSE{)>~hHWoP{5Q9J)Uz+JCbjP5_tCUo_gw1Z^L6i+haEQd zVD{)#&0Wo*#Z;ui7JBYR`bW-{nzd4<#hHH~>1X*)+qWS~FpW~%_6Xmdsc}2BA|c^@ zP`qy0$2%PU`1pdChHqu0wc6P#XXCo4>Tu~dk5gMx(mX@ob7?Wo>v=9j~xD4 zU=El3^~(fu#$<{Orj?uc<7}ZebY2U_!@o)@gXHwkNudKa;~Yt4$RsG8%4$kvJXC$; z8b%kcb1Q$V-ktox1gcO!D=hsul_&ouGWD_aua-==<6a3h;pcwK)q9Ji%U#}c=)U;m zfmxP*j(sT#Kr2LrUa~XU#t)(WIi!5 zd0P?Zr6yTHJe?cudu+qMxqh^72YSk;%-r#`EyAGT3`0DsCnCjRs7E0-Dh+q-fxgr> z2^XeX?X1}N?R7#M=A>rgXL9>>c{Q!<{r1zxQpgzrzL`N+SN8Wl zvW`AB(Ruvv*)>?|-RHHhrmvm9{Vu%x(T3!()saU(s^U9we}55lg#fU?{=G=4b8?M& zT-Zlkl&C~S)wXDf7g4G--|NCN!lom(>1^89;HdyE)VN#Jfi>TT(uvUbSyi7c3@HWJ2=5YR}oRM~;lt9TuR(#+Ew{ zUXM_~{rGy|f3rLSM4KGZqj7N}Wvbw5>Gp4_pCia$?^DcF1EgYXZGS(0P`FoLs`!eY zRfDVgsD4T3aPo0*ZI<-=^mXBf0YjotutoXN-z6?0)qpy+h5ME$xzZ1I(`QdwyXiTZ zS8j&=w78xV@AggFr?JJ6zSoX~Ek(9|Zz-rM#qSgt(3RqjuHRehPE zy{Zo(wXfa2^_?e{5IcNaM0IxCa|&F1=Qmu)H*H*6h2|+{mmZ6KhDtm-wdnhD@orKR zPHv$6HPJ^I>#~(<@S6*#{|G;S+9Jm6e;iiea_JI@XWVNs&9y#X6*T|5VT3?~*hXkB zI{T~i-K$Ep<83*+bDTZ?N*icuyJ&bE{c^U@JU+`^`#H0JYjuYAH1tC) zN=Ibs+=n0Lp6f4EUHtwod=g1;SQ=fdzw<6{e*xa@7CW4t{^tSOJ8-W8qfsM zj>_M6eZA9recfaNC2e0$>uFo-LKg0oPLW{!@;!O3t&;V-brb)6f0+NX`@u|;xdxMpz=cy^3u&dFxt*kuMHg%DdyW*ls=eth7N^5_`P4ByKv^B$8{vC69 zMCBqU`F!wVfRrl~a_YAo!UUA$Lub-Q3JO^6fz^`D{ZRSAp~U+tdvQ|Vn23X_-I^9n ztWRYs)(hjW>lMcWujH7_U*BdC_ET~d4!vER`|HI#S*|~%ZF~Ftzhc!@H}{NN#eI&Q zrPUVyV#2d!EL4B^>+ST`t>~7jddeUV{IZ~gRg<_6GRg;83!h?Z@WGu4aD>mxNxvEj zL|@7p-nprK%CQJ6PYe`l+nLt)j~07kVDH)&qCrlsvp#ZO{Ls`tw8Np9e9?YYI&X=$Oi{Z9r<5V0{wpo_L^2gDQbfmXw{-rgaFhF z7npOvngFN+vnVMCZ2D%ps$%u4#qgH8dYEBY13}$UxRPCQ$D~ZiVy)*}h$&kUm~iWFk%f&X)pvxOh$e+m=ZLy@9S;^Ci&`$&)anhhTa-WEZnuA{>P=$v zTOs^k0K#@>!t1uQQsYDq*!VebgGkLCJWR@u&*y;!^@MY19so_}fK|EH5<{eq$7ORN z@+<)K`a%+13=b*ZnYR4Khw7h=)k#>E_bPs^u)iQYK(zP46^j`~1*sb7yzlm)I_NP; zJvrqh+ct*S&n%A$NICyCRO&5A6JMx*7iQA+a8GsRp#LYk7cOC?>FxcsC+m)vgzxQd zhz*mqeW5IQoolc4gPE`Rf&*1z<_d-KfzmlNup*VSD9_2YbY;ESI!gk=1jn&L6cAWY z#WN-3DIZnBB&^%%nF^!&FHZ=C5NzNC90*Ni0pw^uO`lj71uIIsjyE3N470BZ`TnK; zgmoA}t4UG$z$X08^z-)TweT6v-9Aj_wxXhb3gPHx)Bc6yqmv|<<0}1yYDE7Zq3gyP z`92!LkDmIr)oiZX!a3|?paI$Ud97;)A&R>IL>hn*Dqy}m1~{6gO%x`YJ${qvPXm5@ z4}fi%gG9QngWt=Yg4&${cwgke%J$e$n=G66&Fgf%U}03qdc^yLbsJ$DG7aR$dt9_r zU(2qa2IaAQY4n-RhM2#MbxMNc?fjix9^;A?^G*fuB-CKF$ zL=?{`f(#dlHMbnJy}B4lms} z4p4%V<{caBzLvIb1^{z_dN{-~4$EU=c{KVc8?Mgh#MSK2qOpW`zo6J z!W3`kC2r!DZl}9PV1+^!v(#?W(jHQue6^CO7LKCkt`6pA@N&OAKk?F!@ISDs$7gXe zPt@Tyl*|`X`38>l&(>vT3zJS|#-xL}IT%vPJyo(P!P9Y{|ThvF)j$~lvK_e 
z5+)Y_%3z#A@p2Xe(gijgT|)Vlk+R{yba6olj{*rFMurc=64*TK;n3z;FAV(B;xusn*H2mr~ z|4cW(hA5sV9L4doP(N6=ej@F&(?DyL{R`3Ci<-B?4Wgbw`{B^dMMxMvm9TsC;VP(? zxOWOmEr8+x0hu)c4K-$WOD#?mpeUu|yJe#M0xTKKWYQ-zu(zGcW}UzwN-TF5u=X=Y z7;ws?zSOr6_~;^Rn^>_eAQxw5#86;ynx}_(=olXGqyvz~Mc)FT+!$&PtzAzA+TVZ{ z{qvXknQ{Q{>@{o4OpkOkWYUnsQi^qWLFoe$?k;`TuOg^Qyqj3Dlxq|vdFDcj!Ap|M zGtqB#U=kCoAD=&}QR88jp5*`-3M{?0UgIB|J~msLrB1&Ym%e}~7vh#(-AP-0opWD< zu1JGVI+aZZmAM5W^mqUa;1qYgY+VEB_oiiZqX|L=Ixu0|YshDT=&?OMBh>tC3=7uC zhMi+0TPUZ4SjqBKSaLBs02=)>Abg@m?Q#Fgh_X9z!XrxARIuL z4;UW3;8&P4`??G)P#=(Cp`);kH|Uo<>{AXpfeH&D!=83Co{b`J;RJaDpcoZ4M#heD z8hw@J_oZsZ8_7xg%8>D)iel=gmE5jEA?Z;z#j}oksnxn0`SFI4aOk$~fFEvsi4_az zu9Jd!5lWB_M;^eMOvqqka=r=)tVGFIqUIaYvK@Z{H-EISHPf$4(alOqhvQ3y2(_YY zz+l2Epl4?!~upiQqFN%1PKLB5nlMO2&T1?fXPPMKK}IO_PIJus$~O`Ca5N1*XD0KgLJjV5;b8_#bc5$_|v1 zUamYD3W0kFyZ!9@{ko@Xgf~=-?mf)4OtY#w*%3?2h`$KII)E*B zpg|!~W`gD85+>)kbK_u{AFk)CLX(H=9}j zu9*)mUmp+$KyEsgJ;Akq#J4{owBK&T4Fw;4RtP}2zH)@Y$l!E!tLAR zVDkV$h#1QAPFS?r`G`zNqQ#Y`B$(GWB%*`t;bos;9(N*FL80^Fp=}){i4a>71fS}r z3#f|ao2`9iVsov)30)B^@V5@-x-J-AFo*gpyNN%rD#Q)~IbZ*=UwdS^y65vN`pF<`4D> z5Br8PzSc}%;zKA1z{qI9wYb=QYycW?>ccQ3dKeb<3;nDFAs2G%VAYT_3mnCQ4Rert z&*IPty!0_H`Z*3ZB%u&fE9a=@?;L)yBmBV0>v342E}BeyR$Ec&wt7yQAyamA6O5=E zfhvTeIfTh8%(+WPCpq|8S1Q;P05Kqe9{5XLOuTM-Ac>@r)?bt~Daxgy7y>oo_Mzdy&>k->IUH&lpK5wJ>| z5wB1#tYw>=KtZdtJnR-iYoD9b=s`N-k{2AVtCI6OU6YI#`t=giWSX)iaRbxYb@$hs z)?QDMg~}-i#*lff>%W>?m}!!1K|ljIi-}&sJv#lJ zvPp{$;1QwQixQw-O`tBbUlIV)C*(^KR`0Lnkl$X)f?e(%ZPt-_u?l}NZ2rPnrp@kN zy9i**7=Lo+RicU2#v+3cJk;${y7a ztVv=$$F#G+k5?f~3TB6it>OzxWneevhb129e(b404-lQ4@iqbdqybta=XvRy ztTP#Qhl+mTluSE43qG(uVlTdK?OHVMTK2>=-hS~uSQkc=!nG8M9({vu+|V{6iuJ+i z6d3CtY8JOS4A>JI=9~+6Nq!bPd4-A&!hwv4ON+2aCcR)14wl7*81G#t@!$n`*nds% zB_^hg1YS2=9hE)h`={VWbIwBHsfD$s*uHB9O*x*ZK>*>h?TKbZK3Fmzj2F~Y2p^F6 z4+H?5N%$tP(`}GYhe!Y=3YbB{*lc1pn3!cQ)EJN0ybrohtOc=w34oz@nAagxzo;)_ zE=-6kIkEeD`bX;1M5*g6{FxkzwwJ&Bkb0cA@B!?SsTb*P!Hy)%>9^GXQFJbjOz;06 z|9p1e4BN~$bK4k3E^|$iKAYQ!8mWY8liZRJrA~D|n_=WKk|=3}R4SE9ola*PA*odA z=sr@ZqjS*Zs2tzlet*LDd0$@d*X#LwJj+9CHhT2ikbgxAU3;+!VGkW5h<`vouTHma zCSl%Q!Jljxbp8a|4re_6_U!(*3s%ojG7;g#Fp8Z8x-Lv33sr`#Jd7s^_YBrsld$@c zK#!g>8g^Oh3R(}hEO`!GJ@!lfl}mQhEdk34FQ)-_4uAdquf4w0xK&HLDo;i4=jPZSBtA)xnEW;-|r)!0YYgDbzdS+`HwQkdmpi^(` zwpz0F_8Kx}t+SH7W#6=d9N1?-+xz9lz5~U^jotZ^&vqYJH>77l-&ZU-I8e*e%1g)x zLZ|5GJ(C8?*q7|?)JOIbt`1TJ{W!W;mTm!%1!C!zt-moICSO1+n9HrCk1)6btBhBj zB_%poR)90eS=v55)`P|pErd!QUm!R3PM&Dzp9l`6d5gW2!)gSHWfb6{R73M~GuE>c z0<5d~m9#3<<5SWm0ZMA>yjSJ^Mo?(Bv#s?Y?n-Ir5>9D!7nuXT!LU>?;tDsdJTmjb zmV+)=hBXFkmBI#pqFW?8V6Z*=j3Sh^w&duZe z&xeDq_v!wgkm zj(UKmF!fh)0V#1B#C}Nb9>e|(wTl6ay+T}M{Sg)(-r66X=$j*~9_3@NFJ5+<-W|4sOjYgr#2?qn&M9Y5gq zaXRYEIWI4d4t9mbxU*GK{zaEX7iunsyPSWqb?_!&V#2sXJn()fe}9^DnR{QB$we>0 zZnL^b;R3+K(i4V3T(}U=p$FasGqG8Lf->!9`{MJu*uU%huGd(I84gPf8x4y#o8Y%K@eGOo+&bpyadC9K zc5~*3AVcpf11hBSwmSzeKL(W=Ft_I33}J4cYVI$24WBaN7EPW0dBOLa1ZOYFpG94L zQMxtB>C{M1#le};zaqW;*;A&Hjd**jU0==@9^m!P$?obn7~Fs3`_-Mnw1qD%1xo;YwfOUo%K>`iy07WQYi&;J#$Y~Hs4Iq%*3gNsv`Q_0yGUm&=1jqgQQ1MZg z@x9o!KDl*9=`k>-RT%0QK`xBSJz@UqA^=d^TJ^ZAKzzIOggCGYSmRziqCl~Zun^=d zS95?M3KG08-{qUh9Ay zDM&t1ue7B$%RX2JJ4LC&du@siecDha6q_0B8j{ss?nLkVVEgn(7Y8%<>6X!MHIlLL z=fxiy8%963&wUUqb?*J=*NT!dW~QWG+&zxtACMX(FNl#26V2u=VwqJ0yJ}aet#Pp6)oH5kViv*7}81U2`ZyK#x=y_?4hs!v$F<*@+6d<4f{K;9M9ayf=L{Jy)@Fn#ZtQb~r3 zZ+I1!h=KqezA_)04H>I%GO@7ibMbNazkS1<>Ut^IVJIuw$)XsMqQ2#m#da%l_t>*o zTUBvHTX-hcj_~}z#4V3nPNZwe!3^^H+rpyOs6;$5zZ1FtWl)i4yM*^|!L5+jc?Udi z+>XTTjQDly%*A;1V^2+o)UX-An}+c5t#u*r%CWMvNra6~6+%r0N^W~2kcutsEGrvw zN(6X8*IATty4G+ZMaT#8gyXs~cl(4A)E@JG>|%>U>pcc#QR&DvYPp;KQeHFWQ2yX`v#WuGMUEwW~Af 
z&quF|A0w}0t284hL0Bv(Zbgc9yeVtgO21r3GSIX)x^i2xdH-gtJgMW3z;7l&+L zP}r)dx33&(il}lpgC#A+0Nc;K-7?{;0%9iJO8v4aD?6Xku*SiQUNzvoYcx1pobg}x zfYNQGh!BIVoRwAD0du?`;oVonw^Hgz1m*H(e}?tm_muGKFnh)Ho#;(dh@zj@|2bWA z%6j3&RB7BuqJd9OJtO+=rd4x)@2}HnaY-yM<}iy;*|~xfGK{dV(}%x@)Ox0{aKIrg zCW4F3An-vFq>Y!dU>vGxRM7rk+Efz4@#b8i<*M{6&sIVR)HJNM3k91mLn*s0MOocP zUv5Sn6KwzKQWVEQq2L=x+%!ms)wWxyFuDwW5K$*FL@w^B3*Js~=~&PZjsAhafW&j| zWr^n1(Aza6#N z^9Zvm-eq^eMJHP{Np%UFt}q@&hVkSUb^3%cm^_N`9G2hvc_V`N`v-!Rw{L-$2%&$& zWBEn|2C}rLaP(4@J&&;bH^)iMsbfF7&rCWhW*VzkFHlYhoXp^A(M7K_7Y!nMkTFiJ zJgT2-*qJ3yPe*1I`aM8R{Z^0qEEkcA5^>(^=J5BG(k2k3&jHt({h2~psE#J=23-?n zLIsYWD)j}N?dzmb3PmUvCg-CD>K(Ad$P%U8xDy;KMG!@ZYky(*EPy$RJo()w^8%7> zvKTLdC!mT-E`Vj(7&RzOwV=m_eG_v3;nlhneL^}E>+Diy&U)AKY^#e+92%KQ|zCD zF3zAM|EA-q)4UNkcSeD~+H7t?W3X)4P9i=d#&xb`nTV%RTaymDOrFQHg|X9Swp_z& z4^5bC!g1?botLjo-tV@RKWx_($c1oEChn; zELIPAZBQx%?XU%-a0l$1A6}f+Spbk+bkXo1$Vz|(W7=UW-mxNtW$!OnH_Ba-nlR=# z;2#F?ZW6IMyf};DV?TYi6oDtOr0+`mQvH!$z6H)|8EN<;K@qOx(fx&}4Y*K!KW+J? zWgm76Tqk9-TB~s8wexe!@~Hvi&jszzsvVyiz(b+?r~2PrD!5B5nEdVd*>rTzT%l-^ zUmSysWRxu=`d;^DNzE0Yl`fEullnK@_0N@B_y)gBmy!T!7~4`P0v&-4$9DLze>1Kf zUN$`tt-iyBZzl|Zpcpaegx-+aAUOfpXJJUcDWcRxxv(7J>jEfYELS>%JBMnAF(iQ4 zj*LH3UNRc&uvDK5ri=zqKzl7lY{Z@H$R%06ES2s1j9c#v8?iuJouzZWF9io;vu$vq zRZ7xX{oFEvQ5fLKuvc1XrbPcV6Zn6TKV4whfHKvd(2HCr8^-a=b8y>_miD-tF{ho5 z{#O4ES#Wm(`Q4HD6YuT0gqR;y;63g38h<%HtMFF|f`Mx*%LUB_dY0ubfB|)S0|dqr zkbe0k>}50P#6xc%LlOa0#sFrkzA_y^o^(VT=qeM0dLpqYD7yFSxObg1`h#=f4YGb^ z`e%&xVt9wJVvr42Pch&nJ8ZVzzF`i?g}x^?OciaLE85U;pC=%J0{}({{7{4813uYv zwA_J0=5)myT!S)88FLmO;Is&l5uPjk+EM$40rT=v32L<6G|9M?yz#NGCE3gIi|o^G zyiJr>6X9ma*6Pb*>7E+5!?vxb(~sZj?Rt--{`*uW8g>#Hc>i2dw0%ukwe4!t>=z2lV{}BcGh+K=d7wZ+U{M zXq#drbFOH+4wE@t^kmQ_i;JSh-UpRkR_&_osf)P>P`@JaSW!iEvS+^?4s-I-4_<6{_g9qkcj+C{PW5J%cFeO++=O;V zPF!33lzw(_$xn7s>_wtSQ_niMa9w+$GebH*Fkrv$;j)^C)=IhJu9QTQ8jBI(IBc{^ z50pS=sEhnnfN2&rr5lbkL@lH#FTQV~)dY#;`q{|>zv*kIkg zC|AtpXUWWP;EoyG-Xn1sN5FCz8P@(VQyrY$zCKfBv!Wz!N(nGXa=Ps7^X8(lZO^vN zZ(Qvtjdg~nQUM(A^{H*JeY^%C)67Ns#4PXH&qQ7H#rQP@q}#wjFV;cPi9(ML4;qk|bs|>o4tDD@%+kS_r z7SL9l&HddA+~o>H+BI6`s0T)$4l@icSD`E^B=?7BEr5z8XMm~yD*L77{okSOb2f~+ zXL_U85?k7@2LYp4%L!cN=g5X7ODhmUS>Px-J&VZV*4c{@4YSJSCK2!1Zl~0ZiT}MW z8#C(9c+mQz<+|dD+Xxb~e@WA3@o$%rC20sdmehIpdFFpZ>r{oAh8XQ*8JR0%wo7~0 zycqncGadl-8hdB7lr{&;6rd3RVpaY|oiyfP#goXVr+)W;APa`jE2v*^6KV&o=H#Xe zFvVjqE63t%YOVjU`_}J@H|-~GkW8-bc*WHk-YToP3b!y(L!U+8vFUgx_Mq50rJ?#WefVbXx+FK%%PB zy3Ke(0goNK^)?@V(_I_5p;>(@`9a(=jr$=D`|5-k9v+0rfckl}A~4%x+W?wBhoJec zpXD6}>%y1=$Z#knMS)Do9OY&e<)*d`lZzcb)4Vff7Aq|ro_Sz90W5yS4t&FsMd0Eh z{Xw(F(pSDY%!-SPH`*rqo`dk!C_Fsj8g~ig^k7~1Y_qj?SyBKyl0g$n|smA{K2zoa> zHLZxrQv&&b6$~~mtMD0w&EcSj+1L+QzX zOdK?+a;`I*>Hv3Nb6K7E=##K zPdV-j;`eRFbi?Mp@{OXhO`}D|BSpZv%8=pTwx*FD?*MQl4??OXjdXTBHRLas3|#2F z|I7TUOdJV7VOeY6-&D_fK-Sz7km%P;$8`?fOj5hq4T5(|3teik?&hz7y;J#N@AU zS1sPTd1>vt)*#a!cEfb)-~+*qF~Ot01&Pa_)rwgwO(s;F*@>Yq$gpV4X5XSUTwq2K zC+VW;kC2UXh55Xq`Z3J#SkblJsPCiqXIBz$@uxa-m=D*t6aDD-*u=;%%l)--2R-&=N>grV-$Mg@;6V;sh@1dXMz0GhE2Spe(WM( zw?)zfxIS3(MggFQ%e__Tg(MYVFumBw_nB<_2DcQLB8162d62GeroyE#8Ua=e*TZK z6$Y5*{mgc-{iKwnJ9ze={S@UMd*h$}N#HwL1x{#rg>E!_l_u4}4;SCZ<^OD*0JQx3 z*NkcQ&A+S$asDG+(mR-Y$9%WHQoAQ~E7{$dWV@ojz8#=nTz@GcPsf7*5P^iIvuiV- zpWob90lw+u9leqe;S|GV=>m}$S7uv`VD7&*u0MMfsyV1%$Q4Q-o*H+8&QE$wg zF_nyEiADvQ%_%c<_o_d1_4DWaxf)4NYiVwu*?4e$|UL7 zt+jDc`I=f%Iv(t;5e_jL*PTo#Hlzchlx00D%4|VABNFi z<{8KQ3m|Ia4!_JB4I54nqJ~I9M@;?UiXf8oQKR5BxA(4FZ|avjJ|oKNP$SR1{M)C- zzsRDOeEi+BoI>?V?YLm)r=+4S7$!9r=)TixG$~1^!&q{T@-ZkmfOG9`ko3~RogdsH zPcX+&R3F75)cVBMTWD$&bv=g)b=&<(%6nRP_~0)yUl9uiYpi_gHkY{AV;G74eRj!* zBsK?WkA{o?%62&(GLXNz 
zYVo5Qk4T?W>7Vbosk?r^>+`JPg7kt_>T^~BiYk&|FQL{n70_Uo78g2on8RF(GJC*Lz=Dp!f>N6Z$qOew^=UvQN3au{oDvicVtKGQgQOks3QBLqd5lOMt^B8B<~Ep zgTRo3^yR)u?#Y8~p|&qSV2NJpu16zpMYv~}l1om%jAxy&`ep0=#Rl#-E|eEDTFjqk z9a=mnDa&URYdqT!+ToB(JT_)>3u1l`Ns5|N{#VRO8G%D zy3>*nTEYxra{|tc<|U)N@~-82X`EAU6Y57OLDxoFH$ecyIJ%$iXgbAP*;}op^|bcG zr#z}J^w6D=7>~CQmZJvoWyJ*}nxg!a(iCO0096k%+EMuQNw2U(#;h6)SpeI*CP zs1v69h?s8~nnY4yepT(1?FNTD;$HX&?84|^kLHBskpL@d$q`O!$PcEE;}xXA5WWoj@cq9 zTa%&2xue=%!{192GXc_x?%{7<8oa>AUir zk#d}uSnd`34{e`3c##;4UVdeqc7<2ams2@LFYN?i=&hojAjCkSG#~4VhKJ&Ix0s zvBS*;1H833*P0=t%w{BK)E?V(irkc;Bc5nSdtAd0UA`{B>m$b2%Y)q5FSiO$EUhO0 zQ%$!~i%U?8BDBr80`Ce;(>?3I# z;>p>Hy!(p*gf9%3r3zRPQp2-fjn|riy2V!>lozL-sGZR`8E)Vs2=h6#p%*ZAa%b6p z$!y03(<5hI`st+J?U!Yyae@0Thk?I6Ywj>_DRc?KMTnF5_?4;J!lR_Z4zH$-K{A*p-LyiS#(ldnBK!#{O~lR49&N&*emEYGP58}XzXaSsf-$IKO`#jsFRwEdR}!I(Sdf82vQ6$*fvH9Pm=DgKGHjJberF) z78jTHW2Y;S_Q5vv3COrbT@)m;Spp;h(Ca9*6jd(vYxl+S!>ydvD|-SdZ#AVpD6c-ay(Y zr-y#kAqMeM9MiQVt@r(}y`$DaF=?qmI}#qERk> zTK{gsbpnP%YV0H)H-Rtrl^RuO7(|2*?@$O1Db2dOr#t%3Ttgnu8aqdqzCE<@ftJY$ z#!W@GMgf>FcjIoa*|?s>9uYKe(U>PNOp>I7-kWIC1F_vq(yb%$ij@gp3@o0-9c+lr zxS+=RAF~>jT9s`&3`O*a$hX8BF6`zrRA`dL2(G)ts0^^RsWEyHwt!xh4vo*qHym#X zyYm8rV}LKDF+5zs(%wM-8l;(+PXYLpEWVMmMxTq#n}11m)=J5k0#`BQTy!u@bpRO~qn$TDQvIT1#g2NDSAOuIL<(6r^YAD{5+D1vi;vW>>#|9Af{zsLjl;wOw7xZWA17d=Vr@7Q>=r3mTxV20 zwj9G5Mq5rM1wZ(7Ab#!+IqmpC9emFF7_ml;oN1G%NmZc3QK(KzOSbgqV+s&%;`Sfqz1Icq4R z5EZ3m@c6V~&?<;;6=SFOQnPRZHnf>7vgMQ^9F+psvbSC}cHT)y@3M7^bKf*>DOmjX zQ?DsOLUf>8H)$VdL7VRw6vzqkDHC8-o3Z(afeg4HSMAjG=8|}T5-kzsL(3A`?prj> z(I~8rzTfNs8J5u(Hop_sQIt&{Ym7+B!eYA#C1b6ks?L~m>>-}0>4fm}V^-;$AOA_} zSVtdAlafdI?47k?jbK)z)I_XVF?f9bQ%Q!!$jsd9qeBy!dR$J8#>(t1&!T|a*JGxX zuKEbjC*i^{KJ$fk(G`I`17tyhbrlHCgut;*=Gsv1I*IVkuswd^ZCu;iz4uaFBMS7t zGd2VP@7co6cx#12)PY(ST<-}Yr+32)s?0T_X9<<$Ga)me0-xOv6!VuZA;gYFB{ z<=7XESTTTBCw$%wH;Z8RW2sx5_%=FILy&Xttp(rbIQh%K=iuW-#~!88ESH)^Jq(-J z7=$;tkzBToWBqP5EiA;6VNR1KF%mXd=JR*uq|h3q4srpv9W*Ob8;gc5)wmiy52o^7$d+0ZEF}ff=2})&Yp(lBsTq83nZx0?Pxr>@k=bv2q5~ak zWUgh}W`Tf1_pq%jSEbEO;R^Kl&&Ruy)>u+4CcqI#~+P+H=*srUkpzBxrLm_0S!Ci zxFd+06oLCSfRrC`w4Iaw7BJEk^*g^$8-R#iYNA?qwBo2W9_dahaMK-Y+>bWl@hKU6 z7u-j3E@(XmT2Fvhxf+HqxR%Q|;%SJZYFrYG8?nX2K+9;*E;8&FJqQd>*@wZ1Ai@#T z@>bEqhg9{Vx$;F*kz3Lacpj4KJ4BS`%>})(5cwMSyhDR`WxN-66`3OUPZ2auGJ4In zr8`M3`M{+oh|A9u)M-bvhr^~Qj`-{3l3NQ*dM7`faO&(>VbI?IaX`KG)!3tL6U!vD zzbEa5Ya^1As_2svx}C;sM2{yAbV-$wd5S-N7|l!osW@jN`G>R@ub+Lu+$mGj6e-)k zS4~*)jjkP8b)w~FR_9Oi6%DBAYq#qC4*KhYSL*A)^9WvVVDU8&*JLPMQ zs9y&3+_E#$*pAlKXHCiH%E_e~=+Pkv-@bL~#iM}rIoaC{m&?rIk<_Ch; z8$XZ-;Icyg%f_V41BMf7c5{gPLVD)5A+r0sGxpxmQsWo6F|DjTS`$%bNl|T zwdCV+!x3Jv(8A*Wvk_n~1<8xVwNUaFAbcDD*b*A~$*|y@>C`gGIVVZK_t;Xz zBm6QZ9v3+GO6GY>8IWV5*twnDJBb35Od1Y|YO^bvCCt8mUN9}cy?pkEaDs9B_UmVw zM`JT{Urnrjm0HF`Y?(|37iD_bZ>CN{^fYJQoXjOvi#7$)5A`LNCDW65!gr9wsulm%9_>$wc zm4FDM>`Wcp!Ba2X%r4Fws<9t)K3y$ZyX=yCcaNVs=;LP-p3S{^EO77ic;a@cx zrS3jndKUWM#~G_XrKU?Fl31=Z!&%TFlOCj@s|wZC5M>16rrYXLBQ4zzqzt__&IPUJ z_^gb&wF0f}i2g9&%$@*nBWfIJy+MM$3PQLmVjO=yamheXbRr`%)ht8G#PztpsJ)Q~ z-3t%ZbM+&`)QFgMc`k6?Cl5Qza^BYBhYI<+e=~&>8TENkRI>Etz;M#6;cKe>cjYRO zyTEsOAwwQG`Fu{cbQ)n=23@f*!q;iszGz)_GS_K@n_T7s1TKm>i|Ovb^-j8U@b6_v zjrL0^S>W543QD(I+Md{W!13EtH~z_r*R_jQ`KL{(tZBhrTNvjub2P!f@aevMkJ_-K z3Tfz{GG+!p(wu5E0U1qbY-_e~rZhc=?DTil`nW(g3>hiZ#GIp8XRzjL*CMsnr5z0a zzGfBk@}g*#LyHs}A*rvwKkjb$NcGfV#(9^h|HxV6+`Q_YBQZOvccXFvw|Ols7bbrZ zynJdXu@AmSVBm96oLw5%F9Mg;WScnwcMfsOQG^$e1Ja~*&lRk8xt_yCwQeJNT_m|bn$L_Hl z&W#)GrcSVd05b**DT%+wQ^{u?%I2Sd?F_b(6ZGo)CmI{*Hxhk>DH%vmFOn424_sjoch zVwLBz@sHi5D7l)eC}1Z6EM=&V2o%P`sQg~6Qh!rwtQ7nKrq)d-LsY_)GBv9~=0if* 
zOv)UmWsXDF^JfItI_}U#Qfg|TTbTxv3hU)%3z#yFhzx-l3pWXc@c#VE8b7gLFuGF~cmi15Ie@j5Ugp*6s zvK#d01;%I;G8Db_!XZp1G>j>!kG|@VE#X8Uk91enS3+3fzbN6LAd|Xgn{EOu)J(#?U5kK!)O?_#Su<3G-iT2qA%UY=W*QHiv zORpL4zg932)Z>$}>*I~GB%>*7(=rXa_qrpXHS}6IHUS;__ZJFJ!yx z?fq5!!W}D@->9`@MF)0-ZK&j>?p>6jJ+%ee5m8(p6+ZY+fXC6jCzJBlZtuUhFnv4T zYBePAg9PL{*wTm_6C?m!bE+$3b9^!3*J7rr25{YyLk(K@v7-H1J`7^!26%%?%? z(119T^7~Tx8;1z+jInI=e}0&J1b0fy;_{!ZICI+`goN2Pt#}jrup^ud3Obz1-l)Qr z_|BA&*nzGD&|{$&Yh39L-4tJF4nq0oO^x!`zi+AY+a$g@Kl{)iqHe>@pt0G)75812 z9i7Z-3hc*ON&vHGJ%u5wEnKs7ownsMjeOZGb@0(ZJu9Ug6^ZmfhL&3drIk&KdY_zK z+xbc>vx{K3C+2>@yy?1KR%J-Tc`Q@)P(-92{R@nl!pto+4G z$BDL;GlGI`zKd7IM5LJ~Z7CoazQ)!+3{EYrc3e5Ui2skg#6*s*=g2**#{*0Lby?fx ziyBxkwzk}@0E_5Beob;p^hMiBw^`Atio>ji+xB%ke>B^>-cXDmaNYm0#GtDvy|n03 zk{&{7k;^?(jEj$vcEvyxoylt-?cqx39aZ+>5SVCwISi zXsu6FkHRR~Tm>&Oju%V&NS@A@Kx%A6#4Xnq8M3>By;D5Ag)UlVJZQl}@*kSLC2s39 zn;>NoE~up>&h(?gpq*p4t|sjHUas7z2QbsO1uQ8vk5LAq4~0g~^wK}<`GCIm>Eh3c zrHf*wD@dZ9OOo*auKgN?mku8=X0xd2LY(Wn-~KV2cP5NCTNh1pA#h9n7=)uPG~BMJ zONjn(V9Pe`u)rX1w%MR4H{GVBvAHz8y(vq{FUs1i1W#XVhFfZ)XX~`}yRcCpfp4Wb zQDmJlz$QSf+qR`qB(?`NzCvzr&f$1&tLe&-Q2~nR)O``wazu~}?flnNr+fd6h>IRQ zWuJ#YOqMEXQJByAEm#YHW4;>D^wk(<4%_KPvz;nn`8FPh>~W7|OiswhXRh8=gx}6s zHP}9`aPsH3mOAa7c}o=e^Es=h8q<0ShBuJ+sT%`6jQd;vn!8q-iGNc zLly59w^nTB(B*_jNKN+=C|9`vXk&8Cm^vyZ-}tZ8)>;Y@W5pd5NNtx`H%#9<*TWr2 ziu0a9MBaee#9%fdZ}d*9yKoDA_x?)Ph5m##caH5r2?1$# z+u6Q4u^PbrEmnkPfv8O+Ijb82`YhT25VN|@#{2C|6r~Nm zVo&N_HdA$fFN0ELKN?!(N!1#=B3L%KRLGfRAxlJ<92QR62eqs%YTv@{_m->j270V# zVXXh8)L4o`EwB(01w=)V%i&&IR?{KQunJ>40GhCx3<4%0;)ZsqQIM*jO*CeBVy>5( z>{T@FtU-oCJ@#HrB_cf(S~v}13B0427`^Gb6T+dsNC&dgc9%!Is<$`u2(J!{Ov+K) zwm&Er@S3P2Dh$ViBt7V@M&zk6-g=|z;qP2T@S^7lCHt`KSg!79t_yE z)gh!vcf4XmjD2gPl-hLOXAMpi^G@L-bEOjj8xVp=C(3={+b1FPAy|5DLe!hYNB0! zSOn;E#3|n(r2X6&c~xoz_@c`m+);MAVq$2P)niW=>jygd>#4b9^m?m4kfxLEIK*21 zwDS~_tXHv(96>ezH-4^UI@l%s0@@ZZ!8h{?92D-vD0P8(Pv-?U)KX%<4VWJNv)3~` z`|j2~x_Z^{Er%hKm3BXcteGlVLQ=^?388icQf}j7J0WXxDx_!T2cYj>yQgJ2<30g zi*Wq7(wqXmFOsY$ksHnqyYX4gq!UURCBog%E`|jfrXD;50r+&^+zr(LUvi^G=yN&D zvo^x&bJk0S-rImIAHdkO07k-8h*^{c?K1~-7QTz|{0&941%bG12haGh;X{7XF^Ec4tGQuf!G zB4;^&zpD3Lt{P>hM+T?JyC^l{0{c03Xz_57tAEwLjeBh^o~w#nbBCo(uo$&iuQ|ZKZP?(yOu^D<(rM-8`=LE=6 zAv{!wS(u68n@8~R;Y}n+?1NK-PAqH=+x)sX#Shs3%IVxf&^bj>)cz(KG)5nRBo$+f zQfXyI1XaeT`hj>?c6|}s)OT!lT%V9RMiKGN8zt#}l>~b(wQY=g50(k<%f)tU)9sE4 z|0+zvHKw*>0}`=8HbX`e{Jl6q&Z;*f&Q~823H~d)UJ;Ld6dPySV@o0Y-rKmze%zNV ze5n@yO*N5Rp%*BUyg=%eG4pmd{btkqTo$VHq+_NI6Qs+HgYqB%C4$f*m=LqD%~;X@ z3$q7IhAD0AmJmL@R|UF42#aZ8X~<^Mi4|4_Q_P+bh-G|)W!%?J?c$SkMkxPbq=6bE z$Rf@M{ncW4&JQ>TvB>+Sbhs&siDs;2Qxi0dWRaU*4VIHV(9f$)7QDn~;60+8;BiT89w3P{q6 zZwPF1g6=`OjuH%^;>45`03(9YaRAB-K#O#k1*U`v@DVmJ2t;_=53NsHl2uxOq39`nA1JAfd2VAI>k@* zjM&r*H_`U9nOilc34BHcpW!PtQ8Y1z)Wqr!vqRa8Qnsl~;_+)1qfugcG6cW$ACpF9 z{Wllgy})IF>){^*SClo0jusQGCkz`V4V+yUXL~w)6kjb8S{I*Z zo7&t9VcXt?O#a4I#<44(jnn-#xO*&mIA5P!j!y>4uZxMkEe|UUx1w~9g1`UydGWE=SuBIIX~|OBMhDU zbATHVg(Edi+HB0>lTuYlu|_TLR+X};hYeSG9XsQ zip>{eF+rTn`$W5-6@Re;KLUh3mkNm$UCipW%`9S{%J(XZnkjY731wu03^~Zi)DSCJ z#t|Ab#Ms6xRp~c13E&%l0gUHh;~X{m=4T|t;<~X=JQjMT4wVk$``~D@gxa~#M5O?T zY8PtcSp#kpo+PCT*#sxCmxc04&bCOST<518MDKfO^W&t(S!9y#YzL=yo}=dnAh`gl z2Bu!;8z(xVIvi?iqk;@n$a8=}I!IqTMtCANB^fg|j+v%P!=@9lyG66EO(woS!5#U6ZT^D2krjI` zW@zq|JvB%2I3ja&_^#eDLbYL6v#`7%;uH$+qY=ua48K}&=9sWchYQbPhBV=GSS9&A zq-hm?&?Z=-CL^8`bT(YzG#Qho;xPUluGqw-psSN^P%p72h`G8(lWb`Nb6_WTE_Y+rwB{zlO(2K{Hrmny868|2Xn8fbY`9@1C}}lb7SL%hFBa zxzxn!d7*6nto%ife7Fy<0<6svRUKttU);7n*<}l9!-Erj>{i~@M~7D80!K5hR;LDJ z1$P~R*Pa%}sdXfb#9)ZU5Z5sFOU#$B85xqKH>xu%wj)uaU#e7M6FwE5w|78gv3^NY zZ*f%biBs?M0FVu=(E078Ha!-37EqRYqaZl)#rxPo>=ri`TTL@|2AryC*ls#yUXAV! 
z)*p&QqQsmF;)qfM7@`nCEQf7Ats=F9!~g&%WYZ~p%9Sz8*x8OQ&4Z*jt}nxbR&*hS zKq2~HLVKPmZpSX%L0)}}N%tY_QccJj&@{)l*c~)&95Ztr%lR{JGFL*HD}OzVWcwfX z>Q7r1vSO2MvyBY`w+D@Drb0z_#C1UQgHA>2zw9#+T}(>mo?VGYSvb9A|E44f^)vZjF9L@WyTgx#_12y2>|Ykc>OoEQ4VCx;v3IC+2aHQ8Di=z zM0mDPR<>p5V-b7Xu4DP@0#buw433Gis%C} z1RC6cnjXnF?$aH#ug72fcj-}^9V-dg5S=6G>w+W1!dlfwJLVbp+S0`*EymdvePfn4 zn~=Gx%uRdDzo;o!B)_FUJ(mp{v};Ji&q$pRr56%`Ys+GiJP#LckeDi)EM90V6>KY|#&n~ECYBbt zfhIA0Qyt&(ZQY46jdjs{aTHteX7^DHpYT3vTfnwu!-0e^>vQ{KyZ%OsKZl5~m2LkR z=40HuGcSH;fpMn%{MEOC?9{hd#dB6Sa4nmUAJNe=G^Rd$gD%L>i*4d@omQsCC-&VC zsL=Z%0&NWzf}+m8cPulF+Ooiky7O&M&O2aYq~OBP7QN$Ia1Z3jQN zf@MKraA(+;TSpHSu5Zf9-it}e3QLw2Y1WwT7ll?@+`eULwYU3H0i*HlQ>xB%@YBp| zjP^@|Z|0XZrFMubU)zchEwM?S#nG(0$^R~9YxU;-GU@YOg+FzM+Pl|S#O59NhiV!O zhWlxVexPK^Uq6Q<&95TTHefO-D4YM{Z>~Of_a~)EhNk_8M6i_A<8~xAg~V>ANcD@I z98)?EskSeCr=fzp-tTioC9_cJxN zKt$aEQWpF0R5ijz9E3a?bV1*FT1vYYKvRNd6`)z?1BO^i%Wo(uY+_6>KL7p7y6?Z@ z;`tX#-Cl;p6OM_6Sb_ZBiIS+#_}=_Yt)uIlY_1^p;BItcpz-iL#of_}OkT@itfCOp zxGQZ-;^Oz%)Yi~$m^i23v9PcV<(eQhi38m-#&Bh@v2bhw^<7V-`HlQ_&(Kng9O0Ct zj@<%9WkU!AWV#L7KhaV1Rjm0>+%l-Zz+&y1bX2vPyLKN+%fbp^&T1!OnQX!M2!b8N z;_}Q|Q&~U%OhrW#a3ia+)UODt_lGb7RWL@(O`|_7MTAegg0euIk>$B89hA7HqtIZ( zEkQETtao!?>VxXwGbSCI`%7;XyuZKcrjq?xv*PQN+36L|Zfp2?A4gPntJ9ym?K#ji!&5YX-M$bpez0wd9 z&NUY!p9mxr6wZVlv43Psb?pHw=+Uh61FAElLsL-rJE8A`qT0VTe5OjnFEXIGI>kSk zK?{xU9^wxuXV_jOI)y;W5~;O}Fh%aL3KU?CM*VUX*QzFl6bt0dS0;szZWSDKq1JFd z@C3JU#)H9%;W2&?nPe04qJ6(D%GC0=JSAi1h(JN439=jl6^H+iqI-{Lvj6`8eqGzO z^Dr}Pj>9nLIpln}HlxX*5pt?VDoQ0(lxjytsu3kr6BQkGrqjJSlqfA7Zb~&$(Ouni zy4Bt7``hnde|bE1x<2pE>-~J0Ex)m?S}9PAiZKYB?PUHcW3MuzV?k?3sM6^qJ)w`{Gk$qV`I2vwTshY=LbudTx9>HIMuB|>Y{;`W zF{zK#qwjT7#ld+Lvk#cUuo^`QF32>Yz^+e|jo2N1z3BzuZ1`@@XB)YK*dQhMcoLqsLzbAjymnP` zb?on%UW4@uXm1S){uBIZu=&H4avvH00+)GhtQ+Teb!2!Gvum#>it!jIXe0Ra|5&Xb zRUBV#xxBMoLI~*8(vDg8YE?j}5e#DkU@tt@u7f0o^zZ+2ITgs} zJQ#-xqS#w~M^}ao|C?3ylMIF@|NcTfUmfiA^x3|J&Q+zm*W3J$)r&`hk@VwL1;{71 z+iN0d3oR}{X2KaS9z{`Wi`Db0(sI_jZqF>Y+&tEFdzH-WL*GMkiY~__=aIz(MV$#K z($IuLSE8%Dp$#}m@!`poiKCMhY5?wKs zR)8Z^4`%q?i~37fD{vUH$1?&4{)ppxobb2Dl1}3d*HVx>f1Y+-Zu2TZsyN{P?5$vN zaf;FBiJJ^wjCY2%hgsl>bdYC~Zfar;k0eVy=~<@Rc-{0xBCNSag>tJG5k5+hblqHb z%c+)~{cOz9*OgSoxfJW2koYStsM9;>7y4i zrqLK%_AbmY(mD2$ZSrCGF5;CU5sbmP@y(e7TeIUrsCt4lR7-J-x&Xel-!YJ za|xaZ?XVB9;Y6ohVv@-Jiq7W!>|sCbTsU>4zh=ot#AY)u)5Sx_BiuJs?~bhKHn;fI zcd~6NB4HXdSj)G2eO!RzP>+zFPqj>_tKB*B5q0hRo7Z7ijH~YN#AbpSYn-5Ki?hv{ zK?Xh0q8-dpbaW=3$7)8^T?U&}Dp}xDB6h*)Lyi2lOIVh&7Sf%z@i3B0IYyuhPe2TX zAEg@|U`yIpT<;)GT}FqRbJykQws5XoS0b!c)@VU4ha^y%QJMdz!P$=(e*SWvs7eI3P`Nh^(2y>M`6Aa_B@Q426Uxy z&^A~~cK!-tgmtKzdqzXo8$?`7?~TznRH)}`MFtsk#QIyn0_)$;G#zvw-KdXcn1ob2 ztVIWCM25wWx|e@LkSv9H{ssl%h08%g|1nw6>^=I*`-rs5)cb~KzsK#^;%=`rvg|xo zj9)^TzkFr7yMiL{OLu=Zj>{Bvhi^Q6j1pqMHAy|CzwKn55rya$SzwoDfj(|>rLF6J zU(3crtU7n8jp0fG!l*`B9)E_62`}z8Z2-_w3|oT}y1ALdG{RRC{<>Z-#>tksu<)2WQJ%%J>UQO=o-V|FadASW-UY>S7 zSN1{VDHyMt77&$mu5IxwA!m>!)MuXDab|~X-OBVAq1k*=5Me`7tF7^aG{tHf_c^8= zQYLjMYTB77$6dVL7wayDTc&Mnee#+6@8?fdTV__hrupp+t4w&?oS49*jb%FT@b5jO zkjTTD^((OHHsZB-4y74uM`v%bW8=eOZc>C)cHT|llejyJedn!QH$Av+rquw#cZRyb ztvjoXBUhJ#2jKS8UEZ4DuSiR@Z%5D=f)W^l{3E&cm=~OQm1B$?QUlC2q=wZ6zcFoV ze{9=UEl1GzmVs3l*u#5ew9{X0HakXPjL4qGub+$bNi!z}Tny-gCy=%fO@L=k9B zE?%O9&VJ3lxBz`AX(Que@y{QTzwaQ|^deauNDsak`JRHgRFVsNHoOn^$T8J_9~`;L zG>eWEk`OCd_+{njw0!jQo)94)6DGjC@y7af6m*)d*wUR4Cqrf^pmWts@Et&B73zFY z<9v{&g%ot`B^#tsWagz4UbK9{yG^=vBEZiWpaRsg!H!1-bU|Wd3pCV> z%-!t$+%ef^a#Mv8{VAT{MM+gB#4+eA=gQZg5fUHRRIX2LAwrAt`gnY;i;>bUY!p`gVtE(-m?H9rEviersaxHA4gO(154Yz$27-k@?jN$Wv9d4 z$tZenBFvKMYk?XNz-a*e1E3EdTLXk$c^6}&gv3ex`-qhqZ~DBqzbgcOFD1kVIlX5oW>0_&X67k?|XqwkxmOCRBUOr_8SU-F-L; 
zCyALZF*t~PL)%8)kMvU@!?m@4m0+Xu>s0ikgB;=?8r<1@LK>@nCxF`{K+f}!QNpNo z!(n$Rh%Fbr0)WCJYS2@F0aSo4^X14ziUFKXqIc&bU-==H!Ki`GQ-K|*hvP1`QcKoa zpQ-Sj-l}~+JD2*fkls4-Om!%-#j8>r8a0DjK}Q!0Te9=f$_~N}O_t$I)~dVUq}2M^ zoob7&xPMQQF5?Kp9KzL-#(Mv)Z$H#L6q4Ij#R$Fwa7^)&$xh-kvmnopTT!c$_d%zOf{!^hXhK1I2(7!`ThEpaTPpx1@RZbwUIwt!lk^?m$>({Y$ zon=W2nLANu=rgr+)`8n*DZw*nZy4!xFm!>+;k+7EEX0%wF%<%=n+dv8L%3kO-iJZ* zEZgG!uHj4veqD6yIl9}z2E)UG?0QAxyS3M*0*@!Ap^hrDB|x!p=W*-d`^3Cc5TI#3NN&qf7qwE~?ZK(7`MZs$Ww^5JqN#AG1P zjhhuCl9nAu&R@mG21<}yv;a|~JX#3~>DVj{R7}DSG$w|TGGPJ6T7lfcLb}eNlj~(zAjp`CQbpiuIspZ8W=p+fj+0fT|0x~XJ^Np!bg_jO`EP++`sl%do9&jWFp_c z{8XiWUu6~^l{W)*J?`{~*`4<2#O?kB%;^WcQ^<{CaQ>yE<6(8xQLd|K;A#+^R;a%Pv?a}BtCf(6 z@X#^^(pBJ{jYS`vL~b)iwj?7BH2|mM5sQu_&d@+OldJ+hb-*VDK+G9_DQ(S+YoB+s>Y;c#^6>ty4ZpQ=rl`xHKWe)8Ke4wISzP zY=gQ@8?a(ke7b7k5Y9o=mNIn(j3B}0~xB_eY^8P(~X{n z-N;Zu?}C}y@Vu0%Tx4KA(#sgN?hTmUas8}dut|f@$RaF+(IOVURmWinqF|QauHpmu zcTu4y1=d=G%S&JjZS!^*A4dnyPws`ZYx}O<)1@pB^8jF=01{+?o(kx6193u`iyL^n z7pV6I4B@fEAG{9-g2VxU_yH)HK?R(B{L>hu(5)&odfBkckt*ftMiq|OdQJ!McHmO; zp)|NHDdg!v*3-i(a$7z*ZnDNjeeR%2E#Wuq3_{tQX%d;D0C#OhQTK}L#aY*jD_tJ^ z^b&7cCO$|QFY4|pz16j$yCayi?M-RQ@o1zh1QDQxHr#Yd)S6Zz z1)S%@&tLF3?8Z9;MX&VMi=?LwNNf19;Y>SSMcobq2*Bs&4zt!zOP32VTa?3O;R|BN zi@pZ{Uek^hj1`uwK(atG0L-KKi;%+M!vil)y6IKZ*FJseeR4fW5nx}aFq96#_spwi zC8$$f4XFT8HACpt)Ey=fKPzxN-3Mv?RPr9&j1c!FWGiltI3#?XOrK5y5|6X67Q?S4 zj>TUbP@6Q3lfrS0&tqd}7g6a`?}nyz!TL;~P0c#OR?*dym*&_KH>99GEq| zHeDC3oPVE~BAU-5AsNZjyPAHWT>kXuUj>gR*AY!C%t zp9`=kl_0uszQL@0W_KZK+bevkqAs-q*9#aN;!h>#Pc1o&o0%DORzE$VAkV!#mkcDH z&|aH&6M^oX7n6!NsUl9zL6~#4Sq-+w2H4!tU z1KIuPQ30g8ViIa43oE62nIpYcYt5Iz=oTg61dR7p`7GPq-g;|o9Xqj>o7h3ox%)B` zrN(3i^_YYU3;_`Q!jq3sU3BaU7%$U6#&9XIWB6~~L!dC%i2pa1|94+VA;5a);|C@$ zAM)-E5oA2oj8(*yeD0Oe`UyO(}YEUb0l zJD$`325(I1Snjw%96OQs%@4>8;Uo1&$73}sN=TTaeEi87+~PB+RWKrP=9|^E@6`ar zdLMQNT1QtQMlmG!G+~aXqHMPTJtHwE{>|NeH0rU*V@1tkBnD!6Q5f=jOkUTQQY=fT z25QOtKz=TnEyj?Rhbn&x)v;(YHJG1Y2qQ7b{KgIY}wWg6#W$Buaji>iw z^iu6tg`Np z3vvcBof9Y?$-(vm6sJwC+zO+&QMG>cNAq?f-#*w+-zXO9lUo-W*oJIrd-do#THhj| zR9pX_-IhHT{>ul?J-Tu2^DE!dKRY+AMC(lk)N7q;*9^2JtU-&My|SP>>#e`xKq+DA zF@`&7uL4AHIVRRA#B3Ye@hY+R&Z5z;Zz8RDmg}E5MO2o_&_a&0Q!u^D2JZkJx>F9A z34{Q~@<|1V$=@b$XdrjqJAvDp&@R+7Zxflh<2#h%R_6{O0`E#{Y2>9$3dQ;s+EWtJ z#ScOk@?~2a$WE7t#HN=%>OGh_wrNd;-9#n9-R0CgFQPFqrsOZ$8hoLbyEEEzRfDb% z{UhrRWp~y^=5V5sx9{uX7JXk<;o}FxA1uUJ-)&URf~Uor2jzbj^{C6jwhxY9`?HKN z^Qf+5?Xwh@{AT5$-pBTtbS9yucuhWp_H?G6+=x5|h>a%`#F( z|2T%p8bto#4{bu%1SbMeV?q%f=q4x%wBiuDD60(rN)agskP4;fC{>X!#BciYBb{J9 z$4B7k#aan(Q63#h*>_9@;;$>wrba@&>U)v;0m-xg-qngcYHB5k2Y^haBHmc>gU^u#b6mB0u*#c^m z1y47hp}#+T;{^8(n3G4s^0*A7vqDT)8eyHsHWJO$*vMz!A>I>^d|cUUmqG&8 zXS%?K*Q2gEY~A0V6qg?phB8c)#wBL9{9GCNxEAyWa!Mf zb_EPsQbm|ZKH6dW9M+1|LVYvL@*$ruzU#f*BCeURFADTZ;CEZlj6$5U9q`VlUp4*q zKY4HmdiAa*Ba8b7nXjQwrPXQH|E^^rWm77Y=aep7^#wxhu{CJXhGQ9m2+|XPG{0Hi zjYoRQ+4<@vA1YuRgg8&~X~tRS3rjdEOhRcU+E*pSw$A(!ud7CR(l3eUQh zEsx}jsPl}1eJv*@ZT}Rm8*K$c@&Hsv?m4{>Eo4a7O(vCG17;>+6%R?<_FiP^$y#Wb zvMkw)KS+|R0;7o|)O($vOEHX09?@Nc3uKbD%1oqX8@}qjrYgioH#dvP3f;1>FZ-Qd zb=0Bn$M_hVjECFu^NlQc+c5j@fciV+;nqX{44ZY*Pk1WX#I+;|Oq)F#enlHQQ9t^hm*qs~Q~ z!v-rMd+f^qKc&>WjcwEb>*9*d=moWzgqe)P$WsXsPtTq^XhvciZs1!wtc9EH6lc*J z@`Ghw->xRItTyf-#Jj|(Dc6DCuTv=}_Q&3dIWW4?`n48S|5UxNz-!v_%)jNKzje#? 
z|AiB8u3IwmXA6skLZN^FjJ(w-`qN=f0N+J3f9}h#e zrq%r@cPv}(&9K48O@VsA1wwe!II(fAr$h!}z>)tDVQ-tJLi|jg|xH#@MZ9Qs(1L z`_~-`PImBi?jc%yfD=6KM_m_R{cPE}Jtr*3552$EDfZ#IC5NsG9_@eU6#LkPG|Oy7 zquSY&+a&CQbt>#CT@HM0JYjJwTd7d@q#D^|ZcR|*a)(u>MIr(2Z-lvaL>BVu5sCm9 zll&A|`VS>!jzyR&IvwXD`;(e)y{fHjBJ^FSe0LcX$`bMh6NY zZ<4JM{~4lo5P0j@$TnKdlwg!p1VIXKrp*XL7iyvnJGLPE9tjUwrltFuvQd7;myB!H z+$;|-!XMS#ar{_3)|Z#q^=Qec5$}V;H2V3r4cEgh&YgZ4F_&q$C*|kBN+I&^?y(ya zdLD7>hhEitZM)_BzMW`hyMzVV;%xmnG+!ORF~*s`#_ut#WXU3{vM3AdW=tYU2;DIf z8N=(PU!`ZZFIidt1d!+#C`4wHHl&hbjukX3Pv9dBshw+t%xrRAw7cU(C9G?aj$J>> zC^PVf_2&EZL{PT&B5%t@6NTopSI7RBtNW!l&&EZxPX$5eKiI2+{-H1N2JVmXbbOpM5E|FdHuyEUi(g`wGW2C3^uHUgZ#m7DKb(r#y>NJH zmB-?~9x?T@@?@a4`$C-pwcxicwTDzTylxXA)*^FaM^LY>n;PAktg-cmXYJ*1LD(FzMj_ zxe7ef%O3V}>z8`Lh%H&<|I!=>{(`+>$)gDm=YeawQ;osQMWPBIsQ^7C+l(0U3_l-Y zOa;rG_m*e1_^!OQjD@-*3>gWcF?w4D`85AIvEN+h7_y7=_}r^>sg`hiQN1)u(e>t1 z*VK#4jhx$4jh8)ZeP{~Mbf$|Y1g9U17%VA^FZSv{Wzx}Ed~y~`601eUDiPNj1D61@ z3m*_fo!=)1OsF79Rc)x1SPMXcQskf&2Np|t^&&^PG&cwO-+*LEe_28&NZBOBwkZze zH|4g#d0$L0V<+@P@P_C!O>hM?BGh9d7Z-cEsUjA$#2>ICcUzGgUlN;gvE+qVPkGG! zaq#tY%d@B1=4aHx9`*~{kn*fcPr^fop#rprVSZVG@po)xA`^q{g8^s-_(W*4*E zbp#QIm3hxWfLNNQMd{|uq4}a@XHjyqJtiLvniCN^0K*OmXAUIxBQY?@Cf%U%MgH~Z ztW1P)&xv(k4;+~@>AmidU#;5M+_c^s*U(vEZ7WRkK`dyyxr~C^K$o620!L?s@dR0{ z&?~mh&!X9O2U4V`@!Ihwc<3~H^g4U3k9XN%Lv~)Th(-)Un{}E`v)AK~3YmrJI<JKQo<^2HrHwY;dY}l| zb8#?iBD!W`ux9XFSPHbTO`6k&Ue~W#H^R?tL%VyMb|cY~c>6fzR)|DTnU+ zFhKeU5}p!!cgm7S=B3Fg5-+Tq+lX|9M*8e8mM04eVfh9F2k8+s5ruKO&O~|kGmv8H|c=`IwIinH&>q=zpXQv ztt9a4obc&|C59=-d^%-u{n8k!+m=UuZZEFwIK5aSG1dcy#4i?02z(&+)>(;k$%t%s zI+1bTF7`@vK!wCH*a@>B;I9wJuwrRW2YO2iX1lUH>A&*LDVQSR9)ks;GXqYvl$fZM z;eX@%MI<@3Glo|nVrX^vr`W3#l|CYkosupXf#Uej1fAz^+sI%h#0DTw01D?pK&6y7 zPe&zCQTg_$we@AW$})pOG<&LX{Bmu)rp)6WayPvXbuno00({T#nDJL%i?u5B@1X4q zxCK8Mm=VBEt8&JocFmM8SBkz*9TnXa(x@WOFS1oh5+AA^pU$;HO7C}P>@JXch=C-vZQM7Tzl+dpi}I0PLJanjm_6=;fx&qG<4BkHAVVa&G9 z^3s&&J9D0wYRXGhpsl>?@0za0>(9quEZiwIc-zx{@ApE>8L;jqXg4EG_J0vK1;y!@ zzNy*7(0dP7-?}}Jo(Qg1Vr5w5R-%aSkTdC}RbYAD^yc2WjT1{EjMLCvd zvg+RH+~>{4$j4+vWkTuLqmm3_r!#R*Xv8ug3c){S!Ws*W4@Q9R`ha>_>igK`@9IX00x`#5 zmdnyia=IB73(-4}braEbX0PM_lSFl*x9BYEPnexKuYbJ6%*g_NZWg7w-JYqu+~h3Z zdFH;5v26NZ5tAwPe2#b-^5T+ACZLz4(;;q(P2s0~A^8XgjR?2lK*Ee%_kUWFL<}Kx zvrC&WgRCz?PjID`<|>ZrAx#sg!;Ve#<5HV-iAA2cIbpAZK)kO~94r@yj7WlKGT9>% zm?X8?YtAc%cr(v#JE3x?8g&Yw8;o3>A6)hu#hDQj+Q8sXM``uo=ySk_S{A3PE}J@x z2SnqaE)wT}hxUW}W0z0LjLouBwP)Vc?RXWmchr_oVl|6D{b&Ct=-GHb_qv^C#YQ0J5g7c zy-g>fmQdXri^>w@Qf5DBPZgO{k&ZKTHdUenn|~TfVAfI|EHW1$C{tjUF;w{&wO&(3 zKud#JzAzwVPhlB5&3N7##pC6qclZz?mdUXP}wk?CKqLtL_U=2a1MP?js{J<3*vc<6UCJtVvMYaEhtSiGrmUwAO+JEN!$PI%kL7ljsa9nrOzmT! 
zm63pSK5_4x%C-&}xk<=l1)!(b~ zlzSxHWNB0mX1fN{aALaQ$YkT~+#~0*x!~TJ>uHBw);5N}1mZq;Qf15N(*NJGU@ce< zd%ng_+v>x#7qjMqloT96Yk&>$uRzf^|Ob#O1!yJxrF= zW26K44LMPzOZPX}yQ8=o)H2~lu0onWQWma7kTsHU(s0H|SqUg*`AajD&qDfT$$y#g zauCaaT;!vv*S)~^aKqlOS8x8RP-<{_;|G-TGmsDnuiD)E?*&LMOu6fj7mf!2y!~_W zgS2Y`ozRMvd5s8K_4{|!Ic)qAbZ6zu zvQ(1XOkss|7KrVLpS8~tYo33ZIeV_I68X6RD$M!(&w{T}zQY#U+|;+zKDz>b#!@0` zoIV|KihSW?|0&a~LloS>bK)T$d_*|bi&yI2@Va#W3=&pKFOZRzB;;>>iH)9gy|ILg zgLQOpO@Pzt}lOzwcZ8;_9att6$OT zUrpaKC>}y?{xkKk((M4lRbpnf6OGnJ{%YJpUHjT9TkDGkGq-@8gCF#P0qR7WI`REh1bA zUO6T*=?7o*0{`X+Y5bK40Hjn4sjOat&T?fQoW5Cs%iWnc`LjV;(O=`Ld))>CMJZt* zMsfSLUt?R>D$AlxWbdEn@Ub*pAuNu3?t|@4T_FKi{_YaTdeEBu| z=GsR4!2_;*ba$sB?@iRw}0%XU)vw2_aBPM-J6GH*N00DD!rThMH2}02mbjMVc<-65yg>JYR%jk zj5l_kFWS>1y%%0?c=V=d*K90h@21ba)7XoPt;6;nd)l&Zk&E6GIePO*X1OWr5!4$O zFelunFvQ)3sn(%VI4^r zw0=8L8uW|mwE^)bG;%%>{&_A*Pd`8~wI6oDF3$N}u3a?1mwNLF^uWb|t2BF0)clZmBe!DP90sZ=*w%_hzx`mxKXH3aMX3;uy| zNy|wfl(zcN@t!meY12WnAeC01neYro?BYp8dQT=Vs3}iIs@X`=*sGL*+?<3Ahr>DX zV}plFS2WS0XJSMJn*vKNW;($KMX)ThGBfrxc6LtlzZDS=qFgso>ewK!+p6hqC z%kRWmOweog6i@0`rt~tNiwwZ_2lg)~dZy>-WdX&R4t@a#r1{GIx0+cP{#>_RgYvd* z9;l~NBi%S6wB1d2axGKldm34 z6T4}C^p!v8S^%G|#BTMH&`fe~$FORugH|RAEJ!EG@BoXmTOY3sw>&=jkhOn!MH|}o zRNDhU->sl*yY?~i+=IKJzYNoPk=^o~6xMoLc7?s`czM{|a6ut9T;jm(Zv6O38W_<0 z`nE#u!B=6UHC&gxO}p1V;}fiaUdOb_@FCE)Nyw5~k!&hBa(0uDj-=^>Wh|~fMOwec za!4ft#Hk%%v=i&rnqWKbq zIE4i>oI{jH^++&!!#qpot*xk5_#n;leX-xx1J%DL%Lo3T_1cg#;~#E+rIGx+?`XGCi{-)O^017};?A1_(U#HZ6=3lae_<}j;$4-*S$he7hF2wwwDiabL=`E)BEV(=w;rm`-@tIQmb#O za&{F+5-A{SSSVjF%p$F!Lmpc$=!NG=%mU~D+RVGMkl#a-De5djRnIB{Cb$FT?8;RkljyN%n6;k{e9ndNFAgGyK>ofknisbx2Sbq;yT4 zU&1?lhE*wAQRI51eY#W$J$`ibNzCXg%G~0$8|Ny$iN)|zPF?aZ z(`tW+aX+B)eDQIY7M9FC(hwIWgo!Qn6RX*qBHXEBf<-=zOo3bd>Jj7;W*?m{+GCQa z1YxF(WX8Yf+T{?F%@q=}StnVm?tq9Kom2=yAWKc#)CRBIVoLKeanm1}>2o~AFJlvr z4cTc9ICe!p#@4EKRG(^R$_^k}Q!7zn|K)B!Zg=X7>e{WvZBp;Z<}34OEm@~7J?~$* zw&H0Rb0Q49qjg{U1sfIIt;%}ZW5FGHcErE)>|p7YbY1tj)!ZK48ecE=Dt~t-si(&( zSz@A_L3%f-DLNKl=}lX`WxO7j>!jW8i{d{eC^!qm0H}9IH~)4O5!3k`{XiJ$A*4f? 
z2Y}I?y7Nqht*J5HOTTqi@-l&VpL{9d+9J&%*AQ*AJ{6!DX(Wqo+JO8@U8>@cz3)*0 z{`efUfaED#Qpz#Cu1!U|YsNk=qTgDe=g}&>lcOW&PgdTsc8qp96>nJA7Q)9SEw7l4 zS$swGYrQtf61#RG9BzehztSFmcrDUVUVlGtUEG)C{VDU_m!wsnz<7k zCm<6v)MlC~I=-MX=z5{5EnPRAl;*6*i8GwGG*piq5_*ftT;YRkNJ22bsGBE84^o&0O z63dVb6)L#fNT9lT3tgQAOh?ctBQRQrYsQG2CPm&c5V)U-(5+JIJsBX>-0^aDG!0AiX8L12Rn(&L5U;7+*i=!BQR&U3iL}wMm}s) zA2zvl$^0CRV@mI4jtN_&Lg<~^#vepYsjxy7;FG}-32pdd(@3@xwp*{L2}o2~jj1yg z%K;M23M-^r&8S}Nog?wC z*HNfqUDD4<3!;k-HZ>7jp;-17a?AAM+4k{Px^-#an8mY9N++4ie{G}^MR<|U_Jj1w zUS5^-95-AzJ^W9XG+E2BEj#*Cq>ykYhVP|Ge@f#z;n;kUB~NTr1)E4YE}b05B7~#P zpTzcfw6V=}VCe@011nydAIE^ZO{V}oACO*!STOR~3P8Mv=0~xuLu7h4tx8CpLbx+Q zREqEbS7oiON9MF(XAD2B0tf;Cn_!}loIZngREq8D7DJ{q{R?3vekkZSG|;TZhW@u z#FvQYgSrA$hxK(a#?CfcT)82?nZ1LJsHd^;xP{tqS2T6uIX~^d}L%GoX8wM7FobhT0uO5;JZhJrwGoZwPv+#QpwMP@dZTx5P;f zGR5Pr_*H@t;x!RfXG@_V`aS zXzZLj8c%n5BGS|?VwW?zvm zAWwA;C_N18K^%8}kLAtbgSXkFlpMb9- z38SmP?fxrK8oOT!;q+{HUB2?O#eKLy=Sw_z?>zf{;`ARTMAZ@D^Z|eXrQu^X5$8br z>>f*1<%35XHl{I>M$3BwT}KsM`}**ZZ#|8lk?xb?+2yX*-$a&qABS;$mg)VA`Hp45 z=Plb@6WHB!p4f>f*6D#17WZfO%QeYnwxu$s8u6Y8@t#qtJ6F8EK}^^6+$Oe>`Dz`A zi>rb$ZF}#QB(Am1pU>PN7ati6l?e&0FECP}>tI3QfDQwie=0+wLl zA)N+djnIA6-GT^($54Hkm z6muRdk|Bdi`t)+(Q(U#eA8>d(gsjhOWvJquGW-g^YKjU~mF_C?3T5mjWdw%!M0F=c zLJv;bkOh_Ye;tzP{ixPMEt3R)tF&3In5(cJw#S-q-!GSA^(>t>QGPMh~)goo>22GUmX5k`e>Hq`f}yn$-Z$I>7edwad+(?vCRkDn{PyQ-%2RZrvOH4g;ZX*?qGD+u!10l-5&dHT_)T= zcW5hW($x4`!T*Z38gBhuY_S#h-R<$!t>Y0>h%#*cHA6mNn{*d9rW36*KWm~ISE)+= z6msgi(7a0vdj8q|=CmIs4aM6u8-m;U;>XSeSk5gDu4WuyDzuT%#v7902Bn*F<`^2il$&3^bNLagDrwNXdc_C4a4{i z9%q)(8aDX9j!SclR91K&xC)&5=J)B<6(NRQ+B-~@#2&jVvi+@Q)JuXVFNGd_-s){@ z{bbtWjJ0>{`<4GVt^8B3{LKYIlV%s@iK*sKM;+T9BQnktR@8MHaK9;zl@d>%6EWoTBEqF1=r4deBD$-G^yux->_*bPwJnyxVn8aySb${VVH1mTOp%D&DE3W}c>5PzN_u}GUrQ`n7P znAW=&7SFj<3{<^}Wp|jzd1|geG=F zH9fNeZx&rE)EIVGkR9U~Ja$cXnUQ0>K-cE?yv1tqwwu%HvIqP9*ss2Q#9?wYq4}S` zReXKF?^x8kk{I@On%37RDa%jq@ky!+U(Wm)mY3bxQuGJkm_fEO(?m?w? zZCp0jb_r91B19a>o+uhTa5io2-YR9nz@e+T+iT(BBxe#McRMTvz;6CAZrSz8idqcT zJ+Bih^18C#l;$qlnUb$kf@N5o2Uji>MG!E0WZyQDTD?I_Dh;BOes|7l2PH3#ycDDG zFRrY4adn>5Ec}Z=bLM7x4d6fm(w^Q)7m3(7J*z~)gzH|)`b3|^L=aa~NihC#6pOQA z?2G)>CN>iHBiqBwQacQJUsUTFBJ1lb_B3q3a~u!v#N*riwJiLxFq3AeVe@>6v-9c1 z)1>sCm`HFNWK0ac? zncrMHce&aYxg>_&TpgdF(n=Bk*-eM-BWwTmU321s!v&Wsj~p5kAW`cvtNJ$0iJWy4 z_Z)rwCpB%M^C@dBw|xu$RO}v={FSaW!dG@aSN+G&s^TI}Rp! 
za;q$EYt%9tYfdlz0}-L0&>&e-8xUa}t3Ui-{%MNe2X~>$7vs*__6NRzZ(Cvbe-xc* zTulAn$IqEFd(+Hmn)dCqFDlcj5@uSbMAL#$OiH^MXzW{eC|0H?>VJBJwt?L`1R$h8l{_!&e5iG+Kxv4R6US zBcqXa@qa*=xv0|F3+$f#duj&X!+i}W%2NAHkFt14oyy*n4eR=By*z0SyBGeBX=b)e zoO%Dez5itBo>LRDEV^SU|9IS)*H>JMx4w7S`O;|77V{FbtEg3(llz7Bs=S%9WNjzU z%514k=>1C=BKB4hYmcT&^B51uje_ytWldtIf5!bNt`FqmoklsOxIH$j?9M5h%>Q2f z^Af4>`S^9;-W*R|PB&TU6o*;UQ4z>&GvvTL4%Znji4fQmbUNyL z@mX{BCUQ2wUKBK764j8)SxFC39Zx|KtxbRlj->WK9z)upbA%orD5HhFL`F}KfB7H( zu@NqVHCvRP9S!=gxC|4Wfp~KKhuA&Otmj(ZkmtA}ILgOmbMwo-kmA^QgI#jUTG-Nb z<&Qv{PFdNiW-ckeuh=c;n|>s#DJ(?E)uW2YCfz3pdVG-Tii(VPnNv*pz54yiqPceF zP@dY~B#CdlBsIy8yliNjprE+IMa6o~-o<->@AanV7 zk}r_X;BkmaO=7$imYfU|QIQysRUKe$^|NUX zcelJu!IJ3L^ZjiGm1V2jday5kq6d0m{`v}lWTDSx7PmqsN|d;HqS@rTLP{&;;1^2* zutk8BL&h;KaSzYXYIp~?f91cPzx0=|tDPDHKxU;MNLa*)!3l0*mj18Mf-f>07trG5 z0P%GUWO@#X0PrvsZ>Lzc3;~@+8Po?i%nl85HrVsHlykpBqjs%>p9xFmt0KycCg{u~ zvK#JUJiwW6#nLuqLED_#XiQ#+1vn-h%-kPaZ`o~(l=LiPb?B2vR zn(k@R1Gwfl`DD&SE>R@#BkT%_eSVG=c`byTwd6MMP0$0J=g|*@Rs1O3ZosET3}o%m z714I3;)7s-Wf4J1jfI#Sy8%ca04cumc8C1+L|653jV9olK8LGWr+{>Z0Xlaaq=j`< zt*m{mE!Dpf^D>L9vumBY{#d+4_uKC9I3thMt|?DL7MLROBjb(rLg<7OTu@>+t@XiMRXQbIb`eb|Wkr zkvO+hYQ()+r-zXMoO`*N&cjor*=>1!v_rCzU^W-}x4tLSOsmvXfJA!wr*3$);z2{% zo`T7GvmK6V(a)FXTAbuu4f0>`LqI5w(kSBmGm57Ts&F2dUhSAJNN#xW01SJ6IwRH3 zW(L_DF6h;kBOh(78Yy9QV6+D+4(F_s_D=`S4Qj-l9t}2pioSEpcRHDJrsI@Q(Dyo} zo8DTm^R)e{N*K`4>+aVtTO8}MuH7lY>Y{+>k=xWYHVK)QsK|AM>^b5W)RnFAl%*W} z-tR)lUS>hf9U!C!o0mC_S!i977t{SX|I-~fOUUntG(9vZHD;Sr;j{jl7Uvs|XdO#B zo9{J*)sw#&IN{7@W!-Bs3~w^J(H3bYn7dvWvg<1eKV>8#;5rsft*Th?)p`~1cP|>t z1qk8OT+KqI#Bk;X(W9=17RwgHy8uQ;BpAK#t<)Y^C;hx;o&`L*D}z5G)$uWR-!Lg{ zNU19_ot2qYWohOPjE(&7b<*17xY~i<_18y&zLlRn7<2LA!82KhyDHWjKQ-ix(l<;h zwCaY-1;z{Y`$qmB9$Ol@D~qw9t#AJkgmTB;_HKSi83BY>r~G4Qz2fnfa%{SzM8SY1 zHF)lzTYW>GDj@^5gd|~jt&#UA`Ima)a=opiaQQQ0YNtVz6huL9H z;#mW7#&6Y}WzKgkPkbePk^gx-_-B>iZW2I9LMc-!Mxs64a8#3NN7yv95(II8W-#Ta22=}G%t^En!Cqa$B61;CT&O~731i){iPG}vnkcTB)T zD9i*|a1Z5&g4@J_hm*G(TVJrSH+LU0`mk%-;`+3+%BSfQbUWgo;rGz6ma^qIlF`!Y zEZ6vwj31KiknaHu0gmeU20DUv-l%$({3nJ(cnuRG0peDb71Iu!Qd7ck*YF@TB`~zH zVV)R;w>W+B?DP}*{r9f?KiznyywOAnenxDyXCIw1lfPbl86n+H{nGh)EHdkQ^ckBa zXTY)Dgn%<^LgiR+$NmuId;?$O@L)tf{pKUBnQIlPJkylWk;JMLN7=Q$KHma=F1OQ+`{wR3vfMIQ>@ zZ#QA)#Kp8UFV=Nr;QqMjF%XBek0k7P?BA|RH~VXOKl=#r;@o-YMxp>9&hpJ47GHfO zuox#_8!WakZ`85_jkW@xiUm);oc2q>t;!S&PaI{;$eB$&hR=GKE)ky#qdv<4XJft& zm;O8>o3+u^`$MMf^I+oaq<4a9mh=(H&rqaeJX_o-4V&5m=JbZfx>NlupobVF*Qh!T z1<0W`PDKC*O5#AK8;A#hupINHLjY?l$eL0Ss|0r^u&eY8Kg}Q4!XBG;9drHSlf>{c z95!|vYH3ctD4;?a)ahrfepgCTx=R*0AT;`@5l#fmwt@%d{>HaOOlfguNtPHv)saF- zLB%DJi7BCFsKRC$g2uMGn#dP2r9SblLRIpmi>3P6`*zX=riJp|;B2TH7jk%$n z7Vx>pEHcszjq=j&__7eYB-3n{X@xy{|K&!`bw#yzqh;9h9Fx@mNshpi0R9lV@&^&- znM=ZEN3{cZR2r4pFYn9-yHXTT4%YGDGBL>OGy?0MqTqTcfCvV941z)UDoW&65d9P$ zO*Y=tb<|%E_=jXI=$P@HGa9$PIVblS*Q9%=FpTppeYE)V`=KN zw|5e+S^&gJRofbbNUk7YP@RUKgmY?=ow<>t16U{5!qW}RK}$;uCcbgYZsC($g~xek zt^ZmqmknqxPsuWE49$Ny!v1Ee2E7jBs0D)S*<;>4)nV&VD3K+oj|$o{`Gte5jgxQB z%{F;n(vnAClEPv55?wGSZAeOHhTMccA(MtM!d1la%Lt1 z8;3Rqpwt^23P+ABb|8^IEKi<`KLCxPlBsdme@jmrf32GO(bO`vYpPA%{VK~}D=@11 zG(eSOZCDUit@!$l^okR6{OgKH6}{^;q3hnt)yOK!XF`mMkf&gnibzb=EySGOf-n*z z{H9Ptn-=@;hV-9Gbv3hiB3L+T(jEW!YBp;Z zs*yK3Ctal16>nt0B~ihROp2&dWEHrnZV)!PPh{>5{rh#o`CHc2 zxAvhi+4EiE|6_(-(c47ujMuK+wAPEgrK(C_H*6D!(Jx3|jZ*5+T2CatRsgptR5Jor znEBEIjDu~?N^30Jc)yI~uQ{xk3_`Tj&|_j_y4oL3L2*d{^QwX_SK;Ja0I}u8@d@9& zF77Rt$X%Y$bd_h9rSox1bB9@X{y-~^DlXW18Fke8tmGzZ_9Ov6(p8+O<#| zaHNH>RIu{O>5RWCj(dH_fBb~sif*)9zFe`%@SG@NYpd3D7is#j7@QY(a5x0B)ZV>g ztov^kVhdQ$Tio;E;-RScREMxDPrTPp?~|-7?sI2f*<$jQ498;aL)G{$0i{+>-mQRY 
zRYbN*lWHyr66w7d)hlSMzji?{vqvvuRIhN9HpF4Zs?HHblyVg@2}xSE81H~|7b@uK zDjZQzIFaX>z1|Z+Tdj64^B%%2ox9sjaQM>u4I{QbZMO^V>P0l~n+{Sn`Ek-n0wC5B z?}yxlxs)~mbrx5dHg`jsrJN{G;rRme*3`;*-*NbE(ia6|D>^?~z+BjvVVD@cxDoDn zY7p8;%vDr}ue98DS5pkoUpbJ|Hf8bN7^^ndT0g0E`)1GgihrVYJUOo}&Z};HPJDWI z-IW6^uZJ0F3S5wid`_Y3Jt}a;M(Z4W9S2T)3ojHIRmZRzrgiJR$~W|AD_4@HRq$&L zp;o??zVv*CswqRY{#j&X?wCnYlFwFZX3AK~+K(ws$BkXK75*OKt4F-M-$$q1z+tT5 zCQQwXM)V>SjL>~UMJu*9G&ldp`SXKS3WnNap2gFQ!;f~v1B61AR_7?g0fAnPg(|dX zU8I3Vy?f=6@{QXB zTbm~+`gN>qKUBV-u#Q8XRORnOG-qGy{(g?Z$XQ(|&K;prd$9bvW|%q4w@2Gu)Wem* zY4>%xqckR$bQRGnyRY9E24|u;M}&&wZj-U^(~cGx4tu_t+>ZCDILMqD!2fH2ktm@8aOnx@cN{70oQ0eh+<`p>Z|Y*C@DsTU!zZ8>Jl1i2x;qlTQ-?2Wm=17zt1Z|h(ldhL>KUr z#>KyUPgHmlQ3d@f2Os-h7gwxb+ye#nX!{DFOaUAOYOOP3>A%qKuY%bJdp2s(`ZzQs zo`FHlgmXAN+=*@0j8rEZyodvTwJ5ZgYa|QQ~6x`10dw$`o>v}+d$-In$$eTcJDKYL$qLYTpp(CPzGUJE*(ijBb|# z{(VhA=%zW|jTDa6P953v zzcYNQVT7eQ2?eMaAiV`(9dOZn7ofLBlbO@^K=r9wS$kWwa^H?Gi|AEL+60s^%m+FZ zzC^U{WISWn?ThcXzuNiR=3Kq-w*W)KNbhW0{F7_;yHWkKXXu;n?SC6o2LGw_Y6&#c zt7~I6oq4t`4j}gnXuAN-LU3o|O%w8w*Y^cbfC@WPz=_|qVXh--^ugNyX)iQq<@Jzx z@-XTs7t6bvq|tnXFi*Up*p`uq&WpS~JDTC|`L5gZ@)qv?BU7(!A}$8YSNQ%mK~r*G zfUYQ`&D=<+@}eRnG>z7R&fHu}fxqBdra*l;p3cDq_&pypTk%*6Vk=1M3+NI#J`Be+ zjmOaV$kwc#PsW4VBaehGNsm54+*7i^baI?~hE#8wd8i}*p(Nqvyo}lCi^Z8H3%31= zJT}pK`tY57+|tW}o+kWGhDUl~N9taiyNBG@PbZrD-yj|F+fx{G@kfh>^|mfY?xs0) zm-5aJHMkrJtZuQeP#5^Plkc~lmH4(VSl3l`3SU2d11Orj)Jc zT7xw-G;v-jDmjJ;&~psv7S8tcydl#a`MZ5rekW-hcACGN5Bf=N>BYo)Wr=d64>hqw zD$MOo&_Epc9c)BVWV5y|rj2dGH`UU)N@lXGxMd^;9FPZqxTL`@v&C(r??WU^+hhAU z-%?}^8V6n$U>%)q~$Zt)7J2(DtVb1?Fk{}QLb@>f*(M-2h z0L^ib6;GM}xmsI8D|UKsvHvgW!zJ^abb2CPXJjN~ah*BU_GCt9vEiwa2e+N-Q4fOE z5t=J`RaCTTVV8KgVPm8V%# z5~o2=gvs`9DMG$|MIxerY!OsJpjZh-!2X42ZZaGNO1}2SHxcC?^HMIxY8N#01Uu)A z10nX}>eqwr{|}gbb}96S^7NJ8t+{Xp?b{8D2Mv8gi*B_HhcCSAIR~$$^*A7&QnLC| zRQQ7R15Twu_pLIcjFpM6&E~NxZ6U4)`zZ6Uz7dxOs&a^p!)U0s~^9sk%ex{u5Y>C z<%+L(s|+yJhlM?s1%Ql0z>(o1q=+3+Zr`$t`Bt-zDD=~u_^!sY#8{Kdio9N8`bNYe z&kR!E}18l@Y>OUNbsdCVy^(>821&xTutYYbTV zmu!5NI8R${O!v1gRh*BJJJEeJ{i}AymU4!x86ya_5bz`Kl2Ibgz24N363F(-!efse z$`^wG+1*1iMDKTotvCPT&2LzYRO%&ubc#D$Y8ht zb=C2@yYAG#`p*O(WW`n$8Fl~KaJt&v&zIdzFjR?c@J$qKz-DELxe){6iFR@hD`ylR z)gMnC5*FLrMHuQ#k1`BUbnPoOIhF4Rdymg@O#t(i%_ zxuhqKY317%8tA@GYv+iH_*GfLcr~ht&}3#vgyxkC>Y^R1Wwd|I@n+=fwgwO4 zebbwqZi1EmFPq93`(2wD<8n%&x`!0hL2$!ja}B)f<(OTHnxddc^sk$@ptbC#cz8dV z>5!&X?#64PTpt)9C_nF8u!fcJr12M}&BB>@hNQ~8r7joG>5wvFWfJRi*4jxUcn9CC z(?;?$ClBlQtz39U_dIODYa9u%8vrOSrHydD(9iUSk}wLcD?B=I-BL;Bkw&6hPWDv& z;JNXhgnjjS_$g*X=bVRRfxyGPVo9p8z~m`<4@6QxO$QDNl2r^@o>bFNsTaxS0+D%p zJX>TaO`srR{vi8kzDP1Hq7=3*#-AQDTXCY@O3|L%7(WS?I)Q7 zoYi`r@ozcj5?i_fa$$!=^9J^!pYCNP>*ME!i7Fx{Mw44O_`of9R=;9eI9=8Wbn23k zeFH#dhH)QjfKB{&{2z+0l~HYPKT1xdM#@|hw=tGP$*&eK*>K?Q(vzEx&KtE7KiQj_ z@nW0EAQ2_HA?S6o3aZ1G(kc2t74#{`KLZtob9e=6eQ?^_x*~ zu4lg>cw^SDgNrCm^!B%X4Kcj4RZzs$z?{8>A2C2#hlJQHpsw9zaaYQbK3e#V^yh+Y zaQk<>-lE0`m)~MqDG&3~+gdEu)6XN8} z1v5d@IT-qUhg!@iMOfIihw~WCpcJ(A-3{7*Xii=2yPt8h{YO!RQaAw7GPY3(^#o?i zoa^i{fXpkfU}Q|;9c*Tg@YUiW5#JB_f)K37ITX(hP+NLa>eV)^a3h01k3GzOTrGb) znCyPLH~RYr^JTb>5TCnG-3P8KlRO6!K>A0F8b18bhb0?156y&^n>|#FAzp`B%mGsJ z;Jw-7n2YN_G$XVPB?;B>wn{Qdixi0lRj{dHO7b%_<3TR{setaCq{$rEqMIjrA`rb7 z(#N{!=*@X{_Y)|S37Wl#3(J3pfaI^rbOSejEB5jU$lp6p^0Rnj%~_WGCuoo@F@tg5 zGE$)u)>uJ33~wk^l3S!?FV{uWy-Bl|-mcyNQWH4jZ$~06P$*PEun>Upqk1HlIBzv! 
zfig~p-&9JnwP&ReUXfEU0BprDXOSQ%2dB@I zMPBPjjhe%puzfgCtR%OyNDcsHqYT)H?xe)qk2SeZ<67S;ROY@rD+e4-=~78qX2R zIvJP`&r-aYTO}|dj}Pf;bL|pLii@qAE93Tw3?8_JOw>e4h5^sb+SQGQBmWxi$};3U z0n7xXW|-m(0}yI$3xgEEq^cJH)xgnOz^rRw$k~Orms1rj?zyheLpGHy0}GX89VPjp zQg3?8j`laS?Jk-hP zpG_zZAuLwyD_@C^7T`lw_*{T2Rukel7~M!?1R(1RRwqSK(GHUPEC&J%jP}8v5E0)Vg!zSP~W#iKt|;xLED6xzn8$k64!XjS2Oj;5jzM6h>@ zvD1DaiIlF>Ve=^pl*mIJ6!@uD%@#OgxxkaVKh#wB=zAIclZ-wgXjScR#W%H%+t);W zU{n=lE~gy+#i#5MP{8+uI0}Bs0J@rz$iw<+uI-jg&=tu4+ajsAw(yC8Za3p~VVtD^ zpDL#`!Lhs!(6j@z;1KQ;Dbak+JUMPTg0oOJIVjx>QLw8VbOVkQu&Lds##TAx(?N=m zgITC$^fHng0_@`+2zg>d;TUEq3dV-^TH5;9M`f=5S=b~Uj#1S zw#I-DVZ_vCHdPLgm&i#N`_&VWhM^}mC#F>8SnHIzJSEudpDKAG+1@I$-A^<)=7Le* zQt2yAI^qowIkZ`rgTtH>RFetY(_d@>r_>Y*3S=qCcAULOQJfPS+9J?IXHnc#GBn>? z^RXNYI)k6INkaf#%pnY^iF?OsPdexyIY`dMGusZFF{nTERn7Qhuo6AM=&}>{Zp*yD zp_G)j#qcuJrfH_){`Wjeb3!2RUB-rHMKPL{_k*!exMgdLLg{frC z9ZKkd0v_UMK2}8Asg4c_H0|LG2YyRHUFh-}a8F0Ztj_8)+n$~I;C%@nc}8~*t=Fs{!zTmTEV0z`*Poqo!iozyF9K4iM6a9`iy9zD+sjc2v|1YH#PtwJJp zsIA9Xjxb~E1brn+Y~&M7RUJb@s(TvbffS$u@PO!ApXj_^$9`1aRHk;oSTEVtVApvP zhI97h7_^M)=Nm0>c`H*;dPer0q=!_F21ohu>X`!D)I1{~SaU5K*yaTymkaH@(IGYI z^K-IYhzS|NS13(r(Ij6!NW@ciVE@0BxQz;kBE(T-xN!ao3Y#pJ<7mr0AM%+Ks(!sY z{i6+s&d<4I+D*TF=~6-8JhXh48J&-)8<_v97#BFyJpyv6q0rulxCth%fw6@~C#R!r z&8;@&CrJ4mi!9;NW|V=AtYB{NHZ~cfiJj-uwi+|Os3Xk1nhb{*=RB~{{oZBTr=MG&`uyU_%G?E8&mCbk z92og`(?~GDltG&nq_^)Y@q#pZnLSyE>(Y|l=IGJls(tGkNGfBP zlN+)Z_acl?AtSAQPr}Lph5P8a2k2*4w|?bZ(lZ*m@Tqlz)){qqH>QitCfVI^CLRe556iL{sxqu?M>W{+q*5}sE|IyA?S6Gs?~Q3E9iyWN4I=@V({h3=FnDyJ3|wW zJI&@_pGaj~5On@Nv9mz7eyf1wWJ=5hh<7w@+)Y;={QLCq-8T8>nuT(RS^@cRxU(te z@WBX`cZYJYgIF)nxF*!-65yB1NDt*23GCqZ?c^g$f-WCuOarfLxBUBO^JNLSSna<5 z52ql$U|xs80Zh+zQTDP=b~^qaUd1poZ20l|H9Mk*!;_Yoq=y|OC*^#+zP^jH zv^9&gNba7m#5wq$wp?jXl=miYh_;Nyk%T~D9*`Jk5-yD4wFR>IAnC{5r?QVv(egK` zqdPu-G%y`8_``?@`_yInI*RhNO4znd{@Q+kheFFSXpjotau z$z4Orl~-W3$OiPy!468K^lG$V^%+-kZ`?%-J|2+|)Sj?+-hQnXE3ta236V;?H*hij z6%>IKEa`Z+Z=HRh4)545g*WvT@8Ga%%RlbteFL{2#T{yX-a}ZfiK|qbUi(a2zz2IW z)_?%;9)c=YtHN#Vg91(JoK4GD<3b>i`lSV9wmQgwZ8E`2LElidX2HIXr((W)hL@i# ztS&fq$oD^nsU73r%b)D8zFs{;nrXOvCw*e@_|w}%ga)S@gufF9U;cP4|}i-n&Z zvp@InX}tm>#;m`8K|>;V=%&6(i@LQW`&#w=CLN_!ZNU@*UHw1Gp-=KpC5GgH(>rq&_qMW0TY<4qhBAgtx_xzG=v8OK-wBLA zvE7_th&nGFrgp$I^)*Vc9E|xH=asjpggfHmn~)X%d45!gqd8Tdt$G3inf71AC- ztAk;=0pjf54K(8LyN&1hr6{u!pepMnRn;oFNV-?kB7pZH^5|j;+j}8zWe-U9f1EvP z7i|~bp87Df2bBSVEM8DZ4Kv4lmMg(C5nn}lO^*n&hOO+fJmx&sh$MJNMVO1I$wwUC zh^(CI-BTZa)ZJYC&-l5^+kY;<oOr^nrd0>Lhkp6M9*1TFUwHE zt(9G=M}!5bEDBuXyC>-LEl@AA8gD=q!m&8sr@e|3|g z{Wf&3!^+MA!J?`)hHP;S(h({?5R(m%=39*VVbL}OVMBQ{(BXrM zPGHj`96mG;0Z-0`vd>&^el5IVzzfT=srPco>!{8+7<9VH1E5o*T$U>yB=SZ zwZU4=`eS>;eZN^%^OYm(r1n(%K%=zK{pC$bL#&WOr5 z7MJx-%sfY7oLbjk10>PyG?-ynZ_;yYx7_^$ z3R%=(r6?zu@nN(ms?3#I@5NNAD|lN6?Wh z5vz;;A&tN1m|O^7l6~DVo-&8%*rp&HdQch!3k@rHB1)K25=0gU4Rr)rDl9;vonX$K zY%LsJ4&;`-4Tu*3+V7SfIkew`5FT^SeFh=xEK2btp3^d+Y)D(<6$OOR{r z8Ro6F{$99NBb4XIELOyP^DOq2RXBRaB1_T}6O0h`nLSV34B!c1mXbCzXtc;5Q|j#J zMTMDTW-Eb79lM&#i%;8(Id%-?*L-1$X!$lNlF2CwQf?%&2GFI>uTe6a4W7Hbtn=Ap zfLPtU;e3C6XUuEnh4{dQy`4IXVJk=7K{;+-9e8c4+9{5PVJzEwv=6C?I{l-X6b?Ej z7;eHFD*_!|*DZIxxx`SbBRMomG#a$8WWfv=yRGgutDSe<;lac5m=gh(llzZ`M6YB1 z_aoW5&gDj-gM_vB{yO7p!~5SnHD7XMrS_eqsQN{{sUACMR+%T>6XxTa;KRKC4K5W` z1T0BtUNE2F(TdKU8jjC#s;-tz@kKN_LjNw7A}y*zvq2m`f>=;Z=2LperJrl`?hil< zEx5SGeh$@78H%$KF=MV<(YkelbFVYoFTcV4Q>7v#sincNH5KOfm@Peu>rJ(T02t7Y zR97CeKx0>n(CCSer!>pHgK;9jco%;6p&A?^Ox`~}B`EQl8D->m_nBGDS$T8$a<>Co z{TAaNN`f4F%a=Cs-&A~E{jcJ-2?2frE`W@l%&%qS4O&p04 zn;SWJE6B6K<$U-=qgJaO0dbd$mi`>x6upbbXi}Fhz>=1B{2uL73CC<>sgVbVZ{$W& 
zPErG~{AHa8;-g~S*cJXi@=<6N3yGf%RN>DJmzU*v)T}B*v9Kp~v|y!G!}GAdXKStvOK_l`9> zYVR5ya`;M)0wXtk2r7m9-NWvaqxcDi&q@PWh8(mrKtYVU>~EYVEb;w;1dvkz+^T*Q zFZ&H*8Acoo1Itg(u6hKIW=xl)wFxpNPwfCf->ABzSk~*Z24TOvqy!`FiP?r@3~aM zmYON&leyjBc5gY6;6?WC+&&E{cAiMIo_hf+>YLVF%URFPGhY&W1B5M>;^&T`WG_YD z4a?o%T=xCi<25a!y9y+Bb-ji~ce(9IzU1vI1IP{gR-X#{V#Ky)=<|4FY!#C1uz=>6$nI~`plBI*gf?0CM za*tC6Z1nt?A?VmGfp9|5*B9s0#*AcTHEP6FH;F8P01KvQr#(mB&o15Ro)}BL9&TwQ zQz(Ka|K(iPaRF_nr5+Dfuxx(F7d%Yy(^w-D>nQ{zm%5%0`LJZmca-d_?zvn7OKm$w zZ*+Z~XY>UqS@@!nvAq;h>u8k(+O2fm+rDdGhcU6{7iY``0ktr*oI$|h-;ZyKXhO7T zXVi<=FZXpbtWH6&6eVUl2U@*GN*9pV!JY~rk0X3vw^Q!#&;~}~UBnV8fcLgLS{hTc z08#jkH~5yaxxBf!l%v!-aK5r)JO#fppK4S^=O5-YLLNbTzbcesNa>x_qIoFu(xq z4{5$SM+RCTqKQ7Qz!RXJ-6FUH{9IZZguxh5i9w8;mh4(xg7dp|Zc|4<4YFv}WvnlN zw@&dEDKI|+K;zI$QK_L29hlvO&RoEBv!X@a)j=F^wYeV9b}{m-&&^8o6!iazba>+8 z@dUs^Eg|OtmyTUzsBwBSU~Z}CSUL){VU1sa$|KL0{Rs}haV;D-2O{vsJS;0l6E=u%Qyx0zg zFvhDJP{f@UzpNJ>x(HsaC&rcH?8oPSY||=}`qY)aL-moDXw7$(caR6FZ50Qxh@ z)3p;Z1)@DUeY>r0OVptBeIhnplFFSRg170nwD}F)KV|fQb;fi>s$pCRYwdE z;x4m%{q>772a1efi62>dk}G}LDY6?ZdM6|E2Eby4WHUgD8^Gq~V%za^re2^jOXjg@a!_YKenTibA7ujoK=vC9J>x7>j%UvJE^8V z)szLg@F72-h;29bK$W2buOw%nc#A&aC%+-|+o_;#RNq^VJOK3HLPN661eQgy9TJ8Y z_+XAmuWmR*C;Yo(xIXH? z)&n_VB|yLUD0Ug3Nxgk2M`SjQ^O`lp)?m^A#4lli)Sq)QAVn;HDyex;)EFUlL8TEg zDeGmiVM;_u*<;Bx>O4@CWQhS=H3-K5LuHX80q;-VBla@N_^6Y8W)8##mL&i;dsYw> zPqtn;zAf;QK@Q-o6j>=HR(1dh0dN>=5)oM;5{z@foyH}ldvZol7j}^mg3`Po4j+mI z@EY=R;s|ujoqAJSYDi|<^E7m1Vr(%A!ofNBdz;m@cqPC~!FMsG7_fFOIb2sh%!?E4 zZWxvsUuoXlXOh>q)ZdK41u40#CZ#g71>>cup+GootX<&9UT_uCHuvBQM<3RJr;`fIF+$rI_w zqOm)r(s;qrioT;2{_QoZyoZNP^PWXo2keaKp0iE+{obqkGSO`HEV{CY(s_3orTfvE zo_f=d=2(6dB*(AK0>5A@QXW22Reb3$LHE`3jeHU^04^MXU_OpR##?uTw&cqJFyx$q zgU7|9hR4nwanxzlY8+>)j8DVxlsfo~k)k-D$c2Zq6^aus16x7>GlzYVpLFo*#>YBi zPwv`448mQ+2I?DK-vaAcsuuNXu^|Cmf z_^t$O?<%rUNw^3U(7hVt8D?gVZkR6Gz%OxA;TE!`D_?|ab&D)w7Q-EU>hz*m7G6&i z`>e%t*?0~|`tU=PW+{#XNZrOqxE*7`(<1x;xp#k3gi$bUqUKKj3dU>)(;$I1=yT#W zD0(CknSiz(BC`~nHJoa~$NA=;z#~9$8%P@v@yPp#Z6Y=T`YNF8h|2og5xUg?$P*#I zq_gp6T(9Fb?8;MlM{Nf#MV5*;8W7^_@VlF%XV!~YaJbAD#5k2Frq<#-7qp*W|05cN zYW)dIy}lz;nlLKJ9yMp!>;rkL&l5mxYbh(YBzrtb?_Kc* zq$E}$wo^fdWKc8!(0-0j&e5QCfb$jc^J~et00eLG#480XhDE+`3P&GelwMnVq5QbK zJawj=b#8@D^vl&z=*ZN%RAUU1k^v*vh|_TGV^Q&EoQVR+K1c=IfSF%ls9MsV4#I6B zeSO?Ok7B-E5h5U;7ji2W9;p3_jI?F+KBL~KJdI5uZ+1OxUgK7`epXXrLElT z3(E|6xCJ)|YetH*?MPduKQZ2`v&RYk7<5@5|EU*X@k(EH02*vEO_=pHPu?llIdB`@ z+mU7OC2=2*(c*oGy-n@ynfs__#ZAYwn?GJNpHgqeU#eDs{~ZCPn!t*cF;%+WxbZX8 zX&5ZUP46^h)`t~7D#y#aB8(+o@;gQ`^yC!$y$F%1-5agZ8rB8CR7m`B8?a;Ibq1ip zllUWXqUB$Bjmy?og^B#lu9ysL3~XKha2sff2v)uO7<;uUmgD3w#&k{fs_F5<+a@LF zm+?u7G}DHdX;-pAK`@zB;rQlcngDD_$bejaZBCMPuYRg zZpmo}J#wATa74qQi$e?V74hVKdfor~V-jA~@KFzbHZR(Lc6ZLHmQ}}=gtrrfCyI_l zUvc%UjNkSAUh|LBnA~UebdEu`1x4EEd_DblDr5L<5N!P*1hjshhr)9Kz>Suf2BGIFy*Hu1EgE` zi1sKxT`s2aS|cwO`^238^?*FH{fw{RY4SNJWPp%5y#T`(S#a*@pnpeNqvy>``@bK4 zu9yGk-A4VGAAz>Zq82PW@S^GwbN@)MmtkrCe#YjAJ)bU>2`?XdM-htVcI=OWi=sLT zBb2kB8AVb2t!pq9z&MU06w^9@R!}s3wzQ?}nuuq9^2zUy%PAyPV-c_M+~8fC4nWsk zme}%%P9zhlbh2X`M=qAo=m^5&o6ky0w|pEKwS41`5_NE(>$#hJZCt)-d#UAkC1Rr! 
z>J14SK@`t4MR=&HXK!T0BfRGabTPZo3m1HIf z$$FDDYWw&FKN^SQs5!vmmv}CPfstFRmRZBW_6s=Xl;w{&ZVB!lOM7c9;x+FRwGt|H zpjC5+-|Eulx5@Xd*;4uF$=V$uu64l$)0jZLJPX9bdjj8yX^tIgQPTcY8xSwwKK(RB zD@0(JcC}%0Rr;wjQ}66s>Ux)0T$so%)%Ps@Hee@vBjOT^mh)4H(ls(Jq0jnvAaX5) zbO>@B6oR168Dtf4E}aGkmIrLUHV^_YZU{3GQb>^x=;vK*7Uk8>F0- z2U|a=jtsG{?syYs|1f53kP^56+fBT+M#abjBwgkHbg4h9MCttVM3gn6lpsT1P>AKx zHsI#6cio@9?E3Z~Z_J|nhf4|U|6@F1kFV_QMn>QlShyxp=fl3esAf_~e1Y-uqj1c^ z<&Z3H)jR7trcf8euRq{mc!1i#?_*t5Ab9W>Q06WhQQ+=3k6@P1hh$d>a^0Snc04tx8tBFco>M@H|Fpuu z!>8Obw`$*P5(ssx2TTz;+<(EXANA9;ZBXV-k)>(?W-J&+WO)1U<%#cV|livrQnRqA2?$|aRd9osvyP{ z#>_UxVw)LqwW+~W$2!g%6-4(KR;=IWn9;Q|S#EC)w!7_5diIaUE^ez`i*)i9l0w7K z2RR3_sUF=b@m;$M-ry}(KCOy$w}@v;Pe1# zobvgx08XbZz!Fd)9UgKU^7yI%eJfAl=Hs)g*Cp(E^BC&dZ6YY8@VN}W5{i8D+PL+P zg?{*x-TAvtnVd+7`&5->xvgk@Ze~sbL)bds^ao0+>9FTZ=l zAy$3E=u^!G_oVL=8du)NLN^k-U9Xq4sr!*yJ|26ake7M+|Nbb|jFaICQDHADjJuFq z5zjix4zwaxo=7dQpb{Mk7O&CLR2mM#V}T{Usw9{rAhHKInx&p?MgfhiwFn32^M4eb zc|276|Hsel`(lhRmc|;ACR>O)V;xHwq9oPWLa8QHl5}P)*$t7kjD1P2O19K@1_`N4 zsVLQuN~Myt-0Hf&^ZRoik2#OWIggq1S>B)5`}wq1Ca|RlG9E-qrP;x+X9#&lF2I3$MM=|fO@|xxfRL3cg^5v>c^A1m&T?pqQfW`R*Kfn z+dM&uOg787h8&+e33LLSc+~zXIqULHX>fhPw(R|Wdp=a>XM4om-g{ymr8&8nHienV ziMK?15n}BfCab8&=zSqqori{;X%-K6hegz8+>B~as1*}^zUQeRljtwMh0c{F1i1tX#Dv|vi7Z&I=3ShcC?(sh1rYd zZkB~#>{=&2)SeLZ$4*J^l(2N^cWQjKn6FaP*Li!|e|r$VnSC?oXj*b^t-rp35QgkI zs@5+Ks1<@JB?v%oj5kri1{3$ri&-@?SL2vfxFrfw>HBYI9pV?lIoKssKh$q|{~N67 z3!x*0_f=jh$Q`SY#G~=ZaX$`R6RL8eh(RS(`E_7G#F8ICV;OYJ!9_5YVEYU?qJF7` z3t-iH0h%vgl#7wmj3e)pE{B<-{ZC{$bO#z0v6F=g*ed<6dc`5NB+*065l<%ZLL3^wTE3z@}&=?5baZ4OOQ%m zoY0>2`MAoG5av;7oAGxx#*kA*^@E(Rx*|f(Z1QT>{PLHyoR@-My;m9Evmh``Fw5Im8XIroIkE z-QkOecf!S~$%v7O`Mf<4!Rj}9g$-krq)7s--_Q}`_$xWi^C7CKLL}w5W9l!;O(jU; zxDt37(L39cG-Pty2$V(CZL#O>G^b?D>{qi&<5 z55QjQ5sdDJH)oc=E0!A1I)Hz@IhB2DnY{1Xk>{g6>HQyO)!tf&A_E_Sp4lf;SJ@8k zXj32ML6{4{VNLeu9daJAY0VW#Y=yGpmn3N84z&T}xN(yEVhAfuA;vgE2ql3VN_}y2 z6-SC-ay(Efi%HlO-%^}$p)O6Fp2B2Hk#0UxSS;@Cv)|DqjtZ4M3FBr>aE<@SC;=#9 zi59?-p^c#F*8#cL?|A&=VnIi1{9YfP$cmqADJ`?;1<+l)i?)_NF}=vObOv+IZ&C7l zEk8dv`Iz7*^Orik{b$F#=!L_yf-;*-{luodS5O|K-d)-C-d0ondjDo$ z(nS~u_*(#+DS0W>UtK*P{Hj;JW`U;v2WGS)l@~{=G>O8nvBn_C%MwJRWsxa7S2VN& z`~e#65m-wZ84tnv)H3NEw3`&SlNo%f|vF1HI;1AWI% zP9Jw44F$1qiF_z{M)#;p_o&Wx5Y#*EiXM_drG0Fs97{i296os1zVd>MX0{-POL>^+ zvhUxrYXlJSr&rBSFD!?vXNgRwh?$dt7%5mYsZs+AD1*R}D9IDc-K0MtlP-BIM`%un z9-kI%Jt@ca;vD13&smjcKSIDhhO{b~Hk)MZ+_zdEqEDqbd9Ca+Gt5USs@M7MEi;{p zm+iO%RM5B(;zF{yc7?;3)BKEp_CDEvZ=JtKD9v2cVP45=m)M$>rlpW;P8ZT)@&YH1 zIRf6+WR*Y}-lAdKn3}VvQk|-V)pv-_l?R3PCaDei2@Vf}C zHYGZ)%G|SO9Qs!-Zh}@ji-^(nVbwC64OQ>JAWD*HmE(bufqHXDo2LE7yyWY1V#~<8 zmXnAk0CX&LJNHOgAI-)3%QP(An17r#d$|e+`PoMP9HP*>XXI6pTandX$J3s#6Fu+v zz4hHJwg^DIHF#x?tg`sT-Vtc9WxhW?_pSfR0fWspdi2Lmy730=<^29X`N!)FzCtzR zA&{gQsCyLLKV*Q5!^T&Hj-=iRw-K=?hS`21Y&3W@`VMw(=fM)k^(B4kiwM{rq`- z%vBHPx$5+mJDv2xSXZTtfVTu2UQa;(rXkeZafRO-DK#Yg6Eb--_voL_isQxkid)MLrN829-?Z@K|tRv6vAj zqAntSWXS9f@2gg;#@ldpoZ+Bp(Pe8fvk}6M%Iq^ie3%T=4k@dN^%qqPK;-K5k;Xq4 z6Kz2FZ>fhDWBg=WJi;)OfNEGI7C;!_`!Mz(((Hv3+{*h?WH~L@^Gdd?b)ukUm;tVy zz4(e{A4zkZD_+Z3v0Lf?YdF=F0OCYF-M5d%l0eGvXG)SwV*CM=-<+^ z=Hi=o{-2^w7Bo~AM8vzkqMCT=1~?x3s9rS6T~YvF>GHr#z4(*vn=*@Qlib5c7e?>D z)(r*@w5X!yR6` zebyqvWVkTk49qf~044_k5*cdbhoE2$1s+6!_w+bi9R>G}3qo1J)rCoU5=UkBW1K$w z()9jwIx^BvZre|}hGic?!bc9dXjzevE$-*E2D)7LnzfD;Q|}Y^e424tU_aHOHY^n^-B?0ykZV{M{q@Blac zHf*}#%lATE|Fr!OSYL)TNfaAN#a}b~>MtTqWUk9!`e5cLRq^-tjD2Z_mDrV%eOM#J zK3frE9{QEW)nSW6G7+oOMbx>oN9J+(6fnFUSlKIsX@Gb@q&6XQaRjy0o?Ff#*_;tv zyxg+4e^YJ0sxv)){#;i98`%ai&0PeyoqYt`m6UQ3?6-)x@;Pjs)1NRfW9yBgJFnDV z`+6xk`M<@~0^>IQw$q$V75($)OHTwY+-^4efc-MKWW2DM5IlBxHAvR@35~pnAg90j 
z+_CV5RY0;&qbg)7A7tmd097|{l961kCGXqSw6C|^4_16J{@G^-xNLqO4oU~J+YpvhGx%*@)6QVJ7r9b+oS~7iHJ>`MgPDMi)SSrBpvz%_T%sM>qonx7~hE6(4ztp;U?sA_! zvfos^dReYDD%YGlOwPvaNe?Y%|Mey2=;`V@Wh6pXA3^^kquF3}Udxyjc|X0@gXfx; zA7@z4_o;DUNU>bVtB?P#GpLK6$480zA zOvbwQzcRLBcOlB;veD%*^Jl{L?^Xii|&Ik3pFic|j&Xecp5?lqzr!*<=kL z+^S{2PRkOh*?bR$VZSgec#g{g`qoJE_CF*1qW7)Q6dTVwo_~>tZ3I+)B5dTLnzEV| zrkZZo6Qe~DBb|F6l4;Mwo=XvY(|Vchb}`xLMA_;%lpPPbx&pxl#M&vcYgn$q*w#t| z%i~j`)l6i_YccIloHHP6QIny69y)$w#8JwLSuuBFLnXe4@gav}ts}Un?-6|?oHCZ7 z@Q9Uw%4CZ7*ZHT4cV%k^Tdj6`+d((>Tk^%Mne>|_vC02g?V5hO%L}Xe1Q-di$v7+0 zniuu=zCGXB({GclJz$r94xutFySEk5`Zl4}4SqsMc@9vv5iu8|=VahNQ z7LR~0WKJTOromcm&u09nKC7(&eJKL02Iv3r$UMRD+4h=s-TUH+H`uRchJV%{Cv%RE zVq^&Y#OqejH(OIr>W9Vt?>mwzKizKqRQscIcV*)4eNH*f{V`M5b>~)wG~48w?TEZN zOkn%9;C(g@u_D#zpOrFs=4K6%d{v5CK50 zT{=Jq5h+ld&Hb{VTJ*w*X~f1^0G!Hidim?nxsM;#7aMP_;3RNTWRJUHa#PXkXOYlW z%hRRrB4o;8z1ES*Rsl}cHT~6<$z;bChwNTj*4sc**Qd`;R$bfb?xTjyKpjx7(APvTcbL24pbnkRA#Sd ztycCzDDz$Zv+QG|hgSGtS}veG2|7kVY9O-mwA{@N2Qq~MIhTu5+8oeE*S({<;Z%&mkiiJU5ls(?a>o0Q4f!AE58=^w&lX6v$+>~&nz!I zzi~FNb@Pp1KWAQd7O#JKI_Z*2)_K!Xz5Yz*c$G!NRt?=l$DLp3gu0P6ZDN3#Sr~k< zxsrnMu(Zq9ckcpcd>x3z`h4jM&l5R;9>``LL%R>l!}4I@HG0EIhJiPeu3{86Bt;|> znI;u~^F=yDwxN2#hpymI?~59#6HO@9)?qFYtHtBP%lnko1@-TqKBn-~(3EQa!!5ai zx%f5P8bzsCENE;905G)|twC4l7diT7tf@{ZXfiBK&&+JBveAEEPs?ATH&xNoO-vdp z9*hk%FxrS>hKbD#QiFZewY&`mg_;Wc?AB;pe;g8V;mm?@Ol!}lo=fYKR-S(N>oi)U zMcqx!C;E!5+QDet&~bL3r*0y>kF>rIJ_z3eY#5MCZrgtEUf^{0cB{!-03wA|{nJUZ zWp)Tv;FENt>gQ+7XvS5fnz%Ho3BQOL0=Q?K=kbq8d0YGNn?a3h!>YWF z$GaMay5=0shkMV@dA7ON*IxBlt8uU-eck^C5a}D_KTRdzgyy|=o0IQKRQJS98HDXU z{-x`>gR@M4omm>H>GahVh2?3~4)w!HPni_?I_CZ^pUQ63TnM-*m`!u|9*NA4f-|2h z#jEe@HTvNeIL=#qU3YJvz%q#_3Fqe+Dh1C15n?rtPyn1`v3H7%YV-HAqGIv~OQSqa z>h={zIzoZuDl1|mHR9=*uv~(C?{uWJx_N-9%mdcqfnLS)_=fScvQzydC-$B`L%zDN zE8E^z(7U{JiPOKT!~AYqXIwVkdcbr$ZL_I7{$9>={gcHno2CqYzRXy!tqR5gqO^@k z9}efavcKrryhOj_MvzS>OW!)Hyf*7Hsveq2V=d8ZQ&&MU1bNG%k^ceTqw#31s9pdG zBSXikAh&;qT&&#tq$n1oKq?J!spex4xm|#T#~#cJc8-rdl94K9ujsevq!aaJB2##t zych$aeLN9k#C)Dnm=4Mw6KK`MKkHko+hB7rXkE4DUq;37IIPPkzl*hA@?Zb=zv=1o zLs&ceRBBwK6YoEB-_2ed;iyu*dUjiN(fnANsrBdvu*B)(f)wX4w{2?y4{a7uQ?^s( zi0ZRRjCBIS?ZEPE&P{2J8KVlNGzJnM{7z<;a8hmHvC1zU1-Iej4F5!*QdR7VE#coX zBA+Gr$hPW%5)sWAK*I`oZtW_Bk1HFSBuP=#+I;IEkup&a>=8tvN#9fQ>Es}u80Q&*$MM%PICifia7?c=Cx=LrqeW9faE`fA`vW=a`< zmyXv(UtN?M05~1hu~A(aD;&(Z8?<)mQC7Uai0P+5<{M$G(u|x>>3-4t)v|`+kTr1n zsta#9-t(Q5rz+m7loc3cbpBQ(F1?I!;_5^|x$w)Cm+}oYs~^p~ibz2)Xo<86dWUK6 zbZ|BhL~L^6ZaqYN{^7ZTP!iHdYyO?g3X@`!qytBsZ$ns1K_T`Zkn2cCYQ-UF=^ST6 z%q)_X&Bnc3hd*&hra^C)(({Kvz)WV(>($eGD8=d@;MaUUep@|Xl1A5s`icRzP}`i) zS?yaLgMD3Yr8Ayc8&VyU`!J1Tj@YCDWN07gxcD5nM}{zF{5!7o9z!b#%N!4XZ!y@-%|LcIoj(LNtG>gw9DASVoDLK`K7up z0u;->8mAHo?9|>s*?BCKWgYKqT*6Lui*%wNW8bE0w)HV?J3HV3$UWC!FHm^R1qVUi z<_(=ax7VpY>>9lP z*Gx7OaCw}jLF@|gEjW7G0mCVaMkZS1#93-+&6J$68%;XX-tcN=M$zyz!t~RG?z$Ga z=6yLZYB|?=%EuqMo%!UfQ{>tk`BP*!6!IVL+k;` zJrFiXM(t%I3)_%oAl+3l%2FT$Wpr&Fbi+t9aYe3P3!-3BB7KHn4g#70z+&%a0Dv}^ zwIrox_EQ%D+IIz|M~^1F&pMzmWwY}2$6KBtPXMgZ2<`)+$STFY2nZaSLa>f;AA=tj}wGisCm!t{WR)LFDX)N3MT z$ACm9sF^56nn?+tWChhyq&Wf2>8J7`EK&yt0*qeB!Ayz>0_jKu+BXAILTIv4jtcCc0BbEPF3I>SCB)Ds%s2%9WUvnH(X!K* zEkahUeZbY)QDE?rs~KQ!xFP?)F=|G|$B$Uy-mz-n@(kK%{v~WYXlT*~eAw3}(j3zHH;))s<>&T^uP}t0;Vhwxku?cw?#s z%*jlGGeo#0^<5HRdybMyJ$mP(&k_5Oc!hllus5DZvo}Kz`Op`j?YamW2-1!!nphB~ zSBSRMBOr}v9#zbC9V%9?ZnloP?NCsQ0kFbm1WKtl*u)a4j$gDrT8H*C8sN?Tp|V#H zx9W@{hpeB_uI|-G_2tsEbyWLw?7=#lmm=8<9m8D=<$vCN%uP=p3t4k)Zq4a}<2@A5 zpNFgU$F@!m<(e)USCYR(`aQ}eBSw;Y=s8Sg%P_Tm-);lzaAmUMh06GA|uJ$8)h}dqyR{xV>7ZpmMOo&7T z&72J*CUKFnra%RajG!&|Z(n57D!BB`RgM>QU;-w=QI~r3GJ^9JS0Bt`R}l`#;E4sM 
zRodXVeZp9zzPU%f9s$cX`UZpCbVt*nr2 z=q_Aa*JIl72hG?F!{gbJ2`H4|-{U3%cr%DPe;B_2Kj+~72eoSx+8zHJ*b+QQ{eF

i3Y#t&+!$Bo1}o$} zs(#PYQ9?yXBpVlPKr@55U)hX513P$f!h%d4Tjf%+ma4}_Ty>de2;AJOm}mq>(-~L- z=y4)erk?5oJEqLi<$(+VyjqhWxp0wmxuzTy9K1kr_T8ZVgXfzT-Q zCN2t?^&)1O0$CuV{}G^tGWy=X5k_l?JRN#u#`y)Q>mNaKAUf62!by#dcn>j}FSVMg zbO*8thao~Cn^+;Iy%^KWKwfD7M8DDcp;a6s`9LzG?72B{_)CXPY5I|ark~0}G!)irU~ zqBq}VAcZCi6XwT^^VTT^)6v09aw;*whPu2CwIB4p2oMiI*tctOi)^|sf?gq|pZ}ZL z9?YzUm0(p=la+{^b%8~9&0DLHOL=pvt3~hu> zLVrW8%3c_Cse7lb73QH?=Bg^0wDhG7TCKG&WOglU;nn;W&TDCQ8|*63vi@S|6v1qG zxJGHCxJEw|ID!nnl4Jfe*s3krwku}CiK0irngT{F;!(`TPSpULQy%;@3+uH;G-PJ} zS95P$Apvt(0kaaBFkQr)%_*Xx_#n}+Y5%g%{w_8y62J|x8CMaSx(MQCF8wf@TG;7! z3_)Xk;&Np=j_|4-;wQ<*`2gKjzO8G-dO8iE4E3chvNZuH53^fk~Ywmz8qR3&^)H6Q-?1h;)f+t3&WBIMtUK{szv@eSNMZKT(4z;IBD;|B zCqoa}AOc33&eYphIN8RHFzHeB5;lW1V|5Dkc6J;I3eYCow$Q5xP^N9Ibgjn>H2^?u z2DM_@QNFK9%lU-;(xsl_gPP!up*LFw38_*fLSF#{I2u&5=`W-gq6{z$Hg*<3Y2vxP zF7yl}0x2s4Zs-TxI|QrZnyYsj*}dWEXlU-LSyL)|EVA9GREd?WF!z+7+hi;DPv|&_ zQ=N>2J1cUm4?K_gvMP2%u{);mII zh)9eL^*?cpW=zA*Y|#-hH^V6YIwlcuK=K|(hD*6zS^;1aC`AS;VD>9@TP`MyjrGOQ zqGZ@w9G$3>&jB{-$Mkmak&;@tBzE}spU89@dAwEi z*cFq@1w(ny8vNBAPA3$FPigHaHQDWgvwj@1!9T5XWz$`gBUky*Qb$F^o9(E;AKv>X zj{U3br~D3H?cfUAE-^a^t7nuxvAP{)e}BJ(+{{LUQUWAEyUM7uGHjug5c4;3@8RA7 z&q5E#WN-DM5lmb7IlwfF(Egp8G)!yZVoY?1NNIn5^tJ@cNJ-#KXz2LP;vfp@n~~UT2x8#_|+v=-B*5b&oM*Ptdz+LX3nyyp91JaQ9_C# zZIPWlAfx?Jgs`}myzj@1qG<;daH|n*r2xW4%FJJHfZruTAltc|jr(-vFQtm;=8%eu z)%yBSvh}}8R}qsJ*@#}xqz%`U0I4(aZd=c{Kg@yym>FsU6#3}iEo+c#C+moDl@EqvK>i}*}aUmLo7V$I*hXb~eC z1pY{w+Y~SAflezkQp! z=Iiyh7lNPUoarbPsV6nei|(Q}Eq3NzNU=CETyybP>CK`IothP=HAh7L%aG}wu;fUa zAN?=YQcEp#sgf(WE@S+-ezn`BtnxJ_(;i} zT^QaGq7* zKJ+v;dTJ%mBI5qj>)nr9%~8L0=w^rdW-qfZodek#YqU4!YK9Gc zR&%=rM^NX6-WzWD9z97Dqm9S~L-W-tltG;%&cCyiz1#3s*_4W{1C z_+X;v94Rd@Nd9--K`LGX(2k;I&e|%qY&>ujoV`PEySL0l;@AQJ1+b;icD#ODFbZLB zevg90Lvl;8bk#J5W(oD_)m0Qxb|1VRe&6=!u>a27`{1=vbNiL~otF2&hhF*jm@CHv zY@bQ4&4tX_OqE2;`dQb7eTwYZbw2RG`G5%Tq-$Gs>y?CorG{086;bu!5{P)zGGv5l z!qXX|Fz0t5q7E9KK*Wp$w{fwf;r!{?`~EvY^eWR|avT-NCs}pRSE0;m6@97HPY8x1 zbz(k*hfj3=bsM+FlFr3E-nCPNZe8kJjUvLyStJ5TXOY*aU9a*{)rKYzWE+DIZG9fR zhiPgyud-M7jPLNrC`qRPYz{wU>+TJMyc(;RJ?BFAj4ezS=e@zYhV?kDDxI^=TDK!N z)aUU2sn83-9lOGJ9ba|Cc6oDmj=l~b1mr4o5JKTMLc)F(O92bZTH$Tr8K2~FQ`tU? z`w;|sjWq?XoNlxeK2-45pZ^rze{GrHr1&Ry- z`0;7=Js|$PSGyZNZBCa!uvF8{&}R!dBMI{^&2ESlh7Fjk{Yazl4t`bd`)1^hJIx?) 
GIT binary patch
[literal base85-encoded binary data for a binary file added by a later patch in this series; the encoded payload is not human-readable and is carried only in the original patch file]
z4l@9*RHDpi6xX4Prav>1*W<6rDvFaORDl2aNYK={B`r$ z8HLo2XO;;i_U^=(n!{$l1EhJRX?W+JX9byd`34KxM?P;K>5lBR{(1;uEu3bBsRPV} zd9m{tNC(hhg=BPZ78neE-8WV2db3ng(XbC}7p|C}NUE+`vevTE)bQ4=P|p)4k@d!J zLZ=7QJhw)PoIAo1)9@Qi@ZsXN2bg*vBkk4|@gPZAX3?ZSC5Ib-ZQB5M{Uuf84Q49JF z8i&d4wpgFBXnj=Su0Nzk%~GbAU59VlP!(>QC4D@vV`Y6580t+xrS+TiTz1u~?De|S z#+&{tC7PNptNNfaPH~+t19l#Q9nJt$Y-Wp-;&b$qq|*`Bw*wwa%%+lHtj-d)GG$jGKWKKfpHTLN@N9h8ySKgk z0`rNzN95o93L19B97o(+T;cS7`hG~`yZiB|-KVjbT$fo378@J{SXKjwP&@A!Z613u zvdVkCb@xlz3G#i>3ggmlHDOIYUwpwiFmmLVdedl z+M%mN)~_hMUYC4puIqhqst15Qw>N7R@7}Vk>TRQ+VHc-%HmPq= zuxmmv+_NBYdRig>Y_3j&4yCnX7z;8i8neZ_V4WQ>Cy$o28R@o=P4oHLyn^Esn1P9c zn|o3Jjz0SAG*3uZ=!tSazu0mxCSb9%lm^2@KYc{i|-PN_JyHaI*Vik-sLD8FWAw@B;*e#T#<*;K&Ohr>WteFp zV87!Ti*HwBlFZC^$7OGso>x8Df=>teNwu4K1$VKFfW4;iQ{V=F^=mnR9w_ltq8H+o z0C9G)p7*$H$3~Y#m?ng&ygWZ`K2L-&8=!KU%T0OxrV4YL*@3|5c*-Hf-%`Ye^oAf^B;Ysfu zFIBFw9STlI8r8bs@Ar29CY^^U6 zqaWpGr<$mCj5cF=r}I=>qzG?ptU@#i4(jFMCRoGPL3Fz{%3rvJGsur+HgR`LztQp! zZ*UPPoAWL64l6o#76PDKH+GY`Yn%$!d9KZuXQN&lJ7UV1%j5KN)mswh7_Q}iYLEXy z_cL6Url;FzbkbIuTT+K=LWUumxkAH~jVFY~tSkH)%qK+Nc81f6nFWgaM!r4=in2@l zh5Sr0GWAf*YSjQ9+2k(fDVKhq8NU#i2&mM@0!LNJ?xPAHza8WBtN&DmPI#r=^+3F~ z(t$$>D4zBn+BIg$n_uA%z1}AsdFT-q&j!$F5gWo&A_5_-8Pol+2oJIOC|WFqPxK7a zLTG9&0Ae`I3Y<$KccNQ=jStnI7VM|)Hdxr>Gl(*XKY1rIIxJgVMM=Sf-Q~h3{tW60 zO5IaEw|VraSjuUtjw|C}FvpJW4Kl8d-SiesIeACSO0FA(eMw7|0X0OoUPzYWJ;HvL zu(UNHhJ5qe_nLH0HV=UhC>AH-=v)@!jG<76ogE5dyvWjz_y<5#MIU>SH5xP!D@65E zg!Y)@$4dPu(&1BrS9aYGT)LCK-uD_&R0Qg|=kV{HoBS#ZU-ID(_YxZ2BdO(z^-={L zZOSyLyu$FFRiP>IS}n$mTf@ho@&KH3N3EpWDA;lE4t0t(fueiKkD@ljHQqtf!{{5h zbg0AnDJ5$x|JafJbKejP#C7Ig)msXkY%t6?pC0Rkqmtbl%;-mg{7%>82WXO=r_b5m zb)MOJLY4Rz`s^vQKgSR_r{4g?8FPAcfl)$rSb)*{eZ1y&-DMpJ3kY+NqK>8AIG6R5 z%#-dx(mgj~Vj)53;<5N6rH}u<@`1eg<4&j30lnF4%NGwmum#GHMqc;&TH)^u?i3)RmkqM)=%a586Azegfw3LmFdD* z*@Q+vSq7kAqkh47FJ+WGnzsCKDxg?pSnBnJ_1Aa)0O)MnP-nQ1zNp$k0p7i z+J;Boh;ZYW!fOuuC)3ZE@89-lG10a2O35K+gUPe>ql^0_#o5f;<@?DP4od=zV&@23VsXYs9x~cX&ni0) zwPVsf!toKB-_4Q6%@DxU55LQ>H=N0&q{F>Y@lELR2Mix1_F zs0*?a&l}7o8FUVh6kx1?Q^Ww38&$4C1`2a+yd{Uls56EKNSN=b)FQWOD2Abe$ax|p zJ9~kHGEe}B;_m>O2UpA<0e&%g_VpVBml8xLfT#f!zgWg1Y$)^Yd|Q$S=bDI->|hM8 zREHd~)P+e0C*g`WKgQ_uGt5>=@w!wM&K1@43jVh!E)AfZ>F^II zo*-;syPp_NMRZQS$u}2BH|a)s(Rl-T9d`HRt21boKP-_B8-rHq#q@WvhMBy=I=zbb zwh6q~`P^w$=r)H3%$f+dWn&4U+v~~9Am~Kd*xQnfkrJGTIL{;MZ=(Zw{)@z5_{#Q4 zqjp|yog0i;R*4`LDE;&>1MpvV%B0b~+|`Rc&C30hM-1j0hE~KGP>XdDXt$!u9R9s~ zeN+EkX7_x-BD_YCmigM`RV$8M0+K``F_~5F?-)B#6g5zv+sli@Ial`UrPU+3qc+&O zbJV(2E$K`%+U+L3|N1QfUKcU8AXF*#mH$@Rocuu9j(BphH0#k~0O1V-#z_Kph?IQs zn|`D*s9oGXSaK-oOx2v)fxMBAApFWO>VFPnr*{|sm%y=KFg$VL#Zs-*v-!HrFxvOP z^&s%~@%k2%hI}VKT!=WHk6)dn$*-a0w|LZT^2@k?duR!&d*Qx&YLT?^?O6A{6#=gQ z@t_+XPw~VAu%0JluI_ch3)qduA_@NZ?6fdOUAQ_` zX0Xo4UwGgdAr!Y5KLYrf2fNDCLY2TUCwkCr%)Zw7K~Bp6zo#%5dwJ_N#A$jRgx5)aP#p6{<#a> zw7C{#JV@MtoA7e2%y;T063o1aF@j^OEq`3tyken)m@2liHDuVhl4C~W=LUH4t%E)5 z`Yo?^iLq{BBOVVg9Z6Zc|HL8l%UXWO#L^E(l%ox85f+TU7vx^R8!H zwcjq;E#Q;q&K68SshR{C$h>cAF{?%wP!_8Qy^>$`M#;Upn9bHa$JEL8+i(1gieR}< zl9GNJ;vzRo&=BS}JDyf72rR@KPMhxiGo=-EZBa7Jd=Q zNq)Oxhfu4V+!pm_Y&p6{^@vtf@1uARnA}hQ`dGfGIQ?nKqxy`XYyl%9J?|>(j3zw& zdejGNu6`H#WQOGI#?s~}i*6WazpzS;K^7Rja^I~fHA`67@Vap03u5%1_G{bV6Rd`i zHJ?wfEPv`3zx@3x^cbcDr|n6%lNvuaa-1FHb>}YYK&8sudtZmo?gq&XnYTKHAm76Bt;VM3$|D^}_5O}JKo+^@n;<8f2?5^t$qshUYb zc=3(~;E>Yv2m3n0pJtMIJPYhRL;B|Era=vh>yMiWZ43i2Umrx@TeoQgg)<_k->No= zWg5gw`#d)r&Gtrhuqby<#K>32ekGcYM<1z;O1+}O`kXW$m6{iCJ_dz!iKutP8?Wr; zGnHtH=?Jc)P_G!UvaHm59<+d7lLAf7Jl)lp3stLPpYvaa{oUi|J#KM97*jgONDRQkUlyj&wNU4 z4#*ZPa0o6_z=aT8g~qN}<55ZToYtI@>J%)}!IK=-v{8tT(lB{^BY)7`k8d7ft@Bc|aRLss 
zg(I@!79;Ex0_Wm49+lRf@kr0&^@}rKpW2$c_^&aK89{8guS*Bz0Cz!?ZSUvH_46tg zV3k|2*~frXI5$(}(DwS8xR(|#5oJTm<2bHc)vVDltd?=m_%IcgF&4JG6<5;~b#CkQ zD2n_;)>8^*q`bUZpL=o$Pg1u+_+Jy|Dcm3bNvhd3kEQVmqkPWps-RO~`YEY_SiP$6 z^si5asiq@y^RH_9r_yw|oJzI1@f)6Lu3b-;u|m4$1HhPu^Mm-T=bAYws8T(*-Aola0)l8HWu8O>OeS6FU=%w{tDtt~Ck@Fk_k+N#mKZ z8_lx$domJ@YNfV$0=s6=xLBo6DNFeSe7ib)wtmH_gMYDWi&3wAF)X--K5bRo##@ zlH~D%SP2k+O;}70kNj(3bO3+VKl$fYY8)3Nw?x~GAa2tHt~UWnc1TS#H{}hWr!Bba z_%68@BDJYVDs4?^jRhX*O38K6o;-CBZNTHxQu$7}57bQ1Vji@Z0?jkkOm8sKj<3g4 zlSWmzco;Wgg^7Tc8$o-h@Y3);U|_~32uAo3P8gOniiU>hszvi9i)KQ%rW|%ZD%Ev} zX!ToiyJZ1+>s+&r_C1jCo$Wf3O>xQwu026qAD7c-?Tv>93{hP1 zx9KH!7Fdi{ehT5-#ff~szW`FNYTL_+;q6n)B}RWu+Xy2=65`7l(-MZY+IU2VC--$q zl2Li`KY!@WO#w+bN4>oF$Fvk&h8{bI z@q5dnvj*aS3^i}q;bDxtUk=-*HtA9r>DBj3BK7EBdv)Bh3(}~T#sh3KqMQUjH zD1>j`HuHRV#Rh5#$9dASh5^UB?aSATRloj$;C09JW&ybLfd6)45`O{YDHW$az;z)d zZF|2;69-Mqgw>YPzdK54LbU-P-Rvo)DWsV_e^r_Bahbsg4_Ihw`e=Lv$e|&5p}3~9 z)yg&84FrVusXw;{hJGUFUNW3nF1h3&x$8Nx3^9yf&cfq-=U>X)?Rk~C;TAmwOJLU95ko+o?@IgIn^0}#$s|I70LT|iZ6kG-u5VOvT6M<^|s zexGQhrr0F@I$2{ z;ji^RKto$fHMMuwxs8r2H>p7OaB1Pg+id*HQ+i2zPZwQ;{`XZ%}wt31N~=M$$?1WhZ_qtcC2r7T>R>+`D4Ea+iyppL5_8l*x*tj}_Svp%_5o0agk z9U7A7t^X%YIF))l3kpk+zUUo@|4e>GxA|AG2DDq?J3W^n51)NDFIzT?Ff4}JP4)2A zYL~AXXRXY67U7a7a{_!9MO?&mwBWj@om%)0Q>A(brL=VLO++cJ!zFoNR19GMeawY_ zw)9oiqBTCrGfE31|F*5nOKBNu|NVT1H{Y!)s75Xgd@5t5@hXf`*6UE)5cK#}GN=mSt z7ud}sPQT%=F#>OjN?9XY>x$vAg?%cc=QlR&0Cf2X7`ck0MYt%zn?7vs$GzwA-&O=# z9Lm=#SIrGRMyn|AJI+`8pK32DvH0uj+LaRz*|wJ^_4YF8H!UoJ63by$&9=p_P$ zQ7|gI8>`TrlxlN1zn7(P(aI2IFs)d{u9bOF5SG(2`&pTN|E=sf!OfQY43U(ceA>Li?Y6b+|Vw_jlNMj^83jRy{4Fr{YBNWFAb zhinNq1bL#P{CF9J75?K?ebx40+)SE`RVihC-RO!#3Xg+i_qzkD@9y2xr#4)g$q5~r zrtdb5&H8$Ec7Zp@yn~fNT)2y!sbc#&j#9*;FK0ESlMJcID;eKYVBVXSCjHb@;$ zAr@!rDDb&1*|{#YGM9)y>?sHrQ`r@LgyD?Lt3is%J21Kfuq>82Q#88F#UCFQlvh8p zy`@JI_Aq09xmVC%7{qSA-esT}zhYJeb^Nx>ym;A7{ew4ir~JCSQulo5ERNTku`n!F zbGmQY0~)f&!pSM<=s){uJT-F)^f>iEP1IP4a(CP<=jZQQy|S~)j2-UV611_5mz1+8 zRda4Dv@7J$@vz{&zgM``Og6%9 zM#Hw_kFUCL&jy5qJ1raW1~x$_xT!RBFrBkWi7WJ;E2MCIoe7}ZLN`{w%oVyVzEbap z^2wSR7>{@52dp)9PS>_t zl|-E4%RbZlhI@~*6H8E`NH=6)iK$<5@vNx($;HUlml~OM2-7t#^F8Ku+r8p)vJuXM z$&+XRJdMd?svy}!Pmy(qsD>>gqv;x!Q=$tLkza|z^E z*yBU~NktX*s|ObG|IRBk$ip7z=DAr+`WF7<6xrsD>Rr&cFxx8!3mAS&>+)#fSFOri zK8wzk9%I+uv#H+wqu$23Q$Bgv`QV2_Vp~Bl0BLxtjoQQ+-2`gRWYi=X&*Mq*qv%Tra^y2`|!*5lee0?vB?$ zdQ?cr9p_c4vIl+LGd#ShHe9CPXUazMhm7BlwznEjQPK%lR8k6eb~xW}_r#qWvHSil zQEbuym>JtwEXg;CQ23&c28PacGv4oekG}co>d&cViy~%ADWcs=;_?4Pehb4(2M+6V z*z^=3&iU={f0KVX5+>^{3TEvfaBKDigrhIi-Ycz+4S#=l>vqk6fSC2Uo>-8XVp({q zz9^-+E?vPdNZ+CW&tI*F8!E#;S8I;%!G?hZzPb8LfkkRBhXAqfSrvs*IBxWq(mdiN z+tZnihO0+L1t^k3$5mLv6+tqzKuv8DjmWG$fX zsU)WjSnEWytx90!b{?1N_K>1ajm^fVt=&_A-@#nd*E}hA@ZmQWIpoZ}A&GqW+_f{O zlDi4yPUIu{rWNleeJp;<+@ZAD^w?{Kosq15g$pc~ujl04F^Q6F8RcV)R*NT8Tf@S= z1c_VEpCjJcJ6RvCJXo>p(AfpMHCLWB!}1S@Y*KmKNG55@xFY`N4lvL7L`q#jQ%WnR zEDR{0G!SCutm&)Mc3v>dryg_*w({s@BM~jN^C3?9TZ&y>jwWhfyASu1e%#mm(jpm2 z9pXer0B?zmS6nsTO7N>vvh!Rr=AlHarBWrM7PBQJ0bq=zZ|WVLEcU?d)ID1q z^K7r5o!bO4E#iSe{j5K=nTJ>uA|XWb{JmDpd6HAuWF_y8U6t6}w#@;LkQ(L?XgBNt z(9+V&+g1w=^wY~Er>jq#$Cla-Xh3v7f$4$%?R61dxn zU3Zi_pu|}9f(Gn5;)-dAw6R&L=cmkV5>1fL%y-ihJPKwV)yP1o%f_RwP^5!G3uhrL zfmefx(3!4VAsp(fv^PC@Z%N3@<2J@_!4*NF@fj+s=D`vHuZ}*V#Bkjx(j%TKM3xHU zsWYlxFAxJ)93dQG_AGCvo^g9RpJlX;xJh>*@^6*7^ua}ll-%krDBngN)bHVUgs_f@ z(isQJNNEFvE%HkH9$>#cj8s&`Wi~RqK7Iyp4h<6qi9CcIRA+R3r~uIAh||79NSpZy>YAhw@OhgJ zxU2Yl=~Vw`c9#u9tQ+WLZ=h#V(HqQrKqIYe*HQM$vDR})s!qi=aspNV-{hsjS%0UL zD`+dg0AJ6`wUxV)!c@5?U9DGKP|HYnEy0xVpWW`kJ0EP@r#-G5xMSOIu-f{kkUdu} zD?6b@*~DGIZ|W#J`t11P1(MXtulgN%vsj4)T?N<4c98oN+Uoq?LgPt+!EGfrNMvO) 
zrYi7i-kG@k-Cov#Qw5&Q3$BC}I1MEi>bA`UYlV5QO$u=Q=%S2VzBiJpBaxFo54iBz z^^`LT87;(>Y#qS{$w^0!LI6I+J9A?Nz?XD32)(WcxmSgle@TDA)Oj0_CA}ES20%}k z1hERzp#vs>*3^43p1*Nx{++v0TojDtc>6jr?;PHwtY#$IAz2=h!;*MG>+NQQgCD2p zWWA+9@4Y;`()YEy3?h34rv!GHk|KLY{=GG)G)K_k8+R2&+J31kdog~zyd>Qi;@q3S zTZ#}ks2yu#8iGC_LWp}-I;M)!m1b(kwysHL`U@2oGkrJU$T@5>#3 z+2fVLU(m~vONksNf32S?Z_Pjw#oXy+35tq!@Zr!rSt)T5-=)HME1IIZfDP;%@w%Ao zDj)k{`J%fsY(>v1L^_12)}_GDfTr>R*jW`VsWN2aA7u$1{^|y!`aWNLyrRni3L|>9 z@X4~vx;cu3u{tk~d81h{>?}obJD>F&x5N1GF2C+w(M4F-{0}{3f_T7X?>(D%{Yr+$ zPxh8DA&_%Yej@Y!hibl#S~^f|##A1|jKy+)=#x0TP}a!`IjDoW}W=ykc~;W@7}AXkZSQzAaYEDYZygpUudqd;OW zoC37=x5SA{frBp{E-m<7k{5&>tTTcp7$cCRbb?tVElUld%lP!AAl{jCyntiq zJ@I)`mykI@6Y)(>Nm7=U5p3+}O%wEouuS+UPi!5TVtrEhF+U!6DybQ}A`a_k*{?Un z&%p^Y*1R5ZyfH)~_^fVwNjmvKte<9sErRel_i)p9abMH%MH>8f>zWnZfRu`B6)0#L90enWc5_ELVzZa#sa!J!?c=h;?j?=H~%+#+LD zJQGIOSJ0cJ_-?jIljM_SD`F}QB?L|9wPU8x-+zTCE>(_`>bl`Nqk|mQHnm|apIORh zdP@!Db_t?=?5XZl@~GDTX;SBmKxP`q%mbNeYEmg%KSXVenAnuAEc&jZ0DS$g zfc`wJpP@qE{(^+qb}nobkBwffMWw*_9ypvTp|>v7(P;pp!igSwk!V+kr%35S4#7d} zVXAnNu{~5T({YT8=za%nc^rT0GcsO#v4vapi>u=YAngEDB}~7?*N?MDwb)iwh4~Sc z$V&h*1!P1{5T1w)Df-M!6NX9BpxHR=UeRagIse$o#l$?<-7$IXd zhdlq_mDqnd#u7i?b*)Laj#uMODTAymaVNfF>%U?<)1xj$^v|EOp=U@Qhop@TJkU8t z=$4!cHkRi{nnB^c)Iyn*xwJ~0HX&@&;({|+0d@Ecc42lGWmbvrv-Fn>!g8;!ch@3K zwMdZ?k-*jumhA36^Qe*yE+Ed_j_LtwddKSP5*m$-B>Xw+EQDHcje;K=jBXqA?Nkr4aY zOmQXipu}VehnXsgf33X0=Gc?ex}{1d)!~!iUpBU=boTya)mD4ezG&3}Hvn174*a!) z(xk&8e+Cpqo+tKG8+SG&bubp26`>kq23W0p|KZNEiL)@-CxFfvd z4F(-YvCGr}>p;T{??bMj;i(B@=ZTD`S<{&k%6!SI0VK!gn8)4Zn1D*%Pmz9^O^jv|pJ;2&t}95EVE>Pz zGYv~>>)ZI=u$fU2QE>)GG*cWhXE%t3IfSO>RH0dES&^D)S#2govqICdw8FH~Mw+$h zfJ2#;kqt*1(PO1O)zPNMIvt;v_bZ>cuC@2tYu*3*{@t6|ijbD9&7E0~P0iV(l|G|| zBj)D4PQ*%Dam3YS*(-|)9*3NB9KXCfQpv#2t~mVb_+d)PgH@Y17A|?voff$ESdAFo z`C1d8)sv%{qyT1uoN51?Y2Vpmp@8UZxpOXM)B07U-*Uo@i7h#IF=VFYjNY_RYb;_| z9v?+Nd4^eE_ve%WU$9;XHLGaFOiRGrQUo5@tbNqUuq@G1OF$b9VEGyh6d6WuVp|~F zw(b|D3$eYGE&W4FS^^_}9Uw`CPN)kDtNMP_tSosdV6NMIsB}qqUKD^Kug|_6PBXv2E)-WxCR|wz+0+U2kF)4 z5c$$xd?)SFXSDEZw6LpY!-tpvlird|nI%$_^|EtU-$pX?-r^K57@a`ZMh@YL z-l9Nj9>5`8f{px{X1>=fYBYqL+c&r>%yyV)k%EKa#53>RYs?c@&N343{n%3Y4!6+D zvbbe#5qQp}#pZ^IB~}R$=aD_Z|L^|m7FO3Gtg{HI#!-~{o0=IaA6>$Wf_V;eq4i5Q zEC3Q6*8R@Y;}nuM5lF*}Enh$008V;=HM|%^9rLZPsX?CJBHCDvT`h`PC9cq>m~R&> z*kgOQ&(6NQ^P-e~`0WTyZ$9v8`VF@GTXW$0Zz91T0|JqSRmlSU@bWq%>Ec^n8Q(iFCtqk#jdA9j{j!fT734w zKhzcp#^PTzilM9*v=FxBsoI{QWY-`&CRP@sN;@K0H|MX+ng1S_cWgl?LY^_au?W8g^2!pMWq{rV zqxc%cgg-h9{WK71uA6?}az6wsLwV$C(pJKW8(|a*HeUyCA3t6HU8wz6*fyZRBEs&J zHBg;~mAVb;X5hGp^I{Kjjbzs3Ad1Oju-I0|@)-a8nFl2i@lO`xXx~w^d6O|j8h4aj zoNqZ)jaoS61xh0E#&%~#dN?QLw4AiGkGqO&)w|+$-lMvxQ)Z`E+^O!9zJIv_jS2Tb4zM=9-VZq-s_rhm?xHeYp1Ycci;(MzTHte zgr5AY0RQUSNz3elr^j4~!{_2<{`shJ&-x4T&$!uC2ye!W+x}!_-M*dn@Z&F@k7%Cs z(^{$gCh_*9qAv8VF^XcH3gqI9 zhl7h1UA5!)6|<%*u1xSB_euB8rPVu6vblHhCIe9l-6)4=N^y*S*}mNgGPk?FNlBkR z$W@RnxFWYm#eVB$*SFNFgj#Va4n=Sr7p*k+tsuJn zf_EurpCt@yF_h54YzrFf`83sxqwed?ECi<`cXB^76@F4d?d`d0$Td!=#TUiST|fu< zE4J(PD4bX02`l5kalPDrS;0QE@r5%#LDCu&1wo%U_6%~YQs~q3e32;-?_bl>JZlzP zs+LisS?A@rXwmuRHm5l2?zZbe=QHU8yExbl$3BQEjB^#WJUXH<<(4sJa3n6*nM!1K z&J69OGBtO=ZQn)~a5=>3|0;c^J#F?xnBSGu&Qa5g?q6Vjx+s+V9RK=I+QA#9#$7AV zS=<%!y$upDiQjo<&em_Es`=;fDL0dT3d~K?@{gqu`P<~l>C5U@rq;&)p6WGOH_z&= zN#Vc3KTS4$xLoNk7o6v@uD3DL{QG~bGL9)tth8I!*&!tb zb?U8-I`rx_K)4wUeQx{o8}g9X1gs`D@D;KGfoll zR=H%Zhhqoz=p73L$myjc6FNCPOLQJ*sYsd z^5xT%^^SNW-x@u4o#)ofv2s>w`s8@XupC)ntwOu zKZaU?Ly!y6Ie6fY1fKWtKo>ma6w!1I4ZZ#6DbHoh*GV$vzQAX1g^Nnl%s!9ZaOTJP zW$AlZg+6FkMK7RW?g*MI?PgDKa9IpAp z85da;>ObN$1wVT@4?7o39PY1M{1LUu+ShW?;ISyL^|d>rtGaD$fAyVcpNLAD1Wncm 
zoZcK0Vz~6fl;;zzW7=9Tu3}hQ|Nh1`*c0bz_?_HBvSyEx3~h0ac~j_+2YRfl4Tgcu zCi2-Wn1)MWvqmEieoDsAJ9VH*(0&omR@Fm>+cptK^Jqs&4CFZEtej8l#s)c3_LMfd zO464ypqBi#d4?3usDvT!pCgy9T7CZn$Z|4}VQOcJgD9cYlk)y$C z|BbV+aS_gGZab8ZB6n!~4oBM7gw;0oV%%8{KL9(~VSe!AZ4%d$7abL8mNd`8+MS^i z7VWU;=!^K(orlCgp(Cer-=O<#`P5D{0rmxJkfJ8V7BGWmJ!^AP#B3 z)ZH~3ymnB`v-8^!dtHNhzD8`4#XzmQ3Cwf&{Y$(OTq!XK#KI&ff z4Mnj-3Ia_EBa4=Sr2eDwkmL@MDWpj#l!g$Mx1~VFYZginjhAsA(#pU*{`+cn&4zL@%WK7c) zLX=sZsxt936&G3BZFvyD#yC5g95?7@u9GO{HiI9yF1~850DXK2z&RKjrCI4O{?34* zGMHz)N+%)t8!#3^Eh!6jx_b#Y7kd$gC{HCzwj+vU|}V0!;EN+m}7(ELgMNZL!YBg|Si)9e;vipD@g8+eu%w)r{! zSis$U;kvFZL&48hnhs=F6@FS%d31i>fg0a2P_*s8s7qwa*|{*QbFuZ0HffM&4tdAt zBIYWo>nKPq7iHRiVG|d>RAKxnk&Zu9IL=NE^?0exO5z~q&l=TUec7&oNWeNaRtS(q zFytu!O%=vF-FW`PvnkFT!&Lln~PTlr~562N}v$R=nr94oH=vR@QZ}Fm5~e zJ1;a`h_uVt(Dvjr@1M_~YD%V7zOkCWJEA)2ZA)?z%j#ve$F{)U14^kPqQ$rhpUIT0 zA#!Ogcqay@#EC|H9xj&sa2pDyDPw& z^PHjGa;sBcnVZ~VD0Al1$6Pn#>Uc5!RM%otqJQ5;ukJ;&)o5>l1pS_dyI7VF`W$>8>YZn4{5~{#rDYBSk5Eu6*u;iPK26$nC<_tIBqUrAFKrR98mqJkReQtZiNP0(D|GEp1Y+o$2V4~ zu%8l1zO;00Qi3yM#Z2Y8(+K&2V-?4F(M8Jo{f$H|kl_qc<9j!I*XEW{#3?efg}?^V z%2JbgA#Re)^dFA7GVj?RD_z>nF(=9dg{z{+LIoE@)UW6*&Qg=r$)4YrE(h>+deBLY z$yZ^U)INra=~W^qN`wxh;P$}b0m|?qv*lklS$UZ)FL;~c%SdTW1>g5WtrWs0Dw-iI z+JK4M#zGC4q7u{gI5AL6nvJ8{*N?$<{Kk1XZM%&hJJXK4{)iu>!Q~)+=ccXWqOfzv zxz`!91X&O0pBowIydq7xW7yTyw#Di(x^uuOvuMRPu!sIyO* z3`vyS60>|tu;iIIBOviwtM@Oi#9bg+B-wd$SaQ9+e$Yu8Foo{n*DMs)R0yGHr6J!+ z(oEIZTqG{@yBTA|Ge&vodb%}|V;uu3ZD>OZ+BkX5wE?PqFQxZL957c>;<)(@V z?RxSFM)lv%%o6rBw+CjPY@}T%%1m{IBH_eHq514>@5}u=VBNbt4`ND<1vihWSA{C)&q4Q zK+plk`#^sIz7B}E{4S2Ef+Q+i1sm(c#w)u57K-4zEjUYmtf3&@vJHAO9ed_#>~-h3 z{L40a(4Li?b8eJ_2)5kN*jrT9e$Ot++rMMY{&lC}CJT^siA_3-G`R+Cp3*cC@XUm9 zY6^U8`+jT^MJ$;w#?9N1csuF>I&p)_am6*qq+0LUwCR<q;ZpFCi~HUS#6>2 zMbB5FBlY$FDZ|H#8Z?X}_qgOgbc8b^QU<$mJ3!baM9=ckF-XkXp@=&)RLKS3AfV{5 z7W5I~01aU9e7W(^5`bVJ(R&Hdujiwd!kB^1lfi1t!_hg8GCTGf|B1-$ewsZ$JD2#g z(S8QxndU%rt8cX=JZ1{BoPjMxT60BMm6~*2mt#7Wv+@o&E^~NxyVkZV;hz(fO9avo zmvp79sWEW#+YfaQ5o(8qn#yK+w;1`h1RrCGKTubSbj_E|FkOAsnOG;^B&YljA^Z_T zXI&DkM~}PvBm|wPOljS?1szA3#pEFPOf_b3hBT$|G;mP|(@0Tl%vLs0u7vLJP;(+X zf)-otJsI9=TI9e01Jx=joHp)uaydJudJKKVE!AI{8mt4^zmBf$tVmhF+KxHRn5t*6 z_uaBi3!TFH!D#pW;qx@E=d_qo1XqsWs)YEt7T8W5>Ad4Qe`ktMMTy_LgQwNRwXtny z8FLpLG(9BDZB#bBTXS_H_*ilV=7=&^3Y4069&;EvmKd<*QW5R+3{=2?60;LiABC{t zdUw6#M!VHPrcb!a3fF|j{o`8r3ti!I!gD1$OotAbf~XLLz3N@0cMcQ&o{np$WGQ?jOn3;t9<-+;PDHuN?PJ)2tN}Qz&K3oS$rf`-*e7PQ#HHBda(4`0~cnW<95m8KEfiYROJwhhg{tANx3PwzrIk zz?lAyV_`m>iNbRs`74*TZTRP4$g{}}Yz^8Qmdr6Y4*70_Me2j+yPCGDBSh+i{N%r? z(8JC)(elGds?)30n1dRhCM98&5}PN)t`d@NiJ-+IxKahNoY7}TtxHiUOOK&vui)c@ zrRWkpK-Q^_RD%cupQD3HDfoe=q$8}=5sos5O*-N& zC1#@^v6X^d)nx^XE*=?;XsC_xTxkWa0mZ%D=}+TPcoF43>_f@f%rNCpRG0JUX4GfZPy1a9?)*xN*vZ- zwH?b%_ZK_=&K)SoywXRj*&lKqUv_Z@<*G)9>v!Jje9(No=ip9sxUhHLRDEPY+C)A& zScLX9$E^Je%v4`HBfQtFBWC507Qk3Bo7iT+VF+Vj_WT{C`-tyi!cPbt^r$(nfD)@s zTVY}X12{Ln8`7`oyL#8)vOvuOfQb@Fk^@E>pmQ!rK;(1gg2#G+#sI(+etGDF-=ScT zJOGeC0A*8{pfitunu9ckeU;8A7oKytMzyL*Lm;=EH2`?kgme*<0k@}wJ>AcKdPqZU z7f}<&>*i?B?$>Cgg68cZ80P6_u_XrZ(q|QSFTYlrbFH*`&V!%6l8s9x`$?n4-CgB3 zyViHBLn&MSDo;BWig7nb(8}7*lsP7a}#vyA}*mM=nbum#17x)WA^BzZBe}|$O zmEU}k^7J6u0e);c)xprvx55Al@V~Lmy6w}FWeBcBHAF>Tkkv2x9sopL2VVHHsBAf! 
z4N?JM79&`QMurXzyf`t}sFtzj=_|hz>p+?i|3ZVKsfFLOuUMC1PIetk2gsT!Qm?Mz z5QY3%N#Gk^q;(Uiy9iSV;Y-+N!VGy3d6UYROaYRQvGKM;Z=`OeUtBR8b(24y-T&!v zDCbu-+=YH(+f{bx&ya%AR|TURxP#=_!Hc^OJ6`gQSKb(yw!Av!vfPoZp%BwGm>ePT zF*_{GOLex^Z(GB*$A4bu$yr`{%y!D-^qGz_w$UD>sNgfuYJrin*-C(5m_Vh4m(P9h z@{l#aTDLIkwO>&Xh!HLSI141Ne{2j8hS=zmvfaiaaGC>R0Q_?y9-|S)7R?^C?wHzH zgxT_%n67L{R}*>xlLLZ@RMEuZLxicRdmh@S$CcEXS7%d!xX4EQPVHD$;S77@+ZAx~Ze>=9&ghay>6uO*3%!S{0kxet5%50e$v;ic%STa!1w2wb|3T-FT(1V4cw0zz3<}<{c?p0bmzRvKPVT}y9(US8h?W~q^Xy=t(U}) z6?~fyiV*ApZ6?a+ut}o?yHlhXUse)iZK9wWEQI>_P zej?gkwxi!Z*vr@;L5!(w3rrltO4?sPx`s8j4Jy|+{_9k-%QkS?y|a(5 zU;X?#p#0CyjVrK5<3Wvj_xjZX?MbV#Vh`UOsKH_LZ#Y;+T5{Cci?Uk@qIg^jhct37 z({Z#$;){^C=wcFGp#!%o0c#-w;Ow4MfjH3?q3c0v z=iTFk%}E`IkxjeU+KZ@GN!mQr2#V-QX>H=CjUy6cTm4BX<-!MK4)u~F6LK;*BJr8! zk9zlKzudUG%4w{cEv$#iF5wZ2&=+!?fNQ3kLDs2aYthcNZS|ld?CXPxeG12FpDKB$7J#LzWYXGUms{R%^C98iF$70_@{}Bjn#MXr- z15i^^F$3r(DGT+IForm%g7{h)tpLzUmG}rD~Im`%91V*V|%3oN(K-2ad z)quonDy*ehhzdcUDjfHK&R!Us51F9C`}muAK`6qaOVhp8j#r8=RC5G((F0X2(0!tL zJHMxX*(^3YuBz|2GOpU~cq!_E|7hN0zlSf&B}V(RjU~GdY!x~^4E-xwd8pB9DL4gr?0!>I`WunMA?X6Yf zqj}xC7eH0r6=7||D-I?xOvP163paA3!2Ro()%?$t@dx=8dF*aHPH@)^r(Fq0Hd`wu z!Ft<0?x+Y6Jm84;P^cuEBPlD$7Jhi<0%K?4%{SYY;D)4r!_}^b z3Wl@YQZ)3x>>^vTjuO?BR^DR$BWE0`fJK)YT~JY7j)W4r3evEyk8_&}SGgFxDcmg?^Oe zrG~i$6g;2jjP_7U7%DTo$IA_5Yb`$d*>{Nl1f(8Q^*W_dfOXk(VAE?cS6w&n?N3R_ z42gl!L^gop!&y`%h}>_erzUC8Hgqj^U%xl8{X~jkN`?-fI@_UyAv?Ml7cIcLPM*cv zQ(EbN4YB>H=SuJREwfE%A?=9)eUk*;wpM0g?zyf+k5jLke+Qn}zYV);N3)sj{r#*r z(5Ld+42OT#u+j1f4aR4}V6OTCVRkv1wCW@A&cZ0l6M!zsYRh}Eoez6~Y1-0MU%@?!LK7TI9;V;z z1m~2(=+t4u7+ffqu2E&9?b?Yo?{zg{{s!GFE+@QXL0|4WquQ7Q-;WA#%&do7L?Sa= z{ubQc+o16_MWn;ve}}9)8OME89P%29)Jk8xZEU0XMDcT37`_|pppEv1&&w%1AIMKA zF**#)Ur1}Ptit!;y+mUC69#3=ta=cCje&J5XPQ>%xl~6c!HsV4G$FnA(M-Gt4P_(k z#3%L3=Yo(TlZ@Al#Yr9z&yRE5Mwkiy2g}ny-@iwR6l4 z!Unry3wB<8HfbvB5c*_N)YCI(_ghmqrt1ZEu4~{HC*>LJdQqs{_uG|Zw*7`}q{KOK zTG}WvAP0 zbbm_A&DV{$pBFZ-h{&(gZXtm|&!*8=?#4Hf8LsbwIoMz!Uy(gwdQfc|&=%}w{|%7+tCI~NR1tn^;g*CU}{Qk@9acb{)i zV&?sJr1wxNhSqK*#oK0&Z42o&bW>y7QZp>>#jz%bJtNMBnbW~akKL6ytpO`;E@fkG zBVofKR?fYx_XJjfGm`l;oiEW{+{b5MXUg=XTMHXyG0LvLE_O}4xYWeGHPLj*r{3R6 z30h8dSs~!$W3e+^#ui9?)tGDsHb+3sVN2unn0OWHYE$rHKz{xMifrKbDF6#PNYT`q z>ZJ}skfaj3>LtOYGJd1jO(DzAgZ?)lUEE)h)CtlyBKUUYK2dXiD_ro!0{8N`kr-Yd zd%78}f`$3tl7+L3!*${WuiMAkgtK|6{_o8xiy>CZx)`AFO*XGmELIHN6 z0=rU)%`fd5&AdPBU4QBCFJ0SnBk5^9{`LMeVWfdsXorV<(op__;EEbtrT}fAz0GUK za06wxY9SxJ)Ne{^xQbZ7JcBcb#K%wPHCU*?&JHmd2Gb6Je31bMDGAeI1@$N#<Znt@eAB5LPI&s$ zNf-Ld&}(z&EWY-8&nR z1}$ip<+Wqi_UqOT3-a5sULU)g2HgKX=zQnpf^9Mr|KE4g4&3*51{uR>ctYaWDNh}q zm8GUheerYW4$GG7WY7M?_F+R|?0`@J9BnX)mj=qjOTB8;sxq10vw_H&6e(`J1P3n%H0=f1eQ?bITj)Z7Rdl)P9ZB?*A|n`fl5#l!NQnPlhtPVtvxgQ}!% zq3*bOLI3%HjwqGosj($#xUH(nlz%HXrQwQ^T_*Fwrv}`u(&A!PME*_e7gH4U&NzOd z*jaCYe@c8iF`2`%_zBs(VJJcHOxStu7E=zhAPxZe08k_!0;*;FSq3JFjuE+F)-+b+ zt13*2u$+me(M$D-x(e^R=$(u{%!QD9=i$4iN6o(m*sjrFe}`^cp3%Nk1sNZ#ZgN~ zW_jXHM|jv?dFF*nMf z#Q?#8(Yq`$R`#2~EE!iJ`)4Dz=w>>nA6w8lGV9J-vPM^tb3uXUTdcf$D*t(lIr=db zRh?A+@=;k9xzmF@gP5^R$OzapCbnq@Z786Nz}GJjl`~aYN|)E!;lBdn1NC!F@R-bg zS(%@KBOI)>yGPPp_1deouECol=KG_-cYQ#kJpFzAvUd$5WTAu`D9>l>#<|_jwhOS^ z&<$g;4c2cG|CPpcVoMAx>rc4td2fEa!p+Eoer^({&%HHOeW}?)vHkRY#ChrDKVlY3 z=JOo&D(uBYxm?Jo$YemgG-lDKJz*l0t4>TL=P;|l{?>Pex|`(Z`>sjDMY6TZ%B^|N zkIR?G?91(|Pf6d_HTIV{qwjglK*zhHTW_6S&2&h)26%$TGqV$OW$D{Y3T83gtEspm zNg-qPgD!Nj4IS+^#o*AT24M3~Gbzko!iU8+LKJNR>@tU{A7j?(Do9vaC_4ZKWSj}S z^LA_gEdI~g37e*QOZqOytW{NPJdQ0qUQxhWvQ}eLz>sILFrX(UJsg6Zpn0Mj%V#i) zhEXE}fUVMw(hqo0#F@@mRwSo(Tf@rx$f;oQ;L5zgwP0Y$>!?ky-+x-PbJen4yGN`# z#ay;L2_Z+M8vHKH;y0b;R-hZI1&N=eQHqLhPjNrqR>qvokKOlte^bUV1NHNA2jy}{ 
z^A`X27Z?0pXJS}tF9FJnkB(Pk5(UA_)$&EK>;?f|s>gKv2hY=^ePGCq4#luleD#M8 z@oFA>w^bfMRy!<4pmne;Usuu7hov6Iu4ZG>jbG+>R%lk{Bu(sb#;T$L*=+O_`sz+; z82mA`o!lUn*iMLpRiGXFp#8*{mp%Byi6W!(0Wb;DTvV`-KS!&4sff+rw!!{s-dBj0RE;VE^ah-kx2F?6FqA zvYG*ACSF1U@olcSH#U?u*+O{;qe`>#x2RAQ;Q8IYIKgb{V_tL=1X`qdTe4Ut3|Z)X zXr4-9+>a15s!arc7yy~HK=GfuC#-WvCi*6V2lIB_USZw{a)A60-%03Sa@alcqImcE zoQ{^1+*k#6iv~Bb`}4uWyPW&-hZW%A^M^SasNXk&wa77s?V+YEzRL{ct<*EO%=I5c70{Q4i6K{Hf z?~$gxU$5NwRi)Aq3P$&-6sI8sh^*Pv`_Fkufu!AWEr=un0MX^S`R;twgO4b;M#&1p18;EmPoZIz>^v21M?wEKmNJcG>&&G*JOl$vH~@Hyi3wA1 zekVwPp9ofgSap}(yGy|LUI!JxdT9%(qpArZ5GI&-;6h4qGiD2=bVOsZK$81ZmN_N& zEY;2{Zb$Ayn_1ERf_Y>-h|OV;CWg%zU!2@x-}fzgaplvCRj;iYUr*jNDIG*_`ZN8I z-DkJ@yDJwvE*kJt|TKUOpHHU1|f2(@@{^8NQFTbY$y1K#T-agNpcvBRl zFtH7oUHgk6X+8enaA3|isJf?bQIk&&#U%rcQ-;XdLWL8)t{y&KxND#xc7I3GvE%VY zsV8-09K%cBl875>4Q4tD`axDz%K|^I!Bz)jcCK2W?UtKaQtR7Aj-ZTa!r&`x z4~U_G2DjK!mFkOh@UTg}&~rRS#@MwTe{kR0J<|I|9=XFuqvKuY9w@ksa;>4OGh4R* zh-wO@0fcnpR8AUM~G1W$Dr)7;qQ4y^5=M01bX;$1zmyOEWq zM{bCBOvlr9Z~WXliNCPOA!7H@r>%Py&M}&x#%>zUuC!!7f_f8zW{|y$!U-wKvvSx@ zPBRVy#5iZnTMFKJp-|turBq+5Vk+yo@$R+$W06<~L=$Bcp-#bJjms%V@nYVW9*mJ* zE^yxzw^@v_8vo9L-25$`XH#G$oNLO~X;#*jy|Y4-LzM?m*(K2{osRJ+n1HyC)A)Ih z4LnkaqH*C`=QXxqsD}!UHnH#6{x#?`O^dwmnxU$_O|^DjBHnA7Md{egVqxt!KV4bL zDH*M*6vJZ#+8UzA4Wye~24A3<|L5|1olRWt>e^_Vx0P?UZ7aQ`aC-MrS||SF^Inyj zJalQXEO6St<$~phsl^w={mt(#^)F7uTiKMVL9Elv=pDP(T}Nl*-3#V%(0GRbG@2s6 z^t}h~Kf&f3+mDTQ*D><+y{xiaK@T}NO{m2caRC>o(r(`fnvgdFH!3&nOJ5~i=$gLT z)R}@fw5c>ua1x$@a+mX_(g>03RlQc1@*jP1ZF*Tq^67<&?ztY&*F;-I6|nzmO^Dut zDjtd=Hf%arxc}ScA#t#hh|(h6C+7G14EQVf+TN|1%bnM5|GCY%j=w`$PxhZ_gHDTg zOifqXt$(PM$K8I;;~9sHo;m$G+FTxKmIc>IF)#L=@1riT4VO@rf}>>jLBYjX_f}PZ zr9JZTk(^NrUBEdYQ`vZ{9GI$bD3=+e)m-u{svp0UUPp4cKkvUVA!Qi_gfdngIM$QF zrEJ_!6{cG?W+y#^Q9JljvC)(9^IF=I;aU!w^73`sKz?3Qmg}Ls#FzIDl`n6$ik*rR z7j6tLyO8Y;?-#@J?CSjNmofYQ*Os&+cuhMy`GNJZQpvLnOzlj+o0F&$Z#%}Q+f_Pl zT%FeI{9J4Tc06!-HP$mZ!>9-<&32t1v`;2d?Y-H;#tCM+oervQITnCM25RJ7t{Cfd z!;4zak_Q~mCME$nlnQ!|BiD@pf|s2Yacul1T2hn$KAAgP{kA zxhni-U#XQv{;fE6U2VvUWT7qP1Qi}&b9WjORgrebMjo>F4lQrTI-YEQ02t3Ltk|l5 zj6VC|PWUg=On!8?A}@`-&MLRc#dEYWVrHnY2p=hR<#jiG{3HtwYI$=@sqhwP@Uc4H zoPv!z*E|!D?15gljPb}p(6Jd|%j_x4Dz1Xlj4;qv#$W}TXH1hduJ*oip0sAB)yB)9 z4SP`x+9sD$CPbb50pG?3s!%Mv23_&QsKkvIaZP4TSFtsuwYm=n&hEQBG61%+Q9<`jWj8o`U7spiT!Tf!M{~SNP zB4Csx*G1y%h1j*PQS^BikI&sLfOvUM>c@;e#y3WVU)$c|HbiFsO;gFK0V!f7LNjpl{Bw5fXG3BEWPobb`C&+Z+mr_?jb8z@LT}E z_m|juL{vdEP*gb@jSn@j9ZfxG2{%e8D_N08PYP|LR<1>Z1|y|s`s`x*DD(LZ}BTwQ+Uejh4t=TYB?ME(!mv9?Wf0;Ke_@!244o1RDAE=nOp5I$PlHKT+ zeq36d#JzIusrF8{_?rKGyaB~#_6NQqWX#m0w18`LxgA4oS6Z+6Ca|mxgq-gORi7(8 zHm8*>_lhEofK4X$VUgAbo<=8Vjtt{ zHQQ=WcCh68P;Keen23M#w;ppk`9*W}=Aw3)Uv$gm*)w+RlNX=&FIZFcG=eo20p8Yo zE%}0v3GLS8JngaN4L>^^*m>q&`Q=PQ_qhGcE<=p37k@>tGn>-WW1lLuFldl|&03lP z3$WydqtQ}+&zx(Non8wQKP4%-^CSRhblaeRdy2`K0pxWT7IN%a+L`Xa~1I?rO$QPG$ zEwAa*v0l2DpBFN2&NK3ELvH68kn<<1Z#%ffx}QumZD}eJavQeVDBY`wP;SEBNxL4GH%>sQ1Z76%ejJt?m3Lbu z_XB&eZtB2|LX4@x;5GNStumVXHZE3sG^h--oFCt9s06{5cuV z3Z0#Q?g`Ze2rtJt{;p&59B&Qa!taZX(v+JX8^A=4nm4k0d@XYG0mSPJ)ilg zI$r(co~FjNhVv?0>OYG|z2kA64ZN^R8UrH#eQ*8BL!fdWo(qI0m^Hedx83wi!i|QGM`S&p zzS$b{;4y{wr3E%otP|!x-?3vzxUirr1H!h59C(adk@uV}yGJd9WB_hu6 zt`z44g>ToJ%#ENWABCrj-9+8@I?2mGshhJ+fUw73XyfcDG%gpL`ExA&In=hrvols) zOGY>ACDJNGpm$nkAc&dJ;1LbrpT!j;Oo7BQn&Xb|HY#ogk~Q`(wb{yL0L9801Ebp$ zjd@yJAPn%V=)@J9O#v_sFsji20t66fnC9?nQ&>202>;Q_tWn~ZC-rMIpim_Sr=PnX z#1`*wZYH-u@thKB>*S*8j?p%TLwVmz+h^IdPAX6E#!MrQ@}->V2N~79{2JL=UL-O( z^xrO7s-Ejuapb31Ddmj~-OZ5wlqGb+@glJuUt(4RTgbR`I=ODeC^rLt65r$9&apNC zOFy8T@scH?1ZTM0asn{&2N^Y}dBczG0n~eFb_CxxNM&?e)gbfW*V1k@FMXCXRY@at#^es8zrIF 
z0FDeU9*`J<1z6Q<243Qx78#I-uI6xQ^{Ty;MGz~x&6b={yM%jobh_*Km#F9W3^Dql5-aOjXFr>|Rb^qW63MhzMZ3M_k zfeOnhgR;jNXb11l9Qhw%<=l2o?j*{6chp6WWKB5-YxtU$gAt#(Ud`Z1=+6*(H=SX_ zY(%k;?f@m4K|`R~v?OgmjG@4I9fB9YbbxCq107F0x=(Q|`P~KtoEM!-6=JMvP+o}G zt3YZzjtZCpy=tWL-91ctcL+t|d;{&R)P{URG0LT0_a{OMv|hiZ?h2rwv648hb&Ep> z#-SZd)7J`Pzxys1n9d%%eda6Nbir5*$_S# zX9#%-2#0vYs**@YpgUx|o7vvurC|k4X+sL6&h0&c6Pl2kNXPP?z;Tw9o*M*od)<&$ zZjW>%ch9Gzm<2s7x_15)m#Rmd`_#=U)-VtW^`*uL%2%e28jc?!KlVadn&!|dZ63A0 zAZe|=L&LKCVqt!9J=`$BH1TwqGrHgY%kd3KPooQS`QMsT-%?|hxv_cjn4x^{5e^*> z`t`vV$?|wg-yLgAg<~hyb8%BzD;jKB3=#}gUa+1%ffy^qW-PG*v5JQz_BDuojTm3Q zm#RBTXMtwDpzVkHs(7uDbM~^{svD9e*-zGH|4vRg2LCaejA=xEdu9-T0?k>V{19ve zalHgRb~lFh-{Mfxa=+Jhc*^5{Y#VoUjtu#>P#pJhA+N`a*Aug_+nLYx-qm2){^nF| z!5gLLrsus$o_3T8vB9%#rtW7pqgbWfL!$?1c&<|~$0@hRUvaN@S1iqt4K{FKYrt0( zp!wSoRt*O`#qn}RnXh}sbVgat%0L1O*Ys(Sa7lxaA;x@^rNGrvsj->>2r{kr2LN5f zp+CV}PQY|$j*+w24pz~Jk+BbmVGFs2^)eEOjb9?h0?_ECM)F!iE=8bf7+V8w4P1fI zIsHnCWaPqYMXJxX_u)bVU*f@gkLmYgr~a@YYYrom`v4M@L5$l-o&jBQd+apT4<4=G zkl~y%QrQ#iIilpbG)9Jf>uLIo_8ON=FY|QxCbld1I7H~P%j{nyaH|MCXV>nT#OY@6 zCGKR2fgVU>^L}=}TAga`ST1+3lkAF;>>8na@g(aGN*IQo+vIktKx+Wv5^7*v`|dks z$!i=%v)StvlEe4H2i(M^I#4fL(KA#BO^;9^ic7@`16=xC@K9C=Uyiqpc*|vAafQs9#dil^~`V!T7)2?Vr7A z_yWd%37&5dgsHVrz~)W+=VsQ$zbI@LE%ADfS!O#g`{M;+7JSG2!Dax9Vab;U)RYA+3Sxhj66Dhfw_qPd+WWrU`z&wpjwHCf$x_HC5Xvy7n7HedeTfquU4V?Q)LahN9a5q`k;@bMQ$?A54 zD`K^<2n)j{tonxN^Ep{XTK;r-#5Ac{LrR_`&v2Y)bES;m8vF2u#eH|8yp;6ENupnt zm;*h*!_spfd%|r>zWg6WXC4q!_y6&GXP;@Bxid}scBg$ettw%rrW%Q+1)-RfcA1it zk~>Q)F;a=bv`ASKda_RovSkpmekWN%l8|`P^Si%)`?quMobx&F_v>Zzb;W~kkkhO9 zn{PK2ru(lbT?+76+#WN&#L}|I;DoKixR%w}Vz&Ga`dQpjb%)yOLFyCh7`SZF%A&CV zom9j?(1Tu#ZZgWWq+I7mURz?3z0>=*;NR-&{*(1z_7=vL@m!NFQ z;(r)Iz+%}=@=KIx9XGCCRJUH0_Z0E3zCaM1_3ZtXKXQC7aTQ*W`E5SA@WoVNWUpD@ zc~9)EEDTbWG1(^}B=yJDGi{s&V_5AzS1|EpC{f8d~Y^^0nM zubr6LL;HK|?M7Hs>5&Rhw4~S0!~eKrJdBeXH0S7y4lOWpv*7z*ki+;=!1)uGSfKKz z9J=|+t@3FVmTD+*`IWWLMj1}Fi3>abc`5g`8*%7|;^(;N5=i&TDEdc{QL<5o_#CPY6d)FS8RA7!~1_rSJ?(B%iL=^fnz`BetCT!A@U0P z9vaA&G~Se0%ye7T%KYA*U9j(Yv%7`);75}a=I%>A#Q(C7f2v(Faz?9Oy`4CyuYPKa zV%tL{y6H$+bq~wKpT4Bm*2Le8g|YYZcd5dd?F4e>f-b^%!P1E}@4w^A@V zRx^>`*aJUXB<5Y)!~NzB_0IR(+V=PCu{fUPRzG7+%IDn$hp#S3sDo~$HktZK0Xaf6 z0h!1_J(Ch*wuE0N^@^6cduN?p_U^F$JklW7Lg24g!87Ay%;mhfV;Fes1DooPF1M4u z09c?>YX8Km04;I+$1mrcb^gg*cmOreVzPRHc>G7%_Yb=p7rh^Pc4^;f^M!@~wk|)PN_JenAo|PD zHw=zQGxT1kfte#td)kw49dq8%i(h$RW9)S6Sz;w#E3hWvP4=CdldDp9?NmnG-rt#$ zU&Vd6%mK1W$>+)eut(U)FTVJ;tcpl-N$(;_+|F;%H*}E{MvGNSu$V-i!!Jc7fm%du zrdKP}-JPL<3g*GkOxHKf9odURucUa&i}Oh@I%lB<i1LVv0?C!-TDnt*No2Ow?w~)dQXOeOwr!!&%65ieQk8ZfsJ6DO;y?z) z-}Yc3h0^M+;ZPbE>NVgFY@W??a5x^-9OAfmTGMpgrw5RhcJ-HoH!lo0d_G}(opR^K zxdd@T*_=t(8prJH=U1&hz_M-^)(mlr{OU8`>lnB82kZ;J3h;N^BpdKbY<{S*iunWy zA7TV`6IlKPwp@QrpIWj5>Py?@`#QSoWT(bz=p3`gqSxD!iK8pb9*eXk)gbXJ>d~5jU4mW^5eqYOwCuX zq?R)_XB^Kzu&Il{NtzqYYFpJU$!TwH*?M`uRktpfjZQV)Iw?7JSa%sK-qA^P&T{AJ zw?=6vI>cj{!6uv1pW?T?JNAocH)T+Sv%F1OUmI6y7+tl5f2*0K{e}}*7eUD5LlHrY zK8S~D%iTTSNv~NUfWZX0Uyy6UQZD4m-)VAH-CbnJnbr>fM8(h|4#*;_d;)cs&>sF3 zA7{<|$UavqB`ta@F%*N*)n$K!;7}Q zlOelLjVUr|PNl-*v1i^~aV_5Z!C}WM zE}@9nTScth+OEwb0t_z_!hKgXN}c~1^(VVMR7!UkXP07j+pMxXr*35bz54GfxWf0- z*L{1lz4W+UNKuGRPt`SnPL_&b4S;eCYE4HZAhXqo3kkS97pNp$WRKTvuj|2OI_^#2 zZh*YW2!9gMQYu-Aj}ToiQ4zIOgbI%2_CKCrbwV7)o*&7>g*^m%cXvSfzX2mdJUVN( zC@m`r3|Mg)BDer?q_~eUyPsRnvAm(oc7rkGPs`@ym5mc)IXJ_eO7dFB(ro3AK${Ll z*{Wt9F|W7SJ^PzM1gkN0k(#GZk&sNgPT=*0AjJ)lnCxVe&4fJ$ed?k)c5H0!M1X0c z&}2zUs?XaIvtJoCX9Ex17tVB}^vlN>*5#AiB>tsJ7f~?3AVA-ubQO@X{5;VQNTUn5 zgv3TE&I(OVhDs=KG{~yrTU&kX7lcBUxSAeT-gF;l37h2TJ;G9y_IWgC%Iaz%vGjOZ zF)67l?Uf{Rm&_!}YOc>5)a%OycC7Z-*MD&4PU&cs{-smxGRxPqb}l}pJ%4v8%Tz3x 
zL7gzb=B~PIb!#_z<0p6`7v`<60EiX_JZAAxtf?9yY#wVi{jQQzOS!nkGCukcAm@@W z^h>$aku}XmOCK_vY z4h{!!5CvzaTDA-Zormd^hd0a*-Q{ku7x2jE{)9yCTn9ZDm(0_Imm81Km`#cs9-#vK zT(#8!Nq)dm%FTv8duj2Hp~Qt^Z=d#2*oj-dcr*dw_ZoG!7tOa|uGb87pL&ExGn;x^zw8{GfX!+}S|r^3{WmYi7cth5_o=Do7%%CcL!DQG#tO3|$lI_X(m zGzczRpT4t%bur7xtHUQ5V+IUM+{BH#^{B~W6`Q&a7oeSRyVoO|ji+)NAjy!-rDcy+ z?AvwvC<(x&ho|i_I7OVz&+VliQj`R9c-UWc-5KVLQZo@8;T4dw;q{7#^<}#YChN?% zJ55M_zBL*AsBLlg!|7hj(S&v@kI4%$Mir_{lkY~=!?_oDgHJy@a8a4 zkB$=lWMkDp4y^^FJP~Oacb&X%I&e-<1LpK_u+cN*y?dVX$>cNbr^KS(H_2VJqXj!o z+ppRK0b2T9eFkNVV_es@ImcUF6bU?Y8aqcOv1TP2QY}8qQTmdy@@O1+DHpf*yBKS) zu%P7hHqhwjQvIBW`ZHbC3re0>w+SmN0{?*X)) z{LRoAV?L|sT9a;clir22$TPs4_2NZ4zY=j%#xgvnec{xqiuqryR{?+bA~8GwA2!X? zE>z2mW?m9JYrCm294WLDpr=QGQTyJ>?SXajFKgynK*KxJg+p>(U$)1FNqK#8ZIRim z!n`t5JEwnS=znh#*B-~z^!Kd4J{0t={OrN#i;oVT$voUyvEJmF5qFrjVN%7Y9V{1_ zEHLOD`hR$AX~fP<`ux`3{Y^0WPJjiYCQNRy%J>0=bbxHpWv7b+Nq^ROU{Ut4PL_Dl zd;bjuL7P;YPhji+$IY60^cQ%@$bjjI8e+_1IttXJf{^q!xv!a!4&WvM2BAT#RYMqn>HU4=Ntl=_ zqEEuKRAk{)Q({fPNcyq+?~^Xv{owmLX<_#9iznJglp~b3USXmowx7#vXA=e1fRg@4 zMS2&KL4a zmE7g-(-!+NM5^81#yka@4h@@|eXv(70rqOwGlk>tX)nf?vOc=7f?ZjQ!wLReLNb>$ zres`>rxJ#B{CFrM3?)73yl*~76>M}8YNh~a61fSm8RNF`IM8sfEz~{+4ImH`WI^5J zA1YoW7aC03W@3H8!k+ChVEl3Cw8iyl7maV@XYB3pe+EBb2N~*?<8XS*Q5U7wcoJ?v zv0b?vFao%m;~Qu&(s85mdD6epT>Kjd9{~`yYOI)c;M4>;6mty+Vy8q#Ha5%?!_XGz z&t4t=V*mf%m;a|1$5b|$s=+U?t^ntPSWu08Hm*y}YAK9an9dc4Ey(1c4`>P25N&VLtv=ETL+ zRBzUGxc|YZ*)b3Uw++Q_e-hB9Ni+Xz`5>!_aBdZS=gG=^Fl}F!GBMa4`|;@@Oy8vsfG5 zd_HE_z6d7FPWr@aW{IERymVC>%Cp4`Q_-m{z^2tV)Sl{N0o{ZkrB>x>EMJLjy z*hDVYYy;r{5SqXwbxcprtwO zqKJY`r%XRT>VKsqxvONM15Bk28)GEE>{0N*oIkkM@F@m&mS~9*RGrB91O%U`g2TA@ zViDaa?xTZ}27|N{45kCm_!!?mZ2}QO;-a%B@9ghxI@Uw9|TCimW9p^>+l5> zDuvmn?8pK;lU3MkwBx~JqL9~V7}7gM#&nYb2?TT-27}O5gdnUS_^Uh{ZG0%}D8C;H z4=GxZ5tBP-v~GQKN$NGJPV-6*x%T>KKG5v5)d5q*rK$x$V-QBb($;Nj>mXdU00@(s z)-^DJRDnmKI`u#C=O&1DY-1+}uvW>y(G1z3rKJTE*RW-m_-T&H^Sle=zZT17{o2cu zGtC-8@*WLwzL`yc-iI-i0@3xX5ufg=(Dev5fhDSo4B9d|&V|>;D!1iinZ77#$)znx z=FKi&&$sRWKoignQjcIH$5NABw-(Yeh&ct^)GXsW9LK^r8pYF`yuWfayo&qNVeo&Bi?mi5j0*tWEmMHxczj zcUYqDy4n9N3k^To!PgLA>f%nhd?`;qw1Lst%o0c-@vv!E+>@(WteuEf?yzH;M87l6 z*n&r-fE&xjkWt_HY%Wk0#Ds@ui8(n_q{im=lfF3{T|8ZuZzxYL+wGE4-r^>T%+QT= za}I}6_Ki{p5|;2H+qa^><#(*b5TB33ASX#vW?X0uWo z$~HbIBL-*>swRVA#su~Wp&@M|07^zMi2(Deil)?Hlv@C)<;Br4zuZpVE!T*hUfAhM zuTD#s|u=O@38-TJTbT{RSXPy@?M z3r3Dw?ONtDfLS_cm%Hfjr4Jj1Y<*jA7u?eiZ{9Z@q-pf$rV>R!j3v$=z6bHhts=@S zW>4yz4XKt&f=GiCijZ4VE9ZX4;JS$8D*9GrUX+NrpdsBTA#8C2)c(vcq=As5stQ|a znSW1P3ea9VkWx2g3f`J%HrH4`t#SWmFZ7Ok$~c~sTN~?LyFNQEt*iFRftELe^i&ll zNJBcO((@S>xuK(VF0PgfCA@8J0x^>hmiPIYB4HsXd+)7(|K3&t8 zu37&)A|hwRv?$ScD3Wcxu>Wh`q*muJ!isW@4WuhB?Ka`n79wMMb}lmF>rt z@5is>k|s5I`(W+aH+p|wpfGY)CxY>SDU@zBzpfq1X8CpNcu2Z=3Mln~9&eb+--_cL_CxQBzJm{;!0UADROS)*3ATa< z&;{W3L#Kg50}HNC>J;#dM}x>#)`)=yH--?SHMlhfc%~k{z8asZ43@zgqOZrj&w-pK zbW}R1S{Y(Kl%FsbH+<|d$UK$)D0a&x)1^#n$?W&JMKEEY^NQpGZqlUqx8I2h9|EGH zUFG6pKImbJ4T`(5f!#WOB5Z~T3IZAHj9CURb^0nH&cW`D3~Dcz3dhk=sJWQOyurl* zaV#eac7hAV+#p0FUCEcdI(K-9AN#!7x$x>9&hAd~jvI4dC*FQlQc!BSta)z4A7^Y0 zotUK@eaO|H-MTci+cYZ^;&8Vkx<}@S?l%?O&g^}0;j7zvQL=-GSl_^ChQZMQzyl$p z3^6W{+AWz{5H>M0!c#kK7OHZ8)RXse*pMJAnpAUU$zX zm=1T7Gq|TLhl%N`L(ZBTkuP_pZM$$RV3&S&eu-YH)4d4sNX=B!?*E+;QjEea*+c}O zpn&uifOWuC`#oQOjW#p8_o3!Txh!^rRCFaF6h|I;w>vwPs1-|b)Z8pHoI`Zai}+10hto6bDXj|E75 zBI-^+yAa%waMP63^yYyG%h#Z13Mk>54#aaJ4nJJ`Kb-|^R&F;*pbVu9^U%DjX)4t( z2=&ApNp0x~$lQqAv%~2DUhliSE^p!OZ<=~z6MivRxx(*{DUzK15_Ci8t!!hl#+w2Y zkyL66GIMh+85+m5Oo0YU9F2?N`@a}5U-5*2<*0}g7my`NTquTV7Kfto;jNiFo{k2! 
zMKpyhNsDSC>@JyaHaW^WL##8)IMkl^NEUx{Zu)H0#o`Rp`T4&ij*T5XefZ8kUg>2~ zcOz~G-7~GQJ!P-Wy+a=BrxVx#H;7ICy9=W){%Fy%&hK>MZE~!=lzV=l-nA*Ps>Q-$ zqQKXK^x)`OnP1zI#exm3tAZ%r0t>tQ6_AyGsY*e7XvS?aBX2!-G+0YZ8{?gUz0Azn|s7VIEw=fJ8W zo3*vktsEPn8AJCfiOI6!l@aL>KNkQoiFZ5A7qpVZ2aNMocDPsUK#TH@>}M@nJz0n)#)xPjy&&6m5vr8 zW_oXNz;F4ZC3Bs1yCd9Y6hwG&Et_I{GQFeN=+w}|+s<`}C*G=w>PA}C+FV^@yCIKx z+kQ1mh|q3Sa3y=#2^ySL-Vf`3?d_ZgT>MklM8oxp^DQgWDjwJvCRSGLf0ktKjm(0H z+Vw$(X--P@s=6JEVBzOO3JF|X+O0LGOL&$@Y6+DItcdrZ2LXJcM;|}nhdS-_f148? z1Z@|t+qH;z>IYMHcd|E#>VA+LiF1rjjx2T*E1ZR<1W#6kchQ|Hs@K?=V8&v3&n=tJ zIS&_cdZ4pqL<_%^@?A;nH@9{lQa0#0Y}~+;0G2omiKYi}B?MR?iDygs%>JrXOzwyH z=o)*QL%TPp?@u_KayKQdrJ>VsLwZem(uUoCZAhN6R5fD0#cx)P(V`_lBzyN{vCzIE z0ajscVMK)|TZtvW{sm`l(w#(Vq0Yv);pLullP|{T6x4SIyX1}ni|nOUZ|-{hKVbIx zrH~)$(^vi+&4JRX-)>kutnVFIc&lYFY{5M*M;wFkgdayPS$!!oZ2tOw=hC1DRvD2d z>V!Aub6IS@y7hyrpS-VDgyd({wOsFX!&SUf^UVw(akpgw zpy1*$B&Y~3;)Ivmx9nuT)2=0m{k6xwPvBTmw8>>fS}!wuD`Amv!TKab(v3#0^pm-4 zWCAZ3#RNahzJ@K^pNnVcoe)lr(z0<23_ z=cAR*G{201${jJK+`%gP5KJiq{7E|%{*-o zjqmK2N#Kvds8X)C4`35)Ip8{XC6};Dg_~t4UC~=kybIQFumEw>^}e_6)GveQqK}GV ztBUm6UpCy%HjndV_u`E-QX5<&86B`$8KG`OejLG0$z^2^<0AXwC7z<=;Y2qvs2%T)#^&k;*UM466eU?jmKj`Uw;(l~#yDs{iRqBaD0de$lCO7T&8S`k z2elG9(vQ_~6M{BFAohOR-ZVRRL-IMo`ltC7>#2E<-d!LpetGHRt>jHtKJ^#7>h|;4 zt?W`QNJUGqq_W`QLlK;Y!v8N%Hw(*ctzeoKy8BxlioE$!W`#;({Do5<{=<#T0(i4# zrEM6U1P%Rgfg#~ODx9%vVj|koy;>9qRiq-I{vi7(p+q(< zAs4nTMztKc=U*DM;vsPI{U4%(Qi&!GTErw)`&~<+w@`iU~Z6}%i+|~LWaqqb25?Z@Pxq#zzs#2yCLjcN7`aYTAOH>l zv84dUKm#nAk7ik;*Ai5|+R=-<4J&H>O(txS8+>+Vj~9F+{(HeTxa~Vme_=zo>mMn#6yP&q zkRsr}(b@3R+ad!P(|fBVTu;+!(cT8~|GE55ac*hYwfx9C$mx6u&OAJP)25-WeGh2> z#;S)`M)wc_p*G)980sDr8f$F7?NvB#Sbu!gKCeV!RMwBKhzZDsZ6q@SJ@P?&! zckEhR*QHh&m_AWXdERAYTv#z;wB>2%%aeQJvlaiE5+-lXp9z}IM$zZnC#3XJn1!xA zjOTDVxuCW8UeNwSjVlU)c23*xQ$%w|B1WXYt1Bvn=IL&@fA84q<#0#6mTswSsaO+EA({wa7`{ zZVRV-5@#>HTfG6K#B)jCnj$O^Y={bPAp+xu^@*;rJ`?!)>R1JCQ*m+fW7lo&H-n!9 z82gsQdIUy}oi}*(bB#XQo|R*9s_cS(U@M9_ivU3-IDMWZ@m@!4&>rN5?!$mmHK~n7 zbO6X36~IPhdj*G-$01cHat72$QZLQ7m}WdgYwnVaifA8{a211Q!ou79hkT4kp78pE*H@;$T8Jgr!`3loA6Zc^&59^Emk8Mfk;sD6=BvK`M8ah5}WZba+AN{ zvFjdfF5slZmhW5S#KC%U85V7YqeP&HP-#K>Gik$s+_%M!FA+dUllcCvP74*9*j9mS>op#2%`h0YC(Ol_58*+_88e4`PU(T8! 
ztlGPet$2F4&NI(Zz0*?4_GZ@@xLGh^Z0a(SkFtQR@Cg*-mYo9wHK380ACL8rP3$kq z3t1|vjpPvGID~Kv>7apMiHL+KiLe5MIoMDbFNKJqN;^pIp}q2e-H4I2Dsn?(@SdCl z|I-Z{DRN)9szhTMIh#wuF%WFNkgPzK`yrKuquJW3LJ&rOTTGny1Bt{m-FBPLk)TW& z;-JD!9o24u(wB?8c>6=l^p1Q`&^{|@W1^#){YP<)M@Q|eBR|qBi!zpz5C0aDcZ9hi0&}3k9T0v zQyVhZDC6M)?sJ=)dn1X9<{xb^nx|XWkz(5UU}piWJ&0g;OE9(w#ts4r{}1t)1%jG_ z@8)5M08paWLgr{I)RmALLso(hZ17|y^b8_86=CLc@G@l!y^i>H5AT!W&}XPRJnziM zwxfE-4|OEdBNy!$_i2?L(3(sk=O{YXw0CgT9TlHir^^n9fEGuN>@;bY{cV=b{TDPr zunS>Q2p8PCpFmu%RS#1-735$c&QgP`5K|ONOv^e^RGR2R5x6{mjiC^W5>uNw6eU1f zq9mg1S1&*wik#S-kX)H2PD^eCFk|q_an;2vIz&2^^#QucAh7qIA<@eVau>Vp7DIw zcM27`duZQD+M+$f!I8d#I;O}rCD#}T)?Uj2^1VU$a-p3MGB82>@`7Zy$dm-*D%7Ub zD59SbB;d%~(f{8{%tjTKEXI%(m@wfAGKVBpVyMf#9toLanm+wIeZ%#K&O2T*>!Mx0 zbg7_sE>b?rjLL&2>Y2YZ^b1_dZV{=}NNjIR*aQ*QKb?`}2KZ_TFW# z`m*!q%idx&#q_za=y3BZbyE7_PzAAAj9V|%HQJ_iKuI{SB=?-h1uBR~m0C#<(au`C zMM#O^f=e3v*$_s5VsrFjY*-~}H$?K)$npSUs3tRS;<%^yB~tfK#=oj3M+;?rsf+%q zh5~gQgKxKorWlYhFo3qz09b)_pTJHW>|MPIqaYF2uAo#YJXWtVb@D^!iT2>@(!p;) z>DiuyZJ53J=|{x00WMy@ome$-udsqvxNUgLr>BPFPdA4gHM}!0=Cs3n-u1B*`UO$P zpA$O@6zjK&h|Xq&9Dr~~>&Cq_^})Z-4&Q54eyLud#8N7-zFgjH@;O{EOcC56A8aSo ziL|bXwK_$(rB*yAxNRG$NsZSN0u8C)b)A-9|8Bl4BNb10?EjZr5LYm_-S7aa z=enqP)vGui_m7}rkQqAoWc`}$QG;PgQNcxfyj8?U?L=qwJe+}ntGe`PCUK$CBTtQS z@H=g}(w?B~N!k!)8G|8;fx=uMA=WfZ94%-KNSCnP~&`ni*c{9;c&r{_V@eN z*$3(hj_p+WP+kiT4r;di<8j_EaN7~gq2?Fe_~qJ|JricvzEI~2!S3`mAOL*8V)v}p zgxT5$1)A2nn3b=_EW(16@fMWX>YxDf6{1(7-hnFZf_ec~!L^45~9YgG>#b=6ii1yeADaTc%97rGn)$O?$Gk%L`VqVm!G5Ey6+ zhYE(ad??I6nMboZ9ctQsB2vk~xNz1AW|Jvx9@<5UUg;1fZ-P`LBt3(*ot4>@9jS$- z01*K1?*WKnjKzsAH3)8u1@n<;ieN*iThn-7UddLsoW~o{mqnBJe(u;cN*dsmbB3#LBtOBjwR2 zz0Jk{jGnu^?dS4KUd|j%T9MZ3vxx&QFI@jJa6Vw)j;dyUvD3;oBR9hm=M|2XK5)0a zYvQ!&a{ZxRe1FQE<3*RJFsn(5r`)dAPE&k_8N*^H{Ge}9l;hGmMY7=KNJomeo_r@KMI-gurv_gz#gOR;q ze&4LBgzBMna(jw>pmA!*{&l;5hS=j25kanjkYMOVyo|!FmDn!=&Pd9*79H$e;UR4v zyLpq!2Fdi>`2J%GLAEaCR&aa4z-xa5ev#JPkO^;@&mI|cJ@X8 z`zb%+Dv%Iq(S}J?;l)h3_R?ul$-$L3_NQ9agjNPSY2nV^^R|o(op;yc9_`6eokq{O z!+v>Q^A@kUx5Mvfe=jTj+=J_^wOfbr`*=YQZWwOZI?x0Onvs>|%h73GkaKqnbLT>$Pgc#!taw(!cI=g+I$R^KGyI+!x_5uCVi(i%UwUPix$>fsmk%@ zLI^2}EOVpOc{9}$6@sn(%BrArtCxU|p`Bzx_p#|>r~XJQ6TE%Z9bdYxTWJ$BqJ?ph zDS%zCR0=E_aLkbCq`#X(5|3`1Doi^(KTf_cJaK=i<*8`qVyDl{*}L~{%uL5~RHWWr zHSzf;6P(sW-rMo%X@dK}qaVqs^V93U1=daEe=PEeZT)uP1CNj*r%?H=sxn@PB<-Of zy&sktnrz^w1TQwk>28Hec*3TLd~{4V*vJI?KVW?01h{lv<{yO>cV>9EbfWkl;^+si z>4mT*S=XK7$c_Z3Ru%rx!_pu~Y*ZnTkVDn7Ad)m_pgqV^WdRcGM2?PG3=B;P(Mc?DGjGzQF)Q)liPFAPAHI{O8Yp={J_B{Hq$ zRPh9Hshd&9j*;@3A4HZc&zHj)+@c`$MgprJS?clzA#ph1x!cP+o<9KyRm~gD_tkYo zzhPd83tZ6Cp}QEea?-o2#LTS)uWg-hj-{d)%eHQvLlXquzF}=L7a0)^HsVZFflhAg zmb=_sV#H`q3W<~q2kk4FKLf_(*S=x339dUld{iENf^Rvw|Hz`KbMBw5$G-Y9gC zvGzV#XL4^WwE#H^Me zQDdJ@X_tKmV=|D2TtIO1S*?V43H}RP7CR$J`eF3HSWxV6yopuFROji@19$$gAG< zeArk6is97T5F;g27o>TyRL1 zheHiDNp5WY!Tw^e*UFmH0ssWO5u9fg0$g6<6zOUaObTua6Ywa9dNa^euC`z3cWtji ztjX(%V(*s?v@PwurWW&_-S&M#-|jtWu6HKEX3ho1F6d;kW6qg7Cu%}(xog+0dlc-V z1CrKEWtsQhUGKKZr*ok|s_oCg+N#zY{dxZK$s47_xL_&PLe0f{w6D>f@Tt}%g{|bO zP;$e^pi-#MBlH0&QW$Uayp+!};vyY<6(RC+fJv&j#P0{pCnW=zRecCf@drdRj2H+4 zmY<$o^%#ua45T91W7+O?Bm-_)+ay4x@}vfk+%#EyG5GBf)|1k8He_2ux=3d7pMfl* z5aH&@Ri@w&e4 z+HxY^o8;56Z5pfEaU#Kb&IPonZ&rORdp#$Yy(H!a2w5z}%^5*R-m2Uimb<>W?)$yl zds;$s6UppqdyI>3ycoLB{#djTh~iZpOgG)rn|PgPb2R z0-d^KSd19-^TYVIG9y@-4O+2PjS@?MZ^4x8u;(iKIHfy05@IOV!z_&zDpkJl=cy}3lW_}`<)l$xLaAp27Fbs{D{0r72M7T`bm_Pjz=q@`mFiqPfDr?o z#=t(b5n+Ii)`6M*pfXi%+zuLh+t?f6lS=6f4b~sVp~Nsl0ixF`35M~XCutIu?59Cd zkY-=Rr0lX3!6Hbz0AVReNz?MBx3LDL@`%x50~PYX@`@gOMPD3XkSj6eTtQn@k2lTF z+k1sAlFfhFYa+wG*1F0alR5I7?+6!^JhU`bfq{1yd}>-iQ!daG^9_-{Ma@?o6`%zy 
z8SC{9JOS$4Eri;^FQuhHD2x%28AiJ^6xV9wUEZ&ApK1cNNRm~T(Y^rAI@w2}Lj4Q? zl}jr{DWd31V�%qf)|OGAKGwDAj21f+RN+<0>7+Ls9FGw7|6@x8JB)AC4`9M{2uxHhR0 z{iN2ax(>qP2DzSKkKP0SBVFu2uGYN%qI5v2MKrGxBELgz#Z7}+~BdGZ*G>sgZ0cte>nG`}R1!x-NaDt>c23*{{KtGzV zuLOKxK+jIX6iIex z_wKT~HEDIrlzab2RG8V2ggpv+iDfm-#Up18?x?A5aXJ;7(3`xTo(9m%B_E4W#I-pz zc&HnGgoRW|AO(1IMq;NpKxRm^H4D)h8bdt`GA0%>R5BF9I!A?#tR?Wpn9D4`0E428 z{vu;Y=1-EJxisqN@FGjCA0%@@}idgsK&OOqS&f|Bh1;w=XFpThc(Z>NH~5Cb25Qa{jl3t5z9F0w3&X_wKx!H13#{o27r zx?$g)!VHjrCGVnqqQZX2!KsA%rnSY6`*;31Cm&%LK38=Sj$A$DBq51F#zQ53B|y9Q zIA$53O}Tw2TVg(q@t!q8pTXpO*gt;0Jb-(WpDa~AlT|+~Y6zFQBJyyBob{^MC^>vl z*%R3`;?iG~Xo&(_weZIPBXyAz9v48`E%i1||D>CB#u3W_mc;`%yI0^7WLzgxjsk1vkiztogMwJeuKGcR$(82K zy{5UnOFgd~eI;QD!XCYl@RPss8KXsOM~gOS2%AYn^zUW0RssW@tsTZzAP-RC14`xmcEr^V zw?M%X0FwAYNkD&51pFXB4~@Vh3<$>;;9kv~9UmoK&NR7Hxk5L){Gl@pCr*D$$i^Fx+$IqNo)-8HX;>Ah;EaWWCK7uv&tB? z^r&R$8S)Ue2R8(OnwQh)n-N6|r);=7z2Ckch#ADsQz1IFQD0*uOo2pK5ryuQJtp&) zR`edJ2xzNbFmis)yn!h8w%Q0W)!``a~3dwBMEV8nQ+;MLix$E(o?mDwiY&1U# zlH%58g5&5DsQ{OuDZcaC98552+PZrR_Fp0k@Tx|5!|Ynqrtiv#ImA}EwsaT zJ}d*oh!G-B7kRMv6Hp|g9my7sQR1%?xI09|!lcUk6zWoRZZ_Ig>YZd_+P`tB;R z(a3l(mfy7+FuEib(TkyWA!T}D3=5~Pjb3YUJPwY_ zl|T9zsa=ZU0&@4!Azu4P@U#TiPwLs97;YR)9jm_6w}L*~&NPgt-t|3k8_qo|syT@tWUue*KRq5L**~U+Vql z%%a&3A1woMFAn{<&qTkLuB-9GqO4&y-DV#sSbZK3>R8KJIVD-6iTdx0H^3z^Dyf|Y zYeWJi{Q&jn=%k|-wH=(NiknwM!ti07#Z&KOuo#l~LCIVLEWPyF+6(2!mE|cj<*ain zbfaFajzos0)}@%Bkd$-?zDAgaYMw}nzhF#NK-NJD*b2=2216!fZD}CXDlss?^mnV~ z*%iSe(s?mAM9t`AuBDb@sX&?D9t6h$yf_rBRdY})un@t;mg4>m0l!l3FS7|-?iJ?U zyndAzh#mOK8jbqtaYbw7-5-xM_mw)Z_^JW$q8X=-z9PD?T#+(9`}ERQUe<+Wh62p| z8~8Ot#aVX5Ez_UrA11O!@c}4w*#P&s2VeNW`88MBq0~Ka8`;~QY40ub z7>#BKKE~Xp^mNa8T)pC^Q|iqhZp9h9Y&aLDc;rH-ZBpBA zttS1_fT}cdE;#qbi@FP|*G?3zP9Dy%A!OKYSv zl`>k9d61k!THI19)hm?;LC7(kn`?&4^(4}WX-P^hQJhQMAdQDcF-r) z`VNNIAG$cO;C_)n*{k36zkf}`D(gS#L(k_%1>Q`8kVuRU=) zU)*p0ahg>6_?WcplA(U&HAjrW9-=}5Z6;=D@ZlQ#T7y3gC7aiFy~u=#F(jGs07&ct zzKhbO%1q-qJBY`|w>%&dE+ZA|OVJa||50@3aWU=xAHdJ-`_eS2rp0MrB%@7HoN3=M zrAWfGsR$z>NzP1rTBa10XxbO)hO~)0EvSSd6k$pc!j!mOe7nE%`?JSm=JA;GnC7#- z-_O@F-+lX9e!88JYGBWrnE4RK&+^v4SFK4rW$?n4uBPo}jNO1#xJ82IN=XymVr*pN zilPtKL(puQDvBTJ#zS#tU?`bzcW@Arq*0v272{I@yymrUe8V-=6}*mBZO z7Bi0dZZLNT=&s6~mEZlu>_ z{4H|Vx%%SQr|l$yygbFdyk+R2VKcD8N)lG{nT{vo$#kMw9V8VeQRyJ)&evH^N*PaIZ1ZH^Di5M+@T`Wlw%4-jT;1f1>@$)C=2~MW|s&< zU{yGs*u(3il2^2DP*cc65yM_IbF;{e6^=MBhCQ?ifRa(mBb9sOzJ*|EOg*!(B~1Mz zW#74$`zs%LW1!6>XCdUn)E|auc|;ZgJTiS`(&+gF-w%z36cFI`8p|Uw5V=L)-+<|6 z;sEK8wm*X`L#?d0^c3m_(?;-0q1h^!z%`>YE2talq!;#_$b0r;?->t^BDaJkBtSj$ z2nx&0b$KtQnl&p#`--CsQCR8erSV854*_TYozjKC15MX{zBj2U>e;N@F&C4rW|KbO zZ!DP>@$f0z`H}eKJrW+S*Wi~6__qh)GRAC3h(f7M6C82aAwC9=u^I-7%d*$K#761L zWDPTRi*P)F*~iAvM?b*;3Co1*FvH7IH-jeyz8)q8&C}i{PXpc!kz5>*yNMYCSPVV^ z56G>?w>YuWhRuI8ZZiOflfXx|7!SSbI^e$l`}WUYv*w?~z0=KHT${#pTAU5dwU^8s zX1IK13ipJme7tZDrtR|xPSD(5%?#MM9V6Kt_}-w1C0v2RhL@OfO30=BUUsMKWM*_t z-%rlqD|!9{t28;!MY|e6XMmp6T2y%k{Y2zr^&!yRWVNjTg}U@FkYObmlc7hd#*mcH zr$h@3#^Q9`{wq`(*jlhk~|!xWqUaF=8Af}K7d&p z$-`2cCD*(6Z)cvAF{&mB+6p=S3DgbjhcY(=0ce#vXFWg;Hd{*#iL-&uYWg<9V^@}U zg@oo#KgYD@XbI9ud>(^8jPaeG(yaZhtG4w;&dIDMt;W#ZpYxCCok|HmekeASA*|h? 
zy$BQX_+#f-ydsAHG2@K%iE4gr*ZXG>K7St3r>fK+Cw~Yn-#LgB-6R=jO>M4I^nvI3 zSY$#W`rA7F`jkxDEkP&3!XCCaZzC@|m|e|v(Z}aCMkVNy@n$&PDL6ffipF4(f+fB@ z%?%RZx&06=-KI{%xtzThgwX44lu4izMI}OLsWdBiW|EX^>|*j&0jQe7(U_>mkz-JX znr%AagNsvPJI)I0O*(4tFjh`K z#_S2X?%dz+%&>gAD=eZa<91Y&2RiHkkNspFd11APjP(C2S;$eQRXV3@#_pQy2YVXw zud^j;&i~sJRqK#2RhZY`Yf^)HWZ!H|XE)RF*tKMpNF|K1Kx}U*#JD48v7MX>Y-Y&j z==VI19gH;ygvFZP?-)cbuzuxrc+lnZPAK7lJ0FJGr6AoN0ZeV#8rPT+5dPFz7f2geIzw0r^;X7K!_lFj;b|^KD7b}qXYxk zb#bOD_+av$Suv+l=4ukNg0MhCEB*ZKtV5nBokOyOYWjPv9{oUK`a+m!;Ukr|a%$@e zH2Fvzdc=>5oC%dX(ZrAv=HgADPsHiokHxc?xC06(g=F^%J*a-Akq6+_x&VeRUzCIE zW|%}iqFf%l-KAgU$51g4~|CjDBXE<}N#ngFg{A&2zA0#-kk%|7DBPD-)de=sOsyWuSsvaMOfJnC{5kxgcN%p|tf{X~~pQC(AwiT$Q` zjj=w&u(ty85rH4Af{9}bqy%50BPDu4I+!wB@?wq`>NS#%cp<&%r_37LC%z0k|Bcpf zg6R8YGHj4ssjwXe8`sieo57M*1t)I19c3Oiirv00>fp)k#vR6qpMgC#gE-x_@6Rm$ zlrPk!IY58CKb3W7k-GQB;nzbx>Ajz))IL~>A_Jd7URftoR@mL!-mE_44&yEbht=Dk zcgTJQG8#(J_)=xZ`9yf#cC|i}*b$0{B7~DF7vr2^l#;*=qpsLk&XuCrTu+ROA`!pr zTTC!1&}E3zli8pY?dBszg2lJ`>~l1YrNbpJ!gyy#c_x2kv;YjbNDJV~u*UGjOhC@f z`<_cYJmhFY+~eaevgRjQNlPrd0BrlN!p+4m%r5h+oT2RVo0R-!y3gMmdrtC``AeNX z{JVWt^u}RAPMb=ie+3zz=^`8qJQe_FirxzK zS60jhzw7EYnq%nyL5x;RSID8l6U2j zH=fJWvqGoS#O$#^oD`}YQ>jD(D1)HED9H<}U6el%n<;tTjnW(yJwGkle6pL+MR1J0 zc+UD_)-x26$BP5XlH&2PV87ws)-mK})%R1(Ak0*pCuPdgp;PdGqZy7yBARhW?-S7h$qJp%u) zTU-yXbQY1LYr-mI1Y5e^{(BfnrggR_Mh59kqiyT=nYbt2oEBR}KCl`?)dP@Yf!n!f z%KBIy-e0C+<;MQwtl7nrJ9MYj@uyJ*TRR8e6}lB#?{PfsHIv|V-|vI(MzLi8=7YgI z3v{{VS8#iv!KT^XxSS9E%li#B+UhZ%JL$$5fQxy(fAWr38+?Z=sr?W|Gf?*kw6EWQ z5Q~p14INCmA8soGM+ZPZ5k49^5`7;(z2iWUx#bp=XwRzwVgpcUjZ)04s>+3C%+ z^fxkPI6$utr9342Wv~}F0ci`JL85pigh~%{##+Ja^Ib6tK;OAXwTSbVOSgtIPs@s@ zJj%PtM*o!FpEALst;Cixh&sX34#1dM@P5w+YL+@_4m)3AVqd?miMj6SJYA9Ac)yKV zaMV>PBj5wc*1a>JcSAqs!-(APy_6mjesTd*_Ot$Eh(Y{Rl77pF9f7I~v6YO`4}q;R z%It@@yrAU@7x;{MiErsdL;|1;y)}}eIgwAqhae@hZe%RDq`1I}710%_e==nDhxS&e zRS<1?I?hO;Y2j5HF}n`N56SE^A!3*e*8(f6iS-pK1`v8>`e5Ck%L%p+@^7g}6yyA4 zn>@pCV}NQ{BpyH+5PNX;5Ze5W6Vl4NB(j?5*7HuXs&b-XWw<_`p1t^*RS!jTgeN06 z$_;r|eM;BJPIId^`|Z3{{4us?#W6>i)CKw#oVAo zr1tz9hc->_xtQ?MWEN&k11pj5YX22z!BxARrLT$B)kp6w79$;AyL?um!eoRn;0$6J zj{;-&016eZ(I1_vujbglB6Z~-B{ew(K~LyR)UG!Xl{Tij22ThRAHpVT=YJOH`ls%L zp_&r3X@b~TD*m3?Q*#+@Dsx?&??KE_y8Q9ajJ>IbW%%Wyy?A5PUON$M7XFpW(*eaH znW&ZNBKmaN;aLJP847O!miNdI4Uhlabz61yZm8;2b!NuR zo@>ts(akX1!bNb`*+;NVNht>fen7=uoW@r>{Rx9IHs30||4x0z*IUWS{{yZH7_sfO zo8WFJ?VUYed?Ik}ZiD$}{QSKIlevwg;Ne3nA*#j_JoqMpn*Q!v>)bpipJJa%m&;T> z%g(n0s%~2;#@%X-xj(L_e!t^!pmg43smBTutF;GdWqs2pIcjHhQ>FL7ITXkin^-A~ zMQG%Ar;`PPUA>#SoNOU6Js|_v3@uZF;HpH3k^s?Wd6xyf>*_Ez$pEnE29Bbv1xX+i z!Z0DayW_gX-c55nkl#aq)1jpJ@f+j*L=*8(-qaqXq2ixiup&zu*K`l$k?U1OWtdsa}A5nGi+vi)VK&zEE+C- zx$182ybAis0L!K;$>5Ze8`wir@YdD!x_b2x5bypxtmg(}glh@O-Ok_8bv{Y-EOPqE zZ6<1tcqRnMmtmb~jCGik&Js_*1#(X3V)_6}@($`hR z4MLftPj|(~8_}Dctl8MnEK5Xav`w;ex7;xMeAC27>;IaW-!Pk+Z`yo!+RJw|@DuCX ziVe&hSN`eN91}5{dAX%`ch=pdEEee8m4({ssOleAOGc@|JheEUn*MR*PnJH<;q0eJ ziQ>VrjK$^e{@JY8d-|8Izc6w>py%Ce>wIT+oMgEx(V3G zef?a@C^csxbOzkl6P4SxIev(mD^ zIg97a?c457CS=QE1>m8=0m-ijr)%IiKpMTDL zuir(9GjHFrdsB~YfcW;r1I@X9dR7W`PA0&7+i)h7F7OE1V1r!Ts%6cjo)xLt{tQL1 zUjz%D=5c@?BWdowSEOIe9wSY$$*kk~H@WyaKxGMK+a0PYt6XNQ>9)TxR)|>XyhpH1 zdlt#L6d@m4FSBKROcpjlwlWs8-W|QN6a@le?PS>vJkMZwbD4qF@o~{gHacWR%=i=Q z49FVQWZ0#H$8QZfO1Ux17ET~sx1a_Fc{1n=}ivTuY_#v&XZu^dpAMBDm(_NDw| ziP60#>s?#d!?*TX@x`3U^xH*8lm4^bIq_kqH(vDxFc@N+ktWib74>#~IN#RUYn!Fr zw?6$GN@YU!a5JjuLwu7P@`SMRG@xuNV$U_44ZI5SMPzr>DN>I%v4`!9`eiDjWr;v5 z9^Mw&a|6r-SkLvou3ZQg=+;|85vTxx4G>~s*W*g@!!o2|ExJqiGowA*KW5#dlPA~UsHg{{bpmL{ifh{+J_u4uf6{$uq zT_K2dVv~3DuyvTG-p83Zb$#AUgqmrO1 
z?A)MP#JVW}8qceL`|IGjFQ3<3FxgznjpruIo_8Q}Q{l|3NO-f=>Ee$OGUc!?o5&>V z0H=z|-iop$s$-)=Ru|*!hd@gE*KbbN?OST@C*&lj1hjP1nP+yE;w0`m^q_Xt1eCV> zx^oz@_D!Q7Z6BoDH$724Nn9R+=m|V+ZZ|{tEW?slHpaWJGIjIpN@Z`9GT-Gts~!;B zzs!eHa{%Qr$T0#|gV1HCyWQLf5L+nd=J5zh8v~k|y7yJrp6aw^0>P=5%;TvdoiRC< ztWHC7nd-jNNdTyh1-P&XATgL+K69H0jVDBN>M{g|;-Za*O>JfJ4-bn7nc68keWyhP zs2u^QFt~Ug%M5Ul!*W)?=VMEEfT5Ue-t%t;eV~4e-c#Eq6Y$yiZ=4U-R+ha~eMtORL~N-INoULp0h_CrZKNArnIY zK-6BW22-wI=;)haRFRxtZ&;k3nORqEtN*Enk+;CCFK4Ekn%0&+8SblPHIv0GQ`8)@8p9ujfs%$!L~Q|H&tD{B&$Uw;1UG*+Wg-A&CW`kI~EfoMYi zaj?frH-XthS<{1Dgl_`Y_DROJYc`~!B;G1dTQ>e9 zLEBLaocubl+{hL*wQ9_XtP#`9BN!F|`V1=hW6j0bHa!LN)4LTQSrOP!i%}4|C(NGZW(xgPvgdbs(b76owfb#(~cGcUFWC0 znmuZ&u6wT1I8cqeeBV!6e+l@->8#L)|ZLQka_ZQ42<>hLJgDi+>OI@ zF!s1W>w12gZZ>uFO+3nN|Ke2k zZE2-Bs~n>=43VGwPGT2vQ*4p3$}b&*wBZvB|3sfsmG3-S#J^)qO(XfpHtRtW5yKh4 zA`$Yusud_7R}h~lNmkY51HL<@Kyf3lgc?k8O6M^|3Jlo-%5a|(k;Fx*_%GYieEURr z-7$51B0tkj3L@b?d|(+5nbnM8kJtpmC|7nttv!M*@nz^BTc-vqbCe;=!;Zo2Q0CJi zV_4N1wsk(auTo>tK4jhVCp3c!J*_!@+TY6ts!u2NJpLzTtx$PdwAyCw>`{eS>%DNp z#8r>bBO!V=&7xJ?qpT`5zNBcJTAQg924IaP^a@iL?OZOaTGuXmgwz{d4U^?rec6dK zm5LWo5zHsbeR}8-F+Lga*Gz{oj(ka__DxWANk+hPVKf^?ujq$y#!T^wEC9QnCR^E)CLV8LwkSg<=4P{huvm@2A__$RPLrk{XX{c$wOQ@^}$qOGrVl@GEm%K zWs-f-@#ihuUpDO%W2xsZX$A#-458{uO~Nmp-L`s@uib{D`r!u$9f~a4I@H>MiW14$ zW5y#K5cEeHW3;b(fd2pQTMFkJqP^cyR6&{J`ixB+wiLCJU##mQz;Ns<2r7}l4(+wH z9mhgBHgV1-MPQ0sq!aTPc$cZ!+{3!->_7xyj~s(NK*0?c0t|1Zz6w^#yQ06=OsS(T zMg>3_W0-Ny`dw2~s!6@u()o=qD$XC5oYz{Eq$w*kxEyQ?rt{7x zMX&bR~9;?eJKN{qJL}|lTJ`!>IYTG3D_>IEX(fjl+emPjSwnc=kiu!(5d(nM1 z_vUGX^$CU(&NX#WgS+i*ZT{8xl={!4(%!geAOyQ&DF(f+ZU!&6!U$5o{@r%La&SlD zDV1i3PE~T)1Wk1*olx3l0HDpM)m3j*_Nm&w-RrTu)Xm&1uGYg<_KAHA?-DFd>Q<}D8@ZW|IQV5(p(Xh6y^X_WZr|tJ1{WY1z23($}YLMGQ zeDjZ-cEE8`%?=zjw95EHf_dfJTwo8`d{PA5Zg5wq^j13CS7^867R?2kyfQU5y=86d$$=MjKaS zztwJ^01v^WKT>=@1+8m7HL8bAfLNWujC_#LE~V^8#)Gd&O%1sdqISagAQ`<2L>Dxp zOCYAJe261Q2g;b*I@sF52J&*ZdKH90q(tT<$pQj20e}PUVgZ0QkFy}9XZF$+0OO~e z)~Ux3KH}_`o3P=FOgDXPLNhdc16!$1=G#{zO#v{&1Q7 zl^Za!sqfJwD>y@w$6AQ3?DB7azP1KBS$n3&oi}@Zc}Vf*s^%S(>f3D0x@JvV;cL1G zZw%n=wmxXd!fKhQcbg5xyZZP6wYpKvxQwdAzi^#U9eoO`4|kbL!5E0_1UHbyXmcs) zt1Q1linbtOxxI8gj7RGbAb`~cJD5vRK@by-!un>Q@gNqVNJe))nI?isECu1<39eB*F1UK}BPPR1IlQt`sH348`#rVj+*1}zjC zeIm^}BGYzoMF-(sqcaS z-+6>m?AbLNeU99NCd%!LfIV^U4105IzYlW`-lmIUKoH}Iyq*K&x`bFOJrdfO;aSdZ z(V^qJ)y-Gaw;T*=Gys-CR-lxA3nUjwb^N03u{w;UXuy5y4;@@a-KjQ??6-NrxV}dp z)04x{)=}-zv4^T{--={!bPRX0l>hzsB_}<7IK=4Kw9)DO(_WEgf3WFOWz{uX2*!|^2iGin?OXTeg)Lkk?eJ5VF}MmT(mx<^mXUyR zx>Uvp6l^Dlb@ksvk%^c<02=_~SNc&sMW9=-_1`kQLay}HluSl3EI@>qBt**U1LX`V zim}+cO#w1WdCZOFj+b;00h8pYOFwcI#r;mG3Fd(1r2R5vVnMV@TLO<-qeCWwsJek& zg9b_|kAkEXrFWXJ&q=PP(seN?&7OC*xyw&&dy{O#^Z$EEFH1}Yj_GfH*lje1*@t%&1(N(SsM{N z9*m5~U<@CRmBi-R=5BTLBpQoMe^MqGta%4{nSk>N+D`jm z=s3;nHc0iI{@m#3sw>^^j?&8Cru7D;*~Ru7y=m~j43?deR>i^9vCiwqTR2U;y;nq* zQ=_dxKG~hy21i$KRnApi{?E^0MA_ll4b8d*jXDvv;S+Xd+r}fNW_hdLnyztW-;5~n z!6A(U%yRR0K>Q!s5C;YgdiL8oHTqHWJ>O>i!FCUBIG^Gf6p>8LU~9eNQgFI zn8Up9AnQ-xcK2@5oJ<{G?ozaht_Py7yUel#ZXV@qEDERT3@iZ5SP>^vPjwEADY0^S zvel%99VGm6t+`@HPZOP7&h81;y0dtQ6m9S#*ly<8e~TuuPsHp~PuTw)+k0hFc%?jK z|L1Q7`Pp~AZ>hY#_1RI=yTO`j$2ISDYprNi)bXgS3jP|Kw>j53dYT?|&gq|Yt+oKd z{2}tjV1^a6OwM1!$iIS)MgUtmL?YsR!~b@KumvE)J(%r@(kR$URtVTNB6f)!oiAhl z5nzQf=AOqW<5gsL9cE<4`8lcUA3;(eHpS7>Nex7Of>{k$n#@!>0zuLtm{b6gOS>6w zhV?Sgms-9uZ#8{x630kBQ_N|*Z;u|DZ?!E>KYYOKYh29ce{XW6JPuuz1BGtsxDl5) zxk2<<@W4{jra5Wksn!#bCKMMu5Uly=oTlbajplA9)e?Q(DE3D5#s@64&~$Fp;+RS9 zYNcQ%HkeIKA;;U&7guBUL0c~af(PL8wdJ`^`;= zhh~+ds$|;OlX}m3m3;xbeNl_3<~MgXTh2R)=|{6n00Nc0k)?+`KgvRtBe`w%)IXwTkV7-+#xw)C9)A+ 
z!~*0NGBA9IY*@c<(Pv*fNQeXoeIV;PN>dj_-pFGf0_g>9ZpTmz&Q~5!rsIgL+F^d8 zY=jRmUAwom586zmqV!>uAY&kl_Jr9!`KN?$1|8b2z zqapvi1z2SckmHv$U;eQDvLgRStKB-E3|LMSqU8$2@ z>>!&N#Vi6@oJs3bm=9AUXh?uH-Li>UPJ%P-s-&wtC+PtIW+S9^6pZqnp)BT+_DL5y zFC5T>w)el^bdQuGMWgiPK!Bq`1;~6Oy%c4DV}bZ70HaCdb-6Gz&?vO51h}Oi@bDlK z71vO)!+8CBcO4DQos~w#vgabZbxLJ;$ufKQ#d90%#QyQECkd)!kw|Anw$1+6G4m^q zt_@b*9vr;0HoT+Iqdj@#=xbqc71`Zv&9RMTr$Sf#6>GOOLTHFeI2!7I;uyn(fuG!@ zBVunvDE?|T8MR;Xi9kh;a(Rq=z&22d4wNJID}752E)2x`;uujfd=-I7*2&`n>-1x~ zTKQ<_dQ_`YN@+MsTYz?++Nt+N$$gS^S?=-{QO4bIQ;KJ5>u>PFE)KROBsHOQjrMPq znm+$X^*~kro@x0VZZNP|i5tG@HBz;e!-+{~_3u5A={ER$v+A*HrdRX(b6?f^t2>;I zFAN{o+FoqB%Y|U`JY=nZYT5FJ2d0Ow^WlZo(unulFoFMU-8Xvdf69K!ACc7#p`iH+ zyN$GRQt2zF!%_D5=N0ru5DQ63umI~Sqfg231yWMXWAvUwU432!p0Mejii3l==J0cX z*;!J{PkQ12qmhR*)ghy$y?GUXHQ<7PTc(+G6txS_Ie2N~Ez?A}=LRqTx5>?Ix61n2 zRppkOzpETMme6`^cXeg3dR%vFhEW{c)uzNq8F?D)V(XAMQG23fV4%CbQ1kq;BdP`R z^H(nnH3X$b6_VrDJ!TOtL5&KE(#*K2<%l}0y&lwPjT*N2mA{-<;uHFg<9 zb$KPOy`cn1oyiZHJAeFP=kLc&(&ORCXaCz|gkCjPjgFD(7_8L_*?n{J5O8>+>QXu9 zyiWDD_!p;MPjKTZz181HUXOWL`FF!yd)jU-q_5wk* zhR0Ie-@XK5{QOvb0EVi=Vs>xh%N85OXX<;Ju@^^;9$&zUSkVyhN6OwJe^cGf&?TVU zuP+BU^zAAg-Kl$@!I@LC5L{XN9nBJ4IwgVW}))q~!iiocm$)bbSHfTN!l* z(B}d-48$ZnlWQvhOvUviR*P`E=@y)d+9z)(pPTy0S95hYJEhqie0el_d^ylE;?c{S z9nTuL!(CL2u#MnZ37Y=SaL{)d+ z>pm;v;V2_o&h2t2&3u1VpRn98gG~Zcsd*%wRF1h5NS!RXmvT4bv#FkQq%_|k>3^#a z(1{X&aRe)K)>f$kiNFzP>ORTs;UXJN00jUB0HyFYqJDEQ24!#YkcKA0-Bx1hiU}OU z3hvRXE-j?(I&d@mk=>C2{~bAxpc_LL_RF(7tR6y7z4IQjmyZY7y^`FR4w<$cFN&D* zv#AdI8riz@eBl1`0TEjhZ*11BQ4$6g8J9mh%O$%)+F=V827L5YZIaDLHo8>;L+VCifYHRShxyRG}X{ws-yR4O+BisFP zO44xvpUn^1yld?}_e$%S-RDAg56_KV$bFA@4eNATQ9NyPcJ=n0P@hBl#zQX!x9$wz zd3?oTyTy$i+4?$s2I0yr;;f=iI2P4Y9#lk__z6-o3A`2vw-E=PjvXD2HtE0t`K7(vx` zD2W0#k^u~>c@f6`R`=iLzh{Sw{}|gd5*o)F^UIs5e*KdgdG0m8S@HRlD zy(qiwbOdE-*ZX1r7danFUWHC&{B@uzFV&}? 
zOr6#xk*w5oGxR|rcQAg|rNIrg%(8`&w4Z78+$XN+`qY5le!l?%uIH?t^jo;UDR`U8 z_VnPCNX7#QW7aneXbT|R8D=sV0*C^O_5H=dRO{<&RS$azg~?ATd+D|9ck8blaS2Hd zzBAu6aJuH`JH->*MQiobA0KFyT^zdor`pcoix|&N&uQ?xYlY@|r!h~BmG~)=&-zS~ zpei1I{qYTNrO_ukR~1VWcWAN&FpltBrX6$b>~8l@O2HCjsl(KENER^i+_ChSR*J|+ zuA6HNsU9E6b|8W$r^{XS_4BNd_W>AT;YtH3j8N#O$!fCkxpg+qTZ|LKonCBgG_ zkBwSi+*CD*9?gw5`;k$5f5BWhe;)SgSXN-2Li z=L~JeU(g?&xDBMzw2BOEnHqUfPg*q6KW@et^HJsb=Sub7d)UQ?=pNo8D}4>tZar<9 zeRLx_Ul@p{Y8nHotZsriU%;3h?Itcsh)%;&AOng(6-X^1Knko^OVXD)wYbOT<~Et4 zD7w;{q&dRDG(SN9+q%YjwFAJ}u9f=X)pu^{KnJWyvQ!oyUEo)Z3Y~Y-FV)GuRmToG zevN`x+TrQ9BE#Kvf4HTZ=qW?Y-}7(9&>i~=>e8Pq3odnkSF4C-9w5gWa6O{pZlF+p z%K}Vw#&-W>q5k$?_Fa*A6|?g6>Y9J$TvP>6#AZter_pl>OcD>}E=%Li!suXT63cZM zm0ns4ywO479Gg?j)k1P~iB7t%sIC75^qgwSgsEZha7cKI$@inizS=umuZ7noZreG% zO&bP=>1Pd?TmYNT5@Gkq7=?oEp2uP@gxmHORI|PF{5m{$bmr5uq`kotKeB~_f!d2t zXWmEK`8|j-x0E_HXv2D?Sm$G%PgNl4^W>rELqF`M@rdjv>J)HB!K+|*3!~NFF=cc`GLz!Qqqb}O+E43(W-EDCA za3gVjKdS!qnCm+6s)@-OVZYH({++LdYgXxuTS`~y9CdOMJ>eh2_`QJ%jX^VYp2v!4 z?XrOQ$dW@gA5IL-KipBaa}O27Vm|z@N9#r4R*ybQrF;Na%FxMh8tS+lA-$mwpwgY- z5OzE>sr+AH>smm}SZ@qq=Dv&7kdyA!<&D?$J#}c7qluhL!zl*q%Lzhb)9$>zUdbxT z`FrGyYGJCy9%`t1ZT>xR{k81fA>tDV+5^nAh8N@mtI&jg% z^jvo=5ygkm&tJ=wp1_&jk!Y36?if~67fSo3+xjO1PXjtX@1B{XH{SLd{{|^A_EtKC zgju|GUH93ujVEr$yg8vuod5Qt;YUO&uw=aGeP?5?uJ`WYZUPG*tS$rb1cT>)Y8=Pi z_NC{gS4rH~S(P`0v39gJs9Y8JqAxTWqfbhctqORg@J8Ev4h=sV!-Ed&y5ZY)>+8Xs zQx0i1fCiF0ZF9}8*6W?&hL3&6VzzEDDz2OgdC67)tRXOSbF^5wZupm>RbGa-rf6`- z>VLL*S4v0n6IF^p9F?ks@s;#Y1Nor+G>o@hZg#3NXVss2=g7jmU8h6dqiy{M z+4J1RtvN2rHuJ@3O!D1q7a>eq5BJ@)8qz#q{ z02qk}+M-#c%MT`Pn?0>$g*2xULBIg^@YDn~8vBCYTimy^++KRr-}6jimh4CN30kZT zGnnuXzSUZ+m2-%n(|^gUHSh7;#eVlx%`%Mgn)Q-XmT_X`pbyT5Es6hm$Lh}cD}Z7J zJG?p%U7dIQlcBNiFtKd-xL83#1W<7^A)8OIX5tW-ay@{yLJoBWgflWign;zSiFQj6 zkt~aYnGws+<5I6tUNuuXnOm1fl}UMgqMj53$oAG!EL08RB|~?7StZ}=VSd`xc}WZ+&{IsON8bq3{{m0Vm`TA0JSO+yqUxl1x`

zPE5!@IJB)cs1fWeF%-0&mcZ9pr`HCZ{$FZu`>M5VP802prfp9pc2NpX1@RUfx%ptN zcSY%@LQ_uLU2q@Vk=9Q>rH5+;%nD@@BZ9}3;NxGVMl5)m!y_G&?vP8dY#0H{R&|3< zG=k6(@ka_w??!|w^CDm*ZvjEph1+u=CqQNC<|$?0!fANq3N6CUjaIh z%jf4?<<&jx$R8u-KM!OSR*is#Bzq+)#1ie3~kO8wUh zt|KA@_ZXAacA`Nm_vYLW_=XajlSkbu(m@keO=XYUtID=tKK%Z_zYR7_JN^zJmW|kc zNn0Cp3b9NhIerOtR+M%~B2o3FYf@Zw;OOrHm+r)}gp6C=4CH2(RG$U^paK3e>9QOW zogrN*2d(kYa+U-kEYp)An1uj_HcamZF@*#S=afD}Y^H*%twYFRodC@kFV{OB3qGDU z+gvF1r$HaI#u#}F`Jjg}427^!L`hX9=fswrktj<3i0_)Qtc3a>rbl!J=WJP~S9$9?H|7^prExVPrT%!)5){1#mb8)m(l=b|-=+92jU&1&}6 z$$>$dEb8O*di;oi``AU!w%)qX`?2`wz%^_A#~?Qa;({Ec`ho<$#LLgGlX*_&iLjx7 z2$tdSet!qEvF?39wGt2jjcT=_=Cg0gv;JfPoEymb0Y;3 zu&{;hWKB8lS+Xf!lNb75L^L+Pq|Y~gA;O6-LA{8GPr<|sx?aQtmPI_QoAP6-prA0f zm62N%wGXXR46M4F=aR&R3_5;fasAOAL3_Kr^t{2G)azA)z1H(57lh2?9SD> zxSgj_LQTKhJ`~pvux^z)y+^x9`P5NMDWM%$-or=lv0n9&7#I8 zu9oywt!odr4e3CtS3QpDIUx?9wMWxOqOA&cD)Tr($bcrss#f+0>v;~Kn@k#cHlKQA zQ#8d;UkR@9xwIvVvu^>*Q36ig%cE#;2_oOHWjgUjcfktSZmzjo4!k;_?=F-u7bGSX zUjOV2^5GcxQV98n-0VU6eB+j)HEov<)7@ErE_5Iwm}}iqhBgq{K-k*e8@)H=I+YlL zsdKN7hz&F&2-ZvKSry`rW$ zey|zro}t~mbKz;6Hc#7DbeSq99nV9|!a!l{q3b8L)-ZYgdG&D^+npQJn$J$DFveBp zZn?-5O4jVWSeCx!QsJ|?X3&JcvvDuM#o^DkUPQSOwWYatgV)6Me@izudk3(Pt|Op1 z12kZP{m=Q_vv;QsmX$R`i;wi-W@GPMN%!Uz{2*?L z4$gW-hovP*S*C(wj4#^7(e4nT*ZU4wu&(yKdyZ#S9ftvmFHAbJh$2(OW;spKXWIuK zJ#qNuRmq)?hmK80!FjY5@NwjZgV%CHyHgL}Dl+NrZ6JN^k%(G`*d>3<3@b`y<1{hAv}HL1b77?hfCar=&f(D`wBB|{kM!0(e zvh@#t4Gth*k;Kpnc8;a9O`mLeljqzW4{1l7pN*Wk^0BOHjhE)hySVmaHNe5D@1d%* zZ(=J#CW_MSzbX#wJ@Se6R|ohlro@CspR%PFZMl49Z7EwfA}IZlG@cX#F@HxFTtIN4+%v)lYf(2pWrltguyDm@7u$T?#4j%px4wz=ajbSEvve zJp;a$B?Cv$Nb}O4*iJYXVpw?jVrRsFg}8D@U5m>JpEOOz^NBZq+h5uJEbF_?%H6f? zEB9rNL^aO9&e7_)56r6{Ond4z=l{bm8ao-cAMqM&|zVO&ZAvOcRQoJCL5EuA9I|B=5eYg-7g^OwMDctwT+>d2vG? zqfVGiq1wrMSOdf@Qf5)0$>I3MVBuzw8bvKgSLYj-TA(pW!C@gt-TQsVfS%99STwPr zN868Q<-9#@k*&xoFp105&o{XsjxNKqXdmHiaYQjV1LjVoMoXbR*!SoPy{)dNI_&eTYj z$X|qA)C7z$aw(~#83A|4wlde9xPd#a%Vu8f^WJX0y1jTHbAB2FjzxXA;uvSe!}vx} zKom7->}8Ln{l|s)Q1YbBwsN|av`QYm-f&ANN@wv<{=`})kv(9gH_m@ej_ z5CN22hLmD;%i2bU6gN&bdPRI)^e(OhIumRL%q3M1N|mH!i%4C2W#Qa~8`y{8{x_ni zzdxB0AaK8;=ZoW0T4UI?W=Kaopc9A7nSGy7nfWM?E#t zk47^P{D}nI-IQSpH7$v`gE1?xtw6oj=EX&X@(Ym zUNzbom1ugD#{0k!iNX4a5CU$}(F}kZv$TsY@SHDtKnGI#ut3vlIhQ~Dm*lI%HYhd0>*zc&p1qqllgSK!9Cfq_f+;# z<%cBJipG#-z46qH%)6$KT2UtEjJ}rQNHIPv6pVD3*zn@>GFw@g4zvQ0IPPd48Jqze zQ>W^%%My2@Nnt22(dSm&B9AT6uTPTRj(mH5={)y%GT(GuxYy@d?3&&DP0QF-d+QBP zDRr#SI^Jv}(zKhC5IA=a5v1KwVSznpsbKx#eVc)1bB%Aq#o?KdQP&vi2>1zQE40G% zS(dDnc+YS?8{ZNI8n%uX=)f1i(aY`Ubnm$iOOVU1KjI@@MIb3nh=aTHejE%gi+4yd zjhpzQ+`BF{k4`ga`!`)pfVZqbpkfk}VNoiN+k*{2;^T>$#~;GdE8u@|@W20%|}!f?l{NWmv*u zO!vdH`U^0cAv4b&4oA${sEJ-6~-7V=H{ z0v23X`Y%(JQ6c?OncE$Ws%8;S%Kd$CCpcS2$2I@;*4 zvb>)*%0aucKupMRtKiT(K~B4fc-RhTNaAxt?=s!P%$# zt~D6NH2NEwX+8za07o4W0H^q4SIH8xi5 zTxyY{-Z`q?au;kvHPy`>JHJnS?sc_s0OAfOj=(dzlv^E3|JUKS{1i_7ow+^$Kue!_oVsU5I9n;>0hpXb2o zci{Is$a(HlJO^T<3vtH9H*nF@H`jB;DNilld*e$l8sFO#>l61fccGlQrrL*Lveb+3 zJ*UP1Km+&YmTQ(LTncnJz}~yz$a)jRjeGA$IT;+4BbTEWCh7hA~U`hvgd9i`^8E zot$eJ!1aI|d`$V`*hH@=TW?{R*NQ|>jYRixU?e(dAax_^_;&`UYCbXmhp{q_)y1T=!GpL=IVQJFe1j zL-nSCguK;9kx# z`GF_SSv0ln0CAsVO{PJcDh}q0YSlZ)WmIBRZ*XDxqhfj`VzBz)^dE^0VZZTJ_tRiQ zxPNUE^U__OF`FCV3OfG_7|+3oW!t8rwTv%JH@t+wkyv7R44p{%71!dG+B{8dLE)OD zfO?sr{+#O7KLGU$KqhjDbHLw+I5WX2Z0s1e`{DC(3k41aUvYgu!>C0ekgm_OrM&Us z2U_V4RaU_t+Pv($HHi;ulE}wSJfpil%X7(7>D)e8)stu3J?#U>M&`VlcKepTx& za@Q8ANZJ~%BB-blneDTI8_CJC0kQ*BC?UsCSnQMuIX-|`NnC^hcU^>96Iy4d18muX z7+rCHgo922&}oy1uJ!zJn;iOs2^^3FswTEbdn=v>d%?eIhl>00Jz%YWy4pJoQ7C~k zsyO5^wT*^1ER**;j3vehptH{}k{>D7sp=1L77-k~xaLhrjbl%MT_=x&q}6)qR!MI* 
z?dXk9pnyjMhLhN5w{^V{-=e4v7VT(u!ZV0VYH=2WXhtor8aiRsHnqpGsSP)5kg3*@ ztZlyl&Sh$6_8Ge<@QRR=*V&+z0o!-C5hIH>_f(V4t4_rZ0#Sg0oY|Pw&+hKmh4vvw z$MSzv(7&9us~9L#=e0#`98SR+E)>b7FTjMln8h#UNGJb$;YMR3-#<3;rFvlAHyIeoi!b$&i&rR}%rW=3^1nwh>b z>yK|Vrru)mqU-ggvu>v#{J`#?Jml#Tn z1wqDezq8kNdLa-jXzklktvx$KNdX8tllm)=I?ti7#Y;qDBGK%t^B_)ST$Vofzp{<% zGOUa#pg}$8ve3^KFbtxAo<}PoeV9M*`d6`(&(v%?DqlWAKZ=9I-W_+uTy~unsAm&DT6P`=I@{vnd_R1BK}QBW!p_&CQ+D-Pz3=1! zWOIp^dB1_Psn!&?$)P!sZF7TP^!Fpcdlzokfg+@W13i4R6KT8zttU!$H{0b1c~dUO@;ej@nwg%4Sf7 z08|LdZBV4zu@ABnIX7mla&iT4rHXGQpK$BunVtYXrrzYUD2TD>oYw`MoQaF^pN#XE z;Ri5(v0}+KCMGl&p~KDM8QJ6{T>s%RKu@K(HTlbqyX?>5?Z!|D0Qmb&@r zwzIWusPz>-Txz6}I;yr{K~5Ctl_kf(M#3uP=|%*`a@BaUG;Rbi<4YVgc!w|IMNc`; zrZmQr-*3YSw4_xAI}9SCIBOqhD5sv;38D8MLOo}0={||u&;HLzTwFJ%WW(`$;VYBc zFMXU?nf+zuYV5`NO0y%xK`haQ8U>I!{azyOKG8bIbiHjpPS|OcWh~E%(r>F3T7A*l zma4#jN0-_fC}EJNFT%6%b?{1rXF$K5qm$x`HS=jT#v=bOp~yAR9z<%+8ysDrnYwW27qQ*o_iE;JhD?j_fKiJ*5-Kb5kPiUJh+D%QX2eG(zN`bDNonuHz&mIsSzN!-V{ z+{atMF98JzXATru(ex_Zn6!Bt=;j1o{EPYT{TsKjoB{7yHNX~o$_8w@QXSjraR&jk z#`hJ@+HI}`>b(`)85IbP|4bp(CIG+3SenwcXRi^r&=i`h3&gOI=F7fGR zJw(A+KfU!NZQC=qpOAe{e@f3y{`9>MzQD;vvd_wS2YzNww#aw2xis7A-@lIMv=OCW z5`YT;ga)7tzd%#H_uFpV)7Umxa^kIG`)$p;j%>>H%;D-C)IGQA)<9xoL&!_nZ%%1F z^pDDXT7P((x$6vI#psMUo@{2B|AHOfy!=SjPgS&dxF@x(ex8rxn&il;iv3^%hw|y*VAcB7tRdH-M3&I>kWJmi_ z4wSR$!k@h%`auZkle_yQkD0Lf`wPfqT~P?(?{vRc9uo^LekB&4==d61$A z^a7&oRcLPp)Xe$zPG-3(+iV`auui$>XEKw#&+!+rkOG>$@Z)|5aSJE>hi;=rV6;5! zmQYF#>UZb{5C2d*P$Z97VoQD?qe`-*d;K@jW`btvrz$m_UEbQqbK?u$X8YX~7N|I` zF{i`t(yD^>^nkyYU0k2uUY^rlN6RU@Z#;ul1gJpe{hGzsE;+fD&F+^Db3OUF9e+wX z{1L9gMYV0I_(&obtA$(*d4Vtcy(f59t;;o%A6-u*mTBUs&#HwNL%aWgnBB~*qb_}3 zA)+%~FF8jj^+uBGT{RaY@b&v4L_*y!Anm8upT90%_sKx551`5pzx5ZNI_F_Bs#?~w ze*L!{>#rL%45^CaR216FgKlD*X?=$@6)H&LPzC9+;D(JVg8;Fajo6F`84!7=>&2V< zyzQ?^J_FicNL5DD7eM}fNKWGs@eijrF4^ac5Oo*)C^%$zan-_Zqs3*t^ugPPzz@E+ zZWNy}!}B}$6g;RhiIq%mg*aiA`F9@M#y)7<;1o1ebIE-xex~hsN?3d7J<81Yjf*Z% zujoh%er6G3NeP-F3X(SG6&|+_uMgPYKZIN38oayTDnj~a1#?9?*6;H`ez}=jtU};u zg+k#BSrA^t2KF}S?5+F8xj)$y@Ljwv~8 zFi(nhW%w3SX8KXahZN_dfMA&gT}Kuh`2=@LaD?Mn{AHW_4(^RXHMb%|9@4|*0rCBr zsE2mp+dCP4B9$r^Nia7Y+oAvwXe7f>D0>QupY@he-8hV;pZ}m>zo5zHynR|S!*`b! z{sD$QA%m56F0ZawU>IHiI0F~1zN(s86}dX0-QD1vcy%OXvUp*2b;jA2-(y9$W+@vD zrmz&3wDe?5>k615vBdWbQS1s8MMbr#2`^jfa`5X0nWb!A>ypFWVzg<3V3ot!rrI{p zP}y5t;(Fn8aY;>s=Eb>-wS{M$FBb+8imi%;I`rMXSuWZM^3^Nhiq+vOk8_ zN(N>r%fIygVY=-IsLOR4N@l_nw9T`$V{GUhBg$&|8Apn$DwPN()pDF!uWEVZLPam0 z$r5umW=x{0W9#Ez8*F*mha|*BbEU`J4Hn;)H^r7d+t9>2pqpqhJ5MRZF?#AeLtDA_ zt*zWQ6FQ?G`tz+lQ!=H_H;oKyb2Btm2pwCSdv80Q%NqPNd}zma_ZIt7A`Q>-aZWg6 z|50PD=iUGyi2n16Q)K^XrFHo)KUEq%|NJokL0ZKIFg*EWE|T>Ns|&`McXJBveV!dH zV}wk=Xtcd^0I3h=z5GdN&Uxf4J1)Ye^TzgGjV6-_h zwQWCj-U)RsqwABS<<;&AXM2fx#4R~C3o5o%h?<@r|LEIX6%%f?tun@@XXyLp@Wz&! 
ze8uE*L)Ro4h@n;iuZ+1>*u-*6CZ+T>#z*fQ;nRJ^-}1 zwD0KYnK;e5d}5k-W`}M9nVxwmcU73RX|_DjQ=VSt8l(k!YdAu?^s-RwuH|>$!Uq_F zrOv*Io36KQrv#7bRtB!f^V+-_Q6AyOKaQd(l^PquK`v9>rB7sli&#Yh#L>Jkq&=X# z9b9N+w(brJqTCh~zv!i16FRWuoAAOQ>qG4m+5S@!YTWlehC@+{*gPt>P#M6?RO?-3 zqa4WTg?ybz0tN;dvRce?>YksqGpDCkb2uSI!#{@A>90fouKLWg~5AS~0qo%#cElH$*JQ;}`x0Rr)u&GOU;PdfB}`;h-983b;!1a19g~=+_G|%Y*t!%YnQW9qIQ_jI0UpB%T)@&p;AD94nSMP0{FY^AxhM6 z7lzNlw9sxcD#uF$kUUIpsB^ZZN@MnyCM88uO7z%)iI|ok#NB8zQ4p>MV9HBjaHZ8e zxO1F@w18FqgaSg`+`FQ-4`QQyY{lV4PthX{$|*Vhyt)h|?0TMS$%hN`Z+}8u#s|4v zIcDV|j6C&{S#7#zF%v24eBbNVv?=n1k&)}@OvuI={ok_;kCRYq_ouoyP^bEl^`@2B zwW>e{^AnQ%a}(y2D~J*7lKhvhLN1ffI@~BEtRph@o5u3IyT=tw+L?2dC{DZwWe)Z8 zJLuGR2SrpO*UOn$#F$X%AuLE@H_@l8gV2S?8!n~aOk3tNm!Q5LwR08LL(J}Q*zKOn zi5wujK2N>_b&xhs_!YpQ_`r(QFd5box(_g&+JxQkeThkXFvY2bGVdN4Ka;qyv^L#n zuq4F_sy)R)^0j_rA9}E^>Xf&>%bKidR;-muwO*;Ds`A16W&_H_8?m=fxu!JYy!O8h zD=1jB;=b`Z^T9T02{OJoV3vUkTG>Bd-XOK%TlpA|amVzm3S;z|hi1X5)wz;NjPYeLZAF?p)hsw1BjI7ogbx{0D zp7n9HLzsTi4ri|3N;_a2{cs_sN_BgQZ_xo$_%BD3dnYS@^#3wGkBs-c?V9V-~r(FWU(<@-zc_`%3kcCavF)n;gmOenDBNTtqt=p zhw_s{n1#vgWc2aU)QsGXB1#@71mPoeS=C7Yq5_WRc9mK~NeV;qjA5mkl%&*{0w7HR z9n7t96w=L9h-1mVx|n;$tmL>ITYl=$zpBBcG@Pk!Nzw)|-ql+Q$yXYU!&Fe)sbwj% z&#z#StwNIwu2Iy?k|>Z?D=>n&#=;pR=!nTit|;={-<)!jYx72wc)he4Q`QCI`}0Ug zqzTz!VYqN%wo@Cj)~K7!;9k;3 z#;3yn!sJ6vEr}W+q9^>z36~16iqfUo+>aFK& zO-@+Zt+I`Kcbf9a&wH1C#TmOYAy!j@Y49MDt@NEgnPjT{v&F`b%UKr{v!5BmP^0eZ z66Uq~HtI#ad>CmiL~?}eQRK4Ss-;liMx`~BBQ!XC`%F{3DKj21CQXB1y5=*mI^#3a zO3xKoBVw}!Sa1m;53jh#u7c(yfqN8yqhf^Fy&j!3r4X^TU05vr=JTglwJW?wwDl4;NXArN_A~UnH0=2SZ zi_*-_1+{WpD=aHJ7wxiRTWhWDT0g(}{{Dr37{NJS@7MczT8DWGBwZ+{EUBuHnA*qb zW}z!sV=JxYe_J0IvzFrs=N5Dr86M+I`mfrwN?_Bdjo&|PF2Y>0OkG8wO{x5RU8~vE z`Ycw=nt0WV$TJ=pDvIpbx1Z{5vQ=)Ex@UCAx64OdmwJWn(O(`R9qF`i|4~o9fU;=@ zwp!1UiETCt93N#nuEkmXfK?%Am8dsM5EI@%u|>R?>om*}f);@o@%gBOhp1IZbTw)o zSufc%02HBI?rp0B2w@koed`rXsD@)u=#B~&$$iv!6|>3& ztdhe~KyIm?iD+rWoS9)6>9N@|da#}%lamQb*o-1OyxZN)cWBpq0)5Q9V4 zs^KbW=0zYkUq0t{=cctQEJnKFd?W7z2D7TE#Kylm0cude4SAFfIrpcW%FuGVOl-Gd z<`+HT{iXuL=2!H*B{=PioZBVm-p~?}g`=I8G=j5d51sHknsc$vyiJN*~<(QYqN50=*ISb`)djt+SGL7Tt-~b^ZwTbZ zk2jPFPd|ZcOOc_ZPEnR?5Wnr5NlJ0wOkumz3y|8i3{A?wm}ainTSzcaXB!> z?NiQ*VRbmK<)`MuKgwG6Wl z{$D4;)+I4cU>+g3mlnTg3Z!1eMi_L`Nl>Ky~LCkt04p|@Pc|cA1l7y0Ow$E z5lYd!QDRW)wNLbM6T4}cS7Z=8#2qi;j)TM8vBM61V*mZa+$OEV2c*d*o7-vNW@rVM z^tNsS#Y!>r$v1YlMsZ%lW@J;2t+((c8jZcoc_T|UXsSe06>hlo6)f_YmZt+E?w73< zBk*zr?(ZtuSG>rc1&KIn!Rmb@xgS9Nxxz@eL)UGTN!U+Y7BJU(wSm_9dDR< zR`dMMIJWBC5wjJr3dJo$wY#PbTl(Xcq5(!7NnoQ2Bxg*R`O_IAk^?fH*WRK5~Q&fA3 zBQC-yppBh#G7O6ro8miwg8^=}_Wu%WBNl!ND5oB#5iIwp%w!4e;L zv@JGGWZn{PvK&wcE<5T6AKPC_dp4(FQz!N7Z3`EJZ8Jg-%(k3{^Rghz`34(QKu^Hv zh}M>2Al1y>vjW|-eJ14BRUw>JpcxIe zxncsIy}1#|gvYJVt(-ReJ%}Ll688oC36PIz9UqCU=ZWP%W}ky@9X$DM(va5isE=e#E|)8u+(;-%|Um-Qn` zw~H3s{qt1Co%oumwC`!u27viReySHJDAZbp4qMBfT9*#n6ylaA^l$^lY0NGT`Oz3B z@BZZ>MIonN*hQnvYvjD$Jp-dI2S`5c=`7p|(BACOE3yy8$7`rja%utq_nw6lFk0{R zN1QHd>~7jC($HDQCl{@$q79S;>-KXRY68lhz5)JWo!+UM|$X+(ft*N%H#Fub4;G6I(Q#eWCT!_fSV{!pigo zn;PgOf#E6!okVx}8LXS0y_q)5YQ>;>?2%BQT!B1Tp?R=r6J_Im=BS){1xs6p#Gi>g zSuLN|cbMewyF7j4BYZt25}&aEu;~46BI! 
zH;?r{IQC=vxzEv36SWQDPUbci(4gFyZ2T7dv&z9uVY$7UEabAStHxo-0kf%)3Vkf zM|#gD{cAEG`R$$Izi7Xvu22UlWTF7gMnRZMevJ+lcGp@sul436OMb8BMz}2x5p=^b zmJ#tK-a7_VG;Rd)MI=7$GJ_Kln$jyCFyR(duU_5CT-NcZcEzyWU652|F15#KV0T%H z*D-m>CI&M@l%00;CFqhc&< z&%Ee(>l$9svH50MrJqgfEU3IOyt)rGXqEPji%IG+-}Q z&gCq_r50KBatVKCAkD-tRgaZViYU^~m|4DYpvQG#4ZDZNda9EqooL#zb=C$41pt#V zBakdz&sI`XHQ>EeLq|{U_PO(S#iwTfIR%C-IiIcei@v_w0xLT;i%>3MxwmA%(}CRx%qoTK0YjH-cjLxAc2iPT6}c>m%U5F z#@Y539jEV?Bb{uCa)KPYan%iX2ZabDJ2V1)o+@PcnrEk4A*E~U#}an1mqE571oX{c^~HZ^Z^--&j$Z6V|AsA{jMcH8;a*B-)~~Ci~Y( z?r?J@FFUWT+$meWvXU23@-dqJY(x`fPT1vi8;9qQj=LeXG6$@csn!wEJQi+b@t)V)xgPrN_?mqO@VXyxt;l*V;-K zyiqZMn;(&_t|BkUMF7fL+3I-99CglC-rZdyNSC2lxnySW6JyyrkHl#tx=Sw`Hv@uE z>?FBfLG_&9Cof|&>P7hJ;=hNZ=?fZX!aUD4gVJ@b`atOMzTLolI?|7&jJnpCIB#1I z9QfDVFOB`uY$}G4C!c8jYPl7$V&+oG>txTZj`fXLebl`loi+W(ICQ#~dqipNEmpez zAv#ramKnTztbkdcqLdq)cBZY~e`Ednhpr2-O}6~(D*EY0i<*1D!$pCu$et#2JZtYj zOUS?x0SX3fd~10yrqE(zk5iTUypudz=Ci~89d869vGpJ}Sx@@{RHH9Ulxb}kCqm@RiZ$=D%q|Bw`Pum?U=I{D zXHz6k#1ua@PH|Ekv`~4i$Q=YWMG{_jVn5tYuP+H4Yd9BWf3pE?viguGTju%boe7}g|QrU z$YIW6tIQ85w!{_zkV^(BXg5+7+a<92-6Psbwj1$LWK+Ycs@J_uKlgXj2g;Xf47y_( zoKrr`UZ*qAmW#a0QcRH3!+lkOrUr`%v{OU-^?_~nj47OlDs65@94kvSX`qpZxNI#AJ7p z9eIDKX(i$d>xSPXP5+%DBT*7Cj&iL{!1Sq?fu!$`|KuEal=j*8uUYZAQ|`)J$^?UM zSFW76>G1Zr%52DBr43fuR_%m2E@D=h9JViJGg8?(^Z>+Qeo1yxJRlN3JCSX1Emo!n z&3h~gy9HyYieLNzww50)K2z46HDGvE6$F2Z(Iv&ix6GHxG2A`f64Gcm9+ zW3?dp&TGw*Y0M~7j!pR}3fFC{Z!u2I&RiU=$Q}^A^t|1gG!Sq7VbhA)R_knj^8Rtv zE!r9p>p5{Vn}5(I{AlWxuDP4azA^XQDpnz1^_1bnMgRFjs!1LGMzGjSyTaD~&{dnM zsrzcp=Y8rpdRCyyK&$vChxWxNhluQX;p?h-u^yH?TGEwbi$&3Rz2 zu3SO1ih;FX0k?!^_PjpajC<#u$IBTHUx0r_K7T%on{hERw`SJc7=%~@yT9M)_rnal zp|;TQn8W#G-zL0@>#_dt8GekN`<{F-k)$_L^ACvJuDx`dGi=R&T<-k8j}~hX35~$B zW*)YpLBJ!N6rnahSeErmkXX4^LkYgL! z%QM?IOp)^=Fe&l6)LaCY%jpZ`&{Wx+Ewe#8qvNLnBJm}Cz77S;Noy^$eGHrgBPSaz z&ep@OW8{0i5QagwRNQhl#RXeZ;}Pa6fdv@3T1{K~AjTCD#rTQfT<0y+tiLP5BUY8n z>RfbGfzQ@6PAMqI6qL18^OqPcSVZ4EwsaW|?s?5xr;j5LQDId!MfymefP%lA1Ga*2 z+qu#cy90i-tVDHzGs^<8I}55CW;&nSmP}hn|Fq#ONUlT(q8Ch{BMA!GJ2gQegXf^k z0wT=QKm{eVFH|R0x7YXGp+{DGowy(JTr9MfO#rj51K}#De2kW$mPE;A&p(3xMv0pS zE;n*372HZAIhaiw<0M#plq9GH5h!&bO3DO~C?i~&J59tv_N~rnUIm$kLyop>VYjV2 z&EHDS#ou3BWH^M+U7Io&+$9*wqqi_Rr3A~7d0wNeG7{Eq8iCA9;evrx%xpb*$Y8VB z6t~ii8qMCVZLMwX;0LK~$N!lhsZgOt(vN_km;BiYjVLn^ejg0qGZa>O+K=%UCbz@O zMUpr*xy{79Via}B2{K^Gxsy1*qCkAQm4Ho3Sij4bAMrVTh3x#I*$O-jqs$iZEHcczg6Y1oG%iMBz9hFI zW*c<2A%(TX&?8A+D;Jiw1iBsC?Y8dd?$4GBPM(_%MisUVEU4UrCO)6}aTj!N2NY`5 zhU%bD(+0dwf=tG`se=&a*LKZALJe3rz zgC-WF2?vgnixt!jCTe^-2CY3a>(`Q9AZu#i2>!^u_MWCeA^8f?qgRajFxF z5|M_qw+JTAf}iI703%r)Dvo7cM;{5Y=RjKh^TTaGd79G z40k$vXe$EVR%cza=3;n<2QHLVJxjpPs=V$c&HJwHu$YoR7+QcDMp-Wi2y z`zW(a{>OenAt}<%VLre|b~-u6$eNwFTS19kYL{b-V$|CLhngpiP9x=SxYk3PziW z-lQNGsOfv;lmjTUMGeOTWJ)LeA>OO38{&AzJU4hn0a6O5eukbLUQD3Dpv`RB(ud^D zCh7*!Nf8HJvjdXKsLQ0(5Dl2bKDQd9PLg#^lffGhILkzJRzthTs9_pMXOyZELB$9y zOhhf$6t7mmt|+_&CbEIFCs8Ir)*}eIG7*i^ zKn-7?_~}#2aO<>{(o^d#g=d}v=gvsU3JBK|<3iGE_Qh!kEJYx)8X^c-{wRE{VX3)h zpA}|ft~<|WLs7DeYygyqAOt-bc?mL?ffB?jj15KxlGtP5>p;=0EI_gncs&T+k~70Z zr|l2rjqEu+VaUw1YGF*;aBcAkhOkOR2_P>NQ8IMY+%fV(jGU>UX39ubMjjm0en&$+ z(BT^KuHpVy*T~E(i&2=q1QzQw)6ru^b-Rc8THQd$KgYMp?3?XF&z@%J&v-RoIT919 zCtuDXX$Y3DkXtG(k$!0xAbEl7Tx2^r|P`fCX|K=h;a14qbD&3=9DeF6S&k%S84syP(1-$K( zCm+9)4L!>~n}ks-_0VoExkX2BGBKo2^>aiHrE2kEAhG`%vNmkWaBFd?<-JUX&<}y5 z<+mP&lW@gtifqTL7l4HfxY+*mGH)E`vK?81J)`@h5CGd0uSwddr(^etCPVpM?>M85R}ntfH9{59}^!|~J~ z9aGi6Io`JpAls zC}U6|pU-~w-Cg)9ahB=V&}r(m0!h)MjeqjT0B2yhcnqMhALQ0PRf^!lCThqS@YgaE zW%CZM8kkYKW8bZFj9XH!#?jq)LZ7Ak^e79sw(-rr1tvNI?s0m+H3Ax6&};i}vFRq8@J(+X 
zc9Ao0>4srVeF-Cc>;5A_cfeE;>FV5I_NT*1siD8Cy(TLPFNg4ZLo$}!pz1yiMg9XX-5&W1q~(cppy zYyNV9LQHTeN)15Z9c+4?oEoaOCy4Mbn<@c0U^hmq(?H8GYMqSkFMng)*v~Y7EB?jA z`0x*5C>VVLJh?5^Etl=mP>)CEaL3;2l1PV4th13KKYuHz%<6gE=PRp4voYpuIoTtU zX}g+2oQx+)Fxr5&mzsVn+@-tRwTA0`3pua(D(+#Y+=N4hyRXJ^rY1?YS^a*kb;r)4 znpXVGovJ?q#e*=NL~J@tLN~v7vctKOeK2zU*C%*Oq0AvfF_aPN{HDespf#!7%ewNe z>wF=3UfpY6gU1o!vOXQEG!yTvA7$sor?ke+FYWh8)-MNX6C73)-_K`T-)ow4?2Lq%&Zecz=hXLs=J!l{d^?+VbTW@TGJ!uB`As zL_*FlN8UGxK&9#_5?^0;?Il2?A@yaWMy;GS3h$R%d|dQ^AJ}zE_o#fS&14G!yxAjL zq;e^!ZaO{UUp?EiV1dWiud?B5+}JL}`djeOi~sGA#7kY=DQVTUfkT+ktbre;7E|uY zoDb8>Xm2IWbtyO8&9oQdJ&&|qU{D<;HC|N923?tzq`}y;C!#r6Rc|KLpi2IS#us$+ zCecfE+~GZW7QR9c6LygSm~alO9alM&Oqe9>xK9~arJyACRYTr5B39=!223E327ORc z^;r_NfOIskf|Rft1*!99{@U@tp)WP*M&44CM8SGk-05pqn?Sm7$VW*NAuo8(>oQM5 zZyKE2aJ>HbHtWEQqm->)*W~59{WL#J(CdcBWwz;z;T+iAE#!r{nMaAC+#($jS`|16 zy6HZVr6bkqF1d7D-B1Zpo&ob(%nHB+wuH8N)HA;IOyU;LLq)V?uN6n@K&q|YCz)zn zVr;38tuImTP3@(q)r^@YuOW9laPq_M{e%7N*%|LTZ1lW;_c9PmlAIMvE<78Ald@x11_x| zGT?ufKK2QLSbD=&4*FGp=T&2_uZoi5T7`IJh%2qr4_L0*k~*Kg{Z8t-(cubP)TpoE zjCu(uRvvdST1t~ZRqY-uOrN_mzV7*S5?It?u7780sWp64JXcJ6B)e3**zc0)=(3jY zV~>dG*a=l~LgxstrX=klN~j-3aoE+l3#U26 zcQsb~#_EZttcU;(iB}o%D97SH=RM)?3$Iz3z5jJ5PDR*g1p7RR+c?^^RHvK0T}J@l!QfLjpY%N=t`HMTI@l zW7(YLRb|7h2bAmNtCSFavsndUj!#}h*_GRzcH>@1@vw$o8C*{H)q_&Ckdto0{%4MX z4(t&Q!Ikz*$_`CdD_Iev;`xd(T3nTwK9>(646v$(O@i>k3MVHHUGzr4J%TC66T~Fa z*y2D&hF)uB8xVQ+T2e^n!kg*S_G;-{{CD>sfXqcGt4a?~tPX^1&k$?mO(=YH=HA15 zrJgTka%84`>|yUiO)THU-AB6R)~nHp$>=Kh!-AUknoQSF!nxxwq`1qNy?C>1j2=yI%R6j+&`DVir0L;;K=q z?-mu^n+;mJ3^VdQdWg@UvQ*2>i}?~^5{Hq@YG&nCk$6`Wl@2wiMTc`4pR$9wIIYEQ zTz0)th@U}6^;S25J@f+#W@?Q-u(tJXLSky_gWiNq0WG%1k-Tuj+mp!)Oy zP1ohCpQo=z#t~=Hjlrh3kSAxBZ!> z2YA6^ULRVKGS%8FKlqUKExFOsLB9B=063Ia?aA)DlX8T6+jkcwJuBHo?~5F#KIB18 ztw9zu6%zZQC3X3GqfTyMPBaDmH#Ucrs^rvbdYuDV88Php?(YM0LVXILt0#9JNNhU6 zGP=$!ePv0V?@8vyW3Yb>v8Rt-b&E966Z3@hWPa3ppwm`j9 zK219ych)e;8z@8IY-(rIh?VzManq-?RLUh*&~dRDy&tJSX-cdAE2|TYA}1DmN}N4LHkXqk#`FNFM>!L9 zN2qZoQ|(=Q%=`(Pu_Q2;wsOhMZ{t=^gBLEC_2&6bQhF}09oZPvGHqdxyAGo#m_Qpn z?s*C$^nU`zQHMoZi~mZO>^21>*!ZfXd%*mudh(U&-;Tyk%(gkvq_>I#{U&#jcrO4h zH=Bevy-7$*EqRSyJS=oD(B|%I=Kr1%`t9b5iI48mpYB`Yxo^%#Z76;BHp#p5er3dS zqBX~M6ZLo(uKcoAR_Ww%Dght|@ZtJul{i=>^?>L2@ks~}oIO0X-PW?_b+=G0CEVV8-ajDm9-l#Pv(_?L%3SZsx59_fIWSr~Yk3S15uwv9{< zh|UR!0E3+d4b3Dw2vtUj${tJ0lT8?RVhG##)`82${i?brdzAUJ!Iv2tvfY&vBm3y%_mN2IAA=wa|WKuo& z=h|cwJda;4?kdkO{1x?p{r*20(RZj!Xi|>cDMLq<;YMgmOI>~+N`Wvumv(nQU*jSkh$Jxn3p+*u7fs`eH;+;`G*+%3>RZjHu4Gq= z!-X$Bz1O5fH3T*<+{f5(BXrOE3)#+~)2LF4zyw1gc@$34!~Z;i=Z(U%@@7b5r>5{# zaccNm{q6V+R;sT2N0e=TY(;)m#o{VTt-NAqGo^1S$I>NUENTo+211h?H_dF_yQB36 zV5L9t$CZwgLz-3!0{qcsQ+6DnM^fafebl4aC2Cj6Es;nu_X8r;4LWCeqhH526z z!XW>l2vZBjFwWSNwwD4(Zw~ek|1)FY1Tygc`rTIi)mFT*1s^R_y{VRlJUJhl78L#h znyynR6qW0B)J-Pp<{_%T&)rR?O3`1`jUKnQ22zDN?nGeFeI{w;%HLEMHQA-aP-`nKi6aIM3;BCQM)o=Y{@<8uxi}zCAxBxM;#E9K3(gL{@%5QHacE|mLlU&_uzy}%*zEA-^a%?-4>0YP^$>kPkpmCrf|_R)|k zdPperyH=XfU}f_yvpIZIRBXR@ZvbW%jQe|}&GAT^N1Lm#BHvv}St25*&@S~SUvj0v zYYouV0MJ$hdLZS69_1;(e&W0uE+SintC}y7mo%z4dLU+Q1p5xgTQ6W2%*X#>1-AJf)3d=a|fll%4Qx=a><8-(^AGi0w z*Q+oeq4lV^`ye(^4OVv`EE3HPid<5v6!Q^v1vL2uqI27}TbaL<6A z%P1ZSL0Xya(FrJ|OE9DF=K4M=;YsoHxqHts1AkiS7))CF4KKG=3QJRkIdRq-+-IC{ z2)mr&Mr7c6j62poNlJbZbTyeq_6s8)EI*psc*9r0@*CsFd6c7#Wnln}8X-~Rl{^GK z6YRUInQ}bbU|CcVVT7g|%Z?2~5kt@vXE?S*^}QDUxyU9?v2~X}`SZJl6D;{eo+s(U ztcKG4){@lHV zKbh1MXr2GSMm{H}c>=fAftqEM^=8$fZ2G~A(oIk2#G{pW$wLnuM+E5^f+0{9p;VoH z>gaJq;I)I3>h$l{4A;6XU+&^2YeR`;d1`QU8oN9hAf^lDnS^f$z1QqbmJo#6V6ud- z8Z|78D6z^?P_h73tlU%FhnaO@tS-#mbXyi%ak`bVSW%HLr{u@xWnxF6nCIgWa-jB7 
z{?~<{Q>VWUa!w8QM9;uPe=~*+quiIZ95~p`|BF8~&sgp)B8G;K?E$<`)gK-NqPl^} zMwNH0-L1iXKZk5f^P4N9vj?n1cyS7zmtvzx;bnl1>N_Qyju*T=|J%wk{lhskGAc?Y zzCIXS_Vm7MPJp#?61zaaRuU)>L_PoES@7{ywfRS$bLbAQwapF+KQ|qe$tPV8BhN=5 z7frL@6>{UJg_a4qk}7yv7bOl*xvF=}psCQViY4lbd3|M3y7GL}iL+~T0>CmRwqlk@ zEYS=`XrM5l%zae(X1>yOG|K}cZK_b7Dks?+3r_r{e|`fuYs@aVkF#_*`wU2zjPayKwNa@fTZu7B> z-mCQj3m-56dRTK`ttlnm52rt z1m#o=4w+g$Lqz5jKbv&AdzL>C_;71WH~MqWq#Gpwm@yN?V&*v3O3&B4*tV0wKllmU z+w66WQ8@djC%9z8<-|TXz$D}<2KZf|ofilNq`<@3&pFN0{Rg?@ZrTx8BV8WB8k2sDMC!ZQ~HYcs_ zeAJz^#(rq5|Gn&_&bb}c8fvC)&j_iEdvdBo`myF)vsJst2&5nTDhtXvAx z+c4`!fL!(OI_frh74XkU*JKS@Gg$7z#w~SG+~6wW?oc*Hp4BJ`Fk!T>-La5##WH&a zd)&AYoD#V7MlEh-Ea^N9+R4YG{snpRT+#|?zci>@;PBzDHbzESZ>&t*{!PFq3HVA! z-TL2vHNWY;t)?(ERd>yPW53Q~9R7-zxRJs#3h%8dOO?adnTwJmRXhE~xq;=W>@UCA zfexc2Ctz^VBn`(%Fsev#hzbzZ75xT^bt?HWBBYC;w;i>Qs$~T29K9f3TX4|pdi(z0 zw-T<(2?rx)Yf^^sm6h6qpg;akX+(m8Jcsr$AeN*G#a&RPAD_HXH*i=&YAFYd;{$!4 z)3;a@-}&w9jcD*}WHB$e*kkrPeo1!cN&K-dIS1$M++Eh|*>o^dHR)s}l{l)y-}cx{ z?E7u~tUtf)JN510*!p4$BlR5bwTsZDVSdJTm9(qbRZh@c-rnlMT!S4PtCYIf=vYkU zir`bcaGxO%wT8I>NZh@gc2~w>s{S!Eauq(Kui)N~RUW^^U<%?o2++9vz(NzT*iJWd zh*z*V$|#)c8&i)a=c?j1#8@#}k8pzu${FFoYypYlm*1Zi*Zuph;|s1#l-#@nFR}Wy z_)78o^C=ZIbmk-vnL-T0TUtEoB(wJl7c*=;PBfz*NzpaOygN1@66aps%FoHvVQI(G zc1~S3)VKbR>xuS}Yu=6xKX2n)*zofUXzpDi$~H(V#@Nb?)9T(@>%aNJyfY&XJAMA9 zo#|P%&GEqWzjcD%)toSOl8;GkNOW>_s8g6UQkiFrlE1$>|Ay63*O{fJj8z43NNi<^ z$D4uN?KVQS3unM&QdV%4UOs*@VWpWF-w~0^jbgCYjv13i<23iy+W5?C{i+uSNNn81 z-2+mK4vB>@O@_kMM&z2Uc<%7;-W^{@wu;P8wr`u#mL-Uuu+2xZ)6!)jYpiD{C%#lN zJ@d+~(*tW_Ax2=UM!3cAl96QQ9e##qwfwvx#<8et$L{$X$F&dqM+g6zlRZNi2XnT^ zU6bZ*5BZ&MA&zy6VyC)w_T*{)!g>m3(6W2X6NXk+dD;}|aC&l7_BAA5%_%ou9#z-x zc{pM9R?kB!PHb#B`wU?gKu4~+lp_}^x#j2?s>`}jxx^x;s<};jh%MG{;3kOvgsj6c z41nS^n3S2U7?7IF(l{}E+o&BrJLE{Ui@#ibJupgb1Cl3ai{8u>&o|KB+kSpx`g9bJ z#L1>CN>-i{ri>YqtyERZ(vStSbJG1Xu1<_Fe7DWy)%c%5`08J@_vo`KDs&B!7bcZs`4c5baYK?R?)w~$$GN2-93t*O zC5Zv8vHooc4zHr!Xk~wbUDQ^q`kcavmVH1Me^sn-QBB=UpYv%wx11+g$&<>k;V{gR!ja&j*B)hXS6AgIGDExHwank-ai@PMnyAsSXOz6^byNoCD(NYW19Y$biaWo%x7E}|wFH6 z?wBFKs)}bIN20jppOg-DGpyA7<{m*;Y(Tv2UYBNOa97sz+b)hM!O@cp3aXrD_KB#@ zt2`q99I$c4c1SFKy|8%0D^HBGOD%Ts(n9VD&pAww$I)kmc!#UOb&4PK;aPq~*L;EH zBK85AvBHWIgnB*_W|_WJ{K{GLN7UBLUw;$|^_MG@&x+WqF?K?cKddBXk3}=zpr9hPDg~M>5TMm1#N15*`eCxbCh#NH- zfY-{ZSE4Pp33|)GaW$UwQp5G@ij@+~;2IGjZBf(6pDLA=;c}Xh4F@=_1XF8zC>;KF zsm>LrWtrB_EunvK%c$@tO0>}-0K-cLw#xL<*>CJ=ZT`YT;q6{7-_sda&RFy6%U609 z;?HwT&?HScyPrQ}(yAAfl6%@2k#NeSa2pGwwco;Glk&F)*v>taBTqe%^R8<=w1OkB zP^v4q&0}Gr&KAe_>@q8V@RCQBfV(6sH=rnaAanE$uQPZ-fN05<{KgU95!p^eGT~RH zRSP&(sLICoG!)Z$gS=qK(t_(zo!5r~M3@jbgsAChrkz|3265$6sTr=n{Jj$MbCl5v zMG!S<(KQ;ZOrR%}lbnepC~3!5e^KBGgVkX&@sw`iafo!aXiKL$r*$THiH)hB7 zHrt#)$bO&j1-CyZ@^q{0*6Vj{8$23v>-^5DkM&0!s^qE-5>DU@xzONs+;!-zdHar! 
z5=5beE!AS13$df&#hh}NM$k&r*NyD;tIe+;$!#4<^!yTa{Vb6sp1qgqAAcIMC7QgU zXg9ZS40asXz)7NRs<3;IYSh}|I@IaC=(?FlBIT~W41z@ylYq7xAd4T%_DaUvgIvfA z8~}3k9kev1HLXe?HMmc`i=~GTb&-S2Ib?s9&`MiHS`&|gF;|d1*AiY)?bJR;WB>6G z)=jQH(XHfZiG%CXjT9m+*9P=dzr9Za-y5aHUp&ks-$OaOJ|;Q)bc%{K&~L4Mx9$H% z+gk=s6qI;B99$!?=roeJ6u&LWCvt77ksjYunn&I*1l(TdXv<=tB1J8+SV!w|B9{I| zDgjG5)Z;jg&{J7rz*jO0ZTX!Ohv<5XtFm_J05_h20wf|rHT$Wd#>PTy!s_(}5bR&# z`EuSY4zUq!OrS|!Pde8fuJY}dEIU)R&wt6l$dziI*rmm`sBhT(^_`wcp9~hYiqko{ zS^RKgWA(`&+}$OYa&Hd-YjYT8+iOkah$kalp{U2h-%zb-%C>6K_ntY!rleS{$W!(8 zcp_SeTZF-`dIFy+SNarz)L@#*=LG;zi{Fw$t5lPzL~zg|C$SG=W=_e~+$=r4z^R+e z5CLYQ7J{?(J4$H|1Xu>!NeaEUbQznHhB_0{C8q@NW703~q`>W1qsRmoywK9!3MB#X zN}Mrq0-;=wdYtN?uymQYWJPJ&;lR%qnpqaNdVIR`pXa^k;(f))o8^?iXP@3*=*s52 zD_R*o4(2XpzOwkoySqWSRWy|S_yuW}+Z7`js*_i&{sf#3`(h2Su-HI{3Ov3bLt4x6X zXf~}QVjs%%GAMP$#hA>dbDnX1JX4Zz3s#ApK$uZv3sM zWed&}Dp!_EUIj^=*b@Ku6v2?ha|k!90q9Tcee=o!Uw{ZA5sZqFag1J%S^yH^IKV@c zaa9;rGRM4s){$V)W(cJd*K-Ge%>xG5C@iXrcfjKLs4hxRYlH31#JBANi2p*6_5`fQ z$R;gC7iB@~V-@SEW2m{BXnx_IE@gIq#e9@Oo@`Kd@6R+0r7fOWu_-p|>gCUH}BTaIqA!|Of!ceo^!`4oecQ7wI_+lLSdRN;Iz35k`2 z3`qoPalC7{_s@h7lZ5U)=}6rEJ5Thqp%$J3rxQ$A;`oOF-q>@qtZtieY|=*1oFLkn z^iEdYs}dv@M%-s3$V=$9DF!FU3W%VW$CmiiqXcx3U{D2=7@W|asWkv4PtWZVPx2N{ z&3ooUOdntDDm!ZP2-ByHx^#}_b@t9)u;^;}S>x2%sk(3axmAsicECm_ z51HWuLV1?SrqE=no}j<~sW&e^l+S-$|Lv>TrLoM1jXSDHwwt=$iX?7< zTdi2#K2HXOQ+tGOs~z_cMuISJQMod^k~{BZXy2mW^~f>7dD0 zl8~YvdRx!PBBFXKHr5zihTMepWg#^vS*K#3uv+Sx}M=CGtlaI~!kAk)vuX;t=claoU!OF{nJ>Kaig2?JHwm)wNv8vTySp3pr zA|tMGoKMYZn7hs12_=Q-ToLwm-zJG|_%nC4_i$2nAg_&j!px`nchzGrHTYaxBpF8hr(Px;ibS9H~3T8&r`o-Lq()>++7^}wGG zF;<5jCIDwNr3-2x73!6)8SCN!#!t@?+S>~3^|Qig#*1A{Re8nPs!B&!@ILlvk0el} z$s99g)my5^43mN-u9}{3ll!x8prd?|#SnjYk!#n(p{_NHCXP)A3#;fh1kBgZ;8gQ` z{uXhC#DH`exU_C0T)e!hW`DgTAWCVE#{YQQ^Kpz|xv{>2 zoH2BDz231Hr}r6%S8T>wcu(-oQUi}|uz7+Pc>E6b>KSr9MN#Y6ucz=W8K|D-dROBX ztz9yrG2iwe+w$RT%SSh9W_1@-a_eUb04s~%3 zG*tcGwfO#>&}AYokCC})7AI=?stHK|OX4ExmYabqy9|hl9>gX+$Q%4P%4^sm@f_@4 z_E(IH8Z#?0a7VB6XsPrXls4FccErPlX#6w1#aZL-COv?;S%gPE{ zZJUl}E~SOqVmp?VHPu*IQ%&Q%=l#Mbu8WHU2cGAC?)Kxm^ci7@g!A0U>&XOY!V zw~+LGjF<+)qb~^|@Yyyf!U{&8&}uBABMta;VY}I4PxDR)-Z#ySAw%m6H}*wl*yX0g zEsLvKy~Jcq;~Q}Jy`87hWQs^%`|dWr#4c9shhUcszi7SWa;?F6xlLz=b3qA}TaWSb#)h$b?OUk>9P(G9l3T6hO&ee^4gR_c zFkxXx@A>vV08qB9d(TPm2>>;pgifhjm&o5L$x?8S2fOO7e=5%VO~ZNwYD0$JA06cz z5zmCYeqr=J0YO${Xw*(CZ)}67*lS8evcGYTUw~b^q(5=zJ+|}A{1*jow_iXj$=gl9 zCY+5fl{Z|Up@!1pr7%*=RLe9aP z|1oyqCNTKH>3QBl{=sF?_Jt72s2FZhQWpJk-F0-&88IA z=|Wq9^2bodO4AYSS}V_@wiSC^wUyJvrlljkZnvLl=Tp+Fiu;Kxq-T8oW;{6>n@HEWOjhGYs){W zS?1uRSxb5V@9h2*4JZ~_IdAMEvTq&p5BSdrHIa%PgN#NO7 zE=fg*BcF-3gC71GZGNIxb{u8%Ir_l+9AB(`+`E>PFEP$a2d&TCyI8*wySYn@181IC zHOI|jp8JlOl7OO6`gCo}@|(MCvw=(ivmXgfmkU|))ob=bHFK5L(w97`%1ti%x)C<3 zR(hphck}MFu2*tcDI9@e zcf4-W3W3%pCH`y>pX@L`rPVZH!A$uX+>?IeqkC+?Smcv>uo!egR?~k?i*W4sFqycx?G1l6++q$D+VmlM`B^<|!S^#5XOw zveG&MD;UUKMtbvP;dbOz%E?`}Li1zLLf1#zbyKJQ>y229xU#q5?!F9vvuF|p>j+=m zNNe23yQ=5Iwoz-JV3T2*>>r#I+n}tb8b}n`8*9@e5&>eqYoO7^USs0pH9RB-bVX9nVi_|n)-sQiK zoSCBxE?2VRVB91eQYSX~-eWJ=^8Z)>K_?UKeI_n22b4n5Po@>dXZt-iiYq@+@@1e| zF#>$gZ&(-aaU2V0-!-6?v6y)yOrjjwdB{>x5J!ZP^uAz^mc>QA_&zvuMql3kzr6pbqPTBmi{nxk6Q{Rn~`>cJ_%bv8qEDW!72tStm z@Uw%&(tpLXu##c??l~IAuR?K3jr*!UhZ`40J*caLnLo5meh&XGC|MwwImYL6Y`6T5_J`kjcFm1e?AaFM7&jCE-;Nk?5JBa@BY zB{f|a56z~pnU`-Be4ieEdcjRgkF_VK?p$lG4r?N{Sbh%f=ebXWK@{Fi?Tx*gf315{ z!)xk;OuZ!XL)9N8TW?m!*m09xL^e(im2(irp`9~3;KV|zGFM#2uVw_7IaW)RMQ*Qi z6Nw<+&sB*4CT)G!QBB+939q=kolvhvvua#ddY&PEd-2|zo#a-wX+Fs!L(S zNZ6;Y_zufo1{U*PsO)w=J2TVv$MpL-_YPG|{RtkfNN4Ep{g?djtRi-LEw_cisOJ*w zCiuKkp7+p=y<2{@I~wzv1;I0%np@9aj^1bd7s)zjRED1uHgsK0@$E1uFwi!jC@@FE 
zEp-64V#27U#UZ<_%n&!POk+!<_8ZmSr~CPthn6k8PTeW%r`UC%)M#?63}SnmC#Z{V zg(N7cc5;cz{0~`y#(1>a2y=y(1}jJTty+M~Tj$8McJH&Nqft1oir#7?5|poq<&kgR zCd+TqOne0joOMb=C?m#dmGa7Gvtd?Ki8JX<)7G%3#X+%uo-ytG_e#FcecN9iY zctdmzllfr}ZEm;F&SW~$^AnD>=yr!4WpP@+S{1mjvCu*sx$d=DOalsSI^^iE%sFK# ziTG<|zE;WoDN7hK%7N{e^Ri^YjL@kL1a>(0>z2aF_W1i*idDVQGUeLYt8NKi<;CHP zP4RC2w{Ci$QAZqWZ&BMl4op}2JEh5zAKS!ni;O%wfGZtBXV@`mBC?*FH1o$Lae&?Ro9@@-G#dRKD-UC;rp< zAh|%ea_rp_+2u|uKMe9`V5kdzgg6F_l}OT+W@ks$?w%EpwUEMa?YnLt_NjQCNXaPj zQ*$v<#MJ5d>l6kb2JH}ZEXRPEQTq(jpwZOn4782r!)u7=o4d?km#nxs(4Rb8k?B8c z%ju*RH6}xiYi>+@<#k{j)to5knC=Cl9F+Y8D(V}L%H*jyDXOr=N zAcIG+GCWAnLYB{;giqAVpwtqVl{5`;Gb6;ror7B>$J8RtH)iFJQJmx)vZ~{TTjF78 z9pRsBVp%CLle7Aj-TT^w3nPLBPsBjQ|DbV>&>K;e=Dc7|UP#~O2h89_l^hFEQfM+E z*#f!JC+XQ0eQAIgWKc5Eef^r+&eZh=?5wBAZi+#{6MGb{dXKR}!+ltiMx#(((u%sA zN58!9;UmvA(#rUuez(VLj@J$U{@=*H6RW=p@nX%MsJ3yHDP7Yr zCI{s6XWo6cGb%Zefm%&I=Ih+TCKLpX-wj@vqFAbVVOCY!?dvE4FjH%>k$npCS9z&F z8h9iz?_%WWOal!<{0i{^GU0Tifcf5c*^~4XGmCc5l9K5N``U=>ti#xWe5X{4T!fG; zC#3o%K@4*Rc7WyXThS(cvWB_n$h1?j8#Q6e@Wj$bF#ByU+Fs{39 zNhn{h5fdu3g`CRF@W$J-V&=bMwI05I_iu!7?BCJe?XTxN|1oKQ>~rTmXk*u!m3F#o zXWN{A|7Szhzh>W$|97vS^H~N{eubD=B_Aiw6P3=bOSHHRtIyHe)rW<4tP~G5msU|6 z!4gv(1PFMk7)!N+v5x$Z8B`d=QUPqlrY0zG3ItObOfB2v&k!pNapu$QbLh^QK)l)5gL9NW# z{Zlb1?&0FOoM1bqh%9#HGDig6MGM>2KX8pk#Y=;uMx;gm%)f3I`N}p(Tlzdd-eRP# zXG*d7((`|xl_D$Mbc-S%J&)dPz&j8-sgVH~OF;r%>DR3v*jKZ-ERl5(tyM=RQ07jI z1y}ZoXf-mWgPu=)XBoo%thKi@a0ra+3R^}y!J!r){pp3K@-XmL1`c(b?mENurTIug ziEQuk&+FW?FWq|L=vBz|yj)iX8jXK^_WiKoLMnpsy0N9%e^NOkNWYh84li1PAMYdM z>bzFIrtj8?sI;OrAvA(W(RU2W< zp$8^LImMMW=5;yS7v7`qhz|&|Xq?i2GFC-dj$UgF%2Px=U4wq}Cl>$pSAK-PXKp=; zgrtdi85KKYGzpBx$)B9c!vua<7ki)>vd3_XQ!#e*Yc>!U4-)5oZ)EI6Zd#`oQ$i-U zGMZ$1o4gmec|32zZzZ!c1W7YJhAhj2x6Dqd-M-lIH{;)yHc-U>o~XN}I||L$6?iR_ z0vMK7P+BoqFBPWv`NN4!^Df&&c)exyrMtdbkx{xpX{YZrJCLBT?ipEOt(kvCtrIvY z5&R;tsA{8h8vF04lu*jEw|Xh><)1j8-Sqo&n=$%-)2qonmSO+mSH)ghlm=}6d;jx^ zgNmQ2i9pZKBhQhLNtGK z(%&PrpIX|ap$;=<*4$`@=NgBIW~74*9YT?73FHKbD#YS%Jkdern0EZ z`-75Poy1+cGtBU8T!~ATZ696a<%qc38L)E0MH&o1Sl`6m+rk_{i;n{~AHBQ{$I~(Q zQUHfExGYQijwi##wyrHx;a0$|7W3t8iKNvZe>ChK5*9FZr;ZxN=6>t7`Is>Ad0H{t zz3KON2{vIz8RZ{a#_9j2cTLo|Pd_;|{bCmL?Wu24qr^}d;k}GW-bi}A&>{n5&^lS- zG1g}pV_yOzzto~qRPQS!)c}(F>xc~q-d77z@Y}f7yXvl^j{w2ZhAp&cVJx&Vf*D--H8+Gk+k6;$Ll zg9%dQF!4NqCCeZiZBBB;GvHmq#0OW~!<+Y|J?-YZeHeUJGvHOK!THK8&ow`By*?`x zrl4})Sz%G)p=~wTZ&rX!6kubD#YLwQ9d~)HC_y*-c+U)cE<*pN_43r#*T_gWotP4Z zkwt;A>kZZmfnDP)JD&oh?+D!$q+i%nO>C$CI70u_Y4fd@G0*djO~rJ>$F~!Ac;zSm zwfQvr?IwrvFJ4POV`W+X;V`iaX6%zqlUx%lBx7cF35=V6ZdzJl9LZ;Iue81^vL5AH z|Iug`sU>8ItpDh&=&rJA08%6{_VozV#3AP(J+TqQJProp6tjfq+xje8qMqfa`Y%qk zT$eg)%OvJSR^C84VX)a^WYD9v851Bw{Yt_gZ37neASA@{gy4$`jM*8f8Gv~Qv$KTw z*3!ZC211t1GgV7)Ran={Fu*I8U+RZSE}L0t5$c65{_MQgRmn2A!dK?5O#2flzo%E( z&pFNbb8POj2;=gPelgM0Hec{*NJORAzd;=T3NOdK+5X%VGfE@LhI1u-jM1bG3T~Xa zNbx?0Lyzoa9rLtSPuuacKvoYz4oxsi0Zp?+thP?efDz^|gq84`VVAHf`8ZGyv&kTV zjLLt8k{3pnE}IAkhh}w0#oEp?v&SElUYAjmo|>_|IEQ@sbjVIi2r&2zqKC2%o2q0C@OWuDTXHVBKk}Bvnc@;}Qe&0BwUIXpB$T>vp95S3|`trpl0EoAWU;A z14>~0JJPx+dkyQVR+{__W3WL@nv~DFUjBEU@TIywH8UnIHL6+ZA z{zfn0NX0-36+%SMt59$^i1HhIaict!`oUlz!8}yS^^wt4=u1rW^Zj%Vv)K zwK}Stx%-PllFaLNbwafSTb4pcWaxXNoP7icXHccI^|+kaCT0YN_4{ z3{S4{ZJ_uLjOx}@6v0G6SR$8z#=B``za11Z-=kLtBD>1~$D)P4E>9zi&Z16TO?uXi z?q_;GNyCK;!y9oYH&EJLlT%cA^Eep&w3@g=zld1~$qncmbJNHu69_3D+4aN&Uynq@ z!Raf7gs>*O5>EdOpqndj?*6d7h+iPk`dxZv{ zn#TWwFq_7#=P~w!9KvbZSy1Mm^jE;@W&z#~o z!$)5Zp@Wmvlt!p>?VnyJ3$>RFd%CskeM)pp4L)@6blI+`p2jc(#l3L~#X&=15{*0K zQP;`MA`7yJu7o4shhfrT<74kDXFY5w)#0tSdagUo&Ihuyewqx}v4X}}H;;FS>Wo$* z(?=YJTT8j@&-y(-2#kn=OaE|fPp+I$(-=trF43@N6=$y^hFKKDd%5|j2z&MNKWa2- 
zXeojp7t%iiq$XMYgU+}|V+=nbS*qAni&;z)?Z4OM{O8I2om*LHB8b#Tcv5p}v4U0H zPT+7E_e-)9L-AQ00S4<~@mB*a#qx%6LVivE_WR!Mr(!H;B{AQ*OAiTBlD^gs?Amz* z`evZz2}$HJv(W$J(u^!{1{L*-4PgE`Ox|YyOJ|A8b8wE?{d>P|*a72j3pUQ(+Tx`} zmpa(|V;x1&7yIomK9w0Y%`!4eIC^``Zlbew8*RjO@X})|#)-=l9S^)br+Y5{8F*!D z74ObtFDR?_dBb;yADmn>*j7xRY7kPKm z#;6i_0TEPtOwUm!>r*{MmNHU?40q%fJ#0%bD7OD&dod0g!M2!O_}b%#x0fQ zVf^UtzL*6mT=60lxMM4krQ&7pe;miIf zXRR`ka#^l2AeqAuKZTw>fWkPZY(;e44@PJZy;w%-#cb2h14wxAoP<|GsMaR}Zr@GKSNXr)awAVPeq0;F;55O>k)haCLjF2W|wD=V4quVqaqzDo*9c5rmurKVB zI@yQ**>U2|zBN(bAe)(ZCd-qYcbl)eH~h5Jv-r``nEp!9x{4J0>Rs!Dj%BJ`>->J3 zeEslD(X2Js_Wnxjc{2OV8p;0IU)Kp6x%fNgE{Q~I=jza?Ch-DZd1Lj_oyHaoOU*sb zbrFm#!qf`rXfMg@kmu%i)$2@7tob9&VN9cLY!s=)ASR^Hy)JO`+XisYp9sj@L962@ zbiz2QAEvI9Rl+(Hv_%kD95oGgVQ-n(#p-qbVl|;*?mY8<>vABYBObBR!L8W^Lv-Jy z4<(zs)a3IHu^Jd7F0c%7T;E=efZAZUuRLTJbH?+;lmCyU10cqDAlmp6VgYV20_0;)5v~tgTeoh$RI~>L441w>^nMIh$E(y)s@L%55H) zQNH6Y@A?AU49KO;@*2jx9G=u48dkV>-Ze`U>k7pi`^KaT*SRE? zI5xF{OZbtS(S)9fs1g%nIcGk*Ufb30*s5(RVVTMGNlrynyFyJclhnLwt=7(nT|5c& zaAknFn4>JNz;Y^t9tbe~POAW__iU}(+}h6W#7P?+AEh3=vGw!KhULpF>W=tKo2u~4 zxW8eDFz z+k{rlgr9hpc#ZVQLDuq1?FybrP3y$?=JRKX0|Fx`N2RTo!f9Oh)`1@}-~{#Z7lq+ZEF&u3D9~7djiTmgXm` zOL2a@)iFKO4A_@yTD#q1y+!v@5p@5m<9@tJ&Z8q4wC~}a)R1Wn#G~rqnYAxS0St)J zphE*I6B$R$)g+0iJjI)brOS$Go;pa>A!6)K!}EYl-?I;!%qlwyNxr~^z_Z*yDW~r# zpIBb@Bvr1u73SH+<#ao3a3CzyytME>f%{f5$U{2vAFjERkG%VJy)%bmsvd|q(s3X8*Uwa_O`!xR;Pid{xocOmy z%I|xKMdA3RVO;pb&Rp@>TftP6+JOeUa+=uZX`;KOQj)PWsdRn z`1hW?K*z-_7=FY-Ce?gzkh)bC;`BtT3if?z98g=9cC-8Rx50HO^+N*(yh)a$IZWR} z-8Tob&YU~&YVmB700fK9@=~cnd>JEzW~NabeU$*Xj<(Fl>rNUyskF#204z!7z)Yf; z4u>sa0hz>khE7`KhQ{e9?pkbU_vHQ;#9rDVV_MqOy;I{;WB&B+j-IFL1C-CCwbc_! zdK-c>Ijt#p?==6gUyFF*a=TaBtUb3IHaI)zm7LF7EML%t_G<4mrx|yK0Xtb0e%+!s zc2^9YuXd^WlI%iI_BG^1`4LTslcT6KIz&wPe?6oDxk}LDF-aCC2Ly6Q--^)6+pi?) zFaECcSeJFAy+4DRtULGk`&-{DN2zIBq{ZSuA+U!8L+8DoLqrbt;Vs0esZ((>TE{-3 z7x5(?2OW}SxJ4V3Hn}pzdkZLIN3qO?R#7JEXbdvbjKQMUNt5of_ zx>0?}S}#?3N~C=&B`oVmz@KkZ{T?2vFgg}vp#@wHbe0iisec?03(07#FN8RQ6gk$X z#!%?$VaL3BNzSj)w2;~i7&jf(Ua3d0MZKuxqPDRU^(2ogf-;&53ug!3Hrj39ZGX7v zw4_Yi<(qNiS6q9-MmrQG=)h>ow=CgoJ3}qGbvh=qE&8w>mvf}g`3n|>Qm2K*qVE*? 
zVGXpLkh!zeH-yo1B8#8HCOz76@z&;F-A;X);t2j&o~1xUmmmsIrX}qLuw1jM`wom3 z+lHQc{q}%o@n^EM6Fi`yAz7aTdF&zv*=?a%BDK^knJ#*XC}$KmlrrGXr@{&*{H zmB)*zmX|n&g#&ZrpMHfQEDW_QPCUadlaeZFqTuC99+WUm4)gwtlwEs0axiwEzT?iF zFONU|dXf5O5_9gz?M0>dzWvwch08Xu`VL~JFRCJhN{o?QcqBhGJM32ccM2L}{#mN} z$35ivRVof+n$=K7Y&urcC!~CQ8Coh5uwu^it?L)<$}T(fi|hVh@tM_}M3XIZ!CxdZ`44h3GCQRlVuEnCmT9^xjJPt8fP8gI^?ir{Gbh~RH6y=bW3}PUH4HjY; zdez>sRaq}mkBy~T_U26mTO6v$`o0s4cH@8sUg5)y8)@v}9Qpo#1U-0zz7_o*roc>%V;*`2DD9H&^qBn1<2%~%`5Y{| z$86MMBeOv(jv`!VW+nq?2zCbOB4&7?CsQ+&EL?;%`q~tI_R*T2EbH< z-7F>K!UUH|fGnSmJx~dpk1E`AzwrAj{38wZ!MOML7Hq%3sCP5ij}W&4OIv(w7w(NV zyN@w%G(rdS$EC#<;fa$7?TeveW$)&{jugM`E*5v=BBfA>00jl1IWp|5b=Vp{%Qn&L zQR4D{b4+7-rN**=!}fe*Atq2m=xvEVk%m8gom9uco#+6}1lal`#x7c{0}Ps?bBtQY zGac^fS%9r@wWS_+ST5cs!#Ki(5M;iZL#hBshr9B0vn?_(mYyS-b2pcdSH*l)nSQ&@ z4zSPqk!`u>)8g?hSl>y4W|CMlNetzHWWCeGcPGzfg=AS_=mH2I5q8yqIGSh+KGCWEaBy;BkFn0!M311a+4`qS} zPXlBvfYCQ#^$ko-0S?BVSxPCCQi`;=93fGSHY6KrlsZVF#riV#ej{%C@G@SB**3JK ze8%4J@V>nVTi7)Uw*SaHx)5ArpuW;!|IZ&A0ARA#XC_F6G4GGou{nT?-pNUaz6u7b z$CSy)mLW^NZwB{$t5XV$hBVn}xf|G-x?Dqwjf2J1SUT!z^V{u58hZUMb zB|xQ4%6%>6bT+w81DOdiGt6WZj`t6&3z-r0;VrO}3;+m#=U{)+uP)LMw=wW}u+m-z z`kQOhdoYK_Ry{4w99^rWYlCu^$0}<)^O*B4EKBuQtxk!yxiCyblFIfJ&mjK(>&&s&DI2Jus z0E7h$oFfOo=O|Qq>Rk>ol2Pue!A^KIB@Q>>V1WA$z{!B%<-~0Q z;;m#5#{h8p*ez(?SA#vcPHPlk!A8l%d)k(~y%5hmqbqDX@lnb zfauu`f6|LDcQ`yrER&H!bwJmG!iveZej$V14wUJsLkyz_sx>t-ayr9?Qek{h8@wTR z#S2ZviHpsr`e{}_nr;2*H$Bro%-sKu9FnU)nPb3R?8jYN4z0S0yZWPJihMByKIPwm zOBdO33GjTCiygwxo)o?B|OM-?-MiZQ0wa76Jw9^ zKw~NJ{uOn;5X5MM7L3eW)nQ$up%U+|+ats-IEOLO6A8%2jvQ@A{LPlK ze_ASCE+&yLCak}>*QMScIpr&?59JWl41DvSy0aCq{F6Fx(H%LHo!s}htV!JJ zM;-IExLwC_GwW|%qSkk9h1TB0&KDA@0OBbx@^>XUMUGqef_zp$w(mP(Y6i@C5L%{d z^?2Ms1pxhx%dd7~F%Stn2KvhgbS<_`Ovv!sm2I{vXr*J$>*+jvz}|IjJ!G=!sZTwy z6X@m)FaWCpY46oI!d4k!Gd%B;KbX(aoQWfqXIbt!y8GdE%uheiNxJUJSl_OCjKy4V zqak$3p+dH9(Os=k^s?coW%<1Fe%h7c1M$U^uwSui^YXo}ubtER9fS`*J5gVF4M14K zxMi1qYu7E@3Ivh}p^mbdy){@W3Hql=#2^f0H z%?9eP?EvzZ_!8apn8m(EdfC2yh6cQt;5fGEjfQ%4)v)E3VN6s0i+96^B=r4134R+p zRwE-T0=iZp9p}sLo_i04?zkKJp(R9r_tC#C6@G`kpHQ3irBND^SG4^}*gi#86Qwb3 zlzWp=bxH1x;ee8Jv3BnOs^&Q;0CLMsy(TU2`q9Owo z4-XbTx&7H|-ab+JdB0B=~-{+wvK z-fPJ^0GKWsGPOo@TlQrz`lo9W_YKOJ0W3{u#`v?dR-9G4kmui>IsCzus+mSQY@q3#<QQY9mg!`6XgI0w_ym= zvW{%VxmFYi*jBxmvOYJ41p-U}kO5*0j?3e2M|ly|0%(T8cL*1xHwBsQo@-y6Id<$= zbH$mI)7lg!|3cB7ANG6`DPXJL7B2_)z_V{Nj0W{oPXYM%=(;2RMgvl)IKF95&BCM2 zFK4e_cQL_o?nnG~Kc9Xf?ZI8be@n*mMze>XW#{cNes(svAEwQzUiJc}J(8 zkDR35n!Dp;nai!y>C?9oKfYQUbZd>xUyt2TL>F2ogEpc(t&HM@t_nD_vTY_i&X`5W ziymD9MI0jaYD$fgAVRSD98#7RAPP?_^TYr0w-_G4_ADW9xwoDgaHnw5)yuuNlbeEa z9J7q&3tfKSwxesbeaI_hNdYR-?U4d%1HL(h+yWE!9ep9?5Ye)X&0TBn3)Tx9vY#HG zn)Tpi*gUWdFsf+7;@M<3UBu~p+mS!M#xMTBZd~?g7wrX$HlZct%U|VcUd1!Igil}b ze|#Kqd!3STEA@8A;}0#X*F$-KL3td~pb)ax2bz8*9h63;aBz;Y6&Ds6L9gF;ut=g` z?<%x}!D%T;qu)i;0bT|I>kLFkaQS+|Vi=TiqK*irScmGYg6<5N9)A(+m4q3^!EYIV zc&+s6+w8bK|AVFf7xw)GOS~UOEGKwP0#XD{L{qntc5iI`;?@9~%Pl{4So@r`_S}o{ zulo1eM=#c(&s8SrAY;9y;r(qm?26i0{orelhWh6w>c>0N>;!6_&wryq-%@{n^}C_7 z(4}kT!mf<}UICpg#b$~XrB=VP^ahdLd)MuE8+f zIno+)WTECKAdV%hYzdG{4KA^#W81}>g=XsuPDrC-tTq+lt+dMxCI!}o)}(QS)%ggEhl+On#oPpA&r}FhsQn1uzl^3u${Df znK_=J{kt53Q{L8oOuDd}jkSsII4$olEgE^5N^h8xQ$JD>Ty!f(aj(SHJ5HWwtjJ<6 zT{bGcV*ELBTBuCf`Ot0spl|+@T*{2zzcYTu9c$#c@7$}zJ5+Vo1fT(w3Xbzp-tK6d zqx?#LU)^=1=q8c_^Tucz+4;=LJZBeON9j#KM=mxTKhdhxV$PiIaymIUUB56>c^0i7 zD(UsDLYPzOQDAXs;rb*6$*k`>%euzkVIks$3B&pVkGm>ZR)>Z?oUBJ&3#guQw5t)% zPCiO4o!V*abtlARFzI%V8#6*ys%A#y{&`~dSm6kE_q>mYYM1|OG+@WS)iRCqsaQD0 zDP%>T<%C*iO%UzWhEAftO=hUYM&Fhc^r0)|(Bh-*j(204a!I9}2F+MWjNMSesVZKb zP(jGiHP?Fyvq}{K7K{)}cli9?m 
zy40E|W1z@JSpFzxby;qoR`Sb2oY6`orvXVF6z;{4LJ&nS5TjW97{Eg85e$ zQgda1ncb(aEE78}?p$CFW4zkI-CPJj>T1LC)R#SPA9fs30&Pr+Vd0}wXG_V)jaryi z$9m6~wkvzRi$N^=9iy2&+tVN{x_f>=roiRMLbw3^evohuU_i#{(S z*M1n~1PjsO{Z58Di->v_04h;-g-Dcb$ zW8+!HYN%IbcXO^AdDWWFMISvPQDh%Wqw%KWA6#Td*wpLlCmf@p)$o!q_+kVCzf^~~hmn%SeBMi~rD-Ej3`uRTtZ z&FlN7gTGa8f9>-8$G|ek|CfTmHi*Vi)==asBV`mrtPcFm;}Ri2W(b$AdRyccVgM7P z5F;Cr(cc&8dmS1Jy(+&U4m=aE;vz6(Qbw4kID#Th8KgKXMpyTEgj9VW}TY)IX&LiuNa-lb{L1pVbn;_Cn7~f?;K54Os zHioGz1IfJ=TlUgfD?@C}z|}8)lu#s^XS8&giv2_jd0KZ8LK)AQ(|xn?zUQ1GgLSHZ z3t{3vZsW%39GRtbH!dLHLFS?6yX`zm z?(>o?loQF69FB?;r!Vmzj|g9wNH!C8vMw;X$;k$lh4-bQGQ2q3!5VsJ(RgWZhRWiJ z=ILytqWqcBDZ8KE2c}NXKJumfFT(l|1zkFg6#5J!gm{5+`DOU9d*55hZq3NoXR@mc zWL*xmu!17zcqDDPzB=wn|M`Ah*D5araMdZA$w~;{A&m7l6m_L@R#9Y@kDKQ{5{xF& z1OvxGffPd#HqNYR4PTuUKf1xK&k}? z!{ZmYHtt!;e$Z}Bnq7Z17wgxUuzI1X|Eq?w{LOGEJSSbmzq2PJu$k=Bvcc@rl7Y12-7 zPlI)(3Uj&V5|hgr6#!AQli5^rQd7fxL`F+f?W#GJ?GL z!SBz&n=KV=4HgCU+!0kuM?uI0#({xF^+mA&O6$NG=^R9yf*6h3Z9;*{nnHfT%3^i! zgfd*OA^?C<8h*39aOG&>_~%MrB1SiH{E3(A1?STW7tenugYCWA?2*!W5CF>Y;T@_K zI-F%O&@RJ>CzW$FP#^;PUsucxQ%7@fR-^DS_Eo<5JkQnwHz~$Z1L8_`i|Yzk#$iOG zrhFpKU2GmIH?ufOWf_?J|54|z0Im7K^Ee6%;8NnWYY&vJu$@#i{{!NFDMBMIHE+9w z&k>T+g{1nE?|x$29@Ds-W;(T#(FPb5cUn}y=mi=Cc3vUv+``&P&+KF>cQSv#mK;o= zzVo{YR^$gcLsh{7b$F{dT#vQ;u3BJFy6JE$1!`BR(iMON`_);a_%3@$SPZ4=3LNBq zOpbb82O7<;Ue=|e06Q0O2+@3XGyt(Dw2GU@%)Q#W0#4g{old@VA!sW2+Rgp)fFKZ^ z=F=<1d^N2V_~YRvn+e6NG$omV+E)rOf|JYf)^Tt$I__=`n3!LSV&G8a4KxRK?i6Iz z7G`k>vnOuE5DP?d(`lQnN`m18oLP1o9TbN#vjO*8{y33LVVVY7q=9Ur#DddMKim8yRALsx;A$uQmZ08>p@QeuhhO_S7t;23b@j{h773@q z+latqjB-}T?W2jxxs%FjHrfOpeIuM!uax+fV$4=mPbJ#%(n$DHMoSbzbY(GfMHl67E-32mGo{O zc)MQ6k*aJbv4Q62YAlr2ePGOG^gdF=xpWSi*WcUTd#`(VGY`hd09G7mZ;nCHQ3nn- zN?#CPTj14#UXDd_Sy4D2bSFY~qp)K~n1>YO6sV3KRYf4!3VUd7oYIl6c7V?}mqG&b zt8YWhtn}FR2!5RaLPaw=8Nk87h47tS%#dr&u6S*@{kBEhEL-d>Th4~UOCN<5*&)tVShdn)E~R3Ux^?tGZMK`}IGWn}=_9q=Q+ z0+2IAVVp7JYv>j;T$S@PCsnHK40>xN8mYNG2@j+@dMi7Uw761$^&|V7dH7Feon4qBtfkgEYD%3zx3nzL* zCKiZW)erlRt$%SG%3X{XuCR}QyVgGn8VJv;0swOdRx5TM0C{My2xMJ>a?IROOdw7@ zw~r!{?Z*;fCZd=I;gSK8c5)8n9=E`{7zzbu8%bx!ueA`F5Yp1H^IWWb;RDZ>E7$Ps zEp{l_dxo?W;dQEnUGTu_gyPVl2hAG-@{BMXMABwiU1QPgcmUM1dQB3!1xps>4n*I( zyW9MXXFCkAioxHEV?52Fvcb4DNL=|n+=lP?DYAO6C7gUh#~oD~@!{huxE3As&|JvD z+e4fSiDAe`1&L(JQ#Zj$m&L*km4i9PUUOl6CA@g!ismJc>Rj)Axq2bWbV=8{2+B>Ei<%q^3ina)MmT)Rqaf8EJznHj6;BJA z8EP|rlVfEiE={q;;!G>q-E$aV33`}^LADXHcKg|;qyd|a1Gml%9NG|S|5deT2;=be zfmNEf-YTwrr^tD3=aD|kHoryx<__AYtaA0)8wjXJ{cx+oigG4#8%A+X&v0@b-gOJ! 
zV?q(w3Fx9>kU`z3JbWSgu&L(O<2U4<&yUD5K{f5 zIolg28L*jqDJ=MsZHL-^Bh5V*lmArlKZ?#gE~fT>rsn>$u>3b*N3gO(GGhYgPQnVangB7-L3w<7$MaQdftd0K zfyg5E((|Yx?{0m-m;)MC-#R01`neuvH7R?5h4JS7AkwSJGJw~NI2R5O(+d%*Lc)x; zkvtn3B=RvbcSf7AKZ|;SxTB=l{3G_kPyINJ;g(DYix;jxd5yt|Q0xjBSk3K@a%jl% zI=M}?5aoIO-e$`zhRke7;166MS$sKbHLux%mAvI%^8G-rl|*k89D!+VdPP1t^1#E%id_l7I(Lv(wZAI zpB*rtjqjU*Sm<_wY&%L$k_tIH1|KkH!Ng70f6u*H9yyTmEG5Zm2OALX03abY7)0;@ zgw9Z{cs3Xzk<6}t{={kGsm<0woAuZ@pzCDQnVVTlinka5fYXGGKl<6D15U{Qz*1(N zT!~!40g}#LKwhPtsD!??SR-NH2s01!;z;CD2d|kTX(TPv8zjvdI%8lKGYX=B7$k?g z2(wOjHXe^ji)F(Yv&nLj;&GSuVJn46_fN=u9zgG#_^{RXNW z2g;?c+`wD=B&=#>XuvdU6yS++ce(i=3^VkVXCo)!P?6FEzd8uG%C>td1FNrpn6Q>= zD-PwxDzdl5A=ZRuJ4@hPXW0oiS>vfT{eJLwUAWOox2V+KxY}_YmHKVJTc$QF@jj@Z z7HDAv(uM6Q6RB(W5*QzKQb@a%96&D)9w%IuI?)jSfZmRL>V4gqPsvig%{(I@#8E z*rDj-U%f+Ki$=%XIJSl{fe(t)uQjQUm8#e3Mk05Ltu7EQY?X#|=HMS7!k&u5&|1^~ zL!4fW4DUD|G<6xoA$0n$oaxU{iktlYo(1y3Rn3Sf^%7C31p2WGRwGAox+y*>VkBo3{7Ul^X1dZi2 z{H-PtwceVVWvSohs%7P#Dc*{OJU;qB@uiXLF-qwPo-6`gR`#R zGOfI6E4S!7!)G_=o-aGK`rs>-_qP9%^i9&gp}e&wc2byLnFy9)&x5*EX_JHFue1B* zEWT`_I@!akc+f=H)2#W~nvFAH`K2*grXS_JMeai!PfGi`td$S%Z}TcNu-`LCIC{Wm z(}hh}NRDAC{iU?!vdYBC@>onRao3bMVi|Xhv4x$$Ju>$+7px@h@tpmZFGuOg{8mJK zoh&n3wE)8z2djX3Pn7<;pi~aHdi8JZp0d^0S`;1bwvd ztz8>jyqC24Evry{?+f3pdgx^kuZ++&$cR3ZOWnK>?MX?~B%`;5ivti*{3*FdwB4B4 zgJ6^Tj*ncF<|U0w-{7cTzhMVG5Kg0WBq-O(hkh29Yrq_mp?Ij7v}Zs2HuJP_TaopB z?r=6uCNIxp=5fLNz$Oe$V1MRai)YYj3JhsF#I=JVK*S7S+|HdOhBJWN{{&AI<%fZ<7NINpn!6rWysG+tA{DSWK}WS z4_n&uLj}7#o6Zy(OPf_N3$7zrq-Vuc7Eyl{td9?E;izHOoU2^>hqZ6kx_{{jYAN$P z+1RkL$3?FyeE*Ev%fPfXd3Ip$hjw($uEFu1ZSt>j4~o;E5MEmo{eu^4u^%O^)Aj?T z2v4mY)Q9WC=Aigpz~M<|fM-4Ylp9z7c4ECS%h&pT_AG*4Uu$>8$TvIkj^$SH9G#Ue zJp99Q-DE_OPS#413cjaeRt2CKVjjsmTC6r~8u==LX)O%(sapSArsSF(Ne#&}e78C% z8_+vYL3#7@{v-q!6+hHB9#no+;ZiU(&qu`smg<7G(sKlc88sex$Nd|2wI?MA6AON?va37KT}3{ z=-->g^+;;`w6cll)PX&Xw@7RPkMCJHzxc%b#G=OI@4}EGf6DC4k#+ejoKOQLBt?$r zl2y(laxY6USg<(tBR$>4)A%GaKPdYoy)2XuX+(vB4ypbC;{~lwRJa_W*a>z>jH1@ghgZ{_fG0tacsK%kf(0N1phwh-4dekT1j2VoaVVz_4iz zVvoQrE|^RC*0>}N(j-kO#rJbjt`@8PtHGQ#0zM5Az?q5wtc58Rv`!d1K;ZyE`KR=n z`lW#346{blPIwisgzK0X;2`NUt zO{=KSy#j@S9y;q_4woMyHYhvsp=|x>=01}zEb{S>!R6&WTgrxHhfG(z(es-fH8BKN zuBsUD`+E3a(nn{w^FwCk%5#rmmry{QC)c5Uj_?o`r;Tw?0q`^l;Z}NK^9Kz~FO>In zgG#pT5YeDPrC^9>ASAp$WsjsQ!}x2qnLB%Z@hHG4gCUnU+@Uv0V1B7`I4`sdt2HEL zGQMUL=UB8-fo>}&^2Bd00)Po3EXvg|a8#tfTe-f#aLq;qq!~GgdQBX}Qb1Er#w|b; zxIaT~JO>k0;oYiR{ImM<(SUO;C!Y(Uj z{mI^<8_me$6aql%gu^N+2E@#CfDKCGKz}#v52d6PR+@Z260<{l1|Bo10ZS>KW_}R@ zU(+|(0ZN}k4=eiwA+Es~3@QDeX&|iV`)XU-#KG5zCY~p`OG}jtmV4Fb-mh3Sds_F@ zXnI*igScbxjPS_W=+sKR`@vlJZX8GQ-BgP4%NV9eB=I)>Pi{bnBB5ZMX?R=qcD*QF zOC&{nV>%^fM|SUHfM6)w7Ve_xA?M3sIEpPg7_nrX3zB~8L+)-A4*V*ls7`hX5-ngZV(gT};qOfP?M~giK3hwnXCdY$$ue#TTT`zGu%GNInRZ~V)O8Da+ zhhofyM~PjFb-kVM%d&T|-Xw1J)VbW`{J3=`Et7m;O0TnY>f87JPo%xIZ#q-uS=(MO zMy&UGY*%$A!3q+c7BW>2|Mz$aGH~tPp{QilsR;iCC?{0g?gh${ZvI3%X)NrEKga8# zdN6Da;J3-NPkVeQ%t#1eEH;C1%8%1|JFxmDH`s$)utf=Z09`P_rbYmO{)+ov)LqE) z5zC?ivGe|1$n4`|?yyCZOQyD7 zdW;z|hP9n`c<4T+ud0(t&C`_k?Uhmjv`2+%y?B)PHQzl>0gxm&za1e-iz3^e^rE;R z4;h{YU*QmAA#uDDaJS@p1pUw=X?b z7Hi%e5n=bdTEQJN-RxIGky2+QM@Pu_hoB>V?a7?6{sSsZvFcMc_aF-qVn^I8f1r*I|o9$+@gvi zoedIV?J$i2tq;lN(4Iq%k_9XmNqM@jB9G}VYjKA;hRK#M>n3X+^27l%njV)j6@}Av zVHs>4v`J3z790rRL#u~1VBk&atM$PmT5&f8Ax#-#a2!0WO7K zy}=#(yK;~+nuB!FBo`$b0e44qLadlLZu?KEF@g`tl*~Dh(Jy)I@Blu@y=+*mV~Vs7 z=355Km~oPpoQlj;&)33Ah{_6^#~E}TBNj^tQxYa8i@JYg!BuZfKVUyEMa|V)a^bJ` zZ!}Gh5J8(q7GfYfj7T~yPX`ttd1z48r)uWDC^S{~9@$0JQRS+1j5m_jD@;{tru zpN4e{4Z>TI@2tc3CcoO7toysRfPzs|$EpY;T*8PV^8=x_gSPaUiZllt3-((262ML| z$p=Da1*DxE^9jUv_YG07hO{J0#>RpYc3HZv*vi5 
zsVJme@B|oKaI6rFtEW^L54xy7(T@-BJ`V5kT_li^AaYm715!T#(KC%TTwS`_gQWI0 z&VDn>8Vcpx2g_D9;2b}G z_^P$DQ7fbBmf{+BZ#lh`))>H!-djU9q)Vw7De;ela6NGSLa!ux5LkMTOCDTDo&l(H z-2J_=`=$ZXlqBO77b|HD-M|g?lAvZJ420HZ9gEFds#!B!t@C$0f4)W59tnF{l2OrR zG%3MaNU>IGBSDu@^?HbR2x5TG$Zdo_Y;47&i~A?HkrWpQD$rV@<=&+;3OUJPWQ~d- z=dxTpT;(3jF`m7PS|BlqmmU-Jt4*eOna_aNep}<>R>zbFdxatL(|QEyBVw;Al~LUW zRfX@-$Ty9^@n|m6f^S#b?LP4gvo=<^@%_q~?vAhXl&)dvUQw`N*`QrP2 zZ=)xl5<_!|WA8Wgmg<KA{+=L6zO80yf;+3J&)OJaR5LIKD=*CCIi8(yWxyx)Dta*w{1aM*H@O1*av z_QZ1uc}x`oJ4=XqOHwV?;z)J&^K{ds5av`jo5x7A8c3S@ZMtOt{l%mSfMf;L)tC$> zeUxtWpxf}6?rP7k3tXlgspRoOgaV3dmRppEQw7`x(Zwp}Vdb#p6&|h-ICp{@vU&Z6 zcoj(wXpPC-55&cW+*k zY9w~b_Bw8=ze0ofF;KaIxm#+<_78cZUA%jB^*#>VZ79p+jODQ_AJkozd$ycjcT}^8 zV`bPZ)i(qwbMeGcfG9YBY~;Zo=2-}H+?T61?zEZeon|Nr{TXWA{ZgG&@KmAHV7}M! zhw&1I#q3KyV{%H%s+2SjT288&o-n$V?Ws|oDT8k>$g&THvqd}zwTqboFFk=dxOZU| z+Omh*Muy@5(wyqhmTqee%qHA}#5_cyz^EJ^lwv*Ny2!{+W;)6@rQaX7knlBmyoM8= zwYmT1cxHAQ#pEJ(t^c4&&JGPgrm&NcCWE!#i&*hMX+GS&!((Dk$hw&6x45^*?pSu+ z|FCXWZO|#zSE=h@yh*R~O9z0DUS9j-L8EN7k;;Y{&S#s#n3%6*xXk#Ea%z@CzM5n_ z*PHmqhJKL0Y2zJ-DK#k{`=}Wm*pvOh+{1E|2Mt)DC$t&+e-0*@&NmL_>N2{_#?&kd zf2a&~qPiD8YD^98a&&+%4d$~(V5Y@AFpSz{UW(NK&{nGVd}d*v9Zwelv!4LPuhAaG zbg$6t!~@hdoZ|_C_?xaz>&Eb1Lf!E#oRlWuP=6F^UR`w!?#{JYTQGwOIgRc6fbF)# zZAwxvL>aaz-)GoN9*QMu{QgdG?;I`YyERRPC$kLsEK?Yp4r3#}I;`5o7SvN7Eun3N zF@Y{thZKH|_hrushwsxx#4fiCkOZL(-PQ1L38{l;;mNn)s!^081Be)m>7wpUWKg8{ z-tn+;Tx^93LjjHD!1kfK4KI^(Czjc#stM3QbWVaDd50TPLmw1397L}3a?^n^z4-Hv zMV_YIo8c)QV|_mV=IP%2^n%*&9Z3&JT<8(TeJEg*zuq8z^Y|0%asTUj3F>?b#7=ef zBVZnQ9Q`;k5o7cARZ8vaS*%ApwvPv`O#C^J&@3e^lJ-0)rSv?|8U&5z#BMM?%l#XR z31jE;Hxwqe)C<=^a2?}q*pUHYPiP=DGam*!VOVqBq3snCD!A>9Fd z>V}niT^1T|>2eGdeET!tAK;6;|g?&+UkL?$GUb&C+1F)-uVjNrg*HM@@qE*?QNb zue$cYCz{UeEt+{7zXkh0Q=G^5m7QS5zUd8cb)8f6pXvc44bSb4^MQ3T$+x}M6J6Gr z>k9JqdieE)gTI|6co$2(VqAy&&bec z%NRg~mKY(rkf~C8zbt!)EYtm;$B#q2dVu{!&60P0$MMtXT&+=l=5qKE$GKo@;!P17 zM&VxYKjDSU_ubWFb}TGmKUM#+Zv|}FYv}Gx%EiSiuddy`dfg^hbjnYZh8M6EULA3@ z`mn3&=+C_+;;!`<-#))=eMz0Z&xY_N{tul;?U#CekwT5Stn=+B{@Yt$)nYT&)%8jC z`^uN@f4dYfWD4|k>ya)~pljy)dIxKl(rg&iQcWU&TB&b3J6mUn_dTt-+noK49n`4M z1qo8~D;CAax=S78-H*1w$%fqB>M%rb0vg~uig!4McN_n4)6&T7XDnQ;K;DhIA-{Oo zr4uj&_p&vR7Z>3B=7sTi+0Nljw}RunpEw+;vfF(#3N_O2Yd4r$^r7;Uky7!(Asp_I z55I9ZYtPof8*hSBXB)87cE|e8;NSe#2kZ@N1F8F=de@|li)5^M*UWEM6*n%~K-By2 zA*46lQN(z+_tx*+Oufdn?$L-cn{(9kVm?~8QZ_WcN75D?fBc6K*D=EXXsjdq4gS!YtxV8u_opX|O{f3YWPPhpIjcx}>0s+9 zKz-g`@Wx`x4f8`Mdy@_CJ^al`sp~)dUvT6Hvl`L!_f5xM^%pF5%fux#^NZPym5tTc zS69WP)_a{zXm0U3ny~T@#gp7}eThf7AdGG0%!p5xI}N6okS!GX4RqdfW`xsqLYfNJ zxsal<(VL@W=PiySurcn8fpL)FAZIDrY>vY5#>W&cYE@5qR5q2PRApmu3{~z5^YD)( z)O!X?wnAsyICjO1SOfrRqFl=I>yxcT%M+JHqZUAV8gQMjD>!rwQM+&21#6lH46iY} zId$Xf^;1&{y4~kPK{*5;DH7_nc~B8=l|o)w^0AeCJo&y}pW)uuXO65YpFV?b1U8j_ zC~FHz&9`>m!W#= z?@pn7$co9S>eZXSTy*J1hSeOX_fgV7UNJj)ioA-Q9+i(IU=$jRVcb2R=f>3vnNdDt zTngN=VgjH;bv@eJiPVF4|Ix86TidT7n_^)cZ(a`mcwJA+Ji!FvS^`N=h2f}UGfJ<} zKIz&!^Bjjjf!Pt!jkWefa*+SBdy<8qEW(nVtnZy9Lg=+e)9BlJnX$XQ9{Z*SPP@v-W*F^8g)o2U}&jjE73 zGhMfD7uoE4hTmeSa4UX`gfr3r!rK%OE>cjQioNWQQZebQ5}U1EYF*qi?51!Jnm$DW zcruq8&WTX^LtLZ{2X5r9CfN?cEmN7g7GwE_BML9q25A=8A&Z(Z38x_+5>_-%$M#9N z4bzb;?^LXVzx{nEt!tYc7usuw4+S5&y_M%WAXdz@dRSh(Qu6z8so`@b+V#q^L&DsY z_2c~*uV3hMyM}xczf-)uh5Wo#h})r*49H=YS`@C~>)y}z$)B#gO3CCHyqrWWD~b0b z8Ea)5IIx%to}6o4Ey85G+HvBEIgcIpKiRa@_<}L^5amPUU|Q|@_hJM|P%PO^GT6G+ zn3F}DQ|Xv@=GZ4j$Y2Zwu-rw8%~0kU&2xNwLuE9`(`F`Mmyk6)(_a+qCWy~CG|FfC zNgINM-Si!t+lHo0$r@Jx>AD}dOQ3Y$^yB8w4Xa>w3h9hO1heRegbM6{KWmuvK1>km zq{$6*FgaK^7quXFJM<@-X!1>(>*DuwL_GIJA1KaSgA%CrO+`qFZEsCIrFX+Qjh4gt zck&y|{K&?w)z9X=oL}jObnMA8*y$gGT2X~wGS7w$z4W~R`UqC6df}O 
z)b7*aVGJ9;jfcQ3dm$@9hbE6ZDWMEddMy3Q>=HQJur0}4r2Ne&bCg9X1+t+c`n;)k zBlZsUuyI(8+NA;bsY4PHsMhyQz&2oa(XhJyNx z@D7*T@w=2(hXAMd+6FH$-nDEqX!OI$W4WlwrTIX^lJ14;Hz32GT`=g;+yyW= zmBxR#kfz*48uoi?Tu1Hda(epsEmNa6HIrlFsnu+Ha`KDQAR8u#}B{f`oSK0l6EsWt40Q!jxHkca!|_l7O-&ZI>fTk|y;H4c~E`hrbd< z2eU?RRc?1Wn~c#Csd7ewzrKcT@7DctJ1^sdxg2LUoclq9vS$V%aRpMtF)55Jf!36H z1J*)>T&wgn%MeLNe{$*!(g3Ph0$(t%UW@W$@+;FtAlndzwoI+xWXx*C{1L#j;&KfX zl)@+%4_f+sQTqSlYS1I~dg*Z?q1UV7hLa+^%iU8Q>)r==K zoVpnPzW1q`l$ey}F*>GE=2Ke|6Mn3>6&7CE+(6us{`K~jt4mezN=ktbrVC3|@e=I; zB->Ne8v3y)`o2jPF5dx`eL$sU{1vmpJrW*))56ZFa)^^sx;u1lot9+jact1VY1xFx zH88C>Cix!bwgZEeAHU%6<`ny;U$mhnU{6lY3#)3L7DO`k7#+vnn}Iu`{rkV&s(NS} z(suW>&Q{`J)yen$*S_D4IPmw$7xtt4k3k;CQyZ2Z_~Y$-(B)8I;l@$^$F_yWw#!F8 zMgQ2?9gW`i>|gyo5qqmgZ(l@NW~elw&9gQNx#Bad>2uzeL@?H(cNzvj9K=^@H zEWXp(yRg075Eg9l;j~WZOPf<{-dkBenq9oICqf^VIN8P0pA-J+8rS>7A>Us2 zXKlpa)B(%Oksd2et1m}nJukdi{JHy<`V+h5;*U!gd#7mw7Y)wV>r|*9ya1pbo6;#% zp7+mxJ$-wc4evMuOdttL zJB!=drRnoldd=#k{t-{Q0%^5e#2yJ`DL}L-DAP>z93OS|-1o$K1N^BsI$Tt$s&Hf8 z^4`Av&lONB;19MG-8Dd(Ox9=e&^|1h;o>F&ziFX`8I#03Zq9t*0e=+>uhvkjrNgh} z!_!&+SiJtn{PN@=*`#V*FYudQg;cLjs#h%4yDqC5JiE~KdS^``w#Vx0r#nezH^-fq z7f%gK4R6&ON-h{?+M6x+)7n{&pRl%yIy<`sqnwj1-f1ySQATWd2+p#tCYO?J6j+R! z{)Y<(m@jNJWEcw_!$rj~(b*F89ue9p7xRN__>Z05+K5$VyazrJ7%vHl$s@<8@V|E9 z8Ea6lBG`-oh_a;)M(raHaCaY5N!m{PWhk!-r_ z(Nn8{(f*4U`|lg{vqsAw87M0Z11}rqF4&LSVRU=AZad3!G6c2+;bQMH+YGy=(1a~c zzE*W2toaO9M}!?dNnGHAI>5GB3HQIQz4KUjj0TaR!dFNrYsV>0OX!s<#B9F~OzN1g zB3E+}5Uucs=IR`u?Bhj&jZ^d;oPVkE2qK#$jkflHt4u{lZM&DsMWu;0KWW554A!|a zLaYrT`VjLJDq#aA^K}u1U2ordX?SM)`d;-v{Lhb0)~k9-9oGjr7R?C?>KTWmv_s}QcXNwJJCbNQC7&)k7~QFN>6>;g(Q{ylnpb5Q6@l?ie%VLJ}SXNXp0P>kiu%q04Qs{D5`Qw zvwGe^fe>&JUJA6A2JjJ0Ab6-;J4%w8&a9`Hs%d`pwAv`qu{S?lgh2`wJ(zOMCpR6! zqzXV}Sw7YDOgz4;sY_hWkP*zVUfb8MkSRiUx4oZ&|KWaqgqYIn;^|r0ISyPAlvFU< z+Nq;gsD|TzSI%}7TGsSvU)=MJ_QKJ%?9t{+6$U??cK&tpZ5{Cd#4NBatUMHddO~D%SDf= zh}F)R=Ug=G@>_kjU37xo&yNS*7}14meyc@>PwsvSmZ zs!jxTrCm~sP50Q6i%oOk->&%dO^25}+T1?3`TL$Jl8fs@A;9-FXPSsT}zj{g(6P8&d z+{Y#F5$(lAkeQ%&stQ2VQ({bDI+l4l@vu;Izs;%R_qPMwJroSf%4*?17)eTTC*>-< z?CQN%hqWaU8i85W3(ZC|^amC?lFJ3KPG4?7Jdp z^R?iede`gATBSwAXOwZR`Y7P9Px7TRZn}#>=lKRhF2DqudOtuL5^78mSI@=v;WDhGtw(=@OyRBoy;{v{;3w@W>gE^>~z|_(k$s1F!p3%22>8fWRxi ze$?U5p)KL2pBcNE@K=%8xmG;{OK1>dJH?0pdfnL*n#_<=o-=8asjC7G<(z+lD*S$? 
z^OA$N^@&fTO7teE?dgP;fu2r16`>#c>(c*(fm0NhvNyZSv|Y>e^n0|^PwM@2zQ%eQ z6Z&elJ+HqlnV$ik%0{1eCf9MX=br*l{Y`d{@bz0PqCTUjkGwYG3kAM%2NNw5p)&(8 zxm-p86C;q=c>%~B3-!>&kX{>M&;{{gAymJ#x>2o@RF6K&9S|h$sbK)dX6dYGw?n5Q zg4fvgP;qO=a}a8}jp}akr(_4fCn*6o1v9FtPBz#HK4&6LSj{n}dX);+U^mn3Y*?v{ z^1&V@_LvfXLr@l>XI|T7-C2sh-eNyywphRHUv0BpN%hxwMw}Oqm%Zruy=Ro$uM=~* ze9dq3ha)nBFL~@A)?N&XSEvm^;#%&J@`Vp`QlRGe`*t&ll@jDd46Ja)%fZifjH@uw zXV0&{3Uivwl`y zb72;CAs|W)F0Xbkv;A!_!kgcAe9G zU2@v;5}MKsEVar_q4Z?H%M~0a3A}W33rf*ow%6&`TpsqATaO?%aK?Z0U^)Vbw&2QzGVLN;i^FvaYL>`K${`@+IZa5Ucx` zs|?s);X;*Kl8-aC5Nn{KOk~@3xPV#4%Epjz6{?og!g|k*UWREi7=0U0v)<9`yHEEV z_dY)=0;2BfkhJoW91Q~0AcD5BGnF-bikm(1dg+E{4C@WD*u)V7q=OS{kX9j1sp-SM zT7F6>49%fsGuM6XDt`ICbdXj>e~=gGV0wI}J}QJY+xzyT-PlM%l5o9RM~%~gvQwdt@+kQ70cYmn@AsJxfCf*}MDXAW4Z5u~U&)q&;& zNx=3C10(;!|3)*lgnGfsVYro24B`zm2qb;gvpZclt(96Oc6X8{>+_gvR}<_WMz*gH z39p~;4PAQCB1E)Z-AHogymFl0lK3G_*LkcXwCY;=hxUuk&l2rRjLn2k7|}s1btR|?t*b15;O-=a(JkP7r>3VB3)n`SA#l=F?y+Dm_A)t2PDJhH$dQ@by8{rP@2A+tUw z#C6re=-y2Sy7Q;6P3gRgbem}Co|eDr`E<|enMezRE#|$e-WHrBQlr*Iq8@Gg8OC*+ zgdeJ)))4wo2C!XS5DlDYI%K{Y1sP8U^5x|i=GF=^K*|ARtI-?7?Q(U{V*&jP&wS;Y zIkQlT7b`bTs0O!A1^C)5RE-4Qv36-*%tJ2gkiafB9ls;o-eI%ioy5A)6ck*6;1oEm z$*l*yxx9{cZ9gvGl8O2g{ugwqZ zkT08s$Fd?cz0odxNb{7kbG@d1L)8)9y<2^8@@;WPK-(3}RsX{YefYAoj@DLAm}=Gb zEpQMCF>?`IyC4X{+F(;D2=KyM`N`}dxppLFqptq{Jup~wHGU%qL<07T~ zkYvG&lL-X^gx(L0y>YhBL8pA)LndH=6(Ibyz(=!M`rS|_yda=BHXwTA2*0<$T;RN8^Nj3cq~VREVBOt-jnOA|)#k_^-L4^w zZNaC;H-E}Y;^ke4x)+9M!Ct?p7?Q;neaa6Q;@;1})9ag4?p1H;)<-h|03!mJMjRPt zzlKz*h&0ktTdpvaxvd|AGly#OIkkt=cF(=U-jZWH(?E2TVK?Ik2gRksXm;|)3kkxk zncA~*vOon#{1g_r2Pe>T9>XU01{kj0mABz8fOAl+qVNB_T`ux6<9|?5sBaj35>fj^ zuFrboh$Ead>2(skTDL@O3pG}*^~gV^8krTBZep!PA1+BCv@SE$7^3TrW;c5+IM%=+ zY)jb&IUblYHGIjUXkU>8VMV5Tp`%9!5fZ@48f~HhmsayG@83YB%9+{I@7zL`+}Eb$ zQ&wA3%GOn}UXvCGaHE{oJnPye@s5zcE>bS#dS%P;)%{HDPqWizDW>pW<$qxfdUyuc z9C_6KJw{T%1_hXXS9~#{m0MfBNkHk z)Gj--Vori_8U=MbR9TDZJX9pkRLhL=P3vC%e%(+_XWxXR0&qA{;m2@ z!{OB`a@C}o;I~$09LK}25b||<#RcA@C+X`Ab1Xc^Yo=Gk>Rc1+Stdw}62=Dg4(_Pa zwF$`gO_O4lQ~2auO0FiDP20ubZtt%!uwP&4r*+0k8M{Amo&d<9XQ~l6=fM+u+S@lJXkqu>$ zAiS)wnoTQK!h3f~q$`ws+SW->C-{Djtyrqu!pNe{hv7H5EYWqolw;EzL5vr8=ss4h zv^n2$X5s)3yFvx7+6T>V=u(WQhL_$Y&JT2XL)G=Zx4r8qa5v+oyoy-hxhMlPSA&ZG zuvy&L)TTWN?_+8ps-SpFDHVG~h^^pa_r*#}lo&?nrYnv*+lQnwG~B*%QztSydv97^ z9uZ0;zEGxry6p1sw(~-9x9>Ld^c5+-SLa*~N_Fy*V-{kFBl0~b*JNDR;77Q`TOwj( zUIv8KeD^pwd^d<{*|i+Evg_;6dGE)#$X>{BjB zPT5m`>IQ5&?Z%>Rg)=gCZ-@gfJNjQyFG{e`G?-&qb4=MMa?bxe_3G((z^7RUG zo{2JxOS{h@K4uc1suG@ZY*j~gzAfi}dt&`5GTmq{I;K*mZG`xCDE$;OBgG3pA|bw% zlRhYjPX!CaW-%`H9oBz7*o>uzrZQ(%)7&&@nFf_5M;8yF(<#^_349kN_o;@^z{MSA z;^r}_&xE23J2*p($rEAfIr4gmyn%vqUW}_3VH!5zT>$ruBK!-9&X0M+D`HFzbJu-2 zv0aP~7hyA%RKJRYl-5X<6Zo_0V9%ZT zV40k7TY+IT7p81uDt}ZUk z%6k9WIlf-Et%6h;Qi6;XH$$jNamjOK`jh#c*Cn_b4Yp3Wh5OSMMFA}rz?`8Tm@fq@ zQyfaypoDTX1h(~nFr*yORW1&yV#t-0>YJw?v%|WG069la4wJ*CDCai?rABul|`T)N#;fd;q zwM($aZ1ll0-Jd^oYpQf&V@f6zHZK)b`)jbrXYfzrstOO2(rT*qg~fVP{8L6~A07P< z=IXgIE7CUzkqVH?fk#df4s(zG7T~TjiI0T%8YkF-4o7$@{pcx<`~)`vUV~}i+M|_d zOMBw57~d|&HAr@~ryrZr5OOG(@;&>D^MST)=4YshDxJlf(E%4&G#!PcZnC!JHm0A8 z-J5s#vE`O#3UajF`|u3oxB8C+ zp~CIEqCLvPTRchj^EUAxOk${&A0)(EK@M#r;`Q{8;;K<`$!Uc+M!2PB*+!IP z)i-dT9!P_DeCJxuf=maNTkdHlE?0Cixw+_`5T7Zwl~AyVpK^zUQ;r)?9X*3!Qvgp6 zwo!55K{Ae1gURCJRv%sLybB*ax`4QR8CaWf5GxED^J!e_wP<5B%)$Gxa}8nh^TrT0 zrbLD5Q(;1eV^V(vmx=M&GwAVWP5Wn1J1OX2cP_;%qP8syO5g->0XV9t&JvQN?$$kH z9oYZ;bXYp^3DEpP!iak3qHed@7u5XQoRPp%j18sldxmXRl0J*bpH;k%qN<{-Gl=U| zgnrPR=Z5BZrR<8S}1)ZlMjA$(8t84O@xDGjKN=R&eTTHx~w z$xsYIn+6ku_ZFVSoDgbl_Q8g>T+I7@5h*y_JQMsf02BWU_4E0q_!(5Z=vso@IynQ8 
GIT binary patch (base85-encoded binary file payload belonging to a later patch in this series)
zd#7oC+6)p4idOre^n|b(+b~rG66T9;(*<-aGL?OTA%TsU-kB0qWPR~2aq&t@FHU@G zW7(=U3qS*a{JaEt7Lr`yF@%x0`A7BU>SMLs6Kb(7NG?~qQ#_@D|8hz<01&j;js^gY z4>ZdHp4|jQliXO&KBMjk)=*acFCEyx7dsa4Q@!|qik)Tw-nI1=SG_CyV{30}A*jdn z3I^o)yKVK9zUFFV<@xSAh6;P*8;8yxMS^GnN5Ef1Z6%{8r^QnBRkN|(7NAHZlWH!J z@ym7b6c%?b*I}njU@gK?gos8a>a9Ysd!*P)3M&~*LnhZoh+b38#frdwiNHz-8+i%( z>4NWvEH=lR$#K9|}~weBX~ zrlYLuudTM{>c{YcnJ0o90;$4PXP#Bz0j?&Mr#TIg=f^ZD5JFm>(xmtzB4<*HKBWya>|6E$W#8jX`H-*?D!-v`M@?@+ZZEt9!9{G)}} z&i-Job7<3avQs>GT2~AeZHYO_g^Jbx?TBvixxX%KDY#91zs^_%S51Vpk;__O@aYL; zR(Lg#r>h_q=p}~m1RG*Z(FP9ETM24R9BVsw?+UE}QtO3PMe+6ML`q8{{d7dX z%T8eto_XJW6bz)63>KV-v%~~;+$U3eR~y233WUc3-A&bKH3@)n0__)UPn%dj4_dJx zrP%4UDEJjy11G4w0YZcujK7t4jMebaf3H8>?OJH*_tVZVOU`q!GcA6=w^bqqB2?-R zLHZ59)O~ef1&}eoHIQ&ML|_}4*(gG$;4 zY~Bu>YrAE?0;Bfn4 zxTR-7(Lsl>pUeGFic`JmVy zmx!5Iv*><0)KTCCUnY1FFa=a%Jb<7Sv$1IYTycH@@rN9pS-{s|d9sy>B|8 z3~5zJvNeFE!88EP9*KowU~z>HTD4_!t+k&;y#0S2Ws`1-Ry&|@&IQ~PSN4b@p}R$SF#cF^B~!d7b;xWCs=pCD?Zw;#gdFO zdjT4VxNYPoEo8Nn8n&v3j5%Dd%`+M-E|JNlsLkGK_^~PY5AjCL0(e{Q5p{{=tT+HX zc?;7|`&irmH5LHt0U{lGP@|JCGXI%VysiVHD3{=r6h}-0fJhU>V2f!5kU#lcrZPEm z1f@vH^2-Wa*PV#4k4Hc)kYm024*@Oh zc(z_0s8aT|i-L{nG_*>1V~TGuqP+KdJnrrIy>TUldQ)!^P*>MA>|%178jM>15e#56S?5n@+!E!-Ay6M_xf`XttwZ_|;{m zqcxSkYa2rCn6n|jwx0)$CBxq$Ck*3iM~n0lPu9wxf&)3W*4q&5u%orvN53;~=PlUk z^zzgt#f_JE9@tq7uGCh^dp0LPFQvz&q3lMY4u73Am=9@FJvaaDr~IEnT)@?vh-V)H zb?mr*R|A?-fHVORSv%~_crsZG_$JA0xH?QQhxP(~hlAd#bIa2)CqixhZ7JSSkMx}f z^*C^s2|5NA&_wT6&%(qj+&f+|nl#wi3z@|%XQilj9)jC9ixcL#I5up7jYJ3q(IS*a z8~pL)tEElf7UCm5ps^r|#`9k+NgUxJm&TkI;p^!&f5vRJ2cYN0*Kt#kr^)Gx6SbyT z$YTDE0j+gK?q4HGf8ALja&QYntQhwIq$h$V8_)3KEDXp1*IAj)_>9~bv&@pSy$asB z`$w4h>FsCky}65IY7~IqU8Z&CVIu9`Eh&r_!4}fLw>JZ4;j;9o;V=}ZN4D*8A}@U6 z{C{rZT`=wZ7xZxv6Ux2#ziG~*gckTry_T>CLK*eGKGEfvXVfpb8k9z;Hk;phyct_e4^fqRRlz&Vw zdr(8J=C<^6RU@^DTH|$Bc(1;nSyVjlyXpOlsaV^qf zM_k^qa=NXbWiw12>S#5;_yN63N=Oy)X_Vra#SeIPAlBgodU2H|0M_aiCyo^FqJ7>1 zm#{~^v4P^K0x0YvP4#4WqXRugGc zF@H|Qkc>Mu@;5^TA1du``x?mftUu^=zxgsU`bF-$j!U21xK#Oxw*2IE9Iu7UYf(m3 z0V5c&%7qv@_#@10NLoe15AJ*a^}FAf4ItZPT0H}Abu<&a)ShFmwjGEAh`Z~OFSoy4 zA8iiU0>9_NLrzY}vGqL6F8F+QQ)*&o@~t!bV}$D2w@2`a_F5n~S0ad3eR%m|b*Pei_T=#M zn^_Jq6}h{w8W&#Od+dvftT}fk<-bTvMd2sK!j}_Kk8fh$>#o@zV$JAC)a&Ls^gur=tbuH^mLLBOmk8O@Hl9Aw_p z5g>cG{I?GzJa=T}4+m?{4SyiMWyjDm?e;sG6}!X~QP_F$HMbxp*QXwpr23F3MQ5l` zXEo8WI#!}eB1fJr`AK*uaX|kY1N`^LKN$-v0YePPhJUL5`x!0eSdvq+ON^-; z;~%#o-_LX1l}_1C|9tAuo6T3ZAz}@dZyDb=u%l9spF5qh@0F^hwm7zv-IbgZTIZ3` znc3+&gx5B9Pn$02QWRI{nM4gT`;rAmD9aN^#9hfLV^u4&o|Rj?IvaArF{0zEGO1`% z)4bfsbdFm@&|gN2>0q&$MK~P_S-eE!(h-uR%Vk*-u@eYEXx$DiTy*d5ss263o@W^+ z_80W+c@>rQ{7RyXhcZ6n*kI$g+E=%LO@|ueukqz&y&oEsF%tM=L!=xA*$U_yn_s-l4wwvPQ4r>-l>&?chzOot&G)}J!N9EJ0+3b!^D|P5S_tqkrX6AkP4VSKf;uUu zYe8g)L{?6kmckWF+umUHV>GZ-I_U6~OYnF~Iq@X1zy?QQzrY4f)1EZ7Re0VHalMUM z5s_26@>pcl5sx{R{vFz77%)of=<@_odV}b4@RA{YX6Gh~vL4jqhb7R`cmUC9kN)JW zPEy)3f?D751d&UpogK`LuxmM)|2*rR|6YdB_NB&~z6k}LV6PkIrxE)(=-@6kcW0pP z-MwLf8SL3%!{B*XOmZg0P&e3{E`Gq=JY%iB$!*lSh^4n*;ujvEtA)~Ks}It>TLZ%Q zLUWEvMt-638Af`BQ>~6B9egtv?O7(+R_XcE4!~si&JjJH#tPOnjg9r&sR{k2WW+I9*jEm zEG7m!_mnGey3;;G@>#Xd4Fdhe&$F+6Xc)?l(&X1hAs|4_i{Hnb3-MT7w(rT-ke{wG zv>OXwqE}954&3!Tyl-2@@vcXH$CuyytxwFN7G2RB-Zi{lPL>bukm(pZYz1?=dXkqK z#=7`jMYYWYBoEIH7QahnGc>uoG;3%NRnUI`py z@BwX^w?hBjD?xOVr^bz}9tE+rRc&l3&=sRz@nrat!s~Ck_ zKT?+XNt=JIDNvW$!}Am?NL8F+Lv0ph+!7o#K8|IldrH!7}6CZHY^r7 zpWz1d(MGq1%mgkZ@2z+ci7@Ul-t62{MM}&Akrzd%fwDHEzfe>pzJpGSDmpWFE~p~I zH;pz!SxsA@>$pp|e0SK4476Y8eQz@MUU%DM#g?*F^$#M>4(B!-w9_LnISmhux{FuN zoV9d0IPjkjBm?4F@X=zPE)Njv@pnh(9BbT^_C=?CDrJc~w4szRzYIZP3hg|pe!4!~ zJKEg}ov1f9%hdrwJK+fF9_@^qr51>yixXX1lVW!cj%UQ_IHjh`5IyVRVU8HRdvRu~ 
zm_m7Ut}?GM_JJquSR;F(bwj-pHYBNE1O-m4Zs9Sk>-VyXmeB&?vkAaK9DBF9G+8FP z>-4^(l$th&8|WZnuaJ>{M?MOB=2c6{0kE!C~M(688CccuA_qx){Hx8FagFZ|v>hIF0< zYemH^ryRvubjJoqHfTkE{A29DYhOavh6|uM ze&$UdR*107y`j@N+I>B_8{f_&o~NVD?I22;mXzGh)9)k;>~6A*o~ApF$Ibu=#uBzveJNu=lL4+YOr%}a9|o1C)$_>} zapma-`~n^gYn41f-gRor5(6yLd0sX>0>$gP$!J&v+CBZKe%m)tb3=T0jBgS~?_x)8 ztJTPK^WP&cwA7At2a$d|aEMjc{wSvGd_@Z5FzCB-H=i6{v*O_i z<5TY7+|KkaT``E_08*nOE}?TqZaPgzS5wR6<7m#cj-le5o3k109U+&c5NhLxp;G(D z+fvFNLe_UO-B(+z^gpaf{S<5Hr4_%@lMU&#!#C~^L&*EztwdF{lg-~U+mgv~SsB-E z8qe2NRn#mot>0`u^)pj@ZFlul?Y4ftTbi}Crtd^|@%AE3=3d(esQ1dlkh~3d8WG!S zV{m4S5cf-Ay1>4@Hj|iWO=-cgIs`7OEdKP0p=+Fn8Hb&3ga2)yLps7<>J>Act26C9@IeQJbc@4PFE7+W^ z202xutgZJMTTwA^7DdxwPnh2jt`Ueg`q+}8EM9Y3>npcP2>M&xlWEn+fBnXN)nnW# ze32{E{2@BJ*rd)!Z>atVkm_$+o~GjmHI1F9#!|WTg{GPq*8k>qh%*SJKdAdG!#$(# zygRO+$wfbF3`pu+CQ?W7gazD%mDE`RBAg*bna$uTwiqp@uPO;9!u1 zfi#N_ZHa&B%+PaUgoc8eJxHxgh5;i~1Op28fRdU0P!Qd2Oc!1mvZf@!; za#aji*p+B886*mK857jr&+&NQGcd3+hAZ4-3+ zKajCUsVlL-#T!h*Y7V1Hf1Z>W(~p9o9@+RJa7l^?j|hrU94zSS_Jqxrj{B1}SYFXO{gM zSesUq>k6jh7^YlVf&=F0RC&gm5Gq$Q4~YHwD2hx}LKP9}(>V)o5PWh2($ZjEy>0(R zu4C(S-3Yn9wS!3~*D0G(c*U>q5Cc_%-B7O1d(|RNBJG{0{^*3)CyY2%8MyqVy}VCwIT=-=!B zaa6^=aB;Y+&EXKa?)B5U5>Rgt)=Fa(LK@7Sb(s1GMGJi5I6Pfg&Fsk>j}p>+UN+w4 zEuV@#ISC1wT&?jxh^v;hyE4Y_n$wTyB^f+lwEks8`Z8nvX6dqg^2h_cJT|HG%eFu$X1y$UN?(F^I&3% z5JP6z##o*F815`452z8IgEsDmX3kQ!%Z#<^DRZE~(}=onvfgG#K!xq;uw| z^<+&D(Ls-D@V?ddFy*{%2$=BclfH?c^SB@RuzaDFKix^cOM7!|eT$xip+}W#56aD0 z;F<4(;V@tezx?vpkdlY}+!w+OU^M;aO(qAUY64?ljFcxFb7xcQM(MlP}N}s6)UcR3oBN@ix2s7VqD<#*^4xZ8f^r2L4!}ND! z4!AK9wnzQl=NV7BGqCP-k}q`YUKkq9t!P1#f5DgoN00W^X#?8-od!>C!57S=Gf%7| zNC1Ku#@1T~_sN%tPj1_UDf>;MN!LKVCUCz4_-9V1+grR=3ZroNtX%*%eEcpGDL0gH zeI9_u6I^@NGSit_wfXJD-K9kDFLZa|7ASV7Op=e4l(1tvI*8#K<@eUU+G+yi`Gr^k|`J|!B~ zEP)>W(ZQKq>T89Q3C}K%p^YTm`Z#Ii#d8IASd$g%_nG|!!(6Ozb}zF3N^e;G60Ivg zl@xgrwSv;IQzx&Pa^G)x%qv_L^RkbOnqib~l?R{rPCAWxQy*9v@s)T24YFmco4Kea zu6xsfm9;G9#Ya435Q zGV*KgvNm`v16B`^@c(_#o7qVovCQ*6o42;*=%l^&udx60V)J(<@2khq2)V}u*+Da= z>xl1NPkqWsU#N3t7}^=Q`&h~L$@CZGi3A>XTps;E?eCHsU4Nw=!1G>O@Z}fL`C1)n4hM*p; z8W%m^IKe~@0HV|5D0B($o@tECJiy;*109J4%yLn{2;Eo-noV$5`avEw3SuE0odV-i zWV-JDu3s58%pL6izp3_f0~ES>O1){oxWgNCgrUJH3>mU#4;Tj|Z+;Fm?pNuA_f8M} z6&d{;z030XZ&$lBFji6%R=vl*t~IFhTYu>{l;?KRRgc4uHaOouy{uFA>5xp%MBx;o zMsGvfJ^WC02i71X=INi>9l1&Sr4~q@^9b0D-MXZ1Gf#PyiJiLaDD_*u$WuSCEeE0$ zK*TsJ$B6k8okD~)JvP)Rf!lQT9r^L4&+y5=l{X{RMA)jmf}?f_kFOvx0`jO=KO5u9 zzawv6?{lZ@NYXB?-zM)I;aV`nC@o1T5+m z?lE$+zb;kup8H1yWRxbhiS9=pVN~Hr%XhUBC{)@}`hSPMv6lV{B<}p{vE;75+ve_G z)MHn@^2bnVh1NJ1bqq3HkYOGTZu%-SvQsmF8JBFiD}DStkdO=AkLram8BZe(%8281 zAxzhr@aw+phqXUI{OPwGxOeXRk{?!XP}(^%%7{xid8f~kQbC4IUR`*3c6nWpj)s=2 zrWSDOIOO4^ATs^dHXy76WB}(HVnk+Y$8()ln04^f%TJQQTi>5tl6pCfkQ0fJ1JmF9 z5!YGVZLWk?jvVv8F4u12xy#_Sf%F)dzKmU{{|LtH4JSEqK7CqxGFv|F@pF`*%%6kX zi(0XKmvy3V2d$sPtvh@#V?kGSTRVn&*jnzEuRVj^iYd=@*Eq6q#^Kr>1~q_ZA@Va% zIA{X!3v|*+EmG8Qc;<{p*53k8E|@MakcWn_wj@tV+7ubFQ$XXn2(xA zb6$r$v!~yeq(2OrFFF^W{nP76P-v`uL{YX@bwEX9U;3^}=Ki;SARkM@a;Pa|&uWc= zc7Bz#*pD1FTU{{oCg-(_4s|sQR6zmj*#iuC5#V_-Uzz&l&qzPFq6oT{*3zU`J1T1L zWq3umguii1pUkKeZs~3)tF{lWu@18g#Ow9BXG+CgjX}gerQ7nv7TeI8-W?kP%f6mH z(H$YJbdn3+KdLrzO*W}|wG5|24=oeU4UN@0et9!_Jay>&!?TuDGfJN5auNdj{jf5QfB_Xcotg&R}sMgM9Ck6~RcXaSgd;=`mHfrni8^hF^sttze zP8KO=PGGy_y7AbY0Ig*z=38B#*EE4>eN!Lmc7$~6SjIL8t%K&KJz!(IN6gV)d+}I+kKChZ{$jZ&jy4p`LB1D6AK5P+KZ;D8Ebp4r8>r2bBlor_QU&cPT+!O(=b7kr-=WXU*ZwpFH ztaDpj^@ErUzK4Xpt>XU6gaLPl1T$w3hq`*+Tc4zDemEOZM`)R8eY*wLJRj>868hGU zhg(+J(c)#A5W~RvSjIwVJiu#DkY?; zp%4N%qv^U4w571)R_R8(r%Gnn%H~<5#Bk9#Yo$y;MW)#S$xD&%+Zw_vWBf5FOO;%J zFk;hhA=PI-j460vy4c}XWgWBuzMYEtr4ci~Jt~E>n 
z{$BW>ayS8du4Q!N4?g9O4;O17i5%~5Ng5cWg>+Sx))7k##zK;iINyw0ZudJ>QE7l_f~t5!0)*9NEk!{TOTeqXP1jTcBTU4=gQxG-V1;yzn$5Tf zyEelpF8}(KsiLRet%kK;L-y6PYDdCj82gmZ+yK?cf&0}`2Rl%o`$t5=7o>{DQK8L3@X)CaZRJ{GkIhi2$(aooWwZDU5te!zBd#QC+aF{;x}W`OCI% z3KoEnp8o$uhD+O&v0_xUi34ue|65g)_wC%rBn|63U|8pr{KRO=Gvb!Q1N<6uO zlp5i0u>a!+_hz+lJz|?thu45%HAmpGU}4ZM^|Q#%IpQ-Y+jP_0kIK>#uCZ&wrP7Bi z3++oH2V1b*m_Qu|7Dy=U0<~FOJ+uYNnlh(*&`y3uqrDSt(h(IvG+|(jCeTh@AdM2> zzVuqm-w9Y^C)E;?AGyd}_ibojdxv@5Jaaww0mDa30WNrw$egF5DJr@q@EKlf?Z4+N zXV`R83wC9MpH|a*#sbg<(Y@M8&mY(!twjR=kO57mFhz6ji{!)lPE29nks+_Y)^E!G zcFd%RsG-<-Lc5)M$R*w|Zj+m>_te6URmbj@?hO)HA<2Xy&amg=dt#zEdALvN=n`Iq z4{x8-zb{F&XXXeK_;ZH+!cNbZD{#y9$X3!~iX6gQ@R255oVdc@o*q`VjKbBGii*%q zWN^hQS+9thMs3$B4OGEbulJ51%D%!?z&~_m0GVP#c4m%njj|^Ik1tDX&~ncs&_`VC zwnGV2ER$IbCIbk4>RX!#%YYGvpS?p^=s+q<5h+VB6y7<8}3jGze zpjkUu(#@1PX^beWo8=M6b=8eV5iLbj!26;V2ylgwYUL*`c*~>2+G*bi4DUz{mY&Z1$=3Cef`rsGg?0l6X*Djxh9`bDYnm(%d|j*CiVbwR`MDxJ z#^3ma>h38vP(+b(HANE&%tj7hr%5RZ8If%dvnU}@k6556gYK3|!t}uwOh){PvE0Aez2(@D{D$MPa?-;s15IDw87+M-|IAcA)vB4u247le zhbS4uHIi(+%X?=}n%^;{l9)>O8!jOS*hRGL%2TsNH>>E6QpAuM@)yFK>L*igWGMAA zq+vaS+N@O)H}!*xny1@GbP0E}5PBYVvRRD?>sjqb7DX?O{R{V)TSmS{Q6^;COarr1S`XL6sD19nK@k=H@pD>dr3ec%hW5+_)W{|6LX4*R zx8h`4E=l~ybd9({EfWU%C*7zMB@0k?WRnbB)`;{W&f zS=VuE)CTxa(7?bMxOT@GyzcVP-`~}l{9U(SO|Pfo_R~F;V(rIb^p$d~P>j3cgm0qZ zudwgDR%%zbXx5C9zq2(L*+#o(4K-#BO}Oay>8Rx8-`P||rv^HX{!cg+agU6K?2+Dbg z8`y*Gu$+gUdRUI{FTejFlbPL}dGp@qF`Gpu8(umlVy*`;m(#!U(P>7>`~EO<(5zMT zsX=FMbP!GACUII`p@G@J=e);mjlE?S9OJrYG2Wx=?XuWUP)-xbW)@%TlyZt{MuP`C z+-AOibwW@-a;(x#eZ-uwK&P_ax@SW_A9t7YFJA3c7|e z;LOyhY3BhtE@TIQ^Z>bOkcew^0oTH2uW<5d4W-#I<6j}IbC~*3NKF+Ia)ib)Gg74g z#u*UHwMjuZBOhh9>)1g;dsp-~8x)?_aGN0-S-zwKPentaNC?=vTJ^joWv+%0At1obD`LRucVVD2KH?4i>87Q|ig@B*c?)zVZwxFz%aVSW zIZI0@!65_s7?+u>W&pgLE2j2IQ{x(-0 z@1@q+!=w!SzEna?>Bzfx<-xl@7sS=r>~D)HX^Tk>W7=o2nlW02(b~$dD3qIT)|!`U z%_DV`6dkX=Z^amc_63{!#K0KSFcs*RAqk=MZ(@eVd-HDkN`Sam$b7~&lNz39tb_(S zkw++RFJMN^;!I$$WhlfPs1mOJZ?99jqm=!2`QnnReEMa6Cd&FD+~jSTxP~J}G1&%y zy$#$Z9m5iup0HGaOq_3z)BCzJ0g*}bM~-9_yQ+h?m(N)sb>Rw04>YXzLgt%S+N5C9OAZh^nT(mrqddPW_4PtIuUWw9r6~y2XRiaf#IBG{HTQ7 zvVXCGCQSZ_2B0@0`@jX=#q{Pg6`GI=5?n%tsW_M_yrD@VIBI}kjATEZ@I4f}XA_Fl zyQ~=iEH*AQPNn(#s@raS&fct)js+uqy{b_H*y|?yZd$eMVgqN=-EZu-nIB7V!H>A`k&OK z8K4ILW1;_Sk?53Cnq}H>+E^0KeIlhk(ommiHsaX?yC^~?zEB_~RXD{0odw^Ho&S92-1*(rQ`q%3c?ueM!yqk zT7bg=7_4$w?gO%ljNn&SB2nboikgx|gg1cMbd%ib8&Gp46cHZd2k&2F6g@5N!7sxm zeYebG#^2iUkF-xRW27$j8ut>EW20qFYM2HMVF& ztrc=QE;kht7@)G}PzX+}(7^2&BiI-K#ffbID-~D~j1o^oo7#<=Bpax2gr?3S>o-!) z6D{xbzgyO1aZ(M_=6aCj%(o(gdHH>zcgJnj0PB;R6_DjpnmfPV-&kA^PWGpCz^q4D ze!YftO+z=}mNr?!@e-G{xXBHa5Qh?^80>C*r~o3A9QxOF^L7OB4g<^W`F5W4d@saGM( zlqP|OU@tOB!xly7D{f+xI0pL-M!AO)V}PRuUnmp@iwx`m`tPe|vALQ-WetOs9t!mP4j8uc;H9|@k=KUK$&C{9}>TH`z&BJw+0J(Xd z(M*9dAN|cx8yLrMVxg1}gcGt4^vkr&SHkXW)U@szF&igr@R^q{rQBOZ5svd@BD1&w zt0@ng)$;BpzO5r@p&vIHnltMGXyFXn_Gy_RoY%Bkc(X`mC752 zOr`U}Bd28VBfwhy$t5)H>qbnRL**cMnjPv+@BIb>R+6ATNs;rmWk zd)57QTs%xzCPi?s*p+p8&)odmG^Fi4q(w!8Rz-usH_7jhP`ia@%?4VA);wd})L%}! 
zh#u}iE1ETwZ>Uv+j@5?JzG;}4bWW=A(DscGU@Xu5{Mq{+ycEbwlE&H^$ltK%bS1JL%zH`^zm z?F4ROKThQ0&}193L1<#HwS5N;h#uLsWfASQ9HKK~j}pmp?rP98h)}RuO0v2Twgv6} zaBud2;XT`7%~rH*=uhGT--wfYa?;S^w>hVd3jMo-PegwA+MhS5H(7g^(X^rg^RvJs9}~8ve=vKeC)l&85#hKn&8roNl<@`do{1-k)JaN4}Yz zoGSq>LZ`>srwt|>_!evVrf)F(^+V!W12qICo)xz2AtJ8FVc#r*P)e5AF8}%aIm#nA zM7o~vSUX=svUxzv!54C)hzGV1I;DHOuJY)7!j)j_T_s+QJ-nQqRS_vnXdn$|pIpSeajBNnvy z0w6(HZ0-U4?Q8AZj%71~i0Zj7_AI_Zl~P_{k6jm;O<^5>mvFvaXcFS^>Wg2drMCFS zCvg3X&8q;U-tnGaL$~kw=xK!FdiC}a{Nl>rwWJM%1Bd&a#~EKVzI>1<&KE!N0bOlc z9dVMQ#N--wBCzb}FCv=(omD5maubn)dbqN}*LI*q3EyTwWYIZpD>Fuu*_SVO!tZ@8 z>T^7K=RGUUw%9)uP?vw0b7^=`vs`0CT8&kBv|L%`dxGkkdD$fD;lV5T=hK^T&jp8? z{xqia;k-*FA%J3*fyN z7u8Z*4OHK8>IroDzJD8PaB`IaK8uq>gtW83_g%QLM~zk%GJR8o(qv1i*}{qC8Yp

7&%ZJgIjgrCSQb&jq)FWJ%}I-OJM;pj)sz*Yyy?7ory>0x0bxV zvfee>Gc)B`as%HW_KDv3%`)cmwR?k^V^0E~%U#n|2ieY@0lqUw>#Twc?~IKsC$a28 zH+>R3fQMPAu+Eo|NUXB#5S9tfym450&pL3mp+F9kc5-5_BqrD&S#keNjM{ho&eqF& z9E@EhJLdj$*~EE%?L@r&Q6m^NTyyQs%j%zw4_0j&sZM+m$6u@n?vcm?D{h+2${ec? zD<|!@^D5FCa1o)B-$-HiNYF5n!=7oI9UtUM#|vU2o+K+iy2?@AA7nk$H;leHi40ZF9e=g1jHg&#%S!vU$W*=Ih5#(K2Kx(9d_kJeT(yiVghM&wlY&$A^CaO%g0e zR3_Rss=}m|Qyj5jk>qH(iGR+?K~hPnI|hvlHSP&xoX!va>S`ls-VptlMXs= zZGTqd5$CFwh$vnH5lp_Ws`RoztN)G zjG&taZJ+J+R#b-&w+M$kdh}jl3q@n>WOn!mw3xNlgV35JN$|Uyl9KYWpBeeyu%DK_ zD@z+cX)6<-?5DCHY946Y&*31*<>ESmh2@7z2fuciE7-U>NX4kQbF9&1 zOUVbhxnfK|$RD=N4!2N=v!=|`ZT1eDm0#l%`0L8lgkRHhO#01I#{@^{Zc-!7)#(wc zq{q~S{_N9}xswe;d>j)!iIix2MqHOCx?ad^rCpejY(g=G-N)l5<(gXy=Jb?J(F58i zF}dyaE`b$H==$Gx8JJ0Pm;mq+(%WlrlC zvHWL6Sa34l969B1M@kGnuHzWPqWLyD@-p(ETlk2v))GvK53qB*(M1Yckxe)_jxq|d z=`(f`Inm3NPYN=0GJ7$YMTX3-q+(De01GBy$0uhYI(;0p6CWZ?$f_>HH`BJDgLY>H z6c1q5qAf!T#tG^1$EEp%Cm-s}I+hQ{9~R8B!Q^yDHb{4WP4E;$Zr?Ckz9y7HNvjIA z5~@|M24ttv`Dre%PF5?p7h1|AHteCXI&~bQ%i07TsiY5p788?1|DF%x5~D(4B?$A$ zkeHgd<1lYv(5|doX%$?Q9T6DA`o?iQS|_5scDR*NU8g)@Fc94Xc(lFE-RBKAxBKtg zIau3l@!`MO_0Ws^&`K%dM(^YK*TrW z%-xzC!5PaEDZ#E(#Htj4r9p2%vnhrxu~@|{5fT_`fV5OEXNg7BJ6?*Qtw|BA5}`KL zcyHyYj#y|cwwYH35GA+6n5Bm1cF`{ChDm@@^@%WBt+SGOdEE67p>&jNEGlw*;E#0gQVtIl?TNmYcal6$0n&~MD-j^>R$CAMmS z=Pm`E-J3a-uuA{wm5fyy*sN49=hclUjDnDqHKFJW`D&VQPn`WMta`C`>VZ!L%B;6L zbNAv3$VP+ZMvo-y`1)Y}PJXWIHp;SvOUn@3Yq`WAv4ZQohX8x?nMVZS>o+WiJ-7;P znjpe$LJ-gUG%cjAuZIi1p0*Z^RH^Fu@7Y!cS613-cpX_~5~8_D>;3YxO`1F;u)jqoIo z|1O!%aSgGYQQO#^>Q!)U`I`UgQzv3Zf5oPd^GN6pkBaPNwKrWoo*Z0S*s3hbKSUDu z^zpwK4U|1uly%*;jI)wT&mtWwydvCFhB%y;NE) z3=hqe84BZ`x;`mK0my9;>BG*x~c}B9auKKH&$PJ|G~M7>usfnNI_Bs zb7Grn=2RAEF@QKu;iN=M-?;&Nm7Bf1T-p*^bL6`vo`!tc6iLn)+9aoe{*bh1y@?H zRo8gC3K6y%0AIqfQ?CuA^Wo%+Fn>hnXJOO z8U7!${m_j;Qux6=(;)q-5|kp45j!y2DJfZg_OtT54law+*8&Op~P&I=e* zpGd3N;H5>TrW#7rzjV)LM$%{IRuq1Hxr`$LCO6Xd2;?T_G5qJ*0ihu=UEKMCB2i+I z>?Is1bp$QVUAr9J?k(ENUquyXjXR{V#!MBVu(u?B^GV`5HPu#z#2N@oyC{t)^%PE6 zh}x_hL9)c;xw6niAj99lJ7Ecbb0&BOz&`c zotMhqzE3as?84nRXUDgFF^O_7D}To>$5`01+95SdPmBvZ<#a5->xrq{4|SCg(}USA z?UHySfUHsP{JVs9Mo4RvAU8*lt;QzkF3}M`hYRD1-?fqpa-=3o)jpnKX|S>{2U!v$ z9-_uuUp#$hW@G{41Hh4NNgDoBq!bYt(q;(ieI$NFLe9%^FzFVk=fIIdGS!b9D}}ci zh_X{qpaJ?(iX6cRThx(CHB=C=j+CyZ4g>ba9ybH&oio{p8kPUEYR7s~#$CXkJvUTM zIjA8uOdFZ680`_p#5B;bjC9ulGMxk#O_I+U|8FOU`Fw6}#E$x^9YX10QKH$2WXHo} zJF?_3VF)TSi=z%@FP_6aZypmgDVpKPZFG{~^%aT@r zA*e|bP-`=V3&Eok&T9;NUixdEoCKR?n{z7s&cEI{4tK?)j33D@IMPu3Z zXKu~8I)ZF5fd2Tr83IbmB)CNoxJ^Lv(Ga#xf_@m`NCM&8KqQgl>N?P@L z*mSPO<^3=``P%g0d22rmNs&BNB5vf-QBO}=MhM_wbBgr~p5&Du^YY>u z>bectdAbx=rT{SSitzKLJk>&WYG~mYsMHWMB2+9OCu!2I0~BXri4SQm6$F1>*D(4VkQvBE#jjOQY85c=UlPX`tVpY&(FFoMh97GW`fCKT!|9F%3aCjz5R<2f49cMZt=F zb}eQn9b-}RCY`DqQZ+p>J**Aq1{bgO7Q(xo;2`0q#-wFyCCE}Ct%b^(Ph&NAQBI@O z@JXIV<~}b6_UVJ$MiSoL;kAvNru>;_qlb&W0z6rQPZw#g9yT&qzTe^f{sDX`vL)mq zx3#wU-;LR5aE1VhDpmdQ0c;@TMWm@3wN|!Wn)+dpNKGmNNb3PICWV5#)~eM|n8gv^ z*Aw^GHju`FRRZ#MDOD*XZ8MP766#uvR1Fle*$^K9J=B$ZC8Q$tIhDaU=g3bol9Ryn zT_qBjMGH2Kv7dg9SzU#~`k&;@l>U2=cA z-(vX4x}6nl@&a`KJR0l0X5BKJa7sv9C`48R)FVl|oF=Zp zK=T&JX!GodfH&jZ&HrDCJ9DLsy6;6<@*5AIqZMDrJyv#+OAY_uCBIVYX_N|UHDnh- zQtMUrCJh4k&eME5V5ZoakBqUG=kzjk3?mP2fJRsuHN zJk`#sKRNon`+Z=B$)%=W12ViXtnn6h%`5eJeLpLtDx=AS4 z0oa&{IP!HZl%OVe0sryA+(~jRKwh=n6B7>9N=Y1?ZM?PMqfnrpxY0oTyBAsAMU3kr z=1HKt{|uN&;FL;Qlt7;9;LSUJG7xGHZuaW)@3mQ8^yKxl+0gM`|7YEPoUC9;$GO-J zuera^KrWZt!=aL(t|8dAn7py`*v~j23n%nWQ4UFHM-_yfh9hHFcx9pR!cy0I(Uo8| z5{&=Sz6X+Np#7b8W!5CQ#4b@l+9V~+H9%qwWTak}>IE{HtHS`8sDZro!>lf15e~)( z3G1&AO$|_t1o~r+siz)d7$8d&XwTKsMuFAymawqg+E)Oi2HfygZ^=B0VBLKvYWKq# 
zxeow{4F&>mAPRdJ0zEXaGY&ZeXv@m`3ptnl7(7;xfxLR)6&(N+xg=>B zq)%b0fgluQEC6cH8fZTy-xBg)T}zx@$(GMM;xi67X}~EnP@6Zrkc#?T-uCt=fT`*G z)l}nQOO7&3_$%MJfF6?dH&Id}K|Yr)57w`oKM7}Y9{$;qO7<}Op1)7p2unwxGbNDt ze$#ZLSF>T*FS6CE91p<+OY`?1s+~7>nC+@NNBeo`zY)ShzoIA8uT-0m6d`qJ z9BCHPdiAuDkPl}Jgm9F!UlVymV8<@P-(3Y#2f#kgZ2`gnn~1JG_E z^*0G2q{{G#Q9b$#!@|yRfTYk@CzOq{K z%KP|8{)tzsn&1(#-3EI?ih#WB9AODcEm`nq>TAM+X+1JFO@HJ__~uxP{%@h)Nvn@2 zK65mODFiLiJRbwGT|(^|$v8glnkb<>K0)3iI|v!J9a&w*?8->HH%{yNufy|G%ZWFq zo&bzJ^d&DnOxf#NJ;9AWUHbbTctA&hVfE~mQr~*VH zonmL*;i?w*1&tAFThsTlMT#o2Gc!Ik_ee#pmG6cLL&B}fTD$q10q|oO{i?Sx`$k=Q z-jAc2`$k(Z>bZ)E+w@sJQDNEp5>8DORtck{oaEncJ5n+ z9UP})NiG6OmE{U3YL9pFkryQs1|3XdPP=_vJE2)bSYoz)Zm0gxGo6xVIkCA%?U-_m zZeho@w3lOu)owShlSl`v1_Et-3k-P4Ya-(8F1fy0q#W&v{-Z6v;j|KJdFX4Z#(Y%- zQ`DDj0E}gEjIufG5V=w4yV%0YM{U0C&dHCH%o-d>_sw5zAB-7*P{f2}7O{NoCH_8B zt2!rZTU|(nXz%Lbj`6z6+C3p|(MM*~1ECan`4x(bS9YOF4encG>X|#5_6io?t`h2TG z%ra4(J&~2%v$BghIZZ9sGUUI31YJ9%U`ulD7*ax1j~ zSK1xJ;M$;i$upBVuVl`gpb)&zJik;X^rmzh4;@a9#r;*x+Nw4Zb)xl@e42bG*~9M^ zM%?VDDbCyPYtdk0zv5wx^$^G_(!wz43xJHFJF+&1GlZfTBTPx;KICK1o~bzQLAfBY>q1n8qk3C0waDulao8xkMz|vT*;+>|WwgyD-PHWcA5LlQ!WAAW6o|&(! zQo0F?73KpPIJ$lSEf5fbg8NJ!_Y9&&X|A`_09qOxh}kG%mL7w>N|)QAqH-E8Bt&-+ zJSB31gZlr~J%DIqOc+ik5Td%mQJW8yLf`(q(*SxM=}d|_U{W1;toQ!f)z2}YTfTr> zFKi*B#=yN%?*-68=_xB>I0J%n9w2PAk(?|s$wh~ploPYorpHb?aGsHmk4R*9KY#5WJ2;PPhuMerX*Z>`>aGb~(chllx_1 zu$e1Ht&tE|T;Xn;_I?MoOinKm!qKkF9qnJ!12bFTVLbb}-Qnw^tHsCugyvL#6g~DQM zGDMdyCI(5N7Wta#cPkHlI>Pd&(X|kjjh}7H+xqaK(B!a!s_WNFsqy{OZx%5$O6s5NuysZ!#-5kDX%n{rq`-Ef zJ0w%VVnq>VZ15kdLc8tRBcuqFr-`Sx7al2n@7nJ#m{lM*K9sp;13+xBBy>iX#E7ua z(lQOkvhfcDd&432I(47pYLF@-H`CHfXPUL?)o37P_zDlKnmRCb4SjIb|f?SCwwL`a^UKKfT9Z8~Vw*yFY3O4l#Xifq@sG#7M! zI54-*WP(j%mubinHo>F7xL&UH(INnlGbW%UavUTOZ>Tv+x#rxYB;()xGY~z3cip z8xQ?*X?gu+8r_0Uw-ZBwW3Vx4>&Yf8HB6T;Bmb`dM`6{7BnisH3`);2*kcM(Jx=S`LVmd*aaFRNFo@b7p`JF>h7 zaa!43JS%v3v5s33ye#uKT;E(8s|eKSTqlB;87X1!R0(>5C#sk?1t+5A{71@pQVt}g zvjmXGDbR5Ua#Vxk(`$P|79(b9%EZ!&r~oJsLo(Zmi!u5BeSmiv@cPv3Ltg!Uy|cHq z`W`;sb(d_UnqpKGN*GhM#{ThnrJrSh+*#Zek11vwU}VdgBd>i4Z@6tEL(Js^h3Cn! 
zHizQe-F;7$i=|%1lEJh8fC)v>R8V2Vv7eDK$UlE{E)M%#MPhJOjF`AwOw{-iUF>_^ zk2Sj8vUdvVA2sAaaHZ?sSQ;KwP?NJVu+s=F}@Lq{-cI*h!*@!Q?G#iQ+MgRRAT|6Lah3vES<7dF`H_ zlXLv|-r1d-Pw)5I_0h6w(Ye{vd6teG8ITKwM!-T8mZ|TUHjREAFgqcC`O(pryHFXYYn2`4Fv!TYBZ_i$)v+9flO1 z0)^*P@G>BF`&hZFOtDZ;SYR+7aefaKAYlQjpq`)j?EtfiKJg5*sshiw!;G{-W7j(q zH~w113R#5{1_lZV|IeT80vJ85)m0Z85ABLk#(is^eLy=qEM2qc z-b1ZP^Or~~g&W0&|!!>{VTlm}|I8I7jTSeN^etS!M(iU;$+9dmszml99np@pt zO7_{ebvAdDES&ljYiDEuRVh5{2{->KUjisStDsNc%TvfmvV@R`E9T(}_etfPc6;8$ z-qM|*UBMp99pD^HU2#YSTO z^!Mw_#&4NSUXT5B{YkI0QvklWl)tw?m^(`i$=T#R!v`}5w{$CSDw0-+Nt-80TPH}{ zO4GJYRBjssEi_}7?%#HJp5$zNTX00CzRBgAMAF@Vtn<)rOXM3#|Mfu{}* zV3co)1mRG8fdWD`Z`8++-M&7}q=BsC&hEL7++W@T1_sCmd9=*YNw|Rbc2PpvS@Liv zF)$*fbJ&0Cq`we}t0D$nCsl48+iIx1_qbBahIr~Fd<|&PwPgPF$LrqSZlkA{7yfo# zF8})h?BEJ5&=69mke7cUsa44dRj{!ECbD6=2}lfT2a}wF{^oIbpl&$Mt1e%3|=u=^rT=+l_Ur z3Sd}bETFtf1r>g-%nX>dKQWGphtp9-Le;(a3FC{F8J~c$#fu(gj?R31Oa1Qr#{HvD z5OcdyB(rC_+)0L)JR428)(IUvi)JXj3zQR%&2t5CxCU8~Hdc}Qc&j+u=?Q4n{@8gN z^+9>{{dwIVt}U6>5HqtOX2as<(&OMvK`8ny}I!*E6!Qg!sZrb@+_J zEe-Z?Cq!Y&nE}vDwbEUUL~l8QVWwiT9tW~c(jlJo0{Vm)@ss{eE1Y;9mWhcES zMz>#gKX=L{<(i8R>YXPcZPAk)Q?oa&&TeaeJhqE^e?RrU)56DR>@6l9w-n9cRq5sp zDU3ein^$!{_e2?%J0sT;^Cd<+H_ZQvO{c+8UC*5`69>Jk(S4dqiCW1R!b18m&eR)@K+a&R(uG6cO0YMR++0s5am}lz zpY}g3S#&w~(TWp`o_@-)yjboNKq0*V3y!m||L#tGku~izk!9Zv`*bP!{*_y#`J?Oe zhw`$m>Wem2{5ibTo}Xs_@No9S!6oxo{nFy=>i*U9@GiM)ec}G=qNVNS2|#&fePlkm zELl)KPpz0WQ5}dwLSwv!(jf{^g$_aaT?BEvoIV7Y8GmKLL`S&kB>v3hf~zwJYaTA%RoIMGhyXA;~~_ zL_IOEUVig6_WYjY#p1wbofBEd>G56fQXM)|kmmNQwBJFvpj5i>z zLu*_oj9>d%A>Rrke_O^LA|xz4!wBeKmH2AGp2^FP<7aJsOR1cEQub|S_baZLA8nw} z_(}9){01XU?lETi2OO71Dic1tyn4d{GIW}yP_rq+8?s&6-VHY`DZQA#>oC=#C(XGO z4iZ4t^~&g>@@eOK$h|<}j8!?MsX}B=|NQDRzpJ9QqQIS87=)_EJ(18cD6}5Ab4Zmm zDHo?@i0kDcLPBnpYX5EQ%ih@UjDypEJ^1nazMld&#rim%S!MQ;y$KB1jgYm$b&EYM zjf+#2&PLc{Ykp;}yyFfn3kZr?pLoJS-gMzs&xXm@KW|%^nEABk)#JwL(p`(_r7HAC z-r4W#KR(qi`)T-f+&j*kv~Y30aXdq93g0x?O#`75AfK$T?E?7)--nxuF?MJe%CPbz zzUtxkm)BI#mh_Bi5vIn$zjI3WvW;@UC*RxyLY7xpi_hAF_BqR$*0a_X#BK6zkEyYY zoPNcA(EX?6fLm#utI_$Or!#nu+=nGtST)&F5U_tzs!)kx@6P-uO9e`;ChDL;fq%Z`4* zM-&vdx?&a8HdCV`n9DF7VP?!^T{0^`XqF~Rzk3g1t6rucKkXmHk@?=Z0x@khgd$O= zvnnX-M8;F8b(wC5t3>Fv)8+h+XFFY`!J=@Jh%){a&lcHugoRhK5L=X9j^UYW`gkt; ze%_xtnr~kvfSNXc4D3TxjcbRk&c4R`S*OQSD=dHCKT^Q!V=T%n-tb5_0?GMbJ*89~E3Pd@1i3m12eixcCdd#cTS^dnI-YF(ir z`y=g&ojy0hIXf5@r3Mkp>yuyzU0Wg*k-UPRiR^ZZ$EN|JtV0$#v!p;TPq3BZqMep$ zQaNlMJP8n!cbKIjbKYN=jm&@Fvpgj&-rxY6h=nt0vH*Lf!?5^yRZ~E!L~c?h`$*W| zRGPSR_Kd$1X&$F@qOUq`O&w zhR?QJ&i`~t)v`M>=n-$#*dDU;PkHB8lx%$Q;q}qmpmxWGr`njNjh%ZBA1i%1?AE{0 z6B&Wcl}`#?r|IkNw#Q~lA(+XCDE-4(0&J-WG{-1 zO#rSVy6GQB5qV|8Z1CMh>t6}a?Sr2qQu*|mZ+|TLdUgNRIotMMc5-?&Zp78~jL@BC zR+)*wiU?h$MW%3kz@?@1-Ojo1$^AEGJ8wTZY!Nx|@NP!h^$2K**VRCag22(te-0gg zd2riJ6Tp6l)5(#F_U0hhW1m*b~fg)n_V1o#Tjey~yS~0NJziV1Jb0_)XTC9}s4Cq&|$1Csoc#(-F6phR^7d zmD|PXOp*%vojzhJW~mgm6Dau52RSQF&h#_Rnw=V?7if^9K1+VbKVtmXJ3YRC8(GwS zt9vwSkol7yv}w;$KobV;CVdP%Z6~ewv}A2MG!^a=?B$)2_S#fpP<*cK0tsp5Zkag% zMQVO=aud;K{ml5nNKHeNHV@j=rSjp54n45p&&k#AYC+9rK0hsDC4QeVzrg_q^O~{r z+ou`YJT4T?RnK6AlHm*IS4l|u@2WadfD3=2Abrwet zKEAf#nd#5pzDzqp+!_=rRfqd}>Qqzzp$QkxEo<5KeE2^e$z%{cGIQZMR%i~?-r=Y@ zdP{9<{SHVKVNP&*%GKnKRj6THXZ6KE)CX!eZ?s8wxyU61x@V{V0K<)dTT&?9K5aKJ zkp8cbY^?mDV8$x@Hto(ZRVcevAK@pDJXH8iYohJsu{ax76mujV7uhKX@BRqJP$o+RQE#4w9RlAUOVw|Hc{sL zTV=T8ip|Fx=8msec$6u`S7 z^qu5wZnQ-b4O{)@PWPGW124wqutPU&mn>&m+AB=My}Xy!6md^mMwm2es#o});+?fb z%)bdM;hUGaoGX2H6EUPfdNV6BV8AzTgdno9x44Iwdp!A2OPFVXyj%ypW5+AH4un!< zI$`N0Cva?kM9ch)ScD@r1MnVXzpJ@lj07$W2!$Sgw@7`JYw3aC+q`s2VNr9rQWy>r zt?Pe*`VT`VR?b znpR&ish@nm@vWrNw`5gO{Jb@r1`Z@IicaZB*1-HKAu?O-9{X;R;PLK4^|ZZ>@rZy- 
zdVCX-PZ47|zX=B&Zd9j+1w7+!$K>WRuN|K6bTo}V!t;l8M|3GVU?~tqAK;N<$3m?R z6g-3LWE8Y>5G+bROD?bhYnX8@-?AaMR&5n%bhyeSUs6|}HX|fU0T?KEYx+?>8#m`> zv`^G|XfUEX;Ngh>)w}BIyoO!LWsa`#D?*-&_ciAst{(E6Mp-dGxzGR5JH~%`8A)vs za{@LB`fWR}(l_N;+=LpD%U&PIQS8w6%{`%3y~J7nj3uH-!q?Iqv*T?ag_JdxDn>Iv zTr#DwogDk>ra`MS@h_~~nn;5ri^rv^34*{pE155IHS( zsG()*ZENc80adXL_YbG#L?FOkLZdOeB=>Revyjwzw}%p3aR4J^aBFs}g@skDf5)yB z6di1wjC(_|DWpHL6863ANMEgQ>L*;bd||i1_OdP7H<&vbQt+rv@Ytz7^s=qugYB|m zkGsQxGp-)F&TEJYC(GGA0BaI6qxYGIgDyp8UUJZ)tk1nB^n>PSpCJ)Ps&f$cS)(FO za1|p>W#k=lEOpz$`{+3UJqIV|U^bH?Q`ZPkFw#@^m=_RcBhbw7Ra(n10@rw2fb4K` zW({Fc&EPbI5Ap&+DRqRWzYM|%4kxEm146BR0U}vg)4Aj6er#_e;0W~bY(To}?IPXn z+JX8%y#T7wLEcL#{ymya)shRIJM6eABRn$6SWY>u<2c*=H+CuRKOyh*^Z1uf1`@8= zI$jQ%2y2^ax+{QhKkjpqHBN#lc5$R*oi?9%Dmqz3>v|+?92eCsVs+_Iv94uA;ZmZo zD;Hg!7hi%{X_2j53_wDKutuIqyWMBgioV!CXbXIYLqnj%Mo!WdMnd zA-xeu9Y!3GdmFh2$;QiuP8+RW5TX67307uqg$7zmaMFMPL zsCQjx%fQlgkBQUx!B$Js;0}(te^f{YNRxd=Zj@A!^ ztZSF8%7ok+XjW;T3+_B?GIVx9w1qmG!n_Av8jgG@_PNDwTeo4Hc3W>ZqK~lu8|)w|<;b$8o;b=lh@AZvNnQUANb+>-ppU zFb{{pXyE^fc+;TUbSx86+Vla1x`R|MnCPU7yINW!L;xAu0W<;K0V3L_mE03lh2U<% zlvaE(X&JT=iEl&zU&Lgl4Y<{(bKqlroVC6{f%mW;rTYD5Of2#Ma)S2#=&HFN`i^Tu z>E!@LoByHmBs~Z{*j5aT2fLyU8b{C8tz>2f2i^kT-a1U4L~Kxgqov@*UG#rOD2a%& zEs7$@Gm+^`xQgB0|7T@d8FZ9!1?v`I(+Zllid@2BYrdG4&815fnqin8qwo-RV8c~2 z0n-Xh;t-=&=Mew@-CcgCt*+UTVW}Uls^v5Lzq0eQ__BN;5}!)0fFX&}+o`r@(?I>L z_(c)Cp#Eo>#Y|w+=qcaM1^(E<%*1X>d+S8~?E1uAds26i-ivcwg2Qp0(;|YeXz%Y9 zbJ{U;COc3ZjMpx53kvpYw0CRla0>76k*I9if5*e5nwMgW6uncQ!l_N=gD80VKD{VA=Ji_l(onN_*+zmTvl#VF_W_G6XktJObzjgiC+CmnjV(hkGzQLCf@~JXwWVXE~68U2;^;5GF7|X1U`C575)iW(-EZ)TH+b$pd@y9d__S(rRkW$ zwFUmsVQ}UuSYz(QaRY^vf=$6#cf;t-kNQ+Ft{O3ViJaT@w{#!Y4ZkRvvi@#Z+|Me0 zKl=1n^FQT(Q_1LAaztkecl@*dsf*w8fi}l%{mDOKDGoBFuK~LtG-EoX&%BRg4b1Xfa!d*hgbsbyoJdVo%fuO3~Z6-z~aQKnX|BwhkYI!qm6ZruIV= z@&HOvn%~mdq$^F(d?G4noi3())tLt&E?;$&+EY$ZXrErk;^eYgbd(`QV`><&xEF;_ zw>Kj6DVQuvn;}=jHWH;{yMmk>Lgo)tt`2?3XVwlEzF)WEmsdTGdo5*kI&yr+#;(w< zZ#HeI)(`le3aV3}s~yg8aM19xtD#{Fzny&Lc0To~!aRkzE9jRmUJ`NaU&5OsqWt>} z5goi!9rkLa8wcZBD>C~kV!DfLyF^^n><+>@snHKHo>X!7-a&hz@=C&kcc(h+hS5K_ z!zNn6%2o(16{bVDn*|T<$dHCzh>28WD%a)85tl9<1*b5{0no1imqCEY!bM&9YSID_ za7`51_MqaNw|Nfz&5E zTDfpd$7F!=6Zj-Gf02lZTi<*ZZk?Sp4`(_FK&=XAj-Dt}un~+wB(NfkxkG&xAs zm!>@fD607@s<& zZ~O|(@TFAaGX1TdcZ;sn<}A7Md-3<;lOHMpN-gYEJjZUvu!MY{b&`L1mfz(+Vn=*} zRe?jn3r7ClO8sLp@6R2I$nD(cqVqQNgFbmaIOstnyh!xL>#Cn@>0z^{zSF zm2Xpdn&U`_(%zuA7}VReD@;=LCH5V>VYm&+phk=A_rM(;2eB0hZX7X5OuJnWHT!K% zk<*Tp4Cn0+-kIk$5nQgthv)5y)a`$1k~rlgy-G@0yaP7@Tl%U3Ww2$f(s>-9@;hB+ z@Uro^3;D}d!|7Aug7s~ilQ5}c5sENJq4rQ4Vb zAI8{1;pW-8^Iv`vISK}(XM9vRrDIZ9GU{aPcx`u_FoZmXcCa7B1?AnAA!>bhd;Cqp z_JFE?Qm*}WaLJmoCCjd_m}(@AJUH^2)RDg9%CFUw37yp#h?l_-`dZ!%+qVJ3q1%pc zWl*Y+l4suN#o2RE_wL-iw6DfQoDF5m)Vk>HI5T1%r-N!7|JEe^z_^nt%%eOu7!z zrdDIRFRiJn+hRW5|21Gk#v~baIod;gjC!(fkp zX~j-5{bridsn#Uq(R$Bxj^|{T(=GkB;W47Gh?}9v*bm=M1TL>dSRn|ZwzTc=zm*FE zio5l!!kP8FAWCs$YhNIF$) z*%Wo;<;Asc`)wl)L(gu$`@fpys_WYyO89ZZ(c8K|T*o+d(|Dfg2HBs<=T=?cf^zD! 
zgGp9yqzM~N}SxTQ_;Uc523*394DINDLtMgl8{3zVy}*Ou*IHyBW8rsgpB>IaDyL$_mozw%*a(z$JaX4QM%Nf}m9gwdaLScf~~ zDeRTFQy~$v{KFm8@y)z-`OfY>V|~I5U=TZdd^ngEFj6pf$SVGa3u`XE$N9+Y1={3S zq8~$M*Qmmr_}EdJbHC!%2G7mcYXYX?u91{qRs@jwPfAr*-aAj3qD3RkExi9GK!IZx zzzWC4qdaKcVqrtA%VK^AW15ZWS(qRBv%8VBxjmQ>0%S_cX2Mx#oR0b#((Fov0HT7z^QjrT3ZmdB4zsc3;Ob2>}rekDsWq0OmcJi^Ejg;W>O}6aO{U zS3{kf05eM4O}C3NV16IvDBuQ4g1U1ZgSHaZCFJ_nirf>Y^|RcHQ8Hjmd@?91KkBhM zUi-Q9>{#%Dc`3nkU!G)9?F?!ysvc0*r~~?Xwifk)JV zTDSxQX#njTBIqE5F*ZhQfTtd$0J;3VG6Kn%ru?Ztgi~7{t}b?CS`O;~e+_6-TW`~= zR=L)GBH5+*92==H#AM4@_d_4hV!^V4X+?i_)H&<}q8Gx=xT>*VtAAN;23=nk#Xz&EEoyGM{xJ_AZxo`IQ zsxzwC3J01N&hu{O5eb27>8ElCPNllJ7IUs%6rYjCmS*vKYS!8<<1qp)@0`Z3QH1Ne{PQ5M* zg`I450>-w5n>V{Ji5?q{;gZU7VUJ2e_PB`Xt^rL`jC5^6u<50Lh@1o?Cia}uuALi( z{ysmqe8DQ(p1rAx7gI~;{=71Wf4mvP)^cO77s9yTz9YPpUi<>IweVb)e@4%&YK`qa z>mUbixWMzC^(?mj%gC$>zwb<7F7;+#EZ81@(&{6ri*brkGJj7iE3jIzBn#r16GFtv z>GfADnf=}S(rrJr^9p~KneY3Wb9~vlgB2wiDUXYup7`7MQu&`NmRQ^Z{`hof$DQ{P zD!X_dlM6FvU+r7BY_frU1-i@_lbIxTfrOW~IBuQdU_`W%b!~=wmlvK{x}K{Dw~>R~ zDh*;8WJc)zsG@iq6bsx(u{2>n*2-uTGw)~2X?JJmwf7=M!gT{Tdk}Jd=`ZGO8hn+T ze)q*-zIlt$bf;WLf4;0Raty?pa%5te$403L!8@lbaFJ>yvsH(8NEcD0Iwk9t@fJha z>FV$}cRlx*)zT@QPi>i?x2^qoAV;-iU+crW{u{;cnNP9I7;<{GqwR3>MCm^l>G#fU zB{q8`-+WQBg=ReVGV;#%vQP7O83vDG!|v7AMqE@lL08pIKrbsmP;t*MASw>h0N?`NslfBpu_q zyvA|C?kQHRq+>kR=KjJeJ@KF932z#hg5!#mk_Fkr(4?O@P`8^lzC zR>u8Y?$cEA1gdqeF%pK}5ac?1mIm))z;8Daw!;-%4AWth6su>u7f}LrmbIhwNWJCa z`n%BVW94sVu^&*A`>43nuDV6GT_0p(wW8hR4!J=B~y8jCzl`tX3!W}BSIWG@Yabd^)3v^>JK0_RZyHU z1GC=ctX;ATTk+S%w23+I|2hlz&lb9e5MsH2lMHBb0oy&F+DP>;i_RiC;>*PoYm5{A z>W?pJ_{|zqu7Vj2V4{J|BrkG{kzUNj%113RJmwnd(mfbrO6x4jA;Mk6`k{tj)A6Yr z%xryA%RhAgCy$QEx+~OmRjuTuMtqCl?*()I))>CP7wld?K9P^qWdLLc5%H@Vg|dN_ zBa&5&O?`uzb)%N-JCT<{L^WlF{{xR@zOjfzetdD8Tp6QV#*hpZ;8^Hc`c5vd+%uLFEds6pq zm(e~OLA#6~yB4rR#Y4?Q1_ta3rSH*9tl>_?BKVQ>xKPBxS%-gaWO^!K6u|J-U2xY> zQjM65Br6QKBQj}A+j_#ekA(j&UBtcPPjDH0p5?uI;?_QUmkai6;_=TsdZW&x0&*?$ z#y1P>{^#{a%Ot)|PVqZz_lu15((~^PGE-JPl@wz#r8}6K7IDM)>>FUht>+y<$mn)< zx|>Ne*W@iK%*iNfz@(LXQ){G-=-B8W`k2h`h#7hr7=UPoJPALM$0R)!wsy zc^lI;Yu1;PTl+i*m9cY+Hvd{fas9(T@-kxj0Nw*K*U zj>QtU;v}0culp^@pSNe+|BO)b`|!q3U;cU!%JB6Gjtdj{g!yd6=Ns`sT)Ukb3c#ap z2k4s>Da%GpV4g+1f`#ElmKf<%i!B$AcG@K|Q4o$aT1wDEcJ(DTATv}=i_}}igRJK% zR$2ArMueu-P>nD?+BG&~tuWhBm_74)olrLKj#+Bfz(C)HCCbJ4KrZ&}qq)%6X&3Ha z{)}lCc=f&4+?m6?pR*RC{D`Aw|J7LB8so>6#+Ut`6Ppp36pwM{;#yDA**s4V8M#V^ zjpSK5g63QWIaT%?L$@~lA90tGTH{PKz0YY>*u3J|u`b)JW_ef2JYJR9Eu@F-@b&fa z@i{^FIb$C7Ba?`5>41pvLDvvp54FMO@%4hAu(|gKRRxb})SRr)5O!$@>9RGOjM!u& zRt6BfHG~Qc9g}1wzGor^QF+fKzG%*89rfFzvfR$;d$R+lzs|FE4eY=0uK=iU_rOkU z*&D0D(v~i9MyER*iUZvuFwFw9L?rg;+aM=Cl3W7P#TizbiQdiFDBWObKknYZnJfSc zQRh9shmxF+M>KT10xJjovMABgEY0)t7w3?d@4C>J|iW2`vcJ%#EJhMu^uC2zpYuLKa3~wHe2Daq%fSqFqypm+b9SHc7z72I*n~ zBZSOFbJzSfEkyXge4iT#Z~w>FZGUf!qb%_00ZNS?H9OtX$Z_jU=O=$V=ie9qaNuKt zYzDj2`F+19{;BRmvUP%-gEFgW+Tc^IQ@Q!pB~v<-RB2+R9A7hUG9(B1Uckyci+elG5c z2Q6YnmhdRbgb-Ta;gT%x}fn!6=4md#`^F6EMCFmW@m zVRZ}P-+@`1rGg!;2@Idlt#9(DN9`u{_BPj7T#$x6Vk5JE>?d>Vz?`r+nQy=f-@Pj0 zB%4qwvznbl83W8E>8sO>)KDrbw%&F)$@=`LRW^^+z(w8Bs%?rYJ!pOkn5r`7)pz;U z7_po;Bo;_pDa+pa+u^LwF+nPSB+e}?e0^YjYT&L7w?elHgj{ev z1B27yp2M_O6`-Hutc}4Sfaq~f+6vvSrn#hEjVXVE>c*qJ(JcL4Mu_EE!FonTy=C*K z<;~}U*GVU*ToR(Hv%yGZ*KZM_3}X_z=!RAgP-#hQaT5E~EDMMF{{uM3 z#=WTZ5M`7x4m73c7&AIk@pt_mVGZ_4Sxx7V9ru{o!)>eYZU3+99l`Hi*7x$3lUh;} zxB)S(;n6WLD2oBmNC%@XwS_rMGEPN8M!9{B?^xZ+%rO!mt$U84*xjRhljh%B;2vQ+Tqg=5;nmCy|Y z%8)VKc+gYsvSTs|D+@{m(DR1wl1oM%|2j_1@z>c81Ba@FXGm{+$~o9ni_;Dh+x7 zv}v^QghhSy*7uziB1;UYJ+3$PRX@i(3SJN0D{ta2vu2coR!@0e&KFMZm?-;KMAss9 z)b{cdDy&rI+mzV5-?|ALzLvM@@VtQOMA+;tK>k@wBK4X`lFVU|SJ25USIX9BNfzm% 
zo7j8Wl9}gKH#LwQhspg5dt(xtp@9f`!puKZ*5f->o#oY?Z||5DF8{u5>5+nM!RIcl zID98;Pfq7t&rGxTfoYy)#6<4@U9qJ|{(P*uoJ(#7Ttp4voyC~vfN0^zLG@;A+P`-O z0Ghjq7_HghJ9K%r=!hi4s%VjA1CQ3Yh!}daZJV*4vC;b3wT9KZ_w{6Y>HY0La7kId z?O|Kfq}<*Ea4&i4g~A$9WREL60BpJCd^Kw|Ew; zFq8258jO&vXl)x8!YF7C<>=!ohVY3*dx&#eUyf$=*q^)Oc=xe0iA7u2mfd8QjP%})+GG18gT9v3_Mm`^$IE(N9 z3og?!8Woh{Z?u#|%h2MkWvy0`p!V!2Rr_)H?49LH#|f%!=)+7mW9z;L4EycT_Tduy zudvw=TXd9ykA-ph$F5x->HTMJ`-`dv`yB~L7bQqXPXE(lC9!|LsL#!a)zVoQkR=X< zrZkgtMV~C~scYh(8m=j}-bAcmk&@m_15B13!GAY7)<}xg>^7K~9sF=$&c$DC&is1I zrx1&8zVq(&CR(7=xs`SsdW_W zGp8>0 z=>&(ig&eB=oTVN4YJ5KX*^A%nK0c#y^EI6E1{p;`FmoAGD@P$z?o3WVgP-i7n85d+ z-_Y!Wwx0;xmK8hNc{Eak$;99zA^SO+Dde+*Sp`34HEu;jGGDDpydG{@$O|jCHWiG2 zy)F|KqQj<6K{?QsX585r+wj{6&+WIftGyC|+4U1ct#0v+1%ZDnRA(4=T_wE=7)xVo zjrF`vX>;)EOEwOEo9IsSFj9XoSu$D zyB5qBtx0#`E3rkaQd@iX@f?l#BjE~tr(VF*7ciFM>MtE2RmA)n6!^#AxT4cbtaS!L zq@ug)K-3510-#+=XHRakKYM}hO65RiCG zhzk(HoXT<;q3QJ@O#eNt1iDMkZWse`1$vTeF?{se+Vb^cla1Ou5tfkx>B$XV^}W9D z+O~Ev`uS!BZv7)OdI{~?!&6^Q{91VN*dzF0lGf|7`)}Hl>QJ{cjYsYX41+e=@2YWs z?EGF0YuFX*MW-*_-zFj6JzdDk<$Su!KRGUrnkCJC)$`&cVo)&4Dljcv(q}#SBCVep zg16{y4Y&8nkc!aOs~mS=^E9KIxex8<;%D_=Nj|u4UEQma_~U!-`?VzT4a5_?5F)akHU(xAe!=4v=-iByeXq|TbPpCSRJ0_&23)R29g z%(^YnJGR8f$3~IHJtI6BZn=(n1e+=R&t?2kZq+B1p@koeW<ggNgc)liM*2_kz9&?gTWX4mT)W9 z@j_yghinwM-KV@jgn; zbZiRrl#7nI2EsU^^#6_SIaT$G|7x6bSk-DPV^bk)9#@^_+RfC zE*?!=*N$~=6qV-mb=p1L$=de$n(urm7&k}ve21NbA4{?*?sdS#_U?-uy~!`~WUQ@LWFH^Hm2x^Dop*gyJf9*!Z8tZain>q(()Vz&H)n^~ zltTxEw?Mx~*wwcJXL-tQob`wZdUaE`ZOP@9#oJb|FnJG$g$)?7j>TaFfKKmU@DqT2E~@91v+5EW=uXUQE(y$8H`o zOrZ3s^?I1jS8*yeIA@!Bl7wZ>*b%)8{R$LKn}`XR`Eyxq131cY-Eag&B%@{ZKw@1e0p zVy+dZo}fa94HJEWIg^t(LHLM6wVV)dd_~u4^1aK;sL2Tp%zJ`R=eMiK$)u&)BAH~L zxrm(X6x{q)u+r`D)yQRU(VqSIA8V(HcB>*k?x+5!5I-NRi}h+$1gFcWrG^7&F9o-C z9o{2IZ_>7@frJc#b~6bM2SQTfT=3pJeyUJjYhF%5p7R%gBWf0Oq(9=wJ3Y zbWUv9^|}K$_OOBMGsn|bthssP!MSxiMZFC1iYHVzDvU3a=3@Q=3YaL$TvXlYor=tl zcXDLj%G104lqsy-Us0{#a;GuFZ7WVPv!5gJsg{AJa55tdQ4qW*517^J{Pu_A0~+-& zQ*2IJ_6S0KULAYCc*k>2WmTweH^Ug$@Ht>W(chTRb11qkiRcN_MgDJGfq^W2YNhik zlh1Vpx6;nd7Amb@>B6kjM@^2wFWkOeJwP9VX$U`rHzr%uaWBQ}U_s$Uc~*f(1`m4l zyrV-sGYKw4>vw9kicvaAG?la6vR1?t!Pjn(qu*ptW|`b>%#m0$@v~Q6zjS+B{M}cX zSHU1|m$Mc?FnrLwti8)NM_rs6ev)n_4fU(w`2?Sk0ct6M%hKh>e&vxjBVEp~5S$Yy ziGHX(&rzz!y+mKM1Zq3#Pq~sR9Dv@JYMTFWO?vU#p*X)QOHX&w zTizamc8x2b8^NkyzB7IWo_kGk69)@U3TS~JojkOWEn}~G-I$TpkuHu8k-GEzs^V=?mb)g zCHvq9-8P3anxe3?W_zn%J1EF4A%H0#6&GZn-B^;Tu~*?RhVrsDNsO}zjTnLes1Cuh zEQJ_;sp33us0is@65=L zG?ZpFvsp^nv4hYg-`b>Rdb%X!HqF)O$g_v=2L!}WfSlJ#QUN4|oK!CL2$g}MhQ(P% zHbb+sWQQwANdqF}Qu4$DovXY?_AfJW5fTMP zVWt3uIPqIxVy2N0P#No`1JRc%7xg-$c#E6Tq2afk>+~z04VV1U;P{;#xEl0)rJa-I zZlVW#FAi#7j$v+5@eH^V6FvXo` zmgBhAv7NFSCap6g{yrCLKM9(*f?f#zAqRb(B1NI{Pb)UeC2(z7$@A)xj3hQct5nkN zxLg$Y-K7*CCU@h4zB-U4v%u(Lig-cy*M@D+CY;t#E9IEmI|w$*kIsaLqsUB;j2x~Y zRxiz}ocTY@5*MXDm?_Qbht>~CX&)e2V=ApRVgozi80T3{VKwbNx4i!`dJXBfU0o}( z$mQJx_w|MUp7yI5cdZ+DbpVo1)Zn*HlA;VrVFJ=&xyf#W;PzUQx|JvwkfH%%q&#qzc*`NkJB(DVdWBj}V8e+`>WYH^`2jKFiG0mWSw%s#mF!P-0mzm~ zHyn#DI`%~#`9CAwF`~Tm)v@NcP1n@rJ9UKR25eP0_TT$Z;Se!vCNcpdaNwT zM!Hed1NzD5MQA|VR@`obyN>~v1rZmiV=)Dp^; zRjovDjp!tnoR&KYh5`}oNSzwSddaah7OQ^xZ#n*@c>yWpI~&Y&-Ia}t-t8I^=q>;8 z3V;lNH-NjQA=%$>CL6JUAn2|NK-F{-1a`d!0K=+>dEQUtgxhmtv><5!JrSNR4nrtS zle=yCR!tgeuoXEAru55c?{$rZIkd^<8QL3eQ(QBDS@W?~6x+B1zS9QksYz18eZn#r zUm1=i9c+EBuv>N#>HSeCOGA6efv5Q)?Ah3BmvPqUqqPB-v;vpqhaG)#5k5f4G`Ob; z&ICNc&Dn~psKIB$M3s8;GMH2pM)H@YhjNLft!Zus&_m{}9Xx`&7EtfF1=>~rDkmV~ z&yp<{+4RYk0S)U&OFcnHz>G`SB_r&V5r1t4xKNo^L;T?8|rB!S-X19$yIm zLQVQWIw~M$z_kQxT+K`?*25L&3vHaEVG~3*QkS2mOhvJ0A2)*`4+A!bOSDA!R4Fb5 
z$vCFN^FwcD{2*HJ7w_O|Q`2#QhD&n>aenGso@v+!0Vz*P3WchepRaYAllr?IcN{(P zYRcnFW1D@XJSnOE=-T=@-e;#`gYFIjbTtNLJ9oBXP=K}6NUGM5ykV~*eG7MK82_i` z?cb`OXt4jiSas)Grqh9#H*(x8N3zu4i0`MwIe2H)VzTq zq5#1kY$cW%QD*>0mx0eo^)Y81zq;J7Yicvqv^6+hSzeawtW#|(zH`^i+zlnCWG1uK zEhOOo&`p+EkE!(Pwzg^>0C0@y6gloOK*H_8cs&W%0{L30=>YdATZRdsP}U?+p2#si z0QsS==A%q+xkqmubm4!La_&TX>DeZ=b>GdS^hXBj5q0@Cb)S3f^_sJ|ctIx`0O)rM zI}f-~KN~ySu=L;4?0FMWZw4@7hQ$*3O*HfHrqSev#w7jZO=|d5fhng(NF0)5aI3OfFWxzfhb9;0q@3LO@qj&6#Pa`a5fMFd*jktuYTtc z2c#5BI9y{xg7%;o*n>)R4^6E<_~-fP)$K~_Cs%^nf4G(N-22q4`c4VRxtfk$0-{`f zePb)3i;NRQqZJ#R$bhX(KQ%P{^l5Etzy#*m$kO#?AgYx|bBPZJaJ8_>gn`jEK<@d5 zw}x@Xlhi1~UhuPb(c8TT@7=XDfQd=CZLP$8Gtdfq6g*7>!B+&~a%>o45qB^!_L#%u ztK8GU^4${v^WlAGsprN|juuv1I5G<}9{zv-bd#kiG$}B348X_;Wl}QIN-8p7_>^;V zrDU{%?XAJ~0K{2Ra)bZ*+uvPsp01juAuWtd?1j?~4pN8Yw72JIf4Nv{^7yFgg=)0l{>EJyM~ZmYx}mf_C4q$td~C(wBXkNjwRXQD0iNE1U&R_HF~=K z<^^Hxx!6b}kD1bMJBr4{p?nmsj!OxFahXWWlIx_ETFuvPtUlgeT2V4{e*VhHe}Hqh&*GM^lYv+E1suw(zvC_TKZ4)@1Hk42 zzwWKRLjj#3(^3P*!$>%d5ZCPv^N{oAnro>WauewHy*qBDQ+>I zbe>FJ+&M69G+FS_PlnZQ##-2selg-s>P&1$O@419AD<+jG~y)EYyA)@3m~%!cBtah z9V#EK8UY291O#y2ZSbJIF5ZvC)jq^pL8}}-+^#?E5I(~`xBC8-+iklGLG1mH7tIAY zLA9q8*zN^@+!^4*c}KJby81k6a4lI4^e};|J6ND@b&!j9PjQdS%JA_2)q%BpF_OIm zoc*Su*2&%N2j0N6H_#`>k_$}x@$p3$iUwXDwEn|>gvML(;G1B7#j5M8KmJkG-%-8z zlt=N?^i5Ca1GtP5BZ>CqvHKV58g!T&pCKTv6o}G0O&a90+jJ(|-rxkSxMt|3J)}Jd zQ?%&A-10|rXs_RtgOLJKx$cRV0rWN&OQR`!bvPFdX8p_C!HeZ>9u^kB*sJ#Yp><=h zc+Q@PwvgS<-eN3@oyY1xi&7$D;JFBT3^l28IBRHK5F~B*Bg(6Q4wAW;dtU~4QVeef z32H938PsIHQB&hbh)t8!p-HNWW7=SEYO;i8O8*4?H_6;WV=no`S<>_YqH}EM@8Ro5 zxV5=3ai`&*nMLpWY6;7M&J5lAT2H)Szp-cG7jOE93|ky5E!+t0jFIAc-jYvFl6jv# zNVW`z|N18i_Tsk8@oh;ME)Uc|>m6uY;p$Z&>#ZOO4Lh`9( z8%em7DOM^>xzPx~>xf1RV-*5ReF_;bsIDcm4)hc@?2QcB{j_i9sL%TThM3A_OP?=2 z|M=486882)->lcYb}fIVp=&H*4p@w zsER5&V-ex0f!AK@9(bUU>~PA|8eUl>WQ05zOkdKH^RLy?#_+=-{oEBo#y_~IDqHuI zZW$Pi&I4Qbj~nXqi>Vx6T|wN2$0rtUjurcI^s=t^bI$&9LTV7Hh|CK=q?Z~$kL zb6U6;kH_G0rd{(W=8bWYi&r9Cof36?JZ;NdPb*lhJuZYSF(kr=LPafp*W zZh+{(u(uG*H_Lfl;{RHx^H7-twF4@QK#nw7QWYs>;Uj7uf!w$os4qMf_cbu|@!7=< zp=7HIH;HZzz`#e+E*Nk3S&GSD$aTGInfO+v2l2ReYT^oK{>ICA5p z=e*j>W{(~#t@y~zkMi@2G5;$*znL3mJFYKF_ly}mWF-R{Sz%SiFHSBaKyrNX3qp9=C=?pU43*(u5s>?x03fSG3LPyg$0x%STdpv=z0 z0r{Et>1n5quG5rlJRd>gbQ#&kM8!JDPIYSo4ikvG&-@?5r7yOhG%7r;Mbz0$ALNdW_1bIy= z!A3Y&hR=*UqW|@s?1sOf65T-9+~cL`5%i+;fhN~V8P_5`!4Yp#+Ko0TTXP+?vZZtM zfL*l)0dYnWXPAu1@)NE8=q3s%(~z=~nJ)2B=Dpf{?^KOKn9yO_RFLcF5L0L_ZeY%7 z$u)^sN{-SQHdP8j(TMkATc+wj&t$!qXA;$c{g^Y}AzA&jl5F?>?wozRa&zdg)6lv8 z@a1j4CGX z^ZMT6vJi=n(c+XXbOXSn7V4u_dV}LOVNbXXdLz z;<-0ETlIpt_+pmy^bNPrw_c&cN)~Pd9;H;wo8b`B+4`dO8Q9z>MfZGr$EgA7XiROl zj!MYMk9T)KW9JC2#WKa~?s_JYAaNT(ieiU6$qIhg8A8kg=Ze#T1w|(wx(C?K)JJrk z#ylSS*L&w>q6FrdtOz$r(EzsX9w7>0p3jJ^cV2~<5QS9w8Z`*109LY`Uf_P+B|2Z% zh`Sq`mxZQ(J0`%irXrl3@P0thkU5kimDsmjUw-GWZH$VkjWSoX99eOhQ`vggV_@5L z=g;YLq&Qpa0Res&C^8G9bR6ELel{2oN;$oX3e_}{oxZ~KTsgt7`Qpl_=eJuu(MbJV zmYK#+h)nVg%6p})IB7*Pc`H?7=hLb%Ye%7g`JH6mD1pP_LCsP$b4urJ(neo@Xu~i6 z@_XV6b3StmnHyXVMh<=QH}c%kd^n~6d(BHh4xCc?Jl8Fy&S-Uvz3ETv%SW-M`>w$M z;h*AnJ0B-Omr91+LiRUU75dlKn{Jn@Y>l!5^G51T&F@Xxe%2&O#z02Vl%U|BNOFie zr*+o(+!FWY77qiyT}0Lwm$R_B+>Gs)z3(kAJ??Ai7V2WUX$tE)F#de*Rkp>J+Itsk zZju%(bD(MDq4#ymDNCnC*!$@2NyoKZA5BMPS-R2=Tc)6>+k$=%V4WUW9v`K+cJuR> z7284shQ<@_2M+8kzz!S)ecHTely>jEGQ)x+K}h3rfBj-#jg$683r$pKZT!ITau(FP?vW)*d-(ND5(_y~vZ(DwGl4JyA6pVMC3$aO# z9OkdC7D`-5qvs7}G%vrZXh_+r;tnIv=Cx*)_?4~Tj%?@iCSL9~=a9oF@HJMSz8$pjr5rLIi#5yhmHo50H zM9b|>8)4xVsVK&e?|!^J7nlx+knUSoUYiSuocY*{2lZ46zF}cyI>B?L9NiJ+5|=+x z(A(@j*cV$p`DOVPOS=r>r8rb)zjW%#xh;D)T3;MI9`q{emcz-3^|N0Pe095Du9ttm 
zJ*Xi(NWEI(cX7q0*{hxfjejFn8xCYGUt6*@{;p-I0oT6XxX6Ce|JYO9tdWu3gq(^s zW3MN9hm@0+P1TCc?vnsj2QxNLDZZ0(K#Z;1%~LIfl-O2VnLiioY$?RXs+9y10^FKq z4cP5tT1@D0WHo4)gD4jE9ouY(igC@BDGnR!ZO7B^2UI`^SptBLLJ?U2W63QVf?GEb z!xC&g>>h}F_v?P|vL=Pg*gZ^|IyV;WEl5F|bdZe(t*3)6YhinX!rlo zKJDRdBX}TROpj}=F(H%f&A}**y=$6K@ys%8tR>t3J%z920}&&yHpI2fAXKFf$~iw5aAAPv!5 zCMviYBC7<@8pR7CF-HLIimHn@asqO^Il7>V9_==j!W3}g=#*hoDmu_;0+qtT`s&PX zSaEbS2A_)xNDwezkI=%w@aD4%<+*-HFtNH2$0e5!SFpJY00>xs1JGFU1-Zc>MWSQt z6xPD%=1#EXCg{}-rrW5}eX(9zhP57dFS#L*W4qho7B?H@*eFM>F)YqQ9;Dzw^g$a8 zS$Kcz7&2;hpH*<*LMt-YAuQe8`N_RP1Q(uTr`#jvAcx;acIU>Q3f6+ChDskstcF!@ zn2j`23J*v5reE^vT@6c`Z`4q(z=B82mtTM?9(#k51=IXL*aaz2zBtUJfiDjWrh>bM!Q z3EIpU2VQ5HuFcF3mlDFbfyW{Cu&y~vsJdQu>uEZ{3{w0O4YI-&jL9fV=g{QoyU9Vi zwZ464eERNoDLT6pXGpEzE*WS=h$-K(`4+B zZ`js74=to9hyrrd3ae_+QKy_pQaTASj!1ZJE$AY^C>w(u5(eEhSl4O=O6gi7U?74E z5Ma>X@ka)QC4+Bw;bKt%P>#^lO80Ke!YN1Rg!(W(80Sqs0s%)f+6mWm??=9z5zlCa@`BC3n$SRdNfiV zE9T%}0}@Hf{k=hj0)(as$_)v*M`H8Ssuir>8c2Kouuu^jUKTRNU3u$48tS^DaupvW zbM9E4i%F7z>~L`H#N(8&zom33%s2zQhl;Ki#&!8Tn^8tj17;DR+`>}2KAF!@14VBJ zJl>QZd@)e+`>^M_g1zTJt_~YEteW)|csqhnvwZjw+RHoz~G*=*UV(l9eJkUE4aW zqm@bsiz0*}$?3kgPC^($++-z$u$=Drvio=a{(;sWpKG7%^Lf9Y@8_!o1-tUhKsrVs^|l#OjN%Acu7QB))=5dQN>AD(E23+?0HY}Ew(>}mELZBD`m;}p;V=fAWE zGQ`Oyy+wQ^LI#C*!m?C^pR~*jPQp~#1uD=fBO+Z5GE4>TH7s)l#6c>A|8kJzP`>IU zyx&w7QI6`{@Zue?&NI;@$n&5au*ac1pi3;^UPyuw+DE9z6i!~a|JU;;lMkQh;X*A# zhWnjI;Y3QIT>=gEG%CuAle#H`I;BU>T}u|08s4cj?+ku-{mq7n8gXB_0Ay&yYY!gu zok6(3&fXaWM=cH*M=q5M7i(a1tLUn8**`Z$96CCdTfWh!Qdc2jo0Nr@RD^3Fav$VH zxAb{1!DS(I`XI-yip%u!lA6Qze9YE5h6MF^%7yu3j8OlYl zi#BowL}>1k<)C@1&@4fek$x&bfi6&0xH>@o4(0l=F@ug`N_e&o9<1cDKp*k4j*88= zvecwg9&|+YcJmdJzU{13G)1KkLlX0cVR_{`IbhTmeZ6~?DLQJaVW?tk`C5YnBT53= zQ{kB}^mvo1`&&`8eB7@U>3HNEt()y`5vV6is2LisiEIjCKzQ6VhBA!?j(yn#7|#gd z<#EQ9rLprSO+-ujP{+Cewnh|$L;E?vxl#IEw4?DQm~}ZqTNx%s zoQ^i>+k<1G-0-(+4y*&vi`UNYtZTd6o4wlOt}Nv$H5Ow5=Oi}X~w5;1oM>1@K_Nsy!& zK=80!3p=P?W?(674^F*D?v%{8p&;6;GT@-)4Q~L4s+xaE2eD&w`xc6IH;avdH zaThvDV%VxC7zrN;D!{gDwO^~q*+=Aknj>rw2beV5na~#omf=2%&JSwEACrAjtM~`{ z)WnK+YwLsdv!LV#2+akI)x^k;IztIaN4dyv=JnG=@tLQ8fr4yD2gr+31)2aZD$&B$ zwTH_30=rO7M*JXu#CkAj9S;-YgFhMs92EolYWEI}ZG>`RAuO3eqG@J)?=K*t7K(Qyncf)%p zHT(bvlXxnf1Oz;YKDH26fC~+M5SFUGJ$>+M3qg7sLJk>m^?ZD)UpOJ{A&5AI$ML}E zn%!%kLZ&$P4FJOT?#s8o+07$9T^H%{LoW474ocR`5%z~Wimwb8AMjk{ zHw~BrLS2~<%NtDT6&_ptP&*|+TB=jf0SGKl6sZQ$^D$op61#=p#Fzk3)q^F#b%%_7LGnws5;eNrmNFx5X^f*!CLNPx?9F z)Ct<9fvCO7Y;7Qws1z79yw>ld9Mun?OHbvvCdY=eOR9 z6p7HXRcu-E?eQu7^JnzSoyjc}UgK0<;AP02;ix|Xe0;syg{!`vq5i@308SNaj>(e$ z3jj=xA!YBDJcY>or73p6c3Y8s&ijf4q)z5$q5zrHRA$Hb!Aj|T1P({xww)--y6+Ns zX-eT>7oDg*5t3cY^zB575VaID2Y%!1=sr_n)6h-U@p#X$%?~>CY*5(hR_6qn%=fE( zjQ--}QU8Lk!8Q6mNff(Z#J#7JFS$nVpB~X`kJ|eFTH4xjLNlG6(=C{r3aK`*@sUXB zY5h;}`eyzKA6PCh;R^5f|0sln#huSKJiHiTtX>X*pRa7Lv$&MIzjv%*$~p+6Z^~!g zm4oLi;3BBtP4~VFNRp9df}F<*LXn~g<-fLY$+bAW(4{#Gnf<9avVrILxO%bXC57tA zqPKx_FUdah_~{$FDR>g9C?|c|uf@dVR^RdmZ|~8$Ve1kKA1+}dUJh6%Fyr2jUY^%} zy!*)EyEf`@{Eue0SnYgg#K{){4FOKI_z{f8tfb4`Xph~h(TON!w}kpWxo%@%wfIBZcCG!Vvc6=mFUi! 
zCm;;LgdJv_I(u$#pPoOR4Q>dL{S*2Zcmrzh@2s>eB%&6ukPCgnxORCRurtnk9+X&% zgZl@N*)SHl<$#u$LORXUJuWfqsu+9x z5i8=0$K0^yxhd=+@{)0uSjYDy#fU&P$TLb>XKLD(qQj5qK)MeKb(&(6d|x;5?jDJ7 zh|QaPZLl{Jvv1$N%q(S=QHCNhH@(H^O`Cla+*o%ThX#qAj;w!>WPj#?JThbN3`nI3 zfc$|!a5!8&W^pifNtaxNLis>SgS>0(aw{fZJ#6Ddv&Pa326>@&HS;&jAFf}|3v~>e zp@do0i~>l;agCYsnf9C$uauyPK90^t6>@0DwZR^6Unp(~`QoGWer~Tp@3ffG`ac2k z?ZHcefj;#c>8C#Ki*!O%}u^<+Ywa1h$HWWz8Z~-)D&&6?nl*3G*gZxk&1a zs?uX;s;S*qqo92?uG^$jqm05)ey|mUYAi$B@4ozHzY(`LabM(id120x;w*SB#6Wf) zlXhhdfW44NVK*pAe-5BTNPxA`d&)D+H_BBP06@`#jwvr_BYa6P2lN`2ID@*?k}~(U zcHDm*H$anj2f}QqRae*u>Rs-z{P$X6EskL;xrKY)R3OuLwlTHu8*uDMt1s5@ECW+2 z{Y(04i-SKUwMr=EgG`LuJfyAi9f9{kco@CuL)EftN|%^4e5B`Vz2#N6EN2aE7XlzI zX>PC1wJ**8{NbW9!*v?Q&bRbNL7+ZYb}xgDUD1`kmG^7ykoG*GfHr0Hur=7IR*gvx zp3<6S_DHQgc0f-Cx4N^P#$7~JSabfZOdK+@A5s1Bd zXoebpt^V@Grp7((tna9jkgKS+FO`eU(W9IBH!ZLX^--&RnOtSE zPDBlZVd89U2&I%eu$0(9$yDdbzEFq4w7T$F5FZWFGx(kC$F^_pwOMUuDW-Jb4YI9lGyprm!fDkF~j)%e5D;EcLDN`l-&fxFG zneYX)H{#@Y&c6B)B+7TRB5nud0eSG~chuzkkdea*Ul%1JSvSda{-k{|Um=T;( z+`4A;PSxa?cnHs$q=L9ayy$P>*ziA9<<2$%kx3Or#usrN*<>FWBWfqU*)T!` zDin`(bJ0T{+VM~t!H;2$Z(s(~iF#NUh_ke2+}1VroWxFIY*>>cI-_No`gB5MG3+^L zV(trUN+-DS`t{n_>B>$L5Q3 zvKxEffp6SuE|1Rnd|m%!vTvQCxKN@(w(U4j;q8w&qIX9-Xvv6Jek#FpVF2VxDwBrF z&F*Zgfyj$2)_imiisW9zVA zfC)UpF)RSkPO)&>qKsn-7vkwt>i47`BChyVaGr3*aoGj zl0$U>g(WV1By*=Kit{~e7t-3yohNHO_FNt%Zz&0|ah-Gfyr;5OjPdMVLCkx%QrG#) zCX?h#&%JnT!o81A$Qcy5ahDoxqB^L>RZzaRE^xl;Kyq%07L>7Ac!M>#n|M!^P*(w9 zJT!==lgsNk{a;M);mI5^20%7iA3U z14=)7_mMw2$9W)WbiTy?H}uvoD)aINMfKtTt|6fl*It*$fQjAz3%$VK!mWRN zC(l4CgcETMW!&PMT0x~y-(V6It@-BEyn8!CL%X+fIv=U$bgjw!lW2Yyk3SdfDme-2RF#GkDdHlXos3` zF}yACsVk_32S0$T%Ug0sFsKxc-B=Z(M`_`s0J|(Vj#CR*z)amlgO|e9;!eYk#{%M{ zZodB8pP{IZJ@{aV8sG2H-4ZYu6fkpW@I_DnWF0s*VA7KI`J?5h_CyWd>PvZB@FURt zTFh*dUQx(i$~9;etYj9j5@5UC$a8Mz&|%=7G=ATfGzD??e)k3iCWnP@0x+#m>nIGg zYua3k2CUWno(3W#6Q${_@cExr`r~2r+Gxy)emy3e;e&kqv7xG?pX!Woq$%~B`=MNg z-fuQt1Dpu%x1uQt*Bi0RTaY*iF(`EJ19urf_=J87nc+XP$?uEJQ5`JL1X1R9wrLZD z;09S01vA<>cEK!f-)FvQWi@)0es5w6z76aAicS6bAe2d(+F*1le&y_))r_vkM$g0n zW8$R~VitiBXwbp7oMu;~eGW)j|5J72b)x+>YuYf*&_P-I`{_#Vs#QJDKgtvon%TeL zvSi{B+G(^lF%C*K8j1)}fX)%# zr*CYOLw??^uiU;vz99Nsut14!Ynk!>9+ zTK=SdltxcqwPCP<<_5Ff3C<@GR&9H4LHE$X)Z_SUFKPcbNMz(0S=d;6vnVucWb~F` z7)t|p9t0f**(D7YS^UtZetcWc&Hb~r_j~}bm39AJ+ww0g;vhsYupo}HbTrX5zBvVg zJ-Fb0yKh2blaTCx1ThV0X$qw&lZ|1pR)@BD?>La;*Ok`0dI{!O(s@e*OiNxFur2{^ zO>jJVf+|~{F0E+K+}~AwciqyNpwo}vSNG^M3AZ+x^fJ&sWVz*8P~| zmdA5f8~1mG+>@0PG7kRq1#Mj;vr<^-JeFPQGp{a@_49r!1?DDi_*N4!)ALAn3Xl<; zL_;CU4=V2tft&I}n+ikQdX}ku&9%6C!teb%nnWJ>S;w3P7sh~{4@VaTiQX?m0uWg( zBqu;_`ym7lw)+M7KMk{+#WcCqWuUW`9C>xbq3-x!2^p7bzTe`Bmryp z1G+?!W1HA-P+Ymw^ls9ds!y}`z97GaZu~}YIdjaRW=dQ=k`sI#7lL8Cy^zQ())TZ` z7Xshv$DFk^{jk+=TO#TRO0Ky3y&=k1$Rh)#A>g(@>z#v1#(9a|QUpP^~*vBQtg!P~eHwpwd@ zpEyMg5XT|X2%FReVI((~pBcTiT>(xZP=^Y;(TEAo@ax=)gO3%5zd*!M04vJ|c}BUl zZ;?F;D>d7R`pMuMpbIw>P1r^%wr+yR)*qfJRJaOV-`fF}ObL0F$TKAq(FK{R!MGCu z@UwBCMtiGFANC}h>v?m6o3f}G*AG2yHVXrtKULL7VPa|@sgD;Jp)VH2C)A6L|Bxn9|lCR5Ga<80yOVo z-TOw;ahp8C1ddnd*F7pu{hXO*31y5yPbl!ednHy4TGlOLxVLwTltjx2mfD4Yan#|`o_@b;|@ z@@06N{=4a3Qg6nx9~r0Ss+`Lm`3QSKZ}vYR+hQzIZJnFVZGShis;ifh(PN*`Vfd|Y z1y5-h#g?uS*8fn{$G~8x=67TiNWaE*RQFpli$SiEq3-v+ z7Nj)<`t8b!>%KoobX6pJLw%qs-6i^x;W|-Z&aJG6ehZ_IJ-PT1k=4$;v*dsIDAp(b z+CERUN9FB-xt;TpPtGcgXXcaAs>sc!mm)o!(Q$(32i}2^9qdabw|N;juKstOqM>zI zwl0sod!YU?Z;WZtYq!IrOfkv~+ZK`xS zOXN7y?~yRz8MTEz3K)%xyvIde{vvGIVhoq1I531P0>Bc;Do4E70qyFK#FVhWZ;%xO z_Iiq}JTHRX3R|_vkvE`66xnnEi@{2l`{sZl9>4(|$Qz+4rQUzk+uvxvudyi^cHT^kO=Caf;OTwhzh|9UMeD4C1+twLF`RL 
zSi~=}9tF|AZ|eDotoTYAAEZXHUwxmC_dzh=1l^7ci*~HuwXnU7y%V}ICAtD*J3pLhE68Dty=|E%7B(iF#P^L<#x-(wfv z!fYsbGlC_B-fgNjkl-8aqNknVA*E$4j&V|x6A#{UF(&RSY3==!aJIc3GgzuoC@FS* z&c$77r;iFy3Qs4QM~zei&WRWB zCy#|(eY_5Fe#H|Th+>Rbb+u4`ta!;K*EP6Zwj(_dNMU5(KN)_?$i>Vzb9$sX?s)!$ z|F$2WUJO(&JN(zq!~dJpcPX{Xec1UgJMPXb)h)DEYi1WK4vpULwzf>FClW8NH(r#| zC({ewLv>m|K6>J76Qw=quCB#?gH*rb5BHE>PX$xKdp=DZEHd6$7EU!WQ?2(o7ku=9 z&nz;xRv&k>LWh|(!+&ri8jm%yRIt!{5m)&LVp1auZJ47`Ksp2sAA(6w7D%EgSV$e$ z5vZKHnkz&a#+r1O;miOX2n|SXpE^lqeSpwclIX*ypNSORSJhJ|UCuv0zfeezi;uiv z@b%f;B3z8W9&0ImYlYdMK@ibB^OkA$`I2XsoiE(+LAvzZADKSx`LIJ+VVYI#h%pVN zR$cJ+%D+mgbSv35Sz%n(Wi6q1$!sob{l1JJ^_}ThINid_w|?ESEB|sPHGIK|9x3bY z_!+$LE?-0!65Wqf`Iz`wi;134JAN^>^z3dfJ?-LdM!bqHCfEDe)Vs%|U&N3T8r61q zo1B|Mtd9l)kQELxk#3%XE|z)rDG=sUl1cDCX$Z6HVr*Lh2%1pANt#z3YsWgmrBGNLGctX7#{S@Iql25?`94V5OfEGF z(|x-3s{UPE+0|$yn5jLQvNxfhWa7+Pgw)}^09M3Br`wQiJ;!Cp-&yqUxX1_Ww{6){ z2Rd6v(#*H#`*x+(CkvFv%$E`z6r#n)@Y~IjQ%d>zkW4}u1@%kqivoBcc_;hR2^5KtnTf{TgZ}xedIO^NB@L(oYw((u!@41_Mdvs0u z^ha{f!rs|NmV4`@B=GLI=KZ(4i+;${b*;8|U00bcGN4=O(i^Zt)M1Xjj&M9Lz4`th>$n}lkdg=PU8dm^SO8&kOWs= zNB@KrqDqXVrA249zFxoZ?Ujh++1HCM|6aK0zo}{@oh8SW$xt>b1#&_o$>P}xBwI~< zPlBfFSoUUbn6`co^mcFg{mD{0UK?0(>D>KUWX4(Hp05#@>;K@u#eH~=nWvtE_cfvK z-L1i$JB|K2^bTQMJ2vEDxvwUBy1$_hct+3TBYxsD0`(G63?mQ;PqkA#xVjk3o`~P! z>oJA!m^p%t^__Mi7qCROT?IOE{$i3#ZdH^G6U_{O7GE}`$hbwx;6b_fqo}$R#YJb071{P_W^DZ!!mbl{W` zQ@Qe081~m3gg@vfw)v$(=ocAc*j>tV2m_db>i`LbWgc%>aMf6yXat|WF=^Fd&!;GP zBS_ja=8>>(LIP7VVAZOIU=ytFaS2lQ7*C62(Ar6N6N3E=1$V%X3*3Yg*?nfrXXwH8 zx7M9oC7tNJV*kFg=y!FxGKwOk$_h#K@{o_B|0Pdmtv&Lh>y&TpwiMq34O`xDj}Bey zxxam8?bfj!mIoimQoTDS3AqU|78z!%57g^6KIDBawYJFRE_-E^D1U^F0mO$h6ev_^ zGhrQov`pYDI_z^EsccW1ID-^uKIjzKE(vi+$FOowVHuPrtfClEh!7}t5=BGK1f|-0R*UlDjy$&juq}ZalankL;*dtZ`GWuMzwTDZSBcD z5z(@RaiwoM^Y+ey(|JgJIF8VuM|SyQxj=jhmYyCpM}7`fo=U6FZ+%4@4EfadZgffU zAoA#6w><)X=f&1Nu~!MR#FpQr*Mm3RTfTS)gJ*GFhr1SO{_@sA&UsU1zIjN;;S&p1_vQPRoYYSx+8MvJkYd~^`Z z-w`xI6K)zgaR$Id&FV99bP{zXAhkqONgM;HJ0TU&Eg;1AyrT7T%eUqU36U}c^|-`> z!NbeXb}V_Z){#oNZ+N9=K(hd!pYMFEb?mZ=xr7*-{AEvg^yfuYGE8e?5Zc|&qhpPdc+s6V4)u{e%SrbHw5}J|k zn&a|uSC!`E{7EV0`^w$lS8v=hoM!Qu{bR=Z-x1cUD5t9k(lpCeKlDs2rRv%-Yhz0- z>A|4&&(_bx%WPVcuKBS^N+2ET!90rk2y*WvMQsQo+!{OiM!~ZSa91S#lK*;e&q!r$ z)-!2ZDzk$v1>3G^ zmXCax?w&M_|1^I~^;4xXhpVD3_pC0bnb|9?c{hK}@^p2&*BKNBX^qb7o;^k{cMT{} z>*b}rcW*bi{4T+!%MKx>2-vZNb^~sEppP194;tII`>tc5ACr;m==f@a&U-ZuSBorC zQ18-_CWE9b7`FR=#hQb0JM9Qu50(M5b}mdAQA?-7i)pCTqjyuMWG6ns5?nfB=JZ0f zXpQ1^>O9AFos^#y32s&)N&yO^-Aq1beqPV~>^Za7g_H7zKC`>ElJ&KNU9_9l@b`2Z z2CwF?{kMlX?LKKmj`O47a|W};v*)gl@XT*XfBq#j|I3>8tHHcK&0{Fdp~TDv z1$~o}*Bp6!?cV=v9_;+h+!ZB5@-q8>WPg(Kwdh7bH$@>}@^R$g8z~|T?6$RUpMAsE zg@1smPlyinnHd>5U4hZ?N!=%ru6)8T0Q7|v@pS4hHTY|IzdM~8k%m{T#h|A4H#&ps z1l%O`z_(8Zu5bv;q`|isQ{LqH4r^)xw6GfI+S@qM-&p8y50P^l#f4-B5SCzTK2{+# zGcbRYC7htT?&VGL@AVCHX@45sjh>@|-?7D1Y^S0pI zdPOBLAk2PSjI$)jIR~>&tNxe=B>Ab4!FBc<6eM`y7$-b?CZB1OMmhP~VMC@K&yBM3 zzG;M#WE9prWd|lG^lYp#KI(O&r%`aa-#R`f_=G`BJ9?sS4Mnj@g)VL?aMrPBcFl~f z11L7u=FcYM?zLOcisG$2IWHfWPv+H*oLh9K!L1{W*1;Uk;pf2S>@Y72##sD^2j9QM*8VCC>TC~M>lLl- zQk`K1zjV{TCWwuRZ?xQGT>E*|(B$iud*)-5&>`#4fp(9-L7KvvHrnfPU1%;~>mVE; z-n+1H5;yv^Z1Y`|YsTC2&0{~XxpdHQpA6Y3(>=*rl>sSEeqZF>oz&nFBCRHO___{mMAEOkom6uE#GHL@czgVVDY}1(@(vbTeS`ozojk zfBal#K%dmDS7S?MWb&lxxYDwguN%>hnF{%$9;(tz-Oh^S6V}Nh06s}+TKc1OTT8q# zH)qwooSWy|8h@;Kn6Z7EO!Z#D+*nL4{GM&5@9sN85w1wE^AH2>p=&+!a*OirJFu%aQ1r*7Q!8{&>pqI%zjf#1S?$zg?{T+|>8`_L=qzMX_SY^$ zOcS33O9LOPNg9A`1Gl0cenu!M3~P$w5y4e~u~A~2>6rHrmPudYA=A~$biKRGXD5-r z0K+LYF6$#P3BcvpY`uR3rJ++{{fV`xJmfN0_!QyyPh%?fW~b~BZ0Ny z1Ro90kIks9ZP=Tv&g;^IFcoQZtlc~mZ}?o{>;@GiNnD)5P#T3^b)Ug6^?nqryakMp 
zwVTz_;b15H>}z1S_0UNAi`J2gTBj9!=J#M!hpuJX=l3Stf(&;*%G!NF?=Is_JKu1n zPNJ7*lJEXkRygZbTZ$W=eSr7aodsi70h>Jb6np%<)cMocqUn*xaUms8cB;49`oE)1 zR*?1CW^wo(=jhWjWoLeQ^D!|%)hjwuYGSxxbSC}@DT9Udb3yyD&=Fs>wRAFX(qO;R zlxIx=tSM1S)7UipN~J);gpcuQ!Pe8uZ}V3J>cfr-*JHJ*6&(`T%4-9F1}xWnP3$Q{q(-gR z1aAA8fB9K^eVn=ZLQmQvGc#ss?_SWXMQGZF?1-LhsM}~vUY^kY`%*zu-`f41samrQ z_aApg;*Q<5)@25rSHq7)Rx|m4= z%U?bJfH^g(?$SRr0G`%;x@hm_UawNqj)SIyw;K#|8@sJTUTtZ-Y!SBpoBQ8?`u;WE zaJyyKkZ{jE;ja5DJq|v?73D`pM_<|W>&pG0f^PHmC*Q2=wOz%tL)st=v__y#;BP%6 zHhEgnY6t4((UB_wOgEp(Swu;P`qSu`H!=gvaosB?_s8VvcC+%10b~ptv$G%Z3($3! zZ8SJgAwgHCGkf17xJ(7AQSmP7CTb`Bq2Fs%lnk8#U;V>UM#i1!v=l09p*h_lpOa9M8^GLECZ)7QL~ z>Bh5esIwv>finPR?0Y)k2~c@yC*uK(fe?QSirL2pJlC(L@DbP2Q?fLVD}&Ku4&a=@ z&06@9&jFt$j(_&~{ZM$LA=Sd^*P=zG+q3^?5-$8dh)R=fO--^MY<$`mK=u22nxC3Zey*4=LFpL^7?Bv2S@$z4=0c`r3V9 zMLQleVMIykAV+*I<7Q}%j0M(4Kv`PuriTlj?Tf9M@_ERlvymvQ3C-jHpj>@UWG;!; z0ibde!jcbjW$dz>dbH`SWw)OD7QcSh{Hps?%H>UOZZ#~4-jLGs=z;8F1!mjRa|P#f z7hT?&?S7Eiy zB%#X2Jwp(07*0Qjz${czg0SAB8aB~toEJni=Bt%NdtMj30|nECIDN?@1uD}EZd&1e zR7c!9S=2jvzAVuLn+5QFKL{W*3>VAuyBt zd~(F@Y1@E(cHaK1ySarYjp{s4UpGp2KX2jjk6Yi1l|+y0@6zvhT&4V*TsW07P{*3^ z_oEbzCWhs9HWy;+H~sx_666w^w;bB~$!lzaW6>($=rG&lClRJinnPSB&%;= z70oS}mj#O^y@Yl8wZ1J0K(K5t(xgcg;c+s!gKL%DairEGA16?NI&}5kCJGIJ__3=U zGs(xbO?r7c^=*i7vVW|RDXpH%sq^hT=U8}i(|P+lJ|&z@M9*Cl!*36dGUK8)K8yN# zKrn22iMAUh^S8mEW(w=%*7nfB)uq--c`!X;-gv|+ ztb{Sf!8;ZWnr4p*F#3hTh$?et1|TLlweq&Sa34R2AiA5BgV^|-hqZQzqjRGkm$oyp zu9rvC23-2aV?&60o_`sk!Yme$;NwnFuEMpU!Mb?3R)A_&oU3rJ1`!f6Gvp`=C_Q&o z-b7@o7|3|48Un~ZQ&FCo2?C66!*Ai^3`!HAG%JIm>e?Rf@gV=W-*xT$ZLRYKn#bt* zn=41%N9b{1+Xp^6OId-*g#T2}65*0q#KHinS7ixv^`n(FW_mX>K3W?=%75ZPygf=} z5Z1RF&6G+dZFdkww;{Cn3k8=XU|19=kgghFTL24T!WB|S?i1rOCJC4Nh8Pv{@^Bi5 zK&KDppVtV()FK0~8HixX6rxM+hLR+;?|>j97?=TCa>R&HRi!E8sKFg63yFdgo+X`p zthz>NBnPgiqp{%!*4&?2Y;i2}M$WT_HAP=8`B1d$qQS(VgJ};24q>@2$WUCF z6Jkzi|1B*q`ztQ&J+eVsRwm}O^EmXJCTyujY{{=)-b5j#JWnU@XNKsoDdM2{NrGk0 zbB`G{$Qkc93YY<~mN4lBEI;72pM@AK48HEkLf`=iU!pofbzosONFV^A0xArnBDR5R zm0e8$A|)KKmxa318-;;Q2xME-3v)wyKx}t~*?szrAGDM1%^%&CZAZL3q@BN!P9NC* zHV9)|F+d8%Pn%Dbtoc2d*3%SN+4v$Yx7az+B0s^_*gXOFcs#D9JEQ}6?#w!kI9JW0 zOyUXzK?|)NsFEBtihdu!)}^xve%eTfz9JUr!$&gcLX4$Uf%I(eOM9j+T-4P68aF9i z=yQf_IWC@-Lg4a1KAJYe!dAfNZi>wLgU#)FlM5Y~5xP6bV*w~=u;hmwNR&PREi3LV z1nJaf$)JU^02?2#40vC3h>9zy@e2>?DV~}LPK9~Fw4#e6KEt6)-rwJy6x2~yXr-Fn z>-nkAvr(0nyJU8i=UeWL9A={c#*|>NR>qC;bV>b0YcOj?2?+^&%axKJSr2uEVQjPriB1n?Nt<)KvG`iC& z!YyHz`%O=330xBnc=GBhbKxfET~*LWm5u)}L+uRA>maC_aaRIsK+;TIUj(t9ojSDX&hw@zNqKe~L1?kI41L@tsQ z_3dseWSot9{hoJz=S1JI$1$D|5wEaLjNFZIk^$dp-+`z;7Cae_wa?pv36lsHTFVW3hH=HU#b{Y!l)j}QFnKXqQldo-+R!DJ;NW771#j02wW zLv#GsQg^qO8PK9Y8?&s?xd`I*TaKN2ViREPPiFHQ$4*m4o&O$d zXeMC2iyF$U%YHu0bX@`5j|1`o`HTgE6&NQun7E&Gji$_W@lr{NU9uLVC{Ufwl%$Mp zp_Yw{uAOF9MKL|}D_g~#;V;l|^bXaj8#3Iywj5ysU2jyqbaB{#MDifuBUMRx<$Dxh z0RRQpy*lsGCS7b+#HwoQARPiM?kq%#UBNWGp>=G3^v4BQKd$RWiKs%(;yG65F_aoOhQmwtaGrJN|sDc1yjwjdhkZBao=9j_4%!anM9rRpvAL z>BLsiwGVQRZnsQl6}N|UURveY^&FF&(@xW!t6j*0?$~Zu>IADGu^kcYUHEp*`93*v zW@f-@XKRIF=nmYxnS4Xkuf-=k_jx~~>+j(wKDnw5IA@MLIwUy#zq554E?!6_tt+Y6 zD0$fRnj3cO-LdKXxc9Wz_9>eyGqqc`%x_A`8Oq|%xf(J7c+##d7{F+90Rv7aXY2V6 z_7}Iv@00QNGB9Mh!D6;G93MsFp?5NI?P0i79(J1?t<4R=!6rsa89tPcGz1X)dE5ND z@ZJCd1Q6Z=^eQF_sR5Ja*i^M>8=tU=xhaSVE|Mc0r3f9$vN&qul~-vg(;=Mf9dV+f z8pDckY)obQ4jUOl%)}4NcW(buysKwteh%?tc4fwv&7m!uxB2W^C&lEy-8^BxSqs>nHju0vLar&asIO<^=n7U1L#r58$kolQ#Jd$2!?aoeP5=Y-O2^q^!q{LiCP<=9oynqnFLL{T^ z7!CS>9Dj)40DIIzcp$l}iX?P3x(~N2tafC9NC0`Xk8M;&$@liMQX%7bNVXbV@niKK zjjnr9O}PNKheEhuQYxaCZqw*~F0S#tjE!tXUoO$P(S;9E11N!Lp&YS9ZTNB|B!RoW zcN3G2Nl*ROEG%GtN2S|_0N0CZ4O+)!zNIWZgiv@A6d+A8_p$Mmgy`(HE|r9Dw&-@ 
[Omitted: base85-encoded git binary patch data (not human-readable); the encoded blob belongs to a binary file added by a later patch in this series.]
z7Vo5e-2xqAkA7zZ#gQCX=y{;#KrUXdI8teqvY!{pidl`yghxa08L>4R{o(*@^SgvC zJ4qIkY17d!r=y96U1|lJVVsW@)LUM9QXR{WIz4Z`jgEbjc?EFkQz6S0UuY{&T*LBO zNW2zBWEC)q2CEzi!9zboO@<|^ru@*}4~sv%zikBBjx(wmc(bD!@4jfpLg32v*%FXgq|B!N9k6LTt|0Gn-Nqx{_|5-WM%Y&A#1&FSOSJ!P!HCDCNf&&({Vk zxMxm`JiC!)8(p5a>xyCFl|9G4DM{+{r<4DSFqap8k$?JDANlA8`pu5e4|>tpb$2*qQB*0exMQ!ni@dJQKRT7FF zeaYXnx5HQZVC5fAnDFf3)jw@5-8No?f6tDlWm5OqnG`!l7m?Zdan(0rOs;1gGEw<4 zL4wLqBG0I!Vo1e_WK@h;nMjH_bLbcTo!A!jzi8mUi~nYPS`Fx>RTo;9Xw*9|fI&|g=KqCO7w{pwG-ofn^I{dtu*_ZHwtpB``#So!t;shO_H#`K#hk%x<7aGeJqOwRqUIC3_+U6N~|`#ZbVp5Yy~ zeIw->Uo&v!Fv%xq{q^=^%r(QJ2RJ9+(W88y^OFp802hF1_%gpDG$@4t4{h8bf}zM> zIT4^%@~IjS5hRwD5oaV&`SP|m7~N9adFkrj2I4nH)Wmc|1J z_Pceb=Cl&iR^TxkX^-J~blRDryl`sEiGpWY?|k+!gw`+A-t3AFMP;a%Eul@&i zvbj6_wD0T*^~+$-jpzq1Kq8_8F`BZ`#(3!?`o?KX%}vf@mPIU`ePZu0Uu_MQgH-h( z-M7^@lrJ>RQA$bA)xJUqw=jyuk;DUU=A+z71=}jzeo+B*hSxm7^=baGJMnwxhAr90 zG*A<+O9u6KmR+vUci~-UgLDt4Jc@r6n3;?^w4yM zx4SoKxnaDU-(95Q7{cUQko+5;7(g4i)OK&K3z1(FJ8t|-k!#DxyA#@b8JM&GzBsGf z%^mh#j0hJP{*2)I>0CIi;YLkF2UwImL;1q>l&=}+@fto*Elu*HGH&reLYNski~CScz-s5IM;Si z?67>=%?2KB-zq9GN%Pk7<#>;UL>Bq2+UPM^rPe-r=7h`d%(Ob()6-s7B;EF91qFNd z3JlJ=C}kANHY8g6#zts*AmgNXB$$idP!gZD@Z0@p#OMHZc& zKkHwf;gv?4C9kD@qHDQGw*0W&jPSDw@VGk_bGN5$s(ef7n!5YpXGZdx_1fv-=-h?} z20g{AXU~{B9vJ-36D9>>TX0b#o;D8<>F{?&7oS3*2I_{LlYUXTK1{wQh4tMXe393 z+O;&hRYWE~JX?`p7<1o^cC3;8sdZzW0@5d{E(9EVR*&!)#_30CdCM3__;eDm6~)|X zE=iJB-m(ACQ9?V~KKW`3ClRTmbC_|VFIHOtZ2z(cwp_dB+<<&n?d9e-b}qX#-v0Qgdg1p5 zrcdXYu~wDew9h3k1X+8;TsbU=@i+W$;gd3`$SY>_Z3sxQ$e7n|nCF8C9WpRXpD=WP zBu6zULel&GMVN$Fd+2>;_+M-b^^V1*d9Qh`R{G;C*X2H?!UYo;(V!9a`QrG0SHA_V z4->%V`I$F7SwX^5mxeBfD3=YS9$Y(*&`C#`QeosY4GF1-r`tsmP;amh*1m~aZIncZ z=&2HOZBXa%3}9)V2;!@!!8Nm?&QEG;dz)_C9Xk!g8;aQy)k_&YniO!Np(ChO7xgNP zk1iyY$Cjn*@i{yi#v*BwwDaVaWd>NP^{jMe6c(rLETv)KD3|mjx^3S<^^I{o(O!vY zo%0=etrnv*&3}(R*EmA*wXM``2mWH!wm*z6?JQ4b90a{q?}Bq??XJp*8x`DqEukjq zlZ*9geNxXZt1BmcbG+>!27yiLTFG=hZV{f~IKYNm&}Ok)DxFvMv-tEOaO#xGk!Ext z^7g6+^@b;1!1*2N-P$4$nFB}+in#c$S()*S$|z4Ml}(^>u67I;=iZphU~dn)B!MAA zJ`R`IJld9A`T%BmJJV&Y*=nDI^3*Re`tBNWtKHZzt#;`8y%8AV-Vck)D%y$W@0o2$ zq}Z&Ct2Yc6YOBhtmzkDtHlO^Jsky%A=yc7t0lssZrKP&pWKVHQkvema^?l?A#X*>? 
z4SNb6(`u!6dK@47TWk-(coH!o^gm4%APc3@X+8JUHcK}6}WHW3ulV>y!3ET@?x`#^yI*-YO#Q4(%1)cN< zS0y9#dV4~K6P>>2-5-yvuRj~k2GK^;4H8FD1lMSoRt39A==49#i+`H)hSZYZXCA_( zyT8l?gO8@f?G>vZ>!vf92p`e-IbhN+{EmP0Q2+VH0UMbni=i&^6+^(r_yY&Q`tLsxj8qdyK8fXef)UuD3hXdl=gY#2I{U$xsxpJEgJ6trCJhW_P6;)w17z zu!}m3+l9~bgz7&lk1RE*^3fZQeg=qjx6Dt`aYO2c_7p>jO!8bEv5`IS=2nmc2&CVy z{VK&irSG^ip_|D?J#7pIhG&hKvw(nSsS_$@YYBP#zw%Z}i=4jjh)l+^I+LqyAAhKj z;jk!o1QSVa42puQvX`tz!Hfa9N?A?$Eu+xo7Ibf$4hwyiL$=z?Ry5~Km~vtHxUk@^ z4_|qGuy7T%IJ@{bzxG~9LNRnL8Mq?%PpT(gADR@df;wdN!amTZawi+2DEDYX4Kzup zZ-^yr!aH3hq!JZ_ORA|jxSq_=r*Z9hfR}JYYZ1tpGqs9LvPw$&HKPj-Kzl_2S3HJb z4dW8OA%8IgVG`k`J0u4{0zNL^5{;6Vz~*+5cYVoyhgp{>^o2_ttObIV)g}UT{bEqQnq* z(u%iUVS^-M)%qJ2w$;4kW^WrWn3=n`$r5NHmTj*v*P_a_DY8TCd)ABIcA3BrzAQ`8 zXw{+YpUKpt<^g5TsyDE^;>*L_toB86wJl}SB+^FK8~Y*a z$os8KjNhI=t(%5|_b?_26rrF4KwJ80k3M-F+Du!LJNn9Wy(~0l0LOh84WWExC5fyJ znu@#)?rvjy7O9M5wuK9(%f51RgxnOtGx{Xc?2_p(O7#Ko(bs1sLv-ROFrRrF?gb#J zGjIR^;wwsba~)sHx1TCo5ieWsRpj9!$3-WrtS03X4Ss(K%IE-mV#P2A*{B%17z&`- zPZ1AKulPNbUEOMkZ(-lop`_4xU$$a#N(cq37{ZWEzByr-Ny9kN}K-*6R;y z=>E{L+|P;~jZk~b0^CCB#&tz5g%Gl<=`>M>p9M%=Kx1@6M*KvBJpiXbk@-lF3Bbyv z!!&3(&9CMI=?dQnDQqgUhceQ57PE4fMLJQNL`s7-JQeMtr)GefckAvIq>cAwng366 zWDhRt58GE1S-v++6y{`gFi56-?Uc3{)ER;_(inv>HRg_5blv@;Pkh1zG*ft#*_$~L zDWrM6XuQK)IURFi3MOQ7H6~uh8GssTTz>2}Ys-5;G!*}p8RLRFGF0ttV8WQ22KY6A=krYkf2RRu<4TUyHED{VH9?~e|B&1PaugwQc-G{x1R@^60%<+8J}~Q1eD< zq5GYUp`QvVDFZsT1FKT@b)L!F0IGLM&%Nc1X;vKGOTV$IlJHP^{u`Y*BUSs9tu$u5 zZWaaSs~E~cG>KsyZE@mrn1hHks1IrmS-JcrOEAd#77g*9pHs{Fo7?9g+4w%DY_^Q` zRFY%^O3t~)JV4AiAU@GOeBtUMqYlbUgAp{lg-|Xz_tXi(2mu^Z`ZvX`_O8Akld*q1jc{)qHE;sFyT!)DErjPpXs9Cp}o1fp+!f` z(4ojQhh!!!@br(NFbJ@QUVM9`PtM1D?GIx5GMfJIrji1Y)qXM0N6X@mxiBgW+mOUl z7mXfcDkd>77Xa}Z@bDefphBowt_72Buv!YT(r1qXFFs6?5DY_6xQSPfg@UV3g=TfX zd@PY!F@2nwgU(F2^${PJ1;*o^42%n%=mooZHxz~9mbV~CzajMgBS-pcH37~4PJt)3 z;5f7C%=!Sl7{H4lOr3dPzigTC_?A_uV!$|xcoo!X0{7X1FLS$`-{Lfq8HFQfsJ`5= zi91Y$Okc|Nybl^qa&1^EjAv_{o8*Qa_JK2Z(D{tJr7SzK+&%}?ATx-T+}oL4@}*eJ z0p7(LBSiJ|E6G>q{==thW_kMOPU~m<>WnaqO0Rh;bXxURGa690gu9Am1V1|C%DLHW zBw|9S=`!dw&vSA2abfzn_zv34LKw-5wJrK?a(Kk6*JionoZb`9Y9PsTbYLjgeuk0yDXk*K&I!9yOn#iNN3dJZ0d{a{{(olH{uEnaw1_R3vwBDY99l zH>`bu(w3b}j5vW@Mef+4mETOh=e;ue6*h}`$x}+nFi5w^hw6V2Pa)sb`Bj8366#SP zTe`NHi)`Y$G!0rNbNWDWf?MO z4w}#Sj_;F_UUOHpLF*ZiYJi0O^HFDZ2Wiwi-{VaF`j#V8Hk!Xf|I>*n*pali4o$=7 z9TQ~x&l(31J~*BHlAHdi)`6i<)pPN*knWZ0ev&2Nd6WrR)P1R%k<8%QD@|XX$MQNK zmkyPDoa;>HS-8V>CW(O)^yK}ry%7!hkK`1bVOE(c2K$*(do)0gZd-Yb`Q zssmOb7|9$MA=bh!e4)NeB_gHA1RKP2o36YgJ-YA}I`OySM#NPyzG{!)2o>(S2ol0! zu63$oV_g2<=$qGjUC7%LHB0KY$+||l=F1i)2$-dKO|kP2XMks;;33!hn1sucuF(Jb zz84$<%vEx-38?0El3 ze8X;ErGg+Eh^{f|-E+wsW3!*|%X=ko_w0{lZ;VW@q-%7H5gUKvcE35foCF!YI``tt z%36OdH4P_q4dCQ)n5(^F->}7%v504r+B^gz{T7lJ~2bT)xhsSH|zP*_`o;uw5;EXxNgq&Y_ zDG`qO@zqDNvugeFV99^~`2^BSbXBVPLqSewEioj;n8uD3^@IAGJ39DAUcTmRD@~dz zKbl%ywNXFa-YofaJ*Hcx9f!&F)mWiqzSZ`8O%qgZXzE8EPhF^-t4iIMrV=Ww;7E&t zf~HR64KpXtbzhwy|0twXO3tCJE?5Qy4#NIvS#^uYvF{m+X3ByzW|W_Zlk5KJa(U26 z%Iftv%Q@d?#R}wIHsOgAk?Ye*^C)_haPA&?$7CXkJPTQBx)*k|X>X`ynH5_p=-wzY zM-^sW&3wNN%M43TstHZgzjy8XL|jcxq}yH?A8*2eU{yxs4eS3^xV+cIVv1L>_vnwt7-K)j&%Q<9Znb+c^>(6BHUB&Ee<@a9154zaKn>e`I*4FXfdM0l3##&W&1eY4uwVPo~ z3oy<>!Ee2J*cBBjN4Rl(Gz05t9s@(^3f`U zN{d>mRKV2L3=W>hcp8UD(678 zYPJtDpfywu{`vHeVk91Owq-2jC!c)r>GOoctYQs93(i`bi*F+ftkW2WRb6DQ9Js72ZJ3MJhTtf! 
z(C2bZ*R>i;y4KHW4gH?;fV))Vh{V@8URk^`9){6oEl0x0=74)$o3^o7#Q_rm_n*95 zr^3;zRd2?IQ`__-x%_LFr;GlNqVsTQ;{5;kT`qek1V~t3!bZR_1w>3(MnDZyM9{Di z5rd+lqFx3=RE&rgaWr8GN-fUfs6Cc~GdOWJAS#Ypv}$pD)8GC60&?74@;smC{eHb3 z1~&=z1`W8^jqV6Qg~5O9ky5>NI6;??uvHl?fV(aV=joYM=zy?Lao%mQ`;0xOxe6e$ z!MC6@Rwb~xQVhEZyG`Xzev1#J5;$Im2GHveSE#vC3j(L1&Fh>`e(KeT$#NM zbRLY{7sjauwiI`%f20##zu#M!7x?&3^E#_VONer}v*2jJMgX=Pa|OlW5nT5=pm{l5C7b&l8}SAL(x%*J}#_5qWq5wEDUR zzs5hC80zjZ!7H6+>E9=~E32Ty4%1~5Fp)ngVF4eIIZhF4k0>wOFEo&6CWUhjzLCBI zPT(X#&5Xyl-SAGjiio&AmaIC5HTJRV?YvG>`MK=_L9gd6EdIR5PD5w*7VhV^z+1eR z2+|h%xdz{Rmp5zAjcx0rD;)7`nnrm!aOw>`L!Wi|T=#VEgx%DHmNBbaT^a7OlInEr znBaVMXW%m*ipyGbrZ`pOkzk-E+o>q}oe}Pm_!<|!ir1~v5IxzT&n)zePF4W7aJEHF zBEMZcGYHjuLUsfHN@S62zBAiPHmI^GHUa@3%?ePo_AVHruk_0^0Xtn#VSg3?S-~%z zlSGk&(r|Z=_yaC6y*&P;<}bx06jVP-4$=EMd7RM3KIygc*$XcNJi^R-n$8x@dGq!-XFFU1fjt{<&trQr;|xlMNn!4>2TR6BcK z*fg^0Z^fA%KP~QI+*#Up@Y5?B^M@TDWTuCldSfy8OvsHhN$Fu-#Z>P%UV-B9 zJth-9UlM+~iXG{eWLQ-i1X(9eO77<9aSr$&&@?z4#TGLB1{5!7kiv&KRcZIW!h{Kl zd(w&O)pE!(zzrQ`COM6A?+P`~Y$M9oP7@`M6Zl4yd+r(hUh6q}(UR2!qGDoH&D^(8O#Ma^EAjN8U0Ej`BP+8Smk>4w>v{F!wOXW|myHR_-L)yF@E+jPfrql0MmmIWG{Vhd*r2Uv(4xTcoy?vqb~pG_`U{!0i_IO!1k!11!N7z;ry?H$0Z%gp2QHsdy{w8nD|RLEH|zy=^h2Z80ekOi?DQ38AH7(NYNulDcEnbmy});(ys{9Vb%}H24B;}brg>clkEV= z*Tg&A!{Fo06MQwuM1>-h-g`0~kaC(na^)-8)g6L5A$Nhol_w!R(zD)6n0G7wyR-oK zrI#btdw%Kzk1DvO{rqP9`$AKPQ*>5CeC(IiK3@_Dv!co665nEz&41p6w2sNVN~(ev z{VOASMPA^#%XS;&%MlYP#gp;`WnYA;DC)OKWxd59+aR2`++a}yQ)Pd1Oj(qhB`jhl z+ay8i5h|AY?@l5F0mM{2UaZGo{3KjLr!`6}>kT&bQsSg1&Q}cWN4ZHRhD#>)qY`#o zG`@}|N%{x}!8fCO!38Hv=uKxT^Xyylx(ROV>-^Q?%O zyrWG{Sq|@d!u*d#*87-0MmlQhmzoGSa~?%{26P5Qb1aT`?<8&QT5X?2#nniG!OGrl-iWP)B?0(*hd)Wa6q10mG zRXNCu{FRk*3~z(R#UUj*=u|@vITR_rtu78iiYM9tn<^kgPYP9#gAiN@!f40xPOwD* z3I!pM)-l@;WEJTH*0~XhQtuYnf-EJx0W8gjRW{#%9aqEglB2@Vy{oXHCtn198RGQb zwu%{j>mY1xpJc`;Tt#}%QkZLJU`^_oCOu;?g)*rZd;_2e6WC%}v|s*z(@!JgrW+;8E6M;MeSizL0Xc* z27fv(Hw7m);mV)m!cbzR9%@Gzp;!SFCAI;q6kvHMOgui%!U403o2YLj7A{iTHwx}E zgW&YPn^t9VQ%q(nBgk?Q+K|D#{NC_;qxP6>^OJ`Skm*{MJEtKKv;x4%zT^&w^#sXp z(37s~=_ZtKmnj)7bzO~eZo-5(n4mx)Pwb@vh`6NizkVe#4cBUiiE0x%cD#7k-q?da zV%^Gu#sJ^dKwmnW5Q8ol@mL`9+{ZuVvua!ZmM*)0$4FnZD82KpT|%r1Gp&QH9ZV|w zKq)V$eIBFdf0KIF7d^ENA+2jJ2||g!qwuP6>QQ*}-Um5^vw$bc8P`K$CbBH^hg!+X zIl?>+6EVODxrbV-V0}|ii%0F=ppo9AoJT!8U#V@JkXGun`%eS2SZdoP8gej?+V?-O zUCqGU7J}6)xz{tfAyy9#3$2Sr1v))7CfJ79W3JRn2MZSIz5rGL&dMb56mnCp;rPji z#d?BB%1K2Q&C6HcLMU+z_8Ww9A11~CM@;^>a1<;uvHL|ap{?Fg?4BKGW}MaTN$Ohs z)w@2SGzVaPSUU75Z`;<>JC|(Mk#-@Z9Vog(Lg_+$e*>s_2CKpz`@>~ckv)_kl~o=V zrodcFe=~F@#!-}5s33%(gsgpiN(1wyHtCw&GSxR+L zvA-Z}W+tJok$4=T`m3PR55oVhXOlCjG15ii zGr-y+;UF7aM|^P?Ur0}aL$^WuM+QNup6emKe!U573$C55^me2iu-MTf9ujXs_nfZr zssHPkY>2Q-fk%B}SJdZ?c?7oUNn5)~i;4zpiUvY&k>9scPf9GCOtdtERobXUpo(@0 zKGY3YHt8wfV4KDsRvS$Fre`9GnJL(&?HeJ;L=b)c?0X+t3gjg!V(m@jZ^(1HfqM|N zOkZOC%wXRyv^y-d2t}<9j+z(zY+yPFr+b`d`i`<4q!!9d=Ee0)bc(7*scrg&gWnfY z2Z)6a&SkD65Puk6A?z=$xvPYT;()7j6l@_?LnlMWV$3w+?1ls@jX8#Pd zpFlaHQ6dk;O}63JNjM^d{X4K<`oy6vlPEH9i7t2%OeCv##h`Zxpio8j`oSHuPWQ73liq{1a{b50$R1fC2%9{t^CZ{C2Bv-%$6@S;xYX@qwc zriK`-QhO}IKq{)Y$aAagM#$rE2ngn%b9leaYrxt zOTZij<0H(p%VZnDN75gHb&3?qg;gOYwu_2K_pCd<|3OF;eycv~{5kQL+d~%3&xS}j z2|QBUMf}O6k|obiJy?8N6Et_x^^!j>_Tb!TR0HiWVzDuE_LCnLsRpaCJyv?jv{=yk z3xE$nVsrPQZ(keUb}X9~Le$L~-@W7}RY4g?p1LivoI*POF6Dl^a9Dyu#h1RA753=* zXHdiV#+3lR!RdZL<4OPX^UQdP+qF9x=%p3E8%XO2`wsQFj55CH{e>V=mM?qe2fEp{ zU|4r2Ip;cZJh=SGFH*by9-B^p&#(Pj5iI7aTbJ z3Tx@9$f#naF?)8I!FprQ*a3hBOP|k%m*Tz0lTCN5>*m?m7-v0G(8|Nix1=ylxs;kEnaI}TLRY<7 zaf=wHAnyQneGY~2`$}R^n+}xoMRM?<>IWbK@puMbQUHv?;bdpKU`P9-%OQNj zC-Ea_hy_dP{Rs)gYJP{LTzuw@@Jx)(f9>{`E4v*f 
zI`OtyKV9Lt%&DE2FFJyOQ6u%&-@K^#>DWN^hT)oo@i^fUb!fL-6 zuY*sK(S%8-tAqzB>~1+6L2}$}w%hhWrD2$Dr)QN_pCI8mUXhZ5;E_sN`sGHqY)j*g94+sYi(x+y&Ku zic;$j$(*lBPLiip*@_fD}EbfEiaNe<}-0(pMq2aprbAJ%dV zZB#Xj1&Fy15 zyyDz+aw)|}EQQE7v{imL-4bK$HkaQf$f|a&n6A7WQQ%qsXM8!HsC?g9Y)A4 zlcCrCMho17CvK7qdUYFpA{I(V*h%cj4{!-{I$7@}oV-aUGdv#Ioh z%1S+A91srKXGL0TWtme}i|y(LEGw=H3BonyI>N7ub2xpLDI?-mx`zUzxjH|AHT0PJ z@Spv<)3>uBoDfB%&!Xjq?qRp=1h)(6Ewl^Mk~kC#$a6GqQl-DWU}ksu6g{YY5>eUT z=mMm8uD=*B4E`%KVBws_v+Lu>-gZAvwDr4pLKj)M)~U|ha~qDT{N>60TBY;aMXbQ- zQP$kFH?5}}?<$C)$9lL}Sv23ShrEnD;1M~D7qP+Q`9Tg&H@iq7%d-gkM`1=GVm{*_ zR}p>OgrpEtC-Wi#vB)^ft0@RB9e~6WkkhlXI68e4bdVh&O(?4`%x|J?f(IPViYZ>e z^hKKn)r{kcV^7QS3C}*%TXtj*%s(WaZHK7nPHd3w`I_J@!+Cr|X!-hZ3MI8V+(x3) zx|#6XvE-+fswPRN=3Qv6h+4Os#_H_hVkv75JX}TZ2dyV2iGkf8WTn`I!bTk7mnOI1 zc%qP?f54&qq{b$+C@U&BhV_l>bOaOSzIMEwTvM+(ZZZ))g9Nm?rjzGQx3&iE**;L) zWc}fPGa7K?4{<9Lcn^B7Ah018R)RH-C_was6>$BDYO~)bq2-&Fah^cqlzVyQt$exLpCQyu}UR|6ztr} zTNkyk1b}9j14Q|q2xgh7sa?90x^5DnRDUAO(Dm3TeZ1~@NntunwsS>+RZ38zEZ2}HWjO= zYrCVk3Z*to-z!MPPSvOUH6CVOub^@e=bmr^&+nHI;TD|jh!OhPs5sr`?lmh|r?wA6 z0|;d(bUw9eZk$0oJ@v0b=73=Ho3oJJpk(fbYmc{oJlkyc9EorS&hM=J)Wi%Lg_1M^ z>mUVAG+#x-hEVw}#|8Op87{{<5g#Xksct>?3pmwLZW>-|!$80Y=dS*Zh!R`fFYr`= zE}l(XN<>wlc?Dyo9WDmlUX@F!0l`Rp zdPz0j-874qF*uVlw>nY~qX%erQM#{E8`%5d&;K?$vczJQpl-KjKQ@kC3rBgAM}L=_ zbKJuC(`pZ%Oz|nWp1taS4Ji{b4}L|a@aOU2+q^2Xmet;J^Ln;_X<>_|DE|OS*4-=o zf(0nMGbw9M)-uk@tGtVPSdo>H9z+8~uaDLynC3G!Mg-g(0S2&_x;ZqBPx2CqDB%K zo~|?%#{KE`tO5pb9+RL2{`a}-QX}t#2lR4y@x*9%3HT!u@=N%IQBsi2T_NvbDRzR3 zt@X@3-2;wUyqJ^crwY@18@|@diC8IK*7c*SdSGndGIL$5vG(DkbC=fID-MuC6l&(g z7OneKCU*&dcbY;;34H%qK|-yENL3-bc!AhD+QSQfeBf!znp5A;?uq^on(=@wxz@hF zwmjqq?JMoRoH~au z+PPVY#AVm8R|%Yi9to#${%nf8I(z+`GnL+5V$Ga`LL5cuwe|KSJ?X!hb~{6>Y**`c z__|5(Y#ji;R8}wHjg51jtzCR&(WN6@tKDDQavwhY2?}B(iV~dY?7Fsb>$#kjC@(GW zV^#pXK12aMx^EF;Tv-ZJ#7bfZLOZ1(>*Ddf@sT<UW^RU|*H_B9a$s^jZMRs(v5FBs&k70;i|OLc5f@1lij*&) zV1*ONw{q)p^0>cfvv4IOTizgp(m_zhG9pyhI?H0m(jgW$slw26+wt997 zd+Q#f__Hf-{mgCO_QWKpd~5=px}0JmevM;FrjZyIe9HM~kk2y(+^B60)!QVBL(&Ax<4I13Mz&?D zAi^N7+%k?jn6+dk@4Qt^$fR_d6Yrq2>Yl$uW~!Jk$1m%moS&qfH}SVb(hkV+kxG2A zj=B|wW+>b)3Zb`m8!l!F{BZ`?78~ni0H=sAY{msj$Q8&w+n;QblAuJy1H+2OvK!9a zo_TE;zsUp!qO+%oDan)ICUNirAM!Xsr)vY0lm2zUfi(cNLr3sKNM&7B>6W;_5`x*^xOcyfc1zEAZ6b>m zl$BkLio2NNm2y@>-PN!*T$pP44-|@XdN9v2NoD!pD`>T))l4OBT?MXJdw5ae%GW~{ zv-Ga-hoH&V7W>cJ1|axkIdwj;x7kFyWMW=IF&-WD^dvt@424=zY{v!imjTQxOQz}S z*Jb7PB)c)ifK^vifIsC=1I|H*TQ~x0^n___@({J^k3LennqZ-ATJ{B8Ms#pMq7qBB z32wEwtM@C{xHm_I%f#eFed-N>;vy;aBh8|M;IC^MA3O(?CNehhYcY~q#EgeJYMha@ zSpoXT&z=sDW{VknBx`S(=yxXD_8}x^rTIX50Ol^C?tfzu2CpA|4&@KNYAPQ%p5 zNr7JJIXegP>xJ5egk1a` zgMI#YtQ!qZ6XWNWY5({DHWKopQnd#SHuhcmh9Rj`M=Am^9SRvy;6l4r>vXsX>sGS}~k0~E5^I3WOf>8k4Fq$1rptqHs5_@9&{XR*b* zDtvGzEz}}LWPXm=T!XgS;E|d`j~@CI6MReVStXQpiGr0a`X4Jwe(6kW&!S286VqowjMat=MECnCy9a75o%T zm@X&Dx^Rp8zzq_T9ZKB5#;?+o-BAb;+tjl0z5r{!q z0AZbo)~~B^@j7xB@IN7#J4vnu$SbqG5lMfog2YAH*r|mO#swRR>rKSJFXD^4h;d!S zJUQ;(Km8mzlw3udD^{gA`U;Mn2*$OCHu>}hUbM?DdiL6EIe6@1;MmCkZf2;w<6LZq z&#d34;jdJPpm2Fe*C1qHLSEl_^k8ZaTxWo%qUhW-5mSD1U=5zIK=8A7NKB_ zgs}E1(ZYm_k>mcDY2j_eF-$l<47BGOXb*tm*%>S(xArA~(*Yi6i?4h(MZD%AZtkwf z({djHI5rpzK*2ENWeWDvLoO)J1%R7ZJY3j`&odF@^w{7JDl^^Sqg(tq;B=$tmV(ff z4DsJ){hr6`xyFkVTh8f?S!u2ad-^(asn2Ckd57_s>pMZF>u;Ce3r_hi2`To3d?qhn z&Vc6W2o-YtLYR66rT(mVdKhszJxN%?CROUGo5WE*`YdfiZo|;xIL!1(44IDPTD@2( zmXr1=@fZPAUFY`K=?E)!nu`%DaqnaRz*FA`b^u@}26?(0c$3$1GLY8*yrctwI+vs< z#~G8EIv@lF84G~gvnJY4Nw+1!m)8?!RIydFTm41>XFWJ&2^#W-7E)oqD_h&I;s!-#KIg?O2_wlRd6tb7)_xwGIgOFkvccv64 zdwAG<&@$#P_w_F8fB&_tW9ic0FMGMY|32K>6TBLr!jjvVCA-r^?Uc}dH_>*D5)mEs 
ztdiU~Y3o0^BS8}Q;*d?pFgTi&yD?8wt4`0gBF$O~5WCVMB;YY;5bHWFO@gfCL#by? z3`O~~aIpRf;1KqPQUU~&RCc@v7>6qvdcfU8E`ov9S{+IMz{AA;a2dy4z$TuUcKtV9 z%W)vYMCQiJtYElS2TffsKS}{>^z?i=wN;5<+d_`h7q~SkJe1@X6TKKt^H;8q-4ZZI zu}0uXJURP?g7kOq%keDTN~Z%5fp7KxLygP&4$GbO=V(6<{%@GDFres}`K5LPK3PH? z9K|)u$QgFy6EP^1|IfaRapDm%`4hvI z2hU`5;gaMbsWl`QlN;XXMawUz9yc|Iz418%kW3+wGd``JT~Mn7DDpQX*tCX09tQB~ z&TN{p(KYPG^(r<~56m&rLtfDJ>xj2?bg?+`cJcEXHp7)oy2>WU*)Z;!h<-jGZ69uz zg!-GD5LU7}L_Z#Cfac$HM6SjV{|Dvjrsjw(xBd4%3w{6Y**2NlDSOBJ2b+G+WWKb? zc^{kASX;&Ni!!q%8yc4tR3`95JqdwmBu4xXDO z9bD(5iLPRKHGes7?rz_t*74E_Qx8O9nmscW~ zuz>y9CV+J&kWoI99j3wteV3Y9`LNx$T{-!2^63Npi~aM9MWKiZ2*-0smQmTOFAMir z*wj1Q+V_N2O6!V;I!5cOYIlcu%xj(20EAPZ?5h-4pR7VT-KBrZuw&YmNh#gFe{wn9 z9udV<+DmvPM~~Fl9PQ|6YCKvY$kSz3@{52TqHPO5a<`)yZPi6YW6@66qAAICzi-ue z%XGL$L}Vp(ujpbutak%$COVG3XD3)yir z)2XgdybYa}w$Z)!pi7avOMz{ty!Vj!Zpscr!$)lyO{f@>$#z3cqUbfWElAlz zsO&d0`y@D$Luwz_!57Kgm7k(SfQMIS&0qL3_1T@keTxs@2>kPXk)*O90YLL!*p#6l z%~bu8Ndou`9I>(TWn2Tj^*lS{eAJ%)#~W|F*k!o=~1Hi zmI@8E4uIyyooL-9sC_Z&RA;s|N+9cWT^q?7A0F=G5Lf=e4rP=ay}XLQ$b+BLesT4+ z48%}jb@-d)sZD#gBX=gNXN}qUDszOun1s6>u9N zZVb?u{ zNJ`XJ{d#C#LqA*~CWM6ca-Mb%z}RT+Vwnl#n;eN*Fk!k8fqcrc9bjn%4V4h)brHPf zDuSc#|EGHZ(GF`EPR0}Fc16N=AF3q&eRXC4zJc#djM>Mj2|jx9;p*b&2yjxhfY%^t zCc{|a9-H?9aG~Oq4Kb2|gK{3>+3-PflAMzZ4?1fQ@Xu;A!Ga9pT};yn*xcuyCwQ;? z#q?P2n3eeeDEra|O(RdNnj;TCwQefH`J&%pmO6#Kt!!cWBh|Kqp&stP=WaO{zIT72 zR?(VLy**QgYYzE6px!-04A$Lb&%?@wF`4SnEhzEEpq59DubIsbr!8t%F+31eKqdlN zx*^mZasrDd*=5(>=crSv=%o^9o?Et)=ruh!{WgvxS-xChB0G7GTHQn-(5?#zIdml6 z@7Rl^m5KMFU(5hI&FWmEqR%&hdj&`6k`O%fDn>dy)7McClDZFO7v6y>`W460a@$kl zz3_*d9<`-*NjL>-@YKmUG3f} zt8TfBG1u#A#&O?nZS^jxIo2w&3^*dM@JJvedp_XYXK!=#l-B9wQLwLEB`iY?fvFna zM+r6q7xC7J)fqub6%+1hSvL=7)qPvT!g7yVSud|pr*+}?WXxCg` zT)#4~0x4`-*herlDmx!8VrTb<2_@u*b|%u2CpcQbB$%_IcaP(Qi8NP!cz(;hJ~*tP z>H`*dy>6_$Rhu%n{{Q7z6SwZ*J`IxY9h5G6Jnli|5ve=DOoigH6duLyMx=MrFhPU> z_^G2(hmUCGT)oJ81#`fmh^@K*m(Y@4-0Sc|SsmAI1q~hE@9fbz`<(xq=-FCih>3D`SkX}*0T3*eSzZX1uE>N%(d(XVngNO)4Jps!oomH zH>s_IJ`hBv1MD@rUZ-M^DkV437MHnOwi#6(p&DM=NMGa`AI6G{&+U^^L3^hU)gIRs zY+VE1O2H=QxAfxPCi@dV#R3fv=(pSOi@ly+xtex7I?%Wcl_b1Acvp@BFpQfP*|W;K zXpELFo3!TAU$W>(@<}tJdf>9%(a}eVy&ZX(0pxdV8T{@+OQOA#iWbg)apLN}k{rX% zJsZD?4+k&R@Net24hMP|5ns-Vek`Cw$;VC~`RgFf9P$A3cx}4c^~+e1{i+vM;?58I zX7zF=*d%tjo-Ah*yb7@A<$5nI3IMqyVoCxpt0!%w_rOXR+S%5`{^?H*=B(H`?_jpW zn)cPFR_O7L3d8wSalgO(#K*l$axQI%VNBou9H2CCFP(mO2Vd~|U|8-S3vuIr|NN~V z!0}MPK^cDZ999z!8l}BiA9CmWUj#1%xIg>1yGniY{Bvn{`nri9M$~_;;Fv-R&$p=& z&AhIPxomcwQN{lb63YYeJ=-V`J%vYS!RXmdA)1x{TtJ((F_Wm%3eoy&b)NGsuIb~h zKk&=t?1n2ex;35dAj1WZKv>h(n@w11GFL1k|8D$8ZF3NFQCCEmG~OeS*A$SEFr782 zncc4TY><|0)XeQBc(ki$%2fgY8@N!wL=PfjW{H8$Qm*N##^gD`KXS2KU z&MQuqOb;De(!(naU6%eEYHTWtRR`;P+$KVoVU)0U+ISw<|FDECJ4XjOzUfZ!W&-O@E|k0zwkUc z*6u)@r>Fm^3Yo$OlMJ5y2aGS8M+Mb(T+y_&0pa;0vrx$I8a@Wq#>j}-GNRs}=qkGC zdGw&iZIN@xMQ)PHT6WP|ah0d+u`Y|T90V;e)~3GOYf8OX7QG&O?aQhbBkINciWxeM zBo#lNb}GI4usni3aUP*fAu}7)7gALoDC8_x#~`Za)kN6{!YaTeH4t+fRI%S0-udh@ z&&)Y?tZqi<#?yO!c7EhnFFH44v_0bOu|6XIytfHILq3=eRA2x$Mi91SI_5Hoi=R|) zc@D8h5D;L#YB&@1sq6Z?IT);(%~r9i`&UtKt_ax`&=8(r+Ge!`-B)^HUClhZ1M_^Q zd|nqtiJI|w1;pii(rUP>gkM#ltGv6q$w~BEe{)mm_KWKhRfDu0yt1oD#t)(h=s2kM z7OTCdLYD!tTSqF~l!U7Yv#rZv4jE@M?hIIcVYzLUt^ocRVrW!o=4#8-J4qX5f z#XhZixw6t30fQxk|L4zk0gUdJn(9jj5A2N5#C>a;vCl9gVzGYF33Hr&9`Ys?2?<3` z`VO`v&dE?zNxBejHH#~`!QcnPr-rm;ccc~CXuAH`nbW^>{QUfmwv|^CGT+XRrC)F_ z!^*`vEOIPxj5;vb**GO^ydEgJjm{c`;uOTy)uc`BcQ&;rZjx24P84CHU+2cA7SEW{ zJ)*YGrjF8uQ=eiTFcwg?+Pi^p>z|4YK;vDF`}DmcnT$`86B1DMY*g(zshQa>5=_*U zZ3i6+cJsG^GZAfo9Q;Fj&re49`D#O9gA+}6jU2S+PPPB+4Zr~b?o;rzm{Y%w4A%cJ 
z3`6K8BDOr3!Y!zlx#|g93#u3>5ND_2isxf|0+HFI-5PO5#7D@+c-9@4&c`y0m~Z}m zW7+6!&g6~QPdA=jba4(smy`+X3M9GHbvPB9ynASW`oN}>np^6`oxlcS_+y?sl$tKl2rIWK{f#B_;`0}&lp-y6O zRC4D~;M9pg2|lix7;=MDwRvQ-sp|gIDgzrQ&}9hqpmkTqoEuNqyuH&#UtCf6+YOcK z??;fM8*YJ~kTQjT@fSX&Iw`&y!VEA;)%fLh`0z6AJlUfJS!E&yb*;nCBZn_3BVttg zgaV?YftaMMoRf-6ZYL&{RnB$;7%0+jYBu=n1Zswl+wc5wW0`@n1U!EFM>4|pVBM|; z7<_~UR8(tmg`ca^gQo9Ih-0FWbXXlD}fEXC+!X_8|UxCvN{)I8EbQpqX%Lnk9xJF%fwxrg(n3S(fGe476!~>avCU zsG{cK?2{j^XH0L5ac_)Sx1_1;80anzhhK*-QdFc&;!})08I#!aj9*_$9JkRPI-~YT zg}gioa}kYu5YAnv@zmk-#e`USWgZIIODaAyv_T_D`3*#w5jkbk_c>vPAw1sDi_ohP z48Tg0fh--8Fdr14h;xj}e!^}VwuJBZ^efZ&Dj?@BjbDLUP@VGWAHr6IByD%HnBwn^ z0eNm9y&XxO&@P<#jadfVNCLP<+1!?dj0?9{CS+RHrW4UfPM2zK0da{rRa0Mn!gu1q z)*GJZPPryuclCpP^W>yWMv_xX*81YCw)UqZJE;%%QXe`me0oM?J@K@;Xr`dLXZD~P z%MstY*5h|yn#SKAy_%RW$MD>c@GD|Yh30lWcSbmlMmH>dn%{u;&_Oe?3uxEoPiW?l zBa%jl$zt`z%;mWqpiQDWK#rSdq;as|82iTWp49P7v)@Ff=p^LVr4a^JZBpbvSerkX zmu1sXw4w6V&{B~wRrL5!*5iSUIV*o@_ILCA>V0UZ%B`Vr?+xkF_KJ9*BE2CxA6}Lu zu9&S;PoJm>MsX6XUPI}S25G|wars>YS-Xlp2v}mDvS1<#`LnS5-j$K?*Jrg}p@btu z*7CB-{K5AH6YmT3#7VUH$htQax zYzG~9%QdE0bptc>1UJ)6$F3{*8i3pGvhd-t%TIaB`fhtxyw$ZVvU-1mo2qj6uUPed z#o-BM{6)_atC8WTcWcgSLdBJ3D5s*Rai{)RAfPa8B-r`aEk%T=i!#7ZsXAe~I7 z$W`2GpxZ~qRU-7%J!G>bS+jhKdCBCc?7yK_AkXFrvf^yvxb$h_BaMFY{=5`ERZ%^B7U zv-rD8_}1G5KR>;GHpAX*t+vyvZ1t)wZ(N6VQ>SmFdNqR1C^RQX?IEsMF0Y7RQjwlY zc)lk4<67d*{g>yNDwhXoJsZGa-N)H5p~zU7-u_p3HzBcIGqV~WJcT{vP(ms`38;u_ zAO<(6ZoNjH-dNYd<~gTVeEX%h-d2_=RT}L47L|UM|=@dFAQ+>6_nDswSV6e_L_#B~K=tXQI%A ziS!cmCL>klHDd7x6qiaWmyBI0Ubl~oJIzw-*p#7lS*~sGh7MQV-U zh;gXMx%Usdi4)hAIDi_0?}qS7mKwfhV~z1lEpv>wT_ufIi)Wv>h>^J}Q=*RDSehS_Y>*suCrR59u1~6zBo@@xMU*gTj z9!~9IHl97Q>R0B9yPmjZK_M|~6OKEo4qv$4y>9aL&s$a`r2o0&<<)c&cEhaLw{fI_m`z6%r=Yog0RCQV1|u1 z@nyHLuVP0fEu(wHfM@C*13RaBUbIp6`Q=-A;qcj&wz9J#P?VF+w4J`DAZ~+yd&~}g zwD~3b(aBfxeI8}?ZdmfcNM{J1cn-;th#E3q9JF^*q1KR7go+!FfN|cHU3^ z20m6A>zS!9N9vxt)OjRy)W<_ZqRN&ry0=c&gw6HXXf#U}`fYFpm3%}p-x+3tG&$Jll^MW{5A6ICv}>fNjyjk5Mhk|2u~S0HHms$PNX zo}c%oJjfSSi*biHehlu#YY(m-vN`)2?PHxDO{wJnzIV7l(92kqUb5~<&!F#TvGpO0 z@NAfCzfpdLXgRweU&EFnzeO;{Thk67do~P5be?$D6%i@x9F--+DR$Rb`5A}jPOEjp z4T@UZ)!Y4UMsl|?tjkPNmd_{gAiTO%Atm{Qj!7MM$wtipQQ4u4b}ub3s^aYxsB}9& zRiT2cLMH)Y(l*N!{LJ?kX5i<%@6Jw6oo{l4I5LSlO&KK8I1b64S04^ak*hf6%8!J- zhszSS&zSai0?q4m&b(_b>63;ElCom${(|D0FWL%9YP$C4i{d8R@;hDxhp(uUWL+*g zGjz6{Kj+hBZS$_^kSBtbBfH5iKjocYUb=q#!|Nk=K*P3me;Q&AukWlobhPZnkVoHo zZ~QQ1sqxGVr^VKvyaHHmGWOV(eNy&g<#G+xeYevnNV11BH4IH$IEijk0QWiCCHYY7 z?1Io9qKEP6T%w>{k_EoIWcw@OxhV8GUZFCd`SwS~*K2#P&D^s0inH^RQ4CktJxq6Y zueOwel~Fxa*6EVbe%I!*ciU&ZC->c);j;C_kacwb<9lhTH==MEKG%Y+3xXe{|8wBj zi~U<}aRAXa=M%$~?M)$WM?V$s!nxX&eE&24b;!R-HwteJLEx;Do6@}MKPbYowIPC$r_jty?ICV1i=5V_ ztZ<0y;UpIHIe$d7%rXV!AXW=;A5^S36*B<4YYsY)UZBSx@yqxf{fG!(Z}}j-h9@E15=T%p+3H8sjn^MCiUmqE|8E~;gOyL zP!v{|CN~iM*1FFxjMg_EHss+pbZPx~(gTm|gfnxEJDXuk_vfdjtc35=<}^B@U|til z_*h@*$!2>S?G4q*9FymjMD|JSz)hOkH|?1Y>_@iI4w~@%CO|F1<1BB=EB%hf*Cf!# zSR9*v$2Su4{`7x1Rs4S2G70$sroCSRhg;xJRCy2fSUu~mz&A_*H2y&82Q~Ft0lw?r zU)Ad}N*^vV8R-v$B8XPH2*0axtIpFUOKp+oSZB;Q9~m_q&_!67+W{K82$xF%g+10s z_CLM8V9es@Z(qz#INlBzSEh^f_wLb7{SS`6aBf-imghtN6OcFqaI5>mbFAW{ez3n?7MT$5>i<7S7$VnB3I z(Fux*4$cqDC_2bI_y4@h%By6ZyUsml@BRI3ZBym4?PZoX>=(vj_S=!;#45V-!`P)O zCDl`tanir zo6<-N@alC>9l;7rEcBni`Hn9?m0_1^Q7*hb*U-s=j%p!Ms)lmzT7%-ZBQT*iemuR(p5!zHnP zUawdOWOBCDrPygkO;&0Ao#rLQ!8x2>1HK@*+7KT)E;Oo6e}%4;5;^UCU`E1KLb?f9 zN)2oJnT)b)ZYTnggsW8iEp|(%9yUe-pn-h5`uTikn9W+%I$i0glHnWxM=SW_tIBA}glFbbB73(fO6SD*BZFVy~V29@F-FlwB$M4^efP zewR;#K)SGXcbCxU7S8_5coYW{`6DMq@9e415OJ$Po=zixTQjFHogM#Sr{a_(!ry3* zQ~_FX5;bHt@gE3yaI?YumyfN9 
z`*cLquTbjUD>q++0tc8iHJAan^5TdDkiZOU8+y3T>>u~vIhQ_{?cCB=;Ab#NY}8<# zCqUZH`zVT7*2%e{B*lcG_2fl8oFI8gR2?e4qKK~HM3RD#*r0p# zAQ%f5VHZq5JH)(%VOG4tsP0h8mRY+x+y6D zB?ZQ%$V_Ghx;%f7KhjnC!qCglgs(Dp&o!3Huq@=ZfHH##iKW=M(!K>0+-K+|BvxYo zJlBWD!04C+l9$lf9l#Nw()Qv%t7H{*fECbfXaZ7rk22Jc%6eoTDS zuZO}{|H(tqApkv9{O)n^@YwV3>Bok>n9?(5%*Fs(06>R?u9d>3-nH9a;1=Lx%?9|s zVkX-&aM>!IM<3TMr=Aq=Bk5s`{9K#;P6Y~$rUX1I<7W`YHlLPoD}ECN`2aD#_5I}P#UJjUXbqrM0HoG} z4^>SxUu0uT7@a6ExF54HQQs`Ct1~RrOYZ(};;HS6Znx}7$7~ZZSNXXr&bYuP{l+9@ z)$HempbFfGcyLueml>pX{YF6uV>3jSQ_XVo0NuoUG~BQ!POP{ssK zb5$NQ0=5atD$LIo;h-6)+3+f6rk~F(@a?T5#95gMg5PK_dftcpXBjC`rsxbM@$<oJAfEE$IC>FPQjbNDN}Ad4p{079^-#7c-RQn*^x)ovbbxD~S^nCmZho!?#rz@bKPVLE2Up#5^Hu1j^f*p|9tcCX4gox>y~l-S>ABv$GS50cQ(r%y zF}d5VTvnpcXK&&XuWrln9ybo?^Ht;Rn~Rxgw=7yXh^hlNKWK9k`B|mQP!LQA3rIi2 zqSr>OnZMUTolKt&_Bd7VQ2`SNAS;!^GG$cPOh-Bq(Hs0n?~lzE(_>x^h3qZ*kM{lL zDCu6F^7fOwS%7d&_B zH#@0;?SR};D>Uyi#qWi~C%U=!1#GL+jOK37Rt1Nk(4SH`O`P|r=`K#!u1*|5f0a2l z_gKxTm(&Pta&oQdD4%$RUJcp__~d&Z-6CKPUn#52)NO6+$0iU0u>I#Y7ncIZd9 z#+oB%O*!DFH6%g`It67s03i=Q!l!~TH8QP>z`9F)3;wWv{Q03dyU4908PXE>|{>7qHY^0%ABP(3mPzwL6gpO|68RnkCDqC=P||r zy)pS}Mz>j*kPXYs!_c;{k$Ilb3E=~!s7)ODO0E=0y>Z9;+QDjmeu+1M#Ip2>5hIvy3(e z5hZC0#2Uy%q_FIg6Z8CuywR#P0S|c0y7A)o>l1!^HE6ldP*M*QW4br>25ft?d25Y& z)curiy&M_su!j76$Ddve2weWH>6P92)F*Pi6#Q=A-@dqrgi(Kc-JB2<^fv}~b5C`f zD;0JIDBC)L?pFcRL15Y|;2_LyU$m7H`4Z(s5f1EoWbUQ75`XXAscy4ztXQSuD_z!g*4eO)pHs$jLbX@(Qqd1mIbi&YO*QETFNhH%sjVEaFDG(`MhM~+xHxDQ%g0P$9gA_HEqLxO$ z(&a$PSZJ0X4$oU$>Mo0+OX9B%teyzn{)Y?BcPB|ABTqxdNEVDIx2#0D)hpa%5>Vrp zQnj(_TZ8XbT&c^6xbvd)duh{$Du7f6S(Q4QEodT${j4V5mR4%yn8EqFtw3+aQmeMj*`auw%2l0^ z`76vdYW+uQlP)!A5$}%Wn0`t94clSYbr3^DIBPXWaE(*1OTU%naGbO`% z$GvxY`7Jn`YcWCjdqdy@FUg5>R^qFK`0$;W8OYE**GB>w)+wx~0Sd3jRszLNM_(w2 zT?3`h1@Sj@ZkGPaf)mn-9Z+$IpGn53fiL$Xc5Y$XtWOVb;odF(BX{{!&yJ7A(ICbO!{>@!cG++`Uf~=DyM9tqQv-8y4|K)c#O?5|{x{{? z9|t4WmPf>1Pnc^aOx!#An$VrT^UCivq#4*~3dBkvI&v)^fXq9A@qq0owlPT6vO`Z@ zk;U125Po;=ZrWFEBF2O?@txSbiZ@zs^9yptdo{a%kv+6@KZ{;E=f|#(vdNAU7ytE% zYko#{S-@-lVu}%-^4(xD)p}2+1CJj5PhIToXKN)e(F#4rg?3|Ic6DkdM!02Jbm~!| zxk)l}TvlhJHZxcdat*rK8nso7;QaLy;$e*H#u{8a_HyXR`^1rG(aqc9U2jDJx(U0k z=t5U@pI04O(w)6$L#g$qnm%0h&1fl1>I?$uFhABX=HE4rzsF;I0dns_>f9RCz@@d- z^;`AkhrfDn%$Ox2T#l{)H?5lJ9h^wwwO3u| zZnO&WbsF%#Q<#!R&BaG7{5?gsDe2{xHx7pw+Wk{BW|{Wr=iXU<@7;#gkG6aqaZP`y z+SbzK{`ArMgDl~-ODvc^=~*yg&2QGilLF~vPw{e|cyAY)s6HC1RCZs_BMzTl4lkH; z6@+&tyT#Z0&=49V81p46zM!9wTB-JmZaeI{G3}@LrmmntSkCba;~1i#%6PkmJOaUF zGLJ0+M9dR;yTyg(`z5bhNynl-l%7_(_E4Ab$^%Sy&kQayKX=2&<*@I7-C648QH{v) z+N`+PIt3~PCPAYg48R!_A(0ADzvD5lIvO{ zXt-;NzE8WD$WN!D-x54#ef0K|6{y@Hw}-w2A3ZLJtx0#Lu3xB~ZZfjT4#LsNtbvD* z7C1axm>xi%Y+;7;8?d@5T@Cr>VbgvUsXu4WEK=p!Xin+yBaHaJrK+E+=_~eS*;zccxR=t1zeh zgBN)QStw)Lxa$k79o(kwdu0HR(Du{gepK&?qN&41F+Xfri?M^&N9~ujCchH=7}LE* z@ydybnxt9}%U^AD+H$?tdoKDKLGdNQo5*`ymTTm?>y!>68mVq!JvRe#46_K5TQ(o# z($|N3HP+dL^ZXg}Y}DZLf{>pB&4evoehhygQ&heX#5!Yj%tM<#8-G&qGZ;@gCi#}& z(zx;iwfl5bTK%4&iR$P3#&ae}nP&(>UC!{Dq&3^)T3xaeyD70+Y&Ox{m9w`TADqGJfpg(R1YAQyctY4u))^v^czaw--VF z*+v3@9X{T6JRwr5M5~cmneyewe^8D* z8EJlq@#np$k?eDrUE5su8lV)xsArkOX|Zr8+8sc>Cvq^T8Z)c`kJ-X|O>tLI7RN)3 zvM!w+LKIkVpL7hc14X_Ad6vH0aO>mq-0K7miSz13cBKe1U`p8J8(I)LqKs+%Ty}QK zuW(6_46QPYKkO^q72ozlf zw7$WE7G4m_M2q%zQiCKQkGD^PBWTkUKh=jZO2Z>HrFKliaTxGaf#kXdlObiUZQUn= zS&G~7iAqgmwuIFmaE}@VmKV*-hqJQ|l9tjHI?rB37)k!d3TNM~o+Vy*pvPD!_K(@T z;olz$BLJlCO&1C_H1w^?wm_t#?uuG^(cqMm-f3|k2hNsLdjWc`S2qsPT2UEV)ZLY! 
z>5v9AHBswqne$`1c3=X%8gX9xfFD_}c{8jlt&fV!G#!^8{m-8h@p0+@Hs9TZ(wiSV zW8JZp?j-jZuOb8gPsS^ZDQ5W_AXkc#>#djvlkk9LS3sNo8?|TY-K#k zw4IZ0e?N&96@=mt%JU${DqQxo0PmmzbyBpn)_6aiOaI~x;6?(DqtJ~-DbhRftY zO#7?%*T>E_vairDGo~cu#9k2h(iFp~mmdrcQ?RZraQe7GnPnR|@*oo_$f;Jz41INR z10Qoqt{V9=heYHv7qcXy-pRj+D^Uaq90GM zU8}Q83e%p&7KcoMXdMHIP~x~rERbQX)8&{DrGnW3V=dAJBr&XD-O}D-c=gy?jO12A%Vdi-%E?hvuh@At1)fUTBUYTj;ecikXu|PjDnKLc0S@O$ zDscv(b*|Oo#@^thFg8ntwb5XAXmLBBN)C!?F-eG0vmHuEKCofkBrQa37~arFw?AI- zW)b@yCHX!DbJ`YOVcPq#Zs?q7c!m931GO2({Ve`6Ke(_~`+oWyI^6ls=|lC+K6h8aopjWVLiB&CL<7XmVa)O4hV$)VSX03L{Q~B{F6}v^H z!;v$?08|*Th2TPL*V0NkXz8ROipyLpUbz>APw7}hI*jX+89z|*YP&xTfSDa{>Uf9y ze{yLu$v~yDzPf|BQj2Zp|Fg{bAC=|{biwTP$jJg(eFi|Z5a7Sskw_a^IRZ)L)Z90S zSwCs$01}fS>P8ig&87d8(QuP=L^(dsA)ZpWoBhw_ z&-Phty_Do}#OyZ-;ic0*8znld1_~jPJO>|4O$)xEefkX`a~iluWkh5;JKc`l#v#8& z2y+sW5+HYQZnkPE6EGVIqEAUYj_M-IzyJgb#98REL~_PV2JHSES>w9sx3`hKixz!J zxwYTvks@kw$(G-1Nw&Xf=cQ8FV0hs|xYKYn>24p=;G0JKnHp{53 zDvB1uhS^4Ctn6F8H#4C^FRD<1*O&+uhVbPEH`<@HDg^iS;KOzk~qFPWVW0ZxYOO;&CTs3&Fzd{ z;Eznaj6(wixDRk+Oar|W(u--}|Ah2hKjc<&nOap-r3$xOg-e&L-K<3?Yta$_=c>Y0 zs%WSrBjH`L5Jbp( zMvmzr4b!@0$WtQsM`HRlPi2NEA&uA0>>D^ECZT?$>GHdi)&!|p{A_GruO$*w# z>eI)M?f-myvIA*83fftK8`A37snJRm{Zx`sGeIv%pvOs~4PPNbL56k>ebo}3C-5?F z4z68BZ{vi`ZpX!_a5yy~NG=J)u{yO=yE)ht7;n~+;v#uFmrdZa(Y|n`Pp}tr#p1Pp z%zMdrzkOfq1MT?N-R{6pq@~2?Ng=6LjW9dy(umO;be1N6JLh>n=E3O4c*z2Km-YK$ zC+rjWLzoKs2{CF^GzU>|;_wl|twzJ3X}z2^50SpXM!oWsuip%R%82tET&ap!$|1$NDScJwjS{>>g&hc4Yy_h@ zo{O9_msr8*p!eHd80)ORZSPOJHTQL%u)f0Le&nJvZwq8Ng$&?m0XBbQvf~n=zrutL zCdcCF0@Sk2X&~cccQ^vg-hI4*vgJJ$xI}yFrDvDSh+W8FT*@O#Ap90!h1DsHK=zAn5)+2H}tyKYS9L72rQ7g zN|L?pk0V*1BYk0Tek|fd)9+bM;_BCtiHnPiUl(pj_1V4gR=_sC7YE$HKw)6aGl<%e z3#jJ|)q-In$7`AvC&%8>(p*bEg80XC-r zQdx68?_X6!&U);VN7C|+-Ow;6lxM%n8ZG{NZ+F2{{Wm*(=L<-I%$$I z4YZ`dj0IRn_+7o%s}}vZytZe|jB{M~!S*$Gcl_7;4(IVM>w86eQ!Akb+$htj<I!RxdXe7A7Ny> z7d}Ux6lw5J1ogut&t1v*P>Yuil*}l*3zpT_FwZoCI zD*U(%X9R=Ew#e$C>JL&8gcL%Gc1Ktb+Lfc0C?-5JG;Dfg3%X}Ga)%@hL7RqYPwF>> zZF}ESDKJEVttZqv?#gGVhkhIAcPm=XDhl_BKz)F*+fadY^_ZW#W7#)i%{Vtwd;E4_`ohb5K6O1acibLX zW{d@FzLhK?)h01VpCSe-@q>i+7VtcLf!+pDv$Pl1*;Xg2m?6&|&78#!{|%MHjAl8h z^cyuL(J-L2H@3ql1Z+J!NooB!e)dk>%4uBgcH}TKz}U9`9>aV`n0b)M{41n8#ugkS zVWS{S!SQRCCx-rA-1WTr-T_OTte4~q6Vw0FUxgoDD!6Z_MYqyeD3B!#pwFph7YjZa znp4(B(`z|8=mxS-&LSkenFp9GGo0rhVw9E;rP`w*>mK}2=zQ^alQX~Haw|q-+wQ!3 zy_xD`buMM`r&~cgZY2V=7+?$w0jb(N>ZAu{htUUkfi@0T`=CG@h*D2N&pzG#M@>wc zJ2np@POGqq`9D3s5^5!thb4?QuHNT(!z~@14Nn^OSTtS^UOAxJx^U{^eVBiEyZ_<3 z&snV#U$xKdpFV$4|M4l6Q=l@aXq1rTI9;15rDBqfkUNv(k>V$FdZf?$&u^$^zB^9( zY|o0C>^T;qLS>?`A#`(R)tv0Jg>EG;at$USIGLwZC0-9QEanDQ80+w-zh0LJijlCX zm2VFHN*m^Eq-oG?8Q1QQvuj)u0sDrTu@1YK<|3bePkzdU4in^tVyo$k)Q$rj_CE#5=nLQj6eT7hU z+07^U9pXS zbpFDX#Y+Ja%sffLD#^rU zwDm~Y(o*5#7ummNdm@(2&*Z)!Pn)LH6EHfFW&+&4)5=twOUgz+y83iC6-xQstKT{Q zitPJ$DXMSL!giDbw_d~yR=98tbaNHsRwZn`9C5Ct+`XZXy>d(`V7AP)QLL1eu0e(_ z66q^+)Wwxmr6vP}Cd`|FH*(CnHM4O1=EtpiH=I8YRHKEB26Bw+44=Sd(m4Pw(5gI- z5s=>C&-C2efuq^v?14~p4qr{MErpI~}SoKX-kvfmH0u z4T95``nQYleW#0Ac?O^Qcumv7&_&|xSA)-+WEwfMyb{&UA$(R7FH(n@{#gBijv#Zl z46y*QUZt4AE%S^4=6=M_#V#7Yl6-Le`ubOgVov;}htsB>PbN8~{!_jB=Lep=%#IGp z&>nDRFOwak6q(Co=sgx&M2)#<)?$?bB9SU(D6zF_-KP*hAxGP!$ZCmhR=NX*$P;_W z&CNuf#yR8F6lA!baul8E^`Fi3!@TNGxtbCKzp{~s-bS`9p>b_^dX3Rk4a8iS9TVKh zz0#ktIVSOS?QSCrdBH&XGFdv3=o=}HoPVcVFTpzRcPX*RR^iLUSaXz1JLZ^%oTHn^ zOZ}8jGC*9>=^lnV;6=-U3FVSlYeZFDJ`1DtWg&OsMWRb#Z&28xKjLpiSzbtNagg1BS}k7K7(WI1S|1(4Vj=p+@y zIq-9>6OrkCRjzTE3NWaAwR?zg(zQm4TCYlGv>>VEVJY5iLQTDkeDJ(JI9@uLw!RB( z-7F}}x!+^9rq=WFgu#bC5E{A{P0g$GNtBKq~sc(dffs& z9-`OW@>%30xpCGp*!R^?!F1Hn8smgk^`)pxI 
zc7&k|Sp-_pWjK-lu7S${4l|4QZEwKwYt)oBO`$;5Xc*W5455-vTMlrUfqaEqoy&uC zUamov3S({3KoGI?7(0VDTDuenzc#=#s$_TPU#VMv`Uh_K@jTFMv_o-@sEpPY8|}RH z#XYg?!_wFCh|@+DC?k%5x@sI+V01y`RUHbw^Aaq6^|ahH>@s}~UrGGuEh^3hB z)=O6AFP{-m_o#YI+W5Idvd^k76P$ih?c4XbQ~tBf$50TaWT)xpaGYQ zgbfqj{0wGiG5nwjiyA2|M*E7^sw!};D5oUHH!|S%p5ilxz$KO9VnR*D3jw{XA5=fV2=YqSnp=_%M_C)!$E$4 z+pFX6!*@P2sHzTdA7E&s8$WxG%7>fd2M>pJCgGhRn!xjoEijs;POY+DP5xY8bSv$g zy_drH6&z@sK1n_fJ-7RIwU9OjQDr=T?v!Lj_uUlTgGI#`rCCLe8C?3qXWiY(g;{Vp zqTeZ%xs0+|ypEJ@mz5A%0$sa7411G3n?=6eoFme2;bpJ7e(Co1n7&t;S3zG+uXQUR zL-9bp@~&Rf9A#;0P!r8a9N^xe~P z`i&5~^Z9AAbQvP)@xRw%6A9@$*}~H)q=*Th=xMjs7yAl{JJ1|uZNz+9A2c6oQvENn)IhZ${zxK#f0|W$1-O~9<;&flZ2kJr@dGBWYc?)v z^O3BK8v0PC$z|=(JPsRKg_%ahPEMKvp;fqEAP!hmXf6DM7uv8-P4sdkF{L#O;sxo;@Po z-{@KNDRAQl6|XdEW%RX;m7j~|wS-+7TrC~rt^rAG4^jVoixt<1p5b5Vh4B}h^(fQ^xBQ^)a$2=?@Cz7YZ4%0-9KiJ5SMsR}q2r+og0-vN#rn;u*s zCW-qC_oh?Xh6jv-sm8m?K{oB9WaBrj$;c${NAB_?N}?3P3MA;&{h+-v^dv!Mh%>`* z!Kl}!kn0I?#bOKI_U4J*zpCk;LX@m5I76;{-n6ZOX8L zQacXl4udR-J_?R3;rjNk3*3>7JFTKrNl~|V;!NU>Ed&K2keNXVF-VTDS(#O}@P9B% zOsMi;rZ{VuzF|yE{XmyAr&2qDH?qBt8$7KouA#o?R1A+GYe>6oYg?H`tQf#KY$*P9 z+M{;bwtm{y0!TWUV@E7OEO85yZ@7yb;rzL`(hr`oI% z6P{QRp0d{|RM>5^giuXVAfIqVO5UU4-(E*hcHpIaLKuJ#k@_qWZjBX`30qUvGen<0 zulmyJqltDxSSA>KD`C`ncRJ><7*Ew8IX6%ja%HM~`*jf?5F{ZvKMBrTvl=Z$h0*cl z?!kFez#o`23M55I^OoS<(jf$7mSP3psRz>zxvH?Wgki29{Pe2tHU}yRi0!b-Xs_VJ)}#5 zRiJ4HW{<|fO@qmz<5wu7g86RknYeu%{BJ7$3OaU$47-AV2BEkjETL>k^(q;r8A)PE zsd=+t01(_It5-s37b&_{fAvq#ttY;;EhG4UXM>rxyR$K2du;uET%|u=0dx)E3Sh3O z2iV--^0ztIn>#<1?n44OLQAAw(a<8l4*3I`@Dv7YL*b+kBfz{RY7RN!H#Eg zv)Crt(2rtC8sZ`QoaXtn?a|jRV~mlbwFZ-vfXVVePd>f~6_PSF4r%-|-j6ZP+c1^2 z*lY-&tK1R`5lR9Hp5pWX4!*1-%}xV4N?cnX9mQPpZm`@+-`(&k$2<71Lt8JhX|t=m z8`l$7I)RpeE(f<;g4-p*|K0|0j6j0Qz;Zzs zbvR>8?Lr6I(H7%Q-{h=f;{-QS<4%+2LfQ5wbRoK<2A#vf8zT5rF~(n(aU90-0&Zsf z!0YqEcXC=&(=k4pOU{ol9?DxzY3N`+Azw@gpjR_LU+d8$3=dfDJa+Weoa2|~PV*3H zQc}aQbq&t0XXm1P`yK%_B?`fI?&?4x0M;@sp+-e;g(R)vvoq?kq8Jx`-`-l!Y0ao7KRx=iYh$L(%8e6z*M_PU0v zb4^M-tR(JI;g5x4H$zzd1UgD`N&u}U91q-8dFs@^9yHJ#{z{KiqkQT3&{=T7n2*+t ziRh0r)@YCQElv zM|^dAXAKts47BMaDP{y9VD_S19tX7o1+8M8QO+^81m#U4t&Jx=mZIDM;sdz$qeN}E zS8eQj;eVtG&P-R?*%qbo{hP;V4>gpd%8KpE`wn&2YtLe0_&rDgpvNur65vLIJ=$Kg z^4Dqhl9|vqqo_blxJY^v={&rtCErky)0=J@YcOu;1)`g{kFVKg1D{?y(^pdk>Pta4 z4nCJo%%@vNE3u|mw*>RixwEbzszYA!o$ZV7h{ikrT-~|P&?!pf^xqi!oiT_KqqbZ` zg(T|_lLiZeU2QB-L>0;hL6WM#zUZrIbRt3uej~+MYw$ip(P93#Uw*0NM%Bx z??qr>_o|RNG^P6BU+2TtbSaDMdPwG0nXHZWkR&FQ<5n6c|2mfFcQwNb}G>pzs;@~%|F@(`JQ9?EQ zz|XEFZ}%O%+h?c&6O%C8JMjA#=n3Ws@H7nsU*Wvs(19}j=z~5{$1P@G<(>AE?wJ9Y z5BjaePMbbi>Kkn}km&1jvH$ybfGAF(ih;S~07`-@7ZYV2gc1#kM>@AyOhgoHR~32? 
zz%LRL8$Hk8{%(`=Wc4ByVR=a65R`WC5oJtDeS41jx7~%96Vx}e=etVPU+5KaAMPED z|8u2Bg=ND1Ycis3-Tmzy_wU`uZIC|Uw_`TEKoiU`q&rU>y&rgXXq{~TaG|5kIp`2A zmzgqbI*HW8A^0dvJ%{8EVKQZr5!VT;IIRa&m>-m<3&9_hF{|(26ngr zAZG#iaNZKJK-ZinJX%Lo0)tH8>P{92ukp2U9V{KRSrruYzk0B4A4;?jhcVwg)-k)M ztMCm(eMA4mh`7KspPpWEp=9*sLE}HoC#c+nd*Ap6Dpy}$^YPE>;qIF7Q;wxi(lgzWZU zN=nQGzGap&HcQDh&}lT+AxBVkXrJi+%`&%AnGv51B3eGsX$B^=_t5pDoVq*+zf1G) z!ix9z>u_;EPX_$H&I!vupdDQP#g+CU!xRIFi#O4CMT#+lZ;4H_MDC{#qOIdWzyE~* zdof!VdB{K`zW2KW3kFK?uft%5Eq*&6J9u;9<u55q!(f|3O^Cvz6pHyNBDS_4DRhLqT0dfq3#Y<00k`+*hCZZrQQ>c*Z zaKdB&w>u0G#wz)S>J%cDUsFe96%H0R?hEnX^W^@*F}DrFjgeKcE1#`AKXPfyA@+_$ z_pH}L_14$Q>b$Rhm}uMfWaf!gV4mxnGr8Bw>bz+V*HB&j+|V+B}(XSpqBK$cRu^uNwG$di)UVN>*WhjxIzp$=d{;8EEa{y znYYa+=`}}(gs+luULFl67T*8*|#r;Q^ERdU4@R*b>q*rC(JV$6HW1?&<{= z=;pjWZoC243;D2j;tot@b_j<10d}T$zv6LGhV~?e(21_Q|r*7mq-TctC zso(4@tF%N-ZCZS3_n!FCpar{1@hkrR?<*~c4)fzpxjM_{l0##=Ot2*uo}5NnTAPbM z)>P@UuE6qD(QAT1*|cAoq10^{qbHeG0WT$dr(8>RLa)+)I4JnW(Hl)pOX@D`J{(aP z@nknYO3#O*{#SZ_3n$QYT3w#*6ghd=NCGso0;{!OtZXKLymgDSTDLD+W6Ya{(U~7S z;!jxc8!Dp8ypjssDv48hQ}qICt57F!up(+1YM!M&{mavE-JPYrnLS6m3o`p@X{U~@ zSCwx%A57r((oDa=_Ov@+=`$DXM_Z4s?NjhZUs@lWiYu{ILX9O3-=<#|S=DOS=|%<^ z`m-YI#wF{bq-Qc0h+a_w>ng#wAeX9TKgGBTubi83tJhR~GR|y5_L^`A9c*0{v@mUn z{Odc}jepY*!J~nTPn4wx(@N4uTWqT&9R2ioODwr;05K^$@+?~=E1lJVS&d2tVzdN< zaUv?qL$KzfoxrPn^L}oH9{q^b*ty}LxXwS4cXRTs zf|AIW?GXi=PD~41r$=4Nuq_u_cCKZ(mH<>*>DcROSB6G zQIG{wE)jB%A2IncFQDec=TbO9?%`J+E}7m2H{a+nZ6Q90Bzy{QqPv1oMxqT&u^f}A z1Ps1%>EXwQERrR3Y&+L5Py^^zt0?C4BteQj9b+I?lmE8j(KRq*+@NU5{kNDbnaIuR z6W%}OQEizx&$9FSgP`nf`Q@rSogQLO<}*Yrb?9SkO)3d)wGZ5t`RcH6@r|AiH9tD0 zlqEiW!!F>hOTf5-h1rNjP%3(DkU!yUL&=5=bl&5VyY55N6z_DTrgi|P;BpFL94wI9 zIh<{&ME-i9fhmg@*-gkwqQ;zva$fHlT;wwA%F{kYB_|&^c$+RX1oxgsjg0+r-E|o+ zg1F>_AaaTdFzs^mmwV;AO-P28R0?sSvMP72N=7Jx6fCRfnBR8`&eu0%`l9l)koIrO zc!=6kf-&>z_x2srqvwc4=Iz(x?)<%-Q8~9sVvER;m6r{wI{F+(w_msZobD{fm>Q4r zvAaQmZXl`q$ZqA+N8SOX)2k`;+Ge8FSBRD;#d)+{T=nGq4x`5^v4>5pPRxveT%b|h zE$hIDE0c-aC?Yer4!Ld@0t#5#L*!243=FvRHZjsU1-p_vX&Vl2{OuQSFuFMBGpCrj z(dJ;t*e6dd*8%B=WAf40T;xQbxm>qr@Jh-;E6muJ{@A=?60NiU3iKcL33iY52?G7n zp>aF^1C2(-o^=g6JEXa$T1nxOi3T0LyR)XBRY{^LkWn(nFZwrx=&#J_Sad$`kVBmQ z1MhDaWgAK>Sm-=X#*WLbcjL-VxEtC9*ywDYL)(r{KU;j2t-rPI?#0@hgk`Z7RFyQK zAC4ofoEM<`k=c{(Yk6*}?yB;1g&DeBPEvOIzVK$9o`{PHm0!F0`Afp~0PnHs_cd3zgL6x=*56cGOqa9OtPHcQ?@1Qmo)RU#&4B;K-F<0>cESehZ{}j6xqZ zESnRw-s9kGkig1%Uan!xivmy4LQc2d{(nq)rAeX*kWn<f&+Agg24jL)C8UZyUo$*` z+-r7dRst@`YM5R)Yc01nR+SQ9Y-g<7F>=q2L2 z$4`V&Q&4a94r8`htZh!n%lKir9z$)>rA#-2j9^P)OLuD(t=kuqZ6mM+R>+UMva^dHZk@4yyRt(tf_&D&`aCCNqrm)7K>sHzWds6bAP@~8wc>$=K_JJ@bfxH! zc(h=;O^*#&;?e(4^qhqu28`~Mq&XHXQtR`TvPA++#w_aJ`aW|ZRIeq>iN?;PhF6j! zDqobMlT;WZN6B>;33c)|g%-Mo+MgDzHdRY3Igw|2z?N&E`v90pmt}gP+?$OoHR%4- zdOw=g4%;q9K1icW$1EW#?GBSHVUi7p=n#Q(W77mYW_*JxzhTzQGf~4-^@4!ZTO1fI zD6p39GTsl^P`>=y^rZ{hok^9uC^UVE|=<@10n`2*m*Q&|tX^)~5mbo#h;Ek-Z8= z@Ul=RSx|uHK9vTL3xm|Hv}TDNTT&A{N6<%x8I!=vJcBj4#R4HVjNx~TY&xPolgE)= zsqA``i8UrmR!4$Vfy8KfvAKO{>aV_3fAt!#!PB0DeZ7*NUP(qTc%xzPR`#iIypIE9 zmD77@W17YczC(?VdgNtsoM|7kADEer8TJa>wDXRc5CNiqG=;>X2DDR4r&6RW4rB)l z3Tr_JHY9EEw~ZfiQKFn`BnV2^5(fQXv@aV%UgHA~amt5YZ%4-<1VAxNQb=6}phdHG z_VIOLj$o_@;Sd=(q$C9R0|CQ5XfE)BxXdh5_UlOxkO??>l?F@wLijb=uK?3bB^VaB;V9-=@_VsaEuf&-4fO$vK z+hTM@ypTR-bh93c0;C(MQm2OjMhc+x)dSa86?>mOD1R~HwpPBY6=bMUVI#6Xz5=hG z!+OoI<1fHq7Ispg!_>zG>LM;z>Z-EiDNzVjIBphL;8?h*0FAKnEL5PKSjMZ_7p9g5 z78DL|GR>JRPF5EusUiPa6h|nX6u^fCH*2N?ezYI-9Xyyw2*#`7qb`5V9Zi!Z(K_-) zC`bv1HzKf9$WKvdfiz)i?E}@AMSG%8FB9x^F(17ty3$Uho1a4cGJ2H9`+(Y>^<5PayEhbt=HLGwOQF3Uf^4Hj_~GaP%5#yeT;zk$Apd zY4}T>A3fq%gL10BKyTrBSOpp>|IV76@Yzo}?O+Gz;6~(Og<#QUMzA|LF^$Bb%`? 
zWZEJNK;8wpB3(~&ir5!w_~BlXh9+^8lMT@d>B;#8&^Y?)A0A^h#+ri zKn&mOg|Y{fL)gR(s-_reyyGw?^DR~w9AupqNXOgO{^QC(uT){zj}#s6z^)v@tS9f; zGL7t%s6{%E1^mOB1?&{EaD>KmP-ajgA%8u;of0=WN{ZC@hpr)xpt~k5<)#0uJhdgJ-Y5+`egkavIkC@E#ws3ffuhfNI=txGdB;#^NFyX5{t;fIHT7&EYP{UpLqyfuL zJIacGucrmRW1UCCwDovm;k+D>NCy9&+VEmkW-2RkcYqo`HYpDb zmDqoQz58yp*M?<2xIVkL7imCdy`^^M4Fx;LP~<_yqoXIYK57j~Ky%QDEQq^ti)hbE z$NUq2A^D&pny$>Q6DCf=uYL_#sFoC!f#wPsPgKFf7cQFos{4rw6jeM;xNNVGtujTs zav^KB%#K z_2VHVh(0_IL4Zq4IFPlr^FSxE+7ghF0;ynAff0vO`obCI0K>#_0$vD4?cKNL32ct{ z*Z@Gje?Q)71)9<%JLgNDTldpn-7hiysPW3d1b<8yxb#Wago*0?;rg6xA7+6j#qbV6 z0M76GSQZleaY^t;2TRT%@}aT-VNtRc!_UC2=Kct;PKzmc;wS!kiBw@&1Ih~r1b@7r zoi+s{HDuS*R@A(uPk$K>l(ki&_F%~$(3AN88=Hvw1z5<{f~ zC+tn`kTk6tnj06RtaZtl00<|PF4lpV*(IOFa)Pbo48$d)U0^RjA-FJZ}aCY-_lQQYQ0D#tnQoddI z1SW|VCEEi#?W7Kw?~3D5`fIlk#i+!pLVFPhr(lR6Jf1A5J)NI^-*xfjakZm;RKncp zkc=|6ZzD`fhSh#d(EodC*{Fi^i4KxWp@!zF!=c7%xZ~^w0eg zyw{kMNVfk;Jk*+W*)8hG#DGy<^i!$f2yI0~#RIiZ{-Gb?V$P_OwqZXX`hs2pB zOx)N*T;z$k z?+4wZ74E-R(!NF$f+e-N-Gp5NN|a~6kWa8JnUsWDq;?hd=yS3Lprs4ORAWNk7$Z*q zF6ZR1Ym|2gp1C`+5oeh#1PdK_c+EpSQ6syzB1pi15=mX&5>(#rj^!)2VB{f;p^3Iz z&WP!-(eY%kZ-m?9_)P=U*tE(%42|n>7|YJ&zRsvkQJG^;t`%u8jWBs^e^xRT-V*W$2yOQ{)3B}#|abIXEz|sIGe~|p{Bf0rpyr7|j7`eT|Q2V{JH!O$NuNQ_og-w#fZ1xTUDAuWI z3(eEH3(msrdec#-Fd67PKK;~OuqQGXid{&$^f0ws&_V5(kdbTtCq}(Fc3Irhse7&X z*eQ6Csuvq&)m?vm^JGdFZ8Ib!~fTC1Ejq@ z8fHr=xypkmcLn{bUuz^~c$S^~7XEovuF}}W*4&}9$EhKuV)L}nIxu;Ue?e#2=HQQs zHF9!MFB|JV3(uAK4j_9W5=O6jU$V4InRhvc>st{oH?Z5_4(kl z-vad7aQ({Rw(5>Z7&I0r@1-$t%bHWS34gBXn`_g{rH`8q)dZWC>99$`<3@YgQmosT zrh}W6sAH!GiAcMYoxEuW4c1uA0l4#h8lxRGJmOECob&Z##=kwQ#STIj!5B7xpff;| z#qcQO6u4yl2r5}C^8-?FWHN7t3gW|lfs{s3lO44V9~Rebv>}rTL5GYmEFC&S@!IR? z6?}-g4t+j`i;o}RFTef{yVbGC15u1aSj|+e5J%qCd~esBLLTfgKGEh~cTirZoD0kx z=k>8e?rv~j_4%{yUXPUXzJnUQEOn{Fc9qN$Gq^=`(+bDZowPZ;R-j4JkJ!s%(Oj4& zaB`6&SAz?bk2jv_40X&)DG#3l3E@B=Wj|RSpk@$q;~}GTRbgQ77~Yf9Z9Oh_4pP<| z&I>ZfJ`$EJ;&dBWaQUq9ErnsT8j6P+hLYc&3PX-pu8zFzY_0SgOLPcN6N%}sWl3@T z!xaN4v@N4Jy;-z?%_*{6?g*n@9xVdcaoblF86l_rjgX29SiE2G_jP9&FFL6AdlNc@ z^^){Qr_BCHWktb&?9#7^Q|te9+{ffsnBP#N?JXqog_9c6hB-lxg`RV=d-J5x*}a)G zPsw@ON)Vqwkp2xc^*<{qaRlLOI&LOwz+vs;N!NiLJu4NaKSX$|Wj<>{{ zvzm=)?U~VEr=qX_Fj(;3xjpGSG2lO*TDiP&3BHh_(id|Cf^taF`-EKjaQ)$Tf4?BfsB8{L`fwPr;`pN;pL7XZ5%6e>bhD&3Z}ndpRD z_0^ZDSLe!`1W60D$3zB9V~T`=1Pj4`7QT&GIbWHcza~Yk2mSN8OGtI9?8rxw7pK zl^zuJ<{VGEdGuOKm$5R>16Kw~TMGhg-DccB9oSPQ!+NzWBWAx{Zs>A#6D{fTb8jI} z@7{;Uq%^Y1tXYSlX^+j}i^*SV{&2bGXy8&EB`)N03Dnj2n*=Yd#83@jJ*T0nGtuRI ze><}sZ0WB5EK4|3xkg2Zc@zRmW13T0cSo+$%ebYSOeIwN7N)ygDE9o7f(;*-u&q!P z#%g&2{?)dNpSmj-Iig6_&1mfCN`> zU;TYy_5xl^Ls#^}C`JSW7(qn3Jg@Sd)jC`CiR_q@u#{tG^_{$_rq5;O#h2s%rdPNc zKJ+JLy7VLt&K5ZQg5UT>uD!BBU3%ibE);ya>s8SbFrnptp%+D41r?9(WK$Iqq!HId zDcF2-PP|8AOdUf&5K9vOw%NC8wxO%KW zY6H)Df>)gKDIj_M5ABUt2@YLMdOzO8QB(Ho$#TJp746SIDAi=T#lPUvBw{_i6*HH7 z$Drki^G*#exj(CgfZeG+y;I_><1fHNZaO~G3na4V^tWrc4X%s-RRj0$02v5UH(O1~ zKXwkoM<3_?VYN0ZO%15_VP*%HjUU@u^pAz3j8M1+>!{IT;D``4=n~=6dSjys_RB7| zCChEaC;h>0r!thc0EI=9bK6HTcpma9tJIjSs0AX=VHAUr)L91m&Muyn6;sQZR8QUOCz1l zW4K5TbBk-@i!%nolYEcj0)Vx12HYgXkPjRsbb`zDxangte}RJS0GCk9XF2k*;WFFl zI|RjYn|7Eq2_&v79IIPX?qrZ?VI4-hF=l^!Z)@253qzp;58mI5H23dDIFkN@r~pB2 z0C6Hf*CRJ=2-L{}8=6b&9n&pX=AW&Zk^JD0zPMqK-YzL~1j&Nc%58o5(T^(z>5SAB z8+t40?ufTL-sKErQ+wzZd=C>$IYr3uR&;-brKVns^Y)kB%nwZ;7`!F!$I-#v$3Vwk zUO}Z*x+t`&n^4<+^T<@$Jq`fYaPPmZjsDCf_QHBpE8;L$e>!TfZ)UFe06w_e{;Pyo zB_a9OLleLpU9B-^^RO)Ls?chmT}KoBno~MfEyOk@wpmlL)!Bu>x_D$X!S`$r+Ee{x zQE}bcBh96E*Dab1YJK#sv?G%i*v+DwF^4sl-GZsw`=O_wub7mt`#vTpiW96d>uwIY zr!3M-JNCmDv~ydWp3KE$bM3c3^=_6jpLg4+u{VYNx2lM1z3MZPfwbU6IvOfErn%b( 
zZpsO*$_uS+U#j!9oWob>{W`L%O6p0Na>}f9W%bx|_=ad$`fe!-fJr(DDIRt|0z-I& z?ib|$B+PyaTjgG!hDl#o|FYh({M29ZX;)5i+03&8p#%l@ShKCOxOvaro<(ynxfXK` zrQcEaz9TB{)V=T7_g=qS`+eQ{-O1LG7z80~k{NelG^K)ZwyqqLfPO2u{yIT>pNw95 zPY&y4W;8ZhoOiFsWWOiw)Y!a*+|od%NJAL}Zq{(qv$5%E*tNn?R>%Wlq(zstWWg~> zUaGqNs)XM8fjX#0QDH^$tL(3Xr~%lnmT&VMA?rzAks#|?o=tWsVT|YMTh>ENFHa$7 z2oi1LUFfxZQWe%(h{DeH8#dYJcIr+&IGEBw>%8=`o}8fnqMw=wtT_T05~WVHGQVEg zp55km6JM8noVxcJ^)+KddJ zQFdDbzA}3%$(bYbsDgo7u1A&Bwuxq9K=xeH%EqAYQ(_eLS7 z=7HGYH0$RxAt3m=xpIXHo{f2Y;V)oCc9DU|%ix#(ieC5wk$8IsK6jm3H5?apJkp${U+vTmH0FYAye~Bp{k&NeNxQynY6|pEL_1lLU zVC&o_HA=)a&`OxSlV0K;q=P}PUBmdL@r*=Ssg|N5C7uPS75Cs%6ZQ?dk3G&eHum;i z#w^LW>9+8x+qf0y)ay$`*_jXH`$L81eokCMLnzV&XfFuzruz7n1^Kdk%>UhVFR>$S z>G!m=GbJuXP9n%b+>!AN3B z2$Hxa6B}7tf^7S^IF0AdAD)FMANDs?B*^IEIqABs*_%OuhNbKF?FyPR0lMuAHw5HQb=eg&0&&=*w z`9~Km)@$wK?Inx1v@SwwQbWgaF`y2Gotv%s>LgpZd`E}EX zBefA}=pv93$$R;2HjV?akOtj`i}QD_`g2}gr-`t9t+C(xbF!Y|3 z!RNofybnjP+=sMEvPV?pWLZ53O+z_UOzH; z`b!nLF6geI)e-7Szs>LOp}e1n$Ab@i96gqAwy`jrLbK4W=UfOrd6YAS$|^I)-z?T= zr%#F=+=wFJOs&;i%pvHS2qGr#=VDAUr`52&-n0ltoSt0JfUf4E^xX#FBFZX(1Z5IK zYbnHA0QxWnkl#LghQxgjW3DDL`dgn$)h*X_<7Zskp0~}DFk<5t-=KbZIx`=?#NUX! zh_S8MqL&&(^jLe#Jfp4P=@plYcQ`25_WJ`9r@V$5B*o_GrA}D$P)f-~AMc!N27BBK zc8nF96*e>FjAo_nl{vr9BPV?)8|F<^3v-ySs{hQnvX&D5$LV$j_wL9!g5<79%8(E} z>Pt8@Kc_XVG9YM&S?lLv+pp?L822e2653XJj4SO$ z>ict7VtUV>xoUyuffzn~x)7R%3oX{whG$>sW9+4QY<_TIa4~z~`^Z6$JFKXvbhguL zZ{LTG+lU@hy|MCxE1K$XPKXqWfMW(Gu1q={du4iT(_7yM$y-R!eS|lZJ7vasMF=_}?ie`j>^o`0s?Ca8bApohF9%j8Kf5q8t z+gdJz_kJY47v%phV&=RNWElziGP0VEI-T8CkkVW4^hfF@$EG(PgTL;Vd6x5D#pa4p-Ou%? z8IuRz9hrXjHT#kusR$kC@u;(K>naN};%zmC6! z%*uxQJgpD!&6wz}>;#@NvPIAjLRz3v0-9wCqL5TO*;8PM#U6@ACCsvcWD^!{yp@^P^acS3_&0 zw+qo==rIux??7na;A8SXn@yOnzPKhQP*yuW1~8GirS^^*lNTa+oG>VYxN?Ml_JYbi zT~l-`JGWu;=6C>;V-0DZ4)Gt3;12 z@3E=T!3YV~@RS^7*d&}ovFLRMcjJToOvHD?5CLC^|#htSfLngyz20- zG5=R-swR>wp(yhVDpVmKr2k7AOJ7s}qWP?E*^Xr2qm^4<3r_ZRwcp=4xn|q&F6(0t zlq-E2#`Ln{msq7)tU6j@xPM6aX*<&@OR)5%X@cq@ZV4bek)}qYLl5e$15noSB6Wj9 z=0mMrE6oKY!?QtW!8&<}V=9)Lbr#1WSK-u~p*++hy)<9ZLZ^R=;1ezKia;TZUP>QZ@iQLwnbvmMYE9+|K#M;}t zb6c}f#z-9@wOx7TQju787J;6gv_yRh)tpVK$fqLi zf9#-@q|2(e1AKXj3P-Oej<#6t*vjk}YGuC9jY zs=BkspQ4vt`|#V3vB;h-(L@UHhUc93gppH8K05BXSEdVaULs5o;=j|Ime2$#q=ZQT z8#!gn%G6IVl!KIl={>|@fU+Cb0xe>_xb~Oy4nfhjY>D1tB}6$Tw_*tis`CvCU#xMW zknfvZZ6A0wKfG=6J17V;7}*lEn1*(4sWZqX)_!fw_~QFKnN;lQZ}~>vX>RW@Q!uCP z8UL<47v7rGR&i?VB{S^n9dUp2QGTARV%7ZHYwsGon*ZmI$GZO8=iSS>hym&?Z*#G; z7Y)q!KG@K4P!het`)jfYwe0Nf?QTfFuo7kYw6jmI%YFe4zv+n6G#7V{i|<$tDM8Ib-!Io+9oQq5Wu&{LI zkrdEMlgI5#)6VAKef8oD|B|-;v!cO`iuCqzLA_FRcZ;$LCPzvvNOvvqIrwWDOH$65 zg8XgyzHh5GZtYL8dc^xa$^3VK`!dq`8e}lRbu$h<7eg-TYGRsM&lx=EW&Wu7M7+YI zR~cG1jgbQxa69&4# z=a9epg9DS;gEKp$glyBWy?61S0pu=`-~lkF*XXs0$eFN1x{{jHLX801T)4J13^Ms8 z5zv@ZVTXzEylepoaREK$F@)K+E>o1_C>SqsRSjaC=FhJmz@kou`xFx9l+@#zJ9l}K zmKnOr=v#ZP<>?FG1|^-WCU*;&MZbjJuPJL`|I1lo_<*Z_MZBYHI(p#!M9Y|Y+{f8l z+8a8#5ec=9K`7PJf2YIa%TW1h%3TJE)@zWCnC<=_ zvF2!2N3TcpU?nhR@5)vZ=NJ@ZF-RrWw76R)UtiYSOuxxQey4jscr9nmzwPV^k1-ok ztRI<>*_$Dox^R6!Xn9NV<1dNjUrhSXO3UsP&%PATD;f)y20IlL^i@GxUH_)*-v4YL z?Ec06Gg65XuI>Jw@lhd~!|VqPlhtCj2u}*Wku1d`Y+HxAsn;Syo+l zUT436{(HZBJf*jS9{aW8Vc7HE&uU-^B#YXS^9a>-Hbg0Vht93 zF)dR|y(Z2~DM;T`|FabZ6;}o80NIvl`E)GyUY!*^ zKhDOB|IY)e&N9&Rh;7JVcc5CdkX6*9FRh8mV634m=We}#Tt%#|KQu_n6jUFL5+1mYrLc8nziS+!T-1$cZp+` z#O=4$~>U);Mv?Nw~21&dBHwc&<0Zpwa9Qc=0l_!q4JXGeWtY__9xfenP)!PZa&VKGv5NxzVSPP8IPVTsf;nh3LS2{ zl0+IaAJJHsi3}s^u;U@0b$un%l)P^l7RDaFlVr)VczaJ7;1RmUD?2Mc``)8$jmC2Dk3HP%*xbzxxN!8kT!p{LStPHE`VY&kWC`U#kf>+tCxh=~9` z({|hadh|4d!qu981RV9$*h!tac5cin5GDyh!qXBLng|>y$S;+HaPR!j^;nDp+QSpa8wD&sx^y&DJ%=@Vb? 
zkA7$TC*y{1kX%~kJwMpnRrzztT4dBl5WOL$ zs|`$3jHcmrukyy%K$oytKhH zS8Ux}`Sd-q$L`J@wh7qed0?~WkIRie%&e*&dY+Py1C?hxN}2zitg?Zb=MT!l@3=&@ zP8Oc~=_A4}0ZLvnPzsvKAA^%|j}6keC_h(>9~Tqxd9I8>5{^-iXv~F7GQcE9YRqF& z2+KtT7Kpr|9!q0B0`S-jQUqc~X6UO#dLo9Q5Hy5y-gl6~kF((osPY^z!LI>UcOEcUb7pa8&FdMzL#OrD6F^TF>dS_SA%IH}#5%2pOP$Pp^9EdWxvZaLF2XH8JkI-@FZ$B&#VPwOgT zEiLDH(dS!Ou(x*{0xhZ~=C!DXsIkiOjb^0i__|-0bE`Vn9BEuRXR+b_qsGPfraRv) zA3d@VXs~DO6T=S=M?9&z7=c>8jag)|KD+TRr2*DQho6cg-IcXaA39PshUq0Zw3ws^ znsrLZ3Z2oY2wOU09y4M-2l)DOS8q@v<%Yx4zz09ZW!Bh1EJzZV<9@|c|0S8ZtF-hiI8mGrgmLbSe6ld!V#D3c&e!5Y(z;LPAKKF4z1_Uwn0fE*N|UVpEzFRYTlZhF3S0lx{|C(*MUHxaDFvtM`L)TP)X~dA+W~ZiUbuWeZW~OhNs?Uq+@p(uBIk9yH8m zpq2yJ77>L%pPUMJr!cUul~n90!>ebGEXg)(;bu1hBrFfRyBqom7O zj(3oNtw!%xzm2?!-pv^DdxefvV$zVKe^~Osh%Tx_V~N&Y?8A5`nZc&FfzbJWMmU#Sp{&z7DVQc(TgiakZzW?)l2?AK#8< z`yRf1&xfvJnr-n)lyndNH}~m3)WJ`}I|~{nRj<|0pAY*0*t_a1A6rt)*xwLnILWa1 zkBg;q|7aBT3F_WiOGt5Qh(2mGF2bUN=Ysvz*ip6K@)IPt{|qJ=t6wP%&d!Q^{{FB3K8-el z(j-n!XG^kHsgd%FNK{jmQqAwdncBw1W!{$(itt8=J&u`zHL$!JoEYbNYj`0<5`91GaCA_GprlojsVhFkqQRL zmt9D^8Jel&f@KkK`kY|X&>v3^$Lt;F46zwJ6dFgP(~bg~RTrd|@+f@(E$&gdS5FVVZ26dcWz*|hl?$UbB)30&puAL!-SOl?Zd=y;YrBq`yCn~8 zPrBH#_}RwwPkT+~yDh1`EFMFgjsMbXzrGzln`E|!l>B{a=R#cRa6=|4qspPwpw_j{5tN^9K{cvKkNO z;VL%${b39g=pEd8eA`Fw;ZeR-jhL^`u2r3Z%&Vr43s7k)(0DJJvL^yNHzubS0Q(Vp zrfz1Q_3p3WfNe#vbPpl4_5Psi{;$Fw)*bR^iP3?o{UO9%HJ`%_Y`%geboXFkIs%&X zmXsTp`BujR!OBA@T9q`y^GtAqz$T-izRWWRFII#44BeqBG97?LF{_-`l1|N48D;BN z)I#AT{}@wqdWC>r?%R05DevZ{HitW$0{$kV*Po;PZ%z!dVVC4 zdB&g^Yn&Ghl~}UV0GXb1jd1G=kC9^#(Sue5;^H!g%Ip&cX9hhl?_}fLt_-I1xOR@j zgb)ur|2#lJyjVay$Ahd{fv?4Y<#C8sfIg_cQ0!3(LUIy2X?{a4!t5{VOPUq_}7`-`d(~>Zq zlshlUn1Z{Z2J*7EK#(b7m;tIYWze8@k2&ik^^SszLL&{&@j#P<){?G$V2Nnv$LI z1t0bfRjDW}l=16?d`4y!Zu_*%T2vZcMK(x&o=Q5x4$-9pD;Ga}@w3hXW4E zP!Gm_NnjO(s*QYMX~GDIX(_h2&$#iOe#YbA2lu5rp?{9g&E805^z3{SgtaT~F$g70 zSdJI0{?(h(UKO}!|BIBY%`OR6Iq`O89`X1`BeB&jAq~KD7j7$bp_EG=!{>^F<}n>9 z@=P6?aUZ~yr}Fgt<`z44=5s-g2*qMZu+|DS%B!w3<*6=jepUA?{Fr1O=N!p;L^h#- z!RSB{hCa!~6(i5xWTne{+uIeiiw)QTh6gC*0%*9m;JZCYR6GE!i|@??8I*(aUMm+d zE-p?J@Gk#21)sauFFdGy^Z01+O2jXil7DG{(;vF<-Ti%uK@H`3HrlB}ULQNX_G?qJ z7EZ13dLy`z$v#-YX_4%;sLB9Hykn@*K0CR%4MeNcaYq2XWbZM2UJVp8HdML7fwd@< zhmQks90R!rc2nnbS5gXc4}r0+T_IM>#dx#jI@FIBLGnbw9{s^F(>pa%{6cn--^AFQ zo`A*&&sC5jwOl`juT-+LF%uHG@IvG9Aa1y&dB;_mfJNMP*vCc=n zdM9k#J=)pt*(8)eacXA5;(d^_68Kv77DRV)kz_c|A$uz}OfH$noaLT+u%G|gNN<^| zeAQ?CtZB&(tQ)?LfFd%DFL$Cn--}9rmwTHXnC4E1$M)!@jhWw9^1*X{7{31+%D$RH zDm@akwMY-0i6CCT<(7!5z^+RieVw*ing|b5-yRAjiWur?^L6 z3b5H9^Pk$_5(a{{v{qKuVMAHAMx5Bs`>)TGE8U|SogcP|W&abn{5@QGP!H#mUs=Q~ z{4uoFZ5ePs7RU}1vHlP*!#b@sZtgJST1a+xwdGhd9O5upKtCd|6 z$@VnfQzL5(e}O@&cPP#+u*sG+MGy_XzF+%~tK$X~N(ciVC>ruh-;)3r0Lb{3RoRy} z8RF6-R+LhD88BdVXC739;U?-1U&1TYkl#SBeM2QNW4)EdO-|_J{|1QbY);bB7M-}1 zau%DGon%l_(7*BAg|wUkyV%40w^uIJesqaF^?cjh*0e3%7n0t_ud&z!NI=zL@8}|5 z*91EN{1RnU)ci-_%-aFsugJv#sHcu}?sN957xpmdm`8|@6GDAP8Di$vd3n3O|ePn zF8r*8YD47D1*g3Z`#fbB9}p!xzBU(d!4h?{Pu%*y^W_^ZU0i9fuAq3Me5m=AAnexL zriq-`cl1{d$y@fUo!h!~c2ja@U%F_<&4djgNxRx$06U!pQ2CAgZEX#_&+d!Ajm0@A z!H{U`0v;2okD?1PyV>}LZ}F30zgNEJN%jnJ^%y)kdGL% zf{jK^gGnmfN}Y6vNN*E+QxF@RuY#NukUn{7EG6OU%ar7a5Prt4SZV%Vlj3mPl09|1 zY?Y9VP3Twc-uZd+pY6MIGKn8D_M~my5?Z}w2j|aq3T)1sEu#)wzoQDKhqmrAD=5yi z8f4P<2|+s+NScOGD)3-@2`LjWbWBh4vZ))|1Xf8}+d(TH<(oT&tp%AWE9d~iQm415tPJ0zC3273b#VBce9dkIz5E0faZ%rQpXk& zxw}@5m|?#Z1^w}V6rE*QlWiY|*U4~n!{`uj^e7$OFgnH2B{-B+)B)RQ93`pGkPsAb z0ul;3Qbg1t9qPcso{jp*^X~nyzUUmo%lJ6M%kG$wGC&Z#7W3 zy=vQ6mHm7~B{Myx&^s-tAU&&KiYOFwx!|+e*&^cE1kSml=W^rCn6gWlNAvQ~tiqb{ z%v(#&w_eKMjX^2$6C!^AJ^9c^Hu565LcmdTAOVFtiZHqQlJkP&3Z>JrKrjH@GGrr3 
z!={*;C~?8QB(Mz+&iD~t#ouR`RzjyBs<5a&>63Z5lc)ImKBbqK55wK+pu<_{yB$b7 z9sokg+s_7B@+6+kI0QK-4V=K^V3CKv)#Rq)ze6Hb9Rbg^03w}9K%8eF&0-x6)TSWL zbA|ZM1<_{bvJ%em3zC0zz~VXQ{w2T|djau;e9S5f^Av|&iipX|Wb(ygP7W3trlT2f ziYE`+ghO5A)JwfVHgTXwNDw&z%p*&r^Em*Eb(2wc`(@+jMu9j|Ab1MIoejLcCO|ua z{rNCgmU!uT!Z1fpG!sn&R1ZiC9!T|JatYfR_1ytAP16b#WO69}`fQL68-!N$?;n*Z>DMAcND% zP16aeVHVmFQf;eUU4U=1t4EJ_qEj?etMJHS7U3K}-;@Qi<~I6rU=%L+sY}F>VZcDk z#qc*l;YDR0kEP;~|Jaiu9xUjoE6~GSL5dt$x0}Ea0ZI`Ot)$vXvJmuUM@b0lTq28K zi`ea8#RKXR@Gx5*OZc}?cu2fiOZ2W3b1pTdtcdw`Ak*n&+Z@)qTbXFOp}C>`Nrw*j2{P20bYZCoLLtJ>QIK_gAYo$C$U+&K>JVOE z_NMdVsn1fM?28=umpxffPb~BZ7kWehX5~XYh!Ei*K(e3AiB6$CWalIi3B7q8=8A-0 z$w<00&5Z84Np5+G7bI1o}&7mM2E!GoQ`B8TvZ<6N*02`XT7o?x{Wuu31$ z+RDXHfhhTWv#ckQ!X7RcOgTM@)K=^N2Wz1bI#546JZ+B$r(yLD;3Xj>Ji{%r?0e9% z6F#`!q44`fIp$^IcaWM5h$m%O{1-GF2d7#91pV`i!@1&;oom9ldUr1?W|9g(`OWw!^ zAAJ*#YGk7td61j3f}LG#k&}ol38+bd(`{A>{{l5~r}Wi=Lq!Yr=MKkgj`PD#vSq!N8@r(davj&U|R|--d^p6USXk{i6|CvCcg=t--K%AR}HdHLh%9XeBn=A%pzN^S_4sAg%+sH({NBj z0W=T*79|32WZ=mVfH(zEcIx6ll>>&DbnfLv59YYGdsi;ywu51p18F#%3vDe@FBi}d zNl1&io26Vt8Fdd)#zZW3CELhPelwelrNXrK!-N|qT}#2|sK~I8BYKOy^z%j>xjwJh z!VI$%t+r791g6AxRBIN!#1)R26aF(Nyu=%c7(s?{D#q+$TWpC+^LLZ&8a44?ccPgy z7P-V1Lii!14kMD#&9nV*Qzm2ug;Lggta2Ekd>FwLfSmDQMLxg~j~HZPp74aIT{C$^ z)B;y{k8kY61RAhG;z5958X(OT2*3e69;@|Dw-1MnD-nll1}b*N`LFy|pUt3{MH*z5B9&H8Np(0bdn+Rf(a23aUL*o z;fi4GL=|x03?}*+U)T#=Qb|Uf=EJP{fZk09=Q4-iiTL5UCoo?;2nUehje7$8O1N_h zjY80MrBuqW#2VxuQ$#xz#Uw8;ggr!jAoYkgB(G{DJ`ac_wW3 zAKhDW^tI&on-95TXN$ESEqaI5_hN2w>YLuI?(z0r+!{H$J+k+nIo;1eHgcD~Y7mbL z5vw(AdcL?S@eoUROx5O0eiuqm>>t7*X?XaWA+lT^)!)6rz#-I5&v)XXibS9S6W&h5 zEb$1%@+bxe&Z43pTKm#WF%_Gsij>80KTn0_C%ENy=tBGFGzaHZsE!b7mW`Ms-fRs; zL5sWYF%N(G05z0Hbn=9XS%}AB%d2zGl1}gZhvCKJc#i)*aB1pHEdyz>$n``-J!R!p z&cm@_El~>$06CIBBkwca%(vTOoL2a6Wc3bzbq}!bBlppJig5Mck@%76OXfu=J^)%H zxucvdlcl7Z06L3*IY_)`ar@5wYKRH0-4U>dRC};?V)3v;c$fv3Wk$Vq!ukz?1y3izk!Rk# zs9n)@tE2zA_1;SR(+MgQa_He_Orres2=^@?uy6j|h|zzTn!lLaq)`$H(x4^NVRRkv z4LfI&E#8mi8|beQU$)|Aikr}TCMxQYV1E)qU|^}vnfJMdy5GIg%7&|}fKAB|e==gE z8J#z`p7ad!iH{kkAS&ch8+)?Hmqq|b!shtoeNv&X(>z}Xc3)rf zAI{-HBslOvGP<0KD5CuOqc>?cvaEI$7R&z?P5vB7o{XY^Fb&{J{gtYRFy(7-)WuYO z3G%gjeE8dyl>PFB+QI|r+RW;$`Y)Kp(EdBruY4YQg^gB!DEy3%*}=cFe}{zpr6!na zT=e*xD)e3&Xe$GNFETL?ao>FopblNmUrc*qMqO%Y^_$bqQ>q%3YYOohg>w zd{0W#(|&C4Yc0mN&9%C4s#UG3QYDh*b7sqkRdOY)2CMEGeb>{w#c^I&Au6u4{&F;{ zc;3JfUM?R+IqHxLa$4y*x*9fbXS%TFUT5ghKzs98Z|Kp*mz9>u4JISY{Rv{1^){c~ z;X(Gn3VQGnliQpk^v@4+mKj&AKL37szgIux{Art5&GN%$Gb5&grV;rP7=)77(qeHy zN$_hip=_*NK2%5;Kbg(=P_N@N?EJ(>LT5$KXNc0KqKH~NK^ks<(~!}9LEnx}lrglQ z<*K=~I!nC0?K*P84$_@}OMT~jPq6UMRUF=6>@OH@tItJ{bE)8e!7N~@=qgXBx=DIU zY40reIz%)|(q&qszEC-~!e4YV8m#A>-Ck)~sG}odT z!?A5HGhTIW5w^~(ZS zi!K=u-+5G!IE{K;>utK$TKHHro|6ki#j~hOJ83L{g`O5`V0AYKV5ZY#{#wS@&mgTP zcv2Nguya99nZLF-R}=S7>vWXzzqmiz_XE+}&)chc!?-qiJwdb9T5_CGZ7qDg83d9h z;_d*xZp3r`wYuc5D!9FhI`miWYd;@c?mgS+1CtbEP1B>c5}`WtQIjpzwyAsCMEhK+7>AfzLhGVMx<$;7=Yl1x>g z)9fw)s6)_h^XBC`)rR4rZQ@b7?hI)M$4 z(+iZ#>!iF6rbgxiHG;^%N`;-(Tyx8SGEYm70bC3`i>=<6;5R%q)TDISSllV~%%ZQW zl>d~~&f~9w@C3RZV&Q@Do@&T=dJkHgIb@)es4bYD=WaY#!Uq2B&>$V^!QfSjt4&i2 zE_lvu8(yK>YpZ;g_ZOOz($3A3GUh`3s>el^WJP4V=nUijX6^NpBBjxgo@+U66fzJb z(LsTpC~DJ=%7~PpV5CA{7)SVLf z5A!e-M79L7r#%!^sWY$AE?3m(Mk`m@Xm=P$*~!cjUhOq6w#*?$ZJD<5>mrX%Jg&Ov znNZ++&`njVRHN{7ePZ}0{&c6XO`*0GF$s+WIDIj?^Y7-hGc+L%{0wy9NZPhoeQLph zv{aYF>>z9)$xMmk~w~5#vaST9`Fkxwg#C zEp*+aHP-66q{Cy!mxiw~4-8z6JVi)z4*sm&A8aS(xtk|YOrZnC4T{E@(o@k+mV#;% z=o2^?l;uP%A3ayhxhnR_b^zoOpD)|VmXT?GJ@Urf$%;vZ;6P&+-XGPp&f)pl6l8Ec zIwNJ@XBFsQ`Yf2dsY=laGzmriLD&lv0K(nVS_bti_2>!e%38L0EPS!2+Iy){b}2q* 
[GIT binary patch literal data omitted: base85-encoded binary blob from the patch series, not human-readable.]
zYB}^E{s(`^bfen+RP{fh+$PqyD&-W5OVl4o%Sorq)<*6?@<9#+OE!*Rc2(3LV{<)Pe7t`r&urY z|52g~>%3nY#NHkWul78PNT-~UTKFCqS-}^NGSIu~NYf=trpNRZE+9c(Y?AwMgXfu3 z8b>}LSuA~d?i7}-&NNAA9&Tpn)bi~PnKqt7#aJ)CjN$u?Uyv2gzwnyz*~kBNZKD3$ z@G{2@9~#ROHpEBD0SQx8-+chsuhfLe2@ zkjO&1@SVlEN!lqTr53?7YZdK^IGx;C42`3}hG;-MgmjG|aN+|tP9G2uI+t+CCeaAw z$Tug$VsbP+9DdfhFhdUN?wJm^k8>v_@m?~P751Cv7gWYwonZWTwvW*Q6S+2)ba{1M z7keA!&Jr=nbYh+ueOX=n_CzDM@jqryN~r6W(MH@%%43XrxT8?7h!S?7{Mtc}!!6-O zTheBaR3=+lTXuhi3uxhmL2^&DEg>0f-0P zAH}i(t?Qak$k-(yE9nFHDh@tG+XFtr?q^l;WhcM4gQ=OIi5sVj+F|G28?SSM#byg= zUw941I=`6>N`iwj7$~eVTRv0-R22go&q+^ZBjEp%5r9EQ|7^|_n_{i#tr?N-rF!KF zJ-tMeaYTQ|cv)t5&dHl|raR&el-s6&q~*yO_VOaEn0#pSz0cCfw&aMrYu8oSO=Qe7 z?$e7l>GVpxHQ?n~g_2%(z+P7a@t#H>YGy|`CdBLgot=NC+D}Qc_vcAv0M;^3K$Fso zT=L>64lnwcJ^Vkb8*Opc+s`YN6Kg?N3oC^TW^O$ZoZFw!FEyu*EgJTy8aUfF-cu;FBj+j-K{Pq4{iD+7C1V{T zM{x=r1RP3Qg0WKcUjKML&M8})Zno5M8&R25)`q!UK!CQHT4)1tMu&a6@2&7mz-{A4 zKg#3D3A-lnf47GUUvH--jXyZKXXk$UveQrFY`FC4MUei%*({r+UdBaRxj~~s&ui~M z#L{T>R*QhkFcO`{2AFg!LjhfSKX^rB)|8DLz9{@Este76R{Buo#i2z1Ap)f^0Z=U1Zo@V&TqNhqnh zXPM)V6XjdvZ_Hw*Wqro9DVtdFrk;Hg2EIFce@nM=2llaONzR+GtmyO|Ossn<}&Ox9-)*CSACM6d=s}*m|tv`=A=Qv`J5OLuUO9CGDK`vd-H? z3DZh>i;3QCXOuJ~zK-`3N+J5K^+u=fw71*y8P+QPI6Y=&P8%P%b>ih+Sa#ee<1a$rQpiis4MB7tGwE(&! zYz3yV=v+2GpmGW|K?v8SjAQ$ZYXM-!=4q}JjCvWPWpGMJkjbF$Px9g&FSUn5JWF6) zBVcf}CL6JFX|ImT0o&G(j{(gZk)nCmVQ2JH=<6c&rWJ+L#XqOa;sl0Oy{v=kla)%( z{anLJOT*>duSs0P$Svjx;xApCk>uYuT|mhBr&lvRBNvGGB>bLQv_9@;ntXA(&*y*l zY^3d*+#0Um^qzgC_IKUiGz6&mibP|1w3^h8qlSy=^aISzmPUn}15+oc9RR6E%DAe+ zWGgTmV8T2=pGP{h?bmOLd0gcXh{i8Gk>KqJ6E3UpwZMP1O5;2PpxJsici|U+bd%Uc zu4o%t2L8$wng9UQvSycqUvn55GX=5p18TK)3>E1zDNq4pm@xj#OMJAzq)~Oe?K9MM z0E$sTG01VOm_@8-<%@`$CV~^dP4C~DRSHOl`JVeGSj&~3e}Gf6P>cy5 zNJG{TrJy6=>ZZU1D4+lslX4d4fdY04Oa@=FL8TKnrV~>HIii>jLgZou(-prv)ROT6 zCcV%Q_H!o?H^UYx=}4!(qd?zBU{F5BirKPif4676fCO$PBbG5Fnau0MKSQ&3dxSmp zeA`6HOIMqM`>%cX{*4`BR8{`2PN-`7(fR!aV}~NAXMs(?UJYpqrG{HB4G|k{78|L> zMulSLJ%wQkXix(EL#O9u-D} z0vX6-AsV(;L7Gt{7btXFmQwUdi=xFQ&0Koh`Q-;5LL5!RdUc2wfHQk+_Cy-=v)I$q zz;JvSY5cH08(g&bOZ1y%hW7-9cH%E>Flm@-6vUkzdk_EqwF$qRxLLcLK&{qI;tH2q z4|-Wo5UcJbIkr%2j~itEm0$SRR{{zX4(5>B2Whh!W|fAFaE-Qt=1-M7HcgQ4sXV&> zUaWaXA&d29HMEa>ipSdA0zkLcUjM3Q9@B!=C2JrP&EKo?vXa-3sx}x4`I*jmfuah7 zDg@L9@B&}&Iz*pUkW2)6`3jmAd~+4hMOBnz0(v)}u3llP;!-2IW<-ZJYNuY0!uVJ( z1*&kk2%j=hGP}j*GaB9E7!%^f|DgrX21cPr>p$_(PlsMdF5zn8xSDe10()`#lZWZA z3k?0l26EP_l>&pR{QPAP$TCjBFYj;pPLaX~N)?VsUS4mdU*NiikOnuz!Pcfeu_yXg zDd3C;`CMLkQ`>9Zzr5lqE|Q>8PNM;_PDNw)5rv!WN0k<8&}bpJC&kjJ6D41V88kJ2 zMzgo

EU474i}5RSe~BnWOd74A8XrPptS z?ASwYZY*gs`;=!xkkNY5QS>&?|7@lnWL131R2LZf77sn}GPY|9UD=AGRvJ%K!gTG- z2>)85H=Jcm5mZTtwzM=exxp#WCB8>9^mja7)1+G|#F-N(yLC4fO z8G2dVq)*vIL&O!G>Y=#A+btAtx<6e*1{oRZmpRc$t5`xQ}mCtsWS$Ht~mM(uYpfyI_sDNkvpfz+gpFXFC zsQcoXTi5kH&rs?7B~Cz_w(!lPBG^L1ZmQC?E;NcuAUEa1%qHCuegMD6KSMCVE*tTrPnpmCpaA4pFA-~vglyAx!l1l9a7_z(; z?LLzmk{)j4qT>fzP$gJD|K0r;0W36w^J%NY@+wuNR&_34QEwF34%|V-pS0}f&;xlg zw;GrGDcnrMas_0~s>Ay+^l7_-Hd?#n2fs1I(aI&a=3c8>e)$J(`1w50Y`9%^h9Fef$?XD^xNllLlmSgpKFvq2bbEZl%)Y0k)q`FVArN28W4C`Sb~IsO=Xw*ma1 zJR1fCiH8hgcEXt)Jb*3Mr~sBoy0BHUE3GAf#T@VVpMgP)5T+z409}W1a$pMLbezvJ zHmt*DB4NWgH($e<8Hg7&W>G1|MrmGCS`<01r6r_G)jF_M5JPMF5;y?+t*_p zeN;Lfo9pqChoG4@#v<20ak&k~mBUSvj{1%&h=$M5W$mIYqxKR){#}4RB?JbehXR}< zORm=DUfah6(oY>vj$M2G+TAnjck}Pi1hJ1u_9PfnBFsYl3*^xeD04w&qid36QIwS> zy(?Q~_e&%-c6d!PhD)s`)HjR`6X=76er}Z_P!~?11xln?*O^?s8l}gSZFU z!DxUN;P$%u{jy!p49hD5-1}*o<@KMvhNXjzF$0IgI^uCoFoi#V(he9-QzeyKuh#io zo7a_mW}c_aqTD1fhABZN(+&!p+xXw2|4XyT@?x_aTpj;OxZ+b)AX z>|W~@K!W0e`X!ycrWx|0q@V_hu`s}+jN|5eLIlW#ST;kM75SAz*dpn*el3Bl4C5(> zJF_i?D(EHh(BjF>NWa7~lIJk>{h=Ip1?NzmZ-Y_bl08j}?ypTLS~s%XdF@fM^S@E>fF^woJ52*ArBG)}4GZ_c%yL81&ue~6KHy98{@{*+a*ifA8-Bot@sld&X zq~vz6_%hz*O}2R&HjIkDkq!R!9T&<2Is$`k>`2Mr?JlJ1%0;Mmw*rvuwGV%Aa^->W zAL&EF1NHOsJ_T<4px_oou2_C$W7+4tISqcd8e2t$+|?kF=|QYS=}|puhE2~d*g2`% zl={ewaz2`(LT?e{E`qp0$u2vPm<$Aqg@lhIQt1p%dWD|dzCd<`Fuz2{#uLZWcy8k% zAWqCy7`IJ>^(v2aQG&>$%0|9kBiL#$v#5Wvd%Y_5>FA+X^_D-F-fKXo*DcOzF0QZ; zcOSr^8Z5z?@Y%>fv>YVLwP1b3+N4n&B7!}An{A9ow{Xy5RKhl8tf>Mxwo?B5iSI$S z89mw-&lN72Ub8rIM__h;skBXWWm#_&bx)wjWU^3W4_ zi4oQe;((EFOe2;v4IWc#P+^gN-geZbo7lcQ7}Eq387cU-h@3(y!OhZG3KJbTdKs4M zEIWy7VElSL+=~Th^BGXT9dwak?i(V{DSRkG{%J=C+IX)kIrOaZP-;Arn^x@CX}OZ` z{llgh6DYQ4gYHU@Au>QIBMLY^x7P*kOvj#7kjllV8@sTqmB+L}K?r1KKtu?V;wo39 zm23YGW(kGL3%3c=2B{lHgyauYQDYLhJ$NJ2tJ?5sRemM;J-c*p2w6kQ4LiHCG(u@V z)?q{bpOYR{Q+Bmeb{0VVi41!}o>SEq|g%JPP3jdV3PNu+YpTUQ!;{$p4BVwJsYTk`?czHWc%)^HPxDc_otzc_3zgWGP^DE#B(oP6*2cGO!Xvu6L(GhlMz@8p*kVdax^9;d5IL@c}_1lJgT`z0|AG zVpJFvSK=O=H38Tv(7QCG7I~~(jAg=cP4corfN&QV{7Af3TT+%6W-R(go(D2T!j08o z1=U}~A^+1*EQ3pnUst!jZMq^a-L1r~RHG|`(0^{5b)_Fk*b(}I7Ol%TIZ+bN5^mxT zfF9z7!3xl{9onmQa8pBRRNPW|WH8UIZ5wt!8~2xjx0H%mD#0w}okA$C2umnkRIy3| zH6lqYF*$1n3;=>VCAD%G?IK238La*_f9vrtt&8#gKbYV)yFKYp*j_t-Z&&fp*8o)w zxB}1>1>XFcH9><0cs_lZ076a2Q^DRh0H9udKil<@7<*$uL<@)?Moxss%K|0DrkTB_ zT;nDM$=8^W1``Lx;Zd%^Tjc%=8O*CDe>prI@HP7HK%z5Z57aD%Igv2 zyd5g5!lc8vO!<~*7+(;GpD#=aVB?D0lkL@@qsX=8;Zf*{SDocn>YlpS8D7Ev9NK!G zNts#YRlgp;!U?nl^w`)vBJ6Gv?(bHBZ4BZShIShR?W;0fh=d6t@kTD#-vAV{bsxdW z$#P=vB}6qsCyTMRueD5g89~Inx3(kFB)im?#Z0E7+FF(kIoz)M9&INrT#ecvb zDA=tCbjT z!1dIhI0NpoUF?>m6v$hB!TBNNA@6cZMhElo*+P5(wSxZnN{>E%u-|gmv7@hN9ltbo zn1_ht{6g=W3LG0{xZ7pr1Fsh z{onJ|H?M57%8i&5L$;c|PnYXXDoB*J$PxwVOG9UY?A&`{=l{GrSvA!w_cR^8-ehHW zRn5`8A|@P`6Lu?b$3iiiVGM5!9Vu$&!>jStfxF9^n}2&yKy&3+>Pi*Lhl&fG0ktMP zv|iN84j!>yoB$?3+vx;b1-R%6$P}WC<(@6l*fk}fu^4>#8Q&?PSdUf$7B$pHXEO$t#A4FPy)AIjxXPz#XLBGesbA7hG8UPR*B7~&%_$_*ggS5|!# zsf_liOnlD$k66l{?kqmtBsaNt{TStensiiNx z=c!lxImui&9XdIT3RExi6JJL<53g%~ZFF{YQd1oP0DGp-?uL!L1mZ3}Mtjdr|R-Lc=uDbmmBzY*p;6A&dt zZ8?t$NiY~B4&(;A+E}0n3Y0g3BvpWY%P%KW2?!~8QjE1$gm{u4J9B#D-UDi?hivXu+Fra)-g;N_>M7wAx#4* zRJ|Yk>{{@4f8nh@BQ+Qo5AA5j9nex^%@N>fG6=rLdagtVN(`14dPi1U%)HJz=_}ql z4bbo3wiY^V`ebQfywy--pvS@d?{_~zm`D}^v(*4fge?&gB<=VDHHu3-vp`5d6l_-o zdH}%L3JLY|&))c9lks@9tpdL!B<>EJT=B~@Iu&cSZqy669d)N))$GkRZ!wWRt3?klq?C5oWe!IrW?uiQ(ZO%rA zXgKu5LDO-hCJwb~OHYV@25^?b$`ELVg2hZk&czt`~uq4tO2jR(I1?F#o6o0@MJL4g9^o@+J|q zrs@`}QH~nyNeOQK-atpOn~kL2L5(`rhHiIJ76+hgm!pc45Xi)p#%(BxeC@g2A22|4#M#nLW2hK6J8ml;?n>EBX3t`REkCUgVkdo 
zZw4y?toN!N$#04dN|x8$M;lXDTYR`tchVwA%RIB@_N5ygd-Fl`?T_d6c@VGCNeJw8 z0YJ7E_;A(|u|U_H#Xnp}kOKpB;PNg8s9fV?<2q0@V6!SH=zq0f&3=^Mek^3Zd8B=2 zZ)ff#OrE5EqJ^KMn@>$GJy$UNve4v}`52iKd-pr91aN5XvAF8naRj5lI1thRhx4sN>geZgGClm0VR+LAD#egRc@JMb%C5@W$y=?+7Lu`-^KDv^NURn`y~xdZw2`$PQqKE9_t=C)z5KB7E&#j_P>hc0Y6#M~L@ zp7!QWt@V}S8n4SA##*;Oo_=fXy%gTUhhQG7HLjA|;;9 z!*5*V1MqMo!J=8$1YT9(N%Oz^FeSV#;l zDRo^=5y{Y9nYVo7(1|5mA_eY-DpBuy=hJ_k5UTl^IQluaULGHX&4qL_PI~UgU{FxT zoLx3izj1lUvQ-kcQigDRoJ>pXPD&Z=12)(+0hN#We-xd2Jd^$Z$FFNA8?(&}8^gx9 z5t?Hov^mc4og<^Em2 z|M%D)*W+{Tb6ua;`}KOhd`TrJ4bPN@t2^(XH799QtS!(_POntgNv=L>)k{39ujQTC z6gumz6S^F5ENdl({#*Z*!QHd9*WcamJ#_1xDvRL$j$|!Y7PoVup9EFWU!>tRD zAlR8uGYv0FsN_#nFfq8MJwQWoOZcLz@8feJjlMYji#OG*On{LO*nBS9=!*bRxQ=1j zM~j)2C_yx;3t5#CkXoqEHxe*JNR(93QcnmHpwJ8fLwZ>39;+3Ba5-HX*6QMgbgjK= zRuewsW!9G#-s`++`_*9IqX{=YY&e=EyH_&NK668Ad-Gk_w!22>D5WJz!s#`a^Y=uK z@K>y=1=H;P|E@Q|Suc;DmS}n`#RYEYqN7VpoEeS8w^gC$<=wG&?x6vBa{1+gM`#eeQJ;>zFEDJJgrvLkX=P7siKWp3Pdpal&|J zHN5Y9yBd$50!$Sr@1~-aGZPtyF`9s0W6O~KW;OhkIaBzm@8mRoB$X*f{MMW(Pw^*~ zq>MCM)(9DTDUqgVjq(9_NGX?^whGs;Rsu$KG6+Peu+(uJBHM|#`GXbDrBa5?jw72# z1nS&xEwoRPi8ztnwB{nIsYz(Dj%Oozb-Pr`eq=eoT&Kdr$Tz3p4QpIN0tI?Nh7Kg8eI@hmQ%QNZHIYFt?bE7&Ala!|ZZvUi_V zm{t)NYSkZApmnEZC98$8#K6d2as;Rz?}N~wEpgR^s+Gbpt_fKr^kg1CO#iXWBjiO& z@C?3V$kj({=XNTZZuA&7;~s`$KZl$q*@I#F-l=4P2o+NSP#2k`kfZla;)Eo0JCo)& z3~1NN@W#vPyo6OG6jh+q_{g##4J)+9sYPoCXHnUZw*%|5noq>&v2uabwB!0ie$LK< zN|{u%2iKGJ0v1cH`?Pk%miV`>^4py?eaLgojUKs@9Uf6i5uCYUDHgFV+!lA4Z`J}K#Uyg!F`Nr7u>2NZZrUSWi_30ymeu`CseUJo>op-6A538*A& z%oZoI=sk-G^)RbG<62a5>Y=rZ;Yx#l?-|77v0wJPuc&!*nHo`ijRYBB*k$7*;uJVc z2=A}0_QV82HI6D7ge~HVDXf>Mzw&t(PBx+X!V0qC_HWZjE}^*uWyHDb;x?v3$`g1S zw_o3Q=VPu`_0l$>B`il)U!m5>`)o#XubX{ISuH>r8jP^f`5;f*Prdtae#P@q7hm-= zoA9J#O*qyfmnhA|IJI4hetKb-{u7zN$$W!m#5_-=~(=zjq?pNc&BZx^Ix)*JO6x_rL`rIuDvkWxnat}FKa z8_Q2P(yV;VHMcJzEl1{Ftm)L%+fmDW!I#|9IEEYtU{Vx#){WghY_oIUBjCUuZ? 
z*Hos6jgXZh^@oPVMdDnGhcM)>rAn4FxM^Nvw-zOaQ zIN}C1ZFE+KI8s%|@GQlhMKhE)aQNyk9xclDt_fT{h9`~C%9nVp4;UD8*szjlA)!UQ zDs=W<$?MkH|IN^%G}e0p)GC_mIqyS{3mCW9T*vV?$4*@st|Ztw)drEoatUJ`dcIbk zRpwL~#hB@Oapp=o$?+1BrhvhGoXhE{23Ud2lOS+@t>p|zu2~@|QJ7fP5b3~znaGUB z?q>dvglo4-*4i1=M|RFMKv$eu`y>}a(**!GkcH4Bl?eKep9mr(AZB$Ap)yRiG(}0% zf0!ns2+i8~xhBwNfrp`Fl~7Iyf$i1>CPA6TnoV5J4goLJ$-??XmlRlb@fYk*tg4p+ zJTnVq`hy0%I=XRPO$x>~Iup)_GSoIc5)nI$-WiwbKiRtRD$OWe?Q%FwXTRKX_56-~ z+YBy^o^YEEyk&BFK6TYgjH6=DWNPNWw?}1|2T7e}PM4y#uiEt7ZSK2T-SC0zjqznW zBl>9N!>F!QRj~1b^YN#s-zFyZVDhTB%)DM;9uhCmn(IUv)(Ze$!PUxG68($K0}+N+ zH(T0^)x+e5LT3ip(_V}Ws}N(b5OC`^1HfoMS#Mr}!c~Aqd5~z`;PH&%z);H^q3E!x z!Ei3+u1hru!g&LrDTjw+bCI}q8OCY=2&J%h9=3Kx{qxtK_La>d^O=4`a)mSuo-GJK zdJ3ou7p}9A9xtmRIiTt`8ELSj0&lSh?ww z5VtmgdXXaC7?EC{NVim}lLP8ifOlSs=n2mHLTBcb< z-pd8eV$*mr2e>-1Q9YqarUMtvDfwdD6A^>Tk@{TuW<3DCBz85u2IP<4nFZE zT;K~5@|4Ux5tE7_u)&N&@Gy{Q_!UQA|;I4!Se30LEZe6C-3Gz~_ zD@HMJRpZsV3=e<=c%T5dR@_7C1(Iy=!d{f1H^>g_-V2yjK(9wZw+fI0NnuKuss!Z| ziQ+F@R%ZhSSk(U$J(nN^144C)5-bYVDKtD~l68D^@)Gjj#y*+=(Kw7-5l*$m`B#zx zDqoeLHp$Ui7NXnMBFfwYGSxSp(x2e1&{c>GI6-YaVEawbZU9W7OHv$>cCA_lN>qPb zqbt>LkI5}Y4oIborwkAj>O&?;#3T_%=?EO>?#>x#O8YLQ=&oKXPe%b!6l>h$?sFg% zKhH?K*OLkzej6|#jcD;|1z{R0aU9Ygm%Jf25wK0K`2(865DSv}Ln629@JI#FtP@c= z2yPLIzl4;quh+?NUK8bBk9*vkdN?R^^8us|ws&URIwS))K!u7_kOmdd=qlrI3(VvY zjF&}pk@$Hi?hCOyN$RCIPi+;MvPE^_D>w~!m@x;u&en|2D&z|=ehk+WB;9dETQ)~> zz4F%66pS`Wv^5B%@I_h+fiyGUxaGb$H$}YT;8};kzFtvJuPC_}yxTN*KmCk9*4Z3k zn9+M=M}krhdPs>3e(I=yl4%xJ1$`|@ay$bB+JixfE2*03uW0V+9X4$r5w^xNY+50 zPA1aP8tt-D7S4GC_!LR4i;(LU-~&BeDUbA|qPW9Q09N{{Q33-9H6z7okk5;rEI=If4()tstT(932jk}-CoDco=oM*GN0<*q zz3p1pGuP6mwC*(`f&uXkirDgryH*8I@@B;5P5Hr>BW16~Z4-<3od+2Tq~EyYw?$xf z0#a{ea|^ymV`mC|~DcsX?>%+uO6-Cl5-x3Nk^~f+YUL370v94b1GF zLwKqr?&HXHa?zRvnA|G9X)S$uPwdG+hw%$`a1Ia^VjflMpI7L=01>(%Cx#JcWRlNC zXmmlAt%cVa1)Ewy4S6tr?!txbP;6+;q&BdwbfpK$sF*z5O}gH;)J+!?h16)aG|XQT3MCy_v(T)no43Wd^ zX5z9UCv@Dk4r$qViQdk$H}uewMIP)MKNwi;!-8c3X$dmJqPSG)raTnv|I^?jj^$#) zNYr)uK+!FP4czs#QW5@20X;cL5HgQzoa~vrd9O~Uig%uI$$^l0kajJ?U;+Bx8sOz5 zcCSNdz^KCUmw!DhSVa}D?LxJ*0=xyW7Z&Ym0ry5}x6)5XEP99Hso(&eP_}rOz5F0T!NeEK9`NreNQMcv{ic0?8d}dczqgDcPRjae&FzB|y z&wciF=t$({C~w}=kLZ-7%l=S^i`beiwIHE3lhm@t(f(ULZNt9%qYmB$5KTkSX-$^t z@FXkpz4|a<+PcNHPHfE)JD>LyRZ85cHD*-CDi104gZOgqisZv4&gNp_@h%m<@NIm# z*D)@%sS-l-0X-Ey@PkHQB+^nYcAb0kJW6um`5iDb+0p`XA{B#Fz-CyysxJPdw9BIv zWu+_hazktbz0w329}e!c7jRkvXshg-7L2B&rK>VPJPG`3e*3E}DRHcz{q73r#9Wbw zugL5x0MlSLyoSZzJ`f->@09w7qC0^VeyMzZe zTjZR&1LuPZsH#%aE(kvdy;&aBmxmOTg0u<=Pgu^wO4rRTs(vCpgyk=yu9;Oxw&TKPnwF(B2=D_||>mct2sI8`PMQ?##?n#u6Xm z_~7ckkEK4|A49x9nj3Hi;UAT_3kqU}(Vod_Te&~{YZJoqp1Wo)UL}+h>%b!E0ROl5 z3-j>LJvuxEQRmhd&AVxgLuN~ekGI7{q0hC>q7y~iIuS>9TpNq>FT%L76?=U|1IePjhIxfX@pfytsIk2VZ9i(KfYWE`*Ls4w zt{_I9Ti-e!Hgi6*xO1=fUGlAvn3%;P#_&$84W06x6gV#8P6PEdX4I{@c$7g}VS{&NSYElAnh zwa+1fa9xZUu-8;L%64f+iV>wGeG6gL{NPcVsJibS_e_MN->#7+EfbVlNu711Oy=^{JVblVra`x? 
zuigi>IngAupZFu^H(j$0J~lI;)eyA%-L2U80`(dOFQq;6^R!P1#e`E-!HDgCj?>n6 zi+saotT%4wz-Mw=$&4#~mGg|G;k zIop2p3Q|+oC{ix)^g?L{s~7zIgHI^MYWc29S;g+&j3rPU$H!G`7XBuY9l4Bp@bljU z&Ju3s?jaIR6P1$^H{)7MrQYuf9r@=GgYTCXnLWCehxogP8OaWRH+X$%<-^bui*GAI zry{=B*o9Wu{n|`j48?hi>a)6W`}pKwhkk)4&ZKxwLE6Ve>9@kq={Z z=JI_j*mT%zaoXD@!1h_>&H+kzLS-MC$~8ZVVW)6slj~#UG|cHlp%UGE?<|B-r}F&t zHqT$`J*wr#;DOtHWG{X1fOnyW?&bqV*?81%T;wW_AKxs!31-GQFM&l&iFbDoG8wi- z{&8}M?PZqN^jk#+4Tj6%lJ-`YI8?d4#^VplM#N}j{fgY%tYQxC|HV<>p2A^I%9Xf^ zzAT*6F#ihv>R{wSf74=C_XyQ?Dzism+rg#~-jwq{cKI|L#5r7JnH94RyCK#`sxe^VRHj=J7nUby>okpY8$7l!xnvnLrB8}>>XqPwDf}zF$i?H;mH*o1Igm`x1TuPl!R2t} zm^I#NYg^@F6p8~WDe1R(1;%Wls^7$l&QxP$^$L8=4ldm_7~Qr_;A`nONAfd1I0ztF zXBPC8FIFyD3G&q!g3q9nQQ4mKGb`Q>@LVW-E#c~uxNd$YrE^9?s{3Ci@}CpeGJCpI zvlSn^_^;yB!-EaG8*k^%#TJx0e$O;jtdiD_MToF@&m;qRkIo|8h*1-x+l@aIZW&r7<@N&V^89moc$7wgPHb zqDZ_-rRJ~WC@~V??r@%V^m7TaB?15_{;g@+3EBZ45)1&X%C*)Yv7|_9U*CZJujwvG zeS6%`gj{@shaeB}`{m2=qEal&wCFx|qB={aZEZp`@9ME^iY?Dw5EyyH9B|9)D$Vu& z7+qIHD(Gcn?3SRFVwVAUFNDMB)$faczooPZiNysvzR?OTzHcE^aBUyJP6GXHrH=MVIet4Kxe3I;AK5}}nOmwNP0VI4T#M$?&A zUb05wDlZ!5si1frr(08>>&cqlCG}fUlI;}`BIfNGKlpfMW5LU34~xICgh#McVJ3Z6 zvMIvzX!!sVWx^;-Y89?wa|#SLSU@Q2r|STAME;fnE%>y*10m-E`tR5Lb=wNU3TxDV z?L_&oUK4)nl9(NYn{_N zcQA$OAi7B13}T~j;=h2F{+GoC)+X+O(SM(26`2+3=^RWEUs&PNIL#120&@JE(4g|0&k;id8Ekt;St-~2dH-tPREe}CbA0&tYPs; z=Ue=}CFKs(aNFeanb6xmG}pYh>e%!h@BSZ8A=}U#f|WAl>f>byQWNCSwCn0;+1(>= z!CAY5*9SjyzHWOq)s?0r$u1g3*6%xB=4XLL7(N zNOol-8M4sA^=7IIfB7p`>j4165lJAxvG+e270!SOlCZ=xu+5#>oZa;DEsv6q$~##G zkhqfkCo+4oA~(aqbQQhcz+#D#19mug9mq-9WUyv6H&# z+JuvUr~c@}GeQDMuGgwUQ-@EiU<*lK>wdGoX`yLd8=NWSa&eTc*n2p~VG&UQU>p_@ z)or0=o^Gakd)VTA{}~i>=Cia4VDl&fED39kV-1bppqFw>I4Lqj{ac7`eW}o4ITqtT zFk@0Kmxd4X_?|Z|U-{Hsxy}MfsBJ}ICO4}aVxz!*%aGB+PF-9_tfuC8IT57?Y6D6X zVF36WK)w0v($XbtCKXlQ49QTm_SULQ6sOX|PD5gjso{6h&bmJ9&#%6o z{5PrGhWNydkYe2vJ-C!-z6|~28kBf_yQ1XOf47j(*;{W4Lcpl@|M^}K{=qMQ`Y@eR zA%Yumb!7b9dn=g-MB0=oRPe&0ReNYrjN55aMv5nc(EhF6%Y?M~Os*BS&dS1z)%Aj0 zA!f38`1;>9*&s{iX(l(Z`0U1)TAhf_m>Kpuw$> z`D(2kpChDO&_)=^oXL%Z>2?DX{N}z>z@v(YqkqII@biz`w<|CyTwFDPse|eUVMF^u zy=_hpQ`PNA5$jTww0Y6QuZ`N{evHy!%-L=&Hjl+YKK)Qx+|*6BMp)97TGriAszPg- z$5;T)dUqSsmFl;v)Iw{KSP0Q8vhM=_q(HdHZbO;Q%lS?3at%;TT*pXp;x`_x8ba{B zjB~tM^`2(lTxXXTF0s;sS_fQb!?G6pHGYO|{xRaq)|}q1doE(b{KGA*)~C8JB<^F9 zb(0dVI?IF7#50=V6~=yYkJO3P64cy)?Lf`otdvf9qRFaME~;g;Irv0(=ypb%68v zyF28NYkHvxsmLVrv>VuMS&DSxBQYpaR>!2xdDH11Z>IL?c4@YpZolhn$JB7qAFhCO zJUty+d)zXobj1yxWR~X^^CWeazH5xzIE~3V@?Oq+y`JTpQ4rYrSpQqY$K|3!vOW^4 z^wB^l);jughk{HI@*U$)^}u~ME^D8H)-JY^_b{koOsO{}hl_CxOiBmv)`7!O5NmsX zV$dM7D~@=!@Unp#U5Et2$Ev@b@?AWv_fzKAi9-XSWr07?95RCI+j}eNcCfcQ(z*>{Tz}*~^a$-uK7&hks_0&X#JY}w*;S?Ya(t5p2JdI~ ztI@&zCqRo{US6ePlF+xh8&}_P@7R3lBMt!8aUZ{}5B3Km z5&SKB-K{>4WCiL8Cw{nqrnZ4eF&Oc(QT~2+!OTQuf``jHYUda3Et6k z6r$jSa;Oj7nc-WV?OWgRyUN921zWDZd~9E}*a0_hnNn%P>M`SZ5>b%&-S0>MBB(@! 
zNXYIOguuddKQFiEerEHSYP+%obkf?!*NqlsXYNEMTtCfaGtUk9;wrc&T1~7nTMrEN ztXp}_)n92U`Hp<_9aeegAAL`MH2W^`d)meQF-AdX7$K~a5^-rVww!UUp$r{`ddt5( z8#Vlxg!=ta5u~1y+}x~x(XJ7l{vN+qY5W#pn*cI}O7b9Zua29Pj!81& z(jI(LSu81;A8j0IO|SPPRAY<;NX$|{vBfN_OLbb@g>4DN5s2KP4K7KL!x- zV#|7oYp>+Me%es+DG`|G1njVQTd=v!rwi%38K@_oQPzY6SwBw9frxl4&}1(Ga&ONi-DtoYkx zW-H5EZ1bI`U4K7&I&S#f_UR%ERqxROWwgZsX5cx>E_QqZ~>DKr?Vz_SdT>LjkMJ!!jBxV07~Ev>@=7xmA59&XH9SLYed5tk+* zm@J!m_pSPMEwbG{a1!bY)UOd_HcqN%{}vjFg; zsweN|ysqEriY*_>X$a|8*N<=9X2U*JwVmy@#tlNo_1%sYNRI=bsef90y>aQ$g-eSc zkp^n#A59`GTL;3u6O1N4`+(qVYvo2cv=sL2(j8!9dV!|UQFB>iV=wdpkALtPx^SCP zJr)ssJHiD9PL3-K=ptPG@Bcz))MQ66y(@7nDj0e{!N%8fs9Juk9a2YK6`X+=jT>s9 z08k2m$^|J3pz9E-U1_{5vgE?t9HQ>rE2>DL0klitDyi5I=GJ( zH8t#}8ri^^l8mw^xtl*FZZ@7AKn!T<-v4{%b^5|(mcYxgLgGT*MK#?)d(d`%WhZ=9 z2^x+GUnN|m<2u;@ODGQKc$&^AkN?=fyWXpL3n-jx7~9an7*+VH@qDlDB6f6A#<@9X zcWJVeM!z1Lw|i#%_qcEQu`^W+E7JyaFd+xh5{eL=3HzLsWB_5=K#hPveF?4X(<0486oG8vNrCu&9 zXWG9zA4PX2{Qf=R+~;EJ0!tynJhL3g8%iF1ZZm~xR_wH5Fc!zk1r-*``(;kg%bjS%Tf5gU`l zTFano2hz0(2_uQ?QZPX!#qhST7NK<5`@=zsbY_1;27!!QJWEx#5jz*;D_N>;ms?&d zIzYFXG`H;ANR+K2${FechvRI5uj!N1Zh3a~10tz`{c!F7GEm%)!uT#n zmBWDtJ)ie46@9cG#yBQE%P$Vxb$%Vv!5ST&IdS|g7}&(SmiIuAfaPm{)5z&dQ{xc@ zyhAjQP?m3xNc7+kUC zT(?7Hk7Ljuj6pzmT!Uwku3kVoO1f7G7-93akMS*gQ4>bZ`I`XhPogjmdZZrP@2Y@@=b0Ue8n8Ev8wm z8?sz0t?n;^ERu8K>JXRAJeMXf*WsSHQ=i*MT`wH&I{!)H$d_D=5tpnHXP6=M+r3F0 zz0^95vOj$u0EyfA6^e0A#YALl*c zHUNVNl2_^9Mm`A3^kl}&-#1r8#uj}r1V0OjAonWh14>Gek6Py6ia>7OMcAewamFJI zBi1qpLG8RRhLkTaFu4kGuCiO>GxX8@CxeLk$?CYs{ICfL-!QiZLh`lu2@hgNoHNv zxveT?&jErios*J<390wkG2Ib0iYc#g-9Kj`mNdX#PMoeIXBbe5&PN*r4U_=ZQCDzn zEj~A&rXen`e`W%a^bi|wW^0e-uDxcv75k^@KnDbpSjmsu{Lksy=({A&4Ag|5$(VH8 z^Znzio&&$1y0ib(|2}KmhLJ^DSM`Hw%;a1|PGFmsV(rC2Pv{R>yS7 zw7d_Kt+tI1p8Z-)YVaB&8Xlu;cFleN2lnKW^?NB^c>BcrwB*ZqFRoi(dB{Q9bUYrIIpa9mBr2pOl~`hEzU1O7 z&Q2LOH4oV3?U^dnleRL87_Bms>npBb#!tJ~*J zBI+OXFdOQtxc3*XhxMLoyP=QefoM2C~<_4;r+8 z$O;ZlVq4BSxjeDhjkllg4KJ$M*wUb82}_|cIA&nx`keWRH@YWwzI7Ri*+t0L^&>uy zzo|WhmEH_Sf{81qV~#|2Yf`PbtC1RlSHSx4;5ZY4spGf|xtzrKh7BCy{ZpS@VZq>O z6x9gLg)XhJ<(o2M!4p|>&wq%tLKxn1= z86C*kcy9OZ_G{pyAJOmmIsXgzd{GNw5Cpm~Qd^E&p4*)l+uLaQTii~ImVfLAmmilp zl<{6ib~wHC#vJvouzbF|I<)`8P_1N+IeXOM>}i+!RVNb3vK?=umOtM+(m|wlX%D1c zgt@a-T>HAIB9Ec*g?EeTS6z|k%j!&VL|d7NNLOMrDthpksj{6iq~U(fhD$SrAdUn? z>U9AthH_Pf~Hs%V)dv2*!ivpU>7)`n~;yg0@1MYI{3>c z8&vTF|2JKNWN_tJsSIT@tUyjK6eS5vGmCapUD`le z8rNKZmTl@fqIKwx<&Q=yN{##INuVv4DI%A z-miP-&L&+{bFVR)1l2 zFe`8?B3?|2l%aGpC8im6H(!LtH{i|gb{qW^q}7EQ{3DBW-G6{Km6eot@&|rh-RG8B zC*q-m*5E~HMd3OGh)oxI>DC@Do{g}g91csc)ql=cM#bwN>L|3$yRYO2bYl%+g;6*3 zflqy`R;9v=0gFg{>uIdvFkltxQm2{dgrr&+7|Mm{RdjHj?@^FE&9Lwc0(nHJ7-hb& z!765&J|^?hy&x}e;VZ>7xhJ#!^#Rx~hqi{@RQp%5nL0w~IAU1ha4Z(*U*Aox%`9|y zA%k){wS91P^<@c7HSN6vz8-_mNPp+*FyDN&iHRa_wRi@ggR)A@ER;I0ghdeouK@h! zanEy?6%kYVg zWOvPar~UWC7ccxvn|tN6Co5K+)w^TAe`2>*ovP45Fon%n>M@?qJQ)!OoYQ41H@x;! 
z`}rBdop63OO(NP?EU&Vda@J{EBpZt6KT7O7fIk&DPiK2=Xp1d%rl??6~teU9*oRFubk7k4G9j?ZP;Bipq{$q2D%jj2^cafQu0i zqfxcB2)exC+{qWH^*2BKa%HSDql=V5d@S*T)d4?J9Kl(|z3@hNjhdqn?FIYq)E7il zek>ts4!{J>YqL@`qKHKxId9)hxVc(dC7a^O3J7eaG4bZ|5Hr>Ki& zX8t*EWw>YhyX=Dh`AwJ0&rH2$`Yk@p>~B5pnJp>bvid>dkmj4!fBtx;>VI(2u8a#C zpx*Lw3#}ZfV2)EwQ)i7Rbi31Hj1am0-2Qx9xL;U?GH48B-OcZyOGM+p^=2{#e7Oyzg_&e+Rg)gRE{MG-tTB+P)XUNX55Wn0iJlnj^i; zA9bJb*Ln16qCv|P$%6rPV4ehh0QnE2gSLCA-ydt6RS3-7?G@2Kar6svn?$}m|?-6F1C>mqAgdiZXCcMPy0JdaVs*) zN#(;Ko~ZpZU9R=5Bg^3IrEi0xE>?@3zC^!YMDJIYwzL1`gb+u#8rL)T+*$}7ct6uV zMT_{jbbt8A@UQ`_iq<|dz8vi90hrZfu#0o zCc$RA|3|D@=rzzAVLey|%$wP;W%v~a8D2~sr52-|JS{u>5r*J05Oe2Osl;0qZ#FM^ z+NMS+RWpMG5{+sM6d1aUWP-)S&(%x43JtlmecKzVZjQD>$ z*faK1#)NQJ5-z1TSu%g=_JF|Pe#MVFB7-|j`p-&(?pTMuScmIMeU=h4@8z{vLD<^( z&#g!QGa1>x%>FY-h7=@re^35cAzVRM0mK+ZCR>Olc;Af?V_>$edBgmykO=<)hd&~k zR2SxCgg6CeL8#f@hO`x`{{%o6ND;vx|5SlL`;XZ($N{mq;dl&c`dF1Un3laubsr39Tx@+ICh=c2{LvJVK;AK=C0N1yWNtH5e-t=~E1zB#9=; zwnqe0!be^GeEP2{yGEY=JCw#1(hMoMC4E+d7GBj$s_JJivcCSPzG(l~l!2=Q-B#Pd zJzR6$SKU&D*m0(9zJl;g{y!6;S7G&7$pzy1L(Eu(W@KMA2XI*{b2b#3It4|tAj~mS zjCGNhbqXeJMfD*ah;~&Wz01tED>UK2F;;Z(LI&F;melsfVtb;Nz>c)xF)cu;sq5D{ zZ3advv`m;7jw)^NJPL02OA}(e&r(7f(353bNs67r=-lcoYYlUD>)coxKr&$(yqF4q z)L=-@i7i_Es)1$?0iCuOZHP_hVUiT63K=E`hG;?rd_$wDMV~xslKq?vSz{6JN4|XtEB%@6)!g6}?-ab!I(&ia{kNU= zt<12Hh$^F_D#L1ePl33^6g{#_B1&B1=1Bv`e3KSZIMAHPXl2A3EukoYx0B7 z%XST+Y!m*uTr>7vEtLW49F-xfWW+Y^#so;w_HDIudvv9Pm$>_XgMiL4P$ykUG(x18 zLL>}3wnBxh6%zNOKLjaYDldWFr&|wVesXD6Jlb3bZJMz{+XYa+d3Jg;o=(Upj45Kd zN-bYTAWYH5l}4pPVn73C+UJX^uXthlK~kVlJxvw>2sM?o{O|dDY9sXcDH|W9+`D8~ z^?m(l!rncy;dcu5j$Cr~w`6^7dzU$qXnmxag9Nbmjdx5>%}IasG+n7Q7<_8{KP`ik zB7*PEe_Fml!5^Z}ttXx*J_!;2V=u&U8^|s1u=iVtX*djni;PbG+Ny)87HY!Kz^5wB z1%O}zkD^9DA(SK*lcac}ZmYnUC^6Oy%sWVp%~bz zp@O=fq)+8PmB_Thzq0hd1JLAm#(xs7|8~*U^-in3oor-3LlWUp8&3ES zK1rde8>_Vdr9md^2LZx;A+9d&C7>i?r{EI{jHkS8dybX~>eVZCGnqP*LNuHon^Q4O zcsq?R6X_z@6{*|OVBm|>nNV2UL0Qp7HdcP91%*~g7v-nw7z#$-0(vbC`lSrG*aVV7XcYVD9d(bF&Y>qshpuQ1v1S{DIvX^ioE)QFj=eU< zGB^H_TlwNWyvOd(8Z&m^>2NsL;m5V+A9{w>PaMvONFK6toh8iwPFEX4%!@S=|A*GW z=jWssemV;=AwcnK2C{;x^V{HD#52tVF4ENo?aD<5d|4@F5Cl_{V@jHUNdlOpASEp< z7Pmo&V}bA&s@Gy;Hb@Q&jVRfBd=iMe~!V`UGB}P0Bw3 zE;oL^&iMT(v$?_KQ}m^J`s1;76wFzFm*8$%~VL}BsBMoP*JeS$i-Gr#xM3L2r3Q5ImPHdn1b zSS?S8;nb>?GYTFEw*abBmI~XJ(#?fUMKG1u1OO>q+l+;8thi+4-GEK>b<~tsdDFJXKk?Lyr&|*|2;qtGX-xSo7u;{q2vRHV0x`9)34? 
z`c$8<#I%`3`yU+(cwT)a0J&i|vp|1adh;EbCdOHXosS?4N!lq-EGSB1Qn4B;lb{aj zb%{t7Dy>N&revHJHcndsTwJ(Yx69yi!?6Y6gDc}2YpMnV68L&q)B~R(*h;nx;Y_>>Clf6$_sKr>EDd_yTI?jY0*GR-nQH*|eyaZDYZnm%Z zMF-%t?(@}0c6B=C)0$4udLLBkq*k>veO~`jb=}Z!+oJtnzqT&py%NN1{I; zZ*VyA1e=o)7#w_K=g%9Dy|UU3wzbWsb((Gzm?2FNloef2!(&-Xmq(aU)R}?AbOv$* zfN2+!Jy(!YQoFU$5I~I~oZ0Dx800ayVv%eeh6Cm2lc2JHN7NJYx*q!eX ze6|8rrFa{354E2$>iPy1Bts{_M}NPVfpIGaJ%-F(Wx%k=5Nu`wHD6S4E8^V`b;(Z} z=uqv{P(vfrYwu_RT?RwinY6}N15XlYCTP6B>*ClJ`{!FUpVc}j8zwgAe|$Tc?sD|O zBWJptskh59TGT!G-^!=|kTstK57#u!$!8T46JyQ*W=N6hY(fZ|`U?RP=NS6`aWQo6 zZ_UC!e#1K>5g|f>pigQ|3o$6~6>nDsW>TTP;S|C4Kg}7&);BU@1oy5gDIgHI0AR+x z#Q}~0SrFS60bnR1+xf-j=RKXH~v zMRzMV8(RHby*hty^8ct2l3&fgejO1#4x`vN#MocoZ2$NE^wfxn_jmsLG}#P_H*qq$ z+GB=gO0j1FfubvyDLi}BbWI{6QXY$O1z0WE9>rs^ zV8^HgUJs01*HoKst0S8i9K@|+pQ*tTc zo^Ohb3zh~zNh|!Fqrbg48g_7+Gs+IU$gUP*_B_9obvbqQ&3(sdwlSmmo33;Q zzTC0xMX%0k+mQNenNzT{@n3rV;@h!voAeG4V!qFh9*w_J+uVcJcx~fmJXoyb3bFdm*|!d{duW1Y?up97VT{Vf&wj?3&d>t#wp*VIjrUL^i%skkG9z^S8J7^4 zRl_7NHRr(v9^QCd;Dy%{s+4$hK`Xoic{4; zZ}55F3m~xM+^txKjEfLC$&QrcEP&@kxN=&8aB6QrVF->8m`IqI8Zdia-(#Mfek^Gy zHM>o>%<=qf-A(qF4IN(Eb-mhvcewpF?xDj?(!ZOsr(=4`xRY+Kq@2MhztrZMY_;;8 ze|?w&`RX-)oZS78KdurAY6gIzCgL(r-hPV$ina!~sqihLjdvocjg&jYGpd#391 zDx>|2;DAZFxA*|g_x$5Qo2o@Y59=Z6i%9Emb(If(U)>i!P2)wdm~Q7SNPuuBH_$RwN|=Dc|F3P z;1;G!qnGnN%Uqf-S!UncdD;9SC(m;y-to`L{(nvlvcrRRya@VwJhPv6jlR?4cYVX= zI(5yEwWAGJPUH+;m25u!RJaE=YQ}Keb~pI;ZpmjV<=%|QCA|S-zdY8MC(bgbmzF%3 ziP6sXMid*c699?2Rh{6ESN7v45O{lP0jL&{GFoaDHTZeZ;o4rdn(g($*dCj%@h~6! z;fXH;WY~)ZL~`s&%8l51G*}h^YXztp#ic^~5)e^DVEdd#0r{71%B%70VHPritbzan zXFA9+F)|ZFtXvlTA4TUGmgM{QVfGZly%qO1+^AW&x8j}|?v;i)ODj~wt+;a54|AlY zMrw}ILQ^x-!rW$sP203>`G0s`aPaOvIBqyDuKRm^KIgd_g{uWj$k1tSW2!gr%S zC`r&J9{B>1>X1A!$B{}XfypdtSpcQoqe1h(;1I!7RrwSSSf3BbwjzO~oN`5G7lnMH zwo$_))2a-1fwP!$jO(f;%wC z_FVYG+vnj3L?Q^)PbjY%LRfQHQCUJ%hF z!IFw-01y|z5O;IQ21ZVF)dE1F&H#0mgCV{;_h2mu+~W3FRvd5T-NTSs#NYWLcge8s zgMnub=;`^r-8&KZ+c~doVJeK<7zgAF*=Jct{!WDt)*dXZejFa3t{Wtu9H=5`7>HP1 z_c_~R-wxQ)CG~=?mXL7Uh!m==j*_NGQ5+9~Uj)F*Bdk#td#;)zX(S+#0#?Q6!W5Vs zuyN~1_$Kf8{@Tfxi0xb*VjosvJ^uxh2J|{efy(WW;0(dO8^_Xpr+lwMYM>o9i#G(4 zNdO3KD(iA^usK9uj1H_Be6-~+OQsKV76IO52&Wj*YN-dmi2h*QDu{$it3x=m% znj2dUyp5zf}IM{^V<>&4S5JlLV?AL?LCx}lnwCGjR4u)RNXVhv=3b612d{TUBE6H zB*I_UN0Dw9)rn|`0|540JjfYMYdM#R9v}j*ED+fvvjD-z*W>SKg>1~n_%E(R3|_Ni z^g4R@;0LM)~O#g?&NnN)%_q^bxH{u#l4I@Cd}{9i`-7 zxrt;ApY(#dU5JX>mR@Ap0Q)SUHU}a_DjVoxa&ACnnHY!9E<)pTS{=gqQh>-`gt}K+ z9iZ7@V73>aHkc_Ps>y=v=`pP_AkD_R?@3M1onS{lrfn+1gJx|rdaJ5hVGgRw!bG1h z)#IPbSqAPM+AICk{69{*x0b8wP;k?#QRATNSY6uO)#LSN4t>m?Ms5sX9&8}d7xA43J|9TDvwns`QJ0Z58A zUd?@~rlWBktA``gh=4}8H0DwdEgdH7D-3O0x2`CYn!qWiq3p$_zC@GlNcn!hi zfGlRG)Y_|rViVU>px7oAb?Coq;g?}iM?)~hSu@A_u0|zisrWS7ycxaL z@=@34;?{}1%&3gXt08X!BV|$nxj=TKiC2NSUXTg^_|;vwpiAT6=Qp$DzizHp;7v_A z(iO}OwTP>D0Ua`80~KNwM{nYLgBtS2-mz`=^{~(ZPJ@_!X0cY+7Az#LRc_zsG95B) zK_zDbZO5hMs|f*zbY36nA7O)cc5W-4ZeWNzoI>o%u#?<=n{^vEnr`AnYbZf$H}U!zqIQORWMY7LZxtHpp8HLe%nVGw(p>fkOl+XI& z1c0Dv*J1~N@#6tvHXSx6`rEC)7`T4l_El#A?Y+dztd#`%C^<6pGy&1-h&W7!XS1Pu z@%9J-iBW-tbfAF60iY&wwnZ1x6aWGOK&Dh^009E#14G#G!@Rs~3M!S5YD)m_XM?nu zAT-X?MRT6JC%;F$~t^RH8VZA$SWeqnj{o`vGB9mnPSqJcur;UbJ@vOO!)=OgGD)L zR#Dw#=8YAn8!zSVM57e=@ex0O9(-ss8+o2mCE%#plY!W-Voa{S#G>H1Lh)1#5DWmf zjaW<2aml78id?V{8EnmiGk%2C@b?*}l@?GDH8|9O)X6;j$!z|7%sLD61dm$@kIu?u@<}e}+Ji@Ddz`}_9mIroYNCTPgS((pa(Ir_$3kE)n00;bCdyhB!QhjW|w)m9ilQpMid4cpwgJfCn2; z!0D8hnRwJF3vCIhwb80ABy`v|q9?o2$r>p&1mq}-Sjo>fWr3`?%|09$l?#629DZaJ zFw}NF>`h=;ak=|L$vEVHb`*#^3z~fidYCInkpmlc;~Bz0NfM%+Tt7t-f?n?`4Q5p) zu=w?e-7Z!fpdp?Bv*EF@zlFkrxQ{<0-mRVYytzQZ^!xXX3!m`}`89$W}TerF?)0KW@B- 
zZ0FUq;}LWMdVgkz{YGy)6J20vZfJMXz6+j8fm)HztrSD3BzPqi*)RaYk`l+3%F#6Y zu*UK?-RHAEOMbE|w&!2;U_m`_&?8*v5doN$4|OL&uz`RiU+L6tp*>{x6bT8veii0| zgkQ=?yjU4Wfk)E1Be|?7cK3I`xB?`zC`hiogxUGFwQ@tA$H9E^hacf0H4>`Y*r+)c zdWC?pFRdEZZj$eg@rSk<$|wpDsihnUIkAUD>v8A7vT?`}0^&Fq>`jIW*qo`X_Ci+K z0==U`6cvDy%QwqnJ%V^pN5y_Y!|-sL1wharui;eGky-7oNZIV`GWQU*97L~W z_m#_)R};H6U2oX1x|>VDA6IN}GOfi%a+}PCYVU#KxPg-zTDMXV=YJukkc00z*q6Mq zM||{k0;-vfYUV+%%LsOMF~yF;GGw3z6_SgG$BQC{^LiYq5C#SPltm1+KprI`M@Vv; zd}7&vY()(^@lQMBYzIB!%qm4dZZt_{qK60wO9S``J}dwS@uon7NC*K{&xs7K<_Z}Q zJb={lU}?|i(r1&kFS=J3>|2NA@S%HkH>&FIO*Xn#<=~)-1c0kDKs6J!SA6~AqwALM zuWw59A019Arohh>!y?Exh+I~42`JR!%HqTRap1K097Zz1`)2y3s;FouqeE^+^5Cq{ z0RrmaJLE0?*nRZ)BP!-SX#KkPLWoy+D^)+aL$~+AZ zH55Pt0bmgl@LC3e0s)9o0p;1}hidFGq{PaX=iQl;T5dhL)SLE(J@(|$FfO#cSgk@p zLnI?D=C7A=5#_W!L^%_&+>>N2H}%bIDuxEr)PZ4}rd-Ovl{92%@DaV`{(`ec9N7WS zn4%1`WX+Bc{%uUD&A8?qdWDOPp2z-~$FA_k!pD%IoT>@im^K@d;^Lho+hz>{*o|c7 zghQ_Ig%G|7$-{_5bnDz8+>{AfL!p%P9x5M3C>=&H1t4bvSb+~PBp`;Fn8!RJTF-1A z3AMzQS*Q2x$}r3>cVo_LVOZDgVJq(bjqph29XBBmVoFyL=E}=*We?#y|>*< zIL^B*BPmogI4<=4ObGOs+>g-l7IV}`9+uC3@PUna&&9kaY^h2Th2 z-EN%h3tt01;xTzq8iQ=*%?>j=^dasJ)I~FyhwT)E)L{gJf?%;8byFa6f(a1`aTYLk z?vh~bL=|%23?}+1AL|J&t)?JO@nKecK>wD#Q@Q=`1j6Y2W0(&Cga?T8COrVYrQCV> zW+CXNVhVLsd;{{IDWa2xVp3L@LKou4lQQsAHa)PTk5^+zuw(xgQuNl2pA#TIPlwL^ zr+Y)TU?b`M)_v~8nG($h%U+?4{g@k^#+Enhd%S(;x5th?8{2!&oEhXGo4G4r)k(*N zNVOW)eP3J@d59GPreawrA|&Z433TlvsUF;!bB3e@E=Uk~}!$N1Gw=u+qBG<&CYsJ0Mlj*XZiU2hLT zK}&k>G7o?H05z0Dbn}EtScr$AtLyVm6Ho2@55tSY^Bn#zIJb1Cl!G)`ltvPwk-GLO z=l(>HricXwfE>%8mGhoy<=bvEPRajoZ2cC0eGjnjBlp33Dz^6DSlrmm1@mGQ9{{bB z*ikwmou#M}4?075IZV21ar4%_T8Ih0(*dxDR9$$KgGbDrSiHXQsDl7E;>Gw=Y)x;< zUSgxh4kJc6*iSsn!AA4|tH#;`FdBNQ^5x?vk4Hvw?4cBhR_JIO7u&(Un$d@tBD@Bi zS&1c&woo8oHvAF=o!|Gg@H`@p|5R1y^I2_P3;}kC0JGq-%xE`Kt=@N_a9dHT(Z z`ZZnGhJs%=-XGBVluBbl4&DEZNsyZv3 z6`eP~nfMg*iH{kjBC6z2kM?Aa$C{^=X1@xGy2nc*l!XJwLg)FEeUc%s(>z`W^j_WY z8_nTC#5wR`3c7-ZD5n1Uqc>$cwyJs=7Q_D)Mfn^-nTn)>FiqfU{k59=Fr_PS)cF*C zDe|>jT-e*SWF5I8E$mRb7PEG{@e5`-WbhX4E1!p6W24pXW1sRdJA`+3?~ssxw0KkX z^X~srgx*U5ZKMJ4WhUl6{=4@fbUV8=n+K;65UWpSmE=)(PIW9%G2H;v=wU=VAMS&P zM(98lNst0cT&WO_1<9RE{~%BF&O81RrRGGIP=P^=ooM)pTnIo!m&oQR-+{^4n&K#} zcO^AE>?ZcU)?<7+TxyG^+f{3-l_OZ*r?-t*rI$kMaB415cRak>92RxuBV)@NFGjIS z77ZNW6>^c(qxQKV$F;tr>!FLbrb`=c4TkPb^fwRnMjo7hS#6orWHPoo7%zHJZ|mtT z9%LV^u#XTv^^8-D{`o=HGUM`r&%a;Z>(>uHd&)XSqvEjH?3k&bX+*gI2BG9MHCY@` zBK%5p2pcDx4;8`^rcN+EG-`W~Iz9Fl*Iv`}9-($9E1;HB$)lZbnlgIN>Dv~Nqz&!n zxT?$^DsGwl}h*U~<|25EIc zQz}rRtuu1k{I#9As+eC|w}YhLd7UVq4#te57AzXoNBAJN!imn8s6d} zlpH%BJ|mN47v6LAA-=T7;`9`xW{5G?Ast$h6Vr6!vv>7f>V0GxLF=N}duQtTEyg+B zm2Y%c&%o9m!|<(mDTU0_zou*4S?eqsRHk8o1|en$l-mAiY0s62CHGvoGRM6*CI?*6 z8d{>`G4f2VK&;jgV;|k>X8p$4(nDioId%uTp)dJvee@-6jC1!T)3%J1MB2L~!Bhb{ z&g}w#+C;4mFJ7)=eJBCiAr`6Y#*ng?=$?_MSZ`2@eaWq=Qwd)kgr|*tw|>}PjwnlU zxH2Ulj!6|gH0)@_{SB|VW00~o8#~xIb6oA(XV{Tbf4=|xI(>6v&%WN>ZsP@yt|c0i zUZ_;jAnAE9B_bcF9!LRJ%kQk`np^spdsw;;;iKVMY_;Zi-_hxj7RAHHVvZ@Nmwj9$ z{iYA>Jp3vMPoNti7Va3Y>86Y)ccJx}BL<2IT7v0$?xW|5xPX6M>f}Rx7=m(1t!YZ( zIgj~ghL>n|TFRg0{DkHuwQ}<$jkyrt+DYLR8DW{80*3KmtJda8;j$=5-<6yWDg_7< z@1jCei#v3qoh5^1gjMfioWmUY>t7ke8y5bd)9CT@b!poLOXu{%$9Jap`@<_&dXppm zV;+WrD3(C>jJtvgZT3~##i}~pD5V-}tu6yeTj@FCtG(9cwt2+34bvuWQ}{vZ!PnrWEc^ zOL0ETzAgX1=TLT&z|zoVljmTK(yEE5#T@L&-+z6MN9BMfrLhPSViGA{53^z`)tB44 zhHRR&#~gSrVgJzKrQvJLf`RjqCkQFd!JqXyLAH_}yLke|6goi6pm>rgH67(>DX2Dq zK7oTkS&p=d@yZg;WzkPILm=n4e3@>xv~=t1u{Y+92beSn9yD?8{ZS389G5?jsCY0d?|IVYizMO69JN*z8<8ROEC=k1kMaFtL|9~&-t zqv3O3IJV|^<|5kg_IAAnP`#&gTM#lz(b5x}K$7gv>4Yj76rU)LXcLI3V7!BppheESwzKK4mY>3-l!BaS&jhneP5GB`j-Kvd z`A=Dl-<EzBmSzt|?`Wt8Q8r*{s&pR=wxD1{#$D`CR_0tzP|Y 
z5|gZpJVN`m76^t_921Pive!9RdQK4hAr0biA0L;v%mDCIyt3Hhxn>Aiv6|(;PAgIV zWV{M=jIx#OB6>=9*LzwxJlhvD0>&#(cshmp541cxZ>@4;{G;wS`OJ8@+4+p3khEN| z+~wJ$$7Dm+VMT`uAl;-;>gs>A0rf!XL_rAoc!&M_5dn%wU)n9ZAS;s z=HVX_i^*cjVhrL{`JsqHOU(;O6}tif_f^CZNznRZqc~NBOG$j%bNQ}oGZB>og=ic} zMY6S*8EF{>l&?+{OfuJ+xQ|plVSTrF1@D)CCt-~700qDUl_#NMR;tr^y2t09QquAb zc+Fir?H0sn;wyQYB*G;<;aNufSw-__Dc05Dgz_|VtFO{Go7;%QhF_T9A~H8e8|WrtiqS~FtY{jLU!UF=D?SM%Sp4?u>B@DVU8Y#Ud#uC9o29$P*Q3_oz|TB3^4 zBRp@g-3EDuzuYf1Hg-taPDNtxY)nt4%wm>9#MRvL-I&`C#NfuP|JqYh0^6R8lZ-Ev zQnM+KZ~o{gUiw3=Nd0*#I1(gv@eld=%|^H)_yqBqk=P1UCC(Zh(r~-(j6cG@C>P=u zYk~Ek=Ba#Z#r;ngVm%sLWHrzG|K}+Q#)?(0xWAE^+jUHi`57U?Z0P^dE^{oKc@t z{%LTjFjG*<0Db!17c^IOaZWh(#K1znk>jpf46F2dSIfPP(%rYP)dAw)lU4&Zz544Q ztzAzE*^{OH#$Y_a_3NYnm}rm#5NgPeorS$lttG{Q zO6KzPN1z@lh;UKx#~WZfu8BQAKgUBT>d( z`V+_jgX|7b`T-f{fxBPz-Q%>i$u0A?t>Tx!kuMH>lM#ubj>xw#mYy3OL{FBA>Lha* zW9-uSE=DX{%xx0dKT+`23!7;G6>LYHvmj{)=+<~hMmspOC*Qmw-;A9n7!qsk>*(ObBbc;$9xg5SJgC3USP$+loPG%9>v^AvSS&bC+^eiBmfq zSkd(YXMVl~C0}q6Y!(2B1Gv9G1bTmHJ>f1e6ySOQ0Bzs^ximqY@W?My=RZR#taF-k zh!OlG-F-D~-tqAmvyLmcN`<$c@#i0;;0@#oQ>?T26s1O?- z_8<{rK3{S01=xbzCQZn*)ur1Vg{N@}1iq+KTlV4{8hrf%l0W`aNwtFL0qK%)wWGa& z09@T@wP_giEKSh5745o@+`9aS20*Zjz?ySW1V)4=%ni@*-y$Wx!7I+1i9jSJcFaWB z!jjb7*+;oy-=>XSwjTDvS<2S+B-b4xEbDJ9~^hkdJ8?zPGtFFVF#P^sh@ntBRV zD1}Ov0}0J=h$ugD8bv*RS*i37I(5CJXbyImRKXxZym|RL4WLiLnjg6)#+=-bviX*? z`ExQ5Tc*=T=IjSN-DafV7Xc*yGQUFizt|DF8y>R!7kr2&n8SguBJQz6!D_5LLl(%O zp+it7mX`u6;y?!1rn|RF1ne`>-GlPPT-vY!$nR!VW2yq^p4kzveNj8Vg|^b z8>Q?OvzGwB!*)enbG1(FRG%O}Ao`WwCSEUF35*sgO|7QxaC5H0LirWd>0GESQ?Tm<%uCm*N3FnpF6N4adscSdp? z_ZO-Vk*;nX;@1$(6;d;v;A|*N?#KN@Yy1kLHo#u2gZ1#idIXT;g=-(ECfozLdmnsO zMREBDnLY1m{$Gvg`m@-}Uujl5Cgc(5mlK6)WO#fWBYLdB$0@{zNgr;YyCTNY$04KV z9*vqXTz7ojc6=g9E|C<*O%p~W4K`80NC{+2$`?+^Z$5J57#IoQKG7f}65W2b=j(HY zg92uufKxn@r^mxt^T1Bbip2|s8AXiC*Xc1?rD*!2ba#7KLV-7XkM7NSKrmPU?A*6yqhXZK4X*TIem7Om z#HqJTZ%k*29HYR??)hCU(M;a)`_gc8pI{*)O!?xQZ22ZeT%Zv#-@Xgt6bO+L)gm-? zK_1vY5!C+SiWSww8xeBS3H}2!enBqaxX^*r(2zfR1tf(RuIo>4T!TbXcgD(hJ}*as zGhTohu-o4QTu#4m{kZ178WQzc0FtJ7>6ZkBc1(knt`vDnQeTQog^N_Yl*fT&8=~j3 zFG=Z2nz@b)F++MHV57KKmZgmOjZ(qN)pp)>FJ?~=*fvffm;L^Yj(JO_&K?eWT>r-= zB6e~m=T~^z4kLbDI+WvZc-PUpi0+mN{ompGadA&h`g^yV zQFE{2-0nvqJtV%`F+4vW6;?u>@abq(UXl^st~dQqFOj!A^uXwyeXw_UsuIx@KN8{g zc!1v59$FUG~8&oLs;4x!;W+Z$|U<5-chwpG91Z^=6nmCojL zdI_XCMFG~+z(UjPORY1WZcC#FwP_Z(e9CMnOp9YgWl8SVec~AH7(ICEgMV+W z5M}%1NXEjW3`}4dxMbhuy5O#z++~UPr-iWTBsoD3IE>C!Iuv&MQhYDYBm5{SQS< z?j${b`58N1dV`Jb4=U>a;qbC6-H(j@?|CD^rNF^_pSKfq$E%HulQC<0ceEigy8(WT z0t*?rbkpO}Ff-)%N~BL<>R!UFBmd4ma-jrFWvm??E{?K>MUWVgtdl9n-hXbVL^=P2 z!=p3LWQv_|k86TAV!KrsS@7@8K+7E*}Z$_MUQ^;8n z$ysfyA5yuJa$mO3v}P&G+4w68Jy?8o=1t3;Z*M2#;3)(@SW=MgRhX`DY>N83H~g94 zE8ounQ7?%5PMwCOHNexe;Mq=vo2JlYci7LME|=5%$vYZ0aFc1oiamLqB^1_U_M6gev&$#3} zkb!CLi@)Aum7P)CIF$0R$s}@heqAr-Vgtr1{cZJfRK43d!}OJhYOZ1X-7l#kmsI*I z<*IsL%zCN0BtI5WVYnr4U7votIqh1u80Ii5V&I}0$UbM6AhK~af9AfFal{NsVIUG- zKBwc)9qBnyz!cx_KQjA%vVe&g_cRy;>NzE>!p!O3YvFEi4@&gxeH674sK5!hL3&reM;)eB$e-TZ+!p zrN4}dPFJdGc)#tM{{K$@6aRik18eqR%ce+WxNJ$t*hQ_z&BxE#M9kmzu}z&;@Ch|! 
zpsa_ovsz{CHHqK~vQ0!t33;+ed6vC?q4ebTw6e-+r98KC8YU(%B4K?(MJAH)qd}@R z`fN~a^Qj-w)%KnKLDA=s*0&Sc@zso^k?_W5g|Y7va_avhEY+qwu(MTfoR5$6Xj)8y zJT~VhUF13(Kl$~XHOk9w(ONg&_q^FwmZ*pR@cV7JVm9u$lTszk5UEvJeF3F4+*MuL zQGc)vt-l)hZSbD)=F!v|;yQI;!E|XWs@!BG>NwJPp{TglJKW}bt#2p2amCl?aMre( ztiShN)#!637gVZhoscETS~fz;_xHqmOPA(|0p+H9b&(ZT%0ufB<<=5{a>6eN!Vmxe z=n7r{DWET~5Gy1kior-@P+}q&Suu>P6atGC!D3}Z_K8SiMfVAd3Ess-#bm|B+#a$ru_7%gd_Di4#=hl(pqm)a5jd6_gc}bX8SUbktOIwbj&AbhOoU^mKI8 zwbTvORE%_04fS*^^mNR%)Qk-DERA%{jnuWYjE(e-h@}sG%%r`HMG}pr>@0O1%(VP1G;K|c?5&Jl4j9{-TH0AzxL8@bm>T<<8@Xtz zClSmF=z-GbHK91OA4~akrO$U;Ri;bzh^#NBqOAlu&cV`oCS1Uh9b1!Fu7$?ol zK&@Cm^Q>Ug1P{WAD8r;gA)>j1zNx#4qNBZ~t)rE_hn1tHt)H0<*~mH8&CbTn#n##0 z$=BT`z{Sy@Y!~X|7U<*T=o}E@@9XXE8y)Bp72+Ek9B|Cv{X~@W(a4basDPZrkc`;C zv?PC5-?)IF75*|`P(EZPhP#B~q z*!?h*4K8LG*MvzH#5mCt0vTx`b!i@*8QzS{IC^$k;faijjH6vA;#mH!YsX?{g7p*s z6ASH?>Mt{8HR0!bh%*iQS~6!oqd)O z-%)>}uabJLGj*ujbGp&(K@a(U^|1$i$*)_f-@cMxZpI942PdQzWn>j*oT`aGUL2B; zUwkU>MOJ=xe%;Bu(h_>X*^+|h+QPz;hQ_)^CbOZVvG`(hU2j9><+jqn&XS(C`hm8( z!OOJ+?akMEo3HmaT<>ogx!O41UtV3`(RThqcYEixOC2``E)EY4_w@}nb`H%Al-(Zf zd3&z*)k;VI=-qR@lcQ71SOsp+!24JmCiWk-k+Le5|AHSE{)>~hHWoP{5Q9J)Uz+JCbjP5_tCUo_gw1Z^L6i+haEQd zVD{)#&0Wo*#Z;ui7JBYR`bW-{nzd4<#hHH~>1X*)+qWS~FpW~%_6Xmdsc}2BA|c^@ zP`qy0$2%PU`1pdChHqu0wc6P#XXCo4>Tu~dk5gMx(mX@ob7?Wo>v=9j~xD4 zU=El3^~(fu#$<{Orj?uc<7}ZebY2U_!@o)@gXHwkNudKa;~Yt4$RsG8%4$kvJXC$; z8b%kcb1Q$V-ktox1gcO!D=hsul_&ouGWD_aua-==<6a3h;pcwK)q9Ji%U#}c=)U;m zfmxP*j(sT#Kr2LrUa~XU#t)(WIi!5 zd0P?Zr6yTHJe?cudu+qMxqh^72YSk;%-r#`EyAGT3`0DsCnCjRs7E0-Dh+q-fxgr> z2^XeX?X1}N?R7#M=A>rgXL9>>c{Q!<{r1zxQpgzrzL`N+SN8Wl zvW`AB(Ruvv*)>?|-RHHhrmvm9{Vu%x(T3!()saU(s^U9we}55lg#fU?{=G=4b8?M& zT-Zlkl&C~S)wXDf7g4G--|NCN!lom(>1^89;HdyE)VN#Jfi>TT(uvUbSyi7c3@HWJ2=5YR}oRM~;lt9TuR(#+Ew{ zUXM_~{rGy|f3rLSM4KGZqj7N}Wvbw5>Gp4_pCia$?^DcF1EgYXZGS(0P`FoLs`!eY zRfDVgsD4T3aPo0*ZI<-=^mXBf0YjotutoXN-z6?0)qpy+h5ME$xzZ1I(`QdwyXiTZ zS8j&=w78xV@AggFr?JJ6zSoX~Ek(9|Zz-rM#qSgt(3RqjuHRehPE zy{Zo(wXfa2^_?e{5IcNaM0IxCa|&F1=Qmu)H*H*6h2|+{mmZ6KhDtm-wdnhD@orKR zPHv$6HPJ^I>#~(<@S6*#{|G;S+9Jm6e;iiea_JI@XWVNs&9y#X6*T|5VT3?~*hXkB zI{T~i-K$Ep<83*+bDTZ?N*icuyJ&bE{c^U@JU+`^`#H0JYjuYAH1tC) zN=Ibs+=n0Lp6f4EUHtwod=g1;SQ=fdzw<6{e*xa@7CW4t{^tSOJ8-W8qfsM zj>_M6eZA9recfaNC2e0$>uFo-LKg0oPLW{!@;!O3t&;V-brb)6f0+NX`@u|;xdxMpz=cy^3u&dFxt*kuMHg%DdyW*ls=eth7N^5_`P4ByKv^B$8{vC69 zMCBqU`F!wVfRrl~a_YAo!UUA$Lub-Q3JO^6fz^`D{ZRSAp~U+tdvQ|Vn23X_-I^9n ztWRYs)(hjW>lMcWujH7_U*BdC_ET~d4!vER`|HI#S*|~%ZF~Ftzhc!@H}{NN#eI&Q zrPUVyV#2d!EL4B^>+ST`t>~7jddeUV{IZ~gRg<_6GRg;83!h?Z@WGu4aD>mxNxvEj zL|@7p-nprK%CQJ6PYe`l+nLt)j~07kVDH)&qCrlsvp#ZO{Ls`tw8Np9e9?YYI&X=$Oi{Z9r<5V0{wpo_L^2gDQbfmXw{-rgaFhF z7npOvngFN+vnVMCZ2D%ps$%u4#qgH8dYEBY13}$UxRPCQ$D~ZiVy)*}h$&kUm~iWFk%f&X)pvxOh$e+m=ZLy@9S;^Ci&`$&)anhhTa-WEZnuA{>P=$v zTOs^k0K#@>!t1uQQsYDq*!VebgGkLCJWR@u&*y;!^@MY19so_}fK|EH5<{eq$7ORN z@+<)K`a%+13=b*ZnYR4Khw7h=)k#>E_bPs^u)iQYK(zP46^j`~1*sb7yzlm)I_NP; zJvrqh+ct*S&n%A$NICyCRO&5A6JMx*7iQA+a8GsRp#LYk7cOC?>FxcsC+m)vgzxQd zhz*mqeW5IQoolc4gPE`Rf&*1z<_d-KfzmlNup*VSD9_2YbY;ESI!gk=1jn&L6cAWY z#WN-3DIZnBB&^%%nF^!&FHZ=C5NzNC90*Ni0pw^uO`lj71uIIsjyE3N470BZ`TnK; zgmoA}t4UG$z$X08^z-)TweT6v-9Aj_wxXhb3gPHx)Bc6yqmv|<<0}1yYDE7Zq3gyP z`92!LkDmIr)oiZX!a3|?paI$Ud97;)A&R>IL>hn*Dqy}m1~{6gO%x`YJ${qvPXm5@ z4}fi%gG9QngWt=Yg4&${cwgke%J$e$n=G66&Fgf%U}03qdc^yLbsJ$DG7aR$dt9_r zU(2qa2IaAQY4n-RhM2#MbxMNc?fjix9^;A?^G*fuB-CKF$ zL=?{`f(#dlHMbnJy}B4lms} z4p4%V<{caBzLvIb1^{z_dN{-~4$EU=c{KVc8?Mgh#MSK2qOpW`zo6J z!W3`kC2r!DZl}9PV1+^!v(#?W(jHQue6^CO7LKCkt`6pA@N&OAKk?F!@ISDs$7gXe zPt@Tyl*|`X`38>l&(>vT3zJS|#-xL}IT%vPJyo(P!P9Y{|ThvF)j$~lvK_e 
z5+)Y_%3z#A@p2Xe(gijgT|)Vlk+R{yba6olj{*rFMurc=64*TK;n3z;FAV(B;xusn*H2mr~ z|4cW(hA5sV9L4doP(N6=ej@F&(?DyL{R`3Ci<-B?4Wgbw`{B^dMMxMvm9TsC;VP(? zxOWOmEr8+x0hu)c4K-$WOD#?mpeUu|yJe#M0xTKKWYQ-zu(zGcW}UzwN-TF5u=X=Y z7;ws?zSOr6_~;^Rn^>_eAQxw5#86;ynx}_(=olXGqyvz~Mc)FT+!$&PtzAzA+TVZ{ z{qvXknQ{Q{>@{o4OpkOkWYUnsQi^qWLFoe$?k;`TuOg^Qyqj3Dlxq|vdFDcj!Ap|M zGtqB#U=kCoAD=&}QR88jp5*`-3M{?0UgIB|J~msLrB1&Ym%e}~7vh#(-AP-0opWD< zu1JGVI+aZZmAM5W^mqUa;1qYgY+VEB_oiiZqX|L=Ixu0|YshDT=&?OMBh>tC3=7uC zhMi+0TPUZ4SjqBKSaLBs02=)>Abg@m?Q#Fgh_X9z!XrxARIuL z4;UW3;8&P4`??G)P#=(Cp`);kH|Uo<>{AXpfeH&D!=83Co{b`J;RJaDpcoZ4M#heD z8hw@J_oZsZ8_7xg%8>D)iel=gmE5jEA?Z;z#j}oksnxn0`SFI4aOk$~fFEvsi4_az zu9Jd!5lWB_M;^eMOvqqka=r=)tVGFIqUIaYvK@Z{H-EISHPf$4(alOqhvQ3y2(_YY zz+l2Epl4?!~upiQqFN%1PKLB5nlMO2&T1?fXPPMKK}IO_PIJus$~O`Ca5N1*XD0KgLJjV5;b8_#bc5$_|v1 zUamYD3W0kFyZ!9@{ko@Xgf~=-?mf)4OtY#w*%3?2h`$KII)E*B zpg|!~W`gD85+>)kbK_u{AFk)CLX(H=9}j zu9*)mUmp+$KyEsgJ;Akq#J4{owBK&T4Fw;4RtP}2zH)@Y$l!E!tLAR zVDkV$h#1QAPFS?r`G`zNqQ#Y`B$(GWB%*`t;bos;9(N*FL80^Fp=}){i4a>71fS}r z3#f|ao2`9iVsov)30)B^@V5@-x-J-AFo*gpyNN%rD#Q)~IbZ*=UwdS^y65vN`pF<`4D> z5Br8PzSc}%;zKA1z{qI9wYb=QYycW?>ccQ3dKeb<3;nDFAs2G%VAYT_3mnCQ4Rert z&*IPty!0_H`Z*3ZB%u&fE9a=@?;L)yBmBV0>v342E}BeyR$Ec&wt7yQAyamA6O5=E zfhvTeIfTh8%(+WPCpq|8S1Q;P05Kqe9{5XLOuTM-Ac>@r)?bt~Daxgy7y>oo_Mzdy&>k->IUH&lpK5wJ>| z5wB1#tYw>=KtZdtJnR-iYoD9b=s`N-k{2AVtCI6OU6YI#`t=giWSX)iaRbxYb@$hs z)?QDMg~}-i#*lff>%W>?m}!!1K|ljIi-}&sJv#lJ zvPp{$;1QwQixQw-O`tBbUlIV)C*(^KR`0Lnkl$X)f?e(%ZPt-_u?l}NZ2rPnrp@kN zy9i**7=Lo+RicU2#v+3cJk;${y7a ztVv=$$F#G+k5?f~3TB6it>OzxWneevhb129e(b404-lQ4@iqbdqybta=XvRy ztTP#Qhl+mTluSE43qG(uVlTdK?OHVMTK2>=-hS~uSQkc=!nG8M9({vu+|V{6iuJ+i z6d3CtY8JOS4A>JI=9~+6Nq!bPd4-A&!hwv4ON+2aCcR)14wl7*81G#t@!$n`*nds% zB_^hg1YS2=9hE)h`={VWbIwBHsfD$s*uHB9O*x*ZK>*>h?TKbZK3Fmzj2F~Y2p^F6 z4+H?5N%$tP(`}GYhe!Y=3YbB{*lc1pn3!cQ)EJN0ybrohtOc=w34oz@nAagxzo;)_ zE=-6kIkEeD`bX;1M5*g6{FxkzwwJ&Bkb0cA@B!?SsTb*P!Hy)%>9^GXQFJbjOz;06 z|9p1e4BN~$bK4k3E^|$iKAYQ!8mWY8liZRJrA~D|n_=WKk|=3}R4SE9ola*PA*odA z=sr@ZqjS*Zs2tzlet*LDd0$@d*X#LwJj+9CHhT2ikbgxAU3;+!VGkW5h<`vouTHma zCSl%Q!Jljxbp8a|4re_6_U!(*3s%ojG7;g#Fp8Z8x-Lv33sr`#Jd7s^_YBrsld$@c zK#!g>8g^Oh3R(}hEO`!GJ@!lfl}mQhEdk34FQ)-_4uAdquf4w0xK&HLDo;i4=jPZSBtA)xnEW;-|r)!0YYgDbzdS+`HwQkdmpi^(` zwpz0F_8Kx}t+SH7W#6=d9N1?-+xz9lz5~U^jotZ^&vqYJH>77l-&ZU-I8e*e%1g)x zLZ|5GJ(C8?*q7|?)JOIbt`1TJ{W!W;mTm!%1!C!zt-moICSO1+n9HrCk1)6btBhBj zB_%poR)90eS=v55)`P|pErd!QUm!R3PM&Dzp9l`6d5gW2!)gSHWfb6{R73M~GuE>c z0<5d~m9#3<<5SWm0ZMA>yjSJ^Mo?(Bv#s?Y?n-Ir5>9D!7nuXT!LU>?;tDsdJTmjb zmV+)=hBXFkmBI#pqFW?8V6Z*=j3Sh^w&duZe z&xeDq_v!wgkm zj(UKmF!fh)0V#1B#C}Nb9>e|(wTl6ay+T}M{Sg)(-r66X=$j*~9_3@NFJ5+<-W|4sOjYgr#2?qn&M9Y5gq zaXRYEIWI4d4t9mbxU*GK{zaEX7iunsyPSWqb?_!&V#2sXJn()fe}9^DnR{QB$we>0 zZnL^b;R3+K(i4V3T(}U=p$FasGqG8Lf->!9`{MJu*uU%huGd(I84gPf8x4y#o8Y%K@eGOo+&bpyadC9K zc5~*3AVcpf11hBSwmSzeKL(W=Ft_I33}J4cYVI$24WBaN7EPW0dBOLa1ZOYFpG94L zQMxtB>C{M1#le};zaqW;*;A&Hjd**jU0==@9^m!P$?obn7~Fs3`_-Mnw1qD%1xo;YwfOUo%K>`iy07WQYi&;J#$Y~Hs4Iq%*3gNsv`Q_0yGUm&=1jqgQQ1MZg z@x9o!KDl*9=`k>-RT%0QK`xBSJz@UqA^=d^TJ^ZAKzzIOggCGYSmRziqCl~Zun^=d zS95?M3KG08-{qUh9Ay zDM&t1ue7B$%RX2JJ4LC&du@siecDha6q_0B8j{ss?nLkVVEgn(7Y8%<>6X!MHIlLL z=fxiy8%963&wUUqb?*J=*NT!dW~QWG+&zxtACMX(FNl#26V2u=VwqJ0yJ}aet#Pp6)oH5kViv*7}81U2`ZyK#x=y_?4hs!v$F<*@+6d<4f{K;9M9ayf=L{Jy)@Fn#ZtQb~r3 zZ+I1!h=KqezA_)04H>I%GO@7ibMbNazkS1<>Ut^IVJIuw$)XsMqQ2#m#da%l_t>*o zTUBvHTX-hcj_~}z#4V3nPNZwe!3^^H+rpyOs6;$5zZ1FtWl)i4yM*^|!L5+jc?Udi z+>XTTjQDly%*A;1V^2+o)UX-An}+c5t#u*r%CWMvNra6~6+%r0N^W~2kcutsEGrvw zN(6X8*IATty4G+ZMaT#8gyXs~cl(4A)E@JG>|%>U>pcc#QR&DvYPp;KQeHFWQ2yX`v#WuGMUEwW~Af 
z&quF|A0w}0t284hL0Bv(Zbgc9yeVtgO21r3GSIX)x^i2xdH-gtJgMW3z;7l&+L zP}r)dx33&(il}lpgC#A+0Nc;K-7?{;0%9iJO8v4aD?6Xku*SiQUNzvoYcx1pobg}x zfYNQGh!BIVoRwAD0du?`;oVonw^Hgz1m*H(e}?tm_muGKFnh)Ho#;(dh@zj@|2bWA z%6j3&RB7BuqJd9OJtO+=rd4x)@2}HnaY-yM<}iy;*|~xfGK{dV(}%x@)Ox0{aKIrg zCW4F3An-vFq>Y!dU>vGxRM7rk+Efz4@#b8i<*M{6&sIVR)HJNM3k91mLn*s0MOocP zUv5Sn6KwzKQWVEQq2L=x+%!ms)wWxyFuDwW5K$*FL@w^B3*Js~=~&PZjsAhafW&j| zWr^n1(Aza6#N z^9Zvm-eq^eMJHP{Np%UFt}q@&hVkSUb^3%cm^_N`9G2hvc_V`N`v-!Rw{L-$2%&$& zWBEn|2C}rLaP(4@J&&;bH^)iMsbfF7&rCWhW*VzkFHlYhoXp^A(M7K_7Y!nMkTFiJ zJgT2-*qJ3yPe*1I`aM8R{Z^0qEEkcA5^>(^=J5BG(k2k3&jHt({h2~psE#J=23-?n zLIsYWD)j}N?dzmb3PmUvCg-CD>K(Ad$P%U8xDy;KMG!@ZYky(*EPy$RJo()w^8%7> zvKTLdC!mT-E`Vj(7&RzOwV=m_eG_v3;nlhneL^}E>+Diy&U)AKY^#e+92%KQ|zCD zF3zAM|EA-q)4UNkcSeD~+H7t?W3X)4P9i=d#&xb`nTV%RTaymDOrFQHg|X9Swp_z& z4^5bC!g1?botLjo-tV@RKWx_($c1oEChn; zELIPAZBQx%?XU%-a0l$1A6}f+Spbk+bkXo1$Vz|(W7=UW-mxNtW$!OnH_Ba-nlR=# z;2#F?ZW6IMyf};DV?TYi6oDtOr0+`mQvH!$z6H)|8EN<;K@qOx(fx&}4Y*K!KW+J? zWgm76Tqk9-TB~s8wexe!@~Hvi&jszzsvVyiz(b+?r~2PrD!5B5nEdVd*>rTzT%l-^ zUmSysWRxu=`d;^DNzE0Yl`fEullnK@_0N@B_y)gBmy!T!7~4`P0v&-4$9DLze>1Kf zUN$`tt-iyBZzl|Zpcpaegx-+aAUOfpXJJUcDWcRxxv(7J>jEfYELS>%JBMnAF(iQ4 zj*LH3UNRc&uvDK5ri=zqKzl7lY{Z@H$R%06ES2s1j9c#v8?iuJouzZWF9io;vu$vq zRZ7xX{oFEvQ5fLKuvc1XrbPcV6Zn6TKV4whfHKvd(2HCr8^-a=b8y>_miD-tF{ho5 z{#O4ES#Wm(`Q4HD6YuT0gqR;y;63g38h<%HtMFF|f`Mx*%LUB_dY0ubfB|)S0|dqr zkbe0k>}50P#6xc%LlOa0#sFrkzA_y^o^(VT=qeM0dLpqYD7yFSxObg1`h#=f4YGb^ z`e%&xVt9wJVvr42Pch&nJ8ZVzzF`i?g}x^?OciaLE85U;pC=%J0{}({{7{4813uYv zwA_J0=5)myT!S)88FLmO;Is&l5uPjk+EM$40rT=v32L<6G|9M?yz#NGCE3gIi|o^G zyiJr>6X9ma*6Pb*>7E+5!?vxb(~sZj?Rt--{`*uW8g>#Hc>i2dw0%ukwe4!t>=z2lV{}BcGh+K=d7wZ+U{M zXq#drbFOH+4wE@t^kmQ_i;JSh-UpRkR_&_osf)P>P`@JaSW!iEvS+^?4s-I-4_<6{_g9qkcj+C{PW5J%cFeO++=O;V zPF!33lzw(_$xn7s>_wtSQ_niMa9w+$GebH*Fkrv$;j)^C)=IhJu9QTQ8jBI(IBc{^ z50pS=sEhnnfN2&rr5lbkL@lH#FTQV~)dY#;`q{|>zv*kIkg zC|AtpXUWWP;EoyG-Xn1sN5FCz8P@(VQyrY$zCKfBv!Wz!N(nGXa=Ps7^X8(lZO^vN zZ(Qvtjdg~nQUM(A^{H*JeY^%C)67Ns#4PXH&qQ7H#rQP@q}#wjFV;cPi9(ML4;qk|bs|>o4tDD@%+kS_r z7SL9l&HddA+~o>H+BI6`s0T)$4l@icSD`E^B=?7BEr5z8XMm~yD*L77{okSOb2f~+ zXL_U85?k7@2LYp4%L!cN=g5X7ODhmUS>Px-J&VZV*4c{@4YSJSCK2!1Zl~0ZiT}MW z8#C(9c+mQz<+|dD+Xxb~e@WA3@o$%rC20sdmehIpdFFpZ>r{oAh8XQ*8JR0%wo7~0 zycqncGadl-8hdB7lr{&;6rd3RVpaY|oiyfP#goXVr+)W;APa`jE2v*^6KV&o=H#Xe zFvVjqE63t%YOVjU`_}J@H|-~GkW8-bc*WHk-YToP3b!y(L!U+8vFUgx_Mq50rJ?#WefVbXx+FK%%PB zy3Ke(0goNK^)?@V(_I_5p;>(@`9a(=jr$=D`|5-k9v+0rfckl}A~4%x+W?wBhoJec zpXD6}>%y1=$Z#knMS)Do9OY&e<)*d`lZzcb)4Vff7Aq|ro_Sz90W5yS4t&FsMd0Eh z{Xw(F(pSDY%!-SPH`*rqo`dk!C_Fsj8g~ig^k7~1Y_qj?SyBKyl0g$n|smA{K2zoa> zHLZxrQv&&b6$~~mtMD0w&EcSj+1L+QzX zOdK?+a;`I*>Hv3Nb6K7E=##K zPdV-j;`eRFbi?Mp@{OXhO`}D|BSpZv%8=pTwx*FD?*MQl4??OXjdXTBHRLas3|#2F z|I7TUOdJV7VOeY6-&D_fK-Sz7km%P;$8`?fOj5hq4T5(|3teik?&hz7y;J#N@AU zS1sPTd1>vt)*#a!cEfb)-~+*qF~Ot01&Pa_)rwgwO(s;F*@>Yq$gpV4X5XSUTwq2K zC+VW;kC2UXh55Xq`Z3J#SkblJsPCiqXIBz$@uxa-m=D*t6aDD-*u=;%%l)--2R-&=N>grV-$Mg@;6V;sh@1dXMz0GhE2Spe(WM( zw?)zfxIS3(MggFQ%e__Tg(MYVFumBw_nB<_2DcQLB8162d62GeroyE#8Ua=e*TZK z6$Y5*{mgc-{iKwnJ9ze={S@UMd*h$}N#HwL1x{#rg>E!_l_u4}4;SCZ<^OD*0JQx3 z*NkcQ&A+S$asDG+(mR-Y$9%WHQoAQ~E7{$dWV@ojz8#=nTz@GcPsf7*5P^iIvuiV- zpWob90lw+u9leqe;S|GV=>m}$S7uv`VD7&*u0MMfsyV1%$Q4Q-o*H+8&QE$wg zF_nyEiADvQ%_%c<_o_d1_4DWaxf)4NYiVwu*?4e$|UL7 zt+jDc`I=f%Iv(t;5e_jL*PTo#Hlzchlx00D%4|VABNFi z<{8KQ3m|Ia4!_JB4I54nqJ~I9M@;?UiXf8oQKR5BxA(4FZ|avjJ|oKNP$SR1{M)C- zzsRDOeEi+BoI>?V?YLm)r=+4S7$!9r=)TixG$~1^!&q{T@-ZkmfOG9`ko3~RogdsH zPcX+&R3F75)cVBMTWD$&bv=g)b=&<(%6nRP_~0)yUl9uiYpi_gHkY{AV;G74eRj!* zBsK?WkA{o?%62&(GLXNz 
zYVo5Qk4T?W>7Vbosk?r^>+`JPg7kt_>T^~BiYk&|FQL{n70_Uo78g2on8RF(GJC*Lz=Dp!f>N6Z$qOew^=UvQN3au{oDvicVtKGQgQOks3QBLqd5lOMt^B8B<~Ep zgTRo3^yR)u?#Y8~p|&qSV2NJpu16zpMYv~}l1om%jAxy&`ep0=#Rl#-E|eEDTFjqk z9a=mnDa&URYdqT!+ToB(JT_)>3u1l`Ns5|N{#VRO8G%D zy3>*nTEYxra{|tc<|U)N@~-82X`EAU6Y57OLDxoFH$ecyIJ%$iXgbAP*;}op^|bcG zr#z}J^w6D=7>~CQmZJvoWyJ*}nxg!a(iCO0096k%+EMuQNw2U(#;h6)SpeI*CP zs1v69h?s8~nnY4yepT(1?FNTD;$HX&?84|^kLHBskpL@d$q`O!$PcEE;}xXA5WWoj@cq9 zTa%&2xue=%!{192GXc_x?%{7<8oa>AUir zk#d}uSnd`34{e`3c##;4UVdeqc7<2ams2@LFYN?i=&hojAjCkSG#~4VhKJ&Ix0s zvBS*;1H833*P0=t%w{BK)E?V(irkc;Bc5nSdtAd0UA`{B>m$b2%Y)q5FSiO$EUhO0 zQ%$!~i%U?8BDBr80`Ce;(>?3I# z;>p>Hy!(p*gf9%3r3zRPQp2-fjn|riy2V!>lozL-sGZR`8E)Vs2=h6#p%*ZAa%b6p z$!y03(<5hI`st+J?U!Yyae@0Thk?I6Ywj>_DRc?KMTnF5_?4;J!lR_Z4zH$-K{A*p-LyiS#(ldnBK!#{O~lR49&N&*emEYGP58}XzXaSsf-$IKO`#jsFRwEdR}!I(Sdf82vQ6$*fvH9Pm=DgKGHjJberF) z78jTHW2Y;S_Q5vv3COrbT@)m;Spp;h(Ca9*6jd(vYxl+S!>ydvD|-SdZ#AVpD6c-ay(Y zr-y#kAqMeM9MiQVt@r(}y`$DaF=?qmI}#qERk> zTK{gsbpnP%YV0H)H-Rtrl^RuO7(|2*?@$O1Db2dOr#t%3Ttgnu8aqdqzCE<@ftJY$ z#!W@GMgf>FcjIoa*|?s>9uYKe(U>PNOp>I7-kWIC1F_vq(yb%$ij@gp3@o0-9c+lr zxS+=RAF~>jT9s`&3`O*a$hX8BF6`zrRA`dL2(G)ts0^^RsWEyHwt!xh4vo*qHym#X zyYm8rV}LKDF+5zs(%wM-8l;(+PXYLpEWVMmMxTq#n}11m)=J5k0#`BQTy!u@bpRO~qn$TDQvIT1#g2NDSAOuIL<(6r^YAD{5+D1vi;vW>>#|9Af{zsLjl;wOw7xZWA17d=Vr@7Q>=r3mTxV20 zwj9G5Mq5rM1wZ(7Ab#!+IqmpC9emFF7_ml;oN1G%NmZc3QK(KzOSbgqV+s&%;`Sfqz1Icq4R z5EZ3m@c6V~&?<;;6=SFOQnPRZHnf>7vgMQ^9F+psvbSC}cHT)y@3M7^bKf*>DOmjX zQ?DsOLUf>8H)$VdL7VRw6vzqkDHC8-o3Z(afeg4HSMAjG=8|}T5-kzsL(3A`?prj> z(I~8rzTfNs8J5u(Hop_sQIt&{Ym7+B!eYA#C1b6ks?L~m>>-}0>4fm}V^-;$AOA_} zSVtdAlafdI?47k?jbK)z)I_XVF?f9bQ%Q!!$jsd9qeBy!dR$J8#>(t1&!T|a*JGxX zuKEbjC*i^{KJ$fk(G`I`17tyhbrlHCgut;*=Gsv1I*IVkuswd^ZCu;iz4uaFBMS7t zGd2VP@7co6cx#12)PY(ST<-}Yr+32)s?0T_X9<<$Ga)me0-xOv6!VuZA;gYFB{ z<=7XESTTTBCw$%wH;Z8RW2sx5_%=FILy&Xttp(rbIQh%K=iuW-#~!88ESH)^Jq(-J z7=$;tkzBToWBqP5EiA;6VNR1KF%mXd=JR*uq|h3q4srpv9W*Ob8;gc5)wmiy52o^7$d+0ZEF}ff=2})&Yp(lBsTq83nZx0?Pxr>@k=bv2q5~ak zWUgh}W`Tf1_pq%jSEbEO;R^Kl&&Ruy)>u+4CcqI#~+P+H=*srUkpzBxrLm_0S!Ci zxFd+06oLCSfRrC`w4Iaw7BJEk^*g^$8-R#iYNA?qwBo2W9_dahaMK-Y+>bWl@hKU6 z7u-j3E@(XmT2Fvhxf+HqxR%Q|;%SJZYFrYG8?nX2K+9;*E;8&FJqQd>*@wZ1Ai@#T z@>bEqhg9{Vx$;F*kz3Lacpj4KJ4BS`%>})(5cwMSyhDR`WxN-66`3OUPZ2auGJ4In zr8`M3`M{+oh|A9u)M-bvhr^~Qj`-{3l3NQ*dM7`faO&(>VbI?IaX`KG)!3tL6U!vD zzbEa5Ya^1As_2svx}C;sM2{yAbV-$wd5S-N7|l!osW@jN`G>R@ub+Lu+$mGj6e-)k zS4~*)jjkP8b)w~FR_9Oi6%DBAYq#qC4*KhYSL*A)^9WvVVDU8&*JLPMQ zs9y&3+_E#$*pAlKXHCiH%E_e~=+Pkv-@bL~#iM}rIoaC{m&?rIk<_Ch; z8$XZ-;Icyg%f_V41BMf7c5{gPLVD)5A+r0sGxpxmQsWo6F|DjTS`$%bNl|T zwdCV+!x3Jv(8A*Wvk_n~1<8xVwNUaFAbcDD*b*A~$*|y@>C`gGIVVZK_t;Xz zBm6QZ9v3+GO6GY>8IWV5*twnDJBb35Od1Y|YO^bvCCt8mUN9}cy?pkEaDs9B_UmVw zM`JT{Urnrjm0HF`Y?(|37iD_bZ>CN{^fYJQoXjOvi#7$)5A`LNCDW65!gr9wsulm%9_>$wc zm4FDM>`Wcp!Ba2X%r4Fws<9t)K3y$ZyX=yCcaNVs=;LP-p3S{^EO77ic;a@cx zrS3jndKUWM#~G_XrKU?Fl31=Z!&%TFlOCj@s|wZC5M>16rrYXLBQ4zzqzt__&IPUJ z_^gb&wF0f}i2g9&%$@*nBWfIJy+MM$3PQLmVjO=yamheXbRr`%)ht8G#PztpsJ)Q~ z-3t%ZbM+&`)QFgMc`k6?Cl5Qza^BYBhYI<+e=~&>8TENkRI>Etz;M#6;cKe>cjYRO zyTEsOAwwQG`Fu{cbQ)n=23@f*!q;iszGz)_GS_K@n_T7s1TKm>i|Ovb^-j8U@b6_v zjrL0^S>W543QD(I+Md{W!13EtH~z_r*R_jQ`KL{(tZBhrTNvjub2P!f@aevMkJ_-K z3Tfz{GG+!p(wu5E0U1qbY-_e~rZhc=?DTil`nW(g3>hiZ#GIp8XRzjL*CMsnr5z0a zzGfBk@}g*#LyHs}A*rvwKkjb$NcGfV#(9^h|HxV6+`Q_YBQZOvccXFvw|Ols7bbrZ zynJdXu@AmSVBm96oLw5%F9Mg;WScnwcMfsOQG^$e1Ja~*&lRk8xt_yCwQeJNT_m|bn$L_Hl z&W#)GrcSVd05b**DT%+wQ^{u?%I2Sd?F_b(6ZGo)CmI{*Hxhk>DH%vmFOn424_sjoch zVwLBz@sHi5D7l)eC}1Z6EM=&V2o%P`sQg~6Qh!rwtQ7nKrq)d-LsY_)GBv9~=0if* 
zOv)UmWsXDF^JfItI_}U#Qfg|TTbTxv3hU)%3z#yFhzx-l3pWXc@c#VE8b7gLFuGF~cmi15Ie@j5Ugp*6s zvK#d01;%I;G8Db_!XZp1G>j>!kG|@VE#X8Uk91enS3+3fzbN6LAd|Xgn{EOu)J(#?U5kK!)O?_#Su<3G-iT2qA%UY=W*QHiv zORpL4zg932)Z>$}>*I~GB%>*7(=rXa_qrpXHS}6IHUS;__ZJFJ!yx z?fq5!!W}D@->9`@MF)0-ZK&j>?p>6jJ+%ee5m8(p6+ZY+fXC6jCzJBlZtuUhFnv4T zYBePAg9PL{*wTm_6C?m!bE+$3b9^!3*J7rr25{YyLk(K@v7-H1J`7^!26%%?%? z(119T^7~Tx8;1z+jInI=e}0&J1b0fy;_{!ZICI+`goN2Pt#}jrup^ud3Obz1-l)Qr z_|BA&*nzGD&|{$&Yh39L-4tJF4nq0oO^x!`zi+AY+a$g@Kl{)iqHe>@pt0G)75812 z9i7Z-3hc*ON&vHGJ%u5wEnKs7ownsMjeOZGb@0(ZJu9Ug6^ZmfhL&3drIk&KdY_zK z+xbc>vx{K3C+2>@yy?1KR%J-Tc`Q@)P(-92{R@nl!pto+4G z$BDL;GlGI`zKd7IM5LJ~Z7CoazQ)!+3{EYrc3e5Ui2skg#6*s*=g2**#{*0Lby?fx ziyBxkwzk}@0E_5Beob;p^hMiBw^`Atio>ji+xB%ke>B^>-cXDmaNYm0#GtDvy|n03 zk{&{7k;^?(jEj$vcEvyxoylt-?cqx39aZ+>5SVCwISi zXsu6FkHRR~Tm>&Oju%V&NS@A@Kx%A6#4Xnq8M3>By;D5Ag)UlVJZQl}@*kSLC2s39 zn;>NoE~up>&h(?gpq*p4t|sjHUas7z2QbsO1uQ8vk5LAq4~0g~^wK}<`GCIm>Eh3c zrHf*wD@dZ9OOo*auKgN?mku8=X0xd2LY(Wn-~KV2cP5NCTNh1pA#h9n7=)uPG~BMJ zONjn(V9Pe`u)rX1w%MR4H{GVBvAHz8y(vq{FUs1i1W#XVhFfZ)XX~`}yRcCpfp4Wb zQDmJlz$QSf+qR`qB(?`NzCvzr&f$1&tLe&-Q2~nR)O``wazu~}?flnNr+fd6h>IRQ zWuJ#YOqMEXQJByAEm#YHW4;>D^wk(<4%_KPvz;nn`8FPh>~W7|OiswhXRh8=gx}6s zHP}9`aPsH3mOAa7c}o=e^Es=h8q<0ShBuJ+sT%`6jQd;vn!8q-iGNc zLly59w^nTB(B*_jNKN+=C|9`vXk&8Cm^vyZ-}tZ8)>;Y@W5pd5NNtx`H%#9<*TWr2 ziu0a9MBaee#9%fdZ}d*9yKoDA_x?)Ph5m##caH5r2?1$# z+u6Q4u^PbrEmnkPfv8O+Ijb82`YhT25VN|@#{2C|6r~Nm zVo&N_HdA$fFN0ELKN?!(N!1#=B3L%KRLGfRAxlJ<92QR62eqs%YTv@{_m->j270V# zVXXh8)L4o`EwB(01w=)V%i&&IR?{KQunJ>40GhCx3<4%0;)ZsqQIM*jO*CeBVy>5( z>{T@FtU-oCJ@#HrB_cf(S~v}13B0427`^Gb6T+dsNC&dgc9%!Is<$`u2(J!{Ov+K) zwm&Er@S3P2Dh$ViBt7V@M&zk6-g=|z;qP2T@S^7lCHt`KSg!79t_yE z)gh!vcf4XmjD2gPl-hLOXAMpi^G@L-bEOjj8xVp=C(3={+b1FPAy|5DLe!hYNB0! zSOn;E#3|n(r2X6&c~xoz_@c`m+);MAVq$2P)niW=>jygd>#4b9^m?m4kfxLEIK*21 zwDS~_tXHv(96>ezH-4^UI@l%s0@@ZZ!8h{?92D-vD0P8(Pv-?U)KX%<4VWJNv)3~` z`|j2~x_Z^{Er%hKm3BXcteGlVLQ=^?388icQf}j7J0WXxDx_!T2cYj>yQgJ2<30g zi*Wq7(wqXmFOsY$ksHnqyYX4gq!UURCBog%E`|jfrXD;50r+&^+zr(LUvi^G=yN&D zvo^x&bJk0S-rImIAHdkO07k-8h*^{c?K1~-7QTz|{0&941%bG12haGh;X{7XF^Ec4tGQuf!G zB4;^&zpD3Lt{P>hM+T?JyC^l{0{c03Xz_57tAEwLjeBh^o~w#nbBCo(uo$&iuQ|ZKZP?(yOu^D<(rM-8`=LE=6 zAv{!wS(u68n@8~R;Y}n+?1NK-PAqH=+x)sX#Shs3%IVxf&^bj>)cz(KG)5nRBo$+f zQfXyI1XaeT`hj>?c6|}s)OT!lT%V9RMiKGN8zt#}l>~b(wQY=g50(k<%f)tU)9sE4 z|0+zvHKw*>0}`=8HbX`e{Jl6q&Z;*f&Q~823H~d)UJ;Ld6dPySV@o0Y-rKmze%zNV ze5n@yO*N5Rp%*BUyg=%eG4pmd{btkqTo$VHq+_NI6Qs+HgYqB%C4$f*m=LqD%~;X@ z3$q7IhAD0AmJmL@R|UF42#aZ8X~<^Mi4|4_Q_P+bh-G|)W!%?J?c$SkMkxPbq=6bE z$Rf@M{ncW4&JQ>TvB>+Sbhs&siDs;2Qxi0dWRaU*4VIHV(9f$)7QDn~;60+8;BiT89w3P{q6 zZwPF1g6=`OjuH%^;>45`03(9YaRAB-K#O#k1*U`v@DVmJ2t;_=53NsHl2uxOq39`nA1JAfd2VAI>k@* zjM&r*H_`U9nOilc34BHcpW!PtQ8Y1z)Wqr!vqRa8Qnsl~;_+)1qfugcG6cW$ACpF9 z{Wllgy})IF>){^*SClo0jusQGCkz`V4V+yUXL~w)6kjb8S{I*Z zo7&t9VcXt?O#a4I#<44(jnn-#xO*&mIA5P!j!y>4uZxMkEe|UUx1w~9g1`UydGWE=SuBIIX~|OBMhDU zbATHVg(Edi+HB0>lTuYlu|_TLR+X};hYeSG9XsQ zip>{eF+rTn`$W5-6@Re;KLUh3mkNm$UCipW%`9S{%J(XZnkjY731wu03^~Zi)DSCJ z#t|Ab#Ms6xRp~c13E&%l0gUHh;~X{m=4T|t;<~X=JQjMT4wVk$``~D@gxa~#M5O?T zY8PtcSp#kpo+PCT*#sxCmxc04&bCOST<518MDKfO^W&t(S!9y#YzL=yo}=dnAh`gl z2Bu!;8z(xVIvi?iqk;@n$a8=}I!IqTMtCANB^fg|j+v%P!=@9lyG66EO(woS!5#U6ZT^D2krjI` zW@zq|JvB%2I3ja&_^#eDLbYL6v#`7%;uH$+qY=ua48K}&=9sWchYQbPhBV=GSS9&A zq-hm?&?Z=-CL^8`bT(YzG#Qho;xPUluGqw-psSN^P%p72h`G8(lWb`Nb6_WTE_Y+rwB{zlO(2K{Hrmny868|2Xn8fbY`9@1C}}lb7SL%hFBa zxzxn!d7*6nto%ife7Fy<0<6svRUKttU);7n*<}l9!-Erj>{i~@M~7D80!K5hR;LDJ z1$P~R*Pa%}sdXfb#9)ZU5Z5sFOU#$B85xqKH>xu%wj)uaU#e7M6FwE5w|78gv3^NY zZ*f%biBs?M0FVu=(E078Ha!-37EqRYqaZl)#rxPo>=ri`TTL@|2AryC*ls#yUXAV! 
zC=oGoK3yNL?HYK$=wf|d^o#8WlJ|NSc%vwHBFLRGTRSa$%wT=rRA@YDtJgZ^ZIQoA zXRD4#L*&*vt;w*K*(ozf*Y30KW{*G}(kx;W&w3}+-MtrCB}Gk04U#gh7e~6ht1GqJ znwDk1<<5KKf@2tm%OXL+&Irw(mj+4GcEU@zgvDD?laQH@B0vn8)ywTC03xTyNesnL zEIC^kpA2PA3%F}K)=U1&(IjT#+o6((AZFGB(?Iqo^z`h6)lFlpjZe4iiDKb+&zAY! z3%4&d{NJI06DgStREo=Zg;iRx4=PAlJ!|`&XVlMIi0x+iDRw@UTL&(&R?D5o9XJre zk|Hq9)tveE2Wu*$Qnb`hGP_Jf8eL4Bik+zMU*Z>npN19eJ4*r!u8XBgZGWfe8fJ@s z&A(}vpX`lZQ4t+~C2_h1H+tvb8(eqh){B4D5+*g~;~-iLQQ&KNKV;njjD&7JvWZHl zk?woy1rN^7y_)x@Z)JSfC!v^x(O>wMWt;~M4*x-3SicsZZ_@j=KBtiLr-K*`u@1RQ zg(ZJ~VOyV&Uf^+BzFDI9t$}N7#?^PEhMEWa|EDeS4|1~OG4=d8BYfwFozC@I8j8PR z!IJcYd~0SZZA4n{ptUlc<+Dw@89J?^)HG}~o*De^zcyL%sD{yidfu!NTrGR=pt+-hc16%nY1As#y445e+`_ zEnyGruOB!R@pel<3i-nw+qCWdw@EGgo^iCyCFr#X=Pr(QT9})!h@c92+eV{3-u% z`c@W)LPhX#7fwBTzqj%E{FCKd97^QpG{T0gMA(k(EocS6T*Ei3m*t*!l?(pqMA zEb`#XbF1DBT1My|KE3w-|7w=YuWY#&%vmxLwYmS}6@*NoY z93#9sOcL@PZq8JI@oob-LUG&sejqNum<^(9Gd)PFXX_@K%^h+D z7>WVC|NihSi{qrBgi^+i(H3(X(MD;VjRn?G6G4^ff2K}2s`8x`_@>)=Kl#M!?F4F= z`g}(4uBgC@QvvT1V#ssPO;!l4IZbN4fH3Y~Gu9y|jPuLxrqtBd5I4b`W zjwKut|4j63iu*|FJ|3IVutP9f^L*Dx-Y6ma1deaS8ca?}-n>-p`R(%K&Aywgm+JRo z>>vBRRoK*13K%PPjun_B2gK0w4eO-6heST*$V$<~w1O1yGOuT!@6Y0%{R?MV$H=BX zHTG5RC~B~e+k(#2LWEx@#*K{C#0K^A83x%k)LYs1E$2f0@ zJ}RPb0z@tEG~B{RfQ5GnhX7|VIIut8HgFSWbwa*RJk`nzjE0+7$yCC1?N8S+2c{V1i`f!Z=>n%Rg3n5zN}>KmE2 zl?tc&FF30-@57^2x|m!sy)X0*DHg0Ko{H zoh=CwtzYx+Z@D=Dl6GeDd21SPF3YunOGn+6b(G?Pad+e6!ka8ju8hk!5>11Id;Hxk@+%!uDapL}kgJwUlAbON*;cgcws5P;P@fP*K1Ep(Po!gaz@v)wLd zYECF(t87SWsKDza8;c28MQ+W**p+L1O*}50Q-6Bv#9}Qea3UA#Mq<% zO=kMOkKTUUXobhramvRvXNIZM5Rp4?rg&ZqDuy7UmkXtn-cY=p?Re^?Sda^np$ngBXSknYOBKEZ1_HAn) z{y5`XxnMbI=dN_w^XWL>KNmeXM@}LbYF6x(5(xF=mz14$8@&LQ7M_Uq0+}kfB2lQ{5vt)qzicU<;K<;3 z4CSw39X5}f)_B)faBp{XJ_|5aMDK3DcXRGKK6K(sEG-6~S!-)Ka&of#-*e_}C$q^*p`)I7xg>nqyHe7P3DwC%K2Q|q zem11_nW1KjDhy)BUo-M+3+F1O!*sRSaI0iC+B!m3KN825 z?SdCLTjP89$e%y-AHQDVE&YRn;Dp0q5m*(8oAix)x^FA zXd`+-G$AM&Q>Mp7q#)q{i^_Z$-epWW!^o5wk=O!nz`k(i_z&Uq?m;#vMKH?j2M`OV`TMG>}b(DJC@L^#6M@yA9t*ZV2^m< z_&#q{^mb&`=XDvA9v?oRf(BjuTthIiEWl0-9CH9W-5)cB+LxuLq#EqY#gi-alXI1y zUXsuoRE8`WEeb$H0UL0h_%=PcjD?hpnIYJ;6~efk2y9xrBVj-0rqtq|ic{D9r60_0 ze_PMl-}i@2mWunUlnpiQ_&7bfjr(JP#}Ae68+6v{&Ct<8X+su(x8Y&GI}-?N>3KYH z)%f&Jh}JM><_h9dA<|kEhQX%%k&-cE6u5GHhJ|lcVMGv8B*tI*8_sZP>JTGm=K>fd zfXQQB(?i&`7-S*zAB=wt7>dcMba4EXK^vGEjYdWQvt9u5^Qqy<`?IrY|J#|qYm1ol zRS#Na1zNR%-3m6$JftGP-ca&R)#M7+WULfDdIl9LHL=&ApXq7tG6)7>cxleMstD8&e~qd2a$NZ? 
zrq|K&TUys{_hEUgZ|R1=>IhCN^)nJALcR*Yg$Q+H3_m@~iU`o8X95ctCdxJ9H8nfM zpCa^VI|vo4nRHN^-1K@RZ-V%&S}(xejn(R-SNvhosv7CbCQ-erplI>biYSZU?>tPR zoy$^~y`Ft-sb9Bb^?j8R3h$!zU%q|56H4{*4q6h%^A7Xggf7&h16fwvR0M!c-U5&} z$kO7+3?R0NNJdAnBlhXZ(~Hd(kM&rk&|n}OZmb-mrC2rYV}i6$B`HE{E&}P#(#&%j z@hwu4T1C`D=qRVytW|!wwtl&@Z&v$>=iM+$&w23R?%8PhVsrot`R=|i<@<~S>+@Xd zj02Q>>kVu60Q-;QLKr_{tJ-rZ7PUhEX_@}zp!?(oDTEUt>{+PxW-^2A?k2|9h>;O& zb6e1uCBvtSpCQN=hW}u<%ZYXNSi?Tk78&z3+lqdHnN0Vp7Q4OPXSI+Vw$;bS+uQpn z+53cX*zattltl)3n2(w!Q6r@TG8Q#*|3JoG9~D(>nqJjhrNV4iVKT)l*6Wd}dZZY@ zc&RW|Dl#I)oPW!J55nX<15v5RR}Jy!{fhjanOiOaGvDV~I0X!z{f`S&xw;`IH|~m6 zAxUx3_OQFdrYyiY9C4Cslq8Kk^e)hjBTd~0lKENYs>$0Mkdd0<@B!xKd z*)15!IW(#wTNRtzXyYS!aXG4IXU=)xqYG+P9P@SDr?pJtkcq=E^bY;_b{0BKgS9%A<|%$Ror~i#kb#<*fN($B zBHtBHX8fd_mA`xgpe_IUIPbX~V=E4LTtukT!pu(R4E&NchVxT@o}PPGbnn5Z1o13# zoBfADcl2Y;$0!x_2cp)h$gZGqT_fE~L;2%=uqp+g#yZ>-kHJRsQ`;P8Vx1C1rOnb0)Ubl@zHl>dmFC)94Q^RA{mW+%TaJ$X84 zwQy?MAt6%aM3HTXr6(?}s<`vT`7;>0&s8?-_BPAuY31`8Dep%MI?so9@xY!Sbe0MO z1Di7dgP$7Q)pk70)FsOD*6N9j#*I7}!_cFuX(A`zap1jdH^0pB5S8kG(6dMfW(sl#}@3=xs=}6X!orpvh`d^fpWd_W z@BwPBPtkLDrjW8U_vZq$sIq*VSiJJErbP^u$AS@Cc_z6$aXNVI>|u4uGRjpcB}+_o zW>X%s;tz`n^c+e$0IxS>hkQKD@ozA#JMt#?Uckc|zZ1B(-j$}vbd%#Ylbd%1{Vyl| zK;2+Wyb3!a#h7crR3~`!5Dj}tFhU9@Mfd zZ>jgqKC8K=qF9;ISf7zsr;u4H3Py{W{g+4|x>3_pS=;mOhEd7VUz_6&7H25zheL!cW7>4akiDZae@; zt~_j%YOT-13og8a!CB^|i_DtXq?Se4&}$u=^^MeZ7EdoXCGXfhknO3RYyFXh%kk+9 z+ms<>-7bP|lR61yuY;G5H-ZcmG++Nr;rAKfB7QCP_t$cs4wF3O)E4 zs?bndWQ4Mxq_iZn(6Zk6cJl~OeQJ!T{xovx#?rV6jAAof-PN109gb)L_e27|XD&~lzi9aMl*B4jnN~K52{Me4!?;pDMuEwlsR?lKlQktK;KSFq zB&)zJM*}wJ#E$hGiclf45$FhtwTEh2`qjp$iW8HJN(@isC{;;U1ZE}duu2Oh7~=UNZp&`uQJA9;2_`S8*du(kcc)Q-~OIj0}{RNLRj_=k?Cq@iv_QxKpBs# z1#Q~7e7u7wo=>nZ)A4Joue8tBAnb!R!CrgsQi|8zW@>}ysHcBC!*|?sIK2M)QwKHa zT6GH3^MU$6`TR1z?~C03rh1|m%uFg?7))5EHQ-Q&F;+axuH(vdjAz=~Ad2>QHwjAn z+H2A=^V%Tra~a~Mn;I$PBX*?><2<3lR_4=qlM(k&$)g>cK1sAw zR(T1`JjIgN$k=mn)n&|nTr=uz=vx_TRq~W3Vf`bu@l}tn{WVB_lc@~lHOa-XX%rTK z3A3xnr-o)WhS28jY{!rt@^(N73X7}7Ih8?&F0ZOw!#7x`F5n@lX%sEK$+Pjc&-;!| zz0^UDQL*#j=&UxFbopNEx1)cRoI89U+MA;G9CCf4ZdQgmpJ+LFgR2{6=Dx2*z1sGx z7E&>)*6@y>@7o-Vy?MNZo^Sf)CZ~CVAL%H}eLe8JS*nxKDyk4|EZkQu{v2tL7J@eE zZx>j5X9;<*^eRCWZJ43<({{spF4}SMV(Q-2s~cYL6CL^67^6==lS*(;|53C2&qt1| z+?fJNk#jWJy)=eM8Eh>RQF?4P1UD&=bYGPzTq0FU5kh;_sxJ|MT!wT=lh)zA?Tq@( z;2FEm+nXuNV4d)57MQIj9z1DnR##2AVz!a_AIr@Rbk78XlsOL8|skG zAkQeDlm4ZEU<#|E6FpQPz>l1#!BvRk?cu8G!YK{HL>h4;AvpMa)LR6)_(?)ntnJyP zV{YOx;QC$pSu6yE*hcawBNBo+MT3xX6ohe|3}LFxHc9AqniZDL>0q8OIYw40*wYK#qh}>v1PlI64h9boThivP=0ANs4*ub_%47XNGJgF<x;By*McAGfes zFN5jwiEiUmWx>Kp9%+ZFhk25#uo{Gr=E5}?rv9{?P*U6&c~G?2w;CZh=Jsp!{TlG2 z=1deIkPMk5Y=IPP9Dpg;YXPQMHt(c#YmPdU$r$SonSwyH5F)3m07KU?QdlOebezjK zH*LfyV6$P8caZ7S6pAYtwW*b0MEcicwW`povVxeJ&_uh%4Yhx_9G{AdQGnCmN=lj*qn6PNYIFiVEPPM)h^CAYp=kU+yQ74LSQg{DAXgi z;z~o&)tw9=_r#Hm#1+@B-Z{N`8}BxipZJLAOoY%C!hFPMpqK_jnM-S1ywaufMRvBd zt^%#oAF<5b^)=BPs4IjmxiEB2727-Ls2ETRF)X?!KlVEtzd_n-|5}Q&Gfg4y z?<}wtYEduYixyXDMf@X?6a5}gKI|*>QL*y z%DNObBv44J|`WAmYcyEODUoy46;u3@WNO9u}}S=&q3jG12d;|2b4pgU{l{oJ1$VswT=^3l$pt#X1lKob=5e>}r5Gy3i1)Wb;pw##x99EB z-O;~thlIPE<`#bmTl-PPDT|F;a(Qjl*WwvHZkrBMM?v}MKmub9p%!68nN$D3@{I46 zx7u}CkF3b&;>lX%1_|~8h#iz}bpi<)K)6JR|MXBMo5IR2(+b*s<(KhuD-0a`uw1=g zHWvnQqBla=Y(2)mI@VJI!j~!sB+X$+Qg?L`(8EK{$d0qgYK`@9yy*~ zkPv$hz#^J$!Kvu!*f69LBq(RWrkEA!BUrcy_VitWIS#32BcmwzY)zu23OKY>`TR-H z9+o$Q5?&}I2>Xh5W|A0Yd(6X07TYR72KkeC?N2>(XpHk)vG9NrFM-fJF>-ky=%S1~ zij$gQtWaz)_KjuC5{AiRN^OGXrA0K!&Z2 z%c-9IKbR#dQn@!_t*~il(9;#LT{SFMe0(*xVQrngB?2V^;I}!J66KQMFH3dNq{4NV?3{% zuTEP{4gT_V**A57Zize0GC`Tx@ncu}P*D4YSdtF*xq(KI7flt|X9VkjAOZIIi821V zg!0V9&bKN&a4dNcaE5hT&VN$}e 
zsU4Oa^J6MvkU2gFA`D7MA2cl`d8Db6v!u7IV@pnQ;!hr4POw~3|t5?QMT1vx;w)-u_)8&VV)Kfa9DHh70oY1(XGRwAP_g5>YvGkYkU|K}CdYoofd7 zg#ROy$0k%&9Xc1nDwG@IAzWz~Zmuvhl!YyC&v4d(ZelO>@IlmN|3=%5lfKM>So)KZcZp4$~q4f1H-$TzVtX@I+EdX#0a9e>5zFWtQm|?6{13< zS%)=fPUy9)-&hmQ;;k%odL}ABciv+dHAmUyo`DSK;tGVgP)ZH$>*XF}++e@$)9CHFq@*++TGi;`b!s{`@a8Z;Rw7_*=eBkP3}7wS<7!nnFUYf0+s29u zoCjQObV0i4;{H3%m^T^u_5mVHw>VgG4bD8grZ>2%GRSPcW}!oQBWLm6!kb*(CI`*& z|3}fe$1~agfBd?3ZL_h>Fw6`a<3?zXjnL*ehct%@spgPFNmJ?8@xFE-gr<;6-6N!u zR!KU3H>Xt6lH~60Fp^X%9dC7O_wV}szsL5t9-nKU>-xOjuh;XnW~Q&M1~eCeo(w%H z8DB)UkC$VtI`bmfkaX4~Qc=N4>}XqbGi0pe#j1|o7H)ANZvTy#y|x5V0#x34RAid@ zkpAtx5grb}0TL=L}$c@0Pv5 zZNn!!bJ`}V(AxWTgy{IOa52`xHAaCwM~+YI0-CBI7(vTbYbuX+lyXR$C~zfJtKYw<9qO)&X>lzd&E7Z~0HAX#v^dh2 z2QFU-YY;OxCYOy%X7jRp3>rlXI#dP)uh48I`ZW2u4Y_q6SHBf`nOk{(A?f9-17IW@ zcR=;XT?2Y(%LUQ;yH#ih1uEzHjo=tjhpV|cFgek6EA03rH)!Fmh>p;m_8vhH0y~dW zf##KZu!iZtzO6ZuDdKJ(S?5m{G`@&(FEIr13{?M@VWt4Z)WQCz%jpjl`LE=Jgr|DV zvxJdZf|N>Zv>_TSBM}Whk^h@D-b6B9_KCWz`7POyN;iDNy>f_AFXQTM*Ziwn`ev{m zyAtThRlTWq!?6F*-d^&>!|-jc6`Ct3-9X+RD?s0#!k?JMGe5lz*)+!g-#Y}@i{A8` zHw@C)zOVn#QRU#f{a~(>UI80(`3yg%}c(2r-Nj zfdoyWvdCU@j0W{yLtMvp3NeNdUK>+$Ap;*KMjKHavr|*yVgjBX!WS3^NyJ1(Gz>7i zqY+`Onr)%Zz+>2T^?1tO+og@WBSUwN4(bkj<_tB))~s0mWO@683waf0+fuwXzr1tY z{&Ho#Z|B>mr?-sGjIzRI9GL6C%>r&K-RG&athy%-=ClmIbeG@e z4maU#TZoq2 z4?Fi7o}w9rtZRy$3NekHk35pEOQHYPe{pd8RPB|wcY606N^zbJ~kBVdF)nD`QSz%Jc)}!8KV7U*6s^4-QytQuwmM z=OstwDmC!$0;DLm2yV6@h%>7APvuN3zG)}WP}&mz)i3P9nV7~fJnh^yJ!e~B_&sh1 z7i0ZdfGS?XaO$&2nUbnO47v+CkR6#*OygS%7-9%5Q?=BSq6KISL%@(7lmw(2C7^sy zl*hID219ZP$id)@LZ? zWopuiMHhDLOdRIxoT~*B>^=W1x5c~6kDZVb1LxABR&>!ZWwt(yM#HSr?x7efxnFv* z-9*Vt9JO*fxY9!8IfOP9PAh=tLbjXK&Mc`{9y-L2xO(X73Ae@d7ftRzkkB0AYwtzv zF{qE_?RkuFt7&yrwp;A@ewq+yqJ-CKzpxyh0dsgEoWs}Ew_N*v3@b-bYK-4@%ee#!&Mf%ME|NNeVzQy^s46|AeuTxZQXpg`# zdkw9=s1_LB{7RlVw_v#S(3(Dp07vmdBBu>kTr64h`+MqaoP?>5sRN{aB%^m zV3YA#(dnjHA$`tqMkQRhmG)4DF`H17Efl62MkV$dCxH6tp(q23Ij){it1FJ<+8T?6 zLCm8E=|ARqq{2igfx-8RxpaT=^cGdq)gG&6{JmJ*r3cVQ)@nn6{3r%=< zq*5TW>$q}{zjaGdl|n}B!T01pLBvv*KBG;kWf83l!nfy7> zF9#Zo;lvQYBfmVhU(^fXG7XaW z1{n&#s>?N0%qjAGD!j9}ItUvD*LZ0aFs_6vp|Ji&|8EDc{dg0)FRo}a(*A9i$R#zG zp{+T$eEmmE$%TRto3<+}Z+<8+s-D{*bVB6F>WkDGWuNPC!4><@*$V||E6ZUvW(UYK z3D@sFxI_NeL*Fp{ldA~iBTabLSFWKbYkxW3jxR0z%qTTp@31d& z4Nmb8MC3w?qa;(oYeD)YB@pIsxsQ0{mACPI7HlYiTfXB6tmO*7><0QV$3=8 zk49RlPkjS%n@DP<74BX9w1H^aKWp__ks2}y8kNnlOa6_-hsq0;zqQLMTvnRj^ZkAv z&M7}YfnU|4;0jxkLKJpOwNiii zJP*2sWKX&;%RCj`HC5RXYp6=BFYoYw=1V#GbY()6`0};SUy=&Kd`G4eZ}|;xFM)>l zfu0@iB>gUr-9k;^A%D1OrH?wsi>f_BV5x3?wMT~l2QU5NStPmLvPG)L2;^al$~j)^ zT?WT+Ssz~nM zwb5jWQpy;E|5~ifukfx)VoY{DIe9Uc>~$WpP$6JG&V}3@OPoLvL=yPCH1YwYmz^O> z5;m1JMBY1VFSfT}cQgMb!sTHJV*4;jfV>C-OSDk@AF9;7p(fr zfBL&#oo4UmmFX2*68bDEHR!G!?NXarpQEGb-=03*i7l*NGx>6sxnDAC(Of6abD0GQ zDy~u9ocJGHA&9bazSh!KsvoDc68bQ}p0-jbPAo8W194?vpzRf5aaQZW%=QB{)gIjJGgWI(RuavJLL2e{(Hc_@qwAs~@~05weF zgZ$HHmWX8DaIjuoDVl*lFjj5G2n5JLAR0hw#RFx*Ala25>_rQDqup@g zMh#vk96y~FQALiddRB&BrNkIHif_1xDbsh2sbT4q{)`ZnnM!QQi9X!}wp|0=2f%E) zG}{YuZ#A-1qx;hu{is$uZLc%7f>gR>+!95h-eZ!*OtRraIttIZwPg|>H@QVAxn=L4<~CI0no@eRo6$eu_k2k+Gvo%7aPq+S=fiA&G)7GtJ1v&Pk9dZ^@@9X#aX@J zt){^{xhErVJ`O0Wyxv2bGSueqJxb!zQ7_tYrhVK2;L~i}h*$WQUH8ld2oMFN%Eh!g z&`u?pNtLiTC_9)hs|Ov}C`psQZQ?^01>{sGMo_wzFz5%Pec34FGd}P`PQ}Bi9q2fO z04RbV0bA;U5evl)M<5z{wF_N#}%|f`Y2)W7;#8YotoQ+Kl0nPYe`pkoj zua7f&#U|8Y<~?z5o6!}~V*0qz?MBp6K(dJ?B8rtB&(kMO?1LRcXglKnPVhVGda2C|gp9Mp$_? 
zC1@v-^6K`7t0aMivXL!jdDF!ys^V2Dl>Z#W5lE)_@L~S#y4ipq9W}m#HOcr8oC-eb za#MFSL%NFExm5_E6mUc{0!u~t$z?R82~%SqsKTTSNDOD8a1D6KROSXqgEc7sSdi#g zwDL5v-|XE=K4D@jNO071nO;hSy^bjX2OQcHzRU&ghbEd~IASBVc>Lmnn@=B)KYVP2 z3$qUG?{*nP8Y#v0iFAb1s46N*ZX@>^>}WiHeU*5J>AiBx=8!ixrZzs`C+#W{fvj0+ z`mtla(R9Qq}al|Z4 z>Vmx))|}zzd@jnM3wCNPzQQP4)e0IYmlCGi+c(0gu{F<4fFkV7 zsZxTlXrrb<)a(#R30l^PX^E1Iv@-!JOunYr%@Ou@EHXie84NVLL}2IW$z3H2r1IRwtkFWT47+SuP*f1_ztw3g}6eAZy`DuArTe|;t`&t3bSesw#T8lRN1OR zG~)j=;G>QdVB^TtB{ZPqI?5I4deX_`pQ+$S`$$6OQG??>&#&FCQ)ttDCVjWUP$6tm zi?W=BzqdvPdrJc9PzDI9aP05@-78u^l`QT;x3mJhSxGPs;pLTC4)Hp z{Oq9YlEHPrsz-62G9T@{6x`vx^b@~&hLOQL4r8+4URx7nbY0Hwb7pqThi)%E> zfEKj+s1AbuE5ZY&Ud9e5UPubzjefvnXI_YaV|*npY?&h&y_&3-KZc1|^Kk?2?eBHS zE`Vwpf=>`xcADp`#CQ4{z^-+TU!BB-Bk?&KB(9PMP;2a|j0J%*+<{MW4|{Qexv1y=oYW0mgEZ9G|QItLie0hXG80xy*B z#<$K|b@3)QKH_wamhVc&=H(2x%?As-wpIfKZLN&^H%DH=>rmfl+KkBHigxWd2uL|-qF zDh=zv64?O%w|BEM$j{wBI1W>%mzT`=8;rsBbEpqDB!b1~s60iish6=Hspf{yc+_L7OZbAfEYf~@*C_RBBN(*Anienxs1wnpMvBZH6SMax%?PcwlDmu!Xp~?>6^jf>-#q;U*(nv|d@AOQJ&I z`^6!~WYMZY|NJi@`%E~=Wc#0lLuXfAc3XO6a=@q|di&e!sp&=fH4I*Mo9NR-Xc@(p zQzB=icK?Mpq4_7i1TPdLBB*Gp?eIm2Xl9+L6a)pMiA(j1e*VrUmE(-UmSitrcdy2gC|+Y@+C{T3 z$i`k=Mm_lH1&K3F8a@e?=|kRa`#`Wsb6F9A>#V{Zv1Y(@lwxzK@i@ybXpunOS>-XG2rA3 zpq4EfSB?vKRhpU8>S?b@o@6O9cRRJ*z9;B#5dCIapIN%N_<9DABM_xIE-aybEmTEQx;t7I4D(_PI?6GtOrY zEMZDRx_h83#1{Gc@ga7wePPpY@;nO!E{D(DRbA$J!2czIuum~8!9evoWr)3e8tMPV zQ{McE#~oLy@$$ZWytjsbk#K1+abJX8DXV*!>NYOw5x8}*DWq4G{108B&6a7Nms$3u ztb_ijrj$4z0r;K74pxraIFsLzR@1R%g&pw(+%3z88#V~S zoWiHc;WqmQ0f=>SmZpB9JMS#mp+CFyBqj^JIf#Bz7vhP`h2j^JE{JV6H4=2!#}Xwv@S?XG@tc(9 zK*rOWy`FnkTiAaw3qHKhZKr%%XHV#e=GIvll2xjDPBAQQ=j zP!t!isaMwmdKZ((yec*E*HN?t0=S!eCcMIZqumSv0F?aJG~o?zLJkR*fKk^ePW!JjCx;&ZmpZaV)!%JGiIS`3e&kTMLJ-9;c?%%7R&e zbzsU~|H7{Ff{+i%btUAYUN+W!4%U_W4j_9W5=O6nSGw%F+BGH>ALaGRD7N&D^^B?A z0szD%e>!Av{d3LVzxe2N5e8Kw7iv4BVbFxHxSzq$Ti%+sUGOu#Pj^8-pFUysur9=` zT#H>5GGVljEy22dX{jktK*vrE5|DN&yONfjR9J1L1N6@Isf~8l@d!V)C7iDxvi|8= zD{>IH@W-(Mcr6i47QxRMC&8EvV^E4l>IbCik;%MI6c86a4;nWMTkI$exbTE-qm9{2 z6zY%>hNVSk$)~)IUcrT`8qnwBxVXfDpcOaXVt;om@<0^hP*y8NBhVx7ZoR)-Coc(c znV7ub-B44~pwI>bB>a=NW2M9#sAM#BZc7T8fi=@L#ik;V;TO>C2e?)R4^*#&B<`&c;)KI+ridI zzOjUda8#j)J|$h15Olb507Ba`iZfeeUeJ2F`$5Sa*G{@j%45j{91&Z7R z2R3cH^ht5+&>L{deczSAPn<6s9*=jeH3XeMk+EkIy%q$( zZbX?pOsUjv%lH(Vbf>ZQG9?FfIj06ARH0X&4Qg4M9TblNf)u}Z@; zKu_5@9uZ*!8-q;q0gQ7T(zd7wvOt7*#)S4Qv4a2=ih2oH#u3D4X}nshwuu3ycUP?7 zcf~rPXN7nUx6#;-4KWn4#mnur?f;8VEi?xJh8IL4A+h&A7;+!L79!2@EZqtwS8s0m z`wfq>16_E$0w8mxJ02-Kj8z4Bo^}i9^_DK<<(>zx43d5?46t?k?*ySTgJ&&pz7bi zbeHqRp7W{Lh=EDlN~J7b!{Y~Cy>RhkchwR{h*aB(#y(%IZ-q+&`<-GQ7I&KAJ5q_n zu}VXkFaUv?ZvLA*RkqlrA5$QI2RqU$O+N%D{jXMELICfUw z$y@6BTs}Sfa{QmnN>{^2{-kV|p5(!~LWgfST13*c!c_8L^jA$1UV@rr0O6bFeoat$7UABrkFi{wG2gpQChJlKiI2_ zx9XMH9{t?a^sb0gkzbAkKY(kowK)S=bV`u@NGYmAZN*W6t=5~yjPtp`bj9;3Z7Qq1&^qHlQ~+VETCPv)}-j8935Itxf&--ugpB(k$NQ zb5VWBBhd1C%uKb>*3d)b>+nhh$t>a~B6Pcfr~Kx=KY{!5gu}n5st7X=+BT}N*<5@z zfUSe;1`$L1Y`xpo9;UY2iy|?jsx4;3Pk*g684G8WFU6kfHe&Nw9BA}?RcTYVu?xzH zt~PS%hI3R#^E}2Za4MwRhOXAXaX>G&7Q(@(Ua?0P_y+~XCw5yYO#hx)^|rtg-Nf}u zl&t&4v#5qq{9v1{A*}i!`w*^=?-Sot**>Gaep7MzU;7Py#;yJ_9L6S2Y&1KQuwv%k z8dmG5*%N8Nh;+$wDOaBrNNM6(Px6XWKL(_1_@TM=D#_tGlirUrbySzn|Fwd@a%IQU z_X-u6PWuO3wu;b5Ka0_&+#|Lfao(lYOX<&P!((@;PVEvqYl9ZyP;S~FrWZ(H>kM|N zxlOK7|EPcm_kb(}shg`Z-g>MZ6NEm_`^{=yPKF9l?#IjxE}uB|d(jJ;qZBVo$2zLD z7&tOi1-eA~oV~S43H#+1+mcIcMJN2hZl`j{n-5{pInj2@ z$DL{5OVh|<)1V;F)*gS+xS6CO$e0zh#vxOmMe|GXAG2Vx4!u+IUM^>aKA?ST z_%L6xU(rWqmERwT#knMZ>QEU|gnX|wbUkp#pUc{4@A_h#?<1ithrK>vJ>5 z<8Ffd9RL?!AH;GD!bV7KXYb+VD{MMo(ln5~UN+v4Ug1PcrdfwmZ;jg@-*-0r-T8-M z1H*0toBG(#TtY9bPq8A5 
za1Cac?(@yg7ahcfblZOw6RO1||3=g#prfnQ7Hl4t#a$Ct>$Cf4vR`Xz=bFXXmgEc8 z6l`s-3|OCtj3$CSJA(Jt{1Q1K;S1WiMP;UNF}Ym(9Z$SlCCsPYHY)6GLI0g< z!aA?UtP~(4B$nv*BJJhp8_mWG~HI;pb?te#A-kJN~bMH^RUH5(cxjiY?(HI0FY?_^L{&{L8 z<4i*ZCJFt9e`6|1^MH(AcE1GH&(3OYrk!(d#N@ss>{8pjLAhmsOrhF%5V&2(&CJDS zW?~HQj;msN*L%vK3ZlUBlvlZ52cZGju0F`-DMHp0zapWm z=Xf@`W%zNPt8aM^A+sWtoW)PJNpzvt2a&3=)&dAS*KgQjpWmfDIb4(4N$tAyvXPu* z@T{Mb45S|c3<(madZ}NpbnhOEq2#I34>R{aLtn!-ebskuZ*kl=AuSuo4!MB~#q!*r zl_;z>=HK7&A zbo?zNrGK6Lh3({8R*1y)JI|#4&gO|U&DlW&{)~=9IoF}+LKVOge0+NUmp`m8G9_$P z-4%`L_j2`8xO$E}Pfk!(tDW&4OTbrZPa!#Tq#o5UP|x+Kme^JYd3Xh{`QDG-RlCp~ zfLvhXLv7OI&3@kxEOyAg9qE{jF~iE>!9*2-W$9ff^=s`}zdf5<>sZ#(s);;DJM&4? zn7^bhD4ruJ&qOg~1%?V*)T3x7TzTG_^}Bz#^u%ha21(32(Lr|G);yoi&Z zkZoLyP_1>HkDa4?2xBmDfJbbFu_d=-+XwILQh^gF^zqGY7}WEi{#QB0$3|6ueufEy zfSw`?6qx0dzlJ(gHd>yu@dwIRzz`WGQh8v#4BYT zsui}-g7K#S;KzZU!oO$C!cWN8d@x>*l6~>}@XCKG?&E>er@CG6gRo6~x0f6W-1~|8 zhvS#4=kK?l|M~&4)H{3sIm)SZAU-6+`st@o5S(hQTB(HR;vS#B39QU5A_~2T^9Cz> z;r9f>-A{1)4NCP$!qOWFz6fw~Ol3(I>0i7Xm7)=~waRREO>QdB_Kesr7KW?t;XMYug&!78z{u>*i^-R{vlm!`U@y7Um%5E= zx5?t}>9e{m$W>(+cx8N%av`rJiUF2T63+>;n^YhDv4VG{mv|j0o^BXf(ZP7A3e)3- zUD{^Y(McKOZauY)$WmMXdf?C&l=t6*zWE0}R0*O?ThhUdt+0_$jOxtT?X5NjP)GtPV}bt!TZq8vn>S$~5(tC1)zGbfAR@Md6TYbQCQ z!y&QB^lR60f!Z{hCtoYB{I05uL5$TN5g)%tIUPG=rA87OCI~?i*JWd)%Sw@LUoS!Jx$B3g4Dw-rgQ7vkb)J*9+nQYf z^3^PDx9{~}ohi_5FDt0{Hk{<9O7elbKuwzK(#xjnC4t#@G9UUaSlaUV(tA{91N+|M zfAi4X55n{=FRka^yFH)w%$0m_(O|vSJ>F3owe9Q@$kPQAFM4|P4H(tLyIgozkb&cy zd^6bEw_cBDDB$hysT>uI(66iM8EWczH6;Nw)7sn*@L;%KIeAJloPWWRK7f@+{?`YZ z&8T;3(imKc({#6IVvkq!?~FmfY)s-aCh_){=*bpg`CQe}K4>cd7Q!~!(nXFKH-89Q z$OXT`HY|keDY5ao1h%Q{v=S#lj}bv)+X^fK_qsl?1WfS&4rqdIg{AE9`K8@8rTezl zwyxz88D(5?2K^EDkcwvmMsL;15*iX}w_UEUzhkoq`yUu=;dhw-~QY2-I zBzg8IT3ZOYqnBD|Q1M6T-GU-=cId^$JIgy_j=xuL(WtFEc2_*vXQuphKlHQh)+e@Y zLC^kt@#Aa|(gt9eKn^Z{VI7Kc%FSX-zPP3n+c@?i5d3@yW!$Tx52z{8p?abhswi&Z zImD(Q^&z0F5*9NDLCpMZBaXyIsHO`+<7nQ?Z*vJ86bot4ZMwK{_nJQzG<2B?D%P3! 
zy=#|VotLcj`}D`@)rZ>rJcytF_vKxv>WBtp`S+X+3HBoUoZ0*7lkM&$x<#Go=iJ*L z`>hM#?U|jz%`q6=a#y()S4`ia`nu1djy`jp_ih(`AHDnSob#`bAnQ-?*P0`zEpXg= z--j~cLHmeWc2)k=fF3bysJe_&g0He)I_ZLhl$O;x#mlLuhF|lsRF4((hVF5s+un#B zBA->M$@X0?1+7|j&t8%los*r9jj0dZG|`dt5L;Q}w`bZ>B5gptoOm-Yjs{SR&n8<& z50n8eNf+^_TSBjmu18&1{@4~Ko1<1<+iWsYu=ujuTHGIY1066(W@SA%9dX9YmFBx{ za-b&uWZrZCo!>t^>)E^P&zpPx{P&ZI>kgZoht2=+;_uBEyN8wQX!dc^u%$=b(`9la ziF8q=`NEVgg;B^sW9JQHgQvb!lN*AE46Tk(R{IsayAOH)C7K91_~H4nt>&9#5yn)S zW&`JZ$cdwz87QaR1b4gGfSoxl9KN*_uV-eh;$jY=t_e|u~02djr;fo>DIBJ^=M*|FC3{Z0S%xMz$9gMk}%;-P+M51cDrkyzLdg1AX z1!6{gLewqFmnWYV;$r-bxJww@i)p=-V1mcGI~G|N3ZGnYxpRDvvAL|3S;A;l*j~~3eI7gEJKeNkvR07Cd{z5L-j#L6 z5x<@4kaLH|+VSEcp@bnOcr=!BsD4Z-!3%D{FEZY7Za<%qdTBo^LBo)eDmk{59x-W` zu%yHTT6?^0_HD5qXBGyaVn>CD9eeGaqvHRD7*DiTzx(W(#Qb617>Yl zfbF=dFJ?S2_7GElr^mTcpQXKPzY^Db=JZt>jt63b;8QZxEG(m~uaC$*-^bWT^(YuV zKN!Vc{4RRX;~s12(oD9~l(+9A$L$16;XN z*r~Dwp#Z6s9&WvN>&i3Rx3^se@Bc`C$KU#I;bMv-FQjh(AezTueq1}ptBWqc6dWKh1ig;#^uR-2-xw8EegnL z4`;>s$zl*s1`^G?0G*XmU6B?2(qc=Y08=j7m!m~lY3JZ^mRzErG=MnY%>mpj2lN;6 z@i-tEQCFU{n#*@=ZQuTC!-CgWBUjD5T6krC!NUJ0${+?;iIXYNwi*@me6}Q0 zU?(csPW3$vTCC?f(5BdSe#1ulf1m$gts5&3EWUjH!3>mfPJHl7B=*MNIB-!HK8WUJ z^rnQq2iJh!lkUJwWxFnIZ=T&6G`pz+^;=}O>ZFB-|DvhDOg_@Yx!D{=(lID#dv;Z z;qf1YC3T-#ro0XJXS6Q;Dw7v4L4mkjVX#^4!P2P&XUf623^&?ee05SfZGT6xTj3ow zKe8KVg(!?>v3Eo3Q;n)r!3;!1;=4@XtTcdgtZyB0oi{{vw6szRF$?J6lCZ;|@dU#$ zC>rD$!lh`3*%i(y6Z8>9uvxpZ$d|8{SSW*-^)L4#emRT_;-=c8+srf-!bedWsppYY zd_;Y>ajmG>^N9l9+G!GsudA=fXsT)J9S92?d`x~(V9I>;#Wf|#c&+1O027^GX78vr zeI_hP5ClgOR*wapIj?ljlr~PX_xcQGKiDd)u2|b09_V*^f7Q7A{`K(jjs1#YF9DOd zMNs)&5$_cLv?D8kSns{(PW;#Qe=Me-h3?Ex)o1l?+7q#^+o(=k>?s(><<0fjOo)yr zqycBl*y`F%`(p^E}|N1lgyB}Z)|E= z_|pk;?f^u8!fp)zD%NM0^w`vCVT1&0c(MdCY!T=n7QKNulo;Y?D!KYt$Aewd>DxzkTOS)%toCUd*Uw3evC5#WIa+CW;Gy8-4yIKOf7wg3B;_N$7(n`G zh6;@itI=N%K-P&uRg**ZBaPizstZVl=Yr3G4JDzDX;^N~89f%cT2EDg+6;~AXQ-l$ zS7|n(4*W@MnZ>fu;$~;W(6DB?l*fS#%|$Sv1xh&_lz|QI8{vo%o~nW$I@HacFGJV1 zMQ`uOITcy^JL_uK1qapcguE^R8#$d~RIDV+V~mQ)>NHej{Yd{n$mjSpusY2K;o$akvven8@&nVW9RsfxMqG$`iwZ^&$F>DWQPIwA4a8hR{nzHKFTPJx zNX4H1maj{?EbJXV73wrS6W5ocFjs609Tk{SN>o~Xa|XwYV9<(h?e*9{S0E&Suh zV{QN4bM6&f!~pe%S5WNiMFqEd*EDt3h+{W;e@ziW%g^lD;fC}JDwD1v`E%R4l*DD}@fad{2eq2%u5b#t;ZtVwaxwd!4Esi%>5yl|_ z0zQNBwoKYnE;0qc!jey;5X?>X)Ir+Z5F> zIa*934O!yyaM#q9q`YxC`P+*9-_~sUy+74zl=pp_`Og6NWwi4(6mgR4W)jvOM=rhI z!Zf$m5r=!3Kk7abuJGv9hL$bkCXvu5BHIOnylK)D{%jVKQQKE+0g30e!_WgxR(}TbSp_pD1-z4q}`ZE^HjY zLMI}8WO$u|a$J3Hh$n9QL{}Pp>&Uk}b^hC+xQo@|PLtC5#q@r4c^ms5PK@C&*Wik1 z=k?jxfp?Q_;}!`Y=I&^IXf&3HDq4Hr=1MBfLCqB0{y8HsG-z8-*&M7hyI^>(g;C@d zP^dM^O?~UpK)LcJ(YDndB`1q`arzAuenTKf3ps%14h_ERxtLKBw4Q-4(>Hjl#o@}K ztt#Uo21M;8W+G<0|3|Djnm5oJ5j|J|%-FlK6$Bl_7+FkHwGpGuctUaN0|LQipr+3* z&`Q>-Uag)Ba$7GKW=4VAG^iZ_nbkm3ezLq^WO?p9?bYUSWmOk#sJvuDd2cKIHWRr{ z_kQqNUiv>B>`9Mt8&bR<8K2#oC7n5cV?bbeNB-lc*zzWm{C!VAi9Zk-37cOTg7cpsI07tMzi6j)E+&T^z! 
zOruyn$`n60b~_{(7vAsc=hJ^3aBt+%-l7dIk|)VaTXLriEaDIJk`MH=zp}pks6OZM zzi~@HPr93lXFxo0$rrP7mBefE^o*SJP5G~_Fu1sSq^#ZW?0#mdikR3}%>jHDD}1bk zcHYs6EEs#l4(n19?2?ULuhYKI1(N-=P)LQtMimhW9OK02+Vj}9spQkI95=2r61bCB zJg|sV6V1XqC+xvQm60tI%h9eMJc~x!{niVyA*U!Y4VdQ@YssoD8cacTzKekayLEbG zJwUc)T0R+%zu#a*--1CnQJm z5R^wj+GNRA)@L9gyzaa(5-uzD(gRY-t>TY?HCf@_R;-bPcf;R4$Cdxw9NgRxobJ6; z*Q#mfhP-e$xh{%}NjPA=#k~CE%D(Yewf8MY$YFiVu$~6b|ABNBlRnt#c|&X|;u)YE zVZod52qJ&!S;e*?v|GmO3pFF(^>P@X>0t$QKw)^AyD|e-o&L7ar!Be4Gg#97?;v1$ z1T@W68(O1s%V9E>ohsKtwL-%^nD@~tgvv{z_nFm$*q>YrXP(7$hs6X#XR!^Sehcah zVT?XiP#EKem0G@`o9_b}BS)RqRa##+$!o(M&vir^({wiT0jSz#}Z(D>r9r?)}kR zwc2uU)aG9!%j05F*p}BV-{7V1lg}(SJZtzUM)=x8i03vKx4gyOX)#=n$1=E3a@Lnt zQ*5=6h(H5JwZvI~WQ&ZV9)3ir$t)&W^+?}Mg|$^05p&N+t3 zRmKAZdN&Xr+9$fv%iHr{c28etuKSg+e=o~)k2n0vr+o+D=ieFsNx2al#g~?QFAVW^ zRs4)uhm6|r!qQ$N0??hH2F|0r|+MWZ3sRD;lT zg+Y?HSF-Psmo_-&%HIpBp1ec$*gg3pHUV2a4;Fa-xZM21+^YJK=SeX+P;sWSjQQV* zY8#k&u0|Sh&t>V^X<7SEA0ajdD1FI*SvM-a&;0XkP7D<-3O>vL0)wOl=#1CAAieJ%xs3${2Hpcz6%uM27i)p%-d$H%U$ zj~vT?sNWMF=^8=LoCdMn`)4|#h)f5YdiGv-oOOR}kl4qwygKUP9q;L#)coJ?f2@7d z{AiR$;zhY;zmD|1y6g&L*ssFP_TsUm1`6tRfZySn}+QpVs#> zOnMIMZ+lEuTrms;4BX!Nxihe*0lYKv{)58ia-h=;(-<*@#WS{6Q?AlT`rOVPz0Y(Q zlvY8RMVQ)Lz5b_-gEF`{anjBaweqUyOuSY-bkHHsm(nRQw-B=NXn{+sEOXy@YUY z#k~zTY8LLTxVPcnhFi-D6>%%Bob_Oiw6urR9A$;2W@d${rDlaq+uF3{^YVV+;M;w0 z0LOJ-_y79+&JznaK1LN(76k9^!4sZ5MKy$8M_rtoX{5h%VQ%#ih04iL0;SNAq$kC+^m?m8(w zb-PwPp{`%a{&`N_xV&T1SHo+6$9_nr+&*!9x}g4E!SMxuqm#?X%v4u*_e&YSE-l!m z^~)xmeU&((5_p^3>$q>+Zb6R`7A;;v z-Po{b!K1%p=+}EZ ze)j-@ zs5!bv3l%U<3dd8y6Oo~@TMtU?U_~~7ZnRUZ&Qq!lXf0^v6Y0-l!Ohw*dF@|&_a4rV z`z_^L^tb!(-==~EZ!yD;P{Z*s!>7lR6MWa-UH$LVlWtI92q|@}Ka|IwEVL=ciAiwS zT$>vR31wg3_=Q4LF;Z9%k5h_(V`cB!1^a6AM7+CQzNLL}tX8xzB8KuQwzm~5{8)cL zP+JI7NS;WgPg>gx;pFO6TfA(mWp!x$i3Px~0|k;&bz>1gHbobrZ#l%X$)HLXdhD)K z#-heri+9B{(jt*y4S^!0vZd}AfL;s*D^LZwGHb>yVYwH1jJb95te0zz z&-*`yj%U1@tF?4b2_0U(&%VfnA6dPSHkh#Y^3gVF{m{k3Awwgsj}9fRO^NT-_h=ub zZ3vQ$KjhlKzF9gKB3Xh9{r-NjC2HtI_YD~4x!yiq9`TtC;=P*o6o=TwM9P?^Pt*`^ zNK?AWCu{4y3AJCh9TnjtUd19w6WdJ1BT|87%7)Q2UvX!`1u$HPhqpzT&Tm-@DXvg# zg(Ru`Ng;JguV4kTBNQM-OP9IO7!$#v6^g_=ZP-l~xg%^&F1)X-A$Z=m1#qey@mkZPh;+gzy9ff0GL9@!$c7-J< zhk=E8y}GZI8>)Bkp9_TR8|v(Zjy8XG#43ITmB|^{QDOz4Nt1#q(Mt0Zfq)&m2_n@{ z=wfu%uFFX=uIqHAQ7V$g1z`yMrUtwmK&N_#YsKKs>^2A|W2)Q1&bWQv64G+jPBvBM z-50ddZ)FUs-y!ANWC$4_e=_s>pLwE>+o3hLFKx6L=~20i{hl)N-OXsMhvj0&(8uj;Ev?1qGPV=k_}2Lb4ogHo(309w*!~S5~G14 zw01Kk=dt0+NwAQiR51wQ8^2ho7Bs&-Z#0@uMCgytN8HdGTk*CRs$c&+D%K08OH?D;CpKS z6g-H=Hvp3|47yuY`-6HT-1mjviNV{ux_O*67R*`zXcqzB7>!z3PffXgnXhm(4%A1s!UF?vz!f@K$uD$+$wP77mAr|($NaU0$A={ z2W)BO8^BgV5SRf@#210*c_q@Sonm)bWC&DXcrNXxBKQ@P5**-4XaZ1I2G_p->};%{ z^@A4p5p3{QHg2a6zhHNM8)-6p$c?SJ%@h6SkLp#cmV5d0q=R?+C$E10JMZn>c5q?S zOui@PX8c<1!OYL2B>ZloJ1EHZWW@a&rw~qh5Cw?hDfKEu@ykU=-PO zP;3bC4T!^AMGDk%uLe9ebUhN+O0>sVqeV=kj=_<39C&2p&&?l0bXq; ztz__2y4@9HG8hHWQMtU+BAR4)N+}%x^8lu}o68YU)MRf101kBq)Y%S(gt~(L4PZ#S z+hbXAf|YkaQ)Yp1^Sj(x!^RH=p84RXr*?O4MiOr1zqW;|FmGTTP%mVko1WT1v;ejIE9s>7c4r+Yd;kxw@F4Y z1^aG1Tlbyv?P{r^F8D0L5JVvZQ2JEvcQud@>pn< zL8et$INJ_qJ$qsoCddm$WL}&l%{W-TU8o7R?W#&w>%^+UaII zv9X%eUr-^_5C`C_PePtdtCZ3PK~Qciq7^`gnrtA`o59{2i?spjss|jbk-i`V30rCi z&lp?R!$k=0A#}LjguT2!4Jql*hx~kOTN>c4I;-%Y)^XeYr|Ot~Fq7Ht@Cb4G20$7Bs^V>d859$I=2Fr5*B_`*k*-h0%U`rQx5EI}&9_ z;QGi86huf#baV`A^p0Box5`AaevABqwo!tP-jH5k+knnnz-;zKiPSVR#N^yS$}+JI z+b%*^=Crzni==?4pTx#jTHWB;V9*gSpdpwgA*#uS?&`6uv0%-XTkpxO&z<1OA2Zh! 
z5ka%I*#ot;9dHL#Wnq%f=ejG~Rcr(IZtdkkTG4MO-D^v=jc9~vW^J(&U>sMskLec+#q5$E91$syRX z+?hk?FGQ#2s`#|nycxgP{!!QG%zEB#PIUJ53n6a;qhvCG0uZOg#H-j`FGvLdeQ_5q z?$y}8{brW($IaCWvZg6V{ujGRE9EJk#)OR7z=YUkF>8c@pytAfcO08N{cKEt(=c|B zRi@Ru4iAa%klV9esY9XPQOTdg*zxE^YC@n9o!18k$2gG9%^QlxnwjDbN0B=+oD{cT zW_`vjrfUSzdTP+a%ez(=WFei?w1MCMR;64V3d1JmG7pt5_P%s-oO{#rBGu=u+)MS) z?2?$>oSdDE(D>bAZ(5fC)(+U)@NRstYq(`}PC*k%3M!1ddPuR0rK?xI-baar@qd9qMq{|lDoB-b5t^y881JtBv(TLsi=Z* zTXYBq)EHQXiwE{-#zq?}b}VLq8VUx6LGsoj-wZ^EJdjdEfE-{WV+gCEAvv0dhjPtz zr4AboBYtKfkFa36frpO}Y(ofw4-X6DAOctwS!`5ati2oyyq9VpxnU_pJ{qLrH$8fE-yP>ekpe|2CcA_5cwVCu?1`6piWV11spYd3JBL* zhAq&SSP&dnC?1UkK>$eSn6(50pK5BN$bqL|afE9#Y&;ipvlnH{2cWb<9S+!%FaC7aKF}#?B!ftRM}~cEEJ!1MgGQ=2 z0M9i6;l3mg!6SfSv4#L?(U2#2LR(HH(PlNd2{l_KDL;GR@!Xm}Bsg=|Kb};CU14LN z5b(w(n_vB zt$kf-PzM^6NQ1g@Ko?d8Xh(?O7Tkp`R(XOn&XpC(!7zY^5h=kB(hx?i;5ebb*`sF| zngN1L4$V)W1J>q%i7c=jIpWxcq>?(I2zMwj@PvJdIG$D|&>nz^z#b$`nx_@x0~pRI zJY8~-BO`lwLX8yEc5l`@#)|xs2lZjeLpkr+ zInUuS<+-JI41AaYPznLvR#vM~UfV`MON`g<;&1&yKP%A*ZVN38fo=J)Hs_|vsGlu z23zske7JBL5Awu0;@~(i(s?TUO<;IgmHU0kc+@|3RH!=}cH}H9j3-EugPL~|n8F}Q zGO~-(G({DHUFxj}X4fRLx0;YUz3h0PIe`eb;j?kSgu;X4%{rrZBw6!msa0jHw<9@@ zUz)otIy?p227Zgy;aZ8+mNqV`om1Q8Ox|as{lU7^0|s&9E4tI*1w6$H4lb? z{Su_eQ92-{ypM!PH{L~c@$0(?NCpwJH>cZvb)budDK<1Wv^#9yi^!nDtSBd!%b+we zqK1ZQ9s=XYNfV1z7`lCUOVyjcQ%ANXKiQSpZ=LaA!#wb?gFM(l0ho0Q=1zv<0)Z4? z>5M+1T~yx`83ns^0q%lAoXt);Qxi``MA7@Acucg z#eVWf9OR)i5^Fm-=s7lKnTWQps2$aAmG6u7hjkjtC<+j%6^ncjv>8;89~l zWI7MxO@Rs6oEhw{5_aVsMt8L+Ish$KWS09_0_X00(v*8%f!<~H|6naFLL25wL}c#r z5e&TEUZMn)LS(u|R(%V6;7AN=u`m60N|tp7_YT_l2sBbkV-q??0}t48qQ`(H&$on><_8jwxzKcWu>=at_0Y{%c#f zPdw}rN47y7*;J1asLV47Fhc<}5P*n~K^L=$R45=u1FDXk8mYI(l9Os)o^oeh*K+GG zpk1{$?6;?khx1@vWop#|8X^T{F@LF&hpeLSBCA-)rT!Faxv8&aQ?YcorVbp}I^|Lc zsiC7ngAeL04Hlm;;>r$r#+GKArD}GEY~8?C*j&+^!z}Y~G4r_J^SEXHM8pItlv_J# z8{26^R$RE5V%w%cgt(E-obagSEg_^YQZfvggz1*A>a* zjk3sm4wA)3&bpv91%PJP+VCw{YCBNFLeKx1=glKaX?K2ik&}%>6DZQYs9| zq$1huM}1VNoM1vkMxFpBPM#I4o#+xSg2}=>-NJc7D(a}nqg!yREnsln-l@v|S0ZtI z{xRH#2qpmH{OcZoZv}5&zD)?WrkF+>7hi?`V~Xsdqgm7ki=lTCuB2oWrfmA*$&Vkz zlHrH`-bvGYn0``#{5%#q_mA#n+2Yld_v?3glgG<7?=5+SwhUq~b6eWqtnBjloZ6U3 zem1fDo;5SfMYZvkzo?Vbg~$yW*5^OFDDsiZL~Q-~Y*9a2Q0yNgpcq8Nsv)Xc4m~{Z zh)F=I9$n}o!W76Lc^0CFj9uoF%H_~ZE`m+R+_myym||{Y#_;Qqd%i`*_+YL|?LXQKmi&DMwD4&3w z%Uif~=TSEiVZ@L1r`nocl|9QrPlO@Ixwub!?EV(a5WC*m0~ilITJ!SplgDFY`Svg> zR4a76lZWf(T*y9;oFcvkjxWbi#@ne-2nTVNiYYq(wB!^re(R~K&h`myek>7wfC#tX zvCZh0Gpyc_*oZ6&0(I=oi>8OVuFb_iFTdZX^(lkSf*!cLjZKuBnc%(M0`@Gtn=twh z+xQoIm2!nbfwpQ&_ZnRQzT)Ri^2CPmTL${8|Me)?*o`FPQ?*Ef=)y}crvRb zkG^@ddy$6i1JL7P$gVAf4*?db15+eJi>dJyLU=Z`;Cj{vd6IWw`b)H$6GcJ=4mEb7 z6DA9wfQT-M!&klum$5a)Q#)=+YIxX9?tW>)`gFTAlumc4*4HUVvb~RO7_lqPhBo5W zT%vD!cy&4~=*maMRkoaoW|uD*I3TL!qG-wX1z^XA=aW}L7i>)zSKXQo-CG%N?(2=+ zJN2^8GNsjI;=yo&=o!8Br`PzsXEI zFtgbSQ$f>+dKv;o%V}z|x!@$kzcC>kylfFn2uGaCV}5AS_8xb7>@BYSP|tge)~&37 zUdo`1_q=J%9yqCQTTGTVw439pI(IpVzrE@*kzorRD7vh+dE$H!?#Bv&XfXK~g0Ru& zA*ls)$R7wBR4KB;7iwsinpWIB&bt5=iIQ-hQEw?#immk%S&xS3Ipy`#S(a*R3mcbv z->B|xPe)^Wdt_`Y4F=cBb3QyO2XrB*cFJCXujz`8# zDQgKDO;!@unGIIB3mssv6q#@h`0^;8`)A*2KNZ2{mDONB3tszr6AEtGMDLxV8Ecp( z+lYr~FGNjsHrS-?Y9&Xsy9lMm%}310q}WCDU${@G=(jjF1+5=pPIOC$mgmQ|=52e| z-J;z^RT8z%h`o2Fomyv})Ls6{aP9d6cdj4eGhR+r*7ajW{0?^ecN z;wQMbUb1W}$tmRBvl1*7kmKAA0MaIDb$js(9GgOkux_y^T{otby+q%PJk@%YTINgX zP@PKr;vhV2?7RNm{#;~bn!~?S@)6h!(F3E7R=i(``kMx64`<_sTV~SLE^fmQ9{v68 z$Cv4=tGo71?sls$_zW%4*sKzz>SjsL{b`X!AoV~hs7`)!rNG?MzskeXeS{E$$mOWD zCHRg{kF_g?8H+il9b58ok@TD1w|W1IAUuI-fm*m@y{22UpWK2q<%}69CTa<$=LL_R zE8+wG_Nr43oW~NC%NtD7N=|yrKQlZ_x6@MImh%&um((gKlr-i+eH*R|FUtta^cORY 
zhdZ>^4hvUCL(l)4-%X=}z~a3$SVmd5Zj7^Ju#B+kEv$36!(h`ZV?^_vznDx$!hB=q zM)BfF{fH}@(|i39HOm93k^iv5U|^~xh%@7^phBO0m3gMNQ8!wt-dd~IK+;xvj`V7` zW2tiWFXMjwDUXB_pZ%^Xnw9FM+bxOVpSEWDaMq<-`^ZTc0^s=B=-Qw4 ze~&SQxQJu0k%O7fL|f8I_GYFzhjDJm|MwimX%$!+dTk0FtkGKakqy}W-9>vZ&GBhm z@O5b%l8n5L5^sWAag>^>Y+XavOuAzCJ(sY*@9@&_HTI5y^T8)bDenFsO*%ogk{&yS z0>u<2K+K@*I!kIg+R;)_Z32IS1c7rM>D5R~FeR5L3bV`_Z$`pjXCm;y<-DvP=mk3XmX(FCCTWW=m>_oXw+0&Ahg=8LX3^ zBA}ImoaoO4vrA3+m+_9C?hyG;xy)bOh^p>dWsD?^OVNz5+kul=uKdaP0mw1hR<@VqDc#rPY2ol}PwW_kpgig66zV_J{_K>s z%H=B`b-&8zBp}RAWtWCz7C_|A%_bj`4OxMg9w>o2JIkT^-7^a%>{QQBDVw4--%lZh zo#=3NYqTT3%FgO>n|8=oWjQgE%NN8XvDuuWAVU+Rrs^m3H){Ane-r)D(uWhLBCv^p zf~;ZbUbE(@c}}?r?`hzC*aW8lJI_Au_Q&r1?yF0vy`&DLY%dwKjsRTu<|-4~oW!5L z-amQxmpjG-q}1C{^-8r16Fgf;xKAphh$)LPNf+ctB1E=e%SeAvo+r0NMje~n-Ke(6^V)|db& z0QXd$go;_IP8aH?&poAP78&r{dimPz$nm6C@^ne0OIG5u?1U4F=Fifs>mrC%ndVks zq_4Jhl1R-zvA;xQu8ud`!L+voIPyH>#5Jg7I}d92C<{OnH2ye&-y&>xCJT$7(w}r) zFiRusS!KiZ6CV_-rhZcip{gzixy?BQp@UxeTM7OVfI|DLD=YYCd6LH3r0@7_Dyk z%P^IAYfMPgMbuS$N(e+q=_wEt^HvQ~I4e;XHFDbfRQlZhT6*YmY$tzT+gJ4oXYdynf4C)fk znCz4|H3%_c>reld7{D8ymnu32sSsufN*Ul!zs?8E)t;FX&d3|OQ)J}0qZZ4qxYXNz zd$nTcE&Ra{>Ca)SA)5jH6|mNhsfuQy`k0l*I^Fr)73>%tkxK`2Y?e6JD+Y}r1i)95p+MG`8l_m%mJwo z3voQ~=UH6n3iGE?Pvj+-#U`EqrKXxAbIuz0f9wUI1ErTiJM{X_%kX{XDjZ<3xm2)N zIPVgRL%59AcqrD5aXkuc+^au{8ZtQ2Ey~y@!`gT2i@tljwl<}G-nK*hEF|j1zOOPO zv9vMycIM)9qy3oc6{0$+T;>F)BB7TV#}RY84jY^-{^EtpF@OoSqt3a|%zX@N0yMh| zlG9&g-dtqHDHIHewKfZ6HbJtRARh?gpA5161jd<;B3%}gy-|I;5N(Jr3eA8z<}z%> zpmb%;&zex1_`uyJ{h3oLxWL81NxmKkl!j)0q zy5-v)a_NL!N6DnBCoCyd{KjOgw3p7o!^qW464aYt zq|*%kB&_+7XJX7P_$XUsIa@R*1GQy2ePqpkATVskihmNp@-K_3b^nPQW4IBZOMf5- z=z=*M%H_Zqk&28u z+9ogm0z7o9`nD>=Y6YyaQe<+t(+TNrxNLx{|fF@_IvpEC#QtohJ|9N@}fxa5{)jn@Fde`k(tcnK)*3l<9R>PCCDsw>kz-@7@m-t@g#Ru;rd?uUyR1jAX+p0#R^1^ z5UfW8JD$Gyfo8(nSFroR=Yc4`Xg{m}J>CC{5kr3#ckT<_YSV-=2K$^>l1V`%#4}?i zihZ0yd{~UpW`-+rBI^ot{N$r?6Q=8?kK3kC6xk(;%DigAjH1IQi_=6)ck{49cz5mO{jo`ZGcH-JX*8LZf#s2fv<|{9`X2Z4_#wr1(Z{goYVI zs+Xu-iQk;OLRNq9h1Vi}E(0%wxe|uAT1NR*YJi^nZlx&9HH9uqCT}iVN!h93AJqKT zxF=#@B!K(GfQ`ru``P|4&lUCyn1upP@mQfAA8*ZvII*f1PM2htGS6LN#O79D7>}~t z?Oln*-ke>AH~W!G@1x7Gz-_sEkyK`6AUu}DjKPQAaHn1m3q4j0Kc2-uuE!j9mi-b9 zwWl)dh=G@53%>EuO*;Ty9w!Ii^^zR%kP4WkiefeQyTQEwQeR}y?uIhI#=Wn6;XNH??Sm|>Zaf4=Bs-I3mI|Bm(Xgv zWn#nw8Ig+Yd!bH&P$^MO%8tp`q(U81@SYu#-Yl-zr6ea8aai+FjN2p31k7~|=FES7 z@y!sI;Db&1F-}yNLP-?s#dUXP%+|xG*%wi{>%)C0hn*Lb zyRzjIp!RQKg+yDBsPWnJy!D^?pkH6t@4cLpu-gzp7fv}BN$-1XR%Dc&hl%AD*EYh( zdCYjy>gvE_{7dHVR#%&(NPbqNhH&Mvf}Yq*d*hUc*TpLp_m})*UlJLK3+-i248v1= zi=D`wA3%2whQ!gIdF>B`MnGWu`4ImkaMy=_t!O6R$dJQMi0{}dr{x0Dh4y8HhWyqm zCM&#fU3q%>A~cG&IZ?H_y%Y_}egR>^Z+r`IIrhT!<3slcA<^3ckTlgxzdR_kdm5tj zZ>gsw?WMR>gh=&Ec|2IQIcDz2St(sfGuN>ZR!Dy&d>sGEvVu9kS|M1uI#0Ro#_kFN z+s0{>%3r@Qv2Q7~*|4C;O}}j-0J@=)tDaYa0fQj0U486J8E6!BBCqjFR}46)Or9qc5mR#Czp+_#E8N5mx^UW zA-j<`$-?fy@SQi!@Kj1aqxoG1`*|L*#D~iC;S@(xLJFSjKKC`={AE%+ zc@x4Y3O4~Z{!OwW(W!b<2opY9IjMH%6ZCesg1 z5`{}6_l(}z2YW|kD3MGFW07u;hsf^cH_poJ`sU5a3I6u$k5kltGS7@`$BH~VfI;MM zta6l%<3uVuANc)xOWBmDaW-c#%Asv(3edrikU8CZ7MfDYT~sHHY>|#HV*XHM;rfPgsCiPKEjLvZ#cIk)>sHHD;MQE$skvW`q?<4;3Um z5&yjqlBn$@)t>PcHVE{@Je;$87~16VRs6;Dy^~8p&+d0=OGfJMmFhWr@N5eE7#w2q z>WY-tlOB>n&%a7DX|kT*{0~H4-%NS_avL{YahZb|3@RP`?(niV%a4Nl=Xnd!rP#rI zkGB(S)2ow$moaO7m)sn6q#1FD3J)1Od)4F7C@Um=Im#z6V>j{I!M`USxljY9vL7ao zmPK2`BgxDt_TjWc@3*_C(at{*h?tz?IbwM(Qd%)K@{l7nE8@8;!vVjC<6bWHBm|{? 
za2i-DyuDAR;luFen=z-IG)it{YHsI)4;eg3xz8KNJ93rfZ2T34?k&7J{-*ur*SFW> z5ots}cuJ7&1-PzoT$=j3H(N8o|9(3OLcbvGIeHA9*^J1_MI3P|Su=&Dy2F1Q`<^EM zaP$f}&HZpv+(Xx9M&f)lwS4dgUp~-x%k#G+<@dJdk>Q8yzlJ^U#Z2-B)J$b}bFG>u`_5{+jcGHEGp}mA zjVB|c#79|D+J=H*bTfru!w|!&+x;NQnWgS|dJ(dks(PzV zZMDko7A}I6iJZPDs_{|QJ@sK~(eeybzdWi2CV_ghV(9?Y-k#hz)#T4lYh zTZ(RSJ877;d|%Bqe6RaiRn(%&V2xbuz>8TgHJ8-KA}UO`)b&f#Z`Y<>D;L5YWW={#Nh!>sOcrKv^?+CJ3(@7Q1QZ#Ok?X7|=@N>xTH z7ln*n)au=Q{G3h1{B0lGG-?Issi6X8J(Qi*Y8oy|1XojRB16h4*GrXWIV-0t4sT2= zs~l4*bgQCcV*?`-S0+_tqKMy{r5a*R1jV%-{VrW+-{T(?a}s5JBZ-qx$4nWEXlYZJ z_$DEz{vUC%A?==>t$NFRLX=1ALJIV;IWOf5&msNrmy_0LFS`Y6-2~rLX6w149{Qv2 zHxPO9;vdKcPrN0049aFF*?93o5}035jB{(pa>Z2v$}MD=US>;Y4sa z8Ie6A(m2sQ!eWAVF;OvDaWOe*ad9zeSqT|=85s#F37jxiO$MbbA+91Lt|BX|Eib1o zEomYzt0#$6P{(ViAxzbgs!H-YDsq~t5{9aBrdl$F#=`Qls&e8)6**;Xc@=dzO=AUR z1tnco6%`#d6dj zCFUN<%1%3!buc2NnyCAa8L=ctPq6!8r5c>cHLeerERJW-XXCUiIDov)!??8zAE^PFyRyVp;-TX*Q*`PA1Pw69+% zFV|v6Hi8o~OS5xJvX9m$q?d&x7L^?>e34spq^R+5VMRHk_(XYeTSG}nd2>r+3yam< z-BNa@t#P2a=3HmRa8G%EXVXw;Y*nbs^)yPmd@D>?f*9Z?d{@#`;Bt>M`DThPm@Ysg_euOijC49X{Q98 zIsf=__yoCS(XRCAHQQr5Tf!=Eq5b(=1dn)5gl#2M(Ia1zsVr5Ny%jq1C1%5^mS9z` z^|Ko{x1b(U?cZnk?Y4nI2(#?g%iBw*M$(>Sey+=re0u1}VXwDF3HL^A&q@3g?!Wa9D_9oC|AM8T z6*+C(fGWlE=qPzRX*-s=03p&Crs@@gHATW5WXrzSI;f^BwJ@bdmV%S^7MFS_M@;xW zlT=F=hEsIzcPV>B?99}=9axr-@ZK-pxa8vxcN=I{BlbH8aAGwAzgd5$e-m3SdelUS4G|dW2KhEIGzlq9tEd8@R z$1U9}u|DGDFS#afku14$yAFM4o*Yi^cpaXwm1p3XAMwhs2TvB7v5hbsd(#tz@|r1; zy1l;(A$8BtiC2iz-sH!KrHkE<6HR?Lz$_V2)i2j<9IGF%SbGpj(RE3z?=pQd+b-(v zWPUQh!A4xH>1kTPa^IP1abnIB6O*^q@m^|@)udwuF}{aZ{o9(x&u_wx+Eki59JTZbUF%8h zx+Jfrb!4yo^r19Lc7ShA(1qo_1COj@4o&u?hn;u`&$#ux;nnoTQ#am4R6Sai9JM<5 z=zDEK5B|?jqOK4C4;LKQm71y&qqF+{@gB zIxqID%qT1ES?S3|Q!URw`!%-HJ4GY~oxARnz7Kw!Ui;?j+=Vk;9+kcLgc9F^*zOEE zRubj&X(RZi+|_4hJx70nHnyq7(_4j4kRRN6(i%P|2*V3D@U;zoT=dwp{UIMo&rd|% z&3!f0Atm}tZREe7_x_1W9e;I6t9Y?<`~1na?u4S&pkqt*uSmBJp0lKWM8E3Vpxv>U zOp^&Igs45|#2q|1))*#0iA^kZ@4pnOfdBqw-+!}wB2=3a*{^YCGHt5(X~o7bsUKsg zpYPMmR0E`9ZEb%&zE`r_RH68akz0?i{iuFcCoDA`(vU0tK5Iqze!z$*3}R80{HxqW zqz-6QyK~nPEm!fuZu-PwYd1Y7^P08r9~PJL6WqQ^`?Rz>GIqOA@WrT(Z|%jk6@<-V z1BOz3^2!ZX!`@?!@4&zFrLL`%-md)+((u~t>-kfpa#FXCi>S^{SAMaJ@BFF@<*JQK zhtNFL?CfK)ZJ5NPqYJ(-7jC7r;^l_AUXy&3aW3l_2ETX+#`lO*$1Gya{^PO(mns&? zeB%L&X`c0|+MxMg&0|D5)HYIc!P#Hs{O#H#JN~+}JJ;FckFG^5+vJ<_Wp( z+Rs^oW0%#f-n3&L@#XhsE)Qrkn2y_ZAb} zu5lt5S-J^~+8y{UUF!RJNI2$vsC|2x zsb$=hOF69U_vz(~9)E7XuK`^U?Wp;6%hx+=z}HPCP}270w4S!LF7(driYYRDP`AaoSaC7iJ+Mx) zZ4f3uJd$)*Wj9{xD+{?_wNKNcmHnww#rn>b*G-BOf&b>4%wO7I6ZcXJ6b`&ynET_! 
zI$WhcqHTNQ)Za4I2X5}!*UHX2_Egka{Edw`B4eTY-Cu8WpmABZLe*0SweP0|HN1|@ zyO&)x%)av}uAUIwlZZh2yu9vLPlf7BStEMZl#e==LgYz-LY+ zm#-vo%K}&OCs7Ve=enHw;ise^4GZVH4^GU5-^OWqo){{E1WXr~OxIRF_+l}-uC5+#7~V`& zcNDJSRNpkI6tb9WzB2xPT>G}uDV*eU$$O-<^+;@l_1CD9mc#0s!mT8elIW8p-CK_P z%ML{^6|ZRxgxf91r+3-yU9WwU)b>`0@CQKJZccjLkXCA$?1vaX=dY4z1;b(FqJ$zo zL{LvSiQxkn1{b2rvz8bke@vGxfXcH0*rhuu2r&Y*Y;)T3>lRG^M4V3IlDt>hYlXeV z5doqD_x`n*QB;tsht2zL46B15lhsqx4s&c{NrSAa=zz3SUqYqcf;9;x`nTXFz4v!j zm-qXBvU}kYUXj%`*l@Tpy*y%fZ*yF@tnCYB$xA$Yt?#TN#TQ(d601NcbPFV%PlqVd zxC`>!0!vr+oAnc95L|E^D?|lD1XVm!Vxe-f5;k$gPR~>rJ$PN(AmYIo8XJ(K z|1f=GT^g(??Ruppc`e+&KIGfyrabF#qE@S-^1e01&FSY|&l?ak+*{|dIU95 z%uoUISP_iZZ- zyj)>^C}PUp+7`Rfq2|Sppn4<1 zI66fS_f1|`G@wM5^}1zVG&KC;IR8wysGcNVC>+i8vrymPxRRH-?KIR;YyU#@#)9UJ z2!rTnut5ZDZ2=li$RO?{-+ureAnhK-(TZVsARwncpt;`cR)s}gF`8O&Wv5bfs~AUt zuvm;q4crZ<%2_7}m>S310UrLy7Y2@cG*$SPfF7NJZ;+}t1mxnJ>{u#1Uh~)}9}~+5 z9d-cGd6;Ve+Ks7p-`e$Pp#5c7>0f`DA8Gpt&R(;&tgI+ELlzx1Dy7&+5R^Wk5$@8r z{HlX$#rsIriv>o}lE+V{8N4LBJQMxe2qCi|`Uyqj8ucD#S-B3tNMOaqm3sfUtclr* zTy@6P_^dm~Dj{Cwh0V+duk-I}Fcj&C>rR!|gDTyEka~Oo3moOGRIX@%{NA*WueL%d zAO{wF<00x%XLF`m{8a%ZO699|(5fCw1ukyV~ZDOfL z;k)RyucFG?UcU{bavE4wni`1D_@QO9C-u6kc5uxb!f`+g!AoDeFj*q1PQRqQPpb7| zPRjUA(DF)VS3;c6!?p`8^@F*^=cNVT?@9-zLcn-HcncUEKkZkNKl{28B2XVt;GyI2)i;=zeB4tmCXogY zp}?Q^F`td2uHglF1CSUEK0(1va9ezp<@aQ0B^b#``^r!Vp^9Sa$CTVIz@S;tHf6Jp zyBT%5t3?ThQ3%+E?vNjTWsw~R^fgMsy@=%~hlBUvttJ$RF{Ma_3{j#MDbb1y=|>!Y zfY!dZb2PIqNiodID@GG4goq8I9AG%{9ENjx5WbBO{+#09B z#--85^;UfqxtdTWq#cTo3Oz3{4v z(e3*MmYG(yhr8qG*$HQ$I0uLY9~{UB-;scjIDPHGMT-)d-HDmwCA(+!+s=kIw|U%b z8%b!>LU$5d^KH$GhqXZ?!EL4%z(w=nrAtHN0Q9P3c1mcmc5QElWC>-aspTwEa#gp8#=?}5jdor=tXCRzOZlnnRUfJSyxJiP3) z&ExY56cl>S9N5rNk_fRSLkSsfxp3JuSN^yRMb1EQ}@@z_J0||50=<4lVEh zAOC!Izt*-^TkE#fy6Up7LilXmR*ID*6}F_CBq>6i&(_6Cmz5+6E0qvJ5~t&AD@j5( zRBkIF>)U^?W{_%KG(8VBN=S5AE~NJj9FMqc^mA3rfA* z0aRS8f95un1VC+2VBQ?VzfLA@)#|7)h6O03MSy%D)A6O$q$UP>ow*rdyl#)?F`*ou#a5WxzhF80?OLM-fz9g5s; zXL8=3@w?OE^N#6Y^9LaE-v+k}*VG>yRv*f8JkiZ7I&gW(1;P9g>+ur;yTh8Lz)J0Y z)iIs(NAIOp@4fhsr!{%QvnVkR=(w4GJtLyvqa%N_xa9?K-S<41-*-n+OCBCT zfe>&jD64q`NN&w}ax4?AI3+eYbsZwjX^8;?=**7>kQNH!nZ&4wYSpEQJ*fZnlcz+N z;u@EIiCE>t@M#Zc{KstlLy*1I4W%BkXm4q)F}0-_8!Jh^WL8t$o<42}@iIi@b}sZ; zlSnCIRD@PMXw!8zrpt2K_QwTCx7l0H&#tOtkB;|TfBa8zc2DPs;{|a?r&n~1;Nyk+ zwIg>;+HQX+)>9yFipi>XjtC!WjtR+kTKk2-SOeq-+1W>#qTmA!vst+WlpG+Ab z{HfIwpBMjEyAB#a2PpB~V&d46wCn60CyyEY;pMqB?%1lirQf8q9g5K-K8hc6Nq!=v zUwQ~3X1N?JcPL0UFQMA|rlp99R<%RtNJNK|eYP$5KuX$!fLmNqZqusm9uKZMZOdex z*w3I`yR-l3!_gB@-*t;fC1PAb`;y94{~h_0d_YJ$`3ev< z3x(iGON|uiO2jct)E<%NjyR7vYn2ytMoo zz08mC+0PszIo4N)4*e~SWE;hmMlSA>!HDo=!kB>U=56nbA^%Wcg!{OrB=rE{#sJ}U z;7}8pv@IRgvgm4)$@akjWCy8h&Lvj5&NA zlVoWx>%4gMPf2jmV1XK8OR+6chd3A&%IN3JNs~6u2^UrP`hmBv-V+#?`fjvpcuw0d zhi6-IjO@c(2yXnL!VH$j}7wI$F;cqy~UnT!zRq;!&WdFg3NJoq}lTs@; zNPSN{KIY)=O}~WA6JwPB5qc@(H-H7w4b@3WaN1;|=5ji50|nvkJv@kc=6MkbQ83v9 zD366(kQSTI#+*KZ9afOfK*-U>Pi~qVi1?ZRdri)RLkAu_J&}0ny6cIYHH0>RcfogW zjm0dIk&9&OY%07@c=jhAfK~9m>eA`PAi_!r;E0h05Lqxz9#fD<2GAaC-1tMpL+?sN zE0_YbeNc{k!I~SN#RrMP58nO)`{R#cNG26I>f_c-9KU>@9_zAQcNPYZ${L%L- z=FdZ-0Eu@t(ysYDwi3++GpV9R>|WAV95Cqm2a%kmb62_w$MFh|pn- zUhU$-G1LpDZ%H&6LBf#i*h(Zjv}Wn{l&z~mjjUpo*>jT(hbydO<83xfzK^+o^y|&(F#?jepsI*8&1!wDlSvl3WhiH`Z zwe!H?L;9yWvZu%Q9$w$CV?jTXFFDd%#na>!QV$2tFfO~t_ZD-mS>Hb%+D*98M-$w| zF+9?>3xLcQ%P?#FojH_n1ubW-G?xxzaJlBGFIx)>wXiG~N6c8(JUh{a#*$2h3LamO zr|+3C)yzNRA86z$_D~F{5EQoF3O9uc`ZYgi@p(#sbv9{FE=N5+C#@5pq=t?;O4rwd zeB<3sjYn|pMXgJ?Md58!F8CV5R>FvOyX2D4v@2VWIJFO`i5#We5`U&cBs)ypvGeRv zPmi!JoW9RkW369NbYEeEGNr!9E~WMg{#?gAV$sDmO)&0~M_!O42llRTw2;dsSf)<- 
z?Yw;YD$)5?(}-{JjqgKl`YXOqz9tD^`{LFull(8o{Mxfaig_A&Kglg0TzFgM=`$cj zF}`Mk#mi>^D~eA=@0QBc?!s;iDXA5#NbNL6<(i-d(KMFsD9)wC&4SqX2^|yIzo2F@ zfU%K_3oJh1{1iUl=O=T4glsO&hKA!D3d|U^>MRTKTn7T?kRB%7GJK6?=GEgE+REzY zy8QnL&AQeC3*ZovuLeCaiI)r~(4i)Kf7v9Ht^}+^9`~vuQ#KS9bCu0kh|Yh73JM)Z zyj#vX`7ouFI$Dsx#^iV52RpOl!NCq@lFyTeT|Uf)UAW}o;nu<_HJx-ckI%m9wCGCZ z)nKQ~&$sp61`G_C_eqD}^=BVUb}V-7NjJFaA=qnN9V%P^SXf5v0Ei0~;<*gp+dv+x zFgVOI*C>O6YWGc5>U7U(xJw23|I>DrEpL#=>F1eC1TYRWuRXLpSe-DIcGW%)W;OB8 zR+^?JEj<~#LLr9nHev{zBf)^oU{Zyt&1?f_pJi5Owc||$y}%bzy&MIlS%2<{$!g>L zq3gTeWFzLdY%#1iAlhPp-(JTflm5PQ(%$Xr=w#KFwD*2w&-Pv=QhLvoi_bd=6%kq6 zGH(a4cFfe@Eqn!^)3Ym>IsfyD_jd`-Mw0y*b>n%_ws?ngBVDCO=0-n;diro?3?-ZJ zHs*W2Ud}(v>zCAC`H> zHAvf${a zjm1K-F>z17tm=9z`oMduu^(+*%>1W2dV5t!=EBpfKh&1YyLLUUePEGe_us#j78bM8 zB*(=alQ{liDKTL|jI^DqH)#;d%tJWkdy=g5gT*o$iobO2lonH}2{7YnPXlTZ^kE`$ zX+Zf++hPPXti-@?ZD0ava@QG{>`9PFpWxHd4S4I2JjJ2^<*hbZsO??6{6gnM~f^ia$2jO-g1Xac*L%`|PAi zVaG9AZMxN4v;4jNIu5V7v1`%RM{zH@qM~};P;81CyG#!L8qj@8Aw;6;j#oOzrNGr~-(pR9=?P8#g(z#nv%^z&+^V>t&V@%( zsT=MI3mU`X@TBZkJ}-v9>5z0 z@bQh+0r2XH;^b+BrB)e0PXG$TY;^9EV8kBhjuBQnfncod0dK`_8h%rd9mGg&OD z2u+VWhI@`G67YYINslua&R>LO4FFj24F+mak-=@)%9$%I|5kA-qH~~f=H#(Ov0<0X zi-IF8=9NJ_g&H3`dvDQJd}RY2>2R}hv$J)z!QE{1`j`pA`cX}(4yNR^Gf2Mi7|AS41fW%=5@jU)PWL%29#$Aa8SNx#>Av@WFF_5zl&34~A4r$jHTA)&4tZPAv;+lHT7CxFYV7YjK=O#VkeR^|Ev2BXfsx?%yUY2!k2x z=s^hgs$z(SY_Eh5e7Q69<9uJEYh{LuSlNF+Z?GurY=cpI*BCSP3EI`rY(%9ZwJclv zuc;r19YaVl94a+&YGR!%s>yv_>!GtR#F+b-JS3DZcOl4TWH3hvNPV(R6PtC(L(*2? zLq5$gDI0n7XKf1c#@a9#Eh^nShY(g5{9a?ZZy2*D#%XWvRR=3HMR^UJE!7`I2J!Mt zt91!uFm)8+K9KkDXJ-iSkM{&~Pw!k05khx|$MW?EOk`14{^+%G8y;ch@AlK`b0>dv zT$r|(&z-7RyFfc5a4?1|L{~j7TqTBdAtM|bdGw{uK^K;coexdVUpk1I`TaQROP+{Q z5Qp#LbTwJ{w?-BG3M7nRW%qF<6Wj!BbFanH_*-Tk6%yf2+6LF^$>1 z%T05pe$jmpfiuPQl6`txQ{^ZNmEKMr!$z)tuqyX`kyUbSkK3q>syhRQ5nxKsENx5 ztR>v+7^GrY5FprBaE6gwz4DN95P@fDf_l9HNoYjLWjbbAQh0Bik#9r z#-=={_&SUU4)~i1yq!jD2|kq0^sY!*3HZF%d_b#CZ7qK z$1Ci|h;V=4!I`^nujM`<f8D2iy8?L%<2j7eB1wk>Q&jGzLsaA3Z zut~>IewT--3Nqn5gtrr*fU$PcIou^wGmN1Cq-JEyg_6Qif7@lcTrfpAfC8GUFk(Hs z>6T22*^46CfiJiXj<6mZw9=Y6W_#0cAaO#9(3TLcT6Bj04lW?vrSi- z_9D;PqlwzGSfP$cYzT@T{x<1Z?TCKwnBPg&txTWP(QXXyC{_$|;EEY0ykwW zK%UV1%*L65?ehg2TORQQ6tEY-2!S6eFu2!i=e$v7Z$4|b^fj(lk* zC;iq^^_mIuvQe=rwDl}SzmdA>v9}r3!~U!6(_Xw~m`5GqcEGlZ>tgA?N|$3+jptKN z-|udFhot{ACKC-fh=`s)mlW(+SA6PObL}>??z>XuMyyd^TI++cR3)_;xuw3qw0Fpu z*Hi7-9hQyVxX_!c`yo@j4<$oVVzboe0U8ly4>X`m<$VNPUU)Ai#rtG34g~_z<=J`Q zx?n~t${DyX73T(L=Pm2i^%L~GGU|QN_bJ{bvC4w&@{zRpf*o2++CagRKBsg$6g}z@ zsAw~9R_;hzY*LpaA zeRIAeQ~Im7*XF>`^2#9#MV|bDltPi}ixI&%Y`9Vflt8Ac3Vf7+VLCmr0}dsl7Lw(8 z;S#_iJ8z{NZu$dWcbv^mLP&8iQ4B)=9ev)r(V}BfrkKM|ml@;0U30jc6VVuZz-#~+ z)I5}?^55CKAx&wysxW#+0Wc|f4B5qJ^#v2#$G895wANl4bkrk^d0!$EX>=pAYV<+hoA36#TzjupNKT4r0?Avzl#%9;aravIMDYOk|`DpKg-AXwk`KD3Xs2fI? 
z4l@9*RHDpi6xX4Prav>1*W<6rDvFaORDl2aNYK={B`r$ z8HLo2XO;;i_U^=(n!{$l1EhJRX?W+JX9byd`34KxM?P;K>5lBR{(1;uEu3bBsRPV} zd9m{tNC(hhg=BPZ78neE-8WV2db3ng(XbC}7p|C}NUE+`vevTE)bQ4=P|p)4k@d!J zLZ=7QJhw)PoIAo1)9@Qi@ZsXN2bg*vBkk4|@gPZAX3?ZSC5Ib-ZQB5M{Uuf84Q49JF z8i&d4wpgFBXnj=Su0Nzk%~GbAU59VlP!(>QC4D@vV`Y6580t+xrS+TiTz1u~?De|S z#+&{tC7PNptNNfaPH~+t19l#Q9nJt$Y-Wp-;&b$qq|*`Bw*wwa%%+lHtj-d)GG$jGKWKKfpHTLN@N9h8ySKgk z0`rNzN95o93L19B97o(+T;cS7`hG~`yZiB|-KVjbT$fo378@J{SXKjwP&@A!Z613u zvdVkCb@xlz3G#i>3ggmlHDOIYUwpwiFmmLVdedl z+M%mN)~_hMUYC4puIqhqst15Qw>N7R@7}Vk>TRQ+VHc-%HmPq= zuxmmv+_NBYdRig>Y_3j&4yCnX7z;8i8neZ_V4WQ>Cy$o28R@o=P4oHLyn^Esn1P9c zn|o3Jjz0SAG*3uZ=!tSazu0mxCSb9%lm^2@KYc{i|-PN_JyHaI*Vik-sLD8FWAw@B;*e#T#<*;K&Ohr>WteFp zV87!Ti*HwBlFZC^$7OGso>x8Df=>teNwu4K1$VKFfW4;iQ{V=F^=mnR9w_ltq8H+o z0C9G)p7*$H$3~Y#m?ng&ygWZ`K2L-&8=!KU%T0OxrV4YL*@3|5c*-Hf-%`Ye^oAf^B;Ysfu zFIBFw9STlI8r8bs@Ar29CY^^U6 zqaWpGr<$mCj5cF=r}I=>qzG?ptU@#i4(jFMCRoGPL3Fz{%3rvJGsur+HgR`LztQp! zZ*UPPoAWL64l6o#76PDKH+GY`Yn%$!d9KZuXQN&lJ7UV1%j5KN)mswh7_Q}iYLEXy z_cL6Url;FzbkbIuTT+K=LWUumxkAH~jVFY~tSkH)%qK+Nc81f6nFWgaM!r4=in2@l zh5Sr0GWAf*YSjQ9+2k(fDVKhq8NU#i2&mM@0!LNJ?xPAHza8WBtN&DmPI#r=^+3F~ z(t$$>D4zBn+BIg$n_uA%z1}AsdFT-q&j!$F5gWo&A_5_-8Pol+2oJIOC|WFqPxK7a zLTG9&0Ae`I3Y<$KccNQ=jStnI7VM|)Hdxr>Gl(*XKY1rIIxJgVMM=Sf-Q~h3{tW60 zO5IaEw|VraSjuUtjw|C}FvpJW4Kl8d-SiesIeACSO0FA(eMw7|0X0OoUPzYWJ;HvL zu(UNHhJ5qe_nLH0HV=UhC>AH-=v)@!jG<76ogE5dyvWjz_y<5#MIU>SH5xP!D@65E zg!Y)@$4dPu(&1BrS9aYGT)LCK-uD_&R0Qg|=kV{HoBS#ZU-ID(_YxZ2BdO(z^-={L zZOSyLyu$FFRiP>IS}n$mTf@ho@&KH3N3EpWDA;lE4t0t(fueiKkD@ljHQqtf!{{5h zbg0AnDJ5$x|JafJbKejP#C7Ig)msXkY%t6?pC0Rkqmtbl%;-mg{7%>82WXO=r_b5m zb)MOJLY4Rz`s^vQKgSR_r{4g?8FPAcfl)$rSb)*{eZ1y&-DMpJ3kY+NqK>8AIG6R5 z%#-dx(mgj~Vj)53;<5N6rH}u<@`1eg<4&j30lnF4%NGwmum#GHMqc;&TH)^u?i3)RmkqM)=%a586Azegfw3LmFdD* z*@Q+vSq7kAqkh47FJ+WGnzsCKDxg?pSnBnJ_1Aa)0O)MnP-nQ1zNp$k0p7i z+J;Boh;ZYW!fOuuC)3ZE@89-lG10a2O35K+gUPe>ql^0_#o5f;<@?DP4od=zV&@23VsXYs9x~cX&ni0) zwPVsf!toKB-_4Q6%@DxU55LQ>H=N0&q{F>Y@lELR2Mix1_F zs0*?a&l}7o8FUVh6kx1?Q^Ww38&$4C1`2a+yd{Uls56EKNSN=b)FQWOD2Abe$ax|p zJ9~kHGEe}B;_m>O2UpA<0e&%g_VpVBml8xLfT#f!zgWg1Y$)^Yd|Q$S=bDI->|hM8 zREHd~)P+e0C*g`WKgQ_uGt5>=@w!wM&K1@43jVh!E)AfZ>F^II zo*-;syPp_NMRZQS$u}2BH|a)s(Rl-T9d`HRt21boKP-_B8-rHq#q@WvhMBy=I=zbb zwh6q~`P^w$=r)H3%$f+dWn&4U+v~~9Am~Kd*xQnfkrJGTIL{;MZ=(Zw{)@z5_{#Q4 zqjp|yog0i;R*4`LDE;&>1MpvV%B0b~+|`Rc&C30hM-1j0hE~KGP>XdDXt$!u9R9s~ zeN+EkX7_x-BD_YCmigM`RV$8M0+K``F_~5F?-)B#6g5zv+sli@Ial`UrPU+3qc+&O zbJV(2E$K`%+U+L3|N1QfUKcU8AXF*#mH$@Rocuu9j(BphH0#k~0O1V-#z_Kph?IQs zn|`D*s9oGXSaK-oOx2v)fxMBAApFWO>VFPnr*{|sm%y=KFg$VL#Zs-*v-!HrFxvOP z^&s%~@%k2%hI}VKT!=WHk6)dn$*-a0w|LZT^2@k?duR!&d*Qx&YLT?^?O6A{6#=gQ z@t_+XPw~VAu%0JluI_ch3)qduA_@NZ?6fdOUAQ_` zX0Xo4UwGgdAr!Y5KLYrf2fNDCLY2TUCwkCr%)Zw7K~Bp6zo#%5dwJ_N#A$jRgx5)aP#p6{<#a> zw7C{#JV@MtoA7e2%y;T063o1aF@j^OEq`3tyken)m@2liHDuVhl4C~W=LUH4t%E)5 z`Yo?^iLq{BBOVVg9Z6Zc|HL8l%UXWO#L^E(l%ox85f+TU7vx^R8!H zwcjq;E#Q;q&K68SshR{C$h>cAF{?%wP!_8Qy^>$`M#;Upn9bHa$JEL8+i(1gieR}< zl9GNJ;vzRo&=BS}JDyf72rR@KPMhxiGo=-EZBa7Jd=Q zNq)Oxhfu4V+!pm_Y&p6{^@vtf@1uARnA}hQ`dGfGIQ?nKqxy`XYyl%9J?|>(j3zw& zdejGNu6`H#WQOGI#?s~}i*6WazpzS;K^7Rja^I~fHA`67@Vap03u5%1_G{bV6Rd`i zHJ?wfEPv`3zx@3x^cbcDr|n6%lNvuaa-1FHb>}YYK&8sudtZmo?gq&XnYTKHAm76Bt;VM3$|D^}_5O}JKo+^@n;<8f2?5^t$qshUYb zc=3(~;E>Yv2m3n0pJtMIJPYhRL;B|Era=vh>yMiWZ43i2Umrx@TeoQgg)<_k->No= zWg5gw`#d)r&Gtrhuqby<#K>32ekGcYM<1z;O1+}O`kXW$m6{iCJ_dz!iKutP8?Wr; zGnHtH=?Jc)P_G!UvaHm59<+d7lLAf7Jl)lp3stLPpYvaa{oUi|J#KM97*jgONDRQkUlyj&wNU4 z4#*ZPa0o6_z=aT8g~qN}<55ZToYtI@>J%)}!IK=-v{8tT(lB{^BY)7`k8d7ft@Bc|aRLss 
zg(I@!79;Ex0_Wm49+lRf@kr0&^@}rKpW2$c_^&aK89{8guS*Bz0Cz!?ZSUvH_46tg zV3k|2*~frXI5$(}(DwS8xR(|#5oJTm<2bHc)vVDltd?=m_%IcgF&4JG6<5;~b#CkQ zD2n_;)>8^*q`bUZpL=o$Pg1u+_+Jy|Dcm3bNvhd3kEQVmqkPWps-RO~`YEY_SiP$6 z^si5asiq@y^RH_9r_yw|oJzI1@f)6Lu3b-;u|m4$1HhPu^Mm-T=bAYws8T(*-Aola0)l8HWu8O>OeS6FU=%w{tDtt~Ck@Fk_k+N#mKZ z8_lx$domJ@YNfV$0=s6=xLBo6DNFeSe7ib)wtmH_gMYDWi&3wAF)X--K5bRo##@ zlH~D%SP2k+O;}70kNj(3bO3+VKl$fYY8)3Nw?x~GAa2tHt~UWnc1TS#H{}hWr!Bba z_%68@BDJYVDs4?^jRhX*O38K6o;-CBZNTHxQu$7}57bQ1Vji@Z0?jkkOm8sKj<3g4 zlSWmzco;Wgg^7Tc8$o-h@Y3);U|_~32uAo3P8gOniiU>hszvi9i)KQ%rW|%ZD%Ev} zX!ToiyJZ1+>s+&r_C1jCo$Wf3O>xQwu026qAD7c-?Tv>93{hP1 zx9KH!7Fdi{ehT5-#ff~szW`FNYTL_+;q6n)B}RWu+Xy2=65`7l(-MZY+IU2VC--$q zl2Li`KY!@WO#w+bN4>oF$Fvk&h8{bI z@q5dnvj*aS3^i}q;bDxtUk=-*HtA9r>DBj3BK7EBdv)Bh3(}~T#sh3KqMQUjH zD1>j`HuHRV#Rh5#$9dASh5^UB?aSATRloj$;C09JW&ybLfd6)45`O{YDHW$az;z)d zZF|2;69-Mqgw>YPzdK54LbU-P-Rvo)DWsV_e^r_Bahbsg4_Ihw`e=Lv$e|&5p}3~9 z)yg&84FrVusXw;{hJGUFUNW3nF1h3&x$8Nx3^9yf&cfq-=U>X)?Rk~C;TAmwOJLU95ko+o?@IgIn^0}#$s|I70LT|iZ6kG-u5VOvT6M<^|s zexGQhrr0F@I$2{ z;ji^RKto$fHMMuwxs8r2H>p7OaB1Pg+id*HQ+i2zPZwQ;{`XZ%}wt31N~=M$$?1WhZ_qtcC2r7T>R>+`D4Ea+iyppL5_8l*x*tj}_Svp%_5o0agk z9U7A7t^X%YIF))l3kpk+zUUo@|4e>GxA|AG2DDq?J3W^n51)NDFIzT?Ff4}JP4)2A zYL~AXXRXY67U7a7a{_!9MO?&mwBWj@om%)0Q>A(brL=VLO++cJ!zFoNR19GMeawY_ zw)9oiqBTCrGfE31|F*5nOKBNu|NVT1H{Y!)s75Xgd@5t5@hXf`*6UE)5cK#}GN=mSt z7ud}sPQT%=F#>OjN?9XY>x$vAg?%cc=QlR&0Cf2X7`ck0MYt%zn?7vs$GzwA-&O=# z9Lm=#SIrGRMyn|AJI+`8pK32DvH0uj+LaRz*|wJ^_4YF8H!UoJ63by$&9=p_P$ zQ7|gI8>`TrlxlN1zn7(P(aI2IFs)d{u9bOF5SG(2`&pTN|E=sf!OfQY43U(ceA>Li?Y6b+|Vw_jlNMj^83jRy{4Fr{YBNWFAb zhinNq1bL#P{CF9J75?K?ebx40+)SE`RVihC-RO!#3Xg+i_qzkD@9y2xr#4)g$q5~r zrtdb5&H8$Ec7Zp@yn~fNT)2y!sbc#&j#9*;FK0ESlMJcID;eKYVBVXSCjHb@;$ zAr@!rDDb&1*|{#YGM9)y>?sHrQ`r@LgyD?Lt3is%J21Kfuq>82Q#88F#UCFQlvh8p zy`@JI_Aq09xmVC%7{qSA-esT}zhYJeb^Nx>ym;A7{ew4ir~JCSQulo5ERNTku`n!F zbGmQY0~)f&!pSM<=s){uJT-F)^f>iEP1IP4a(CP<=jZQQy|S~)j2-UV611_5mz1+8 zRda4Dv@7J$@vz{&zgM``Og6%9 zM#Hw_kFUCL&jy5qJ1raW1~x$_xT!RBFrBkWi7WJ;E2MCIoe7}ZLN`{w%oVyVzEbap z^2wSR7>{@52dp)9PS>_t zl|-E4%RbZlhI@~*6H8E`NH=6)iK$<5@vNx($;HUlml~OM2-7t#^F8Ku+r8p)vJuXM z$&+XRJdMd?svy}!Pmy(qsD>>gqv;x!Q=$tLkza|z^E z*yBU~NktX*s|ObG|IRBk$ip7z=DAr+`WF7<6xrsD>Rr&cFxx8!3mAS&>+)#fSFOri zK8wzk9%I+uv#H+wqu$23Q$Bgv`QV2_Vp~Bl0BLxtjoQQ+-2`gRWYi=X&*Mq*qv%Tra^y2`|!*5lee0?vB?$ zdQ?cr9p_c4vIl+LGd#ShHe9CPXUazMhm7BlwznEjQPK%lR8k6eb~xW}_r#qWvHSil zQEbuym>JtwEXg;CQ23&c28PacGv4oekG}co>d&cViy~%ADWcs=;_?4Pehb4(2M+6V z*z^=3&iU={f0KVX5+>^{3TEvfaBKDigrhIi-Ycz+4S#=l>vqk6fSC2Uo>-8XVp({q zz9^-+E?vPdNZ+CW&tI*F8!E#;S8I;%!G?hZzPb8LfkkRBhXAqfSrvs*IBxWq(mdiN z+tZnihO0+L1t^k3$5mLv6+tqzKuv8DjmWG$fX zsU)WjSnEWytx90!b{?1N_K>1ajm^fVt=&_A-@#nd*E}hA@ZmQWIpoZ}A&GqW+_f{O zlDi4yPUIu{rWNleeJp;<+@ZAD^w?{Kosq15g$pc~ujl04F^Q6F8RcV)R*NT8Tf@S= z1c_VEpCjJcJ6RvCJXo>p(AfpMHCLWB!}1S@Y*KmKNG55@xFY`N4lvL7L`q#jQ%WnR zEDR{0G!SCutm&)Mc3v>dryg_*w({s@BM~jN^C3?9TZ&y>jwWhfyASu1e%#mm(jpm2 z9pXer0B?zmS6nsTO7N>vvh!Rr=AlHarBWrM7PBQJ0bq=zZ|WVLEcU?d)ID1q z^K7r5o!bO4E#iSe{j5K=nTJ>uA|XWb{JmDpd6HAuWF_y8U6t6}w#@;LkQ(L?XgBNt z(9+V&+g1w=^wY~Er>jq#$Cla-Xh3v7f$4$%?R61dxn zU3Zi_pu|}9f(Gn5;)-dAw6R&L=cmkV5>1fL%y-ihJPKwV)yP1o%f_RwP^5!G3uhrL zfmefx(3!4VAsp(fv^PC@Z%N3@<2J@_!4*NF@fj+s=D`vHuZ}*V#Bkjx(j%TKM3xHU zsWYlxFAxJ)93dQG_AGCvo^g9RpJlX;xJh>*@^6*7^ua}ll-%krDBngN)bHVUgs_f@ z(isQJNNEFvE%HkH9$>#cj8s&`Wi~RqK7Iyp4h<6qi9CcIRA+R3r~uIAh||79NSpZy>YAhw@OhgJ zxU2Yl=~Vw`c9#u9tQ+WLZ=h#V(HqQrKqIYe*HQM$vDR})s!qi=aspNV-{hsjS%0UL zD`+dg0AJ6`wUxV)!c@5?U9DGKP|HYnEy0xVpWW`kJ0EP@r#-G5xMSOIu-f{kkUdu} zD?6b@*~DGIZ|W#J`t11P1(MXtulgN%vsj4)T?N<4c98oN+Uoq?LgPt+!EGfrNMvO) 
zrYi7i-kG@k-Cov#Qw5&Q3$BC}I1MEi>bA`UYlV5QO$u=Q=%S2VzBiJpBaxFo54iBz z^^`LT87;(>Y#qS{$w^0!LI6I+J9A?Nz?XD32)(WcxmSgle@TDA)Oj0_CA}ES20%}k z1hERzp#vs>*3^43p1*Nx{++v0TojDtc>6jr?;PHwtY#$IAz2=h!;*MG>+NQQgCD2p zWWA+9@4Y;`()YEy3?h34rv!GHk|KLY{=GG)G)K_k8+R2&+J31kdog~zyd>Qi;@q3S zTZ#}ks2yu#8iGC_LWp}-I;M)!m1b(kwysHL`U@2oGkrJU$T@5>#3 z+2fVLU(m~vONksNf32S?Z_Pjw#oXy+35tq!@Zr!rSt)T5-=)HME1IIZfDP;%@w%Ao zDj)k{`J%fsY(>v1L^_12)}_GDfTr>R*jW`VsWN2aA7u$1{^|y!`aWNLyrRni3L|>9 z@X4~vx;cu3u{tk~d81h{>?}obJD>F&x5N1GF2C+w(M4F-{0}{3f_T7X?>(D%{Yr+$ zPxh8DA&_%Yej@Y!hibl#S~^f|##A1|jKy+)=#x0TP}a!`IjDoW}W=ykc~;W@7}AXkZSQzAaYEDYZygpUudqd;OW zoC37=x5SA{frBp{E-m<7k{5&>tTTcp7$cCRbb?tVElUld%lP!AAl{jCyntiq zJ@I)`mykI@6Y)(>Nm7=U5p3+}O%wEouuS+UPi!5TVtrEhF+U!6DybQ}A`a_k*{?Un z&%p^Y*1R5ZyfH)~_^fVwNjmvKte<9sErRel_i)p9abMH%MH>8f>zWnZfRu`B6)0#L90enWc5_ELVzZa#sa!J!?c=h;?j?=H~%+#+LD zJQGIOSJ0cJ_-?jIljM_SD`F}QB?L|9wPU8x-+zTCE>(_`>bl`Nqk|mQHnm|apIORh zdP@!Db_t?=?5XZl@~GDTX;SBmKxP`q%mbNeYEmg%KSXVenAnuAEc&jZ0DS$g zfc`wJpP@qE{(^+qb}nobkBwffMWw*_9ypvTp|>v7(P;pp!igSwk!V+kr%35S4#7d} zVXAnNu{~5T({YT8=za%nc^rT0GcsO#v4vapi>u=YAngEDB}~7?*N?MDwb)iwh4~Sc z$V&h*1!P1{5T1w)Df-M!6NX9BpxHR=UeRagIse$o#l$?<-7$IXd zhdlq_mDqnd#u7i?b*)Laj#uMODTAymaVNfF>%U?<)1xj$^v|EOp=U@Qhop@TJkU8t z=$4!cHkRi{nnB^c)Iyn*xwJ~0HX&@&;({|+0d@Ecc42lGWmbvrv-Fn>!g8;!ch@3K zwMdZ?k-*jumhA36^Qe*yE+Ed_j_LtwddKSP5*m$-B>Xw+EQDHcje;K=jBXqA?Nkr4aY zOmQXipu}VehnXsgf33X0=Gc?ex}{1d)!~!iUpBU=boTya)mD4ezG&3}Hvn174*a!) z(xk&8e+Cpqo+tKG8+SG&bubp26`>kq23W0p|KZNEiL)@-CxFfvd z4F(-YvCGr}>p;T{??bMj;i(B@=ZTD`S<{&k%6!SI0VK!gn8)4Zn1D*%Pmz9^O^jv|pJ;2&t}95EVE>Pz zGYv~>>)ZI=u$fU2QE>)GG*cWhXE%t3IfSO>RH0dES&^D)S#2govqICdw8FH~Mw+$h zfJ2#;kqt*1(PO1O)zPNMIvt;v_bZ>cuC@2tYu*3*{@t6|ijbD9&7E0~P0iV(l|G|| zBj)D4PQ*%Dam3YS*(-|)9*3NB9KXCfQpv#2t~mVb_+d)PgH@Y17A|?voff$ESdAFo z`C1d8)sv%{qyT1uoN51?Y2Vpmp@8UZxpOXM)B07U-*Uo@i7h#IF=VFYjNY_RYb;_| z9v?+Nd4^eE_ve%WU$9;XHLGaFOiRGrQUo5@tbNqUuq@G1OF$b9VEGyh6d6WuVp|~F zw(b|D3$eYGE&W4FS^^_}9Uw`CPN)kDtNMP_tSosdV6NMIsB}qqUKD^Kug|_6PBXv2E)-WxCR|wz+0+U2kF)4 z5c$$xd?)SFXSDEZw6LpY!-tpvlird|nI%$_^|EtU-$pX?-r^K57@a`ZMh@YL z-l9Nj9>5`8f{px{X1>=fYBYqL+c&r>%yyV)k%EKa#53>RYs?c@&N343{n%3Y4!6+D zvbbe#5qQp}#pZ^IB~}R$=aD_Z|L^|m7FO3Gtg{HI#!-~{o0=IaA6>$Wf_V;eq4i5Q zEC3Q6*8R@Y;}nuM5lF*}Enh$008V;=HM|%^9rLZPsX?CJBHCDvT`h`PC9cq>m~R&> z*kgOQ&(6NQ^P-e~`0WTyZ$9v8`VF@GTXW$0Zz91T0|JqSRmlSU@bWq%>Ec^n8Q(iFCtqk#jdA9j{j!fT734w zKhzcp#^PTzilM9*v=FxBsoI{QWY-`&CRP@sN;@K0H|MX+ng1S_cWgl?LY^_au?W8g^2!pMWq{rV zqxc%cgg-h9{WK71uA6?}az6wsLwV$C(pJKW8(|a*HeUyCA3t6HU8wz6*fyZRBEs&J zHBg;~mAVb;X5hGp^I{Kjjbzs3Ad1Oju-I0|@)-a8nFl2i@lO`xXx~w^d6O|j8h4aj zoNqZ)jaoS61xh0E#&%~#dN?QLw4AiGkGqO&)w|+$-lMvxQ)Z`E+^O!9zJIv_jS2Tb4zM=9-VZq-s_rhm?xHeYp1Ycci;(MzTHte zgr5AY0RQUSNz3elr^j4~!{_2<{`shJ&-x4T&$!uC2ye!W+x}!_-M*dn@Z&F@k7%Cs z(^{$gCh_*9qAv8VF^XcH3gqI9 zhl7h1UA5!)6|<%*u1xSB_euB8rPVu6vblHhCIe9l-6)4=N^y*S*}mNgGPk?FNlBkR z$W@RnxFWYm#eVB$*SFNFgj#Va4n=Sr7p*k+tsuJn zf_EurpCt@yF_h54YzrFf`83sxqwed?ECi<`cXB^76@F4d?d`d0$Td!=#TUiST|fu< zE4J(PD4bX02`l5kalPDrS;0QE@r5%#LDCu&1wo%U_6%~YQs~q3e32;-?_bl>JZlzP zs+LisS?A@rXwmuRHm5l2?zZbe=QHU8yExbl$3BQEjB^#WJUXH<<(4sJa3n6*nM!1K z&J69OGBtO=ZQn)~a5=>3|0;c^J#F?xnBSGu&Qa5g?q6Vjx+s+V9RK=I+QA#9#$7AV zS=<%!y$upDiQjo<&em_Es`=;fDL0dT3d~K?@{gqu`P<~l>C5U@rq;&)p6WGOH_z&= zN#Vc3KTS4$xLoNk7o6v@uD3DL{QG~bGL9)tth8I!*&!tb zb?U8-I`rx_K)4wUeQx{o8}g9X1gs`D@D;KGfoll zR=H%Zhhqoz=p73L$myjc6FNCPOLQJ*sYsd z^5xT%^^SNW-x@u4o#)ofv2s>w`s8@XupC)ntwOu zKZaU?Ly!y6Ie6fY1fKWtKo>ma6w!1I4ZZ#6DbHoh*GV$vzQAX1g^Nnl%s!9ZaOTJP zW$AlZg+6FkMK7RW?g*MI?PgDKa9IpAp z85da;>ObN$1wVT@4?7o39PY1M{1LUu+ShW?;ISyL^|d>rtGaD$fAyVcpNLAD1Wncm 
[GIT binary patch: base85-encoded literal data for a binary file added by a patch in this series; the encoded payload is extraction-damaged in this copy and is not reproduced here.]
z`iE|Z3f%nA8i6a1sQ7Y_SqTF_w(eV=?%Suh8}`=IcYo*ejek}=MPHu&@-|2oG5B?x zAASLJ`hak)Vd-bbFsQMX%n*BF#8{zF%)-RVujJ$aW|*fQA{4&=6Yd8>7=tmB?=T%O z-(>55@ywR_0R!}r{#ro2H)w-)*mtLAvTe{RmS6GLLG%5U)zwx0vrjr)-TdqOy6GPl zS^f4w`86Y_UX{nyJI5W(dh*3tY8$fXg{0~pZbz8L<(p8P)8xJ6!@ainWZXk7&#sZM z;*n<~FIm;xvN2ksVM|opY_HUQnOn;0VghMhrL;wqO$Yr>WfO^XKTL31lE_d&vYO{DUq&4b;aGjGt{rJb@-(QA@#R#&A(&)22bb}snwP^(bY&N)xo zQUHJ|%&pU(=R<-~&@4yg-ON49N%phcaQ$7sF2g`#+}?G#U>_=`dWJ`_Aj9;irsYh| zU}xZ-N1lJ`C9+{p>HVp;-l`_Ug#$CG%Vur0i+V(jJ3afBt#9^;=|8V^G)mfu2W`JZ z-Q;;sNHAu+TiU)|>wYbN+r(?{$IS4T7942&DBWG(?OOlql#2zgLl9-<*3o(PQu1z#Pel9TVL2{L zGSs6s3+gs1QJ76YMR>7>Q|%0FKuO5emC@Vnn6Q_Bp@psaslLX5(>M8r2NuNlkhjTi znsK`k)M!?x9Ah28Qm89#M=MlhZh^FvbyZ%bu^4VNM_uOSL8@VXrxxJwmb=jHz5AW0 zhzjR*#XIdBh3Yk-Hu3Gdtb$t<%OHUgYoD_(mX>0-M0NQK^B$wU%ANSOeS_qAWn}7y zGc)x6UEUh_$nn>){)Glw)zg}Glh@YURf zkg=5EE(GtkZirQujR$GK`keL>71FTsS+P~_P^qg_HrtEBlV=GkGjw_hE` zUT8-1?C)`se6CzBQqd}c)f`k3A=i}NV@BgcAQwi)a*Qae<`<}a8cmDNNLj_Yw~TPU z!@zn|wdmHs&8(Tq!VsVJr!zA!v4T>PhK+NEpOfv%ZEBX}Amv3pm~T{D4=56$r~wCW*Sw zI(;M|p@aJtx)JWzZt*{m#>3kgALkdr3uC6B2ZUt;oX?LGj#v#lsf`PJA7vr^Bv2C%j-MXs)7v?^+i+O76C9#VT7Xe<=L0#j@8#Yc%y~U|r%S;}&+O~cynuwBVk=*|CP_i@NXcevmDEWg zbyF~4#Y?I>8m%&;$b}>u_;SDW`u60D1B6JUijM5-mo@8iR~XSg&*2?rMkP<|TfSrx zWrxUp7}7TLSYGC$mg1+s0v_TL&$Ur%__4uvM;(r}-2451_jey(`b~%vYj!ToKIncb zUPMaNsiXw8gQr~OnTN96TM*;=;Qh9wtPC2$Y6@g6_mWH~@tWX1o zL;z|!8=cs%Bz-HW4nc%RQkH>MteX*T6yg?%2jB^}zB2k`(1MZt9Hz}Fzp5(J9H-_v zJ&Z%>fvs-2HYG4Vs{o%HoQa{al;{D5cTioIY-AI8FMs}F5&^sr#x@jPlVQAQZG;#B z%C;4-F=u$u>u5cm>lL*tG-7<6wwzsG7}s{!Cne@Jqw~-2<^g;wjx6ynhc=lO!d(7GD@#Muw+S~-skwxwz=d;nw^h5h+5Xcs11*K?dV!COn)#;`Y5w)*APbesDv*Qb?pFTVWmMK!$GOP7%N^kwo6BhHziCntuYYy}FcD!6xWV0Y8p znheH%B&$wTm_hvhlPYr6cVW-nkhde(*=EJT#o0 zkxkyD>!EqRV%=9%$#*UMvfO*^#oHq;{^cCMOD#2^`S`~dKMooH$c0hA}wyrAfBK*V-aCDwCcsL%#VF zb3c9BI%n{DL@NRb$rJNd*KJGDC}?d{|FG*1;rSAS^*|-Y3B{?*MRBRu955UlNQn5+ zM%xA7vd^zHi=Ns*YnKl?Y#hVR;sxM;tD3o5km>3>WLq1xer8Vd=D9AvS^PGCpPBf- z<1M#!M=&wEGXHoPfMRF`)pd7TWx|}`5GaGrGB{>HD{LDtJ_yo^%<}~*?x3EzSE01O zet(g@Cg!qQCva22xCF7NVU^5;{%=@@ujV<~y(+lDKYo5~`|mFuMydZZEhSyImHdZW zl6o;A4_Np2o|hB*mH*^s0N4LH{1T3yD)r(iOI`VYF%lKD5-*0qn_53g+b1tLbkM`0 zjXH9dGOeZl!=_GYsgp3}A1!6d*n%2mcC;~Z9E)g?RX#}5!Daf~5gn!$ z^_lx+5*oNxik(B*quxDr}Z~^KgQ{3+&A`q6>@{C=h@_2W9NoByj<4 zJ`Om1^bar|%SS!T0i5%onj-Cco*bLnnO#$gT?BdB#1wR85SM=Zxo^jiu#B!ddBivx z@%@IwN5#Y!lk#5gn&00m93)3*wI2_TQ~%31Of-3$MoyZ>eCY2^ewUeN#LDrLayn@h z@lCwVYLG_JGsL5eFLK&$1#N4!O}(f!NJwk~q>q*p_QALyE#@*7od)B+z!b_TGg0Iv z`Hb+p@X2J%wAPHA3CtJ9@6PjilrV>R^m%NLPg7RgMxFHXctCG74$j+r9sg1Q_k<4W}f-cWHExTmTWy189;Q#+ZQH=EDsvGyefoJwfUpYZ?iss6P)=KkFU7-=NL%d+ShV z+V}C@#Gky{6aP7Up80N#bM06E`CriTqL4Un)YEd;>8?`+K<60$tH)dxIB1Hz5joa?2tNtmtAW^I5|wSHlhRj za{ef-T^kwI=9-|KEM`*^^^Bu__G%NCUj#C)!=zXRGY6beBw}>wZA0(Vf58mJ8yZ)! 
zBCZ#(1Fr9GwWj%6o=5r>)pM4;RPsMPx_)ufu zK)$axjQsM8o18!%8PHP&1`qsqHDsm9091@G9&c5_3;|KGgnQ!MxXE`n7 zxZs-ZVK^8H^lGrbx+@i82E@ z7DWoPk3zbMH)_FZbI(qo$15lSLZHBC@dlul1BZ4?=AFyBnRwggOl4I7{DSM^T_(u& z(e5ake}1wfCB{EU3&KN)_{f@fHbV3^n9bV#5Yb4uG#+}$b+@yxXVnc zMaZVL^wCdClWOTZzB*^h{qHs^8l~u(94ahF)@F7BvJB!_atFxuFv=5|9DU>2FSL?z zEN*GZ0ScNQ$Kozf`6q#09oo7`%tD_fSuG}BKN&v`qy@Uzb{&H^bXmhHT2TO=&af(ajy6c!do zDwsjwB&`wC1pQ^~0FSK+gERr8w#U`-y_QUcd!-0k{>w->B z*lmxYjvbOKGt&Qs>FuL-1$Wmh`$@equ!d(%^TOX#k@=FM5VLA~Y^D zjQC0+^*V&s$Z4d9R(40oo0l`%<&j~Z5t%6C0c!HHH4P{5qKm8$P~x-}Pf#LCUEPW{ zTK6jY7j|Wu@naAh-_0&+&%-x(-ZU+j$2v*x`FS1O^_<|68+Bm+>6-0H*V`mUGw-%( z1P&St5h)z2r-l>jL^dQ5RRtwZN>KTb#nH)npC<>abvU~Xe#=i=Zw1yC{bM=6WkilL zZXN3uwU{r4O^2QDbyjn%zYOmDiKopeyZA51@xh(j%}ZXVr$dQ&5r`SPo~)VWYXVz%MJ)~ zGQTwsY~OYm^W8|+L&%9Bx6q-!{X^m_Km4ggLR$LT&dLHx`A#9%)n2^wO%b^{@uh#lodrVa z0^stcJtjD8*M=77`)i~N{4(d)D{8FqZVS*wTC)-{u^r01-)1ixUEL)lnNC2=CNAk8 zh{08oNxFNNLQeQBv6OKbo^l|IO%p%Iym*X}uunRQsJfrD*hp%noNzrm82gt#* zWRR_uZI<6OPe!khhpt$RuYsNgt`5C-p0Vk&%{`H=cq^aUY(96PxgVk**QPKy*50RI zxFy{AWcKCttG8QMzZ|-BWj3_w+Q0sUtH#^Uj(j`!{ny^nxRRo8228iGd?VZJE_=0X zl!O?*hTScsQA0k(pqxCW*~dIuJ9MSq3!j zauQFdxU^|I*%O}Gav&;J6B#={E6?&~0ylkXrRZkEWaE*VRZojCKR+L^jp?ggONswF zP)d#TapvEbn%z=c&hnm4RnB$N3RGs^y&6t(bZtRTa)KLn9@$a0t_|nd-89dtOe$7m zC3$`%*UWXQTRvx->w0ma0tU%COYPUcM96Hpp{de}ZxbW(3-Fu-Ph}}(4!OV;K6|SaBbVBj|91Yr0CTv3!L#K2Y2W5a0(ah{@ zVYU6@ba5=FW5B9*%L87|Y{%6Yk1pG5C{`^rHF)5j(p&$)C=K{*d0d(_PiIhjY!x8~AV0y1r0*yO2w5;nqWv$1`+FYLI?(f_1<<`E1 z3v5~r2byM-;^=9ALm1k4jS6X2$dIy0zW_$xxrv;ro26^?mUTb${cbf6TqTZv{3)bn z(CJEJY>iI7>9lD=tENLEepUYW`wKwE!i%SrPj~zWPxzv+ewgqGO}x4(kbH1gFNl)zz%c1y6C|(~2rf&vcEN zu|MlemCNp|jB;zcsfk!OaW#jOo&kjLmX6S)rCB@8B zTWar*%m!prg5@na?$0*376gaJi6kt2WMIQ^zdrRk2pO!+N8no z$U4Zid5joJ!7P_1n|&Qc_qb9hY@v)Xgl(7JfJY>?2~q8eRnxA z2>2uXEGJyX?tjiF)YgpT7L?wW_!&6tUbmIb_;}4Ln}Flkznt73KaERu?`ShC^jN#! 
zl62uHYUf~^B+yZIhaiAULj@?RXPVeSTv$DKY&HQ4X-KZ2eBun$$BQ z8iTnnL&4t2E_bNWBQ4Nq|JbSX-@C!^h!rBoVuE?$#MGWY33w{DczYJoeB72o8-HZo zuxFizyG=jywWQ+$IlDiHdyzKreIeI97IHrnRb%f^SHbKaWjq+{bE7M-r;Lm-Hn96L zq!!h(XpXAbq9=XQj~DJTmjOXf+3=K_9{`d!$fMmxw53r&uPj2FYw~XOp8kGkc~0xl zz}^6&?Qk(Y=s@qSJ4I*C?R`CWre!FMMrL`*WFfAG7ENJllt*4G0MFrrF*w}`^O1U+ z)kc6JEgWzqh^dfd9s?*O%rfdJi7VSoBi>qcZ12>bF~SYX0Sijr^uv?m)1x8O-tOzq z)qBmp5VIR6RMajQYk68zHt7~~DEOc_ej)d@wrJ+QXV?Hy2TLaTZiW$O!;h z{98pYmPanEb{$tFtEQrCC56>8az65Ci1|#y?GkClR#n1ig!NnL>e)ujS*)Uz%iPx= z`H8~GU*~E!wG0ndW&4-b<|I0Gnn{WdhkkgM8~o&Oo%zue8!h0mS6@Sr=U&|_7LpLH zF9vo8%?i+gO~!IhUoQR1#R7hl<{+`li1N~*om7J`y5a^xa#1^QMc1>&m65AE%FAbl z-!eY^PHn;8KSQ2{4VG%Pi8 z*31A&v8-^YOwG{D)XYB)m01~+Lu!d-W@Uz@W;1GojR(vUOACj}#)D;L!|7OA$A;s1 z_xpv5Z(P8$pS_=Tuk~A-mNB<>5O&^ekI6Y5-QvVz9yiSVihx0>)4VeNl$m*p8o3ga zQ&H|GB;P)h%gLwXo@}~&cjNDFH-oxt5off}mdiJf)X71K23G|jSk`+Vx{zOP9eD1& z`4OA(A$#+B-wT$U)qCH5EXr-Y*jAaHuT(tR1|XFz0IyX-$O%`FB;rF9m}@kb>s)Qn zq2eYM+b4O$QNwPJ*Cmk?`9btYc{i)wBafu2oET#x+JM_Ero?fHJ=^+wAS?{CT4;|Y z;pH7Cpq1^RzXVI$$mhe4A2;UAUGVN_-ue(VB3SK2b&X@P+5GZgCagc+jaxP692f5Yv~Hi+Bo%DCHe53d^IBvzp{=bW{Tyeoc=FYeix@Jf9@C$ z6*Ydo_~yi!298TYtOd1ZuW%WQgQg+oi!r;Bp;#BhLM?K=9(2{D*=n{g)x}#NQ4$#{ zg^Y4YheQ&;2n_-bYeAPVg-mRy6w@y?dtf5S;p46eQ<{%gkg>r!L?{m;8#2T`h#vw>%ku3!msR_-m^BZN0uGLr!GfAn)jZWW7ozh5eHKaGD(D-#J zJgQK&EbHBHS<*VVAp{=0A-|XI5Xqs*G3c>Zkiu!r{o9uRvN^6!91ar!pD`OXh$VTT z9aA2mwYHXkHr$FJ?IN2w@MOwHK}9VRNB=R2T*^dmmEww-_Cv1<@&*tz@}fn?V2K1` zYrs%44Bk@l>RgZWXPmG}dQ_)6+ZNN#ity4E{>Y8?ev-2C-+ZfB zcDaQlsKuFM!GnU;n4aduGwJB|o45l^)R|7env1AEZZS)PaFKylaE?*q+R^Eml?yoX zl5CBr7O7yX1ag&Ogt`SPCaxO5wR9C~{TbPit=CY_yp6lY_QrfuT7ADs4|2}^k!QQ_ z^U|?R2)_x8dIDQJfemAVc%$3+l$+Oz61=1&EZoKYh%oFw z(oC0L?^fHYSy-v0KULLYcx@0tV7FS+}8M zqpO~m<&3P=m}^1`R>mr7y$UJwFRfS?s7y+Wc48U04$k)M)|>uRDkILfdy)&;8;!8IaPB z+$zp4C1cq#kS2z*m6&3k$o+9gYCDjVu|L2pBE)w8Gi$^^YtoLC*b{8fLJWL-O<2GK zAx%j5(EL@M_O)sPb~s}n4-tL=vNU3Jiq&B{w51VB%HN*ViOf=iZh-EO^PsI7b3|)F zm$+@b+C<+%+I07z9bG>uIs1{BWp&z}N$*r#rEHaAHcLA5HJyofnk#=b@0oRZ4gPX+ z=H>mf>H~G>{CM?YOpJ<*Zu+2oC&RCUOS`2gFN$K5aZ#xTxkY9%U66-GkwaV$bw^bI z5q6-z5xJL#EoEYZx&B2)6q}i!rv(F;$j(ts-hmG7rFFw|Po@*L)><@v9@}5!#jrl; zy?pet5CGqpt>ezzh82VJb+q^Vi+yUurMGU_+Ig`TfUnI!@o949Z?}tjgNF|E3BuZ@ zx(&g|y|=p8;YFt(_rl1|IGZ~bktv09^^>sYAJ>VB?#!Rivp%A7Cs0XGIu~eAyHBBJ z)Ze{AsPEc>TzdzxfQQ)&V9$Burxf@!DQe+M{CO_k*>J|n8kqYitWw)L=V=200F1Y; zzOFz(NE~<)43uEZHHgy!Ot$y#JnK~<39k8XrVG(Q`!i18#^bHdeZDPo13hK}WWcUi z+;cq+vqgg0D4YK|5G-PZP!Jv1N*wyCpHe5MVxrAsjF6P=qZ-k zflButvM%Wn4`T)3vQ_?lYTv`tM-w+_!SPBec|yXp|i6C9A>9$*!Y|->M1MR}I>38ibA(y?j4-EYkeYPfWmu&LrJXd{9@s zuJdB$@P&`auDq@!%zNgt`2DNc}8e5mPe^^-qFryWzK1m+9NV_{d67eFq!h`RwuiQ{(7|V*}X5` z^AGZOUHyFf0C=J3C|I9|Aj4hMX?$u80-6A(TmUoe99y0Q4}iBAbw9_C-t4iZ9tNxy z4_Mjjw3`lQlY6IYQx5h^C_z+pSoYY9^J8Hzo-RomWT4Alz9+@rII-2Jzx(`b`7e}n zWyP@{aQZyYNKQEB~a7%gMI$ zKB2b-`1bNhkA^Y-EgLHw$s2r;SGdpO#re=)8EJ0KikC9d^XvepwAcBvp|ga$^R|Dg zoOQQ7Yx-`=r`Kyk?yh$D`>6*E(M4uRkQ-n;t;Eb5ekx$j&U2XP?66)WEq!tYuI$L3}1&RV&ZZ&0G8LTD2n(ykTKYYxV;R&=|LtT=zhuK>sNd3r5+8*cg?ksE}XSv z>-MgZwgGS9vSOG>w@(bHOz5UGe6tL5@We|o6AQ~S4#W07m+Zq`@}8fXoc-vPXg*j8 z5UWok&~&_qc2RqgYH?DZHoAi=O8rNWoq^}FquM^2#yyveuKRyk4yh+Qx zyY61+)4!UNGLeOUBMX_hejd`<7;NGBHw)Mq~iF$-&boe{nKKc z+PE951-wldY6sF>hbl5+mdZdeGwL{Zl6vfbUC8|bt5YvSz1Kh^DA_ym4etbR!$#L_ zMSs}_ex*N5wnYa(0x8CO0ubwDDRAr7nyL-0Up*Q?n$-4Fr@il4d$0Xa;NE}#_~cFP z_q|TR9U*7hnm*o>iLR+ERFA&hRuew_kMQaKG(DM6==`ygL~^MzFQS8QRo>taV+ z`4?gNSn0|WA?8qI9oKmeJFMm7#}kvHrRu{iOTOiN?cPNqS~`ddOr~l5P;1Qbh3cPx zAQqF*93&N+X2rJ0wh1=!tTT(xh@)bqTu`90Cll1Z5Jh@YA z3zABaE)j8A#Sm^^3_HCtMe*vcn=~!m@(%Tw+QYAN&V$H7=epyf3etm|e6O(H-7cYN z@9I9Sxl~0*I3#wqOMA;phhD8SZ(h0 
zo+7Q{u}5aVU(vGyGn<}&vVX>%Y-D;?>{p;&_IB3>!3C6Z=FAi9s%VE3oIQbl+MC4a zqc|7Jt&w#6%#O2#GiPZ#%kKbMe3|LgnO21c>S*tBJKH~9zc5E}9@Y;P=Jl;S8FSB^ zV#cwBnQP=YYr{>deXVH@gU=T6M26r~?sBT#vB5cR_CxN)1TQJ;$&jrZYd6@g|ui)adifF#Ftb ztdq*5#|Ap&gfTYwHK)NZU1`Ubo@jF&jyYO@D`z&SM|Z|J4J4o2%Ra!9WAe342U3^3 z8>H4Lw340TJ-td4!3Wzn^LR`AcR8x{Y|@YH-|Q(OmYS%22LsH zA)OLs+LB%bKKWQ%sDUsd>E88UWJJsEX;zdj(qK6HtnyEH=b?{nxMw0)zdf5?x5HS2 zfn!x6)B^=63uu=E`n}!e?4|5}R6peZG#lYbUlaLaP1T#<$owGX**t92fpxV%)|Og# zIlWon;+rxUaruKYxq1)XcQ|!nW(z$-6!Z<^Y?`@%!eK|a?wRqCyWsjlLV*OZcKZB{ zYU#v6T?o!6kJY$%(s%$;RbyJY?$zygk2{YmfYTH+)50g`&X?m)5t}J?CwnfIw<&sj z%0LADJ-LbQ?`7hZ4qxn($Wi$cKE~b;t3cg$@-FB=F}_-RxgV*iT$HP!r6z~PNtN^j zWE;Z#*QW40AtCHfN{b4s%bn8&KGlJi2ZDB3E?HW8X^1{xuhQHoWEC<~FizTk0tZU6 zOJbv(cB?0`EG9*Ow%1It5B?LxsI{2O9Oanzzg^oz2~H(l2mPuc%HGb}hL>9j>}jDuIrqeCJ5 zQnT5XT}_*~(;|U&E+(AT^bNhUm51Jw_A*dl(Yo0n3eqbV{;rBJqM7!Iy1fWv{Tqr2 zNC*UKEU-KP&JbP>mcc{y6;GZ?@qYxfD1T+#cJ=~f92X|0N{gV5kdgk{WYqm=06vWa zXsoOTZ(NGXf{$lnO90>6f~e0pg*o52yh#Z7G-fPrqp`3{c3R;YT$LjB8uEp}c(yec z(1`^0V`9wQR80UT{cQIAFQ#`??QGYVs@3%teGvKkPEIF?x0UEEv#am2elxLStfh>5 zbtOJ02X_5WwZ?IZ8*L)qFEFe}y3~8y&q%FO`;X{}*<|RzU`>nnKDWq?nTF}m?={=r z%=+_3-wI^lZ#jl;;*Y^tL%zENmtBUWcB1c`isS)!GH>~+ccmUe6POaEBRcSj|6DfT z@6uS}z302mg>4B|Uj}R@B$)YHWcTfAZ1CE^k*8Rm%SbnM*fpBZSY6?U2t0xs#X45BE$aj7_-P? zN^Hed*dmi2?Z*+KE1e_xL)H(CX0i|TjHr3JtckgRy(GCN4R{G4bbKZgmc!K&BrS|q zpQ&WV8FvPbEs9u}g16@Bsh7yz_*9dU;d5o65-rGcu}8jVG+x=Ctz2j%)$<3t9 zIcqlEOp1Hfd$CvBwaQxuxNGH2cm)#O$&2+dm3Cz@SD8s{pEk{V!W~H=ar;hzTrp(E zYn)Nr8j-Xnab!K?p@GB$CEF|Gw89RGpbYe@u-i{3m;guhX|wcvkicLc&YT~fJ7O)Jr+ z1SI^xN>A4zld;OTHDojoMuz2e!u4QcOoVV`)lwKYldn;^Rwm(A!6kQ2Ish1vkP@(= z%mUukK%4-9(nzkKi1^#-*Hljt%6<7MH!MRu1QIwrRLUSChzgCaVnM7zv0}E$j?!e3 z;nXnAfc<&dBC?VOGkLx9?I^_nBJ-y%2HJK@_({<=X1SehY3nO`&bge_J^T#a%)2H3`w}S`DVXoluGlO3nyCf(gu8*~bpqMq4>u zjevoAo;qb(XR&Y`av>vN`l5h~Oz%VywJv;Sag18!F)oM6nmkT%LYXRbToGYZVgP`c zj@~FONf;>^`?ALm3u(tsJ@a5&u>_%iK>DM{5`ChHX9%9H<4Wi1mOAnMJ#6kQKD-IUrDWHW( ztr;f>R1@XUFI7Q2XwM0q&y+KOD=E|Nw~<+L#|e4UFA()x9=7O8)7C5Kd>$@~hpRvP z{wLz}Qxc2WWKPhNPXojF%|3Ljf#Wh^TCTfLwj-H~Zp!|^8GNE9je##QT4?xaD z?hWOtB3cCzMugLpGTfx_(4rE!DtEEM9YAsqsd7iqUCu~e8FHPr*hLyZVX87Z;c9l( ziY}!YP!Z0=L~~Tp0Fpkgk>5E<^FG}b)b8lro_gg{$YkgrcMeH|LO?jpXH*C{DpD(O z^M!}RfkOov#aC7TQa+w(a@mn$2 zV!qUB+F^?VV>%0O(j` zg0zd5uD~2BhjelQSvkj`bdcDBMmw@nHm^S7kDv39n%=IiUTNWC-ekmSEHDwHnB95r zM2ceGgrbHHo5CC{d8!1MAvg}A40Fy175o6DuoX4ObSPnT+DesUo?rySH0Ute+5VLZ zVjN(eLNJIp(pP}$fZg3W#lbR;BXFf*Rv7)8-1ZD8LhuksMf&;3o^5^aGI>m~(s2S2 zOuJCaP}m#5n5*!0B#)b=@2Ay2*w*u)dvGIL21x*F9Oz7gVCbj|6A@)BPOK~TZiau3 z#ZpOW1PAoQBArHLuAQPeV#qC66+NO{q(f9YBj?2_TsbNi*~O-EB$symozU9Oh{)8T zGq^~YXl9NFP%_|BL`4@x_{U6lv?juN>*B4p%}%z>=S3B?I&k*Y;&`lbO|=|j5(v5Q zf>N3V;9#5FY?@Lqu0YjU5~sc7^O#CuK(QxBhF>oqn6R@mf%Hz-#3I<9=Q z9ziIO5jh|s8sf%5_FRCB0NTot`%Ma06BIJ=C>X0cPCz!8sd(c(!troHtLky@$;_9h zkOfQ8ym;qDvaZZ0A$<{rdjWvvLTyFN2at0#*SM0dU@0_j1PVr}<{8ZRl0yirjG~iI zgR-4{IL*Xdq-R{XeHk(g@F$A>6W22MmYDQ(#Qa(IezLr}eXAZPWZ%ssND8{(UmJvre1;Oq5EQC7>k-k&1JLNGM2xeqMc1G8<2kmwEAF+osjNm+c<#U%F}0dGaf&dn)a7YL6RIeW{dxlLr1HRq`7o;|2^ z`6fn3E8f#<5TJ5zQ(8beQMY#6`J-$495(dbz0h}TeVFq%<-P&P<=Z2>bRVN#Tw4Wy z<~;pzgYD^n#s3!cJEyI3_uU^1s73-%t3;*w6R7ngD7P0VsTS?N2|Z_A9;^qn(K3)s z*q~^+6y0K_zWcNgZw;f^;W|1j0BQkOw2D8XNVk+B%gVc>sI8Jco~r?C%jj} zfa6jxlb-~!qiZKpt1;mA{wFG7w#uU?%u=Q!pV@M6hnZc`TpiHWJZsAHK}GgC%)25A zz2exZa^67lEP#rh%b8P;WJJD=EV}KK#n9pk5-X*2-eU1#cM7ade?n&(pC@Qg&m%Df zOG>k}=unewDV$v(&7wp!>&r#_qh8%80jXvq#_uLm&h2uOyeeN z@>cXN;N491X;OwAU89>$6<+CI<))-5TsB01$M={^+m#FWtsGWPQh(09Xr8-_K7vRz zmE;?WV|b`dVZ|{(aUu`3>p}_VEDH7j&F_@=7Ph=XwN-`OFhFho=+i@l7eiSxpi@@n zgA@iJ?Kx!D0J&)umOfI5})0F|IlNvn$oP4e1?Yc56^i;Q$A 
zfaZz41vCJ54l`tj!~(fnH9(Z@bkSh4r~0`p+-6(vf_Pw7;n6>TS{%w#=D8H7%2eSA zi77%}*e|fHWCbGs{JKb9`eamEwp{T4U^;~H)uSE#Z=}e%6zWxJwZ(=Ll^whXw4wD* z*3F{Reu+iOWM7s&dFVIx$a8g{=Q3P6&~>!s#C7G&$|PF=@EcPIM*a%z zL=ei}v9!vgA*gv=Ap6uA)FsN1TKHazvlo{RGfS~g_odA837e>pr%;L_K++dGe=NkA z&=5t$7&Rh5tUR*E=S`;P)b3~hxPJJxDGi-JD!SPh+{f0T}WPEG`=MxgCfB|eTd zGdcV^+RQ2B0n+#iV-S9yLM}F_mU5J4vz06Sm3%J3t87<7nF6LE^QM`g^$IV7C4en+ z56=G+t!<~BYoYFVwRnRoeC61sLoBIvy5Pz8nx%MV3jQebhY+YnKd4H(UiGkQmqT;R zF0hW#WTD1`YIz_p`o>lnt44a z)?Ilw<{`Jz>@HcSQLRZwhR3P=WeA?X;z*F9`Pg}e0sPgFXde=kntwaJe$-m0e{BdV zvVyYifsBG^jy*^fU&tNHU$U7%dvBdf+Nk9M^mIhD8ckUJjX#C-HcqUZt>n{+W3(kO zPd=K7toe=@e}EujWf;TEtoZ&vetyr~0|pvo)Be`7D55O$AU>w+&gu_o3jj(w;&d`{UMnKFK*eiCmYz6sILXyLz6BKr zYPa06+qk zbqj0VV}1N>BIj6^esT|5duQPuLa7uB`iFK$4Ia zJ1tMIGMNw-vKhc4JXWI%V=iRAL;{Jvd_a7Hql0k2!zBnRTPA?qe(%@e#VCE*ibaXK zKJQnWucYb)H}aMTM9l1noKvIw)|0qVHxOol!IB5D2+m< z3O=es*LppZh7vsU-w05S1!3~^Ez5oN%a(7TMk6Rxt_&SGJ`l-iKL8e!?4-t4(xz=r zx0uJpYbreM@di~Cg?djZvy=yxMYmunBJUG#+JrI3$q3Podq(iI zy3}n;qE}7i=R7}WyG~{^^gbm63=^}{?(G; zzq@1Fs)a|Jo8r3z==&45P3XgH*&R7wdN;qjfH{!YH`={M{aGDK_UjWP>JO~_&R@sb zhL$&2MFMi9&~k&tz_OK-aQrT6?>I9`*oZh5#ADpbY7{FMdE8TdK~fv*y)N1>Ql;E+ zTMeF~vbKo#es^0so?KzA43X#%n`*x305na?Cq=B4>g`&FKg%G?X=9JB7R0Ck`t};~s~`8qVYsMtfZ@=m{j9_1VkyF*N&-uF#!4$3hOi?J!*6*1uDrU# z^WwOrIJVIQ;ITG;&&K zj=J$9cbJd)wMyEEvdH+PZ6W$~;*OhfvRc5yo2F0CI5IWn*nIfSN|aXrbZ!%=2gjOr-0I!*vUCazpXFj4A{E<;tc3Y=I7MRfj1zG}3)xO9ETtSj=f! ziMEJvoPYz*V~yrkaWcem6Y@i<3?-&ph~F>c?q;)DJAOKeIWol&Bp0zGXxFmxQY!{5 z=UR%uuU_Jv>2*qnZ(0YoGp@H{rTdz7Q6+ooQgI8nEww0;Q5=7ALvH8mw+nfe76rUN z=p&}Zfwug#ap7^i$<5~?wLfes_58Nc3SVEt&5;n+1#5PDhUMwuhYMLmb5DKDBASr% zOkFC%I#HUWq0vbJ*mGqb_-ro9xj-v7Gw{%XoCS;Pz~XR`fC3BPOidJy!?plDGM{ZH zbAgz$WAqk-9I%^U)=HXkrW27i%nEMln+#^Ml?3UWDwU(OG7EeG90>6`isybvuze>A zy|I>Ak@z}czQvQUXmNC?^-{>EeE_*!d(AA)iSenm$N6uT`Qi7md-ilERT~w%+4Ek} zBfpF|+kqkTYkDI;@BNeX-XGyVz^n~9HMDLf8N>^DJ{Kkl15mmZ)<*{*3S@*ETgqGC znIL)`|MPO4V$E)%ZIezzlTN@$_{HQ+vaUjh&nlN-r)8BR0Jj=K&273vZI(fi`Dz3| zt_x>rlrw3cRm4darAlPe!A&{xi-!atQ8H(b9s)-sjE&l5JMF^bHLzynBf0JDBb5WD zMsxf$Oo97t4C4DZqs#B^zacouP`_WYd28WWR$hkTUHhbZsK45!E@!Cl*|DoH{>|RJ z`F+g$s6l(a{d@7v&y^Rma!E-h$g-0>-+_=^^lg1JF-6^Rc$dz5)04~<9e7=c6;Vwp z;DJ8ywovJjX(#=F^pYuF@)zQd8&STZfy$-HOlVsIfYvBLn1lq-40w3onVue*lV~6{U-0~fU0utrC>K$O(4?7O<)yS z=n|PMTEu>Z>m~R2bhA`P2@1EISY^SW>smY-8Sn_y`4Az z%W<1yBU`F#nxvh5C&c?suFbEd-;3oTHsZOmZ)`a>vT%?rlVx}=es~o|6p2NnOuJi( z*UKbZEG2T}SEgTs8h&p>=zU|AUSYN|E{YkMwI*AHRMxd?0PEe|0Ti ztz7eR269>01F!ujGTmX(X}U-^@ZW=(sOTkkjj1`hW66uB;ha$Yg)mT&eft|#c z1ZhA^-o+@LBS^jF?@@7<9sL-6!dGbGImW{~LL@rEDn*pQ1*e zf6g;+<=c%_*B>o@X4tO7R_Z=f*RK=CqkzTD zuW^6<<-iQy#SOfJ3Lc>_j{L7);X{R(BOuej1F@@C;Bjt+=KrU-(j73sYI4DZFjhA!4PB@KMI(DlN7RPY2S&4S@HQ96j0S z#zVZ=7SGPFo%@={A6XJkKrxzcJ07^5|NO~D`kv)>FX|V>x$S1?Gf>W4W#EVsTh7G2 zpGM68-L!PNNqi&ajYs00s2MUEl`2_|=S-X|FVn4ali;@MF3 zp(K5TgX+}?%NQr{j#bQW#;=bAs8irtTo-vtwUb)L|BZ~8r};$Km5+ZZL$0}PqP%`Xf9+`CucGNK z?DXd98TTSzuN{9xj4L5Nd%OI0l{Ne1QO}dxzVOV4XH1hu`Uo{Vg6cJz!*3?H%B?xn z2FTJKUY2@Pq6{$T$kZ_)i$@;Up?MG4{CS zac1al8F^gqpyD$}1b#IN5D5?PK(>_cD^j{m0&Btmb|PYKtCHm|_XyBGnzzWl&DGUw z6>ov8J-vFFDe6*W#=eE`kQY~?cOU&ychqg>x~G4$Z_7x`&#H{NmE#Bdi6 zx;g6fn>XQ(Jdv3{)2)C`5>j82bTcfua_en;n*uqEIki#oX$9`R*&lzgXKuSUgER(^ z+~K+!=f3Rs^7v4yC!ZOt7k-}RG1Vw7pT9yZA_ppiQbYY}5T3{`RwW;&hUV6U2EyRn zG2V(*%a&*8NNT|HnIb4F)OT{6WL#ts3|AFGEgxl|PzO>4-dUl6s#9)#dUftS)^zn$ z1dq3IRd~L=)K3+5n9Z1HLVoY9UC!JncXL|2;+0k9#szg-xriX6()onjp^NYIU2dC_ zjxRlEa^$+(waOWGAbB!_I06twrwXC(-I7YdYf%>>wNH7 zx!iW@w(oa`nKaIq=K|XJM@#oA(iG@6u4f7%s{8C?BYZYZ;4M;m$0D30d>FNh{Rp0W z2>AqeVW-cljOT}q=>Ta`w>#;khY9jb3?(sllgW?;mk*~{_XRrIM=IDO0yg#DC`TqR 
zmGYa82mzFQOk`|HCUJ9HfZc{v^pt-8WkfK~bIHyL?26;K zo_DyLZuo?3{dB6`dF|Um&+*;sh^EMY$9Q)R?(Dho$pVqXvJB(3BtqVy09Q? zh#4lB@~sfEdDjzjL!b35{Ik^N`iG|$hKLkuRMvF2IQ`vD+MbuorLP`*U_CIrY?Pr- z=fdpNz(@l2e(1sXV`FKauV3WWzx;v=y@2cC!zvcK9aPLQZyF@pHd=AKwW7CpJ{arntqFaULad7m0R}@==HR z-H!?e%<`9qoV&w3&rkDhvyJ23tJV?50c&&9bD<8aLbp7A@iqAgVno+xr_&I4#NyUc zHGrm`PdmpWjO{1B9jE>1`Sbl>H^z4tH8~==y0G!ph`qxZl~l%q^GE8~H>b?@d!Aj? z;8m9pT5L?_~P;P=Cak_X7@}qJkeRGE*yyL`G8hX z{k)wb1>5&RZo`72N6?NVilSHkMN7L9hc<2x@30pdlPkQb?o*1n#^DjUgA~k4FrYx)f^RX^c5XHczb*>5aCO&ywM~zcM3@0p5at-pL#SKnJqCxUvAbK%MZ8 zXQm~k$@NMH03El>9@&M;mwQJly?vDIZGVj(hI#dX)8BfQy6YQWP(bBbjtGk8BKG-C z#(EI1OPnAw@66&OVW_f2dEG9DR%ULqU_4k<0}X~5?_MXLoe^?r$@&FL69O^0KhP$A z)M`Xs@};`Hfx3e~Ht&*lEj#=ANxR26{g$nsgugR>Q~4GKdD!1_xKWpN`ofWaHaG5X z^L*CT^+ECL;@>y_?9x0{XfRhihP&86*TlDFJ{|#8DhO_=CJ{i({8!hmbxaWN2|ajg z{1-d;=+SZ0GL-4(I6Eca3097;l%+wnH)S0ZCGF@V5Vy1_T`ro-D>RIsK3i@sjc zTs`LrQ|}Xvs0f6wg!X3hjb9~2^r~q$ssbl0G(*}7FN0O=;(4>>j@&<6i&)orw}k(` zUho>A&HrLQ2GU?$G`0&p!aw~K@(Dm#7(eH)T7bmr$j{`CKB2x7ZR}ylB1?q)sAo^x z3EXNRQUUF+$2^n@6SnGzhzi;a%k8;*d$PXwG!T(|l=6Ti_1frio0ibtzdGz|I~{6FXrua{rY<7 zQm)JV?Y||rRrGM;V&6;?)eU-EVH33Vd*aO4aq`iKCeh=Q8_iY>wG8J+>_zW88GC9) z^XQ$ER*yUc3;nWWUg_#X{4KP@DO6lJiC^82NSk27iF*6!)mkCvwE5ZWrEGV9Nn{W46kcJ#8IKmf8qF z%-lNPyBlV0_E}f8oJ029ra)jQ)Bus>S*Pli+3xVwOfYEUhlexRby`{OKT51Q!#z&T zs<4{X*D(TEJh`y*6({Mc>+YksbL{#De$jFp4152HO?l^XK=R~m%b^#Boil=p@R_ZG zN~h-9=DI5j_NV1HhMml8Z3{b?8S&taE=V;2BsQf3) zWWOtf0v&X1I@jb$pCqeFXQUIHu)(z6QIO!HW@(+AxEkN9?{j(R`Q2MmRTf;WPKCwO zbS3j#6W^22Z)q&WJnJ>1xOo#&2>=vGO2~7sjCWMb&6=|oJq=O|fGYwU(eBI0`mLV= zaO?tLFx=(($E%;O9Q&xDdi}LKrkJo$j)Hq_?$>0v=bDG?df#DwIOiU{$8Ph>6Z_`x z`E&x)3?%G%w=3XKZW~exz=*;P0JlR92BYRb-AshU*^i`^!d3(GezyuoTI< zYwSjRTha7#@9fKOT#J38MK1d!SC@Db&0`kN>6cB%Cv@5O~DLA-ARdc~9xtrp| zS@S8Xu@t@f+b2)W>Cr{|!~V-y%=f4&{%2tAQrjs*WBT-u-nRQ8XL`1LtIzAY=vY{Z zTyZsir}l+(=lnzkJdw2R7#yRX8BY-wU41xxp}m@)1v^3?8YKQ&3&HxPHSrI0w;Z@+ z>yj*;ls=+ESxe!1^|4u!ohGRAH3~s100^&hL4-s@ek2X^K4^j6!rJAzx=U|^ zpMy(C45cv0GC;_jYd0xDGmJcxCl_HKtS8OxL%8KLZ8*=$?1nXAflYEH&qrxdIF6v8 z1_<+7KdtML^O`=TgxslFig^8NcR|-0H9qdPmsuP*6!cnM;)9$w(Glu)_TsKz531~* zFfoA_=j;}jdIj&G!YCpwYxF}9Rfjw<5BBx#ECO-Qo z8J7U_8I2+MQWoS&&A+Q)fE!({5TI%xOrwM6Hy@{)kaAH&kgzZpPfX*gP>+xjtCPCWmb zEtKC)VYc#|mly?6qFV!zKhP z$nrWqp@eMhB7@s~Izt_x(OYIPxMeeJCFnGj^2TN4UUIivWVKf&cMYyBhlf(X8exvG z$W?#}E}}2}_-52%i|(u=Qco_8G*vUw58ouE#QZH=$nx_w;Qho6#9%(f2)v&0`Oxa- zPc|Vmy&>inNCeFT>{^69fiEm{1)YQDf*&*EX80^`pZ1D>deYi!m)i>B{cpA}Wa|_E zJV=BIQAj&cV_{-vz^#ltt^01k@2!>XQ*1=NlM6ih;g+%j^yXD%K-0{d(^sy-hCj75 z%*DZHr(iq}pmA#*e)C{Wxr;Oy`N-Z64rHe3&xcdlx@%V5bn3EG<(gTn8W z+dY#*<}!Fqxi;ZA64Vl{(4|l!ANj#;uq^;Aq%y>`YuyrbBvVkkMFKk6K^V9E#smje z2llrJp-eBa)sV|m141cVrYg4lmwo^<+(_S&z9Q~Q9l~y0Vis`s7!$H@Snwi$prW=( zz+3R*pXu0nvoy1P?nGrA{?mWxRo&_x?}i#cb9=_X)TH-O`6P6Nv0U3AbQw%F!wDA6 z++d~W?D zQ_{Jfn92ebAtfBLOlO++V{iG;SY;X0KCg~BU5{LAXNuZXT>R9%j&BK*j6+98as3kr zU(8~|*Bko>X0JGZ_qg?HV&DFwZw;5f-A&&9@58^HhRWW@gdWasnzj9Rg#V6!-O=Uo zBZBwmr<>2W@B6U!d;HC{n5~chWNb>_TsLy-EZVJ5XNqh6;;B(<{(@TmDox4)*Kuxt zf&iF<_{>x=!NlOpEsGfZ_nu$Jgf_B#56n>OmGglE0w+G_n?;6aW}} zBlVgNStsdi?<;oTlN*5lqv$;2l5GDz%$|aX3s-^LoVhhEoVmk2(!#wJ&7Bn}u3VVg z7S7bn(A3n-aO5Z}bERe7SX!D^X5QV}@c;0<;0-UY^TWk?{?70BIPi#C91*zZMk(I0 z`SzLrn;O%ex{Y;Msl}S%e(}qIe5jc}p2OcN`y(9~IDw2M0=$6sWMJ$d@`^ObO?l&% zT^2Nmj*(?F$7_62Q=i>YIzH+z35r@DCQ0s6{tT~3{LvD8eCm&r|KEg3mCgW*whZIMEZovl=!8>U)Qhz-eBV^0D1EM$e#f~5D?d8 zCgbFfbRywFj!i+R<>&U`w1_KWveQq%gUwCwJiFgrnzIK_hK{R&B%92*AOYN8j;#8Q ztojvm8mX({eOv2_oCQK^@pI;C#G-??%m8r91uyKVt>D+$PF5B#w(AsGMp&7=9mHeHl5b_I>k7jKaeegJT6-GYsj$T50+n z>5DoF#~g%XYtgIfYC)|#XAqp-45e7*uQ-ms$4lUjtlGM&pavU>7!vtI1_Ow1GVebN*KtTK- 
zGW??&j!$i%PK7Ct3>+eYKXOIbM4SzS&;Hebkl`8ML@tLOb#;0;(46S?)kKVI0*NQe zV9vddtTmqf^?ao?zAVxtdfjPOu$rV-X~qxLNnBpBDcSYrWSl0%$)B5jrRKai(cV7s zTxx=}cx6_KQzq@wDV=?wv#5;_A0->dUdc+c(G;L4*_LRgX*YRAoYA*eyFo=NZX?C0 z$eB9KfjLkN5V%7p|2LexhXjZ4AZc8586D@eg3~`DQo)7nOo)LPdbwP|Dl&vmEBwj3 zziTdNXN?1`;3RboesOaIs2WEaHFW?+8C(%q;IjlWEQRW~(1_$SSa(YVLNx?}i-`5G z2oJ=?_xT9fTAh1$XSO5V$A%u6f0?bT<&GBXxx4D+@7ml zv*Fe}fE{%eVg}3B#>EVY5NmPrLwJW;{IwwJ)ek=nDXwg;h#RiYE<2TvNwon&OLB$e zTf))9O~bS+gbV=%q;;f|X$IRXvFpsZXxsCy3 zw1yedAsM+E4PA^w7G4OIM?=F}HW7g3$dU?VXyyn-ySOs?g}ZLg8x%Un#A0pc|o z4(eQ!l+_N7(BA)iAxvZNy=r9jP-F#aeQf3TpqSj`&kN`RmFS=Tmz_&6Xn{wjSY{)wfg za)Q1FQP2sf4k3jFz7X66;Cuw}z3iNiwt`VZ5;kT?fWKDdeQSqWTo4g-F`sgtESN-1 zM)?a8fwl=;0HhWdA_o#v$q@?&dBG<%>aRbK1dx+(2$GuG!G9nu180KGW?O65T3dN{ zMZ7Ay>x+2Y{IRyGHd6OJ78y0CrhTzd@8TD)iyM`MErUzqBy2m@piupP4ZE8IT_dO^ z`*Y8we)ialFU2~$oGJey#d2JN(4H=~pVb5u*a=blCi}!r0Zj;+O)oIAW;k;*L6avU zPG*8z)Kh-|_PNw)a;TOL zTx})&U-%!pdxDzyi{n3w){-UGD@7OeFMR-b9T?hNIU&%Vfl)Z@O?x|UZ|dsT=N2o` zUt4mIk&juztq9i!0sh*>-EA=zR{|RBHAs^IL_pKn2LFWwTc(HAlF@RtWk2XR7em31 zR1eJx^Z-?G&k`r^kMLk?*l=N%r;@a&a2s-<0|4#-_$P%9xflsnV?(@N<~4eQh`i#1 zi7>^+rrczp6G;MQrk=oaz3QL(i+dNwQ_|tjuWhx_0lwLEXac?cwm9k!3gb<_@!dgA zUi;5#uWY%$#9klggF$f#sbjyl_#fgc`MyQvm$4Elwa{0w4@DRX#`mEA`N^ID*Jpqz~c8)H87AVeO&Osw0b<2Y!yzkq>1Y#gL^ zKFX3RLIwx$@P}$~!&F!b9j90er*Y9ZGr=@|^mq}={)M$Z3BIveOkjhQ0WLkie%?Ue zhO2<(zYr3M;CBJY-Fp%c5=AORbITn3>;1r}R~&(XdriczC!BCD%4%DHUH-Ra;I6K% z`pwNn4*WFV_H+b(3E?YRmk`IX`qDCbWlB1%jbrSmqeWX_*Uhj6g zb0)gt%+rANOn94t;0-de?G=EpzhRD2-mf7cwQ2d|f=>S5aNy(7M0f@je$g3`O(x_K z5jJ#fYXCI*pq76y6uIUvHO%M5@=^Vas>UI)m|A!ZdD12-shR*dq>xGqHZH37cRQ&$ zDui-c$$|`tXmFnvY{uyV>|!E7TOgV0syL}w;AI22_{tlgG1trOne?%3|#nkqh$!NWqKl*0$chVwxLM){7%E{k~ zFXu9(zU9dNRJSJJti3b@=tfs8;=h@L^x3do^Ye*BOa&dfmJnm8c;{YpU*4^lc?5D1LppAmq%u0I=+kpcp{? zK#Uw*L(=e|-g$cRn7GQLzo19|`;+w=ch`{zlI1f(sS$F1aQPvz0u8|*(}J3BqYj&_ zu6arr%ShN9%vxMvGoaXsdEeP?>ABi@nSIQ^VrK*m&p4WVI`r!93QhjF! 
zbSFpgB&_Vah&jVjkR+%$o^tP5%7buoC=Udonu`(vMKZvx3;YoMxYVf%IWl*hS4C`adl|lpznqrc2%+s)dLQ%JVlo8a}3H^gg^IR16U)hY$XJxv*;b zcUC36nCrfv>Q&^y%PYzYZ9?K_OZH*Y0Hm)VRRHaNu2vDj(|;3R%4a@}P@u zZ*2V~f97ZNA{io8R0zWC?UIZT;C;nCgs3b**DGKY3Ku;@CT?>X5F}2~GKKyfmfoyNK)_JK%?rXFK?FO*X9mV&%No{^6p;i?NCqd zX__Gn{1zo+eNY+_%D>VrGpRTzSP6ykII2vnf()bIdtX&8#N$7bOnR^!AIQnJ>* z6z&i`7j;OP#^Z}JpM3;hY#fDd!;|1n)DDK$@v1Gw0r`dNdZ4CE=Q;V1stK7n&pJ&XIQ} z=l%iz4JHaxB-}VNU^Na6h?e3(pdu%ho(!Xej|&%SKZ#kd%^^Bj3246z=yvz?t=${* zQf*iEq(%-kVhyz3>3uyDwUHuju-xla*_XP}-EOcHrBirFfwDjdc0Ddu7Niv3vkJ4f z{Jx2Qv~%^P4aibNswT81JOqR|AQH{M!ek0XfMN1US)(+aF$6sApgpM|n{pH>>sv;# zQSYYtlb?3)Qb4lvhQgo0Cd4#**`6q4b6{$^Xx@c zOs;u{xJWON283x_OQ$-hR{Sde}CNitm6=Q!aAW!+C=(v}A+k~vZ#o8=SqZ0>< z$)_)j6})iP zr0g)t5b*4lv%SVa<($hCb;D*QGw9J?I;g$r`V&fIuZHPIx_YBL(58nEr$7m>pS@&D zHtX#cbs(F|6D^Nc4fS-3$}tW{5WB1`@8Dm4QGM-W@y=_v;;@jlOsJ4}-`TC5%Uz!%>{2tpy8t0CZU!O^2dlAS!_RVukytkKDF37=X2`(*q-TN) z;so++AQC@$I)|8cm-KVZUJ?OPNE?WQFmU#)7J2GL1m`&yggOWN%$bknDcTrB`)y}@4v>Bj<0hU6(2&`bt@H~R6(62PXy$D; ze)w~xFq}0Ex$#CCZjP7f;`)wy5-&+|E%iFh$Nf{03fIUqKsAsBAtgV zW(UX!52=_+XBfLLgNf7CxmwpsQxbRIA_rLr%M>6yNP2|uQwv5G0pYb-^9K=>%To$8P_@deGu=Z-h(%<6{b=lY+q3dXGI20Lx_E%Td2}BwUZ5uM1!?$kt32x@v-m+*i1z5HAn@b>%OpK>|%6>wtNc zl}9)kb1(%wqHEOd?U)N9;&IuTYANWrxvN+f-AZ$POn;%VeYC_v=EN6g9_VEK>32}S zynE;mOv|eEi}h0T94KCG+YKvkhPuJCR5awP$8tSUwkfS4+H_#02@y2IY)aRPX8_fR z%7VN~kVol}S`0bNplU?Ciw-8ay_bB&g597wqpxg;9&p^}Hff&`UVU}KLRY9(8LC4l z7oM!tRux2$MrWX=*f4G7IDsGqDg7bEwv4tYOK7#XrBj&#n&skNQvEjQ@&ahXaS~dIg7}2ROrIB`-5mY&;*NbDZx25I88L3k;e|Y%`ZekNg?Nf~E z7~{6M{9{U_fo9gdl#yefPEA)2jIYwm0J=8|9nQTMpB`d=VdWfOl;^vlEDnQ)z~Cyb zVCDLdfP+)Up)fPFDaBlTjFxA+SSR8xouzEKQvKC5RIHCCp%TH!k650PxDfw}?T?j2lEx#HULBzo;9R4=Pj*Q>BF@pr`&YEwSyepLI1cQe*u zNO5YAe0^6!z0|_Al~^4$+u+60vu1ME8&QpS+FF0AUYbmrY3$HXe4`;UrZcg8yey?k z7GJ;tkDa44OgZNGv+F=Hx93@!GzRAkArrsnjXrI7MBLzRmPWHbCfvqCe4cw;qphQ5 z^@17FlnXp@o_~KM!a!K^%uCOiszk!SEZ2 zZ#b!&orW(T8ypmj*as@6n#SAT-!;6%5X*@RIS9ebv6AXeruFmCb7agQ71NlL#>Z-Y zoOkn01j0JbNJAaX=nA5rU?n(I2@ZKm3!Jp=-$TYe=evGL(n;<}PLdAa*rPy1_Rvs$ zj|5#Ap3`mvP4GyUg>zc!?O{dlq)8ehsb9BsG7IuS;d1Z5e zX{MnPJ|Z3W+!RO>a3SKh72cQ!&)~r_S@42scq$GVLkFM3WxwJHG>}oHMARN4VTnRb z(*_f0h#V@SR*O|jXEoqZ21=+}Dx$#yZ3r;&p`zc=#eVKd_s|eo#B)%gGH?BdETENUZX;tTp)nyaX%o)JPVU}bbuVToi7fUK8T-8PzQUNS2mY_Ea99~mz^#4Rdoj_^jK=hFmbcZcU z5Q(TGVm@=R_i-?TZU0~0S5)Xy0z_S3r6?w!>Yi!*y&GaGk!BDsoE(a*;395mAwoVO zWBw~8OCX<7k(b$J@gwj>e12~t#91PvJ1k$51)@}do==CrzIDNTJYs=cu5RdNu@ii$ zM0|f=yt+~>G^B8qt?`y!nO=>&zKvc8tGrx_O{uOt?;UE3bBv$Ef7Wxnlr3RQEKBvE zK-oYcEpWiPKqAf256r?g8*iubZzqpI~`pJ0%#ZK*-EfZyvz&mlx17ctI7c+P|* z{B$d!p_$w8m8GWiZCET0{_DxzaCT7OQP&79S26$$%fF$*A4fgCu|&F%{`!`8DrN!D z{Dw{l`e-=Rt#RJ9`L`k=g2Y~)PCdVbZ02CUPzAqm%|25r^D|o@{gna}Kt(fSxRx!N z)++U#k2wwPc$I7NMv6aM(xxEa5Ha0aR}GR;-9$`UJp3#NEUy*cK&xrcM`Ti9d0MQN zmhM}eQ;#~}IXr$~CGuyAT$1WP?G4H7};h_8Y-CT}G7 z-_WF@+IXC6+2t3WMEIx#e;qRy+w3?ZpULmmhP<>ZL4_(lz#i}fw>YVAixUY#Lc}V9^Cb{VQV_t$H~~Na0KX7*g^ZoSHT+ga*`G(~&Vm*B*)jm) zUU0T?=1dE)xJ~=NnhwgYs+XfVuRHt5 zfRVLSj-LCi+6Nr;?Hi>OWI2!XweQxp%1y{v`IQeb7l98Boo}XQ-!$=hP*aJ8{z5Bl z3VtD1E-Ye}CfMj-=nwxIndtkdA3ZxSq6pb;Sh#J8J<}|6J5=XoXaJwA z%7Ob6PnFVHU|SSVJf}z&<@0o$PQK{Hh1gL+cGEJ?(pWu96^l63eHOZ&jtHfNlAQ0~ zDFFx*@lo21YKl@#L%^Wv(zjZIle~8I?)I(zp*sbojH^g#BH|ei87svQKNkjxE*y*{HwILi;JTzlST(ycs?Z!0b#O^uO80wC9gzt@s)nKKNg1$P(8~ z?l64vccEI;{*ea(K=j6VIR5G$TVU=ILu@y#<_p%TdvZ-4;D62Jv$P!LgSx6RYe znWv$0S#6ED22?cyW`t@Wv+qwodiS&&ED-@VLb3q4HQp>(9&3(`{Y=GVv(YnJ5rK5z zZD5}G<2$`80lof>w`B3guCj=bM^NynZiNXPko-e;dULw+(yV-$h=_5^l}BfQgzJ7^9R>s=F&&qLd1)VHTJ_( zLt)s6j#Rm;$S&OD1eMA`9(se$e07jgnDXqoqq4-_%X=StodwRlz!CvL%Z_$#z>D0m 
zeW}5cht+$CL2MoiErYs+yV^f3a342uD;y!Ovy|q`YV2e6PgiWv5U?h6HVY9zk;Foe z#td5?skjt2BdOj2oZraXoEF@o3~gQ(AoM0nO(7=fs0+5UxN8CxxW4<>o~lg^Kln3z z5-@W8H|XZ+`TG1@n|(Jjy%5C!WCdA!irNo{b-XrMu$VIE8aDR_ReX6be%AZJ`+&kZ z4z|fNZPNkWOh%_%e+4+i8{B^72ql*J11>1OIOe8!VL|F)ha!q+JiHIQ$jh%#xh>Gg z|C2`4(yGxL5{T;RxOCJ(&9e!Z%0$D)?dr+*+2s0L(*lvVF||a@A`x@122Pm1gT)0V1tUPslin8vu8pcCSUpMs&$en6`%2@ ziUdpii0q#}!1mKF=*5+O-7C7qS>JzK$({hF(bs6yIodWXkvpe;4R-3nTOEh;H_=ln zY*5{APznz&Z;<<5<~pH5W{UVJ;oU9sbj)DmCnJO}7qM+MpAz9Ow$xi#pUloaKws7n z7@o%B{-ON_nEARlZ&k?fgl#_k@niHs z`$!u{z;x=?>-*o1EPnf1@##Crh;Tl{*W&^4DdsI1bBlHz{ux>J3tjrZZKc!utmjPf z2EK0oo};T{bIFkt4-}znMd4Y=1TD<#X#ok1280AEt!-T9tAv^cvSgbNxJNLF2=}Ws zIplh^ueWu4SO%?V?rrR$XBa!ZT{$)&UiMm>h17U^K>N8d^f_Q~C=7D{akJKb>7(_c zgKbC*`{uS=Dg~9rh0`c-f9Ib8{(aWlYYXaAu-|=Ip&x0*|BFBIU&7swJoR_S1aNIt z9i8seOJ-s7*KBWlVBY9OzwM0v;PF!Gem>!0s$AvQHwHy(Pof=OMF;AK*EXaaJXvii zwfB4!^b4;zReATUrjG3jhcuD_6caFE@pxtoxNRxQu$4^*11U%ZfJZi)hsQUC&n8}W zYrfksn5``m;iBA?jg`2(J8_3?QL<=_>i#)>%OdsP&yVxhDSj@``deI;3a{&h=i-2} z9b7XI6opXs0_=ZZgJ5xCA+2O?mVonA+Z6M%hf?*Uu=bvPbh(nw%{nhGD|AM|nD&ml zf>C-Ft9LTvHV`j8Aw)jGkB2LNEQw^58{X&y*2`Wnef#CcGAlx6$G*uGstkg+Oc`fK|` z34gHrwEf}nqma9n1rC;XoOkh1@u)eK@#^9Dypj&M?5qnwy?e@m0nvYO3=YigUHtt~mnW2oFlFL2@LYOn?z`oV0^IxIj&FAKfPo8B5 zy4Wc_94)a&r8eN~)moSXjG5D?B>!vegI$g8+{mum_j$W67yD!>L_WrDN6|gfX@w4+ z@rhS4qaORNXO7jLAk#ru+>lT2bEu?XYyVJ%&I5NOH zU!yy|prM#JN6i#UL8dZGED#a~B^KL%^KH6+{@tvITvUNqMnnjyijxy~E)2XurT`Fz zpKu6xW?j%q*B0l~LqW5<%u!g}SX14ZnYmq-n+UELqq1 z$~$Cm!598zD)f-%{9oOK^#UxwFdEk(=0}p6WsDG8?6D9UKnK99XxY*cPPLJEGE2)W zPZ>f|IN2>L#4HqZ^ZIaKf0>qvF@SU&7--6UM=6w;jS?&h$iGC_DwO9tpyIv>eQOmK zK?JV-vpOpPL%3spDi%;zIOTwyZaWwKF*A5DNQKSgWz?=Wj|v`71wThz%@l5*wkXxv zg%QLq%!RUDSr$c_ys-bF>;AU~9(zAsGp(N7W_V+i*1_=m(n_8RJZ;~A%R)TCC|gKe&gs=tS2sha0F(hB6c+%;QEBcfbX=7gHasOb zK1d5Fb|_RsdVUR;W@CX~;K20$P@w9F3@O>8xes?YHB+@1NWR=e>1~}Pb23Yp20jH71l6dzucE1^|uR-V3GkGGhWh zD(Ml%4abo;YtzYK!XDN6Jd-OQ?9p7`I<&y0hr=Movl)@=w4YK(O6^s?3}9VkMqf++*WST0lWbP5fC z;A-bD4UagFh-k`Y5jx2zaxM^;AY|zW^Y7E&qhN0Q$SmHWYREUaYGyY#VNj!tE-UAsI-VUg+7OORTZ8)t%xRE$4=Dfs0Mpe}@qU5NFI~x~IH#a;~|&`PuVE zS%gvkbx9Q+7lC}N-Zq3y3=$H`LW~ml2u*y^%bOsD=utwpbzZq`%medM5ty0Mt20`a zA!z`JGB+oP@?J1~xll4hQQ0iJjFM+3Z~IK?y0xA*)QNiY_rj~=nq3LUmiH|VNLvOs zcCL+!-E=F~9eC>6>iwnczBY1UIJ=~HZ(LG2JXg2*`@10d4-flUchg&53Pc!!#qURs zeVKO6_bICrX<$KS70nQq#3~$9*^;!&&M~$05=`g?%b?TL2)ayX zuv-AoB9m~Gq&vjq9O;G19g)U^>FeJNhtqj~;+hv}@?D^K8VXj5>5o?r_L!*2p`TN`G zfM!o+*il3`H($`~kH6F&5n(0}?)&t4KFJA)aB;{$O0SEnI0+pK-D3>

1ok+)NpS z8R8^vSg>dtEH%x8Z?+E!rfdhWTPG@-ckpKBT_TOMmbB4H~5-hzLqA=Q) zYgOibo^b>lFYUCPTv=R_P*iq3j|?#h9YFYc-Au?&(*4z!6~5-3t;DAj{m)E}3^Y%C zsySoV@Low~jiYNg2*f(w0!t~Z=r+#^m7Kb_UzZ`&SM0vSHNBe+vrmamp$KyBdz_atqwk5V)o|Gd*K*};d14&G zDar2J^*ZJ|m6!iLf*;>5WJ6Y|iLD(mqZ8Pq$h13N|)HJ_@Nz1Y=UT4VReGf2zx8th9@~qb+0>r#Z?laKzw| zTKLC)`!j<}Ia3VNtqZx9^4l{PXIluku-FVFfOL@CX$VaVfS=3FwqrkVVL@yS$vcK* zcse)0kQ>0gX{XGW62b(0vqKljSb>~SL{1Q3C@3X+Pa77PdpCdxLt73^xk4sSL!SQ5 ze7C3mO{o2PUa!7A&>&F6I8bCgg^`aoFBmag_yDT-th0~os@@T~RW0+@Rq!VZ_K!~E z$w&o{lCINcg9q!`u{QAdX_)3}m*y5CzPCKSIaj@`t1(caR&~U4N$~D|Zi9t!JQwC` z2)&&Tnpgq}>brfRea~F_sF6iVyRybQPiI4;C z%7Pa3_$U`6%E;#VEyofnGxaY)F71%#IAp zJO?2-^uFa4el;q}DK*HujXjx%{p?kiFAO4Cnwmtm|D43wF;pBoAO7iRSxU_={e5#= zyV1R6*nLPaX1#cyhbSPzljPyQCu%%4ig)ecF7suY2i10Kz8q7CXY1`8gLXXy z$*(hxuL6#G;c>3$6LgU5d+@M%kCps`2U-sXh?&Y>*AH<3!qZJpuE5R_$&PIhR~Ezt z#{oOQ;JQ%wt_b`Z`HzYfS{LTgHt9Jq>7@(vOt%V5ft_}EEMfvX8^yGyK~55Mo}SP2 zzgGPXiCsy%|G}>PVBM>ME(|(^yDvk`kV6a?d;pd2WZ0Hy)NK?sFd#KgG?w4AH#1-^ z-ygb2-u*6nJR#i(;$!jEm7PyMsX2ux;lUEuv(uJyOL&NHZ(ywTK|TmIaXR~0dWkCs z+L7zb%AI@2D3{)oB~gx!{eyX@-`;$DigwV ztk8v?LOw=LIle8k4fM*c5rAJGuw7}J6xJOI(1HalS_N<*XVeDjd?C!+CXLe8|KLSl zMi%Y(k6Bee+4&Bn$z`-Lt?vk!t1tFOFf-RX$LecM%{I#%`3NMNuK!gJ-xrapj}-L` zC$cyP_8vz@&i#3DS!=L}`$DH-(2L~oN6W#xzLZ~ zRQ_YoGas&n1r8djoKI zngdK2GQ(d7KNDDO!t;88u@hC#-~Cd*sv;uY@WhJ2klJR*^tz~t8pz^K6}&OL`^QDj z@~R!{SqPKVKD0?#lh$E9hZ7{&36u_}js>gz?d9 zeQ72Z23wM!(I-k+<)cPfu59Cow2}lN@quRPE_Ob}Yt(K&|1lmK!O4!~!J@dZ&I6i$ z6cIip2@fGoLZarEf+pcnw&rp$^L$(T-j~wz&WGHy{erCy{heG@bFm9^s*8fGcMUM} z!HR2`7n{v5GZ`)E*dyxg(j1`LLl@a~#+~IS8qahL7Y+YATC;@K{rz3UA`t4EVjgOf z%f!L&FJ~Lw89ufK(^f)Tc&9i=mEFIt|&ry|-NvqmPf|ckbm| zsk!&(qGep_62kS9T8}36XGy3yhwt&zusP@G*m%e3o# z>HpShwaw~kn=R?@q_oV!88wrWEE6W$`nZ=zNVk)@vwU8?OUkhZ=?TU`H zNil^t*Q;Xg=@T~I5RlGY2w&V&x6Tx=1X4a1m!|Og7lXht9Z_0ysx0BEQnMg0`#zA3c9Fv;iVYFwZ z!M5e2%K`Lxn7ppjlLxjuAA1eM(eunld~wLfAfZDmfd`0%nKf@w4}UT2YpdK$x1!8i z{(}!&AR8B51KyN|y^~mENc1vfrq@*GJ+@6UXcq5cE>v^cMD{1keqLk#>43g2mR{m`CvOTySuf|BD|3*>XiiH4N!#f2oNT zzpubE7k!$o-rbq%Ek}9Qc?T%=sJwOcR1te~ko@)?Y-YT`#=n%&G?Y0|`ZH|&YuKraTLFHMz~~jGeehE7jDbz&vx^&E z+2QXeRwQ~iqz8e1@4~6?7;}p6;+{Uy2yT&c7=Hc1$DT`0@Pqjx;7q{DBNw|Ojp{xd?sp)A?=}ZRpcT4W-Z1H?{K1=}9#N02VMQJ*uk;qileTVrv9j8Z zt!s#STH)sr68yT+>5T#waUz2b0Fd?}2yOoLgc|!_<5 zs3_?iRX5gAH_}s*S2(Jss-vu;udAV>tD|G0r)^@aXL9VAp`Nk&Q9TRz^q6 z^>i#v^lXpoS(qNPJATYc--u$M8fbc0(gCdLh&&REcC`_WbCNu1q3UI=Y-OfrYp!o) zW@2x7%*oco-d5My%GBBJxTlSQhr2b_@0KqIoz(2Kt5vd8S96P73u*Id_^CX6NY@;@}Y(<`?7>l}n|MZY}F-xZ2%zwZFZlueEZZ<9c6bRc}|`mCDA(8+EM>jomHv1NZ9h zclEWmbyYWYk9A!c?QiAvXFnV3__Tbpvv2(Ro#CN}BR!+@15+Pf&A)j%IX%1d>HWJ8 zZ&rV-uYLZs`E&Ef-@gY3KmYsv|D0T9aosCa5PE&`M&F?8bQ_{cBYiP*|DOEpl zK1S&Ek58xAHkq=|Y)kVT6)n4M^z1~N-EP@jH*S30uqmcw(Rk7CNP_aBQrN>o-4gDt ziyglGqt|DHi{3tn@1IzDYUL!PVFeN=nUChOMovU6=B|LCmr)|}Hc47E-jJ!kR%A$u zl0U&1C#cWNJt|;TAgTJ=w(nQ{Sc#Nt(!T5)bo7<`vA5GDbQHy}$0shNXvEsqWm5xfkgVO1%B*Ys{U4$h8_bw79xcLD82D%k|QGGjZ)};qL}w ze;VYSt5qHUdSG<^J2Xd_#;0FUt#NUSLgs&HU`&B&_HYmu=T=Rw&N^xxDtSmmGPGWA z^)Ufm9`Z80J=)FLhS)L@5PiR~?2WzZcWb&ZxXoNwDi`)8m&+%+WI)R+U)OFdq^c>1dz8ctS-$c+ zc=obV+IMwh^ep2`_~=b@=~^$$(NW$av&5D~K2#pvyq|5uc`WD7U9gm z$M@FPixTiLozE6r3YrE-U1Ih3Q(VOlj{WTtE`Id3!Iu&NERS?|GV^?Uj^cjHFUcaf zx{UKye8U&t2N4 z^85J8)sXVwo|O>MIK;+a*%g>jo@n9Gm)rm*33l>qZbRL$lERjjeNTz8lwEV(>yNF+ zK+sLv@XG3_lhpZTX8Mc9SLF%wM<0lKtzYPCJnhwle3!zF5MSt?)qmwK>=cuH{j?XP zFzdAY!mW?urX{Oy#!SQQZ(V=?IxqAFLxxb5fqu*LfZ7RnDC0+{@>}?W=ziOQ*2ikKlEQ5|GRSMEa-WJvx|~pGxiR@@XSvtS z%6#eFdG*-w;FJKP@HzK4Sa22@HV4Db!-|X zJ4uoUx6>BTBjTfg!HhfWF$Z?w5iusgu+q{Y{Qgn~Z%kY`dTG_rhgOV%@l|5uf4t~_+=s;7MutqWa=~Gd74Ow{v1i_r+r#N?wh=H<*SQRxr(x+a{O%N zkvxCfa5c-h-K*W_

B4W@~+9fZf&xSIrf4M&#THP!bjsnRhPqk z6MJWGm_$1zwVunLi9$9Su7%lrTmcdPTEGWaCp8(sPY7{c6JfC@M}N48pk>NQA19|t zvzxC3lt2de+#Axa9egf3`L*dj6LVDYRF%-ai@R`ROT_6XwR!il8|426M}{7rot(gV zIo#aZsTy-|n$2F5@B0IBv>6dA9bJT|$>ZuD{c zc}Imtjy$3t-}Ogl+>V;`F1IcrIrVpY?EL&`r{VzGplHDejJMGf448$?^hEGvl?r82RvEFW> zr5*AoEG6|QCzTKUzaNsTp3tRU2ThI;4rTSUIN7y6nlDpqc?ETfwiHNJZ7&=(e)3Tp z8g4SP@c7#8Yt?g4l4!Mp&Sim$4=YOOS23{?d0fw1>1rB}9xCehQ5ub|h}SQZ1VUwQ zYYZcRXugRBU&T4QfGkAqeK8YEBJ)7s_z^zk$LeF0(&G5d6XzDM_g z(Y(xa8j0F?!kc>7%=71zPU28wYiOI2)}~_vHQiq0pC00ZC?N}qzaGAuH~ zw(g0pg&+F{c%D57ymn%nfuIvXqI&@75db5>6QC``=1XU^zJag(2FUWbNER#WXw`~9 z2qHzSZ5k|G#sjTq10Yx$KyC58R2JW%^hZa}_yPrcR(bjx-j*;Dwa3(?It&~r1fqz=P9?uEK9FLY8E#A|y)-CCF9IR1q*iWw?Nvk_+nryG4TDxd-wzDkX z)L2FHx|S*V2R4nBdF%rbu2W@(v|;~CE2b|CACE1-#mc$>0|-QB_TdbVH)hsLeyxXuTCScO*5{MQ@hA$HVsze3G&YYVm#6Run=aM zY2B`ILO;zuU5QJVHP7m>>3bEfqN}Oi1rPt=*8DS`;BPu@A*9=5aaK#{=u06-A}sf@ z@vnLEXcr_nBqSUb9o7>PlMOz`O8gohww444xK9hYpA6+)+!zO><)nPhNlE`Nd*@Sj z#9}gSBKf=cCH_PeK*<)Q15GHXKd+Kv|4Uk>qLay>10euVfKJEIaxQ)X8IZ#;dwP)R zT!-t0VU}`9}1esLXa83-I?kK9fy1N9APs) znIwocCtQyV*5H7Og(NOi0@z6;!Z4{yC|w%ltD!GR7Sgq&Lj%Yj0pvnwBFvlWt;9}L z_x+ zlv@0AJr~wj^xYExnoNd-7w3DiVw-7-IK=f2zY#i_A{GsfQ=x}gL#5(mvxfOG-0oK1Kv!>(g{Tq-Ov%ua`zN>(Zu zlFx|WTYr5!5m~1W*h~*R-I4&FN`16mv{Wcs4kJ@`pf-w7(QHwu2x2lI;`VGE$TT^p zI(vHoItBjGU0s-+v;R!~-juBW~zoVzv1onRPCPkfGRa8HZ zxAG-yRDv{NRg-|^JW>^Cup{6RcRz%wu;p1tB5PY{7LRHEzHV0+Z9|ie(mzI~s&(^9 z5A;@D^9O)Jc0e0tsJ=jjP_&}- z?2$R&g0do6-dP}XLGfv&tg=Y1yW^MpRifqrLlr`%%6d3ole_TSo~5LF3XGBHFkX~H zWz#8?oY{Y2is}fG0aAs49lOpw`sgJY!M~Qk?y1|R!1)&8>L~U0f|JX&#RW1{)MKM7 zntDsAeT-m-ksS@!Um{_$^q8ds@?+16=pT>=KUVpyI~HhG5xSfL*=V;%vyU!+d-T|o zqsK7#Z8D@z6M53^w;%hfb=}G@1+~`n)*^b%V)>+uI+a~^E1Nu(U4j?m>GBHt*|PnO zCx-#@fs@u+%)R;Fn6+fuWX+x@I`7i%Pu0gLj2trdlm+{go&YlG#`*f;Pbmm*$H-SX z=%jg6q70=2A^6lB4W1&Ts6cZC^0JUr$O(IgVjTF?Z1TjUdLwd^Da-Cl$*?upXAiD@ zePZo4N`=Al-MJf#$LV`6AzHf|YB|LNV}z=u#E=N|l5z(feS?_WgMi_i)g>4S=VRHefQp;nExR21MgY4O)MgiyixCiwg+ znX&$>^AVoj2d!vudVI)y%iMg`J zR8l8It!&GFr#mB43P==jSxUy418pa>+6*P~pS0h8YHBMT<(SZdy-_j)Q`!G!mH+EL za?AmcJ7ULlfpvD_GiArox{h7jF^~WxE1NWqAhs}DvOVXJ9HpbuD7p!AD)O%|Dvm>2 zfkCgDukrKr5#HSjMqn4M;iJ>hL?C>z3EME<^BEQMEQ#o4~|}r z^=H-^qW5Z1)r;HE_47v){ja2dZ+Uv>dKdd5dJNnimH)1_@Lw!wR{&dogHP9<`G%7l z+*ZhN-Q|<=g13qTeljdhCOy{;Ig}%7ng9W)M?thudLC-eP6>L*(<-_@sd645C~kbt zsQL6OOkwOflyge?y|YV6e#dFp_vM%H3KU<2?vfpq16AMO6kmo>XU`xL*j)Zj`|Jpp z-}lBBC(%~QXq5ib)Xrc3=`0nd(jQQl6RnPGJkdEEOq39{U3;Nx{z4t+=y(8>ttd0u;?C~H z{pPZR?*UH%?xnJWG);O2lQe>!x4OI+n?A-yieghX(qGhs!pRF9YM{C7{LMv$Q^0RwviXslVqNx>E=EzV7C4Qh<{Z}NV2X}oxJWJ)#i5Wu}$07@!Yh8#t{J?J_Q841B%Q{wH~Mf?o~A&C>_G=z$hMMqen zbq>H&9D)UZf2}>fk2279#{H3G!1r1V>NCo)#TCm8u&esDpASyPSS^=RDyjzOk;{pV zfyls(i2$~aKlDpz@g=w}Y3z8^Sc^h1l#$wJEGyr>vN|)Rz+H&&V?(8ztDPvwKmh_1 zfYkZWPo=iP4(0o>BZ$!9_nKM*MNiqXk(};A;O%{IN0uQ4BnuDgaiACgcb76gF{Uy+ z(J{d$6~G8R1|m==G#9dbg!h)p_AO`tSNNVW8*Mp)>rxU4}RK*=iaz`u4|q!-G}2VAOBTd{*?}XJqwyp5PKQe18hi-0gxGpee>9I zHo`=OZef$|GB5xhNIzd)N*8DSEXNlBhgNC{X1|%PxOw>ms7A; z$0iM2#!a4)CfGO|5$K^rR*Vtvj$s1W7*he5J&!refwFL*t1hZ0fY_7by7^1d+LRQX zEbhNe>lmmT%Kj@ZCr=4U?$fA0xB2f^PkR))4wf*k{T%qnL&Q$?RK{myGZNydf7V29 zur*lw=#)du8gwI!U)&HjbVC=PSCV^S^g8yc$xc+X0Nu;@qiQ>v$p*fbosD0LICdWN zQ=Gla!I-dt>UPXSfFu!tTgHeF*`yW@m=#@pV0(2|&%48uGKvED>q<3VbTYfOr{D}= zVmkRU?VgV!*+V44eg)7lxJ-y?9-Gt;ARUDfCQ5KMfU8?Z*2TF;buxIx+5(@cwb$tImoub^_nq{}^=5GErn7 zo80X!x@XGn?7RRvX@Sd&0F{EUcDh+yA~z-LE*<7L19xB^86bd6DOu+h0MB{k<9k_8 z1o+h=@GuMekU`#6KssUZZa?SUKHc_Pb%nx!qB%$9Y{GyN_te+RdcgMWVaweV)F#pE zQz__+u_WCN=#EM7qe_!Awz&n(cj)NNH?F;D_yJ!Hraai|qXz~uDg)0no{Q5p35p+I zP!df3Airflj#gNEav!dfU6@PV9}0t2TL82!v!q0{$S`{;xcv&m{>8ttS_CiAN^rj_ 
z>$GkiH;=q4!Us}-ubaRV)d+0}}OY;L5>Og^K##GA=#MuuMm3A#H z|3tet>TvKFW|H5f(Di4wTu@F=G!2DP?fg1kz4~lZOy7Eeo!+$Vjg*zzz1#8a=O9A0 z-_(R}7!D_=Onym3>X(PK_AA0A)6bIi6x?y{x#;Nq>$;8MB!g#)3_lZGX^5XutE zVhMXy~vO+L#Q*+k#`JYwAJ~!1rzX3Fs;vy%ojQ?U#JeN|N?5xf$G*9Z*iixJH z*i4J>Daq!$mOVh5$y)t}NJqnNaU{oLmH{Q6UNcG5nB)$~-mZJZ;S4ilN*AVzb-*88 zlYHw1GO#(_j$oVcO47#2t4}<4zF3>;+%f358hhz*lvBRZRPnjn-T!`fQu;5x=)CB9 zm6*Gl-A_y$bqjeYJsxLwD(yt(gYm&)w-#tIu#`2ZN+@w2(MYQw`fvv%h?o)uj7n1G z*6-l&Zz@_pvTmY=MeG*l`mab>i}9xvvoOoUi)<=4*H^qTH+<~EMyofme7i@&R3+Ln^S7mmC$#pv_^_$6d5+N5sw9Ekz zmUD_^9=@*wZANe(wWg=WL~(%Q8+k8EJ=2h$*dXt8n#+$7xVGIvsmXh^@&I9cXvXDTMy3+4tx5tt@L*^Pcr$1|DEXOw_oAu zhy`USp9`%Y6pLLd+pl>j15&T7F0I-1Z)LK_O+)L_adgelvG_Q5d1v$}EwIGx!U3Lf z(|q1NAc6WjJf$nb3&ju-wk0z_L(9f8ewvUnv2cHyVu0lays)0gk&x|LN$X=dm}6L+ zcch#w^LlAKL0jqfQwdn+2-eADLETsMal}~0(&ALKPm2}F)sO+4x8ii_AemTUklsBw z^396R=l9AGW&w^UMgzOzJXpwM_&vd#tsoi)r~;9ST2zxD-!rK{=y%`T~A6!shu+71Bue3o5J7c%2ugP=vJ7XfZy85t|)_9=P-8 zkG?S*`1V<`qVMOnuq}A%z3~0CZ)J_dOEO~Jvh-7)X^kl{K;VhutQKx*zsb%6dGw$~ zb}KZt60wgxbU+9D`IHi9;g}ZUrO>~c9Kc2Q^n~E{3ybdrGq+G{hpp!X_%QlS!R> zgci7F=;X(S3XqWkAen5KU?V1O^To3cb4xtr;MS#Yc_%tWdC}&2)ZGDr2F?PA!+&YK zN<nhYkkZmT>qX*#OysUW?TFVL$AL53VmRHaqSoshY!8SB{<+0M z&;ampsnGg9-h>vIuq-Vq(CiWarfW_I;c4={#>Wf6Yt9QG>?_Yx4uy9hA7x1R=p7^O zOKGULwTks>+^CNeXBSe1(kF6|c`B4^?L0Ej&V&Eh%wSV@Mn{KXV`BJs_1DK9r}2CS zs^QD4ZwWrDJmQZfXVD&R+&g@`-7$oiM0sDU=_*=Nn&7!qe@!U|Q5ZJYVR9k|#n|+~ z7Zbq2=ID}r3YnPTY(+n_6YW*6U#161P-3i!chJ?SMf`5Y9*c@WqN^6o_s+$H7mj&_ zi9mzQ8<*j3$eYzws~*EYJiJ}wWvP(}s@?ChTW)b{N;eUv^VG6< z!$jM2MfU{HoSVA{P4vL9iLMqo`Jwkvy&lqz0Rv$ikUqkF+^Sz}Yq%YES6*l4E4r1!AP6axsC*H9g>>hVr z$u+^7=eIv-BxErL=sDw_Ki1?MhmMcx6oUV{%dFO{tsA9>Gjxz)-E6CAZ9)8>Fit7# zct{)kKkuzYJ=0=>GKf~a*0+d-^_Sp=>eaSKl92J>+LXq`&`n3|MT|W=cZMKcWw{*R zf!KSs8q9_y<(sH~sKXo$p8DKrgS^FEXnMF5l6m1egBzbo~{Wb?p#kR?RhL^pIcVwUWzqW2~yFspp;OK?A()d}_)-lu? zMy~%YgvSDls-M4A=5$&KV^T}_Q4bsg>}ANb5|>ejMPI_? z^)1TlPya?~idjzy=neBXurl$A9Atn>tT)?S>IfTfB+l&IulX#XlzVOn|z{}1R zHNVq{1BJa2#9TkwGB+h627x$xsl;_e;tu3B6xr(XS6hHQ56zA9{{F;F)EbV&fDJsB zmxmYkVOSxCFgMi~NxcU2A#l8I*k?WSY@7(EZV?gMFOKJM1J&wr<_#|kQEyCCD*HL!OqEq<=alk;F&?+^j zWh*5=sxU8}QiYM#uTY?sTxonn7lH<3Rj|znec9Ei5nFdRhplq_;ne#XciK7nWXlh?jZkjuQ^)(l+ZRpCg0%G5 zODE&7o?8`p>sUSUX8F+(cw$arLcjgZM!lrc61*z6yp@qS!<7|EiQPWsN(6BPkrrr1 z7*X#K=36MLHf!tBuK~2bYc;*-)Dba;A z?zW@bw_kprCu~I^w!Vkl7oaw3t}|caqzL};4OMWk_Ts71hw{=lJjpKla6)Q7Qm&We z6oQlGCX10Iol#x{jGRLAg<^fJ9mNCQX@*(P5JL>{`S^VE6b6@3kPMH443IZQGKw(T zE)zpo=?*1|)w*W9tp$>gLSuj>Me>2~p_#l1M!|pi9!u`-oP3AX+;L%7p|&zi;xxPs z3JYw1?)rtr(J4Z{P?kr$1XNQ&ebt&US>DsW>p!9u+;p&I86C?Kv-#n*b;Yy$gBnZ? 
zPE5!@IJB)cs1fWeF%-0&mcZ9pr`HCZ{$FZu`>M5VP802prfp9pc2NpX1@RUfx%ptN zcSY%@LQ_uLU2q@Vk=9Q>rH5+;%nD@@BZ9}3;NxGVMl5)m!y_G&?vP8dY#0H{R&|3< zG=k6(@ka_w??!|w^CDm*ZvjEph1+u=CqQNC<|$?0!fANq3N6CUjaIh z%jf4?<<&jx$R8u-KM!OSR*is#Bzq+)#1ie3~kO8wUh zt|KA@_ZXAacA`Nm_vYLW_=XajlSkbu(m@keO=XYUtID=tKK%Z_zYR7_JN^zJmW|kc zNn0Cp3b9NhIerOtR+M%~B2o3FYf@Zw;OOrHm+r)}gp6C=4CH2(RG$U^paK3e>9QOW zogrN*2d(kYa+U-kEYp)An1uj_HcamZF@*#S=afD}Y^H*%twYFRodC@kFV{OB3qGDU z+gvF1r$HaI#u#}F`Jjg}427^!L`hX9=fswrktj<3i0_)Qtc3a>rbl!J=WJP~S9$9?H|7^prExVPrT%!)5){1#mb8)m(l=b|-=+92jU&1&}6 z$$>$dEb8O*di;oi``AU!w%)qX`?2`wz%^_A#~?Qa;({Ec`ho<$#LLgGlX*_&iLjx7 z2$tdSet!qEvF?39wGt2jjcT=_=Cg0gv;JfPoEymb0Y;3 zu&{;hWKB8lS+Xf!lNb75L^L+Pq|Y~gA;O6-LA{8GPr<|sx?aQtmPI_QoAP6-prA0f zm62N%wGXXR46M4F=aR&R3_5;fasAOAL3_Kr^t{2G)azA)z1H(57lh2?9SD> zxSgj_LQTKhJ`~pvux^z)y+^x9`P5NMDWM%$-or=lv0n9&7#I8 zu9oywt!odr4e3CtS3QpDIUx?9wMWxOqOA&cD)Tr($bcrss#f+0>v;~Kn@k#cHlKQA zQ#8d;UkR@9xwIvVvu^>*Q36ig%cE#;2_oOHWjgUjcfktSZmzjo4!k;_?=F-u7bGSX zUjOV2^5GcxQV98n-0VU6eB+j)HEov<)7@ErE_5Iwm}}iqhBgq{K-k*e8@)H=I+YlL zsdKN7hz&F&2-ZvKSry`rW$ zey|zro}t~mbKz;6Hc#7DbeSq99nV9|!a!l{q3b8L)-ZYgdG&D^+npQJn$J$DFveBp zZn?-5O4jVWSeCx!QsJ|?X3&JcvvDuM#o^DkUPQSOwWYatgV)6Me@izudk3(Pt|Op1 z12kZP{m=Q_vv;QsmX$R`i;wi-W@GPMN%!Uz{2*?L z4$gW-hovP*S*C(wj4#^7(e4nT*ZU4wu&(yKdyZ#S9ftvmFHAbJh$2(OW;spKXWIuK zJ#qNuRmq)?hmK80!FjY5@NwjZgV%CHyHgL}Dl+NrZ6JN^k%(G`*d>3<3@b`y<1{hAv}HL1b77?hfCar=&f(D`wBB|{kM!0(e zvh@#t4Gth*k;Kpnc8;a9O`mLeljqzW4{1l7pN*Wk^0BOHjhE)hySVmaHNe5D@1d%* zZ(=J#CW_MSzbX#wJ@Se6R|ohlro@CspR%PFZMl49Z7EwfA}IZlG@cX#F@HxFTtIN4+%v)lYf(2pWrltguyDm@7u$T?#4j%px4wz=ajbSEvve zJp;a$B?Cv$Nb}O4*iJYXVpw?jVrRsFg}8D@U5m>JpEOOz^NBZq+h5uJEbF_?%H6f? zEB9rNL^aO9&e7_)56r6{Ond4z=l{bm8ao-cAMqM&|zVO&ZAvOcRQoJCL5EuA9I|B=5eYg-7g^OwMDctwT+>d2vG? zqfVGiq1wrMSOdf@Qf5)0$>I3MVBuzw8bvKgSLYj-TA(pW!C@gt-TQsVfS%99STwPr zN868Q<-9#@k*&xoFp105&o{XsjxNKqXdmHiaYQjV1LjVoMoXbR*!SoPy{)dNI_&eTYj z$X|qA)C7z$aw(~#83A|4wlde9xPd#a%Vu8f^WJX0y1jTHbAB2FjzxXA;uvSe!}vx} zKom7->}8Ln{l|s)Q1YbBwsN|av`QYm-f&ANN@wv<{=`})kv(9gH_m@ej_ z5CN22hLmD;%i2bU6gN&bdPRI)^e(OhIumRL%q3M1N|mH!i%4C2W#Qa~8`y{8{x_ni zzdxB0AaK8;=ZoW0T4UI?W=Kaopc9A7nSGy7nfWM?E#t zk47^P{D}nI-IQSpH7$v`gE1?xtw6oj=EX&X@(Ym zUNzbom1ugD#{0k!iNX4a5CU$}(F}kZv$TsY@SHDtKnGI#ut3vlIhQ~Dm*lI%HYhd0>*zc&p1qqllgSK!9Cfq_f+;# z<%cBJipG#-z46qH%)6$KT2UtEjJ}rQNHIPv6pVD3*zn@>GFw@g4zvQ0IPPd48Jqze zQ>W^%%My2@Nnt22(dSm&B9AT6uTPTRj(mH5={)y%GT(GuxYy@d?3&&DP0QF-d+QBP zDRr#SI^Jv}(zKhC5IA=a5v1KwVSznpsbKx#eVc)1bB%Aq#o?KdQP&vi2>1zQE40G% zS(dDnc+YS?8{ZNI8n%uX=)f1i(aY`Ubnm$iOOVU1KjI@@MIb3nh=aTHejE%gi+4yd zjhpzQ+`BF{k4`ga`!`)pfVZqbpkfk}VNoiN+k*{2;^T>$#~;GdE8u@|@W20%|}!f?l{NWmv*u zO!vdH`U^0cAv4b&4oA${sEJ-6~-7V=H{ z0v23X`Y%(JQ6c?OncE$Ws%8;S%Kd$CCpcS2$2I@;*4 zvb>)*%0aucKupMRtKiT(K~B4fc-RhTNaAxt?=s!P%$# zt~D6NH2NEwX+8za07o4W0H^q4SIH8xi5 zTxyY{-Z`q?au;kvHPy`>JHJnS?sc_s0OAfOj=(dzlv^E3|JUKS{1i_7ow+^$Kue!_oVsU5I9n;>0hpXb2o zci{Is$a(HlJO^T<3vtH9H*nF@H`jB;DNilld*e$l8sFO#>l61fccGlQrrL*Lveb+3 zJ*UP1Km+&YmTQ(LTncnJz}~yz$a)jRjeGA$IT;+4BbTEWCh7hA~U`hvgd9i`^8E zot$eJ!1aI|d`$V`*hH@=TW?{R*NQ|>jYRixU?e(dAax_^_;&`UYCbXmhp{q_)y1T=!GpL=IVQJFe1j zL-nSCguK;9kx# z`GF_SSv0ln0CAsVO{PJcDh}q0YSlZ)WmIBRZ*XDxqhfj`VzBz)^dE^0VZZTJ_tRiQ zxPNUE^U__OF`FCV3OfG_7|+3oW!t8rwTv%JH@t+wkyv7R44p{%71!dG+B{8dLE)OD zfO?sr{+#O7KLGU$KqhjDbHLw+I5WX2Z0s1e`{DC(3k41aUvYgu!>C0ekgm_OrM&Us z2U_V4RaU_t+Pv($HHi;ulE}wSJfpil%X7(7>D)e8)stu3J?#U>M&`VlcKepTx& za@Q8ANZJ~%BB-blneDTI8_CJC0kQ*BC?UsCSnQMuIX-|`NnC^hcU^>96Iy4d18muX z7+rCHgo922&}oy1uJ!zJn;iOs2^^3FswTEbdn=v>d%?eIhl>00Jz%YWy4pJoQ7C~k zsyO5^wT*^1ER**;j3vehptH{}k{>D7sp=1L77-k~xaLhrjbl%MT_=x&q}6)qR!MI* 
z?dXk9pnyjMhLhN5w{^V{-=e4v7VT(u!ZV0VYH=2WXhtor8aiRsHnqpGsSP)5kg3*@ ztZlyl&Sh$6_8Ge<@QRR=*V&+z0o!-C5hIH>_f(V4t4_rZ0#Sg0oY|Pw&+hKmh4vvw z$MSzv(7&9us~9L#=e0#`98SR+E)>b7FTjMln8h#UNGJb$;YMR3-#<3;rFvlAHyIeoi!b$&i&rR}%rW=3^1nwh>b z>yK|Vrru)mqU-ggvu>v#{J`#?Jml#Tn z1wqDezq8kNdLa-jXzklktvx$KNdX8tllm)=I?ti7#Y;qDBGK%t^B_)ST$Vofzp{<% zGOUa#pg}$8ve3^KFbtxAo<}PoeV9M*`d6`(&(v%?DqlWAKZ=9I-W_+uTy~unsAm&DT6P`=I@{vnd_R1BK}QBW!p_&CQ+D-Pz3=1! zWOIp^dB1_Psn!&?$)P!sZF7TP^!Fpcdlzokfg+@W13i4R6KT8zttU!$H{0b1c~dUO@;ej@nwg%4Sf7 z08|LdZBV4zu@ABnIX7mla&iT4rHXGQpK$BunVtYXrrzYUD2TD>oYw`MoQaF^pN#XE z;Ri5(v0}+KCMGl&p~KDM8QJ6{T>s%RKu@K(HTlbqyX?>5?Z!|D0Qmb&@r zwzIWusPz>-Txz6}I;yr{K~5Ctl_kf(M#3uP=|%*`a@BaUG;Rbi<4YVgc!w|IMNc`; zrZmQr-*3YSw4_xAI}9SCIBOqhD5sv;38D8MLOo}0={||u&;HLzTwFJ%WW(`$;VYBc zFMXU?nf+zuYV5`NO0y%xK`haQ8U>I!{azyOKG8bIbiHjpPS|OcWh~E%(r>F3T7A*l zma4#jN0-_fC}EJNFT%6%b?{1rXF$K5qm$x`HS=jT#v=bOp~yAR9z<%+8ysDrnYwW27qQ*o_iE;JhD?j_fKiJ*5-Kb5kPiUJh+D%QX2eG(zN`bDNonuHz&mIsSzN!-V{ z+{atMF98JzXATru(ex_Zn6!Bt=;j1o{EPYT{TsKjoB{7yHNX~o$_8w@QXSjraR&jk z#`hJ@+HI}`>b(`)85IbP|4bp(CIG+3SenwcXRi^r&=i`h3&gOI=F7fGR zJw(A+KfU!NZQC=qpOAe{e@f3y{`9>MzQD;vvd_wS2YzNww#aw2xis7A-@lIMv=OCW z5`YT;ga)7tzd%#H_uFpV)7Umxa^kIG`)$p;j%>>H%;D-C)IGQA)<9xoL&!_nZ%%1F z^pDDXT7P((x$6vI#psMUo@{2B|AHOfy!=SjPgS&dxF@x(ex8rxn&il;iv3^%hw|y*VAcB7tRdH-M3&I>kWJmi_ z4wSR$!k@h%`auZkle_yQkD0Lf`wPfqT~P?(?{vRc9uo^LekB&4==d61$A z^a7&oRcLPp)Xe$zPG-3(+iV`auui$>XEKw#&+!+rkOG>$@Z)|5aSJE>hi;=rV6;5! zmQYF#>UZb{5C2d*P$Z97VoQD?qe`-*d;K@jW`btvrz$m_UEbQqbK?u$X8YX~7N|I` zF{i`t(yD^>^nkyYU0k2uUY^rlN6RU@Z#;ul1gJpe{hGzsE;+fD&F+^Db3OUF9e+wX z{1L9gMYV0I_(&obtA$(*d4Vtcy(f59t;;o%A6-u*mTBUs&#HwNL%aWgnBB~*qb_}3 zA)+%~FF8jj^+uBGT{RaY@b&v4L_*y!Anm8upT90%_sKx551`5pzx5ZNI_F_Bs#?~w ze*L!{>#rL%45^CaR216FgKlD*X?=$@6)H&LPzC9+;D(JVg8;Fajo6F`84!7=>&2V< zyzQ?^J_FicNL5DD7eM}fNKWGs@eijrF4^ac5Oo*)C^%$zan-_Zqs3*t^ugPPzz@E+ zZWNy}!}B}$6g;RhiIq%mg*aiA`F9@M#y)7<;1o1ebIE-xex~hsN?3d7J<81Yjf*Z% zujoh%er6G3NeP-F3X(SG6&|+_uMgPYKZIN38oayTDnj~a1#?9?*6;H`ez}=jtU};u zg+k#BSrA^t2KF}S?5+F8xj)$y@Ljwv~8 zFi(nhW%w3SX8KXahZN_dfMA&gT}Kuh`2=@LaD?Mn{AHW_4(^RXHMb%|9@4|*0rCBr zsE2mp+dCP4B9$r^Nia7Y+oAvwXe7f>D0>QupY@he-8hV;pZ}m>zo5zHynR|S!*`b! z{sD$QA%m56F0ZawU>IHiI0F~1zN(s86}dX0-QD1vcy%OXvUp*2b;jA2-(y9$W+@vD zrmz&3wDe?5>k615vBdWbQS1s8MMbr#2`^jfa`5X0nWb!A>ypFWVzg<3V3ot!rrI{p zP}y5t;(Fn8aY;>s=Eb>-wS{M$FBb+8imi%;I`rMXSuWZM^3^Nhiq+vOk8_ zN(N>r%fIygVY=-IsLOR4N@l_nw9T`$V{GUhBg$&|8Apn$DwPN()pDF!uWEVZLPam0 z$r5umW=x{0W9#Ez8*F*mha|*BbEU`J4Hn;)H^r7d+t9>2pqpqhJ5MRZF?#AeLtDA_ zt*zWQ6FQ?G`tz+lQ!=H_H;oKyb2Btm2pwCSdv80Q%NqPNd}zma_ZIt7A`Q>-aZWg6 z|50PD=iUGyi2n16Q)K^XrFHo)KUEq%|NJokL0ZKIFg*EWE|T>Ns|&`McXJBveV!dH zV}wk=Xtcd^0I3h=z5GdN&Uxf4J1)Ye^TzgGjV6-_h zwQWCj-U)RsqwABS<<;&AXM2fx#4R~C3o5o%h?<@r|LEIX6%%f?tun@@XXyLp@Wz&! 
ze8uE*L)Ro4h@n;iuZ+1>*u-*6CZ+T>#z*fQ;nRJ^-}1 zwD0KYnK;e5d}5k-W`}M9nVxwmcU73RX|_DjQ=VSt8l(k!YdAu?^s-RwuH|>$!Uq_F zrOv*Io36KQrv#7bRtB!f^V+-_Q6AyOKaQd(l^PquK`v9>rB7sli&#Yh#L>Jkq&=X# z9b9N+w(brJqTCh~zv!i16FRWuoAAOQ>qG4m+5S@!YTWlehC@+{*gPt>P#M6?RO?-3 zqa4WTg?ybz0tN;dvRce?>YksqGpDCkb2uSI!#{@A>90fouKLWg~5AS~0qo%#cElH$*JQ;}`x0Rr)u&GOU;PdfB}`;h-983b;!1a19g~=+_G|%Y*t!%YnQW9qIQ_jI0UpB%T)@&p;AD94nSMP0{FY^AxhM6 z7lzNlw9sxcD#uF$kUUIpsB^ZZN@MnyCM88uO7z%)iI|ok#NB8zQ4p>MV9HBjaHZ8e zxO1F@w18FqgaSg`+`FQ-4`QQyY{lV4PthX{$|*Vhyt)h|?0TMS$%hN`Z+}8u#s|4v zIcDV|j6C&{S#7#zF%v24eBbNVv?=n1k&)}@OvuI={ok_;kCRYq_ouoyP^bEl^`@2B zwW>e{^AnQ%a}(y2D~J*7lKhvhLN1ffI@~BEtRph@o5u3IyT=tw+L?2dC{DZwWe)Z8 zJLuGR2SrpO*UOn$#F$X%AuLE@H_@l8gV2S?8!n~aOk3tNm!Q5LwR08LL(J}Q*zKOn zi5wujK2N>_b&xhs_!YpQ_`r(QFd5box(_g&+JxQkeThkXFvY2bGVdN4Ka;qyv^L#n zuq4F_sy)R)^0j_rA9}E^>Xf&>%bKidR;-muwO*;Ds`A16W&_H_8?m=fxu!JYy!O8h zD=1jB;=b`Z^T9T02{OJoV3vUkTG>Bd-XOK%TlpA|amVzm3S;z|hi1X5)wz;NjPYeLZAF?p)hsw1BjI7ogbx{0D zp7n9HLzsTi4ri|3N;_a2{cs_sN_BgQZ_xo$_%BD3dnYS@^#3wGkBs-c?V9V-~r(FWU(<@-zc_`%3kcCavF)n;gmOenDBNTtqt=p zhw_s{n1#vgWc2aU)QsGXB1#@71mPoeS=C7Yq5_WRc9mK~NeV;qjA5mkl%&*{0w7HR z9n7t96w=L9h-1mVx|n;$tmL>ITYl=$zpBBcG@Pk!Nzw)|-ql+Q$yXYU!&Fe)sbwj% z&#z#StwNIwu2Iy?k|>Z?D=>n&#=;pR=!nTit|;={-<)!jYx72wc)he4Q`QCI`}0Ug zqzTz!VYqN%wo@Cj)~K7!;9k;3 z#;3yn!sJ6vEr}W+q9^>z36~16iqfUo+>aFK& zO-@+Zt+I`Kcbf9a&wH1C#TmOYAy!j@Y49MDt@NEgnPjT{v&F`b%UKr{v!5BmP^0eZ z66Uq~HtI#ad>CmiL~?}eQRK4Ss-;liMx`~BBQ!XC`%F{3DKj21CQXB1y5=*mI^#3a zO3xKoBVw}!Sa1m;53jh#u7c(yfqN8yqhf^Fy&j!3r4X^TU05vr=JTglwJW?wwDl4;NXArN_A~UnH0=2SZ zi_*-_1+{WpD=aHJ7wxiRTWhWDT0g(}{{Dr37{NJS@7MczT8DWGBwZ+{EUBuHnA*qb zW}z!sV=JxYe_J0IvzFrs=N5Dr86M+I`mfrwN?_Bdjo&|PF2Y>0OkG8wO{x5RU8~vE z`Ycw=nt0WV$TJ=pDvIpbx1Z{5vQ=)Ex@UCAx64OdmwJWn(O(`R9qF`i|4~o9fU;=@ zwp!1UiETCt93N#nuEkmXfK?%Am8dsM5EI@%u|>R?>om*}f);@o@%gBOhp1IZbTw)o zSufc%02HBI?rp0B2w@koed`rXsD@)u=#B~&$$iv!6|>3& ztdhe~KyIm?iD+rWoS9)6>9N@|da#}%lamQb*o-1OyxZN)cWBpq0)5Q9V4 zs^KbW=0zYkUq0t{=cctQEJnKFd?W7z2D7TE#Kylm0cude4SAFfIrpcW%FuGVOl-Gd z<`+HT{iXuL=2!H*B{=PioZBVm-p~?}g`=I8G=j5d51sHknsc$vyiJN*~<(QYqN50=*ISb`)djt+SGL7Tt-~b^ZwTbZ zk2jPFPd|ZcOOc_ZPEnR?5Wnr5NlJ0wOkumz3y|8i3{A?wm}ainTSzcaXB!> z?NiQ*VRbmK<)`MuKgwG6Wl z{$D4;)+I4cU>+g3mlnTg3Z!1eMi_L`Nl>Ky~LCkt04p|@Pc|cA1l7y0Ow$E z5lYd!QDRW)wNLbM6T4}cS7Z=8#2qi;j)TM8vBM61V*mZa+$OEV2c*d*o7-vNW@rVM z^tNsS#Y!>r$v1YlMsZ%lW@J;2t+((c8jZcoc_T|UXsSe06>hlo6)f_YmZt+E?w73< zBk*zr?(ZtuSG>rc1&KIn!Rmb@xgS9Nxxz@eL)UGTN!U+Y7BJU(wSm_9dDR< zR`dMMIJWBC5wjJr3dJo$wY#PbTl(Xcq5(!7NnoQ2Bxg*R`O_IAk^?fH*WRK5~Q&fA3 zBQC-yppBh#G7O6ro8miwg8^=}_Wu%WBNl!ND5oB#5iIwp%w!4e;L zv@JGGWZn{PvK&wcE<5T6AKPC_dp4(FQz!N7Z3`EJZ8Jg-%(k3{^Rghz`34(QKu^Hv zh}M>2Al1y>vjW|-eJ14BRUw>JpcxIe zxncsIy}1#|gvYJVt(-ReJ%}Ll688oC36PIz9UqCU=ZWP%W}ky@9X$DM(va5isE=e#E|)8u+(;-%|Um-Qn` zw~H3s{qt1Co%oumwC`!u27viReySHJDAZbp4qMBfT9*#n6ylaA^l$^lY0NGT`Oz3B z@BZZ>MIonN*hQnvYvjD$Jp-dI2S`5c=`7p|(BACOE3yy8$7`rja%utq_nw6lFk0{R zN1QHd>~7jC($HDQCl{@$q79S;>-KXRY68lhz5)JWo!+UM|$X+(ft*N%H#Fub4;G6I(Q#eWCT!_fSV{!pigo zn;PgOf#E6!okVx}8LXS0y_q)5YQ>;>?2%BQT!B1Tp?R=r6J_Im=BS){1xs6p#Gi>g zSuLN|cbMewyF7j4BYZt25}&aEu;~46BI! 
zH;?r{IQC=vxzEv36SWQDPUbci(4gFyZ2T7dv&z9uVY$7UEabAStHxo-0kf%)3Vkf zM|#gD{cAEG`R$$Izi7Xvu22UlWTF7gMnRZMevJ+lcGp@sul436OMb8BMz}2x5p=^b zmJ#tK-a7_VG;Rd)MI=7$GJ_Kln$jyCFyR(duU_5CT-NcZcEzyWU652|F15#KV0T%H z*D-m>CI&M@l%00;CFqhc&< z&%Ee(>l$9svH50MrJqgfEU3IOyt)rGXqEPji%IG+-}Q z&gCq_r50KBatVKCAkD-tRgaZViYU^~m|4DYpvQG#4ZDZNda9EqooL#zb=C$41pt#V zBakdz&sI`XHQ>EeLq|{U_PO(S#iwTfIR%C-IiIcei@v_w0xLT;i%>3MxwmA%(}CRx%qoTK0YjH-cjLxAc2iPT6}c>m%U5F z#@Y539jEV?Bb{uCa)KPYan%iX2ZabDJ2V1)o+@PcnrEk4A*E~U#}an1mqE571oX{c^~HZ^Z^--&j$Z6V|AsA{jMcH8;a*B-)~~Ci~Y( z?r?J@FFUWT+$meWvXU23@-dqJY(x`fPT1vi8;9qQj=LeXG6$@csn!wEJQi+b@t)V)xgPrN_?mqO@VXyxt;l*V;-K zyiqZMn;(&_t|BkUMF7fL+3I-99CglC-rZdyNSC2lxnySW6JyyrkHl#tx=Sw`Hv@uE z>?FBfLG_&9Cof|&>P7hJ;=hNZ=?fZX!aUD4gVJ@b`atOMzTLolI?|7&jJnpCIB#1I z9QfDVFOB`uY$}G4C!c8jYPl7$V&+oG>txTZj`fXLebl`loi+W(ICQ#~dqipNEmpez zAv#ramKnTztbkdcqLdq)cBZY~e`Ednhpr2-O}6~(D*EY0i<*1D!$pCu$et#2JZtYj zOUS?x0SX3fd~10yrqE(zk5iTUypudz=Ci~89d869vGpJ}Sx@@{RHH9Ulxb}kCqm@RiZ$=D%q|Bw`Pum?U=I{D zXHz6k#1ua@PH|Ekv`~4i$Q=YWMG{_jVn5tYuP+H4Yd9BWf3pE?viguGTju%boe7}g|QrU z$YIW6tIQ85w!{_zkV^(BXg5+7+a<92-6Psbwj1$LWK+Ycs@J_uKlgXj2g;Xf47y_( zoKrr`UZ*qAmW#a0QcRH3!+lkOrUr`%v{OU-^?_~nj47OlDs65@94kvSX`qpZxNI#AJ7p z9eIDKX(i$d>xSPXP5+%DBT*7Cj&iL{!1Sq?fu!$`|KuEal=j*8uUYZAQ|`)J$^?UM zSFW76>G1Zr%52DBr43fuR_%m2E@D=h9JViJGg8?(^Z>+Qeo1yxJRlN3JCSX1Emo!n z&3h~gy9HyYieLNzww50)K2z46HDGvE6$F2Z(Iv&ix6GHxG2A`f64Gcm9+ zW3?dp&TGw*Y0M~7j!pR}3fFC{Z!u2I&RiU=$Q}^A^t|1gG!Sq7VbhA)R_knj^8Rtv zE!r9p>p5{Vn}5(I{AlWxuDP4azA^XQDpnz1^_1bnMgRFjs!1LGMzGjSyTaD~&{dnM zsrzcp=Y8rpdRCyyK&$vChxWxNhluQX;p?h-u^yH?TGEwbi$&3Rz2 zu3SO1ih;FX0k?!^_PjpajC<#u$IBTHUx0r_K7T%on{hERw`SJc7=%~@yT9M)_rnal zp|;TQn8W#G-zL0@>#_dt8GekN`<{F-k)$_L^ACvJuDx`dGi=R&T<-k8j}~hX35~$B zW*)YpLBJ!N6rnahSeErmkXX4^LkYgL! z%QM?IOp)^=Fe&l6)LaCY%jpZ`&{Wx+Ewe#8qvNLnBJm}Cz77S;Noy^$eGHrgBPSaz z&ep@OW8{0i5QagwRNQhl#RXeZ;}Pa6fdv@3T1{K~AjTCD#rTQfT<0y+tiLP5BUY8n z>RfbGfzQ@6PAMqI6qL18^OqPcSVZ4EwsaW|?s?5xr;j5LQDId!MfymefP%lA1Ga*2 z+qu#cy90i-tVDHzGs^<8I}55CW;&nSmP}hn|Fq#ONUlT(q8Ch{BMA!GJ2gQegXf^k z0wT=QKm{eVFH|R0x7YXGp+{DGowy(JTr9MfO#rj51K}#De2kW$mPE;A&p(3xMv0pS zE;n*372HZAIhaiw<0M#plq9GH5h!&bO3DO~C?i~&J59tv_N~rnUIm$kLyop>VYjV2 z&EHDS#ou3BWH^M+U7Io&+$9*wqqi_Rr3A~7d0wNeG7{Eq8iCA9;evrx%xpb*$Y8VB z6t~ii8qMCVZLMwX;0LK~$N!lhsZgOt(vN_km;BiYjVLn^ejg0qGZa>O+K=%UCbz@O zMUpr*xy{79Via}B2{K^Gxsy1*qCkAQm4Ho3Sij4bAMrVTh3x#I*$O-jqs$iZEHcczg6Y1oG%iMBz9hFI zW*c<2A%(TX&?8A+D;Jiw1iBsC?Y8dd?$4GBPM(_%MisUVEU4UrCO)6}aTj!N2NY`5 zhU%bD(+0dwf=tG`se=&a*LKZALJe3rz zgC-WF2?vgnixt!jCTe^-2CY3a>(`Q9AZu#i2>!^u_MWCeA^8f?qgRajFxF z5|M_qw+JTAf}iI703%r)Dvo7cM;{5Y=RjKh^TTaGd79G z40k$vXe$EVR%cza=3;n<2QHLVJxjpPs=V$c&HJwHu$YoR7+QcDMp-Wi2y z`zW(a{>OenAt}<%VLre|b~-u6$eNwFTS19kYL{b-V$|CLhngpiP9x=SxYk3PziW z-lQNGsOfv;lmjTUMGeOTWJ)LeA>OO38{&AzJU4hn0a6O5eukbLUQD3Dpv`RB(ud^D zCh7*!Nf8HJvjdXKsLQ0(5Dl2bKDQd9PLg#^lffGhILkzJRzthTs9_pMXOyZELB$9y zOhhf$6t7mmt|+_&CbEIFCs8Ir)*}eIG7*i^ zKn-7?_~}#2aO<>{(o^d#g=d}v=gvsU3JBK|<3iGE_Qh!kEJYx)8X^c-{wRE{VX3)h zpA}|ft~<|WLs7DeYygyqAOt-bc?mL?ffB?jj15KxlGtP5>p;=0EI_gncs&T+k~70Z zr|l2rjqEu+VaUw1YGF*;aBcAkhOkOR2_P>NQ8IMY+%fV(jGU>UX39ubMjjm0en&$+ z(BT^KuHpVy*T~E(i&2=q1QzQw)6ru^b-Rc8THQd$KgYMp?3?XF&z@%J&v-RoIT919 zCtuDXX$Y3DkXtG(k$!0xAbEl7Tx2^r|P`fCX|K=h;a14qbD&3=9DeF6S&k%S84syP(1-$K( zCm+9)4L!>~n}ks-_0VoExkX2BGBKo2^>aiHrE2kEAhG`%vNmkWaBFd?<-JUX&<}y5 z<+mP&lW@gtifqTL7l4HfxY+*mGH)E`vK?81J)`@h5CGd0uSwddr(^etCPVpM?>M85R}ntfH9{59}^!|~J~ z9aGi6Io`JpAls zC}U6|pU-~w-Cg)9ahB=V&}r(m0!h)MjeqjT0B2yhcnqMhALQ0PRf^!lCThqS@YgaE zW%CZM8kkYKW8bZFj9XH!#?jq)LZ7Ak^e79sw(-rr1tvNI?s0m+H3Ax6&};i}vFRq8@J(+X 
zc9Ao0>4srVeF-Cc>;5A_cfeE;>FV5I_NT*1siD8Cy(TLPFNg4ZLo$}!pz1yiMg9XX-5&W1q~(cppy zYyNV9LQHTeN)15Z9c+4?oEoaOCy4Mbn<@c0U^hmq(?H8GYMqSkFMng)*v~Y7EB?jA z`0x*5C>VVLJh?5^Etl=mP>)CEaL3;2l1PV4th13KKYuHz%<6gE=PRp4voYpuIoTtU zX}g+2oQx+)Fxr5&mzsVn+@-tRwTA0`3pua(D(+#Y+=N4hyRXJ^rY1?YS^a*kb;r)4 znpXVGovJ?q#e*=NL~J@tLN~v7vctKOeK2zU*C%*Oq0AvfF_aPN{HDespf#!7%ewNe z>wF=3UfpY6gU1o!vOXQEG!yTvA7$sor?ke+FYWh8)-MNX6C73)-_K`T-)ow4?2Lq%&Zecz=hXLs=J!l{d^?+VbTW@TGJ!uB`As zL_*FlN8UGxK&9#_5?^0;?Il2?A@yaWMy;GS3h$R%d|dQ^AJ}zE_o#fS&14G!yxAjL zq;e^!ZaO{UUp?EiV1dWiud?B5+}JL}`djeOi~sGA#7kY=DQVTUfkT+ktbre;7E|uY zoDb8>Xm2IWbtyO8&9oQdJ&&|qU{D<;HC|N923?tzq`}y;C!#r6Rc|KLpi2IS#us$+ zCecfE+~GZW7QR9c6LygSm~alO9alM&Oqe9>xK9~arJyACRYTr5B39=!223E327ORc z^;r_NfOIskf|Rft1*!99{@U@tp)WP*M&44CM8SGk-05pqn?Sm7$VW*NAuo8(>oQM5 zZyKE2aJ>HbHtWEQqm->)*W~59{WL#J(CdcBWwz;z;T+iAE#!r{nMaAC+#($jS`|16 zy6HZVr6bkqF1d7D-B1Zpo&ob(%nHB+wuH8N)HA;IOyU;LLq)V?uN6n@K&q|YCz)zn zVr;38tuImTP3@(q)r^@YuOW9laPq_M{e%7N*%|LTZ1lW;_c9PmlAIMvE<78Ald@x11_x| zGT?ufKK2QLSbD=&4*FGp=T&2_uZoi5T7`IJh%2qr4_L0*k~*Kg{Z8t-(cubP)TpoE zjCu(uRvvdST1t~ZRqY-uOrN_mzV7*S5?It?u7780sWp64JXcJ6B)e3**zc0)=(3jY zV~>dG*a=l~LgxstrX=klN~j-3aoE+l3#U26 zcQsb~#_EZttcU;(iB}o%D97SH=RM)?3$Iz3z5jJ5PDR*g1p7RR+c?^^RHvK0T}J@l!QfLjpY%N=t`HMTI@l zW7(YLRb|7h2bAmNtCSFavsndUj!#}h*_GRzcH>@1@vw$o8C*{H)q_&Ckdto0{%4MX z4(t&Q!Ikz*$_`CdD_Iev;`xd(T3nTwK9>(646v$(O@i>k3MVHHUGzr4J%TC66T~Fa z*y2D&hF)uB8xVQ+T2e^n!kg*S_G;-{{CD>sfXqcGt4a?~tPX^1&k$?mO(=YH=HA15 zrJgTka%84`>|yUiO)THU-AB6R)~nHp$>=Kh!-AUknoQSF!nxxwq`1qNy?C>1j2=yI%R6j+&`DVir0L;;K=q z?-mu^n+;mJ3^VdQdWg@UvQ*2>i}?~^5{Hq@YG&nCk$6`Wl@2wiMTc`4pR$9wIIYEQ zTz0)th@U}6^;S25J@f+#W@?Q-u(tJXLSky_gWiNq0WG%1k-Tuj+mp!)Oy zP1ohCpQo=z#t~=Hjlrh3kSAxBZ!> z2YA6^ULRVKGS%8FKlqUKExFOsLB9B=063Ia?aA)DlX8T6+jkcwJuBHo?~5F#KIB18 ztw9zu6%zZQC3X3GqfTyMPBaDmH#Ucrs^rvbdYuDV88Php?(YM0LVXILt0#9JNNhU6 zGP=$!ePv0V?@8vyW3Yb>v8Rt-b&E966Z3@hWPa3ppwm`j9 zK219ych)e;8z@8IY-(rIh?VzManq-?RLUh*&~dRDy&tJSX-cdAE2|TYA}1DmN}N4LHkXqk#`FNFM>!L9 zN2qZoQ|(=Q%=`(Pu_Q2;wsOhMZ{t=^gBLEC_2&6bQhF}09oZPvGHqdxyAGo#m_Qpn z?s*C$^nU`zQHMoZi~mZO>^21>*!ZfXd%*mudh(U&-;Tyk%(gkvq_>I#{U&#jcrO4h zH=Bevy-7$*EqRSyJS=oD(B|%I=Kr1%`t9b5iI48mpYB`Yxo^%#Z76;BHp#p5er3dS zqBX~M6ZLo(uKcoAR_Ww%Dght|@ZtJul{i=>^?>L2@ks~}oIO0X-PW?_b+=G0CEVV8-ajDm9-l#Pv(_?L%3SZsx59_fIWSr~Yk3S15uwv9{< zh|UR!0E3+d4b3Dw2vtUj${tJ0lT8?RVhG##)`82${i?brdzAUJ!Iv2tvfY&vBm3y%_mN2IAA=wa|WKuo& z=h|cwJda;4?kdkO{1x?p{r*20(RZj!Xi|>cDMLq<;YMgmOI>~+N`Wvumv(nQU*jSkh$Jxn3p+*u7fs`eH;+;`G*+%3>RZjHu4Gq= z!-X$Bz1O5fH3T*<+{f5(BXrOE3)#+~)2LF4zyw1gc@$34!~Z;i=Z(U%@@7b5r>5{# zaccNm{q6V+R;sT2N0e=TY(;)m#o{VTt-NAqGo^1S$I>NUENTo+211h?H_dF_yQB36 zV5L9t$CZwgLz-3!0{qcsQ+6DnM^fafebl4aC2Cj6Es;nu_X8r;4LWCeqhH526z z!XW>l2vZBjFwWSNwwD4(Zw~ek|1)FY1Tygc`rTIi)mFT*1s^R_y{VRlJUJhl78L#h znyynR6qW0B)J-Pp<{_%T&)rR?O3`1`jUKnQ22zDN?nGeFeI{w;%HLEMHQA-aP-`nKi6aIM3;BCQM)o=Y{@<8uxi}zCAxBxM;#E9K3(gL{@%5QHacE|mLlU&_uzy}%*zEA-^a%?-4>0YP^$>kPkpmCrf|_R)|k zdPperyH=XfU}f_yvpIZIRBXR@ZvbW%jQe|}&GAT^N1Lm#BHvv}St25*&@S~SUvj0v zYYouV0MJ$hdLZS69_1;(e&W0uE+SintC}y7mo%z4dLU+Q1p5xgTQ6W2%*X#>1-AJf)3d=a|fll%4Qx=a><8-(^AGi0w z*Q+oeq4lV^`ye(^4OVv`EE3HPid<5v6!Q^v1vL2uqI27}TbaL<6A z%P1ZSL0Xya(FrJ|OE9DF=K4M=;YsoHxqHts1AkiS7))CF4KKG=3QJRkIdRq-+-IC{ z2)mr&Mr7c6j62poNlJbZbTyeq_6s8)EI*psc*9r0@*CsFd6c7#Wnln}8X-~Rl{^GK z6YRUInQ}bbU|CcVVT7g|%Z?2~5kt@vXE?S*^}QDUxyU9?v2~X}`SZJl6D;{eo+s(U ztcKG4){@lHV zKbh1MXr2GSMm{H}c>=fAftqEM^=8$fZ2G~A(oIk2#G{pW$wLnuM+E5^f+0{9p;VoH z>gaJq;I)I3>h$l{4A;6XU+&^2YeR`;d1`QU8oN9hAf^lDnS^f$z1QqbmJo#6V6ud- z8Z|78D6z^?P_h73tlU%FhnaO@tS-#mbXyi%ak`bVSW%HLr{u@xWnxF6nCIgWa-jB7 
z{?~<{Q>VWUa!w8QM9;uPe=~*+quiIZ95~p`|BF8~&sgp)B8G;K?E$<`)gK-NqPl^} zMwNH0-L1iXKZk5f^P4N9vj?n1cyS7zmtvzx;bnl1>N_Qyju*T=|J%wk{lhskGAc?Y zzCIXS_Vm7MPJp#?61zaaRuU)>L_PoES@7{ywfRS$bLbAQwapF+KQ|qe$tPV8BhN=5 z7frL@6>{UJg_a4qk}7yv7bOl*xvF=}psCQViY4lbd3|M3y7GL}iL+~T0>CmRwqlk@ zEYS=`XrM5l%zae(X1>yOG|K}cZK_b7Dks?+3r_r{e|`fuYs@aVkF#_*`wU2zjPayKwNa@fTZu7B> z-mCQj3m-56dRTK`ttlnm52rt z1m#o=4w+g$Lqz5jKbv&AdzL>C_;71WH~MqWq#Gpwm@yN?V&*v3O3&B4*tV0wKllmU z+w66WQ8@djC%9z8<-|TXz$D}<2KZf|ofilNq`<@3&pFN0{Rg?@ZrTx8BV8WB8k2sDMC!ZQ~HYcs_ zeAJz^#(rq5|Gn&_&bb}c8fvC)&j_iEdvdBo`myF)vsJst2&5nTDhtXvAx z+c4`!fL!(OI_frh74XkU*JKS@Gg$7z#w~SG+~6wW?oc*Hp4BJ`Fk!T>-La5##WH&a zd)&AYoD#V7MlEh-Ea^N9+R4YG{snpRT+#|?zci>@;PBzDHbzESZ>&t*{!PFq3HVA! z-TL2vHNWY;t)?(ERd>yPW53Q~9R7-zxRJs#3h%8dOO?adnTwJmRXhE~xq;=W>@UCA zfexc2Ctz^VBn`(%Fsev#hzbzZ75xT^bt?HWBBYC;w;i>Qs$~T29K9f3TX4|pdi(z0 zw-T<(2?rx)Yf^^sm6h6qpg;akX+(m8Jcsr$AeN*G#a&RPAD_HXH*i=&YAFYd;{$!4 z)3;a@-}&w9jcD*}WHB$e*kkrPeo1!cN&K-dIS1$M++Eh|*>o^dHR)s}l{l)y-}cx{ z?E7u~tUtf)JN510*!p4$BlR5bwTsZDVSdJTm9(qbRZh@c-rnlMT!S4PtCYIf=vYkU zir`bcaGxO%wT8I>NZh@gc2~w>s{S!Eauq(Kui)N~RUW^^U<%?o2++9vz(NzT*iJWd zh*z*V$|#)c8&i)a=c?j1#8@#}k8pzu${FFoYypYlm*1Zi*Zuph;|s1#l-#@nFR}Wy z_)78o^C=ZIbmk-vnL-T0TUtEoB(wJl7c*=;PBfz*NzpaOygN1@66aps%FoHvVQI(G zc1~S3)VKbR>xuS}Yu=6xKX2n)*zofUXzpDi$~H(V#@Nb?)9T(@>%aNJyfY&XJAMA9 zo#|P%&GEqWzjcD%)toSOl8;GkNOW>_s8g6UQkiFrlE1$>|Ay63*O{fJj8z43NNi<^ z$D4uN?KVQS3unM&QdV%4UOs*@VWpWF-w~0^jbgCYjv13i<23iy+W5?C{i+uSNNn81 z-2+mK4vB>@O@_kMM&z2Uc<%7;-W^{@wu;P8wr`u#mL-Uuu+2xZ)6!)jYpiD{C%#lN zJ@d+~(*tW_Ax2=UM!3cAl96QQ9e##qwfwvx#<8et$L{$X$F&dqM+g6zlRZNi2XnT^ zU6bZ*5BZ&MA&zy6VyC)w_T*{)!g>m3(6W2X6NXk+dD;}|aC&l7_BAA5%_%ou9#z-x zc{pM9R?kB!PHb#B`wU?gKu4~+lp_}^x#j2?s>`}jxx^x;s<};jh%MG{;3kOvgsj6c z41nS^n3S2U7?7IF(l{}E+o&BrJLE{Ui@#ibJupgb1Cl3ai{8u>&o|KB+kSpx`g9bJ z#L1>CN>-i{ri>YqtyERZ(vStSbJG1Xu1<_Fe7DWy)%c%5`08J@_vo`KDs&B!7bcZs`4c5baYK?R?)w~$$GN2-93t*O zC5Zv8vHooc4zHr!Xk~wbUDQ^q`kcavmVH1Me^sn-QBB=UpYv%wx11+g$&<>k;V{gR!ja&j*B)hXS6AgIGDExHwank-ai@PMnyAsSXOz6^byNoCD(NYW19Y$biaWo%x7E}|wFH6 z?wBFKs)}bIN20jppOg-DGpyA7<{m*;Y(Tv2UYBNOa97sz+b)hM!O@cp3aXrD_KB#@ zt2`q99I$c4c1SFKy|8%0D^HBGOD%Ts(n9VD&pAww$I)kmc!#UOb&4PK;aPq~*L;EH zBK85AvBHWIgnB*_W|_WJ{K{GLN7UBLUw;$|^_MG@&x+WqF?K?cKddBXk3}=zpr9hPDg~M>5TMm1#N15*`eCxbCh#NH- zfY-{ZSE4Pp33|)GaW$UwQp5G@ij@+~;2IGjZBf(6pDLA=;c}Xh4F@=_1XF8zC>;KF zsm>LrWtrB_EunvK%c$@tO0>}-0K-cLw#xL<*>CJ=ZT`YT;q6{7-_sda&RFy6%U609 z;?HwT&?HScyPrQ}(yAAfl6%@2k#NeSa2pGwwco;Glk&F)*v>taBTqe%^R8<=w1OkB zP^v4q&0}Gr&KAe_>@q8V@RCQBfV(6sH=rnaAanE$uQPZ-fN05<{KgU95!p^eGT~RH zRSP&(sLICoG!)Z$gS=qK(t_(zo!5r~M3@jbgsAChrkz|3265$6sTr=n{Jj$MbCl5v zMG!S<(KQ;ZOrR%}lbnepC~3!5e^KBGgVkX&@sw`iafo!aXiKL$r*$THiH)hB7 zHrt#)$bO&j1-CyZ@^q{0*6Vj{8$23v>-^5DkM&0!s^qE-5>DU@xzONs+;!-zdHar! 
z5=5beE!AS13$df&#hh}NM$k&r*NyD;tIe+;$!#4<^!yTa{Vb6sp1qgqAAcIMC7QgU zXg9ZS40asXz)7NRs<3;IYSh}|I@IaC=(?FlBIT~W41z@ylYq7xAd4T%_DaUvgIvfA z8~}3k9kev1HLXe?HMmc`i=~GTb&-S2Ib?s9&`MiHS`&|gF;|d1*AiY)?bJR;WB>6G z)=jQH(XHfZiG%CXjT9m+*9P=dzr9Za-y5aHUp&ks-$OaOJ|;Q)bc%{K&~L4Mx9$H% z+gk=s6qI;B99$!?=roeJ6u&LWCvt77ksjYunn&I*1l(TdXv<=tB1J8+SV!w|B9{I| zDgjG5)Z;jg&{J7rz*jO0ZTX!Ohv<5XtFm_J05_h20wf|rHT$Wd#>PTy!s_(}5bR&# z`EuSY4zUq!OrS|!Pde8fuJY}dEIU)R&wt6l$dziI*rmm`sBhT(^_`wcp9~hYiqko{ zS^RKgWA(`&+}$OYa&Hd-YjYT8+iOkah$kalp{U2h-%zb-%C>6K_ntY!rleS{$W!(8 zcp_SeTZF-`dIFy+SNarz)L@#*=LG;zi{Fw$t5lPzL~zg|C$SG=W=_e~+$=r4z^R+e z5CLYQ7J{?(J4$H|1Xu>!NeaEUbQznHhB_0{C8q@NW703~q`>W1qsRmoywK9!3MB#X zN}Mrq0-;=wdYtN?uymQYWJPJ&;lR%qnpqaNdVIR`pXa^k;(f))o8^?iXP@3*=*s52 zD_R*o4(2XpzOwkoySqWSRWy|S_yuW}+Z7`js*_i&{sf#3`(h2Su-HI{3Ov3bLt4x6X zXf~}QVjs%%GAMP$#hA>dbDnX1JX4Zz3s#ApK$uZv3sM zWed&}Dp!_EUIj^=*b@Ku6v2?ha|k!90q9Tcee=o!Uw{ZA5sZqFag1J%S^yH^IKV@c zaa9;rGRM4s){$V)W(cJd*K-Ge%>xG5C@iXrcfjKLs4hxRYlH31#JBANi2p*6_5`fQ z$R;gC7iB@~V-@SEW2m{BXnx_IE@gIq#e9@Oo@`Kd@6R+0r7fOWu_-p|>gCUH}BTaIqA!|Of!ceo^!`4oecQ7wI_+lLSdRN;Iz35k`2 z3`qoPalC7{_s@h7lZ5U)=}6rEJ5Thqp%$J3rxQ$A;`oOF-q>@qtZtieY|=*1oFLkn z^iEdYs}dv@M%-s3$V=$9DF!FU3W%VW$CmiiqXcx3U{D2=7@W|asWkv4PtWZVPx2N{ z&3ooUOdntDDm!ZP2-ByHx^#}_b@t9)u;^;}S>x2%sk(3axmAsicECm_ z51HWuLV1?SrqE=no}j<~sW&e^l+S-$|Lv>TrLoM1jXSDHwwt=$iX?7< zTdi2#K2HXOQ+tGOs~z_cMuISJQMod^k~{BZXy2mW^~f>7dD0 zl8~YvdRx!PBBFXKHr5zihTMepWg#^vS*K#3uv+Sx}M=CGtlaI~!kAk)vuX;t=claoU!OF{nJ>Kaig2?JHwm)wNv8vTySp3pr zA|tMGoKMYZn7hs12_=Q-ToLwm-zJG|_%nC4_i$2nAg_&j!px`nchzGrHTYaxBpF8hr(Px;ibS9H~3T8&r`o-Lq()>++7^}wGG zF;<5jCIDwNr3-2x73!6)8SCN!#!t@?+S>~3^|Qig#*1A{Re8nPs!B&!@ILlvk0el} z$s99g)my5^43mN-u9}{3ll!x8prd?|#SnjYk!#n(p{_NHCXP)A3#;fh1kBgZ;8gQ` z{uXhC#DH`exU_C0T)e!hW`DgTAWCVE#{YQQ^Kpz|xv{>2 zoH2BDz231Hr}r6%S8T>wcu(-oQUi}|uz7+Pc>E6b>KSr9MN#Y6ucz=W8K|D-dROBX ztz9yrG2iwe+w$RT%SSh9W_1@-a_eUb04s~%3 zG*tcGwfO#>&}AYokCC})7AI=?stHK|OX4ExmYabqy9|hl9>gX+$Q%4P%4^sm@f_@4 z_E(IH8Z#?0a7VB6XsPrXls4FccErPlX#6w1#aZL-COv?;S%gPE{ zZJUl}E~SOqVmp?VHPu*IQ%&Q%=l#Mbu8WHU2cGAC?)Kxm^ci7@g!A0U>&XOY!V zw~+LGjF<+)qb~^|@Yyyf!U{&8&}uBABMta;VY}I4PxDR)-Z#ySAw%m6H}*wl*yX0g zEsLvKy~Jcq;~Q}Jy`87hWQs^%`|dWr#4c9shhUcszi7SWa;?F6xlLz=b3qA}TaWSb#)h$b?OUk>9P(G9l3T6hO&ee^4gR_c zFkxXx@A>vV08qB9d(TPm2>>;pgifhjm&o5L$x?8S2fOO7e=5%VO~ZNwYD0$JA06cz z5zmCYeqr=J0YO${Xw*(CZ)}67*lS8evcGYTUw~b^q(5=zJ+|}A{1*jow_iXj$=gl9 zCY+5fl{Z|Up@!1pr7%*=RLe9aP z|1oyqCNTKH>3QBl{=sF?_Jt72s2FZhQWpJk-F0-&88IA z=|Wq9^2bodO4AYSS}V_@wiSC^wUyJvrlljkZnvLl=Tp+Fiu;Kxq-T8oW;{6>n@HEWOjhGYs){W zS?1uRSxb5V@9h2*4JZ~_IdAMEvTq&p5BSdrHIa%PgN#NO7 zE=fg*BcF-3gC71GZGNIxb{u8%Ir_l+9AB(`+`E>PFEP$a2d&TCyI8*wySYn@181IC zHOI|jp8JlOl7OO6`gCo}@|(MCvw=(ivmXgfmkU|))ob=bHFK5L(w97`%1ti%x)C<3 zR(hphck}MFu2*tcDI9@e zcf4-W3W3%pCH`y>pX@L`rPVZH!A$uX+>?IeqkC+?Smcv>uo!egR?~k?i*W4sFqycx?G1l6++q$D+VmlM`B^<|!S^#5XOw zveG&MD;UUKMtbvP;dbOz%E?`}Li1zLLf1#zbyKJQ>y229xU#q5?!F9vvuF|p>j+=m zNNe23yQ=5Iwoz-JV3T2*>>r#I+n}tb8b}n`8*9@e5&>eqYoO7^USs0pH9RB-bVX9nVi_|n)-sQiK zoSCBxE?2VRVB91eQYSX~-eWJ=^8Z)>K_?UKeI_n22b4n5Po@>dXZt-iiYq@+@@1e| zF#>$gZ&(-aaU2V0-!-6?v6y)yOrjjwdB{>x5J!ZP^uAz^mc>QA_&zvuMql3kzr6pbqPTBmi{nxk6Q{Rn~`>cJ_%bv8qEDW!72tStm z@Uw%&(tpLXu##c??l~IAuR?K3jr*!UhZ`40J*caLnLo5meh&XGC|MwwImYL6Y`6T5_J`kjcFm1e?AaFM7&jCE-;Nk?5JBa@BY zB{f|a56z~pnU`-Be4ieEdcjRgkF_VK?p$lG4r?N{Sbh%f=ebXWK@{Fi?Tx*gf315{ z!)xk;OuZ!XL)9N8TW?m!*m09xL^e(im2(irp`9~3;KV|zGFM#2uVw_7IaW)RMQ*Qi z6Nw<+&sB*4CT)G!QBB+939q=kolvhvvua#ddY&PEd-2|zo#a-wX+Fs!L(S zNZ6;Y_zufo1{U*PsO)w=J2TVv$MpL-_YPG|{RtkfNN4Ep{g?djtRi-LEw_cisOJ*w zCiuKkp7+p=y<2{@I~wzv1;I0%np@9aj^1bd7s)zjRED1uHgsK0@$E1uFwi!jC@@FE 
zEp-64V#27U#UZ<_%n&!POk+!<_8ZmSr~CPthn6k8PTeW%r`UC%)M#?63}SnmC#Z{V zg(N7cc5;cz{0~`y#(1>a2y=y(1}jJTty+M~Tj$8McJH&Nqft1oir#7?5|poq<&kgR zCd+TqOne0joOMb=C?m#dmGa7Gvtd?Ki8JX<)7G%3#X+%uo-ytG_e#FcecN9iY zctdmzllfr}ZEm;F&SW~$^AnD>=yr!4WpP@+S{1mjvCu*sx$d=DOalsSI^^iE%sFK# ziTG<|zE;WoDN7hK%7N{e^Ri^YjL@kL1a>(0>z2aF_W1i*idDVQGUeLYt8NKi<;CHP zP4RC2w{Ci$QAZqWZ&BMl4op}2JEh5zAKS!ni;O%wfGZtBXV@`mBC?*FH1o$Lae&?Ro9@@-G#dRKD-UC;rp< zAh|%ea_rp_+2u|uKMe9`V5kdzgg6F_l}OT+W@ks$?w%EpwUEMa?YnLt_NjQCNXaPj zQ*$v<#MJ5d>l6kb2JH}ZEXRPEQTq(jpwZOn4782r!)u7=o4d?km#nxs(4Rb8k?B8c z%ju*RH6}xiYi>+@<#k{j)to5knC=Cl9F+Y8D(V}L%H*jyDXOr=N zAcIG+GCWAnLYB{;giqAVpwtqVl{5`;Gb6;ror7B>$J8RtH)iFJQJmx)vZ~{TTjF78 z9pRsBVp%CLle7Aj-TT^w3nPLBPsBjQ|DbV>&>K;e=Dc7|UP#~O2h89_l^hFEQfM+E z*#f!JC+XQ0eQAIgWKc5Eef^r+&eZh=?5wBAZi+#{6MGb{dXKR}!+ltiMx#(((u%sA zN58!9;UmvA(#rUuez(VLj@J$U{@=*H6RW=p@nX%MsJ3yHDP7Yr zCI{s6XWo6cGb%Zefm%&I=Ih+TCKLpX-wj@vqFAbVVOCY!?dvE4FjH%>k$npCS9z&F z8h9iz?_%WWOal!<{0i{^GU0Tifcf5c*^~4XGmCc5l9K5N``U=>ti#xWe5X{4T!fG; zC#3o%K@4*Rc7WyXThS(cvWB_n$h1?j8#Q6e@Wj$bF#ByU+Fs{39 zNhn{h5fdu3g`CRF@W$J-V&=bMwI05I_iu!7?BCJe?XTxN|1oKQ>~rTmXk*u!m3F#o zXWN{A|7Szhzh>W$|97vS^H~N{eubD=B_Aiw6P3=bOSHHRtIyHe)rW<4tP~G5msU|6 z!4gv(1PFMk7)!N+v5x$Z8B`d=QUPqlrY0zG3ItObOfB2v&k!pNapu$QbLh^QK)l)5gL9NW# z{Zlb1?&0FOoM1bqh%9#HGDig6MGM>2KX8pk#Y=;uMx;gm%)f3I`N}p(Tlzdd-eRP# zXG*d7((`|xl_D$Mbc-S%J&)dPz&j8-sgVH~OF;r%>DR3v*jKZ-ERl5(tyM=RQ07jI z1y}ZoXf-mWgPu=)XBoo%thKi@a0ra+3R^}y!J!r){pp3K@-XmL1`c(b?mENurTIug ziEQuk&+FW?FWq|L=vBz|yj)iX8jXK^_WiKoLMnpsy0N9%e^NOkNWYh84li1PAMYdM z>bzFIrtj8?sI;OrAvA(W(RU2W< zp$8^LImMMW=5;yS7v7`qhz|&|Xq?i2GFC-dj$UgF%2Px=U4wq}Cl>$pSAK-PXKp=; zgrtdi85KKYGzpBx$)B9c!vua<7ki)>vd3_XQ!#e*Yc>!U4-)5oZ)EI6Zd#`oQ$i-U zGMZ$1o4gmec|32zZzZ!c1W7YJhAhj2x6Dqd-M-lIH{;)yHc-U>o~XN}I||L$6?iR_ z0vMK7P+BoqFBPWv`NN4!^Df&&c)exyrMtdbkx{xpX{YZrJCLBT?ipEOt(kvCtrIvY z5&R;tsA{8h8vF04lu*jEw|Xh><)1j8-Sqo&n=$%-)2qonmSO+mSH)ghlm=}6d;jx^ zgNmQ2i9pZKBhQhLNtGK z(%&PrpIX|ap$;=<*4$`@=NgBIW~74*9YT?73FHKbD#YS%Jkdern0EZ z`-75Poy1+cGtBU8T!~ATZ696a<%qc38L)E0MH&o1Sl`6m+rk_{i;n{~AHBQ{$I~(Q zQUHfExGYQijwi##wyrHx;a0$|7W3t8iKNvZe>ChK5*9FZr;ZxN=6>t7`Is>Ad0H{t zz3KON2{vIz8RZ{a#_9j2cTLo|Pd_;|{bCmL?Wu24qr^}d;k}GW-bi}A&>{n5&^lS- zG1g}pV_yOzzto~qRPQS!)c}(F>xc~q-d77z@Y}f7yXvl^j{w2ZhAp&cVJx&Vf*D--H8+Gk+k6;$Ll zg9%dQF!4NqCCeZiZBBB;GvHmq#0OW~!<+Y|J?-YZeHeUJGvHOK!THK8&ow`By*?`x zrl4})Sz%G)p=~wTZ&rX!6kubD#YLwQ9d~)HC_y*-c+U)cE<*pN_43r#*T_gWotP4Z zkwt;A>kZZmfnDP)JD&oh?+D!$q+i%nO>C$CI70u_Y4fd@G0*djO~rJ>$F~!Ac;zSm zwfQvr?IwrvFJ4POV`W+X;V`iaX6%zqlUx%lBx7cF35=V6ZdzJl9LZ;Iue81^vL5AH z|Iug`sU>8ItpDh&=&rJA08%6{_VozV#3AP(J+TqQJProp6tjfq+xje8qMqfa`Y%qk zT$eg)%OvJSR^C84VX)a^WYD9v851Bw{Yt_gZ37neASA@{gy4$`jM*8f8Gv~Qv$KTw z*3!ZC211t1GgV7)Ran={Fu*I8U+RZSE}L0t5$c65{_MQgRmn2A!dK?5O#2flzo%E( z&pFNbb8POj2;=gPelgM0Hec{*NJORAzd;=T3NOdK+5X%VGfE@LhI1u-jM1bG3T~Xa zNbx?0Lyzoa9rLtSPuuacKvoYz4oxsi0Zp?+thP?efDz^|gq84`VVAHf`8ZGyv&kTV zjLLt8k{3pnE}IAkhh}w0#oEp?v&SElUYAjmo|>_|IEQ@sbjVIi2r&2zqKC2%o2q0C@OWuDTXHVBKk}Bvnc@;}Qe&0BwUIXpB$T>vp95S3|`trpl0EoAWU;A z14>~0JJPx+dkyQVR+{__W3WL@nv~DFUjBEU@TIywH8UnIHL6+ZA z{zfn0NX0-36+%SMt59$^i1HhIaict!`oUlz!8}yS^^wt4=u1rW^Zj%Vv)K zwK}Stx%-PllFaLNbwafSTb4pcWaxXNoP7icXHccI^|+kaCT0YN_4{ z3{S4{ZJ_uLjOx}@6v0G6SR$8z#=B``za11Z-=kLtBD>1~$D)P4E>9zi&Z16TO?uXi z?q_;GNyCK;!y9oYH&EJLlT%cA^Eep&w3@g=zld1~$qncmbJNHu69_3D+4aN&Uynq@ z!Raf7gs>*O5>EdOpqndj?*6d7h+iPk`dxZv{ zn#TWwFq_7#=P~w!9KvbZSy1Mm^jE;@W&z#~o z!$)5Zp@Wmvlt!p>?VnyJ3$>RFd%CskeM)pp4L)@6blI+`p2jc(#l3L~#X&=15{*0K zQP;`MA`7yJu7o4shhfrT<74kDXFY5w)#0tSdagUo&Ihuyewqx}v4X}}H;;FS>Wo$* z(?=YJTT8j@&-y(-2#kn=OaE|fPp+I$(-=trF43@N6=$y^hFKKDd%5|j2z&MNKWa2- 
zXeojp7t%iiq$XMYgU+}|V+=nbS*qAni&;z)?Z4OM{O8I2om*LHB8b#Tcv5p}v4U0H zPT+7E_e-)9L-AQ00S4<~@mB*a#qx%6LVivE_WR!Mr(!H;B{AQ*OAiTBlD^gs?Amz* z`evZz2}$HJv(W$J(u^!{1{L*-4PgE`Ox|YyOJ|A8b8wE?{d>P|*a72j3pUQ(+Tx`} zmpa(|V;x1&7yIomK9w0Y%`!4eIC^``Zlbew8*RjO@X})|#)-=l9S^)br+Y5{8F*!D z74ObtFDR?_dBb;yADmn>*j7xRY7kPKm z#;6i_0TEPtOwUm!>r*{MmNHU?40q%fJ#0%bD7OD&dod0g!M2!O_}b%#x0fQ zVf^UtzL*6mT=60lxMM4krQ&7pe;miIf zXRR`ka#^l2AeqAuKZTw>fWkPZY(;e44@PJZy;w%-#cb2h14wxAoP<|GsMaR}Zr@GKSNXr)awAVPeq0;F;55O>k)haCLjF2W|wD=V4quVqaqzDo*9c5rmurKVB zI@yQ**>U2|zBN(bAe)(ZCd-qYcbl)eH~h5Jv-r``nEp!9x{4J0>Rs!Dj%BJ`>->J3 zeEslD(X2Js_Wnxjc{2OV8p;0IU)Kp6x%fNgE{Q~I=jza?Ch-DZd1Lj_oyHaoOU*sb zbrFm#!qf`rXfMg@kmu%i)$2@7tob9&VN9cLY!s=)ASR^Hy)JO`+XisYp9sj@L962@ zbiz2QAEvI9Rl+(Hv_%kD95oGgVQ-n(#p-qbVl|;*?mY8<>vABYBObBR!L8W^Lv-Jy z4<(zs)a3IHu^Jd7F0c%7T;E=efZAZUuRLTJbH?+;lmCyU10cqDAlmp6VgYV20_0;)5v~tgTeoh$RI~>L441w>^nMIh$E(y)s@L%55H) zQNH6Y@A?AU49KO;@*2jx9G=u48dkV>-Ze`U>k7pi`^KaT*SRE? zI5xF{OZbtS(S)9fs1g%nIcGk*Ufb30*s5(RVVTMGNlrynyFyJclhnLwt=7(nT|5c& zaAknFn4>JNz;Y^t9tbe~POAW__iU}(+}h6W#7P?+AEh3=vGw!KhULpF>W=tKo2u~4 zxW8eDFz z+k{rlgr9hpc#ZVQLDuq1?FybrP3y$?=JRKX0|Fx`N2RTo!f9Oh)`1@}-~{#Z7lq+ZEF&u3D9~7djiTmgXm` zOL2a@)iFKO4A_@yTD#q1y+!v@5p@5m<9@tJ&Z8q4wC~}a)R1Wn#G~rqnYAxS0St)J zphE*I6B$R$)g+0iJjI)brOS$Go;pa>A!6)K!}EYl-?I;!%qlwyNxr~^z_Z*yDW~r# zpIBb@Bvr1u73SH+<#ao3a3CzyytME>f%{f5$U{2vAFjERkG%VJy)%bmsvd|q(s3X8*Uwa_O`!xR;Pid{xocOmy z%I|xKMdA3RVO;pb&Rp@>TftP6+JOeUa+=uZX`;KOQj)PWsdRn z`1hW?K*z-_7=FY-Ce?gzkh)bC;`BtT3if?z98g=9cC-8Rx50HO^+N*(yh)a$IZWR} z-8Tob&YU~&YVmB700fK9@=~cnd>JEzW~NabeU$*Xj<(Fl>rNUyskF#204z!7z)Yf; z4u>sa0hz>khE7`KhQ{e9?pkbU_vHQ;#9rDVV_MqOy;I{;WB&B+j-IFL1C-CCwbc_! zdK-c>Ijt#p?==6gUyFF*a=TaBtUb3IHaI)zm7LF7EML%t_G<4mrx|yK0Xtb0e%+!s zc2^9YuXd^WlI%iI_BG^1`4LTslcT6KIz&wPe?6oDxk}LDF-aCC2Ly6Q--^)6+pi?) zFaECcSeJFAy+4DRtULGk`&-{DN2zIBq{ZSuA+U!8L+8DoLqrbt;Vs0esZ((>TE{-3 z7x5(?2OW}SxJ4V3Hn}pzdkZLIN3qO?R#7JEXbdvbjKQMUNt5of_ zx>0?}S}#?3N~C=&B`oVmz@KkZ{T?2vFgg}vp#@wHbe0iisec?03(07#FN8RQ6gk$X z#!%?$VaL3BNzSj)w2;~i7&jf(Ua3d0MZKuxqPDRU^(2ogf-;&53ug!3Hrj39ZGX7v zw4_Yi<(qNiS6q9-MmrQG=)h>ow=CgoJ3}qGbvh=qE&8w>mvf}g`3n|>Qm2K*qVE*? 
zVGXpLkh!zeH-yo1B8#8HCOz76@z&;F-A;X);t2j&o~1xUmmmsIrX}qLuw1jM`wom3 z+lHQc{q}%o@n^EM6Fi`yAz7aTdF&zv*=?a%BDK^knJ#*XC}$KmlrrGXr@{&*{H zmB)*zmX|n&g#&ZrpMHfQEDW_QPCUadlaeZFqTuC99+WUm4)gwtlwEs0axiwEzT?iF zFONU|dXf5O5_9gz?M0>dzWvwch08Xu`VL~JFRCJhN{o?QcqBhGJM32ccM2L}{#mN} z$35ivRVof+n$=K7Y&urcC!~CQ8Coh5uwu^it?L)<$}T(fi|hVh@tM_}M3XIZ!CxdZ`44h3GCQRlVuEnCmT9^xjJPt8fP8gI^?ir{Gbh~RH6y=bW3}PUH4HjY; zdez>sRaq}mkBy~T_U26mTO6v$`o0s4cH@8sUg5)y8)@v}9Qpo#1U-0zz7_o*roc>%V;*`2DD9H&^qBn1<2%~%`5Y{| z$86MMBeOv(jv`!VW+nq?2zCbOB4&7?CsQ+&EL?;%`q~tI_R*T2EbH< z-7F>K!UUH|fGnSmJx~dpk1E`AzwrAj{38wZ!MOML7Hq%3sCP5ij}W&4OIv(w7w(NV zyN@w%G(rdS$EC#<;fa$7?TeveW$)&{jugM`E*5v=BBfA>00jl1IWp|5b=Vp{%Qn&L zQR4D{b4+7-rN**=!}fe*Atq2m=xvEVk%m8gom9uco#+6}1lal`#x7c{0}Ps?bBtQY zGac^fS%9r@wWS_+ST5cs!#Ki(5M;iZL#hBshr9B0vn?_(mYyS-b2pcdSH*l)nSQ&@ z4zSPqk!`u>)8g?hSl>y4W|CMlNetzHWWCeGcPGzfg=AS_=mH2I5q8yqIGSh+KGCWEaBy;BkFn0!M311a+4`qS} zPXlBvfYCQ#^$ko-0S?BVSxPCCQi`;=93fGSHY6KrlsZVF#riV#ej{%C@G@SB**3JK ze8%4J@V>nVTi7)Uw*SaHx)5ArpuW;!|IZ&A0ARA#XC_F6G4GGou{nT?-pNUaz6u7b z$CSy)mLW^NZwB{$t5XV$hBVn}xf|G-x?Dqwjf2J1SUT!z^V{u58hZUMb zB|xQ4%6%>6bT+w81DOdiGt6WZj`t6&3z-r0;VrO}3;+m#=U{)+uP)LMw=wW}u+m-z z`kQOhdoYK_Ry{4w99^rWYlCu^$0}<)^O*B4EKBuQtxk!yxiCyblFIfJ&mjK(>&&s&DI2Jus z0E7h$oFfOo=O|Qq>Rk>ol2Pue!A^KIB@Q>>V1WA$z{!B%<-~0Q z;;m#5#{h8p*ez(?SA#vcPHPlk!A8l%d)k(~y%5hmqbqDX@lnb zfauu`f6|LDcQ`yrER&H!bwJmG!iveZej$V14wUJsLkyz_sx>t-ayr9?Qek{h8@wTR z#S2ZviHpsr`e{}_nr;2*H$Bro%-sKu9FnU)nPb3R?8jYN4z0S0yZWPJihMByKIPwm zOBdO33GjTCiygwxo)o?B|OM-?-MiZQ0wa76Jw9^ zKw~NJ{uOn;5X5MM7L3eW)nQ$up%U+|+ats-IEOLO6A8%2jvQ@A{LPlK ze_ASCE+&yLCak}>*QMScIpr&?59JWl41DvSy0aCq{F6Fx(H%LHo!s}htV!JJ zM;-IExLwC_GwW|%qSkk9h1TB0&KDA@0OBbx@^>XUMUGqef_zp$w(mP(Y6i@C5L%{d z^?2Ms1pxhx%dd7~F%Stn2KvhgbS<_`Ovv!sm2I{vXr*J$>*+jvz}|IjJ!G=!sZTwy z6X@m)FaWCpY46oI!d4k!Gd%B;KbX(aoQWfqXIbt!y8GdE%uheiNxJUJSl_OCjKy4V zqak$3p+dH9(Os=k^s?coW%<1Fe%h7c1M$U^uwSui^YXo}ubtER9fS`*J5gVF4M14K zxMi1qYu7E@3Ivh}p^mbdy){@W3Hql=#2^f0H z%?9eP?EvzZ_!8apn8m(EdfC2yh6cQt;5fGEjfQ%4)v)E3VN6s0i+96^B=r4134R+p zRwE-T0=iZp9p}sLo_i04?zkKJp(R9r_tC#C6@G`kpHQ3irBND^SG4^}*gi#86Qwb3 zlzWp=bxH1x;ee8Jv3BnOs^&Q;0CLMsy(TU2`q9Owo z4-XbTx&7H|-ab+JdB0B=~-{+wvK z-fPJ^0GKWsGPOo@TlQrz`lo9W_YKOJ0W3{u#`v?dR-9G4kmui>IsCzus+mSQY@q3#<QQY9mg!`6XgI0w_ym= zvW{%VxmFYi*jBxmvOYJ41p-U}kO5*0j?3e2M|ly|0%(T8cL*1xHwBsQo@-y6Id<$= zbH$mI)7lg!|3cB7ANG6`DPXJL7B2_)z_V{Nj0W{oPXYM%=(;2RMgvl)IKF95&BCM2 zFK4e_cQL_o?nnG~Kc9Xf?ZI8be@n*mMze>XW#{cNes(svAEwQzUiJc}J(8 zkDR35n!Dp;nai!y>C?9oKfYQUbZd>xUyt2TL>F2ogEpc(t&HM@t_nD_vTY_i&X`5W ziymD9MI0jaYD$fgAVRSD98#7RAPP?_^TYr0w-_G4_ADW9xwoDgaHnw5)yuuNlbeEa z9J7q&3tfKSwxesbeaI_hNdYR-?U4d%1HL(h+yWE!9ep9?5Ye)X&0TBn3)Tx9vY#HG zn)Tpi*gUWdFsf+7;@M<3UBu~p+mS!M#xMTBZd~?g7wrX$HlZct%U|VcUd1!Igil}b ze|#Kqd!3STEA@8A;}0#X*F$-KL3td~pb)ax2bz8*9h63;aBz;Y6&Ds6L9gF;ut=g` z?<%x}!D%T;qu)i;0bT|I>kLFkaQS+|Vi=TiqK*irScmGYg6<5N9)A(+m4q3^!EYIV zc&+s6+w8bK|AVFf7xw)GOS~UOEGKwP0#XD{L{qntc5iI`;?@9~%Pl{4So@r`_S}o{ zulo1eM=#c(&s8SrAY;9y;r(qm?26i0{orelhWh6w>c>0N>;!6_&wryq-%@{n^}C_7 z(4}kT!mf<}UICpg#b$~XrB=VP^ahdLd)MuE8+f zIno+)WTECKAdV%hYzdG{4KA^#W81}>g=XsuPDrC-tTq+lt+dMxCI!}o)}(QS)%ggEhl+On#oPpA&r}FhsQn1uzl^3u${Df znK_=J{kt53Q{L8oOuDd}jkSsII4$olEgE^5N^h8xQ$JD>Ty!f(aj(SHJ5HWwtjJ<6 zT{bGcV*ELBTBuCf`Ot0spl|+@T*{2zzcYTu9c$#c@7$}zJ5+Vo1fT(w3Xbzp-tK6d zqx?#LU)^=1=q8c_^Tucz+4;=LJZBeON9j#KM=mxTKhdhxV$PiIaymIUUB56>c^0i7 zD(UsDLYPzOQDAXs;rb*6$*k`>%euzkVIks$3B&pVkGm>ZR)>Z?oUBJ&3#guQw5t)% zPCiO4o!V*abtlARFzI%V8#6*ys%A#y{&`~dSm6kE_q>mYYM1|OG+@WS)iRCqsaQD0 zDP%>T<%C*iO%UzWhEAftO=hUYM&Fhc^r0)|(Bh-*j(204a!I9}2F+MWjNMSesVZKb zP(jGiHP?Fyvq}{K7K{)}cli9?m 
zy40E|W1z@JSpFzxby;qoR`Sb2oY6`orvXVF6z;{4LJ&nS5TjW97{Eg85e$ zQgda1ncb(aEE78}?p$CFW4zkI-CPJj>T1LC)R#SPA9fs30&Pr+Vd0}wXG_V)jaryi z$9m6~wkvzRi$N^=9iy2&+tVN{x_f>=roiRMLbw3^evohuU_i#{(S z*M1n~1PjsO{Z58Di->v_04h;-g-Dcb$ zW8+!HYN%IbcXO^AdDWWFMISvPQDh%Wqw%KWA6#Td*wpLlCmf@p)$o!q_+kVCzf^~~hmn%SeBMi~rD-Ej3`uRTtZ z&FlN7gTGa8f9>-8$G|ek|CfTmHi*Vi)==asBV`mrtPcFm;}Ri2W(b$AdRyccVgM7P z5F;Cr(cc&8dmS1Jy(+&U4m=aE;vz6(Qbw4kID#Th8KgKXMpyTEgj9VW}TY)IX&LiuNa-lb{L1pVbn;_Cn7~f?;K54Os zHioGz1IfJ=TlUgfD?@C}z|}8)lu#s^XS8&giv2_jd0KZ8LK)AQ(|xn?zUQ1GgLSHZ z3t{3vZsW%39GRtbH!dLHLFS?6yX`zm z?(>o?loQF69FB?;r!Vmzj|g9wNH!C8vMw;X$;k$lh4-bQGQ2q3!5VsJ(RgWZhRWiJ z=ILytqWqcBDZ8KE2c}NXKJumfFT(l|1zkFg6#5J!gm{5+`DOU9d*55hZq3NoXR@mc zWL*xmu!17zcqDDPzB=wn|M`Ah*D5araMdZA$w~;{A&m7l6m_L@R#9Y@kDKQ{5{xF& z1OvxGffPd#HqNYR4PTuUKf1xK&k}? z!{ZmYHtt!;e$Z}Bnq7Z17wgxUuzI1X|Eq?w{LOGEJSSbmzq2PJu$k=Bvcc@rl7Y12-7 zPlI)(3Uj&V5|hgr6#!AQli5^rQd7fxL`F+f?W#GJ?GL z!SBz&n=KV=4HgCU+!0kuM?uI0#({xF^+mA&O6$NG=^R9yf*6h3Z9;*{nnHfT%3^i! zgfd*OA^?C<8h*39aOG&>_~%MrB1SiH{E3(A1?STW7tenugYCWA?2*!W5CF>Y;T@_K zI-F%O&@RJ>CzW$FP#^;PUsucxQ%7@fR-^DS_Eo<5JkQnwHz~$Z1L8_`i|Yzk#$iOG zrhFpKU2GmIH?ufOWf_?J|54|z0Im7K^Ee6%;8NnWYY&vJu$@#i{{!NFDMBMIHE+9w z&k>T+g{1nE?|x$29@Ds-W;(T#(FPb5cUn}y=mi=Cc3vUv+``&P&+KF>cQSv#mK;o= zzVo{YR^$gcLsh{7b$F{dT#vQ;u3BJFy6JE$1!`BR(iMON`_);a_%3@$SPZ4=3LNBq zOpbb82O7<;Ue=|e06Q0O2+@3XGyt(Dw2GU@%)Q#W0#4g{old@VA!sW2+Rgp)fFKZ^ z=F=<1d^N2V_~YRvn+e6NG$omV+E)rOf|JYf)^Tt$I__=`n3!LSV&G8a4KxRK?i6Iz z7G`k>vnOuE5DP?d(`lQnN`m18oLP1o9TbN#vjO*8{y33LVVVY7q=9Ur#DddMKim8yRALsx;A$uQmZ08>p@QeuhhO_S7t;23b@j{h773@q z+latqjB-}T?W2jxxs%FjHrfOpeIuM!uax+fV$4=mPbJ#%(n$DHMoSbzbY(GfMHl67E-32mGo{O zc)MQ6k*aJbv4Q62YAlr2ePGOG^gdF=xpWSi*WcUTd#`(VGY`hd09G7mZ;nCHQ3nn- zN?#CPTj14#UXDd_Sy4D2bSFY~qp)K~n1>YO6sV3KRYf4!3VUd7oYIl6c7V?}mqG&b zt8YWhtn}FR2!5RaLPaw=8Nk87h47tS%#dr&u6S*@{kBEhEL-d>Th4~UOCN<5*&)tVShdn)E~R3Ux^?tGZMK`}IGWn}=_9q=Q+ z0+2IAVVp7JYv>j;T$S@PCsnHK40>xN8mYNG2@j+@dMi7Uw761$^&|V7dH7Feon4qBtfkgEYD%3zx3nzL* zCKiZW)erlRt$%SG%3X{XuCR}QyVgGn8VJv;0swOdRx5TM0C{My2xMJ>a?IROOdw7@ zw~r!{?Z*;fCZd=I;gSK8c5)8n9=E`{7zzbu8%bx!ueA`F5Yp1H^IWWb;RDZ>E7$Ps zEp{l_dxo?W;dQEnUGTu_gyPVl2hAG-@{BMXMABwiU1QPgcmUM1dQB3!1xps>4n*I( zyW9MXXFCkAioxHEV?52Fvcb4DNL=|n+=lP?DYAO6C7gUh#~oD~@!{huxE3As&|JvD z+e4fSiDAe`1&L(JQ#Zj$m&L*km4i9PUUOl6CA@g!ismJc>Rj)Axq2bWbV=8{2+B>Ei<%q^3ina)MmT)Rqaf8EJznHj6;BJA z8EP|rlVfEiE={q;;!G>q-E$aV33`}^LADXHcKg|;qyd|a1Gml%9NG|S|5deT2;=be zfmNEf-YTwrr^tD3=aD|kHoryx<__AYtaA0)8wjXJ{cx+oigG4#8%A+X&v0@b-gOJ! 
zV?q(w3Fx9>kU`z3JbWSgu&L(O<2U4<&yUD5K{f5 zIolg28L*jqDJ=MsZHL-^Bh5V*lmArlKZ?#gE~fT>rsn>$u>3b*N3gO(GGhYgPQnVangB7-L3w<7$MaQdftd0K zfyg5E((|Yx?{0m-m;)MC-#R01`neuvH7R?5h4JS7AkwSJGJw~NI2R5O(+d%*Lc)x; zkvtn3B=RvbcSf7AKZ|;SxTB=l{3G_kPyINJ;g(DYix;jxd5yt|Q0xjBSk3K@a%jl% zI=M}?5aoIO-e$`zhRke7;166MS$sKbHLux%mAvI%^8G-rl|*k89D!+VdPP1t^1#E%id_l7I(Lv(wZAI zpB*rtjqjU*Sm<_wY&%L$k_tIH1|KkH!Ng70f6u*H9yyTmEG5Zm2OALX03abY7)0;@ zgw9Z{cs3Xzk<6}t{={kGsm<0woAuZ@pzCDQnVVTlinka5fYXGGKl<6D15U{Qz*1(N zT!~!40g}#LKwhPtsD!??SR-NH2s01!;z;CD2d|kTX(TPv8zjvdI%8lKGYX=B7$k?g z2(wOjHXe^ji)F(Yv&nLj;&GSuVJn46_fN=u9zgG#_^{RXNW z2g;?c+`wD=B&=#>XuvdU6yS++ce(i=3^VkVXCo)!P?6FEzd8uG%C>td1FNrpn6Q>= zD-PwxDzdl5A=ZRuJ4@hPXW0oiS>vfT{eJLwUAWOox2V+KxY}_YmHKVJTc$QF@jj@Z z7HDAv(uM6Q6RB(W5*QzKQb@a%96&D)9w%IuI?)jSfZmRL>V4gqPsvig%{(I@#8E z*rDj-U%f+Ki$=%XIJSl{fe(t)uQjQUm8#e3Mk05Ltu7EQY?X#|=HMS7!k&u5&|1^~ zL!4fW4DUD|G<6xoA$0n$oaxU{iktlYo(1y3Rn3Sf^%7C31p2WGRwGAox+y*>VkBo3{7Ul^X1dZi2 z{H-PtwceVVWvSohs%7P#Dc*{OJU;qB@uiXLF-qwPo-6`gR`#R zGOfI6E4S!7!)G_=o-aGK`rs>-_qP9%^i9&gp}e&wc2byLnFy9)&x5*EX_JHFue1B* zEWT`_I@!akc+f=H)2#W~nvFAH`K2*grXS_JMeai!PfGi`td$S%Z}TcNu-`LCIC{Wm z(}hh}NRDAC{iU?!vdYBC@>onRao3bMVi|Xhv4x$$Ju>$+7px@h@tpmZFGuOg{8mJK zoh&n3wE)8z2djX3Pn7<;pi~aHdi8JZp0d^0S`;1bwvd ztz8>jyqC24Evry{?+f3pdgx^kuZ++&$cR3ZOWnK>?MX?~B%`;5ivti*{3*FdwB4B4 zgJ6^Tj*ncF<|U0w-{7cTzhMVG5Kg0WBq-O(hkh29Yrq_mp?Ij7v}Zs2HuJP_TaopB z?r=6uCNIxp=5fLNz$Oe$V1MRai)YYj3JhsF#I=JVK*S7S+|HdOhBJWN{{&AI<%fZ<7NINpn!6rWysG+tA{DSWK}WS z4_n&uLj}7#o6Zy(OPf_N3$7zrq-Vuc7Eyl{td9?E;izHOoU2^>hqZ6kx_{{jYAN$P z+1RkL$3?FyeE*Ev%fPfXd3Ip$hjw($uEFu1ZSt>j4~o;E5MEmo{eu^4u^%O^)Aj?T z2v4mY)Q9WC=Aigpz~M<|fM-4Ylp9z7c4ECS%h&pT_AG*4Uu$>8$TvIkj^$SH9G#Ue zJp99Q-DE_OPS#413cjaeRt2CKVjjsmTC6r~8u==LX)O%(sapSArsSF(Ne#&}e78C% z8_+vYL3#7@{v-q!6+hHB9#no+;ZiU(&qu`smg<7G(sKlc88sex$Nd|2wI?MA6AON?va37KT}3{ z=-->g^+;;`w6cll)PX&Xw@7RPkMCJHzxc%b#G=OI@4}EGf6DC4k#+ejoKOQLBt?$r zl2y(laxY6USg<(tBR$>4)A%GaKPdYoy)2XuX+(vB4ypbC;{~lwRJa_W*a>z>jH1@ghgZ{_fG0tacsK%kf(0N1phwh-4dekT1j2VoaVVz_4iz zVvoQrE|^RC*0>}N(j-kO#rJbjt`@8PtHGQ#0zM5Az?q5wtc58Rv`!d1K;ZyE`KR=n z`lW#346{blPIwisgzK0X;2`NUt zO{=KSy#j@S9y;q_4woMyHYhvsp=|x>=01}zEb{S>!R6&WTgrxHhfG(z(es-fH8BKN zuBsUD`+E3a(nn{w^FwCk%5#rmmry{QC)c5Uj_?o`r;Tw?0q`^l;Z}NK^9Kz~FO>In zgG#pT5YeDPrC^9>ASAp$WsjsQ!}x2qnLB%Z@hHG4gCUnU+@Uv0V1B7`I4`sdt2HEL zGQMUL=UB8-fo>}&^2Bd00)Po3EXvg|a8#tfTe-f#aLq;qq!~GgdQBX}Qb1Er#w|b; zxIaT~JO>k0;oYiR{ImM<(SUO;C!Y(Uj z{mI^<8_me$6aql%gu^N+2E@#CfDKCGKz}#v52d6PR+@Z260<{l1|Bo10ZS>KW_}R@ zU(+|(0ZN}k4=eiwA+Es~3@QDeX&|iV`)XU-#KG5zCY~p`OG}jtmV4Fb-mh3Sds_F@ zXnI*igScbxjPS_W=+sKR`@vlJZX8GQ-BgP4%NV9eB=I)>Pi{bnBB5ZMX?R=qcD*QF zOC&{nV>%^fM|SUHfM6)w7Ve_xA?M3sIEpPg7_nrX3zB~8L+)-A4*V*ls7`hX5-ngZV(gT};qOfP?M~giK3hwnXCdY$$ue#TTT`zGu%GNInRZ~V)O8Da+ zhhofyM~PjFb-kVM%d&T|-Xw1J)VbW`{J3=`Et7m;O0TnY>f87JPo%xIZ#q-uS=(MO zMy&UGY*%$A!3q+c7BW>2|Mz$aGH~tPp{QilsR;iCC?{0g?gh${ZvI3%X)NrEKga8# zdN6Da;J3-NPkVeQ%t#1eEH;C1%8%1|JFxmDH`s$)utf=Z09`P_rbYmO{)+ov)LqE) z5zC?ivGe|1$n4`|?yyCZOQyD7 zdW;z|hP9n`c<4T+ud0(t&C`_k?Uhmjv`2+%y?B)PHQzl>0gxm&za1e-iz3^e^rE;R z4;h{YU*QmAA#uDDaJS@p1pUw=X?b z7Hi%e5n=bdTEQJN-RxIGky2+QM@Pu_hoB>V?a7?6{sSsZvFcMc_aF-qVn^I8f1r*I|o9$+@gvi zoedIV?J$i2tq;lN(4Iq%k_9XmNqM@jB9G}VYjKA;hRK#M>n3X+^27l%njV)j6@}Av zVHs>4v`J3z790rRL#u~1VBk&atM$PmT5&f8Ax#-#a2!0WO7K zy}=#(yK;~+nuB!FBo`$b0e44qLadlLZu?KEF@g`tl*~Dh(Jy)I@Blu@y=+*mV~Vs7 z=355Km~oPpoQlj;&)33Ah{_6^#~E}TBNj^tQxYa8i@JYg!BuZfKVUyEMa|V)a^bJ` zZ!}Gh5J8(q7GfYfj7T~yPX`ttd1z48r)uWDC^S{~9@$0JQRS+1j5m_jD@;{tru zpN4e{4Z>TI@2tc3CcoO7toysRfPzs|$EpY;T*8PV^8=x_gSPaUiZllt3-((262ML| z$p=Da1*DxE^9jUv_YG07hO{J0#>RpYc3HZv*vi5 
zsVJme@B|oKaI6rFtEW^L54xy7(T@-BJ`V5kT_li^AaYm715!T#(KC%TTwS`_gQWI0 z&VDn>8Vcpx2g_D9;2b}G z_^P$DQ7fbBmf{+BZ#lh`))>H!-djU9q)Vw7De;ela6NGSLa!ux5LkMTOCDTDo&l(H z-2J_=`=$ZXlqBO77b|HD-M|g?lAvZJ420HZ9gEFds#!B!t@C$0f4)W59tnF{l2OrR zG%3MaNU>IGBSDu@^?HbR2x5TG$Zdo_Y;47&i~A?HkrWpQD$rV@<=&+;3OUJPWQ~d- z=dxTpT;(3jF`m7PS|BlqmmU-Jt4*eOna_aNep}<>R>zbFdxatL(|QEyBVw;Al~LUW zRfX@-$Ty9^@n|m6f^S#b?LP4gvo=<^@%_q~?vAhXl&)dvUQw`N*`QrP2 zZ=)xl5<_!|WA8Wgmg<KA{+=L6zO80yf;+3J&)OJaR5LIKD=*CCIi8(yWxyx)Dta*w{1aM*H@O1*av z_QZ1uc}x`oJ4=XqOHwV?;z)J&^K{ds5av`jo5x7A8c3S@ZMtOt{l%mSfMf;L)tC$> zeUxtWpxf}6?rP7k3tXlgspRoOgaV3dmRppEQw7`x(Zwp}Vdb#p6&|h-ICp{@vU&Z6 zcoj(wXpPC-55&cW+*k zY9w~b_Bw8=ze0ofF;KaIxm#+<_78cZUA%jB^*#>VZ79p+jODQ_AJkozd$ycjcT}^8 zV`bPZ)i(qwbMeGcfG9YBY~;Zo=2-}H+?T61?zEZeon|Nr{TXWA{ZgG&@KmAHV7}M! zhw&1I#q3KyV{%H%s+2SjT288&o-n$V?Ws|oDT8k>$g&THvqd}zwTqboFFk=dxOZU| z+Omh*Muy@5(wyqhmTqee%qHA}#5_cyz^EJ^lwv*Ny2!{+W;)6@rQaX7knlBmyoM8= zwYmT1cxHAQ#pEJ(t^c4&&JGPgrm&NcCWE!#i&*hMX+GS&!((Dk$hw&6x45^*?pSu+ z|FCXWZO|#zSE=h@yh*R~O9z0DUS9j-L8EN7k;;Y{&S#s#n3%6*xXk#Ea%z@CzM5n_ z*PHmqhJKL0Y2zJ-DK#k{`=}Wm*pvOh+{1E|2Mt)DC$t&+e-0*@&NmL_>N2{_#?&kd zf2a&~qPiD8YD^98a&&+%4d$~(V5Y@AFpSz{UW(NK&{nGVd}d*v9Zwelv!4LPuhAaG zbg$6t!~@hdoZ|_C_?xaz>&Eb1Lf!E#oRlWuP=6F^UR`w!?#{JYTQGwOIgRc6fbF)# zZAwxvL>aaz-)GoN9*QMu{QgdG?;I`YyERRPC$kLsEK?Yp4r3#}I;`5o7SvN7Eun3N zF@Y{thZKH|_hrushwsxx#4fiCkOZL(-PQ1L38{l;;mNn)s!^081Be)m>7wpUWKg8{ z-tn+;Tx^93LjjHD!1kfK4KI^(Czjc#stM3QbWVaDd50TPLmw1397L}3a?^n^z4-Hv zMV_YIo8c)QV|_mV=IP%2^n%*&9Z3&JT<8(TeJEg*zuq8z^Y|0%asTUj3F>?b#7=ef zBVZnQ9Q`;k5o7cARZ8vaS*%ApwvPv`O#C^J&@3e^lJ-0)rSv?|8U&5z#BMM?%l#XR z31jE;Hxwqe)C<=^a2?}q*pUHYPiP=DGam*!VOVqBq3snCD!A>9Fd z>V}niT^1T|>2eGdeET!tAK;6;|g?&+UkL?$GUb&C+1F)-uVjNrg*HM@@qE*?QNb zue$cYCz{UeEt+{7zXkh0Q=G^5m7QS5zUd8cb)8f6pXvc44bSb4^MQ3T$+x}M6J6Gr z>k9JqdieE)gTI|6co$2(VqAy&&bec z%NRg~mKY(rkf~C8zbt!)EYtm;$B#q2dVu{!&60P0$MMtXT&+=l=5qKE$GKo@;!P17 zM&VxYKjDSU_ubWFb}TGmKUM#+Zv|}FYv}Gx%EiSiuddy`dfg^hbjnYZh8M6EULA3@ z`mn3&=+C_+;;!`<-#))=eMz0Z&xY_N{tul;?U#CekwT5Stn=+B{@Yt$)nYT&)%8jC z`^uN@f4dYfWD4|k>ya)~pljy)dIxKl(rg&iQcWU&TB&b3J6mUn_dTt-+noK49n`4M z1qo8~D;CAax=S78-H*1w$%fqB>M%rb0vg~uig!4McN_n4)6&T7XDnQ;K;DhIA-{Oo zr4uj&_p&vR7Z>3B=7sTi+0Nljw}RunpEw+;vfF(#3N_O2Yd4r$^r7;Uky7!(Asp_I z55I9ZYtPof8*hSBXB)87cE|e8;NSe#2kZ@N1F8F=de@|li)5^M*UWEM6*n%~K-By2 zA*46lQN(z+_tx*+Oufdn?$L-cn{(9kVm?~8QZ_WcN75D?fBc6K*D=EXXsjdq4gS!YtxV8u_opX|O{f3YWPPhpIjcx}>0s+9 zKz-g`@Wx`x4f8`Mdy@_CJ^al`sp~)dUvT6Hvl`L!_f5xM^%pF5%fux#^NZPym5tTc zS69WP)_a{zXm0U3ny~T@#gp7}eThf7AdGG0%!p5xI}N6okS!GX4RqdfW`xsqLYfNJ zxsal<(VL@W=PiySurcn8fpL)FAZIDrY>vY5#>W&cYE@5qR5q2PRApmu3{~z5^YD)( z)O!X?wnAsyICjO1SOfrRqFl=I>yxcT%M+JHqZUAV8gQMjD>!rwQM+&21#6lH46iY} zId$Xf^;1&{y4~kPK{*5;DH7_nc~B8=l|o)w^0AeCJo&y}pW)uuXO65YpFV?b1U8j_ zC~FHz&9`>m!W#= z?@pn7$co9S>eZXSTy*J1hSeOX_fgV7UNJj)ioA-Q9+i(IU=$jRVcb2R=f>3vnNdDt zTngN=VgjH;bv@eJiPVF4|Ix86TidT7n_^)cZ(a`mcwJA+Ji!FvS^`N=h2f}UGfJ<} zKIz&!^Bjjjf!Pt!jkWefa*+SBdy<8qEW(nVtnZy9Lg=+e)9BlJnX$XQ9{Z*SPP@v-W*F^8g)o2U}&jjE73 zGhMfD7uoE4hTmeSa4UX`gfr3r!rK%OE>cjQioNWQQZebQ5}U1EYF*qi?51!Jnm$DW zcruq8&WTX^LtLZ{2X5r9CfN?cEmN7g7GwE_BML9q25A=8A&Z(Z38x_+5>_-%$M#9N z4bzb;?^LXVzx{nEt!tYc7usuw4+S5&y_M%WAXdz@dRSh(Qu6z8so`@b+V#q^L&DsY z_2c~*uV3hMyM}xczf-)uh5Wo#h})r*49H=YS`@C~>)y}z$)B#gO3CCHyqrWWD~b0b z8Ea)5IIx%to}6o4Ey85G+HvBEIgcIpKiRa@_<}L^5amPUU|Q|@_hJM|P%PO^GT6G+ zn3F}DQ|Xv@=GZ4j$Y2Zwu-rw8%~0kU&2xNwLuE9`(`F`Mmyk6)(_a+qCWy~CG|FfC zNgINM-Si!t+lHo0$r@Jx>AD}dOQ3Y$^yB8w4Xa>w3h9hO1heRegbM6{KWmuvK1>km zq{$6*FgaK^7quXFJM<@-X!1>(>*DuwL_GIJA1KaSgA%CrO+`qFZEsCIrFX+Qjh4gt zck&y|{K&?w)z9X=oL}jObnMA8*y$gGT2X~wGS7w$z4W~R`UqC6df}O 
z)b7*aVGJ9;jfcQ3dm$@9hbE6ZDWMEddMy3Q>=HQJur0}4r2Ne&bCg9X1+t+c`n;)k zBlZsUuyI(8+NA;bsY4PHsMhyQz&2oa(XhJyNx z@D7*T@w=2(hXAMd+6FH$-nDEqX!OI$W4WlwrTIX^lJ14;Hz32GT`=g;+yyW= zmBxR#kfz*48uoi?Tu1Hda(epsEmNa6HIrlFsnu+Ha`KDQAR8u#}B{f`oSK0l6EsWt40Q!jxHkca!|_l7O-&ZI>fTk|y;H4c~E`hrbd< z2eU?RRc?1Wn~c#Csd7ewzrKcT@7DctJ1^sdxg2LUoclq9vS$V%aRpMtF)55Jf!36H z1J*)>T&wgn%MeLNe{$*!(g3Ph0$(t%UW@W$@+;FtAlndzwoI+xWXx*C{1L#j;&KfX zl)@+%4_f+sQTqSlYS1I~dg*Z?q1UV7hLa+^%iU8Q>)r==K zoVpnPzW1q`l$ey}F*>GE=2Ke|6Mn3>6&7CE+(6us{`K~jt4mezN=ktbrVC3|@e=I; zB->Ne8v3y)`o2jPF5dx`eL$sU{1vmpJrW*))56ZFa)^^sx;u1lot9+jact1VY1xFx zH88C>Cix!bwgZEeAHU%6<`ny;U$mhnU{6lY3#)3L7DO`k7#+vnn}Iu`{rkV&s(NS} z(suW>&Q{`J)yen$*S_D4IPmw$7xtt4k3k;CQyZ2Z_~Y$-(B)8I;l@$^$F_yWw#!F8 zMgQ2?9gW`i>|gyo5qqmgZ(l@NW~elw&9gQNx#Bad>2uzeL@?H(cNzvj9K=^@H zEWXp(yRg075Eg9l;j~WZOPf<{-dkBenq9oICqf^VIN8P0pA-J+8rS>7A>Us2 zXKlpa)B(%Oksd2et1m}nJukdi{JHy<`V+h5;*U!gd#7mw7Y)wV>r|*9ya1pbo6;#% zp7+mxJ$-wc4evMuOdttL zJB!=drRnoldd=#k{t-{Q0%^5e#2yJ`DL}L-DAP>z93OS|-1o$K1N^BsI$Tt$s&Hf8 z^4`Av&lONB;19MG-8Dd(Ox9=e&^|1h;o>F&ziFX`8I#03Zq9t*0e=+>uhvkjrNgh} z!_!&+SiJtn{PN@=*`#V*FYudQg;cLjs#h%4yDqC5JiE~KdS^``w#Vx0r#nezH^-fq z7f%gK4R6&ON-h{?+M6x+)7n{&pRl%yIy<`sqnwj1-f1ySQATWd2+p#tCYO?J6j+R! z{)Y<(m@jNJWEcw_!$rj~(b*F89ue9p7xRN__>Z05+K5$VyazrJ7%vHl$s@<8@V|E9 z8Ea6lBG`-oh_a;)M(raHaCaY5N!m{PWhk!-r_ z(Nn8{(f*4U`|lg{vqsAw87M0Z11}rqF4&LSVRU=AZad3!G6c2+;bQMH+YGy=(1a~c zzE*W2toaO9M}!?dNnGHAI>5GB3HQIQz4KUjj0TaR!dFNrYsV>0OX!s<#B9F~OzN1g zB3E+}5Uucs=IR`u?Bhj&jZ^d;oPVkE2qK#$jkflHt4u{lZM&DsMWu;0KWW554A!|a zLaYrT`VjLJDq#aA^K}u1U2ordX?SM)`d;-v{Lhb0)~k9-9oGjr7R?C?>KTWmv_s}QcXNwJJCbNQC7&)k7~QFN>6>;g(Q{ylnpb5Q6@l?ie%VLJ}SXNXp0P>kiu%q04Qs{D5`Qw zvwGe^fe>&JUJA6A2JjJ0Ab6-;J4%w8&a9`Hs%d`pwAv`qu{S?lgh2`wJ(zOMCpR6! zqzXV}Sw7YDOgz4;sY_hWkP*zVUfb8MkSRiUx4oZ&|KWaqgqYIn;^|r0ISyPAlvFU< z+Nq;gsD|TzSI%}7TGsSvU)=MJ_QKJ%?9t{+6$U??cK&tpZ5{Cd#4NBatUMHddO~D%SDf= zh}F)R=Ug=G@>_kjU37xo&yNS*7}14meyc@>PwsvSmZ zs!jxTrCm~sP50Q6i%oOk->&%dO^25}+T1?3`TL$Jl8fs@A;9-FXPSsT}zj{g(6P8&d z+{Y#F5$(lAkeQ%&stQ2VQ({bDI+l4l@vu;Izs;%R_qPMwJroSf%4*?17)eTTC*>-< z?CQN%hqWaU8i85W3(ZC|^amC?lFJ3KPG4?7Jdp z^R?iede`gATBSwAXOwZR`Y7P9Px7TRZn}#>=lKRhF2DqudOtuL5^78mSI@=v;WDhGtw(=@OyRBoy;{v{;3w@W>gE^>~z|_(k$s1F!p3%22>8fWRxi ze$?U5p)KL2pBcNE@K=%8xmG;{OK1>dJH?0pdfnL*n#_<=o-=8asjC7G<(z+lD*S$? 
z^OA$N^@&fTO7teE?dgP;fu2r16`>#c>(c*(fm0NhvNyZSv|Y>e^n0|^PwM@2zQ%eQ z6Z&elJ+HqlnV$ik%0{1eCf9MX=br*l{Y`d{@bz0PqCTUjkGwYG3kAM%2NNw5p)&(8 zxm-p86C;q=c>%~B3-!>&kX{>M&;{{gAymJ#x>2o@RF6K&9S|h$sbK)dX6dYGw?n5Q zg4fvgP;qO=a}a8}jp}akr(_4fCn*6o1v9FtPBz#HK4&6LSj{n}dX);+U^mn3Y*?v{ z^1&V@_LvfXLr@l>XI|T7-C2sh-eNyywphRHUv0BpN%hxwMw}Oqm%Zruy=Ro$uM=~* ze9dq3ha)nBFL~@A)?N&XSEvm^;#%&J@`Vp`QlRGe`*t&ll@jDd46Ja)%fZifjH@uw zXV0&{3Uivwl`y zb72;CAs|W)F0Xbkv;A!_!kgcAe9G zU2@v;5}MKsEVar_q4Z?H%M~0a3A}W33rf*ow%6&`TpsqATaO?%aK?Z0U^)Vbw&2QzGVLN;i^FvaYL>`K${`@+IZa5Ucx` zs|?s);X;*Kl8-aC5Nn{KOk~@3xPV#4%Epjz6{?og!g|k*UWREi7=0U0v)<9`yHEEV z_dY)=0;2BfkhJoW91Q~0AcD5BGnF-bikm(1dg+E{4C@WD*u)V7q=OS{kX9j1sp-SM zT7F6>49%fsGuM6XDt`ICbdXj>e~=gGV0wI}J}QJY+xzyT-PlM%l5o9RM~%~gvQwdt@+kQ70cYmn@AsJxfCf*}MDXAW4Z5u~U&)q&;& zNx=3C10(;!|3)*lgnGfsVYro24B`zm2qb;gvpZclt(96Oc6X8{>+_gvR}<_WMz*gH z39p~;4PAQCB1E)Z-AHogymFl0lK3G_*LkcXwCY;=hxUuk&l2rRjLn2k7|}s1btR|?t*b15;O-=a(JkP7r>3VB3)n`SA#l=F?y+Dm_A)t2PDJhH$dQ@by8{rP@2A+tUw z#C6re=-y2Sy7Q;6P3gRgbem}Co|eDr`E<|enMezRE#|$e-WHrBQlr*Iq8@Gg8OC*+ zgdeJ)))4wo2C!XS5DlDYI%K{Y1sP8U^5x|i=GF=^K*|ARtI-?7?Q(U{V*&jP&wS;Y zIkQlT7b`bTs0O!A1^C)5RE-4Qv36-*%tJ2gkiafB9ls;o-eI%ioy5A)6ck*6;1oEm z$*l*yxx9{cZ9gvGl8O2g{ugwqZ zkT08s$Fd?cz0odxNb{7kbG@d1L)8)9y<2^8@@;WPK-(3}RsX{YefYAoj@DLAm}=Gb zEpQMCF>?`IyC4X{+F(;D2=KyM`N`}dxppLFqptq{Jup~wHGU%qL<07T~ zkYvG&lL-X^gx(L0y>YhBL8pA)LndH=6(Ibyz(=!M`rS|_yda=BHXwTA2*0<$T;RN8^Nj3cq~VREVBOt-jnOA|)#k_^-L4^w zZNaC;H-E}Y;^ke4x)+9M!Ct?p7?Q;neaa6Q;@;1})9ag4?p1H;)<-h|03!mJMjRPt zzlKz*h&0ktTdpvaxvd|AGly#OIkkt=cF(=U-jZWH(?E2TVK?Ik2gRksXm;|)3kkxk zncA~*vOon#{1g_r2Pe>T9>XU01{kj0mABz8fOAl+qVNB_T`ux6<9|?5sBaj35>fj^ zuFrboh$Ead>2(skTDL@O3pG}*^~gV^8krTBZep!PA1+BCv@SE$7^3TrW;c5+IM%=+ zY)jb&IUblYHGIjUXkU>8VMV5Tp`%9!5fZ@48f~HhmsayG@83YB%9+{I@7zL`+}Eb$ zQ&wA3%GOn}UXvCGaHE{oJnPye@s5zcE>bS#dS%P;)%{HDPqWizDW>pW<$qxfdUyuc z9C_6KJw{T%1_hXXS9~#{m0MfBNkHk z)Gj--Vori_8U=MbR9TDZJX9pkRLhL=P3vC%e%(+_XWxXR0&qA{;m2@ z!{OB`a@C}o;I~$09LK}25b||<#RcA@C+X`Ab1Xc^Yo=Gk>Rc1+Stdw}62=Dg4(_Pa zwF$`gO_O4lQ~2auO0FiDP20ubZtt%!uwP&4r*+0k8M{Amo&d<9XQ~l6=fM+u+S@lJXkqu>$ zAiS)wnoTQK!h3f~q$`ws+SW->C-{Djtyrqu!pNe{hv7H5EYWqolw;EzL5vr8=ss4h zv^n2$X5s)3yFvx7+6T>V=u(WQhL_$Y&JT2XL)G=Zx4r8qa5v+oyoy-hxhMlPSA&ZG zuvy&L)TTWN?_+8ps-SpFDHVG~h^^pa_r*#}lo&?nrYnv*+lQnwG~B*%QztSydv97^ z9uZ0;zEGxry6p1sw(~-9x9>Ld^c5+-SLa*~N_Fy*V-{kFBl0~b*JNDR;77Q`TOwj( zUIv8KeD^pwd^d<{*|i+Evg_;6dGE)#$X>{BjB zPT5m`>IQ5&?Z%>Rg)=gCZ-@gfJNjQyFG{e`G?-&qb4=MMa?bxe_3G((z^7RUG zo{2JxOS{h@K4uc1suG@ZY*j~gzAfi}dt&`5GTmq{I;K*mZG`xCDE$;OBgG3pA|bw% zlRhYjPX!CaW-%`H9oBz7*o>uzrZQ(%)7&&@nFf_5M;8yF(<#^_349kN_o;@^z{MSA z;^r}_&xE23J2*p($rEAfIr4gmyn%vqUW}_3VH!5zT>$ruBK!-9&X0M+D`HFzbJu-2 zv0aP~7hyA%RKJRYl-5X<6Zo_0V9%ZT zV40k7TY+IT7p81uDt}ZUk z%6k9WIlf-Et%6h;Qi6;XH$$jNamjOK`jh#c*Cn_b4Yp3Wh5OSMMFA}rz?`8Tm@fq@ zQyfaypoDTX1h(~nFr*yORW1&yV#t-0>YJw?v%|WG069la4wJ*CDCai?rABul|`T)N#;fd;q zwM($aZ1ll0-Jd^oYpQf&V@f6zHZK)b`)jbrXYfzrstOO2(rT*qg~fVP{8L6~A07P< z=IXgIE7CUzkqVH?fk#df4s(zG7T~TjiI0T%8YkF-4o7$@{pcx<`~)`vUV~}i+M|_d zOMBw57~d|&HAr@~ryrZr5OOG(@;&>D^MST)=4YshDxJlf(E%4&G#!PcZnC!JHm0A8 z-J5s#vE`O#3UajF`|u3oxB8C+ zp~CIEqCLvPTRchj^EUAxOk${&A0)(EK@M#r;`Q{8;;K<`$!Uc+M!2PB*+!IP z)i-dT9!P_DeCJxuf=maNTkdHlE?0Cixw+_`5T7Zwl~AyVpK^zUQ;r)?9X*3!Qvgp6 zwo!55K{Ae1gURCJRv%sLybB*ax`4QR8CaWf5GxED^J!e_wP<5B%)$Gxa}8nh^TrT0 zrbLD5Q(;1eV^V(vmx=M&GwAVWP5Wn1J1OX2cP_;%qP8syO5g->0XV9t&JvQN?$$kH z9oYZ;bXYp^3DEpP!iak3qHed@7u5XQoRPp%j18sldxmXRl0J*bpH;k%qN<{-Gl=U| zgnrPR=Z5BZrR<8S}1)ZlMjA$(8t84O@xDGjKN=R&eTTHx~w z$xsYIn+6ku_ZFVSoDgbl_Q8g>T+I7@5h*y_JQMsf02BWU_4E0q_!(5Z=vso@IynQ8 
zAW`a0z&{Ms?Y&-SE5e=EC~I?%9JrIP(K33vm#_1wWr=APl&y_<=u?J^HNQbx(2%E; z>CaWXf{&ML@){rBbQPYDU08m|UL>Ko}z2rZoJv!1?aO zI6Q)zL*6m~(;9zte0u`^CwAj@%#a&u1@|9oH0otCp11L61DCknm)O8P@UsH5NtOTX zj$71g;j8Pr1EJJo3t%Xc>}bNj6kq&UNciM~EmmOGoPqGKs1P}}c_=$?h#*%r{ruIm zP;t!q2wwU8@-;5d{We|cxc+qg4JH0u-C-g2h(|j1r&qEAYSYZ@#;_J8iUHw)HXH95K`IA7qMGa)8k z8ItpTaF+{_kq}u`iH=~dJ1mjIy>S}dyhCi<#=CtIZf2+o;Uj|i3>n?akY9OLIZDA@ zmE%uIFtOrTR>0K@Wq=lwmT1>#Wp2=<2BU@!zvPeyG#9PgFHT)oUno8-J%%-8V(w9} zJM^!`tI!?iQ5AS4upRMs;2NjXM`3XNqwDo+9(H#{*RQAu&7a~Q1H{>Zh3hY7h!=}` za>m2mTyFd?J?5E;XYw1n>lrEd&rZnsN_wt%+Kk`yr-Y=N!%%dB`LyO?6}3#iI}9Kw zJpmP9Bg7wnI4Y(Q%$`#kZRa_nV=hfYIxLyL>6)OkwkA}%n1@aLE z>7$64tH2L)61GXeb6|n)?F${_ksT3@r`a?&?%|pl>?a9%f`Zi@#Ax@cFN~qWBxwDV5X|~r@^hVn>F*8TciRx-UxS^a_ssL;VPy7bUen?{#i!2yz!d+`cUNu zF$UFy&y`~$h597qvZOB0C6x!`hV`vmz{ej7J`Itlgz8U)1V+bRgF(!I1b4vu5v7(; zNx62l_O8`n*Ns12Yk=cii%PMMORc*R8&Nlf`#|b_itopHs?8xnaq7(*Lik{jfh^-$VPe%=AAsU zrsjASdbjr83|xDrOt)xBsU24w|iKTn*E z{`Bl0GN*)09mxp`{j+%L$wmHP;nCCB$a~n&Z=V0t*n2nKz!urJ)L!+_rK5o?cAfEf zQi%B=SfAwM&OWJnUHRAhfy2_Ze;qcHY*%sIxvJTd1E8s(bUOA41Jf7xzVZ?mJ@Uf1 zfSY->1}onU4<67xUxVA>4x+BUqoHeSu~nk%nM8oiC(*f*_p=m}v!7P0z)!wGjn3`j zM(L-`j+cMhF1g5G-+5NWn*n7qUejp{^iB9l!GeB0-23!16OYQLiL=@~aMLevx)7nh zm-AA&fpAWFmiRX1`9=FI?0D;2OJt}3x#_f+9v3FNGF;r8#$)ed3Jk<1=WzJ{tsk6a zy^d`B+N+4h-MkbIpqn*ee8)mh0M9_oQQjY)6_GXt z)eL|41S9`y`Xpw9g1dGG$>4z|3^oQ5H+m-kJFm5BQ0D3)>H`l`&p;RUV34ncv2>);OeK8x~BvT5;(r>|#w znat^;%jRz#F)^|H7-s%bOIdKzn0u&hyhYsCnC-0PBUoY9+geLJWZ2DzT z(YXQyor(xM@I5?i#Ch}elKw3G*GO*k8+z&glK=jZa`}x=|GBjozPq}&℘kE@rXb zwJQMo)GYB?U*fBPhl)dmglidURiB>jEqQ$>(c^LAQS;cwmh{~_YaQk8f!8B`N@~tl zT|Q!H;`)NGGL;RI6|>T$26r%(S|%5of&|>?M;cc2=Ykhlt$WAJ_Gfp2 zB$Z|)2;_uqhTW?hC*%x$TDbe3jiy-r(>K5J_11${_aqh1&gu8Tb~y;~fI9PgFQyW{ z=!Q(UA8#Hyrgr!A@0*GA!T3%I4U~4v%$mSUQc2Tnwwv_f+AukuRhfibhB)LTG zu08bG&KX`Z0Kiqd(9CuvAy^@?Ud2iJz_{@=*Y%E-dyl+0b4|kb=jN+2(O~y(&$WF< z(94cR9*&m|e3OL9B`j#)s+mmAFY869&ierR1G64XsQH+V4oEhLh}NDZ=V6xwt-sz)4@Kf-+OSpd@QK!7s*w$dAJ(B?%S*t$v2`1NmV!>+v5#CWBH?3>aW$x&V z7QDM&2kR>H(Lry8ADtucy2QSB?53TNXX|Rm?yN+qC%J8D`p0{{U_c&)Bx~EztiPDq zD^@LN6h-B!=X!qbH%uIRGnA=9iJhhj>uV#TmM7G{6o?fFyEKJN}w z!hAk*?k9(AYpC`Gtx;w-O>iip{FHfpjri4SjQJm?>3`dhtL{^$rq6~Wb=J?dH*{z=0w5%1s7d^cdA3=1s06^erbboCIzS<5Kn;w}QK?ce0i8fGL ze2vd^alkC`;by<7FnwgUqN)+++P4pe8Du6(BsbA|)}+9?oomWJfDK&+OIig1)sh0n zzmXr(j_&?Gl&?7j#0P7DqV6zojwTA|n-ZDF4l+PW0Laqsr9_r6Cn@}mmKoue#Vyo@ z#w-eiuuVJgX6$}SOHQ`s3=?-YwR93s=oxf^LfOHvx1TP3eCK+?o&I0?AP^_fLvQRx6uU|hH|Idx!4 z+E6u@&__X2I3RqAgySK2*ogTy6?^_`PU#lSK)ub^klo&fOBaBo7CszVAkOL=WET_Q z*$A*ulAz+uxX1mHRw_Y?SmIn=YjthJhvYL2wg)fh@c;=fL|0)V&+7M=y{o@=4>!VF zO$da(=z<9KpKbFbOM2tk&pA^q1}n;+L*0D2+Y83u5N7{*Ov%r;i5kb0@a<Uby?X91BLiof}B&bKh6qDi`i z3!T@rLplB9<|q7PgS$mWavE+E22V82fni0^6azlujI4T6ACb&oIO>r9SV$Hublx{=9GH5xog(GzT*EvrWD=@ z`^`olV!e(Yz4j%42CUzp(~I4#2$bS2c7BiXEoqK>P2E2+EmD%d?81b%2a@1M$N@p2 zxZUqC#cd+ePAoR`?)^d)FA&nlBO9f(E~o7!VI95AoD88&N^%4_%1Arpe*C0($tFZ` zPKfx>Ga;U}nUt=z1$*6WcP)M{J9v=t@2{Ib#`D^<28iwMer%64lY^A zYc;K^v@E5hq9LCGwTZrp3=O)FQkZJ`b0jzRbue2?L?`;6of-vXfAX&3Be#~9TFS5a zrWWHMoYzH&qQ(o;_IZgi?<-pY-S)5N4(@7q{aMTV8@qbbV#}l}xd?K6X!DNB44!?< zKFRQB0hIQbH}Y5Q@Q0T9yF{;OB!%-5A4Fn~+);I!ta#;HN!nB=9!Gkmx_xeFVm;S> z_~SZa-)1on`T}+g<>sjRrVABOH|FD#P*Hk`oGA;>DbWmQN@G?t67@EWCwO3bUKIBG!f2&hFiehDDa#UP=d$sGhy*3 z%aXkEz4=|Z{rR}}K^28kU=>GOoA{pZx3O+aZO^`Izk54YGT7z6E^HdN`gctFPlzY2 zm+lCWMMOlZI^x$%ZNR(IZ*GVr@_0&?%GQ2tu>N)W?R;<{<~oHb>_Y`z$i!( z?~D1H0akqpnY15tQXd;5k6k6^kb)Za@Byj0wmWCxCx{f!Zm2I8>VxM)Jm3gZ7~-2W zqMq_w+X-U|59q!fc=dLWDLgRC>1aCqpvMhq8~Bj~mNOl?pI~zLR8ClZ&1V$uMdr{e zx1QbgpcaM{cn^L^nV7AHoGf|;uH4FYEi-6(SJJ|S*4{B#ecIE`g!_EC=A&@=i|W3V 
zEK6vJ!zW)}A!WbeEV4`pKe^7%T;-Grk)NN!x$EO15bDV}wslsSFCW&+@wgr%ImThx ztV5h#%!h&sQ+zPAc}%D;_@OBXW5Hzgo5xZSsSLz%8mzLL1*&5~S)S)iVcw`4s9Dcv z6|P5v*tKGahO4eGx^GLFPK1-;;mc0peCT1lt4*O$R+mkylJmck(huWHw!&^Y)!f~LsPGgakX-uzS2H;zt=Jqd6BtY6@^{?>bf!-H zeo#OjFA+R2X&Dbv@%T;l2=0Awy#yE+>tjj4IcmK53o0~1X0lpU^aB#rG zqDh6+c33PaZ&4{vssakKhOKmay0_mG9Yxi*ir0=eq}AcIEv_$DUF$vIQqV`Db2wxJ zZ%n!0EOztn>if@s>ep#YE4AElVlovsnaaaHdNO-e@!myGEiV7|QFDCI`@6*Bc%fGf zG=K_^3i4$wYsXS8eMjNR>89`N#MAmYzlOyFtZrS~u;2G=JTv9ibp1%CjltGLXv6bZSj?MnT;LTb>o+hXAjNmv}89WR6@2;hCYbn^rvVpahWMZ677 zSX_y?jYx2{SA*LZy1EZPR9ZZ+$De&D(rM41+iQA0ZZTd>3DEWatE@tZ=4pR-9sM39K1lDHkLo(}(&`<9dnOjk7C($?S1_i(zDPS9g@vZuN4s!Xc*M{u+w#(+ z^=r7X9_n{O<)0bO4uAri`Pt~tPED7>JZDW>*rCH+zR2ktI)#1Pg-&|@qa2K~Pg6>) zd1|Lo>S(UK_JP=KQ5rVy1cMDxB7<~cXO&hUp#p?DJYd<{yOHHK{4ng9(^|JvUAL>s zpW8~xONaGr_H%7m80URK0Z{{9_6OASn|;!)Ta;#)yYbh2RFHdBX0NCUh7DzW-%mPw zHbJ*YBOeRc$F9OHD$T6StPE@pzMXpf)?w-otJ3B_ZyQ@4bn(G4(p0)FqP7(WwA^sIe?@<8kS#^w66@%?bO4b( zej<3z%;%Ar&%u7D!%p{U{$6i3R>Du)v0mjwGR8OE34gt7|~I0G>d3lj2>CwHZHp|B(~?k?pfuPp`$-!HtgW z$0sLfEW77T&uTy|l%MS{elqR1nDW&irR_s6ln-~ZXtm`6i$eRd>Jui;#B9|V$4weZBlf^x8;^g?1-Klqie){d(+(VVW;F7(Gtgs*Z3Ms=M%6``g#t%;(3CvX0WA z5$iFfUSSflFI4rPNhh&<&ODP1h}_44MCOJ)_v9n(f0xVl)+?p+wWU4aqUSuzE2h$Pg*q#A z9sg@69qPhkO)W)Cmp_PXTUJM%7x9s6?Th5Yb4 zrE$=q=drZs%mvNo$M4=Ti0n}Fn0)do#9cs1IRp|_a(#efxew|i9%n1A2O30!c+e6Ci={GgO357x|8gL%}?9h{(|a2fnRhiK^&vE z9zkOJUxJQ5|1cf}J7-D`hASl$j$fD#NVwq)F9~?@*lg)m>c;txPEMQ0npzU>o;wr} z75Svq>!}6~`FA!0091CMNMq4@LQnJq2p~UD9u|ugL&K!7Vscm{P6{g}g;SKm666s$ zoTNCGAc>Qd$4TKNMVDlyrKJ^Qq-7MO73F1QWaJfO6=W0?l@t{<)$}W-ey(?nW|BmOA!kCXO~{uKUa! zY^~k)Sv#3qQupc}wf#%M17hfj(uu_Qy2u>&QrPdH8{|xKvNLnFH+QnLad))#aDd%$g9pv&F>|GhE(R;eKtNnU!XQKqy6B@0Vu12Y$eJ&umH4lcINp8H&_9RqCb zJZ;^>oE+@k9qm2b9NnDUeViP8J?#BF+=AU*5Bhop`MJ2d`1^bM9}MvGJm41<6f@nO6V~=DqIX>n$LHy#q)s3Voq1ulf^2;|G+(JKosaHNL zzL1qs$;qg#$Sg03?W#O}rR#W8ci?QN?`+Sp+2%n0O7OR@-cMJfCY!9{Qu0$XIH{-4 zMjvB`#^&Um&OVh>nUP&m$jUuez^<#{HqS<&B8H_%--+EY8yS#`CyVWh8mxIh1FRcq_{rp}hufsW>@SDJ_VM|!&ZYuW~G z_Me>|?G%o(@7?TuyLzE-i)^JjPW`;T8=KW~27d^A4AM(zTiHzd+8@+NY^aF1NqU*Dg!#{QN0MY65wMxopU z>zyQAj9_?3z}hh0Qgh`VqBg1D?rXhkaF8~+YJWxZo|gHLu%#n6s$Bo?Q%Mp}zP|P5 zx#Xzcb1l#J)N~whF>{k?_q*uQVAcAh<%6u2L+hDCIw_=Ot(a?j2IT!amwQ7;ryJ%Y zOP-A-kIt;zb@Gxha01Jz*iUn~Q-3Ecb6$X91!!q?msB!cIAI$`j*m*$3L{wI#m!k9 zBQd8U1>GmEBR`vOmMQwC?x;S+B%U2Q)|Dl1qAAyqd~zvEQb~K6pOm+y^n~SJem-Qt z=EA6x?$%J!mH065$*#wrk}mDWzpnMe$mx3(m3(~XxL$tc(ea+wvCpp_`@T2-M5FGl zPrH_gWlu)n@P0Lvnp`92XoWq}3enAGYc~j(%BY92 zJ&Aq?T!J9Wp(x=4ttoRZhKiW8lUy?`o~TsPBpw{WC3$zJ#6( zmZ_BSbBsaw2fn-p)9`!Lpj9aqN~u&&^T~!)Ry}Eax0Io$As0|~e8TbZq1}5AtCT|5 z-c282evF;IV6W5|gf*HLF0;y9xs*MXiEY7lika)dF{(b9Px9Mp-sBPvUw!k^xuGOQ zGO6$0l21|F__WV4^PO~Gxn1i&{ZggZpS6Ti<3N@1{$6&0&rVSNFCI#Dh^(pLKhyqC zG02k<`R3dt?$r6JjDWg}vWIB8RiTSN-yA%j`fviKoQj%(DP?U1SBG&{Hff?hKB)#; z89DeOO6EB7-FU@WxJ158vC%_8I76r!^S%fj;?S zZQ4um)G90MLF?nnltrU4nV|L4BdrI6+ECBa1#xmq1M}vO{iVE;(i#p1L5p(_`Y&C4 zBWGK-_VlK0toy}=mrwGe&oh+?)!CRQLSAEuhE6a9hOI!b&lQ)=DVHgTK6uL6&Uq^k zkp|RFs+S}=MN{99+)W&Hz1n$0uTep&+e8^v@6EX@7w_}8M#uTv*SA)O{haJqo}bb` zb}KSH+%k5-|7q@h=)fDx8ozV35dFnDhZD?i8D^D>zURGvJCDi(bP~p9>q*&!_$S0^hmbdi^G1>#qlWYJ1eRU6VsBrQWtQu{NabFh|y9 z<<8S9+HS!;Lp`i z4nFzGa`V;w%O2+4AN69bME^bd)U`&Aly$^3j-mY&NHXyf?`*v#OT0Ly6<;0kY1Hx@ zIjCsu^-<$c4$5Q6i=cwaxJNuimyR-DP*MEuo|(dVnx93gTb|`;s;X2<&R6N=hq-6D zI=tQvFtZq3b96yeJiAn{exBxpIN7i6n>Os%t=gLBF79!^x`HZYlpL+Q8XJ0Yc>cUi zqE~9?iNZ$-s8);DF)nXjfQf$`5aVmN4Vj>~NI6p*DcL(lUwx!8%9Sc_ZqKRAe|RjW z1>Sqbza_JN_kG3wPi;ditdXX7wZx8(zf@~S+`&7I`B&I2>VG2RqyL(}J%bPOxUjKR zebd8hp8Z;VL-tqf|88O?o&29v!{Lb&GUtt$77rht^X<{D_*7ki 
z3PLu&sTQ(sntwM8LAJ@VW#@VSQDPi<-LjlHpN>@!p1D2gP4l^A_)UxZYy+d@Q8?qM zU`E|f+70`%N3CYYl-2;gJw@1)JJ{jn)_HxgLbKyB%q!7REJL@ac-rdD8)I0k&7-9o z^zSH`Hr9+S@(1YXS2Gq`?D(|m;5YI#QSN3HC`8=A1A% zX)H;2+6;em|30;kIMLb}-K|Cb;MqdUayR&Qkhr99sB2(eqL9?(w)} zrE`tZAs`Rm%5zGGCq>ZRtw?Z2tLncxdq1FPNT>>~_ceu{hx5c|OSOeY?(wTZS__%G zwQ1ZOu;|O_2$Fusbny+ix ztta5sl8h#g4b8~L(Ad~h9~=&A$vt)V(438`Oneh@p@tl{2P~0JhH645r62K}2NN3+ z)}Mj<>~T>2-GbIo(mf(q`Gvv>Z`d@&mP=MlcbB(HBh(#gOWxMAf zQWZk*IvarE=z!kxOT}D~L+Q7Pnbm13?g(k_v!p9wDq)*tNb|V5OTyPcsW&@L;UO)0 zude>v8FXVoDrsM$+H~o*iL+_hfaiFve#=f)^HgTj5!*C(ZQaIYmm_@@g|=4Oh7Dv} z%2!+_H^=%F5n)nohjQWlM=xcpN+3@yi}UD6X#lK71go^!;gVx=l$J>_f{Q39UVrEX zj>LG55dw(*F|Y&|fNBEUeGbIWI19>4cLqcwbV96ld0H>j4io5cMmmZQmb9Q|OI-a0 zfzR0+TrJ}mI;!kHk#4UzNIP!#eLt@2Kg=lfN{`2}Cy2d51>d(CFC|S3k9ivJTYc4Dv1rT=AQhN9P>IA2p^(H4W+?^XWrccGV{{k=cQ-;U~j!;$1SJPXVSjN z{Ue&F0#vp*17t%*|9+f$>__Sv4Uic+a#Wzs{8%#z z#DEVjm5@JO1@KZ;2$L$+5?M;%Py=%XiiD{f0~SsR2&WVuAi{%b!CJhN8vm!M7vH-b8MDMi9v~ZE;S@@z z9S1iR?&lG+w;Bm2-6b@@yjL8F`Zvm?LX&(oZ-oIhw?FWOZpPSnOq-^uP?B|}%uNJ-WOD)gBHUVh?geyp$->k{m{^!_U?0p_W zhJFT3PnROg9h8pXJMcgl2uJFh5Y~pjhT=iwCOHIWA)A%S&EizB+%U?*65Y(h zyqm)fQQ#4qu%ld<2!vo>Q||Nx6C(gA@t^}VEQjLp9!AXk(jxa*t||Z;s6f*ER(Mkl zG^Pdee%;_5i7%71P0I0?eJFqI3Z#Abd@ZG5Y7SGeSM1Y!^mS@M01X5eGGGAs^H6E@ zWA>NF94x)&j3qBUjlq@g$PccTsI4x3(k_u+S~$YZeb3v9;qG*R9BF7mO|QaOZ^Me1 z2@MbzAxk%7by*i?Opc*{0CF_2QN}L3o(|`6u^$Dva)AgR1aq5fE6YO#3ei_77*}d{ z6QkRWD=V?q?PFq=r9xmF4|CoOc^%=EmrZ*2t@Y{8oX_g29c^)9Mu;=cu%qORDPj4s zkilaL*>yD7{?4@bqGx1G9v06mj!C|l*?A$gQ#!l)H{qY(%e7~9su8YvCxW<=3f%v# z^Zz$oQmGA8>mcVufps$H8Nau@t~W6n0j7d*{HA3c5R#LY9i4u<8YZIB@P;uP0^BJE z>@bC}4*_@EJZoKu+Lr-S#lvn{V%*4fdwc)Nd|2nC4^&w-+Y*zemp-;p5AJcim1qqX+p z4ys!64v&i(E?ue4d&kSP&6iUQJn`7+^k)9gzJ5OLJ>~q#Z-0B0!CdI@er|a+Q1SCk!2ko+aS<9$wsVQs zO7qdbej=*xZN zm_Z*J=+tN_R8YdY3rI46PZ-eFHiN_;5y1d-Dd~_n#_or-Q>5z!){X2sJ}18&rpVom zAKuMS;%0pSOsJ?={9f!T_89}aOdWYA%6pNqOfsM_ck}E7Kw3qOgbLc;B?1|0v=&usH* zKZgd1YY@?bv=@>1zWmeA@&+=do{Zs<(a{Vzd8H@06P23D{&lDNOj>pA2S5f7+0X>? z9^cfZLcqKN1v1o{-ULHc=V{k~H`3S#^Epp|e>}LGGq;$c?5R8xh9An)orz07BSArw zi}KM7tb*>tg4vlTf|+>$^=ca^EF(!%cx)|@!6sOS21D^WIs_aAkaaFM{&p?6!l;XwI~O$9#-rXyjvJHZmPYTdN{PucqCqUl;G%<7x_GDq z6@svVihStJaqSH4(v!#;i0jP{%e9ickz%Wv^kEM0?g1z^RT>Y%(awldz##zY9{%CN zoItvvcY%z}V?e}-5C@UaoJKOI-QUYSxxEY+Fz(Be;d^FK{d`QbAo{>JHM_4cBmI!} z4NN;1_;RVH$)UcfD;z-P z@Fq8`#IEx&q7dyE_10;usoFF1I$zX9JfFnXq zZX!#`5IF(7jf}lVL;z^ucz0EiJTvuoDLM~0y|$S8F>$e0f;n-jY?hp-_d6sVxIO~e@rI28pl5JlCfVDbvHZu3g8 zFeXMMi~AFJfCwAsPY&u|Xr^I}R$*@=turQ8jX4I>g~Y4Br#{(3LZ6JB4NFcVhKEVL zt?~2Mkn~<`)(-K6H!{$>{vJ2RMe%t)Zj=E(Kz3W{g9TIJqr`tJqT#A!;74&s*j`9w zH^`FLagTzKBLh{J5c2>wn+`fOCpJ&UwoyQ-!Bv^jRjDKI&#Z9qJmB+S6`Fn_t$if# zA|R)*@+#rJIWNkXPDlO#;0%xz4bd{UB4L9qXF%lmAa?*&XN42Rxo1T(c;3n!1Hs5o z`ir;LFoTNLuYbBMQey`jDbITo;Q@S6rxvB31$Sj2L%lrIrZTo0zS2C8!*{~F5ORk zOhvoXL1#$Fc_J<`58I&pK7;c9q-gss7{n97!Ibi9GG>a8dTL>+Hl^|I%$_7XERg=X zIR<_?6f4>R-Ia4%JS%rmBPXxrt~~tU_|-S(e=&xf@Q+TIi-R19XB{p!b{!Tq2?~~a z_!zl=aqq}W!8|n+yZHm$o{hK+R|cr!5C9itmiUl1F5(0pmBE8#?0!ydqrQUkK^cP7 zR?#|c6MB!1cEAHa0znN`5TP^JoEv3A?t5QfV~mG}3gF39KRfCNODe*V+$Tkch*Arg zq+YGR{(0XG0$!-E3>=@UEt53e?;?hk5wjU)gyk#S6|rrI6Ao09fx_0G~IBO(zNVdTia4EkU!ExKHe1hhDC}SPSSa|<%p@W zJhT?{T2)K8N;SD43+BG2E1_mzvcW&_3XYW~>F~J32|Mse+NZUZoFDhklT#;`VOh5d zX%k^r3FaHG`7iYt_74@zXDf9>J}wR1t<)&2);~0wwR^kIsi@khC)4$L#Les+Fl-N! 
zM4poEcWHy_OPF4IrLPPZw(?*If6?=r=q*yZIkb0elUN^c^EqZBbLWQr-c zkyVv{#zZM4T0Md*a@#F$V9PyjQ?e^Vtbjrj(-#p^AyK_xEzRK>?Tf0(Pefs=P;GR% zXVmSNP=}V|ni!4nwd_t}ZvD}&?gC+)UhlMyJMzkzAYHc1lY*{0!{2}E@~wAY_FXm@ z63cNXPl_F#HFTb5oja`AoY0W+=;3sMVHTiEsdH5gi{L1C0&}&-Efx+7?5t&X{&7Hl?0bUHsOgq6O2}H;W6RrfEa#k3n+3Y z-xfP@*XWqR*QiJL8G6QEXLcXvo;x(R-L!5gpf{sz9w;5HIJ~sFCWcWZ_yx=J$WDs( zDRUyb_)H@lubfU$EiJ9p_QJ>Ybeu%`SfL-=Ctf`>Sfc`8CO(UAYrJXy3~EF{JmXtN z;$wRrI!i)A;_orxXHV;AD-mnpicS|QpQ^>UsZ5usm2+tkyN9cP4RAnb`ON|;? z&2%7p2}gQ?_9RcRG($f9=|JkesXtnxmDC58K62*8^i2~BvWi4(%ZjQC=TF11%cVhk z5wV~+W{actsDP<|_R>{jzD?Yqdr?i|_p?#Px1`mI9>QyGRE8Zks_qM(B{&os_GH@2 zHf`qK2g0SUGoFZw@H8e^jG=)ef~5B}7P}#$R7oupR10$X%mvw_U_C(9ah>kKbSsQ@^lvfLDqOwgQCLOO^jCuqru3;iC4u=j^Sr#J9IX(w)u1DL-`O|x(Q|{9#HEsK93=IqV2aBt zSvNY4I*M<}57W>MdANMj{HCo|1I$e|zVJ4Q$ZYM|h-_TL9N92#vu1?3(`swNMyYKT zIyW)lz_cE5N;9yc{apCwzjE&vSpOEQNVHRUx;`hs;j&*rGSxZSTbsKay9gpAX`6>1 zP*EDToq+zW*C?j?uq;9^48kHLnAFpmHYw0V2jPtA21T2)-X^^|Dv+v|kc_qo1OB^B zf)D`EQV~t<0a}jW5N?$a*-j9fEm*5=#=oAaH$X79?_ zy<)kVsR>AH4`s$Ehc%rmU=m^!Xs!TeP`e3r&@^^gQj`oFPVViMZanJsbK=L6aVy$| z2s{66Z71B^%{Z(wDwQxFaN_2jOFGVC5%>?a%l-MDMd2oUB|M8L5RS)2uUtbqj2QUH z0%1czri+q&JT4QXry_qb9&TDMQ7jJ5hB1+HW{yKayXYk1G3BypF#{po#ViNm#hEkp zpo1h+&MCE}1Fb3CfztYLK)m;+}nnSJ&OV{tGN^0 zFzKTjUHSK^&-7aQF-`Ig9t-_#)wp@H8};H)O(FyEpn&9O?3UUk3N)mnQTM9r6fJ0j zBZ(ANP9*esdRBzI4FfeAwm2Z8kptRIIF&d{oPfB|5C>T;@+cjkSAhXQZE#>T7i4LO za89UL32p1b6Xuywmqc@LSm{t<{?@3DF*9!Nuk2_1UnIKgl(EG?{*?MZjy znH_kv2dM*=Sf0{%Va~txsC9YQ;C8bg@27e4!P&h*g_;NWeen9C$j~Pa+7k=!$=aGr z{ewAjXeIWgM~#?N;*@;)Lz7>gY+2Wbvm%AycO$Nfr+3|~ycbaf85Yg9n!NME-ZD^p zTJJnz`v1JQ%Jr(s3;1cc;I)KuC_~~e)Q$QHjgkmx7|1)O@u+KHi58uBEI!^DYQW8* zSWJc9hfc#bAt0cAcOwXo7eMzsEMmv4ggyh*HmsA2O%Q6b6YIHVB$m~Y?aA+b{Or1e zK47I)0EPLSZ&$F6YVt@G3(kjs(JrW{HKDV#$|8T;oUajH8X>pgBN`3r#50pVi*A zz^q(5#J~3RD{Q%d^b`a4-yBDBnfubAHUg&jdP|WGLz0rMN6zsWZZH`JuUbO9H=&9n zVAC0v5hWXXJRi9Y*ba-bDVDORf=l>426V6%;cVfGX)iw8CHt~66{CQ^fF_xtIahu= z4Cs~BTqlT4aYlW_axA%4hI~i}1k!P(&|oIp2*^F3uOZ4`9RhNVm&d!Ut;JGco|J4! 
zGO$!#>Qyj?AUR7j?BXn-Qdfa72+Dq#VZLb-M+NT@tMcU_NsPv*878KhNr2>lsNe)O zgo6e2V2o_EfFZe@GtkJ+eQzXBmIY|~sF4y*)NC88A25jDEvN6?gb{cBc50(-z&>UqX z7uY<}|CgQPR#U&zsOT27e1Fy7x6D)o|FaVs>{#54y?apO| zVn?BT=#>kHktRoYxd%uiVT$ZvAGBCHCwx-tR-<@CQ6XB8Q`$~E`ofOOVTlc!m+~QE zGmr!aMa;}ZERC(qt2ijEJAM@)e63v`g~!b>5kyvUkhF4ocDH<<<(xGH&p4gXbh>`l z@~%knT1-qPvAQlD+PwDkt}Wj+FKpXZmuk8zYG7Lr!{#66n()EmcqWd|$Qx!z@v|#O zvvt#RZKqiu#;RT@NSlP~qqiBh?YRd9;9gp`jO(0*?l`E;N*+mKl3l!N z>k8I09M3CAHY`+s)b4bbtf9siN>e*qvl-V&t=^8U|BD;F!UA*C0OB`^1$hP5QKoQA^H8^!Y;d3+0Ao$3b$WnzJK@YiB$E{&@X=UnTul zb*=*ts~L@N3Em;i5Ub_vb5|I8yrSN0KUjT{XN-Xe9Q~83KYX+}Jo)x85qglsk{~gD z5ddp0%PJi!Ph{<{2C1RJwxnzbtyr83QRM&#!cF;M5Q2k2P?{x(Oho~sO3fMP(FYJ@ z(beMTLP1NbiU&C?YXbPAP!>@gg3B_NWq_T@FtO~46iUOP^qC+$d&a`kX`b<;ntlAw z)2u%}9LF385^y>N<6M0dm<|hCeoPa>&RNK#89-GxVAP%)vd-*Tw%pa{oL@I>*$7_d z)jFzf5>(c2UznOEaDzUr`l4qfjpi;>B1h|7AB3WV9X!3Q=fH+MNKZ9L$^wLO$u_mL z>{IRHnn2X?AP7nNW`?!4;@t2_po+zy>Vv?7TRUV1mU2_%sD}_hBPSUpYn0+REI5XN z#52$UFhaELzV@6}rd;LdJaTX7{z(RUxKv)Cki5+1E-TrmgB6!?^{0U<;r=2%yiz7=@S-%2&4AD3~?y- znXh|dsVV?FdKm8aJH)G35;WvzjvF790kv1MT81-VvvPP5bFC%}LZoRLBHov$fNe64za z07kn6rs->n7A5q_TVX&J6tF+P^3PNVl*7>IT!90OpLamw;QVB&wAVO1h6}YJWT)#^ z#Q%AIqsD*FaE{2ut+xuJEl(PrgxOgFYV9oD4{&{!i_}ecQahYV0B~n=i#@`0XCAGr z`V@@4c=>)cUODF=9qoFw%+i3gsUZq5JjJh;JXM7r7 zao7idW9nhtb=cg(P~lj`flFQ*&b`nHx22Gg2Br<+Qn36?uu5J$KbJxSOA?q!!Tu3i zy}dTbkjwJxU`v$*2IHkA_#jX7D~D1kC%2IlKA<~n8$@6fLfF!6t4GI&^Y$?`H(G|P zfwgY75iMJFJ6o>I^uKsVbBd$|9>YFus0EVWY&?|j**S1V-iY*eyBFf4>TMWv!(WUH zczBPFkB(Q1RALZSk$ZiHDY-lYA-!U)Jko1@eM7~)PA10cJmXU?z>!`5-^sPC%os%V zIw}Z=s8GIh9zV_SBQAnwbR9nlazv;rY z??nMEW;P_K!3;==2$Cd$CZD_5b=-TBRa|^Nm{~H0S`WQD81p-5eYpI{t=sK7HNYq@ z#`3&4=u&Wm$wy{euv6-qJcCsT<)-lPvgrIoiZGTA_qCW(CJl|fe~u;kYxm^JTus8PFikDPW5^ zx%1Z9F16}AaQVN6nS^vuzJGk9+FN}i@7+(BuB`#k{7bNh_mQt)8zR(TnWhuA}o15V&w_%$CR_YgwT;gvR#=MuTxLl0PE9$dKC3iJJQ|+z8jcu$6x3#gMP5=v95g zKXjoYp=yC{i2u5~l6`)&iWTYjUuMgX!;)Hdg%^q>e>sEX+8H)`qesqncJ6L1|38Y( z#jnNw|Kr!ST|2L}L#=bJt%FKs=^z!Zt)oh&k|?=4NC+#bB<^c#od_%GB-s*@Qz_)s zy-kwtP~?`xy$W}TJ8oz0zQ6q*kNpALbzS>hpU?aKem$Q6N3ynk)93J0V>j@NYYGIe zU2G(?su}A&$3A1cIc)Yo_}5RJ$!G1IPvneoDayV7JM@?Ti35b*)q@dcXV*0Di)oa{ zG`0V59qwfR_M9gv<>ku6<43UXpp|KOZErW2beL zh{$?-0@t@@6wM{DN`QY}M$JX!@!w>Br`>a3wsp2%?=+nEwdkKExS#Fc8p1wGJ5p(W zkDJtjWvAyYi+Ss`b88=N^y^xiz-z+=W@_;Cy4Tf*M}|)1LZ)eb=ju}a$dXp=O_5GM z*>Qn}=5z8V{Znr;9_H>jnH%%J+<=~}m|2r`YtGJmAHYH-Ra>xyPx!m~&2}kt840sx zYya{crT~mfvel-=n+7rj=8xCBDs=8l#OZpSpY@)+^nPFEQjg)sZxYTPss-{Ze=Jd- zdKF(DG*p@~`>Qf<+x`!x|FnVMSMN13H5;)rD_wK((y}svzC=*=Guu`2x&Qc=>-dGR z+zkE1`Udc;t(xDD`Y%f9*-$Spdhx5~KrfL@!tuzJI!q|VQ(a1!iTqLugYDuF9imj@ zNrvnYor41GB|?^EzsMJ`0vv4R;Y}Ws0#?$B4Tn#-9PvsY&U`-f>fhOywmi%JVH>=q z?riY(tln7F80Nf6lkkpz`JMTd`r$MGQD?YwxF?E?eU%h0y+-7rm1!D>rD}i`)78h9 z0JsDS$2wOiZ!YHAhj*lBXm6E{bw@mqIF5hIVDUwJELj(lZ0XI3bD?;3CUhX+$yCfjm4;z#UDa3Yn7>K;8em%=@-lVHQ2GCNb z1Kfwg+c^&7B5s)#k(3e`9t7#n_iZ-~y@m!> z(HpMlv{Z-O*T=1Ll-b2530bTnlXKEU`_0#1e|fd##`X_!oULz`-=Qz)7x>eSqhU=Z zfhm{_{LiZCXwbjwPO9~EwCbZEgSR}vtPaCbos{W~cg)@{*CUfTN=!|i(vm;HuZ~(ul_)HBs*6vE_+^8`w63<%?DwGgzr@_TKDl z#_%r}c|P!Txv(S)kU>WBI1t+PNJii+C1=-gR@?S$F^@1Bj7&mVAt`Cnu*K4UMod#<+(JG{aWVB(7G?9X$FOBhdV*Bvn9$t!DC?XWXw*!ODP(Z<5Ix{ZfV)=t-|Zr47o zKc2JvZ*GzvhG^xdQ~ymi{&jW&Vml9%=Z~1}G3mp(^{ok89;bgduS=_QpIx6-vE<{O zt>!a@1iT-!0G2a(cWi7GkW}?V&MDy3Cx1I6gnYWylm zhKO^lS4$+q_>iF_%H6acCiAmHrLt*p(a4ePc^6Dbf)=2aFSQEE*qZ>(&@B3Ym>0a5 z|GO$S+2qFbE!Uz7#&?5t=1qEItPvk)SU(wn_r@S|1&%QC5-@nJrksJTS60B#eEncu zuO~=O5t}EH%Q#f`^RrD~hwLq&SMC?fZ!dX8_G+#$NR!}Aer@u=+n8?hnt#|ei;;94 zY&>kAeRc2hGZDKL0h|y-#!7>6eLDcZiCUKPZaMs6iCJG(CA-L0lY3YvKQS+(Xb!!D 
zo{A*T_Ilal9E;)Qtpr1Ou*{3B?;*gecCT)n6|*@Qc&thCGZ4t^jFQSpc`4#^<({hk zxmMSPZ_WI=Y`iHp+5DiX=p8JPg7uL>G|Cev3jn9e*3DG%y+Pw|`aEW|4z&3Okf$$H zC+ru}0@6f@BP+sp+)F>5@ALsO%SXu3q9@Y+th_((=-)FC}*jr9EM1k$B_GG5(mwU?}yJ3V1l(u>r|HbQ!-#cVV)4f{S-uXul3i2B*e5p^!~cyvL-@=&mZOR6 zdx{>f?wUVpCYV^l*G}w-mQur)fYFXaE5<+i*eSyGpcTLX zaU*&uuuR~PW}_avEOVoqlq?Y?d)-Nx5ea0h`V@Iz^y@S8r-fCiA?AbVHm_&#OSgoq z^bu5Ut2a8A7tV}P(9 z{6MfndDIXGYlpN%_lOVy^+YUa)I3FT1fu=Pl^f36gb*{MK1vUHbIi>iC^U?QyPMT$@zbWD`MEC*(l1Rz&z0{f zU4sJ7t8RjyB^Pfu(~3|+F_7i#)@JeS9uykX`I27Re|spm8lgyDMx_lko`fekNV#&|LwvLjfO|p0BKRm-&osW*OfbYs z*LIth8Wag%-GI_x|`6|LsjkfKk(pf{k;=ZXZ(?&JnnYIev+F zvyvG(Y>i1toexj3zF|aIVVTntfuFL~&^-p+G`Dp`cLxnUPm;y2zf5x1fvmh%g0%;a z8kQzAO}}_B!LHfF zA5mJk_NyF&4`@yM5u%#|NDA7Tlx-~r2+g_h?6^Z;#pA;y2|%lt6wXPSazEi*P_6M)xKRzp7}i`e&@iVl4qe0WmhQSMET2*6v=WVV|A=+J z#(EO8o|3{Zggi(D_oF;N5f8!X_1${pjBTZi?|IXuM$6oIx8`_(6fIav{=Q1J%xDR=Ju@{hBp*!4^W};i*Gswo6aHtmgWGZjUI1gVJ-g09T=LD3jSMgoZU3 zWmrvCue{>`oJNhdQo8Z!h*3HYxQcVp8T!_Xh8=}h9Y9ndJ#bev_qt5D0R8+z<46|S z2Iw>UB1c5)7@~Eo5xI?s9O)_-`k0Hi|BSnLf%~Fk?mDVx%!G$&h^IN(D`BGG+X4R4 zYA@JizDJ1Xl!gQV4Z_=Nu3H~*Dc0ivuilM->P_%n_ulp+nTXm+h!}>;Jn}(~19;7W zZ_*1Ud4m?rqaPeOUCy%(rS}-90rhpjpth&lp-fv@XPwCj9XG50Ff@Wm1$Xc68sr3}-m}U?|k=c6R^i03$<-nWo?axz~PT4UbU4?Ok{!u`p zZ&|OaQV$E47>)u-g<<<^R30T7Zu6MXx~ZHwYa+a7BD~c7Kecq-o%JO}fsI(;`!VZ# zpy~CZ;4aDn@W^Sa#+D5r-WkUptH-{4&OWybaFa|<8sF{{9LSLcuZmD~{i zTZsC8I%b4vmo@U+?~1qrVWcbQ`~~1nVR)Z)Bdc`qC;nWj3`S(uWD%?vDZda}z0g>u zYpj?;lQhsU3uH~HUwj5Qp8y73NS^}!-Gz(=s>$&K_y^@x=?;rj0!Yqez;LXZ`AgwYEnqNvRsQyd^n$pTMcfmZIV^?x>Jgqh5YQ z=sGO|_`+qSY6D%ZP#I9Ff^xi82&0i~TOh|zO+XPNbkCeDoYMoGLy8d37A}v}3@QQMLEBuj2SXGf1ym2MkzXx;8t8%T)ZN;nrCtH+d^0qj z;rJ36mRX4~1~j(Aq8>$!)^Q+twHSBm`FX}8<#KiXE@TFe*e5itgod52_}O0&A&_aE zhrX5cYUPHjiAhZGpkGgl;MBT~SJFF7b$;oFra4rYKC6ym)lG!!DAl+lyq1xzPK_;; z9(I;SPqMcA1UT>0I15hN$AXtor0_z6P64dn(n60ejk{89a7KMBz8i=I*fo4rwpP%o z)er6CoSY7PR^z@P%R*s-x%0Bp76d8&!`h&w%Hpa4lodRf6-U?c0GTy{Dk+G@KtON)YH(%ROtvfZwOao|olllvgGmS6>(peHIoo?~fxtk*4?#r0^ z-@cgTnLI8NWY>c(-?RcT%#H=FJPO9Ku#jojfA+{j1bRdfkU*aMz*`--1ljy@)?Ie@ z=5qNdr+9Em?Hz(=#Uc>-?w}LtqU)$wGnxF}`oeDDLQ6t|_q(s-W{}?_@^l?BZSytW z+Y2IqVJY*l)Uut;b>@_nj!nq|+pdt(e;)y!n<<;~N+BhJd-!S-#GOTC!5eFeKJ;*g z?zC7n@+(}%a8+Y75jYbJ@IPZav0r8?QHNF4G`xSod?B<&@V3*4wcw^l0G@|vY;>Wv z-D*6b#xwgsiF(6!oV~`Vs-RMm?A+Pv`|KLS&A^RL4)! 
z8TT5IZP{w~5UnlZZ*5u;=-7>qMV37nW*&QHkK*3WLk*pUW!q(Qe`!Ca2q~SUy(_Bj zMFgjuZGS%$obxF-jC_8&!eT$Yn@o4%!~zUN>mdsH0)`(X`?5etdmmYkX zOCd-y@o5(fKJ$WcxKM~(g>f}R0d4l{;B0F*4X!+xM{J94Fgd7FOPIFNuj=3K1`m{q_yMRTScp4WR3{ly&s#Z*=lI14T&>; zV&6l)`^TMKZ!6B7U3J4gQ0qah{Kvj{Z&kK50?*G{_O<2n*_I3;f8?!r>sNOE0B1-D z2WZL75A}0W9J$P;UTZs`Aq!TxuG238Qb)3uoeo^4h}@oQ;qU@pmL}i*&rClgE(1tN zei~0=Uct{^o{n*LB?+wrj9P5_$1jJIjv!D+}Cng@*b1eXG6=B;^|6JX*Du z%;sI6)s~~Io^_V4V*n4n5LuOKVO=yXh-g~4zw(!Qm9*!|#tWU@S-6rg>aZ|k;$4vF zDMr` z?k_Yi;r$Q zJ0HR2t~~em)gV>?UjFzlu6pL*%wUrR{+@td5wG=No}hGl|HB054B_~nbhbk>_fe5a zgPLBiZV)E(V3A9!%w<^QG9=4bs2{ZK+cI%Y$oDr2>(CsFY2vr#ty_Pk@)_Hm7E4hkXfeJ(_yeSTJK}q*kI;(| zYr^lKaz=pGu@l_=Q{%{#J#vs*hxo&lvRR!`-z9Oh;rSmbHD-bX&DTV8L(sXC+PO+A zLV}Qsv@K{4Esiqt``71uRL0o@xn~cX=I*=C9V09K)L`rV+L@OwxdqluZtc5;?jZ$j zf6Ll@@vi8M#;#1dKUqlD;arVG{u5fyArY^6;jrvy$77jwx-|Bq`rG)eEq~$oo&4;B zF4sJQBquv2kp0YhW7&<4+Vhdr`W-lY(&5FR=`WAJch21K$|OYjaTL2>`$~_Uo@dgp z_PG)j^?gIs4daGe>OBc+rs+ifT&e9S%Q0O|2#`5e;tUnJbkXs8 zY1}o>+1F(s0o`JQO5@RUV90%3jj4!EeK;C3Yr8jI(pmhoc$d-n&~ICeXMC=jb@v$o zKSMnArTEM-(Sp-Y!F;tzyliwW4hgR)`r}W#_y;X3<^A-sQ25Uy+v8-8n{KviH0*P8(0MRUs2G%G+JQ0G8P{mVF`t0gH&RhhZ|M4#-`4f?0DcY`M zz}}m6*iUfDV+L4Z@ABf36$$nLz%i^O?6P_+G&(HltQ-5ZG%OltGIK7hI`hJvnD!;g($cz(NfRdy=Tet<^UDOH=2^Qtr9|^2aj4^| z#<~{JsP*cey{_jz?%7-0Fg$+xd|k;Y=Zhr)v^~5%s2+W{eNsrYOsx?TEwNyEwFj?%;}qzzH}J8fU{IiNeSV-H!B@Ry>Aus-V+`NXR+|Hhf2oHJeTxPFyg{e{Ac<<86hID(9Xc?Q zWhYX6@8e|uK8FZaVS#PGgU#QA{k{B49Os&|z^Y<<1`fcI5#0NlAdlsL<+mHX33CaI zikghrlUT${7btONnXx<>04tQ~n$TJz%gugp9I&)En*eb7te=eiMJ$AdrPS)9Mw22m6m8%9x#*tOjEI``9D}omV~_EM&|00{3V;Tr4(;bz>)Z@ zW!E*u#&OXbTXt}!=MYY3c6@NMzT8>vJX>ZNd8dY)joV|Vlr%oAdhgv-xjKTku41+A zm0LgJB2*`8iBqtxR<}=G)lDt|cD*YNmQkk5H1-Imf|8Vf9 zy0a6DYyp=vdnf5phA5|4VJ!4jRc z7=v%Z8(CDuNNaX?*k45!OG&Y6gF9qGzZa7dSt6vb5_I$QGbK@@_i@~k@XeM?{Ok@5 zF{~NSD*}KME@wAfxiWOz;o^}|`pFIY3S>rBdqHTpjd@Ouzk5x_KGy&paJ3de?KAc* zA#Yyz$7?JP7SDI~PF{JVWj!-+K;JTOh0f>-$0UWcuFp=ASha>6gg`#(yJka~;0#%r zghPhMjZ7afUw3~~@td~<1Tg!=d&aMtUPm1rYF1Lw0GnNBhjM&JWSoQ_*I>ue6H?3A z_>xvXewN1YvVh>o$S4WXizG0h8&=ej7DD3;sdu2eJxF<5Dl$6RdkfjJ+#ov|2kA4Z z1WN&6Wyv=?;Y~1dRgi+5rA$Q)1UoYeQ|pr`MD<;j5ffVCannGSMs##`D**DTOP8LG z_*u8X;XTyT+C$kd*R$s1TDP0SZMT55&U#V+eB>Yil0zcFyDV(Wn=2aA@q)d{X77(i zlci+QKtb52sEg2#*?-|y(gK)c(ELszYZ|>(6a+vSXIAiMBETT~%JQ;QsJsOG$gw$- zu_j0*rH$UIg!CZh43M0e{#K@1vnGc^k1XDv<=D823QKRC3x(Oc5GULl(B8;H@%%H3bl@ z;sNSi!7XNNPX{RkA)PS24Oa}7`9UJm)g{h3*6Lx4e}>c2WM%Yu8%U9(GQ6~t$e@cO zGyuk&Km#jy)8M8-1IP+f`_hVO2~%%MTi!{Hi^%1>XWS)^h6!_0GtOxCfwaxf3#>!1 zlA^v3_>0s4mrI9uE@o1Q$) zj71XXF{cQ>et>hX{Xw9&LQyq*GJXI>=b^E=pfA?#oQh1~TWIU{1S5 zCyph5n_rh<+`TuAhpRh=Kq0yX1MjYoT{UU1y?q*OOnDB$ts28JS!Kn8w@qy3`L6iB zW3Fi`ipP%E;l;%>JmD*klke&>-(nIVD zUA8PIr?n6@3VNzHxS-z=1L)K8Uv>rN%Z}UxsvbI!1P@!6 z-GsB7Yc&Ch!6KU~jbk{gbc3_dFxVa#BtHDMx>DUY%eyqs9Q((~V%1Ia!`CvK5d zW)-U4d?84lyU-TFnW8u*`i~{*+u|P;#GH0Oq8cU^WKFO>qLfgT5tL{StJo(B0V)A! 
[GIT binary patch (base85-encoded) data omitted]
zdVCX-PZ47|zX=B&Zd9j+1w7+!$K>WRuN|K6bTo}V!t;l8M|3GVU?~tqAK;N<$3m?R z6g-3LWE8Y>5G+bROD?bhYnX8@-?AaMR&5n%bhyeSUs6|}HX|fU0T?KEYx+?>8#m`> zv`^G|XfUEX;Ngh>)w}BIyoO!LWsa`#D?*-&_ciAst{(E6Mp-dGxzGR5JH~%`8A)vs za{@LB`fWR}(l_N;+=LpD%U&PIQS8w6%{`%3y~J7nj3uH-!q?Iqv*T?ag_JdxDn>Iv zTr#DwogDk>ra`MS@h_~~nn;5ri^rv^34*{pE155IHS( zsG()*ZENc80adXL_YbG#L?FOkLZdOeB=>Revyjwzw}%p3aR4J^aBFs}g@skDf5)yB z6di1wjC(_|DWpHL6863ANMEgQ>L*;bd||i1_OdP7H<&vbQt+rv@Ytz7^s=qugYB|m zkGsQxGp-)F&TEJYC(GGA0BaI6qxYGIgDyp8UUJZ)tk1nB^n>PSpCJ)Ps&f$cS)(FO za1|p>W#k=lEOpz$`{+3UJqIV|U^bH?Q`ZPkFw#@^m=_RcBhbw7Ra(n10@rw2fb4K` zW({Fc&EPbI5Ap&+DRqRWzYM|%4kxEm146BR0U}vg)4Aj6er#_e;0W~bY(To}?IPXn z+JX8%y#T7wLEcL#{ymya)shRIJM6eABRn$6SWY>u<2c*=H+CuRKOyh*^Z1uf1`@8= zI$jQ%2y2^ax+{QhKkjpqHBN#lc5$R*oi?9%Dmqz3>v|+?92eCsVs+_Iv94uA;ZmZo zD;Hg!7hi%{X_2j53_wDKutuIqyWMBgioV!CXbXIYLqnj%Mo!WdMnd zA-xeu9Y!3GdmFh2$;QiuP8+RW5TX67307uqg$7zmaMFMPL zsCQjx%fQlgkBQUx!B$Js;0}(te^f{YNRxd=Zj@A!^ ztZSF8%7ok+XjW;T3+_B?GIVx9w1qmG!n_Av8jgG@_PNDwTeo4Hc3W>ZqK~lu8|)w|<;b$8o;b=lh@AZvNnQUANb+>-ppU zFb{{pXyE^fc+;TUbSx86+Vla1x`R|MnCPU7yINW!L;xAu0W<;K0V3L_mE03lh2U<% zlvaE(X&JT=iEl&zU&Lgl4Y<{(bKqlroVC6{f%mW;rTYD5Of2#Ma)S2#=&HFN`i^Tu z>E!@LoByHmBs~Z{*j5aT2fLyU8b{C8tz>2f2i^kT-a1U4L~Kxgqov@*UG#rOD2a%& zEs7$@Gm+^`xQgB0|7T@d8FZ9!1?v`I(+Zllid@2BYrdG4&815fnqin8qwo-RV8c~2 z0n-Xh;t-=&=Mew@-CcgCt*+UTVW}Uls^v5Lzq0eQ__BN;5}!)0fFX&}+o`r@(?I>L z_(c)Cp#Eo>#Y|w+=qcaM1^(E<%*1X>d+S8~?E1uAds26i-ivcwg2Qp0(;|YeXz%Y9 zbJ{U;COc3ZjMpx53kvpYw0CRla0>76k*I9if5*e5nwMgW6uncQ!l_N=gD80VKD{VA=Ji_l(onN_*+zmTvl#VF_W_G6XktJObzjgiC+CmnjV(hkGzQLCf@~JXwWVXE~68U2;^;5GF7|X1U`C575)iW(-EZ)TH+b$pd@y9d__S(rRkW$ zwFUmsVQ}UuSYz(QaRY^vf=$6#cf;t-kNQ+Ft{O3ViJaT@w{#!Y4ZkRvvi@#Z+|Me0 zKl=1n^FQT(Q_1LAaztkecl@*dsf*w8fi}l%{mDOKDGoBFuK~LtG-EoX&%BRg4b1Xfa!d*hgbsbyoJdVo%fuO3~Z6-z~aQKnX|BwhkYI!qm6ZruIV= z@&HOvn%~mdq$^F(d?G4noi3())tLt&E?;$&+EY$ZXrErk;^eYgbd(`QV`><&xEF;_ zw>Kj6DVQuvn;}=jHWH;{yMmk>Lgo)tt`2?3XVwlEzF)WEmsdTGdo5*kI&yr+#;(w< zZ#HeI)(`le3aV3}s~yg8aM19xtD#{Fzny&Lc0To~!aRkzE9jRmUJ`NaU&5OsqWt>} z5goi!9rkLa8wcZBD>C~kV!DfLyF^^n><+>@snHKHo>X!7-a&hz@=C&kcc(h+hS5K_ z!zNn6%2o(16{bVDn*|T<$dHCzh>28WD%a)85tl9<1*b5{0no1imqCEY!bM&9YSID_ za7`51_MqaNw|Nfz&5E zTDfpd$7F!=6Zj-Gf02lZTi<*ZZk?Sp4`(_FK&=XAj-Dt}un~+wB(NfkxkG&xAs zm!>@fD607@s<& zZ~O|(@TFAaGX1TdcZ;sn<}A7Md-3<;lOHMpN-gYEJjZUvu!MY{b&`L1mfz(+Vn=*} zRe?jn3r7ClO8sLp@6R2I$nD(cqVqQNgFbmaIOstnyh!xL>#Cn@>0z^{zSF zm2Xpdn&U`_(%zuA7}VReD@;=LCH5V>VYm&+phk=A_rM(;2eB0hZX7X5OuJnWHT!K% zk<*Tp4Cn0+-kIk$5nQgthv)5y)a`$1k~rlgy-G@0yaP7@Tl%U3Ww2$f(s>-9@;hB+ z@Uro^3;D}d!|7Aug7s~ilQ5}c5sENJq4rQ4Vb zAI8{1;pW-8^Iv`vISK}(XM9vRrDIZ9GU{aPcx`u_FoZmXcCa7B1?AnAA!>bhd;Cqp z_JFE?Qm*}WaLJmoCCjd_m}(@AJUH^2)RDg9%CFUw37yp#h?l_-`dZ!%+qVJ3q1%pc zWl*Y+l4suN#o2RE_wL-iw6DfQoDF5m)Vk>HI5T1%r-N!7|JEe^z_^nt%%eOu7!z zrdDIRFRiJn+hRW5|21Gk#v~baIod;gjC!(fkp zX~j-5{bridsn#Uq(R$Bxj^|{T(=GkB;W47Gh?}9v*bm=M1TL>dSRn|ZwzTc=zm*FE zio5l!!kP8FAWCs$YhNIF$) z*%Wo;<;Asc`)wl)L(gu$`@fpys_WYyO89ZZ(c8K|T*o+d(|Dfg2HBs<=T=?cf^zD! 
zgGp9yqzM~N}SxTQ_;Uc523*394DINDLtMgl8{3zVy}*Ou*IHyBW8rsgpB>IaDyL$_mozw%*a(z$JaX4QM%Nf}m9gwdaLScf~~ zDeRTFQy~$v{KFm8@y)z-`OfY>V|~I5U=TZdd^ngEFj6pf$SVGa3u`XE$N9+Y1={3S zq8~$M*Qmmr_}EdJbHC!%2G7mcYXYX?u91{qRs@jwPfAr*-aAj3qD3RkExi9GK!IZx zzzWC4qdaKcVqrtA%VK^AW15ZWS(qRBv%8VBxjmQ>0%S_cX2Mx#oR0b#((Fov0HT7z^QjrT3ZmdB4zsc3;Ob2>}rekDsWq0OmcJi^Ejg;W>O}6aO{U zS3{kf05eM4O}C3NV16IvDBuQ4g1U1ZgSHaZCFJ_nirf>Y^|RcHQ8Hjmd@?91KkBhM zUi-Q9>{#%Dc`3nkU!G)9?F?!ysvc0*r~~?Xwifk)JV zTDSxQX#njTBIqE5F*ZhQfTtd$0J;3VG6Kn%ru?Ztgi~7{t}b?CS`O;~e+_6-TW`~= zR=L)GBH5+*92==H#AM4@_d_4hV!^V4X+?i_)H&<}q8Gx=xT>*VtAAN;23=nk#Xz&EEoyGM{xJ_AZxo`IQ zsxzwC3J01N&hu{O5eb27>8ElCPNllJ7IUs%6rYjCmS*vKYS!8<<1qp)@0`Z3QH1Ne{PQ5M* zg`I450>-w5n>V{Ji5?q{;gZU7VUJ2e_PB`Xt^rL`jC5^6u<50Lh@1o?Cia}uuALi( z{ysmqe8DQ(p1rAx7gI~;{=71Wf4mvP)^cO77s9yTz9YPpUi<>IweVb)e@4%&YK`qa z>mUbixWMzC^(?mj%gC$>zwb<7F7;+#EZ81@(&{6ri*brkGJj7iE3jIzBn#r16GFtv z>GfADnf=}S(rrJr^9p~KneY3Wb9~vlgB2wiDUXYup7`7MQu&`NmRQ^Z{`hof$DQ{P zD!X_dlM6FvU+r7BY_frU1-i@_lbIxTfrOW~IBuQdU_`W%b!~=wmlvK{x}K{Dw~>R~ zDh*;8WJc)zsG@iq6bsx(u{2>n*2-uTGw)~2X?JJmwf7=M!gT{Tdk}Jd=`ZGO8hn+T ze)q*-zIlt$bf;WLf4;0Raty?pa%5te$403L!8@lbaFJ>yvsH(8NEcD0Iwk9t@fJha z>FV$}cRlx*)zT@QPi>i?x2^qoAV;-iU+crW{u{;cnNP9I7;<{GqwR3>MCm^l>G#fU zB{q8`-+WQBg=ReVGV;#%vQP7O83vDG!|v7AMqE@lL08pIKrbsmP;t*MASw>h0N?`NslfBpu_q zyvA|C?kQHRq+>kR=KjJeJ@KF932z#hg5!#mk_Fkr(4?O@P`8^lzC zR>u8Y?$cEA1gdqeF%pK}5ac?1mIm))z;8Daw!;-%4AWth6su>u7f}LrmbIhwNWJCa z`n%BVW94sVu^&*A`>43nuDV6GT_0p(wW8hR4!J=B~y8jCzl`tX3!W}BSIWG@Yabd^)3v^>JK0_RZyHU z1GC=ctX;ATTk+S%w23+I|2hlz&lb9e5MsH2lMHBb0oy&F+DP>;i_RiC;>*PoYm5{A z>W?pJ_{|zqu7Vj2V4{J|BrkG{kzUNj%113RJmwnd(mfbrO6x4jA;Mk6`k{tj)A6Yr z%xryA%RhAgCy$QEx+~OmRjuTuMtqCl?*()I))>CP7wld?K9P^qWdLLc5%H@Vg|dN_ zBa&5&O?`uzb)%N-JCT<{L^WlF{{xR@zOjfzetdD8Tp6QV#*hpZ;8^Hc`c5vd+%uLFEds6pq zm(e~OLA#6~yB4rR#Y4?Q1_ta3rSH*9tl>_?BKVQ>xKPBxS%-gaWO^!K6u|J-U2xY> zQjM65Br6QKBQj}A+j_#ekA(j&UBtcPPjDH0p5?uI;?_QUmkai6;_=TsdZW&x0&*?$ z#y1P>{^#{a%Ot)|PVqZz_lu15((~^PGE-JPl@wz#r8}6K7IDM)>>FUht>+y<$mn)< zx|>Ne*W@iK%*iNfz@(LXQ){G-=-B8W`k2h`h#7hr7=UPoJPALM$0R)!wsy zc^lI;Yu1;PTl+i*m9cY+Hvd{fas9(T@-kxj0Nw*K*U zj>QtU;v}0culp^@pSNe+|BO)b`|!q3U;cU!%JB6Gjtdj{g!yd6=Ns`sT)Ukb3c#ap z2k4s>Da%GpV4g+1f`#ElmKf<%i!B$AcG@K|Q4o$aT1wDEcJ(DTATv}=i_}}igRJK% zR$2ArMueu-P>nD?+BG&~tuWhBm_74)olrLKj#+Bfz(C)HCCbJ4KrZ&}qq)%6X&3Ha z{)}lCc=f&4+?m6?pR*RC{D`Aw|J7LB8so>6#+Ut`6Ppp36pwM{;#yDA**s4V8M#V^ zjpSK5g63QWIaT%?L$@~lA90tGTH{PKz0YY>*u3J|u`b)JW_ef2JYJR9Eu@F-@b&fa z@i{^FIb$C7Ba?`5>41pvLDvvp54FMO@%4hAu(|gKRRxb})SRr)5O!$@>9RGOjM!u& zRt6BfHG~Qc9g}1wzGor^QF+fKzG%*89rfFzvfR$;d$R+lzs|FE4eY=0uK=iU_rOkU z*&D0D(v~i9MyER*iUZvuFwFw9L?rg;+aM=Cl3W7P#TizbiQdiFDBWObKknYZnJfSc zQRh9shmxF+M>KT10xJjovMABgEY0)t7w3?d@4C>J|iW2`vcJ%#EJhMu^uC2zpYuLKa3~wHe2Daq%fSqFqypm+b9SHc7z72I*n~ zBZSOFbJzSfEkyXge4iT#Z~w>FZGUf!qb%_00ZNS?H9OtX$Z_jU=O=$V=ie9qaNuKt zYzDj2`F+19{;BRmvUP%-gEFgW+Tc^IQ@Q!pB~v<-RB2+R9A7hUG9(B1Uckyci+elG5c z2Q6YnmhdRbgb-Ta;gT%x}fn!6=4md#`^F6EMCFmW@m zVRZ}P-+@`1rGg!;2@Idlt#9(DN9`u{_BPj7T#$x6Vk5JE>?d>Vz?`r+nQy=f-@Pj0 zB%4qwvznbl83W8E>8sO>)KDrbw%&F)$@=`LRW^^+z(w8Bs%?rYJ!pOkn5r`7)pz;U z7_po;Bo;_pDa+pa+u^LwF+nPSB+e}?e0^YjYT&L7w?elHgj{ev z1B27yp2M_O6`-Hutc}4Sfaq~f+6vvSrn#hEjVXVE>c*qJ(JcL4Mu_EE!FonTy=C*K z<;~}U*GVU*ToR(Hv%yGZ*KZM_3}X_z=!RAgP-#hQaT5E~EDMMF{{uM3 z#=WTZ5M`7x4m73c7&AIk@pt_mVGZ_4Sxx7V9ru{o!)>eYZU3+99l`Hi*7x$3lUh;} zxB)S(;n6WLD2oBmNC%@XwS_rMGEPN8M!9{B?^xZ+%rO!mt$U84*xjRhljh%B;2vQ+Tqg=5;nmCy|Y z%8)VKc+gYsvSTs|D+@{m(DR1wl1oM%|2j_1@z>c81Ba@FXGm{+$~o9ni_;Dh+x7 zv}v^QghhSy*7uziB1;UYJ+3$PRX@i(3SJN0D{ta2vu2coR!@0e&KFMZm?-;KMAss9 z)b{cdDy&rI+mzV5-?|ALzLvM@@VtQOMA+;tK>k@wBK4X`lFVU|SJ25USIX9BNfzm% 
zo7j8Wl9}gKH#LwQhspg5dt(xtp@9f`!puKZ*5f->o#oY?Z||5DF8{u5>5+nM!RIcl zID98;Pfq7t&rGxTfoYy)#6<4@U9qJ|{(P*uoJ(#7Ttp4voyC~vfN0^zLG@;A+P`-O z0Ghjq7_HghJ9K%r=!hi4s%VjA1CQ3Yh!}daZJV*4vC;b3wT9KZ_w{6Y>HY0La7kId z?O|Kfq}<*Ea4&i4g~A$9WREL60BpJCd^Kw|Ew; zFq8258jO&vXl)x8!YF7C<>=!ohVY3*dx&#eUyf$=*q^)Oc=xe0iA7u2mfd8QjP%})+GG18gT9v3_Mm`^$IE(N9 z3og?!8Woh{Z?u#|%h2MkWvy0`p!V!2Rr_)H?49LH#|f%!=)+7mW9z;L4EycT_Tduy zudvw=TXd9ykA-ph$F5x->HTMJ`-`dv`yB~L7bQqXPXE(lC9!|LsL#!a)zVoQkR=X< zrZkgtMV~C~scYh(8m=j}-bAcmk&@m_15B13!GAY7)<}xg>^7K~9sF=$&c$DC&is1I zrx1&8zVq(&CR(7=xs`SsdW_W zGp8>0 z=>&(ig&eB=oTVN4YJ5KX*^A%nK0c#y^EI6E1{p;`FmoAGD@P$z?o3WVgP-i7n85d+ z-_Y!Wwx0;xmK8hNc{Eak$;99zA^SO+Dde+*Sp`34HEu;jGGDDpydG{@$O|jCHWiG2 zy)F|KqQj<6K{?QsX585r+wj{6&+WIftGyC|+4U1ct#0v+1%ZDnRA(4=T_wE=7)xVo zjrF`vX>;)EOEwOEo9IsSFj9XoSu$D zyB5qBtx0#`E3rkaQd@iX@f?l#BjE~tr(VF*7ciFM>MtE2RmA)n6!^#AxT4cbtaS!L zq@ug)K-3510-#+=XHRakKYM}hO65RiCG zhzk(HoXT<;q3QJ@O#eNt1iDMkZWse`1$vTeF?{se+Vb^cla1Ou5tfkx>B$XV^}W9D z+O~Ev`uS!BZv7)OdI{~?!&6^Q{91VN*dzF0lGf|7`)}Hl>QJ{cjYsYX41+e=@2YWs z?EGF0YuFX*MW-*_-zFj6JzdDk<$Su!KRGUrnkCJC)$`&cVo)&4Dljcv(q}#SBCVep zg16{y4Y&8nkc!aOs~mS=^E9KIxex8<;%D_=Nj|u4UEQma_~U!-`?VzT4a5_?5F)akHU(xAe!=4v=-iByeXq|TbPpCSRJ0_&23)R29g z%(^YnJGR8f$3~IHJtI6BZn=(n1e+=R&t?2kZq+B1p@koeW<ggNgc)liM*2_kz9&?gTWX4mT)W9 z@j_yghinwM-KV@jgn; zbZiRrl#7nI2EsU^^#6_SIaT$G|7x6bSk-DPV^bk)9#@^_+RfC zE*?!=*N$~=6qV-mb=p1L$=de$n(urm7&k}ve21NbA4{?*?sdS#_U?-uy~!`~WUQ@LWFH^Hm2x^Dop*gyJf9*!Z8tZain>q(()Vz&H)n^~ zltTxEw?Mx~*wwcJXL-tQob`wZdUaE`ZOP@9#oJb|FnJG$g$)?7j>TaFfKKmU@DqT2E~@91v+5EW=uXUQE(y$8H`o zOrZ3s^?I1jS8*yeIA@!Bl7wZ>*b%)8{R$LKn}`XR`Eyxq131cY-Eag&B%@{ZKw@1e0p zVy+dZo}fa94HJEWIg^t(LHLM6wVV)dd_~u4^1aK;sL2Tp%zJ`R=eMiK$)u&)BAH~L zxrm(X6x{q)u+r`D)yQRU(VqSIA8V(HcB>*k?x+5!5I-NRi}h+$1gFcWrG^7&F9o-C z9o{2IZ_>7@frJc#b~6bM2SQTfT=3pJeyUJjYhF%5p7R%gBWf0Oq(9=wJ3Y zbWUv9^|}K$_OOBMGsn|bthssP!MSxiMZFC1iYHVzDvU3a=3@Q=3YaL$TvXlYor=tl zcXDLj%G104lqsy-Us0{#a;GuFZ7WVPv!5gJsg{AJa55tdQ4qW*517^J{Pu_A0~+-& zQ*2IJ_6S0KULAYCc*k>2WmTweH^Ug$@Ht>W(chTRb11qkiRcN_MgDJGfq^W2YNhik zlh1Vpx6;nd7Amb@>B6kjM@^2wFWkOeJwP9VX$U`rHzr%uaWBQ}U_s$Uc~*f(1`m4l zyrV-sGYKw4>vw9kicvaAG?la6vR1?t!Pjn(qu*ptW|`b>%#m0$@v~Q6zjS+B{M}cX zSHU1|m$Mc?FnrLwti8)NM_rs6ev)n_4fU(w`2?Sk0ct6M%hKh>e&vxjBVEp~5S$Yy ziGHX(&rzz!y+mKM1Zq3#Pq~sR9Dv@JYMTFWO?vU#p*X)QOHX&w zTizamc8x2b8^NkyzB7IWo_kGk69)@U3TS~JojkOWEn}~G-I$TpkuHu8k-GEzs^V=?mb)g zCHvq9-8P3anxe3?W_zn%J1EF4A%H0#6&GZn-B^;Tu~*?RhVrsDNsO}zjTnLes1Cuh zEQJ_;sp33us0is@65=L zG?ZpFvsp^nv4hYg-`b>Rdb%X!HqF)O$g_v=2L!}WfSlJ#QUN4|oK!CL2$g}MhQ(P% zHbb+sWQQwANdqF}Qu4$DovXY?_AfJW5fTMP zVWt3uIPqIxVy2N0P#No`1JRc%7xg-$c#E6Tq2afk>+~z04VV1U;P{;#xEl0)rJa-I zZlVW#FAi#7j$v+5@eH^V6FvXo` zmgBhAv7NFSCap6g{yrCLKM9(*f?f#zAqRb(B1NI{Pb)UeC2(z7$@A)xj3hQct5nkN zxLg$Y-K7*CCU@h4zB-U4v%u(Lig-cy*M@D+CY;t#E9IEmI|w$*kIsaLqsUB;j2x~Y zRxiz}ocTY@5*MXDm?_Qbht>~CX&)e2V=ApRVgozi80T3{VKwbNx4i!`dJXBfU0o}( z$mQJx_w|MUp7yI5cdZ+DbpVo1)Zn*HlA;VrVFJ=&xyf#W;PzUQx|JvwkfH%%q&#qzc*`NkJB(DVdWBj}V8e+`>WYH^`2jKFiG0mWSw%s#mF!P-0mzm~ zHyn#DI`%~#`9CAwF`~Tm)v@NcP1n@rJ9UKR25eP0_TT$Z;Se!vCNcpdaNwT zM!Hed1NzD5MQA|VR@`obyN>~v1rZmiV=)Dp^; zRjovDjp!tnoR&KYh5`}oNSzwSddaah7OQ^xZ#n*@c>yWpI~&Y&-Ia}t-t8I^=q>;8 z3V;lNH-NjQA=%$>CL6JUAn2|NK-F{-1a`d!0K=+>dEQUtgxhmtv><5!JrSNR4nrtS zle=yCR!tgeuoXEAru55c?{$rZIkd^<8QL3eQ(QBDS@W?~6x+B1zS9QksYz18eZn#r zUm1=i9c+EBuv>N#>HSeCOGA6efv5Q)?Ah3BmvPqUqqPB-v;vpqhaG)#5k5f4G`Ob; z&ICNc&Dn~psKIB$M3s8;GMH2pM)H@YhjNLft!Zus&_m{}9Xx`&7EtfF1=>~rDkmV~ z&yp<{+4RYk0S)U&OFcnHz>G`SB_r&V5r1t4xKNo^L;T?8|rB!S-X19$yIm zLQVQWIw~M$z_kQxT+K`?*25L&3vHaEVG~3*QkS2mOhvJ0A2)*`4+A!bOSDA!R4Fb5 
z$vCFN^FwcD{2*HJ7w_O|Q`2#QhD&n>aenGso@v+!0Vz*P3WchepRaYAllr?IcN{(P zYRcnFW1D@XJSnOE=-T=@-e;#`gYFIjbTtNLJ9oBXP=K}6NUGM5ykV~*eG7MK82_i` z?cb`OXt4jiSas)Grqh9#H*(x8N3zu4i0`MwIe2H)VzTq zq5#1kY$cW%QD*>0mx0eo^)Y81zq;J7Yicvqv^6+hSzeawtW#|(zH`^i+zlnCWG1uK zEhOOo&`p+EkE!(Pwzg^>0C0@y6gloOK*H_8cs&W%0{L30=>YdATZRdsP}U?+p2#si z0QsS==A%q+xkqmubm4!La_&TX>DeZ=b>GdS^hXBj5q0@Cb)S3f^_sJ|ctIx`0O)rM zI}f-~KN~ySu=L;4?0FMWZw4@7hQ$*3O*HfHrqSev#w7jZO=|d5fhng(NF0)5aI3OfFWxzfhb9;0q@3LO@qj&6#Pa`a5fMFd*jktuYTtc z2c#5BI9y{xg7%;o*n>)R4^6E<_~-fP)$K~_Cs%^nf4G(N-22q4`c4VRxtfk$0-{`f zePb)3i;NRQqZJ#R$bhX(KQ%P{^l5Etzy#*m$kO#?AgYx|bBPZJaJ8_>gn`jEK<@d5 zw}x@Xlhi1~UhuPb(c8TT@7=XDfQd=CZLP$8Gtdfq6g*7>!B+&~a%>o45qB^!_L#%u ztK8GU^4${v^WlAGsprN|juuv1I5G<}9{zv-bd#kiG$}B348X_;Wl}QIN-8p7_>^;V zrDU{%?XAJ~0K{2Ra)bZ*+uvPsp01juAuWtd?1j?~4pN8Yw72JIf4Nv{^7yFgg=)0l{>EJyM~ZmYx}mf_C4q$td~C(wBXkNjwRXQD0iNE1U&R_HF~=K z<^^Hxx!6b}kD1bMJBr4{p?nmsj!OxFahXWWlIx_ETFuvPtUlgeT2V4{e*VhHe}Hqh&*GM^lYv+E1suw(zvC_TKZ4)@1Hk42 zzwWKRLjj#3(^3P*!$>%d5ZCPv^N{oAnro>WauewHy*qBDQ+>I zbe>FJ+&M69G+FS_PlnZQ##-2selg-s>P&1$O@419AD<+jG~y)EYyA)@3m~%!cBtah z9V#EK8UY291O#y2ZSbJIF5ZvC)jq^pL8}}-+^#?E5I(~`xBC8-+iklGLG1mH7tIAY zLA9q8*zN^@+!^4*c}KJby81k6a4lI4^e};|J6ND@b&!j9PjQdS%JA_2)q%BpF_OIm zoc*Su*2&%N2j0N6H_#`>k_$}x@$p3$iUwXDwEn|>gvML(;G1B7#j5M8KmJkG-%-8z zlt=N?^i5Ca1GtP5BZ>CqvHKV58g!T&pCKTv6o}G0O&a90+jJ(|-rxkSxMt|3J)}Jd zQ?%&A-10|rXs_RtgOLJKx$cRV0rWN&OQR`!bvPFdX8p_C!HeZ>9u^kB*sJ#Yp><=h zc+Q@PwvgS<-eN3@oyY1xi&7$D;JFBT3^l28IBRHK5F~B*Bg(6Q4wAW;dtU~4QVeef z32H938PsIHQB&hbh)t8!p-HNWW7=SEYO;i8O8*4?H_6;WV=no`S<>_YqH}EM@8Ro5 zxV5=3ai`&*nMLpWY6;7M&J5lAT2H)Szp-cG7jOE93|ky5E!+t0jFIAc-jYvFl6jv# zNVW`z|N18i_Tsk8@oh;ME)Uc|>m6uY;p$Z&>#ZOO4Lh`9( z8%em7DOM^>xzPx~>xf1RV-*5ReF_;bsIDcm4)hc@?2QcB{j_i9sL%TThM3A_OP?=2 z|M=486882)->lcYb}fIVp=&H*4p@w zsER5&V-ex0f!AK@9(bUU>~PA|8eUl>WQ05zOkdKH^RLy?#_+=-{oEBo#y_~IDqHuI zZW$Pi&I4Qbj~nXqi>Vx6T|wN2$0rtUjurcI^s=t^bI$&9LTV7Hh|CK=q?Z~$kL zb6U6;kH_G0rd{(W=8bWYi&r9Cof36?JZ;NdPb*lhJuZYSF(kr=LPafp*W zZh+{(u(uG*H_Lfl;{RHx^H7-twF4@QK#nw7QWYs>;Uj7uf!w$os4qMf_cbu|@!7=< zp=7HIH;HZzz`#e+E*Nk3S&GSD$aTGInfO+v2l2ReYT^oK{>ICA5p z=e*j>W{(~#t@y~zkMi@2G5;$*znL3mJFYKF_ly}mWF-R{Sz%SiFHSBaKyrNX3qp9=C=?pU43*(u5s>?x03fSG3LPyg$0x%STdpv=z0 z0r{Et>1n5quG5rlJRd>gbQ#&kM8!JDPIYSo4ikvG&-@?5r7yOhG%7r;Mbz0$ALNdW_1bIy= z!A3Y&hR=*UqW|@s?1sOf65T-9+~cL`5%i+;fhN~V8P_5`!4Yp#+Ko0TTXP+?vZZtM zfL*l)0dYnWXPAu1@)NE8=q3s%(~z=~nJ)2B=Dpf{?^KOKn9yO_RFLcF5L0L_ZeY%7 z$u)^sN{-SQHdP8j(TMkATc+wj&t$!qXA;$c{g^Y}AzA&jl5F?>?wozRa&zdg)6lv8 z@a1j4CGX z^ZMT6vJi=n(c+XXbOXSn7V4u_dV}LOVNbXXdLz z;<-0ETlIpt_+pmy^bNPrw_c&cN)~Pd9;H;wo8b`B+4`dO8Q9z>MfZGr$EgA7XiROl zj!MYMk9T)KW9JC2#WKa~?s_JYAaNT(ieiU6$qIhg8A8kg=Ze#T1w|(wx(C?K)JJrk z#ylSS*L&w>q6FrdtOz$r(EzsX9w7>0p3jJ^cV2~<5QS9w8Z`*109LY`Uf_P+B|2Z% zh`Sq`mxZQ(J0`%irXrl3@P0thkU5kimDsmjUw-GWZH$VkjWSoX99eOhQ`vggV_@5L z=g;YLq&Qpa0Res&C^8G9bR6ELel{2oN;$oX3e_}{oxZ~KTsgt7`Qpl_=eJuu(MbJV zmYK#+h)nVg%6p})IB7*Pc`H?7=hLb%Ye%7g`JH6mD1pP_LCsP$b4urJ(neo@Xu~i6 z@_XV6b3StmnHyXVMh<=QH}c%kd^n~6d(BHh4xCc?Jl8Fy&S-Uvz3ETv%SW-M`>w$M z;h*AnJ0B-Omr91+LiRUU75dlKn{Jn@Y>l!5^G51T&F@Xxe%2&O#z02Vl%U|BNOFie zr*+o(+!FWY77qiyT}0Lwm$R_B+>Gs)z3(kAJ??Ai7V2WUX$tE)F#de*Rkp>J+Itsk zZju%(bD(MDq4#ymDNCnC*!$@2NyoKZA5BMPS-R2=Tc)6>+k$=%V4WUW9v`K+cJuR> z7284shQ<@_2M+8kzz!S)ecHTely>jEGQ)x+K}h3rfBj-#jg$683r$pKZT!ITau(FP?vW)*d-(ND5(_y~vZ(DwGl4JyA6pVMC3$aO# z9OkdC7D`-5qvs7}G%vrZXh_+r;tnIv=Cx*)_?4~Tj%?@iCSL9~=a9oF@HJMSz8$pjr5rLIi#5yhmHo50H zM9b|>8)4xVsVK&e?|!^J7nlx+knUSoUYiSuocY*{2lZ46zF}cyI>B?L9NiJ+5|=+x z(A(@j*cV$p`DOVPOS=r>r8rb)zjW%#xh;D)T3;MI9`q{emcz-3^|N0Pe095Du9ttm 
zJ*Xi(NWEI(cX7q0*{hxfjejFn8xCYGUt6*@{;p-I0oT6XxX6Ce|JYO9tdWu3gq(^s zW3MN9hm@0+P1TCc?vnsj2QxNLDZZ0(K#Z;1%~LIfl-O2VnLiioY$?RXs+9y10^FKq z4cP5tT1@D0WHo4)gD4jE9ouY(igC@BDGnR!ZO7B^2UI`^SptBLLJ?U2W63QVf?GEb z!xC&g>>h}F_v?P|vL=Pg*gZ^|IyV;WEl5F|bdZe(t*3)6YhinX!rlo zKJDRdBX}TROpj}=F(H%f&A}**y=$6K@ys%8tR>t3J%z920}&&yHpI2fAXKFf$~iw5aAAPv!5 zCMviYBC7<@8pR7CF-HLIimHn@asqO^Il7>V9_==j!W3}g=#*hoDmu_;0+qtT`s&PX zSaEbS2A_)xNDwezkI=%w@aD4%<+*-HFtNH2$0e5!SFpJY00>xs1JGFU1-Zc>MWSQt z6xPD%=1#EXCg{}-rrW5}eX(9zhP57dFS#L*W4qho7B?H@*eFM>F)YqQ9;Dzw^g$a8 zS$Kcz7&2;hpH*<*LMt-YAuQe8`N_RP1Q(uTr`#jvAcx;acIU>Q3f6+ChDskstcF!@ zn2j`23J*v5reE^vT@6c`Z`4q(z=B82mtTM?9(#k51=IXL*aaz2zBtUJfiDjWrh>bM!Q z3EIpU2VQ5HuFcF3mlDFbfyW{Cu&y~vsJdQu>uEZ{3{w0O4YI-&jL9fV=g{QoyU9Vi zwZ464eERNoDLT6pXGpEzE*WS=h$-K(`4+B zZ`js74=to9hyrrd3ae_+QKy_pQaTASj!1ZJE$AY^C>w(u5(eEhSl4O=O6gi7U?74E z5Ma>X@ka)QC4+Bw;bKt%P>#^lO80Ke!YN1Rg!(W(80Sqs0s%)f+6mWm??=9z5zlCa@`BC3n$SRdNfiV zE9T%}0}@Hf{k=hj0)(as$_)v*M`H8Ssuir>8c2Kouuu^jUKTRNU3u$48tS^DaupvW zbM9E4i%F7z>~L`H#N(8&zom33%s2zQhl;Ki#&!8Tn^8tj17;DR+`>}2KAF!@14VBJ zJl>QZd@)e+`>^M_g1zTJt_~YEteW)|csqhnvwZjw+RHoz~G*=*UV(l9eJkUE4aW zqm@bsiz0*}$?3kgPC^($++-z$u$=Drvio=a{(;sWpKG7%^Lf9Y@8_!o1-tUhKsrVs^|l#OjN%Acu7QB))=5dQN>AD(E23+?0HY}Ew(>}mELZBD`m;}p;V=fAWE zGQ`Oyy+wQ^LI#C*!m?C^pR~*jPQp~#1uD=fBO+Z5GE4>TH7s)l#6c>A|8kJzP`>IU zyx&w7QI6`{@Zue?&NI;@$n&5au*ac1pi3;^UPyuw+DE9z6i!~a|JU;;lMkQh;X*A# zhWnjI;Y3QIT>=gEG%CuAle#H`I;BU>T}u|08s4cj?+ku-{mq7n8gXB_0Ay&yYY!gu zok6(3&fXaWM=cH*M=q5M7i(a1tLUn8**`Z$96CCdTfWh!Qdc2jo0Nr@RD^3Fav$VH zxAb{1!DS(I`XI-yip%u!lA6Qze9YE5h6MF^%7yu3j8OlYl zi#BowL}>1k<)C@1&@4fek$x&bfi6&0xH>@o4(0l=F@ug`N_e&o9<1cDKp*k4j*88= zvecwg9&|+YcJmdJzU{13G)1KkLlX0cVR_{`IbhTmeZ6~?DLQJaVW?tk`C5YnBT53= zQ{kB}^mvo1`&&`8eB7@U>3HNEt()y`5vV6is2LisiEIjCKzQ6VhBA!?j(yn#7|#gd z<#EQ9rLprSO+-ujP{+Cewnh|$L;E?vxl#IEw4?DQm~}ZqTNx%s zoQ^i>+k<1G-0-(+4y*&vi`UNYtZTd6o4wlOt}Nv$H5Ow5=Oi}X~w5;1oM>1@K_Nsy!& zK=80!3p=P?W?(674^F*D?v%{8p&;6;GT@-)4Q~L4s+xaE2eD&w`xc6IH;avdH zaThvDV%VxC7zrN;D!{gDwO^~q*+=Aknj>rw2beV5na~#omf=2%&JSwEACrAjtM~`{ z)WnK+YwLsdv!LV#2+akI)x^k;IztIaN4dyv=JnG=@tLQ8fr4yD2gr+31)2aZD$&B$ zwTH_30=rO7M*JXu#CkAj9S;-YgFhMs92EolYWEI}ZG>`RAuO3eqG@J)?=K*t7K(Qyncf)%p zHT(bvlXxnf1Oz;YKDH26fC~+M5SFUGJ$>+M3qg7sLJk>m^?ZD)UpOJ{A&5AI$ML}E zn%!%kLZ&$P4FJOT?#s8o+07$9T^H%{LoW474ocR`5%z~Wimwb8AMjk{ zHw~BrLS2~<%NtDT6&_ptP&*|+TB=jf0SGKl6sZQ$^D$op61#=p#Fzk3)q^F#b%%_7LGnws5;eNrmNFx5X^f*!CLNPx?9F z)Ct<9fvCO7Y;7Qws1z79yw>ld9Mun?OHbvvCdY=eOR9 z6p7HXRcu-E?eQu7^JnzSoyjc}UgK0<;AP02;ix|Xe0;syg{!`vq5i@308SNaj>(e$ z3jj=xA!YBDJcY>or73p6c3Y8s&ijf4q)z5$q5zrHRA$Hb!Aj|T1P({xww)--y6+Ns zX-eT>7oDg*5t3cY^zB575VaID2Y%!1=sr_n)6h-U@p#X$%?~>CY*5(hR_6qn%=fE( zjQ--}QU8Lk!8Q6mNff(Z#J#7JFS$nVpB~X`kJ|eFTH4xjLNlG6(=C{r3aK`*@sUXB zY5h;}`eyzKA6PCh;R^5f|0sln#huSKJiHiTtX>X*pRa7Lv$&MIzjv%*$~p+6Z^~!g zm4oLi;3BBtP4~VFNRp9df}F<*LXn~g<-fLY$+bAW(4{#Gnf<9avVrILxO%bXC57tA zqPKx_FUdah_~{$FDR>g9C?|c|uf@dVR^RdmZ|~8$Ve1kKA1+}dUJh6%Fyr2jUY^%} zy!*)EyEf`@{Eue0SnYgg#K{){4FOKI_z{f8tfb4`Xph~h(TON!w}kpWxo%@%wfIBZcCG!Vvc6=mFUi! 
zCm;;LgdJv_I(u$#pPoOR4Q>dL{S*2Zcmrzh@2s>eB%&6ukPCgnxORCRurtnk9+X&% zgZl@N*)SHl<$#u$LORXUJuWfqsu+9x z5i8=0$K0^yxhd=+@{)0uSjYDy#fU&P$TLb>XKLD(qQj5qK)MeKb(&(6d|x;5?jDJ7 zh|QaPZLl{Jvv1$N%q(S=QHCNhH@(H^O`Cla+*o%ThX#qAj;w!>WPj#?JThbN3`nI3 zfc$|!a5!8&W^pifNtaxNLis>SgS>0(aw{fZJ#6Ddv&Pa326>@&HS;&jAFf}|3v~>e zp@do0i~>l;agCYsnf9C$uauyPK90^t6>@0DwZR^6Unp(~`QoGWer~Tp@3ffG`ac2k z?ZHcefj;#c>8C#Ki*!O%}u^<+Ywa1h$HWWz8Z~-)D&&6?nl*3G*gZxk&1a zs?uX;s;S*qqo92?uG^$jqm05)ey|mUYAi$B@4ozHzY(`LabM(id120x;w*SB#6Wf) zlXhhdfW44NVK*pAe-5BTNPxA`d&)D+H_BBP06@`#jwvr_BYa6P2lN`2ID@*?k}~(U zcHDm*H$anj2f}QqRae*u>Rs-z{P$X6EskL;xrKY)R3OuLwlTHu8*uDMt1s5@ECW+2 z{Y(04i-SKUwMr=EgG`LuJfyAi9f9{kco@CuL)EftN|%^4e5B`Vz2#N6EN2aE7XlzI zX>PC1wJ**8{NbW9!*v?Q&bRbNL7+ZYb}xgDUD1`kmG^7ykoG*GfHr0Hur=7IR*gvx zp3<6S_DHQgc0f-Cx4N^P#$7~JSabfZOdK+@A5s1Bd zXoebpt^V@Grp7((tna9jkgKS+FO`eU(W9IBH!ZLX^--&RnOtSE zPDBlZVd89U2&I%eu$0(9$yDdbzEFq4w7T$F5FZWFGx(kC$F^_pwOMUuDW-Jb4YI9lGyprm!fDkF~j)%e5D;EcLDN`l-&fxFG zneYX)H{#@Y&c6B)B+7TRB5nud0eSG~chuzkkdea*Ul%1JSvSda{-k{|Um=T;( z+`4A;PSxa?cnHs$q=L9ayy$P>*ziA9<<2$%kx3Or#usrN*<>FWBWfqU*)T!` zDin`(bJ0T{+VM~t!H;2$Z(s(~iF#NUh_ke2+}1VroWxFIY*>>cI-_No`gB5MG3+^L zV(trUN+-DS`t{n_>B>$L5Q3 zvKxEffp6SuE|1Rnd|m%!vTvQCxKN@(w(U4j;q8w&qIX9-Xvv6Jek#FpVF2VxDwBrF z&F*Zgfyj$2)_imiisW9zVA zfC)UpF)RSkPO)&>qKsn-7vkwt>i47`BChyVaGr3*aoGj zl0$U>g(WV1By*=Kit{~e7t-3yohNHO_FNt%Zz&0|ah-Gfyr;5OjPdMVLCkx%QrG#) zCX?h#&%JnT!o81A$Qcy5ahDoxqB^L>RZzaRE^xl;Kyq%07L>7Ac!M>#n|M!^P*(w9 zJT!==lgsNk{a;M);mI5^20%7iA3U z14=)7_mMw2$9W)WbiTy?H}uvoD)aINMfKtTt|6fl*It*$fQjAz3%$VK!mWRN zC(l4CgcETMW!&PMT0x~y-(V6It@-BEyn8!CL%X+fIv=U$bgjw!lW2Yyk3SdfDme-2RF#GkDdHlXos3` zF}yACsVk_32S0$T%Ug0sFsKxc-B=Z(M`_`s0J|(Vj#CR*z)amlgO|e9;!eYk#{%M{ zZodB8pP{IZJ@{aV8sG2H-4ZYu6fkpW@I_DnWF0s*VA7KI`J?5h_CyWd>PvZB@FURt zTFh*dUQx(i$~9;etYj9j5@5UC$a8Mz&|%=7G=ATfGzD??e)k3iCWnP@0x+#m>nIGg zYua3k2CUWno(3W#6Q${_@cExr`r~2r+Gxy)emy3e;e&kqv7xG?pX!Woq$%~B`=MNg z-fuQt1Dpu%x1uQt*Bi0RTaY*iF(`EJ19urf_=J87nc+XP$?uEJQ5`JL1X1R9wrLZD z;09S01vA<>cEK!f-)FvQWi@)0es5w6z76aAicS6bAe2d(+F*1le&y_))r_vkM$g0n zW8$R~VitiBXwbp7oMu;~eGW)j|5J72b)x+>YuYf*&_P-I`{_#Vs#QJDKgtvon%TeL zvSi{B+G(^lF%C*K8j1)}fX)%# zr*CYOLw??^uiU;vz99Nsut14!Ynk!>9+ zTK=SdltxcqwPCP<<_5Ff3C<@GR&9H4LHE$X)Z_SUFKPcbNMz(0S=d;6vnVucWb~F` z7)t|p9t0f**(D7YS^UtZetcWc&Hb~r_j~}bm39AJ+ww0g;vhsYupo}HbTrX5zBvVg zJ-Fb0yKh2blaTCx1ThV0X$qw&lZ|1pR)@BD?>La;*Ok`0dI{!O(s@e*OiNxFur2{^ zO>jJVf+|~{F0E+K+}~AwciqyNpwo}vSNG^M3AZ+x^fJ&sWVz*8P~| zmdA5f8~1mG+>@0PG7kRq1#Mj;vr<^-JeFPQGp{a@_49r!1?DDi_*N4!)ALAn3Xl<; zL_;CU4=V2tft&I}n+ikQdX}ku&9%6C!teb%nnWJ>S;w3P7sh~{4@VaTiQX?m0uWg( zBqu;_`ym7lw)+M7KMk{+#WcCqWuUW`9C>xbq3-x!2^p7bzTe`Bmryp z1G+?!W1HA-P+Ymw^ls9ds!y}`z97GaZu~}YIdjaRW=dQ=k`sI#7lL8Cy^zQ())TZ` z7Xshv$DFk^{jk+=TO#TRO0Ky3y&=k1$Rh)#A>g(@>z#v1#(9a|QUpP^~*vBQtg!P~eHwpwd@ zpEyMg5XT|X2%FReVI((~pBcTiT>(xZP=^Y;(TEAo@ax=)gO3%5zd*!M04vJ|c}BUl zZ;?F;D>d7R`pMuMpbIw>P1r^%wr+yR)*qfJRJaOV-`fF}ObL0F$TKAq(FK{R!MGCu z@UwBCMtiGFANC}h>v?m6o3f}G*AG2yHVXrtKULL7VPa|@sgD;Jp)VH2C)A6L|Bxn9|lCR5Ga<80yOVo z-TOw;ahp8C1ddnd*F7pu{hXO*31y5yPbl!ednHy4TGlOLxVLwTltjx2mfD4Yan#|`o_@b;|@ z@@06N{=4a3Qg6nx9~r0Ss+`Lm`3QSKZ}vYR+hQzIZJnFVZGShis;ifh(PN*`Vfd|Y z1y5-h#g?uS*8fn{$G~8x=67TiNWaE*RQFpli$SiEq3-v+ z7Nj)<`t8b!>%KoobX6pJLw%qs-6i^x;W|-Z&aJG6ehZ_IJ-PT1k=4$;v*dsIDAp(b z+CERUN9FB-xt;TpPtGcgXXcaAs>sc!mm)o!(Q$(32i}2^9qdabw|N;juKstOqM>zI zwl0sod!YU?Z;WZtYq!IrOfkv~+ZK`xS zOXN7y?~yRz8MTEz3K)%xyvIde{vvGIVhoq1I531P0>Bc;Do4E70qyFK#FVhWZ;%xO z_Iiq}JTHRX3R|_vkvE`66xnnEi@{2l`{sZl9>4(|$Qz+4rQUzk+uvxvudyi^cHT^kO=Caf;OTwhzh|9UMeD4C1+twLF`RL 
zSi~=}9tF|AZ|eDotoTYAAEZXHUwxmC_dzh=1l^7ci*~HuwXnU7y%V}ICAtD*J3pLhE68Dty=|E%7B(iF#P^L<#x-(wfv z!fYsbGlC_B-fgNjkl-8aqNknVA*E$4j&V|x6A#{UF(&RSY3==!aJIc3GgzuoC@FS* z&c$77r;iFy3Qs4QM~zei&WRWB zCy#|(eY_5Fe#H|Th+>Rbb+u4`ta!;K*EP6Zwj(_dNMU5(KN)_?$i>Vzb9$sX?s)!$ z|F$2WUJO(&JN(zq!~dJpcPX{Xec1UgJMPXb)h)DEYi1WK4vpULwzf>FClW8NH(r#| zC({ewLv>m|K6>J76Qw=quCB#?gH*rb5BHE>PX$xKdp=DZEHd6$7EU!WQ?2(o7ku=9 z&nz;xRv&k>LWh|(!+&ri8jm%yRIt!{5m)&LVp1auZJ47`Ksp2sAA(6w7D%EgSV$e$ z5vZKHnkz&a#+r1O;miOX2n|SXpE^lqeSpwclIX*ypNSORSJhJ|UCuv0zfeezi;uiv z@b%f;B3z8W9&0ImYlYdMK@ibB^OkA$`I2XsoiE(+LAvzZADKSx`LIJ+VVYI#h%pVN zR$cJ+%D+mgbSv35Sz%n(Wi6q1$!sob{l1JJ^_}ThINid_w|?ESEB|sPHGIK|9x3bY z_!+$LE?-0!65Wqf`Iz`wi;134JAN^>^z3dfJ?-LdM!bqHCfEDe)Vs%|U&N3T8r61q zo1B|Mtd9l)kQELxk#3%XE|z)rDG=sUl1cDCX$Z6HVr*Lh2%1pANt#z3YsWgmrBGNLGctX7#{S@Iql25?`94V5OfEGF z(|x-3s{UPE+0|$yn5jLQvNxfhWa7+Pgw)}^09M3Br`wQiJ;!Cp-&yqUxX1_Ww{6){ z2Rd6v(#*H#`*x+(CkvFv%$E`z6r#n)@Y~IjQ%d>zkW4}u1@%kqivoBcc_;hR2^5KtnTf{TgZ}xedIO^NB@L(oYw((u!@41_Mdvs0u z^ha{f!rs|NmV4`@B=GLI=KZ(4i+;${b*;8|U00bcGN4=O(i^Zt)M1Xjj&M9Lz4`th>$n}lkdg=PU8dm^SO8&kOWs= zNB@KrqDqXVrA249zFxoZ?Ujh++1HCM|6aK0zo}{@oh8SW$xt>b1#&_o$>P}xBwI~< zPlBfFSoUUbn6`co^mcFg{mD{0UK?0(>D>KUWX4(Hp05#@>;K@u#eH~=nWvtE_cfvK z-L1i$JB|K2^bTQMJ2vEDxvwUBy1$_hct+3TBYxsD0`(G63?mQ;PqkA#xVjk3o`~P! z>oJA!m^p%t^__Mi7qCROT?IOE{$i3#ZdH^G6U_{O7GE}`$hbwx;6b_fqo}$R#YJb071{P_W^DZ!!mbl{W` zQ@Qe081~m3gg@vfw)v$(=ocAc*j>tV2m_db>i`LbWgc%>aMf6yXat|WF=^Fd&!;GP zBS_ja=8>>(LIP7VVAZOIU=ytFaS2lQ7*C62(Ar6N6N3E=1$V%X3*3Yg*?nfrXXwH8 zx7M9oC7tNJV*kFg=y!FxGKwOk$_h#K@{o_B|0Pdmtv&Lh>y&TpwiMq34O`xDj}Bey zxxam8?bfj!mIoimQoTDS3AqU|78z!%57g^6KIDBawYJFRE_-E^D1U^F0mO$h6ev_^ zGhrQov`pYDI_z^EsccW1ID-^uKIjzKE(vi+$FOowVHuPrtfClEh!7}t5=BGK1f|-0R*UlDjy$&juq}ZalankL;*dtZ`GWuMzwTDZSBcD z5z(@RaiwoM^Y+ey(|JgJIF8VuM|SyQxj=jhmYyCpM}7`fo=U6FZ+%4@4EfadZgffU zAoA#6w><)X=f&1Nu~!MR#FpQr*Mm3RTfTS)gJ*GFhr1SO{_@sA&UsU1zIjN;;S&p1_vQPRoYYSx+8MvJkYd~^`Z z-w`xI6K)zgaR$Id&FV99bP{zXAhkqONgM;HJ0TU&Eg;1AyrT7T%eUqU36U}c^|-`> z!NbeXb}V_Z){#oNZ+N9=K(hd!pYMFEb?mZ=xr7*-{AEvg^yfuYGE8e?5Zc|&qhpPdc+s6V4)u{e%SrbHw5}J|k zn&a|uSC!`E{7EV0`^w$lS8v=hoM!Qu{bR=Z-x1cUD5t9k(lpCeKlDs2rRv%-Yhz0- z>A|4&&(_bx%WPVcuKBS^N+2ET!90rk2y*WvMQsQo+!{OiM!~ZSa91S#lK*;e&q!r$ z)-!2ZDzk$v1>3G^ zmXCax?w&M_|1^I~^;4xXhpVD3_pC0bnb|9?c{hK}@^p2&*BKNBX^qb7o;^k{cMT{} z>*b}rcW*bi{4T+!%MKx>2-vZNb^~sEppP194;tII`>tc5ACr;m==f@a&U-ZuSBorC zQ18-_CWE9b7`FR=#hQb0JM9Qu50(M5b}mdAQA?-7i)pCTqjyuMWG6ns5?nfB=JZ0f zXpQ1^>O9AFos^#y32s&)N&yO^-Aq1beqPV~>^Za7g_H7zKC`>ElJ&KNU9_9l@b`2Z z2CwF?{kMlX?LKKmj`O47a|W};v*)gl@XT*XfBq#j|I3>8tHHcK&0{Fdp~TDv z1$~o}*Bp6!?cV=v9_;+h+!ZB5@-q8>WPg(Kwdh7bH$@>}@^R$g8z~|T?6$RUpMAsE zg@1smPlyinnHd>5U4hZ?N!=%ru6)8T0Q7|v@pS4hHTY|IzdM~8k%m{T#h|A4H#&ps z1l%O`z_(8Zu5bv;q`|isQ{LqH4r^)xw6GfI+S@qM-&p8y50P^l#f4-B5SCzTK2{+# zGcbRYC7htT?&VGL@AVCHX@45sjh>@|-?7D1Y^S0pI zdPOBLAk2PSjI$)jIR~>&tNxe=B>Ab4!FBc<6eM`y7$-b?CZB1OMmhP~VMC@K&yBM3 zzG;M#WE9prWd|lG^lYp#KI(O&r%`aa-#R`f_=G`BJ9?sS4Mnj@g)VL?aMrPBcFl~f z11L7u=FcYM?zLOcisG$2IWHfWPv+H*oLh9K!L1{W*1;Uk;pf2S>@Y72##sD^2j9QM*8VCC>TC~M>lLl- zQk`K1zjV{TCWwuRZ?xQGT>E*|(B$iud*)-5&>`#4fp(9-L7KvvHrnfPU1%;~>mVE; z-n+1H5;yv^Z1Y`|YsTC2&0{~XxpdHQpA6Y3(>=*rl>sSEeqZF>oz&nFBCRHO___{mMAEOkom6uE#GHL@czgVVDY}1(@(vbTeS`ozojk zfBal#K%dmDS7S?MWb&lxxYDwguN%>hnF{%$9;(tz-Oh^S6V}Nh06s}+TKc1OTT8q# zH)qwooSWy|8h@;Kn6Z7EO!Z#D+*nL4{GM&5@9sN85w1wE^AH2>p=&+!a*OirJFu%aQ1r*7Q!8{&>pqI%zjf#1S?$zg?{T+|>8`_L=qzMX_SY^$ zOcS33O9LOPNg9A`1Gl0cenu!M3~P$w5y4e~u~A~2>6rHrmPudYA=A~$biKRGXD5-r z0K+LYF6$#P3BcvpY`uR3rJ++{{fV`xJmfN0_!QyyPh%?fW~b~BZ0Ny z1Ro90kIks9ZP=Tv&g;^IFcoQZtlc~mZ}?o{>;@GiNnD)5P#T3^b)Ug6^?nqryakMp 
zwVTz_;b15H>}z1S_0UNAi`J2gTBj9!=J#M!hpuJX=l3Stf(&;*%G!NF?=Is_JKu1n zPNJ7*lJEXkRygZbTZ$W=eSr7aodsi70h>Jb6np%<)cMocqUn*xaUms8cB;49`oE)1 zR*?1CW^wo(=jhWjWoLeQ^D!|%)hjwuYGSxxbSC}@DT9Udb3yyD&=Fs>wRAFX(qO;R zlxIx=tSM1S)7UipN~J);gpcuQ!Pe8uZ}V3J>cfr-*JHJ*6&(`T%4-9F1}xWnP3$Q{q(-gR z1aAA8fB9K^eVn=ZLQmQvGc#ss?_SWXMQGZF?1-LhsM}~vUY^kY`%*zu-`f41samrQ z_aApg;*Q<5)@25rSHq7)Rx|m4= z%U?bJfH^g(?$SRr0G`%;x@hm_UawNqj)SIyw;K#|8@sJTUTtZ-Y!SBpoBQ8?`u;WE zaJyyKkZ{jE;ja5DJq|v?73D`pM_<|W>&pG0f^PHmC*Q2=wOz%tL)st=v__y#;BP%6 zHhEgnY6t4((UB_wOgEp(Swu;P`qSu`H!=gvaosB?_s8VvcC+%10b~ptv$G%Z3($3! zZ8SJgAwgHCGkf17xJ(7AQSmP7CTb`Bq2Fs%lnk8#U;V>UM#i1!v=l09p*h_lpOa9M8^GLECZ)7QL~ z>Bh5esIwv>finPR?0Y)k2~c@yC*uK(fe?QSirL2pJlC(L@DbP2Q?fLVD}&Ku4&a=@ z&06@9&jFt$j(_&~{ZM$LA=Sd^*P=zG+q3^?5-$8dh)R=fO--^MY<$`mK=u22nxC3Zey*4=LFpL^7?Bv2S@$z4=0c`r3V9 zMLQleVMIykAV+*I<7Q}%j0M(4Kv`PuriTlj?Tf9M@_ERlvymvQ3C-jHpj>@UWG;!; z0ibde!jcbjW$dz>dbH`SWw)OD7QcSh{Hps?%H>UOZZ#~4-jLGs=z;8F1!mjRa|P#f z7hT?&?S7Eiy zB%#X2Jwp(07*0Qjz${czg0SAB8aB~toEJni=Bt%NdtMj30|nECIDN?@1uD}EZd&1e zR7c!9S=2jvzAVuLn+5QFKL{W*3>VAuyBt zd~(F@Y1@E(cHaK1ySarYjp{s4UpGp2KX2jjk6Yi1l|+y0@6zvhT&4V*TsW07P{*3^ z_oEbzCWhs9HWy;+H~sx_666w^w;bB~$!lzaW6>($=rG&lClRJinnPSB&%;= z70oS}mj#O^y@Yl8wZ1J0K(K5t(xgcg;c+s!gKL%DairEGA16?NI&}5kCJGIJ__3=U zGs(xbO?r7c^=*i7vVW|RDXpH%sq^hT=U8}i(|P+lJ|&z@M9*Cl!*36dGUK8)K8yN# zKrn22iMAUh^S8mEW(w=%*7nfB)uq--c`!X;-gv|+ ztb{Sf!8;ZWnr4p*F#3hTh$?et1|TLlweq&Sa34R2AiA5BgV^|-hqZQzqjRGkm$oyp zu9rvC23-2aV?&60o_`sk!Yme$;NwnFuEMpU!Mb?3R)A_&oU3rJ1`!f6Gvp`=C_Q&o z-b7@o7|3|48Un~ZQ&FCo2?C66!*Ai^3`!HAG%JIm>e?Rf@gV=W-*xT$ZLRYKn#bt* zn=41%N9b{1+Xp^6OId-*g#T2}65*0q#KHinS7ixv^`n(FW_mX>K3W?=%75ZPygf=} z5Z1RF&6G+dZFdkww;{Cn3k8=XU|19=kgghFTL24T!WB|S?i1rOCJC4Nh8Pv{@^Bi5 zK&KDppVtV()FK0~8HixX6rxM+hLR+;?|>j97?=TCa>R&HRi!E8sKFg63yFdgo+X`p zthz>NBnPgiqp{%!*4&?2Y;i2}M$WT_HAP=8`B1d$qQS(VgJ};24q>@2$WUCF z6Jkzi|1B*q`ztQ&J+eVsRwm}O^EmXJCTyujY{{=)-b5j#JWnU@XNKsoDdM2{NrGk0 zbB`G{$Qkc93YY<~mN4lBEI;72pM@AK48HEkLf`=iU!pofbzosONFV^A0xArnBDR5R zm0e8$A|)KKmxa318-;;Q2xME-3v)wyKx}t~*?szrAGDM1%^%&CZAZL3q@BN!P9NC* zHV9)|F+d8%Pn%Dbtoc2d*3%SN+4v$Yx7az+B0s^_*gXOFcs#D9JEQ}6?#w!kI9JW0 zOyUXzK?|)NsFEBtihdu!)}^xve%eTfz9JUr!$&gcLX4$Uf%I(eOM9j+T-4P68aF9i z=yQf_IWC@-Lg4a1KAJYe!dAfNZi>wLgU#)FlM5Y~5xP6bV*w~=u;hmwNR&PREi3LV z1nJaf$)JU^02?2#40vC3h>9zy@e2>?DV~}LPK9~Fw4#e6KEt6)-rwJy6x2~yXr-Fn z>-nkAvr(0nyJU8i=UeWL9A={c#*|>NR>qC;bV>b0YcOj?2?+^&%axKJSr2uEVQjPriB1n?Nt<)KvG`iC& z!YyHz`%O=330xBnc=GBhbKxfET~*LWm5u)}L+uRA>maC_aaRIsK+;TIUj(t9ojSDX&hw@zNqKe~L1?kI41L@tsQ z_3dseWSot9{hoJz=S1JI$1$D|5wEaLjNFZIk^$dp-+`z;7Cae_wa?pv36lsHTFVW3hH=HU#b{Y!l)j}QFnKXqQldo-+R!DJ;NW771#j02wW zLv#GsQg^qO8PK9Y8?&s?xd`I*TaKN2ViREPPiFHQ$4*m4o&O$d zXeMC2iyF$U%YHu0bX@`5j|1`o`HTgE6&NQun7E&Gji$_W@lr{NU9uLVC{Ufwl%$Mp zp_Yw{uAOF9MKL|}D_g~#;V;l|^bXaj8#3Iywj5ysU2jyqbaB{#MDifuBUMRx<$Dxh z0RRQpy*lsGCS7b+#HwoQARPiM?kq%#UBNWGp>=G3^v4BQKd$RWiKs%(;yG65F_aoOhQmwtaGrJN|sDc1yjwjdhkZBao=9j_4%!anM9rRpvAL z>BLsiwGVQRZnsQl6}N|UURveY^&FF&(@xW!t6j*0?$~Zu>IADGu^kcYUHEp*`93*v zW@f-@XKRIF=nmYxnS4Xkuf-=k_jx~~>+j(wKDnw5IA@MLIwUy#zq554E?!6_tt+Y6 zD0$fRnj3cO-LdKXxc9Wz_9>eyGqqc`%x_A`8Oq|%xf(J7c+##d7{F+90Rv7aXY2V6 z_7}Iv@00QNGB9Mh!D6;G93MsFp?5NI?P0i79(J1?t<4R=!6rsa89tPcGz1X)dE5ND z@ZJCd1Q6Z=^eQF_sR5Ja*i^M>8=tU=xhaSVE|Mc0r3f9$vN&qul~-vg(;=Mf9dV+f z8pDckY)obQ4jUOl%)}4NcW(buysKwteh%?tc4fwv&7m!uxB2W^C&lEy-8^BxSqs>nHju0vLar&asIO<^=n7U1L#r58$kolQ#Jd$2!?aoeP5=Y-O2^q^!q{LiCP<=9oynqnFLL{T^ z7!CS>9Dj)40DIIzcp$l}iX?P3x(~N2tafC9NC0`Xk8M;&$@liMQX%7bNVXbV@niKK zjjnr9O}PNKheEhuQYxaCZqw*~F0S#tjE!tXUoO$P(S;9E11N!Lp&YS9ZTNB|B!RoW zcN3G2Nl*ROEG%GtN2S|_0N0CZ4O+)!zNIWZgiv@A6d+A8_p$Mmgy`(HE|r9Dw&-@ 
z`*iwG=;SXb*u%tMmig?~h`nWq0M&sw1xBDkPK2kexeW9kJ+%5=!s@EJ=m*9*_`|_` zRJ07e?Ib!?1ykf;>wIp79%#(Mb@28M@`>mxT{TIv-RoqUy}0=ZF{-M>FP)rK1J;byd>$y<5V*ws!cnFN2Q_>V1x-4MtD*9Z=xgqmzvo4`(O(_wfGM8o(?*$jH$C|OJ~T_(;ZTs za(7?%;L?yT>?S@sfOqgw6-vOu?iS!%FCfUQ_17NO=|CZ?_tm{SacJ9|@#o;G5Y6!z z89Ii7UZX;XT3gQ6@eyRT=K$* z936hpTXDt`>InFMuoj)>gpOxoi?nKN8O3E0(+I_5Rz#-PeNT8C#!TE7BKdyETzZ`R z0oD8uDn@YG;3s-D9SivZ@cj55MSU|*))9#}-*(09Hm*s5JH6z@*;BjEtv}%qd6gqO zaiALc=@Exw)?Vd7dnRpdjJ;5mbD?yB`L+7mKN_7YOu_*<;eZ-- z#SA{#WmSdgneo5{0+f)B&DFO;Vj+CT@uvcB=-nih_#eqZ3&;*s90nAsbahH)aeds7o?k_R6#+ z-|2XCW4Z1)YP$xlZNA#D_jdoj$cF6{v=tMGv;i1f2->PE$N#yqWcJE4qw{%}HdOJk zyQ(nhysJJc*@0?Air?A$56)i)hYW9#jMv5fReZ7`Ba`c~G|IyQS(0*rNeKLa|66nI zuFmy;1f*Hkb)%~5=86;&-!kDVMXOKEy?|S}4O_#YQW|#Qq~c{{3tG zJo==J(9R;rSR>*#9dR3>L$l|SycEq``AkFptRjuc&716Td-v$TQRYQ-v^y*f1d#tn z(Rqd?**0L9y)oRlagWq+<0{-+anA}zZVgLK%L)~6X1H?qVve-bNX=1JI5V@tk-I`$ zR#vvH-h4m49~}JS2M5Q^^IXq$ou?QXcq5%ig#aXIfODrV_LVze$qDCQUG!j&XuEf0 z)9yGJb~sQ55AmVR1?nY28X^U0F@3X`k2pu)MVw z*e<%smaH^$FUhV>lL&Svn>pi{fbc8=71XIL}5+|2xf}MT&KEz7OM<6U+Z__eoQrRf z=pd0o5JP|{r#$)^au~P!u6+*2d%tFcM;!sjhHXuRLVwBs2)kWxj`}DN+2%g}z{PIy zv0KFDMeal$ACjD#!Pm$nTo(4 zM6jp`4rjHU3XvBkL}bJTz)-^#;n|7G

M^z&^IFK|&A6>)kSX1xvQUUP6h=kPn8 zI5_($e_Ejq16@%{p$$qdL;f*Ew9-**>cU*u!?@c?>4Y)c4p`#T zg=jMD=--DadW**zgvie`VN?I;-jZW3CvB}g;t!uK)OtMc9ahtgy~V4kf4j6R*mH4x zC~;$GcZ)sI%R|=jpM2FIAH$FmgH@_matcOO(hObD%xfL)~8!__PT zVk&dy=EKz%BHTz29YD1+y(4#piyAtJ803k35@7e&pnEvwHlBdNu+!&XJ$?3cU?9r@ zN`+{L4L0#bTDaHKyAWfOuD18{1sy>&5q5wG zv*2^g=(mnrza??tX%sl}%-fe$i@I*r%wM;*th7HJr?Vjk9(~5f%TEmP-)#f-%)B2m z`VU+A7kh_tn?ixqYRR-2T?c%_O`Bv&^y0P+^q0x6nhBGI_2^v_74- zhXaSgrnjkkq(k4NdcFzlyuR!|m?eNn^5FecbO{|%K>PJaZ_I9JLG3Cmdiz%t^>YMu zERqJo)`H9Q7t0^Pl&`^27gM&2kZ;^$54~GV-YcK4Ez*~!&8}Fl`GTDf?Y&F?x-CF2 za?$FKM4oSBH;M1<-y2anS7d`%_V7Bl;TNwa+o{fD(`0jH6-OMdIC4ke3h=pg9 z$_l7^r(5P|*meMF@F1dj8}3VhM(l+uks(ZKY!L>>fn<-QeNZ6zCC;QzImZ5SmfstfoL%N7Yw)6 z=Od`ubnw4m4zO5kNr0)S$B!%Rp5*b0^_(+X%PjMCbVQ8{ zeMU=K>W`tYZLP9)#RlCgg?qbrwvFeEmz-K46@6{hH>~)j7{o~66p=t{G zhB$_?bLXZb{=BYDNEIC~_s&>nN(IEWCkh*@NQ z)8h0k?PLMWbn4CD$oTphq*exxsX|G1F3557H}>Xg68@>}j?(@Y_eS}CAbb0Hdo`{a zS0$~)X;xWFjj$@LMXoo3KzK6YF5v5GEbm{dOa7|Dk5^HL{>pyi=R?T8ZyU95jApE9 znrJH-sxuQg)>L7eva6jKUhj%Yj+qXhkWI1=@3=loDC)2{GX^Q|V-2;)gcW8**Jgh9 zDZ5X5ge)d%UzXT%pF)FOTS+l ze1#k0-G9ZlD<&tAcdtmXRe?@ZI{=^#NxQ{cknL0zMufIVMC!V;@D5V#6ADzDWom&R zrBQ7x{;Q+txUt{b4~MG}#VL;0#uUP_$HfoyJ6ZF8!^`g(q%2Ox^wvxqQ@`;UcKGz4 z??1ne-&x*usPeF1ekovRi$|yBDVJ1Bd+kq&$OUQyQGsO&n@ic|mI3EHEj{`ON8lM; z^}0B}!SR85rGv&2PAO;ReO;yf$E`MJzY5C}=o*NH2iAMMHvQRsXw``U1EqLvVS1ju z`a%g8__s}ia-a)KR4J@5P04HUoZc|JLbume`7G~`nU>bh&XG3eL;Na6M4!lt%62eW z#=VW&DN<0-K|lQV=J>t<`1vQD$r1ms4?;myOCWc`Ls6AJ`8xG-X{BzIa=DFmn}M{Q%oOSM zZsUB@G~%``+ctJZ^zrf8@{68vdA|GIRJDpV@;}$aANsUC(Jo?>uWdz6KobB?UySbl zyLRmi1H*%#f%Y9v-4L%y$=jEj;&PBXs_@?nD7RKvlCwj^4^M$;t;-75$KrXSlvh7?MnZ`FmZ_S;o z*mMX1G~BS2sA-)g@UzKF=X0kOROx{$bnIwj!;`V;nOB4XY-_=?G^(uAg z3FpdMwmBSPp{LrG^lff2F>9Sz^*cx0GA;+?Ntkk|7t@wntAmd7F8NP6V?t+8=f<8? z5@J=bt}Qt}-Wkm6#FDC*aOqpspS!|E%8#9xK^u;)S7`z@I*QhXC8HE=J&9o?*}h?K zn6g1ZW_<(y0ACx&NdD5%Ofw~zD1w8*espBKyB(<_VltB+Iq}Bcw!2JW zjDS)OcBXF#vr8?7SFuiB9$>4*^8s1-#9D&^a2OQg7NhPYuov%PoI#tD8n7LB2Wx51Uw&Wo*- z>FTIG@LL28;z7Pwzv^g|eH+0h>mm=+e=P=qVI@a}vDm34-nEWQVgRIC67K8k8hauT zJQk-SG1E{7p(vGc9J#55DxZuOfKE|%a&06pnf59#3&)K;(F0(D%CMJnSU^wx#zh;| zTem;zep5IR2RFNzo*$Z;4VJ$;nRrw#bP1M!AP?f=B9H9wNX?P5SL+&6F-2)@jUhyx z=`al&l#}3`z4g;NozQP8@)9Puu1iQ`)4939h9(FtwNI$;)I)~@O!WKn7l$r}W8;H_ zUBmo+X4PZU+(Hxn^PuU2L)>iaH0P}QzxG?ZuWusvks1+lZDimY9N@MuLxoW1EcyJ+ z{^66qJ|{q;MgbMd5`;lv_d;~X3E7zp$B66M=XRn; zA4|ZEIsY`Lqy#m+kR%&lDx#gDKE3m!rC{z4t>pO6(;<-{{N+EC7k6slO5jY=4I_ys zP}Nu)bZGTx<=Fs)Lw+{IJ;p-Bla{0Utr7R1F2rUqCf|Bm@c;WMDV1RbO2viq1Kf!~ zy_h1KHaY-E*JJ~Y?h2kNy*wp)JhSIvu94G@dNil#W?TJ( z<)WQ;u!SDdzbCDGY&-RrK-xQ|s#-Z31J;^rbeF5Qu>*8?1|7t;o#&1ebsK{T0JpCr zLSUjn764P78#4)ebG(8a3o4w-(I0?%rXa$_!5?pd?fE7S+qs8~Ao~GYUouL_T%am3 zM}!ahGARU`m0 z;j&9)hVmG-u@I~~V@hgiBF?aqWn10!cEB)&J9#~R`m#kH|wxDxIo(C%U8S8tIM22kPUs7nSU)rw(5fTTBr zPjuv(SLd2>bA%zW_GY&1CQxn@=nIDbb6jFSfpNJpSClj*XZYeCHweuzj00Eh#4Y#jh?9axIL4-5miSplHc zJRqMgoD&}YW$N;$PnFZ4HH9cd(A}I_(L6>(TLC6(+|n6kRPBJ5vDp=F0=K>t%6{&b zXwH_z4A8&oi(dkna@9S88o$h{r`i-<5fhV9X`OnDOn$EVF{Kz z`Cw>`m0V3a@U^IajFr&lIRD;gzwhl^FMH-zdu}foS>2LP6|e^J=dFp1misw-@q3){ zh=MSP0}*UU2DXzSt~7|PKx97&Vm@86|0URh(j-I7vD0PPC&E*COrbC8^qPYtj}BkD zgxtRUQ(3KqU|G)xMOA`ik$@JJ|sN-9+&ZeF?Q5Bm60ef|{eAi0D^g7^q> z_f~^GiE4f1n;7%5Kg#7=PUcR@LhRViAK8;12n^c+<}V^h;Z<&l?msaD40i%#{$KC` zx-f?WUq?LPhJn>NIffjNL3N98PON|jE8#!}H^$r73We-5@tyq&q->ekY$5(loSZF& z%f_|&isG}yh^#1tS0v8stKU?hu6nWzczDL_o}6+=7*Lk;SQ3*%f1gd5^zsPmX>pa8 zjPGfWFS_SFp`v^{vw#J1;76%=NAJeN?{VD_H{5IzS~Z3#k4gSTqokX~PlAq!6&)|5 zZSu3O!@{;p9;h*_mq4mZxh5x@oDm*|OLw9w3;QzfDyH2~#4`5gB2+i+>^oRryy3@U 
zSPkjEA3p^W2kc$TqKzRl%>oc?DhKhrH_vDmDUg)30XGC%l4G!wp6_U;GY1#w*Nes;$eJ>aVmLw{1_>Q}n;rU_*L`Xw_jl>(28WgQt} z`Z|aDvKjr=3^&A3+HJ^S!|I?3%Wc!webYCR>>5dB-7#TB(qY3)R!R_SL?LfjVJ%7! zc=pNa{u}JqPmozTN=)LRVZ(Y*=I>!;8Ic(dlO11QDDD?B3x%BGfgC*n&PD)sW|z!d%1h5@UA@VO&L~1NR?|Ej z+=xsc?k>ZJv+CNmdJ7u#S^jYZl@$>Lizcy-;KD{ds3Ql%&M;wT(*$SrSiLTCU!x!n zRE9k<$dap%7tQ(6>zY#ICA#2x-_pn_$;N}z;^J{7^yZMvnpN+&ma8c1*lK04f7x;T zlZW2^*JUw6f8IoX@bB8PiCB&L@#XOhv7=OY@dN+sg<8p*{$Hx^>=7Ph#4$fYt=+bX z5g%wo%5`XiI0r%S;#!m)lWz$*dr2XCc1U`YB23r36B+Q6TB}FgSJ6XIw*jb&;KhxH zhshU0-3C@~J$>%F{oGB9WiKTFnF@|LQ=y7^k?fZv9;_qVi;S!09i+EZFGxK-V)b-99dQEQCgV7J{Uy-ui{XVJ+ie)woU53we1O#YuP~cV$iU;yx1U_sx8uXR*Ip?V^n_N2 zN2|p<&a;|-u+~2CnEjWlmOfy~^}Ml~m4r z-(#cq4k13_$CXK@gn&sjf z;~25xrUn1s?;1p`{S5C)7du)l5=tsz=F7Z)OWYJ~agURze=G4`qc$j4k#5M)I}{S-cJw4C zGKu!w;QUc5sn}!9&yDVV`uAX<*@+_avjvfHixK$+bahsN1})_PYod=GK@Sr)J>mbo z#7LDkkxDQ7i5diXp%ow(x6gjW&0S6*RHj`ew`YbYDbc>7b4$klX;rOa8&7UIj&x;zOE7Q?@kB>8S z)4Pd+lQpY-pI9Avsv3Tj3JV>$a>sMEpB;MaNu+Pk@!j~lhyPw!b)^Q5r7tG-7ev{> zBFL;r&dHRcTc4Y$Q7%8>@FOSAo{-3N#cLn2RREtlza*Kl)EoGxH|EuRYg};h2j|ZD zoCj926(4#x-wrtMq);*o8r>m=uloZ?`8xu6=I+qF$2roIV3ft%j#%z)v~nt(ZcSJzzi2{76w)?7vM; z@i>_fv*=dMNbO`elM|h`(_P71sOz|PC)g{e3&#(@B4}sc9QfH8xcg{=iYeJ{CY2gc z3%h>`6oUM=z5Ym3{(Sa2)w{U%yVvXSkzszPx~bf5hIRFD`xPDc0UgF!)*a1vu>?5& z`MPq92Au?dBxi9HJe-Y&f0QGotScHuRZ|GI3<<1;{ZFF2S@NFeH^9ruYWK_3m(SVX z7YQe&BF1lsYkrjTNM1~i=t;-cb;aH6us)SuP}7$(TWb=zFukM~eYqNIo%XJ5KB~&S z!7%N~th(EweI8fTkaMct=jBT~Uru_fyCy#sQ)Rg)uiYGfw=(WlJafo#Qp~_rEr{D- zA1}6iJ$K>}-Z)}{tk@F?KR2~EfIrZY$z)6J3mBN(8ey^trW&ibh*1Z&g3Tdt;|Oa? zQYWsfIsAC2SL?GHqQ}Vmw}sA_=^WQnt$Mq)eXk$hw#ifc7)Hun=*%k`AcosApGyQ_ z4)ZtcQWZ_Q=QF>qxToxWKKC!Hq}7e47T#^QsQ=%YzmnhYX^NOVUbD?t?Ju6g7`v*M zyZidPm`DWJJ+-aW4#`wU2FZDr#SRoN?zkX z;#@__V|zP|n(4Sm&)S(J$WwEE(q+Emv6EjLY*60zGd8+$eizNwGQ>Ug`?uENN~dtg zoR!bh4UyXC%Pyg```gNjTB`Opq4gJnzV$vZUP(M&PFkY%JT#qKi#lgA5OoY`{4l?u z!YADJdxc*sqvna9@4<|9b-4ha`)WrTOfIRGRyZRIleKLzDvx$0JB#L~NP*`}cPk@H ztX2A!BF@=J3C9V)AP7SM0H7;;0q{USV4esDBaX$&U{MlcSUCx-93CMeA|@gtE4D{W zMnrs%sD$ucLR>;lQbJxvQc^-jPD)lmR#plxB_fJdmqn^bNvg_9s>;dfD9CHbNSi3g z=}99LHE^2ha8nJ0nzF)PRe3EnDMK}RQ*Bv8V^IY;HF-&*s=SJhf~tnRma(FWqOz`< zs_I^KRb3r*b=AE(>U;I}?$yxNFjQAH(p59m+iRh>*IZlO$Ux81NY~s*LtER}NZ*L0 zXKZ3*WNBh#Wnp4zY-w$2VPa&VZ(!u0CF5)+<83UKV2ro7)O9q|4zSR&Gcj_oHg>f# zwllS~x3+M#wsbW$_A@tf)zU~JYPcyV=(~f>LS%fMM3OzFf*iFR$tJG0rVch%ZuXX* zF4i6{CO&S~{!ZrJE(X!gS|@_EWBkoCLQLa5iJ4J`NeLK|xud?RhpLj3gQcC5wS%X% zlck-%nJvZ0CC1&}*4@?4#lhLn!#2>>DS%=h=Ib8h>+Iwb7#iT`PNn8$ zrkA8Awq?e00^AmlMo)z3C;TH3)`{;fHs#b3raMRz)%!Rv6u&i}DV8yjmWp$(*7>2J=E8v#ug9uduqNvWCsBZmB7_ zTvypyeg0}wQEzKuM^jZ#Q)Tbfik{}W8=ZAGJF9PY*AHB;x!rxPtg5By;-&WH)*Dw^ zZuMO5@9ppE>aA(*o9ZbZ9qf45Q1SXnOZVXYhR%_}vH9E6FCIRen^}6Z^6JZ*S8rdx z|M_=!_xsN8&)>d(e?32OXB4pu_%4`f$~(^!8c8-}GAQTqc_3-mdgtpZiU#=c;``Mp zO%uh3Nf8*AFAu8sX?eAV6_mZI(m!};f}!oJFLdkU?s|Ib z&=9$1&OZP7UAr?o+oGy4%>Jxxf@dr@+^!g+uliu%);vVulT|zpOeZ=qRazI_xVAm$}v~97u(-}L2LG@Xd zWK>5)ieq1gVoX#j?#5$%{5lzTNUh98spi|8_$KU0t%R#gAncM}TBBG&=}K#I^GyYH z?Nj?4#*d~@(gXcY1Ydu$uXEMr$kE}}V+SuR!j9j6QSo~G#>LV1;pbMDrTeW9ul^{F zYsLNhi>QkMz=FDWBk>LLH936PM_iP+R7vT&c%e55UzF>8X~Q_7Z8o;#thc!*Wd}hAKrIy5g2r;IV57p?!s{~SR6+Db7~ z3&cm;+5LX{IB&PANa-~rqa0WIQRB+qgUQFh6&W&HX-lHBfqmjou*JE=--WJXWq?Zc zhmR~#@19{S9b4B%%rw>P z%2SEYP^s0^Gk&jT?kCma779ZYqD@*qohkmF)?G+nq`0&HrYw4w$tN*_bKZ(UV&JE5~-Fu(2k4bR9%ME9w z{dtV`3A$bBu~((shHb3CD788W3tjZ7-!MI9bS!Su{=grZe7`R}qDQ*I9O?^9En~)9 z3!w#nE9@W<3UZ=(u7g{Isr1iH+$t3h&wT1FDyK$rnRr0`YE8$m!qq2oq3}FN4h(U@|!#L1of)-B5+z zzJy1ryRrChY{Y)Gb}frq&ZlBkn}@gGR4EMwUCT0=zPZjJ?xSWa9(Xr1^{+SkX!xeP#Ky3IQ?8N z=i#TAazaRJJRITsYQ(>s3em^gz*|>TPCMm;6-Ystrp@6LLNzJLl{SYj 
zN*);di*`IPkt^O!=qkw$j_mC*b5gLTh0CLb3sZ+4h-iCV=*a~KjvfB5HlFd{hc`KY%x!l;jl;r6 zQM%K>1eh5;|L+B7cMmCfVBWnM|OTv7TEOdcxlPPSBw5N4UI#FhpLGhPNL_z zCHG8yYo%`!>fT`p{{j$po5S9tGRif>9bn@Zf@KmdyZ0bDH!fEI7S0nI z&;kIO!2_%DZKV3hACJjpLligw=*@>oa0vpWU~}B^+cs4HLd@Ryc?Iu+H;Vh1;eq0v zkFQxwC@JF0q0@fry&9mWWR2vMlU%!KQaAftRA9=*uVMIiAT2_k{(YE9+w88|ll=jo z>|eSbDoShat~gnFtT23cUvs>V8y#!sBMPrR^1XqFGLg- zx)iY$zhoZ6foa;e|p6u<5h zI(i7m{wlCJ4*cXbh(OCF@PJahRG*j_kHyp_GXR(e)Wad>aUue?h=9%*fP1mcdPP|CK1>Njvqz$sn!DN*w26VYj4eioLTwiK8axXJW0 z0}AW*OFw~B0@f8FGKSB3OU7mcKgMtmXjoNM5B2A78?62moig7JNGn}I=mDgozWT%H;CGR zcEh17Gmt}ssv^HU;Ob@AI;mt`NG?8+9!-VCYMtp9prZxAla4?- zAAJ{qa%ZW}+PIw#a<~P}{~I9tGsTMF;yr1{PK$ImWYdxTc%@2$aP$EQ_mH{oUlLp@ z*-k2%%QlLVK6@#};1$_*L;PDMn9K(2$K?)cmV26|WjF%*f{Jb|l?TM64NVqhXfW=? zraeTQ!|;o*Z>BE1$$F&8P@=;}oQp?-i`|0}dIA6zaGJkVyrc>Ae_KDeTnnK99oew; zMdU^hdT4i>6=r@Rnggrh!Wy{9dg_^APO<_GmRx`igdX`B7(QIC`lCvHX#Tw75AnO- z#8uM0{}|v;RItx!X~MtYdY+g)E7x7NhiQ$9#sF#v-ugN@iBgee`gw)z_}UvMk_PVq zpDd*|$Hn+A)?KeD@6KR$$q2vS(_$RJHS-Ex3*f`*OF~1G(aAb~evIfgxS5ZA%f^mv zimU)cp0OD&)mSDK48j3Kw*mcwm;CdxCf^i;h3W$eENl?A{1*L6Ao84tj;FywDX{15 ztc^kBU7WCQ0FyorTK$HR|dwI&p>F(p@(3|6M*D${Ze>8G520#|<2 zbG6cL;u&UzMg4I_7-EGu7tkAj70tcW_wJHGNZRe+#Sh||_$2UIGIn%FWQAK{>soA$ zLK+bO`~kqN%%&4R06sEx!uGQx84)`GYov&L#_jn;L`AV7FHDm|IIu1*^2L2*KNY6R zzBsgvzQtD6(+oIPt9>d+US|IM$q+aQ5dm_}AoKo7E7ve&hu!byKI+yzQ!cu!YV=?> z+cMR<^khp6Jw5I+M8pwnApivlKo6zBByM|sNba0eYD;|TVBYQ({kkh*)peft>iXj9 zv{6mO+AKSBX0HygFQm@Y0&v5;cm8INBmi>9srVVL`6HqE8L@e^2AjhN2)P~lvPBQI zfs3eyhNt zvlSIvFCSRnt1K02M}`oNyXyiUI3po_AMo1*P42aLMN#h$(1#pd%vSV7)AxDZ6gP%WSk38Gz*gbUBv-@c{q#J&&{* zP$KB*WM#iVWS1k7%ZEg89a0k@;Q&Z58}_OS`63Q|L?EQrTzkofhT_}?qxKyOkstDt zO$KK$cJ`_|;tkkb%>{3*3EhXsIPe_9z80`$DlDFY?xLYb2$*Z+5mgMPlZy-?gIw8Q zq4t+V2JNPV%?2cC$z5@fzNJ@mMVAI_dj|;k)oggRXZU@yXI`3a?HIA}^fmLVwmaqO zE48*-MI#n~1AZ=LwxiL9IQ`DF}gzDNj<gm5A7VUcv35@W;lKd%Fty_;;D+55 ztj*MTcs+KG4*`+^H(k=&f`M3W)>6i<1Rp7ouD!4?WE5 zT0a5#ZyNiO(iPES`PI)&Pui-_NBS?Gd)qeSh#_QnR;jOpTPLrHlab+I?69(}5hte9 z+|k#_Xtkl;DRm4co(l03P-iqoFNcn*SSCGTfep8B>aTVWhgGgj+<5Y{IAdaHUiT_> zV8}G2&i4J)2ley!u1|BXq|>B%?_j!F8QJ3O*Kkxt6P%s z!*0EVRN2~7(zu@SQ zx0!4pWE<#Bg_{5xz!ab!5iCQCK~q3VR3U~0R3JFF9)rFsK)>j72qH=yqF6S&V>;;# zjl^W;mPqRRsEk#e0A=;=*}PBL0R*Xtk^r;O96HD+4z+BHH6&@IFK?0(g^7L6N-MDM-ATw*PF|`&qCeU(2u;u-5U1uP#o$D9OjnOA7-=f z*s{*Id!&)=e?yQccN zBSgKVYaU73hekQUrv^2JXOe^w4rEM)GpPt;+UmUKB`~EZ=shzaaL8=ss$J1bL!Az7 z*(Wp3&oub}N#PRW5YUthZy{rsan0jP*k%s+=>mjJ#cr}iO1FihGO#=Q!<+#0DCTsK zCx}5we;1E_Rt?RU_qy^;&V>TIM?=4KPNtuk1Y0c)I7lwpxaHq=D}H7g=P>gKtP3OI zarOD)iEq(0%Q{9Ri7q&U3gi4mP2yJi0lS!fUPHKR@BTd zEQ1d*-n~f{z?lTtKeg~VHnxcjUNT!4lsoPBhk2_m>tWvMhl{l_T{jGBv%FBf0OD1< z%sQoQuyih%Ae^ZXKOhMohyXa7_)Tc1TPC9pkO9h6FpG?}UBNE1vGaVWF#)mi2=s_l z0pbGV0e$bWZ$fGQQC}on*$_8MLi_i$kF@6r_?sNU*(|D#cYs2lMy!vhmB^f_H~Btu zQyS*{8-LEJ;*jAz716U`XZhErYnH+2j5oUx#tk8wiB)IUAz^ITi>T+X5AE?A{HeUtwH=N4o z>rvls-fzvk5eIls_5JqyEYdcjMSv;AAu=goBRWu$0tARQ4nt*ZcgZUL_a89PMj($l zm`J@@!7}72-tJro7KDLevjQa#@nE)<+a85 zU-Jp_9DSEW)0AVqZsPjh7fgd?TjC_D4-eV5zfez5P!`T>M8yJh*~&*wZ|95JO`xT+ zz73r&%G+~sAY*eSvnX&{$bx>HZ(crmfjGh_5G``rlD=jXHCe3OqyC`AV+_?lC1!gM zA#F?(>;iJ^;L-{$xA3#k*P;2c2NbjCV2E7B#An=s0s%cI7lA%z*|xnt3Wa0jo!LYi zTgD!f=$CCaO%6^{CRAfKp9jJbMD`o9dE|e0J}Z5BX$#kv>x(OeJZaBvw1s3#>Sl2a zUf42a(py`~5q*VRbG1UBUQspR3mAbz2lZ*io%2Y(exzKlk3Wo7=H#m&n+7O3Md+g` zmn%(-y+;vytd?8q?F&Pu3Yt0bmnO90>#rl*2X;VvJ9|f55naX%7hPpKsZ3WXKR+Cf z7dF53tv9>T`ZrtW?TY(<%o^_5|6}6?#+I&K%#GZz`C4V)m*HMa9Lb-?7&3vouP>Nb z@v|Y)-%^3a)>{BooK@LmWJTgw!59n^%LSGt4#_}rf;UmeM$WD8uqRdS_{;UIl>i5y|?IT_K#V*V!>J#XEdAF+p_7;Z~Pz;*_R%gNo^ z47zNq&E*RD#8``lu>EW*9jQSj13zQvfE@7+9;`mK7r4EeTAkL1l(U@#e8pVKhme!i 
zzf6Rt^(x;M=B|ST&}TIQl`g`kM2aJm8i`oUr||}D!|tVx3z5lh?Xf0(lN@mNeFHTl zqk&wAmQ6^#<0yM{>(ae~=k#_FE!NhRKi5e=!Vwzb8DAj%YlR6>I_>kLCFP%&-g}r> zDQ`)JA3|y~m8+;07( znRS0hoP-Ll)cMVuE6$>`!_l%$n>L`Od<9K09yktnXgeeYP31jEA6m`H@8cq?K96DI z2w;REHQQ)=L#|+;O01ry`dWdRIW6SyD#{oX9(f$`bTlYv(hx$x4Z|(-fxm)iAnSTB zeVWY0CRp28JiiMoLR+N0?WG)KRNN zFUJipsCnoYlqZS?-*;xUct;>H8C>w0r}iwPws7Ll+;PX}X+=gu_dQ_g?q?G|cZc$x z7!7x1V=n=al8!dWmTE`3=X!D6CP+oVag>M#6x=rff%A{F5foKKW88t)1zc9*l?&M2 z7-t(mhO-t{hXT}awj(cR*h3-wn@BBs@9S=|qHIj)(N5FUek|pr8ffrnfVbS-5*R8= zrUM8uX^Nuv<;=p%JZKSBh&4!|t<9}aT7>u#J9b-^+johOVOmmqtp2 zFeSYdm;8gzt_`Q8*mi{my2NM|bm8ITH8#DjVHP zHLt>PmN3B4u2)B1k~jdr&3%OyDPo@7WhwX`hrQ40ktP+x2vx+M_m2RDNjP^$bb^{^ z>ByuU(gOARJLpHD#Js!MjYuoJQq~(W*PC=@x9tZ$hqpkiKQ}*Jsc2W)UE3-QnU6r5 zj@ICP9v=4E`FN&!cmxqeFqY*LQ0KXBYL0x6Mzv7>?s&l{hJXN6j=N#Co*t51^x8b@!8*3rdkXtj_spzrXMh73cW8kOZAN| z;}aL429gT%JRYXQ()X9VJrKSO%7!V6ipIQL_sU@_AG&2hz$8^p<#?Jx&$)fGamgJawpz?i&8WpMpq;@us{l9r}_^u^r9MJatOdDJmB>v zDAF~LhBwvN?kof$QH1Q*g%hz>U}I9QE-wei??&ajvWe1G&X9u$Ct;p;pvU@crmg;F z&X*BISJ}a9JJuNz`!`l?+g8f7m(lcTr!!mkUiaia#N=+?`lmA z?|~Nme%K@sNCi#lKo7>&hc^K^Z;x+Tr2xCq6^YShv5L2HgM?G za|KHdnwCN@7~2N`0kmC!QAM;iJ5q59PYZKS|Bs?`4`h1(|M=&#`wiR7HusHT9ZM)&5fVp&6Mz;HXQda z(x>xuJ5a7FT5*BxVn;uAX-?YtqrEHfM(in~gBRbKfPZ%;{KWctFCi307yC|o zyuw~el9cWh!|0gKs(jFVsBc;R0vOb2FhHQJ0om*?VK3T17Y=fZ9!>xdIYX#ThMVaS z{DdLAXpz!YA*S?Le$Pcci*GYz%8JIKPkK!Cei$JEdZi+5lvdwPOt?QOR ziNOE(x~a0wb7kw!+~e?w;1GZk06)}V_>kY$Icok;sl{~l8%(oOqKrKY;4oU4&=|{> zeLd6ghA!n4B9hfehiRfo2WkBye`}JD^Q`>SPOM$DPb=<5=%%_$B3V(r$07TUQ`yIE z_xHSqQ~rG-7mm6J@xDKolx>NtYI)Yxya{O-SCWEiF&<3rzVjqUMe2g5wv|~8Juv4C zG`jXj7oyiK3>6y|WVU~K79_)W$pY^nVF}JaJHkpajKh?~45709k7i*IARt>-SOUg{ z)4CDvz-^hRIJ~fA>5!qGVCYixyVLM8^GtvBEh>(EghvKcSr+Uhz z4|3HZTf5fgs_a%)Bu*&-I{6()^{#)&774G!7jh$zF-O9R9OIQyhXwJgiP4?Kl;Ofs5UCf1~I2_z# z=ztI7fbe*!*N6MfaMT(hBmYrf>)LCmI&k1@;Bx`3z=)L=gfx^QbN|l);DAg6in{Hj%;Q6}qcZgyIZNk{y+quAAu7G7-rPhplp$zFT zqu_EC!kSFYJG5oaj^6mxVDwsIPu+U}FixWLU@nFvVjvwvU6puZ|;HW)IT2n>8 zU4mC+!5kPe*U@LW|Bb9smF60uw2$N@w%npi)*ttL_@~Zf2sCKyU6C^CoK&s=jR6p? z3N-GfS`1e|j(l?R_rUw|5D2-FGK-l|J83m1Hk_A|y@n-uR$nt40!KYJeOJ8cIx(e` zu{qZH(8i&yC@UMO^=|-S4sI25eaEx1e0Hhng6>}6yL*Nsdv%C0OwcUNwMVrA)`F7c zITU07Xjnw#^>5&r-k&X+sPep8Ev{=X0xt`K=Jc73O-okWby%5QyB6tt#4^6k{B7jq z{aoMl1fhFx3~W;R78Sm)s^<=>Es&~94@v_Gsv6r)lL>`XfAr?tLg|~{hTwH=>XRAw z6PIZ`4{DfKCPdQFVJQhvKWkG2Z?)Pygyhb_sQzm|a}I%xVHQL1aA@O31$<-fI9pPd zpV>J|DtG!q_05%AMOisL^+I<8Xza>u*ybgRz{O>TjV8|8za%xxhK)%w-YoxK0CSAh zF1Jj&aP-yp7eo(;Y{Vto0k#YJra|S_eTxo9R~vtDCBd5(WkuacatGuO=%C6O9)&>_ z_uHIVzgpA*EYmB704dV8CY3=@yv|%P<7jSV~=fZYG>pA+^3!Y zRpYI!mNtA;n`e8@)c{+|r7p(-5;{NAB(M-h>y=JTD`E?jKp|iw1zVO?`wdGirThn5 z(H{=JbXr+{cYol)PvYyYQWG}tNNhckDMcGBVav0(RdBU_5B-NO*Pf<7 zcJ_(?hg(NR2hA`=LsfNdA0}@{biBWvhS+`#jVkt-wzXh@_W=w0QCK9~KS@`@SPXCQ zU*nK@&76MZ^}vFlDnEuOOr{pb&?d8pYN>4lpuYMURv2iUAemr;KXgm=u!hwY*&e^d zt-0>>mgDt?IDGBpOXnuc4nYmWjxFeg+h)oz5@mdW^0hyR-JOc+m0J3jtQS^o7%wv! zD+AWlgpU5UDU0}M8-O8t;WDkM#ItKDp?|sMU{>7y&$4u#F!NpOp>;QFx!=4s^*A=; zS9kwk#Yh(x>69QU>ECdCN0IEx&9sQ)r;zJEd0Blx8^amB^2#$q*2`fpm%!}(|0MMq zZX#=fDlWBVU7P9qP?aNV{VsOdXdu;q??i?_G2`poRg1T$E^T<%!8hw;Hcw{`-{)=9 z^BxW?h~56IQ+&43rtcnva350 z-^cHLcALXVl*Yp1&o5H=pNpDSa&bAym7awi3v!dyL z%lvfJp-b^Q`zOt=xa=D9c+`1+{r|pNbzJoj6uA?RJdl8`JBuua4g^*#vXs8e&Z&HCpqUHM{;@gQ!7G3IOLV z&n&p?Cd({(sW+xDb#hMqI29`axtYxyIAw$AMZiw0vM7{l*B3F>q7)-9-@_FC?3e(w+? 
zK3lDCbWOCj@RxAZsWO;9I$0@v4c<2{!RRd>I)qW!t5oxth2!wQY8+DrtG|*<_pSBd zsk%z-O7*XjTYftDQaorI?mBo2(6Y3MJWHZezXlj9|xl(Gvgc8`f^YG6^ zs+Es&TBiZ9@J??LM-}jG>gnkf8v(WE(28+&miM?akiX?A<-)%Wk$S(}K_A!~djmKW zhv4XLAw83pyq%A^>-Qn^z+Io{GzNe~3K z_L@VEi)75s#*`W+3U%n<&M@=FReLO%@P{?-jfMjz@py>}|g>W!(y6?_JzmCtuIXW0rjU-FrWo;*)viyhDJJg0!NW z)}N>OPpgr{G@TA*&1&MJ5G4S{y|-E1PYrjyf0Hy}p+`{s6bBJ&Q#x)UDbbX*EDF@? z@F%h0N$H^jzs!7vE#Tv9{2Eg$+#FH#l)wRpjDzCsYpp2A6mhyP8y@RGbZE4T3~?y;%E^L1jX1I$ zaEk9bR5HYiT>K>MK=q@4&VMvo{MS~uW1&NZt7{iOtoMraJDL6Ewuids_d9-1o6pP6 z+hjgt6eB1?G5R7RuCU3}`nq;qids9CxPZFwgW zIl0+-$&b2~uaVD;;WSsecuhc6qjj>^{M$o>ecwZ@dArJvv|PHn(;~b5XmVoKX?=Ib zu=O)Pc$QtadLa{2k{ey-*fgGp|2Fo93T5(7D3?#RoLON8R&q z?5z;<6jgE2<(Joz<2Ju+y0;kbdHwwUa(cVvGs(fl!{Vw!dhzDxjn6FBx7HmT)tasK zk4MMa?K8Tor8h~4SeN&eSc^Ai=TvHxpHjx<%V{o4LaE6!xYT5fIn_r_e(Br5_R+Xz z-ohOoBlEA0b!>nDx=G9+&Dm^<6V+d*rS^3UN>6&#o$sT$!ckstAv8-3VynuF$24Vy z8&8?eWSU-oI97(T2U_r!FK8uHRPDQJ6-3O-( z&YM!!q&jkw%N=@Ctk=>f8IhaYjOby|k(wv(_UDc5$M}d!d=mbl?k)*gBm!fWUw%!! z%qed5lERkO!w6+6zAwb<9XmajTsI@kUnD8-aiWr#q#mQdsJeelxy zkwJR@T-hS5R+*ds#`0A+#%MoC5DG;`7PTR+$@OM7N$K zwdUvu$GebTSFt0PuJN#jh_SULd=KV}O@iY~>q!69(d^Ws3dEuaE#({C$tyHsb#ql3 zEZoywr&3w<$q0OElf|wK-_tXVn~TavtM8zE?bZ966AbdedX4E7jI&@#YtoBv=gn?h zy6ITWfsuA;EQsNZbs2e73sa-eLgeF9pmqd5j2|(b=yi z@#dgz@#Xvb%QKHR%xGMU)^TCD`8?{#b18aqd)0p#Oy>o&Bd1>k=w!ZKm*i%N!MiVo zfxkU%J7d{i>c+=Jh|;*&s7!5X6S4G+PwRSq-t8L?mLU2UZhPlc%Ie=e>Aw{e>iO@k z^VZpip4=X#6U-itaD!I|)<%x`P|OZz`=UZEo@b#h>~zEO7#-$9G4}BEq##LC5tSo1 zjiZJLc0a>KVise^0KQGQry@5}1w3RX-uHUq-0Xg5SLSwar+tn-?}#dx{~XLNTxPWS z4BATa)7I-Usru*1>^Ss%NW90m?8kQ4+JRFJ_0ehW$pYgQzTx0%T)A&w)$Qm;mt z>A3e5w=o^)FbN~ZazgB5mC{M#oz+MBCz>_~oNRY<>pFU>8txckN1K35+SO%zq1__j zTid1&S@#z2;o>dAi|*ed{6(^Q!Ke{h1m|CrqBC1ZeCgis?GaZ)A9t7hwm#F|tXgi` ze6lvxN{@>fZB8hhTf&qVKIRFg{%A%t{eE5gXU)zZ`#OfY|11sj8hSjHt_Bdy_XAJW zzp%5sLO=O#J%b9Z)({|5x&KuiYUTI>!geKYNIc(0%^EVqY<8Ep=Dfaq<1L?BDOCh! zswl@GF03*%ES!3F=HRaD`7p-A!|TzR4Ogzi?Ud6adv&m3tPEq}UXj)ResBM{Ek8CZ zlfNzb0V1l?Ytc|%>cLg&d*sTtK;;L(^0s95kz!Gm-2q2fAP{@xIu+$oE$)$HgAX?KeX7UrvZVx{n8DK1`(7O4&ICQLVAG@?dNVlflg{r)oJY7LzL z^Wz*0#lU5fJ4L-Qf94x;IJW3Hn(Xbt_4lUdBq^ejXYp1~6Av^eyQQKSi7gRm?B+N|7gut z;_Z+lrMnVQ2>^8sJkwlHgb^7Hf)+o|?GA-o(r+rCtw@F#Y4(O!k ze2)_9)o9j8-^4=-ccjX9%EHXu!FH6@+t6mhU9yDt z92v~6Z1Ye`$t;a?9!O(rNSRu<>%2t-TM2-;)3!!BxOwV+U#dtJYN84k*k0h1T7E<4d8p+C5uQ+3( zvb5Y`LP?KT)Ha&1jy}K=G~Ez(UT>4l`thH*kmXYXK!KGE5HFnnosa4Y-_ojg9$Rj#V*xpwC<>TlEp3O-f6{xE9Z~i!SpV=^z8*t*M5w zCV0+`a`)!_?vpUz9Q$ME-zIjx-F0`PdqlC}I}<`-(xP-`&RZJ{tP#{Qq=&sh`1DRG zog#OS=vzV|`c2636yS^JzT!ZO;#dJya=>$8dO!Mk3t9vq)yZFWO521|&!d^?E?hgE znGwjk`__ulsy>S+?(1Q^P{7>6TPk8a;WP zbs=|0-bQM(%&COO?gGuL)F#B5?BB6R@~KD0Us+yNKRplNq-sn(mm{Tcp39e76)z*elM+BP1KMPS=aY2ts#nA0*5;6Tf< zruoQPs?>r1mCf;`+gZ{A7Lp@5U?H$TJXg~ha_{;TEQlMUI$j*1Z2xrUWqY$%ffT5> za%&f&3ZK8qx7xQ|O)xKBu%L783~MGXiXNcZ=(XW(*F(?jab-8s{!U_W#w((_^?@v& z;PV1s58&tZwyz(KeK)1<#$E`?`k78jVSz?x64_(08)Tm6Re)F+(bUDtehV1u$_8EE zXAMDw9yLL&YpQOt#lk&_#U8q&Eqjor94xTJA$_H)dK-&q>CSODM2jkgXV~&O} zuEwNEF=O_qSZEm)+Ch>A#0&$YQ;uO$7$0}UY=4LF!2^nV(cJz;Q<3S}`@9dz3>6}B zTWWE?JhUXvv*6(H9XaRu9YwCt^HT(sm4S@gymUL!tq{2Q7!F{$SJ$!4iF) zTyk^4g4WH=#+^K?FXjIY5Qfy7Ug{rio>(TP{ypg^h>yrftEEkfX$~6mF$11F*ez2| z;wb+3VLUSdWMW*6OFm>hfA#bO>UNcys@S;Yd+me`*ZAs@RY%T{GtY2<;)4aNFv$=$ zR*jVQN4)Ldest~p#F+{4gCDcG`BUd!FIzuq6B#oflbqFR;Eul#b$PkH`|E?5&(??7 zoi&LCqiH`LbgkXuV-CMvD|O20G^8+68gK89H>S)E8MtMqWeI0GR-ZK^o!d{U)Ibjp zLfEcNQ_mj;ttIVey1%w-qDj-dib{=>HE({vIQ>aO1pr18`rlU6#(BVKLd|Ro^<2o# z-8`b!ra=0s=;mt5H&+>1pfLqx8+>JCr7VBo?*?lwCO;hJBNbR#-FrF)>>|TCk(kb! 
[base85-encoded GIT binary patch payload; delta headers and line breaks lost, data not reproduced here]
z`*cLquTbjUD>q++0tc8iHJAan^5TdDkiZOU8+y3T>>u~vIhQ_{?cCB=;Ab#NY}8<# zCqUZH`zVT7*2%e{B*lcG_2fl8oFI8gR2?e4qKK~HM3RD#*r0p# zAQ%f5VHZq5JH)(%VOG4tsP0h8mRY+x+y6D zB?ZQ%$V_Ghx;%f7KhjnC!qCglgs(Dp&o!3Huq@=ZfHH##iKW=M(!K>0+-K+|BvxYo zJlBWD!04C+l9$lf9l#Nw()Qv%t7H{*fECbfXaZ7rk22Jc%6eoTDS zuZO}{|H(tqApkv9{O)n^@YwV3>Bok>n9?(5%*Fs(06>R?u9d>3-nH9a;1=Lx%?9|s zVkX-&aM>!IM<3TMr=Aq=Bk5s`{9K#;P6Y~$rUX1I<7W`YHlLPoD}ECN`2aD#_5I}P#UJjUXbqrM0HoG} z4^>SxUu0uT7@a6ExF54HQQs`Ct1~RrOYZ(};;HS6Znx}7$7~ZZSNXXr&bYuP{l+9@ z)$HempbFfGcyLueml>pX{YF6uV>3jSQ_XVo0NuoUG~BQ!POP{ssK zb5$NQ0=5atD$LIo;h-6)+3+f6rk~F(@a?T5#95gMg5PK_dftcpXBjC`rsxbM@$<oJAfEE$IC>FPQjbNDN}Ad4p{079^-#7c-RQn*^x)ovbbxD~S^nCmZho!?#rz@bKPVLE2Up#5^Hu1j^f*p|9tcCX4gox>y~l-S>ABv$GS50cQ(r%y zF}d5VTvnpcXK&&XuWrln9ybo?^Ht;Rn~Rxgw=7yXh^hlNKWK9k`B|mQP!LQA3rIi2 zqSr>OnZMUTolKt&_Bd7VQ2`SNAS;!^GG$cPOh-Bq(Hs0n?~lzE(_>x^h3qZ*kM{lL zDCu6F^7fOwS%7d&_B zH#@0;?SR};D>Uyi#qWi~C%U=!1#GL+jOK37Rt1Nk(4SH`O`P|r=`K#!u1*|5f0a2l z_gKxTm(&Pta&oQdD4%$RUJcp__~d&Z-6CKPUn#52)NO6+$0iU0u>I#Y7ncIZd9 z#+oB%O*!DFH6%g`It67s03i=Q!l!~TH8QP>z`9F)3;wWv{Q03dyU4908PXE>|{>7qHY^0%ABP(3mPzwL6gpO|68RnkCDqC=P||r zy)pS}Mz>j*kPXYs!_c;{k$Ilb3E=~!s7)ODO0E=0y>Z9;+QDjmeu+1M#Ip2>5hIvy3(e z5hZC0#2Uy%q_FIg6Z8CuywR#P0S|c0y7A)o>l1!^HE6ldP*M*QW4br>25ft?d25Y& z)curiy&M_su!j76$Ddve2weWH>6P92)F*Pi6#Q=A-@dqrgi(Kc-JB2<^fv}~b5C`f zD;0JIDBC)L?pFcRL15Y|;2_LyU$m7H`4Z(s5f1EoWbUQ75`XXAscy4ztXQSuD_z!g*4eO)pHs$jLbX@(Qqd1mIbi&YO*QETFNhH%sjVEaFDG(`MhM~+xHxDQ%g0P$9gA_HEqLxO$ z(&a$PSZJ0X4$oU$>Mo0+OX9B%teyzn{)Y?BcPB|ABTqxdNEVDIx2#0D)hpa%5>Vrp zQnj(_TZ8XbT&c^6xbvd)duh{$Du7f6S(Q4QEodT${j4V5mR4%yn8EqFtw3+aQmeMj*`auw%2l0^ z`76vdYW+uQlP)!A5$}%Wn0`t94clSYbr3^DIBPXWaE(*1OTU%naGbO`% z$GvxY`7Jn`YcWCjdqdy@FUg5>R^qFK`0$;W8OYE**GB>w)+wx~0Sd3jRszLNM_(w2 zT?3`h1@Sj@ZkGPaf)mn-9Z+$IpGn53fiL$Xc5Y$XtWOVb;odF(BX{{!&yJ7A(ICbO!{>@!cG++`Uf~=DyM9tqQv-8y4|K)c#O?5|{x{{? z9|t4WmPf>1Pnc^aOx!#An$VrT^UCivq#4*~3dBkvI&v)^fXq9A@qq0owlPT6vO`Z@ zk;U125Po;=ZrWFEBF2O?@txSbiZ@zs^9yptdo{a%kv+6@KZ{;E=f|#(vdNAU7ytE% zYko#{S-@-lVu}%-^4(xD)p}2+1CJj5PhIToXKN)e(F#4rg?3|Ic6DkdM!02Jbm~!| zxk)l}TvlhJHZxcdat*rK8nso7;QaLy;$e*H#u{8a_HyXR`^1rG(aqc9U2jDJx(U0k z=t5U@pI04O(w)6$L#g$qnm%0h&1fl1>I?$uFhABX=HE4rzsF;I0dns_>f9RCz@@d- z^;`AkhrfDn%$Ox2T#l{)H?5lJ9h^wwwO3u| zZnO&WbsF%#Q<#!R&BaG7{5?gsDe2{xHx7pw+Wk{BW|{Wr=iXU<@7;#gkG6aqaZP`y z+SbzK{`ArMgDl~-ODvc^=~*yg&2QGilLF~vPw{e|cyAY)s6HC1RCZs_BMzTl4lkH; z6@+&tyT#Z0&=49V81p46zM!9wTB-JmZaeI{G3}@LrmmntSkCba;~1i#%6PkmJOaUF zGLJ0+M9dR;yTyg(`z5bhNynl-l%7_(_E4Ab$^%Sy&kQayKX=2&<*@I7-C648QH{v) z+N`+PIt3~PCPAYg48R!_A(0ADzvD5lIvO{ zXt-;NzE8WD$WN!D-x54#ef0K|6{y@Hw}-w2A3ZLJtx0#Lu3xB~ZZfjT4#LsNtbvD* z7C1axm>xi%Y+;7;8?d@5T@Cr>VbgvUsXu4WEK=p!Xin+yBaHaJrK+E+=_~eS*;zccxR=t1zeh zgBN)QStw)Lxa$k79o(kwdu0HR(Du{gepK&?qN&41F+Xfri?M^&N9~ujCchH=7}LE* z@ydybnxt9}%U^AD+H$?tdoKDKLGdNQo5*`ymTTm?>y!>68mVq!JvRe#46_K5TQ(o# z($|N3HP+dL^ZXg}Y}DZLf{>pB&4evoehhygQ&heX#5!Yj%tM<#8-G&qGZ;@gCi#}& z(zx;iwfl5bTK%4&iR$P3#&ae}nP&(>UC!{Dq&3^)T3xaeyD70+Y&Ox{m9w`TADqGJfpg(R1YAQyctY4u))^v^czaw--VF z*+v3@9X{T6JRwr5M5~cmneyewe^8D* z8EJlq@#np$k?eDrUE5su8lV)xsArkOX|Zr8+8sc>Cvq^T8Z)c`kJ-X|O>tLI7RN)3 zvM!w+LKIkVpL7hc14X_Ad6vH0aO>mq-0K7miSz13cBKe1U`p8J8(I)LqKs+%Ty}QK zuW(6_46QPYKkO^q72ozlf zw7$WE7G4m_M2q%zQiCKQkGD^PBWTkUKh=jZO2Z>HrFKliaTxGaf#kXdlObiUZQUn= zS&G~7iAqgmwuIFmaE}@VmKV*-hqJQ|l9tjHI?rB37)k!d3TNM~o+Vy*pvPD!_K(@T z;olz$BLJlCO&1C_H1w^?wm_t#?uuG^(cqMm-f3|k2hNsLdjWc`S2qsPT2UEV)ZLY! 
z>5v9AHBswqne$`1c3=X%8gX9xfFD_}c{8jlt&fV!G#!^8{m-8h@p0+@Hs9TZ(wiSV zW8JZp?j-jZuOb8gPsS^ZDQ5W_AXkc#>#djvlkk9LS3sNo8?|TY-K#k zw4IZ0e?N&96@=mt%JU${DqQxo0PmmzbyBpn)_6aiOaI~x;6?(DqtJ~-DbhRftY zO#7?%*T>E_vairDGo~cu#9k2h(iFp~mmdrcQ?RZraQe7GnPnR|@*oo_$f;Jz41INR z10Qoqt{V9=heYHv7qcXy-pRj+D^Uaq90GM zU8}Q83e%p&7KcoMXdMHIP~x~rERbQX)8&{DrGnW3V=dAJBr&XD-O}D-c=gy?jO12A%Vdi-%E?hvuh@At1)fUTBUYTj;ecikXu|PjDnKLc0S@O$ zDscv(b*|Oo#@^thFg8ntwb5XAXmLBBN)C!?F-eG0vmHuEKCofkBrQa37~arFw?AI- zW)b@yCHX!DbJ`YOVcPq#Zs?q7c!m931GO2({Ve`6Ke(_~`+oWyI^6ls=|lC+K6h8aopjWVLiB&CL<7XmVa)O4hV$)VSX03L{Q~B{F6}v^H z!;v$?08|*Th2TPL*V0NkXz8ROipyLpUbz>APw7}hI*jX+89z|*YP&xTfSDa{>Uf9y ze{yLu$v~yDzPf|BQj2Zp|Fg{bAC=|{biwTP$jJg(eFi|Z5a7Sskw_a^IRZ)L)Z90S zSwCs$01}fS>P8ig&87d8(QuP=L^(dsA)ZpWoBhw_ z&-Phty_Do}#OyZ-;ic0*8znld1_~jPJO>|4O$)xEefkX`a~iluWkh5;JKc`l#v#8& z2y+sW5+HYQZnkPE6EGVIqEAUYj_M-IzyJgb#98REL~_PV2JHSES>w9sx3`hKixz!J zxwYTvks@kw$(G-1Nw&Xf=cQ8FV0hs|xYKYn>24p=;G0JKnHp{53 zDvB1uhS^4Ctn6F8H#4C^FRD<1*O&+uhVbPEH`<@HDg^iS;KOzk~qFPWVW0ZxYOO;&CTs3&Fzd{ z;Eznaj6(wixDRk+Oar|W(u--}|Ah2hKjc<&nOap-r3$xOg-e&L-K<3?Yta$_=c>Y0 zs%WSrBjH`L5Jbp( zMvmzr4b!@0$WtQsM`HRlPi2NEA&uA0>>D^ECZT?$>GHdi)&!|p{A_GruO$*w# z>eI)M?f-myvIA*83fftK8`A37snJRm{Zx`sGeIv%pvOs~4PPNbL56k>ebo}3C-5?F z4z68BZ{vi`ZpX!_a5yy~NG=J)u{yO=yE)ht7;n~+;v#uFmrdZa(Y|n`Pp}tr#p1Pp z%zMdrzkOfq1MT?N-R{6pq@~2?Ng=6LjW9dy(umO;be1N6JLh>n=E3O4c*z2Km-YK$ zC+rjWLzoKs2{CF^GzU>|;_wl|twzJ3X}z2^50SpXM!oWsuip%R%82tET&ap!$|1$NDScJwjS{>>g&hc4Yy_h@ zo{O9_msr8*p!eHd80)ORZSPOJHTQL%u)f0Le&nJvZwq8Ng$&?m0XBbQvf~n=zrutL zCdcCF0@Sk2X&~cccQ^vg-hI4*vgJJ$xI}yFrDvDSh+W8FT*@O#Ap90!h1DsHK=zAn5)+2H}tyKYS9L72rQ7g zN|L?pk0V*1BYk0Tek|fd)9+bM;_BCtiHnPiUl(pj_1V4gR=_sC7YE$HKw)6aGl<%e z3#jJ|)q-In$7`AvC&%8>(p*bEg80XC-r zQdx68?_X6!&U);VN7C|+-Ow;6lxM%n8ZG{NZ+F2{{Wm*(=L<-I%$$I z4YZ`dj0IRn_+7o%s}}vZytZe|jB{M~!S*$Gcl_7;4(IVM>w86eQ!Akb+$htj<I!RxdXe7A7Ny> z7d}Ux6lw5J1ogut&t1v*P>Yuil*}l*3zpT_FwZoCI zD*U(%X9R=Ew#e$C>JL&8gcL%Gc1Ktb+Lfc0C?-5JG;Dfg3%X}Ga)%@hL7RqYPwF>> zZF}ESDKJEVttZqv?#gGVhkhIAcPm=XDhl_BKz)F*+fadY^_ZW#W7#)i%{Vtwd;E4_`ohb5K6O1acibLX zW{d@FzLhK?)h01VpCSe-@q>i+7VtcLf!+pDv$Pl1*;Xg2m?6&|&78#!{|%MHjAl8h z^cyuL(J-L2H@3ql1Z+J!NooB!e)dk>%4uBgcH}TKz}U9`9>aV`n0b)M{41n8#ugkS zVWS{S!SQRCCx-rA-1WTr-T_OTte4~q6Vw0FUxgoDD!6Z_MYqyeD3B!#pwFph7YjZa znp4(B(`z|8=mxS-&LSkenFp9GGo0rhVw9E;rP`w*>mK}2=zQ^alQX~Haw|q-+wQ!3 zy_xD`buMM`r&~cgZY2V=7+?$w0jb(N>ZAu{htUUkfi@0T`=CG@h*D2N&pzG#M@>wc zJ2np@POGqq`9D3s5^5!thb4?QuHNT(!z~@14Nn^OSTtS^UOAxJx^U{^eVBiEyZ_<3 z&snV#U$xKdpFV$4|M4l6Q=l@aXq1rTI9;15rDBqfkUNv(k>V$FdZf?$&u^$^zB^9( zY|o0C>^T;qLS>?`A#`(R)tv0Jg>EG;at$USIGLwZC0-9QEanDQ80+w-zh0LJijlCX zm2VFHN*m^Eq-oG?8Q1QQvuj)u0sDrTu@1YK<|3bePkzdU4in^tVyo$k)Q$rj_CE#5=nLQj6eT7hU z+07^U9pXS zbpFDX#Y+Ja%sffLD#^rU zwDm~Y(o*5#7ummNdm@(2&*Z)!Pn)LH6EHfFW&+&4)5=twOUgz+y83iC6-xQstKT{Q zitPJ$DXMSL!giDbw_d~yR=98tbaNHsRwZn`9C5Ct+`XZXy>d(`V7AP)QLL1eu0e(_ z66q^+)Wwxmr6vP}Cd`|FH*(CnHM4O1=EtpiH=I8YRHKEB26Bw+44=Sd(m4Pw(5gI- z5s=>C&-C2efuq^v?14~p4qr{MErpI~}SoKX-kvfmH0u z4T95``nQYleW#0Ac?O^Qcumv7&_&|xSA)-+WEwfMyb{&UA$(R7FH(n@{#gBijv#Zl z46y*QUZt4AE%S^4=6=M_#V#7Yl6-Le`ubOgVov;}htsB>PbN8~{!_jB=Lep=%#IGp z&>nDRFOwak6q(Co=sgx&M2)#<)?$?bB9SU(D6zF_-KP*hAxGP!$ZCmhR=NX*$P;_W z&CNuf#yR8F6lA!baul8E^`Fi3!@TNGxtbCKzp{~s-bS`9p>b_^dX3Rk4a8iS9TVKh zz0#ktIVSOS?QSCrdBH&XGFdv3=o=}HoPVcVFTpzRcPX*RR^iLUSaXz1JLZ^%oTHn^ zOZ}8jGC*9>=^lnV;6=-U3FVSlYeZFDJ`1DtWg&OsMWRb#Z&28xKjLpiSzbtNagg1BS}k7K7(WI1S|1(4Vj=p+@y zIq-9>6OrkCRjzTE3NWaAwR?zg(zQm4TCYlGv>>VEVJY5iLQTDkeDJ(JI9@uLw!RB( z-7F}}x!+^9rq=WFgu#bC5E{A{P0g$GNtBKq~sc(dffs& z9-`OW@>%30xpCGp*!R^?!F1Hn8smgk^`)pxI 
zc7&k|Sp-_pWjK-lu7S${4l|4QZEwKwYt)oBO`$;5Xc*W5455-vTMlrUfqaEqoy&uC zUamov3S({3KoGI?7(0VDTDuenzc#=#s$_TPU#VMv`Uh_K@jTFMv_o-@sEpPY8|}RH z#XYg?!_wFCh|@+DC?k%5x@sI+V01y`RUHbw^Aaq6^|ahH>@s}~UrGGuEh^3hB z)=O6AFP{-m_o#YI+W5Idvd^k76P$ih?c4XbQ~tBf$50TaWT)xpaGYQ zgbfqj{0wGiG5nwjiyA2|M*E7^sw!};D5oUHH!|S%p5ilxz$KO9VnR*D3jw{XA5=fV2=YqSnp=_%M_C)!$E$4 z+pFX6!*@P2sHzTdA7E&s8$WxG%7>fd2M>pJCgGhRn!xjoEijs;POY+DP5xY8bSv$g zy_drH6&z@sK1n_fJ-7RIwU9OjQDr=T?v!Lj_uUlTgGI#`rCCLe8C?3qXWiY(g;{Vp zqTeZ%xs0+|ypEJ@mz5A%0$sa7411G3n?=6eoFme2;bpJ7e(Co1n7&t;S3zG+uXQUR zL-9bp@~&Rf9A#;0P!r8a9N^xe~P z`i&5~^Z9AAbQvP)@xRw%6A9@$*}~H)q=*Th=xMjs7yAl{JJ1|uZNz+9A2c6oQvENn)IhZ${zxK#f0|W$1-O~9<;&flZ2kJr@dGBWYc?)v z^O3BK8v0PC$z|=(JPsRKg_%ahPEMKvp;fqEAP!hmXf6DM7uv8-P4sdkF{L#O;sxo;@Po z-{@KNDRAQl6|XdEW%RX;m7j~|wS-+7TrC~rt^rAG4^jVoixt<1p5b5Vh4B}h^(fQ^xBQ^)a$2=?@Cz7YZ4%0-9KiJ5SMsR}q2r+og0-vN#rn;u*s zCW-qC_oh?Xh6jv-sm8m?K{oB9WaBrj$;c${NAB_?N}?3P3MA;&{h+-v^dv!Mh%>`* z!Kl}!kn0I?#bOKI_U4J*zpCk;LX@m5I76;{-n6ZOX8L zQacXl4udR-J_?R3;rjNk3*3>7JFTKrNl~|V;!NU>Ed&K2keNXVF-VTDS(#O}@P9B% zOsMi;rZ{VuzF|yE{XmyAr&2qDH?qBt8$7KouA#o?R1A+GYe>6oYg?H`tQf#KY$*P9 z+M{;bwtm{y0!TWUV@E7OEO85yZ@7yb;rzL`(hr`oI% z6P{QRp0d{|RM>5^giuXVAfIqVO5UU4-(E*hcHpIaLKuJ#k@_qWZjBX`30qUvGen<0 zulmyJqltDxSSA>KD`C`ncRJ><7*Ew8IX6%ja%HM~`*jf?5F{ZvKMBrTvl=Z$h0*cl z?!kFez#o`23M55I^OoS<(jf$7mSP3psRz>zxvH?Wgki29{Pe2tHU}yRi0!b-Xs_VJ)}#5 zRiJ4HW{<|fO@qmz<5wu7g86RknYeu%{BJ7$3OaU$47-AV2BEkjETL>k^(q;r8A)PE zsd=+t01(_It5-s37b&_{fAvq#ttY;;EhG4UXM>rxyR$K2du;uET%|u=0dx)E3Sh3O z2iV--^0ztIn>#<1?n44OLQAAw(a<8l4*3I`@Dv7YL*b+kBfz{RY7RN!H#Eg zv)Crt(2rtC8sZ`QoaXtn?a|jRV~mlbwFZ-vfXVVePd>f~6_PSF4r%-|-j6ZP+c1^2 z*lY-&tK1R`5lR9Hp5pWX4!*1-%}xV4N?cnX9mQPpZm`@+-`(&k$2<71Lt8JhX|t=m z8`l$7I)RpeE(f<;g4-p*|K0|0j6j0Qz;Zzs zbvR>8?Lr6I(H7%Q-{h=f;{-QS<4%+2LfQ5wbRoK<2A#vf8zT5rF~(n(aU90-0&Zsf z!0YqEcXC=&(=k4pOU{ol9?DxzY3N`+Azw@gpjR_LU+d8$3=dfDJa+Weoa2|~PV*3H zQc}aQbq&t0XXm1P`yK%_B?`fI?&?4x0M;@sp+-e;g(R)vvoq?kq8Jx`-`-l!Y0ao7KRx=iYh$L(%8e6z*M_PU0v zb4^M-tR(JI;g5x4H$zzd1UgD`N&u}U91q-8dFs@^9yHJ#{z{KiqkQT3&{=T7n2*+t ziRh0r)@YCQElv zM|^dAXAKts47BMaDP{y9VD_S19tX7o1+8M8QO+^81m#U4t&Jx=mZIDM;sdz$qeN}E zS8eQj;eVtG&P-R?*%qbo{hP;V4>gpd%8KpE`wn&2YtLe0_&rDgpvNur65vLIJ=$Kg z^4Dqhl9|vqqo_blxJY^v={&rtCErky)0=J@YcOu;1)`g{kFVKg1D{?y(^pdk>Pta4 z4nCJo%%@vNE3u|mw*>RixwEbzszYA!o$ZV7h{ikrT-~|P&?!pf^xqi!oiT_KqqbZ` zg(T|_lLiZeU2QB-L>0;hL6WM#zUZrIbRt3uej~+MYw$ip(P93#Uw*0NM%Bx z??qr>_o|RNG^P6BU+2TtbSaDMdPwG0nXHZWkR&FQ<5n6c|2mfFcQwNb}G>pzs;@~%|F@(`JQ9?EQ zz|XEFZ}%O%+h?c&6O%C8JMjA#=n3Ws@H7nsU*Wvs(19}j=z~5{$1P@G<(>AE?wJ9Y z5BjaePMbbi>Kkn}km&1jvH$ybfGAF(ih;S~07`-@7ZYV2gc1#kM>@AyOhgoHR~32? 
zz%LRL8$Hk8{%(`=Wc4ByVR=a65R`WC5oJtDeS41jx7~%96Vx}e=etVPU+5KaAMPED z|8u2Bg=ND1Ycis3-Tmzy_wU`uZIC|Uw_`TEKoiU`q&rU>y&rgXXq{~TaG|5kIp`2A zmzgqbI*HW8A^0dvJ%{8EVKQZr5!VT;IIRa&m>-m<3&9_hF{|(26ngr zAZG#iaNZKJK-ZinJX%Lo0)tH8>P{92ukp2U9V{KRSrruYzk0B4A4;?jhcVwg)-k)M ztMCm(eMA4mh`7KspPpWEp=9*sLE}HoC#c+nd*Ap6Dpy}$^YPE>;qIF7Q;wxi(lgzWZU zN=nQGzGap&HcQDh&}lT+AxBVkXrJi+%`&%AnGv51B3eGsX$B^=_t5pDoVq*+zf1G) z!ix9z>u_;EPX_$H&I!vupdDQP#g+CU!xRIFi#O4CMT#+lZ;4H_MDC{#qOIdWzyE~* zdof!VdB{K`zW2KW3kFK?uft%5Eq*&6J9u;9<u55q!(f|3O^Cvz6pHyNBDS_4DRhLqT0dfq3#Y<00k`+*hCZZrQQ>c*Z zaKdB&w>u0G#wz)S>J%cDUsFe96%H0R?hEnX^W^@*F}DrFjgeKcE1#`AKXPfyA@+_$ z_pH}L_14$Q>b$Rhm}uMfWaf!gV4mxnGr8Bw>bz+V*HB&j+|V+B}(XSpqBK$cRu^uNwG$di)UVN>*WhjxIzp$=d{;8EEa{y znYYa+=`}}(gs+luULFl67T*8*|#r;Q^ERdU4@R*b>q*rC(JV$6HW1?&<{= z=;pjWZoC243;D2j;tot@b_j<10d}T$zv6LGhV~?e(21_Q|r*7mq-TctC zso(4@tF%N-ZCZS3_n!FCpar{1@hkrR?<*~c4)fzpxjM_{l0##=Ot2*uo}5NnTAPbM z)>P@UuE6qD(QAT1*|cAoq10^{qbHeG0WT$dr(8>RLa)+)I4JnW(Hl)pOX@D`J{(aP z@nknYO3#O*{#SZ_3n$QYT3w#*6ghd=NCGso0;{!OtZXKLymgDSTDLD+W6Ya{(U~7S z;!jxc8!Dp8ypjssDv48hQ}qICt57F!up(+1YM!M&{mavE-JPYrnLS6m3o`p@X{U~@ zSCwx%A57r((oDa=_Ov@+=`$DXM_Z4s?NjhZUs@lWiYu{ILX9O3-=<#|S=DOS=|%<^ z`m-YI#wF{bq-Qc0h+a_w>ng#wAeX9TKgGBTubi83tJhR~GR|y5_L^`A9c*0{v@mUn z{Odc}jepY*!J~nTPn4wx(@N4uTWqT&9R2ioODwr;05K^$@+?~=E1lJVS&d2tVzdN< zaUv?qL$KzfoxrPn^L}oH9{q^b*ty}LxXwS4cXRTs zf|AIW?GXi=PD~41r$=4Nuq_u_cCKZ(mH<>*>DcROSB6G zQIG{wE)jB%A2IncFQDec=TbO9?%`J+E}7m2H{a+nZ6Q90Bzy{QqPv1oMxqT&u^f}A z1Ps1%>EXwQERrR3Y&+L5Py^^zt0?C4BteQj9b+I?lmE8j(KRq*+@NU5{kNDbnaIuR z6W%}OQEizx&$9FSgP`nf`Q@rSogQLO<}*Yrb?9SkO)3d)wGZ5t`RcH6@r|AiH9tD0 zlqEiW!!F>hOTf5-h1rNjP%3(DkU!yUL&=5=bl&5VyY55N6z_DTrgi|P;BpFL94wI9 zIh<{&ME-i9fhmg@*-gkwqQ;zva$fHlT;wwA%F{kYB_|&^c$+RX1oxgsjg0+r-E|o+ zg1F>_AaaTdFzs^mmwV;AO-P28R0?sSvMP72N=7Jx6fCRfnBR8`&eu0%`l9l)koIrO zc!=6kf-&>z_x2srqvwc4=Iz(x?)<%-Q8~9sVvER;m6r{wI{F+(w_msZobD{fm>Q4r zvAaQmZXl`q$ZqA+N8SOX)2k`;+Ge8FSBRD;#d)+{T=nGq4x`5^v4>5pPRxveT%b|h zE$hIDE0c-aC?Yer4!Ld@0t#5#L*!243=FvRHZjsU1-p_vX&Vl2{OuQSFuFMBGpCrj z(dJ;t*e6dd*8%B=WAf40T;xQbxm>qr@Jh-;E6muJ{@A=?60NiU3iKcL33iY52?G7n zp>aF^1C2(-o^=g6JEXa$T1nxOi3T0LyR)XBRY{^LkWn(nFZwrx=&#J_Sad$`kVBmQ z1MhDaWgAK>Sm-=X#*WLbcjL-VxEtC9*ywDYL)(r{KU;j2t-rPI?#0@hgk`Z7RFyQK zAC4ofoEM<`k=c{(Yk6*}?yB;1g&DeBPEvOIzVK$9o`{PHm0!F0`Afp~0PnHs_cd3zgL6x=*56cGOqa9OtPHcQ?@1Qmo)RU#&4B;K-F<0>cESehZ{}j6xqZ zESnRw-s9kGkig1%Uan!xivmy4LQc2d{(nq)rAeX*kWn<f&+Agg24jL)C8UZyUo$*` z+-r7dRst@`YM5R)Yc01nR+SQ9Y-g<7F>=q2L2 z$4`V&Q&4a94r8`htZh!n%lKir9z$)>rA#-2j9^P)OLuD(t=kuqZ6mM+R>+UMva^dHZk@4yyRt(tf_&D&`aCCNqrm)7K>sHzWds6bAP@~8wc>$=K_JJ@bfxH! zc(h=;O^*#&;?e(4^qhqu28`~Mq&XHXQtR`TvPA++#w_aJ`aW|ZRIeq>iN?;PhF6j! zDqobMlT;WZN6B>;33c)|g%-Mo+MgDzHdRY3Igw|2z?N&E`v90pmt}gP+?$OoHR%4- zdOw=g4%;q9K1icW$1EW#?GBSHVUi7p=n#Q(W77mYW_*JxzhTzQGf~4-^@4!ZTO1fI zD6p39GTsl^P`>=y^rZ{hok^9uC^UVE|=<@10n`2*m*Q&|tX^)~5mbo#h;Ek-Z8= z@Ul=RSx|uHK9vTL3xm|Hv}TDNTT&A{N6<%x8I!=vJcBj4#R4HVjNx~TY&xPolgE)= zsqA``i8UrmR!4$Vfy8KfvAKO{>aV_3fAt!#!PB0DeZ7*NUP(qTc%xzPR`#iIypIE9 zmD77@W17YczC(?VdgNtsoM|7kADEer8TJa>wDXRc5CNiqG=;>X2DDR4r&6RW4rB)l z3Tr_JHY9EEw~ZfiQKFn`BnV2^5(fQXv@aV%UgHA~amt5YZ%4-<1VAxNQb=6}phdHG z_VIOLj$o_@;Sd=(q$C9R0|CQ5XfE)BxXdh5_UlOxkO??>l?F@wLijb=uK?3bB^VaB;V9-=@_VsaEuf&-4fO$vK z+hTM@ypTR-bh93c0;C(MQm2OjMhc+x)dSa86?>mOD1R~HwpPBY6=bMUVI#6Xz5=hG z!+OoI<1fHq7Ispg!_>zG>LM;z>Z-EiDNzVjIBphL;8?h*0FAKnEL5PKSjMZ_7p9g5 z78DL|GR>JRPF5EusUiPa6h|nX6u^fCH*2N?ezYI-9Xyyw2*#`7qb`5V9Zi!Z(K_-) zC`bv1HzKf9$WKvdfiz)i?E}@AMSG%8FB9x^F(17ty3$Uho1a4cGJ2H9`+(Y>^<5PayEhbt=HLGwOQF3Uf^4Hj_~GaP%5#yeT;zk$Apd zY4}T>A3fq%gL10BKyTrBSOpp>|IV76@Yzo}?O+Gz;6~(Og<#QUMzA|LF^$Bb%`? 
zWZEJNK;8wpB3(~&ir5!w_~BlXh9+^8lMT@d>B;#8&^Y?)A0A^h#+ri zKn&mOg|Y{fL)gR(s-_reyyGw?^DR~w9AupqNXOgO{^QC(uT){zj}#s6z^)v@tS9f; zGL7t%s6{%E1^mOB1?&{EaD>KmP-ajgA%8u;of0=WN{ZC@hpr)xpt~k5<)#0uJhdgJ-Y5+`egkavIkC@E#ws3ffuhfNI=txGdB;#^NFyX5{t;fIHT7&EYP{UpLqyfuL zJIacGucrmRW1UCCwDovm;k+D>NCy9&+VEmkW-2RkcYqo`HYpDb zmDqoQz58yp*M?<2xIVkL7imCdy`^^M4Fx;LP~<_yqoXIYK57j~Ky%QDEQq^ti)hbE z$NUq2A^D&pny$>Q6DCf=uYL_#sFoC!f#wPsPgKFf7cQFos{4rw6jeM;xNNVGtujTs zav^KB%#K z_2VHVh(0_IL4Zq4IFPlr^FSxE+7ghF0;ynAff0vO`obCI0K>#_0$vD4?cKNL32ct{ z*Z@Gje?Q)71)9<%JLgNDTldpn-7hiysPW3d1b<8yxb#Wago*0?;rg6xA7+6j#qbV6 z0M76GSQZleaY^t;2TRT%@}aT-VNtRc!_UC2=Kct;PKzmc;wS!kiBw@&1Ih~r1b@7r zoi+s{HDuS*R@A(uPk$K>l(ki&_F%~$(3AN88=Hvw1z5<{f~ zC+tn`kTk6tnj06RtaZtl00<|PF4lpV*(IOFa)Pbo48$d)U0^RjA-FJZ}aCY-_lQQYQ0D#tnQoddI z1SW|VCEEi#?W7Kw?~3D5`fIlk#i+!pLVFPhr(lR6Jf1A5J)NI^-*xfjakZm;RKncp zkc=|6ZzD`fhSh#d(EodC*{Fi^i4KxWp@!zF!=c7%xZ~^w0eg zyw{kMNVfk;Jk*+W*)8hG#DGy<^i!$f2yI0~#RIiZ{-Gb?V$P_OwqZXX`hs2pB zOx)N*T;z$k z?+4wZ74E-R(!NF$f+e-N-Gp5NN|a~6kWa8JnUsWDq;?hd=yS3Lprs4ORAWNk7$Z*q zF6ZR1Ym|2gp1C`+5oeh#1PdK_c+EpSQ6syzB1pi15=mX&5>(#rj^!)2VB{f;p^3Iz z&WP!-(eY%kZ-m?9_)P=U*tE(%42|n>7|YJ&zRsvkQJG^;t`%u8jWBs^e^xRT-V*W$2yOQ{)3B}#|abIXEz|sIGe~|p{Bf0rpyr7|j7`eT|Q2V{JH!O$NuNQ_og-w#fZ1xTUDAuWI z3(eEH3(msrdec#-Fd67PKK;~OuqQGXid{&$^f0ws&_V5(kdbTtCq}(Fc3Irhse7&X z*eQ6Csuvq&)m?vm^JGdFZ8Ib!~fTC1Ejq@ z8fHr=xypkmcLn{bUuz^~c$S^~7XEovuF}}W*4&}9$EhKuV)L}nIxu;Ue?e#2=HQQs zHF9!MFB|JV3(uAK4j_9W5=O6jU$V4InRhvc>st{oH?Z5_4(kl z-vad7aQ({Rw(5>Z7&I0r@1-$t%bHWS34gBXn`_g{rH`8q)dZWC>99$`<3@YgQmosT zrh}W6sAH!GiAcMYoxEuW4c1uA0l4#h8lxRGJmOECob&Z##=kwQ#STIj!5B7xpff;| z#qcQO6u4yl2r5}C^8-?FWHN7t3gW|lfs{s3lO44V9~Rebv>}rTL5GYmEFC&S@!IR? z6?}-g4t+j`i;o}RFTef{yVbGC15u1aSj|+e5J%qCd~esBLLTfgKGEh~cTirZoD0kx z=k>8e?rv~j_4%{yUXPUXzJnUQEOn{Fc9qN$Gq^=`(+bDZowPZ;R-j4JkJ!s%(Oj4& zaB`6&SAz?bk2jv_40X&)DG#3l3E@B=Wj|RSpk@$q;~}GTRbgQ77~Yf9Z9Oh_4pP<| z&I>ZfJ`$EJ;&dBWaQUq9ErnsT8j6P+hLYc&3PX-pu8zFzY_0SgOLPcN6N%}sWl3@T z!xaN4v@N4Jy;-z?%_*{6?g*n@9xVdcaoblF86l_rjgX29SiE2G_jP9&FFL6AdlNc@ z^^){Qr_BCHWktb&?9#7^Q|te9+{ffsnBP#N?JXqog_9c6hB-lxg`RV=d-J5x*}a)G zPsw@ON)Vqwkp2xc^*<{qaRlLOI&LOwz+vs;N!NiLJu4NaKSX$|Wj<>{{ zvzm=)?U~VEr=qX_Fj(;3xjpGSG2lO*TDiP&3BHh_(id|Cf^taF`-EKjaQ)$Tf4?BfsB8{L`fwPr;`pN;pL7XZ5%6e>bhD&3Z}ndpRD z_0^ZDSLe!`1W60D$3zB9V~T`=1Pj4`7QT&GIbWHcza~Yk2mSN8OGtI9?8rxw7pK zl^zuJ<{VGEdGuOKm$5R>16Kw~TMGhg-DccB9oSPQ!+NzWBWAx{Zs>A#6D{fTb8jI} z@7{;Uq%^Y1tXYSlX^+j}i^*SV{&2bGXy8&EB`)N03Dnj2n*=Yd#83@jJ*T0nGtuRI ze><}sZ0WB5EK4|3xkg2Zc@zRmW13T0cSo+$%ebYSOeIwN7N)ygDE9o7f(;*-u&q!P z#%g&2{?)dNpSmj-Iig6_&1mfCN`> zU;TYy_5xl^Ls#^}C`JSW7(qn3Jg@Sd)jC`CiR_q@u#{tG^_{$_rq5;O#h2s%rdPNc zKJ+JLy7VLt&K5ZQg5UT>uD!BBU3%ibE);ya>s8SbFrnptp%+D41r?9(WK$Iqq!HId zDcF2-PP|8AOdUf&5K9vOw%NC8wxO%KW zY6H)Df>)gKDIj_M5ABUt2@YLMdOzO8QB(Ho$#TJp746SIDAi=T#lPUvBw{_i6*HH7 z$Drki^G*#exj(CgfZeG+y;I_><1fHNZaO~G3na4V^tWrc4X%s-RRj0$02v5UH(O1~ zKXwkoM<3_?VYN0ZO%15_VP*%HjUU@u^pAz3j8M1+>!{IT;D``4=n~=6dSjys_RB7| zCChEaC;h>0r!thc0EI=9bK6HTcpma9tJIjSs0AX=VHAUr)L91m&Muyn6;sQZR8QUOCz1l zW4K5TbBk-@i!%nolYEcj0)Vx12HYgXkPjRsbb`zDxangte}RJS0GCk9XF2k*;WFFl zI|RjYn|7Eq2_&v79IIPX?qrZ?VI4-hF=l^!Z)@253qzp;58mI5H23dDIFkN@r~pB2 z0C6Hf*CRJ=2-L{}8=6b&9n&pX=AW&Zk^JD0zPMqK-YzL~1j&Nc%58o5(T^(z>5SAB z8+t40?ufTL-sKErQ+wzZd=C>$IYr3uR&;-brKVns^Y)kB%nwZ;7`!F!$I-#v$3Vwk zUO}Z*x+t`&n^4<+^T<@$Jq`fYaPPmZjsDCf_QHBpE8;L$e>!TfZ)UFe06w_e{;Pyo zB_a9OLleLpU9B-^^RO)Ls?chmT}KoBno~MfEyOk@wpmlL)!Bu>x_D$X!S`$r+Ee{x zQE}bcBh96E*Dab1YJK#sv?G%i*v+DwF^4sl-GZsw`=O_wub7mt`#vTpiW96d>uwIY zr!3M-JNCmDv~ydWp3KE$bM3c3^=_6jpLg4+u{VYNx2lM1z3MZPfwbU6IvOfErn%b( 
zZpsO*$_uS+U#j!9oWob>{W`L%O6p0Na>}f9W%bx|_=ad$`fe!-fJr(DDIRt|0z-I& z?ib|$B+PyaTjgG!hDl#o|FYh({M29ZX;)5i+03&8p#%l@ShKCOxOvaro<(ynxfXK` zrQcEaz9TB{)V=T7_g=qS`+eQ{-O1LG7z80~k{NelG^K)ZwyqqLfPO2u{yIT>pNw95 zPY&y4W;8ZhoOiFsWWOiw)Y!a*+|od%NJAL}Zq{(qv$5%E*tNn?R>%Wlq(zstWWg~> zUaGqNs)XM8fjX#0QDH^$tL(3Xr~%lnmT&VMA?rzAks#|?o=tWsVT|YMTh>ENFHa$7 z2oi1LUFfxZQWe%(h{DeH8#dYJcIr+&IGEBw>%8=`o}8fnqMw=wtT_T05~WVHGQVEg zp55km6JM8noVxcJ^)+KddJ zQFdDbzA}3%$(bYbsDgo7u1A&Bwuxq9K=xeH%EqAYQ(_eLS7 z=7HGYH0$RxAt3m=xpIXHo{f2Y;V)oCc9DU|%ix#(ieC5wk$8IsK6jm3H5?apJkp${U+vTmH0FYAye~Bp{k&NeNxQynY6|pEL_1lLU zVC&o_HA=)a&`OxSlV0K;q=P}PUBmdL@r*=Ssg|N5C7uPS75Cs%6ZQ?dk3G&eHum;i z#w^LW>9+8x+qf0y)ay$`*_jXH`$L81eokCMLnzV&XfFuzruz7n1^Kdk%>UhVFR>$S z>G!m=GbJuXP9n%b+>!AN3B z2$Hxa6B}7tf^7S^IF0AdAD)FMANDs?B*^IEIqABs*_%OuhNbKF?FyPR0lMuAHw5HQb=eg&0&&=*w z`9~Km)@$wK?Inx1v@SwwQbWgaF`y2Gotv%s>LgpZd`E}EX zBefA}=pv93$$R;2HjV?akOtj`i}QD_`g2}gr-`t9t+C(xbF!Y|3 z!RNofybnjP+=sMEvPV?pWLZ53O+z_UOzH; z`b!nLF6geI)e-7Szs>LOp}e1n$Ab@i96gqAwy`jrLbK4W=UfOrd6YAS$|^I)-z?T= zr%#F=+=wFJOs&;i%pvHS2qGr#=VDAUr`52&-n0ltoSt0JfUf4E^xX#FBFZX(1Z5IK zYbnHA0QxWnkl#LghQxgjW3DDL`dgn$)h*X_<7Zskp0~}DFk<5t-=KbZIx`=?#NUX! zh_S8MqL&&(^jLe#Jfp4P=@plYcQ`25_WJ`9r@V$5B*o_GrA}D$P)f-~AMc!N27BBK zc8nF96*e>FjAo_nl{vr9BPV?)8|F<^3v-ySs{hQnvX&D5$LV$j_wL9!g5<79%8(E} z>Pt8@Kc_XVG9YM&S?lLv+pp?L822e2653XJj4SO$ z>ict7VtUV>xoUyuffzn~x)7R%3oX{whG$>sW9+4QY<_TIa4~z~`^Z6$JFKXvbhguL zZ{LTG+lU@hy|MCxE1K$XPKXqWfMW(Gu1q={du4iT(_7yM$y-R!eS|lZJ7vasMF=_}?ie`j>^o`0s?Ca8bApohF9%j8Kf5q8t z+gdJz_kJY47v%phV&=RNWElziGP0VEI-T8CkkVW4^hfF@$EG(PgTL;Vd6x5D#pa4p-Ou%? z8IuRz9hrXjHT#kusR$kC@u;(K>naN};%zmC6! z%*uxQJgpD!&6wz}>;#@NvPIAjLRz3v0-9wCqL5TO*;8PM#U6@ACCsvcWD^!{yp@^P^acS3_&0 zw+qo==rIux??7na;A8SXn@yOnzPKhQP*yuW1~8GirS^^*lNTa+oG>VYxN?Ml_JYbi zT~l-`JGWu;=6C>;V-0DZ4)Gt3;12 z@3E=T!3YV~@RS^7*d&}ovFLRMcjJToOvHD?5CLC^|#htSfLngyz20- zG5=R-swR>wp(yhVDpVmKr2k7AOJ7s}qWP?E*^Xr2qm^4<3r_ZRwcp=4xn|q&F6(0t zlq-E2#`Ln{msq7)tU6j@xPM6aX*<&@OR)5%X@cq@ZV4bek)}qYLl5e$15noSB6Wj9 z=0mMrE6oKY!?QtW!8&<}V=9)Lbr#1WSK-u~p*++hy)<9ZLZ^R=;1ezKia;TZUP>QZ@iQLwnbvmMYE9+|K#M;}t zb6c}f#z-9@wOx7TQju787J;6gv_yRh)tpVK$fqLi zf9#-@q|2(e1AKXj3P-Oej<#6t*vjk}YGuC9jY zs=BkspQ4vt`|#V3vB;h-(L@UHhUc93gppH8K05BXSEdVaULs5o;=j|Ime2$#q=ZQT z8#!gn%G6IVl!KIl={>|@fU+Cb0xe>_xb~Oy4nfhjY>D1tB}6$Tw_*tis`CvCU#xMW zknfvZZ6A0wKfG=6J17V;7}*lEn1*(4sWZqX)_!fw_~QFKnN;lQZ}~>vX>RW@Q!uCP z8UL<47v7rGR&i?VB{S^n9dUp2QGTARV%7ZHYwsGon*ZmI$GZO8=iSS>hym&?Z*#G; z7Y)q!KG@K4P!het`)jfYwe0Nf?QTfFuo7kYw6jmI%YFe4zv+n6G#7V{i|<$tDM8Ib-!Io+9oQq5Wu&{LI zkrdEMlgI5#)6VAKef8oD|B|-;v!cO`iuCqzLA_FRcZ;$LCPzvvNOvvqIrwWDOH$65 zg8XgyzHh5GZtYL8dc^xa$^3VK`!dq`8e}lRbu$h<7eg-TYGRsM&lx=EW&Wu7M7+YI zR~cG1jgbQxa69&4# z=a9epg9DS;gEKp$glyBWy?61S0pu=`-~lkF*XXs0$eFN1x{{jHLX801T)4J13^Ms8 z5zv@ZVTXzEylepoaREK$F@)K+E>o1_C>SqsRSjaC=FhJmz@kou`xFx9l+@#zJ9l}K zmKnOr=v#ZP<>?FG1|^-WCU*;&MZbjJuPJL`|I1lo_<*Z_MZBYHI(p#!M9Y|Y+{f8l z+8a8#5ec=9K`7PJf2YIa%TW1h%3TJE)@zWCnC<=_ zvF2!2N3TcpU?nhR@5)vZ=NJ@ZF-RrWw76R)UtiYSOuxxQey4jscr9nmzwPV^k1-ok ztRI<>*_$Dox^R6!Xn9NV<1dNjUrhSXO3UsP&%PATD;f)y20IlL^i@GxUH_)*-v4YL z?Ec06Gg65XuI>Jw@lhd~!|VqPlhtCj2u}*Wku1d`Y+HxAsn;Syo+l zUT436{(HZBJf*jS9{aW8Vc7HE&uU-^B#YXS^9a>-Hbg0Vht93 zF)dR|y(Z2~DM;T`|FabZ6;}o80NIvl`E)GyUY!*^ zKhDOB|IY)e&N9&Rh;7JVcc5CdkX6*9FRh8mV634m=We}#Tt%#|KQu_n6jUFL5+1mYrLc8nziS+!T-1$cZp+` z#O=4$~>U);Mv?Nw~21&dBHwc&<0Zpwa9Qc=0l_!q4JXGeWtY__9xfenP)!PZa&VKGv5NxzVSPP8IPVTsf;nh3LS2{ zl0+IaAJJHsi3}s^u;U@0b$un%l)P^l7RDaFlVr)VczaJ7;1RmUD?2Mc``)8$jmC2Dk3HP%*xbzxxN!8kT!p{LStPHE`VY&kWC`U#kf>+tCxh=~9` z({|hadh|4d!qu981RV9$*h!tac5cin5GDyh!qXBLng|>y$S;+HaPR!j^;nDp+QSpa8wD&sx^y&DJ%=@Vb? 
zkA7$TC*y{1kX%~kJwMpnRrzztT4dBl5WOL$ zs|`$3jHcmrukyy%K$oytKhH zS8Ux}`Sd-q$L`J@wh7qed0?~WkIRie%&e*&dY+Py1C?hxN}2zitg?Zb=MT!l@3=&@ zP8Oc~=_A4}0ZLvnPzsvKAA^%|j}6keC_h(>9~Tqxd9I8>5{^-iXv~F7GQcE9YRqF& z2+KtT7Kpr|9!q0B0`S-jQUqc~X6UO#dLo9Q5Hy5y-gl6~kF((osPY^z!LI>UcOEcUb7pa8&FdMzL#OrD6F^TF>dS_SA%IH}#5%2pOP$Pp^9EdWxvZaLF2XH8JkI-@FZ$B&#VPwOgT zEiLDH(dS!Ou(x*{0xhZ~=C!DXsIkiOjb^0i__|-0bE`Vn9BEuRXR+b_qsGPfraRv) zA3d@VXs~DO6T=S=M?9&z7=c>8jag)|KD+TRr2*DQho6cg-IcXaA39PshUq0Zw3ws^ znsrLZ3Z2oY2wOU09y4M-2l)DOS8q@v<%Yx4zz09ZW!Bh1EJzZV<9@|c|0S8ZtF-hiI8mGrgmLbSe6ld!V#D3c&e!5Y(z;LPAKKF4z1_Uwn0fE*N|UVpEzFRYTlZhF3S0lx{|C(*MUHxaDFvtM`L)TP)X~dA+W~ZiUbuWeZW~OhNs?Uq+@p(uBIk9yH8m zpq2yJ77>L%pPUMJr!cUul~n90!>ebGEXg)(;bu1hBrFfRyBqom7O zj(3oNtw!%xzm2?!-pv^DdxefvV$zVKe^~Osh%Tx_V~N&Y?8A5`nZc&FfzbJWMmU#Sp{&z7DVQc(TgiakZzW?)l2?AK#8< z`yRf1&xfvJnr-n)lyndNH}~m3)WJ`}I|~{nRj<|0pAY*0*t_a1A6rt)*xwLnILWa1 zkBg;q|7aBT3F_WiOGt5Qh(2mGF2bUN=Ysvz*ip6K@)IPt{|qJ=t6wP%&d!Q^{{FB3K8-el z(j-n!XG^kHsgd%FNK{jmQqAwdncBw1W!{$(itt8=J&u`zHL$!JoEYbNYj`0<5`91GaCA_GprlojsVhFkqQRL zmt9D^8Jel&f@KkK`kY|X&>v3^$Lt;F46zwJ6dFgP(~bg~RTrd|@+f@(E$&gdS5FVVZ26dcWz*|hl?$UbB)30&puAL!-SOl?Zd=y;YrBq`yCn~8 zPrBH#_}RwwPkT+~yDh1`EFMFgjsMbXzrGzln`E|!l>B{a=R#cRa6=|4qspPwpw_j{5tN^9K{cvKkNO z;VL%${b39g=pEd8eA`Fw;ZeR-jhL^`u2r3Z%&Vr43s7k)(0DJJvL^yNHzubS0Q(Vp zrfz1Q_3p3WfNe#vbPpl4_5Psi{;$Fw)*bR^iP3?o{UO9%HJ`%_Y`%geboXFkIs%&X zmXsTp`BujR!OBA@T9q`y^GtAqz$T-izRWWRFII#44BeqBG97?LF{_-`l1|N48D;BN z)I#AT{}@wqdWC>r?%R05DevZ{HitW$0{$kV*Po;PZ%z!dVVC4 zdB&g^Yn&Ghl~}UV0GXb1jd1G=kC9^#(Sue5;^H!g%Ip&cX9hhl?_}fLt_-I1xOR@j zgb)ur|2#lJyjVay$Ahd{fv?4Y<#C8sfIg_cQ0!3(LUIy2X?{a4!t5{VOPUq_}7`-`d(~>Zq zlshlUn1Z{Z2J*7EK#(b7m;tIYWze8@k2&ik^^SszLL&{&@j#P<){?G$V2Nnv$LI z1t0bfRjDW}l=16?d`4y!Zu_*%T2vZcMK(x&o=Q5x4$-9pD;Ga}@w3hXW4E zP!Gm_NnjO(s*QYMX~GDIX(_h2&$#iOe#YbA2lu5rp?{9g&E805^z3{SgtaT~F$g70 zSdJI0{?(h(UKO}!|BIBY%`OR6Iq`O89`X1`BeB&jAq~KD7j7$bp_EG=!{>^F<}n>9 z@=P6?aUZ~yr}Fgt<`z44=5s-g2*qMZu+|DS%B!w3<*6=jepUA?{Fr1O=N!p;L^h#- z!RSB{hCa!~6(i5xWTne{+uIeiiw)QTh6gC*0%*9m;JZCYR6GE!i|@??8I*(aUMm+d zE-p?J@Gk#21)sauFFdGy^Z01+O2jXil7DG{(;vF<-Ti%uK@H`3HrlB}ULQNX_G?qJ z7EZ13dLy`z$v#-YX_4%;sLB9Hykn@*K0CR%4MeNcaYq2XWbZM2UJVp8HdML7fwd@< zhmQks90R!rc2nnbS5gXc4}r0+T_IM>#dx#jI@FIBLGnbw9{s^F(>pa%{6cn--^AFQ zo`A*&&sC5jwOl`juT-+LF%uHG@IvG9Aa1y&dB;_mfJNMP*vCc=n zdM9k#J=)pt*(8)eacXA5;(d^_68Kv77DRV)kz_c|A$uz}OfH$noaLT+u%G|gNN<^| zeAQ?CtZB&(tQ)?LfFd%DFL$Cn--}9rmwTHXnC4E1$M)!@jhWw9^1*X{7{31+%D$RH zDm@akwMY-0i6CCT<(7!5z^+RieVw*ing|b5-yRAjiWur?^L6 z3b5H9^Pk$_5(a{{v{qKuVMAHAMx5Bs`>)TGE8U|SogcP|W&abn{5@QGP!H#mUs=Q~ z{4uoFZ5ePs7RU}1vHlP*!#b@sZtgJST1a+xwdGhd9O5upKtCd|6 z$@VnfQzL5(e}O@&cPP#+u*sG+MGy_XzF+%~tK$X~N(ciVC>ruh-;)3r0Lb{3RoRy} z8RF6-R+LhD88BdVXC739;U?-1U&1TYkl#SBeM2QNW4)EdO-|_J{|1QbY);bB7M-}1 zau%DGon%l_(7*BAg|wUkyV%40w^uIJesqaF^?cjh*0e3%7n0t_ud&z!NI=zL@8}|5 z*91EN{1RnU)ci-_%-aFsugJv#sHcu}?sN957xpmdm`8|@6GDAP8Di$vd3n3O|ePn zF8r*8YD47D1*g3Z`#fbB9}p!xzBU(d!4h?{Pu%*y^W_^ZU0i9fuAq3Me5m=AAnexL zriq-`cl1{d$y@fUo!h!~c2ja@U%F_<&4djgNxRx$06U!pQ2CAgZEX#_&+d!Ajm0@A z!H{U`0v;2okD?1PyV>}LZ}F30zgNEJN%jnJ^%y)kdGL% zf{jK^gGnmfN}Y6vNN*E+QxF@RuY#NukUn{7EG6OU%ar7a5Prt4SZV%Vlj3mPl09|1 zY?Y9VP3Twc-uZd+pY6MIGKn8D_M~my5?Z}w2j|aq3T)1sEu#)wzoQDKhqmrAD=5yi z8f4P<2|+s+NScOGD)3-@2`LjWbWBh4vZ))|1Xf8}+d(TH<(oT&tp%AWE9d~iQm415tPJ0zC3273b#VBce9dkIz5E0faZ%rQpXk& zxw}@5m|?#Z1^w}V6rE*QlWiY|*U4~n!{`uj^e7$OFgnH2B{-B+)B)RQ93`pGkPsAb z0ul;3Qbg1t9qPcso{jp*^X~nyzUUmo%lJ6M%kG$wGC&Z#7W3 zy=vQ6mHm7~B{Myx&^s-tAU&&KiYOFwx!|+e*&^cE1kSml=W^rCn6gWlNAvQ~tiqb{ z%v(#&w_eKMjX^2$6C!^AJ^9c^Hu565LcmdTAOVFtiZHqQlJkP&3Z>JrKrjH@GGrr3 
z!={*;C~?8QB(Mz+&iD~t#ouR`RzjyBs<5a&>63Z5lc)ImKBbqK55wK+pu<_{yB$b7 z9sokg+s_7B@+6+kI0QK-4V=K^V3CKv)#Rq)ze6Hb9Rbg^03w}9K%8eF&0-x6)TSWL zbA|ZM1<_{bvJ%em3zC0zz~VXQ{w2T|djau;e9S5f^Av|&iipX|Wb(ygP7W3trlT2f ziYE`+ghO5A)JwfVHgTXwNDw&z%p*&r^Em*Eb(2wc`(@+jMu9j|Ab1MIoejLcCO|ua z{rNCgmU!uT!Z1fpG!sn&R1ZiC9!T|JatYfR_1ytAP16b#WO69}`fQL68-!N$?;n*Z>DMAcND% zP16aeVHVmFQf;eUU4U=1t4EJ_qEj?etMJHS7U3K}-;@Qi<~I6rU=%L+sY}F>VZcDk z#qc*l;YDR0kEP;~|Jaiu9xUjoE6~GSL5dt$x0}Ea0ZI`Ot)$vXvJmuUM@b0lTq28K zi`ea8#RKXR@Gx5*OZc}?cu2fiOZ2W3b1pTdtcdw`Ak*n&+Z@)qTbXFOp}C>`Nrw*j2{P20bYZCoLLtJ>QIK_gAYo$C$U+&K>JVOE z_NMdVsn1fM?28=umpxffPb~BZ7kWehX5~XYh!Ei*K(e3AiB6$CWalIi3B7q8=8A-0 z$w<00&5Z84Np5+G7bI1o}&7mM2E!GoQ`B8TvZ<6N*02`XT7o?x{Wuu31$ z+RDXHfhhTWv#ckQ!X7RcOgTM@)K=^N2Wz1bI#546JZ+B$r(yLD;3Xj>Ji{%r?0e9% z6F#`!q44`fIp$^IcaWM5h$m%O{1-GF2d7#91pV`i!@1&;oom9ldUr1?W|9g(`OWw!^ zAAJ*#YGk7td61j3f}LG#k&}ol38+bd(`{A>{{l5~r}Wi=Lq!Yr=MKkgj`PD#vSq!N8@r(davj&U|R|--d^p6USXk{i6|CvCcg=t--K%AR}HdHLh%9XeBn=A%pzN^S_4sAg%+sH({NBj z0W=T*79|32WZ=mVfH(zEcIx6ll>>&DbnfLv59YYGdsi;ywu51p18F#%3vDe@FBi}d zNl1&io26Vt8Fdd)#zZW3CELhPelwelrNXrK!-N|qT}#2|sK~I8BYKOy^z%j>xjwJh z!VI$%t+r791g6AxRBIN!#1)R26aF(Nyu=%c7(s?{D#q+$TWpC+^LLZ&8a44?ccPgy z7P-V1Lii!14kMD#&9nV*Qzm2ug;Lggta2Ekd>FwLfSmDQMLxg~j~HZPp74aIT{C$^ z)B;y{k8kY61RAhG;z5958X(OT2*3e69;@|Dw-1MnD-nll1}b*N`LFy|pUt3{MH*z5B9&H8Np(0bdn+Rf(a23aUL*o z;fi4GL=|x03?}*+U)T#=Qb|Uf=EJP{fZk09=Q4-iiTL5UCoo?;2nUehje7$8O1N_h zjY80MrBuqW#2VxuQ$#xz#Uw8;ggr!jAoYkgB(G{DJ`ac_wW3 zAKhDW^tI&on-95TXN$ESEqaI5_hN2w>YLuI?(z0r+!{H$J+k+nIo;1eHgcD~Y7mbL z5vw(AdcL?S@eoUROx5O0eiuqm>>t7*X?XaWA+lT^)!)6rz#-I5&v)XXibS9S6W&h5 zEb$1%@+bxe&Z43pTKm#WF%_Gsij>80KTn0_C%ENy=tBGFGzaHZsE!b7mW`Ms-fRs; zL5sWYF%N(G05z0Hbn=9XS%}AB%d2zGl1}gZhvCKJc#i)*aB1pHEdyz>$n``-J!R!p z&cm@_El~>$06CIBBkwca%(vTOoL2a6Wc3bzbq}!bBlppJig5Mck@%76OXfu=J^)%H zxucvdlcl7Z06L3*IY_)`ar@5wYKRH0-4U>dRC};?V)3v;c$fv3Wk$Vq!ukz?1y3izk!Rk# zs9n)@tE2zA_1;SR(+MgQa_He_Orres2=^@?uy6j|h|zzTn!lLaq)`$H(x4^NVRRkv z4LfI&E#8mi8|beQU$)|Aikr}TCMxQYV1E)qU|^}vnfJMdy5GIg%7&|}fKAB|e==gE z8J#z`p7ad!iH{kkAS&ch8+)?Hmqq|b!shtoeNv&X(>z}Xc3)rf zAI{-HBslOvGP<0KD5CuOqc>?cvaEI$7R&z?P5vB7o{XY^Fb&{J{gtYRFy(7-)WuYO z3G%gjeE8dyl>PFB+QI|r+RW;$`Y)Kp(EdBruY4YQg^gB!DEy3%*}=cFe}{zpr6!na zT=e*xD)e3&Xe$GNFETL?ao>FopblNmUrc*qMqO%Y^_$bqQ>q%3YYOohg>w zd{0W#(|&C4Yc0mN&9%C4s#UG3QYDh*b7sqkRdOY)2CMEGeb>{w#c^I&Au6u4{&F;{ zc;3JfUM?R+IqHxLa$4y*x*9fbXS%TFUT5ghKzs98Z|Kp*mz9>u4JISY{Rv{1^){c~ z;X(Gn3VQGnliQpk^v@4+mKj&AKL37szgIux{Art5&GN%$Gb5&grV;rP7=)77(qeHy zN$_hip=_*NK2%5;Kbg(=P_N@N?EJ(>LT5$KXNc0KqKH~NK^ks<(~!}9LEnx}lrglQ z<*K=~I!nC0?K*P84$_@}OMT~jPq6UMRUF=6>@OH@tItJ{bE)8e!7N~@=qgXBx=DIU zY40reIz%)|(q&qszEC-~!e4YV8m#A>-Ck)~sG}odT z!?A5HGhTIW5w^~(ZS zi!K=u-+5G!IE{K;>utK$TKHHro|6ki#j~hOJ83L{g`O5`V0AYKV5ZY#{#wS@&mgTP zcv2Nguya99nZLF-R}=S7>vWXzzqmiz_XE+}&)chc!?-qiJwdb9T5_CGZ7qDg83d9h z;_d*xZp3r`wYuc5D!9FhI`miWYd;@c?mgS+1CtbEP1B>c5}`WtQIjpzwyAsCMEhK+7>AfzLhGVMx<$;7=Yl1x>g z)9fw)s6)_h^XBC`)rR4rZQ@b7?hI)M$4 z(+iZ#>!iF6rbgxiHG;^%N`;-(Tyx8SGEYm70bC3`i>=<6;5R%q)TDISSllV~%%ZQW zl>d~~&f~9w@C3RZV&Q@Do@&T=dJkHgIb@)es4bYD=WaY#!Uq2B&>$V^!QfSjt4&i2 zE_lvu8(yK>YpZ;g_ZOOz($3A3GUh`3s>el^WJP4V=nUijX6^NpBBjxgo@+U66fzJb z(LsTpC~DJ=%7~PpV5CA{7)SVLf z5A!e-M79L7r#%!^sWY$AE?3m(Mk`m@Xm=P$*~!cjUhOq6w#*?$ZJD<5>mrX%Jg&Ov znNZ++&`njVRHN{7ePZ}0{&c6XO`*0GF$s+WIDIj?^Y7-hGc+L%{0wy9NZPhoeQLph zv{aYF>>z9)$xMmk~w~5#vaST9`Fkxwg#C zEp*+aHP-66q{Cy!mxiw~4-8z6JVi)z4*sm&A8aS(xtk|YOrZnC4T{E@(o@k+mV#;% z=o2^?l;uP%A3ayhxhnR_b^zoOpD)|VmXT?GJ@Urf$%;vZ;6P&+-XGPp&f)pl6l8Ec zIwNJ@XBFsQ`Yf2dsY=laGzmriLD&lv0K(nVS_bti_2>!e%38L0EPS!2+Iy){b}2q* 
z3t#&?Ps}nQ59EoPb!ZaRmfUQFPH`^z&pHc*&ZEjEmuhhFDj3(cJRk2Y`gMGHZCr%Z zt-8-W5yDl+Gw0EU6I->KK#i`FEkVdARa;Md3`w-Vus=-MpeVa2vPB@Kg7FTbW?X=; zjN>GJ>1d{#;!G65!C*fsGSS_RP#rmwO^upUJqc0BzKY zCIXcT?X|^IU3WsQA0veVE`Ck31qfz;7 z9FwApJVO1o5(I{o9}|qlPOWmTb!FoNAaxROUtibw%s}vDf{OV3g+>TTsgmW$PAgXV zWV{S?inf#MAb81i)_PeuZtsg70^?N1yqv=V`kJ;c+NjMFnvdbeaPtsnkXHvx!rS*OiA>vGhY1p8gcxCq1Pa1VXzp2QJ zo7}oCE``Zp=LZ{_Ahgszp}v#Dj|7<*aR%YK!a!t!rPih7@?C*|`#R!?6lnE{QM?+$wKyT|xkAT{>Bw_^1!yc$ zRjRq08D$v_RH#f6Ofpv*xEpF-u%0_S`uoM-$rxiCKoRgr^=X*6wc1pk?(x}YX$z+|EciuVlWRYEv13FH1AG{wj03v4udW`-S-}Dtmjl&K|16 z7vRXVjT6@)QcYZl{YE+fh12}k8T1xzw>y?ce@1=UdfhA)w{MLF(@$KctLE^)KC2U1 z(R&`=cHg!_qY)?I_c=79-J4T-!jtlBHpG0yzz9i0hV)-m6!s^bABvqpg z9IwCC7I}@o*eg9Ua!AEqRdVlqY*(i2e3oP6_1v=E*ojBtaAVd#t*NO&Ezc#0#+OPc zr^ruk|7a^(_(Lf_@$+;@6iE8=AJX&N^>8I{HsOYm_!3k#-Uc07H&Jso0O3%W3vrLL z5cZ_xseWt5{-+DE8ICKop5y)h|CFT4m;$N#{JA0abf8{biA@I;0Ho?M{~p-G0a(;4 zTju~T?);Xf+9%~1%~+EYI9l914Jr^S4QQyM`3^x`<7s0pk{5fyMlAiQKa$;8qaNw} zGvE>trl6Do`t-Xec(&s5tjLM%z6be6PP^){tdg4@P50MIcHhF5`w0J@wC=O*)?Wo_ z@0zM=S3T51( zKZfixIMpUbvyx?6-TSKV5wD{|YMQfambe0rdSUfVRy39}q|n4zcy4qMJzgTVKZU~> zVV5LyFyh$a?&HwjG5S|;;YDbAeDk_|Gj^U}NUXh+E4u@f z+X4E5;s2ZvKZv7UZqCQ{25SXUC^3E z6eFl^&WuZKLb%aA-O9m$yNga-h z?~D%mj^29Fx3JMSx@u%~OFoUq7?wU~ji3r>$QlWY$|Zl$CsvyZXJLnlm%31n3MZaF5hw{ ze^wS^$8`S4ocVyG*$&Zv;Xw*7^UHPri5sH1;~)cSe=z;$O0ME zwF&CP3esRDEXd%-ROe=~fPE&mdr*OpD-)k9z`uzRb49Va*bZM2>0D7fBO2iqh4uRC zHyfy{o+1Mtn>V{Fr`#0=lx01V5Xz&z%f-!jc?9*fxynl<_H`zf-1VMTQ69}MVt^dD z(JJ1tdx`M7Y&XOWH=Cq(jWNb;b!SlP>g8FiAu^2H|IJmj9-3Vjb^kvl@U!~oH1dz3hWor5R5#kHAr1)_xwIX zowMK)i7cQjY4LEs*S{}(beylWhvr|m-dK^fpkvciX@LgaW<+35)87cAH zU#J2^x`s`te_ae$NZojhv!*zH0Q(oM`74-G2Yay!*29J9;XzK9ZhWAaaIJFpKKL$+ zVe=0%yWUd+z8cZ=XN0eQrCRTpkcOaNvJ28k@Pv3q%n04rIn2H)gBarq1!D@E(Y(O3OB)AoFmsoPBL>Ab>WiV` z%3Wzdhc+)MY@X3O*CXG0r2ch~i8czeRaSbVK0rYaAv6k9N8@+KMu{5BU%B-XS5IJt z(4&X3)=NnLQcd8~KMf?s+1jursiQkf*3$N>*yVG7G#`l?7zyA$F(4x%&0(hN>vP3} z0%oCrQ#_QX$HUt2z|PF_`AY>Eg^a5=X|Y))Xxc`)hl3lQ?!(@r`LH%zJ2q}XgFeeY ziX<~4gJ7`)MhrG=!h<}1IP44^b~c@NR*%u|BKI{K;y|X^%Byl zUGG^MIVIb8u-aTau7utkk=eBB|JHUDWgTCq4EC=)A-(j#+yA<(kiefeT^Rhkp>jH2 zqiJewDogYj8D4td|9Y`j%8vh+y4(8%3mJaW57%JFH!<-$i7{I-i$EawIDMKeo|{A#(e`l0(BdLy6~RgcyNSxKGbb!|mz<4lGs7DO>$jC(L*_$yH}FQT$G`#X`2yD!G}CdA9!5I>O@%RA3Zuz@rB#!vs*VHQIwsLvYpS1(cp|1 zU9CtLj%mvu1MH0_LXG`V{WZPuyVCrblZ#F69l%6Q%R-2f1_jHk|;BWgP+v?v5kxy zU&{Fvk+#D~Sd|ImI3C`0@+qWwq}~vFep$>e>EU|$2LkT1$of{0?nhF`yy(W+6c}ad zdG6lxLuWSJW{%i|@6Z{@$=ji~Yj}~YcyIQY}^dAG+4^&A1!Pj$n5Q2sMq5cPN>R>6P zE?V8Y6RhXi_yS)t!IkBzq#Uj2Kxc2vTPlm6cQcJ%D4 zc=w0VNKeVH_6)C&M@5uT*}iSfDhskATeYSiY9;d)2Ob%{a|rQ?IH621#SKNeKj|ZS zm`_}h-Sf+yl^6W%H=iVH{9;@l_#7*`eFzQD*;-?(7{`g0wk-SqeoNYsJm+FgqZLCM zQxze-AHlP_k1VvzzEr#5Xm(UuaElh*^;2eDL0UW`I!kJ|<`c(g$LP^BU)*~e#b~>y zM=~C4WMG0?A!N`Y{%pok8gyj#Q-X0qOQDN3JBZ&cMJXuSTW4pSBYe+pj54r^%M#zLWg? zHZ|)f1cOlUFnYI`+S_CJKilMtgKnXyQ6hcr|RIx$gt3% zE4Mv22AQG9m!f=wPV6P#Ir8`XhATO6GGpcFU{SOUERx8GVx3Gq_WpA#IojnX93GQ- zHd8#?Ra!g7RsnqK+^R&@YJcFL{BB#oj-917D+kt`q0nrz`cjlWTA3?E1|-GT-^JUrx4`F_xdA6`t#ZARR7B6 z?|!dGF=O0rbyK;$EbF?l&MP|ZLprpxjN6)T<8g55XIsi~8dL)Op`67r@K`Py{!xyQ zx}|6sT}Q&%(!?`A!UWqm56sD2>zaf3%?wayMRF&bLvUzjr?fR5k>3q23jHrRDS`hn!eWK{v_5A6F(#Da~M8&=+c-id! z0Pav%Hk~PPAYf?b{WzV8Gu7C@Mou^|6>P%6&Et$&37v$V)`%0KUhPlo@gC!g-OzyOH(xIUCjQv?w-2;*}}h! z@^&|hT12niivE9R{z`nmt0`>uXw$Y(b+B|n$kL<>gN)oJR9bdAy3S? 
z$(Ol~$4`E}V1x3upSRIX@VjWXnI-0_KlpwNu5=1}+*$b?)exzDuJRH}d$6Ojq^plbZmlw~8d+u|DJUoWf*=e50D!LG z1CR##0SknMgv2ly84OBX6eA~&k&{LU3yTU1%Zlz3l@S)(Cn7Gmii?TMNr=nKNJxmw z$Vtj7$jVAeOA3o%)Mb$>k`k)25~^}?Itua{GEycAa(Yq-MGdT`I^0wPp{A^`UsYa9 zP0~r*fPZw(s7ZV>hYkwzmZx@4DXRXX2 z?Kpq)tPs;gPkeT?VRDiX!Q4^b)I(Lt$-&aj$=bow+R4(+-^`X|J&h-5A$^o@^yA{2@DPJ^YQSD3381N^@|G$JQm=Q9qn>7Dl{QFFefQABQ7W{ zIl#>?J}@}eBj9NK;pmX)%%s@p%*5E76G_==adC-Rr_wW0kEI`p2rb9!{$qwO2-Xwq zewZl+m$Qtk!lmf3&a}iJMp|f1nrC~44D}D}ON$zH46lv?=iW-MlDwnEwCtL~?8eGd=NSoY zwb?!AC^yD;>)PszE;rV6*PXlCQqtdE+|^Rs*HY7e zwYsmh@kV#!&F;FJy-h>c>qmRbDr?(XE?(+vZNG7)?N;CA!T!OXp8opwf!V&&iQ%rd z7ph+^we=3)yU;y8Jh?bJ_x!o32&e);CryPtpe_P+1_{`~Fx_g9P4w4=xC_}_+jHWZ)hbC^}Ta4WZwG5S80TucPyLr(_Xx4+jsE$ z@b|Y1-5wLg3LD~yk1r7lUWFBl$BGY9HguAMFZVpT6+S|&U$8HHcE|3_E?-0yCUh`| zkMoRYN7$7@lst2^7%I|58T_#6uQ6NB6*%i+?O$!ev-29E)SUc2oYw7;OL=m!=KbQJ|c<~cE0o01VlNOOGPT!%h)r`1qhLv!)o3! z7*hnyQLgAmg`-;XVm(88U@;`=0KL%XXvB!$wv_sD5g2Lz<5m^Vh~4Qb_d`pPl0F9| zY8HKcBH&MsKk(M{I~Qf6e!jxRxF)(XLgvkrR6EHj@v!MmdbqfY1JKXwdcEn9!$0Zf zaH(IvOdw}XCfQ(WnMnZF4r)u|HDNphDx@=r&X1iH+hEf!Q51%3qVnmih9t&gwGFp$ znn;a%*;|dyln*9Q#o8GWnI|WB3U8uLJdyd;l<9ulJFzO_!f*LnAJKIAt9y=}m!F(M_^DZ{UBYSR~5 zKKEQ`Y)i;*2tKn|^@?!s$W=@7N7Sphc^o((1_fJ`9sOPGDq0DsQGf8z5+z^q!G7xeNgH=PXY+IG z;Xf^I<|Md(lku%@a-!|EB4GngT?w>5|E>NmIMO1f?9+AK6jHoNjf{4-Q?D_+rvO!XIFQ@c$Y;++H?w3pwVZ91nd2Y>8wYxQA|9pR(|FiSabc562-X^C0 zt`67mkgP|YwV2oG$^GHiVq<4V#*gg&b&%%eZ@`hxpcHS!$#p9b8$%Z zGAHF?$b6u*8x(T-w>`oHlhq%{*DAKcHhbaq(}F+OoSx#+{-br}mOc zi@&iEr(`YEegx?4bk{8DmZ*8jBCUQ|ki#p9+(#K@{j3L{;;L{V?TK)N@5^!jDl$Z0 z+6Lafu5#L`5UfB55^C9*(hrCce`?_1))T5pN~y6qa#7;Y54ckQGqJL>38V&80`}tB=;87Oy68ih@>i#*mH-m&(kl{%3?>O$(QY z%jajq?+a^to$t#B2TmUOuOX54--p+Ef6Q(7K+R)V*r; zIbbaS)R9@3oC7v}GgVQ({MBM`Q$r)%FuV@0;Usd7U4GZ3RLEksZglwlu+Dwwi^5XR zr5+JdHzP3-Hs7KO>Q8Fyh%^vP3ZgF%bniJGEIJmwNMG0P4!56IINoZ1V6);)QsY}8 z+`j;X-OiZzgp6|iSQps%Id6?X$?ZQ(%umSYfd%!13uqnyP2+&oxHggl#E-}2av=&V z0QBaAWVkpEQnWK=`Hc_NKOeV0aZ$m$=(XYjdPJaD_oHhT(@KicRnR%Vt$q#A6QV{+ z>Pfa;ETNZK79E&+@oSj$TaXs6K>r@hq~r0P+S0**Pxdcd!%Nazd#g{@950U8J5U!F zE@$^bMd~KkLHh?YU+D!0s?5w43gZK1a;RV>3TIw{lWXb5db4?+2!siaV}-~du%L=( ziqBIys*FiowbwHhLG@l86AH!K!tq!Tn!*CeQ-7L1wJ8iyl5rcYKe`_7P!;<9OKrAI zI9|I!NyTa%es}75>+@>(H0NFqCUZ+kNk0{TbiLuggX6>FM3~bu?S)!o?;oL?#+v!Q znj#y|{93Bkm+jyj_AyYO-21%dm4gtaT>v5tzz7pCU!DM*Ow+~+lFXjG$qb+ZKfVXR zHqAkz9XG-6kaX6oo6AO~x~=2-C3D-e#7kK+I( zIcYv|aULsat7ZT&2dIZdEMkRuOkp0CHq3_Wa{;2%bnmPN%GY{!V z1RlE!OrOCNu^>8d#e9bRz0cwk&FbE~2(l0SFnfRgiq+V-k^w2QsKY(&hN0nCr@3w2 z{3?P(o=7yu-$LVH&1!bqXXn1=3WpbB6Z2XV5eCuQ&|WxneI62yJAvOl`gj@CP1rjv zOrb-ufWXYEz`824dnFdxbQHN{bhlKDPZuVEnM~T4rtpMw>5MZNM2_R`0#<(JhyYG| z)|U7c05>kfwg}~00&;O?Ml2Z?uXSdShmPd|PdWmrT=X3P%AKM9*v9R2ki#u#;okt+ zpQ%zs$0G$ks0 z+_`i-xYRuup~nMY0H?XDrK_4i|2Iv;YYh+*(2)t-T0w3Hp-1-kj4<=_u`F0U8+L(> zY$BftW~C@lU@1lDKxoX*z=*Lb)gQI$Ba7!0e~8`rCZ>|%{l`E$vzl3^r3wFn?OQT? zTBW;Y57U|ui38N*y!CbRj!H&V>K7DuN;lldOdh@qTv|)S7{hB!|wI6ANJ~=sS;UJHM;*e*D}qz;$&MKH6!6NMA#8*!2<>H zKo2Cr1a@arNdAIkT3ce;aKYXc{l+U{b&Z~P8wU~^wNWkjh8#O{dcO{EAf(aM0&v5; zfAMCY1ORf|sq`tf^&_tJDZX{09+Srf2)G^kvLz3+fh(vBk%h2}EMz0`{3dqa9!EG2 z5R8Z=KW~S{m|cv_gd|y9dq#wLZ$Tp4NS@vf8RqfXxr&PImk(|2SC$O5BSLT|+;ssJ zaeT9tuT0@wYj9#mBn$klO{Jy-Mq{H=g3zxAkqt~(Fon#LNj4;c^eJI`&0qt5CxP0F zeU-EeiqVrnr z>j?8$7%$6K(wY_BbF}OYVw;)GfV| zE4mb5$6G+auU5mWePi!hJqyxx8z%9^r>~h`wcV{!UvIG0DjBx`9O9kg)C!tE!cTd^ zZ^)x7jkECZGz&J!LGEqG zqZ4`QBV6=zENnnhF}7OXNj<0+NIj8lmS6qc^rWl? 
ze5C$Lvqx>SVhkZ;kCg_x*p2dh*lffn^hMQN+d)4c;tDF ziOZoADwfGh46q^prv66nSXj;a^o^yTrCHM>i@I0ILnEdkjkfQu-fvpGE7yPfZK*I5 zJWR%LUSmQ=raUd1o;G1}|8F{=rnlpDBX7iz^uO-(E!Yu!Fue9sj<#$2tkV*dUdZ%+c-Q`}LZ)`h?d9Y%s; znTWPsN+KDp+Vr?n2(5ErRQv3c)rOVV zlO&-sGJ-K+UhVd`>JDa#C`S*h17|VO3)q}-?97lYm;Y#TXK_?O>SSW8KtQHF=>iE^3gAdhYbN9V3)PfpqQfZI?@) zcLWa!P0UMzy0w71%w8z~NFSdsgmHoT39 zS;MwYtzueP;3vxvCKpn&2@15UdL$NMoA{#g4u~*RSaq5yX4oG%}3! z7d3-j9|Y_P4RS6-xTb8!jbEdngRvkZ!ooak!=xKb#KN+;5aYd@L>`=ugZv#t!3~0prh*$|Pn0gcM(RZX^&cCJ0oT|eO@2ZHL1v~3c z!oyr1Yq|^FA^sWUw?$!LF9(p9n`U)C09Cn1mwv22`f*)x1CmEVwQwQI6rlbdA53Rq z+VC(VEMlE1s3yS$M*S56u!P{?wu2waaO8;6+l4t3=95qcn-tQ)+?|t4h zShg)eyza<|edlxaBn4$by+%YdK$oc;bDCc$VmFPJ%=tEQx};#=#i6X7_3VAlo7qv)AZC(AV85Q4D(U(@lr|at%r#os_ST`|3!5OMhmh4WmH11 z%d%W_#6qk%v0d9CZ(CZN1yCg!-#Z_W7+h4U#_G6^*8k{XTco>4=(?&i)fwz3`t3AfXhF!|#^4q$K(OJEE`>$8`y6RoqOq#t75EGNW zFI4h+F6&H!ZmGdEMXcA@He067!+8%N3zrJx0uipxNHrsahsO0PDJD%);zxmx~cyv2pEgG@3N zsX-?rY>k3_O5Q9k4kxCk1-jTVS#h+8K@y?7l_4Z*`&1FjOGaR;q>`t)|ASdzi@PfBjs;@s0EJFAcX_o2&CexxP1?U0!-8DgcJR!ur_BW3wIH^cUp_w+5G?jM^$mc zfCWT$1{B*Xws8lv3qMkgojj$=n zMaS-u!J02F$+eR66y238GUHYExsvllAvPAiFLaYqF_?;pSziTHLaz{0U&ShEyF zTwuYduQaGAq3shbRE!BKB6k^?LJ`CJgYnFQ|7f0adrdQD)`MLM;wsn*=hv^VIEy?U ziq|XDrkywzzNuJen={s%($9`PQc++lrbodc@+6%A}DCa5Q4*w!7U4czk(n1OKpT|D;`d=Kp#Z^BO$DP#s z__H_w0+#0NLh+0Ic3>sYGa8#Oo0uvT0hy2t!Q~44XLF+_K+$+LKMT3yJp^$CB_dJ} z6FW&nekWdnde<=sX&tuF*IyHP2wlPWUnH4mi}G1yARzdB=bVrKYdHnFa)VK@P<S8mOK`<~%Y=1g*R{6Hk(-UKG-;Y!e4v^6h~1R)VXqtn~@U2O5k zYoY}aTjo^G8tR>^hnixl-Ogf3i!i{J^Y1o{2WfzW37_)dt(0Y*Pia`o@OiHq@By_t zoHNeyuX{lGW>QRq*(T1iYW;vMKA7D1l$b(4sa zpV$96Q+wKR?!{zz@@Se_VDAw|{P4QvGk@=`*XeQTEPv(*i%`|MloK(6aIH6mzem)~ z-^juNhxC{jE;@(62T71VMb3h8sHQPN`+r%J=?M3mGsX7Hv#&l|1|d*Wu-+vSY`y|* z+-WbCbsu}V9(7!><)>FkG6#i%Zy<3~ARX2^Z=%BJ3iv@xz0@4JptnAJ3&pErPDix( z2LeOW&ihuR+15;v^ntH#EV=LCQO)_swgX``>5}{a!o99}NI_n$h4*~E_3+1;!B*ee zJRgZ>=&tfXdEBWM+ST5tjGQNEUu*kOt(MraXvKFsA&7m7kYm_n^2Tvvq9Unneqv|o zsh1MWU5pY5$y7cB#iRmeivW2@q20!IQ}U3!GpsbE9VQkcdww=#n_XKG2cyN6>!uN+ z`jTIdIqiCc*^%P4v*?nCGn%Bi3{F*Ajv=FXO1pYf!Wc{*L(Csh-urnYhWFbCf*4r=XW7=+3|Ce)7h7=f-+=gJXy0!#kBygxGp=^s?&sa23hE)i7GY92jAgj$1( zG}Y6VTweTPr@(tc@k#Fx&Fr}_v$&8N`sTTy{aKCsQ!{uda_{8r_m_)?iA58?o;a6{ z?wu(XPw-0GDgC6KLnGMpDfU68c`c)ODEy;(Kh#)UO4P&|TY20~K zJB%R##CBxL*#l)`;ckmexnSye00p$yVI&sZiH>}d{mXL2p3k_|p0EWAbT-&~76wsp zAa<)0PP|-AI%n!zrZS2{=QCW@4!TM4Kg|UGU*wM$8BU-=vn%ov*Tac%;>rx%_M@de z`DVfyk7K_cd5j4KM5@_CKFk|HbKCq*!DZ58>T%|P$s{5deR z-eiEl*aPw}zeK%k20eJ_Ek+~}K;;ZzHkodw1ISbEXfs1qs>nnnwg$!b_Pq(H_e6j2 zEWSZD&CHOTu|5p%5LN=R;hISXyl|V-raL!m0i`JDkKV_pqM#T`4-opmZ=lY0R)^LA-2GCvPz-^-T7OyM0}MAgTX8RftDC#AX3M7nW>EtZ01R z-mnR68dtK4#vBWybPhkw(U9Ab>zhmL1|Hh*`s+RW;tH{A<_3yP3o<*XGz*fOwaY_> z(TF5>pao^88YJM9@dKFbpyOFM6bQ%{7b?NTXnH5g8@MZ%6h#**7Y&%|2^N8Q&0*-f zq@V+-nv%_`(cGDmEe1^PNXe5yFBum_O}Y=NyByjzTQV1L_oMb9@mO(Xd~D)BI6Hz1 zj1>Vm7O<}|L1W zp7@Adj-KpU@RWXTaN$pONzx^vUsLZYxOi22u_r@5J22q7=i%bohmLBcYFJJp$t@*_ zXdE_PV**MbbF?KP8o*jc-PjGsnxp2jRLXcM;8>_!qJrCggAG zar3jyvujtl%ac6e$xHypdwqH{?AoqeGlt%IM{yEHaHrJ27}ZODtv;(EoE>v;@?BZ8 zv?jt9(Zvd#VcK}fpV$=i{7f^%UrI?NAbkc7`m+v-PZs-qc+h~vtP->GANMw`zK*E@ z`_F|e2|DgN0FY*2nzksOtF#nJ`D&$|_AZm8bgQ4g=ZA^H!yi~Lw@gxN%LX^yxwg*t zP1|n})ehQ(bGpBCj=MyK$m*e0jrn0r=`bVU5)I0pLh^lh&JL(tcovxauj;n^!0qqQ zmKi5T{WFu%YoRl3-$B44$$lJH^*Od-;i5`}ND(%M&Xy5b+6v4YH1 zFx%yQi7y6!8Y~Aulg8c?EvL=E3KeJpfLKk4MJJ6pSotLO>FM7>9w@>g^it{<+_=_F zuRFEwB24icl;zod&8!O<@!j-Y^|t-gq*~79+2=zW2exAD9ANuj0pbkOF8;=j=Oy{v zVyijB{gC(fO-J^Ma0`T}0nT;CGy(P^W$FwDG66K0F?oIK1Xd5e>>97|?WvU3wCn<} zib7|Mnf1pOu5fC#v%Y>kHsGl3%4VB)u@et+12!ayy}RQP6Yx7s^!|#jVN7!f&9EUf 
z9VBY19NR3%Rj~2+t#^g++wQusHO<=784r>d>wFLC*w@A-@W>!c2DHzcRbgB0HV>fr zGYDGH>QB7GV0{#G02vK!+^9ls%pK#(O7b(?M#!aZpJ@TP3cF=?ZqNL%od6cUbQ`{5 z;XH6biD{!rv=36Qk8J`3eDEYAkIlDRY_&rev{MMd3%KA%SZDE8eo7YUY~PtGU~4JtaS|Y7^D`|& z3K8^fcydaWP@o110S6dtTwEDA2;0KKhg-294!v?)T6*t5$f1wY8=kNw7kDhSAJ2rb zCQI0o>}_TIsHtT_k>WGq`Ljns16QlhGM>2mul$E!L&1hlvn2x+HC`Vk?#8tKaW@^c z{R9?Mw;FfWL~#n9DUP2C#uMgCy7&Oh4J)>ERq&> ztOK+?pAdy17D=*kF8IT+$cU(0QI_rVOX8{kVmWa|-AKnw zIZmctS)hIs1mgFs$8^KCLCUq_igjZpmZK%Ws;Y>QUpHlu9&ZD1BtJr?J&km3H8tWd zuN++Dz5i^B*N8LUHyvJctD669zOI(QWnJ(Nx*$b+aL8vw%ToT0Hf!A_zjiA<=Hwal z+K+yAAFvikR=1+OW59k1;?+WgbI|YPZqrRU-)mH#9CW!*X>KQmKBvH^c90lJj&U|OtvHx2fXsC=>WVW+!!up!W?opq|Z4cJ|-&ecV z9v{&zZ_?3+GO%gqF6O|&kg|EU@VmCRMC<|3ZfsYUBeOLu*ZuG(OWsqnAh1MJ)#dZv zOq+MomG+T#?x9na-ll5R?tfRIsxT8GzcVEZXK$#C%8KYJ%QA!^F_uX}uGU!ZnQYGtlJT_b<%l3`s#^R8d|;u( z8JUkEnpxc3N#tKuu4%U`zfy|#ul5mW+KZi(`_AuzfGaQ*k57B*sWinU2N!m*B%k0(~6&Q1SU+58O%~4SW z^CJHk<|9J6neG)jIY-i6E4E}qN!;97EF6oV_@!Wf5cm76F8KB;u-FseNz^`n~lF0_#yM)J^#3L7Jx>J zSrDF-5D8)J8T0px@eqrTqNI}eW&v>q2u*GPKn$BvgY((QG4@$54lf#4$6!0Z5S@G|GCO5!{)PV&3!h+c6otkcYrWR7f&$R1Ony(pqG~ z&OVHyzbvp!_!mIbB_DoSFcvkEDnbpBMDCa)hbx67`J+a`Yi{4LPhaMjyMd$Xnn;WJ zyZLuczxkqwU-;*5^AAv{{+S&YU4qqAv>n5$_98uKN{c3?8w?nG&M`g)r37%^-3`({ zTD0ebTjX)35k(DD9YU>6YrTc0#!**us8F}d@1%mK#fK06GW``XXO`#?Siio^%N4^& z3+Z>sI3(S^+MaqaN_gT}afCk0d*Za)ZSTnqN3|ROMxSc5|KC#t&TAilIO#x5E}>=A z$3@US?BcawH9gjWxQI9pIqGokwbKQ^oAcy5kko)Rm{dp*yWna1!OF+~T>R5~!Czaw zPDBh8uBcw{sMaqw@O1X)yFS{k--ZL9HC&WmbjWF>*4UXbX#YGAndQ`}oy){2bK^=}kB#M-eH;6w zxtP2?@-6~H7Mia6R{5t6wnaL>{D39;Yr7tez7yk~VahIh{PIS2(&3j)_ZOJ?-ne+6 zl+j}QTy|)|ptPcpQMCC*{d4BprkX<|dh69eE3pYq`_1p^8OLNKyvHBqc;;K{^BOHC zP%Y;R6?Bh<5wz54!unL44b5Lcc@H4WIU_x!6nhL}L8jL0FC!#8;FSjp|AYH=ePY&a}GmXtV_53^bZWX#L14EizJ6 zL4)7;JG)ii3}!) zj{->}D1L>4NmyMKbmRR2C0t}HL9(JTxJnap#K{yU9i54*Si4(~JU=4B`sA~d)A=d1 zPtP$Mq!o!dTAVMc?KS+pJS`U>o$L;es4x;%v*sUtl!LhO=3TLaK)%-0%1c&`D7(3L zRLk-xwLhGY9&4hOTBHp1G9NK`Wla_}AKWr{Z;Lq-+SpAV!;E7Iev6)F#BOdjXGB3q ztDe3;kT-e&=PyzEC;da)qYR%X0ppikeM7s-D{AtC5sPXO#1f6r0y6^t&DCU%?c^+5 z5MW;|)Eu;$X?Yl$W<{T z)#0A0#`2_*=Ar@KN}PA?kVS4Yk~8LtZ8}YE$}td6wxj)e@IzOw3-G3hvDHeU5BudN z(aA+M<<`ij}F);F)*a;@Yc$h&pST|uZoAWnWH^wPnG7q#E( zT#x@r+KhN|uCn0%0ss*N12&lgR*c;I+*iw$HlSg_)dvSkGf&n{>paZY@DYUBJlfC; z7(21O;=c^G`<(UBvoC`U@__a$3hU&sJy)W@U!OI1*tQgV32`x!bUuDrroQ+XskprO%lB$9BZ( z{xc+Ou;C!V{1KA6>+I`HIXa`~@mc=qvz*-N4ox~}wi;&)^sO!S>N6wQ2b!e_4li^1 zadutkT(?<{#M#Z7?&@ovGL}9)!c%Q{(4RQubg5T@^@|(zzjyv%_KIJQmO5MrbSTmt zU-mhfz8k{K=P7K2y2zxlW@`mC+;vD?>{lL3Yt#U`Odv=Jym03PH9SEYUU9T< z{MhE;(=A?J?Z?klB3;9s=;M%Oi?&24cA5u#Yu@xB>;8hhd^2YBt_OFBf06B8vZ}<) zu>8v~HnV9cfH5DrJ*Fq(NvHDHwVBS=l?tnd)79(kjD+}+hNQxog=|IP6M<;*e+{T( zzuka;uiE)zf9pW!@5NDm15YM5XaN-akN#)cU$|L*5g!A#c3>hZbwtQY5!7SAEFGIe zIW8p(NN1aASp%k+%}%9f&YP<@-wA2uuqq@|Lp=fU5#`IkC$CZ`qPDD*^54&Cq1>MvOUSM55~FAE!AO= zCA8P`x9cnIGv-#mEnD-_EP2P(%bVb%%$AttpH3{P{#}^9_`_@TQ_KOJpU zKF~8c;ke1z);Iw3WjOgx;@Z_D_NbtFgU&XUVU;c)3|L2-8c6D9l5QPMQ7udTVrKU& z`Cvm*&P6RYH2SRYvdTxc@zm^g(p>LEH zEjZdZ@3+CZ%-*^7!%W_Z7GJoC3eT&3ylN}yZursS-N~3FfYzTl{5TaGm*m8eVmHXi zl6s38;}Q&K1Z_W&9{%9t{*;-!#>E0fgHFfU+dBJ_OfJv5-$}OLW|Sb zz8iGRu{f-OzSm|y8CKBe*1wn3Q`Ajpbwc=AqeC|5 z$A8j#*0IO(^o%h+dwX3}BPeT>TS;_F2T#m?EX%PQot}ApY-l{!gv+VbIoQ18*%fj7 zdu`P6<$nV73Ai|l&wQbucU9oZ09lY=RV9KmE^x0`csCsIoOnmVJ~AIfPVIylRE2j;??Ngma9m+V1wQ-kFA8B6C5ULU zgT8Z92e2<1u@V5SP5rzRZWhD7$1^v0@SP0S<{;>@ybwYXY752o^7C>GfjEh2@{W?Gil=&lFJsX2UZh1=nGPj@GqklAko z2EA9S1n9mhe3d>tbQT*>8I@N~p6EiN6#jXGlzpv5$32N@2I9|T4wU87)x zkZ{!cK&$xSL#lS(%z=56u^Y1Y&p#wLRfs6f>x=pn5z0j0ft zFP75&o^TbdjLAr^rcX%eE;^f06P`TGD^o${ss8uFVtO3N#CcjMKV-dl{p@jj7uN>_>!AHdH&7f 
zwIdF(@w4%%SxqMH_)AHMi~@$h+0BTTYuPg^^l+ z_rOXE>X!i%x9qGusiSqpIcxIy1LSfY^ymeG*INfhou|=xx z?GFU6FJ+(rz{w*1+ltvZ3z&~<+07BYbJ@9@hqU@M$T%6-P)YsfDW`~ZR)Bn6fSjV1 z=lB0zXV1swMd-tZ)%Gpqz6lhVZq=|~T(`BdS<3AH=LOUL+skMF6OA+O z+_sBRs^VP~>pWHqnX2V1VI4{@Fbv<gJ6TOE# zp7E(85YEo>^NR>mKvitNyPFo_R!jX?BtJ0mVIxh$`|{vY4SXaKivnU@r17!t+T|ZR z5nu9rHW3g)%J$5`Z9MJV&8?*cL$$6(&oedRm5VR?cJ~H56%}|JDHdb%M)WBAB3`$` zCR2y&Y{#baFFBa>o`N-3hxcwc#EW@!PovKGdsPU~(v@9L9@utzjVIGjE>PSHF1vS= zg@4s(k-76k`8nwS{+xFBU2eTFCY|L?Gyep-<)GP~Zd8jStu{+F-GUIlss#6+PrPyv6oc4;Ots08GjYAX zFY0a-K=+~}OfqnYkz4GQQ)J+~gi1=cPu8xa6u|x4Qy>1R%GY)CmWO0bY8+|dT^ktZbMtiJzwqfn ze80M=V=8&%?+RuPKh~CNF%DUb>zr#ha3*!Vhg?i&)#~IhHVj#)w8Xq)SWmF_Yu7xj z-m4vq{+_s;d1YQa%dJI@jgcNXa{rC5`6JC!w`tEE;@d~hS>_kiY#&Y7P92WR2YhDr zw0xNSMey>exzsiM9)W?+M{#!OyuS#%GBccJ1l$?KiFf?vG_Xfp$sFC#Q~^Jo+oh$w zxVCx=V9H0psI+VY!bb!4gs_4h51he&4UKdxEw-rEPCs~z){;JSpQ#UTyn5h(%i{;eJFrNw?CKEQv%jPf@kW+=ghp5O&Vch83UiGCV zN#a>mm$C7e?KNY!u3;+v{SA7g%fQe2I{fmB{p{B6-@zXG`P^ms8ToR{rdy6bf&W_w znk1ZDlAhgQ+AlB`W01M{oj(p?HLhb!$^%We4#h%F4Dx7q^^qzFEBe=~{*v3?w8>lr zvsTXhvc?yO6deK0=ieS#c6;ZJL7jDQK0Ef|1a-SzYQdLFQ**orj9XX*#DRNRIa)3; zB#@}s@%U*Q9iSWZ-jj%cfp;UnBW%W4lkK7p&ZJ z``+BtofL-^kRTWmkn0r->>hJfFnXi|O@we0kdrznz~o=}f>7LE)bWqrt%dJH>}4g$ z-CGoa4SKf*#MzC%EmFR9iviDCD#rdV7*mMgPU=}){Np8-5dT16?zviaU z_^Ej5eecD`CS*-vw{iATz~)(Ral{HcZ<(Rf`9MM=U-5}L_-Nn=YvTb_EYc4ddB9GX zRWT**dvb1N=PSL!C5GXfmj40sw(CwswK)yv&)`Y*0G`6&F|?4v*kA4vBko}NzUwjB&DtSD5H#-P!bntGKVf3)2uU0u^n`TC}R`1l3) zQ(ak~lrLVokGCzG78Gp`TChAJCd)Q`LlMFJHTKBE@XYcW_hlpV`2Q%&td!Uz9HpP* zo3OHfy;gPwp$6tG9qo6jz!JJJxJgNgzvNuy^GSTV@-VC6j%)q)AI+}bH&k!-d++_T z%&e;P+wsELSp&_-SnT_#Y+x4FAu$=gJFyvxxfy~jaeUm)AogrRfIcUd1 z@*mo~BW~`r87IjI7qxN{XX;V0aJ%u=wbWhT52)9g0L=8wp$m&`6VzeoLy@u5ee@5z zKA?L(Uivw{XkNlpB}u$}VLJZbm0#oV@{#?PY!)?Jg!6v?>p$kRo`g5ejwMrE2;5L; zgmBckhC7w@sqr87Z`iCK5ttQxYBnp$&vq(nY%b4kZ<49`C9?Hu@XV!VxTQA!Q@#Gk z4s0Aq;5+C}mN@1Nun7?Bj&pe&iS0*CsZ^SscRP{aYQ1c9On@SKbYFt?95JLod;T@q z>3jP|%q73xiqAtJMy5%h7ZtdA1J(}U*scI{{k7(~BQ7S-h?>r%ip4LUHdpN zluJ`Ihx))O)hhPxTeeBk^<#VtZJETV-4GWaDA>5+%xSY*JKr=XYxdSGDm^vFU4HR- zJFNP%)HR2IRiIN3)ws+Oj*@RdGP|8!HiJ+J(9-ck5Z}V)9yO+xd@%h(GS?XAMYtX18P`J%YCrupsW&f}xjeHV}k$8kW*0 zZwA_kb<$`RT zFFY`GI>0=eWv=1f;?_wV+`2sQ2)Xrc0_7SP0G+IQEU9A>@{Rw>ogL*MF-g)vfwV4Z z^&|A%Grio=^yGkPMC=WiO$=uf3dRmaz@6tf9Fve2HHV04o>DG=beKR=wYi2=mNm{t z#XjI$Y#+U4yIV>nNQVQdLvj)xJ<~CWT!|&SREEkxC&cVmb7nR z-wse}3I=)|Kf&0L3Av>lhniy{q$-GtARmDHoLNnWI3pU2^8jeYYBCF*fQW0_Urn{1}voG50} z63y0{9z@}t3ZFr^9FbjA-wPGHs-lMcg$D_V43x<@P({ogA*@%`y7vRiWY;6{ALWVo z+fPy5tp`JQY;ubzF`TFzm0;gFspU?c_gRDErM%Pl*u0uP_MfO#m75gyvj9F?jiAUz z(LQteP5alxgwaRq`&VB!zKG<2qGhD8BvQ3f$kCI6gWSz?B}jE$T7D~|_peU?&ZEU> zwULM5LQS+Q4@&@3jyUBTgmj%Tl9y-3fG@gS!EF_1D#wRDv3gzU5>rE`a5Xibj9%@~ z57G>ZZHHJ(o_3x_l1(bM(W9v5|K6N0n+o^JzKC`PtnkgeA~%&UF-}`#+uM232epWJ z+X>8$|JgS`dh76}U4|o?ky~yWC2D?MjKgQyONPnYfIKjOacTi9M41q?L8ol{|)j%qU@b+G8!tNy?|{+vaDN^Ztx@K*@6%H<=OBnPkK z2T)|JM+ln|z|2ld_$C*nJ5$v4B`jd;XyCalU)9qePA~gz#D(@RMaUpby(tBL60}o| zz~0(qX`M9-f4@L*Z?+Fi8vJzbacHjx6lWID!8cc2F!fQS^kvF4#;&O5H@mbG-fQ1PB%6sXwQKatA+L$zN)1&`D_3sYX2 z@L;*;pOChGoi6cI^7m@`-sfvkE+%B~Mr9YJR#N0T!;UN+De(@e-m`YMv)yw|iFf{p zya|?|7ML`rxvXM4n1u<<1WoDniaCcC4fX&R?SKjk3H0i>Xk~#n9P6wU@IU0nY2Js1IV)(W(e0+2h36ccjw4etIo5!}jF4-6i?0w~NeolB^RT8(i zi3Sl1BFP`5UNzdbv*|aR{>W#cI#0Rh8Zbgbelk=50Vpwq z7Q=*uxy_cU+rKb-!3>zv#%_t=)B7}_H-xa80v1KAC!JjCP&CQx9feqyM_HEr?bI$l z$zXvB89|z9F#;KJR`^$o`T1brJj5dJmDAy-I3}90l1)w3F*3wHDkH4~bV!?|3E9>; z9Sr?N#*nYIm~Cy4e-6H2&;N(oCMC{ozEhvWT6Q1bk3VkVy^3;g6%UaFw;Hu#;?}AY z#2OWlt;#=p?fuC?LmRu1MVORWM)UClntfL#W`mH~poTcaG9NM!uR{2K4V5D$egR1L 
zYB}^E{s(`^bfen+RP{fh+$PqyD&-W5OVl4o%Sorq)<*6?@<9#+OE!*Rc2(3LV{<)Pe7t`r&urY z|52g~>%3nY#NHkWul78PNT-~UTKFCqS-}^NGSIu~NYf=trpNRZE+9c(Y?AwMgXfu3 z8b>}LSuA~d?i7}-&NNAA9&Tpn)bi~PnKqt7#aJ)CjN$u?Uyv2gzwnyz*~kBNZKD3$ z@G{2@9~#ROHpEBD0SQx8-+chsuhfLe2@ zkjO&1@SVlEN!lqTr53?7YZdK^IGx;C42`3}hG;-MgmjG|aN+|tP9G2uI+t+CCeaAw z$Tug$VsbP+9DdfhFhdUN?wJm^k8>v_@m?~P751Cv7gWYwonZWTwvW*Q6S+2)ba{1M z7keA!&Jr=nbYh+ueOX=n_CzDM@jqryN~r6W(MH@%%43XrxT8?7h!S?7{Mtc}!!6-O zTheBaR3=+lTXuhi3uxhmL2^&DEg>0f-0P zAH}i(t?Qak$k-(yE9nFHDh@tG+XFtr?q^l;WhcM4gQ=OIi5sVj+F|G28?SSM#byg= zUw941I=`6>N`iwj7$~eVTRv0-R22go&q+^ZBjEp%5r9EQ|7^|_n_{i#tr?N-rF!KF zJ-tMeaYTQ|cv)t5&dHl|raR&el-s6&q~*yO_VOaEn0#pSz0cCfw&aMrYu8oSO=Qe7 z?$e7l>GVpxHQ?n~g_2%(z+P7a@t#H>YGy|`CdBLgot=NC+D}Qc_vcAv0M;^3K$Fso zT=L>64lnwcJ^Vkb8*Opc+s`YN6Kg?N3oC^TW^O$ZoZFw!FEyu*EgJTy8aUfF-cu;FBj+j-K{Pq4{iD+7C1V{T zM{x=r1RP3Qg0WKcUjKML&M8})Zno5M8&R25)`q!UK!CQHT4)1tMu&a6@2&7mz-{A4 zKg#3D3A-lnf47GUUvH--jXyZKXXk$UveQrFY`FC4MUei%*({r+UdBaRxj~~s&ui~M z#L{T>R*QhkFcO`{2AFg!LjhfSKX^rB)|8DLz9{@Este76R{Buo#i2z1Ap)f^0Z=U1Zo@V&TqNhqnh zXPM)V6XjdvZ_Hw*Wqro9DVtdFrk;Hg2EIFce@nM=2llaONzR+GtmyO|Ossn<}&Ox9-)*CSACM6d=s}*m|tv`=A=Qv`J5OLuUO9CGDK`vd-H? z3DZh>i;3QCXOuJ~zK-`3N+J5K^+u=fw71*y8P+QPI6Y=&P8%P%b>ih+Sa#ee<1a$rQpiis4MB7tGwE(&! zYz3yV=v+2GpmGW|K?v8SjAQ$ZYXM-!=4q}JjCvWPWpGMJkjbF$Px9g&FSUn5JWF6) zBVcf}CL6JFX|ImT0o&G(j{(gZk)nCmVQ2JH=<6c&rWJ+L#XqOa;sl0Oy{v=kla)%( z{anLJOT*>duSs0P$Svjx;xApCk>uYuT|mhBr&lvRBNvGGB>bLQv_9@;ntXA(&*y*l zY^3d*+#0Um^qzgC_IKUiGz6&mibP|1w3^h8qlSy=^aISzmPUn}15+oc9RR6E%DAe+ zWGgTmV8T2=pGP{h?bmOLd0gcXh{i8Gk>KqJ6E3UpwZMP1O5;2PpxJsici|U+bd%Uc zu4o%t2L8$wng9UQvSycqUvn55GX=5p18TK)3>E1zDNq4pm@xj#OMJAzq)~Oe?K9MM z0E$sTG01VOm_@8-<%@`$CV~^dP4C~DRSHOl`JVeGSj&~3e}Gf6P>cy5 zNJG{TrJy6=>ZZU1D4+lslX4d4fdY04Oa@=FL8TKnrV~>HIii>jLgZou(-prv)ROT6 zCcV%Q_H!o?H^UYx=}4!(qd?zBU{F5BirKPif4676fCO$PBbG5Fnau0MKSQ&3dxSmp zeA`6HOIMqM`>%cX{*4`BR8{`2PN-`7(fR!aV}~NAXMs(?UJYpqrG{HB4G|k{78|L> zMulSLJ%wQkXix(EL#O9u-D} z0vX6-AsV(;L7Gt{7btXFmQwUdi=xFQ&0Koh`Q-;5LL5!RdUc2wfHQk+_Cy-=v)I$q zz;JvSY5cH08(g&bOZ1y%hW7-9cH%E>Flm@-6vUkzdk_EqwF$qRxLLcLK&{qI;tH2q z4|-Wo5UcJbIkr%2j~itEm0$SRR{{zX4(5>B2Whh!W|fAFaE-Qt=1-M7HcgQ4sXV&> zUaWaXA&d29HMEa>ipSdA0zkLcUjM3Q9@B!=C2JrP&EKo?vXa-3sx}x4`I*jmfuah7 zDg@L9@B&}&Iz*pUkW2)6`3jmAd~+4hMOBnz0(v)}u3llP;!-2IW<-ZJYNuY0!uVJ( z1*&kk2%j=hGP}j*GaB9E7!%^f|DgrX21cPr>p$_(PlsMdF5zn8xSDe10()`#lZWZA z3k?0l26EP_l>&pR{QPAP$TCjBFYj;pPLaX~N)?VsUS4mdU*NiikOnuz!Pcfeu_yXg zDd3C;`CMLkQ`>9Zzr5lqE|Q>8PNM;_PDNw)5rv!WN0k<8&}bpJC&kjJ6D41V88kJ2 zMzgo + /// Returns a string describing version information of the TensorFlow Lite library. + /// TensorFlow Lite uses semantic versioning. 
+ /// + /// A string describing version information public static string GetVersion() { return Marshal.PtrToStringAnsi(TfLiteVersion()); } From cf11e9577bf08f3946e1ea67109fe5d63eebd2fe Mon Sep 17 00:00:00 2001 From: Koki Ibukuro Date: Fri, 22 Nov 2019 11:42:50 +0100 Subject: [PATCH 0694/2522] Add options argument to the Interpreter constructor --- .../HelloTFLite/Scripts/HelloTFLite.cs | 7 +++-- .../TensorFlowLite/SDK/Scripts/Interpreter.cs | 30 +++++++++++++------ 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/Examples/HelloTFLite/Scripts/HelloTFLite.cs b/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/Examples/HelloTFLite/Scripts/HelloTFLite.cs index 5b885f611c2..cf6f1a1ec6c 100644 --- a/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/Examples/HelloTFLite/Scripts/HelloTFLite.cs +++ b/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/Examples/HelloTFLite/Scripts/HelloTFLite.cs @@ -46,9 +46,10 @@ public class HelloTFLite : MonoBehaviour { void Start () { Debug.LogFormat("TensorFlow Lite Verion: {0}", Interpreter.GetVersion()); - interpreter = new Interpreter( - modelData: model.bytes, - threads: 2); + var options = new Interpreter.Options() { + threads = 2, + }; + interpreter = new Interpreter(model.bytes, options); int inputCount = interpreter.GetInputTensorCount(); int outputCount = interpreter.GetOutputTensorCount(); diff --git a/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs b/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs index bd608501ddf..0429daddb8e 100644 --- a/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs +++ b/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs @@ -27,6 +27,17 @@ namespace TensorFlowLite /// public class Interpreter : IDisposable { + public struct Options: IEquatable { + /// + /// The number of CPU threads to use for the interpreter. 
+ /// + public int threads; + + public bool Equals(Options other) { + return threads == other.threads; + } + } + public struct TensorInfo { public string name { get; internal set; } public TfLiteType type { get; internal set; } @@ -42,23 +53,24 @@ namespace TensorFlowLite } } - private TfLiteModel model; - private TfLiteInterpreter interpreter; - private TfLiteInterpreterOptions options; + private TfLiteModel model = IntPtr.Zero; + private TfLiteInterpreter interpreter = IntPtr.Zero; + private TfLiteInterpreterOptions options = IntPtr.Zero; - public Interpreter(byte[] modelData, int threads) { + public Interpreter(byte[] modelData): this(modelData, default(Options)) {} + + public Interpreter(byte[] modelData, Options options) { GCHandle modelDataHandle = GCHandle.Alloc(modelData, GCHandleType.Pinned); IntPtr modelDataPtr = modelDataHandle.AddrOfPinnedObject(); model = TfLiteModelCreate(modelDataPtr, modelData.Length); if (model == IntPtr.Zero) throw new Exception("Failed to create TensorFlowLite Model"); - options = TfLiteInterpreterOptionsCreate(); - - if (threads > 1) { - TfLiteInterpreterOptionsSetNumThreads(options, threads); + if (!options.Equals(default(Options))) { + this.options = TfLiteInterpreterOptionsCreate(); + TfLiteInterpreterOptionsSetNumThreads(this.options, options.threads); } - interpreter = TfLiteInterpreterCreate(model, options); + interpreter = TfLiteInterpreterCreate(model, this.options); if (interpreter == IntPtr.Zero) throw new Exception("Failed to create TensorFlowLite Interpreter"); } From 1bd27689edd5133d498ba94cc0d74b60709b2d30 Mon Sep 17 00:00:00 2001 From: Koki Ibukuro Date: Fri, 22 Nov 2019 20:23:32 +0100 Subject: [PATCH 0695/2522] Rename types and properties --- .../TensorFlowLite/SDK/Scripts/Interpreter.cs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs b/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs index 0429daddb8e..ccc64fbc385 100644 --- a/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs +++ b/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs @@ -40,9 +40,9 @@ namespace TensorFlowLite public struct TensorInfo { public string name { get; internal set; } - public TfLiteType type { get; internal set; } + public DataType type { get; internal set; } public int[] dimensions { get; internal set; } - public TfLiteQuantizationParams quantizationParams { get; internal set; } + public QuantizationParams quantizationParams { get; internal set; } public override string ToString() { return string.Format("name: {0}, type: {1}, dimensions: {2}, quantizationParams: {3}", @@ -168,7 +168,7 @@ namespace TensorFlowLite private const string TensorFlowLibrary = "tensorflowlite_c"; #endif - public enum TfLiteType { + public enum DataType { NoType = 0, Float32 = 1, Int32 = 2, @@ -182,12 +182,12 @@ namespace TensorFlowLite Float16 = 10, } - public struct TfLiteQuantizationParams { + public struct QuantizationParams { public float scale; - public int zero_point; + public int zeroPoint; public override string ToString() { - return string.Format("scale: {0} zero_point: {1}", scale, zero_point); + return string.Format("scale: {0} zeroPoint: {1}", scale, zeroPoint); } } @@ -253,7 +253,7 @@ namespace TensorFlowLite int output_index); [DllImport 
(TensorFlowLibrary)] - private static extern unsafe TfLiteType TfLiteTensorType(TfLiteTensor tensor); + private static extern unsafe DataType TfLiteTensorType(TfLiteTensor tensor); [DllImport (TensorFlowLibrary)] private static extern unsafe int TfLiteTensorNumDims(TfLiteTensor tensor); @@ -268,7 +268,7 @@ namespace TensorFlowLite private static extern unsafe IntPtr TfLiteTensorName(TfLiteTensor tensor); [DllImport (TensorFlowLibrary)] - private static extern unsafe TfLiteQuantizationParams TfLiteTensorQuantizationParams(TfLiteTensor tensor); + private static extern unsafe QuantizationParams TfLiteTensorQuantizationParams(TfLiteTensor tensor); [DllImport (TensorFlowLibrary)] private static extern unsafe int TfLiteTensorCopyFromBuffer( From 9a7508c9f9fce14586eff8e9a206903eb1501732 Mon Sep 17 00:00:00 2001 From: Koki Ibukuro Date: Fri, 7 Feb 2020 13:13:09 +0100 Subject: [PATCH 0696/2522] Fix string.Join method to work on Unity 2017.4 --- .../Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs b/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs index ccc64fbc385..6314d55d9f0 100644 --- a/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs +++ b/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs @@ -14,6 +14,7 @@ limitations under the License. ==============================================================================*/ using System; using System.Runtime.InteropServices; +using System.Linq; using TfLiteInterpreter = System.IntPtr; using TfLiteInterpreterOptions = System.IntPtr; @@ -48,7 +49,7 @@ namespace TensorFlowLite return string.Format("name: {0}, type: {1}, dimensions: {2}, quantizationParams: {3}", name, type, - "[" + string.Join(",", dimensions) + "]", + "[" + string.Join(",", dimensions.Select(d => d.ToString()).ToArray()) + "]", "{" + quantizationParams + "}"); } } From 2629e4f9a4d982448494c880402f603cf559c488 Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Fri, 17 Jul 2020 06:28:18 -0700 Subject: [PATCH 0697/2522] Integrate LLVM at https://github.com/llvm/llvm-project/commit/de0c6bd56b41 PiperOrigin-RevId: 321767379 Change-Id: I362f79f706f850bb6ed0e93226544f38462e2916 --- tensorflow/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 119f5eae301..643ea4be71d 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -710,8 +710,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. 
- LLVM_COMMIT = "e297006d6f02f0f54a69223b98defde09c43158f" - LLVM_SHA256 = "cad40ccdb48efbe9f5bc093e4bfcffd305c66c7658aaab2bee5e0a22690f967d" + LLVM_COMMIT = "de0c6bd56b41081f1b89a1c7a0bf2597fd6d0104" + LLVM_SHA256 = "5f5b45ea3c7679c6b35a09d508235847a85f9e5a415cc4227ed96399d6fa5b82" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From 98e49c770cbc6ecafb6bff2338e2138217c0fcfd Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Fri, 17 Jul 2020 20:29:15 +0700 Subject: [PATCH 0698/2522] Refactor tf_gcs_filesystem init --- .../experimental/filesystem/plugins/gcs/BUILD | 2 + .../filesystem/plugins/gcs/gcs_filesystem.cc | 96 +++++++++++++++++-- .../filesystem/plugins/gcs/gcs_filesystem.h | 15 +++ 3 files changed, 106 insertions(+), 7 deletions(-) diff --git a/tensorflow/c/experimental/filesystem/plugins/gcs/BUILD b/tensorflow/c/experimental/filesystem/plugins/gcs/BUILD index 28371eecaf7..b2636571c25 100644 --- a/tensorflow/c/experimental/filesystem/plugins/gcs/BUILD +++ b/tensorflow/c/experimental/filesystem/plugins/gcs/BUILD @@ -25,7 +25,9 @@ cc_library( "//tensorflow:windows": get_win_copts(), }), deps = [ + ":expiring_lru_cache", ":gcs_helper", + ":ram_file_block_cache", "//tensorflow/c:env", "//tensorflow/c:tf_status", "//tensorflow/c/experimental/filesystem:filesystem_interface", diff --git a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc index 9be93c4fbf4..34add78f54c 100644 --- a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc @@ -28,6 +28,27 @@ limitations under the License. // This filesystem will support `gs://` URI schemes. namespace gcs = google::cloud::storage; +// The environment variable that overrides the block size for aligned reads from +// GCS. Specified in MB (e.g. "16" = 16 x 1024 x 1024 = 16777216 bytes). +constexpr char kBlockSize[] = "GCS_READ_CACHE_BLOCK_SIZE_MB"; +constexpr size_t kDefaultBlockSize = 64 * 1024 * 1024; +// The environment variable that overrides the max size of the LRU cache of +// blocks read from GCS. Specified in MB. +constexpr char kMaxCacheSize[] = "GCS_READ_CACHE_MAX_SIZE_MB"; +constexpr size_t kDefaultMaxCacheSize = 0; +// The environment variable that overrides the maximum staleness of cached file +// contents. Once any block of a file reaches this staleness, all cached blocks +// will be evicted on the next read. +constexpr char kMaxStaleness[] = "GCS_READ_CACHE_MAX_STALENESS"; +constexpr uint64_t kDefaultMaxStaleness = 0; + +constexpr char kStatCacheMaxAge[] = "GCS_STAT_CACHE_MAX_AGE"; +constexpr uint64_t kStatCacheDefaultMaxAge = 5; +// The environment variable that overrides the maximum number of entries in the +// Stat cache. +constexpr char kStatCacheMaxEntries[] = "GCS_STAT_CACHE_MAX_ENTRIES"; +constexpr size_t kStatCacheDefaultMaxEntries = 1024; + // How to upload new data when Flush() is called multiple times. // By default the entire file is reuploaded. constexpr char kAppendMode[] = "GCS_APPEND_MODE"; @@ -82,6 +103,30 @@ static void MaybeAppendSlash(std::string* name) { name->push_back('/'); } +// A helper function to actually read the data from GCS. 
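+// It issues a ranged ReadObject request starting at `offset`, sizes the copy
+// from the "content-length" response header, and returns the number of bytes
+// actually read, or -1 if the request or the header lookup fails.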
+static int64_t LoadBufferFromGCS(const std::string& path, size_t offset, + size_t buffer_size, char* buffer, + gcs::Client* gcs_client, TF_Status* status) { + std::string bucket, object; + ParseGCSPath(path, false, &bucket, &object, status); + if (TF_GetCode(status) != TF_OK) return -1; + auto stream = gcs_client->ReadObject( + bucket, object, gcs::ReadRange(offset, offset + buffer_size)); + TF_SetStatusFromGCSStatus(stream.status(), status); + if ((TF_GetCode(status) != TF_OK) && + (TF_GetCode(status) != TF_OUT_OF_RANGE)) { + return -1; + } + int64_t read; + if (!absl::SimpleAtoi(stream.headers().find("content-length")->second, + &read)) { + TF_SetStatus(status, TF_UNKNOWN, "Could not get content-length header"); + return -1; + } + stream.read(buffer, read); + return stream.gcount(); +} + // SECTION 1. Implementation for `TF_RandomAccessFile` // ---------------------------------------------------------------------------- namespace tf_random_access_file { @@ -290,11 +335,53 @@ uint64_t Length(const TF_ReadOnlyMemoryRegion* region) { // SECTION 4. Implementation for `TF_Filesystem`, the actual filesystem // ---------------------------------------------------------------------------- namespace tf_gcs_filesystem { -// TODO(vnvo2409): Add lazy-loading and customizing parameters. // TODO(vnvo2409): Use partial reponse for better performance. // TODO(vnvo2409): We could do some cleanups like `return TF_SetStatus`. // TODO(vnvo2409): Refactor the filesystem implementation when // https://github.com/googleapis/google-cloud-cpp/issues/4482 is done. +GCSFile::GCSFile(google::cloud::storage::Client&& gcs_client) + : gcs_client(gcs_client), block_cache_lock() { + const char* append_mode = std::getenv(kAppendMode); + compose = (append_mode != nullptr) && (!strcmp(kAppendMode, append_mode)); + + uint64_t value; + block_size = kDefaultBlockSize; + size_t max_bytes = kDefaultMaxCacheSize; + uint64_t max_staleness = kDefaultMaxStaleness; + + // Apply the overrides for the block size (MB), max bytes (MB), and max + // staleness (seconds) if provided. 
+ if (absl::SimpleAtoi(std::getenv(kBlockSize), &value)) { + block_size = value * 1024 * 1024; + } + if (absl::SimpleAtoi(std::getenv(kMaxCacheSize), &value)) { + max_bytes = static_cast(value * 1024 * 1024); + } + if (absl::SimpleAtoi(std::getenv(kMaxStaleness), &value)) { + max_staleness = value; + } + + auto gcs_client_ptr = &this->gcs_client; + file_block_cache = std::make_unique( + block_size, max_bytes, max_staleness, + [gcs_client_ptr](const std::string& filename, size_t offset, + size_t buffer_size, char* buffer, TF_Status* status) { + return LoadBufferFromGCS(filename, offset, buffer_size, buffer, + gcs_client_ptr, status); + }); + + uint64_t stat_cache_max_age = kStatCacheDefaultMaxAge; + size_t stat_cache_max_entries = kStatCacheDefaultMaxEntries; + if (absl::SimpleAtoi(std::getenv(kStatCacheMaxAge), &value)) { + stat_cache_max_age = value; + } + if (absl::SimpleAtoi(std::getenv(kStatCacheMaxEntries), &value)) { + stat_cache_max_entries = static_cast(value); + } + stat_cache = std::make_unique>( + stat_cache_max_age, stat_cache_max_entries); +} + void Init(TF_Filesystem* filesystem, TF_Status* status) { google::cloud::StatusOr client = gcs::Client::CreateDefaultClient(); @@ -303,12 +390,7 @@ void Init(TF_Filesystem* filesystem, TF_Status* status) { return; } - const char* append_mode = std::getenv(kAppendMode); - bool compose = - (append_mode != nullptr) && (!strcmp(kAppendMode, append_mode)); - - filesystem->plugin_filesystem = - new GCSFile({std::move(client.value()), compose}); + filesystem->plugin_filesystem = new GCSFile(std::move(client.value())); TF_SetStatus(status, TF_OK, ""); } diff --git a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.h b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.h index 93862f4a871..93f5d99d5da 100644 --- a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.h +++ b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.h @@ -17,6 +17,8 @@ #include "google/cloud/storage/client.h" #include "tensorflow/c/experimental/filesystem/filesystem_interface.h" +#include "tensorflow/c/experimental/filesystem/plugins/gcs/expiring_lru_cache.h" +#include "tensorflow/c/experimental/filesystem/plugins/gcs/ram_file_block_cache.h" #include "tensorflow/c/tf_status.h" void ParseGCSPath(const std::string& fname, bool object_empty_ok, @@ -45,10 +47,23 @@ uint64_t Length(const TF_ReadOnlyMemoryRegion* region); } // namespace tf_read_only_memory_region namespace tf_gcs_filesystem { +typedef struct GcsFileStat { + TF_FileStatistics base; + int64_t generation_number; +} GcsFileStat; + typedef struct GCSFile { google::cloud::storage::Client gcs_client; // owned bool compose; + absl::Mutex block_cache_lock; + std::shared_ptr file_block_cache + ABSL_GUARDED_BY(block_cache_lock); + uint64_t block_size; // Reads smaller than block_size will trigger a read + // of block_size. 
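+  // Caches file statistics (including the GCS generation number) so repeated
+  // metadata lookups can skip a round trip to GCS; bounded by the
+  // GCS_STAT_CACHE_MAX_AGE and GCS_STAT_CACHE_MAX_ENTRIES environment
+  // variables read in gcs_filesystem.cc.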
+ std::unique_ptr> stat_cache; + GCSFile(google::cloud::storage::Client&& gcs_client); } GCSFile; + void Init(TF_Filesystem* filesystem, TF_Status* status); void Cleanup(TF_Filesystem* filesystem); void NewRandomAccessFile(const TF_Filesystem* filesystem, const char* path, From b14f33c0bb845f9eeb59bcafff99469867284f87 Mon Sep 17 00:00:00 2001 From: Xingyu Long Date: Fri, 17 Jul 2020 10:25:11 -0400 Subject: [PATCH 0699/2522] Add common tags and update benchmarks to keep consistency --- tensorflow/python/keras/benchmarks/BUILD | 55 +++++-------------- .../mnist_hierarchical_rnn_benchmark_test.py | 1 - .../mnist_irnn_benchmark_test.py | 1 - 3 files changed, 15 insertions(+), 42 deletions(-) diff --git a/tensorflow/python/keras/benchmarks/BUILD b/tensorflow/python/keras/benchmarks/BUILD index da88f3ecafd..a6afb2e8d85 100644 --- a/tensorflow/python/keras/benchmarks/BUILD +++ b/tensorflow/python/keras/benchmarks/BUILD @@ -32,15 +32,17 @@ py_library( ], ) +COMMON_TAGS = [ + "no_pip", # b/161253163 + "no_windows", # b/160628318 +] + py_test( name = "keras_cpu_benchmark_test", size = "large", srcs = ["keras_cpu_benchmark_test.py"], python_version = "PY3", - tags = [ - "no_pip", # b/161253163 - "no_windows", # b/160628318 - ], + tags = COMMON_TAGS, deps = [ ":benchmark_util", "//tensorflow:tensorflow_py", @@ -53,10 +55,7 @@ cuda_py_test( size = "medium", srcs = ["eager_microbenchmarks_test.py"], python_version = "PY3", - tags = [ - "no_oss_py38", # b/160170347 - "no_windows", # b/160269052 - ], + tags = COMMON_TAGS, deps = [ "//tensorflow:tensorflow_py", ], @@ -85,10 +84,7 @@ py_test( size = "medium", srcs = ["keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py"], python_version = "PY3", - tags = [ - "no_pip", # b/161253163 - "no_windows", # b/160628318 - ], + tags = COMMON_TAGS, deps = [ ":benchmark_util", "//tensorflow:tensorflow_py", @@ -99,10 +95,7 @@ py_test( name = "text_classification_transformer_benchmark_test", srcs = ["keras_examples_benchmarks/text_classification_transformer_benchmark_test.py"], python_version = "PY3", - tags = [ - "no_pip", # b/161253163 - "no_windows", # b/160628318 - ], + tags = COMMON_TAGS, deps = [ ":benchmark_util", "//tensorflow:tensorflow_py", @@ -113,10 +106,7 @@ py_test( name = "antirectifier_benchmark_test", srcs = ["keras_examples_benchmarks/antirectifier_benchmark_test.py"], python_version = "PY3", - tags = [ - "no_pip", # b/161253163 - "no_windows", # b/160628318 - ], + tags = COMMON_TAGS, deps = [ ":benchmark_util", "//tensorflow:tensorflow_py", @@ -127,10 +117,7 @@ py_test( name = "mnist_conv_benchmark_test", srcs = ["keras_examples_benchmarks/mnist_conv_benchmark_test.py"], python_version = "PY3", - tags = [ - "no_pip", # b/161253163 - "no_windows", # b/160628318 - ], + tags = COMMON_TAGS, deps = [ ":benchmark_util", "//tensorflow:tensorflow_py", @@ -142,10 +129,7 @@ py_test( name = "mnist_hierarchical_rnn_benchmark_test", srcs = ["keras_examples_benchmarks/mnist_hierarchical_rnn_benchmark_test.py"], python_version = "PY3", - tags = [ - "no_pip", # b/161253163 - "no_windows", # b/160628318 - ], + tags = COMMON_TAGS, deps = [ ":benchmark_util", "//tensorflow:tensorflow_py", @@ -156,10 +140,7 @@ py_test( name = "mnist_irnn_benchmark_test", srcs = ["keras_examples_benchmarks/mnist_irnn_benchmark_test.py"], python_version = "PY3", - tags = [ - "no_pip", # b/161253163 - "no_windows", # b/160628318 - ], + tags = COMMON_TAGS, deps = [ ":benchmark_util", "//tensorflow:tensorflow_py", @@ -170,10 +151,7 @@ py_test( name = "reuters_mlp_benchmark_test", 
srcs = ["keras_examples_benchmarks/reuters_mlp_benchmark_test.py"], python_version = "PY3", - tags = [ - "no_pip", # b/161253163 - "no_windows", # b/160628318 - ], + tags = COMMON_TAGS, deps = [ ":benchmark_util", "//tensorflow:tensorflow_py", @@ -185,10 +163,7 @@ py_test( name = "cifar10_cnn_benchmark_test", srcs = ["keras_examples_benchmarks/cifar10_cnn_benchmark_test.py"], python_version = "PY3", - tags = [ - "no_pip", # b/161253163 - "no_windows", # b/160628318 - ], + tags = COMMON_TAGS, deps = [ ":benchmark_util", "//tensorflow:tensorflow_py", diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_hierarchical_rnn_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_hierarchical_rnn_benchmark_test.py index 3fadc228481..82cbe560bb4 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_hierarchical_rnn_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_hierarchical_rnn_benchmark_test.py @@ -121,4 +121,3 @@ class HierarchicalRNNBenchmark(tf.test.Benchmark): if __name__ == '__main__': tf.test.main() - \ No newline at end of file diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_irnn_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_irnn_benchmark_test.py index 8b8d838bddc..8d6f229669d 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_irnn_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_irnn_benchmark_test.py @@ -125,4 +125,3 @@ class IRNNMnistBenchmark(tf.test.Benchmark): if __name__ == '__main__': tf.test.main() - \ No newline at end of file From a5d1692eae9d9829eb25d9c32d39737208f7e927 Mon Sep 17 00:00:00 2001 From: Xingyu Long Date: Fri, 17 Jul 2020 10:32:41 -0400 Subject: [PATCH 0700/2522] Update dist-strat as off. --- tensorflow/python/keras/benchmarks/benchmark_util.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/keras/benchmarks/benchmark_util.py b/tensorflow/python/keras/benchmarks/benchmark_util.py index 7f5d51fbb3a..a641923519b 100644 --- a/tensorflow/python/keras/benchmarks/benchmark_util.py +++ b/tensorflow/python/keras/benchmarks/benchmark_util.py @@ -57,7 +57,7 @@ def measure_performance(model_fn, metrics=None, verbose=0, num_gpus=0, - distribution_strategy='mirrored'): + distribution_strategy='off'): """Run models and measure the performance. Arguments: @@ -81,7 +81,7 @@ def measure_performance(model_fn, num_gpus: Number of GPUs to run the model. distribution_strategy: Distribution strategies. It could be `multi_worker_mirrored`, `one_device`, `mirrored`. If unspecified, - `distribution_strategy` will default to 'mirrored'. Note that, `TPU` + `distribution_strategy` will default to 'off'. Note that, `TPU` and `parameter_server` are not supported yet. 
Returns: From 18daf9024016adcd9158462bb2e43a5b50c373bd Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Fri, 17 Jul 2020 22:28:50 +0700 Subject: [PATCH 0701/2522] Refactor NewRandomAccessFile --- .../filesystem/plugins/gcs/gcs_filesystem.cc | 103 ++++++++++++++---- 1 file changed, 81 insertions(+), 22 deletions(-) diff --git a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc index 34add78f54c..f3985808900 100644 --- a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc @@ -130,10 +130,28 @@ static int64_t LoadBufferFromGCS(const std::string& path, size_t offset, // SECTION 1. Implementation for `TF_RandomAccessFile` // ---------------------------------------------------------------------------- namespace tf_random_access_file { +using ReadFn = + std::function; typedef struct GCSFile { - const std::string bucket; - const std::string object; - gcs::Client* gcs_client; // not owned + const std::string path; + const bool is_cache_enable; + const uint64_t buffer_size; + ReadFn read_fn; + absl::Mutex buffer_mutex; + uint64_t buffer_start ABSL_GUARDED_BY(buffer_mutex); + bool buffer_end_is_past_eof ABSL_GUARDED_BY(buffer_mutex); + std::string buffer ABSL_GUARDED_BY(buffer_mutex); + GCSFile(std::string path, bool is_cache_enable, uint64_t buffer_size, + ReadFn read_fn) + : path(path), + is_cache_enable(is_cache_enable), + buffer_size(buffer_size), + buffer_mutex(), + buffer_start(0), + buffer_end_is_past_eof(false), + buffer(), + read_fn(std::move(read_fn)) {} } GCSFile; void Cleanup(TF_RandomAccessFile* file) { @@ -141,30 +159,50 @@ void Cleanup(TF_RandomAccessFile* file) { delete gcs_file; } -// TODO(vnvo2409): Adding cache. +static void FillBuffer(uint64_t start, GCSFile* gcs_file, TF_Status* status) { + gcs_file->buffer_start = start; + gcs_file->buffer.resize(gcs_file->buffer_size); + auto read = + gcs_file->read_fn(gcs_file->path, gcs_file->buffer_start, + gcs_file->buffer_size, &(gcs_file->buffer[0]), status); + gcs_file->buffer_end_is_past_eof = (TF_GetCode(status) == TF_OUT_OF_RANGE); + gcs_file->buffer.resize(read); +} + // `google-cloud-cpp` is working on a feature that we may want to use. // See https://github.com/googleapis/google-cloud-cpp/issues/4013. 
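// Read() below serves small reads from an in-memory read-ahead buffer: when
// the block cache is enabled, or the request is larger than `buffer_size`,
// it falls through to `read_fn` directly; otherwise it copies whatever the
// buffer already covers and refills the buffer via FillBuffer() for the rest.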
int64_t Read(const TF_RandomAccessFile* file, uint64_t offset, size_t n, char* buffer, TF_Status* status) { auto gcs_file = static_cast(file->plugin_file); - auto stream = gcs_file->gcs_client->ReadObject( - gcs_file->bucket, gcs_file->object, gcs::ReadRange(offset, offset + n)); - TF_SetStatusFromGCSStatus(stream.status(), status); - if ((TF_GetCode(status) != TF_OK) && - (TF_GetCode(status) != TF_OUT_OF_RANGE)) { - return -1; + if (gcs_file->is_cache_enable || n > gcs_file->buffer_size) { + return gcs_file->read_fn(gcs_file->path, offset, n, buffer, status); + } else { + absl::MutexLock l(&gcs_file->buffer_mutex); + size_t buffer_end = gcs_file->buffer_start + gcs_file->buffer.size(); + size_t copy_size = 0; + if (offset < buffer_end && gcs_file->buffer_start) { + copy_size = (std::min)(n, static_cast(buffer_end - offset)); + memcpy(buffer, + gcs_file->buffer.data() + (offset - gcs_file->buffer_start), + copy_size); + } + bool consumed_buffer_to_eof = + offset + copy_size >= buffer_end && gcs_file->buffer_end_is_past_eof; + if (copy_size < n && !consumed_buffer_to_eof) { + FillBuffer(offset + copy_size, gcs_file, status); + if (TF_GetCode(status) != TF_OK && + TF_GetCode(status) != TF_OUT_OF_RANGE) { + // Empty the buffer to avoid caching bad reads. + gcs_file->buffer.resize(0); + return -1; + } + size_t remaining_copy = + (std::min)(n - copy_size, gcs_file->buffer.size()); + memcpy(buffer + copy_size, gcs_file->buffer.data(), remaining_copy); + copy_size += remaining_copy; + } + return copy_size; } - int64_t read; - if (!absl::SimpleAtoi(stream.headers().find("content-length")->second, - &read)) { - TF_SetStatus(status, TF_UNKNOWN, "Could not get content-length header"); - return -1; - } - if (read != n) { - TF_SetStatus(status, TF_OUT_OF_RANGE, "Read less bytes than requested"); - } - stream.read(buffer, read); - return read; } } // namespace tf_random_access_file @@ -407,8 +445,29 @@ void NewRandomAccessFile(const TF_Filesystem* filesystem, const char* path, if (TF_GetCode(status) != TF_OK) return; auto gcs_file = static_cast(filesystem->plugin_filesystem); + bool is_cache_enabled; + { + absl::MutexLock l(&gcs_file->block_cache_lock); + is_cache_enabled = gcs_file->file_block_cache->IsCacheEnabled(); + } + auto read_fn = [gcs_file, is_cache_enabled]( + const std::string& path, uint64_t offset, size_t n, + char* buffer, TF_Status* status) -> int64_t { + // TODO(vnvo2409): Check for `stat_cache`. + auto read = + is_cache_enabled + ? gcs_file->file_block_cache->Read(path, offset, n, buffer, status) + : LoadBufferFromGCS(path, offset, n, buffer, &gcs_file->gcs_client, + status); + if (TF_GetCode(status) != TF_OK) return -1; + if (read < n) + TF_SetStatus(status, TF_OUT_OF_RANGE, "Read less bytes than requested"); + else + TF_SetStatus(status, TF_OK, ""); + return read; + }; file->plugin_file = new tf_random_access_file::GCSFile( - {std::move(bucket), std::move(object), &gcs_file->gcs_client}); + std::move(path), is_cache_enabled, gcs_file->block_size, read_fn); TF_SetStatus(status, TF_OK, ""); } From cb15007a07143af950ec9c85bb94a5adafa3723b Mon Sep 17 00:00:00 2001 From: Yash Katariya Date: Fri, 17 Jul 2020 08:57:13 -0700 Subject: [PATCH 0702/2522] Remove the extra link since there is a markdown link already present. 
PiperOrigin-RevId: 321788077 Change-Id: Ibffe4baf6c903c50d3cefd84dd296191e17c34be --- tensorflow/python/ops/numpy_ops/np_utils.py | 4 ++-- tensorflow/python/ops/numpy_ops/np_utils_test.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/ops/numpy_ops/np_utils.py b/tensorflow/python/ops/numpy_ops/np_utils.py index 6a3b7a8e105..6408b9110dc 100644 --- a/tensorflow/python/ops/numpy_ops/np_utils.py +++ b/tensorflow/python/ops/numpy_ops/np_utils.py @@ -293,8 +293,8 @@ def _add_np_doc(doc, np_fun_name, np_f): template = None if template is not None: link = template % np_fun_name - doc += 'See the NumPy documentation for [`numpy.%s`](%s): %s' % ( - np_fun_name, link, link) + doc += 'See the NumPy documentation for `numpy.%s`: %s' % ( + np_fun_name, link) return doc diff --git a/tensorflow/python/ops/numpy_ops/np_utils_test.py b/tensorflow/python/ops/numpy_ops/np_utils_test.py index d780c126a1d..11796f607e9 100644 --- a/tensorflow/python/ops/numpy_ops/np_utils_test.py +++ b/tensorflow/python/ops/numpy_ops/np_utils_test.py @@ -81,8 +81,8 @@ Unsupported arguments: `x`. f docstring. -See the NumPy documentation for [`numpy.np_fun`](%s): %s""" - expected = expected % (link, link) +See the NumPy documentation for `numpy.np_fun`: %s""" + expected = expected % (link) self.assertEqual(expected, f.__doc__) @parameterized.parameters([None, 1, 'a', '1a', '1.1a', '1.1.1a']) From 17330919885c85ea221e695f5c7f6a20afe5e6a6 Mon Sep 17 00:00:00 2001 From: David Kao Date: Fri, 17 Jul 2020 09:14:30 -0700 Subject: [PATCH 0703/2522] Massage docstring for signatures arg of tf.saved_model.save. Previous wording minimized the dictionary option. PiperOrigin-RevId: 321791081 Change-Id: Ib178c31c7ca2465c34f13ffdfbd0b070bb9bd0cf --- tensorflow/python/saved_model/save.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/saved_model/save.py b/tensorflow/python/saved_model/save.py index a4b14fe9892..6631a85f12c 100644 --- a/tensorflow/python/saved_model/save.py +++ b/tensorflow/python/saved_model/save.py @@ -960,15 +960,16 @@ def save(obj, export_dir, signatures=None, options=None): Args: obj: A trackable object to export. export_dir: A directory in which to write the SavedModel. - signatures: Optional, either a `tf.function` with an input signature - specified or the result of `f.get_concrete_function` on a - `@tf.function`-decorated function `f`, in which case `f` will be used to - generate a signature for the SavedModel under the default serving - signature key. `signatures` may also be a dictionary, in which case it - maps from signature keys to either `tf.function` instances with input - signatures or concrete functions. The keys of such a dictionary may be - arbitrary strings, but will typically be from the - `tf.saved_model.signature_constants` module. + signatures: Optional, one of three types: + * a `tf.function` with an input signature specified, which will use the + default serving signature key, + * the result of `f.get_concrete_function` on a `@tf.function`-decorated + function `f`, in which case `f` will be used to generate a signature for + the SavedModel under the default serving signature key, + * a dictionary, which maps signature keys to either `tf.function` + instances with input signatures or concrete functions. Keys of such a + dictionary may be arbitrary strings, but will typically be from the + `tf.saved_model.signature_constants` module. 
options: Optional, `tf.saved_model.SaveOptions` object that specifies options for saving. From 2caabe5b0089dedf5bc5faa59f105149124e66b5 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Fri, 17 Jul 2020 23:35:26 +0700 Subject: [PATCH 0704/2522] Set proper status --- .../filesystem/plugins/gcs/gcs_filesystem.cc | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc index f3985808900..c00c52d9edd 100644 --- a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc @@ -166,7 +166,7 @@ static void FillBuffer(uint64_t start, GCSFile* gcs_file, TF_Status* status) { gcs_file->read_fn(gcs_file->path, gcs_file->buffer_start, gcs_file->buffer_size, &(gcs_file->buffer[0]), status); gcs_file->buffer_end_is_past_eof = (TF_GetCode(status) == TF_OUT_OF_RANGE); - gcs_file->buffer.resize(read); + if (read >= 0) gcs_file->buffer.resize(read); } // `google-cloud-cpp` is working on a feature that we may want to use. @@ -200,7 +200,15 @@ int64_t Read(const TF_RandomAccessFile* file, uint64_t offset, size_t n, (std::min)(n - copy_size, gcs_file->buffer.size()); memcpy(buffer + copy_size, gcs_file->buffer.data(), remaining_copy); copy_size += remaining_copy; + if (copy_size < n) { + // Forget the end-of-file flag to allow for clients that poll on the + // same file. + gcs_file->buffer_end_is_past_eof = false; + TF_SetStatus(status, TF_OUT_OF_RANGE, "Read less bytes than requested"); + return copy_size; + } } + TF_SetStatus(status, TF_OK, ""); return copy_size; } } From 60dada860c59fd4b8bdf4a0e9ec3168011512457 Mon Sep 17 00:00:00 2001 From: Robert David Date: Fri, 17 Jul 2020 09:40:42 -0700 Subject: [PATCH 0705/2522] Nit: cleanup error strings when OpenCL memory allocations fail. 
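The change applies one convention throughout: name the OpenCL call that failed
and append the decoded error code. A reduced sketch of that convention follows;
it assumes the standard <CL/cl.h> entry points and the delegate's
CLErrorCodeToString() helper, and the wrapper name is illustrative rather than
taken from this patch.

  #include <CL/cl.h>
  #include "absl/status/status.h"
  #include "absl/strings/str_cat.h"

  // Illustrative wrapper: report which CL API failed and include the decoded
  // error code, mirroring the messages introduced by this change.
  absl::Status CreateTexture2D(cl_context context, const cl_image_format& format,
                               const cl_image_desc& desc, cl_mem* result) {
    cl_int error_code;
    *result = clCreateImage(context, CL_MEM_READ_WRITE, &format, &desc,
                            /*host_ptr=*/nullptr, &error_code);
    if (error_code != CL_SUCCESS) {
      return absl::UnknownError(
          absl::StrCat("Failed to create 2D texture (clCreateImage): ",
                       CLErrorCodeToString(error_code)));
    }
    return absl::OkStatus();
  }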
PiperOrigin-RevId: 321795454 Change-Id: I636c2ab018532a26a70ec31938cd84560e94525e --- tensorflow/lite/delegates/gpu/cl/buffer.cc | 2 +- tensorflow/lite/delegates/gpu/cl/tensor.cc | 20 +++++++++---------- tensorflow/lite/delegates/gpu/cl/texture2d.cc | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/buffer.cc b/tensorflow/lite/delegates/gpu/cl/buffer.cc index a612be452d0..8639e8bbf18 100644 --- a/tensorflow/lite/delegates/gpu/cl/buffer.cc +++ b/tensorflow/lite/delegates/gpu/cl/buffer.cc @@ -37,7 +37,7 @@ absl::Status CreateBuffer(size_t size_in_bytes, bool gpu_read_only, const_cast(data), &error_code); if (!buffer) { return absl::UnknownError( - absl::StrCat("Failed to allocate device memory with clCreateBuffer", + absl::StrCat("Failed to allocate device memory (clCreateBuffer): ", CLErrorCodeToString(error_code))); } diff --git a/tensorflow/lite/delegates/gpu/cl/tensor.cc b/tensorflow/lite/delegates/gpu/cl/tensor.cc index 6a7e2174b9f..4da3e5e5b63 100644 --- a/tensorflow/lite/delegates/gpu/cl/tensor.cc +++ b/tensorflow/lite/delegates/gpu/cl/tensor.cc @@ -42,13 +42,13 @@ absl::Status CreateImageBufferFromBuffer(const CLContext& context, format.image_channel_data_type = ToImageChannelType(data_type); format.image_channel_order = CL_RGBA; - cl_int error; + cl_int error_code; *result = clCreateImage(context.context(), CL_MEM_READ_WRITE, &format, &desc, - nullptr, &error); - if (error != CL_SUCCESS) { + nullptr, &error_code); + if (error_code != CL_SUCCESS) { return absl::UnknownError( - absl::StrCat("Failed to create Texture2D (clCreateImage)", - CLErrorCodeToString(error))); + absl::StrCat("Failed to create Image from Buffer (clCreateImage): ", + CLErrorCodeToString(error_code))); } return absl::OkStatus(); } @@ -485,7 +485,7 @@ absl::Status AllocateTensorMemory(const CLContext& context, data_size, nullptr, &error_code); if (!memory) { return absl::UnknownError( - absl::StrCat("Failed to allocate device memory with clCreateBuffer", + absl::StrCat("Failed to allocate device memory (clCreateBuffer): ", CLErrorCodeToString(error_code))); } *result = CLMemory(memory, true); @@ -512,7 +512,7 @@ absl::Status AllocateTensorMemory(const CLContext& context, &format, &desc, nullptr, &error_code); if (error_code != CL_SUCCESS) { return absl::UnknownError( - absl::StrCat("Failed to create Texture2D (clCreateImage)", + absl::StrCat("Failed to create 2D texture (clCreateImage): ", CLErrorCodeToString(error_code))); } @@ -540,7 +540,7 @@ absl::Status AllocateTensorMemory(const CLContext& context, &format, &desc, nullptr, &error_code); if (error_code != CL_SUCCESS) { return absl::UnknownError( - absl::StrCat("Failed to create Texture3D (clCreateImage)", + absl::StrCat("Failed to create 3D texture (clCreateImage): ", CLErrorCodeToString(error_code))); } @@ -569,7 +569,7 @@ absl::Status AllocateTensorMemory(const CLContext& context, &format, &desc, nullptr, &error_code); if (error_code != CL_SUCCESS) { return absl::UnknownError( - absl::StrCat("Failed to create TextureArray (clCreateImage)", + absl::StrCat("Failed to create 2D texture array (clCreateImage): ", CLErrorCodeToString(error_code))); } @@ -609,7 +609,7 @@ absl::Status AllocateTensorMemory(const CLContext& context, &format, &desc, nullptr, &error_code); if (error_code != CL_SUCCESS) { return absl::UnknownError( - absl::StrCat("Failed to create Texture2D (clCreateImage)", + absl::StrCat("Failed to create 2D texture (clCreateImage): ", CLErrorCodeToString(error_code))); } diff --git 
a/tensorflow/lite/delegates/gpu/cl/texture2d.cc b/tensorflow/lite/delegates/gpu/cl/texture2d.cc index ec4909dcac1..cbeafe04c05 100644 --- a/tensorflow/lite/delegates/gpu/cl/texture2d.cc +++ b/tensorflow/lite/delegates/gpu/cl/texture2d.cc @@ -49,7 +49,7 @@ absl::Status CreateTexture2D(int width, int height, cl_channel_type type, &desc, data, &error_code); if (error_code != CL_SUCCESS) { return absl::UnknownError( - absl::StrCat("Failed to create Texture2D (clCreateImage)", + absl::StrCat("Failed to create 2D texture (clCreateImage): ", CLErrorCodeToString(error_code))); } From 5e7df63117509f2ab328c3e06785b33634890c46 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Jul 2020 09:43:03 -0700 Subject: [PATCH 0706/2522] Update comments to reflect the new names of `GenericOp` constructor parameters PiperOrigin-RevId: 321795872 Change-Id: I11c4668c547be8b10433cf1f3aee95ff52a16049 --- .../hlo/lib/Dialect/mhlo/transforms/legalize_to_linalg.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_to_linalg.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_to_linalg.cc index 717e9682436..fd6a7617344 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_to_linalg.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_to_linalg.cc @@ -298,8 +298,8 @@ class DataMovementOpConverter : public OpConversionPattern { auto nloops = resultType.getRank(); auto loc = op.getLoc(); auto linalgOp = rewriter.create( - loc, isLHLO ? ArrayRef{} : resultType, args, /*inputCount=*/1, - /*outputCount=*/1, indexing_maps, GetNParallelLoopsAttrs(nloops), + loc, isLHLO ? ArrayRef{} : resultType, args, /*argsIn=*/1, + /*argsOut=*/1, indexing_maps, GetNParallelLoopsAttrs(nloops), [&](OpBuilder& nestedBuilder, Location nestedLoc, ValueRange args) { nestedBuilder.create(loc, *args.begin()); }); @@ -420,7 +420,7 @@ class LhloBroadcastInDimConverter rewriter.create(loc, operand, llvm::makeArrayRef({zero})); rewriter.create( loc, llvm::None, llvm::makeArrayRef(operand_adaptor.output()), - /*inputCount=*/0, /*outputCount=*/1, + /*argsIn=*/0, /*argsOut=*/1, llvm::makeArrayRef(rewriter.getMultiDimIdentityMap(nloops)), GetNParallelLoopsAttrs(nloops), [&](OpBuilder& nestedBuilder, Location nestedLoc, ValueRange args) { @@ -433,7 +433,7 @@ class LhloBroadcastInDimConverter rewriter.create( loc, llvm::None, llvm::makeArrayRef({operand, operand_adaptor.output()}), - /*inputCount=*/1, /*outputCount=*/1, indexing_maps, + /*argsIn=*/1, /*argsOut=*/1, indexing_maps, GetNParallelLoopsAttrs(nloops), [&](OpBuilder& nestedBuilder, Location nestedLoc, ValueRange args) { nestedBuilder.create(loc, *args.begin()); From 86ba317d72f57bdcd66bb65c8f63a0ae38ede1a5 Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Fri, 17 Jul 2020 10:01:27 -0700 Subject: [PATCH 0707/2522] Bump the references to github.com/google/ruy to commit d492ac890d982d7a153a326922f362b10de8d2ad. 
PiperOrigin-RevId: 321799197 Change-Id: Ieb00eb7cb3c247d0585d725314accdd719a42cb7 --- .../lite/micro/tools/make/third_party_downloads.inc | 4 ++-- tensorflow/lite/tools/make/Makefile | 4 ---- tensorflow/lite/tools/make/download_dependencies.sh | 4 ++-- third_party/ruy/workspace.bzl | 8 ++++---- 4 files changed, 8 insertions(+), 12 deletions(-) diff --git a/tensorflow/lite/micro/tools/make/third_party_downloads.inc b/tensorflow/lite/micro/tools/make/third_party_downloads.inc index 05fc08cc601..cd8d6bc6c6a 100644 --- a/tensorflow/lite/micro/tools/make/third_party_downloads.inc +++ b/tensorflow/lite/micro/tools/make/third_party_downloads.inc @@ -56,8 +56,8 @@ SIFIVE_FE310_LIB_MD5 := "06ee24c4956f8e21670ab3395861fe64" KISSFFT_URL="https://github.com/mborgerding/kissfft/archive/v130.zip" KISSFFT_MD5="438ba1fef5783cc5f5f201395cc477ca" -RUY_URL="https://github.com/google/ruy/archive/34ea9f4993955fa1ff4eb58e504421806b7f2e8f.zip" -RUY_MD5="18613212e9c01aba85c7d19010b194a9" +RUY_URL="https://github.com/google/ruy/archive/d492ac890d982d7a153a326922f362b10de8d2ad.zip" +RUY_MD5="3a5c19abc60c3d9a8045ddf6b114067f" CIFAR10_DATASET_URL="https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz" CIFAR10_DATASET_MD5="c32a1d4ab5d03f1284b67883e8d87530" diff --git a/tensorflow/lite/tools/make/Makefile b/tensorflow/lite/tools/make/Makefile index f8b67fbbe7d..7d55370818c 100644 --- a/tensorflow/lite/tools/make/Makefile +++ b/tensorflow/lite/tools/make/Makefile @@ -221,10 +221,6 @@ else CORE_CC_EXCLUDE_SRCS += tensorflow/lite/minimal_logging_ios.cc endif -# Temporary fix for ruy compilation error. -# TODO(b/158800055): Remove this hack once the ruy version is correctly bumped. -CORE_CC_EXCLUDE_SRCS += tensorflow/lite/tools/make/downloads/ruy/ruy/prepare_packed_matrices.cc - # Filter out all the excluded files. 
TF_LITE_CC_SRCS := $(filter-out $(CORE_CC_EXCLUDE_SRCS), $(CORE_CC_ALL_SRCS)) diff --git a/tensorflow/lite/tools/make/download_dependencies.sh b/tensorflow/lite/tools/make/download_dependencies.sh index 0ab8307b07a..f60b937bb96 100755 --- a/tensorflow/lite/tools/make/download_dependencies.sh +++ b/tensorflow/lite/tools/make/download_dependencies.sh @@ -37,8 +37,8 @@ EIGEN_URL="$(grep -o 'https.*gitlab.com/libeigen/eigen/-/archive/.*tar\.gz' "${B EIGEN_SHA="$(eval echo $(grep '# SHARED_EIGEN_SHA' "${BZL_FILE_PATH}" | grep -o '\".*\"'))" GEMMLOWP_URL="$(grep -o 'https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" GEMMLOWP_SHA="$(eval echo $(grep '# SHARED_GEMMLOWP_SHA' "${BZL_FILE_PATH}" | grep -o '\".*\"'))" -RUY_URL="https://github.com/google/ruy/archive/34ea9f4993955fa1ff4eb58e504421806b7f2e8f.zip" -RUY_SHA="8fd4adeeff4f29796bf7cdda64806ec0495a2435361569f02afe3fe33406f07c" +RUY_URL="https://github.com/google/ruy/archive/d492ac890d982d7a153a326922f362b10de8d2ad.zip" +RUY_SHA="e1b38265ab36662c921be260c68dbe28349a539873baabd974a5140ea64f1fe0" GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz" GOOGLETEST_SHA="58a6f4277ca2bc8565222b3bbd58a177609e9c488e8a72649359ba51450db7d8" ABSL_URL="$(grep -o 'https://github.com/abseil/abseil-cpp/.*tar.gz' "${BZL_FILE_PATH}" | head -n1)" diff --git a/third_party/ruy/workspace.bzl b/third_party/ruy/workspace.bzl index ee0faec6eff..35943b04e58 100644 --- a/third_party/ruy/workspace.bzl +++ b/third_party/ruy/workspace.bzl @@ -5,11 +5,11 @@ load("//third_party:repo.bzl", "third_party_http_archive") def repo(): third_party_http_archive( name = "ruy", - sha256 = "8fd4adeeff4f29796bf7cdda64806ec0495a2435361569f02afe3fe33406f07c", - strip_prefix = "ruy-34ea9f4993955fa1ff4eb58e504421806b7f2e8f", + sha256 = "e1b38265ab36662c921be260c68dbe28349a539873baabd974a5140ea64f1fe0", + strip_prefix = "ruy-d492ac890d982d7a153a326922f362b10de8d2ad", urls = [ - "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/ruy/archive/34ea9f4993955fa1ff4eb58e504421806b7f2e8f.zip", - "https://github.com/google/ruy/archive/34ea9f4993955fa1ff4eb58e504421806b7f2e8f.zip", + "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/ruy/archive/d492ac890d982d7a153a326922f362b10de8d2ad.zip", + "https://github.com/google/ruy/archive/d492ac890d982d7a153a326922f362b10de8d2ad.zip", ], build_file = "//third_party/ruy:BUILD", ) From d94980b6923810f91ab6d2de2c40c59d29001fc9 Mon Sep 17 00:00:00 2001 From: Yixing Fu Date: Fri, 17 Jul 2020 17:05:11 +0000 Subject: [PATCH 0708/2522] avoided if statement in smart_resize --- .../python/keras/preprocessing/image.py | 39 ++++++++++--------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/tensorflow/python/keras/preprocessing/image.py b/tensorflow/python/keras/preprocessing/image.py index ecf069958cd..8769b558440 100644 --- a/tensorflow/python/keras/preprocessing/image.py +++ b/tensorflow/python/keras/preprocessing/image.py @@ -120,25 +120,26 @@ def smart_resize(x, size, interpolation='bilinear'): shape = array_ops.shape(img) height, width = shape[0], shape[1] target_height, target_width = size - target_ratio = float(target_height) / target_width - img_ratio = math_ops.cast( - height, 'float32') / math_ops.cast(width, 'float32') - if target_ratio < img_ratio: - crop_height = math_ops.cast( - math_ops.cast(width, 'float32') * target_height / target_width, 'int32') - crop_box_hstart = math_ops.cast( - math_ops.cast(height 
- crop_height, 'float32') / 2, 'int32') - crop_box_start = [crop_box_hstart, 0, 0] - crop_box_size = [crop_height, -1, -1] - else: - crop_width = math_ops.cast( - math_ops.cast(height * target_width, 'float32') / target_height, - 'int32') - crop_box_wstart = math_ops.cast((width - crop_width) / 2, 'int32') - crop_box_start = [0, crop_box_wstart, 0] - crop_box_size = [-1, crop_width, -1] - crop_box_start = array_ops.stack(crop_box_start) - crop_box_size = array_ops.stack(crop_box_size) + + crop_height = math_ops.cast( + math_ops.cast(width * target_height, 'float32') / target_width, + 'int32') + crop_width = math_ops.cast( + math_ops.cast(height * target_width, 'float32') / target_height, + 'int32') + + # Set back to input height / width if crop_height / crop_width is not smaller. + crop_height = math_ops.minimum(height, crop_height) + crop_width = math_ops.minimum(width, crop_width) + + crop_box_hstart = math_ops.cast( + math_ops.cast(height - crop_height, 'float32') / 2, 'int32') + crop_box_wstart = math_ops.cast( + math_ops.cast(width - crop_width, 'float32') / 2, 'int32') + + crop_box_start = array_ops.stack([crop_box_hstart, crop_box_wstart, 0]) + crop_box_size = array_ops.stack([crop_height, crop_width, -1]) + img = array_ops.slice(img, crop_box_start, crop_box_size) img = image_ops.resize_images_v2( images=img, From 802aaf7156f0b0580d9d34990a88fa52a182cd61 Mon Sep 17 00:00:00 2001 From: Yixing Fu Date: Fri, 17 Jul 2020 17:05:58 +0000 Subject: [PATCH 0709/2522] add test coverage for smart_resize on tf.data dataset --- .../python/keras/preprocessing/image_test.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tensorflow/python/keras/preprocessing/image_test.py b/tensorflow/python/keras/preprocessing/image_test.py index d2f4b18f7dd..0fd5e6c8513 100644 --- a/tensorflow/python/keras/preprocessing/image_test.py +++ b/tensorflow/python/keras/preprocessing/image_test.py @@ -30,6 +30,7 @@ from tensorflow.python.keras import layers from tensorflow.python.keras.engine import sequential from tensorflow.python.keras.preprocessing import image as preprocessing_image from tensorflow.python.platform import test +from tensorflow.python.data import Dataset try: import PIL # pylint:disable=g-import-not-at-top @@ -70,6 +71,21 @@ class TestImage(keras_parameterized.TestCase): output = preprocessing_image.smart_resize(test_input, size=(5, 15)) self.assertListEqual(list(output.shape), [5, 15, 3]) + + @test_util.run_v2_only + def test_smart_resize_tf_dataset(self): + test_input_np = np.random.random((2, 20, 40, 3)) + test_ds = Dataset.from_tensor_slices(test_input_np) + + resize = lambda img: preprocessing_image.smart_resize(img, size=size) + + for size in [(50, 50), (10, 10), (100, 50), (5, 15)]: + test_ds = test_ds.map(resize) + for sample in test_ds.as_numpy_iterator(): + self.assertIsInstance(sample, np.ndarray) + self.assertListEqual(list(sample.shape), [size[0], size[1], 3]) + + def test_smart_resize_errors(self): with self.assertRaisesRegex(ValueError, 'a tuple of 2 integers'): preprocessing_image.smart_resize( From 5c4b8790ca2144c9cde4c02cac46ffe12c76e9ef Mon Sep 17 00:00:00 2001 From: Haoyu Zhang Date: Fri, 17 Jul 2020 10:10:53 -0700 Subject: [PATCH 0710/2522] Fix two memory leaks and enable asan for C API remote tests. 
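The leak fix is largely an ownership change: the worker environment now holds
its collective executor manager through a smart pointer instead of a raw
pointer that was never deleted, and the rebuilt worker cache is handed to the
session manager to own via ResetDefaultWorkerCache. A reduced sketch of the
pointer change, with stand-in types rather than the full TensorFlow
definitions:

  #include <memory>

  struct CollectiveExecutorMgrInterface {
    virtual ~CollectiveExecutorMgrInterface() = default;
  };

  // Before: a raw pointer that UpdateServerDef() overwrote without freeing the
  // previous instance.
  struct WorkerEnvBefore {
    CollectiveExecutorMgrInterface* collective_executor_mgr = nullptr;
  };

  // After: resetting the unique_ptr releases the old manager automatically,
  // both on UpdateServerDef() and when the server is destroyed.
  struct WorkerEnvAfter {
    std::unique_ptr<CollectiveExecutorMgrInterface> collective_executor_mgr;
  };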
PiperOrigin-RevId: 321801325 Change-Id: Id579f93e167c9665b4ca740eee160da801ca0694 --- tensorflow/c/c_api_experimental.cc | 4 ++-- tensorflow/c/eager/BUILD | 2 -- .../core/distributed_runtime/master_env.h | 4 ++-- .../rpc/grpc_server_lib.cc | 22 +++++++++++-------- .../core/distributed_runtime/session_mgr.cc | 4 ++++ .../core/distributed_runtime/session_mgr.h | 2 ++ .../core/distributed_runtime/worker_env.h | 2 +- 7 files changed, 24 insertions(+), 16 deletions(-) diff --git a/tensorflow/c/c_api_experimental.cc b/tensorflow/c/c_api_experimental.cc index 831c6a0ad40..531dcd367de 100644 --- a/tensorflow/c/c_api_experimental.cc +++ b/tensorflow/c/c_api_experimental.cc @@ -525,12 +525,12 @@ tensorflow::Status EnableCollectiveOps(const tensorflow::ServerDef& server_def, LOG_AND_RETURN_IF_ERROR(context->StoreCollectiveOpsServer( std::move(new_server), grpc_server->worker_env()->device_mgr, - grpc_server->worker_env()->collective_executor_mgr)); + grpc_server->worker_env()->collective_executor_mgr.get())); } else { LOG_AND_RETURN_IF_ERROR(grpc_server->UpdateServerDef(server_def)); LOG_AND_RETURN_IF_ERROR(context->StoreCollectiveOpsServer( /*new_server=*/nullptr, grpc_server->worker_env()->device_mgr, - grpc_server->worker_env()->collective_executor_mgr)); + grpc_server->worker_env()->collective_executor_mgr.get())); } return tensorflow::Status::OK(); #undef LOG_AND_RETURN_IF_ERROR diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index a77e76644b8..f56f8ad0a4b 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -514,7 +514,6 @@ tf_cuda_cc_test( extra_copts = tfe_xla_copts(), tags = [ "no_windows", - "noasan", # leaks gRPC server instances ], deps = [ ":c_api", @@ -581,7 +580,6 @@ tf_cuda_cc_test( extra_copts = tfe_xla_copts(), tags = [ "no_windows", - "noasan", # leaks gRPC server instances ], deps = [ ":c_api", diff --git a/tensorflow/core/distributed_runtime/master_env.h b/tensorflow/core/distributed_runtime/master_env.h index 837ccd1dd48..64b73dfac22 100644 --- a/tensorflow/core/distributed_runtime/master_env.h +++ b/tensorflow/core/distributed_runtime/master_env.h @@ -62,7 +62,7 @@ struct WorkerCacheFactoryOptions { struct MasterEnv { Env* env = nullptr; - // Object from which WorkerInterface instances can be obtained. + // Object from which WorkerInterface instances can be obtained. Not owned. WorkerCacheInterface* worker_cache = nullptr; // The operation definitions to use. Must be filled before use. @@ -93,7 +93,7 @@ struct MasterEnv { worker_cache_factory; // Generates per-step CollectiveExecutors and has access to utilities - // supporting collective operations. + // supporting collective operations. Not owned. 
CollectiveExecutorMgrInterface* collective_executor_mgr = nullptr; }; diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc index c0b4d0ef6ec..9fe6eef5610 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc @@ -267,9 +267,9 @@ Status GrpcServer::Init(const GrpcServerOptions& opts) { CHECK_NE(nullptr, worker_cache); if (opts.collective_mgr_func) { - worker_env_.collective_executor_mgr = - opts.collective_mgr_func(config, &worker_env_, worker_cache); - if (!worker_env_.collective_executor_mgr) { + worker_env_.collective_executor_mgr.reset( + opts.collective_mgr_func(config, &worker_env_, worker_cache)); + if (worker_env_.collective_executor_mgr == nullptr) { return errors::Internal( "collective_mgr_func did not return CollectiveExecutorMgr"); } @@ -281,9 +281,9 @@ Status GrpcServer::Init(const GrpcServerOptions& opts) { new CollectiveParamResolverDistributed(config, worker_env_.device_mgr, dev_resolver.get(), worker_cache, default_worker_name)); - worker_env_.collective_executor_mgr = new RpcCollectiveExecutorMgr( + worker_env_.collective_executor_mgr.reset(new RpcCollectiveExecutorMgr( config, worker_env_.device_mgr, std::move(dev_resolver), - std::move(param_resolver), worker_cache, default_worker_name); + std::move(param_resolver), worker_cache, default_worker_name)); } // Set up worker environment. @@ -299,7 +299,8 @@ Status GrpcServer::Init(const GrpcServerOptions& opts) { // Finish setting up master environment. master_env_.ops = OpRegistry::Global(); master_env_.worker_cache = worker_cache; - master_env_.collective_executor_mgr = worker_env_.collective_executor_mgr; + master_env_.collective_executor_mgr = + worker_env_.collective_executor_mgr.get(); StatsPublisherFactory stats_factory = opts.stats_factory; master_env_.master_session_factory = [config, stats_factory]( @@ -433,6 +434,8 @@ Status GrpcServer::UpdateServerDef(const ServerDef& server_def) { return errors::InvalidArgument( "Failed to build worker cache with the provided server def."); } + // Transfer ownership of worker_cache to worker_env_.session_mgr. 
+ worker_env_.session_mgr->ResetDefaultWorkerCache(worker_cache); string default_worker_name; string unused; @@ -447,13 +450,14 @@ Status GrpcServer::UpdateServerDef(const ServerDef& server_def) { new CollectiveParamResolverDistributed( server_def_.default_session_config(), worker_env_.device_mgr, dev_resolver.get(), worker_cache, default_worker_name)); - worker_env_.collective_executor_mgr = new RpcCollectiveExecutorMgr( + worker_env_.collective_executor_mgr.reset(new RpcCollectiveExecutorMgr( server_def_.default_session_config(), worker_env_.device_mgr, std::move(dev_resolver), std::move(param_resolver), worker_cache, - default_worker_name); + default_worker_name)); master_env_.worker_cache = worker_cache; - master_env_.collective_executor_mgr = worker_env_.collective_executor_mgr; + master_env_.collective_executor_mgr = + worker_env_.collective_executor_mgr.get(); return Status::OK(); } diff --git a/tensorflow/core/distributed_runtime/session_mgr.cc b/tensorflow/core/distributed_runtime/session_mgr.cc index 1d9a22a5817..37f47848f75 100644 --- a/tensorflow/core/distributed_runtime/session_mgr.cc +++ b/tensorflow/core/distributed_runtime/session_mgr.cc @@ -144,6 +144,10 @@ Status SessionMgr::CreateSession( return Status::OK(); } +void SessionMgr::ResetDefaultWorkerCache(WorkerCacheInterface* worker_cache) { + default_worker_cache_.reset(worker_cache); +} + Status SessionMgr::UpdateSession( const string& session, const ServerDef& server_def, const protobuf::RepeatedPtrField& diff --git a/tensorflow/core/distributed_runtime/session_mgr.h b/tensorflow/core/distributed_runtime/session_mgr.h index 8c438dbd83e..a9467708870 100644 --- a/tensorflow/core/distributed_runtime/session_mgr.h +++ b/tensorflow/core/distributed_runtime/session_mgr.h @@ -53,6 +53,8 @@ class SessionMgr { const protobuf::RepeatedPtrField& device_attributes, bool isolate_session_state); + void ResetDefaultWorkerCache(WorkerCacheInterface* worker_cache); + // Updates state (worker cache, devices) of worker session identified by // session name (`session`) based on a new server_def and set of devices. Status UpdateSession(const string& session, const ServerDef& server_def, diff --git a/tensorflow/core/distributed_runtime/worker_env.h b/tensorflow/core/distributed_runtime/worker_env.h index ecc3313d0ce..b308c5e7a18 100644 --- a/tensorflow/core/distributed_runtime/worker_env.h +++ b/tensorflow/core/distributed_runtime/worker_env.h @@ -60,7 +60,7 @@ struct WorkerEnv { // Generates per-step CollectiveExecutors and has access to utilities // supporting collective operations. - CollectiveExecutorMgrInterface* collective_executor_mgr = nullptr; + std::unique_ptr collective_executor_mgr; // A pool of threads for scheduling compute work. thread::ThreadPool* compute_pool = nullptr; From ee08af1842e1b15e138df0b55be12383e92ec40c Mon Sep 17 00:00:00 2001 From: Abhijit Karmarkar Date: Fri, 17 Jul 2020 10:14:46 -0700 Subject: [PATCH 0711/2522] Add alwayslink=1 to :tf32_utils target. 
This fixes TF Serving OSS builds that presently fail with this error: ``` ERROR: /tmpfs/tmp/bazel/external/org_tensorflow/tensorflow/python/keras/api/BUILD:123:1: Couldn't build file external/org_tensorflow/tensorflow/python/keras/api/_v1/__init__.py: Executing genrule @org_tensorflow//tensorflow/python/keras/api:keras_python_api_gen_compat_v1 failed (Exit 1): bash failed: error executing command ImportError: /tmpfs/tmp/bazel/execroot/tf_serving/bazel-out/host/bin/external/org_tensorflow/tensorflow/python/keras/api/create_tensorflow.python_api_keras_python_api_gen_compat_v1.runfiles/org_tensorflow/tensorflow/python/_pywrap_tf32_execution.so: undefined symbol: _ZN10tensorflow20allow_tf32_executionEb ERROR: /tmpfs/tmp/bazel/external/org_tensorflow/tensorflow/python/keras/api/BUILD:137:1: Couldn't build file external/org_tensorflow/tensorflow/python/keras/api/_v2/__init__.py: Executing genrule @org_tensorflow//tensorflow/python/keras/api:keras_python_api_gen_compat_v2 failed (Exit 1): bash failed: error executing command ImportError: /tmpfs/tmp/bazel/execroot/tf_serving/bazel-out/host/bin/external/org_tensorflow/tensorflow/python/keras/api/create_tensorflow.python_api_keras_python_api_gen_compat_v2.runfiles/org_tensorflow/tensorflow/python/_pywrap_tf32_execution.so: undefined symbol: _ZN10tensorflow20allow_tf32_executionEb ``` PiperOrigin-RevId: 321802109 Change-Id: I76ba18f0c5d99b5780c34590c0c337000e8cdb30 --- tensorflow/core/platform/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/core/platform/BUILD b/tensorflow/core/platform/BUILD index ec5d1a59b40..9722ba8f897 100644 --- a/tensorflow/core/platform/BUILD +++ b/tensorflow/core/platform/BUILD @@ -951,6 +951,7 @@ cc_library( srcs = ["tf32_utils.cc"], hdrs = ["tf32_utils.h"], copts = tf_copts(), + alwayslink = 1, ) filegroup( From a15c3bcd9af1b0cc62d4e4f4391bcdf615fe32f0 Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Fri, 17 Jul 2020 10:20:14 -0700 Subject: [PATCH 0712/2522] [tf.data] Update `parallel_batch` optimization description. PiperOrigin-RevId: 321803262 Change-Id: I6815bf16f1f576ae0c75bda3dc962cd468ba58c9 --- .../python/data/experimental/ops/optimization_options.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/data/experimental/ops/optimization_options.py b/tensorflow/python/data/experimental/ops/optimization_options.py index 74a6090c49b..ab1c7b73212 100644 --- a/tensorflow/python/data/experimental/ops/optimization_options.py +++ b/tensorflow/python/data/experimental/ops/optimization_options.py @@ -177,8 +177,13 @@ class OptimizationOptions(options.OptionsBase): parallel_batch = options.create_option( name="parallel_batch", ty=bool, - docstring="Whether to parallelize copying of batch elements. If None, " - "defaults to False.") + docstring="Whether to parallelize copying of batch elements. This " + "optimization is highly experimental and can cause performance " + "degradation (e.g. when the parallelization overhead exceeds the " + "benefits of performing the data copies in parallel). You should only " + "enable this optimization if a) your input pipeline is bottlenecked on " + "batching and b) you have validated that this optimization improves " + "performance. If None, defaults to False.") reorder_data_discarding_ops = options.create_option( name="reorder_data_discarding_ops", From 57b9563b482604f751aab626845102481d4e3f8a Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 17 Jul 2020 10:41:14 -0700 Subject: [PATCH 0713/2522] [tf.lite] Documentation fixes: 1. Add DOxygen \file directives to file-level comments. This is needed because without these, the file-level comments get (wrongly) attached to whatever follows, typically 'namespace tflite'. 2. Fix some cut-and-paste errors in the documentation. 3. Change top-level file comments for interpreter.h and optional_debug_tools.h to use DOxygen markup. 4. Fix a grammar error: "versions ... doesn't ..." => "versions ... don't". PiperOrigin-RevId: 321807896 Change-Id: I8ffd2cc393a235a270022479c605c70afbe4078d --- tensorflow/lite/allocation.h | 4 ++-- tensorflow/lite/interpreter.h | 5 +++-- tensorflow/lite/interpreter_builder.h | 5 ++--- tensorflow/lite/model.h | 5 ++--- tensorflow/lite/model_builder.h | 3 ++- tensorflow/lite/mutable_op_resolver.h | 2 +- tensorflow/lite/optional_debug_tools.h | 5 +++-- 7 files changed, 15 insertions(+), 14 deletions(-) diff --git a/tensorflow/lite/allocation.h b/tensorflow/lite/allocation.h index 60745f105a6..cf9ff5c1332 100644 --- a/tensorflow/lite/allocation.h +++ b/tensorflow/lite/allocation.h @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -// Main abstraction controlling the tflite interpreter. -// See context.h for the API for defining operations (TfLiteRegistration). +/// \file +/// Memory management for TF Lite. #ifndef TENSORFLOW_LITE_ALLOCATION_H_ #define TENSORFLOW_LITE_ALLOCATION_H_ diff --git a/tensorflow/lite/interpreter.h b/tensorflow/lite/interpreter.h index 4543759f407..653283bc234 100644 --- a/tensorflow/lite/interpreter.h +++ b/tensorflow/lite/interpreter.h @@ -12,8 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -// Main abstraction controlling the tflite interpreter. -// See context.h for the API for defining operations (TfLiteRegistration). +/// \file +/// Main abstraction controlling the tflite interpreter. +/// See context.h for the API for defining operations (TfLiteRegistration). #ifndef TENSORFLOW_LITE_INTERPRETER_H_ #define TENSORFLOW_LITE_INTERPRETER_H_ diff --git a/tensorflow/lite/interpreter_builder.h b/tensorflow/lite/interpreter_builder.h index 1b8ae5a8e68..c6638b94835 100644 --- a/tensorflow/lite/interpreter_builder.h +++ b/tensorflow/lite/interpreter_builder.h @@ -12,9 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -/// Deserialization infrastructure for tflite. Provides functionality -/// to go from a serialized tflite model in flatbuffer format to an -/// interpreter. +/// \file +/// Provides functionality to construct an interpreter for a model. 
/// #ifndef TENSORFLOW_LITE_INTERPRETER_BUILDER_H_ #define TENSORFLOW_LITE_INTERPRETER_BUILDER_H_ diff --git a/tensorflow/lite/model.h b/tensorflow/lite/model.h index 84dc00f145b..079b4ad2a40 100644 --- a/tensorflow/lite/model.h +++ b/tensorflow/lite/model.h @@ -12,9 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -/// Deserialization infrastructure for tflite. Provides functionality -/// to go from a serialized tflite model in flatbuffer format to an -/// interpreter. +/// \file +/// Defines tflite::Interpreter and tflite::InterpreterBuilder. /// #ifndef TENSORFLOW_LITE_MODEL_H_ #define TENSORFLOW_LITE_MODEL_H_ diff --git a/tensorflow/lite/model_builder.h b/tensorflow/lite/model_builder.h index 01807103e1e..e4233998a30 100644 --- a/tensorflow/lite/model_builder.h +++ b/tensorflow/lite/model_builder.h @@ -12,9 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +/// \file /// Deserialization infrastructure for tflite. Provides functionality /// to go from a serialized tflite model in flatbuffer format to an -/// interpreter. +/// in-memory representation of the model. /// #ifndef TENSORFLOW_LITE_MODEL_BUILDER_H_ #define TENSORFLOW_LITE_MODEL_BUILDER_H_ diff --git a/tensorflow/lite/mutable_op_resolver.h b/tensorflow/lite/mutable_op_resolver.h index fe5e121424c..69ecbbd6723 100644 --- a/tensorflow/lite/mutable_op_resolver.h +++ b/tensorflow/lite/mutable_op_resolver.h @@ -23,7 +23,7 @@ limitations under the License. namespace tflite { -// Some versions of gcc doesn't support partial specialization in class scope, +// Some versions of gcc don't support partial specialization in class scope, // so these are defined in a namescope. namespace op_resolver_hasher { template diff --git a/tensorflow/lite/optional_debug_tools.h b/tensorflow/lite/optional_debug_tools.h index fb2f78e5ae4..2337f8c8751 100644 --- a/tensorflow/lite/optional_debug_tools.h +++ b/tensorflow/lite/optional_debug_tools.h @@ -12,8 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -// Optional debugging functionality. For small sized binaries, these are not -// needed. +/// \file +/// Optional debugging functionality. +/// For small sized binaries, these are not needed. #ifndef TENSORFLOW_LITE_OPTIONAL_DEBUG_TOOLS_H_ #define TENSORFLOW_LITE_OPTIONAL_DEBUG_TOOLS_H_ From 416d420baf983ed262465218abc699569c305af3 Mon Sep 17 00:00:00 2001 From: Wenhao Jia Date: Fri, 17 Jul 2020 10:57:26 -0700 Subject: [PATCH 0714/2522] Use TF_DISALLOW_COPY_AND_ASSIGN instead of DISALLOW_COPY_AND_ASSIGN. 
PiperOrigin-RevId: 321812018 Change-Id: I6a88e83da82d873fe4fd601e30d76b7c9d1e5d1b --- tensorflow/core/tpu/kernels/tpu_execute_op.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/tpu/kernels/tpu_execute_op.h b/tensorflow/core/tpu/kernels/tpu_execute_op.h index 2079f9afdc5..c66118ad45e 100644 --- a/tensorflow/core/tpu/kernels/tpu_execute_op.h +++ b/tensorflow/core/tpu/kernels/tpu_execute_op.h @@ -20,6 +20,7 @@ limitations under the License. #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/mutex.h" namespace tensorflow { @@ -47,7 +48,7 @@ class TPUExecuteOp : public AsyncOpKernel { private: Status DoWork(OpKernelContext* context); - DISALLOW_COPY_AND_ASSIGN(TPUExecuteOp); + TF_DISALLOW_COPY_AND_ASSIGN(TPUExecuteOp); }; // A variant of TPUExecuteOp that contains fused device variable reads and @@ -58,7 +59,7 @@ class TPUExecuteAndUpdateVariablesOp : public TPUExecuteOp { ~TPUExecuteAndUpdateVariablesOp() override = default; private: - DISALLOW_COPY_AND_ASSIGN(TPUExecuteAndUpdateVariablesOp); + TF_DISALLOW_COPY_AND_ASSIGN(TPUExecuteAndUpdateVariablesOp); }; } // namespace tensorflow From 92b5bde9aaef792a525f1db983c063f1ff682090 Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Fri, 17 Jul 2020 11:05:40 -0700 Subject: [PATCH 0715/2522] Dynamic literal support PiperOrigin-RevId: 321814205 Change-Id: I9bcdefa3da5dcde203fd07d2415b2173b5d56916 --- tensorflow/compiler/xla/literal.cc | 267 ++++++++++++++++-- tensorflow/compiler/xla/literal.h | 64 ++++- tensorflow/compiler/xla/literal_test.cc | 118 +++++++- .../compiler/xla/service/hlo_evaluator.cc | 4 + .../service/interpreter/executable_base.cc | 20 +- tensorflow/compiler/xla/shape_util.cc | 9 + tensorflow/compiler/xla/shape_util.h | 3 + 7 files changed, 451 insertions(+), 34 deletions(-) diff --git a/tensorflow/compiler/xla/literal.cc b/tensorflow/compiler/xla/literal.cc index 73c37d6b2f3..c09a5fd8ac9 100644 --- a/tensorflow/compiler/xla/literal.cc +++ b/tensorflow/compiler/xla/literal.cc @@ -48,6 +48,10 @@ namespace { using absl::StrCat; constexpr bool kLittleEndian = __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__; +// Literals can be used as DMA targets, which can require alignment. We +// force a tensorflow::Allocator::kAllocatorAlignment-byte minimum +// alignment. +constexpr int kMinimumAlignment = 64; // Converts between little and big endian. // @@ -133,12 +137,14 @@ void Literal::SetPiece(const Shape& shape, Piece* piece, bool allocate_arrays) { } } else if (shape.IsArray()) { if (allocate_arrays) { - // Literals can be used as DMA targets, which can require alignment. We - // force a tensorflow::Allocator::kAllocatorAlignment-byte minimum - // alignment. 
- constexpr int kMinimumAlignment = 64; piece->set_buffer(static_cast(tensorflow::port::AlignedMalloc( piece->size_bytes(), kMinimumAlignment))); + if (shape.is_dynamic()) { + CHECK_EQ(piece->dynamic_size_buffer(), nullptr); + piece->set_dynamic_size_buffer( + static_cast(tensorflow::port::AlignedMalloc( + piece->dynamic_size_buffer_bytes(), kMinimumAlignment))); + } } } else { // If the shape is neither an array nor tuple, then it must be @@ -171,6 +177,9 @@ void Literal::DeallocateBuffers() { if (piece->buffer() != nullptr) { tensorflow::port::AlignedFree(piece->buffer()); } + if (piece->dynamic_size_buffer() != nullptr) { + tensorflow::port::AlignedFree(piece->dynamic_size_buffer()); + } }); } @@ -199,6 +208,15 @@ Literal LiteralBase::CreateFromShape(const Shape& shape) { return literal; } +int32 LiteralBase::GetDynamicSize(int64 dim_index) const { + return GetDynamicSize(dim_index, {}); +} + +int32 LiteralBase::GetDynamicSize(int64 dim_index, + const ShapeIndex& shape_index) const { + return piece(shape_index).GetDynamicSize(dim_index); +} + absl::optional LiteralBase::GetFirstInteger() const { switch (shape().element_type()) { case U8: @@ -381,7 +399,9 @@ std::vector Literal::DecomposeTuple() { // Move the respective buffer over to the element Literal. dest_piece->set_buffer(src_piece.buffer()); + dest_piece->set_dynamic_size_buffer(src_piece.dynamic_size_buffer()); src_piece.set_buffer(nullptr); + src_piece.set_dynamic_size_buffer(nullptr); }); } // Set this literal to be nil-shaped. @@ -407,23 +427,51 @@ void CopyElementsBetween(absl::Span dest, src[IndexUtil::MultidimensionalIndexToLinearIndex(src_shape, index)]; } while (IndexUtil::BumpIndices(dest_shape, absl::MakeSpan(index))); } - } // namespace -Status LiteralBase::Piece::CopyFrom(const LiteralBase::Piece& src) { +int32 LiteralBase::Piece::GetDynamicSize(int64 dim_index) const { + CHECK(LayoutUtil::IsDenseArray(subshape())); + if (!subshape_->is_dynamic_dimension(dim_index)) { + // This is a static dimension, return size. + return subshape_->dimensions(dim_index); + } + CHECK_NE(dynamic_size_buffer(), nullptr); + return dynamic_size_buffer_[dim_index]; +} + +void LiteralBase::Piece::SetDynamicSize(int64 dim_index, int32 size) { + CHECK(LayoutUtil::IsDenseArray(subshape())); + CHECK(subshape_->is_dynamic_dimension(dim_index)); + if (dynamic_size_buffer() == nullptr) { + // Lazily initialize the dynamic size buffer. + set_dynamic_size_buffer(static_cast(tensorflow::port::AlignedMalloc( + dynamic_size_buffer_bytes(), kMinimumAlignment))); + /*for (int64 i = 0; i < subshape().rank(); ++i) { + // Initialized to -1 to help debug. + dynamic_size_buffer_[i] = -1; + }*/ + } + dynamic_size_buffer_[dim_index] = size; +} + +Status LiteralBase::Piece::CopyFrom(const LiteralBase::Piece& src, + bool only_dynamic_bound) { CHECK(subshape_ != nullptr); CHECK(src.subshape_ != nullptr); if (ShapeUtil::Equal(subshape(), src.subshape())) { // If the layouts are equal it's faster just to memcpy. 
memcpy(buffer(), src.buffer(), src.size_bytes()); } else { - TF_RET_CHECK(ShapeUtil::Compatible(src.subshape(), subshape())); std::vector origin(subshape().rank(), 0); switch (subshape().element_type()) { -#define COPY_ELEMENTS(XLA_T, NATIVE_T) \ - case (XLA_T): \ - CopyElementsBetween(data(), src.data(), \ - subshape(), src.subshape()); \ +#define COPY_ELEMENTS(XLA_T, NATIVE_T) \ + case (XLA_T): \ + if (only_dynamic_bound) { \ + CopyElementsWithDynamicBound(src); \ + } else { \ + CopyElementsBetween(data(), src.data(), \ + subshape(), src.subshape()); \ + } \ break; COPY_ELEMENTS(U8, uint8); COPY_ELEMENTS(U16, uint16); @@ -447,21 +495,54 @@ Status LiteralBase::Piece::CopyFrom(const LiteralBase::Piece& src) { PrimitiveType_Name(subshape().element_type())); } } + DCHECK_EQ(dynamic_size_buffer_bytes(), src.dynamic_size_buffer_bytes()); + if (subshape().is_dynamic() && src.subshape().is_dynamic()) { + CHECK_NE(dynamic_size_buffer_, nullptr); + CHECK_NE(src.dynamic_size_buffer_, nullptr); + memcpy(dynamic_size_buffer(), src.dynamic_size_buffer(), + src.dynamic_size_buffer_bytes()); + } return Status::OK(); } +void MutableLiteralBase::SetDynamicSize(int64 dim_index, int32 size) { + return SetDynamicSize(dim_index, {}, size); +} + +void MutableLiteralBase::SetDynamicSize(int64 dim_index, + const ShapeIndex& shape_index, + int32 size) { + Shape* subshape_ = ShapeUtil::GetMutableSubshape(shape_.get(), shape_index); + CHECK_GE(subshape_->dimensions(dim_index), size); + if (subshape_->dimensions(dim_index) == size) { + subshape_->set_dynamic_dimension(dim_index, false); + return; + } + subshape_->set_dynamic_dimension(dim_index, true); + piece(shape_index).SetDynamicSize(dim_index, size); +} + Status MutableLiteralBase::CopyFrom(const LiteralSlice& src_literal, const ShapeIndex& dest_shape_index, - const ShapeIndex& src_shape_index) { + const ShapeIndex& src_shape_index, + bool only_dynamic_bound) { const Shape& dest_subshape = ShapeUtil::GetSubshape(shape(), dest_shape_index); const Shape& src_subshape = ShapeUtil::GetSubshape(src_literal.shape(), src_shape_index); - if (!ShapeUtil::Compatible(dest_subshape, src_subshape)) { - return InvalidArgument( - "Destination subshape incompatible with source subshape: %s vs %s", - ShapeUtil::HumanString(dest_subshape), - ShapeUtil::HumanString(src_subshape)); + if (only_dynamic_bound) { + auto bound_shape = dest_subshape.is_static() ? src_subshape : dest_subshape; + auto compact_shape = + dest_subshape.is_static() ? 
dest_subshape : src_subshape; + CHECK(ShapeUtil::DynamicShapeIsCompatible(compact_shape, bound_shape)) + << compact_shape.ToString() << " vs " << bound_shape.ToString(); + } else { + if (!ShapeUtil::Compatible(dest_subshape, src_subshape)) { + return InvalidArgument( + "Destination subshape incompatible with source subshape: %s vs %s", + ShapeUtil::HumanString(dest_subshape), + ShapeUtil::HumanString(src_subshape)); + } } return root_piece_->ForEachMutableSubpieceWithStatus( [&](const ShapeIndex& index, Piece* piece) { @@ -486,7 +567,9 @@ Status MutableLiteralBase::CopyFrom(const LiteralSlice& src_literal, for (int64 i = dest_shape_index.size(); i < index.size(); ++i) { src_piece_index.push_back(index[i]); } - TF_RETURN_IF_ERROR(piece->CopyFrom(src_literal.piece(src_piece_index))); + TF_RETURN_IF_ERROR( + piece->CopyFrom(src_literal.piece(src_piece_index), + /*only_dynamic_bound=*/only_dynamic_bound)); return Status::OK(); }); } @@ -514,7 +597,9 @@ Status Literal::MoveFrom(Literal&& src_literal, } Piece& dest_piece = piece(dest_index); tensorflow::port::AlignedFree(dest_piece.buffer()); + tensorflow::port::AlignedFree(dest_piece.dynamic_size_buffer()); dest_piece.set_buffer(src_piece.buffer()); + dest_piece.set_dynamic_size_buffer(src_piece.dynamic_size_buffer()); }); src_literal.shape_ = absl::make_unique(ShapeUtil::MakeNil()); @@ -629,6 +714,41 @@ Literal LiteralBase::Relayout(const Shape& shape_with_layout) const { return result; } +Literal LiteralBase::ToBoundedDynamic(const Shape& bounded_shape) const { + CHECK(bounded_shape.is_dynamic()); + Literal result(bounded_shape); + ShapeUtil::ForEachSubshape( + shape(), [&](const Shape& subshape, const ShapeIndex& index) { + if (!subshape.IsArray()) { + return; + } + for (int64 i = 0; i < subshape.rank(); ++i) { + result.SetDynamicSize(i, subshape.dimensions(i)); + } + }); + TF_CHECK_OK(result.CopyFrom(*this, {}, {}, /*only_dynamic_bound=*/true)); + + return result; +} + +Literal LiteralBase::ToStatic() const { + // Create new shape with 'new_layout' set at the given shape index. 
+ Shape new_shape = shape(); + ShapeUtil::ForEachMutableSubshape( + &new_shape, [this](Shape* subshape, const ShapeIndex& index) { + if (!subshape->IsArray()) { + return; + } + for (int64 i = 0; i < subshape->rank(); ++i) { + subshape->set_dynamic_dimension(i, false); + subshape->set_dimensions(i, GetDynamicSize(i, index)); + } + }); + Literal result(new_shape); + TF_CHECK_OK(result.CopyFrom(*this, {}, {}, /*only_dynamic_bound=*/true)); + return result; +} + StatusOr LiteralBase::Broadcast( const Shape& result_shape, absl::Span dimensions) const { if (!shape().IsArray()) { @@ -652,6 +772,11 @@ StatusOr LiteralBase::Broadcast( const int64 primitive_size = ShapeUtil::ByteSizeOfPrimitiveType(shape().element_type()); + for (int64 i = 0; i < dimensions.size(); ++i) { + int64 dynamic_size = GetDynamicSize(i); + result.SetDynamicSize(dimensions[i], dynamic_size); + } + ShapeUtil::ForEachIndex( result_shape, [&](absl::Span output_index) { for (int64 i = 0; i < dimensions.size(); ++i) { @@ -674,6 +799,9 @@ StatusOr LiteralBase::Reshape( if (!shape().IsArray()) { return InvalidArgument("Reshape does not support tuples."); } + if (shape().is_dynamic()) { + return Unimplemented("Dynamic reshape is not implemented."); + } Literal output; if (!LayoutUtil::IsMonotonicWithDim0Major(shape().layout())) { output = Relayout(LayoutUtil::GetDefaultLayoutForRank(shape().rank())); @@ -728,6 +856,9 @@ Literal LiteralBase::Transpose(absl::Span permutation) const { layout->add_minor_to_major(inverse_permutation[index]); } Literal new_literal(permuted_shape); + for (int64 i = 0; i < shape().rank(); i++) { + new_literal.SetDynamicSize(inverse_permutation[i], GetDynamicSize(i)); + } DCHECK_EQ(ShapeUtil::ByteSizeOf(new_literal.shape()), ShapeUtil::ByteSizeOf(shape())); std::memcpy(new_literal.untyped_data(), untyped_data(), size_bytes()); @@ -747,6 +878,14 @@ Literal LiteralBase::SliceInternal( return Get(new_indices); }) .ok()); + for (int64 dnum = 0; dnum < shape().rank(); ++dnum) { + if (shape().is_dynamic_dimension(dnum)) { + int64 dynamic_size = GetDynamicSize(dnum) - start_indices[dnum]; + CHECK_GE(dynamic_size, 0) << GetDynamicSize(dnum); + dynamic_size = std::min(dynamic_size, result_shape.dimensions(dnum)); + result_literal.SetDynamicSize(dnum, dynamic_size); + } + } return result_literal; } @@ -763,9 +902,10 @@ Literal LiteralBase::Slice(absl::Span start_indices, CHECK_GE(dimension, 0) << "dnum = " << dnum; result_dimensions.push_back(dimension); } - const auto result_shape = + auto result_shape = ShapeUtil::MakeShapeWithLayout(shape().element_type(), result_dimensions, LayoutUtil::MinorToMajor(shape())); + ShapeUtil::CopyDynamicDimensions(&result_shape, shape()); switch (result_shape.element_type()) { case PRED: return SliceInternal(result_shape, start_indices); @@ -1082,11 +1222,24 @@ void DenseArrayToStringHelper(const LiteralBase& literal, if (print_shape) { pieces->push_back(ShapeToString(print_layout, subshape)); + if (subshape.is_dynamic()) { + pieces->push_back("("); + for (int64 i = 0; i < subshape.dimensions_size(); ++i) { + pieces->push_back(StrCat(literal.GetDynamicSize(i, shape_index))); + if (i < subshape.dimensions_size() - 1) { + pieces->push_back(","); + } + } + pieces->push_back(")"); + } pieces->push_back(" "); } std::vector indices = {}; - std::vector dimensions(subshape.dimensions().begin(), - subshape.dimensions().end()); + std::vector dimensions; + dimensions.reserve(subshape.rank()); + for (int64 i = 0; i < subshape.rank(); ++i) { + dimensions.push_back(literal.GetDynamicSize(i, 
shape_index)); + } to_string_recursive(dimensions, &indices); } @@ -1374,13 +1527,44 @@ StatusOr LiteralBase::ConvertToShape(const Shape& dest_shape) const { return literal; } +template +void LiteralBase::Piece::CopyElementsWithDynamicBound( + const LiteralBase::Piece& src) { + auto dest_shape = subshape(); + auto src_shape = src.subshape(); + + // At least one shape has to be static as bound. + CHECK(dest_shape.is_static() || src_shape.is_static()); + auto bound_shape = dest_shape.is_static() ? src_shape : dest_shape; + if (ShapeUtil::IsZeroElementArray(dest_shape)) { + return; + } + std::vector index(dest_shape.rank()); + do { + bool out_of_bound = false; + for (int64 i = 0; i < index.size(); ++i) { + // Do not copy elements beyond dynamic bound. + if (index[i] >= GetDynamicSize(i) || index[i] >= src.GetDynamicSize(i)) { + out_of_bound = true; + } + } + if (out_of_bound) { + continue; + } + data()[IndexUtil::MultidimensionalIndexToLinearIndex(dest_shape, + index)] = + src.data()[IndexUtil::MultidimensionalIndexToLinearIndex( + src_shape, index)]; + } while (IndexUtil::BumpIndices(bound_shape, absl::MakeSpan(index))); +} + template bool LiteralBase::Piece::EqualElementsInternal( const LiteralBase::Piece& other, std::vector* multi_index) const { if (multi_index->size() == subshape().rank()) { return (Get(*multi_index) == other.Get(*multi_index)); } - for (int64 i = 0; i < subshape().dimensions(multi_index->size()); ++i) { + for (int64 i = 0; i < GetDynamicSize(multi_index->size()); ++i) { multi_index->push_back(i); if (!EqualElementsInternal(other, multi_index)) { return false; @@ -1390,10 +1574,26 @@ bool LiteralBase::Piece::EqualElementsInternal( return true; } +bool LiteralBase::Piece::EqualDynamicSize( + const LiteralBase::Piece& other) const { + DCHECK(ShapeUtil::Compatible(subshape(), other.subshape())); + if (subshape().is_static()) { + return true; + } + + for (int64 i = 0; i < subshape().rank(); ++i) { + if (GetDynamicSize(i) != other.GetDynamicSize(i)) { + return false; + } + } + return true; +} + bool LiteralBase::Piece::EqualElements(const LiteralBase::Piece& other) const { DCHECK(ShapeUtil::Compatible(subshape(), other.subshape())); - if (ShapeUtil::Equal(subshape(), other.subshape()) && + if (subshape().is_static() && + ShapeUtil::Equal(subshape(), other.subshape()) && LayoutUtil::IsDenseArray(subshape())) { CHECK_EQ(size_bytes(), other.size_bytes()); return memcmp(buffer(), other.buffer(), size_bytes()) == 0; @@ -1436,17 +1636,33 @@ bool LiteralBase::Piece::EqualElements(const LiteralBase::Piece& other) const { } bool LiteralBase::operator==(const LiteralBase& other) const { - if (!ShapeUtil::Compatible(shape(), other.shape())) { + // Checking the structure of tuple literals. Checks for dense arrays are + // performed below. 
+ if (!ShapeUtil::EqualStructure(shape(), other.shape())) { return false; } return root_piece().ForEachSubpieceWithBool( [&](const ShapeIndex& index, const Piece& piece) { + const Piece& other_piece = other.piece(index); + const Shape& subshape = piece.subshape(); + const Shape& other_subshape = other_piece.subshape(); + if (subshape.element_type() != other_subshape.element_type()) { + return false; + } if (!piece.subshape().IsArray()) { return true; } + if (subshape.rank() != other_subshape.rank()) { + return false; + } + + for (int64 i = 0; i < subshape.rank(); ++i) { + if (piece.GetDynamicSize(i) != other_piece.GetDynamicSize(i)) { + return false; + } + } - const Piece& other_piece = other.piece(index); if (!piece.EqualElements(other_piece)) { return false; } @@ -2035,6 +2251,7 @@ void MutableBorrowingLiteral::CopyPieceSubtree(const Shape& shape, } } else if (shape.IsArray()) { dest_piece->set_buffer(src_piece->buffer()); + dest_piece->set_dynamic_size_buffer(src_piece->dynamic_size_buffer()); } else { // If the shape is neither an array nor tuple, then it must be // zero-sized. Otherwise, some memory needs to be allocated for it. diff --git a/tensorflow/compiler/xla/literal.h b/tensorflow/compiler/xla/literal.h index a2be92fbf5b..1ee71618887 100644 --- a/tensorflow/compiler/xla/literal.h +++ b/tensorflow/compiler/xla/literal.h @@ -112,6 +112,10 @@ class LiteralBase { template NativeT Get(absl::Span multi_index) const; + // Get the dynamic size on dim_index in the literal at the given shape_index. + int32 GetDynamicSize(int64 dim_index, const ShapeIndex& shape_index) const; + int32 GetDynamicSize(int64 dim_index) const; + // Returns the element value at index (0, ..., 0), however many zeroes are // required for that index. template @@ -281,6 +285,18 @@ class LiteralBase { // than being limited to a single array within the shape. Literal Relayout(const Shape& shape_with_layout) const; + // Generate a new literal whose static sizes are equal to the previous + // literal's dynamic sizes. + Literal ToStatic() const; + + // Expand a static literal into a new one with a bounded dyanmic literal. The + // static dimensions of the original literal becomes dynamic dimensions of the + // new literal, where the argument `bounded_shape` becomes the bounded shape + // of the new literal. + // + // Precondition: bounded_shape.is_dynamic() + Literal ToBoundedDynamic(const Shape& bounded_shape) const; + // Creates a new literal by reshaping this literal to have the given // dimensions. The total number of elements must not change; The // implementation currently only supports monotonic dim0-major layouts. @@ -354,10 +370,22 @@ class LiteralBase { template void Set(absl::Span index, NativeT value); + int32 GetDynamicSize(int64 dim_index) const; + void SetDynamicSize(int64 dim_index, int32 size); // Gets/sets the buffer holding the array data. char* buffer() const { return buffer_; } void set_buffer(char* buffer) { buffer_ = buffer; } + // Gets/sets the buffer holding dynamic sizes. + int32* dynamic_size_buffer() const { return dynamic_size_buffer_; } + void set_dynamic_size_buffer(int32* dynamic_size_buffer) { + dynamic_size_buffer_ = dynamic_size_buffer; + } + + int64 dynamic_size_buffer_bytes() const { + return subshape().dimensions_size() * sizeof(int32); + } + // Gets or sets the subshape of this piece. This reference points to a // subshape within the shape in the containing Literal (Literal::shape_). 
const Shape& subshape() const { return *subshape_; } @@ -434,15 +462,21 @@ class LiteralBase { } // Returns true if this piece and 'other' contain the same data. This piece - // and 'other' must be array-shaped and compatible. + // and 'other' must be array-shaped and compatible. If a literal has dynamic + // shape, comparison is done only for the valid elements. bool EqualElements(const Piece& other) const; + // Returns true if this piece and other pieces have the same dynamic + // dimension sizes. + bool EqualDynamicSize(const Piece& other) const; + // Writes the shape and data (if array-shaped) into the given proto. void WriteToProto(LiteralProto* proto) const; // Copy the data from 'src' into this piece's buffer. Shapes of this piece - // and src must be compatible. - Status CopyFrom(const Piece& src); + // and src must be compatible. If only_dynamic_bound is true, only elements + // within dynamic bounds will be copied. + Status CopyFrom(const Piece& src, bool only_dynamic_bound); // Copies the data from the given proto into this piece. The shape of this // piece must be equal (not just compatible) to the shape of the proto. @@ -497,9 +531,15 @@ class LiteralBase { bool EqualElementsInternal(const Piece& other, std::vector* multi_index) const; + // Internal helper to copy elements from another given piece + template + void CopyElementsWithDynamicBound(const LiteralBase::Piece& src); + // For array-shaped pieces, this is the buffer holding the literal data. char* buffer_ = nullptr; + int32* dynamic_size_buffer_ = nullptr; + // The shape of piece. This points into the shape of the containing Literal // (Literal::shape_). const Shape* subshape_ = nullptr; @@ -550,6 +590,11 @@ class MutableLiteralBase : public LiteralBase { // mutate the shape as this can produce malformed Literals. Shape* mutable_shape_do_not_use() { return shape_.get(); } + // Set the dynamic size on dim_index in the literal at the given shape_index. + void SetDynamicSize(int64 dim_index, const ShapeIndex& shape_index, + int32 size); + void SetDynamicSize(int64 dim_index, int32 size); + // Returns a pointer to the underlying buffer holding the array at the given // shape index. CHECKs if the subshape of the literal at the given ShapeIndex // is not array. @@ -560,10 +605,12 @@ class MutableLiteralBase : public LiteralBase { // Copy values from 'src_literal' rooted at 'src_shape_index' into this // literal rooted at 'dest_shape_index'. The subshape of this literal rooted // at 'dest_shape_index' must be compatible with the subshape of 'src_literal' - // rooted at 'src_shape_index', but need not be arrays. + // rooted at 'src_shape_index', but need not be arrays. If only_dynamic_bound + // is true, only elements within dynamic bounds will be copied. 
Status CopyFrom(const LiteralSlice& src_literal, const ShapeIndex& dest_shape_index = {}, - const ShapeIndex& src_shape_index = {}); + const ShapeIndex& src_shape_index = {}, + bool only_dynamic_bound = false); // Copies the values from src_literal, starting at src_base shape indexes, // to this literal, starting at dest_base, where the copy size in each @@ -924,9 +971,14 @@ void LiteralBase::EachCell( return; } std::vector indices(shape().rank(), 0); + + Shape shape_dynamic = shape(); + for (int64 i = 0; i < shape_dynamic.rank(); ++i) { + shape_dynamic.set_dimensions(i, GetDynamicSize(i)); + } do { per_cell(indices, Get(indices)); - } while (IndexUtil::BumpIndices(shape(), absl::MakeSpan(indices))); + } while (IndexUtil::BumpIndices(shape_dynamic, absl::MakeSpan(indices))); } template diff --git a/tensorflow/compiler/xla/literal_test.cc b/tensorflow/compiler/xla/literal_test.cc index 37316a2a807..a58e450a55a 100644 --- a/tensorflow/compiler/xla/literal_test.cc +++ b/tensorflow/compiler/xla/literal_test.cc @@ -149,6 +149,16 @@ TEST_F(LiteralUtilTest, R2ToString) { EXPECT_EQ(expected, literal.ToString()); } +TEST_F(LiteralUtilTest, R2DynamicToString) { + auto literal = LiteralUtil::CreateR2({{1, 2}, {3, 4}, {5, 6}}); + literal.SetDynamicSize(0, {}, 2); + const string expected = R"(s32[<=3,2](2,2) { + { 1, 2 }, + { 3, 4 } +})"; + EXPECT_EQ(expected, literal.ToString()); +} + TEST_F(LiteralUtilTest, R3ToString) { const auto literal = LiteralUtil::CreateR3({{{1}, {2}}, {{3}, {4}}, {{5}, {6}}}); @@ -421,6 +431,28 @@ TEST_F(LiteralUtilTest, TupleEquality) { EXPECT_NE(tuple1, different_tuple); } +TEST_F(LiteralUtilTest, DynamicShapeEquality) { + // Test equality with tuples. + auto r1 = LiteralUtil::CreateR1({1.0, 2.0}); + r1.SetDynamicSize(0, {}, 1); + auto r2 = LiteralUtil::CreateR2({{1.0, 2.0}, {3.0, 4.0}}); + r2.SetDynamicSize(0, {}, 1); + auto tuple1 = LiteralUtil::MakeTuple({&r1, &r2}); + + // Tuple with the same elements. One element is shared with the original + // tuple, the other is a clone of the element in the original tuple. + auto r1_clone = LiteralUtil::CreateR1({1.0, 3.0}); + r1_clone.SetDynamicSize(0, {}, 1); + auto tuple2 = LiteralUtil::MakeTuple({&r1_clone, &r2}); + EXPECT_EQ(tuple1, tuple2); + + // Tuple with different dynamic sizes. + auto r2_clone = LiteralUtil::CreateR2({{1.0, 2.0}, {3.0, 4.0}}); + r2_clone.SetDynamicSize(0, {}, 2); + auto tuple_3 = LiteralUtil::MakeTuple({&r1_clone, &r2_clone}); + EXPECT_NE(tuple1, tuple_3); +} + TEST_F(LiteralUtilTest, C64Equality) { // Test equality with tuples. 
auto vector = LiteralUtil::CreateR1({{1.0, 2.0}, {3.0, 4.0}}); @@ -692,6 +724,47 @@ TEST_F(LiteralUtilTest, TransposeR4) { }); } +TEST_F(LiteralUtilTest, TransposeDynamicR2) { + // F32[2, <=3] (2, 1) + auto original = LiteralUtil::CreateR2({{1, 2, 3}, {4, 5, 6}}); + original.SetDynamicSize(1, 1); + // F32[<=3, 2] (1, 2) + auto reshape = original.Transpose(/*permutation=*/{1, 0}); + + reshape.EachCell([&](absl::Span indices, float value) { + EXPECT_EQ(value, original.Get({indices[1], indices[0]})); + }); +} + +TEST_F(LiteralUtilTest, ToStaticR2) { + // F32[2, <=3] (2, 1) + auto original = LiteralUtil::CreateR2({{1, 2, 3}, {4, 5, 6}}); + original.SetDynamicSize(1, 1); + // F32[2, 1] + auto static_literal = original.ToStatic(); + EXPECT_EQ(static_literal.shape(), ShapeUtil::MakeShape(F32, {2, 1})); + EXPECT_TRUE(static_literal.shape().is_static()); + + static_literal.EachCell( + [&](absl::Span indices, float value) { + EXPECT_EQ(value, original.Get({indices[0], indices[1]})); + }); +} + +TEST_F(LiteralUtilTest, ToBoundedDynamicR2) { + // F32[2, 1] + auto original = LiteralUtil::CreateR2({{1}, {4}}); + // F32[2, <=3] (2, 1) + auto dynamic_shape = ShapeUtil::MakeShape(F32, {2, 3}, {false, true}); + auto dynamic_literal = original.ToBoundedDynamic(dynamic_shape); + EXPECT_EQ(dynamic_literal.shape(), dynamic_shape); + + dynamic_literal.EachCell( + [&](absl::Span indices, float value) { + EXPECT_EQ(value, original.Get({indices[0], indices[1]})); + }); +} + TEST_F(LiteralUtilTest, TestR4RelayoutEquivalence) { // Tests that using Relayout on an array is equivalent to creating it in the // target layout in the first place. @@ -797,6 +870,38 @@ TEST_F(LiteralUtilTest, SliceR3U32Full) { EXPECT_EQ(input_2x3x2, result); } +TEST_F(LiteralUtilTest, SliceR2Dynamic) { + auto input_3x4 = LiteralUtil::CreateR2( + {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}}); + input_3x4.SetDynamicSize(1, 3); + // slice second dim from dynamic size 3 to dynamic size 1. + auto result = input_3x4.Slice({0, 1}, {2, 2}); + auto expected = LiteralUtil::CreateR2({{2}, {6}}); + EXPECT_EQ(expected, result); + EXPECT_EQ(result.GetDynamicSize(1), 1); +} + +TEST_F(LiteralUtilTest, SliceR2DynamicInBound) { + auto input_3x4 = LiteralUtil::CreateR2( + {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}}); + input_3x4.SetDynamicSize(1, 1); + auto result = input_3x4.Slice({0, 0}, {2, 2}); + auto expected = LiteralUtil::CreateR2({{1}, {5}}); + EXPECT_EQ(expected, result); + EXPECT_EQ(result.GetDynamicSize(1), 1); +} + +TEST_F(LiteralUtilTest, SliceR2DynamicOutOfBound) { + auto input_3x4 = LiteralUtil::CreateR2( + {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}}); + input_3x4.SetDynamicSize(1, 1); + auto result = input_3x4.Slice({0, 1}, {2, 3}); + auto expected = LiteralUtil::CreateR2({{}, {}}); + EXPECT_EQ(expected, result); + // Out of bound access clamps into 0 sized dimension. 
+ EXPECT_EQ(result.GetDynamicSize(1), 0); +} + TEST_F(LiteralUtilTest, PopulateR1S64) { Literal output(ShapeUtil::MakeShape(S64, {1})); output.PopulateR1({77}); @@ -1510,7 +1615,7 @@ TEST_F(LiteralUtilTest, CopyFromProto_u16) { EXPECT_EQ(u1, r[3]); } -TEST_F(LiteralUtilTest, LiteralSliceTest) { +TEST_F(LiteralUtilTest, LiteralDynamicSliceTest) { auto scalar = LiteralUtil::CreateR0(1.0); auto matrix = LiteralUtil::CreateR2({{1.0, 2.0}, {3.0, 4.0}}); auto tuple = LiteralUtil::MakeTuple({&scalar, &matrix}); @@ -1973,6 +2078,17 @@ TEST_F(LiteralUtilTest, BroadcastScalarToMatrix) { LiteralUtil::CreateR2({{9, 9}, {9, 9}})); } +TEST_F(LiteralUtilTest, DynamicBroadcast) { + Literal literal = LiteralUtil::CreateR1({1, 2}); + literal.SetDynamicSize(0, 1); + TF_ASSERT_OK_AND_ASSIGN( + Literal broadcasted_literal, + literal.Broadcast(/*result_shape=*/ShapeUtil::MakeShape(S64, {2, 2}), + /*dimensions=*/{1})); + EXPECT_EQ(broadcasted_literal, LiteralUtil::CreateR2({{1}, {1}})); + EXPECT_EQ(broadcasted_literal.GetDynamicSize(1), 1); +} + TEST_F(LiteralUtilTest, GetAsComplex128) { complex128 value = {1, 0}; Literal c1 = LiteralUtil::CreateR0(value); diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index ae8f49df4b4..66e9e01fc38 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -440,6 +440,10 @@ Status HloEvaluator::HandleSetDimensionSize( Literal result(set_dimension_size->shape()); memcpy(result.untyped_data(), operand_literal.untyped_data(), operand_literal.size_bytes()); + const Literal& size_literal = + GetEvaluatedLiteralFor(set_dimension_size->operand(1)); + result.SetDynamicSize(set_dimension_size->dimension(), + size_literal.Get({})); evaluated_[set_dimension_size] = std::move(result); return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/interpreter/executable_base.cc b/tensorflow/compiler/xla/service/interpreter/executable_base.cc index 4b020ea2d32..4b6a8aa5202 100644 --- a/tensorflow/compiler/xla/service/interpreter/executable_base.cc +++ b/tensorflow/compiler/xla/service/interpreter/executable_base.cc @@ -81,8 +81,17 @@ StatusOr InterpreterExecutableBase::ExecuteAsyncOnStream( for (int64 i = 0; i < computation->num_parameters(); ++i) { const auto& expected_shape = computation->parameter_instruction(i)->shape(); const auto& actual_shape = argument_buffers[i].on_device_shape(); - if (!Shape::Equal().MinorToMajorOnlyInLayout()(expected_shape, - actual_shape)) { + bool shape_match = true; + if (expected_shape.is_dynamic()) { + if (!ShapeUtil::DynamicArrayShapeIsCompatible(actual_shape, + expected_shape)) { + shape_match = false; + } + } else if (!Shape::Equal().MinorToMajorOnlyInLayout()(expected_shape, + actual_shape)) { + shape_match = false; + } + if (!shape_match) { return InvalidArgument( "Shape mismatch on parameter %d. Expected %s, but was %s.", i, ShapeUtil::HumanStringWithLayout(expected_shape), @@ -100,11 +109,18 @@ StatusOr InterpreterExecutableBase::ExecuteAsyncOnStream( TF_ASSIGN_OR_RETURN(Literal arg_literal, transfer_manager->TransferLiteralFromDevice( run_options->stream(), argument_buffers[p])); + const auto& expected_shape = computation->parameter_instruction(p)->shape(); + if (expected_shape.is_dynamic()) { + // Expand the input literal to expected shape. 
+      arg_literal = arg_literal.ToBoundedDynamic(expected_shape); + } arg_literals.push_back(std::move(arg_literal)); } TF_ASSIGN_OR_RETURN(Literal result_literal, Evaluate(*computation, arg_literals)); + // Shrink the generated dynamic shape into static shape. + result_literal = result_literal.ToStatic(); // Transform the result literal back into a ShapedBuffer. TF_ASSIGN_OR_RETURN(ScopedShapedBuffer result_buffers, diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc index bce40578132..6e452293232 100644 --- a/tensorflow/compiler/xla/shape_util.cc +++ b/tensorflow/compiler/xla/shape_util.cc @@ -339,6 +339,15 @@ ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout( TF_DCHECK_OK(ValidateShape(*shape)); } +/* static */ void ShapeUtil::CopyDynamicDimensions(Shape* to, + const Shape& from) { + CHECK_EQ(to->rank(), from.rank()); + for (int64 i = 0; i < from.rank(); ++i) { + to->set_dynamic_dimension(i, from.is_dynamic_dimension(i)); + } + TF_DCHECK_OK(ValidateShape(*to)); +} + /* static */ bool ShapeUtil::ElementIsIntegral(const Shape& shape) { return primitive_util::IsIntegralType(shape.element_type()); } diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index fe1a8acf6e4..3789d828528 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -377,6 +377,9 @@ class ShapeUtil { // Appends a major dimension to the shape with the given bound. static void AppendMajorDimension(int bound, Shape* shape); + // Copy the dynamic dimensions property from one shape to another. + static void CopyDynamicDimensions(Shape* to, const Shape& from); + // Returns an empty tuple shape. Can be used as a sentinel Shape value. static Shape MakeNil() { return MakeTupleShape({}); } From 32bb13daceb134c90741207e7a54a4562fb578f6 Mon Sep 17 00:00:00 2001 From: Ran Chen Date: Fri, 17 Jul 2020 11:11:52 -0700 Subject: [PATCH 0716/2522] Allow access SaveOptions through SaveContext Distributed variables need to behave differently when tracing functions with different SaveOptions. We need to access SaveContext in tf.distribute code instead of the other way around because we may have a handle to the strategy in saving code.
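A minimal usage sketch of the new API, mirroring the test added below (these modules are TensorFlow-internal, so this is illustrative rather than a public contract):

```
from tensorflow.python.saved_model import save_context
from tensorflow.python.saved_model import save_options

options = save_options.SaveOptions(save_debug_info=True)
with save_context.save_context(options):
  # Library code (e.g. tf.distribute) can now ask which options are in
  # effect for the current save and trace functions accordingly.
  assert save_context.in_save_context()
  assert save_context.get_save_options().save_debug_info
```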
PiperOrigin-RevId: 321815477 Change-Id: Ib69f6d42c60e198c0e8e174f76bc9424e21df5b5 --- tensorflow/python/distribute/BUILD | 4 +- tensorflow/python/distribute/values_test.py | 4 +- tensorflow/python/saved_model/BUILD | 12 +++ tensorflow/python/saved_model/save.py | 2 +- tensorflow/python/saved_model/save_context.py | 20 ++++- .../python/saved_model/save_context_test.py | 87 +++++++++++++++++++ 6 files changed, 123 insertions(+), 6 deletions(-) create mode 100644 tensorflow/python/saved_model/save_context_test.py diff --git a/tensorflow/python/distribute/BUILD b/tensorflow/python/distribute/BUILD index f8c04744c5b..0468c90292d 100644 --- a/tensorflow/python/distribute/BUILD +++ b/tensorflow/python/distribute/BUILD @@ -1145,6 +1145,8 @@ distribute_py_test( deps = [ ":combinations", ":distribute_lib", + ":distribute_utils", + ":packed_distributed_variable", ":strategy_combinations", ":test_util", ":tpu_strategy", @@ -1174,7 +1176,7 @@ distribute_py_test( "//tensorflow/python/eager:def_function", "//tensorflow/python/eager:test", "//tensorflow/python/saved_model:save_context", - "//tensorflow/python/saved_model/model_utils:mode_keys", + "//tensorflow/python/saved_model:save_options", "//tensorflow/python/tpu:tpu_lib", "//tensorflow/python/types", "@absl_py//absl/testing:parameterized", diff --git a/tensorflow/python/distribute/values_test.py b/tensorflow/python/distribute/values_test.py index ec4447951ad..73d15ce4aea 100644 --- a/tensorflow/python/distribute/values_test.py +++ b/tensorflow/python/distribute/values_test.py @@ -56,6 +56,7 @@ from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables as variables_lib from tensorflow.python.saved_model import save_context +from tensorflow.python.saved_model import save_options from tensorflow.python.tpu import tpu_strategy_util from tensorflow.python.training import saver as saver_lib from tensorflow.python.training.tracking import util as trackable_utils @@ -597,7 +598,8 @@ class PackedDistributedVariableTest(test.TestCase, parameterized.TestCase): self.assertIsInstance( v._packed_variable, packed.PackedDistributedVariable) - with save_context.save_context(): + options = save_options.SaveOptions() + with save_context.save_context(options): self.assertIsNone(v._packed_variable) diff --git a/tensorflow/python/saved_model/BUILD b/tensorflow/python/saved_model/BUILD index fc84eaf6c26..858fa10a1eb 100644 --- a/tensorflow/python/saved_model/BUILD +++ b/tensorflow/python/saved_model/BUILD @@ -302,6 +302,18 @@ py_strict_library( deps = [], ) +tf_py_test( + name = "save_context_test", + srcs = ["save_context_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":save_context", + ":save_options", + "//tensorflow/python:extra_py_tests_deps", + "//tensorflow/python/eager:test", + ], +) + py_strict_library( name = "save", srcs = [ diff --git a/tensorflow/python/saved_model/save.py b/tensorflow/python/saved_model/save.py index 6631a85f12c..33780c14db8 100644 --- a/tensorflow/python/saved_model/save.py +++ b/tensorflow/python/saved_model/save.py @@ -1143,6 +1143,6 @@ def _build_meta_graph(obj, options, meta_graph_def=None): """Creates a MetaGraph under a SaveContext.""" - with save_context.save_context(): + with save_context.save_context(options): return _build_meta_graph_impl(obj, export_dir, signatures, options, meta_graph_def) diff --git a/tensorflow/python/saved_model/save_context.py b/tensorflow/python/saved_model/save_context.py index 53d92587247..68fce83ae81 100644 --- 
a/tensorflow/python/saved_model/save_context.py +++ b/tensorflow/python/saved_model/save_context.py @@ -28,12 +28,20 @@ class SaveContext(threading.local): def __init__(self): super(SaveContext, self).__init__() self._in_save_context = False + self._options = None - def enter_save_context(self): + def options(self): + if not self.in_save_context(): + raise ValueError("not in a SaveContext") + return self._options + + def enter_save_context(self, options): self._in_save_context = True + self._options = options def exit_save_context(self): self._in_save_context = False + self._options = None def in_save_context(self): return self._in_save_context @@ -42,8 +50,10 @@ _save_context = SaveContext() @contextlib.contextmanager -def save_context(): - _save_context.enter_save_context() +def save_context(options): + if in_save_context(): + raise ValueError("already in a SaveContext") + _save_context.enter_save_context(options) try: yield finally: @@ -54,3 +64,7 @@ def in_save_context(): """Returns whether under a save context.""" return _save_context.in_save_context() + +def get_save_options(): + """Returns the save options if under a save context.""" + return _save_context.options() diff --git a/tensorflow/python/saved_model/save_context_test.py b/tensorflow/python/saved_model/save_context_test.py new file mode 100644 index 00000000000..424a3ea0d3b --- /dev/null +++ b/tensorflow/python/saved_model/save_context_test.py @@ -0,0 +1,87 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Test for SaveContext.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import threading + +from tensorflow.python.eager import test +from tensorflow.python.saved_model import save_context +from tensorflow.python.saved_model import save_options + + +class SaveContextTest(test.TestCase): + + def test_multi_thread(self): + self.assertFalse(save_context.in_save_context()) + with self.assertRaisesRegex(ValueError, 'not in a SaveContext'): + save_context.get_save_options() + + options = save_options.SaveOptions(save_debug_info=True) + with save_context.save_context(options): + self.assertTrue(save_context.in_save_context()) + self.assertTrue(save_context.get_save_options().save_debug_info) + + entered_context_in_thread = threading.Event() + continue_thread = threading.Event() + + def thread_fn(): + self.assertFalse(save_context.in_save_context()) + with self.assertRaisesRegex(ValueError, 'not in a SaveContext'): + save_context.get_save_options() + + options = save_options.SaveOptions(save_debug_info=False) + with save_context.save_context(options): + self.assertTrue(save_context.in_save_context()) + # save_debug_info has a different value in this thread. 
+ self.assertFalse(save_context.get_save_options().save_debug_info) + entered_context_in_thread.set() + continue_thread.wait() + + self.assertFalse(save_context.in_save_context()) + with self.assertRaisesRegex(ValueError, 'not in a SaveContext'): + save_context.get_save_options() + + t = threading.Thread(target=thread_fn) + t.start() + + entered_context_in_thread.wait() + # Another thread shouldn't affect this thread. + self.assertTrue(save_context.in_save_context()) + self.assertTrue(save_context.get_save_options().save_debug_info) + + continue_thread.set() + t.join() + # Another thread exiting SaveContext shouldn't affect this thread. + self.assertTrue(save_context.in_save_context()) + self.assertTrue(save_context.get_save_options().save_debug_info) + + self.assertFalse(save_context.in_save_context()) + with self.assertRaisesRegex(ValueError, 'not in a SaveContext'): + save_context.get_save_options() + + def test_enter_multiple(self): + options = save_options.SaveOptions() + with self.assertRaisesRegex(ValueError, 'already in a SaveContext'): + with save_context.save_context(options): + with save_context.save_context(options): + pass + + +if __name__ == '__main__': + test.main() From af89635f3fa12235305b6100febde6306df45777 Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: Fri, 17 Jul 2020 11:15:22 -0700 Subject: [PATCH 0717/2522] Make parse_example_dataset autoshardable. This CL enables autosharding to recurse past ParseExampleDataset when looking for a list of filenames to shard. Previously, autoshard would need to fall back on data-level sharding when ParseExampleDataset is present. PiperOrigin-RevId: 321816217 Change-Id: I179f88b139d1c1365cd1f60d53519f7998f75876 --- .../grappler/optimizers/data/auto_shard.cc | 3 +- .../kernel_tests/auto_shard_dataset_test.py | 47 +++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/optimizers/data/auto_shard.cc b/tensorflow/core/grappler/optimizers/data/auto_shard.cc index 3e8583d74e9..f688c1f359f 100644 --- a/tensorflow/core/grappler/optimizers/data/auto_shard.cc +++ b/tensorflow/core/grappler/optimizers/data/auto_shard.cc @@ -64,7 +64,7 @@ constexpr std::array kMultipleInputsDatasetOps = { "ZipDataset" }; -constexpr std::array kPassThroughOps = { +constexpr std::array kPassThroughOps = { "_Retval", "AssertNextDataset", "BatchDataset", @@ -83,6 +83,7 @@ constexpr std::array kPassThroughOps = { "OptimizeDataset", "ParallelMapDataset", "ParallelMapDatasetV2", + "ParseExampleDatasetV2", "PrefetchDataset", "ReduceDataset", "RebatchDataset", diff --git a/tensorflow/python/data/experimental/kernel_tests/auto_shard_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/auto_shard_dataset_test.py index cd3432f28b5..c2aa406d260 100644 --- a/tensorflow/python/data/experimental/kernel_tests/auto_shard_dataset_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/auto_shard_dataset_test.py @@ -17,8 +17,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os + from absl.testing import parameterized +from tensorflow.core.example import example_pb2 +from tensorflow.core.example import feature_pb2 from tensorflow.python.data.experimental.kernel_tests import reader_dataset_ops_test_base from tensorflow.python.data.experimental.ops import cardinality from tensorflow.python.data.experimental.ops import distribute @@ -31,7 +35,10 @@ from tensorflow.python.data.kernel_tests import test_base from tensorflow.python.data.ops 
import dataset_ops from tensorflow.python.data.ops import readers as core_readers from tensorflow.python.framework import combinations +from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors +from tensorflow.python.lib.io import python_io +from tensorflow.python.ops import parsing_ops from tensorflow.python.ops import string_ops from tensorflow.python.platform import test @@ -445,6 +452,46 @@ class AutoShardDatasetTest(reader_dataset_ops_test_base.TFRecordDatasetTestBase, ] self.assertDatasetProduces(dataset, list(chunk(expected, 5))) + @combinations.generate(test_base.default_test_combinations()) + def testMakeBatchedFeaturesDataset(self): + files = 2 + records_per_file = 5 + + def make_record(file_index): + example = example_pb2.Example( + features=feature_pb2.Features( + feature={ + "file": + feature_pb2.Feature( + int64_list=feature_pb2.Int64List(value=[file_index])), + })) + return example.SerializeToString() + + filenames = [] + for file_index in range(files): + filename = os.path.join(self.get_temp_dir(), + "tf_record.%d.txt" % file_index) + filenames.append(filename) + writer = python_io.TFRecordWriter(filename) + for _ in range(records_per_file): + writer.write(make_record(file_index)) + writer.close() + + dataset = readers.make_batched_features_dataset( + file_pattern=filenames, + batch_size=records_per_file, + features={ + "file": parsing_ops.FixedLenFeature([], dtypes.int64), + }, + reader=core_readers.TFRecordDataset, + num_epochs=1) + # We should shard at the file level, so that all records come from file 0. + dataset = distribute._AutoShardDataset(dataset, 2, 0) + dataset = dataset.unbatch() + output = self.getDatasetOutput(dataset) + files = [elem["file"] for elem in output] + self.assertEqual(files, [0] * records_per_file) + class AutoShardTextLineDatasetTest( reader_dataset_ops_test_base.TextLineDatasetTestBase, From 391ebea26671f25fcddd1315861d535605f6ff03 Mon Sep 17 00:00:00 2001 From: Tomer Kaftan Date: Fri, 17 Jul 2020 11:42:51 -0700 Subject: [PATCH 0718/2522] Add a meaningful repr to KerasTensors, Update the KerasTensor docstring, and update some of the doctests to not fail w/ the new KerasTensor repr. 
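As a rough illustration (hedged: `KerasTensor` is internal and only active when Keras tensors are enabled; the exact string follows the format assembled in `__repr__` below, which reports the shape, dtype, and, when Keras history is attached, the originating layer):

```
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import tensor_spec
from tensorflow.python.keras.engine import keras_tensor

kt = keras_tensor.KerasTensor(
    type_spec=tensor_spec.TensorSpec(shape=(1, 2, 3), dtype=dtypes.float32))
# The repr now reports e.g. "shape=(1, 2, 3) dtype=float32" instead of the
# default object repr.
print(repr(kt))
```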
PiperOrigin-RevId: 321822118 Change-Id: Iad58dddac5362301b6a06c532e58378f56e3b9ac --- tensorflow/python/keras/engine/BUILD | 15 ++++ .../python/keras/engine/keras_tensor.py | 38 +++++++-- .../python/keras/engine/keras_tensor_test.py | 85 +++++++++++++++++++ .../python/keras/layers/einsum_dense.py | 6 +- tensorflow/python/ops/array_ops.py | 2 +- 5 files changed, 133 insertions(+), 13 deletions(-) create mode 100644 tensorflow/python/keras/engine/keras_tensor_test.py diff --git a/tensorflow/python/keras/engine/BUILD b/tensorflow/python/keras/engine/BUILD index c64e38122e9..5f65923e6fe 100644 --- a/tensorflow/python/keras/engine/BUILD +++ b/tensorflow/python/keras/engine/BUILD @@ -322,6 +322,21 @@ tf_py_test( ], ) +tf_py_test( + name = "keras_tensor_test", + size = "small", + srcs = ["keras_tensor_test.py"], + python_version = "PY3", + tags = [ + "nomac", # TODO(mihaimaruseac): b/127695564 + ], + deps = [ + "//tensorflow/python:client_testlib", + "//tensorflow/python/keras", + "@absl_py//absl/testing:parameterized", + ], +) + tf_py_test( name = "input_spec_test", size = "small", diff --git a/tensorflow/python/keras/engine/keras_tensor.py b/tensorflow/python/keras/engine/keras_tensor.py index 8ab6f674243..6bdd11d8ec3 100644 --- a/tensorflow/python/keras/engine/keras_tensor.py +++ b/tensorflow/python/keras/engine/keras_tensor.py @@ -51,16 +51,15 @@ def keras_tensors_enabled(): class KerasTensor(object): """A representation of a Keras in/output during Functional API construction. - `KerasTensor`s are an alternative representation for Keras `Inputs` - and for intermediate outputs of layers during Functional API construction of - models. They are a lightweight data structure comprised of only the - `tf.TypeSpec` of the Tensor that will be consumed/produced in the - corresponding position of the model. + `KerasTensor`s are tensor-like objects that represent the symbolic inputs + and outputs of Keras layers during Functional model construction. They are + compromised of the `tf.TypeSpec` of the Tensor that will be + consumed/produced in the corresponding position of the model. - They implement just small subset of `tf.Tensor`'s attributes and - methods, and also overload - the same operators as `tf.Tensor` and automatically turn them into - Keras layers in the model. + They implement `tf.Tensor`'s attributes and methods, and also overload + the same operators as `tf.Tensor`. Passing a KerasTensor to a TF API that + supports dispatching will automatically turn that API call into a lambda + layer in the Functional model. 
`KerasTensor`s are still internal-only and are a work in progress, but they have several advantages over using a graph `tf.Tensor` to represent @@ -150,6 +149,27 @@ class KerasTensor(object): else: self._type_spec._shape = shape # pylint: disable=protected-access + def __repr__(self): + symbolic_description = '' + inferred_value_string = '' + if isinstance(self.type_spec, tensor_spec.TensorSpec): + type_spec_string = 'shape=%s dtype=%s' % (self.shape, self.dtype.name) + else: + type_spec_string = 'type_spec=%s' % self.type_spec + + if hasattr(self, '_keras_history'): + layer = self._keras_history.layer + node_index = self._keras_history.node_index + tensor_index = self._keras_history.tensor_index + symbolic_description = ( + ' (Symbolic value %s from symbolic call %s of layer \'%s\')' % ( + tensor_index, node_index, layer.name)) + if self._inferred_shape_value is not None: + inferred_value_string = ( + ' inferred_value=\'%s\'' % self._inferred_shape_value) + return '' % ( + type_spec_string, inferred_value_string, symbolic_description) + @property def dtype(self): """Returns the `dtype` of elements in the tensor.""" diff --git a/tensorflow/python/keras/engine/keras_tensor_test.py b/tensorflow/python/keras/engine/keras_tensor_test.py new file mode 100644 index 00000000000..63e117effec --- /dev/null +++ b/tensorflow/python/keras/engine/keras_tensor_test.py @@ -0,0 +1,85 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""InputSpec tests.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import tensor_spec +from tensorflow.python.keras import layers +from tensorflow.python.keras import testing_utils +from tensorflow.python.keras.engine import keras_tensor +from tensorflow.python.ops import array_ops +from tensorflow.python.platform import test + + +class KerasTensorTest(test.TestCase): + + def test_repr(self): + kt = keras_tensor.KerasTensor( + type_spec=tensor_spec.TensorSpec(shape=(1, 2, 3), dtype=dtypes.float32)) + expected_repr = "" + self.assertEqual(expected_repr, str(kt)) + self.assertEqual(expected_repr, repr(kt)) + + kt = keras_tensor.KerasTensor( + type_spec=tensor_spec.TensorSpec(shape=(2,), dtype=dtypes.int32), + inferred_shape_value=[2, 3]) + expected_repr = ( + "") + self.assertEqual(expected_repr, str(kt)) + self.assertEqual(expected_repr, repr(kt)) + + kt = keras_tensor.KerasTensor( + type_spec=sparse_tensor.SparseTensorSpec( + shape=(1, 2, 3), dtype=dtypes.float32)) + expected_repr = ( + "") + self.assertEqual(expected_repr, str(kt)) + self.assertEqual(expected_repr, repr(kt)) + + with testing_utils.use_keras_tensors_scope(True): + inp = layers.Input(shape=(3, 5)) + kt = layers.Dense(10)(inp) + expected_repr = ( + "") + self.assertEqual(expected_repr, str(kt)) + self.assertEqual(expected_repr, repr(kt)) + + kt = array_ops.reshape(kt, shape=(3, 5, 2)) + expected_repr = ("") + self.assertEqual(expected_repr, str(kt)) + self.assertEqual(expected_repr, repr(kt)) + + kts = array_ops.unstack(kt) + for i in range(3): + expected_repr = ("" % i) + self.assertEqual(expected_repr, str(kts[i])) + self.assertEqual(expected_repr, repr(kts[i])) + +if __name__ == "__main__": + ops.enable_eager_execution() + tensor_shape.enable_v2_tensorshape() + test.main() diff --git a/tensorflow/python/keras/layers/einsum_dense.py b/tensorflow/python/keras/layers/einsum_dense.py index 7b5bd085703..f8f2e01058d 100644 --- a/tensorflow/python/keras/layers/einsum_dense.py +++ b/tensorflow/python/keras/layers/einsum_dense.py @@ -73,7 +73,7 @@ class EinsumDense(Layer): >>> input_tensor = tf.keras.Input(shape=[32]) >>> output_tensor = layer(input_tensor) >>> output_tensor - + <... shape=(None, 64) dtype=...> **Applying a dense layer to a sequence** @@ -89,7 +89,7 @@ class EinsumDense(Layer): >>> input_tensor = tf.keras.Input(shape=[32, 128]) >>> output_tensor = layer(input_tensor) >>> output_tensor - + <... shape=(None, 32, 64) dtype=...> **Applying a dense layer to a sequence using ellipses** @@ -106,7 +106,7 @@ class EinsumDense(Layer): >>> input_tensor = tf.keras.Input(shape=[32, 128]) >>> output_tensor = layer(input_tensor) >>> output_tensor - + <... shape=(None, 32, 64) dtype=...> """ def __init__(self, diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index cc41f5f1a3c..b8711a444a8 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -591,7 +591,7 @@ def shape_v2(input, out_type=dtypes.int32, name=None): >>> a = tf.keras.layers.Input((None, 10)) >>> tf.shape(a) - + <... 
shape=(3,) dtype=int32...> In these cases, using `tf.Tensor.shape` will return more informative results. From 5ca8c1bcb9c1a0d6f411269fc2300cd7e2d467a5 Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Fri, 17 Jul 2020 11:49:21 -0700 Subject: [PATCH 0719/2522] Canonicalize dense array conditional into tuple conditional with one element. It's annoying to deal with the fact that conditional can be either tuple or non-tuple. Canonicalize everything into tuple. PiperOrigin-RevId: 321823386 Change-Id: I8bfe798bd1b4af9c3ffd169fa6b497c8b2f92b4a --- tensorflow/compiler/xla/service/BUILD | 33 +++++++++ .../xla/service/conditional_canonicalizer.cc | 60 ++++++++++++++++ .../xla/service/conditional_canonicalizer.h | 38 ++++++++++ .../service/conditional_canonicalizer_test.cc | 72 +++++++++++++++++++ tensorflow/compiler/xla/service/cpu/BUILD | 1 + .../compiler/xla/service/cpu/cpu_compiler.cc | 2 + tensorflow/compiler/xla/service/gpu/BUILD | 1 + .../compiler/xla/service/gpu/gpu_compiler.cc | 3 +- 8 files changed, 209 insertions(+), 1 deletion(-) create mode 100644 tensorflow/compiler/xla/service/conditional_canonicalizer.cc create mode 100644 tensorflow/compiler/xla/service/conditional_canonicalizer.h create mode 100644 tensorflow/compiler/xla/service/conditional_canonicalizer_test.cc diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 52cba3837dd..8d267affdd9 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -3932,6 +3932,39 @@ tf_cc_test( ], ) +cc_library( + name = "conditional_canonicalizer", + srcs = ["conditional_canonicalizer.cc"], + hdrs = ["conditional_canonicalizer.h"], + deps = [ + ":hlo", + ":hlo_pass", + "//tensorflow/compiler/xla:status_macros", + ], +) + +tf_cc_test( + name = "conditional_canonicalizer_test", + srcs = ["conditional_canonicalizer_test.cc"], + deps = [ + ":conditional_canonicalizer", + ":hlo", + ":hlo_matchers", + ":hlo_parser", + "//tensorflow/compiler/xla:literal", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:types", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:xla_data_proto_cc", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/compiler/xla/tests:test_utils", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + cc_library( name = "hlo_get_dimension_size_rewriter", srcs = ["hlo_get_dimension_size_rewriter.cc"], diff --git a/tensorflow/compiler/xla/service/conditional_canonicalizer.cc b/tensorflow/compiler/xla/service/conditional_canonicalizer.cc new file mode 100644 index 00000000000..3d917eb39fe --- /dev/null +++ b/tensorflow/compiler/xla/service/conditional_canonicalizer.cc @@ -0,0 +1,60 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+==============================================================================*/ + +#include "tensorflow/compiler/xla/service/conditional_canonicalizer.h" + +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/status_macros.h" + +namespace xla { +namespace { +Status CanonicalizeNonTupleConditional(HloInstruction* conditional) { + TF_RET_CHECK(conditional->opcode() == HloOpcode::kConditional); + for (auto* branch : conditional->called_computations()) { + HloInstruction* root = branch->root_instruction(); + TF_RET_CHECK(!root->shape().IsTuple()); + + HloInstruction* tuple = + branch->AddInstruction(HloInstruction::CreateTuple({root})); + branch->set_root_instruction(tuple, /*accept_different_shape=*/true); + } + auto root_shape = conditional->shape(); + *conditional->mutable_shape() = ShapeUtil::MakeTupleShape({root_shape}); + auto gte = conditional->parent()->AddInstruction( + HloInstruction::CreateGetTupleElement(root_shape, conditional, 0)); + TF_RETURN_IF_ERROR(conditional->ReplaceAllUsesWithDifferentShape(gte)); + return Status::OK(); +} +} // namespace + +StatusOr<bool> ConditionalCanonicalizer::Run(HloModule* module) { + XLA_VLOG_LINES( + 2, "ConditionalCanonicalizer::Run(), before:\n" + module->ToString()); + bool changed = false; + for (auto* comp : module->MakeNonfusionComputations()) { + for (auto* inst : comp->MakeInstructionPostOrder()) { + if (inst->opcode() == HloOpcode::kConditional && + !inst->shape().IsTuple()) { + TF_RETURN_IF_ERROR(CanonicalizeNonTupleConditional(inst)); + changed = true; + } + } + } + XLA_VLOG_LINES( + 2, "ConditionalCanonicalizer::Run(), after:\n" + module->ToString()); + return changed; +} +} // namespace xla diff --git a/tensorflow/compiler/xla/service/conditional_canonicalizer.h b/tensorflow/compiler/xla/service/conditional_canonicalizer.h new file mode 100644 index 00000000000..a390d87a007 --- /dev/null +++ b/tensorflow/compiler/xla/service/conditional_canonicalizer.h @@ -0,0 +1,38 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CONDITIONAL_CANONICALIZER_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_CONDITIONAL_CANONICALIZER_H_ + +#include + +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_pass_interface.h" + +namespace xla { + +// Canonicalize output of conditionals, make non-tuple outputs into tuple with +// single element output. After this pass, all conditional instructions have +// tuple outputs.
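A minimal sketch of the rewrite and of driving the pass (illustrative only, patterned on the unit test further down rather than quoted from this patch; `module` is assumed to be an already-parsed HloModule): a bare array conditional such as ROOT %cond = s32[] conditional(...) becomes %cond = (s32[]) conditional(...), and a ROOT %gte = s32[] get-tuple-element(%cond), index=0 is inserted so existing users still see the array value.

  // Hypothetical driver code, not part of this diff:
  ConditionalCanonicalizer canonicalizer;
  bool changed = canonicalizer.Run(module.get()).ValueOrDie();
  // After the pass, every kConditional instruction produces a tuple-shaped result.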
+class ConditionalCanonicalizer : public HloModulePass { + public: + absl::string_view name() const override { + return "conditional canonicalizer"; + } + + StatusOr<bool> Run(HloModule* module) override; +}; +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_CONDITIONAL_CANONICALIZER_H_ diff --git a/tensorflow/compiler/xla/service/conditional_canonicalizer_test.cc b/tensorflow/compiler/xla/service/conditional_canonicalizer_test.cc new file mode 100644 index 00000000000..498260cbabf --- /dev/null +++ b/tensorflow/compiler/xla/service/conditional_canonicalizer_test.cc @@ -0,0 +1,72 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/conditional_canonicalizer.h" + +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_matchers.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_utils.h" +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/compiler/xla/util.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +namespace op = xla::testing::opcode_matchers; + +class ConditionalCanonicalizerTest : public HloTestBase { + protected: + ConditionalCanonicalizerTest() {} +}; + +TEST_F(ConditionalCanonicalizerTest, DenseArrayConditionalRewrite) { + auto module = ParseAndReturnVerifiedModule(R"( +HloModule _ +true_branch { + true_param = (s32[3,2]) parameter(0) + ROOT root = s32[] constant(0) +} + +false_branch { + false_param = (s32[3,2]) parameter(0) + ROOT root = s32[] constant(1) +} + +ENTRY entry { + param0 = s32[3,2] parameter(0) + branch = pred[] constant(false) + param_tuple = (s32[3 ,2]) tuple(param0) + ROOT conditional = s32[] conditional(branch, param_tuple, param_tuple), + true_computation=true_branch, false_computation=false_branch +} +)") + .ValueOrDie(); + ConditionalCanonicalizer pass; + EXPECT_TRUE(pass.Run(module.get()).ValueOrDie()); + EXPECT_THAT(module->entry_computation()->root_instruction(), + op::GetTupleElement(op::Conditional())); +} + +} // namespace +} // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index ac167b00bb3..782d08296f0 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -138,6 +138,7 @@ cc_library( "//tensorflow/compiler/xla/service:rng_bit_generator_expander", "//tensorflow/compiler/xla/service:tree_reduction_rewriter",
"//tensorflow/compiler/xla/service:hlo_get_dimension_size_rewriter", + "//tensorflow/compiler/xla/service:conditional_canonicalizer", "//tensorflow/compiler/xla/service:conditional_to_select", "//tensorflow/compiler/xla/service:slow_operation_alarm", "//tensorflow/compiler/xla/service:scatter_expander", diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc index 5464cfee082..04d703fdd59 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc @@ -54,6 +54,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/buffer_assignment.h" #include "tensorflow/compiler/xla/service/call_inliner.h" #include "tensorflow/compiler/xla/service/cholesky_expander.h" +#include "tensorflow/compiler/xla/service/conditional_canonicalizer.h" #include "tensorflow/compiler/xla/service/conditional_simplifier.h" #include "tensorflow/compiler/xla/service/conditional_to_select.h" #include "tensorflow/compiler/xla/service/convolution_group_converter.h" @@ -284,6 +285,7 @@ Status CpuCompiler::RunHloPassesThroughLayoutAssn( /*rewrite_grad_op=*/true); pipeline.AddPass( /*expansion_type=*/LogisticExpansionType::kExp); + pipeline.AddPass(); pipeline.AddPass(); pipeline.AddPass(); pipeline.AddPass(); diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index b22f258bac6..7b1d3e213ce 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -1168,6 +1168,7 @@ cc_library( "//tensorflow/compiler/xla/service:batchnorm_expander", "//tensorflow/compiler/xla/service:buffer_assignment", "//tensorflow/compiler/xla/service:call_inliner", + "//tensorflow/compiler/xla/service:conditional_canonicalizer", "//tensorflow/compiler/xla/service:conditional_simplifier", "//tensorflow/compiler/xla/service:convolution_4d_expander", "//tensorflow/compiler/xla/service:dot_decomposer", diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index 3050e794f10..f2d29b5d11f 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -35,6 +35,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/batchnorm_expander.h" #include "tensorflow/compiler/xla/service/buffer_assignment.h" #include "tensorflow/compiler/xla/service/call_inliner.h" +#include "tensorflow/compiler/xla/service/conditional_canonicalizer.h" #include "tensorflow/compiler/xla/service/conditional_simplifier.h" #include "tensorflow/compiler/xla/service/convolution_4d_expander.h" #include "tensorflow/compiler/xla/service/dot_decomposer.h" @@ -179,7 +180,7 @@ Status GpuCompiler::OptimizeHloModule( pipeline.AddPass( /*expansion_type=*/LogisticExpansionType::kExp); - + pipeline.AddPass(); pipeline.AddPass(); { From 16fe3dbfc03fea7bcd0be2203a14ae289739194c Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Fri, 17 Jul 2020 11:51:00 -0700 Subject: [PATCH 0720/2522] Add CC_OPT_FLAGS back to configure to prevent the default configure copts to be added to the bazelrc. 
PiperOrigin-RevId: 321823718 Change-Id: I771ae2ec2bce5e1c6649871407439d4f52bbd02d --- .../ci_build/release/macos/cpu_py35_full/nightly_release.sh | 1 + .../ci_build/release/macos/cpu_py36_full/nightly_release.sh | 1 + .../ci_build/release/macos/cpu_py37_full/nightly_release.sh | 1 + .../ci_build/release/macos/cpu_py38_full/nightly_release.sh | 1 + .../ci_build/release/ubuntu_16/gpu_py35_full/nightly_release.sh | 1 + .../ci_build/release/ubuntu_16/gpu_py36_full/nightly_release.sh | 1 + .../ci_build/release/ubuntu_16/gpu_py37_full/nightly_release.sh | 1 + .../ci_build/release/ubuntu_16/gpu_py38_full/nightly_release.sh | 1 + 8 files changed, 8 insertions(+) diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py35_full/nightly_release.sh b/tensorflow/tools/ci_build/release/macos/cpu_py35_full/nightly_release.sh index 45a8f435988..7da3b0ea9be 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py35_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py35_full/nightly_release.sh @@ -35,6 +35,7 @@ sudo pip install twine ./tensorflow/tools/ci_build/update_version.py --nightly # Run configure. +export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.5) yes "" | "$PYTHON_BIN_PATH" configure.py diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py36_full/nightly_release.sh b/tensorflow/tools/ci_build/release/macos/cpu_py36_full/nightly_release.sh index d4cc8d7afac..33e1491dd86 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py36_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py36_full/nightly_release.sh @@ -33,6 +33,7 @@ sudo pip install twine ./tensorflow/tools/ci_build/update_version.py --nightly # Run configure. +export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.6) yes "" | "$PYTHON_BIN_PATH" configure.py diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py37_full/nightly_release.sh b/tensorflow/tools/ci_build/release/macos/cpu_py37_full/nightly_release.sh index cd0f8a58ae6..631aea318bd 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py37_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py37_full/nightly_release.sh @@ -33,6 +33,7 @@ sudo pip install twine ./tensorflow/tools/ci_build/update_version.py --nightly # Run configure. +export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.7) yes "" | "$PYTHON_BIN_PATH" configure.py diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py38_full/nightly_release.sh b/tensorflow/tools/ci_build/release/macos/cpu_py38_full/nightly_release.sh index 11085b08a38..5ffef89188c 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py38_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py38_full/nightly_release.sh @@ -33,6 +33,7 @@ sudo pip install twine ./tensorflow/tools/ci_build/update_version.py --nightly # Run configure. +export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.8) yes "" | "$PYTHON_BIN_PATH" configure.py diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nightly_release.sh index addfc59818e..aac88b57fa7 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nightly_release.sh @@ -25,6 +25,7 @@ install_bazelisk python2.7 tensorflow/tools/ci_build/update_version.py --nightly # Run configure. 
+export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.5) yes "" | "$PYTHON_BIN_PATH" configure.py diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nightly_release.sh index c6fb6d469b1..600b4b0be8e 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nightly_release.sh @@ -25,6 +25,7 @@ install_bazelisk python2.7 tensorflow/tools/ci_build/update_version.py --nightly # Run configure. +export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.6) yes "" | "$PYTHON_BIN_PATH" configure.py diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nightly_release.sh index 6e900d7dba8..a9e51461715 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nightly_release.sh @@ -25,6 +25,7 @@ install_bazelisk python2.7 tensorflow/tools/ci_build/update_version.py --nightly # Run configure. +export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.7) yes "" | "$PYTHON_BIN_PATH" configure.py diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/nightly_release.sh index 9b968c4c3d6..0b8fd1380f2 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/nightly_release.sh @@ -27,6 +27,7 @@ update_bazel_linux python2.7 tensorflow/tools/ci_build/update_version.py --nightly # Run configure. +export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.8) yes "" | "$PYTHON_BIN_PATH" configure.py From 6f73e0f8aee2b95dec001ae3a9c9e26c4c459d8b Mon Sep 17 00:00:00 2001 From: Gabriel Rasskin Date: Fri, 17 Jul 2020 12:18:56 -0700 Subject: [PATCH 0721/2522] Create string_print_f.cc --- tensorflow/security/fuzzing/string_print_f.cc | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 tensorflow/security/fuzzing/string_print_f.cc diff --git a/tensorflow/security/fuzzing/string_print_f.cc b/tensorflow/security/fuzzing/string_print_f.cc new file mode 100644 index 00000000000..c6c45c22863 --- /dev/null +++ b/tensorflow/security/fuzzing/string_print_f.cc @@ -0,0 +1,45 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include +#include + +#include "tensorflow/core/platform/stringprintf.h" + +#include + +// This is a fuzzer for tensorflow::strings::Printf + +namespace { + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + FuzzedDataProvider fuzzed_data(data, size); + + const char split = fuzzed_data.ConsumeIntegral(); + const char split_a = split % 8; + const char split_b = 8 - (split % 8); + + const std::string sa_string = fuzzed_data.ConsumeBytesAsString(split_a); + const std::string sb_string = fuzzed_data.ConsumeBytesAsString(split_b); + const std::string sc_string = fuzzed_data.ConsumeRemainingBytesAsString(); + const char *sa = sa_string.c_str(); + const char *sb = sb_string.c_str(); + const char *sc = sc_string.c_str(); + + tensorflow::strings::Printf("%s %s %s", sa, sb, sc); + + return 0; +} + +} // namespace From 7522f1c8b105ee327b462b820360d9b126c9e5ed Mon Sep 17 00:00:00 2001 From: Gabriel Rasskin Date: Fri, 17 Jul 2020 12:20:11 -0700 Subject: [PATCH 0722/2522] Add string_print_f to build --- tensorflow/security/fuzzing/BUILD | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tensorflow/security/fuzzing/BUILD b/tensorflow/security/fuzzing/BUILD index 2f560bc3c6b..a82ec75713c 100644 --- a/tensorflow/security/fuzzing/BUILD +++ b/tensorflow/security/fuzzing/BUILD @@ -65,3 +65,12 @@ tf_fuzz_target( "//tensorflow/core/platform:stringpiece", ], ) + +tf_fuzz_target( + name = "string_print_f_fuzz", + srcs = ["string_print_f_fuzz.cc"], + deps = [ + "//tensorflow/core/platform:stringprintf", + ], +) + From c174386740265d72515fd6f94fb30eec4addcb1c Mon Sep 17 00:00:00 2001 From: Xingyu Long Date: Fri, 17 Jul 2020 15:20:57 -0400 Subject: [PATCH 0723/2522] Update comments for benchmark test --- .../antirectifier_benchmark_test.py | 17 +++++++++-------- .../bidirectional_lstm_benchmark_test.py | 18 +++++++++--------- .../cifar10_cnn_benchmark_test.py | 17 +++++++++-------- .../mnist_conv_benchmark_test.py | 17 +++++++++-------- .../mnist_hierarchical_rnn_benchmark_test.py | 17 +++++++++-------- .../mnist_irnn_benchmark_test.py | 17 +++++++++-------- .../reuters_mlp_benchmark_test.py | 17 +++++++++-------- ...assification_transformer_benchmark_test.py | 19 +++++++++---------- 8 files changed, 72 insertions(+), 67 deletions(-) diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/antirectifier_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/antirectifier_benchmark_test.py index 140c2ee0ccc..f8c2828f3a3 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/antirectifier_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/antirectifier_benchmark_test.py @@ -24,14 +24,6 @@ from tensorflow.python.keras.benchmarks import benchmark_util class AntirectifierBenchmark(tf.test.Benchmark): """Benchmarks for Antirectifier using `tf.test.Benchmark`.""" - # Required Arguments for measure_performance. - # x: Input data, it could be Numpy or load from tfds. - # y: Target data. If `x` is a dataset, generator instance, - # `y` should not be specified. - # loss: Loss function for model. - # optimizer: Optimizer for model. - # Other details can see in `measure_performance()` method of - # benchmark_util. 
def __init__(self): super(AntirectifierBenchmark, self).__init__() @@ -54,6 +46,15 @@ class AntirectifierBenchmark(tf.test.Benchmark): ) return model + # In each benchmark test, the required arguments for the + # method `measure_performance` include: + # x: Input data, it could be Numpy or loaded from tfds. + # y: Target data. If `x` is a dataset or generator instance, + # `y` should not be specified. + # loss: Loss function for model. + # optimizer: Optimizer for model. + # Check more details in `measure_performance()` method of + # benchmark_util. def benchmark_pixel_cnn_bs_128(self): """Measure performance with batch_size=128 and run_iters=2.""" batch_size = 128 diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py index e7d426d98ba..63e99e36285 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py @@ -25,15 +25,6 @@ from tensorflow.python.keras.benchmarks import benchmark_util class BidirectionalLSTMBenchmark(tf.test.Benchmark): """Benchmarks for Bidirectional LSTM using `tf.test.Benchmark`.""" - # Required Arguments for measure_performance. - # x: Input data, it could be Numpy or load from tfds. - # y: Target data. If `x` is a dataset, generator instance, - # `y` should not be specified. - # loss: Loss function for model. - # optimizer: Optimizer for model. - # Other details can see in `measure_performance()` method of - # benchmark_util. - def __init__(self): super(BidirectionalLSTMBenchmark, self).__init__() self.max_feature = 20000 @@ -55,6 +46,15 @@ class BidirectionalLSTMBenchmark(tf.test.Benchmark): model = tf.keras.Model(inputs, outputs) return model + # In each benchmark test, the required arguments for the + # method `measure_performance` include: + # x: Input data, it could be Numpy or loaded from tfds. + # y: Target data. If `x` is a dataset or generator instance, + # `y` should not be specified. + # loss: Loss function for model. + # optimizer: Optimizer for model. + # Check more details in `measure_performance()` method of + # benchmark_util. def benchmark_bidirect_lstm_imdb_bs_128(self): """Measure performance with batch_size=128 and run_iters=3.""" batch_size = 128 diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/cifar10_cnn_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/cifar10_cnn_benchmark_test.py index 9806307d7d3..6bf5f8fb35a 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/cifar10_cnn_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/cifar10_cnn_benchmark_test.py @@ -24,14 +24,6 @@ from tensorflow.python.keras.benchmarks import benchmark_util class Cifar10CNNBenchmark(tf.test.Benchmark): """Benchmarks for CNN using `tf.test.Benchmark`.""" - # Required Arguments for measure_performance. - # x: Input data, it could be Numpy or load from tfds. - # y: Target data. If `x` is a dataset, generator instance, - # `y` should not be specified. - # loss: Loss function for model. - # optimizer: Optimizer for model. - # Other details can see in `measure_performance()` method of - # benchmark_util. 
def __init__(self): super(Cifar10CNNBenchmark, self).__init__() @@ -70,6 +62,15 @@ class Cifar10CNNBenchmark(tf.test.Benchmark): model.add(tf.keras.layers.Activation('softmax')) return model + # In each benchmark test, the required arguments for the + # method `measure_performance` include: + # x: Input data, it could be Numpy or loaded from tfds. + # y: Target data. If `x` is a dataset or generator instance, + # `y` should not be specified. + # loss: Loss function for model. + # optimizer: Optimizer for model. + # Check more details in `measure_performance()` method of + # benchmark_util. def benchmark_cnn_cifar10_bs_256(self): """Measure performance with batch_size=256 and run_iters=3.""" batch_size = 256 diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_conv_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_conv_benchmark_test.py index d828e267523..150d432b9fd 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_conv_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_conv_benchmark_test.py @@ -26,14 +26,6 @@ from tensorflow.python.keras.benchmarks import benchmark_util class ConvMnistBenchmark(tf.test.Benchmark): """Benchmarks for Convnet using `tf.test.Benchmark`.""" - # Required Arguments for measure_performance. - # x: Input data, it could be Numpy or load from tfds. - # y: Target data. If `x` is a dataset, generator instance, - # `y` should not be specified. - # loss: Loss function for model. - # optimizer: Optimizer for model. - # Other details can see in `measure_performance()` method of - # benchmark_util. def __init__(self): super(ConvMnistBenchmark, self).__init__() @@ -66,6 +58,15 @@ class ConvMnistBenchmark(tf.test.Benchmark): ) return model + # In each benchmark test, the required arguments for the + # method `measure_performance` include: + # x: Input data, it could be Numpy or loaded from tfds. + # y: Target data. If `x` is a dataset or generator instance, + # `y` should not be specified. + # loss: Loss function for model. + # optimizer: Optimizer for model. + # Check more details in `measure_performance()` method of + # benchmark_util. def benchmark_conv_mnist_bs_128(self): """Measure performance with batch_size=128 and run_iters=2.""" batch_size = 128 diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_hierarchical_rnn_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_hierarchical_rnn_benchmark_test.py index 82cbe560bb4..ad459710a95 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_hierarchical_rnn_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_hierarchical_rnn_benchmark_test.py @@ -24,14 +24,6 @@ from tensorflow.python.keras.benchmarks import benchmark_util class HierarchicalRNNBenchmark(tf.test.Benchmark): """Benchmarks for Hierarchical RNN using `tf.test.Benchmark`.""" - # Required Arguments for measure_performance. - # x: Input data, it could be Numpy or load from tfds. - # y: Target data. If `x` is a dataset, generator instance, - # `y` should not be specified. - # loss: Loss function for model. - # optimizer: Optimizer for model. - # Other details can see in `measure_performance()` method of - # benchmark_util. 
def __init__(self): super(HierarchicalRNNBenchmark, self).__init__() @@ -58,6 +50,15 @@ class HierarchicalRNNBenchmark(tf.test.Benchmark): return model + # In each benchmark test, the required arguments for the + # method `measure_performance` include: + # x: Input data, it could be Numpy or loaded from tfds. + # y: Target data. If `x` is a dataset or generator instance, + # `y` should not be specified. + # loss: Loss function for model. + # optimizer: Optimizer for model. + # Check more details in `measure_performance()` method of + # benchmark_util. def benchmark_hrnn_mnist_bs_256(self): """Measure performance with batch_size=256 and run_iters=4.""" batch_size = 256 diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_irnn_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_irnn_benchmark_test.py index 8d6f229669d..b455fdb9305 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_irnn_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_irnn_benchmark_test.py @@ -24,14 +24,6 @@ from tensorflow.python.keras.benchmarks import benchmark_util class IRNNMnistBenchmark(tf.test.Benchmark): """Benchmarks for IRNN using `tf.test.Benchmark`.""" - # Required Arguments for measure_performance. - # x: Input data, it could be Numpy or load from tfds. - # y: Target data. If `x` is a dataset, generator instance, - # `y` should not be specified. - # loss: Loss function for model. - # optimizer: Optimizer for model. - # Other details can see in `measure_performance()` method of - # benchmark_util. def __init__(self): super(IRNNMnistBenchmark, self).__init__() @@ -59,6 +51,15 @@ class IRNNMnistBenchmark(tf.test.Benchmark): model.add(tf.keras.layers.Activation('softmax')) return model + # In each benchmark test, the required arguments for the + # method `measure_performance` include: + # x: Input data, it could be Numpy or loaded from tfds. + # y: Target data. If `x` is a dataset or generator instance, + # `y` should not be specified. + # loss: Loss function for model. + # optimizer: Optimizer for model. + # Check more details in `measure_performance()` method of + # benchmark_util. def benchmark_irnn_mnist_bs_256(self): """Measure performance with batch_size=256 and run_iters=4.""" batch_size = 256 diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/reuters_mlp_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/reuters_mlp_benchmark_test.py index 064b5a4bc96..03cf8558531 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/reuters_mlp_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/reuters_mlp_benchmark_test.py @@ -26,14 +26,6 @@ from tensorflow.python.keras.benchmarks import benchmark_util class MLPReutersBenchmark(tf.test.Benchmark): """Benchmarks for MLP using `tf.test.Benchmark`.""" - # Required Arguments for measure_performance. - # x: Input data, it could be Numpy or load from tfds. - # y: Target data. If `x` is a dataset, generator instance, - # `y` should not be specified. - # loss: Loss function for model. - # optimizer: Optimizer for model. - # Other details can see in `measure_performance()` method of - # benchmark_util. 
def __init__(self): super(MLPReutersBenchmark, self).__init__() @@ -59,6 +51,15 @@ class MLPReutersBenchmark(tf.test.Benchmark): model.add(tf.keras.layers.Activation('softmax')) return model + # In each benchmark test, the required arguments for the + # method `measure_performance` include: + # x: Input data, it could be Numpy or loaded from tfds. + # y: Target data. If `x` is a dataset or generator instance, + # `y` should not be specified. + # loss: Loss function for model. + # optimizer: Optimizer for model. + # Check more details in `measure_performance()` method of + # benchmark_util. def benchmark_mlp_reuters_bs_128(self): """Measure performance with batch_size=128 and run_iters=2.""" batch_size = 128 diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py index 205153004b9..26bd92cf271 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py @@ -24,19 +24,9 @@ from tensorflow.python.keras.benchmarks import benchmark_util class TextWithTransformerBenchmark(tf.test.Benchmark): """Benchmarks for Text classification with Transformer - using `tf.test.Benchmark`. """ - # Required Arguments for measure_performance. - # x: Input data, it could be Numpy or load from tfds. - # y: Target data. If `x` is a dataset, generator instance, - # `y` should not be specified. - # loss: Loss function for model. - # optimizer: Optimizer for model. - # Other details can see in `measure_performance()` method of - # benchmark_util. - def __init__(self): super(TextWithTransformerBenchmark, self).__init__() self.max_feature = 20000 @@ -66,6 +56,15 @@ class TextWithTransformerBenchmark(tf.test.Benchmark): model = tf.keras.Model(inputs=inputs, outputs=outputs) return model + # In each benchmark test, the required arguments for the + # method `measure_performance` include: + # x: Input data, it could be Numpy or loaded from tfds. + # y: Target data. If `x` is a dataset or generator instance, + # `y` should not be specified. + # loss: Loss function for model. + # optimizer: Optimizer for model. + # Check more details in `measure_performance()` method of + # benchmark_util. def benchmark_text_classification_bs_128(self): """Measure performance with batch_size=128 and run_iters=3.""" batch_size = 128 From a1bcafd8d8b4bf99888d45f362442cbf9f8b957f Mon Sep 17 00:00:00 2001 From: "R. Alex hofer" Date: Fri, 17 Jul 2020 12:19:08 -0700 Subject: [PATCH 0724/2522] Make the sparse ops with Keras functional models error more verbose to aid debugging. PiperOrigin-RevId: 321829617 Change-Id: Ie2e0c131ca7632b37eb16aaeccbcd4894ee6bbd4 --- .../python/keras/engine/base_layer_utils.py | 33 ++++++++++++++----- .../keras/engine/base_layer_utils_test.py | 11 +++++-- 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/tensorflow/python/keras/engine/base_layer_utils.py b/tensorflow/python/keras/engine/base_layer_utils.py index de67080af66..b3446b1511f 100644 --- a/tensorflow/python/keras/engine/base_layer_utils.py +++ b/tensorflow/python/keras/engine/base_layer_utils.py @@ -211,18 +211,18 @@ def _create_keras_history_helper(tensors, processed_ops, created_layers): # TODO(omalleyt): Resolve circular dependency. 
from tensorflow.python.keras.engine import base_layer # pylint: disable=g-import-not-at-top tensor_list = nest.flatten(tensors) + sparse_ops = [] + ragged_tensors = [] for tensor in tensor_list: if getattr(tensor, '_keras_history', None) is not None: continue - if sparse_tensor.is_sparse(tensor) or ragged_tensor.is_ragged(tensor): - example = """ - weights_mult = lambda x: tf.sparse.sparse_dense_matmul(x, weights) - output = tf.keras.layers.Lambda(weights_mult)(input) - """ - raise ValueError('Tensorflow ops that generate ragged or sparse tensor ' - 'outputs are currently not supported by Keras automatic ' - 'op wrapping. Please wrap these ops in a Lambda layer: ' - '\n\n```\n{example}\n```\n'.format(example=example)) + if sparse_tensor.is_sparse(tensor): + sparse_ops.append(tensor.op) + continue + if ragged_tensor.is_ragged(tensor): + # Ragged tensors don't have an op property + ragged_tensors.append(tensor) + continue op = tensor.op # The Op that created this Tensor. if op not in processed_ops: # Recursively set `_keras_history`. @@ -264,6 +264,21 @@ def _create_keras_history_helper(tensors, processed_ops, created_layers): kwargs={}, outputs=op.outputs) processed_ops.update([op]) + if sparse_ops or ragged_tensors: + lambda_example = """ + weights_mult = lambda x: tf.sparse.sparse_dense_matmul(x, weights) + output = tf.keras.layers.Lambda(weights_mult)(input) + """ + raise ValueError( + 'Tensorflow ops that generate ragged or sparse tensor ' + 'outputs are currently not supported by Keras automatic ' + 'op wrapping. Please wrap these ops in a Lambda layer: ' + '\n\n```\n{example}\n```\n' + 'Sparse ops encountered: {sparse_ops}\n' + 'Ragged tensors encountered: {ragged_tensors}\n'.format( + example=lambda_example, + sparse_ops=str(sparse_ops), + ragged_tensors=str(ragged_tensors))) return processed_ops, created_layers diff --git a/tensorflow/python/keras/engine/base_layer_utils_test.py b/tensorflow/python/keras/engine/base_layer_utils_test.py index c59e518536d..af389402eb8 100644 --- a/tensorflow/python/keras/engine/base_layer_utils_test.py +++ b/tensorflow/python/keras/engine/base_layer_utils_test.py @@ -91,14 +91,21 @@ class OpLayerTest(keras_parameterized.TestCase): def test_ragged_op_layer(self): with testing_utils.use_keras_tensors_scope(False): - with self.assertRaisesRegex(ValueError, 'Keras automatic op wrapping'): + with self.assertRaisesRegex( + ValueError, '(?ms)Keras automatic op wrapping' + '.*Ragged tensors encountered: ' + r'\[tf.RaggedTensor\(values=Tensor\("Cast:0", shape=\((\?|None),\), ' + r'dtype=float32\), row_splits=Tensor\("Placeholder_1:0", ' + r'shape=\((\?|None),\), dtype=int64\)\)\]'): int_values = keras.Input(shape=(None,), dtype=dtypes.int32, ragged=True) float_values = math_ops.cast(int_values, dtypes.float32) _ = keras.Model(int_values, float_values) def test_sparse_op_layer(self): with testing_utils.use_keras_tensors_scope(False): - with self.assertRaisesRegex(ValueError, 'Keras automatic op wrapping'): + with self.assertRaisesRegex( + ValueError, "(?ms)Keras automatic op wrapping" + r".*Sparse ops encountered: \[\\]"): int_values = keras.Input(shape=(None,), dtype=dtypes.int32, sparse=True) float_values = math_ops.cast(int_values, dtypes.float32) _ = keras.Model(int_values, float_values) From 7d192d4c90ad8fe14ad5c5bc1c34065acdb88a10 Mon Sep 17 00:00:00 2001 From: Gabriel Rasskin Date: Fri, 17 Jul 2020 12:27:29 -0700 Subject: [PATCH 0725/2522] Update string splitting --- tensorflow/security/fuzzing/string_print_f.cc | 4 ++-- 1 file changed, 2 insertions(+), 
2 deletions(-) diff --git a/tensorflow/security/fuzzing/string_print_f.cc b/tensorflow/security/fuzzing/string_print_f.cc index c6c45c22863..4d0a62870da 100644 --- a/tensorflow/security/fuzzing/string_print_f.cc +++ b/tensorflow/security/fuzzing/string_print_f.cc @@ -27,8 +27,8 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { FuzzedDataProvider fuzzed_data(data, size); const char split = fuzzed_data.ConsumeIntegral(); - const char split_a = split % 8; - const char split_b = 8 - (split % 8); + const char split_a = split & 0x07; + const char split_b = (split >> 3) & 0x07; const std::string sa_string = fuzzed_data.ConsumeBytesAsString(split_a); const std::string sb_string = fuzzed_data.ConsumeBytesAsString(split_b); From 0a7b7b014fd32b4d90a7de300cbe4ad385ad5656 Mon Sep 17 00:00:00 2001 From: Zhenyu Tan Date: Fri, 17 Jul 2020 12:51:52 -0700 Subject: [PATCH 0726/2522] remove dependency on feature_column_lib.DenseFeatures PiperOrigin-RevId: 321836252 Change-Id: I48bb1cbf1a44c4523693bd99335d516769dcb919 --- tensorflow/python/keras/saving/save_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/keras/saving/save_test.py b/tensorflow/python/keras/saving/save_test.py index 871e4db7254..4df46864f22 100644 --- a/tensorflow/python/keras/saving/save_test.py +++ b/tensorflow/python/keras/saving/save_test.py @@ -33,6 +33,7 @@ from tensorflow.python.keras import combinations from tensorflow.python.keras import losses from tensorflow.python.keras import testing_utils from tensorflow.python.keras.engine import sequential +from tensorflow.python.keras.feature_column import dense_features from tensorflow.python.keras.layers import core from tensorflow.python.keras.saving import model_config from tensorflow.python.keras.saving import save @@ -151,7 +152,7 @@ class TestSaveModel(test.TestCase, parameterized.TestCase): 'b': keras.layers.Input(shape=(1,), name='b', dtype='string') } - fc_layer = feature_column_lib.DenseFeatures(cols)(input_layers) + fc_layer = dense_features.DenseFeatures(cols)(input_layers) output = keras.layers.Dense(10)(fc_layer) model = keras.models.Model(input_layers, output) From c49b3f570dedbee09dfc2395bd1c9de6b95e5439 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Jul 2020 12:52:14 -0700 Subject: [PATCH 0727/2522] Create the reverse mapping of TfLiteType -> cpp types. 
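A minimal, standalone sketch of the pattern (placeholder names, not the actual TfLite symbols, assuming only standard C++): a single macro emits both the type-to-enum function specialization and the enum-to-type struct specialization, so the two directions of the mapping cannot drift apart.

  enum class Kind { kBool, kFloat };
  template <typename T> constexpr Kind KindOf();  // C++ type -> enum, specialized below
  template <Kind K> struct TypeOf {};             // enum -> C++ type, specialized below

  #define MATCH_KIND(CPP_TYPE, KIND_ENUM)                                \
    template <> constexpr Kind KindOf<CPP_TYPE>() { return KIND_ENUM; }  \
    template <> struct TypeOf<KIND_ENUM> { using Type = CPP_TYPE; }

  MATCH_KIND(bool, Kind::kBool);
  MATCH_KIND(float, Kind::kFloat);

  static_assert(KindOf<TypeOf<Kind::kBool>::Type>() == Kind::kBool, "maps are inverses");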
Also put the boilerplate in a macro similar to tensorflow/core/framework/types.h PiperOrigin-RevId: 321836334 Change-Id: Iac20ee3742d63938ba5bf84134757eabdd5e61c2 --- tensorflow/lite/BUILD | 10 +++ tensorflow/lite/type_to_tflitetype.h | 86 +++++++++------------- tensorflow/lite/type_to_tflitetype_test.cc | 65 ++++++++++++++++ 3 files changed, 110 insertions(+), 51 deletions(-) create mode 100644 tensorflow/lite/type_to_tflitetype_test.cc diff --git a/tensorflow/lite/BUILD b/tensorflow/lite/BUILD index 06bfaf348fb..1c0882ef0aa 100644 --- a/tensorflow/lite/BUILD +++ b/tensorflow/lite/BUILD @@ -624,6 +624,16 @@ cc_library( deps = ["//tensorflow/lite/c:common"], ) +cc_test( + name = "type_to_tflitetype_test", + size = "small", + srcs = ["type_to_tflitetype_test.cc"], + deps = [ + ":type_to_tflitetype", + "@com_google_googletest//:gtest_main", + ], +) + cc_test( name = "minimal_logging_test", size = "small", diff --git a/tensorflow/lite/type_to_tflitetype.h b/tensorflow/lite/type_to_tflitetype.h index 4ad36688bee..a95b233c13c 100644 --- a/tensorflow/lite/type_to_tflitetype.h +++ b/tensorflow/lite/type_to_tflitetype.h @@ -28,59 +28,43 @@ limitations under the License. namespace tflite { -// Map statically from a c++ type to a TfLiteType. Used in interpreter for safe -// casts. -template +// Map statically from a C++ type to a TfLiteType. Used in interpreter for +// safe casts. +// Example: +// typeToTfLiteType() -> kTfLiteBool +template constexpr TfLiteType typeToTfLiteType() { return kTfLiteNoType; } -template <> -constexpr TfLiteType typeToTfLiteType() { - return kTfLiteInt32; -} -template <> -constexpr TfLiteType typeToTfLiteType() { - return kTfLiteInt16; -} -template <> -constexpr TfLiteType typeToTfLiteType() { - return kTfLiteInt64; -} -template <> -constexpr TfLiteType typeToTfLiteType() { - return kTfLiteFloat32; -} -template <> -constexpr TfLiteType typeToTfLiteType() { - return kTfLiteUInt8; -} -template <> -constexpr TfLiteType typeToTfLiteType() { - return kTfLiteInt8; -} -template <> -constexpr TfLiteType typeToTfLiteType() { - return kTfLiteBool; -} -template <> -constexpr TfLiteType typeToTfLiteType>() { - return kTfLiteComplex64; -} -template <> -constexpr TfLiteType typeToTfLiteType>() { - return kTfLiteComplex128; -} -template <> -constexpr TfLiteType typeToTfLiteType() { - return kTfLiteString; -} -template <> -constexpr TfLiteType typeToTfLiteType() { - return kTfLiteFloat16; -} -template <> -constexpr TfLiteType typeToTfLiteType() { - return kTfLiteFloat64; -} +// Map from TfLiteType to the corresponding C++ type. +// Example: +// TfLiteTypeToType::Type -> bool +template +struct TfLiteTypeToType {}; // Specializations below + +// Template specialization for both typeToTfLiteType and TfLiteTypeToType. 
+#define MATCH_TYPE_AND_TFLITE_TYPE(CPP_TYPE, TFLITE_TYPE_ENUM) \ + template <> \ + constexpr TfLiteType typeToTfLiteType() { \ + return TFLITE_TYPE_ENUM; \ + } \ + template <> \ + struct TfLiteTypeToType { \ + using Type = CPP_TYPE; \ + } + +MATCH_TYPE_AND_TFLITE_TYPE(int, kTfLiteInt32); +MATCH_TYPE_AND_TFLITE_TYPE(int16_t, kTfLiteInt16); +MATCH_TYPE_AND_TFLITE_TYPE(int64_t, kTfLiteInt64); +MATCH_TYPE_AND_TFLITE_TYPE(float, kTfLiteFloat32); +MATCH_TYPE_AND_TFLITE_TYPE(unsigned char, kTfLiteUInt8); +MATCH_TYPE_AND_TFLITE_TYPE(int8_t, kTfLiteInt8); +MATCH_TYPE_AND_TFLITE_TYPE(bool, kTfLiteBool); +MATCH_TYPE_AND_TFLITE_TYPE(std::complex, kTfLiteComplex64); +MATCH_TYPE_AND_TFLITE_TYPE(std::complex, kTfLiteComplex128); +MATCH_TYPE_AND_TFLITE_TYPE(std::string, kTfLiteString); +MATCH_TYPE_AND_TFLITE_TYPE(TfLiteFloat16, kTfLiteFloat16); +MATCH_TYPE_AND_TFLITE_TYPE(double, kTfLiteFloat64); + } // namespace tflite #endif // TENSORFLOW_LITE_TYPE_TO_TFLITETYPE_H_ diff --git a/tensorflow/lite/type_to_tflitetype_test.cc b/tensorflow/lite/type_to_tflitetype_test.cc new file mode 100644 index 00000000000..51148531913 --- /dev/null +++ b/tensorflow/lite/type_to_tflitetype_test.cc @@ -0,0 +1,65 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/type_to_tflitetype.h" + +#include + +#include + +namespace tflite { +namespace { + +TEST(TypeToTfLiteType, TypeMapsAreInverseOfEachOther) { + EXPECT_EQ(kTfLiteInt16, + typeToTfLiteType::Type>()); + EXPECT_EQ(kTfLiteInt32, + typeToTfLiteType::Type>()); + EXPECT_EQ(kTfLiteFloat32, + typeToTfLiteType::Type>()); + EXPECT_EQ(kTfLiteUInt8, + typeToTfLiteType::Type>()); + EXPECT_EQ(kTfLiteInt8, + typeToTfLiteType::Type>()); + EXPECT_EQ(kTfLiteBool, + typeToTfLiteType::Type>()); + EXPECT_EQ(kTfLiteComplex64, + typeToTfLiteType::Type>()); + EXPECT_EQ(kTfLiteComplex128, + typeToTfLiteType::Type>()); + EXPECT_EQ(kTfLiteString, + typeToTfLiteType::Type>()); + EXPECT_EQ(kTfLiteFloat16, + typeToTfLiteType::Type>()); + EXPECT_EQ(kTfLiteFloat64, + typeToTfLiteType::Type>()); +} + +TEST(TypeToTfLiteType, Sanity) { + EXPECT_EQ(kTfLiteFloat32, typeToTfLiteType()); + EXPECT_EQ(kTfLiteBool, typeToTfLiteType()); + EXPECT_EQ(kTfLiteString, typeToTfLiteType()); + static_assert( + std::is_same::Type>::value, + "TfLiteTypeToType test failure"); + static_assert(std::is_same::Type>::value, + "TfLiteTypeToType test failure"); + static_assert( + std::is_same::Type>::value, + "TfLiteTypeToType test failure"); +} + +} // namespace +} // namespace tflite From ce190ec2442150b4ead53c868a97956489bd98b0 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 17 Jul 2020 12:55:18 -0700 Subject: [PATCH 0728/2522] Dynamic literal support PiperOrigin-RevId: 321836977 Change-Id: Ib5524846de424da20643c6982f6454614d9ffa07 --- tensorflow/compiler/xla/literal.cc | 267 ++---------------- tensorflow/compiler/xla/literal.h | 64 +---- tensorflow/compiler/xla/literal_test.cc | 118 +------- .../compiler/xla/service/hlo_evaluator.cc | 4 - .../service/interpreter/executable_base.cc | 20 +- tensorflow/compiler/xla/shape_util.cc | 9 - tensorflow/compiler/xla/shape_util.h | 3 - 7 files changed, 34 insertions(+), 451 deletions(-) diff --git a/tensorflow/compiler/xla/literal.cc b/tensorflow/compiler/xla/literal.cc index c09a5fd8ac9..73c37d6b2f3 100644 --- a/tensorflow/compiler/xla/literal.cc +++ b/tensorflow/compiler/xla/literal.cc @@ -48,10 +48,6 @@ namespace { using absl::StrCat; constexpr bool kLittleEndian = __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__; -// Literals can be used as DMA targets, which can require alignment. We -// force a tensorflow::Allocator::kAllocatorAlignment-byte minimum -// alignment. -constexpr int kMinimumAlignment = 64; // Converts between little and big endian. // @@ -137,14 +133,12 @@ void Literal::SetPiece(const Shape& shape, Piece* piece, bool allocate_arrays) { } } else if (shape.IsArray()) { if (allocate_arrays) { + // Literals can be used as DMA targets, which can require alignment. We + // force a tensorflow::Allocator::kAllocatorAlignment-byte minimum + // alignment. + constexpr int kMinimumAlignment = 64; piece->set_buffer(static_cast(tensorflow::port::AlignedMalloc( piece->size_bytes(), kMinimumAlignment))); - if (shape.is_dynamic()) { - CHECK_EQ(piece->dynamic_size_buffer(), nullptr); - piece->set_dynamic_size_buffer( - static_cast(tensorflow::port::AlignedMalloc( - piece->dynamic_size_buffer_bytes(), kMinimumAlignment))); - } } } else { // If the shape is neither an array nor tuple, then it must be @@ -177,9 +171,6 @@ void Literal::DeallocateBuffers() { if (piece->buffer() != nullptr) { tensorflow::port::AlignedFree(piece->buffer()); } - if (piece->dynamic_size_buffer() != nullptr) { - tensorflow::port::AlignedFree(piece->dynamic_size_buffer()); - } }); } @@ -208,15 +199,6 @@ Literal LiteralBase::CreateFromShape(const Shape& shape) { return literal; } -int32 LiteralBase::GetDynamicSize(int64 dim_index) const { - return GetDynamicSize(dim_index, {}); -} - -int32 LiteralBase::GetDynamicSize(int64 dim_index, - const ShapeIndex& shape_index) const { - return piece(shape_index).GetDynamicSize(dim_index); -} - absl::optional LiteralBase::GetFirstInteger() const { switch (shape().element_type()) { case U8: @@ -399,9 +381,7 @@ std::vector Literal::DecomposeTuple() { // Move the respective buffer over to the element Literal. dest_piece->set_buffer(src_piece.buffer()); - dest_piece->set_dynamic_size_buffer(src_piece.dynamic_size_buffer()); src_piece.set_buffer(nullptr); - src_piece.set_dynamic_size_buffer(nullptr); }); } // Set this literal to be nil-shaped. @@ -427,51 +407,23 @@ void CopyElementsBetween(absl::Span dest, src[IndexUtil::MultidimensionalIndexToLinearIndex(src_shape, index)]; } while (IndexUtil::BumpIndices(dest_shape, absl::MakeSpan(index))); } + } // namespace -int32 LiteralBase::Piece::GetDynamicSize(int64 dim_index) const { - CHECK(LayoutUtil::IsDenseArray(subshape())); - if (!subshape_->is_dynamic_dimension(dim_index)) { - // This is a static dimension, return size. 
- return subshape_->dimensions(dim_index); - } - CHECK_NE(dynamic_size_buffer(), nullptr); - return dynamic_size_buffer_[dim_index]; -} - -void LiteralBase::Piece::SetDynamicSize(int64 dim_index, int32 size) { - CHECK(LayoutUtil::IsDenseArray(subshape())); - CHECK(subshape_->is_dynamic_dimension(dim_index)); - if (dynamic_size_buffer() == nullptr) { - // Lazily initialize the dynamic size buffer. - set_dynamic_size_buffer(static_cast(tensorflow::port::AlignedMalloc( - dynamic_size_buffer_bytes(), kMinimumAlignment))); - /*for (int64 i = 0; i < subshape().rank(); ++i) { - // Initialized to -1 to help debug. - dynamic_size_buffer_[i] = -1; - }*/ - } - dynamic_size_buffer_[dim_index] = size; -} - -Status LiteralBase::Piece::CopyFrom(const LiteralBase::Piece& src, - bool only_dynamic_bound) { +Status LiteralBase::Piece::CopyFrom(const LiteralBase::Piece& src) { CHECK(subshape_ != nullptr); CHECK(src.subshape_ != nullptr); if (ShapeUtil::Equal(subshape(), src.subshape())) { // If the layouts are equal it's faster just to memcpy. memcpy(buffer(), src.buffer(), src.size_bytes()); } else { + TF_RET_CHECK(ShapeUtil::Compatible(src.subshape(), subshape())); std::vector origin(subshape().rank(), 0); switch (subshape().element_type()) { -#define COPY_ELEMENTS(XLA_T, NATIVE_T) \ - case (XLA_T): \ - if (only_dynamic_bound) { \ - CopyElementsWithDynamicBound(src); \ - } else { \ - CopyElementsBetween(data(), src.data(), \ - subshape(), src.subshape()); \ - } \ +#define COPY_ELEMENTS(XLA_T, NATIVE_T) \ + case (XLA_T): \ + CopyElementsBetween(data(), src.data(), \ + subshape(), src.subshape()); \ break; COPY_ELEMENTS(U8, uint8); COPY_ELEMENTS(U16, uint16); @@ -495,54 +447,21 @@ Status LiteralBase::Piece::CopyFrom(const LiteralBase::Piece& src, PrimitiveType_Name(subshape().element_type())); } } - DCHECK_EQ(dynamic_size_buffer_bytes(), src.dynamic_size_buffer_bytes()); - if (subshape().is_dynamic() && src.subshape().is_dynamic()) { - CHECK_NE(dynamic_size_buffer_, nullptr); - CHECK_NE(src.dynamic_size_buffer_, nullptr); - memcpy(dynamic_size_buffer(), src.dynamic_size_buffer(), - src.dynamic_size_buffer_bytes()); - } return Status::OK(); } -void MutableLiteralBase::SetDynamicSize(int64 dim_index, int32 size) { - return SetDynamicSize(dim_index, {}, size); -} - -void MutableLiteralBase::SetDynamicSize(int64 dim_index, - const ShapeIndex& shape_index, - int32 size) { - Shape* subshape_ = ShapeUtil::GetMutableSubshape(shape_.get(), shape_index); - CHECK_GE(subshape_->dimensions(dim_index), size); - if (subshape_->dimensions(dim_index) == size) { - subshape_->set_dynamic_dimension(dim_index, false); - return; - } - subshape_->set_dynamic_dimension(dim_index, true); - piece(shape_index).SetDynamicSize(dim_index, size); -} - Status MutableLiteralBase::CopyFrom(const LiteralSlice& src_literal, const ShapeIndex& dest_shape_index, - const ShapeIndex& src_shape_index, - bool only_dynamic_bound) { + const ShapeIndex& src_shape_index) { const Shape& dest_subshape = ShapeUtil::GetSubshape(shape(), dest_shape_index); const Shape& src_subshape = ShapeUtil::GetSubshape(src_literal.shape(), src_shape_index); - if (only_dynamic_bound) { - auto bound_shape = dest_subshape.is_static() ? src_subshape : dest_subshape; - auto compact_shape = - dest_subshape.is_static() ? 
dest_subshape : src_subshape; - CHECK(ShapeUtil::DynamicShapeIsCompatible(compact_shape, bound_shape)) - << compact_shape.ToString() << " vs " << bound_shape.ToString(); - } else { - if (!ShapeUtil::Compatible(dest_subshape, src_subshape)) { - return InvalidArgument( - "Destination subshape incompatible with source subshape: %s vs %s", - ShapeUtil::HumanString(dest_subshape), - ShapeUtil::HumanString(src_subshape)); - } + if (!ShapeUtil::Compatible(dest_subshape, src_subshape)) { + return InvalidArgument( + "Destination subshape incompatible with source subshape: %s vs %s", + ShapeUtil::HumanString(dest_subshape), + ShapeUtil::HumanString(src_subshape)); } return root_piece_->ForEachMutableSubpieceWithStatus( [&](const ShapeIndex& index, Piece* piece) { @@ -567,9 +486,7 @@ Status MutableLiteralBase::CopyFrom(const LiteralSlice& src_literal, for (int64 i = dest_shape_index.size(); i < index.size(); ++i) { src_piece_index.push_back(index[i]); } - TF_RETURN_IF_ERROR( - piece->CopyFrom(src_literal.piece(src_piece_index), - /*only_dynamic_bound=*/only_dynamic_bound)); + TF_RETURN_IF_ERROR(piece->CopyFrom(src_literal.piece(src_piece_index))); return Status::OK(); }); } @@ -597,9 +514,7 @@ Status Literal::MoveFrom(Literal&& src_literal, } Piece& dest_piece = piece(dest_index); tensorflow::port::AlignedFree(dest_piece.buffer()); - tensorflow::port::AlignedFree(dest_piece.dynamic_size_buffer()); dest_piece.set_buffer(src_piece.buffer()); - dest_piece.set_dynamic_size_buffer(src_piece.dynamic_size_buffer()); }); src_literal.shape_ = absl::make_unique(ShapeUtil::MakeNil()); @@ -714,41 +629,6 @@ Literal LiteralBase::Relayout(const Shape& shape_with_layout) const { return result; } -Literal LiteralBase::ToBoundedDynamic(const Shape& bounded_shape) const { - CHECK(bounded_shape.is_dynamic()); - Literal result(bounded_shape); - ShapeUtil::ForEachSubshape( - shape(), [&](const Shape& subshape, const ShapeIndex& index) { - if (!subshape.IsArray()) { - return; - } - for (int64 i = 0; i < subshape.rank(); ++i) { - result.SetDynamicSize(i, subshape.dimensions(i)); - } - }); - TF_CHECK_OK(result.CopyFrom(*this, {}, {}, /*only_dynamic_bound=*/true)); - - return result; -} - -Literal LiteralBase::ToStatic() const { - // Create new shape with 'new_layout' set at the given shape index. 
- Shape new_shape = shape(); - ShapeUtil::ForEachMutableSubshape( - &new_shape, [this](Shape* subshape, const ShapeIndex& index) { - if (!subshape->IsArray()) { - return; - } - for (int64 i = 0; i < subshape->rank(); ++i) { - subshape->set_dynamic_dimension(i, false); - subshape->set_dimensions(i, GetDynamicSize(i, index)); - } - }); - Literal result(new_shape); - TF_CHECK_OK(result.CopyFrom(*this, {}, {}, /*only_dynamic_bound=*/true)); - return result; -} - StatusOr LiteralBase::Broadcast( const Shape& result_shape, absl::Span dimensions) const { if (!shape().IsArray()) { @@ -772,11 +652,6 @@ StatusOr LiteralBase::Broadcast( const int64 primitive_size = ShapeUtil::ByteSizeOfPrimitiveType(shape().element_type()); - for (int64 i = 0; i < dimensions.size(); ++i) { - int64 dynamic_size = GetDynamicSize(i); - result.SetDynamicSize(dimensions[i], dynamic_size); - } - ShapeUtil::ForEachIndex( result_shape, [&](absl::Span output_index) { for (int64 i = 0; i < dimensions.size(); ++i) { @@ -799,9 +674,6 @@ StatusOr LiteralBase::Reshape( if (!shape().IsArray()) { return InvalidArgument("Reshape does not support tuples."); } - if (shape().is_dynamic()) { - return Unimplemented("Dynamic reshape is not implemented."); - } Literal output; if (!LayoutUtil::IsMonotonicWithDim0Major(shape().layout())) { output = Relayout(LayoutUtil::GetDefaultLayoutForRank(shape().rank())); @@ -856,9 +728,6 @@ Literal LiteralBase::Transpose(absl::Span permutation) const { layout->add_minor_to_major(inverse_permutation[index]); } Literal new_literal(permuted_shape); - for (int64 i = 0; i < shape().rank(); i++) { - new_literal.SetDynamicSize(inverse_permutation[i], GetDynamicSize(i)); - } DCHECK_EQ(ShapeUtil::ByteSizeOf(new_literal.shape()), ShapeUtil::ByteSizeOf(shape())); std::memcpy(new_literal.untyped_data(), untyped_data(), size_bytes()); @@ -878,14 +747,6 @@ Literal LiteralBase::SliceInternal( return Get(new_indices); }) .ok()); - for (int64 dnum = 0; dnum < shape().rank(); ++dnum) { - if (shape().is_dynamic_dimension(dnum)) { - int64 dynamic_size = GetDynamicSize(dnum) - start_indices[dnum]; - CHECK_GE(dynamic_size, 0) << GetDynamicSize(dnum); - dynamic_size = std::min(dynamic_size, result_shape.dimensions(dnum)); - result_literal.SetDynamicSize(dnum, dynamic_size); - } - } return result_literal; } @@ -902,10 +763,9 @@ Literal LiteralBase::Slice(absl::Span start_indices, CHECK_GE(dimension, 0) << "dnum = " << dnum; result_dimensions.push_back(dimension); } - auto result_shape = + const auto result_shape = ShapeUtil::MakeShapeWithLayout(shape().element_type(), result_dimensions, LayoutUtil::MinorToMajor(shape())); - ShapeUtil::CopyDynamicDimensions(&result_shape, shape()); switch (result_shape.element_type()) { case PRED: return SliceInternal(result_shape, start_indices); @@ -1222,24 +1082,11 @@ void DenseArrayToStringHelper(const LiteralBase& literal, if (print_shape) { pieces->push_back(ShapeToString(print_layout, subshape)); - if (subshape.is_dynamic()) { - pieces->push_back("("); - for (int64 i = 0; i < subshape.dimensions_size(); ++i) { - pieces->push_back(StrCat(literal.GetDynamicSize(i, shape_index))); - if (i < subshape.dimensions_size() - 1) { - pieces->push_back(","); - } - } - pieces->push_back(")"); - } pieces->push_back(" "); } std::vector indices = {}; - std::vector dimensions; - dimensions.reserve(subshape.rank()); - for (int64 i = 0; i < subshape.rank(); ++i) { - dimensions.push_back(literal.GetDynamicSize(i, shape_index)); - } + std::vector dimensions(subshape.dimensions().begin(), + 
subshape.dimensions().end()); to_string_recursive(dimensions, &indices); } @@ -1527,44 +1374,13 @@ StatusOr LiteralBase::ConvertToShape(const Shape& dest_shape) const { return literal; } -template -void LiteralBase::Piece::CopyElementsWithDynamicBound( - const LiteralBase::Piece& src) { - auto dest_shape = subshape(); - auto src_shape = src.subshape(); - - // At least one shape has to be static as bound. - CHECK(dest_shape.is_static() || src_shape.is_static()); - auto bound_shape = dest_shape.is_static() ? src_shape : dest_shape; - if (ShapeUtil::IsZeroElementArray(dest_shape)) { - return; - } - std::vector index(dest_shape.rank()); - do { - bool out_of_bound = false; - for (int64 i = 0; i < index.size(); ++i) { - // Do not copy elements beyond dynamic bound. - if (index[i] >= GetDynamicSize(i) || index[i] >= src.GetDynamicSize(i)) { - out_of_bound = true; - } - } - if (out_of_bound) { - continue; - } - data()[IndexUtil::MultidimensionalIndexToLinearIndex(dest_shape, - index)] = - src.data()[IndexUtil::MultidimensionalIndexToLinearIndex( - src_shape, index)]; - } while (IndexUtil::BumpIndices(bound_shape, absl::MakeSpan(index))); -} - template bool LiteralBase::Piece::EqualElementsInternal( const LiteralBase::Piece& other, std::vector* multi_index) const { if (multi_index->size() == subshape().rank()) { return (Get(*multi_index) == other.Get(*multi_index)); } - for (int64 i = 0; i < GetDynamicSize(multi_index->size()); ++i) { + for (int64 i = 0; i < subshape().dimensions(multi_index->size()); ++i) { multi_index->push_back(i); if (!EqualElementsInternal(other, multi_index)) { return false; @@ -1574,26 +1390,10 @@ bool LiteralBase::Piece::EqualElementsInternal( return true; } -bool LiteralBase::Piece::EqualDynamicSize( - const LiteralBase::Piece& other) const { - DCHECK(ShapeUtil::Compatible(subshape(), other.subshape())); - if (subshape().is_static()) { - return true; - } - - for (int64 i = 0; i < subshape().rank(); ++i) { - if (GetDynamicSize(i) != other.GetDynamicSize(i)) { - return false; - } - } - return true; -} - bool LiteralBase::Piece::EqualElements(const LiteralBase::Piece& other) const { DCHECK(ShapeUtil::Compatible(subshape(), other.subshape())); - if (subshape().is_static() && - ShapeUtil::Equal(subshape(), other.subshape()) && + if (ShapeUtil::Equal(subshape(), other.subshape()) && LayoutUtil::IsDenseArray(subshape())) { CHECK_EQ(size_bytes(), other.size_bytes()); return memcmp(buffer(), other.buffer(), size_bytes()) == 0; @@ -1636,33 +1436,17 @@ bool LiteralBase::Piece::EqualElements(const LiteralBase::Piece& other) const { } bool LiteralBase::operator==(const LiteralBase& other) const { - // Checking the structure of tuple literals. Checks for dense arrays are - // performed below. 
- if (!ShapeUtil::EqualStructure(shape(), other.shape())) { + if (!ShapeUtil::Compatible(shape(), other.shape())) { return false; } return root_piece().ForEachSubpieceWithBool( [&](const ShapeIndex& index, const Piece& piece) { - const Piece& other_piece = other.piece(index); - const Shape& subshape = piece.subshape(); - const Shape& other_subshape = other_piece.subshape(); - if (subshape.element_type() != other_subshape.element_type()) { - return false; - } if (!piece.subshape().IsArray()) { return true; } - if (subshape.rank() != other_subshape.rank()) { - return false; - } - - for (int64 i = 0; i < subshape.rank(); ++i) { - if (piece.GetDynamicSize(i) != other_piece.GetDynamicSize(i)) { - return false; - } - } + const Piece& other_piece = other.piece(index); if (!piece.EqualElements(other_piece)) { return false; } @@ -2251,7 +2035,6 @@ void MutableBorrowingLiteral::CopyPieceSubtree(const Shape& shape, } } else if (shape.IsArray()) { dest_piece->set_buffer(src_piece->buffer()); - dest_piece->set_dynamic_size_buffer(src_piece->dynamic_size_buffer()); } else { // If the shape is neither an array nor tuple, then it must be // zero-sized. Otherwise, some memory needs to be allocated for it. diff --git a/tensorflow/compiler/xla/literal.h b/tensorflow/compiler/xla/literal.h index 1ee71618887..a2be92fbf5b 100644 --- a/tensorflow/compiler/xla/literal.h +++ b/tensorflow/compiler/xla/literal.h @@ -112,10 +112,6 @@ class LiteralBase { template NativeT Get(absl::Span multi_index) const; - // Get the dynamic size on dim_index in the literal at the given shape_index. - int32 GetDynamicSize(int64 dim_index, const ShapeIndex& shape_index) const; - int32 GetDynamicSize(int64 dim_index) const; - // Returns the element value at index (0, ..., 0), however many zeroes are // required for that index. template @@ -285,18 +281,6 @@ class LiteralBase { // than being limited to a single array within the shape. Literal Relayout(const Shape& shape_with_layout) const; - // Generate a new literal whose static sizes are equal to the previous - // literal's dynamic sizes. - Literal ToStatic() const; - - // Expand a static literal into a new one with a bounded dyanmic literal. The - // static dimensions of the original literal becomes dynamic dimensions of the - // new literal, where the argument `bounded_shape` becomes the bounded shape - // of the new literal. - // - // Precondition: bounded_shape.is_dynamic() - Literal ToBoundedDynamic(const Shape& bounded_shape) const; - // Creates a new literal by reshaping this literal to have the given // dimensions. The total number of elements must not change; The // implementation currently only supports monotonic dim0-major layouts. @@ -370,22 +354,10 @@ class LiteralBase { template void Set(absl::Span index, NativeT value); - int32 GetDynamicSize(int64 dim_index) const; - void SetDynamicSize(int64 dim_index, int32 size); // Gets/sets the buffer holding the array data. char* buffer() const { return buffer_; } void set_buffer(char* buffer) { buffer_ = buffer; } - // Gets/sets the buffer holding dynamic sizes. - int32* dynamic_size_buffer() const { return dynamic_size_buffer_; } - void set_dynamic_size_buffer(int32* dynamic_size_buffer) { - dynamic_size_buffer_ = dynamic_size_buffer; - } - - int64 dynamic_size_buffer_bytes() const { - return subshape().dimensions_size() * sizeof(int32); - } - // Gets or sets the subshape of this piece. This reference points to a // subshape within the shape in the containing Literal (Literal::shape_). 
const Shape& subshape() const { return *subshape_; } @@ -462,21 +434,15 @@ class LiteralBase { } // Returns true if this piece and 'other' contain the same data. This piece - // and 'other' must be array-shaped and compatible. If a literal has dynamic - // shape, comparison is done only for the valid elements. + // and 'other' must be array-shaped and compatible. bool EqualElements(const Piece& other) const; - // Returns true if this piece and other pieces have the same dynamic - // dimension sizes. - bool EqualDynamicSize(const Piece& other) const; - // Writes the shape and data (if array-shaped) into the given proto. void WriteToProto(LiteralProto* proto) const; // Copy the data from 'src' into this piece's buffer. Shapes of this piece - // and src must be compatible. If only_dynamic_bound is true, only elements - // within dynamic bounds will be copied. - Status CopyFrom(const Piece& src, bool only_dynamic_bound); + // and src must be compatible. + Status CopyFrom(const Piece& src); // Copies the data from the given proto into this piece. The shape of this // piece must be equal (not just compatible) to the shape of the proto. @@ -531,15 +497,9 @@ class LiteralBase { bool EqualElementsInternal(const Piece& other, std::vector* multi_index) const; - // Internal helper to copy elements from another given piece - template - void CopyElementsWithDynamicBound(const LiteralBase::Piece& src); - // For array-shaped pieces, this is the buffer holding the literal data. char* buffer_ = nullptr; - int32* dynamic_size_buffer_ = nullptr; - // The shape of piece. This points into the shape of the containing Literal // (Literal::shape_). const Shape* subshape_ = nullptr; @@ -590,11 +550,6 @@ class MutableLiteralBase : public LiteralBase { // mutate the shape as this can produce malformed Literals. Shape* mutable_shape_do_not_use() { return shape_.get(); } - // Set the dynamic size on dim_index in the literal at the given shape_index. - void SetDynamicSize(int64 dim_index, const ShapeIndex& shape_index, - int32 size); - void SetDynamicSize(int64 dim_index, int32 size); - // Returns a pointer to the underlying buffer holding the array at the given // shape index. CHECKs if the subshape of the literal at the given ShapeIndex // is not array. @@ -605,12 +560,10 @@ class MutableLiteralBase : public LiteralBase { // Copy values from 'src_literal' rooted at 'src_shape_index' into this // literal rooted at 'dest_shape_index'. The subshape of this literal rooted // at 'dest_shape_index' must be compatible with the subshape of 'src_literal' - // rooted at 'src_shape_index', but need not be arrays. If only_dynamic_bound - // is true, only elements within dynamic bounds will be copied. + // rooted at 'src_shape_index', but need not be arrays. 
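  // [Editor's note: illustrative sketch, not part of the patch. A hypothetical
  // call to the simplified CopyFrom declared below, using the LiteralUtil
  // helpers that appear in literal_test.cc; any name not found in this diff is
  // an assumption.]
  //
  //   Literal dest = LiteralUtil::CreateR2<float>({{0.f, 0.f}, {0.f, 0.f}});
  //   Literal src = LiteralUtil::CreateR2<float>({{1.f, 2.f}, {3.f, 4.f}});
  //   // The two subshapes are compatible, so the element-wise copy succeeds.
  //   TF_CHECK_OK(dest.CopyFrom(src));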
Status CopyFrom(const LiteralSlice& src_literal, const ShapeIndex& dest_shape_index = {}, - const ShapeIndex& src_shape_index = {}, - bool only_dynamic_bound = false); + const ShapeIndex& src_shape_index = {}); // Copies the values from src_literal, starting at src_base shape indexes, // to this literal, starting at dest_base, where the copy size in each @@ -971,14 +924,9 @@ void LiteralBase::EachCell( return; } std::vector indices(shape().rank(), 0); - - Shape shape_dynamic = shape(); - for (int64 i = 0; i < shape_dynamic.rank(); ++i) { - shape_dynamic.set_dimensions(i, GetDynamicSize(i)); - } do { per_cell(indices, Get(indices)); - } while (IndexUtil::BumpIndices(shape_dynamic, absl::MakeSpan(indices))); + } while (IndexUtil::BumpIndices(shape(), absl::MakeSpan(indices))); } template diff --git a/tensorflow/compiler/xla/literal_test.cc b/tensorflow/compiler/xla/literal_test.cc index a58e450a55a..37316a2a807 100644 --- a/tensorflow/compiler/xla/literal_test.cc +++ b/tensorflow/compiler/xla/literal_test.cc @@ -149,16 +149,6 @@ TEST_F(LiteralUtilTest, R2ToString) { EXPECT_EQ(expected, literal.ToString()); } -TEST_F(LiteralUtilTest, R2DynamicToString) { - auto literal = LiteralUtil::CreateR2({{1, 2}, {3, 4}, {5, 6}}); - literal.SetDynamicSize(0, {}, 2); - const string expected = R"(s32[<=3,2](2,2) { - { 1, 2 }, - { 3, 4 } -})"; - EXPECT_EQ(expected, literal.ToString()); -} - TEST_F(LiteralUtilTest, R3ToString) { const auto literal = LiteralUtil::CreateR3({{{1}, {2}}, {{3}, {4}}, {{5}, {6}}}); @@ -431,28 +421,6 @@ TEST_F(LiteralUtilTest, TupleEquality) { EXPECT_NE(tuple1, different_tuple); } -TEST_F(LiteralUtilTest, DynamicShapeEquality) { - // Test equality with tuples. - auto r1 = LiteralUtil::CreateR1({1.0, 2.0}); - r1.SetDynamicSize(0, {}, 1); - auto r2 = LiteralUtil::CreateR2({{1.0, 2.0}, {3.0, 4.0}}); - r2.SetDynamicSize(0, {}, 1); - auto tuple1 = LiteralUtil::MakeTuple({&r1, &r2}); - - // Tuple with the same elements. One element is shared with the original - // tuple, the other is a clone of the element in the original tuple. - auto r1_clone = LiteralUtil::CreateR1({1.0, 3.0}); - r1_clone.SetDynamicSize(0, {}, 1); - auto tuple2 = LiteralUtil::MakeTuple({&r1_clone, &r2}); - EXPECT_EQ(tuple1, tuple2); - - // Tuple with different dynamic sizes. - auto r2_clone = LiteralUtil::CreateR2({{1.0, 2.0}, {3.0, 4.0}}); - r2_clone.SetDynamicSize(0, {}, 2); - auto tuple_3 = LiteralUtil::MakeTuple({&r1_clone, &r2_clone}); - EXPECT_NE(tuple1, tuple_3); -} - TEST_F(LiteralUtilTest, C64Equality) { // Test equality with tuples. 
auto vector = LiteralUtil::CreateR1({{1.0, 2.0}, {3.0, 4.0}}); @@ -724,47 +692,6 @@ TEST_F(LiteralUtilTest, TransposeR4) { }); } -TEST_F(LiteralUtilTest, TransposeDynamicR2) { - // F32[2, <=3] (2, 1) - auto original = LiteralUtil::CreateR2({{1, 2, 3}, {4, 5, 6}}); - original.SetDynamicSize(1, 1); - // F32[<=3, 2] (1, 2) - auto reshape = original.Transpose(/*permutation=*/{1, 0}); - - reshape.EachCell([&](absl::Span indices, float value) { - EXPECT_EQ(value, original.Get({indices[1], indices[0]})); - }); -} - -TEST_F(LiteralUtilTest, ToStaticR2) { - // F32[2, <=3] (2, 1) - auto original = LiteralUtil::CreateR2({{1, 2, 3}, {4, 5, 6}}); - original.SetDynamicSize(1, 1); - // F32[2, 1] - auto static_literal = original.ToStatic(); - EXPECT_EQ(static_literal.shape(), ShapeUtil::MakeShape(F32, {2, 1})); - EXPECT_TRUE(static_literal.shape().is_static()); - - static_literal.EachCell( - [&](absl::Span indices, float value) { - EXPECT_EQ(value, original.Get({indices[0], indices[1]})); - }); -} - -TEST_F(LiteralUtilTest, ToBoundedDynamicR2) { - // F32[2, 1] - auto original = LiteralUtil::CreateR2({{1}, {4}}); - // F32[2, <=3] (2, 1) - auto dynamic_shape = ShapeUtil::MakeShape(F32, {2, 3}, {false, true}); - auto dynamic_literal = original.ToBoundedDynamic(dynamic_shape); - EXPECT_EQ(dynamic_literal.shape(), dynamic_shape); - - dynamic_literal.EachCell( - [&](absl::Span indices, float value) { - EXPECT_EQ(value, original.Get({indices[0], indices[1]})); - }); -} - TEST_F(LiteralUtilTest, TestR4RelayoutEquivalence) { // Tests that using Relayout on an array is equivalent to creating it in the // target layout in the first place. @@ -870,38 +797,6 @@ TEST_F(LiteralUtilTest, SliceR3U32Full) { EXPECT_EQ(input_2x3x2, result); } -TEST_F(LiteralUtilTest, SliceR2Dynamic) { - auto input_3x4 = LiteralUtil::CreateR2( - {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}}); - input_3x4.SetDynamicSize(1, 3); - // slice second dim from dynamic size 3 to dynamic size 1. - auto result = input_3x4.Slice({0, 1}, {2, 2}); - auto expected = LiteralUtil::CreateR2({{2}, {6}}); - EXPECT_EQ(expected, result); - EXPECT_EQ(result.GetDynamicSize(1), 1); -} - -TEST_F(LiteralUtilTest, SliceR2DynamicInBound) { - auto input_3x4 = LiteralUtil::CreateR2( - {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}}); - input_3x4.SetDynamicSize(1, 1); - auto result = input_3x4.Slice({0, 0}, {2, 2}); - auto expected = LiteralUtil::CreateR2({{1}, {5}}); - EXPECT_EQ(expected, result); - EXPECT_EQ(result.GetDynamicSize(1), 1); -} - -TEST_F(LiteralUtilTest, SliceR2DynamicOutOfBound) { - auto input_3x4 = LiteralUtil::CreateR2( - {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}}); - input_3x4.SetDynamicSize(1, 1); - auto result = input_3x4.Slice({0, 1}, {2, 3}); - auto expected = LiteralUtil::CreateR2({{}, {}}); - EXPECT_EQ(expected, result); - // Out of bound access clamps into 0 sized dimension. 
- EXPECT_EQ(result.GetDynamicSize(1), 0); -} - TEST_F(LiteralUtilTest, PopulateR1S64) { Literal output(ShapeUtil::MakeShape(S64, {1})); output.PopulateR1({77}); @@ -1615,7 +1510,7 @@ TEST_F(LiteralUtilTest, CopyFromProto_u16) { EXPECT_EQ(u1, r[3]); } -TEST_F(LiteralUtilTest, LiteralDynamicSliceTest) { +TEST_F(LiteralUtilTest, LiteralSliceTest) { auto scalar = LiteralUtil::CreateR0(1.0); auto matrix = LiteralUtil::CreateR2({{1.0, 2.0}, {3.0, 4.0}}); auto tuple = LiteralUtil::MakeTuple({&scalar, &matrix}); @@ -2078,17 +1973,6 @@ TEST_F(LiteralUtilTest, BroadcastScalarToMatrix) { LiteralUtil::CreateR2({{9, 9}, {9, 9}})); } -TEST_F(LiteralUtilTest, DynamicBroadcast) { - Literal literal = LiteralUtil::CreateR1({1, 2}); - literal.SetDynamicSize(0, 1); - TF_ASSERT_OK_AND_ASSIGN( - Literal broadcasted_literal, - literal.Broadcast(/*result_shape=*/ShapeUtil::MakeShape(S64, {2, 2}), - /*dimensions=*/{1})); - EXPECT_EQ(broadcasted_literal, LiteralUtil::CreateR2({{1}, {1}})); - EXPECT_EQ(broadcasted_literal.GetDynamicSize(1), 1); -} - TEST_F(LiteralUtilTest, GetAsComplex128) { complex128 value = {1, 0}; Literal c1 = LiteralUtil::CreateR0(value); diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index 66e9e01fc38..ae8f49df4b4 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -440,10 +440,6 @@ Status HloEvaluator::HandleSetDimensionSize( Literal result(set_dimension_size->shape()); memcpy(result.untyped_data(), operand_literal.untyped_data(), operand_literal.size_bytes()); - const Literal& size_literal = - GetEvaluatedLiteralFor(set_dimension_size->operand(1)); - result.SetDynamicSize(set_dimension_size->dimension(), - size_literal.Get({})); evaluated_[set_dimension_size] = std::move(result); return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/interpreter/executable_base.cc b/tensorflow/compiler/xla/service/interpreter/executable_base.cc index 4b6a8aa5202..4b020ea2d32 100644 --- a/tensorflow/compiler/xla/service/interpreter/executable_base.cc +++ b/tensorflow/compiler/xla/service/interpreter/executable_base.cc @@ -81,17 +81,8 @@ StatusOr InterpreterExecutableBase::ExecuteAsyncOnStream( for (int64 i = 0; i < computation->num_parameters(); ++i) { const auto& expected_shape = computation->parameter_instruction(i)->shape(); const auto& actual_shape = argument_buffers[i].on_device_shape(); - bool shape_match = true; - if (expected_shape.is_dynamic()) { - if (!ShapeUtil::DynamicArrayShapeIsCompatible(actual_shape, - expected_shape)) { - shape_match = false; - } - } else if (!Shape::Equal().MinorToMajorOnlyInLayout()(expected_shape, - actual_shape)) { - shape_match = false; - } - if (!shape_match) { + if (!Shape::Equal().MinorToMajorOnlyInLayout()(expected_shape, + actual_shape)) { return InvalidArgument( "Shape mismatch on parameter %d. Expected %s, but was %s.", i, ShapeUtil::HumanStringWithLayout(expected_shape), @@ -109,18 +100,11 @@ StatusOr InterpreterExecutableBase::ExecuteAsyncOnStream( TF_ASSIGN_OR_RETURN(Literal arg_literal, transfer_manager->TransferLiteralFromDevice( run_options->stream(), argument_buffers[p])); - const auto& expected_shape = computation->parameter_instruction(p)->shape(); - if (expected_shape.is_dynamic()) { - // Expand the input literal to expected shape. 
- arg_literal = arg_literal.ToBoundedDynamic(expected_shape); - } arg_literals.push_back(std::move(arg_literal)); } TF_ASSIGN_OR_RETURN(Literal result_literal, Evaluate(*computation, arg_literals)); - // Shrink the generated dynamic shape into static shape. - result_literal = result_literal.ToStatic(); // Transform the result literal back into a ShapedBuffer. TF_ASSIGN_OR_RETURN(ScopedShapedBuffer result_buffers, diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc index 6e452293232..bce40578132 100644 --- a/tensorflow/compiler/xla/shape_util.cc +++ b/tensorflow/compiler/xla/shape_util.cc @@ -339,15 +339,6 @@ ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout( TF_DCHECK_OK(ValidateShape(*shape)); } -/* static */ void ShapeUtil::CopyDynamicDimensions(Shape* to, - const Shape& from) { - CHECK_EQ(to->rank(), from.rank()); - for (int64 i = 0; i < from.rank(); ++i) { - to->set_dynamic_dimension(i, from.is_dynamic_dimension(i)); - } - TF_DCHECK_OK(ValidateShape(*to)); -} - /* static */ bool ShapeUtil::ElementIsIntegral(const Shape& shape) { return primitive_util::IsIntegralType(shape.element_type()); } diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index 3789d828528..fe1a8acf6e4 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -377,9 +377,6 @@ class ShapeUtil { // Appends a major dimension to the shape with the given bound. static void AppendMajorDimension(int bound, Shape* shape); - // Copy the dynamic dimensions property from one shape to another. - static void CopyDynamicDimensions(Shape* to, const Shape& from); - // Returns an empty tuple shape. Can be used as a sentinel Shape value. static Shape MakeNil() { return MakeTupleShape({}); } From debf854ea11512e0ec0315b8c519b6e97e5874ce Mon Sep 17 00:00:00 2001 From: Yash Katariya Date: Fri, 17 Jul 2020 13:08:31 -0700 Subject: [PATCH 0729/2522] Convert the np.xxx to a markdown link and remove the bare url PiperOrigin-RevId: 321839759 Change-Id: I402426202a156688210e993641d5b51f449d8fd0 --- tensorflow/python/ops/numpy_ops/np_utils.py | 2 +- tensorflow/python/ops/numpy_ops/np_utils_test.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/numpy_ops/np_utils.py b/tensorflow/python/ops/numpy_ops/np_utils.py index 6408b9110dc..2abf7f9a795 100644 --- a/tensorflow/python/ops/numpy_ops/np_utils.py +++ b/tensorflow/python/ops/numpy_ops/np_utils.py @@ -293,7 +293,7 @@ def _add_np_doc(doc, np_fun_name, np_f): template = None if template is not None: link = template % np_fun_name - doc += 'See the NumPy documentation for `numpy.%s`: %s' % ( + doc += 'See the NumPy documentation for [`numpy.%s`](%s).' % ( np_fun_name, link) return doc diff --git a/tensorflow/python/ops/numpy_ops/np_utils_test.py b/tensorflow/python/ops/numpy_ops/np_utils_test.py index 11796f607e9..f22d1a6954d 100644 --- a/tensorflow/python/ops/numpy_ops/np_utils_test.py +++ b/tensorflow/python/ops/numpy_ops/np_utils_test.py @@ -81,7 +81,7 @@ Unsupported arguments: `x`. f docstring. 
-See the NumPy documentation for `numpy.np_fun`: %s""" +See the NumPy documentation for [`numpy.np_fun`](%s).""" expected = expected % (link) self.assertEqual(expected, f.__doc__) From 25197272cf1163cf6aa0fa6c20bea4b0369cbe0e Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Fri, 17 Jul 2020 20:33:53 +0000 Subject: [PATCH 0730/2522] TensorKey check shape and dtype --- tensorflow/core/kernels/tensor_map_test.cc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/kernels/tensor_map_test.cc b/tensorflow/core/kernels/tensor_map_test.cc index 1ee175be34d..8ea4efd75fa 100644 --- a/tensorflow/core/kernels/tensor_map_test.cc +++ b/tensorflow/core/kernels/tensor_map_test.cc @@ -36,10 +36,12 @@ TEST(TensorKeyTest, Equal) { TensorKey k1 = Tensor(15); TensorKey k2 = Tensor(15); EXPECT_EQ(k1, k2); + EXPECT_EQ(k1.shape(), k2.shape()); + EXPECT_EQ(k1.dtype(), k2.dtype()); - TensorKey k3 = Tensor(15); - TensorKey k4 = Tensor(37); - EXPECT_NE(k3, k4); + TensorKey k3 = Tensor(37.0); + EXPECT_NE(k1, k3); + EXPECT_NE(k1.dtype(), k3.dtype()); } TEST(TensorMapTest, Insert) { From 0ae7b5327815455487471cce1fb206b8ef036b24 Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Fri, 17 Jul 2020 20:35:35 +0000 Subject: [PATCH 0731/2522] separate key and value dtypes --- tensorflow/core/kernels/map_kernels.cc | 2 - tensorflow/core/kernels/map_kernels.h | 45 +++++-------------- tensorflow/core/ops/map_ops.cc | 39 +++++++--------- .../python/kernel_tests/map_ops_test.py | 40 ++++------------- tensorflow/python/ops/map_ops.py | 11 ++--- 5 files changed, 37 insertions(+), 100 deletions(-) diff --git a/tensorflow/core/kernels/map_kernels.cc b/tensorflow/core/kernels/map_kernels.cc index 45fa86c2bf6..c890ba77f54 100644 --- a/tensorflow/core/kernels/map_kernels.cc +++ b/tensorflow/core/kernels/map_kernels.cc @@ -35,6 +35,4 @@ REGISTER_KERNEL_BUILDER(Name("TensorMapInsert").Device(DEVICE_CPU), REGISTER_KERNEL_BUILDER(Name("TensorMapErase").Device(DEVICE_CPU), TensorMapErase); -REGISTER_KERNEL_BUILDER(Name("TensorMapReplace").Device(DEVICE_CPU), - TensorMapReplace); } diff --git a/tensorflow/core/kernels/map_kernels.h b/tensorflow/core/kernels/map_kernels.h index 1ab6fbd2323..5fb856bf00f 100644 --- a/tensorflow/core/kernels/map_kernels.h +++ b/tensorflow/core/kernels/map_kernels.h @@ -112,9 +112,7 @@ class TensorMapSize : public OpKernel { class TensorMapInsert : public OpKernel { public: - explicit TensorMapInsert(OpKernelConstruction* c) : OpKernel(c) { - OP_REQUIRES_OK(c, c->GetAttr("element_dtype", &element_dtype_)); - } + explicit TensorMapInsert(OpKernelConstruction* c) : OpKernel(c) {} ~TensorMapInsert() override {} void Compute(OpKernelContext* c) override { @@ -125,19 +123,14 @@ class TensorMapInsert : public OpKernel { TensorMap* output_map = nullptr; OP_REQUIRES_OK(c, ForwardInputOrCreateNewMap(c, 0, 0, *m, &output_map)); - output_map->insert(key, value); + output_map->replace(key, value); } - - private: - DataType element_dtype_; }; class TensorMapLookup : public OpKernel { public: - explicit TensorMapLookup(OpKernelConstruction* c) : OpKernel(c) { - OP_REQUIRES_OK(c, c->GetAttr("element_dtype", &element_dtype_)); - } + explicit TensorMapLookup(OpKernelConstruction* c) : OpKernel(c) {} ~TensorMapLookup() override {} void Compute(OpKernelContext* c) override { @@ -150,17 +143,12 @@ class TensorMapLookup : public OpKernel { c->set_output(0, m->tensors().find(key)->second); } - - private: - DataType element_dtype_; }; class TensorMapErase : public OpKernel { public: - explicit 
TensorMapErase(OpKernelConstruction* c) : OpKernel(c) { - OP_REQUIRES_OK(c, c->GetAttr("element_dtype", &element_dtype_)); - } + explicit TensorMapErase(OpKernelConstruction* c) : OpKernel(c) {} void Compute(OpKernelContext* c) override { const TensorMap* m = nullptr; @@ -177,35 +165,22 @@ class TensorMapErase : public OpKernel { OP_REQUIRES_OK(c, ForwardInputOrCreateNewMap(c, 0, 0, *m, &output_map)); output_map->tensors().erase(key); } - - private: - DataType element_dtype_; }; -class TensorMapReplace : public OpKernel { +class TensorMapHasKey : public OpKernel { public: - explicit TensorMapReplace(OpKernelConstruction* c) : OpKernel(c) { - OP_REQUIRES_OK(c, c->GetAttr("element_dtype", &element_dtype_)); - } - ~TensorMapReplace() override {} + explicit TensorMapHasKey(OpKernelConstruction* c) : OpKernel(c) {} + ~TensorMapHasKey() override {} void Compute(OpKernelContext* c) override { const TensorKey& key = c->input(1); - const Tensor& value = c->input(2); const TensorMap* m = nullptr; OP_REQUIRES_OK(c, GetInputMap(c, 0, &m)); - - OP_REQUIRES(c, m->tensors().find(key) != m->tensors().end(), - errors::InvalidArgument("Trying to replace non-existent key.")); - - TensorMap* output_map = nullptr; - OP_REQUIRES_OK(c, ForwardInputOrCreateNewMap(c, 0, 0, *m, &output_map)); - output_map->replace(key, value); + Tensor* result; + OP_REQUIRES_OK(c, c->allocate_output(0, TensorShape{}, &result)); + result->scalar()() = m->tensors().find(key) != m->tensors().end(); } - - private: - DataType element_dtype_; }; diff --git a/tensorflow/core/ops/map_ops.cc b/tensorflow/core/ops/map_ops.cc index f289a13c188..2e4284aa4a2 100644 --- a/tensorflow/core/ops/map_ops.cc +++ b/tensorflow/core/ops/map_ops.cc @@ -35,10 +35,11 @@ REGISTER_OP("TensorMapSize") REGISTER_OP("TensorMapInsert") .Input("input_handle: variant") - .Input("key: element_dtype") - .Input("value: element_dtype") + .Input("key: key_dtype") + .Input("value: value_dtype") .Output("output_handle: variant") - .Attr("element_dtype: type") + .Attr("key_dtype: type") + .Attr("value_dtype: type") .SetShapeFn([](shape_inference::InferenceContext* c) { c->set_output(0, c->Scalar()); return Status::OK(); @@ -46,38 +47,28 @@ REGISTER_OP("TensorMapInsert") REGISTER_OP("TensorMapLookup") .Input("input_handle: variant") - .Input("key: element_dtype") - .Output("value: element_dtype") - .Attr("element_dtype: type") + .Input("key: key_dtype") + .Output("value: value_dtype") + .Attr("key_dtype: type") + .Attr("value_dtype: type") .SetShapeFn([](shape_inference::InferenceContext* c) { - c->set_output(0, c->Scalar()); + c->set_output(0, c->UnknownShape()); return Status::OK(); }); REGISTER_OP("TensorMapErase") .Input("input_handle: variant") - .Input("key: element_dtype") + .Input("key: key_dtype") .Output("output_handle: variant") - .Output("value: element_dtype") - .Attr("element_dtype: type") + .Output("value: value_dtype") + .Attr("key_dtype: type") + .Attr("value_dtype: type") .SetShapeFn([](shape_inference::InferenceContext* c) { - DataType element_dtype; - TF_RETURN_IF_ERROR(c->GetAttr("element_dtype", &element_dtype)); - c->set_output(1, c->Scalar()); // removed element - c->set_output(0, c->Scalar()); // map + c->set_output(0, c->Scalar()); // output map + c->set_output(1, c->UnknownShape()); // removed element return Status::OK(); }); -REGISTER_OP("TensorMapReplace") - .Input("input_handle: variant") - .Input("key: element_dtype") - .Input("value: element_dtype") - .Output("output_handle: variant") - .Attr("element_dtype: type") - 
.SetShapeFn([](shape_inference::InferenceContext* c) { - c->set_output(0, c->Scalar()); - return Status::OK(); - }); } // namespace } // namespace tensorflow diff --git a/tensorflow/python/kernel_tests/map_ops_test.py b/tensorflow/python/kernel_tests/map_ops_test.py index b71e8ca8ebe..443b8fd34fc 100644 --- a/tensorflow/python/kernel_tests/map_ops_test.py +++ b/tensorflow/python/kernel_tests/map_ops_test.py @@ -20,6 +20,7 @@ from __future__ import print_function from absl.testing import parameterized from tensorflow.python.eager import backprop from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import test_util from tensorflow.python.ops import map_ops @@ -46,7 +47,7 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): k = constant_op.constant(1.0) v = constant_op.constant(2.0) m = map_ops.tensor_map_insert(m, k, v) - l = map_ops.tensor_map_lookup(m, k) + l = map_ops.tensor_map_lookup(m, k, dtypes.float32) self.assertAllClose(l, v) def testTensorMapLookupMissingKeyFails(self): @@ -56,34 +57,9 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): with self.assertRaisesRegex(errors.InvalidArgumentError, "Trying to lookup non-existent key."): - l = map_ops.tensor_map_lookup(m, k) + l = map_ops.tensor_map_lookup(m, k, dtypes.float32) self.evaluate(l) - def testTensorMapReplace(self): - m = map_ops.empty_tensor_map() - k = constant_op.constant(1.0) - v = constant_op.constant(2.0) - m = map_ops.tensor_map_insert(m, k, v) - s = map_ops.tensor_map_size(m) - self.assertAllClose(s, 1) - - v2 = constant_op.constant(3.0) - m = map_ops.tensor_map_replace(m, k, v2) - l = map_ops.tensor_map_lookup(m, k) - self.assertAllClose(l, v2) - - def testTensorMapReplaceMissingKeyFails(self): - m = map_ops.empty_tensor_map() - k = constant_op.constant(1.0) - k2 = constant_op.constant(2.0) - v = constant_op.constant(2.0) - m = map_ops.tensor_map_insert(m, k2, v) - - with self.assertRaisesRegex(errors.InvalidArgumentError, - "Trying to replace non-existent key."): - m = map_ops.tensor_map_replace(m, k, v) - self.evaluate(m) - def testTensorMapErase(self): m = map_ops.empty_tensor_map() k = constant_op.constant(1.0) @@ -92,7 +68,7 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): s = map_ops.tensor_map_size(m) self.assertAllEqual(s, 1) - m, e = map_ops.tensor_map_erase(m, k) + m, e = map_ops.tensor_map_erase(m, k, dtypes.float32) s = map_ops.tensor_map_size(m) self.assertAllEqual(s, 0) self.assertAllClose(e, v) @@ -104,7 +80,7 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): with self.assertRaisesRegex(errors.InvalidArgumentError, "Trying to erase non-existent item."): - m, e = map_ops.tensor_map_erase(m, k) + m, e = map_ops.tensor_map_erase(m, k, dtypes.float32) self.evaluate(e) def testTensorMapEraseMissingKeyFails(self): @@ -116,9 +92,9 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): with self.assertRaisesRegex(errors.InvalidArgumentError, "Trying to erase non-existent item."): - m, e = map_ops.tensor_map_erase(m, k) + m, e = map_ops.tensor_map_erase(m, k, dtypes.float32) self.evaluate(e) - + ''' def testInsertLookupGrad(self): with backprop.GradientTape() as tape: m = map_ops.empty_tensor_map() @@ -129,7 +105,7 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): l = map_ops.tensor_map_lookup(m, k) l *= 5 g = tape.gradient(l, v) - 
self.assertAllClose(g, 5.0) + self.assertAllClose(g, 5.0)''' if __name__ == '__main__': diff --git a/tensorflow/python/ops/map_ops.py b/tensorflow/python/ops/map_ops.py index 7813247c8e2..f14c1314d71 100644 --- a/tensorflow/python/ops/map_ops.py +++ b/tensorflow/python/ops/map_ops.py @@ -35,14 +35,11 @@ def tensor_map_size(input_handle): def tensor_map_insert(input_handle, key, value): return gen_map_ops.tensor_map_insert(input_handle, key, value) -def tensor_map_lookup(input_handle, key): - return gen_map_ops.tensor_map_lookup(input_handle, key) +def tensor_map_lookup(input_handle, key, value_dtype): + return gen_map_ops.tensor_map_lookup(input_handle, key, value_dtype) -def tensor_map_erase(input_handle, key): - return gen_map_ops.tensor_map_erase(input_handle, key) - -def tensor_map_replace(input_handle, key, value): - return gen_map_ops.tensor_map_replace(input_handle, key, value) +def tensor_map_erase(input_handle, key, value_dtype): + return gen_map_ops.tensor_map_erase(input_handle, key, value_dtype) @ops.RegisterGradient("TensorMapLookup") def LookupGrad(op, dval): From c29d6434bae6680039ca4c8b9aaf6dd30dae4c62 Mon Sep 17 00:00:00 2001 From: Advait Jain Date: Fri, 17 Jul 2020 13:29:26 -0700 Subject: [PATCH 0732/2522] Make warnings in the external builds match the internal builds. PiperOrigin-RevId: 321843582 Change-Id: I5dc287411e2c5067b530bb47c0cb24a5e47973fd --- .../sparkfun_edge/accelerometer_handler.cc | 13 +- .../micro/examples/micro_speech/Makefile.inc | 10 +- .../lite/micro/kernels/cmsis-nn/softmax.cc | 2 - .../kernels/resize_nearest_neighbor_test.cc | 255 ++++++++++-------- tensorflow/lite/micro/micro_allocator.cc | 11 +- tensorflow/lite/micro/micro_interpreter.cc | 6 +- tensorflow/lite/micro/testing/test_utils.cc | 4 +- .../lite/micro/tools/ci_build/test_all.sh | 6 +- tensorflow/lite/micro/tools/make/Makefile | 7 +- .../make/targets/apollo3evb_makefile.inc | 12 +- .../tools/make/targets/bluepill_makefile.inc | 10 +- .../tools/make/targets/stm32f4_makefile.inc | 13 +- 12 files changed, 179 insertions(+), 170 deletions(-) diff --git a/tensorflow/lite/micro/examples/magic_wand/sparkfun_edge/accelerometer_handler.cc b/tensorflow/lite/micro/examples/magic_wand/sparkfun_edge/accelerometer_handler.cc index 0b35b69c298..ae2d127a428 100644 --- a/tensorflow/lite/micro/examples/magic_wand/sparkfun_edge/accelerometer_handler.cc +++ b/tensorflow/lite/micro/examples/magic_wand/sparkfun_edge/accelerometer_handler.cc @@ -173,23 +173,24 @@ bool ReadAccelerometer(tflite::ErrorReporter* error_reporter, float* input, } // Load data from FIFO buffer - axis3bit16_t data_raw_acceleration; + axis3bit16_t data_raw_acceleration_local; for (int i = 0; i < samples; i++) { // Zero out the struct that holds raw accelerometer data - memset(data_raw_acceleration.u8bit, 0x00, 3 * sizeof(int16_t)); + memset(data_raw_acceleration_local.u8bit, 0x00, 3 * sizeof(int16_t)); // If the return value is non-zero, sensor data was successfully read - if (lis2dh12_acceleration_raw_get(&dev_ctx, data_raw_acceleration.u8bit)) { + if (lis2dh12_acceleration_raw_get(&dev_ctx, + data_raw_acceleration_local.u8bit)) { TF_LITE_REPORT_ERROR(error_reporter, "Failed to get raw data."); } else { // Convert each raw 16-bit value into floating point values representing // milli-Gs, a unit of acceleration, and store in the current position of // our buffer save_data[begin_index++] = - lis2dh12_from_fs2_hr_to_mg(data_raw_acceleration.i16bit[0]); + lis2dh12_from_fs2_hr_to_mg(data_raw_acceleration_local.i16bit[0]); 
save_data[begin_index++] = - lis2dh12_from_fs2_hr_to_mg(data_raw_acceleration.i16bit[1]); + lis2dh12_from_fs2_hr_to_mg(data_raw_acceleration_local.i16bit[1]); save_data[begin_index++] = - lis2dh12_from_fs2_hr_to_mg(data_raw_acceleration.i16bit[2]); + lis2dh12_from_fs2_hr_to_mg(data_raw_acceleration_local.i16bit[2]); // Start from beginning, imitating loop array. if (begin_index >= 600) begin_index = 0; } diff --git a/tensorflow/lite/micro/examples/micro_speech/Makefile.inc b/tensorflow/lite/micro/examples/micro_speech/Makefile.inc index 70ad3e94238..a4ec24f48e1 100644 --- a/tensorflow/lite/micro/examples/micro_speech/Makefile.inc +++ b/tensorflow/lite/micro/examples/micro_speech/Makefile.inc @@ -237,18 +237,14 @@ $(MICRO_FEATURES_GENERATOR_HDRS) #Find any platform - specific rules for this example. include $(wildcard tensorflow/lite/micro/examples/micro_speech/*/Makefile.inc) -# Test the code for feature generation. -#TEMP_CXXFLAGS := CXXFLAGS -#CXXFLAGS := $(filter-out $(CC_WARNINGS),$(CXXFLAGS)) - -TEMP_CCFLAGS := CCFLAGS +# TODO(b/161489252): Disabling warnings for this example until we have a better +# way to build third_party code with a reduced list of CFLAGS. CCFLAGS := $(filter-out $(CC_WARNINGS),$(CCFLAGS)) +# Test the code for feature generation. $(eval $(call microlite_test,micro_features_generator_test,\ $(MICRO_FEATURES_GENERATOR_TEST_SRCS), $(MICRO_FEATURES_GENERATOR_TEST_HDRS))) -#CXXFLAGS := TEMP_CXXFLAGS - # Tests loading and running a speech model. $(eval $(call microlite_test,micro_speech_test,\ $(MICRO_SPEECH_TEST_SRCS),$(MICRO_SPEECH_TEST_HDRS))) diff --git a/tensorflow/lite/micro/kernels/cmsis-nn/softmax.cc b/tensorflow/lite/micro/kernels/cmsis-nn/softmax.cc index 4b7179ac077..b18d1c9b1e8 100644 --- a/tensorflow/lite/micro/kernels/cmsis-nn/softmax.cc +++ b/tensorflow/lite/micro/kernels/cmsis-nn/softmax.cc @@ -102,8 +102,6 @@ void SoftmaxQuantized(const TfLiteTensor* input, TfLiteTensor* output, op_data, GetTensorShape(input), GetTensorData(input), GetTensorShape(output), GetTensorData(output)); } else { - const unsigned int num_dims = NumDimensions(input); - const int trailing_dim = input_shape.DimensionsCount() - 1; const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); diff --git a/tensorflow/lite/micro/kernels/resize_nearest_neighbor_test.cc b/tensorflow/lite/micro/kernels/resize_nearest_neighbor_test.cc index 654516c3ce7..1c2c22645e6 100644 --- a/tensorflow/lite/micro/kernels/resize_nearest_neighbor_test.cc +++ b/tensorflow/lite/micro/kernels/resize_nearest_neighbor_test.cc @@ -18,7 +18,6 @@ limitations under the License. 
#include "tensorflow/lite/micro/testing/micro_test.h" #include "tensorflow/lite/micro/testing/test_utils.h" - namespace tflite { namespace testing { namespace { @@ -70,9 +69,7 @@ void TestResizeNearestNeighbor(const int* input_dims_data, const T* input_data, resolver.FindOp(tflite::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR); TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - TfLiteResizeNearestNeighborParams builtin_data = { - .align_corners = false - }; + TfLiteResizeNearestNeighborParams builtin_data = {false, false}; int inputs_array_data[] = {2, 0, 1}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); @@ -99,7 +96,6 @@ void TestResizeNearestNeighbor(const int* input_dims_data, const T* input_data, } // namespace testing } // namespace tflite - TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(HorizontalResize) { @@ -110,8 +106,9 @@ TF_LITE_MICRO_TEST(HorizontalResize) { const int output_dims[] = {4, 1, 1, 3, 1}; float output_data[3]; - tflite::testing::TestResizeNearestNeighbor(input_dims, input_data, - expected_size_data, expected_output_data, output_dims, output_data); + tflite::testing::TestResizeNearestNeighbor( + input_dims, input_data, expected_size_data, expected_output_data, + output_dims, output_data); } TF_LITE_MICRO_TEST(HorizontalResizeUInt8) { const int input_dims[] = {4, 1, 1, 2, 1}; @@ -121,8 +118,9 @@ TF_LITE_MICRO_TEST(HorizontalResizeUInt8) { const int output_dims[] = {4, 1, 1, 3, 1}; uint8 output_data[3]; - tflite::testing::TestResizeNearestNeighbor(input_dims, input_data, - expected_size_data, expected_output_data, output_dims, output_data); + tflite::testing::TestResizeNearestNeighbor( + input_dims, input_data, expected_size_data, expected_output_data, + output_dims, output_data); } TF_LITE_MICRO_TEST(HorizontalResizeInt8) { const int input_dims[] = {4, 1, 1, 2, 1}; @@ -132,8 +130,9 @@ TF_LITE_MICRO_TEST(HorizontalResizeInt8) { const int output_dims[] = {4, 1, 1, 3, 1}; int8 output_data[3]; - tflite::testing::TestResizeNearestNeighbor(input_dims, input_data, - expected_size_data, expected_output_data, output_dims, output_data); + tflite::testing::TestResizeNearestNeighbor( + input_dims, input_data, expected_size_data, expected_output_data, + output_dims, output_data); } TF_LITE_MICRO_TEST(VerticalResize) { const int input_dims[] = {4, 1, 2, 1, 1}; @@ -143,8 +142,9 @@ TF_LITE_MICRO_TEST(VerticalResize) { const int output_dims[] = {4, 1, 3, 1, 1}; float output_data[3]; - tflite::testing::TestResizeNearestNeighbor(input_dims, input_data, - expected_size_data, expected_output_data, output_dims, output_data); + tflite::testing::TestResizeNearestNeighbor( + input_dims, input_data, expected_size_data, expected_output_data, + output_dims, output_data); } TF_LITE_MICRO_TEST(VerticalResizeUInt8) { const int input_dims[] = {4, 1, 2, 1, 1}; @@ -154,8 +154,9 @@ TF_LITE_MICRO_TEST(VerticalResizeUInt8) { const int output_dims[] = {4, 1, 3, 1, 1}; uint8 output_data[3]; - tflite::testing::TestResizeNearestNeighbor(input_dims, input_data, - expected_size_data, expected_output_data, output_dims, output_data); + tflite::testing::TestResizeNearestNeighbor( + input_dims, input_data, expected_size_data, expected_output_data, + output_dims, output_data); } TF_LITE_MICRO_TEST(VerticalResizeInt8) { const int input_dims[] = {4, 1, 2, 1, 1}; @@ -165,168 +166,196 @@ TF_LITE_MICRO_TEST(VerticalResizeInt8) { const int output_dims[] = {4, 1, 3, 1, 1}; int8 output_data[3]; - tflite::testing::TestResizeNearestNeighbor(input_dims, input_data, - expected_size_data, expected_output_data, 
output_dims, output_data); + tflite::testing::TestResizeNearestNeighbor( + input_dims, input_data, expected_size_data, expected_output_data, + output_dims, output_data); } TF_LITE_MICRO_TEST(TwoDimensionalResize) { const int input_dims[] = {4, 1, 2, 2, 1}; - const float input_data[] = {3, 6, // - 9, 12, // - }; + const float input_data[] = { + 3, 6, // + 9, 12, // + }; const int32 expected_size_data[] = {3, 3}; - const float expected_output_data[] = {3, 3, 6, // - 3, 3, 6, // - 9, 9, 12 // - }; + const float expected_output_data[] = { + 3, 3, 6, // + 3, 3, 6, // + 9, 9, 12 // + }; const int output_dims[] = {4, 1, 3, 3, 1}; float output_data[9]; - tflite::testing::TestResizeNearestNeighbor(input_dims, input_data, - expected_size_data, expected_output_data, output_dims, output_data); + tflite::testing::TestResizeNearestNeighbor( + input_dims, input_data, expected_size_data, expected_output_data, + output_dims, output_data); } TF_LITE_MICRO_TEST(TwoDimensionalResizeUInt8) { const int input_dims[] = {4, 1, 2, 2, 1}; - const uint8 input_data[] = {3, 6, // - 9, 12 // - }; + const uint8 input_data[] = { + 3, 6, // + 9, 12 // + }; const int32 expected_size_data[] = {3, 3}; - const uint8 expected_output_data[] = {3, 3, 6, // - 3, 3, 6, // - 9, 9, 12 // - }; + const uint8 expected_output_data[] = { + 3, 3, 6, // + 3, 3, 6, // + 9, 9, 12 // + }; const int output_dims[] = {4, 1, 3, 3, 1}; uint8 output_data[9]; - tflite::testing::TestResizeNearestNeighbor(input_dims, input_data, - expected_size_data, expected_output_data, output_dims, output_data); + tflite::testing::TestResizeNearestNeighbor( + input_dims, input_data, expected_size_data, expected_output_data, + output_dims, output_data); } TF_LITE_MICRO_TEST(TwoDimensionalResizeInt8) { const int input_dims[] = {4, 1, 2, 2, 1}; - const int8 input_data[] = {3, -6, // - 9, 12, // - }; + const int8 input_data[] = { + 3, -6, // + 9, 12, // + }; const int32 expected_size_data[] = {3, 3}; - const int8 expected_output_data[] = {3, 3, -6, // - 3, 3, -6, // - 9, 9, 12, // - }; + const int8 expected_output_data[] = { + 3, 3, -6, // + 3, 3, -6, // + 9, 9, 12, // + }; const int output_dims[] = {4, 1, 3, 3, 1}; int8 output_data[9]; - tflite::testing::TestResizeNearestNeighbor(input_dims, input_data, - expected_size_data, expected_output_data, output_dims, output_data); + tflite::testing::TestResizeNearestNeighbor( + input_dims, input_data, expected_size_data, expected_output_data, + output_dims, output_data); } TF_LITE_MICRO_TEST(TwoDimensionalResizeWithTwoBatches) { const int input_dims[] = {4, 2, 2, 2, 1}; - const float input_data[] = {3, 6, // - 9, 12, // - 4, 10, // - 10, 16 // - }; + const float input_data[] = { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }; const int32 expected_size_data[] = {3, 3}; - const float expected_output_data[] = {3, 3, 6, // - 3, 3, 6, // - 9, 9, 12, // - 4, 4, 10, // - 4, 4, 10, // - 10, 10, 16, // - }; + const float expected_output_data[] = { + 3, 3, 6, // + 3, 3, 6, // + 9, 9, 12, // + 4, 4, 10, // + 4, 4, 10, // + 10, 10, 16, // + }; const int output_dims[] = {4, 2, 3, 3, 1}; float output_data[18]; - tflite::testing::TestResizeNearestNeighbor(input_dims, input_data, - expected_size_data, expected_output_data, output_dims, output_data); + tflite::testing::TestResizeNearestNeighbor( + input_dims, input_data, expected_size_data, expected_output_data, + output_dims, output_data); } TF_LITE_MICRO_TEST(TwoDimensionalResizeWithTwoBatchesUInt8) { const int input_dims[] = {4, 2, 2, 2, 1}; - const uint8 input_data[] = {3, 6, // - 9, 
12, // - 4, 10, // - 10, 16 // - }; + const uint8 input_data[] = { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }; const int32 expected_size_data[] = {3, 3}; - const uint8 expected_output_data[] = {3, 3, 6, // - 3, 3, 6, // - 9, 9, 12, // - 4, 4, 10, // - 4, 4, 10, // - 10, 10, 16, // - }; + const uint8 expected_output_data[] = { + 3, 3, 6, // + 3, 3, 6, // + 9, 9, 12, // + 4, 4, 10, // + 4, 4, 10, // + 10, 10, 16, // + }; const int output_dims[] = {4, 2, 3, 3, 1}; uint8 output_data[18]; - tflite::testing::TestResizeNearestNeighbor(input_dims, input_data, - expected_size_data, expected_output_data, output_dims, output_data); + tflite::testing::TestResizeNearestNeighbor( + input_dims, input_data, expected_size_data, expected_output_data, + output_dims, output_data); } TF_LITE_MICRO_TEST(TwoDimensionalResizeWithTwoBatchesInt8) { const int input_dims[] = {4, 2, 2, 2, 1}; - const int8 input_data[] = {3, 6, // - 9, -12, // - -4, 10, // - 10, 16 // - }; + const int8 input_data[] = { + 3, 6, // + 9, -12, // + -4, 10, // + 10, 16 // + }; const int32 expected_size_data[] = {3, 3}; - const int8 expected_output_data[] = {3, 3, 6, // - 3, 3, 6, // - 9, 9, -12, // - -4, -4, 10, // - -4, -4, 10, // - 10, 10, 16, // - }; + const int8 expected_output_data[] = { + 3, 3, 6, // + 3, 3, 6, // + 9, 9, -12, // + -4, -4, 10, // + -4, -4, 10, // + 10, 10, 16, // + }; const int output_dims[] = {4, 2, 3, 3, 1}; int8 output_data[18]; - tflite::testing::TestResizeNearestNeighbor(input_dims, input_data, - expected_size_data, expected_output_data, output_dims, output_data); + tflite::testing::TestResizeNearestNeighbor( + input_dims, input_data, expected_size_data, expected_output_data, + output_dims, output_data); } TF_LITE_MICRO_TEST(ThreeDimensionalResize) { const int input_dims[] = {4, 1, 2, 2, 2}; - const float input_data[] = {3, 4, 6, 10, // - 9, 10, 12, 16, // - }; + const float input_data[] = { + 3, 4, 6, 10, // + 9, 10, 12, 16, // + }; const int32 expected_size_data[] = {3, 3}; - const float expected_output_data[] = {3, 4, 3, 4, 6, 10, // - 3, 4, 3, 4, 6, 10, // - 9, 10, 9, 10, 12, 16, // - }; + const float expected_output_data[] = { + 3, 4, 3, 4, 6, 10, // + 3, 4, 3, 4, 6, 10, // + 9, 10, 9, 10, 12, 16, // + }; const int output_dims[] = {4, 1, 3, 3, 2}; float output_data[18]; - tflite::testing::TestResizeNearestNeighbor(input_dims, input_data, - expected_size_data, expected_output_data, output_dims, output_data); + tflite::testing::TestResizeNearestNeighbor( + input_dims, input_data, expected_size_data, expected_output_data, + output_dims, output_data); } TF_LITE_MICRO_TEST(ThreeDimensionalResizeUInt8) { const int input_dims[] = {4, 1, 2, 2, 2}; - const uint8 input_data[] = {3, 4, 6, 10, // - 10, 12, 14, 16, // - }; + const uint8 input_data[] = { + 3, 4, 6, 10, // + 10, 12, 14, 16, // + }; const int32 expected_size_data[] = {3, 3}; - const uint8 expected_output_data[] = {3, 4, 3, 4, 6, 10, // - 3, 4, 3, 4, 6, 10, // - 10, 12, 10, 12, 14, 16, // - }; + const uint8 expected_output_data[] = { + 3, 4, 3, 4, 6, 10, // + 3, 4, 3, 4, 6, 10, // + 10, 12, 10, 12, 14, 16, // + }; const int output_dims[] = {4, 1, 3, 3, 2}; uint8 output_data[18]; - tflite::testing::TestResizeNearestNeighbor(input_dims, input_data, - expected_size_data, expected_output_data, output_dims, output_data); + tflite::testing::TestResizeNearestNeighbor( + input_dims, input_data, expected_size_data, expected_output_data, + output_dims, output_data); } TF_LITE_MICRO_TEST(ThreeDimensionalResizeInt8) { const int input_dims[] = {4, 1, 2, 2, 2}; - 
const int8 input_data[] = {3, 4, -6, 10, // - 10, 12, -14, 16, // - }; + const int8 input_data[] = { + 3, 4, -6, 10, // + 10, 12, -14, 16, // + }; const int32 expected_size_data[] = {3, 3}; - const int8 expected_output_data[] = {3, 4, 3, 4, -6, 10, // - 3, 4, 3, 4, -6, 10, // - 10, 12, 10, 12, -14, 16, // - }; + const int8 expected_output_data[] = { + 3, 4, 3, 4, -6, 10, // + 3, 4, 3, 4, -6, 10, // + 10, 12, 10, 12, -14, 16, // + }; const int output_dims[] = {4, 1, 3, 3, 2}; int8 output_data[18]; - tflite::testing::TestResizeNearestNeighbor(input_dims, input_data, - expected_size_data, expected_output_data, output_dims, output_data); + tflite::testing::TestResizeNearestNeighbor( + input_dims, input_data, expected_size_data, expected_output_data, + output_dims, output_data); } TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/micro_allocator.cc b/tensorflow/lite/micro/micro_allocator.cc index b9ba89e7dc2..c045323ff4a 100644 --- a/tensorflow/lite/micro/micro_allocator.cc +++ b/tensorflow/lite/micro/micro_allocator.cc @@ -49,17 +49,8 @@ struct AllocationInfo { // We align tensor buffers to 16-byte boundaries, since this is a common // requirement for SIMD extensions. constexpr int kBufferAlignment = 16; - constexpr char kOfflineMemAllocMetadata[] = "OfflineMemoryAllocation"; - -// Instance of a zero-length int to pass as tensor dims for a flatbuffer -// Tensor with no shape. Note that the second member of a TfLiteArray is a -// flexible array member, which is not strictly valid C++. However it is -// supported by both GCC and clang, as long as the flexible array element is not -// initialized, which is ok in this case as it should never be accessed. -// Declaring this as constexpr causes build errors with clang, as it requires -// the flexible array element to be initialized. 
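// [Editor's note: illustrative sketch, not part of the patch. The exact
// definition of TfLiteIntArray is assumed here; in TFLite it is roughly
//
//   typedef struct TfLiteIntArray {
//     int size;
//     int data[];  // flexible array member
//   } TfLiteIntArray;
//
// so the replacement below writes an explicit empty initializer for the
// flexible array element ({0, {}}), which also satisfies the
// -Wmissing-field-initializers warning enabled elsewhere in this commit.]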
-const TfLiteIntArray kZeroLengthIntArray = {0}; +const TfLiteIntArray kZeroLengthIntArray = {0, {}}; class MicroBuiltinDataAllocator : public BuiltinDataAllocator { public: diff --git a/tensorflow/lite/micro/micro_interpreter.cc b/tensorflow/lite/micro/micro_interpreter.cc index a47dba83c6b..ef2ae2b7644 100644 --- a/tensorflow/lite/micro/micro_interpreter.cc +++ b/tensorflow/lite/micro/micro_interpreter.cc @@ -310,7 +310,7 @@ TfLiteStatus MicroInterpreter::Invoke() { TfLiteTensor* MicroInterpreter::input(size_t index) { const size_t length = inputs_size(); - if ((index < 0) || (index >= length)) { + if (index >= length) { TF_LITE_REPORT_ERROR(error_reporter_, "Input index %d out of range (length is %d)", index, length); @@ -321,7 +321,7 @@ TfLiteTensor* MicroInterpreter::input(size_t index) { TfLiteTensor* MicroInterpreter::output(size_t index) { const size_t length = outputs_size(); - if ((index < 0) || (index >= length)) { + if (index >= length) { TF_LITE_REPORT_ERROR(error_reporter_, "Output index %d out of range (length is %d)", index, length); @@ -332,7 +332,7 @@ TfLiteTensor* MicroInterpreter::output(size_t index) { TfLiteTensor* MicroInterpreter::tensor(size_t index) { const size_t length = tensors_size(); - if ((index < 0) || (index >= length)) { + if (index >= length) { TF_LITE_REPORT_ERROR(error_reporter_, "Tensor index %d out of range (length is %d)", index, length); diff --git a/tensorflow/lite/micro/testing/test_utils.cc b/tensorflow/lite/micro/testing/test_utils.cc index 8860d66efa9..5aa73df44a6 100644 --- a/tensorflow/lite/micro/testing/test_utils.cc +++ b/tensorflow/lite/micro/testing/test_utils.cc @@ -36,9 +36,9 @@ constexpr size_t kBufferAlignment = 16; // We store the pointer to the ith scratch buffer to implement the Request/Get // ScratchBuffer API for the tests. scratch_buffers_[i] will be the ith scratch // buffer and will still be allocated from within raw_arena_. 
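// [Editor's note: illustrative only, not part of the patch. Several hunks in
// this commit are -Wsign-compare cleanups: micro_interpreter.cc drops the
// "(index < 0)" half of its bounds checks because the index is a size_t, and
// the scratch-buffer counters below move from size_t to int, presumably so
// they compare cleanly against signed loop indices. The pattern the warning
// flags, in a minimal sketch:
//
//   size_t index = ...;
//   if (index < 0) { /* never taken: an unsigned value cannot be negative */ }
// ]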
-constexpr size_t kNumScratchBuffers = 5; +constexpr int kNumScratchBuffers = 5; uint8_t* scratch_buffers_[kNumScratchBuffers]; -size_t scratch_buffer_count_ = 0; +int scratch_buffer_count_ = 0; // Note that the context parameter in this function is only needed to match the // signature of TfLiteContext::AllocatePersistentBuffer and isn't needed in the diff --git a/tensorflow/lite/micro/tools/ci_build/test_all.sh b/tensorflow/lite/micro/tools/ci_build/test_all.sh index 817a4dce115..95366112f17 100755 --- a/tensorflow/lite/micro/tools/ci_build/test_all.sh +++ b/tensorflow/lite/micro/tools/ci_build/test_all.sh @@ -34,9 +34,6 @@ make -f tensorflow/lite/micro/tools/make/Makefile \ echo "Starting to run micro tests at `date`" -echo "Running Arduino tests at `date`" -tensorflow/lite/micro/tools/ci_build/test_arduino.sh - echo "Running bluepill tests at `date`" tensorflow/lite/micro/tools/ci_build/test_bluepill.sh @@ -52,4 +49,7 @@ tensorflow/lite/micro/tools/ci_build/test_x86.sh echo "Running stm32f4 tests at `date`" tensorflow/lite/micro/tools/ci_build/test_stm32f4.sh +echo "Running Arduino tests at `date`" +tensorflow/lite/micro/tools/ci_build/test_arduino.sh + echo "Finished all micro tests at `date`" diff --git a/tensorflow/lite/micro/tools/make/Makefile b/tensorflow/lite/micro/tools/make/Makefile index cfab5f9f8d2..6c6e89561f2 100644 --- a/tensorflow/lite/micro/tools/make/Makefile +++ b/tensorflow/lite/micro/tools/make/Makefile @@ -74,11 +74,14 @@ TEST_SCRIPT := tensorflow/lite/micro/testing/test_linux_binary.sh MICROLITE_LIBS := -lm +CC_WARNINGS := -Werror -Wsign-compare -Wdouble-promotion \ + -Wshadow -Wunused-variable -Wmissing-field-initializers \ + -Wunused-function # TODO(b/150240249): Add in -fno-rtti once that works for the Xtensa toolchain. # TODO(b/159155203): Consider TF_LITE_STATIC_MEMORY to align more with the fact # this flag is for an optimized micro runtime. 
-CXXFLAGS := -std=c++11 -DTF_LITE_STATIC_MEMORY
-CCFLAGS := -std=c11 -DTF_LITE_STATIC_MEMORY
+CXXFLAGS := -std=c++11 -DTF_LITE_STATIC_MEMORY $(CC_WARNINGS)
+CCFLAGS := -std=c11 -DTF_LITE_STATIC_MEMORY $(CC_WARNINGS)
 ARFLAGS := -r

 # override these in the makefile.inc for specific compiler targets
diff --git a/tensorflow/lite/micro/tools/make/targets/apollo3evb_makefile.inc b/tensorflow/lite/micro/tools/make/targets/apollo3evb_makefile.inc
index dc7a689daed..51163233074 100644
--- a/tensorflow/lite/micro/tools/make/targets/apollo3evb_makefile.inc
+++ b/tensorflow/lite/micro/tools/make/targets/apollo3evb_makefile.inc
@@ -50,18 +50,14 @@ $(MAKEFILE_DIR)/downloads/$(AM_SDK_DEST)/$(SF_BSPS_DEST): $(MAKEFILE_DIR)/downlo
     -Wvla \
     -Wall \
     -Wextra \
-    -Wsign-compare \
-    -Wdouble-promotion \
-    -Wunused-variable \
-    -Wshadow \
-    -Wmissing-field-initializers \
+    -Wno-missing-field-initializers \
+    -Wno-strict-aliasing \
+    -Wno-type-limits \
+    -Wno-unused-function \
     -Wno-unused-parameter \
-    -Wno-write-strings \
-    -Wunused-function \
     -fno-delete-null-pointer-checks \
     -fno-threadsafe-statics \
     -fomit-frame-pointer \
-    -fpermissive \
     -fno-use-cxa-atexit \
     -nostdlib \
     -ggdb \
diff --git a/tensorflow/lite/micro/tools/make/targets/bluepill_makefile.inc b/tensorflow/lite/micro/tools/make/targets/bluepill_makefile.inc
index 3b6f1f9aa88..62230f6a80a 100644
--- a/tensorflow/lite/micro/tools/make/targets/bluepill_makefile.inc
+++ b/tensorflow/lite/micro/tools/make/targets/bluepill_makefile.inc
@@ -29,18 +29,12 @@ ifeq ($(TARGET), bluepill)
     -Wvla \
     -Wall \
     -Wextra \
-    -Wsign-compare \
-    -Wdouble-promotion \
-    -Wshadow \
-    -Wunused-variable \
-    -Wmissing-field-initializers \
     -Wno-unused-parameter \
-    -Wno-write-strings \
-    -Wunused-function \
+    -Wno-strict-aliasing \
+    -Wno-type-limits \
     -fno-delete-null-pointer-checks \
     -fno-threadsafe-statics \
     -fomit-frame-pointer \
-    -fpermissive \
     -fno-use-cxa-atexit \
     -nostdlib \
     -g \
diff --git a/tensorflow/lite/micro/tools/make/targets/stm32f4_makefile.inc b/tensorflow/lite/micro/tools/make/targets/stm32f4_makefile.inc
index 8405542fcba..15ee93d4e19 100644
--- a/tensorflow/lite/micro/tools/make/targets/stm32f4_makefile.inc
+++ b/tensorflow/lite/micro/tools/make/targets/stm32f4_makefile.inc
@@ -9,11 +9,12 @@ ifeq ($(TARGET), stm32f4)
   $(eval $(call add_third_party_download,$(CMSIS_URL),$(CMSIS_MD5),cmsis,patch_cmsis))
   $(eval $(call add_third_party_download,$(STM32_BARE_LIB_URL),$(STM32_BARE_LIB_MD5),stm32_bare_lib,))

+  # TODO(b/161478030): change -Wno-vla to -Wvla and remove -Wno-shadow once
+  # we have a solution for fixing/avoiding being tripped up by these warnings.
PLATFORM_FLAGS = \ -DGEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK \ -DTF_LITE_STATIC_MEMORY \ -DTF_LITE_MCU_DEBUG_LOG \ - -fno-rtti \ -fmessage-length=0 \ -fno-exceptions \ -fno-unwind-tables \ @@ -23,10 +24,12 @@ ifeq ($(TARGET), stm32f4) -MMD \ -mcpu=cortex-m4 \ -mthumb \ - -std=gnu++11 \ - -Wvla \ -Wall \ -Wextra \ + -Wno-shadow \ + -Wno-vla \ + -Wno-strict-aliasing \ + -Wno-type-limits \ -Wno-unused-parameter \ -Wno-missing-field-initializers \ -Wno-write-strings \ @@ -34,11 +37,9 @@ ifeq ($(TARGET), stm32f4) -Wunused-function \ -fno-delete-null-pointer-checks \ -fomit-frame-pointer \ - -fpermissive \ - -fno-use-cxa-atexit \ -g \ -Os - CXXFLAGS += $(PLATFORM_FLAGS) + CXXFLAGS += $(PLATFORM_FLAGS) -std=gnu++11 -fno-rtti -fno-use-cxa-atexit CCFLAGS += $(PLATFORM_FLAGS) LDFLAGS += \ --specs=nosys.specs \ From fd0e96b193b9d2c2f233d6a2a10b4a433deb28b3 Mon Sep 17 00:00:00 2001 From: Wenhao Jia Date: Fri, 17 Jul 2020 13:36:24 -0700 Subject: [PATCH 0733/2522] Add a TPU execution op. PiperOrigin-RevId: 321844765 Change-Id: I3bfb52fe00f7d378a26e3247beee7daa5ba6d38b --- tensorflow/core/tpu/kernels/BUILD | 41 - tensorflow/core/tpu/kernels/tpu_execute_op.cc | 805 ------------------ tensorflow/core/tpu/kernels/tpu_execute_op.h | 67 -- 3 files changed, 913 deletions(-) delete mode 100644 tensorflow/core/tpu/kernels/tpu_execute_op.cc delete mode 100644 tensorflow/core/tpu/kernels/tpu_execute_op.h diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index af7c9ead791..7a6160a2963 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -553,44 +553,3 @@ cc_library( ], alwayslink = 1, ) - -cc_library( - name = "tpu_execute_op", - srcs = ["tpu_execute_op.cc"], - hdrs = ["tpu_execute_op.h"], - deps = [ - ":tpu_compilation_cache_entry", - ":tpu_compilation_cache_external", - ":tpu_compilation_cache_local_lookup", - ":tpu_compilation_cache_lookup", - ":tpu_executable_info_proto_cc", - ":tpu_op_consts", - "//tensorflow/compiler/jit:xla_device", - "//tensorflow/compiler/jit:xla_launch_util", - "//tensorflow/compiler/jit:xla_tensor", - "//tensorflow/compiler/tf2xla:common", - "//tensorflow/compiler/tf2xla:tf2xla_util", - "//tensorflow/compiler/xla:debug_options_flags", - "//tensorflow/compiler/xla:shape_util", - "//tensorflow/compiler/xla:statusor", - "//tensorflow/compiler/xla:xla_data_proto_cc", - "//tensorflow/compiler/xla/service:dump", - "//tensorflow/compiler/xla/service:executable", - "//tensorflow/compiler/xla/service:maybe_owning_device_memory", - "//tensorflow/core:framework", - "//tensorflow/core:framework_internal", - "//tensorflow/core:lib", - "//tensorflow/core:lib_internal", - "//tensorflow/core:stream_executor_no_cuda", - "//tensorflow/core/profiler/lib:traceme", - "//tensorflow/core/tpu:tpu_configuration", - "//tensorflow/core/tpu:tpu_defs", - "//tensorflow/core/tpu:tpu_execute", - "//tensorflow/stream_executor:device_memory_allocator", - "//tensorflow/stream_executor/tpu:tpu_node_context", - "@com_google_absl//absl/container:flat_hash_map", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/types:span", - ], - alwayslink = True, -) diff --git a/tensorflow/core/tpu/kernels/tpu_execute_op.cc b/tensorflow/core/tpu/kernels/tpu_execute_op.cc deleted file mode 100644 index 817649e2fe7..00000000000 --- a/tensorflow/core/tpu/kernels/tpu_execute_op.cc +++ /dev/null @@ -1,805 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include "tensorflow/core/tpu/kernels/tpu_execute_op.h" - -#include "absl/container/flat_hash_map.h" -#include "absl/memory/memory.h" -#include "absl/types/span.h" -#include "tensorflow/compiler/jit/xla_device.h" -#include "tensorflow/compiler/jit/xla_launch_util.h" -#include "tensorflow/compiler/jit/xla_tensor.h" -#include "tensorflow/compiler/tf2xla/shape_util.h" -#include "tensorflow/compiler/tf2xla/tf2xla_util.h" -#include "tensorflow/compiler/xla/debug_options_flags.h" -#include "tensorflow/compiler/xla/service/dump.h" -#include "tensorflow/compiler/xla/service/executable.h" -#include "tensorflow/compiler/xla/service/maybe_owning_device_memory.h" -#include "tensorflow/compiler/xla/shape_util.h" -#include "tensorflow/compiler/xla/statusor.h" -#include "tensorflow/compiler/xla/xla_data.pb.h" -#include "tensorflow/core/framework/allocator.h" -#include "tensorflow/core/framework/node_def_util.h" -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/resource_mgr.h" -#include "tensorflow/core/framework/resource_var.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/types.h" -#include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/platform/stream_executor_no_cuda.h" -#include "tensorflow/core/platform/tracing.h" -#include "tensorflow/core/profiler/lib/traceme.h" -#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h" -#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_external.h" -#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_local_lookup.h" -#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.h" -#include "tensorflow/core/tpu/kernels/tpu_executable_info.pb.h" -#include "tensorflow/core/tpu/kernels/tpu_op_consts.h" -#include "tensorflow/core/tpu/tpu_configuration.h" -#include "tensorflow/core/tpu/tpu_defs.h" -#include "tensorflow/core/tpu/tpu_execute.h" -#include "tensorflow/core/util/stream_executor_util.h" -#include "tensorflow/stream_executor/device_memory_allocator.h" -#include "tensorflow/stream_executor/tpu/tpu_node_context.h" - -namespace tensorflow { - -namespace { - -using ::tensorflow::tpu::TpuNodeContext; -using CompilationCacheEntryRef = ::tensorflow::tpu::CompilationCacheEntryRef< - ::tensorflow::tpu::TpuCompilationCacheEntry>; -using TpuCompilationCacheLookup = - ::tensorflow::tpu::TpuCompilationCacheLookup; - -// Looks up the input `key` in the compilation cache, populating -// `*rendezvous_key_base` and `*entry`. 
-Status GetComputationCacheEntry( - OpKernelContext* context, string* rendezvous_key_base, - std::unique_ptr* entry) { - const Tensor* key; - TF_RETURN_IF_ERROR(context->input("key", &key)); - profiler::TraceMe trace_me("TpuExecuteOp::LookupProto", /*level=*/2); - if (!TensorShapeUtils::IsVector(key->shape()) || - key->shape().dim_size(0) != 2) { - return errors::InvalidArgument( - "Key argument to TPUExecute must be a 2-element vector"); - } - - ResourceMgr* rmgr = GetTPUConfigResourceMgr(); - TpuCompilationCacheLookup* proto_lookup; - TF_RETURN_IF_ERROR(rmgr->Lookup(rmgr->default_container(), - tpu::kCompiledProtoCacheResourceName, - &proto_lookup)); - core::ScopedUnref lookup_unref(proto_lookup); - TF_RETURN_IF_ERROR(proto_lookup->Lookup(key->vec()(0), entry)); - *rendezvous_key_base = key->vec()(1); - return Status::OK(); -} - -struct VariableUpdateMap { - // Maps input index to the updated output index. If the variable doesn't have - // an updated output, the corresponding output is set to -1. - absl::flat_hash_map input_to_output; - // Maps output index to (the input index, whether the update is generated from - // compilation). - absl::flat_hash_map> output_to_input; - // Part of the input indices that are from the compilation, in the compiled - // order. - std::vector input_in_compiled_update_order; -}; - -// Creates a VariableUpdateMap from both the compilation and the fused variable -// reads/updates. -xla::StatusOr BuildVariableUpdateMap( - absl::Span - compiled_variable_updates, - absl::Span fused_device_var_reads_in_computation_inputs, - const std::vector& fused_device_var_updates_in_computation_outputs, - int64 computation_output_count) { - VariableUpdateMap map; - auto add_pair = [&](int input, int output, bool from_compilation) -> Status { - TF_RET_CHECK(map.input_to_output.emplace(input, output).second) - << "Duplicate variable input index: " << input; - if (output >= 0) { - TF_RET_CHECK(map.output_to_input - .emplace(output, std::pair{input, from_compilation}) - .second) - << "Duplicate variable output index: " << output; - } - return Status::OK(); - }; - - // First add the updates produced by the compilation. Not all variables are - // updated, and if not, they do not have an output in the XLA computation. The - // update output indices in the XLA computation start after the non-variable - // outputs. - int num_updated_variables = 0; - for (int i = 0; i < compiled_variable_updates.size(); ++i) { - const bool updated = compiled_variable_updates[i]->updated(); - if (updated) ++num_updated_variables; - } - TF_RET_CHECK(num_updated_variables <= computation_output_count) - << num_updated_variables << " <= " << computation_output_count; - int64 compiled_variable_output_index = - computation_output_count - num_updated_variables; - for (auto update : compiled_variable_updates) { - map.input_in_compiled_update_order.push_back(update->index()); - if (!update->updated()) { - TF_RETURN_IF_ERROR(add_pair(update->index(), -1, true)); - continue; - } - TF_RETURN_IF_ERROR( - add_pair(update->index(), compiled_variable_output_index, true)); - ++compiled_variable_output_index; - } - - // Now add the updates from the attributes. 
- TF_RET_CHECK(fused_device_var_reads_in_computation_inputs.size() == - fused_device_var_updates_in_computation_outputs.size()); - for (int64 i = 0; i < fused_device_var_reads_in_computation_inputs.size(); - ++i) { - TF_RETURN_IF_ERROR( - add_pair(fused_device_var_reads_in_computation_inputs[i], - fused_device_var_updates_in_computation_outputs[i], false)); - } - return map; -} - -// Buffers representing the inputs to a computation. -struct InputBuffers { - explicit InputBuffers(xla::Shape device_shape) - : buffers(std::move(device_shape)) {} - - InputBuffers(const InputBuffers&) = delete; - InputBuffers& operator=(const InputBuffers&) = delete; - - ~InputBuffers() = default; - - xla::ShapedBuffer ToShapedBuffer(xla::Shape host_shape, - se::DeviceMemoryAllocator* allocator, - int device_ordinal) { - CHECK_NE(allocator, nullptr); - xla::ShapedBuffer shaped_buffer(std::move(host_shape), buffers.shape(), - allocator->platform(), device_ordinal); - shaped_buffer.set_buffers(buffers.Map( - [](xla::MaybeOwningDeviceMemory* buffer) { - CHECK(buffer); - return buffer->AsDeviceMemoryBase(); - })); - return shaped_buffer; - } - - // Describes the buffer tree. - xla::ShapeTree buffers; - - // Information about resource variables passed directly to TPUExecute. - std::vector variables; - - // Mapping from input index to offsets in 'variables'. < 0 if the input does - // not correspond to a variable in 'variables'. - std::vector variable_index; -}; - -// Builds an InputBuffers object that describes the inputs to the computation. -xla::StatusOr> BuildComputationInputs( - OpKernelContext* context, const xla::Shape& input_host_shape, - const VariableUpdateMap& variable_updates, TpuNodeContext* node_context, - se::Stream* stream) { - profiler::TraceMe trace_me("BuildComputationInputs", /*level=*/2); - OpInputList arg_list; - TF_RETURN_IF_ERROR(context->input_list("args", &arg_list)); - - if (arg_list.size() != xla::ShapeUtil::TupleElementCount(input_host_shape)) { - return errors::InvalidArgument( - "Number of parameters (", arg_list.size(), - ") does not match input shape: ", - xla::ShapeUtil::TupleElementCount(input_host_shape)); - } - - auto validate_shape = [&](int i, const Tensor& tensor) { - const xla::Shape& expected = - xla::ShapeUtil::GetTupleElementShape(input_host_shape, i); - VLOG(4) << "Input " << i << " TF shape " << tensor.shape().DebugString(); - XlaTensor* xla_tensor = XlaTensor::FromTensor(&tensor); - - if (xla_tensor == nullptr) { - // FromTensor failed; tensor must be empty. - if (!xla::ShapeUtil::IsZeroElementArray(expected)) { - return errors::InvalidArgument( - "Run-time shape mismatch for TPUExecute argument[", i, "] (", - context->op_kernel().requested_input(i), "). Expected ", - expected.DebugString(), "; got empty tensor"); - } - } else { - // Compare host shapes, easier than getting the expected device shape. - const xla::Shape& xla_shape = xla_tensor->shaped_buffer().on_host_shape(); - if (!xla::ShapeUtil::Compatible(expected, xla_shape)) { - return errors::InvalidArgument( - "Run-time shape mismatch for TPUExecute argument[", i, "] (", - context->op_kernel().requested_input(i), "). Expected ", - expected.DebugString(), "; got ", xla_shape.DebugString()); - } - } - - return Status::OK(); - }; - - // Iterate over the inputs, validating the shapes of non-variable inputs, - // and creating a VariableInfo object for each variable. We consider variable - // inputs in a separate phase because we must acquire variable locks in order. 
- std::vector variables; - std::vector variable_index(arg_list.size(), -1); - variables.reserve(arg_list.size()); - for (int i = 0; i < arg_list.size(); ++i) { - // Arguments are assumed to be variables if they have a resource type. - // (Non-variable resources are not supported.) - if (context->input_dtype(i) == DT_RESOURCE) { - variable_index[i] = variables.size(); - // TODO(phawkins): we may be looking up many variables here; it would be - // better if we did not repeatedly acquire the resource manager's lock. - const ResourceHandle& handle = HandleFromInput(context, i); - Var* variable; - TF_RETURN_IF_ERROR(LookupResource(context, handle, &variable)); - variables.push_back(VariableInfo(i, handle.name(), variable)); - } else { - TF_RETURN_IF_ERROR(validate_shape(i, arg_list[i])); - } - } - - // Lock the variables, and validate their shapes. We hold the variable locks - // for the duration of the TPU execution so we can donate the variable buffers - // to the computation. If we copied the variable's Tensor instead, its - // reference count would be greater than one due to the reference the Var - // object holds, and we would never be able to reuse variable buffers. - // TODO(phawkins): add a 'reuse_buffers' attribute to TPUExecute that allows - // the user to elect to copy the buffers and permit concurrent access instead. - TF_RETURN_IF_ERROR(LockVariables(absl::MakeSpan(variables))); - for (int i = 0; i < variables.size(); ++i) { - TF_RETURN_IF_ERROR( - validate_shape(variables[i].index(), *variables[i].var()->tensor())); - } - - se::DeviceMemoryAllocator* const allocator = node_context->memory_allocator(); - xla::TransferManager* const transfer_manager = - node_context->transfer_manager(); - const int device_ordinal = node_context->device_ordinal(); - - auto input_buffers = absl::make_unique( - transfer_manager->HostShapeToDeviceShape(input_host_shape)); - - // Allocates a buffer for the root tuple. - const int64 root_size = - transfer_manager->GetByteSizeRequirement(input_buffers->buffers.shape()); - TF_ASSIGN_OR_RETURN(*input_buffers->buffers.mutable_element({}), - allocator->Allocate(device_ordinal, root_size)); - - // Helper function that sets the input buffers for 'arg_index' to 'buffers'. - // If 'donate_buffers' is true, donates ownership of the buffers in 'buffers' - // to the computation and overwrites the entries in 'buffers' with nulls. - auto set_input_buffers_helper = [&](int arg_index, bool donate_buffers, - xla::ShapedBuffer* buffers) { - buffers->buffers().ForEachMutableElement([&](const xla::ShapeIndex& index, - se::DeviceMemoryBase* buffer) { - xla::ShapeIndex in_index = {arg_index}; - for (int64 j : index) { - in_index.push_back(j); - } - auto* in_buffer = input_buffers->buffers.mutable_element(in_index); - if (donate_buffers) { - *in_buffer = se::OwningDeviceMemory(*buffer, device_ordinal, allocator); - *buffer = se::DeviceMemoryBase(); - } else { - *in_buffer = *buffer; - } - }); - }; - - // Assigns the buffers of 'tensor' as computation input 'i'. Allocates fresh - // buffers for zero-element tensors where required. - auto assign_input = [&](int i, const Tensor& tensor, - bool may_reuse) -> xla::Status { - XlaTensor* xla_tensor = XlaTensor::FromTensor(&tensor); - - // Size 0 tensors have no backing XlaTensor, but may still need to have - // tuple buffers allocated. 
- if (xla_tensor == nullptr) { - CHECK_EQ(tensor.NumElements(), 0); - const xla::Shape& host_shape = - xla::ShapeUtil::GetSubshape(input_host_shape, {i}); - TF_ASSIGN_OR_RETURN(xla::ScopedShapedBuffer buffers, - transfer_manager->AllocateScopedShapedBuffer( - host_shape, allocator, device_ordinal)); - set_input_buffers_helper(/*arg_index=*/i, /*donate_buffers=*/true, - &buffers); - } else { - bool can_reuse_buffers = tensor.RefCountIsOne() && may_reuse; - set_input_buffers_helper(/*arg_index=*/i, - /*donate_buffers=*/can_reuse_buffers, - &xla_tensor->shaped_buffer()); - xla_tensor->WaitForDefinitionEventOnStream(stream); - } - return Status::OK(); - }; - - for (int i = 0; i < arg_list.size(); ++i) { - auto it = variable_updates.input_to_output.find(i); - if (it == variable_updates.input_to_output.end()) { - TF_RETURN_IF_ERROR(assign_input(i, arg_list[i], /*may_reuse=*/true)); - continue; - } - // input i is a variable - bool updated = it->second >= 0; - if (arg_list[i].dtype() != DT_RESOURCE) { - TF_RETURN_IF_ERROR(assign_input(i, arg_list[i], updated)); - } else { - int vi = variable_index[i]; - TF_RETURN_IF_ERROR( - assign_input(i, *variables[vi].var()->tensor(), updated)); - } - } - - input_buffers->variables = std::move(variables); - input_buffers->variable_index = std::move(variable_index); - - return std::move(input_buffers); -} - -struct OutputBuffers { - OutputBuffers(xla::ScopedShapedBuffer b, se::DeviceMemoryAllocator* allocator) - : owned_buffers(b.on_device_shape(), true), - buffers(b.release()), - memory_allocator(allocator) {} - - ~OutputBuffers() { - buffers.buffers().ForEachElement([&](const xla::ShapeIndex& index, - const se::DeviceMemoryBase& buffer) { - if (owned_buffers.element(index) && !buffer.is_null()) { - Status status = - memory_allocator->Deallocate(buffers.device_ordinal(), buffer); - LOG_IF(ERROR, !status.ok()) << "Error deallocating buffer " << status; - } - }); - } - - // Which of the buffers do we own? - xla::ShapeTree owned_buffers; - - xla::ShapedBuffer buffers; - - se::DeviceMemoryAllocator* const memory_allocator; -}; - -// Allocates Tensors for the outputs of the computation. Ownership of most -// output buffers is passed to the output Tensors. Returns an OutputBuffer that -// owns the root buffer that should be passed to the XLA computation, as well as -// any output buffers that do not have corresponding output tensors. The latter -// may happen for zero-element tensors of type int64 or complex64 which still -// require a tuple buffer but do not have a corresponding XlaTensor. -xla::StatusOr> AllocateOutputTensors( - OpKernelContext* context, xla::ScopedShapedBuffer scoped_buffers, - absl::Span output_tensor_shape_protos, - const VariableUpdateMap& variable_updates, TpuNodeContext* node_context, - se::Stream* stream, int device_ordinal, InputBuffers* input_buffers, - const std::shared_ptr& definition_event) { - VLOG(4) << "Output buffers: " << scoped_buffers.ToString(); - - profiler::TraceMe trace_me("AllocateOutputTensors", /*level=*/2); - // Shapes of the outputs, in TensorShape form. - const int64 sub_elements = - xla::ShapeUtil::TupleElementCount(scoped_buffers.on_host_shape()); - if (sub_elements != output_tensor_shape_protos.size()) { - return errors::InvalidArgument( - "Mismatched numbers of output shapes: ", sub_elements, " vs. 
", - output_tensor_shape_protos.size()); - } - - xla::TransferManager* const transfer_manager = - node_context->transfer_manager(); - - std::vector output_tensor_shapes; - output_tensor_shapes.reserve(sub_elements); - for (int64 i = 0; i < sub_elements; ++i) { - TF_RETURN_IF_ERROR( - TensorShape::IsValidShape(*output_tensor_shape_protos[i])); - TensorShape shape(*output_tensor_shape_protos[i]); - const xla::Shape& xla_shape = - xla::ShapeUtil::GetSubshape(scoped_buffers.on_host_shape(), {i}); - if (!xla_shape.IsArray() || - xla::ShapeUtil::ElementsIn(xla_shape) != shape.num_elements()) { - return errors::InvalidArgument( - "Mismatched number of elements in output shape: ", - xla::ShapeUtil::HumanString(xla_shape), " vs ", shape.DebugString()); - } - output_tensor_shapes.push_back(shape); - } - - // Builds a shaped buffer for the outputs. - TF_RET_CHECK(scoped_buffers.on_host_shape().IsTuple()); - TF_RET_CHECK(!xla::ShapeUtil::IsNestedTuple(scoped_buffers.on_host_shape())); - - se::DeviceMemoryAllocator* const allocator = node_context->memory_allocator(); - - auto output_buffers = - absl::MakeUnique(std::move(scoped_buffers), allocator); - - xla::Shape output_host_shape = output_buffers->buffers.on_host_shape(); - xla::Shape output_device_shape = output_buffers->buffers.on_device_shape(); - - if (!output_host_shape.is_static()) { - TF_RETURN_IF_ERROR(transfer_manager->ReadDynamicShapes( - stream, &output_buffers->buffers, &output_host_shape, - &output_device_shape)); - for (int64 i = 0; i < sub_elements; ++i) { - const xla::Shape& subshape = - xla::ShapeUtil::GetSubshape(output_host_shape, {i}); - TensorShape shape; - TF_RETURN_IF_ERROR(XLAShapeToTensorShape(subshape, &shape)); - output_tensor_shapes[i] = shape; - } - } - - // Transfers ownership of the buffers that back XLA computation output 'i' - // to 'output_tensor'. - auto transfer_buffers = [&](int i, Tensor* output_tensor) { - const xla::Shape& host_shape = - xla::ShapeUtil::GetTupleElementShape(output_host_shape, i); - const xla::Shape& device_shape = - xla::ShapeUtil::GetTupleElementShape(output_device_shape, i); - - // Transfers ownership of the output buffers to the output Tensor, if - // there the tensor is backed by an XlaTensor. Tensors of size 0 have no - // backing XlaTensor, so we let retain 'output_buffers' ownership of any - // buffers in that case. - if (output_tensor->NumElements() > 0) { - xla::ScopedShapedBuffer shaped_buffer(host_shape, device_shape, allocator, - device_ordinal); - shaped_buffer.buffers().ForEachMutableElement( - [&](const xla::ShapeIndex& index, se::DeviceMemoryBase* buffer) { - xla::ShapeIndex out_index = {i}; - for (int64 j : index) { - out_index.push_back(j); - } - *buffer = output_buffers->buffers.buffers().element(out_index); - *output_buffers->owned_buffers.mutable_element(out_index) = false; - }); - - XlaTensor* xla_tensor = XlaTensor::FromTensor(output_tensor); - xla_tensor->set_shaped_buffer(std::move(shaped_buffer)); - xla_tensor->ResetDefinitionEvent(definition_event, stream); - } - }; - - const int num_updated_variables = variable_updates.output_to_input.size(); - TF_RET_CHECK(num_updated_variables <= output_tensor_shapes.size()) - << num_updated_variables << " <= " << output_tensor_shapes.size(); - - OpInputList arg_list; - TF_RETURN_IF_ERROR(context->input_list("args", &arg_list)); - - // The TPU program outputs the updated variables including DT_RESOURCE and - // non-DT_RESOURCE. The TPUExecuteOp needs to output all non-DT_RESOURCE - // variables (updated or not). 
- // - // updated not_updated - // |------------------|------------------| - // DT_RESOURCE | allocate persist | do nothing | - // |------------------|------------------| - // | allocate | forward Op input | - // not DT_RESOURCE | output | to Op output | Op output - // |------------------|------------------| - // program output - - // Allocates a fresh tensor for each updated variable. While the variable - // inputs need come in no particular order, the variable values are - // always added last by XlaCompiler class, in the same order as the - // corresponding input variables. - int op_output_index = 0; - int compiled_update_index = 0; - auto process_non_updated_variable = [&](int input_index) { - const int variable_index = input_buffers->variable_index.at(input_index); - // If a DT_RESOURCE input is not updated, nothing needs to be done - // because there is no corresponding output. If a non-resource input - // is not updated, forward the input to the output. - if (variable_index < 0) { - context->set_output(op_output_index, arg_list[input_index]); - ++op_output_index; - } - }; - for (int i = 0; i < output_tensor_shapes.size(); ++i) { - auto it = variable_updates.output_to_input.find(i); - if (it == variable_updates.output_to_input.end()) { - // Not a variable update. - // Allocates a fresh tensor for each output of the operator. We always - // allocate a new host-side tensor, but the on-device buffers that back - // that tensor may be aliases of input buffers. - Tensor* output_tensor; - TF_RETURN_IF_ERROR(context->allocate_output( - op_output_index, output_tensor_shapes[i], &output_tensor)); - transfer_buffers(i, output_tensor); - ++op_output_index; - continue; - } - const int input_index = it->second.first; - // We must process the compiled updates in order, which includes the - // non-updated variables, i.e., those without an XLA output. - const bool from_compilation = it->second.second; - while (from_compilation && - variable_updates - .input_in_compiled_update_order[compiled_update_index] != - input_index) { - process_non_updated_variable( - variable_updates - .input_in_compiled_update_order[compiled_update_index]); - ++compiled_update_index; - } - ++compiled_update_index; - const int variable_index = input_buffers->variable_index.at(input_index); - PersistentTensor unused; - Tensor* output_tensor; - if (variable_index >= 0) { - // This output corresponds to a DT_RESOURCE input to the TPUExecute - // operator. Update the corresponding variable. - VariableInfo& var = input_buffers->variables[variable_index]; - // TODO(b/35625933): the correct thing to do would be to transfer - // ownership of the PersistentTensor into the Var object. However, Var - // contains a Tensor so we can't. - TF_RETURN_IF_ERROR(context->allocate_persistent( - var.var()->tensor()->dtype(), output_tensor_shapes[i], &unused, - &output_tensor)); - *var.var()->tensor() = *output_tensor; - } else { - // This output corresponds to a non-resource input to the TPUExecute - // operator. This case occurs for the distributed TPU rewrite which - // adds variable values as inputs and outputs rather than passing the - // variables themselves; reading and writing the variable is handled - // outside the op. - // TODO(phawkins): remove this case when placement of variables on TPU - // devices is well supported and we no longer need to place "remote" - // variables on CPU devices. 
- TF_RETURN_IF_ERROR(context->allocate_output( - op_output_index, output_tensor_shapes[i], &output_tensor)); - ++op_output_index; - } - transfer_buffers(i, output_tensor); - } - - // Process any remaining non-updated variables. - for (; compiled_update_index < - variable_updates.input_in_compiled_update_order.size(); - ++compiled_update_index) { - process_non_updated_variable( - variable_updates.input_in_compiled_update_order[compiled_update_index]); - } - return std::move(output_buffers); -} - -} // namespace - -// TPUExecuteOp - -TPUExecuteOp::TPUExecuteOp(OpKernelConstruction* context) - : AsyncOpKernel(context, /* is_deferred = */ true) {} - -AsyncOpKernel* TPUExecuteOp::AsAsync() { - // If TPU launches are asynchronous, we can perform the launch without - // blocking the calling thread, and so the executor may treat this kernel as - // a regular (synchronous) OpKernel. - return nullptr; -} - -void TPUExecuteOp::Compute(OpKernelContext* context) { - Status s = DoWork(context); - // NOTE: We can't use `OP_REQUIRES_OK()` here because that macro includes - // a dynamic check that we are not in an AsyncOpKernel. - if (TF_PREDICT_FALSE(!s.ok())) { - context->SetStatus(s); - } -} - -void TPUExecuteOp::ComputeAsync(OpKernelContext* context, DoneCallback done) { - // If TPU launches are asynchronous, then perform the launch on this - // thread to avoid a thread hop, which has an observable latency cost. - OP_REQUIRES_OK_ASYNC(context, DoWork(context), done); - done(); -} - -Status TPUExecuteOp::DoWork(OpKernelContext* context) { - VLOG(1) << "Cloud TPU: TPUExecuteOp::Compute"; - - const XlaDevice::Metadata* metadata; - TF_RETURN_IF_ERROR(XlaDevice::GetMetadata(context, &metadata)); - const int device_ordinal = metadata->device_ordinal(); - - // We are guaranteed that the object underlying TpuNodeContext won't be - // deleted out from under us, while node_context is alive. - TF_ASSIGN_OR_RETURN(std::unique_ptr node_context, - TpuNodeContext::Create(device_ordinal)); - - profiler::TraceMe trace_me( - [&, device_ordinal] { - return absl::StrCat("TpuExecuteOp#device_ordinal=", device_ordinal, - ",id=", context->step_id(), - ",iter_num=", context->frame_iter().iter_id, "#"); - }, - /*level=*/2); - profiler::TraceMe trace_me_init("TPUExecuteOp::Init", /*level=*/2); - - string rendezvous_key_base; - std::unique_ptr entry; - TF_RETURN_IF_ERROR( - GetComputationCacheEntry(context, &rendezvous_key_base, &entry)); - - // Shapes of the inputs and outputs, in xla::Shape form. - const TPUExecutableInfoProto* proto = entry->get().get_executable_info(); - - xla::TransferManager* const transfer_manager = - node_context->transfer_manager(); - CHECK(context->op_device_context()); - se::Stream* stream = context->op_device_context()->stream(); - - TF_RET_CHECK(proto->input_shapes_size() == 1); - - xla::Shape host_shape(proto->input_shapes(0)); - - TF_ASSIGN_OR_RETURN( - auto variable_update_map, - BuildVariableUpdateMap(proto->variable_indices(), - fused_device_var_reads_in_computation_inputs_, - fused_device_var_updates_in_computation_outputs_, - proto->output_tensor_shapes().size())); - TF_ASSIGN_OR_RETURN( - std::unique_ptr input_buffers, - BuildComputationInputs(context, host_shape, variable_update_map, - node_context.get(), stream)); - - // Ideally this should be the host-to-device stream from XlaDeviceContext. 
- // The particular anti-dependency this is avoiding (why we need a separate - // transfer stream) is between the executable writing tuple tables and - // TPUExecute()'s deregister_stream; if they come from the same stream pool - // antidependencies will occur. XlaBackend has a different pool of streams - // to the stream->GetOrCreateSubStream() that TPUExecute() uses, so these - // will never refer to the same stream. - // - // TODO(jmolloy): Add the necessary plumbing to obtain the proper - // host-to-device stream here. - TF_ASSIGN_OR_RETURN(auto transfer_stream_ptr, - node_context->BorrowStream(device_ordinal)); - - se::DeviceMemoryAllocator* const allocator = node_context->memory_allocator(); - auto shaped_buffer = - input_buffers->ToShapedBuffer(host_shape, allocator, device_ordinal); - if (transfer_manager->CanShapedBufferBeAccessedNow(stream->parent(), - shaped_buffer)) { - TF_RETURN_IF_ERROR(transfer_manager->WriteRootTupleIndexTable( - transfer_stream_ptr.get(), shaped_buffer)); - stream->ThenWaitFor(transfer_stream_ptr.get()); - } else { - TF_RETURN_IF_ERROR( - transfer_manager->WriteRootTupleIndexTable(stream, shaped_buffer)); - } - VLOG(4) << "Input buffers: " << shaped_buffer.ToString(); - - // Snapshot the inputs, if a snapshot was requested. - std::shared_ptr hlo_snapshot; - if (proto->has_session_module()) { - hlo_snapshot = std::make_shared(proto->session_module()); - auto literal = - std::make_shared(shaped_buffer.on_host_shape()); - transfer_manager->TransferLiteralFromDevice( - stream, shaped_buffer, literal.get(), - [hlo_snapshot, literal](Status status) { - if (!status.ok()) { - LOG(ERROR) << "TransferLiteralFromDevice for HLO snapshot inputs " - "failed: " - << status; - return; - } - *hlo_snapshot->add_arguments() = literal->ToProto(); - }); - } - - auto definition_event = std::make_shared(stream->parent()); - TF_RET_CHECK(definition_event->Init()) - << "TPU definition event initialization failed"; - - trace_me_init.Stop(); - - const uint32 rng_seed = GetXLARandomSeed(); - - std::unique_ptr device_assignment; - if (proto->has_device_assignment()) { - TF_ASSIGN_OR_RETURN(device_assignment, xla::DeviceAssignment::Deserialize( - proto->device_assignment())); - } - - VLOG(4) << "Input buffers after alias resolution: " - << shaped_buffer.ToString(); - - std::vector input; - input.emplace_back( - xla::ExecutionInput(std::move(input_buffers->buffers), host_shape)); - - // The buffers to be freed are in the `output` and will be automatically - // freed when it goes out of the scope. In async mode, this means the buffers - // will be freed before anyone calls "BlockHostUntilDone", which indicates - // that some of the (input) buffers will be freed while the program is running - // and looks scary. However, this turns out to be not a problem since although - // we free a memory and reassign it to other users while a program is running, - // all subsequent writes to the program that could possibly clobber the memory - // will depend on the program to finish. 
- const TPUHostTransferInfoProto* host_transfer_info = - entry->get().get_host_transfer_info(); - const xla::HloProto* hlo_metadata = entry->get().get_hlo_metadata(); - TF_ASSIGN_OR_RETURN( - xla::ExecutionOutput output, - TPUExecute(*proto, *host_transfer_info, *hlo_metadata, std::move(input), - rendezvous_key_base, rng_seed, node_context.get(), - device_assignment.get(), context->cancellation_manager(), - context, stream, transfer_stream_ptr.get(), - entry->get().get_tpu_program())); - stream->ThenRecordEvent(definition_event.get()); - - TF_ASSIGN_OR_RETURN( - std::unique_ptr output_buffers, - AllocateOutputTensors(context, output.ConsumeResult(), - proto->output_tensor_shapes(), variable_update_map, - node_context.get(), stream, device_ordinal, - input_buffers.get(), definition_event)); - - // Transfer the outputs and save the snapshot to disk. - if (hlo_snapshot) { - auto literal = - std::make_shared(output_buffers->buffers.on_host_shape()); - transfer_manager->TransferLiteralFromDevice( - stream, output_buffers->buffers, literal.get(), - [hlo_snapshot, literal](Status status) { - if (status.ok()) { - *hlo_snapshot->mutable_result() = literal->ToProto(); - } else { - LOG(ERROR) << "TransferLiteralFromDevice for HLO snapshot " - "outputs failed: " - << status; - } - DumpHloSnapshotIfEnabled(*hlo_snapshot, - xla::GetDebugOptionsFromFlags()); - }); - } - return Status::OK(); -} - -TPUExecuteOp::~TPUExecuteOp() = default; - -TPUExecuteAndUpdateVariablesOp::TPUExecuteAndUpdateVariablesOp( - OpKernelConstruction* context) - : TPUExecuteOp(context) { - OP_REQUIRES_OK(context, context->GetAttr( - "device_var_reads_indices", - &fused_device_var_reads_in_computation_inputs_)); - OP_REQUIRES_OK( - context, - context->GetAttr("device_var_updates_indices", - &fused_device_var_updates_in_computation_outputs_)); -} - -REGISTER_KERNEL_BUILDER( - Name("TPUExecute").Device(DEVICE_TPU_NODE).HostMemory("key"), TPUExecuteOp); - -REGISTER_KERNEL_BUILDER(Name("TPUExecuteAndUpdateVariables") - .Device(DEVICE_TPU_NODE) - .HostMemory("key"), - TPUExecuteAndUpdateVariablesOp); - -} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_execute_op.h b/tensorflow/core/tpu/kernels/tpu_execute_op.h deleted file mode 100644 index c66118ad45e..00000000000 --- a/tensorflow/core/tpu/kernels/tpu_execute_op.h +++ /dev/null @@ -1,67 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_CORE_TPU_KERNELS_TPU_EXECUTE_OP_H_ -#define TENSORFLOW_CORE_TPU_KERNELS_TPU_EXECUTE_OP_H_ - -#include -#include - -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/platform/macros.h" -#include "tensorflow/core/platform/mutex.h" - -namespace tensorflow { - -// Op that executes a precompiled TPU computation. 
-class TPUExecuteOp : public AsyncOpKernel { - public: - explicit TPUExecuteOp(OpKernelConstruction* context); - ~TPUExecuteOp() override; - - AsyncOpKernel* AsAsync() override; - - void Compute(OpKernelContext* context) override; - void ComputeAsync(OpKernelContext* context, DoneCallback done) override; - - protected: - // Used by TPUExecuteAndUpdateVariablesOp to set the fused variable reads and - // updates indices in the XLA computation. The two vectors must have the same - // size, and a pair of read index and write index represents a variable's - // input to the program and its updated value from the program. If the - // variable is not updated, use -1 as the output index. - std::vector fused_device_var_reads_in_computation_inputs_; - std::vector fused_device_var_updates_in_computation_outputs_; - - private: - Status DoWork(OpKernelContext* context); - - TF_DISALLOW_COPY_AND_ASSIGN(TPUExecuteOp); -}; - -// A variant of TPUExecuteOp that contains fused device variable reads and -// updates. -class TPUExecuteAndUpdateVariablesOp : public TPUExecuteOp { - public: - explicit TPUExecuteAndUpdateVariablesOp(OpKernelConstruction* context); - ~TPUExecuteAndUpdateVariablesOp() override = default; - - private: - TF_DISALLOW_COPY_AND_ASSIGN(TPUExecuteAndUpdateVariablesOp); -}; - -} // namespace tensorflow - -#endif // TENSORFLOW_CORE_TPU_KERNELS_TPU_EXECUTE_OP_H_ From 7c3839000614e3b9d90cc16829910a1eb487e439 Mon Sep 17 00:00:00 2001 From: Rick Chao Date: Fri, 17 Jul 2020 13:40:07 -0700 Subject: [PATCH 0734/2522] Checkpointing: Check `optimizer_object` has optimizer method before creating or restoring slot variables, since it can be a Checkpoint for those users following https://www.tensorflow.org/guide/checkpoint#loading_mechanics. PiperOrigin-RevId: 321845472 Change-Id: I2063d74f6cd3f10a7888cb637c1a5d175b5273f1 --- .../keras/optimizer_v2/optimizer_v2_test.py | 18 ++++++++++++++++++ tensorflow/python/training/tracking/base.py | 7 ++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/keras/optimizer_v2/optimizer_v2_test.py b/tensorflow/python/keras/optimizer_v2/optimizer_v2_test.py index 4479c378638..1e242256c7b 100644 --- a/tensorflow/python/keras/optimizer_v2/optimizer_v2_test.py +++ b/tensorflow/python/keras/optimizer_v2/optimizer_v2_test.py @@ -59,6 +59,7 @@ from tensorflow.python.ops import variables from tensorflow.python.platform import test from tensorflow.python.training import momentum from tensorflow.python.training import training_util +from tensorflow.python.training.tracking import util as trackable_utils _DATA_TYPES = [dtypes.half, dtypes.float32, dtypes.float64] @@ -644,6 +645,23 @@ class OptimizerTest(test.TestCase, parameterized.TestCase): self.evaluate(opt_op) self.assertAllClose([0.7, 1.7], self.evaluate(var)) + @combinations.generate(combinations.combine(mode=['eager'])) + def testRestoringIterationsWithoutAnOptimizer(self): + opt = gradient_descent.SGD(3.0) + opt.iterations.assign(5) + checkpoint = trackable_utils.Checkpoint(optimizer=opt) + path = checkpoint.save(self.get_temp_dir()) + + # Following verifies that the `iterations` can be restored with the absence + # of an `Optimizer` object (using a `Checkpoint` as a placeholder). 
+ iterations_var = variables.Variable(0, dtype=dtypes.int64) + optimizer_checkpoint = trackable_utils.Checkpoint(iter=iterations_var) + checkpoint_to_restore = trackable_utils.Checkpoint( + optimizer=optimizer_checkpoint) + checkpoint_to_restore.restore(path) + + self.assertEqual(5, self.evaluate(iterations_var)) + @keras_parameterized.run_all_keras_modes class OptimizersCompatibilityTest(keras_parameterized.TestCase): diff --git a/tensorflow/python/training/tracking/base.py b/tensorflow/python/training/tracking/base.py index d9cf7d72111..a8b0410dc77 100644 --- a/tensorflow/python/training/tracking/base.py +++ b/tensorflow/python/training/tracking/base.py @@ -251,7 +251,12 @@ class CheckpointPosition(object): original_variable=trackable, slot_variable_id=slot_restoration.slot_variable_id, slot_name=slot_restoration.slot_name)) - else: + + # `optimizer_object` can be a `Checkpoint` when user only needs the + # attributes the optimizer holds, such as `iterations`. In those cases, + # it would not have the optimizer's `_create_or_restore_slot_variable` + # method. + elif hasattr(optimizer_object, "_create_or_restore_slot_variable"): optimizer_object._create_or_restore_slot_variable( # pylint: disable=protected-access slot_variable_position=CheckpointPosition( checkpoint=checkpoint, From d910087ab6fc62b08f9a010d9f1f4c7e05bd02b0 Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Fri, 17 Jul 2020 20:49:50 +0000 Subject: [PATCH 0735/2522] add TensorMapHasKey op --- tensorflow/core/kernels/map_kernels.cc | 3 +++ tensorflow/core/ops/map_ops.cc | 6 ++++++ .../python/kernel_tests/map_ops_test.py | 19 +++++++++++++++---- tensorflow/python/ops/map_ops.py | 5 ++++- 4 files changed, 28 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/kernels/map_kernels.cc b/tensorflow/core/kernels/map_kernels.cc index c890ba77f54..db91a660809 100644 --- a/tensorflow/core/kernels/map_kernels.cc +++ b/tensorflow/core/kernels/map_kernels.cc @@ -35,4 +35,7 @@ REGISTER_KERNEL_BUILDER(Name("TensorMapInsert").Device(DEVICE_CPU), REGISTER_KERNEL_BUILDER(Name("TensorMapErase").Device(DEVICE_CPU), TensorMapErase); +REGISTER_KERNEL_BUILDER(Name("TensorMapHasKey").Device(DEVICE_CPU), + TensorMapHasKey); + } diff --git a/tensorflow/core/ops/map_ops.cc b/tensorflow/core/ops/map_ops.cc index 2e4284aa4a2..072c116fc29 100644 --- a/tensorflow/core/ops/map_ops.cc +++ b/tensorflow/core/ops/map_ops.cc @@ -69,6 +69,12 @@ REGISTER_OP("TensorMapErase") return Status::OK(); }); +REGISTER_OP("TensorMapHasKey") + .Input("input_handle: variant") + .Input("key: element_dtype") + .Output("has_key: bool") + .Attr("element_dtype: type") + .SetShapeFn(shape_inference::ScalarShape); } // namespace } // namespace tensorflow diff --git a/tensorflow/python/kernel_tests/map_ops_test.py b/tensorflow/python/kernel_tests/map_ops_test.py index 443b8fd34fc..26f1ea93e1a 100644 --- a/tensorflow/python/kernel_tests/map_ops_test.py +++ b/tensorflow/python/kernel_tests/map_ops_test.py @@ -94,7 +94,19 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): "Trying to erase non-existent item."): m, e = map_ops.tensor_map_erase(m, k, dtypes.float32) self.evaluate(e) - ''' + + def testTensorMapHasKey(self): + m = map_ops.empty_tensor_map() + k = constant_op.constant(1.0) + k2 = constant_op.constant(2.0) + v = constant_op.constant(2.0) + m = map_ops.tensor_map_insert(m, k, v) + + b = map_ops.tensor_map_has_key(m, k) + b2 = map_ops.tensor_map_has_key(m, k2) + self.assertAllEqual(b, True) + self.assertAllEqual(b2, False) + def 
testInsertLookupGrad(self): with backprop.GradientTape() as tape: m = map_ops.empty_tensor_map() @@ -102,11 +114,10 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): v = constant_op.constant(2.0) tape.watch(v) m = map_ops.tensor_map_insert(m, k, v) - l = map_ops.tensor_map_lookup(m, k) + l = map_ops.tensor_map_lookup(m, k, dtypes.float32) l *= 5 g = tape.gradient(l, v) - self.assertAllClose(g, 5.0)''' - + self.assertAllClose(g, 5.0) if __name__ == '__main__': test.main() diff --git a/tensorflow/python/ops/map_ops.py b/tensorflow/python/ops/map_ops.py index f14c1314d71..144346976d1 100644 --- a/tensorflow/python/ops/map_ops.py +++ b/tensorflow/python/ops/map_ops.py @@ -41,6 +41,9 @@ def tensor_map_lookup(input_handle, key, value_dtype): def tensor_map_erase(input_handle, key, value_dtype): return gen_map_ops.tensor_map_erase(input_handle, key, value_dtype) +def tensor_map_has_key(input_handle, key): + return gen_map_ops.tensor_map_has_key(input_handle, key) + @ops.RegisterGradient("TensorMapLookup") def LookupGrad(op, dval): m, k = op.inputs @@ -54,5 +57,5 @@ def InsertGrad(op, dmap): _, key, val = op.inputs map_grad = None key_grad = None - value_grad = tensor_map_lookup(dmap, key) + value_grad = tensor_map_lookup(dmap, key, val.dtype) return map_grad, key_grad, value_grad From 20500048c89cf3626f84ae9579e4667b02a2f9da Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Fri, 17 Jul 2020 13:47:05 -0700 Subject: [PATCH 0736/2522] Update keras.backend to rely on public API for device_spec. PiperOrigin-RevId: 321846718 Change-Id: Ifddcdbf34bea94a1240813218f35487af0ae464d --- tensorflow/python/keras/backend.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py index 01d3ecd09cf..f3b3f6a5157 100644 --- a/tensorflow/python/keras/backend.py +++ b/tensorflow/python/keras/backend.py @@ -43,7 +43,7 @@ from tensorflow.python.eager import lift_to_graph from tensorflow.python.framework import composite_tensor from tensorflow.python.framework import config from tensorflow.python.framework import constant_op -from tensorflow.python.framework import device as tfdev +from tensorflow.python.framework import device_spec from tensorflow.python.framework import dtypes as dtypes_module from tensorflow.python.framework import func_graph from tensorflow.python.framework import ops @@ -725,7 +725,7 @@ class _TfDeviceCaptureOp(object): def _set_device(self, device): """This method captures TF's explicit device scope setting.""" - if tfdev.is_device_spec(device): + if isinstance(device, device_spec.DeviceSpecV2): device = device.to_string() self.device = device @@ -744,7 +744,10 @@ def _get_current_tf_device(): graph = get_graph() op = _TfDeviceCaptureOp() graph._apply_device_functions(op) - return tfdev.DeviceSpec.from_string(op.device) + if tf2.enabled(): + return device_spec.DeviceSpecV2.from_string(op.device) + else: + return device_spec.DeviceSpecV1.from_string(op.device) def _is_current_explicit_device(device_type): From 8a4b945dec8f8c5706b502dd3fa80d96cbef998d Mon Sep 17 00:00:00 2001 From: Stella Laurenzo Date: Fri, 17 Jul 2020 14:07:13 -0700 Subject: [PATCH 0737/2522] Integrate LLVM at https://github.com/llvm/llvm-project/commit/5d06e8b24f97 PiperOrigin-RevId: 321850594 Change-Id: I7c37f8ccc27e382b6626c089d5f0fd410f0345e4 --- tensorflow/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 
643ea4be71d..34cc9be490c 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -710,8 +710,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "de0c6bd56b41081f1b89a1c7a0bf2597fd6d0104" - LLVM_SHA256 = "5f5b45ea3c7679c6b35a09d508235847a85f9e5a415cc4227ed96399d6fa5b82" + LLVM_COMMIT = "5d06e8b24f97202764c7522efcfb6e2febdce6b7" + LLVM_SHA256 = "72d8a43ecfca1f0a90e9e7a7a42d4658fe385780d5603105de511edc8228f174" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From 30fcb1c3a957c5863d12622e4d43a072cf63bf9f Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Fri, 17 Jul 2020 14:26:10 -0700 Subject: [PATCH 0738/2522] Update keras optimizers to use the equivalent public raw_ops API. PiperOrigin-RevId: 321854186 Change-Id: I0495c5228d9b17068b756c4d1fd80b3309c8346d --- .../python/keras/optimizer_v2/adadelta.py | 36 ++++---- .../python/keras/optimizer_v2/adagrad.py | 28 +++---- tensorflow/python/keras/optimizer_v2/adam.py | 48 +++++------ .../python/keras/optimizer_v2/adamax.py | 23 +++--- tensorflow/python/keras/optimizer_v2/ftrl.py | 82 +++++++++---------- .../keras/optimizer_v2/gradient_descent.py | 35 ++++---- .../python/keras/optimizer_v2/rmsprop.py | 82 +++++++++---------- 7 files changed, 168 insertions(+), 166 deletions(-) diff --git a/tensorflow/python/keras/optimizer_v2/adadelta.py b/tensorflow/python/keras/optimizer_v2/adadelta.py index 12f9e40c394..8c895ae07f4 100644 --- a/tensorflow/python/keras/optimizer_v2/adadelta.py +++ b/tensorflow/python/keras/optimizer_v2/adadelta.py @@ -24,7 +24,7 @@ from tensorflow.python.framework import ops from tensorflow.python.keras import backend_config from tensorflow.python.keras.optimizer_v2 import optimizer_v2 from tensorflow.python.ops import array_ops -from tensorflow.python.training import training_ops +from tensorflow.python.training import gen_training_ops from tensorflow.python.util.tf_export import keras_export @@ -120,14 +120,14 @@ class Adadelta(optimizer_v2.OptimizerV2): accum_grad = self.get_slot(var, 'accum_grad') accum_var = self.get_slot(var, 'accum_var') - return training_ops.resource_apply_adadelta( - var.handle, - accum_grad.handle, - accum_var.handle, - coefficients['lr_t'], - coefficients['rho'], - coefficients['epsilon'], - grad, + return gen_training_ops.ResourceApplyAdadelta( + var=var.handle, + accum=accum_grad.handle, + accum_update=accum_var.handle, + lr=coefficients['lr_t'], + rho=coefficients['rho'], + epsilon=coefficients['epsilon'], + grad=grad, use_locking=self._use_locking) def _resource_apply_sparse(self, grad, var, indices, apply_state=None): @@ -137,15 +137,15 @@ class Adadelta(optimizer_v2.OptimizerV2): accum_grad = self.get_slot(var, 'accum_grad') accum_var = self.get_slot(var, 'accum_var') - return training_ops.resource_sparse_apply_adadelta( - var.handle, - accum_grad.handle, - accum_var.handle, - coefficients['lr_t'], - coefficients['rho'], - coefficients['epsilon'], - grad, - indices, + return gen_training_ops.ResourceSparseApplyAdadelta( + var=var.handle, + accum=accum_grad.handle, + accum_update=accum_var.handle, + lr=coefficients['lr_t'], + rho=coefficients['rho'], + epsilon=coefficients['epsilon'], + grad=grad, + indices=indices, use_locking=self._use_locking) def get_config(self): diff --git 
a/tensorflow/python/keras/optimizer_v2/adagrad.py b/tensorflow/python/keras/optimizer_v2/adagrad.py index dbed9de92c6..ba76b837942 100644 --- a/tensorflow/python/keras/optimizer_v2/adagrad.py +++ b/tensorflow/python/keras/optimizer_v2/adagrad.py @@ -26,7 +26,7 @@ from tensorflow.python.keras import backend_config from tensorflow.python.keras.optimizer_v2 import optimizer_v2 from tensorflow.python.ops import array_ops from tensorflow.python.ops import init_ops -from tensorflow.python.training import training_ops +from tensorflow.python.training import gen_training_ops from tensorflow.python.util.tf_export import keras_export @@ -129,12 +129,12 @@ class Adagrad(optimizer_v2.OptimizerV2): or self._fallback_apply_state(var_device, var_dtype)) acc = self.get_slot(var, 'accumulator') - return training_ops.resource_apply_adagrad_v2( - var.handle, - acc.handle, - coefficients['lr_t'], - coefficients['epsilon'], - grad, + return gen_training_ops.ResourceApplyAdagradV2( + var=var.handle, + accum=acc.handle, + lr=coefficients['lr_t'], + epsilon=coefficients['epsilon'], + grad=grad, use_locking=self._use_locking) def _resource_apply_sparse(self, grad, var, indices, apply_state=None): @@ -143,13 +143,13 @@ class Adagrad(optimizer_v2.OptimizerV2): or self._fallback_apply_state(var_device, var_dtype)) acc = self.get_slot(var, 'accumulator') - return training_ops.resource_sparse_apply_adagrad_v2( - var.handle, - acc.handle, - coefficients['lr_t'], - coefficients['epsilon'], - grad, - indices, + return gen_training_ops.ResourceSparseApplyAdagradV2( + var=var.handle, + accum=acc.handle, + lr=coefficients['lr_t'], + epsilon=coefficients['epsilon'], + grad=grad, + indices=indices, use_locking=self._use_locking) def get_config(self): diff --git a/tensorflow/python/keras/optimizer_v2/adam.py b/tensorflow/python/keras/optimizer_v2/adam.py index df41201e14b..1fccd116012 100644 --- a/tensorflow/python/keras/optimizer_v2/adam.py +++ b/tensorflow/python/keras/optimizer_v2/adam.py @@ -26,7 +26,7 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import state_ops -from tensorflow.python.training import training_ops +from tensorflow.python.training import gen_training_ops from tensorflow.python.util.tf_export import keras_export @@ -171,32 +171,32 @@ class Adam(optimizer_v2.OptimizerV2): v = self.get_slot(var, 'v') if not self.amsgrad: - return training_ops.resource_apply_adam( - var.handle, - m.handle, - v.handle, - coefficients['beta_1_power'], - coefficients['beta_2_power'], - coefficients['lr_t'], - coefficients['beta_1_t'], - coefficients['beta_2_t'], - coefficients['epsilon'], - grad, + return gen_training_ops.ResourceApplyAdam( + var=var.handle, + m=m.handle, + v=v.handle, + beta1_power=coefficients['beta_1_power'], + beta2_power=coefficients['beta_2_power'], + lr=coefficients['lr_t'], + beta1=coefficients['beta_1_t'], + beta2=coefficients['beta_2_t'], + epsilon=coefficients['epsilon'], + grad=grad, use_locking=self._use_locking) else: vhat = self.get_slot(var, 'vhat') - return training_ops.resource_apply_adam_with_amsgrad( - var.handle, - m.handle, - v.handle, - vhat.handle, - coefficients['beta_1_power'], - coefficients['beta_2_power'], - coefficients['lr_t'], - coefficients['beta_1_t'], - coefficients['beta_2_t'], - coefficients['epsilon'], - grad, + return gen_training_ops.ResourceApplyAdamWithAmsgrad( + var=var.handle, + m=m.handle, + v=v.handle, + vhat=vhat.handle, + 
beta1_power=coefficients['beta_1_power'], + beta2_power=coefficients['beta_2_power'], + lr=coefficients['lr_t'], + beta1=coefficients['beta_1_t'], + beta2=coefficients['beta_2_t'], + epsilon=coefficients['epsilon'], + grad=grad, use_locking=self._use_locking) def _resource_apply_sparse(self, grad, var, indices, apply_state=None): diff --git a/tensorflow/python/keras/optimizer_v2/adamax.py b/tensorflow/python/keras/optimizer_v2/adamax.py index 5ac4734c6a2..3f4312c731e 100644 --- a/tensorflow/python/keras/optimizer_v2/adamax.py +++ b/tensorflow/python/keras/optimizer_v2/adamax.py @@ -25,7 +25,7 @@ from tensorflow.python.keras.optimizer_v2 import optimizer_v2 from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops -from tensorflow.python.training import training_ops +from tensorflow.python.training import gen_training_ops from tensorflow.python.util.tf_export import keras_export @@ -136,17 +136,16 @@ class Adamax(optimizer_v2.OptimizerV2): m = self.get_slot(var, 'm') v = self.get_slot(var, 'v') - - return training_ops.resource_apply_ada_max( - var.handle, - m.handle, - v.handle, - coefficients['beta_1_power'], - coefficients['lr_t'], - coefficients['beta_1_t'], - coefficients['beta_2_t'], - coefficients['epsilon'], - grad, + return gen_training_ops.ResourceApplyAdaMax( + var=var.handle, + m=m.handle, + v=v.handle, + beta1_power=coefficients['beta_1_power'], + lr=coefficients['lr_t'], + beta1=coefficients['beta_1_t'], + beta2=coefficients['beta_2_t'], + epsilon=coefficients['epsilon'], + grad=grad, use_locking=self._use_locking) def _resource_apply_sparse(self, grad, var, indices, apply_state=None): diff --git a/tensorflow/python/keras/optimizer_v2/ftrl.py b/tensorflow/python/keras/optimizer_v2/ftrl.py index 419f0f70125..0e96724a44d 100644 --- a/tensorflow/python/keras/optimizer_v2/ftrl.py +++ b/tensorflow/python/keras/optimizer_v2/ftrl.py @@ -22,7 +22,7 @@ from tensorflow.python.keras.optimizer_v2 import optimizer_v2 from tensorflow.python.ops import array_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops -from tensorflow.python.training import training_ops +from tensorflow.python.training import gen_training_ops from tensorflow.python.util.tf_export import keras_export @@ -135,27 +135,27 @@ class Ftrl(optimizer_v2.OptimizerV2): linear = self.get_slot(var, 'linear') if self._l2_shrinkage_regularization_strength <= 0.0: - return training_ops.resource_apply_ftrl( - var.handle, - accum.handle, - linear.handle, - grad, - coefficients['lr_t'], - coefficients['l1_regularization_strength'], - coefficients['l2_regularization_strength'], - coefficients['learning_rate_power'], + return gen_training_ops.ResourceApplyFtrl( + var=var.handle, + accum=accum.handle, + linear=linear.handle, + grad=grad, + lr=coefficients['lr_t'], + l1=coefficients['l1_regularization_strength'], + l2=coefficients['l2_regularization_strength'], + lr_power=coefficients['learning_rate_power'], use_locking=self._use_locking) else: - return training_ops.resource_apply_ftrl_v2( - var.handle, - accum.handle, - linear.handle, - grad, - coefficients['lr_t'], - coefficients['l1_regularization_strength'], - coefficients['l2_regularization_strength'], - coefficients['l2_shrinkage_regularization_strength'], - coefficients['learning_rate_power'], + return gen_training_ops.ResourceApplyFtrlV2( + var=var.handle, + accum=accum.handle, + linear=linear.handle, + grad=grad, + lr=coefficients['lr_t'], + 
l1=coefficients['l1_regularization_strength'], + l2=coefficients['l2_regularization_strength'], + l2_shrinkage=coefficients['l2_shrinkage_regularization_strength'], + lr_power=coefficients['learning_rate_power'], use_locking=self._use_locking) def _resource_apply_sparse(self, grad, var, indices, apply_state=None): @@ -167,29 +167,29 @@ class Ftrl(optimizer_v2.OptimizerV2): linear = self.get_slot(var, 'linear') if self._l2_shrinkage_regularization_strength <= 0.0: - return training_ops.resource_sparse_apply_ftrl( - var.handle, - accum.handle, - linear.handle, - grad, - indices, - coefficients['lr_t'], - coefficients['l1_regularization_strength'], - coefficients['l2_regularization_strength'], - coefficients['learning_rate_power'], + return gen_training_ops.ResourceSparseApplyFtrl( + var=var.handle, + accum=accum.handle, + linear=linear.handle, + grad=grad, + indices=indices, + lr=coefficients['lr_t'], + l1=coefficients['l1_regularization_strength'], + l2=coefficients['l2_regularization_strength'], + lr_power=coefficients['learning_rate_power'], use_locking=self._use_locking) else: - return training_ops.resource_sparse_apply_ftrl_v2( - var.handle, - accum.handle, - linear.handle, - grad, - indices, - coefficients['lr_t'], - coefficients['l1_regularization_strength'], - coefficients['l2_regularization_strength'], - coefficients['l2_shrinkage_regularization_strength'], - coefficients['learning_rate_power'], + return gen_training_ops.ResourceSparseApplyFtrlV2( + var=var.handle, + accum=accum.handle, + linear=linear.handle, + grad=grad, + indices=indices, + lr=coefficients['lr_t'], + l1=coefficients['l1_regularization_strength'], + l2=coefficients['l2_regularization_strength'], + l2_shrinkage=coefficients['l2_shrinkage_regularization_strength'], + lr_power=coefficients['learning_rate_power'], use_locking=self._use_locking) def get_config(self): diff --git a/tensorflow/python/keras/optimizer_v2/gradient_descent.py b/tensorflow/python/keras/optimizer_v2/gradient_descent.py index 017c4e5db25..466b42a3818 100644 --- a/tensorflow/python/keras/optimizer_v2/gradient_descent.py +++ b/tensorflow/python/keras/optimizer_v2/gradient_descent.py @@ -22,7 +22,7 @@ from tensorflow.python.framework import ops from tensorflow.python.keras.optimizer_v2 import optimizer_v2 from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_resource_variable_ops -from tensorflow.python.training import training_ops +from tensorflow.python.training import gen_training_ops from tensorflow.python.util.tf_export import keras_export @@ -136,17 +136,20 @@ class SGD(optimizer_v2.OptimizerV2): if self._momentum: momentum_var = self.get_slot(var, "momentum") - return training_ops.resource_apply_keras_momentum( - var.handle, - momentum_var.handle, - coefficients["lr_t"], - grad, - coefficients["momentum"], + return gen_training_ops.ResourceApplyKerasMomentum( + var=var.handle, + accum=momentum_var.handle, + lr=coefficients["lr_t"], + grad=grad, + momentum=coefficients["momentum"], use_locking=self._use_locking, use_nesterov=self.nesterov) else: - return training_ops.resource_apply_gradient_descent( - var.handle, coefficients["lr_t"], grad, use_locking=self._use_locking) + return gen_training_ops.ResourceApplyGradientDescent( + var=var.handle, + alpha=coefficients["lr_t"], + delta=grad, + use_locking=self._use_locking) def _resource_apply_sparse_duplicate_indices(self, grad, var, indices, **kwargs): @@ -170,13 +173,13 @@ class SGD(optimizer_v2.OptimizerV2): or self._fallback_apply_state(var_device, var_dtype)) 
momentum_var = self.get_slot(var, "momentum") - return training_ops.resource_sparse_apply_keras_momentum( - var.handle, - momentum_var.handle, - coefficients["lr_t"], - grad, - indices, - coefficients["momentum"], + return gen_training_ops.ResourceSparseApplyKerasMomentum( + var=var.handle, + accum=momentum_var.handle, + lr=coefficients["lr_t"], + grad=grad, + indices=indices, + momentum=coefficients["momentum"], use_locking=self._use_locking, use_nesterov=self.nesterov) diff --git a/tensorflow/python/keras/optimizer_v2/rmsprop.py b/tensorflow/python/keras/optimizer_v2/rmsprop.py index d1deaf34f45..1fa2577e72f 100644 --- a/tensorflow/python/keras/optimizer_v2/rmsprop.py +++ b/tensorflow/python/keras/optimizer_v2/rmsprop.py @@ -27,7 +27,7 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import state_ops -from tensorflow.python.training import training_ops +from tensorflow.python.training import gen_training_ops from tensorflow.python.util.tf_export import keras_export @@ -182,27 +182,27 @@ class RMSprop(optimizer_v2.OptimizerV2): mom = self.get_slot(var, "momentum") if self.centered: mg = self.get_slot(var, "mg") - return training_ops.resource_apply_centered_rms_prop( - var.handle, - mg.handle, - rms.handle, - mom.handle, - coefficients["lr_t"], - coefficients["rho"], - coefficients["momentum"], - coefficients["epsilon"], - grad, + return gen_training_ops.ResourceApplyCenteredRMSProp( + var=var.handle, + mg=mg.handle, + ms=rms.handle, + mom=mom.handle, + lr=coefficients["lr_t"], + rho=coefficients["rho"], + momentum=coefficients["momentum"], + epsilon=coefficients["epsilon"], + grad=grad, use_locking=self._use_locking) else: - return training_ops.resource_apply_rms_prop( - var.handle, - rms.handle, - mom.handle, - coefficients["lr_t"], - coefficients["rho"], - coefficients["momentum"], - coefficients["epsilon"], - grad, + return gen_training_ops.ResourceApplyRMSProp( + var=var.handle, + ms=rms.handle, + mom=mom.handle, + lr=coefficients["lr_t"], + rho=coefficients["rho"], + momentum=coefficients["momentum"], + epsilon=coefficients["epsilon"], + grad=grad, use_locking=self._use_locking) else: rms_t = (coefficients["rho"] * rms + @@ -228,29 +228,29 @@ class RMSprop(optimizer_v2.OptimizerV2): mom = self.get_slot(var, "momentum") if self.centered: mg = self.get_slot(var, "mg") - return training_ops.resource_sparse_apply_centered_rms_prop( - var.handle, - mg.handle, - rms.handle, - mom.handle, - coefficients["lr_t"], - coefficients["rho"], - coefficients["momentum"], - coefficients["epsilon"], - grad, - indices, + return gen_training_ops.ResourceSparseApplyCenteredRMSProp( + var=var.handle, + mg=mg.handle, + ms=rms.handle, + mom=mom.handle, + lr=coefficients["lr_t"], + rho=coefficients["rho"], + momentum=coefficients["momentum"], + epsilon=coefficients["epsilon"], + grad=grad, + indices=indices, use_locking=self._use_locking) else: - return training_ops.resource_sparse_apply_rms_prop( - var.handle, - rms.handle, - mom.handle, - coefficients["lr_t"], - coefficients["rho"], - coefficients["momentum"], - coefficients["epsilon"], - grad, - indices, + return gen_training_ops.ResourceSparseApplyRMSProp( + var=var.handle, + ms=rms.handle, + mom=mom.handle, + lr=coefficients["lr_t"], + rho=coefficients["rho"], + momentum=coefficients["momentum"], + epsilon=coefficients["epsilon"], + grad=grad, + indices=indices, use_locking=self._use_locking) else: rms_scaled_g_values = (grad * grad) * 
coefficients["one_minus_rho"] From 7ac42b9d7840a2e0dbce80dbbb948abce527c60d Mon Sep 17 00:00:00 2001 From: Reed Date: Fri, 17 Jul 2020 14:34:33 -0700 Subject: [PATCH 0739/2522] Fix CPU build --- tensorflow/core/common_runtime/gpu/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/core/common_runtime/gpu/BUILD b/tensorflow/core/common_runtime/gpu/BUILD index 18d6f06a5f4..c738e490501 100644 --- a/tensorflow/core/common_runtime/gpu/BUILD +++ b/tensorflow/core/common_runtime/gpu/BUILD @@ -159,6 +159,7 @@ tf_cuda_library( "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", "//tensorflow/core:stream_executor", + "//tensorflow/core/platform:tf32_utils", "//tensorflow/core/profiler/lib:annotated_traceme", "//tensorflow/core/profiler/lib:scoped_annotation", "//third_party/eigen3", From 45eeac4f28df8e06cdfd36d8d396ff2c1b65a926 Mon Sep 17 00:00:00 2001 From: Yash Katariya Date: Fri, 17 Jul 2020 14:31:33 -0700 Subject: [PATCH 0740/2522] Make DocSource a class and enforce the restriction of `docstring` and `docstring_module_name`. PiperOrigin-RevId: 321855231 Change-Id: I516c95e41ebb3e3c43c1a7584d84068a0225e7eb --- .../python/tools/api/generator/doc_srcs.py | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/tensorflow/python/tools/api/generator/doc_srcs.py b/tensorflow/python/tools/api/generator/doc_srcs.py index 2f34db241a3..50f966aaea9 100644 --- a/tensorflow/python/tools/api/generator/doc_srcs.py +++ b/tensorflow/python/tools/api/generator/doc_srcs.py @@ -17,21 +17,27 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import collections - from tensorflow.python.util import tf_export -# Specifies docstring source for a module. -# Only one of docstring or docstring_module_name should be set. -# * If docstring is set, then we will use this docstring when -# for the module. -# * If docstring_module_name is set, then we will copy the docstring -# from docstring source module. -DocSource = collections.namedtuple( - 'DocSource', ['docstring', 'docstring_module_name']) -# Each attribute of DocSource is optional. -DocSource.__new__.__defaults__ = (None,) * len(DocSource._fields) +class DocSource(object): + """Specifies docstring source for a module. + + Only one of docstring or docstring_module_name should be set. + * If docstring is set, then we will use this docstring when + for the module. + * If docstring_module_name is set, then we will copy the docstring + from docstring source module. + """ + + def __init__(self, docstring=None, docstring_module_name=None): + self.docstring = docstring + self.docstring_module_name = docstring_module_name + + if self.docstring is not None and self.docstring_module_name is not None: + raise ValueError('Only one of `docstring` or `docstring_module_name` can ' + 'be set.') + _TENSORFLOW_DOC_SOURCES = { 'app': DocSource(docstring_module_name='platform.app'), From 0cd7f9e9475ad7bcf214781d6ec26eaee737923f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Jul 2020 14:31:56 -0700 Subject: [PATCH 0741/2522] Add overloads for Graphdef to MLIR translation functions which accept structured arguments instead of string arguments. 
PiperOrigin-RevId: 321855309 Change-Id: Id05fb96fb9ec20fd07bbd7d9cae83e653a9dae23 --- tensorflow/compiler/mlir/tensorflow/BUILD | 2 + .../translate/mlir_roundtrip_flags.cc | 76 +++++++++++----- .../translate/mlir_roundtrip_flags.h | 17 ++++ .../tensorflow/translate/tf_mlir_translate.cc | 90 +++++++++++++++---- .../tensorflow/translate/tf_mlir_translate.h | 39 +++++++- 5 files changed, 182 insertions(+), 42 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/BUILD b/tensorflow/compiler/mlir/tensorflow/BUILD index c5e2b089c0d..593e0d90172 100644 --- a/tensorflow/compiler/mlir/tensorflow/BUILD +++ b/tensorflow/compiler/mlir/tensorflow/BUILD @@ -1325,11 +1325,13 @@ cc_library( ":mlir_roundtrip_flags", "//tensorflow/cc/saved_model:bundle_v2", "//tensorflow/core:graph", + "//tensorflow/core:lib", "//tensorflow/core:lib_proto_parsing", "//tensorflow/core:ops", "//tensorflow/core:protos_all_cc", "//tensorflow/core/grappler/utils:transitive_fanin", "//tensorflow/stream_executor/lib", + "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:span", diff --git a/tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_flags.cc b/tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_flags.cc index 77da19d6853..4640cb6ce64 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_flags.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_flags.cc @@ -28,14 +28,15 @@ limitations under the License. #include "tensorflow/core/framework/types.h" #include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/types.h" namespace tensorflow { Status ParseOutputArrayInfo(absl::string_view array_names, std::vector* outputs) { - std::vector output_names = absl::StrSplit(array_names, ','); - return ParseOutputArrayInfo(output_names, outputs); + TF_RETURN_IF_ERROR(ParseNodeNames(array_names, *outputs)); + return Status::OK(); } Status ParseOutputArrayInfo(const std::vector& output_names, @@ -51,22 +52,12 @@ Status ParseInputArrayInfo(absl::string_view array_names, absl::string_view data_types, absl::string_view shapes, GraphImportConfig::InputArrays* inputs) { - std::vector node_names = absl::StrSplit(array_names, ','); - std::vector node_dtypes = absl::StrSplit(data_types, ','); - - std::vector node_shapes_str = absl::StrSplit(shapes, ':'); + std::vector node_names; + std::vector node_dtypes; std::vector> node_shapes; - for (int i = 0; i < node_shapes_str.size(); i++) { - std::vector dims; - for (auto& dim_str : absl::StrSplit(node_shapes_str[i], ',')) { - // Treats empty input shape as scalar - if (dim_str.empty()) continue; - int size; - TF_RET_CHECK(absl::SimpleAtoi(dim_str, &size)); - dims.push_back(size); - } - node_shapes.push_back(dims); - } + TF_RETURN_IF_ERROR(ParseNodeNames(array_names, node_names)); + TF_RETURN_IF_ERROR(ParseNodeDataTypes(data_types, node_dtypes)); + TF_RETURN_IF_ERROR(ParseNodeShapes(shapes, node_shapes)); return ParseInputArrayInfo(node_names, node_dtypes, node_shapes, inputs); } @@ -75,8 +66,7 @@ Status ParseInputArrayInfo(const std::vector& node_names, const std::vector>& node_shapes, GraphImportConfig::InputArrays* inputs) { std::vector used_node_dtypes; - if (node_dtypes.empty() || - (node_dtypes.size() == 1 && node_dtypes[0].empty())) { + if (node_dtypes.empty()) { // Mark all the node dtypes Invalid, so the importer can handle them by 
// using the type from the graph. used_node_dtypes.resize(node_names.size(), DataType_Name(DT_INVALID)); @@ -97,10 +87,10 @@ Status ParseInputArrayInfo(const std::vector& node_names, node_names.size(), ", #data_types ", node_dtypes.size(), ")")); } - if (node_names.size() != node_shapes.size()) { + if (!node_shapes.empty() && node_names.size() != node_shapes.size()) { return errors::FailedPrecondition(absl::StrCat( - "Unmatched node array and data type numbers (#arrays ", - node_names.size(), ", #input_shapes ", node_shapes.size(), ")")); + "Unmatched node array and shape numbers (#arrays ", node_names.size(), + ", #input_shapes ", node_shapes.size(), ")")); } // StringMap doesn't support reserve else reserve input map size here. @@ -119,11 +109,49 @@ Status ParseInputArrayInfo(const std::vector& node_names, absl::StrCat("Invalid node type '", node_dtypes[i], "'")); } - for (auto& dim : node_shapes[i]) { - info.shape.add_dim()->set_size(dim); + if (!node_shapes.empty()) { + for (auto& dim : node_shapes[i]) { + info.shape.add_dim()->set_size(dim); + } } } return Status::OK(); } +Status ParseNodeShapes(absl::string_view shapes_str, + std::vector>& shapes_vector) { + shapes_vector.clear(); + if (!shapes_str.empty()) { + std::vector node_shapes_str = absl::StrSplit(shapes_str, ':'); + for (int i = 0; i < node_shapes_str.size(); i++) { + std::vector dims; + for (const absl::string_view dim_str : + absl::StrSplit(node_shapes_str[i], ',')) { + // Treats empty input shape as scalar + if (dim_str.empty()) continue; + int size; + TF_RET_CHECK(absl::SimpleAtoi(dim_str, &size)); + dims.push_back(size); + } + shapes_vector.push_back(dims); + } + } + return Status::OK(); +} + +Status ParseNodeNames(absl::string_view names_str, + std::vector& names_vector) { + names_vector = absl::StrSplit(names_str, ',', absl::SkipEmpty()); + return Status::OK(); +} + +Status ParseNodeDataTypes(absl::string_view data_types_str, + std::vector& data_type_vector) { + data_type_vector.clear(); + if (!data_types_str.empty()) { + data_type_vector = absl::StrSplit(data_types_str, ','); + } + return Status::OK(); +} + } // namespace tensorflow diff --git a/tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_flags.h b/tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_flags.h index cc38a73d106..334f935a139 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_flags.h +++ b/tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_flags.h @@ -96,6 +96,23 @@ Status ParseInputArrayInfo(const std::vector& node_names, const std::vector& node_dtypes, const std::vector>& node_shapes, GraphImportConfig::InputArrays* inputs); + +// Parses shapes from the given string into shapes_vector which is a structured +// format. +// NOTE: If shapes_str is empty, shapes_vector will also be empty. +Status ParseNodeShapes(absl::string_view shapes_str, + std::vector>& shapes_vector); + +// Parses names from the given string into the names_vector. +// NOTE: If names_str is empty, names_vector will also be empty. +Status ParseNodeNames(absl::string_view names_str, + std::vector& names_vector); + +// Parses data types from the given string into the data_type_vector. +// NOTE: If data_types_str is empty, data_type_vector will also be empty. 
+Status ParseNodeDataTypes(absl::string_view data_types_str, + std::vector& data_type_vector); + } // namespace tensorflow #endif // TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSLATE_MLIR_ROUNDTRIP_FLAGS_H_ diff --git a/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.cc b/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.cc index b782b2c49d9..1c7988d3a40 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.cc @@ -35,6 +35,7 @@ limitations under the License. #include "tensorflow/core/framework/versions.pb.h" #include "tensorflow/core/graph/tensor_id.h" #include "tensorflow/core/grappler/utils/transitive_fanin.h" +#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/protobuf.h" #include "tensorflow/core/protobuf/graph_debug_info.pb.h" @@ -42,11 +43,14 @@ namespace tensorflow { static StatusOr GraphdefToMlirImport( llvm::StringRef input, absl::string_view debug_info_file, - absl::string_view input_arrays, absl::string_view input_dtypes, - absl::string_view input_shapes, absl::string_view output_arrays, - absl::string_view control_output_arrays, bool prune_unused_nodes, - bool convert_legacy_fed_inputs, bool graph_as_function, bool upgrade_legacy, - bool enable_shape_inference, mlir::MLIRContext* context) { + const std::vector& input_arrays, + const std::vector& input_dtypes, + const std::vector>& input_shapes, + const std::vector& output_arrays, + const std::vector& control_output_arrays, + bool prune_unused_nodes, bool convert_legacy_fed_inputs, + bool graph_as_function, bool upgrade_legacy, bool enable_shape_inference, + mlir::MLIRContext* context) { GraphDef graphdef; TF_RETURN_IF_ERROR( tensorflow::LoadProtoFromBuffer({input.data(), input.size()}, &graphdef)); @@ -97,11 +101,14 @@ static StatusOr GraphdefToMlirImport( StatusOr GraphdefToMlirTranslateFunction( llvm::StringRef input, absl::string_view debug_info_file, - absl::string_view input_arrays, absl::string_view input_dtypes, - absl::string_view input_shapes, absl::string_view output_arrays, - absl::string_view control_output_arrays, bool prune_unused_nodes, - bool convert_legacy_fed_inputs, bool graph_as_function, bool upgrade_legacy, - bool enable_shape_inference, mlir::MLIRContext* context) { + const std::vector& input_arrays, + const std::vector& input_dtypes, + const std::vector>& input_shapes, + const std::vector& output_arrays, + const std::vector& control_output_arrays, + bool prune_unused_nodes, bool convert_legacy_fed_inputs, + bool graph_as_function, bool upgrade_legacy, bool enable_shape_inference, + mlir::MLIRContext* context) { auto module_or = GraphdefToMlirImport( input, debug_info_file, input_arrays, input_dtypes, input_shapes, output_arrays, control_output_arrays, prune_unused_nodes, @@ -113,6 +120,31 @@ StatusOr GraphdefToMlirTranslateFunction( return module_or; } +StatusOr GraphdefToMlirTranslateFunction( + llvm::StringRef input, absl::string_view debug_info_file, + absl::string_view input_arrays, absl::string_view input_dtypes, + absl::string_view input_shapes, absl::string_view output_arrays, + absl::string_view control_output_arrays, bool prune_unused_nodes, + bool convert_legacy_fed_inputs, bool graph_as_function, bool upgrade_legacy, + bool enable_shape_inference, mlir::MLIRContext* context) { + std::vector input_array_vector; + std::vector input_dtype_vector; + std::vector> input_shapes_vector; + std::vector output_array_vector; + std::vector 
control_output_array_vector; + TF_RETURN_IF_ERROR(ParseNodeNames(input_arrays, input_array_vector)); + TF_RETURN_IF_ERROR(ParseNodeDataTypes(input_dtypes, input_dtype_vector)); + TF_RETURN_IF_ERROR(ParseNodeNames(output_arrays, output_array_vector)); + TF_RETURN_IF_ERROR(ParseNodeShapes(input_shapes, input_shapes_vector)); + TF_RETURN_IF_ERROR( + ParseNodeNames(control_output_arrays, control_output_array_vector)); + return GraphdefToMlirTranslateFunction( + input, debug_info_file, input_array_vector, input_dtype_vector, + input_shapes_vector, output_array_vector, control_output_array_vector, + prune_unused_nodes, convert_legacy_fed_inputs, graph_as_function, + upgrade_legacy, enable_shape_inference, context); +} + StatusOr SavedModelObjectGraphToMlirImport( absl::string_view saved_model_dir, const std::unordered_set& tags, @@ -161,11 +193,14 @@ StatusOr SavedModelSignatureDefsToMlirImport( StatusOr GraphdefToSplattedMlirTranslateFunction( llvm::StringRef input, absl::string_view debug_info_file, - absl::string_view input_arrays, absl::string_view input_dtypes, - absl::string_view input_shapes, absl::string_view output_arrays, - absl::string_view control_output_arrays, bool prune_unused_nodes, - bool convert_legacy_fed_inputs, bool graph_as_function, bool upgrade_legacy, - bool enable_shape_inference, mlir::MLIRContext* context) { + const std::vector& input_arrays, + const std::vector& input_dtypes, + const std::vector>& input_shapes, + const std::vector& output_arrays, + const std::vector& control_output_arrays, + bool prune_unused_nodes, bool convert_legacy_fed_inputs, + bool graph_as_function, bool upgrade_legacy, bool enable_shape_inference, + mlir::MLIRContext* context) { auto module_or = GraphdefToMlirImport( input, debug_info_file, input_arrays, input_dtypes, input_shapes, output_arrays, control_output_arrays, prune_unused_nodes, @@ -211,4 +246,29 @@ StatusOr GraphdefToSplattedMlirTranslateFunction( return module_or; } +StatusOr GraphdefToSplattedMlirTranslateFunction( + llvm::StringRef input, absl::string_view debug_info_file, + absl::string_view input_arrays, absl::string_view input_dtypes, + absl::string_view input_shapes, absl::string_view output_arrays, + absl::string_view control_output_arrays, bool prune_unused_nodes, + bool convert_legacy_fed_inputs, bool graph_as_function, bool upgrade_legacy, + bool enable_shape_inference, mlir::MLIRContext* context) { + std::vector input_array_vector; + std::vector input_dtype_vector; + std::vector> input_shapes_vector; + std::vector output_array_vector; + std::vector control_output_array_vector; + TF_RETURN_IF_ERROR(ParseNodeNames(input_arrays, input_array_vector)); + TF_RETURN_IF_ERROR(ParseNodeDataTypes(input_dtypes, input_dtype_vector)); + TF_RETURN_IF_ERROR(ParseNodeNames(output_arrays, output_array_vector)); + TF_RETURN_IF_ERROR(ParseNodeShapes(input_shapes, input_shapes_vector)); + TF_RETURN_IF_ERROR( + ParseNodeNames(control_output_arrays, control_output_array_vector)); + return GraphdefToSplattedMlirTranslateFunction( + input, debug_info_file, input_array_vector, input_dtype_vector, + input_shapes_vector, output_array_vector, control_output_array_vector, + prune_unused_nodes, convert_legacy_fed_inputs, graph_as_function, + upgrade_legacy, enable_shape_inference, context); +} + } // namespace tensorflow diff --git a/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.h b/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.h index ff5dc287488..0dc49d70192 100644 --- 
a/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.h +++ b/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.h @@ -19,6 +19,7 @@ limitations under the License. #include #include +#include "absl/base/macros.h" #include "absl/strings/string_view.h" #include "absl/types/span.h" #include "mlir/IR/MLIRContext.h" // from @llvm-project @@ -33,9 +34,25 @@ using stream_executor::port::StatusOr; // TODO(antiagainst): Directly manipulating files in library functions is not // a good idea. We should pass in a string/stream here. -// Converts a TensorFlow GraphDef stored in the file with the given -// `input_filename` into a MLIR module. Creates MLIR entities into the -// given MLIR `context`. +// Converts a TensorFlow GraphDef contained in `input` param into a MLIR module. +// Creates MLIR entities into the given MLIR `context`. +StatusOr GraphdefToMlirTranslateFunction( + llvm::StringRef input, absl::string_view debug_info_file, + const std::vector& input_arrays, + const std::vector& input_dtypes, + const std::vector>& input_shapes, + const std::vector& output_arrays, + const std::vector& control_output_arrays, + bool prune_unused_nodes, bool convert_legacy_fed_inputs, + bool graph_as_function, bool upgrade_legacy, + // TODO(jpienaar): Remove this. + bool enable_shape_inference, mlir::MLIRContext* context); + +ABSL_DEPRECATED( + "Please use the other overload of this function which accepts structured " + "inputs instead of strings") +// Converts a TensorFlow GraphDef contained in `input` param into a MLIR module. +// Creates MLIR entities into the given MLIR `context`. StatusOr GraphdefToMlirTranslateFunction( llvm::StringRef input, absl::string_view debug_info_file, absl::string_view input_arrays, absl::string_view input_dtypes, @@ -47,6 +64,22 @@ StatusOr GraphdefToMlirTranslateFunction( // Similar as the above function, but replaces all constant tensors // with randomly generated splat values. +StatusOr GraphdefToSplattedMlirTranslateFunction( + llvm::StringRef input, absl::string_view debug_info_file, + const std::vector& input_arrays, + const std::vector& input_dtypes, + const std::vector>& input_shapes, + const std::vector& output_arrays, + const std::vector& control_output_arrays, + bool prune_unused_nodes, bool convert_legacy_fed_inputs, + bool graph_as_function, bool upgrade_legacy, bool enable_shape_inference, + mlir::MLIRContext* context); + +ABSL_DEPRECATED( + "Please use the other overload of this function which accepts structured " + "inputs instead of strings") +// Similar as the above function, but replaces all constant tensors +// with randomly generated splat values. StatusOr GraphdefToSplattedMlirTranslateFunction( llvm::StringRef input, absl::string_view debug_info_file, absl::string_view input_arrays, absl::string_view input_dtypes, From 2e1f64f5c7e15ee8c58c4a661e40d6218ff6b265 Mon Sep 17 00:00:00 2001 From: Wenhao Jia Date: Fri, 17 Jul 2020 14:39:02 -0700 Subject: [PATCH 0742/2522] Remove TPU-related targets from OSS TF testing. 
PiperOrigin-RevId: 321856726 Change-Id: If970ce0e4279c1dae4686eb033168886605d3fb8 --- .../tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh b/tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh index 203356952cb..bb95df86342 100755 --- a/tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +++ b/tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh @@ -15,4 +15,4 @@ #!/bin/bash set -x -DEFAULT_BAZEL_TARGETS="//tensorflow/... -//tensorflow/python/integration_testing/... -//tensorflow/compiler/tf2tensorrt/... -//tensorflow/compiler/xrt/... //tensorflow/compiler/mlir/lite/... -//tensorflow/lite/micro/examples/..." +DEFAULT_BAZEL_TARGETS="//tensorflow/... -//tensorflow/python/integration_testing/... -//tensorflow/compiler/tf2tensorrt/... -//tensorflow/compiler/xrt/... //tensorflow/compiler/mlir/lite/... -//tensorflow/lite/micro/examples/... -//tensorflow/core/tpu/..." From 2518ca5ea8b377a4c3fbfd07fbc4d4ba00b012a3 Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Fri, 17 Jul 2020 14:50:43 -0700 Subject: [PATCH 0743/2522] Use the GenericTranspiler base class to reduce boilerplate. Fix name typo. Add basic support for transforming module (non-recursively). PiperOrigin-RevId: 321859242 Change-Id: Id9dbfdb773306587964e82dad9290b053ad57ea3 --- .../python/autograph/pyct/transpiler.py | 48 ++++++++++++++----- 1 file changed, 37 insertions(+), 11 deletions(-) diff --git a/tensorflow/python/autograph/pyct/transpiler.py b/tensorflow/python/autograph/pyct/transpiler.py index 9916440cfcc..d93da4b03d1 100644 --- a/tensorflow/python/autograph/pyct/transpiler.py +++ b/tensorflow/python/autograph/pyct/transpiler.py @@ -238,7 +238,7 @@ class GenericTranspiler(object): class MyTransformer(GenericTranspiler): - def transform(self, obj): + def transform_ast(self, node, ctx): result = <> return result @@ -248,6 +248,14 @@ class GenericTranspiler(object): # result is the output """ + def get_transformed_name(self, node): + """Returns a name for the output function. Subclasses may override this.""" + if isinstance(node, gast.Lambda): + return 'lam' + elif isinstance(node, gast.FunctionDef): + return node.name + raise ValueError('Unknown node type {}'.format(node)) + def transform_ast(self, node, ctx): """Performs an actual transformation of a function's AST. @@ -289,6 +297,34 @@ class GenericTranspiler(object): args.kw_defaults[i] = parser.parse_expression('None') return node + def transform_module(self, mod, user_context): + """Transforms a module. + + Subclasses may override this method. The return value is opaque. + + The method receives the original AST. The result is passed as-is to the + output of `transform`. + + Args: + mod: A Python module. + user_context: An opaque object (may be None) that is forwarded to + transform_ast, through the ctx.user_context argument. + Returns: + List[Tuple[Any, Any]]. By default it returns the output of transform_ast, + evaluated on each supported member, other than modules, together with a + `transformer.Context` containing information about the transformation + process. + """ + result = [] + for member in mod.__dict__.values(): + if inspect.ismodule(member): + continue # Not transforming modules recursively. + try: + result.append(self.transform(member, user_context)) + except NotImplementedError: + pass # Skip unsupported elements. 
+ return result + def transform_function(self, fn, user_context): """Transforms a function. @@ -363,16 +399,6 @@ class PyToPy(GenericTranspiler): self._cache_lock = threading.RLock() self._cache = cache.CodeObjectCache() - def get_transformed_name(self, node): - """Returns a name for the output function. Subclasses may override this.""" - if isinstance(node, gast.Lambda): - return 'lam' - elif isinstance(node, gast.FunctionDef): - # Note that we need to rename the function, to avoid any namespace - # clashes. - return node.name - raise ValueError('Unknown node type {}'.format(node)) - def get_extra_locals(self): """Returns extra static local variables to be made to transformed code. From b1ad57227dfee390c6b56b65dd1a4b77434382d6 Mon Sep 17 00:00:00 2001 From: Yanhui Liang Date: Fri, 17 Jul 2020 14:53:18 -0700 Subject: [PATCH 0744/2522] Update V1 only saving_utils_test with graph scope. PiperOrigin-RevId: 321859757 Change-Id: Id77c5648dd67b31dc5a3b0cb6d2e7d1c2a3affbe --- .../python/keras/saving/saving_utils_test.py | 65 ++++++++++--------- 1 file changed, 33 insertions(+), 32 deletions(-) diff --git a/tensorflow/python/keras/saving/saving_utils_test.py b/tensorflow/python/keras/saving/saving_utils_test.py index 574e42a2aff..49b6fde9ec7 100644 --- a/tensorflow/python/keras/saving/saving_utils_test.py +++ b/tensorflow/python/keras/saving/saving_utils_test.py @@ -35,7 +35,6 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_spec -from tensorflow.python.framework import test_util from tensorflow.python.keras import backend as K from tensorflow.python.keras import combinations from tensorflow.python.keras import keras_parameterized @@ -295,44 +294,46 @@ class ModelSaveTest(keras_parameterized.TestCase): {input_name: np.ones((8, 5))})) -@test_util.run_deprecated_v1 # Not used in v2. class ExtractModelMetricsTest(keras_parameterized.TestCase): def test_extract_model_metrics(self): - a = keras.layers.Input(shape=(3,), name='input_a') - b = keras.layers.Input(shape=(3,), name='input_b') + # saving_utils.extract_model_metrics is used in V1 only API + # keras.experimental.export_saved_model. 
+ with ops.Graph().as_default(): + a = keras.layers.Input(shape=(3,), name='input_a') + b = keras.layers.Input(shape=(3,), name='input_b') - dense = keras.layers.Dense(4, name='dense') - c = dense(a) - d = dense(b) - e = keras.layers.Dropout(0.5, name='dropout')(c) + dense = keras.layers.Dense(4, name='dense') + c = dense(a) + d = dense(b) + e = keras.layers.Dropout(0.5, name='dropout')(c) - model = keras.models.Model([a, b], [d, e]) - extract_metrics = saving_utils.extract_model_metrics(model) - self.assertEqual(None, extract_metrics) + model = keras.models.Model([a, b], [d, e]) + extract_metrics = saving_utils.extract_model_metrics(model) + self.assertEqual(None, extract_metrics) - extract_metric_names = [ - 'dense_binary_accuracy', 'dropout_binary_accuracy', - 'dense_mean_squared_error', 'dropout_mean_squared_error' - ] - if tf2.enabled(): - extract_metric_names.extend(['dense_mae', 'dropout_mae']) - else: - extract_metric_names.extend( - ['dense_mean_absolute_error', 'dropout_mean_absolute_error']) + extract_metric_names = [ + 'dense_binary_accuracy', 'dropout_binary_accuracy', + 'dense_mean_squared_error', 'dropout_mean_squared_error' + ] + if tf2.enabled(): + extract_metric_names.extend(['dense_mae', 'dropout_mae']) + else: + extract_metric_names.extend( + ['dense_mean_absolute_error', 'dropout_mean_absolute_error']) - model_metric_names = ['loss', 'dense_loss', 'dropout_loss' - ] + extract_metric_names - model.compile( - loss='mae', - metrics=[ - keras.metrics.BinaryAccuracy(), 'mae', - keras.metrics.mean_squared_error - ], - optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.01)) - extract_metrics = saving_utils.extract_model_metrics(model) - self.assertEqual(set(model_metric_names), set(model.metrics_names)) - self.assertEqual(set(extract_metric_names), set(extract_metrics.keys())) + model_metric_names = ['loss', 'dense_loss', 'dropout_loss' + ] + extract_metric_names + model.compile( + loss='mae', + metrics=[ + keras.metrics.BinaryAccuracy(), 'mae', + keras.metrics.mean_squared_error + ], + optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.01)) + extract_metrics = saving_utils.extract_model_metrics(model) + self.assertEqual(set(model_metric_names), set(model.metrics_names)) + self.assertEqual(set(extract_metric_names), set(extract_metrics.keys())) if __name__ == '__main__': From 342a5ef882073f806b7860e29f5e084c790271ed Mon Sep 17 00:00:00 2001 From: Yanhui Liang Date: Fri, 17 Jul 2020 14:58:40 -0700 Subject: [PATCH 0745/2522] Update V1 only momentum_test with graph scope. PiperOrigin-RevId: 321860870 Change-Id: I848fa5c5b7ee72e7c3fa9815be8f2ca694735108 --- tensorflow/python/training/momentum_test.py | 24 ++++++++++----------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tensorflow/python/training/momentum_test.py b/tensorflow/python/training/momentum_test.py index 6e47a2e5f2e..332cc4018ac 100644 --- a/tensorflow/python/training/momentum_test.py +++ b/tensorflow/python/training/momentum_test.py @@ -160,10 +160,10 @@ class MomentumOptimizerTest(test.TestCase): self.assertStartsWith(optimizer_variables[1].name, "var3") self.assertEqual(2, len(optimizer_variables)) - @test_util.run_deprecated_v1 def testNesterovMomentum(self): for dtype in [dtypes.float32, dtypes.float64]: - with self.cached_session(): + # train.MomentumOptimizer is V1 only API. 
+ with ops.Graph().as_default(), self.cached_session(): var0 = variables.Variable([1.0, 2.0], dtype=dtype) var1 = variables.Variable([3.0, 4.0], dtype=dtype) var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) @@ -187,10 +187,10 @@ class MomentumOptimizerTest(test.TestCase): self.assertAllClose(var0_np, self.evaluate(var0)) self.assertAllClose(var1_np, self.evaluate(var1)) - @test_util.run_deprecated_v1 def testSparseNesterovMomentum(self): for dtype in [dtypes.float32, dtypes.float64]: - with self.cached_session(): + # train.MomentumOptimizer is V1 only API. + with ops.Graph().as_default(), self.cached_session(): var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) @@ -282,10 +282,10 @@ class MomentumOptimizerTest(test.TestCase): self.evaluate(sgd_op) self.assertAllCloseAccordingToType([[1, 1], [0, 0]], self.evaluate(var0)) - @test_util.run_deprecated_v1 def testTensorLearningRateAndMomentum(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - with self.cached_session(): + # train.MomentumOptimizer is V1 only API. + with ops.Graph().as_default(), self.cached_session(): var0 = variables.Variable([1.0, 2.0], dtype=dtype) var1 = variables.Variable([3.0, 4.0], dtype=dtype) grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) @@ -443,9 +443,9 @@ class MomentumOptimizerTest(test.TestCase): # pylint: enable=line-too-long return db_grad, db_out - @test_util.run_deprecated_v1 def testLikeDistBeliefMom01(self): - with self.cached_session(): + # train.MomentumOptimizer is V1 only API. + with ops.Graph().as_default(), self.cached_session(): db_grad, db_out = self._dbParamsMom01() num_samples = len(db_grad) var0 = variables.Variable([0.0] * num_samples) @@ -457,10 +457,10 @@ class MomentumOptimizerTest(test.TestCase): mom_update.run(feed_dict={grads0: db_grad[i]}) self.assertAllClose(np.array(db_out[i]), self.evaluate(var0)) - @test_util.run_deprecated_v1 def testSparse(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - with self.cached_session(): + # train.MomentumOptimizer is V1 only API. + with ops.Graph().as_default(), self.cached_session(): var0 = variables.Variable(array_ops.zeros([4, 2], dtype=dtype)) var1 = variables.Variable(constant_op.constant(1.0, dtype, [4, 2])) grads0 = ops.IndexedSlices( @@ -539,10 +539,10 @@ class MomentumOptimizerTest(test.TestCase): ]), self.evaluate(var1)[2]) - @test_util.run_deprecated_v1 def testSharing(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - with self.cached_session(): + # train.MomentumOptimizer is V1 only API. + with ops.Graph().as_default(), self.cached_session(): var0 = variables.Variable([1.0, 2.0], dtype=dtype) var1 = variables.Variable([3.0, 4.0], dtype=dtype) grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) From 01d9e46f28830bdb2c23b4ac19edcd8f8587d08f Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: Fri, 17 Jul 2020 15:06:50 -0700 Subject: [PATCH 0746/2522] Add module-level guide for the tf.data service. The guide focuses more on the big picture of using the tf.data service, including running servers, compared to the documentation for `tf.data.experimental.service.distribute`, which focuses on using the tf.data service after it is deployed. 
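The new module docstring below demonstrates the service with both servers running in-process. As a complement, here is a minimal sketch of the multi-process deployment the guide describes, with the dispatcher, a worker, and the training job each in their own process. The host address, ports, file pattern, and the blocking `join()` calls are illustrative assumptions and are not part of this patch; only `DispatchServer`, `WorkerServer`, and `distribute` (with `processing_mode` and `service`) come from the API documented here.

```
import tensorflow as tf

DISPATCHER = "10.0.0.1:5050"  # assumed address of the dispatcher host


def run_dispatcher():
  # Process 1: owns all service state; restarting it requires restarting training.
  server = tf.data.experimental.service.DispatchServer(port=5050)
  server.join()  # block so the process stays alive for the whole run


def run_worker(worker_port):
  # Processes 2..N: may be added, removed, or restarted freely during training.
  server = tf.data.experimental.service.WorkerServer(
      port=worker_port, dispatcher_address=DISPATCHER)
  server.join()


def run_training():
  # Training job: shuffle filenames non-deterministically so each worker
  # processes elements in a different order under "parallel_epochs".
  dataset = tf.data.Dataset.list_files("/data/train-*.tfrecord", shuffle=True)
  dataset = dataset.interleave(tf.data.TFRecordDataset, cycle_length=4)
  dataset = dataset.apply(tf.data.experimental.service.distribute(
      processing_mode="parallel_epochs", service="grpc://" + DISPATCHER))
  for batch in dataset:
    pass  # feed the model here
```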
PiperOrigin-RevId: 321862865 Change-Id: Ic75a23dd1aae2ac278423c5ae9321b91262fdc29 --- .../data/experimental/service/__init__.py | 104 +++++++++++++++++- 1 file changed, 103 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/data/experimental/service/__init__.py b/tensorflow/python/data/experimental/service/__init__.py index 7887e53600a..e249fb02c19 100644 --- a/tensorflow/python/data/experimental/service/__init__.py +++ b/tensorflow/python/data/experimental/service/__init__.py @@ -12,7 +12,109 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Experimental API for using the tf.data service.""" +"""API for using the tf.data service. + +This module contains: + +1. tf.data server implementations for running the tf.data service. +2. A `distribute` dataset transformation that moves a dataset's preprocessing + to happen in the tf.data service. + +The tf.data service offers a way to improve training speed when the host +attached to a training device can't keep up with the data consumption of the +model. For example, suppose a host can generate 100 examples/second, but the +model can process 200 examples/second. Training speed could be doubled by using +the tf.data service to generate 200 examples/second. + +## Before using the tf.data service + +There are a few things to do before using the tf.data service to speed up +training. + +### Understand processing_mode + +The tf.data service uses a cluster of workers to prepare data for training your +model. The `processing_mode` argument to +`tf.data.experimental.service.distribute` describes how to leverage multiple +workers to process the input dataset. Currently, the only supported +processing mode is "parallel_epochs", which means that the entire input dataset +will be processed independently by each of the tf.data service workers. For this +reason, it is important to shuffle data (e.g. filenames) non-deterministically, +so that each worker will process the elements of the dataset in a different +order. If your model requires input data to arrive in a certain order, the +"parallel_epochs" processing mode will not work well. We plan to support +additional modes of processing (such as processing a different shard of the +input data by each worker) in the near future. + +### Measure potential impact + +Before using the tf.data service, it is useful to first measure the potential +performance improvement. To do this, add + +``` +dataset = dataset.take(1).cache().repeat() +``` + +at the end of your dataset, and see how it affects your model's step time. +`take(1).cache().repeat()` will cache the first element of your dataset and +produce it repeatedly. This should make the dataset very fast, so that the model +becomes the bottleneck and you can identify the ideal model speed. With enough +workers, the tf.data service should be able to achieve similar speed. + +## Running the tf.data service + +tf.data servers should be brought up alongside your training jobs, and brought +down when the jobs are finished. The tf.data service uses one DispatchServer and +any number of WorkerServers. See +https://github.com/tensorflow/ecosystem/tree/master/data_service for an example +of using Google Kubernetes Engine (GKE) to manage the tf.data service. 
The +server implementation in +[tf_std_data_server.py](https://github.com/tensorflow/ecosystem/blob/master/data_service/tf_std_data_server.py) +is not GKE-specific, and can be used to run the tf.data service in other +contexts. + +### Fault tolerance + +The tf.data dispatch server manages all state for the service, so it is +important to keep the server alive. If the dispatch server is restarted +mid-training, the training must also be restarted. + +WorkerServers, on the other hand, may be freely restarted, added, or removed +during training. + +## Using the tf.data service from your training job + +Once you have a tf.data service cluster running, take note of the dispatcher IP +address and port. To connect to the service, you will use a string in the format +"grpc://:". + +``` +# Create dataset however you were before using the tf.data service. +dataset = your_dataset_factory() + +service = "grpc://{}:{}".format(dispatcher_address, dispatcher_port) +# This will register the dataset with the tf.data service cluster so that +# tf.data workers can run the dataset to produce elements. The dataset returned +# from applying `distribute` will fetch elements produced by tf.data workers. +dataset = dataset.apply(tf.data.experimental.service.distribute( + processing_mode="parallel_epochs", service=service)) +``` + +Below is a toy example that you can run yourself. + +>>> dispatcher = tf.data.experimental.service.DispatchServer(port=0) +>>> dispatcher_address = dispatcher.target.split("://")[1] +>>> worker = tf.data.experimental.service.WorkerServer( +... port=0, dispatcher_address=dispatcher_address) +>>> dataset = tf.data.Dataset.range(10) +>>> dataset = dataset.apply(tf.data.experimental.service.distribute( +... processing_mode="parallel_epochs", service=dispatcher.target)) +>>> print(list(dataset.as_numpy_iterator())) +[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + +See the documentation of `tf.data.experimental.service.distribute` for more +details about using the `distribute` transformation. 
+""" from __future__ import absolute_import from __future__ import division From ec25e318178f0423297ff7c958fa3e11b9574a76 Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Fri, 17 Jul 2020 15:08:25 -0700 Subject: [PATCH 0747/2522] Rollback of rollback of [TF/XLA] Enable input/output aliasing in the TF2XLA bridge The underlying bug was fixed PiperOrigin-RevId: 321863222 Change-Id: I94c25f3243e33374ee089dd808c3f25704de2c92 --- tensorflow/compiler/jit/kernels/xla_ops.cc | 109 +++++--- .../compiler/jit/xla_compile_on_demand_op.cc | 36 ++- tensorflow/compiler/jit/xla_device_ops.cc | 12 +- tensorflow/compiler/jit/xla_launch_util.cc | 238 ++++++++++-------- tensorflow/compiler/jit/xla_launch_util.h | 28 ++- .../python/eager/def_function_xla_jit_test.py | 64 +++++ 6 files changed, 319 insertions(+), 168 deletions(-) diff --git a/tensorflow/compiler/jit/kernels/xla_ops.cc b/tensorflow/compiler/jit/kernels/xla_ops.cc index 48347a2915f..38e33a60657 100644 --- a/tensorflow/compiler/jit/kernels/xla_ops.cc +++ b/tensorflow/compiler/jit/kernels/xla_ops.cc @@ -277,7 +277,8 @@ static Status CompileToLocalExecutable( OpKernelContext* ctx, const NameAttrList& function, bool has_ref_vars, const XlaPlatformInfo& platform_info, absl::Span variable_infos, - absl::Span constants, bool lazy, xla::LocalClient** client, + absl::Span constants, bool lazy, bool may_alias_resource_update, + xla::LocalClient** client, const XlaCompiler::CompilationResult** compilation_result, xla::LocalExecutable** executable) { // We store information about the JIT-compiled XLA computation @@ -332,6 +333,9 @@ static Status CompileToLocalExecutable( // Optimization: where possible, have the computation return a naked array // rather than a one-element tuple. compile_options.always_return_tuple = false; + compile_options.alias_resource_update = !has_ref_vars && + !platform_info.is_on_xla_device() && + may_alias_resource_update; std::vector args; TF_RETURN_IF_ERROR(XlaComputationLaunchContext::BuildXlaCompilerArguments( @@ -350,20 +354,22 @@ void XlaLocalLaunchBase::Compute(OpKernelContext* ctx) { const XlaCompiler::CompilationResult* compilation_result; xla::LocalExecutable* executable; - ResourceVarsSnapshot variables_snapshot; + std::vector variable_infos; { - std::vector variable_infos; OP_REQUIRES_OK( ctx, GetVariableInfosFromCtxInputs(ctx, resources_, &variable_infos)); OP_REQUIRES_OK(ctx, LockVariables(absl::MakeSpan(variable_infos))); Status s = CompileToLocalExecutable( ctx, function_, /*has_ref_vars=*/has_ref_vars_, platform_info_, - variable_infos, constants_, /*lazy=*/false, &client, - &compilation_result, &executable); + variable_infos, constants_, /*lazy=*/false, + /*may_alias_resource_update=*/true, &client, &compilation_result, + &executable); OP_REQUIRES_OK(ctx, s); - OP_REQUIRES_OK(ctx, - SnapshotResourceVariables(ctx, resources_, variable_infos, - &variables_snapshot)); + } + + std::map resource_var_ptrs; + for (int i = 0; i < resources_.size(); i++) { + resource_var_ptrs[resources_[i]] = variable_infos[i].var()->tensor(); } se::Stream* stream = @@ -374,12 +380,19 @@ void XlaLocalLaunchBase::Compute(OpKernelContext* ctx) { absl::optional tf_allocator_adapter; se::DeviceMemoryAllocator* allocator = GetAllocator(&tf_allocator_adapter, ctx, platform_info_); + int device_ordinal = stream ? 
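Concretely, with aliasing enabled a jit-compiled function that assigns to a resource variable can hand the variable's existing device buffer to XLA as a donated input, so the updated value is written in place rather than into a freshly allocated buffer that is then copied back. The change only attempts this when the variable is actually updated and its buffer is uniquely referenced, and it deliberately keeps aliasing off for the split XlaCompile/XlaRun path, where holding variable locks across the two ops could deadlock. A minimal sketch of the user-level pattern that benefits (this is not the test added in def_function_xla_jit_test.py; the shapes and constants are invented, and `experimental_compile` is the era's spelling of the flag later renamed `jit_compile`):

```
import tensorflow as tf

v = tf.Variable(tf.zeros([1024, 1024]))


@tf.function(experimental_compile=True)
def step(x):
  # The updated value of `v` is an output of the XLA computation; with
  # input/output aliasing it may be written directly into the buffer that
  # already backs `v`, avoiding an extra allocation and copy per step.
  v.assign(0.9 * v + x)
  return tf.reduce_sum(v)


print(step(tf.ones([1024, 1024])).numpy())
```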
stream->parent()->device_ordinal() + : client->default_device_ordinal(); XlaComputationLaunchContext launch_context( - client, allocator, + client, allocator, device_ordinal, /*allocate_xla_tensors=*/platform_info_.is_on_xla_device(), platform_info_.UseMultipleStreams()); - launch_context.PopulateInputs(ctx, compilation_result, variables_snapshot, - /*missing_ctx_input_prefix=*/0); + const xla::HloInputOutputAliasConfig& input_output_alias = + executable->executable()->module().input_output_alias_config(); + xla::StatusOr> execution_inputs = + launch_context.PopulateInputs(ctx, compilation_result, resource_var_ptrs, + /*missing_ctx_input_prefix=*/0, + input_output_alias); + OP_REQUIRES_OK(ctx, execution_inputs.status()); // Execute the computation. VLOG(2) << "Executing computation."; @@ -403,24 +416,24 @@ void XlaLocalLaunchBase::Compute(OpKernelContext* ctx) { Env* env = Env::Default(); auto start_time = env->NowMicros(); - xla::StatusOr run_result; + xla::StatusOr execution_output; if (!stream || platform_info_.platform_id() == se::host::kHostPlatformId) { - run_result = executable->Run(launch_context.arguments(), run_options); + execution_output = + executable->Run(std::move(*execution_inputs), run_options); } else { - run_result = executable->RunAsync(launch_context.arguments(), run_options); + execution_output = + executable->RunAsync(std::move(*execution_inputs), run_options); } - OP_REQUIRES(ctx, run_result.ok(), run_result.status()); + OP_REQUIRES(ctx, execution_output.ok(), execution_output.status()); auto elapsed = env->NowMicros() - start_time; VLOG(2) << "Elapsed time: " << elapsed << "us"; + OP_REQUIRES_OK( + ctx, launch_context.PopulateOutputs( + ctx, compilation_result, execution_output->ConsumeResult(), + /*missing_ctx_input_prefix=*/0, absl::MakeSpan(variable_infos), + input_output_alias, resource_var_ptrs)); - const xla::HloInputOutputAliasConfig& input_output_alias = - executable->executable()->module().input_output_alias_config(); - OP_REQUIRES_OK(ctx, - launch_context.PopulateOutputs( - ctx, compilation_result, run_result.ConsumeValueOrDie(), - /*missing_ctx_input_prefix=*/0, input_output_alias, - variables_snapshot)); VLOG(1) << "Done"; } @@ -516,10 +529,14 @@ void XlaCompileOp::Compute(OpKernelContext* ctx) { OP_REQUIRES_OK( ctx, GetVariableInfosFromCtxInputs(ctx, resources_, &variable_infos)); OP_REQUIRES_OK(ctx, LockVariables(absl::MakeSpan(variable_infos))); + + // Do not alias resource updates as locking variables in XlaCompile and + // unlocking them in XlaRun may lead to deadlocks. Status status = CompileToLocalExecutable( ctx, function_, has_ref_vars_, platform_info_, variable_infos, constants_, - /*lazy=*/!must_compile_, &client, &kernel, &executable); + /*lazy=*/!must_compile_, + /*may_alias_resource_update=*/false, &client, &kernel, &executable); OP_REQUIRES_OK(ctx, SnapshotResourceVariables(ctx, resources_, variable_infos, &variables)); if (must_compile_ || status.code() != error::UNIMPLEMENTED) { @@ -587,14 +604,22 @@ void XlaRunOp::Compute(OpKernelContext* ctx) { absl::optional tf_allocator_adapter; se::DeviceMemoryAllocator* allocator = GetAllocator(&tf_allocator_adapter, ctx, platform_info_); + se::Stream* stream = + ctx->op_device_context() ? ctx->op_device_context()->stream() : nullptr; + int device_ordinal = stream ? 
stream->parent()->device_ordinal() + : closure.client()->default_device_ordinal(); XlaComputationLaunchContext launch_context( - closure.client(), allocator, + closure.client(), allocator, device_ordinal, /*allocate_xla_tensors=*/platform_info_.is_on_xla_device(), /*use_multiple_streams=*/platform_info_.UseMultipleStreams()); // We're missing the must-be-constant inputs, tell `PopulateInputs` // about this. We don't actually need these inputs because they've // already been baked into the compiled kernel. + const xla::HloInputOutputAliasConfig& input_output_alias = + closure.executable()->executable()->module().input_output_alias_config(); + xla::StatusOr> execution_inputs; + std::map snapshot_ptrs; { tensorflow::profiler::TraceMe hlo_module_activity( [&] { @@ -604,13 +629,17 @@ void XlaRunOp::Compute(OpKernelContext* ctx) { }, tensorflow::profiler::TraceMeLevel::kInfo); - launch_context.PopulateInputs( - ctx, closure.compilation_result(), closure.resource_var_snapshots(), - /*missing_ctx_input_prefix=*/closure.num_constant_args()); + for (auto& p : closure.resource_var_snapshots()) { + snapshot_ptrs.emplace(p.first, + p.second.has_value() ? &p.second.value() : nullptr); + } + execution_inputs = launch_context.PopulateInputs( + ctx, closure.compilation_result(), snapshot_ptrs, + /*missing_ctx_input_prefix=*/closure.num_constant_args(), + input_output_alias); + OP_REQUIRES_OK(ctx, execution_inputs.status()); } - se::Stream* stream = - ctx->op_device_context() ? ctx->op_device_context()->stream() : nullptr; xla::ExecutableRunOptions run_options; run_options.set_stream(stream); run_options.set_allocator(allocator); @@ -631,21 +660,19 @@ void XlaRunOp::Compute(OpKernelContext* ctx) { Env* env = Env::Default(); auto start_time = env->NowMicros(); - xla::StatusOr run_result; + xla::StatusOr execution_output; if (!stream || platform_info_.platform_id() == se::host::kHostPlatformId) { - run_result = - closure.executable()->Run(launch_context.arguments(), run_options); + execution_output = + closure.executable()->Run(std::move(*execution_inputs), run_options); } else { - run_result = - closure.executable()->RunAsync(launch_context.arguments(), run_options); + execution_output = closure.executable()->RunAsync( + std::move(*execution_inputs), run_options); } - OP_REQUIRES(ctx, run_result.ok(), run_result.status()); + OP_REQUIRES(ctx, execution_output.ok(), execution_output.status()); auto elapsed = env->NowMicros() - start_time; VLOG(2) << "Elapsed time in computation: " << elapsed << "us"; - const xla::HloInputOutputAliasConfig& input_output_alias = - closure.executable()->executable()->module().input_output_alias_config(); tensorflow::profiler::TraceMe hlo_module_activity( [&] { @@ -653,12 +680,16 @@ void XlaRunOp::Compute(OpKernelContext* ctx) { }, tensorflow::profiler::TraceMeLevel::kInfo); + xla::StatusOr> variable_infos = GatherVariableInfo( + ctx, *closure.compilation_result(), closure.num_constant_args()); + OP_REQUIRES_OK(ctx, variable_infos.status()); + OP_REQUIRES_OK(ctx, LockVariables(absl::MakeSpan(*variable_infos))); OP_REQUIRES_OK( ctx, launch_context.PopulateOutputs( - ctx, closure.compilation_result(), run_result.ConsumeValueOrDie(), + ctx, closure.compilation_result(), execution_output->ConsumeResult(), /*missing_ctx_input_prefix=*/closure.num_constant_args(), - input_output_alias, closure.resource_var_snapshots())); + absl::MakeSpan(*variable_infos), input_output_alias, snapshot_ptrs)); } XlaMergeOp::XlaMergeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} diff --git 
a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc index afaee614f02..50813859603 100644 --- a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc +++ b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc @@ -50,35 +50,47 @@ Status XlaCompileOnDemandOp::Run(OpKernelContext* ctx, // Builds an XLA allocator for the device. XlaComputationLaunchContext launch_context( client, client->backend().memory_allocator(), + client->default_device_ordinal(), /*allocate_xla_tensors=*/true, /*use_multiple_streams=*/metadata.UseMultipleStreams()); - launch_context.PopulateInputs(ctx, result, variable_args, - /*missing_ctx_input_prefix=*/0); + std::map snapshot_ptrs; + for (auto& p : variable_args) { + snapshot_ptrs.emplace(p.first, + p.second.has_value() ? &p.second.value() : nullptr); + } + + const xla::HloInputOutputAliasConfig& input_output_alias = + executable->executable()->module().input_output_alias_config(); + xla::StatusOr> execution_inputs = + launch_context.PopulateInputs(ctx, result, snapshot_ptrs, + /*missing_ctx_input_prefix=*/0, + input_output_alias); + TF_RETURN_IF_ERROR(execution_inputs.status()); se::Stream* stream = ctx->op_device_context() ? ctx->op_device_context()->stream() : nullptr; TF_RET_CHECK(stream); VLOG(2) << "Executing computation: " << name(); - for (const xla::ShapedBuffer* arg : launch_context.arguments()) { - VLOG(2) << name() << ": " << *arg; - } xla::ExecutableRunOptions run_options; run_options.set_stream(stream); run_options.set_allocator(client->backend().memory_allocator()); run_options.set_intra_op_thread_pool(&ctx->eigen_cpu_device()); run_options.set_rng_seed(GetXLARandomSeed()); - xla::StatusOr run_result = - executable->Run(launch_context.arguments(), run_options); + xla::StatusOr run_result = + executable->Run(execution_inputs.ConsumeValueOrDie(), run_options); TF_RETURN_IF_ERROR(run_result.status()); - - const xla::HloInputOutputAliasConfig& input_output_alias = - executable->executable()->module().input_output_alias_config(); + xla::ExecutionOutput execution_output = run_result.ConsumeValueOrDie(); + xla::StatusOr> variable_infos = + GatherVariableInfo(ctx, *result, 0); + TF_RETURN_IF_ERROR(variable_infos.status()); + TF_RETURN_IF_ERROR(LockVariables(absl::MakeSpan(*variable_infos))); TF_RETURN_IF_ERROR(launch_context.PopulateOutputs( - ctx, result, run_result.ConsumeValueOrDie(), - /*missing_ctx_input_prefix=*/0, input_output_alias, variable_args)); + ctx, result, execution_output.ConsumeResult(), + /*missing_ctx_input_prefix=*/0, absl::MakeSpan(*variable_infos), + input_output_alias, snapshot_ptrs)); return Status::OK(); } diff --git a/tensorflow/compiler/jit/xla_device_ops.cc b/tensorflow/compiler/jit/xla_device_ops.cc index 8126059262b..f0555ae32e5 100644 --- a/tensorflow/compiler/jit/xla_device_ops.cc +++ b/tensorflow/compiler/jit/xla_device_ops.cc @@ -59,11 +59,13 @@ void XlaAssignVariableOp::Compute(OpKernelContext* context) { return Status::OK(); })); mutex_lock ml(*variable->mu()); - OP_REQUIRES(context, variable->tensor()->dtype() == dtype_, - errors::InvalidArgument( - "Trying to assign variable with wrong dtype. Expected ", - DataTypeString(variable->tensor()->dtype()), " got ", - DataTypeString(dtype_))); + OP_REQUIRES( + context, + !variable->is_initialized || variable->tensor()->dtype() == dtype_, + errors::InvalidArgument( + "Trying to assign variable with wrong dtype. 
Expected ", + DataTypeString(variable->tensor()->dtype()), " got ", + DataTypeString(dtype_))); variable->is_initialized = true; *variable->tensor() = value; } diff --git a/tensorflow/compiler/jit/xla_launch_util.cc b/tensorflow/compiler/jit/xla_launch_util.cc index 7f107aaef11..41abe86df6e 100644 --- a/tensorflow/compiler/jit/xla_launch_util.cc +++ b/tensorflow/compiler/jit/xla_launch_util.cc @@ -91,29 +91,19 @@ VariableInfo::~VariableInfo() { Status GetVariableInfosFromCtxInputs(OpKernelContext* ctx, absl::Span variable_indices, std::vector* result) { - std::vector resource_handles; - absl::c_transform( - variable_indices, std::back_inserter(resource_handles), - [&](int variable_idx) { return &HandleFromInput(ctx, variable_idx); }); - - std::vector> variables; - Status s = LookupResources(ctx, resource_handles, &variables); - if (!s.ok()) { - errors::AppendToMessage(&s, kPossibleNonVariableResourceHintMessage); - return s; - } - result->clear(); result->reserve(variable_indices.size()); - for (int i = 0; i < variable_indices.size(); i++) { - // *Release* the variable because we're going to unref it later in - // ~VariableInfo. - Var* variable = variables[i].release(); - int input_idx = variable_indices[i]; - std::string var_name = HandleFromInput(ctx, input_idx).name(); - result->emplace_back(input_idx, var_name, variable); + for (int var_idx : variable_indices) { + Var* variable = nullptr; + ResourceHandle handle = HandleFromInput(ctx, var_idx); + TF_RETURN_IF_ERROR( + LookupOrCreateResource(ctx, handle, &variable, [&](Var** ptr) { + // This var is uninitialized for now. + *ptr = new Var(DT_INVALID); + return Status::OK(); + })); + result->emplace_back(var_idx, handle.name(), variable); } - return Status::OK(); } @@ -176,24 +166,43 @@ Status SnapshotResourceVariables(OpKernelContext* ctx, XlaComputationLaunchContext::XlaComputationLaunchContext( xla::LocalClient* client, se::DeviceMemoryAllocator* xla_allocator, - bool allocate_xla_tensors, bool use_multiple_streams) + int device_ordinal, bool allocate_xla_tensors, bool use_multiple_streams) : client_(client), xla_allocator_(xla_allocator), allocate_xla_tensors_(allocate_xla_tensors), - use_multiple_streams_(use_multiple_streams) { + use_multiple_streams_(use_multiple_streams), + device_ordinal_(device_ordinal) { if (use_multiple_streams_) { CHECK(allocate_xla_tensors_) << "To use multiple streams correctly we must " "be allocating XLA tensors!"; } } -void XlaComputationLaunchContext::PopulateInputs( +// Fills in `execution_input` with `buffer` for `index`. +static void PopulateExecutionInputBuffer(xla::ExecutionInput& execution_input, + xla::ShapeIndex index, + se::DeviceMemoryBase& buffer, + bool donate_buffer, int device_ordinal, + se::DeviceMemoryAllocator* allocator) { + xla::MaybeOwningDeviceMemory* in_buffer = + execution_input.MutableBuffer(index); + if (donate_buffer) { + *in_buffer = se::OwningDeviceMemory(buffer, device_ordinal, allocator); + buffer = se::DeviceMemoryBase(); + } else { + *in_buffer = buffer; + } +} + +xla::StatusOr> +XlaComputationLaunchContext::PopulateInputs( OpKernelContext* ctx, const XlaCompiler::CompilationResult* compilation_result, - const ResourceVarsSnapshot& variables, int missing_ctx_input_prefix) { - // Build ShapedBuffers that point directly to the Tensor buffers. 
- arg_ptrs_ = - std::vector(compilation_result->xla_input_shapes.size()); + const std::map& resource_vars, + int missing_ctx_input_prefix, + const xla::HloInputOutputAliasConfig& input_output_alias) { + std::vector arguments; + arguments.reserve(compilation_result->xla_input_shapes.size()); xla::TransferManager* transfer_manager = client_->backend().transfer_manager(); @@ -201,10 +210,28 @@ void XlaComputationLaunchContext::PopulateInputs( int arg_num = compilation_result->input_mapping[i]; CHECK_GE(arg_num, missing_ctx_input_prefix); const xla::Shape& shape = compilation_result->xla_input_shapes[i]; - const Tensor* t = variables.count(arg_num) - ? &(variables.at(arg_num).value()) + const xla::Shape& device_shape = + transfer_manager->HostShapeToDeviceShape(shape); + + bool is_resource_variable = resource_vars.count(arg_num); + bool is_updated_resource_variable = + is_resource_variable && + absl::c_any_of(compilation_result->resource_updates, + [&](const XlaCompiler::ResourceUpdate& update) { + return update.input_index == i && update.modified; + }); + + const Tensor* t = is_resource_variable + ? resource_vars.at(arg_num) : &(ctx->input(arg_num - missing_ctx_input_prefix)); CHECK(t); + bool donate_buffer = + t->RefCountIsOne() && is_updated_resource_variable && + input_output_alias.ParameterHasAlias(i, xla::ShapeIndex{}); + VLOG(3) << "Processing input: " << i + << "; is_resource_variable=" << is_resource_variable + << "; is_updated_resource_variable=" << is_updated_resource_variable + << "; donate_buffer=" << donate_buffer; if (use_multiple_streams_) { CHECK(ctx->op_device_context() && ctx->op_device_context()->stream()) @@ -215,23 +242,28 @@ void XlaComputationLaunchContext::PopulateInputs( ctx->op_device_context()->stream()); } - if (xla::Shape::Equal().MinorToMajorOnlyInLayout()( - shape, transfer_manager->HostShapeToDeviceShape(shape))) { + arguments.emplace_back(device_shape, shape); + xla::ExecutionInput& execution_input = arguments.back(); + if (xla::Shape::Equal().MinorToMajorOnlyInLayout()(shape, device_shape)) { se::DeviceMemoryBase dmem = XlaTensor::DeviceMemoryFromTensor(*t); - arg_buffers_.emplace_back( - /*on_host_shape=*/shape, /*on_device_shape=*/shape, - client_->platform(), client_->default_device_ordinal()); - arg_buffers_.back().set_buffer(dmem, /*index=*/{}); - arg_ptrs_[i] = &arg_buffers_.back(); + PopulateExecutionInputBuffer(execution_input, xla::ShapeIndex{}, dmem, + donate_buffer, device_ordinal_, + xla_allocator_); } else { - const XlaTensor* xla_tensor = XlaTensor::FromTensor(t); + XlaTensor* xla_tensor = XlaTensor::FromTensor(t); CHECK(xla_tensor && xla_tensor->has_shaped_buffer()); - arg_ptrs_[i] = const_cast(&xla_tensor->shaped_buffer()); + xla_tensor->shaped_buffer().buffers().ForEachMutableElement( + [&](const xla::ShapeIndex& index, se::DeviceMemoryBase* buffer) { + PopulateExecutionInputBuffer(execution_input, index, *buffer, + donate_buffer, device_ordinal_, + xla_allocator_); + }); } } + return std::move(arguments); } -// Construct the tensor for given type and buffer. +// Construct the tensor for the given type and buffer. 
static Tensor MakeTensor(DataType dtype, const TensorShape& shape, se::DeviceMemoryBase buffer, Allocator* allocator) { size_t expected_size = shape.num_elements() * DataTypeSize(dtype); @@ -247,28 +279,26 @@ static Tensor GetOrCreateTensorForOutput( int output_num, OpKernelContext* ctx, int missing_ctx_input_prefix, const xla::HloInputOutputAliasConfig& input_output_alias, absl::Span input_mapping, - const ResourceVarsSnapshot& resource_var_snapshots, DataType output_dtype, - const TensorShape& output_shape, se::DeviceMemoryBase output_buffer, - Allocator* output_allocator) { + const std::map& resource_vars_snapshots, + DataType output_dtype, const TensorShape& output_shape, + se::DeviceMemoryBase output_buffer, Allocator* output_allocator) { xla::ShapeIndex output_index = input_output_alias.shape().IsTuple() ? xla::ShapeIndex({output_num}) : xla::ShapeIndex({}); + CHECK(input_output_alias.shape().IsTuple() || output_num == 0); if (absl::optional alias = input_output_alias.GetAliasedParameter(output_index)) { + VLOG(3) << "Found alias: " << alias->ToString(); int tf_param = input_mapping[alias->parameter_number] - missing_ctx_input_prefix; - const Tensor* input_tensor = &ctx->input(tf_param); - - // If input tensor is a resource variable, alias to the snapshot we took at - // entry time. - if (input_tensor->dtype() == DT_RESOURCE) { - const absl::optional& v = - resource_var_snapshots.at(missing_ctx_input_prefix + tf_param); - CHECK(v.has_value()); - return *v; + const Tensor input_tensor = + ctx->input(tf_param).dtype() != DT_RESOURCE + ? ctx->input(tf_param) + : *resource_vars_snapshots.at(missing_ctx_input_prefix + tf_param); + if (output_buffer.opaque() == input_tensor.data()) { + return input_tensor; } - return *input_tensor; } return MakeTensor(output_dtype, output_shape, output_buffer, output_allocator); @@ -291,12 +321,10 @@ static Status SetOutputForConstant( OpKernelContext* ctx, se::Stream* stream, const XlaCompiler::CompilationResult* compilation_result, int output_num) { CHECK(compilation_result->outputs[output_num].is_constant); - // Output is a constant. const Tensor& const_tensor = compilation_result->outputs[output_num].constant_value; Tensor* output_tensor; - const size_t total_bytes = const_tensor.TotalBytes(); - if (stream && total_bytes > 0) { + if (stream && const_tensor.TotalBytes() > 0) { // Copy host -> device. (Empty tensors don't have backing buffers.) // Manually allocate memory using an XlaTensorBuffer so we can allocate // as much memory as the device requires (as given by @@ -335,52 +363,55 @@ static Status SetOutputForConstant( return Status::OK(); } -// Creates a list of updates resource variables. 
-static xla::StatusOr> GatherVariableInfo( - OpKernelContext* ctx, - const XlaCompiler::CompilationResult* compilation_result, - int missing_ctx_input_prefix) { - std::vector variable_infos; - variable_infos.reserve(compilation_result->resource_updates.size()); +static xla::StatusOr GetOrCreateResourceVar( + OpKernelContext* ctx, const ResourceHandle& handle, + const XlaCompiler::ResourceUpdate& write) { + Var* variable = nullptr; + TF_RETURN_IF_ERROR( + LookupOrCreateResource(ctx, handle, &variable, [&write](Var** ptr) { + *ptr = new Var(write.type); + return Status::OK(); + })); + return variable; +} - for (int i = 0; i < compilation_result->resource_updates.size(); ++i) { +xla::StatusOr> GatherVariableInfo( + OpKernelContext* ctx, + const XlaCompiler::CompilationResult& compilation_result, + int missing_ctx_input_prefix) { + std::vector out; + out.reserve(compilation_result.resource_updates.size()); + for (int i = 0; i < compilation_result.resource_updates.size(); ++i) { const XlaCompiler::ResourceUpdate& write = - compilation_result->resource_updates[i]; + compilation_result.resource_updates[i]; int actual_input_index = write.input_index - missing_ctx_input_prefix; if (actual_input_index < 0 || actual_input_index >= ctx->num_inputs()) { return errors::Internal("Invalid input index for variable write."); } - // TODO(b/35625933): tensorflow::Var should contain a PersistentTensor, - // not a Tensor. - Var* variable = nullptr; const ResourceHandle handle = HandleFromInput(ctx, actual_input_index); - TF_RETURN_IF_ERROR(LookupOrCreateResource(ctx, handle, &variable, - [&write](Var** ptr) { - *ptr = new Var(write.type); - return Status::OK(); - })); - variable_infos.emplace_back(actual_input_index, handle.name(), variable); + TF_ASSIGN_OR_RETURN(Var * variable, + GetOrCreateResourceVar(ctx, handle, write)); + out.emplace_back(actual_input_index, handle.name(), variable); } - return variable_infos; + return std::move(out); } Status XlaComputationLaunchContext::PopulateOutputs( OpKernelContext* ctx, const XlaCompiler::CompilationResult* compilation_result, ScopedShapedBuffer output, int missing_ctx_input_prefix, + absl::Span variable_infos, const xla::HloInputOutputAliasConfig& input_output_alias, - const ResourceVarsSnapshot& resource_var_snapshots) { + const std::map& resource_vars) { se::Stream* stream = ctx->op_device_context() ? ctx->op_device_context()->stream() : nullptr; Allocator* allocator = ctx->device()->GetAllocator({}); // Computation output should always be a tuple. 
- if (VLOG_IS_ON(2)) { - VLOG(2) << "Result tuple shape: " << output.on_host_shape().DebugString(); - VLOG(2) << "Result tuple shape (on device): " - << output.on_device_shape().DebugString(); - } + VLOG(2) << "Result tuple shape: " << output.on_host_shape().DebugString(); + VLOG(2) << "Result tuple shape (on device): " + << output.on_device_shape().DebugString(); CHECK_EQ(ctx->num_outputs(), compilation_result->outputs.size()); // If the on-host-shape isn't a tuple, create a new single-element tuple @@ -438,8 +469,8 @@ Status XlaComputationLaunchContext::PopulateOutputs( for (int i = 0; i < ctx->num_outputs(); ++i) { const TensorShape& shape = output_tensor_shapes[i]; const DataType& type = compilation_result->outputs[i].type; - VLOG(2) << "Retval " << i << " shape " << shape.DebugString() << " type " - << DataTypeString(type); + VLOG(2) << "Populating output for retval " << i << " shape " + << shape.DebugString() << " type " << DataTypeString(type); if (type == DT_VARIANT) { return errors::Unimplemented( "Support for TensorList crossing the XLA/TF boundary " @@ -467,30 +498,37 @@ Status XlaComputationLaunchContext::PopulateOutputs( se::DeviceMemoryBase buffer = output.buffer({output_num}); Tensor output_tensor = GetOrCreateTensorForOutput( output_num, ctx, missing_ctx_input_prefix, input_output_alias, - compilation_result->input_mapping, resource_var_snapshots, + compilation_result->input_mapping, resource_vars, ctx->expected_output_dtype(i), shape, buffer, allocator); - output.set_buffer(se::OwningDeviceMemory(), {output_num}); ctx->set_output(i, output_tensor); } + output.set_buffer(se::OwningDeviceMemory(), {output_num}); ++output_num; } - - if (VLOG_IS_ON(3)) { - VLOG(3) << ctx->mutable_output(i)->DeviceSafeDebugString(); - } } - // Apply variable updates, if any. - VLOG(2) << "Applying variable updates"; - TF_ASSIGN_OR_RETURN( - std::vector variable_infos, - GatherVariableInfo(ctx, compilation_result, missing_ctx_input_prefix)); - TF_RETURN_IF_ERROR(LockVariables(absl::MakeSpan(variable_infos))); + // input_index -> index into variable_infos. + absl::flat_hash_map variable_info_lookup; + for (int i = 0; i < variable_infos.size(); i++) { + variable_info_lookup.emplace(variable_infos[i].index(), i); + } + // Apply variable updates, if any. 
for (int i = 0; i < compilation_result->resource_updates.size(); ++i) { const XlaCompiler::ResourceUpdate& write = compilation_result->resource_updates[i]; - if (variable_infos[i].var()->tensor()->dtype() != write.type) { + int actual_input_index = write.input_index - missing_ctx_input_prefix; + CHECK_GE(actual_input_index, 0); + CHECK_LT(actual_input_index, ctx->num_inputs()); + Var* var = variable_infos[variable_info_lookup[actual_input_index]].var(); + CHECK(var); + + VLOG(2) << "Updating variable #" << i + << " at input index: " << actual_input_index << " with shape " + << write.shape.DebugString() << "; variable tensor has shape: " + << var->tensor()->shape().DebugString(); + + if (var->is_initialized && var->tensor()->dtype() != write.type) { return errors::Internal("Mismatched type in variable write"); } @@ -504,14 +542,14 @@ Status XlaComputationLaunchContext::PopulateOutputs( } } else { se::DeviceMemoryBase buffer = output.buffer({output_num}); - output.set_buffer(se::OwningDeviceMemory(), {output_num}); output_tensor = GetOrCreateTensorForOutput( output_num, ctx, missing_ctx_input_prefix, input_output_alias, - compilation_result->input_mapping, resource_var_snapshots, write.type, + compilation_result->input_mapping, resource_vars, write.type, write.shape, buffer, allocator); } - *variable_infos[i].var()->tensor() = output_tensor; - variable_infos[i].var()->is_initialized |= write.modified; + output.set_buffer(se::OwningDeviceMemory(), {output_num}); + var->is_initialized |= write.modified; + *var->tensor() = output_tensor; ++output_num; } return Status::OK(); @@ -562,7 +600,7 @@ Status XlaComputationLaunchContext::BuildXlaCompilerArguments( arg.name = std::string(variable.name()); arg.kind = XlaCompiler::Argument::kResource; arg.resource_kind = XlaResource::kVariable; - if (variable.var()) { + if (variable.var() && variable.var()->is_initialized) { const Tensor* value = variable.var()->tensor(); arg.type = value->dtype(); arg.shape = value->shape(); diff --git a/tensorflow/compiler/jit/xla_launch_util.h b/tensorflow/compiler/jit/xla_launch_util.h index 92b6c4c8a08..b34b3059a4f 100644 --- a/tensorflow/compiler/jit/xla_launch_util.h +++ b/tensorflow/compiler/jit/xla_launch_util.h @@ -81,6 +81,12 @@ class VariableInfo { bool lock_held_ = false; }; +// Creates a list of updated resource variables. +xla::StatusOr> GatherVariableInfo( + OpKernelContext* ctx, + const XlaCompiler::CompilationResult& compilation_result, + int missing_ctx_input_prefix); + // Takes a snapshot of the values of resource variable arguments, whose indices // are specified in `variable_indices` argument. We snapshot tensors that back // resource variables since concurrent updates may modify the shape, and it is @@ -124,7 +130,7 @@ class XlaComputationLaunchContext { // objects. XlaComputationLaunchContext(xla::LocalClient* client, se::DeviceMemoryAllocator* xla_allocator, - bool allocate_xla_tensors, + int device_ordinal, bool allocate_xla_tensors, bool use_multiple_streams); // Builds a XlaCompiler::Argument vector from the arguments to an XlaLaunch @@ -142,10 +148,12 @@ class XlaComputationLaunchContext { // missing and adjusts input indices accordingly. All elements in kernel's // input_mapping must be greater than or equal to `missing_ctx_input_prefix` // (in other words, no inputs actually required by the kernel can be missing). 
- void PopulateInputs(OpKernelContext* ctx, - const XlaCompiler::CompilationResult* compilation_result, - const ResourceVarsSnapshot& variables, - int missing_ctx_input_prefix); + xla::StatusOr> PopulateInputs( + OpKernelContext* ctx, + const XlaCompiler::CompilationResult* compilation_result, + const std::map& resource_vars, + int missing_ctx_input_prefix, + const xla::HloInputOutputAliasConfig& input_output_alias); // Given the XLA output in `output`, populate all outputs of `ctx`. Also // writes out the resource variable updates. @@ -161,20 +169,16 @@ class XlaComputationLaunchContext { OpKernelContext* ctx, const XlaCompiler::CompilationResult* compilation_result, xla::ScopedShapedBuffer output, int missing_ctx_input_prefix, + absl::Span variable_infos, const xla::HloInputOutputAliasConfig& input_output_alias, - const ResourceVarsSnapshot& resource_var_snapshots); - - // Return the argument list. Only valid after PopulateInputs() has been - // called. - const std::vector& arguments() const { return arg_ptrs_; } + const std::map& resource_vars); private: xla::LocalClient* client_; se::DeviceMemoryAllocator* xla_allocator_; bool allocate_xla_tensors_; bool use_multiple_streams_; - std::deque arg_buffers_; - std::vector arg_ptrs_; + int device_ordinal_; }; // A simple TensorBuffer implementation that allows us to create Tensors that diff --git a/tensorflow/python/eager/def_function_xla_jit_test.py b/tensorflow/python/eager/def_function_xla_jit_test.py index d55f84863e9..bd7a6ec2279 100644 --- a/tensorflow/python/eager/def_function_xla_jit_test.py +++ b/tensorflow/python/eager/def_function_xla_jit_test.py @@ -32,6 +32,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import tensor_array_ops +from tensorflow.python.ops import variables from tensorflow.python.platform import test @@ -403,6 +404,69 @@ class DefFunctionTest(test.TestCase): self.assertEqual(inner_retracings, 1) + def testUpdateVariable(self): + v = variables.Variable(3.1) + + @def_function.function(experimental_compile=True) + def update_var(a, b): + v.assign_add(a * b) + + update_var(constant_op.constant(0.7), constant_op.constant(0.6)) + self.assertAllClose(v, 3.52) + + def testUpdateVariableVector(self): + v = variables.Variable([3.1, 3.1]) + + @def_function.function(experimental_compile=True) + def update_var(a, b): + v.assign_add(a * b) + + update_var( + constant_op.constant([0.7, 0.7]), constant_op.constant([0.6, 0.6])) + self.assertAllClose(v, [3.52, 3.52]) + + def testUpdateVariableInClass(self): + + class C(object): + + @def_function.function(experimental_compile=True) + def update_var(self, a, b): + if not hasattr(self, 'v'): + self.v = variables.Variable(3.1) + self.v.assign_add(a * b) + + c = C() + + @def_function.function + def outer(): + c.update_var(constant_op.constant(0.7), constant_op.constant(0.6)) + + outer() + self.assertAllClose(c.v, 3.52) + + def testUpdateVariableMultipleOutputs(self): + v = variables.Variable(3.1) + + @def_function.function(experimental_compile=True) + def update_var(a, b): + v.assign_add(a * b) + return a * b + v + + out = update_var(constant_op.constant(0.7), constant_op.constant(0.6)) + self.assertAllClose(v, 3.52) + self.assertAllClose(out, 3.94) + + def testReturnIdentity(self): + + @def_function.function(experimental_compile=True) + def f(a, b): + return (a, b) + + a = constant_op.constant([0.7]) + b = constant_op.constant([0.6]) + + f(a, b) + if __name__ 
== '__main__': ops.enable_eager_execution() From a44a11793c30dc596eb84ed666f9857674bf93cb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Jul 2020 15:19:04 -0700 Subject: [PATCH 0748/2522] Refactor to extract XPlane to tool converters. PiperOrigin-RevId: 321865397 Change-Id: Ifa496f60df649e68a4bc647c1417a065cdcf141a --- tensorflow/core/profiler/convert/BUILD | 4 +- .../op_stats_to_input_pipeline_analysis.cc | 6 +- .../op_stats_to_input_pipeline_analysis.h | 2 +- .../convert/op_stats_to_overview_page.cc | 9 +-- .../convert/op_stats_to_overview_page.h | 3 +- .../convert/op_stats_to_tf_stats_test.cc | 2 +- .../convert/xplane_to_memory_profile.cc | 28 ++++++++ .../convert/xplane_to_memory_profile.h | 3 + .../profiler/convert/xplane_to_op_stats.cc | 71 +++++++++++-------- .../profiler/convert/xplane_to_op_stats.h | 12 +++- .../convert/xplane_to_op_stats_test.cc | 18 ++--- .../convert/xplane_to_profile_response.cc | 53 +++----------- tensorflow/core/profiler/utils/BUILD | 1 + .../profiler/utils/hardware_type_utils.cc | 8 +++ .../core/profiler/utils/hardware_type_utils.h | 3 + tensorflow/python/profiler/internal/BUILD | 2 + .../profiler/internal/profiler_wrapper.cc | 66 +++++++++++++++++ 17 files changed, 199 insertions(+), 92 deletions(-) diff --git a/tensorflow/core/profiler/convert/BUILD b/tensorflow/core/profiler/convert/BUILD index 06594b1aeaf..e24addeb83a 100644 --- a/tensorflow/core/profiler/convert/BUILD +++ b/tensorflow/core/profiler/convert/BUILD @@ -102,6 +102,7 @@ cc_library( "//tensorflow/core/profiler/protobuf:steps_db_proto_cc", "//tensorflow/core/profiler/protobuf:tf_function_proto_cc", "//tensorflow/core/profiler/utils:diagnostics", + "//tensorflow/core/profiler/utils:hardware_type_utils", "//tensorflow/core/profiler/utils:html_utils", "//tensorflow/core/profiler/utils:math_utils", "//tensorflow/core/profiler/utils:op_metrics_db_utils", @@ -128,6 +129,7 @@ cc_library( "//tensorflow/core/profiler/protobuf:steps_db_proto_cc", "//tensorflow/core/profiler/utils:diagnostics", "//tensorflow/core/profiler/utils:event_span", + "//tensorflow/core/profiler/utils:hardware_type_utils", "//tensorflow/core/profiler/utils:html_utils", "//tensorflow/core/profiler/utils:math_utils", "//tensorflow/core/profiler/utils:tf_op_utils", @@ -303,7 +305,6 @@ cc_library( "//tensorflow/core/profiler/protobuf:xplane_proto_cc", "//tensorflow/core/profiler/rpc/client:save_profile", "//tensorflow/core/profiler/utils:xplane_schema", - "//tensorflow/core/profiler/utils:xplane_utils", "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/strings", ], @@ -505,6 +506,7 @@ cc_library( "//tensorflow/core/profiler/protobuf:xplane_proto_cc", "//tensorflow/core/profiler/utils:tf_xplane_visitor", "//tensorflow/core/profiler/utils:xplane_schema", + "//tensorflow/core/profiler/utils:xplane_utils", "//tensorflow/core/profiler/utils:xplane_visitor", "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/container:flat_hash_map", diff --git a/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc b/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc index 8864dbd4313..7dafbc69af1 100644 --- a/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc +++ b/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc @@ -38,6 +38,7 @@ limitations under the License. 
#include "tensorflow/core/profiler/protobuf/steps_db.pb.h" #include "tensorflow/core/profiler/utils/diagnostics.h" #include "tensorflow/core/profiler/utils/event_span.h" +#include "tensorflow/core/profiler/utils/hardware_type_utils.h" #include "tensorflow/core/profiler/utils/html_utils.h" #include "tensorflow/core/profiler/utils/math_utils.h" #include "tensorflow/core/profiler/utils/tf_op_utils.h" @@ -553,12 +554,13 @@ StepSummary ComputeStepTimeSummaryInMs( } InputPipelineAnalysisResult ConvertOpStatsToInputPipelineAnalysis( - const OpStats& op_stats, const HardwareType& hardware_type) { + const OpStats& op_stats) { InputPipelineAnalysisResult result = ComputeGenericInputPipelineAnalysisResult( op_stats.step_db().step_sequence()); PopulateStepDiagnostics(op_stats, result.mutable_diagnostics()); - result.set_hardware_type(HardwareType_Name(hardware_type)); + result.set_hardware_type(HardwareType_Name( + ParseHardwareType(op_stats.run_environment().device_type()))); GenerateHostResult(op_stats.host_op_metrics_db(), &result); InputPipelineAnalysisRecommendation recommendation = GenerateRecommendation(); diff --git a/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h b/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h index 2191251ee88..cc54a7ea684 100644 --- a/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h +++ b/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h @@ -59,7 +59,7 @@ BottleneckAnalysis ComputeBottleneckAnalysis( any_step_details); InputPipelineAnalysisResult ConvertOpStatsToInputPipelineAnalysis( - const OpStats& op_stats, const HardwareType& hardware_type); + const OpStats& op_stats); // Returns true if explanation for "All Others" time is also included in // input_statement. diff --git a/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc b/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc index a92902b6cf7..9f71175bcea 100644 --- a/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc +++ b/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc @@ -31,6 +31,7 @@ limitations under the License. 
#include "tensorflow/core/profiler/protobuf/steps_db.pb.h" #include "tensorflow/core/profiler/protobuf/tf_function.pb.h" #include "tensorflow/core/profiler/utils/diagnostics.h" +#include "tensorflow/core/profiler/utils/hardware_type_utils.h" #include "tensorflow/core/profiler/utils/html_utils.h" #include "tensorflow/core/profiler/utils/math_utils.h" #include "tensorflow/core/profiler/utils/op_metrics_db_utils.h" @@ -316,14 +317,13 @@ std::string EagerRecommendationHtml(double host_op_time_eager_percent, return recommendation; } -OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats, - HardwareType hardware_type) { +OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats) { OverviewPage overview_page; *overview_page.mutable_run_environment() = ComputeRunEnvironment(op_stats.run_environment()); *overview_page.mutable_analysis() = ComputeAnalysisResult(op_stats); *overview_page.mutable_input_analysis() = - ConvertOpStatsToInputPipelineAnalysis(op_stats, hardware_type); + ConvertOpStatsToInputPipelineAnalysis(op_stats); BottleneckAnalysis bottleneck = ComputeBottleneckAnalysis( overview_page.input_analysis().input_time_breakdown(), overview_page.input_analysis().step_details()); @@ -331,7 +331,8 @@ OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats, bottleneck, op_stats.device_op_metrics_db().precision_stats()); SetCommonRecommendation( bottleneck.input_classification(), bottleneck.input_statement(), "", - hardware_type, TfFunctionRecommendationHtml(op_stats.tf_function_db()), + ParseHardwareType(op_stats.run_environment().device_type()), + TfFunctionRecommendationHtml(op_stats.tf_function_db()), EagerRecommendationHtml( overview_page.analysis().host_op_time_eager_percent(), overview_page.analysis().device_op_time_eager_percent()), diff --git a/tensorflow/core/profiler/convert/op_stats_to_overview_page.h b/tensorflow/core/profiler/convert/op_stats_to_overview_page.h index 0d49ae492fc..876f6847e9f 100644 --- a/tensorflow/core/profiler/convert/op_stats_to_overview_page.h +++ b/tensorflow/core/profiler/convert/op_stats_to_overview_page.h @@ -54,8 +54,7 @@ OverviewPageAnalysis ComputeAnalysisResult(const OpStats& op_stats); OverviewPageRunEnvironment ComputeRunEnvironment( const RunEnvironment& run_environment); -OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats, - HardwareType hardware_type); +OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats); // Returns a html which provides tf-function related recommendation. 
std::string TfFunctionRecommendationHtml(const TfFunctionDb& tf_function_db); diff --git a/tensorflow/core/profiler/convert/op_stats_to_tf_stats_test.cc b/tensorflow/core/profiler/convert/op_stats_to_tf_stats_test.cc index 5a01bf3417b..4abd210705b 100644 --- a/tensorflow/core/profiler/convert/op_stats_to_tf_stats_test.cc +++ b/tensorflow/core/profiler/convert/op_stats_to_tf_stats_test.cc @@ -80,7 +80,7 @@ TEST(OpStatsToTfStats, GpuTfStats) { kKernel3DurationNs, /*on_device=*/true, kKernel3, &device_plane, &stream2); - const OpStats op_stats = ConvertXSpaceToOpStats(space); + const OpStats op_stats = ConvertXSpaceToOpStats(space, {OP_METRICS_DB}); const TfStatsDatabase tf_stats = ConvertOpStatsToTfStats(op_stats); // TfOp1, TfOp2, Idle diff --git a/tensorflow/core/profiler/convert/xplane_to_memory_profile.cc b/tensorflow/core/profiler/convert/xplane_to_memory_profile.cc index 530e402c8b7..ee4e597c5b9 100644 --- a/tensorflow/core/profiler/convert/xplane_to_memory_profile.cc +++ b/tensorflow/core/profiler/convert/xplane_to_memory_profile.cc @@ -35,6 +35,7 @@ limitations under the License. #include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/utils/tf_xplane_visitor.h" #include "tensorflow/core/profiler/utils/xplane_schema.h" +#include "tensorflow/core/profiler/utils/xplane_utils.h" #include "tensorflow/core/profiler/utils/xplane_visitor.h" namespace tensorflow { @@ -481,6 +482,23 @@ void ProcessMemoryProfileProto(int64 max_num_snapshots, } } +template +Status ConvertProtoToJson(const Proto& proto_output, std::string* json_output) { + protobuf::util::JsonPrintOptions json_options; + json_options.always_print_primitive_fields = true; + auto status = protobuf::util::MessageToJsonString(proto_output, json_output, + json_options); + if (!status.ok()) { + // Convert error_msg google::protobuf::StringPiece (or absl::string_view) to + // tensorflow::StringPiece. + auto error_msg = status.message(); + return errors::Internal( + "Could not convert proto to JSON string: ", + absl::string_view(error_msg.data(), error_msg.length())); + } + return Status::OK(); +} + } // namespace MemoryProfile ConvertXPlaneToMemoryProfile(const XPlane& host_plane, @@ -490,5 +508,15 @@ MemoryProfile ConvertXPlaneToMemoryProfile(const XPlane& host_plane, return memory_profile; } +Status ConvertXSpaceToMemoryProfileJson(const XSpace& xspace, + std::string* json_output) { + if (const XPlane* host_plane = + FindPlaneWithName(xspace, kHostThreadsPlaneName)) { + MemoryProfile memory_profile = ConvertXPlaneToMemoryProfile(*host_plane); + TF_RETURN_IF_ERROR(ConvertProtoToJson(memory_profile, json_output)); + } + return Status::OK(); +} + } // namespace profiler } // namespace tensorflow diff --git a/tensorflow/core/profiler/convert/xplane_to_memory_profile.h b/tensorflow/core/profiler/convert/xplane_to_memory_profile.h index 6eddaeeec71..73681794e18 100644 --- a/tensorflow/core/profiler/convert/xplane_to_memory_profile.h +++ b/tensorflow/core/profiler/convert/xplane_to_memory_profile.h @@ -16,6 +16,7 @@ limitations under the License. 
#ifndef TENSORFLOW_CORE_PROFILER_CONVERT_XPLANE_TO_MEMORY_PROFILE_H_ #define TENSORFLOW_CORE_PROFILER_CONVERT_XPLANE_TO_MEMORY_PROFILE_H_ +#include "tensorflow/core/platform/status.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/protobuf/memory_profile.pb.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" @@ -29,6 +30,8 @@ namespace profiler { MemoryProfile ConvertXPlaneToMemoryProfile(const XPlane& host_plane, int64 max_num_snapshots = 1000); +Status ConvertXSpaceToMemoryProfileJson(const XSpace& xspace, + std::string* json_output); } // namespace profiler } // namespace tensorflow diff --git a/tensorflow/core/profiler/convert/xplane_to_op_stats.cc b/tensorflow/core/profiler/convert/xplane_to_op_stats.cc index a67da50d35c..82a13c71d47 100644 --- a/tensorflow/core/profiler/convert/xplane_to_op_stats.cc +++ b/tensorflow/core/profiler/convert/xplane_to_op_stats.cc @@ -106,7 +106,8 @@ void SetRunEnvironment(int32 accelerator_count, RunEnvironment* env) { } void ProcessHostPlane(const XPlane* host_plane, bool use_device_step_events, - OpMetricsDb* op_metrics_db, StepEvents* step_events) { + const OpStatsConfig& config, OpMetricsDb* op_metrics_db, + StepEvents* step_events) { absl::flat_hash_map tf_ops = CollectTfOpsFromHostThreadsXPlane(*host_plane); OpMetricsDbCombiner combiner(op_metrics_db); @@ -114,9 +115,11 @@ void ProcessHostPlane(const XPlane* host_plane, bool use_device_step_events, plane.ForEachLine([&](const XLineVisitor& line) { ConsumeTfMetricsDbData( ConvertHostThreadsXLineToTfMetricsDbData(line, tf_ops), &combiner); - CombineStepEvents(ConvertHostThreadsXLineToStepEvents( - line, use_device_step_events, *step_events), - step_events); + if (config.contains(STEP_DB)) { + CombineStepEvents(ConvertHostThreadsXLineToStepEvents( + line, use_device_step_events, *step_events), + step_events); + } }); } @@ -138,7 +141,8 @@ void PropagateXSpaceDiagnosticsToOpStats(const XSpace& space, } } -OpStats ConvertXSpaceToOpStats(const XSpace& space) { +OpStats ConvertXSpaceToOpStats(const XSpace& space, + const OpStatsConfig& config) { const XPlane* host_plane = FindPlaneWithName(space, kHostThreadsPlaneName); std::vector device_planes = FindPlanesWithPrefix(space, kGpuPlanePrefix); @@ -152,34 +156,45 @@ OpStats ConvertXSpaceToOpStats(const XSpace& space) { std::vector reports; for (const XPlane* device_trace : device_planes) { - if (!op_stats.has_perf_env()) { - *op_stats.mutable_perf_env() = GetPerfEnvFromXPlane(*device_trace); + if (config.contains(OP_METRICS_DB)) { + if (!op_stats.has_perf_env()) { + *op_stats.mutable_perf_env() = GetPerfEnvFromXPlane(*device_trace); + } + const PerfEnv& perf_env = op_stats.perf_env(); + OpMetricsDb device_op_metrics_db = ConvertDeviceTraceXPlaneToOpMetricsDb( + *device_trace, perf_env.peak_tera_flops_per_second(), + perf_env.peak_hbm_bw_giga_bytes_per_second()); + op_metrics_db_combiner.Combine(device_op_metrics_db); + } + if (config.contains(STEP_DB)) { + CombineStepEvents(ConvertDeviceTraceXPlaneToStepEvents(*device_trace), + &step_events); + } + if (config.contains(KERNEL_STATS_DB)) { + KernelStatsDb kernel_stats_db = ConvertDeviceTraceXPlaneToKernelStatsDb( + *device_trace, /*on_kernel_fn=*/{}); + reports.insert(reports.begin(), kernel_stats_db.reports().begin(), + kernel_stats_db.reports().end()); } - const PerfEnv& perf_env = op_stats.perf_env(); - OpMetricsDb device_op_metrics_db = ConvertDeviceTraceXPlaneToOpMetricsDb( - *device_trace, perf_env.peak_tera_flops_per_second(), - 
perf_env.peak_hbm_bw_giga_bytes_per_second()); - op_metrics_db_combiner.Combine(device_op_metrics_db); - CombineStepEvents(ConvertDeviceTraceXPlaneToStepEvents(*device_trace), - &step_events); - KernelStatsDb kernel_stats_db = ConvertDeviceTraceXPlaneToKernelStatsDb( - *device_trace, /*on_kernel_fn=*/{}); - reports.insert(reports.begin(), kernel_stats_db.reports().begin(), - kernel_stats_db.reports().end()); } - GroupKernelReports(&reports, op_stats.mutable_kernel_stats_db()); - SortKernelsByTotalDurationDesc(op_stats.mutable_kernel_stats_db()); - // Convert a host plane. + if (config.contains(KERNEL_STATS_DB)) { + GroupKernelReports(&reports, op_stats.mutable_kernel_stats_db()); + SortKernelsByTotalDurationDesc(op_stats.mutable_kernel_stats_db()); + } bool has_device = !device_planes.empty(); - if (host_plane) { - ProcessHostPlane(host_plane, has_device, + // Convert a host plane. + if (host_plane && config.contains(OP_METRICS_DB)) { + ProcessHostPlane(host_plane, has_device, config, op_stats.mutable_host_op_metrics_db(), &step_events); } - StepEvents nonoverlapped_step_events = ToNonOverlappedStepEvents(step_events); - *op_stats.mutable_step_db() = - ConvertStepEventsToStepDb(has_device, nonoverlapped_step_events); - *op_stats.mutable_device_op_metrics_db()->mutable_precision_stats() = - ComputePrecisionStats(nonoverlapped_step_events); + if (config.contains(STEP_DB)) { + StepEvents nonoverlapped_step_events = + ToNonOverlappedStepEvents(step_events); + *op_stats.mutable_step_db() = + ConvertStepEventsToStepDb(has_device, nonoverlapped_step_events); + *op_stats.mutable_device_op_metrics_db()->mutable_precision_stats() = + ComputePrecisionStats(nonoverlapped_step_events); + } return op_stats; } diff --git a/tensorflow/core/profiler/convert/xplane_to_op_stats.h b/tensorflow/core/profiler/convert/xplane_to_op_stats.h index b69a99a2ef5..e1778006cbd 100644 --- a/tensorflow/core/profiler/convert/xplane_to_op_stats.h +++ b/tensorflow/core/profiler/convert/xplane_to_op_stats.h @@ -16,14 +16,24 @@ limitations under the License. #ifndef TENSORFLOW_CORE_PROFILER_CONVERT_XPLANE_TO_OP_STATS_H_ #define TENSORFLOW_CORE_PROFILER_CONVERT_XPLANE_TO_OP_STATS_H_ +#include "absl/container/flat_hash_set.h" #include "tensorflow/core/profiler/protobuf/op_stats.pb.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" namespace tensorflow { namespace profiler { +enum OpStatsKind { + OP_METRICS_DB, + STEP_DB, + KERNEL_STATS_DB, +}; + +using OpStatsConfig = absl::flat_hash_set; + // NOTE: call GroupTfEvents before if OpStats.step_db needs to be generated. -OpStats ConvertXSpaceToOpStats(const XSpace& space); +OpStats ConvertXSpaceToOpStats(const XSpace& space, + const OpStatsConfig& config); // Propagate and dedup the diagnostics in XSpace and add to OpStats. 
void PropagateXSpaceDiagnosticsToOpStats(const XSpace& space, diff --git a/tensorflow/core/profiler/convert/xplane_to_op_stats_test.cc b/tensorflow/core/profiler/convert/xplane_to_op_stats_test.cc index 68bb8205f5e..beeb4a097bc 100644 --- a/tensorflow/core/profiler/convert/xplane_to_op_stats_test.cc +++ b/tensorflow/core/profiler/convert/xplane_to_op_stats_test.cc @@ -61,8 +61,8 @@ TEST(ConvertXPlaneToOpStats, PerfEnv) { *device_plane.GetOrCreateStatMetadata("compute_cap_minor"), absl::StrCat(kComputeCapMinor)); - GroupTfEvents(&space, /*event_group_name_map=*/nullptr); - OpStats op_stats = ConvertXSpaceToOpStats(space); + GroupTfEvents(&space, /*group_metadata_map=*/nullptr); + OpStats op_stats = ConvertXSpaceToOpStats(space, {OP_METRICS_DB}); const PerfEnv& perf_env = op_stats.perf_env(); EXPECT_NEAR(141, perf_env.peak_tera_flops_per_second(), kMaxError); EXPECT_NEAR(900, perf_env.peak_hbm_bw_giga_bytes_per_second(), kMaxError); @@ -76,8 +76,8 @@ TEST(ConvertXPlaneToOpStats, RunEnvironment) { XPlaneBuilder device_plane2( GetOrCreateGpuXPlane(&space, /*device_ordinal=*/1)); - GroupTfEvents(&space, /*event_group_name_map=*/nullptr); - OpStats op_stats = ConvertXSpaceToOpStats(space); + GroupTfEvents(&space, /*group_metadata_map=*/nullptr); + OpStats op_stats = ConvertXSpaceToOpStats(space, {}); const RunEnvironment& run_env = op_stats.run_environment(); EXPECT_EQ("GPU", run_env.device_type()); @@ -106,8 +106,8 @@ TEST(ConvertXPlaneToOpStats, CpuOnlyStepDbTest) { {{StatType::kStepId, kStepId}}); CreateXEvent(&host_plane_builder, &tf_executor_thread, "matmul", 30, 70); - GroupTfEvents(&space, /*event_group_name_map=*/nullptr); - OpStats op_stats = ConvertXSpaceToOpStats(space); + GroupTfEvents(&space, /*group_metadata_map=*/nullptr); + OpStats op_stats = ConvertXSpaceToOpStats(space, {OP_METRICS_DB, STEP_DB}); const StepDatabaseResult& step_db = op_stats.step_db(); EXPECT_EQ(step_db.step_sequence_size(), 1); @@ -143,8 +143,8 @@ TEST(ConvertXPlaneToOpStats, GpuStepDbTest) { CreateXEvent(&device_plane_builder, &stream, "matmul", 50, 40, {{StatType::kCorrelationId, kCorrelationId}}); - GroupTfEvents(&space, /*event_group_name_map=*/nullptr); - OpStats op_stats = ConvertXSpaceToOpStats(space); + GroupTfEvents(&space, /*group_metadata_map=*/nullptr); + OpStats op_stats = ConvertXSpaceToOpStats(space, {OP_METRICS_DB, STEP_DB}); const StepDatabaseResult& step_db = op_stats.step_db(); EXPECT_EQ(step_db.step_sequence_size(), 1); @@ -161,7 +161,7 @@ TEST(ConvertXPlaneToOpStats, PropagateAndDedupErrors) { *space.add_errors() = kError; *space.add_errors() = kError; - OpStats op_stats = ConvertXSpaceToOpStats(space); + OpStats op_stats = ConvertXSpaceToOpStats(space, {}); EXPECT_EQ(1, op_stats.diagnostics().errors_size()); EXPECT_EQ(kError, op_stats.diagnostics().errors(/*index=*/0)); diff --git a/tensorflow/core/profiler/convert/xplane_to_profile_response.cc b/tensorflow/core/profiler/convert/xplane_to_profile_response.cc index 22af46c4380..d9992cb31bd 100644 --- a/tensorflow/core/profiler/convert/xplane_to_profile_response.cc +++ b/tensorflow/core/profiler/convert/xplane_to_profile_response.cc @@ -30,18 +30,15 @@ limitations under the License. 
#include "tensorflow/core/profiler/convert/xplane_to_op_stats.h" #include "tensorflow/core/profiler/convert/xplane_to_trace_events.h" #include "tensorflow/core/profiler/profiler_service.pb.h" -#include "tensorflow/core/profiler/protobuf/hardware_types.pb.h" #include "tensorflow/core/profiler/protobuf/input_pipeline.pb.h" #include "tensorflow/core/profiler/protobuf/kernel_stats.pb.h" #include "tensorflow/core/profiler/protobuf/memory_profile.pb.h" -#include "tensorflow/core/profiler/protobuf/op_stats.pb.h" #include "tensorflow/core/profiler/protobuf/overview_page.pb.h" #include "tensorflow/core/profiler/protobuf/tf_stats.pb.h" #include "tensorflow/core/profiler/protobuf/trace_events.pb.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/rpc/client/save_profile.h" #include "tensorflow/core/profiler/utils/xplane_schema.h" -#include "tensorflow/core/profiler/utils/xplane_utils.h" namespace tensorflow { namespace profiler { @@ -55,12 +52,6 @@ const absl::string_view kKernelStats = "kernel_stats"; const absl::string_view kMemoryProfile = "memory_profile"; const absl::string_view kXPlane = "xplane"; -HardwareType HardwareTypeFromRunEnvironment(const RunEnvironment& run_env) { - if (run_env.device_type() == "GPU") return HardwareType::GPU; - if (run_env.device_type() == "CPU") return HardwareType::CPU_ONLY; - return HardwareType::UNKNOWN_HARDWARE; -} - template void AddToolData(absl::string_view tool_name, const Proto& tool_output, ProfileResponse* response) { @@ -69,23 +60,6 @@ void AddToolData(absl::string_view tool_name, const Proto& tool_output, tool_output.SerializeToString(tool_data->mutable_data()); } -template -Status ConvertProtoToJson(const Proto& proto_output, std::string* json_output) { - protobuf::util::JsonPrintOptions json_options; - json_options.always_print_primitive_fields = true; - auto status = protobuf::util::MessageToJsonString(proto_output, json_output, - json_options); - if (!status.ok()) { - // Convert error_msg google::protobuf::StringPiece (or absl::string_view) to - // tensorflow::StringPiece. - auto error_msg = status.message(); - return errors::Internal( - "Could not convert proto to JSON string: ", - absl::string_view(error_msg.data(), error_msg.length())); - } - return Status::OK(); -} - // Returns the tool name with extension. std::string ToolName(absl::string_view tool) { if (tool == kTraceViewer) return "trace.json.gz"; @@ -115,21 +89,18 @@ Status ConvertXSpaceToProfileResponse(const XSpace& xspace, // Trace viewer is the only tool, skip OpStats conversion. 
if (tools.size() == 1) return Status::OK(); } - OpStats op_stats = ConvertXSpaceToOpStats(xspace); - HardwareType hw_type = - HardwareTypeFromRunEnvironment(op_stats.run_environment()); + OpStats op_stats = + ConvertXSpaceToOpStats(xspace, {OP_METRICS_DB, STEP_DB, KERNEL_STATS_DB}); if (tools.contains(kOverviewPage)) { - OverviewPage overview_page_db = - ConvertOpStatsToOverviewPage(op_stats, hw_type); + OverviewPage overview_page_db = ConvertOpStatsToOverviewPage(op_stats); AddToolData(ToolName(kOverviewPage), overview_page_db, response); if (tools.contains(kInputPipeline)) { AddToolData(ToolName(kInputPipeline), overview_page_db.input_analysis(), response); } } else if (tools.contains(kInputPipeline)) { - InputPipelineAnalysisResult input_pipeline_analysis = - ConvertOpStatsToInputPipelineAnalysis(op_stats, hw_type); - AddToolData(ToolName(kInputPipeline), input_pipeline_analysis, response); + AddToolData(ToolName(kInputPipeline), + ConvertOpStatsToInputPipelineAnalysis(op_stats), response); } if (tools.contains(kTensorflowStats)) { TfStatsDatabase tf_stats_db = ConvertOpStatsToTfStats(op_stats); @@ -139,15 +110,11 @@ Status ConvertXSpaceToProfileResponse(const XSpace& xspace, AddToolData(ToolName(kKernelStats), op_stats.kernel_stats_db(), response); } if (tools.contains(kMemoryProfile)) { - if (const XPlane* host_plane = - FindPlaneWithName(xspace, kHostThreadsPlaneName)) { - MemoryProfile memory_profile = ConvertXPlaneToMemoryProfile(*host_plane); - std::string json_output; - TF_RETURN_IF_ERROR(ConvertProtoToJson(memory_profile, &json_output)); - TF_RETURN_IF_ERROR(SaveGzippedToolDataToTensorboardProfile( - req.repository_root(), req.session_id(), req.host_name(), - ToolName(kMemoryProfile), json_output)); - } + std::string json_output; + TF_RETURN_IF_ERROR(ConvertXSpaceToMemoryProfileJson(xspace, &json_output)); + TF_RETURN_IF_ERROR(SaveGzippedToolDataToTensorboardProfile( + req.repository_root(), req.session_id(), req.host_name(), + ToolName(kMemoryProfile), json_output)); } return Status::OK(); } diff --git a/tensorflow/core/profiler/utils/BUILD b/tensorflow/core/profiler/utils/BUILD index 0262c5659b7..d7046f1c214 100644 --- a/tensorflow/core/profiler/utils/BUILD +++ b/tensorflow/core/profiler/utils/BUILD @@ -47,6 +47,7 @@ cc_library( deps = [ "//tensorflow/core:lib", "//tensorflow/core/profiler/protobuf:hardware_types_proto_cc", + "@com_google_absl//absl/strings", ], ) diff --git a/tensorflow/core/profiler/utils/hardware_type_utils.cc b/tensorflow/core/profiler/utils/hardware_type_utils.cc index e2a4004555b..69b5d4796a3 100644 --- a/tensorflow/core/profiler/utils/hardware_type_utils.cc +++ b/tensorflow/core/profiler/utils/hardware_type_utils.cc @@ -74,5 +74,13 @@ double GetFlopMaxThroughputPerSM(const DeviceCapabilities& device_cap) { device_cap.clock_rate_in_ghz(); } +HardwareType ParseHardwareType(absl::string_view device_type) { + if (device_type == "GPU" || device_type == "Nvidia GPU") + return HardwareType::GPU; + if (device_type == "CPU") return HardwareType::CPU_ONLY; + if (device_type == "TPU") return HardwareType::TPU; + return HardwareType::UNKNOWN_HARDWARE; +} + } // namespace profiler } // namespace tensorflow diff --git a/tensorflow/core/profiler/utils/hardware_type_utils.h b/tensorflow/core/profiler/utils/hardware_type_utils.h index 9d4b8b73eaf..70090fb766f 100644 --- a/tensorflow/core/profiler/utils/hardware_type_utils.h +++ b/tensorflow/core/profiler/utils/hardware_type_utils.h @@ -16,6 +16,7 @@ limitations under the License. 
#ifndef TENSORFLOW_CORE_PROFILER_UTILS_HARDWARE_TYPE_UTILS_H_ #define TENSORFLOW_CORE_PROFILER_UTILS_HARDWARE_TYPE_UTILS_H_ +#include "absl/strings/string_view.h" #include "tensorflow/core/profiler/protobuf/hardware_types.pb.h" namespace tensorflow { @@ -25,6 +26,8 @@ namespace profiler { // streaming multiprocessor. double GetFlopMaxThroughputPerSM(const DeviceCapabilities& device_cap); +HardwareType ParseHardwareType(absl::string_view device_type); + } // namespace profiler } // namespace tensorflow diff --git a/tensorflow/python/profiler/internal/BUILD b/tensorflow/python/profiler/internal/BUILD index 6f7193b3207..221246e3c18 100644 --- a/tensorflow/python/profiler/internal/BUILD +++ b/tensorflow/python/profiler/internal/BUILD @@ -119,6 +119,8 @@ tf_python_pybind_extension( deps = [ "//tensorflow/core:lib", "//tensorflow/core/profiler:profiler_service_proto_cc", + "//tensorflow/core/profiler/convert:op_stats_to_tf_stats", + "//tensorflow/core/profiler/convert:xplane_to_op_stats", "//tensorflow/core/profiler/convert:xplane_to_profile_response", "//tensorflow/core/profiler/convert:xplane_to_trace_events", "//tensorflow/core/profiler/lib:profiler_session_headers", diff --git a/tensorflow/python/profiler/internal/profiler_wrapper.cc b/tensorflow/python/profiler/internal/profiler_wrapper.cc index 63300f2a1ec..16e986ac2e6 100644 --- a/tensorflow/python/profiler/internal/profiler_wrapper.cc +++ b/tensorflow/python/profiler/internal/profiler_wrapper.cc @@ -20,9 +20,16 @@ limitations under the License. #include "pybind11/pytypes.h" #include "tensorflow/core/platform/host_info.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h" +#include "tensorflow/core/profiler/convert/op_stats_to_overview_page.h" +#include "tensorflow/core/profiler/convert/op_stats_to_tf_stats.h" +#include "tensorflow/core/profiler/convert/xplane_to_memory_profile.h" +#include "tensorflow/core/profiler/convert/xplane_to_op_stats.h" #include "tensorflow/core/profiler/convert/xplane_to_profile_response.h" #include "tensorflow/core/profiler/convert/xplane_to_trace_events.h" #include "tensorflow/core/profiler/lib/profiler_session.h" +#include "tensorflow/core/profiler/protobuf/input_pipeline.pb.h" +#include "tensorflow/core/profiler/protobuf/kernel_stats.pb.h" #include "tensorflow/core/profiler/rpc/client/capture_profile.h" #include "tensorflow/core/profiler/rpc/client/save_profile.h" #include "tensorflow/core/profiler/rpc/profiler_server.h" @@ -32,6 +39,10 @@ namespace py = ::pybind11; namespace { +using ::tensorflow::profiler::KERNEL_STATS_DB; +using ::tensorflow::profiler::OP_METRICS_DB; +using ::tensorflow::profiler::STEP_DB; + tensorflow::ProfileRequest MakeProfileRequest( const tensorflow::string& logdir, const tensorflow::string& session_id, const tensorflow::string& host) { @@ -162,4 +173,59 @@ PYBIND11_MODULE(_pywrap_profiler, m) { tensorflow::MaybeRaiseRegisteredFromStatus(status); return content; }); + + m.def("xspace_to_trace_events", [](const py::bytes& serialized_xspace_proto) { + tensorflow::string content; + tensorflow::profiler::XSpace xspace; + xspace.ParseFromString(serialized_xspace_proto); + tensorflow::profiler::ConvertXSpaceToTraceEventsString(xspace, &content); + return py::bytes(content); + }); + + m.def("xspace_to_overview_page", + [](const py::bytes& serialized_xspace_proto) { + tensorflow::profiler::XSpace xspace; + xspace.ParseFromString(serialized_xspace_proto); + tensorflow::profiler::OverviewPage overview_page = + 
tensorflow::profiler::ConvertOpStatsToOverviewPage( + ConvertXSpaceToOpStats(xspace, {OP_METRICS_DB, STEP_DB})); + return py::bytes(overview_page.SerializeAsString()); + }); + + m.def("xspace_to_input_pipeline", + [](const py::bytes& serialized_xspace_proto) { + tensorflow::profiler::XSpace xspace; + xspace.ParseFromString(serialized_xspace_proto); + tensorflow::profiler::InputPipelineAnalysisResult input_pipeline = + tensorflow::profiler::ConvertOpStatsToInputPipelineAnalysis( + ConvertXSpaceToOpStats(xspace, {OP_METRICS_DB, STEP_DB})); + return py::bytes(input_pipeline.SerializeAsString()); + }); + + m.def("xspace_to_tf_stats", [](const py::bytes& serialized_xspace_proto) { + tensorflow::profiler::XSpace xspace; + xspace.ParseFromString(serialized_xspace_proto); + tensorflow::profiler::TfStatsDatabase tf_stats_db = + tensorflow::profiler::ConvertOpStatsToTfStats( + ConvertXSpaceToOpStats(xspace, {OP_METRICS_DB})); + return py::bytes(tf_stats_db.SerializeAsString()); + }); + + m.def("xspace_to_kernel_stats", [](const py::bytes& serialized_xspace_proto) { + tensorflow::profiler::XSpace xspace; + xspace.ParseFromString(serialized_xspace_proto); + tensorflow::profiler::OpStats op_stats = + ConvertXSpaceToOpStats(xspace, {KERNEL_STATS_DB}); + return py::bytes(op_stats.kernel_stats_db().SerializeAsString()); + }); + + m.def("xspace_to_memory_profile", + [](const py::bytes& serialized_xspace_proto) { + tensorflow::profiler::XSpace xspace; + xspace.ParseFromString(serialized_xspace_proto); + std::string json_output; + tensorflow::profiler::ConvertXSpaceToMemoryProfileJson(xspace, + &json_output); + return py::bytes(json_output); + }); }; From a44821de91b5ac3a082b2253ec8e695a69ccf2be Mon Sep 17 00:00:00 2001 From: Gaurav Jain Date: Fri, 17 Jul 2020 15:27:48 -0700 Subject: [PATCH 0749/2522] Fix GlobalStepTests to specify the collection The name is meaningless in v2, even in V1 users should have been specifying the collections parameter for the variable. 
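As a quick illustration of that point, a minimal TF1-style sketch (assuming the `tensorflow.compat.v1` API; not part of this patch) of registering a global step in the GLOBAL_STEP collection explicitly instead of relying on the variable's name:

import tensorflow.compat.v1 as tf

with tf.Graph().as_default() as g:
  # Register the step counter in the GLOBAL_STEP collection explicitly;
  # the variable name alone is not what get_global_step() should rely on.
  global_step = tf.Variable(
      0,
      trainable=False,
      dtype=tf.int64,
      name=tf.GraphKeys.GLOBAL_STEP,
      collections=[tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.GLOBAL_VARIABLES])
  assert tf.train.get_global_step(g) is global_step
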
PiperOrigin-RevId: 321867276 Change-Id: I899ee8779c780be2bcc26d997ca5d3edc5eddbe6 --- tensorflow/python/training/training_util_test.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/training/training_util_test.py b/tensorflow/python/training/training_util_test.py index 5049d6e00a0..cf5942287a1 100644 --- a/tensorflow/python/training/training_util_test.py +++ b/tensorflow/python/training/training_util_test.py @@ -20,14 +20,12 @@ from __future__ import print_function from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops -from tensorflow.python.framework import test_util from tensorflow.python.ops import variables from tensorflow.python.platform import test from tensorflow.python.training import monitored_session from tensorflow.python.training import training_util -@test_util.run_v1_only('b/120545219') class GlobalStepTest(test.TestCase): def _assert_global_step(self, global_step, expected_dtype=dtypes.int64): @@ -38,11 +36,12 @@ class GlobalStepTest(test.TestCase): def test_invalid_dtype(self): with ops.Graph().as_default() as g: self.assertIsNone(training_util.get_global_step()) - variables.Variable( + variables.VariableV1( 0.0, trainable=False, dtype=dtypes.float32, - name=ops.GraphKeys.GLOBAL_STEP) + name=ops.GraphKeys.GLOBAL_STEP, + collections=[ops.GraphKeys.GLOBAL_STEP]) self.assertRaisesRegex(TypeError, 'does not have integer type', training_util.get_global_step) self.assertRaisesRegex(TypeError, 'does not have integer type', @@ -55,7 +54,8 @@ class GlobalStepTest(test.TestCase): [0], trainable=False, dtype=dtypes.int32, - name=ops.GraphKeys.GLOBAL_STEP) + name=ops.GraphKeys.GLOBAL_STEP, + collections=[ops.GraphKeys.GLOBAL_STEP]) self.assertRaisesRegex(TypeError, 'not scalar', training_util.get_global_step) self.assertRaisesRegex(TypeError, 'not scalar', @@ -79,7 +79,8 @@ class GlobalStepTest(test.TestCase): 0, trainable=False, dtype=dtypes.int32, - name=ops.GraphKeys.GLOBAL_STEP) + name=ops.GraphKeys.GLOBAL_STEP, + collections=[ops.GraphKeys.GLOBAL_STEP]) self._assert_global_step( training_util.get_global_step(), expected_dtype=dtypes.int32) self._assert_global_step( @@ -92,7 +93,6 @@ class GlobalStepTest(test.TestCase): self._assert_global_step(training_util.get_or_create_global_step(g)) -@test_util.run_v1_only('b/120545219') class GlobalStepReadTest(test.TestCase): def test_global_step_read_is_none_if_there_is_no_global_step(self): From 0086d126749e4affacd8d95f485f37757635e214 Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Fri, 17 Jul 2020 15:53:19 -0700 Subject: [PATCH 0750/2522] Adding Transaction API and signatures in commented form to Filesystem as preparation for final modification --- tensorflow/core/platform/file_system.cc | 21 +++++-- tensorflow/core/platform/file_system.h | 73 +++++++++++++++++++------ 2 files changed, 70 insertions(+), 24 deletions(-) diff --git a/tensorflow/core/platform/file_system.cc b/tensorflow/core/platform/file_system.cc index c9657e2339f..84257429de4 100644 --- a/tensorflow/core/platform/file_system.cc +++ b/tensorflow/core/platform/file_system.cc @@ -70,7 +70,7 @@ string FileSystem::TranslateName(const string& name) const { return this->CleanPath(path); } -Status FileSystem::IsDirectory(const string& name) { +Status FileSystem::IsDirectory(const string& name/*, TransactionToken *token */) { // Check if path exists. 
TF_RETURN_IF_ERROR(FileExists(name)); FileStatistics stat; @@ -86,10 +86,10 @@ Status FileSystem::HasAtomicMove(const string& path, bool* has_atomic_move) { return Status::OK(); } -void FileSystem::FlushCaches() {} +void FileSystem::FlushCaches(/* TransactionToken *token */) {} bool FileSystem::FilesExist(const std::vector& files, - std::vector* status) { + std::vector* status/*, TransactionToken *token */) { bool result = true; for (const auto& file : files) { Status s = FileExists(file); @@ -106,7 +106,7 @@ bool FileSystem::FilesExist(const std::vector& files, Status FileSystem::DeleteRecursively(const string& dirname, int64* undeleted_files, - int64* undeleted_dirs) { + int64* undeleted_dirs/*, TransactionToken *token */) { CHECK_NOTNULL(undeleted_files); CHECK_NOTNULL(undeleted_dirs); @@ -176,7 +176,7 @@ Status FileSystem::DeleteRecursively(const string& dirname, return ret; } -Status FileSystem::RecursivelyCreateDir(const string& dirname) { +Status FileSystem::RecursivelyCreateDir(const string& dirname/*, TransactionToken *token */) { StringPiece scheme, host, remaining_dir; this->ParseURI(dirname, &scheme, &host, &remaining_dir); std::vector sub_dirs; @@ -221,7 +221,7 @@ Status FileSystem::RecursivelyCreateDir(const string& dirname) { return Status::OK(); } -Status FileSystem::CopyFile(const string& src, const string& target) { +Status FileSystem::CopyFile(const string& src, const string& target/*, TransactionToken *token */) { return FileSystemCopyFile(this, src, this, target); } @@ -436,4 +436,13 @@ string FileSystem::CreateURI(StringPiece scheme, StringPiece host, return strings::StrCat(scheme, "://", host, path); } +string FileSystem::DecodeTransaction(const TransactionToken* token){ + if(token){ + std::stringstream oss; + oss<<"Token= "<token<<", Owner="<owner; + return oss.str(); + } + return "No Transaction"; +} + } // namespace tensorflow diff --git a/tensorflow/core/platform/file_system.h b/tensorflow/core/platform/file_system.h index 5f89ecd06df..954b6030560 100644 --- a/tensorflow/core/platform/file_system.h +++ b/tensorflow/core/platform/file_system.h @@ -68,7 +68,7 @@ class FileSystem { /// The ownership of the returned RandomAccessFile is passed to the caller /// and the object should be deleted when is not used. virtual tensorflow::Status NewRandomAccessFile( - const string& fname, std::unique_ptr* result) = 0; + const string& fname, std::unique_ptr* result/*, TransactionToken* token = nullptr */) = 0; /// \brief Creates an object that writes to a new file with the specified /// name. @@ -83,7 +83,7 @@ class FileSystem { /// The ownership of the returned WritableFile is passed to the caller /// and the object should be deleted when is not used. virtual tensorflow::Status NewWritableFile( - const string& fname, std::unique_ptr* result) = 0; + const string& fname, std::unique_ptr* result/*, TransactionToken* token = nullptr */) = 0; /// \brief Creates an object that either appends to an existing file, or /// writes to a new file (if the file does not exist to begin with). @@ -97,7 +97,7 @@ class FileSystem { /// The ownership of the returned WritableFile is passed to the caller /// and the object should be deleted when is not used. virtual tensorflow::Status NewAppendableFile( - const string& fname, std::unique_ptr* result) = 0; + const string& fname, std::unique_ptr* result/*, TransactionToken* token = nullptr */) = 0; /// \brief Creates a readonly region of memory with the file context. 
/// @@ -110,7 +110,7 @@ class FileSystem { /// The ownership of the returned ReadOnlyMemoryRegion is passed to the caller /// and the object should be deleted when is not used. virtual tensorflow::Status NewReadOnlyMemoryRegionFromFile( - const string& fname, std::unique_ptr* result) = 0; + const string& fname, std::unique_ptr* result/*, TransactionToken* token = nullptr */) = 0; /// Returns OK if the named path exists and NOT_FOUND otherwise. virtual tensorflow::Status FileExists(const string& fname) = 0; @@ -119,13 +119,13 @@ class FileSystem { /// if status is not null, populate the vector with a detailed status /// for each file. virtual bool FilesExist(const std::vector& files, - std::vector* status); + std::vector* status/*, TransactionToken* token = nullptr */); /// \brief Returns the immediate children in the given directory. /// /// The returned paths are relative to 'dir'. virtual tensorflow::Status GetChildren(const string& dir, - std::vector* result) = 0; + std::vector* result/*, TransactionToken* token = nullptr */) = 0; /// \brief Given a pattern, stores in *results the set of paths that matches /// that pattern. *results is cleared. @@ -150,7 +150,7 @@ class FileSystem { /// * UNIMPLEMENTED - Some underlying functions (like GetChildren) are not /// implemented virtual tensorflow::Status GetMatchingPaths(const string& pattern, - std::vector* results) = 0; + std::vector* results/*, TransactionToken* token = nullptr */) = 0; /// \brief Checks if the given filename matches the pattern. /// @@ -161,17 +161,17 @@ class FileSystem { /// \brief Obtains statistics for the given path. virtual tensorflow::Status Stat(const string& fname, - FileStatistics* stat) = 0; + FileStatistics* stat/*, TransactionToken* token = nullptr */) = 0; /// \brief Deletes the named file. - virtual tensorflow::Status DeleteFile(const string& fname) = 0; + virtual tensorflow::Status DeleteFile(const string& fname/*, TransactionToken* token = nullptr */) = 0; /// \brief Creates the specified directory. /// Typical return codes: /// * OK - successfully created the directory. /// * ALREADY_EXISTS - directory with name dirname already exists. /// * PERMISSION_DENIED - dirname is not writable. - virtual tensorflow::Status CreateDir(const string& dirname) = 0; + virtual tensorflow::Status CreateDir(const string& dirname/*, TransactionToken* token = nullptr */) = 0; /// \brief Creates the specified directory and all the necessary /// subdirectories. @@ -179,10 +179,10 @@ class FileSystem { /// * OK - successfully created the directory and sub directories, even if /// they were already created. /// * PERMISSION_DENIED - dirname or some subdirectory is not writable. - virtual tensorflow::Status RecursivelyCreateDir(const string& dirname); + virtual tensorflow::Status RecursivelyCreateDir(const string& dirname/*, TransactionToken* token = nullptr */); /// \brief Deletes the specified directory. - virtual tensorflow::Status DeleteDir(const string& dirname) = 0; + virtual tensorflow::Status DeleteDir(const string& dirname/*, TransactionToken* token = nullptr */) = 0; /// \brief Deletes the specified directory and all subdirectories and files /// underneath it. This is accomplished by traversing the directory tree @@ -210,18 +210,18 @@ class FileSystem { /// implemented virtual tensorflow::Status DeleteRecursively(const string& dirname, int64* undeleted_files, - int64* undeleted_dirs); + int64* undeleted_dirs/*, TransactionToken* token = nullptr */); /// \brief Stores the size of `fname` in `*file_size`. 
virtual tensorflow::Status GetFileSize(const string& fname, - uint64* file_size) = 0; + uint64* file_size/*, TransactionToken* token = nullptr */) = 0; /// \brief Overwrites the target if it exists. virtual tensorflow::Status RenameFile(const string& src, - const string& target) = 0; + const string& target/*, TransactionToken* token = nullptr */) = 0; /// \brief Copy the src to target. - virtual tensorflow::Status CopyFile(const string& src, const string& target); + virtual tensorflow::Status CopyFile(const string& src, const string& target/*, TransactionToken* token = nullptr */); /// \brief Translate an URI to a filename for the FileSystem implementation. /// @@ -241,7 +241,7 @@ class FileSystem { /// * NOT_FOUND - The path entry does not exist. /// * PERMISSION_DENIED - Insufficient permissions. /// * UNIMPLEMENTED - The file factory doesn't support directories. - virtual tensorflow::Status IsDirectory(const string& fname); + virtual tensorflow::Status IsDirectory(const string& fname/*, TransactionToken* token = nullptr */); /// \brief Returns whether the given path is on a file system /// that has atomic move capabilities. This can be used @@ -256,7 +256,7 @@ class FileSystem { virtual Status HasAtomicMove(const string& path, bool* has_atomic_move); /// \brief Flushes any cached filesystem objects from memory. - virtual void FlushCaches(); + virtual void FlushCaches(/* TransactionToken* token = nullptr */); /// \brief The separator this filesystem uses. /// @@ -346,6 +346,43 @@ class FileSystem { void ParseURI(StringPiece remaining, StringPiece* scheme, StringPiece* host, StringPiece* path) const; + // Transaction related API + + /// \brief Starts a new transaction + virtual tensorflow::Status StartTransaction(TransactionToken** token) { + token = nullptr; + return Status::OK(); + }; + + /// \brief Adds `path` to transaction in `token` + virtual tensorflow::Status AddToTransaction(const string& path, + TransactionToken* token) { + return Status::OK(); + }; + + /// \brief Ends transaction + virtual tensorflow::Status EndTransaction(TransactionToken* token) { + return Status::OK(); + }; + + /// \brief Get token for `path` or start a new transaction and add `path` to + /// it. + virtual tensorflow::Status GetTokenOrStartTransaction( + const string& path, TransactionToken** token) { + token = nullptr; + return Status::OK(); + }; + + /// \brief Return transaction for `path` or nullptr in `token` + virtual tensorflow::Status GetTransactionForPath(const string& path, + TransactionToken** token) { + return Status::OK(); + token = nullptr; + }; + + /// \brief Decode transaction to human readable string. + virtual string DecodeTransaction(const TransactionToken* token); + FileSystem() {} virtual ~FileSystem() = default; From e60cae601ae36e1b2b4d8d627a9a9eba48e577c5 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Fri, 17 Jul 2020 15:52:39 -0700 Subject: [PATCH 0751/2522] Remove references to private TF LazyLoader class by reimplementing it in Keras generic_utils. 
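The Keras-local copy added to generic_utils.py further down in this patch works the same way as the private tensorflow.python.util.lazy_loader helper it replaces: the loader object is only a stand-in module, and the real import is deferred until the first attribute access. A minimal usage sketch in the style of the version_utils.py hunk below; the Model attribute lookup is just an illustration of what triggers the import:

    from tensorflow.python.keras.utils.generic_utils import LazyLoader

    # The LazyLoader instance is only a placeholder module object; nothing
    # is imported here, which keeps keras/saving from pulling in
    # keras/engine at import time.
    training = LazyLoader(
        "training", globals(), "tensorflow.python.keras.engine.training")

    # The first attribute access calls importlib.import_module() and swaps
    # the placeholder for the real module in the parent's globals.
    model_cls = training.Model
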
PiperOrigin-RevId: 321871471 Change-Id: I58a0e236304cdd3fa415c8ce89d646bb09ef3039 --- tensorflow/python/keras/saving/hdf5_format.py | 2 +- .../python/keras/saving/saved_model/load.py | 2 +- .../python/keras/saving/saved_model/save.py | 2 +- .../keras/saving/saved_model/save_impl.py | 2 +- .../saved_model/serialized_attributes.py | 2 +- .../python/keras/saving/saved_model/utils.py | 2 +- .../keras/saving/saved_model_experimental.py | 2 +- .../python/keras/utils/generic_utils.py | 27 ++++++++++++++++++- .../python/keras/utils/version_utils.py | 14 +++++----- 9 files changed, 40 insertions(+), 15 deletions(-) diff --git a/tensorflow/python/keras/saving/hdf5_format.py b/tensorflow/python/keras/saving/hdf5_format.py index 3aa4fe1245a..7f6dac0cbc0 100644 --- a/tensorflow/python/keras/saving/hdf5_format.py +++ b/tensorflow/python/keras/saving/hdf5_format.py @@ -31,11 +31,11 @@ from tensorflow.python.keras.saving import model_config as model_config_lib from tensorflow.python.keras.saving import saving_utils from tensorflow.python.keras.saving.saved_model import json_utils from tensorflow.python.keras.utils import conv_utils +from tensorflow.python.keras.utils.generic_utils import LazyLoader from tensorflow.python.keras.utils.io_utils import ask_to_proceed_with_overwrite from tensorflow.python.ops import variables as variables_module from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util import serialization -from tensorflow.python.util.lazy_loader import LazyLoader # pylint: disable=g-import-not-at-top try: diff --git a/tensorflow/python/keras/saving/saved_model/load.py b/tensorflow/python/keras/saving/saved_model/load.py index 0b55e30c27b..c0160609ef4 100644 --- a/tensorflow/python/keras/saving/saved_model/load.py +++ b/tensorflow/python/keras/saving/saved_model/load.py @@ -34,6 +34,7 @@ from tensorflow.python.keras.saving.saved_model import utils from tensorflow.python.keras.saving.saved_model.serialized_attributes import CommonEndpoints from tensorflow.python.keras.utils import generic_utils from tensorflow.python.keras.utils import metrics_utils +from tensorflow.python.keras.utils.generic_utils import LazyLoader from tensorflow.python.platform import tf_logging as logging from tensorflow.python.saved_model import load as tf_load from tensorflow.python.saved_model import nested_structure_coder @@ -43,7 +44,6 @@ from tensorflow.python.training.tracking.tracking import delete_tracking from tensorflow.python.util import compat from tensorflow.python.util import nest from tensorflow.python.util import object_identity -from tensorflow.python.util.lazy_loader import LazyLoader # To avoid circular dependencies between keras/engine and keras/saving, # code in keras/saving must delay imports. 
diff --git a/tensorflow/python/keras/saving/saved_model/save.py b/tensorflow/python/keras/saving/saved_model/save.py index 7d6bc120758..a40856cbf54 100644 --- a/tensorflow/python/keras/saving/saved_model/save.py +++ b/tensorflow/python/keras/saving/saved_model/save.py @@ -22,9 +22,9 @@ from tensorflow.python.distribute import distribution_strategy_context from tensorflow.python.keras import backend as K from tensorflow.python.keras.saving import saving_utils from tensorflow.python.keras.saving.saved_model import save_impl +from tensorflow.python.keras.utils.generic_utils import LazyLoader from tensorflow.python.keras.utils.io_utils import ask_to_proceed_with_overwrite from tensorflow.python.saved_model import save as save_lib -from tensorflow.python.util.lazy_loader import LazyLoader # To avoid circular dependencies between keras/engine and keras/saving, # code in keras/saving must delay imports. diff --git a/tensorflow/python/keras/saving/saved_model/save_impl.py b/tensorflow/python/keras/saving/saved_model/save_impl.py index c2e4f96e127..a2c4d58d18e 100644 --- a/tensorflow/python/keras/saving/saved_model/save_impl.py +++ b/tensorflow/python/keras/saving/saved_model/save_impl.py @@ -38,13 +38,13 @@ from tensorflow.python.keras.saving.saved_model import load as keras_load from tensorflow.python.keras.saving.saved_model import serialized_attributes from tensorflow.python.keras.saving.saved_model import utils from tensorflow.python.keras.utils import version_utils +from tensorflow.python.keras.utils.generic_utils import LazyLoader from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training.tracking import base as trackable from tensorflow.python.training.tracking import data_structures from tensorflow.python.util import nest from tensorflow.python.util import tf_decorator from tensorflow.python.util import tf_inspect -from tensorflow.python.util.lazy_loader import LazyLoader # To avoid circular dependencies between keras/engine and keras/saving, # code in keras/saving must delay imports. diff --git a/tensorflow/python/keras/saving/saved_model/serialized_attributes.py b/tensorflow/python/keras/saving/saved_model/serialized_attributes.py index 3aff9265a13..ac17cc50225 100644 --- a/tensorflow/python/keras/saving/saved_model/serialized_attributes.py +++ b/tensorflow/python/keras/saving/saved_model/serialized_attributes.py @@ -21,9 +21,9 @@ from __future__ import print_function from tensorflow.python.eager import def_function from tensorflow.python.eager import function as defun from tensorflow.python.keras.saving.saved_model import constants +from tensorflow.python.keras.utils.generic_utils import LazyLoader from tensorflow.python.training.tracking import base as trackable from tensorflow.python.training.tracking.tracking import AutoTrackable -from tensorflow.python.util.lazy_loader import LazyLoader # TODO(b/134426265): Switch back to single-quotes to match the rest of the file # once the issue with copybara is fixed. 
diff --git a/tensorflow/python/keras/saving/saved_model/utils.py b/tensorflow/python/keras/saving/saved_model/utils.py index bd3f0c1b626..9c1926b11ab 100644 --- a/tensorflow/python/keras/saving/saved_model/utils.py +++ b/tensorflow/python/keras/saving/saved_model/utils.py @@ -24,10 +24,10 @@ from tensorflow.python.eager import context from tensorflow.python.keras import backend as K from tensorflow.python.keras.engine import base_layer_utils from tensorflow.python.keras.utils import tf_utils +from tensorflow.python.keras.utils.generic_utils import LazyLoader from tensorflow.python.training.tracking import layer_utils as trackable_layer_utils from tensorflow.python.util import tf_decorator from tensorflow.python.util import tf_inspect -from tensorflow.python.util.lazy_loader import LazyLoader # pylint:disable=g-inconsistent-quotes diff --git a/tensorflow/python/keras/saving/saved_model_experimental.py b/tensorflow/python/keras/saving/saved_model_experimental.py index 0c6714b8340..25628cd1ba3 100644 --- a/tensorflow/python/keras/saving/saved_model_experimental.py +++ b/tensorflow/python/keras/saving/saved_model_experimental.py @@ -29,6 +29,7 @@ from tensorflow.python.keras.optimizer_v2 import optimizer_v2 from tensorflow.python.keras.saving import model_config from tensorflow.python.keras.saving import saving_utils from tensorflow.python.keras.utils import mode_keys +from tensorflow.python.keras.utils.generic_utils import LazyLoader from tensorflow.python.lib.io import file_io from tensorflow.python.ops import variables from tensorflow.python.platform import tf_logging as logging @@ -42,7 +43,6 @@ from tensorflow.python.training.tracking import graph_view from tensorflow.python.util import compat from tensorflow.python.util import deprecation from tensorflow.python.util import nest -from tensorflow.python.util.lazy_loader import LazyLoader from tensorflow.python.util.tf_export import keras_export # To avoid circular dependencies between keras/engine and keras/saving, diff --git a/tensorflow/python/keras/utils/generic_utils.py b/tensorflow/python/keras/utils/generic_utils.py index f26e6a61947..e33a24b93dd 100644 --- a/tensorflow/python/keras/utils/generic_utils.py +++ b/tensorflow/python/keras/utils/generic_utils.py @@ -19,6 +19,7 @@ from __future__ import print_function import binascii import codecs +import importlib import marshal import os import re @@ -801,7 +802,31 @@ def populate_dict_with_module_objects(target_dict, modules, obj_filter): if obj_filter(obj): target_dict[name] = obj + +class LazyLoader(python_types.ModuleType): + """Lazily import a module, mainly to avoid pulling in large dependencies.""" + + def __init__(self, local_name, parent_module_globals, name): + self._local_name = local_name + self._parent_module_globals = parent_module_globals + super(LazyLoader, self).__init__(name) + + def _load(self): + """Load the module and insert it into the parent's globals.""" + # Import the target module and insert it into the parent's namespace + module = importlib.import_module(self.__name__) + self._parent_module_globals[self._local_name] = module + # Update this object's dict so that if someone keeps a reference to the + # LazyLoader, lookups are efficient (__getattr__ is only called on lookups + # that fail). 
+ self.__dict__.update(module.__dict__) + return module + + def __getattr__(self, item): + module = self._load() + return getattr(module, item) + + # Aliases - custom_object_scope = CustomObjectScope # pylint: disable=invalid-name diff --git a/tensorflow/python/keras/utils/version_utils.py b/tensorflow/python/keras/utils/version_utils.py index d3796dcbf92..4c300d0e459 100644 --- a/tensorflow/python/keras/utils/version_utils.py +++ b/tensorflow/python/keras/utils/version_utils.py @@ -20,27 +20,27 @@ from __future__ import print_function from tensorflow.python.eager import context from tensorflow.python.framework import ops -from tensorflow.python.util import lazy_loader +from tensorflow.python.keras.utils.generic_utils import LazyLoader # TODO(b/134426265): Switch back to single-quotes once the issue # with copybara is fixed. # pylint: disable=g-inconsistent-quotes -training = lazy_loader.LazyLoader( +training = LazyLoader( "training", globals(), "tensorflow.python.keras.engine.training") -training_v1 = lazy_loader.LazyLoader( +training_v1 = LazyLoader( "training_v1", globals(), "tensorflow.python.keras.engine.training_v1") -base_layer = lazy_loader.LazyLoader( +base_layer = LazyLoader( "base_layer", globals(), "tensorflow.python.keras.engine.base_layer") -base_layer_v1 = lazy_loader.LazyLoader( +base_layer_v1 = LazyLoader( "base_layer_v1", globals(), "tensorflow.python.keras.engine.base_layer_v1") -callbacks = lazy_loader.LazyLoader( +callbacks = LazyLoader( "callbacks", globals(), "tensorflow.python.keras.callbacks") -callbacks_v1 = lazy_loader.LazyLoader( +callbacks_v1 = LazyLoader( "callbacks_v1", globals(), "tensorflow.python.keras.callbacks_v1") From c2e4affcb5097ab14ec06bcbf019089cbf63ca2a Mon Sep 17 00:00:00 2001 From: Pankaj Kanwar Date: Fri, 17 Jul 2020 16:01:02 -0700 Subject: [PATCH 0752/2522] Fix to point to the latest gcc & g++ version. PiperOrigin-RevId: 321872911 Change-Id: I0a2a2c3eeb86988c5642e3c8c30733fc834bf87a --- tensorflow/tools/ci_build/horovod/gpu/nightly.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/tools/ci_build/horovod/gpu/nightly.sh b/tensorflow/tools/ci_build/horovod/gpu/nightly.sh index ed701e6a9f7..060193614c3 100644 --- a/tensorflow/tools/ci_build/horovod/gpu/nightly.sh +++ b/tensorflow/tools/ci_build/horovod/gpu/nightly.sh @@ -57,6 +57,10 @@ sudo make all install export LD_LIBRARY_PATH=/usr/local/lib/openmpi sudo ldconfig +sudo update-alternatives --install /usr/bin/gcc gcc /dt7/usr/bin/gcc 60 --slave /usr/bin/g++ g++ /dt7/usr/bin/g++ + +g++ --version + # Install Horovod. cd .. HOROVOD_WITH_TENSORFLOW=1 From 268838c2302738e42355387ee1786e03c5dd3596 Mon Sep 17 00:00:00 2001 From: Gaurav Jain Date: Fri, 17 Jul 2020 16:07:56 -0700 Subject: [PATCH 0753/2522] Use standard type conversion before trying default When converting a non-tensor argument to an op we were using the default dtype for conversion purposes. This leads to inconsistent behavior with the type that is inferred during regular tensor conversion. For example, python integers are either treated as int32 or int64 depending on their size when converting with tf.convert_to_tensor. Instead, we should try to use standard tensor conversion with the expectation of getting the correct type for the argument. There are cases where this doesn't work such as with empty tensors. In that case we should fallback to use the default_dtype to ensure we have a valid dtype. 
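A small sketch of the behavior difference at the public API level, assuming a build with this change applied (the patch's own tests exercise the internal execute.args_to_matching_eager helper and the reshape kernel directly):

    import tensorflow as tf

    # tf.convert_to_tensor infers int32 or int64 from the magnitude of a
    # Python integer.
    print(tf.convert_to_tensor(3).dtype)      # <dtype: 'int32'>
    print(tf.convert_to_tensor(2**40).dtype)  # <dtype: 'int64'>

    # Op arguments now go through the same conversion before falling back
    # to the attr's default dtype, so shape-like inputs keep the inferred
    # type: TensorShape/Dimension values convert cleanly, and a dimension
    # larger than 2**31 - 1 stays int64 instead of being truncated.
    x = tf.zeros([1, 100])
    y = tf.reshape(x, tf.TensorShape([100, 1]))
    print(y.shape)  # (100, 1)
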
PiperOrigin-RevId: 321874236 Change-Id: If3c6abb7eb15e1743fc5aec4a71c51ff964ef527 --- RELEASE.md | 3 ++ tensorflow/python/eager/execute.py | 27 +++++++++--- tensorflow/python/eager/ops_test.py | 35 ++++++++++++--- tensorflow/python/framework/constant_op.py | 2 +- tensorflow/python/framework/op_def_library.py | 43 ++++++++++++++++--- tensorflow/python/framework/python_op_gen.cc | 10 +++++ .../python/kernel_tests/reshape_op_test.py | 19 ++++++++ 7 files changed, 120 insertions(+), 19 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 33b3bb16041..74c4adfc97c 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -35,6 +35,9 @@ * `tf.types.experimental.TensorLike` is a new `Union` type that can be used as type annotation for variables representing a Tensor or a value that can be converted to Tensor by `tf.convert_to_tensor`. + * Calling ops with a python constants or numpy values is now consistent with + tf.convert_to_tensor behavior. This avoids operations like tf.reshape + truncating inputs such as from int64 to int32. * `tf.data`: * Added optional `exclude_cols` parameter to CsvDataset. This parameter is the complement of `select_cols`; at most one of these should be specified. diff --git a/tensorflow/python/eager/execute.py b/tensorflow/python/eager/execute.py index e206262309e..32808071e63 100644 --- a/tensorflow/python/eager/execute.py +++ b/tensorflow/python/eager/execute.py @@ -233,7 +233,7 @@ def make_tensor(v, arg_name): (repr(v), arg_name)) -def args_to_matching_eager(l, ctx, default_dtype=None): +def args_to_matching_eager(l, ctx, allowed_dtypes, default_dtype=None): """Convert sequence `l` to eager same-type Tensors.""" if (not l) and (default_dtype is not None): return default_dtype, [] # List is empty; assume default dtype. @@ -243,8 +243,6 @@ def args_to_matching_eager(l, ctx, default_dtype=None): break else: # note: intentional for-else return l[0]._datatype_enum(), l # pylint: disable=protected-access - # TODO(josh11b): Could we do a better job if we also passed in the - # allowed dtypes when that was known? # Is some input already a Tensor with a dtype? dtype = None @@ -256,13 +254,28 @@ def args_to_matching_eager(l, ctx, default_dtype=None): if dtype is None: # Infer a dtype based on the first value, and use that dtype for the # remaining values. + ret = [] for t in l: - ret.append( - ops.convert_to_tensor( - t, dtype, preferred_dtype=default_dtype, ctx=ctx)) + tensor = None + # First see if we can get a valid dtype with the default conversion + # and see if it matches an allowed dtypes. Some ops like ConcatV2 may + # not list allowed dtypes, in which case we should skip this. + if dtype is None and allowed_dtypes: + tensor = ops.convert_to_tensor(t, ctx=ctx) + # If we did not match an allowed dtype, try again with the default + # dtype. This could be because we have an empty tensor and thus we + # picked the wrong type. 
+ if tensor.dtype not in allowed_dtypes: + tensor = None + + if tensor is None: + tensor = ops.convert_to_tensor( + t, dtype, preferred_dtype=default_dtype, ctx=ctx) + + ret.append(tensor) if dtype is None: - dtype = ret[-1].dtype + dtype = tensor.dtype else: ret = [ops.convert_to_tensor(t, dtype, ctx=ctx) for t in l] diff --git a/tensorflow/python/eager/ops_test.py b/tensorflow/python/eager/ops_test.py index 0e9b6283237..b996d0dd0c4 100644 --- a/tensorflow/python/eager/ops_test.py +++ b/tensorflow/python/eager/ops_test.py @@ -326,17 +326,42 @@ class OpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testArgsToMatchingEagerDefault(self): # Uses default ctx = context.context() - t, r = execute.args_to_matching_eager([[3, 4]], ctx, dtypes.int32) + allowed_dtypes = [dtypes.int32, dtypes.int64] + + # Follows standard int conversion rules + t, r = execute.args_to_matching_eager([[3, 4]], ctx, allowed_dtypes, + dtypes.int32) self.assertEqual(t, dtypes.int32) self.assertEqual(r[0].dtype, dtypes.int32) - t, r = execute.args_to_matching_eager([[3, 4]], ctx, dtypes.int64) + t, r = execute.args_to_matching_eager([[3, 4]], ctx, allowed_dtypes, + dtypes.int64) + self.assertEqual(t, dtypes.int32) + self.assertEqual(r[0].dtype, dtypes.int32) + # Use int64 since it is a better fit + t, r = execute.args_to_matching_eager([[2**48]], ctx, allowed_dtypes, + dtypes.int32) self.assertEqual(t, dtypes.int64) self.assertEqual(r[0].dtype, dtypes.int64) - t, r = execute.args_to_matching_eager([], ctx, dtypes.int64) + + # When the regular tensor conversion fails, then use the default type as a + # hint. + allowed_dtypes = [dtypes.uint32, dtypes.uint32] + t, r = execute.args_to_matching_eager([[3, 4]], ctx, allowed_dtypes, + dtypes.uint32) + self.assertEqual(t, dtypes.uint32) + self.assertEqual(r[0].dtype, dtypes.uint32) + t, r = execute.args_to_matching_eager([[3, 4]], ctx, allowed_dtypes, + dtypes.uint64) + self.assertEqual(t, dtypes.uint64) + self.assertEqual(r[0].dtype, dtypes.uint64) + + t, r = execute.args_to_matching_eager([], ctx, allowed_dtypes, dtypes.int64) self.assertEqual(t, dtypes.int64) + # Doesn't use default - t, r = execute.args_to_matching_eager( - [['string', 'arg']], ctx, dtypes.int32) + allowed_dtypes = [dtypes.int32, dtypes.string] + t, r = execute.args_to_matching_eager([['string', 'arg']], ctx, + allowed_dtypes, dtypes.int32) self.assertEqual(t, dtypes.string) self.assertEqual(r[0].dtype, dtypes.string) diff --git a/tensorflow/python/framework/constant_op.py b/tensorflow/python/framework/constant_op.py index 4bf67c75162..343856b6749 100644 --- a/tensorflow/python/framework/constant_op.py +++ b/tensorflow/python/framework/constant_op.py @@ -40,7 +40,7 @@ def _eager_reshape(tensor, shape, ctx): """Eager-only version of Reshape op; requires tensor is an eager Tensor.""" attr_t = tensor._datatype_enum() # pylint: disable=protected-access attr_tshape, (shape,) = execute.args_to_matching_eager( - [shape], ctx, dtypes.int32) + [shape], ctx, [dtypes.int32, dtypes.int64], dtypes.int32) inputs_flat = [tensor, shape] attrs = ("T", attr_t, "Tshape", attr_tshape) result, = execute.execute( diff --git a/tensorflow/python/framework/op_def_library.py b/tensorflow/python/framework/op_def_library.py index 6c72d38c197..17e06b79f74 100644 --- a/tensorflow/python/framework/op_def_library.py +++ b/tensorflow/python/framework/op_def_library.py @@ -337,6 +337,7 @@ def _apply_op_helper(op_type_name, name=None, **keywords): # pylint: disable=in # on the other. 
Handling this will require restructuring this code # significantly. default_type_attr_map = {} + allowed_list_attr_map = {} for attr_def in op_def.attr: if attr_def.type != "type": continue @@ -344,6 +345,8 @@ def _apply_op_helper(op_type_name, name=None, **keywords): # pylint: disable=in if attr_def.HasField("default_value"): default_type_attr_map[key] = dtypes.as_dtype( attr_def.default_value.type) + if attr_def.HasField("allowed_values"): + allowed_list_attr_map[key] = attr_def.allowed_values.list.type # Requires that op_def has passed validation (using the C++ # ValidateOpDef() from ../framework/op_def_util.h). @@ -451,6 +454,7 @@ def _apply_op_helper(op_type_name, name=None, **keywords): # pylint: disable=in # arguments to that type. dtype = None default_dtype = None + allowed_list = None if input_arg.type != types_pb2.DT_INVALID: dtype = input_arg.type elif input_arg.type_attr in attrs: @@ -460,14 +464,41 @@ def _apply_op_helper(op_type_name, name=None, **keywords): # pylint: disable=in # so we prefer the attr's default, so code that adds a new attr # with a default is backwards compatible. default_dtype = default_type_attr_map[input_arg.type_attr] + allowed_list = allowed_list_attr_map.get(input_arg.type_attr) try: - values = ops.convert_to_tensor( - values, - name=input_arg.name, - dtype=dtype, - as_ref=input_arg.is_ref, - preferred_dtype=default_dtype) + # First see if we can get a valid dtype with the default conversion + # and see if it matches an allowed dtypes. Some ops like ConcatV2 may + # not list allowed dtypes, in which case we should skip this. + if dtype is None and allowed_list: + inferred = None + try: + inferred = ops.convert_to_tensor( + values, name=input_arg.name, as_ref=input_arg.is_ref) + except TypeError as err: + # When converting a python object such as a list of Dimensions, we + # need a dtype to be specified, thus tensor conversion may throw + # an exception which we will ignore and try again below. + pass + + # If we did not match an allowed dtype, try again with the default + # dtype. This could be because we have an empty tensor and thus we + # picked the wrong type. 
+ if inferred is not None and inferred.dtype in allowed_list: + values = inferred + else: + values = ops.convert_to_tensor( + values, + name=input_arg.name, + as_ref=input_arg.is_ref, + preferred_dtype=default_dtype) + else: + values = ops.convert_to_tensor( + values, + name=input_arg.name, + dtype=dtype, + as_ref=input_arg.is_ref, + preferred_dtype=default_dtype) except TypeError as err: if dtype is None: raise err diff --git a/tensorflow/python/framework/python_op_gen.cc b/tensorflow/python/framework/python_op_gen.cc index c6c5dfb7b37..f81e99242bf 100644 --- a/tensorflow/python/framework/python_op_gen.cc +++ b/tensorflow/python/framework/python_op_gen.cc @@ -1008,6 +1008,16 @@ void GenEagerPythonOp::AddEagerInferredAttrs(const string& indentation) { FlattenInputs(&arg_list->second, &output_sizes); string conversion = strings::StrCat("_execute.args_to_matching_eager(", flattened, ", ctx"); + + strings::StrAppend(&conversion, ", ["); + for (int t : attr.allowed_values().list().type()) { + DataType dtype = static_cast(t); + const string py_dtype = + python_op_gen_internal::DataTypeToPython(dtype, "_dtypes."); + strings::StrAppend(&conversion, py_dtype, ", "); + } + strings::StrAppend(&conversion, "]"); + if (attr.has_default_value()) { strings::StrAppend( &conversion, ", ", diff --git a/tensorflow/python/kernel_tests/reshape_op_test.py b/tensorflow/python/kernel_tests/reshape_op_test.py index 0d54138e053..e7e1c7023d8 100644 --- a/tensorflow/python/kernel_tests/reshape_op_test.py +++ b/tensorflow/python/kernel_tests/reshape_op_test.py @@ -22,6 +22,7 @@ import numpy as np from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradient_checker @@ -191,6 +192,24 @@ class ReshapeTest(test.TestCase): dtypes.float32, shape=[None, 37, None]))) self.assertEqual([None, 37, None], y.get_shape().as_list()) + def testTensorShape(self): + x = array_ops.zeros([1, 100]) + y = array_ops.reshape( + x, [tensor_shape.Dimension(100), + tensor_shape.Dimension(1)]) + self.assertEqual([100, 1], y.get_shape().as_list()) + y = array_ops.reshape(x, tensor_shape.TensorShape([100, 1])) + self.assertEqual([100, 1], y.get_shape().as_list()) + + def testInt64Shape(self): + x = array_ops.zeros([50000, 50000]) + # Provide dimension larger than int32 + y = array_ops.reshape(x, [50000**2]) + self.assertEqual([50000**2], y.get_shape().as_list()) + # Even if first dimension is within int32, ensure we correctly go to int64 + y = array_ops.reshape(x, [1, 50000**2]) + self.assertEqual([1, 50000**2], y.get_shape().as_list()) + if __name__ == "__main__": test.main() From 76468b290d4ee54b2d2db0582e85086d80916f69 Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Fri, 17 Jul 2020 16:13:03 -0700 Subject: [PATCH 0754/2522] Cache first input and output in the interpreter. Upcoming changes to memory allocations will remove the global TfLiteTensor allocation. This change prepares the interpreter by caching pointers to the first input and output tensor. This is required because all TfLiteTensor getters on the interpreter will be persistent. Calling these getters over-and-over with the same index will cause applications to run out of tail space. 
PiperOrigin-RevId: 321874981 Change-Id: Ibb0add16ffd8a01cb5312d0913a96c3c69001f1f --- tensorflow/lite/micro/micro_interpreter.cc | 34 +++++++++++++++++++--- tensorflow/lite/micro/micro_interpreter.h | 5 ++++ 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/tensorflow/lite/micro/micro_interpreter.cc b/tensorflow/lite/micro/micro_interpreter.cc index ef2ae2b7644..e259d9d7adb 100644 --- a/tensorflow/lite/micro/micro_interpreter.cc +++ b/tensorflow/lite/micro/micro_interpreter.cc @@ -91,7 +91,9 @@ MicroInterpreter::MicroInterpreter(const Model* model, error_reporter)), tensors_allocated_(false), initialization_status_(kTfLiteError), - context_helper_(error_reporter_, &allocator_) { + context_helper_(error_reporter_, &allocator_), + input_tensor_(nullptr), + output_tensor_(nullptr) { Init(profiler); } @@ -106,7 +108,9 @@ MicroInterpreter::MicroInterpreter(const Model* model, allocator_(*allocator), tensors_allocated_(false), initialization_status_(kTfLiteError), - context_helper_(error_reporter_, &allocator_) { + context_helper_(error_reporter_, &allocator_), + input_tensor_(nullptr), + output_tensor_(nullptr) { Init(profiler); } @@ -316,7 +320,18 @@ TfLiteTensor* MicroInterpreter::input(size_t index) { length); return nullptr; } - return &(context_.tensors[inputs().Get(index)]); + if (index != 0) { + TF_LITE_REPORT_ERROR(error_reporter_, + "Input tensors not at index 0 will allocate from the " + "persistent memory arena in the future!"); + return &(context_.tensors[inputs().Get(index)]); + } + if (input_tensor_ == nullptr) { + // TODO(b/160894903): This API will allocate TfLiteTensor structs from + // persistent (tail) memory and cache on this pointer. + input_tensor_ = &(context_.tensors[inputs().Get(index)]); + } + return input_tensor_; } TfLiteTensor* MicroInterpreter::output(size_t index) { @@ -327,7 +342,18 @@ TfLiteTensor* MicroInterpreter::output(size_t index) { length); return nullptr; } - return &(context_.tensors[outputs().Get(index)]); + if (index != 0) { + TF_LITE_REPORT_ERROR(error_reporter_, + "Output tensors not at index 0 will allocate from the " + "persistent memory arena in the future!"); + return &(context_.tensors[outputs().Get(index)]); + } + if (output_tensor_ == nullptr) { + // TODO(b/160894903): This API will allocate TfLiteTensor structs from + // persistent (tail) memory and cache on this pointer. + output_tensor_ = &(context_.tensors[outputs().Get(index)]); + } + return output_tensor_; } TfLiteTensor* MicroInterpreter::tensor(size_t index) { diff --git a/tensorflow/lite/micro/micro_interpreter.h b/tensorflow/lite/micro/micro_interpreter.h index 6e9e5eca572..679a0f13f54 100644 --- a/tensorflow/lite/micro/micro_interpreter.h +++ b/tensorflow/lite/micro/micro_interpreter.h @@ -191,6 +191,11 @@ class MicroInterpreter { const SubGraph* subgraph_; internal::ContextHelper context_helper_; + + // TODO(b/160894903): Clean these pointers up when all APIs are updated to new + // TfLiteEvalTensor buffers. + TfLiteTensor* input_tensor_; + TfLiteTensor* output_tensor_; }; } // namespace tflite From 872de1c6817692d5db478d6b78b983152613d650 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Fri, 17 Jul 2020 16:20:07 -0700 Subject: [PATCH 0755/2522] Remove references to PartitionedVariable from the V2 Keras codebase. Note that references remain under legacy/ and in V1-only classes, with no API dependency (we use an attribute to identify them instead). We can't remove them without breaking backwards compatibility. 
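The V2 code paths now identify split variables by duck typing rather than an isinstance check against PartitionedVariable. A minimal sketch of the is_split_variable helper this change adds to base_layer_utils.py; the FakeShardedVariable class is hypothetical, included only to show which attribute the check keys on:

    def is_split_variable(v):
      """True if `v` is either a PartitionedVariable or a ShardedVariable."""
      # PartitionedVariable carries `_variable_list` and ShardedVariable
      # carries `_variables`; testing for the attributes avoids importing
      # either class.
      return hasattr(v, '_variable_list') or hasattr(v, '_variables')


    class FakeShardedVariable(object):
      """Hypothetical stand-in exposing the attribute ShardedVariable has."""

      def __init__(self, variables):
        self._variables = variables


    assert is_split_variable(FakeShardedVariable([]))
    assert not is_split_variable(object())
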
PiperOrigin-RevId: 321876199 Change-Id: I5a267515ea4f6c7e360b29f26a86e2489e8967c3 --- tensorflow/python/keras/engine/base_layer.py | 25 ++++++------------- .../python/keras/engine/base_layer_utils.py | 5 ++++ .../python/keras/engine/base_layer_v1.py | 4 +-- .../keras/layers/legacy_rnn/rnn_cell_impl.py | 3 ++- .../python/keras/legacy_tf_layers/base.py | 3 ++- .../golden/v1/tensorflow.keras.-model.pbtxt | 2 +- .../v1/tensorflow.keras.-sequential.pbtxt | 2 +- ...low.keras.experimental.-linear-model.pbtxt | 2 +- ....experimental.-peephole-l-s-t-m-cell.pbtxt | 2 +- ...eras.experimental.-sequence-features.pbtxt | 2 +- ....keras.experimental.-wide-deep-model.pbtxt | 2 +- ...ow.keras.layers.-abstract-r-n-n-cell.pbtxt | 2 +- .../tensorflow.keras.layers.-activation.pbtxt | 2 +- ...eras.layers.-activity-regularization.pbtxt | 2 +- .../v1/tensorflow.keras.layers.-add.pbtxt | 2 +- ...low.keras.layers.-additive-attention.pbtxt | 2 +- ...nsorflow.keras.layers.-alpha-dropout.pbtxt | 2 +- .../tensorflow.keras.layers.-attention.pbtxt | 2 +- ...low.keras.layers.-average-pooling1-d.pbtxt | 2 +- ...low.keras.layers.-average-pooling2-d.pbtxt | 2 +- ...low.keras.layers.-average-pooling3-d.pbtxt | 2 +- .../v1/tensorflow.keras.layers.-average.pbtxt | 2 +- ...tensorflow.keras.layers.-avg-pool1-d.pbtxt | 2 +- ...tensorflow.keras.layers.-avg-pool2-d.pbtxt | 2 +- ...tensorflow.keras.layers.-avg-pool3-d.pbtxt | 2 +- ...ow.keras.layers.-batch-normalization.pbtxt | 2 +- ...nsorflow.keras.layers.-bidirectional.pbtxt | 2 +- ...tensorflow.keras.layers.-concatenate.pbtxt | 2 +- ...orflow.keras.layers.-conv-l-s-t-m2-d.pbtxt | 2 +- ...flow.keras.layers.-conv1-d-transpose.pbtxt | 2 +- .../v1/tensorflow.keras.layers.-conv1-d.pbtxt | 2 +- ...flow.keras.layers.-conv2-d-transpose.pbtxt | 2 +- .../v1/tensorflow.keras.layers.-conv2-d.pbtxt | 2 +- ...flow.keras.layers.-conv3-d-transpose.pbtxt | 2 +- .../v1/tensorflow.keras.layers.-conv3-d.pbtxt | 2 +- ...ras.layers.-convolution1-d-transpose.pbtxt | 2 +- ...sorflow.keras.layers.-convolution1-d.pbtxt | 2 +- ...ras.layers.-convolution2-d-transpose.pbtxt | 2 +- ...sorflow.keras.layers.-convolution2-d.pbtxt | 2 +- ...ras.layers.-convolution3-d-transpose.pbtxt | 2 +- ...sorflow.keras.layers.-convolution3-d.pbtxt | 2 +- ...tensorflow.keras.layers.-cropping1-d.pbtxt | 2 +- ...tensorflow.keras.layers.-cropping2-d.pbtxt | 2 +- ...tensorflow.keras.layers.-cropping3-d.pbtxt | 2 +- ...sorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt | 2 +- ...rflow.keras.layers.-cu-d-n-n-l-s-t-m.pbtxt | 2 +- ...sorflow.keras.layers.-dense-features.pbtxt | 2 +- .../v1/tensorflow.keras.layers.-dense.pbtxt | 2 +- ...flow.keras.layers.-depthwise-conv2-d.pbtxt | 2 +- .../v1/tensorflow.keras.layers.-dot.pbtxt | 2 +- .../v1/tensorflow.keras.layers.-dropout.pbtxt | 2 +- .../v1/tensorflow.keras.layers.-e-l-u.pbtxt | 2 +- .../tensorflow.keras.layers.-embedding.pbtxt | 2 +- .../v1/tensorflow.keras.layers.-flatten.pbtxt | 2 +- .../tensorflow.keras.layers.-g-r-u-cell.pbtxt | 2 +- .../v1/tensorflow.keras.layers.-g-r-u.pbtxt | 2 +- ...rflow.keras.layers.-gaussian-dropout.pbtxt | 2 +- ...sorflow.keras.layers.-gaussian-noise.pbtxt | 2 +- ...as.layers.-global-average-pooling1-d.pbtxt | 2 +- ...as.layers.-global-average-pooling2-d.pbtxt | 2 +- ...as.layers.-global-average-pooling3-d.pbtxt | 2 +- ...low.keras.layers.-global-avg-pool1-d.pbtxt | 2 +- ...low.keras.layers.-global-avg-pool2-d.pbtxt | 2 +- ...low.keras.layers.-global-avg-pool3-d.pbtxt | 2 +- ...low.keras.layers.-global-max-pool1-d.pbtxt | 2 +- 
...low.keras.layers.-global-max-pool2-d.pbtxt | 2 +- ...low.keras.layers.-global-max-pool3-d.pbtxt | 2 +- ....keras.layers.-global-max-pooling1-d.pbtxt | 2 +- ....keras.layers.-global-max-pooling2-d.pbtxt | 2 +- ....keras.layers.-global-max-pooling3-d.pbtxt | 2 +- ...tensorflow.keras.layers.-input-layer.pbtxt | 2 +- ...ensorflow.keras.layers.-l-s-t-m-cell.pbtxt | 2 +- .../v1/tensorflow.keras.layers.-l-s-t-m.pbtxt | 2 +- .../v1/tensorflow.keras.layers.-lambda.pbtxt | 2 +- ...ow.keras.layers.-layer-normalization.pbtxt | 2 +- .../v1/tensorflow.keras.layers.-layer.pbtxt | 2 +- ...ensorflow.keras.layers.-leaky-re-l-u.pbtxt | 2 +- ...w.keras.layers.-locally-connected1-d.pbtxt | 2 +- ...w.keras.layers.-locally-connected2-d.pbtxt | 2 +- .../v1/tensorflow.keras.layers.-masking.pbtxt | 2 +- ...tensorflow.keras.layers.-max-pool1-d.pbtxt | 2 +- ...tensorflow.keras.layers.-max-pool2-d.pbtxt | 2 +- ...tensorflow.keras.layers.-max-pool3-d.pbtxt | 2 +- ...sorflow.keras.layers.-max-pooling1-d.pbtxt | 2 +- ...sorflow.keras.layers.-max-pooling2-d.pbtxt | 2 +- ...sorflow.keras.layers.-max-pooling3-d.pbtxt | 2 +- .../v1/tensorflow.keras.layers.-maximum.pbtxt | 2 +- .../v1/tensorflow.keras.layers.-minimum.pbtxt | 2 +- .../tensorflow.keras.layers.-multiply.pbtxt | 2 +- .../tensorflow.keras.layers.-p-re-l-u.pbtxt | 2 +- .../v1/tensorflow.keras.layers.-permute.pbtxt | 2 +- .../v1/tensorflow.keras.layers.-r-n-n.pbtxt | 2 +- .../v1/tensorflow.keras.layers.-re-l-u.pbtxt | 2 +- ...nsorflow.keras.layers.-repeat-vector.pbtxt | 2 +- .../v1/tensorflow.keras.layers.-reshape.pbtxt | 2 +- ...flow.keras.layers.-separable-conv1-d.pbtxt | 2 +- ...flow.keras.layers.-separable-conv2-d.pbtxt | 2 +- ...ras.layers.-separable-convolution1-d.pbtxt | 2 +- ...ras.layers.-separable-convolution2-d.pbtxt | 2 +- ...flow.keras.layers.-simple-r-n-n-cell.pbtxt | 2 +- ...ensorflow.keras.layers.-simple-r-n-n.pbtxt | 2 +- .../v1/tensorflow.keras.layers.-softmax.pbtxt | 2 +- ...low.keras.layers.-spatial-dropout1-d.pbtxt | 2 +- ...low.keras.layers.-spatial-dropout2-d.pbtxt | 2 +- ...low.keras.layers.-spatial-dropout3-d.pbtxt | 2 +- ...ow.keras.layers.-stacked-r-n-n-cells.pbtxt | 2 +- .../tensorflow.keras.layers.-subtract.pbtxt | 2 +- ...low.keras.layers.-thresholded-re-l-u.pbtxt | 2 +- ...rflow.keras.layers.-time-distributed.pbtxt | 2 +- ...sorflow.keras.layers.-up-sampling1-d.pbtxt | 2 +- ...sorflow.keras.layers.-up-sampling2-d.pbtxt | 2 +- ...sorflow.keras.layers.-up-sampling3-d.pbtxt | 2 +- .../v1/tensorflow.keras.layers.-wrapper.pbtxt | 2 +- ...orflow.keras.layers.-zero-padding1-d.pbtxt | 2 +- ...orflow.keras.layers.-zero-padding2-d.pbtxt | 2 +- ...orflow.keras.layers.-zero-padding3-d.pbtxt | 2 +- ...as.layers.experimental.-einsum-dense.pbtxt | 2 +- ...xperimental.-random-fourier-features.pbtxt | 2 +- ...tal.preprocessing.-category-crossing.pbtxt | 2 +- ...tal.preprocessing.-category-encoding.pbtxt | 2 +- ...erimental.preprocessing.-center-crop.pbtxt | 2 +- ...mental.preprocessing.-discretization.pbtxt | 2 +- ....experimental.preprocessing.-hashing.pbtxt | 2 +- ...mental.preprocessing.-integer-lookup.pbtxt | 2 +- ...imental.preprocessing.-normalization.pbtxt | 2 +- ...l.preprocessing.-preprocessing-layer.pbtxt | 2 +- ...ental.preprocessing.-random-contrast.pbtxt | 2 +- ...erimental.preprocessing.-random-crop.pbtxt | 2 +- ...erimental.preprocessing.-random-flip.pbtxt | 2 +- ...imental.preprocessing.-random-height.pbtxt | 2 +- ...ental.preprocessing.-random-rotation.pbtxt | 2 +- ...al.preprocessing.-random-translation.pbtxt | 2 +- 
...rimental.preprocessing.-random-width.pbtxt | 2 +- ...erimental.preprocessing.-random-zoom.pbtxt | 2 +- ...xperimental.preprocessing.-rescaling.pbtxt | 2 +- ...experimental.preprocessing.-resizing.pbtxt | 2 +- ...imental.preprocessing.-string-lookup.pbtxt | 2 +- ...al.preprocessing.-text-vectorization.pbtxt | 2 +- .../v1/tensorflow.keras.models.-model.pbtxt | 2 +- .../tensorflow.keras.models.-sequential.pbtxt | 2 +- .../golden/v2/tensorflow.keras.-model.pbtxt | 2 +- .../v2/tensorflow.keras.-sequential.pbtxt | 2 +- ...low.keras.experimental.-linear-model.pbtxt | 2 +- ....experimental.-peephole-l-s-t-m-cell.pbtxt | 2 +- ...eras.experimental.-sequence-features.pbtxt | 2 +- ....keras.experimental.-wide-deep-model.pbtxt | 2 +- ...ow.keras.layers.-abstract-r-n-n-cell.pbtxt | 2 +- .../tensorflow.keras.layers.-activation.pbtxt | 2 +- ...eras.layers.-activity-regularization.pbtxt | 2 +- .../v2/tensorflow.keras.layers.-add.pbtxt | 2 +- ...low.keras.layers.-additive-attention.pbtxt | 2 +- ...nsorflow.keras.layers.-alpha-dropout.pbtxt | 2 +- .../tensorflow.keras.layers.-attention.pbtxt | 2 +- ...low.keras.layers.-average-pooling1-d.pbtxt | 2 +- ...low.keras.layers.-average-pooling2-d.pbtxt | 2 +- ...low.keras.layers.-average-pooling3-d.pbtxt | 2 +- .../v2/tensorflow.keras.layers.-average.pbtxt | 2 +- ...tensorflow.keras.layers.-avg-pool1-d.pbtxt | 2 +- ...tensorflow.keras.layers.-avg-pool2-d.pbtxt | 2 +- ...tensorflow.keras.layers.-avg-pool3-d.pbtxt | 2 +- ...ow.keras.layers.-batch-normalization.pbtxt | 2 +- ...nsorflow.keras.layers.-bidirectional.pbtxt | 2 +- ...tensorflow.keras.layers.-concatenate.pbtxt | 2 +- ...orflow.keras.layers.-conv-l-s-t-m2-d.pbtxt | 2 +- ...flow.keras.layers.-conv1-d-transpose.pbtxt | 2 +- .../v2/tensorflow.keras.layers.-conv1-d.pbtxt | 2 +- ...flow.keras.layers.-conv2-d-transpose.pbtxt | 2 +- .../v2/tensorflow.keras.layers.-conv2-d.pbtxt | 2 +- ...flow.keras.layers.-conv3-d-transpose.pbtxt | 2 +- .../v2/tensorflow.keras.layers.-conv3-d.pbtxt | 2 +- ...ras.layers.-convolution1-d-transpose.pbtxt | 2 +- ...sorflow.keras.layers.-convolution1-d.pbtxt | 2 +- ...ras.layers.-convolution2-d-transpose.pbtxt | 2 +- ...sorflow.keras.layers.-convolution2-d.pbtxt | 2 +- ...ras.layers.-convolution3-d-transpose.pbtxt | 2 +- ...sorflow.keras.layers.-convolution3-d.pbtxt | 2 +- ...tensorflow.keras.layers.-cropping1-d.pbtxt | 2 +- ...tensorflow.keras.layers.-cropping2-d.pbtxt | 2 +- ...tensorflow.keras.layers.-cropping3-d.pbtxt | 2 +- ...sorflow.keras.layers.-dense-features.pbtxt | 2 +- .../v2/tensorflow.keras.layers.-dense.pbtxt | 2 +- ...flow.keras.layers.-depthwise-conv2-d.pbtxt | 2 +- .../v2/tensorflow.keras.layers.-dot.pbtxt | 2 +- .../v2/tensorflow.keras.layers.-dropout.pbtxt | 2 +- .../v2/tensorflow.keras.layers.-e-l-u.pbtxt | 2 +- .../tensorflow.keras.layers.-embedding.pbtxt | 2 +- .../v2/tensorflow.keras.layers.-flatten.pbtxt | 2 +- .../tensorflow.keras.layers.-g-r-u-cell.pbtxt | 2 +- .../v2/tensorflow.keras.layers.-g-r-u.pbtxt | 2 +- ...rflow.keras.layers.-gaussian-dropout.pbtxt | 2 +- ...sorflow.keras.layers.-gaussian-noise.pbtxt | 2 +- ...as.layers.-global-average-pooling1-d.pbtxt | 2 +- ...as.layers.-global-average-pooling2-d.pbtxt | 2 +- ...as.layers.-global-average-pooling3-d.pbtxt | 2 +- ...low.keras.layers.-global-avg-pool1-d.pbtxt | 2 +- ...low.keras.layers.-global-avg-pool2-d.pbtxt | 2 +- ...low.keras.layers.-global-avg-pool3-d.pbtxt | 2 +- ...low.keras.layers.-global-max-pool1-d.pbtxt | 2 +- ...low.keras.layers.-global-max-pool2-d.pbtxt | 2 +- 
...low.keras.layers.-global-max-pool3-d.pbtxt | 2 +- ....keras.layers.-global-max-pooling1-d.pbtxt | 2 +- ....keras.layers.-global-max-pooling2-d.pbtxt | 2 +- ....keras.layers.-global-max-pooling3-d.pbtxt | 2 +- ...tensorflow.keras.layers.-input-layer.pbtxt | 2 +- ...ensorflow.keras.layers.-l-s-t-m-cell.pbtxt | 2 +- .../v2/tensorflow.keras.layers.-l-s-t-m.pbtxt | 2 +- .../v2/tensorflow.keras.layers.-lambda.pbtxt | 2 +- ...ow.keras.layers.-layer-normalization.pbtxt | 2 +- .../v2/tensorflow.keras.layers.-layer.pbtxt | 2 +- ...ensorflow.keras.layers.-leaky-re-l-u.pbtxt | 2 +- ...w.keras.layers.-locally-connected1-d.pbtxt | 2 +- ...w.keras.layers.-locally-connected2-d.pbtxt | 2 +- .../v2/tensorflow.keras.layers.-masking.pbtxt | 2 +- ...tensorflow.keras.layers.-max-pool1-d.pbtxt | 2 +- ...tensorflow.keras.layers.-max-pool2-d.pbtxt | 2 +- ...tensorflow.keras.layers.-max-pool3-d.pbtxt | 2 +- ...sorflow.keras.layers.-max-pooling1-d.pbtxt | 2 +- ...sorflow.keras.layers.-max-pooling2-d.pbtxt | 2 +- ...sorflow.keras.layers.-max-pooling3-d.pbtxt | 2 +- .../v2/tensorflow.keras.layers.-maximum.pbtxt | 2 +- .../v2/tensorflow.keras.layers.-minimum.pbtxt | 2 +- .../tensorflow.keras.layers.-multiply.pbtxt | 2 +- .../tensorflow.keras.layers.-p-re-l-u.pbtxt | 2 +- .../v2/tensorflow.keras.layers.-permute.pbtxt | 2 +- .../v2/tensorflow.keras.layers.-r-n-n.pbtxt | 2 +- .../v2/tensorflow.keras.layers.-re-l-u.pbtxt | 2 +- ...nsorflow.keras.layers.-repeat-vector.pbtxt | 2 +- .../v2/tensorflow.keras.layers.-reshape.pbtxt | 2 +- ...flow.keras.layers.-separable-conv1-d.pbtxt | 2 +- ...flow.keras.layers.-separable-conv2-d.pbtxt | 2 +- ...ras.layers.-separable-convolution1-d.pbtxt | 2 +- ...ras.layers.-separable-convolution2-d.pbtxt | 2 +- ...flow.keras.layers.-simple-r-n-n-cell.pbtxt | 2 +- ...ensorflow.keras.layers.-simple-r-n-n.pbtxt | 2 +- .../v2/tensorflow.keras.layers.-softmax.pbtxt | 2 +- ...low.keras.layers.-spatial-dropout1-d.pbtxt | 2 +- ...low.keras.layers.-spatial-dropout2-d.pbtxt | 2 +- ...low.keras.layers.-spatial-dropout3-d.pbtxt | 2 +- ...ow.keras.layers.-stacked-r-n-n-cells.pbtxt | 2 +- .../tensorflow.keras.layers.-subtract.pbtxt | 2 +- ...low.keras.layers.-thresholded-re-l-u.pbtxt | 2 +- ...rflow.keras.layers.-time-distributed.pbtxt | 2 +- ...sorflow.keras.layers.-up-sampling1-d.pbtxt | 2 +- ...sorflow.keras.layers.-up-sampling2-d.pbtxt | 2 +- ...sorflow.keras.layers.-up-sampling3-d.pbtxt | 2 +- .../v2/tensorflow.keras.layers.-wrapper.pbtxt | 2 +- ...orflow.keras.layers.-zero-padding1-d.pbtxt | 2 +- ...orflow.keras.layers.-zero-padding2-d.pbtxt | 2 +- ...orflow.keras.layers.-zero-padding3-d.pbtxt | 2 +- ...as.layers.experimental.-einsum-dense.pbtxt | 2 +- ...xperimental.-random-fourier-features.pbtxt | 2 +- ...perimental.-sync-batch-normalization.pbtxt | 2 +- ...tal.preprocessing.-category-crossing.pbtxt | 2 +- ...tal.preprocessing.-category-encoding.pbtxt | 2 +- ...erimental.preprocessing.-center-crop.pbtxt | 2 +- ...mental.preprocessing.-discretization.pbtxt | 2 +- ....experimental.preprocessing.-hashing.pbtxt | 2 +- ...mental.preprocessing.-integer-lookup.pbtxt | 2 +- ...imental.preprocessing.-normalization.pbtxt | 2 +- ...l.preprocessing.-preprocessing-layer.pbtxt | 2 +- ...ental.preprocessing.-random-contrast.pbtxt | 2 +- ...erimental.preprocessing.-random-crop.pbtxt | 2 +- ...erimental.preprocessing.-random-flip.pbtxt | 2 +- ...imental.preprocessing.-random-height.pbtxt | 2 +- ...ental.preprocessing.-random-rotation.pbtxt | 2 +- ...al.preprocessing.-random-translation.pbtxt | 2 +- 
...rimental.preprocessing.-random-width.pbtxt | 2 +- ...erimental.preprocessing.-random-zoom.pbtxt | 2 +- ...xperimental.preprocessing.-rescaling.pbtxt | 2 +- ...experimental.preprocessing.-resizing.pbtxt | 2 +- ...imental.preprocessing.-string-lookup.pbtxt | 2 +- ...al.preprocessing.-text-vectorization.pbtxt | 2 +- .../v2/tensorflow.keras.models.-model.pbtxt | 2 +- .../tensorflow.keras.models.-sequential.pbtxt | 2 +- ...orflow.nn.-r-n-n-cell-device-wrapper.pbtxt | 2 +- ...rflow.nn.-r-n-n-cell-dropout-wrapper.pbtxt | 2 +- ...flow.nn.-r-n-n-cell-residual-wrapper.pbtxt | 2 +- 277 files changed, 291 insertions(+), 293 deletions(-) diff --git a/tensorflow/python/keras/engine/base_layer.py b/tensorflow/python/keras/engine/base_layer.py index 4a590f8ff21..d4c4c348742 100644 --- a/tensorflow/python/keras/engine/base_layer.py +++ b/tensorflow/python/keras/engine/base_layer.py @@ -35,7 +35,6 @@ from tensorflow.python import tf2 from tensorflow.python.autograph.core import ag_ctx from tensorflow.python.autograph.impl import api as autograph from tensorflow.python.distribute import distribution_strategy_context as ds_context -from tensorflow.python.distribute import sharded_variable from tensorflow.python.eager import context from tensorflow.python.eager import execute from tensorflow.python.eager import function @@ -483,7 +482,6 @@ class Layer(module.Module, version_utils.LayerVersionSelector): regularizer=None, trainable=None, constraint=None, - partitioner=None, use_resource=None, synchronization=tf_variables.VariableSynchronization.AUTO, aggregation=tf_variables.VariableAggregation.NONE, @@ -502,7 +500,6 @@ class Layer(module.Module, version_utils.LayerVersionSelector): Note that `trainable` cannot be `True` if `synchronization` is set to `ON_READ`. constraint: Constraint instance (callable). - partitioner: Partitioner to be passed to the `Trackable` API. use_resource: Whether to use `ResourceVariable`. synchronization: Indicates when a distributed a variable will be aggregated. Accepted values are constants defined in the class @@ -517,24 +514,20 @@ class Layer(module.Module, version_utils.LayerVersionSelector): `collections`, `experimental_autocast` and `caching_device`. Returns: - The created variable. Usually either a `Variable` or `ResourceVariable` - instance. If `partitioner` is not `None`, a `PartitionedVariable` - instance is returned. + The variable created. Raises: - RuntimeError: If called with partitioned variable regularization and - eager execution is enabled. ValueError: When giving unsupported dtype and no initializer or when trainable has been set to True with synchronization set as `ON_READ`. """ if shape is None: shape = () + kwargs.pop('partitioner', None) # Ignored. # Validate optional keyword arguments. for kwarg in kwargs: - if kwarg not in ['getter', 'collections', 'experimental_autocast', - 'caching_device']: + if kwarg not in ['collections', 'experimental_autocast', + 'caching_device', 'getter']: raise TypeError('Unknown keyword argument:', kwarg) - getter = kwargs.pop('getter', base_layer_utils.make_variable) collections_arg = kwargs.pop('collections', None) # 'experimental_autocast' can be set to False by the caller to indicate an # AutoCastVariable should never be created. 
@@ -579,10 +572,11 @@ class Layer(module.Module, version_utils.LayerVersionSelector): raise ValueError('An initializer for variable %s of type %s is required' ' for layer %s' % (name, dtype.base_dtype, self.name)) + getter = kwargs.pop('getter', base_layer_utils.make_variable) if (autocast and self._dtype_policy.should_cast_variables and dtype.is_floating): - # Wrap 'getter' with a version that returns an AutoCastVariable. old_getter = getter + # Wrap variable constructor to return an AutoCastVariable. def getter(*args, **kwargs): # pylint: disable=function-redefined variable = old_getter(*args, **kwargs) return autocast_variable.create_autocast_variable(variable) @@ -606,7 +600,6 @@ class Layer(module.Module, version_utils.LayerVersionSelector): dtype=dtype, constraint=constraint, trainable=trainable, - partitioner=partitioner, use_resource=use_resource, collections=collections_arg, synchronization=synchronization, @@ -620,9 +613,7 @@ class Layer(module.Module, version_utils.LayerVersionSelector): self._handle_weight_regularization(name_in_scope, variable, regularizer) - if isinstance( - variable, - (tf_variables.PartitionedVariable, sharded_variable.ShardedVariable)): + if base_layer_utils.is_split_variable(variable): for v in variable: backend.track_variable(v) if trainable: @@ -2440,7 +2431,7 @@ class Layer(module.Module, version_utils.LayerVersionSelector): regularization = regularizer(v) return regularization - if isinstance(variable, tf_variables.PartitionedVariable): + if base_layer_utils.is_split_variable(variable): for v in variable: self.add_loss(functools.partial(_loss_for_variable, v)) else: diff --git a/tensorflow/python/keras/engine/base_layer_utils.py b/tensorflow/python/keras/engine/base_layer_utils.py index b3446b1511f..bd7387258c1 100644 --- a/tensorflow/python/keras/engine/base_layer_utils.py +++ b/tensorflow/python/keras/engine/base_layer_utils.py @@ -853,6 +853,11 @@ def no_ragged_support(inputs, layer_name): 'input to an uniform tensor.' % (layer_name, inputs)) +def is_split_variable(v): + """Returns True if `v` is either a PartionedVariable or a SharedVariable.""" + return hasattr(v, '_variable_list') or hasattr(v, '_variables') + + # TODO(kathywu): This is a temporary hack. When a network of layers is revived # from SavedModel, only the top-level layer will have losses. 
This causes issues # in eager mode because the child layers may have graph losses diff --git a/tensorflow/python/keras/engine/base_layer_v1.py b/tensorflow/python/keras/engine/base_layer_v1.py index 1b9dcf50123..a459eaefbc3 100644 --- a/tensorflow/python/keras/engine/base_layer_v1.py +++ b/tensorflow/python/keras/engine/base_layer_v1.py @@ -454,7 +454,7 @@ class Layer(base_layer.Layer): self._handle_weight_regularization(name_in_scope, variable, regularizer) - if isinstance(variable, tf_variables.PartitionedVariable): + if base_layer_utils.is_split_variable(variable): for v in variable: backend.track_variable(v) if trainable: @@ -1923,7 +1923,7 @@ class Layer(base_layer.Layer): regularization = regularizer(v) return regularization - if isinstance(variable, tf_variables.PartitionedVariable): + if base_layer_utils.is_split_variable(variable): for v in variable: self.add_loss(functools.partial(_loss_for_variable, v)) else: diff --git a/tensorflow/python/keras/layers/legacy_rnn/rnn_cell_impl.py b/tensorflow/python/keras/layers/legacy_rnn/rnn_cell_impl.py index 2276fbf98ee..de69cdac611 100644 --- a/tensorflow/python/keras/layers/legacy_rnn/rnn_cell_impl.py +++ b/tensorflow/python/keras/layers/legacy_rnn/rnn_cell_impl.py @@ -34,6 +34,7 @@ from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_util from tensorflow.python.keras import activations from tensorflow.python.keras import initializers +from tensorflow.python.keras.engine import base_layer_utils from tensorflow.python.keras.engine import input_spec from tensorflow.python.keras.layers.legacy_rnn import rnn_cell_wrapper_impl from tensorflow.python.keras.utils import tf_utils @@ -250,7 +251,7 @@ class RNNCell(base_layer.Layer): else: trainable = ( variable in tf_variables.trainable_variables() or - (isinstance(variable, tf_variables.PartitionedVariable) and + (base_layer_utils.is_split_variable(variable) and list(variable)[0] in tf_variables.trainable_variables())) if trainable and all(variable is not v for v in self._trainable_weights): self._trainable_weights.append(variable) diff --git a/tensorflow/python/keras/legacy_tf_layers/base.py b/tensorflow/python/keras/legacy_tf_layers/base.py index 5a944703af9..e5a37e854ac 100644 --- a/tensorflow/python/keras/legacy_tf_layers/base.py +++ b/tensorflow/python/keras/legacy_tf_layers/base.py @@ -24,6 +24,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.keras import backend from tensorflow.python.keras.engine import base_layer +from tensorflow.python.keras.engine import base_layer_utils from tensorflow.python.keras.mixed_precision.experimental import policy from tensorflow.python.ops import variable_scope as vs from tensorflow.python.ops import variables as tf_variables @@ -405,7 +406,7 @@ class Layer(base_layer.Layer): trainable = True def _should_add_regularizer(variable, existing_variable_set): - if isinstance(variable, tf_variables.PartitionedVariable): + if base_layer_utils.is_split_variable(variable): for var in variable: if var in existing_variable_set: return False diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.-model.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.-model.pbtxt index 6318e577087..a9f6f069560 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.-model.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.-model.pbtxt @@ -158,7 +158,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', 
\'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.-sequential.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.-sequential.pbtxt index 9b7b7736746..168539be647 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.-sequential.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.-sequential.pbtxt @@ -164,7 +164,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.-linear-model.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.-linear-model.pbtxt index 976eb49d4c8..2aff054a51d 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.-linear-model.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.-linear-model.pbtxt @@ -159,7 +159,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.-peephole-l-s-t-m-cell.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.-peephole-l-s-t-m-cell.pbtxt index e366d0b1f52..3bd1bc2c939 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.-peephole-l-s-t-m-cell.pbtxt +++ 
b/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.-peephole-l-s-t-m-cell.pbtxt @@ -139,7 +139,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.-sequence-features.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.-sequence-features.pbtxt index 2b4ebd55410..7150f2bd928 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.-sequence-features.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.-sequence-features.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.-wide-deep-model.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.-wide-deep-model.pbtxt index 500aa28eae7..ed49246e458 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.-wide-deep-model.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.-wide-deep-model.pbtxt @@ -159,7 +159,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git 
a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-abstract-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-abstract-r-n-n-cell.pbtxt index 363e4b7cf20..51277dfae56 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-abstract-r-n-n-cell.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-abstract-r-n-n-cell.pbtxt @@ -145,7 +145,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-activation.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-activation.pbtxt index 409dfcd26e4..378f6568eef 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-activation.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-activation.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-activity-regularization.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-activity-regularization.pbtxt index 599d550fee2..a9d11967feb 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-activity-regularization.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-activity-regularization.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, 
keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-add.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-add.pbtxt index b43eee1e6e5..fca5d2928ee 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-add.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-add.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-additive-attention.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-additive-attention.pbtxt index 2edb8e028c1..96b809486a7 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-additive-attention.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-additive-attention.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-alpha-dropout.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-alpha-dropout.pbtxt index 4f7c7d6ca27..4ca1dc4a217 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-alpha-dropout.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-alpha-dropout.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', 
\'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-attention.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-attention.pbtxt index e335099c084..ae2cb7f7e20 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-attention.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-attention.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling1-d.pbtxt index 80c8e0d63c1..ae64e051158 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling1-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling2-d.pbtxt index 4e95f083490..fd77d449216 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling2-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling2-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', 
\'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling3-d.pbtxt index 370589fb876..fc39c337669 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling3-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling3-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average.pbtxt index 8abc0add0dc..cbcfbb1022f 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool1-d.pbtxt index 6938e84fe77..bbb6c19bd7f 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool1-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool1-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', 
\'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool2-d.pbtxt index 5ca4fd39173..16d329f22c1 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool2-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool2-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool3-d.pbtxt index 378bf0e84a0..56cb840bd0b 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool3-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool3-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-batch-normalization.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-batch-normalization.pbtxt index f5ef78bc6d7..6dc759e1338 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-batch-normalization.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-batch-normalization.pbtxt @@ -138,7 +138,7 @@ tf_class { 
} member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-bidirectional.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-bidirectional.pbtxt index 60402f72e77..a619ae0a480 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-bidirectional.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-bidirectional.pbtxt @@ -142,7 +142,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-concatenate.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-concatenate.pbtxt index c6733e7e8ab..237d4e7f34c 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-concatenate.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-concatenate.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt index 958d06a0d0f..dc15a2c227c 100644 --- 
a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt @@ -227,7 +227,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv1-d-transpose.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv1-d-transpose.pbtxt index 03ccf45ea84..3d3ee3c67bf 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv1-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv1-d-transpose.pbtxt @@ -139,7 +139,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv1-d.pbtxt index f41ff3f2136..23fb7bfc4eb 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv1-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv1-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git 
a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv2-d-transpose.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv2-d-transpose.pbtxt index 2370f378e57..a7eeb12ef04 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv2-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv2-d-transpose.pbtxt @@ -139,7 +139,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv2-d.pbtxt index 82caa2a7b1d..9f4aa3cd95f 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv2-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv2-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv3-d-transpose.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv3-d-transpose.pbtxt index e8adfd50fb2..a83cbc24972 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv3-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv3-d-transpose.pbtxt @@ -139,7 +139,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', 
\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv3-d.pbtxt index d49616f857c..7ccbb9a2694 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv3-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv3-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution1-d-transpose.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution1-d-transpose.pbtxt index 6332fcc5d4b..0733557f70d 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution1-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution1-d-transpose.pbtxt @@ -139,7 +139,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution1-d.pbtxt index 5c94f928d6b..71c2e77e7ff 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution1-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution1-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', 
\'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt index 0de86bcf16a..824bd8bbb2f 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt @@ -139,7 +139,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution2-d.pbtxt index a0f0ff2e7ba..ac9d5be1883 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution2-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution2-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt index c772d1243f4..ed63b8d98d4 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt @@ -139,7 +139,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], 
varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution3-d.pbtxt index dc0637655e3..d00f7a5b396 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution3-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution3-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cropping1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cropping1-d.pbtxt index 090f4055906..2ca122485d6 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cropping1-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cropping1-d.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cropping2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cropping2-d.pbtxt index b010cfe1acd..1b69967a59e 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cropping2-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cropping2-d.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', 
\'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cropping3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cropping3-d.pbtxt index 802627cc0ed..265f13b06bd 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cropping3-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cropping3-d.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt index 7c95b72997c..e40eb1470a7 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt @@ -147,7 +147,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-l-s-t-m.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-l-s-t-m.pbtxt index 40f6c7a7338..167a4d9e96f 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-l-s-t-m.pbtxt +++ 
b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-l-s-t-m.pbtxt @@ -147,7 +147,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dense-features.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dense-features.pbtxt index 3d541caa034..59793ff6d45 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dense-features.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dense-features.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dense.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dense.pbtxt index 90d398e8975..8370406e34d 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dense.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dense.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-depthwise-conv2-d.pbtxt 
b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-depthwise-conv2-d.pbtxt index d754f8d1de3..554d7531912 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-depthwise-conv2-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-depthwise-conv2-d.pbtxt @@ -139,7 +139,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dot.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dot.pbtxt index 7189ec944a1..101719437d4 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dot.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dot.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dropout.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dropout.pbtxt index ecee07fae25..d441302523f 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dropout.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dropout.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { 
name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-e-l-u.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-e-l-u.pbtxt index f24a778092b..a736e4c03fd 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-e-l-u.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-e-l-u.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-embedding.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-embedding.pbtxt index f9e1721669a..3f9002792d8 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-embedding.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-embedding.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-flatten.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-flatten.pbtxt index b76d54415c7..217f2701f3d 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-flatten.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-flatten.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', 
\'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-g-r-u-cell.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-g-r-u-cell.pbtxt index 5a6976a8e54..7ed2d307cd0 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-g-r-u-cell.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-g-r-u-cell.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-g-r-u.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-g-r-u.pbtxt index 2eedd3a212e..0a2ebd2cfe7 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-g-r-u.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-g-r-u.pbtxt @@ -210,7 +210,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-gaussian-dropout.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-gaussian-dropout.pbtxt index 1d3b1e73c55..1962f3284f0 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-gaussian-dropout.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-gaussian-dropout.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], 
varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-gaussian-noise.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-gaussian-noise.pbtxt index 46caef8596c..64073b27c24 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-gaussian-noise.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-gaussian-noise.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt index 0afe2f7483f..73ed7f59394 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt index 8c001871ed6..2fb47e8a5a6 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', 
\'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt index c0e2d405e9a..e3ab2b5ab6d 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt index 785db4a7762..494e2247fbf 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt index c288f835aac..22e79311d67 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', 
\'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt index 060e6fd7d12..83f91393647 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool1-d.pbtxt index fcf2cf4c0ac..d211683ae9c 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool1-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool1-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool2-d.pbtxt index 5a9fcc6db6c..2b9442dee85 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool2-d.pbtxt +++ 
b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool2-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool3-d.pbtxt index 9d50d7e2d95..e02b42bdd0e 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool3-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool3-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt index b071143ab2b..60d2a947d87 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git 
a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt index 49a548fc277..352527ea0f4 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt index f090677378d..a1ff2f402a7 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-input-layer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-input-layer.pbtxt index e7952bb23c9..d1811a28b55 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-input-layer.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-input-layer.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, 
keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt index 16519f922d9..27cada1194e 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-l-s-t-m.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-l-s-t-m.pbtxt index 1dc701cc7d6..080fb51d538 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-l-s-t-m.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-l-s-t-m.pbtxt @@ -210,7 +210,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-lambda.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-lambda.pbtxt index 5673f4d01a1..c9f01c56606 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-lambda.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-lambda.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', 
\'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-layer-normalization.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-layer-normalization.pbtxt index 99eafcb8fe3..3b9306cdfe6 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-layer-normalization.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-layer-normalization.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-layer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-layer.pbtxt index bc84f3b51c1..03902ed1de4 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-layer.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-layer.pbtxt @@ -136,7 +136,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-leaky-re-l-u.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-leaky-re-l-u.pbtxt index 95c5d4bb39b..bf98a150184 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-leaky-re-l-u.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-leaky-re-l-u.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', 
\'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-locally-connected1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-locally-connected1-d.pbtxt index b72424921a4..040230d63b3 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-locally-connected1-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-locally-connected1-d.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-locally-connected2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-locally-connected2-d.pbtxt index 985c2274379..8d49e7a58a1 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-locally-connected2-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-locally-connected2-d.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-masking.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-masking.pbtxt index 65ae9893b11..485ae3b16ef 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-masking.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-masking.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', 
\'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool1-d.pbtxt index 57c01d47d04..05050fdbffa 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool1-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool1-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool2-d.pbtxt index 88c1fb93eaf..8ae6a0ab43b 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool2-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool2-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool3-d.pbtxt index c5f18cb9a05..ae8aea28552 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool3-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool3-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: 
"args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling1-d.pbtxt index db5eb02deee..94d2e0e6f6e 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling1-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling2-d.pbtxt index 45e68e0d94c..91b0b44ea50 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling2-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling2-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling3-d.pbtxt index 36ea8cbb6ae..587850f1d6c 100644 --- 
a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling3-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling3-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-maximum.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-maximum.pbtxt index 87afbb42ee7..ac97ca6e061 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-maximum.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-maximum.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-minimum.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-minimum.pbtxt index 1de712f45ae..7c8950ce3fa 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-minimum.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-minimum.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-multiply.pbtxt 
b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-multiply.pbtxt index 4502002d2c7..9ef978eeb3a 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-multiply.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-multiply.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-p-re-l-u.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-p-re-l-u.pbtxt index 1543f27fab8..19a48d77113 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-p-re-l-u.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-p-re-l-u.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-permute.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-permute.pbtxt index 47749db7b2a..03d5a2195cd 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-permute.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-permute.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: 
"apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-r-n-n.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-r-n-n.pbtxt index 76bfe022b2b..c8c5b8326dd 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-r-n-n.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-r-n-n.pbtxt @@ -141,7 +141,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-re-l-u.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-re-l-u.pbtxt index 48ac55d9b2e..84530e067b2 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-re-l-u.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-re-l-u.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-repeat-vector.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-repeat-vector.pbtxt index 71753b21d64..4de5b1c20d8 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-repeat-vector.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-repeat-vector.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', 
\'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-reshape.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-reshape.pbtxt index 8778dfa3f20..b9fcb027aaf 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-reshape.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-reshape.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-conv1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-conv1-d.pbtxt index 1f7d7c0204d..5b6bff9dc5f 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-conv1-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-conv1-d.pbtxt @@ -139,7 +139,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-conv2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-conv2-d.pbtxt index e301442b952..3fb3c032a3e 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-conv2-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-conv2-d.pbtxt @@ -139,7 +139,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', 
\'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-convolution1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-convolution1-d.pbtxt index 6e52320dcb5..5387a8e5fc5 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-convolution1-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-convolution1-d.pbtxt @@ -139,7 +139,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-convolution2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-convolution2-d.pbtxt index 16e10561154..de2d3eaaab4 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-convolution2-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-convolution2-d.pbtxt @@ -139,7 +139,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt index 8491504b3ab..80e17948612 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', 
\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-simple-r-n-n.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-simple-r-n-n.pbtxt index f651e06aa50..48e0c26b010 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-simple-r-n-n.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-simple-r-n-n.pbtxt @@ -198,7 +198,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-softmax.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-softmax.pbtxt index 75737c0a415..db272bdf782 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-softmax.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-softmax.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt index e70f19ac2e5..0260221d093 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', 
\'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt index b5d4b00a220..ddad5641e76 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt index 52ace6ef50f..47e6ba9abfa 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt index 14f3ccf383d..5379da642ed 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt +++ 
b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt @@ -145,7 +145,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-subtract.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-subtract.pbtxt index f85acb2945a..1e070fb36db 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-subtract.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-subtract.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt index dccb48bce49..6d7724bdfe3 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-time-distributed.pbtxt 
b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-time-distributed.pbtxt index b02fba20dee..d740fc8de3a 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-time-distributed.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-time-distributed.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling1-d.pbtxt index 84f58bf535e..de377d9d2eb 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling1-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling1-d.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling2-d.pbtxt index b78fcc18e08..e2ee7941662 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling2-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling2-d.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', 
\'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling3-d.pbtxt index 29851be0b77..8dd967cd3ce 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling3-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling3-d.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-wrapper.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-wrapper.pbtxt index da355d1142f..334463a4031 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-wrapper.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-wrapper.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-zero-padding1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-zero-padding1-d.pbtxt index 3180441407e..ba4e58e3dfb 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-zero-padding1-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-zero-padding1-d.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', 
\'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-zero-padding2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-zero-padding2-d.pbtxt index ecccc705e07..8538d903fba 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-zero-padding2-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-zero-padding2-d.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-zero-padding3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-zero-padding3-d.pbtxt index fc93d9957d7..2b8681ae8cc 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-zero-padding3-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-zero-padding3-d.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.-einsum-dense.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.-einsum-dense.pbtxt index 8e8684bab54..a2d7d285409 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.-einsum-dense.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.-einsum-dense.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', 
\'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.-random-fourier-features.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.-random-fourier-features.pbtxt index 9b4ddcd3f62..e24ca0dc01a 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.-random-fourier-features.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.-random-fourier-features.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-category-crossing.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-category-crossing.pbtxt index 73592f11b8d..d48eb4ecc4a 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-category-crossing.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-category-crossing.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-category-encoding.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-category-encoding.pbtxt index 26a016264d8..04e59727b19 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-category-encoding.pbtxt +++ 
b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-category-encoding.pbtxt @@ -145,7 +145,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-center-crop.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-center-crop.pbtxt index b03ca3dc11c..819c6a693c9 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-center-crop.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-center-crop.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-discretization.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-discretization.pbtxt index 10f5b9ae1aa..5f78e5637ea 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-discretization.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-discretization.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', 
\'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-hashing.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-hashing.pbtxt index b29b32e1315..51f113d4e4a 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-hashing.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-hashing.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-integer-lookup.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-integer-lookup.pbtxt index 44afdfc7d34..75625d24d30 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-integer-lookup.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-integer-lookup.pbtxt @@ -147,7 +147,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-normalization.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-normalization.pbtxt index e1a451f8f52..093a2b2292e 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-normalization.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-normalization.pbtxt @@ -145,7 +145,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, 
defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.pbtxt index 095471dc12a..0fa0355b0f2 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.pbtxt @@ -141,7 +141,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-contrast.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-contrast.pbtxt index b9a330e8bcc..0f575454a80 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-contrast.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-contrast.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-crop.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-crop.pbtxt index f0f1bc45b0e..6425f6c6ed6 100644 --- 
a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-crop.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-crop.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-flip.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-flip.pbtxt index 8b1d7734d44..245723f1f56 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-flip.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-flip.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-height.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-height.pbtxt index 5a8f67f9487..e56ec9e2dad 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-height.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-height.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, 
keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-rotation.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-rotation.pbtxt index 0938ec3d684..167fa775273 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-rotation.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-rotation.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-translation.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-translation.pbtxt index cfee60206f1..224417edfdd 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-translation.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-translation.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-width.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-width.pbtxt index 93b272d1105..48bcfbb94b9 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-width.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-width.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', 
\'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-zoom.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-zoom.pbtxt index f0b50e92e66..570f3a3bc03 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-zoom.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-zoom.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-rescaling.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-rescaling.pbtxt index 94b6c730699..fc759ce7ea6 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-rescaling.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-rescaling.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-resizing.pbtxt 
b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-resizing.pbtxt index 30b20a1ac01..abcb2d4876a 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-resizing.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-resizing.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-string-lookup.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-string-lookup.pbtxt index 0e671b5f78b..2d7e71c2c43 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-string-lookup.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-string-lookup.pbtxt @@ -147,7 +147,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-text-vectorization.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-text-vectorization.pbtxt index 1b9a745486c..532f98fb322 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-text-vectorization.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-text-vectorization.pbtxt @@ -145,7 +145,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', 
\'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-model.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-model.pbtxt index ad0edc64606..4368742d7bb 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-model.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-model.pbtxt @@ -158,7 +158,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-sequential.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-sequential.pbtxt index b38c669df0f..8e9409f27a9 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-sequential.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-sequential.pbtxt @@ -164,7 +164,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.-model.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.-model.pbtxt index 6318e577087..a9f6f069560 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.-model.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.-model.pbtxt @@ -158,7 +158,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', 
\'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.-sequential.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.-sequential.pbtxt index 9b7b7736746..168539be647 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.-sequential.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.-sequential.pbtxt @@ -164,7 +164,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.-linear-model.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.-linear-model.pbtxt index 976eb49d4c8..2aff054a51d 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.-linear-model.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.-linear-model.pbtxt @@ -159,7 +159,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.-peephole-l-s-t-m-cell.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.-peephole-l-s-t-m-cell.pbtxt index e366d0b1f52..3bd1bc2c939 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.-peephole-l-s-t-m-cell.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.-peephole-l-s-t-m-cell.pbtxt @@ -139,7 +139,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', 
\'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.-sequence-features.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.-sequence-features.pbtxt index 2b4ebd55410..7150f2bd928 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.-sequence-features.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.-sequence-features.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.-wide-deep-model.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.-wide-deep-model.pbtxt index 500aa28eae7..ed49246e458 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.-wide-deep-model.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.-wide-deep-model.pbtxt @@ -159,7 +159,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-abstract-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-abstract-r-n-n-cell.pbtxt index 363e4b7cf20..51277dfae56 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-abstract-r-n-n-cell.pbtxt +++ 
b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-abstract-r-n-n-cell.pbtxt @@ -145,7 +145,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-activation.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-activation.pbtxt index 409dfcd26e4..378f6568eef 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-activation.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-activation.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-activity-regularization.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-activity-regularization.pbtxt index 599d550fee2..a9d11967feb 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-activity-regularization.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-activity-regularization.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-add.pbtxt 
b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-add.pbtxt index b43eee1e6e5..fca5d2928ee 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-add.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-add.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-additive-attention.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-additive-attention.pbtxt index 2edb8e028c1..96b809486a7 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-additive-attention.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-additive-attention.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-alpha-dropout.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-alpha-dropout.pbtxt index 4f7c7d6ca27..4ca1dc4a217 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-alpha-dropout.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-alpha-dropout.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', 
\'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-attention.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-attention.pbtxt index e335099c084..ae2cb7f7e20 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-attention.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-attention.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling1-d.pbtxt index 80c8e0d63c1..ae64e051158 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling1-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling2-d.pbtxt index 4e95f083490..fd77d449216 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling2-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling2-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', 
\'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling3-d.pbtxt index 370589fb876..fc39c337669 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling3-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling3-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average.pbtxt index 8abc0add0dc..cbcfbb1022f 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool1-d.pbtxt index 6938e84fe77..bbb6c19bd7f 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool1-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool1-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + 
argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool2-d.pbtxt index 5ca4fd39173..16d329f22c1 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool2-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool2-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool3-d.pbtxt index 378bf0e84a0..56cb840bd0b 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool3-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool3-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-batch-normalization.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-batch-normalization.pbtxt index 049b9b2b8a6..fd130c55979 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-batch-normalization.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-batch-normalization.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, 
defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-bidirectional.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-bidirectional.pbtxt index 60402f72e77..a619ae0a480 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-bidirectional.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-bidirectional.pbtxt @@ -142,7 +142,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-concatenate.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-concatenate.pbtxt index c6733e7e8ab..237d4e7f34c 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-concatenate.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-concatenate.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt index 958d06a0d0f..dc15a2c227c 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt @@ -227,7 +227,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', 
\'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv1-d-transpose.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv1-d-transpose.pbtxt index 03ccf45ea84..3d3ee3c67bf 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv1-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv1-d-transpose.pbtxt @@ -139,7 +139,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv1-d.pbtxt index f41ff3f2136..23fb7bfc4eb 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv1-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv1-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv2-d-transpose.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv2-d-transpose.pbtxt index 2370f378e57..a7eeb12ef04 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv2-d-transpose.pbtxt +++ 
b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv2-d-transpose.pbtxt @@ -139,7 +139,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv2-d.pbtxt index 82caa2a7b1d..9f4aa3cd95f 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv2-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv2-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv3-d-transpose.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv3-d-transpose.pbtxt index e8adfd50fb2..a83cbc24972 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv3-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv3-d-transpose.pbtxt @@ -139,7 +139,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv3-d.pbtxt 
b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv3-d.pbtxt index d49616f857c..7ccbb9a2694 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv3-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv3-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution1-d-transpose.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution1-d-transpose.pbtxt index 6332fcc5d4b..0733557f70d 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution1-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution1-d-transpose.pbtxt @@ -139,7 +139,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution1-d.pbtxt index 5c94f928d6b..71c2e77e7ff 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution1-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution1-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', 
\'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt index 0de86bcf16a..824bd8bbb2f 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt @@ -139,7 +139,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution2-d.pbtxt index a0f0ff2e7ba..ac9d5be1883 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution2-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution2-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt index c772d1243f4..ed63b8d98d4 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt @@ -139,7 +139,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', 
\'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution3-d.pbtxt index dc0637655e3..d00f7a5b396 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution3-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution3-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cropping1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cropping1-d.pbtxt index 090f4055906..2ca122485d6 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cropping1-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cropping1-d.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cropping2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cropping2-d.pbtxt index b010cfe1acd..1b69967a59e 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cropping2-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cropping2-d.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', 
\'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cropping3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cropping3-d.pbtxt index 802627cc0ed..265f13b06bd 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cropping3-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cropping3-d.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dense-features.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dense-features.pbtxt index 29c3b428d23..c74a5868d98 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dense-features.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dense-features.pbtxt @@ -139,7 +139,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dense.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dense.pbtxt index 90d398e8975..8370406e34d 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dense.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dense.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', 
\'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-depthwise-conv2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-depthwise-conv2-d.pbtxt index d754f8d1de3..554d7531912 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-depthwise-conv2-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-depthwise-conv2-d.pbtxt @@ -139,7 +139,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dot.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dot.pbtxt index 7189ec944a1..101719437d4 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dot.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dot.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dropout.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dropout.pbtxt index ecee07fae25..d441302523f 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dropout.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dropout.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', 
\'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-e-l-u.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-e-l-u.pbtxt index f24a778092b..a736e4c03fd 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-e-l-u.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-e-l-u.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-embedding.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-embedding.pbtxt index f9e1721669a..3f9002792d8 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-embedding.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-embedding.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-flatten.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-flatten.pbtxt index b76d54415c7..217f2701f3d 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-flatten.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-flatten.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method 
{ name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-g-r-u-cell.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-g-r-u-cell.pbtxt index ac60b3f5b3f..84576890c14 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-g-r-u-cell.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-g-r-u-cell.pbtxt @@ -139,7 +139,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-g-r-u.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-g-r-u.pbtxt index 6d8f5008f65..bf71821c303 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-g-r-u.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-g-r-u.pbtxt @@ -212,7 +212,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-gaussian-dropout.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-gaussian-dropout.pbtxt index 1d3b1e73c55..1962f3284f0 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-gaussian-dropout.pbtxt +++ 
b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-gaussian-dropout.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-gaussian-noise.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-gaussian-noise.pbtxt index 46caef8596c..64073b27c24 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-gaussian-noise.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-gaussian-noise.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt index 0afe2f7483f..73ed7f59394 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git 
a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt index 8c001871ed6..2fb47e8a5a6 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt index c0e2d405e9a..e3ab2b5ab6d 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt index 785db4a7762..494e2247fbf 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', 
\'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt index c288f835aac..22e79311d67 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt index 060e6fd7d12..83f91393647 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool1-d.pbtxt index fcf2cf4c0ac..d211683ae9c 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool1-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool1-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', 
\'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool2-d.pbtxt index 5a9fcc6db6c..2b9442dee85 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool2-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool2-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool3-d.pbtxt index 9d50d7e2d95..e02b42bdd0e 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool3-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool3-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt index b071143ab2b..60d2a947d87 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', 
\'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt index 49a548fc277..352527ea0f4 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt index f090677378d..a1ff2f402a7 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-input-layer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-input-layer.pbtxt index e7952bb23c9..d1811a28b55 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-input-layer.pbtxt +++ 
b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-input-layer.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt index 3682005bee1..fb7ee25f8f6 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt @@ -139,7 +139,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-l-s-t-m.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-l-s-t-m.pbtxt index 56368eefc1b..fd5eb6f50ce 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-l-s-t-m.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-l-s-t-m.pbtxt @@ -212,7 +212,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-lambda.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-lambda.pbtxt index 
5673f4d01a1..c9f01c56606 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-lambda.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-lambda.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-layer-normalization.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-layer-normalization.pbtxt index 99eafcb8fe3..3b9306cdfe6 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-layer-normalization.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-layer-normalization.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-layer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-layer.pbtxt index bc84f3b51c1..03902ed1de4 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-layer.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-layer.pbtxt @@ -136,7 +136,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git 
a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-leaky-re-l-u.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-leaky-re-l-u.pbtxt index 95c5d4bb39b..bf98a150184 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-leaky-re-l-u.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-leaky-re-l-u.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-locally-connected1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-locally-connected1-d.pbtxt index b72424921a4..040230d63b3 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-locally-connected1-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-locally-connected1-d.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-locally-connected2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-locally-connected2-d.pbtxt index 985c2274379..8d49e7a58a1 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-locally-connected2-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-locally-connected2-d.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, 
keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-masking.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-masking.pbtxt index 65ae9893b11..485ae3b16ef 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-masking.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-masking.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool1-d.pbtxt index 57c01d47d04..05050fdbffa 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool1-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool1-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool2-d.pbtxt index 88c1fb93eaf..8ae6a0ab43b 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool2-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool2-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', 
\'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool3-d.pbtxt index c5f18cb9a05..ae8aea28552 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool3-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool3-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling1-d.pbtxt index db5eb02deee..94d2e0e6f6e 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling1-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling2-d.pbtxt index 45e68e0d94c..91b0b44ea50 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling2-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling2-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', 
\'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling3-d.pbtxt index 36ea8cbb6ae..587850f1d6c 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling3-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling3-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-maximum.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-maximum.pbtxt index 87afbb42ee7..ac97ca6e061 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-maximum.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-maximum.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-minimum.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-minimum.pbtxt index 1de712f45ae..7c8950ce3fa 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-minimum.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-minimum.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], 
varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-multiply.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-multiply.pbtxt index 4502002d2c7..9ef978eeb3a 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-multiply.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-multiply.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-p-re-l-u.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-p-re-l-u.pbtxt index 1543f27fab8..19a48d77113 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-p-re-l-u.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-p-re-l-u.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-permute.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-permute.pbtxt index 47749db7b2a..03d5a2195cd 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-permute.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-permute.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', 
\'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-r-n-n.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-r-n-n.pbtxt index 76bfe022b2b..c8c5b8326dd 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-r-n-n.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-r-n-n.pbtxt @@ -141,7 +141,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-re-l-u.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-re-l-u.pbtxt index 48ac55d9b2e..84530e067b2 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-re-l-u.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-re-l-u.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-repeat-vector.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-repeat-vector.pbtxt index 71753b21d64..4de5b1c20d8 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-repeat-vector.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-repeat-vector.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: 
"args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-reshape.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-reshape.pbtxt index 8778dfa3f20..b9fcb027aaf 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-reshape.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-reshape.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-conv1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-conv1-d.pbtxt index 1f7d7c0204d..5b6bff9dc5f 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-conv1-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-conv1-d.pbtxt @@ -139,7 +139,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-conv2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-conv2-d.pbtxt index e301442b952..3fb3c032a3e 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-conv2-d.pbtxt +++ 
b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-conv2-d.pbtxt @@ -139,7 +139,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-convolution1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-convolution1-d.pbtxt index 6e52320dcb5..5387a8e5fc5 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-convolution1-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-convolution1-d.pbtxt @@ -139,7 +139,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-convolution2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-convolution2-d.pbtxt index 16e10561154..de2d3eaaab4 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-convolution2-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-convolution2-d.pbtxt @@ -139,7 +139,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git 
a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt index 8491504b3ab..80e17948612 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-simple-r-n-n.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-simple-r-n-n.pbtxt index f651e06aa50..48e0c26b010 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-simple-r-n-n.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-simple-r-n-n.pbtxt @@ -198,7 +198,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-softmax.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-softmax.pbtxt index 75737c0a415..db272bdf782 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-softmax.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-softmax.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', 
\'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt index e70f19ac2e5..0260221d093 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt index b5d4b00a220..ddad5641e76 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt index 52ace6ef50f..47e6ba9abfa 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', 
\'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt index 14f3ccf383d..5379da642ed 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt @@ -145,7 +145,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-subtract.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-subtract.pbtxt index f85acb2945a..1e070fb36db 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-subtract.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-subtract.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt index dccb48bce49..6d7724bdfe3 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', 
\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-time-distributed.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-time-distributed.pbtxt index b02fba20dee..d740fc8de3a 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-time-distributed.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-time-distributed.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling1-d.pbtxt index 84f58bf535e..de377d9d2eb 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling1-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling1-d.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling2-d.pbtxt index b78fcc18e08..e2ee7941662 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling2-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling2-d.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', 
\'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling3-d.pbtxt index 29851be0b77..8dd967cd3ce 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling3-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling3-d.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-wrapper.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-wrapper.pbtxt index da355d1142f..334463a4031 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-wrapper.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-wrapper.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-zero-padding1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-zero-padding1-d.pbtxt index 3180441407e..ba4e58e3dfb 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-zero-padding1-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-zero-padding1-d.pbtxt @@ 
-137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-zero-padding2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-zero-padding2-d.pbtxt index ecccc705e07..8538d903fba 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-zero-padding2-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-zero-padding2-d.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-zero-padding3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-zero-padding3-d.pbtxt index fc93d9957d7..2b8681ae8cc 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-zero-padding3-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-zero-padding3-d.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.-einsum-dense.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.-einsum-dense.pbtxt index 
8e8684bab54..a2d7d285409 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.-einsum-dense.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.-einsum-dense.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.-random-fourier-features.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.-random-fourier-features.pbtxt index 9b4ddcd3f62..e24ca0dc01a 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.-random-fourier-features.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.-random-fourier-features.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.-sync-batch-normalization.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.-sync-batch-normalization.pbtxt index 70d57f09355..f34dce7b307 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.-sync-batch-normalization.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.-sync-batch-normalization.pbtxt @@ -138,7 +138,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, 
keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-category-crossing.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-category-crossing.pbtxt index 73592f11b8d..d48eb4ecc4a 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-category-crossing.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-category-crossing.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-category-encoding.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-category-encoding.pbtxt index 6874daac890..b4662d3c0e9 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-category-encoding.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-category-encoding.pbtxt @@ -143,7 +143,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-center-crop.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-center-crop.pbtxt index b03ca3dc11c..819c6a693c9 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-center-crop.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-center-crop.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', 
\'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-discretization.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-discretization.pbtxt index 10f5b9ae1aa..5f78e5637ea 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-discretization.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-discretization.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-hashing.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-hashing.pbtxt index b29b32e1315..51f113d4e4a 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-hashing.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-hashing.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-integer-lookup.pbtxt 
b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-integer-lookup.pbtxt index ca87b769d6a..e4e24a25b7b 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-integer-lookup.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-integer-lookup.pbtxt @@ -144,7 +144,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-normalization.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-normalization.pbtxt index 7045e8512ad..a58ffc1c2a5 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-normalization.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-normalization.pbtxt @@ -143,7 +143,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.pbtxt index 095471dc12a..0fa0355b0f2 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.pbtxt @@ -141,7 +141,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: 
"args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-contrast.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-contrast.pbtxt index b9a330e8bcc..0f575454a80 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-contrast.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-contrast.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-crop.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-crop.pbtxt index f0f1bc45b0e..6425f6c6ed6 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-crop.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-crop.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-flip.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-flip.pbtxt index 8b1d7734d44..245723f1f56 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-flip.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-flip.pbtxt @@ 
-137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-height.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-height.pbtxt index 5a8f67f9487..e56ec9e2dad 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-height.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-height.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-rotation.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-rotation.pbtxt index 0938ec3d684..167fa775273 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-rotation.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-rotation.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: 
"apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-translation.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-translation.pbtxt index cfee60206f1..224417edfdd 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-translation.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-translation.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-width.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-width.pbtxt index 93b272d1105..48bcfbb94b9 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-width.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-width.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-zoom.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-zoom.pbtxt index f0b50e92e66..570f3a3bc03 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-zoom.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-zoom.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', 
\'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-rescaling.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-rescaling.pbtxt index 94b6c730699..fc759ce7ea6 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-rescaling.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-rescaling.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-resizing.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-resizing.pbtxt index 30b20a1ac01..abcb2d4876a 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-resizing.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-resizing.pbtxt @@ -137,7 +137,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-string-lookup.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-string-lookup.pbtxt index 516f2c08571..80da4a3df58 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-string-lookup.pbtxt +++ 
b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-string-lookup.pbtxt @@ -144,7 +144,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-text-vectorization.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-text-vectorization.pbtxt index 2106fa75996..4d5a28fc8b4 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-text-vectorization.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-text-vectorization.pbtxt @@ -143,7 +143,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-model.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-model.pbtxt index ad0edc64606..4368742d7bb 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-model.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-model.pbtxt @@ -158,7 +158,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git 
a/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-sequential.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-sequential.pbtxt index b38c669df0f..8e9409f27a9 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-sequential.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-sequential.pbtxt @@ -164,7 +164,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.nn.-r-n-n-cell-device-wrapper.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.nn.-r-n-n-cell-device-wrapper.pbtxt index e88c74bf97b..e0a352e79bf 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.nn.-r-n-n-cell-device-wrapper.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.nn.-r-n-n-cell-device-wrapper.pbtxt @@ -148,7 +148,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.nn.-r-n-n-cell-dropout-wrapper.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.nn.-r-n-n-cell-dropout-wrapper.pbtxt index d44b2862408..d9a3159309d 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.nn.-r-n-n-cell-dropout-wrapper.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.nn.-r-n-n-cell-dropout-wrapper.pbtxt @@ -152,7 +152,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', 
\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.nn.-r-n-n-cell-residual-wrapper.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.nn.-r-n-n-cell-residual-wrapper.pbtxt index a514f8ceea5..cc3e1399eed 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.nn.-r-n-n-cell-residual-wrapper.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.nn.-r-n-n-cell-residual-wrapper.pbtxt @@ -148,7 +148,7 @@ tf_class { } member_method { name: "add_weight" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " } member_method { name: "apply" From c401f122e1b5f54e9e8892c56ad90d19dab60eee Mon Sep 17 00:00:00 2001 From: Bruce Fontaine Date: Fri, 17 Jul 2020 16:49:20 -0700 Subject: [PATCH 0756/2522] Use TPUStrategyV2 for tpu_outside_compilation_test PiperOrigin-RevId: 321880845 Change-Id: I67ac2dc981ea1981c85856dd23b3b0f8f9abd634 --- tensorflow/python/tpu/tpu_outside_compilation_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/tpu/tpu_outside_compilation_test.py b/tensorflow/python/tpu/tpu_outside_compilation_test.py index 811807378e8..bdba57ca000 100644 --- a/tensorflow/python/tpu/tpu_outside_compilation_test.py +++ b/tensorflow/python/tpu/tpu_outside_compilation_test.py @@ -58,7 +58,7 @@ def get_tpu_strategy(): resolver = get_tpu_cluster_resolver() remote.connect_to_cluster(resolver) tpu_strategy_util.initialize_tpu_system(resolver) - return tpu_lib.TPUStrategy(resolver) + return tpu_lib.TPUStrategyV2(resolver) class TpuOutsideCompilationTest(test.TestCase, parameterized.TestCase): From b318f3f48c747debb903fed6aeb54293a26b4ac4 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 17 Jul 2020 16:53:37 -0700 Subject: [PATCH 0757/2522] Include Ops that are used via PartitionedCalls to MetaGraphDef.MetaInfoDef.stripped_op_list PiperOrigin-RevId: 321881535 Change-Id: I03c450833a421379d9d490ca974167d8264a0e60 --- tensorflow/python/framework/meta_graph.py | 9 ++++++-- .../python/framework/meta_graph_test.py | 23 +++++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/framework/meta_graph.py b/tensorflow/python/framework/meta_graph.py index 327b476c576..dbc2a894d65 100644 --- a/tensorflow/python/framework/meta_graph.py +++ b/tensorflow/python/framework/meta_graph.py @@ -161,12 +161,17 @@ def ops_used_by_graph_def(graph_def): functions_to_process.append(name_to_function[op]) used_ops.add(op) - for node in graph_def.node: + def process_node(node): mark_op_as_used(node.op) + if node.op in ["PartitionedCall", "StatefulPartitionedCall"]: + mark_op_as_used(node.attr["f"].func.name) + + for node in graph_def.node: + process_node(node) while functions_to_process: fun = functions_to_process.pop() for node in fun.node_def: - mark_op_as_used(node.op) + process_node(node) return [op for op in used_ops if op not in name_to_function] diff --git a/tensorflow/python/framework/meta_graph_test.py b/tensorflow/python/framework/meta_graph_test.py index ae44fbce0f0..36acd81fe26 100644 --- a/tensorflow/python/framework/meta_graph_test.py +++ b/tensorflow/python/framework/meta_graph_test.py @@ -161,6 +161,29 @@ class SimpleMetaGraphTest(test.TestCase): op_list = meta_graph.stripped_op_list_for_graph(graph) self.assertEqual(["Const"], [op.name for op in op_list.op]) + def testStrippedOpListPartitionedCalls(self): + # Function A calls B via StatefulPartitionedCall. + graph = graph_pb2.GraphDef() + a = graph.library.function.add() + b = graph.library.function.add() + a.signature.name = "A" + b.signature.name = "B" + node_in_a = a.node_def.add() + node_in_a.op = "StatefulPartitionedCall" + node_in_a.attr["f"].func.name = "B" + b.node_def.add().op = "Const" + b.node_def.add().op = "A" + + # Use A in the graph via PartitionedCall. + node = graph.node.add() + node.op = "PartitionedCall" + node.attr["f"].func.name = "A" + + op_list = meta_graph.stripped_op_list_for_graph(graph) + self.assertSameElements( + ["Const", "PartitionedCall", "StatefulPartitionedCall"], + [op.name for op in op_list.op]) + @test_util.run_deprecated_v1 def testDefaultAttrStripping(self): """Verifies that default attributes are stripped from a graph def.""" From ac2a037cf78e5128c8b9b1a86665be77b93dd468 Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Fri, 17 Jul 2020 16:58:47 -0700 Subject: [PATCH 0758/2522] Enable the ability to allocate TfLiteTensor structs from temp and persistent memory. Upcoming changes to memory allocations will remove the global TfLiteTensor allocation. This change prepares the allocator for internal adjustments to memory requests. When the class fully switches over to TfLiteEvalTensor, the TfLitePtrUnion data buffer will be used instead of the existing large allocation on TfLiteContext. 
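A minimal, self-contained sketch of the head/tail arena behaviour this change relies on may help when reading the diff below. It is an illustration only: the TwoEndedArena class and its helpers are invented for this note and are not the actual SimpleMemoryAllocator or MicroAllocator code. It assumes nothing beyond what the commit message and the new tests state: temporary allocations come from the front of the arena and are discarded by a reset, while persistent allocations come from the tail, which grows downward and lives for the lifetime of the application (power-of-two alignments assumed).

#include <cassert>
#include <cstddef>
#include <cstdint>

// Simplified two-ended arena: temporary allocations grow up from the head,
// persistent allocations grow down from the tail. Names are illustrative.
class TwoEndedArena {
 public:
  TwoEndedArena(uint8_t* buffer, size_t size)
      : buffer_(buffer), head_(0), tail_(size) {}

  // Temporary allocation: only valid until ResetTemp() is called.
  void* AllocateTemp(size_t bytes, size_t alignment) {
    size_t aligned = AlignUp(head_, alignment);
    if (aligned + bytes > tail_) return nullptr;  // would collide with tail
    head_ = aligned + bytes;
    return buffer_ + aligned;
  }

  // Persistent allocation: lives for the lifetime of the arena.
  void* AllocateFromTail(size_t bytes, size_t alignment) {
    if (bytes > tail_ - head_) return nullptr;
    size_t candidate = AlignDown(tail_ - bytes, alignment);
    if (candidate < head_) return nullptr;
    tail_ = candidate;
    return buffer_ + candidate;
  }

  // Releases every temporary allocation at once.
  void ResetTemp() { head_ = 0; }

 private:
  static size_t AlignUp(size_t v, size_t a) { return (v + a - 1) & ~(a - 1); }
  static size_t AlignDown(size_t v, size_t a) { return v & ~(a - 1); }

  uint8_t* buffer_;
  size_t head_;  // next free byte for temporary allocations
  size_t tail_;  // first byte of the persistent (tail) region
};

int main() {
  uint8_t arena[1024];
  TwoEndedArena allocator(arena, sizeof(arena));

  uint8_t* p1 = static_cast<uint8_t*>(allocator.AllocateFromTail(128, 4));
  uint8_t* p2 = static_cast<uint8_t*>(allocator.AllocateFromTail(128, 4));
  // Mirrors the new unit test below: the tail grows downward, so the second
  // persistent allocation has a lower address than the first.
  assert(p2 < p1);

  void* temp = allocator.AllocateTemp(64, 4);
  assert(temp != nullptr);
  allocator.ResetTemp();  // temporary memory may now be reused
  return 0;
}

With that picture, AllocateTempTfLiteTensor plus ResetTempAllocations and AllocatePersistentTfLiteTensor in the diff below map onto AllocateTemp, ResetTemp, and AllocateFromTail in the sketch.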
PiperOrigin-RevId: 321882339 Change-Id: Ia33fe5f3f5f10bb5fce3f4a78fbc4e97a4021dae --- tensorflow/lite/micro/BUILD | 1 + tensorflow/lite/micro/micro_allocator.cc | 56 ++++++++++--- tensorflow/lite/micro/micro_allocator.h | 18 ++++- tensorflow/lite/micro/micro_allocator_test.cc | 78 ++++++++++++++++--- 4 files changed, 127 insertions(+), 26 deletions(-) diff --git a/tensorflow/lite/micro/BUILD b/tensorflow/lite/micro/BUILD index d66b297508b..feaf57d5d2d 100644 --- a/tensorflow/lite/micro/BUILD +++ b/tensorflow/lite/micro/BUILD @@ -297,6 +297,7 @@ tflite_micro_cc_test( ":micro_framework", ":test_helpers", "//tensorflow/lite/micro/testing:micro_test", + "//tensorflow/lite/micro/testing:test_conv_model", ], ) diff --git a/tensorflow/lite/micro/micro_allocator.cc b/tensorflow/lite/micro/micro_allocator.cc index c045323ff4a..173640c3e90 100644 --- a/tensorflow/lite/micro/micro_allocator.cc +++ b/tensorflow/lite/micro/micro_allocator.cc @@ -385,6 +385,8 @@ namespace internal { // Handles architecture safe mapping of flatbuffer vectors to a TfLite*Array // struct. Matching types are required (e.g. float and TfLiteFloatArray). +// Big-endian systems will always allocate dimension array data in the tail +// (persistent) section. template TfLiteStatus FlatBufferVectorToTfLiteTypeArray( SimpleMemoryAllocator* allocator, ErrorReporter* error_reporter, @@ -426,7 +428,8 @@ TfLiteStatus FlatBufferVectorToTfLiteTypeArray( } TfLiteStatus InitializeTfLiteTensorFromFlatbuffer( - SimpleMemoryAllocator* allocator, const tflite::Tensor& flatbuffer_tensor, + SimpleMemoryAllocator* allocator, bool allocate_temp, + const tflite::Tensor& flatbuffer_tensor, const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers, ErrorReporter* error_reporter, TfLiteTensor* result) { *result = {}; @@ -508,9 +511,14 @@ TfLiteStatus InitializeTfLiteTensorFromFlatbuffer( // Populate per-channel quantization params. int channels = src_quantization->scale()->size(); TfLiteAffineQuantization* quantization = - reinterpret_cast<TfLiteAffineQuantization*>( - allocator->AllocateFromTail(sizeof(TfLiteAffineQuantization), - alignof(TfLiteAffineQuantization))); + allocate_temp + ? reinterpret_cast<TfLiteAffineQuantization*>( + allocator->AllocateTemp(sizeof(TfLiteAffineQuantization), + alignof(TfLiteAffineQuantization))) + : reinterpret_cast<TfLiteAffineQuantization*>( + allocator->AllocateFromTail( + sizeof(TfLiteAffineQuantization), + alignof(TfLiteAffineQuantization))); if (quantization == nullptr) { TF_LITE_REPORT_ERROR(error_reporter, "Unable to allocate TfLiteAffineQuantization.\n"); @@ -521,8 +529,13 @@ TfLiteStatus InitializeTfLiteTensorFromFlatbuffer( // buffer. This value can not be reused from the flatbuffer since the // zero_point is stored as a int64_t. quantization->zero_point = - reinterpret_cast<TfLiteIntArray*>(allocator->AllocateFromTail( - TfLiteIntArrayGetSizeInBytes(channels), alignof(TfLiteIntArray))); + allocate_temp + ? reinterpret_cast<TfLiteIntArray*>(allocator->AllocateFromTail( + TfLiteIntArrayGetSizeInBytes(channels), + alignof(TfLiteIntArray))) + : reinterpret_cast<TfLiteIntArray*>(allocator->AllocateFromTail( + TfLiteIntArrayGetSizeInBytes(channels), + alignof(TfLiteIntArray))); if (quantization->zero_point == nullptr) { TF_LITE_REPORT_ERROR(error_reporter, "Unable to allocate quantization->zero_point.\n"); @@ -719,8 +732,9 @@ TfLiteStatus MicroAllocator::PopulateTfLiteTensorArrayFromFlatbuffer( // Initialize tensors in context_ using the flatbuffer for quantization data.
for (size_t i = 0; i < subgraph->tensors()->size(); ++i) { TfLiteStatus status = internal::InitializeTfLiteTensorFromFlatbuffer( - memory_allocator_, *subgraph->tensors()->Get(i), model->buffers(), - error_reporter_, &context->tensors[i]); + memory_allocator_, /*allocate_temp=*/false, + *subgraph->tensors()->Get(i), model->buffers(), error_reporter_, + &context->tensors[i]); if (status != kTfLiteOk) { TF_LITE_REPORT_ERROR(error_reporter_, "Failed to initialize tensor %d", i); @@ -833,8 +847,25 @@ TfLiteStatus MicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer( return kTfLiteOk; } -TfLiteTensor* MicroAllocator::AllocateTfLiteTensor(const Model* model, - int subgraph_idx) { +TfLiteTensor* MicroAllocator::AllocatePersistentTfLiteTensor(const Model* model, + int tensor_index) { + const SubGraph* subgraph = GetSubGraphFromModel(model); + TFLITE_DCHECK(subgraph != nullptr); + + // This value is allocated from persistent arena space. It is guaranteed to be + // around for the lifetime of the application. + TfLiteTensor* tensor = + reinterpret_cast<TfLiteTensor*>(memory_allocator_->AllocateFromTail( + sizeof(TfLiteTensor), alignof(TfLiteTensor))); + internal::InitializeTfLiteTensorFromFlatbuffer( + memory_allocator_, /*allocate_temp=*/false, + *subgraph->tensors()->Get(tensor_index), model->buffers(), + error_reporter_, tensor); + return tensor; +} + +TfLiteTensor* MicroAllocator::AllocateTempTfLiteTensor(const Model* model, + int tensor_index) { const SubGraph* subgraph = GetSubGraphFromModel(model); TFLITE_DCHECK(subgraph != nullptr); @@ -845,8 +876,9 @@ TfLiteTensor* MicroAllocator::AllocateTfLiteTensor(const Model* model, reinterpret_cast<TfLiteTensor*>(memory_allocator_->AllocateTemp( sizeof(TfLiteTensor), alignof(TfLiteTensor))); internal::InitializeTfLiteTensorFromFlatbuffer( - memory_allocator_, *subgraph->tensors()->Get(subgraph_idx), - model->buffers(), error_reporter_, tensor); + memory_allocator_, /*allocate_temp=*/true, + *subgraph->tensors()->Get(tensor_index), model->buffers(), + error_reporter_, tensor); return tensor; } diff --git a/tensorflow/lite/micro/micro_allocator.h b/tensorflow/lite/micro/micro_allocator.h index b38b936929b..58e274f4efe 100644 --- a/tensorflow/lite/micro/micro_allocator.h +++ b/tensorflow/lite/micro/micro_allocator.h @@ -33,8 +33,13 @@ namespace internal { // Sets up all of the data structure members for a TfLiteTensor based on the // contents of a serialized tensor in the flatbuffer. +// TODO(b/160894903): Once all kernels have been updated to the new +// TfLiteEvalTensor API - drop the allocate_temp flag. This enables internal +// flatbuffer quantization or dimension allocations to take place in either the +// temp or tail section of the arena. TfLiteStatus InitializeTfLiteTensorFromFlatbuffer( SimpleMemoryAllocator* allocator, bool allocate_temp, const tflite::Tensor& flatbuffer_tensor, const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers, ErrorReporter* error_reporter, TfLiteTensor* result); @@ -119,12 +124,19 @@ class MicroAllocator { TfLiteStatus FinishModelAllocation(const Model* model, TfLiteContext* context); + // Allocates a TfLiteTensor struct and populates the returned value with + // properties from the model flatbuffer. This struct is allocated from + // persistent arena memory and is only guaranteed for the lifetime of the + // application.
+ virtual TfLiteTensor* AllocatePersistentTfLiteTensor(const Model* model, + int tensor_index); + // Allocates a TfLiteTensor struct and populates the returned value with // properties from the model flatbuffer. This struct is allocated from // temporary arena memory is only guaranteed until a call is made to // ResetTempAllocations(). - virtual TfLiteTensor* AllocateTfLiteTensor(const Model* model, - int subgraph_idx); + virtual TfLiteTensor* AllocateTempTfLiteTensor(const Model* model, + int tensor_index); // Resets all temporary allocations. This method should be called after a // chain of temp allocations (e.g. chain of TfLiteTensor objects via diff --git a/tensorflow/lite/micro/micro_allocator_test.cc b/tensorflow/lite/micro/micro_allocator_test.cc index 67da95c3b0a..f9348cc7425 100644 --- a/tensorflow/lite/micro/micro_allocator_test.cc +++ b/tensorflow/lite/micro/micro_allocator_test.cc @@ -20,6 +20,7 @@ limitations under the License. #include "tensorflow/lite/micro/simple_memory_allocator.h" #include "tensorflow/lite/micro/test_helpers.h" #include "tensorflow/lite/micro/testing/micro_test.h" +#include "tensorflow/lite/micro/testing/test_conv_model.h" namespace tflite { namespace testing { @@ -86,8 +87,8 @@ TF_LITE_MICRO_TEST(TestInitializeRuntimeTensor) { TfLiteTensor allocated_tensor; TF_LITE_MICRO_EXPECT_EQ( kTfLiteOk, tflite::internal::InitializeTfLiteTensorFromFlatbuffer( - simple_allocator, *tensor, buffers, micro_test::reporter, - &allocated_tensor)); + simple_allocator, /*allocate_temp=*/false, *tensor, + buffers, micro_test::reporter, &allocated_tensor)); TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt32, allocated_tensor.type); TF_LITE_MICRO_EXPECT_EQ(1, allocated_tensor.dims->size); TF_LITE_MICRO_EXPECT_EQ(100, allocated_tensor.dims->data[0]); @@ -98,6 +99,36 @@ TF_LITE_MICRO_TEST(TestInitializeRuntimeTensor) { simple_allocator->~SimpleMemoryAllocator(); } +// TODO(b/160894903): Drop this test when InitializeTfLiteTensorFromFlatbuffer() +// always allocates from temp (kernels are using the new TfLiteEvalTensor API): +TF_LITE_MICRO_TEST(TestInitializeTempRuntimeTensor) { + constexpr size_t arena_size = 1024; + uint8_t arena[arena_size]; + tflite::SimpleMemoryAllocator* simple_allocator = + tflite::SimpleMemoryAllocator::Create(micro_test::reporter, arena, + arena_size); + + const tflite::Tensor* tensor = tflite::testing::Create1dFlatbufferTensor(100); + const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers = + tflite::testing::CreateFlatbufferBuffers(); + + TfLiteTensor allocated_temp_tensor; + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, tflite::internal::InitializeTfLiteTensorFromFlatbuffer( + simple_allocator, /*allocate_temp=*/true, *tensor, buffers, + micro_test::reporter, &allocated_temp_tensor)); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt32, allocated_temp_tensor.type); + TF_LITE_MICRO_EXPECT_EQ(1, allocated_temp_tensor.dims->size); + TF_LITE_MICRO_EXPECT_EQ(100, allocated_temp_tensor.dims->data[0]); + TF_LITE_MICRO_EXPECT_EQ(static_cast<size_t>(400), + allocated_temp_tensor.bytes); + TF_LITE_MICRO_EXPECT(nullptr == allocated_temp_tensor.data.i32); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteArenaRw, + allocated_temp_tensor.allocation_type); + + simple_allocator->~SimpleMemoryAllocator(); +} + TF_LITE_MICRO_TEST(TestInitializeQuantizedTensor) { constexpr size_t arena_size = 1024; uint8_t arena[arena_size]; @@ -113,8 +144,8 @@ TF_LITE_MICRO_TEST(TestInitializeQuantizedTensor) { TfLiteTensor allocated_tensor; TF_LITE_MICRO_EXPECT_EQ( kTfLiteOk, tflite::internal::InitializeTfLiteTensorFromFlatbuffer( - simple_allocator, *tensor,
buffers, micro_test::reporter, - &allocated_tensor)); + simple_allocator, /*allocate_temp=*/false, *tensor, + buffers, micro_test::reporter, &allocated_tensor)); TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt32, allocated_tensor.type); TF_LITE_MICRO_EXPECT_EQ(1, allocated_tensor.dims->size); TF_LITE_MICRO_EXPECT_EQ(100, allocated_tensor.dims->data[0]); @@ -140,8 +171,8 @@ TF_LITE_MICRO_TEST(TestMissingQuantization) { TfLiteTensor allocated_tensor; TF_LITE_MICRO_EXPECT_EQ( kTfLiteOk, tflite::internal::InitializeTfLiteTensorFromFlatbuffer( - simple_allocator, *tensor, buffers, micro_test::reporter, - &allocated_tensor)); + simple_allocator, /*allocate_temp=*/false, *tensor, + buffers, micro_test::reporter, &allocated_tensor)); TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt32, allocated_tensor.type); TF_LITE_MICRO_EXPECT_EQ(1, allocated_tensor.dims->size); TF_LITE_MICRO_EXPECT_EQ(100, allocated_tensor.dims->data[0]); @@ -515,6 +546,31 @@ TF_LITE_MICRO_TEST(OfflinePlannerOfflineOnline) { TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[3].data.uint8 - start); } +TF_LITE_MICRO_TEST(TestAllocatePersistentTfLiteTensor) { + const tflite::Model* model = tflite::GetModel(kTestConvModelData); + constexpr size_t arena_size = 1024 * 12; + uint8_t arena[arena_size]; + tflite::MicroAllocator* allocator = + tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter); + TF_LITE_MICRO_EXPECT_NE(allocator, nullptr); + + TfLiteTensor* tensor1 = + allocator->AllocatePersistentTfLiteTensor(model, /*tensor_index=*/1); + TF_LITE_MICRO_EXPECT_NE(tensor1, nullptr); + TF_LITE_MICRO_EXPECT_NE(tensor1->quantization.params, nullptr); + TF_LITE_MICRO_EXPECT_FALSE(tensor1->is_variable); + + TfLiteTensor* tensor2 = + allocator->AllocatePersistentTfLiteTensor(model, /*tensor_index=*/2); + TF_LITE_MICRO_EXPECT_NE(tensor2, nullptr); + TF_LITE_MICRO_EXPECT_NE(tensor2->quantization.params, nullptr); + TF_LITE_MICRO_EXPECT_FALSE(tensor2->is_variable); + + // The address of tensor1 should be higher than the address of tensor2 since + // persistent allocations take place in the tail which grows downward. 
+ TF_LITE_MICRO_EXPECT_GT(tensor1, tensor2); +} + TF_LITE_MICRO_TEST(TestAllocateSingleTfLiteTensor) { const tflite::Model* model = tflite::testing::GetSimpleMockModel(); constexpr size_t arena_size = 1024; @@ -524,7 +580,7 @@ TF_LITE_MICRO_TEST(TestAllocateSingleTfLiteTensor) { TF_LITE_MICRO_EXPECT_NE(allocator, nullptr); TfLiteTensor* tensor1 = - allocator->AllocateTfLiteTensor(model, /*subgraph_idx=*/1); + allocator->AllocateTempTfLiteTensor(model, /*tensor_index=*/1); TF_LITE_MICRO_EXPECT_NE(tensor1, nullptr); } @@ -537,11 +593,11 @@ TF_LITE_MICRO_TEST(TestAllocateChainOfTfLiteTensor) { TF_LITE_MICRO_EXPECT_NE(allocator, nullptr); TfLiteTensor* tensor1 = - allocator->AllocateTfLiteTensor(model, /*subgraph_idx=*/1); + allocator->AllocateTempTfLiteTensor(model, /*tensor_index=*/1); TF_LITE_MICRO_EXPECT_NE(tensor1, nullptr); TfLiteTensor* tensor2 = - allocator->AllocateTfLiteTensor(model, /*subgraph_idx=*/2); + allocator->AllocateTempTfLiteTensor(model, /*tensor_index=*/3); TF_LITE_MICRO_EXPECT_NE(tensor1, nullptr); // The address of tensor2 should be higher than the address of tensor1 @@ -558,13 +614,13 @@ TF_LITE_MICRO_TEST(TestAllocateTfLiteTensorWithReset) { TF_LITE_MICRO_EXPECT(allocator != nullptr); TfLiteTensor* tensor1 = - allocator->AllocateTfLiteTensor(model, /*subgraph_idx=*/1); + allocator->AllocateTempTfLiteTensor(model, /*tensor_index=*/1); TF_LITE_MICRO_EXPECT(tensor1 != nullptr); allocator->ResetTempAllocations(); TfLiteTensor* tensor2 = - allocator->AllocateTfLiteTensor(model, /*subgraph_idx=*/2); + allocator->AllocateTempTfLiteTensor(model, /*tensor_index=*/2); TF_LITE_MICRO_EXPECT(tensor1 != nullptr); // The address of tensor2 should be equal than the address of tensor1 since From 5605ee70c78cc5f35acd948548de07789bf925ad Mon Sep 17 00:00:00 2001 From: Chenkai Kuang Date: Fri, 17 Jul 2020 16:59:03 -0700 Subject: [PATCH 0759/2522] Doc fixit of tf.distribute.ReplicaContext: class doc string and attributes. PiperOrigin-RevId: 321882389 Change-Id: Ibd70e1bc29b030bb6422308876ba383ed864d179 --- .../python/distribute/distribute_lib.py | 26 +++++++++++++------ 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/tensorflow/python/distribute/distribute_lib.py b/tensorflow/python/distribute/distribute_lib.py index 216f8a6ff66..41ec725d3ab 100644 --- a/tensorflow/python/distribute/distribute_lib.py +++ b/tensorflow/python/distribute/distribute_lib.py @@ -2827,11 +2827,21 @@ class StrategyExtendedV1(StrategyExtendedV2): # and switches the thread mode to a "cross-replica context". @tf_export("distribute.ReplicaContext") class ReplicaContext(object): - """`tf.distribute.Strategy` API when in a replica context. + """A class with a collection of APIs that can be called in a replica context. You can use `tf.distribute.get_replica_context` to get an instance of - `ReplicaContext`. This should be inside your replicated step function, such - as in a `tf.distribute.Strategy.run` call. + `ReplicaContext`, which can only be called inside the function passed to + `tf.distribute.Strategy.run`. + + >>> strategy = tf.distribute.MirroredStrategy(['GPU:0', 'GPU:1']) + >>> def func(): + ... replica_context = tf.distribute.get_replica_context() + ... 
return replica_context.replica_id_in_sync_group + >>> strategy.run(func) + PerReplica:{ + 0: , + 1: + } """ def __init__(self, strategy, replica_id_in_sync_group): @@ -2906,16 +2916,16 @@ class ReplicaContext(object): @property def num_replicas_in_sync(self): - """Returns number of replicas over which gradients are aggregated.""" + """Returns number of replicas that are kept in sync.""" return self._strategy.num_replicas_in_sync @property def replica_id_in_sync_group(self): - """Returns the id of the replica being defined. + """Returns the id of the replica. - This identifies the replica that is part of a sync group. Currently we - assume that all sync groups contain the same number of replicas. The value - of the replica id can range from 0 to `num_replica_in_sync` - 1. + This identifies the replica among all replicas that are kept in sync. The + value of the replica id can range from 0 to + `tf.distribute.ReplicaContext.num_replicas_in_sync` - 1. NOTE: This is not guaranteed to be the same ID as the XLA replica ID use for low-level operations such as collective_permute. From c68614a2bd8a8797e9f7d911b18b97c28c5918f1 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Fri, 17 Jul 2020 16:59:51 -0700 Subject: [PATCH 0760/2522] Fix bug in TensorStridedSliceAssign where forwarding inputs forgets to set outputs. Also adds the first unit test of tensor_strided_slice_assign... PiperOrigin-RevId: 321882495 Change-Id: I36867a942aea8af70954eb538d6b2629f9f1f3ee --- tensorflow/core/kernels/strided_slice_op.cc | 20 ++----------------- .../python/kernel_tests/array_ops_test.py | 8 ++++++++ 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/tensorflow/core/kernels/strided_slice_op.cc b/tensorflow/core/kernels/strided_slice_op.cc index b4099213303..fc08fa8ff7d 100644 --- a/tensorflow/core/kernels/strided_slice_op.cc +++ b/tensorflow/core/kernels/strided_slice_op.cc @@ -305,25 +305,9 @@ class StridedSliceAssignOp : public OpKernel { Tensor tmp; if (isTensor) { const Tensor& input = context->input(0); - TensorShape shape = input.shape(); - std::unique_ptr forwarded_input = context->forward_input( - 0, 0, input.dtype(), shape, DEVICE_MEMORY, AllocatorAttributes()); - - if (forwarded_input == nullptr) { - Tensor* out; - // We were not able to forward the input, so we deep copy the tensor and - // set the output. 
- OP_REQUIRES_OK(context, - context->allocate_output(0, input.shape(), &out)); - - OP_REQUIRES_OK(context, - tensorflow::functor::DoCopy( - context->eigen_device(), input, out)); - old_lhs = out; - } else { - old_lhs = forwarded_input.get(); - } + OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( + {0}, 0, input.shape(), &old_lhs)); } else { if (context->input_dtype(0) == DT_RESOURCE) { core::RefCountPtr v; diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py index af2b28a1033..829be7ffe45 100644 --- a/tensorflow/python/kernel_tests/array_ops_test.py +++ b/tensorflow/python/kernel_tests/array_ops_test.py @@ -1227,6 +1227,14 @@ class SliceAssignTest(test_util.TensorFlowTestCase): with self.assertRaises(ValueError): sess.run(v[:].assign(too_small_val)) + @test_util.run_in_graph_and_eager_modes + def testTensorStridedSliceAssign(self): + @def_function.function + def assign(x): + y = x + 1 + return gen_array_ops.tensor_strided_slice_update(y, [0], [1], [1], [0]) + self.assertAllEqual([0, 1], self.evaluate(assign(array_ops.zeros([2])))) + class ShapeSizeRankTest(test_util.TensorFlowTestCase): From 9d77b8e74dae8527b20e198fdf1035e3ad81d1be Mon Sep 17 00:00:00 2001 From: Jonathan Chu Date: Sat, 18 Jul 2020 00:32:54 +0000 Subject: [PATCH 0761/2522] Add minor comment to test PR --- tensorflow/python/eager/function.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 709ecaa37e6..43717ebf2f4 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -2163,6 +2163,7 @@ class ConcreteFunction(object): Returns: The actual call output. """ + # TODO: implement in C++ if self._func_graph.structured_outputs is None: return result From 2a9fc6d9cba7068a507ec85a2112e953e464bf60 Mon Sep 17 00:00:00 2001 From: Robert David Date: Fri, 17 Jul 2020 17:32:14 -0700 Subject: [PATCH 0762/2522] Remove default parameter for asymmetric_quantize_inputs. 
PiperOrigin-RevId: 321886805 Change-Id: I373b6b1ca3cae0b7da65d7c2375e1032f1b194a8 --- tensorflow/lite/kernels/lstm_test.cc | 129 ++++++++++++++------------- 1 file changed, 65 insertions(+), 64 deletions(-) diff --git a/tensorflow/lite/kernels/lstm_test.cc b/tensorflow/lite/kernels/lstm_test.cc index a9023dce371..1a42d637c08 100644 --- a/tensorflow/lite/kernels/lstm_test.cc +++ b/tensorflow/lite/kernels/lstm_test.cc @@ -41,7 +41,7 @@ class LSTMOpModel : public SingleOpModel { bool use_projection_bias, float cell_clip, float proj_clip, const std::vector>& input_shapes, const TensorType weight_type, bool is_layer_norm, - bool asymmetric_quantize_inputs = false) + bool asymmetric_quantize_inputs) : n_batch_(n_batch), n_input_(n_input), n_cell_(n_cell), @@ -504,8 +504,8 @@ TEST_F(NoCifgNoPeepholeNoProjectionNoClippingLstmTest, LstmBlackBoxTest) { {0, 0}, // projection_weight tensor {0}, // projection_bias tensor }, - /*weight_type=*/TensorType_FLOAT32, - /*is_layer_norm=*/false); + /*weight_type=*/TensorType_FLOAT32, /*is_layer_norm=*/false, + /*asymmetric_quantize_inputs=*/false); VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm); } @@ -559,8 +559,8 @@ TEST_F(NoCifgNoPeepholeNoProjectionNoClippingOmittedLayerNormLstmTest, {0}, // cell_layer_norm_coefficient tensor {0}, // output_layer_norm_coefficient tensor }, - /*weight_type=*/TensorType_FLOAT32, - /*is_layer_norm=*/true); + /*weight_type=*/TensorType_FLOAT32, /*is_layer_norm=*/true, + /*asymmetric_quantize_inputs=*/false); VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm); } @@ -607,8 +607,8 @@ TEST_P(NoCifgNoPeepholeNoProjectionNoClippingLstmTest, {0, 0}, // projection_weight tensor {0}, // projection_bias tensor }, - /*weight_type=*/TensorType_UINT8, - /*is_layer_norm=*/false, GetParam()); + /*weight_type=*/TensorType_UINT8, /*is_layer_norm=*/false, + /*asymmetric_quantize_inputs=*/GetParam()); VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm, /*tolerance=*/0.0157651); @@ -658,8 +658,8 @@ TEST_P(NoCifgNoPeepholeNoProjectionNoClippingLstmInt8Test, {0, 0}, // projection_weight tensor {0}, // projection_bias tensor }, - /*weight_type=*/TensorType_INT8, - /*is_layer_norm=*/false, GetParam()); + /*weight_type=*/TensorType_INT8, /*is_layer_norm=*/false, + /*asymmetric_quantize_inputs=*/GetParam()); VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm, /*tolerance=*/0.0157651); @@ -749,8 +749,8 @@ TEST_F(CifgNoPeepholeNoProjectionNoClippingLstmTest, LstmBlackBoxTest) { {0, 0}, // projection_weight tensor {0}, // projection_bias tensor }, - /*weight_type=*/TensorType_FLOAT32, - /*is_layer_norm=*/false); + /*weight_type=*/TensorType_FLOAT32, /*is_layer_norm=*/false, + /*asymmetric_quantize_inputs=*/false); VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm); } @@ -797,8 +797,8 @@ TEST_P(CifgNoPeepholeNoProjectionNoClippingLstmTest, {0, 0}, // projection_weight tensor {0}, // projection_bias tensor }, - /*weight_type=*/TensorType_UINT8, - /*is_layer_norm=*/false, GetParam()); + /*weight_type=*/TensorType_UINT8, /*is_layer_norm=*/false, + /*asymmetric_quantize_inputs=*/GetParam()); VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm, /*tolerance=*/0.03573); } @@ -846,8 +846,8 @@ TEST_P(CifgNoPeepholeNoProjectionNoClippingLstmInt8Test, {0, 0}, // projection_weight tensor {0}, // projection_bias tensor }, - /*weight_type=*/TensorType_INT8, - /*is_layer_norm=*/false, GetParam()); + /*weight_type=*/TensorType_INT8, /*is_layer_norm=*/false, + /*asymmetric_quantize_inputs=*/GetParam()); VerifyGoldens(lstm_input_, lstm_golden_output_, 
&lstm, /*tolerance=*/0.03573); } @@ -1487,8 +1487,8 @@ TEST_F(NoCifgPeepholeProjectionNoClippingLstmTest, LstmBlackBoxTest) { {n_output, n_cell}, // projection_weight tensor {0}, // projection_bias tensor }, - /*weight_type=*/TensorType_FLOAT32, - /*is_layer_norm=*/false); + /*weight_type=*/TensorType_FLOAT32, /*is_layer_norm=*/false, + /*asymmetric_quantize_inputs=*/false); VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm); } @@ -1534,8 +1534,8 @@ TEST_P(NoCifgPeepholeProjectionNoClippingLstmTest, {n_output, n_cell}, // projection_weight tensor {0}, // projection_bias tensor }, - /*weight_type=*/TensorType_UINT8, - /*is_layer_norm=*/false, GetParam()); + /*weight_type=*/TensorType_UINT8, /*is_layer_norm=*/false, + /*asymmetric_quantize_inputs=*/GetParam()); VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm, /*tolerance=*/0.00467); } @@ -1583,8 +1583,8 @@ TEST_P(NoCifgPeepholeProjectionNoClippingLstmInt8Test, {n_output, n_cell}, // projection_weight tensor {0}, // projection_bias tensor }, - /*weight_type=*/TensorType_INT8, - /*is_layer_norm=*/false, GetParam()); + /*weight_type=*/TensorType_INT8, /*is_layer_norm=*/false, + /*asymmetric_quantize_inputs=*/GetParam()); VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm, /*tolerance=*/0.0015); } @@ -1703,8 +1703,8 @@ TEST_F(NoCifgPeepholeProjectionNoClippingLayerNormLstmTest, {n_cell}, // cell_layer_norm_coefficient tensor {n_cell}, // output_layer_norm_coefficient tensor }, - /*weight_type=*/TensorType_FLOAT32, - /*is_layer_norm=*/true); + /*weight_type=*/TensorType_FLOAT32, /*is_layer_norm=*/true, + /*asymmetric_quantize_inputs=*/false); // Verify the final output. lstm_golden_output_ = {{ @@ -1774,8 +1774,8 @@ TEST_P(NoCifgPeepholeProjectionNoClippingLayerNormLstmTest, {n_cell}, // cell_layer_norm_coefficient tensor {n_cell}, // output_layer_norm_coefficient tensor }, - /*weight_type=*/TensorType_UINT8, - /*is_layer_norm=*/true, GetParam()); + /*weight_type=*/TensorType_UINT8, /*is_layer_norm=*/true, + /*asymmetric_quantize_inputs=*/GetParam()); lstm_golden_output_ = {{ // Batch0: 3 (input_sequence_size) * 3 (n_output) @@ -1847,8 +1847,8 @@ TEST_P(NoCifgPeepholeProjectionNoClippingLayerNormLstmInt8Test, {n_cell}, // cell_layer_norm_coefficient tensor {n_cell}, // output_layer_norm_coefficient tensor }, - /*weight_type=*/TensorType_INT8, - /*is_layer_norm=*/true, GetParam()); + /*weight_type=*/TensorType_INT8, /*is_layer_norm=*/true, + /*asymmetric_quantize_inputs=*/GetParam()); // Goldens are calculated from weight_type=TensorType_FLOAT32. lstm_golden_output_ = {{ @@ -1961,8 +1961,8 @@ TEST_F(CifgPeepholeProjectionNoClippingLayerNormLstmTest, {n_cell}, // cell_layer_norm_coefficient tensor {n_cell}, // output_layer_norm_coefficient tensor }, - /*weight_type=*/TensorType_FLOAT32, - /*is_layer_norm=*/true); + /*weight_type=*/TensorType_FLOAT32, /*is_layer_norm=*/true, + /*asymmetric_quantize_inputs=*/false); // Verify the final output. lstm_golden_output_ = { @@ -2032,8 +2032,8 @@ TEST_P(CifgPeepholeProjectionNoClippingLayerNormLstmTest, {n_cell}, // cell_layer_norm_coefficient tensor {n_cell}, // output_layer_norm_coefficient tensor }, - /*weight_type=*/TensorType_UINT8, - /*is_layer_norm=*/true, GetParam()); + /*weight_type=*/TensorType_UINT8, /*is_layer_norm=*/true, + /*asymmetric_quantize_inputs=*/GetParam()); // Verify the final output. 
lstm_golden_output_ = { @@ -2104,8 +2104,8 @@ TEST_P(CifgPeepholeProjectionNoClippingLayerNormLstmInt8Test, {n_cell}, // cell_layer_norm_coefficient tensor {n_cell}, // output_layer_norm_coefficient tensor }, - /*weight_type=*/TensorType_INT8, - /*is_layer_norm=*/true, GetParam()); + /*weight_type=*/TensorType_INT8, /*is_layer_norm=*/true, + /*asymmetric_quantize_inputs=*/GetParam()); // Goldens are results using FLOAT32 inference. lstm_golden_output_ = {{ @@ -3309,44 +3309,45 @@ TEST(LSTMOpModel, InvalidTypeTest) { {0, 0}, // projection_weight tensor {0}, // projection_bias tensor }, - /*weight_type=*/TensorType_INT32, - /*is_layer_norm=*/false), + /*weight_type=*/TensorType_INT32, /*is_layer_norm=*/false, + /*asymmetric_quantize_inputs=*/false), ""); - EXPECT_DEATH(LSTMOpModel lstm( - n_batch, n_input, n_cell, n_output, - /*use_cifg=*/false, /*use_peephole=*/false, - /*use_projection_weights=*/false, - /*use_projection_bias=*/false, - /*cell_clip=*/0.0, /*proj_clip=*/0.0, - { - {n_batch, n_input}, // input tensor + EXPECT_DEATH( + LSTMOpModel lstm( + n_batch, n_input, n_cell, n_output, + /*use_cifg=*/false, /*use_peephole=*/false, + /*use_projection_weights=*/false, + /*use_projection_bias=*/false, + /*cell_clip=*/0.0, /*proj_clip=*/0.0, + { + {n_batch, n_input}, // input tensor - {n_cell, n_input}, // input_to_input_weight tensor - {n_cell, n_input}, // input_to_forget_weight tensor - {n_cell, n_input}, // input_to_cell_weight tensor - {n_cell, n_input}, // input_to_output_weight tensor + {n_cell, n_input}, // input_to_input_weight tensor + {n_cell, n_input}, // input_to_forget_weight tensor + {n_cell, n_input}, // input_to_cell_weight tensor + {n_cell, n_input}, // input_to_output_weight tensor - {n_cell, n_output}, // recurrent_to_input_weight_tensor - {n_cell, n_output}, // recurrent_to_forget_weight_tensor - {n_cell, n_output}, // recurrent_to_cell_weight_tensor - {n_cell, n_output}, // recurrent_to_output_weight_tensor + {n_cell, n_output}, // recurrent_to_input_weight_tensor + {n_cell, n_output}, // recurrent_to_forget_weight_tensor + {n_cell, n_output}, // recurrent_to_cell_weight_tensor + {n_cell, n_output}, // recurrent_to_output_weight_tensor - {0}, // cell_to_input_weight tensor - {0}, // cell_to_forget_weight tensor - {0}, // cell_to_output_weight tensor + {0}, // cell_to_input_weight tensor + {0}, // cell_to_forget_weight tensor + {0}, // cell_to_output_weight tensor - {n_cell}, // input_gate_bias tensor - {n_cell}, // forget_gate_bias tensor - {n_cell}, // cell_gate_bias tensor - {n_cell}, // output_gate_bias tensor + {n_cell}, // input_gate_bias tensor + {n_cell}, // forget_gate_bias tensor + {n_cell}, // cell_gate_bias tensor + {n_cell}, // output_gate_bias tensor - {0, 0}, // projection_weight tensor - {0}, // projection_bias tensor - }, - /*weight_type=*/TensorType_COMPLEX64, - /*is_layer_norm=*/false), - ""); + {0, 0}, // projection_weight tensor + {0}, // projection_bias tensor + }, + /*weight_type=*/TensorType_COMPLEX64, /*is_layer_norm=*/false, + /*asymmetric_quantize_inputs=*/false), + ""); } #endif From 1cbaa0c469c5e13ecd8ee155d5c975f5f1a2b23e Mon Sep 17 00:00:00 2001 From: Stella Laurenzo Date: Fri, 17 Jul 2020 17:37:01 -0700 Subject: [PATCH 0763/2522] Integrate LLVM at https://github.com/llvm/llvm-project/commit/cf5df40c4cf1 PiperOrigin-RevId: 321887282 Change-Id: I44f02ccce148fef6dae76c65077b85306ece8005 --- tensorflow/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 
34cc9be490c..716af27ff12 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -710,8 +710,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "5d06e8b24f97202764c7522efcfb6e2febdce6b7" - LLVM_SHA256 = "72d8a43ecfca1f0a90e9e7a7a42d4658fe385780d5603105de511edc8228f174" + LLVM_COMMIT = "cf5df40c4cf1a53a02ab1d56a488642e3dda8f6d" + LLVM_SHA256 = "64abb1a7b30578b81e5d15469b8437cbb7e16847444e258e71e07247983cb63b" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From 19448cf8b9dba7edc6905d822250bb51193057fe Mon Sep 17 00:00:00 2001 From: Wenhao Jia Date: Fri, 17 Jul 2020 17:38:21 -0700 Subject: [PATCH 0764/2522] Add a TPU execution op. PiperOrigin-RevId: 321887408 Change-Id: I8f4686189e98c28da00f56f7530a95bce2a2f17c --- tensorflow/core/tpu/kernels/BUILD | 41 + tensorflow/core/tpu/kernels/tpu_execute_op.cc | 809 ++++++++++++++++++ tensorflow/core/tpu/kernels/tpu_execute_op.h | 67 ++ 3 files changed, 917 insertions(+) create mode 100644 tensorflow/core/tpu/kernels/tpu_execute_op.cc create mode 100644 tensorflow/core/tpu/kernels/tpu_execute_op.h diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index 7a6160a2963..af7c9ead791 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -553,3 +553,44 @@ cc_library( ], alwayslink = 1, ) + +cc_library( + name = "tpu_execute_op", + srcs = ["tpu_execute_op.cc"], + hdrs = ["tpu_execute_op.h"], + deps = [ + ":tpu_compilation_cache_entry", + ":tpu_compilation_cache_external", + ":tpu_compilation_cache_local_lookup", + ":tpu_compilation_cache_lookup", + ":tpu_executable_info_proto_cc", + ":tpu_op_consts", + "//tensorflow/compiler/jit:xla_device", + "//tensorflow/compiler/jit:xla_launch_util", + "//tensorflow/compiler/jit:xla_tensor", + "//tensorflow/compiler/tf2xla:common", + "//tensorflow/compiler/tf2xla:tf2xla_util", + "//tensorflow/compiler/xla:debug_options_flags", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:xla_data_proto_cc", + "//tensorflow/compiler/xla/service:dump", + "//tensorflow/compiler/xla/service:executable", + "//tensorflow/compiler/xla/service:maybe_owning_device_memory", + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:stream_executor_no_cuda", + "//tensorflow/core/profiler/lib:traceme", + "//tensorflow/core/tpu:tpu_configuration", + "//tensorflow/core/tpu:tpu_defs", + "//tensorflow/core/tpu:tpu_execute", + "//tensorflow/stream_executor:device_memory_allocator", + "//tensorflow/stream_executor/tpu:tpu_node_context", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/types:span", + ], + alwayslink = True, +) diff --git a/tensorflow/core/tpu/kernels/tpu_execute_op.cc b/tensorflow/core/tpu/kernels/tpu_execute_op.cc new file mode 100644 index 00000000000..8060aa95f17 --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_execute_op.cc @@ -0,0 +1,809 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/tpu/kernels/tpu_execute_op.h" + +#include + +#include "absl/container/flat_hash_map.h" +#include "absl/memory/memory.h" +#include "absl/types/span.h" +#include "tensorflow/compiler/jit/xla_device.h" +#include "tensorflow/compiler/jit/xla_launch_util.h" +#include "tensorflow/compiler/jit/xla_tensor.h" +#include "tensorflow/compiler/tf2xla/shape_util.h" +#include "tensorflow/compiler/tf2xla/tf2xla_util.h" +#include "tensorflow/compiler/xla/debug_options_flags.h" +#include "tensorflow/compiler/xla/service/dump.h" +#include "tensorflow/compiler/xla/service/executable.h" +#include "tensorflow/compiler/xla/service/maybe_owning_device_memory.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/resource_mgr.h" +#include "tensorflow/core/framework/resource_var.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/platform/stream_executor_no_cuda.h" +#include "tensorflow/core/platform/tracing.h" +#include "tensorflow/core/profiler/lib/traceme.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_external.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_local_lookup.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.h" +#include "tensorflow/core/tpu/kernels/tpu_executable_info.pb.h" +#include "tensorflow/core/tpu/kernels/tpu_op_consts.h" +#include "tensorflow/core/tpu/tpu_configuration.h" +#include "tensorflow/core/tpu/tpu_defs.h" +#include "tensorflow/core/tpu/tpu_execute.h" +#include "tensorflow/core/util/stream_executor_util.h" +#include "tensorflow/stream_executor/device_memory_allocator.h" +#include "tensorflow/stream_executor/tpu/tpu_node_context.h" + +namespace tensorflow { + +namespace { + +using ::tensorflow::tpu::TpuNodeContext; +using CompilationCacheEntryRef = ::tensorflow::tpu::CompilationCacheEntryRef< + ::tensorflow::tpu::TpuCompilationCacheEntry>; +using TpuCompilationCacheLookup = + ::tensorflow::tpu::TpuCompilationCacheLookup; + +// Looks up the input `key` in the compilation cache, populating +// `*rendezvous_key_base` and `*entry`. 
+Status GetComputationCacheEntry( + OpKernelContext* context, string* rendezvous_key_base, + std::unique_ptr* entry) { + const Tensor* key; + TF_RETURN_IF_ERROR(context->input("key", &key)); + profiler::TraceMe trace_me("TpuExecuteOp::LookupProto", /*level=*/2); + if (!TensorShapeUtils::IsVector(key->shape()) || + key->shape().dim_size(0) != 2) { + return errors::InvalidArgument( + "Key argument to TPUExecute must be a 2-element vector"); + } + + ResourceMgr* rmgr = GetTPUConfigResourceMgr(); + TpuCompilationCacheLookup* proto_lookup; + TF_RETURN_IF_ERROR(rmgr->Lookup(rmgr->default_container(), + tpu::kCompiledProtoCacheResourceName, + &proto_lookup)); + core::ScopedUnref lookup_unref(proto_lookup); + TF_RETURN_IF_ERROR(proto_lookup->Lookup(key->vec()(0), entry)); + *rendezvous_key_base = key->vec()(1); + return Status::OK(); +} + +struct VariableUpdateMap { + // Maps input index to the updated output index. If the variable doesn't have + // an updated output, the corresponding output is set to -1. + absl::flat_hash_map input_to_output; + // Maps output index to (the input index, whether the update is generated from + // compilation). + absl::flat_hash_map> output_to_input; + // Part of the input indices that are from the compilation, in the compiled + // order. + std::vector input_in_compiled_update_order; +}; + +// Creates a VariableUpdateMap from both the compilation and the fused variable +// reads/updates. +xla::StatusOr BuildVariableUpdateMap( + absl::Span + compiled_variable_updates, + absl::Span fused_device_var_reads_in_computation_inputs, + const std::vector& fused_device_var_updates_in_computation_outputs, + int64 computation_output_count) { + VariableUpdateMap map; + auto add_pair = [&](int input, int output, bool from_compilation) -> Status { + TF_RET_CHECK(map.input_to_output.emplace(input, output).second) + << "Duplicate variable input index: " << input; + if (output >= 0) { + TF_RET_CHECK(map.output_to_input + .emplace(output, std::make_pair(input, from_compilation)) + .second) + << "Duplicate variable output index: " << output; + } + return Status::OK(); + }; + + // First add the updates produced by the compilation. Not all variables are + // updated, and if not, they do not have an output in the XLA computation. The + // update output indices in the XLA computation start after the non-variable + // outputs. + int num_updated_variables = 0; + for (int i = 0; i < compiled_variable_updates.size(); ++i) { + const bool updated = compiled_variable_updates[i]->updated(); + if (updated) ++num_updated_variables; + } + TF_RET_CHECK(num_updated_variables <= computation_output_count) + << num_updated_variables << " <= " << computation_output_count; + int64 compiled_variable_output_index = + computation_output_count - num_updated_variables; + for (auto update : compiled_variable_updates) { + map.input_in_compiled_update_order.push_back(update->index()); + if (!update->updated()) { + TF_RETURN_IF_ERROR(add_pair(update->index(), -1, true)); + continue; + } + TF_RETURN_IF_ERROR( + add_pair(update->index(), compiled_variable_output_index, true)); + ++compiled_variable_output_index; + } + + // Now add the updates from the attributes. 
+ TF_RET_CHECK(fused_device_var_reads_in_computation_inputs.size() == + fused_device_var_updates_in_computation_outputs.size()); + for (int64 i = 0; i < fused_device_var_reads_in_computation_inputs.size(); + ++i) { + TF_RETURN_IF_ERROR( + add_pair(fused_device_var_reads_in_computation_inputs[i], + fused_device_var_updates_in_computation_outputs[i], false)); + } + return map; +} + +// Buffers representing the inputs to a computation. +struct InputBuffers { + explicit InputBuffers(xla::Shape device_shape) + : buffers(std::move(device_shape)) {} + + InputBuffers(const InputBuffers&) = delete; + InputBuffers& operator=(const InputBuffers&) = delete; + + ~InputBuffers() = default; + + xla::ShapedBuffer ToShapedBuffer(xla::Shape host_shape, + se::DeviceMemoryAllocator* allocator, + int device_ordinal) { + CHECK_NE(allocator, nullptr); + xla::ShapedBuffer shaped_buffer(std::move(host_shape), buffers.shape(), + allocator->platform(), device_ordinal); + shaped_buffer.set_buffers(buffers.Map( + [](xla::MaybeOwningDeviceMemory* buffer) { + CHECK(buffer); + return buffer->AsDeviceMemoryBase(); + })); + return shaped_buffer; + } + + // Describes the buffer tree. + xla::ShapeTree buffers; + + // Information about resource variables passed directly to TPUExecute. + std::vector variables; + + // Mapping from input index to offsets in 'variables'. < 0 if the input does + // not correspond to a variable in 'variables'. + std::vector variable_index; +}; + +// Builds an InputBuffers object that describes the inputs to the computation. +xla::StatusOr> BuildComputationInputs( + OpKernelContext* context, const xla::Shape& input_host_shape, + const VariableUpdateMap& variable_updates, TpuNodeContext* node_context, + se::Stream* stream) { + profiler::TraceMe trace_me("BuildComputationInputs", /*level=*/2); + OpInputList arg_list; + TF_RETURN_IF_ERROR(context->input_list("args", &arg_list)); + + if (arg_list.size() != xla::ShapeUtil::TupleElementCount(input_host_shape)) { + return errors::InvalidArgument( + "Number of parameters (", arg_list.size(), + ") does not match input shape: ", + xla::ShapeUtil::TupleElementCount(input_host_shape)); + } + + auto validate_shape = [&](int i, const Tensor& tensor) { + const xla::Shape& expected = + xla::ShapeUtil::GetTupleElementShape(input_host_shape, i); + VLOG(4) << "Input " << i << " TF shape " << tensor.shape().DebugString(); + XlaTensor* xla_tensor = XlaTensor::FromTensor(&tensor); + + if (xla_tensor == nullptr) { + // FromTensor failed; tensor must be empty. + if (!xla::ShapeUtil::IsZeroElementArray(expected)) { + return errors::InvalidArgument( + "Run-time shape mismatch for TPUExecute argument[", i, "] (", + context->op_kernel().requested_input(i), "). Expected ", + expected.DebugString(), "; got empty tensor"); + } + } else { + // Compare host shapes, easier than getting the expected device shape. + const xla::Shape& xla_shape = xla_tensor->shaped_buffer().on_host_shape(); + if (!xla::ShapeUtil::Compatible(expected, xla_shape)) { + return errors::InvalidArgument( + "Run-time shape mismatch for TPUExecute argument[", i, "] (", + context->op_kernel().requested_input(i), "). Expected ", + expected.DebugString(), "; got ", xla_shape.DebugString()); + } + } + + return Status::OK(); + }; + + // Iterate over the inputs, validating the shapes of non-variable inputs, + // and creating a VariableInfo object for each variable. We consider variable + // inputs in a separate phase because we must acquire variable locks in order. 
+ std::vector variables; + std::vector variable_index(arg_list.size(), -1); + variables.reserve(arg_list.size()); + for (int i = 0; i < arg_list.size(); ++i) { + // Arguments are assumed to be variables if they have a resource type. + // (Non-variable resources are not supported.) + if (context->input_dtype(i) == DT_RESOURCE) { + variable_index[i] = variables.size(); + // TODO(phawkins): we may be looking up many variables here; it would be + // better if we did not repeatedly acquire the resource manager's lock. + const ResourceHandle& handle = HandleFromInput(context, i); + Var* variable; + TF_RETURN_IF_ERROR(LookupResource(context, handle, &variable)); + variables.push_back(VariableInfo(i, handle.name(), variable)); + } else { + TF_RETURN_IF_ERROR(validate_shape(i, arg_list[i])); + } + } + + // Lock the variables, and validate their shapes. We hold the variable locks + // for the duration of the TPU execution so we can donate the variable buffers + // to the computation. If we copied the variable's Tensor instead, its + // reference count would be greater than one due to the reference the Var + // object holds, and we would never be able to reuse variable buffers. + // TODO(phawkins): add a 'reuse_buffers' attribute to TPUExecute that allows + // the user to elect to copy the buffers and permit concurrent access instead. + TF_RETURN_IF_ERROR(LockVariables(absl::MakeSpan(variables))); + for (int i = 0; i < variables.size(); ++i) { + TF_RETURN_IF_ERROR( + validate_shape(variables[i].index(), *variables[i].var()->tensor())); + } + + se::DeviceMemoryAllocator* const allocator = node_context->memory_allocator(); + xla::TransferManager* const transfer_manager = + node_context->transfer_manager(); + const int device_ordinal = node_context->device_ordinal(); + + auto input_buffers = absl::make_unique( + transfer_manager->HostShapeToDeviceShape(input_host_shape)); + + // Allocates a buffer for the root tuple. + const int64 root_size = + transfer_manager->GetByteSizeRequirement(input_buffers->buffers.shape()); + TF_ASSIGN_OR_RETURN(*input_buffers->buffers.mutable_element({}), + allocator->Allocate(device_ordinal, root_size)); + + // Helper function that sets the input buffers for 'arg_index' to 'buffers'. + // If 'donate_buffers' is true, donates ownership of the buffers in 'buffers' + // to the computation and overwrites the entries in 'buffers' with nulls. + auto set_input_buffers_helper = [&](int arg_index, bool donate_buffers, + xla::ShapedBuffer* buffers) { + buffers->buffers().ForEachMutableElement([&](const xla::ShapeIndex& index, + se::DeviceMemoryBase* buffer) { + xla::ShapeIndex in_index = {arg_index}; + for (int64 j : index) { + in_index.push_back(j); + } + auto* in_buffer = input_buffers->buffers.mutable_element(in_index); + if (donate_buffers) { + *in_buffer = se::OwningDeviceMemory(*buffer, device_ordinal, allocator); + *buffer = se::DeviceMemoryBase(); + } else { + *in_buffer = *buffer; + } + }); + }; + + // Assigns the buffers of 'tensor' as computation input 'i'. Allocates fresh + // buffers for zero-element tensors where required. + auto assign_input = [&](int i, const Tensor& tensor, + bool may_reuse) -> xla::Status { + XlaTensor* xla_tensor = XlaTensor::FromTensor(&tensor); + + // Size 0 tensors have no backing XlaTensor, but may still need to have + // tuple buffers allocated. 
+ if (xla_tensor == nullptr) { + CHECK_EQ(tensor.NumElements(), 0); + const xla::Shape& host_shape = + xla::ShapeUtil::GetSubshape(input_host_shape, {i}); + TF_ASSIGN_OR_RETURN(xla::ScopedShapedBuffer buffers, + transfer_manager->AllocateScopedShapedBuffer( + host_shape, allocator, device_ordinal)); + set_input_buffers_helper(/*arg_index=*/i, /*donate_buffers=*/true, + &buffers); + } else { + bool can_reuse_buffers = tensor.RefCountIsOne() && may_reuse; + set_input_buffers_helper(/*arg_index=*/i, + /*donate_buffers=*/can_reuse_buffers, + &xla_tensor->shaped_buffer()); + xla_tensor->WaitForDefinitionEventOnStream(stream); + } + return Status::OK(); + }; + + for (int i = 0; i < arg_list.size(); ++i) { + auto it = variable_updates.input_to_output.find(i); + if (it == variable_updates.input_to_output.end()) { + TF_RETURN_IF_ERROR(assign_input(i, arg_list[i], /*may_reuse=*/true)); + continue; + } + // input i is a variable + bool updated = it->second >= 0; + if (arg_list[i].dtype() != DT_RESOURCE) { + TF_RETURN_IF_ERROR(assign_input(i, arg_list[i], updated)); + } else { + int vi = variable_index[i]; + TF_RETURN_IF_ERROR( + assign_input(i, *variables[vi].var()->tensor(), updated)); + } + } + + input_buffers->variables = std::move(variables); + input_buffers->variable_index = std::move(variable_index); + + return std::move(input_buffers); +} + +struct OutputBuffers { + OutputBuffers(xla::ScopedShapedBuffer b, se::DeviceMemoryAllocator* allocator) + : owned_buffers(b.on_device_shape(), true), + buffers(b.release()), + memory_allocator(allocator) {} + + ~OutputBuffers() { + buffers.buffers().ForEachElement([&](const xla::ShapeIndex& index, + const se::DeviceMemoryBase& buffer) { + if (owned_buffers.element(index) && !buffer.is_null()) { + Status status = + memory_allocator->Deallocate(buffers.device_ordinal(), buffer); + if (!status.ok()) { + LOG(ERROR) << "Error deallocating buffer " << status; + } + } + }); + } + + // Which of the buffers do we own? + xla::ShapeTree owned_buffers; + + xla::ShapedBuffer buffers; + + se::DeviceMemoryAllocator* const memory_allocator; +}; + +// Allocates Tensors for the outputs of the computation. Ownership of most +// output buffers is passed to the output Tensors. Returns an OutputBuffer that +// owns the root buffer that should be passed to the XLA computation, as well as +// any output buffers that do not have corresponding output tensors. The latter +// may happen for zero-element tensors of type int64 or complex64 which still +// require a tuple buffer but do not have a corresponding XlaTensor. +xla::StatusOr> AllocateOutputTensors( + OpKernelContext* context, xla::ScopedShapedBuffer scoped_buffers, + absl::Span output_tensor_shape_protos, + const VariableUpdateMap& variable_updates, TpuNodeContext* node_context, + se::Stream* stream, int device_ordinal, InputBuffers* input_buffers, + const std::shared_ptr& definition_event) { + VLOG(4) << "Output buffers: " << scoped_buffers.ToString(); + + profiler::TraceMe trace_me("AllocateOutputTensors", /*level=*/2); + // Shapes of the outputs, in TensorShape form. + const int64 sub_elements = + xla::ShapeUtil::TupleElementCount(scoped_buffers.on_host_shape()); + if (sub_elements != output_tensor_shape_protos.size()) { + return errors::InvalidArgument( + "Mismatched numbers of output shapes: ", sub_elements, " vs. 
", + output_tensor_shape_protos.size()); + } + + xla::TransferManager* const transfer_manager = + node_context->transfer_manager(); + + std::vector output_tensor_shapes; + output_tensor_shapes.reserve(sub_elements); + for (int64 i = 0; i < sub_elements; ++i) { + TF_RETURN_IF_ERROR( + TensorShape::IsValidShape(*output_tensor_shape_protos[i])); + TensorShape shape(*output_tensor_shape_protos[i]); + const xla::Shape& xla_shape = + xla::ShapeUtil::GetSubshape(scoped_buffers.on_host_shape(), {i}); + if (!xla_shape.IsArray() || + xla::ShapeUtil::ElementsIn(xla_shape) != shape.num_elements()) { + return errors::InvalidArgument( + "Mismatched number of elements in output shape: ", + xla::ShapeUtil::HumanString(xla_shape), " vs ", shape.DebugString()); + } + output_tensor_shapes.push_back(shape); + } + + // Builds a shaped buffer for the outputs. + TF_RET_CHECK(scoped_buffers.on_host_shape().IsTuple()); + TF_RET_CHECK(!xla::ShapeUtil::IsNestedTuple(scoped_buffers.on_host_shape())); + + se::DeviceMemoryAllocator* const allocator = node_context->memory_allocator(); + + auto output_buffers = + absl::make_unique(std::move(scoped_buffers), allocator); + + xla::Shape output_host_shape = output_buffers->buffers.on_host_shape(); + xla::Shape output_device_shape = output_buffers->buffers.on_device_shape(); + + if (!output_host_shape.is_static()) { + TF_RETURN_IF_ERROR(transfer_manager->ReadDynamicShapes( + stream, &output_buffers->buffers, &output_host_shape, + &output_device_shape)); + for (int64 i = 0; i < sub_elements; ++i) { + const xla::Shape& subshape = + xla::ShapeUtil::GetSubshape(output_host_shape, {i}); + TensorShape shape; + TF_RETURN_IF_ERROR(XLAShapeToTensorShape(subshape, &shape)); + output_tensor_shapes[i] = shape; + } + } + + // Transfers ownership of the buffers that back XLA computation output 'i' + // to 'output_tensor'. + auto transfer_buffers = [&](int i, Tensor* output_tensor) { + const xla::Shape& host_shape = + xla::ShapeUtil::GetTupleElementShape(output_host_shape, i); + const xla::Shape& device_shape = + xla::ShapeUtil::GetTupleElementShape(output_device_shape, i); + + // Transfers ownership of the output buffers to the output Tensor, if + // there the tensor is backed by an XlaTensor. Tensors of size 0 have no + // backing XlaTensor, so we let retain 'output_buffers' ownership of any + // buffers in that case. + if (output_tensor->NumElements() > 0) { + xla::ScopedShapedBuffer shaped_buffer(host_shape, device_shape, allocator, + device_ordinal); + shaped_buffer.buffers().ForEachMutableElement( + [&](const xla::ShapeIndex& index, se::DeviceMemoryBase* buffer) { + xla::ShapeIndex out_index = {i}; + for (int64 j : index) { + out_index.push_back(j); + } + *buffer = output_buffers->buffers.buffers().element(out_index); + *output_buffers->owned_buffers.mutable_element(out_index) = false; + }); + + XlaTensor* xla_tensor = XlaTensor::FromTensor(output_tensor); + xla_tensor->set_shaped_buffer(std::move(shaped_buffer)); + xla_tensor->ResetDefinitionEvent(definition_event, stream); + } + }; + + const int num_updated_variables = variable_updates.output_to_input.size(); + TF_RET_CHECK(num_updated_variables <= output_tensor_shapes.size()) + << num_updated_variables << " <= " << output_tensor_shapes.size(); + + OpInputList arg_list; + TF_RETURN_IF_ERROR(context->input_list("args", &arg_list)); + + // The TPU program outputs the updated variables including DT_RESOURCE and + // non-DT_RESOURCE. The TPUExecuteOp needs to output all non-DT_RESOURCE + // variables (updated or not). 
+ // + // updated not_updated + // |------------------|------------------| + // DT_RESOURCE | allocate persist | do nothing | + // |------------------|------------------| + // | allocate | forward Op input | + // not DT_RESOURCE | output | to Op output | Op output + // |------------------|------------------| + // program output + + // Allocates a fresh tensor for each updated variable. While the variable + // inputs need come in no particular order, the variable values are + // always added last by XlaCompiler class, in the same order as the + // corresponding input variables. + int op_output_index = 0; + int compiled_update_index = 0; + auto process_non_updated_variable = [&](int input_index) { + const int variable_index = input_buffers->variable_index.at(input_index); + // If a DT_RESOURCE input is not updated, nothing needs to be done + // because there is no corresponding output. If a non-resource input + // is not updated, forward the input to the output. + if (variable_index < 0) { + context->set_output(op_output_index, arg_list[input_index]); + ++op_output_index; + } + }; + for (int i = 0; i < output_tensor_shapes.size(); ++i) { + auto it = variable_updates.output_to_input.find(i); + if (it == variable_updates.output_to_input.end()) { + // Not a variable update. + // Allocates a fresh tensor for each output of the operator. We always + // allocate a new host-side tensor, but the on-device buffers that back + // that tensor may be aliases of input buffers. + Tensor* output_tensor; + TF_RETURN_IF_ERROR(context->allocate_output( + op_output_index, output_tensor_shapes[i], &output_tensor)); + transfer_buffers(i, output_tensor); + ++op_output_index; + continue; + } + const int input_index = it->second.first; + // We must process the compiled updates in order, which includes the + // non-updated variables, i.e., those without an XLA output. + const bool from_compilation = it->second.second; + while (from_compilation && + variable_updates + .input_in_compiled_update_order[compiled_update_index] != + input_index) { + process_non_updated_variable( + variable_updates + .input_in_compiled_update_order[compiled_update_index]); + ++compiled_update_index; + } + ++compiled_update_index; + const int variable_index = input_buffers->variable_index.at(input_index); + PersistentTensor unused; + Tensor* output_tensor; + if (variable_index >= 0) { + // This output corresponds to a DT_RESOURCE input to the TPUExecute + // operator. Update the corresponding variable. + VariableInfo& var = input_buffers->variables[variable_index]; + // TODO(b/35625933): the correct thing to do would be to transfer + // ownership of the PersistentTensor into the Var object. However, Var + // contains a Tensor so we can't. + TF_RETURN_IF_ERROR(context->allocate_persistent( + var.var()->tensor()->dtype(), output_tensor_shapes[i], &unused, + &output_tensor)); + *var.var()->tensor() = *output_tensor; + } else { + // This output corresponds to a non-resource input to the TPUExecute + // operator. This case occurs for the distributed TPU rewrite which + // adds variable values as inputs and outputs rather than passing the + // variables themselves; reading and writing the variable is handled + // outside the op. + // TODO(phawkins): remove this case when placement of variables on TPU + // devices is well supported and we no longer need to place "remote" + // variables on CPU devices. 
+ TF_RETURN_IF_ERROR(context->allocate_output( + op_output_index, output_tensor_shapes[i], &output_tensor)); + ++op_output_index; + } + transfer_buffers(i, output_tensor); + } + + // Process any remaining non-updated variables. + for (; compiled_update_index < + variable_updates.input_in_compiled_update_order.size(); + ++compiled_update_index) { + process_non_updated_variable( + variable_updates.input_in_compiled_update_order[compiled_update_index]); + } + return std::move(output_buffers); +} + +} // namespace + +// TPUExecuteOp + +TPUExecuteOp::TPUExecuteOp(OpKernelConstruction* context) + : AsyncOpKernel(context, /* is_deferred = */ true) {} + +AsyncOpKernel* TPUExecuteOp::AsAsync() { + // If TPU launches are asynchronous, we can perform the launch without + // blocking the calling thread, and so the executor may treat this kernel as + // a regular (synchronous) OpKernel. + return nullptr; +} + +void TPUExecuteOp::Compute(OpKernelContext* context) { + Status s = DoWork(context); + // NOTE: We can't use `OP_REQUIRES_OK()` here because that macro includes + // a dynamic check that we are not in an AsyncOpKernel. + if (TF_PREDICT_FALSE(!s.ok())) { + context->SetStatus(s); + } +} + +void TPUExecuteOp::ComputeAsync(OpKernelContext* context, DoneCallback done) { + // If TPU launches are asynchronous, then perform the launch on this + // thread to avoid a thread hop, which has an observable latency cost. + OP_REQUIRES_OK_ASYNC(context, DoWork(context), done); + done(); +} + +Status TPUExecuteOp::DoWork(OpKernelContext* context) { + VLOG(1) << "Cloud TPU: TPUExecuteOp::Compute"; + + const XlaDevice::Metadata* metadata; + TF_RETURN_IF_ERROR(XlaDevice::GetMetadata(context, &metadata)); + const int device_ordinal = metadata->device_ordinal(); + + // We are guaranteed that the object underlying TpuNodeContext won't be + // deleted out from under us, while node_context is alive. + TF_ASSIGN_OR_RETURN(std::unique_ptr node_context, + TpuNodeContext::Create(device_ordinal)); + + profiler::TraceMe trace_me( + [&, device_ordinal] { + return absl::StrCat("TpuExecuteOp#device_ordinal=", device_ordinal, + ",id=", context->step_id(), + ",iter_num=", context->frame_iter().iter_id, "#"); + }, + /*level=*/2); + profiler::TraceMe trace_me_init("TPUExecuteOp::Init", /*level=*/2); + + string rendezvous_key_base; + std::unique_ptr entry; + TF_RETURN_IF_ERROR( + GetComputationCacheEntry(context, &rendezvous_key_base, &entry)); + + // Shapes of the inputs and outputs, in xla::Shape form. + const TPUExecutableInfoProto* proto = entry->get().get_executable_info(); + + xla::TransferManager* const transfer_manager = + node_context->transfer_manager(); + CHECK(context->op_device_context()); + se::Stream* stream = context->op_device_context()->stream(); + + TF_RET_CHECK(proto->input_shapes_size() == 1); + + xla::Shape host_shape(proto->input_shapes(0)); + + TF_ASSIGN_OR_RETURN( + auto variable_update_map, + BuildVariableUpdateMap(proto->variable_indices(), + fused_device_var_reads_in_computation_inputs_, + fused_device_var_updates_in_computation_outputs_, + proto->output_tensor_shapes().size())); + TF_ASSIGN_OR_RETURN( + std::unique_ptr input_buffers, + BuildComputationInputs(context, host_shape, variable_update_map, + node_context.get(), stream)); + + // Ideally this should be the host-to-device stream from XlaDeviceContext. 
+ // The particular anti-dependency this is avoiding (why we need a separate + // transfer stream) is between the executable writing tuple tables and + // TPUExecute()'s deregister_stream; if they come from the same stream pool + // antidependencies will occur. XlaBackend has a different pool of streams + // to the stream->GetOrCreateSubStream() that TPUExecute() uses, so these + // will never refer to the same stream. + // + // TODO(jmolloy): Add the necessary plumbing to obtain the proper + // host-to-device stream here. + TF_ASSIGN_OR_RETURN(auto transfer_stream_ptr, + node_context->BorrowStream(device_ordinal)); + + se::DeviceMemoryAllocator* const allocator = node_context->memory_allocator(); + auto shaped_buffer = + input_buffers->ToShapedBuffer(host_shape, allocator, device_ordinal); + if (transfer_manager->CanShapedBufferBeAccessedNow(stream->parent(), + shaped_buffer)) { + TF_RETURN_IF_ERROR(transfer_manager->WriteRootTupleIndexTable( + transfer_stream_ptr.get(), shaped_buffer)); + stream->ThenWaitFor(transfer_stream_ptr.get()); + } else { + TF_RETURN_IF_ERROR( + transfer_manager->WriteRootTupleIndexTable(stream, shaped_buffer)); + } + VLOG(4) << "Input buffers: " << shaped_buffer.ToString(); + + // Snapshot the inputs, if a snapshot was requested. + std::shared_ptr hlo_snapshot; + if (proto->has_session_module()) { + hlo_snapshot = std::make_shared(proto->session_module()); + auto literal = + std::make_shared(shaped_buffer.on_host_shape()); + transfer_manager->TransferLiteralFromDevice( + stream, shaped_buffer, literal.get(), + [hlo_snapshot, literal](Status status) { + if (!status.ok()) { + LOG(ERROR) << "TransferLiteralFromDevice for HLO snapshot inputs " + "failed: " + << status; + return; + } + *hlo_snapshot->add_arguments() = literal->ToProto(); + }); + } + + auto definition_event = std::make_shared(stream->parent()); + TF_RET_CHECK(definition_event->Init()) + << "TPU definition event initialization failed"; + + trace_me_init.Stop(); + + const uint32 rng_seed = GetXLARandomSeed(); + + std::unique_ptr device_assignment; + if (proto->has_device_assignment()) { + TF_ASSIGN_OR_RETURN(device_assignment, xla::DeviceAssignment::Deserialize( + proto->device_assignment())); + } + + VLOG(4) << "Input buffers after alias resolution: " + << shaped_buffer.ToString(); + + std::vector input; + input.emplace_back( + xla::ExecutionInput(std::move(input_buffers->buffers), host_shape)); + + // The buffers to be freed are in the `output` and will be automatically + // freed when it goes out of the scope. In async mode, this means the buffers + // will be freed before anyone calls "BlockHostUntilDone", which indicates + // that some of the (input) buffers will be freed while the program is running + // and looks scary. However, this turns out to be not a problem since although + // we free a memory and reassign it to other users while a program is running, + // all subsequent writes to the program that could possibly clobber the memory + // will depend on the program to finish. 
+ const TPUHostTransferInfoProto* host_transfer_info = + entry->get().get_host_transfer_info(); + const xla::HloProto* hlo_metadata = entry->get().get_hlo_metadata(); + TF_ASSIGN_OR_RETURN( + xla::ExecutionOutput output, + TPUExecute(*proto, *host_transfer_info, *hlo_metadata, std::move(input), + rendezvous_key_base, rng_seed, node_context.get(), + device_assignment.get(), context->cancellation_manager(), + context, stream, transfer_stream_ptr.get(), + entry->get().get_tpu_program())); + stream->ThenRecordEvent(definition_event.get()); + + TF_ASSIGN_OR_RETURN( + std::unique_ptr output_buffers, + AllocateOutputTensors(context, output.ConsumeResult(), + proto->output_tensor_shapes(), variable_update_map, + node_context.get(), stream, device_ordinal, + input_buffers.get(), definition_event)); + + // Transfer the outputs and save the snapshot to disk. + if (hlo_snapshot) { + auto literal = + std::make_shared(output_buffers->buffers.on_host_shape()); + transfer_manager->TransferLiteralFromDevice( + stream, output_buffers->buffers, literal.get(), + [hlo_snapshot, literal](Status status) { + if (status.ok()) { + *hlo_snapshot->mutable_result() = literal->ToProto(); + } else { + LOG(ERROR) << "TransferLiteralFromDevice for HLO snapshot " + "outputs failed: " + << status; + } + DumpHloSnapshotIfEnabled(*hlo_snapshot, + xla::GetDebugOptionsFromFlags()); + }); + } + return Status::OK(); +} + +TPUExecuteOp::~TPUExecuteOp() = default; + +TPUExecuteAndUpdateVariablesOp::TPUExecuteAndUpdateVariablesOp( + OpKernelConstruction* context) + : TPUExecuteOp(context) { + OP_REQUIRES_OK(context, context->GetAttr( + "device_var_reads_indices", + &fused_device_var_reads_in_computation_inputs_)); + OP_REQUIRES_OK( + context, + context->GetAttr("device_var_updates_indices", + &fused_device_var_updates_in_computation_outputs_)); +} + +REGISTER_KERNEL_BUILDER( + Name("TPUExecute").Device(DEVICE_TPU_NODE).HostMemory("key"), TPUExecuteOp); + +REGISTER_KERNEL_BUILDER(Name("TPUExecuteAndUpdateVariables") + .Device(DEVICE_TPU_NODE) + .HostMemory("key"), + TPUExecuteAndUpdateVariablesOp); + +} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_execute_op.h b/tensorflow/core/tpu/kernels/tpu_execute_op.h new file mode 100644 index 00000000000..c66118ad45e --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_execute_op.h @@ -0,0 +1,67 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CORE_TPU_KERNELS_TPU_EXECUTE_OP_H_ +#define TENSORFLOW_CORE_TPU_KERNELS_TPU_EXECUTE_OP_H_ + +#include +#include + +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/platform/mutex.h" + +namespace tensorflow { + +// Op that executes a precompiled TPU computation. 
+class TPUExecuteOp : public AsyncOpKernel { + public: + explicit TPUExecuteOp(OpKernelConstruction* context); + ~TPUExecuteOp() override; + + AsyncOpKernel* AsAsync() override; + + void Compute(OpKernelContext* context) override; + void ComputeAsync(OpKernelContext* context, DoneCallback done) override; + + protected: + // Used by TPUExecuteAndUpdateVariablesOp to set the fused variable reads and + // updates indices in the XLA computation. The two vectors must have the same + // size, and a pair of read index and write index represents a variable's + // input to the program and its updated value from the program. If the + // variable is not updated, use -1 as the output index. + std::vector fused_device_var_reads_in_computation_inputs_; + std::vector fused_device_var_updates_in_computation_outputs_; + + private: + Status DoWork(OpKernelContext* context); + + TF_DISALLOW_COPY_AND_ASSIGN(TPUExecuteOp); +}; + +// A variant of TPUExecuteOp that contains fused device variable reads and +// updates. +class TPUExecuteAndUpdateVariablesOp : public TPUExecuteOp { + public: + explicit TPUExecuteAndUpdateVariablesOp(OpKernelConstruction* context); + ~TPUExecuteAndUpdateVariablesOp() override = default; + + private: + TF_DISALLOW_COPY_AND_ASSIGN(TPUExecuteAndUpdateVariablesOp); +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_TPU_KERNELS_TPU_EXECUTE_OP_H_ From 0721b70578357055569778cb2ed9d7870d2ddceb Mon Sep 17 00:00:00 2001 From: Karim Nosir Date: Fri, 17 Jul 2020 17:54:28 -0700 Subject: [PATCH 0765/2522] - Add names for patterns in legalize pass in TFL. - Move pattern for squareddiff->relu to optimize pass from legalize, and add unit-test. - Remove redundant fusing pattern for add with activation, Should be handled already in optimize pass. PiperOrigin-RevId: 321889085 Change-Id: I8886667e2167dc476b651a0c125203f4eca8512e --- .../compiler/mlir/lite/tests/optimize.mlir | 10 + .../mlir/lite/transforms/legalize_patterns.td | 321 ++++++++++-------- .../mlir/lite/transforms/optimize_patterns.td | 9 +- 3 files changed, 204 insertions(+), 136 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/tests/optimize.mlir b/tensorflow/compiler/mlir/lite/tests/optimize.mlir index 7861eb1ec6b..67d1b314fd2 100644 --- a/tensorflow/compiler/mlir/lite/tests/optimize.mlir +++ b/tensorflow/compiler/mlir/lite/tests/optimize.mlir @@ -992,3 +992,13 @@ func @RemoveCast(%arg0: tensor<2x2xf32>) -> tensor<2x2xf32> { // CHECK: return %arg0 } +func @squaredDifferenceReluRemoveRelu(%arg0: tensor<1xf32>, %arg1: tensor<1xf32>) -> tensor<1xf32> { + %0 = "tfl.squared_difference"(%arg0, %arg1) : (tensor<1xf32>, tensor<1xf32>) -> tensor<1xf32> + %1 = "tfl.relu"(%0) : (tensor<1xf32>) -> tensor<1xf32> + return %1: tensor<1xf32> + +// CHECK-LABEL: squaredDifferenceReluRemoveRelu +// CHECK: %[[RESULT:.*]] = tfl.squared_difference %arg0, %arg1 : tensor<1xf32> +// CHECK: return %[[RESULT]] +} + diff --git a/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td b/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td index f5b45df3eee..47cfaecd3fb 100644 --- a/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td +++ b/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td @@ -66,8 +66,10 @@ def LegalizeTFConstToTFLConst: Pat<(TF_ConstOp ElementsAttr:$value), (TFL_ConstOp $value)>; // Convert to std constant for statically shaped, non-opaque constants. 
-def : Pat<(TF_ConstOp:$res NonOpaqueElementsAttr:$value), (ConstantOp $value), - [(AnyStaticShapeTensor $res)], (addBenefit 10)>; +def ConvertTfConstToStdConst : Pat< + (TF_ConstOp:$res NonOpaqueElementsAttr:$value), + (ConstantOp $value), + [(AnyStaticShapeTensor $res)], (addBenefit 10)>; //===----------------------------------------------------------------------===// // Unary ops patterns. @@ -162,186 +164,234 @@ def LegalizeMaximum : Pat<(TF_MaximumOp $arg1, $arg2), def LegalizeMinimum : Pat<(TF_MinimumOp $arg1, $arg2), (TFL_MinimumOp $arg1, $arg2)>; -def : Pat<(TF_NegOp $arg), (TFL_NegOp $arg)>; -def : Pat<(TF_OneHotOp $indices, $depth, $on_value, $off_value, $axis), - (TFL_OneHotOp $indices, $depth, $on_value, $off_value, - (convertIntAttrTo32Bit $axis))>; -def : Pat<(TF_PowOp $x, $y), (TFL_PowOp $x, $y)>; -def : Pat<(TF_RangeOp $start, $limit, $delta), (TFL_RangeOp $start, $limit, $delta)>; -def : Pat<(TF_Relu6Op $arg), (TFL_Relu6Op $arg)>; -def : Pat<(TF_ReluOp $arg), (TFL_ReluOp $arg)>; -def : Pat<(TF_ReverseSequenceOp $input, $seq_lengths, $seq_dim, $batch_dim), - (TFL_ReverseSequenceOp $input, $seq_lengths, - (convertIntAttrTo32Bit $seq_dim), - (convertIntAttrTo32Bit $batch_dim))>; -def : Pat<(TF_RoundOp $arg), (TFL_RoundOp $arg)>; -def : Pat<(TF_RsqrtOp $arg), (TFL_RsqrtOp $arg)>; -def : Pat<(TF_SqrtOp $arg), (TFL_SqrtOp $arg)>; -def : Pat<(TF_SquareOp $arg), (TFL_SquareOp $arg)>; -def : Pat<(TF_SegmentSumOp $data, I32Tensor:$segment_ids), (TFL_SegmentSumOp $data, $segment_ids)>; -def : Pat<(TF_SelectOp $cond, $x, $y), (TFL_SelectOp $cond, $x, $y)>; -def : Pat<(TF_SelectV2Op:$src_op $cond, $x, $y), (TFL_SelectOp $cond, $x, $y), [(HasSameStaticShapes $src_op)]>; -def : Pat<(TF_SelectV2Op:$src_op $cond, $x, $y), (TFL_SelectV2Op $cond, $x, $y), [(HasNotSameStaticShapes $src_op)]>; -def : Pat<(TF_ShapeOp $arg), (TFL_ShapeOp $arg)>; -def : Pat<(TF_SigmoidOp $arg), (TFL_LogisticOp $arg)>; -def : Pat<(TF_SinOp F32Tensor:$arg), (TFL_SinOp $arg)>; -def : Pat<(TF_SliceOp $input, $begin, $size), (TFL_SliceOp $input, $begin, $size)>; -def : Pat<(TF_SoftmaxOp $arg), (TFL_SoftmaxOp $arg, ConstF32Attr<"1.0">)>; -def : Pat<(TF_SoftplusOp F32Tensor:$arg0), (TFL_LogOp (TFL_AddOp (TFL_ExpOp $arg0), (ConstantOp ConstantAttr, "1.0f">), TFL_AF_None))>; -def : Pat<(TF_SqueezeOp $arg, $squeeze_dims), (TFL_SqueezeOp $arg, $squeeze_dims)>; -def : Pat<(TF_TanhOp $arg), (TFL_TanhOp $arg)>; -def : Pat<(TF_TransposeOp $arg, $perm), (TFL_TransposeOp $arg, $perm)>; -def : Pat<(TF_WhereOp $arg), (TFL_WhereOp $arg)>; -def : Pat<(TF_ZerosLikeOp $arg), (TFL_ZerosLikeOp $arg)>; +def LegalizeNeg : Pat<(TF_NegOp $arg), (TFL_NegOp $arg)>; +def LegalizeOneHot : Pat< + (TF_OneHotOp $indices, $depth, $on_value, $off_value, $axis), + (TFL_OneHotOp $indices, $depth, $on_value, $off_value, + (convertIntAttrTo32Bit $axis))>; +def LegalizePow : Pat<(TF_PowOp $x, $y), (TFL_PowOp $x, $y)>; +def LegalizeRange : Pat<(TF_RangeOp $start, $limit, $delta), + (TFL_RangeOp $start, $limit, $delta)>; +def LegalizeRelu6 : Pat<(TF_Relu6Op $arg), (TFL_Relu6Op $arg)>; +def LegalizeRelu : Pat<(TF_ReluOp $arg), (TFL_ReluOp $arg)>; +def LegalizeReverseSequence : Pat< + (TF_ReverseSequenceOp $input, $seq_lengths, $seq_dim, $batch_dim), + (TFL_ReverseSequenceOp $input, $seq_lengths, + (convertIntAttrTo32Bit $seq_dim), (convertIntAttrTo32Bit $batch_dim))>; +def LegalizeRound : Pat<(TF_RoundOp $arg), (TFL_RoundOp $arg)>; +def LegalizeRsqrt : Pat<(TF_RsqrtOp $arg), (TFL_RsqrtOp $arg)>; +def LegalizeSqrt : Pat<(TF_SqrtOp $arg), (TFL_SqrtOp $arg)>; 
+def LegalizeSquare : Pat<(TF_SquareOp $arg), (TFL_SquareOp $arg)>; +def LegalizeSegmentSum : Pat<(TF_SegmentSumOp $data, I32Tensor:$segment_ids), + (TFL_SegmentSumOp $data, $segment_ids)>; +def LegalizeSelect : Pat<(TF_SelectOp $cond, $x, $y), + (TFL_SelectOp $cond, $x, $y)>; +def LegalizeSelectV2SameStaticShape : Pat<(TF_SelectV2Op:$src_op $cond, $x, $y), + (TFL_SelectOp $cond, $x, $y), + [(HasSameStaticShapes $src_op)]>; +def LegalizeSelectV2NotSameStaticShape : Pat< + (TF_SelectV2Op:$src_op $cond, $x, $y), + (TFL_SelectV2Op $cond, $x, $y), + [(HasNotSameStaticShapes $src_op)]>; +def LegalizeShape : Pat<(TF_ShapeOp $arg), (TFL_ShapeOp $arg)>; +def LegalizeSigmoid : Pat<(TF_SigmoidOp $arg), (TFL_LogisticOp $arg)>; +def LegalizeSin : Pat<(TF_SinOp F32Tensor:$arg), (TFL_SinOp $arg)>; +def LegalizeSlice : Pat<(TF_SliceOp $input, $begin, $size), + (TFL_SliceOp $input, $begin, $size)>; +def LegalizeSoftmax : Pat<(TF_SoftmaxOp $arg), + (TFL_SoftmaxOp $arg, ConstF32Attr<"1.0">)>; +def LegalizeSoftPlus : Pat<(TF_SoftplusOp F32Tensor:$arg0), + (TFL_LogOp (TFL_AddOp (TFL_ExpOp $arg0), + (ConstantOp ConstantAttr, "1.0f">), + TFL_AF_None))>; +def LegalizeSqueeze : Pat<(TF_SqueezeOp $arg, $squeeze_dims), + (TFL_SqueezeOp $arg, $squeeze_dims)>; +def LegalizeTanh : Pat<(TF_TanhOp $arg), (TFL_TanhOp $arg)>; +def LegalizeTranspose : Pat<(TF_TransposeOp $arg, $perm), + (TFL_TransposeOp $arg, $perm)>; +def LegalizeWhere : Pat<(TF_WhereOp $arg), (TFL_WhereOp $arg)>; +def LegalizeZerosLike : Pat<(TF_ZerosLikeOp $arg), (TFL_ZerosLikeOp $arg)>; //===----------------------------------------------------------------------===// // Binary ops patterns. //===----------------------------------------------------------------------===// -def : Pat<(TF_LessOp $l, $r), (TFL_LessOp $l, $r)>; -def : Pat<(TF_GreaterOp $l, $r), (TFL_GreaterOp $l, $r)>; +def LegalizeLess : Pat<(TF_LessOp $l, $r), (TFL_LessOp $l, $r)>; +def LegalizeGreater : Pat<(TF_GreaterOp $l, $r), (TFL_GreaterOp $l, $r)>; -def : Pat<(TF_LessEqualOp $l, $r), (TFL_LessEqualOp $l, $r)>; -def : Pat<(TF_GreaterEqualOp $l, $r), (TFL_GreaterEqualOp $l, $r)>; +def LegalizeLessEqual : Pat<(TF_LessEqualOp $l, $r), (TFL_LessEqualOp $l, $r)>; +def LegalizeGreaterEqual : Pat<(TF_GreaterEqualOp $l, $r), + (TFL_GreaterEqualOp $l, $r)>; // Gather in TF -> Gather in TFL with axis=0 // The 'validate_indices' attribute is deprecated. 
-def : Pat<(TF_GatherOp $params, $indices, $ignored_validate_indices), - (TFL_GatherOp $params, $indices, ConstantAttr)>; +def LegalizeGather: Pat< + (TF_GatherOp $params, $indices, $ignored_validate_indices), + (TFL_GatherOp $params, $indices, ConstantAttr)>; -def : Pat<(TF_GatherNdOp $params, $indices), - (TFL_GatherNdOp $params, $indices)>; +def LegalizeGatherNd : Pat<(TF_GatherNdOp $params, $indices), + (TFL_GatherNdOp $params, $indices)>; -def : Pat<(TF_GatherV2Op $params, $indices, - (ConstantOp ElementsAttr:$axis), - ConstantAttr:$batch_dims), - (TFL_GatherOp $params, $indices, - ExtractSingleElementAsInt32:$axis)>; +def LegalizeGatherV2 : Pat< + (TF_GatherV2Op $params, $indices, (ConstantOp ElementsAttr:$axis), + ConstantAttr:$batch_dims), + (TFL_GatherOp $params, $indices, ExtractSingleElementAsInt32:$axis)>; -def : Pat<(TF_FloorDivOp $l, $r), (TFL_FloorDivOp $l, $r)>; +def LegalizeFloorDiv : Pat<(TF_FloorDivOp $l, $r), (TFL_FloorDivOp $l, $r)>; -def : Pat<(TF_NotEqualOp $l, $r, /*incompatible_shape_error=*/ConstBoolAttrTrue), - (TFL_NotEqualOp $l, $r)>; +def LegalizeNotEqual : Pat< + (TF_NotEqualOp $l, $r, /*incompatible_shape_error=*/ConstBoolAttrTrue), + (TFL_NotEqualOp $l, $r)>; -def : Pat<(TF_LogicalAndOp $l, $r), (TFL_LogicalAndOp $l, $r)>; +def LegalizeLogicalAnd : Pat<(TF_LogicalAndOp $l, $r), + (TFL_LogicalAndOp $l, $r)>; -def : Pat<(TF_LogicalOrOp $l, $r), (TFL_LogicalOrOp $l, $r)>; +def LegalizeLogicalOr : Pat<(TF_LogicalOrOp $l, $r), (TFL_LogicalOrOp $l, $r)>; + +def LegalizeAdd : Pat<(TF_AddOp $lhs, $rhs), + (TFL_AddOp $lhs, $rhs, TFL_AF_None)>; +def LegalizeAddv2 : Pat<(TF_AddV2Op $lhs, $rhs), + (TFL_AddOp $lhs, $rhs, TFL_AF_None)>; +def LegalizeBiasAdd : Pat< + (TF_BiasAddOp F32Tensor:$l, F32Tensor:$r, IsDataFormatNHWC:$data_format), + (TFL_AddOp $l, $r, TFL_AF_None)>; +def LegalizeSub : Pat<(TF_SubOp $lhs, $rhs), + (TFL_SubOp $lhs, $rhs, TFL_AF_None)>; +def LegalizeMul : Pat<(TF_MulOp $lhs, $rhs), + (TFL_MulOp $lhs, $rhs, TFL_AF_None)>; +def LegalizeRealDiv : Pat<(TF_RealDivOp $lhs, $rhs), + (TFL_DivOp $lhs, $rhs, TFL_AF_None)>; +def LegalizeDiv : Pat<(TF_DivOp $lhs, $rhs), + (TFL_DivOp $lhs, $rhs, TFL_AF_None)>; -def : Pat<(TF_AddOp $lhs, $rhs), (TFL_AddOp $lhs, $rhs, TFL_AF_None)>; -def : Pat<(TF_AddV2Op $lhs, $rhs), (TFL_AddOp $lhs, $rhs, TFL_AF_None)>; // When batch size is known, TF BatchMatMul gets unfolded to TFL FullyConnected // with additional ops. In the case of unknown batch size, the match will // fall through to here and convert to TF Lite BatchMatMul. -def : Pat<(TF_BatchMatMulV2Op $lhs, $rhs, $adj_x, $adj_y), (TFL_BatchMatMulOp $lhs, $rhs, $adj_x, $adj_y)>; -def : Pat<(TF_BatchMatMulOp $lhs, $rhs, $adj_x, $adj_y), (TFL_BatchMatMulOp $lhs, $rhs, $adj_x, $adj_y)>; -def : Pat<(TF_SubOp $lhs, $rhs), (TFL_SubOp $lhs, $rhs, TFL_AF_None)>; -def : Pat<(TF_MulOp $lhs, $rhs), (TFL_MulOp $lhs, $rhs, TFL_AF_None)>; -def : Pat<(TF_RealDivOp $lhs, $rhs), (TFL_DivOp $lhs, $rhs, TFL_AF_None)>; -def : Pat<(TF_DivOp $lhs, $rhs), (TFL_DivOp $lhs, $rhs, TFL_AF_None)>; +def LegalizeBatchMatMulV2UnknownBatch : Pat< + (TF_BatchMatMulV2Op $lhs, $rhs, $adj_x, $adj_y), + (TFL_BatchMatMulOp $lhs, $rhs, $adj_x, $adj_y)>; +def LegalizeBatchMatMulUnknownBatch : Pat< + (TF_BatchMatMulOp $lhs, $rhs, $adj_x, $adj_y), + (TFL_BatchMatMulOp $lhs, $rhs, $adj_x, $adj_y)>; -def : Pat<(TF_BiasAddOp F32Tensor:$l, F32Tensor:$r, - IsDataFormatNHWC:$data_format), - (TFL_AddOp $l, $r, TFL_AF_None)>; -// TODO(jpienaar): These should be handled by the pattern rewriter, find out -// why it isn't. 
-def : Pat<(TF_Relu6Op (TF_BiasAddOp F32Tensor:$l, F32Tensor:$r, - IsDataFormatNHWC:$data_format)), - (TFL_AddOp $l, $r, TFL_AF_Relu6)>; - -def : Pat<(TF_FakeQuantWithMinMaxVarsOp $inputs, - (ConstantOp F32ElementsAttr:$min), - (ConstantOp F32ElementsAttr:$max), - $num_bits, $narrow_range), - (TFL_DequantizeOp - (TFL_QuantizeOp $inputs, - (ConvertToQuantTypeFromAttrs $inputs, $min, $max, - $num_bits, $narrow_range)))>; +def LegalizeFakeQuantWithMinMaxVars: Pat< + (TF_FakeQuantWithMinMaxVarsOp $inputs, (ConstantOp F32ElementsAttr:$min), + (ConstantOp F32ElementsAttr:$max), $num_bits, $narrow_range), + (TFL_DequantizeOp + (TFL_QuantizeOp $inputs, (ConvertToQuantTypeFromAttrs $inputs, $min, $max, + $num_bits, $narrow_range)))>; // TODO(rocky): Not all of the attributes are handled correctly. Make this // more general if there is a need. -def : Pat<(TF_QuantizeAndDequantizeV2Op $inputs, - (ConstantOp F32ElementsAttr:$min), - (ConstantOp F32ElementsAttr:$max), - $signed_input, $num_bits, $range_given, $round_mode, - $narrow_range, $axis), - (TFL_DequantizeOp - (TFL_QuantizeOp $inputs, - (ConvertToQuantTypeFromAttrs $inputs, $min, $max, - $num_bits, $narrow_range)))>; +def LegalizeQuantizeAndDequantizeV2 : Pat< + (TF_QuantizeAndDequantizeV2Op $inputs, (ConstantOp F32ElementsAttr:$min), + (ConstantOp F32ElementsAttr:$max), + $signed_input, $num_bits, $range_given, $round_mode, $narrow_range, $axis), + (TFL_DequantizeOp + (TFL_QuantizeOp $inputs, (ConvertToQuantTypeFromAttrs $inputs, $min, $max, + $num_bits, $narrow_range)))>; -def : Pat<(TF_RankOp $input), (TFL_RankOp $input)>; +def LegalizeRank : Pat<(TF_RankOp $input), (TFL_RankOp $input)>; -def : Pat<(TF_SquaredDifferenceOp $l, $r), (TFL_SquaredDifferenceOp $l, $r)>; +def LegalizeSquaredDifference : Pat<(TF_SquaredDifferenceOp $l, $r), + (TFL_SquaredDifferenceOp $l, $r)>; -// Note(ycling): We can eliminate Relu from Relu(SquaredDifference(x, y)), -// since the result of SquaredDifference is always non-negative. -// TFLite interpreter doesn't support Relu+int32 for now. So the test cases -// are failing without the following pattern to optimize Relu away fixes -// the problem. 
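The removed Note(ycling) comment above is the whole justification for this move: a squared difference is never negative, so a ReLU applied to it is the identity and can be dropped (which is why the pattern now lives in the optimize pass, shown further down). A minimal standalone C++ check of that algebraic fact; the function names here are illustrative only, not TFLite kernels:

#include <algorithm>
#include <cassert>

float SquaredDifference(float x, float y) {
  float d = x - y;
  return d * d;
}
float Relu(float v) { return std::max(v, 0.0f); }

int main() {
  for (float x : {-2.0f, 0.5f, 3.0f}) {
    for (float y : {-1.0f, 0.0f, 4.0f}) {
      // A square is always >= 0, so ReLU leaves it unchanged.
      assert(Relu(SquaredDifference(x, y)) == SquaredDifference(x, y));
    }
  }
  return 0;
}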
-def : Pat<(TF_ReluOp (TF_SquaredDifferenceOp $l, $r)), - (TFL_SquaredDifferenceOp $l, $r)>; +def LegalizeReverseV2 : Pat<(TF_ReverseV2Op $arg0, $arg1), + (TFL_ReverseV2Op $arg0, $arg1)>; -def : Pat<(TF_ReverseV2Op $arg0, $arg1), (TFL_ReverseV2Op $arg0, $arg1)>; +def LegalizeEqual : Pat<(TF_EqualOp $arg0, $arg1, + /*incompatible_shape_error=*/ConstBoolAttrTrue), + (TFL_EqualOp $arg0, $arg1)>; -def : Pat<(TF_EqualOp $arg0, $arg1, /*incompatible_shape_error=*/ConstBoolAttrTrue), (TFL_EqualOp $arg0, $arg1)>; +def LegalizePad : Pat<(TF_PadOp $arg0, $arg1), (TFL_PadOp $arg0, $arg1)>; -def : Pat<(TF_PadOp $arg0, $arg1), (TFL_PadOp $arg0, $arg1)>; +def LegalizeTile : Pat<(TF_TileOp $arg0, $arg1), (TFL_TileOp $arg0, $arg1)>; -def : Pat<(TF_TileOp $arg0, $arg1), (TFL_TileOp $arg0, $arg1)>; +def LegalizePadV2 : Pat<(TF_PadV2Op $arg0, $arg1, $cst), + (TFL_PadV2Op $arg0, $arg1, $cst)>; -def : Pat<(TF_PadV2Op $arg0, $arg1, $cst), (TFL_PadV2Op $arg0, $arg1, $cst)>; +def LegalizeMean : Pat<(TF_MeanOp $arg0, $arg1, BoolAttr:$arg2), + (TFL_MeanOp $arg0, $arg1, $arg2)>; -def : Pat<(TF_MeanOp $arg0, $arg1, BoolAttr:$arg2), (TFL_MeanOp $arg0, $arg1, $arg2)>; - -def : Pat<(TF_SumOp $arg, $axes, BoolAttr:$arg2), (TFL_SumOp $arg, $axes, $arg2)>; +def LegalizeSum : Pat<(TF_SumOp $arg, $axes, BoolAttr:$arg2), + (TFL_SumOp $arg, $axes, $arg2)>; // TopK in TFL is always sorted so we ignore that attribute here. -def : Pat<(TF_TopKV2Op $input, $k, $ignored_sorted), (TFL_TopKV2Op $input, $k)>; +def LegalizeTopKV2 : Pat<(TF_TopKV2Op $input, $k, $ignored_sorted), + (TFL_TopKV2Op $input, $k)>; -def : Pat<(TF_MinOp $arg0, $arg1, BoolAttr:$arg2), (TFL_ReduceMinOp $arg0, $arg1, $arg2)>; +def LegalizeMin : Pat<(TF_MinOp $arg0, $arg1, BoolAttr:$arg2), + (TFL_ReduceMinOp $arg0, $arg1, $arg2)>; -def : Pat<(TF_MaxOp $arg0, $arg1, BoolAttr:$arg2), (TFL_ReduceMaxOp $arg0, $arg1, $arg2)>; +def LegalizeMax : Pat<(TF_MaxOp $arg0, $arg1, BoolAttr:$arg2), + (TFL_ReduceMaxOp $arg0, $arg1, $arg2)>; -def : Pat<(TF_ProdOp $arg0, $arg1, BoolAttr:$arg2), (TFL_ReduceProdOp $arg0, $arg1, $arg2)>; +def LegalizeProd : Pat<(TF_ProdOp $arg0, $arg1, BoolAttr:$arg2), + (TFL_ReduceProdOp $arg0, $arg1, $arg2)>; -def : Pat<(TF_AnyOp $input, $reduction_indices, $keep_dims), - (TFL_ReduceAnyOp $input, $reduction_indices, $keep_dims)>; +def LegalizeAny : Pat<(TF_AnyOp $input, $reduction_indices, $keep_dims), + (TFL_ReduceAnyOp $input, $reduction_indices, $keep_dims)>; -def : Pat<(TF_CastOp $arg0, BoolAttr:$arg1), (TFL_CastOp $arg0)>; +def LegalizeCast : Pat<(TF_CastOp $arg0, BoolAttr:$arg1), (TFL_CastOp $arg0)>; -def : Pat<(TF_BatchToSpaceNDOp $input, $block_shape, $crops), (TFL_BatchToSpaceNdOp $input, $block_shape, $crops)>; +def LegalizeBatchToSpaceND : Pat< + (TF_BatchToSpaceNDOp $input, $block_shape, $crops), + (TFL_BatchToSpaceNdOp $input, $block_shape, $crops)>; -def : Pat<(TF_SpaceToBatchNDOp $input, $block_shape, $paddings), (TFL_SpaceToBatchNdOp $input, $block_shape, $paddings)>; +def LegalizeSpaceToBatchND : Pat< + (TF_SpaceToBatchNDOp $input, $block_shape, $paddings), + (TFL_SpaceToBatchNdOp $input, $block_shape, $paddings)>; -def : Pat<(TF_SpaceToDepthOp $input, $block_size, IsDataFormatNHWC:$data_format), - (TFL_SpaceToDepthOp $input, (convertIntAttrTo32Bit $block_size))>; +def LegalizeSpaceToDepth : Pat< + (TF_SpaceToDepthOp $input, $block_size, IsDataFormatNHWC:$data_format), + (TFL_SpaceToDepthOp $input, (convertIntAttrTo32Bit $block_size))>; -def : Pat<(TF_DepthToSpaceOp $input, $block_size, IsDataFormatNHWC:$data_format), - 
(TFL_DepthToSpaceOp $input, (convertIntAttrTo32Bit $block_size))>; +def LegalizeDepthToSpace : Pat< + (TF_DepthToSpaceOp $input, $block_size, IsDataFormatNHWC:$data_format), + (TFL_DepthToSpaceOp $input, (convertIntAttrTo32Bit $block_size))>; -def : Pat<(TF_ResizeBilinearOp $images, $size, $align_corners, $half_pixel_centers), (TFL_ResizeBilinearOp $images, $size, $align_corners, $half_pixel_centers)>; -def : Pat<(TF_ResizeNearestNeighborOp $images, $size, $align_corners, $half_pixel_centers), (TFL_ResizeNearestNeighborOp $images, $size, $align_corners, $half_pixel_centers)>; +def LegalizeResizeBilinear : Pat< + (TF_ResizeBilinearOp $images, $size, $align_corners, $half_pixel_centers), + (TFL_ResizeBilinearOp $images, $size, $align_corners, $half_pixel_centers)>; +def LegalizeResizeNearestNeighbor : Pat< + (TF_ResizeNearestNeighborOp $images, $size, $align_corners, + $half_pixel_centers), + (TFL_ResizeNearestNeighborOp $images, $size, $align_corners, + $half_pixel_centers)>; -def : Pat<(TF_MirrorPadOp $arg0, $arg1, $cst), (TFL_MirrorPadOp $arg0, $arg1, $cst)>; +def LegalizeMirrorPad : Pat<(TF_MirrorPadOp $arg0, $arg1, $cst), + (TFL_MirrorPadOp $arg0, $arg1, $cst)>; -def : Pat<(TF_SparseToDenseOp $sparse_indices, $output_shape, $sparse_values, $default_value, $validate_indices), - (TFL_SparseToDenseOp $sparse_indices, $output_shape, $sparse_values, $default_value)>; +def LegalizeSparseToDense : Pat< + (TF_SparseToDenseOp $sparse_indices, $output_shape, $sparse_values, + $default_value, $validate_indices), + (TFL_SparseToDenseOp $sparse_indices, $output_shape, $sparse_values, + $default_value)>; -def : Pat<(TF_UniqueOp $arg0),(TFL_UniqueOp $arg0)>; +def LegalizeUnique : Pat<(TF_UniqueOp $arg0),(TFL_UniqueOp $arg0)>; -def : Pat<(TF_FloorModOp $arg0, $arg1), (TFL_FloorModOp $arg0, $arg1)>; -def : Pat<(TF_ExpOp $arg0), (TFL_ExpOp $arg0)>; +def LegalizeFloorMod : Pat<(TF_FloorModOp $arg0, $arg1), + (TFL_FloorModOp $arg0, $arg1)>; +def LegalizeExp : Pat<(TF_ExpOp $arg0), (TFL_ExpOp $arg0)>; -def : Pat<(TF_LRNOp $arg0, $radius, F32Attr:$bias, F32Attr:$alpha, F32Attr:$beta), (TFL_LocalResponseNormalizationOp $arg0, (convertIntAttrTo32Bit $radius), $bias, $alpha, $beta)>; +def LegalizeLRN : Pat< + (TF_LRNOp $arg0, $radius, F32Attr:$bias, F32Attr:$alpha, F32Attr:$beta), + (TFL_LocalResponseNormalizationOp $arg0, (convertIntAttrTo32Bit $radius), + $bias, $alpha, $beta)>; -def : Pat< - (TF_NonMaxSuppressionV4Op $boxes, $scores, $max_output_size, $iou_threshold, $score_threshold, $pad_to_max_output_size), - (TFL_NonMaxSuppressionV4Op $boxes, $scores, $max_output_size, $iou_threshold, $score_threshold)>; +def LegalizeNonMaxSuppressionV4 : Pat< + (TF_NonMaxSuppressionV4Op $boxes, $scores, $max_output_size, $iou_threshold, + $score_threshold, $pad_to_max_output_size), + (TFL_NonMaxSuppressionV4Op $boxes, $scores, $max_output_size, $iou_threshold, + $score_threshold)>; -def : Pat< - (TF_NonMaxSuppressionV5Op $boxes, $scores, $max_output_size, $iou_threshold, $score_threshold, $soft_nms_sigma, $pad_to_max_output_size), - (TFL_NonMaxSuppressionV5Op $boxes, $scores, $max_output_size, $iou_threshold, $score_threshold, $soft_nms_sigma)>; +def LegalizeNonMaxSuppressionV5 : Pat< + (TF_NonMaxSuppressionV5Op $boxes, $scores, $max_output_size, $iou_threshold, + $score_threshold, $soft_nms_sigma, $pad_to_max_output_size), + (TFL_NonMaxSuppressionV5Op $boxes, $scores, $max_output_size, $iou_threshold, + $score_threshold, $soft_nms_sigma)>; -def : Pat<(TF_MatrixDiagOp $diagonal), (TFL_MatrixDiagOp $diagonal)>; +def 
LegalizeMatrixDiag : Pat<(TF_MatrixDiagOp $diagonal), + (TFL_MatrixDiagOp $diagonal)>; class I32VectorElementsAttr : ElementsAttrBase< CPred<"$_self.isa() &&" @@ -356,7 +406,7 @@ class I32VectorElementsAttr : ElementsAttrBase< "RankedTensorType::get({" # len # "}, $_builder.getIntegerType(32)), $0)"; } -def : Pat< +def LegalizeConv2DBackpropInput : Pat< (TF_Conv2DBackpropInputOp $input_sizes, $filter, $out_backprop, IsIntList1XY1:$strides, BoolAttr:$use_cudnn_on_gpu, @@ -373,9 +423,10 @@ def : Pat< /*stride_h=*/ ExtractI32At<1>:$strides, /*stride_w=*/ ExtractI32At<2>:$strides)>; -def : Pat< +def LegalizeMatrixSetDiag : Pat< (TF_MatrixSetDiagOp $input, $diagonal), (TFL_MatrixSetDiagOp $input, $diagonal)>; -def : Pat<(TF_ScatterNdOp I32Tensor:$indices, $updates, $shape), - (TFL_ScatterNdOp I32Tensor:$indices, $updates, $shape)>; +def LegalizeScatterNd : Pat< + (TF_ScatterNdOp I32Tensor:$indices, $updates, $shape), + (TFL_ScatterNdOp I32Tensor:$indices, $updates, $shape)>; diff --git a/tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td b/tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td index 1fae567c835..9746d9df1ea 100644 --- a/tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td +++ b/tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td @@ -485,4 +485,11 @@ foreach ActFun = [TFL_AF_Relu, TFL_AF_Relu6, TFL_AF_Relu1, TFL_AF_None] in { [(HasOneUse $first_output)]>; } - +// We can eliminate Relu from Relu(SquaredDifference(x, y)), +// since the result of SquaredDifference is always non-negative. +// TFLite interpreter doesn't support Relu+int32 for now. So the test cases +// are failing without the following pattern to optimize Relu away fixes +// the problem. +def OptimizeReluSquaredDifference : Pat< + (TFL_ReluOp (TFL_SquaredDifferenceOp $l, $r)), + (TFL_SquaredDifferenceOp $l, $r)>; From b13a153d077046fc0c823400c176ce39f41ce94d Mon Sep 17 00:00:00 2001 From: Robert David Date: Fri, 17 Jul 2020 18:06:19 -0700 Subject: [PATCH 0766/2522] Separate "is layer norm" and "has layer norm tensors" parameters of LSTMOpModel. PiperOrigin-RevId: 321890450 Change-Id: Ie5a07786688bd1e3e2914362e78735fee29df093 --- tensorflow/lite/kernels/lstm_test.cc | 161 +++++++++++++-------------- 1 file changed, 80 insertions(+), 81 deletions(-) diff --git a/tensorflow/lite/kernels/lstm_test.cc b/tensorflow/lite/kernels/lstm_test.cc index 1a42d637c08..754aaba9319 100644 --- a/tensorflow/lite/kernels/lstm_test.cc +++ b/tensorflow/lite/kernels/lstm_test.cc @@ -40,8 +40,8 @@ class LSTMOpModel : public SingleOpModel { bool use_peephole, bool use_projection_weights, bool use_projection_bias, float cell_clip, float proj_clip, const std::vector>& input_shapes, - const TensorType weight_type, bool is_layer_norm, - bool asymmetric_quantize_inputs) + const TensorType weight_type, bool model_has_legacy_20_inputs, + bool is_layer_norm, bool asymmetric_quantize_inputs) : n_batch_(n_batch), n_input_(n_input), n_cell_(n_cell), @@ -111,23 +111,19 @@ class LSTMOpModel : public SingleOpModel { AddInput(TensorData{TensorType_FLOAT32, {n_batch_, n_cell_}}, true); // Layer norm weights. 
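The constructor change below separates two questions that the old is_layer_norm flag conflated: whether the model reserves the four layer-norm coefficient slots at all (legacy models stop at 20 inputs, newer ones have 24), and whether those slots carry real tensors or null inputs. A rough sketch of the distinction, using hypothetical helper names rather than anything in the test:

// Hypothetical helpers, not part of lstm_test.cc.
int NumModelInputs(bool model_has_legacy_20_inputs) {
  // Legacy LSTM models expose 20 inputs; newer models append four
  // layer-norm coefficient slots at indices 20..23.
  return model_has_legacy_20_inputs ? 20 : 24;
}

bool LayerNormSlotIsRealInput(bool model_has_legacy_20_inputs,
                              bool is_layer_norm) {
  // The slots exist only in 24-input models, and hold real tensors only
  // when layer normalization is actually enabled.
  return !model_has_legacy_20_inputs && is_layer_norm;
}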
- if (is_layer_norm) { - const int kInputLayerNormCoeffsIndex = 20; - const int kForgetLayerNormCoeffsIndex = 21; - const int kCellLayerNormCoeffsIndex = 22; - const int kOutputLayerNormCoeffsIndex = 23; + if (!model_has_legacy_20_inputs) { if (use_cifg) { input_layer_norm_coefficients_ = AddNullInput(); } else { input_layer_norm_coefficients_ = - AddLayerNormCoeffsTensor(kInputLayerNormCoeffsIndex, input_shapes); + is_layer_norm ? AddInput(TensorType_FLOAT32) : AddNullInput(); } forget_layer_norm_coefficients_ = - AddLayerNormCoeffsTensor(kForgetLayerNormCoeffsIndex, input_shapes); + is_layer_norm ? AddInput(TensorType_FLOAT32) : AddNullInput(); cell_layer_norm_coefficients_ = - AddLayerNormCoeffsTensor(kCellLayerNormCoeffsIndex, input_shapes); + is_layer_norm ? AddInput(TensorType_FLOAT32) : AddNullInput(); output_layer_norm_coefficients_ = - AddLayerNormCoeffsTensor(kOutputLayerNormCoeffsIndex, input_shapes); + is_layer_norm ? AddInput(TensorType_FLOAT32) : AddNullInput(); } output_ = AddOutput(TensorType_FLOAT32); @@ -277,15 +273,6 @@ class LSTMOpModel : public SingleOpModel { int n_output_; private: - int AddLayerNormCoeffsTensor( - int tensor_index, const std::vector>& input_shapes) { - if (input_shapes[tensor_index][0] != 0) { - return AddInput(TensorType_FLOAT32); - } else { - return AddNullInput(); - } - } - template void PopulateTensor(int index, const std::vector& data) { // Nothing to do if tensor is an optional input or if data vector is empty. @@ -504,16 +491,17 @@ TEST_F(NoCifgNoPeepholeNoProjectionNoClippingLstmTest, LstmBlackBoxTest) { {0, 0}, // projection_weight tensor {0}, // projection_bias tensor }, - /*weight_type=*/TensorType_FLOAT32, /*is_layer_norm=*/false, + /*weight_type=*/TensorType_FLOAT32, + /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/false); VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm); } -class NoCifgNoPeepholeNoProjectionNoClippingOmittedLayerNormLstmTest +class NoCifgNoPeepholeNoProjectionNoClippingNoLayerNormLstmTest : public NoCifgNoPeepholeNoProjectionNoClippingLstmTest {}; -TEST_F(NoCifgNoPeepholeNoProjectionNoClippingOmittedLayerNormLstmTest, +TEST_F(NoCifgNoPeepholeNoProjectionNoClippingNoLayerNormLstmTest, LstmBlackBoxTest) { const int n_batch = 1; const int n_input = 2; @@ -559,7 +547,9 @@ TEST_F(NoCifgNoPeepholeNoProjectionNoClippingOmittedLayerNormLstmTest, {0}, // cell_layer_norm_coefficient tensor {0}, // output_layer_norm_coefficient tensor }, - /*weight_type=*/TensorType_FLOAT32, /*is_layer_norm=*/true, + /*weight_type=*/TensorType_FLOAT32, + /*model_has_legacy_20_inputs=*/false, + /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/false); VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm); @@ -607,7 +597,8 @@ TEST_P(NoCifgNoPeepholeNoProjectionNoClippingLstmTest, {0, 0}, // projection_weight tensor {0}, // projection_bias tensor }, - /*weight_type=*/TensorType_UINT8, /*is_layer_norm=*/false, + /*weight_type=*/TensorType_UINT8, + /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/GetParam()); VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm, @@ -658,7 +649,8 @@ TEST_P(NoCifgNoPeepholeNoProjectionNoClippingLstmInt8Test, {0, 0}, // projection_weight tensor {0}, // projection_bias tensor }, - /*weight_type=*/TensorType_INT8, /*is_layer_norm=*/false, + /*weight_type=*/TensorType_INT8, + /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/GetParam()); VerifyGoldens(lstm_input_, 
lstm_golden_output_, &lstm, @@ -749,7 +741,8 @@ TEST_F(CifgNoPeepholeNoProjectionNoClippingLstmTest, LstmBlackBoxTest) { {0, 0}, // projection_weight tensor {0}, // projection_bias tensor }, - /*weight_type=*/TensorType_FLOAT32, /*is_layer_norm=*/false, + /*weight_type=*/TensorType_FLOAT32, + /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/false); VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm); @@ -797,7 +790,8 @@ TEST_P(CifgNoPeepholeNoProjectionNoClippingLstmTest, {0, 0}, // projection_weight tensor {0}, // projection_bias tensor }, - /*weight_type=*/TensorType_UINT8, /*is_layer_norm=*/false, + /*weight_type=*/TensorType_UINT8, + /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/GetParam()); VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm, /*tolerance=*/0.03573); @@ -846,7 +840,8 @@ TEST_P(CifgNoPeepholeNoProjectionNoClippingLstmInt8Test, {0, 0}, // projection_weight tensor {0}, // projection_bias tensor }, - /*weight_type=*/TensorType_INT8, /*is_layer_norm=*/false, + /*weight_type=*/TensorType_INT8, + /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/GetParam()); VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm, /*tolerance=*/0.03573); @@ -1487,7 +1482,8 @@ TEST_F(NoCifgPeepholeProjectionNoClippingLstmTest, LstmBlackBoxTest) { {n_output, n_cell}, // projection_weight tensor {0}, // projection_bias tensor }, - /*weight_type=*/TensorType_FLOAT32, /*is_layer_norm=*/false, + /*weight_type=*/TensorType_FLOAT32, + /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/false); VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm); @@ -1534,7 +1530,8 @@ TEST_P(NoCifgPeepholeProjectionNoClippingLstmTest, {n_output, n_cell}, // projection_weight tensor {0}, // projection_bias tensor }, - /*weight_type=*/TensorType_UINT8, /*is_layer_norm=*/false, + /*weight_type=*/TensorType_UINT8, + /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/GetParam()); VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm, /*tolerance=*/0.00467); @@ -1583,7 +1580,8 @@ TEST_P(NoCifgPeepholeProjectionNoClippingLstmInt8Test, {n_output, n_cell}, // projection_weight tensor {0}, // projection_bias tensor }, - /*weight_type=*/TensorType_INT8, /*is_layer_norm=*/false, + /*weight_type=*/TensorType_INT8, + /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/GetParam()); VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm, /*tolerance=*/0.0015); @@ -1703,8 +1701,8 @@ TEST_F(NoCifgPeepholeProjectionNoClippingLayerNormLstmTest, {n_cell}, // cell_layer_norm_coefficient tensor {n_cell}, // output_layer_norm_coefficient tensor }, - /*weight_type=*/TensorType_FLOAT32, /*is_layer_norm=*/true, - /*asymmetric_quantize_inputs=*/false); + /*weight_type=*/TensorType_FLOAT32, /*model_has_legacy_20_inputs=*/false, + /*is_layer_norm=*/true, /*asymmetric_quantize_inputs=*/false); // Verify the final output. 
lstm_golden_output_ = {{ @@ -1774,8 +1772,8 @@ TEST_P(NoCifgPeepholeProjectionNoClippingLayerNormLstmTest, {n_cell}, // cell_layer_norm_coefficient tensor {n_cell}, // output_layer_norm_coefficient tensor }, - /*weight_type=*/TensorType_UINT8, /*is_layer_norm=*/true, - /*asymmetric_quantize_inputs=*/GetParam()); + /*weight_type=*/TensorType_UINT8, /*model_has_legacy_20_inputs=*/false, + /*is_layer_norm=*/true, /*asymmetric_quantize_inputs=*/GetParam()); lstm_golden_output_ = {{ // Batch0: 3 (input_sequence_size) * 3 (n_output) @@ -1847,8 +1845,8 @@ TEST_P(NoCifgPeepholeProjectionNoClippingLayerNormLstmInt8Test, {n_cell}, // cell_layer_norm_coefficient tensor {n_cell}, // output_layer_norm_coefficient tensor }, - /*weight_type=*/TensorType_INT8, /*is_layer_norm=*/true, - /*asymmetric_quantize_inputs=*/GetParam()); + /*weight_type=*/TensorType_INT8, /*model_has_legacy_20_inputs=*/false, + /*is_layer_norm=*/true, /*asymmetric_quantize_inputs=*/GetParam()); // Goldens are calculated from weight_type=TensorType_FLOAT32. lstm_golden_output_ = {{ @@ -1961,8 +1959,8 @@ TEST_F(CifgPeepholeProjectionNoClippingLayerNormLstmTest, {n_cell}, // cell_layer_norm_coefficient tensor {n_cell}, // output_layer_norm_coefficient tensor }, - /*weight_type=*/TensorType_FLOAT32, /*is_layer_norm=*/true, - /*asymmetric_quantize_inputs=*/false); + /*weight_type=*/TensorType_FLOAT32, /*model_has_legacy_20_inputs=*/false, + /*is_layer_norm=*/true, /*asymmetric_quantize_inputs=*/false); // Verify the final output. lstm_golden_output_ = { @@ -2032,8 +2030,8 @@ TEST_P(CifgPeepholeProjectionNoClippingLayerNormLstmTest, {n_cell}, // cell_layer_norm_coefficient tensor {n_cell}, // output_layer_norm_coefficient tensor }, - /*weight_type=*/TensorType_UINT8, /*is_layer_norm=*/true, - /*asymmetric_quantize_inputs=*/GetParam()); + /*weight_type=*/TensorType_UINT8, /*model_has_legacy_20_inputs=*/false, + /*is_layer_norm=*/true, /*asymmetric_quantize_inputs=*/GetParam()); // Verify the final output. lstm_golden_output_ = { @@ -2104,8 +2102,8 @@ TEST_P(CifgPeepholeProjectionNoClippingLayerNormLstmInt8Test, {n_cell}, // cell_layer_norm_coefficient tensor {n_cell}, // output_layer_norm_coefficient tensor }, - /*weight_type=*/TensorType_INT8, /*is_layer_norm=*/true, - /*asymmetric_quantize_inputs=*/GetParam()); + /*weight_type=*/TensorType_INT8, /*model_has_legacy_20_inputs=*/false, + /*is_layer_norm=*/true, /*asymmetric_quantize_inputs=*/GetParam()); // Goldens are results using FLOAT32 inference. 
lstm_golden_output_ = {{ @@ -3278,41 +3276,6 @@ TEST(LSTMOpModel, InvalidTypeTest) { const int n_cell = 4; const int n_output = 4; - EXPECT_DEATH(LSTMOpModel lstm( - n_batch, n_input, n_cell, n_output, - /*use_cifg=*/false, /*use_peephole=*/false, - /*use_projection_weights=*/false, - /*use_projection_bias=*/false, - /*cell_clip=*/0.0, /*proj_clip=*/0.0, - { - {n_batch, n_input}, // input tensor - - {n_cell, n_input}, // input_to_input_weight tensor - {n_cell, n_input}, // input_to_forget_weight tensor - {n_cell, n_input}, // input_to_cell_weight tensor - {n_cell, n_input}, // input_to_output_weight tensor - - {n_cell, n_output}, // recurrent_to_input_weight_tensor - {n_cell, n_output}, // recurrent_to_forget_weight_tensor - {n_cell, n_output}, // recurrent_to_cell_weight_tensor - {n_cell, n_output}, // recurrent_to_output_weight_tensor - - {0}, // cell_to_input_weight tensor - {0}, // cell_to_forget_weight tensor - {0}, // cell_to_output_weight tensor - - {n_cell}, // input_gate_bias tensor - {n_cell}, // forget_gate_bias tensor - {n_cell}, // cell_gate_bias tensor - {n_cell}, // output_gate_bias tensor - - {0, 0}, // projection_weight tensor - {0}, // projection_bias tensor - }, - /*weight_type=*/TensorType_INT32, /*is_layer_norm=*/false, - /*asymmetric_quantize_inputs=*/false), - ""); - EXPECT_DEATH( LSTMOpModel lstm( n_batch, n_input, n_cell, n_output, @@ -3345,9 +3308,45 @@ TEST(LSTMOpModel, InvalidTypeTest) { {0, 0}, // projection_weight tensor {0}, // projection_bias tensor }, - /*weight_type=*/TensorType_COMPLEX64, /*is_layer_norm=*/false, - /*asymmetric_quantize_inputs=*/false), + /*weight_type=*/TensorType_INT32, /*model_has_legacy_20_inputs=*/true, + /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/false), ""); + + EXPECT_DEATH(LSTMOpModel lstm( + n_batch, n_input, n_cell, n_output, + /*use_cifg=*/false, /*use_peephole=*/false, + /*use_projection_weights=*/false, + /*use_projection_bias=*/false, + /*cell_clip=*/0.0, /*proj_clip=*/0.0, + { + {n_batch, n_input}, // input tensor + + {n_cell, n_input}, // input_to_input_weight tensor + {n_cell, n_input}, // input_to_forget_weight tensor + {n_cell, n_input}, // input_to_cell_weight tensor + {n_cell, n_input}, // input_to_output_weight tensor + + {n_cell, n_output}, // recurrent_to_input_weight_tensor + {n_cell, n_output}, // recurrent_to_forget_weight_tensor + {n_cell, n_output}, // recurrent_to_cell_weight_tensor + {n_cell, n_output}, // recurrent_to_output_weight_tensor + + {0}, // cell_to_input_weight tensor + {0}, // cell_to_forget_weight tensor + {0}, // cell_to_output_weight tensor + + {n_cell}, // input_gate_bias tensor + {n_cell}, // forget_gate_bias tensor + {n_cell}, // cell_gate_bias tensor + {n_cell}, // output_gate_bias tensor + + {0, 0}, // projection_weight tensor + {0}, // projection_bias tensor + }, + /*weight_type=*/TensorType_COMPLEX64, + /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, + /*asymmetric_quantize_inputs=*/false), + ""); } #endif From 369a9507f820af15db6d468592e266f4a69d67b8 Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Fri, 17 Jul 2020 18:15:56 -0700 Subject: [PATCH 0767/2522] [Resubmit] Dynamic literal support PiperOrigin-RevId: 321891486 Change-Id: Ib6ac31e7f011e42f22b3b0ab8ee04373f6f6526c --- tensorflow/compiler/xla/literal.cc | 267 ++++++++++++++++-- tensorflow/compiler/xla/literal.h | 64 ++++- tensorflow/compiler/xla/literal_test.cc | 118 +++++++- .../compiler/xla/service/hlo_evaluator.cc | 4 + .../service/interpreter/executable_base.cc | 20 +- 
tensorflow/compiler/xla/shape_util.cc | 9 + tensorflow/compiler/xla/shape_util.h | 3 + 7 files changed, 450 insertions(+), 35 deletions(-) diff --git a/tensorflow/compiler/xla/literal.cc b/tensorflow/compiler/xla/literal.cc index 73c37d6b2f3..d2b300f0b2d 100644 --- a/tensorflow/compiler/xla/literal.cc +++ b/tensorflow/compiler/xla/literal.cc @@ -48,6 +48,10 @@ namespace { using absl::StrCat; constexpr bool kLittleEndian = __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__; +// Literals can be used as DMA targets, which can require alignment. We +// force a tensorflow::Allocator::kAllocatorAlignment-byte minimum +// alignment. +constexpr int kMinimumAlignment = 64; // Converts between little and big endian. // @@ -133,12 +137,14 @@ void Literal::SetPiece(const Shape& shape, Piece* piece, bool allocate_arrays) { } } else if (shape.IsArray()) { if (allocate_arrays) { - // Literals can be used as DMA targets, which can require alignment. We - // force a tensorflow::Allocator::kAllocatorAlignment-byte minimum - // alignment. - constexpr int kMinimumAlignment = 64; piece->set_buffer(static_cast(tensorflow::port::AlignedMalloc( piece->size_bytes(), kMinimumAlignment))); + if (shape.is_dynamic()) { + CHECK_EQ(piece->dynamic_size_buffer(), nullptr); + piece->set_dynamic_size_buffer( + static_cast(tensorflow::port::AlignedMalloc( + piece->dynamic_size_buffer_bytes(), kMinimumAlignment))); + } } } else { // If the shape is neither an array nor tuple, then it must be @@ -171,6 +177,9 @@ void Literal::DeallocateBuffers() { if (piece->buffer() != nullptr) { tensorflow::port::AlignedFree(piece->buffer()); } + if (piece->dynamic_size_buffer() != nullptr) { + tensorflow::port::AlignedFree(piece->dynamic_size_buffer()); + } }); } @@ -199,6 +208,15 @@ Literal LiteralBase::CreateFromShape(const Shape& shape) { return literal; } +int32 LiteralBase::GetDynamicSize(int64 dim_index) const { + return GetDynamicSize(dim_index, {}); +} + +int32 LiteralBase::GetDynamicSize(int64 dim_index, + const ShapeIndex& shape_index) const { + return piece(shape_index).GetDynamicSize(dim_index); +} + absl::optional LiteralBase::GetFirstInteger() const { switch (shape().element_type()) { case U8: @@ -381,7 +399,9 @@ std::vector Literal::DecomposeTuple() { // Move the respective buffer over to the element Literal. dest_piece->set_buffer(src_piece.buffer()); + dest_piece->set_dynamic_size_buffer(src_piece.dynamic_size_buffer()); src_piece.set_buffer(nullptr); + src_piece.set_dynamic_size_buffer(nullptr); }); } // Set this literal to be nil-shaped. @@ -407,23 +427,51 @@ void CopyElementsBetween(absl::Span dest, src[IndexUtil::MultidimensionalIndexToLinearIndex(src_shape, index)]; } while (IndexUtil::BumpIndices(dest_shape, absl::MakeSpan(index))); } - } // namespace -Status LiteralBase::Piece::CopyFrom(const LiteralBase::Piece& src) { +int32 LiteralBase::Piece::GetDynamicSize(int64 dim_index) const { + CHECK(LayoutUtil::IsDenseArray(subshape())); + if (!subshape_->is_dynamic_dimension(dim_index)) { + // This is a static dimension, return size. + return subshape_->dimensions(dim_index); + } + CHECK_NE(dynamic_size_buffer(), nullptr); + return dynamic_size_buffer_[dim_index]; +} + +void LiteralBase::Piece::SetDynamicSize(int64 dim_index, int32 size) { + CHECK(LayoutUtil::IsDenseArray(subshape())); + CHECK(subshape_->is_dynamic_dimension(dim_index)); + if (dynamic_size_buffer() == nullptr) { + // Lazily initialize the dynamic size buffer. 
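One way to picture the storage this hunk adds: alongside its bounds-sized data buffer, each dense array piece now carries a lazily allocated side buffer with one int32 per dimension holding the runtime extents. A self-contained sketch of that representation, with illustrative types rather than the actual XLA classes:

#include <cstdint>
#include <vector>

// Illustrative stand-in for a dense array piece with dynamic dimensions.
struct BoundedDynamicArray {
  std::vector<int64_t> bounds;         // static upper bounds, e.g. {2, 3}
  std::vector<int32_t> dynamic_sizes;  // runtime extents, e.g. {2, 1}
  std::vector<float> data;             // storage is sized by the bounds

  int32_t GetDynamicSize(int dim) const { return dynamic_sizes[dim]; }
  void SetDynamicSize(int dim, int32_t size) { dynamic_sizes[dim] = size; }

  // Mirrors the idea of dynamic_size_buffer_bytes(): one int32 per dimension.
  int64_t DynamicSizeBufferBytes() const {
    return static_cast<int64_t>(bounds.size() * sizeof(int32_t));
  }
};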
+ set_dynamic_size_buffer(static_cast(tensorflow::port::AlignedMalloc( + dynamic_size_buffer_bytes(), kMinimumAlignment))); + /*for (int64 i = 0; i < subshape().rank(); ++i) { + // Initialized to -1 to help debug. + dynamic_size_buffer_[i] = -1; + }*/ + } + dynamic_size_buffer_[dim_index] = size; +} + +Status LiteralBase::Piece::CopyFrom(const LiteralBase::Piece& src, + bool only_dynamic_bound) { CHECK(subshape_ != nullptr); CHECK(src.subshape_ != nullptr); if (ShapeUtil::Equal(subshape(), src.subshape())) { // If the layouts are equal it's faster just to memcpy. memcpy(buffer(), src.buffer(), src.size_bytes()); } else { - TF_RET_CHECK(ShapeUtil::Compatible(src.subshape(), subshape())); std::vector origin(subshape().rank(), 0); switch (subshape().element_type()) { -#define COPY_ELEMENTS(XLA_T, NATIVE_T) \ - case (XLA_T): \ - CopyElementsBetween(data(), src.data(), \ - subshape(), src.subshape()); \ +#define COPY_ELEMENTS(XLA_T, NATIVE_T) \ + case (XLA_T): \ + if (only_dynamic_bound) { \ + CopyElementsWithDynamicBound(src); \ + } else { \ + CopyElementsBetween(data(), src.data(), \ + subshape(), src.subshape()); \ + } \ break; COPY_ELEMENTS(U8, uint8); COPY_ELEMENTS(U16, uint16); @@ -447,21 +495,54 @@ Status LiteralBase::Piece::CopyFrom(const LiteralBase::Piece& src) { PrimitiveType_Name(subshape().element_type())); } } + DCHECK_EQ(dynamic_size_buffer_bytes(), src.dynamic_size_buffer_bytes()); + if (subshape().is_dynamic() && src.subshape().is_dynamic()) { + CHECK_NE(dynamic_size_buffer_, nullptr); + CHECK_NE(src.dynamic_size_buffer_, nullptr); + memcpy(dynamic_size_buffer(), src.dynamic_size_buffer(), + src.dynamic_size_buffer_bytes()); + } return Status::OK(); } +void MutableLiteralBase::SetDynamicSize(int64 dim_index, int32 size) { + return SetDynamicSize(dim_index, {}, size); +} + +void MutableLiteralBase::SetDynamicSize(int64 dim_index, + const ShapeIndex& shape_index, + int32 size) { + Shape* subshape_ = ShapeUtil::GetMutableSubshape(shape_.get(), shape_index); + CHECK_GE(subshape_->dimensions(dim_index), size); + if (subshape_->dimensions(dim_index) == size) { + subshape_->set_dynamic_dimension(dim_index, false); + return; + } + subshape_->set_dynamic_dimension(dim_index, true); + piece(shape_index).SetDynamicSize(dim_index, size); +} + Status MutableLiteralBase::CopyFrom(const LiteralSlice& src_literal, const ShapeIndex& dest_shape_index, - const ShapeIndex& src_shape_index) { + const ShapeIndex& src_shape_index, + bool only_dynamic_bound) { const Shape& dest_subshape = ShapeUtil::GetSubshape(shape(), dest_shape_index); const Shape& src_subshape = ShapeUtil::GetSubshape(src_literal.shape(), src_shape_index); - if (!ShapeUtil::Compatible(dest_subshape, src_subshape)) { - return InvalidArgument( - "Destination subshape incompatible with source subshape: %s vs %s", - ShapeUtil::HumanString(dest_subshape), - ShapeUtil::HumanString(src_subshape)); + if (only_dynamic_bound) { + auto bound_shape = dest_subshape.is_static() ? src_subshape : dest_subshape; + auto compact_shape = + dest_subshape.is_static() ? 
dest_subshape : src_subshape; + CHECK(ShapeUtil::DynamicShapeIsCompatible(compact_shape, bound_shape)) + << compact_shape.ToString() << " vs " << bound_shape.ToString(); + } else { + if (!ShapeUtil::Compatible(dest_subshape, src_subshape)) { + return InvalidArgument( + "Destination subshape incompatible with source subshape: %s vs %s", + ShapeUtil::HumanString(dest_subshape), + ShapeUtil::HumanString(src_subshape)); + } } return root_piece_->ForEachMutableSubpieceWithStatus( [&](const ShapeIndex& index, Piece* piece) { @@ -486,7 +567,9 @@ Status MutableLiteralBase::CopyFrom(const LiteralSlice& src_literal, for (int64 i = dest_shape_index.size(); i < index.size(); ++i) { src_piece_index.push_back(index[i]); } - TF_RETURN_IF_ERROR(piece->CopyFrom(src_literal.piece(src_piece_index))); + TF_RETURN_IF_ERROR( + piece->CopyFrom(src_literal.piece(src_piece_index), + /*only_dynamic_bound=*/only_dynamic_bound)); return Status::OK(); }); } @@ -514,7 +597,9 @@ Status Literal::MoveFrom(Literal&& src_literal, } Piece& dest_piece = piece(dest_index); tensorflow::port::AlignedFree(dest_piece.buffer()); + tensorflow::port::AlignedFree(dest_piece.dynamic_size_buffer()); dest_piece.set_buffer(src_piece.buffer()); + dest_piece.set_dynamic_size_buffer(src_piece.dynamic_size_buffer()); }); src_literal.shape_ = absl::make_unique(ShapeUtil::MakeNil()); @@ -629,6 +714,41 @@ Literal LiteralBase::Relayout(const Shape& shape_with_layout) const { return result; } +Literal LiteralBase::ToBoundedDynamic(const Shape& bounded_shape) const { + CHECK(bounded_shape.is_dynamic()); + Literal result(bounded_shape); + ShapeUtil::ForEachSubshape( + shape(), [&](const Shape& subshape, const ShapeIndex& index) { + if (!subshape.IsArray()) { + return; + } + for (int64 i = 0; i < subshape.rank(); ++i) { + result.SetDynamicSize(i, subshape.dimensions(i)); + } + }); + TF_CHECK_OK(result.CopyFrom(*this, {}, {}, /*only_dynamic_bound=*/true)); + + return result; +} + +Literal LiteralBase::ToStatic() const { + // Create new shape with 'new_layout' set at the given shape index. 
+ Shape new_shape = shape(); + ShapeUtil::ForEachMutableSubshape( + &new_shape, [this](Shape* subshape, const ShapeIndex& index) { + if (!subshape->IsArray()) { + return; + } + for (int64 i = 0; i < subshape->rank(); ++i) { + subshape->set_dynamic_dimension(i, false); + subshape->set_dimensions(i, GetDynamicSize(i, index)); + } + }); + Literal result(new_shape); + TF_CHECK_OK(result.CopyFrom(*this, {}, {}, /*only_dynamic_bound=*/true)); + return result; +} + StatusOr LiteralBase::Broadcast( const Shape& result_shape, absl::Span dimensions) const { if (!shape().IsArray()) { @@ -652,6 +772,11 @@ StatusOr LiteralBase::Broadcast( const int64 primitive_size = ShapeUtil::ByteSizeOfPrimitiveType(shape().element_type()); + for (int64 i = 0; i < dimensions.size(); ++i) { + int64 dynamic_size = GetDynamicSize(i); + result.SetDynamicSize(dimensions[i], dynamic_size); + } + ShapeUtil::ForEachIndex( result_shape, [&](absl::Span output_index) { for (int64 i = 0; i < dimensions.size(); ++i) { @@ -674,6 +799,9 @@ StatusOr LiteralBase::Reshape( if (!shape().IsArray()) { return InvalidArgument("Reshape does not support tuples."); } + if (shape().is_dynamic()) { + return Unimplemented("Dynamic reshape is not implemented."); + } Literal output; if (!LayoutUtil::IsMonotonicWithDim0Major(shape().layout())) { output = Relayout(LayoutUtil::GetDefaultLayoutForRank(shape().rank())); @@ -728,6 +856,9 @@ Literal LiteralBase::Transpose(absl::Span permutation) const { layout->add_minor_to_major(inverse_permutation[index]); } Literal new_literal(permuted_shape); + for (int64 i = 0; i < shape().rank(); i++) { + new_literal.SetDynamicSize(inverse_permutation[i], GetDynamicSize(i)); + } DCHECK_EQ(ShapeUtil::ByteSizeOf(new_literal.shape()), ShapeUtil::ByteSizeOf(shape())); std::memcpy(new_literal.untyped_data(), untyped_data(), size_bytes()); @@ -747,6 +878,14 @@ Literal LiteralBase::SliceInternal( return Get(new_indices); }) .ok()); + for (int64 dnum = 0; dnum < shape().rank(); ++dnum) { + if (shape().is_dynamic_dimension(dnum)) { + int64 dynamic_size = GetDynamicSize(dnum) - start_indices[dnum]; + CHECK_GE(dynamic_size, 0) << GetDynamicSize(dnum); + dynamic_size = std::min(dynamic_size, result_shape.dimensions(dnum)); + result_literal.SetDynamicSize(dnum, dynamic_size); + } + } return result_literal; } @@ -763,9 +902,10 @@ Literal LiteralBase::Slice(absl::Span start_indices, CHECK_GE(dimension, 0) << "dnum = " << dnum; result_dimensions.push_back(dimension); } - const auto result_shape = + auto result_shape = ShapeUtil::MakeShapeWithLayout(shape().element_type(), result_dimensions, LayoutUtil::MinorToMajor(shape())); + ShapeUtil::CopyDynamicDimensions(&result_shape, shape()); switch (result_shape.element_type()) { case PRED: return SliceInternal(result_shape, start_indices); @@ -1082,11 +1222,24 @@ void DenseArrayToStringHelper(const LiteralBase& literal, if (print_shape) { pieces->push_back(ShapeToString(print_layout, subshape)); + if (subshape.is_dynamic()) { + pieces->push_back("("); + for (int64 i = 0; i < subshape.dimensions_size(); ++i) { + pieces->push_back(StrCat(literal.GetDynamicSize(i, shape_index))); + if (i < subshape.dimensions_size() - 1) { + pieces->push_back(","); + } + } + pieces->push_back(")"); + } pieces->push_back(" "); } std::vector indices = {}; - std::vector dimensions(subshape.dimensions().begin(), - subshape.dimensions().end()); + std::vector dimensions; + dimensions.reserve(subshape.rank()); + for (int64 i = 0; i < subshape.rank(); ++i) { + dimensions.push_back(literal.GetDynamicSize(i, 
shape_index)); + } to_string_recursive(dimensions, &indices); } @@ -1374,13 +1527,44 @@ StatusOr LiteralBase::ConvertToShape(const Shape& dest_shape) const { return literal; } +template +void LiteralBase::Piece::CopyElementsWithDynamicBound( + const LiteralBase::Piece& src) { + auto dest_shape = subshape(); + auto src_shape = src.subshape(); + + // At least one shape has to be static as bound. + CHECK(dest_shape.is_static() || src_shape.is_static()); + auto bound_shape = dest_shape.is_static() ? src_shape : dest_shape; + if (ShapeUtil::IsZeroElementArray(dest_shape)) { + return; + } + std::vector index(dest_shape.rank()); + do { + bool out_of_bound = false; + for (int64 i = 0; i < index.size(); ++i) { + // Do not copy elements beyond dynamic bound. + if (index[i] >= GetDynamicSize(i) || index[i] >= src.GetDynamicSize(i)) { + out_of_bound = true; + } + } + if (out_of_bound) { + continue; + } + data()[IndexUtil::MultidimensionalIndexToLinearIndex(dest_shape, + index)] = + src.data()[IndexUtil::MultidimensionalIndexToLinearIndex( + src_shape, index)]; + } while (IndexUtil::BumpIndices(bound_shape, absl::MakeSpan(index))); +} + template bool LiteralBase::Piece::EqualElementsInternal( const LiteralBase::Piece& other, std::vector* multi_index) const { if (multi_index->size() == subshape().rank()) { return (Get(*multi_index) == other.Get(*multi_index)); } - for (int64 i = 0; i < subshape().dimensions(multi_index->size()); ++i) { + for (int64 i = 0; i < GetDynamicSize(multi_index->size()); ++i) { multi_index->push_back(i); if (!EqualElementsInternal(other, multi_index)) { return false; @@ -1390,10 +1574,24 @@ bool LiteralBase::Piece::EqualElementsInternal( return true; } -bool LiteralBase::Piece::EqualElements(const LiteralBase::Piece& other) const { +bool LiteralBase::Piece::EqualDynamicSize( + const LiteralBase::Piece& other) const { DCHECK(ShapeUtil::Compatible(subshape(), other.subshape())); + if (subshape().is_static()) { + return true; + } - if (ShapeUtil::Equal(subshape(), other.subshape()) && + for (int64 i = 0; i < subshape().rank(); ++i) { + if (GetDynamicSize(i) != other.GetDynamicSize(i)) { + return false; + } + } + return true; +} + +bool LiteralBase::Piece::EqualElements(const LiteralBase::Piece& other) const { + if (subshape().is_static() && + ShapeUtil::Equal(subshape(), other.subshape()) && LayoutUtil::IsDenseArray(subshape())) { CHECK_EQ(size_bytes(), other.size_bytes()); return memcmp(buffer(), other.buffer(), size_bytes()) == 0; @@ -1436,17 +1634,33 @@ bool LiteralBase::Piece::EqualElements(const LiteralBase::Piece& other) const { } bool LiteralBase::operator==(const LiteralBase& other) const { - if (!ShapeUtil::Compatible(shape(), other.shape())) { + // Checking the structure of tuple literals. Checks for dense arrays are + // performed below. 
+ if (!ShapeUtil::EqualStructure(shape(), other.shape())) { return false; } return root_piece().ForEachSubpieceWithBool( [&](const ShapeIndex& index, const Piece& piece) { + const Piece& other_piece = other.piece(index); + const Shape& subshape = piece.subshape(); + const Shape& other_subshape = other_piece.subshape(); + if (subshape.element_type() != other_subshape.element_type()) { + return false; + } if (!piece.subshape().IsArray()) { return true; } + if (subshape.rank() != other_subshape.rank()) { + return false; + } + + for (int64 i = 0; i < subshape.rank(); ++i) { + if (piece.GetDynamicSize(i) != other_piece.GetDynamicSize(i)) { + return false; + } + } - const Piece& other_piece = other.piece(index); if (!piece.EqualElements(other_piece)) { return false; } @@ -2035,6 +2249,7 @@ void MutableBorrowingLiteral::CopyPieceSubtree(const Shape& shape, } } else if (shape.IsArray()) { dest_piece->set_buffer(src_piece->buffer()); + dest_piece->set_dynamic_size_buffer(src_piece->dynamic_size_buffer()); } else { // If the shape is neither an array nor tuple, then it must be // zero-sized. Otherwise, some memory needs to be allocated for it. diff --git a/tensorflow/compiler/xla/literal.h b/tensorflow/compiler/xla/literal.h index a2be92fbf5b..1ee71618887 100644 --- a/tensorflow/compiler/xla/literal.h +++ b/tensorflow/compiler/xla/literal.h @@ -112,6 +112,10 @@ class LiteralBase { template NativeT Get(absl::Span multi_index) const; + // Get the dynamic size on dim_index in the literal at the given shape_index. + int32 GetDynamicSize(int64 dim_index, const ShapeIndex& shape_index) const; + int32 GetDynamicSize(int64 dim_index) const; + // Returns the element value at index (0, ..., 0), however many zeroes are // required for that index. template @@ -281,6 +285,18 @@ class LiteralBase { // than being limited to a single array within the shape. Literal Relayout(const Shape& shape_with_layout) const; + // Generate a new literal whose static sizes are equal to the previous + // literal's dynamic sizes. + Literal ToStatic() const; + + // Expand a static literal into a new one with a bounded dyanmic literal. The + // static dimensions of the original literal becomes dynamic dimensions of the + // new literal, where the argument `bounded_shape` becomes the bounded shape + // of the new literal. + // + // Precondition: bounded_shape.is_dynamic() + Literal ToBoundedDynamic(const Shape& bounded_shape) const; + // Creates a new literal by reshaping this literal to have the given // dimensions. The total number of elements must not change; The // implementation currently only supports monotonic dim0-major layouts. @@ -354,10 +370,22 @@ class LiteralBase { template void Set(absl::Span index, NativeT value); + int32 GetDynamicSize(int64 dim_index) const; + void SetDynamicSize(int64 dim_index, int32 size); // Gets/sets the buffer holding the array data. char* buffer() const { return buffer_; } void set_buffer(char* buffer) { buffer_ = buffer; } + // Gets/sets the buffer holding dynamic sizes. + int32* dynamic_size_buffer() const { return dynamic_size_buffer_; } + void set_dynamic_size_buffer(int32* dynamic_size_buffer) { + dynamic_size_buffer_ = dynamic_size_buffer; + } + + int64 dynamic_size_buffer_bytes() const { + return subshape().dimensions_size() * sizeof(int32); + } + // Gets or sets the subshape of this piece. This reference points to a // subshape within the shape in the containing Literal (Literal::shape_). 
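A short usage sketch of the two conversions documented in this header hunk, mirroring the ToStaticR2 and ToBoundedDynamicR2 unit tests added later in the patch (it assumes the usual XLA literal and shape_util headers are available):

// f32[2,3] literal, then mark dimension 1 as dynamic with runtime size 1.
xla::Literal dyn = xla::LiteralUtil::CreateR2<float>({{1, 2, 3}, {4, 5, 6}});
dyn.SetDynamicSize(/*dim_index=*/1, /*size=*/1);

// ToStatic() shrinks the static dimensions to the dynamic sizes: f32[2,1]
// holding {{1}, {4}}.
xla::Literal fixed = dyn.ToStatic();

// ToBoundedDynamic() goes the other way: pad a static literal back out to a
// bounded dynamic shape such as f32[2,<=3].
xla::Literal back = fixed.ToBoundedDynamic(
    xla::ShapeUtil::MakeShape(xla::F32, {2, 3}, {false, true}));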
const Shape& subshape() const { return *subshape_; } @@ -434,15 +462,21 @@ class LiteralBase { } // Returns true if this piece and 'other' contain the same data. This piece - // and 'other' must be array-shaped and compatible. + // and 'other' must be array-shaped and compatible. If a literal has dynamic + // shape, comparison is done only for the valid elements. bool EqualElements(const Piece& other) const; + // Returns true if this piece and other pieces have the same dynamic + // dimension sizes. + bool EqualDynamicSize(const Piece& other) const; + // Writes the shape and data (if array-shaped) into the given proto. void WriteToProto(LiteralProto* proto) const; // Copy the data from 'src' into this piece's buffer. Shapes of this piece - // and src must be compatible. - Status CopyFrom(const Piece& src); + // and src must be compatible. If only_dynamic_bound is true, only elements + // within dynamic bounds will be copied. + Status CopyFrom(const Piece& src, bool only_dynamic_bound); // Copies the data from the given proto into this piece. The shape of this // piece must be equal (not just compatible) to the shape of the proto. @@ -497,9 +531,15 @@ class LiteralBase { bool EqualElementsInternal(const Piece& other, std::vector* multi_index) const; + // Internal helper to copy elements from another given piece + template + void CopyElementsWithDynamicBound(const LiteralBase::Piece& src); + // For array-shaped pieces, this is the buffer holding the literal data. char* buffer_ = nullptr; + int32* dynamic_size_buffer_ = nullptr; + // The shape of piece. This points into the shape of the containing Literal // (Literal::shape_). const Shape* subshape_ = nullptr; @@ -550,6 +590,11 @@ class MutableLiteralBase : public LiteralBase { // mutate the shape as this can produce malformed Literals. Shape* mutable_shape_do_not_use() { return shape_.get(); } + // Set the dynamic size on dim_index in the literal at the given shape_index. + void SetDynamicSize(int64 dim_index, const ShapeIndex& shape_index, + int32 size); + void SetDynamicSize(int64 dim_index, int32 size); + // Returns a pointer to the underlying buffer holding the array at the given // shape index. CHECKs if the subshape of the literal at the given ShapeIndex // is not array. @@ -560,10 +605,12 @@ class MutableLiteralBase : public LiteralBase { // Copy values from 'src_literal' rooted at 'src_shape_index' into this // literal rooted at 'dest_shape_index'. The subshape of this literal rooted // at 'dest_shape_index' must be compatible with the subshape of 'src_literal' - // rooted at 'src_shape_index', but need not be arrays. + // rooted at 'src_shape_index', but need not be arrays. If only_dynamic_bound + // is true, only elements within dynamic bounds will be copied. 
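The only_dynamic_bound flag documented here, and declared just below, is what ToStatic and ToBoundedDynamic use internally: the source and destination shapes may differ as long as one is a bound for the other, and only in-bounds elements are copied. A hedged sketch of calling it directly, reusing the shapes from the earlier example (assumes the XLA headers and the TF_CHECK_OK macro):

// dst is f32[2,<=3] with runtime size 1 on dimension 1; src is static f32[2,1].
xla::Literal dst(xla::ShapeUtil::MakeShape(xla::F32, {2, 3}, {false, true}));
dst.SetDynamicSize(/*dim_index=*/1, /*size=*/1);
xla::Literal src = xla::LiteralUtil::CreateR2<float>({{1}, {4}});
TF_CHECK_OK(dst.CopyFrom(src, /*dest_shape_index=*/{}, /*src_shape_index=*/{},
                         /*only_dynamic_bound=*/true));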
Status CopyFrom(const LiteralSlice& src_literal, const ShapeIndex& dest_shape_index = {}, - const ShapeIndex& src_shape_index = {}); + const ShapeIndex& src_shape_index = {}, + bool only_dynamic_bound = false); // Copies the values from src_literal, starting at src_base shape indexes, // to this literal, starting at dest_base, where the copy size in each @@ -924,9 +971,14 @@ void LiteralBase::EachCell( return; } std::vector indices(shape().rank(), 0); + + Shape shape_dynamic = shape(); + for (int64 i = 0; i < shape_dynamic.rank(); ++i) { + shape_dynamic.set_dimensions(i, GetDynamicSize(i)); + } do { per_cell(indices, Get(indices)); - } while (IndexUtil::BumpIndices(shape(), absl::MakeSpan(indices))); + } while (IndexUtil::BumpIndices(shape_dynamic, absl::MakeSpan(indices))); } template diff --git a/tensorflow/compiler/xla/literal_test.cc b/tensorflow/compiler/xla/literal_test.cc index 37316a2a807..a58e450a55a 100644 --- a/tensorflow/compiler/xla/literal_test.cc +++ b/tensorflow/compiler/xla/literal_test.cc @@ -149,6 +149,16 @@ TEST_F(LiteralUtilTest, R2ToString) { EXPECT_EQ(expected, literal.ToString()); } +TEST_F(LiteralUtilTest, R2DynamicToString) { + auto literal = LiteralUtil::CreateR2({{1, 2}, {3, 4}, {5, 6}}); + literal.SetDynamicSize(0, {}, 2); + const string expected = R"(s32[<=3,2](2,2) { + { 1, 2 }, + { 3, 4 } +})"; + EXPECT_EQ(expected, literal.ToString()); +} + TEST_F(LiteralUtilTest, R3ToString) { const auto literal = LiteralUtil::CreateR3({{{1}, {2}}, {{3}, {4}}, {{5}, {6}}}); @@ -421,6 +431,28 @@ TEST_F(LiteralUtilTest, TupleEquality) { EXPECT_NE(tuple1, different_tuple); } +TEST_F(LiteralUtilTest, DynamicShapeEquality) { + // Test equality with tuples. + auto r1 = LiteralUtil::CreateR1({1.0, 2.0}); + r1.SetDynamicSize(0, {}, 1); + auto r2 = LiteralUtil::CreateR2({{1.0, 2.0}, {3.0, 4.0}}); + r2.SetDynamicSize(0, {}, 1); + auto tuple1 = LiteralUtil::MakeTuple({&r1, &r2}); + + // Tuple with the same elements. One element is shared with the original + // tuple, the other is a clone of the element in the original tuple. + auto r1_clone = LiteralUtil::CreateR1({1.0, 3.0}); + r1_clone.SetDynamicSize(0, {}, 1); + auto tuple2 = LiteralUtil::MakeTuple({&r1_clone, &r2}); + EXPECT_EQ(tuple1, tuple2); + + // Tuple with different dynamic sizes. + auto r2_clone = LiteralUtil::CreateR2({{1.0, 2.0}, {3.0, 4.0}}); + r2_clone.SetDynamicSize(0, {}, 2); + auto tuple_3 = LiteralUtil::MakeTuple({&r1_clone, &r2_clone}); + EXPECT_NE(tuple1, tuple_3); +} + TEST_F(LiteralUtilTest, C64Equality) { // Test equality with tuples. 
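For reference on the printed form exercised by R2DynamicToString above: a bounded dimension is rendered as "<=bound" and the runtime sizes follow the shape in parentheses, so only the in-bounds rows are listed. A sketch of the same call pattern (assumes <iostream> and the XLA literal headers):

xla::Literal literal =
    xla::LiteralUtil::CreateR2<int32_t>({{1, 2}, {3, 4}, {5, 6}});
literal.SetDynamicSize(/*dim_index=*/0, /*size=*/2);
// Prints "s32[<=3,2](2,2)" followed by the two in-bounds rows { 1, 2 } and
// { 3, 4 }.
std::cout << literal.ToString() << std::endl;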
auto vector = LiteralUtil::CreateR1({{1.0, 2.0}, {3.0, 4.0}}); @@ -692,6 +724,47 @@ TEST_F(LiteralUtilTest, TransposeR4) { }); } +TEST_F(LiteralUtilTest, TransposeDynamicR2) { + // F32[2, <=3] (2, 1) + auto original = LiteralUtil::CreateR2({{1, 2, 3}, {4, 5, 6}}); + original.SetDynamicSize(1, 1); + // F32[<=3, 2] (1, 2) + auto reshape = original.Transpose(/*permutation=*/{1, 0}); + + reshape.EachCell([&](absl::Span indices, float value) { + EXPECT_EQ(value, original.Get({indices[1], indices[0]})); + }); +} + +TEST_F(LiteralUtilTest, ToStaticR2) { + // F32[2, <=3] (2, 1) + auto original = LiteralUtil::CreateR2({{1, 2, 3}, {4, 5, 6}}); + original.SetDynamicSize(1, 1); + // F32[2, 1] + auto static_literal = original.ToStatic(); + EXPECT_EQ(static_literal.shape(), ShapeUtil::MakeShape(F32, {2, 1})); + EXPECT_TRUE(static_literal.shape().is_static()); + + static_literal.EachCell( + [&](absl::Span indices, float value) { + EXPECT_EQ(value, original.Get({indices[0], indices[1]})); + }); +} + +TEST_F(LiteralUtilTest, ToBoundedDynamicR2) { + // F32[2, 1] + auto original = LiteralUtil::CreateR2({{1}, {4}}); + // F32[2, <=3] (2, 1) + auto dynamic_shape = ShapeUtil::MakeShape(F32, {2, 3}, {false, true}); + auto dynamic_literal = original.ToBoundedDynamic(dynamic_shape); + EXPECT_EQ(dynamic_literal.shape(), dynamic_shape); + + dynamic_literal.EachCell( + [&](absl::Span indices, float value) { + EXPECT_EQ(value, original.Get({indices[0], indices[1]})); + }); +} + TEST_F(LiteralUtilTest, TestR4RelayoutEquivalence) { // Tests that using Relayout on an array is equivalent to creating it in the // target layout in the first place. @@ -797,6 +870,38 @@ TEST_F(LiteralUtilTest, SliceR3U32Full) { EXPECT_EQ(input_2x3x2, result); } +TEST_F(LiteralUtilTest, SliceR2Dynamic) { + auto input_3x4 = LiteralUtil::CreateR2( + {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}}); + input_3x4.SetDynamicSize(1, 3); + // slice second dim from dynamic size 3 to dynamic size 1. + auto result = input_3x4.Slice({0, 1}, {2, 2}); + auto expected = LiteralUtil::CreateR2({{2}, {6}}); + EXPECT_EQ(expected, result); + EXPECT_EQ(result.GetDynamicSize(1), 1); +} + +TEST_F(LiteralUtilTest, SliceR2DynamicInBound) { + auto input_3x4 = LiteralUtil::CreateR2( + {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}}); + input_3x4.SetDynamicSize(1, 1); + auto result = input_3x4.Slice({0, 0}, {2, 2}); + auto expected = LiteralUtil::CreateR2({{1}, {5}}); + EXPECT_EQ(expected, result); + EXPECT_EQ(result.GetDynamicSize(1), 1); +} + +TEST_F(LiteralUtilTest, SliceR2DynamicOutOfBound) { + auto input_3x4 = LiteralUtil::CreateR2( + {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}}); + input_3x4.SetDynamicSize(1, 1); + auto result = input_3x4.Slice({0, 1}, {2, 3}); + auto expected = LiteralUtil::CreateR2({{}, {}}); + EXPECT_EQ(expected, result); + // Out of bound access clamps into 0 sized dimension. 
+ EXPECT_EQ(result.GetDynamicSize(1), 0); +} + TEST_F(LiteralUtilTest, PopulateR1S64) { Literal output(ShapeUtil::MakeShape(S64, {1})); output.PopulateR1({77}); @@ -1510,7 +1615,7 @@ TEST_F(LiteralUtilTest, CopyFromProto_u16) { EXPECT_EQ(u1, r[3]); } -TEST_F(LiteralUtilTest, LiteralSliceTest) { +TEST_F(LiteralUtilTest, LiteralDynamicSliceTest) { auto scalar = LiteralUtil::CreateR0(1.0); auto matrix = LiteralUtil::CreateR2({{1.0, 2.0}, {3.0, 4.0}}); auto tuple = LiteralUtil::MakeTuple({&scalar, &matrix}); @@ -1973,6 +2078,17 @@ TEST_F(LiteralUtilTest, BroadcastScalarToMatrix) { LiteralUtil::CreateR2({{9, 9}, {9, 9}})); } +TEST_F(LiteralUtilTest, DynamicBroadcast) { + Literal literal = LiteralUtil::CreateR1({1, 2}); + literal.SetDynamicSize(0, 1); + TF_ASSERT_OK_AND_ASSIGN( + Literal broadcasted_literal, + literal.Broadcast(/*result_shape=*/ShapeUtil::MakeShape(S64, {2, 2}), + /*dimensions=*/{1})); + EXPECT_EQ(broadcasted_literal, LiteralUtil::CreateR2({{1}, {1}})); + EXPECT_EQ(broadcasted_literal.GetDynamicSize(1), 1); +} + TEST_F(LiteralUtilTest, GetAsComplex128) { complex128 value = {1, 0}; Literal c1 = LiteralUtil::CreateR0(value); diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index ae8f49df4b4..66e9e01fc38 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -440,6 +440,10 @@ Status HloEvaluator::HandleSetDimensionSize( Literal result(set_dimension_size->shape()); memcpy(result.untyped_data(), operand_literal.untyped_data(), operand_literal.size_bytes()); + const Literal& size_literal = + GetEvaluatedLiteralFor(set_dimension_size->operand(1)); + result.SetDynamicSize(set_dimension_size->dimension(), + size_literal.Get({})); evaluated_[set_dimension_size] = std::move(result); return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/interpreter/executable_base.cc b/tensorflow/compiler/xla/service/interpreter/executable_base.cc index 4b020ea2d32..4b6a8aa5202 100644 --- a/tensorflow/compiler/xla/service/interpreter/executable_base.cc +++ b/tensorflow/compiler/xla/service/interpreter/executable_base.cc @@ -81,8 +81,17 @@ StatusOr InterpreterExecutableBase::ExecuteAsyncOnStream( for (int64 i = 0; i < computation->num_parameters(); ++i) { const auto& expected_shape = computation->parameter_instruction(i)->shape(); const auto& actual_shape = argument_buffers[i].on_device_shape(); - if (!Shape::Equal().MinorToMajorOnlyInLayout()(expected_shape, - actual_shape)) { + bool shape_match = true; + if (expected_shape.is_dynamic()) { + if (!ShapeUtil::DynamicArrayShapeIsCompatible(actual_shape, + expected_shape)) { + shape_match = false; + } + } else if (!Shape::Equal().MinorToMajorOnlyInLayout()(expected_shape, + actual_shape)) { + shape_match = false; + } + if (!shape_match) { return InvalidArgument( "Shape mismatch on parameter %d. Expected %s, but was %s.", i, ShapeUtil::HumanStringWithLayout(expected_shape), @@ -100,11 +109,18 @@ StatusOr InterpreterExecutableBase::ExecuteAsyncOnStream( TF_ASSIGN_OR_RETURN(Literal arg_literal, transfer_manager->TransferLiteralFromDevice( run_options->stream(), argument_buffers[p])); + const auto& expected_shape = computation->parameter_instruction(p)->shape(); + if (expected_shape.is_dynamic()) { + // Expand the input literal to expected shape. 
+ arg_literal = arg_literal.ToBoundedDynamic(expected_shape); + } arg_literals.push_back(std::move(arg_literal)); } TF_ASSIGN_OR_RETURN(Literal result_literal, Evaluate(*computation, arg_literals)); + // Shrink the generated dynamic shape into static shape. + result_literal = result_literal.ToStatic(); // Transform the result literal back into a ShapedBuffer. TF_ASSIGN_OR_RETURN(ScopedShapedBuffer result_buffers, diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc index bce40578132..6e452293232 100644 --- a/tensorflow/compiler/xla/shape_util.cc +++ b/tensorflow/compiler/xla/shape_util.cc @@ -339,6 +339,15 @@ ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout( TF_DCHECK_OK(ValidateShape(*shape)); } +/* static */ void ShapeUtil::CopyDynamicDimensions(Shape* to, + const Shape& from) { + CHECK_EQ(to->rank(), from.rank()); + for (int64 i = 0; i < from.rank(); ++i) { + to->set_dynamic_dimension(i, from.is_dynamic_dimension(i)); + } + TF_DCHECK_OK(ValidateShape(*to)); +} + /* static */ bool ShapeUtil::ElementIsIntegral(const Shape& shape) { return primitive_util::IsIntegralType(shape.element_type()); } diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index fe1a8acf6e4..3789d828528 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -377,6 +377,9 @@ class ShapeUtil { // Appends a major dimension to the shape with the given bound. static void AppendMajorDimension(int bound, Shape* shape); + // Copy the dynamic dimensions property from one shape to another. + static void CopyDynamicDimensions(Shape* to, const Shape& from); + // Returns an empty tuple shape. Can be used as a sentinel Shape value. static Shape MakeNil() { return MakeTupleShape({}); } From d49ef4dae9755e0b492afea74b40bfaecd98befb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Jul 2020 18:56:11 -0700 Subject: [PATCH 0768/2522] Internal change PiperOrigin-RevId: 321895506 Change-Id: If53ac258f4dd5df305c079513196d4a8f92198b5 --- RELEASE.md | 3 -- tensorflow/python/eager/execute.py | 27 +++--------- tensorflow/python/eager/ops_test.py | 35 +++------------ tensorflow/python/framework/constant_op.py | 2 +- tensorflow/python/framework/op_def_library.py | 43 +++---------------- tensorflow/python/framework/python_op_gen.cc | 10 ----- .../python/kernel_tests/reshape_op_test.py | 19 -------- 7 files changed, 19 insertions(+), 120 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 74c4adfc97c..33b3bb16041 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -35,9 +35,6 @@ * `tf.types.experimental.TensorLike` is a new `Union` type that can be used as type annotation for variables representing a Tensor or a value that can be converted to Tensor by `tf.convert_to_tensor`. - * Calling ops with a python constants or numpy values is now consistent with - tf.convert_to_tensor behavior. This avoids operations like tf.reshape - truncating inputs such as from int64 to int32. * `tf.data`: * Added optional `exclude_cols` parameter to CsvDataset. This parameter is the complement of `select_cols`; at most one of these should be specified. 
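As background to the release note deleted from RELEASE.md above: the concern is how implicit tensor conversion picks an integer dtype. The public conversion path defaults small Python integers to int32 but promotes values that do not fit in 32 bits to int64, which is the truncation the note refers to. A minimal sketch, assuming TensorFlow 2.x eager mode and using only the public API rather than the internal args_to_matching_eager helper touched in the hunks below:

import tensorflow as tf

# Small Python ints are inferred as int32 by default ...
print(tf.constant([3, 4]).dtype)     # tf.int32

# ... while a value outside the int32 range is inferred as int64, so a
# reshape target such as [50000 ** 2] must not be silently narrowed.
print(tf.constant([2 ** 48]).dtype)  # tf.int64
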
diff --git a/tensorflow/python/eager/execute.py b/tensorflow/python/eager/execute.py index 32808071e63..e206262309e 100644 --- a/tensorflow/python/eager/execute.py +++ b/tensorflow/python/eager/execute.py @@ -233,7 +233,7 @@ def make_tensor(v, arg_name): (repr(v), arg_name)) -def args_to_matching_eager(l, ctx, allowed_dtypes, default_dtype=None): +def args_to_matching_eager(l, ctx, default_dtype=None): """Convert sequence `l` to eager same-type Tensors.""" if (not l) and (default_dtype is not None): return default_dtype, [] # List is empty; assume default dtype. @@ -243,6 +243,8 @@ def args_to_matching_eager(l, ctx, allowed_dtypes, default_dtype=None): break else: # note: intentional for-else return l[0]._datatype_enum(), l # pylint: disable=protected-access + # TODO(josh11b): Could we do a better job if we also passed in the + # allowed dtypes when that was known? # Is some input already a Tensor with a dtype? dtype = None @@ -254,28 +256,13 @@ def args_to_matching_eager(l, ctx, allowed_dtypes, default_dtype=None): if dtype is None: # Infer a dtype based on the first value, and use that dtype for the # remaining values. - ret = [] for t in l: - tensor = None - # First see if we can get a valid dtype with the default conversion - # and see if it matches an allowed dtypes. Some ops like ConcatV2 may - # not list allowed dtypes, in which case we should skip this. - if dtype is None and allowed_dtypes: - tensor = ops.convert_to_tensor(t, ctx=ctx) - # If we did not match an allowed dtype, try again with the default - # dtype. This could be because we have an empty tensor and thus we - # picked the wrong type. - if tensor.dtype not in allowed_dtypes: - tensor = None - - if tensor is None: - tensor = ops.convert_to_tensor( - t, dtype, preferred_dtype=default_dtype, ctx=ctx) - - ret.append(tensor) + ret.append( + ops.convert_to_tensor( + t, dtype, preferred_dtype=default_dtype, ctx=ctx)) if dtype is None: - dtype = tensor.dtype + dtype = ret[-1].dtype else: ret = [ops.convert_to_tensor(t, dtype, ctx=ctx) for t in l] diff --git a/tensorflow/python/eager/ops_test.py b/tensorflow/python/eager/ops_test.py index b996d0dd0c4..0e9b6283237 100644 --- a/tensorflow/python/eager/ops_test.py +++ b/tensorflow/python/eager/ops_test.py @@ -326,42 +326,17 @@ class OpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testArgsToMatchingEagerDefault(self): # Uses default ctx = context.context() - allowed_dtypes = [dtypes.int32, dtypes.int64] - - # Follows standard int conversion rules - t, r = execute.args_to_matching_eager([[3, 4]], ctx, allowed_dtypes, - dtypes.int32) + t, r = execute.args_to_matching_eager([[3, 4]], ctx, dtypes.int32) self.assertEqual(t, dtypes.int32) self.assertEqual(r[0].dtype, dtypes.int32) - t, r = execute.args_to_matching_eager([[3, 4]], ctx, allowed_dtypes, - dtypes.int64) - self.assertEqual(t, dtypes.int32) - self.assertEqual(r[0].dtype, dtypes.int32) - # Use int64 since it is a better fit - t, r = execute.args_to_matching_eager([[2**48]], ctx, allowed_dtypes, - dtypes.int32) + t, r = execute.args_to_matching_eager([[3, 4]], ctx, dtypes.int64) self.assertEqual(t, dtypes.int64) self.assertEqual(r[0].dtype, dtypes.int64) - - # When the regular tensor conversion fails, then use the default type as a - # hint. 
- allowed_dtypes = [dtypes.uint32, dtypes.uint32] - t, r = execute.args_to_matching_eager([[3, 4]], ctx, allowed_dtypes, - dtypes.uint32) - self.assertEqual(t, dtypes.uint32) - self.assertEqual(r[0].dtype, dtypes.uint32) - t, r = execute.args_to_matching_eager([[3, 4]], ctx, allowed_dtypes, - dtypes.uint64) - self.assertEqual(t, dtypes.uint64) - self.assertEqual(r[0].dtype, dtypes.uint64) - - t, r = execute.args_to_matching_eager([], ctx, allowed_dtypes, dtypes.int64) + t, r = execute.args_to_matching_eager([], ctx, dtypes.int64) self.assertEqual(t, dtypes.int64) - # Doesn't use default - allowed_dtypes = [dtypes.int32, dtypes.string] - t, r = execute.args_to_matching_eager([['string', 'arg']], ctx, - allowed_dtypes, dtypes.int32) + t, r = execute.args_to_matching_eager( + [['string', 'arg']], ctx, dtypes.int32) self.assertEqual(t, dtypes.string) self.assertEqual(r[0].dtype, dtypes.string) diff --git a/tensorflow/python/framework/constant_op.py b/tensorflow/python/framework/constant_op.py index 343856b6749..4bf67c75162 100644 --- a/tensorflow/python/framework/constant_op.py +++ b/tensorflow/python/framework/constant_op.py @@ -40,7 +40,7 @@ def _eager_reshape(tensor, shape, ctx): """Eager-only version of Reshape op; requires tensor is an eager Tensor.""" attr_t = tensor._datatype_enum() # pylint: disable=protected-access attr_tshape, (shape,) = execute.args_to_matching_eager( - [shape], ctx, [dtypes.int32, dtypes.int64], dtypes.int32) + [shape], ctx, dtypes.int32) inputs_flat = [tensor, shape] attrs = ("T", attr_t, "Tshape", attr_tshape) result, = execute.execute( diff --git a/tensorflow/python/framework/op_def_library.py b/tensorflow/python/framework/op_def_library.py index 17e06b79f74..6c72d38c197 100644 --- a/tensorflow/python/framework/op_def_library.py +++ b/tensorflow/python/framework/op_def_library.py @@ -337,7 +337,6 @@ def _apply_op_helper(op_type_name, name=None, **keywords): # pylint: disable=in # on the other. Handling this will require restructuring this code # significantly. default_type_attr_map = {} - allowed_list_attr_map = {} for attr_def in op_def.attr: if attr_def.type != "type": continue @@ -345,8 +344,6 @@ def _apply_op_helper(op_type_name, name=None, **keywords): # pylint: disable=in if attr_def.HasField("default_value"): default_type_attr_map[key] = dtypes.as_dtype( attr_def.default_value.type) - if attr_def.HasField("allowed_values"): - allowed_list_attr_map[key] = attr_def.allowed_values.list.type # Requires that op_def has passed validation (using the C++ # ValidateOpDef() from ../framework/op_def_util.h). @@ -454,7 +451,6 @@ def _apply_op_helper(op_type_name, name=None, **keywords): # pylint: disable=in # arguments to that type. dtype = None default_dtype = None - allowed_list = None if input_arg.type != types_pb2.DT_INVALID: dtype = input_arg.type elif input_arg.type_attr in attrs: @@ -464,41 +460,14 @@ def _apply_op_helper(op_type_name, name=None, **keywords): # pylint: disable=in # so we prefer the attr's default, so code that adds a new attr # with a default is backwards compatible. default_dtype = default_type_attr_map[input_arg.type_attr] - allowed_list = allowed_list_attr_map.get(input_arg.type_attr) try: - # First see if we can get a valid dtype with the default conversion - # and see if it matches an allowed dtypes. Some ops like ConcatV2 may - # not list allowed dtypes, in which case we should skip this. 
- if dtype is None and allowed_list: - inferred = None - try: - inferred = ops.convert_to_tensor( - values, name=input_arg.name, as_ref=input_arg.is_ref) - except TypeError as err: - # When converting a python object such as a list of Dimensions, we - # need a dtype to be specified, thus tensor conversion may throw - # an exception which we will ignore and try again below. - pass - - # If we did not match an allowed dtype, try again with the default - # dtype. This could be because we have an empty tensor and thus we - # picked the wrong type. - if inferred is not None and inferred.dtype in allowed_list: - values = inferred - else: - values = ops.convert_to_tensor( - values, - name=input_arg.name, - as_ref=input_arg.is_ref, - preferred_dtype=default_dtype) - else: - values = ops.convert_to_tensor( - values, - name=input_arg.name, - dtype=dtype, - as_ref=input_arg.is_ref, - preferred_dtype=default_dtype) + values = ops.convert_to_tensor( + values, + name=input_arg.name, + dtype=dtype, + as_ref=input_arg.is_ref, + preferred_dtype=default_dtype) except TypeError as err: if dtype is None: raise err diff --git a/tensorflow/python/framework/python_op_gen.cc b/tensorflow/python/framework/python_op_gen.cc index f81e99242bf..c6c5dfb7b37 100644 --- a/tensorflow/python/framework/python_op_gen.cc +++ b/tensorflow/python/framework/python_op_gen.cc @@ -1008,16 +1008,6 @@ void GenEagerPythonOp::AddEagerInferredAttrs(const string& indentation) { FlattenInputs(&arg_list->second, &output_sizes); string conversion = strings::StrCat("_execute.args_to_matching_eager(", flattened, ", ctx"); - - strings::StrAppend(&conversion, ", ["); - for (int t : attr.allowed_values().list().type()) { - DataType dtype = static_cast(t); - const string py_dtype = - python_op_gen_internal::DataTypeToPython(dtype, "_dtypes."); - strings::StrAppend(&conversion, py_dtype, ", "); - } - strings::StrAppend(&conversion, "]"); - if (attr.has_default_value()) { strings::StrAppend( &conversion, ", ", diff --git a/tensorflow/python/kernel_tests/reshape_op_test.py b/tensorflow/python/kernel_tests/reshape_op_test.py index e7e1c7023d8..0d54138e053 100644 --- a/tensorflow/python/kernel_tests/reshape_op_test.py +++ b/tensorflow/python/kernel_tests/reshape_op_test.py @@ -22,7 +22,6 @@ import numpy as np from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes -from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradient_checker @@ -192,24 +191,6 @@ class ReshapeTest(test.TestCase): dtypes.float32, shape=[None, 37, None]))) self.assertEqual([None, 37, None], y.get_shape().as_list()) - def testTensorShape(self): - x = array_ops.zeros([1, 100]) - y = array_ops.reshape( - x, [tensor_shape.Dimension(100), - tensor_shape.Dimension(1)]) - self.assertEqual([100, 1], y.get_shape().as_list()) - y = array_ops.reshape(x, tensor_shape.TensorShape([100, 1])) - self.assertEqual([100, 1], y.get_shape().as_list()) - - def testInt64Shape(self): - x = array_ops.zeros([50000, 50000]) - # Provide dimension larger than int32 - y = array_ops.reshape(x, [50000**2]) - self.assertEqual([50000**2], y.get_shape().as_list()) - # Even if first dimension is within int32, ensure we correctly go to int64 - y = array_ops.reshape(x, [1, 50000**2]) - self.assertEqual([1, 50000**2], y.get_shape().as_list()) - if __name__ == "__main__": test.main() From 9b59779bc373bdac2593cc656750efe2aee27782 Mon Sep 17 00:00:00 
2001 From: "A. Unique TensorFlower" Date: Fri, 17 Jul 2020 19:47:05 -0700 Subject: [PATCH 0769/2522] Include Ops that are used via PartitionedCalls to MetaGraphDef.MetaInfoDef.stripped_op_list PiperOrigin-RevId: 321899181 Change-Id: I780effb5c9d38f3778bfa169ff0692dcda9abcb4 --- tensorflow/python/framework/meta_graph.py | 9 ++------ .../python/framework/meta_graph_test.py | 23 ------------------- 2 files changed, 2 insertions(+), 30 deletions(-) diff --git a/tensorflow/python/framework/meta_graph.py b/tensorflow/python/framework/meta_graph.py index dbc2a894d65..327b476c576 100644 --- a/tensorflow/python/framework/meta_graph.py +++ b/tensorflow/python/framework/meta_graph.py @@ -161,17 +161,12 @@ def ops_used_by_graph_def(graph_def): functions_to_process.append(name_to_function[op]) used_ops.add(op) - def process_node(node): - mark_op_as_used(node.op) - if node.op in ["PartitionedCall", "StatefulPartitionedCall"]: - mark_op_as_used(node.attr["f"].func.name) - for node in graph_def.node: - process_node(node) + mark_op_as_used(node.op) while functions_to_process: fun = functions_to_process.pop() for node in fun.node_def: - process_node(node) + mark_op_as_used(node.op) return [op for op in used_ops if op not in name_to_function] diff --git a/tensorflow/python/framework/meta_graph_test.py b/tensorflow/python/framework/meta_graph_test.py index 36acd81fe26..ae44fbce0f0 100644 --- a/tensorflow/python/framework/meta_graph_test.py +++ b/tensorflow/python/framework/meta_graph_test.py @@ -161,29 +161,6 @@ class SimpleMetaGraphTest(test.TestCase): op_list = meta_graph.stripped_op_list_for_graph(graph) self.assertEqual(["Const"], [op.name for op in op_list.op]) - def testStrippedOpListPartitionedCalls(self): - # Function A calls B via StatefulPartitionedCall. - graph = graph_pb2.GraphDef() - a = graph.library.function.add() - b = graph.library.function.add() - a.signature.name = "A" - b.signature.name = "B" - node_in_a = a.node_def.add() - node_in_a.op = "StatefulPartitionedCall" - node_in_a.attr["f"].func.name = "B" - b.node_def.add().op = "Const" - b.node_def.add().op = "A" - - # Use A in the graph via PartitionedCall. - node = graph.node.add() - node.op = "PartitionedCall" - node.attr["f"].func.name = "A" - - op_list = meta_graph.stripped_op_list_for_graph(graph) - self.assertSameElements( - ["Const", "PartitionedCall", "StatefulPartitionedCall"], - [op.name for op in op_list.op]) - @test_util.run_deprecated_v1 def testDefaultAttrStripping(self): """Verifies that default attributes are stripped from a graph def.""" From b4ee16059c49b932460790698d56ac26f2ccee91 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 17 Jul 2020 20:11:34 -0700 Subject: [PATCH 0770/2522] Internal change PiperOrigin-RevId: 321900773 Change-Id: If154d6c4f44e002db2a4b4f431f17f715dcc06ca --- RELEASE.md | 3 ++ tensorflow/python/eager/execute.py | 27 +++++++++--- tensorflow/python/eager/ops_test.py | 35 ++++++++++++--- tensorflow/python/framework/constant_op.py | 2 +- tensorflow/python/framework/op_def_library.py | 43 ++++++++++++++++--- tensorflow/python/framework/python_op_gen.cc | 10 +++++ .../python/kernel_tests/reshape_op_test.py | 19 ++++++++ 7 files changed, 120 insertions(+), 19 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 33b3bb16041..74c4adfc97c 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -35,6 +35,9 @@ * `tf.types.experimental.TensorLike` is a new `Union` type that can be used as type annotation for variables representing a Tensor or a value that can be converted to Tensor by `tf.convert_to_tensor`. + * Calling ops with a python constants or numpy values is now consistent with + tf.convert_to_tensor behavior. This avoids operations like tf.reshape + truncating inputs such as from int64 to int32. * `tf.data`: * Added optional `exclude_cols` parameter to CsvDataset. This parameter is the complement of `select_cols`; at most one of these should be specified. diff --git a/tensorflow/python/eager/execute.py b/tensorflow/python/eager/execute.py index e206262309e..32808071e63 100644 --- a/tensorflow/python/eager/execute.py +++ b/tensorflow/python/eager/execute.py @@ -233,7 +233,7 @@ def make_tensor(v, arg_name): (repr(v), arg_name)) -def args_to_matching_eager(l, ctx, default_dtype=None): +def args_to_matching_eager(l, ctx, allowed_dtypes, default_dtype=None): """Convert sequence `l` to eager same-type Tensors.""" if (not l) and (default_dtype is not None): return default_dtype, [] # List is empty; assume default dtype. @@ -243,8 +243,6 @@ def args_to_matching_eager(l, ctx, default_dtype=None): break else: # note: intentional for-else return l[0]._datatype_enum(), l # pylint: disable=protected-access - # TODO(josh11b): Could we do a better job if we also passed in the - # allowed dtypes when that was known? # Is some input already a Tensor with a dtype? dtype = None @@ -256,13 +254,28 @@ def args_to_matching_eager(l, ctx, default_dtype=None): if dtype is None: # Infer a dtype based on the first value, and use that dtype for the # remaining values. + ret = [] for t in l: - ret.append( - ops.convert_to_tensor( - t, dtype, preferred_dtype=default_dtype, ctx=ctx)) + tensor = None + # First see if we can get a valid dtype with the default conversion + # and see if it matches an allowed dtypes. Some ops like ConcatV2 may + # not list allowed dtypes, in which case we should skip this. + if dtype is None and allowed_dtypes: + tensor = ops.convert_to_tensor(t, ctx=ctx) + # If we did not match an allowed dtype, try again with the default + # dtype. This could be because we have an empty tensor and thus we + # picked the wrong type. 
+ if tensor.dtype not in allowed_dtypes: + tensor = None + + if tensor is None: + tensor = ops.convert_to_tensor( + t, dtype, preferred_dtype=default_dtype, ctx=ctx) + + ret.append(tensor) if dtype is None: - dtype = ret[-1].dtype + dtype = tensor.dtype else: ret = [ops.convert_to_tensor(t, dtype, ctx=ctx) for t in l] diff --git a/tensorflow/python/eager/ops_test.py b/tensorflow/python/eager/ops_test.py index 0e9b6283237..b996d0dd0c4 100644 --- a/tensorflow/python/eager/ops_test.py +++ b/tensorflow/python/eager/ops_test.py @@ -326,17 +326,42 @@ class OpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testArgsToMatchingEagerDefault(self): # Uses default ctx = context.context() - t, r = execute.args_to_matching_eager([[3, 4]], ctx, dtypes.int32) + allowed_dtypes = [dtypes.int32, dtypes.int64] + + # Follows standard int conversion rules + t, r = execute.args_to_matching_eager([[3, 4]], ctx, allowed_dtypes, + dtypes.int32) self.assertEqual(t, dtypes.int32) self.assertEqual(r[0].dtype, dtypes.int32) - t, r = execute.args_to_matching_eager([[3, 4]], ctx, dtypes.int64) + t, r = execute.args_to_matching_eager([[3, 4]], ctx, allowed_dtypes, + dtypes.int64) + self.assertEqual(t, dtypes.int32) + self.assertEqual(r[0].dtype, dtypes.int32) + # Use int64 since it is a better fit + t, r = execute.args_to_matching_eager([[2**48]], ctx, allowed_dtypes, + dtypes.int32) self.assertEqual(t, dtypes.int64) self.assertEqual(r[0].dtype, dtypes.int64) - t, r = execute.args_to_matching_eager([], ctx, dtypes.int64) + + # When the regular tensor conversion fails, then use the default type as a + # hint. + allowed_dtypes = [dtypes.uint32, dtypes.uint32] + t, r = execute.args_to_matching_eager([[3, 4]], ctx, allowed_dtypes, + dtypes.uint32) + self.assertEqual(t, dtypes.uint32) + self.assertEqual(r[0].dtype, dtypes.uint32) + t, r = execute.args_to_matching_eager([[3, 4]], ctx, allowed_dtypes, + dtypes.uint64) + self.assertEqual(t, dtypes.uint64) + self.assertEqual(r[0].dtype, dtypes.uint64) + + t, r = execute.args_to_matching_eager([], ctx, allowed_dtypes, dtypes.int64) self.assertEqual(t, dtypes.int64) + # Doesn't use default - t, r = execute.args_to_matching_eager( - [['string', 'arg']], ctx, dtypes.int32) + allowed_dtypes = [dtypes.int32, dtypes.string] + t, r = execute.args_to_matching_eager([['string', 'arg']], ctx, + allowed_dtypes, dtypes.int32) self.assertEqual(t, dtypes.string) self.assertEqual(r[0].dtype, dtypes.string) diff --git a/tensorflow/python/framework/constant_op.py b/tensorflow/python/framework/constant_op.py index 4bf67c75162..343856b6749 100644 --- a/tensorflow/python/framework/constant_op.py +++ b/tensorflow/python/framework/constant_op.py @@ -40,7 +40,7 @@ def _eager_reshape(tensor, shape, ctx): """Eager-only version of Reshape op; requires tensor is an eager Tensor.""" attr_t = tensor._datatype_enum() # pylint: disable=protected-access attr_tshape, (shape,) = execute.args_to_matching_eager( - [shape], ctx, dtypes.int32) + [shape], ctx, [dtypes.int32, dtypes.int64], dtypes.int32) inputs_flat = [tensor, shape] attrs = ("T", attr_t, "Tshape", attr_tshape) result, = execute.execute( diff --git a/tensorflow/python/framework/op_def_library.py b/tensorflow/python/framework/op_def_library.py index 6c72d38c197..17e06b79f74 100644 --- a/tensorflow/python/framework/op_def_library.py +++ b/tensorflow/python/framework/op_def_library.py @@ -337,6 +337,7 @@ def _apply_op_helper(op_type_name, name=None, **keywords): # pylint: disable=in # on the other. 
Handling this will require restructuring this code # significantly. default_type_attr_map = {} + allowed_list_attr_map = {} for attr_def in op_def.attr: if attr_def.type != "type": continue @@ -344,6 +345,8 @@ def _apply_op_helper(op_type_name, name=None, **keywords): # pylint: disable=in if attr_def.HasField("default_value"): default_type_attr_map[key] = dtypes.as_dtype( attr_def.default_value.type) + if attr_def.HasField("allowed_values"): + allowed_list_attr_map[key] = attr_def.allowed_values.list.type # Requires that op_def has passed validation (using the C++ # ValidateOpDef() from ../framework/op_def_util.h). @@ -451,6 +454,7 @@ def _apply_op_helper(op_type_name, name=None, **keywords): # pylint: disable=in # arguments to that type. dtype = None default_dtype = None + allowed_list = None if input_arg.type != types_pb2.DT_INVALID: dtype = input_arg.type elif input_arg.type_attr in attrs: @@ -460,14 +464,41 @@ def _apply_op_helper(op_type_name, name=None, **keywords): # pylint: disable=in # so we prefer the attr's default, so code that adds a new attr # with a default is backwards compatible. default_dtype = default_type_attr_map[input_arg.type_attr] + allowed_list = allowed_list_attr_map.get(input_arg.type_attr) try: - values = ops.convert_to_tensor( - values, - name=input_arg.name, - dtype=dtype, - as_ref=input_arg.is_ref, - preferred_dtype=default_dtype) + # First see if we can get a valid dtype with the default conversion + # and see if it matches an allowed dtypes. Some ops like ConcatV2 may + # not list allowed dtypes, in which case we should skip this. + if dtype is None and allowed_list: + inferred = None + try: + inferred = ops.convert_to_tensor( + values, name=input_arg.name, as_ref=input_arg.is_ref) + except TypeError as err: + # When converting a python object such as a list of Dimensions, we + # need a dtype to be specified, thus tensor conversion may throw + # an exception which we will ignore and try again below. + pass + + # If we did not match an allowed dtype, try again with the default + # dtype. This could be because we have an empty tensor and thus we + # picked the wrong type. 
+ if inferred is not None and inferred.dtype in allowed_list: + values = inferred + else: + values = ops.convert_to_tensor( + values, + name=input_arg.name, + as_ref=input_arg.is_ref, + preferred_dtype=default_dtype) + else: + values = ops.convert_to_tensor( + values, + name=input_arg.name, + dtype=dtype, + as_ref=input_arg.is_ref, + preferred_dtype=default_dtype) except TypeError as err: if dtype is None: raise err diff --git a/tensorflow/python/framework/python_op_gen.cc b/tensorflow/python/framework/python_op_gen.cc index c6c5dfb7b37..f81e99242bf 100644 --- a/tensorflow/python/framework/python_op_gen.cc +++ b/tensorflow/python/framework/python_op_gen.cc @@ -1008,6 +1008,16 @@ void GenEagerPythonOp::AddEagerInferredAttrs(const string& indentation) { FlattenInputs(&arg_list->second, &output_sizes); string conversion = strings::StrCat("_execute.args_to_matching_eager(", flattened, ", ctx"); + + strings::StrAppend(&conversion, ", ["); + for (int t : attr.allowed_values().list().type()) { + DataType dtype = static_cast(t); + const string py_dtype = + python_op_gen_internal::DataTypeToPython(dtype, "_dtypes."); + strings::StrAppend(&conversion, py_dtype, ", "); + } + strings::StrAppend(&conversion, "]"); + if (attr.has_default_value()) { strings::StrAppend( &conversion, ", ", diff --git a/tensorflow/python/kernel_tests/reshape_op_test.py b/tensorflow/python/kernel_tests/reshape_op_test.py index 0d54138e053..e7e1c7023d8 100644 --- a/tensorflow/python/kernel_tests/reshape_op_test.py +++ b/tensorflow/python/kernel_tests/reshape_op_test.py @@ -22,6 +22,7 @@ import numpy as np from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradient_checker @@ -191,6 +192,24 @@ class ReshapeTest(test.TestCase): dtypes.float32, shape=[None, 37, None]))) self.assertEqual([None, 37, None], y.get_shape().as_list()) + def testTensorShape(self): + x = array_ops.zeros([1, 100]) + y = array_ops.reshape( + x, [tensor_shape.Dimension(100), + tensor_shape.Dimension(1)]) + self.assertEqual([100, 1], y.get_shape().as_list()) + y = array_ops.reshape(x, tensor_shape.TensorShape([100, 1])) + self.assertEqual([100, 1], y.get_shape().as_list()) + + def testInt64Shape(self): + x = array_ops.zeros([50000, 50000]) + # Provide dimension larger than int32 + y = array_ops.reshape(x, [50000**2]) + self.assertEqual([50000**2], y.get_shape().as_list()) + # Even if first dimension is within int32, ensure we correctly go to int64 + y = array_ops.reshape(x, [1, 50000**2]) + self.assertEqual([1, 50000**2], y.get_shape().as_list()) + if __name__ == "__main__": test.main() From 2f14e77dbcc4d81dce8d525d5566d3e364e3e9f6 Mon Sep 17 00:00:00 2001 From: Gaurav Jain Date: Fri, 17 Jul 2020 21:03:14 -0700 Subject: [PATCH 0771/2522] Disable broken test PiperOrigin-RevId: 321904748 Change-Id: Id18d5de321cb2fbc1d4f8ee95b843d5799381bb9 --- tensorflow/python/ops/init_ops_v2_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/ops/init_ops_v2_test.py b/tensorflow/python/ops/init_ops_v2_test.py index d45d5f6f6b3..37b66d59c09 100644 --- a/tensorflow/python/ops/init_ops_v2_test.py +++ b/tensorflow/python/ops/init_ops_v2_test.py @@ -162,6 +162,7 @@ class RandomUniformInitializerTest(InitializersTest): @test_util.run_in_graph_and_eager_modes def testRangeInitializer(self): + self.skipTest("b/161580897") shape = 
(9, 6, 7) self._range_test( init_ops_v2.RandomUniform(minval=-1, maxval=1, seed=124), From e84364f038953c93782e18a933ddd524899d5fd4 Mon Sep 17 00:00:00 2001 From: "T.J. Alumbaugh" Date: Fri, 17 Jul 2020 22:00:33 -0700 Subject: [PATCH 0772/2522] Handles additional 4D Einsum formulas in TF Lite PiperOrigin-RevId: 321908463 Change-Id: I09149745fac1205610b68010e3fb81913d6892b5 --- .../mlir/tensorflow/tests/einsum.mlir | 25 ++++++++++ .../mlir/tensorflow/transforms/einsum.cc | 50 +++++++++++++++++++ 2 files changed, 75 insertions(+) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/einsum.mlir b/tensorflow/compiler/mlir/tensorflow/tests/einsum.mlir index 130887555b0..e7430993755 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/einsum.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/einsum.mlir @@ -98,6 +98,31 @@ func @einsum_transposereduceddim(%arg0: tensor<2x5x7xf32>, %arg1: tensor<2x5x3x7 // CHECK: return %[[v3]] : tensor<2x5x3xf32> } +func @einsum_fourdreducelast(%arg0: tensor<2x5x7x3xf32>, %arg1: tensor<2x3x5x13xf32>) -> tensor<2x7x5x13xf32> { + %0 = "tf.Einsum"(%arg0, %arg1) {T = "tfdtype$DT_FLOAT", equation = "acbe,aecd->abcd"}: (tensor<2x5x7x3xf32>, tensor<2x3x5x13xf32>) -> tensor<2x7x5x13xf32> + return %0 : tensor<2x7x5x13xf32> + // CHECK-LABEL: einsum_fourdreducelast + // CHECK: %[[cst:.*]] = constant dense<[0, 2, 1, 3]> : tensor<4xi32> + // CHECK: %[[v0:.*]] = "tf.Transpose"(%arg1, %[[cst]]) : (tensor<2x3x5x13xf32>, tensor<4xi32>) -> tensor<2x5x3x13xf32> + // CHECK: %[[v1:.*]] = "tf.BatchMatMulV2"(%arg0, %[[v0]]) {adj_x = false, adj_y = false} : (tensor<2x5x7x3xf32>, tensor<2x5x3x13xf32>) -> tensor<2x5x7x13xf32> + // CHECK: %[[v2:.*]] = "tf.Transpose"(%[[v1]], %[[cst]]) : (tensor<2x5x7x13xf32>, tensor<4xi32>) -> tensor<2x7x5x13xf32> + // CHECK: return %[[v2]] : tensor<2x7x5x13xf32> +} + +func @einsum_fourdtransposeall(%arg0: tensor<2x5x7x3xf32>, %arg1: tensor<2x11x7x3xf32>) -> tensor<2x7x11x5xf32> { + %0 = "tf.Einsum"(%arg0, %arg1) {T = "tfdtype$DT_FLOAT", equation = "aecd,abcd->acbe"}: (tensor<2x5x7x3xf32>, tensor<2x11x7x3xf32>) -> tensor<2x7x11x5xf32> + return %0 : tensor<2x7x11x5xf32> + // CHECK-LABEL: einsum_fourdtransposeall + // CHECK: %[[cst:.*]] = constant dense<[0, 2, 1, 3]> : tensor<4xi32> + // CHECK: %[[cst_1:.*]] = constant dense<[0, 2, 3, 1]> : tensor<4xi32> + // CHECK: %[[cst_2:.*]] = constant dense<[0, 1, 3, 2]> : tensor<4xi32> + // CHECK: %[[v0:.*]] = "tf.Transpose"(%arg0, %[[cst]]) : (tensor<2x5x7x3xf32>, tensor<4xi32>) -> tensor<2x7x5x3xf32> + // CHECK: %[[v1:.*]] = "tf.Transpose"(%arg1, %[[cst_1]]) : (tensor<2x11x7x3xf32>, tensor<4xi32>) -> tensor<2x7x3x11xf32> + // CHECK: %[[v2:.*]] = "tf.BatchMatMulV2"(%[[v0]], %[[v1]]) {adj_x = false, adj_y = false} : (tensor<2x7x5x3xf32>, tensor<2x7x3x11xf32>) -> tensor<2x7x5x11xf32> + // CHECK: %[[v3:.*]] = "tf.Transpose"(%[[v2]], %[[cst_2]]) : (tensor<2x7x5x11xf32>, tensor<4xi32>) -> tensor<2x7x11x5xf32> + // CHECK: return %[[v3]] : tensor<2x7x11x5xf32> +} + func @einsum_no_match(%arg0: tensor<4x5xf32>, %arg1: tensor<5xf32>) -> tensor<4xf32> { %0 = "tf.Einsum"(%arg0, %arg1) {T = "tfdtype$DT_FLOAT", equation = "ij,j->i"}: (tensor<4x5xf32>, tensor<5xf32>) -> tensor<4xf32> return %0 : tensor<4xf32> diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/einsum.cc b/tensorflow/compiler/mlir/tensorflow/transforms/einsum.cc index 1e622a295ec..c05a0ad1b62 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/einsum.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/einsum.cc @@ -54,6 +54,8 @@ enum 
EinsumEquation { TransposeMatMul, BatchMatMulReducedDim, TransposeReducedDim, + FourDReduceLast, + FourDTransposeAll, UnsupportedEquation }; @@ -146,6 +148,14 @@ EinsumEquation parseEquation(const std::vector& eqn) { if (is_equal(eqn, {A, B, C, COMMA, A, B, D, C, ARROW, A, B, D})) { return EinsumEquation::TransposeReducedDim; } + // ABCD,ADBE->ACBE + if (is_equal(eqn, {A, B, C, D, COMMA, A, D, B, E, ARROW, A, C, B, E})) { + return EinsumEquation::FourDReduceLast; + } + // ABCD,AECD->ACEB + if (is_equal(eqn, {A, B, C, D, COMMA, A, E, C, D, ARROW, A, C, E, B})) { + return EinsumEquation::FourDTransposeAll; + } return EinsumEquation::UnsupportedEquation; } @@ -380,6 +390,7 @@ LogicalResult ConvertTFEinsumOp::matchAndRewrite( auto final_reshape = createReshapeOp(bmm_op, {lhs_dim0, lhs_dim1, rhs_dim3}, bmm_element_type, loc, &rewriter); rewriter.replaceOp(op, {final_reshape.getResult()}); + return success(); } if (einsum_eqn == EinsumEquation::TransposeReducedDim) { // Case "BIJ,BINJ->BIN" @@ -404,6 +415,45 @@ LogicalResult ConvertTFEinsumOp::matchAndRewrite( auto final_reshape = createReshapeOp(bmm_op, {lhs_dim0, lhs_dim1, rhs_dim2}, bmm_element_type, loc, &rewriter); rewriter.replaceOp(op, {final_reshape.getResult()}); + return success(); + } + if (einsum_eqn == EinsumEquation::FourDReduceLast) { + // Case "acbe,aecd->abcd" + const int lhs_dim2 = lhs_shape[2]; + const int rhs_dim0 = rhs_shape[0]; + const int rhs_dim2 = rhs_shape[2]; + const int rhs_dim3 = rhs_shape[3]; + // Transpose RHS + rhs = createTransposeOp(rhs, loc, {0, 2, 1, 3}, &rewriter); + std::vector bmm_shape = {rhs_dim0, rhs_dim2, lhs_dim2, rhs_dim3}; + auto bmm_type = RankedTensorType::get(bmm_shape, rhs_type.getElementType()); + auto bmm_op = rewriter.create( + loc, ArrayRef{bmm_type}, lhs, rhs, rewriter.getBoolAttr(false), + rewriter.getBoolAttr(false)); + + auto trans_bmm = createTransposeOp(bmm_op, loc, {0, 2, 1, 3}, &rewriter); + rewriter.replaceOp(op, {trans_bmm.getResult()}); + return success(); + } + if (einsum_eqn == EinsumEquation::FourDTransposeAll) { + // Case "aecd,abcd->acbe" + const int lhs_dim0 = lhs_shape[0]; + const int lhs_dim1 = lhs_shape[1]; + const int lhs_dim2 = lhs_shape[2]; + const int rhs_dim1 = rhs_shape[1]; + // Transpose LHS + lhs = createTransposeOp(lhs, loc, {0, 2, 1, 3}, &rewriter); + // Transpose RHS + rhs = createTransposeOp(rhs, loc, {0, 2, 3, 1}, &rewriter); + std::vector bmm_shape = {lhs_dim0, lhs_dim2, lhs_dim1, rhs_dim1}; + auto bmm_type = RankedTensorType::get(bmm_shape, rhs_type.getElementType()); + auto bmm_op = rewriter.create( + loc, ArrayRef{bmm_type}, lhs, rhs, rewriter.getBoolAttr(false), + rewriter.getBoolAttr(false)); + + auto trans_bmm = createTransposeOp(bmm_op, loc, {0, 1, 3, 2}, &rewriter); + rewriter.replaceOp(op, {trans_bmm.getResult()}); + return success(); } return failure(); From bed7093667979b1ef6af0280cd1110145b73e0eb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Jul 2020 22:47:10 -0700 Subject: [PATCH 0773/2522] Export module comments for tf.experimental.numpy. 
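Each of the two 4-D Einsum cases added in the einsum pass above lowers to a transpose, a BatchMatMulV2, and a final transpose. A rough numpy sketch of the "acbe,aecd->abcd" case, with shapes taken from the MLIR test above; this only illustrates the decomposition and is not the pass implementation:

import numpy as np

lhs = np.random.rand(2, 5, 7, 3).astype(np.float32)   # indices a, c, b, e
rhs = np.random.rand(2, 3, 5, 13).astype(np.float32)  # indices a, e, c, d

expected = np.einsum("acbe,aecd->abcd", lhs, rhs)

# Same lowering as the pass: permute rhs to (a, c, e, d), batch-matmul
# against lhs to get (a, c, b, d), then permute back to (a, b, c, d).
rhs_t = np.transpose(rhs, (0, 2, 1, 3))
product = np.matmul(lhs, rhs_t)
result = np.transpose(product, (0, 2, 1, 3))

assert np.allclose(expected, result, atol=1e-5)
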
PiperOrigin-RevId: 321913264 Change-Id: Idf5deae8e3c0c8ffa485a1883ca87108cdb3664e --- tensorflow/python/tools/api/generator/doc_srcs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/tools/api/generator/doc_srcs.py b/tensorflow/python/tools/api/generator/doc_srcs.py index 50f966aaea9..b514607d67e 100644 --- a/tensorflow/python/tools/api/generator/doc_srcs.py +++ b/tensorflow/python/tools/api/generator/doc_srcs.py @@ -47,6 +47,7 @@ _TENSORFLOW_DOC_SOURCES = { 'distributions': DocSource( docstring_module_name='ops.distributions.distributions'), 'errors': DocSource(docstring_module_name='framework.errors'), + 'experimental.numpy': DocSource(docstring_module_name='ops.numpy_ops'), 'gfile': DocSource(docstring_module_name='platform.gfile'), 'graph_util': DocSource(docstring_module_name='framework.graph_util'), 'image': DocSource(docstring_module_name='ops.image_ops'), From 829256e314bf20a1a91cd19fbc80e2392a05cfba Mon Sep 17 00:00:00 2001 From: Zhenyu Tan Date: Fri, 17 Jul 2020 22:52:59 -0700 Subject: [PATCH 0774/2522] fix skip test for adagrad_da_test PiperOrigin-RevId: 321913659 Change-Id: I82c492522955911b9009bab006ea73cbffa1e404 --- tensorflow/python/training/adagrad_da_test.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/tensorflow/python/training/adagrad_da_test.py b/tensorflow/python/training/adagrad_da_test.py index dd40bf58bb1..96131ef0a73 100644 --- a/tensorflow/python/training/adagrad_da_test.py +++ b/tensorflow/python/training/adagrad_da_test.py @@ -22,6 +22,7 @@ import numpy as np from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import embedding_ops from tensorflow.python.ops import math_ops @@ -35,7 +36,7 @@ class AdagradDAOptimizerTest(test.TestCase): def doTestAdagradDAwithoutRegularizationBasic1(self, use_resource=False): for dtype in [dtypes.float64, dtypes.float32]: - with self.cached_session() as sess: + with ops.Graph().as_default(), self.cached_session(): global_step = variables.Variable(0, dtype=dtypes.int64) if use_resource: var0 = resource_variable_ops.ResourceVariable([0.0, 0.0], dtype=dtype) @@ -74,15 +75,13 @@ class AdagradDAOptimizerTest(test.TestCase): self.assertAllCloseAccordingToType( np.array([-0.094821, -0.189358]), v1_val) - @test_util.run_deprecated_v1 def testAdagradDAWithoutRegularizationBasic1(self): self.doTestAdagradDAwithoutRegularizationBasic1() - @test_util.run_deprecated_v1 def testResourceAdagradDAWithoutRegularizationBasic1(self): self.doTestAdagradDAwithoutRegularizationBasic1(use_resource=True) - @test_util.run_deprecated_v1 + @test_util.run_v1_only("loss needs to be callable in v2") def testMinimizeSparseResourceVariable(self): for dtype in [dtypes.float32, dtypes.float64]: with self.cached_session(): @@ -104,10 +103,9 @@ class AdagradDAOptimizerTest(test.TestCase): self.evaluate(var0), rtol=0.01) - @test_util.run_deprecated_v1 def testAdagradDAwithoutRegularizationBasic2(self): for dtype in [dtypes.float64, dtypes.float32]: - with self.cached_session() as sess: + with ops.Graph().as_default(), self.cached_session(): global_step = variables.Variable(0, dtype=dtypes.int64) var0 = variables.Variable([1.0, 2.0], dtype=dtype) var1 = variables.Variable([4.0, 3.0], dtype=dtype) @@ -137,10 +135,9 @@ class AdagradDAOptimizerTest(test.TestCase): self.assertAllCloseAccordingToType( np.array([-0.094821, -0.189358]), v1_val) - 
@test_util.run_deprecated_v1 def testAdagradDAWithL1(self): for dtype in [dtypes.float64, dtypes.float32]: - with self.cached_session() as sess: + with ops.Graph().as_default(), self.cached_session(): global_step = variables.Variable(0, dtype=dtypes.int64) var0 = variables.Variable([1.0, 2.0], dtype=dtype) var1 = variables.Variable([4.0, 3.0], dtype=dtype) @@ -170,10 +167,9 @@ class AdagradDAOptimizerTest(test.TestCase): self.assertAllCloseAccordingToType( np.array([-0.085339, -0.17989]), v1_val) - @test_util.run_deprecated_v1 def testAdagradDAWithL1_L2(self): for dtype in [dtypes.float64, dtypes.float32]: - with self.cached_session() as sess: + with ops.Graph().as_default(), self.cached_session(): global_step = variables.Variable(0, dtype=dtypes.int64) var0 = variables.Variable([1.0, 2.0], dtype=dtype) var1 = variables.Variable([4.0, 3.0], dtype=dtype) From 8155035bbfbd4619a2964cf18805084ba30e5b02 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sat, 18 Jul 2020 02:01:22 -0700 Subject: [PATCH 0775/2522] compat: Update forward compatibility horizon to 2020-07-18 PiperOrigin-RevId: 321924405 Change-Id: I6fb9e21c329ae9217a5fe80ff614600ab4ed5b7a --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 4ce014416d8..4c122028412 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 17) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 18) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From c9f4236b31cdef27fac65f3304d35cdaecdc9f53 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sat, 18 Jul 2020 02:01:26 -0700 Subject: [PATCH 0776/2522] Update GraphDef version to 466. PiperOrigin-RevId: 321924410 Change-Id: I3c9ede1c60d413533fe27625f766cd174034c49a --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index a5fc4387ea6..be71e6cd5bc 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 465 // Updated: 2020/7/17 +#define TF_GRAPH_DEF_VERSION 466 // Updated: 2020/7/18 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). 
// From 8e7a50da1a03a70c2f1fb505a333ba309bd2d319 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Sat, 18 Jul 2020 18:44:42 +0700 Subject: [PATCH 0777/2522] Fix ParseS3Path for NewReadOnlyMemoryRegionFromFile --- .../c/experimental/filesystem/plugins/s3/s3_filesystem.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc index f20c7708384..53a791bf2ea 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc @@ -678,7 +678,7 @@ void NewReadOnlyMemoryRegionFromFile(const TF_Filesystem* filesystem, TF_ReadOnlyMemoryRegion* region, TF_Status* status) { Aws::String bucket, object; - ParseS3Path(path, true, &bucket, &object, status); + ParseS3Path(path, false, &bucket, &object, status); if (TF_GetCode(status) != TF_OK) return; auto s3_file = static_cast(filesystem->plugin_filesystem); From c7f7a43e41d300d686decf622d07e12a9e0a9f30 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Sat, 18 Jul 2020 19:02:32 +0700 Subject: [PATCH 0778/2522] Add CopyFile --- .../filesystem/plugins/s3/s3_filesystem.cc | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc index 53a791bf2ea..bbd0b4a202b 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc @@ -710,6 +710,56 @@ void NewReadOnlyMemoryRegionFromFile(const TF_Filesystem* filesystem, TF_SetStatus(status, TF_OK, ""); } +static void SimpleCopyFile(const Aws::String& source, + const Aws::String& bucket_dst, + const Aws::String& object_dst, S3File* s3_file, + TF_Status* status){}; + +static void MultiPartCopy(const Aws::String& source, + const Aws::String& bucket_dst, + const Aws::String& object_dst, const size_t num_parts, + const uint64_t file_size, S3File* s3_file, + TF_Status* status){}; + +void CopyFile(const TF_Filesystem* filesystem, const char* src, const char* dst, + TF_Status* status) { + auto file_size = GetFileSize(filesystem, src, status); + if (TF_GetCode(status) != TF_OK) return; + if (file_size == 0) + return TF_SetStatus(status, TF_FAILED_PRECONDITION, + "Source is a directory or empty file"); + + Aws::String bucket_src, object_src; + ParseS3Path(src, false, &bucket_src, &object_src, status); + if (TF_GetCode(status) != TF_OK) return; + Aws::String copy_src = bucket_src + "/" + object_src; + + Aws::String bucket_dst, object_dst; + ParseS3Path(dst, false, &bucket_dst, &object_dst, status); + if (TF_GetCode(status) != TF_OK) return; + + auto s3_file = static_cast(filesystem->plugin_filesystem); + auto chunk_size = + s3_file->multi_part_chunk_sizes[Aws::Transfer::TransferDirection::UPLOAD]; + size_t num_parts = 1; + if (file_size > chunk_size) num_parts = ceil((float)file_size / chunk_size); + if (num_parts == 1) + SimpleCopyFile(copy_src, bucket_dst, object_dst, s3_file, status); + else if (num_parts > 10000) + TF_SetStatus( + status, TF_UNIMPLEMENTED, + absl::StrCat("MultiPartCopy with number of parts more than 10000 is " + "not supported. Your object ", + src, " required ", num_parts, + " as multi_part_copy_part_size is set to ", chunk_size, + ". 
You can control this part size using the environment " + "variable S3_MULTI_PART_COPY_PART_SIZE to increase it.") + .c_str()); + else + MultiPartCopy(copy_src, bucket_dst, object_dst, num_parts, file_size, + s3_file, status); +} + // TODO(vnvo2409): Implement later } // namespace tf_s3_filesystem From 545ded18e13fd55100140abf2339f4d8283b9dca Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Sat, 18 Jul 2020 19:22:54 +0700 Subject: [PATCH 0779/2522] Add SimpleCopy --- .../filesystem/plugins/s3/s3_filesystem.cc | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc index bbd0b4a202b..417a05d9a7f 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc @@ -17,6 +17,7 @@ limitations under the License. #include #include #include +#include #include #include #include @@ -713,7 +714,18 @@ void NewReadOnlyMemoryRegionFromFile(const TF_Filesystem* filesystem, static void SimpleCopyFile(const Aws::String& source, const Aws::String& bucket_dst, const Aws::String& object_dst, S3File* s3_file, - TF_Status* status){}; + TF_Status* status) { + Aws::S3::Model::CopyObjectRequest copy_object_request; + copy_object_request.WithCopySource(source) + .WithBucket(bucket_dst) + .WithKey(object_dst); + auto copy_object_outcome = + s3_file->s3_client->CopyObject(copy_object_request); + if (!copy_object_outcome.IsSuccess()) + TF_SetStatusFromAWSError(copy_object_outcome.GetError(), status); + else + TF_SetStatus(status, TF_OK, ""); +}; static void MultiPartCopy(const Aws::String& source, const Aws::String& bucket_dst, From 4029e5feebd42956d2848cfa76f883e2676341fc Mon Sep 17 00:00:00 2001 From: Fausto Morales Date: Sat, 18 Jul 2020 11:49:34 -0500 Subject: [PATCH 0780/2522] Account for possibility of pyc extension. --- tensorflow/lite/python/interpreter.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/lite/python/interpreter.py b/tensorflow/lite/python/interpreter.py index 35e05d8c8c9..12ee41d6dee 100644 --- a/tensorflow/lite/python/interpreter.py +++ b/tensorflow/lite/python/interpreter.py @@ -21,11 +21,12 @@ from __future__ import print_function import ctypes import platform import sys +import os import numpy as np # pylint: disable=g-import-not-at-top -if not __file__.endswith('tflite_runtime/interpreter.py'): +if not os.path.splitext(__file__)[0].endswith('tflite_runtime/interpreter'): # This file is part of tensorflow package. from tensorflow.lite.python.interpreter_wrapper import _pywrap_tensorflow_interpreter_wrapper as _interpreter_wrapper from tensorflow.python.util.tf_export import tf_export as _tf_export From 1a89a3198751c31b31b0afb979dc1f8ef425022d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sat, 18 Jul 2020 10:45:22 -0700 Subject: [PATCH 0781/2522] Change link to README.md in module comments. PiperOrigin-RevId: 321954059 Change-Id: I46383d8bc96170edde656fca6c5ca596ea0a53de --- tensorflow/python/ops/numpy_ops/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/numpy_ops/__init__.py b/tensorflow/python/ops/numpy_ops/__init__.py index f4d3e8b3e05..aa45b78946e 100644 --- a/tensorflow/python/ops/numpy_ops/__init__.py +++ b/tensorflow/python/ops/numpy_ops/__init__.py @@ -16,7 +16,7 @@ This module provides a subset of numpy APIs, built on top of TensorFlow operations. 
Please see documentation here: -https://github.com/tensorflow/tensorflow/tree/master/tensorflow/python/ops/numpy_ops. +https://github.com/tensorflow/tensorflow/tree/master/tensorflow/python/ops/numpy_ops/README.md """ # TODO(wangpeng): Append `np_export`ed symbols to the comments above. From abe18063ada4223acae4397e4df08b4e017110b3 Mon Sep 17 00:00:00 2001 From: Feng Liu Date: Sat, 18 Jul 2020 11:58:52 -0700 Subject: [PATCH 0782/2522] Run shape inference when all ops are in TF dialect PiperOrigin-RevId: 321958208 Change-Id: I62a356c3e073312090f41c55ec6ebd7c1c7e6621 --- tensorflow/compiler/mlir/lite/tf_tfl_passes.cc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc b/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc index fc44e778b92..4ac22448e6c 100644 --- a/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc +++ b/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc @@ -166,6 +166,10 @@ void AddTFToTFLConversionPasses(const mlir::TFL::PassConfig& pass_config, // The below passes only make sense if Builtin TFLite ops are enabled // for emission. if (pass_config.emit_builtin_tflite_ops) { + // Run shape inference after variables are converted to constants. + if (pass_config.shape_inference) { + pass_manager->addPass(mlir::TF::CreateTFShapeInferencePass()); + } // Prepare for TFLite dialect, rerun canonicalization, and then legalize to // the TFLite dialect. pass_manager->addPass( @@ -173,6 +177,9 @@ void AddTFToTFLConversionPasses(const mlir::TFL::PassConfig& pass_config, pass_manager->addNestedPass(mlir::createCanonicalizerPass()); if (pass_config.shape_inference) { // Add a shape inference pass to optimize away the unnecessary casts. + // This also fixes the unranked shapes due to TF ops constant folding. + // TODO(fengliuai): remove this pass if TableGen patterns have a better + // to control the shapes for the intermediate results. pass_manager->addPass(mlir::TF::CreateTFShapeInferencePass()); } From 555c5d61553c47121acaa5939db27af0b7ec10e9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sat, 18 Jul 2020 12:17:37 -0700 Subject: [PATCH 0783/2522] Run shape inference when all ops are in TF dialect PiperOrigin-RevId: 321959448 Change-Id: I022448bc64b8745dec0b66b59cb31946e457eaaa --- tensorflow/compiler/mlir/lite/tf_tfl_passes.cc | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc b/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc index 4ac22448e6c..fc44e778b92 100644 --- a/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc +++ b/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc @@ -166,10 +166,6 @@ void AddTFToTFLConversionPasses(const mlir::TFL::PassConfig& pass_config, // The below passes only make sense if Builtin TFLite ops are enabled // for emission. if (pass_config.emit_builtin_tflite_ops) { - // Run shape inference after variables are converted to constants. - if (pass_config.shape_inference) { - pass_manager->addPass(mlir::TF::CreateTFShapeInferencePass()); - } // Prepare for TFLite dialect, rerun canonicalization, and then legalize to // the TFLite dialect. pass_manager->addPass( @@ -177,9 +173,6 @@ void AddTFToTFLConversionPasses(const mlir::TFL::PassConfig& pass_config, pass_manager->addNestedPass(mlir::createCanonicalizerPass()); if (pass_config.shape_inference) { // Add a shape inference pass to optimize away the unnecessary casts. - // This also fixes the unranked shapes due to TF ops constant folding. 
- // TODO(fengliuai): remove this pass if TableGen patterns have a better - // to control the shapes for the intermediate results. pass_manager->addPass(mlir::TF::CreateTFShapeInferencePass()); } From 332ad33c638866047f50f500eb20ed9ffe5bf47d Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Sat, 18 Jul 2020 13:32:10 -0700 Subject: [PATCH 0784/2522] Open source distributed_tpu_rewrite_pass.cc and associated helper methods PiperOrigin-RevId: 321964336 Change-Id: I101c790d7c57c78fdbf0f4ac63fdd13916dd0481 --- tensorflow/core/tpu/graph_rewrite/BUILD | 118 + .../core/tpu/graph_rewrite/cond_builder.cc | 83 + .../core/tpu/graph_rewrite/cond_builder.h | 74 + .../distributed_tpu_rewrite_pass.cc | 3926 +++++++++++++++++ .../distributed_tpu_rewrite_pass.h | 589 +++ .../distributed_tpu_rewrite_pass_internal.cc | 45 + .../distributed_tpu_rewrite_pass_internal.h | 38 + .../host_training_loop_optimization_util.cc | 629 +++ .../host_training_loop_optimization_util.h | 80 + .../incomplete_nodedef_builder.cc | 73 + .../incomplete_nodedef_builder.h | 58 + .../tpu_rewrite_pass_registration.cc | 4 +- tensorflow/stream_executor/tpu/BUILD | 1 + tensorflow/stream_executor/tpu/tpu_topology.h | 1 + 14 files changed, 5718 insertions(+), 1 deletion(-) create mode 100644 tensorflow/core/tpu/graph_rewrite/cond_builder.cc create mode 100644 tensorflow/core/tpu/graph_rewrite/cond_builder.h create mode 100644 tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.cc create mode 100644 tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.h create mode 100644 tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.cc create mode 100644 tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.h create mode 100644 tensorflow/core/tpu/graph_rewrite/host_training_loop_optimization_util.cc create mode 100644 tensorflow/core/tpu/graph_rewrite/host_training_loop_optimization_util.h create mode 100644 tensorflow/core/tpu/graph_rewrite/incomplete_nodedef_builder.cc create mode 100644 tensorflow/core/tpu/graph_rewrite/incomplete_nodedef_builder.h diff --git a/tensorflow/core/tpu/graph_rewrite/BUILD b/tensorflow/core/tpu/graph_rewrite/BUILD index 69238456d57..bffb44c1b97 100644 --- a/tensorflow/core/tpu/graph_rewrite/BUILD +++ b/tensorflow/core/tpu/graph_rewrite/BUILD @@ -13,6 +13,7 @@ cc_library( srcs = ["tpu_rewrite_pass_registration.cc"], deps = [ ":distributed_tpu_configuration_rewrite_pass", + ":distributed_tpu_rewrite_pass", ":encapsulate_tpu_computations_pass", ":variable_merger_pass", "//tensorflow/core:core_cpu", @@ -101,3 +102,120 @@ cc_library( "@com_google_absl//absl/strings", ], ) + +cc_library( + name = "distributed_tpu_rewrite_pass_internal", + srcs = ["distributed_tpu_rewrite_pass_internal.cc"], + hdrs = ["distributed_tpu_rewrite_pass_internal.h"], + deps = [ + "//tensorflow/core:framework", + "@com_google_absl//absl/random", + ], +) + +cc_library( + name = "distributed_tpu_rewrite_pass", + srcs = [ + "distributed_tpu_rewrite_pass.cc", + ], + hdrs = [ + "distributed_tpu_rewrite_pass.h", + ], + deps = [ + ":cond_builder", + ":distributed_tpu_rewrite_helpers", + ":distributed_tpu_rewrite_pass_internal", + ":host_training_loop_optimization_util", + ":incomplete_nodedef_builder", + "//tensorflow/compiler/jit:encapsulate_util", + "//tensorflow/compiler/jit:shape_inference", + "//tensorflow/compiler/tf2xla:resource_operation_table", + "//tensorflow/compiler/tf2xla:sharding_util", + "//tensorflow/compiler/tf2xla:side_effect_util", + "//tensorflow/compiler/tf2xla:tf2xla_util", + 
"//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/xla:array3d", + "//tensorflow/compiler/xla:array4d", + "//tensorflow/compiler/xla:xla_proto_cc", + "//tensorflow/compiler/xla/client:sharding_builder", + "//tensorflow/compiler/xla/service:computation_placer", + "//tensorflow/core:framework", + "//tensorflow/core:graph", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:session_options", + "//tensorflow/core/common_runtime:function", + "//tensorflow/core/common_runtime:graph_constructor", + "//tensorflow/core/common_runtime:lower_function_call_op", + "//tensorflow/core/common_runtime:lower_functional_ops", + "//tensorflow/core/common_runtime:lower_if_op", + "//tensorflow/core/common_runtime:lower_while_op", + "//tensorflow/core/common_runtime:optimization_registry", + "//tensorflow/core/protobuf/tpu:compile_metadata_proto_cc", + "//tensorflow/core/protobuf/tpu:dynamic_padding_proto_cc", + "//tensorflow/core/protobuf/tpu:topology_proto_cc", + "//tensorflow/core/tpu:tpu_compile_interface", + "//tensorflow/core/tpu:tpu_defs", + "//tensorflow/core/tpu/kernels:tpu_util_c_api_hdrs", + "//tensorflow/stream_executor/tpu:tpu_platform_interface", + "//tensorflow/stream_executor/tpu:tpu_topology_external", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:node_hash_map", + "@com_google_absl//absl/types:span", + ], +) + +cc_library( + name = "incomplete_nodedef_builder", + srcs = ["incomplete_nodedef_builder.cc"], + hdrs = ["incomplete_nodedef_builder.h"], + deps = [ + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/core:core_cpu", + "//tensorflow/core:core_cpu_lib", + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + ], +) + +cc_library( + name = "cond_builder", + srcs = ["cond_builder.cc"], + hdrs = ["cond_builder.h"], + deps = [ + ":incomplete_nodedef_builder", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/core:core_cpu", + "//tensorflow/core:core_cpu_lib", + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + ], +) + +cc_library( + name = "host_training_loop_optimization_util", + srcs = [ + "host_training_loop_optimization_util.cc", + ], + hdrs = [ + "host_training_loop_optimization_util.h", + ], + visibility = ["//visibility:public"], + deps = [ + ":distributed_tpu_rewrite_pass_internal", + "//tensorflow/compiler/tf2xla:functionalize_control_flow_util", + "//tensorflow/compiler/tf2xla:tf2xla_util", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework_internal", + "//tensorflow/core:graph", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core/protobuf/tpu:compile_metadata_proto_cc", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/container:node_hash_set", + "@com_google_absl//absl/types:optional", + ], +) diff --git a/tensorflow/core/tpu/graph_rewrite/cond_builder.cc b/tensorflow/core/tpu/graph_rewrite/cond_builder.cc new file mode 100644 index 00000000000..e16ae08aec3 --- /dev/null +++ b/tensorflow/core/tpu/graph_rewrite/cond_builder.cc @@ -0,0 +1,83 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/tpu/graph_rewrite/cond_builder.h" + +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/tpu/graph_rewrite/incomplete_nodedef_builder.h" + +namespace tensorflow { + +CondBuilder::CondBuilder(string name, string device, const NodeDebugInfo& debug, + Graph* graph) + : graph_(graph), name_(std::move(name)), device_(std::move(device)) { + auto new_name = [graph, this](string suffix) { + return graph->NewName(strings::StrCat(name_, "/", suffix)); + }; + TF_CHECK_OK( + IncompleteNodeDefBuilder::Identity(new_name("pred"), DT_BOOL, debug) + .Device(device_) + .Build(graph_, &pred_)); + Node* switch_pred; + TF_CHECK_OK( + IncompleteNodeDefBuilder::Switch(new_name("switch_pred"), DT_BOOL, debug) + .Device(device_) + .Build(graph_, &switch_pred)); + graph_->AddEdge(pred(), 0, switch_pred, 0); + graph_->AddEdge(pred(), 0, switch_pred, 1); + TF_CHECK_OK( + IncompleteNodeDefBuilder::Identity(new_name("switch_f"), DT_BOOL, debug) + .Device(device_) + .Build(graph_, &switch_f_)); + TF_CHECK_OK( + IncompleteNodeDefBuilder::Identity(new_name("switch_t"), DT_BOOL, debug) + .Device(device_) + .Build(graph_, &switch_t_)); + graph_->AddEdge(switch_pred, kElseBranch, switch_f_, 0); + graph_->AddEdge(switch_pred, kThenBranch, switch_t_, 0); + Node* merge_pred; + TF_CHECK_OK(IncompleteNodeDefBuilder::Merge(new_name("merge_pred"), DT_BOOL, + debug, /*n=*/2) + .Device(device_) + .Build(graph_, &merge_pred)); + graph_->AddEdge(switch_f_, 0, merge_pred, kElseBranch); + graph_->AddEdge(switch_t_, 0, merge_pred, kThenBranch); + // Note: when additional return values are added then there should be a + // control dependency between those merge nodes and control_successor_ to + // ensure that it is control successor of conditional. + control_successor_ = merge_pred; +} + +Node* CondBuilder::pred() { return pred_; } + +Node* CondBuilder::switch_f() { return switch_f_; } + +Node* CondBuilder::switch_t() { return switch_t_; } + +Node* CondBuilder::control_successor() { return control_successor_; } + +Status CondBuilder::AddInput(const string& input_name, const DataType& type, + const string& device, const NodeDebugInfo& debug, + Node** input) { + auto b = IncompleteNodeDefBuilder::Switch( + graph_->NewName(strings::StrCat(name_, "/", input_name)), type, debug); + TF_RETURN_IF_ERROR(b.Device(device).Build(graph_, input)); + graph_->AddEdge(pred(), 0, *input, 1); + return Status::OK(); +} + +} // namespace tensorflow diff --git a/tensorflow/core/tpu/graph_rewrite/cond_builder.h b/tensorflow/core/tpu/graph_rewrite/cond_builder.h new file mode 100644 index 00000000000..29e264dfc0a --- /dev/null +++ b/tensorflow/core/tpu/graph_rewrite/cond_builder.h @@ -0,0 +1,74 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_TPU_GRAPH_REWRITE_COND_BUILDER_H_ +#define TENSORFLOW_CORE_TPU_GRAPH_REWRITE_COND_BUILDER_H_ + +#include + +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { + +// Conditional builder. +// Convenience builder to make it easy to construct a conditional. E.g., +// Node* pred = ...; +// CondBuilder cb("cond", g); +// auto switch_var = cb.AddInput("var", DT_RESOURCE); +// g->AddEdge(pred, 0, cb.pred(), 0); +// Will create the nodes of a conditional that takes as input a resource +// variable ("var") as input and that switches on pred. +// +// This currently only handles the case needed by distributed_tpu_rewrite_pass +// and is not completely general. +class CondBuilder { + public: + enum Branch { kElseBranch = 0, kThenBranch = 1 }; + + CondBuilder(string name, string device, const NodeDebugInfo& debug, + Graph* graph); + + // Returns node corresponding to the predicate input. + Node* pred(); + + // Returns node corresponding to switch_f branch of predicate switch. + Node* switch_f(); + + // Returns node corresponding to switch_t branch of predicate switch. + Node* switch_t(); + + // Returns node corresponding to control successor. + Node* control_successor(); + + // Returns the Switch node to feed a value of the given type into the + // conditional. + Status AddInput(const string& input_name, const DataType& type, + const string& device, const NodeDebugInfo& debug, + Node** input); + + private: + Node* control_successor_; + Node* switch_f_; + Node* switch_t_; + Node* pred_; + Graph* const graph_; + const string name_; + const string device_; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_TPU_GRAPH_REWRITE_COND_BUILDER_H_ diff --git a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.cc b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.cc new file mode 100644 index 00000000000..208cb8bd865 --- /dev/null +++ b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.cc @@ -0,0 +1,3926 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Compilation for distributed TPU (TPU_REPLICATED_CORE devices). 
+ +#include "tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.h" + +#include +#include + +#include "absl/algorithm/container.h" +#include "absl/container/flat_hash_map.h" +#include "tensorflow/compiler/jit/encapsulate_util.h" +#include "tensorflow/compiler/tf2xla/resource_operation_table.h" +#include "tensorflow/compiler/tf2xla/sharding_util.h" +#include "tensorflow/compiler/tf2xla/side_effect_util.h" +#include "tensorflow/compiler/tf2xla/tf2xla_util.h" +#include "tensorflow/compiler/xla/array3d.h" +#include "tensorflow/compiler/xla/array4d.h" +#include "tensorflow/compiler/xla/client/sharding_builder.h" +#include "tensorflow/compiler/xla/service/computation_placer.h" +#include "tensorflow/compiler/xla/xla.pb.h" +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/common_runtime/graph_constructor.h" +#include "tensorflow/core/common_runtime/lower_function_call_op.h" +#include "tensorflow/core/common_runtime/lower_functional_ops.h" +#include "tensorflow/core/common_runtime/lower_if_op.h" +#include "tensorflow/core/common_runtime/lower_while_op.h" +#include "tensorflow/core/common_runtime/optimization_registry.h" +#include "tensorflow/core/framework/function.h" +#include "tensorflow/core/framework/graph_to_functiondef.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/framework/partial_tensor_shape.h" +#include "tensorflow/core/framework/tensor.pb.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/framework/versions.pb.h" +#include "tensorflow/core/graph/algorithm.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/gtl/cleanup.h" +#include "tensorflow/core/lib/strings/proto_serialization.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/fingerprint.h" +#include "tensorflow/core/protobuf/tpu/compile_metadata.pb.h" +#include "tensorflow/core/protobuf/tpu/dynamic_padding.pb.h" +#include "tensorflow/core/protobuf/tpu/topology.pb.h" +#include "tensorflow/core/public/session_options.h" +#include "tensorflow/core/tpu/graph_rewrite/cond_builder.h" +#include "tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_helpers.h" +#include "tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.h" +#include "tensorflow/core/tpu/graph_rewrite/host_training_loop_optimization_util.h" +#include "tensorflow/core/tpu/graph_rewrite/incomplete_nodedef_builder.h" +#include "tensorflow/core/tpu/kernels/tpu_util_c_api.h" +#include "tensorflow/core/tpu/tpu_compile_interface.h" +#include "tensorflow/core/tpu/tpu_defs.h" +#include "tensorflow/core/util/device_name_utils.h" +#include "tensorflow/core/util/dump_graph.h" +#include "tensorflow/stream_executor/tpu/tpu_platform_interface.h" + +namespace tensorflow { + +namespace { + +// Device coordinates are defined as (x, y, z, core), thus resulting in a rank 4 +// topology. +constexpr int kTPUTopologyRank = 4; + +// An upper bound on how many cores may be present in the topology. +static constexpr int kTPUMaxTopologySize = 4096; + +// Attribute containing the serialized xla::OpSharding to be passed to the +// corresponding XLA HLO operation, which represents how a shape is distributed +// across logical cores, e.g., replication, single-device, or partitioning. 
+const char kShardingAttribute[] = "_XlaSharding"; + +const char kTPUPartitionedInput[] = "TPUPartitionedInput"; +const char kTPUPartitionedOutput[] = "TPUPartitionedOutput"; + +// TODO(phawkins) add a canonical copy of these operator names and refactor +// everything to use it. +static const char* const kSendFromHostOp = "_XlaSendFromHost"; +static const char* const kRecvAtHostOp = "_XlaRecvAtHost"; + +static const char* const kTPUCompilationResultAttr = "_tpu_compilation_status"; +static const char* const kPostDeviceRewriteAttr = "_post_device_rewrite"; + +string CoreDeviceLabel(int core) { + return strings::StrCat("/device:", DEVICE_TPU_REPLICATED_CORE, ":", core); +} + +// Creates a unique node name with a particular prefix. +string UniqueNodeName(const StringPiece prefix, Graph* graph) { + return graph->NewName(strings::StrCat(prefix, "/_", internal::GetNodeId())); +} + +Status SetNodeDeviceForTPUCommunication(DeviceNameUtils::ParsedName device, + const string& target_device_type, + Node* node) { + TF_RET_CHECK(device.has_type && device.type == DEVICE_TPU_NODE); + TF_RET_CHECK(device.has_id); + TF_RET_CHECK(HasNodeAttr(node->def(), kXlaHasHostTransferAttrName)); + + // Store the device instance as an attr on the Node. + TF_RETURN_IF_ERROR(SetDeviceOrdinalAttributeForNode(node, device.id)); + + // Place the execute Op on the TPU_SYSTEM device so it can access the cache of + // compiled protos in the resource manager. + device.type = target_device_type; + device.id = 0; + + node->set_assigned_device_name(DeviceNameUtils::ParsedNameToString(device)); + return Status::OK(); +} + +Status SetNodeDeviceForTPUCommunication(const string& tpu_device_name, + const string& target_device_type, + Node* node) { + // Parse the TPU device. + DeviceNameUtils::ParsedName device; + TF_RET_CHECK(DeviceNameUtils::ParseFullName(tpu_device_name, &device)); + return SetNodeDeviceForTPUCommunication(device, target_device_type, node); +} + +// Iterate over the nodes in the original graph and find all the TPUReplicate +// nodes, and all the nodes that are part of outside_compilation clusters. 
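// For intuition (illustrative, not part of this patch): a node annotated with
//   kTPUReplicateAttr = "cluster_0" and kOutsideCompilationAttr = "oc_0"
// ends up in (*outside_compilation_nodes)["cluster_0"]["oc_0"], whereas a
// non-_TPUReplicate node carrying kTPUReplicateAttr without
// kOutsideCompilationAttr is reported as an internal error below.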
+Status FindTaggedNodes( + Graph* graph, std::vector* replicate_nodes, + std::map* + outside_compilation_nodes, + std::map>* head_tail_outside_compilation_nodes) { + for (Node* node : graph->op_nodes()) { + if (node->type_string() == "_TPUReplicate") { + replicate_nodes->push_back(node); + const AttrValue* cluster_attr = node->attrs().Find(kTPUReplicateAttr); + if (cluster_attr == nullptr) { + return errors::Internal("TPUReplicate node ", node->name(), " has no ", + kTPUReplicateAttr, " attr."); + } else { + const string& cluster = cluster_attr->s(); + if (cluster.empty()) { + return errors::Internal("Attr ", kTPUReplicateAttr, " on node ", + node->name(), " has no string value."); + } + if (outside_compilation_nodes->find(cluster) != + outside_compilation_nodes->end()) { + return errors::Internal( + "TPUReplicate node ", node->name(), " has ", kTPUReplicateAttr, + " attr value '", cluster, + "' which is a duplicate of another TPUReplicate node in the " + "graph."); + } + (*outside_compilation_nodes)[cluster] = + DistributedTPURewritePass::OutsideCompilationNodeMap(); + (*head_tail_outside_compilation_nodes)[cluster] = std::vector(); + } + } + } + for (Node* node : graph->op_nodes()) { + if (node->type_string() != "_TPUReplicate") { + const AttrValue* cluster_attr = node->attrs().Find(kTPUReplicateAttr); + const AttrValue* outside_compilation_attr = + node->attrs().Find(kOutsideCompilationAttr); + if (cluster_attr == nullptr) { + if (outside_compilation_attr != nullptr) { + return errors::Internal("Node ", node->name(), " has ", + kOutsideCompilationAttr, " attr but no ", + kTPUReplicateAttr, " attr."); + } + } else { + const string& cluster = cluster_attr->s(); + if (cluster.empty()) { + return errors::Internal("Attr ", kTPUReplicateAttr, " on node ", + node->name(), " has no string value."); + } + const auto iter = outside_compilation_nodes->find(cluster); + if (iter == outside_compilation_nodes->end()) { + return errors::Internal( + "Attr ", kTPUReplicateAttr, " on node ", node->name(), + " does not correspond to a TPUReplicate node."); + } + if (outside_compilation_attr == nullptr) { + return errors::Internal("Node ", node->name(), " has ", + kTPUReplicateAttr, " attr but no ", + kOutsideCompilationAttr, " attr."); + } + const string& oc_cluster = outside_compilation_attr->s(); + if (oc_cluster.empty()) { + return errors::Internal("Attr ", kOutsideCompilationAttr, " on node ", + node->name(), " has no string value."); + } + + // Outside compilation cluster at head and tail of TPU computation has + // already been moved to host and is already replicated. As so, do not + // replicate outside compilation nodes with replica id attribute. + int replica_id; + if (TryGetNodeAttr(node->def(), kXlaReplicaIdAttrName, &replica_id)) { + const AttrValue* head_attr = + node->attrs().Find("_xla_only_arg_or_oc_input"); + const AttrValue* tail_attr = + node->attrs().Find("_xla_only_ret_or_oc_output"); + if (((head_attr != nullptr) && (head_attr->b())) || + ((tail_attr != nullptr) && (tail_attr->b()))) { + // This is safe as this has the same keys as + // outside_compilation_nodes which we already know has this key. + (*head_tail_outside_compilation_nodes)[cluster].push_back(node); + } + continue; + } + iter->second[oc_cluster].push_back(node); + } + } + } + return Status::OK(); +} + +// Helper class to spread TPU computation arguments and return values +// across cores. +// If all shapes are fully defined, balance by their size. 
+// If some of them are not fully defined, the undefined shapes size will +// be estimated with the average size of the fully defined ones. +// If none are defined, fall back to round-robin. +class TensorDevicePlacer { + public: + // Creates a TensorDevicePlacer object to distribute arguments or + // return values to a set of num_devices devices, where the types and + // the inferred shapes of the inputs (arguments or return values) are + // passed in types and shapes. + TensorDevicePlacer(int64 num_devices, const DataTypeVector& types, + const std::vector& shapes) + : index_nodes_(num_devices), sizes_(types.size()) { + int64 total_size = 0; + int64 num_defined = 0; + for (int64 i = 0; i < types.size(); ++i) { + sizes_[i] = GetInferredShapeSize(shapes[i], types[i]); + if (sizes_[i] >= 0) { + total_size += sizes_[i]; + ++num_defined; + } + } + // If a shape is undefined, select a size for it which is the average + // of the defined shapes. If no shapes are defined, assign 1 so that we + // get round-robin behavior. + int64 undefined_shape_size = + (num_defined > 0) ? total_size / num_defined : 1; + for (int64 i = 0; i < sizes_.size(); ++i) { + if (sizes_[i] < 0) { + sizes_[i] = undefined_shape_size; + } + } + min_heap_.reserve(num_devices); + for (int64 i = 0; i < num_devices; ++i) { + min_heap_.push_back(&index_nodes_[i]); + } + std::make_heap(min_heap_.begin(), min_heap_.end(), DeviceNodeCompare); + } + + // Reports that the argument/return-value at index has been assigned + // by the user to a given device. + void ReportDeviceAssigned(int64 device, int64 index) { + DeviceNode* node = &index_nodes_.at(device); + node->size += sizes_.at(index); + std::make_heap(min_heap_.begin(), min_heap_.end(), DeviceNodeCompare); + } + + // Retrieves the device at which the argument/return-value at index + // should be assigned to. + int64 RetrieveAssignment(int64 index) { + DeviceNode* node = *(min_heap_.begin()); + int64 device = node - index_nodes_.data(); + node->size += sizes_.at(index); + std::make_heap(min_heap_.begin(), min_heap_.end(), DeviceNodeCompare); + return device; + } + + private: + struct DeviceNode { + int64 size = 0; + }; + + // std::push_heap, etc... creates a max-heap, but we want a min-heap. + static bool DeviceNodeCompare(const DeviceNode* lhs, const DeviceNode* rhs) { + return lhs->size > rhs->size; + } + + static int64 GetInferredShapeSize(const InferredShape& ishape, + DataType dtype) { + return ishape.shape.IsFullyDefined() + ? ishape.shape.num_elements() * DataTypeSize(dtype) + : -1; + } + + std::vector index_nodes_; + std::vector min_heap_; + std::vector sizes_; +}; + +Status ValidateCoreNumber(int64 core, int64 num_cores_per_replica) { + if (core < 0 || core >= num_cores_per_replica) { + return tensorflow::errors::InvalidArgument("Invalid core ID: ", core, + ". 
The valid core IDs are [0..", + num_cores_per_replica, ")"); + } + return Status::OK(); +} + +Status FindHostComputeKeyPlaceholderNodes( + const Graph* graph, const std::vector& replicate_nodes, + std::unordered_map* host_compute_key_placeholder_map) { + host_compute_key_placeholder_map->clear(); + for (const auto node : replicate_nodes) { + (*host_compute_key_placeholder_map)[node->name()] = nullptr; + } + + for (Node* node : graph->op_nodes()) { + if (node->type_string() == "Placeholder" && + str_util::EndsWith(node->name(), "_key_placeholder")) { + const AttrValue* call_node_attr = + node->attrs().Find("_host_compute_call_node"); + if (call_node_attr != nullptr) { + auto iter = host_compute_key_placeholder_map->find(call_node_attr->s()); + if (iter == host_compute_key_placeholder_map->end()) { + return errors::InvalidArgument( + "Node ", node->name(), " has _host_compute_call_node attribute '", + call_node_attr->s(), "' that doesn't correspond to a call node"); + } + if (iter->second != nullptr) { + return errors::InvalidArgument( + "Key placeholder node ", iter->second->name(), " for call node ", + call_node_attr->s(), " previously found as ", + iter->second->name()); + } + iter->second = node; + } + } + } + + return Status::OK(); +} + +Status ReplaceCompilationResultNodeWithIdentity(Graph* graph, Node** node) { + Node* old_node = *node; + // We want to replace the node with an identity node with the same name. + const string& node_name = old_node->name(); + + // Create identity node. + TF_ASSIGN_OR_RETURN( + Node * id_node, + BuildIdentityNode(graph, node_name, DT_STRING, + /*input=*/nullptr, /*requested_device=*/"")); + + // No incoming edges are copied as a new one will be added from compile node + // to id_node. + + // Copy outgoing edges to the id node. 
+ std::vector out_edges(old_node->out_edges().begin(), + old_node->out_edges().end()); + for (const Edge* edge : out_edges) { + Node* dst = edge->dst(); + int src_output = edge->src_output(); + int dst_input = edge->dst_input(); + + if (src_output == Graph::kControlSlot) { + graph->AddControlEdge(id_node, dst); + } else { + graph->AddEdge(id_node, src_output, dst, dst_input); + } + graph->RemoveEdge(edge); + } + graph->RemoveNode(old_node); + + *node = id_node; + return Status::OK(); +} + +Status FillPaddingMap( + const Node& replicate_node, + protobuf::RepeatedPtrField* padding_maps) { + std::vector padding_map_strs; + TF_RETURN_IF_ERROR( + GetNodeAttr(replicate_node.attrs(), "padding_map", &padding_map_strs)); + padding_maps->Reserve(padding_map_strs.size()); + for (const string& padding_map_str : padding_map_strs) { + tpu::PaddingMap* padding_map = padding_maps->Add(); + if (!padding_map->ParseFromString(padding_map_str)) { + return errors::InvalidArgument( + "Malformed padding_map serialized string: ", padding_map_str); + } + } + return Status::OK(); +} + +Status GetStepMarkerLocation(const Node& replicate_node, + xla::DebugOptions::StepMarkerLocation* location) { + string step_marker_location_attr; + TF_RETURN_IF_ERROR(GetNodeAttr(replicate_node.attrs(), "step_marker_location", + &step_marker_location_attr)); + if (step_marker_location_attr.empty()) { + *location = xla::DebugOptions::STEP_MARK_AT_ENTRY; + } else { + if (!xla::DebugOptions::StepMarkerLocation_Parse(step_marker_location_attr, + location)) { + return errors::InvalidArgument("Malformed step_marker_location: ", + step_marker_location_attr); + } + } + return Status::OK(); +} + +// Extracts a map of dimension and number of splits for tiled input from xla +// sharding attribute. +Status GetDimensionIndicesAndNumSplitsFromSharding( + const xla::OpSharding& sharding, std::map* split_dimension_map) { + for (int dim_index = 0; + dim_index < sharding.tile_assignment_dimensions_size(); dim_index++) { + if (sharding.tile_assignment_dimensions(dim_index) > 1) { + split_dimension_map->emplace( + dim_index, sharding.tile_assignment_dimensions(dim_index)); + } + } + + if (split_dimension_map->empty()) { + return errors::InvalidArgument("Arg has unnecessary tiled sharding: ", + sharding.DebugString()); + } + return Status::OK(); +} + +// Updates contents of the function with `function_name` in function library +// definition `flib_def` to `new_graph`. This is required when graph +// transformation happens inside a function call body. +Status UpdateFunctionLibDefinition(const Graph& new_graph, + const std::string& function_name, + FunctionLibraryDefinition* flib_def) { + FunctionDef graph_fdef; + TF_RETURN_IF_ERROR(GraphToFunctionDef(new_graph, function_name, &graph_fdef)); + TF_RETURN_IF_ERROR(flib_def->ReplaceFunction(function_name, graph_fdef)); + return Status::OK(); +} + +struct NodeOut { + Node* node; + int index; +}; + +struct ShardedInputIndex { + int replica_id; + int argument_index; + + bool operator<(const ShardedInputIndex& rhs) const { + return std::tie(replica_id, argument_index) < + std::tie(rhs.replica_id, rhs.argument_index); + } +}; + +struct ShardedInputInfo { + // Split node that would be connected to tiled input Node. + Node* split_node; + // List of splits nodes and output index of the split node from which sharded + // input will be connected to the TPUExecute node. The inputs are ordered by + // logical core ids. 
+ std::vector sharded_inputs; +}; + +// Adds split node and split dimension node to graph for sharding tiled inputs. +// |graph| owns the returned Node* instance. +xla::StatusOr CreateSplitNode(int num_splits, int dim, + int orig_src_output, DataType dtype, + absl::string_view name_prefix, + Node* control_predecessor, Node* orig_src, + Graph* graph) { + const std::string input_assigned_device = orig_src->assigned_device_name(); + + // Add a split dimension node. + NodeDef split_dim_def; + split_dim_def.set_name( + graph->NewName(absl::StrCat(name_prefix, "/split_dim"))); + split_dim_def.set_op("Const"); + split_dim_def.set_device(input_assigned_device); + AddNodeAttr("dtype", DT_INT32, &split_dim_def); + TensorProto tensor_proto; + tensor_proto.set_dtype(DT_INT32); + tensor_proto.add_int_val(dim); + TensorShape shape({}); + shape.AsProto(tensor_proto.mutable_tensor_shape()); + AddNodeAttr("value", tensor_proto, &split_dim_def); + Status s; + Node* split_dim_node = graph->AddNode(split_dim_def, &s); + TF_RETURN_IF_ERROR(s); + // Add a split node. + NodeDef split_def; + split_def.set_name(graph->NewName(absl::StrCat(name_prefix, "/split"))); + split_def.set_op("Split"); + split_def.set_device(input_assigned_device); + AddNodeAttr("num_split", num_splits, &split_def); + AddNodeAttr("T", dtype, &split_def); + split_def.add_input(absl::StrCat(split_dim_node->name(), ":0")); + split_def.add_input(absl::StrCat(orig_src->name(), ":", orig_src_output)); + Node* split_node = graph->AddNode(split_def, &s); + TF_RETURN_IF_ERROR(s); + + graph->AddEdge(split_dim_node, 0, split_node, 0); + graph->AddEdge(orig_src, orig_src_output, split_node, 1); + + // Add a control dependency from `control_predecessor` to newly created + // constant node. This ensures that newly added split/split dim + // nodes are placed inside correct while loop frames when TPUExecute + // node is inside a host training loop. + graph->AddControlEdge(control_predecessor, split_dim_node); + + return split_node; +} + +// Creates a set of splits nodes that shards tiled input node in graph. +xla::StatusOr CreateOrGetSplitNodesForInputSharding( + const xla::OpSharding& sharding, int orig_arg_num, DataType dtype, + int replica_id, int orig_src_output, Node* orig_src, + Node* control_predecessor, Graph* graph, + std::map* + arg_index_to_sharded_input_map) { + ShardedInputIndex input_index{replica_id, orig_arg_num}; + auto iter = arg_index_to_sharded_input_map->find(input_index); + if (iter != arg_index_to_sharded_input_map->end()) { + return iter->second; + } + // Maps input dimension and number of splits with which the + // dimension sharded. + std::map split_dimension_map; + TF_RETURN_IF_ERROR(GetDimensionIndicesAndNumSplitsFromSharding( + sharding, &split_dimension_map)); + TF_RET_CHECK(!split_dimension_map.empty()) + << "Unnecessary sharding attribute found."; + + // For v1 while loop, nodes inside the loop body must either + // 1) Have data edges from while loop input node. + // or + // 2) Have direct control dependency from while loop input control + // node. + // + // As so, if we are adding Split node inside, while loop body, + // we must manually add a control dependency to a node inside + // a while loop (i.e. `control_predecessor`) to constant nodes + // without data in-edges to make sure that added split nodes + // have correct frame name. Else, placer will complain when + // `BuildControlFlow()` is invoked. 
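// Worked example (illustrative, not part of this patch): for a sharding with
// tile_assignment_dimensions = [2, 3], split_dimension_map is {0: 2, 1: 3}.
// The root Split divides the input in two along dimension 0; each of those
// outputs is then split in three along dimension 1, yielding six shards that
// feed the TPUExecute nodes in row-major order:
//   (0,0), (0,1), (0,2), (1,0), (1,1), (1,2).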
+ + auto sharding_it = split_dimension_map.begin(); + std::queue split_nodes_for_dimension; + int split_dimension = sharding_it->first; + int num_split = sharding_it->second; + + // Creates a tree of split nodes for sharding tiled inputs. Splits nodes + // are created such that input data is sharded in row major order. + // Split nodes at ith depth from the original input node represent nodes + // that split the input data at ith dimension. + TF_ASSIGN_OR_RETURN( + Node * root_split_node, + CreateSplitNode(num_split, split_dimension, orig_src_output, dtype, + absl::StrCat("sharded_input/replica_", replica_id, + "_dim_", split_dimension), + control_predecessor, orig_src, graph)); + sharding_it++; + + split_nodes_for_dimension.emplace(root_split_node); + + while (sharding_it != split_dimension_map.end()) { + split_dimension = sharding_it->first; + num_split = sharding_it->second; + int num_split_nodes_in_dimension = split_nodes_for_dimension.size(); + for (int i = 0; i < num_split_nodes_in_dimension; ++i) { + Node* input_split_node = split_nodes_for_dimension.front(); + split_nodes_for_dimension.pop(); + for (int src_output_index = 0; + src_output_index < input_split_node->num_outputs(); + ++src_output_index) { + TF_ASSIGN_OR_RETURN( + Node * split_node, + CreateSplitNode(num_split, split_dimension, src_output_index, dtype, + absl::StrCat("sharded_input/replica_", replica_id, + "_dim_", split_dimension), + control_predecessor, input_split_node, graph)); + split_nodes_for_dimension.emplace(split_node); + } + } + sharding_it++; + } + + // `split_nodes_for_dimension` now includes final split nodes + // from which sharded data will be fed into TPUExcute nodes -- sorted by + // row major order. + std::vector sharded_inputs_list; + sharded_inputs_list.reserve(split_nodes_for_dimension.size()); + while (!split_nodes_for_dimension.empty()) { + Node* split_node = split_nodes_for_dimension.front(); + split_nodes_for_dimension.pop(); + int num_splits; + TF_RETURN_IF_ERROR( + GetNodeAttr(split_node->def(), "num_split", &num_splits)); + for (int out_index = 0; out_index < num_splits; ++out_index) { + sharded_inputs_list.emplace_back(NodeOut{split_node, out_index}); + } + } + + ShardedInputInfo sharded_input_info{root_split_node, + std::move(sharded_inputs_list)}; + (*arg_index_to_sharded_input_map)[input_index] = sharded_input_info; + return sharded_input_info; +} + +// Creates a concat node to be used for aggregating sharded retvals across +// logical cores. +xla::StatusOr CreateConcatNode(int dim, int num_splits, DataType dtype, + absl::string_view name_prefix, + const std::vector& inputs, + Graph* graph, absl::string_view device) { + // Add a Concat dim node. + NodeDef concat_dim_def; + concat_dim_def.set_name( + graph->NewName(absl::StrCat(name_prefix, "/concat_dim"))); + concat_dim_def.set_op("Const"); + AddNodeAttr("dtype", DT_INT32, &concat_dim_def); + concat_dim_def.set_device(std::string(device)); + TensorProto tensor_proto; + tensor_proto.set_dtype(DT_INT32); + tensor_proto.add_int_val(dim); + TensorShape shape({}); + shape.AsProto(tensor_proto.mutable_tensor_shape()); + AddNodeAttr("value", tensor_proto, &concat_dim_def); + Status s; + Node* concat_dim_node = graph->AddNode(concat_dim_def, &s); + TF_RETURN_IF_ERROR(s); + + // Add a Concat node. 
+ NodeDef concat_def; + concat_def.set_name(graph->NewName(absl::StrCat(name_prefix, "/concat"))); + concat_def.set_op("Concat"); + AddNodeAttr("N", num_splits, &concat_def); + AddNodeAttr("T", dtype, &concat_def); + concat_def.add_input(absl::StrCat(concat_dim_node->name(), ":0")); + concat_def.set_device(std::string(device)); + for (const auto& i : inputs) { + concat_def.add_input(absl::StrCat(i.node->name(), ":", i.index)); + } + Node* concat_node = graph->AddNode(concat_def, &s); + TF_RETURN_IF_ERROR(s); + + graph->AddEdge(concat_dim_node, 0, concat_node, 0); + + // 0th input to concat node is a concat dim node. So we start from 1st input + // and add all input edges. + int dst_input = 1; + for (const auto& i : inputs) { + graph->AddEdge(i.node, i.index, concat_node, dst_input); + ++dst_input; + } + return concat_node; +} + +// Creates a set of Concat nodes that aggregates sharded outputs from TPUExecute +// nodes into a single output. Sharded outputs are concatenated along row major +// order. That is, tiled output along 0th dimension will be concatenated last. +xla::StatusOr CreateConcatNodesForRetval( + const xla::OpSharding& sharding, DataType dtype, int replica_id, + const std::vector& orig_inputs, Graph* graph, + absl::string_view device) { + std::map split_dimension_map; + TF_RETURN_IF_ERROR(GetDimensionIndicesAndNumSplitsFromSharding( + sharding, &split_dimension_map)); + + std::vector inputs_to_sharded_retval = orig_inputs; + + for (auto it = split_dimension_map.rbegin(); it != split_dimension_map.rend(); + it++) { + auto dim = it->first; + auto num_splits = it->second; + + int num_concat_nodes = inputs_to_sharded_retval.size() / num_splits; + int input_index_to_concat_node = 0; + + std::vector new_concat_nodes; + for (int i = 0; i < num_concat_nodes; ++i) { + auto concat_input_it = + inputs_to_sharded_retval.begin() + input_index_to_concat_node; + std::vector inputs(concat_input_it, + concat_input_it + num_splits); + input_index_to_concat_node += num_splits; + + TF_ASSIGN_OR_RETURN( + Node * concat_node, + CreateConcatNode( + dim, num_splits, dtype, + absl::StrCat("sharded_output/replica_", replica_id, "_dim_", dim), + inputs, graph, device)); + new_concat_nodes.emplace_back(NodeOut{concat_node, 0}); + } + inputs_to_sharded_retval = new_concat_nodes; + } + + TF_RET_CHECK(inputs_to_sharded_retval.size() == 1); + return inputs_to_sharded_retval.at(0).node; +} + +absl::optional GetCoreIndexInSharding(const xla::OpSharding& sharding, + int64 core) { + absl::optional output_index; + for (int i = 0; i < sharding.tile_assignment_devices_size(); i++) { + int64 assigned_core = sharding.tile_assignment_devices(i); + if (assigned_core == core) { + output_index = i; + break; + } + } + return output_index; +} + +// Set the padding ops the same devices as the original inputs. If the original +// inputs are on TPUs, the padding ops will be placed on TPUs and XLA on demand +// mode will be triggered, so we don't need to copy the data back to the host +// to do the padding. 
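// Concrete example (illustrative, not part of this patch): if the unpadded
// input was placed on "/device:TPU:0", the Pad node tagged with
// kPostDeviceRewriteAttr and the companion nodes producing the real input
// shapes inherit that device below, so the padding runs on the TPU via XLA
// on-demand mode instead of bouncing the data back to the host.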
+Status SetPaddingNodesDevices(Graph* graph) { + for (Node* n : graph->op_nodes()) { + bool tpu_padding_attr; + if (n->type_string() == "Pad" && + GetNodeAttr(n->attrs(), kPostDeviceRewriteAttr, &tpu_padding_attr) + .ok()) { + Node* unpadded_input; + TF_RETURN_IF_ERROR(n->input_node(0, &unpadded_input)); + + const string& requested_device = unpadded_input->requested_device(); + const string& assigned_device = unpadded_input->assigned_device_name(); + if (!requested_device.empty() || !assigned_device.empty()) { + // The output nodes of the original unpadded inputs include the padded + // inputs and real shapes of inputs, we assign those to the same device + // as the original inputs. + for (Node* out : unpadded_input->out_nodes()) { + if (GetNodeAttr(out->attrs(), kPostDeviceRewriteAttr, + &tpu_padding_attr) + .ok()) { + out->set_requested_device(requested_device); + out->set_assigned_device_name(assigned_device); + } + } + // There might be a tf.shape node added before TPUCompileOp, we need to + // set its device as well. + for (Node* out : n->out_nodes()) { + if (n->type_string() == "Shape") { + out->set_requested_device(requested_device); + out->set_assigned_device_name(assigned_device); + } + } + } + } + } + return Status::OK(); +} + +const string& AssignedOrRequestedDevice(const Node* node) { + if (!node->assigned_device_name().empty()) { + return node->assigned_device_name(); + } + return node->requested_device(); +} + +bool IsTpuDevice(const string& device_string) { + DeviceNameUtils::ParsedName device; + return DeviceNameUtils::ParseFullName(device_string, &device) && + device.type == DEVICE_TPU_NODE; +} + +// Returns a set of device ops can be placed on TPU. There is no strict rule of +// thumb to decide which ops should be in the list, but empirically they are +// mostly dummy ops like Identity-like ops or control flow related ops. However +// people can add also add other ops like Pad to allow data stay on TPU. +const absl::flat_hash_set& PlaceOnTPUOpList() { + static const auto place_on_tpu_ops = new absl::flat_hash_set( + {"Identity", "IdentityN", "Enter", "Exit", "Switch", "Merge", + "NextIteration", "Shape"}); + return *place_on_tpu_ops; +} + +// If an op satisfies the following conditions, it will be placed on the same +// TPU device as its inputs: +// (1) The op can be placed on TPU (in the PlaceOnTPUOpList) +// (2) The op itself has no requested or assigned devices. +// (3) All the data inputs of this op are placed on the same device on TPUs. +// There are exceptions like the NextIterations input of Switch node can +// be placed on CPU as it is just a boolean. +// +// Returns true if the node device has been changed, otherwise returns false. +bool PlaceOpsOnTPU(Node* node) { + if (!AssignedOrRequestedDevice(node).empty() || + !PlaceOnTPUOpList().contains(node->type_string())) { + return false; + } + string src_tpu_device = ""; + Node* src_node; + for (const Edge* e : node->in_edges()) { + if (e->IsControlEdge()) { + continue; + } + Node* src = e->src(); + const string& src_device = AssignedOrRequestedDevice(src); + + // Make exceptions that we don't force the some inputs to place on TPUs. 
+ if (node->IsSwitch() && src->IsLoopCond()) { + continue; + } + + if (!IsTpuDevice(src_device) || + (!src_tpu_device.empty() && src_device != src_tpu_device)) { + return false; + } + if (src_tpu_device.empty()) { + src_tpu_device = src_device; + src_node = src; + } + } + node->set_assigned_device_name(src_node->assigned_device_name()); + node->set_requested_device(src_node->requested_device()); + return true; +} + +// Validate sharding configuration derived from XlaSharding attribute. +// Infer the core id from the OpSharding, if necessary. +Status ParseAndValidateSharding(const xla::OpSharding& sharding, + const int num_cores_per_replica, + int64* inferred_core_id, + absl::optional* result) { + if (sharding.type() == xla::OpSharding::MAXIMAL) { + int64 core_annotation = sharding.tile_assignment_devices(0); + TF_RETURN_IF_ERROR( + ValidateCoreNumber(core_annotation, num_cores_per_replica)); + if (*inferred_core_id == -1 || *inferred_core_id > core_annotation) { + *inferred_core_id = core_annotation; + result->emplace(sharding); + } + } else { + if (sharding.type() == xla::OpSharding::OTHER) { + for (int64 core : sharding.tile_assignment_devices()) { + TF_RETURN_IF_ERROR(ValidateCoreNumber(core, num_cores_per_replica)); + } + } + + if (!result->has_value()) { + *result = sharding; + } else { + std::string result_value_serialized; + std::string sharding_serialized; + SerializeToStringDeterministic(result->value(), &result_value_serialized); + SerializeToStringDeterministic(sharding, &sharding_serialized); + + if (result_value_serialized != sharding_serialized) { + // We see different shardings, assign to core 0. + result->emplace(xla::sharding_builder::AssignDevice(0)); + } + } + } + return Status::OK(); +} + +// As XlaSharding node may be followed by Cast op or an Identity op, +// recursively walk the graph and aggregate nodes connectd to +// |input_node| or Cast/Identity op following the |input_node|. +void FindNodesMaybeContainingShardingInfo(const Node& input_node, + std::vector* nodes) { + if (input_node.IsIdentity() || input_node.type_string() == "Cast") { + for (const Node* connected_node : input_node.out_nodes()) + FindNodesMaybeContainingShardingInfo(*connected_node, nodes); + } + nodes->emplace_back(&input_node); +} + +// Parse sharding configuration from |node| or it's adjacent nodes. +// XlaSharding configuration may be derived from +// a) Connected Identity op node. +// b) Connected Cast op node. +xla::StatusOr> +ParseInputShardingFromAdjacentNode(const int num_cores_per_replica, + const Node& node) { + // If |node| has `device` attribute or is a XlaSharding op, + // return the parsed OpSharding. + TF_ASSIGN_OR_RETURN(absl::optional sharding, + ParseShardingFromDevice(node, num_cores_per_replica)); + if (sharding.has_value()) return sharding; + + // XlaShardingOp may be followed by an identity or followed by identity + // and a Cast op. + std::vector potential_nodes_with_input_sharding; + FindNodesMaybeContainingShardingInfo(node, + &potential_nodes_with_input_sharding); + for (const Node* maybe_node_with_sharding_info : + potential_nodes_with_input_sharding) { + if (maybe_node_with_sharding_info->type_string() != "XlaSharding") continue; + + TF_ASSIGN_OR_RETURN(absl::optional sharding_config, + ParseShardingFromDevice(*maybe_node_with_sharding_info, + num_cores_per_replica)); + if (sharding_config.has_value()) return sharding_config; + } + return sharding; +} + +// Walk the graph from an argument node to find OpSharding configuration +// from its neighbor nodes. 
Sharding configuration may be inferred from +// 1) Parsing XlaSharding attribute from neighboring node. +// 2) If argument node is a resource, then by parsing adjacent nodes +// of the connected ReadVariable op. +Status ParseAndValidateShardingFromNeighbors( + const int num_cores_per_replica, const std::string& arg_node_name, + const Node& neighbor_node, int64* inferred_core_id, bool* is_fast_mem, + absl::optional* result) { + if (neighbor_node.attrs().Find(TPU_FAST_MEM_ATTR) != nullptr) { + *is_fast_mem = true; + VLOG(2) << "place " << neighbor_node.name() << " on fast memory because " + << arg_node_name << " has " << TPU_FAST_MEM_ATTR << " attribute"; + } + + // XlaSharding information may be encoded on node directly connected to the + // argument node. + TF_ASSIGN_OR_RETURN( + absl::optional sharding, + ParseInputShardingFromAdjacentNode(num_cores_per_replica, neighbor_node)); + if (sharding.has_value()) { + TF_RETURN_IF_ERROR(ParseAndValidateSharding( + *sharding, num_cores_per_replica, inferred_core_id, result)); + return Status::OK(); + } + + // When we use variable in TPU computation, we always have a + // XlaSharding op followed by a ReadVariableOp. As so, correctly parse + // the users of ReadVariableOp for potential sharding configuration. + if (neighbor_node.type_string() == "ReadVariableOp") { + for (const Edge* e : neighbor_node.out_edges()) { + if (e->IsControlEdge()) continue; + + if (e->dst()->attrs().Find(TPU_FAST_MEM_ATTR) != nullptr) { + *is_fast_mem = true; + VLOG(2) << "place " << arg_node_name << " on fast memory because " + << e->dst()->name() << TPU_FAST_MEM_ATTR << " attribute"; + } + + TF_ASSIGN_OR_RETURN( + absl::optional sharding, + ParseInputShardingFromAdjacentNode(num_cores_per_replica, *e->dst())); + if (sharding.has_value()) { + TF_RETURN_IF_ERROR(ParseAndValidateSharding( + *sharding, num_cores_per_replica, inferred_core_id, result)); + return Status::OK(); + } + } + } + return Status::OK(); +} + +} // namespace + +// Inputs: +// replication_spec_string: the device to which the TPUReplicate node was +// assigned. +// device_set: the set of TF devices. +// Outputs: +// tpu_compilation_device: the name of the TPU compilation device. +// num_tpus_per_task: the number of TPUs in each task. Verifies that all tasks +// have the same number of TPU devices. +// tpu_devices: the TPU devices, indexed by [task][device]. +static Status GetTPUDeviceNames( + const string& replication_spec_string, const DeviceSet& device_set, + string* tpu_compilation_device, int* num_tpus_per_task, + std::vector>* tpu_devices) { + // TODO(b/110910013) GetSystemDevice parses the spec and returns the name of + // the tpu_system device, which we replace by the cpu device. We do this + // replacement because we want to place the TPUCompileOp (and the compile + // assert op) explicitly on cpu devices on the same job as the tpu_system + // device. + DeviceNameUtils::ParsedName replication_spec; + Device* replication_device; + TF_RETURN_IF_ERROR(DistributedTPURewriteHelpers::GetSystemDevice( + replication_spec_string, device_set, &replication_spec, + &replication_device)); + *tpu_compilation_device = + str_util::StringReplace(replication_device->name(), DEVICE_TPU_SYSTEM, + DEVICE_CPU, /*replace_all=*/true); + + // Finds the set of TPU devices attached to the tasks in the job. 
+ TF_RETURN_IF_ERROR(DistributedTPURewriteHelpers::GetTPUDevices( + replication_spec, device_set, num_tpus_per_task, tpu_devices)); + + return Status::OK(); +} + +// Parses the topology attribute of TPUReplicate, and populates *topology with +// a physical mesh coordinate to (task, device) mapping. +static Status ParseTopologyAttr(const string& topology_attr, + const tpu::TpuTopologyExternal& tpu_topology, + int num_tasks, int num_tpus_per_task, + xla::Array4D>* topology) { + static_assert(4 == kTPUTopologyRank, "Assumes the topology rank is 4"); + tpu::TopologyProto proto; + proto.ParseFromString(topology_attr); + if (proto.mesh_shape_size() != kTPUTopologyRank) { + return errors::InvalidArgument("TPU topology must be rank ", + kTPUTopologyRank); + } + if (proto.num_tasks() != num_tasks) { + return errors::InvalidArgument("Mismatched number of TPU tasks"); + } + if (proto.num_tpu_devices_per_task() != num_tpus_per_task) { + return errors::InvalidArgument("Mismatched number of TPUs per task (", + proto.num_tpu_devices_per_task(), + " != ", num_tpus_per_task, ")."); + } + if (proto.device_coordinates_size() != + num_tasks * num_tpus_per_task * kTPUTopologyRank) { + return errors::InvalidArgument( + "device coordinates should be ", num_tasks, "x", num_tpus_per_task, "x", + kTPUTopologyRank, "; got ", proto.device_coordinates_size()); + } + + int devices_per_chip = tpu_topology.LogicalDevicesPerChip(kTensorCore); + *topology = xla::Array4D>( + tpu_topology.chip_bounds().x, tpu_topology.chip_bounds().y, + tpu_topology.chip_bounds().z, devices_per_chip, {-1, -1}); + int pos = 0; + for (int task = 0; task < num_tasks; ++task) { + for (int device = 0; device < num_tpus_per_task; ++device) { + int32 x = proto.device_coordinates(pos++); + int32 y = proto.device_coordinates(pos++); + int32 z = proto.device_coordinates(pos++); + int32 core = proto.device_coordinates(pos++); + + if (!tpu_topology.HasChip(x, y, z) || core < 0 || + core >= devices_per_chip) { + return errors::InvalidArgument( + "Mesh coordinates (", x, ",", y, ",", z, ",", core, + ") are not valid for the current TPU topology"); + } + if ((*topology)(x, y, z, core).first != -1) { + return errors::InvalidArgument("Duplicate coordinates (", x, ",", y, + ",", z, ",", core, ") in TPU topology"); + } + (*topology)(x, y, z, core) = {task, device}; + } + } + return Status::OK(); +} + +// Parses the value of the device_assignment attribute to TPUReplicate. +// Populates *device_assignment; *device_assignment must be a 2D array with +// shape (num_replicas, num_cores_per_replica). 
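// Worked example (illustrative, with hypothetical coordinates): for
// num_replicas = 2 and num_cores_per_replica = 1 the attribute must contain
// 2 * 1 * 4 = 8 integers, e.g.
//   [0, 0, 0, 0,  1, 0, 0, 0]
// which places replica 0 on core 0 of the chip at mesh position (0, 0, 0) and
// replica 1 on core 0 of the chip at (1, 0, 0).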
+static Status ParseDeviceAssignmentAttr( + absl::Span device_assignment_attr, + const tpu::TpuTopologyExternal& tpu_topology, int num_replicas, + int num_cores_per_replica, + xla::Array2D* device_assignment) { + static_assert(4 == kTPUTopologyRank, "Assumes the topology rank is 4"); + + const int64 device_assignment_attr_size = + num_replicas * num_cores_per_replica * kTPUTopologyRank; + if (device_assignment_attr.size() != device_assignment_attr_size) { + return errors::InvalidArgument( + "Length of device_assignment attribute must be equal to num_replicas (", + num_replicas, ") * num_cores_per_replica (", num_cores_per_replica, + ") * ", kTPUTopologyRank, " got ", device_assignment_attr.size()); + } + for (int core : device_assignment_attr) { + if (core < 0 || core >= kTPUMaxTopologySize) { + return errors::InvalidArgument( + "Invalid core number in device assignment: ", core); + } + } + + *device_assignment = xla::Array2D( + num_replicas, num_cores_per_replica); + int devices_per_chip = tpu_topology.LogicalDevicesPerChip(kTensorCore); + xla::Array4D replica_assignment( + tpu_topology.chip_bounds().x, tpu_topology.chip_bounds().y, + tpu_topology.chip_bounds().z, devices_per_chip, -1); + int pos = 0; + for (int replica = 0; replica < num_replicas; ++replica) { + for (int logical_core = 0; logical_core < num_cores_per_replica; + ++logical_core) { + int32 x = device_assignment_attr[pos++]; + int32 y = device_assignment_attr[pos++]; + int32 z = device_assignment_attr[pos++]; + int32 core = device_assignment_attr[pos++]; + + if (!tpu_topology.HasChip(x, y, z) || core < 0 || + core >= devices_per_chip) { + return errors::InvalidArgument( + "Mesh coordinates (", x, ",", y, ",", core, + ") are not valid for the current TPU topology"); + } + tpu::TpuCoreLocationExternal core_location = + tpu_topology.Core(x, y, z, kTensorCore, core); + + if (replica_assignment(x, y, z, core) != -1) { + return errors::InvalidArgument("Duplicate coordinates (", x, ",", y, + ",", z, ",", core, + ") in TPU device assignment"); + } + replica_assignment(x, y, z, core) = replica; + (*device_assignment)(replica, logical_core) = core_location; + } + } + return Status::OK(); +} + +// Builds TensorFlow device assignments for the special case of a single core +// computation that is replicated to every core in the mesh. +// LINT.IfChange +static Status BuildFullMeshDeviceAssignment( + int num_replicas, const std::vector>& tpu_devices, + int num_tasks, int num_tpus_per_task, + std::vector>* tf_device_assignment) { + // Assign TensorFlow devices to replicas arbitrarily. + for (int i = 0; i < num_replicas; ++i) { + int task = i / num_tpus_per_task; + int device = i % num_tpus_per_task; + TF_RET_CHECK(task >= 0 && task < num_tasks); + TF_RET_CHECK(device >= 0 && device < num_tpus_per_task); + + // We don't actually know which TF device corresponds to which physical + // device, but it doesn't matter—they're all identical. + (*tf_device_assignment)[i] = {tpu_devices[task][device]->name()}; + } + return Status::OK(); +} +// LINT.ThenChange(//tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.cc) + +// Builds TensorFlow device assignments for a replicated computation and convert +// device_assignment into xla_device_assignment. 
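// Illustrative aside (not part of this patch): the two outputs differ in kind.
// (*tf_device_assignment)[replica] holds TensorFlow device names, one per
// logical core (e.g. "/job:worker/replica:0/task:0/device:TPU:0"), while
// (**xla_device_assignment)(replica, computation) holds the physical core id
// reported by TpuCoreLocationExternal::Id() for the xla::DeviceAssignment
// handed to XLA.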
+static Status BuildGeneralDeviceAssignment( + int num_replicas, int num_cores_per_replica, + const std::vector>& tpu_devices, + const xla::Array2D& device_assignment, + const xla::Array4D>& topology, + std::vector>* tf_device_assignment, + std::unique_ptr* xla_device_assignment) { + // Assign TensorFlow devices to each computation's replicas according to + // device_assignment and 'topology'. + *xla_device_assignment = absl::make_unique( + num_replicas, num_cores_per_replica); + for (int replica = 0; replica < num_replicas; ++replica) { + for (int computation = 0; computation < num_cores_per_replica; + ++computation) { + const tpu::TpuCoreLocationExternal& core_location = + device_assignment(replica, computation); + + int task; + int device; + std::tie(task, device) = + topology(core_location.chip_coordinates().x, + core_location.chip_coordinates().y, + core_location.chip_coordinates().z, core_location.index()); + + CHECK_LT(computation, num_cores_per_replica); + (**xla_device_assignment)(replica, computation) = core_location.Id(); + + // The communication pattern between replicas will be determined later by + // BuildAllReduceRing. + TF_RET_CHECK(task >= 0 && task < tpu_devices.size()); + TF_RET_CHECK(device >= 0 && device < tpu_devices[task].size()); + (*tf_device_assignment)[replica].push_back( + tpu_devices[task][device]->name()); + } + } + return Status::OK(); +} + +/*static*/ Status DistributedTPURewritePass::BuildDeviceAssignment( + const tpu::TpuTopologyExternal& tpu_topology, int num_tpus_per_task, + const std::vector>& tpu_devices, int num_replicas, + int num_cores_per_replica, const string& topology_attr, + absl::Span device_assignment_attr, + std::vector>* tf_device_assignment, + std::unique_ptr* xla_device_assignment) { + const int num_tasks = tpu_devices.size(); + const int num_tpu_devices = num_tasks * num_tpus_per_task; + VLOG(2) << "num_tasks=" << num_tasks + << " num_tpus_per_task=" << num_tpus_per_task; + + // Checks num_replicas is sane first to avoid integer overflow. + if (num_replicas > num_tpu_devices) { +#ifdef PLATFORM_CLOUD_TPU + return errors::InvalidArgument("Requested num_replicas=", num_replicas, + " but there are only ", num_tpu_devices, + " cores in the TPU topology."); +#else + return errors::InvalidArgument("Requested num_replicas=", num_replicas, + " but there are only ", num_tpu_devices, + " cores in the TPU topology."); +#endif + } + if (num_replicas * num_cores_per_replica > num_tpu_devices) { + return errors::InvalidArgument( + "Requested num_replicas=", num_replicas, " with ", + num_cores_per_replica, " cores per replica, but there are only ", + num_tpu_devices, " cores in the TPU topology"); + } + + tf_device_assignment->clear(); + tf_device_assignment->resize(num_replicas); + + // Special case: we allow the user to omit the topology and device assignment + // information in two cases: + // * there is only one replica and one core per replica. In this case, we + // don't need to know topology information because we don't communicate with + // other cores. + // * the number of replicas is equal to the number of cores in the slice. In + // this case, all cores are running the same program so we don't need to + // know which is which. 
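// Concrete example (illustrative, not part of this patch): with 8 TPU cores in
// the slice and no topology attribute, only num_replicas = 1 or
// num_replicas = 8 are accepted, and only with num_cores_per_replica = 1; any
// other configuration has to supply an explicit topology (and a matching
// device assignment).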
+ if (topology_attr.empty()) { + // LINT.IfChange + if (num_replicas != 1 && num_replicas != num_tpu_devices) { + return errors::InvalidArgument( + "TPUReplicate asked to create ", num_replicas, + " replicas, but the number of cores in the TPU topology is ", + num_tpu_devices, + " and no TPU device assignment was supplied. " + "A TPU device assignment is required if the number of replicas is " + "not 1 or the number of cores in the topology (", + num_tpu_devices, ")"); + } + + if (num_cores_per_replica != 1) { + return errors::InvalidArgument( + "A TPU topology must be provided if num_cores_per_replica != 1"); + } + + if (!device_assignment_attr.empty()) { + return errors::InvalidArgument( + "A TPU topology must be provided if device_assignment_attr is " + "non-empty"); + } + + // If there is only one replica, assign the Tensorflow computation to task 0 + // device 0, and leave the XLA device assignment empty. We don't know which + // core this is in the TPU topology, but it doesn't matter—we don't need to + // communicate with any other cores. + if (num_replicas == 1) { + (*tf_device_assignment)[0] = {tpu_devices[0][0]->name()}; + return Status::OK(); + } + + // Otherwise, num_replicas is equal to the number of cores, and we build a + // device assignment that covers the entire mesh. We do not need to know + // the topology to do so because all cores are identical. + return BuildFullMeshDeviceAssignment(num_replicas, tpu_devices, num_tasks, + num_tpus_per_task, + tf_device_assignment); + // LINT.ThenChange(//tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.cc) + } + + // Array that maps mesh coordinates to {TF task, TF TPU device #} pairs. + xla::Array4D> topology; + TF_RETURN_IF_ERROR(ParseTopologyAttr(topology_attr, tpu_topology, num_tasks, + num_tpus_per_task, &topology)); + + // Array that maps logical (replica, core) pairs to physical mesh coordinates. + xla::Array2D device_assignment; + TF_RETURN_IF_ERROR(ParseDeviceAssignmentAttr( + device_assignment_attr, tpu_topology, num_replicas, num_cores_per_replica, + &device_assignment)); + + return BuildGeneralDeviceAssignment( + num_replicas, num_cores_per_replica, tpu_devices, device_assignment, + topology, tf_device_assignment, xla_device_assignment); +} + +Status DistributedTPURewritePass::GetComputationForTPUReplicateOp( + const NameAttrList& function, FunctionLibraryRuntime* flr, + Graph* computation, DataTypeVector* arg_types, + DataTypeVector* retval_types) { + FunctionLibraryRuntime::Handle handle; + + TF_RETURN_IF_ERROR( + flr->Instantiate(function.name(), AttrSlice(&function.attr()), &handle)); + + const FunctionBody* fbody = flr->GetFunctionBody(handle); + + CopyGraph(*fbody->graph, computation); + *arg_types = fbody->arg_types; + *retval_types = fbody->ret_types; + return Status::OK(); +} + +// Grab the InferredShape corresponding to an edge input. 
+static Status GetEdgeShape(const GraphShapeInfo& shape_info, const Edge& edge, + const InferredShape** info) { + auto it = shape_info.find(edge.src()->name()); + if (it == shape_info.end()) { + return errors::InvalidArgument( + "Input to replicated TPU computation is missing InferredShape: ", + edge.src()->name()); + } + TF_RET_CHECK(it->second.size() > edge.src_output()); + *info = &it->second[edge.src_output()]; + return Status::OK(); +} + +Status DistributedTPURewritePass::GetArgAndRetvalShapes( + const GraphShapeInfo& shape_info, const Node& node, + const ParameterInfo& params_info, std::vector* arg_shapes, + std::vector* retval_shapes) { + std::vector input_edges; + TF_RETURN_IF_ERROR(node.input_edges(&input_edges)); + + // If any replica's arg shape is unknown, we will mark the computation's arg + // shape as being unknown. If the shapes differ the TpuExecute Op will raise a + // runtime error. + std::vector any_replica_shape_unknown( + params_info.NumInputsToEachReplica()); + arg_shapes->clear(); + arg_shapes->resize(params_info.NumInputsToEachReplica()); + TF_RET_CHECK(input_edges.size() == params_info.NumInputsFromHost()); + // Determines the shapes of the per-replica arguments and checks that all + // replicas have identical shapes. + int64 edge_pos = 0; + auto check_shape = [&](int input_index) -> Status { + const InferredShape* info; + TF_RETURN_IF_ERROR(GetEdgeShape(shape_info, *input_edges[edge_pos], &info)); + ++edge_pos; + + if ((info->handle_type == DT_INVALID && !info->shape.IsFullyDefined()) || + (info->handle_type != DT_INVALID && + !info->handle_shape.IsFullyDefined())) { + any_replica_shape_unknown[input_index] = true; + } + xla::StatusOr status = + MergeInferredShapes((*arg_shapes)[input_index], *info); + if (!status.ok()) { + return errors::InvalidArgument( + "Mismatched shapes for input ", input_index, ": ", + (*arg_shapes)[input_index].shape.DebugString(), " vs. ", + info->shape.DebugString()); + } + (*arg_shapes)[input_index] = status.ValueOrDie(); + return Status::OK(); + }; + + for (int64 i = 0; i < params_info.NumReplicas(); ++i) { + for (int64 j = 0; j < params_info.NumPerReplicaArgs(); ++j) { + TF_RETURN_IF_ERROR(check_shape(j)); + } + } + + for (int64 i = 0; i < params_info.NumDistributedArgs(); ++i) { + TF_RETURN_IF_ERROR(check_shape(params_info.NumPerReplicaArgs() + i)); + } + + for (int64 i = 0; + i < params_info.NumPerReplicaArgs() + params_info.NumDistributedArgs(); + ++i) { + if (any_replica_shape_unknown[i]) { + (*arg_shapes)[i].shape = PartialTensorShape(); + (*arg_shapes)[i].handle_shape = PartialTensorShape(); + } + } + + // Determines the shape of the broadcast arguments. + for (int64 i = 0; i < params_info.NumBroadcastArgs(); ++i) { + TF_RET_CHECK(node.input_type(edge_pos) != DT_RESOURCE); + const InferredShape* info; + TF_RETURN_IF_ERROR(GetEdgeShape(shape_info, *input_edges[edge_pos], &info)); + (*arg_shapes)[i + params_info.NumPerReplicaArgs() + + params_info.NumDistributedArgs()] + .shape = info->shape; + ++edge_pos; + } + + // Determines the handle shape and handle type of the resource variable + // arguments. 
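[Illustrative note, not part of the patch] A simplified stand-in for the per-replica merge performed by check_shape() above, with InferredShape reduced to an optional dimension vector: a fully known shape must match across replicas, while an unknown one defers to the other replica's shape (the pass separately records that some replica was unknown and resets that argument's shape to undefined afterwards).

#include <optional>
#include <stdexcept>
#include <vector>

// Simplified stand-ins for InferredShape / MergeInferredShapes.
using Shape = std::optional<std::vector<int>>;  // nullopt == not fully defined

Shape MergeReplicaShapes(const Shape& merged_so_far, const Shape& replica_shape) {
  if (!merged_so_far) return replica_shape;
  if (!replica_shape) return merged_so_far;
  if (*merged_so_far != *replica_shape)
    throw std::invalid_argument("Mismatched shapes for input");
  return merged_so_far;
}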
+ for (int64 i = 0; i < params_info.NumVariables(); ++i) { + TF_RET_CHECK(node.input_type(edge_pos) == DT_RESOURCE); + const InferredShape* info; + TF_RETURN_IF_ERROR(GetEdgeShape(shape_info, *input_edges[edge_pos], &info)); + InferredShape& arg_shape = + (*arg_shapes)[i + params_info.NumPerReplicaArgs() + + params_info.NumDistributedArgs() + + params_info.NumBroadcastArgs()]; + arg_shape.shape = TensorShape(); // Variables are always scalars. + arg_shape.handle_shape = info->handle_shape; + arg_shape.handle_type = info->handle_type; + TF_RET_CHECK(arg_shape.handle_type != DT_INVALID); + ++edge_pos; + } + + // Determines the shape of the guaranteed constants. + // TODO(vinuraja): Can be removed because they are not required for any + // calculations. Leaving them here for symmetry with other structures like + // arg_types, arg_sharding, etc. + for (int64 i = 0; i < params_info.NumGuaranteedConstants(); ++i) { + TF_RET_CHECK(node.input_type(edge_pos) != DT_RESOURCE); + const InferredShape* info; + TF_RETURN_IF_ERROR(GetEdgeShape(shape_info, *input_edges[edge_pos], &info)); + (*arg_shapes)[i + params_info.NumPerReplicaArgs() + + params_info.NumDistributedArgs() + + params_info.NumBroadcastArgs() + params_info.NumVariables()] + .shape = info->shape; + ++edge_pos; + } + + // Extract the return value shapes. + auto it = shape_info.find(node.name()); + retval_shapes->clear(); + if (it != shape_info.end()) { + TF_RET_CHECK(it->second.size() >= node.num_outputs()); + retval_shapes->resize(node.num_outputs()); + for (int i = 0; i < node.num_outputs(); ++i) { + (*retval_shapes)[i].shape = it->second[i].shape; + } + } else if (node.num_outputs() > 0) { + return errors::InvalidArgument( + "Replicated TPU computation is missing InferredShape: ", + FormatNodeForError(node)); + } + return Status::OK(); +} + +// Verifies that all nodes have legal sharding. +static Status ValidateCoreNumbers(const Graph& graph, + int num_cores_per_replica) { + for (Node* n : graph.nodes()) { + TF_ASSIGN_OR_RETURN(absl::optional sharding, + ParseShardingFromDevice(*n, num_cores_per_replica)); + } + return Status::OK(); +} + +static Status InferXlaShardingFromNeighbors( + const Node& n, int num_cores_per_replica, FunctionLibraryRuntime* flr, + CachedFunctionHandles* cached_function_handles, + absl::optional* output_sharding, bool* is_fast_mem) { + int64 core = -1; + absl::optional result; + // We assume the variable has been allocated on fast memory if any consuming + // op has TPU_FAST_MEM_ATTR attribute. This is a protocol between runtime and + // compiler. + *is_fast_mem = false; + for (const Edge* edge : n.out_edges()) { + if (edge->IsControlEdge()) continue; + + TF_RETURN_IF_ERROR(ParseAndValidateShardingFromNeighbors( + num_cores_per_replica, n.name(), *edge->dst(), &core, is_fast_mem, + &result)); + + if (!flr) continue; + + // The nodes deciding this arg's device assignment might be in + // FunctionDef. Instantiate FunctionDefs associated with this node + // and check nodes using this arg. 
+ std::function parse_sharding_from_function = + [&](const Edge* call_edge) { + auto associated_functions = GetAssociatedFunctions( + *call_edge->dst(), flr->GetFunctionLibraryDefinition()); + for (auto& associated_function : associated_functions) { + FunctionLibraryRuntime::Handle handle; + TF_RETURN_IF_ERROR(cached_function_handles->GetOrInstantiate( + associated_function.func_name(), + AttrSlice(&associated_function.attrs()), &handle)); + const FunctionBody* body = flr->GetFunctionBody(handle); + Graph* g = body->graph; + + for (Node* body_node : g->nodes()) { + if (!body_node->IsArg()) continue; + + int index; + TF_RETURN_IF_ERROR( + GetNodeAttr(body_node->attrs(), "index", &index)); + if (index != call_edge->dst_input()) continue; + + for (const Edge* out_edge : body_node->out_edges()) { + if (out_edge->IsControlEdge()) continue; + + TF_RETURN_IF_ERROR(ParseAndValidateShardingFromNeighbors( + num_cores_per_replica, n.name(), *out_edge->dst(), &core, + is_fast_mem, &result)); + + TF_RETURN_IF_ERROR(parse_sharding_from_function(out_edge)); + } + } + } + return Status::OK(); + }; + TF_RETURN_IF_ERROR(parse_sharding_from_function(edge)); + } + *output_sharding = result; + return Status::OK(); +} + +bool UseSpmdForXlaPartitioning(const Node* replicate_node) { + bool spmd_attr; + if (!replicate_node || + !TryGetNodeAttr(replicate_node->attrs(), "use_spmd_for_xla_partitioning", + &spmd_attr)) { + spmd_attr = false; + } + return spmd_attr; +} + +Status DistributedTPURewritePass::AssignArgsAndRetvalsToCores( + int num_cores_per_replica, const ParameterInfo& params_info, + const DataTypeVector& arg_types, + const std::vector& arg_shapes, + const DataTypeVector& retval_types, + const std::vector& retval_shapes, const Graph& graph, + const Node* replicate_node, FunctionLibraryRuntime* flr, + std::vector* arg_sharding, std::vector* arg_fast_mem, + std::vector* retval_sharding) { + // Builds vectors of the argument and return nodes. + std::vector args(arg_types.size()); + std::vector retvals(retval_types.size()); + absl::flat_hash_map partitioned_output_nodes; + for (Node* node : graph.op_nodes()) { + if (node->IsArg()) { + int index; + TF_RETURN_IF_ERROR(GetNodeAttr(node->attrs(), "index", &index)); + TF_RET_CHECK(index >= 0 && index < args.size()); + args[index] = node; + } else if (node->IsRetval()) { + int index; + TF_RETURN_IF_ERROR(GetNodeAttr(node->attrs(), "index", &index)); + TF_RET_CHECK(index >= 0 && index < retvals.size()); + retvals[index] = node; + } + } + for (const Edge* edge : replicate_node->out_edges()) { + int num_partitioned_outputs = 0; + for (const Edge* out_edge : edge->dst()->out_edges()) { + if (out_edge->dst()->type_string() == kTPUPartitionedOutput) { + partitioned_output_nodes[edge->src_output()] = out_edge->dst(); + num_partitioned_outputs++; + } + } + if (num_partitioned_outputs > 1) { + return errors::InvalidArgument( + "More than one TPUPartitionedOutput per replciated output."); + } + } + + // Verifies there are no missing arguments/return values. + for (int i = 0; i < args.size(); ++i) { + if (args[i] == nullptr) { + return errors::Internal("Missing function argument: ", i); + } + } + for (int i = 0; i < retvals.size(); ++i) { + if (retvals[i] == nullptr) { + return errors::Internal("Missing function return value: ", i); + } + } + + // Assigns a core to each _Arg. Chooses the lowest-numbered core that + // consumes the argument. We choose the lowest-numbered core so the + // assignment is deterministic. 
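[Illustrative note, not part of the patch] The consumer walk above, reduced to its shape with hypothetical types (the real pass walks graph edges and instantiated FunctionDefs): keep the first explicit core hint found among an argument's consumers, and recurse into consumers that are function calls so hints inside called bodies are also seen.

#include <optional>
#include <vector>

struct Consumer {
  std::optional<int> requested_core;          // hint parsed from a device string
  std::vector<Consumer> call_body_consumers;  // consumers inside a called function
};

void CollectCoreHint(const Consumer& c, std::optional<int>* core) {
  if (!core->has_value() && c.requested_core) *core = c.requested_core;
  for (const Consumer& inner : c.call_body_consumers) CollectCoreHint(inner, core);
}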
+ TensorDevicePlacer args_device_selector(num_cores_per_replica, arg_types, + arg_shapes); + arg_sharding->resize(args.size()); + arg_fast_mem->resize(args.size()); + CachedFunctionHandles cached_function_handles(flr); + const bool use_spmd = UseSpmdForXlaPartitioning(replicate_node) || + replicate_inputs_outputs_by_default_for_xla_spmd_; + for (int i = 0; i < args.size(); ++i) { + const Node* n = args[i]; + absl::optional assigned_core; + absl::optional sharding; + bool is_fast_mem; + TF_RETURN_IF_ERROR(InferXlaShardingFromNeighbors( + *n, num_cores_per_replica, flr, &cached_function_handles, &sharding, + &is_fast_mem)); + + if (params_info.IsPerReplicaArg(i) || params_info.IsDistributedArg(i)) { + Node* input_node; + TF_RETURN_IF_ERROR(replicate_node->input_node(i, &input_node)); + if (input_node->type_string() == kTPUPartitionedInput) { + TF_ASSIGN_OR_RETURN(absl::optional parsed_sharding, + GetShardingFromNodeDef(input_node->def())); + if (!parsed_sharding.has_value()) + return errors::InvalidArgument("Missing _XlaSharding attr from: ", + input_node->DebugString()); + sharding = parsed_sharding; + VLOG(1) << "Arg " << i << " parsed sharding information from " + << input_node->name() << " : " + << parsed_sharding->DebugString(); + } + } + + if (sharding.has_value() && enable_automatic_model_parallelism_) { + return tensorflow::errors::InvalidArgument( + "Specifying manual sharding is not allowed when automatic " + "model parallelism is enabled.", + sharding->DebugString()); + } + + if (!sharding.has_value()) { + if (use_spmd && + (params_info.IsVariableArg(i) || params_info.IsBroadcastArg(i) || + ((params_info.IsPerReplicaArg(i) || + params_info.IsDistributedArg(i)) && + arg_types[i] != DT_RESOURCE))) { + // Use replication for host variables or non-variable per-replica + // inputs. + sharding = xla::sharding_builder::Replicate(); + } else { + // TODO(dlibenzi): Distributing variables to cores other than 0 makes + // learning/brain/research/babelfish/trainer:trainer_tpu_test fail. + // For now distribute only per replica arguments, unless + // tf_jf_distribute_vars is set, to allow debugging the issue. 
+ if (((params_info.IsPerReplicaArg(i) || + params_info.IsDistributedArg(i)) && + arg_types[i] != DT_RESOURCE) || + (distribute_vars_ && params_info.IsVariableArg(i))) { + assigned_core = args_device_selector.RetrieveAssignment(i); + } else { + assigned_core = 0; + } + sharding = xla::sharding_builder::AssignDevice(*assigned_core); + } + } else if (sharding->type() == xla::OpSharding::MAXIMAL) { + assigned_core = sharding->tile_assignment_devices(0); + } else if (sharding->type() != xla::OpSharding::REPLICATED && + sharding->type() != xla::OpSharding::OTHER) { + return tensorflow::errors::InvalidArgument( + "Unsupported argument sharding: ", sharding->DebugString()); + } + if (assigned_core.has_value()) { + args_device_selector.ReportDeviceAssigned(*assigned_core, i); + VLOG(3) << "Assigning argument " << i << " (" << n->DebugString() + << ") to core " << *assigned_core; + args[i]->set_assigned_device_name(CoreDeviceLabel(*assigned_core)); + } else if (sharding->type() == xla::OpSharding::OTHER) { + for (int64 core : sharding->tile_assignment_devices()) { + args_device_selector.ReportDeviceAssigned(core, i); + VLOG(3) << "Assigning argument " << i << " (" << n->DebugString() + << ") with tiled sharding to core " << core; + } + } else { + CHECK_EQ(sharding->type(), xla::OpSharding::REPLICATED); + for (int64 core = 0; core < num_cores_per_replica; ++core) { + args_device_selector.ReportDeviceAssigned(core, i); + } + VLOG(3) << "Assigning argument " << i << " (" << n->DebugString() + << ") to all cores"; + } + (*arg_sharding)[i] = *sharding; + (*arg_fast_mem)[i] = is_fast_mem; + if (is_fast_mem) { + VLOG(3) << "Add " << TPU_FAST_MEM_ATTR << " attribute to " + << args[i]->name(); + } + args[i]->AddAttr(kShardingAttribute, sharding->SerializeAsString()); + } + TF_RETURN_IF_ERROR(cached_function_handles.ReleaseAllHandles()); + + // Assigns each _Retval node to the core that produces its value. 
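[Illustrative note, not part of the patch] Summarizing the argument defaults chosen above when no explicit _XlaSharding is present, as a condensed sketch with a hypothetical enum and helper (the real code also excludes DT_RESOURCE per-replica inputs from the SPMD branch and builds the result with xla::sharding_builder): SPMD replicates; per-replica and distributed inputs, plus variables when distribute_vars_ is set, get a load-balanced core; everything else is pinned to core 0.

#include <optional>

enum class ArgKind { kPerReplica, kDistributed, kVariable, kBroadcast };
struct DefaultSharding { bool replicated; std::optional<int> core; };

DefaultSharding ChooseDefault(ArgKind kind, bool use_spmd, bool distribute_vars,
                              int next_balanced_core) {
  if (use_spmd) return {true, std::nullopt};              // REPLICATED
  if (kind == ArgKind::kPerReplica || kind == ArgKind::kDistributed ||
      (distribute_vars && kind == ArgKind::kVariable))
    return {false, next_balanced_core};                   // AssignDevice(chosen core)
  return {false, 0};                                      // AssignDevice(0)
}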
+ TensorDevicePlacer retvals_device_selector(num_cores_per_replica, + retval_types, retval_shapes); + retval_sharding->resize(retvals.size()); + for (int i = 0; i < retvals.size(); ++i) { + const Edge* edge; + TF_RETURN_IF_ERROR(retvals[i]->input_edge(0, &edge)); + + TF_ASSIGN_OR_RETURN( + absl::optional sharding, + ParseShardingFromDevice(*edge->src(), num_cores_per_replica)); + + if (partitioned_output_nodes.contains(i)) { + Node* output_node = partitioned_output_nodes[i]; + TF_ASSIGN_OR_RETURN(absl::optional parsed_sharding, + GetShardingFromNodeDef(output_node->def())); + if (parsed_sharding.has_value()) { + sharding = parsed_sharding; + VLOG(1) << "Retval " << i << " parsed sharding information from " + << output_node->name() << " : " << sharding->DebugString(); + } + } + absl::optional assigned_core; + if (sharding.has_value()) { + if (enable_automatic_model_parallelism_) { + return tensorflow::errors::InvalidArgument( + "Specifying manual sharding is not allowed when automatic " + "model parallelism is enabled.", + sharding->DebugString()); + } + + if (sharding.value().type() == xla::OpSharding::MAXIMAL) { + assigned_core = sharding.value().tile_assignment_devices(0); + TF_RETURN_IF_ERROR( + ValidateCoreNumber(*assigned_core, num_cores_per_replica)); + } else if (sharding.value().type() != xla::OpSharding::REPLICATED && + sharding.value().type() != xla::OpSharding::OTHER) { + return tensorflow::errors::InvalidArgument( + "Unsupported argument sharding: ", sharding->DebugString()); + } + } else { + if (use_spmd) { + sharding = xla::sharding_builder::Replicate(); + } else { + if (distribute_vars_) { + assigned_core = retvals_device_selector.RetrieveAssignment(i); + } else { + assigned_core = 0; + } + sharding = xla::sharding_builder::AssignDevice(*assigned_core); + } + } + if (assigned_core.has_value()) { + retvals[i]->set_assigned_device_name(CoreDeviceLabel(*assigned_core)); + retvals_device_selector.ReportDeviceAssigned(*assigned_core, i); + VLOG(3) << "Assigning return value " << i << " (" + << retvals[i]->DebugString() << ") to core " << *assigned_core; + } else if (sharding->type() == xla::OpSharding::OTHER) { + for (int64 core : sharding->tile_assignment_devices()) { + retvals_device_selector.ReportDeviceAssigned(core, i); + VLOG(3) << "Assigning return value " << i << " (" + << retvals[i]->DebugString() << ") with tiled sharding to core " + << core; + } + } else { + CHECK_EQ(sharding->type(), xla::OpSharding::REPLICATED); + for (int64 core = 0; core < num_cores_per_replica; ++core) { + retvals_device_selector.ReportDeviceAssigned(core, i); + } + VLOG(3) << "Assigning return value " << i << " (" + << retvals[i]->DebugString() << ") to all cores."; + } + retvals[i]->AddAttr(kShardingAttribute, sharding->SerializeAsString()); + (*retval_sharding)[i] = *sharding; + } + return Status::OK(); +} + +// Builds Shape nodes that compute the shapes of arguments whose shapes are not +// statically known. +/* static */ Status DistributedTPURewritePass::BuildDynamicShapeNodes( + const Node& replicate_node, const std::vector& arg_shapes, + const ParameterInfo& params_info, const std::vector& variable_reads, + Graph* graph, std::vector* dynamic_shape_nodes) { + dynamic_shape_nodes->clear(); + + std::vector replicate_input_edges; + TF_RETURN_IF_ERROR(replicate_node.input_edges(&replicate_input_edges)); + + // The compiler determines the shape of each constant by inspecting the value + // of its corresponding host-memory tensor; this happens when a step is run. 
+ // As a result, the shapes of constants are not needed at graph rewrite time. + const int num_args = arg_shapes.size() - params_info.NumGuaranteedConstants(); + TF_RET_CHECK(num_args == params_info.NumPerReplicaArgs() + + params_info.NumDistributedArgs() + + params_info.NumBroadcastArgs() + + params_info.NumVariables()); + + for (int i = 0; i < num_args; ++i) { + const PartialTensorShape* shape = arg_shapes[i].handle_type == DT_INVALID + ? &arg_shapes[i].shape + : &arg_shapes[i].handle_shape; + if (!shape->IsFullyDefined()) { + Node* src; + int src_output; + if (params_info.IsPerReplicaArg(i)) { + TF_RET_CHECK(i < replicate_input_edges.size()); + // All replicas must have the same input shapes. Uses the shape of the + // inputs from the first replica. + src = replicate_input_edges[i]->src(); + src_output = replicate_input_edges[i]->src_output(); + } else if (params_info.IsDistributedArg(i) || + params_info.IsBroadcastArg(i)) { + int64 input_num = + params_info.NumPerReplicaArgs() * params_info.NumReplicas() + i - + params_info.NumPerReplicaArgs(); + TF_RET_CHECK(0 <= input_num && + input_num < replicate_input_edges.size()); + src = replicate_input_edges[input_num]->src(); + src_output = replicate_input_edges[input_num]->src_output(); + } else { + int64 var_num = i - params_info.NumPerReplicaArgs() - + params_info.NumDistributedArgs() - + params_info.NumBroadcastArgs(); + TF_RET_CHECK(0 <= var_num && var_num < variable_reads.size()); + src = variable_reads[var_num]; + src_output = 0; + } + + NodeDef def; + def.set_name(graph->NewName(strings::StrCat(src->name(), "/shape"))); + def.set_op("Shape"); + def.set_device(src->assigned_device_name()); + AddNodeAttr("T", src->output_type(src_output), &def); + AddNodeAttr("out_type", DT_INT64, &def); + MergeDebugInfo(NodeDebugInfo(replicate_node.def()), &def); + + Status status; + Node* shape_node = graph->AddNode(def, &status); + if (!status.ok()) return status; + dynamic_shape_nodes->push_back(shape_node); + + shape_node->set_assigned_device_name(src->assigned_device_name()); + graph->AddEdge(src, src_output, shape_node, 0); + } + } + return Status::OK(); +} + +// Builds a TPUCompile node that compiles the bodies of the function call +// `nodes`. +Status DistributedTPURewritePass::BuildCompileNode( + const Node* replicate_node, const NameAttrList& function, + uint64 library_fingerprint, const ParameterInfo& params_info, + const std::vector& arg_shapes, + const DataTypeVector& arg_types, + const std::vector& guaranteed_constant_nodes, + const string& session_handle, + const std::vector& arg_sharding, + const std::vector& arg_fast_mem, + const std::vector& retval_sharding, + int num_cores_per_replica, const string& compile_device, + const xla::DeviceAssignment* xla_device_assignment, + const std::vector& dynamic_shape_nodes, Graph* graph, + Node** compile_node, int64 autotuner_thresh) { + VLOG(1) << "BuildCompileNode"; + + tpu::TPUCompileMetadataProto proto; + proto.set_num_replicas(params_info.NumReplicas()); + proto.set_num_cores_per_replica(num_cores_per_replica); + proto.set_function_library_fingerprint(library_fingerprint); + proto.set_enable_automatic_model_parallelism( + enable_cross_replica_sharding_mirrored_variables_); + const bool use_spmd = UseSpmdForXlaPartitioning(replicate_node); + proto.set_use_spmd_for_xla_partitioning(use_spmd); + + // Get and fill padding map. 
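[Illustrative note, not part of the patch] A worked check of the replicate-input index arithmetic used above, standalone and with made-up counts: per-replica inputs are laid out replica-major, so a distributed or broadcast argument at flat index i sits after all NumPerReplicaArgs * NumReplicas per-replica edges.

#include <cassert>

int main() {
  const int num_per_replica_args = 3, num_replicas = 2;
  auto input_num_for = [&](int i) {  // i: flat argument index in one replica's signature
    return num_per_replica_args * num_replicas + i - num_per_replica_args;
  };
  // The first distributed arg (i == 3) follows the 3 * 2 per-replica edges.
  assert(input_num_for(3) == 6);
  return 0;
}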
+ if (replicate_node != nullptr) { + TF_RETURN_IF_ERROR( + FillPaddingMap(*replicate_node, proto.mutable_padding_maps())); + xla::DebugOptions::StepMarkerLocation location; + TF_RETURN_IF_ERROR(GetStepMarkerLocation(*replicate_node, &location)); + proto.set_step_marker_location(location); + } + + if (xla_device_assignment != nullptr) { + TF_RETURN_IF_ERROR( + xla_device_assignment->Serialize(proto.mutable_device_assignment())); + } + + const int num_args = arg_types.size(); + const int num_guaranteed_constants = guaranteed_constant_nodes.size(); + const int guaranteed_const_start_index = num_args - num_guaranteed_constants; + TF_RET_CHECK(num_args == arg_shapes.size()); + TF_RET_CHECK(num_args == arg_sharding.size()) + << num_args << " != " << arg_sharding.size(); + + for (int i = 0; i < num_args; ++i) { + tpu::TPUCompileMetadataProto::Arg* arg = proto.add_args(); + DataType type = arg_types[i]; + const InferredShape& arg_shape = arg_shapes[i]; + if (type == DT_RESOURCE) { + TF_RET_CHECK(arg_shape.handle_type != DT_INVALID) << i; + arg->set_dtype(arg_shape.handle_type); + arg_shape.handle_shape.AsProto(arg->mutable_shape()); + arg->set_kind(tpu::TPUCompileMetadataProto::Arg::VARIABLE); + arg->set_fast_mem(arg_fast_mem[i]); + } else { + arg->set_dtype(type); + arg_shape.shape.AsProto(arg->mutable_shape()); + if (i >= guaranteed_const_start_index) { + const DataType edge_type = + guaranteed_constant_nodes[i - guaranteed_const_start_index] + ->output_type(0); + TF_RET_CHECK(type == edge_type) + << "Arg type: " << type << " but edge type: " << edge_type; + arg->set_kind(tpu::TPUCompileMetadataProto::Arg::GUARANTEED_CONSTANT); + } else { + arg->set_kind(tpu::TPUCompileMetadataProto::Arg::PARAMETER); + } + } + // As long as the argument is not a per-replica one, it should have the same + // value for all replicas. For clarity, we keep the (redundant) checks for + // variable, broadcast and constant types, to prevent bugs in case new types + // with different semantics are introduced in the future. + arg->set_is_same_data_across_replicas( + !params_info.IsPerReplicaArg(i) && !params_info.IsDistributedArg(i) && + (params_info.IsVariableArg(i) || params_info.IsBroadcastArg(i) || + params_info.IsConstantArg(i))); + if (params_info.mirrored_variable_indices().count(i) > 0) { + CHECK_EQ(type, DT_RESOURCE); + arg->set_is_same_data_across_replicas(true); + // 64-bit type is not shardable by XLA:TPU yet. + bool sharding_enabled = (arg_shape.handle_type != DT_COMPLEX64 && + arg_shape.handle_type != DT_INT64 && + arg_shape.handle_type != DT_UINT64 && + arg_shape.handle_type != DT_DOUBLE); + arg->set_enable_xla_sharding( + sharding_enabled ? 
tpu::TPUCompileMetadataProto::Arg::TENTATIVE + : tpu::TPUCompileMetadataProto::Arg::DISALLOWED); + } + *arg->mutable_sharding() = arg_sharding[i]; + } + + const int num_retvals = retval_sharding.size(); + for (int i = 0; i < num_retvals; ++i) { + *proto.add_retvals()->mutable_sharding() = retval_sharding[i]; + } + proto.set_session_handle(session_handle); + + DataTypeVector constant_arg_types; + constant_arg_types.reserve(num_guaranteed_constants); + for (int i = 0; i < num_guaranteed_constants; ++i) { + constant_arg_types.push_back(arg_types[guaranteed_const_start_index + i]); + } + proto.set_xla_fusion_autotuner_thresh(autotuner_thresh); + + string metadata; + proto.SerializeToString(&metadata); + + NodeDef def; + def.set_name(UniqueNodeName("TPUReplicate/_compile", graph)); + def.set_op("TPUCompile"); + def.set_device(compile_device); + if (replicate_node) { + MergeDebugInfo(NodeDebugInfo(replicate_node->def()), &def); + } + + AddNodeAttr("function", function, &def); + AddNodeAttr("num_computations", num_cores_per_replica, &def); + AddNodeAttr("NumDynamicShapes", static_cast(dynamic_shape_nodes.size()), + &def); + AddNodeAttr("metadata", metadata, &def); + AddNodeAttr("Tguaranteed_constants", constant_arg_types, &def); + + Status status; + *compile_node = graph->AddNode(def, &status); + TF_RETURN_IF_ERROR(status); + + (*compile_node)->set_assigned_device_name(compile_device); + + for (int i = 0; i < dynamic_shape_nodes.size(); ++i) { + graph->AddEdge(dynamic_shape_nodes[i], 0, *compile_node, i); + } + + for (int i = 0; i < num_guaranteed_constants; ++i) { + graph->AddEdge(guaranteed_constant_nodes[i], 0, *compile_node, + dynamic_shape_nodes.size() + i); + } + VLOG(1) << "BuildCompileNode(): " << status; + return status; +} + +Status DistributedTPURewritePass::FindGuaranteedConstantInputs( + const Node& node, const NameRangeMap& input_range_map, + std::vector* guaranteed_constants) { + std::vector input_edges; + TF_RETURN_IF_ERROR(node.input_edges(&input_edges)); + std::pair variables_limits = + input_range_map.at("guaranteed_constants"); + for (int i = variables_limits.first; i < variables_limits.second; ++i) { + guaranteed_constants->push_back(input_edges[i]->src()); + } + return Status::OK(); +} + +Status DistributedTPURewritePass::FindVariableInputs( + const Node& node, const NameRangeMap& input_range_map, + std::vector* variables) { + std::vector input_edges; + TF_RETURN_IF_ERROR(node.input_edges(&input_edges)); + std::pair variables_limits = input_range_map.at("variables"); + for (int i = variables_limits.first; i < variables_limits.second; ++i) { + Node* node = input_edges[i]->src(); + + // Find the type of the VarHandleOp that feeds this node, looking through + // any wrapping Enter or Switch nodes. + while (node->IsEnter() || node->IsSwitch()) { + TF_RETURN_IF_ERROR(node->input_node(0, &node)); + } + // Fix the variable device assignment if it is requested with a full name. 
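[Illustrative note, not part of the patch] A standalone sketch of the TPUCompile input layout wired above (node names are purely illustrative): the dynamic-shape tensors occupy the first inputs and the guaranteed constants follow, which is what the NumDynamicShapes and Tguaranteed_constants attributes describe.

#include <cstdio>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> dynamic_shape_nodes = {"arg0/shape", "var2/shape"};
  std::vector<std::string> guaranteed_constants = {"const_a"};
  std::vector<std::string> compile_inputs;
  for (const auto& n : dynamic_shape_nodes) compile_inputs.push_back(n);
  for (const auto& n : guaranteed_constants) compile_inputs.push_back(n);
  for (size_t i = 0; i < compile_inputs.size(); ++i)
    std::printf("TPUCompile input %zu <- %s\n", i, compile_inputs[i].c_str());
  return 0;
}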
+ if (!node->has_assigned_device_name() && + !node->requested_device().empty()) { + DeviceNameUtils::ParsedName var_device; + TF_RET_CHECK(DeviceNameUtils::ParseFullName(node->requested_device(), + &var_device)); + if (var_device.has_job && var_device.has_replica && var_device.has_task && + var_device.has_type && var_device.has_id) { + node->set_assigned_device_name(node->requested_device()); + if (node != input_edges[i]->src() && + !input_edges[i]->src()->has_assigned_device_name()) { + input_edges[i]->src()->set_assigned_device_name( + node->requested_device()); + } + } + } + if (node->type_string() == "VarHandleOp") { + DataType dtype; + TF_RETURN_IF_ERROR(GetNodeAttr(node->attrs(), "dtype", &dtype)); + variables->push_back(VariableInput{input_edges[i]->src(), + input_edges[i]->src_output(), dtype}); + } else if (node->type_string() == "_Arg") { + std::vector dtypes; + TF_RETURN_IF_ERROR(GetNodeAttr(node->attrs(), "_handle_dtypes", &dtypes)); + if (dtypes.empty()) { + return errors::Internal( + "_Arg node with resource output must have non-empty _handle_dtypes " + "attribute: ", + node->DebugString()); + } + variables->push_back(VariableInput{ + input_edges[i]->src(), input_edges[i]->src_output(), dtypes[0]}); + } else { + return errors::Internal( + "Cannot handle variable input with node type other than VarHandleOp " + "and _Arg: ", + node->DebugString()); + } + } + return Status::OK(); +} + +// Builds a NoOp node, used for building control dependencies. +static Status BuildNoopNode(const Node& source, StringPiece name, + const string& device, Graph* graph, Node** node) { + NodeDefBuilder builder(name, "NoOp", NodeDebugInfo(source)); + if (!device.empty()) { + builder.Device(device); + } + NodeDef def; + TF_RETURN_IF_ERROR(builder.Finalize(&def)); + + Status status; + *node = graph->AddNode(def, &status); + if (!device.empty()) { + (*node)->set_assigned_device_name(device); + } + return status; +} + +Status DistributedTPURewritePass::ConnectHostComputeNodes( + Node* compile_node, Node* key_placeholder_node, Graph* graph) { + // First find all the downstream nodes of the key placeholder node, since we + // want to delete the connecting edges from key_placeholder_node which would + // invalidate the out_nodes iterator. 
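[Illustrative note, not part of the patch] The wrapper-skipping walk used when locating the variable handle above, reduced to a standalone form (this struct is a hypothetical stand-in, not the TensorFlow graph Node): follow input 0 through Enter and Switch nodes until the producer of the resource handle is reached.

#include <string>

struct PlainNode {               // hypothetical stand-in for a graph node
  std::string op;
  PlainNode* input0 = nullptr;
};

const PlainNode* ResolveVariableSource(const PlainNode* n) {
  while (n != nullptr && (n->op == "Enter" || n->op == "Switch")) n = n->input0;
  return n;  // expected: "VarHandleOp" or a resource "_Arg"
}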
+ std::vector host_transfer_nodes; + for (Node* node : key_placeholder_node->out_nodes()) { + host_transfer_nodes.push_back(node); + } + for (Node* node : host_transfer_nodes) { + int input_index = -1; + for (int i = 0; i < node->num_inputs(); i++) { + const Edge* e; + TF_RETURN_IF_ERROR(node->input_edge(i, &e)); + if (e->src() == key_placeholder_node) { + if (input_index != -1) { + return errors::Internal( + "Node ", node->name(), + " has multiple input edges from key placeholder node"); + } + input_index = e->dst_input(); + } + } + if (input_index == -1) { + return errors::Internal("Node ", node->name(), + " has no input edge from key placeholder node"); + } + const Edge* key_edge; + TF_RETURN_IF_ERROR(node->input_edge(input_index, &key_edge)); + graph->RemoveEdge(key_edge); + graph->AddEdge(compile_node, 1, node, input_index); + } + graph->RemoveNode(key_placeholder_node); + return Status::OK(); +} + +Status DistributedTPURewritePass::BuildVariableReads( + absl::Span variables, Node* control_predecessor, + Graph* graph, std::vector* variable_reads) { + variable_reads->resize(variables.size()); + for (int i = 0; i < variables.size(); ++i) { + string name = + graph->NewName(strings::StrCat(variables[i].node->name(), "/read")); + NodeDefBuilder builder(name, "ReadVariableOp", + NodeDebugInfo(*variables[i].node)); + + builder.Attr("dtype", variables[i].dtype); + builder.Device(variables[i].node->assigned_device_name()); + builder.Input(variables[i].node->name(), 0, DT_RESOURCE); + NodeDef def; + TF_RETURN_IF_ERROR(builder.Finalize(&def)); + + Status status; + Node* read_node; + (*variable_reads)[i] = read_node = graph->AddNode(def, &status); + if (!status.ok()) return status; + + read_node->set_requested_device(variables[i].node->requested_device()); + read_node->set_assigned_device_name( + variables[i].node->assigned_device_name()); + graph->AddEdge(variables[i].node, variables[i].index, read_node, 0); + + graph->AddControlEdge(control_predecessor, read_node); + } + return Status::OK(); +} + +bool DistributedTPURewritePass::ContainsResourceWriteOp( + const Graph& graph, const FunctionLibraryDefinition& fld) { + for (const Node* n : graph.nodes()) { + const XlaResourceOpInfo* op_info = GetResourceOpInfoForOp(n->type_string()); + if (op_info && op_info->kind() != XlaResourceOpKind::kRead) { + VLOG(2) << "Found write resource op inside computation"; + return true; + } + } + for (const string& func_name : fld.ListFunctionNames()) { + const FunctionDef* func_def = fld.Find(func_name); + for (const NodeDef& n : func_def->node_def()) { + const XlaResourceOpInfo* op_info = GetResourceOpInfoForOp(n.op()); + if (op_info && op_info->kind() != XlaResourceOpKind::kRead) { + VLOG(2) << "Found write resource op inside " << func_name; + return true; + } + } + } + return false; +} + +Status DistributedTPURewritePass::BuildVariableWrites( + absl::Span variables, Node* control_successor, + absl::Span variable_writes, Graph* graph) { + CHECK_EQ(variables.size(), variable_writes.size()); + for (int i = 0; i < variables.size(); ++i) { + const VariableWrite& write = variable_writes[i]; + NodeDebugInfo debug_info(*variables[i].node); + + auto name = [&](string suffix) { + return graph->NewName( + strings::StrCat(variables[i].node->name(), "/", suffix)); + }; + + Node* write_node; + TF_RETURN_IF_ERROR( + IncompleteNodeDefBuilder(name("assign"), "AssignVariableOp", debug_info) + .AddAttr("dtype", variables[i].dtype) + .Device(variables[i].node->assigned_device_name()) + .Build(graph, &write_node)); + + // Colocate 
the control flow with the variable. + CondBuilder cb(variables[i].node->name(), + variables[i].node->assigned_device_name(), debug_info, + graph); + + // Inputs to conditional. + Node* switch_val; + TF_RETURN_IF_ERROR( + cb.AddInput("switch_val", variables[i].dtype, + /*device=*/write.value->assigned_device_name(), debug_info, + &switch_val)); + Node* switch_var; + TF_RETURN_IF_ERROR( + cb.AddInput("switch_var", DT_RESOURCE, + /*device=*/variables[i].node->assigned_device_name(), + debug_info, &switch_var)); + // Conditionally write the value back. + graph->AddEdge(variables[i].node, variables[i].index, switch_var, 0); + graph->AddEdge(switch_var, CondBuilder::kThenBranch, write_node, 0); + graph->AddEdge(switch_val, CondBuilder::kThenBranch, write_node, 1); + // Add control edge from the write to value that will be merged. There is no + // output from the write so this control edge ensures the write completes. + graph->AddControlEdge(write_node, cb.switch_t()); + + graph->AddControlEdge(cb.control_successor(), control_successor); + + graph->AddEdge(write.predicate, write.predicate_output, cb.pred(), 0); + graph->AddEdge(write.value, write.value_output, switch_val, 0); + } + return Status::OK(); +} + +namespace { + +// Helper that creates an IdentityN node containing all of the variables +// values on CPU device 'device', except for those that will be split across +// cores. (For split variables, this may cause additional cross-host data +// transfers if more than 1 devices share the same variable partition on a +// remote host.) +// +// A previous iteration of this code built one Identity node per TPU core per +// variable, but this can rapidly become hundreds of thousands of nodes. This +// formulation creates a single IdentityN node containing all of the variables +// on each host. This may cause some unnecessary variable copies if only a +// subset of hosts consume a given variable, but has the virtue of being +// simple, and most models use pure replication where all cores want all the +// variables. +// +// Returns the node and its output index to be consumed by TPUExecute for the +// requested variable index. +xla::StatusOr CreateOrGetPerHostVariableCopy( + const string& host_cpu_device, int64 var_index, + const std::vector& variable_reads, + const DistributedTPURewritePass::ParameterInfo& params_info, + const std::vector& arg_shardings, + const Node& replicate_node, + absl::flat_hash_map>* per_host_var_copies, + Graph* graph) { + auto it = per_host_var_copies->find(host_cpu_device); + if (it != per_host_var_copies->end()) { + return it->second[var_index]; + } + + DataTypeVector dtypes; + // Per-variable data source for TPUExecute. + std::vector index_mapping; + index_mapping.reserve(variable_reads.size()); + dtypes.reserve(variable_reads.size()); + for (int64 i = 0; i < variable_reads.size(); ++i) { + Node* read = variable_reads[i]; + int64 orig_arg_num = + i + params_info.NumPerReplicaArgs() + params_info.NumBroadcastArgs(); + if (arg_shardings[orig_arg_num].type() != xla::OpSharding::OTHER) { + // We haven't built the IdentityN node yet, so temporarily use nullptr. + index_mapping.push_back( + NodeOut{nullptr, static_cast(dtypes.size())}); + dtypes.push_back(read->output_type(0)); + } else { + // Do not copy the full tensor of partitioned variables. 
+ index_mapping.push_back(NodeOut{read, 0}); + } + } + NodeDef ndef; + ndef.set_name( + graph->NewName(absl::StrCat(replicate_node.name(), "/_variable_copy"))); + ndef.set_op("IdentityN"); + ndef.set_device(host_cpu_device); + AddNodeAttr("T", dtypes, &ndef); + Status s; + Node* id_node = graph->AddNode(ndef, &s); + TF_RETURN_IF_ERROR(s); + id_node->set_assigned_device_name(host_cpu_device); + + for (int64 i = 0; i < variable_reads.size(); ++i) { + if (index_mapping[i].node == nullptr) { + // Fill index_mapping with the actual IdentityN node. + index_mapping[i].node = id_node; + // Add the edge to id_node. + graph->AddEdge(variable_reads[i], 0, id_node, index_mapping[i].index); + } + } + + auto result = index_mapping[var_index]; + (*per_host_var_copies)[host_cpu_device] = std::move(index_mapping); + return result; +} + +} // namespace + +Status DistributedTPURewritePass::BuildExecuteNodes( + const ParameterInfo& params_info, int num_tasks, int num_cores_per_replica, + const Node& replicate_node, const DataTypeVector& arg_types, + const std::vector& arg_shapes, + const DataTypeVector& retval_types, + const std::vector& arg_shardings, + const std::vector& retval_shardings, + const std::vector>& tpu_device_names, + Node* compile_node, const std::vector& variable_reads, + Node* control_predecessor, Node* control_successor, + std::vector* variable_writes, Graph* graph) { + VLOG(1) << "BuildExecuteNodes " << replicate_node.DebugString(); + TF_RET_CHECK(params_info.NumReplicas() == tpu_device_names.size()); + + const int num_variables = variable_reads.size(); + const int num_retvals_per_replica = retval_types.size(); + + variable_writes->resize(num_variables); + + std::vector replicate_input_edges; + TF_RETURN_IF_ERROR(replicate_node.input_edges(&replicate_input_edges)); + + // Map from replicate input index to the fan_in node; + absl::flat_hash_map> replicate_input_fan_in_nodes; + absl::flat_hash_map> replicate_output_fan_out_nodes; + absl::flat_hash_map> + replicate_output_fan_out_dst_inputs; + std::vector to_be_removed_nodes; + + for (const Edge* e : replicate_input_edges) { + if (e->src()->type_string() == kTPUPartitionedInput) { + int num_users = 0; + for (const auto& ue : e->src()->out_edges()) { + if (!ue->IsControlEdge()) ++num_users; + } + if (num_users != 1) { + return tensorflow::errors::InvalidArgument( + e->src()->name(), " must only have one user. Found ", num_users); + } + to_be_removed_nodes.push_back(e->src()); + std::vector& nodes = replicate_input_fan_in_nodes[e->dst_input()]; + nodes.resize(num_cores_per_replica, nullptr); + VLOG(2) << "allocate " << num_cores_per_replica + << " for replicate_input_fan_in_nodes[" << e->dst_input() << "]"; + std::vector fan_in_edges; + TF_RETURN_IF_ERROR(e->src()->input_edges(&fan_in_edges)); + TF_RET_CHECK(fan_in_edges.size() == num_cores_per_replica); + + for (const Edge* fe : fan_in_edges) { + nodes[fe->dst_input()] = fe->src(); + VLOG(2) << "replicate_input_fan_in_nodes[" << e->dst_input() << "][" + << fe->dst_input() << "] = " << fe->src()->name(); + } + } + } + + // Replicate output edges are sorted by replica id and then by outputs for + // each replica. For example, if TPU Computation has outputs (output_1, + // output_2, and output_3) and number of replicas is 2, then + // replicate_output_edges order would be: + // output_1_replica_1, output_2_replica_1, output_3_replica_1, + // output_1_replica_2, output_2_replica_2, output_3_replica_2. 
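[Illustrative note, not part of the patch] A small worked illustration of the replica-major output ordering described in the comment above, with made-up sizes: with 3 outputs per replica and 2 replicas, output o of replica r sits at edge index r * 3 + o.

#include <cstdio>

int main() {
  const int num_retvals_per_replica = 3, num_replicas = 2;
  for (int r = 0; r < num_replicas; ++r)
    for (int o = 0; o < num_retvals_per_replica; ++o)
      std::printf("output_%d_replica_%d -> edge index %d\n",
                  o + 1, r + 1, r * num_retvals_per_replica + o);
  return 0;
}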
+ std::vector replicate_output_edges(replicate_node.num_outputs(), + nullptr); + for (const Edge* edge : replicate_node.out_edges()) { + if (edge->IsControlEdge()) continue; + + int num_partitioned_outputs = 0; + + for (const Edge* out_edge : edge->dst()->out_edges()) { + if (out_edge->dst()->type_string() == kTPUPartitionedOutput) { + num_partitioned_outputs++; + // Paths between replicate_node and replicate_output_fan_out_nodes: + // ReplicateNode->TpuOutIdenity->kTPUPartitionedOutput->fan-out-nodes + TF_RET_CHECK(edge->dst()->out_edges().size() == 1); + to_be_removed_nodes.push_back(edge->dst()); + to_be_removed_nodes.push_back(out_edge->dst()); + // Get the right replicated id from the replicate_output_edge. + std::vector& nodes = + replicate_output_fan_out_nodes[edge->src_output()]; + std::vector& dst_inputs = + replicate_output_fan_out_dst_inputs[edge->src_output()]; + nodes.resize(num_cores_per_replica, nullptr); + dst_inputs.resize(num_cores_per_replica, 0); + TF_RET_CHECK(out_edge->dst()->out_edges().size() == + num_cores_per_replica); + + for (const Edge* fe : out_edge->dst()->out_edges()) { + nodes[fe->src_output()] = fe->dst(); + dst_inputs[fe->src_output()] = fe->dst_input(); + VLOG(2) << "replicate_output_fan_out_nodes[" << out_edge->src_output() + << "][" << fe->src_output() + << "] = " << fe->dst()->DebugString() << " with dst_input " + << fe->dst_input(); + } + } + } + replicate_output_edges[edge->src_output()] = edge; + if (num_partitioned_outputs > 1) { + return errors::InvalidArgument( + "More than one TPUPartitionedOutput per replciated output."); + } + } + + const int num_execute_args = + arg_shardings.size() - params_info.NumGuaranteedConstants(); + // Inverts the arg_shardings and retval_shardings mappings to + // form core -> {argument number} maps. + std::vector> core_arg_nums(num_cores_per_replica); + for (int i = 0; i < num_execute_args; ++i) { + const auto& sharding = arg_shardings[i]; + if (sharding.type() == xla::OpSharding::MAXIMAL) { + int core = sharding.tile_assignment_devices(0); + TF_RETURN_IF_ERROR(ValidateCoreNumber(core, num_cores_per_replica)); + core_arg_nums[core].push_back(i); + } else if (sharding.type() == xla::OpSharding::OTHER) { + for (int64 core : sharding.tile_assignment_devices()) { + core_arg_nums[core].push_back(i); + } + } else if (sharding.type() == xla::OpSharding::REPLICATED) { + for (int core = 0; core < num_cores_per_replica; ++core) { + core_arg_nums[core].push_back(i); + } + } else { + return tensorflow::errors::InvalidArgument( + "Unsupported argument sharding: ", sharding.DebugString()); + } + } + std::vector> core_retval_nums(num_cores_per_replica); + for (int i = 0; i < retval_shardings.size(); ++i) { + const auto& sharding = retval_shardings[i]; + if (sharding.type() == xla::OpSharding::MAXIMAL) { + int core = sharding.tile_assignment_devices(0); + TF_RETURN_IF_ERROR(ValidateCoreNumber(core, num_cores_per_replica)); + core_retval_nums[core].push_back(i); + } else if (sharding.type() == xla::OpSharding::REPLICATED) { + for (int core = 0; core < num_cores_per_replica; ++core) { + core_retval_nums[core].push_back(i); + } + } else if (sharding.type() == xla::OpSharding::OTHER) { + for (int64 core : sharding.tile_assignment_devices()) { + core_retval_nums[core].push_back(i); + } + } else { + return tensorflow::errors::InvalidArgument( + "Unsupported argument sharding: ", sharding.DebugString()); + } + } + + // Maps host device name to a list of per-variable pairs (variable_copy_node, + // output_index_of_copy_node). 
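[Illustrative note, not part of the patch] A simplified sketch of the per-host caching that the map below implements and that CreateOrGetPerHostVariableCopy above performs (types are stand-ins): the first request for a host builds the whole variable-to-output mapping once, and later requests for the same host reuse the cached entry.

#include <map>
#include <string>
#include <utility>
#include <vector>

using NodeOut = std::pair<std::string, int>;  // (copy node name, output index)

NodeOut GetOrCreatePerHostCopy(
    const std::string& host_device, int var_index, int num_variables,
    std::map<std::string, std::vector<NodeOut>>* per_host_var_copies) {
  auto it = per_host_var_copies->find(host_device);
  if (it != per_host_var_copies->end()) return it->second[var_index];
  std::vector<NodeOut> mapping;
  mapping.reserve(num_variables);
  for (int i = 0; i < num_variables; ++i)
    mapping.push_back({host_device + "/_variable_copy", i});  // one output per variable
  NodeOut result = mapping[var_index];
  (*per_host_var_copies)[host_device] = std::move(mapping);
  return result;
}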
+ absl::flat_hash_map> per_host_var_copies; + + // Mapping from original resource arg number to a second level map. Second + // level map is from core id to output index of updated variable value. + absl::flat_hash_map> + orig_arg_num_to_output_index_mapping; + // Mapping from retval index to a second level map. Second level map is from + // core id to output index of sharded output value. + std::unordered_map> + retval_index_to_output_index_mapping; + + // Represents mapping of argument index of sharded input to each + // TPUExecute node to its corresponding Split node and its output index + // from which sharded input will be fed into TPUExecute node. + std::map input_index_to_sharded_inputs; + + // Builds one TPUExecute node per core per replica. + std::vector> execute_nodes(params_info.NumReplicas()); + for (int core = 0; core < num_cores_per_replica; ++core) { + DataTypeVector core_retval_types; + for (int output : core_retval_nums[core]) { + core_retval_types.push_back(retval_types[output]); + } + DataTypeVector core_arg_types; + std::vector core_variable_writes; + for (int input : core_arg_nums[core]) { + // Resource variables can be passed either by reference (as a DT_RESOURCE) + // tensor or by value (as the variable's current value). Per-replica or + // distributed resource arguments are always passed by reference and + // broadcast variables are always passed by value. + if (arg_types[input] == DT_RESOURCE && + !params_info.IsPerReplicaArg(input) && + !params_info.IsDistributedArg(input)) { + DataType handle_type = arg_shapes[input].handle_type; + TF_RET_CHECK(handle_type != DT_INVALID) << DataTypeString(handle_type); + core_arg_types.push_back(handle_type); + int base = input - params_info.NumPerReplicaArgs() - + params_info.NumDistributedArgs() - + params_info.NumBroadcastArgs(); + // Variables passed by value will have a corresponding additional output + // containing an updated value for the variable. + core_variable_writes.push_back(base); + core_retval_types.push_back(handle_type); + } else { + core_arg_types.push_back(arg_types[input]); + } + } + + NodeDef def; + def.set_op("TPUExecute"); + MergeDebugInfo(NodeDebugInfo(replicate_node.def()), &def); + AddNodeAttr("Targs", core_arg_types, &def); + AddNodeAttr("Tresults", core_retval_types, &def); + + for (int64 replica = 0; replica < params_info.NumReplicas(); ++replica) { + def.set_name(strings::StrCat(replicate_node.name(), "/_execute_", replica, + "_", core)); + + Status status; + Node* node = graph->AddNode(def, &status); + if (!status.ok()) return status; + execute_nodes[replica].push_back(node); + + node->set_assigned_device_name(tpu_device_names[replica][core]); + + // Add control edges to ensure that execution happens after + // `control_predecessor`, happens before `control_successor`, and is + // triggered by evaluating any operator that depends on the original + // TPUReplicate operator. See the comment at the top of the header file + // for more details. + graph->AddControlEdge(control_predecessor, node); + graph->AddControlEdge(node, control_successor); + + // Add data input edges. + for (int64 i = 0; i < core_arg_nums[core].size(); ++i) { + int64 orig_arg_num = core_arg_nums[core][i]; + VLOG(2) << " replica " << replica << " core " << core << " i " << i + << " orig_arg_num " << orig_arg_num; + if (params_info.IsPerReplicaArg(orig_arg_num) || + params_info.IsDistributedArg(orig_arg_num)) { + // Per-replica input and distributed input + int64 input_num = params_info.IsPerReplicaArg(orig_arg_num) + ? 
replica * params_info.NumPerReplicaArgs() + + core_arg_nums[core][i] + : params_info.NumReplicas() * + params_info.NumPerReplicaArgs() + + core_arg_nums[core][i] - + params_info.NumPerReplicaArgs(); + + const Edge* edge = replicate_input_edges[input_num]; + VLOG(2) << "replicate_input_edges[" << input_num << "]"; + DataType dtype = edge->src()->output_type(edge->src_output()); + if (dtype == DT_RESOURCE) { + DataType handle_dtype = arg_shapes[orig_arg_num].handle_type; + if (std::find(kTpuAllTypes.begin(), kTpuAllTypes.end(), + handle_dtype) == kTpuAllTypes.end()) { + return errors::InvalidArgument( + "Unsupported resource variable data type for TPU: ", + DataTypeString(handle_dtype), ", caused by output ", + edge->src()->name(), ":", edge->src_output()); + } + } else { + if (std::find(kTpuAllTypes.begin(), kTpuAllTypes.end(), dtype) == + kTpuAllTypes.end()) { + return errors::InvalidArgument( + "Unsupported data type for TPU: ", DataTypeString(dtype), + ", caused by output ", edge->src()->name(), ":", + edge->src_output()); + } + } + if (arg_shardings[orig_arg_num].type() == xla::OpSharding::OTHER) { + // Don't automatically add a split node when input node is + // kTPUPartitionedInput + if (edge->src()->type_string() == kTPUPartitionedInput) { + VLOG(2) << "Connect " + << replicate_input_fan_in_nodes[input_num][core]->name() + << " to " << node->name() << " at " << i; + graph->AddEdge(replicate_input_fan_in_nodes[input_num][core], 0, + node, i); + } else { + if (dtype == DT_RESOURCE) { + return errors::InvalidArgument( + "Tiled sharding for per-replica DT_RESOURCE input must", + "be TPUPartitionedInput. Here got ", + edge->src()->type_string()); + } + const xla::OpSharding& sharding = arg_shardings[orig_arg_num]; + + // Create or get the Split node. + TF_ASSIGN_OR_RETURN( + ShardedInputInfo sharded_input_info, + CreateOrGetSplitNodesForInputSharding( + sharding, orig_arg_num, dtype, replica, + edge->src_output(), edge->src(), control_predecessor, + graph, &input_index_to_sharded_inputs)); + + // Calculate which output we should receive from the Split node. + absl::optional output_index = + GetCoreIndexInSharding(sharding, core); + TF_RET_CHECK(output_index); + + NodeOut split_node_and_index = + sharded_input_info.sharded_inputs.at(output_index.value()); + // Connect with Split node output. + graph->AddEdge(split_node_and_index.node, + split_node_and_index.index, node, i); + } + } else if (edge->src()->type_string() == kTPUPartitionedInput && + arg_shardings[orig_arg_num].type() == + xla::OpSharding::REPLICATED) { + graph->AddEdge(replicate_input_fan_in_nodes[input_num][core], 0, + node, i); + } else { + graph->AddEdge(edge->src(), edge->src_output(), node, i); + } + } else if (params_info.IsBroadcastArg(orig_arg_num)) { + // Broadcast input. + int64 input_num = params_info.FirstBroadcastArgFromHost() + + core_arg_nums[core][i] - + params_info.NumPerReplicaArgs() - + params_info.NumDistributedArgs(); + const Edge* edge = replicate_input_edges[input_num]; + DataType dtype = edge->src()->output_type(edge->src_output()); + if (std::find(kTpuAllTypes.begin(), kTpuAllTypes.end(), dtype) == + kTpuAllTypes.end()) { + return errors::InvalidArgument( + "Unsupported data type for TPU: ", DataTypeString(dtype), + ", caused by output ", edge->src()->name(), ":", + edge->src_output()); + } + graph->AddEdge(edge->src(), edge->src_output(), node, i); + } else { + // Variable input. 
+ int64 variable_num = orig_arg_num - params_info.NumPerReplicaArgs() - + params_info.NumDistributedArgs() - + params_info.NumBroadcastArgs(); + TF_RET_CHECK(variable_num < num_variables); + + Node* variable_read = variable_reads[variable_num]; + DataType dtype = variable_read->output_type(0); + if (std::find(kTpuAllTypes.begin(), kTpuAllTypes.end(), dtype) == + kTpuAllTypes.end()) { + return errors::InvalidArgument( + "Unsupported resource variable data type for TPU: ", + DataTypeString(dtype), ", caused by ReadVariableOp ", + variable_read->DebugString()); + } + DeviceNameUtils::ParsedName requested_device; + string requested = variable_read->requested_device(); + TF_RET_CHECK( + DeviceNameUtils::ParseFullName(requested, &requested_device)); + if (requested_device.type != "TPU") { + // Stage the value via the CPU device on the remote host. The graph + // partitioner will introduce an intermediate copy rather than + // copying the same tensor multiple times across the network, and we + // would prefer that intermediate copy to be in host memory to avoid + // running out of memory if the TPUExecute op on the staging device + // starts running before the _Send ops to the other TPU devices on + // the same host complete. We don't do this if the variables are + // already placed on TPU, otherwise it will cause an unnecessary + // round trip copy. + // TODO(b/79580121): give each replica its own on-device variable + // replica and then delete this code. + string device; + TF_RETURN_IF_ERROR(DeviceNameUtils::DeviceNameToCpuDeviceName( + tpu_device_names[replica][core], &device)); + TF_ASSIGN_OR_RETURN(auto var_data, + CreateOrGetPerHostVariableCopy( + device, variable_num, variable_reads, + params_info, arg_shardings, replicate_node, + &per_host_var_copies, graph)); + + if (arg_shardings[orig_arg_num].type() == xla::OpSharding::OTHER) { + const xla::OpSharding& sharding = arg_shardings[orig_arg_num]; + // Create or get the Split node. + TF_ASSIGN_OR_RETURN( + ShardedInputInfo sharded_input_info, + CreateOrGetSplitNodesForInputSharding( + sharding, orig_arg_num, + arg_shapes[orig_arg_num].handle_type, replica, + var_data.index, var_data.node, control_predecessor, graph, + &input_index_to_sharded_inputs)); + + // Calculate which output we should receive from the Split node. + absl::optional output_index = + GetCoreIndexInSharding(sharding, core); + TF_RET_CHECK(output_index); + NodeOut split_node_and_index = + sharded_input_info.sharded_inputs[output_index.value()]; + // Connect with Split node output. + graph->AddEdge(split_node_and_index.node, + split_node_and_index.index, node, i); + + } else { + graph->AddEdge(var_data.node, var_data.index, node, i); + } + } else { + graph->AddEdge(variable_reads[variable_num], 0, node, i); + } + } + } + + // Adds a program input edge from the compiler. + graph->AddEdge(compile_node, core + 1, node, node->num_inputs() - 1); + + // Add data output edges. 
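[Illustrative note, not part of the patch] The staging device used above is the CPU device on the same host as the target TPU (the real code calls DeviceNameUtils::DeviceNameToCpuDeviceName); a rough string-level sketch of that mapping, purely for illustration:

#include <cstdio>
#include <string>

// Rough illustration only: rewrite ".../device:TPU:<n>" to ".../device:CPU:0".
std::string TpuDeviceToHostCpuDevice(const std::string& tpu_device) {
  const std::string key = "/device:";
  std::string::size_type pos = tpu_device.rfind(key);
  if (pos == std::string::npos) return tpu_device;
  return tpu_device.substr(0, pos) + key + "CPU:0";
}

int main() {
  std::printf("%s\n",
      TpuDeviceToHostCpuDevice("/job:worker/replica:0/task:1/device:TPU:3").c_str());
  // -> /job:worker/replica:0/task:1/device:CPU:0
  return 0;
}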
+ int num_outputs = core_retval_nums[core].size(); + for (int i = 0; i < num_outputs; ++i) { + int output_num = + replica * num_retvals_per_replica + core_retval_nums[core][i]; + const auto& sharding = retval_shardings[core_retval_nums[core][i]]; + if (sharding.type() == xla::OpSharding::OTHER) { + int retval_index = core_retval_nums[core][i]; + retval_index_to_output_index_mapping[retval_index][core] = i; + bool is_last_core = + core == + *std::max_element(sharding.tile_assignment_devices().begin(), + sharding.tile_assignment_devices().end()); + bool isPartitionOutNode = false; + + const Edge* e = replicate_output_edges[output_num]; + const Edge* e_out; + for (const Edge* out_edge : e->dst()->out_edges()) { + if (out_edge->dst()->type_string() == kTPUPartitionedOutput) { + isPartitionOutNode = true; + e_out = out_edge; + } + } + if (isPartitionOutNode) { + graph->AddEdge( + node, i, replicate_output_fan_out_nodes[output_num][core], + replicate_output_fan_out_dst_inputs[output_num][core]); + VLOG(2) << "Connect " << node->name() << " at " << i << " to " + << replicate_output_fan_out_nodes[output_num][core]->name() + << " at " + << replicate_output_fan_out_dst_inputs[output_num][core]; + if (is_last_core) { + graph->RemoveEdge(e); + graph->RemoveEdge(e_out); + } + continue; + } + + // Do this in the iteration of last core in tile assignment, so all + // TPUExecute nodes have been created. + if (!is_last_core) { + continue; + } + + // Add a Concat node. + std::vector orig_inputs; + for (int64 core_id : sharding.tile_assignment_devices()) { + int core_retval_index = + retval_index_to_output_index_mapping[retval_index][core_id]; + orig_inputs.push_back( + NodeOut{execute_nodes[replica][core_id], + static_cast( + core_retval_nums[core_id][core_retval_index])}); + } + DataType dtype = e->src()->output_type(e->src_output()); + TF_ASSIGN_OR_RETURN( + Node * concat_node, + CreateConcatNodesForRetval(sharding, dtype, replica, orig_inputs, + graph, /*device=*/"")); + + const Edge* edge = replicate_output_edges[output_num]; + Node* dst = edge->dst(); + int dst_input = edge->dst_input(); + graph->RemoveEdge(edge); + graph->AddEdge(concat_node, 0, dst, dst_input); + + continue; + } + + // If this is a replicated output, outputs on all cores will be the + // same, and we only take the output from core 0. + if (sharding.type() == xla::OpSharding::REPLICATED && core != 0) { + continue; + } + + // If output has maximal sharding, make sure we only use output from + // TPUExecute node with logical core id equal to core id defined by the + // xla sharding. + if (sharding.type() == xla::OpSharding::MAXIMAL && + core != sharding.tile_assignment_devices(0)) { + continue; + } + + const Edge* replicate_edge_to_replace = + replicate_output_edges[output_num]; + Node* dst = replicate_edge_to_replace->dst(); + int dst_input = replicate_edge_to_replace->dst_input(); + graph->RemoveEdge(replicate_edge_to_replace); + graph->AddEdge(node, i, dst, dst_input); + } + + // Feed the updated variable values from the first replica to the + // variable write nodes. + if (replica == 0) { + for (int i = 0; i < core_variable_writes.size(); ++i) { + int orig_arg_num = + core_variable_writes[i] + params_info.NumPerReplicaArgs() + + params_info.NumDistributedArgs() + params_info.NumBroadcastArgs(); + const auto& sharding = arg_shardings[orig_arg_num]; + // If this is a tiling sharded variable, concat variable updates from + // all cores. 
+ if (sharding.type() == xla::OpSharding::OTHER) { + orig_arg_num_to_output_index_mapping[orig_arg_num][core] = i; + + // Do this in the iteration of last core in tile assignment, so all + // TPUExecute nodes have been created. + if (core != + *std::max_element(sharding.tile_assignment_devices().begin(), + sharding.tile_assignment_devices().end())) { + continue; + } + + // Add a Concat node. + std::vector orig_inputs; + for (int64 core_id : sharding.tile_assignment_devices()) { + int core_retval_num = + orig_arg_num_to_output_index_mapping[orig_arg_num][core_id]; + orig_inputs.push_back( + NodeOut{execute_nodes[0][core_id], + static_cast(core_retval_nums[core_id].size() + + core_retval_num)}); + } + + // Use the variable read's device for the concat. They should both + // be collocated with the variable. + absl::string_view device = + variable_reads[core_variable_writes[i]]->assigned_device_name(); + TF_ASSIGN_OR_RETURN( + Node * concat_node, + CreateConcatNodesForRetval( + sharding, arg_shapes[orig_arg_num].handle_type, replica, + orig_inputs, graph, device)); + // Populate VariableWrite. + VariableWrite& write = variable_writes->at(core_variable_writes[i]); + write.value = concat_node; + write.value_output = 0; + write.predicate = compile_node; + write.predicate_output = num_cores_per_replica + core + 1; + + continue; + } + + // If this is a replicated variable, outputs on all cores will be the + // same, and we only take the output from core 0 for the varialbe + // update. + if (sharding.type() == xla::OpSharding::REPLICATED && core != 0) { + continue; + } + VariableWrite& write = variable_writes->at(core_variable_writes[i]); + write.value = node; + write.value_output = num_outputs + i; + write.predicate = compile_node; + write.predicate_output = num_cores_per_replica + core + 1; + } + } + } + } + + for (Node* node : to_be_removed_nodes) { + graph->RemoveNode(node); + } + return Status::OK(); +} + +/* static */ Status DistributedTPURewritePass::CopyOutsideCompilationNodes( + int replica_index, const std::vector& outside_compilation_nodes, + const DeviceNameUtils::ParsedName& tpu_device, + const DeviceNameUtils::ParsedName& partial_device, + NodeToNodeReplicasMap* node_images, Graph* graph) { + for (Node* node : outside_compilation_nodes) { + NodeDef image_def = node->def(); + MergeDebugInfo(NodeDebugInfo(node->def()), &image_def); + const string suffix = strings::StrCat("/R", replica_index); + // In addition to node name, make the frame name unique to avoid multiple + // LoopCond nodes in one frame. + TF_RETURN_IF_ERROR( + AddPrefixAndSuffixToNode("" /* prefix */, suffix, &image_def)); + Status status; + Node* image = graph->AddNode(image_def, &status); + image->AddAttr(kXlaReplicaIdAttrName, replica_index); + TF_RETURN_IF_ERROR(status); + if (HasNodeAttr(image->def(), kXlaHasHostTransferAttrName)) { + TF_RETURN_IF_ERROR( + SetNodeDeviceForTPUCommunication(tpu_device, DEVICE_CPU, image)); + } else { + const string& original_device_string = + node->assigned_device_name().empty() ? node->requested_device() + : node->assigned_device_name(); + DeviceNameUtils::ParsedName device; + TF_RET_CHECK( + DeviceNameUtils::ParseFullName(original_device_string, &device)); + // If the requested device can be merged with the replica's host device, + // then do so. For example, if the requested device is "/CPU:0" or + // "/GPU:0" then it will be placed on the CPU/GPU of the host where this + // replica is running. 
But if the requested device is + // "/task:3/replica:2/CPU:0" then it will be placed on that task/replica. + if (DeviceNameUtils::IsSpecification(device, partial_device)) { + TF_RETURN_IF_ERROR( + DeviceNameUtils::MergeDevNames(&device, partial_device)); + } + image->set_requested_device(DeviceNameUtils::ParsedNameToString(device)); + } + std::vector& node_image_vector = (*node_images)[node]; + node_image_vector.resize(replica_index + 1); + node_image_vector[replica_index] = image; + } + return Status::OK(); +} + +/* static */ Status DistributedTPURewritePass::ReplicateOutsideCompilationNodes( + const std::vector>& tf_device_assignment, + const HostComputeCoreMap& host_compute_core, + const OutsideCompilationNodeMap& outside_compilation_nodes, + NodeToNodeReplicasMap* node_images, Graph* graph) { + // Iterate over replicas. + for (int i = 0; i < tf_device_assignment.size(); ++i) { + const auto& core_devices = tf_device_assignment[i]; + for (const auto& oc_cluster_iter : outside_compilation_nodes) { + const string& oc_cluster_name = oc_cluster_iter.first; + const auto& oc_cluster_nodes = oc_cluster_iter.second; + // We previously validated that host_compute_core contains an entry for + // each cluster. + int core = host_compute_core.at(oc_cluster_name); + TF_RET_CHECK(core >= 0 && core < core_devices.size()); + // tpu_device is the device the HostCompute XLA Op for this cluster runs + // on. + DeviceNameUtils::ParsedName tpu_device; + TF_RET_CHECK( + DeviceNameUtils::ParseFullName(core_devices[core], &tpu_device)); + // partial_device contains the replica and task but not the type. + DeviceNameUtils::ParsedName partial_device = tpu_device; + partial_device.has_type = false; + partial_device.has_id = false; + + if (tf_device_assignment.size() == 1) { + // With a single replica don't copy any nodes just put the original + // nodes into the image map. We leave the device placement alone, except + // that we have to fill in the correct core for the host send and + // receive nodes. + for (Node* node : oc_cluster_nodes) { + (*node_images)[node] = {node}; + node->AddAttr(kXlaReplicaIdAttrName, 0); + if (HasNodeAttr(node->def(), kXlaHasHostTransferAttrName)) { + TF_RETURN_IF_ERROR( + SetNodeDeviceForTPUCommunication(tpu_device, DEVICE_CPU, node)); + } + } + } else { + // Iterate over outside_compilation clusters in this computation, adding + // all the nodes with appropriate device assignments. + TF_RETURN_IF_ERROR( + CopyOutsideCompilationNodes(i, oc_cluster_nodes, tpu_device, + partial_device, node_images, graph)); + } + } + } + return Status::OK(); +} + +/* static */ Status DistributedTPURewritePass::CopyOutsideCompilationEdges( + const std::vector& outside_compilation_nodes, + const NodeToNodeReplicasMap& node_images, + const std::unordered_map outside_compilation_inputs, + Graph* graph) { + for (Node* node : outside_compilation_nodes) { + const auto& images = node_images.at(node); + // Make a copy of all edges and iterate on "in_edges", because we might + // remove edges when iteratating through them. + std::vector in_edges(node->in_edges().begin(), + node->in_edges().end()); + for (const Edge* edge : in_edges) { + Node* src = edge->src(); + const auto iter = node_images.find(src); + if (iter == node_images.end()) { + if (images.size() > 1) { + // The source node is a 'normal' node not part of any + // rewrite. Broadcast the value to all replicas. (If images.size() == + // 1 the cluster is not replicated and we can leave the original edge + // in place.) 
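The node copies created above in CopyOutsideCompilationNodes are kept distinct per replica by a name suffix. A small illustration of that convention only; the real code goes through AddPrefixAndSuffixToNode, which also keeps control-flow frame names unique.

#include <iostream>
#include <string>

// Sketch of the per-replica naming used for outside_compilation node images:
// each copy gets a "/R<replica>" suffix appended to the original node name.
std::string ReplicaImageName(const std::string& base, int replica_index) {
  return base + "/R" + std::to_string(replica_index);
}

int main() {
  for (int replica = 0; replica < 3; ++replica) {
    std::cout << ReplicaImageName("while/LoopCond", replica) << "\n";
  }
  // Prints while/LoopCond/R0, while/LoopCond/R1, while/LoopCond/R2.
  return 0;
}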
+ for (Node* dst : images) { + graph->AddEdge(src, edge->src_output(), dst, edge->dst_input()); + } + } + continue; + } + + // The source node is a replicated outside_compilation node. + const auto& src_images = iter->second; + if (src_images.size() != images.size()) { + return errors::InvalidArgument( + "Graph contains an edge from node ", src->name(), + " in an outside_compilation block replicated ", src_images.size(), + " ways to node ", node->name(), + " in an outside_compilation block replicated ", images.size(), + " ways. Replication factors must match. Leave a comment on " + "tracking bug b/76419636 if you need this to be supported."); + } + bool is_lifted_arg; + string outside_compilation_cluster; + if (GetNodeAttr(src->def(), kXlaIsLiftedArgAttrName, &is_lifted_arg) + .ok() && + GetNodeAttr(src->def(), kOutsideCompilationAttr, + &outside_compilation_cluster) + .ok()) { + const auto input_iter = + outside_compilation_inputs.find(outside_compilation_cluster); + TF_RET_CHECK(input_iter != outside_compilation_inputs.end()); + TF_RET_CHECK(input_iter->second->type_string() == "IdentityN"); + int dst_input = edge->dst_input(); + if (src_images.size() == 1) { + graph->RemoveEdge(edge); + } + for (int i = 0; i < src_images.size(); ++i) { + graph->AddEdge(input_iter->second, i, images[i], dst_input); + } + continue; + } + + bool is_placeholder_for_arg; + string outside_compilation_input_attr; + if (GetNodeAttr(src->def(), kXlaIsPlaceholderForArg, + &is_placeholder_for_arg) + .ok() && + GetNodeAttr(src->def(), kXlaOutsideCompilationInputsAttrName, + &outside_compilation_input_attr) + .ok()) { + const auto input_iter = + outside_compilation_inputs.find(outside_compilation_input_attr); + TF_RET_CHECK(input_iter != outside_compilation_inputs.end()); + TF_RET_CHECK(input_iter->second->type_string() == "IdentityN"); + int dst_input = edge->dst_input(); + if (src_images.size() == 1) { + graph->RemoveEdge(edge); + } + for (int i = 0; i < src_images.size(); ++i) { + graph->AddEdge(input_iter->second, i, images[i], dst_input); + } + continue; + } + + if (images.size() > 1) { + // If images.size() == 1 neither cluster is replicated and we can + // leave the original edges in place. + for (int i = 0; i < src_images.size(); ++i) { + graph->AddEdge(src_images[i], edge->src_output(), images[i], + edge->dst_input()); + } + } + } + for (const Edge* edge : node->out_edges()) { + Node* dst = edge->dst(); + const auto iter = node_images.find(dst); + if (iter == node_images.end()) { + // The source node is a 'normal' node not part of any rewrite. + if (edge->IsControlEdge()) { + // Make the dst node have a control dependency on every replica. + if (images.size() > 1) { + for (int i = 0; i < images.size(); ++i) { + graph->AddControlEdge(images[i], dst); + } + } + // else the cluster is not replicated so we can leave the original + // edge in place. + } else { + // The edge + // is only valid if the outside_compilation block is not replicated. + if (images.size() > 1) { + return errors::InvalidArgument( + "Graph contains an edge from node ", node->name(), + " in an outside_compilation block replicated ", images.size(), + " ways to node ", dst->name(), + " that is not part of an outside_compilation block. Edges from " + "outside_compilation to regular graph nodes are only supported " + "for replication factors of 1. Leave a comment on tracking bug " + "b/76419636 if you need this to be supported."); + } + // else the cluster is not replicated so we can leave the original + // edge in place. 
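The edge handling in this function boils down to a small rule: broadcast from an unreplicated source, copy edges pairwise when both clusters are replicated the same number of ways, and reject mismatched replication factors. A toy model of just that decision; the types and names below are invented for illustration and ignore the lifted-arg special cases.

#include <iostream>
#include <string>

// Toy decision table for copying one edge between (possibly replicated)
// outside_compilation clusters.
struct EdgeCopyPlan {
  bool ok;
  std::string error;
  int edges_to_add;
};

EdgeCopyPlan PlanEdgeCopies(int src_images, int dst_images) {
  if (src_images == 1 && dst_images == 1) return {true, "", 0};  // keep edge
  if (src_images == 1) return {true, "", dst_images};            // broadcast
  if (src_images != dst_images) {
    return {false, "replication factors must match", 0};
  }
  return {true, "", dst_images};  // one edge per replica, pairwise
}

int main() {
  std::cout << PlanEdgeCopies(1, 4).edges_to_add << "\n";  // 4 (broadcast)
  std::cout << PlanEdgeCopies(4, 4).edges_to_add << "\n";  // 4 (pairwise)
  std::cout << PlanEdgeCopies(2, 4).error << "\n";         // mismatch error
  return 0;
}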
+ } + } + // The case where src and dst are both in node_images is covered elsewhere + // when iterating over in_edges of dst. + } + } + return Status::OK(); +} + +/* static */ Status DistributedTPURewritePass::ReplicateOutsideCompilationEdges( + const OutsideCompilationNodeMap& outside_compilation_nodes, + const NodeToNodeReplicasMap& node_images, + const std::unordered_map outside_compilation_inputs, + Graph* graph) { + for (const auto& oc_cluster_iter : outside_compilation_nodes) { + TF_RETURN_IF_ERROR( + CopyOutsideCompilationEdges(oc_cluster_iter.second, node_images, + outside_compilation_inputs, graph)); + } + return Status::OK(); +} + +/* static */ Status DistributedTPURewritePass::RemoveOutsideCompilationNodes( + const NodeToNodeReplicasMap& node_images, Graph* graph) { + for (const auto& iter : node_images) { + if (iter.second.size() > 1) { + // The cluster was replicated so remove the original node. + Node* node = iter.first; + graph->RemoveNode(node); + } + } + return Status::OK(); +} + +/* static */ Status +DistributedTPURewritePass::LowerOutsideCompilationFunctionalNodes( + Graph* g, const FunctionLibraryDefinition& flib_def, + const TPUReplicateDeviceNamesMapping& tpu_replicate_device_names_mapping) { + bool modified = false; + do { + std::vector nodes_to_lower; + for (Node* n : g->op_nodes()) { + if (!HasNodeAttr(n->def(), kOutsideCompilationAttr)) { + continue; + } + + if (n->IsWhileNode() || n->IsIfNode() || IsFunctionCall(flib_def, *n)) { + // Only lower functional ops with DT_RESOURCE input, because otherwise + // placer will complain. For normal cases, lowering will cause slowdown + // when related functions are huge (b/139037679). + bool has_resource_input = false; + for (const Edge* e : n->in_edges()) { + if (!e->IsControlEdge() && + e->src()->output_type(e->src_output()) == DT_RESOURCE) { + has_resource_input = true; + break; + } + } + if (has_resource_input) { + nodes_to_lower.push_back(n); + } + } + } + + modified = !nodes_to_lower.empty(); + + auto lower_functional_node = [&flib_def, &g](Node* n) -> Status { + // Clear device assignment. Otherwise all lowered nodes will have + // device assignment, which is not what we want. + n->set_requested_device(""); + + int replica_id; + TF_RETURN_IF_ERROR( + GetNodeAttr(n->def(), kXlaReplicaIdAttrName, &replica_id)); + + string outside_compilation_attr; + TF_RETURN_IF_ERROR(GetNodeAttr(n->def(), kOutsideCompilationAttr, + &outside_compilation_attr)); + + // There are two different kinds of functional outside compilation nodes: + // 1. Nodes that are in outside compilation blocks already. They are + // generated by FunctionalizeControlFlowForXlaPass, and only have + // attribute kOutsideCompilationAttr. + // 2. Mirrored control flow built for outside compilation in functional + // nodes. They are generated by ExtractOutsideCompilationPass, and have + // both kOutsideCompilationAttr and kXlaHasHostTransferAttrName. + // When lowering them, they need to be treated differently. + // For 1), their body functions are always V1 functions written by users, + // and their "control outputs" are control inputs of _Retval nodes. They + // should be lowered as V1 functions. + // For 2), we always add necessary "control outputs" + // (_XlaRecvAtHost/_XlaSendAtHost nodes) to "control_ret" field in their + // FunctionDef's. They should be lowered as V2 functions. 
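Earlier in this function, the candidate set for lowering is restricted to functional nodes (While, If, or function calls) that carry the outside_compilation attribute and consume at least one DT_RESOURCE input. A simplified standalone model of that filter; FakeNode is purely illustrative.

#include <iostream>
#include <string>
#include <vector>

// Simplified model of the lowering filter: functional ops are only lowered
// when they belong to an outside_compilation cluster and take a resource
// input, since lowering everything would slow down large functions.
struct FakeNode {
  bool is_functional;                 // While/If/function call
  bool has_outside_compilation_attr;  // kOutsideCompilationAttr present
  std::vector<std::string> input_dtypes;
};

bool ShouldLower(const FakeNode& n) {
  if (!n.is_functional || !n.has_outside_compilation_attr) return false;
  for (const std::string& dtype : n.input_dtypes) {
    if (dtype == "DT_RESOURCE") return true;
  }
  return false;
}

int main() {
  FakeNode call{true, true, {"DT_FLOAT", "DT_RESOURCE"}};
  FakeNode plain{true, true, {"DT_FLOAT"}};
  std::cout << ShouldLower(call) << " " << ShouldLower(plain) << "\n";  // 1 0
  return 0;
}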
+ bool is_host_side_mirrored_control_flow = + HasNodeAttr(n->def(), kXlaHasHostTransferAttrName); + + int num_node_ids = g->num_node_ids(); + bool is_call_node = IsFunctionCall(flib_def, *n); + if (n->IsWhileNode()) { + TF_RETURN_IF_ERROR(RewriteWhileNode(n, g, + /*keep_node_fetchable=*/false)); + } else if (n->IsIfNode()) { + TF_RETURN_IF_ERROR(RewriteIfNode(n, g, /*keep_node_fetchable=*/false)); + } else { + TF_RET_CHECK(is_call_node); + // See comments for "is_host_side_mirrored_control_flow" above. + // If this is a node that's in outside compilation block, lower it as + // V1 function. This is controlled by removing + // kLowerAsMultiDeviceFunctionAttr from the node. + if (!is_host_side_mirrored_control_flow) { + n->ClearAttr(LowerFunctionalOpsPass::kLowerAsMultiDeviceFunctionAttr); + } else { + n->ClearAttr(LowerFunctionalOpsPass::kLowerAsMultiDeviceFunctionAttr); + n->AddAttr(LowerFunctionalOpsPass::kLowerAsMultiDeviceFunctionAttr, + true); + } + TF_RETURN_IF_ERROR( + RewriteFunctionCallNode(n, g, flib_def, + /*keep_caller_fetchable=*/false)); + } + + for (int i = num_node_ids; i < g->num_node_ids(); i++) { + Node* node = g->FindNodeId(i); + if (!node) { + continue; + } + + if (!is_call_node && is_host_side_mirrored_control_flow && + IsFunctionCall(flib_def, *node)) { + // For If/While nodes, if they are host side mirrored control flow, + // mark their body function calls with kXlaHasHostTransferAttrName + // attribute to make sure we lower them as V2 function. + node->AddAttr(kXlaHasHostTransferAttrName, true); + } + + if (IsFunctionCall(flib_def, *node) || node->IsWhileNode() || + node->IsIfNode()) { + // Set kOutsideCompilationAttr attribute so we lower these + // nested function call nodes later. + node->AddAttr(kOutsideCompilationAttr, outside_compilation_attr); + // Set kXlaReplicaIdAttrName attribute so we know replica id when we + // lower this function call node. + node->AddAttr(kXlaReplicaIdAttrName, replica_id); + } else if (node->type_string() == "_XlaRecvAtHost" || + node->type_string() == "_XlaSendFromHost") { + // For "_XlaRecvAtHost" and "_XlaSendFromHost" nodes, make sure they + // have kXlaReplicaIdAttrName attribute so later we know which host + // device to assign. + node->AddAttr(kXlaReplicaIdAttrName, replica_id); + } + } + return Status::OK(); + }; + + for (Node* n : nodes_to_lower) { + TF_RETURN_IF_ERROR(lower_functional_node(n)); + } + } while (modified); + + // Set device for all _XlaRecvAtHost and _XlaSendFromHost nodes. + for (Node* n : g->op_nodes()) { + if (n->type_string() != "_XlaRecvAtHost" && + n->type_string() != "_XlaSendFromHost") { + continue; + } + + string replicate; + TF_RETURN_IF_ERROR(GetNodeAttr(n->def(), kTPUReplicateAttr, &replicate)); + auto iter = tpu_replicate_device_names_mapping.find(replicate); + TF_RET_CHECK(iter != tpu_replicate_device_names_mapping.end()); + const auto& tpu_device_names = iter->second; + + int replica_id; + TF_RETURN_IF_ERROR( + GetNodeAttr(n->def(), kXlaReplicaIdAttrName, &replica_id)); + TF_RET_CHECK(replica_id < tpu_device_names.size()); + const string& tpu_device_name = tpu_device_names[replica_id][0]; + string host_device_name; + TF_RETURN_IF_ERROR(DeviceNameUtils::DeviceNameToCpuDeviceName( + tpu_device_name, &host_device_name)); + n->set_assigned_device_name(host_device_name); + // We may run TPU rewrite passes again on the subgraphs of the resulting + // graph. 
Clear kTPUReplicateAttr and kOutsideCompilationAttr for + // "_XlaRecvAtHost" nodes and "_XlaSendFromHost" nodes, in order to make + // sure that TPU rewrite passes take no effect on host-side subgraphs for + // outside compilation. + n->ClearAttr(kTPUReplicateAttr); + n->ClearAttr(kOutsideCompilationAttr); + } + + // Remove IdentityN nodes generated for outside compilation. IdentityN is + // exempt from resource edge colocation, but here we do need input and output + // for these IdentityN nodes to be colocated. + std::vector identityn_nodes; + for (Node* n : g->op_nodes()) { + if (n->type_string() == "IdentityN" && + HasNodeAttr(n->def(), kXlaOutsideCompilationInputsAttrName)) { + identityn_nodes.push_back(n); + } + } + for (Node* n : identityn_nodes) { + std::vector out_edges(n->out_edges().begin(), + n->out_edges().end()); + for (const Edge* e : out_edges) { + if (e->IsControlEdge()) { + continue; + } + + int src_output = e->src_output(); + const Edge* input_edge; + TF_RETURN_IF_ERROR(n->input_edge(src_output, &input_edge)); + Node* dst = e->dst(); + int dst_input = e->dst_input(); + g->RemoveEdge(e); + g->AddEdge(input_edge->src(), input_edge->src_output(), dst, dst_input); + } + g->RemoveNode(n); + } + + return Status::OK(); +} + +/* static */ Status DistributedTPURewritePass::ParseHostComputeCores( + const Node& replicate_node, + const OutsideCompilationNodeMap& outside_compilation_nodes, + HostComputeCoreMap* host_compute_core) { + std::vector hc_core_string; + TF_RETURN_IF_ERROR(GetNodeAttr(replicate_node.attrs(), "host_compute_core", + &hc_core_string)); + TF_RETURN_IF_ERROR( + ParseHostComputeCoreList(hc_core_string, host_compute_core)); + for (const auto& iter : outside_compilation_nodes) { + const string& oc_cluster_name = iter.first; + if (host_compute_core->find(oc_cluster_name) == host_compute_core->end()) { + // By default put host compute Ops on replicated core 0. + (*host_compute_core)[oc_cluster_name] = 0; + } + } + return Status::OK(); +} + +/* static */ Status DistributedTPURewritePass::GetDeviceTopology( + const DeviceSet& device_set, const Node& replicate_node, int* num_replicas, + int* num_cores_per_replica, int* num_tasks, + std::vector>* tf_device_assignment, + std::unique_ptr* xla_device_assignment, + string* tpu_compilation_device) { + TF_RETURN_IF_ERROR( + GetNodeAttr(replicate_node.attrs(), "num_replicas", num_replicas)); + if (*num_replicas < 1) { + return errors::InvalidArgument("num_replicas must be >= 1, got ", + *num_replicas); + } + + // Find the set of TPU devices in the TF job. + // Indexed by [task number][tpu device number]. + std::vector> tpu_devices; + int num_tpus_per_task; + TF_RETURN_IF_ERROR(GetTPUDeviceNames(replicate_node.requested_device(), + device_set, tpu_compilation_device, + &num_tpus_per_task, &tpu_devices)); + + string topology; + TF_RETURN_IF_ERROR( + GetNodeAttr(replicate_node.attrs(), "topology", &topology)); + TF_RETURN_IF_ERROR(GetNodeAttr( + replicate_node.attrs(), "num_cores_per_replica", num_cores_per_replica)); + std::vector device_assignment; + TF_RETURN_IF_ERROR(GetNodeAttr(replicate_node.attrs(), "device_assignment", + &device_assignment)); + + // TODO(cwhipkey): since we can control multiple pods of different shapes + // from a single worker, it may be desirable to propagate the remote device + // information around (e.g., in DeviceAttributes). This can lead to the mesh + // topology proto being leaked to cloud TPU users (e.g. 
through GetStatus + // calls); this may be okay, but to be conservative, just assume that the + // master session has the proper flags set. + auto* tpu_platform = tpu::TpuPlatformInterface::GetRegisteredPlatform(); + TF_RET_CHECK(tpu_platform); + tpu::TpuTopologyExternal tpu_topology(tpu_platform->GetTopologyPtr()); + TF_RET_CHECK(num_tpus_per_task == + tpu_topology.LogicalDevicesPerHost(kTensorCore)); + TF_RETURN_IF_ERROR(BuildDeviceAssignment( + tpu_topology, num_tpus_per_task, tpu_devices, *num_replicas, + *num_cores_per_replica, topology, device_assignment, tf_device_assignment, + xla_device_assignment)); + + return Status::OK(); +} + +/* static */ Status DistributedTPURewritePass::GetIOTypes( + int num_replicas, const Node& replicate_node, FunctionLibraryRuntime* flr, + Graph* graph, NameRangeMap* input_name_map, const NameAttrList** function, + std::unique_ptr* computation, DataTypeVector* arg_types, + DataTypeVector* retval_types, ParameterInfo* params_info) { + DataTypeVector input_types, broadcast_input_types, guaranteed_constant_types; + TF_RETURN_IF_ERROR( + GetNodeAttr(replicate_node.attrs(), "Tinputs", &input_types)); + TF_RETURN_IF_ERROR(GetNodeAttr(replicate_node.attrs(), "Tbroadcast_inputs", + &broadcast_input_types)); + TF_RETURN_IF_ERROR(GetNodeAttr(replicate_node.attrs(), + "Tguaranteed_constants", + &guaranteed_constant_types)); + int num_distributed_vars; + TF_RETURN_IF_ERROR(GetNodeAttr(replicate_node.attrs(), + "num_distributed_variables", + &num_distributed_vars)); + const int num_per_replica_inputs = input_types.size() - num_distributed_vars; + + if (num_per_replica_inputs % num_replicas != 0) { + return errors::InvalidArgument( + "Number of inputs to TPUReplicate (", num_per_replica_inputs, + ") is not divisible by the number of replicas (", num_replicas, ")."); + } + + int num_variables; + TF_RETURN_IF_ERROR( + GetNodeAttr(replicate_node.attrs(), "NumVariables", &num_variables)); + + NameRangeMap output_name_map; + TF_RETURN_IF_ERROR(NameRangesForNode(replicate_node, replicate_node.op_def(), + input_name_map, &output_name_map)); + + TF_RETURN_IF_ERROR( + GetNodeAttr(replicate_node.attrs(), "computation", function)); + + *computation = absl::make_unique(graph->op_registry()); + TF_RETURN_IF_ERROR(GetComputationForTPUReplicateOp( + **function, flr, computation->get(), arg_types, retval_types)); + + *params_info = ParameterInfo( + num_replicas, num_per_replica_inputs / num_replicas, num_distributed_vars, + broadcast_input_types.size(), num_variables, + guaranteed_constant_types.size(), retval_types->size()); + + if (arg_types->size() != params_info->NumInputsToEachReplica()) { + return errors::InvalidArgument( + "Computation argument to TPUReplicate has wrong number of " + "arguments. Expected ", + params_info->NumInputsToEachReplica(), " inputs, got ", + arg_types->size()); + } + if (replicate_node.num_outputs() != params_info->NumOutputsToHost()) { + return errors::InvalidArgument( + "Wrong number of outputs from TPUReplicate. 
Expected ", + params_info->NumOutputsToHost(), " outputs, got ", + replicate_node.num_outputs()); + } + if (enable_cross_replica_sharding_mirrored_variables_) { + std::vector mirrored_variable_indices; + TF_RETURN_IF_ERROR(GetNodeAttr(replicate_node.attrs(), + TPUREPLICATE_MIRRORED_VAR_INDICES_ATTR, + &mirrored_variable_indices)); + for (int index : mirrored_variable_indices) { + TF_RET_CHECK(params_info->IsPerReplicaArg(index) || + params_info->IsDistributedArg(index)) + << "Mirrored variables not categorized as per-replica arguments, " + "index: " + << index; + params_info->mutable_mirrored_variable_indices()->insert(index); + } + } + return Status::OK(); +} + +/* static */ Status DistributedTPURewritePass::BuildSequencingNodes( + const string& tpu_compilation_device, const Node& replicate_node, + Graph* graph, Node** host_transfer_sequencer, Node** control_before, + Node** control_after) { + *host_transfer_sequencer = nullptr; + + TF_RETURN_IF_ERROR( + BuildNoopNode(replicate_node, + graph->NewName(strings::StrCat(replicate_node.name(), "/", + "control_before")), + /*device=*/"", graph, control_before)); + for (const Edge* e : replicate_node.in_edges()) { + if (!e->IsControlEdge()) { + continue; + } + Node* predecessor = e->src(); + if (predecessor->IsSource()) continue; + if (predecessor->type_string() == "NoOp" && + predecessor->attrs().Find("_xla_host_transfer_sequencer") != nullptr) { + // The node is the sequencer for host transfer operations. Its control + // dependency needs to be placed after the execute node, not before. + if (*host_transfer_sequencer != nullptr) { + return errors::Internal("Replicate node ", replicate_node.name(), + " has two transfer sequencer nodes: ", + (*host_transfer_sequencer)->name(), " and ", + predecessor->name()); + } + // Set the correct device to match the other sequencing nodes. + predecessor->set_assigned_device_name(tpu_compilation_device); + *host_transfer_sequencer = predecessor; + } else { + graph->AddControlEdge(predecessor, *control_before); + } + } + + TF_RETURN_IF_ERROR( + BuildNoopNode(replicate_node, + graph->NewName(strings::StrCat(replicate_node.name(), "/", + "control_after")), + /*device=*/tpu_compilation_device, graph, control_after)); + for (Node* successor : replicate_node.out_nodes()) { + if (successor->attrs().Find("_xla_tail_outside_compilation") != nullptr) { + graph->AddControlEdge(successor, *control_after); + } else { + graph->AddControlEdge(*control_after, successor); + } + } + return Status::OK(); +} + +/* static */ Status DistributedTPURewritePass::DealWithConstantsAndVariables( + const Node& replicate_node, const NameRangeMap& input_name_map, + Graph* graph, Node* host_transfer_sequencer, Node* control_before, + Node* control_after, absl::Span variable_nodes, + std::vector* guaranteed_constant_nodes, + std::vector* variable_reads) { + TF_RETURN_IF_ERROR(FindGuaranteedConstantInputs( + replicate_node, input_name_map, guaranteed_constant_nodes)); + + TF_RETURN_IF_ERROR(BuildVariableReads(variable_nodes, control_before, graph, + variable_reads)); + // Add the control dependency from host transfer nodes. 
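For the input bookkeeping done in GetIOTypes above: Tinputs covers the per-replica inputs for all replicas plus the distributed-variable inputs, so the per-replica portion must divide evenly by num_replicas. A standalone restatement of that check, with invented function names.

#include <iostream>

// Returns the number of per-replica arguments each replica receives, or -1
// (with *ok set to false) when the per-replica inputs do not divide evenly.
int PerReplicaArgCount(int tinputs_size, int num_distributed_vars,
                       int num_replicas, bool* ok) {
  const int num_per_replica_inputs = tinputs_size - num_distributed_vars;
  *ok = (num_per_replica_inputs % num_replicas == 0);
  return *ok ? num_per_replica_inputs / num_replicas : -1;
}

int main() {
  bool ok = false;
  // 8 Tinputs entries, 2 of them distributed variables, 2 replicas:
  // 6 per-replica inputs in total, i.e. 3 per replica.
  std::cout << PerReplicaArgCount(8, 2, 2, &ok) << " " << ok << "\n";  // 3 1
  return 0;
}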
+ if (host_transfer_sequencer != nullptr) { + graph->AddControlEdge(host_transfer_sequencer, control_after); + } + return Status::OK(); +} + +/* static */ Status +DistributedTPURewritePass::BuildCompilationStatusReturnNodes( + Node* replicate_node, Node* compile_node, Node** control_after_compilation, + Graph* graph) { + const Edge* compilation_edge = nullptr; + for (const auto* e : replicate_node->out_edges()) { + if (e->IsControlEdge() && + e->dst()->type_string() == "TPUCompilationResult") { + TF_RET_CHECK(compilation_edge == nullptr) + << "Multiple compilation result nodes attached to the same replicate " + "cluster."; + compilation_edge = e; + } + } + + // TODO(jpienaar): This should be checked by default, current tests not using + // this are ones that use the "abort upon successful compile flag" which will + // be removed. Leaving this in until then. + if (compilation_edge != nullptr) { + Node* compilation_status = compilation_edge->dst(); + const AttrValue* compile_status_cluster_attr = + compilation_status->attrs().Find(kTPUCompilationResultAttr); + TF_RET_CHECK(compile_status_cluster_attr != nullptr); + const string& compile_status_cluster = compile_status_cluster_attr->s(); + TF_RET_CHECK(!compile_status_cluster.empty()); + const AttrValue* replicate_cluster_attr = + replicate_node->attrs().Find(kTPUReplicateAttr); + TF_RET_CHECK(replicate_cluster_attr != nullptr); + const string& replicate_cluster = replicate_cluster_attr->s(); + TF_RET_CHECK(!replicate_cluster.empty()); + TF_RET_CHECK(compile_status_cluster == replicate_cluster); + + TF_RETURN_IF_ERROR( + ReplaceCompilationResultNodeWithIdentity(graph, &compilation_status)); + graph->AddEdge(compile_node, 0, compilation_status, 0); + } + + NodeDef def; + def.set_name(UniqueNodeName("tpu_compile_succeeded_assert", graph)); + // Create an op to assert that compilation succeeded. The alternative would + // have been to have each execute op check and return an error. + def.set_op("TPUCompileSucceededAssert"); + MergeDebugInfo(NodeDebugInfo(replicate_node->def()), &def); + Status status; + Node* compile_succeeded = graph->AddNode(def, &status); + compile_succeeded->set_assigned_device_name( + compile_node->assigned_device_name()); + TF_RETURN_IF_ERROR(status); + graph->AddEdge(compile_node, 0, compile_succeeded, 0); + + // Build a sequencing node for when compilation has completed. + TF_RETURN_IF_ERROR( + BuildNoopNode(*replicate_node, + graph->NewName(strings::StrCat(compile_node->name(), "/", + "after_compilation")), + /*device=*/"", graph, control_after_compilation)); + graph->AddControlEdge(compile_succeeded, *control_after_compilation); + + return Status::OK(); +} + +// Updates the head and tail outside compiled nodes so that nodes have the +// correct device and removes the replication and outside compilation attributes +// so that these nodes do not trigger further graph optimization passes. +/* static */ Status DistributedTPURewritePass::UpdateHeadTailOutsideCompilation( + const std::vector>& tf_device_assignment, + const std::vector& head_tail_outside_compilation_nodes) { + for (Node* node : head_tail_outside_compilation_nodes) { + int replica_id; + TF_RETURN_IF_ERROR( + GetNodeAttr(node->def(), kXlaReplicaIdAttrName, &replica_id)); + // Since we set the device, this will now run on a task other than 0. We + // clear the two following attributes so that we don't trigger encapsulation + // again on the remote host (which will fail due to a missing + // _TPUReplicateMetadata node for the cluster). 
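The head/tail placement just below relies on DeviceNameUtils::DeviceNameToCpuDeviceName to find the host device for a replica's first core. The string rewrite here is only a rough, illustration-only approximation of that mapping; the real helper parses the device name structurally.

#include <iostream>
#include <string>

// Rough sketch: keep the job/replica/task portion of a TPU device name but
// address the host's CPU:0 instead. Illustrative only.
std::string HostCpuDeviceFor(const std::string& tpu_device) {
  const std::string marker = "/device:";
  std::string::size_type pos = tpu_device.rfind(marker);
  if (pos == std::string::npos) return tpu_device;
  return tpu_device.substr(0, pos) + marker + "CPU:0";
}

int main() {
  std::cout << HostCpuDeviceFor("/job:worker/replica:0/task:3/device:TPU:1")
            << "\n";
  // Prints /job:worker/replica:0/task:3/device:CPU:0
  return 0;
}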
+ for (const Edge* e : node->in_edges()) { + // Resource consuming ops should colocate with its resource input. + if (e->src()->IsArg() && + e->src()->output_type(e->src_output()) == DT_RESOURCE) { + node->set_requested_device(tf_device_assignment[replica_id][0]); + } + } + if (node->requested_device().empty()) { + string cpu_device; + TF_RETURN_IF_ERROR(DeviceNameUtils::DeviceNameToCpuDeviceName( + tf_device_assignment[replica_id][0], &cpu_device)); + node->set_requested_device(cpu_device); + } + node->ClearAttr(kTPUReplicateAttr); + node->ClearAttr(kOutsideCompilationAttr); + } + return Status::OK(); +} + +/* static */ +Status DistributedTPURewritePass::FingerprintFunctionLibrary( + const FunctionLibraryDefinition& library, uint64* fingerprint) { + // TODO(phawkins): rather than fingerprinting the entire function library, + // consider fingerprinting just the transitive dependencies of a + // computation. + std::string serialized; + FunctionDefLibrary library_proto = library.ToProto(); + if (library_proto.ByteSizeLong() >= 1.5 * 1024 * 1024 * 1024) { + LOG(WARNING) << "Serializing large proto, size: " + << library_proto.ByteSizeLong(); + } + TF_RET_CHECK(SerializeToStringDeterministic(library_proto, &serialized)); + *fingerprint = TpuCompileInterface::Get()->FingerprintString(serialized); + return Status::OK(); +} + +// Performs the rewrite on a single TPUReplicate node. +/* static */ Status DistributedTPURewritePass::RewriteTPUReplicateNode( + const string& session_handle, const DeviceSet& device_set, + Node* replicate_node, FunctionLibraryDefinition* flib_def, + FunctionLibraryRuntime* flr, Node* host_compute_key_placeholder_node, + const OutsideCompilationNodeMap& outside_compilation_nodes, + const std::vector& head_tail_outside_compilation_nodes, + NodeToNodeReplicasMap* outside_compilation_node_images, Graph* graph, + const GraphShapeInfo& shape_info, + TPUReplicateDeviceNamesMapping* tpu_replicate_device_names_mapping, + int64 autotuner_thresh) { + VLOG(2) << "Rewriting node " << replicate_node->name(); + + // num_replicas and num_cores_per_replica are the 'virtual' replicas (copies + // of the computation) and cores (virtual cores within computations) specified + // by the user. They will be mapped to physical TPU cores below. + int num_replicas; + int num_cores_per_replica; + int num_tasks; // Number of tasks. 
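The fingerprinting helper above serializes the function library deterministically, warns on very large protos, and hashes the result. A standalone sketch of that flow, with std::hash standing in for TpuCompileInterface::FingerprintString; note that std::hash is implementation-defined rather than a stable fingerprint, so this is illustrative only.

#include <cstdint>
#include <functional>
#include <iostream>
#include <string>

// Warn on very large serialized payloads (roughly the 1.5 GiB threshold used
// in the pass), then hash the deterministic serialization.
uint64_t FingerprintSerialized(const std::string& serialized) {
  constexpr uint64_t kWarnBytes = 3ull << 29;  // ~1.5 GiB
  if (serialized.size() >= kWarnBytes) {
    std::cerr << "Serializing large proto, size: " << serialized.size()
              << "\n";
  }
  return std::hash<std::string>{}(serialized);  // stand-in for a fingerprint
}

int main() {
  std::cout << FingerprintSerialized("FunctionDefLibrary{...}") << "\n";
  return 0;
}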
+ std::vector> tf_device_assignment; + std::unique_ptr xla_device_assignment; + string tpu_compilation_device; + TF_RETURN_IF_ERROR(GetDeviceTopology( + device_set, *replicate_node, &num_replicas, &num_cores_per_replica, + &num_tasks, &tf_device_assignment, &xla_device_assignment, + &tpu_compilation_device)); + + TF_RETURN_IF_ERROR(UpdateHeadTailOutsideCompilation( + tf_device_assignment, head_tail_outside_compilation_nodes)); + + string replicate; + TF_RETURN_IF_ERROR( + GetNodeAttr(replicate_node->def(), kTPUReplicateAttr, &replicate)); + tpu_replicate_device_names_mapping->emplace(replicate, tf_device_assignment); + + NameRangeMap input_name_map; + const NameAttrList* function; + std::unique_ptr computation; + DataTypeVector arg_types, retval_types; + ParameterInfo params_info; + TF_RETURN_IF_ERROR(GetIOTypes(num_replicas, *replicate_node, flr, graph, + &input_name_map, &function, &computation, + &arg_types, &retval_types, ¶ms_info)); + + std::vector arg_shapes, retval_shapes; + TF_RETURN_IF_ERROR(GetArgAndRetvalShapes( + shape_info, *replicate_node, params_info, &arg_shapes, &retval_shapes)); + + TF_RETURN_IF_ERROR(ValidateCoreNumbers(*computation, num_cores_per_replica)); + + std::vector arg_sharding; + std::vector arg_fast_mem; + std::vector retval_sharding; + TF_RETURN_IF_ERROR(AssignArgsAndRetvalsToCores( + num_cores_per_replica, params_info, arg_types, arg_shapes, retval_types, + retval_shapes, *computation, replicate_node, flr, &arg_sharding, + &arg_fast_mem, &retval_sharding)); + + VLOG(1) << DumpGraphToFile("distributed_tpu_graph_to_replicate", *computation, + flib_def); + + GraphDef graph_def; + graph->ToGraphDef(&graph_def); + FunctionLibraryDefinition reachable_functions = + flib_def->ReachableDefinitions(graph_def); + uint64 library_fingerprint; + + TF_RETURN_IF_ERROR( + FingerprintFunctionLibrary(reachable_functions, &library_fingerprint)); + VLOG(1) << "Fingerprint functions: " + << absl::StrJoin(reachable_functions.ListFunctionNames(), ", "); + VLOG(1) << "library_fingerprint: " << library_fingerprint; + + // Builds trigger nodes that put barriers around the expansion of + // TPUReplicate. In particular, we must guarantee: + // a) variable reads happen after all predecessors of the original + // TPUReplicate. + // b) variable writes happen before all successors of the original + // TPUReplicate. + // c) all replicas execute, even if output tensors are only requested from + // a subset of replicas. This is necessary both to ensure that variable + // updates happen, but also Send/Recv will deadlock if only one half of + // the communicating pair runs. + Node* host_transfer_sequencer; + Node* control_before; + Node* control_after; + TF_RETURN_IF_ERROR(BuildSequencingNodes( + tpu_compilation_device, *replicate_node, graph, &host_transfer_sequencer, + &control_before, &control_after)); + + // Build a vector of variable nodes that are inputs. + std::vector variable_inputs; + TF_RETURN_IF_ERROR( + FindVariableInputs(*replicate_node, input_name_map, &variable_inputs)); + + std::vector guaranteed_constant_nodes; + std::vector variable_reads; + TF_RETURN_IF_ERROR(DealWithConstantsAndVariables( + *replicate_node, input_name_map, graph, host_transfer_sequencer, + control_before, control_after, variable_inputs, + &guaranteed_constant_nodes, &variable_reads)); + + // Builds Shape nodes that compute the dynamic shapes of arguments whose + // shapes are not statically known. 
+ std::vector dynamic_shape_nodes; + TF_RETURN_IF_ERROR(BuildDynamicShapeNodes(*replicate_node, arg_shapes, + params_info, variable_reads, graph, + &dynamic_shape_nodes)); + + // Builds a TPUCompile node that compiles `clusters` on `compile_device`. + Node* compile_node; + TF_RETURN_IF_ERROR(BuildCompileNode( + replicate_node, *function, library_fingerprint, params_info, arg_shapes, + arg_types, guaranteed_constant_nodes, session_handle, arg_sharding, + arg_fast_mem, retval_sharding, num_cores_per_replica, + /*compile_device=*/tpu_compilation_device, xla_device_assignment.get(), + dynamic_shape_nodes, graph, &compile_node, autotuner_thresh)); + + // Compilation must be sequenced after the control node if the TPU computation + // in a control-flow construct, such as a loop. + graph->AddControlEdge(control_before, compile_node); + + Node* control_after_compilation; + TF_RETURN_IF_ERROR(BuildCompilationStatusReturnNodes( + replicate_node, compile_node, &control_after_compilation, graph)); + + std::vector variable_writes; + TF_RETURN_IF_ERROR(BuildExecuteNodes( + params_info, num_tasks, num_cores_per_replica, *replicate_node, arg_types, + arg_shapes, retval_types, arg_sharding, retval_sharding, + tf_device_assignment, compile_node, variable_reads, + control_after_compilation, control_after, &variable_writes, graph)); + bool contains_resource_write_op = + ContainsResourceWriteOp(*graph, reachable_functions); + + VLOG(2) << "contains_resource_write_op: " << contains_resource_write_op; + // Skip conditional write if there is no resource writing op inside TPU + // computation. + if (contains_resource_write_op) { + TF_RETURN_IF_ERROR(BuildVariableWrites(variable_inputs, control_after, + variable_writes, graph)); + } + + if (host_compute_key_placeholder_node != nullptr) { + TF_RETURN_IF_ERROR(ConnectHostComputeNodes( + compile_node, host_compute_key_placeholder_node, graph)); + } + + HostComputeCoreMap host_compute_core; + TF_RETURN_IF_ERROR(ParseHostComputeCores( + *replicate_node, outside_compilation_nodes, &host_compute_core)); + TF_RETURN_IF_ERROR(ReplicateOutsideCompilationNodes( + tf_device_assignment, host_compute_core, outside_compilation_nodes, + outside_compilation_node_images, graph)); + + graph->RemoveNode(replicate_node); + return Status::OK(); +} + +// Adds sharded weight update optimization for each host training loop. +// +// For any host training loop found in the graph, TPUVariableReshard ops +// are inserted to match the best layout chosen by the XLA. +/* static */ Status +DistributedTPURewritePass::PerformHostTrainingLoopOptimization( + Graph* graph, FunctionLibraryDefinition* flib_def, + FunctionLibraryRuntime* flr) { + std::vector host_training_loops_info; + Status s = tpu::DetectHostTrainingLoop( + /*current_function_name=*/nullptr, + /*current_function_attr=*/nullptr, flib_def, graph, flr, + &host_training_loops_info); + if (!s.ok()) { + VLOG(2) << "No valid host training loop found. Skipping sharded weight " + << "update optimization."; + return Status::OK(); + } + + for (const auto& host_loop : host_training_loops_info) { + const auto& function_name = host_loop.encapsulating_function_name; + // `function_name` has value when host training loop is inside a + // function call node. When host training loop is found inside a function + // call node, then, in addition to adding TPUVariableReshard ops, function + // library definition needs to be updated as well. 
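BuildCompileNode and BuildExecuteNodes, called just above, share an implicit layout of the TPUCompile node's outputs: output 0 feeds the compilation status nodes, output core + 1 carries the program for a logical core, and output num_cores_per_replica + core + 1 is used as the per-core predicate that gates variable writes. A tiny index helper assuming that layout, inferred from how this pass indexes the compile node rather than from the op definition itself.

#include <iostream>

// Assumed TPUCompile output layout; treat as illustrative, not authoritative.
struct CompileOutputs {
  static constexpr int kStatus = 0;
  static int Program(int core) { return core + 1; }
  static int WritePredicate(int num_cores_per_replica, int core) {
    return num_cores_per_replica + core + 1;
  }
};

int main() {
  const int num_cores_per_replica = 2;
  std::cout << CompileOutputs::Program(1) << " "
            << CompileOutputs::WritePredicate(num_cores_per_replica, 1)
            << "\n";  // 2 4
  return 0;
}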
+ if (function_name.has_value()) { + const auto& function_attr = host_loop.encapsulating_function_attrs; + TF_RET_CHECK(function_attr.has_value()) + << "Unable to find function attribute for function: " + << *function_name; + + const FunctionDef* function_def = flib_def->Find(*function_name); + TF_RET_CHECK(function_def) + << "Unable to find function : " << *function_name; + + std::unique_ptr fbody; + TF_RETURN_IF_ERROR(FunctionDefToBodyHelper( + *function_def, AttrSlice(&function_attr.value()), flib_def, &fbody)); + Graph* function_graph = fbody->graph; + TF_RETURN_IF_ERROR(tpu::AddReshardOp(function_graph, host_loop)); + TF_RETURN_IF_ERROR(UpdateFunctionLibDefinition(*function_graph, + *function_name, flib_def)); + } else { + TF_RETURN_IF_ERROR(tpu::AddReshardOp(graph, host_loop)); + } + } + return Status::OK(); +} + +Status DistributedTPURewritePass::PlaceUnassignedDeviceNodesOnTPUIfPossible( + Graph* graph) { + ReverseDFS(*graph, {}, PlaceOpsOnTPU); + return Status::OK(); +} + +Status DistributedTPURewritePass::Run( + const GraphOptimizationPassOptions& options) { + VLOG(1) << "DistributedTPURewritePass::Run"; + + Graph* graph = options.graph->get(); + + VLOG(1) << DumpGraphToFile("distributed_tpu_compilation_before", *graph, + options.flib_def); + + const auto* config = &options.session_options->config; + std::unique_ptr pflr( + new ProcessFunctionLibraryRuntime( + nullptr, options.session_options->env, config, + graph->versions().producer(), options.flib_def, + config ? config->graph_options().optimizer_options() + : OptimizerOptions())); + + FunctionLibraryRuntime* flr = + pflr->GetFLR(ProcessFunctionLibraryRuntime::kDefaultFLRDevice); + + // This pass can only run in the session master, which should fill + // in the device_set field to the options. + TF_RET_CHECK(options.device_set != nullptr); + + // Find all the replicate nodes before mutating the graph. + std::vector replicate_nodes; + // Map from compiled subgraph cluster name to the outside_compilation nodes in + // that cluster. + std::map outside_compilation_nodes; + std::map> head_tail_outside_compilation_nodes; + TF_RETURN_IF_ERROR(FindTaggedNodes(graph, &replicate_nodes, + &outside_compilation_nodes, + &head_tail_outside_compilation_nodes)); + + if (replicate_nodes.empty()) { + // Remove unused TPUPartitionedInput nodes. + for (Node* n : graph->nodes()) { + if (n->type_string() == kTPUPartitionedInput) graph->RemoveNode(n); + } + return Status::OK(); + } + + std::unordered_map host_compute_key_placeholder_map; + TF_RETURN_IF_ERROR(FindHostComputeKeyPlaceholderNodes( + graph, replicate_nodes, &host_compute_key_placeholder_map)); + + GraphShapeInfo shape_info; + TF_RETURN_IF_ERROR(InferShapes(graph, /*arg_shapes=*/{}, + flr->GetFunctionLibraryDefinition(), + &shape_info)); + int64 autotuner_thresh = options.session_options->config.experimental() + .xla_fusion_autotuner_thresh(); + + NodeToNodeReplicasMap outside_compilation_node_images; + TPUReplicateDeviceNamesMapping tpu_replicate_device_names_mapping; + for (Node* node : replicate_nodes) { + TF_RETURN_IF_ERROR(RewriteTPUReplicateNode( + options.session_handle, *options.device_set, node, options.flib_def, + flr, host_compute_key_placeholder_map[node->name()], + outside_compilation_nodes[node->name()], + head_tail_outside_compilation_nodes[node->name()], + &outside_compilation_node_images, graph, shape_info, + &tpu_replicate_device_names_mapping, autotuner_thresh)); + } + + // Place the padding nodes generated by dynamic padder on the correct devices. 
+ // TODO(rxsang): Place padding ops on TPUs in + // PlaceUnassignedDeviceNodesOnTPUIfPossible function. + TF_RETURN_IF_ERROR(SetPaddingNodesDevices(graph)); + + std::unordered_map outside_compilation_inputs; + for (Node* n : graph->op_nodes()) { + string lifted_arg_inputs_attr; + if (n->type_string() == "IdentityN" && + GetNodeAttr(n->def(), kXlaOutsideCompilationInputsAttrName, + &lifted_arg_inputs_attr) + .ok()) { + outside_compilation_inputs[lifted_arg_inputs_attr] = n; + } + } + for (const auto& iter : outside_compilation_nodes) { + TF_RETURN_IF_ERROR(ReplicateOutsideCompilationEdges( + iter.second, outside_compilation_node_images, + outside_compilation_inputs, graph)); + } + TF_RETURN_IF_ERROR( + RemoveOutsideCompilationNodes(outside_compilation_node_images, graph)); + TF_RETURN_IF_ERROR(LowerOutsideCompilationFunctionalNodes( + graph, *options.flib_def, tpu_replicate_device_names_mapping)); + + TF_RETURN_IF_ERROR(PlaceUnassignedDeviceNodesOnTPUIfPossible(graph)); + VLOG(1) << DumpGraphToFile("distributed_tpu_compilation_after", *graph, + options.flib_def); + VLOG(1) << "DistributedTPURewritePass::Run() finished"; + + if (enable_cross_replica_sharding_mirrored_variables_) { + VLOG(1) << "Starting host training loop optimization."; + VLOG(1) << DumpGraphToFile("host_loop_optimization_before", *graph, + options.flib_def); + TF_RETURN_IF_ERROR( + PerformHostTrainingLoopOptimization(graph, options.flib_def, flr)); + VLOG(1) << DumpGraphToFile("host_loop_optimization_after", *graph, + options.flib_def); + VLOG(1) << "Host training loop optimization finished."; + } + + return Status::OK(); +} + +bool DistributedTPURewritePass::distribute_vars_ = false; +bool DistributedTPURewritePass:: + replicate_inputs_outputs_by_default_for_xla_spmd_ = false; +bool DistributedTPURewritePass:: + enable_cross_replica_sharding_mirrored_variables_ = true; +bool DistributedTPURewritePass::enable_automatic_model_parallelism_ = false; + +/*static*/ void DistributedTPURewritePass::SetDistributedTpuRewritePassOptions( + bool distribute_vars, bool replicate_inputs_outputs_by_default_for_xla_spmd, + bool enable_cross_replica_sharding_mirrored_variables, + bool enable_automatic_model_parallelism) { + distribute_vars_ = distribute_vars; + replicate_inputs_outputs_by_default_for_xla_spmd_ = + replicate_inputs_outputs_by_default_for_xla_spmd; + enable_cross_replica_sharding_mirrored_variables_ = + enable_cross_replica_sharding_mirrored_variables; + enable_automatic_model_parallelism_ = enable_automatic_model_parallelism; +} + +} // namespace tensorflow diff --git a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.h b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.h new file mode 100644 index 00000000000..52fae7a7c13 --- /dev/null +++ b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.h @@ -0,0 +1,589 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/
+
+// Rewrites TPUReplicate nodes into replicated computations on TPU.
+//
+// To represent a distributed TPU computation, we use the
+// TPUReplicate operator, which describes a subgraph (represented as a
+// Tensorflow function) to replicate across a TPU pod.
+//
+// Model parallelism and data parallelism:
+// ---------------------------------------
+// We support two different kinds of parallelism on TPU:
+// * data parallelism (replication), or parallelization across batches, and
+// * model parallelism, or parallelization within a batch.
+//
+// The function passed to a TPUReplicate operator is replicated many
+// times across a TPU pod (data parallelism). The `num_replicas` attribute
+// controls how many replicas of the computation to create. Replicas are mostly
+// independent; replicas can only communicate using the CrossReplicaSum
+// operator, which is typically used to communicate gradients during training.
+//
+// Each replica may optionally use more than one TPU core (model
+// parallelism). The `num_cores_per_replica` attribute controls how many cores
+// there are per replica. For each core, there is a virtual TPU_REPLICATED_CORE
+// device that is only valid within replicated TPU computations (e.g.,
+// TPU_REPLICATED_CORE:0, TPU_REPLICATED_CORE:1, etc.); each TPU_REPLICATED_CORE
+// device corresponds to one TPU core in every replica.
+// Each replica runs its own copy of the computation assigned to each
+// TPU_REPLICATED_CORE device.
+//
+// The Python code is responsible for providing a device_assignment that
+// describes how the replicated logical cores map to physical cores on the TPU
+// topology.
+//
+// Inputs to TPUReplicate:
+// ------------------------------
+// The TPUReplicate operator takes the following kinds of inputs, in this
+// order:
+// * per-replica inputs. If there are three per-replica inputs (A, B, C) and two
+//   replicas, the first six arguments to TPUReplicate will be:
+//   A0 B0 C0 A1 B1 C1
+//   where Ai is the A input to the i-th replica.
+// * distributed inputs. These inputs follow the per-replica inputs.
+//   If there are two distributed inputs (E, F) and two replicas, the following
+//   arguments to TPUReplicate will be: E F.
+//   Each replica receives its own local copy of E and F.
+// * broadcast inputs. These inputs follow the distributed inputs. All
+//   replicas receive a copy of each of these inputs.
+// * variables. Resource variables accessed by the computation follow the
+//   broadcast inputs.
+//
+// For example, for a computation with two replicas, three per-replica inputs
+// (A, B, C), two distributed inputs (E, F), two broadcast inputs (X, Y), and
+// two variables (V, W), the arguments to TPUReplicate will be:
+// A0 B0 C0 A1 B1 C1 E F X Y V W
+// and each replica will receive the following arguments:
+// A B C E F X Y V W
+//
+// Distributed TPU compilation requires that the shapes of all operators
+// be known statically at compilation time, before any nodes have executed.
+// Shapes are determined using shape information emitted by InferShapes. It
+// is not possible to replicate Tensorflow operators with unknown or dynamic
+// shapes for TPU at present.
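A tiny standalone illustration of the argument flattening described above, reproducing A0 B0 C0 A1 B1 C1 E F X Y V W for the two-replica example; the function name is invented for illustration.

#include <iostream>
#include <string>
#include <vector>

// Builds the flattened TPUReplicate argument list: per-replica inputs for
// every replica first, then distributed, broadcast, and variable inputs once.
std::vector<std::string> FlattenReplicateArgs(
    int num_replicas, const std::vector<std::string>& per_replica,
    const std::vector<std::string>& distributed,
    const std::vector<std::string>& broadcast,
    const std::vector<std::string>& variables) {
  std::vector<std::string> args;
  for (int r = 0; r < num_replicas; ++r) {
    for (const std::string& name : per_replica) {
      args.push_back(name + std::to_string(r));
    }
  }
  args.insert(args.end(), distributed.begin(), distributed.end());
  args.insert(args.end(), broadcast.begin(), broadcast.end());
  args.insert(args.end(), variables.begin(), variables.end());
  return args;
}

int main() {
  for (const std::string& a :
       FlattenReplicateArgs(2, {"A", "B", "C"}, {"E", "F"}, {"X", "Y"},
                            {"V", "W"})) {
    std::cout << a << " ";
  }
  std::cout << "\n";  // A0 B0 C0 A1 B1 C1 E F X Y V W
  return 0;
}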
+// +// Graph rewrite: +// -------------- +// Compilation replaces TPUReplicate operators with: +// * a single TPUCompile node that compiles the computations, +// * one TPUExecute node for each TPU device in the system that +// executes the relevant computation, +// * one ReadVariableOp for each variable accessed by the replicated +// computation, +// * one AssignVariableOp for each variable accessed by the replicated +// computation. An assignment is built even if a variable is only read by the +// computation. We do not know which variables are written until we apply the +// XlaCompiler to the computation, but that does not happen until after the +// rewrite. Conservatively, we write back the values of all variables after +// the computation completes. +// TODO(phawkins): only write back variables that the computation may write. +// * one Shape node for each Tensor or Variable input to the computation whose +// shape is not statically known at rewrite time. The input shapes are fed +// to the TPUCompile node. +// +// To ensure that the reads and writes seem to happen at the right time in the +// graph execution, we add control edges from all predecessors of the original +// TPUReplicate operator to each of the ReadVariableOp operators. +// Similarly, we add control edges from all of the AssignVariableOp operators to +// all of the successors of the TPUReplicate operator. +// +// The TPUReplicate rewrite must run before placement, since resource +// variable inputs will have DT_RESOURCE, which cannot be sent across devices, +// leading to objections from the placer. The rewrite rewrites the resource +// accesses into explicit ReadVariableOp and AssignVariableOp operators that the +// placer is free to colocate with the variables. + +#ifndef TENSORFLOW_CORE_TPU_GRAPH_REWRITE_DISTRIBUTED_TPU_REWRITE_PASS_H_ +#define TENSORFLOW_CORE_TPU_GRAPH_REWRITE_DISTRIBUTED_TPU_REWRITE_PASS_H_ + +#include +#include + +#include "absl/container/node_hash_map.h" +#include "absl/types/span.h" +#include "tensorflow/compiler/jit/shape_inference.h" +#include "tensorflow/compiler/xla/service/computation_placer.h" +#include "tensorflow/core/common_runtime/optimization_registry.h" +#include "tensorflow/core/framework/function.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/stream_executor/tpu/tpu_topology.h" + +namespace tensorflow { + +// Replaces clusters assigned to TPU_SYSTEM devices with +// TPUCompile and TPUExecute nodes assigned to the corresponding +// TPU devices. +class DistributedTPURewritePass : public GraphOptimizationPass { + public: + static void SetDistributedTpuRewritePassOptions( + bool distribute_vars, + bool replicate_inputs_outputs_by_default_for_xla_spmd, + bool enable_cross_replica_sharding_mirrored_variables, + bool enable_automatic_model_parallelism); + + Status Run(const GraphOptimizationPassOptions& options) override; + + // The following methods are public only for the use of unit tests. + + // See comment at the top of the file for how the inputs are ordered. + // Encapsulates the different TPU replicated node input and output + // information, and provide common APIs over them. 
+ class ParameterInfo { + public: + ParameterInfo() {} + ParameterInfo(int64 num_replicas, int64 num_per_replica_args, + int64 num_distributed_args, int64 num_broadcast_args, + int64 num_variables, int64 num_guaranteed_constants, + int64 num_retvals_per_replica) + : num_replicas_(num_replicas), + num_per_replica_args_(num_per_replica_args), + num_distributed_args_(num_distributed_args), + num_broadcast_args_(num_broadcast_args), + num_variables_(num_variables), + num_guaranteed_constants_(num_guaranteed_constants), + num_retvals_per_replica_(num_retvals_per_replica) {} + + int64 NumReplicas() const { return num_replicas_; } + + int64 NumPerReplicaArgs() const { return num_per_replica_args_; } + + int64 NumDistributedArgs() const { return num_distributed_args_; } + + int64 NumBroadcastArgs() const { return num_broadcast_args_; } + + int64 NumVariables() const { return num_variables_; } + + int64 NumGuaranteedConstants() const { return num_guaranteed_constants_; } + + int64 NumRetvalsPerReplica() const { return num_retvals_per_replica_; } + + bool IsPerReplicaArg(int64 index) const { + return index < num_per_replica_args_; + } + + bool IsDistributedArg(int64 index) const { + return index >= num_per_replica_args_ && + index < (num_per_replica_args_ + num_distributed_args_); + } + + bool IsBroadcastArg(int64 index) const { + return index >= num_per_replica_args_ && + index < (num_per_replica_args_ + num_distributed_args_ + + num_broadcast_args_); + } + + bool IsVariableArg(int64 index) const { + return index >= (num_per_replica_args_ + num_broadcast_args_) && + index < (num_per_replica_args_ + num_distributed_args_ + + num_broadcast_args_ + num_variables_); + } + + bool IsConstantArg(int64 index) const { + return index >= (num_per_replica_args_ + num_distributed_args_ + + num_broadcast_args_ + num_variables_) && + index < (num_per_replica_args_ + num_distributed_args_ + + num_broadcast_args_ + num_variables_ + + num_guaranteed_constants_); + } + + // Returns the number of inputs which has been received by the host. + int64 NumInputsFromHost() const { + return num_replicas_ * num_per_replica_args_ + num_distributed_args_ + + num_broadcast_args_ + num_variables_ + num_guaranteed_constants_; + } + + // Returns the number of inputs which will be sent to each replica. + int64 NumInputsToEachReplica() const { + return num_per_replica_args_ + num_distributed_args_ + + num_broadcast_args_ + num_variables_ + num_guaranteed_constants_; + } + + // Returns the total number of output values returned to the host (for all + // replicas). + int64 NumOutputsToHost() const { + return num_replicas_ * num_retvals_per_replica_; + } + + // Returns the position of the first per-replica argument, within the set + // of all hosts arguments. + // Broadcast arguments follow the distributed arguments. + int64 FirstBroadcastArgFromHost() const { + return num_replicas_ * num_per_replica_args_ + num_distributed_args_; + } + + // Indices of mirrored variables across replicas, which should be + // categorized as per_replica_args. 
+ const std::set& mirrored_variable_indices() const { + return mirrored_variable_indices_; + } + std::set* mutable_mirrored_variable_indices() { + return &mirrored_variable_indices_; + } + + private: + int64 num_replicas_ = 1; + int64 num_per_replica_args_ = 0; + int64 num_distributed_args_ = 0; + int64 num_broadcast_args_ = 0; + int64 num_variables_ = 0; + int64 num_guaranteed_constants_ = 0; + int64 num_retvals_per_replica_ = 0; + std::set mirrored_variable_indices_; + }; + + // Mapping from TPUReplicate cluster name to tpu device names. Value is a + // mapping from [replica][core] to a TF device name. + typedef absl::flat_hash_map>> + TPUReplicateDeviceNamesMapping; + + // Determines which devices to use to run the computation. + // Inputs: + // * num_tpus_per_task: the number of TPU devices attached to each task + // * tpu_devices: a [task][device] collection of TPU devices + // * num_replicas: the number of replicas requested + // * num_cores_per_replica: the number of cores in each computation instance + // * topology_attr: the topology TPUReplicate attribute + // * device_assignment_attr: the device_assignment TPUReplicate attribute + // Outputs: + // * tf_device_assignment: a mapping from [replica][core] to a TF device name + // * xla_device_assignment: a mapping from [replica][core] to a linearized TPU + // coordinate. + // TODO(phawkins): change tf_device_assignment to an xla::Array2D. + static Status BuildDeviceAssignment( + const tpu::TpuTopologyExternal& topology, int num_tpus_per_task, + const std::vector>& tpu_devices, int num_replicas, + int num_cores_per_replica, const string& topology_attr, + absl::Span device_assignment_attr, + std::vector>* tf_device_assignment, + std::unique_ptr* xla_device_assignment); + + // Returns the `computation` graph attached to TPUReplicate operator + // `node`. `flr` is a FunctionLibraryRuntime to use when + // instantiating the function body. Sets `*arg_types` and + // `*retval_types` to the argument/return types of the function. + static Status GetComputationForTPUReplicateOp(const NameAttrList& function, + FunctionLibraryRuntime* flr, + Graph* computation, + DataTypeVector* arg_types, + DataTypeVector* retval_types); + + // Returns the shapes of the argument tensors and return values of the + // TPUReplicate operator `node` using the _output_shapes, + // _output_handle_shapes, and _output_handle_types annotations on the input + // nodes. Expects inputs in the following order (see comment at top of file): + // * num_replicas * num_per_replica_args per-replica inputs, + // * num_broadcast_args broadcast inputs, + // * num_variables variable inputs. + // Returns an error if the input shapes to `node` are not statically known. + // Also verifies that all replicas have identical input shapes for their + // per-replica inputs. + static Status GetArgAndRetvalShapes( + const GraphShapeInfo& shape_info, const Node& node, + const ParameterInfo& params_info, std::vector* arg_shapes, + std::vector* retval_shapes); + + // Assigns arguments and return values to cores. The assignment is represented + // as an XLA op sharding, so that an argument can be replicated across cores. + // `arg_sharding` and `retval_sharding` are vectors of shardings indexed by + // argument/retval number. + // `arg_fast_mem` is vector of fast_mem indication which is indexed by + // argument number. 
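A standalone restatement of the counting methods above, using the same formulas as ParameterInfo; it is handy for sanity-checking the two-replica example from the header comment. The Counts struct is an illustrative stand-in, not the real class.

#include <cstdint>
#include <iostream>

// Mirrors the host/replica input counting formulas from ParameterInfo.
struct Counts {
  int64_t replicas, per_replica, distributed, broadcast, variables, constants;

  int64_t NumInputsFromHost() const {
    return replicas * per_replica + distributed + broadcast + variables +
           constants;
  }
  int64_t NumInputsToEachReplica() const {
    return per_replica + distributed + broadcast + variables + constants;
  }
  int64_t FirstBroadcastArgFromHost() const {
    return replicas * per_replica + distributed;
  }
};

int main() {
  // Two replicas, inputs A B C (per-replica), E F (distributed),
  // X Y (broadcast), V W (variables), no guaranteed constants.
  Counts c{2, 3, 2, 2, 2, 0};
  std::cout << c.NumInputsFromHost() << " " << c.NumInputsToEachReplica()
            << " " << c.FirstBroadcastArgFromHost() << "\n";  // 12 9 8
  return 0;
}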
+ static Status AssignArgsAndRetvalsToCores( + int num_cores_per_replica, const ParameterInfo& params_info, + const DataTypeVector& arg_types, + const std::vector& arg_shapes, + const DataTypeVector& retval_types, + const std::vector& retval_shapes, const Graph& graph, + const Node* replicate_node, FunctionLibraryRuntime* flr, + std::vector<::xla::OpSharding>* arg_sharding, + std::vector* arg_fast_mem, + std::vector<::xla::OpSharding>* retval_sharding); + + // Computes a fingerprint of the contents of `library`. + static Status FingerprintFunctionLibrary( + const FunctionLibraryDefinition& library, uint64* fingerprint); + + // Populates `*variables` with the "variables" inputs to `index`-th output of + // `node`. + struct VariableInput { + Node* node; + int index; + + // Type of the variable's value. Note that this is different to the type of + // the output of 'variable', which is always DT_RESOURCE. + DataType dtype; + }; + static Status FindVariableInputs(const Node& node, + const NameRangeMap& input_range_map, + std::vector* variables); + + // Populates '*guaranteed_constants' with the "guaranteed_constants" inputs + // to 'node'. + static Status FindGuaranteedConstantInputs( + const Node& node, const NameRangeMap& input_range_map, + std::vector* guaranteed_constants); + + // Builds Shape nodes that compute the shapes of arguments whose shapes are + // not statically known. + static Status BuildDynamicShapeNodes( + const Node& replicate_node, const std::vector& arg_shapes, + const ParameterInfo& params_info, + const std::vector& variable_reads, Graph* graph, + std::vector* dynamic_shape_nodes); + + // Builds a TPUCompile node that compiles the computation in + // `function_names`. calls `nodes`. + // TODO(b/33943292): at present, for model parallelism with Send/Recv to work + // the `nodes` must correspond to the computations assigned to TPU:0, + // TPU:1, ... in order since XLA hard-codes the chip IDs in the generated + // executables. + static Status BuildCompileNode( + const Node* replicate_node, const NameAttrList& function, + uint64 library_fingerprint, const ParameterInfo& params_info, + const std::vector& arg_shapes, + const DataTypeVector& arg_types, + const std::vector& guaranteed_constant_nodes, + const string& session_handle, + const std::vector<::xla::OpSharding>& arg_sharding, + const std::vector& arg_fast_mem, + const std::vector<::xla::OpSharding>& retval_sharding, + int num_cores_per_replica, const string& compile_device, + const xla::DeviceAssignment* xla_device_assignment, + const std::vector& dynamic_shape_nodes, Graph* graph, + Node** compile_node, int64 autotuner_thresh); + + // Builds a TPUCompileSucceededAssert node that verifies that compilation + // succeeded and replaces the TPUCompilationStatus node in the graph. + static Status BuildCompilationStatusReturnNodes( + Node* replicate_node, Node* compile_node, + Node** control_after_compilation, Graph* graph); + + // Builds ReadVariableOp nodes that read `variables`, with a control + // edges that ensure they happen after `control_predecessor`. + static Status BuildVariableReads(absl::Span variables, + Node* control_predecessor, Graph* graph, + std::vector* variable_reads); + + // Returns true if graph or functions contain resource write op, otherwise + // return false. + // TODO(b/137048563): Recognize unused resource rewrite op. + static bool ContainsResourceWriteOp(const Graph& graph, + const FunctionLibraryDefinition& fld); + // Struct that describes a variable value to be written back from TPUExecute. 
+ struct VariableWrite { + // A node:output pair containing a boolean tensor that determines whether + // the value should be written back. + Node* predicate; + int predicate_output; + + // A node:output pair containing the value to be written back. + Node* value; + int value_output; + }; + + // Builds AssignVariableOp nodes that write `variables` with the values from + // `variable_writes`, with control edges that ensure the writes happen before + // `control_successor`. + static Status BuildVariableWrites( + absl::Span variables, Node* control_successor, + absl::Span variable_writes, Graph* graph); + + // Builds TPUExecute operators assigned to each TPU device + // involved in the computation. + // Arguments: + // * `params_info` is the structure containing the information about the + // TPUReplicate node inputs and outputs. + // * `num_tasks` is the number of TensorFlow tasks in the slice. + // * `num_cores_per_replica` is the number of cores which are dedicated to + // each replica. + // * `replicate_node` is the original TPUReplicate node. + // * `arg_types` are the types of the arguments to the computation function + // passed as argument to TPUReplicate, including per-replica, + // broadcast, and variable arguments. + // * `arg_shapes` are the corresponding shapes (and handle types/shapes, if + // applicable). + // * `arg_shardings` and `retval_shardings` are mappings from + // arguments/return indices to shardings, as returned by + // `AssignArgsAndRetvalsToCores`. + // * `pod_devices` lists the devices to assign to each core of each replica. + // * `variable_reads` is a vectors of ReadVariableOp operators, one for each + // variable argument to the computation. + // * The execute operators will have a control edge from + // `control_predecessor` and another control edge to `control_successor`. + // Populates '*variable_writes' with information about variable values to + // write back. + static Status BuildExecuteNodes( + const ParameterInfo& params_info, int num_tasks, + int num_cores_per_replica, const Node& replicate_node, + const DataTypeVector& arg_types, + const std::vector& arg_shapes, + const DataTypeVector& retval_types, + const std::vector<::xla::OpSharding>& arg_shardings, + const std::vector<::xla::OpSharding>& retval_shardings, + const std::vector>& tpu_device_names, + Node* compile_node, const std::vector& variable_reads, + Node* control_predecessor, Node* control_successor, + std::vector* variable_writes, Graph* graph); + + // Connects the compile node to all the host transfer nodes, and removes the + // key placeholder node that was previously standing in for it. + // Arguments: + // * `compile_node` is the TPUCompile node that has been added to the graph. + // * `key_placeholder_node` is the placeholder node to send the key to all the + // host + // * transfer nodes in the original graph. + // * `graph` is the graph being rewritten. + static Status ConnectHostComputeNodes(Node* compile_node, + Node* key_placeholder_node, + Graph* graph); + + // Map from a Node in an outside_compilation cluster in the original graph to + // the list of Nodes, one for each replica, that it is expanded into during + // replication. + typedef absl::node_hash_map> NodeToNodeReplicasMap; + + // Map from the name of an outside_compilation cluster to the model-parallel + // core index that the HostCompute Op should be placed on in that cluster. 
+ typedef std::map HostComputeCoreMap; + + // Map from the name of an outside_compilation cluster to the list of Nodes + // that should run on the host for that cluster. + typedef std::map> OutsideCompilationNodeMap; + + // Copies the outside_compilation nodes in a cluster to create replica + // replica_index. + static Status CopyOutsideCompilationNodes( + int replica_index, const std::vector& outside_compilation_nodes, + const DeviceNameUtils::ParsedName& tpu_device, + const DeviceNameUtils::ParsedName& partial_device, + NodeToNodeReplicasMap* node_images, Graph* graph); + + // Replicates all the nodes in outside_compilation clusters in a compiled + // computation. + static Status ReplicateOutsideCompilationNodes( + const std::vector>& tf_device_assignment, + const HostComputeCoreMap& host_compute_core, + const OutsideCompilationNodeMap& outside_compilation_nodes, + NodeToNodeReplicasMap* node_images, Graph* graph); + + // Lifts the edges between original outside_compilation nodes in a cluster + // onto their replicas. + static Status CopyOutsideCompilationEdges( + const std::vector& outside_compilation_nodes, + const NodeToNodeReplicasMap& node_images, + const std::unordered_map outside_compilation_inputs, + Graph* graph); + + // Lifts all the edges in outside_compilation clusters in a compiled + // computation to their replicas. + static Status ReplicateOutsideCompilationEdges( + const OutsideCompilationNodeMap& outside_compilation_nodes, + const NodeToNodeReplicasMap& node_images, + const std::unordered_map outside_compilation_inputs, + Graph* graph); + + // Removes all the original outside_compilation nodes from the graph, + // following replication. + static Status RemoveOutsideCompilationNodes( + const NodeToNodeReplicasMap& node_images, Graph* graph); + + // Lowers outside compilation functional nodes (If/While/function call). + // Otherwise, when we have multiple workers, device placer will not be able to + // place nodes if outside compilation has DT_RESOURCE inputs (e.g. a + // DT_RESOURCE input fed into multiple While nodes on different devices). + static Status LowerOutsideCompilationFunctionalNodes( + Graph* g, const FunctionLibraryDefinition& flib_def, + const TPUReplicateDeviceNamesMapping& tpu_replicate_device_names_mapping); + + // Parses the 'host_compute_core' attribute on replicate_node to get the + // replicated core id of each outside_compilation cluster. + static Status ParseHostComputeCores( + const Node& replicate_node, + const OutsideCompilationNodeMap& outside_compilation_nodes, + HostComputeCoreMap* host_compute_core); + + // Gets the physical topology information about the TPU system. + static Status GetDeviceTopology( + const DeviceSet& device_set, const Node& replicate_node, + int* num_replicas, int* num_cores_per_replica, int* num_tasks, + std::vector>* tf_device_assignment, + std::unique_ptr* xla_device_assignment, + string* tpu_compilation_device); + + // Gets the types of args, retvals, and parameters. + static Status GetIOTypes( + int num_replicas, const Node& replicate_node, FunctionLibraryRuntime* flr, + Graph* graph, NameRangeMap* input_name_map, const NameAttrList** function, + std::unique_ptr* computation, DataTypeVector* arg_types, + DataTypeVector* retval_types, ParameterInfo* params_info); + + // Find known constants and deals with variable reads. 
+ static Status DealWithConstantsAndVariables( + const Node& replicate_node, const NameRangeMap& input_name_map, + Graph* graph, Node* host_transfer_sequencer, Node* control_before, + Node* control_after, absl::Span variable_nodes, + std::vector* guaranteed_constant_nodes, + std::vector* variable_reads); + + // Adds NoOp nodes for sequencing computation and variable reads/writes. + static Status BuildSequencingNodes(const string& tpu_compilation_device, + const Node& replicate_node, Graph* graph, + Node** host_transfer_sequencer, + Node** control_before, + Node** control_after); + + // Performs the pass's rewrite on a TPUReplicate node `node`. + static Status RewriteTPUReplicateNode( + const string& session_handle, const DeviceSet& device_set, + Node* replicate_node, FunctionLibraryDefinition* flib_def, + FunctionLibraryRuntime* flr, Node* host_compute_key_placeholder_node, + const OutsideCompilationNodeMap& outside_compilation_nodes, + const std::vector& head_tail_outside_compilation_nodes, + NodeToNodeReplicasMap* outside_compilation_node_images, Graph* graph, + const GraphShapeInfo& shape_info, + TPUReplicateDeviceNamesMapping* tpu_replicate_device_names_mapping, + int64 autotuner_thresh); + + // Performs host training loop optimization. For example, when TPUExecute + // node is inside a while loop, then model weight variables can be sharded + // in XLA preferred layout and then unsharded only at the very last iteration + // to reduce the number of all_gather. + static Status PerformHostTrainingLoopOptimization( + Graph* graph, FunctionLibraryDefinition* flib_def, + FunctionLibraryRuntime* flr); + + // Heuristically place some nodes with unassigned devices on TPUs for + // performance reasons. + static Status PlaceUnassignedDeviceNodesOnTPUIfPossible(Graph* graph); + + // Updates the head and tail outside compiled nodes so that nodes have the + // correct device and removes the replication and outside compilation + // attributes so that these nodes do not trigger further graph optimization + // passes. + static Status UpdateHeadTailOutsideCompilation( + const std::vector>& tf_device_assignment, + const std::vector& head_tail_outside_compilation_nodes); + + private: + static bool distribute_vars_; + static bool replicate_inputs_outputs_by_default_for_xla_spmd_; + static bool enable_cross_replica_sharding_mirrored_variables_; + static bool enable_automatic_model_parallelism_; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_TPU_GRAPH_REWRITE_DISTRIBUTED_TPU_REWRITE_PASS_H_ diff --git a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.cc b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.cc new file mode 100644 index 00000000000..18b158c0335 --- /dev/null +++ b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.cc @@ -0,0 +1,45 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/
+
+#include "tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.h"
+
+#include <limits>
+
+#include "absl/random/random.h"
+
+namespace tensorflow {
+namespace {
+
+static int64 overridden_node_id = -1;
+
+}  // namespace
+
+namespace internal {
+
+void OverrideNodeIdForTesting(const int64 node_id) {
+  overridden_node_id = node_id;
+}
+
+uint64 GetNodeId() {
+  if (overridden_node_id > -1) {
+    return overridden_node_id;
+  } else {
+    return absl::Uniform(absl::SharedBitGen(), uint64{0},
+                         std::numeric_limits<uint64>::max());
+  }
+}
+
+}  // namespace internal
+}  // namespace tensorflow
diff --git a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.h b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.h
new file mode 100644
index 00000000000..ce80249c30f
--- /dev/null
+++ b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.h
@@ -0,0 +1,38 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_TPU_GRAPH_REWRITE_DISTRIBUTED_TPU_REWRITE_PASS_INTERNAL_H_
+#define TENSORFLOW_CORE_TPU_GRAPH_REWRITE_DISTRIBUTED_TPU_REWRITE_PASS_INTERNAL_H_
+
+#include "tensorflow/core/framework/types.h"
+
+namespace tensorflow {
+
+// Implementation details of distributed_tpu_rewrite_pass.cc, please DO NOT
+// depend on these.
+namespace internal {
+
+// When set to a value >= 0, overrides the node_id. Used for getting
+// deterministic node_ids during testing.
+void OverrideNodeIdForTesting(int64 node_id);
+
+// Retrieves the node id, used to make some node names unique in the rewrite
+// pass.
+uint64 GetNodeId();
+
+}  // namespace internal
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_TPU_GRAPH_REWRITE_DISTRIBUTED_TPU_REWRITE_PASS_INTERNAL_H_
diff --git a/tensorflow/core/tpu/graph_rewrite/host_training_loop_optimization_util.cc b/tensorflow/core/tpu/graph_rewrite/host_training_loop_optimization_util.cc
new file mode 100644
index 00000000000..2f4c1c4eabe
--- /dev/null
+++ b/tensorflow/core/tpu/graph_rewrite/host_training_loop_optimization_util.cc
@@ -0,0 +1,629 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/ + +#include "tensorflow/core/tpu/graph_rewrite/host_training_loop_optimization_util.h" + +#include +#include +#include + +#include "absl/container/flat_hash_set.h" +#include "absl/container/node_hash_set.h" +#include "tensorflow/compiler/tf2xla/functionalize_control_flow_util.h" +#include "tensorflow/compiler/tf2xla/tf2xla_util.h" +#include "tensorflow/core/graph/algorithm.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/gtl/cleanup.h" +#include "tensorflow/core/protobuf/tpu/compile_metadata.pb.h" +#include "tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.h" + +namespace tensorflow { +namespace tpu { + +namespace { + +constexpr char kDefaultShardingValue[] = ""; + +const Edge* FindEdgeConnecting(const Node* src, const Node* dst) { + for (const auto e : src->out_edges()) { + if (e->dst()->name() == dst->name()) return &(*e); + } + return nullptr; +} + +// Contains TPUExecute node and its DT_RESOURCE input nodes that +// correspond to model weights. +struct ExecuteNodeInfo { + Node* execute_node; + std::vector var_inputs; +}; + +// Returns whether `node` is in `execute_nodes` or `(identity) -> execute`. +bool IsExecuteNodeOrIdentityToExecuteNode( + const Graph& graph, const std::unordered_set& loop_nodes, + const absl::flat_hash_set& execute_nodes, Node* node) { + if (execute_nodes.find(node) != execute_nodes.end()) return true; + if (loop_nodes.find(node) == loop_nodes.end()) return false; + if (node->IsNextIteration()) return true; + if (!node->IsIdentity()) return false; + + for (const Edge* e : node->out_edges()) { + if (e->IsControlEdge()) continue; + + Node* node = e->dst(); + if (!IsExecuteNodeOrIdentityToExecuteNode(graph, loop_nodes, execute_nodes, + node)) { + return false; + } + } + + return true; +} + +// From input node to the TPUExecute op, finds the corresponding Enter node +// by searching/traversing nodes in below pattern of nodes: +// Enter ----> (identity) ---> While body input +// Returns nullptr if the Enter node is not found. +xla::StatusOr FindEnterNodeFromTPUExecuteNodeInput(Node* input_node) { + Node* node = input_node; + while (node->IsIdentity()) { + TF_RETURN_IF_ERROR(node->input_node(0, &node)); + } + + if (node->IsEnter()) { + return node; + } + return nullptr; +} + +xla::StatusOr ResourceOnlyUsedForTPUExecuteInLoop( + const Graph& graph, const std::unordered_set& loop_nodes, + const Node* enter_node, const absl::flat_hash_set execute_nodes) { + for (const Edge* output_edge : enter_node->out_edges()) { + Node* output_node = output_edge->dst(); + if (output_edge->IsControlEdge() || output_node->IsExit()) continue; + + // If output node is not execute node, it must be output node + // to the while loop body. + if (!IsExecuteNodeOrIdentityToExecuteNode(graph, loop_nodes, execute_nodes, + output_node)) { + return false; + } + } + return true; +} + +// Given a TPUCompile node, find all TPUExecute nodes that executes the compiled +// program and its model weight variable inputs as well. +// TPUCompileMetadataProto of TPUCompile node must be reset to `new_metadata` +// if new reshard ops are added. 
+Status ExtractExecuteNodeInfo(const Node* compile_node, const Graph& graph, + const std::unordered_set& loop_nodes, + std::vector* execute_node_info, + TPUCompileMetadataProto* new_metadata) { + string metadata_string; + TF_RETURN_IF_ERROR( + GetNodeAttr(compile_node->attrs(), "metadata", &metadata_string)); + new_metadata->ParsePartialFromString(metadata_string); + if (new_metadata->num_cores_per_replica() != 1) { + // We do not support model parallelism yet. + return Status::OK(); + } + + execute_node_info->clear(); + for (Node* node : compile_node->out_nodes()) { + if (node->type_string() == "TPUExecute") { + execute_node_info->push_back({node}); + } + } + if (execute_node_info->empty()) { + return Status::OK(); + } + TF_RET_CHECK(execute_node_info->size() == new_metadata->num_replicas()) + << "Number of replicas does not equal number of execute nodes: " + << new_metadata->num_replicas() << " vs " << execute_node_info->size(); + DataTypeVector arg_types; + TF_RETURN_IF_ERROR(GetNodeAttr((*execute_node_info)[0].execute_node->attrs(), + "Targs", &arg_types)); + for (int64 i = 0; i < arg_types.size(); ++i) { + if (arg_types[i] != DT_RESOURCE) { + continue; + } + const auto sharding_config = new_metadata->args(i).enable_xla_sharding(); + if (sharding_config != TPUCompileMetadataProto::Arg::TENTATIVE && + sharding_config != TPUCompileMetadataProto::Arg::ALLOWED) { + continue; + } + std::vector edges(execute_node_info->size()); + bool is_supported = true; + std::unordered_map> + enter_to_execute_nodes; + for (int64 j = 0; j < edges.size(); ++j) { + auto execute = (*execute_node_info)[j].execute_node; + TF_RETURN_IF_ERROR(execute->input_edge(i, &edges[j])); + TF_RET_CHECK(edges[j]->src()->output_type(edges[j]->src_output()) == + arg_types[i]) + << "Execute op has an unexpected input type."; + // Traverse backwards to find the Enter node from which the input is + // passed. + // This makes sure that we are checking the usages of all potential + // aliases of the input node as well. + TF_ASSIGN_OR_RETURN(auto enter_node, FindEnterNodeFromTPUExecuteNodeInput( + edges[j]->src())); + if (enter_node == nullptr) { + is_supported = false; + enter_to_execute_nodes.clear(); + break; + } + enter_to_execute_nodes[enter_node].insert(edges[j]->dst()); + } + + for (const auto& it : enter_to_execute_nodes) { + // Size of execute nodes should be either 1 (per-replica variables) or + // num_replicas (distributed variables). + if ((it.second.size() != 1) && + (it.second.size() != new_metadata->num_replicas())) { + is_supported = false; + break; + } + TF_ASSIGN_OR_RETURN(bool no_other_use, + ResourceOnlyUsedForTPUExecuteInLoop( + graph, loop_nodes, it.first, it.second)); + if (!no_other_use) { + is_supported = false; + break; + } + } + + // Add the variable input edges only when they are supported for all + // executes. + if (is_supported) { + for (int64 j = 0; j < edges.size(); ++j) { + (*execute_node_info)[j].var_inputs.push_back(edges[j]); + } + new_metadata->mutable_args(i)->set_enable_xla_sharding( + TPUCompileMetadataProto::Arg::ALLOWED); + } + } + + int64 total = 0; + for (const auto& a : new_metadata->args()) { + if (a.enable_xla_sharding() == TPUCompileMetadataProto::Arg::ALLOWED) { + total++; + } + } + TF_RET_CHECK(total == (*execute_node_info)[0].var_inputs.size()) + << " total " << total << " var_inputs " + << (*execute_node_info)[0].var_inputs.size(); + if (total == 0) { + // We don't need to process anything if no input is added. 
+ execute_node_info->clear(); + } + return Status::OK(); +} + +bool IsTPUCompileOp(const Node& n) { return n.type_string() == "TPUCompile"; } + +void FindTPUCompileNodes( + const std::string* current_function_name, + const AttrValueMap* current_function_attr, + const std::unordered_map& frames, + std::vector* host_training_loops_info) { + // Adds frames with no children (i.e., the innermost frames) to a worklist. + std::deque worklist; + + for (auto& frame : frames) { + if (frame.second.num_children == 0) { + worklist.push_back(&frame.second); + } + } + + // Check TPUCompile node from the innermost while loop to the outermost + // while loop. + while (!worklist.empty()) { + const WhileLoopFrame* frame = worklist.front(); + worklist.pop_front(); + + for (const auto& n : frame->nodes) { + if (!IsTPUCompileOp(*n)) continue; + + HostTrainingLoopInfo host_training_loop_info; + host_training_loop_info.compile_node_name = n->name(); + host_training_loop_info.loop_cond_node_name = frame->loop_cond->name(); + host_training_loop_info.while_loop_name = frame->name; + + for (const auto arg : frame->args) { + LoopArgInfo arg_info; + arg_info.enter_node_name = arg.enter->name(); + if (arg.exit) arg_info.exit_node_name = arg.exit->name(); + + host_training_loop_info.loop_arguments.push_back(std::move(arg_info)); + } + host_training_loop_info.loop_nodes = frame->nodes; + + if (current_function_name) { + host_training_loop_info.encapsulating_function_name = + *current_function_name; + } + if (current_function_attr) { + host_training_loop_info.encapsulating_function_attrs = + *current_function_attr; + } + + host_training_loops_info->emplace_back( + std::move(host_training_loop_info)); + } + + // If the parent has no remaining children, add it to the worklist. + --frame->parent->num_children; + if (frame->parent->num_children == 0) { + worklist.push_back(frame->parent); + } + } +} + +// From while loop cond node, finds all loop exit nodes by searching/traversing +// nodes in below pattern of nodes: +// LoopCond -----> Switch -----> Exit +std::vector FindLoopExitNodes(const Node& loop_cond) { + std::vector loop_exit_nodes; + for (const auto e_cond : loop_cond.out_edges()) { + if (e_cond->IsControlEdge() || !e_cond->dst()->IsSwitch()) continue; + auto switch_node = e_cond->dst(); + + for (const auto e_switch : switch_node->out_edges()) { + if (e_switch->IsControlEdge() || !e_switch->dst()->IsExit()) continue; + + loop_exit_nodes.push_back(e_switch->dst()); + } + } + return loop_exit_nodes; +} + +// Find any one of switch nodes in the while loop by traversing the graph +// from while loop condition node. +xla::StatusOr GetLoopSwitchNode(const Node& loop_cond_node) { + Node* loop_switch_node; + for (auto n : loop_cond_node.out_nodes()) { + if (n->IsSwitch()) { + loop_switch_node = n; + break; + } + } + + TF_RET_CHECK(loop_switch_node->IsSwitch()) + << "Unable to find any switch nodes."; + return loop_switch_node; +} + +// Returns or creates a node in that is executed before each loop iteration +// in the while loop. +Status GetOrCreateBeforeEachIterationNode(Graph* graph, Node* loop_switch_node, + Node** node_out) { + // If while loop switch node already has a outgoing data to true brach + // of the switch op, then reuse that node. + for (const auto out_edge : loop_switch_node->out_edges()) { + if (out_edge->src_output() == 1) { + *node_out = out_edge->dst(); + return Status::OK(); + } + } + + // Create Identity node that represents execution at every loop iteration. 
+ NodeDef at_loop_iteration_nodedef; + at_loop_iteration_nodedef.set_op("Identity"); + DataType dtype; + TF_RETURN_IF_ERROR(GetNodeAttr(loop_switch_node->def(), "T", &dtype)); + + AddNodeAttr("T", dtype, &at_loop_iteration_nodedef); + at_loop_iteration_nodedef.set_name(graph->NewName(strings::StrCat( + "TPUVariableReshard/before_iteration", "/_", internal::GetNodeId()))); + + Status status; + Node* at_loop_iteration_node = + graph->AddNode(at_loop_iteration_nodedef, &status); + TF_RETURN_IF_ERROR(status); + + graph->AddEdge(loop_switch_node, 1, at_loop_iteration_node, 0); + *node_out = at_loop_iteration_node; + return Status::OK(); +} + +// Injects NoOp node in that is executed after the very last iteration +// of the while loop but before the while loop exit node. +Status AddNoOpAfterLastIteration(Graph* graph, Node* loop_switch_node, + Node** node_out) { + // Find the exit node from loop switch node. + Node* exit_node; + for (const auto out_node : loop_switch_node->out_nodes()) { + if (out_node->IsExit()) { + exit_node = out_node; + break; + } + } + + TF_RET_CHECK(exit_node != nullptr) + << "Cannot find exit node connected to switch node :" + << loop_switch_node->name(); + + // Create NoOp that represents execution at the end of while loop + // last iteration. + NodeDef after_last_loop_iteration; + after_last_loop_iteration.set_op("Identity"); + DataType dtype; + TF_RETURN_IF_ERROR(GetNodeAttr(loop_switch_node->def(), "T", &dtype)); + + AddNodeAttr("T", dtype, &after_last_loop_iteration); + after_last_loop_iteration.set_name(graph->NewName(strings::StrCat( + "TPUVariableReshard/last_iteration", "/_", internal::GetNodeId()))); + + Status status; + Node* after_last_iteration_node = + graph->AddNode(after_last_loop_iteration, &status); + TF_RETURN_IF_ERROR(status); + + // Newly created node must be executed once after last iteration of the while + // loop and before while loop exits. + graph->AddEdge(loop_switch_node, 0, after_last_iteration_node, 0); + graph->AddControlEdge(after_last_iteration_node, exit_node); + *node_out = after_last_iteration_node; + return Status::OK(); +} + +} // namespace + +Status DetectHostTrainingLoop( + const std::string* current_function_name, + const AttrValueMap* current_function_attr, + const FunctionLibraryDefinition* library, Graph* graph, + FunctionLibraryRuntime* flr, + std::vector* host_training_loops_info) { + std::vector associated_function_list; + for (const auto* n : graph->nodes()) { + const auto associated_functions = GetAssociatedFunctions(*n, library); + if (associated_functions.empty()) continue; + + associated_function_list.insert(associated_function_list.end(), + associated_functions.begin(), + associated_functions.end()); + } + + Status ret_status = Status::OK(); + for (const auto& function : associated_function_list) { + if (function.type() != AssociatedFunctionInfo::kFunctionAttr) continue; + + // Convert the function to Graph. 
+ FunctionLibraryRuntime::Handle handle; + TF_RETURN_IF_ERROR(flr->Instantiate(function.func_name(), + AttrSlice(&function.attrs()), &handle)); + auto cleanup_handle = gtl::MakeCleanup([&]() { + auto s = flr->ReleaseHandle(handle); + if (!s.ok()) { + ret_status.Update(s); + } + }); + const FunctionBody* body = flr->GetFunctionBody(handle); + Graph* function_graph = body->graph; + TF_RETURN_IF_ERROR(DetectHostTrainingLoop( + &function.func_name(), &function.attrs(), library, function_graph, flr, + host_training_loops_info)); + } + + // BuildControlFlowInfo() requires that the graph's source node is connected + // to all source nodes in the graph. Many graphs violate this invariant. + // As so, add edges to source/sink nodes so that this invariant is kept. + FixupSourceAndSinkEdges(graph); + std::vector cf_info; + TF_RETURN_IF_ERROR( + BuildControlFlowInfo(graph, &cf_info, /*unreachable_nodes=*/nullptr)); + + std::unordered_map frames; + TF_RETURN_IF_ERROR(ExtractWhileLoopFrames(cf_info, graph, &frames)); + FindTPUCompileNodes(current_function_name, current_function_attr, frames, + host_training_loops_info); + return ret_status; +} + +Status AddReshardOp(Graph* graph, const HostTrainingLoopInfo& host_loop_info) { + const auto& compile_node_name = host_loop_info.compile_node_name; + const auto node_name_map = graph->BuildNodeNameIndex(); + const auto node_it = node_name_map.find(compile_node_name); + TF_RET_CHECK(node_it != node_name_map.end()) + << "Unable to find compile node : " << compile_node_name; + + const auto compile_node = node_it->second; + std::vector execute_nodes_info; + + Status status; + TPUCompileMetadataProto metadata; + status = + ExtractExecuteNodeInfo(compile_node, *graph, host_loop_info.loop_nodes, + &execute_nodes_info, &metadata); + if (!status.ok()) { + LOG(ERROR) << "Encountered error when trying to extract execute nodes, " + "skipping host loop optimization. Status: " + << status.ToString(); + return Status::OK(); + } + + if (execute_nodes_info.empty()) { + return Status::OK(); + } + + // Update the TPUCompileMetadata such that sharding config of the + // sharded resource variable inputs is set to ALLOWED instead of + // TENTATIVE. + string new_metadata_string; + metadata.SerializeToString(&new_metadata_string); + compile_node->ClearAttr("metadata"); + compile_node->AddAttr("metadata", new_metadata_string); + + // Unsharding of the model weight variables must happen only at the very + // last loop iteration. As so, add while loop condition predicate as an + // input to the sharding switch node. If loop condition is true, we do not + // unshard. + const auto& cond_node_name = host_loop_info.loop_cond_node_name; + auto loop_cond_node_it = node_name_map.find(cond_node_name); + TF_RET_CHECK(loop_cond_node_it != node_name_map.end()) + << "Cannot find loop condition node : " << cond_node_name; + auto* loop_condition_node = loop_cond_node_it->second; + + // In order to make sure that shard/unshard operations are invoked + // at the start of every loop body and at the end of last iteration + // of the loop, respectively, traverse the graph and find a switch node + // of the host training loop. 
+ TF_ASSIGN_OR_RETURN(Node * switch_node, + GetLoopSwitchNode(*loop_condition_node)); + + Node* after_last_iteration_node; + TF_RETURN_IF_ERROR(AddNoOpAfterLastIteration(graph, switch_node, + &after_last_iteration_node)); + + Node* before_loop_iteration_node; + TF_RETURN_IF_ERROR(GetOrCreateBeforeEachIterationNode( + graph, switch_node, &before_loop_iteration_node)); + + // Create const op that represents default sharding value + // (i.e. no-op sharding). + NodeDef default_sharding; + default_sharding.set_op("Const"); + default_sharding.set_name(graph->NewName(strings::StrCat( + "TPUVariableReshard/default_shard_state", "/_", internal::GetNodeId()))); + AddNodeAttr("dtype", DT_STRING, &default_sharding); + + Tensor t(DT_STRING, {2}); + t.vec()(0) = kDefaultShardingValue; + t.vec()(1) = kDefaultShardingValue; + t.AsProtoTensorContent( + (*default_sharding.mutable_attr())["value"].mutable_tensor()); + + Node* default_sharding_node = graph->AddNode(default_sharding, &status); + TF_RETURN_IF_ERROR(status); + // Add control edge between loop condition to make sure that + // default_sharding_node node is inside the while loop frame. + graph->AddControlEdge(loop_condition_node, default_sharding_node); + + // Build a no-op node used to add control edges after unshard nodes. + NodeDef after_unshard; + after_unshard.set_op("NoOp"); + after_unshard.set_name(graph->NewName(strings::StrCat( + "TPUVariableReshard/last_iteration", "/_", internal::GetNodeId()))); + auto after_unshard_node = graph->AddNode(after_unshard, &status); + TF_RETURN_IF_ERROR(status); + + for (auto info : execute_nodes_info) { + auto execute_node = info.execute_node; + // Create Reshard op that optionally shards model weight variables + // prior to program execution. + NodeDef reshard_node_def; + reshard_node_def.set_name(graph->NewName(strings::StrCat( + "TPUVariableReshard/reshard", "/_", internal::GetNodeId()))); + reshard_node_def.set_op("TPUReshardVariables"); + AddNodeAttr("N", static_cast(info.var_inputs.size()), + &reshard_node_def); + Node* reshard_op_node = graph->AddNode(reshard_node_def, &status); + if (!status.ok()) return status; + + reshard_op_node->set_assigned_device_name( + execute_node->assigned_device_name()); + + // Reshard op must execute at every loop iteration prior to + // TPUExecute node. + graph->AddControlEdge(before_loop_iteration_node, reshard_op_node); + graph->AddControlEdge(reshard_op_node, execute_node); + + for (int i = 0; i < info.var_inputs.size(); ++i) { + const auto variable_edge = info.var_inputs[i]; + graph->AddEdge(variable_edge->src(), variable_edge->src_output(), + reshard_op_node, i); + } + + const int new_key_input = info.var_inputs.size(); + // Add program input edge from the compiler(i.e. compilation key). + const auto compilation_key_edge = + FindEdgeConnecting(compile_node, execute_node); + graph->AddEdge(compile_node, compilation_key_edge->src_output(), + reshard_op_node, new_key_input); + + // Create VarHandleOp to store sharding state. Sharding state holds string + // compilation key that identifies whether the graph is re-compiled and the + // variables need to be sharded again. 
+ NodeDef var_handle_def; + var_handle_def.set_op("VarHandleOp"); + var_handle_def.set_name(graph->NewName(strings::StrCat( + "TPUVariableReshard/reshard_state", "/_", internal::GetNodeId()))); + AddNodeAttr("dtype", DT_STRING, &var_handle_def); + AddNodeAttr("shape", TensorShape({}), &var_handle_def); + Node* var_handle_node = graph->AddNode(var_handle_def, &status); + if (!status.ok()) return status; + + // Add control edge between `var_handle_def` node and while loop + // loop condition so that `var_handle_def` is inside the same while loop + // frame. + // TODO(hongjunchoi): Consider adding control edge from another node--such + // as input control node. + graph->AddControlEdge(loop_condition_node, var_handle_node); + + // Connect data edge between var handle op and reshard op. + const int format_state_input = new_key_input + 1; + graph->AddEdge(var_handle_node, 0, reshard_op_node, format_state_input); + + // Create Reshard op that represents unsharding after TPUExecute. + NodeDef unshard_node_def; + unshard_node_def.set_name(graph->NewName(strings::StrCat( + "TPUVariableReshard/unshard", "/_", internal::GetNodeId()))); + unshard_node_def.set_op("TPUReshardVariables"); + AddNodeAttr("N", static_cast(info.var_inputs.size()), + &unshard_node_def); + Node* unshard_op_node = graph->AddNode(unshard_node_def, &status); + TF_RETURN_IF_ERROR(status); + + unshard_op_node->set_assigned_device_name( + execute_node->assigned_device_name()); + + for (int i = 0; i < info.var_inputs.size(); ++i) { + const auto variable_edge = info.var_inputs[i]; + // Connect model weight resource variables to unshard op. Since unshard op + // must be only invoked after the very last loop iteration, for each while + // loop inputs, we traverse backwards to find the switch node of the host + // training loop and connect `output_false` field of the switch node with + // unshard op. + TF_ASSIGN_OR_RETURN( + Node * enter_node, + FindEnterNodeFromTPUExecuteNodeInput(variable_edge->src())); + graph->AddEdge(enter_node, 0, unshard_op_node, i); + } + + // Add control dependency before/after unshard node and the control nodes. + graph->AddControlEdge(after_last_iteration_node, unshard_op_node); + graph->AddControlEdge(unshard_op_node, after_unshard_node); + + graph->AddEdge(default_sharding_node, 0, unshard_op_node, new_key_input); + + // Add data edge from sharding state var handle op to unshard op. + graph->AddEdge(var_handle_node, 0, unshard_op_node, format_state_input); + } + // Add control dependency from after_unshard_node to all exits nodes. This is + // to make sure that the unshard ops will be executed as long as any of the + // exits are used. + for (auto exit : FindLoopExitNodes(*loop_condition_node)) { + graph->AddControlEdge(after_unshard_node, exit); + } + return Status::OK(); +} + +} // namespace tpu +} // namespace tensorflow diff --git a/tensorflow/core/tpu/graph_rewrite/host_training_loop_optimization_util.h b/tensorflow/core/tpu/graph_rewrite/host_training_loop_optimization_util.h new file mode 100644 index 00000000000..8f9be8549cc --- /dev/null +++ b/tensorflow/core/tpu/graph_rewrite/host_training_loop_optimization_util.h @@ -0,0 +1,80 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_TPU_GRAPH_REWRITE_HOST_TRAINING_LOOP_OPTIMIZATION_UTIL_H_
+#define TENSORFLOW_CORE_TPU_GRAPH_REWRITE_HOST_TRAINING_LOOP_OPTIMIZATION_UTIL_H_
+
+#include
+#include
+#include
+
+#include "absl/types/optional.h"
+#include "tensorflow/compiler/tf2xla/functionalize_control_flow_util.h"
+#include "tensorflow/core/common_runtime/function.h"
+#include "tensorflow/core/graph/graph.h"
+
+namespace tensorflow {
+namespace tpu {
+
+struct LoopArgInfo {
+  std::string enter_node_name;
+  // Exit nodes are optional for loop invariant while loop args.
+  absl::optional<std::string> exit_node_name;
+};
+
+struct HostTrainingLoopInfo {
+  // Name and attribute information about the function in which
+  // the host training loop is included. If the host training loop is not
+  // inside a function call, then `encapsulating_function_name` and
+  // `encapsulating_function_attrs` are nullopt.
+  absl::optional<std::string> encapsulating_function_name;
+  absl::optional<AttrValueMap> encapsulating_function_attrs;
+
+  // Name of the TPU compile node within the host training loop.
+  std::string compile_node_name;
+
+  // Name of the while loop in which the TPU compile op is located.
+  std::string while_loop_name;
+
+  // Name of the node that represents the loop condition.
+  std::string loop_cond_node_name;
+
+  // Exit and Enter node names for each loop argument.
+  std::vector<LoopArgInfo> loop_arguments;
+
+  std::unordered_set<Node*> loop_nodes;
+};
+
+// Walks through `graph` (recursing into functional nodes if present) and
+// identifies all host training loops. Host training loops are the innermost
+// while loops that encapsulate a TPUCompileOp node. This information is later
+// used to introduce host-loop-specific optimizations such as sharded weight
+// updates.
+Status DetectHostTrainingLoop(
+    const std::string* current_function_name,
+    const AttrValueMap* current_function_attr,
+    const FunctionLibraryDefinition* library, Graph* graph,
+    FunctionLibraryRuntime* flr,
+    std::vector<HostTrainingLoopInfo>* host_training_loops_info);
+
+// Injects variable reshard ops before and after the TPUExecute op inside the
+// host training loop body. This effectively applies sharded weight updates to
+// the model weight variables.
+Status AddReshardOp(Graph* graph, const HostTrainingLoopInfo& host_loop_info);
+
+}  // namespace tpu
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_TPU_GRAPH_REWRITE_HOST_TRAINING_LOOP_OPTIMIZATION_UTIL_H_
diff --git a/tensorflow/core/tpu/graph_rewrite/incomplete_nodedef_builder.cc b/tensorflow/core/tpu/graph_rewrite/incomplete_nodedef_builder.cc
new file mode 100644
index 00000000000..47187204f69
--- /dev/null
+++ b/tensorflow/core/tpu/graph_rewrite/incomplete_nodedef_builder.cc
@@ -0,0 +1,73 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/tpu/graph_rewrite/incomplete_nodedef_builder.h" + +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/core/common_runtime/function.h" + +namespace tensorflow { + +IncompleteNodeDefBuilder::IncompleteNodeDefBuilder(const string& name, + const string& op, + const NodeDebugInfo& debug) { + nodedef_.set_name(name); + nodedef_.set_op(op); + MergeDebugInfo(debug, &nodedef_); +} + +IncompleteNodeDefBuilder& IncompleteNodeDefBuilder::AddAttr( + const string& attr, const DataType& type) { + AddNodeAttr(attr, type, &nodedef_); + return *this; +} + +IncompleteNodeDefBuilder& IncompleteNodeDefBuilder::AddAttr(const string& attr, + int val) { + AddNodeAttr(attr, val, &nodedef_); + return *this; +} + +IncompleteNodeDefBuilder& IncompleteNodeDefBuilder::Device( + const string& device) { + nodedef_.set_device(device); + return *this; +} + +Status IncompleteNodeDefBuilder::Build(Graph* graph, Node** n) { + Status status; + *n = graph->AddNode(nodedef_, &status); + return status; +} + +IncompleteNodeDefBuilder IncompleteNodeDefBuilder::Identity( + const string& name, const DataType& type, const NodeDebugInfo& debug) { + return IncompleteNodeDefBuilder(name, "Identity", debug).AddAttr("T", type); +} + +IncompleteNodeDefBuilder IncompleteNodeDefBuilder::Merge( + const string& name, const DataType& type, const NodeDebugInfo& debug, + int n) { + return IncompleteNodeDefBuilder(name, "Merge", debug) + .AddAttr("T", type) + .AddAttr("N", n); +} + +IncompleteNodeDefBuilder IncompleteNodeDefBuilder::Switch( + const string& name, const DataType& type, const NodeDebugInfo& debug) { + return IncompleteNodeDefBuilder(name, "Switch", debug).AddAttr("T", type); +} + +} // namespace tensorflow diff --git a/tensorflow/core/tpu/graph_rewrite/incomplete_nodedef_builder.h b/tensorflow/core/tpu/graph_rewrite/incomplete_nodedef_builder.h new file mode 100644 index 00000000000..88e484f00cf --- /dev/null +++ b/tensorflow/core/tpu/graph_rewrite/incomplete_nodedef_builder.h @@ -0,0 +1,58 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CORE_TPU_GRAPH_REWRITE_NODEDEF_BUILDER_H_ +#define TENSORFLOW_CORE_TPU_GRAPH_REWRITE_NODEDEF_BUILDER_H_ + +#include + +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { + +// Convenience builder to build NodeDefs without specifying the inputs. This is +// similar to NodeDefBuilder except inputs are not specified. +// TODO(jpienaar): Clean up NodeDefBuilder and remove this class. +class IncompleteNodeDefBuilder { + public: + IncompleteNodeDefBuilder(const string& name, const string& op, + const NodeDebugInfo& debug); + + IncompleteNodeDefBuilder& AddAttr(const string& attr, const DataType& type); + IncompleteNodeDefBuilder& AddAttr(const string& attr, int val); + + IncompleteNodeDefBuilder& Device(const string& device); + + Status Build(Graph* graph, Node** n); + + static IncompleteNodeDefBuilder Identity(const string& name, + const DataType& type, + const NodeDebugInfo& debug); + static IncompleteNodeDefBuilder Merge(const string& name, + const DataType& type, + const NodeDebugInfo& debug, int n); + static IncompleteNodeDefBuilder Switch(const string& name, + const DataType& type, + const NodeDebugInfo& debug); + + private: + NodeDef nodedef_; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_TPU_GRAPH_REWRITE_NODEDEF_BUILDER_H_ diff --git a/tensorflow/core/tpu/graph_rewrite/tpu_rewrite_pass_registration.cc b/tensorflow/core/tpu/graph_rewrite/tpu_rewrite_pass_registration.cc index ef1831464e2..83a652d7aaa 100644 --- a/tensorflow/core/tpu/graph_rewrite/tpu_rewrite_pass_registration.cc +++ b/tensorflow/core/tpu/graph_rewrite/tpu_rewrite_pass_registration.cc @@ -15,6 +15,7 @@ limitations under the License. 
#include "tensorflow/core/common_runtime/optimization_registry.h" #include "tensorflow/core/tpu/graph_rewrite/distributed_tpu_configuration_rewrite_pass.h" +#include "tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.h" #include "tensorflow/core/tpu/graph_rewrite/encapsulate_tpu_computations_pass.h" #include "tensorflow/core/tpu/graph_rewrite/variable_merger_pass.h" @@ -30,8 +31,9 @@ REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 34, EncapsulateTPUComputationsPass); REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 39, ExtractOutsideCompilationPass); +REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 40, + DistributedTPURewritePass); REGISTER_OPTIMIZATION(OptimizationPassRegistry::POST_REWRITE_FOR_EXEC, 0, VariableMergerPass); - } // namespace } // namespace tensorflow diff --git a/tensorflow/stream_executor/tpu/BUILD b/tensorflow/stream_executor/tpu/BUILD index 6e00542ddf4..2f75c4c49cb 100644 --- a/tensorflow/stream_executor/tpu/BUILD +++ b/tensorflow/stream_executor/tpu/BUILD @@ -322,6 +322,7 @@ cc_library( name = "tpu_topology_external", srcs = ["tpu_topology.cc"], hdrs = ["tpu_topology.h"], + visibility = ["//visibility:public"], deps = [ "//tensorflow/core/platform:types", "//tensorflow/core/tpu:tpu_api", diff --git a/tensorflow/stream_executor/tpu/tpu_topology.h b/tensorflow/stream_executor/tpu/tpu_topology.h index b7d462804c9..b8c7b850463 100644 --- a/tensorflow/stream_executor/tpu/tpu_topology.h +++ b/tensorflow/stream_executor/tpu/tpu_topology.h @@ -30,6 +30,7 @@ struct TpuChipCoordinatesExternal { class TpuCoreLocationExternal { public: + TpuCoreLocationExternal() : core_location_(nullptr) {} explicit TpuCoreLocationExternal(void* core_location) : core_location_(core_location) {} TpuChipCoordinatesExternal chip_coordinates() const; From e925db6faef1067b97b9bc76238ba32a4d29b83b Mon Sep 17 00:00:00 2001 From: Mingming Liu Date: Sat, 18 Jul 2020 16:29:23 -0700 Subject: [PATCH 0785/2522] Move helper class ThreadSafeStatus into a separate file with unit test. 
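Editor's note (illustrative, not part of this patch): the class keeps the contract it had inside batch_kernels.cc: construction is lock-free, Update() may be called concurrently from several threads, and only the first non-OK status is retained. A minimal usage sketch, assuming only the ThreadSafeStatus API declared in this patch plus the standard tensorflow::errors helpers; the ThreadSafeStatusExample() wrapper is a hypothetical name, not code from the change:

  #include <thread>

  #include "tensorflow/core/kernels/batching_util/threadsafe_status.h"
  #include "tensorflow/core/platform/errors.h"
  #include "tensorflow/core/platform/logging.h"

  namespace tensorflow {

  // Two workers race to record an error; ThreadSafeStatus keeps whichever
  // non-OK status arrives first and ignores later updates (and OK statuses).
  void ThreadSafeStatusExample() {
    ThreadSafeStatus status;
    std::thread t1(
        [&status] { status.Update(errors::Internal("worker 1 failed")); });
    std::thread t2(
        [&status] { status.Update(errors::InvalidArgument("worker 2 failed")); });
    t1.join();
    t2.join();
    // Exactly one of the two errors above is retained here.
    LOG(INFO) << status.status();
  }

  }  // namespace tensorflow
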
PiperOrigin-RevId: 321974213 Change-Id: I78ceb91618c40da799097aa4d2048be0bf182c16 --- tensorflow/core/kernels/BUILD | 1 + tensorflow/core/kernels/batch_kernels.cc | 38 +------------ tensorflow/core/kernels/batching_util/BUILD | 24 ++++++++ .../batching_util/threadsafe_status.cc | 51 +++++++++++++++++ .../kernels/batching_util/threadsafe_status.h | 57 +++++++++++++++++++ .../batching_util/threadsafe_status_test.cc | 51 +++++++++++++++++ 6 files changed, 185 insertions(+), 37 deletions(-) create mode 100644 tensorflow/core/kernels/batching_util/threadsafe_status.cc create mode 100644 tensorflow/core/kernels/batching_util/threadsafe_status.h create mode 100644 tensorflow/core/kernels/batching_util/threadsafe_status_test.cc diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index eb1e658a744..c395f7d3e73 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -663,6 +663,7 @@ cc_library( "//tensorflow/core:lib_internal", "//tensorflow/core/kernels/batching_util:periodic_function_dynamic", "//tensorflow/core/kernels/batching_util:shared_batch_scheduler_hdrs", + "//tensorflow/core/kernels/batching_util:threadsafe_status", "//tensorflow/core/util:incremental_barrier", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/strings", diff --git a/tensorflow/core/kernels/batch_kernels.cc b/tensorflow/core/kernels/batch_kernels.cc index 818685a3fff..20885a89141 100644 --- a/tensorflow/core/kernels/batch_kernels.cc +++ b/tensorflow/core/kernels/batch_kernels.cc @@ -24,6 +24,7 @@ limitations under the License. #include "tensorflow/core/framework/types.h" #include "tensorflow/core/kernels/batching_util/periodic_function.h" #include "tensorflow/core/kernels/batching_util/shared_batch_scheduler.h" +#include "tensorflow/core/kernels/batching_util/threadsafe_status.h" #include "tensorflow/core/kernels/concat_lib.h" #include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/kernels/split_lib.h" @@ -309,43 +310,6 @@ Status Split(OpKernelContext* context, const Tensor& input, return split_status; } -// Wrapper class to allow both lock-free construction and concurrent updates on -// a shared 'status'. -class ThreadSafeStatus { - public: - const Status& status() const& TF_LOCKS_EXCLUDED(mutex_) { - tf_shared_lock lock(mutex_); - return status_; - } - Status status() && TF_LOCKS_EXCLUDED(mutex_) { - tf_shared_lock lock(mutex_); - return std::move(status_); - } - - // Retains the first error status: replaces the current status with - // `new_status` if `new_status` is not OK and the previous status is OK. - void Update(const Status& new_status) TF_LOCKS_EXCLUDED(mutex_) { - if (new_status.ok()) { - return; - } - - mutex_lock lock(mutex_); - status_.Update(new_status); - } - void Update(Status&& new_status) TF_LOCKS_EXCLUDED(mutex_) { - if (new_status.ok()) { - return; - } - - mutex_lock lock(mutex_); - status_.Update(std::forward(new_status)); - } - - private: - mutable mutex mutex_; - Status status_ TF_GUARDED_BY(mutex_); -}; - // A class encapsulating the state and logic for batching tensors. 
class BatchResource : public ResourceBase { public: diff --git a/tensorflow/core/kernels/batching_util/BUILD b/tensorflow/core/kernels/batching_util/BUILD index 3ae415ee31c..e92764712c8 100644 --- a/tensorflow/core/kernels/batching_util/BUILD +++ b/tensorflow/core/kernels/batching_util/BUILD @@ -53,6 +53,18 @@ cc_library( ], ) +cc_library( + name = "threadsafe_status", + srcs = ["threadsafe_status.cc"], + hdrs = ["threadsafe_status.h"], + deps = [ + "//tensorflow/core:lib", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/status", + "@com_google_absl//absl/synchronization", + ], +) + tf_cc_test( name = "batch_scheduler_test", srcs = ["batch_scheduler_test.cc"], @@ -187,6 +199,18 @@ tf_cc_test( ], ) +tf_cc_test( + name = "threadsafe_status_test", + srcs = ["threadsafe_status_test.cc"], + deps = [ + ":threadsafe_status", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + cc_library( name = "fake_clock_env", testonly = 1, diff --git a/tensorflow/core/kernels/batching_util/threadsafe_status.cc b/tensorflow/core/kernels/batching_util/threadsafe_status.cc new file mode 100644 index 00000000000..fa5cda7161b --- /dev/null +++ b/tensorflow/core/kernels/batching_util/threadsafe_status.cc @@ -0,0 +1,51 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/kernels/batching_util/threadsafe_status.h" + +#include "absl/base/thread_annotations.h" +#include "absl/status/status.h" +#include "absl/synchronization/mutex.h" +#include "tensorflow/core/platform/mutex.h" + +namespace tensorflow { +const Status& ThreadSafeStatus::status() const& { + tf_shared_lock lock(mutex_); + return status_; +} + +Status ThreadSafeStatus::status() && { + tf_shared_lock lock(mutex_); + return std::move(status_); +} + +void ThreadSafeStatus::Update(const Status& new_status) { + if (new_status.ok()) { + return; + } + + mutex_lock lock(mutex_); + status_.Update(new_status); +} + +void ThreadSafeStatus::Update(Status&& new_status) { + if (new_status.ok()) { + return; + } + + mutex_lock lock(mutex_); + status_.Update(std::forward(new_status)); +} +} // namespace tensorflow diff --git a/tensorflow/core/kernels/batching_util/threadsafe_status.h b/tensorflow/core/kernels/batching_util/threadsafe_status.h new file mode 100644 index 00000000000..c14a8a90714 --- /dev/null +++ b/tensorflow/core/kernels/batching_util/threadsafe_status.h @@ -0,0 +1,57 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_KERNELS_BATCHING_UTIL_THREADSAFE_STATUS_H_ +#define TENSORFLOW_CORE_KERNELS_BATCHING_UTIL_THREADSAFE_STATUS_H_ + +#include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/platform/status.h" +#include "tensorflow/core/platform/thread_annotations.h" + +namespace tensorflow { +// Wrapper class to allow both lock-free construction and concurrent updates on +// a 'status'. +// +// Example Usage: +// std::thread threads[2]; +// ThreadSafeStatus thread_safe_status; +// threads[0] = std::thread([&]() { +// status.Update(errors::Internal("internal error")); +// }); +// threads[1] = std::thread([&]() { +// status.Update(errors::InvalidArgument("invalid argument")); +// }); +// threads[0].Join(); +// threads[1].Join(); +// +// NOTE: +// When updated in a multi-threading setup, only the first error is retained. +class ThreadSafeStatus { + public: + const Status& status() const& TF_LOCKS_EXCLUDED(mutex_); + Status status() && TF_LOCKS_EXCLUDED(mutex_); + + // Retains the first error status: replaces the current status with + // `new_status` if `new_status` is not OK and the previous status is OK. + void Update(const Status& new_status) TF_LOCKS_EXCLUDED(mutex_); + void Update(Status&& new_status) TF_LOCKS_EXCLUDED(mutex_); + + private: + mutable mutex mutex_; + Status status_ TF_GUARDED_BY(mutex_); +}; +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_KERNELS_BATCHING_UTIL_THREADSAFE_STATUS_H_ diff --git a/tensorflow/core/kernels/batching_util/threadsafe_status_test.cc b/tensorflow/core/kernels/batching_util/threadsafe_status_test.cc new file mode 100644 index 00000000000..e0c5d03c8a4 --- /dev/null +++ b/tensorflow/core/kernels/batching_util/threadsafe_status_test.cc @@ -0,0 +1,51 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/kernels/batching_util/threadsafe_status.h" + +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/protobuf/error_codes.pb.h" + +namespace tensorflow { +namespace { + +TEST(ThreadSafeStatus, DefaultOk) { + ThreadSafeStatus status; + TF_EXPECT_OK(status.status()); +} + +TEST(ThreadSafeStatus, Update) { + ThreadSafeStatus status; + TF_EXPECT_OK(status.status()); + + status.Update(errors::FailedPrecondition("original error")); + EXPECT_EQ(status.status().code(), error::FAILED_PRECONDITION); + + status.Update(Status::OK()); + EXPECT_EQ(status.status().code(), error::FAILED_PRECONDITION); + + status.Update(errors::Internal("new error")); + EXPECT_EQ(status.status().code(), error::FAILED_PRECONDITION); +} + +TEST(ThreadSafeStatus, Move) { + ThreadSafeStatus status; + TF_EXPECT_OK(std::move(status).status()); +} + +} // namespace +} // namespace tensorflow From 6ef62c88a59e91eadc4d91142c195be5b4f2cf4b Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Sat, 18 Jul 2020 19:29:33 -0700 Subject: [PATCH 0786/2522] Remove Keras usage of private TF API ops.losses.util. PiperOrigin-RevId: 321984193 Change-Id: I29f6662a2b93f3f4b6911da99c19a2cbe6643b9d --- tensorflow/python/keras/utils/losses_utils.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/keras/utils/losses_utils.py b/tensorflow/python/keras/utils/losses_utils.py index aebae99d3eb..bd8aeb4065f 100644 --- a/tensorflow/python/keras/utils/losses_utils.py +++ b/tensorflow/python/keras/utils/losses_utils.py @@ -26,7 +26,6 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops.losses import loss_reduction -from tensorflow.python.ops.losses import util as tf_losses_utils from tensorflow.python.util.tf_export import keras_export @@ -260,8 +259,16 @@ def compute_weighted_loss(losses, if not isinstance(sample_weight, keras_tensor.KerasTensor): sample_weight = ops.convert_to_tensor_v2(sample_weight) - weighted_losses = tf_losses_utils.scale_losses_by_sample_weight( - losses, sample_weight) + + # TODO(psv): Handle casting here in a better way, eg. if losses is float64 + # we do not want to lose precision. + losses = math_ops.cast(losses, 'float32') + sample_weight = math_ops.cast(sample_weight, 'float32') + # Update dimensions of `sample_weight` to match with `losses` if possible. + losses, _, sample_weight = squeeze_or_expand_dimensions( # pylint: disable=unbalanced-tuple-unpacking + losses, None, sample_weight) + weighted_losses = math_ops.multiply(losses, sample_weight) + # Apply reduction function to the individual weighted losses. loss = reduce_weighted_loss(weighted_losses, reduction) # Convert the result back to the input type. From e7173c2da7ca32464c9238335eb6151254ff255c Mon Sep 17 00:00:00 2001 From: Blake Hechtman Date: Sat, 18 Jul 2020 20:40:40 -0700 Subject: [PATCH 0787/2522] [XLA] Enable support for strength reduction of integer dots and add more tests. 
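The rewrite in question turns a dot whose lhs or rhs carries only batch and contracting dimensions into reduce(multiply(broadcast(transpose(...)))); this change lets it fire for integer element types as well, with the intermediate type falling back to the dot's own element type when it is not floating point. A rough, runnable Python sketch of the identity being relied on (the values are illustrative only, not taken from the new tests):

    # dot(lhs, rhs) where rhs has only a contracting dimension is just an
    # elementwise multiply followed by an additive reduce, per output element.
    lhs = [[1, 2], [3, 4]]   # think s32[2,2]
    rhs = [5, 6]             # think s32[2]
    dot = [sum(l * r for l, r in zip(row, rhs)) for row in lhs]
    print(dot)  # [17, 39]; exact, since everything stays integral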
PiperOrigin-RevId: 321987959 Change-Id: I56b573b24c5197286f0a5c2ac39639497c83defa --- .../xla/service/algebraic_simplifier.cc | 8 +-- .../compiler/xla/tests/dot_operation_test.cc | 59 +++++++++++++++++-- 2 files changed, 57 insertions(+), 10 deletions(-) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 3e012fc41b8..c0adef44087 100755 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -2129,8 +2129,6 @@ Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) { // If the lhs or rhs have only batch and contracting dimensions, a dot can be // rewritten as reduce(mul(broadcast(transpose(x)),broadcast(transpose(y)))) if (options_.enable_dot_strength_reduction() && - (ShapeUtil::ElementIsFloating(dot->shape()) || - ShapeUtil::ElementIsComplex(dot->shape())) && ((dot->dot_dimension_numbers().lhs_batch_dimensions_size() + dot->dot_dimension_numbers().lhs_contracting_dimensions_size() == lhs->shape().rank()) || @@ -2192,9 +2190,9 @@ Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) { std::vector reduce_dims( dot->dot_dimension_numbers().lhs_contracting_dimensions_size()); PrimitiveType dot_type = - ShapeUtil::ElementIsComplex(dot->shape()) - ? dot->shape().element_type() - : dot->shape().element_type() == F64 ? F64 : F32; + ShapeUtil::ElementIsFloating(dot->shape()) + ? (dot->shape().element_type() == F64 ? F64 : F32) + : dot->shape().element_type(); new_dot = AsType(new_dot, dot_type); const int64 outer_dims = std::max(rhs_outer_dims, lhs_outer_dims); absl::c_iota( diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc index 26cb25acbfe..71cfd95f77f 100644 --- a/tensorflow/compiler/xla/tests/dot_operation_test.cc +++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc @@ -1429,19 +1429,68 @@ ENTRY main { EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{4e-3, 4e-3})); } -XLA_TEST_F(DotOperationTextTest, IntegerDotCodegen) { +XLA_TEST_F(DotOperationTextTest, S32IotaDot) { absl::string_view hlo_string = R"( HloModule SmallIntegerDot ENTRY SmallIntegerDot { - arg0 = s32[1,2,2] parameter(0) - arg1 = s32[1,2,1] parameter(1) - ROOT dot = s32[1,2,1] dot(arg0, arg1), lhs_batch_dims={0}, lhs_contracting_dims={2}, rhs_batch_dims={0}, rhs_contracting_dims={1} + arg0 = s32[5,55,8] iota(), iota_dimension=1 + arg1 = s32[5,8,200] iota(), iota_dimension=2 + ROOT dot = s32[5,55,200] dot(arg0, arg1), lhs_batch_dims={0}, lhs_contracting_dims={2}, rhs_batch_dims={0}, rhs_contracting_dims={1} } )"; - EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{4e-3, 4e-3})); + EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0, 0})); +} + +XLA_TEST_F(DotOperationTextTest, S32IotaSquaredDot) { + absl::string_view hlo_string = + R"( +HloModule SmallIntegerDot + +ENTRY SmallIntegerDot { + arg0 = s32[16,2] iota(), iota_dimension=0 + a = s32[16,2] multiply(arg0, arg0) + r = s32[16,2] multiply(a, a) + arg1 = s32[2,98] iota(), iota_dimension=1 + b = s32[2,98] multiply(arg1, arg1) + s = s32[2,98] multiply(b, b) + ROOT dot = s32[16,98] dot(r, s), lhs_contracting_dims={1}, rhs_contracting_dims={0} +} +)"; + + EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0, 0})); +} + +XLA_TEST_F(DotOperationTextTest, DISABLED_ON_CPU(S8Dot)) { + absl::string_view hlo_string = + R"( +HloModule SmallIntegerDot + +ENTRY SmallIntegerDot { + arg0 = s8[20,2] parameter(0) + arg1 = s8[2,20] parameter(1) + ROOT dot = 
s8[20,20] dot(arg0, arg1), lhs_contracting_dims={1}, rhs_contracting_dims={0} +} +)"; + + EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0, 0})); +} + +XLA_TEST_F(DotOperationTextTest, S32Dot) { + absl::string_view hlo_string = + R"( +HloModule SmallIntegerDot + +ENTRY SmallIntegerDot { + arg0 = s32[20,55] parameter(0) + arg1 = s32[55,20] parameter(1) + ROOT dot = s32[20,20] dot(arg0, arg1), lhs_contracting_dims={1}, rhs_contracting_dims={0} +} +)"; + + EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0, 0})); } XLA_TEST_F(DotOperationTextTest, GpuTransposeOutput) { From 1bc2d1b6716f655d4d39a0af785b7548c8c62199 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sat, 18 Jul 2020 20:54:53 -0700 Subject: [PATCH 0788/2522] [XLA] Enable support for strength reduction of integer dots and add more tests. PiperOrigin-RevId: 321988771 Change-Id: I1fabf9bffd45c45a8657961253b621a1d3eb433f --- .../xla/service/algebraic_simplifier.cc | 8 ++- .../compiler/xla/tests/dot_operation_test.cc | 59 ++----------------- 2 files changed, 10 insertions(+), 57 deletions(-) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index c0adef44087..3e012fc41b8 100755 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -2129,6 +2129,8 @@ Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) { // If the lhs or rhs have only batch and contracting dimensions, a dot can be // rewritten as reduce(mul(broadcast(transpose(x)),broadcast(transpose(y)))) if (options_.enable_dot_strength_reduction() && + (ShapeUtil::ElementIsFloating(dot->shape()) || + ShapeUtil::ElementIsComplex(dot->shape())) && ((dot->dot_dimension_numbers().lhs_batch_dimensions_size() + dot->dot_dimension_numbers().lhs_contracting_dimensions_size() == lhs->shape().rank()) || @@ -2190,9 +2192,9 @@ Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) { std::vector reduce_dims( dot->dot_dimension_numbers().lhs_contracting_dimensions_size()); PrimitiveType dot_type = - ShapeUtil::ElementIsFloating(dot->shape()) - ? (dot->shape().element_type() == F64 ? F64 : F32) - : dot->shape().element_type(); + ShapeUtil::ElementIsComplex(dot->shape()) + ? dot->shape().element_type() + : dot->shape().element_type() == F64 ? 
F64 : F32; new_dot = AsType(new_dot, dot_type); const int64 outer_dims = std::max(rhs_outer_dims, lhs_outer_dims); absl::c_iota( diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc index 71cfd95f77f..26cb25acbfe 100644 --- a/tensorflow/compiler/xla/tests/dot_operation_test.cc +++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc @@ -1429,68 +1429,19 @@ ENTRY main { EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{4e-3, 4e-3})); } -XLA_TEST_F(DotOperationTextTest, S32IotaDot) { +XLA_TEST_F(DotOperationTextTest, IntegerDotCodegen) { absl::string_view hlo_string = R"( HloModule SmallIntegerDot ENTRY SmallIntegerDot { - arg0 = s32[5,55,8] iota(), iota_dimension=1 - arg1 = s32[5,8,200] iota(), iota_dimension=2 - ROOT dot = s32[5,55,200] dot(arg0, arg1), lhs_batch_dims={0}, lhs_contracting_dims={2}, rhs_batch_dims={0}, rhs_contracting_dims={1} + arg0 = s32[1,2,2] parameter(0) + arg1 = s32[1,2,1] parameter(1) + ROOT dot = s32[1,2,1] dot(arg0, arg1), lhs_batch_dims={0}, lhs_contracting_dims={2}, rhs_batch_dims={0}, rhs_contracting_dims={1} } )"; - EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0, 0})); -} - -XLA_TEST_F(DotOperationTextTest, S32IotaSquaredDot) { - absl::string_view hlo_string = - R"( -HloModule SmallIntegerDot - -ENTRY SmallIntegerDot { - arg0 = s32[16,2] iota(), iota_dimension=0 - a = s32[16,2] multiply(arg0, arg0) - r = s32[16,2] multiply(a, a) - arg1 = s32[2,98] iota(), iota_dimension=1 - b = s32[2,98] multiply(arg1, arg1) - s = s32[2,98] multiply(b, b) - ROOT dot = s32[16,98] dot(r, s), lhs_contracting_dims={1}, rhs_contracting_dims={0} -} -)"; - - EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0, 0})); -} - -XLA_TEST_F(DotOperationTextTest, DISABLED_ON_CPU(S8Dot)) { - absl::string_view hlo_string = - R"( -HloModule SmallIntegerDot - -ENTRY SmallIntegerDot { - arg0 = s8[20,2] parameter(0) - arg1 = s8[2,20] parameter(1) - ROOT dot = s8[20,20] dot(arg0, arg1), lhs_contracting_dims={1}, rhs_contracting_dims={0} -} -)"; - - EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0, 0})); -} - -XLA_TEST_F(DotOperationTextTest, S32Dot) { - absl::string_view hlo_string = - R"( -HloModule SmallIntegerDot - -ENTRY SmallIntegerDot { - arg0 = s32[20,55] parameter(0) - arg1 = s32[55,20] parameter(1) - ROOT dot = s32[20,20] dot(arg0, arg1), lhs_contracting_dims={1}, rhs_contracting_dims={0} -} -)"; - - EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0, 0})); + EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{4e-3, 4e-3})); } XLA_TEST_F(DotOperationTextTest, GpuTransposeOutput) { From 8fcb21c8e8ad1089e38294305093ecdb37bf0ded Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Sat, 18 Jul 2020 21:03:49 -0700 Subject: [PATCH 0789/2522] Remove references to private TF API `has_weights`. 
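The replacement helper is defined locally in keras/engine/base_layer_utils.py and duck-types "has weights" off the object's class: anything whose type exposes both trainable_weights and non_trainable_weights is auto-tracked in self._layers, Layer or not. A minimal, runnable Python sketch of that behaviour (WeightHolder is a made-up stand-in, not part of the change):

    def has_weights(obj):
      obj_type = type(obj)
      return (hasattr(obj_type, 'trainable_weights') and
              hasattr(obj_type, 'non_trainable_weights'))

    class WeightHolder(object):
      trainable_weights = []       # lists of tf.Variable in practice
      non_trainable_weights = []

    print(has_weights(WeightHolder()))  # True  -> tracked like a sub-layer
    print(has_weights(object()))        # False -> ordinary attribute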
PiperOrigin-RevId: 321989414 Change-Id: Ida7aeaafb81708280ff944b98ee3de7327537cea --- tensorflow/python/keras/engine/base_layer.py | 4 ++-- tensorflow/python/keras/engine/base_layer_utils.py | 6 ++++++ tensorflow/python/keras/engine/base_layer_v1.py | 4 ++-- tensorflow/python/keras/engine/training.py | 2 +- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/keras/engine/base_layer.py b/tensorflow/python/keras/engine/base_layer.py index d4c4c348742..e7c394e614d 100644 --- a/tensorflow/python/keras/engine/base_layer.py +++ b/tensorflow/python/keras/engine/base_layer.py @@ -2739,7 +2739,7 @@ class Layer(module.Module, version_utils.LayerVersionSelector): super(tracking.AutoTrackable, self).__delattr__(name) if (isinstance(existing_value, Layer) - or trackable_layer_utils.has_weights(existing_value)): + or base_layer_utils.has_weights(existing_value)): super(tracking.AutoTrackable, self).__setattr__( '_layers', [l for l in self._layers if l is not existing_value]) @@ -2789,7 +2789,7 @@ class Layer(module.Module, version_utils.LayerVersionSelector): # Be careful about metric if it becomes a Module in future. # Append value to self._layers if relevant if (getattr(self, '_auto_track_sub_layers', True) and - (isinstance(value, Layer) or trackable_layer_utils.has_weights(value))): + (isinstance(value, Layer) or base_layer_utils.has_weights(value))): self._maybe_create_attribute('_layers', []) # We need to check object identity to avoid de-duplicating empty # container types which compare equal. diff --git a/tensorflow/python/keras/engine/base_layer_utils.py b/tensorflow/python/keras/engine/base_layer_utils.py index bd7387258c1..d331e0d0b86 100644 --- a/tensorflow/python/keras/engine/base_layer_utils.py +++ b/tensorflow/python/keras/engine/base_layer_utils.py @@ -858,6 +858,12 @@ def is_split_variable(v): return hasattr(v, '_variable_list') or hasattr(v, '_variables') +def has_weights(obj): + obj_type = type(obj) + return (hasattr(obj_type, 'trainable_weights') and + hasattr(obj_type, 'non_trainable_weights')) + + # TODO(kathywu): This is a temporary hack. When a network of layers is revived # from SavedModel, only the top-level layer will have losses. This causes issues # in eager mode because the child layers may have graph losses diff --git a/tensorflow/python/keras/engine/base_layer_v1.py b/tensorflow/python/keras/engine/base_layer_v1.py index a459eaefbc3..e3cc738c434 100644 --- a/tensorflow/python/keras/engine/base_layer_v1.py +++ b/tensorflow/python/keras/engine/base_layer_v1.py @@ -2194,7 +2194,7 @@ class Layer(base_layer.Layer): super(tracking.AutoTrackable, self).__delattr__(name) if (isinstance(existing_value, Layer) - or trackable_layer_utils.has_weights(existing_value)): + or base_layer_utils.has_weights(existing_value)): super(tracking.AutoTrackable, self).__setattr__( '_layers', [l for l in self._layers if l is not existing_value]) @@ -2244,7 +2244,7 @@ class Layer(base_layer.Layer): # Be careful about metric if it becomes a Module in future. # Append value to self._layers if relevant if (getattr(self, '_auto_track_sub_layers', True) and - (isinstance(value, Layer) or trackable_layer_utils.has_weights(value))): + (isinstance(value, Layer) or base_layer_utils.has_weights(value))): self._maybe_create_attribute('_layers', []) # We need to check object identity to avoid de-duplicating empty # container types which compare equal. 
diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index ad72251ed9d..bbab6fc7f98 100644 --- a/tensorflow/python/keras/engine/training.py +++ b/tensorflow/python/keras/engine/training.py @@ -329,7 +329,7 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector): if all( isinstance(v, (base_layer.Layer, data_structures.TrackableDataStructure)) or - trackable_layer_utils.has_weights(v) for v in nest.flatten(value)): + base_layer_utils.has_weights(v) for v in nest.flatten(value)): try: self._base_model_initialized except AttributeError: From b9a6a50bb7818be65f5f8ee6df193b5d0bcffc0c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sat, 18 Jul 2020 21:46:19 -0700 Subject: [PATCH 0790/2522] Remove references to private TF API `has_weights`. PiperOrigin-RevId: 321991498 Change-Id: Iaa9b872a99e95f596224921716757a0e4ce43585 --- tensorflow/python/keras/engine/base_layer.py | 4 ++-- tensorflow/python/keras/engine/base_layer_utils.py | 6 ------ tensorflow/python/keras/engine/base_layer_v1.py | 4 ++-- tensorflow/python/keras/engine/training.py | 2 +- 4 files changed, 5 insertions(+), 11 deletions(-) diff --git a/tensorflow/python/keras/engine/base_layer.py b/tensorflow/python/keras/engine/base_layer.py index e7c394e614d..d4c4c348742 100644 --- a/tensorflow/python/keras/engine/base_layer.py +++ b/tensorflow/python/keras/engine/base_layer.py @@ -2739,7 +2739,7 @@ class Layer(module.Module, version_utils.LayerVersionSelector): super(tracking.AutoTrackable, self).__delattr__(name) if (isinstance(existing_value, Layer) - or base_layer_utils.has_weights(existing_value)): + or trackable_layer_utils.has_weights(existing_value)): super(tracking.AutoTrackable, self).__setattr__( '_layers', [l for l in self._layers if l is not existing_value]) @@ -2789,7 +2789,7 @@ class Layer(module.Module, version_utils.LayerVersionSelector): # Be careful about metric if it becomes a Module in future. # Append value to self._layers if relevant if (getattr(self, '_auto_track_sub_layers', True) and - (isinstance(value, Layer) or base_layer_utils.has_weights(value))): + (isinstance(value, Layer) or trackable_layer_utils.has_weights(value))): self._maybe_create_attribute('_layers', []) # We need to check object identity to avoid de-duplicating empty # container types which compare equal. diff --git a/tensorflow/python/keras/engine/base_layer_utils.py b/tensorflow/python/keras/engine/base_layer_utils.py index d331e0d0b86..bd7387258c1 100644 --- a/tensorflow/python/keras/engine/base_layer_utils.py +++ b/tensorflow/python/keras/engine/base_layer_utils.py @@ -858,12 +858,6 @@ def is_split_variable(v): return hasattr(v, '_variable_list') or hasattr(v, '_variables') -def has_weights(obj): - obj_type = type(obj) - return (hasattr(obj_type, 'trainable_weights') and - hasattr(obj_type, 'non_trainable_weights')) - - # TODO(kathywu): This is a temporary hack. When a network of layers is revived # from SavedModel, only the top-level layer will have losses. 
This causes issues # in eager mode because the child layers may have graph losses diff --git a/tensorflow/python/keras/engine/base_layer_v1.py b/tensorflow/python/keras/engine/base_layer_v1.py index e3cc738c434..a459eaefbc3 100644 --- a/tensorflow/python/keras/engine/base_layer_v1.py +++ b/tensorflow/python/keras/engine/base_layer_v1.py @@ -2194,7 +2194,7 @@ class Layer(base_layer.Layer): super(tracking.AutoTrackable, self).__delattr__(name) if (isinstance(existing_value, Layer) - or base_layer_utils.has_weights(existing_value)): + or trackable_layer_utils.has_weights(existing_value)): super(tracking.AutoTrackable, self).__setattr__( '_layers', [l for l in self._layers if l is not existing_value]) @@ -2244,7 +2244,7 @@ class Layer(base_layer.Layer): # Be careful about metric if it becomes a Module in future. # Append value to self._layers if relevant if (getattr(self, '_auto_track_sub_layers', True) and - (isinstance(value, Layer) or base_layer_utils.has_weights(value))): + (isinstance(value, Layer) or trackable_layer_utils.has_weights(value))): self._maybe_create_attribute('_layers', []) # We need to check object identity to avoid de-duplicating empty # container types which compare equal. diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index bbab6fc7f98..ad72251ed9d 100644 --- a/tensorflow/python/keras/engine/training.py +++ b/tensorflow/python/keras/engine/training.py @@ -329,7 +329,7 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector): if all( isinstance(v, (base_layer.Layer, data_structures.TrackableDataStructure)) or - base_layer_utils.has_weights(v) for v in nest.flatten(value)): + trackable_layer_utils.has_weights(v) for v in nest.flatten(value)): try: self._base_model_initialized except AttributeError: From f7ee1bff1d90aa0ac0a5e16a71c3c60f7ad96fdb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sat, 18 Jul 2020 23:31:59 -0700 Subject: [PATCH 0791/2522] GitHub Issue #26639 PiperOrigin-RevId: 321996999 Change-Id: I140a2b4ffc3acb6e32acd35603c52e12c52d6ef1 --- tensorflow/python/keras/applications/nasnet.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/python/keras/applications/nasnet.py b/tensorflow/python/keras/applications/nasnet.py index cb73aff2f07..4f71165f2e9 100644 --- a/tensorflow/python/keras/applications/nasnet.py +++ b/tensorflow/python/keras/applications/nasnet.py @@ -354,6 +354,7 @@ def NASNetMobile(input_shape=None, layer at the top of the network. weights: `None` (random initialization) or `imagenet` (ImageNet weights) + For loading `imagenet` weights, `input_shape` should be (224, 224, 3) input_tensor: Optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. @@ -429,6 +430,7 @@ def NASNetLarge(input_shape=None, layer at the top of the network. weights: `None` (random initialization) or `imagenet` (ImageNet weights) + For loading `imagenet` weights, `input_shape` should be (331, 331, 3) input_tensor: Optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. From c01a007594dc48fb024ce8b46353e01a75ab1a35 Mon Sep 17 00:00:00 2001 From: Dimitris Vardoulakis Date: Sat, 18 Jul 2020 23:50:04 -0700 Subject: [PATCH 0792/2522] Turn on logging for a convolution test. 
PiperOrigin-RevId: 321997730 Change-Id: I7c682d2d526b2b86464f17dcb945cbd7ba2f59e7 --- tensorflow/compiler/xla/tests/BUILD | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index b3353cf905c..b7e07c7d486 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -1130,6 +1130,27 @@ xla_test( name = "convolution_test_1d", timeout = "long", srcs = ["convolution_test_1d.cc"], + # Turn on logging so that VLOG statements don't appear uncovered to zapfhahn. + args = ["--vmodule=convolution_emitter=7"], + # In the open source build, convolution_test_1d_gpu fails because it doesn't + # recognize --vmodule. + disabled_backends = ["gpu"], + shard_count = 50, + tags = [ + "no_rocm", + "optonly", + ], + deps = CONVOLUTION_TEST_DEPS + [ + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + ], +) + +xla_test( + name = "convolution_test_1d_gpu", + timeout = "long", + srcs = ["convolution_test_1d.cc"], + backends = ["gpu"], shard_count = 50, tags = [ "no_rocm", From 7046d5dd3aa0761cdddeb17731e884370bba411a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 19 Jul 2020 02:01:40 -0700 Subject: [PATCH 0793/2522] Update GraphDef version to 467. PiperOrigin-RevId: 322005543 Change-Id: I4889b653f1292324dd505f6a2f15cafd6bd94519 --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index be71e6cd5bc..2933c311aa7 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 466 // Updated: 2020/7/18 +#define TF_GRAPH_DEF_VERSION 467 // Updated: 2020/7/19 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From 6c6e53ee31750a21b3049967a2e242e63c3dd0f8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 19 Jul 2020 02:01:41 -0700 Subject: [PATCH 0794/2522] compat: Update forward compatibility horizon to 2020-07-19 PiperOrigin-RevId: 322005546 Change-Id: I83dd5792fad12c0dd5d737443f8583ddfc551f98 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 4c122028412..a1f967fc876 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 18) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 19) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From dc89a6ddc7bf54d371a9408e6ace2b35d9b5a393 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Mon, 20 Jul 2020 01:11:23 +0700 Subject: [PATCH 0795/2522] Add new line Co-authored-by: Mihai Maruseac --- .../c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc index c00c52d9edd..7fc8e098908 100644 --- a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc @@ -142,6 +142,7 @@ typedef struct GCSFile { uint64_t buffer_start ABSL_GUARDED_BY(buffer_mutex); bool buffer_end_is_past_eof ABSL_GUARDED_BY(buffer_mutex); std::string buffer ABSL_GUARDED_BY(buffer_mutex); + GCSFile(std::string path, bool is_cache_enable, uint64_t buffer_size, ReadFn read_fn) : path(path), From f8e6f2f2af4ceb35d5e68f9ce1ec2186e2b684b2 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Mon, 20 Jul 2020 01:12:00 +0700 Subject: [PATCH 0796/2522] reordering the constructors Co-authored-by: Mihai Maruseac --- .../c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc index 7fc8e098908..2c488c123c1 100644 --- a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc @@ -148,11 +148,11 @@ typedef struct GCSFile { : path(path), is_cache_enable(is_cache_enable), buffer_size(buffer_size), + read_fn(std::move(read_fn)), buffer_mutex(), buffer_start(0), buffer_end_is_past_eof(false), - buffer(), - read_fn(std::move(read_fn)) {} + buffer() {} } GCSFile; void Cleanup(TF_RandomAccessFile* file) { From c89b5a6a5820bf771f7e20641ac53b83f2f6dabe Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Mon, 20 Jul 2020 01:38:32 +0700 Subject: [PATCH 0797/2522] Fix mutex condition --- .../filesystem/plugins/gcs/gcs_filesystem.cc | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc index 2c488c123c1..a542a45d0fb 100644 --- a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc @@ -161,6 +161,7 @@ void Cleanup(TF_RandomAccessFile* file) { } static void FillBuffer(uint64_t start, GCSFile* gcs_file, TF_Status* status) { + ABSL_EXCLUSIVE_LOCKS_REQUIRED(gcs_file->buffer_mutex) gcs_file->buffer_start = start; gcs_file->buffer.resize(gcs_file->buffer_size); auto read = @@ -463,11 +464,14 @@ void NewRandomAccessFile(const TF_Filesystem* filesystem, const char* path, const std::string& path, uint64_t offset, size_t n, char* buffer, TF_Status* status) -> int64_t { // TODO(vnvo2409): Check for `stat_cache`. - auto read = - is_cache_enabled - ? 
gcs_file->file_block_cache->Read(path, offset, n, buffer, status) - : LoadBufferFromGCS(path, offset, n, buffer, &gcs_file->gcs_client, - status); + int64_t read = 0; + if (is_cache_enabled) { + absl::ReaderMutexLock l(&gcs_file->block_cache_lock); + read = gcs_file->file_block_cache->Read(path, offset, n, buffer, status); + } else { + read = LoadBufferFromGCS(path, offset, n, buffer, &gcs_file->gcs_client, + status); + } if (TF_GetCode(status) != TF_OK) return -1; if (read < n) TF_SetStatus(status, TF_OUT_OF_RANGE, "Read less bytes than requested"); From a62cd317a0c913cb453b1b1e29e58238e285c230 Mon Sep 17 00:00:00 2001 From: Blake Hechtman Date: Sun, 19 Jul 2020 13:19:05 -0700 Subject: [PATCH 0798/2522] [XLA] Enable support for strength reduction of integer dots and add more tests. PiperOrigin-RevId: 322045005 Change-Id: Iead56c733e860764df6694dc8999d5fa2b740b12 --- .../xla/service/algebraic_simplifier.cc | 8 +-- .../compiler/xla/tests/dot_operation_test.cc | 59 +++++++++++++++++-- 2 files changed, 57 insertions(+), 10 deletions(-) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 3e012fc41b8..c0adef44087 100755 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -2129,8 +2129,6 @@ Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) { // If the lhs or rhs have only batch and contracting dimensions, a dot can be // rewritten as reduce(mul(broadcast(transpose(x)),broadcast(transpose(y)))) if (options_.enable_dot_strength_reduction() && - (ShapeUtil::ElementIsFloating(dot->shape()) || - ShapeUtil::ElementIsComplex(dot->shape())) && ((dot->dot_dimension_numbers().lhs_batch_dimensions_size() + dot->dot_dimension_numbers().lhs_contracting_dimensions_size() == lhs->shape().rank()) || @@ -2192,9 +2190,9 @@ Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) { std::vector reduce_dims( dot->dot_dimension_numbers().lhs_contracting_dimensions_size()); PrimitiveType dot_type = - ShapeUtil::ElementIsComplex(dot->shape()) - ? dot->shape().element_type() - : dot->shape().element_type() == F64 ? F64 : F32; + ShapeUtil::ElementIsFloating(dot->shape()) + ? (dot->shape().element_type() == F64 ? 
F64 : F32) + : dot->shape().element_type(); new_dot = AsType(new_dot, dot_type); const int64 outer_dims = std::max(rhs_outer_dims, lhs_outer_dims); absl::c_iota( diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc index 26cb25acbfe..71cfd95f77f 100644 --- a/tensorflow/compiler/xla/tests/dot_operation_test.cc +++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc @@ -1429,19 +1429,68 @@ ENTRY main { EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{4e-3, 4e-3})); } -XLA_TEST_F(DotOperationTextTest, IntegerDotCodegen) { +XLA_TEST_F(DotOperationTextTest, S32IotaDot) { absl::string_view hlo_string = R"( HloModule SmallIntegerDot ENTRY SmallIntegerDot { - arg0 = s32[1,2,2] parameter(0) - arg1 = s32[1,2,1] parameter(1) - ROOT dot = s32[1,2,1] dot(arg0, arg1), lhs_batch_dims={0}, lhs_contracting_dims={2}, rhs_batch_dims={0}, rhs_contracting_dims={1} + arg0 = s32[5,55,8] iota(), iota_dimension=1 + arg1 = s32[5,8,200] iota(), iota_dimension=2 + ROOT dot = s32[5,55,200] dot(arg0, arg1), lhs_batch_dims={0}, lhs_contracting_dims={2}, rhs_batch_dims={0}, rhs_contracting_dims={1} } )"; - EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{4e-3, 4e-3})); + EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0, 0})); +} + +XLA_TEST_F(DotOperationTextTest, S32IotaSquaredDot) { + absl::string_view hlo_string = + R"( +HloModule SmallIntegerDot + +ENTRY SmallIntegerDot { + arg0 = s32[16,2] iota(), iota_dimension=0 + a = s32[16,2] multiply(arg0, arg0) + r = s32[16,2] multiply(a, a) + arg1 = s32[2,98] iota(), iota_dimension=1 + b = s32[2,98] multiply(arg1, arg1) + s = s32[2,98] multiply(b, b) + ROOT dot = s32[16,98] dot(r, s), lhs_contracting_dims={1}, rhs_contracting_dims={0} +} +)"; + + EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0, 0})); +} + +XLA_TEST_F(DotOperationTextTest, DISABLED_ON_CPU(S8Dot)) { + absl::string_view hlo_string = + R"( +HloModule SmallIntegerDot + +ENTRY SmallIntegerDot { + arg0 = s8[20,2] parameter(0) + arg1 = s8[2,20] parameter(1) + ROOT dot = s8[20,20] dot(arg0, arg1), lhs_contracting_dims={1}, rhs_contracting_dims={0} +} +)"; + + EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0, 0})); +} + +XLA_TEST_F(DotOperationTextTest, S32Dot) { + absl::string_view hlo_string = + R"( +HloModule SmallIntegerDot + +ENTRY SmallIntegerDot { + arg0 = s32[20,55] parameter(0) + arg1 = s32[55,20] parameter(1) + ROOT dot = s32[20,20] dot(arg0, arg1), lhs_contracting_dims={1}, rhs_contracting_dims={0} +} +)"; + + EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0, 0})); } XLA_TEST_F(DotOperationTextTest, GpuTransposeOutput) { From 912cbcda147c7ea1ebb4125ce203cf5bc352d527 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Sun, 19 Jul 2020 14:05:42 -0700 Subject: [PATCH 0799/2522] Fix dependencies so that target headers passes parse_headers for Apple builds PiperOrigin-RevId: 322047310 Change-Id: Ic102eff275ae62e065c26ffa12dfa0f512eb4605 --- tensorflow/core/framework/BUILD | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/framework/BUILD b/tensorflow/core/framework/BUILD index df454f4c876..8de5f74823e 100644 --- a/tensorflow/core/framework/BUILD +++ b/tensorflow/core/framework/BUILD @@ -677,7 +677,11 @@ cc_library( name = "type_index", hdrs = ["type_index.h"], visibility = ["//visibility:private"], - deps = ["//tensorflow/core/platform:types"], + deps = [ + "//tensorflow/core/platform:hash", + "//tensorflow/core/platform:stringpiece", + "//tensorflow/core/platform:types", + ], ) cc_library( From 553ced87c83c10567914ec14f55abc0bb864b7d5 Mon Sep 17 00:00:00 2001 From: Dimitris Vardoulakis Date: Sun, 19 Jul 2020 15:13:36 -0700 Subject: [PATCH 0800/2522] Remove a test that is timing out from coverage. PiperOrigin-RevId: 322051406 Change-Id: Ie4f8815f60857c7a1651552c679254ddcf42f7fd --- tensorflow/compiler/xla/tests/BUILD | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index b7e07c7d486..37188299dca 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -1119,6 +1119,8 @@ xla_test( tags = [ "no_rocm", "optonly", + # Timed out on 2020-07-18 + "nozapfhahn", ], deps = CONVOLUTION_TEST_DEPS + [ "@com_google_absl//absl/memory", From 19a8ce8888bd451bc856d8a9c50d271bceb22130 Mon Sep 17 00:00:00 2001 From: Terry Heo Date: Sun, 19 Jul 2020 18:02:25 -0700 Subject: [PATCH 0801/2522] tflite: Refactor core/macros.h, c/c_api.h and c/common.h - Move TFL_CAPI_EXPORT to c/common.h to make them available for delegates. - Create lite:macros module to make core/macros.h available for delegates. PiperOrigin-RevId: 322061366 Change-Id: I671631687775c4d86b0056ccd5a20ca368874a58 --- tensorflow/lite/BUILD | 6 +++++- tensorflow/lite/c/c_api.h | 14 -------------- tensorflow/lite/c/common.h | 16 ++++++++++++++++ tensorflow/lite/delegates/flex/BUILD | 1 + tensorflow/lite/delegates/flex/delegate.cc | 9 ++++----- .../lite/delegates/gpu/cl/gpu_api_delegate.h | 14 -------------- tensorflow/lite/delegates/gpu/delegate.h | 14 -------------- tensorflow/lite/delegates/gpu/gl_delegate.h | 14 -------------- tensorflow/lite/delegates/gpu/metal_delegate.h | 14 +------------- .../lite/delegates/hexagon/hexagon_delegate.h | 14 -------------- .../tools/benchmark/experimental/c/c_api_types.h | 16 ++++++++++++++++ 11 files changed, 43 insertions(+), 89 deletions(-) diff --git a/tensorflow/lite/BUILD b/tensorflow/lite/BUILD index 1c0882ef0aa..61b9972c4d9 100644 --- a/tensorflow/lite/BUILD +++ b/tensorflow/lite/BUILD @@ -650,10 +650,14 @@ cc_test( cc_library( name = "shared_library", hdrs = ["shared_library.h"], - copts = TFLITE_DEFAULT_COPTS, linkopts = if_not_windows(["-ldl"]), ) +cc_library( + name = "macros", + hdrs = ["core/macros.h"], +) + # Shared lib target for convenience, pulls in the core runtime and builtin ops. # Note: This target is not yet finalized, and the exact set of exported (C/C++) # APIs is subject to change. 
The output library name is platform dependent: diff --git a/tensorflow/lite/c/c_api.h b/tensorflow/lite/c/c_api.h index 754fc3b8bbd..880b80e69b4 100644 --- a/tensorflow/lite/c/c_api.h +++ b/tensorflow/lite/c/c_api.h @@ -66,20 +66,6 @@ limitations under the License. /// TfLiteInterpreterOptionsDelete(options); /// TfLiteModelDelete(model); -#ifdef SWIG -#define TFL_CAPI_EXPORT -#else -#if defined(_WIN32) -#ifdef TFL_COMPILE_LIBRARY -#define TFL_CAPI_EXPORT __declspec(dllexport) -#else -#define TFL_CAPI_EXPORT __declspec(dllimport) -#endif // TFL_COMPILE_LIBRARY -#else -#define TFL_CAPI_EXPORT __attribute__((visibility("default"))) -#endif // _WIN32 -#endif // SWIG - #ifdef __cplusplus extern "C" { #endif // __cplusplus diff --git a/tensorflow/lite/c/common.h b/tensorflow/lite/c/common.h index 89b25892914..142ee34317e 100644 --- a/tensorflow/lite/c/common.h +++ b/tensorflow/lite/c/common.h @@ -233,6 +233,22 @@ void TfLiteFloatArrayFree(TfLiteFloatArray* a); } \ } while (0) +// Define TFL_CAPI_EXPORT macro to export a function properly with a shared +// library. +#ifdef SWIG +#define TFL_CAPI_EXPORT +#else +#if defined(_WIN32) +#ifdef TFL_COMPILE_LIBRARY +#define TFL_CAPI_EXPORT __declspec(dllexport) +#else +#define TFL_CAPI_EXPORT __declspec(dllimport) +#endif // TFL_COMPILE_LIBRARY +#else +#define TFL_CAPI_EXPORT __attribute__((visibility("default"))) +#endif // _WIN32 +#endif // SWIG + // Single-precision complex data type compatible with the C99 definition. typedef struct TfLiteComplex64 { float re, im; // real and imaginary parts, respectively. diff --git a/tensorflow/lite/delegates/flex/BUILD b/tensorflow/lite/delegates/flex/BUILD index 8320ecebf9a..7039a9fa952 100644 --- a/tensorflow/lite/delegates/flex/BUILD +++ b/tensorflow/lite/delegates/flex/BUILD @@ -92,6 +92,7 @@ cc_library( "//tensorflow/lite/core/api", "//tensorflow/lite/c:common", "//tensorflow/lite:kernel_api", + "//tensorflow/lite:macros", "//tensorflow/lite:minimal_logging", "//tensorflow/lite:string", "//tensorflow/lite:string_util", diff --git a/tensorflow/lite/delegates/flex/delegate.cc b/tensorflow/lite/delegates/flex/delegate.cc index f85b5e60f91..0bbd50a7fbd 100644 --- a/tensorflow/lite/delegates/flex/delegate.cc +++ b/tensorflow/lite/delegates/flex/delegate.cc @@ -19,7 +19,9 @@ limitations under the License. #include "absl/strings/str_cat.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/lite/c/common.h" #include "tensorflow/lite/context_util.h" +#include "tensorflow/lite/core/macros.h" #include "tensorflow/lite/delegates/flex/buffer_map.h" #include "tensorflow/lite/delegates/flex/kernel.h" #include "tensorflow/lite/delegates/flex/util.h" @@ -30,7 +32,7 @@ limitations under the License. namespace tflite { // Corresponding weak declaration found in lite/interpreter_builder.cc. -#if !defined(_WIN32) +#if TFLITE_HAS_ATTRIBUTE_WEAK // If weak symbol is not supported (Windows), it can use // TF_AcquireFlexDelegate() path instead. TfLiteDelegateUniquePtr AcquireFlexDelegate() { @@ -145,10 +147,7 @@ TfLiteStatus FlexDelegate::CopyFromBufferHandle( // interpreter_build.cc. 
To export the function name globally, the function name // must be matched with patterns in tf_version_script.lds extern "C" { -#if defined(_WIN32) -__declspec(dllexport) -#endif - tflite::TfLiteDelegateUniquePtr TF_AcquireFlexDelegate() { +TFL_CAPI_EXPORT tflite::TfLiteDelegateUniquePtr TF_AcquireFlexDelegate() { return tflite::FlexDelegate::Create(); } } // extern "C" diff --git a/tensorflow/lite/delegates/gpu/cl/gpu_api_delegate.h b/tensorflow/lite/delegates/gpu/cl/gpu_api_delegate.h index 1a9fb73e6ab..bef67c2f6eb 100644 --- a/tensorflow/lite/delegates/gpu/cl/gpu_api_delegate.h +++ b/tensorflow/lite/delegates/gpu/cl/gpu_api_delegate.h @@ -23,20 +23,6 @@ limitations under the License. #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/delegates/gpu/delegate.h" -#ifdef SWIG -#define TFL_CAPI_EXPORT -#else -#if defined(_WIN32) -#ifdef TFL_COMPILE_LIBRARY -#define TFL_CAPI_EXPORT __declspec(dllexport) -#else -#define TFL_CAPI_EXPORT __declspec(dllimport) -#endif // TFL_COMPILE_LIBRARY -#else -#define TFL_CAPI_EXPORT __attribute__((visibility("default"))) -#endif // _WIN32 -#endif // SWIG - #ifdef __cplusplus extern "C" { #endif // __cplusplus diff --git a/tensorflow/lite/delegates/gpu/delegate.h b/tensorflow/lite/delegates/gpu/delegate.h index f03392d9a3c..9af586bfd75 100644 --- a/tensorflow/lite/delegates/gpu/delegate.h +++ b/tensorflow/lite/delegates/gpu/delegate.h @@ -20,20 +20,6 @@ limitations under the License. #include "tensorflow/lite/c/common.h" -#ifdef SWIG -#define TFL_CAPI_EXPORT -#else -#if defined(_WIN32) -#ifdef TFL_COMPILE_LIBRARY -#define TFL_CAPI_EXPORT __declspec(dllexport) -#else -#define TFL_CAPI_EXPORT __declspec(dllimport) -#endif // TFL_COMPILE_LIBRARY -#else -#define TFL_CAPI_EXPORT __attribute__((visibility("default"))) -#endif // _WIN32 -#endif // SWIG - #ifdef __cplusplus extern "C" { #endif // __cplusplus diff --git a/tensorflow/lite/delegates/gpu/gl_delegate.h b/tensorflow/lite/delegates/gpu/gl_delegate.h index fa8eec2ad6b..e6efd646fc3 100644 --- a/tensorflow/lite/delegates/gpu/gl_delegate.h +++ b/tensorflow/lite/delegates/gpu/gl_delegate.h @@ -22,20 +22,6 @@ limitations under the License. #include "absl/base/macros.h" #include "tensorflow/lite/c/common.h" -#ifdef SWIG -#define TFL_CAPI_EXPORT -#else -#if defined(_WIN32) -#ifdef TFL_COMPILE_LIBRARY -#define TFL_CAPI_EXPORT __declspec(dllexport) -#else -#define TFL_CAPI_EXPORT __declspec(dllimport) -#endif // TFL_COMPILE_LIBRARY -#else -#define TFL_CAPI_EXPORT __attribute__((visibility("default"))) -#endif // _WIN32 -#endif // SWIG - #ifdef __cplusplus extern "C" { #endif // __cplusplus diff --git a/tensorflow/lite/delegates/gpu/metal_delegate.h b/tensorflow/lite/delegates/gpu/metal_delegate.h index 1cb660c42d0..e4bdba36799 100644 --- a/tensorflow/lite/delegates/gpu/metal_delegate.h +++ b/tensorflow/lite/delegates/gpu/metal_delegate.h @@ -16,19 +16,7 @@ limitations under the License. 
#ifndef TENSORFLOW_LITE_DELEGATES_GPU_METAL_DELEGATE_H_ #define TENSORFLOW_LITE_DELEGATES_GPU_METAL_DELEGATE_H_ -#ifdef SWIG -#define TFL_CAPI_EXPORT -#else -#if defined(_WIN32) -#ifdef TFL_COMPILE_LIBRARY -#define TFL_CAPI_EXPORT __declspec(dllexport) -#else -#define TFL_CAPI_EXPORT __declspec(dllimport) -#endif // TFL_COMPILE_LIBRARY -#else -#define TFL_CAPI_EXPORT __attribute__((visibility("default"))) -#endif // _WIN32 -#endif // SWIG +#include "tensorflow/lite/c/common.h" #ifdef __cplusplus extern "C" { diff --git a/tensorflow/lite/delegates/hexagon/hexagon_delegate.h b/tensorflow/lite/delegates/hexagon/hexagon_delegate.h index 264a132b995..931b02e4fa5 100644 --- a/tensorflow/lite/delegates/hexagon/hexagon_delegate.h +++ b/tensorflow/lite/delegates/hexagon/hexagon_delegate.h @@ -17,20 +17,6 @@ limitations under the License. #include "tensorflow/lite/c/common.h" -#ifdef SWIG -#define TFL_CAPI_EXPORT -#else -#if defined(_WIN32) -#ifdef TFL_COMPILE_LIBRARY -#define TFL_CAPI_EXPORT __declspec(dllexport) -#else -#define TFL_CAPI_EXPORT __declspec(dllimport) -#endif // TFL_COMPILE_LIBRARY -#else -#define TFL_CAPI_EXPORT __attribute__((visibility("default"))) -#endif // _WIN32 -#endif // SWIG - #ifdef __cplusplus extern "C" { #endif // __cplusplus diff --git a/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h b/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h index 89b25892914..142ee34317e 100644 --- a/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h +++ b/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h @@ -233,6 +233,22 @@ void TfLiteFloatArrayFree(TfLiteFloatArray* a); } \ } while (0) +// Define TFL_CAPI_EXPORT macro to export a function properly with a shared +// library. +#ifdef SWIG +#define TFL_CAPI_EXPORT +#else +#if defined(_WIN32) +#ifdef TFL_COMPILE_LIBRARY +#define TFL_CAPI_EXPORT __declspec(dllexport) +#else +#define TFL_CAPI_EXPORT __declspec(dllimport) +#endif // TFL_COMPILE_LIBRARY +#else +#define TFL_CAPI_EXPORT __attribute__((visibility("default"))) +#endif // _WIN32 +#endif // SWIG + // Single-precision complex data type compatible with the C99 definition. typedef struct TfLiteComplex64 { float re, im; // real and imaginary parts, respectively. From 304bfa2fb47f117d2ef4454ba20528f9a196bf41 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Sun, 19 Jul 2020 18:44:03 -0700 Subject: [PATCH 0802/2522] Remove mutex lock from the ProcessState::GetCPUAllocator. PiperOrigin-RevId: 322064244 Change-Id: I87f4abd2a8d578bb6c0c2d9ef84a2fc9e552d8cc --- tensorflow/core/common_runtime/process_state.cc | 14 +++++++++++++- tensorflow/core/common_runtime/process_state.h | 7 +++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/common_runtime/process_state.cc b/tensorflow/core/common_runtime/process_state.cc index 19f7a985f3e..300e5b9c6ea 100644 --- a/tensorflow/core/common_runtime/process_state.cc +++ b/tensorflow/core/common_runtime/process_state.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/core/common_runtime/process_state.h" +#include #include #include @@ -42,7 +43,8 @@ namespace tensorflow { return instance; } -ProcessState::ProcessState() : numa_enabled_(false) {} +ProcessState::ProcessState() + : numa_enabled_(false), cpu_allocators_cached_(0) {} string ProcessState::MemDesc::DebugString() { return strings::StrCat((loc == CPU ? 
"CPU " : "GPU "), dev_index, @@ -61,6 +63,12 @@ ProcessState::MemDesc ProcessState::PtrType(const void* ptr) { Allocator* ProcessState::GetCPUAllocator(int numa_node) { if (!numa_enabled_ || numa_node == port::kNUMANoAffinity) numa_node = 0; + + // Check if allocator for the numa node is in lock-free cache. + if (numa_node < cpu_allocators_cached_.load(std::memory_order_acquire)) { + return cpu_allocators_cache_[numa_node]; + } + mutex_lock lock(mu_); while (cpu_allocators_.size() <= static_cast(numa_node)) { // If visitors have been defined we need an Allocator built from @@ -115,6 +123,10 @@ Allocator* ProcessState::GetCPUAllocator(int numa_node) { allocator = new TrackingAllocator(allocator, true); } cpu_allocators_.push_back(allocator); + if (cpu_allocators_.size() < cpu_allocators_cache_.max_size()) { + cpu_allocators_cache_[cpu_allocators_.size() - 1] = allocator; + cpu_allocators_cached_.fetch_add(1, std::memory_order_release); + } if (!sub_allocator) { DCHECK(cpu_alloc_visitors_.empty() && cpu_free_visitors_.empty()); } diff --git a/tensorflow/core/common_runtime/process_state.h b/tensorflow/core/common_runtime/process_state.h index a833c22db1c..92dd680ca1a 100644 --- a/tensorflow/core/common_runtime/process_state.h +++ b/tensorflow/core/common_runtime/process_state.h @@ -102,6 +102,13 @@ class ProcessState : public ProcessStateInterface { std::vector cpu_alloc_visitors_ TF_GUARDED_BY(mu_); std::vector cpu_free_visitors_ TF_GUARDED_BY(mu_); + // A cache of cpu allocators indexed by a numa node. Used as a fast path to + // get CPU allocator by numa node id without locking the mutex. We can't use + // `cpu_allocators_` storage in the lock-free path because concurrent + // operation can deallocate the vector storage. + std::atomic cpu_allocators_cached_; + std::array cpu_allocators_cache_; + // Optional RecordingAllocators that wrap the corresponding // Allocators for runtime attribute use analysis. MDMap mem_desc_map_; From ee2a910faa6bcffb5f1b8dfb55b861deed6de234 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Sun, 19 Jul 2020 20:26:21 -0700 Subject: [PATCH 0803/2522] Remove usage of private API has_weights. PiperOrigin-RevId: 322070903 Change-Id: Icd34e840fc067c38adfddd23ccb1169bf1af46b7 --- tensorflow/python/keras/engine/base_layer.py | 4 ++-- tensorflow/python/keras/engine/base_layer_utils.py | 7 +++++++ tensorflow/python/keras/engine/base_layer_v1.py | 4 ++-- tensorflow/python/keras/engine/training.py | 2 +- 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/keras/engine/base_layer.py b/tensorflow/python/keras/engine/base_layer.py index d4c4c348742..e7c394e614d 100644 --- a/tensorflow/python/keras/engine/base_layer.py +++ b/tensorflow/python/keras/engine/base_layer.py @@ -2739,7 +2739,7 @@ class Layer(module.Module, version_utils.LayerVersionSelector): super(tracking.AutoTrackable, self).__delattr__(name) if (isinstance(existing_value, Layer) - or trackable_layer_utils.has_weights(existing_value)): + or base_layer_utils.has_weights(existing_value)): super(tracking.AutoTrackable, self).__setattr__( '_layers', [l for l in self._layers if l is not existing_value]) @@ -2789,7 +2789,7 @@ class Layer(module.Module, version_utils.LayerVersionSelector): # Be careful about metric if it becomes a Module in future. 
# Append value to self._layers if relevant if (getattr(self, '_auto_track_sub_layers', True) and - (isinstance(value, Layer) or trackable_layer_utils.has_weights(value))): + (isinstance(value, Layer) or base_layer_utils.has_weights(value))): self._maybe_create_attribute('_layers', []) # We need to check object identity to avoid de-duplicating empty # container types which compare equal. diff --git a/tensorflow/python/keras/engine/base_layer_utils.py b/tensorflow/python/keras/engine/base_layer_utils.py index bd7387258c1..4734fbb6ba9 100644 --- a/tensorflow/python/keras/engine/base_layer_utils.py +++ b/tensorflow/python/keras/engine/base_layer_utils.py @@ -858,6 +858,13 @@ def is_split_variable(v): return hasattr(v, '_variable_list') or hasattr(v, '_variables') +def has_weights(obj): + obj_type = type(obj) + return (hasattr(obj_type, 'trainable_weights') and + hasattr(obj_type, 'non_trainable_weights') and + not isinstance(obj, type)) + + # TODO(kathywu): This is a temporary hack. When a network of layers is revived # from SavedModel, only the top-level layer will have losses. This causes issues # in eager mode because the child layers may have graph losses diff --git a/tensorflow/python/keras/engine/base_layer_v1.py b/tensorflow/python/keras/engine/base_layer_v1.py index a459eaefbc3..e3cc738c434 100644 --- a/tensorflow/python/keras/engine/base_layer_v1.py +++ b/tensorflow/python/keras/engine/base_layer_v1.py @@ -2194,7 +2194,7 @@ class Layer(base_layer.Layer): super(tracking.AutoTrackable, self).__delattr__(name) if (isinstance(existing_value, Layer) - or trackable_layer_utils.has_weights(existing_value)): + or base_layer_utils.has_weights(existing_value)): super(tracking.AutoTrackable, self).__setattr__( '_layers', [l for l in self._layers if l is not existing_value]) @@ -2244,7 +2244,7 @@ class Layer(base_layer.Layer): # Be careful about metric if it becomes a Module in future. # Append value to self._layers if relevant if (getattr(self, '_auto_track_sub_layers', True) and - (isinstance(value, Layer) or trackable_layer_utils.has_weights(value))): + (isinstance(value, Layer) or base_layer_utils.has_weights(value))): self._maybe_create_attribute('_layers', []) # We need to check object identity to avoid de-duplicating empty # container types which compare equal. diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index ad72251ed9d..bbab6fc7f98 100644 --- a/tensorflow/python/keras/engine/training.py +++ b/tensorflow/python/keras/engine/training.py @@ -329,7 +329,7 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector): if all( isinstance(v, (base_layer.Layer, data_structures.TrackableDataStructure)) or - trackable_layer_utils.has_weights(v) for v in nest.flatten(value)): + base_layer_utils.has_weights(v) for v in nest.flatten(value)): try: self._base_model_initialized except AttributeError: From 667a9b0b35133a3ca1bbe8f2fce17160df636aec Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 19 Jul 2020 20:28:14 -0700 Subject: [PATCH 0804/2522] Delay the creation of Params in Tf2XlaRewriter until it is needed By doing some of the validation check before creating the Params save us from having to create them for ops not supported by this legalization pass. 
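In other words, the cheap convertibility check now runs before the comparatively expensive PrepareParams() call, and context_ starts out null so the destructor only unrefs it when it was actually built. A schematic, runnable Python sketch of the reordering (prepare_params and convertible_to_node_def are placeholder names, not the real API):

    def prepare_params(op):           # stand-in for the expensive setup
      print('prepare_params for', op)
      return {'op': op}

    def convertible_to_node_def(op):  # stand-in for the cheap validation
      return op != 'unsupported_op'

    def rewrite_op(op):
      # Validate first; pay for the params only when legalization proceeds.
      if not convertible_to_node_def(op):
        return 'failure'
      return ('legalized', prepare_params(op))

    print(rewrite_op('unsupported_op'))  # 'failure', prepare_params never runs
    print(rewrite_op('some_op'))         # prepare_params runs exactly once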
PiperOrigin-RevId: 322071012 Change-Id: I639027eaafac1a8c3435b9908967ede43b3110ca --- .../mlir/xla/transforms/legalize_tf_with_tf2xla.cc | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc index 5ba31318c31..c9ccfe90535 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc @@ -226,7 +226,6 @@ class Tf2XlaRewriter { static LogicalResult RewriteOp(Operation* op, OpBuilder& builder, const std::string& device_type) { Tf2XlaRewriter rewriter(op, builder, device_type); - if (failed(rewriter.PrepareParams())) return failure(); return rewriter.LegalizeOp(); } @@ -235,10 +234,12 @@ class Tf2XlaRewriter { const std::string& device_type) : op_(op), device_type_(device_type), - hlo_builder_(op->getName().getStringRef().str(), builder, - op->getLoc()) {} + hlo_builder_(op->getName().getStringRef().str(), builder, op->getLoc()), + context_(nullptr) {} - ~Tf2XlaRewriter() { context_->Unref(); } + ~Tf2XlaRewriter() { + if (context_) context_->Unref(); + } // Prepares OpKernelContext params common to all the ops. // Emits an error on failure. @@ -336,6 +337,8 @@ LogicalResult Tf2XlaRewriter::LegalizeOp() { << nodedef_or.status().ToString(); } + if (failed(PrepareParams())) return failure(); + std::shared_ptr props; tensorflow::Status status = tensorflow::NodeProperties::CreateFromNodeDef( *nodedef_or.ValueOrDie(), From da51cb0dc8c5f759bf3964af5f2326eca8858399 Mon Sep 17 00:00:00 2001 From: Mingming Liu Date: Sun, 19 Jul 2020 20:56:29 -0700 Subject: [PATCH 0805/2522] Clean up unused member 'max_execution_batch_size_'. PiperOrigin-RevId: 322073296 Change-Id: If825232373e57b3a004ad06c1045b565cc463c24 --- tensorflow/core/kernels/batch_kernels.cc | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tensorflow/core/kernels/batch_kernels.cc b/tensorflow/core/kernels/batch_kernels.cc index 20885a89141..0dacaf30443 100644 --- a/tensorflow/core/kernels/batch_kernels.cc +++ b/tensorflow/core/kernels/batch_kernels.cc @@ -1024,11 +1024,6 @@ class BatchFunctionKernel : public AsyncOpKernel { enable_large_batch_splitting_ = false; } - if (enable_large_batch_splitting_ && (!allowed_batch_sizes_.empty())) { - max_execution_batch_size_ = *allowed_batch_sizes_.rbegin(); - } else { - max_execution_batch_size_ = max_batch_size_; - } OP_REQUIRES_OK(c, ValidateAllowedBatchSizes()); } @@ -1088,7 +1083,6 @@ class BatchFunctionKernel : public AsyncOpKernel { string batcher_queue_; int32 num_batch_threads_; int32 max_batch_size_; - int32 max_execution_batch_size_; int32 batch_timeout_micros_; int32 max_enqueued_batches_; std::vector allowed_batch_sizes_; From 2fd3f75f05b99cd2d3c36ac57384633e7707d307 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 19 Jul 2020 20:58:34 -0700 Subject: [PATCH 0806/2522] nnapi: set android_sdk_version for non-Android platforms When running on Linux where libneuralnetworks.so is available, set the android_sdk_version to match the loaded libneuralnetworks.so. This change will allow Tensorflow to use NNAPI in ChromeOS. 
PiperOrigin-RevId: 322073469 Change-Id: Ica6ee5f00ead5a547e5a19f9b7001266a4d09f70 --- tensorflow/lite/nnapi/nnapi_implementation.cc | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tensorflow/lite/nnapi/nnapi_implementation.cc b/tensorflow/lite/nnapi/nnapi_implementation.cc index 1d4c86f8456..52def4de8fd 100644 --- a/tensorflow/lite/nnapi/nnapi_implementation.cc +++ b/tensorflow/lite/nnapi/nnapi_implementation.cc @@ -76,6 +76,31 @@ int ASharedMemory_create(const char* name, size_t size) { } return fd; } + +// Determine the NnApi version from loaded entry points +uint32_t CalculateAndroidSdkVersion(NnApi const& nnapi) { + // Test for specific NNAPI 1.0, 1.1, 1.2 and 1.3 functions + bool has_10 = nnapi.ANeuralNetworksMemory_createFromFd != nullptr; + bool has_11 = + nnapi.ANeuralNetworksModel_relaxComputationFloat32toFloat16 != nullptr; + bool has_12 = nnapi.ANeuralNetworks_getDeviceCount != nullptr; + bool has_13 = nnapi.ANeuralNetworksCompilation_setTimeout != nullptr; + + uint32_t sdk_version = 0; + if (has_10) { + sdk_version = 27; + } + if (sdk_version == 27 && has_11) { + sdk_version = 28; + } + if (sdk_version == 28 && has_12) { + sdk_version = 29; + } + if (sdk_version == 29 && has_13) { + sdk_version = 30; + } + return sdk_version; +} #endif // __ANDROID__ #define LOAD_FUNCTION(handle, name) \ @@ -244,6 +269,15 @@ const NnApi LoadNnApi() { LOAD_FUNCTION_OPTIONAL(libneuralnetworks, ANeuralNetworksExecution_startComputeWithDependencies); +#ifndef __ANDROID__ + // If libneuralnetworks.so is loaded, but android_sdk_version is not set, + // then determine android_sdk_version by testing which functions are + // available. + if (nnapi.nnapi_exists && nnapi.android_sdk_version == 0) { + nnapi.android_sdk_version = CalculateAndroidSdkVersion(nnapi); + } +#endif // __ANDROID__ + return nnapi; } From 9d468d0ca62ad9050cb1946cb0d23ec24ec577ca Mon Sep 17 00:00:00 2001 From: Yanhui Liang Date: Sun, 19 Jul 2020 21:31:55 -0700 Subject: [PATCH 0807/2522] Update `run_deprecated_v1` tests in saver_test with graph scope. PiperOrigin-RevId: 322075874 Change-Id: I658922986a6da5d867e34c07219c44cefbfbf7a7 --- tensorflow/python/training/saver_test.py | 413 ++++++++++++----------- 1 file changed, 210 insertions(+), 203 deletions(-) diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py index 75608b8dbf5..32cdc2ed5e4 100644 --- a/tensorflow/python/training/saver_test.py +++ b/tensorflow/python/training/saver_test.py @@ -166,24 +166,24 @@ class SaverTest(test.TestCase): def testResourceBasic(self): self.basicSaveRestore(resource_variable_ops.ResourceVariable) - @test_util.run_deprecated_v1 def testResourceColocation(self): - partitioner = partitioned_variables.fixed_size_partitioner(num_shards=2) - with ops_lib.device("/job:ps/device:GPU:0"): - v = variable_scope.get_variable("v0", - shape=[10, 2], - partitioner=partitioner, - use_resource=True) - saver_module.Saver({"v0": v}).build() - save_op = None - for op in ops_lib.get_default_graph().get_operations(): - if op.type == "SaveV2": - save_op = op - break - assert save_op is not None - for save_inp in save_op.inputs[3:]: - # Input to SaveV2 op is placed on CPU of the same device as the Variable. - self.assertEqual("/job:ps/device:CPU:0", save_inp.device) + # train.Saver is V1 only API. 
+ with ops_lib.Graph().as_default(): + partitioner = partitioned_variables.fixed_size_partitioner(num_shards=2) + with ops_lib.device("/job:ps/device:GPU:0"): + v = variable_scope.get_variable( + "v0", shape=[10, 2], partitioner=partitioner, use_resource=True) + saver_module.Saver({"v0": v}).build() + save_op = None + for op in ops_lib.get_default_graph().get_operations(): + if op.type == "SaveV2": + save_op = op + break + assert save_op is not None + for save_inp in save_op.inputs[3:]: + # Input to SaveV2 op is placed on CPU of the same device as + # the Variable. + self.assertEqual("/job:ps/device:CPU:0", save_inp.device) def testResourceVariableReadOpsAddedDeterministically(self): graph_defs = [] @@ -297,7 +297,6 @@ class SaverTest(test.TestCase): not op.name.startswith("saver2/save/"))] self.assertEqual(ops_in_saver2_scope_but_not_save_scope, []) - @test_util.run_deprecated_v1 def testSaveCopyRestoreWithSaveRelativePaths(self): """Save, copy checkpoint dir and restore from copied dir. @@ -307,75 +306,79 @@ class SaverTest(test.TestCase): os.mkdir(save_dir1) save_path1 = os.path.join(save_dir1, "save_copy_restore") - # Build a graph with 2 parameter nodes, and Save and - # Restore nodes for them. - v0 = variables.VariableV1(10.0, name="v0") - v1 = variables.VariableV1(20.0, name="v1") - v2 = saver_test_utils.CheckpointedOp(name="v2") - v2_init = v2.insert("k1", 30.0) - save = saver_module.Saver( - var_list={ - "v0": v0, - "v1": v1, - "v2": v2.saveable}, - restore_sequentially=True, - save_relative_paths=True) - init_all_op = [variables.global_variables_initializer(), v2_init] - - with self.cached_session() as sess: - # Initialize all variables - self.evaluate(init_all_op) - - # Check that the parameter nodes have been initialized. - self.assertEqual(10.0, self.evaluate(v0)) - self.assertEqual(20.0, self.evaluate(v1)) - self.assertEqual(b"k1", self.evaluate(v2.keys())) - self.assertEqual(30.0, self.evaluate(v2.values())) - - # Save the initialized values in the file at "save_path" - val = save.save(sess, save_path1) - self.assertTrue(isinstance(val, six.string_types)) - self.assertEqual(save_path1, val) - - self.assertEqual( - checkpoint_management.latest_checkpoint(save_dir1), save_path1) - save_dir2 = os.path.join(self.get_temp_dir(), "save_dir2") - os.renames(save_dir1, save_dir2) - save_path2 = os.path.join(save_dir2, "save_copy_restore") - self.assertEqual( - checkpoint_management.latest_checkpoint(save_dir2), save_path2) - - # Start a second session. In that session the parameter nodes - # have not been initialized either. - with self.cached_session() as sess: - v0 = variables.VariableV1(-1.0, name="v0") - v1 = variables.VariableV1(-1.0, name="v1") + # train.Saver is V1 only API. + with ops_lib.Graph().as_default(): + # Build a graph with 2 parameter nodes, and Save and + # Restore nodes for them. + v0 = variables.VariableV1(10.0, name="v0") + v1 = variables.VariableV1(20.0, name="v1") v2 = saver_test_utils.CheckpointedOp(name="v2") - save = saver_module.Saver({"v0": v0, "v1": v1, "v2": v2.saveable}) + v2_init = v2.insert("k1", 30.0) + save = saver_module.Saver( + var_list={ + "v0": v0, + "v1": v1, + "v2": v2.saveable + }, + restore_sequentially=True, + save_relative_paths=True) + init_all_op = [variables.global_variables_initializer(), v2_init] + + with self.cached_session() as sess: + # Initialize all variables + self.evaluate(init_all_op) + + # Check that the parameter nodes have been initialized. 
+ self.assertEqual(10.0, self.evaluate(v0)) + self.assertEqual(20.0, self.evaluate(v1)) + self.assertEqual(b"k1", self.evaluate(v2.keys())) + self.assertEqual(30.0, self.evaluate(v2.values())) + + # Save the initialized values in the file at "save_path" + val = save.save(sess, save_path1) + self.assertTrue(isinstance(val, six.string_types)) + self.assertEqual(save_path1, val) - # Assert that the variables are not initialized. self.assertEqual( - len(variables.report_uninitialized_variables().eval()), 2) - self.assertEqual(0, len(self.evaluate(v2.keys()))) - self.assertEqual(0, len(self.evaluate(v2.values()))) + checkpoint_management.latest_checkpoint(save_dir1), save_path1) + save_dir2 = os.path.join(self.get_temp_dir(), "save_dir2") + os.renames(save_dir1, save_dir2) + save_path2 = os.path.join(save_dir2, "save_copy_restore") + self.assertEqual( + checkpoint_management.latest_checkpoint(save_dir2), save_path2) - # Restore the saved values in the parameter nodes. - save.restore(sess, save_path2) - # Check that the parameter nodes have been restored. - self.assertEqual(10.0, self.evaluate(v0)) - self.assertEqual(20.0, self.evaluate(v1)) - self.assertEqual(b"k1", self.evaluate(v2.keys())) - self.assertEqual(30.0, self.evaluate(v2.values())) + # Start a second session. In that session the parameter nodes + # have not been initialized either. + with self.cached_session() as sess: + v0 = variables.VariableV1(-1.0, name="v0") + v1 = variables.VariableV1(-1.0, name="v1") + v2 = saver_test_utils.CheckpointedOp(name="v2") + save = saver_module.Saver({"v0": v0, "v1": v1, "v2": v2.saveable}) + + # Assert that the variables are not initialized. + self.assertEqual( + len(variables.report_uninitialized_variables().eval()), 2) + self.assertEqual(0, len(self.evaluate(v2.keys()))) + self.assertEqual(0, len(self.evaluate(v2.values()))) + + # Restore the saved values in the parameter nodes. + save.restore(sess, save_path2) + # Check that the parameter nodes have been restored. + self.assertEqual(10.0, self.evaluate(v0)) + self.assertEqual(20.0, self.evaluate(v1)) + self.assertEqual(b"k1", self.evaluate(v2.keys())) + self.assertEqual(30.0, self.evaluate(v2.values())) - @test_util.run_deprecated_v1 def testFilenameTensor(self): - v0 = variables.VariableV1(0, name="v0") - filename = b"somerandomfilename" - save = saver_module.Saver({"v0": v0}, filename=filename) - with self.cached_session() as sess: - tensor = sess.graph.get_tensor_by_name( - save.saver_def.filename_tensor_name) - self.assertEqual(self.evaluate(tensor), filename) + # train.Saver is V1 only API. + with ops_lib.Graph().as_default(): + v0 = variables.VariableV1(0, name="v0") + filename = b"somerandomfilename" + save = saver_module.Saver({"v0": v0}, filename=filename) + with self.cached_session() as sess: + tensor = sess.graph.get_tensor_by_name( + save.saver_def.filename_tensor_name) + self.assertEqual(self.evaluate(tensor), filename) def testInvalidPath(self): v0 = variables.VariableV1(0, name="v0") @@ -558,15 +561,15 @@ class SaverTest(test.TestCase): # The cached readers should know to re-read the file. self._SaveAndLoad("var1", 1.1, 2.2, save_path) - @test_util.run_deprecated_v1 def testAllowEmpty(self): save_path = os.path.join(self.get_temp_dir(), "allow_empty") - with self.cached_session() as sess: + # train.Saver is V1 only API. 
+ with ops_lib.Graph().as_default(), self.cached_session() as sess: _ = constant_op.constant(1) save = saver_module.Saver(allow_empty=True) val = save.save(sess, save_path) self.assertIsNone(val) - with self.cached_session() as sess: + with ops_lib.Graph().as_default(), self.cached_session() as sess: save = saver_module.Saver(allow_empty=True) save.restore(sess, save_path) @@ -986,9 +989,9 @@ class SaveRestoreShardedTest(test.TestCase): checkpoint_management.latest_checkpoint(self.get_temp_dir()), os.path.join(self.get_temp_dir(), "sharded_basics")) - @test_util.run_deprecated_v1 def testSaverDef(self): - with self.cached_session(): + # train.Saver is V1 only API. + with ops_lib.Graph().as_default(), self.cached_session(): v0 = variables.VariableV1(123, name="v0") save = saver_module.Saver({"v0": v0}, sharded=True) sd = save.as_saver_def() @@ -1004,7 +1007,8 @@ class SaveRestoreShardedTest(test.TestCase): call_saver_with_dict = False # updated by test loop below def _save(partitioner=None): - with self.session(graph=ops_lib.Graph()) as sess: + # train.Saver is V1 only API. + with ops_lib.Graph().as_default(), self.session() as sess: # Calls .eval() to return the ndarray that makes up the full variable. rnd = random_ops.random_uniform(var_full_shape).eval() @@ -1034,7 +1038,8 @@ class SaveRestoreShardedTest(test.TestCase): return rnd def _restore(partitioner=None): - with self.session(graph=ops_lib.Graph()) as sess: + # train.Saver is V1 only API. + with ops_lib.Graph().as_default(), self.session() as sess: if partitioner: new_vs = [ variable_scope.get_variable( @@ -1092,11 +1097,9 @@ class SaveRestoreShardedTest(test.TestCase): num_shards=3)) self.assertAllEqual(saved_full, restored_full) - @test_util.run_deprecated_v1 def testPartitionedVariable(self): self._testPartitionedVariables(use_resource=False) - @test_util.run_deprecated_v1 def testPartitionedResourceVariable(self): self._testPartitionedVariables(use_resource=True) @@ -1321,11 +1324,11 @@ class MaxToKeepTest(test.TestCase): # Deleted by the first helper. self.assertFalse(checkpoint_management.checkpoint_exists(s3)) - @test_util.run_deprecated_v1 def testNonSharded(self): save_dir = self._get_test_dir("max_to_keep_non_sharded") - with self.cached_session() as sess: + # train.Saver is V1 only API. + with ops_lib.Graph().as_default(), self.cached_session() as sess: v = variables.VariableV1(10.0, name="v") save = saver_module.Saver({"v": v}, max_to_keep=2) self.evaluate(variables.global_variables_initializer()) @@ -2016,29 +2019,29 @@ class MetaGraphTest(test.TestCase): self.assertEqual(33, len(meta_graph_def0.graph_def.node)) self.assertEqual(21, len(meta_graph_def1.graph_def.node)) - @test_util.run_deprecated_v1 def testBinaryAndTextFormat(self): test_dir = self._get_test_dir("binary_and_text") filename = os.path.join(test_dir, "metafile") - with self.session(graph=ops_lib.Graph()): + # train.Saver is V1 only API. + with ops_lib.Graph().as_default(), self.session(): # Creates a graph. variables.VariableV1(10.0, name="v0") # Exports the graph as binary format. saver_module.export_meta_graph(filename, as_text=False) - with self.session(graph=ops_lib.Graph()): + with ops_lib.Graph().as_default(), self.session(): # Imports the binary format graph. saver = saver_module.import_meta_graph(filename) self.assertIsNotNone(saver) # Exports the graph as text format. saver.export_meta_graph(filename, as_text=True) - with self.session(graph=ops_lib.Graph()): + with ops_lib.Graph().as_default(), self.session(): # Imports the text format graph. 
saver_module.import_meta_graph(filename) # Writes wrong contents to the file. graph_io.write_graph(saver.as_saver_def(), os.path.dirname(filename), os.path.basename(filename)) - with self.session(graph=ops_lib.Graph()): + with ops_lib.Graph().as_default(), self.session(): # Import should fail. with self.assertRaisesWithPredicateMatch(IOError, lambda e: "Cannot parse file"): @@ -2190,12 +2193,13 @@ class MetaGraphTest(test.TestCase): train_op = ops_lib.get_collection("train_op")[0] self.evaluate(train_op) - @test_util.run_deprecated_v1 def testGraphExtension(self): test_dir = self._get_test_dir("graph_extension") - self._testGraphExtensionSave(test_dir) - self._testGraphExtensionRestore(test_dir) - self._testRestoreFromTrainGraphWithControlContext(test_dir) + # train.Saver and train.import_meta_graph are V1 only APIs. + with ops_lib.Graph().as_default(): + self._testGraphExtensionSave(test_dir) + self._testGraphExtensionRestore(test_dir) + self._testRestoreFromTrainGraphWithControlContext(test_dir) def _testGradientSerDes(self, graph_fn): """Tests that gradients can be computed after exporting and importing. @@ -2342,13 +2346,13 @@ class MetaGraphTest(test.TestCase): self.assertEqual(o.summary, "") self.assertEqual(o.description, "") - @test_util.run_deprecated_v1 def testStripDefaultValuedAttrs(self): """Verifies that default valued attrs are stripped, unless disabled.""" # With strip_default_attrs enabled, attributes "T" (float32) and "Tout" # (complex64) in the "Complex" op must be removed. - with self.cached_session(): + # train.Saver and train.export_meta_graph are V1 only APIs. + with ops_lib.Graph().as_default(), self.cached_session(): real_num = variables.VariableV1(1.0, dtype=dtypes.float32, name="real") imag_num = variables.VariableV1(2.0, dtype=dtypes.float32, name="imag") math_ops.complex(real_num, imag_num, name="complex") @@ -2365,7 +2369,7 @@ class MetaGraphTest(test.TestCase): # With strip_default_attrs disabled, attributes "T" (float32) and "Tout" # (complex64) in the "Complex" op must *not* be removed, even if they map # to their defaults. - with self.session(graph=ops_lib.Graph()): + with ops_lib.Graph().as_default(), self.session(): real_num = variables.VariableV1(1.0, dtype=dtypes.float32, name="real") imag_num = variables.VariableV1(2.0, dtype=dtypes.float32, name="imag") math_ops.complex(real_num, imag_num, name="complex") @@ -2379,25 +2383,27 @@ class MetaGraphTest(test.TestCase): self.assertIn("T", node_def.attr) self.assertIn("Tout", node_def.attr) - @test_util.run_deprecated_v1 def testImportIntoNamescope(self): # Test that we can import a meta graph into a namescope. test_dir = self._get_test_dir("import_into_namescope") filename = os.path.join(test_dir, "ckpt") - image = array_ops.placeholder(dtypes.float32, [None, 784], name="image") - label = array_ops.placeholder(dtypes.float32, [None, 10], name="label") - with session.Session() as sess: - weights = variables.VariableV1( - random_ops.random_uniform([784, 10]), name="weights") - bias = variables.VariableV1(array_ops.zeros([10]), name="bias") - logit = nn_ops.relu(math_ops.matmul(image, weights) + bias, name="logits") - nn_ops.softmax(logit, name="prediction") - cost = nn_ops.softmax_cross_entropy_with_logits(labels=label, - logits=logit, name="cost") - adam.AdamOptimizer().minimize(cost, name="optimize") - saver = saver_module.Saver() - self.evaluate(variables.global_variables_initializer()) - saver.save(sess, filename) + # train.Saver is V1 only API. 
+ with ops_lib.Graph().as_default(): + image = array_ops.placeholder(dtypes.float32, [None, 784], name="image") + label = array_ops.placeholder(dtypes.float32, [None, 10], name="label") + with session.Session() as sess: + weights = variables.VariableV1( + random_ops.random_uniform([784, 10]), name="weights") + bias = variables.VariableV1(array_ops.zeros([10]), name="bias") + logit = nn_ops.relu( + math_ops.matmul(image, weights) + bias, name="logits") + nn_ops.softmax(logit, name="prediction") + cost = nn_ops.softmax_cross_entropy_with_logits( + labels=label, logits=logit, name="cost") + adam.AdamOptimizer().minimize(cost, name="optimize") + saver = saver_module.Saver() + self.evaluate(variables.global_variables_initializer()) + saver.save(sess, filename) graph = ops_lib.Graph() with session.Session(graph=graph) as sess: @@ -2450,25 +2456,27 @@ class MetaGraphTest(test.TestCase): filename + ".meta", graph=graph_2, import_scope="my_scope") self.assertIsInstance(new_saver_3, saver_module.Saver) - @test_util.run_deprecated_v1 def testImportIntoImplicitNamescope(self): # Test that we can import a meta graph into an implicit namescope. test_dir = self._get_test_dir("import_into_namescope") filename = os.path.join(test_dir, "ckpt") - image = array_ops.placeholder(dtypes.float32, [None, 784], name="image") - label = array_ops.placeholder(dtypes.float32, [None, 10], name="label") - with session.Session() as sess: - weights = variables.VariableV1( - random_ops.random_uniform([784, 10]), name="weights") - bias = variables.VariableV1(array_ops.zeros([10]), name="bias") - logit = nn_ops.relu(math_ops.matmul(image, weights) + bias, name="logits") - nn_ops.softmax(logit, name="prediction") - cost = nn_ops.softmax_cross_entropy_with_logits(labels=label, - logits=logit, name="cost") - adam.AdamOptimizer().minimize(cost, name="optimize") - saver = saver_module.Saver() - self.evaluate(variables.global_variables_initializer()) - saver.save(sess, filename) + # train.Saver is V1 only API. + with ops_lib.Graph().as_default(): + image = array_ops.placeholder(dtypes.float32, [None, 784], name="image") + label = array_ops.placeholder(dtypes.float32, [None, 10], name="label") + with session.Session() as sess: + weights = variables.VariableV1( + random_ops.random_uniform([784, 10]), name="weights") + bias = variables.VariableV1(array_ops.zeros([10]), name="bias") + logit = nn_ops.relu( + math_ops.matmul(image, weights) + bias, name="logits") + nn_ops.softmax(logit, name="prediction") + cost = nn_ops.softmax_cross_entropy_with_logits( + labels=label, logits=logit, name="cost") + adam.AdamOptimizer().minimize(cost, name="optimize") + saver = saver_module.Saver() + self.evaluate(variables.global_variables_initializer()) + saver.save(sess, filename) graph = ops_lib.Graph() with session.Session(graph=graph) as sess: @@ -2573,7 +2581,6 @@ class CheckpointReaderTest(test.TestCase): _WRITE_VERSION = saver_pb2.SaverDef.V1 - @test_util.run_deprecated_v1 def testDebugString(self): # Builds a graph. 
v0 = variables.VariableV1( @@ -2734,7 +2741,7 @@ class ScopedGraphTest(test.TestCase): export_scope="hidden1") self.assertEqual(["biases:0", "weights:0"], sorted(var_list.keys())) - with self.session(graph=graph) as sess: + with graph.as_default(), self.session() as sess: self.evaluate(variables.global_variables_initializer()) saver = saver_module.Saver(var_list=var_list, max_to_keep=1) saver.save(sess, os.path.join(test_dir, ckpt_filename), write_state=False) @@ -2746,15 +2753,15 @@ class ScopedGraphTest(test.TestCase): with graph.as_default(): new_image = constant_op.constant( 1.2, dtypes.float32, shape=[100, 28], name="images") - var_list = meta_graph.import_scoped_meta_graph( - os.path.join(test_dir, exported_filename), - graph=graph, - input_map={"$unbound_inputs_images": new_image}, - import_scope="new_hidden1") - self.assertEqual(["biases:0", "weights:0"], sorted(var_list.keys())) - hidden1 = graph.as_graph_element("new_hidden1/Relu:0") - weights1 = graph.as_graph_element("new_hidden1/weights:0") - biases1 = graph.as_graph_element("new_hidden1/biases:0") + var_list = meta_graph.import_scoped_meta_graph( + os.path.join(test_dir, exported_filename), + graph=graph, + input_map={"$unbound_inputs_images": new_image}, + import_scope="new_hidden1") + self.assertEqual(["biases:0", "weights:0"], sorted(var_list.keys())) + hidden1 = graph.as_graph_element("new_hidden1/Relu:0") + weights1 = graph.as_graph_element("new_hidden1/weights:0") + biases1 = graph.as_graph_element("new_hidden1/biases:0") with graph.as_default(): # Hidden 2 @@ -2794,7 +2801,7 @@ class ScopedGraphTest(test.TestCase): set(variables.global_variables()) - set(var_list.keys())) init_rest_op = variables.variables_initializer(rest_variables) - with self.session(graph=graph) as sess: + with graph.as_default(), self.session() as sess: saver = saver_module.Saver(var_list=var_list, max_to_keep=1) saver.restore(sess, os.path.join(test_dir, ckpt_filename)) # Verify that we have restored weights1 and biases1. @@ -2805,7 +2812,6 @@ class ScopedGraphTest(test.TestCase): # Verifies that we can save the subgraph under "hidden1" and restore it # into "new_hidden1" in the new graph. - @test_util.run_deprecated_v1 def testScopedSaveAndRestore(self): test_dir = self._get_test_dir("scoped_export_import") ckpt_filename = "ckpt" @@ -2815,7 +2821,6 @@ class ScopedGraphTest(test.TestCase): # Verifies that we can copy the subgraph under "hidden1" and copy it # to different name scope in the same graph or different graph. - @test_util.run_deprecated_v1 def testCopyScopedGraph(self): test_dir = self._get_test_dir("scoped_copy") saver0_ckpt = os.path.join(test_dir, "saver0.ckpt") @@ -2830,7 +2835,7 @@ class ScopedGraphTest(test.TestCase): nn_ops.relu(math_ops.matmul(images, weights1) + biases1, name="relu") # Run the graph and save scoped checkpoint. 
- with self.session(graph=graph1) as sess: + with graph1.as_default(), self.session(graph=graph1) as sess: self.evaluate(variables.global_variables_initializer()) _, var_list_1 = meta_graph.export_scoped_meta_graph( export_scope="hidden1") @@ -2851,7 +2856,7 @@ class ScopedGraphTest(test.TestCase): var_list_2 = meta_graph.copy_scoped_meta_graph( from_scope="hidden1", to_scope="hidden2") - with self.session(graph=graph1) as sess: + with graph1.as_default(), self.session(graph=graph1) as sess: saver1 = saver_module.Saver(var_list=var_list_1, max_to_keep=1) saver1.restore(sess, saver0_ckpt) saver2 = saver_module.Saver(var_list=var_list_2, max_to_keep=1) @@ -2861,18 +2866,18 @@ class ScopedGraphTest(test.TestCase): # Verifies copy to different graph. graph2 = ops_lib.Graph() - new_var_list_1 = meta_graph.copy_scoped_meta_graph( - from_scope="hidden1", - to_scope="new_hidden1", - from_graph=graph1, - to_graph=graph2) + with graph2.as_default(): + new_var_list_1 = meta_graph.copy_scoped_meta_graph( + from_scope="hidden1", + to_scope="new_hidden1", + from_graph=graph1, + to_graph=graph2) - with self.session(graph=graph2) as sess: - saver3 = saver_module.Saver(var_list=new_var_list_1, max_to_keep=1) - saver3.restore(sess, saver0_ckpt) - self.assertAllClose(expected, sess.run("new_hidden1/relu:0")) + with self.session() as sess: + saver3 = saver_module.Saver(var_list=new_var_list_1, max_to_keep=1) + saver3.restore(sess, saver0_ckpt) + self.assertAllClose(expected, sess.run("new_hidden1/relu:0")) - @test_util.run_deprecated_v1 def testExportGraphDefWithScope(self): test_dir = self._get_test_dir("export_graph_def") saver0_ckpt = os.path.join(test_dir, "saver0.ckpt") @@ -2886,30 +2891,30 @@ class ScopedGraphTest(test.TestCase): biases1 = variables.VariableV1([0.1] * 3, name="biases") nn_ops.relu(math_ops.matmul(images, weights1) + biases1, name="relu") - # Run the graph and save scoped checkpoint. - with self.session(graph=graph1) as sess: - self.evaluate(variables.global_variables_initializer()) - _, var_list_1 = meta_graph.export_scoped_meta_graph( - graph_def=graph1.as_graph_def(), export_scope="hidden1") - saver = saver_module.Saver(var_list=var_list_1, max_to_keep=1) - saver.save(sess, saver0_ckpt, write_state=False) + # Run the graph and save scoped checkpoint. + with self.session(graph=graph1) as sess: + self.evaluate(variables.global_variables_initializer()) + _, var_list_1 = meta_graph.export_scoped_meta_graph( + graph_def=graph1.as_graph_def(), export_scope="hidden1") + saver = saver_module.Saver(var_list=var_list_1, max_to_keep=1) + saver.save(sess, saver0_ckpt, write_state=False) expected = np.reshape([[5.0999999, 7.0999999, 9.10000038] * 3], (3, 3)) # Verifies that we can run successfully after restoring. 
graph2 = ops_lib.Graph() - new_var_list_1 = meta_graph.copy_scoped_meta_graph( - from_scope="hidden1", - to_scope="new_hidden1", - from_graph=graph1, - to_graph=graph2) + with graph2.as_default(): + new_var_list_1 = meta_graph.copy_scoped_meta_graph( + from_scope="hidden1", + to_scope="new_hidden1", + from_graph=graph1, + to_graph=graph2) - with self.session(graph=graph2) as sess: - saver3 = saver_module.Saver(var_list=new_var_list_1, max_to_keep=1) - saver3.restore(sess, saver0_ckpt) - self.assertAllClose(expected, sess.run("new_hidden1/relu:0")) + with self.session(graph=graph2) as sess: + saver3 = saver_module.Saver(var_list=new_var_list_1, max_to_keep=1) + saver3.restore(sess, saver0_ckpt) + self.assertAllClose(expected, sess.run("new_hidden1/relu:0")) - @test_util.run_deprecated_v1 def testSerializeSaverWithScope(self): test_dir = self._get_test_dir("export_graph_def") saver1_ckpt = os.path.join(test_dir, "saver1.ckpt") @@ -2926,40 +2931,42 @@ class ScopedGraphTest(test.TestCase): saver2 = saver_module.Saver(var_list=[variable2], name="hidden2/") graph.add_to_collection(ops_lib.GraphKeys.SAVERS, saver2) - with self.session(graph=graph) as sess: - self.evaluate(variables.global_variables_initializer()) - saver1.save(sess, saver1_ckpt, write_state=False) - saver2.save(sess, saver2_ckpt, write_state=False) + with self.session(graph=graph) as sess: + self.evaluate(variables.global_variables_initializer()) + saver1.save(sess, saver1_ckpt, write_state=False) + saver2.save(sess, saver2_ckpt, write_state=False) graph1 = ops_lib.Graph() - var_dict1 = meta_graph.copy_scoped_meta_graph( - from_scope="hidden1", - to_scope="new_hidden1", - from_graph=graph, - to_graph=graph1) - self.assertEqual(1, len(var_dict1)) + with graph1.as_default(): + var_dict1 = meta_graph.copy_scoped_meta_graph( + from_scope="hidden1", + to_scope="new_hidden1", + from_graph=graph, + to_graph=graph1) + self.assertEqual(1, len(var_dict1)) - saver_list1 = graph1.get_collection(ops_lib.GraphKeys.SAVERS) - self.assertEqual(1, len(saver_list1)) + saver_list1 = graph1.get_collection(ops_lib.GraphKeys.SAVERS) + self.assertEqual(1, len(saver_list1)) - with self.session(graph=graph1) as sess: - saver_list1[0].restore(sess, saver1_ckpt) - self.assertEqual(1.0, self.evaluate(var_dict1["variable1:0"])) + with self.session(graph=graph1) as sess: + saver_list1[0].restore(sess, saver1_ckpt) + self.assertEqual(1.0, self.evaluate(var_dict1["variable1:0"])) graph2 = ops_lib.Graph() - var_dict2 = meta_graph.copy_scoped_meta_graph( - from_scope="hidden2", - to_scope="new_hidden2", - from_graph=graph, - to_graph=graph2) - self.assertEqual(1, len(var_dict2)) + with graph2.as_default(): + var_dict2 = meta_graph.copy_scoped_meta_graph( + from_scope="hidden2", + to_scope="new_hidden2", + from_graph=graph, + to_graph=graph2) + self.assertEqual(1, len(var_dict2)) - saver_list2 = graph2.get_collection(ops_lib.GraphKeys.SAVERS) - self.assertEqual(1, len(saver_list2)) + saver_list2 = graph2.get_collection(ops_lib.GraphKeys.SAVERS) + self.assertEqual(1, len(saver_list2)) - with self.session(graph=graph2) as sess: - saver_list2[0].restore(sess, saver2_ckpt) - self.assertEqual(2.0, self.evaluate(var_dict2["variable2:0"])) + with self.session(graph=graph2) as sess: + saver_list2[0].restore(sess, saver2_ckpt) + self.assertEqual(2.0, self.evaluate(var_dict2["variable2:0"])) class _OwnsAVariableSimple(trackable_base.Trackable): From a0a897784a2762b8b452fbf3d388fa50f8fa4203 Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Mon, 20 Jul 2020 05:17:17 +0000 
Subject: [PATCH 0808/2522] testHasKeyLookup --- .../python/kernel_tests/map_ops_test.py | 31 ++++++++++++++++--- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/kernel_tests/map_ops_test.py b/tensorflow/python/kernel_tests/map_ops_test.py index 26f1ea93e1a..b214909dc54 100644 --- a/tensorflow/python/kernel_tests/map_ops_test.py +++ b/tensorflow/python/kernel_tests/map_ops_test.py @@ -26,6 +26,10 @@ from tensorflow.python.framework import test_util from tensorflow.python.ops import map_ops from tensorflow.python.platform import test +from tensorflow.python.util.lazy_loader import LazyLoader +control_flow_ops = LazyLoader("control_flow_ops", globals(), + "tensorflow.python.ops.control_flow_ops") + @test_util.run_all_in_graph_and_eager_modes class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): @@ -96,16 +100,35 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): self.evaluate(e) def testTensorMapHasKey(self): + m = map_ops.empty_tensor_map() + k = constant_op.constant(1.0) + k2 = constant_op.constant(2.0) + v = constant_op.constant(2.0) + m = map_ops.tensor_map_insert(m, k, v) + + b = map_ops.tensor_map_has_key(m, k) + b2 = map_ops.tensor_map_has_key(m, k2) + self.assertAllEqual(b, True) + self.assertAllEqual(b2, False) + + def testHasKeyLookup(self): + with self.test_session(): m = map_ops.empty_tensor_map() k = constant_op.constant(1.0) k2 = constant_op.constant(2.0) v = constant_op.constant(2.0) m = map_ops.tensor_map_insert(m, k, v) - b = map_ops.tensor_map_has_key(m, k) - b2 = map_ops.tensor_map_has_key(m, k2) - self.assertAllEqual(b, True) - self.assertAllEqual(b2, False) + default_value = constant_op.constant(0.0) + l = control_flow_ops.cond(map_ops.tensor_map_has_key(m, k), + lambda: map_ops.tensor_map_lookup(m, k, dtypes.float32), + lambda: default_value) + l2 = control_flow_ops.cond(map_ops.tensor_map_has_key(m, k2), + lambda: map_ops.tensor_map_lookup(m, k, dtypes.float32), + lambda: default_value) + + self.assertAllClose(l, v) + self.assertAllClose(l2, default_value) def testInsertLookupGrad(self): with backprop.GradientTape() as tape: From 91d9c30d7a17977d26106d2797d1c6925c5de6ae Mon Sep 17 00:00:00 2001 From: Rick Chao Date: Sun, 19 Jul 2020 23:19:11 -0700 Subject: [PATCH 0809/2522] In input_test, replace run_v1_only decorators with the comments that they are only applicable in v1. PiperOrigin-RevId: 322083712 Change-Id: I7e3c5a4f51d66ec4986b67eacbd26e64e8a7ef18 --- tensorflow/python/training/input_test.py | 64 ++++++++++++++++++------ 1 file changed, 48 insertions(+), 16 deletions(-) diff --git a/tensorflow/python/training/input_test.py b/tensorflow/python/training/input_test.py index 3dc889a7895..5d01d1c73de 100644 --- a/tensorflow/python/training/input_test.py +++ b/tensorflow/python/training/input_test.py @@ -873,19 +873,27 @@ class BatchTest(test_lib.TestCase): for thread in threads: thread.join() - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only("Input pipelines based on Queues are not supported " + "when eager execution is enabled. TF2 uses tf.data " + "instead.") def testSingleThreadKeepInput(self): self._testKeepInputHelper(1, False) - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only("Input pipelines based on Queues are not supported " + "when eager execution is enabled. 
TF2 uses tf.data " + "instead.") def testSingleThreadKeepInputEnqueueMany(self): self._testKeepInputHelper(1, True) - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only("Input pipelines based on Queues are not supported " + "when eager execution is enabled. TF2 uses tf.data " + "instead.") def testMultipleThreadKeepInput(self): self._testKeepInputHelper(5, False) - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only("Input pipelines based on Queues are not supported " + "when eager execution is enabled. TF2 uses tf.data " + "instead.") def testMultipleThreadKeepInputEnqueueMany(self): self._testKeepInputHelper(5, True) @@ -1487,19 +1495,27 @@ class BatchJoinTest(test_lib.TestCase): for thread in threads: thread.join() - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only("Input pipelines based on Queues are not supported " + "when eager execution is enabled. TF2 uses tf.data " + "instead.") def testSingleThreadKeepInput(self): self._testKeepInputHelper(1, False) - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only("Input pipelines based on Queues are not supported " + "when eager execution is enabled. TF2 uses tf.data " + "instead.") def testSingleThreadKeepInputEnqueueMany(self): self._testKeepInputHelper(1, True) - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only("Input pipelines based on Queues are not supported " + "when eager execution is enabled. TF2 uses tf.data " + "instead.") def testMultipleThreadKeepInput(self): self._testKeepInputHelper(5, False) - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only("Input pipelines based on Queues are not supported " + "when eager execution is enabled. TF2 uses tf.data " + "instead.") def testMultipleThreadKeepInputEnqueueMany(self): self._testKeepInputHelper(5, True) @@ -1910,19 +1926,27 @@ class ShuffleBatchTest(test_lib.TestCase): for thread in threads: thread.join() - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only("Input pipelines based on Queues are not supported " + "when eager execution is enabled. TF2 uses tf.data " + "instead.") def testSingleThreadKeepInput(self): self._testKeepInputHelper(1, False) - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only("Input pipelines based on Queues are not supported " + "when eager execution is enabled. TF2 uses tf.data " + "instead.") def testSingleThreadKeepInputEnqueueMany(self): self._testKeepInputHelper(1, True) - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only("Input pipelines based on Queues are not supported " + "when eager execution is enabled. TF2 uses tf.data " + "instead.") def testMultipleThreadKeepInput(self): self._testKeepInputHelper(5, False) - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only("Input pipelines based on Queues are not supported " + "when eager execution is enabled. TF2 uses tf.data " + "instead.") def testMultipleThreadKeepInputEnqueueMany(self): self._testKeepInputHelper(5, True) @@ -2314,19 +2338,27 @@ class ShuffleBatchJoinTest(test_lib.TestCase): for thread in threads: thread.join() - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only("Input pipelines based on Queues are not supported " + "when eager execution is enabled. TF2 uses tf.data " + "instead.") def testSingleThreadKeepInput(self): self._testKeepInputHelper(1, False) - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only("Input pipelines based on Queues are not supported " + "when eager execution is enabled. 
TF2 uses tf.data " + "instead.") def testSingleThreadKeepInputEnqueueMany(self): self._testKeepInputHelper(1, True) - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only("Input pipelines based on Queues are not supported " + "when eager execution is enabled. TF2 uses tf.data " + "instead.") def testMultipleThreadKeepInput(self): self._testKeepInputHelper(5, False) - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only("Input pipelines based on Queues are not supported " + "when eager execution is enabled. TF2 uses tf.data " + "instead.") def testMultipleThreadKeepInputEnqueueMany(self): self._testKeepInputHelper(5, True) From c86abcdd5d9c73b28318b1de5d9cb9c6487a9a05 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Mon, 20 Jul 2020 14:13:54 +0700 Subject: [PATCH 0810/2522] Fix segfault in deleter for unique_ptr --- .../filesystem/plugins/s3/s3_filesystem.cc | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc index 417a05d9a7f..11cd9de4d57 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc @@ -546,7 +546,10 @@ void NewAppendableFile(const TF_Filesystem* filesystem, const char* path, GetS3Client(s3_file); GetTransferManager(Aws::Transfer::TransferDirection::UPLOAD, s3_file); - // We need to delete `file->plugin_file` in case of errors. + // We need to delete `file->plugin_file` in case of errors. We set + // `file->plugin_file` to `nullptr` in order to avoid segment fault when + // calling deleter of `unique_ptr`. + file->plugin_file = nullptr; std::unique_ptr writer( file, [](TF_WritableFile* file) { if (file != nullptr && file->plugin_file != nullptr) { @@ -562,10 +565,14 @@ void NewAppendableFile(const TF_Filesystem* filesystem, const char* path, std::unique_ptr reader( new TF_RandomAccessFile, [](TF_RandomAccessFile* file) { if (file != nullptr) { - tf_random_access_file::Cleanup(file); + if (file->plugin_file != nullptr) + tf_random_access_file::Cleanup(file); delete file; } }); + // We set `reader->plugin_file` to `nullptr` in order to avoid segment fault + // when calling deleter of `unique_ptr` + reader->plugin_file = nullptr; NewRandomAccessFile(filesystem, path, reader.get(), status); if (TF_GetCode(status) != TF_OK) return; @@ -696,10 +703,14 @@ void NewReadOnlyMemoryRegionFromFile(const TF_Filesystem* filesystem, std::unique_ptr reader( new TF_RandomAccessFile, [](TF_RandomAccessFile* file) { if (file != nullptr) { - tf_random_access_file::Cleanup(file); + if (file->plugin_file != nullptr) + tf_random_access_file::Cleanup(file); delete file; } }); + // We set `reader->plugin_file` to `nullptr` in order to avoid segment fault + // when calling deleter of `unique_ptr` + reader->plugin_file = nullptr; NewRandomAccessFile(filesystem, path, reader.get(), status); if (TF_GetCode(status) != TF_OK) return; auto read = From d70d248c2d1441482ac2bfa641e53ebe0b1f8f85 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Sun, 19 Jul 2020 22:57:17 +0700 Subject: [PATCH 0811/2522] Add MultiPartCopy (without callback) --- .../filesystem/plugins/s3/s3_filesystem.cc | 155 +++++++++++++++++- 1 file changed, 154 insertions(+), 1 deletion(-) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc index 11cd9de4d57..1cfb548e6ba 100644 --- 
a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc @@ -14,14 +14,20 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h" +#include #include #include #include +#include +#include +#include #include +#include #include #include #include #include +#include #include #include @@ -738,11 +744,158 @@ static void SimpleCopyFile(const Aws::String& source, TF_SetStatus(status, TF_OK, ""); }; +using EtagOutcome = + Aws::Utils::Outcome>; +typedef struct MultipartCopyAsyncContext + : public Aws::Client::AsyncCallerContext { + int part_number; + int* num_finished_parts; + Aws::Vector* etag_outcomes; + + // lock and cv for multi part copy + absl::Mutex* multi_part_copy_mutex; + absl::CondVar* multi_part_copy_cv; +} MultipartCopyAsyncContext; + +static void AbortMultiPartCopy(const Aws::String& bucket_dst, + const Aws::String& object_dst, + const Aws::String& upload_id, S3File* s3_file, + TF_Status* status) {} + +static void MultiPartCopyCallback( + const Aws::S3::Model::UploadPartCopyRequest& request, + const Aws::S3::Model::UploadPartCopyOutcome& outcome, + const std::shared_ptr& context) {} + static void MultiPartCopy(const Aws::String& source, const Aws::String& bucket_dst, const Aws::String& object_dst, const size_t num_parts, const uint64_t file_size, S3File* s3_file, - TF_Status* status){}; + TF_Status* status) { + Aws::S3::Model::CreateMultipartUploadRequest create_multipart_upload_request; + create_multipart_upload_request.WithBucket(bucket_dst).WithKey(object_dst); + + GetS3Client(s3_file); + GetTransferManager(Aws::Transfer::TransferDirection::UPLOAD, s3_file); + + auto create_multipart_upload_outcome = + s3_file->s3_client->CreateMultipartUpload( + create_multipart_upload_request); + if (!create_multipart_upload_outcome.IsSuccess()) + return TF_SetStatusFromAWSError(create_multipart_upload_outcome.GetError(), + status); + + auto upload_id = create_multipart_upload_outcome.GetResult().GetUploadId(); + + int num_finished_parts = 0; + // Keep track of `Outcome` of each upload part. + Aws::Vector etag_outcomes(num_parts); + // Mutex which protects access of the part_states map. + absl::Mutex multi_part_copy_mutex; + // Condition variable to be used with above mutex for synchronization. + absl::CondVar multi_part_copy_cv; + + auto chunk_size = + s3_file->multi_part_chunk_sizes[Aws::Transfer::TransferDirection::UPLOAD]; + + size_t retries = 0; + while (retries++ < 3) { + // Queue up parts. + for (auto part_number = 0; part_number < num_parts; ++part_number) { + if (etag_outcomes[part_number].IsSuccess()) continue; + uint64_t start_pos = part_number * chunk_size; + uint64_t end_pos = start_pos + chunk_size - 1; + if (end_pos >= file_size) end_pos = file_size - 1; + + Aws::String range = + absl::StrCat("bytes=", start_pos, "-", end_pos).c_str(); + Aws::S3::Model::UploadPartCopyRequest upload_part_copy_request; + upload_part_copy_request.WithBucket(bucket_dst) + .WithKey(object_dst) + .WithCopySource(source) + .WithCopySourceRange(range) + // S3 API partNumber starts from 1. 
+ .WithPartNumber(part_number + 1) + .WithUploadId(upload_id); + + auto multi_part_context = + Aws::MakeShared("MultiPartCopyContext"); + multi_part_context->part_number = part_number; + multi_part_context->num_finished_parts = &num_finished_parts; + multi_part_context->etag_outcomes = &etag_outcomes; + multi_part_context->multi_part_copy_mutex = &multi_part_copy_mutex; + multi_part_context->multi_part_copy_cv = &multi_part_copy_cv; + auto callback = + [](const Aws::S3::S3Client* client, + const Aws::S3::Model::UploadPartCopyRequest& request, + const Aws::S3::Model::UploadPartCopyOutcome& outcome, + const std::shared_ptr& + context) { + auto multipart_context = + std::static_pointer_cast( + context); + MultiPartCopyCallback(request, outcome, multipart_context); + }; + + std::shared_ptr context = + multi_part_context; + s3_file->s3_client->UploadPartCopyAsync(upload_part_copy_request, + callback, context); + } + // Wait till they finish. + { + absl::MutexLock l(&multi_part_copy_mutex); + // Wait on the mutex until notify is called then check the finished parts + // as there could be false notifications. + while (num_finished_parts != num_parts) { + multi_part_copy_cv.Wait(&multi_part_copy_mutex); + } + } + // check if there was any error for any part. + for (auto part_number = 0; part_number < num_parts; ++part_number) { + if (!etag_outcomes[part_number].IsSuccess()) { + if (retries >= 3) { + AbortMultiPartCopy(bucket_dst, object_dst, upload_id, s3_file, + status); + if (TF_GetCode(status) != TF_OK) return; + return TF_SetStatusFromAWSError(etag_outcomes[part_number].GetError(), + status); + } else { + // Retry. + num_finished_parts--; + } + } + } + } + + Aws::S3::Model::CompletedMultipartUpload completed_multipart_upload; + // If there was an error still in any part, it would abort and return in the + // above loop. We set the eTag of completed parts to the final + // `completed_multipart_upload`. Note these parts have to be added in order. 
+ for (int part_number = 0; part_number < num_parts; ++part_number) { + Aws::S3::Model::CompletedPart completed_part; + completed_part.SetPartNumber(part_number + 1); + completed_part.SetETag(etag_outcomes[part_number].GetResult()); + completed_multipart_upload.AddParts(completed_part); + } + + Aws::S3::Model::CompleteMultipartUploadRequest + complete_multipart_upload_request; + complete_multipart_upload_request.WithBucket(bucket_dst) + .WithKey(object_dst) + .WithUploadId(upload_id) + .WithMultipartUpload(completed_multipart_upload); + auto complete_multipart_upload_outcome = + s3_file->s3_client->CompleteMultipartUpload( + complete_multipart_upload_request); + if (!complete_multipart_upload_outcome.IsSuccess()) + AbortMultiPartCopy(bucket_dst, object_dst, upload_id, s3_file, status); + else + return TF_SetStatus(status, TF_OK, ""); + if (TF_GetCode(status) == TF_OK) + return TF_SetStatusFromAWSError( + complete_multipart_upload_outcome.GetError(), status); +}; void CopyFile(const TF_Filesystem* filesystem, const char* src, const char* dst, TF_Status* status) { From 3d97765f63a1ae49413ee81472d5ad779fafa29e Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Mon, 20 Jul 2020 01:05:26 +0700 Subject: [PATCH 0812/2522] Add callbacks for MultiPartCopyCallback --- .../filesystem/plugins/s3/s3_filesystem.cc | 29 +++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc index 1cfb548e6ba..d8939db568e 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc @@ -18,6 +18,7 @@ limitations under the License. #include #include #include +#include #include #include #include @@ -760,12 +761,36 @@ typedef struct MultipartCopyAsyncContext static void AbortMultiPartCopy(const Aws::String& bucket_dst, const Aws::String& object_dst, const Aws::String& upload_id, S3File* s3_file, - TF_Status* status) {} + TF_Status* status) { + Aws::S3::Model::AbortMultipartUploadRequest request; + request.WithBucket(bucket_dst).WithKey(object_dst).WithUploadId(upload_id); + auto outcome = s3_file->s3_client->AbortMultipartUpload(request); + if (!outcome.IsSuccess()) + TF_SetStatusFromAWSError(outcome.GetError(), status); + else + TF_SetStatus(status, TF_OK, ""); +} static void MultiPartCopyCallback( const Aws::S3::Model::UploadPartCopyRequest& request, const Aws::S3::Model::UploadPartCopyOutcome& outcome, - const std::shared_ptr& context) {} + const std::shared_ptr& context) { + // Access to `etag_outcomes` should be thread-safe because of distinct + // `part_number`. + auto part_number = context->part_number; + auto etag_outcomes = context->etag_outcomes; + if (outcome.IsSuccess()) { + (*etag_outcomes)[part_number] = + outcome.GetResult().GetCopyPartResult().GetETag(); + } else { + (*etag_outcomes)[part_number] = outcome.GetError(); + } + { + absl::MutexLock l(context->multi_part_copy_mutex); + (*context->num_finished_parts)++; + context->multi_part_copy_cv->Signal(); + } +} static void MultiPartCopy(const Aws::String& source, const Aws::String& bucket_dst, From da889b8c8de63bbf63f2c4d4bb9d1cb011ccc474 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 20 Jul 2020 01:13:01 -0700 Subject: [PATCH 0813/2522] Integrate LLVM at https://github.com/llvm/llvm-project/commit/e3d646c699f1 PiperOrigin-RevId: 322094653 Change-Id: Ia39bb57ac054e727b54c951dfa25fe55e0055424 --- tensorflow/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 7ac813c702f..2c94c763bab 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -711,8 +711,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "cf5df40c4cf1a53a02ab1d56a488642e3dda8f6d" - LLVM_SHA256 = "64abb1a7b30578b81e5d15469b8437cbb7e16847444e258e71e07247983cb63b" + LLVM_COMMIT = "e3d646c699f158aac700e939373ea5786899cbc1" + LLVM_SHA256 = "d296fbd094b18a2b64034097c2e5d2e18e642f49a4d689508f21c92425fdb177" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From 14c1f1a592c4d9e23667ee2ee8af8b864c212f15 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 20 Jul 2020 02:01:30 -0700 Subject: [PATCH 0814/2522] compat: Update forward compatibility horizon to 2020-07-20 PiperOrigin-RevId: 322099058 Change-Id: I05eef4bf915c2422932fd3b9a39a07d720d9f8a0 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index a1f967fc876..c804dbfc786 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 19) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 20) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From 102603bc11787e6b2c3fc6bac0e92757bf6267c1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 20 Jul 2020 02:01:30 -0700 Subject: [PATCH 0815/2522] Update GraphDef version to 468. PiperOrigin-RevId: 322099059 Change-Id: If7a204d485001ccd10dbd2c46aa7143721ff0fc2 --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 2933c311aa7..28ead43a284 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 467 // Updated: 2020/7/19 +#define TF_GRAPH_DEF_VERSION 468 // Updated: 2020/7/20 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From 0a6475b416ed2522a0225341b4535b27f189a0a4 Mon Sep 17 00:00:00 2001 From: Chao Mei Date: Mon, 20 Jul 2020 02:12:55 -0700 Subject: [PATCH 0816/2522] Use a heavier but more accurate way to obtain information about applying delegate, and update the fyi message accordingly. 
PiperOrigin-RevId: 322100537 Change-Id: I5ded22b26e0bdbfb908cbcea80aff0744a550e01 --- .../tools/benchmark/benchmark_tflite_model.cc | 40 ++++++++++++------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/tensorflow/lite/tools/benchmark/benchmark_tflite_model.cc b/tensorflow/lite/tools/benchmark/benchmark_tflite_model.cc index f816bbdedfe..9da48badfbc 100644 --- a/tensorflow/lite/tools/benchmark/benchmark_tflite_model.cc +++ b/tensorflow/lite/tools/benchmark/benchmark_tflite_model.cc @@ -650,26 +650,38 @@ TfLiteStatus BenchmarkTfLiteModel::Init() { << " delegate."; return kTfLiteError; } else { - bool fully_delegated = true; - if (interpreter_->execution_plan().size() != 1) { - fully_delegated = false; - } else { - int first_node_id = interpreter_->execution_plan()[0]; - const TfLiteNode first_node = - interpreter_->node_and_registration(first_node_id)->first; - if (delegate.get() != first_node.delegate) { - fully_delegated = false; + // Ideally, such delegate info should already be computed when the + // delegate is being applied to the model graph. + int num_delegated_kernels = 0; + for (int i = 0; i < interpreter_->execution_plan().size(); ++i) { + int node_id = interpreter_->execution_plan()[i]; + const TfLiteNode& node = + interpreter_->node_and_registration(node_id)->first; + if (delegate.get() == node.delegate) { + num_delegated_kernels++; } } + bool fully_delegated = (num_delegated_kernels == 1 && + interpreter_->execution_plan().size() == 1); + if (params_.Get("require_full_delegation") && !fully_delegated) { TFLITE_LOG(ERROR) << "Disallowed CPU fallback detected."; return kTfLiteError; } - const std::string delegate_status = - fully_delegated ? "completely" : "partially"; - TFLITE_LOG(INFO) << "Applied " << delegate_provider->GetName() - << " delegate, and the model graph will be " - << delegate_status << " executed w/ the delegate."; + if (fully_delegated) { + TFLITE_LOG(INFO) << "Applied " << delegate_provider->GetName() + << " delegate, and the model graph will be completely" + << " executed by the delegate."; + } else if (num_delegated_kernels > 0) { + TFLITE_LOG(INFO) << "Applied " << delegate_provider->GetName() + << " delegate, and the model graph will be partially" + << " executed by the delegate w/ " + << num_delegated_kernels << " delegate kernels."; + } else { + TFLITE_LOG(INFO) << "Though " << delegate_provider->GetName() + << " delegate is applied, the model graph will not be" + << " executed by the delegate."; + } } owned_delegates_.emplace_back(std::move(delegate)); } From 8ee14cf0ff86ae967f05c1a845c9be5d2d5b8120 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 20 Jul 2020 04:09:28 -0700 Subject: [PATCH 0817/2522] Add a test to verify that the TF Lite C API headers can build, link, and run successfully when compiled as C code. 
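What makes such a pure-C test meaningful is the usual convention for C APIs: the public headers contain only C declarations and are wrapped in extern "C" when seen by a C++ compiler. The sketch below is a generic illustration of that pattern, not TF Lite code; MY_API_H_ and MyApiVersion are placeholder names introduced for the example.

/* Illustrative header sketch, not TF Lite code: the conventional layout that
 * keeps a C API header consumable from both C and C++ translation units. */
#ifndef MY_API_H_
#define MY_API_H_

#ifdef __cplusplus
extern "C" {
#endif

/* Only plain C constructs here: no references, default arguments, overloads,
 * or templates, so a C compiler can consume the header unchanged. */
const char* MyApiVersion(void);

#ifdef __cplusplus
}  /* extern "C" */
#endif

#endif  /* MY_API_H_ */

Compiling a test such as the new c_test.c with a C compiler then acts as a regression check: if a C++-only construct slips into one of the public headers, the C build of the test fails immediately rather than in a downstream user's project.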
PiperOrigin-RevId: 322114627 Change-Id: I3baf36fcc7f79dc782f38b6f3e63a6300b395f3d --- tensorflow/lite/c/BUILD | 15 +++ tensorflow/lite/c/c_test.c | 143 ++++++++++++++++++++++++++++ tensorflow/lite/tools/make/Makefile | 1 + 3 files changed, 159 insertions(+) create mode 100644 tensorflow/lite/c/c_test.c diff --git a/tensorflow/lite/c/BUILD b/tensorflow/lite/c/BUILD index 1aa043b7c0c..366b43336b9 100644 --- a/tensorflow/lite/c/BUILD +++ b/tensorflow/lite/c/BUILD @@ -158,3 +158,18 @@ cc_test( "@com_google_googletest//:gtest", ], ) + +cc_test( + name = "c_test", + size = "small", + srcs = ["c_test.c"], + copts = tflite_copts(), + data = [ + "//tensorflow/lite:testdata/add.bin", + ], + deps = [ + ":c_api", + ":c_api_experimental", + ":common", + ], +) diff --git a/tensorflow/lite/c/c_test.c b/tensorflow/lite/c/c_test.c new file mode 100644 index 00000000000..2e9ca30ee43 --- /dev/null +++ b/tensorflow/lite/c/c_test.c @@ -0,0 +1,143 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/c/c_api.h" +#include "tensorflow/lite/c/c_api_experimental.h" +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/c/builtin_op_data.h" + +// This file exists just to verify that the above header files above can build, +// link, and run as "C" code. + +#ifdef __cplusplus +#error "This file should be compiled as C code, not as C++." +#endif + +#include +#include +#include + +static void CheckFailed(const char *expression, const char *filename, + int line_number) { + fprintf(stderr, "ERROR: CHECK failed: %s:%d: %s\n", filename, line_number, + expression); + fflush(stderr); + abort(); +} + +// We use an extra level of macro indirection here to ensure that the +// macro arguments get evaluated, so that in a call to CHECK(foo), +// the call to STRINGIZE(condition) in the definition of the CHECK +// macro results in the string "foo" rather than the string "condition". +#define STRINGIZE(expression) STRINGIZE2(expression) +#define STRINGIZE2(expression) #expression + +// Like assert(), but not dependent on NDEBUG. +#define CHECK(condition) \ + ((condition) ? (void)0 \ + : CheckFailed(STRINGIZE(condition), __FILE__, __LINE__)) +#define ASSERT_EQ(expected, actual) CHECK((expected) == (actual)) +#define ASSERT_NE(expected, actual) CHECK((expected) != (actual)) +#define ASSERT_STREQ(expected, actual) \ + ASSERT_EQ(0, strcmp((expected), (actual))) + +// Test the TfLiteVersion function. 
+static void TestVersion(void) { + const char *version = TfLiteVersion(); + printf("Version = %s\n", version); + CHECK(version[0] != '\0'); +} + +static void TestSmokeTest(void) { + TfLiteModel* model = + TfLiteModelCreateFromFile("tensorflow/lite/testdata/add.bin"); + ASSERT_NE(model, NULL); + + TfLiteInterpreterOptions* options = TfLiteInterpreterOptionsCreate(); + ASSERT_NE(options, NULL); + TfLiteInterpreterOptionsSetNumThreads(options, 2); + + TfLiteInterpreter* interpreter = TfLiteInterpreterCreate(model, options); + ASSERT_NE(interpreter, NULL); + + // The options/model can be deleted immediately after interpreter creation. + TfLiteInterpreterOptionsDelete(options); + TfLiteModelDelete(model); + + ASSERT_EQ(TfLiteInterpreterAllocateTensors(interpreter), kTfLiteOk); + ASSERT_EQ(TfLiteInterpreterGetInputTensorCount(interpreter), 1); + ASSERT_EQ(TfLiteInterpreterGetOutputTensorCount(interpreter), 1); + + int input_dims[1] = {2}; + ASSERT_EQ(TfLiteInterpreterResizeInputTensor( + interpreter, 0, input_dims, 1), + kTfLiteOk); + ASSERT_EQ(TfLiteInterpreterAllocateTensors(interpreter), kTfLiteOk); + + TfLiteTensor* input_tensor = TfLiteInterpreterGetInputTensor(interpreter, 0); + ASSERT_NE(input_tensor, NULL); + ASSERT_EQ(TfLiteTensorType(input_tensor), kTfLiteFloat32); + ASSERT_EQ(TfLiteTensorNumDims(input_tensor), 1); + ASSERT_EQ(TfLiteTensorDim(input_tensor, 0), 2); + ASSERT_EQ(TfLiteTensorByteSize(input_tensor), sizeof(float) * 2); + ASSERT_NE(TfLiteTensorData(input_tensor), NULL); + ASSERT_STREQ(TfLiteTensorName(input_tensor), "input"); + + TfLiteQuantizationParams input_params = + TfLiteTensorQuantizationParams(input_tensor); + ASSERT_EQ(input_params.scale, 0.f); + ASSERT_EQ(input_params.zero_point, 0); + + float input[2] = {1.f, 3.f}; + ASSERT_EQ(TfLiteTensorCopyFromBuffer(input_tensor, input, + 2 * sizeof(float)), + kTfLiteOk); + + ASSERT_EQ(TfLiteInterpreterInvoke(interpreter), kTfLiteOk); + + const TfLiteTensor* output_tensor = + TfLiteInterpreterGetOutputTensor(interpreter, 0); + ASSERT_NE(output_tensor, NULL); + ASSERT_EQ(TfLiteTensorType(output_tensor), kTfLiteFloat32); + ASSERT_EQ(TfLiteTensorNumDims(output_tensor), 1); + ASSERT_EQ(TfLiteTensorDim(output_tensor, 0), 2); + ASSERT_EQ(TfLiteTensorByteSize(output_tensor), sizeof(float) * 2); + ASSERT_NE(TfLiteTensorData(output_tensor), NULL); + ASSERT_STREQ(TfLiteTensorName(output_tensor), "output"); + + TfLiteQuantizationParams output_params = + TfLiteTensorQuantizationParams(output_tensor); + ASSERT_EQ(output_params.scale, 0.f); + ASSERT_EQ(output_params.zero_point, 0); + + float output[2]; + ASSERT_EQ(TfLiteTensorCopyToBuffer(output_tensor, output, + 2 * sizeof(float)), + kTfLiteOk); + ASSERT_EQ(output[0], 3.f); + ASSERT_EQ(output[1], 9.f); + + TfLiteInterpreterDelete(interpreter); +} + +static void RunTests(void) { + TestVersion(); + TestSmokeTest(); +} + +int main(void) { + RunTests(); + return 0; +} diff --git a/tensorflow/lite/tools/make/Makefile b/tensorflow/lite/tools/make/Makefile index 7d55370818c..c7ddff58440 100644 --- a/tensorflow/lite/tools/make/Makefile +++ b/tensorflow/lite/tools/make/Makefile @@ -148,6 +148,7 @@ endif CORE_CC_ALL_SRCS := $(sort $(CORE_CC_ALL_SRCS)) CORE_CC_EXCLUDE_SRCS := \ $(wildcard tensorflow/lite/*test.cc) \ +$(wildcard tensorflow/lite/*/*test.c) \ $(wildcard tensorflow/lite/*/*test.cc) \ $(wildcard tensorflow/lite/*/*/benchmark.cc) \ $(wildcard tensorflow/lite/*/*/example*.cc) \ From 6fe5847c19b7d4fdf777f14b7c8edb1b2870d397 Mon Sep 17 00:00:00 2001 From: Krzysztof Laskowski Date: Mon, 20 Jul 
2020 14:19:13 +0200 Subject: [PATCH 0818/2522] Fix setting out_mtypes in MemoryTypesForNode Clear hostmem_attr vector before populating it with indices from "_output_hostmem" attribute so that it doesn't contain indices from "_input_hostmem" attribute. --- tensorflow/core/framework/memory_types.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/core/framework/memory_types.cc b/tensorflow/core/framework/memory_types.cc index d27ef1da61d..208ad20c21b 100644 --- a/tensorflow/core/framework/memory_types.cc +++ b/tensorflow/core/framework/memory_types.cc @@ -161,6 +161,7 @@ Status MemoryTypesForNode(const OpRegistryInterface* op_registry, } } } + hostmem_attr.clear(); if (TryGetNodeAttr(ndef, "_output_hostmem", &hostmem_attr)) { for (int32 i : hostmem_attr) { if (0 <= i && i < out_mtypes->size()) { From db528aadcc0be089b947f9ea81d9e49ee8ec72d8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 20 Jul 2020 04:21:13 -0700 Subject: [PATCH 0819/2522] Integrate LLVM at https://github.com/llvm/llvm-project/commit/7ca9b589c453 PiperOrigin-RevId: 322115852 Change-Id: I36ad61da02e8e145d0c5ac208852e85cb810d13d --- tensorflow/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 2c94c763bab..ee3d5b3260a 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -711,8 +711,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "e3d646c699f158aac700e939373ea5786899cbc1" - LLVM_SHA256 = "d296fbd094b18a2b64034097c2e5d2e18e642f49a4d689508f21c92425fdb177" + LLVM_COMMIT = "7ca9b589c45302feb28c0b3b0e80088c0901bb40" + LLVM_SHA256 = "b5c8977fb6cfebf47e188f24c4524f038e117e26d631e607dae975e37671f0d9" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From d548331e86047f3068b54a0a23ff42ef98b8e39d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 20 Jul 2020 07:19:57 -0700 Subject: [PATCH 0820/2522] Integrate LLVM at https://github.com/llvm/llvm-project/commit/f9595857b9f8 PiperOrigin-RevId: 322139193 Change-Id: I88020d8d9a597590a3548df401f1f99de767b9d3 --- tensorflow/workspace.bzl | 4 ++-- third_party/mlir/BUILD | 26 +++++++++++++------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index ee3d5b3260a..2f0ed2db863 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -711,8 +711,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. 
- LLVM_COMMIT = "7ca9b589c45302feb28c0b3b0e80088c0901bb40" - LLVM_SHA256 = "b5c8977fb6cfebf47e188f24c4524f038e117e26d631e607dae975e37671f0d9" + LLVM_COMMIT = "f9595857b9f868fc7724ea767a8fd984d02848ff" + LLVM_SHA256 = "84e90b625094990dd319a436788656db195953cc823eb080e74ee20531bf872a" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), diff --git a/third_party/mlir/BUILD b/third_party/mlir/BUILD index ec0574f3367..2d091b04b85 100644 --- a/third_party/mlir/BUILD +++ b/third_party/mlir/BUILD @@ -387,7 +387,7 @@ filegroup( "include/mlir/Interfaces/CallInterfaces.td", "include/mlir/Interfaces/ControlFlowInterfaces.td", "include/mlir/Interfaces/SideEffectInterfaces.td", - "include/mlir/Interfaces/VectorUnrollInterface.td", + "include/mlir/Interfaces/VectorInterfaces.td", "include/mlir/Interfaces/ViewLikeInterface.td", ":OpBaseTdFiles", ], @@ -647,13 +647,13 @@ cc_library( ) cc_library( - name = "VectorUnrollInterface", - srcs = ["lib/Interfaces/VectorUnrollInterface.cpp"], - hdrs = ["include/mlir/Interfaces/VectorUnrollInterface.h"], + name = "VectorInterfaces", + srcs = ["lib/Interfaces/VectorInterfaces.cpp"], + hdrs = ["include/mlir/Interfaces/VectorInterfaces.h"], includes = ["include"], deps = [ ":IR", - ":VectorUnrollInterfaceIncGen", + ":VectorInterfacesIncGen", ], ) @@ -855,7 +855,7 @@ cc_library( ":SideEffectInterfaces", ":StandardOpsIncGen", ":Support", - ":VectorUnrollInterface", + ":VectorInterfaces", ":ViewLikeInterface", "@llvm-project//llvm:Support", ], @@ -918,9 +918,9 @@ cc_library( ":SideEffectInterfaces", ":StandardOps", ":Support", + ":VectorInterfaces", ":VectorOpsIncGen", ":VectorTransformPatternsIncGen", - ":VectorUnrollInterface", "@llvm-project//llvm:Support", ], ) @@ -2127,20 +2127,20 @@ gentbl( ) gentbl( - name = "VectorUnrollInterfaceIncGen", + name = "VectorInterfacesIncGen", strip_include_prefix = "include", tbl_outs = [ ( "-gen-op-interface-decls", - "include/mlir/Interfaces/VectorUnrollInterface.h.inc", + "include/mlir/Interfaces/VectorInterfaces.h.inc", ), ( "-gen-op-interface-defs", - "include/mlir/Interfaces/VectorUnrollInterface.cpp.inc", + "include/mlir/Interfaces/VectorInterfaces.cpp.inc", ), ], tblgen = ":mlir-tblgen", - td_file = "include/mlir/Interfaces/VectorUnrollInterface.td", + td_file = "include/mlir/Interfaces/VectorInterfaces.td", td_srcs = [ ":OpBaseTdFiles", ], @@ -3586,7 +3586,7 @@ filegroup( name = "VectorOpsTdFiles", srcs = [ "include/mlir/Dialect/Vector/VectorOps.td", - "include/mlir/Interfaces/VectorUnrollInterface.td", + "include/mlir/Interfaces/VectorInterfaces.td", ":AffineOpsTdFiles", ":OpBaseTdFiles", ], @@ -3716,7 +3716,7 @@ exports_files( "include/mlir/Interfaces/ControlFlowInterfaces.h", "include/mlir/Interfaces/ControlFlowInterfaces.td", "include/mlir/Interfaces/SideEffectInterfaces.td", - "include/mlir/Interfaces/VectorUnrollInterface.td", + "include/mlir/Interfaces/VectorInterfaces.td", "include/mlir/Interfaces/ViewLikeInterface.td", "include/mlir/Dialect/LLVMIR/LLVMOpBase.td", "include/mlir/Dialect/StandardOps/IR/Ops.td", From 45591fac1d5a9042e205832e60c2fc4bfaeadfdc Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Fri, 5 Jun 2020 03:56:36 +0200 Subject: [PATCH 0821/2522] Add dynamic shape test to ConvertFusedBatchNorm --- .../tf2tensorrt/convert/convert_nodes.cc | 13 +- .../tf2tensorrt/convert/convert_nodes_test.cc | 136 
++++++++++++++++++ 2 files changed, 148 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc index f2407fccfad..07d7705dccf 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc @@ -4942,7 +4942,18 @@ Status ConvertFusedBatchNorm(OpConverterParams* params) { node_def.name()); } nvinfer1::ITensor* tensor = inputs.at(0).tensor(); - + if (!params->use_implicit_batch && tensor->getDimensions().d[1] == -1) { + // This check is to make sure that channel dimension is known during + // conversion. + // + // We check this only in explicit batch mode and reject an op with unknown + // channel dimension during segmentation. In implicit batch mode we have + // known shapes during conversion even though the shapes may not be known + // during segmentation (see the actual argument for input_shapes when + // ConvertGraphDefToEngine is called from TRTEngineOp::BuildEngine). + return errors::InvalidArgument("Channel dimension must be static, at ", + node_def.name()); + } // Check parameter types auto parameter_type = inputs.at(1).weights().TrtDType(); if ((parameter_type != nvinfer1::DataType::kFLOAT) && diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc index 52d05ff8225..1b41d544464 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc @@ -2011,6 +2011,142 @@ TEST_F(OpConverterTest, ConvertConst) { TestConvertConst(this); } +template +NodeDef CreateFusedBatchNormOp(DataType tf_type, std::string data_format, + bool is_training, float epsilon) { + Scope s = Scope::NewRootScope(); + auto x = ops::Placeholder(s.WithOpName("x"), tf_type); + auto scale = ops::Placeholder(s.WithOpName("scale"), tf_type); + auto offset = ops::Placeholder(s.WithOpName("offset"), tf_type); + auto mean = ops::Placeholder(s.WithOpName("mean"), tf_type); + auto variance = ops::Placeholder(s.WithOpName("variance"), tf_type); + typename T::Attrs attrs; + attrs.data_format_ = data_format; + attrs.is_training_ = is_training; + if (epsilon > 0) { + attrs.epsilon_ = epsilon; + } else { + EXPECT_GE(epsilon, 0); + } + return T(s.WithOpName("my_batchnorm"), x, scale, offset, mean, variance, + attrs) + .operation.node() + ->def(); +} + +TEST_P(OpConverterTest1, ConvertFusedBatchNorm) { + using OpFunc = std::function; + std::vector get_node_def_vec{ + CreateFusedBatchNormOp, + CreateFusedBatchNormOp, + CreateFusedBatchNormOp}; + + struct TestParam { + std::string data_format; + int tensor_input_idx; // Index of an input that will be provided as tensor. 
+ bool is_training; + float epsilon; + Status conversion_status; + bool keep_channel_unknown; + }; + + struct NodeInput { + std::string name; + std::vector dims; + std::vector val; + }; + std::vector node_input{ + {"x", {2, 3, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}, + {"scale", {3}, {7, 8, 9}}, + {"offset", {3}, {10, 20, 30}}, + {"mean", {3}, {1, 2, 3}}, + {"variance", {3}, {4, 5, 6}}}; + + std::vector expected_output{10.0, 13.495633, 23.574135, 27.148273, + 37.342354, 41.013527, 30.9738, 34.469433, + 45.018955, 48.59309, 59.369415, 63.04059}; + for (auto get_node_def : get_node_def_vec) { + NodeDef tmp_node_def = get_node_def(tf_type, "NCHW", true, 0); + std::string op_name = tmp_node_def.op(); + std::vector test_param{ + {"NHWC", 0, false, 0, + errors::Unimplemented(StrCat( + op_name, " only supports data_format=NCHW, at my_batchnorm"))}, + {"NCHW", 0, true, 0, + errors::Unimplemented(StrCat( + op_name, " only supports is_training=false, at my_batchnorm"))}, + {"NCHW", 1, false, 0, + errors::Unimplemented(StrCat("The input \"scale\" for ", op_name, + " must be a constant, at my_batchnorm"))}, + {"NCHW", 2, false, 0, + errors::Unimplemented(StrCat("The input \"offset\" for ", op_name, + " must be a constant, at my_batchnorm"))}, + {"NCHW", 3, false, 0, + errors::Unimplemented(StrCat("The input \"mean\" for ", op_name, + " must be a constant, at my_batchnorm"))}, + {"NCHW", 4, false, 0, + errors::Unimplemented(StrCat("The input \"variance\" for ", op_name, + " must be a constant, at my_batchnorm"))}, + {"NCHW", 0, false, 0.01}}; // The last one is the only test that runs. + if (trt_mode == TrtTestMode::kDynamicShape) { + test_param.push_back( + {"NCHW", 0, false, 0.01, + errors::InvalidArgument( + "Channel dimension must be static, at my_batchnorm"), + true}); + } + for (auto p : test_param) { + Reset(); + NodeDef node_def = + get_node_def(tf_type, p.data_format, p.is_training, p.epsilon); + for (int i = 0; i < node_input.size(); i++) { + if (i == 0 || i == p.tensor_input_idx) { + // The first input (x) is always added as a tensor, and it has shape + // NCHW. The other inputs are per channel values (1D, size C). + // + // In implicit batch mode, it is not possible to add any of the 1D + // inputs as a tensor: the first dim is always treated as batch dim in + // implicit batch mode, and that has to agree for all tensors. We have + // two input tensors with shapes NCHW and C and in general N != C. + // The converter already picked up N from the first input, and reports + // an error when we try to add any other tensors with a non-matching + // first dim. + // + // This restriction does not apply in explicit batch mode: the tensors + // can have different first dim. The converter still expects that only + // the first arg is a tensor. TODO(tfeher) Check if one can relax this + // restriction. + Status expected_status = + (i != 0 && trt_mode == TrtTestMode::kImplicitBatch) + ?
errors::InvalidArgument( + StrCat("Batch size doesn't match for tensor ", + node_input[i].name, + ": Provided batch size does not match " + "converter batch size: 3 vs 2")) + : Status::OK(); + std::vector partial_input_shape; + if (i == 0 && trt_mode == TrtTestMode::kDynamicShape && + !p.keep_channel_unknown) { + // keep channel dim static (known) + partial_input_shape.resize(4, -1); + partial_input_shape[1] = node_input[i].dims[1]; + } + AddTestTensor(node_input[i].name, node_input[i].dims, tf_type, + node_input[i].val, partial_input_shape, + expected_status); + + } else { + AddTestWeights(node_input[i].name, node_input[i].dims, + node_input[i].val, tf_type); + } + } + TestOpConverter("my_batchnorm", node_def, node_input[0].dims, + p.conversion_status, Status::OK(), + ArrayFloatNear(expected_output)); + } + } +} // namespace convert + TEST_P(OpConverterTest1, ConvertTranspose) { // Get the NodeDef for Transpose. Scope s = Scope::NewRootScope(); From ca6caa17c2def7dc3acedb387c2f4a1e76a7bb7b Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Mon, 20 Jul 2020 21:58:54 +0700 Subject: [PATCH 0822/2522] Fix for `ABSL_EXCLUSIVE_LOCKS_REQUIRED` Co-authored-by: Mihai Maruseac --- .../c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc index a542a45d0fb..6006273b2ca 100644 --- a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc @@ -161,7 +161,7 @@ void Cleanup(TF_RandomAccessFile* file) { } static void FillBuffer(uint64_t start, GCSFile* gcs_file, TF_Status* status) { - ABSL_EXCLUSIVE_LOCKS_REQUIRED(gcs_file->buffer_mutex) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(gcs_file->buffer_mutex); gcs_file->buffer_start = start; gcs_file->buffer.resize(gcs_file->buffer_size); auto read = From 587607a9b7be9439b01bc29c4d88633c3f49c8f1 Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Mon, 20 Jul 2020 08:08:33 -0700 Subject: [PATCH 0823/2522] [MLIR] Extend shape inference to handle region based control flow ops - Handle WhileRegion by propagating input shapes to outputs (similar to While) - Handle IfRegion by propagating matching types across the two branches. - Rename AddCastBackForUnsupportedNonTFUses to UpdateTypeAndInsertIncompatibleUseCasts and change it to handle both cast insertion and type update, so that it can be consistently done in the correct order (casts first, type update after that).
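For illustration, a condensed sketch of the IfRegion case, adapted from the new shape_inference.mlir test added below (the element types such as i1 are assumptions filled in for readability, so treat this as a sketch rather than the exact test):

    // Before the pass: the IfRegion result and both branch bodies are unranked.
    func @if_region_shape_example(%arg0: tensor<i1>, %arg1: tensor<1x2x3xf32>) -> tensor<*xf32> {
      %unshaped = "tf.Cast"(%arg1) : (tensor<1x2x3xf32>) -> tensor<*xf32>
      %0 = "tf.IfRegion"(%arg0) ({
        %1 = "tf.Add"(%unshaped, %unshaped) : (tensor<*xf32>, tensor<*xf32>) -> tensor<*xf32>
        "tf.Yield"(%1) : (tensor<*xf32>) -> ()
      }, {
        %2 = "tf.Sub"(%unshaped, %unshaped) : (tensor<*xf32>, tensor<*xf32>) -> tensor<*xf32>
        "tf.Yield"(%2) : (tensor<*xf32>) -> ()
      }) {is_stateless = true} : (tensor<i1>) -> tensor<*xf32>
      return %0 : tensor<*xf32>
    }
    // After the pass both yields agree on tensor<1x2x3xf32>, so the IfRegion
    // result and the function return type are refined to tensor<1x2x3xf32>;
    // a tf.Cast back to the old type is only inserted for incompatible users.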
PiperOrigin-RevId: 322146177 Change-Id: I29b74cae05c9974620501547af43f21fdbf92c1b --- .../tensorflow/tests/shape_inference.mlir | 52 +++- .../tensorflow/transforms/shape_inference.cc | 234 +++++++++++------- 2 files changed, 193 insertions(+), 93 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/shape_inference.mlir b/tensorflow/compiler/mlir/tensorflow/tests/shape_inference.mlir index 7d2f630869a..5a8f63ec63d 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/shape_inference.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/shape_inference.mlir @@ -124,6 +124,27 @@ func @multiple_blocks_one_return(%arg0: tensor) -> tensor<*xf32> { return %0 : tensor<*xf32> } + // Verify shape propagation from function arg -> if region body -> if region output -> function return type + // CHECK-LABEL: shape_from_if_to_region_bodies_to_output + // CHECK-SAME: -> tensor<1x2x3xf32> + func @shape_from_if_to_region_bodies_to_output(%arg0: tensor, %arg1: tensor<1x2x3xf32>) -> tensor<*xf32> { + %unshaped = "tf.Cast"(%arg1) : (tensor<1x2x3xf32>) -> tensor<*xf32> + %0 = "tf.IfRegion"(%arg0) ({ + // CHECK: "tf.Add"{{.+}}(tensor<1x2x3xf32>, tensor<1x2x3xf32>) -> tensor<1x2x3xf32> + // CHECK: "tf.Yield"{{.+}}(tensor<1x2x3xf32>) -> () + %1 = "tf.Add"(%unshaped, %unshaped) : (tensor<*xf32>, tensor<*xf32>) -> tensor<*xf32> + "tf.Yield"(%1) : (tensor<*xf32>) -> () + }, { + // CHECK: "tf.Sub"{{.+}}(tensor<1x2x3xf32>, tensor<1x2x3xf32>) -> tensor<1x2x3xf32> + // CHECK: "tf.Yield"{{.+}}(tensor<1x2x3xf32>) -> () + %2 = "tf.Sub"(%unshaped, %unshaped) : (tensor<*xf32>, tensor<*xf32>) -> tensor<*xf32> + "tf.Yield"(%2) : (tensor<*xf32>) -> () + // CHECK: {is_stateless = true} : (tensor) -> tensor<1x2x3xf32> + }) {is_stateless = true} : (tensor) -> tensor<*xf32> + // CHECK: return {{.*}} : tensor<1x2x3xf32> + return %0 : tensor<*xf32> + } + // CHECK-LABEL: func @shape_from_while_to_cond_body_functions func @shape_from_while_to_cond_body_functions(%arg0: tensor<4xf32>, %arg1: tensor>>, %arg2: tensor>>) -> tensor<4xf32> { // CHECK: "tf.While" @@ -169,6 +190,33 @@ func @multiple_blocks_one_return(%arg0: tensor) -> tensor<*xf32> { return %1, %arg1, %arg2 : tensor<*xf32>, tensor<*x!tf.resource>, tensor>> } + // Verify shape propagation from function arg -> while region cond/body -> while region output -> function return type + // CHECK-LABEL: func @shape_from_while_operands_to_cond_body_to_while_results + // CHECK-SAME: -> tensor<1x2x3xf32> + func @shape_from_while_operands_to_cond_body_to_while_results(%arg0: tensor, %arg1: tensor<1x2x3xf32>) -> tensor<*xf32> { + %unshaped = "tf.Cast"(%arg1) : (tensor<1x2x3xf32>) -> tensor<*xf32> + // CHECK: "tf.WhileRegion" + %0:2 = "tf.WhileRegion"(%arg0, %unshaped) ({ + // CHECK: {{.*}}({{.+}}: tensor, {{.+}}: tensor<1x2x3xf32>): + ^bb0(%carg0: tensor, %carg1: tensor<*xf32>): + %limit = constant dense<5> : tensor + %cond = "tf.NotEqual"(%carg0, %limit) : (tensor, tensor) -> tensor + "tf.Yield"(%cond) : (tensor) -> () + }, { + // CHECK: {{.*}}({{.+}}: tensor, {{.+}}: tensor<1x2x3xf32>): + ^bb0(%barg0: tensor, %barg1: tensor<*xf32>): + %one = constant dense<1> : tensor + %sub = "tf.Sub"(%barg0, %one) : (tensor, tensor) -> tensor + // CHECK: "tf.Neg"({{.+}}) : (tensor<1x2x3xf32>) -> tensor<1x2x3xf32> + %neg = "tf.Neg"(%barg1) : (tensor<*xf32>) -> tensor<*xf32> + // CHECK: "tf.Yield"{{.+}}, {{.+}}) : (tensor, tensor<1x2x3xf32>) -> () + "tf.Yield"(%sub, %neg) : (tensor, tensor<*xf32>) -> () + // CHECK: {is_stateless = true} : (tensor, tensor<1x2x3xf32>) -> (tensor, 
tensor<1x2x3xf32>) + }) {is_stateless = true} : (tensor, tensor<*xf32>) -> (tensor, tensor<*xf32>) + // CHECK: return {{.+}}#1 : tensor<1x2x3xf32> + return %0#1 : tensor<*xf32> + } + // CHECK-LABEL: func @shape_from_case_to_branch_functions( // CHECK-SAME: %[[ARG_0:.*]]: tensor, // CHECK-SAME: %[[ARG_1:.*]]: tensor>> @@ -219,7 +267,7 @@ func @multiple_blocks_one_return(%arg0: tensor) -> tensor<*xf32> { // CHECK-LABEL: func @reused_if_then_branch // CHECK-SAME: (%arg0: tensor<*xf32>) -> tensor<*xf32> - // expected-warning @+1 {{expected control flow function reused_if_then_branch to have exactly 1 use}} + // expected-warning @+1 {{expected control flow function @reused_if_then_branch to have exactly 1 use}} func @reused_if_then_branch(%arg0: tensor<*xf32>) -> tensor<*xf32> { // CHECK: return // CHECK-SAME: tensor<*xf32> @@ -228,7 +276,7 @@ func @multiple_blocks_one_return(%arg0: tensor) -> tensor<*xf32> { // CHECK-LABEL: func @reused_if_else_branch // CHECK-SAME: (%arg0: tensor<*xf32>) -> tensor<*xf32> - // expected-warning @+1 {{expected control flow function reused_if_else_branch to have exactly 1 use}} + // expected-warning @+1 {{expected control flow function @reused_if_else_branch to have exactly 1 use}} func @reused_if_else_branch(%arg0: tensor<*xf32>) -> tensor<*xf32> { // CHECK: "tf.Identity"(%arg0) : (tensor<*xf32>) -> tensor<*xf32> %0 = "tf.Identity"(%arg0) : (tensor<*xf32>) -> (tensor<*xf32>) diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc index d2e497a1dec..2551e68dd74 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc @@ -130,25 +130,28 @@ bool NeedsCastBack(OpOperand& use, Dialect* tf_dialect) { !IsSupportedNonTFOp(use.getOwner()); } -// Inserts tf.Cast operation when changing the type of a result if the user is -// not a TF operation, as we can't guarantee that the new type will be OK. -void AddCastBackForUnsupportedNonTFUses(Operation* op, Value result, - Dialect* tf_dialect, Type old_type) { - // A tf.Cast operation is lazily created on the first uses that isn't a TF - // operation. +// Updates the result of an operation to a new inferred type. Also inserts +// tf.Cast operation for uses that are incompatible with the new type. +void UpdateTypeAndInsertIncompatibleUseCasts(Dialect* tf_dialect, Type new_type, + Operation* op, Value result) { + // A tf.Cast operation is lazily created on the first use requires a cast. TF::CastOp cast_op; auto get_cast_op = [&]() { if (!cast_op) { OpBuilder b(op); b.setInsertionPointAfter(op); - cast_op = b.create(op->getLoc(), old_type, result, + cast_op = b.create(op->getLoc(), result.getType(), result, /*truncate=*/b.getBoolAttr(false)); } return Value(cast_op); }; + // First insert cast back for uses that need a cast and then + // update the type. for (OpOperand& use : make_early_inc_range(result.getUses())) { if (NeedsCastBack(use, tf_dialect)) use.set(get_cast_op()); } + + result.setType(new_type); } // Extracts a PartialTensorShape from the MLIR type. @@ -213,7 +216,8 @@ bool CanBeRefined(Type type) { // Returns whether `original_type` type can be refined with // `potential_refined_type` type. 
bool CanRefineTypeWith(Type original_type, Type potential_refined_type) { - if (!CanBeRefined(original_type)) return false; + if (original_type == potential_refined_type || !CanBeRefined(original_type)) + return false; auto shape_type = potential_refined_type.dyn_cast(); if (!shape_type) return false; @@ -225,6 +229,18 @@ bool CanRefineTypeWith(Type original_type, Type potential_refined_type) { !element_type_with_subtype.GetSubtypes().empty(); } +// Refines the type of `result` of `op` using the type `potential_refined_type`. +// Returns true if the type was changed. +bool RefineResultType(Operation* op, Value result, + Type potential_refined_type) { + if (!CanRefineTypeWith(result.getType(), potential_refined_type)) + return false; + + UpdateTypeAndInsertIncompatibleUseCasts(op->getDialect(), + potential_refined_type, op, result); + return true; +} + // Infers the shape from a (Stateful)PartionedCall operation by looking up the // called function and propagating the return type. bool InferShapeForCall(Operation* op) { @@ -239,23 +255,29 @@ bool InferShapeForCall(Operation* op) { // Map each of the results of the call to the returned type of the // function. for (auto result : zip(op->getResults(), func.getType().getResults())) { - auto call_op_result = std::get<0>(result); - auto func_result_type = std::get<1>(result); - if (call_op_result.getType() == func_result_type) continue; - if (!CanRefineTypeWith(call_op_result.getType(), func_result_type)) - continue; - - // Inserts a cast back to the original type if any user is not in the - // TF dialect. - AddCastBackForUnsupportedNonTFUses(op, call_op_result, op->getDialect(), - call_op_result.getType()); - // Finally we inferred the shape and replace the type for this result. - call_op_result.setType(func_result_type); - changed = true; + changed = RefineResultType(op, std::get<0>(result), std::get<1>(result)) || + changed; } + return changed; } +// Infers the shape of IfRegion outputs based on the shapes of the then and else +// yields. +bool InferShapeForIfRegion(IfRegionOp op) { + bool changed = false; + + Operation* then_yield = op.then_branch().front().getTerminator(); + Operation* else_yield = op.else_branch().front().getTerminator(); + for (auto result : zip(op.getResults(), then_yield->getOperandTypes(), + else_yield->getOperandTypes())) { + // If then and else types do not match, skip refinement for that result. + if (std::get<1>(result) != std::get<2>(result)) continue; + changed = RefineResultType(op, std::get<0>(result), std::get<1>(result)) || + changed; + } + return changed; +} bool InferShapeForCast(CastOp op, Dialect* tf_dialect) { Value result = op.getResult(); if (!CanBeRefined(result.getType())) return false; @@ -278,9 +300,9 @@ bool InferShapeForCast(CastOp op, Dialect* tf_dialect) { auto new_type = RankedTensorType::get( ranked_op_type.getShape(), result.getType().cast().getElementType()); - auto old_type = result.getType(); - result.setType(new_type); - AddCastBackForUnsupportedNonTFUses(op, op.getResult(), tf_dialect, old_type); + + UpdateTypeAndInsertIncompatibleUseCasts(tf_dialect, new_type, op, + op.getResult()); return true; } @@ -304,12 +326,8 @@ bool RefineWithInferTypeOpInterface(InferTypeOpInterface infer_ti, for (auto result : zip(op->getResults(), inferred)) { if (std::get<0>(result).getType() == std::get<1>(result)) continue; - // Inserts a cast back to the original type if any user is not in the - // TF dialect.
- AddCastBackForUnsupportedNonTFUses(op, std::get<0>(result), - op->getDialect(), std::get<1>(result)); - // Finally we inferred the shape and replace the type for this result. - std::get<0>(result).setType(std::get<1>(result)); + UpdateTypeAndInsertIncompatibleUseCasts( + op->getDialect(), std::get<1>(result), op, std::get<0>(result)); changed = true; } return changed; @@ -498,20 +516,25 @@ class ShapeInference { // 1) They are never reused, ie. having a single use in module. // 2) Their input types match those of their parent ops (excluding inputs // like predicate). - // Returns a boolean indicating whether any change has been applied. - LogicalResult RefineShapeForControlFlowFunc(FuncOp func, - ArrayRef input_types, - int64_t max_iteration); - - // Propagate the shapes to the functions named. LogicalResult PropagateShapeToFunctions( ModuleOp module, Operation::operand_type_range input_types, ArrayRef func_names, int64_t max_iteration); + // Propagates shapes to regions given the shapes of the inputs of the regions. + // All regions provided in `regions` are assumed to have inputs of type + // `input_types`. + LogicalResult PropagateShapeToRegions( + Operation::operand_type_range input_types, ArrayRef regions, + int64_t max_iteration); + // Shape propagation for call/control flow ops. LogicalResult PropagateShapeIntoAttachedFunctions(Operation* op, int64_t max_iteration); + // Shape propagation for region based control flow. + LogicalResult PropagateShapeIntoAttachedRegions(Operation* op, + int64_t max_iterations); + // Propagates any constant operand of call_op to the called function body's // corresponding argument if the callee has only one use. // @@ -648,8 +671,8 @@ bool ShapeInference::RefineTypeForPassThroughOperands(Operation* op, .isa()) continue; - std::get<1>(entry).setType(operand_type); - AddCastBackForUnsupportedNonTFUses(op, result, tf_dialect_, result_type); + UpdateTypeAndInsertIncompatibleUseCasts(tf_dialect_, operand_type, op, + result); changed = true; } return changed; @@ -679,13 +702,12 @@ bool ShapeInference::RefineShapeForPassThroughOps(Operation* op) { result_type.getShape() == operand_type.getShape()) continue; if (!is_allowed_dtype(operand_type.getElementType()) || - !is_allowed_dtype(result_type.getElementType())) { + !is_allowed_dtype(result_type.getElementType())) continue; - } - result.setType(RankedTensorType::get(operand_type.getShape(), - result_type.getElementType())); - AddCastBackForUnsupportedNonTFUses(op, result, tf_dialect_, result_type); + auto new_type = RankedTensorType::get(operand_type.getShape(), + result_type.getElementType()); + UpdateTypeAndInsertIncompatibleUseCasts(tf_dialect_, new_type, op, result); changed = true; } return changed; @@ -725,7 +747,8 @@ bool ShapeInference::InferShapeForSingleOperation(Operation* op) { // The shape function of these ops sometimes does not propagate subtypes // (handle shapes) for resource and variant types. We use a simple passthrough // to make sure they are preserved in the output. - if (isa(op)) { + if (isa(op)) { return RefineTypeForPassThroughOperands(op, op->getOperands(), op->getResults()); } @@ -745,6 +768,11 @@ bool ShapeInference::InferShapeForSingleOperation(Operation* op) { op)) return InferShapeForCall(op); + // Handle IfRegion operations by inferring return shape from the then and else + // branches.
+ if (auto if_region = dyn_cast(op)) + return InferShapeForIfRegion(if_region); + // tf.Cast are only inferred if they have at least one user in the TF dialect // or feeding into the function return. This is necessary to avoid inserting // casts which cannot be refined. @@ -923,12 +951,8 @@ bool ShapeInference::InferShapeForSingleOperation(Operation* op) { } auto new_type = get_tensor_type(shape_handle, new_element_type); if (result.getType() == new_type) continue; - // Inserts a cast back to the original type if any user is not in the TF - // dialect or a return. - AddCastBackForUnsupportedNonTFUses(op, result, tf_dialect_, - result.getType()); - // Finally we inferred the shape and replace the type for this result. - result.setType(new_type); + + UpdateTypeAndInsertIncompatibleUseCasts(tf_dialect_, new_type, op, result); changed = true; } if (changed) @@ -937,48 +961,65 @@ bool ShapeInference::InferShapeForSingleOperation(Operation* op) { return changed; } -LogicalResult ShapeInference::RefineShapeForControlFlowFunc( - FuncOp func, ArrayRef input_types, int64_t max_iteration) { - ModuleOp module = func.getParentOfType(); - auto func_uses = SymbolTable::getSymbolUses(func, &module.getBodyRegion()); - int num_uses = std::distance(func_uses->begin(), func_uses->end()); - if (num_uses != 1) { - func.emitWarning(formatv( - "expected control flow function {0} to have exactly 1 use, found {1}.", - func.getName(), num_uses)); - return failure(); - } - - FunctionType func_type = func.getType(); - func.setType(FunctionType::get(input_types, func_type.getResults(), - func.getContext())); - - for (auto arg_and_idx : llvm::enumerate(func.getArguments())) { - arg_and_idx.value().setType(input_types[arg_and_idx.index()]); - } - - auto res = InferShapeUntilFixPoint(&func.getBody(), max_iteration); - if (failed(res)) return res; - - auto new_return_types = InferShapeForFunctionReturnType(func); - if (new_return_types.hasValue()) { - func.setType(FunctionType::get(input_types, new_return_types.getValue(), - func.getContext())); - } - - return success(); -} - LogicalResult ShapeInference::PropagateShapeToFunctions( ModuleOp module, Operation::operand_type_range input_types, ArrayRef func_names, int64_t max_iteration) { bool all_succeeded = true; auto types = llvm::to_vector<4>(input_types); + // If shape propagation fails for one function, return failure, but do not + // early exit and attempt to propagate shapes for all provided functions to + // have a best-effort propagation. 
for (auto func_name : func_names) { FuncOp func = module.lookupSymbol(func_name); - all_succeeded = - succeeded(RefineShapeForControlFlowFunc(func, types, max_iteration)) && - all_succeeded; + auto func_uses = SymbolTable::getSymbolUses(func, &module.getBodyRegion()); + if (!llvm::hasSingleElement(func_uses.getValue())) { + int num_uses = std::distance(func_uses->begin(), func_uses->end()); + func.emitWarning( + formatv("expected control flow function @{0} to have exactly 1 use, " + "found {1}.", + func.getName(), num_uses)); + all_succeeded = false; + continue; + } + + FunctionType func_type = func.getType(); + func.setType( + FunctionType::get(types, func_type.getResults(), func.getContext())); + + auto res = + PropagateShapeToRegions(input_types, {&func.getBody()}, max_iteration); + if (failed(res)) { + all_succeeded = false; + continue; + } + + auto new_return_types = InferShapeForFunctionReturnType(func); + if (new_return_types) + func.setType(FunctionType::get(types, new_return_types.getValue(), + func.getContext())); + } + return success(all_succeeded); +} + +LogicalResult ShapeInference::PropagateShapeToRegions( + Operation::operand_type_range input_types, ArrayRef regions, + int64_t max_iteration) { + bool all_succeeded = true; + auto types = llvm::to_vector<4>(input_types); + // If shape propagation fails for one region, return failure, but do not + // early exit and attempt to propagate shapes for all provided regions to + // have a best-effort propagation. + for (auto region : regions) { + // Refine region arguments. + Block& entry = region->front(); + assert(types.size() == entry.getNumArguments()); + for (auto arg_and_idx : llvm::enumerate(entry.getArguments())) { + arg_and_idx.value().setType(types[arg_and_idx.index()]); + } + + // Propagate shapes into the region. + all_succeeded = succeeded(InferShapeUntilFixPoint(region, max_iteration)) && + all_succeeded; } return success(all_succeeded); } @@ -1080,6 +1121,16 @@ LogicalResult ShapeInference::PropagateShapeIntoAttachedFunctions( return success(); } +LogicalResult ShapeInference::PropagateShapeIntoAttachedRegions( + Operation* op, int64_t max_iteration) { + if (auto while_op = dyn_cast(op)) { + return PropagateShapeToRegions(while_op.getOperandTypes(), + {&while_op.cond(), &while_op.body()}, + max_iteration); + } + return success(); +} + LogicalResult ShapeInference::TryToFold(Operation* op) { LLVM_DEBUG(op->print(llvm::dbgs() << "TryToFold "); llvm::dbgs() << "\n"); // If any output result is known, then the op probably has been computed @@ -1131,12 +1182,8 @@ LogicalResult ShapeInference::TryToFold(Operation* op) { if (ElementsAttr eattr = attr.dyn_cast_or_null()) { if (std::get<0>(result).getType() == eattr.getType()) continue; - // Inserts a cast back to the original type if any user is not in the - // TF dialect. 
- Type old_type = std::get<0>(result).getType(); - std::get<0>(result).setType(eattr.getType()); - AddCastBackForUnsupportedNonTFUses(op, std::get<0>(result), tf_dialect_, - old_type); + UpdateTypeAndInsertIncompatibleUseCasts(tf_dialect_, eattr.getType(), op, + std::get<0>(result)); } } @@ -1177,6 +1224,11 @@ LogicalResult ShapeInference::InferShapeUntilFixPoint(Region* region, "arguments and bodies"; } + if (failed(PropagateShapeIntoAttachedRegions(op, max_iteration))) { + op->emitWarning() << "unable to refine shape of attached region " + "arguments and bodies"; + } + changed |= InferShapeForSingleOperation(op); }); } From 42c0e7aa22fd8f87766f0dcb1dd6bb877bca5e8c Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Mon, 20 Jul 2020 08:22:09 -0700 Subject: [PATCH 0824/2522] Use getCallableForCallee in PrepareCompositeFunctionsPass - Use CallOpInterface::getCallableForCallee() to find the SymRefAttr instead of looking up the named attribute "f" PiperOrigin-RevId: 322148224 Change-Id: I2d3a15a3efcc912383b04dfd7cd97edccd6f9d59 --- .../tests/prepare-composite-functions-tf.mlir | 12 +++++++++ .../prepare_composite_functions_tf.cc | 26 ++++++++++++------- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/tests/prepare-composite-functions-tf.mlir b/tensorflow/compiler/mlir/lite/tests/prepare-composite-functions-tf.mlir index 7ce60d98062..684c58ce003 100644 --- a/tensorflow/compiler/mlir/lite/tests/prepare-composite-functions-tf.mlir +++ b/tensorflow/compiler/mlir/lite/tests/prepare-composite-functions-tf.mlir @@ -481,3 +481,15 @@ func @nms_padded_invalid_num_args(%arg0: tensor<100x4xf32>, %arg1: tensor<100xf3 // expected-error @+1 {{TFLite does not support batched input for non_max_suppression_padded}} func @nms_padded_with_batches(%arg0: tensor<2x100x4xf32>, %arg1: tensor<2x100xf32>, %arg2: tensor, %arg3: tensor, %arg4: tensor, %arg5: tensor, %arg6: tensor, %arg7: tensor, %arg8: tensor) -> (tensor<2x10xi32>, tensor) attributes {tf._implements = "non_max_suppression_padded_v2", tf._reference = "mlir"} } + +// ----- + +module { +// CHECK-LABEL: func @some_func +// CHECK-LABEL: func @func_with_call +func @some_func(%arg0: tensor<100xf32>) -> tensor<100xf32> attributes {tf.api_implements = "lstm_b4e9f0e7-ac55-42bc-8ef2-8496419a608c"} +func @func_with_call(%arg0: tensor<100xf32>) -> tensor<100xf32> { + %0 = call @some_func(%arg0) : (tensor<100xf32>) -> tensor<100xf32> + return %0 : tensor<100xf32> + } +} diff --git a/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc b/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc index 20fc9fc0692..f7923847835 100644 --- a/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc +++ b/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc @@ -34,6 +34,7 @@ limitations under the License. 
#include "mlir/IR/Operation.h" // from @llvm-project #include "mlir/IR/StandardTypes.h" // from @llvm-project #include "mlir/IR/SymbolTable.h" // from @llvm-project +#include "mlir/IR/Visitors.h" // from @llvm-project #include "mlir/Interfaces/CallInterfaces.h" // from @llvm-project #include "mlir/Pass/Pass.h" // from @llvm-project #include "mlir/Support/LLVM.h" // from @llvm-project @@ -237,21 +238,26 @@ LogicalResult CheckOutputConsumer( } LogicalResult CheckFusableKerasLstm(FuncOp lstm_func, ModuleOp module) { - bool check_failed = false; for (auto func : module.getOps()) { - func.walk([&](Operation* op) { - auto call_op = dyn_cast_or_null(op); - if (call_op && op->getAttrOfType("f").getRootReference() == - lstm_func.getName()) { - // Keras LSTM have 5 outputs. - // We should make sure only the first or the second output are consumed. - if (failed(CheckOutputConsumer(call_op, 5, {0, 1}))) - check_failed = true; + auto result = func.walk([&](Operation* op) { + if (auto call_op = dyn_cast(op)) { + CallInterfaceCallable callable = call_op.getCallableForCallee(); + if (auto sym = callable.dyn_cast()) { + if (sym.getRootReference() == lstm_func.getName()) { + // Keras LSTM have 5 outputs. + // We should make sure only the first or the second output are + // consumed. + if (failed(CheckOutputConsumer(call_op, 5, {0, 1}))) + return WalkResult::interrupt(); + } + } } + return WalkResult::advance(); }); + + if (result.wasInterrupted()) return failure(); } - if (check_failed) return failure(); return success(); } From 1faa8af23a5d9b5015d11a658d8f59cf456cf00a Mon Sep 17 00:00:00 2001 From: Bixia Zheng Date: Mon, 20 Jul 2020 08:23:12 -0700 Subject: [PATCH 0825/2522] [TF:TRT] Fix an uninitialized access. The third test that tests GetDeviceSet() returning nullptr actually accesses the uninitialized device_set_ field. PiperOrigin-RevId: 322148374 Change-Id: Ib84ba177bb000a8bb59c7e7ecc6a633a1c51464f --- tensorflow/compiler/tf2tensorrt/convert/convert_graph_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_graph_test.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_graph_test.cc index 54fb1d56441..3b0553426c0 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_graph_test.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_graph_test.cc @@ -71,7 +71,7 @@ class FakeCluster : public grappler::Cluster { } private: - const DeviceSet* device_set_; + const DeviceSet* device_set_ = nullptr; }; TEST(ConvertGraphTest, GetDeviceAndAllocator) { From 7feafa7d702abaebbffb51be9c495e6531a797d6 Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Mon, 20 Jul 2020 08:40:43 -0700 Subject: [PATCH 0826/2522] [MLIR] Extend region<->functional control flow to handle While - For functional -> region control flow, consolidate all the call and yield creation into the existing CreateCall function, and add ability to either use the supplied args (for If) or create new region args of the same type (for While) when creating the call. - For region -> functional control flow, add ability to add extern values as passthrough when outlining a region, and changed TrivialTransformInfo to use a provided matcher to check if 2 call arguments match and use that to drive the different criteria for a trivially transformable IfRegion vs WhileRegion. 
PiperOrigin-RevId: 322151011 Change-Id: Iff8a1c8476f5476df106323f8afe75a606702a42 --- .../functional-control-flow-to-regions.mlir | 88 +++- .../region-control-flow-to-functional.mlir | 412 +++++++++++++++++- .../functional_control_flow_to_regions.cc | 84 ++-- .../region_control_flow_to_functional.cc | 295 ++++++++----- 4 files changed, 735 insertions(+), 144 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/functional-control-flow-to-regions.mlir b/tensorflow/compiler/mlir/tensorflow/tests/functional-control-flow-to-regions.mlir index a7e9b22d72b..70525fda235 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/functional-control-flow-to-regions.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/functional-control-flow-to-regions.mlir @@ -1,5 +1,6 @@ -// RUN: tf-opt %s -tf-functional-control-flow-to-regions -split-input-file | FileCheck %s --dump-input=fail +// RUN: tf-opt %s -tf-functional-control-flow-to-regions -split-input-file | FileCheck %s +// Simple If // CHECK: func @testIf1Then{{.+}} // CHECK: func @testIf1Else{{.+}} func @testIf1Then(tensor<*xf32>) -> tensor<*xf32> @@ -12,6 +13,8 @@ func @testIf1Result(%arg0: tensor, %arg1: tensor<*xf32>) -> tensor<*xf32> { } : (tensor, tensor<*xf32>) -> tensor<*xf32> // CHECK: "tf.IfRegion" + // CHECK-NOT: then_branch + // CHECK-NOT: else_branch // CHECK: [[Result0:%.*]] = call @testIf1Then // CHECK: "tf.Yield"([[Result0]]) // CHECK: [[Result1:%.*]] = call @testIf1Else @@ -21,7 +24,7 @@ func @testIf1Result(%arg0: tensor, %arg1: tensor<*xf32>) -> tensor<*xf32> { // ----- -// With mismatching input types +// If with mismatching input types // CHECK: func @testIf1Then{{.+}} // CHECK: func @testIf1Else{{.+}} @@ -46,7 +49,7 @@ func @testIf2Result(%arg0: tensor, %arg1: tensor<2xf32>) -> tensor<2xf32> { // ----- -// No inputs, some outputs +// If with no inputs, some outputs // CHECK: func @testIf1Then{{.+}} // CHECK: func @testIf1Else{{.+}} func @testIf1Then() -> tensor<*xf32> @@ -68,7 +71,7 @@ func @testIfNoInputs(%arg0: tensor) -> tensor<2xf32> { // ----- -// No outputs, some inputs +// If with no outputs, some inputs // CHECK: func @testIf1Then{{.+}} // CHECK: func @testIf1Else{{.+}} func @testIf1Then(tensor<*xf32>) -> () @@ -91,7 +94,8 @@ func @testIfNoResult(%arg0: tensor, %arg1: tensor<2xf32>) -> () { } // ----- -// No outputs, No inputs + +// If with no outputs, No inputs // CHECK: func @testIf1Then{{.+}} // CHECK: func @testIf1Else{{.+}} func @testIf1Then() -> () @@ -111,3 +115,77 @@ func @testIfNoInputAndNoResult(%arg0: tensor) -> () { return } +// ----- + +// Simple While +func @testWhileCond(tensor<*xf32>) -> (tensor) +func @testWhileBody(tensor<*xf32>) -> (tensor<*xf32>) + +// CHECK-LABEL: func @testWhileResult +func @testWhileResult(tensor<*xf32>) -> (tensor<*xf32>) { +^bb0(%arg0: tensor<*xf32>): + %1 = "tf.While"(%arg0) { + cond = @testWhileCond, + body = @testWhileBody, + is_stateless = false + } : (tensor<*xf32>) -> (tensor<*xf32>) + + // CHECK: [[Result0:%.*]] = "tf.WhileRegion" + // CHECK-NOT: cond = + // CHECK-NOT: body = + // CHECK: [[Result1:%.*]] = call @testWhileCond + // CHECK: "tf.Yield"([[Result1]]) + // CHECK: [[Result2:%.*]] = call @testWhileBody + // CHECK: "tf.Yield"([[Result2]]) + // CHECK: return [[Result0]] + return %1 : tensor<*xf32> +} + +// ----- + +// While with no inputs & outputs +func @testWhileCond() -> (tensor) +func @testWhileBody() -> () + +// CHECK-LABEL: func @testWhileResultNoIO +func @testWhileResultNoIO() -> () { + "tf.While"() { + cond = @testWhileCond, + body = @testWhileBody, + 
is_stateless = false + } : () -> () + + // CHECK: "tf.WhileRegion" + // CHECK: [[Result1:%.*]] = call @testWhileCond + // CHECK: "tf.Yield"([[Result1]]) + // CHECK: call @testWhileBody + // CHECK: "tf.Yield"() + return +} + +// ----- + +// While with type mismatch +func @testWhileCond(tensor<4xf32>) -> (tensor) +func @testWhileBody(tensor<4xf32>) -> (tensor<4xf32>) + +// CHECK-LABEL: func @testWhileResult +func @testWhileResult(tensor<*xf32>) -> (tensor<*xf32>) { +^bb0(%arg0: tensor<*xf32>): + %1 = "tf.While"(%arg0) { + cond = @testWhileCond, + body = @testWhileBody, + is_stateless = false + } : (tensor<*xf32>) -> (tensor<*xf32>) + + // CHECK: [[Result0:%.*]] = "tf.WhileRegion" + // CHECK: [[ResultCast0:%.*]] = "tf.Cast" + // CHECK: [[Result1:%.*]] = call @testWhileCond([[ResultCast0]]) + // CHECK: "tf.Yield"([[Result1]]) + // CHECK: [[ResultCast1:%.*]] = "tf.Cast" + // CHECK: [[Result2:%.*]] = call @testWhileBody([[ResultCast1]]) + // CHECK: "tf.Yield"([[Result2]]) + // CHECK: return [[Result0]] + return %1 : tensor<*xf32> +} + diff --git a/tensorflow/compiler/mlir/tensorflow/tests/region-control-flow-to-functional.mlir b/tensorflow/compiler/mlir/tensorflow/tests/region-control-flow-to-functional.mlir index 5ea863852ad..ea35248adf3 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/region-control-flow-to-functional.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/region-control-flow-to-functional.mlir @@ -1,6 +1,6 @@ -// RUN: tf-opt %s -tf-region-control-flow-to-functional -split-input-file -//| FileCheck %s --dump-input=fail +// RUN: tf-opt %s -tf-region-control-flow-to-functional -split-input-file | FileCheck %s +// Simple IfRegion // CHECK: func @tf.IfRegion_else(%arg0: tensor<*xf32>) -> tensor<*xf32> // CHECK-NEXT: "tf.Neg" // CHECK: func @tf.IfRegion_then(%arg0: tensor<*xf32>) -> tensor<*xf32> @@ -42,7 +42,7 @@ func @testIfCondition(%arg0: tensor, %arg1: tensor<2xf32>) -> tensor<2xf32> // ----- -// Constant sinking +// Constant sinking for IfRegion // CHECK: func @tf.IfRegion_else() -> tensor<2xf32> // CHECK-NEXT: constant dense<1.0 @@ -105,7 +105,7 @@ func @testNested(%arg0: tensor, %arg1: tensor<*xf32>) -> tensor<*xf32> { // ----- -// Match existing function->Region pattern (simple) +// Match existing function->Region pattern (simple) for IfRegion func @testIf1Then(tensor<*xf32>) -> tensor<*xf32> func @testIf1Else(tensor<*xf32>) -> tensor<*xf32> func @testIf1Result(%arg0: tensor, %arg1: tensor<*xf32>) -> tensor<*xf32> { @@ -122,7 +122,7 @@ func @testIf1Result(%arg0: tensor, %arg1: tensor<*xf32>) -> tensor<*xf32> { // ----- -// Match existing function->Region pattern (with casts) +// Match existing function->Region pattern (with casts) for IfRegion func @testIf1Then(tensor<*xf32>) -> tensor<*xf32> func @testIf1Else(tensor<*xf32>) -> tensor<*xf32> @@ -142,7 +142,29 @@ func @testIf2Result(%arg0: tensor, %arg1: tensor<2xf32>) -> tensor<2xf32> { // ----- -// No inputs, some outputs +// Match existing function->Region pattern (with multiple casts) for IfRegion + +func @testIf1Then(tensor<*xf32>) -> tensor<*xf32> +func @testIf1Else(tensor<*xf32>) -> tensor<*xf32> +func @testIf2Result(%arg0: tensor, %arg1: tensor<2xf32>) -> tensor<2xf32> { + // CHECK: "tf.If"({{.+}}) {else_branch = @testIf1Else, {{.+}} then_branch = @testIf1Then} + %0 = "tf.IfRegion"(%arg0) ( { + %1 = "tf.Cast"(%arg1) {Truncate = false} : (tensor<2xf32>) -> tensor + %2 = "tf.Cast"(%1) {Truncate = false} : (tensor) -> tensor<*xf32> + %3 = call @testIf1Then(%2) : (tensor<*xf32>) -> tensor<*xf32> + "tf.Yield"(%3) : 
(tensor<*xf32>) -> () + }, { + %1 = "tf.Cast"(%arg1) {Truncate = false} : (tensor<2xf32>) -> tensor + %2 = "tf.Cast"(%1) {Truncate = false} : (tensor) -> tensor<*xf32> + %3 = call @testIf1Else(%2) : (tensor<*xf32>) -> tensor<*xf32> + "tf.Yield"(%3) : (tensor<*xf32>) -> () + }) {is_stateless = false} : (tensor) -> tensor<2xf32> + return %0 : tensor<2xf32> +} + +// ----- + +// No inputs, some outputs for IfRegion // CHECK: func @tf.IfRegion_else() -> tensor<2xf32> // CHECK-NEXT: constant dense<1.000000e+00> // CHECK-NEXT: "tf.Neg" @@ -165,7 +187,7 @@ func @testSimple(%arg0: tensor) -> tensor<2xf32> { // ----- -// No outputs, some inputs +// No outputs, some inputs for IfRegion // // CHECK: func @tf.IfRegion_else(%arg0: tensor<*xf32>) // CHECK-NEXT: "tf.Neg" @@ -186,3 +208,379 @@ func @testNoOutputs(%arg0: tensor, %arg1: tensor<*xf32>) -> () { return } +// ----- + +// Simple WhileRegion +// CHECK: func @tf.WhileRegion_body{{.+}}{sym_visibility = "private"} +// CHECK: "tf.Add" +// CHECK: constant dense<1> +// CHECK: "tf.Sub" +// CHECK:func @tf.WhileRegion_cond{{.+}}{sym_visibility = "private"} +// CHECK: constant dense<0> +// CHECK: "tf.NotEqual" +// CHECK-LABEL: testValidWhileRegion +func @testValidWhileRegion(%arg0 : tensor<*xf32>, %arg1 : tensor) -> tensor<*xf32> { + // CHECK: [[Result:%.*]]:2 = "tf.While"(%arg0, %arg1) {body = @tf.WhileRegion_body, cond = @tf.WhileRegion_cond + %0:2 = "tf.WhileRegion"(%arg0, %arg1) ( + { + // condition, check if count has reached 0 + ^bb0(%carg0: tensor<*xf32>, %carg1: tensor): + %zero = constant dense<0> : tensor + %ne = "tf.NotEqual"(%carg1, %zero) : (tensor, tensor) -> tensor + "tf.Yield"(%ne) : (tensor) -> () + }, + { + // loop body + ^bb0(%barg0: tensor<*xf32>, %barg1: tensor): + %add = "tf.Add"(%barg0, %barg0) : (tensor<*xf32>, tensor<*xf32>) -> tensor<*xf32> + %one = constant dense<1> : tensor + %sub = "tf.Sub"(%barg1, %one) : (tensor, tensor) -> tensor + "tf.Yield"(%add, %sub) : (tensor<*xf32>, tensor) -> () + } + ) { is_stateless = false } : (tensor<*xf32>, tensor) -> (tensor<*xf32>, tensor) + // CHECK: return [[Result]]#0 + return %0#0 : tensor<*xf32> +} + +// ----- + +// WhileRegion with type mismatch +// CHECK: func @tf.WhileRegion_body{{.+}}{sym_visibility = "private"} +// CHECK: "tf.Add" +// CHECK: constant dense<1> +// CHECK: "tf.Sub" +// CHECK:func @tf.WhileRegion_cond{{.+}}{sym_visibility = "private"} +// CHECK: constant dense<0> +// CHECK: "tf.NotEqual" +// CHECK-LABEL: testWhileRegionTypeMismatch +func @testWhileRegionTypeMismatch(%arg0 : tensor<*xf32>, %arg1 : tensor) -> tensor<*xf32> { + // CHECK: [[Result:%.*]]:2 = "tf.While"(%arg0, %arg1) {body = @tf.WhileRegion_body, cond = @tf.WhileRegion_cond + %0:2 = "tf.WhileRegion"(%arg0, %arg1) ( + { + // condition, check if count has reached 0 + ^bb0(%carg0: tensor<4xf32>, %carg1: tensor): + %zero = constant dense<0> : tensor + %ne = "tf.NotEqual"(%carg1, %zero) : (tensor, tensor) -> tensor + "tf.Yield"(%ne) : (tensor) -> () + }, + { + // loop body + ^bb0(%barg0: tensor<4xf32>, %barg1: tensor): + %add = "tf.Add"(%barg0, %barg0) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> + %one = constant dense<1> : tensor + %sub = "tf.Sub"(%barg1, %one) : (tensor, tensor) -> tensor + "tf.Yield"(%add, %sub) : (tensor<4xf32>, tensor) -> () + } + ) { is_stateless = false } : (tensor<*xf32>, tensor) -> (tensor<*xf32>, tensor) + // CHECK: return [[Result]]#0 + return %0#0 : tensor<*xf32> +} + +// ----- + +// WhileRegion with constant sinking +// CHECK: func @tf.WhileRegion_body{{.+}}{sym_visibility = 
"private"} +// CHECK: constant dense<1> +// CHECK: "tf.Add" +// CHECK: "tf.Sub" +// CHECK:func @tf.WhileRegion_cond{{.+}}{sym_visibility = "private"} +// CHECK: constant dense<0> +// CHECK: "tf.NotEqual" +// CHECK-LABEL: testWhileRegionConstantSink +func @testWhileRegionConstantSink(%arg0 : tensor<*xf32>, %arg1 : tensor) -> tensor<*xf32> { + %zero = constant dense<0> : tensor + %one = constant dense<1> : tensor + // CHECK: [[Result:%.*]]:2 = "tf.While"(%arg0, %arg1) {body = @tf.WhileRegion_body, cond = @tf.WhileRegion_cond + %0:2 = "tf.WhileRegion"(%arg0, %arg1) ( + { + ^bb0(%carg0: tensor<4xf32>, %carg1: tensor): + %ne = "tf.NotEqual"(%carg1, %zero) : (tensor, tensor) -> tensor + "tf.Yield"(%ne) : (tensor) -> () + }, + { + // loop body + ^bb0(%barg0: tensor<4xf32>, %barg1: tensor): + %add = "tf.Add"(%barg0, %barg0) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> + %sub = "tf.Sub"(%barg1, %one) : (tensor, tensor) -> tensor + "tf.Yield"(%add, %sub) : (tensor<4xf32>, tensor) -> () + } + ) { is_stateless = false } : (tensor<*xf32>, tensor) -> (tensor<*xf32>, tensor) + // CHECK: return [[Result]]#0 + return %0#0 : tensor<*xf32> +} + +// ----- + +// WhileRegion with implicitly captured extern value in cond +// CHECK: func @tf.WhileRegion_body(%arg0: tensor<*xf32>, %arg1: tensor, %arg2: tensor) +// CHECK: "tf.Add" +// CHECK: constant dense<1> +// CHECK: "tf.Sub" +// CHECK: return %{{.+}}, %{{.+}}, %arg2 : tensor<*xf32>, tensor, tensor +// CHECK: func @tf.WhileRegion_cond(%arg0: tensor<*xf32>, %arg1: tensor, %arg2: tensor) +// CHECK: "tf.NotEqual"(%arg1, %arg2) +// CHECK-LABEL: testWhileRegionExternInCond +func @testWhileRegionExternInCond(%arg0 : tensor<*xf32>, %arg1 : tensor, %arg2 : tensor) -> tensor<*xf32> { + %cst = constant dense<4> : tensor + %limit = "tf.Add"(%arg2, %cst) : (tensor, tensor) -> tensor + // CHECK: [[Result:%.*]]:3 = "tf.While"(%arg0, %arg1, %{{.+}}) {body = @tf.WhileRegion_body, cond = @tf.WhileRegion_cond + %0:2 = "tf.WhileRegion"(%arg0, %arg1) ( + { + ^bb0(%carg0: tensor<*xf32>, %carg1: tensor): + %ne = "tf.NotEqual"(%carg1, %limit) : (tensor, tensor) -> tensor + "tf.Yield"(%ne) : (tensor) -> () + }, + { + // loop body + ^bb0(%barg0: tensor<*xf32>, %barg1: tensor): + %add = "tf.Add"(%barg0, %barg0) : (tensor<*xf32>, tensor<*xf32>) -> tensor<*xf32> + %one = constant dense<1> : tensor + %sub = "tf.Sub"(%barg1, %one) : (tensor, tensor) -> tensor + "tf.Yield"(%add, %sub) : (tensor<*xf32>, tensor) -> () + } + ) { is_stateless = false } : (tensor<*xf32>, tensor) -> (tensor<*xf32>, tensor) + // CHECK: return [[Result]]#0 + return %0#0 : tensor<*xf32> +} + +// ----- + +// WhileRegion with implicitly captured extern value in body +// CHECK: func @tf.WhileRegion_body(%arg0: tensor<*xf32>, %arg1: tensor, %arg2: tensor) +// CHECK: %0 = "tf.Add"(%arg0, %arg0) +// CHECK: %1 = "tf.Sub"(%arg1, %arg2) +// CHECK: return %0, %1, %arg2 + +// CHECK: func @tf.WhileRegion_cond(%arg0: tensor<*xf32>, %arg1: tensor, %arg2: tensor) +// CHECK: constant dense<0> +// CHECK: "tf.NotEqual" + +// CHECK-LABEL: testWhileRegionExternInBody +func @testWhileRegionExternInBody(%arg0 : tensor<*xf32>, %arg1 : tensor, %arg2 : tensor) -> tensor<*xf32> { + %zero = constant dense<0> : tensor + %cst = constant dense<4> : tensor + %stride = "tf.Add"(%arg2, %cst) : (tensor, tensor) -> tensor + // CHECK: [[Result:%.*]]:3 = "tf.While"(%arg0, %arg1, %{{.+}}) {body = @tf.WhileRegion_body, cond = @tf.WhileRegion_cond + %0:2 = "tf.WhileRegion"(%arg0, %arg1) ( + { + ^bb0(%carg0: tensor<*xf32>, %carg1: tensor): + %ne = 
"tf.NotEqual"(%carg1, %zero) : (tensor, tensor) -> tensor + "tf.Yield"(%ne) : (tensor) -> () + }, + { + // loop body + ^bb0(%barg0: tensor<*xf32>, %barg1: tensor): + %add = "tf.Add"(%barg0, %barg0) : (tensor<*xf32>, tensor<*xf32>) -> tensor<*xf32> + %sub = "tf.Sub"(%barg1, %stride) : (tensor, tensor) -> tensor + "tf.Yield"(%add, %sub) : (tensor<*xf32>, tensor) -> () + } + ) { is_stateless = false } : (tensor<*xf32>, tensor) -> (tensor<*xf32>, tensor) + // CHECK: return [[Result]]#0 + return %0#0 : tensor<*xf32> +} + +// ----- + +// WhileRegion with implicitly captured extern value in cond and body +// CHECK: func @tf.WhileRegion_body(%arg0: tensor<*xf32>, %arg1: tensor, %arg2: tensor, %arg3: tensor) +// CHECK: return %{{.+}}, %{{.+}}, %arg2, %arg3 +// CHECK: func @tf.WhileRegion_cond(%arg0: tensor<*xf32>, %arg1: tensor, %arg2: tensor, %arg3: tensor) +// CHECK-LABEL: testWhileRegionExternInBodyAndCond +func @testWhileRegionExternInBodyAndCond(%arg0 : tensor<*xf32>, %arg1 : tensor, %arg2 : tensor) -> tensor<*xf32> { + %cst = constant dense<4> : tensor + %stride = "tf.Add"(%arg2, %cst) : (tensor, tensor) -> tensor + %cst1 = constant dense<44> : tensor + %limit = "tf.Add"(%arg2, %cst1) : (tensor, tensor) -> tensor + // CHECK: [[Result:%.*]]:4 = "tf.While"(%arg0, %arg1, %{{.+}}, %{{.+}}) {body = @tf.WhileRegion_body, cond = @tf.WhileRegion_cond + %0:2 = "tf.WhileRegion"(%arg0, %arg1) ( + { + ^bb0(%carg0: tensor<*xf32>, %carg1: tensor): + %ne = "tf.NotEqual"(%carg1, %limit) : (tensor, tensor) -> tensor + "tf.Yield"(%ne) : (tensor) -> () + }, + { + // loop body + ^bb0(%barg0: tensor<*xf32>, %barg1: tensor): + %add = "tf.Add"(%barg0, %barg0) : (tensor<*xf32>, tensor<*xf32>) -> tensor<*xf32> + %sub = "tf.Sub"(%barg1, %stride) : (tensor, tensor) -> tensor + "tf.Yield"(%add, %sub) : (tensor<*xf32>, tensor) -> () + } + ) { is_stateless = false } : (tensor<*xf32>, tensor) -> (tensor<*xf32>, tensor) + // CHECK: return [[Result]]#0 + return %0#0 : tensor<*xf32> +} + +// ----- + +// WhileRegion with same value implicitly captured in cond and body +// CHECK: func @tf.WhileRegion_body(%arg0: tensor<*xf32>, %arg1: tensor, %arg2: tensor) +// CHECK: return %{{.+}}, %{{.+}}, %arg2 +// CHECK: func @tf.WhileRegion_cond(%arg0: tensor<*xf32>, %arg1: tensor, %arg2: tensor) +// CHECK-LABEL: testWhileRegionSameExternInBodyAndCond +func @testWhileRegionSameExternInBodyAndCond(%arg0 : tensor<*xf32>, %arg1 : tensor, %arg2 : tensor) -> tensor<*xf32> { + %cst = constant dense<4> : tensor + %stride = "tf.Add"(%arg2, %cst) : (tensor, tensor) -> tensor + // CHECK: [[Result:%.*]]:3 = "tf.While"(%arg0, %arg1, %{{.+}}) {body = @tf.WhileRegion_body, cond = @tf.WhileRegion_cond + %0:2 = "tf.WhileRegion"(%arg0, %arg1) ( + { + ^bb0(%carg0: tensor<*xf32>, %carg1: tensor): + %ne = "tf.NotEqual"(%carg1, %stride) : (tensor, tensor) -> tensor + "tf.Yield"(%ne) : (tensor) -> () + }, + { + // loop body + ^bb0(%barg0: tensor<*xf32>, %barg1: tensor): + %add = "tf.Add"(%barg0, %barg0) : (tensor<*xf32>, tensor<*xf32>) -> tensor<*xf32> + %sub = "tf.Sub"(%barg1, %stride) : (tensor, tensor) -> tensor + "tf.Yield"(%add, %sub) : (tensor<*xf32>, tensor) -> () + } + ) { is_stateless = false } : (tensor<*xf32>, tensor) -> (tensor<*xf32>, tensor) + // CHECK: return [[Result]]#0 + return %0#0 : tensor<*xf32> +} + +// ----- + +// Simple trivially transformable while +// CHECK: func @while_cond +// CHECK: func @while_body +// CHECK-LABEL: testWhileRegionTrivial +func @while_cond(%arg0 : tensor<*xf32>, %arg1 : tensor) -> tensor +func @while_body(%arg0 : 
tensor<*xf32>, %arg1 : tensor) -> (tensor<*xf32>, tensor) +func @testWhileRegionTrivial(%arg0 : tensor<*xf32>, %arg1 : tensor) -> tensor<*xf32> { + // CHECK: [[Result:%.*]]:2 = "tf.While"(%arg0, %arg1) {body = @while_body, cond = @while_cond + %0:2 = "tf.WhileRegion"(%arg0, %arg1) ( + { + ^bb0(%carg0: tensor<*xf32>, %carg1: tensor): + %cond = call @while_cond(%carg0, %carg1) : (tensor<*xf32>, tensor) -> tensor + "tf.Yield"(%cond) : (tensor) -> () + }, + { + // loop body + ^bb0(%barg0: tensor<*xf32>, %barg1: tensor): + %bdy:2 = call @while_body(%barg0, %barg1) : (tensor<*xf32>, tensor) -> (tensor<*xf32>, tensor) + "tf.Yield"(%bdy#0, %bdy#1) : (tensor<*xf32>, tensor) -> () + } + ) { is_stateless = false } : (tensor<*xf32>, tensor) -> (tensor<*xf32>, tensor) + // CHECK: return [[Result]]#0 + return %0#0 : tensor<*xf32> +} + +// ----- + +// Trivially transformable with casts +// CHECK: func @while_cond +// CHECK: func @while_body +// CHECK-LABEL: testWhileRegionTrivialCasts +func @while_cond(%arg0 : tensor<4xf32>, %arg1 : tensor) -> tensor +func @while_body(%arg0 : tensor<4xf32>, %arg1 : tensor) -> (tensor<4xf32>, tensor) +func @testWhileRegionTrivialCasts(%arg0 : tensor<*xf32>, %arg1 : tensor) -> tensor<*xf32> { + // CHECK: [[Result:%.*]]:2 = "tf.While"(%arg0, %arg1) {body = @while_body, cond = @while_cond + %0:2 = "tf.WhileRegion"(%arg0, %arg1) ( + { + ^bb0(%carg0: tensor<*xf32>, %carg1: tensor): + %cond_cast = "tf.Cast"(%carg0) : (tensor<*xf32>) -> tensor<4xf32> + %cond = call @while_cond(%cond_cast, %carg1) : (tensor<4xf32>, tensor) -> tensor + "tf.Yield"(%cond) : (tensor) -> () + }, + { + // loop body + ^bb0(%barg0: tensor<*xf32>, %barg1: tensor): + %bdy_cast = "tf.Cast"(%barg0) : (tensor<*xf32>) -> tensor<4xf32> + %bdy:2 = call @while_body(%bdy_cast, %barg1) : (tensor<4xf32>, tensor) -> (tensor<4xf32>, tensor) + "tf.Yield"(%bdy#0, %bdy#1) : (tensor<4xf32>, tensor) -> () + } + ) { is_stateless = false } : (tensor<*xf32>, tensor) -> (tensor<*xf32>, tensor) + // CHECK: return [[Result]]#0 + return %0#0 : tensor<*xf32> +} + +// ----- + +// Trivially transformable with multiple casts +// CHECK: func @while_cond +// CHECK: func @while_body +// CHECK-LABEL: testWhileRegionTrivialMultipleCasts +func @while_cond(%arg0 : tensor<4xf32>, %arg1 : tensor) -> tensor +func @while_body(%arg0 : tensor<4xf32>, %arg1 : tensor) -> (tensor<4xf32>, tensor) +func @testWhileRegionTrivialMultipleCasts(%arg0 : tensor<*xf32>, %arg1 : tensor) -> tensor<*xf32> { + // CHECK: [[Result:%.*]]:2 = "tf.While"(%arg0, %arg1) {body = @while_body, cond = @while_cond + %0:2 = "tf.WhileRegion"(%arg0, %arg1) ( + { + ^bb0(%carg0: tensor<*xf32>, %carg1: tensor): + %cond_cast0 = "tf.Cast"(%carg0) : (tensor<*xf32>) -> tensor + %cond_cast1 = "tf.Cast"(%cond_cast0) : (tensor) -> tensor<4xf32> + %cond = call @while_cond(%cond_cast1, %carg1) : (tensor<4xf32>, tensor) -> tensor + "tf.Yield"(%cond) : (tensor) -> () + }, + { + // loop body + ^bb0(%barg0: tensor<*xf32>, %barg1: tensor): + %bdy_cast0 = "tf.Cast"(%barg0) : (tensor<*xf32>) -> tensor + %bdy_cast1 = "tf.Cast"(%bdy_cast0) : (tensor) -> tensor<4xf32> + %bdy:2 = call @while_body(%bdy_cast1, %barg1) : (tensor<4xf32>, tensor) -> (tensor<4xf32>, tensor) + "tf.Yield"(%bdy#0, %bdy#1) : (tensor<4xf32>, tensor) -> () + } + ) { is_stateless = false } : (tensor<*xf32>, tensor) -> (tensor<*xf32>, tensor) + // CHECK: return [[Result]]#0 + return %0#0 : tensor<*xf32> +} + +// ----- + +// Almost trivially transformable with extern values +// CHECK: func @tf.WhileRegion_body +// CHECK: call 
@while_body +// CHECK: @tf.WhileRegion_cond +// CHECK: call @while_cond +// CHECK-LABEL: testWhileRegionExtern +func @while_cond(%arg0 : tensor<*xf32>, %arg1 : tensor) -> tensor +func @while_body(%arg0 : tensor<*xf32>, %arg1 : tensor, %arg2 : tensor<*xf32>) -> (tensor<*xf32>, tensor) +func @testWhileRegionExtern(%arg0 : tensor<*xf32>, %arg1 : tensor) -> tensor<*xf32> { + %ext = "tf.Neg"(%arg0) : (tensor<*xf32>) -> tensor<*xf32> + // CHECK: [[Result:%.*]]:3 = "tf.While"(%arg0, %arg1, %{{.+}}) {body = @tf.WhileRegion_body, cond = @tf.WhileRegion_cond + %0:2 = "tf.WhileRegion"(%arg0, %arg1) ( + { + ^bb0(%carg0: tensor<*xf32>, %carg1: tensor): + %cond = call @while_cond(%carg0, %carg1) : (tensor<*xf32>, tensor) -> tensor + "tf.Yield"(%cond) : (tensor) -> () + }, + { + // loop body + ^bb0(%barg0: tensor<*xf32>, %barg1: tensor): + %bdy:2 = call @while_body(%barg0, %barg1, %ext) : (tensor<*xf32>, tensor, tensor<*xf32>) -> (tensor<*xf32>, tensor) + "tf.Yield"(%bdy#0, %bdy#1) : (tensor<*xf32>, tensor) -> () + } + ) { is_stateless = false } : (tensor<*xf32>, tensor) -> (tensor<*xf32>, tensor) + // CHECK: return [[Result]]#0 + return %0#0 : tensor<*xf32> +} + +// ----- + +// Almost trivially transformable, mismatching block arguments +// CHECK: func @tf.WhileRegion_body +// CHECK: call @while_body +// CHECK: @tf.WhileRegion_cond +// CHECK: call @while_cond +// CHECK-LABEL: testWhileRegionBlockArgMismatch +func @while_cond(%arg0 : tensor, %arg1 : tensor<*xf32>) -> tensor +func @while_body(%arg0 : tensor<*xf32>, %arg1 : tensor) -> (tensor<*xf32>, tensor) +func @testWhileRegionBlockArgMismatch(%arg0 : tensor<*xf32>, %arg1 : tensor) -> tensor<*xf32> { + // CHECK: [[Result:%.*]]:2 = "tf.While"(%arg0, %arg1) {body = @tf.WhileRegion_body, cond = @tf.WhileRegion_cond + %0:2 = "tf.WhileRegion"(%arg0, %arg1) ( + { + ^bb0(%carg0: tensor<*xf32>, %carg1: tensor): + %cond = call @while_cond(%carg1, %carg0) : (tensor, tensor<*xf32>) -> tensor + "tf.Yield"(%cond) : (tensor) -> () + }, + { + // loop body + ^bb0(%barg0: tensor<*xf32>, %barg1: tensor): + %bdy:2 = call @while_body(%barg0, %barg1) : (tensor<*xf32>, tensor) -> (tensor<*xf32>, tensor) + "tf.Yield"(%bdy#0, %bdy#1) : (tensor<*xf32>, tensor) -> () + } + ) { is_stateless = false } : (tensor<*xf32>, tensor) -> (tensor<*xf32>, tensor) + // CHECK: return [[Result]]#0 + return %0#0 : tensor<*xf32> +} diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/functional_control_flow_to_regions.cc b/tensorflow/compiler/mlir/tensorflow/transforms/functional_control_flow_to_regions.cc index 5ab0eda08c6..3320a16a6ac 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/functional_control_flow_to_regions.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/functional_control_flow_to_regions.cc @@ -15,7 +15,7 @@ limitations under the License. // This transformation pass transforms functional control flow operations in the // TensorFlow dialect to their region based counterparts, i.e., -// tf.If -> tf.IfRegion +// tf.If -> tf.IfRegion and tf.While -> tf.WhileRegion #include "llvm/Support/raw_ostream.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project @@ -33,6 +33,8 @@ limitations under the License. 
#include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" #include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" +#define DEBUG_TYPE "tf-functional-cf-to-region" + namespace mlir { namespace TF { @@ -44,24 +46,37 @@ struct FunctionalControlFlowToRegions void runOnOperation() override; }; -// Create a call to function `fn` with arguments `args` and return the CallOp. -// The arguments are cast to the required type before the call. -CallOp CreateCall(Location loc, Operation::operand_range args, FuncOp fn, - OpBuilder* builder) { - FunctionType fn_type = fn.getType(); - llvm::SmallVector operands; - int num_operands = fn_type.getNumInputs(); - operands.reserve(num_operands); - for (const auto& ArgAndType : zip(args, fn_type.getInputs())) { +// Creates a call to function `callee` in region `caller_region`. Use `args` as +// the call arguments, and terminate the region with a yield. The arguments are +// cast to the required type before the call. `use_region_args` control whether +// the input arguments are used as is (for IfOp) or block arguments of the same +// type as the input arguments are created and then used as call arguments (for +// While). +void CreateCall(Operation* op, StringRef callee, Region& caller_region, + ValueRange args, bool use_region_args) { + assert(caller_region.empty() && + "Expected empty region for newly created ops"); + OpBuilder builder(caller_region); + Block* entry = builder.createBlock(&caller_region); + auto func = op->getParentOfType().lookupSymbol(callee); + + if (use_region_args) { + entry->addArguments(args.getType()); + args = entry->getArguments(); + } + llvm::SmallVector casted_args; + casted_args.reserve(func.getNumArguments()); + for (const auto& ArgAndType : zip(args, func.getType().getInputs())) { Value arg = std::get<0>(ArgAndType); Type expected_type = std::get<1>(ArgAndType); if (arg.getType() != expected_type) { - arg = builder->create(loc, expected_type, arg, - /*Truncate=*/builder->getBoolAttr(false)); + arg = builder.create(op->getLoc(), expected_type, arg, + /*Truncate=*/builder.getBoolAttr(false)); } - operands.push_back(arg); + casted_args.push_back(arg); } - return builder->create(loc, fn, operands); + auto call = builder.create(op->getLoc(), func, casted_args); + builder.create(op->getLoc(), call.getResults()); } // Transform a functional IfOp to a region based IfRegionOp. @@ -70,31 +85,42 @@ LogicalResult ConvertIfOp(IfOp if_op) { if_op.getLoc(), if_op.getResultTypes(), if_op.cond(), if_op.is_stateless()); - // Insert call to the given function into the 'region'. 
- auto create_region_with_call = [&if_op](FlatSymbolRefAttr symbol, - Region& region) { - OpBuilder builder(region); - builder.createBlock(®ion); - auto func = if_op.getParentOfType().lookupSymbol( - symbol.getValue()); - auto call = CreateCall(if_op.getLoc(), if_op.input(), func, &builder); - builder.create(if_op.getLoc(), call.getResults()); - }; - - create_region_with_call(if_op.then_branchAttr(), if_region.then_branch()); - create_region_with_call(if_op.else_branchAttr(), if_region.else_branch()); - + CreateCall(if_op, /*callee=*/if_op.then_branch(), + /*caller_region=*/if_region.then_branch(), if_op.input(), + /*use_region_args=*/false); + CreateCall(if_op, /*callee=*/if_op.else_branch(), + /*caller_region=*/if_region.else_branch(), if_op.input(), + /*use_region_args=*/false); if_op.replaceAllUsesWith(if_region.getResults()); if_op.erase(); return success(); } +LogicalResult ConvertWhileOp(WhileOp while_op) { + auto while_region = OpBuilder(while_op).create( + while_op.getLoc(), while_op.getResultTypes(), while_op.input(), + while_op.is_stateless(), while_op.parallel_iterations()); + + CreateCall(while_op, while_op.cond(), while_region.cond(), while_op.input(), + /*use_region_args=*/true); + CreateCall(while_op, while_op.body(), while_region.body(), while_op.input(), + /*use_region_args=*/true); + while_op.replaceAllUsesWith(while_region.getResults()); + while_op.erase(); + return success(); +} + void FunctionalControlFlowToRegions::runOnOperation() { ModuleOp module = getOperation(); auto result = module.walk([](Operation* op) { if (IfOp if_op = llvm::dyn_cast(op)) { if (failed(ConvertIfOp(if_op))) { - if_op.emitOpError() << " failed to convert to region form"; + op->emitOpError() << "failed to convert to region form"; + return WalkResult::interrupt(); + } + } else if (auto while_op = llvm::dyn_cast(op)) { + if (failed(ConvertWhileOp(while_op))) { + op->emitOpError() << "failed to convert to region form"; return WalkResult::interrupt(); } } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/region_control_flow_to_functional.cc b/tensorflow/compiler/mlir/tensorflow/transforms/region_control_flow_to_functional.cc index ca0467942ca..46914653dbd 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/region_control_flow_to_functional.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/region_control_flow_to_functional.cc @@ -15,9 +15,11 @@ limitations under the License. // This transformation pass transforms region bases control flow operations in // the TensorFlow dialect to their functional counterparts, i.e., -// tf.IfRegion -> tf.If +// tf.IfRegion -> tf.If and tf.WhileRegion -> tf.While #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Casting.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project #include "mlir/IR/Attributes.h" // from @llvm-project #include "mlir/IR/Builders.h" // from @llvm-project @@ -36,6 +38,8 @@ limitations under the License. #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" #include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" +#define DEBUG_TYPE "tf-region-cf-to-functional" + namespace mlir { namespace TF { @@ -48,6 +52,7 @@ struct RegionControlFlowToFunctional private: LogicalResult ConvertIfOp(IfRegionOp if_region); + LogicalResult ConvertWhileOp(WhileRegionOp while_region); // Get unique name by using the loc to name mapping. 
std::string GetName(Operation* op, StringRef suffix); @@ -61,20 +66,20 @@ std::string RegionControlFlowToFunctional::GetName(Operation* op, return (mapper.GetUniqueName(op) + suffix).str(); } -// Returns all the external values referenced from the given set of regions. If -// the external value is a constant, sink it into the region instead (and do not +// Returns all the external values referenced from the given regions. If the +// external value is a constant, sink it into the region instead (and do not // add it to the returned vector). -llvm::SmallVector CollectExternValues(ArrayRef regions) { - llvm::SetVector extern_values_set; +llvm::SmallVector CollectExternValues(Region& first, Region& second) { + llvm::SetVector extern_values; - for (auto region : regions) { + for (Region* region : {&first, &second}) { llvm::SetVector region_extern_values; getUsedValuesDefinedAbove(*region, region_extern_values); // Sink down constants into the functions. for (auto extern_value : region_extern_values) { if (!matchPattern(extern_value, m_Constant())) { - extern_values_set.insert(extern_value); + extern_values.insert(extern_value); continue; } // Add constant at start of region. @@ -85,28 +90,44 @@ llvm::SmallVector CollectExternValues(ArrayRef regions) { } } - return {extern_values_set.begin(), extern_values_set.end()}; + return llvm::to_vector<4>(extern_values); } // Extracts the contents of a region with a single block into a new function. // `extern_values` is the set of external values that the region refers to. // -// Any inputs to the terminator of the region are converted to return values of -// the function. If any of these values is not exact type as the function's -// return type, appropriate cast operations will be inserted -void ExtractSingleBlockRegion(Region& region, FunctionType type, StringRef name, +// Inputs to the terminator of the region are converted to return values of +// the function. If `extern_values_passthrough` is true, all the extern values +// are also added as return values from the function +void ExtractSingleBlockRegion(Region& region, StringRef name, llvm::SmallVectorImpl& extern_values, - llvm::SmallVectorImpl& worklist) { + llvm::SmallVectorImpl& worklist, + bool extern_values_passthrough) { ModuleOp module = region.getParentOfType(); auto builder = OpBuilder::atBlockBegin(module.getBody()); auto loc = region.getParentOp()->getLoc(); + Block& entry = region.front(); + int num_region_arguments = entry.getNumArguments(); + Operation* terminator = entry.getTerminator(); + + // Build the function type. Region arguments and extern values together + // become the function arguments, with region arguments going first. + auto input_types = llvm::to_vector<4>(entry.getArgumentTypes()); + for (auto input : extern_values) input_types.push_back(input.getType()); + + // Terminator operands and pass through extern values (if enabled) together + // become the function return values. + auto return_types = llvm::to_vector<4>(terminator->getOperandTypes()); + if (extern_values_passthrough) + for (auto input : extern_values) return_types.push_back(input.getType()); + + auto type = FunctionType::get(input_types, return_types, region.getContext()); // Create new function and extract region body into the function. 
auto outlined_func = builder.create(loc, name, type, ArrayRef{}); - - outlined_func.getBody().takeBody(region); Region& func_region = outlined_func.getBody(); + func_region.takeBody(region); Block& first_block = func_region.front(); // Replace all external uses with function arguments. @@ -115,27 +136,24 @@ void ExtractSingleBlockRegion(Region& region, FunctionType type, StringRef name, replaceAllUsesInRegionWith(it.value(), arg, func_region); } - // Replace the existing terminator with a return. - Operation* terminator = outlined_func.getBody().front().getTerminator(); - builder.setInsertionPoint(terminator); + // Function return values are all the terminator operands + pass through + // extern values (if enabled). + auto return_values = llvm::to_vector<4>(terminator->getOperands()); + if (extern_values_passthrough) + return_values.insert(return_values.end(), + first_block.args_begin() + num_region_arguments, + first_block.args_end()); - SmallVector return_values; - return_values.reserve(terminator->getNumOperands()); - for (auto it : llvm::enumerate(type.getResults())) { - Value ret_val = terminator->getOperand(it.index()); - // Add a cast operation if types do not match. - if (ret_val.getType() != it.value()) { - ret_val = - builder.create(terminator->getLoc(), it.value(), ret_val); - } - return_values.push_back(ret_val); - } + // Replace the existing terminator with a return. + terminator = first_block.getTerminator(); + builder.setInsertionPoint(terminator); builder.create(terminator->getLoc(), return_values); terminator->erase(); + outlined_func.setVisibility(FuncOp::Visibility::Private); // Add the outlined function to the worklist in case its body has - // IfRegion ops that need to converted. + // IfRegion or WhileRegion ops that need to converted. worklist.push_back(outlined_func); } @@ -170,17 +188,29 @@ llvm::Optional IsSingleCallRegion(Region& region) { return call; } -// Returns whether the arguments of the given call are same as the given list of -// arguments (after looking through cast ops). -bool MatchCallArgs(CallOp call, llvm::SmallVectorImpl& args) { - if (call.getNumOperands() != args.size()) return false; +using MatcherFn = function_ref; - for (auto it : llvm::enumerate(args)) { - Value arg = call.getOperand(it.index()); - if (auto cast = dyn_cast_or_null(arg.getDefiningOp())) - arg = cast.getOperand(); +// Returns whether the arguments of the given 2 calls are match (after looking +// through cast ops). `matcher` is the predicate used to check if two arguments +// match. +bool MatchCallArgs(CallOp first, CallOp second, MatcherFn matcher) { + if (first.getNumOperands() != second.getNumOperands()) return false; - if (arg != it.value()) return false; + Region& first_region = *first.getParentRegion(); + Region& second_region = *second.getParentRegion(); + + for (auto it : llvm::zip(first.getArgOperands(), second.getArgOperands())) { + // Get the defining Op, skipping over casts. + auto get_defining_op = [](Value value) { + while (llvm::isa_and_nonnull(value.getDefiningOp())) + value = cast(value.getDefiningOp()).getOperand(); + return value; + }; + Value first_arg = get_defining_op(std::get<0>(it)); + Value second_arg = get_defining_op(std::get<1>(it)); + + if (!matcher(first_arg, first_region, second_arg, second_region)) + return false; } return true; } @@ -193,11 +223,10 @@ struct TrivialTransformInfo { bool can_transform = false; // List of callee names (one for each region). 
- llvm::SmallVector callee_names; + llvm::SmallVector callee_names; - // List of arguments used in these call (each call uses the same arguments - // potentially through casts). - llvm::SmallVector call_args; + // Constructor will analyze the 2 regions. + TrivialTransformInfo(Region& first, Region& second, MatcherFn matcher); }; // Analyzes the given set of regions (attached to the same parent op) to check @@ -206,88 +235,62 @@ struct TrivialTransformInfo { // regions are single call regions and the all the calls have the same // arguments. // -// If this trivial transformation is possible, return the relevant information -// needed for the transformation (in `TrivialTransformInfo`), else indicate that -// a trivial transformation is not possible by setting `can_transform` false. -TrivialTransformInfo AnalyzeForTrivialTransform(ArrayRef regions) { - const TrivialTransformInfo cannot_transform; +// If such a trivial transformation is possible, stash the relevant information +// needed for the transformation, else indicate that a trivial transformation is +// not possible by setting `can_transform` to false. +TrivialTransformInfo::TrivialTransformInfo(Region& first, Region& second, + MatcherFn matcher) { + auto call0 = IsSingleCallRegion(first); + auto call1 = IsSingleCallRegion(second); + if (!call0 || !call1) return; - if (regions.empty()) return cannot_transform; + if (!MatchCallArgs(call0.getValue(), call1.getValue(), matcher)) return; - llvm::SmallVector calls; - calls.reserve(regions.size()); - - // Verify each region is a single call and collect these calls. - for (Region* region : regions) { - auto call = IsSingleCallRegion(*region); - if (!call.hasValue()) return cannot_transform; - calls.push_back(call.getValue()); - } - - llvm::SmallVector callees; - callees.reserve(regions.size()); - - CallOp call0 = calls[0]; - int num_args = call0.getNumOperands(); - - // Collect arguments of the first call. - llvm::SmallVector call0_args; - call0_args.reserve(num_args); - for (Value arg : call0.getArgOperands()) { - if (auto cast = dyn_cast_or_null(arg.getDefiningOp())) - arg = cast.getOperand(); - call0_args.push_back(arg); - } - - // Match arguments of rest of the calls with those of the first call. - for (auto call : calls) { - if (call != call0 && !MatchCallArgs(call, call0_args)) - return cannot_transform; - callees.push_back(call.getCallee()); - } - - return {true, callees, call0_args}; + can_transform = true; + callee_names = {call0.getValue().getCallee(), call1.getValue().getCallee()}; } // Transform IfRegionOp to IfOp. LogicalResult RegionControlFlowToFunctional::ConvertIfOp(IfRegionOp if_region) { - const TrivialTransformInfo tti = AnalyzeForTrivialTransform( - {&if_region.then_branch(), &if_region.else_branch()}); + llvm::SmallVector extern_values; + + // For IfOp, arguments of calls in the then and else regions match if they + // are the same value. + auto if_matcher = [&](Value first, Region&, Value second, Region&) { + if (first != second) return false; + + // collect the call arguments post lookup through cast Op's + extern_values.push_back(first); + return true; + }; + + const TrivialTransformInfo tti(if_region.then_branch(), + if_region.else_branch(), if_matcher); std::string then_name, else_name; - llvm::SmallVector extern_values; if (tti.can_transform) { // We can transform to functional form trivially without outlining. 
then_name = tti.callee_names[0].str(); else_name = tti.callee_names[1].str(); - extern_values = tti.call_args; } else { // Collect external values that are used within the else and then bodies. - extern_values = CollectExternValues( - {&if_region.then_branch(), &if_region.else_branch()}); + extern_values = + CollectExternValues(if_region.then_branch(), if_region.else_branch()); // These external values need to be added as inputs to the generated If. The // order is determined by the order of these values the `extern_vales`. - // Build the type for the outlined function. - llvm::SmallVector input_types; - input_types.reserve(extern_values.size()); - for (auto input : extern_values) input_types.push_back(input.getType()); - - FunctionType func_type = FunctionType::get( - input_types, if_region.getResultTypes(), if_region.getContext()); - // Create 2 new functions with the input signature matching this order, // and outline the `then` and `else` regions by moving the bodies of these // regions into these functions. Replace tf.yield with a regular return. then_name = GetName(if_region, "_then"); - ExtractSingleBlockRegion(if_region.then_branch(), func_type, then_name, - extern_values, worklist); + ExtractSingleBlockRegion(if_region.then_branch(), then_name, extern_values, + worklist, /*extern_values_passthrough=*/false); else_name = GetName(if_region, "_else"); - ExtractSingleBlockRegion(if_region.else_branch(), func_type, else_name, - extern_values, worklist); + ExtractSingleBlockRegion(if_region.else_branch(), else_name, extern_values, + worklist, /*extern_values_passthrough=*/false); } // Once we have the `then` and `else` functions ready (either outlined or @@ -302,19 +305,105 @@ LogicalResult RegionControlFlowToFunctional::ConvertIfOp(IfRegionOp if_region) { return success(); } +// Transform WhileRegion to WhileOp. +LogicalResult RegionControlFlowToFunctional::ConvertWhileOp( + WhileRegionOp while_region) { + // For While, the arguments of the calls in the body and cond regions match + // if they are region arguments with the same region argument numbers. If the + // 2 calls have the same value (an extern value) used an an argument, we + // cannot do a trivial transformation because post transform, we will need to + // pass this extern value as an argument to the function, so we cannot use the + // existing function as is. + auto while_matcher = [](Value first, Region& first_region, Value second, + Region& second_region) { + if (!first.isa() || !second.isa()) + return false; + BlockArgument first_block_arg = first.cast(); + BlockArgument second_block_arg = second.cast(); + + // 2 block arguments will match if they are the same argument number, and + // are block arguments of the corresponding containing regions. + return first_block_arg.getArgNumber() == second_block_arg.getArgNumber() && + first_block_arg.getParentBlock() == &first_region.front() && + second_block_arg.getParentBlock() == &second_region.front(); + }; + + const TrivialTransformInfo tti(while_region.cond(), while_region.body(), + while_matcher); + + // All existing inputs to while region are inputs to the functional while. + auto new_inputs = llvm::to_vector<4>(while_region.getOperands()); + + // All existing results will also be generated by the functional while. + auto new_result_types = llvm::to_vector<4>(while_region.getResultTypes()); + + std::string cond_name, body_name; + if (tti.can_transform) { + // We can transform to functional form trivially without outlining. 
+ cond_name = tti.callee_names[0].str(); + body_name = tti.callee_names[1].str(); + } else { + // The WhileRegion regions can refer to either arguments of the region, or + // external values implicitly captured by the region. When converting to + // functional form, all such external values need to become function + // arguments of the outlined functions, and become pass through values in + // the outlined body function. So when outlining the while body, in addition + // to the region arguments, all these external references need to be added + // as function arguments. + llvm::SmallVector extern_values = + CollectExternValues(while_region.cond(), while_region.body()); + + // Outline the `cond` and `body` regions by moving the bodies of these + // regions into new functions. Replace tf.yield with a regular return. + cond_name = GetName(while_region, "_cond"); + ExtractSingleBlockRegion(while_region.cond(), cond_name, extern_values, + worklist, /*extern_values_passthrough=*/false); + + body_name = GetName(while_region, "_body"); + ExtractSingleBlockRegion(while_region.body(), body_name, extern_values, + worklist, /*extern_values_passthrough=*/true); + + // All extern values become additional inputs and additional output types + // for the functional while. + new_inputs.append(extern_values.begin(), extern_values.end()); + for (auto ext : extern_values) new_result_types.push_back(ext.getType()); + } + + // Once we have the `cond` and `body` functions ready (either outlined or + // existing ones), replace the region based op with a functional op. + OpBuilder builder(while_region); + auto while_op = builder.create( + while_region.getLoc(), new_result_types, new_inputs, cond_name, body_name, + builder.getArrayAttr({}), while_region.parallel_iterations(), + while_region.is_stateless()); + + // Redirect old results to new results. + for (auto it : llvm::zip( + while_region.getResults(), + while_op.getResults().take_front(while_region.getNumResults()))) + std::get<0>(it).replaceAllUsesWith(std::get<1>(it)); + + while_region.erase(); + return success(); +} + void RegionControlFlowToFunctional::runOnOperation() { ModuleOp module = getOperation(); // Seed worklist with all functions in the module. worklist = llvm::to_vector<4>(module.getOps()); - while (!worklist.empty()) { FuncOp function = worklist.pop_back_val(); auto result = function.walk([&](Operation* op) { - if (IfRegionOp if_region = llvm::dyn_cast(op)) { + if (auto if_region = llvm::dyn_cast(op)) { if (failed(ConvertIfOp(if_region))) { - if_region.emitOpError() << " failed to convert to functional form"; + op->emitOpError() << "failed to convert to functional form"; + return WalkResult::interrupt(); + } + } else if (auto while_region = llvm::dyn_cast(op)) { + if (failed(ConvertWhileOp(while_region))) { + op->emitOpError() << "failed to convert to functional form"; return WalkResult::interrupt(); } } From cb1119ba71e2fdb84ebaf51a82496835e0133324 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 20 Jul 2020 08:43:51 -0700 Subject: [PATCH 0827/2522] Update TPUUpdateEmbeddingEnqueueInput pass to create mode constants rather than depend on inputs from SelectV2 op. SelectV2 op may be constant folded away if the conditional value is from a const op. As so, create mode constant ("train" or "inference") based on presence of gradient op. 
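In sketch form (condensed from the diff below; the names are those used in the
pass, and this is an illustration rather than a literal excerpt), the new mode
selection is:

    // A graph is in training mode iff a SendTPUEmbeddingGradients op with the
    // same _tpu_embedding_layer attribute was found.
    bool is_training = send_gradient_op_map.count(embedding_attr);
    llvm::StringRef mode = is_training ? "train" : "inference";
    // `mode` is materialized as a scalar string tf.Const and set as the
    // enqueue op's last operand, replacing the former SelectV2 result.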
PiperOrigin-RevId: 322151534 Change-Id: I6dbbafe2173af0270e95237fcc30b0e818cbf3ba --- ...pu_update_embedding_enqueue_op_inputs.mlir | 18 ++++--- .../tpu_update_embedding_enqueue_op_inputs.cc | 48 ++++++++++++------- 2 files changed, 39 insertions(+), 27 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tpu_update_embedding_enqueue_op_inputs.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tpu_update_embedding_enqueue_op_inputs.mlir index 09e701e5dd3..f9bdddd2778 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tpu_update_embedding_enqueue_op_inputs.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tpu_update_embedding_enqueue_op_inputs.mlir @@ -9,16 +9,15 @@ // CHECK-SAME: %[[ARG_5:[a-z0-9]*]]: tensor // CHECK-SAME: %[[ARG_6:[a-z0-9]*]]: tensor // CHECK-SAME: %[[ARG_7:[a-z0-9]*]]: tensor -// CHECK-SAME: %[[ARG_8:[a-z0-9]*]]: tensor func @check_enqueue_ops_update_for_eval(%arg0: tensor, %arg1: tensor, %arg2 :tensor, %arg3: tensor, %arg4: tensor, %arg5: tensor, - %arg6: tensor, %arg7: tensor, %arg8: tensor) -> () { + %arg6: tensor, %arg7: tensor) -> () { // CHECK: %[[CONST_0:[a-z0-9]*]] = "tf.Const"() %0 = "tf.Const"() {value = dense<[]> : tensor<0xf32>} : () -> tensor<0xf32> - %1 = "tf.SelectV2"(%arg8, %arg6, %arg7) : (tensor, tensor, tensor) -> tensor - // CHECK: "tf.EnqueueTPUEmbeddingSparseTensorBatch"(%[[ARG_0]], %[[ARG_1]], %[[ARG_2]], %[[ARG_3]], %[[ARG_4]], %[[ARG_5]], %[[CONST_0]], %[[CONST_0]], %[[CONST_0]], %[[ARG_7]]) - "tf.EnqueueTPUEmbeddingSparseTensorBatch"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %0, %0, %0, %1) {_tpu_embedding_layer = "call1", _xla_outside_compilation = "0", combiners = ["mean", "sum"], device_ordinal = -1 : i64, max_sequence_lengths = [0, 0, 0], table_ids = [1, 1, 0]} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor<0xf32>, tensor<0xf32>, tensor<0xf32>, tensor) -> () + // CHECK: %[[CONST_MODE:[a-z0-9]*]] = "tf.Const"() {value = dense<"inference"> : tensor} : () -> tensor + // CHECK: "tf.EnqueueTPUEmbeddingSparseTensorBatch"(%[[ARG_0]], %[[ARG_1]], %[[ARG_2]], %[[ARG_3]], %[[ARG_4]], %[[ARG_5]], %[[CONST_0]], %[[CONST_0]], %[[CONST_0]], %[[CONST_MODE]]) + "tf.EnqueueTPUEmbeddingSparseTensorBatch"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %0, %0, %0, %arg7) {_tpu_embedding_layer = "call1", _xla_outside_compilation = "0", combiners = ["mean", "sum"], device_ordinal = -1 : i64, max_sequence_lengths = [0, 0, 0], table_ids = [1, 1, 0]} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor<0xf32>, tensor<0xf32>, tensor<0xf32>, tensor) -> () %2:2 = "tf.RecvTPUEmbeddingActivations"() {_tpu_embedding_layer = "call1", config = "\0A\0B\0C\0D"} : () -> (tensor<2x2xf32>, tensor<4x4xf32>) return } @@ -34,20 +33,19 @@ func @check_enqueue_ops_update_for_eval(%arg0: tensor, %arg1: tensor // CHECK-SAME: %[[ARG_6:[a-z0-9]*]]: tensor // CHECK-SAME: %[[ARG_7:[a-z0-9]*]]: tensor -// CHECK-SAME: %[[ARG_8:[a-z0-9]*]]: tensor func @check_enqueue_ops_update_for_training(%arg0: tensor, %arg1: tensor, %arg2 :tensor, %arg3: tensor, %arg4: tensor, %arg5: tensor, - %arg6: tensor, %arg7: tensor, %arg8: tensor) -> () { + %arg6: tensor, %arg7: tensor) -> () { // CHECK: %[[CONST_0:[a-z0-9]*]] = "tf.Const"() %0 = "tf.Const"() {value = dense<[]> : tensor<0xf32>} : () -> tensor<0xf32> - %1 = "tf.SelectV2"(%arg8, %arg6, %arg7) : (tensor, tensor, tensor) -> tensor %2 = "tf.Const"() {value = dense<0.0> : tensor<2x2xf32>} : () -> tensor<2x2xf32> %3 = "tf.Const"() {value = dense<0.0> : tensor<4x4xf32>} : () -> tensor<4x4xf32> 
"tf.SendTPUEmbeddingGradients"(%2, %3) {_tpu_embedding_layer = "call1", config = "\0A\0B\0C\0D", operand_segment_sizes = dense<[2, 0]> : vector<2xi32>} : (tensor<2x2xf32>, tensor<4x4xf32>) -> () - // CHECK: "tf.EnqueueTPUEmbeddingSparseTensorBatch"(%[[ARG_0]], %[[ARG_1]], %[[ARG_2]], %[[ARG_3]], %[[ARG_4]], %[[ARG_5]], %[[CONST_0]], %[[CONST_0]], %[[CONST_0]], %[[ARG_6]]) - "tf.EnqueueTPUEmbeddingSparseTensorBatch"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %0, %0, %0, %1) {_tpu_embedding_layer = "call1", _xla_outside_compilation = "0", combiners = ["mean", "sum"], device_ordinal = -1 : i64, max_sequence_lengths = [0, 0, 0], table_ids = [1, 1, 0]} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor<0xf32>, tensor<0xf32>, tensor<0xf32>, tensor) -> () + // CHECK: %[[CONST_MODE:[a-z0-9]*]] = "tf.Const"() {value = dense<"train"> : tensor} : () -> tensor + // CHECK: "tf.EnqueueTPUEmbeddingSparseTensorBatch"(%[[ARG_0]], %[[ARG_1]], %[[ARG_2]], %[[ARG_3]], %[[ARG_4]], %[[ARG_5]], %[[CONST_0]], %[[CONST_0]], %[[CONST_0]], %[[CONST_MODE]]) + "tf.EnqueueTPUEmbeddingSparseTensorBatch"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %0, %0, %0, %arg7) {_tpu_embedding_layer = "call1", _xla_outside_compilation = "0", combiners = ["mean", "sum"], device_ordinal = -1 : i64, max_sequence_lengths = [0, 0, 0], table_ids = [1, 1, 0]} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor<0xf32>, tensor<0xf32>, tensor<0xf32>, tensor) -> () %4:2 = "tf.RecvTPUEmbeddingActivations"() {_tpu_embedding_layer = "call1", config = "\0A\0B\0C\0D"} : () -> (tensor<2x2xf32>, tensor<4x4xf32>) return } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_update_embedding_enqueue_op_inputs.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_update_embedding_enqueue_op_inputs.cc index 820dec02b90..7469d99d887 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_update_embedding_enqueue_op_inputs.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_update_embedding_enqueue_op_inputs.cc @@ -13,18 +13,22 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/Casting.h" #include "mlir/IR/Attributes.h" // from @llvm-project #include "mlir/IR/Block.h" // from @llvm-project #include "mlir/IR/Builders.h" // from @llvm-project #include "mlir/IR/Operation.h" // from @llvm-project +#include "mlir/IR/StandardTypes.h" // from @llvm-project #include "mlir/IR/Value.h" // from @llvm-project #include "mlir/IR/Visitors.h" // from @llvm-project #include "mlir/Pass/Pass.h" // from @llvm-project #include "mlir/Pass/PassRegistry.h" // from @llvm-project +#include "mlir/Support/LLVM.h" // from @llvm-project #include "mlir/Support/LogicalResult.h" // from @llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" #include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" namespace mlir { @@ -86,7 +90,8 @@ LogicalResult FindTPUEmbeddingOps( LogicalResult UpdateEmbeddingEnqueueOpInput( const llvm::StringMap& enqueue_op_map, const llvm::StringMap& recv_activation_op_map, - const llvm::StringMap& send_gradient_op_map) { + const llvm::StringMap& send_gradient_op_map, + OpBuilder* builder) { for (const auto& it : enqueue_op_map) { const auto& embedding_attr = it.getKey(); Operation* embedding_op = it.second; @@ -96,21 +101,29 @@ LogicalResult UpdateEmbeddingEnqueueOpInput( << TF::RecvTPUEmbeddingActivationsOp::getOperationName() << "' op"; // TPU Embedding enqueue ops take different inputs depending on whether - // graph is in training mode or in eval/prediction mode. The inputs to the - // enqueue ops are present/listed as operands to SelectV2 op. Then branch - // operand of the SelectV2 op represents input to take during training - // and else branch operand represents input to take during - // prediction/evaluation. If SendTPUEmbeddingGradients op exists in the - // graph, then graph is in training mode, so correctly forward the input - // of SelectV2 op as operand to the TPU embedding enqueue op. + // graph is in training mode or in eval/prediction mode. During training, + // the mode parameter for TPUEmbeddingEnqueue op must be `train` and for + // evaluation or prediction, mode must be set to `inference`. + // If SendTPUEmbeddingGradients op exists in the graph, then graph is + // in training mode, so create a const op with value `train` use the + // output value of the constant as an operand to the TPU embedding + // enqueue op. bool is_training = send_gradient_op_map.count(embedding_attr); - for (auto enqueue_operand : embedding_op->getOperands()) { - if (auto select = llvm::dyn_cast_or_null( - enqueue_operand.getDefiningOp())) { - enqueue_operand.replaceAllUsesWith(is_training ? select.t() - : select.e()); - } - } + + // The last operand of TPUEmbeddingEnqueue ops is the mode which + // represents whether graph is in training mode or in evaluation mode. + auto& mode_enqueue_operand = + embedding_op->getOpOperand(embedding_op->getNumOperands() - 1); + + llvm::SmallVector mode_string_value; + mode_string_value.emplace_back(is_training ? 
"train" : "inference"); + builder->setInsertionPoint(embedding_op); + auto enqueue_mode = builder->create( + embedding_op->getLoc(), + DenseStringElementsAttr::get( + RankedTensorType::get({}, builder->getType()), + mode_string_value)); + mode_enqueue_operand.set(enqueue_mode); } return success(); @@ -140,8 +153,9 @@ void TPUUpdateEmbeddingEnqueueOpInputs::runOnFunction() { return signalPassFailure(); } - if (failed(UpdateEmbeddingEnqueueOpInput( - enqueue_op_map, recv_activation_op_map, send_gradient_op_map))) + if (failed(UpdateEmbeddingEnqueueOpInput(enqueue_op_map, + recv_activation_op_map, + send_gradient_op_map, &builder))) return signalPassFailure(); } From e7e026d0eaf4cb3417841b9ef83381c201e4e5d3 Mon Sep 17 00:00:00 2001 From: Sachin Joglekar Date: Mon, 20 Jul 2020 08:49:59 -0700 Subject: [PATCH 0828/2522] Add information about quantization support in GPU delegate documentation PiperOrigin-RevId: 322152589 Change-Id: I452ffa6fabf5bbbb81267a9b5716b1e6277c0ddb --- .../lite/g3doc/performance/gpu_advanced.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tensorflow/lite/g3doc/performance/gpu_advanced.md b/tensorflow/lite/g3doc/performance/gpu_advanced.md index eb6d6319db1..1614523b705 100644 --- a/tensorflow/lite/g3doc/performance/gpu_advanced.md +++ b/tensorflow/lite/g3doc/performance/gpu_advanced.md @@ -244,6 +244,24 @@ as well. This includes all flavors of quantization, including: To optimize performance, use models that have floating-point input & output tensors. +#### How does this work? + +Since the GPU backend only supports floating-point execution, we run quantized +models by giving it a ‘floating-point view’ of the original model. At a +high-level, this entails the following steps: + +* *Constant tensors* (such as weights/biases) are dequantized once into the + GPU memory. This happens when the delegate is applied to the TFLite + Interpreter. + +* *Inputs and outputs* to the GPU program, if 8-bit quantized, are dequantized + and quantized (respectively) for each inference. This is done on the CPU + using TFLite’s optimized kernels. + +* The GPU program is modified to mimic quantized behavior by inserting + *quantization simulators* between operations. This is necessary for models + where ops expect activations to follow bounds learnt during quantization. + This feature can be enabled using delegate options as follows: #### Android From f384c39c4da862edca9d71fe1736b27d0cd7b196 Mon Sep 17 00:00:00 2001 From: Jared Duke Date: Mon, 20 Jul 2020 09:19:56 -0700 Subject: [PATCH 0829/2522] Fix issue with TFLite zip test execution Use html.escape instead of the deprecated cgi.escape. 
This allows TFLite zip tests to be run manually, e.g., bazel test --test_tag_filters= \ //tensorflow/lite/testing:zip_test_depthwiseconv PiperOrigin-RevId: 322158421 Change-Id: Id93625748a70390562edde4975c78ed386813cec --- tensorflow/lite/testing/generate_examples_report.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/lite/testing/generate_examples_report.py b/tensorflow/lite/testing/generate_examples_report.py index 7bcf8cd86a1..2d7545be9b1 100644 --- a/tensorflow/lite/testing/generate_examples_report.py +++ b/tensorflow/lite/testing/generate_examples_report.py @@ -21,7 +21,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import cgi +import html import json FAILED = "FAILED" @@ -45,7 +45,7 @@ def make_report_table(fp, title, reports): reports.sort(key=lambda x: x[1]["tf"], reverse=True) def result_cell(x, row, col): """Produce a cell with the condition string `x`.""" - s = cgi.escape(repr(x), quote=True) + s = html.escape(repr(x), quote=True) color = "#44ff44" if x == SUCCESS else ( "#ff4444" if x == FAILED else "#eeeeee") handler = "ShowLog(%d, %d)" % (row, col) @@ -76,8 +76,8 @@ log.innerHTML = "
" + data[row][col]  + "
"; } """) fp.write("var data = \n") - fp.write(json.dumps([[cgi.escape(x[1]["tf_log"], quote=True), - cgi.escape(x[1]["toco_log"], quote=True)] + fp.write(json.dumps([[html.escape(x[1]["tf_log"], quote=True), + html.escape(x[1]["toco_log"], quote=True)] for x in reports])) fp.write(";\n") @@ -100,14 +100,14 @@ log.innerHTML = "
" + data[row][col]  + "
"; fp.write("\n") fp.write("\n") for p in param_keys: - fp.write("\n" % cgi.escape(p, quote=True)) + fp.write("\n" % html.escape(p, quote=True)) fp.write("\n") fp.write("\n") fp.write("\n") for idx, (params, vals) in enumerate(reports): fp.write("\n") for p in param_keys: - fp.write(" \n" % cgi.escape(repr(params[p]), quote=True)) + fp.write(" \n" % html.escape(repr(params[p]), quote=True)) result_cell(vals["tf"], idx, 0) result_cell(vals["toco"], idx, 1) From 362255be18059e1ac03a53ede55299b84eb214f9 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Mon, 20 Jul 2020 23:31:58 +0700 Subject: [PATCH 0830/2522] Use inline instead of FillBuffer --- .../filesystem/plugins/gcs/gcs_filesystem.cc | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc index 6006273b2ca..44a8ffb57bd 100644 --- a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc @@ -160,17 +160,6 @@ void Cleanup(TF_RandomAccessFile* file) { delete gcs_file; } -static void FillBuffer(uint64_t start, GCSFile* gcs_file, TF_Status* status) { - ABSL_EXCLUSIVE_LOCKS_REQUIRED(gcs_file->buffer_mutex); - gcs_file->buffer_start = start; - gcs_file->buffer.resize(gcs_file->buffer_size); - auto read = - gcs_file->read_fn(gcs_file->path, gcs_file->buffer_start, - gcs_file->buffer_size, &(gcs_file->buffer[0]), status); - gcs_file->buffer_end_is_past_eof = (TF_GetCode(status) == TF_OUT_OF_RANGE); - if (read >= 0) gcs_file->buffer.resize(read); -} - // `google-cloud-cpp` is working on a feature that we may want to use. // See https://github.com/googleapis/google-cloud-cpp/issues/4013. int64_t Read(const TF_RandomAccessFile* file, uint64_t offset, size_t n, @@ -191,7 +180,14 @@ int64_t Read(const TF_RandomAccessFile* file, uint64_t offset, size_t n, bool consumed_buffer_to_eof = offset + copy_size >= buffer_end && gcs_file->buffer_end_is_past_eof; if (copy_size < n && !consumed_buffer_to_eof) { - FillBuffer(offset + copy_size, gcs_file, status); + gcs_file->buffer_start = offset + copy_size; + gcs_file->buffer.resize(gcs_file->buffer_size); + auto read_fill_buffer = gcs_file->read_fn( + gcs_file->path, gcs_file->buffer_start, gcs_file->buffer_size, + &(gcs_file->buffer[0]), status); + gcs_file->buffer_end_is_past_eof = + (TF_GetCode(status) == TF_OUT_OF_RANGE); + if (read_fill_buffer >= 0) gcs_file->buffer.resize(read_fill_buffer); if (TF_GetCode(status) != TF_OK && TF_GetCode(status) != TF_OUT_OF_RANGE) { // Empty the buffer to avoid caching bad reads. From 8296ec766f2410ac4c8fdb61b84682e435a6c326 Mon Sep 17 00:00:00 2001 From: Jonathan Chu Date: Mon, 20 Jul 2020 16:32:56 +0000 Subject: [PATCH 0831/2522] Add ldap --- tensorflow/python/eager/function.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 43717ebf2f4..be8fcb82610 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -2163,7 +2163,7 @@ class ConcreteFunction(object): Returns: The actual call output. """ - # TODO: implement in C++ + # TODO(jlchu): implement in C++. 
if self._func_graph.structured_outputs is None: return result From 4b87b12c96c56a4c6485195ca5123b1a21636221 Mon Sep 17 00:00:00 2001 From: Yanhui Liang Date: Mon, 20 Jul 2020 09:51:31 -0700 Subject: [PATCH 0832/2522] Update run_v1_only test with proper reason. PiperOrigin-RevId: 322164638 Change-Id: I17ffbc5c396c8fa97e2a4dff8ada380795c52ba2 --- .../python/training/server_lib_same_variables_no_clear_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/training/server_lib_same_variables_no_clear_test.py b/tensorflow/python/training/server_lib_same_variables_no_clear_test.py index ff3fab9f372..34f83ca6f12 100644 --- a/tensorflow/python/training/server_lib_same_variables_no_clear_test.py +++ b/tensorflow/python/training/server_lib_same_variables_no_clear_test.py @@ -34,7 +34,8 @@ class SameVariablesNoClearTest(test.TestCase): # TODO(b/34465411): Starting multiple servers with different configurations # in the same test is flaky. Move this test case back into # "server_lib_test.py" when this is no longer the case. - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only( + "This exercises tensor lookup via names which is not supported in V2.") def testSameVariablesNoClear(self): server = server_lib.Server.create_local_server() From 4d0dfef5f42702f9856274d87a183c2661fae9fb Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Mon, 20 Jul 2020 10:02:59 -0700 Subject: [PATCH 0833/2522] Internal change only. PiperOrigin-RevId: 322167240 Change-Id: I3cc74440dd29a7357bc1ca82e59750906bea8cad --- tensorflow/python/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 42fafc5d9cc..7a6e331a3eb 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -56,6 +56,7 @@ visibility = [ "//third_party/py/reverb:__subpackages__", "//third_party/py/neural_structured_learning:__subpackages__", "//third_party/py/tensorflow_examples:__subpackages__", + "//third_party/py/tf_agents:__subpackages__", # For benchmarks. "//third_party/py/tf_slim:__subpackages__", # TODO(aselle): to pass open source test. "//bazel_pip/tensorflow/lite/toco/python:__pkg__", From 66f5d8da8b68ab358599581a1adca6b71cf7fc7c Mon Sep 17 00:00:00 2001 From: Gabriel Rasskin Date: Mon, 20 Jul 2020 17:39:07 +0000 Subject: [PATCH 0834/2522] String_print_f build --- .../fuzzing/{string_print_f.cc => string_print_f_fuzz.cc} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tensorflow/security/fuzzing/{string_print_f.cc => string_print_f_fuzz.cc} (100%) diff --git a/tensorflow/security/fuzzing/string_print_f.cc b/tensorflow/security/fuzzing/string_print_f_fuzz.cc similarity index 100% rename from tensorflow/security/fuzzing/string_print_f.cc rename to tensorflow/security/fuzzing/string_print_f_fuzz.cc From 09ade5978f405a0120590daa3130374bfeb4f4db Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 20 Jul 2020 10:56:11 -0700 Subject: [PATCH 0835/2522] Internal change PiperOrigin-RevId: 322179620 Change-Id: I0226b3428a731addb6fdbd3cec356bd9eb7a1d27 --- ...pu_update_embedding_enqueue_op_inputs.mlir | 18 +++---- .../tpu_update_embedding_enqueue_op_inputs.cc | 48 +++++++------------ 2 files changed, 27 insertions(+), 39 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tpu_update_embedding_enqueue_op_inputs.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tpu_update_embedding_enqueue_op_inputs.mlir index f9bdddd2778..09e701e5dd3 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tpu_update_embedding_enqueue_op_inputs.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tpu_update_embedding_enqueue_op_inputs.mlir @@ -9,15 +9,16 @@ // CHECK-SAME: %[[ARG_5:[a-z0-9]*]]: tensor // CHECK-SAME: %[[ARG_6:[a-z0-9]*]]: tensor // CHECK-SAME: %[[ARG_7:[a-z0-9]*]]: tensor +// CHECK-SAME: %[[ARG_8:[a-z0-9]*]]: tensor func @check_enqueue_ops_update_for_eval(%arg0: tensor, %arg1: tensor, %arg2 :tensor, %arg3: tensor, %arg4: tensor, %arg5: tensor, - %arg6: tensor, %arg7: tensor) -> () { + %arg6: tensor, %arg7: tensor, %arg8: tensor) -> () { // CHECK: %[[CONST_0:[a-z0-9]*]] = "tf.Const"() %0 = "tf.Const"() {value = dense<[]> : tensor<0xf32>} : () -> tensor<0xf32> + %1 = "tf.SelectV2"(%arg8, %arg6, %arg7) : (tensor, tensor, tensor) -> tensor - // CHECK: %[[CONST_MODE:[a-z0-9]*]] = "tf.Const"() {value = dense<"inference"> : tensor} : () -> tensor - // CHECK: "tf.EnqueueTPUEmbeddingSparseTensorBatch"(%[[ARG_0]], %[[ARG_1]], %[[ARG_2]], %[[ARG_3]], %[[ARG_4]], %[[ARG_5]], %[[CONST_0]], %[[CONST_0]], %[[CONST_0]], %[[CONST_MODE]]) - "tf.EnqueueTPUEmbeddingSparseTensorBatch"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %0, %0, %0, %arg7) {_tpu_embedding_layer = "call1", _xla_outside_compilation = "0", combiners = ["mean", "sum"], device_ordinal = -1 : i64, max_sequence_lengths = [0, 0, 0], table_ids = [1, 1, 0]} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor<0xf32>, tensor<0xf32>, tensor<0xf32>, tensor) -> () + // CHECK: "tf.EnqueueTPUEmbeddingSparseTensorBatch"(%[[ARG_0]], %[[ARG_1]], %[[ARG_2]], %[[ARG_3]], %[[ARG_4]], %[[ARG_5]], %[[CONST_0]], %[[CONST_0]], %[[CONST_0]], %[[ARG_7]]) + "tf.EnqueueTPUEmbeddingSparseTensorBatch"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %0, %0, %0, %1) {_tpu_embedding_layer = "call1", _xla_outside_compilation = "0", combiners = ["mean", "sum"], device_ordinal = -1 : i64, max_sequence_lengths = [0, 0, 0], table_ids = [1, 1, 0]} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor<0xf32>, tensor<0xf32>, tensor<0xf32>, tensor) -> () %2:2 = "tf.RecvTPUEmbeddingActivations"() {_tpu_embedding_layer = "call1", config = "\0A\0B\0C\0D"} : () -> (tensor<2x2xf32>, tensor<4x4xf32>) return } @@ -33,19 +34,20 @@ func @check_enqueue_ops_update_for_eval(%arg0: tensor, %arg1: tensor // CHECK-SAME: %[[ARG_6:[a-z0-9]*]]: tensor // CHECK-SAME: %[[ARG_7:[a-z0-9]*]]: tensor +// CHECK-SAME: %[[ARG_8:[a-z0-9]*]]: tensor func @check_enqueue_ops_update_for_training(%arg0: tensor, %arg1: tensor, %arg2 :tensor, %arg3: tensor, %arg4: tensor, %arg5: tensor, - %arg6: tensor, %arg7: tensor) -> () { + %arg6: tensor, %arg7: tensor, %arg8: tensor) -> () { // CHECK: %[[CONST_0:[a-z0-9]*]] = "tf.Const"() %0 = "tf.Const"() {value = dense<[]> : tensor<0xf32>} : () -> tensor<0xf32> + %1 = "tf.SelectV2"(%arg8, %arg6, %arg7) : (tensor, tensor, tensor) -> tensor %2 = "tf.Const"() {value = dense<0.0> : tensor<2x2xf32>} : () -> tensor<2x2xf32> %3 = 
"tf.Const"() {value = dense<0.0> : tensor<4x4xf32>} : () -> tensor<4x4xf32> "tf.SendTPUEmbeddingGradients"(%2, %3) {_tpu_embedding_layer = "call1", config = "\0A\0B\0C\0D", operand_segment_sizes = dense<[2, 0]> : vector<2xi32>} : (tensor<2x2xf32>, tensor<4x4xf32>) -> () - // CHECK: %[[CONST_MODE:[a-z0-9]*]] = "tf.Const"() {value = dense<"train"> : tensor} : () -> tensor - // CHECK: "tf.EnqueueTPUEmbeddingSparseTensorBatch"(%[[ARG_0]], %[[ARG_1]], %[[ARG_2]], %[[ARG_3]], %[[ARG_4]], %[[ARG_5]], %[[CONST_0]], %[[CONST_0]], %[[CONST_0]], %[[CONST_MODE]]) - "tf.EnqueueTPUEmbeddingSparseTensorBatch"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %0, %0, %0, %arg7) {_tpu_embedding_layer = "call1", _xla_outside_compilation = "0", combiners = ["mean", "sum"], device_ordinal = -1 : i64, max_sequence_lengths = [0, 0, 0], table_ids = [1, 1, 0]} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor<0xf32>, tensor<0xf32>, tensor<0xf32>, tensor) -> () + // CHECK: "tf.EnqueueTPUEmbeddingSparseTensorBatch"(%[[ARG_0]], %[[ARG_1]], %[[ARG_2]], %[[ARG_3]], %[[ARG_4]], %[[ARG_5]], %[[CONST_0]], %[[CONST_0]], %[[CONST_0]], %[[ARG_6]]) + "tf.EnqueueTPUEmbeddingSparseTensorBatch"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %0, %0, %0, %1) {_tpu_embedding_layer = "call1", _xla_outside_compilation = "0", combiners = ["mean", "sum"], device_ordinal = -1 : i64, max_sequence_lengths = [0, 0, 0], table_ids = [1, 1, 0]} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor<0xf32>, tensor<0xf32>, tensor<0xf32>, tensor) -> () %4:2 = "tf.RecvTPUEmbeddingActivations"() {_tpu_embedding_layer = "call1", config = "\0A\0B\0C\0D"} : () -> (tensor<2x2xf32>, tensor<4x4xf32>) return } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_update_embedding_enqueue_op_inputs.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_update_embedding_enqueue_op_inputs.cc index 7469d99d887..820dec02b90 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_update_embedding_enqueue_op_inputs.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_update_embedding_enqueue_op_inputs.cc @@ -13,22 +13,18 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/Casting.h" #include "mlir/IR/Attributes.h" // from @llvm-project #include "mlir/IR/Block.h" // from @llvm-project #include "mlir/IR/Builders.h" // from @llvm-project #include "mlir/IR/Operation.h" // from @llvm-project -#include "mlir/IR/StandardTypes.h" // from @llvm-project #include "mlir/IR/Value.h" // from @llvm-project #include "mlir/IR/Visitors.h" // from @llvm-project #include "mlir/Pass/Pass.h" // from @llvm-project #include "mlir/Pass/PassRegistry.h" // from @llvm-project -#include "mlir/Support/LLVM.h" // from @llvm-project #include "mlir/Support/LogicalResult.h" // from @llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" -#include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" #include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" namespace mlir { @@ -90,8 +86,7 @@ LogicalResult FindTPUEmbeddingOps( LogicalResult UpdateEmbeddingEnqueueOpInput( const llvm::StringMap& enqueue_op_map, const llvm::StringMap& recv_activation_op_map, - const llvm::StringMap& send_gradient_op_map, - OpBuilder* builder) { + const llvm::StringMap& send_gradient_op_map) { for (const auto& it : enqueue_op_map) { const auto& embedding_attr = it.getKey(); Operation* embedding_op = it.second; @@ -101,29 +96,21 @@ LogicalResult UpdateEmbeddingEnqueueOpInput( << TF::RecvTPUEmbeddingActivationsOp::getOperationName() << "' op"; // TPU Embedding enqueue ops take different inputs depending on whether - // graph is in training mode or in eval/prediction mode. During training, - // the mode parameter for TPUEmbeddingEnqueue op must be `train` and for - // evaluation or prediction, mode must be set to `inference`. - // If SendTPUEmbeddingGradients op exists in the graph, then graph is - // in training mode, so create a const op with value `train` use the - // output value of the constant as an operand to the TPU embedding - // enqueue op. + // graph is in training mode or in eval/prediction mode. The inputs to the + // enqueue ops are present/listed as operands to SelectV2 op. Then branch + // operand of the SelectV2 op represents input to take during training + // and else branch operand represents input to take during + // prediction/evaluation. If SendTPUEmbeddingGradients op exists in the + // graph, then graph is in training mode, so correctly forward the input + // of SelectV2 op as operand to the TPU embedding enqueue op. bool is_training = send_gradient_op_map.count(embedding_attr); - - // The last operand of TPUEmbeddingEnqueue ops is the mode which - // represents whether graph is in training mode or in evaluation mode. - auto& mode_enqueue_operand = - embedding_op->getOpOperand(embedding_op->getNumOperands() - 1); - - llvm::SmallVector mode_string_value; - mode_string_value.emplace_back(is_training ? "train" : "inference"); - builder->setInsertionPoint(embedding_op); - auto enqueue_mode = builder->create( - embedding_op->getLoc(), - DenseStringElementsAttr::get( - RankedTensorType::get({}, builder->getType()), - mode_string_value)); - mode_enqueue_operand.set(enqueue_mode); + for (auto enqueue_operand : embedding_op->getOperands()) { + if (auto select = llvm::dyn_cast_or_null( + enqueue_operand.getDefiningOp())) { + enqueue_operand.replaceAllUsesWith(is_training ? 
select.t() + : select.e()); + } + } } return success(); @@ -153,9 +140,8 @@ void TPUUpdateEmbeddingEnqueueOpInputs::runOnFunction() { return signalPassFailure(); } - if (failed(UpdateEmbeddingEnqueueOpInput(enqueue_op_map, - recv_activation_op_map, - send_gradient_op_map, &builder))) + if (failed(UpdateEmbeddingEnqueueOpInput( + enqueue_op_map, recv_activation_op_map, send_gradient_op_map))) return signalPassFailure(); } From c99cade2628071213656fec80b7ab5f04b1fc04e Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Mon, 20 Jul 2020 18:11:16 +0000 Subject: [PATCH 0836/2522] modify gradient --- .../python/kernel_tests/map_ops_test.py | 43 +++++++++++++++---- tensorflow/python/ops/map_ops.py | 18 ++++++-- 2 files changed, 48 insertions(+), 13 deletions(-) diff --git a/tensorflow/python/kernel_tests/map_ops_test.py b/tensorflow/python/kernel_tests/map_ops_test.py index b214909dc54..c6f37251f21 100644 --- a/tensorflow/python/kernel_tests/map_ops_test.py +++ b/tensorflow/python/kernel_tests/map_ops_test.py @@ -26,7 +26,7 @@ from tensorflow.python.framework import test_util from tensorflow.python.ops import map_ops from tensorflow.python.platform import test -from tensorflow.python.util.lazy_loader import LazyLoader +from tensorflow.python.util.lazy_loader import LazyLoader control_flow_ops = LazyLoader("control_flow_ops", globals(), "tensorflow.python.ops.control_flow_ops") @@ -110,7 +110,7 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): b2 = map_ops.tensor_map_has_key(m, k2) self.assertAllEqual(b, True) self.assertAllEqual(b2, False) - + def testHasKeyLookup(self): with self.test_session(): m = map_ops.empty_tensor_map() @@ -119,17 +119,15 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): v = constant_op.constant(2.0) m = map_ops.tensor_map_insert(m, k, v) - default_value = constant_op.constant(0.0) l = control_flow_ops.cond(map_ops.tensor_map_has_key(m, k), - lambda: map_ops.tensor_map_lookup(m, k, dtypes.float32), - lambda: default_value) + lambda: map_ops.tensor_map_lookup(m, k, dtypes.float32), + lambda: array_ops.zeros_like(v)) l2 = control_flow_ops.cond(map_ops.tensor_map_has_key(m, k2), - lambda: map_ops.tensor_map_lookup(m, k, dtypes.float32), - lambda: default_value) - + lambda: map_ops.tensor_map_lookup(m, k, dtypes.float32), + lambda: default_value) self.assertAllClose(l, v) self.assertAllClose(l2, default_value) - + def testInsertLookupGrad(self): with backprop.GradientTape() as tape: m = map_ops.empty_tensor_map() @@ -142,5 +140,32 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): g = tape.gradient(l, v) self.assertAllClose(g, 5.0) + #TODO(kattian): Test alternating inserts and lookups + def testMultipleInsertLookupGrad(self): + with backprop.GradientTape(persistent=True) as tape: + m = map_ops.empty_tensor_map() + k = constant_op.constant(1.0) + v = constant_op.constant(2.0) + k2 = constant_op.constant(12.0) + v2 = constant_op.constant(22.0) + k3 = constant_op.constant(13.0) + v3 = constant_op.constant(23.0) + tape.watch(v) + tape.watch(v2) + tape.watch(v3) + m = map_ops.tensor_map_insert(m, k, v) + m = map_ops.tensor_map_insert(m, k2, v2) + m = map_ops.tensor_map_insert(m, k3, v3) + + l = map_ops.tensor_map_lookup(m, k, v.dtype) + l2 = map_ops.tensor_map_lookup(m, k2, v2.dtype) + l3 = map_ops.tensor_map_lookup(m, k3, v3.dtype) + g = tape.gradient(l * 5, v) + self.assertAllClose(g, 5) + g2 = tape.gradient(l2 * 5, v2) + self.assertAllClose(g2, 5) + g3 = tape.gradient(l3 * 5, v3) + self.assertAllClose(g3, 
5) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/ops/map_ops.py b/tensorflow/python/ops/map_ops.py index 144346976d1..f18623aa41a 100644 --- a/tensorflow/python/ops/map_ops.py +++ b/tensorflow/python/ops/map_ops.py @@ -22,7 +22,13 @@ from __future__ import print_function # pylint: disable=wildcard-import from tensorflow.python.framework import ops from tensorflow.python.ops import gen_map_ops +from tensorflow.python.ops import array_ops from tensorflow.python.ops.gen_map_ops import * +from tensorflow.python.framework import constant_op + +from tensorflow.python.util.lazy_loader import LazyLoader +control_flow_ops = LazyLoader("control_flow_ops", globals(), + "tensorflow.python.ops.control_flow_ops") ops.NotDifferentiable("EmptyTensorMap") @@ -46,7 +52,7 @@ def tensor_map_has_key(input_handle, key): @ops.RegisterGradient("TensorMapLookup") def LookupGrad(op, dval): - m, k = op.inputs + _, k = op.inputs map_grad = empty_tensor_map() map_grad = tensor_map_insert(map_grad, k, dval) key_grad = None @@ -54,8 +60,12 @@ def LookupGrad(op, dval): @ops.RegisterGradient("TensorMapInsert") def InsertGrad(op, dmap): - _, key, val = op.inputs - map_grad = None + _, k, v = op.inputs key_grad = None - value_grad = tensor_map_lookup(dmap, key, val.dtype) + value_grad = control_flow_ops.cond(tensor_map_has_key(dmap, k), + lambda: tensor_map_lookup(dmap, k, v.dtype), + lambda: array_ops.zeros_like(v)) + map_grad = control_flow_ops.cond(tensor_map_has_key(dmap, k), + lambda: tensor_map_erase(dmap, k, v.dtype)[0], + lambda: dmap) return map_grad, key_grad, value_grad From 21fae3ad156deeb3657555cc9bfd378818a36bdf Mon Sep 17 00:00:00 2001 From: Haoyu Zhang Date: Mon, 20 Jul 2020 10:58:03 -0700 Subject: [PATCH 0837/2522] Do not invoke DeregisterCall when RendezvousMgr is already aborted. PiperOrigin-RevId: 322180083 Change-Id: I7c755881968bd7fd218b010fad6423d05fa4738f --- tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.cc b/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.cc index 89fe6ced725..512c17fcfcf 100644 --- a/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.cc +++ b/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.cc @@ -282,7 +282,6 @@ void RpcRemoteRendezvous::RecvFromRemoteAsync( // callback. 
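As an illustrative aside (a minimal pure-Python sketch, not taken from any patch in this series): the InsertGrad/LookupGrad change to map_ops.py above makes gradients flow through a per-key map - a lookup's gradient is recorded under its key, and an insert's value only receives that recorded gradient if the key is still present, after which the entry is erased so an earlier insert of the same key gets nothing. The dict below stands in for a TensorMap handle, and the function names are invented for the sketch.

def lookup_grad(key, dval):
    # Gradient of a lookup: record dval under the looked-up key; the key
    # itself receives no gradient.
    return {key: dval}, None

def insert_grad(dmap, key, zeros_like_value):
    # Gradient of an insert: the inserted value takes whatever the map
    # gradient holds for its key (zeros if a later insert of the same key
    # already consumed it), and the entry is erased so it is not counted
    # again further back on the tape.
    value_grad = dmap.get(key, zeros_like_value)
    map_grad = {k: v for k, v in dmap.items() if k != key}
    return map_grad, None, value_grad

# Reverse pass for: m = insert(m, k, v); m = insert(m, k, v2); l = lookup(m, k)
dmap, _ = lookup_grad(1.0, 5.0)              # gradient of l * 5 entering the lookup
dmap, _, dv2 = insert_grad(dmap, 1.0, 0.0)   # second insert (v2) runs first in reverse
dmap, _, dv1 = insert_grad(dmap, 1.0, 0.0)   # first insert (v) finds the key erased
print(dv2, dv1)  # 5.0 0.0 - the overwritten value gets a zero gradient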
call->ReleaseWorker(sess->worker_cache()); call->done()(call->status(), Args(), Args(), Tensor(), false); - DeregisterCall(call); get_call_freelist()->Release(call); return; } From 5bba93c62eb7224bbecaa8daf478dd28d3068492 Mon Sep 17 00:00:00 2001 From: Robert Suderman Date: Mon, 20 Jul 2020 10:58:56 -0700 Subject: [PATCH 0838/2522] Fix namespace for complex lowerings PiperOrigin-RevId: 322180317 Change-Id: Ifcbebbee57ae7504f122f6b0d0992fa7919d8501 --- .../mlir/hlo/lib/Dialect/mhlo/transforms/lower_complex.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lower_complex.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lower_complex.cc index 54ea4955573..23d146afeee 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lower_complex.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lower_complex.cc @@ -51,7 +51,7 @@ class LowerComplex : public PassWrapper { } // end anonymous namespace namespace mlir { -namespace hlo { +namespace mhlo { namespace { #include "tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/generated_lower_complex.inc" @@ -62,14 +62,14 @@ void PopulateComplexLoweringPatterns(MLIRContext* context, OwningRewritePatternList* patterns) { populateWithGenerated(context, patterns); } -} // end namespace hlo +} // end namespace mhlo } // end namespace mlir // Lowers the complex operations that can be represented using other operations. void LowerComplex::runOnFunction() { // Add lowering patterns to the list. OwningRewritePatternList patterns; - mlir::hlo::PopulateComplexLoweringPatterns(&getContext(), &patterns); + mlir::mhlo::PopulateComplexLoweringPatterns(&getContext(), &patterns); applyPatternsAndFoldGreedily(getFunction(), patterns); } From 09dab68b9549b35389511318fbd29d62d3e8cf6f Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Mon, 20 Jul 2020 11:14:19 -0700 Subject: [PATCH 0839/2522] Migrate the Linux CPU nightly release scripts to use the new bazelrc configs. PiperOrigin-RevId: 322184022 Change-Id: I47ce79ca30ec386e8865c29b7060de57592e664b --- .../release/ubuntu_16/cpu_py2_full/nightly_release.sh | 8 +------- .../release/ubuntu_16/cpu_py35_full/nightly_release.sh | 8 +------- .../release/ubuntu_16/cpu_py36_full/nightly_release.sh | 8 +------- .../release/ubuntu_16/cpu_py37_full/nightly_release.sh | 8 +------- .../release/ubuntu_16/cpu_py38_full/nightly_release.sh | 8 +------- 5 files changed, 5 insertions(+), 35 deletions(-) diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py2_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py2_full/nightly_release.sh index 1a5124ecef3..b60fe5fdc51 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py2_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py2_full/nightly_release.sh @@ -26,18 +26,12 @@ install_bazelisk python2.7 tensorflow/tools/ci_build/update_version.py --nightly # Run configure. 
-export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=0 export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python2.7) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=opt --config=v2 \ - --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ - tensorflow/tools/pip_package:build_pip_package +bazel build --config=release_cpu_linux tensorflow/tools/pip_package:build_pip_package ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --cpu --nightly_flag diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/nightly_release.sh index 664e6cf50b2..06ae6e9de18 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/nightly_release.sh @@ -27,18 +27,12 @@ install_bazelisk python2.7 tensorflow/tools/ci_build/update_version.py --nightly # Run configure. -export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=0 export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=opt --config=v2 \ - --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ - tensorflow/tools/pip_package:build_pip_package +bazel build --config=release_cpu_linux tensorflow/tools/pip_package:build_pip_package ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --cpu --nightly_flag diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/nightly_release.sh index 2b770867099..c54fe72a55a 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/nightly_release.sh @@ -27,18 +27,12 @@ install_bazelisk python2.7 tensorflow/tools/ci_build/update_version.py --nightly # Run configure. -export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=0 export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.6) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=opt --config=v2 \ - --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ - tensorflow/tools/pip_package:build_pip_package +bazel build --config=release_cpu_linux tensorflow/tools/pip_package:build_pip_package ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --cpu --nightly_flag diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py37_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py37_full/nightly_release.sh index 25e59a5b096..4bea46486c3 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py37_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py37_full/nightly_release.sh @@ -27,18 +27,12 @@ install_bazelisk python2.7 tensorflow/tools/ci_build/update_version.py --nightly # Run configure. 
-export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=0 export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.7) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=opt --config=v2 \ - --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ - tensorflow/tools/pip_package:build_pip_package +bazel build --config=release_cpu_linux tensorflow/tools/pip_package:build_pip_package ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --cpu --nightly_flag diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py38_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py38_full/nightly_release.sh index e82064f7221..3dc627f23ee 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py38_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py38_full/nightly_release.sh @@ -27,18 +27,12 @@ install_bazelisk python2.7 tensorflow/tools/ci_build/update_version.py --nightly # Run configure. -export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=0 export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.8) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=opt --config=v2 \ - --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ - tensorflow/tools/pip_package:build_pip_package +bazel build --config=release_cpu_linux tensorflow/tools/pip_package:build_pip_package ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --cpu --nightly_flag From 72446c75743c4646e841561a1c7600f7b9e4f174 Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Mon, 20 Jul 2020 11:36:01 -0700 Subject: [PATCH 0840/2522] Add a TODO and change signature to std::string --- tensorflow/core/platform/file_system.cc | 4 +++- tensorflow/core/platform/file_system.h | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/platform/file_system.cc b/tensorflow/core/platform/file_system.cc index 84257429de4..c96cf99291e 100644 --- a/tensorflow/core/platform/file_system.cc +++ b/tensorflow/core/platform/file_system.cc @@ -436,7 +436,9 @@ string FileSystem::CreateURI(StringPiece scheme, StringPiece host, return strings::StrCat(scheme, "://", host, path); } -string FileSystem::DecodeTransaction(const TransactionToken* token){ +std::string FileSystem::DecodeTransaction(const TransactionToken* token){ + +// TODO(sami): Switch using StrCat when void* is supported if(token){ std::stringstream oss; oss<<"Token= "<token<<", Owner="<owner; diff --git a/tensorflow/core/platform/file_system.h b/tensorflow/core/platform/file_system.h index 954b6030560..e16cd4333dd 100644 --- a/tensorflow/core/platform/file_system.h +++ b/tensorflow/core/platform/file_system.h @@ -381,7 +381,7 @@ class FileSystem { }; /// \brief Decode transaction to human readable string. 
- virtual string DecodeTransaction(const TransactionToken* token); + virtual std::string DecodeTransaction(const TransactionToken* token); FileSystem() {} From 0599a371346ca56ef28280670d9bd951e6794ce3 Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Mon, 20 Jul 2020 18:38:43 +0000 Subject: [PATCH 0841/2522] same key tests --- .../python/kernel_tests/map_ops_test.py | 29 ++++++++++++++----- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/tensorflow/python/kernel_tests/map_ops_test.py b/tensorflow/python/kernel_tests/map_ops_test.py index c6f37251f21..c66b9bbad92 100644 --- a/tensorflow/python/kernel_tests/map_ops_test.py +++ b/tensorflow/python/kernel_tests/map_ops_test.py @@ -23,6 +23,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops from tensorflow.python.ops import map_ops from tensorflow.python.platform import test @@ -57,8 +58,6 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testTensorMapLookupMissingKeyFails(self): m = map_ops.empty_tensor_map() k = constant_op.constant(1.0) - v = constant_op.constant(2.0) - with self.assertRaisesRegex(errors.InvalidArgumentError, "Trying to lookup non-existent key."): l = map_ops.tensor_map_lookup(m, k, dtypes.float32) @@ -80,8 +79,6 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testTensorMapEraseFromEmptyMapFails(self): m = map_ops.empty_tensor_map() k = constant_op.constant(1.0) - v = constant_op.constant(2.0) - with self.assertRaisesRegex(errors.InvalidArgumentError, "Trying to erase non-existent item."): m, e = map_ops.tensor_map_erase(m, k, dtypes.float32) @@ -93,7 +90,6 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): k2 = constant_op.constant(2.0) v = constant_op.constant(2.0) m = map_ops.tensor_map_insert(m, k2, v) - with self.assertRaisesRegex(errors.InvalidArgumentError, "Trying to erase non-existent item."): m, e = map_ops.tensor_map_erase(m, k, dtypes.float32) @@ -119,9 +115,10 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): v = constant_op.constant(2.0) m = map_ops.tensor_map_insert(m, k, v) + default_value = array_ops.zeros_like(v) l = control_flow_ops.cond(map_ops.tensor_map_has_key(m, k), lambda: map_ops.tensor_map_lookup(m, k, dtypes.float32), - lambda: array_ops.zeros_like(v)) + lambda: default_value) l2 = control_flow_ops.cond(map_ops.tensor_map_has_key(m, k2), lambda: map_ops.tensor_map_lookup(m, k, dtypes.float32), lambda: default_value) @@ -161,11 +158,27 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): l2 = map_ops.tensor_map_lookup(m, k2, v2.dtype) l3 = map_ops.tensor_map_lookup(m, k3, v3.dtype) g = tape.gradient(l * 5, v) - self.assertAllClose(g, 5) g2 = tape.gradient(l2 * 5, v2) - self.assertAllClose(g2, 5) g3 = tape.gradient(l3 * 5, v3) + self.assertAllClose(g, 5) + self.assertAllClose(g2, 5) self.assertAllClose(g3, 5) + def testSameKeyInsertLookupGrad(self): + with backprop.GradientTape(persistent=True) as tape: + m = map_ops.empty_tensor_map() + k = constant_op.constant(1.0) + v = constant_op.constant(2.0) + v2 = constant_op.constant(22.0) + tape.watch(v) + tape.watch(v2) + m = map_ops.tensor_map_insert(m, k, v) + m = map_ops.tensor_map_insert(m, k, v2) + l = map_ops.tensor_map_lookup(m, k, v.dtype) + g = tape.gradient(l * 5, v) + g2 = tape.gradient(l * 5, v2) + 
self.assertAllClose(g, array_ops.zeros_like(v)) + self.assertAllClose(g2, 5) + if __name__ == '__main__': test.main() From 0199d315ef5ac8e6d8e333724dc2162bb349e7d4 Mon Sep 17 00:00:00 2001 From: Dimitris Vardoulakis Date: Mon, 20 Jul 2020 11:32:47 -0700 Subject: [PATCH 0842/2522] [XLA:coverage] Remove cpu from convolution_test_1d to fix kokoro breakage. Follow-up to cl/321997730. PiperOrigin-RevId: 322188129 Change-Id: Ie2a8018b73ad2e6d79b51fbbf9d7df6e6cd171cb --- tensorflow/compiler/xla/tests/BUILD | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 37188299dca..927f9d14883 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -1136,7 +1136,10 @@ xla_test( args = ["--vmodule=convolution_emitter=7"], # In the open source build, convolution_test_1d_gpu fails because it doesn't # recognize --vmodule. - disabled_backends = ["gpu"], + disabled_backends = [ + "cpu", + "gpu", + ], shard_count = 50, tags = [ "no_rocm", @@ -1149,10 +1152,13 @@ xla_test( ) xla_test( - name = "convolution_test_1d_gpu", + name = "convolution_test_1d_no_vmodule", timeout = "long", srcs = ["convolution_test_1d.cc"], - backends = ["gpu"], + backends = [ + "cpu", + "gpu", + ], shard_count = 50, tags = [ "no_rocm", From 48a990515c46b118d3da8bd5e091fb7477288471 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 20 Jul 2020 11:35:31 -0700 Subject: [PATCH 0843/2522] [xla] HTMLEscape XLA Shapes in hlo graph dumper to avoid corrupt dot output. PiperOrigin-RevId: 322188812 Change-Id: If6f38dbafe280c8a49a287f80ef6705413d5fa7f --- tensorflow/compiler/xla/service/hlo_graph_dumper.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index a50af6bf1b9..d7e8984dee8 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -1181,7 +1181,7 @@ string HloDotDumper::GetInstructionNodeExtraInfo(const HloInstruction* instr) { instr_shape = StrCat( absl::string_view(instr_shape).substr(0, kMaxShapeLen - 3), "..."); } - lines.push_back(instr_shape); + lines.push_back(HtmlLikeStringSanitize(instr_shape)); } if (debug_options_.xla_hlo_graph_addresses()) { lines.push_back(StrFormat("[%p]", instr)); From 340e873efddff0e20b8a8711ec0f1d022f43a98a Mon Sep 17 00:00:00 2001 From: Michael Banfield Date: Mon, 20 Jul 2020 12:01:37 -0700 Subject: [PATCH 0844/2522] Use simple uploads for GCS directory objects and avoid reuploading. PiperOrigin-RevId: 322194404 Change-Id: I1e63a8a2de62c7d9eed673014871e8238d7b237c --- .../core/platform/cloud/gcs_file_system.cc | 45 ++++++++--- .../platform/cloud/gcs_file_system_test.cc | 77 ++++++++++++------- 2 files changed, 84 insertions(+), 38 deletions(-) diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc index ec9b0c726fb..43ece688034 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system.cc @@ -66,6 +66,8 @@ constexpr size_t kReadAppendableFileBufferSize = 1024 * 1024; // In bytes. constexpr int kGetChildrenDefaultPageSize = 1000; // The HTTP response code "308 Resume Incomplete". constexpr uint64 HTTP_CODE_RESUME_INCOMPLETE = 308; +// The HTTP response code "412 Precondition Failed". 
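As an illustrative aside (a rough pure-Python model, not taken from any patch in this series): the CreateDir change in the GCS patch here replaces the resumable upload of the zero-length directory marker with one simple upload guarded by ifGenerationMatch=0, so the object store itself rejects the write with 412 Precondition Failed when a marker already exists and the filesystem reports AlreadyExists instead of re-uploading. The dict below stands in for the bucket and the function name is invented for the sketch.

def create_dir(bucket, dirname):
    marker = dirname.rstrip("/") + "/"
    # ifGenerationMatch=0 semantics: the write only succeeds when no
    # generation of the object exists yet, so an existing marker is never
    # overwritten; the 412 response is surfaced as "already exists".
    if marker in bucket:
        raise FileExistsError(dirname)  # stands in for HTTP 412 -> AlreadyExists
    bucket[marker] = b""                # zero-length directory marker object

store = {}
create_dir(store, "gs://bucket/subpath")    # creates the marker
# create_dir(store, "gs://bucket/subpath")  # would now raise FileExistsError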
+constexpr uint64 HTTP_CODE_PRECONDITION_FAILED = 412; // The environment variable that overrides the size of the readahead buffer. ABSL_DEPRECATED("Use GCS_READ_CACHE_BLOCK_SIZE_MB instead.") constexpr char kReadaheadBufferSize[] = "GCS_READAHEAD_BUFFER_SIZE_BYTES"; @@ -1659,27 +1661,50 @@ Status GcsFileSystem::DeleteFile( Status GcsFileSystem::CreateDir( const string& dirname /*, TransactionToken* token */) { + string dirname_with_slash = MaybeAppendSlash(dirname); + VLOG(3) << "CreateDir: creating directory with dirname: " << dirname + << " and dirname_with_slash: " << dirname_with_slash; string bucket, object; - TF_RETURN_IF_ERROR(ParseGcsPath(dirname, true, &bucket, &object)); + TF_RETURN_IF_ERROR(ParseGcsPath(dirname_with_slash, /*empty_object_ok=*/true, + &bucket, &object)); if (object.empty()) { bool is_bucket; TF_RETURN_IF_ERROR(BucketExists(bucket, &is_bucket)); return is_bucket ? Status::OK() - : errors::NotFound("The specified bucket ", dirname, - " was not found."); + : errors::NotFound("The specified bucket ", + dirname_with_slash, " was not found."); } - const string dirname_with_slash = MaybeAppendSlash(dirname); - if (FileExists(dirname_with_slash).ok()) { + // Use the original name for a correct error here. + VLOG(3) << "CreateDir: directory already exists, not uploading " << dirname; return errors::AlreadyExists(dirname); } - // Create a zero-length directory marker object. - std::unique_ptr file; - TF_RETURN_IF_ERROR(NewWritableFile(dirname_with_slash, &file)); - TF_RETURN_IF_ERROR(file->Close()); - return Status::OK(); + std::unique_ptr request; + TF_RETURN_IF_ERROR(CreateHttpRequest(&request)); + + request->SetUri(strings::StrCat( + kGcsUploadUriBase, "b/", bucket, + "/o?uploadType=media&name=", request->EscapeString(object), + // Adding this parameter means HTTP_CODE_PRECONDITION_FAILED + // will be returned if the object already exists, so avoid reuploading. + "&ifGenerationMatch=0")); + + request->SetPostEmptyBody(); + request->SetTimeouts(timeouts_.connect, timeouts_.idle, timeouts_.metadata); + const Status& status = request->Send(); + if (status.ok()) { + VLOG(3) << "CreateDir: finished uploading directory " << dirname; + return Status::OK(); + } + if (request->GetResponseCode() != HTTP_CODE_PRECONDITION_FAILED) { + TF_RETURN_WITH_CONTEXT_IF_ERROR(status, " when uploading ", + dirname_with_slash); + } + VLOG(3) << "Ignoring directory already exists on object " + << dirname_with_slash; + return errors::AlreadyExists(dirname); } // Checks that the directory is empty (i.e no objects with this prefix exist). diff --git a/tensorflow/core/platform/cloud/gcs_file_system_test.cc b/tensorflow/core/platform/cloud/gcs_file_system_test.cc index 6892bd7cc26..c8e72487bbe 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system_test.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system_test.cc @@ -3198,32 +3198,47 @@ TEST(GcsFileSystemTest, IsDirectory_BucketNotFound) { TEST(GcsFileSystemTest, CreateDir_Folder) { std::vector requests( - {new FakeHttpRequest( - "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/" - "subpath%2F?fields=size%2Cgeneration%2Cupdated\n" - "Auth Token: fake_token\n" - "Timeouts: 5 1 10\n", - "{}"), - new FakeHttpRequest( - "Uri: https://www.googleapis.com/upload/storage/v1/b/bucket/o?" 
- "uploadType=resumable&name=subpath%2F\n" - "Auth Token: fake_token\n" - "Header X-Upload-Content-Length: 0\n" - "Post: yes\n" - "Timeouts: 5 1 10\n", - "", {{"Location", "https://custom/upload/location"}}), - new FakeHttpRequest("Uri: https://custom/upload/location\n" - "Auth Token: fake_token\n" - "Timeouts: 5 1 30\n" - "Put body: \n", - ""), - new FakeHttpRequest( - "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/" - "subpath%2F?fields=size%2Cgeneration%2Cupdated\n" - "Auth Token: fake_token\n" - "Timeouts: 5 1 10\n", - strings::StrCat("{\"size\": \"1010\",\"generation\": \"1\"," - "\"updated\": \"2016-04-29T23:15:24.896Z\"}"))}); + + { + // File doesn't exist. + new FakeHttpRequest( + "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/" + "subpath%2F?fields=size%2Cgeneration%2Cupdated\n" + "Auth Token: fake_token\n" + "Timeouts: 5 1 10\n", + "{}"), + // Simple upload. + new FakeHttpRequest( + "Uri: https://www.googleapis.com/upload/storage/v1/b/bucket/o?" + "uploadType=media&name=subpath%2F&ifGenerationMatch=0\n" + "Auth Token: fake_token\n" + "Post: yes\n" + "Timeouts: 5 1 10\n", + ""), + // File exists. + new FakeHttpRequest( + "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/" + "subpath%2F?fields=size%2Cgeneration%2Cupdated\n" + "Auth Token: fake_token\n" + "Timeouts: 5 1 10\n", + strings::StrCat("{\"size\": \"1010\",\"generation\": \"1\"," + "\"updated\": \"2016-04-29T23:15:24.896Z\"}")), + // File doesn't exist again. + new FakeHttpRequest( + "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/" + "subpath%2F?fields=size%2Cgeneration%2Cupdated\n" + "Auth Token: fake_token\n" + "Timeouts: 5 1 10\n", + "{}"), + // Simulate object uploaded in between. + new FakeHttpRequest( + "Uri: https://www.googleapis.com/upload/storage/v1/b/bucket/o?" + "uploadType=media&name=subpath%2F&ifGenerationMatch=0\n" + "Auth Token: fake_token\n" + "Post: yes\n" + "Timeouts: 5 1 10\n", + "", errors::FailedPrecondition("412"), 412), + }); GcsFileSystem fs( std::unique_ptr(new FakeAuthProvider), std::unique_ptr( @@ -3236,8 +3251,14 @@ TEST(GcsFileSystemTest, CreateDir_Folder) { nullptr /* gcs additional header */, false /* compose append */); TF_EXPECT_OK(fs.CreateDir("gs://bucket/subpath")); - EXPECT_EQ(errors::AlreadyExists("gs://bucket/subpath/"), - fs.CreateDir("gs://bucket/subpath/")); + // Check that when GCS returns the object already exists return that the + // directory already exists. + EXPECT_EQ(errors::AlreadyExists("gs://bucket/subpath"), + fs.CreateDir("gs://bucket/subpath")); + // Check that when GCS returns the object already has a version (failed + // precondition) return directory already exists. + EXPECT_EQ(errors::AlreadyExists("gs://bucket/subpath"), + fs.CreateDir("gs://bucket/subpath")); } TEST(GcsFileSystemTest, CreateDir_Bucket) { From c2f231507c0b8b7abb8097323545f5810a208bda Mon Sep 17 00:00:00 2001 From: Gaurav Agrawal Date: Mon, 20 Jul 2020 12:08:09 -0700 Subject: [PATCH 0845/2522] Add DT_INT8 and DT_UINT8 as supported TPU type. PiperOrigin-RevId: 322196027 Change-Id: I626d66c587ea0231cf1665b6d3349a13499b57e4 --- tensorflow/core/tpu/tpu_defs.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/tpu/tpu_defs.h b/tensorflow/core/tpu/tpu_defs.h index db392ddd6eb..1c4b4c4e38e 100644 --- a/tensorflow/core/tpu/tpu_defs.h +++ b/tensorflow/core/tpu/tpu_defs.h @@ -51,9 +51,10 @@ extern const char* const kTPUReplicateAttr; extern const char* const kOutsideCompilationAttr; // Supported types for TPUs. 
-static constexpr std::array kTpuAllTypes = { +static constexpr std::array kTpuAllTypes = { {DT_INT32, DT_UINT32, DT_BFLOAT16, DT_FLOAT, DT_DOUBLE, DT_BOOL, - DT_COMPLEX64, DT_INT64, DT_UINT64, DT_QINT8, DT_QUINT8}}; + DT_COMPLEX64, DT_INT64, DT_UINT64, DT_QINT8, DT_QUINT8, DT_INT8, + DT_UINT8}}; } // namespace tensorflow From 89d00590011c8b85fc95b75ccfedd15c72dc8423 Mon Sep 17 00:00:00 2001 From: Russell Power Date: Mon, 20 Jul 2020 12:10:46 -0700 Subject: [PATCH 0846/2522] Add cross replica op definitions. PiperOrigin-RevId: 322196613 Change-Id: Id8e2dc1eb17aed50fb1fad775f040ba4cf4636ce --- tensorflow/core/tpu/BUILD | 24 ++ tensorflow/core/tpu/kernels/BUILD | 19 + .../core/tpu/kernels/cross_replica_ops.cc | 139 ++++++++ tensorflow/core/tpu/tpu_library_init_fns.inc | 7 + tensorflow/core/tpu/tpu_on_demand_compiler.cc | 336 ++++++++++++++++++ .../stream_executor/tpu/tpu_executor_c_api.h | 10 +- 6 files changed, 532 insertions(+), 3 deletions(-) create mode 100644 tensorflow/core/tpu/kernels/cross_replica_ops.cc create mode 100644 tensorflow/core/tpu/tpu_on_demand_compiler.cc diff --git a/tensorflow/core/tpu/BUILD b/tensorflow/core/tpu/BUILD index d82011c6961..c781c5f67ac 100644 --- a/tensorflow/core/tpu/BUILD +++ b/tensorflow/core/tpu/BUILD @@ -161,6 +161,7 @@ cc_library( ":tpu_system_device", "//tensorflow/stream_executor/tpu:tpu_executor", "//tensorflow/stream_executor/tpu:tpu_transfer_manager", + "//tensorflow/core/tpu:tpu_on_demand_compiler", ], "//conditions:default": [], }), @@ -269,3 +270,26 @@ cc_library( "@com_google_absl//absl/memory", ], ) + +cc_library( + name = "tpu_on_demand_compiler", + srcs = ["tpu_on_demand_compiler.cc"], + deps = [ + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:xla_data_proto_cc", + "//tensorflow/compiler/xla/service:compiler", + "//tensorflow/compiler/xla/service:executable", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_cost_analysis", + "//tensorflow/compiler/xla/service:hlo_module_group", + "//tensorflow/compiler/xla/service:shaped_buffer", + "//tensorflow/stream_executor:device_memory_allocator", + "//tensorflow/stream_executor/tpu:c_api_conversions", + "//tensorflow/stream_executor/tpu:proto_helper", + "//tensorflow/stream_executor/tpu:status_helper", + "//tensorflow/stream_executor/tpu:tpu_executor", + "//tensorflow/stream_executor/tpu:tpu_executor_c_api_hdrs", + "@com_google_absl//absl/types:span", + ], + alwayslink = True, +) diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index af7c9ead791..9da16032121 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -594,3 +594,22 @@ cc_library( ], alwayslink = True, ) + +cc_library( + name = "cross_replica_ops", + srcs = ["cross_replica_ops.cc"], + deps = [ + "//tensorflow/compiler/tf2xla:common", + "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:xla_data_proto_cc", + "//tensorflow/compiler/xla/client:xla_builder", + "//tensorflow/core:core_cpu_internal", + "//tensorflow/core:framework", + "//tensorflow/core:graph", + "//tensorflow/core:lib", + "@com_google_absl//absl/strings", + ], + alwayslink = 1, +) diff --git a/tensorflow/core/tpu/kernels/cross_replica_ops.cc b/tensorflow/core/tpu/kernels/cross_replica_ops.cc new file mode 100644 index 00000000000..89dba79cc63 --- /dev/null +++ b/tensorflow/core/tpu/kernels/cross_replica_ops.cc @@ -0,0 +1,139 @@ +/* Copyright 2020 The 
TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include + +#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" +#include "tensorflow/compiler/tf2xla/xla_op_registry.h" +#include "tensorflow/compiler/xla/client/xla_builder.h" +#include "tensorflow/core/framework/kernel_def_builder.h" +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { +namespace { + +// Convert 1D group_assignment into 2D replica_groups. +std::vector Convert( + const std::vector& group_assignment, + const TensorShape& group_assignment_shape) { + VLOG(1) << "group_assignment size: " << group_assignment.size(); + VLOG(1) << "group_assignment_shape: " << group_assignment_shape.DebugString(); + + std::vector replica_groups; + const int64 num_groups = group_assignment_shape.dim_size(0); + const int64 num_replica_per_group = group_assignment_shape.dim_size(1); + + replica_groups.reserve(num_groups); + for (int64 g = 0; g < num_groups; ++g) { + xla::ReplicaGroup replica_group; + + for (int64 i = 0; i < num_replica_per_group; ++i) { + int64 replica = group_assignment[num_replica_per_group * g + i]; + replica_group.add_replica_ids(replica); + } + replica_groups.push_back(replica_group); + } + return replica_groups; +} + +class CrossReplicaSumOp : public XlaOpKernel { + public: + explicit CrossReplicaSumOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} + + void Compile(XlaOpKernelContext* ctx) override { + std::vector flattened_group_assignment; + OP_REQUIRES_OK(ctx, ctx->ConstantInputReshapedToIntVector( + 1, &flattened_group_assignment)); + std::vector replica_groups = + Convert(flattened_group_assignment, ctx->InputShape(1)); + ctx->SetOutput(0, xla::CrossReplicaSum(ctx->Input(0), replica_groups)); + } + + private: + TF_DISALLOW_COPY_AND_ASSIGN(CrossReplicaSumOp); +}; + +class AllToAllOp : public XlaOpKernel { + public: + explicit AllToAllOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("split_dimension", &split_dimension_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("concat_dimension", &concat_dimension_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("split_count", &split_count_)); + } + + void Compile(XlaOpKernelContext* ctx) override { + std::vector flattened_group_assignment; + OP_REQUIRES_OK(ctx, ctx->ConstantInputReshapedToIntVector( + 1, &flattened_group_assignment)); + + std::vector replica_groups = + Convert(flattened_group_assignment, ctx->InputShape(1)); + ctx->SetOutput( + 0, xla::AllToAll(ctx->Input(0), split_dimension_, concat_dimension_, + split_count_, replica_groups)); + } + + private: + int64 split_dimension_; + int64 concat_dimension_; + int64 split_count_; + + TF_DISALLOW_COPY_AND_ASSIGN(AllToAllOp); +}; + +class CollectivePermuteOp : public XlaOpKernel { + public: + explicit CollectivePermuteOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} + + void Compile(XlaOpKernelContext* ctx) override { + const TensorShape source_target_shape = ctx->InputShape(1); + 
OP_REQUIRES( + ctx, + source_target_shape.dims() == 2 && source_target_shape.dim_size(1) == 2, + errors::InvalidArgument( + "CollectivePermuteOp requires source_target_pair's shape to" + " [num_pairs, 2]. Get ", + source_target_shape)); + + xla::Literal source_target_literal; + OP_REQUIRES_OK(ctx, + ctx->ConstantInputAsInt64Literal(1, &source_target_literal)); + const int num_pairs = source_target_shape.dim_size(0); + std::vector> source_target_pairs(num_pairs); + for (int i = 0; i < num_pairs; ++i) { + source_target_pairs[i] = {source_target_literal.Get({i, 0}), + source_target_literal.Get({i, 1})}; + } + ctx->SetOutput(0, + xla::CollectivePermute(ctx->Input(0), source_target_pairs)); + } + + private: + TF_DISALLOW_COPY_AND_ASSIGN(CollectivePermuteOp); +}; + +REGISTER_XLA_OP(Name("AllToAll").CompileTimeConstantInput("group_assignment"), + AllToAllOp); +REGISTER_XLA_OP(Name("CollectivePermute") + .TypeConstraint("T", {DT_FLOAT, DT_BFLOAT16, DT_INT32, + DT_COMPLEX64}) + .CompileTimeConstantInput("source_target_pairs"), + CollectivePermuteOp); +REGISTER_XLA_OP( + Name("CrossReplicaSum").CompileTimeConstantInput("group_assignment"), + CrossReplicaSumOp); + +} // anonymous namespace +} // namespace tensorflow diff --git a/tensorflow/core/tpu/tpu_library_init_fns.inc b/tensorflow/core/tpu/tpu_library_init_fns.inc index 06197870fee..f811769b364 100644 --- a/tensorflow/core/tpu/tpu_library_init_fns.inc +++ b/tensorflow/core/tpu/tpu_library_init_fns.inc @@ -174,6 +174,13 @@ tensorflow::Status SetExecutorStructFn(void* library_handle) { TFTPU_SET_FN(executor_fn, TpuCoreLocation_Index); TFTPU_SET_FN(executor_fn, TpuCoreLocation_Id); + TFTPU_SET_FN(executor_fn, TpuCompiler_RunHloPasses); + TFTPU_SET_FN(executor_fn, TpuCompiler_RunBackend); + TFTPU_SET_FN(executor_fn, TpuCompiler_Compile); + TFTPU_SET_FN(executor_fn, TpuCompiler_ShapeSize); + TFTPU_SET_FN(executor_fn, TpuExecutable_ExecuteAsyncOnStream); + TFTPU_SET_FN(executor_fn, TpuExecutable_Free); + return tensorflow::Status::OK(); } diff --git a/tensorflow/core/tpu/tpu_on_demand_compiler.cc b/tensorflow/core/tpu/tpu_on_demand_compiler.cc new file mode 100644 index 00000000000..66cd6869700 --- /dev/null +++ b/tensorflow/core/tpu/tpu_on_demand_compiler.cc @@ -0,0 +1,336 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
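As an illustrative aside (not taken from any patch in this series): the Convert() helper in cross_replica_ops.cc above reshapes the flattened group_assignment of shape [num_groups, num_replica_per_group] into one replica group per row. The same row slicing can be sketched in a few lines of plain Python; the function name is invented for the sketch.

def to_replica_groups(flat_group_assignment, num_groups, per_group):
    # Row g of the [num_groups, per_group] assignment becomes replica group g.
    return [flat_group_assignment[g * per_group:(g + 1) * per_group]
            for g in range(num_groups)]

print(to_replica_groups([0, 1, 2, 3, 4, 5], num_groups=2, per_group=3))
# [[0, 1, 2], [3, 4, 5]]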
+==============================================================================*/ +#include + +#include "absl/types/span.h" +#include "tensorflow/compiler/xla/service/compiler.h" +#include "tensorflow/compiler/xla/service/executable.h" +#include "tensorflow/compiler/xla/service/hlo_cost_analysis.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_module_group.h" +#include "tensorflow/compiler/xla/service/shaped_buffer.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/stream_executor/device_memory_allocator.h" +#include "tensorflow/stream_executor/tpu/c_api_conversions.h" +#include "tensorflow/stream_executor/tpu/proto_helper.h" +#include "tensorflow/stream_executor/tpu/status_helper.h" +#include "tensorflow/stream_executor/tpu/tpu_executor.h" +#include "tensorflow/stream_executor/tpu/tpu_executor_c_api.h" +#include "tensorflow/stream_executor/tpu/tpu_platform.h" + +namespace xla { + +namespace { + +using ::tensorflow::tpu::ExecutorApiFn; + +// TODO(power) -- dedup +inline xla::ShapedBuffer CShapedBufferToXLAShapedBuffer( + XLA_ShapedBuffer* c_buffer) { + xla::Shape xla_on_host_shape = + TpuConversions::CShapeToXlaShape(&c_buffer->on_host_shape); + xla::Shape xla_on_device_shape = + TpuConversions::CShapeToXlaShape(&c_buffer->on_device_shape); + + xla::ShapeTree xla_shape_tree( + xla_on_device_shape); + size_t i = 0; + for (auto& pair : xla_shape_tree) { + pair.second = TpuConversions::SE_DeviceMemoryBaseToDeviceMemoryBase( + c_buffer->bases[i]); + i++; + } + + xla::ShapedBuffer xla_shaped_buffer( + xla_on_host_shape, xla_on_device_shape, + tensorflow::tpu::TpuPlatformInterface::GetRegisteredPlatform(), + c_buffer->device_ordinal); + xla_shaped_buffer.set_buffers(xla_shape_tree); + return xla_shaped_buffer; +} + +class TpuExecutable : public Executable { + public: + TpuExecutable(SE_Executable* se_executable, + std::shared_ptr hlo_module) + : Executable(std::move(hlo_module), nullptr, nullptr), + se_executable_(se_executable) {} + + ~TpuExecutable() override { + ExecutorApiFn()->TpuExecutable_FreeFn(se_executable_); + } + + StatusOr ExecuteAsyncOnStream( + const ServiceExecutableRunOptions* run_options, + std::vector arguments, + HloExecutionProfile* hlo_execution_profile) override { + SE_ExecutableRunOptions se_run_options = + TpuConversions::ExecutableRunOptionsToSE_ExecutableRunOptions( + *run_options); + SE_ExecutionInput** se_args = new SE_ExecutionInput*[arguments.size()]; + for (int i = 0; i < arguments.size(); ++i) { + auto& arg = arguments[i]; + se_args[i] = new SE_ExecutionInput; + + TpuConversions::XlaShapeToCShape(arg.shape(), + &se_args[i]->shape_tree.shape); + auto* arg_buffers = arg.MutableBuffers(); + absl::InlinedVector se_buffers; + for (auto& pair : *arg_buffers) { + se_buffers.push_back( + TpuConversions::SEMaybeOwningDeviceMemoryToC(pair.second)); + } + se_args[i]->shape_tree.buffers = + new SE_MaybeOwningDeviceMemory[se_buffers.size()]; + for (int j = 0; j < se_buffers.size(); ++j) { + se_args[i]->shape_tree.buffers[j] = se_buffers[j]; + } + + TpuConversions::XlaShapeToCShape(arg.shape(), &se_args[i]->dynamic_shape); + TpuConversions::XlaShapeToCShape(arg.host_shape(), + &se_args[i]->host_shape); + const auto& unowned_indices = arg.unowned_indices(); + se_args[i]->unowned_indices_size = unowned_indices.size(); + se_args[i]->unowned_indices = new XLA_ShapeIndex[unowned_indices.size()]; + int j = 0; + for (auto& idx : unowned_indices) { + 
se_args[i]->unowned_indices[j] = + TpuConversions::XlaShapeIndexToCShapeIndex(idx); + ++j; + } + } + SE_ExecutionOutput se_execution_output; + StatusHelper status; + ExecutorApiFn()->TpuExecutable_ExecuteAsyncOnStreamFn( + se_executable_, &se_run_options, se_args, arguments.size(), nullptr, + &se_execution_output, status.c_status); + if (!status.ok()) { + return status.status(); + } + + xla::ScopedShapedBuffer result( + CShapedBufferToXLAShapedBuffer(&se_execution_output.result), + run_options->stream()->parent()->GetAllocator()); + + ExecutionOutput output(std::move(result)); + for (int i = 0; i < se_execution_output.aliased_indices_size; ++i) { + output.AddAliasedIndex(TpuConversions::CShapeIndexToXlaShapeIndex( + &se_execution_output.aliased_indices[i])); + } + + for (int i = 0; i < se_execution_output.to_be_released_size; ++i) { + output.AddToBeReleased( + TpuConversions::COwningDeviceMemToSEOwningDeviceMem( + &se_execution_output.to_be_released[i], + run_options->stream()->parent()->GetAllocator()) + .Release() + .value()); + } + + return output; + } + + private: + SE_Executable* se_executable_; +}; + +XLA_HloModuleConfig HloModuleConfigToC(const xla::HloModuleConfig& config) { + XLA_HloModuleConfig hlo_config{ + .seed = config.seed(), + .launch_id = config.launch_id(), + .replica_count = config.replica_count(), + .num_partitions = config.num_partitions(), + .use_spmd_partitioning = config.use_spmd_partitioning(), + .has_static_device_assignment = config.has_static_device_assignment(), + .has_entry_computation_layout = config.has_entry_computation_layout()}; + if (config.has_static_device_assignment()) { + DeviceAssignmentProto dev_proto; + config.static_device_assignment().Serialize(&dev_proto).IgnoreError(); + hlo_config.static_device_assignment = + stream_executor::tpu::SerializeProto(dev_proto); + } + if (config.has_entry_computation_layout()) { + auto layout = config.entry_computation_layout(); + TpuConversions::XlaShapeToCShape( + layout.result_layout().shape(), + &hlo_config.entry_computation_layout.result_layout); + hlo_config.entry_computation_layout.parameter_layouts = + new XLA_Shape[layout.parameter_count()]; + for (int i = 0; i < layout.parameter_count(); ++i) { + TpuConversions::XlaShapeToCShape( + layout.parameter_layout(i).shape(), + &hlo_config.entry_computation_layout.parameter_layouts[i]); + } + hlo_config.entry_computation_layout.parameter_count = + layout.parameter_count(); + } + return hlo_config; +} + +class TpuCompiler : public Compiler { + public: + TpuCompiler() { compiler_ = TpuCompiler_New(); } + ~TpuCompiler() override {} + + stream_executor::Platform::Id PlatformId() const override { + return tensorflow::TpuPlatform::kId; + } + + StatusOr> RunHloPasses( + std::unique_ptr module, + stream_executor::StreamExecutor* executor, + stream_executor::DeviceMemoryAllocator* device_allocator) override { + XLA_HloModule hlo_module; + hlo_module.module_config = HloModuleConfigToC(module->config()); + hlo_module.proto = stream_executor::tpu::SerializeProto(module->ToProto()); + auto allocator = TpuConversions::AllocatorToSE_Allocator(device_allocator); + XLA_HloModule result; + StatusHelper status; + ExecutorApiFn()->TpuCompiler_RunHloPassesFn( + compiler_, &hlo_module, + static_cast(executor->implementation()) + ->se_executor(), + &allocator, &result, status.c_status); + if (!status.ok()) { + return status.status(); + } + HloModuleProto result_proto = + stream_executor::tpu::DeserializeProto(result.proto); + return HloModule::CreateFromProto(result_proto, 
module->config()); + } + + StatusOr< + std::tuple, std::unique_ptr>> + RunHloPassesAndBufferAssignement( + std::unique_ptr module, + stream_executor::StreamExecutor* executor, + stream_executor::DeviceMemoryAllocator* device_allocator) override { + return Unimplemented( + "This compiler does not support RunHloPassesAndBufferAssignment."); + } + + StatusOr> RunBackend( + std::unique_ptr module, + stream_executor::StreamExecutor* executor, + stream_executor::DeviceMemoryAllocator* device_allocator) override { + XLA_HloModule hlo_module; + hlo_module.module_config = HloModuleConfigToC(module->config()); + hlo_module.proto = stream_executor::tpu::SerializeProto(module->ToProto()); + auto allocator = TpuConversions::AllocatorToSE_Allocator(device_allocator); + + SE_Executable* result; + StatusHelper status; + ExecutorApiFn()->TpuCompiler_RunBackendFn( + compiler_, &hlo_module, + static_cast(executor->implementation()) + ->se_executor(), + &allocator, &result, status.c_status); + if (!status.ok()) { + return status.status(); + } + + std::unique_ptr exec = + absl::make_unique(result, std::move(module)); + return exec; + } + + StatusOr>> Compile( + std::unique_ptr module_group, + std::vector> stream_exec, + stream_executor::DeviceMemoryAllocator* device_allocator) override { + XLA_HloModuleGroup se_module_group; + se_module_group.proto = + stream_executor::tpu::SerializeProto(module_group->ToProto()); + se_module_group.module_config = + new XLA_HloModuleConfig[module_group->size()]; + for (int i = 0; i < module_group->size(); ++i) { + const auto& config = module_group->module(i).config(); + se_module_group.module_config[i] = HloModuleConfigToC(config); + } + + SE_StreamExecutorList* se_lists = + new SE_StreamExecutorList[stream_exec.size()]; + for (int i = 0; i < stream_exec.size(); ++i) { + se_lists[i].exec = new SE_StreamExecutor*[stream_exec[i].size()]; + for (int j = 0; j < stream_exec[i].size(); ++j) { + se_lists[i].exec[j] = static_cast( + stream_exec[i][j]->implementation()) + ->se_executor(); + } + } + + SE_DeviceMemoryAllocator allocator = + TpuConversions::AllocatorToSE_Allocator(device_allocator); + + SE_Executable** se_executables = new SE_Executable*[module_group->size()]; + + StatusHelper status; + + ExecutorApiFn()->TpuCompiler_CompileFn( + compiler_, &se_module_group, se_lists, stream_exec.size(), &allocator, + se_executables, status.c_status); + + if (!status.ok()) { + return status.status(); + } + + std::vector> executables; + std::vector> modules = + module_group->ConsumeModules(); + for (int i = 0; i < module_group->size(); ++i) { + executables[i] = absl::make_unique(se_executables[i], + std::move(modules[i])); + } + + return executables; + } + + // Compiles the HLO module group for ahead-of-time execution. This is + // intended for use in static compilation. + StatusOr>> + CompileAheadOfTime(std::unique_ptr module_group, + const AotCompilationOptions& options) override { + return Unimplemented("This compiler does not support CompileAheadOfTime."); + } + + // Returns a function that computes the size in bytes of the logical + // buffer that contains a shape. 
+ HloCostAnalysis::ShapeSizeFunction ShapeSizeBytesFunction() const override { + return [this](const xla::Shape& shape) { + XLA_Shape c_shape; + TpuConversions::XlaShapeToCShape(shape, &c_shape); + int64 bytes = + ExecutorApiFn()->TpuCompiler_ShapeSizeFn(compiler_, &c_shape); + TpuConversions::CShapeCleanup(&c_shape); + return bytes; + }; + } + + private: + Tpu_Compiler* compiler_; +}; + +static bool InitModule() { + xla::Compiler::RegisterCompilerFactory(tensorflow::TpuPlatform::kId, []() { + return absl::make_unique(); + }); + return true; +} + +static bool module_initialized = InitModule(); + +} // namespace +} // namespace xla diff --git a/tensorflow/stream_executor/tpu/tpu_executor_c_api.h b/tensorflow/stream_executor/tpu/tpu_executor_c_api.h index e80086bcb69..3795bdd0cf6 100644 --- a/tensorflow/stream_executor/tpu/tpu_executor_c_api.h +++ b/tensorflow/stream_executor/tpu/tpu_executor_c_api.h @@ -436,9 +436,6 @@ TFTPU_CAPI_EXPORT void TpuCompiler_Compile( TFTPU_CAPI_EXPORT int64_t TpuCompiler_ShapeSize(Tpu_Compiler* compiler, XLA_Shape* c_shape); -TFTPU_CAPI_EXPORT void TpuExecutable_HloModule(SE_Executable* executable, - TpuSerializedProto* proto); - TFTPU_CAPI_EXPORT void TpuExecutable_ExecuteAsyncOnStream( SE_Executable* executable, SE_ExecutableRunOptions* run_options, SE_ExecutionInput** se_arguments, int se_arguments_size, @@ -550,6 +547,13 @@ struct TfTpu_ExecutorApiFn { TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_ChipCoordinates_Z); TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_Index); TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_Id); + + TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_RunHloPasses); + TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_RunBackend); + TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_Compile); + TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_ShapeSize); + TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_ExecuteAsyncOnStream); + TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_Free); }; } From 87c8c18762298993aa67370cd4b502567c4fee78 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Mon, 20 Jul 2020 12:24:30 -0700 Subject: [PATCH 0847/2522] Fixed transpose attributes parsing. Treating 2d shapes as BC and 3d shapes as BWC. 
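As an illustrative aside (not taken from any patch in this series): the transpose fix below remaps a 2-D (BC) or 3-D (BWC) permutation onto the delegate's fixed BHWC layout, leaving the absent axes in place. The general idea - lifting a permutation over a subset of named axes to a full four-axis permutation - can be sketched generically in Python; this is not the delegate's exact index arithmetic, only the concept.

def lift_perm(perm, axes, all_axes=("B", "H", "W", "C")):
    # `perm` permutes the named `axes` (e.g. ("B", "W", "C") for a 3-D
    # tensor); every axis not mentioned (here H) stays where it is.  The
    # result is the equivalent permutation over all_axes as source indices.
    src_for_dst = {axes[i]: axes[p] for i, p in enumerate(perm)}
    full_order = [src_for_dst.get(a, a) for a in all_axes]
    return [all_axes.index(a) for a in full_order]

print(lift_perm([2, 1, 0], ("B", "W", "C")))  # reverse BWC  -> [3, 1, 2, 0]
print(lift_perm([0, 2, 1], ("B", "W", "C")))  # swap W and C -> [0, 1, 3, 2]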
PiperOrigin-RevId: 322199495 Change-Id: Ic409376aa8eb9d5cb72ea8b2bed5c76e83ba52e1 --- .../delegates/gpu/common/model_builder.cc | 28 +++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/common/model_builder.cc b/tensorflow/lite/delegates/gpu/common/model_builder.cc index ea9ca70aba2..d09a8133770 100644 --- a/tensorflow/lite/delegates/gpu/common/model_builder.cc +++ b/tensorflow/lite/delegates/gpu/common/model_builder.cc @@ -2103,12 +2103,36 @@ class TransposeOperationParser : public TFLiteOperationParser { TransposeAttributes attr; Tensor perm; RETURN_IF_ERROR(reader->ReadTensor(1, &perm)); + std::map axis_to_index = {{Axis::BATCH, 0}, + {Axis::HEIGHT, 1}, + {Axis::WIDTH, 2}, + {Axis::CHANNELS, 3}}; if (perm.data.size() == 4) { attr.perm = BHWC(perm.data[0], perm.data[1], perm.data[2], perm.data[3]); } else if (perm.data.size() == 3) { - attr.perm = BHWC(0, perm.data[0] + 1, perm.data[1] + 1, perm.data[2] + 1); + std::vector index_to_axis = {Axis::CHANNELS, Axis::WIDTH, + Axis::BATCH}; + std::map remap = { + {Axis::HEIGHT, Axis::HEIGHT}, + {index_to_axis[perm.data[2]], Axis::BATCH}, + {index_to_axis[perm.data[1]], Axis::WIDTH}, + {index_to_axis[perm.data[0]], Axis::CHANNELS}}; + attr.perm.b = axis_to_index[remap[Axis::BATCH]]; + attr.perm.h = axis_to_index[remap[Axis::HEIGHT]]; + attr.perm.w = axis_to_index[remap[Axis::WIDTH]]; + attr.perm.c = axis_to_index[remap[Axis::CHANNELS]]; + } else if (perm.data.size() == 2) { - attr.perm = BHWC(0, 1, perm.data[0] + 2, perm.data[1] + 2); + std::vector index_to_axis = {Axis::CHANNELS, Axis::BATCH}; + std::map remap = { + {Axis::HEIGHT, Axis::HEIGHT}, + {Axis::WIDTH, Axis::WIDTH}, + {index_to_axis[perm.data[1]], Axis::BATCH}, + {index_to_axis[perm.data[0]], Axis::CHANNELS}}; + attr.perm.b = axis_to_index[remap[Axis::BATCH]]; + attr.perm.h = axis_to_index[remap[Axis::HEIGHT]]; + attr.perm.w = axis_to_index[remap[Axis::WIDTH]]; + attr.perm.c = axis_to_index[remap[Axis::CHANNELS]]; } else { return absl::InvalidArgumentError( "Permutation for transpose is invalid."); From 536f6be302d945513fedff57e65d89f6e0e026db Mon Sep 17 00:00:00 2001 From: Rachel Lim Date: Mon, 20 Jul 2020 12:37:36 -0700 Subject: [PATCH 0848/2522] tflite: Refactor core/macros.h, c/c_api.h and c/common.h - Move TFL_CAPI_EXPORT to c/common.h to make them available for delegates. - Create lite:macros module to make core/macros.h available for delegates. 
PiperOrigin-RevId: 322202106 Change-Id: I45fe26dea2415167888479e6fe54cb4fd35b70d1 --- tensorflow/lite/BUILD | 6 +----- tensorflow/lite/c/c_api.h | 14 ++++++++++++++ tensorflow/lite/c/common.h | 16 ---------------- tensorflow/lite/delegates/flex/BUILD | 1 - tensorflow/lite/delegates/flex/delegate.cc | 9 +++++---- .../lite/delegates/gpu/cl/gpu_api_delegate.h | 14 ++++++++++++++ tensorflow/lite/delegates/gpu/delegate.h | 14 ++++++++++++++ tensorflow/lite/delegates/gpu/gl_delegate.h | 14 ++++++++++++++ tensorflow/lite/delegates/gpu/metal_delegate.h | 14 +++++++++++++- .../lite/delegates/hexagon/hexagon_delegate.h | 14 ++++++++++++++ .../tools/benchmark/experimental/c/c_api_types.h | 16 ---------------- 11 files changed, 89 insertions(+), 43 deletions(-) diff --git a/tensorflow/lite/BUILD b/tensorflow/lite/BUILD index 61b9972c4d9..1c0882ef0aa 100644 --- a/tensorflow/lite/BUILD +++ b/tensorflow/lite/BUILD @@ -650,14 +650,10 @@ cc_test( cc_library( name = "shared_library", hdrs = ["shared_library.h"], + copts = TFLITE_DEFAULT_COPTS, linkopts = if_not_windows(["-ldl"]), ) -cc_library( - name = "macros", - hdrs = ["core/macros.h"], -) - # Shared lib target for convenience, pulls in the core runtime and builtin ops. # Note: This target is not yet finalized, and the exact set of exported (C/C++) # APIs is subject to change. The output library name is platform dependent: diff --git a/tensorflow/lite/c/c_api.h b/tensorflow/lite/c/c_api.h index 880b80e69b4..754fc3b8bbd 100644 --- a/tensorflow/lite/c/c_api.h +++ b/tensorflow/lite/c/c_api.h @@ -66,6 +66,20 @@ limitations under the License. /// TfLiteInterpreterOptionsDelete(options); /// TfLiteModelDelete(model); +#ifdef SWIG +#define TFL_CAPI_EXPORT +#else +#if defined(_WIN32) +#ifdef TFL_COMPILE_LIBRARY +#define TFL_CAPI_EXPORT __declspec(dllexport) +#else +#define TFL_CAPI_EXPORT __declspec(dllimport) +#endif // TFL_COMPILE_LIBRARY +#else +#define TFL_CAPI_EXPORT __attribute__((visibility("default"))) +#endif // _WIN32 +#endif // SWIG + #ifdef __cplusplus extern "C" { #endif // __cplusplus diff --git a/tensorflow/lite/c/common.h b/tensorflow/lite/c/common.h index 142ee34317e..89b25892914 100644 --- a/tensorflow/lite/c/common.h +++ b/tensorflow/lite/c/common.h @@ -233,22 +233,6 @@ void TfLiteFloatArrayFree(TfLiteFloatArray* a); } \ } while (0) -// Define TFL_CAPI_EXPORT macro to export a function properly with a shared -// library. -#ifdef SWIG -#define TFL_CAPI_EXPORT -#else -#if defined(_WIN32) -#ifdef TFL_COMPILE_LIBRARY -#define TFL_CAPI_EXPORT __declspec(dllexport) -#else -#define TFL_CAPI_EXPORT __declspec(dllimport) -#endif // TFL_COMPILE_LIBRARY -#else -#define TFL_CAPI_EXPORT __attribute__((visibility("default"))) -#endif // _WIN32 -#endif // SWIG - // Single-precision complex data type compatible with the C99 definition. typedef struct TfLiteComplex64 { float re, im; // real and imaginary parts, respectively. 
diff --git a/tensorflow/lite/delegates/flex/BUILD b/tensorflow/lite/delegates/flex/BUILD index 7039a9fa952..8320ecebf9a 100644 --- a/tensorflow/lite/delegates/flex/BUILD +++ b/tensorflow/lite/delegates/flex/BUILD @@ -92,7 +92,6 @@ cc_library( "//tensorflow/lite/core/api", "//tensorflow/lite/c:common", "//tensorflow/lite:kernel_api", - "//tensorflow/lite:macros", "//tensorflow/lite:minimal_logging", "//tensorflow/lite:string", "//tensorflow/lite:string_util", diff --git a/tensorflow/lite/delegates/flex/delegate.cc b/tensorflow/lite/delegates/flex/delegate.cc index 0bbd50a7fbd..f85b5e60f91 100644 --- a/tensorflow/lite/delegates/flex/delegate.cc +++ b/tensorflow/lite/delegates/flex/delegate.cc @@ -19,9 +19,7 @@ limitations under the License. #include "absl/strings/str_cat.h" #include "tensorflow/core/lib/core/status.h" -#include "tensorflow/lite/c/common.h" #include "tensorflow/lite/context_util.h" -#include "tensorflow/lite/core/macros.h" #include "tensorflow/lite/delegates/flex/buffer_map.h" #include "tensorflow/lite/delegates/flex/kernel.h" #include "tensorflow/lite/delegates/flex/util.h" @@ -32,7 +30,7 @@ limitations under the License. namespace tflite { // Corresponding weak declaration found in lite/interpreter_builder.cc. -#if TFLITE_HAS_ATTRIBUTE_WEAK +#if !defined(_WIN32) // If weak symbol is not supported (Windows), it can use // TF_AcquireFlexDelegate() path instead. TfLiteDelegateUniquePtr AcquireFlexDelegate() { @@ -147,7 +145,10 @@ TfLiteStatus FlexDelegate::CopyFromBufferHandle( // interpreter_build.cc. To export the function name globally, the function name // must be matched with patterns in tf_version_script.lds extern "C" { -TFL_CAPI_EXPORT tflite::TfLiteDelegateUniquePtr TF_AcquireFlexDelegate() { +#if defined(_WIN32) +__declspec(dllexport) +#endif + tflite::TfLiteDelegateUniquePtr TF_AcquireFlexDelegate() { return tflite::FlexDelegate::Create(); } } // extern "C" diff --git a/tensorflow/lite/delegates/gpu/cl/gpu_api_delegate.h b/tensorflow/lite/delegates/gpu/cl/gpu_api_delegate.h index bef67c2f6eb..1a9fb73e6ab 100644 --- a/tensorflow/lite/delegates/gpu/cl/gpu_api_delegate.h +++ b/tensorflow/lite/delegates/gpu/cl/gpu_api_delegate.h @@ -23,6 +23,20 @@ limitations under the License. #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/delegates/gpu/delegate.h" +#ifdef SWIG +#define TFL_CAPI_EXPORT +#else +#if defined(_WIN32) +#ifdef TFL_COMPILE_LIBRARY +#define TFL_CAPI_EXPORT __declspec(dllexport) +#else +#define TFL_CAPI_EXPORT __declspec(dllimport) +#endif // TFL_COMPILE_LIBRARY +#else +#define TFL_CAPI_EXPORT __attribute__((visibility("default"))) +#endif // _WIN32 +#endif // SWIG + #ifdef __cplusplus extern "C" { #endif // __cplusplus diff --git a/tensorflow/lite/delegates/gpu/delegate.h b/tensorflow/lite/delegates/gpu/delegate.h index 9af586bfd75..f03392d9a3c 100644 --- a/tensorflow/lite/delegates/gpu/delegate.h +++ b/tensorflow/lite/delegates/gpu/delegate.h @@ -20,6 +20,20 @@ limitations under the License. 
#include "tensorflow/lite/c/common.h" +#ifdef SWIG +#define TFL_CAPI_EXPORT +#else +#if defined(_WIN32) +#ifdef TFL_COMPILE_LIBRARY +#define TFL_CAPI_EXPORT __declspec(dllexport) +#else +#define TFL_CAPI_EXPORT __declspec(dllimport) +#endif // TFL_COMPILE_LIBRARY +#else +#define TFL_CAPI_EXPORT __attribute__((visibility("default"))) +#endif // _WIN32 +#endif // SWIG + #ifdef __cplusplus extern "C" { #endif // __cplusplus diff --git a/tensorflow/lite/delegates/gpu/gl_delegate.h b/tensorflow/lite/delegates/gpu/gl_delegate.h index e6efd646fc3..fa8eec2ad6b 100644 --- a/tensorflow/lite/delegates/gpu/gl_delegate.h +++ b/tensorflow/lite/delegates/gpu/gl_delegate.h @@ -22,6 +22,20 @@ limitations under the License. #include "absl/base/macros.h" #include "tensorflow/lite/c/common.h" +#ifdef SWIG +#define TFL_CAPI_EXPORT +#else +#if defined(_WIN32) +#ifdef TFL_COMPILE_LIBRARY +#define TFL_CAPI_EXPORT __declspec(dllexport) +#else +#define TFL_CAPI_EXPORT __declspec(dllimport) +#endif // TFL_COMPILE_LIBRARY +#else +#define TFL_CAPI_EXPORT __attribute__((visibility("default"))) +#endif // _WIN32 +#endif // SWIG + #ifdef __cplusplus extern "C" { #endif // __cplusplus diff --git a/tensorflow/lite/delegates/gpu/metal_delegate.h b/tensorflow/lite/delegates/gpu/metal_delegate.h index e4bdba36799..1cb660c42d0 100644 --- a/tensorflow/lite/delegates/gpu/metal_delegate.h +++ b/tensorflow/lite/delegates/gpu/metal_delegate.h @@ -16,7 +16,19 @@ limitations under the License. #ifndef TENSORFLOW_LITE_DELEGATES_GPU_METAL_DELEGATE_H_ #define TENSORFLOW_LITE_DELEGATES_GPU_METAL_DELEGATE_H_ -#include "tensorflow/lite/c/common.h" +#ifdef SWIG +#define TFL_CAPI_EXPORT +#else +#if defined(_WIN32) +#ifdef TFL_COMPILE_LIBRARY +#define TFL_CAPI_EXPORT __declspec(dllexport) +#else +#define TFL_CAPI_EXPORT __declspec(dllimport) +#endif // TFL_COMPILE_LIBRARY +#else +#define TFL_CAPI_EXPORT __attribute__((visibility("default"))) +#endif // _WIN32 +#endif // SWIG #ifdef __cplusplus extern "C" { diff --git a/tensorflow/lite/delegates/hexagon/hexagon_delegate.h b/tensorflow/lite/delegates/hexagon/hexagon_delegate.h index 931b02e4fa5..264a132b995 100644 --- a/tensorflow/lite/delegates/hexagon/hexagon_delegate.h +++ b/tensorflow/lite/delegates/hexagon/hexagon_delegate.h @@ -17,6 +17,20 @@ limitations under the License. #include "tensorflow/lite/c/common.h" +#ifdef SWIG +#define TFL_CAPI_EXPORT +#else +#if defined(_WIN32) +#ifdef TFL_COMPILE_LIBRARY +#define TFL_CAPI_EXPORT __declspec(dllexport) +#else +#define TFL_CAPI_EXPORT __declspec(dllimport) +#endif // TFL_COMPILE_LIBRARY +#else +#define TFL_CAPI_EXPORT __attribute__((visibility("default"))) +#endif // _WIN32 +#endif // SWIG + #ifdef __cplusplus extern "C" { #endif // __cplusplus diff --git a/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h b/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h index 142ee34317e..89b25892914 100644 --- a/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h +++ b/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h @@ -233,22 +233,6 @@ void TfLiteFloatArrayFree(TfLiteFloatArray* a); } \ } while (0) -// Define TFL_CAPI_EXPORT macro to export a function properly with a shared -// library. 
-#ifdef SWIG -#define TFL_CAPI_EXPORT -#else -#if defined(_WIN32) -#ifdef TFL_COMPILE_LIBRARY -#define TFL_CAPI_EXPORT __declspec(dllexport) -#else -#define TFL_CAPI_EXPORT __declspec(dllimport) -#endif // TFL_COMPILE_LIBRARY -#else -#define TFL_CAPI_EXPORT __attribute__((visibility("default"))) -#endif // _WIN32 -#endif // SWIG - // Single-precision complex data type compatible with the C99 definition. typedef struct TfLiteComplex64 { float re, im; // real and imaginary parts, respectively. From f2a506484fd391dd4ff099968b960bcdd9f7ed44 Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Mon, 20 Jul 2020 19:54:22 +0000 Subject: [PATCH 0849/2522] add another test --- .../python/kernel_tests/map_ops_test.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tensorflow/python/kernel_tests/map_ops_test.py b/tensorflow/python/kernel_tests/map_ops_test.py index c66b9bbad92..688f4907457 100644 --- a/tensorflow/python/kernel_tests/map_ops_test.py +++ b/tensorflow/python/kernel_tests/map_ops_test.py @@ -180,5 +180,25 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): self.assertAllClose(g, array_ops.zeros_like(v)) self.assertAllClose(g2, 5) + def testSameKeyInsertLookupGrad2(self): + with backprop.GradientTape(persistent=True) as tape: + m = map_ops.empty_tensor_map() + k = constant_op.constant(1.0) + v = constant_op.constant(2.0) + v2 = constant_op.constant(22.0) + tape.watch(v) + tape.watch(v2) + m = map_ops.tensor_map_insert(m, k, v) + l = map_ops.tensor_map_lookup(m, k, v.dtype) + g = tape.gradient(l * 5, v) + self.assertAllClose(g, 5) + + m = map_ops.tensor_map_insert(m, k, v2) + l2 = map_ops.tensor_map_lookup(m, k, v2.dtype) + g2 = tape.gradient(l2 * 5, v2) + g3 = tape.gradient(l2 * 5, v) + self.assertAllClose(g2, 5) + self.assertAllClose(g3, array_ops.zeros_like(v)) + if __name__ == '__main__': test.main() From 800da0d15aa464e276260d05d7a94ecccd56911f Mon Sep 17 00:00:00 2001 From: Zhenyu Tan Date: Mon, 20 Jul 2020 12:57:13 -0700 Subject: [PATCH 0850/2522] Replacing nest.is_sequence with nest.is_nested PiperOrigin-RevId: 322206369 Change-Id: I21ea9f922b0c3c874aa50bd58b2eb1b1fba313f9 --- tensorflow/python/keras/backend.py | 6 ++-- .../python/keras/engine/compile_utils.py | 12 ++++---- .../python/keras/engine/data_adapter.py | 4 +-- tensorflow/python/keras/engine/functional.py | 10 +++---- tensorflow/python/keras/engine/node.py | 2 +- .../keras/layers/legacy_rnn/rnn_cell_impl.py | 6 ++-- tensorflow/python/keras/layers/recurrent.py | 28 +++++++++---------- tensorflow/python/keras/utils/tf_utils.py | 4 +-- 8 files changed, 36 insertions(+), 36 deletions(-) diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py index f3b3f6a5157..566aa75f2d9 100644 --- a/tensorflow/python/keras/backend.py +++ b/tensorflow/python/keras/backend.py @@ -4105,9 +4105,9 @@ def rnn(step_function, # That's what the tile call does, it just repeats the mask along its # second dimension n times. 
def _expand_mask(mask_t, input_t, fixed_dim=1): - if nest.is_sequence(mask_t): + if nest.is_nested(mask_t): raise ValueError('mask_t is expected to be tensor, but got %s' % mask_t) - if nest.is_sequence(input_t): + if nest.is_nested(input_t): raise ValueError('input_t is expected to be tensor, but got %s' % input_t) rank_diff = len(input_t.shape) - len(mask_t.shape) for _ in range(rank_diff): @@ -4133,7 +4133,7 @@ def rnn(step_function, input_t.reverse() return input_t - if nest.is_sequence(inputs): + if nest.is_nested(inputs): processed_input = nest.map_structure(_process_single_input_t, inputs) else: processed_input = (_process_single_input_t(inputs),) diff --git a/tensorflow/python/keras/engine/compile_utils.py b/tensorflow/python/keras/engine/compile_utils.py index 2e4e40826e5..5b8f1492fd0 100644 --- a/tensorflow/python/keras/engine/compile_utils.py +++ b/tensorflow/python/keras/engine/compile_utils.py @@ -62,7 +62,7 @@ class Container(object): struct = map_to_output_names(outputs, self._output_names, struct) struct = map_missing_dict_keys(outputs, struct) # Allow passing one object that applies to all outputs. - if not nest.is_sequence(struct) and nest.is_sequence(outputs): + if not nest.is_nested(struct) and nest.is_nested(outputs): struct = nest.map_structure(lambda _: struct, outputs) return struct @@ -267,7 +267,7 @@ class LossesContainer(Container): return loss def _should_broadcast(self, obj): - return not nest.is_sequence(obj) + return not nest.is_nested(obj) def _copy_object(self, obj): return obj # Losses don't need to be copied. @@ -478,11 +478,11 @@ class MetricsContainer(Container): def _should_broadcast(self, obj): # e.g. 'mse'. - if not nest.is_sequence(obj): + if not nest.is_nested(obj): return True # e.g. ['mse'] or ['mse', 'mae']. return (isinstance(obj, (list, tuple)) and - not any(nest.is_sequence(o) for o in obj)) + not any(nest.is_nested(o) for o in obj)) def _copy_object(self, obj): if isinstance(obj, metrics_mod.Metric): @@ -572,10 +572,10 @@ def map_to_output_names(y_pred, output_names, struct): Returns: `struct` mapped to a list in same order as `output_names`. """ - single_output = not nest.is_sequence(y_pred) + single_output = not nest.is_nested(y_pred) outputs_are_flat_list = (not single_output and isinstance(y_pred, (list, tuple)) and - not any(nest.is_sequence(y_p) for y_p in y_pred)) + not any(nest.is_nested(y_p) for y_p in y_pred)) if (single_output or outputs_are_flat_list) and isinstance(struct, dict): output_names = output_names or create_pseudo_output_names(y_pred) diff --git a/tensorflow/python/keras/engine/data_adapter.py b/tensorflow/python/keras/engine/data_adapter.py index 8184ec7a0c1..3672ef64da3 100644 --- a/tensorflow/python/keras/engine/data_adapter.py +++ b/tensorflow/python/keras/engine/data_adapter.py @@ -1300,7 +1300,7 @@ def _make_class_weight_map_fn(class_weight): """Convert `class_weight` to `sample_weight`.""" x, y, sw = unpack_x_y_sample_weight(data) - if nest.is_sequence(y): + if nest.is_nested(y): raise ValueError( "`class_weight` is only supported for Models with a single output.") @@ -1496,7 +1496,7 @@ def pack_x_y_sample_weight(x, y=None, sample_weight=None): # there is no ambiguity. 
This also makes NumPy and Dataset # consistent in that the user does not have to wrap their Dataset # data in an unecessary tuple - if not nest.is_sequence(x): + if not nest.is_nested(x): return x else: return (x,) diff --git a/tensorflow/python/keras/engine/functional.py b/tensorflow/python/keras/engine/functional.py index 6c725d0d795..2b991cc187c 100644 --- a/tensorflow/python/keras/engine/functional.py +++ b/tensorflow/python/keras/engine/functional.py @@ -130,9 +130,9 @@ class Functional(training_lib.Model): # be called with a dict, where the keys of the dict are the names # of the `Input` objects. Extra keys are ignored with warning. self._enable_dict_to_input_mapping = ( - not nest.is_sequence(self._nested_inputs) or + not nest.is_nested(self._nested_inputs) or (isinstance(self._nested_inputs, (list, tuple, dict)) and - not any(nest.is_sequence(t) for t in self._nested_inputs))) + not any(nest.is_nested(t) for t in self._nested_inputs))) if any(not hasattr(tensor, '_keras_history') for tensor in self.outputs): base_layer_utils.create_keras_history(self._nested_outputs) @@ -519,7 +519,7 @@ class Functional(training_lib.Model): """Maps `tensors` to their respective `keras.Input`.""" if self._enable_dict_to_input_mapping and isinstance(tensors, dict): ref_inputs = self._nested_inputs - if not nest.is_sequence(ref_inputs): + if not nest.is_nested(ref_inputs): ref_inputs = [self._nested_inputs] if isinstance(ref_inputs, dict): # In the case that the graph is constructed with dict input tensors, @@ -1289,7 +1289,7 @@ def get_network_config(network, serialize_layer_fn=None): tf_utils.ListWrapper([layer.name, new_node_index, tensor_index])) model_inputs = nest.pack_sequence_as(network._nested_inputs, model_inputs) # Preserve external Keras compat for Models with single input. - if not nest.is_sequence(model_inputs): + if not nest.is_nested(model_inputs): model_inputs = [model_inputs] model_inputs = tf_utils.convert_inner_node_data(model_inputs) config['input_layers'] = model_inputs @@ -1305,7 +1305,7 @@ def get_network_config(network, serialize_layer_fn=None): tf_utils.ListWrapper([layer.name, new_node_index, tensor_index])) model_outputs = nest.pack_sequence_as(network._nested_outputs, model_outputs) # Preserve external Keras compat for Models with single output. 
- if not nest.is_sequence(model_outputs): + if not nest.is_nested(model_outputs): model_outputs = [model_outputs] model_outputs = tf_utils.convert_inner_node_data(model_outputs) config['output_layers'] = model_outputs diff --git a/tensorflow/python/keras/engine/node.py b/tensorflow/python/keras/engine/node.py index f629648baf0..30771a181f1 100644 --- a/tensorflow/python/keras/engine/node.py +++ b/tensorflow/python/keras/engine/node.py @@ -198,7 +198,7 @@ class Node(object): return tf_utils.ListWrapper(data) data = nest.map_structure(serialize_first_arg_tensor, inputs) - if not nest.is_sequence(data): + if not nest.is_nested(data): data = [data] data = tf_utils.convert_inner_node_data(data) return data diff --git a/tensorflow/python/keras/layers/legacy_rnn/rnn_cell_impl.py b/tensorflow/python/keras/layers/legacy_rnn/rnn_cell_impl.py index de69cdac611..422e7afb31e 100644 --- a/tensorflow/python/keras/layers/legacy_rnn/rnn_cell_impl.py +++ b/tensorflow/python/keras/layers/legacy_rnn/rnn_cell_impl.py @@ -1235,7 +1235,7 @@ class MultiRNNCell(RNNCell): super(MultiRNNCell, self).__init__() if not cells: raise ValueError("Must specify at least one cell for MultiRNNCell.") - if not nest.is_sequence(cells): + if not nest.is_nested(cells): raise TypeError("cells must be a list or tuple, but saw: %s." % cells) if len(set(id(cell) for cell in cells)) < len(cells): @@ -1252,7 +1252,7 @@ class MultiRNNCell(RNNCell): self._track_trackable(cell, name="cell-%d" % (cell_number,)) self._state_is_tuple = state_is_tuple if not state_is_tuple: - if any(nest.is_sequence(c.state_size) for c in self._cells): + if any(nest.is_nested(c.state_size) for c in self._cells): raise ValueError("Some cells return tuples of states, but the flag " "state_is_tuple is not set. State sizes are: %s" % str([c.state_size for c in self._cells])) @@ -1309,7 +1309,7 @@ class MultiRNNCell(RNNCell): for i, cell in enumerate(self._cells): with vs.variable_scope("cell_%d" % i): if self._state_is_tuple: - if not nest.is_sequence(state): + if not nest.is_nested(state): raise ValueError( "Expected state to be a tuple of length %d, but received: %s" % (len(self.state_size), state)) diff --git a/tensorflow/python/keras/layers/recurrent.py b/tensorflow/python/keras/layers/recurrent.py index 9d10ad73749..72962eb8aef 100644 --- a/tensorflow/python/keras/layers/recurrent.py +++ b/tensorflow/python/keras/layers/recurrent.py @@ -139,7 +139,7 @@ class StackedRNNCells(Layer): # Call the cells in order and store the returned states. new_nested_states = [] for cell, states in zip(self.cells, nested_states): - states = states if nest.is_sequence(states) else [states] + states = states if nest.is_nested(states) else [states] # TF cell does not wrap the state into list when there is only one state. is_tf_rnn_cell = getattr(cell, '_is_tf_rnn_cell', None) is not None states = states[0] if len(states) == 1 and is_tf_rnn_cell else states @@ -448,7 +448,7 @@ class RNN(Layer): def states(self): if self._states is None: state = nest.map_structure(lambda _: None, self.cell.state_size) - return state if nest.is_sequence(self.cell.state_size) else [state] + return state if nest.is_nested(self.cell.state_size) else [state] return self._states @states.setter @@ -559,7 +559,7 @@ class RNN(Layer): # A nested tensor input pass - if not nest.is_sequence(input_shape): + if not nest.is_nested(input_shape): # This indicates the there is only one input. 
if self.input_spec is not None: self.input_spec[0] = get_input_spec(input_shape) @@ -632,7 +632,7 @@ class RNN(Layer): def get_initial_state(self, inputs): get_initial_state_fn = getattr(self.cell, 'get_initial_state', None) - if nest.is_sequence(inputs): + if nest.is_nested(inputs): # The input are nested sequences. Use the first element in the seq to get # batch size and dtype. inputs = nest.flatten(inputs)[0] @@ -647,7 +647,7 @@ class RNN(Layer): init_state = _generate_zero_filled_state(batch_size, self.cell.state_size, dtype) # Keras RNN expect the states in a list, even if it's a single state tensor. - if not nest.is_sequence(init_state): + if not nest.is_nested(init_state): init_state = [init_state] # Force the state to be a list in case it is a namedtuple eg LSTMStateTuple. return list(init_state) @@ -743,7 +743,7 @@ class RNN(Layer): # TODO(scottzhu): Should we accept multiple different masks? mask = nest.flatten(mask)[0] - if nest.is_sequence(inputs): + if nest.is_nested(inputs): # In the case of nested input, use the first element for shape check. input_shape = K.int_shape(nest.flatten(inputs)[0]) else: @@ -782,7 +782,7 @@ class RNN(Layer): states = states[0] if len(states) == 1 and is_tf_rnn_cell else states output, new_states = cell_call_fn( inputs, states, constants=constants, **kwargs) - if not nest.is_sequence(new_states): + if not nest.is_nested(new_states): new_states = [new_states] return output, new_states else: @@ -790,7 +790,7 @@ class RNN(Layer): def step(inputs, states): states = states[0] if len(states) == 1 and is_tf_rnn_cell else states output, new_states = cell_call_fn(inputs, states, **kwargs) - if not nest.is_sequence(new_states): + if not nest.is_nested(new_states): new_states = [new_states] return output, new_states last_output, outputs, states = K.rnn( @@ -929,7 +929,7 @@ class RNN(Layer): return K.zeros([batch_size] + tensor_shape.TensorShape(state).as_list()) self.states = nest.map_structure( create_state_variable, self.cell.state_size) - if not nest.is_sequence(self.states): + if not nest.is_nested(self.states): self.states = [self.states] elif states is None: for state, size in zip(nest.flatten(self.states), @@ -1359,7 +1359,7 @@ class SimpleRNNCell(DropoutRNNCellMixin, Layer): self.built = True def call(self, inputs, states, training=None): - prev_output = states[0] if nest.is_sequence(states) else states + prev_output = states[0] if nest.is_nested(states) else states dp_mask = self.get_dropout_mask_for_cell(inputs, training) rec_dp_mask = self.get_recurrent_dropout_mask_for_cell( prev_output, training) @@ -1377,7 +1377,7 @@ class SimpleRNNCell(DropoutRNNCellMixin, Layer): if self.activation is not None: output = self.activation(output) - new_state = [output] if nest.is_sequence(states) else output + new_state = [output] if nest.is_nested(states) else output return output, new_state def get_initial_state(self, inputs=None, batch_size=None, dtype=None): @@ -1819,7 +1819,7 @@ class GRUCell(DropoutRNNCellMixin, Layer): self.built = True def call(self, inputs, states, training=None): - h_tm1 = states[0] if nest.is_sequence(states) else states # previous memory + h_tm1 = states[0] if nest.is_nested(states) else states # previous memory dp_mask = self.get_dropout_mask_for_cell(inputs, training, count=3) rec_dp_mask = self.get_recurrent_dropout_mask_for_cell( @@ -1917,7 +1917,7 @@ class GRUCell(DropoutRNNCellMixin, Layer): hh = self.activation(x_h + recurrent_h) # previous and candidate state mixed by update gate h = z * h_tm1 + (1 - z) * hh - new_state = [h] 
if nest.is_sequence(states) else h + new_state = [h] if nest.is_nested(states) else h return h, new_state def get_config(self): @@ -3020,7 +3020,7 @@ def _generate_zero_filled_state(batch_size_tensor, state_size, dtype): init_state_size = [batch_size_tensor] + flat_dims return array_ops.zeros(init_state_size, dtype=dtype) - if nest.is_sequence(state_size): + if nest.is_nested(state_size): return nest.map_structure(create_zeros, state_size) else: return create_zeros(state_size) diff --git a/tensorflow/python/keras/utils/tf_utils.py b/tensorflow/python/keras/utils/tf_utils.py index 01f3c2a8de2..c9ad96cd37b 100644 --- a/tensorflow/python/keras/utils/tf_utils.py +++ b/tensorflow/python/keras/utils/tf_utils.py @@ -174,7 +174,7 @@ def map_structure_with_atomic(is_atomic_fn, map_fn, nested): return map_fn(nested) # Recursively convert. - if not nest.is_sequence(nested): + if not nest.is_nested(nested): raise ValueError( 'Received non-atomic and non-sequence element: {}'.format(nested)) if nest._is_mapping(nested): @@ -284,7 +284,7 @@ def convert_inner_node_data(nested, wrap=False): return True if _is_serialized_node_data(nested): return True - return not nest.is_sequence(nested) + return not nest.is_nested(nested) def _convert_object_or_list(nested): """Convert b/t `ListWrapper` object and list representations.""" From 6ee2d328fd52c90cf19ea7c053041b22e81e47ad Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Mon, 20 Jul 2020 13:14:45 -0700 Subject: [PATCH 0851/2522] Define for building OpenCL only delegate. By default OpenCL delegate supports GL interop and links GL/EGL libraries. With define we can build delegate without dependencies on GL libs. PiperOrigin-RevId: 322210374 Change-Id: Id02c53747873a474c31971553048c73d3506f4ae --- tensorflow/lite/delegates/gpu/BUILD | 12 +- tensorflow/lite/delegates/gpu/api.h | 7 +- tensorflow/lite/delegates/gpu/cl/BUILD | 19 ++- tensorflow/lite/delegates/gpu/cl/api.cc | 88 ++++++++-- tensorflow/lite/delegates/gpu/cl/api.h | 4 + .../lite/delegates/gpu/cl/gpu_api_delegate.h | 9 +- .../lite/delegates/gpu/cl/testing/BUILD | 18 ++ .../gpu/cl/testing/delegate_testing.cc | 158 ++++++++++++++++++ tensorflow/lite/delegates/gpu/delegate.cc | 9 +- 9 files changed, 301 insertions(+), 23 deletions(-) create mode 100644 tensorflow/lite/delegates/gpu/cl/testing/delegate_testing.cc diff --git a/tensorflow/lite/delegates/gpu/BUILD b/tensorflow/lite/delegates/gpu/BUILD index 54fc124cde6..0e40095f255 100644 --- a/tensorflow/lite/delegates/gpu/BUILD +++ b/tensorflow/lite/delegates/gpu/BUILD @@ -234,7 +234,14 @@ cc_library( ], "//conditions:default": [], }), - deps = [ + deps = select({ + "//tensorflow/lite/delegates/gpu/cl:opencl_delegate_no_gl": [], + "//conditions:default": [ + "//tensorflow/lite/delegates/gpu/gl:api2", + ], + }) + [ + "@com_google_absl//absl/memory", + "@com_google_absl//absl/types:span", "//tensorflow/lite:kernel_api", "//tensorflow/lite:minimal_logging", "//tensorflow/lite/c:common", @@ -247,9 +254,6 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:model_transformer", "//tensorflow/lite/delegates/gpu/common:quantization_util", "//tensorflow/lite/delegates/gpu/common:status", - "//tensorflow/lite/delegates/gpu/gl:api2", "//tensorflow/lite/kernels/internal:optimized_base", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/types:span", ], ) diff --git a/tensorflow/lite/delegates/gpu/api.h b/tensorflow/lite/delegates/gpu/api.h index 1dfeeebd700..7892d0ce2f6 100644 --- a/tensorflow/lite/delegates/gpu/api.h +++ b/tensorflow/lite/delegates/gpu/api.h 
@@ -43,9 +43,14 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/common/data_type.h" #include "tensorflow/lite/delegates/gpu/common/status.h" #include "tensorflow/lite/delegates/gpu/common/util.h" -#include "tensorflow/lite/delegates/gpu/gl/portable_gl31.h" #include +#define GL_NO_PROTOTYPES +#define EGL_NO_PROTOTYPES +#include "tensorflow/lite/delegates/gpu/gl/portable_gl31.h" +#undef GL_NO_PROTOTYPES +#undef EGL_NO_PROTOTYPES + namespace tflite { namespace gpu { diff --git a/tensorflow/lite/delegates/gpu/cl/BUILD b/tensorflow/lite/delegates/gpu/cl/BUILD index ffb9d6204ad..9155bc1166a 100644 --- a/tensorflow/lite/delegates/gpu/cl/BUILD +++ b/tensorflow/lite/delegates/gpu/cl/BUILD @@ -9,23 +9,34 @@ package( licenses = ["notice"], # Apache 2.0 ) +config_setting( + name = "opencl_delegate_no_gl", + values = {"copt": "-DCL_DELEGATE_NO_GL"}, +) + cc_library( name = "api", srcs = ["api.cc"], hdrs = ["api.h"], - deps = [ + deps = select({ + ":opencl_delegate_no_gl": [], + "//conditions:default": [ + ":egl_sync", + ":gl_interop", + ], + }) + [ ":cl_command_queue", ":cl_errors", ":cl_event", - ":egl_sync", ":environment", - ":gl_interop", ":inference_context", ":opencl_wrapper", ":precision", ":tensor", ":tensor_type", ":tensor_type_util", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/types:span", "//tensorflow/lite/delegates/gpu:api", "//tensorflow/lite/delegates/gpu/cl/kernels:converter", "//tensorflow/lite/delegates/gpu/common:data_type", @@ -33,8 +44,6 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:shape", "//tensorflow/lite/delegates/gpu/common:status", "//tensorflow/lite/delegates/gpu/common:tensor", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/types:span", ], ) diff --git a/tensorflow/lite/delegates/gpu/cl/api.cc b/tensorflow/lite/delegates/gpu/cl/api.cc index ffe0fb68881..503b04543b4 100644 --- a/tensorflow/lite/delegates/gpu/cl/api.cc +++ b/tensorflow/lite/delegates/gpu/cl/api.cc @@ -15,7 +15,9 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/cl/api.h" -#include +#ifndef CL_DELEGATE_NO_GL +#define CL_DELEGATE_ALLOW_GL +#endif #include #include @@ -25,9 +27,7 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h" #include "tensorflow/lite/delegates/gpu/cl/cl_errors.h" #include "tensorflow/lite/delegates/gpu/cl/cl_event.h" -#include "tensorflow/lite/delegates/gpu/cl/egl_sync.h" #include "tensorflow/lite/delegates/gpu/cl/environment.h" -#include "tensorflow/lite/delegates/gpu/cl/gl_interop.h" #include "tensorflow/lite/delegates/gpu/cl/inference_context.h" #include "tensorflow/lite/delegates/gpu/cl/kernels/converter.h" #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h" @@ -39,6 +39,13 @@ limitations under the License. 
#include "tensorflow/lite/delegates/gpu/common/shape.h" #include "tensorflow/lite/delegates/gpu/common/tensor.h" +#ifdef CL_DELEGATE_ALLOW_GL +#include + +#include "tensorflow/lite/delegates/gpu/cl/egl_sync.h" +#include "tensorflow/lite/delegates/gpu/cl/gl_interop.h" +#endif + namespace tflite { namespace gpu { namespace cl { @@ -87,11 +94,13 @@ class DefaultTensorTie : public TensorTie { const TensorTieDef& def, const TensorObjectConverterBuilder& converter_builder) { auto object_type = def.external_def.object_def.object_type; +#ifdef CL_DELEGATE_ALLOW_GL if (def.external_def.object_def.user_provided && GlClBufferCopier::IsSupported(def.external_def.object_def, def.internal_def.object_def)) { return true; } +#endif return (object_type == ObjectType::OPENCL_BUFFER || object_type == ObjectType::OPENCL_TEXTURE || object_type == ObjectType::CPU_MEMORY) && @@ -138,6 +147,7 @@ class DefaultTensorTie : public TensorTie { private: absl::Status Init(TensorObjectConverterBuilder* converter_builder, Environment* env) { +#ifdef CL_DELEGATE_ALLOW_GL if (def().external_def.object_def.user_provided && GlClBufferCopier::IsSupported(def().external_def.object_def, def().internal_def.object_def)) { @@ -156,6 +166,12 @@ class DefaultTensorTie : public TensorTie { RETURN_IF_ERROR(converter_builder->MakeConverter( def().internal_def, def().external_def, &converter_to_)); } +#else + RETURN_IF_ERROR(converter_builder->MakeConverter( + def().external_def, def().internal_def, &converter_from_)); + RETURN_IF_ERROR(converter_builder->MakeConverter( + def().internal_def, def().external_def, &converter_to_)); +#endif return MaybeAllocateExternalObject(env); } @@ -275,6 +291,7 @@ class TwoStepTensorTie : public TensorTie { std::unique_ptr outer_tie_; }; +#ifdef CL_DELEGATE_ALLOW_GL // Captures GL object into CL context before performing a conversion. class GlBufferHolder : public TensorTie { public: @@ -351,6 +368,7 @@ class GlBufferHolder : public TensorTie { std::unique_ptr tie_; TensorObject external_obj_; }; +#endif TensorObject TensorToObj(const Tensor& tensor) { if (tensor.GetStorageType() == TensorStorageType::BUFFER) { @@ -365,19 +383,28 @@ TensorObject TensorToObj(const Tensor& tensor) { // Responsible for creating new tensor objects. 
class TensorTieFactory { public: - TensorTieFactory(Environment* env, InferenceContext* context, - GlInteropFabric* gl_interop_fabric) + TensorTieFactory(Environment* env, InferenceContext* context +#ifdef CL_DELEGATE_ALLOW_GL + , + GlInteropFabric* gl_interop_fabric +#endif + ) : env_(*env), context_(*context), +#ifdef CL_DELEGATE_ALLOW_GL gl_interop_fabric_(gl_interop_fabric), - converter_builder_(NewConverterBuilder(env)) {} +#endif + converter_builder_(NewConverterBuilder(env)) { + } bool IsSupported(const TensorTieDef& def) const { return IsValid(def.external_def.object_def) && (NoopTensorTie::IsSupported(def) || DefaultTensorTie::IsSupported(def, *converter_builder_) || +#ifdef CL_DELEGATE_ALLOW_GL (gl_interop_fabric_ && GlBufferHolder::IsSupported(def, *converter_builder_)) || +#endif TwoStepTensorTie::IsSupported(def, *converter_builder_)); } @@ -392,10 +419,12 @@ class TensorTieFactory { if (DefaultTensorTie::IsSupported(def, *converter)) { return DefaultTensorTie::New(def, internal_object, converter, &env_, tie); } +#ifdef CL_DELEGATE_ALLOW_GL if (gl_interop_fabric_ && GlBufferHolder::IsSupported(def, *converter)) { return GlBufferHolder::New(def, internal_object, converter, gl_interop_fabric_, &env_, tie); } +#endif if (TwoStepTensorTie::IsSupported(def, *converter)) { return TwoStepTensorTie::New(def, internal_object, converter, &env_, tie); } @@ -405,18 +434,29 @@ class TensorTieFactory { private: Environment& env_; InferenceContext& context_; +#ifdef CL_DELEGATE_ALLOW_GL GlInteropFabric* gl_interop_fabric_; +#endif std::unique_ptr converter_builder_; }; class InferenceRunnerImpl : public InferenceRunner { public: InferenceRunnerImpl(Environment* environment, - std::unique_ptr context, - std::unique_ptr gl_interop_fabric) + std::unique_ptr context +#ifdef CL_DELEGATE_ALLOW_GL + , + std::unique_ptr gl_interop_fabric +#endif + ) : queue_(environment->queue()), - context_(std::move(context)), - gl_interop_fabric_(std::move(gl_interop_fabric)) {} + context_(std::move(context)) +#ifdef CL_DELEGATE_ALLOW_GL + , + gl_interop_fabric_(std::move(gl_interop_fabric)) +#endif + { + } absl::Status Initialize(const std::vector& inputs, const std::vector& outputs, @@ -464,9 +504,11 @@ class InferenceRunnerImpl : public InferenceRunner { } absl::Status Run() override { +#ifdef CL_DELEGATE_ALLOW_GL if (gl_interop_fabric_) { RETURN_IF_ERROR(gl_interop_fabric_->Start()); } +#endif for (auto& obj : inputs_) { RETURN_IF_ERROR(obj->CopyFromExternalObject()); } @@ -475,9 +517,11 @@ class InferenceRunnerImpl : public InferenceRunner { for (auto& obj : outputs_) { RETURN_IF_ERROR(obj->CopyToExternalObject()); } +#ifdef CL_DELEGATE_ALLOW_GL if (gl_interop_fabric_) { RETURN_IF_ERROR(gl_interop_fabric_->Finish()); } +#endif return absl::OkStatus(); } @@ -506,7 +550,9 @@ class InferenceRunnerImpl : public InferenceRunner { CLCommandQueue* queue_; std::unique_ptr context_; +#ifdef CL_DELEGATE_ALLOW_GL std::unique_ptr gl_interop_fabric_; +#endif std::vector> inputs_; std::vector> outputs_; }; @@ -542,6 +588,7 @@ class InferenceBuilderImpl : public InferenceBuilder { } RETURN_IF_ERROR(context_->InitFromGraph(create_info, graph, environment_)); +#ifdef CL_DELEGATE_ALLOW_GL if (env_options.IsGlAware() && IsGlSharingSupported(environment_->device())) { gl_interop_fabric_ = absl::make_unique( @@ -549,6 +596,10 @@ class InferenceBuilderImpl : public InferenceBuilder { } tie_factory_ = absl::make_unique( environment_, context_.get(), gl_interop_fabric_.get()); +#else + tie_factory_ = + 
absl::make_unique(environment_, context_.get()); +#endif inputs_ = LinkTensors(graph, graph.inputs()); outputs_ = LinkTensors(graph, graph.outputs()); @@ -599,6 +650,7 @@ class InferenceBuilderImpl : public InferenceBuilder { } absl::Status Build(std::unique_ptr* runner) override { +#ifdef CL_DELEGATE_ALLOW_GL if (gl_interop_fabric_ && !HasGlObjects()) { // destroy interop layer when there are no GL objects to avoid // extra synchronization cost. @@ -606,6 +658,10 @@ class InferenceBuilderImpl : public InferenceBuilder { } auto runner_impl = absl::make_unique( environment_, std::move(context_), std::move(gl_interop_fabric_)); +#else + auto runner_impl = absl::make_unique( + environment_, std::move(context_)); +#endif RETURN_IF_ERROR( runner_impl->Initialize(inputs_, outputs_, tie_factory_.get())); *runner = std::move(runner_impl); @@ -676,6 +732,7 @@ class InferenceBuilderImpl : public InferenceBuilder { } bool HasGlObjects() const { +#ifdef CL_DELEGATE_ALLOW_GL auto is_gl = [](ObjectType t) { return t == ObjectType::OPENGL_SSBO || t == ObjectType::OPENGL_TEXTURE; }; @@ -689,6 +746,7 @@ class InferenceBuilderImpl : public InferenceBuilder { return true; } } +#endif return false; } @@ -703,7 +761,9 @@ class InferenceBuilderImpl : public InferenceBuilder { } std::unique_ptr context_; +#ifdef CL_DELEGATE_ALLOW_GL std::unique_ptr gl_interop_fabric_; +#endif Environment* environment_; std::vector inputs_; @@ -730,20 +790,25 @@ class InferenceEnvironmentImpl : public InferenceEnvironment { RETURN_IF_ERROR(CreateDefaultGPUDevice(&device)); } +#ifdef CL_DELEGATE_ALLOW_GL properties_.is_gl_sharing_supported = IsGlSharingSupported(device); properties_.is_gl_to_cl_fast_sync_supported = IsClEventFromEglSyncSupported(device); properties_.is_cl_to_gl_fast_sync_supported = IsEglSyncFromClEventSupported(); +#endif CLContext context; if (options_.context) { +#ifdef CL_DELEGATE_ALLOW_GL if (options_.IsGlAware()) { return absl::InvalidArgumentError( "OpenCL context and EGL parameters are set in the same time."); } +#endif context = CLContext(options_.context, /* has_ownership = */ false); } else { +#ifdef CL_DELEGATE_ALLOW_GL if (options_.IsGlAware() && properties_.is_gl_sharing_supported) { RETURN_IF_ERROR(CreateCLGLContext( device, @@ -753,6 +818,9 @@ class InferenceEnvironmentImpl : public InferenceEnvironment { } else { RETURN_IF_ERROR(CreateCLContext(device, &context)); } +#else + RETURN_IF_ERROR(CreateCLContext(device, &context)); +#endif } CLCommandQueue queue; diff --git a/tensorflow/lite/delegates/gpu/cl/api.h b/tensorflow/lite/delegates/gpu/cl/api.h index bddf7de3363..826d4f2bc78 100644 --- a/tensorflow/lite/delegates/gpu/cl/api.h +++ b/tensorflow/lite/delegates/gpu/cl/api.h @@ -16,6 +16,10 @@ limitations under the License. #ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_API_H_ #define TENSORFLOW_LITE_DELEGATES_GPU_CL_API_H_ +#ifdef CL_DELEGATE_NO_GL +#define EGL_NO_PROTOTYPES +#endif + #include #include diff --git a/tensorflow/lite/delegates/gpu/cl/gpu_api_delegate.h b/tensorflow/lite/delegates/gpu/cl/gpu_api_delegate.h index 1a9fb73e6ab..e10489cc99b 100644 --- a/tensorflow/lite/delegates/gpu/cl/gpu_api_delegate.h +++ b/tensorflow/lite/delegates/gpu/cl/gpu_api_delegate.h @@ -16,8 +16,13 @@ limitations under the License. 
#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_GPU_API_DELEGATE_H_ #define TENSORFLOW_LITE_DELEGATES_GPU_CL_GPU_API_DELEGATE_H_ +#define GL_NO_PROTOTYPES +#define EGL_NO_PROTOTYPES #include #include +#undef GL_NO_PROTOTYPES +#undef EGL_NO_PROTOTYPES + #include #include "tensorflow/lite/c/common.h" @@ -76,8 +81,8 @@ typedef struct { // .compile_options = { // .precision_loss_allowed = false, // } -// .egl_display = eglGetCurrentDisplay(), -// .egl_context = eglGetCurrentContext(); +// .egl_display = EGL_NO_DISPLAY; +// .egl_context = EGL_NO_CONTEXT; TFL_CAPI_EXPORT TfLiteDelegate* TfLiteGpuDelegateCreate_New( const TfLiteGpuDelegateOptions_New* options); diff --git a/tensorflow/lite/delegates/gpu/cl/testing/BUILD b/tensorflow/lite/delegates/gpu/cl/testing/BUILD index 723e4cd9e99..c82190ca0e6 100644 --- a/tensorflow/lite/delegates/gpu/cl/testing/BUILD +++ b/tensorflow/lite/delegates/gpu/cl/testing/BUILD @@ -16,3 +16,21 @@ cc_binary( "@com_google_absl//absl/time", ], ) + +cc_binary( + name = "delegate_testing", + srcs = ["delegate_testing.cc"], + tags = [ + "nobuilder", + "notap", + ], + deps = [ + "//tensorflow/lite/delegates/gpu:delegate", + "//tensorflow/lite/delegates/gpu/cl:gpu_api_delegate", + "//tensorflow/lite/delegates/gpu/common:status", + "//tensorflow/lite/delegates/gpu/common/testing:tflite_model_reader", + "//tensorflow/lite/kernels:builtin_ops", + "//tensorflow/lite/kernels:kernel_util", + "@com_google_absl//absl/time", + ], +) diff --git a/tensorflow/lite/delegates/gpu/cl/testing/delegate_testing.cc b/tensorflow/lite/delegates/gpu/cl/testing/delegate_testing.cc new file mode 100644 index 00000000000..4e92f897d96 --- /dev/null +++ b/tensorflow/lite/delegates/gpu/cl/testing/delegate_testing.cc @@ -0,0 +1,158 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include +#include // NOLINT(build/c++11) +#include +#include +#include +#include +#include + +#include "absl/time/time.h" +#include "tensorflow/lite/delegates/gpu/cl/gpu_api_delegate.h" +#include "tensorflow/lite/delegates/gpu/common/status.h" +#include "tensorflow/lite/delegates/gpu/common/testing/tflite_model_reader.h" +#include "tensorflow/lite/delegates/gpu/delegate.h" +#include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/kernels/register.h" + +namespace { + +void FillInputTensor(tflite::Interpreter* interpreter) { + for (int k = 0; k < interpreter->inputs().size(); ++k) { + float* p = interpreter->typed_input_tensor(k); + const auto n = + tflite::NumElements(interpreter->tensor(interpreter->inputs()[k])); + for (int i = 0; i < n; ++i) { + p[i] = std::sin(i); + } + } +} + +void CompareCPUGPUResults(tflite::Interpreter* cpu, tflite::Interpreter* gpu, + float eps) { + for (int i = 0; i < cpu->outputs().size(); ++i) { + const float* cpu_out = cpu->typed_output_tensor(i); + const float* gpu_out = gpu->typed_output_tensor(i); + auto out_n = tflite::NumElements(cpu->tensor(cpu->outputs()[i])); + const int kMaxPrint = 10; + int printed = 0; + int total_different = 0; + for (int k = 0; k < out_n; ++k) { + const float abs_diff = fabs(cpu_out[k] - gpu_out[k]); + if (abs_diff > eps) { + total_different++; + if (printed < kMaxPrint) { + std::cout << "Output #" << i << ": element #" << k << ": CPU value - " + << cpu_out[k] << ", GPU value - " << gpu_out[k] + << ", abs diff - " << abs_diff << std::endl; + printed++; + } + if (printed == kMaxPrint) { + std::cout << "Printed " << kMaxPrint + << " different elements, threshhold - " << eps + << ", next different elements skipped" << std::endl; + printed++; + } + } + } + std::cout << "Total " << total_different + << " different elements, for output #" << i << ", threshhold - " + << eps << std::endl; + } +} + +} // namespace + +int main(int argc, char** argv) { + if (argc <= 1) { + std::cerr << "Expected model path as second argument." << std::endl; + return -1; + } + + auto model = tflite::FlatBufferModel::BuildFromFile(argv[1]); + if (!model) { + std::cerr << "FlatBufferModel::BuildFromFile failed, model path - " + << argv[1] << std::endl; + return -1; + } + tflite::ops::builtin::BuiltinOpResolver op_resolver; + tflite::InterpreterBuilder builder(*model, op_resolver); + + // CPU. + std::unique_ptr cpu_inference; + builder(&cpu_inference); + if (!cpu_inference) { + std::cerr << "Failed to build CPU inference." << std::endl; + return -1; + } + auto status = cpu_inference->AllocateTensors(); + if (status != kTfLiteOk) { + std::cerr << "Failed to AllocateTensors for CPU inference." << std::endl; + return -1; + } + FillInputTensor(cpu_inference.get()); + status = cpu_inference->Invoke(); + if (status != kTfLiteOk) { + std::cerr << "Failed to Invoke CPU inference." << std::endl; + return -1; + } + + // GPU. + std::unique_ptr gpu_inference; + builder(&gpu_inference); + if (!gpu_inference) { + std::cerr << "Failed to build GPU inference." 
<< std::endl; + return -1; + } + TfLiteGpuDelegateOptionsV2 options; + options.is_precision_loss_allowed = -1; + options.inference_preference = + TFLITE_GPU_INFERENCE_PREFERENCE_FAST_SINGLE_ANSWER; + options.inference_priority1 = TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY; + options.inference_priority2 = TFLITE_GPU_INFERENCE_PRIORITY_MIN_MEMORY_USAGE; + options.inference_priority3 = TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION; + auto* gpu_delegate = TfLiteGpuDelegateV2Create(&options); + status = gpu_inference->ModifyGraphWithDelegate(gpu_delegate); + if (status != kTfLiteOk) { + std::cerr << "ModifyGraphWithDelegate failed." << std::endl; + return -1; + } + FillInputTensor(gpu_inference.get()); + status = gpu_inference->Invoke(); + if (status != kTfLiteOk) { + std::cerr << "Failed to Invoke GPU inference." << std::endl; + return -1; + } + + CompareCPUGPUResults(cpu_inference.get(), gpu_inference.get(), 1e-4f); + + // CPU inference latency. + auto start = std::chrono::high_resolution_clock::now(); + cpu_inference->Invoke(); + auto end = std::chrono::high_resolution_clock::now(); + std::cout << "CPU time - " << (end - start).count() * 1e-6f << "ms" + << std::endl; + + // GPU inference latency. + start = std::chrono::high_resolution_clock::now(); + gpu_inference->Invoke(); + end = std::chrono::high_resolution_clock::now(); + std::cout << "GPU time(CPU->GPU->CPU) - " << (end - start).count() * 1e-6f + << "ms" << std::endl; + + TfLiteGpuDelegateV2Delete(gpu_delegate); + return EXIT_SUCCESS; +} diff --git a/tensorflow/lite/delegates/gpu/delegate.cc b/tensorflow/lite/delegates/gpu/delegate.cc index 38e60753c59..0f2d9811633 100644 --- a/tensorflow/lite/delegates/gpu/delegate.cc +++ b/tensorflow/lite/delegates/gpu/delegate.cc @@ -34,10 +34,13 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/common/model_transformer.h" #include "tensorflow/lite/delegates/gpu/common/quantization_util.h" #include "tensorflow/lite/delegates/gpu/common/status.h" -#include "tensorflow/lite/delegates/gpu/gl/api2.h" #include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h" #include "tensorflow/lite/minimal_logging.h" +#ifndef CL_DELEGATE_NO_GL +#include "tensorflow/lite/delegates/gpu/gl/api2.h" +#endif + namespace tflite { namespace gpu { namespace { @@ -315,6 +318,7 @@ class DelegateKernel { absl::Status InitializeOpenGlApi(GraphFloat32* graph, std::unique_ptr* builder) { +#ifndef CL_DELEGATE_NO_GL gl::InferenceEnvironmentOptions env_options; gl::InferenceEnvironmentProperties properties; RETURN_IF_ERROR( @@ -330,13 +334,16 @@ class DelegateKernel { enforce_same_thread_ = true; TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO, "Initialized OpenGL-based API."); +#endif return absl::OkStatus(); } // The Delegate instance that's shared across all DelegateKernel instances. Delegate* const delegate_; // doesn't own the memory. std::unique_ptr cl_environment_; +#ifndef CL_DELEGATE_NO_GL std::unique_ptr gl_environment_; +#endif std::unique_ptr runner_; std::vector input_indices_; std::vector output_indices_; From fa85309fb533988e8181becff5eee5644c1b72e3 Mon Sep 17 00:00:00 2001 From: Kibeom Kim Date: Mon, 20 Jul 2020 13:17:25 -0700 Subject: [PATCH 0852/2522] Implement async Python stack trace error reporting. 
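With asynchronous execution enabled, an op's error is reported later, when its output is consumed, so the original Python call site is normally lost. This change records the stack trace on the eager operation and re-attaches it to the returned Status, so the surfaced exception points at the code that created the failing op. A minimal sketch of the observable behavior, assuming the same internal modules used by the new test in ops_test.py:

  from tensorflow.python.eager import context
  from tensorflow.python.framework import config
  from tensorflow.python.framework import errors_impl
  from tensorflow.python.ops import math_ops

  config.set_synchronous_execution(False)

  def exception_originated_from_here():
    # Invalid shapes for matmul; in async mode no error is raised here.
    return math_ops.matmul([[1]], [[2], [3]])

  x = exception_originated_from_here()
  try:
    x.numpy()  # The async error surfaces here, annotated with the stack trace.
  except errors_impl.InvalidArgumentError as e:
    assert 'in exception_originated_from_here' in str(e)

  context.async_clear_error()
  config.set_synchronous_execution(True)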
PiperOrigin-RevId: 322211018 Change-Id: I0d02e98748cc79cca04dc71fc828990548fce70f --- tensorflow/c/eager/BUILD | 1 + .../c/eager/immediate_execution_operation.h | 7 ++ tensorflow/core/common_runtime/eager/BUILD | 1 + .../common_runtime/eager/eager_operation.cc | 1 + .../common_runtime/eager/eager_operation.h | 10 +++ .../core/common_runtime/eager/execute.cc | 2 +- .../core/common_runtime/eager/execute_node.h | 13 +++- tensorflow/core/platform/errors.h | 4 +- tensorflow/core/platform/status.cc | 9 ++- tensorflow/core/platform/status.h | 23 ++++++- tensorflow/core/util/BUILD | 18 +++++ tensorflow/core/util/abstract_stack_trace.h | 44 ++++++++++++ tensorflow/python/BUILD | 4 +- tensorflow/python/eager/BUILD | 3 + tensorflow/python/eager/ops_test.py | 19 +++++ tensorflow/python/eager/pywrap_tfe_src.cc | 69 ++++++++++++++++--- tensorflow/python/util/stack_trace.cc | 36 +++++++--- tensorflow/python/util/stack_trace.h | 55 +++++++++++++-- 18 files changed, 288 insertions(+), 31 deletions(-) create mode 100644 tensorflow/core/util/abstract_stack_trace.h diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index f56f8ad0a4b..0f728f1ebc3 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -308,6 +308,7 @@ cc_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", + "//tensorflow/core/util:abstract_stack_trace", "@com_google_absl//absl/types:span", ], ) diff --git a/tensorflow/c/eager/immediate_execution_operation.h b/tensorflow/c/eager/immediate_execution_operation.h index 31a75c5b8c7..8e22fb2d8b5 100644 --- a/tensorflow/c/eager/immediate_execution_operation.h +++ b/tensorflow/c/eager/immediate_execution_operation.h @@ -26,6 +26,7 @@ limitations under the License. #include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/platform/casts.h" #include "tensorflow/core/platform/status.h" +#include "tensorflow/core/util/abstract_stack_trace.h" struct TFE_Op; @@ -44,6 +45,12 @@ class ImmediateExecutionOperation : public AbstractOperation { // Experimental virtual Status SetUseXla(bool enable) = 0; + // Set stack trace to be used for potential async error reporting. + virtual void SetStackTrace(AbstractStackTrace stack_trace) = 0; + + // Returns the stack trace set by `SetStackTrace` if exists. + virtual absl::optional GetStackTrace() = 0; + // For LLVM style RTTI. static bool classof(const AbstractOperation* ptr) { return ptr->getKind() == kEager || ptr->getKind() == kTfrt; diff --git a/tensorflow/core/common_runtime/eager/BUILD b/tensorflow/core/common_runtime/eager/BUILD index 003a4e5996f..b4c905f220e 100644 --- a/tensorflow/core/common_runtime/eager/BUILD +++ b/tensorflow/core/common_runtime/eager/BUILD @@ -163,6 +163,7 @@ tf_cuda_library( "//tensorflow/core:protos_all_cc", "//tensorflow/core:lib", "//tensorflow/core/platform:platform_port", + "//tensorflow/core/util:abstract_stack_trace", ] + select({ "//tensorflow:android": [ "//tensorflow/core:portable_tensorflow_lib_lite", diff --git a/tensorflow/core/common_runtime/eager/eager_operation.cc b/tensorflow/core/common_runtime/eager/eager_operation.cc index 47610629479..6dbc342c1bd 100644 --- a/tensorflow/core/common_runtime/eager/eager_operation.cc +++ b/tensorflow/core/common_runtime/eager/eager_operation.cc @@ -306,6 +306,7 @@ Status EagerOperation::Reset( } attrs_.Reset(op); use_xla_ = false; + stack_trace_.reset(); is_function_ = is_function; cancellation_manager_ = nullptr; executor_ = executor ? 
executor : &ctx_.Executor(); diff --git a/tensorflow/core/common_runtime/eager/eager_operation.h b/tensorflow/core/common_runtime/eager/eager_operation.h index dad578ba9f0..9fc35a18a7f 100644 --- a/tensorflow/core/common_runtime/eager/eager_operation.h +++ b/tensorflow/core/common_runtime/eager/eager_operation.h @@ -29,6 +29,7 @@ limitations under the License. #include "tensorflow/core/framework/cancellation.h" #include "tensorflow/core/framework/device_attributes.pb.h" #include "tensorflow/core/framework/op_def.pb.h" +#include "tensorflow/core/util/abstract_stack_trace.h" #include "tensorflow/core/util/device_name_utils.h" namespace tensorflow { @@ -120,6 +121,14 @@ class EagerOperation : public ImmediateExecutionOperation { Status SetUseXla(bool enable) override; + void SetStackTrace(AbstractStackTrace stack_trace) override { + stack_trace_ = stack_trace; + } + + absl::optional GetStackTrace() override { + return stack_trace_; + } + Status Reset(const char* op, const char* device_name, bool remote, EagerExecutor* executor, const absl::optional @@ -218,6 +227,7 @@ class EagerOperation : public ImmediateExecutionOperation { VariantDevice device_; bool use_xla_ = false; + absl::optional stack_trace_; bool is_function_; // Conceptually const, but can't be because of Reset bool colocation_exempt_; CancellationManager* cancellation_manager_ = nullptr; // Not owned. diff --git a/tensorflow/core/common_runtime/eager/execute.cc b/tensorflow/core/common_runtime/eager/execute.cc index 71d781e5d3d..fec31da703e 100644 --- a/tensorflow/core/common_runtime/eager/execute.cc +++ b/tensorflow/core/common_runtime/eager/execute.cc @@ -634,7 +634,7 @@ Status EagerLocalExecute(EagerOperation* op, TensorHandle** retvals, auto node = absl::make_unique( &ctx, op->Inputs(), op->remote_func_params(), std::move(kernel), graph_collector, op->GetCancellationManager(), - absl::Span(retvals, num_outputs)); + absl::Span(retvals, num_outputs), op->GetStackTrace()); // Release the inputs from the eager operation since the AsyncExecuteNode // would have taken ownership. This allows the inputs to be forwarded if // possible. diff --git a/tensorflow/core/common_runtime/eager/execute_node.h b/tensorflow/core/common_runtime/eager/execute_node.h index 7924471066e..6d11ecbf7a4 100644 --- a/tensorflow/core/common_runtime/eager/execute_node.h +++ b/tensorflow/core/common_runtime/eager/execute_node.h @@ -150,14 +150,16 @@ class AsyncExecuteNode : public EagerNode { core::RefCountPtr kernel, GraphCollector* graph_collector, CancellationManager* cancellation_manager, - absl::Span retvals) + absl::Span retvals, + absl::optional stack_trace) : EagerNode(), ctx_(ctx), inputs_(inputs), remote_func_params_(remote_func_params), kernel_(std::move(kernel)), graph_collector_(graph_collector), - cancellation_manager_(cancellation_manager) { + cancellation_manager_(cancellation_manager), + stack_trace_(stack_trace) { // Copy the output handles, since the container for them might get // destroyed. 
for (auto handle : retvals) { @@ -194,10 +196,14 @@ class AsyncExecuteNode : public EagerNode { } ++i; } - const Status status = EagerKernelExecute( + Status status = EagerKernelExecute( ctx_, inputs_, remote_func_params_, kernel_, graph_collector_, cancellation_manager_, absl::MakeSpan(retvals_)); if (!status.ok()) { + if (stack_trace_.has_value()) { + status = Status(status.code(), status.error_message(), + stack_trace_->ToStackFrames()); + } Abort(status); return status; } @@ -227,6 +233,7 @@ class AsyncExecuteNode : public EagerNode { core::RefCountPtr kernel_; GraphCollector* graph_collector_; CancellationManager* const cancellation_manager_; + absl::optional stack_trace_; absl::InlinedVector retvals_; }; diff --git a/tensorflow/core/platform/errors.h b/tensorflow/core/platform/errors.h index 3f1ff477655..55af45a4c24 100644 --- a/tensorflow/core/platform/errors.h +++ b/tensorflow/core/platform/errors.h @@ -62,9 +62,11 @@ inline const strings::AlphaNum& PrepareForStrCat(const strings::AlphaNum& a) { // to be several layers of additional context. template void AppendToMessage(::tensorflow::Status* status, Args... args) { + std::vector stack_trace = status->stack_trace(); *status = ::tensorflow::Status( status->code(), - ::tensorflow::strings::StrCat(status->error_message(), "\n\t", args...)); + ::tensorflow::strings::StrCat(status->error_message(), "\n\t", args...), + std::move(stack_trace)); } // For propagating errors when calling a function. diff --git a/tensorflow/core/platform/status.cc b/tensorflow/core/platform/status.cc index c85527f27ad..04f74d024ca 100644 --- a/tensorflow/core/platform/status.cc +++ b/tensorflow/core/platform/status.cc @@ -89,11 +89,13 @@ class StatusLogSink : public TFLogSink { } // namespace -Status::Status(tensorflow::error::Code code, StringPiece msg) { +Status::Status(tensorflow::error::Code code, tensorflow::StringPiece msg, + std::vector&& stack_trace) { assert(code != tensorflow::error::OK); state_ = std::unique_ptr(new State); state_->code = code; state_->msg = string(msg); + state_->stack_trace = std::move(stack_trace); VLOG(5) << "Generated non-OK status: \"" << *this << "\". " << CurrentStackTrace(); } @@ -117,6 +119,11 @@ const string& Status::empty_string() { return *empty; } +const std::vector& Status::empty_stack_trace() { + static std::vector* empty = new std::vector(); + return *empty; +} + string error_name(error::Code code) { switch (code) { case tensorflow::error::OK: diff --git a/tensorflow/core/platform/status.h b/tensorflow/core/platform/status.h index 5ee93a179db..fc570caf6b1 100644 --- a/tensorflow/core/platform/status.h +++ b/tensorflow/core/platform/status.h @@ -29,6 +29,13 @@ limitations under the License. namespace tensorflow { +// A struct representing a frame in a stack trace. +struct StackFrame { + std::string file_name; + int line_number; + std::string function_name; +}; + #if defined(__clang__) // Only clang supports warn_unused_result as a type annotation. class TF_MUST_USE_RESULT Status; @@ -43,7 +50,15 @@ class Status { /// \brief Create a status with the specified error code and msg as a /// human-readable string containing more detailed information. - Status(tensorflow::error::Code code, tensorflow::StringPiece msg); + Status(tensorflow::error::Code code, tensorflow::StringPiece msg) + : Status(code, msg, {}) {} + + /// \brief Create a status with the specified error code, msg, and stack trace + /// as a human-readable string containing more detailed information. 
+#ifndef SWIG + Status(tensorflow::error::Code code, tensorflow::StringPiece msg, + std::vector&& stack_trace); +#endif /// Copy the specified status. Status(const Status& s); @@ -66,6 +81,10 @@ class Status { return ok() ? empty_string() : state_->msg; } + const std::vector& stack_trace() const { + return ok() ? empty_stack_trace() : state_->stack_trace; + } + bool operator==(const Status& x) const; bool operator!=(const Status& x) const; @@ -91,9 +110,11 @@ class Status { private: static const std::string& empty_string(); + static const std::vector& empty_stack_trace(); struct State { tensorflow::error::Code code; std::string msg; + std::vector stack_trace; }; // OK status has a `NULL` state_. Otherwise, `state_` points to // a `State` structure containing the error code and message(s) diff --git a/tensorflow/core/util/BUILD b/tensorflow/core/util/BUILD index 962beb55e05..78757bed13e 100644 --- a/tensorflow/core/util/BUILD +++ b/tensorflow/core/util/BUILD @@ -61,6 +61,7 @@ filegroup( filegroup( name = "mobile_srcs_only_runtime", srcs = [ + "abstract_stack_trace.h", "batch_util.cc", "batch_util.h", "bcast.cc", @@ -313,6 +314,7 @@ filegroup( filegroup( name = "framework_srcs", srcs = [ + "abstract_stack_trace.h", "activation_mode.h", "batch_util.h", "bcast.h", @@ -437,6 +439,22 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "abstract_stack_trace", + hdrs = ["abstract_stack_trace.h"], + visibility = [ + "//tensorflow/c/eager:__pkg__", + "//tensorflow/core:__pkg__", + "//tensorflow/core/common_runtime/eager:__pkg__", + "//tensorflow/core/platform:__pkg__", + "//tensorflow/python:__pkg__", + "//tensorflow/python/eager:__pkg__", + ], + deps = [ + "//tensorflow/core/platform:status", + ], +) + tf_cuda_library( name = "gpu_cuda_alias", hdrs = ["gpu_cuda_alias.h"], diff --git a/tensorflow/core/util/abstract_stack_trace.h b/tensorflow/core/util/abstract_stack_trace.h new file mode 100644 index 00000000000..442adc6f380 --- /dev/null +++ b/tensorflow/core/util/abstract_stack_trace.h @@ -0,0 +1,44 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_UTIL_ABSTRACT_STACK_TRACE_H_ +#define TENSORFLOW_CORE_UTIL_ABSTRACT_STACK_TRACE_H_ + +#include + +#include "tensorflow/core/platform/status.h" + +namespace tensorflow { + +// Language agnostic stack trace class. It only saves an id, and language +// clients are responsible for managing the actual stack trace objects. +class AbstractStackTrace { + public: + AbstractStackTrace(int id, std::vector (*to_stack_frames)(int)) + : id_(id), to_stack_frames_(to_stack_frames) {} + + // Returns stack trace as a vector of `StackFrame`s. 
+ std::vector ToStackFrames() const { + return to_stack_frames_(id_); + } + + private: + int id_; + std::vector (*to_stack_frames_)(int); +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_UTIL_ABSTRACT_STACK_TRACE_H_ diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 7f40b0dac95..09d22aa203d 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -5664,9 +5664,11 @@ cc_library( hdrs = ["util/stack_trace.h"], deps = [ ":py_util", + "//tensorflow/core/platform:str_util", + "//tensorflow/core/platform:stringpiece", + "//tensorflow/core/util:abstract_stack_trace", "//third_party/python_runtime:headers", # buildcleaner: keep "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/synchronization", "@com_google_absl//absl/types:optional", ], ) diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD index 65c99b8c6e5..a96d2322b88 100644 --- a/tensorflow/python/eager/BUILD +++ b/tensorflow/python/eager/BUILD @@ -56,13 +56,16 @@ cc_library( "//tensorflow/core/platform:logging", "//tensorflow/core/platform:types", "//tensorflow/core/profiler/lib:traceme", + "//tensorflow/core/util:abstract_stack_trace", "//tensorflow/python:cpp_python_util", "//tensorflow/python:ndarray_tensor", "//tensorflow/python:ndarray_tensor_bridge", "//tensorflow/python:numpy_lib", "//tensorflow/python:py_exception_registry", "//tensorflow/python:py_seq_tensor", + "//tensorflow/python:py_util", "//tensorflow/python:safe_ptr", + "//tensorflow/python:stack_trace", "//third_party/py/numpy:headers", "//third_party/python_runtime:headers", "@com_google_absl//absl/container:flat_hash_map", diff --git a/tensorflow/python/eager/ops_test.py b/tensorflow/python/eager/ops_test.py index b996d0dd0c4..a859f4edf01 100644 --- a/tensorflow/python/eager/ops_test.py +++ b/tensorflow/python/eager/ops_test.py @@ -30,6 +30,7 @@ from tensorflow.python.eager import test from tensorflow.python.framework import config from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors_impl from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import test_util @@ -480,6 +481,24 @@ class OpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): self.assertIs(weak_x(), None) self.assertIs(weak_y(), None) + def testAsyncExceptionStackTrace(self): + config.set_synchronous_execution(False) + + def exception_originated_from_here(): + # Invalid shapes for matmul. + return math_ops.matmul([[1]], [[2], [3]]) + + # In sync mode, an exception would have been raised here but since this is + # in async, the exception will be raised next. + x = exception_originated_from_here() + + with self.assertRaisesRegex(errors_impl.InvalidArgumentError, + 'in exception_originated_from_here'): + x.numpy() + + context.async_clear_error() + config.set_synchronous_execution(True) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index a4c06f8e72f..dcaaafeda5c 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -41,10 +41,13 @@ limitations under the License. 
#include "tensorflow/core/platform/protobuf.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/lib/traceme.h" +#include "tensorflow/core/util/abstract_stack_trace.h" #include "tensorflow/python/eager/pywrap_gradient_exclusions.h" #include "tensorflow/python/eager/pywrap_tensor.h" #include "tensorflow/python/eager/pywrap_tfe.h" +#include "tensorflow/python/lib/core/py_util.h" #include "tensorflow/python/lib/core/safe_ptr.h" +#include "tensorflow/python/util/stack_trace.h" #include "tensorflow/python/util/util.h" using tensorflow::string; @@ -854,10 +857,14 @@ void TFE_Py_ExecuteCancelable(TFE_Context* ctx, const char* device_name, TF_Status* out_status) { tensorflow::profiler::TraceMe activity( "TFE_Py_ExecuteCancelable", tensorflow::profiler::TraceMeLevel::kInfo); + TFE_Op* op = GetOp(ctx, op_name, device_name, out_status); + auto cleaner = tensorflow::gtl::MakeCleanup([ctx, op] { ReturnOp(ctx, op); }); if (!out_status->status.ok()) return; + tensorflow::unwrap(op)->SetStackTrace(tensorflow::GetStackTrace()); + for (int i = 0; i < inputs->size() && out_status->status.ok(); ++i) { TFE_OpAddInput(op, inputs->at(i), out_status); } @@ -970,14 +977,54 @@ void RaiseFallbackException(const char* message) { .data()); } +// Format and return `status`' error message with the attached stack trace if +// available. `status` must have an error. +std::string FormatErrorStatusStackTrace(const tensorflow::Status& status) { + tensorflow::DCheckPyGilState(); + DCHECK(!status.ok()); + + if (status.stack_trace().empty()) return status.error_message(); + + const std::vector& stack_trace = status.stack_trace(); + + PyObject* linecache = PyImport_ImportModule("linecache"); + PyObject* getline = + PyObject_GetAttr(linecache, PyUnicode_FromString("getline")); + DCHECK(getline); + + std::ostringstream result; + result << "Exception originated from\n\n"; + + for (const tensorflow::StackFrame& stack_frame : stack_trace) { + PyObject* line_str_obj = PyObject_CallFunction( + getline, const_cast("si"), stack_frame.file_name.c_str(), + stack_frame.line_number); + tensorflow::StringPiece line_str = TFE_GetPythonString(line_str_obj); + tensorflow::str_util::RemoveWhitespaceContext(&line_str); + result << " File \"" << stack_frame.file_name << "\", line " + << stack_frame.line_number << ", in " << stack_frame.function_name + << '\n'; + + if (!line_str.empty()) result << " " << line_str << '\n'; + Py_XDECREF(line_str_obj); + } + + Py_DecRef(getline); + Py_DecRef(linecache); + + result << '\n' << status.error_message(); + return result.str(); +} + int MaybeRaiseExceptionFromTFStatus(TF_Status* status, PyObject* exception) { if (status->status.ok()) return 0; const char* msg = TF_Message(status); if (exception == nullptr) { tensorflow::mutex_lock l(exception_class_mutex); if (exception_class != nullptr) { - tensorflow::Safe_PyObjectPtr val( - Py_BuildValue("si", msg, TF_GetCode(status))); + tensorflow::Safe_PyObjectPtr val(Py_BuildValue( + "si", FormatErrorStatusStackTrace(status->status).c_str(), + TF_GetCode(status))); if (PyErr_Occurred()) { // NOTE: This hides the actual error (i.e. 
the reason `status` was not // TF_OK), but there is nothing we can do at this point since we can't @@ -1003,7 +1050,8 @@ int MaybeRaiseExceptionFromStatus(const tensorflow::Status& status, if (exception == nullptr) { tensorflow::mutex_lock l(exception_class_mutex); if (exception_class != nullptr) { - tensorflow::Safe_PyObjectPtr val(Py_BuildValue("si", msg, status.code())); + tensorflow::Safe_PyObjectPtr val(Py_BuildValue( + "si", FormatErrorStatusStackTrace(status).c_str(), status.code())); PyErr_SetObject(exception_class, val.get()); return -1; } else { @@ -3527,6 +3575,8 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject* args) { } TFE_Op* op = GetOp(ctx, op_name, op_exec_info.device_name, status); + tensorflow::unwrap(op)->SetStackTrace(tensorflow::GetStackTrace()); + auto cleaner = tensorflow::gtl::MakeCleanup([status, ctx, op] { ReturnStatus(status); ReturnOp(ctx, op); @@ -3746,11 +3796,14 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject* args) { if (!status->status.ok()) { // Augment the status with the op_name for easier debugging similar to // TFE_Py_Execute. - TF_SetStatus(status, TF_GetCode(status), - tensorflow::strings::StrCat( - TF_Message(status), - " [Op:", TFE_GetPythonString(op_exec_info.op_name), "]") - .c_str()); + std::vector stack_trace = + status->status.stack_trace(); + status->status = tensorflow::Status( + status->status.code(), + tensorflow::strings::StrCat( + TF_Message(status), + " [Op:", TFE_GetPythonString(op_exec_info.op_name), "]"), + std::move(stack_trace)); MaybeRaiseExceptionFromTFStatus(status, nullptr); return nullptr; diff --git a/tensorflow/python/util/stack_trace.cc b/tensorflow/python/util/stack_trace.cc index cf574f6f292..04b427fd67b 100644 --- a/tensorflow/python/util/stack_trace.cc +++ b/tensorflow/python/util/stack_trace.cc @@ -15,6 +15,9 @@ limitations under the License. #include "tensorflow/python/util/stack_trace.h" +#include "tensorflow/core/platform/str_util.h" +#include "tensorflow/core/platform/stringpiece.h" + namespace { // Returns C string from a Python string object. Handles Python2/3 strings. @@ -31,22 +34,33 @@ const char* GetPythonString(PyObject* o) { return PyBytes_AsString(o); #endif } + } // namespace namespace tensorflow { -std::string StackTrace::ToString() const { - DCheckPyGilState(); - std::ostringstream result; +std::vector StackTrace::ToStackFrames() const { + std::vector result; + result.reserve(size_); + for (int i = size_ - 1; i >= 0; --i) { - result << " File \"" << PyUnicode_AsUTF8(code_objs_[i]->co_filename) - << "\", line " - << PyCode_Addr2Line(code_objs_[i], last_instructions_[i]) << ", in " - << GetPythonString(code_objs_[i]->co_name) - << "\n \n"; - // TODO(kkb): Add source code line. See tf_stack.cc's - // FrameSummary::line() function. 
+ const char* file_name = GetPythonString(code_objs_[i]->co_filename); + const int line_number = + PyCode_Addr2Line(code_objs_[i], last_instructions_[i]); + result.emplace_back(StackFrame{file_name, line_number, + GetPythonString(code_objs_[i]->co_name)}); } - return result.str(); + + return result; } + +StackTrace* StackTraceManager::Get(int id) { + DCheckPyGilState(); + if (next_id_ - id > kStackTraceCircularBufferSize) return nullptr; + + return &stack_traces_[id & (kStackTraceCircularBufferSize - 1)]; +} + +StackTraceManager* const stack_trace_manager = new StackTraceManager(); + } // namespace tensorflow diff --git a/tensorflow/python/util/stack_trace.h b/tensorflow/python/util/stack_trace.h index 0b9a737bf7e..732d40c92d2 100644 --- a/tensorflow/python/util/stack_trace.h +++ b/tensorflow/python/util/stack_trace.h @@ -25,6 +25,8 @@ limitations under the License. #include "absl/base/attributes.h" #include "absl/base/optimization.h" +#include "absl/types/optional.h" +#include "tensorflow/core/util/abstract_stack_trace.h" #include "tensorflow/python/lib/core/py_util.h" namespace tensorflow { @@ -82,10 +84,8 @@ class StackTrace final { return *this; } - // Returns string representation of the captured stack trace. - std::string ToString() const; - - // TODO(kkb): Implement structured stack trace object getter. + // Returns a structured representation of the captured stack trace. + std::vector ToStackFrames() const; private: std::array code_objs_; @@ -103,6 +103,53 @@ class StackTrace final { StackTrace& operator=(const StackTrace&) = delete; }; +// A class that manages Python stack traces in a circular buffer. Users can +// insert stack trace entries and retrive them by ids. +class StackTraceManager { + public: + static constexpr int kStackTraceCircularBufferSize = 1024; + + // Captures the current Python stack trace and returns an id. + // Python GIL must be acquired beforehand. + ABSL_MUST_USE_RESULT + ABSL_ATTRIBUTE_HOT + int Capture() { + DCheckPyGilState(); + const int id = next_id_++; + const int index = id & (kStackTraceCircularBufferSize - 1); + stack_traces_[index] = StackTrace::Capture(); + return id; + } + + // Retrieve captured Python stack trace by id. Returns `nullptr` if the + // requested stack trace is evicted from the circular buffer. + // Python GIL must be acquired beforehand. + ABSL_MUST_USE_RESULT + StackTrace* Get(int id); + + private: + int next_id_ = 0; + std::array stack_traces_; +}; + +// Singleton StackTraceManager. +extern StackTraceManager* const stack_trace_manager; + +// Returns Python stack trace object that can be converted to string. +// Note that the actual stack trace is kept in a circular buffer for string +// conversion could fail if it's evicted before. +// Python GIL must be acquired beforehand. 
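A brief illustration of the id-based scheme used by StackTraceManager above: Capture() hands out monotonically increasing ids, the slot for an id is `id & (kStackTraceCircularBufferSize - 1)`, and Get() detects eviction purely from the distance between the requested id and `next_id_`. The following is a minimal standalone sketch of that scheme, not part of this patch; the class name `RingBuffer` and the buffer size are illustrative only, while Capture/Get mirror the real methods.

#include <array>
#include <cassert>
#include <string>
#include <utility>

// kSize must be a power of two so that `id & (kSize - 1)` is the slot index.
template <typename T, int kSize>
class RingBuffer {
 public:
  int Capture(T value) {
    const int id = next_id_++;
    slots_[id & (kSize - 1)] = std::move(value);
    return id;
  }

  // Returns nullptr if the entry for `id` has already been overwritten.
  T* Get(int id) {
    if (next_id_ - id > kSize) return nullptr;
    return &slots_[id & (kSize - 1)];
  }

 private:
  int next_id_ = 0;
  std::array<T, kSize> slots_;
};

int main() {
  RingBuffer<std::string, 4> traces;
  const int id = traces.Capture("frame info");
  assert(traces.Get(id) != nullptr);
  for (int i = 0; i < 5; ++i) traces.Capture("newer trace");
  assert(traces.Get(id) == nullptr);  // More than 4 captures later: evicted.
  return 0;
}

The power-of-two size keeps the slot computation a single mask, which is why the real manager marks Capture() as ABSL_ATTRIBUTE_HOT.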
+inline AbstractStackTrace GetStackTrace() { + DCheckPyGilState(); + return AbstractStackTrace(stack_trace_manager->Capture(), [](int id) { + PyGILState_STATE gstate = PyGILState_Ensure(); + std::vector result = + stack_trace_manager->Get(id)->ToStackFrames(); + PyGILState_Release(gstate); + return result; + }); +} + } // namespace tensorflow #endif // TENSORFLOW_PYTHON_UTIL_STACK_TRACE_H_ From 47c1aeb1aac754a0a6c9797cfc1248c3bb302421 Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Mon, 20 Jul 2020 20:25:24 +0000 Subject: [PATCH 0853/2522] update tests --- .../python/kernel_tests/map_ops_test.py | 22 +++++++++---------- tensorflow/python/ops/map_ops.py | 1 - 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/tensorflow/python/kernel_tests/map_ops_test.py b/tensorflow/python/kernel_tests/map_ops_test.py index 688f4907457..7fda6fb3efd 100644 --- a/tensorflow/python/kernel_tests/map_ops_test.py +++ b/tensorflow/python/kernel_tests/map_ops_test.py @@ -135,9 +135,8 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): l = map_ops.tensor_map_lookup(m, k, dtypes.float32) l *= 5 g = tape.gradient(l, v) - self.assertAllClose(g, 5.0) + self.assertAllClose(g, 5) - #TODO(kattian): Test alternating inserts and lookups def testMultipleInsertLookupGrad(self): with backprop.GradientTape(persistent=True) as tape: m = map_ops.empty_tensor_map() @@ -158,11 +157,11 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): l2 = map_ops.tensor_map_lookup(m, k2, v2.dtype) l3 = map_ops.tensor_map_lookup(m, k3, v3.dtype) g = tape.gradient(l * 5, v) - g2 = tape.gradient(l2 * 5, v2) - g3 = tape.gradient(l3 * 5, v3) + g2 = tape.gradient(l2 * 6, v2) + g3 = tape.gradient(l3 * 7, v3) self.assertAllClose(g, 5) - self.assertAllClose(g2, 5) - self.assertAllClose(g3, 5) + self.assertAllClose(g2, 6) + self.assertAllClose(g3, 7) def testSameKeyInsertLookupGrad(self): with backprop.GradientTape(persistent=True) as tape: @@ -180,7 +179,7 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): self.assertAllClose(g, array_ops.zeros_like(v)) self.assertAllClose(g2, 5) - def testSameKeyInsertLookupGrad2(self): + def testSameKeyAlternatingInsertLookupGrad(self): with backprop.GradientTape(persistent=True) as tape: m = map_ops.empty_tensor_map() k = constant_op.constant(1.0) @@ -192,13 +191,12 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): l = map_ops.tensor_map_lookup(m, k, v.dtype) g = tape.gradient(l * 5, v) self.assertAllClose(g, 5) - m = map_ops.tensor_map_insert(m, k, v2) l2 = map_ops.tensor_map_lookup(m, k, v2.dtype) - g2 = tape.gradient(l2 * 5, v2) - g3 = tape.gradient(l2 * 5, v) - self.assertAllClose(g2, 5) - self.assertAllClose(g3, array_ops.zeros_like(v)) + g2 = tape.gradient(l2 * 6, v) + g3 = tape.gradient(l2 * 7, v2) + self.assertAllClose(g2, array_ops.zeros_like(v)) + self.assertAllClose(g3, 7) if __name__ == '__main__': test.main() diff --git a/tensorflow/python/ops/map_ops.py b/tensorflow/python/ops/map_ops.py index f18623aa41a..443bb0b1934 100644 --- a/tensorflow/python/ops/map_ops.py +++ b/tensorflow/python/ops/map_ops.py @@ -24,7 +24,6 @@ from tensorflow.python.framework import ops from tensorflow.python.ops import gen_map_ops from tensorflow.python.ops import array_ops from tensorflow.python.ops.gen_map_ops import * -from tensorflow.python.framework import constant_op from tensorflow.python.util.lazy_loader import LazyLoader control_flow_ops = LazyLoader("control_flow_ops", globals(), From 
f6194bd42bb3326f2600f8f44c89775b9624caef Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Mon, 20 Jul 2020 13:19:31 -0700 Subject: [PATCH 0854/2522] Exclude "lib/Analysis/DevelopmentModeInlineAdvisor.cpp" from LLVM OSS build files to avoid circular dependency on TensorFlow PiperOrigin-RevId: 322211426 Change-Id: I8373ef75b13c1f2f0f62d6798979f0ab024a19d8 --- third_party/llvm/llvm.autogenerated.BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/third_party/llvm/llvm.autogenerated.BUILD b/third_party/llvm/llvm.autogenerated.BUILD index bade7ab37ff..b1b8d92a8a3 100644 --- a/third_party/llvm/llvm.autogenerated.BUILD +++ b/third_party/llvm/llvm.autogenerated.BUILD @@ -721,6 +721,7 @@ cc_library( "lib/Analysis/*.h", ], exclude = [ + "lib/Analysis/DevelopmentModeInlineAdvisor.cpp", "lib/Analysis/MLInlineAdvisor.cpp", "lib/Analysis/ReleaseModeModelRunner.cpp", "lib/Analysis/TFUtils.cpp", From 44d800d213c56ff234bd8a908b1296b85359a3e8 Mon Sep 17 00:00:00 2001 From: Jay Shi Date: Mon, 20 Jul 2020 13:20:59 -0700 Subject: [PATCH 0855/2522] [tf.data] Remove the unnecessary input for `Snapshot` function. PiperOrigin-RevId: 322211694 Change-Id: I3b342105de1321e31f83943e6d5682fa8b2366f5 --- tensorflow/core/framework/model.cc | 8 ++++---- tensorflow/core/framework/model.h | 3 +-- tensorflow/core/framework/model_test.cc | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/tensorflow/core/framework/model.cc b/tensorflow/core/framework/model.cc index 6dcaf8ecac2..bc72ca08034 100644 --- a/tensorflow/core/framework/model.cc +++ b/tensorflow/core/framework/model.cc @@ -972,9 +972,9 @@ double Node::OutputTime(absl::flat_hash_map* input_times, return output_times[long_name()]; } -std::shared_ptr Node::Snapshot(std::shared_ptr output) const { +std::shared_ptr Node::Snapshot() const { NodePairList node_pairs; - auto result = SnapshotHelper(output, &node_pairs); + auto result = SnapshotHelper(nullptr, &node_pairs); while (!node_pairs.empty()) { auto node_pair = node_pairs.front(); @@ -1346,7 +1346,7 @@ void Model::OptimizeGradientDescent(int64 cpu_budget, int64 ram_budget) { std::shared_ptr snapshot; { tf_shared_lock lock(mu_); - snapshot = output_->Snapshot(nullptr); + snapshot = output_->Snapshot(); } VLOG(2) << "Starting optimization of tunable parameters with GradientDescent"; auto parameters = CollectTunableParameters(snapshot); @@ -1422,7 +1422,7 @@ void Model::OptimizeHillClimb(int64 cpu_budget, int64 ram_budget) { std::shared_ptr snapshot; { tf_shared_lock lock(mu_); - snapshot = output_->Snapshot(nullptr); + snapshot = output_->Snapshot(); } VLOG(2) << "Starting optimization of tunable parameters with HillClimb"; const double processing_time = TotalProcessingTime(snapshot); diff --git a/tensorflow/core/framework/model.h b/tensorflow/core/framework/model.h index e8d78756192..5ddd64853a8 100644 --- a/tensorflow/core/framework/model.h +++ b/tensorflow/core/framework/model.h @@ -339,8 +339,7 @@ class Node { // // The purpose for this method is to allow the model optimization logic to // operate over immutable state while allowing concurrent model updates. - std::shared_ptr Snapshot(std::shared_ptr output) const - TF_LOCKS_EXCLUDED(mu_); + std::shared_ptr Snapshot() const TF_LOCKS_EXCLUDED(mu_); // Returns the per-element processing time spent in this node. 
double SelfProcessingTime() const TF_LOCKS_EXCLUDED(mu_); diff --git a/tensorflow/core/framework/model_test.cc b/tensorflow/core/framework/model_test.cc index 3cbe90b08f9..5a4d0da374c 100644 --- a/tensorflow/core/framework/model_test.cc +++ b/tensorflow/core/framework/model_test.cc @@ -756,7 +756,7 @@ TEST(SnapshotTest, Model) { cur_node = cur_node->inputs().front(); } - std::shared_ptr root_copy = root->Snapshot(nullptr); + std::shared_ptr root_copy = root->Snapshot(); cur_node = root; std::shared_ptr cur_node_copy = root_copy; From 24c4a6dd9096f836a63db0d70ee7acc01bd18fd8 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Mon, 20 Jul 2020 13:25:27 -0700 Subject: [PATCH 0856/2522] Open source distributed_tpu_rewrite_pass.cc and associated helper methods PiperOrigin-RevId: 322212574 Change-Id: Idd8c5ac1cc28daa546937085730595667a607b5c --- tensorflow/core/tpu/graph_rewrite/BUILD | 118 - .../core/tpu/graph_rewrite/cond_builder.cc | 83 - .../core/tpu/graph_rewrite/cond_builder.h | 74 - .../distributed_tpu_rewrite_pass.cc | 3926 ----------------- .../distributed_tpu_rewrite_pass.h | 589 --- .../distributed_tpu_rewrite_pass_internal.cc | 45 - .../distributed_tpu_rewrite_pass_internal.h | 38 - .../host_training_loop_optimization_util.cc | 629 --- .../host_training_loop_optimization_util.h | 80 - .../incomplete_nodedef_builder.cc | 73 - .../incomplete_nodedef_builder.h | 58 - .../tpu_rewrite_pass_registration.cc | 4 +- tensorflow/stream_executor/tpu/BUILD | 1 - tensorflow/stream_executor/tpu/tpu_topology.h | 1 - 14 files changed, 1 insertion(+), 5718 deletions(-) delete mode 100644 tensorflow/core/tpu/graph_rewrite/cond_builder.cc delete mode 100644 tensorflow/core/tpu/graph_rewrite/cond_builder.h delete mode 100644 tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.cc delete mode 100644 tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.h delete mode 100644 tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.cc delete mode 100644 tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.h delete mode 100644 tensorflow/core/tpu/graph_rewrite/host_training_loop_optimization_util.cc delete mode 100644 tensorflow/core/tpu/graph_rewrite/host_training_loop_optimization_util.h delete mode 100644 tensorflow/core/tpu/graph_rewrite/incomplete_nodedef_builder.cc delete mode 100644 tensorflow/core/tpu/graph_rewrite/incomplete_nodedef_builder.h diff --git a/tensorflow/core/tpu/graph_rewrite/BUILD b/tensorflow/core/tpu/graph_rewrite/BUILD index bffb44c1b97..69238456d57 100644 --- a/tensorflow/core/tpu/graph_rewrite/BUILD +++ b/tensorflow/core/tpu/graph_rewrite/BUILD @@ -13,7 +13,6 @@ cc_library( srcs = ["tpu_rewrite_pass_registration.cc"], deps = [ ":distributed_tpu_configuration_rewrite_pass", - ":distributed_tpu_rewrite_pass", ":encapsulate_tpu_computations_pass", ":variable_merger_pass", "//tensorflow/core:core_cpu", @@ -102,120 +101,3 @@ cc_library( "@com_google_absl//absl/strings", ], ) - -cc_library( - name = "distributed_tpu_rewrite_pass_internal", - srcs = ["distributed_tpu_rewrite_pass_internal.cc"], - hdrs = ["distributed_tpu_rewrite_pass_internal.h"], - deps = [ - "//tensorflow/core:framework", - "@com_google_absl//absl/random", - ], -) - -cc_library( - name = "distributed_tpu_rewrite_pass", - srcs = [ - "distributed_tpu_rewrite_pass.cc", - ], - hdrs = [ - "distributed_tpu_rewrite_pass.h", - ], - deps = [ - ":cond_builder", - ":distributed_tpu_rewrite_helpers", - ":distributed_tpu_rewrite_pass_internal", - 
":host_training_loop_optimization_util", - ":incomplete_nodedef_builder", - "//tensorflow/compiler/jit:encapsulate_util", - "//tensorflow/compiler/jit:shape_inference", - "//tensorflow/compiler/tf2xla:resource_operation_table", - "//tensorflow/compiler/tf2xla:sharding_util", - "//tensorflow/compiler/tf2xla:side_effect_util", - "//tensorflow/compiler/tf2xla:tf2xla_util", - "//tensorflow/compiler/tf2xla:xla_compiler", - "//tensorflow/compiler/xla:array3d", - "//tensorflow/compiler/xla:array4d", - "//tensorflow/compiler/xla:xla_proto_cc", - "//tensorflow/compiler/xla/client:sharding_builder", - "//tensorflow/compiler/xla/service:computation_placer", - "//tensorflow/core:framework", - "//tensorflow/core:graph", - "//tensorflow/core:lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:session_options", - "//tensorflow/core/common_runtime:function", - "//tensorflow/core/common_runtime:graph_constructor", - "//tensorflow/core/common_runtime:lower_function_call_op", - "//tensorflow/core/common_runtime:lower_functional_ops", - "//tensorflow/core/common_runtime:lower_if_op", - "//tensorflow/core/common_runtime:lower_while_op", - "//tensorflow/core/common_runtime:optimization_registry", - "//tensorflow/core/protobuf/tpu:compile_metadata_proto_cc", - "//tensorflow/core/protobuf/tpu:dynamic_padding_proto_cc", - "//tensorflow/core/protobuf/tpu:topology_proto_cc", - "//tensorflow/core/tpu:tpu_compile_interface", - "//tensorflow/core/tpu:tpu_defs", - "//tensorflow/core/tpu/kernels:tpu_util_c_api_hdrs", - "//tensorflow/stream_executor/tpu:tpu_platform_interface", - "//tensorflow/stream_executor/tpu:tpu_topology_external", - "@com_google_absl//absl/algorithm:container", - "@com_google_absl//absl/container:flat_hash_map", - "@com_google_absl//absl/container:node_hash_map", - "@com_google_absl//absl/types:span", - ], -) - -cc_library( - name = "incomplete_nodedef_builder", - srcs = ["incomplete_nodedef_builder.cc"], - hdrs = ["incomplete_nodedef_builder.h"], - deps = [ - "//tensorflow/compiler/xla:status_macros", - "//tensorflow/core:core_cpu", - "//tensorflow/core:core_cpu_lib", - "//tensorflow/core:framework", - "//tensorflow/core:framework_internal", - "//tensorflow/core:lib", - ], -) - -cc_library( - name = "cond_builder", - srcs = ["cond_builder.cc"], - hdrs = ["cond_builder.h"], - deps = [ - ":incomplete_nodedef_builder", - "//tensorflow/compiler/xla:status_macros", - "//tensorflow/core:core_cpu", - "//tensorflow/core:core_cpu_lib", - "//tensorflow/core:framework", - "//tensorflow/core:framework_internal", - "//tensorflow/core:lib", - ], -) - -cc_library( - name = "host_training_loop_optimization_util", - srcs = [ - "host_training_loop_optimization_util.cc", - ], - hdrs = [ - "host_training_loop_optimization_util.h", - ], - visibility = ["//visibility:public"], - deps = [ - ":distributed_tpu_rewrite_pass_internal", - "//tensorflow/compiler/tf2xla:functionalize_control_flow_util", - "//tensorflow/compiler/tf2xla:tf2xla_util", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework_internal", - "//tensorflow/core:graph", - "//tensorflow/core:lib", - "//tensorflow/core:lib_internal", - "//tensorflow/core/protobuf/tpu:compile_metadata_proto_cc", - "@com_google_absl//absl/container:flat_hash_set", - "@com_google_absl//absl/container:node_hash_set", - "@com_google_absl//absl/types:optional", - ], -) diff --git a/tensorflow/core/tpu/graph_rewrite/cond_builder.cc b/tensorflow/core/tpu/graph_rewrite/cond_builder.cc deleted file mode 100644 index e16ae08aec3..00000000000 --- 
a/tensorflow/core/tpu/graph_rewrite/cond_builder.cc +++ /dev/null @@ -1,83 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/core/tpu/graph_rewrite/cond_builder.h" - -#include "tensorflow/compiler/xla/status_macros.h" -#include "tensorflow/core/common_runtime/function.h" -#include "tensorflow/core/framework/node_def_builder.h" -#include "tensorflow/core/tpu/graph_rewrite/incomplete_nodedef_builder.h" - -namespace tensorflow { - -CondBuilder::CondBuilder(string name, string device, const NodeDebugInfo& debug, - Graph* graph) - : graph_(graph), name_(std::move(name)), device_(std::move(device)) { - auto new_name = [graph, this](string suffix) { - return graph->NewName(strings::StrCat(name_, "/", suffix)); - }; - TF_CHECK_OK( - IncompleteNodeDefBuilder::Identity(new_name("pred"), DT_BOOL, debug) - .Device(device_) - .Build(graph_, &pred_)); - Node* switch_pred; - TF_CHECK_OK( - IncompleteNodeDefBuilder::Switch(new_name("switch_pred"), DT_BOOL, debug) - .Device(device_) - .Build(graph_, &switch_pred)); - graph_->AddEdge(pred(), 0, switch_pred, 0); - graph_->AddEdge(pred(), 0, switch_pred, 1); - TF_CHECK_OK( - IncompleteNodeDefBuilder::Identity(new_name("switch_f"), DT_BOOL, debug) - .Device(device_) - .Build(graph_, &switch_f_)); - TF_CHECK_OK( - IncompleteNodeDefBuilder::Identity(new_name("switch_t"), DT_BOOL, debug) - .Device(device_) - .Build(graph_, &switch_t_)); - graph_->AddEdge(switch_pred, kElseBranch, switch_f_, 0); - graph_->AddEdge(switch_pred, kThenBranch, switch_t_, 0); - Node* merge_pred; - TF_CHECK_OK(IncompleteNodeDefBuilder::Merge(new_name("merge_pred"), DT_BOOL, - debug, /*n=*/2) - .Device(device_) - .Build(graph_, &merge_pred)); - graph_->AddEdge(switch_f_, 0, merge_pred, kElseBranch); - graph_->AddEdge(switch_t_, 0, merge_pred, kThenBranch); - // Note: when additional return values are added then there should be a - // control dependency between those merge nodes and control_successor_ to - // ensure that it is control successor of conditional. 
- control_successor_ = merge_pred; -} - -Node* CondBuilder::pred() { return pred_; } - -Node* CondBuilder::switch_f() { return switch_f_; } - -Node* CondBuilder::switch_t() { return switch_t_; } - -Node* CondBuilder::control_successor() { return control_successor_; } - -Status CondBuilder::AddInput(const string& input_name, const DataType& type, - const string& device, const NodeDebugInfo& debug, - Node** input) { - auto b = IncompleteNodeDefBuilder::Switch( - graph_->NewName(strings::StrCat(name_, "/", input_name)), type, debug); - TF_RETURN_IF_ERROR(b.Device(device).Build(graph_, input)); - graph_->AddEdge(pred(), 0, *input, 1); - return Status::OK(); -} - -} // namespace tensorflow diff --git a/tensorflow/core/tpu/graph_rewrite/cond_builder.h b/tensorflow/core/tpu/graph_rewrite/cond_builder.h deleted file mode 100644 index 29e264dfc0a..00000000000 --- a/tensorflow/core/tpu/graph_rewrite/cond_builder.h +++ /dev/null @@ -1,74 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_CORE_TPU_GRAPH_REWRITE_COND_BUILDER_H_ -#define TENSORFLOW_CORE_TPU_GRAPH_REWRITE_COND_BUILDER_H_ - -#include - -#include "tensorflow/core/graph/graph.h" -#include "tensorflow/core/lib/core/status.h" - -namespace tensorflow { - -// Conditional builder. -// Convenience builder to make it easy to construct a conditional. E.g., -// Node* pred = ...; -// CondBuilder cb("cond", g); -// auto switch_var = cb.AddInput("var", DT_RESOURCE); -// g->AddEdge(pred, 0, cb.pred(), 0); -// Will create the nodes of a conditional that takes as input a resource -// variable ("var") as input and that switches on pred. -// -// This currently only handles the case needed by distributed_tpu_rewrite_pass -// and is not completely general. -class CondBuilder { - public: - enum Branch { kElseBranch = 0, kThenBranch = 1 }; - - CondBuilder(string name, string device, const NodeDebugInfo& debug, - Graph* graph); - - // Returns node corresponding to the predicate input. - Node* pred(); - - // Returns node corresponding to switch_f branch of predicate switch. - Node* switch_f(); - - // Returns node corresponding to switch_t branch of predicate switch. - Node* switch_t(); - - // Returns node corresponding to control successor. - Node* control_successor(); - - // Returns the Switch node to feed a value of the given type into the - // conditional. 
- Status AddInput(const string& input_name, const DataType& type, - const string& device, const NodeDebugInfo& debug, - Node** input); - - private: - Node* control_successor_; - Node* switch_f_; - Node* switch_t_; - Node* pred_; - Graph* const graph_; - const string name_; - const string device_; -}; - -} // namespace tensorflow - -#endif // TENSORFLOW_CORE_TPU_GRAPH_REWRITE_COND_BUILDER_H_ diff --git a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.cc b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.cc deleted file mode 100644 index 208cb8bd865..00000000000 --- a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.cc +++ /dev/null @@ -1,3926 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// Compilation for distributed TPU (TPU_REPLICATED_CORE devices). - -#include "tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.h" - -#include -#include - -#include "absl/algorithm/container.h" -#include "absl/container/flat_hash_map.h" -#include "tensorflow/compiler/jit/encapsulate_util.h" -#include "tensorflow/compiler/tf2xla/resource_operation_table.h" -#include "tensorflow/compiler/tf2xla/sharding_util.h" -#include "tensorflow/compiler/tf2xla/side_effect_util.h" -#include "tensorflow/compiler/tf2xla/tf2xla_util.h" -#include "tensorflow/compiler/xla/array3d.h" -#include "tensorflow/compiler/xla/array4d.h" -#include "tensorflow/compiler/xla/client/sharding_builder.h" -#include "tensorflow/compiler/xla/service/computation_placer.h" -#include "tensorflow/compiler/xla/xla.pb.h" -#include "tensorflow/core/common_runtime/function.h" -#include "tensorflow/core/common_runtime/graph_constructor.h" -#include "tensorflow/core/common_runtime/lower_function_call_op.h" -#include "tensorflow/core/common_runtime/lower_functional_ops.h" -#include "tensorflow/core/common_runtime/lower_if_op.h" -#include "tensorflow/core/common_runtime/lower_while_op.h" -#include "tensorflow/core/common_runtime/optimization_registry.h" -#include "tensorflow/core/framework/function.h" -#include "tensorflow/core/framework/graph_to_functiondef.h" -#include "tensorflow/core/framework/node_def_builder.h" -#include "tensorflow/core/framework/node_def_util.h" -#include "tensorflow/core/framework/partial_tensor_shape.h" -#include "tensorflow/core/framework/tensor.pb.h" -#include "tensorflow/core/framework/types.pb.h" -#include "tensorflow/core/framework/versions.pb.h" -#include "tensorflow/core/graph/algorithm.h" -#include "tensorflow/core/graph/graph.h" -#include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/lib/gtl/cleanup.h" -#include "tensorflow/core/lib/strings/proto_serialization.h" -#include "tensorflow/core/lib/strings/str_util.h" -#include "tensorflow/core/platform/fingerprint.h" -#include "tensorflow/core/protobuf/tpu/compile_metadata.pb.h" -#include 
"tensorflow/core/protobuf/tpu/dynamic_padding.pb.h" -#include "tensorflow/core/protobuf/tpu/topology.pb.h" -#include "tensorflow/core/public/session_options.h" -#include "tensorflow/core/tpu/graph_rewrite/cond_builder.h" -#include "tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_helpers.h" -#include "tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.h" -#include "tensorflow/core/tpu/graph_rewrite/host_training_loop_optimization_util.h" -#include "tensorflow/core/tpu/graph_rewrite/incomplete_nodedef_builder.h" -#include "tensorflow/core/tpu/kernels/tpu_util_c_api.h" -#include "tensorflow/core/tpu/tpu_compile_interface.h" -#include "tensorflow/core/tpu/tpu_defs.h" -#include "tensorflow/core/util/device_name_utils.h" -#include "tensorflow/core/util/dump_graph.h" -#include "tensorflow/stream_executor/tpu/tpu_platform_interface.h" - -namespace tensorflow { - -namespace { - -// Device coordinates are defined as (x, y, z, core), thus resulting in a rank 4 -// topology. -constexpr int kTPUTopologyRank = 4; - -// An upper bound on how many cores may be present in the topology. -static constexpr int kTPUMaxTopologySize = 4096; - -// Attribute containing the serialized xla::OpSharding to be passed to the -// corresponding XLA HLO operation, which represents how a shape is distributed -// across logical cores, e.g., replication, single-device, or partitioning. -const char kShardingAttribute[] = "_XlaSharding"; - -const char kTPUPartitionedInput[] = "TPUPartitionedInput"; -const char kTPUPartitionedOutput[] = "TPUPartitionedOutput"; - -// TODO(phawkins) add a canonical copy of these operator names and refactor -// everything to use it. -static const char* const kSendFromHostOp = "_XlaSendFromHost"; -static const char* const kRecvAtHostOp = "_XlaRecvAtHost"; - -static const char* const kTPUCompilationResultAttr = "_tpu_compilation_status"; -static const char* const kPostDeviceRewriteAttr = "_post_device_rewrite"; - -string CoreDeviceLabel(int core) { - return strings::StrCat("/device:", DEVICE_TPU_REPLICATED_CORE, ":", core); -} - -// Creates a unique node name with a particular prefix. -string UniqueNodeName(const StringPiece prefix, Graph* graph) { - return graph->NewName(strings::StrCat(prefix, "/_", internal::GetNodeId())); -} - -Status SetNodeDeviceForTPUCommunication(DeviceNameUtils::ParsedName device, - const string& target_device_type, - Node* node) { - TF_RET_CHECK(device.has_type && device.type == DEVICE_TPU_NODE); - TF_RET_CHECK(device.has_id); - TF_RET_CHECK(HasNodeAttr(node->def(), kXlaHasHostTransferAttrName)); - - // Store the device instance as an attr on the Node. - TF_RETURN_IF_ERROR(SetDeviceOrdinalAttributeForNode(node, device.id)); - - // Place the execute Op on the TPU_SYSTEM device so it can access the cache of - // compiled protos in the resource manager. - device.type = target_device_type; - device.id = 0; - - node->set_assigned_device_name(DeviceNameUtils::ParsedNameToString(device)); - return Status::OK(); -} - -Status SetNodeDeviceForTPUCommunication(const string& tpu_device_name, - const string& target_device_type, - Node* node) { - // Parse the TPU device. - DeviceNameUtils::ParsedName device; - TF_RET_CHECK(DeviceNameUtils::ParseFullName(tpu_device_name, &device)); - return SetNodeDeviceForTPUCommunication(device, target_device_type, node); -} - -// Iterate over the nodes in the original graph and find all the TPUReplicate -// nodes, and all the nodes that are part of outside_compilation clusters. 
-Status FindTaggedNodes( - Graph* graph, std::vector* replicate_nodes, - std::map* - outside_compilation_nodes, - std::map>* head_tail_outside_compilation_nodes) { - for (Node* node : graph->op_nodes()) { - if (node->type_string() == "_TPUReplicate") { - replicate_nodes->push_back(node); - const AttrValue* cluster_attr = node->attrs().Find(kTPUReplicateAttr); - if (cluster_attr == nullptr) { - return errors::Internal("TPUReplicate node ", node->name(), " has no ", - kTPUReplicateAttr, " attr."); - } else { - const string& cluster = cluster_attr->s(); - if (cluster.empty()) { - return errors::Internal("Attr ", kTPUReplicateAttr, " on node ", - node->name(), " has no string value."); - } - if (outside_compilation_nodes->find(cluster) != - outside_compilation_nodes->end()) { - return errors::Internal( - "TPUReplicate node ", node->name(), " has ", kTPUReplicateAttr, - " attr value '", cluster, - "' which is a duplicate of another TPUReplicate node in the " - "graph."); - } - (*outside_compilation_nodes)[cluster] = - DistributedTPURewritePass::OutsideCompilationNodeMap(); - (*head_tail_outside_compilation_nodes)[cluster] = std::vector(); - } - } - } - for (Node* node : graph->op_nodes()) { - if (node->type_string() != "_TPUReplicate") { - const AttrValue* cluster_attr = node->attrs().Find(kTPUReplicateAttr); - const AttrValue* outside_compilation_attr = - node->attrs().Find(kOutsideCompilationAttr); - if (cluster_attr == nullptr) { - if (outside_compilation_attr != nullptr) { - return errors::Internal("Node ", node->name(), " has ", - kOutsideCompilationAttr, " attr but no ", - kTPUReplicateAttr, " attr."); - } - } else { - const string& cluster = cluster_attr->s(); - if (cluster.empty()) { - return errors::Internal("Attr ", kTPUReplicateAttr, " on node ", - node->name(), " has no string value."); - } - const auto iter = outside_compilation_nodes->find(cluster); - if (iter == outside_compilation_nodes->end()) { - return errors::Internal( - "Attr ", kTPUReplicateAttr, " on node ", node->name(), - " does not correspond to a TPUReplicate node."); - } - if (outside_compilation_attr == nullptr) { - return errors::Internal("Node ", node->name(), " has ", - kTPUReplicateAttr, " attr but no ", - kOutsideCompilationAttr, " attr."); - } - const string& oc_cluster = outside_compilation_attr->s(); - if (oc_cluster.empty()) { - return errors::Internal("Attr ", kOutsideCompilationAttr, " on node ", - node->name(), " has no string value."); - } - - // Outside compilation cluster at head and tail of TPU computation has - // already been moved to host and is already replicated. As so, do not - // replicate outside compilation nodes with replica id attribute. - int replica_id; - if (TryGetNodeAttr(node->def(), kXlaReplicaIdAttrName, &replica_id)) { - const AttrValue* head_attr = - node->attrs().Find("_xla_only_arg_or_oc_input"); - const AttrValue* tail_attr = - node->attrs().Find("_xla_only_ret_or_oc_output"); - if (((head_attr != nullptr) && (head_attr->b())) || - ((tail_attr != nullptr) && (tail_attr->b()))) { - // This is safe as this has the same keys as - // outside_compilation_nodes which we already know has this key. - (*head_tail_outside_compilation_nodes)[cluster].push_back(node); - } - continue; - } - iter->second[oc_cluster].push_back(node); - } - } - } - return Status::OK(); -} - -// Helper class to spread TPU computation arguments and return values -// across cores. -// If all shapes are fully defined, balance by their size. 
-// If some of them are not fully defined, the undefined shapes size will -// be estimated with the average size of the fully defined ones. -// If none are defined, fall back to round-robin. -class TensorDevicePlacer { - public: - // Creates a TensorDevicePlacer object to distribute arguments or - // return values to a set of num_devices devices, where the types and - // the inferred shapes of the inputs (arguments or return values) are - // passed in types and shapes. - TensorDevicePlacer(int64 num_devices, const DataTypeVector& types, - const std::vector& shapes) - : index_nodes_(num_devices), sizes_(types.size()) { - int64 total_size = 0; - int64 num_defined = 0; - for (int64 i = 0; i < types.size(); ++i) { - sizes_[i] = GetInferredShapeSize(shapes[i], types[i]); - if (sizes_[i] >= 0) { - total_size += sizes_[i]; - ++num_defined; - } - } - // If a shape is undefined, select a size for it which is the average - // of the defined shapes. If no shapes are defined, assign 1 so that we - // get round-robin behavior. - int64 undefined_shape_size = - (num_defined > 0) ? total_size / num_defined : 1; - for (int64 i = 0; i < sizes_.size(); ++i) { - if (sizes_[i] < 0) { - sizes_[i] = undefined_shape_size; - } - } - min_heap_.reserve(num_devices); - for (int64 i = 0; i < num_devices; ++i) { - min_heap_.push_back(&index_nodes_[i]); - } - std::make_heap(min_heap_.begin(), min_heap_.end(), DeviceNodeCompare); - } - - // Reports that the argument/return-value at index has been assigned - // by the user to a given device. - void ReportDeviceAssigned(int64 device, int64 index) { - DeviceNode* node = &index_nodes_.at(device); - node->size += sizes_.at(index); - std::make_heap(min_heap_.begin(), min_heap_.end(), DeviceNodeCompare); - } - - // Retrieves the device at which the argument/return-value at index - // should be assigned to. - int64 RetrieveAssignment(int64 index) { - DeviceNode* node = *(min_heap_.begin()); - int64 device = node - index_nodes_.data(); - node->size += sizes_.at(index); - std::make_heap(min_heap_.begin(), min_heap_.end(), DeviceNodeCompare); - return device; - } - - private: - struct DeviceNode { - int64 size = 0; - }; - - // std::push_heap, etc... creates a max-heap, but we want a min-heap. - static bool DeviceNodeCompare(const DeviceNode* lhs, const DeviceNode* rhs) { - return lhs->size > rhs->size; - } - - static int64 GetInferredShapeSize(const InferredShape& ishape, - DataType dtype) { - return ishape.shape.IsFullyDefined() - ? ishape.shape.num_elements() * DataTypeSize(dtype) - : -1; - } - - std::vector index_nodes_; - std::vector min_heap_; - std::vector sizes_; -}; - -Status ValidateCoreNumber(int64 core, int64 num_cores_per_replica) { - if (core < 0 || core >= num_cores_per_replica) { - return tensorflow::errors::InvalidArgument("Invalid core ID: ", core, - ". 
The valid core IDs are [0..", - num_cores_per_replica, ")"); - } - return Status::OK(); -} - -Status FindHostComputeKeyPlaceholderNodes( - const Graph* graph, const std::vector& replicate_nodes, - std::unordered_map* host_compute_key_placeholder_map) { - host_compute_key_placeholder_map->clear(); - for (const auto node : replicate_nodes) { - (*host_compute_key_placeholder_map)[node->name()] = nullptr; - } - - for (Node* node : graph->op_nodes()) { - if (node->type_string() == "Placeholder" && - str_util::EndsWith(node->name(), "_key_placeholder")) { - const AttrValue* call_node_attr = - node->attrs().Find("_host_compute_call_node"); - if (call_node_attr != nullptr) { - auto iter = host_compute_key_placeholder_map->find(call_node_attr->s()); - if (iter == host_compute_key_placeholder_map->end()) { - return errors::InvalidArgument( - "Node ", node->name(), " has _host_compute_call_node attribute '", - call_node_attr->s(), "' that doesn't correspond to a call node"); - } - if (iter->second != nullptr) { - return errors::InvalidArgument( - "Key placeholder node ", iter->second->name(), " for call node ", - call_node_attr->s(), " previously found as ", - iter->second->name()); - } - iter->second = node; - } - } - } - - return Status::OK(); -} - -Status ReplaceCompilationResultNodeWithIdentity(Graph* graph, Node** node) { - Node* old_node = *node; - // We want to replace the node with an identity node with the same name. - const string& node_name = old_node->name(); - - // Create identity node. - TF_ASSIGN_OR_RETURN( - Node * id_node, - BuildIdentityNode(graph, node_name, DT_STRING, - /*input=*/nullptr, /*requested_device=*/"")); - - // No incoming edges are copied as a new one will be added from compile node - // to id_node. - - // Copy outgoing edges to the id node. 
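The balancing strategy implemented by TensorDevicePlacer above is easier to see in isolation: each argument or return value is assigned to whichever device currently has the smallest accumulated byte count, undefined shapes are priced at the average of the defined ones, and when nothing is defined the price of 1 degenerates to round-robin. The following standalone sketch is illustrative only and not part of the pass; a linear scan stands in for the min-heap, and all names and sizes are made up.

#include <algorithm>
#include <iostream>
#include <vector>

int main() {
  const int num_devices = 2;
  // Byte sizes of the values to place; -1 marks an undefined shape.
  std::vector<long long> sizes = {400, -1, 100, 300};

  long long total = 0, num_defined = 0;
  for (long long s : sizes) {
    if (s >= 0) {
      total += s;
      ++num_defined;
    }
  }
  const long long undefined_size = num_defined > 0 ? total / num_defined : 1;
  for (long long& s : sizes) {
    if (s < 0) s = undefined_size;
  }

  std::vector<long long> device_load(num_devices, 0);
  for (size_t i = 0; i < sizes.size(); ++i) {
    // The least-loaded device receives the next value (a min-heap in the
    // real implementation).
    const int device = static_cast<int>(
        std::min_element(device_load.begin(), device_load.end()) -
        device_load.begin());
    device_load[device] += sizes[i];
    std::cout << "value " << i << " -> device " << device << "\n";
  }
  return 0;
}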
- std::vector out_edges(old_node->out_edges().begin(), - old_node->out_edges().end()); - for (const Edge* edge : out_edges) { - Node* dst = edge->dst(); - int src_output = edge->src_output(); - int dst_input = edge->dst_input(); - - if (src_output == Graph::kControlSlot) { - graph->AddControlEdge(id_node, dst); - } else { - graph->AddEdge(id_node, src_output, dst, dst_input); - } - graph->RemoveEdge(edge); - } - graph->RemoveNode(old_node); - - *node = id_node; - return Status::OK(); -} - -Status FillPaddingMap( - const Node& replicate_node, - protobuf::RepeatedPtrField* padding_maps) { - std::vector padding_map_strs; - TF_RETURN_IF_ERROR( - GetNodeAttr(replicate_node.attrs(), "padding_map", &padding_map_strs)); - padding_maps->Reserve(padding_map_strs.size()); - for (const string& padding_map_str : padding_map_strs) { - tpu::PaddingMap* padding_map = padding_maps->Add(); - if (!padding_map->ParseFromString(padding_map_str)) { - return errors::InvalidArgument( - "Malformed padding_map serialized string: ", padding_map_str); - } - } - return Status::OK(); -} - -Status GetStepMarkerLocation(const Node& replicate_node, - xla::DebugOptions::StepMarkerLocation* location) { - string step_marker_location_attr; - TF_RETURN_IF_ERROR(GetNodeAttr(replicate_node.attrs(), "step_marker_location", - &step_marker_location_attr)); - if (step_marker_location_attr.empty()) { - *location = xla::DebugOptions::STEP_MARK_AT_ENTRY; - } else { - if (!xla::DebugOptions::StepMarkerLocation_Parse(step_marker_location_attr, - location)) { - return errors::InvalidArgument("Malformed step_marker_location: ", - step_marker_location_attr); - } - } - return Status::OK(); -} - -// Extracts a map of dimension and number of splits for tiled input from xla -// sharding attribute. -Status GetDimensionIndicesAndNumSplitsFromSharding( - const xla::OpSharding& sharding, std::map* split_dimension_map) { - for (int dim_index = 0; - dim_index < sharding.tile_assignment_dimensions_size(); dim_index++) { - if (sharding.tile_assignment_dimensions(dim_index) > 1) { - split_dimension_map->emplace( - dim_index, sharding.tile_assignment_dimensions(dim_index)); - } - } - - if (split_dimension_map->empty()) { - return errors::InvalidArgument("Arg has unnecessary tiled sharding: ", - sharding.DebugString()); - } - return Status::OK(); -} - -// Updates contents of the function with `function_name` in function library -// definition `flib_def` to `new_graph`. This is required when graph -// transformation happens inside a function call body. -Status UpdateFunctionLibDefinition(const Graph& new_graph, - const std::string& function_name, - FunctionLibraryDefinition* flib_def) { - FunctionDef graph_fdef; - TF_RETURN_IF_ERROR(GraphToFunctionDef(new_graph, function_name, &graph_fdef)); - TF_RETURN_IF_ERROR(flib_def->ReplaceFunction(function_name, graph_fdef)); - return Status::OK(); -} - -struct NodeOut { - Node* node; - int index; -}; - -struct ShardedInputIndex { - int replica_id; - int argument_index; - - bool operator<(const ShardedInputIndex& rhs) const { - return std::tie(replica_id, argument_index) < - std::tie(rhs.replica_id, rhs.argument_index); - } -}; - -struct ShardedInputInfo { - // Split node that would be connected to tiled input Node. - Node* split_node; - // List of splits nodes and output index of the split node from which sharded - // input will be connected to the TPUExecute node. The inputs are ordered by - // logical core ids. 
- std::vector sharded_inputs; -}; - -// Adds split node and split dimension node to graph for sharding tiled inputs. -// |graph| owns the returned Node* instance. -xla::StatusOr CreateSplitNode(int num_splits, int dim, - int orig_src_output, DataType dtype, - absl::string_view name_prefix, - Node* control_predecessor, Node* orig_src, - Graph* graph) { - const std::string input_assigned_device = orig_src->assigned_device_name(); - - // Add a split dimension node. - NodeDef split_dim_def; - split_dim_def.set_name( - graph->NewName(absl::StrCat(name_prefix, "/split_dim"))); - split_dim_def.set_op("Const"); - split_dim_def.set_device(input_assigned_device); - AddNodeAttr("dtype", DT_INT32, &split_dim_def); - TensorProto tensor_proto; - tensor_proto.set_dtype(DT_INT32); - tensor_proto.add_int_val(dim); - TensorShape shape({}); - shape.AsProto(tensor_proto.mutable_tensor_shape()); - AddNodeAttr("value", tensor_proto, &split_dim_def); - Status s; - Node* split_dim_node = graph->AddNode(split_dim_def, &s); - TF_RETURN_IF_ERROR(s); - // Add a split node. - NodeDef split_def; - split_def.set_name(graph->NewName(absl::StrCat(name_prefix, "/split"))); - split_def.set_op("Split"); - split_def.set_device(input_assigned_device); - AddNodeAttr("num_split", num_splits, &split_def); - AddNodeAttr("T", dtype, &split_def); - split_def.add_input(absl::StrCat(split_dim_node->name(), ":0")); - split_def.add_input(absl::StrCat(orig_src->name(), ":", orig_src_output)); - Node* split_node = graph->AddNode(split_def, &s); - TF_RETURN_IF_ERROR(s); - - graph->AddEdge(split_dim_node, 0, split_node, 0); - graph->AddEdge(orig_src, orig_src_output, split_node, 1); - - // Add a control dependency from `control_predecessor` to newly created - // constant node. This ensures that newly added split/split dim - // nodes are placed inside correct while loop frames when TPUExecute - // node is inside a host training loop. - graph->AddControlEdge(control_predecessor, split_dim_node); - - return split_node; -} - -// Creates a set of splits nodes that shards tiled input node in graph. -xla::StatusOr CreateOrGetSplitNodesForInputSharding( - const xla::OpSharding& sharding, int orig_arg_num, DataType dtype, - int replica_id, int orig_src_output, Node* orig_src, - Node* control_predecessor, Graph* graph, - std::map* - arg_index_to_sharded_input_map) { - ShardedInputIndex input_index{replica_id, orig_arg_num}; - auto iter = arg_index_to_sharded_input_map->find(input_index); - if (iter != arg_index_to_sharded_input_map->end()) { - return iter->second; - } - // Maps input dimension and number of splits with which the - // dimension sharded. - std::map split_dimension_map; - TF_RETURN_IF_ERROR(GetDimensionIndicesAndNumSplitsFromSharding( - sharding, &split_dimension_map)); - TF_RET_CHECK(!split_dimension_map.empty()) - << "Unnecessary sharding attribute found."; - - // For v1 while loop, nodes inside the loop body must either - // 1) Have data edges from while loop input node. - // or - // 2) Have direct control dependency from while loop input control - // node. - // - // As so, if we are adding Split node inside, while loop body, - // we must manually add a control dependency to a node inside - // a while loop (i.e. `control_predecessor`) to constant nodes - // without data in-edges to make sure that added split nodes - // have correct frame name. Else, placer will complain when - // `BuildControlFlow()` is invoked. 
- - auto sharding_it = split_dimension_map.begin(); - std::queue split_nodes_for_dimension; - int split_dimension = sharding_it->first; - int num_split = sharding_it->second; - - // Creates a tree of split nodes for sharding tiled inputs. Splits nodes - // are created such that input data is sharded in row major order. - // Split nodes at ith depth from the original input node represent nodes - // that split the input data at ith dimension. - TF_ASSIGN_OR_RETURN( - Node * root_split_node, - CreateSplitNode(num_split, split_dimension, orig_src_output, dtype, - absl::StrCat("sharded_input/replica_", replica_id, - "_dim_", split_dimension), - control_predecessor, orig_src, graph)); - sharding_it++; - - split_nodes_for_dimension.emplace(root_split_node); - - while (sharding_it != split_dimension_map.end()) { - split_dimension = sharding_it->first; - num_split = sharding_it->second; - int num_split_nodes_in_dimension = split_nodes_for_dimension.size(); - for (int i = 0; i < num_split_nodes_in_dimension; ++i) { - Node* input_split_node = split_nodes_for_dimension.front(); - split_nodes_for_dimension.pop(); - for (int src_output_index = 0; - src_output_index < input_split_node->num_outputs(); - ++src_output_index) { - TF_ASSIGN_OR_RETURN( - Node * split_node, - CreateSplitNode(num_split, split_dimension, src_output_index, dtype, - absl::StrCat("sharded_input/replica_", replica_id, - "_dim_", split_dimension), - control_predecessor, input_split_node, graph)); - split_nodes_for_dimension.emplace(split_node); - } - } - sharding_it++; - } - - // `split_nodes_for_dimension` now includes final split nodes - // from which sharded data will be fed into TPUExcute nodes -- sorted by - // row major order. - std::vector sharded_inputs_list; - sharded_inputs_list.reserve(split_nodes_for_dimension.size()); - while (!split_nodes_for_dimension.empty()) { - Node* split_node = split_nodes_for_dimension.front(); - split_nodes_for_dimension.pop(); - int num_splits; - TF_RETURN_IF_ERROR( - GetNodeAttr(split_node->def(), "num_split", &num_splits)); - for (int out_index = 0; out_index < num_splits; ++out_index) { - sharded_inputs_list.emplace_back(NodeOut{split_node, out_index}); - } - } - - ShardedInputInfo sharded_input_info{root_split_node, - std::move(sharded_inputs_list)}; - (*arg_index_to_sharded_input_map)[input_index] = sharded_input_info; - return sharded_input_info; -} - -// Creates a concat node to be used for aggregating sharded retvals across -// logical cores. -xla::StatusOr CreateConcatNode(int dim, int num_splits, DataType dtype, - absl::string_view name_prefix, - const std::vector& inputs, - Graph* graph, absl::string_view device) { - // Add a Concat dim node. - NodeDef concat_dim_def; - concat_dim_def.set_name( - graph->NewName(absl::StrCat(name_prefix, "/concat_dim"))); - concat_dim_def.set_op("Const"); - AddNodeAttr("dtype", DT_INT32, &concat_dim_def); - concat_dim_def.set_device(std::string(device)); - TensorProto tensor_proto; - tensor_proto.set_dtype(DT_INT32); - tensor_proto.add_int_val(dim); - TensorShape shape({}); - shape.AsProto(tensor_proto.mutable_tensor_shape()); - AddNodeAttr("value", tensor_proto, &concat_dim_def); - Status s; - Node* concat_dim_node = graph->AddNode(concat_dim_def, &s); - TF_RETURN_IF_ERROR(s); - - // Add a Concat node. 
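The "row major" ordering produced by the tree of Split nodes above, and reassembled in reverse by the concat helpers below, means the final split outputs are enumerated with the last sharded dimension varying fastest, and logical core i consumes the i-th tile of that enumeration. The following standalone sketch shows the ordering for a sharding of dimension 0 into 2 and dimension 1 into 3; it is illustrative only and not part of the pass.

#include <iostream>
#include <map>
#include <vector>

int main() {
  // dimension index -> number of splits, as in split_dimension_map.
  const std::map<int, int> split_dimension_map = {{0, 2}, {1, 3}};

  // Expand one dimension at a time, like the split-node tree: nodes at depth
  // d split along the d-th sharded dimension.
  std::vector<std::vector<int>> shard_coords = {{}};
  for (const auto& dim_and_splits : split_dimension_map) {
    std::vector<std::vector<int>> next;
    for (const auto& prefix : shard_coords) {
      for (int i = 0; i < dim_and_splits.second; ++i) {
        std::vector<int> coord = prefix;
        coord.push_back(i);
        next.push_back(coord);
      }
    }
    shard_coords = std::move(next);
  }

  // Prints (0,0) (0,1) (0,2) (1,0) (1,1) (1,2): the last dimension varies
  // fastest, so logical core i receives the i-th tile in this order.
  for (const auto& coord : shard_coords) {
    std::cout << "(" << coord[0] << "," << coord[1] << ") ";
  }
  std::cout << "\n";
  return 0;
}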
- NodeDef concat_def; - concat_def.set_name(graph->NewName(absl::StrCat(name_prefix, "/concat"))); - concat_def.set_op("Concat"); - AddNodeAttr("N", num_splits, &concat_def); - AddNodeAttr("T", dtype, &concat_def); - concat_def.add_input(absl::StrCat(concat_dim_node->name(), ":0")); - concat_def.set_device(std::string(device)); - for (const auto& i : inputs) { - concat_def.add_input(absl::StrCat(i.node->name(), ":", i.index)); - } - Node* concat_node = graph->AddNode(concat_def, &s); - TF_RETURN_IF_ERROR(s); - - graph->AddEdge(concat_dim_node, 0, concat_node, 0); - - // 0th input to concat node is a concat dim node. So we start from 1st input - // and add all input edges. - int dst_input = 1; - for (const auto& i : inputs) { - graph->AddEdge(i.node, i.index, concat_node, dst_input); - ++dst_input; - } - return concat_node; -} - -// Creates a set of Concat nodes that aggregates sharded outputs from TPUExecute -// nodes into a single output. Sharded outputs are concatenated along row major -// order. That is, tiled output along 0th dimension will be concatenated last. -xla::StatusOr CreateConcatNodesForRetval( - const xla::OpSharding& sharding, DataType dtype, int replica_id, - const std::vector& orig_inputs, Graph* graph, - absl::string_view device) { - std::map split_dimension_map; - TF_RETURN_IF_ERROR(GetDimensionIndicesAndNumSplitsFromSharding( - sharding, &split_dimension_map)); - - std::vector inputs_to_sharded_retval = orig_inputs; - - for (auto it = split_dimension_map.rbegin(); it != split_dimension_map.rend(); - it++) { - auto dim = it->first; - auto num_splits = it->second; - - int num_concat_nodes = inputs_to_sharded_retval.size() / num_splits; - int input_index_to_concat_node = 0; - - std::vector new_concat_nodes; - for (int i = 0; i < num_concat_nodes; ++i) { - auto concat_input_it = - inputs_to_sharded_retval.begin() + input_index_to_concat_node; - std::vector inputs(concat_input_it, - concat_input_it + num_splits); - input_index_to_concat_node += num_splits; - - TF_ASSIGN_OR_RETURN( - Node * concat_node, - CreateConcatNode( - dim, num_splits, dtype, - absl::StrCat("sharded_output/replica_", replica_id, "_dim_", dim), - inputs, graph, device)); - new_concat_nodes.emplace_back(NodeOut{concat_node, 0}); - } - inputs_to_sharded_retval = new_concat_nodes; - } - - TF_RET_CHECK(inputs_to_sharded_retval.size() == 1); - return inputs_to_sharded_retval.at(0).node; -} - -absl::optional GetCoreIndexInSharding(const xla::OpSharding& sharding, - int64 core) { - absl::optional output_index; - for (int i = 0; i < sharding.tile_assignment_devices_size(); i++) { - int64 assigned_core = sharding.tile_assignment_devices(i); - if (assigned_core == core) { - output_index = i; - break; - } - } - return output_index; -} - -// Set the padding ops the same devices as the original inputs. If the original -// inputs are on TPUs, the padding ops will be placed on TPUs and XLA on demand -// mode will be triggered, so we don't need to copy the data back to the host -// to do the padding. 
-Status SetPaddingNodesDevices(Graph* graph) { - for (Node* n : graph->op_nodes()) { - bool tpu_padding_attr; - if (n->type_string() == "Pad" && - GetNodeAttr(n->attrs(), kPostDeviceRewriteAttr, &tpu_padding_attr) - .ok()) { - Node* unpadded_input; - TF_RETURN_IF_ERROR(n->input_node(0, &unpadded_input)); - - const string& requested_device = unpadded_input->requested_device(); - const string& assigned_device = unpadded_input->assigned_device_name(); - if (!requested_device.empty() || !assigned_device.empty()) { - // The output nodes of the original unpadded inputs include the padded - // inputs and real shapes of inputs, we assign those to the same device - // as the original inputs. - for (Node* out : unpadded_input->out_nodes()) { - if (GetNodeAttr(out->attrs(), kPostDeviceRewriteAttr, - &tpu_padding_attr) - .ok()) { - out->set_requested_device(requested_device); - out->set_assigned_device_name(assigned_device); - } - } - // There might be a tf.shape node added before TPUCompileOp, we need to - // set its device as well. - for (Node* out : n->out_nodes()) { - if (n->type_string() == "Shape") { - out->set_requested_device(requested_device); - out->set_assigned_device_name(assigned_device); - } - } - } - } - } - return Status::OK(); -} - -const string& AssignedOrRequestedDevice(const Node* node) { - if (!node->assigned_device_name().empty()) { - return node->assigned_device_name(); - } - return node->requested_device(); -} - -bool IsTpuDevice(const string& device_string) { - DeviceNameUtils::ParsedName device; - return DeviceNameUtils::ParseFullName(device_string, &device) && - device.type == DEVICE_TPU_NODE; -} - -// Returns a set of device ops can be placed on TPU. There is no strict rule of -// thumb to decide which ops should be in the list, but empirically they are -// mostly dummy ops like Identity-like ops or control flow related ops. However -// people can add also add other ops like Pad to allow data stay on TPU. -const absl::flat_hash_set& PlaceOnTPUOpList() { - static const auto place_on_tpu_ops = new absl::flat_hash_set( - {"Identity", "IdentityN", "Enter", "Exit", "Switch", "Merge", - "NextIteration", "Shape"}); - return *place_on_tpu_ops; -} - -// If an op satisfies the following conditions, it will be placed on the same -// TPU device as its inputs: -// (1) The op can be placed on TPU (in the PlaceOnTPUOpList) -// (2) The op itself has no requested or assigned devices. -// (3) All the data inputs of this op are placed on the same device on TPUs. -// There are exceptions like the NextIterations input of Switch node can -// be placed on CPU as it is just a boolean. -// -// Returns true if the node device has been changed, otherwise returns false. -bool PlaceOpsOnTPU(Node* node) { - if (!AssignedOrRequestedDevice(node).empty() || - !PlaceOnTPUOpList().contains(node->type_string())) { - return false; - } - string src_tpu_device = ""; - Node* src_node; - for (const Edge* e : node->in_edges()) { - if (e->IsControlEdge()) { - continue; - } - Node* src = e->src(); - const string& src_device = AssignedOrRequestedDevice(src); - - // Make exceptions that we don't force the some inputs to place on TPUs. 
- if (node->IsSwitch() && src->IsLoopCond()) { - continue; - } - - if (!IsTpuDevice(src_device) || - (!src_tpu_device.empty() && src_device != src_tpu_device)) { - return false; - } - if (src_tpu_device.empty()) { - src_tpu_device = src_device; - src_node = src; - } - } - node->set_assigned_device_name(src_node->assigned_device_name()); - node->set_requested_device(src_node->requested_device()); - return true; -} - -// Validate sharding configuration derived from XlaSharding attribute. -// Infer the core id from the OpSharding, if necessary. -Status ParseAndValidateSharding(const xla::OpSharding& sharding, - const int num_cores_per_replica, - int64* inferred_core_id, - absl::optional* result) { - if (sharding.type() == xla::OpSharding::MAXIMAL) { - int64 core_annotation = sharding.tile_assignment_devices(0); - TF_RETURN_IF_ERROR( - ValidateCoreNumber(core_annotation, num_cores_per_replica)); - if (*inferred_core_id == -1 || *inferred_core_id > core_annotation) { - *inferred_core_id = core_annotation; - result->emplace(sharding); - } - } else { - if (sharding.type() == xla::OpSharding::OTHER) { - for (int64 core : sharding.tile_assignment_devices()) { - TF_RETURN_IF_ERROR(ValidateCoreNumber(core, num_cores_per_replica)); - } - } - - if (!result->has_value()) { - *result = sharding; - } else { - std::string result_value_serialized; - std::string sharding_serialized; - SerializeToStringDeterministic(result->value(), &result_value_serialized); - SerializeToStringDeterministic(sharding, &sharding_serialized); - - if (result_value_serialized != sharding_serialized) { - // We see different shardings, assign to core 0. - result->emplace(xla::sharding_builder::AssignDevice(0)); - } - } - } - return Status::OK(); -} - -// As XlaSharding node may be followed by Cast op or an Identity op, -// recursively walk the graph and aggregate nodes connectd to -// |input_node| or Cast/Identity op following the |input_node|. -void FindNodesMaybeContainingShardingInfo(const Node& input_node, - std::vector* nodes) { - if (input_node.IsIdentity() || input_node.type_string() == "Cast") { - for (const Node* connected_node : input_node.out_nodes()) - FindNodesMaybeContainingShardingInfo(*connected_node, nodes); - } - nodes->emplace_back(&input_node); -} - -// Parse sharding configuration from |node| or it's adjacent nodes. -// XlaSharding configuration may be derived from -// a) Connected Identity op node. -// b) Connected Cast op node. -xla::StatusOr> -ParseInputShardingFromAdjacentNode(const int num_cores_per_replica, - const Node& node) { - // If |node| has `device` attribute or is a XlaSharding op, - // return the parsed OpSharding. - TF_ASSIGN_OR_RETURN(absl::optional sharding, - ParseShardingFromDevice(node, num_cores_per_replica)); - if (sharding.has_value()) return sharding; - - // XlaShardingOp may be followed by an identity or followed by identity - // and a Cast op. - std::vector potential_nodes_with_input_sharding; - FindNodesMaybeContainingShardingInfo(node, - &potential_nodes_with_input_sharding); - for (const Node* maybe_node_with_sharding_info : - potential_nodes_with_input_sharding) { - if (maybe_node_with_sharding_info->type_string() != "XlaSharding") continue; - - TF_ASSIGN_OR_RETURN(absl::optional sharding_config, - ParseShardingFromDevice(*maybe_node_with_sharding_info, - num_cores_per_replica)); - if (sharding_config.has_value()) return sharding_config; - } - return sharding; -} - -// Walk the graph from an argument node to find OpSharding configuration -// from its neighbor nodes. 
Sharding configuration may be inferred from -// 1) Parsing XlaSharding attribute from neighboring node. -// 2) If argument node is a resource, then by parsing adjacent nodes -// of the connected ReadVariable op. -Status ParseAndValidateShardingFromNeighbors( - const int num_cores_per_replica, const std::string& arg_node_name, - const Node& neighbor_node, int64* inferred_core_id, bool* is_fast_mem, - absl::optional* result) { - if (neighbor_node.attrs().Find(TPU_FAST_MEM_ATTR) != nullptr) { - *is_fast_mem = true; - VLOG(2) << "place " << neighbor_node.name() << " on fast memory because " - << arg_node_name << " has " << TPU_FAST_MEM_ATTR << " attribute"; - } - - // XlaSharding information may be encoded on node directly connected to the - // argument node. - TF_ASSIGN_OR_RETURN( - absl::optional sharding, - ParseInputShardingFromAdjacentNode(num_cores_per_replica, neighbor_node)); - if (sharding.has_value()) { - TF_RETURN_IF_ERROR(ParseAndValidateSharding( - *sharding, num_cores_per_replica, inferred_core_id, result)); - return Status::OK(); - } - - // When we use variable in TPU computation, we always have a - // XlaSharding op followed by a ReadVariableOp. As so, correctly parse - // the users of ReadVariableOp for potential sharding configuration. - if (neighbor_node.type_string() == "ReadVariableOp") { - for (const Edge* e : neighbor_node.out_edges()) { - if (e->IsControlEdge()) continue; - - if (e->dst()->attrs().Find(TPU_FAST_MEM_ATTR) != nullptr) { - *is_fast_mem = true; - VLOG(2) << "place " << arg_node_name << " on fast memory because " - << e->dst()->name() << TPU_FAST_MEM_ATTR << " attribute"; - } - - TF_ASSIGN_OR_RETURN( - absl::optional sharding, - ParseInputShardingFromAdjacentNode(num_cores_per_replica, *e->dst())); - if (sharding.has_value()) { - TF_RETURN_IF_ERROR(ParseAndValidateSharding( - *sharding, num_cores_per_replica, inferred_core_id, result)); - return Status::OK(); - } - } - } - return Status::OK(); -} - -} // namespace - -// Inputs: -// replication_spec_string: the device to which the TPUReplicate node was -// assigned. -// device_set: the set of TF devices. -// Outputs: -// tpu_compilation_device: the name of the TPU compilation device. -// num_tpus_per_task: the number of TPUs in each task. Verifies that all tasks -// have the same number of TPU devices. -// tpu_devices: the TPU devices, indexed by [task][device]. -static Status GetTPUDeviceNames( - const string& replication_spec_string, const DeviceSet& device_set, - string* tpu_compilation_device, int* num_tpus_per_task, - std::vector>* tpu_devices) { - // TODO(b/110910013) GetSystemDevice parses the spec and returns the name of - // the tpu_system device, which we replace by the cpu device. We do this - // replacement because we want to place the TPUCompileOp (and the compile - // assert op) explicitly on cpu devices on the same job as the tpu_system - // device. - DeviceNameUtils::ParsedName replication_spec; - Device* replication_device; - TF_RETURN_IF_ERROR(DistributedTPURewriteHelpers::GetSystemDevice( - replication_spec_string, device_set, &replication_spec, - &replication_device)); - *tpu_compilation_device = - str_util::StringReplace(replication_device->name(), DEVICE_TPU_SYSTEM, - DEVICE_CPU, /*replace_all=*/true); - - // Finds the set of TPU devices attached to the tasks in the job. 
- TF_RETURN_IF_ERROR(DistributedTPURewriteHelpers::GetTPUDevices( - replication_spec, device_set, num_tpus_per_task, tpu_devices)); - - return Status::OK(); -} - -// Parses the topology attribute of TPUReplicate, and populates *topology with -// a physical mesh coordinate to (task, device) mapping. -static Status ParseTopologyAttr(const string& topology_attr, - const tpu::TpuTopologyExternal& tpu_topology, - int num_tasks, int num_tpus_per_task, - xla::Array4D>* topology) { - static_assert(4 == kTPUTopologyRank, "Assumes the topology rank is 4"); - tpu::TopologyProto proto; - proto.ParseFromString(topology_attr); - if (proto.mesh_shape_size() != kTPUTopologyRank) { - return errors::InvalidArgument("TPU topology must be rank ", - kTPUTopologyRank); - } - if (proto.num_tasks() != num_tasks) { - return errors::InvalidArgument("Mismatched number of TPU tasks"); - } - if (proto.num_tpu_devices_per_task() != num_tpus_per_task) { - return errors::InvalidArgument("Mismatched number of TPUs per task (", - proto.num_tpu_devices_per_task(), - " != ", num_tpus_per_task, ")."); - } - if (proto.device_coordinates_size() != - num_tasks * num_tpus_per_task * kTPUTopologyRank) { - return errors::InvalidArgument( - "device coordinates should be ", num_tasks, "x", num_tpus_per_task, "x", - kTPUTopologyRank, "; got ", proto.device_coordinates_size()); - } - - int devices_per_chip = tpu_topology.LogicalDevicesPerChip(kTensorCore); - *topology = xla::Array4D>( - tpu_topology.chip_bounds().x, tpu_topology.chip_bounds().y, - tpu_topology.chip_bounds().z, devices_per_chip, {-1, -1}); - int pos = 0; - for (int task = 0; task < num_tasks; ++task) { - for (int device = 0; device < num_tpus_per_task; ++device) { - int32 x = proto.device_coordinates(pos++); - int32 y = proto.device_coordinates(pos++); - int32 z = proto.device_coordinates(pos++); - int32 core = proto.device_coordinates(pos++); - - if (!tpu_topology.HasChip(x, y, z) || core < 0 || - core >= devices_per_chip) { - return errors::InvalidArgument( - "Mesh coordinates (", x, ",", y, ",", z, ",", core, - ") are not valid for the current TPU topology"); - } - if ((*topology)(x, y, z, core).first != -1) { - return errors::InvalidArgument("Duplicate coordinates (", x, ",", y, - ",", z, ",", core, ") in TPU topology"); - } - (*topology)(x, y, z, core) = {task, device}; - } - } - return Status::OK(); -} - -// Parses the value of the device_assignment attribute to TPUReplicate. -// Populates *device_assignment; *device_assignment must be a 2D array with -// shape (num_replicas, num_cores_per_replica). 
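// Editor's worked example (hypothetical values, added for illustration): the
// attribute is a flat list of kTPUTopologyRank (= 4) integers per
// (replica, logical core), giving chip coordinates (x, y, z) plus the core
// index on that chip. With num_replicas = 2 and num_cores_per_replica = 1:
//
//   device_assignment_attr = { 0, 0, 0, 0,    // replica 0 -> chip (0,0,0), core 0
//                              1, 0, 0, 0 };  // replica 1 -> chip (1,0,0), core 0
//
// Its length must therefore be 2 * 1 * 4 = 8, which is exactly the size check
// performed at the top of the function below.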
-static Status ParseDeviceAssignmentAttr( - absl::Span device_assignment_attr, - const tpu::TpuTopologyExternal& tpu_topology, int num_replicas, - int num_cores_per_replica, - xla::Array2D* device_assignment) { - static_assert(4 == kTPUTopologyRank, "Assumes the topology rank is 4"); - - const int64 device_assignment_attr_size = - num_replicas * num_cores_per_replica * kTPUTopologyRank; - if (device_assignment_attr.size() != device_assignment_attr_size) { - return errors::InvalidArgument( - "Length of device_assignment attribute must be equal to num_replicas (", - num_replicas, ") * num_cores_per_replica (", num_cores_per_replica, - ") * ", kTPUTopologyRank, " got ", device_assignment_attr.size()); - } - for (int core : device_assignment_attr) { - if (core < 0 || core >= kTPUMaxTopologySize) { - return errors::InvalidArgument( - "Invalid core number in device assignment: ", core); - } - } - - *device_assignment = xla::Array2D( - num_replicas, num_cores_per_replica); - int devices_per_chip = tpu_topology.LogicalDevicesPerChip(kTensorCore); - xla::Array4D replica_assignment( - tpu_topology.chip_bounds().x, tpu_topology.chip_bounds().y, - tpu_topology.chip_bounds().z, devices_per_chip, -1); - int pos = 0; - for (int replica = 0; replica < num_replicas; ++replica) { - for (int logical_core = 0; logical_core < num_cores_per_replica; - ++logical_core) { - int32 x = device_assignment_attr[pos++]; - int32 y = device_assignment_attr[pos++]; - int32 z = device_assignment_attr[pos++]; - int32 core = device_assignment_attr[pos++]; - - if (!tpu_topology.HasChip(x, y, z) || core < 0 || - core >= devices_per_chip) { - return errors::InvalidArgument( - "Mesh coordinates (", x, ",", y, ",", core, - ") are not valid for the current TPU topology"); - } - tpu::TpuCoreLocationExternal core_location = - tpu_topology.Core(x, y, z, kTensorCore, core); - - if (replica_assignment(x, y, z, core) != -1) { - return errors::InvalidArgument("Duplicate coordinates (", x, ",", y, - ",", z, ",", core, - ") in TPU device assignment"); - } - replica_assignment(x, y, z, core) = replica; - (*device_assignment)(replica, logical_core) = core_location; - } - } - return Status::OK(); -} - -// Builds TensorFlow device assignments for the special case of a single core -// computation that is replicated to every core in the mesh. -// LINT.IfChange -static Status BuildFullMeshDeviceAssignment( - int num_replicas, const std::vector>& tpu_devices, - int num_tasks, int num_tpus_per_task, - std::vector>* tf_device_assignment) { - // Assign TensorFlow devices to replicas arbitrarily. - for (int i = 0; i < num_replicas; ++i) { - int task = i / num_tpus_per_task; - int device = i % num_tpus_per_task; - TF_RET_CHECK(task >= 0 && task < num_tasks); - TF_RET_CHECK(device >= 0 && device < num_tpus_per_task); - - // We don't actually know which TF device corresponds to which physical - // device, but it doesn't matter—they're all identical. - (*tf_device_assignment)[i] = {tpu_devices[task][device]->name()}; - } - return Status::OK(); -} -// LINT.ThenChange(//tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.cc) - -// Builds TensorFlow device assignments for a replicated computation and convert -// device_assignment into xla_device_assignment. 
-static Status BuildGeneralDeviceAssignment( - int num_replicas, int num_cores_per_replica, - const std::vector>& tpu_devices, - const xla::Array2D& device_assignment, - const xla::Array4D>& topology, - std::vector>* tf_device_assignment, - std::unique_ptr* xla_device_assignment) { - // Assign TensorFlow devices to each computation's replicas according to - // device_assignment and 'topology'. - *xla_device_assignment = absl::make_unique( - num_replicas, num_cores_per_replica); - for (int replica = 0; replica < num_replicas; ++replica) { - for (int computation = 0; computation < num_cores_per_replica; - ++computation) { - const tpu::TpuCoreLocationExternal& core_location = - device_assignment(replica, computation); - - int task; - int device; - std::tie(task, device) = - topology(core_location.chip_coordinates().x, - core_location.chip_coordinates().y, - core_location.chip_coordinates().z, core_location.index()); - - CHECK_LT(computation, num_cores_per_replica); - (**xla_device_assignment)(replica, computation) = core_location.Id(); - - // The communication pattern between replicas will be determined later by - // BuildAllReduceRing. - TF_RET_CHECK(task >= 0 && task < tpu_devices.size()); - TF_RET_CHECK(device >= 0 && device < tpu_devices[task].size()); - (*tf_device_assignment)[replica].push_back( - tpu_devices[task][device]->name()); - } - } - return Status::OK(); -} - -/*static*/ Status DistributedTPURewritePass::BuildDeviceAssignment( - const tpu::TpuTopologyExternal& tpu_topology, int num_tpus_per_task, - const std::vector>& tpu_devices, int num_replicas, - int num_cores_per_replica, const string& topology_attr, - absl::Span device_assignment_attr, - std::vector>* tf_device_assignment, - std::unique_ptr* xla_device_assignment) { - const int num_tasks = tpu_devices.size(); - const int num_tpu_devices = num_tasks * num_tpus_per_task; - VLOG(2) << "num_tasks=" << num_tasks - << " num_tpus_per_task=" << num_tpus_per_task; - - // Checks num_replicas is sane first to avoid integer overflow. - if (num_replicas > num_tpu_devices) { -#ifdef PLATFORM_CLOUD_TPU - return errors::InvalidArgument("Requested num_replicas=", num_replicas, - " but there are only ", num_tpu_devices, - " cores in the TPU topology."); -#else - return errors::InvalidArgument("Requested num_replicas=", num_replicas, - " but there are only ", num_tpu_devices, - " cores in the TPU topology."); -#endif - } - if (num_replicas * num_cores_per_replica > num_tpu_devices) { - return errors::InvalidArgument( - "Requested num_replicas=", num_replicas, " with ", - num_cores_per_replica, " cores per replica, but there are only ", - num_tpu_devices, " cores in the TPU topology"); - } - - tf_device_assignment->clear(); - tf_device_assignment->resize(num_replicas); - - // Special case: we allow the user to omit the topology and device assignment - // information in two cases: - // * there is only one replica and one core per replica. In this case, we - // don't need to know topology information because we don't communicate with - // other cores. - // * the number of replicas is equal to the number of cores in the slice. In - // this case, all cores are running the same program so we don't need to - // know which is which. 
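// Editor's worked example (hypothetical sizes, for illustration only): with
// 2 tasks and 8 TPUs per task, the second special case (num_replicas == 16,
// num_cores_per_replica == 1) reduces to the full-mesh assignment computed by
// BuildFullMeshDeviceAssignment above:
//
//   replica 0  -> task 0 (0 / 8), device 0 (0 % 8)
//   replica 7  -> task 0,         device 7
//   replica 8  -> task 1 (8 / 8), device 0 (8 % 8)
//   replica 15 -> task 1,         device 7
//
// No topology or device assignment attribute is needed here because every
// core runs an identical program.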
- if (topology_attr.empty()) { - // LINT.IfChange - if (num_replicas != 1 && num_replicas != num_tpu_devices) { - return errors::InvalidArgument( - "TPUReplicate asked to create ", num_replicas, - " replicas, but the number of cores in the TPU topology is ", - num_tpu_devices, - " and no TPU device assignment was supplied. " - "A TPU device assignment is required if the number of replicas is " - "not 1 or the number of cores in the topology (", - num_tpu_devices, ")"); - } - - if (num_cores_per_replica != 1) { - return errors::InvalidArgument( - "A TPU topology must be provided if num_cores_per_replica != 1"); - } - - if (!device_assignment_attr.empty()) { - return errors::InvalidArgument( - "A TPU topology must be provided if device_assignment_attr is " - "non-empty"); - } - - // If there is only one replica, assign the Tensorflow computation to task 0 - // device 0, and leave the XLA device assignment empty. We don't know which - // core this is in the TPU topology, but it doesn't matter—we don't need to - // communicate with any other cores. - if (num_replicas == 1) { - (*tf_device_assignment)[0] = {tpu_devices[0][0]->name()}; - return Status::OK(); - } - - // Otherwise, num_replicas is equal to the number of cores, and we build a - // device assignment that covers the entire mesh. We do not need to know - // the topology to do so because all cores are identical. - return BuildFullMeshDeviceAssignment(num_replicas, tpu_devices, num_tasks, - num_tpus_per_task, - tf_device_assignment); - // LINT.ThenChange(//tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.cc) - } - - // Array that maps mesh coordinates to {TF task, TF TPU device #} pairs. - xla::Array4D> topology; - TF_RETURN_IF_ERROR(ParseTopologyAttr(topology_attr, tpu_topology, num_tasks, - num_tpus_per_task, &topology)); - - // Array that maps logical (replica, core) pairs to physical mesh coordinates. - xla::Array2D device_assignment; - TF_RETURN_IF_ERROR(ParseDeviceAssignmentAttr( - device_assignment_attr, tpu_topology, num_replicas, num_cores_per_replica, - &device_assignment)); - - return BuildGeneralDeviceAssignment( - num_replicas, num_cores_per_replica, tpu_devices, device_assignment, - topology, tf_device_assignment, xla_device_assignment); -} - -Status DistributedTPURewritePass::GetComputationForTPUReplicateOp( - const NameAttrList& function, FunctionLibraryRuntime* flr, - Graph* computation, DataTypeVector* arg_types, - DataTypeVector* retval_types) { - FunctionLibraryRuntime::Handle handle; - - TF_RETURN_IF_ERROR( - flr->Instantiate(function.name(), AttrSlice(&function.attr()), &handle)); - - const FunctionBody* fbody = flr->GetFunctionBody(handle); - - CopyGraph(*fbody->graph, computation); - *arg_types = fbody->arg_types; - *retval_types = fbody->ret_types; - return Status::OK(); -} - -// Grab the InferredShape corresponding to an edge input. 
-static Status GetEdgeShape(const GraphShapeInfo& shape_info, const Edge& edge, - const InferredShape** info) { - auto it = shape_info.find(edge.src()->name()); - if (it == shape_info.end()) { - return errors::InvalidArgument( - "Input to replicated TPU computation is missing InferredShape: ", - edge.src()->name()); - } - TF_RET_CHECK(it->second.size() > edge.src_output()); - *info = &it->second[edge.src_output()]; - return Status::OK(); -} - -Status DistributedTPURewritePass::GetArgAndRetvalShapes( - const GraphShapeInfo& shape_info, const Node& node, - const ParameterInfo& params_info, std::vector* arg_shapes, - std::vector* retval_shapes) { - std::vector input_edges; - TF_RETURN_IF_ERROR(node.input_edges(&input_edges)); - - // If any replica's arg shape is unknown, we will mark the computation's arg - // shape as being unknown. If the shapes differ the TpuExecute Op will raise a - // runtime error. - std::vector any_replica_shape_unknown( - params_info.NumInputsToEachReplica()); - arg_shapes->clear(); - arg_shapes->resize(params_info.NumInputsToEachReplica()); - TF_RET_CHECK(input_edges.size() == params_info.NumInputsFromHost()); - // Determines the shapes of the per-replica arguments and checks that all - // replicas have identical shapes. - int64 edge_pos = 0; - auto check_shape = [&](int input_index) -> Status { - const InferredShape* info; - TF_RETURN_IF_ERROR(GetEdgeShape(shape_info, *input_edges[edge_pos], &info)); - ++edge_pos; - - if ((info->handle_type == DT_INVALID && !info->shape.IsFullyDefined()) || - (info->handle_type != DT_INVALID && - !info->handle_shape.IsFullyDefined())) { - any_replica_shape_unknown[input_index] = true; - } - xla::StatusOr status = - MergeInferredShapes((*arg_shapes)[input_index], *info); - if (!status.ok()) { - return errors::InvalidArgument( - "Mismatched shapes for input ", input_index, ": ", - (*arg_shapes)[input_index].shape.DebugString(), " vs. ", - info->shape.DebugString()); - } - (*arg_shapes)[input_index] = status.ValueOrDie(); - return Status::OK(); - }; - - for (int64 i = 0; i < params_info.NumReplicas(); ++i) { - for (int64 j = 0; j < params_info.NumPerReplicaArgs(); ++j) { - TF_RETURN_IF_ERROR(check_shape(j)); - } - } - - for (int64 i = 0; i < params_info.NumDistributedArgs(); ++i) { - TF_RETURN_IF_ERROR(check_shape(params_info.NumPerReplicaArgs() + i)); - } - - for (int64 i = 0; - i < params_info.NumPerReplicaArgs() + params_info.NumDistributedArgs(); - ++i) { - if (any_replica_shape_unknown[i]) { - (*arg_shapes)[i].shape = PartialTensorShape(); - (*arg_shapes)[i].handle_shape = PartialTensorShape(); - } - } - - // Determines the shape of the broadcast arguments. - for (int64 i = 0; i < params_info.NumBroadcastArgs(); ++i) { - TF_RET_CHECK(node.input_type(edge_pos) != DT_RESOURCE); - const InferredShape* info; - TF_RETURN_IF_ERROR(GetEdgeShape(shape_info, *input_edges[edge_pos], &info)); - (*arg_shapes)[i + params_info.NumPerReplicaArgs() + - params_info.NumDistributedArgs()] - .shape = info->shape; - ++edge_pos; - } - - // Determines the handle shape and handle type of the resource variable - // arguments. 
- for (int64 i = 0; i < params_info.NumVariables(); ++i) { - TF_RET_CHECK(node.input_type(edge_pos) == DT_RESOURCE); - const InferredShape* info; - TF_RETURN_IF_ERROR(GetEdgeShape(shape_info, *input_edges[edge_pos], &info)); - InferredShape& arg_shape = - (*arg_shapes)[i + params_info.NumPerReplicaArgs() + - params_info.NumDistributedArgs() + - params_info.NumBroadcastArgs()]; - arg_shape.shape = TensorShape(); // Variables are always scalars. - arg_shape.handle_shape = info->handle_shape; - arg_shape.handle_type = info->handle_type; - TF_RET_CHECK(arg_shape.handle_type != DT_INVALID); - ++edge_pos; - } - - // Determines the shape of the guaranteed constants. - // TODO(vinuraja): Can be removed because they are not required for any - // calculations. Leaving them here for symmetry with other structures like - // arg_types, arg_sharding, etc. - for (int64 i = 0; i < params_info.NumGuaranteedConstants(); ++i) { - TF_RET_CHECK(node.input_type(edge_pos) != DT_RESOURCE); - const InferredShape* info; - TF_RETURN_IF_ERROR(GetEdgeShape(shape_info, *input_edges[edge_pos], &info)); - (*arg_shapes)[i + params_info.NumPerReplicaArgs() + - params_info.NumDistributedArgs() + - params_info.NumBroadcastArgs() + params_info.NumVariables()] - .shape = info->shape; - ++edge_pos; - } - - // Extract the return value shapes. - auto it = shape_info.find(node.name()); - retval_shapes->clear(); - if (it != shape_info.end()) { - TF_RET_CHECK(it->second.size() >= node.num_outputs()); - retval_shapes->resize(node.num_outputs()); - for (int i = 0; i < node.num_outputs(); ++i) { - (*retval_shapes)[i].shape = it->second[i].shape; - } - } else if (node.num_outputs() > 0) { - return errors::InvalidArgument( - "Replicated TPU computation is missing InferredShape: ", - FormatNodeForError(node)); - } - return Status::OK(); -} - -// Verifies that all nodes have legal sharding. -static Status ValidateCoreNumbers(const Graph& graph, - int num_cores_per_replica) { - for (Node* n : graph.nodes()) { - TF_ASSIGN_OR_RETURN(absl::optional sharding, - ParseShardingFromDevice(*n, num_cores_per_replica)); - } - return Status::OK(); -} - -static Status InferXlaShardingFromNeighbors( - const Node& n, int num_cores_per_replica, FunctionLibraryRuntime* flr, - CachedFunctionHandles* cached_function_handles, - absl::optional* output_sharding, bool* is_fast_mem) { - int64 core = -1; - absl::optional result; - // We assume the variable has been allocated on fast memory if any consuming - // op has TPU_FAST_MEM_ATTR attribute. This is a protocol between runtime and - // compiler. - *is_fast_mem = false; - for (const Edge* edge : n.out_edges()) { - if (edge->IsControlEdge()) continue; - - TF_RETURN_IF_ERROR(ParseAndValidateShardingFromNeighbors( - num_cores_per_replica, n.name(), *edge->dst(), &core, is_fast_mem, - &result)); - - if (!flr) continue; - - // The nodes deciding this arg's device assignment might be in - // FunctionDef. Instantiate FunctionDefs associated with this node - // and check nodes using this arg. 
- std::function parse_sharding_from_function = - [&](const Edge* call_edge) { - auto associated_functions = GetAssociatedFunctions( - *call_edge->dst(), flr->GetFunctionLibraryDefinition()); - for (auto& associated_function : associated_functions) { - FunctionLibraryRuntime::Handle handle; - TF_RETURN_IF_ERROR(cached_function_handles->GetOrInstantiate( - associated_function.func_name(), - AttrSlice(&associated_function.attrs()), &handle)); - const FunctionBody* body = flr->GetFunctionBody(handle); - Graph* g = body->graph; - - for (Node* body_node : g->nodes()) { - if (!body_node->IsArg()) continue; - - int index; - TF_RETURN_IF_ERROR( - GetNodeAttr(body_node->attrs(), "index", &index)); - if (index != call_edge->dst_input()) continue; - - for (const Edge* out_edge : body_node->out_edges()) { - if (out_edge->IsControlEdge()) continue; - - TF_RETURN_IF_ERROR(ParseAndValidateShardingFromNeighbors( - num_cores_per_replica, n.name(), *out_edge->dst(), &core, - is_fast_mem, &result)); - - TF_RETURN_IF_ERROR(parse_sharding_from_function(out_edge)); - } - } - } - return Status::OK(); - }; - TF_RETURN_IF_ERROR(parse_sharding_from_function(edge)); - } - *output_sharding = result; - return Status::OK(); -} - -bool UseSpmdForXlaPartitioning(const Node* replicate_node) { - bool spmd_attr; - if (!replicate_node || - !TryGetNodeAttr(replicate_node->attrs(), "use_spmd_for_xla_partitioning", - &spmd_attr)) { - spmd_attr = false; - } - return spmd_attr; -} - -Status DistributedTPURewritePass::AssignArgsAndRetvalsToCores( - int num_cores_per_replica, const ParameterInfo& params_info, - const DataTypeVector& arg_types, - const std::vector& arg_shapes, - const DataTypeVector& retval_types, - const std::vector& retval_shapes, const Graph& graph, - const Node* replicate_node, FunctionLibraryRuntime* flr, - std::vector* arg_sharding, std::vector* arg_fast_mem, - std::vector* retval_sharding) { - // Builds vectors of the argument and return nodes. - std::vector args(arg_types.size()); - std::vector retvals(retval_types.size()); - absl::flat_hash_map partitioned_output_nodes; - for (Node* node : graph.op_nodes()) { - if (node->IsArg()) { - int index; - TF_RETURN_IF_ERROR(GetNodeAttr(node->attrs(), "index", &index)); - TF_RET_CHECK(index >= 0 && index < args.size()); - args[index] = node; - } else if (node->IsRetval()) { - int index; - TF_RETURN_IF_ERROR(GetNodeAttr(node->attrs(), "index", &index)); - TF_RET_CHECK(index >= 0 && index < retvals.size()); - retvals[index] = node; - } - } - for (const Edge* edge : replicate_node->out_edges()) { - int num_partitioned_outputs = 0; - for (const Edge* out_edge : edge->dst()->out_edges()) { - if (out_edge->dst()->type_string() == kTPUPartitionedOutput) { - partitioned_output_nodes[edge->src_output()] = out_edge->dst(); - num_partitioned_outputs++; - } - } - if (num_partitioned_outputs > 1) { - return errors::InvalidArgument( - "More than one TPUPartitionedOutput per replciated output."); - } - } - - // Verifies there are no missing arguments/return values. - for (int i = 0; i < args.size(); ++i) { - if (args[i] == nullptr) { - return errors::Internal("Missing function argument: ", i); - } - } - for (int i = 0; i < retvals.size(); ++i) { - if (retvals[i] == nullptr) { - return errors::Internal("Missing function return value: ", i); - } - } - - // Assigns a core to each _Arg. Chooses the lowest-numbered core that - // consumes the argument. We choose the lowest-numbered core so the - // assignment is deterministic. 
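// Editor's summary sketch (illustrative only) of how the loop below resolves
// each argument's sharding:
//
//   inferred sharding     resulting placement
//   MAXIMAL               the single core in tile_assignment_devices(0)
//   OTHER (tiled)         every core listed in tile_assignment_devices()
//   REPLICATED            all num_cores_per_replica cores
//   (none)                Replicate() when SPMD partitioning applies, else
//                         AssignDevice(core) picked by TensorDevicePlacer or core 0
//
// Whatever is chosen is serialized into the _Arg node's kShardingAttribute so
// later passes and the compiler see the same decision.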
- TensorDevicePlacer args_device_selector(num_cores_per_replica, arg_types, - arg_shapes); - arg_sharding->resize(args.size()); - arg_fast_mem->resize(args.size()); - CachedFunctionHandles cached_function_handles(flr); - const bool use_spmd = UseSpmdForXlaPartitioning(replicate_node) || - replicate_inputs_outputs_by_default_for_xla_spmd_; - for (int i = 0; i < args.size(); ++i) { - const Node* n = args[i]; - absl::optional assigned_core; - absl::optional sharding; - bool is_fast_mem; - TF_RETURN_IF_ERROR(InferXlaShardingFromNeighbors( - *n, num_cores_per_replica, flr, &cached_function_handles, &sharding, - &is_fast_mem)); - - if (params_info.IsPerReplicaArg(i) || params_info.IsDistributedArg(i)) { - Node* input_node; - TF_RETURN_IF_ERROR(replicate_node->input_node(i, &input_node)); - if (input_node->type_string() == kTPUPartitionedInput) { - TF_ASSIGN_OR_RETURN(absl::optional parsed_sharding, - GetShardingFromNodeDef(input_node->def())); - if (!parsed_sharding.has_value()) - return errors::InvalidArgument("Missing _XlaSharding attr from: ", - input_node->DebugString()); - sharding = parsed_sharding; - VLOG(1) << "Arg " << i << " parsed sharding information from " - << input_node->name() << " : " - << parsed_sharding->DebugString(); - } - } - - if (sharding.has_value() && enable_automatic_model_parallelism_) { - return tensorflow::errors::InvalidArgument( - "Specifying manual sharding is not allowed when automatic " - "model parallelism is enabled.", - sharding->DebugString()); - } - - if (!sharding.has_value()) { - if (use_spmd && - (params_info.IsVariableArg(i) || params_info.IsBroadcastArg(i) || - ((params_info.IsPerReplicaArg(i) || - params_info.IsDistributedArg(i)) && - arg_types[i] != DT_RESOURCE))) { - // Use replication for host variables or non-variable per-replica - // inputs. - sharding = xla::sharding_builder::Replicate(); - } else { - // TODO(dlibenzi): Distributing variables to cores other than 0 makes - // learning/brain/research/babelfish/trainer:trainer_tpu_test fail. - // For now distribute only per replica arguments, unless - // tf_jf_distribute_vars is set, to allow debugging the issue. 
- if (((params_info.IsPerReplicaArg(i) || - params_info.IsDistributedArg(i)) && - arg_types[i] != DT_RESOURCE) || - (distribute_vars_ && params_info.IsVariableArg(i))) { - assigned_core = args_device_selector.RetrieveAssignment(i); - } else { - assigned_core = 0; - } - sharding = xla::sharding_builder::AssignDevice(*assigned_core); - } - } else if (sharding->type() == xla::OpSharding::MAXIMAL) { - assigned_core = sharding->tile_assignment_devices(0); - } else if (sharding->type() != xla::OpSharding::REPLICATED && - sharding->type() != xla::OpSharding::OTHER) { - return tensorflow::errors::InvalidArgument( - "Unsupported argument sharding: ", sharding->DebugString()); - } - if (assigned_core.has_value()) { - args_device_selector.ReportDeviceAssigned(*assigned_core, i); - VLOG(3) << "Assigning argument " << i << " (" << n->DebugString() - << ") to core " << *assigned_core; - args[i]->set_assigned_device_name(CoreDeviceLabel(*assigned_core)); - } else if (sharding->type() == xla::OpSharding::OTHER) { - for (int64 core : sharding->tile_assignment_devices()) { - args_device_selector.ReportDeviceAssigned(core, i); - VLOG(3) << "Assigning argument " << i << " (" << n->DebugString() - << ") with tiled sharding to core " << core; - } - } else { - CHECK_EQ(sharding->type(), xla::OpSharding::REPLICATED); - for (int64 core = 0; core < num_cores_per_replica; ++core) { - args_device_selector.ReportDeviceAssigned(core, i); - } - VLOG(3) << "Assigning argument " << i << " (" << n->DebugString() - << ") to all cores"; - } - (*arg_sharding)[i] = *sharding; - (*arg_fast_mem)[i] = is_fast_mem; - if (is_fast_mem) { - VLOG(3) << "Add " << TPU_FAST_MEM_ATTR << " attribute to " - << args[i]->name(); - } - args[i]->AddAttr(kShardingAttribute, sharding->SerializeAsString()); - } - TF_RETURN_IF_ERROR(cached_function_handles.ReleaseAllHandles()); - - // Assigns each _Retval node to the core that produces its value. 
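// Editor's note (illustrative): return values are handled much like the
// arguments above: the sharding is parsed from the edge that produces the
// retval (or from a consuming TPUPartitionedOutput node when one exists), it
// defaults to Replicate() under SPMD and to AssignDevice(core) otherwise, and
// the result is recorded on the _Retval node as kShardingAttribute.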
- TensorDevicePlacer retvals_device_selector(num_cores_per_replica, - retval_types, retval_shapes); - retval_sharding->resize(retvals.size()); - for (int i = 0; i < retvals.size(); ++i) { - const Edge* edge; - TF_RETURN_IF_ERROR(retvals[i]->input_edge(0, &edge)); - - TF_ASSIGN_OR_RETURN( - absl::optional sharding, - ParseShardingFromDevice(*edge->src(), num_cores_per_replica)); - - if (partitioned_output_nodes.contains(i)) { - Node* output_node = partitioned_output_nodes[i]; - TF_ASSIGN_OR_RETURN(absl::optional parsed_sharding, - GetShardingFromNodeDef(output_node->def())); - if (parsed_sharding.has_value()) { - sharding = parsed_sharding; - VLOG(1) << "Retval " << i << " parsed sharding information from " - << output_node->name() << " : " << sharding->DebugString(); - } - } - absl::optional assigned_core; - if (sharding.has_value()) { - if (enable_automatic_model_parallelism_) { - return tensorflow::errors::InvalidArgument( - "Specifying manual sharding is not allowed when automatic " - "model parallelism is enabled.", - sharding->DebugString()); - } - - if (sharding.value().type() == xla::OpSharding::MAXIMAL) { - assigned_core = sharding.value().tile_assignment_devices(0); - TF_RETURN_IF_ERROR( - ValidateCoreNumber(*assigned_core, num_cores_per_replica)); - } else if (sharding.value().type() != xla::OpSharding::REPLICATED && - sharding.value().type() != xla::OpSharding::OTHER) { - return tensorflow::errors::InvalidArgument( - "Unsupported argument sharding: ", sharding->DebugString()); - } - } else { - if (use_spmd) { - sharding = xla::sharding_builder::Replicate(); - } else { - if (distribute_vars_) { - assigned_core = retvals_device_selector.RetrieveAssignment(i); - } else { - assigned_core = 0; - } - sharding = xla::sharding_builder::AssignDevice(*assigned_core); - } - } - if (assigned_core.has_value()) { - retvals[i]->set_assigned_device_name(CoreDeviceLabel(*assigned_core)); - retvals_device_selector.ReportDeviceAssigned(*assigned_core, i); - VLOG(3) << "Assigning return value " << i << " (" - << retvals[i]->DebugString() << ") to core " << *assigned_core; - } else if (sharding->type() == xla::OpSharding::OTHER) { - for (int64 core : sharding->tile_assignment_devices()) { - retvals_device_selector.ReportDeviceAssigned(core, i); - VLOG(3) << "Assigning return value " << i << " (" - << retvals[i]->DebugString() << ") with tiled sharding to core " - << core; - } - } else { - CHECK_EQ(sharding->type(), xla::OpSharding::REPLICATED); - for (int64 core = 0; core < num_cores_per_replica; ++core) { - retvals_device_selector.ReportDeviceAssigned(core, i); - } - VLOG(3) << "Assigning return value " << i << " (" - << retvals[i]->DebugString() << ") to all cores."; - } - retvals[i]->AddAttr(kShardingAttribute, sharding->SerializeAsString()); - (*retval_sharding)[i] = *sharding; - } - return Status::OK(); -} - -// Builds Shape nodes that compute the shapes of arguments whose shapes are not -// statically known. -/* static */ Status DistributedTPURewritePass::BuildDynamicShapeNodes( - const Node& replicate_node, const std::vector& arg_shapes, - const ParameterInfo& params_info, const std::vector& variable_reads, - Graph* graph, std::vector* dynamic_shape_nodes) { - dynamic_shape_nodes->clear(); - - std::vector replicate_input_edges; - TF_RETURN_IF_ERROR(replicate_node.input_edges(&replicate_input_edges)); - - // The compiler determines the shape of each constant by inspecting the value - // of its corresponding host-memory tensor; this happens when a step is run. 
- // As a result, the shapes of constants are not needed at graph rewrite time. - const int num_args = arg_shapes.size() - params_info.NumGuaranteedConstants(); - TF_RET_CHECK(num_args == params_info.NumPerReplicaArgs() + - params_info.NumDistributedArgs() + - params_info.NumBroadcastArgs() + - params_info.NumVariables()); - - for (int i = 0; i < num_args; ++i) { - const PartialTensorShape* shape = arg_shapes[i].handle_type == DT_INVALID - ? &arg_shapes[i].shape - : &arg_shapes[i].handle_shape; - if (!shape->IsFullyDefined()) { - Node* src; - int src_output; - if (params_info.IsPerReplicaArg(i)) { - TF_RET_CHECK(i < replicate_input_edges.size()); - // All replicas must have the same input shapes. Uses the shape of the - // inputs from the first replica. - src = replicate_input_edges[i]->src(); - src_output = replicate_input_edges[i]->src_output(); - } else if (params_info.IsDistributedArg(i) || - params_info.IsBroadcastArg(i)) { - int64 input_num = - params_info.NumPerReplicaArgs() * params_info.NumReplicas() + i - - params_info.NumPerReplicaArgs(); - TF_RET_CHECK(0 <= input_num && - input_num < replicate_input_edges.size()); - src = replicate_input_edges[input_num]->src(); - src_output = replicate_input_edges[input_num]->src_output(); - } else { - int64 var_num = i - params_info.NumPerReplicaArgs() - - params_info.NumDistributedArgs() - - params_info.NumBroadcastArgs(); - TF_RET_CHECK(0 <= var_num && var_num < variable_reads.size()); - src = variable_reads[var_num]; - src_output = 0; - } - - NodeDef def; - def.set_name(graph->NewName(strings::StrCat(src->name(), "/shape"))); - def.set_op("Shape"); - def.set_device(src->assigned_device_name()); - AddNodeAttr("T", src->output_type(src_output), &def); - AddNodeAttr("out_type", DT_INT64, &def); - MergeDebugInfo(NodeDebugInfo(replicate_node.def()), &def); - - Status status; - Node* shape_node = graph->AddNode(def, &status); - if (!status.ok()) return status; - dynamic_shape_nodes->push_back(shape_node); - - shape_node->set_assigned_device_name(src->assigned_device_name()); - graph->AddEdge(src, src_output, shape_node, 0); - } - } - return Status::OK(); -} - -// Builds a TPUCompile node that compiles the bodies of the function call -// `nodes`. -Status DistributedTPURewritePass::BuildCompileNode( - const Node* replicate_node, const NameAttrList& function, - uint64 library_fingerprint, const ParameterInfo& params_info, - const std::vector& arg_shapes, - const DataTypeVector& arg_types, - const std::vector& guaranteed_constant_nodes, - const string& session_handle, - const std::vector& arg_sharding, - const std::vector& arg_fast_mem, - const std::vector& retval_sharding, - int num_cores_per_replica, const string& compile_device, - const xla::DeviceAssignment* xla_device_assignment, - const std::vector& dynamic_shape_nodes, Graph* graph, - Node** compile_node, int64 autotuner_thresh) { - VLOG(1) << "BuildCompileNode"; - - tpu::TPUCompileMetadataProto proto; - proto.set_num_replicas(params_info.NumReplicas()); - proto.set_num_cores_per_replica(num_cores_per_replica); - proto.set_function_library_fingerprint(library_fingerprint); - proto.set_enable_automatic_model_parallelism( - enable_cross_replica_sharding_mirrored_variables_); - const bool use_spmd = UseSpmdForXlaPartitioning(replicate_node); - proto.set_use_spmd_for_xla_partitioning(use_spmd); - - // Get and fill padding map. 
- if (replicate_node != nullptr) { - TF_RETURN_IF_ERROR( - FillPaddingMap(*replicate_node, proto.mutable_padding_maps())); - xla::DebugOptions::StepMarkerLocation location; - TF_RETURN_IF_ERROR(GetStepMarkerLocation(*replicate_node, &location)); - proto.set_step_marker_location(location); - } - - if (xla_device_assignment != nullptr) { - TF_RETURN_IF_ERROR( - xla_device_assignment->Serialize(proto.mutable_device_assignment())); - } - - const int num_args = arg_types.size(); - const int num_guaranteed_constants = guaranteed_constant_nodes.size(); - const int guaranteed_const_start_index = num_args - num_guaranteed_constants; - TF_RET_CHECK(num_args == arg_shapes.size()); - TF_RET_CHECK(num_args == arg_sharding.size()) - << num_args << " != " << arg_sharding.size(); - - for (int i = 0; i < num_args; ++i) { - tpu::TPUCompileMetadataProto::Arg* arg = proto.add_args(); - DataType type = arg_types[i]; - const InferredShape& arg_shape = arg_shapes[i]; - if (type == DT_RESOURCE) { - TF_RET_CHECK(arg_shape.handle_type != DT_INVALID) << i; - arg->set_dtype(arg_shape.handle_type); - arg_shape.handle_shape.AsProto(arg->mutable_shape()); - arg->set_kind(tpu::TPUCompileMetadataProto::Arg::VARIABLE); - arg->set_fast_mem(arg_fast_mem[i]); - } else { - arg->set_dtype(type); - arg_shape.shape.AsProto(arg->mutable_shape()); - if (i >= guaranteed_const_start_index) { - const DataType edge_type = - guaranteed_constant_nodes[i - guaranteed_const_start_index] - ->output_type(0); - TF_RET_CHECK(type == edge_type) - << "Arg type: " << type << " but edge type: " << edge_type; - arg->set_kind(tpu::TPUCompileMetadataProto::Arg::GUARANTEED_CONSTANT); - } else { - arg->set_kind(tpu::TPUCompileMetadataProto::Arg::PARAMETER); - } - } - // As long as the argument is not a per-replica one, it should have the same - // value for all replicas. For clarity, we keep the (redundant) checks for - // variable, broadcast and constant types, to prevent bugs in case new types - // with different semantics are introduced in the future. - arg->set_is_same_data_across_replicas( - !params_info.IsPerReplicaArg(i) && !params_info.IsDistributedArg(i) && - (params_info.IsVariableArg(i) || params_info.IsBroadcastArg(i) || - params_info.IsConstantArg(i))); - if (params_info.mirrored_variable_indices().count(i) > 0) { - CHECK_EQ(type, DT_RESOURCE); - arg->set_is_same_data_across_replicas(true); - // 64-bit type is not shardable by XLA:TPU yet. - bool sharding_enabled = (arg_shape.handle_type != DT_COMPLEX64 && - arg_shape.handle_type != DT_INT64 && - arg_shape.handle_type != DT_UINT64 && - arg_shape.handle_type != DT_DOUBLE); - arg->set_enable_xla_sharding( - sharding_enabled ? 
tpu::TPUCompileMetadataProto::Arg::TENTATIVE - : tpu::TPUCompileMetadataProto::Arg::DISALLOWED); - } - *arg->mutable_sharding() = arg_sharding[i]; - } - - const int num_retvals = retval_sharding.size(); - for (int i = 0; i < num_retvals; ++i) { - *proto.add_retvals()->mutable_sharding() = retval_sharding[i]; - } - proto.set_session_handle(session_handle); - - DataTypeVector constant_arg_types; - constant_arg_types.reserve(num_guaranteed_constants); - for (int i = 0; i < num_guaranteed_constants; ++i) { - constant_arg_types.push_back(arg_types[guaranteed_const_start_index + i]); - } - proto.set_xla_fusion_autotuner_thresh(autotuner_thresh); - - string metadata; - proto.SerializeToString(&metadata); - - NodeDef def; - def.set_name(UniqueNodeName("TPUReplicate/_compile", graph)); - def.set_op("TPUCompile"); - def.set_device(compile_device); - if (replicate_node) { - MergeDebugInfo(NodeDebugInfo(replicate_node->def()), &def); - } - - AddNodeAttr("function", function, &def); - AddNodeAttr("num_computations", num_cores_per_replica, &def); - AddNodeAttr("NumDynamicShapes", static_cast(dynamic_shape_nodes.size()), - &def); - AddNodeAttr("metadata", metadata, &def); - AddNodeAttr("Tguaranteed_constants", constant_arg_types, &def); - - Status status; - *compile_node = graph->AddNode(def, &status); - TF_RETURN_IF_ERROR(status); - - (*compile_node)->set_assigned_device_name(compile_device); - - for (int i = 0; i < dynamic_shape_nodes.size(); ++i) { - graph->AddEdge(dynamic_shape_nodes[i], 0, *compile_node, i); - } - - for (int i = 0; i < num_guaranteed_constants; ++i) { - graph->AddEdge(guaranteed_constant_nodes[i], 0, *compile_node, - dynamic_shape_nodes.size() + i); - } - VLOG(1) << "BuildCompileNode(): " << status; - return status; -} - -Status DistributedTPURewritePass::FindGuaranteedConstantInputs( - const Node& node, const NameRangeMap& input_range_map, - std::vector* guaranteed_constants) { - std::vector input_edges; - TF_RETURN_IF_ERROR(node.input_edges(&input_edges)); - std::pair variables_limits = - input_range_map.at("guaranteed_constants"); - for (int i = variables_limits.first; i < variables_limits.second; ++i) { - guaranteed_constants->push_back(input_edges[i]->src()); - } - return Status::OK(); -} - -Status DistributedTPURewritePass::FindVariableInputs( - const Node& node, const NameRangeMap& input_range_map, - std::vector* variables) { - std::vector input_edges; - TF_RETURN_IF_ERROR(node.input_edges(&input_edges)); - std::pair variables_limits = input_range_map.at("variables"); - for (int i = variables_limits.first; i < variables_limits.second; ++i) { - Node* node = input_edges[i]->src(); - - // Find the type of the VarHandleOp that feeds this node, looking through - // any wrapping Enter or Switch nodes. - while (node->IsEnter() || node->IsSwitch()) { - TF_RETURN_IF_ERROR(node->input_node(0, &node)); - } - // Fix the variable device assignment if it is requested with a full name. 
- if (!node->has_assigned_device_name() && - !node->requested_device().empty()) { - DeviceNameUtils::ParsedName var_device; - TF_RET_CHECK(DeviceNameUtils::ParseFullName(node->requested_device(), - &var_device)); - if (var_device.has_job && var_device.has_replica && var_device.has_task && - var_device.has_type && var_device.has_id) { - node->set_assigned_device_name(node->requested_device()); - if (node != input_edges[i]->src() && - !input_edges[i]->src()->has_assigned_device_name()) { - input_edges[i]->src()->set_assigned_device_name( - node->requested_device()); - } - } - } - if (node->type_string() == "VarHandleOp") { - DataType dtype; - TF_RETURN_IF_ERROR(GetNodeAttr(node->attrs(), "dtype", &dtype)); - variables->push_back(VariableInput{input_edges[i]->src(), - input_edges[i]->src_output(), dtype}); - } else if (node->type_string() == "_Arg") { - std::vector dtypes; - TF_RETURN_IF_ERROR(GetNodeAttr(node->attrs(), "_handle_dtypes", &dtypes)); - if (dtypes.empty()) { - return errors::Internal( - "_Arg node with resource output must have non-empty _handle_dtypes " - "attribute: ", - node->DebugString()); - } - variables->push_back(VariableInput{ - input_edges[i]->src(), input_edges[i]->src_output(), dtypes[0]}); - } else { - return errors::Internal( - "Cannot handle variable input with node type other than VarHandleOp " - "and _Arg: ", - node->DebugString()); - } - } - return Status::OK(); -} - -// Builds a NoOp node, used for building control dependencies. -static Status BuildNoopNode(const Node& source, StringPiece name, - const string& device, Graph* graph, Node** node) { - NodeDefBuilder builder(name, "NoOp", NodeDebugInfo(source)); - if (!device.empty()) { - builder.Device(device); - } - NodeDef def; - TF_RETURN_IF_ERROR(builder.Finalize(&def)); - - Status status; - *node = graph->AddNode(def, &status); - if (!device.empty()) { - (*node)->set_assigned_device_name(device); - } - return status; -} - -Status DistributedTPURewritePass::ConnectHostComputeNodes( - Node* compile_node, Node* key_placeholder_node, Graph* graph) { - // First find all the downstream nodes of the key placeholder node, since we - // want to delete the connecting edges from key_placeholder_node which would - // invalidate the out_nodes iterator. 
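// Editor's sketch of the general pattern used here (not TPU-specific):
//
//   std::vector<Node*> consumers;                                  // snapshot first...
//   for (Node* n : key_placeholder_node->out_nodes()) consumers.push_back(n);
//   for (Node* n : consumers) { /* RemoveEdge / AddEdge on n */ }  // ...then mutate
//
// Copying the consumers before rewiring avoids invalidating the out_nodes()
// iteration while edges are being removed.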
- std::vector host_transfer_nodes; - for (Node* node : key_placeholder_node->out_nodes()) { - host_transfer_nodes.push_back(node); - } - for (Node* node : host_transfer_nodes) { - int input_index = -1; - for (int i = 0; i < node->num_inputs(); i++) { - const Edge* e; - TF_RETURN_IF_ERROR(node->input_edge(i, &e)); - if (e->src() == key_placeholder_node) { - if (input_index != -1) { - return errors::Internal( - "Node ", node->name(), - " has multiple input edges from key placeholder node"); - } - input_index = e->dst_input(); - } - } - if (input_index == -1) { - return errors::Internal("Node ", node->name(), - " has no input edge from key placeholder node"); - } - const Edge* key_edge; - TF_RETURN_IF_ERROR(node->input_edge(input_index, &key_edge)); - graph->RemoveEdge(key_edge); - graph->AddEdge(compile_node, 1, node, input_index); - } - graph->RemoveNode(key_placeholder_node); - return Status::OK(); -} - -Status DistributedTPURewritePass::BuildVariableReads( - absl::Span variables, Node* control_predecessor, - Graph* graph, std::vector* variable_reads) { - variable_reads->resize(variables.size()); - for (int i = 0; i < variables.size(); ++i) { - string name = - graph->NewName(strings::StrCat(variables[i].node->name(), "/read")); - NodeDefBuilder builder(name, "ReadVariableOp", - NodeDebugInfo(*variables[i].node)); - - builder.Attr("dtype", variables[i].dtype); - builder.Device(variables[i].node->assigned_device_name()); - builder.Input(variables[i].node->name(), 0, DT_RESOURCE); - NodeDef def; - TF_RETURN_IF_ERROR(builder.Finalize(&def)); - - Status status; - Node* read_node; - (*variable_reads)[i] = read_node = graph->AddNode(def, &status); - if (!status.ok()) return status; - - read_node->set_requested_device(variables[i].node->requested_device()); - read_node->set_assigned_device_name( - variables[i].node->assigned_device_name()); - graph->AddEdge(variables[i].node, variables[i].index, read_node, 0); - - graph->AddControlEdge(control_predecessor, read_node); - } - return Status::OK(); -} - -bool DistributedTPURewritePass::ContainsResourceWriteOp( - const Graph& graph, const FunctionLibraryDefinition& fld) { - for (const Node* n : graph.nodes()) { - const XlaResourceOpInfo* op_info = GetResourceOpInfoForOp(n->type_string()); - if (op_info && op_info->kind() != XlaResourceOpKind::kRead) { - VLOG(2) << "Found write resource op inside computation"; - return true; - } - } - for (const string& func_name : fld.ListFunctionNames()) { - const FunctionDef* func_def = fld.Find(func_name); - for (const NodeDef& n : func_def->node_def()) { - const XlaResourceOpInfo* op_info = GetResourceOpInfoForOp(n.op()); - if (op_info && op_info->kind() != XlaResourceOpKind::kRead) { - VLOG(2) << "Found write resource op inside " << func_name; - return true; - } - } - } - return false; -} - -Status DistributedTPURewritePass::BuildVariableWrites( - absl::Span variables, Node* control_successor, - absl::Span variable_writes, Graph* graph) { - CHECK_EQ(variables.size(), variable_writes.size()); - for (int i = 0; i < variables.size(); ++i) { - const VariableWrite& write = variable_writes[i]; - NodeDebugInfo debug_info(*variables[i].node); - - auto name = [&](string suffix) { - return graph->NewName( - strings::StrCat(variables[i].node->name(), "/", suffix)); - }; - - Node* write_node; - TF_RETURN_IF_ERROR( - IncompleteNodeDefBuilder(name("assign"), "AssignVariableOp", debug_info) - .AddAttr("dtype", variables[i].dtype) - .Device(variables[i].node->assigned_device_name()) - .Build(graph, &write_node)); - - // Colocate 
the control flow with the variable. - CondBuilder cb(variables[i].node->name(), - variables[i].node->assigned_device_name(), debug_info, - graph); - - // Inputs to conditional. - Node* switch_val; - TF_RETURN_IF_ERROR( - cb.AddInput("switch_val", variables[i].dtype, - /*device=*/write.value->assigned_device_name(), debug_info, - &switch_val)); - Node* switch_var; - TF_RETURN_IF_ERROR( - cb.AddInput("switch_var", DT_RESOURCE, - /*device=*/variables[i].node->assigned_device_name(), - debug_info, &switch_var)); - // Conditionally write the value back. - graph->AddEdge(variables[i].node, variables[i].index, switch_var, 0); - graph->AddEdge(switch_var, CondBuilder::kThenBranch, write_node, 0); - graph->AddEdge(switch_val, CondBuilder::kThenBranch, write_node, 1); - // Add control edge from the write to value that will be merged. There is no - // output from the write so this control edge ensures the write completes. - graph->AddControlEdge(write_node, cb.switch_t()); - - graph->AddControlEdge(cb.control_successor(), control_successor); - - graph->AddEdge(write.predicate, write.predicate_output, cb.pred(), 0); - graph->AddEdge(write.value, write.value_output, switch_val, 0); - } - return Status::OK(); -} - -namespace { - -// Helper that creates an IdentityN node containing all of the variables -// values on CPU device 'device', except for those that will be split across -// cores. (For split variables, this may cause additional cross-host data -// transfers if more than 1 devices share the same variable partition on a -// remote host.) -// -// A previous iteration of this code built one Identity node per TPU core per -// variable, but this can rapidly become hundreds of thousands of nodes. This -// formulation creates a single IdentityN node containing all of the variables -// on each host. This may cause some unnecessary variable copies if only a -// subset of hosts consume a given variable, but has the virtue of being -// simple, and most models use pure replication where all cores want all the -// variables. -// -// Returns the node and its output index to be consumed by TPUExecute for the -// requested variable index. -xla::StatusOr CreateOrGetPerHostVariableCopy( - const string& host_cpu_device, int64 var_index, - const std::vector& variable_reads, - const DistributedTPURewritePass::ParameterInfo& params_info, - const std::vector& arg_shardings, - const Node& replicate_node, - absl::flat_hash_map>* per_host_var_copies, - Graph* graph) { - auto it = per_host_var_copies->find(host_cpu_device); - if (it != per_host_var_copies->end()) { - return it->second[var_index]; - } - - DataTypeVector dtypes; - // Per-variable data source for TPUExecute. - std::vector index_mapping; - index_mapping.reserve(variable_reads.size()); - dtypes.reserve(variable_reads.size()); - for (int64 i = 0; i < variable_reads.size(); ++i) { - Node* read = variable_reads[i]; - int64 orig_arg_num = - i + params_info.NumPerReplicaArgs() + params_info.NumBroadcastArgs(); - if (arg_shardings[orig_arg_num].type() != xla::OpSharding::OTHER) { - // We haven't built the IdentityN node yet, so temporarily use nullptr. - index_mapping.push_back( - NodeOut{nullptr, static_cast(dtypes.size())}); - dtypes.push_back(read->output_type(0)); - } else { - // Do not copy the full tensor of partitioned variables. 
- index_mapping.push_back(NodeOut{read, 0}); - } - } - NodeDef ndef; - ndef.set_name( - graph->NewName(absl::StrCat(replicate_node.name(), "/_variable_copy"))); - ndef.set_op("IdentityN"); - ndef.set_device(host_cpu_device); - AddNodeAttr("T", dtypes, &ndef); - Status s; - Node* id_node = graph->AddNode(ndef, &s); - TF_RETURN_IF_ERROR(s); - id_node->set_assigned_device_name(host_cpu_device); - - for (int64 i = 0; i < variable_reads.size(); ++i) { - if (index_mapping[i].node == nullptr) { - // Fill index_mapping with the actual IdentityN node. - index_mapping[i].node = id_node; - // Add the edge to id_node. - graph->AddEdge(variable_reads[i], 0, id_node, index_mapping[i].index); - } - } - - auto result = index_mapping[var_index]; - (*per_host_var_copies)[host_cpu_device] = std::move(index_mapping); - return result; -} - -} // namespace - -Status DistributedTPURewritePass::BuildExecuteNodes( - const ParameterInfo& params_info, int num_tasks, int num_cores_per_replica, - const Node& replicate_node, const DataTypeVector& arg_types, - const std::vector& arg_shapes, - const DataTypeVector& retval_types, - const std::vector& arg_shardings, - const std::vector& retval_shardings, - const std::vector>& tpu_device_names, - Node* compile_node, const std::vector& variable_reads, - Node* control_predecessor, Node* control_successor, - std::vector* variable_writes, Graph* graph) { - VLOG(1) << "BuildExecuteNodes " << replicate_node.DebugString(); - TF_RET_CHECK(params_info.NumReplicas() == tpu_device_names.size()); - - const int num_variables = variable_reads.size(); - const int num_retvals_per_replica = retval_types.size(); - - variable_writes->resize(num_variables); - - std::vector replicate_input_edges; - TF_RETURN_IF_ERROR(replicate_node.input_edges(&replicate_input_edges)); - - // Map from replicate input index to the fan_in node; - absl::flat_hash_map> replicate_input_fan_in_nodes; - absl::flat_hash_map> replicate_output_fan_out_nodes; - absl::flat_hash_map> - replicate_output_fan_out_dst_inputs; - std::vector to_be_removed_nodes; - - for (const Edge* e : replicate_input_edges) { - if (e->src()->type_string() == kTPUPartitionedInput) { - int num_users = 0; - for (const auto& ue : e->src()->out_edges()) { - if (!ue->IsControlEdge()) ++num_users; - } - if (num_users != 1) { - return tensorflow::errors::InvalidArgument( - e->src()->name(), " must only have one user. Found ", num_users); - } - to_be_removed_nodes.push_back(e->src()); - std::vector& nodes = replicate_input_fan_in_nodes[e->dst_input()]; - nodes.resize(num_cores_per_replica, nullptr); - VLOG(2) << "allocate " << num_cores_per_replica - << " for replicate_input_fan_in_nodes[" << e->dst_input() << "]"; - std::vector fan_in_edges; - TF_RETURN_IF_ERROR(e->src()->input_edges(&fan_in_edges)); - TF_RET_CHECK(fan_in_edges.size() == num_cores_per_replica); - - for (const Edge* fe : fan_in_edges) { - nodes[fe->dst_input()] = fe->src(); - VLOG(2) << "replicate_input_fan_in_nodes[" << e->dst_input() << "][" - << fe->dst_input() << "] = " << fe->src()->name(); - } - } - } - - // Replicate output edges are sorted by replica id and then by outputs for - // each replica. For example, if TPU Computation has outputs (output_1, - // output_2, and output_3) and number of replicas is 2, then - // replicate_output_edges order would be: - // output_1_replica_1, output_2_replica_1, output_3_replica_1, - // output_1_replica_2, output_2_replica_2, output_3_replica_2. 
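// Editor's note (illustrative): with that ordering, the replicate node's
// output slot factors as
//
//   src_output = replica_index * num_retvals_per_replica + retval_index
//
// e.g. for 3 outputs per replica, retval 1 of replica 0 sits at slot
// 0 * 3 + 1 = 1 and the same retval of replica 1 at slot 1 * 3 + 1 = 4, which
// is why the vector below can be indexed directly by edge->src_output().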
- std::vector replicate_output_edges(replicate_node.num_outputs(), - nullptr); - for (const Edge* edge : replicate_node.out_edges()) { - if (edge->IsControlEdge()) continue; - - int num_partitioned_outputs = 0; - - for (const Edge* out_edge : edge->dst()->out_edges()) { - if (out_edge->dst()->type_string() == kTPUPartitionedOutput) { - num_partitioned_outputs++; - // Paths between replicate_node and replicate_output_fan_out_nodes: - // ReplicateNode->TpuOutIdenity->kTPUPartitionedOutput->fan-out-nodes - TF_RET_CHECK(edge->dst()->out_edges().size() == 1); - to_be_removed_nodes.push_back(edge->dst()); - to_be_removed_nodes.push_back(out_edge->dst()); - // Get the right replicated id from the replicate_output_edge. - std::vector& nodes = - replicate_output_fan_out_nodes[edge->src_output()]; - std::vector& dst_inputs = - replicate_output_fan_out_dst_inputs[edge->src_output()]; - nodes.resize(num_cores_per_replica, nullptr); - dst_inputs.resize(num_cores_per_replica, 0); - TF_RET_CHECK(out_edge->dst()->out_edges().size() == - num_cores_per_replica); - - for (const Edge* fe : out_edge->dst()->out_edges()) { - nodes[fe->src_output()] = fe->dst(); - dst_inputs[fe->src_output()] = fe->dst_input(); - VLOG(2) << "replicate_output_fan_out_nodes[" << out_edge->src_output() - << "][" << fe->src_output() - << "] = " << fe->dst()->DebugString() << " with dst_input " - << fe->dst_input(); - } - } - } - replicate_output_edges[edge->src_output()] = edge; - if (num_partitioned_outputs > 1) { - return errors::InvalidArgument( - "More than one TPUPartitionedOutput per replciated output."); - } - } - - const int num_execute_args = - arg_shardings.size() - params_info.NumGuaranteedConstants(); - // Inverts the arg_shardings and retval_shardings mappings to - // form core -> {argument number} maps. - std::vector> core_arg_nums(num_cores_per_replica); - for (int i = 0; i < num_execute_args; ++i) { - const auto& sharding = arg_shardings[i]; - if (sharding.type() == xla::OpSharding::MAXIMAL) { - int core = sharding.tile_assignment_devices(0); - TF_RETURN_IF_ERROR(ValidateCoreNumber(core, num_cores_per_replica)); - core_arg_nums[core].push_back(i); - } else if (sharding.type() == xla::OpSharding::OTHER) { - for (int64 core : sharding.tile_assignment_devices()) { - core_arg_nums[core].push_back(i); - } - } else if (sharding.type() == xla::OpSharding::REPLICATED) { - for (int core = 0; core < num_cores_per_replica; ++core) { - core_arg_nums[core].push_back(i); - } - } else { - return tensorflow::errors::InvalidArgument( - "Unsupported argument sharding: ", sharding.DebugString()); - } - } - std::vector> core_retval_nums(num_cores_per_replica); - for (int i = 0; i < retval_shardings.size(); ++i) { - const auto& sharding = retval_shardings[i]; - if (sharding.type() == xla::OpSharding::MAXIMAL) { - int core = sharding.tile_assignment_devices(0); - TF_RETURN_IF_ERROR(ValidateCoreNumber(core, num_cores_per_replica)); - core_retval_nums[core].push_back(i); - } else if (sharding.type() == xla::OpSharding::REPLICATED) { - for (int core = 0; core < num_cores_per_replica; ++core) { - core_retval_nums[core].push_back(i); - } - } else if (sharding.type() == xla::OpSharding::OTHER) { - for (int64 core : sharding.tile_assignment_devices()) { - core_retval_nums[core].push_back(i); - } - } else { - return tensorflow::errors::InvalidArgument( - "Unsupported argument sharding: ", sharding.DebugString()); - } - } - - // Maps host device name to a list of per-variable pairs (variable_copy_node, - // output_index_of_copy_node). 
- absl::flat_hash_map> per_host_var_copies; - - // Mapping from original resource arg number to a second level map. Second - // level map is from core id to output index of updated variable value. - absl::flat_hash_map> - orig_arg_num_to_output_index_mapping; - // Mapping from retval index to a second level map. Second level map is from - // core id to output index of sharded output value. - std::unordered_map> - retval_index_to_output_index_mapping; - - // Represents mapping of argument index of sharded input to each - // TPUExecute node to its corresponding Split node and its output index - // from which sharded input will be fed into TPUExecute node. - std::map input_index_to_sharded_inputs; - - // Builds one TPUExecute node per core per replica. - std::vector> execute_nodes(params_info.NumReplicas()); - for (int core = 0; core < num_cores_per_replica; ++core) { - DataTypeVector core_retval_types; - for (int output : core_retval_nums[core]) { - core_retval_types.push_back(retval_types[output]); - } - DataTypeVector core_arg_types; - std::vector core_variable_writes; - for (int input : core_arg_nums[core]) { - // Resource variables can be passed either by reference (as a DT_RESOURCE) - // tensor or by value (as the variable's current value). Per-replica or - // distributed resource arguments are always passed by reference and - // broadcast variables are always passed by value. - if (arg_types[input] == DT_RESOURCE && - !params_info.IsPerReplicaArg(input) && - !params_info.IsDistributedArg(input)) { - DataType handle_type = arg_shapes[input].handle_type; - TF_RET_CHECK(handle_type != DT_INVALID) << DataTypeString(handle_type); - core_arg_types.push_back(handle_type); - int base = input - params_info.NumPerReplicaArgs() - - params_info.NumDistributedArgs() - - params_info.NumBroadcastArgs(); - // Variables passed by value will have a corresponding additional output - // containing an updated value for the variable. - core_variable_writes.push_back(base); - core_retval_types.push_back(handle_type); - } else { - core_arg_types.push_back(arg_types[input]); - } - } - - NodeDef def; - def.set_op("TPUExecute"); - MergeDebugInfo(NodeDebugInfo(replicate_node.def()), &def); - AddNodeAttr("Targs", core_arg_types, &def); - AddNodeAttr("Tresults", core_retval_types, &def); - - for (int64 replica = 0; replica < params_info.NumReplicas(); ++replica) { - def.set_name(strings::StrCat(replicate_node.name(), "/_execute_", replica, - "_", core)); - - Status status; - Node* node = graph->AddNode(def, &status); - if (!status.ok()) return status; - execute_nodes[replica].push_back(node); - - node->set_assigned_device_name(tpu_device_names[replica][core]); - - // Add control edges to ensure that execution happens after - // `control_predecessor`, happens before `control_successor`, and is - // triggered by evaluating any operator that depends on the original - // TPUReplicate operator. See the comment at the top of the header file - // for more details. - graph->AddControlEdge(control_predecessor, node); - graph->AddControlEdge(node, control_successor); - - // Add data input edges. - for (int64 i = 0; i < core_arg_nums[core].size(); ++i) { - int64 orig_arg_num = core_arg_nums[core][i]; - VLOG(2) << " replica " << replica << " core " << core << " i " << i - << " orig_arg_num " << orig_arg_num; - if (params_info.IsPerReplicaArg(orig_arg_num) || - params_info.IsDistributedArg(orig_arg_num)) { - // Per-replica input and distributed input - int64 input_num = params_info.IsPerReplicaArg(orig_arg_num) - ? 
replica * params_info.NumPerReplicaArgs() + - core_arg_nums[core][i] - : params_info.NumReplicas() * - params_info.NumPerReplicaArgs() + - core_arg_nums[core][i] - - params_info.NumPerReplicaArgs(); - - const Edge* edge = replicate_input_edges[input_num]; - VLOG(2) << "replicate_input_edges[" << input_num << "]"; - DataType dtype = edge->src()->output_type(edge->src_output()); - if (dtype == DT_RESOURCE) { - DataType handle_dtype = arg_shapes[orig_arg_num].handle_type; - if (std::find(kTpuAllTypes.begin(), kTpuAllTypes.end(), - handle_dtype) == kTpuAllTypes.end()) { - return errors::InvalidArgument( - "Unsupported resource variable data type for TPU: ", - DataTypeString(handle_dtype), ", caused by output ", - edge->src()->name(), ":", edge->src_output()); - } - } else { - if (std::find(kTpuAllTypes.begin(), kTpuAllTypes.end(), dtype) == - kTpuAllTypes.end()) { - return errors::InvalidArgument( - "Unsupported data type for TPU: ", DataTypeString(dtype), - ", caused by output ", edge->src()->name(), ":", - edge->src_output()); - } - } - if (arg_shardings[orig_arg_num].type() == xla::OpSharding::OTHER) { - // Don't automatically add a split node when input node is - // kTPUPartitionedInput - if (edge->src()->type_string() == kTPUPartitionedInput) { - VLOG(2) << "Connect " - << replicate_input_fan_in_nodes[input_num][core]->name() - << " to " << node->name() << " at " << i; - graph->AddEdge(replicate_input_fan_in_nodes[input_num][core], 0, - node, i); - } else { - if (dtype == DT_RESOURCE) { - return errors::InvalidArgument( - "Tiled sharding for per-replica DT_RESOURCE input must", - "be TPUPartitionedInput. Here got ", - edge->src()->type_string()); - } - const xla::OpSharding& sharding = arg_shardings[orig_arg_num]; - - // Create or get the Split node. - TF_ASSIGN_OR_RETURN( - ShardedInputInfo sharded_input_info, - CreateOrGetSplitNodesForInputSharding( - sharding, orig_arg_num, dtype, replica, - edge->src_output(), edge->src(), control_predecessor, - graph, &input_index_to_sharded_inputs)); - - // Calculate which output we should receive from the Split node. - absl::optional output_index = - GetCoreIndexInSharding(sharding, core); - TF_RET_CHECK(output_index); - - NodeOut split_node_and_index = - sharded_input_info.sharded_inputs.at(output_index.value()); - // Connect with Split node output. - graph->AddEdge(split_node_and_index.node, - split_node_and_index.index, node, i); - } - } else if (edge->src()->type_string() == kTPUPartitionedInput && - arg_shardings[orig_arg_num].type() == - xla::OpSharding::REPLICATED) { - graph->AddEdge(replicate_input_fan_in_nodes[input_num][core], 0, - node, i); - } else { - graph->AddEdge(edge->src(), edge->src_output(), node, i); - } - } else if (params_info.IsBroadcastArg(orig_arg_num)) { - // Broadcast input. - int64 input_num = params_info.FirstBroadcastArgFromHost() + - core_arg_nums[core][i] - - params_info.NumPerReplicaArgs() - - params_info.NumDistributedArgs(); - const Edge* edge = replicate_input_edges[input_num]; - DataType dtype = edge->src()->output_type(edge->src_output()); - if (std::find(kTpuAllTypes.begin(), kTpuAllTypes.end(), dtype) == - kTpuAllTypes.end()) { - return errors::InvalidArgument( - "Unsupported data type for TPU: ", DataTypeString(dtype), - ", caused by output ", edge->src()->name(), ":", - edge->src_output()); - } - graph->AddEdge(edge->src(), edge->src_output(), node, i); - } else { - // Variable input. 
- int64 variable_num = orig_arg_num - params_info.NumPerReplicaArgs() - - params_info.NumDistributedArgs() - - params_info.NumBroadcastArgs(); - TF_RET_CHECK(variable_num < num_variables); - - Node* variable_read = variable_reads[variable_num]; - DataType dtype = variable_read->output_type(0); - if (std::find(kTpuAllTypes.begin(), kTpuAllTypes.end(), dtype) == - kTpuAllTypes.end()) { - return errors::InvalidArgument( - "Unsupported resource variable data type for TPU: ", - DataTypeString(dtype), ", caused by ReadVariableOp ", - variable_read->DebugString()); - } - DeviceNameUtils::ParsedName requested_device; - string requested = variable_read->requested_device(); - TF_RET_CHECK( - DeviceNameUtils::ParseFullName(requested, &requested_device)); - if (requested_device.type != "TPU") { - // Stage the value via the CPU device on the remote host. The graph - // partitioner will introduce an intermediate copy rather than - // copying the same tensor multiple times across the network, and we - // would prefer that intermediate copy to be in host memory to avoid - // running out of memory if the TPUExecute op on the staging device - // starts running before the _Send ops to the other TPU devices on - // the same host complete. We don't do this if the variables are - // already placed on TPU, otherwise it will cause an unnecessary - // round trip copy. - // TODO(b/79580121): give each replica its own on-device variable - // replica and then delete this code. - string device; - TF_RETURN_IF_ERROR(DeviceNameUtils::DeviceNameToCpuDeviceName( - tpu_device_names[replica][core], &device)); - TF_ASSIGN_OR_RETURN(auto var_data, - CreateOrGetPerHostVariableCopy( - device, variable_num, variable_reads, - params_info, arg_shardings, replicate_node, - &per_host_var_copies, graph)); - - if (arg_shardings[orig_arg_num].type() == xla::OpSharding::OTHER) { - const xla::OpSharding& sharding = arg_shardings[orig_arg_num]; - // Create or get the Split node. - TF_ASSIGN_OR_RETURN( - ShardedInputInfo sharded_input_info, - CreateOrGetSplitNodesForInputSharding( - sharding, orig_arg_num, - arg_shapes[orig_arg_num].handle_type, replica, - var_data.index, var_data.node, control_predecessor, graph, - &input_index_to_sharded_inputs)); - - // Calculate which output we should receive from the Split node. - absl::optional output_index = - GetCoreIndexInSharding(sharding, core); - TF_RET_CHECK(output_index); - NodeOut split_node_and_index = - sharded_input_info.sharded_inputs[output_index.value()]; - // Connect with Split node output. - graph->AddEdge(split_node_and_index.node, - split_node_and_index.index, node, i); - - } else { - graph->AddEdge(var_data.node, var_data.index, node, i); - } - } else { - graph->AddEdge(variable_reads[variable_num], 0, node, i); - } - } - } - - // Adds a program input edge from the compiler. - graph->AddEdge(compile_node, core + 1, node, node->num_inputs() - 1); - - // Add data output edges. 
- int num_outputs = core_retval_nums[core].size(); - for (int i = 0; i < num_outputs; ++i) { - int output_num = - replica * num_retvals_per_replica + core_retval_nums[core][i]; - const auto& sharding = retval_shardings[core_retval_nums[core][i]]; - if (sharding.type() == xla::OpSharding::OTHER) { - int retval_index = core_retval_nums[core][i]; - retval_index_to_output_index_mapping[retval_index][core] = i; - bool is_last_core = - core == - *std::max_element(sharding.tile_assignment_devices().begin(), - sharding.tile_assignment_devices().end()); - bool isPartitionOutNode = false; - - const Edge* e = replicate_output_edges[output_num]; - const Edge* e_out; - for (const Edge* out_edge : e->dst()->out_edges()) { - if (out_edge->dst()->type_string() == kTPUPartitionedOutput) { - isPartitionOutNode = true; - e_out = out_edge; - } - } - if (isPartitionOutNode) { - graph->AddEdge( - node, i, replicate_output_fan_out_nodes[output_num][core], - replicate_output_fan_out_dst_inputs[output_num][core]); - VLOG(2) << "Connect " << node->name() << " at " << i << " to " - << replicate_output_fan_out_nodes[output_num][core]->name() - << " at " - << replicate_output_fan_out_dst_inputs[output_num][core]; - if (is_last_core) { - graph->RemoveEdge(e); - graph->RemoveEdge(e_out); - } - continue; - } - - // Do this in the iteration of last core in tile assignment, so all - // TPUExecute nodes have been created. - if (!is_last_core) { - continue; - } - - // Add a Concat node. - std::vector orig_inputs; - for (int64 core_id : sharding.tile_assignment_devices()) { - int core_retval_index = - retval_index_to_output_index_mapping[retval_index][core_id]; - orig_inputs.push_back( - NodeOut{execute_nodes[replica][core_id], - static_cast( - core_retval_nums[core_id][core_retval_index])}); - } - DataType dtype = e->src()->output_type(e->src_output()); - TF_ASSIGN_OR_RETURN( - Node * concat_node, - CreateConcatNodesForRetval(sharding, dtype, replica, orig_inputs, - graph, /*device=*/"")); - - const Edge* edge = replicate_output_edges[output_num]; - Node* dst = edge->dst(); - int dst_input = edge->dst_input(); - graph->RemoveEdge(edge); - graph->AddEdge(concat_node, 0, dst, dst_input); - - continue; - } - - // If this is a replicated output, outputs on all cores will be the - // same, and we only take the output from core 0. - if (sharding.type() == xla::OpSharding::REPLICATED && core != 0) { - continue; - } - - // If output has maximal sharding, make sure we only use output from - // TPUExecute node with logical core id equal to core id defined by the - // xla sharding. - if (sharding.type() == xla::OpSharding::MAXIMAL && - core != sharding.tile_assignment_devices(0)) { - continue; - } - - const Edge* replicate_edge_to_replace = - replicate_output_edges[output_num]; - Node* dst = replicate_edge_to_replace->dst(); - int dst_input = replicate_edge_to_replace->dst_input(); - graph->RemoveEdge(replicate_edge_to_replace); - graph->AddEdge(node, i, dst, dst_input); - } - - // Feed the updated variable values from the first replica to the - // variable write nodes. - if (replica == 0) { - for (int i = 0; i < core_variable_writes.size(); ++i) { - int orig_arg_num = - core_variable_writes[i] + params_info.NumPerReplicaArgs() + - params_info.NumDistributedArgs() + params_info.NumBroadcastArgs(); - const auto& sharding = arg_shardings[orig_arg_num]; - // If this is a tiling sharded variable, concat variable updates from - // all cores. 
- if (sharding.type() == xla::OpSharding::OTHER) { - orig_arg_num_to_output_index_mapping[orig_arg_num][core] = i; - - // Do this in the iteration of last core in tile assignment, so all - // TPUExecute nodes have been created. - if (core != - *std::max_element(sharding.tile_assignment_devices().begin(), - sharding.tile_assignment_devices().end())) { - continue; - } - - // Add a Concat node. - std::vector orig_inputs; - for (int64 core_id : sharding.tile_assignment_devices()) { - int core_retval_num = - orig_arg_num_to_output_index_mapping[orig_arg_num][core_id]; - orig_inputs.push_back( - NodeOut{execute_nodes[0][core_id], - static_cast(core_retval_nums[core_id].size() + - core_retval_num)}); - } - - // Use the variable read's device for the concat. They should both - // be collocated with the variable. - absl::string_view device = - variable_reads[core_variable_writes[i]]->assigned_device_name(); - TF_ASSIGN_OR_RETURN( - Node * concat_node, - CreateConcatNodesForRetval( - sharding, arg_shapes[orig_arg_num].handle_type, replica, - orig_inputs, graph, device)); - // Populate VariableWrite. - VariableWrite& write = variable_writes->at(core_variable_writes[i]); - write.value = concat_node; - write.value_output = 0; - write.predicate = compile_node; - write.predicate_output = num_cores_per_replica + core + 1; - - continue; - } - - // If this is a replicated variable, outputs on all cores will be the - // same, and we only take the output from core 0 for the varialbe - // update. - if (sharding.type() == xla::OpSharding::REPLICATED && core != 0) { - continue; - } - VariableWrite& write = variable_writes->at(core_variable_writes[i]); - write.value = node; - write.value_output = num_outputs + i; - write.predicate = compile_node; - write.predicate_output = num_cores_per_replica + core + 1; - } - } - } - } - - for (Node* node : to_be_removed_nodes) { - graph->RemoveNode(node); - } - return Status::OK(); -} - -/* static */ Status DistributedTPURewritePass::CopyOutsideCompilationNodes( - int replica_index, const std::vector& outside_compilation_nodes, - const DeviceNameUtils::ParsedName& tpu_device, - const DeviceNameUtils::ParsedName& partial_device, - NodeToNodeReplicasMap* node_images, Graph* graph) { - for (Node* node : outside_compilation_nodes) { - NodeDef image_def = node->def(); - MergeDebugInfo(NodeDebugInfo(node->def()), &image_def); - const string suffix = strings::StrCat("/R", replica_index); - // In addition to node name, make the frame name unique to avoid multiple - // LoopCond nodes in one frame. - TF_RETURN_IF_ERROR( - AddPrefixAndSuffixToNode("" /* prefix */, suffix, &image_def)); - Status status; - Node* image = graph->AddNode(image_def, &status); - image->AddAttr(kXlaReplicaIdAttrName, replica_index); - TF_RETURN_IF_ERROR(status); - if (HasNodeAttr(image->def(), kXlaHasHostTransferAttrName)) { - TF_RETURN_IF_ERROR( - SetNodeDeviceForTPUCommunication(tpu_device, DEVICE_CPU, image)); - } else { - const string& original_device_string = - node->assigned_device_name().empty() ? node->requested_device() - : node->assigned_device_name(); - DeviceNameUtils::ParsedName device; - TF_RET_CHECK( - DeviceNameUtils::ParseFullName(original_device_string, &device)); - // If the requested device can be merged with the replica's host device, - // then do so. For example, if the requested device is "/CPU:0" or - // "/GPU:0" then it will be placed on the CPU/GPU of the host where this - // replica is running. 
But if the requested device is - // "/task:3/replica:2/CPU:0" then it will be placed on that task/replica. - if (DeviceNameUtils::IsSpecification(device, partial_device)) { - TF_RETURN_IF_ERROR( - DeviceNameUtils::MergeDevNames(&device, partial_device)); - } - image->set_requested_device(DeviceNameUtils::ParsedNameToString(device)); - } - std::vector& node_image_vector = (*node_images)[node]; - node_image_vector.resize(replica_index + 1); - node_image_vector[replica_index] = image; - } - return Status::OK(); -} - -/* static */ Status DistributedTPURewritePass::ReplicateOutsideCompilationNodes( - const std::vector>& tf_device_assignment, - const HostComputeCoreMap& host_compute_core, - const OutsideCompilationNodeMap& outside_compilation_nodes, - NodeToNodeReplicasMap* node_images, Graph* graph) { - // Iterate over replicas. - for (int i = 0; i < tf_device_assignment.size(); ++i) { - const auto& core_devices = tf_device_assignment[i]; - for (const auto& oc_cluster_iter : outside_compilation_nodes) { - const string& oc_cluster_name = oc_cluster_iter.first; - const auto& oc_cluster_nodes = oc_cluster_iter.second; - // We previously validated that host_compute_core contains an entry for - // each cluster. - int core = host_compute_core.at(oc_cluster_name); - TF_RET_CHECK(core >= 0 && core < core_devices.size()); - // tpu_device is the device the HostCompute XLA Op for this cluster runs - // on. - DeviceNameUtils::ParsedName tpu_device; - TF_RET_CHECK( - DeviceNameUtils::ParseFullName(core_devices[core], &tpu_device)); - // partial_device contains the replica and task but not the type. - DeviceNameUtils::ParsedName partial_device = tpu_device; - partial_device.has_type = false; - partial_device.has_id = false; - - if (tf_device_assignment.size() == 1) { - // With a single replica don't copy any nodes just put the original - // nodes into the image map. We leave the device placement alone, except - // that we have to fill in the correct core for the host send and - // receive nodes. - for (Node* node : oc_cluster_nodes) { - (*node_images)[node] = {node}; - node->AddAttr(kXlaReplicaIdAttrName, 0); - if (HasNodeAttr(node->def(), kXlaHasHostTransferAttrName)) { - TF_RETURN_IF_ERROR( - SetNodeDeviceForTPUCommunication(tpu_device, DEVICE_CPU, node)); - } - } - } else { - // Iterate over outside_compilation clusters in this computation, adding - // all the nodes with appropriate device assignments. - TF_RETURN_IF_ERROR( - CopyOutsideCompilationNodes(i, oc_cluster_nodes, tpu_device, - partial_device, node_images, graph)); - } - } - } - return Status::OK(); -} - -/* static */ Status DistributedTPURewritePass::CopyOutsideCompilationEdges( - const std::vector& outside_compilation_nodes, - const NodeToNodeReplicasMap& node_images, - const std::unordered_map outside_compilation_inputs, - Graph* graph) { - for (Node* node : outside_compilation_nodes) { - const auto& images = node_images.at(node); - // Make a copy of all edges and iterate on "in_edges", because we might - // remove edges when iteratating through them. - std::vector in_edges(node->in_edges().begin(), - node->in_edges().end()); - for (const Edge* edge : in_edges) { - Node* src = edge->src(); - const auto iter = node_images.find(src); - if (iter == node_images.end()) { - if (images.size() > 1) { - // The source node is a 'normal' node not part of any - // rewrite. Broadcast the value to all replicas. (If images.size() == - // 1 the cluster is not replicated and we can leave the original edge - // in place.) 
- for (Node* dst : images) { - graph->AddEdge(src, edge->src_output(), dst, edge->dst_input()); - } - } - continue; - } - - // The source node is a replicated outside_compilation node. - const auto& src_images = iter->second; - if (src_images.size() != images.size()) { - return errors::InvalidArgument( - "Graph contains an edge from node ", src->name(), - " in an outside_compilation block replicated ", src_images.size(), - " ways to node ", node->name(), - " in an outside_compilation block replicated ", images.size(), - " ways. Replication factors must match. Leave a comment on " - "tracking bug b/76419636 if you need this to be supported."); - } - bool is_lifted_arg; - string outside_compilation_cluster; - if (GetNodeAttr(src->def(), kXlaIsLiftedArgAttrName, &is_lifted_arg) - .ok() && - GetNodeAttr(src->def(), kOutsideCompilationAttr, - &outside_compilation_cluster) - .ok()) { - const auto input_iter = - outside_compilation_inputs.find(outside_compilation_cluster); - TF_RET_CHECK(input_iter != outside_compilation_inputs.end()); - TF_RET_CHECK(input_iter->second->type_string() == "IdentityN"); - int dst_input = edge->dst_input(); - if (src_images.size() == 1) { - graph->RemoveEdge(edge); - } - for (int i = 0; i < src_images.size(); ++i) { - graph->AddEdge(input_iter->second, i, images[i], dst_input); - } - continue; - } - - bool is_placeholder_for_arg; - string outside_compilation_input_attr; - if (GetNodeAttr(src->def(), kXlaIsPlaceholderForArg, - &is_placeholder_for_arg) - .ok() && - GetNodeAttr(src->def(), kXlaOutsideCompilationInputsAttrName, - &outside_compilation_input_attr) - .ok()) { - const auto input_iter = - outside_compilation_inputs.find(outside_compilation_input_attr); - TF_RET_CHECK(input_iter != outside_compilation_inputs.end()); - TF_RET_CHECK(input_iter->second->type_string() == "IdentityN"); - int dst_input = edge->dst_input(); - if (src_images.size() == 1) { - graph->RemoveEdge(edge); - } - for (int i = 0; i < src_images.size(); ++i) { - graph->AddEdge(input_iter->second, i, images[i], dst_input); - } - continue; - } - - if (images.size() > 1) { - // If images.size() == 1 neither cluster is replicated and we can - // leave the original edges in place. - for (int i = 0; i < src_images.size(); ++i) { - graph->AddEdge(src_images[i], edge->src_output(), images[i], - edge->dst_input()); - } - } - } - for (const Edge* edge : node->out_edges()) { - Node* dst = edge->dst(); - const auto iter = node_images.find(dst); - if (iter == node_images.end()) { - // The source node is a 'normal' node not part of any rewrite. - if (edge->IsControlEdge()) { - // Make the dst node have a control dependency on every replica. - if (images.size() > 1) { - for (int i = 0; i < images.size(); ++i) { - graph->AddControlEdge(images[i], dst); - } - } - // else the cluster is not replicated so we can leave the original - // edge in place. - } else { - // The edge - // is only valid if the outside_compilation block is not replicated. - if (images.size() > 1) { - return errors::InvalidArgument( - "Graph contains an edge from node ", node->name(), - " in an outside_compilation block replicated ", images.size(), - " ways to node ", dst->name(), - " that is not part of an outside_compilation block. Edges from " - "outside_compilation to regular graph nodes are only supported " - "for replication factors of 1. Leave a comment on tracking bug " - "b/76419636 if you need this to be supported."); - } - // else the cluster is not replicated so we can leave the original - // edge in place. 
- } - } - // The case where src and dst are both in node_images is covered elsewhere - // when iterating over in_edges of dst. - } - } - return Status::OK(); -} - -/* static */ Status DistributedTPURewritePass::ReplicateOutsideCompilationEdges( - const OutsideCompilationNodeMap& outside_compilation_nodes, - const NodeToNodeReplicasMap& node_images, - const std::unordered_map outside_compilation_inputs, - Graph* graph) { - for (const auto& oc_cluster_iter : outside_compilation_nodes) { - TF_RETURN_IF_ERROR( - CopyOutsideCompilationEdges(oc_cluster_iter.second, node_images, - outside_compilation_inputs, graph)); - } - return Status::OK(); -} - -/* static */ Status DistributedTPURewritePass::RemoveOutsideCompilationNodes( - const NodeToNodeReplicasMap& node_images, Graph* graph) { - for (const auto& iter : node_images) { - if (iter.second.size() > 1) { - // The cluster was replicated so remove the original node. - Node* node = iter.first; - graph->RemoveNode(node); - } - } - return Status::OK(); -} - -/* static */ Status -DistributedTPURewritePass::LowerOutsideCompilationFunctionalNodes( - Graph* g, const FunctionLibraryDefinition& flib_def, - const TPUReplicateDeviceNamesMapping& tpu_replicate_device_names_mapping) { - bool modified = false; - do { - std::vector nodes_to_lower; - for (Node* n : g->op_nodes()) { - if (!HasNodeAttr(n->def(), kOutsideCompilationAttr)) { - continue; - } - - if (n->IsWhileNode() || n->IsIfNode() || IsFunctionCall(flib_def, *n)) { - // Only lower functional ops with DT_RESOURCE input, because otherwise - // placer will complain. For normal cases, lowering will cause slowdown - // when related functions are huge (b/139037679). - bool has_resource_input = false; - for (const Edge* e : n->in_edges()) { - if (!e->IsControlEdge() && - e->src()->output_type(e->src_output()) == DT_RESOURCE) { - has_resource_input = true; - break; - } - } - if (has_resource_input) { - nodes_to_lower.push_back(n); - } - } - } - - modified = !nodes_to_lower.empty(); - - auto lower_functional_node = [&flib_def, &g](Node* n) -> Status { - // Clear device assignment. Otherwise all lowered nodes will have - // device assignment, which is not what we want. - n->set_requested_device(""); - - int replica_id; - TF_RETURN_IF_ERROR( - GetNodeAttr(n->def(), kXlaReplicaIdAttrName, &replica_id)); - - string outside_compilation_attr; - TF_RETURN_IF_ERROR(GetNodeAttr(n->def(), kOutsideCompilationAttr, - &outside_compilation_attr)); - - // There are two different kinds of functional outside compilation nodes: - // 1. Nodes that are in outside compilation blocks already. They are - // generated by FunctionalizeControlFlowForXlaPass, and only have - // attribute kOutsideCompilationAttr. - // 2. Mirrored control flow built for outside compilation in functional - // nodes. They are generated by ExtractOutsideCompilationPass, and have - // both kOutsideCompilationAttr and kXlaHasHostTransferAttrName. - // When lowering them, they need to be treated differently. - // For 1), their body functions are always V1 functions written by users, - // and their "control outputs" are control inputs of _Retval nodes. They - // should be lowered as V1 functions. - // For 2), we always add necessary "control outputs" - // (_XlaRecvAtHost/_XlaSendAtHost nodes) to "control_ret" field in their - // FunctionDef's. They should be lowered as V2 functions. 
- bool is_host_side_mirrored_control_flow = - HasNodeAttr(n->def(), kXlaHasHostTransferAttrName); - - int num_node_ids = g->num_node_ids(); - bool is_call_node = IsFunctionCall(flib_def, *n); - if (n->IsWhileNode()) { - TF_RETURN_IF_ERROR(RewriteWhileNode(n, g, - /*keep_node_fetchable=*/false)); - } else if (n->IsIfNode()) { - TF_RETURN_IF_ERROR(RewriteIfNode(n, g, /*keep_node_fetchable=*/false)); - } else { - TF_RET_CHECK(is_call_node); - // See comments for "is_host_side_mirrored_control_flow" above. - // If this is a node that's in outside compilation block, lower it as - // V1 function. This is controlled by removing - // kLowerAsMultiDeviceFunctionAttr from the node. - if (!is_host_side_mirrored_control_flow) { - n->ClearAttr(LowerFunctionalOpsPass::kLowerAsMultiDeviceFunctionAttr); - } else { - n->ClearAttr(LowerFunctionalOpsPass::kLowerAsMultiDeviceFunctionAttr); - n->AddAttr(LowerFunctionalOpsPass::kLowerAsMultiDeviceFunctionAttr, - true); - } - TF_RETURN_IF_ERROR( - RewriteFunctionCallNode(n, g, flib_def, - /*keep_caller_fetchable=*/false)); - } - - for (int i = num_node_ids; i < g->num_node_ids(); i++) { - Node* node = g->FindNodeId(i); - if (!node) { - continue; - } - - if (!is_call_node && is_host_side_mirrored_control_flow && - IsFunctionCall(flib_def, *node)) { - // For If/While nodes, if they are host side mirrored control flow, - // mark their body function calls with kXlaHasHostTransferAttrName - // attribute to make sure we lower them as V2 function. - node->AddAttr(kXlaHasHostTransferAttrName, true); - } - - if (IsFunctionCall(flib_def, *node) || node->IsWhileNode() || - node->IsIfNode()) { - // Set kOutsideCompilationAttr attribute so we lower these - // nested function call nodes later. - node->AddAttr(kOutsideCompilationAttr, outside_compilation_attr); - // Set kXlaReplicaIdAttrName attribute so we know replica id when we - // lower this function call node. - node->AddAttr(kXlaReplicaIdAttrName, replica_id); - } else if (node->type_string() == "_XlaRecvAtHost" || - node->type_string() == "_XlaSendFromHost") { - // For "_XlaRecvAtHost" and "_XlaSendFromHost" nodes, make sure they - // have kXlaReplicaIdAttrName attribute so later we know which host - // device to assign. - node->AddAttr(kXlaReplicaIdAttrName, replica_id); - } - } - return Status::OK(); - }; - - for (Node* n : nodes_to_lower) { - TF_RETURN_IF_ERROR(lower_functional_node(n)); - } - } while (modified); - - // Set device for all _XlaRecvAtHost and _XlaSendFromHost nodes. - for (Node* n : g->op_nodes()) { - if (n->type_string() != "_XlaRecvAtHost" && - n->type_string() != "_XlaSendFromHost") { - continue; - } - - string replicate; - TF_RETURN_IF_ERROR(GetNodeAttr(n->def(), kTPUReplicateAttr, &replicate)); - auto iter = tpu_replicate_device_names_mapping.find(replicate); - TF_RET_CHECK(iter != tpu_replicate_device_names_mapping.end()); - const auto& tpu_device_names = iter->second; - - int replica_id; - TF_RETURN_IF_ERROR( - GetNodeAttr(n->def(), kXlaReplicaIdAttrName, &replica_id)); - TF_RET_CHECK(replica_id < tpu_device_names.size()); - const string& tpu_device_name = tpu_device_names[replica_id][0]; - string host_device_name; - TF_RETURN_IF_ERROR(DeviceNameUtils::DeviceNameToCpuDeviceName( - tpu_device_name, &host_device_name)); - n->set_assigned_device_name(host_device_name); - // We may run TPU rewrite passes again on the subgraphs of the resulting - // graph. 
Clear kTPUReplicateAttr and kOutsideCompilationAttr for - // "_XlaRecvAtHost" nodes and "_XlaSendFromHost" nodes, in order to make - // sure that TPU rewrite passes take no effect on host-side subgraphs for - // outside compilation. - n->ClearAttr(kTPUReplicateAttr); - n->ClearAttr(kOutsideCompilationAttr); - } - - // Remove IdentityN nodes generated for outside compilation. IdentityN is - // exempt from resource edge colocation, but here we do need input and output - // for these IdentityN nodes to be colocated. - std::vector identityn_nodes; - for (Node* n : g->op_nodes()) { - if (n->type_string() == "IdentityN" && - HasNodeAttr(n->def(), kXlaOutsideCompilationInputsAttrName)) { - identityn_nodes.push_back(n); - } - } - for (Node* n : identityn_nodes) { - std::vector out_edges(n->out_edges().begin(), - n->out_edges().end()); - for (const Edge* e : out_edges) { - if (e->IsControlEdge()) { - continue; - } - - int src_output = e->src_output(); - const Edge* input_edge; - TF_RETURN_IF_ERROR(n->input_edge(src_output, &input_edge)); - Node* dst = e->dst(); - int dst_input = e->dst_input(); - g->RemoveEdge(e); - g->AddEdge(input_edge->src(), input_edge->src_output(), dst, dst_input); - } - g->RemoveNode(n); - } - - return Status::OK(); -} - -/* static */ Status DistributedTPURewritePass::ParseHostComputeCores( - const Node& replicate_node, - const OutsideCompilationNodeMap& outside_compilation_nodes, - HostComputeCoreMap* host_compute_core) { - std::vector hc_core_string; - TF_RETURN_IF_ERROR(GetNodeAttr(replicate_node.attrs(), "host_compute_core", - &hc_core_string)); - TF_RETURN_IF_ERROR( - ParseHostComputeCoreList(hc_core_string, host_compute_core)); - for (const auto& iter : outside_compilation_nodes) { - const string& oc_cluster_name = iter.first; - if (host_compute_core->find(oc_cluster_name) == host_compute_core->end()) { - // By default put host compute Ops on replicated core 0. - (*host_compute_core)[oc_cluster_name] = 0; - } - } - return Status::OK(); -} - -/* static */ Status DistributedTPURewritePass::GetDeviceTopology( - const DeviceSet& device_set, const Node& replicate_node, int* num_replicas, - int* num_cores_per_replica, int* num_tasks, - std::vector>* tf_device_assignment, - std::unique_ptr* xla_device_assignment, - string* tpu_compilation_device) { - TF_RETURN_IF_ERROR( - GetNodeAttr(replicate_node.attrs(), "num_replicas", num_replicas)); - if (*num_replicas < 1) { - return errors::InvalidArgument("num_replicas must be >= 1, got ", - *num_replicas); - } - - // Find the set of TPU devices in the TF job. - // Indexed by [task number][tpu device number]. - std::vector> tpu_devices; - int num_tpus_per_task; - TF_RETURN_IF_ERROR(GetTPUDeviceNames(replicate_node.requested_device(), - device_set, tpu_compilation_device, - &num_tpus_per_task, &tpu_devices)); - - string topology; - TF_RETURN_IF_ERROR( - GetNodeAttr(replicate_node.attrs(), "topology", &topology)); - TF_RETURN_IF_ERROR(GetNodeAttr( - replicate_node.attrs(), "num_cores_per_replica", num_cores_per_replica)); - std::vector device_assignment; - TF_RETURN_IF_ERROR(GetNodeAttr(replicate_node.attrs(), "device_assignment", - &device_assignment)); - - // TODO(cwhipkey): since we can control multiple pods of different shapes - // from a single worker, it may be desirable to propagate the remote device - // information around (e.g., in DeviceAttributes). This can lead to the mesh - // topology proto being leaked to cloud TPU users (e.g. 
through GetStatus - // calls); this may be okay, but to be conservative, just assume that the - // master session has the proper flags set. - auto* tpu_platform = tpu::TpuPlatformInterface::GetRegisteredPlatform(); - TF_RET_CHECK(tpu_platform); - tpu::TpuTopologyExternal tpu_topology(tpu_platform->GetTopologyPtr()); - TF_RET_CHECK(num_tpus_per_task == - tpu_topology.LogicalDevicesPerHost(kTensorCore)); - TF_RETURN_IF_ERROR(BuildDeviceAssignment( - tpu_topology, num_tpus_per_task, tpu_devices, *num_replicas, - *num_cores_per_replica, topology, device_assignment, tf_device_assignment, - xla_device_assignment)); - - return Status::OK(); -} - -/* static */ Status DistributedTPURewritePass::GetIOTypes( - int num_replicas, const Node& replicate_node, FunctionLibraryRuntime* flr, - Graph* graph, NameRangeMap* input_name_map, const NameAttrList** function, - std::unique_ptr* computation, DataTypeVector* arg_types, - DataTypeVector* retval_types, ParameterInfo* params_info) { - DataTypeVector input_types, broadcast_input_types, guaranteed_constant_types; - TF_RETURN_IF_ERROR( - GetNodeAttr(replicate_node.attrs(), "Tinputs", &input_types)); - TF_RETURN_IF_ERROR(GetNodeAttr(replicate_node.attrs(), "Tbroadcast_inputs", - &broadcast_input_types)); - TF_RETURN_IF_ERROR(GetNodeAttr(replicate_node.attrs(), - "Tguaranteed_constants", - &guaranteed_constant_types)); - int num_distributed_vars; - TF_RETURN_IF_ERROR(GetNodeAttr(replicate_node.attrs(), - "num_distributed_variables", - &num_distributed_vars)); - const int num_per_replica_inputs = input_types.size() - num_distributed_vars; - - if (num_per_replica_inputs % num_replicas != 0) { - return errors::InvalidArgument( - "Number of inputs to TPUReplicate (", num_per_replica_inputs, - ") is not divisible by the number of replicas (", num_replicas, ")."); - } - - int num_variables; - TF_RETURN_IF_ERROR( - GetNodeAttr(replicate_node.attrs(), "NumVariables", &num_variables)); - - NameRangeMap output_name_map; - TF_RETURN_IF_ERROR(NameRangesForNode(replicate_node, replicate_node.op_def(), - input_name_map, &output_name_map)); - - TF_RETURN_IF_ERROR( - GetNodeAttr(replicate_node.attrs(), "computation", function)); - - *computation = absl::make_unique(graph->op_registry()); - TF_RETURN_IF_ERROR(GetComputationForTPUReplicateOp( - **function, flr, computation->get(), arg_types, retval_types)); - - *params_info = ParameterInfo( - num_replicas, num_per_replica_inputs / num_replicas, num_distributed_vars, - broadcast_input_types.size(), num_variables, - guaranteed_constant_types.size(), retval_types->size()); - - if (arg_types->size() != params_info->NumInputsToEachReplica()) { - return errors::InvalidArgument( - "Computation argument to TPUReplicate has wrong number of " - "arguments. Expected ", - params_info->NumInputsToEachReplica(), " inputs, got ", - arg_types->size()); - } - if (replicate_node.num_outputs() != params_info->NumOutputsToHost()) { - return errors::InvalidArgument( - "Wrong number of outputs from TPUReplicate. 
Expected ", - params_info->NumOutputsToHost(), " outputs, got ", - replicate_node.num_outputs()); - } - if (enable_cross_replica_sharding_mirrored_variables_) { - std::vector mirrored_variable_indices; - TF_RETURN_IF_ERROR(GetNodeAttr(replicate_node.attrs(), - TPUREPLICATE_MIRRORED_VAR_INDICES_ATTR, - &mirrored_variable_indices)); - for (int index : mirrored_variable_indices) { - TF_RET_CHECK(params_info->IsPerReplicaArg(index) || - params_info->IsDistributedArg(index)) - << "Mirrored variables not categorized as per-replica arguments, " - "index: " - << index; - params_info->mutable_mirrored_variable_indices()->insert(index); - } - } - return Status::OK(); -} - -/* static */ Status DistributedTPURewritePass::BuildSequencingNodes( - const string& tpu_compilation_device, const Node& replicate_node, - Graph* graph, Node** host_transfer_sequencer, Node** control_before, - Node** control_after) { - *host_transfer_sequencer = nullptr; - - TF_RETURN_IF_ERROR( - BuildNoopNode(replicate_node, - graph->NewName(strings::StrCat(replicate_node.name(), "/", - "control_before")), - /*device=*/"", graph, control_before)); - for (const Edge* e : replicate_node.in_edges()) { - if (!e->IsControlEdge()) { - continue; - } - Node* predecessor = e->src(); - if (predecessor->IsSource()) continue; - if (predecessor->type_string() == "NoOp" && - predecessor->attrs().Find("_xla_host_transfer_sequencer") != nullptr) { - // The node is the sequencer for host transfer operations. Its control - // dependency needs to be placed after the execute node, not before. - if (*host_transfer_sequencer != nullptr) { - return errors::Internal("Replicate node ", replicate_node.name(), - " has two transfer sequencer nodes: ", - (*host_transfer_sequencer)->name(), " and ", - predecessor->name()); - } - // Set the correct device to match the other sequencing nodes. - predecessor->set_assigned_device_name(tpu_compilation_device); - *host_transfer_sequencer = predecessor; - } else { - graph->AddControlEdge(predecessor, *control_before); - } - } - - TF_RETURN_IF_ERROR( - BuildNoopNode(replicate_node, - graph->NewName(strings::StrCat(replicate_node.name(), "/", - "control_after")), - /*device=*/tpu_compilation_device, graph, control_after)); - for (Node* successor : replicate_node.out_nodes()) { - if (successor->attrs().Find("_xla_tail_outside_compilation") != nullptr) { - graph->AddControlEdge(successor, *control_after); - } else { - graph->AddControlEdge(*control_after, successor); - } - } - return Status::OK(); -} - -/* static */ Status DistributedTPURewritePass::DealWithConstantsAndVariables( - const Node& replicate_node, const NameRangeMap& input_name_map, - Graph* graph, Node* host_transfer_sequencer, Node* control_before, - Node* control_after, absl::Span variable_nodes, - std::vector* guaranteed_constant_nodes, - std::vector* variable_reads) { - TF_RETURN_IF_ERROR(FindGuaranteedConstantInputs( - replicate_node, input_name_map, guaranteed_constant_nodes)); - - TF_RETURN_IF_ERROR(BuildVariableReads(variable_nodes, control_before, graph, - variable_reads)); - // Add the control dependency from host transfer nodes. 
- if (host_transfer_sequencer != nullptr) { - graph->AddControlEdge(host_transfer_sequencer, control_after); - } - return Status::OK(); -} - -/* static */ Status -DistributedTPURewritePass::BuildCompilationStatusReturnNodes( - Node* replicate_node, Node* compile_node, Node** control_after_compilation, - Graph* graph) { - const Edge* compilation_edge = nullptr; - for (const auto* e : replicate_node->out_edges()) { - if (e->IsControlEdge() && - e->dst()->type_string() == "TPUCompilationResult") { - TF_RET_CHECK(compilation_edge == nullptr) - << "Multiple compilation result nodes attached to the same replicate " - "cluster."; - compilation_edge = e; - } - } - - // TODO(jpienaar): This should be checked by default, current tests not using - // this are ones that use the "abort upon successful compile flag" which will - // be removed. Leaving this in until then. - if (compilation_edge != nullptr) { - Node* compilation_status = compilation_edge->dst(); - const AttrValue* compile_status_cluster_attr = - compilation_status->attrs().Find(kTPUCompilationResultAttr); - TF_RET_CHECK(compile_status_cluster_attr != nullptr); - const string& compile_status_cluster = compile_status_cluster_attr->s(); - TF_RET_CHECK(!compile_status_cluster.empty()); - const AttrValue* replicate_cluster_attr = - replicate_node->attrs().Find(kTPUReplicateAttr); - TF_RET_CHECK(replicate_cluster_attr != nullptr); - const string& replicate_cluster = replicate_cluster_attr->s(); - TF_RET_CHECK(!replicate_cluster.empty()); - TF_RET_CHECK(compile_status_cluster == replicate_cluster); - - TF_RETURN_IF_ERROR( - ReplaceCompilationResultNodeWithIdentity(graph, &compilation_status)); - graph->AddEdge(compile_node, 0, compilation_status, 0); - } - - NodeDef def; - def.set_name(UniqueNodeName("tpu_compile_succeeded_assert", graph)); - // Create an op to assert that compilation succeeded. The alternative would - // have been to have each execute op check and return an error. - def.set_op("TPUCompileSucceededAssert"); - MergeDebugInfo(NodeDebugInfo(replicate_node->def()), &def); - Status status; - Node* compile_succeeded = graph->AddNode(def, &status); - compile_succeeded->set_assigned_device_name( - compile_node->assigned_device_name()); - TF_RETURN_IF_ERROR(status); - graph->AddEdge(compile_node, 0, compile_succeeded, 0); - - // Build a sequencing node for when compilation has completed. - TF_RETURN_IF_ERROR( - BuildNoopNode(*replicate_node, - graph->NewName(strings::StrCat(compile_node->name(), "/", - "after_compilation")), - /*device=*/"", graph, control_after_compilation)); - graph->AddControlEdge(compile_succeeded, *control_after_compilation); - - return Status::OK(); -} - -// Updates the head and tail outside compiled nodes so that nodes have the -// correct device and removes the replication and outside compilation attributes -// so that these nodes do not trigger further graph optimization passes. -/* static */ Status DistributedTPURewritePass::UpdateHeadTailOutsideCompilation( - const std::vector>& tf_device_assignment, - const std::vector& head_tail_outside_compilation_nodes) { - for (Node* node : head_tail_outside_compilation_nodes) { - int replica_id; - TF_RETURN_IF_ERROR( - GetNodeAttr(node->def(), kXlaReplicaIdAttrName, &replica_id)); - // Since we set the device, this will now run on a task other than 0. We - // clear the two following attributes so that we don't trigger encapsulation - // again on the remote host (which will fail due to a missing - // _TPUReplicateMetadata node for the cluster). 
- for (const Edge* e : node->in_edges()) { - // Resource consuming ops should colocate with its resource input. - if (e->src()->IsArg() && - e->src()->output_type(e->src_output()) == DT_RESOURCE) { - node->set_requested_device(tf_device_assignment[replica_id][0]); - } - } - if (node->requested_device().empty()) { - string cpu_device; - TF_RETURN_IF_ERROR(DeviceNameUtils::DeviceNameToCpuDeviceName( - tf_device_assignment[replica_id][0], &cpu_device)); - node->set_requested_device(cpu_device); - } - node->ClearAttr(kTPUReplicateAttr); - node->ClearAttr(kOutsideCompilationAttr); - } - return Status::OK(); -} - -/* static */ -Status DistributedTPURewritePass::FingerprintFunctionLibrary( - const FunctionLibraryDefinition& library, uint64* fingerprint) { - // TODO(phawkins): rather than fingerprinting the entire function library, - // consider fingerprinting just the transitive dependencies of a - // computation. - std::string serialized; - FunctionDefLibrary library_proto = library.ToProto(); - if (library_proto.ByteSizeLong() >= 1.5 * 1024 * 1024 * 1024) { - LOG(WARNING) << "Serializing large proto, size: " - << library_proto.ByteSizeLong(); - } - TF_RET_CHECK(SerializeToStringDeterministic(library_proto, &serialized)); - *fingerprint = TpuCompileInterface::Get()->FingerprintString(serialized); - return Status::OK(); -} - -// Performs the rewrite on a single TPUReplicate node. -/* static */ Status DistributedTPURewritePass::RewriteTPUReplicateNode( - const string& session_handle, const DeviceSet& device_set, - Node* replicate_node, FunctionLibraryDefinition* flib_def, - FunctionLibraryRuntime* flr, Node* host_compute_key_placeholder_node, - const OutsideCompilationNodeMap& outside_compilation_nodes, - const std::vector& head_tail_outside_compilation_nodes, - NodeToNodeReplicasMap* outside_compilation_node_images, Graph* graph, - const GraphShapeInfo& shape_info, - TPUReplicateDeviceNamesMapping* tpu_replicate_device_names_mapping, - int64 autotuner_thresh) { - VLOG(2) << "Rewriting node " << replicate_node->name(); - - // num_replicas and num_cores_per_replica are the 'virtual' replicas (copies - // of the computation) and cores (virtual cores within computations) specified - // by the user. They will be mapped to physical TPU cores below. - int num_replicas; - int num_cores_per_replica; - int num_tasks; // Number of tasks. 
- std::vector> tf_device_assignment; - std::unique_ptr xla_device_assignment; - string tpu_compilation_device; - TF_RETURN_IF_ERROR(GetDeviceTopology( - device_set, *replicate_node, &num_replicas, &num_cores_per_replica, - &num_tasks, &tf_device_assignment, &xla_device_assignment, - &tpu_compilation_device)); - - TF_RETURN_IF_ERROR(UpdateHeadTailOutsideCompilation( - tf_device_assignment, head_tail_outside_compilation_nodes)); - - string replicate; - TF_RETURN_IF_ERROR( - GetNodeAttr(replicate_node->def(), kTPUReplicateAttr, &replicate)); - tpu_replicate_device_names_mapping->emplace(replicate, tf_device_assignment); - - NameRangeMap input_name_map; - const NameAttrList* function; - std::unique_ptr computation; - DataTypeVector arg_types, retval_types; - ParameterInfo params_info; - TF_RETURN_IF_ERROR(GetIOTypes(num_replicas, *replicate_node, flr, graph, - &input_name_map, &function, &computation, - &arg_types, &retval_types, ¶ms_info)); - - std::vector arg_shapes, retval_shapes; - TF_RETURN_IF_ERROR(GetArgAndRetvalShapes( - shape_info, *replicate_node, params_info, &arg_shapes, &retval_shapes)); - - TF_RETURN_IF_ERROR(ValidateCoreNumbers(*computation, num_cores_per_replica)); - - std::vector arg_sharding; - std::vector arg_fast_mem; - std::vector retval_sharding; - TF_RETURN_IF_ERROR(AssignArgsAndRetvalsToCores( - num_cores_per_replica, params_info, arg_types, arg_shapes, retval_types, - retval_shapes, *computation, replicate_node, flr, &arg_sharding, - &arg_fast_mem, &retval_sharding)); - - VLOG(1) << DumpGraphToFile("distributed_tpu_graph_to_replicate", *computation, - flib_def); - - GraphDef graph_def; - graph->ToGraphDef(&graph_def); - FunctionLibraryDefinition reachable_functions = - flib_def->ReachableDefinitions(graph_def); - uint64 library_fingerprint; - - TF_RETURN_IF_ERROR( - FingerprintFunctionLibrary(reachable_functions, &library_fingerprint)); - VLOG(1) << "Fingerprint functions: " - << absl::StrJoin(reachable_functions.ListFunctionNames(), ", "); - VLOG(1) << "library_fingerprint: " << library_fingerprint; - - // Builds trigger nodes that put barriers around the expansion of - // TPUReplicate. In particular, we must guarantee: - // a) variable reads happen after all predecessors of the original - // TPUReplicate. - // b) variable writes happen before all successors of the original - // TPUReplicate. - // c) all replicas execute, even if output tensors are only requested from - // a subset of replicas. This is necessary both to ensure that variable - // updates happen, but also Send/Recv will deadlock if only one half of - // the communicating pair runs. - Node* host_transfer_sequencer; - Node* control_before; - Node* control_after; - TF_RETURN_IF_ERROR(BuildSequencingNodes( - tpu_compilation_device, *replicate_node, graph, &host_transfer_sequencer, - &control_before, &control_after)); - - // Build a vector of variable nodes that are inputs. - std::vector variable_inputs; - TF_RETURN_IF_ERROR( - FindVariableInputs(*replicate_node, input_name_map, &variable_inputs)); - - std::vector guaranteed_constant_nodes; - std::vector variable_reads; - TF_RETURN_IF_ERROR(DealWithConstantsAndVariables( - *replicate_node, input_name_map, graph, host_transfer_sequencer, - control_before, control_after, variable_inputs, - &guaranteed_constant_nodes, &variable_reads)); - - // Builds Shape nodes that compute the dynamic shapes of arguments whose - // shapes are not statically known. 
- std::vector dynamic_shape_nodes; - TF_RETURN_IF_ERROR(BuildDynamicShapeNodes(*replicate_node, arg_shapes, - params_info, variable_reads, graph, - &dynamic_shape_nodes)); - - // Builds a TPUCompile node that compiles `clusters` on `compile_device`. - Node* compile_node; - TF_RETURN_IF_ERROR(BuildCompileNode( - replicate_node, *function, library_fingerprint, params_info, arg_shapes, - arg_types, guaranteed_constant_nodes, session_handle, arg_sharding, - arg_fast_mem, retval_sharding, num_cores_per_replica, - /*compile_device=*/tpu_compilation_device, xla_device_assignment.get(), - dynamic_shape_nodes, graph, &compile_node, autotuner_thresh)); - - // Compilation must be sequenced after the control node if the TPU computation - // in a control-flow construct, such as a loop. - graph->AddControlEdge(control_before, compile_node); - - Node* control_after_compilation; - TF_RETURN_IF_ERROR(BuildCompilationStatusReturnNodes( - replicate_node, compile_node, &control_after_compilation, graph)); - - std::vector variable_writes; - TF_RETURN_IF_ERROR(BuildExecuteNodes( - params_info, num_tasks, num_cores_per_replica, *replicate_node, arg_types, - arg_shapes, retval_types, arg_sharding, retval_sharding, - tf_device_assignment, compile_node, variable_reads, - control_after_compilation, control_after, &variable_writes, graph)); - bool contains_resource_write_op = - ContainsResourceWriteOp(*graph, reachable_functions); - - VLOG(2) << "contains_resource_write_op: " << contains_resource_write_op; - // Skip conditional write if there is no resource writing op inside TPU - // computation. - if (contains_resource_write_op) { - TF_RETURN_IF_ERROR(BuildVariableWrites(variable_inputs, control_after, - variable_writes, graph)); - } - - if (host_compute_key_placeholder_node != nullptr) { - TF_RETURN_IF_ERROR(ConnectHostComputeNodes( - compile_node, host_compute_key_placeholder_node, graph)); - } - - HostComputeCoreMap host_compute_core; - TF_RETURN_IF_ERROR(ParseHostComputeCores( - *replicate_node, outside_compilation_nodes, &host_compute_core)); - TF_RETURN_IF_ERROR(ReplicateOutsideCompilationNodes( - tf_device_assignment, host_compute_core, outside_compilation_nodes, - outside_compilation_node_images, graph)); - - graph->RemoveNode(replicate_node); - return Status::OK(); -} - -// Adds sharded weight update optimization for each host training loop. -// -// For any host training loop found in the graph, TPUVariableReshard ops -// are inserted to match the best layout chosen by the XLA. -/* static */ Status -DistributedTPURewritePass::PerformHostTrainingLoopOptimization( - Graph* graph, FunctionLibraryDefinition* flib_def, - FunctionLibraryRuntime* flr) { - std::vector host_training_loops_info; - Status s = tpu::DetectHostTrainingLoop( - /*current_function_name=*/nullptr, - /*current_function_attr=*/nullptr, flib_def, graph, flr, - &host_training_loops_info); - if (!s.ok()) { - VLOG(2) << "No valid host training loop found. Skipping sharded weight " - << "update optimization."; - return Status::OK(); - } - - for (const auto& host_loop : host_training_loops_info) { - const auto& function_name = host_loop.encapsulating_function_name; - // `function_name` has value when host training loop is inside a - // function call node. When host training loop is found inside a function - // call node, then, in addition to adding TPUVariableReshard ops, function - // library definition needs to be updated as well. 
- if (function_name.has_value()) { - const auto& function_attr = host_loop.encapsulating_function_attrs; - TF_RET_CHECK(function_attr.has_value()) - << "Unable to find function attribute for function: " - << *function_name; - - const FunctionDef* function_def = flib_def->Find(*function_name); - TF_RET_CHECK(function_def) - << "Unable to find function : " << *function_name; - - std::unique_ptr fbody; - TF_RETURN_IF_ERROR(FunctionDefToBodyHelper( - *function_def, AttrSlice(&function_attr.value()), flib_def, &fbody)); - Graph* function_graph = fbody->graph; - TF_RETURN_IF_ERROR(tpu::AddReshardOp(function_graph, host_loop)); - TF_RETURN_IF_ERROR(UpdateFunctionLibDefinition(*function_graph, - *function_name, flib_def)); - } else { - TF_RETURN_IF_ERROR(tpu::AddReshardOp(graph, host_loop)); - } - } - return Status::OK(); -} - -Status DistributedTPURewritePass::PlaceUnassignedDeviceNodesOnTPUIfPossible( - Graph* graph) { - ReverseDFS(*graph, {}, PlaceOpsOnTPU); - return Status::OK(); -} - -Status DistributedTPURewritePass::Run( - const GraphOptimizationPassOptions& options) { - VLOG(1) << "DistributedTPURewritePass::Run"; - - Graph* graph = options.graph->get(); - - VLOG(1) << DumpGraphToFile("distributed_tpu_compilation_before", *graph, - options.flib_def); - - const auto* config = &options.session_options->config; - std::unique_ptr pflr( - new ProcessFunctionLibraryRuntime( - nullptr, options.session_options->env, config, - graph->versions().producer(), options.flib_def, - config ? config->graph_options().optimizer_options() - : OptimizerOptions())); - - FunctionLibraryRuntime* flr = - pflr->GetFLR(ProcessFunctionLibraryRuntime::kDefaultFLRDevice); - - // This pass can only run in the session master, which should fill - // in the device_set field to the options. - TF_RET_CHECK(options.device_set != nullptr); - - // Find all the replicate nodes before mutating the graph. - std::vector replicate_nodes; - // Map from compiled subgraph cluster name to the outside_compilation nodes in - // that cluster. - std::map outside_compilation_nodes; - std::map> head_tail_outside_compilation_nodes; - TF_RETURN_IF_ERROR(FindTaggedNodes(graph, &replicate_nodes, - &outside_compilation_nodes, - &head_tail_outside_compilation_nodes)); - - if (replicate_nodes.empty()) { - // Remove unused TPUPartitionedInput nodes. - for (Node* n : graph->nodes()) { - if (n->type_string() == kTPUPartitionedInput) graph->RemoveNode(n); - } - return Status::OK(); - } - - std::unordered_map host_compute_key_placeholder_map; - TF_RETURN_IF_ERROR(FindHostComputeKeyPlaceholderNodes( - graph, replicate_nodes, &host_compute_key_placeholder_map)); - - GraphShapeInfo shape_info; - TF_RETURN_IF_ERROR(InferShapes(graph, /*arg_shapes=*/{}, - flr->GetFunctionLibraryDefinition(), - &shape_info)); - int64 autotuner_thresh = options.session_options->config.experimental() - .xla_fusion_autotuner_thresh(); - - NodeToNodeReplicasMap outside_compilation_node_images; - TPUReplicateDeviceNamesMapping tpu_replicate_device_names_mapping; - for (Node* node : replicate_nodes) { - TF_RETURN_IF_ERROR(RewriteTPUReplicateNode( - options.session_handle, *options.device_set, node, options.flib_def, - flr, host_compute_key_placeholder_map[node->name()], - outside_compilation_nodes[node->name()], - head_tail_outside_compilation_nodes[node->name()], - &outside_compilation_node_images, graph, shape_info, - &tpu_replicate_device_names_mapping, autotuner_thresh)); - } - - // Place the padding nodes generated by dynamic padder on the correct devices. 
- // TODO(rxsang): Place padding ops on TPUs in - // PlaceUnassignedDeviceNodesOnTPUIfPossible function. - TF_RETURN_IF_ERROR(SetPaddingNodesDevices(graph)); - - std::unordered_map outside_compilation_inputs; - for (Node* n : graph->op_nodes()) { - string lifted_arg_inputs_attr; - if (n->type_string() == "IdentityN" && - GetNodeAttr(n->def(), kXlaOutsideCompilationInputsAttrName, - &lifted_arg_inputs_attr) - .ok()) { - outside_compilation_inputs[lifted_arg_inputs_attr] = n; - } - } - for (const auto& iter : outside_compilation_nodes) { - TF_RETURN_IF_ERROR(ReplicateOutsideCompilationEdges( - iter.second, outside_compilation_node_images, - outside_compilation_inputs, graph)); - } - TF_RETURN_IF_ERROR( - RemoveOutsideCompilationNodes(outside_compilation_node_images, graph)); - TF_RETURN_IF_ERROR(LowerOutsideCompilationFunctionalNodes( - graph, *options.flib_def, tpu_replicate_device_names_mapping)); - - TF_RETURN_IF_ERROR(PlaceUnassignedDeviceNodesOnTPUIfPossible(graph)); - VLOG(1) << DumpGraphToFile("distributed_tpu_compilation_after", *graph, - options.flib_def); - VLOG(1) << "DistributedTPURewritePass::Run() finished"; - - if (enable_cross_replica_sharding_mirrored_variables_) { - VLOG(1) << "Starting host training loop optimization."; - VLOG(1) << DumpGraphToFile("host_loop_optimization_before", *graph, - options.flib_def); - TF_RETURN_IF_ERROR( - PerformHostTrainingLoopOptimization(graph, options.flib_def, flr)); - VLOG(1) << DumpGraphToFile("host_loop_optimization_after", *graph, - options.flib_def); - VLOG(1) << "Host training loop optimization finished."; - } - - return Status::OK(); -} - -bool DistributedTPURewritePass::distribute_vars_ = false; -bool DistributedTPURewritePass:: - replicate_inputs_outputs_by_default_for_xla_spmd_ = false; -bool DistributedTPURewritePass:: - enable_cross_replica_sharding_mirrored_variables_ = true; -bool DistributedTPURewritePass::enable_automatic_model_parallelism_ = false; - -/*static*/ void DistributedTPURewritePass::SetDistributedTpuRewritePassOptions( - bool distribute_vars, bool replicate_inputs_outputs_by_default_for_xla_spmd, - bool enable_cross_replica_sharding_mirrored_variables, - bool enable_automatic_model_parallelism) { - distribute_vars_ = distribute_vars; - replicate_inputs_outputs_by_default_for_xla_spmd_ = - replicate_inputs_outputs_by_default_for_xla_spmd; - enable_cross_replica_sharding_mirrored_variables_ = - enable_cross_replica_sharding_mirrored_variables; - enable_automatic_model_parallelism_ = enable_automatic_model_parallelism; -} - -} // namespace tensorflow diff --git a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.h b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.h deleted file mode 100644 index 52fae7a7c13..00000000000 --- a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.h +++ /dev/null @@ -1,589 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/
-
-// Rewrites TPUReplicate nodes into replicated computations on TPU.
-//
-// To represent a distributed TPU computation, we use the
-// TPUReplicate operator, which describes a subgraph (represented as a
-// Tensorflow function) to replicate across a TPU pod.
-//
-// Model parallelism and data parallelism:
-// ---------------------------------------
-// We support two different kinds of parallelism on TPU:
-// * data parallelism (replication), or parallelization across batches, and
-// * model parallelism, or parallelization within a batch.
-//
-// The function passed to a TPUReplicate operator is replicated many
-// times across a TPU pod (data parallelism). The `num_replicas` attribute
-// controls how many replicas of the computation to create. Replicas are
-// mostly independent; replicas can only communicate using the
-// CrossReplicaSum operator, which is typically used to communicate gradients
-// during training.
-//
-// Each replica may optionally use more than one TPU core (model
-// parallelism). The `num_cores_per_replica` attribute controls how many cores
-// there are per replica. For each core, there is a virtual TPU_REPLICATED_CORE
-// device that is only valid within replicated TPU computations (e.g.,
-// TPU_REPLICATED_CORE:0, TPU_REPLICATED_CORE:1, etc.); each
-// TPU_REPLICATED_CORE device corresponds to one TPU core in every replica.
-// Each replica runs its own copy of the computation assigned to each
-// TPU_REPLICATED_CORE device.
-//
-// The Python code is responsible for providing a device_assignment that
-// describes how the replicated logical cores map to physical cores on the TPU
-// topology.
-//
-// Inputs to TPUReplicate:
-// ------------------------------
-// The TPUReplicate operator takes four kinds of inputs, in the
-// following order:
-// * per-replica inputs. If there are three per-replica inputs (A, B, C) and
-//   two replicas, the first six arguments to TPUReplicate will be:
-//     A0 B0 C0 A1 B1 C1
-//   where Ai is the A input to the i-th replica.
-// * distributed inputs. These inputs follow the per-replica inputs.
-//   If there are two distributed inputs (E, F) and two replicas, the following
-//   arguments to TPUReplicate will be: E F.
-//   Each replica receives its own local copy of E and F.
-// * broadcast inputs. These inputs follow the distributed inputs. All
-//   replicas receive a copy of each of these inputs.
-// * variables. Resource variables accessed by the computation follow the
-//   broadcast inputs.
-//
-// For example, for a computation with two replicas, three per-replica inputs
-// (A, B, C), two distributed inputs (E, F), two broadcast inputs (X, Y), and
-// two variables (V, W), the arguments to TPUReplicate will be:
-//   A0 B0 C0 A1 B1 C1 E F X Y V W
-// and each replica will receive the following arguments:
-//   A B C E F X Y V W
-//
-// Distributed TPU compilation requires that the shapes of all operators
-// be known statically at compilation time, before any nodes have executed.
-// Shapes are determined using shape information emitted by InferShapes. It
-// is not possible to replicate Tensorflow operators with unknown or dynamic
-// shapes for TPU at present.
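[Editor's note, not part of the patch: the argument ordering described above can be reproduced with a small standalone helper. The input names and counts below are hypothetical; only the ordering rule — per-replica inputs repeated replica-major, then distributed, broadcast, and variable inputs — comes from the comment above.]

// Illustrative sketch only; not part of the rewrite pass.
#include <string>
#include <vector>

std::vector<std::string> FlattenReplicateArgs(
    const std::vector<std::string>& per_replica,  // e.g. {"A", "B", "C"}
    const std::vector<std::string>& distributed,  // e.g. {"E", "F"}
    const std::vector<std::string>& broadcast,    // e.g. {"X", "Y"}
    const std::vector<std::string>& variables,    // e.g. {"V", "W"}
    int num_replicas) {
  std::vector<std::string> args;
  // Per-replica inputs come first and are repeated once per replica,
  // replica-major: A0 B0 C0 A1 B1 C1 ...
  for (int r = 0; r < num_replicas; ++r) {
    for (const std::string& name : per_replica) {
      args.push_back(name + std::to_string(r));
    }
  }
  // Distributed, broadcast, and variable inputs each appear exactly once,
  // in that order, after all per-replica inputs.
  args.insert(args.end(), distributed.begin(), distributed.end());
  args.insert(args.end(), broadcast.begin(), broadcast.end());
  args.insert(args.end(), variables.begin(), variables.end());
  return args;  // A0 B0 C0 A1 B1 C1 E F X Y V W for the example above.
}

With the example counts above this yields the twelve host-side arguments listed in the comment, consistent with ParameterInfo::NumInputsFromHost() = num_replicas * num_per_replica_args + num_distributed_args + num_broadcast_args + num_variables (+ num_guaranteed_constants).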
-// -// Graph rewrite: -// -------------- -// Compilation replaces TPUReplicate operators with: -// * a single TPUCompile node that compiles the computations, -// * one TPUExecute node for each TPU device in the system that -// executes the relevant computation, -// * one ReadVariableOp for each variable accessed by the replicated -// computation, -// * one AssignVariableOp for each variable accessed by the replicated -// computation. An assignment is built even if a variable is only read by the -// computation. We do not know which variables are written until we apply the -// XlaCompiler to the computation, but that does not happen until after the -// rewrite. Conservatively, we write back the values of all variables after -// the computation completes. -// TODO(phawkins): only write back variables that the computation may write. -// * one Shape node for each Tensor or Variable input to the computation whose -// shape is not statically known at rewrite time. The input shapes are fed -// to the TPUCompile node. -// -// To ensure that the reads and writes seem to happen at the right time in the -// graph execution, we add control edges from all predecessors of the original -// TPUReplicate operator to each of the ReadVariableOp operators. -// Similarly, we add control edges from all of the AssignVariableOp operators to -// all of the successors of the TPUReplicate operator. -// -// The TPUReplicate rewrite must run before placement, since resource -// variable inputs will have DT_RESOURCE, which cannot be sent across devices, -// leading to objections from the placer. The rewrite rewrites the resource -// accesses into explicit ReadVariableOp and AssignVariableOp operators that the -// placer is free to colocate with the variables. - -#ifndef TENSORFLOW_CORE_TPU_GRAPH_REWRITE_DISTRIBUTED_TPU_REWRITE_PASS_H_ -#define TENSORFLOW_CORE_TPU_GRAPH_REWRITE_DISTRIBUTED_TPU_REWRITE_PASS_H_ - -#include -#include - -#include "absl/container/node_hash_map.h" -#include "absl/types/span.h" -#include "tensorflow/compiler/jit/shape_inference.h" -#include "tensorflow/compiler/xla/service/computation_placer.h" -#include "tensorflow/core/common_runtime/optimization_registry.h" -#include "tensorflow/core/framework/function.h" -#include "tensorflow/core/graph/graph.h" -#include "tensorflow/core/platform/env.h" -#include "tensorflow/stream_executor/tpu/tpu_topology.h" - -namespace tensorflow { - -// Replaces clusters assigned to TPU_SYSTEM devices with -// TPUCompile and TPUExecute nodes assigned to the corresponding -// TPU devices. -class DistributedTPURewritePass : public GraphOptimizationPass { - public: - static void SetDistributedTpuRewritePassOptions( - bool distribute_vars, - bool replicate_inputs_outputs_by_default_for_xla_spmd, - bool enable_cross_replica_sharding_mirrored_variables, - bool enable_automatic_model_parallelism); - - Status Run(const GraphOptimizationPassOptions& options) override; - - // The following methods are public only for the use of unit tests. - - // See comment at the top of the file for how the inputs are ordered. - // Encapsulates the different TPU replicated node input and output - // information, and provide common APIs over them. 
- class ParameterInfo { - public: - ParameterInfo() {} - ParameterInfo(int64 num_replicas, int64 num_per_replica_args, - int64 num_distributed_args, int64 num_broadcast_args, - int64 num_variables, int64 num_guaranteed_constants, - int64 num_retvals_per_replica) - : num_replicas_(num_replicas), - num_per_replica_args_(num_per_replica_args), - num_distributed_args_(num_distributed_args), - num_broadcast_args_(num_broadcast_args), - num_variables_(num_variables), - num_guaranteed_constants_(num_guaranteed_constants), - num_retvals_per_replica_(num_retvals_per_replica) {} - - int64 NumReplicas() const { return num_replicas_; } - - int64 NumPerReplicaArgs() const { return num_per_replica_args_; } - - int64 NumDistributedArgs() const { return num_distributed_args_; } - - int64 NumBroadcastArgs() const { return num_broadcast_args_; } - - int64 NumVariables() const { return num_variables_; } - - int64 NumGuaranteedConstants() const { return num_guaranteed_constants_; } - - int64 NumRetvalsPerReplica() const { return num_retvals_per_replica_; } - - bool IsPerReplicaArg(int64 index) const { - return index < num_per_replica_args_; - } - - bool IsDistributedArg(int64 index) const { - return index >= num_per_replica_args_ && - index < (num_per_replica_args_ + num_distributed_args_); - } - - bool IsBroadcastArg(int64 index) const { - return index >= num_per_replica_args_ && - index < (num_per_replica_args_ + num_distributed_args_ + - num_broadcast_args_); - } - - bool IsVariableArg(int64 index) const { - return index >= (num_per_replica_args_ + num_broadcast_args_) && - index < (num_per_replica_args_ + num_distributed_args_ + - num_broadcast_args_ + num_variables_); - } - - bool IsConstantArg(int64 index) const { - return index >= (num_per_replica_args_ + num_distributed_args_ + - num_broadcast_args_ + num_variables_) && - index < (num_per_replica_args_ + num_distributed_args_ + - num_broadcast_args_ + num_variables_ + - num_guaranteed_constants_); - } - - // Returns the number of inputs which has been received by the host. - int64 NumInputsFromHost() const { - return num_replicas_ * num_per_replica_args_ + num_distributed_args_ + - num_broadcast_args_ + num_variables_ + num_guaranteed_constants_; - } - - // Returns the number of inputs which will be sent to each replica. - int64 NumInputsToEachReplica() const { - return num_per_replica_args_ + num_distributed_args_ + - num_broadcast_args_ + num_variables_ + num_guaranteed_constants_; - } - - // Returns the total number of output values returned to the host (for all - // replicas). - int64 NumOutputsToHost() const { - return num_replicas_ * num_retvals_per_replica_; - } - - // Returns the position of the first per-replica argument, within the set - // of all hosts arguments. - // Broadcast arguments follow the distributed arguments. - int64 FirstBroadcastArgFromHost() const { - return num_replicas_ * num_per_replica_args_ + num_distributed_args_; - } - - // Indices of mirrored variables across replicas, which should be - // categorized as per_replica_args. 
- const std::set& mirrored_variable_indices() const { - return mirrored_variable_indices_; - } - std::set* mutable_mirrored_variable_indices() { - return &mirrored_variable_indices_; - } - - private: - int64 num_replicas_ = 1; - int64 num_per_replica_args_ = 0; - int64 num_distributed_args_ = 0; - int64 num_broadcast_args_ = 0; - int64 num_variables_ = 0; - int64 num_guaranteed_constants_ = 0; - int64 num_retvals_per_replica_ = 0; - std::set mirrored_variable_indices_; - }; - - // Mapping from TPUReplicate cluster name to tpu device names. Value is a - // mapping from [replica][core] to a TF device name. - typedef absl::flat_hash_map>> - TPUReplicateDeviceNamesMapping; - - // Determines which devices to use to run the computation. - // Inputs: - // * num_tpus_per_task: the number of TPU devices attached to each task - // * tpu_devices: a [task][device] collection of TPU devices - // * num_replicas: the number of replicas requested - // * num_cores_per_replica: the number of cores in each computation instance - // * topology_attr: the topology TPUReplicate attribute - // * device_assignment_attr: the device_assignment TPUReplicate attribute - // Outputs: - // * tf_device_assignment: a mapping from [replica][core] to a TF device name - // * xla_device_assignment: a mapping from [replica][core] to a linearized TPU - // coordinate. - // TODO(phawkins): change tf_device_assignment to an xla::Array2D. - static Status BuildDeviceAssignment( - const tpu::TpuTopologyExternal& topology, int num_tpus_per_task, - const std::vector>& tpu_devices, int num_replicas, - int num_cores_per_replica, const string& topology_attr, - absl::Span device_assignment_attr, - std::vector>* tf_device_assignment, - std::unique_ptr* xla_device_assignment); - - // Returns the `computation` graph attached to TPUReplicate operator - // `node`. `flr` is a FunctionLibraryRuntime to use when - // instantiating the function body. Sets `*arg_types` and - // `*retval_types` to the argument/return types of the function. - static Status GetComputationForTPUReplicateOp(const NameAttrList& function, - FunctionLibraryRuntime* flr, - Graph* computation, - DataTypeVector* arg_types, - DataTypeVector* retval_types); - - // Returns the shapes of the argument tensors and return values of the - // TPUReplicate operator `node` using the _output_shapes, - // _output_handle_shapes, and _output_handle_types annotations on the input - // nodes. Expects inputs in the following order (see comment at top of file): - // * num_replicas * num_per_replica_args per-replica inputs, - // * num_broadcast_args broadcast inputs, - // * num_variables variable inputs. - // Returns an error if the input shapes to `node` are not statically known. - // Also verifies that all replicas have identical input shapes for their - // per-replica inputs. - static Status GetArgAndRetvalShapes( - const GraphShapeInfo& shape_info, const Node& node, - const ParameterInfo& params_info, std::vector* arg_shapes, - std::vector* retval_shapes); - - // Assigns arguments and return values to cores. The assignment is represented - // as an XLA op sharding, so that an argument can be replicated across cores. - // `arg_sharding` and `retval_sharding` are vectors of shardings indexed by - // argument/retval number. - // `arg_fast_mem` is vector of fast_mem indication which is indexed by - // argument number. 
- static Status AssignArgsAndRetvalsToCores( - int num_cores_per_replica, const ParameterInfo& params_info, - const DataTypeVector& arg_types, - const std::vector& arg_shapes, - const DataTypeVector& retval_types, - const std::vector& retval_shapes, const Graph& graph, - const Node* replicate_node, FunctionLibraryRuntime* flr, - std::vector<::xla::OpSharding>* arg_sharding, - std::vector* arg_fast_mem, - std::vector<::xla::OpSharding>* retval_sharding); - - // Computes a fingerprint of the contents of `library`. - static Status FingerprintFunctionLibrary( - const FunctionLibraryDefinition& library, uint64* fingerprint); - - // Populates `*variables` with the "variables" inputs to `index`-th output of - // `node`. - struct VariableInput { - Node* node; - int index; - - // Type of the variable's value. Note that this is different to the type of - // the output of 'variable', which is always DT_RESOURCE. - DataType dtype; - }; - static Status FindVariableInputs(const Node& node, - const NameRangeMap& input_range_map, - std::vector* variables); - - // Populates '*guaranteed_constants' with the "guaranteed_constants" inputs - // to 'node'. - static Status FindGuaranteedConstantInputs( - const Node& node, const NameRangeMap& input_range_map, - std::vector* guaranteed_constants); - - // Builds Shape nodes that compute the shapes of arguments whose shapes are - // not statically known. - static Status BuildDynamicShapeNodes( - const Node& replicate_node, const std::vector& arg_shapes, - const ParameterInfo& params_info, - const std::vector& variable_reads, Graph* graph, - std::vector* dynamic_shape_nodes); - - // Builds a TPUCompile node that compiles the computation in - // `function_names`. calls `nodes`. - // TODO(b/33943292): at present, for model parallelism with Send/Recv to work - // the `nodes` must correspond to the computations assigned to TPU:0, - // TPU:1, ... in order since XLA hard-codes the chip IDs in the generated - // executables. - static Status BuildCompileNode( - const Node* replicate_node, const NameAttrList& function, - uint64 library_fingerprint, const ParameterInfo& params_info, - const std::vector& arg_shapes, - const DataTypeVector& arg_types, - const std::vector& guaranteed_constant_nodes, - const string& session_handle, - const std::vector<::xla::OpSharding>& arg_sharding, - const std::vector& arg_fast_mem, - const std::vector<::xla::OpSharding>& retval_sharding, - int num_cores_per_replica, const string& compile_device, - const xla::DeviceAssignment* xla_device_assignment, - const std::vector& dynamic_shape_nodes, Graph* graph, - Node** compile_node, int64 autotuner_thresh); - - // Builds a TPUCompileSucceededAssert node that verifies that compilation - // succeeded and replaces the TPUCompilationStatus node in the graph. - static Status BuildCompilationStatusReturnNodes( - Node* replicate_node, Node* compile_node, - Node** control_after_compilation, Graph* graph); - - // Builds ReadVariableOp nodes that read `variables`, with a control - // edges that ensure they happen after `control_predecessor`. - static Status BuildVariableReads(absl::Span variables, - Node* control_predecessor, Graph* graph, - std::vector* variable_reads); - - // Returns true if graph or functions contain resource write op, otherwise - // return false. - // TODO(b/137048563): Recognize unused resource rewrite op. - static bool ContainsResourceWriteOp(const Graph& graph, - const FunctionLibraryDefinition& fld); - // Struct that describes a variable value to be written back from TPUExecute. 
- struct VariableWrite { - // A node:output pair containing a boolean tensor that determines whether - // the value should be written back. - Node* predicate; - int predicate_output; - - // A node:output pair containing the value to be written back. - Node* value; - int value_output; - }; - - // Builds AssignVariableOp nodes that write `variables` with the values from - // `variable_writes`, with control edges that ensure the writes happen before - // `control_successor`. - static Status BuildVariableWrites( - absl::Span variables, Node* control_successor, - absl::Span variable_writes, Graph* graph); - - // Builds TPUExecute operators assigned to each TPU device - // involved in the computation. - // Arguments: - // * `params_info` is the structure containing the information about the - // TPUReplicate node inputs and outputs. - // * `num_tasks` is the number of TensorFlow tasks in the slice. - // * `num_cores_per_replica` is the number of cores which are dedicated to - // each replica. - // * `replicate_node` is the original TPUReplicate node. - // * `arg_types` are the types of the arguments to the computation function - // passed as argument to TPUReplicate, including per-replica, - // broadcast, and variable arguments. - // * `arg_shapes` are the corresponding shapes (and handle types/shapes, if - // applicable). - // * `arg_shardings` and `retval_shardings` are mappings from - // arguments/return indices to shardings, as returned by - // `AssignArgsAndRetvalsToCores`. - // * `pod_devices` lists the devices to assign to each core of each replica. - // * `variable_reads` is a vectors of ReadVariableOp operators, one for each - // variable argument to the computation. - // * The execute operators will have a control edge from - // `control_predecessor` and another control edge to `control_successor`. - // Populates '*variable_writes' with information about variable values to - // write back. - static Status BuildExecuteNodes( - const ParameterInfo& params_info, int num_tasks, - int num_cores_per_replica, const Node& replicate_node, - const DataTypeVector& arg_types, - const std::vector& arg_shapes, - const DataTypeVector& retval_types, - const std::vector<::xla::OpSharding>& arg_shardings, - const std::vector<::xla::OpSharding>& retval_shardings, - const std::vector>& tpu_device_names, - Node* compile_node, const std::vector& variable_reads, - Node* control_predecessor, Node* control_successor, - std::vector* variable_writes, Graph* graph); - - // Connects the compile node to all the host transfer nodes, and removes the - // key placeholder node that was previously standing in for it. - // Arguments: - // * `compile_node` is the TPUCompile node that has been added to the graph. - // * `key_placeholder_node` is the placeholder node to send the key to all the - // host - // * transfer nodes in the original graph. - // * `graph` is the graph being rewritten. - static Status ConnectHostComputeNodes(Node* compile_node, - Node* key_placeholder_node, - Graph* graph); - - // Map from a Node in an outside_compilation cluster in the original graph to - // the list of Nodes, one for each replica, that it is expanded into during - // replication. - typedef absl::node_hash_map> NodeToNodeReplicasMap; - - // Map from the name of an outside_compilation cluster to the model-parallel - // core index that the HostCompute Op should be placed on in that cluster. 
- typedef std::map HostComputeCoreMap; - - // Map from the name of an outside_compilation cluster to the list of Nodes - // that should run on the host for that cluster. - typedef std::map> OutsideCompilationNodeMap; - - // Copies the outside_compilation nodes in a cluster to create replica - // replica_index. - static Status CopyOutsideCompilationNodes( - int replica_index, const std::vector& outside_compilation_nodes, - const DeviceNameUtils::ParsedName& tpu_device, - const DeviceNameUtils::ParsedName& partial_device, - NodeToNodeReplicasMap* node_images, Graph* graph); - - // Replicates all the nodes in outside_compilation clusters in a compiled - // computation. - static Status ReplicateOutsideCompilationNodes( - const std::vector>& tf_device_assignment, - const HostComputeCoreMap& host_compute_core, - const OutsideCompilationNodeMap& outside_compilation_nodes, - NodeToNodeReplicasMap* node_images, Graph* graph); - - // Lifts the edges between original outside_compilation nodes in a cluster - // onto their replicas. - static Status CopyOutsideCompilationEdges( - const std::vector& outside_compilation_nodes, - const NodeToNodeReplicasMap& node_images, - const std::unordered_map outside_compilation_inputs, - Graph* graph); - - // Lifts all the edges in outside_compilation clusters in a compiled - // computation to their replicas. - static Status ReplicateOutsideCompilationEdges( - const OutsideCompilationNodeMap& outside_compilation_nodes, - const NodeToNodeReplicasMap& node_images, - const std::unordered_map outside_compilation_inputs, - Graph* graph); - - // Removes all the original outside_compilation nodes from the graph, - // following replication. - static Status RemoveOutsideCompilationNodes( - const NodeToNodeReplicasMap& node_images, Graph* graph); - - // Lowers outside compilation functional nodes (If/While/function call). - // Otherwise, when we have multiple workers, device placer will not be able to - // place nodes if outside compilation has DT_RESOURCE inputs (e.g. a - // DT_RESOURCE input fed into multiple While nodes on different devices). - static Status LowerOutsideCompilationFunctionalNodes( - Graph* g, const FunctionLibraryDefinition& flib_def, - const TPUReplicateDeviceNamesMapping& tpu_replicate_device_names_mapping); - - // Parses the 'host_compute_core' attribute on replicate_node to get the - // replicated core id of each outside_compilation cluster. - static Status ParseHostComputeCores( - const Node& replicate_node, - const OutsideCompilationNodeMap& outside_compilation_nodes, - HostComputeCoreMap* host_compute_core); - - // Gets the physical topology information about the TPU system. - static Status GetDeviceTopology( - const DeviceSet& device_set, const Node& replicate_node, - int* num_replicas, int* num_cores_per_replica, int* num_tasks, - std::vector>* tf_device_assignment, - std::unique_ptr* xla_device_assignment, - string* tpu_compilation_device); - - // Gets the types of args, retvals, and parameters. - static Status GetIOTypes( - int num_replicas, const Node& replicate_node, FunctionLibraryRuntime* flr, - Graph* graph, NameRangeMap* input_name_map, const NameAttrList** function, - std::unique_ptr* computation, DataTypeVector* arg_types, - DataTypeVector* retval_types, ParameterInfo* params_info); - - // Find known constants and deals with variable reads. 
- static Status DealWithConstantsAndVariables( - const Node& replicate_node, const NameRangeMap& input_name_map, - Graph* graph, Node* host_transfer_sequencer, Node* control_before, - Node* control_after, absl::Span variable_nodes, - std::vector* guaranteed_constant_nodes, - std::vector* variable_reads); - - // Adds NoOp nodes for sequencing computation and variable reads/writes. - static Status BuildSequencingNodes(const string& tpu_compilation_device, - const Node& replicate_node, Graph* graph, - Node** host_transfer_sequencer, - Node** control_before, - Node** control_after); - - // Performs the pass's rewrite on a TPUReplicate node `node`. - static Status RewriteTPUReplicateNode( - const string& session_handle, const DeviceSet& device_set, - Node* replicate_node, FunctionLibraryDefinition* flib_def, - FunctionLibraryRuntime* flr, Node* host_compute_key_placeholder_node, - const OutsideCompilationNodeMap& outside_compilation_nodes, - const std::vector& head_tail_outside_compilation_nodes, - NodeToNodeReplicasMap* outside_compilation_node_images, Graph* graph, - const GraphShapeInfo& shape_info, - TPUReplicateDeviceNamesMapping* tpu_replicate_device_names_mapping, - int64 autotuner_thresh); - - // Performs host training loop optimization. For example, when TPUExecute - // node is inside a while loop, then model weight variables can be sharded - // in XLA preferred layout and then unsharded only at the very last iteration - // to reduce the number of all_gather. - static Status PerformHostTrainingLoopOptimization( - Graph* graph, FunctionLibraryDefinition* flib_def, - FunctionLibraryRuntime* flr); - - // Heuristically place some nodes with unassigned devices on TPUs for - // performance reasons. - static Status PlaceUnassignedDeviceNodesOnTPUIfPossible(Graph* graph); - - // Updates the head and tail outside compiled nodes so that nodes have the - // correct device and removes the replication and outside compilation - // attributes so that these nodes do not trigger further graph optimization - // passes. - static Status UpdateHeadTailOutsideCompilation( - const std::vector>& tf_device_assignment, - const std::vector& head_tail_outside_compilation_nodes); - - private: - static bool distribute_vars_; - static bool replicate_inputs_outputs_by_default_for_xla_spmd_; - static bool enable_cross_replica_sharding_mirrored_variables_; - static bool enable_automatic_model_parallelism_; -}; - -} // namespace tensorflow - -#endif // TENSORFLOW_CORE_TPU_GRAPH_REWRITE_DISTRIBUTED_TPU_REWRITE_PASS_H_ diff --git a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.cc b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.cc deleted file mode 100644 index 18b158c0335..00000000000 --- a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.cc +++ /dev/null @@ -1,45 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.h" - -#include - -#include "absl/random/random.h" - -namespace tensorflow { -namespace { - -static int64 overridden_node_id = -1; - -} // namespace - -namespace internal { - -void OverrideNodeIdForTesting(const int64 node_id) { - overridden_node_id = node_id; -} - -uint64 GetNodeId() { - if (overridden_node_id > -1) { - return overridden_node_id; - } else { - return absl::Uniform(absl::SharedBitGen(), uint64{0}, - std::numeric_limits::max()); - } -} - -} // namespace internal -} // namespace tensorflow diff --git a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.h b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.h deleted file mode 100644 index ce80249c30f..00000000000 --- a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.h +++ /dev/null @@ -1,38 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_CORE_TPU_GRAPH_REWRITE_DISTRIBUTED_TPU_REWRITE_PASS_INTERNAL_H_ -#define TENSORFLOW_CORE_TPU_GRAPH_REWRITE_DISTRIBUTED_TPU_REWRITE_PASS_INTERNAL_H_ - -#include "tensorflow/core/framework/types.h" - -namespace tensorflow { - -// Implementation details of distributed_tpu_rewrite_pass.cc, please DO NOT -// depend on these. -namespace internal { - -// When set to a value >= 0, overrides the node_id. Used for getting -// deterministic node_ids during testing. -void OverrideNodeIdForTesting(int64 node_id); - -// Retrieves the node id, used to make some node names unique in the rewrite -// pass. -uint64 GetNodeId(); - -} // namespace internal -} // namespace tensorflow - -#endif // TENSORFLOW_CORE_TPU_GRAPH_REWRITE_DISTRIBUTED_TPU_REWRITE_PASS_INTERNAL_H_ diff --git a/tensorflow/core/tpu/graph_rewrite/host_training_loop_optimization_util.cc b/tensorflow/core/tpu/graph_rewrite/host_training_loop_optimization_util.cc deleted file mode 100644 index 2f4c1c4eabe..00000000000 --- a/tensorflow/core/tpu/graph_rewrite/host_training_loop_optimization_util.cc +++ /dev/null @@ -1,629 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/core/tpu/graph_rewrite/host_training_loop_optimization_util.h" - -#include -#include -#include - -#include "absl/container/flat_hash_set.h" -#include "absl/container/node_hash_set.h" -#include "tensorflow/compiler/tf2xla/functionalize_control_flow_util.h" -#include "tensorflow/compiler/tf2xla/tf2xla_util.h" -#include "tensorflow/core/graph/algorithm.h" -#include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/lib/gtl/cleanup.h" -#include "tensorflow/core/protobuf/tpu/compile_metadata.pb.h" -#include "tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.h" - -namespace tensorflow { -namespace tpu { - -namespace { - -constexpr char kDefaultShardingValue[] = ""; - -const Edge* FindEdgeConnecting(const Node* src, const Node* dst) { - for (const auto e : src->out_edges()) { - if (e->dst()->name() == dst->name()) return &(*e); - } - return nullptr; -} - -// Contains TPUExecute node and its DT_RESOURCE input nodes that -// correspond to model weights. -struct ExecuteNodeInfo { - Node* execute_node; - std::vector var_inputs; -}; - -// Returns whether `node` is in `execute_nodes` or `(identity) -> execute`. -bool IsExecuteNodeOrIdentityToExecuteNode( - const Graph& graph, const std::unordered_set& loop_nodes, - const absl::flat_hash_set& execute_nodes, Node* node) { - if (execute_nodes.find(node) != execute_nodes.end()) return true; - if (loop_nodes.find(node) == loop_nodes.end()) return false; - if (node->IsNextIteration()) return true; - if (!node->IsIdentity()) return false; - - for (const Edge* e : node->out_edges()) { - if (e->IsControlEdge()) continue; - - Node* node = e->dst(); - if (!IsExecuteNodeOrIdentityToExecuteNode(graph, loop_nodes, execute_nodes, - node)) { - return false; - } - } - - return true; -} - -// From input node to the TPUExecute op, finds the corresponding Enter node -// by searching/traversing nodes in below pattern of nodes: -// Enter ----> (identity) ---> While body input -// Returns nullptr if the Enter node is not found. -xla::StatusOr FindEnterNodeFromTPUExecuteNodeInput(Node* input_node) { - Node* node = input_node; - while (node->IsIdentity()) { - TF_RETURN_IF_ERROR(node->input_node(0, &node)); - } - - if (node->IsEnter()) { - return node; - } - return nullptr; -} - -xla::StatusOr ResourceOnlyUsedForTPUExecuteInLoop( - const Graph& graph, const std::unordered_set& loop_nodes, - const Node* enter_node, const absl::flat_hash_set execute_nodes) { - for (const Edge* output_edge : enter_node->out_edges()) { - Node* output_node = output_edge->dst(); - if (output_edge->IsControlEdge() || output_node->IsExit()) continue; - - // If output node is not execute node, it must be output node - // to the while loop body. - if (!IsExecuteNodeOrIdentityToExecuteNode(graph, loop_nodes, execute_nodes, - output_node)) { - return false; - } - } - return true; -} - -// Given a TPUCompile node, find all TPUExecute nodes that executes the compiled -// program and its model weight variable inputs as well. -// TPUCompileMetadataProto of TPUCompile node must be reset to `new_metadata` -// if new reshard ops are added. 
-Status ExtractExecuteNodeInfo(const Node* compile_node, const Graph& graph, - const std::unordered_set& loop_nodes, - std::vector* execute_node_info, - TPUCompileMetadataProto* new_metadata) { - string metadata_string; - TF_RETURN_IF_ERROR( - GetNodeAttr(compile_node->attrs(), "metadata", &metadata_string)); - new_metadata->ParsePartialFromString(metadata_string); - if (new_metadata->num_cores_per_replica() != 1) { - // We do not support model parallelism yet. - return Status::OK(); - } - - execute_node_info->clear(); - for (Node* node : compile_node->out_nodes()) { - if (node->type_string() == "TPUExecute") { - execute_node_info->push_back({node}); - } - } - if (execute_node_info->empty()) { - return Status::OK(); - } - TF_RET_CHECK(execute_node_info->size() == new_metadata->num_replicas()) - << "Number of replicas does not equal number of execute nodes: " - << new_metadata->num_replicas() << " vs " << execute_node_info->size(); - DataTypeVector arg_types; - TF_RETURN_IF_ERROR(GetNodeAttr((*execute_node_info)[0].execute_node->attrs(), - "Targs", &arg_types)); - for (int64 i = 0; i < arg_types.size(); ++i) { - if (arg_types[i] != DT_RESOURCE) { - continue; - } - const auto sharding_config = new_metadata->args(i).enable_xla_sharding(); - if (sharding_config != TPUCompileMetadataProto::Arg::TENTATIVE && - sharding_config != TPUCompileMetadataProto::Arg::ALLOWED) { - continue; - } - std::vector edges(execute_node_info->size()); - bool is_supported = true; - std::unordered_map> - enter_to_execute_nodes; - for (int64 j = 0; j < edges.size(); ++j) { - auto execute = (*execute_node_info)[j].execute_node; - TF_RETURN_IF_ERROR(execute->input_edge(i, &edges[j])); - TF_RET_CHECK(edges[j]->src()->output_type(edges[j]->src_output()) == - arg_types[i]) - << "Execute op has an unexpected input type."; - // Traverse backwards to find the Enter node from which the input is - // passed. - // This makes sure that we are checking the usages of all potential - // aliases of the input node as well. - TF_ASSIGN_OR_RETURN(auto enter_node, FindEnterNodeFromTPUExecuteNodeInput( - edges[j]->src())); - if (enter_node == nullptr) { - is_supported = false; - enter_to_execute_nodes.clear(); - break; - } - enter_to_execute_nodes[enter_node].insert(edges[j]->dst()); - } - - for (const auto& it : enter_to_execute_nodes) { - // Size of execute nodes should be either 1 (per-replica variables) or - // num_replicas (distributed variables). - if ((it.second.size() != 1) && - (it.second.size() != new_metadata->num_replicas())) { - is_supported = false; - break; - } - TF_ASSIGN_OR_RETURN(bool no_other_use, - ResourceOnlyUsedForTPUExecuteInLoop( - graph, loop_nodes, it.first, it.second)); - if (!no_other_use) { - is_supported = false; - break; - } - } - - // Add the variable input edges only when they are supported for all - // executes. - if (is_supported) { - for (int64 j = 0; j < edges.size(); ++j) { - (*execute_node_info)[j].var_inputs.push_back(edges[j]); - } - new_metadata->mutable_args(i)->set_enable_xla_sharding( - TPUCompileMetadataProto::Arg::ALLOWED); - } - } - - int64 total = 0; - for (const auto& a : new_metadata->args()) { - if (a.enable_xla_sharding() == TPUCompileMetadataProto::Arg::ALLOWED) { - total++; - } - } - TF_RET_CHECK(total == (*execute_node_info)[0].var_inputs.size()) - << " total " << total << " var_inputs " - << (*execute_node_info)[0].var_inputs.size(); - if (total == 0) { - // We don't need to process anything if no input is added. 
- execute_node_info->clear(); - } - return Status::OK(); -} - -bool IsTPUCompileOp(const Node& n) { return n.type_string() == "TPUCompile"; } - -void FindTPUCompileNodes( - const std::string* current_function_name, - const AttrValueMap* current_function_attr, - const std::unordered_map& frames, - std::vector* host_training_loops_info) { - // Adds frames with no children (i.e., the innermost frames) to a worklist. - std::deque worklist; - - for (auto& frame : frames) { - if (frame.second.num_children == 0) { - worklist.push_back(&frame.second); - } - } - - // Check TPUCompile node from the innermost while loop to the outermost - // while loop. - while (!worklist.empty()) { - const WhileLoopFrame* frame = worklist.front(); - worklist.pop_front(); - - for (const auto& n : frame->nodes) { - if (!IsTPUCompileOp(*n)) continue; - - HostTrainingLoopInfo host_training_loop_info; - host_training_loop_info.compile_node_name = n->name(); - host_training_loop_info.loop_cond_node_name = frame->loop_cond->name(); - host_training_loop_info.while_loop_name = frame->name; - - for (const auto arg : frame->args) { - LoopArgInfo arg_info; - arg_info.enter_node_name = arg.enter->name(); - if (arg.exit) arg_info.exit_node_name = arg.exit->name(); - - host_training_loop_info.loop_arguments.push_back(std::move(arg_info)); - } - host_training_loop_info.loop_nodes = frame->nodes; - - if (current_function_name) { - host_training_loop_info.encapsulating_function_name = - *current_function_name; - } - if (current_function_attr) { - host_training_loop_info.encapsulating_function_attrs = - *current_function_attr; - } - - host_training_loops_info->emplace_back( - std::move(host_training_loop_info)); - } - - // If the parent has no remaining children, add it to the worklist. - --frame->parent->num_children; - if (frame->parent->num_children == 0) { - worklist.push_back(frame->parent); - } - } -} - -// From while loop cond node, finds all loop exit nodes by searching/traversing -// nodes in below pattern of nodes: -// LoopCond -----> Switch -----> Exit -std::vector FindLoopExitNodes(const Node& loop_cond) { - std::vector loop_exit_nodes; - for (const auto e_cond : loop_cond.out_edges()) { - if (e_cond->IsControlEdge() || !e_cond->dst()->IsSwitch()) continue; - auto switch_node = e_cond->dst(); - - for (const auto e_switch : switch_node->out_edges()) { - if (e_switch->IsControlEdge() || !e_switch->dst()->IsExit()) continue; - - loop_exit_nodes.push_back(e_switch->dst()); - } - } - return loop_exit_nodes; -} - -// Find any one of switch nodes in the while loop by traversing the graph -// from while loop condition node. -xla::StatusOr GetLoopSwitchNode(const Node& loop_cond_node) { - Node* loop_switch_node; - for (auto n : loop_cond_node.out_nodes()) { - if (n->IsSwitch()) { - loop_switch_node = n; - break; - } - } - - TF_RET_CHECK(loop_switch_node->IsSwitch()) - << "Unable to find any switch nodes."; - return loop_switch_node; -} - -// Returns or creates a node in that is executed before each loop iteration -// in the while loop. -Status GetOrCreateBeforeEachIterationNode(Graph* graph, Node* loop_switch_node, - Node** node_out) { - // If while loop switch node already has a outgoing data to true brach - // of the switch op, then reuse that node. - for (const auto out_edge : loop_switch_node->out_edges()) { - if (out_edge->src_output() == 1) { - *node_out = out_edge->dst(); - return Status::OK(); - } - } - - // Create Identity node that represents execution at every loop iteration. 
- NodeDef at_loop_iteration_nodedef; - at_loop_iteration_nodedef.set_op("Identity"); - DataType dtype; - TF_RETURN_IF_ERROR(GetNodeAttr(loop_switch_node->def(), "T", &dtype)); - - AddNodeAttr("T", dtype, &at_loop_iteration_nodedef); - at_loop_iteration_nodedef.set_name(graph->NewName(strings::StrCat( - "TPUVariableReshard/before_iteration", "/_", internal::GetNodeId()))); - - Status status; - Node* at_loop_iteration_node = - graph->AddNode(at_loop_iteration_nodedef, &status); - TF_RETURN_IF_ERROR(status); - - graph->AddEdge(loop_switch_node, 1, at_loop_iteration_node, 0); - *node_out = at_loop_iteration_node; - return Status::OK(); -} - -// Injects NoOp node in that is executed after the very last iteration -// of the while loop but before the while loop exit node. -Status AddNoOpAfterLastIteration(Graph* graph, Node* loop_switch_node, - Node** node_out) { - // Find the exit node from loop switch node. - Node* exit_node; - for (const auto out_node : loop_switch_node->out_nodes()) { - if (out_node->IsExit()) { - exit_node = out_node; - break; - } - } - - TF_RET_CHECK(exit_node != nullptr) - << "Cannot find exit node connected to switch node :" - << loop_switch_node->name(); - - // Create NoOp that represents execution at the end of while loop - // last iteration. - NodeDef after_last_loop_iteration; - after_last_loop_iteration.set_op("Identity"); - DataType dtype; - TF_RETURN_IF_ERROR(GetNodeAttr(loop_switch_node->def(), "T", &dtype)); - - AddNodeAttr("T", dtype, &after_last_loop_iteration); - after_last_loop_iteration.set_name(graph->NewName(strings::StrCat( - "TPUVariableReshard/last_iteration", "/_", internal::GetNodeId()))); - - Status status; - Node* after_last_iteration_node = - graph->AddNode(after_last_loop_iteration, &status); - TF_RETURN_IF_ERROR(status); - - // Newly created node must be executed once after last iteration of the while - // loop and before while loop exits. - graph->AddEdge(loop_switch_node, 0, after_last_iteration_node, 0); - graph->AddControlEdge(after_last_iteration_node, exit_node); - *node_out = after_last_iteration_node; - return Status::OK(); -} - -} // namespace - -Status DetectHostTrainingLoop( - const std::string* current_function_name, - const AttrValueMap* current_function_attr, - const FunctionLibraryDefinition* library, Graph* graph, - FunctionLibraryRuntime* flr, - std::vector* host_training_loops_info) { - std::vector associated_function_list; - for (const auto* n : graph->nodes()) { - const auto associated_functions = GetAssociatedFunctions(*n, library); - if (associated_functions.empty()) continue; - - associated_function_list.insert(associated_function_list.end(), - associated_functions.begin(), - associated_functions.end()); - } - - Status ret_status = Status::OK(); - for (const auto& function : associated_function_list) { - if (function.type() != AssociatedFunctionInfo::kFunctionAttr) continue; - - // Convert the function to Graph. 
- FunctionLibraryRuntime::Handle handle; - TF_RETURN_IF_ERROR(flr->Instantiate(function.func_name(), - AttrSlice(&function.attrs()), &handle)); - auto cleanup_handle = gtl::MakeCleanup([&]() { - auto s = flr->ReleaseHandle(handle); - if (!s.ok()) { - ret_status.Update(s); - } - }); - const FunctionBody* body = flr->GetFunctionBody(handle); - Graph* function_graph = body->graph; - TF_RETURN_IF_ERROR(DetectHostTrainingLoop( - &function.func_name(), &function.attrs(), library, function_graph, flr, - host_training_loops_info)); - } - - // BuildControlFlowInfo() requires that the graph's source node is connected - // to all source nodes in the graph. Many graphs violate this invariant. - // As so, add edges to source/sink nodes so that this invariant is kept. - FixupSourceAndSinkEdges(graph); - std::vector cf_info; - TF_RETURN_IF_ERROR( - BuildControlFlowInfo(graph, &cf_info, /*unreachable_nodes=*/nullptr)); - - std::unordered_map frames; - TF_RETURN_IF_ERROR(ExtractWhileLoopFrames(cf_info, graph, &frames)); - FindTPUCompileNodes(current_function_name, current_function_attr, frames, - host_training_loops_info); - return ret_status; -} - -Status AddReshardOp(Graph* graph, const HostTrainingLoopInfo& host_loop_info) { - const auto& compile_node_name = host_loop_info.compile_node_name; - const auto node_name_map = graph->BuildNodeNameIndex(); - const auto node_it = node_name_map.find(compile_node_name); - TF_RET_CHECK(node_it != node_name_map.end()) - << "Unable to find compile node : " << compile_node_name; - - const auto compile_node = node_it->second; - std::vector execute_nodes_info; - - Status status; - TPUCompileMetadataProto metadata; - status = - ExtractExecuteNodeInfo(compile_node, *graph, host_loop_info.loop_nodes, - &execute_nodes_info, &metadata); - if (!status.ok()) { - LOG(ERROR) << "Encountered error when trying to extract execute nodes, " - "skipping host loop optimization. Status: " - << status.ToString(); - return Status::OK(); - } - - if (execute_nodes_info.empty()) { - return Status::OK(); - } - - // Update the TPUCompileMetadata such that sharding config of the - // sharded resource variable inputs is set to ALLOWED instead of - // TENTATIVE. - string new_metadata_string; - metadata.SerializeToString(&new_metadata_string); - compile_node->ClearAttr("metadata"); - compile_node->AddAttr("metadata", new_metadata_string); - - // Unsharding of the model weight variables must happen only at the very - // last loop iteration. As so, add while loop condition predicate as an - // input to the sharding switch node. If loop condition is true, we do not - // unshard. - const auto& cond_node_name = host_loop_info.loop_cond_node_name; - auto loop_cond_node_it = node_name_map.find(cond_node_name); - TF_RET_CHECK(loop_cond_node_it != node_name_map.end()) - << "Cannot find loop condition node : " << cond_node_name; - auto* loop_condition_node = loop_cond_node_it->second; - - // In order to make sure that shard/unshard operations are invoked - // at the start of every loop body and at the end of last iteration - // of the loop, respectively, traverse the graph and find a switch node - // of the host training loop. 
- TF_ASSIGN_OR_RETURN(Node * switch_node, - GetLoopSwitchNode(*loop_condition_node)); - - Node* after_last_iteration_node; - TF_RETURN_IF_ERROR(AddNoOpAfterLastIteration(graph, switch_node, - &after_last_iteration_node)); - - Node* before_loop_iteration_node; - TF_RETURN_IF_ERROR(GetOrCreateBeforeEachIterationNode( - graph, switch_node, &before_loop_iteration_node)); - - // Create const op that represents default sharding value - // (i.e. no-op sharding). - NodeDef default_sharding; - default_sharding.set_op("Const"); - default_sharding.set_name(graph->NewName(strings::StrCat( - "TPUVariableReshard/default_shard_state", "/_", internal::GetNodeId()))); - AddNodeAttr("dtype", DT_STRING, &default_sharding); - - Tensor t(DT_STRING, {2}); - t.vec()(0) = kDefaultShardingValue; - t.vec()(1) = kDefaultShardingValue; - t.AsProtoTensorContent( - (*default_sharding.mutable_attr())["value"].mutable_tensor()); - - Node* default_sharding_node = graph->AddNode(default_sharding, &status); - TF_RETURN_IF_ERROR(status); - // Add control edge between loop condition to make sure that - // default_sharding_node node is inside the while loop frame. - graph->AddControlEdge(loop_condition_node, default_sharding_node); - - // Build a no-op node used to add control edges after unshard nodes. - NodeDef after_unshard; - after_unshard.set_op("NoOp"); - after_unshard.set_name(graph->NewName(strings::StrCat( - "TPUVariableReshard/last_iteration", "/_", internal::GetNodeId()))); - auto after_unshard_node = graph->AddNode(after_unshard, &status); - TF_RETURN_IF_ERROR(status); - - for (auto info : execute_nodes_info) { - auto execute_node = info.execute_node; - // Create Reshard op that optionally shards model weight variables - // prior to program execution. - NodeDef reshard_node_def; - reshard_node_def.set_name(graph->NewName(strings::StrCat( - "TPUVariableReshard/reshard", "/_", internal::GetNodeId()))); - reshard_node_def.set_op("TPUReshardVariables"); - AddNodeAttr("N", static_cast(info.var_inputs.size()), - &reshard_node_def); - Node* reshard_op_node = graph->AddNode(reshard_node_def, &status); - if (!status.ok()) return status; - - reshard_op_node->set_assigned_device_name( - execute_node->assigned_device_name()); - - // Reshard op must execute at every loop iteration prior to - // TPUExecute node. - graph->AddControlEdge(before_loop_iteration_node, reshard_op_node); - graph->AddControlEdge(reshard_op_node, execute_node); - - for (int i = 0; i < info.var_inputs.size(); ++i) { - const auto variable_edge = info.var_inputs[i]; - graph->AddEdge(variable_edge->src(), variable_edge->src_output(), - reshard_op_node, i); - } - - const int new_key_input = info.var_inputs.size(); - // Add program input edge from the compiler(i.e. compilation key). - const auto compilation_key_edge = - FindEdgeConnecting(compile_node, execute_node); - graph->AddEdge(compile_node, compilation_key_edge->src_output(), - reshard_op_node, new_key_input); - - // Create VarHandleOp to store sharding state. Sharding state holds string - // compilation key that identifies whether the graph is re-compiled and the - // variables need to be sharded again. 
- NodeDef var_handle_def; - var_handle_def.set_op("VarHandleOp"); - var_handle_def.set_name(graph->NewName(strings::StrCat( - "TPUVariableReshard/reshard_state", "/_", internal::GetNodeId()))); - AddNodeAttr("dtype", DT_STRING, &var_handle_def); - AddNodeAttr("shape", TensorShape({}), &var_handle_def); - Node* var_handle_node = graph->AddNode(var_handle_def, &status); - if (!status.ok()) return status; - - // Add control edge between `var_handle_def` node and while loop - // loop condition so that `var_handle_def` is inside the same while loop - // frame. - // TODO(hongjunchoi): Consider adding control edge from another node--such - // as input control node. - graph->AddControlEdge(loop_condition_node, var_handle_node); - - // Connect data edge between var handle op and reshard op. - const int format_state_input = new_key_input + 1; - graph->AddEdge(var_handle_node, 0, reshard_op_node, format_state_input); - - // Create Reshard op that represents unsharding after TPUExecute. - NodeDef unshard_node_def; - unshard_node_def.set_name(graph->NewName(strings::StrCat( - "TPUVariableReshard/unshard", "/_", internal::GetNodeId()))); - unshard_node_def.set_op("TPUReshardVariables"); - AddNodeAttr("N", static_cast(info.var_inputs.size()), - &unshard_node_def); - Node* unshard_op_node = graph->AddNode(unshard_node_def, &status); - TF_RETURN_IF_ERROR(status); - - unshard_op_node->set_assigned_device_name( - execute_node->assigned_device_name()); - - for (int i = 0; i < info.var_inputs.size(); ++i) { - const auto variable_edge = info.var_inputs[i]; - // Connect model weight resource variables to unshard op. Since unshard op - // must be only invoked after the very last loop iteration, for each while - // loop inputs, we traverse backwards to find the switch node of the host - // training loop and connect `output_false` field of the switch node with - // unshard op. - TF_ASSIGN_OR_RETURN( - Node * enter_node, - FindEnterNodeFromTPUExecuteNodeInput(variable_edge->src())); - graph->AddEdge(enter_node, 0, unshard_op_node, i); - } - - // Add control dependency before/after unshard node and the control nodes. - graph->AddControlEdge(after_last_iteration_node, unshard_op_node); - graph->AddControlEdge(unshard_op_node, after_unshard_node); - - graph->AddEdge(default_sharding_node, 0, unshard_op_node, new_key_input); - - // Add data edge from sharding state var handle op to unshard op. - graph->AddEdge(var_handle_node, 0, unshard_op_node, format_state_input); - } - // Add control dependency from after_unshard_node to all exits nodes. This is - // to make sure that the unshard ops will be executed as long as any of the - // exits are used. - for (auto exit : FindLoopExitNodes(*loop_condition_node)) { - graph->AddControlEdge(after_unshard_node, exit); - } - return Status::OK(); -} - -} // namespace tpu -} // namespace tensorflow diff --git a/tensorflow/core/tpu/graph_rewrite/host_training_loop_optimization_util.h b/tensorflow/core/tpu/graph_rewrite/host_training_loop_optimization_util.h deleted file mode 100644 index 8f9be8549cc..00000000000 --- a/tensorflow/core/tpu/graph_rewrite/host_training_loop_optimization_util.h +++ /dev/null @@ -1,80 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_CORE_TPU_GRAPH_REWRITE_HOST_TRAINING_LOOP_OPTIMIZATION_UTIL_H_ -#define TENSORFLOW_CORE_TPU_GRAPH_REWRITE_HOST_TRAINING_LOOP_OPTIMIZATION_UTIL_H_ - -#include -#include -#include - -#include "absl/types/optional.h" -#include "tensorflow/compiler/tf2xla/functionalize_control_flow_util.h" -#include "tensorflow/core/common_runtime/function.h" -#include "tensorflow/core/graph/graph.h" - -namespace tensorflow { -namespace tpu { - -struct LoopArgInfo { - std::string enter_node_name; - // Exit nodes are optional for loop invariant while loop args. - absl::optional exit_node_name; -}; - -struct HostTrainingLoopInfo { - // Name and attribute information about the function in which - // host training loop is included. If host training loop is not - // inside a function call, then `function_name` and `function_attrs` - // are nullopt. - absl::optional encapsulating_function_name; - absl::optional encapsulating_function_attrs; - - // TPU Compile node as within a host training loop. - std::string compile_node_name; - - // Name of the while loop in which TPU compile op is located. - std::string while_loop_name; - - // Name of the node that represents loop condition. - std::string loop_cond_node_name; - - // Exit and Enter node names for each loop arguments. - std::vector loop_arguments; - - std::unordered_set loop_nodes; -}; - -// Walks through the `graph`, recursively if functional nodes exist, and -// identifies all host training loops. Host training loops are the inner -// most while loops that encapsulates TPUCompileOp node. This would be -// later used/analyzed to inroduce host loop specific optimizations such -// as adding sharded weight update. -Status DetectHostTrainingLoop( - const std::string* current_function_name, - const AttrValueMap* current_function_attr, - const FunctionLibraryDefinition* library, Graph* graph, - FunctionLibraryRuntime* flr, - std::vector* host_training_loops_info); - -// Injects VariableReshardOps to before and after TPUExecute op inside -// host training loop body. This effectively applies sharded weight update -// on model weight variables. -Status AddReshardOp(Graph* graph, const HostTrainingLoopInfo& host_loop_info); - -} // namespace tpu -} // namespace tensorflow - -#endif // TENSORFLOW_CORE_TPU_GRAPH_REWRITE_HOST_TRAINING_LOOP_OPTIMIZATION_UTIL_H_ diff --git a/tensorflow/core/tpu/graph_rewrite/incomplete_nodedef_builder.cc b/tensorflow/core/tpu/graph_rewrite/incomplete_nodedef_builder.cc deleted file mode 100644 index 47187204f69..00000000000 --- a/tensorflow/core/tpu/graph_rewrite/incomplete_nodedef_builder.cc +++ /dev/null @@ -1,73 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/core/tpu/graph_rewrite/incomplete_nodedef_builder.h" - -#include "tensorflow/compiler/xla/status_macros.h" -#include "tensorflow/core/common_runtime/function.h" - -namespace tensorflow { - -IncompleteNodeDefBuilder::IncompleteNodeDefBuilder(const string& name, - const string& op, - const NodeDebugInfo& debug) { - nodedef_.set_name(name); - nodedef_.set_op(op); - MergeDebugInfo(debug, &nodedef_); -} - -IncompleteNodeDefBuilder& IncompleteNodeDefBuilder::AddAttr( - const string& attr, const DataType& type) { - AddNodeAttr(attr, type, &nodedef_); - return *this; -} - -IncompleteNodeDefBuilder& IncompleteNodeDefBuilder::AddAttr(const string& attr, - int val) { - AddNodeAttr(attr, val, &nodedef_); - return *this; -} - -IncompleteNodeDefBuilder& IncompleteNodeDefBuilder::Device( - const string& device) { - nodedef_.set_device(device); - return *this; -} - -Status IncompleteNodeDefBuilder::Build(Graph* graph, Node** n) { - Status status; - *n = graph->AddNode(nodedef_, &status); - return status; -} - -IncompleteNodeDefBuilder IncompleteNodeDefBuilder::Identity( - const string& name, const DataType& type, const NodeDebugInfo& debug) { - return IncompleteNodeDefBuilder(name, "Identity", debug).AddAttr("T", type); -} - -IncompleteNodeDefBuilder IncompleteNodeDefBuilder::Merge( - const string& name, const DataType& type, const NodeDebugInfo& debug, - int n) { - return IncompleteNodeDefBuilder(name, "Merge", debug) - .AddAttr("T", type) - .AddAttr("N", n); -} - -IncompleteNodeDefBuilder IncompleteNodeDefBuilder::Switch( - const string& name, const DataType& type, const NodeDebugInfo& debug) { - return IncompleteNodeDefBuilder(name, "Switch", debug).AddAttr("T", type); -} - -} // namespace tensorflow diff --git a/tensorflow/core/tpu/graph_rewrite/incomplete_nodedef_builder.h b/tensorflow/core/tpu/graph_rewrite/incomplete_nodedef_builder.h deleted file mode 100644 index 88e484f00cf..00000000000 --- a/tensorflow/core/tpu/graph_rewrite/incomplete_nodedef_builder.h +++ /dev/null @@ -1,58 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#ifndef TENSORFLOW_CORE_TPU_GRAPH_REWRITE_NODEDEF_BUILDER_H_ -#define TENSORFLOW_CORE_TPU_GRAPH_REWRITE_NODEDEF_BUILDER_H_ - -#include - -#include "tensorflow/core/framework/node_def_builder.h" -#include "tensorflow/core/graph/graph.h" -#include "tensorflow/core/lib/core/status.h" - -namespace tensorflow { - -// Convenience builder to build NodeDefs without specifying the inputs. This is -// similar to NodeDefBuilder except inputs are not specified. -// TODO(jpienaar): Clean up NodeDefBuilder and remove this class. -class IncompleteNodeDefBuilder { - public: - IncompleteNodeDefBuilder(const string& name, const string& op, - const NodeDebugInfo& debug); - - IncompleteNodeDefBuilder& AddAttr(const string& attr, const DataType& type); - IncompleteNodeDefBuilder& AddAttr(const string& attr, int val); - - IncompleteNodeDefBuilder& Device(const string& device); - - Status Build(Graph* graph, Node** n); - - static IncompleteNodeDefBuilder Identity(const string& name, - const DataType& type, - const NodeDebugInfo& debug); - static IncompleteNodeDefBuilder Merge(const string& name, - const DataType& type, - const NodeDebugInfo& debug, int n); - static IncompleteNodeDefBuilder Switch(const string& name, - const DataType& type, - const NodeDebugInfo& debug); - - private: - NodeDef nodedef_; -}; - -} // namespace tensorflow - -#endif // TENSORFLOW_CORE_TPU_GRAPH_REWRITE_NODEDEF_BUILDER_H_ diff --git a/tensorflow/core/tpu/graph_rewrite/tpu_rewrite_pass_registration.cc b/tensorflow/core/tpu/graph_rewrite/tpu_rewrite_pass_registration.cc index 83a652d7aaa..ef1831464e2 100644 --- a/tensorflow/core/tpu/graph_rewrite/tpu_rewrite_pass_registration.cc +++ b/tensorflow/core/tpu/graph_rewrite/tpu_rewrite_pass_registration.cc @@ -15,7 +15,6 @@ limitations under the License. 
#include "tensorflow/core/common_runtime/optimization_registry.h" #include "tensorflow/core/tpu/graph_rewrite/distributed_tpu_configuration_rewrite_pass.h" -#include "tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.h" #include "tensorflow/core/tpu/graph_rewrite/encapsulate_tpu_computations_pass.h" #include "tensorflow/core/tpu/graph_rewrite/variable_merger_pass.h" @@ -31,9 +30,8 @@ REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 34, EncapsulateTPUComputationsPass); REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 39, ExtractOutsideCompilationPass); -REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 40, - DistributedTPURewritePass); REGISTER_OPTIMIZATION(OptimizationPassRegistry::POST_REWRITE_FOR_EXEC, 0, VariableMergerPass); + } // namespace } // namespace tensorflow diff --git a/tensorflow/stream_executor/tpu/BUILD b/tensorflow/stream_executor/tpu/BUILD index 2f75c4c49cb..6e00542ddf4 100644 --- a/tensorflow/stream_executor/tpu/BUILD +++ b/tensorflow/stream_executor/tpu/BUILD @@ -322,7 +322,6 @@ cc_library( name = "tpu_topology_external", srcs = ["tpu_topology.cc"], hdrs = ["tpu_topology.h"], - visibility = ["//visibility:public"], deps = [ "//tensorflow/core/platform:types", "//tensorflow/core/tpu:tpu_api", diff --git a/tensorflow/stream_executor/tpu/tpu_topology.h b/tensorflow/stream_executor/tpu/tpu_topology.h index b8c7b850463..b7d462804c9 100644 --- a/tensorflow/stream_executor/tpu/tpu_topology.h +++ b/tensorflow/stream_executor/tpu/tpu_topology.h @@ -30,7 +30,6 @@ struct TpuChipCoordinatesExternal { class TpuCoreLocationExternal { public: - TpuCoreLocationExternal() : core_location_(nullptr) {} explicit TpuCoreLocationExternal(void* core_location) : core_location_(core_location) {} TpuChipCoordinatesExternal chip_coordinates() const; From 9bf535aed35a197714251135b9155bd998df68c4 Mon Sep 17 00:00:00 2001 From: Tim Shen Date: Mon, 20 Jul 2020 13:35:35 -0700 Subject: [PATCH 0857/2522] [XLA/GPU] Sync the XLA/GPU -> MLIR doc. PiperOrigin-RevId: 322214657 Change-Id: I8326adca5cd1d388e95b7a1cdba7a34f6d6dbca0 --- .../compiler/mlir/g3doc/xla_gpu_codegen.md | 40 ++++++++++--------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/tensorflow/compiler/mlir/g3doc/xla_gpu_codegen.md b/tensorflow/compiler/mlir/g3doc/xla_gpu_codegen.md index 2fe109c1783..8e7e605fc4c 100644 --- a/tensorflow/compiler/mlir/g3doc/xla_gpu_codegen.md +++ b/tensorflow/compiler/mlir/g3doc/xla_gpu_codegen.md @@ -74,7 +74,6 @@ We have several choices on how to lower the host-side part from LHLO: * (Pro) easy to implement library calls (cuDNN, cuBLAS, cuFFT, etc), as TFRT ops are interpreted by C++ code. * (Con) host side is under development and not tested. - * (Con) the JAX integration isn’t clear from a runtime point of view * Jitted CPU code * (Pro) great lower-ability. Create a few loops and conditions and it's done. @@ -84,8 +83,7 @@ We have several choices on how to lower the host-side part from LHLO: dynamic loading, etc). * Existing (interpreting) XLA runtime -Tentative conclusion: Use jitted CPU code during the transition, and optionally -adopt TFRT in the end. +Decision: adopt TFRT, but also support jitting CPU code in TFRT. ## Migrating Device LLVM IR (Task 3) @@ -114,7 +112,7 @@ end state of each XLA op: * (Cost) Will be throw-away work if we want to ultimately migrate to Standard. * (Benefit) It is easy and mechanical. Can be done in a short period. - * (Benefit) It doesn't benefit more compared to a). 
+ * (Benefit) It doesn't benefit more compared to (1). 1. Refactor old emitters to be like LHLO -> MLIR GPU + Standard + Loops: * (Cost) Lifting existing emitters to Standard introduces some challenges. Pointers and GEPs need to be converted to MemRefs and SubViews. Ensuring @@ -134,6 +132,19 @@ end state of each XLA op: * (Benefit) unified stack; community support; portability; more optimization potentials. +Conclusions: + +* Don't go for (2). (1) or (3) are just better than (2). (2) costs more than + (1), since it requires a lot of mechanical refactoring. With (1) we can + still achieve the goal of enabling XLA to pick up MLIR emitters. This is by + doing LHLO -> LLVM IR -> run legacy device emitters. +* ElementalIrEmitter ops go for (4), but not incrementally. There is no way to + do it op by op, because all elementally-emitted ops are connected into the + same graph. This work can also serve as a unification point of several + on-going forces (xla/service/mlir\_gpu, the kernel generator, Linalg). +* All other ops go for (1). As a stretch goal, they might be migrated to (3) + or (4). + ## Prioritization While all three tasks mentioned above are parallelizable, under limited @@ -210,26 +221,19 @@ The exact profiling can't be easily done for MLIR-generated ops, since: ### Step 3: (Task 2) Migrating Thunks -This step migrates all host ops and library calls. This step will eliminate most -of the thunks and produce serializable MLIR instead. - -There are roughly three kinds of thunks: - +As a note, there are roughly three kinds of thunks: * KernelThunk, which launches a kernel. * Control flow thunks, which has host control flow logic (conditional, while, for, sequence) and launch body kernels. * Library thunks: cuDNN, cuBLAS, cuFFT, NCCL, etc. -The **bottom line** is to: +The plan is: +* Make Thunks (de)serializable. +* Help improve TFRT to a state where it can support these semantics. +* As the state improves, migrate individual thunks incrementally. -* Create a Thunk dialect that provides (de)serialize logic for all existing - C++-based Thunks. -* Change emitters to emit a graph of Thunk dialect. - -**Optionally**, we can relieve some thunks from C++ implementation. KernelThunk -can lower to the GPU LaunchKernelOp. Control flow thunks can leverage the CFG -Dialect for loops and conditions, combined with LaunchKernelOp. This optional -step requires profiling and stream support. +These action items are only partially ordered. The actual execution order / +engineering parallelism is to be evaluated as it goes. 
### Step 4: (Task 3) Migrated ElementalIrEmitter From c688e89e76d9e6a3aa0a953effe4849cce3f39fa Mon Sep 17 00:00:00 2001 From: Rachel Lim Date: Mon, 20 Jul 2020 13:40:53 -0700 Subject: [PATCH 0858/2522] [tf.data] Fix issue where _RebatchDataset doesn't work with tensor specs with no output shapes PiperOrigin-RevId: 322215616 Change-Id: Iba85bb4902cc28860988c0dc50386ec588f8e77e --- .../experimental/kernel_tests/rebatch_dataset_test.py | 10 ++++++++++ tensorflow/python/data/experimental/ops/distribute.py | 9 +++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/data/experimental/kernel_tests/rebatch_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/rebatch_dataset_test.py index 3f8b40be508..841c25b6856 100644 --- a/tensorflow/python/data/experimental/kernel_tests/rebatch_dataset_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/rebatch_dataset_test.py @@ -219,6 +219,16 @@ class RebatchDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): value_index += num_values self.assertDatasetProduces(dataset, expected_output) + @combinations.generate(test_base.default_test_combinations()) + def testNoOutputShapes(self): + # Some datasets, e.g. datasets with None tensors, have components without + # output shapes. Test that this doesn't break rebatching shape inference + # logic. + dataset = dataset_ops.Dataset.range(4) + dataset = dataset.map(lambda x: (x, None)) + dataset = dataset.batch(4, drop_remainder=True) + _ = distribute._RebatchDataset(dataset, num_replicas=2) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/data/experimental/ops/distribute.py b/tensorflow/python/data/experimental/ops/distribute.py index ca10c0f24f1..ae3c13ecc97 100644 --- a/tensorflow/python/data/experimental/ops/distribute.py +++ b/tensorflow/python/data/experimental/ops/distribute.py @@ -21,6 +21,7 @@ from tensorflow.python.data.experimental.ops.distribute_options import ExternalS from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import gen_experimental_dataset_ops as ged_ops @@ -75,8 +76,12 @@ class _RebatchDataset(dataset_ops.UnaryDataset): def __init__(self, input_dataset, num_replicas, use_fallback=True): - def recalculate_batch_size(output_shape): + def recalculate_batch_size(type_spec): """Recalculates the output_shape after dividing it by num_replicas.""" + output_shape = type_spec._to_legacy_output_shapes() # pylint: disable=protected-access + if not isinstance(output_shape, tensor_shape.TensorShape): + return None + # If the output shape is unknown, we set the batch dimension to unknown. 
if output_shape.rank is None: return None @@ -97,7 +102,7 @@ class _RebatchDataset(dataset_ops.UnaryDataset): def rebatch(type_spec): # pylint: disable=protected-access - batch_size = recalculate_batch_size(type_spec._to_legacy_output_shapes()) + batch_size = recalculate_batch_size(type_spec) return type_spec._unbatch()._batch(batch_size) # pylint: enable=protected-access From 7e95c4abdad395a6e515f60ba8694048b6ba3755 Mon Sep 17 00:00:00 2001 From: Zhenyu Tan Date: Mon, 20 Jul 2020 13:56:50 -0700 Subject: [PATCH 0859/2522] fixit for basic_loops_test PiperOrigin-RevId: 322219117 Change-Id: Ie673d9071e7e8c67237c3c87cb067f7a6e99e7ce --- tensorflow/python/training/basic_loops_test.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/training/basic_loops_test.py b/tensorflow/python/training/basic_loops_test.py index 748116331dc..75a866371ce 100644 --- a/tensorflow/python/training/basic_loops_test.py +++ b/tensorflow/python/training/basic_loops_test.py @@ -23,7 +23,6 @@ import shutil from tensorflow.python.framework import errors_impl from tensorflow.python.framework import ops -from tensorflow.python.framework import test_util from tensorflow.python.platform import test from tensorflow.python.training import basic_loops from tensorflow.python.training import supervisor @@ -38,10 +37,8 @@ def _test_dir(test_name): class BasicTrainLoopTest(test.TestCase): - @test_util.run_deprecated_v1 def testBasicTrainLoop(self): logdir = _test_dir("basic_train_loop") - sv = supervisor.Supervisor(logdir=logdir) # Counts the number of calls. num_calls = [0] @@ -53,14 +50,13 @@ class BasicTrainLoopTest(test.TestCase): sv.request_stop() with ops.Graph().as_default(): + sv = supervisor.Supervisor(logdir=logdir) basic_loops.basic_train_loop( sv, train_fn, args=(sv, "y"), kwargs={"a": "A"}) self.assertEqual(3, num_calls[0]) - @test_util.run_deprecated_v1 def testBasicTrainLoopExceptionAborts(self): logdir = _test_dir("basic_train_loop_exception_aborts") - sv = supervisor.Supervisor(logdir=logdir) def train_fn(unused_sess): train_fn.counter += 1 @@ -71,13 +67,12 @@ class BasicTrainLoopTest(test.TestCase): train_fn.counter = 0 with ops.Graph().as_default(): + sv = supervisor.Supervisor(logdir=logdir) with self.assertRaisesRegex(RuntimeError, "Failed"): basic_loops.basic_train_loop(sv, train_fn) - @test_util.run_deprecated_v1 def testBasicTrainLoopRetryOnAborted(self): logdir = _test_dir("basic_train_loop_exception_aborts") - sv = supervisor.Supervisor(logdir=logdir) class AbortAndRetry(object): @@ -95,6 +90,7 @@ class BasicTrainLoopTest(test.TestCase): raise RuntimeError("Failed Again") with ops.Graph().as_default(): + sv = supervisor.Supervisor(logdir=logdir) aar = AbortAndRetry() with self.assertRaisesRegex(RuntimeError, "Failed Again"): basic_loops.basic_train_loop(sv, aar.train_fn) From 8fbbcbfd02459902a2fbca314a6855a9ba623bbe Mon Sep 17 00:00:00 2001 From: Gaurav Jain Date: Mon, 20 Jul 2020 14:01:06 -0700 Subject: [PATCH 0860/2522] Reduce memory requirement for test PiperOrigin-RevId: 322220008 Change-Id: If4940e9c4aa0b783cea1d112deb7f4cadbcf96fb --- tensorflow/python/kernel_tests/reshape_op_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/kernel_tests/reshape_op_test.py b/tensorflow/python/kernel_tests/reshape_op_test.py index e7e1c7023d8..c711d7a6a88 100644 --- a/tensorflow/python/kernel_tests/reshape_op_test.py +++ b/tensorflow/python/kernel_tests/reshape_op_test.py @@ -202,7 +202,7 @@ class ReshapeTest(test.TestCase): 
self.assertEqual([100, 1], y.get_shape().as_list()) def testInt64Shape(self): - x = array_ops.zeros([50000, 50000]) + x = array_ops.zeros([50000, 50000], dtype=dtypes.bool) # Provide dimension larger than int32 y = array_ops.reshape(x, [50000**2]) self.assertEqual([50000**2], y.get_shape().as_list()) From 18445b0e39b677a21c86b4cf3d2bcb823f27e3e2 Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Mon, 20 Jul 2020 14:08:21 -0700 Subject: [PATCH 0861/2522] Dynamic Concat: Correctly replace root instruction. It's very easy to make the mistake where we replace a bunch of uses of a instruction but didn't really set the root. Add a new function to do that (probably need another round of renaming to be explicit about where a function will replace the root or not). Also use the new dynamic literal feature to simplify the test. PiperOrigin-RevId: 322221542 Change-Id: I314c89118555b897d575f8b418a2ba187d579b07 --- .../compiler/xla/service/dynamic_padder.cc | 4 +-- .../xla/service/dynamic_padder_test.cc | 34 +++++++------------ .../compiler/xla/service/hlo_instruction.cc | 21 ++++++++++++ .../compiler/xla/service/hlo_instruction.h | 6 ++++ 4 files changed, 40 insertions(+), 25 deletions(-) diff --git a/tensorflow/compiler/xla/service/dynamic_padder.cc b/tensorflow/compiler/xla/service/dynamic_padder.cc index 44fdda0f411..c1f9da599e8 100644 --- a/tensorflow/compiler/xla/service/dynamic_padder.cc +++ b/tensorflow/compiler/xla/service/dynamic_padder.cc @@ -688,9 +688,7 @@ StatusOr RewriteDynamicConcat( dynamic_size)); } } - for (HloInstruction* user : prev_users) { - TF_RETURN_IF_ERROR(concat->ReplaceUseWith(user, rewritten_concat)); - } + TF_RETURN_IF_ERROR(concat->ReplaceUsesWith(prev_users, rewritten_concat)); TF_RETURN_IF_ERROR(dynamic_dimension_inference->ForwardDynamicSize( concat, rewritten_concat, {})); return true; diff --git a/tensorflow/compiler/xla/service/dynamic_padder_test.cc b/tensorflow/compiler/xla/service/dynamic_padder_test.cc index e4c70317f2b..e8f429d9db6 100644 --- a/tensorflow/compiler/xla/service/dynamic_padder_test.cc +++ b/tensorflow/compiler/xla/service/dynamic_padder_test.cc @@ -83,8 +83,8 @@ class DynamicPadderTest : public HloTestBase { return module; } - StatusOr RunPadder() { - DynamicPadder padder(/*slice_dynamic_output=*/true, + StatusOr RunPadder(bool slice_dynamic_output = false) { + DynamicPadder padder(/*slice_dynamic_output=*/slice_dynamic_output, CustomCallDynamicDimensionInference, OpHasDynamismSupport); return padder.Run(module_.get()); @@ -162,7 +162,7 @@ ENTRY main { module_ = GetHloModule(hlo_text); - TF_ASSERT_OK(RunPadder().status()); + TF_ASSERT_OK(RunPadder(/*slice_dynamic_output=*/true).status()); // After rewrite, we should have : // // param @@ -218,7 +218,7 @@ ENTRY main { module_ = GetHloModule(hlo_text); - TF_ASSERT_OK(RunPadder().status()); + TF_ASSERT_OK(RunPadder(/*slice_dynamic_output=*/true).status()); // After rewrite, we should have : // // param @@ -654,26 +654,16 @@ XLA_TEST_F(ExecutionTest, DynamicConcat) { const string hlo_text = R"( HloModule DynamicConcat -update_s32 (lhs: s32[], rhs: s32[]) -> s32[] { - lhs = s32[] parameter(0) - rhs = s32[] parameter(1) - ROOT add = s32[] add(lhs, rhs) -} - ENTRY main { param_0 = s32[3] parameter(0) param_1 = s32[3] parameter(1) param_2 = s32[3] parameter(2) size = s32[] constant(2) - param_padded_0 = s32[3] set-dimension-size(param_0, size), dimensions={0} - param_padded_2 = s32[3] set-dimension-size(param_2, size), dimensions={0} - %concatenate = s32[9] - concatenate(s32[3] param_padded_0, s32[3] 
param_1, s32[3] param_padded_2), + param_padded_0 = s32[<=3] set-dimension-size(param_0, size), dimensions={0} + param_padded_2 = s32[<=3] set-dimension-size(param_2, size), dimensions={0} + ROOT %concatenate = s32[9] + concatenate(s32[<=3] param_padded_0, s32[<=3] param_1, s32[<=3] param_padded_2), dimensions={0} - init = s32[] constant(0) - ROOT reduce = s32[] reduce(concatenate, init), - dimensions={0}, - to_apply=update_s32 } )"; @@ -686,10 +676,10 @@ ENTRY main { LiteralUtil::CreateR1({6, 7, -1}); // Dynamic operand. auto module = GetHloModule(hlo_text); - Literal result = - PadAndExecute(std::move(module), {&operand_0, &operand_1, &operand_2}); - - Literal expected = LiteralUtil::CreateR0(28); + Literal result = PadAndExecute(std::move(module), + {&operand_0, &operand_1, &operand_2}, false); + result.SetDynamicSize(0, 7); + Literal expected = LiteralUtil::CreateR1({1, 2, 3, 4, 5, 6, 7}); EXPECT_EQ(result, expected); } diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 9957df41f1a..6ffb149b893 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -2189,6 +2189,27 @@ Status HloInstruction::ReplaceOperandWithDifferentShape( return Status::OK(); } +Status HloInstruction::ReplaceUsesWith(absl::Span users, + HloInstruction* new_producer) { + TF_RET_CHECK( + ShapeUtil::CompatibleIgnoringFpPrecision(shape(), new_producer->shape())) + << shape() << " is not compatible with " << new_producer->shape(); + return ReplaceAllUsesWithDifferentShape(users, new_producer); +} + +Status HloInstruction::ReplaceAllUsesWithDifferentShape( + absl::Span users, HloInstruction* new_producer) { + for (HloInstruction* user : users) { + TF_RETURN_IF_ERROR(ReplaceUseWith(user, new_producer)); + } + + if (parent_ && parent_->root_instruction() == this) { + parent_->set_root_instruction(new_producer, + /*accept_different_shape=*/true); + } + return Status::OK(); +} + Status HloInstruction::ReplaceAllUsesWith(HloInstruction* new_producer) { TF_RET_CHECK( ShapeUtil::CompatibleIgnoringFpPrecision(shape(), new_producer->shape())) diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index 8c50a9bb8fc..95a56d7595b 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -1201,6 +1201,12 @@ class HloInstruction { // Same as ReplaceAllUsesWith, but new_producer can have a different shape. Status ReplaceAllUsesWithDifferentShape(HloInstruction* new_producer); + // Same as ReplaceAllUsesWith, but only replace given set of users. + Status ReplaceUsesWith(absl::Span users, + HloInstruction* new_producer); + Status ReplaceAllUsesWithDifferentShape( + absl::Span users, HloInstruction* new_producer); + // Performs a postorder DFS visit using this node as the root. If // call_finish_visit is true, then DfsHloVisitor::FinishVisit is called when // complete. If ignore_control_predecessors is true, instructions only From 365edbfd5909b9a0dcd37bffa1e815e7ce125ecd Mon Sep 17 00:00:00 2001 From: Revan Sopher Date: Mon, 20 Jul 2020 14:14:59 -0700 Subject: [PATCH 0862/2522] Add get_shape() to RaggedTensor as alias of shape. This matches the signature of Tensor and SparseTensor, removing an uneccesary stumbling block in adding ragged support to sparse codepaths. 
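[Editorial sketch, not part of the patch] A short Python illustration of the API parity this change provides: after it, ragged, sparse, and dense tensors can all be queried with get_shape(), so sparse-oriented code paths no longer need a special case. The printed shapes are illustrative.

import tensorflow as tf

rt = tf.ragged.constant([[1, 2], [3]])
sp = tf.sparse.from_dense([[1, 0], [0, 2]])
dt = tf.constant([[1, 2], [3, 4]])

print(rt.get_shape())  # TensorShape([2, None]) -- ragged dimension reported as None
print(sp.get_shape())  # TensorShape([2, 2])
print(dt.get_shape())  # TensorShape([2, 2])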
PiperOrigin-RevId: 322222828 Change-Id: Ie643ad7ab94f397c988475961e246fc40043dbc8 --- tensorflow/python/ops/ragged/ragged_tensor.py | 21 +++++++++++++++++++ .../python/ops/ragged/ragged_tensor_test.py | 5 +++++ .../golden/v1/tensorflow.-ragged-tensor.pbtxt | 4 ++++ .../golden/v2/tensorflow.-ragged-tensor.pbtxt | 4 ++++ 4 files changed, 34 insertions(+) diff --git a/tensorflow/python/ops/ragged/ragged_tensor.py b/tensorflow/python/ops/ragged/ragged_tensor.py index 93eede6e18a..767f549e952 100644 --- a/tensorflow/python/ops/ragged/ragged_tensor.py +++ b/tensorflow/python/ops/ragged/ragged_tensor.py @@ -825,6 +825,27 @@ class RaggedTensor(composite_tensor.CompositeTensor, value_shape = self._values.shape[1:] return tensor_shape.TensorShape([nrows, ncols]).concatenate(value_shape) + def get_shape(self): + """The statically known shape of this ragged tensor. + + Returns: + A `TensorShape` containing the statically known shape of this ragged + tensor. Ragged dimensions have a size of `None`. + + Alias for `shape` property. + + Examples: + + >>> tf.ragged.constant([[0], [1, 2]]).get_shape() + TensorShape([2, None]) + + >>> tf.ragged.constant( + ... [[[0, 1]], [[1, 2], [3, 4]]], ragged_rank=1).get_shape() + TensorShape([2, None, 2]) + + """ + return self.shape + @property def ragged_rank(self): """The number of times the RaggedTensor's flat_values is partitioned. diff --git a/tensorflow/python/ops/ragged/ragged_tensor_test.py b/tensorflow/python/ops/ragged/ragged_tensor_test.py index 9a15245ea2a..286b730b298 100644 --- a/tensorflow/python/ops/ragged/ragged_tensor_test.py +++ b/tensorflow/python/ops/ragged/ragged_tensor_test.py @@ -760,6 +760,11 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase): [1, 2, 3], array_ops.placeholder(dtype=dtypes.int64)) self.assertEqual(rt6.shape.as_list(), [None, None]) + def testGetShape(self): + rt = RaggedTensor.from_row_splits(b'a b c d e f g'.split(), + [0, 2, 5, 6, 6, 7]) + self.assertEqual(rt.shape.as_list(), rt.get_shape().as_list()) + #============================================================================= # RaggedTensor.__str__ #============================================================================= diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-ragged-tensor.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-ragged-tensor.pbtxt index 7a41cfea4b7..b1754f26b20 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.-ragged-tensor.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.-ragged-tensor.pbtxt @@ -92,6 +92,10 @@ tf_class { name: "from_value_rowids" argspec: "args=[\'cls\', \'values\', \'value_rowids\', \'nrows\', \'name\', \'validate\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'True\'], " } + member_method { + name: "get_shape" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_dims" argspec: "args=[\'self\', \'outer_axis\', \'inner_axis\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-ragged-tensor.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-ragged-tensor.pbtxt index 7a41cfea4b7..b1754f26b20 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.-ragged-tensor.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.-ragged-tensor.pbtxt @@ -92,6 +92,10 @@ tf_class { name: "from_value_rowids" argspec: "args=[\'cls\', \'values\', \'value_rowids\', \'nrows\', \'name\', \'validate\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'True\'], " } + member_method 
{ + name: "get_shape" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "merge_dims" argspec: "args=[\'self\', \'outer_axis\', \'inner_axis\'], varargs=None, keywords=None, defaults=None" From b7fb764f9fd8e84ef089d3f3e5a805a01cddde62 Mon Sep 17 00:00:00 2001 From: Smit Hinsu Date: Mon, 20 Jul 2020 14:15:14 -0700 Subject: [PATCH 0863/2522] Verify IslandOp body has no block arguments Also, removed explicit check on empty body which is already verified by trait SingleBlockImplicitTerminator. PiperOrigin-RevId: 322222883 Change-Id: Id9344905328a5680a02b8c95fe2805e230780e77 --- .../compiler/mlir/tensorflow/ir/tf_executor.cc | 4 ++-- .../tensorflow/tests/tf_executor_ops_invalid.mlir | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc index 1b1d5ba6f3b..8db06e83527 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc @@ -301,8 +301,8 @@ bool IslandOp::WrapsSingleOp() { namespace { LogicalResult Verify(IslandOp island) { - if (island.GetBody().empty()) - return island.emitOpError() << "expects a non-empty body"; + if (!island.GetBody().args_empty()) + return island.emitOpError() << "expects body without any arguments"; Operation &yield = island.GetBody().back(); if (!isa(yield)) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_executor_ops_invalid.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tf_executor_ops_invalid.mlir index 2f034f1bfae..0e9814de137 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tf_executor_ops_invalid.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_executor_ops_invalid.mlir @@ -232,6 +232,20 @@ func @invalid_island(%arg0: tensor<*xf32>, %ctl: !tf_executor.control) { // ----- +// Check that an island body doesn't have any block arguments. +func @invalid_island(%arg0: tensor<*xf32>, %ctl: !tf_executor.control) { + tf_executor.graph { + "tf_executor.island"() ({ + // expected-error@-1 {{expects body without any arguments}} + ^entry(%arg: tensor<2xi32>): + tf_executor.yield + }) : () -> (!tf_executor.control) + } + return +} + +// ----- + // Check that an island body can't be empty. func @invalid_island(%arg0: tensor<*xf32>, %ctl: !tf_executor.control) { tf_executor.graph { From 4f6c86b163a7dd996ce12ccc7302c3cba3340ae1 Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Mon, 20 Jul 2020 14:21:51 -0700 Subject: [PATCH 0864/2522] Switch TF Micro to use TfLiteEval tensors by default. This change drastically modifies the way memory is used in TF Micro. Currently, large blocks of persistent memory are allocated for TfLiteTensor structs and any associated quantization data. Instead of this pattern, those TfLiteTensor structs and quantization data will be allocated from the "temp" section of the memory arena. Instead of allocating a large block of TfLiteTensor structs - a minimal TfLiteEval struct is allocated. This new struct will serve as the source of truth for all buffers in the graph. Everything works in the kernel implementations with this change - they are just temporarily slower. All TfLiteTensor structs fetched from GetInput()/GetOutput()/etc are now allocated on the fly through the temp allocation. Each kernel should be updated to fetch the TfLiteEval struct in the Eval() block in each kernel. Additionally, quantization data should be cached in those op kernels. 
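[Editorial sketch, not part of the patch] A minimal sketch of the kernel-side pattern described above, not the literal CL contents. It assumes an eval-tensor accessor on TfLiteContext (written here as GetEvalTensor) and that any quantization parameters are cached into per-op data during Prepare(), because TfLiteEvalTensor only carries the type, dims, and data buffer.

// Hedged sketch of a TFLM kernel Eval() reading TfLiteEvalTensor structs.
// context->GetEvalTensor is assumed to be available in this revision.
#include "tensorflow/lite/c/common.h"

namespace {

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input =
      context->GetEvalTensor(context, node->inputs->data[0]);
  TfLiteEvalTensor* output =
      context->GetEvalTensor(context, node->outputs->data[0]);

  // Element count comes straight from the eval tensor's dims.
  int count = 1;
  for (int i = 0; i < input->dims->size; ++i) {
    count *= input->dims->data[i];
  }

  // Placeholder body: copy input to output. A real kernel would apply its op
  // using parameters cached in node->user_data during Prepare().
  const int8_t* in = static_cast<const int8_t*>(input->data.data);
  int8_t* out = static_cast<int8_t*>(output->data.data);
  for (int i = 0; i < count; ++i) {
    out[i] = in[i];
  }
  return kTfLiteOk;
}

}  // namespace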
This CL saves up to 50% on the arena for larger conv-based models. PiperOrigin-RevId: 322224278 Change-Id: Id32509a75c9f68177f5bb6b850ea11907afcbb1d --- tensorflow/lite/micro/BUILD | 1 + .../lite/micro/memory_arena_threshold_test.cc | 45 ++- tensorflow/lite/micro/memory_helpers.cc | 17 + tensorflow/lite/micro/memory_helpers.h | 5 + tensorflow/lite/micro/micro_allocator.cc | 303 +++++++++++------ tensorflow/lite/micro/micro_allocator.h | 86 +++-- tensorflow/lite/micro/micro_allocator_test.cc | 320 ++++++++++-------- tensorflow/lite/micro/micro_interpreter.cc | 131 +++++-- tensorflow/lite/micro/micro_interpreter.h | 22 +- .../lite/micro/micro_interpreter_test.cc | 20 +- .../lite/micro/recording_micro_allocator.cc | 112 +++--- .../lite/micro/recording_micro_allocator.h | 39 ++- .../micro/recording_micro_allocator_test.cc | 188 +++++----- tensorflow/lite/micro/test_helpers.cc | 8 + tensorflow/lite/micro/test_helpers.h | 3 + 15 files changed, 802 insertions(+), 498 deletions(-) diff --git a/tensorflow/lite/micro/BUILD b/tensorflow/lite/micro/BUILD index feaf57d5d2d..c42335c9772 100644 --- a/tensorflow/lite/micro/BUILD +++ b/tensorflow/lite/micro/BUILD @@ -294,6 +294,7 @@ tflite_micro_cc_test( "micro_allocator_test.cc", ], deps = [ + ":memory_helpers", ":micro_framework", ":test_helpers", "//tensorflow/lite/micro/testing:micro_test", diff --git a/tensorflow/lite/micro/memory_arena_threshold_test.cc b/tensorflow/lite/micro/memory_arena_threshold_test.cc index 7a1f63b5633..d783f3a970e 100644 --- a/tensorflow/lite/micro/memory_arena_threshold_test.cc +++ b/tensorflow/lite/micro/memory_arena_threshold_test.cc @@ -49,15 +49,14 @@ constexpr int kKeywordModelNodeAndRegistrationCount = 15; // Run this test with '--copt=-DTF_LITE_STATIC_MEMORY' to get optimized memory // runtime values: #ifdef TF_LITE_STATIC_MEMORY -constexpr int kKeywordModelTotalSize = 18192; -constexpr int kKeywordModelTailSize = 17520; +constexpr int kKeywordModelTotalSize = 14336; +constexpr int kKeywordModelTailSize = 13664; #else -constexpr int kKeywordModelTotalSize = 21152; -constexpr int kKeywordModelTailSize = 20480; +constexpr int kKeywordModelTotalSize = 14704; +constexpr int kKeywordModelTailSize = 14032; #endif constexpr int kKeywordModelHeadSize = 672; constexpr int kKeywordModelTfLiteTensorVariableBufferDataSize = 10240; -constexpr int kKeywordModelTfLiteTensorQuantizationDataSize = 1728; constexpr int kKeywordModelOpRuntimeDataSize = 148; constexpr int kTestConvModelArenaSize = 12 * 1024; @@ -69,14 +68,13 @@ constexpr int kTestConvModelNodeAndRegistrationCount = 7; // NOTE: These values are measured on x86-64: // TODO(b/158651472): Consider auditing these values on non-64 bit systems. 
#ifdef TF_LITE_STATIC_MEMORY -constexpr int kTestConvModelTotalSize = 10816; -constexpr int kTestConvModelTailSize = 3072; +constexpr int kTestConvModelTotalSize = 9488; +constexpr int kTestConvModelTailSize = 1744; #else -constexpr int kTestConvModelTotalSize = 11712; -constexpr int kTestConvModelTailSize = 3968; +constexpr int kTestConvModelTotalSize = 9648; +constexpr int kTestConvModelTailSize = 1904; #endif constexpr int kTestConvModelHeadSize = 7744; -constexpr int kTestConvModelTfLiteTensorQuantizationDataSize = 768; constexpr int kTestConvModelOpRuntimeDataSize = 136; struct ModelAllocationThresholds { @@ -86,7 +84,6 @@ struct ModelAllocationThresholds { size_t head_alloc_size = 0; size_t tail_alloc_size = 0; size_t tensor_variable_buffer_data_size = 0; - size_t tensor_quantization_data_size = 0; size_t op_runtime_data_size = 0; }; @@ -124,12 +121,12 @@ void ValidateModelAllocationThresholds( "Tail", allocator.GetSimpleMemoryAllocator()->GetTailUsedBytes(), thresholds.tail_alloc_size); EnsureAllocatedSizeThreshold( - "TfLiteTensor", + "TfLiteEvalTensor", allocator .GetRecordedAllocation( - tflite::RecordedAllocationType::kTfLiteTensorArray) + tflite::RecordedAllocationType::kTfLiteEvalTensorData) .used_bytes, - sizeof(TfLiteTensor) * thresholds.tensor_count); + sizeof(TfLiteEvalTensor) * thresholds.tensor_count); EnsureAllocatedSizeThreshold( "VariableBufferData", allocator @@ -138,12 +135,19 @@ void ValidateModelAllocationThresholds( .used_bytes, thresholds.tensor_variable_buffer_data_size); EnsureAllocatedSizeThreshold( - "QuantizationData", + "PersistentTfLiteTensor", + allocator + .GetRecordedAllocation( + tflite::RecordedAllocationType::kPersistentTfLiteTensorData) + .used_bytes, + 0); + EnsureAllocatedSizeThreshold( + "PersistentTfliteTensorQuantizationData", allocator .GetRecordedAllocation(tflite::RecordedAllocationType:: - kTfLiteTensorArrayQuantizationData) + kPersistentTfLiteTensorQuantizationData) .used_bytes, - thresholds.tensor_quantization_data_size); + 0); EnsureAllocatedSizeThreshold( "NodeAndRegistration", allocator @@ -159,8 +163,7 @@ void ValidateModelAllocationThresholds( thresholds.op_runtime_data_size); // Ensure tail allocation recording is not missing any large chunks: - size_t tail_est_length = sizeof(TfLiteTensor) * thresholds.tensor_count + - thresholds.tensor_quantization_data_size + + size_t tail_est_length = sizeof(TfLiteEvalTensor) * thresholds.tensor_count + thresholds.tensor_variable_buffer_data_size + sizeof(tflite::NodeAndRegistration) * thresholds.node_and_registration_count + @@ -191,8 +194,6 @@ TF_LITE_MICRO_TEST(TestKeywordModelMemoryThreshold) { thresholds.tail_alloc_size = kKeywordModelTailSize; thresholds.tensor_variable_buffer_data_size = kKeywordModelTfLiteTensorVariableBufferDataSize; - thresholds.tensor_quantization_data_size = - kKeywordModelTfLiteTensorQuantizationDataSize; thresholds.op_runtime_data_size = kKeywordModelOpRuntimeDataSize; ValidateModelAllocationThresholds(interpreter.GetMicroAllocator(), @@ -214,8 +215,6 @@ TF_LITE_MICRO_TEST(TestConvModelMemoryThreshold) { thresholds.total_alloc_size = kTestConvModelTotalSize; thresholds.head_alloc_size = kTestConvModelHeadSize; thresholds.tail_alloc_size = kTestConvModelTailSize; - thresholds.tensor_quantization_data_size = - kTestConvModelTfLiteTensorQuantizationDataSize; thresholds.op_runtime_data_size = kTestConvModelOpRuntimeDataSize; ValidateModelAllocationThresholds(interpreter.GetMicroAllocator(), diff --git a/tensorflow/lite/micro/memory_helpers.cc 
b/tensorflow/lite/micro/memory_helpers.cc index 0e8f335c049..20e6e10c2ed 100644 --- a/tensorflow/lite/micro/memory_helpers.cc +++ b/tensorflow/lite/micro/memory_helpers.cc @@ -101,6 +101,23 @@ TfLiteStatus BytesRequiredForTensor(const tflite::Tensor& flatbuffer_tensor, return kTfLiteOk; } +TfLiteStatus TfLiteEvalTensorByteLength(const TfLiteEvalTensor* eval_tensor, + size_t* out_bytes) { + TFLITE_DCHECK(out_bytes != nullptr); + + int element_count = 1; + // If eval_tensor->dims == nullptr, then tensor is a scalar so has 1 element. + if (eval_tensor->dims != nullptr) { + for (int n = 0; n < eval_tensor->dims->size; ++n) { + element_count *= eval_tensor->dims->data[n]; + } + } + size_t type_size; + TF_LITE_ENSURE_STATUS(TfLiteTypeSizeOf(eval_tensor->type, &type_size)); + *out_bytes = element_count * type_size; + return kTfLiteOk; +} + TfLiteStatus AllocateOutputDimensionsFromInput(TfLiteContext* context, const TfLiteTensor* input1, const TfLiteTensor* input2, diff --git a/tensorflow/lite/micro/memory_helpers.h b/tensorflow/lite/micro/memory_helpers.h index 274eef52976..8f5526ce924 100644 --- a/tensorflow/lite/micro/memory_helpers.h +++ b/tensorflow/lite/micro/memory_helpers.h @@ -41,6 +41,11 @@ TfLiteStatus BytesRequiredForTensor(const tflite::Tensor& flatbuffer_tensor, size_t* bytes, size_t* type_size, ErrorReporter* error_reporter); +// How many bytes are used in a TfLiteEvalTensor instance. The byte length is +// returned in out_bytes. +TfLiteStatus TfLiteEvalTensorByteLength(const TfLiteEvalTensor* eval_tensor, + size_t* out_bytes); + // Deduce output dimensions from input and allocate given size. // Useful for operators with two inputs where the largest input should equal the // output dimension. diff --git a/tensorflow/lite/micro/micro_allocator.cc b/tensorflow/lite/micro/micro_allocator.cc index 173640c3e90..8ad6db362d7 100644 --- a/tensorflow/lite/micro/micro_allocator.cc +++ b/tensorflow/lite/micro/micro_allocator.cc @@ -163,7 +163,7 @@ class AllocationInfoBuilder { // Add allocaiton information for the tensors. TfLiteStatus AddTensors(const SubGraph* subgraph, const int32_t* offline_offsets, - TfLiteTensor* runtime_tensors); + TfLiteEvalTensor* eval_tensors); // Add allocation information for the scratch buffers. TfLiteStatus AddScratchBuffers(internal::ScratchBufferHandle* buffer_handles); @@ -199,16 +199,20 @@ TfLiteStatus AllocationInfoBuilder::Allocate() { TfLiteStatus AllocationInfoBuilder::AddTensors(const SubGraph* subgraph, const int32_t* offline_offsets, - TfLiteTensor* runtime_tensors) { + TfLiteEvalTensor* eval_tensors) { + TFLITE_DCHECK(eval_tensors != nullptr); + // Set up allocation info for all tensors. for (size_t i = 0; i < tensor_count_; ++i) { AllocationInfo* current = &info_[i]; - // TfLiteTensor.uint8 field is deprecated so use .data field instead. 
- current->output_ptr = &(runtime_tensors[i].data.data); - current->bytes = runtime_tensors[i].bytes; + current->output_ptr = &(eval_tensors[i].data.data); + + TF_LITE_ENSURE_STATUS( + TfLiteEvalTensorByteLength(&eval_tensors[i], ¤t->bytes)); + current->first_created = -1; current->last_used = -1; - current->needs_allocating = (runtime_tensors[i].data.data == nullptr) && + current->needs_allocating = (eval_tensors[i].data.data == nullptr) && (!subgraph->tensors()->Get(i)->is_variable()); if (offline_offsets) { current->offline_offset = offline_offsets[i]; @@ -427,25 +431,19 @@ TfLiteStatus FlatBufferVectorToTfLiteTypeArray( return kTfLiteOk; } -TfLiteStatus InitializeTfLiteTensorFromFlatbuffer( - SimpleMemoryAllocator* allocator, bool allocate_temp, +// Returns a pointer to any buffer associated with the flatbuffer tensor. Can +// return nullptr if no buffer is found. +void* GetFlatbufferTensorBuffer( const tflite::Tensor& flatbuffer_tensor, - const flatbuffers::Vector>* buffers, - ErrorReporter* error_reporter, TfLiteTensor* result) { - *result = {}; - // Make sure the serialized type is one we know how to deal with, and convert - // it from a flatbuffer enum into a constant used by the kernel C API. - TF_LITE_ENSURE_STATUS(ConvertTensorType(flatbuffer_tensor.type(), - &result->type, error_reporter)); - // Make sure we remember if the serialized tensor is designated as a variable. - result->is_variable = flatbuffer_tensor.is_variable(); - + const flatbuffers::Vector>* buffers) { // We need to figure out where the actual contents of this tensor are stored // in memory. We'll check to see if there's a serialized buffer (pretty much // the same as a constant op in TensorFlow) associated with this tensor first, // and if there is update the runtime structure to point to its location in // memory. // First see if there's any buffer information in the serialized tensor. + // TODO(b/160894903): Add better unit tests that validate flatbuffer values. + void* out_buffer = nullptr; if (auto* buffer = (*buffers)[flatbuffer_tensor.buffer()]) { // If we've found a buffer, does it have any data? if (auto* array = buffer->data()) { @@ -453,10 +451,7 @@ TfLiteStatus InitializeTfLiteTensorFromFlatbuffer( if (array->size()) { // We've found a buffer with valid data, so update the runtime tensor // data structure to point to it. - result->data.data = - const_cast(static_cast(array->data())); - // We set the data from a serialized buffer, so record tha. - result->allocation_type = kTfLiteMmapRo; + out_buffer = const_cast(static_cast(array->data())); } } // TODO(petewarden): It's not clear in what circumstances we could have a @@ -465,6 +460,25 @@ TfLiteStatus InitializeTfLiteTensorFromFlatbuffer( // error condition? It would be good to tighten up the specification to make // it less ambiguous. } + return out_buffer; +} + +TfLiteStatus InitializeTfLiteTensorFromFlatbuffer( + SimpleMemoryAllocator* allocator, bool allocate_temp, + const tflite::Tensor& flatbuffer_tensor, + const flatbuffers::Vector>* buffers, + ErrorReporter* error_reporter, TfLiteTensor* result) { + TFLITE_DCHECK(result != nullptr); + + *result = {}; + // Make sure the serialized type is one we know how to deal with, and convert + // it from a flatbuffer enum into a constant used by the kernel C API. + TF_LITE_ENSURE_STATUS(ConvertTensorType(flatbuffer_tensor.type(), + &result->type, error_reporter)); + // Make sure we remember if the serialized tensor is designated as a variable. 
+ result->is_variable = flatbuffer_tensor.is_variable(); + + result->data.data = GetFlatbufferTensorBuffer(flatbuffer_tensor, buffers); // TODO(petewarden): Some of these paths aren't getting enough testing // coverage, so we should figure out some tests that exercise them. @@ -473,6 +487,9 @@ TfLiteStatus InitializeTfLiteTensorFromFlatbuffer( // make a note that they will be allocated from memory. The actual // allocation won't happen until later. result->allocation_type = kTfLiteArenaRw; + } else { + // We set the data from a serialized buffer, so record tha. + result->allocation_type = kTfLiteMmapRo; } // Figure out what the size in bytes of the buffer is and store it. @@ -530,7 +547,7 @@ TfLiteStatus InitializeTfLiteTensorFromFlatbuffer( // zero_point is stored as a int64_t. quantization->zero_point = allocate_temp - ? reinterpret_cast(allocator->AllocateFromTail( + ? reinterpret_cast(allocator->AllocateTemp( TfLiteIntArrayGetSizeInBytes(channels), alignof(TfLiteIntArray))) : reinterpret_cast(allocator->AllocateFromTail( @@ -560,6 +577,29 @@ TfLiteStatus InitializeTfLiteTensorFromFlatbuffer( return kTfLiteOk; } +TfLiteStatus InitializeTfLiteEvalTensorFromFlatbuffer( + SimpleMemoryAllocator* allocator, const tflite::Tensor& flatbuffer_tensor, + const flatbuffers::Vector>* buffers, + ErrorReporter* error_reporter, TfLiteEvalTensor* result) { + *result = {}; + // Make sure the serialized type is one we know how to deal with, and convert + // it from a flatbuffer enum into a constant used by the kernel C API. + TF_LITE_ENSURE_STATUS(ConvertTensorType(flatbuffer_tensor.type(), + &result->type, error_reporter)); + + result->data.data = GetFlatbufferTensorBuffer(flatbuffer_tensor, buffers); + + if (flatbuffer_tensor.shape() == nullptr) { + // flatbuffer_tensor.shape() can return a nullptr in the case of a scalar + // tensor. 
+ result->dims = const_cast(&kZeroLengthIntArray); + } else { + TF_LITE_ENSURE_STATUS(FlatBufferVectorToTfLiteTypeArray( + allocator, error_reporter, flatbuffer_tensor.shape(), &(result->dims))); + } + return kTfLiteOk; +} + } // namespace internal MicroAllocator::MicroAllocator(SimpleMemoryAllocator* memory_allocator, @@ -599,11 +639,10 @@ MicroAllocator* MicroAllocator::Create(SimpleMemoryAllocator* memory_allocator, } TfLiteStatus MicroAllocator::StartModelAllocation( - const Model* model, TfLiteContext* context, - const MicroOpResolver& op_resolver, - NodeAndRegistration** node_and_registrations) { + const Model* model, const MicroOpResolver& op_resolver, + NodeAndRegistration** node_and_registrations, + TfLiteEvalTensor** eval_tensors) { TFLITE_DCHECK(model != nullptr); - TFLITE_DCHECK(context != nullptr); if (model_is_allocating_) { TF_LITE_REPORT_ERROR(error_reporter_, @@ -612,23 +651,19 @@ TfLiteStatus MicroAllocator::StartModelAllocation( return kTfLiteError; } - const SubGraph* subgraph = GetSubGraphFromModel(model); - TFLITE_DCHECK(subgraph != nullptr); model_is_allocating_ = true; - TF_LITE_ENSURE_STATUS(AllocateTfLiteTensorArray(context, subgraph)); + TF_LITE_ENSURE_STATUS(AllocateTfLiteEvalTensors(model, eval_tensors)); TF_LITE_ENSURE_STATUS( - PopulateTfLiteTensorArrayFromFlatbuffer(model, context, subgraph)); - TF_LITE_ENSURE_STATUS( - AllocateNodeAndRegistrations(subgraph, node_and_registrations)); + AllocateNodeAndRegistrations(model, node_and_registrations)); TF_LITE_ENSURE_STATUS(PrepareNodeAndRegistrationDataFromFlatbuffer( - model, subgraph, op_resolver, *node_and_registrations)); + model, op_resolver, *node_and_registrations)); return kTfLiteOk; } -TfLiteStatus MicroAllocator::FinishModelAllocation(const Model* model, - TfLiteContext* context) { +TfLiteStatus MicroAllocator::FinishModelAllocation( + const Model* model, TfLiteEvalTensor* eval_tensors) { if (!model_is_allocating_) { TF_LITE_REPORT_ERROR(error_reporter_, "MicroAllocator: Model allocation finished before " @@ -639,8 +674,8 @@ TfLiteStatus MicroAllocator::FinishModelAllocation(const Model* model, const SubGraph* subgraph = GetSubGraphFromModel(model); TFLITE_DCHECK(subgraph != nullptr); - TF_LITE_ENSURE_STATUS(CommitStaticMemoryPlan(model, context, subgraph)); - TF_LITE_ENSURE_STATUS(AllocateVariables(context, subgraph)); + TF_LITE_ENSURE_STATUS(CommitStaticMemoryPlan(model, subgraph, eval_tensors)); + TF_LITE_ENSURE_STATUS(AllocateVariables(subgraph, eval_tensors)); model_is_allocating_ = false; return kTfLiteOk; @@ -711,41 +746,13 @@ size_t MicroAllocator::used_bytes() const { return memory_allocator_->GetUsedBytes(); } -TfLiteStatus MicroAllocator::AllocateTfLiteTensorArray( - TfLiteContext* context, const SubGraph* subgraph) { - context->tensors_size = subgraph->tensors()->size(); - context->tensors = - reinterpret_cast(memory_allocator_->AllocateFromTail( - sizeof(TfLiteTensor) * context->tensors_size, alignof(TfLiteTensor))); - if (context->tensors == nullptr) { - TF_LITE_REPORT_ERROR( - error_reporter_, - "Failed to allocate memory for context->tensors, %d bytes required", - sizeof(TfLiteTensor) * context->tensors_size); - return kTfLiteError; - } - return kTfLiteOk; -} - -TfLiteStatus MicroAllocator::PopulateTfLiteTensorArrayFromFlatbuffer( - const Model* model, TfLiteContext* context, const SubGraph* subgraph) { - // Initialize tensors in context_ using the flatbuffer for quantization data. 
- for (size_t i = 0; i < subgraph->tensors()->size(); ++i) { - TfLiteStatus status = internal::InitializeTfLiteTensorFromFlatbuffer( - memory_allocator_, /*allocate_temp=*/false, - *subgraph->tensors()->Get(i), model->buffers(), error_reporter_, - &context->tensors[i]); - if (status != kTfLiteOk) { - TF_LITE_REPORT_ERROR(error_reporter_, "Failed to initialize tensor %d", - i); - return kTfLiteError; - } - } - return kTfLiteOk; -} - TfLiteStatus MicroAllocator::AllocateNodeAndRegistrations( - const SubGraph* subgraph, NodeAndRegistration** node_and_registrations) { + const Model* model, NodeAndRegistration** node_and_registrations) { + TFLITE_DCHECK(node_and_registrations); + + const SubGraph* subgraph = GetSubGraphFromModel(model); + TFLITE_DCHECK(subgraph != nullptr); + NodeAndRegistration* output = reinterpret_cast( memory_allocator_->AllocateFromTail( sizeof(NodeAndRegistration) * subgraph->operators()->size(), @@ -761,9 +768,14 @@ TfLiteStatus MicroAllocator::AllocateNodeAndRegistrations( } TfLiteStatus MicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer( - const Model* model, const SubGraph* subgraph, - const MicroOpResolver& op_resolver, + const Model* model, const MicroOpResolver& op_resolver, NodeAndRegistration* node_and_registrations) { + TFLITE_DCHECK(model != nullptr); + TFLITE_DCHECK(node_and_registrations != nullptr); + + const SubGraph* subgraph = GetSubGraphFromModel(model); + TFLITE_DCHECK(subgraph != nullptr); + TfLiteStatus status = kTfLiteOk; auto* opcodes = model->operator_codes(); MicroBuiltinDataAllocator builtin_data_allocator(memory_allocator_); @@ -847,25 +859,40 @@ TfLiteStatus MicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer( return kTfLiteOk; } -TfLiteTensor* MicroAllocator::AllocatePersistentTfLiteTensor(const Model* model, - int tensor_index) { +TfLiteTensor* MicroAllocator::AllocatePersistentTfLiteTensor( + const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index) { const SubGraph* subgraph = GetSubGraphFromModel(model); TFLITE_DCHECK(subgraph != nullptr); // This value is allocated from persistent arena space. It is guaranteed to be // around for the lifetime of the application. TfLiteTensor* tensor = - reinterpret_cast(memory_allocator_->AllocateFromTail( - sizeof(TfLiteTensor), alignof(TfLiteTensor))); - internal::InitializeTfLiteTensorFromFlatbuffer( - memory_allocator_, /*allocate_temp=*/false, - *subgraph->tensors()->Get(tensor_index), model->buffers(), - error_reporter_, tensor); + AllocatePersistentTfLiteTensorInternal(model, eval_tensors, tensor_index); + + // Populate any fields from the flatbuffer, since this TfLiteTensor struct is + // allocated in the persistent section of the arena, ensure that additional + // allocations also take place in that section of the arena. + if (PopulateTfLiteTensorFromFlatbuffer(model, subgraph, tensor, tensor_index, + /*allocate_temp=*/false) != + kTfLiteOk) { + TF_LITE_REPORT_ERROR(error_reporter_, + "Failed to populate a persistent TfLiteTensor struct " + "from flatbuffer data!"); + return nullptr; + } + + if (eval_tensors != nullptr) { + // Tensor buffers that are allocated at runtime (e.g. non-weight buffers) + // and not located in the flatbuffer are stored on the pre-allocated list of + // TfLiteEvalTensors structs. These structs are the source of truth, simply + // point the corresponding buffer to the new TfLiteTensor data value. 
+ tensor->data.data = eval_tensors[tensor_index].data.data; + } return tensor; } -TfLiteTensor* MicroAllocator::AllocateTempTfLiteTensor(const Model* model, - int tensor_index) { +TfLiteTensor* MicroAllocator::AllocateTempTfLiteTensor( + const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index) { const SubGraph* subgraph = GetSubGraphFromModel(model); TFLITE_DCHECK(subgraph != nullptr); @@ -875,10 +902,25 @@ TfLiteTensor* MicroAllocator::AllocateTempTfLiteTensor(const Model* model, TfLiteTensor* tensor = reinterpret_cast(memory_allocator_->AllocateTemp( sizeof(TfLiteTensor), alignof(TfLiteTensor))); - internal::InitializeTfLiteTensorFromFlatbuffer( - memory_allocator_, /*allocate_temp=*/true, - *subgraph->tensors()->Get(tensor_index), model->buffers(), - error_reporter_, tensor); + + // Populate any fields from the flatbuffer, since this TfLiteTensor struct is + // allocated in the temp section of the arena, ensure that additional + // allocations also take place in that section of the arena. + if (PopulateTfLiteTensorFromFlatbuffer(model, subgraph, tensor, tensor_index, + /*allocate_temp=*/true) != kTfLiteOk) { + TF_LITE_REPORT_ERROR( + error_reporter_, + "Failed to populate a temp TfLiteTensor struct from flatbuffer data!"); + return nullptr; + } + + if (eval_tensors != nullptr) { + // Tensor buffers that are allocated at runtime (e.g. non-weight buffers) + // and not located in the flatbuffer are stored on the pre-allocated list of + // TfLiteEvalTensors structs. These structs are the source of truth, simply + // point the corresponding buffer to the new TfLiteTensor data value. + tensor->data.data = eval_tensors[tensor_index].data.data; + } return tensor; } @@ -886,26 +928,79 @@ void MicroAllocator::ResetTempAllocations() { memory_allocator_->ResetTempAllocations(); } -TfLiteStatus MicroAllocator::AllocateVariables(TfLiteContext* context, - const SubGraph* subgraph) { - for (size_t i = 0; i < context->tensors_size; ++i) { - if (subgraph->tensors()->Get(i)->is_variable()) { - context->tensors[i].data.data = memory_allocator_->AllocateFromTail( - context->tensors[i].bytes, kBufferAlignment); - // Allocation failure. 
- if (context->tensors[i].data.data == nullptr) { +TfLiteStatus MicroAllocator::AllocateTfLiteEvalTensors( + const Model* model, TfLiteEvalTensor** eval_tensors) { + TFLITE_DCHECK(eval_tensors != nullptr); + + const SubGraph* subgraph = GetSubGraphFromModel(model); + TFLITE_DCHECK(subgraph != nullptr); + + size_t alloc_count = subgraph->tensors()->size(); + TfLiteEvalTensor* tensors = + reinterpret_cast(memory_allocator_->AllocateFromTail( + sizeof(TfLiteEvalTensor) * alloc_count, alignof(TfLiteEvalTensor))); + if (tensors == nullptr) { + TF_LITE_REPORT_ERROR(error_reporter_, + "Failed to allocate memory for context->eval_tensors, " + "%d bytes required", + sizeof(TfLiteEvalTensor) * alloc_count); + return kTfLiteError; + } + + for (size_t i = 0; i < alloc_count; ++i) { + TfLiteStatus status = internal::InitializeTfLiteEvalTensorFromFlatbuffer( + memory_allocator_, *subgraph->tensors()->Get(i), model->buffers(), + error_reporter_, &tensors[i]); + if (status != kTfLiteOk) { + TF_LITE_REPORT_ERROR(error_reporter_, "Failed to initialize tensor %d", + i); + return kTfLiteError; + } + } + *eval_tensors = tensors; + return kTfLiteOk; +} + +TfLiteStatus MicroAllocator::AllocateVariables(const SubGraph* subgraph, + TfLiteEvalTensor* eval_tensors) { + for (size_t i = 0; i < subgraph->tensors()->size(); ++i) { + auto* tensor = subgraph->tensors()->Get(i); + if (tensor->is_variable()) { + size_t buffer_size; + TF_LITE_ENSURE_STATUS( + TfLiteEvalTensorByteLength(&eval_tensors[i], &buffer_size)); + + eval_tensors[i].data.data = + memory_allocator_->AllocateFromTail(buffer_size, kBufferAlignment); + + if (eval_tensors[i].data.data == nullptr) { TF_LITE_REPORT_ERROR(error_reporter_, "Failed to allocate variable tensor of size %d", - context->tensors[i].bytes); + buffer_size); return kTfLiteError; } } - tflite::ResetVariableTensor(&(context->tensors[i])); } - return kTfLiteOk; } +TfLiteTensor* MicroAllocator::AllocatePersistentTfLiteTensorInternal( + const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index) { + return reinterpret_cast(memory_allocator_->AllocateFromTail( + sizeof(TfLiteTensor), alignof(TfLiteTensor))); +} + +TfLiteStatus MicroAllocator::PopulateTfLiteTensorFromFlatbuffer( + const Model* model, const SubGraph* subgraph, TfLiteTensor* tensor, + int tensor_index, bool allocate_temp) { + // TODO(b/160894903): This method serves as a stub to ensure quantized + // allocations in the tail can be recorded. Once all kernels have been ported + // to the new API this can be dropped. + return internal::InitializeTfLiteTensorFromFlatbuffer( + memory_allocator_, allocate_temp, *subgraph->tensors()->Get(tensor_index), + model->buffers(), error_reporter_, tensor); +} + ErrorReporter* MicroAllocator::error_reporter() const { return error_reporter_; } @@ -920,9 +1015,9 @@ const SubGraph* MicroAllocator::GetSubGraphFromModel(const Model* model) { return (*subgraphs)[0]; } -TfLiteStatus MicroAllocator::CommitStaticMemoryPlan(const Model* model, - TfLiteContext* context, - const SubGraph* subgraph) { +TfLiteStatus MicroAllocator::CommitStaticMemoryPlan( + const Model* model, const SubGraph* subgraph, + TfLiteEvalTensor* eval_tensors) { // Create static memory plan // 1. Calculate AllocationInfo to know the lifetime of each tensor/buffer. // 2. Add them into the planner (such as the GreedyMemoryPlanner). 
@@ -942,8 +1037,8 @@ TfLiteStatus MicroAllocator::CommitStaticMemoryPlan(const Model* model,
   const int32_t* offline_planner_offsets = nullptr;
   TF_LITE_ENSURE_STATUS(
       builder.GetOfflinePlannedOffsets(model, &offline_planner_offsets));
-  TF_LITE_ENSURE_STATUS(builder.AddTensors(subgraph, offline_planner_offsets,
-                                           context->tensors));
+  TF_LITE_ENSURE_STATUS(
+      builder.AddTensors(subgraph, offline_planner_offsets, eval_tensors));
   TF_LITE_ENSURE_STATUS(builder.AddScratchBuffers(scratch_buffer_handles_));
   const AllocationInfo* allocation_info = builder.Finish();
diff --git a/tensorflow/lite/micro/micro_allocator.h b/tensorflow/lite/micro/micro_allocator.h
index 58e274f4efe..47dad629944 100644
--- a/tensorflow/lite/micro/micro_allocator.h
+++ b/tensorflow/lite/micro/micro_allocator.h
@@ -110,32 +110,40 @@ class MicroAllocator {
   // This method will run through the flatbuffer data supplied in the model to
   // properly allocate tensor, node, and op registration data. This method is
   // expected to be followed with a call to FinishModelAllocation() before
-  // resuming allocation with another model.
+  // resuming allocation with another model. All persistent tensor buffers are
+  // stored in the out-param eval_tensors. This value is allocated from the
+  // persistent memory arena and will be used to host runtime tensor buffers.
   TfLiteStatus StartModelAllocation(
-      const Model* model, TfLiteContext* context,
-      const MicroOpResolver& op_resolver,
-      NodeAndRegistration** node_and_registrations);
+      const Model* model, const MicroOpResolver& op_resolver,
+      NodeAndRegistration** node_and_registrations,
+      TfLiteEvalTensor** eval_tensors);
 
   // Finish allocating internal resources required for model inference.
   // This method will plan non-persistent buffers and commit a memory plan to
   // the 'head' section of the memory arena. All variable tensor data will also
   // be allocated. This method should be called after assigning model resources
-  // in StartModelAllocation().
+  // in StartModelAllocation(). The eval_tensors pointer should be the value
+  // passed into this class during StartModelAllocation().
   TfLiteStatus FinishModelAllocation(const Model* model,
-                                     TfLiteContext* context);
+                                     TfLiteEvalTensor* eval_tensors);
 
   // Allocates a TfLiteTensor struct and populates the returned value with
   // properties from the model flatbuffer. This struct is allocated from
   // persistent arena memory and is only guaranteed for the lifetime of the
-  // application.
-  virtual TfLiteTensor* AllocatePersistentTfLiteTensor(const Model* model,
-                                                       int tensor_index);
+  // application. The eval_tensors pointer should be the value passed into this
+  // class during StartModelAllocation() and contains the source-of-truth for
+  // buffers.
+  virtual TfLiteTensor* AllocatePersistentTfLiteTensor(
+      const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index);
 
   // Allocates a TfLiteTensor struct and populates the returned value with
   // properties from the model flatbuffer. This struct is allocated from
   // temporary arena memory and is only guaranteed until a call is made to
-  // ResetTempAllocations().
+  // ResetTempAllocations(). The eval_tensors pointer should be the value passed
+  // into this class during StartModelAllocation() and contains the
+  // source-of-truth for buffers.
virtual TfLiteTensor* AllocateTempTfLiteTensor(const Model* model, + TfLiteEvalTensor* eval_tensors, int tensor_index); // Resets all temporary allocations. This method should be called after a @@ -168,51 +176,59 @@ class MicroAllocator { ErrorReporter* error_reporter); virtual ~MicroAllocator(); - // Allocates an array in the arena to hold pointers to the tensors required - // to initialize and prepare a model. These allocations are stored and - // populated on the context. - // TODO(b/160894903): Remove this function when new kernel API is ready. - virtual TfLiteStatus AllocateTfLiteTensorArray(TfLiteContext* context, - const SubGraph* subgraph); - - // Populates content on the list of tensor pointers required to initialize and - // prepare a model from data in the flatbuffer (loaded from the TfLiteModel - // instance). Persistent data (e.g. quantization params) is allocated from the - // arena. - // TODO(b/160894903): Remove this function when new kernel API is ready. - virtual TfLiteStatus PopulateTfLiteTensorArrayFromFlatbuffer( - const Model* model, TfLiteContext* context, const SubGraph* subgraph); - // Allocates an array in the arena to hold pointers to the node and // registration pointers required to represent the inference graph of the // model. virtual TfLiteStatus AllocateNodeAndRegistrations( - const SubGraph* subgraph, NodeAndRegistration** node_and_registrations); + const Model* model, NodeAndRegistration** node_and_registrations); // Populates node and registration pointers representing the inference graph // of the model from values inside the flatbuffer (loaded from the TfLiteModel // instance). Persistent data (e.g. operator data) is allocated from the // arena. virtual TfLiteStatus PrepareNodeAndRegistrationDataFromFlatbuffer( - const Model* model, const SubGraph* subgraph, - const MicroOpResolver& op_resolver, + const Model* model, const MicroOpResolver& op_resolver, NodeAndRegistration* node_and_registrations); + // Allocates the list of persistent TfLiteEvalTensors that are used for the + // "eval" phase of model inference. These structs will be the source of truth + // for all tensor buffers. Allocation results are stored in the out-param + // eval_tensors. + virtual TfLiteStatus AllocateTfLiteEvalTensors( + const Model* model, TfLiteEvalTensor** eval_tensors); + // Allocates persistent tensor buffers for variable tensors in the subgraph. - virtual TfLiteStatus AllocateVariables(TfLiteContext* context, - const SubGraph* subgraph); + virtual TfLiteStatus AllocateVariables(const SubGraph* subgraph, + TfLiteEvalTensor* eval_tensors); + + // TODO(b/160894903): Once all kernels have been updated to the new API drop + // this method. It is only used to record TfLiteTensor persistent allocations. + virtual TfLiteTensor* AllocatePersistentTfLiteTensorInternal( + const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index); + + // Populates a TfLiteTensor struct with data from the model flatbuffer. Any + // quantization data is allocated from either the tail (persistent) or temp + // sections of the arena based on the allocation flag. + // TODO(b/160894903): Once all kernels have been updated to the new API drop + // this function since all allocations for quantized data will take place in + // the temp section. 
+ virtual TfLiteStatus PopulateTfLiteTensorFromFlatbuffer( + const Model* model, const SubGraph* subgraph, TfLiteTensor* tensor, + int tensor_index, bool allocate_temp); ErrorReporter* error_reporter() const; - private: // Returns the first subgraph from the model. const SubGraph* GetSubGraphFromModel(const Model* model); + private: // Commits a memory plan for all non-persistent buffer allocations in the - // 'head' section of the memory arena. + // 'head' section of the memory arena. The eval_tensors pointer is the list of + // pre-allocated TfLiteEvalTensor structs that will point to the buffers that + // will be allocated into the head section in this function call. virtual TfLiteStatus CommitStaticMemoryPlan(const Model* model, - TfLiteContext* context, - const SubGraph* subgraph); + const SubGraph* subgraph, + TfLiteEvalTensor* eval_tensors); // A simple memory allocator that always allocate from the arena tail or head. SimpleMemoryAllocator* memory_allocator_; diff --git a/tensorflow/lite/micro/micro_allocator_test.cc b/tensorflow/lite/micro/micro_allocator_test.cc index f9348cc7425..22eb999ec9b 100644 --- a/tensorflow/lite/micro/micro_allocator_test.cc +++ b/tensorflow/lite/micro/micro_allocator_test.cc @@ -17,6 +17,7 @@ limitations under the License. #include +#include "tensorflow/lite/micro/memory_helpers.h" #include "tensorflow/lite/micro/simple_memory_allocator.h" #include "tensorflow/lite/micro/test_helpers.h" #include "tensorflow/lite/micro/testing/micro_test.h" @@ -28,7 +29,7 @@ namespace { constexpr int kExpectedAlignment = 4; -void VerifyMockTensor(TfLiteTensor* tensor, bool is_variable = false) { +void VerifyMockTfLiteTensor(TfLiteTensor* tensor, bool is_variable = false) { TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt32, tensor->type); TF_LITE_MICRO_EXPECT_EQ(1, tensor->dims->size); TF_LITE_MICRO_EXPECT_EQ(1, tensor->dims->data[0]); @@ -40,7 +41,7 @@ void VerifyMockTensor(TfLiteTensor* tensor, bool is_variable = false) { kExpectedAlignment)); } -void VerifyMockWeightTensor(TfLiteTensor* tensor) { +void VerifyMockWeightTfLiteTensor(TfLiteTensor* tensor) { TF_LITE_MICRO_EXPECT_EQ(kTfLiteUInt8, tensor->type); TF_LITE_MICRO_EXPECT_EQ(1, tensor->dims->size); TF_LITE_MICRO_EXPECT_EQ(1, tensor->dims->data[0]); @@ -48,12 +49,54 @@ void VerifyMockWeightTensor(TfLiteTensor* tensor) { TF_LITE_MICRO_EXPECT_NE(nullptr, tensor->data.raw); } -void EnsureUniqueVariableTensorBuffer(TfLiteContext* context, +void VerifyMockTfLiteEvalTensor(TfLiteEvalTensor* tensor) { + TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt32, tensor->type); + TF_LITE_MICRO_EXPECT_EQ(1, tensor->dims->size); + TF_LITE_MICRO_EXPECT_EQ(1, tensor->dims->data[0]); + size_t buffer_size; + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, tflite::TfLiteEvalTensorByteLength(tensor, &buffer_size)); + TF_LITE_MICRO_EXPECT_EQ(static_cast(4), buffer_size); + TF_LITE_MICRO_EXPECT_NE(nullptr, tensor->data.raw); + TF_LITE_MICRO_EXPECT_EQ(static_cast(0), + (reinterpret_cast(tensor->data.raw) % + kExpectedAlignment)); +} + +void VerifyMockWeightTfLiteEvalTensor(TfLiteEvalTensor* tensor) { + TF_LITE_MICRO_EXPECT_EQ(kTfLiteUInt8, tensor->type); + TF_LITE_MICRO_EXPECT_EQ(1, tensor->dims->size); + TF_LITE_MICRO_EXPECT_EQ(1, tensor->dims->data[0]); + size_t buffer_size; + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, tflite::TfLiteEvalTensorByteLength(tensor, &buffer_size)); + TF_LITE_MICRO_EXPECT_EQ(static_cast(1), buffer_size); + TF_LITE_MICRO_EXPECT_NE(nullptr, tensor->data.raw); +} + +void VerifyMockTensor(const Model* model, MicroAllocator* allocator, + TfLiteEvalTensor* 
eval_tensors, int tensor_idx, + bool is_variable = false) { + VerifyMockTfLiteTensor(allocator->AllocatePersistentTfLiteTensor( + model, eval_tensors, tensor_idx), + is_variable); + VerifyMockTfLiteEvalTensor(&eval_tensors[tensor_idx]); +} + +void VerifyMockWeightTensor(const Model* model, MicroAllocator* allocator, + TfLiteEvalTensor* eval_tensors, int tensor_idx) { + VerifyMockWeightTfLiteTensor(allocator->AllocatePersistentTfLiteTensor( + model, eval_tensors, tensor_idx)); + VerifyMockWeightTfLiteEvalTensor(&eval_tensors[tensor_idx]); +} + +void EnsureUniqueVariableTensorBuffer(const Model* model, + TfLiteEvalTensor* eval_tensors, const int variable_tensor_idx) { - for (size_t i = 0; i < context->tensors_size; i++) { + for (size_t i = 0; i < GetModelTensorCount(model); i++) { if (i != static_cast(variable_tensor_idx)) { - TF_LITE_MICRO_EXPECT_NE(context->tensors[variable_tensor_idx].data.raw, - context->tensors[i].data.raw); + TF_LITE_MICRO_EXPECT_NE(eval_tensors[variable_tensor_idx].data.raw, + eval_tensors[i].data.raw); } } } @@ -182,7 +225,7 @@ TF_LITE_MICRO_TEST(TestMissingQuantization) { TF_LITE_MICRO_TEST(TestFailsWhenModelStartsTwice) { const tflite::Model* model = tflite::testing::GetSimpleMockModel(); - TfLiteContext context; + TfLiteEvalTensor* eval_tensors = nullptr; tflite::AllOpsResolver op_resolver = tflite::testing::GetOpResolver(); tflite::NodeAndRegistration* node_and_registration; constexpr size_t arena_size = 1024; @@ -191,29 +234,31 @@ TF_LITE_MICRO_TEST(TestFailsWhenModelStartsTwice) { tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter); TF_LITE_MICRO_EXPECT(nullptr != allocator); TF_LITE_MICRO_EXPECT_EQ( - kTfLiteOk, allocator->StartModelAllocation(model, &context, op_resolver, - &node_and_registration)); + kTfLiteOk, + allocator->StartModelAllocation(model, op_resolver, + &node_and_registration, &eval_tensors)); TF_LITE_MICRO_EXPECT_EQ( - kTfLiteError, allocator->StartModelAllocation( - model, &context, op_resolver, &node_and_registration)); + kTfLiteError, + allocator->StartModelAllocation(model, op_resolver, + &node_and_registration, &eval_tensors)); } TF_LITE_MICRO_TEST(TestFailsWhenModelFinishesBeforeStart) { const tflite::Model* model = tflite::testing::GetSimpleMockModel(); - TfLiteContext context; + TfLiteEvalTensor* eval_tensors = nullptr; tflite::AllOpsResolver op_resolver = tflite::testing::GetOpResolver(); constexpr size_t arena_size = 1024; uint8_t arena[arena_size]; tflite::MicroAllocator* allocator = tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter); - TF_LITE_MICRO_EXPECT(nullptr != allocator); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteError, - allocator->FinishModelAllocation(model, &context)); + TF_LITE_MICRO_EXPECT_NE(nullptr, allocator); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteError, allocator->FinishModelAllocation(model, eval_tensors)); } TF_LITE_MICRO_TEST(TestMockModelAllocation) { const tflite::Model* model = tflite::testing::GetSimpleMockModel(); - TfLiteContext context; + TfLiteEvalTensor* eval_tensors = nullptr; tflite::AllOpsResolver op_resolver = tflite::testing::GetOpResolver(); tflite::NodeAndRegistration* node_and_registration; constexpr size_t arena_size = 1024; @@ -222,32 +267,27 @@ TF_LITE_MICRO_TEST(TestMockModelAllocation) { tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter); TF_LITE_MICRO_EXPECT(nullptr != allocator); TF_LITE_MICRO_EXPECT_EQ( - kTfLiteOk, allocator->StartModelAllocation(model, &context, op_resolver, - &node_and_registration)); - 
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - allocator->FinishModelAllocation(model, &context)); + kTfLiteOk, + allocator->StartModelAllocation(model, op_resolver, + &node_and_registration, &eval_tensors)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, allocator->FinishModelAllocation(model, eval_tensors)); - TF_LITE_MICRO_EXPECT_EQ(static_cast(4), context.tensors_size); + size_t model_tensor_size = tflite::testing::GetModelTensorCount(model); + TF_LITE_MICRO_EXPECT_EQ(static_cast(4), model_tensor_size); - // NOTE: Tensor indexes match the values in GetSimpleMockModel(). - tflite::testing::VerifyMockTensor(&context.tensors[0]); - tflite::testing::VerifyMockWeightTensor(&context.tensors[1]); - tflite::testing::VerifyMockTensor(&context.tensors[2]); - tflite::testing::VerifyMockTensor(&context.tensors[3]); + tflite::testing::VerifyMockTensor(model, allocator, eval_tensors, 0); + tflite::testing::VerifyMockWeightTensor(model, allocator, eval_tensors, 1); + tflite::testing::VerifyMockTensor(model, allocator, eval_tensors, 2); + tflite::testing::VerifyMockTensor(model, allocator, eval_tensors, 3); - TF_LITE_MICRO_EXPECT_NE(context.tensors[1].data.raw, - context.tensors[0].data.raw); - TF_LITE_MICRO_EXPECT_NE(context.tensors[2].data.raw, - context.tensors[0].data.raw); - TF_LITE_MICRO_EXPECT_NE(context.tensors[1].data.raw, - context.tensors[2].data.raw); - TF_LITE_MICRO_EXPECT_NE(context.tensors[3].data.raw, - context.tensors[0].data.raw); - TF_LITE_MICRO_EXPECT_NE(context.tensors[3].data.raw, - context.tensors[1].data.raw); - TF_LITE_MICRO_EXPECT_NE(context.tensors[3].data.raw, - context.tensors[2].data.raw); - TF_LITE_MICRO_EXPECT_LE(allocator->used_bytes(), 760 + 100); + TF_LITE_MICRO_EXPECT_NE(eval_tensors[1].data.raw, eval_tensors[0].data.raw); + TF_LITE_MICRO_EXPECT_NE(eval_tensors[2].data.raw, eval_tensors[0].data.raw); + TF_LITE_MICRO_EXPECT_NE(eval_tensors[1].data.raw, eval_tensors[2].data.raw); + TF_LITE_MICRO_EXPECT_NE(eval_tensors[3].data.raw, eval_tensors[0].data.raw); + TF_LITE_MICRO_EXPECT_NE(eval_tensors[3].data.raw, eval_tensors[1].data.raw); + TF_LITE_MICRO_EXPECT_NE(eval_tensors[3].data.raw, eval_tensors[2].data.raw); + TF_LITE_MICRO_EXPECT_LE(allocator->used_bytes(), 856 + 100); // SimpleMockModel has 2 operators: tflite::testing::VerifyRegistrationAndNodeAllocation(node_and_registration, @@ -256,7 +296,7 @@ TF_LITE_MICRO_TEST(TestMockModelAllocation) { TF_LITE_MICRO_TEST(TestAllocationForModelsWithBranches) { const tflite::Model* model = tflite::testing::GetSimpleModelWithBranch(); - TfLiteContext context; + TfLiteEvalTensor* eval_tensors = nullptr; tflite::AllOpsResolver op_resolver = tflite::testing::GetOpResolver(); tflite::NodeAndRegistration* node_and_registration; constexpr size_t arena_size = 4096; @@ -265,24 +305,28 @@ TF_LITE_MICRO_TEST(TestAllocationForModelsWithBranches) { tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter); TF_LITE_MICRO_EXPECT_NE(nullptr, allocator); TF_LITE_MICRO_EXPECT_EQ( - kTfLiteOk, allocator->StartModelAllocation(model, &context, op_resolver, - &node_and_registration)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - allocator->FinishModelAllocation(model, &context)); + kTfLiteOk, + allocator->StartModelAllocation(model, op_resolver, + &node_and_registration, &eval_tensors)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, allocator->FinishModelAllocation(model, eval_tensors)); - uint8_t* start = context.tensors[0].data.uint8; + uint8_t* start = eval_tensors[0].data.uint8; // Check test_helpers.cc BuildSimpleModelWithBranch for model structure. 
// t0 is the first tensor, so place it in offset 0. - TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[0].data.uint8 - start); + TF_LITE_MICRO_EXPECT_EQ(0, eval_tensors[0].data.uint8 - start); // bytes = 2 * 2 * 3 * sizeof(float32) = 48, same for other tensors. - TF_LITE_MICRO_EXPECT_EQ(static_cast(48), context.tensors[0].bytes); + size_t buffer_size; + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, tflite::TfLiteEvalTensorByteLength( + &eval_tensors[0], &buffer_size)); + TF_LITE_MICRO_EXPECT_EQ(static_cast(48), buffer_size); // t1 can't reuse any memory, as n0 requires both t0 and t1. - TF_LITE_MICRO_EXPECT_EQ(96, context.tensors[1].data.uint8 - start); + TF_LITE_MICRO_EXPECT_EQ(96, eval_tensors[1].data.uint8 - start); // t2 can't reuse any memory, as n1 requires both t0 and t2. Also n2 requires // both t1 and t2. - TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[2].data.uint8 - start); + TF_LITE_MICRO_EXPECT_EQ(48, eval_tensors[2].data.uint8 - start); // t3 reuses the same memory from t0 as t0 is not an input to any node. - TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[3].data.uint8 - start); + TF_LITE_MICRO_EXPECT_EQ(0, eval_tensors[3].data.uint8 - start); // SimpleModelWithBranch has 3 operators: tflite::testing::VerifyRegistrationAndNodeAllocation(node_and_registration, @@ -291,7 +335,7 @@ TF_LITE_MICRO_TEST(TestAllocationForModelsWithBranches) { TF_LITE_MICRO_TEST(TestAllocationForComplexModelAllocation) { const tflite::Model* model = tflite::testing::GetComplexMockModel(); - TfLiteContext context; + TfLiteEvalTensor* eval_tensors = nullptr; tflite::AllOpsResolver op_resolver = tflite::testing::GetOpResolver(); tflite::NodeAndRegistration* node_and_registration; constexpr size_t arena_size = 2048; @@ -300,32 +344,34 @@ TF_LITE_MICRO_TEST(TestAllocationForComplexModelAllocation) { tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter); TF_LITE_MICRO_EXPECT(nullptr != allocator); TF_LITE_MICRO_EXPECT_EQ( - kTfLiteOk, allocator->StartModelAllocation(model, &context, op_resolver, - &node_and_registration)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - allocator->FinishModelAllocation(model, &context)); + kTfLiteOk, + allocator->StartModelAllocation(model, op_resolver, + &node_and_registration, &eval_tensors)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, allocator->FinishModelAllocation(model, eval_tensors)); - TF_LITE_MICRO_EXPECT_EQ(static_cast(10), context.tensors_size); + size_t model_tensor_size = tflite::testing::GetModelTensorCount(model); + TF_LITE_MICRO_EXPECT_EQ(static_cast(10), model_tensor_size); // NOTE: Tensor indexes match the values in GetComplexMockModel(). 
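
The 48-byte figure asserted above is simply the element count times the element size: a 2x2x3 float32 tensor holds 12 four-byte elements. A small sketch of checking a planned buffer through the TfLiteEvalTensorByteLength() helper that the updated tests rely on (the wrapper function itself is hypothetical):

#include <cstddef>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/memory_helpers.h"

// Returns true when the eval tensor's buffer has the expected size,
// e.g. 2 * 2 * 3 * sizeof(float) == 48 for the branch-model tensors above.
bool BufferIsExpectedSize(const TfLiteEvalTensor* tensor,
                          size_t expected_bytes) {
  size_t actual = 0;
  if (tflite::TfLiteEvalTensorByteLength(tensor, &actual) != kTfLiteOk) {
    return false;
  }
  return actual == expected_bytes;
}
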
-  tflite::testing::VerifyMockTensor(&context.tensors[0]);
-  tflite::testing::VerifyMockTensor(&context.tensors[1],
-                                    true /* is_variable */);
-  tflite::testing::VerifyMockWeightTensor(&context.tensors[2]);
-  tflite::testing::VerifyMockTensor(&context.tensors[3]);
-  tflite::testing::VerifyMockTensor(&context.tensors[4],
-                                    true /* is_variable */);
-  tflite::testing::VerifyMockWeightTensor(&context.tensors[5]);
-  tflite::testing::VerifyMockTensor(&context.tensors[6]);
-  tflite::testing::VerifyMockTensor(&context.tensors[7],
-                                    true /* is_variable */);
-  tflite::testing::VerifyMockWeightTensor(&context.tensors[8]);
-  tflite::testing::VerifyMockTensor(&context.tensors[9]);
+  tflite::testing::VerifyMockTensor(model, allocator, eval_tensors, 0);
+  tflite::testing::VerifyMockTensor(model, allocator, eval_tensors, 1,
+                                    /*is_variable=*/true);
+  tflite::testing::VerifyMockWeightTensor(model, allocator, eval_tensors, 2);
+  tflite::testing::VerifyMockTensor(model, allocator, eval_tensors, 3);
+  tflite::testing::VerifyMockTensor(model, allocator, eval_tensors, 4,
+                                    /*is_variable=*/true);
+  tflite::testing::VerifyMockWeightTensor(model, allocator, eval_tensors, 5);
+  tflite::testing::VerifyMockTensor(model, allocator, eval_tensors, 6);
+  tflite::testing::VerifyMockTensor(model, allocator, eval_tensors, 7,
+                                    /*is_variable=*/true);
+  tflite::testing::VerifyMockWeightTensor(model, allocator, eval_tensors, 8);
+  tflite::testing::VerifyMockTensor(model, allocator, eval_tensors, 9);
 
-  // Ensure that variable tensors have unique address
-  tflite::testing::EnsureUniqueVariableTensorBuffer(&context, 1);
-  tflite::testing::EnsureUniqueVariableTensorBuffer(&context, 4);
-  tflite::testing::EnsureUniqueVariableTensorBuffer(&context, 7);
+  // Ensure that variable tensors have unique addresses
+  tflite::testing::EnsureUniqueVariableTensorBuffer(model, eval_tensors, 1);
+  tflite::testing::EnsureUniqueVariableTensorBuffer(model, eval_tensors, 4);
+  tflite::testing::EnsureUniqueVariableTensorBuffer(model, eval_tensors, 7);
 
   // ComplexMockModel has 3 operators:
   tflite::testing::VerifyRegistrationAndNodeAllocation(node_and_registration,
@@ -363,27 +409,32 @@ TF_LITE_MICRO_TEST(OfflinePlannerBranchesAllOnline) {
   const tflite::Model* model = tflite::testing::GetModelWithOfflinePlanning(
       nbr_tensors, metadata_buffer, node_list, num_conns);
 
-  TfLiteContext context;
+  TfLiteEvalTensor* eval_tensors = nullptr;
   constexpr size_t arena_size = 4096;
   uint8_t arena[arena_size];
   tflite::MicroAllocator* allocator =
       tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter);
 
   TF_LITE_MICRO_EXPECT_EQ(
-      kTfLiteOk, allocator->StartModelAllocation(model, &context, op_resolver,
-                                                 &node_and_registration));
-  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk,
-                          allocator->FinishModelAllocation(model, &context));
+      kTfLiteOk,
+      allocator->StartModelAllocation(model, op_resolver,
+                                      &node_and_registration, &eval_tensors));
+  TF_LITE_MICRO_EXPECT_EQ(
+      kTfLiteOk, allocator->FinishModelAllocation(model, eval_tensors));
 
   // Since all of the tensors are online planned and the model structure is
   // identical to that in TestAllocationForModelsWithBranches,
   // the offsets should be identical to that test.
- uint8_t* start = context.tensors[0].data.uint8; - TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[0].data.uint8 - start); - TF_LITE_MICRO_EXPECT_EQ(static_cast(48), context.tensors[0].bytes); - TF_LITE_MICRO_EXPECT_EQ(96, context.tensors[1].data.uint8 - start); - TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[2].data.uint8 - start); - TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[3].data.uint8 - start); + uint8_t* start = eval_tensors[0].data.uint8; + TF_LITE_MICRO_EXPECT_EQ(0, eval_tensors[0].data.uint8 - start); + + size_t buffer_size; + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, tflite::TfLiteEvalTensorByteLength( + &eval_tensors[0], &buffer_size)); + TF_LITE_MICRO_EXPECT_EQ(static_cast(48), buffer_size); + TF_LITE_MICRO_EXPECT_EQ(96, eval_tensors[1].data.uint8 - start); + TF_LITE_MICRO_EXPECT_EQ(48, eval_tensors[2].data.uint8 - start); + TF_LITE_MICRO_EXPECT_EQ(0, eval_tensors[3].data.uint8 - start); } TF_LITE_MICRO_TEST(OfflinePlannerBasic) { @@ -419,23 +470,24 @@ TF_LITE_MICRO_TEST(OfflinePlannerBasic) { const tflite::Model* model = tflite::testing::GetModelWithOfflinePlanning( nbr_tensors, metadata_buffer, node_list, num_conns); - TfLiteContext context; + TfLiteEvalTensor* eval_tensors = nullptr; constexpr size_t arena_size = 4096; uint8_t arena[arena_size]; tflite::MicroAllocator* allocator = tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter); TF_LITE_MICRO_EXPECT_EQ( - kTfLiteOk, allocator->StartModelAllocation(model, &context, op_resolver, - &node_and_registration)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - allocator->FinishModelAllocation(model, &context)); + kTfLiteOk, + allocator->StartModelAllocation(model, op_resolver, + &node_and_registration, &eval_tensors)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, allocator->FinishModelAllocation(model, eval_tensors)); - uint8_t* start = context.tensors[0].data.uint8; - TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[0].data.uint8 - start); - TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[1].data.uint8 - start); - TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[2].data.uint8 - start); - TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[3].data.uint8 - start); + uint8_t* start = eval_tensors[0].data.uint8; + TF_LITE_MICRO_EXPECT_EQ(0, eval_tensors[0].data.uint8 - start); + TF_LITE_MICRO_EXPECT_EQ(48, eval_tensors[1].data.uint8 - start); + TF_LITE_MICRO_EXPECT_EQ(0, eval_tensors[2].data.uint8 - start); + TF_LITE_MICRO_EXPECT_EQ(48, eval_tensors[3].data.uint8 - start); } TF_LITE_MICRO_TEST(OfflinePlannerOverlappingAllocation) { @@ -471,24 +523,25 @@ TF_LITE_MICRO_TEST(OfflinePlannerOverlappingAllocation) { const tflite::Model* model = tflite::testing::GetModelWithOfflinePlanning( nbr_tensors, metadata_buffer, node_list, num_conns); - TfLiteContext context; + TfLiteEvalTensor* eval_tensors = nullptr; constexpr size_t arena_size = 4096; uint8_t arena[arena_size]; tflite::MicroAllocator* allocator = tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter); TF_LITE_MICRO_EXPECT_EQ( - kTfLiteOk, allocator->StartModelAllocation(model, &context, op_resolver, - &node_and_registration)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - allocator->FinishModelAllocation(model, &context)); + kTfLiteOk, + allocator->StartModelAllocation(model, op_resolver, + &node_and_registration, &eval_tensors)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, allocator->FinishModelAllocation(model, eval_tensors)); - uint8_t* start = context.tensors[0].data.uint8; - TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[0].data.uint8 - start); - TF_LITE_MICRO_EXPECT_EQ(0, 
context.tensors[1].data.uint8 - start); - TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[2].data.uint8 - start); - TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[3].data.uint8 - start); - TF_LITE_MICRO_EXPECT_EQ(static_cast(48), context.tensors[0].bytes); + uint8_t* start = eval_tensors[0].data.uint8; + TF_LITE_MICRO_EXPECT_EQ(0, eval_tensors[0].data.uint8 - start); + TF_LITE_MICRO_EXPECT_EQ(0, eval_tensors[1].data.uint8 - start); + TF_LITE_MICRO_EXPECT_EQ(48, eval_tensors[2].data.uint8 - start); + TF_LITE_MICRO_EXPECT_EQ(0, eval_tensors[3].data.uint8 - start); + // TF_LITE_MICRO_EXPECT_EQ(static_cast(48), context.tensors[0].bytes); } TF_LITE_MICRO_TEST(OfflinePlannerOfflineOnline) { @@ -526,24 +579,25 @@ TF_LITE_MICRO_TEST(OfflinePlannerOfflineOnline) { const tflite::Model* model = tflite::testing::GetModelWithOfflinePlanning( nbr_tensors, metadata_buffer, node_list, num_conns); - TfLiteContext context; + TfLiteEvalTensor* eval_tensors = nullptr; constexpr size_t arena_size = 4096; uint8_t arena[arena_size]; tflite::MicroAllocator* allocator = tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter); TF_LITE_MICRO_EXPECT_EQ( - kTfLiteOk, allocator->StartModelAllocation(model, &context, op_resolver, - &node_and_registration)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - allocator->FinishModelAllocation(model, &context)); + kTfLiteOk, + allocator->StartModelAllocation(model, op_resolver, + &node_and_registration, &eval_tensors)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, allocator->FinishModelAllocation(model, eval_tensors)); - uint8_t* start = context.tensors[0].data.uint8; - TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[0].data.uint8 - start); - TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[1].data.uint8 - start); - TF_LITE_MICRO_EXPECT_EQ(96, context.tensors[2].data.uint8 - start); - TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[4].data.uint8 - start); - TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[3].data.uint8 - start); + uint8_t* start = eval_tensors[0].data.uint8; + TF_LITE_MICRO_EXPECT_EQ(0, eval_tensors[0].data.uint8 - start); + TF_LITE_MICRO_EXPECT_EQ(48, eval_tensors[1].data.uint8 - start); + TF_LITE_MICRO_EXPECT_EQ(96, eval_tensors[2].data.uint8 - start); + TF_LITE_MICRO_EXPECT_EQ(48, eval_tensors[4].data.uint8 - start); + TF_LITE_MICRO_EXPECT_EQ(0, eval_tensors[3].data.uint8 - start); } TF_LITE_MICRO_TEST(TestAllocatePersistentTfLiteTensor) { @@ -554,14 +608,14 @@ TF_LITE_MICRO_TEST(TestAllocatePersistentTfLiteTensor) { tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter); TF_LITE_MICRO_EXPECT_NE(allocator, nullptr); - TfLiteTensor* tensor1 = - allocator->AllocatePersistentTfLiteTensor(model, /*tensor_index=*/1); + TfLiteTensor* tensor1 = allocator->AllocatePersistentTfLiteTensor( + model, /*eval_tensors=*/nullptr, /*tensor_index=*/1); TF_LITE_MICRO_EXPECT_NE(tensor1, nullptr); TF_LITE_MICRO_EXPECT_NE(tensor1->quantization.params, nullptr); TF_LITE_MICRO_EXPECT_FALSE(tensor1->is_variable); - TfLiteTensor* tensor2 = - allocator->AllocatePersistentTfLiteTensor(model, /*tensor_index=*/2); + TfLiteTensor* tensor2 = allocator->AllocatePersistentTfLiteTensor( + model, /*eval_tensors=*/nullptr, /*tensor_index=*/2); TF_LITE_MICRO_EXPECT_NE(tensor2, nullptr); TF_LITE_MICRO_EXPECT_NE(tensor2->quantization.params, nullptr); TF_LITE_MICRO_EXPECT_FALSE(tensor2->is_variable); @@ -571,7 +625,7 @@ TF_LITE_MICRO_TEST(TestAllocatePersistentTfLiteTensor) { TF_LITE_MICRO_EXPECT_GT(tensor1, tensor2); } -TF_LITE_MICRO_TEST(TestAllocateSingleTfLiteTensor) { 
+TF_LITE_MICRO_TEST(TestAllocateSingleTempTfLiteTensor) { const tflite::Model* model = tflite::testing::GetSimpleMockModel(); constexpr size_t arena_size = 1024; uint8_t arena[arena_size]; @@ -579,8 +633,8 @@ TF_LITE_MICRO_TEST(TestAllocateSingleTfLiteTensor) { tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter); TF_LITE_MICRO_EXPECT_NE(allocator, nullptr); - TfLiteTensor* tensor1 = - allocator->AllocateTempTfLiteTensor(model, /*tensor_index=*/1); + TfLiteTensor* tensor1 = allocator->AllocateTempTfLiteTensor( + model, /*eval_tensors=*/nullptr, /*tensor_index=*/1); TF_LITE_MICRO_EXPECT_NE(tensor1, nullptr); } @@ -592,13 +646,13 @@ TF_LITE_MICRO_TEST(TestAllocateChainOfTfLiteTensor) { tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter); TF_LITE_MICRO_EXPECT_NE(allocator, nullptr); - TfLiteTensor* tensor1 = - allocator->AllocateTempTfLiteTensor(model, /*tensor_index=*/1); + TfLiteTensor* tensor1 = allocator->AllocateTempTfLiteTensor( + model, /*eval_tensors=*/nullptr, /*tensor_index=*/1); TF_LITE_MICRO_EXPECT_NE(tensor1, nullptr); - TfLiteTensor* tensor2 = - allocator->AllocateTempTfLiteTensor(model, /*tensor_index=*/3); - TF_LITE_MICRO_EXPECT_NE(tensor1, nullptr); + TfLiteTensor* tensor2 = allocator->AllocateTempTfLiteTensor( + model, /*eval_tensors=*/nullptr, /*tensor_index=*/2); + TF_LITE_MICRO_EXPECT_NE(tensor2, nullptr); // The address of tensor2 should be higher than the address of tensor1 // (chained allocations): @@ -613,15 +667,15 @@ TF_LITE_MICRO_TEST(TestAllocateTfLiteTensorWithReset) { tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter); TF_LITE_MICRO_EXPECT(allocator != nullptr); - TfLiteTensor* tensor1 = - allocator->AllocateTempTfLiteTensor(model, /*tensor_index=*/1); + TfLiteTensor* tensor1 = allocator->AllocateTempTfLiteTensor( + model, /*eval_tensors=*/nullptr, /*tensor_index=*/1); TF_LITE_MICRO_EXPECT(tensor1 != nullptr); allocator->ResetTempAllocations(); - TfLiteTensor* tensor2 = - allocator->AllocateTempTfLiteTensor(model, /*tensor_index=*/2); - TF_LITE_MICRO_EXPECT(tensor1 != nullptr); + TfLiteTensor* tensor2 = allocator->AllocateTempTfLiteTensor( + model, /*eval_tensors=*/nullptr, /*tensor_index=*/2); + TF_LITE_MICRO_EXPECT(tensor2 != nullptr); // The address of tensor2 should be equal than the address of tensor1 since // allocations were not chained: diff --git a/tensorflow/lite/micro/micro_interpreter.cc b/tensorflow/lite/micro/micro_interpreter.cc index e259d9d7adb..87c8da7da42 100644 --- a/tensorflow/lite/micro/micro_interpreter.cc +++ b/tensorflow/lite/micro/micro_interpreter.cc @@ -22,6 +22,7 @@ limitations under the License. 
#include "tensorflow/lite/c/common.h" #include "tensorflow/lite/core/api/error_reporter.h" #include "tensorflow/lite/core/api/tensor_utils.h" +#include "tensorflow/lite/micro/memory_helpers.h" #include "tensorflow/lite/micro/micro_allocator.h" #include "tensorflow/lite/micro/micro_op_resolver.h" #include "tensorflow/lite/micro/micro_profiler.h" @@ -42,6 +43,10 @@ const char* OpNameFromRegistration(const TfLiteRegistration* registration) { namespace internal { +ContextHelper::ContextHelper(ErrorReporter* error_reporter, + MicroAllocator* allocator, const Model* model) + : allocator_(allocator), error_reporter_(error_reporter), model_(model) {} + TfLiteStatus ContextHelper::AllocatePersistentBuffer(TfLiteContext* ctx, size_t bytes, void** ptr) { return reinterpret_cast(ctx->impl_) @@ -72,8 +77,21 @@ void ContextHelper::ReportOpError(struct TfLiteContext* context, TfLiteTensor* ContextHelper::GetTensor(const struct TfLiteContext* context, int tensor_idx) { - // TODO(b/160894903): Return this value from temp allocated memory. - return &context->tensors[tensor_idx]; + ContextHelper* helper = static_cast(context->impl_); + return helper->allocator_->AllocateTempTfLiteTensor( + helper->model_, helper->eval_tensors_, tensor_idx); +} + +TfLiteEvalTensor* ContextHelper::GetEvalTensor( + const struct TfLiteContext* context, int tensor_idx) { + ContextHelper* helper = reinterpret_cast(context->impl_); + return &helper->eval_tensors_[tensor_idx]; +} + +void ContextHelper::SetNodeIndex(int idx) { current_node_idx_ = idx; } + +void ContextHelper::SetTfLiteEvalTensors(TfLiteEvalTensor* eval_tensors) { + eval_tensors_ = eval_tensors; } } // namespace internal @@ -91,7 +109,8 @@ MicroInterpreter::MicroInterpreter(const Model* model, error_reporter)), tensors_allocated_(false), initialization_status_(kTfLiteError), - context_helper_(error_reporter_, &allocator_), + eval_tensors_(nullptr), + context_helper_(error_reporter_, &allocator_, model), input_tensor_(nullptr), output_tensor_(nullptr) { Init(profiler); @@ -108,7 +127,8 @@ MicroInterpreter::MicroInterpreter(const Model* model, allocator_(*allocator), tensors_allocated_(false), initialization_status_(kTfLiteError), - context_helper_(error_reporter_, &allocator_), + eval_tensors_(nullptr), + context_helper_(error_reporter_, &allocator_, model), input_tensor_(nullptr), output_tensor_(nullptr) { Init(profiler); @@ -143,13 +163,14 @@ void MicroInterpreter::Init(tflite::Profiler* profiler) { context_.impl_ = static_cast(&context_helper_); context_.ReportError = context_helper_.ReportOpError; context_.GetTensor = context_helper_.GetTensor; + context_.GetEvalTensor = context_helper_.GetEvalTensor; context_.recommended_num_threads = 1; context_.profiler = profiler; initialization_status_ = kTfLiteOk; } -void MicroInterpreter::CorrectTensorEndianness(TfLiteTensor* tensorCorr) { +void MicroInterpreter::CorrectTensorEndianness(TfLiteEvalTensor* tensorCorr) { int32_t tensorSize = 1; for (int d = 0; d < tensorCorr->dims->size; ++d) tensorSize *= reinterpret_cast(tensorCorr->dims->data)[d]; @@ -190,24 +211,40 @@ void MicroInterpreter::CorrectTensorDataEndianness(T* data, int32_t size) { } TfLiteStatus MicroInterpreter::AllocateTensors() { - if (allocator_.StartModelAllocation(model_, &context_, op_resolver_, - &node_and_registrations_) != kTfLiteOk) { + if (allocator_.StartModelAllocation(model_, op_resolver_, + &node_and_registrations_, + &eval_tensors_) != kTfLiteOk) { TF_LITE_REPORT_ERROR(error_reporter_, "Failed starting model allocation.\n"); 
initialization_status_ = kTfLiteError; return kTfLiteError; } + // Update the pointer now that TfLiteEvalTensor allocation has completed on + // the context helper. + // TODO(b/16157777): This call would not be needed if ContextHelper rolled + // into the interpreter. + context_helper_.SetTfLiteEvalTensors(eval_tensors_); + // If the system is big endian then convert weights from the flatbuffer from // little to big endian on startup so that it does not need to be done during // inference. // NOTE: This requires that the flatbuffer is held in memory which can be // modified by this process. if (!FLATBUFFERS_LITTLEENDIAN) { - for (size_t t = 0; t < tensors_size(); ++t) { - TfLiteTensor* thisTensor = &context_.tensors[t]; - if (thisTensor->allocation_type == kTfLiteMmapRo) - CorrectTensorEndianness(thisTensor); + for (size_t t = 0; t < subgraph_->tensors()->size(); ++t) { + if (auto* buffer = + (*model_->buffers())[subgraph_->tensors()->Get(t)->buffer()]) { + // If we've found a buffer, does it have any data? + if (auto* array = buffer->data()) { + // If it has any data, is the data size larger than zero? + if (array->size()) { + // Update the endianness of the corresponding eval tensor since that + // struct holds the buffer used at inference time. + CorrectTensorEndianness(&eval_tensors_[t]); + } + } + } } } @@ -236,8 +273,8 @@ TfLiteStatus MicroInterpreter::AllocateTensors() { } context_helper_.SetNodeIndex(-1); - // Both AllocatePersistentBuffer and RequestScratchBufferInArena is available - // in Prepare stage. + // Both AllocatePersistentBuffer and RequestScratchBufferInArena is + // available in Prepare stage. context_.RequestScratchBufferInArena = context_helper_.RequestScratchBufferInArena; for (size_t i = 0; i < subgraph_->operators()->size(); ++i) { @@ -255,6 +292,7 @@ TfLiteStatus MicroInterpreter::AllocateTensors() { return kTfLiteError; } } + allocator_.ResetTempAllocations(); } context_helper_.SetNodeIndex(-1); @@ -265,7 +303,9 @@ TfLiteStatus MicroInterpreter::AllocateTensors() { context_.GetScratchBuffer = context_helper_.GetScratchBuffer; TF_LITE_ENSURE_OK(&context_, - allocator_.FinishModelAllocation(model_, &context_)); + allocator_.FinishModelAllocation(model_, eval_tensors_)); + TF_LITE_ENSURE_STATUS(ResetVariableTensors()); + tensors_allocated_ = true; return kTfLiteOk; } @@ -290,7 +330,8 @@ TfLiteStatus MicroInterpreter::Invoke() { if (registration->invoke) { TfLiteStatus invoke_status; #ifndef NDEBUG // Omit profiler overhead from release builds. - // The case where profiler == nullptr is handled by ScopedOperatorProfile. + // The case where profiler == nullptr is handled by + // ScopedOperatorProfile. tflite::Profiler* profiler = reinterpret_cast(context_.profiler); ScopedOperatorProfile scoped_profiler( @@ -298,6 +339,12 @@ TfLiteStatus MicroInterpreter::Invoke() { #endif invoke_status = registration->invoke(&context_, node); + // All TfLiteTensor structs used in the kernel are allocated from temp + // memory in the allocator. This creates a chain of allocations in the + // temp section. The call below resets the chain of allocations to + // prepare for the next call. 
+ allocator_.ResetTempAllocations(); + if (invoke_status == kTfLiteError) { TF_LITE_REPORT_ERROR( error_reporter_, @@ -321,15 +368,17 @@ TfLiteTensor* MicroInterpreter::input(size_t index) { return nullptr; } if (index != 0) { - TF_LITE_REPORT_ERROR(error_reporter_, - "Input tensors not at index 0 will allocate from the " - "persistent memory arena in the future!"); - return &(context_.tensors[inputs().Get(index)]); + TF_LITE_REPORT_ERROR( + error_reporter_, + "Input tensors not at index 0 are allocated from the " + "persistent memory arena. Repeat calls will cause excess " + "allocation!"); + return allocator_.AllocatePersistentTfLiteTensor(model_, eval_tensors_, + inputs().Get(index)); } if (input_tensor_ == nullptr) { - // TODO(b/160894903): This API will allocate TfLiteTensor structs from - // persistent (tail) memory and cache on this pointer. - input_tensor_ = &(context_.tensors[inputs().Get(index)]); + input_tensor_ = allocator_.AllocatePersistentTfLiteTensor( + model_, eval_tensors_, inputs().Get(index)); } return input_tensor_; } @@ -343,15 +392,19 @@ TfLiteTensor* MicroInterpreter::output(size_t index) { return nullptr; } if (index != 0) { - TF_LITE_REPORT_ERROR(error_reporter_, - "Output tensors not at index 0 will allocate from the " - "persistent memory arena in the future!"); - return &(context_.tensors[outputs().Get(index)]); + TF_LITE_REPORT_ERROR( + error_reporter_, + "Output tensors not at index 0 are allocated from the " + "persistent memory arena. Repeat calls will cause excess " + "allocation!"); + return allocator_.AllocatePersistentTfLiteTensor(model_, eval_tensors_, + outputs().Get(index)); } if (output_tensor_ == nullptr) { // TODO(b/160894903): This API will allocate TfLiteTensor structs from // persistent (tail) memory and cache on this pointer. - output_tensor_ = &(context_.tensors[outputs().Get(index)]); + output_tensor_ = allocator_.AllocatePersistentTfLiteTensor( + model_, eval_tensors_, outputs().Get(index)); } return output_tensor_; } @@ -364,22 +417,26 @@ TfLiteTensor* MicroInterpreter::tensor(size_t index) { length); return nullptr; } - return &context_.tensors[index]; + return allocator_.AllocatePersistentTfLiteTensor(model_, eval_tensors_, + index); } TfLiteStatus MicroInterpreter::ResetVariableTensors() { - const size_t length = tensors_size(); - for (size_t i = 0; i < length; ++i) { - TfLiteTensor* cur_tensor = tensor(i); - if (cur_tensor->is_variable) { - TfLiteStatus status = tflite::ResetVariableTensor(cur_tensor); - if (status != kTfLiteOk) { - TF_LITE_REPORT_ERROR(error_reporter_, - "Failed to reset variable tensor at index: %d", i); - return status; + for (size_t i = 0; i < subgraph_->tensors()->size(); ++i) { + auto* tensor = subgraph_->tensors()->Get(i); + if (tensor->is_variable()) { + size_t buffer_size; + TF_LITE_ENSURE_STATUS( + TfLiteEvalTensorByteLength(&eval_tensors_[i], &buffer_size)); + + int value = 0; + if (tensor->type() == tflite::TensorType_INT8) { + value = tensor->quantization()->zero_point()->Get(0); } + memset(eval_tensors_[i].data.raw, value, buffer_size); } } + return kTfLiteOk; } diff --git a/tensorflow/lite/micro/micro_interpreter.h b/tensorflow/lite/micro/micro_interpreter.h index 679a0f13f54..df70514c8d3 100644 --- a/tensorflow/lite/micro/micro_interpreter.h +++ b/tensorflow/lite/micro/micro_interpreter.h @@ -35,32 +35,37 @@ namespace internal { // A helper class to encapsulate the implementation of APIs in Context. // context->impl_ points to an instance of this class. 
// Check tensorflow/lite/c/common.h for detailed descriptions. +// TODO(b/16157777): Consider rolling this class into MicroInterpreter. class ContextHelper { public: explicit ContextHelper(ErrorReporter* error_reporter, - MicroAllocator* allocator) - : allocator_(allocator), error_reporter_(error_reporter) {} + MicroAllocator* allocator, const Model* model); + // Functions that will be assigned to function pointers on TfLiteContext: static TfLiteStatus AllocatePersistentBuffer(TfLiteContext* ctx, size_t bytes, void** ptr); - static TfLiteStatus RequestScratchBufferInArena(TfLiteContext* ctx, size_t bytes, int* buffer_idx); - static void* GetScratchBuffer(TfLiteContext* ctx, int buffer_idx); - static void ReportOpError(struct TfLiteContext* context, const char* format, ...); - static TfLiteTensor* GetTensor(const struct TfLiteContext* context, int tensor_idx); + static TfLiteEvalTensor* GetEvalTensor(const struct TfLiteContext* context, + int tensor_idx); - void SetNodeIndex(int idx) { current_node_idx_ = idx; } + // Sets the current node index to assist with scratch buffer allocations: + void SetNodeIndex(int idx); + + // Sets the pointer to a list of TfLiteEvalTensor instances. + void SetTfLiteEvalTensors(TfLiteEvalTensor* eval_tensors); private: MicroAllocator* allocator_; ErrorReporter* error_reporter_; + const Model* model_; + TfLiteEvalTensor* eval_tensors_; int current_node_idx_ = -1; }; @@ -173,7 +178,7 @@ class MicroInterpreter { // error reporting during initialization. void Init(tflite::Profiler* profiler); - void CorrectTensorEndianness(TfLiteTensor* tensorCorr); + void CorrectTensorEndianness(TfLiteEvalTensor* tensorCorr); template void CorrectTensorDataEndianness(T* data, int32_t size); @@ -190,6 +195,7 @@ class MicroInterpreter { TfLiteStatus initialization_status_; const SubGraph* subgraph_; + TfLiteEvalTensor* eval_tensors_; internal::ContextHelper context_helper_; // TODO(b/160894903): Clean these pointers up when all APIs are updated to new diff --git a/tensorflow/lite/micro/micro_interpreter_test.cc b/tensorflow/lite/micro/micro_interpreter_test.cc index 21c7e935f17..3037ffaada9 100644 --- a/tensorflow/lite/micro/micro_interpreter_test.cc +++ b/tensorflow/lite/micro/micro_interpreter_test.cc @@ -72,7 +72,7 @@ TF_LITE_MICRO_TEST(TestInterpreter) { tflite::AllOpsResolver op_resolver = tflite::testing::GetOpResolver(); - constexpr size_t allocator_buffer_size = 1000; + constexpr size_t allocator_buffer_size = 2000; uint8_t allocator_buffer[allocator_buffer_size]; // Create a new scope so that we can test the destructor. 
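
With context_.GetEvalTensor wired to ContextHelper::GetEvalTensor above, a ported kernel can fetch the source-of-truth TfLiteEvalTensor directly instead of requesting a full TfLiteTensor view. A rough sketch of what such an Eval body could look like; the kernel function is hypothetical, and only the GetEvalTensor hook comes from this change:

#include "tensorflow/lite/c/common.h"

TfLiteStatus EvalSketch(TfLiteContext* context, TfLiteNode* node) {
  // Fetch the eval tensor for the kernel's first input; no temp TfLiteTensor
  // struct is allocated on this path.
  const TfLiteEvalTensor* input =
      context->GetEvalTensor(context, node->inputs->data[0]);
  if (input == nullptr || input->data.raw == nullptr) {
    return kTfLiteError;
  }
  // ... operate on input->data and input->dims directly ...
  return kTfLiteOk;
}
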
@@ -127,7 +127,7 @@ TF_LITE_MICRO_TEST(TestKernelMemoryPlanning) { tflite::AllOpsResolver op_resolver = tflite::testing::GetOpResolver(); - constexpr size_t allocator_buffer_size = 1024; + constexpr size_t allocator_buffer_size = 2048; uint8_t allocator_buffer[allocator_buffer_size]; tflite::MicroInterpreter interpreter(model, op_resolver, allocator_buffer, allocator_buffer_size, @@ -312,13 +312,7 @@ TF_LITE_MICRO_TEST(TestIncompleteInitializationAllocationsWithSmallArena) { static_cast(0), allocator ->GetRecordedAllocation( - tflite::RecordedAllocationType::kTfLiteTensorArray) - .used_bytes); - TF_LITE_MICRO_EXPECT_EQ( - static_cast(0), - allocator - ->GetRecordedAllocation(tflite::RecordedAllocationType:: - kTfLiteTensorArrayQuantizationData) + tflite::RecordedAllocationType::kTfLiteEvalTensorData) .used_bytes); TF_LITE_MICRO_EXPECT_EQ( static_cast(0), @@ -358,13 +352,13 @@ TF_LITE_MICRO_TEST(TestInterpreterDoesNotAllocateUntilInvoke) { static_cast(0), allocator ->GetRecordedAllocation( - tflite::RecordedAllocationType::kTfLiteTensorArray) + tflite::RecordedAllocationType::kTfLiteTensorVariableBufferData) .used_bytes); TF_LITE_MICRO_EXPECT_EQ( static_cast(0), allocator ->GetRecordedAllocation( - tflite::RecordedAllocationType::kTfLiteTensorVariableBufferData) + tflite::RecordedAllocationType::kTfLiteEvalTensorData) .used_bytes); TF_LITE_MICRO_EXPECT_EQ( static_cast(0), @@ -382,9 +376,9 @@ TF_LITE_MICRO_TEST(TestInterpreterDoesNotAllocateUntilInvoke) { TF_LITE_MICRO_EXPECT_GT( allocator ->GetRecordedAllocation( - tflite::RecordedAllocationType::kTfLiteTensorArray) + tflite::RecordedAllocationType::kTfLiteEvalTensorData) .used_bytes, - static_cast(0)); + 0); TF_LITE_MICRO_EXPECT_GT( allocator diff --git a/tensorflow/lite/micro/recording_micro_allocator.cc b/tensorflow/lite/micro/recording_micro_allocator.cc index e667e7db9a9..5e338a339ca 100644 --- a/tensorflow/lite/micro/recording_micro_allocator.cc +++ b/tensorflow/lite/micro/recording_micro_allocator.cc @@ -18,6 +18,7 @@ limitations under the License. 
#include "tensorflow/lite/core/api/error_reporter.h" #include "tensorflow/lite/kernels/internal/compatibility.h" #include "tensorflow/lite/micro/compatibility.h" +#include "tensorflow/lite/micro/micro_allocator.h" #include "tensorflow/lite/micro/recording_simple_memory_allocator.h" namespace tflite { @@ -47,10 +48,12 @@ RecordingMicroAllocator* RecordingMicroAllocator::Create( RecordedAllocation RecordingMicroAllocator::GetRecordedAllocation( RecordedAllocationType allocation_type) const { switch (allocation_type) { - case RecordedAllocationType::kTfLiteTensorArray: - return recorded_tflite_tensor_array_data_; - case RecordedAllocationType::kTfLiteTensorArrayQuantizationData: - return recorded_tflite_tensor_array_quantization_data_; + case RecordedAllocationType::kTfLiteEvalTensorData: + return recorded_tflite_eval_tensor_data_; + case RecordedAllocationType::kPersistentTfLiteTensorData: + return recorded_persistent_tflite_tensor_data_; + case RecordedAllocationType::kPersistentTfLiteTensorQuantizationData: + return recorded_persistent_tflite_tensor_quantization_data_; case RecordedAllocationType::kTfLiteTensorVariableBufferData: return recorded_tflite_tensor_variable_buffer_data_; case RecordedAllocationType::kNodeAndRegistrationArray: @@ -81,11 +84,13 @@ void RecordingMicroAllocator::PrintAllocations() const { error_reporter(), "[RecordingMicroAllocator] Arena allocation tail %d bytes", recording_memory_allocator_->GetTailUsedBytes()); - PrintRecordedAllocation(RecordedAllocationType::kTfLiteTensorArray, - "TfLiteTensor struct", "tensors"); + PrintRecordedAllocation(RecordedAllocationType::kTfLiteEvalTensorData, + "TfLiteEvalTensor data", "allocations"); + PrintRecordedAllocation(RecordedAllocationType::kPersistentTfLiteTensorData, + "Persistent TfLiteTensor data", "tensors"); PrintRecordedAllocation( - RecordedAllocationType::kTfLiteTensorArrayQuantizationData, - "TfLiteTensor quantization data", "allocations"); + RecordedAllocationType::kPersistentTfLiteTensorQuantizationData, + "Persistent TfLiteTensor quantization data", "allocations"); PrintRecordedAllocation( RecordedAllocationType::kTfLiteTensorVariableBufferData, "TfLiteTensor variable buffer data", "allocations"); @@ -108,42 +113,12 @@ void RecordingMicroAllocator::PrintRecordedAllocation( allocation.count, allocation_description); } -TfLiteStatus RecordingMicroAllocator::AllocateTfLiteTensorArray( - TfLiteContext* context, const SubGraph* subgraph) { - RecordedAllocation allocations = SnapshotAllocationUsage(); - - TfLiteStatus status = - MicroAllocator::AllocateTfLiteTensorArray(context, subgraph); - - RecordAllocationUsage(allocations, recorded_tflite_tensor_array_data_); - // The allocation for this recording will always be 1. This is because the - // parent class mallocs one large allocation for the number of tensors in the - // graph (e.g. sizeof(TfLiteTensor) * num_tensors). 
- // To prevent extra overhead and potential for fragmentation, manually adjust - // the accounting by decrementing by 1 and adding the actual number of tensors - // used in the graph: - recorded_tflite_tensor_array_data_.count += context->tensors_size - 1; - return status; -} - -TfLiteStatus RecordingMicroAllocator::PopulateTfLiteTensorArrayFromFlatbuffer( - const Model* model, TfLiteContext* context, const SubGraph* subgraph) { - RecordedAllocation allocations = SnapshotAllocationUsage(); - - TfLiteStatus status = MicroAllocator::PopulateTfLiteTensorArrayFromFlatbuffer( - model, context, subgraph); - - RecordAllocationUsage(allocations, - recorded_tflite_tensor_array_quantization_data_); - return status; -} - TfLiteStatus RecordingMicroAllocator::AllocateNodeAndRegistrations( - const SubGraph* subgraph, NodeAndRegistration** node_and_registrations) { + const Model* model, NodeAndRegistration** node_and_registrations) { RecordedAllocation allocations = SnapshotAllocationUsage(); TfLiteStatus status = MicroAllocator::AllocateNodeAndRegistrations( - subgraph, node_and_registrations); + model, node_and_registrations); RecordAllocationUsage(allocations, recorded_node_and_registration_array_data_); @@ -157,36 +132,79 @@ TfLiteStatus RecordingMicroAllocator::AllocateNodeAndRegistrations( // the accounting by decrementing by 1 and adding the actual number of nodes // used in the graph: recorded_node_and_registration_array_data_.count += - subgraph->operators()->size() - 1; + GetSubGraphFromModel(model)->operators()->size() - 1; return status; } TfLiteStatus RecordingMicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer( - const Model* model, const SubGraph* subgraph, - const MicroOpResolver& op_resolver, + const Model* model, const MicroOpResolver& op_resolver, NodeAndRegistration* node_and_registrations) { RecordedAllocation allocations = SnapshotAllocationUsage(); TfLiteStatus status = MicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer( - model, subgraph, op_resolver, node_and_registrations); + model, op_resolver, node_and_registrations); RecordAllocationUsage(allocations, recorded_op_data_); return status; } -TfLiteStatus RecordingMicroAllocator::AllocateVariables( - TfLiteContext* context, const SubGraph* subgraph) { +TfLiteStatus RecordingMicroAllocator::AllocateTfLiteEvalTensors( + const Model* model, TfLiteEvalTensor** eval_tensors) { RecordedAllocation allocations = SnapshotAllocationUsage(); - TfLiteStatus status = MicroAllocator::AllocateVariables(context, subgraph); + TfLiteStatus status = + MicroAllocator::AllocateTfLiteEvalTensors(model, eval_tensors); + + RecordAllocationUsage(allocations, recorded_tflite_eval_tensor_data_); + // The allocation for this recording will always be 1. This is because the + // parent class mallocs one large allocation for the number of tensors in the + // graph (e.g. sizeof(TfLiteEvalTensor) * num_tensors). 
+ // To prevent extra overhead and potential for fragmentation, manually adjust + // the accounting by decrementing by 1 and adding the actual number of tensors + // used in the graph: + recorded_tflite_eval_tensor_data_.count += + GetSubGraphFromModel(model)->tensors()->size() - 1; + return status; +} + +TfLiteStatus RecordingMicroAllocator::AllocateVariables( + const SubGraph* subgraph, TfLiteEvalTensor* eval_tensors) { + RecordedAllocation allocations = SnapshotAllocationUsage(); + + TfLiteStatus status = + MicroAllocator::AllocateVariables(subgraph, eval_tensors); RecordAllocationUsage(allocations, recorded_tflite_tensor_variable_buffer_data_); return status; } +TfLiteTensor* RecordingMicroAllocator::AllocatePersistentTfLiteTensorInternal( + const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index) { + RecordedAllocation allocations = SnapshotAllocationUsage(); + + TfLiteTensor* result = MicroAllocator::AllocatePersistentTfLiteTensorInternal( + model, eval_tensors, tensor_index); + + RecordAllocationUsage(allocations, recorded_persistent_tflite_tensor_data_); + return result; +} + +TfLiteStatus RecordingMicroAllocator::PopulateTfLiteTensorFromFlatbuffer( + const Model* model, const SubGraph* subgraph, TfLiteTensor* tensor, + int tensor_index, bool allocate_temp) { + RecordedAllocation allocations = SnapshotAllocationUsage(); + + TfLiteStatus status = MicroAllocator::PopulateTfLiteTensorFromFlatbuffer( + model, subgraph, tensor, tensor_index, allocate_temp); + + RecordAllocationUsage(allocations, + recorded_persistent_tflite_tensor_quantization_data_); + return status; +} + RecordedAllocation RecordingMicroAllocator::SnapshotAllocationUsage() const { return {/*requested_bytes=*/recording_memory_allocator_->GetRequestedBytes(), /*used_bytes=*/recording_memory_allocator_->GetUsedBytes(), diff --git a/tensorflow/lite/micro/recording_micro_allocator.h b/tensorflow/lite/micro/recording_micro_allocator.h index a5b97c7ef3a..9243fec12e5 100644 --- a/tensorflow/lite/micro/recording_micro_allocator.h +++ b/tensorflow/lite/micro/recording_micro_allocator.h @@ -25,8 +25,9 @@ namespace tflite { // List of buckets currently recorded by this class. Each type keeps a list of // allocated information during model initialization. 
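
The recording overrides above all follow the same snapshot/record pattern: capture the counters, delegate to the base MicroAllocator, then attribute the delta to one bucket. A simplified stand-alone sketch of that bookkeeping (types and names are invented stand-ins for RecordedAllocation and the snapshot helpers):

#include <cstddef>

struct CountersSketch {
  size_t requested_bytes = 0;
  size_t used_bytes = 0;
  size_t count = 0;
};

// Adds the difference between two snapshots to the bucket that owns the
// allocations made in between.
void RecordDeltaSketch(const CountersSketch& before,
                       const CountersSketch& after, CountersSketch* bucket) {
  bucket->requested_bytes += after.requested_bytes - before.requested_bytes;
  bucket->used_bytes += after.used_bytes - before.used_bytes;
  bucket->count += after.count - before.count;
}
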
enum class RecordedAllocationType { - kTfLiteTensorArray, - kTfLiteTensorArrayQuantizationData, + kTfLiteEvalTensorData, + kPersistentTfLiteTensorData, + kPersistentTfLiteTensorQuantizationData, kTfLiteTensorVariableBufferData, kNodeAndRegistrationArray, kOpData, @@ -66,20 +67,29 @@ class RecordingMicroAllocator : public MicroAllocator { void PrintAllocations() const; protected: - TfLiteStatus AllocateTfLiteTensorArray(TfLiteContext* context, - const SubGraph* subgraph) override; - TfLiteStatus PopulateTfLiteTensorArrayFromFlatbuffer( - const Model* model, TfLiteContext* context, - const SubGraph* subgraph) override; TfLiteStatus AllocateNodeAndRegistrations( - const SubGraph* subgraph, + const Model* model, NodeAndRegistration** node_and_registrations) override; TfLiteStatus PrepareNodeAndRegistrationDataFromFlatbuffer( - const Model* model, const SubGraph* subgraph, - const MicroOpResolver& op_resolver, + const Model* model, const MicroOpResolver& op_resolver, NodeAndRegistration* node_and_registrations) override; - TfLiteStatus AllocateVariables(TfLiteContext* context, - const SubGraph* subgraph) override; + TfLiteStatus AllocateTfLiteEvalTensors( + const Model* model, TfLiteEvalTensor** eval_tensors) override; + TfLiteStatus AllocateVariables(const SubGraph* subgraph, + TfLiteEvalTensor* eval_tensors) override; + // TODO(b/160894903): Once all kernels have been updated to the new API drop + // this method. It is only used to record TfLiteTensor persistent allocations. + TfLiteTensor* AllocatePersistentTfLiteTensorInternal( + const Model* model, TfLiteEvalTensor* eval_tensors, + int tensor_index) override; + // TODO(b/160894903): Once all kernels have been updated to the new API drop + // this function since all allocations for quantized data will take place in + // the temp section. + TfLiteStatus PopulateTfLiteTensorFromFlatbuffer(const Model* model, + const SubGraph* subgraph, + TfLiteTensor* tensor, + int tensor_index, + bool allocate_temp) override; private: RecordingMicroAllocator(RecordingSimpleMemoryAllocator* memory_allocator, @@ -95,8 +105,9 @@ class RecordingMicroAllocator : public MicroAllocator { const RecordingSimpleMemoryAllocator* recording_memory_allocator_; - RecordedAllocation recorded_tflite_tensor_array_data_ = {}; - RecordedAllocation recorded_tflite_tensor_array_quantization_data_ = {}; + RecordedAllocation recorded_tflite_eval_tensor_data_ = {}; + RecordedAllocation recorded_persistent_tflite_tensor_data_ = {}; + RecordedAllocation recorded_persistent_tflite_tensor_quantization_data_ = {}; RecordedAllocation recorded_tflite_tensor_variable_buffer_data_ = {}; RecordedAllocation recorded_node_and_registration_array_data_ = {}; RecordedAllocation recorded_op_data_ = {}; diff --git a/tensorflow/lite/micro/recording_micro_allocator_test.cc b/tensorflow/lite/micro/recording_micro_allocator_test.cc index a2aa49f2246..f46bd29abdd 100644 --- a/tensorflow/lite/micro/recording_micro_allocator_test.cc +++ b/tensorflow/lite/micro/recording_micro_allocator_test.cc @@ -21,6 +21,7 @@ limitations under the License. 
#include "tensorflow/lite/micro/testing/test_conv_model.h" #define TF_LITE_TENSOR_STRUCT_SIZE sizeof(TfLiteTensor) +#define TF_LITE_EVAL_TENSOR_STRUCT_SIZE sizeof(TfLiteEvalTensor) #define TF_LITE_AFFINE_QUANTIZATION_SIZE sizeof(TfLiteAffineQuantization) #define NODE_AND_REGISTRATION_STRUCT_SIZE sizeof(tflite::NodeAndRegistration) @@ -33,8 +34,8 @@ constexpr int kTestConvArenaSize = 1024 * 12; TF_LITE_MICRO_TESTS_BEGIN -TF_LITE_MICRO_TEST(TestRecordsTfLiteTensorArrayData) { - TfLiteContext context; +TF_LITE_MICRO_TEST(TestRecordsTfLiteEvalTensorArrayData) { + TfLiteEvalTensor* eval_tensors = nullptr; tflite::AllOpsResolver all_ops_resolver; tflite::NodeAndRegistration* node_and_registration; const tflite::Model* model = tflite::GetModel(kTestConvModelData); @@ -50,11 +51,11 @@ TF_LITE_MICRO_TEST(TestRecordsTfLiteTensorArrayData) { TfLiteStatus status; status = micro_allocator->StartModelAllocation( - model, &context, all_ops_resolver, &node_and_registration); + model, all_ops_resolver, &node_and_registration, &eval_tensors); TF_LITE_MICRO_EXPECT_EQ(status, kTfLiteOk); if (status != kTfLiteOk) return 1; - status = micro_allocator->FinishModelAllocation(model, &context); + status = micro_allocator->FinishModelAllocation(model, eval_tensors); TF_LITE_MICRO_EXPECT_EQ(status, kTfLiteOk); if (status != kTfLiteOk) return 1; @@ -62,76 +63,21 @@ TF_LITE_MICRO_TEST(TestRecordsTfLiteTensorArrayData) { tflite::RecordedAllocation recorded_allocation = micro_allocator->GetRecordedAllocation( - tflite::RecordedAllocationType::kTfLiteTensorArray); - TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.count, context.tensors_size); + tflite::RecordedAllocationType::kTfLiteEvalTensorData); + + micro_allocator->PrintAllocations(); + + size_t tensors_count = tflite::testing::GetModelTensorCount(model); + + TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.count, tensors_count); TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.requested_bytes, - context.tensors_size * TF_LITE_TENSOR_STRUCT_SIZE); + tensors_count * TF_LITE_EVAL_TENSOR_STRUCT_SIZE); TF_LITE_MICRO_EXPECT_GE(recorded_allocation.used_bytes, - context.tensors_size * TF_LITE_TENSOR_STRUCT_SIZE); -} - -TF_LITE_MICRO_TEST(TestRecordsTensorArrayQuantizationData) { - TfLiteContext context; - tflite::AllOpsResolver all_ops_resolver; - tflite::NodeAndRegistration* node_and_registration; - const tflite::Model* model = tflite::GetModel(kTestConvModelData); - uint8_t arena[kTestConvArenaSize]; - - tflite::RecordingMicroAllocator* micro_allocator = - tflite::RecordingMicroAllocator::Create(arena, kTestConvArenaSize, - micro_test::reporter); - TF_LITE_MICRO_EXPECT_NE(micro_allocator, nullptr); - if (micro_allocator == nullptr) return 1; - - TfLiteStatus status; - status = micro_allocator->StartModelAllocation( - model, &context, all_ops_resolver, &node_and_registration); - TF_LITE_MICRO_EXPECT_EQ(status, kTfLiteOk); - if (status != kTfLiteOk) return 1; - - status = micro_allocator->FinishModelAllocation(model, &context); - TF_LITE_MICRO_EXPECT_EQ(status, kTfLiteOk); - if (status != kTfLiteOk) return 1; - - // Walk the model subgraph to find all tensors with quantization params and - // keep a tally. 
- size_t quantized_tensor_count = 0; - size_t quantized_channel_bytes = 0; - for (size_t i = 0; i < context.tensors_size; ++i) { - const tflite::Tensor* cur_tensor = - model->subgraphs()->Get(0)->tensors()->Get(i); - const tflite::QuantizationParameters* quantization_params = - cur_tensor->quantization(); - if (quantization_params && quantization_params->scale() && - quantization_params->scale()->size() > 0 && - quantization_params->zero_point() && - quantization_params->zero_point()->size() > 0) { - quantized_tensor_count++; - size_t num_channels = quantization_params->scale()->size(); - quantized_channel_bytes += TfLiteIntArrayGetSizeInBytes(num_channels); - } - } - - // Calculate the expected allocation bytes with subgraph quantization data: - size_t expected_requested_bytes = - quantized_tensor_count * TF_LITE_AFFINE_QUANTIZATION_SIZE + - quantized_channel_bytes; - - tflite::RecordedAllocation recorded_allocation = - micro_allocator->GetRecordedAllocation( - tflite::RecordedAllocationType::kTfLiteTensorArrayQuantizationData); - - // Each quantized tensors has 2 mallocs (quant struct, zero point dimensions): - TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.count, - quantized_tensor_count * 2); - TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.requested_bytes, - expected_requested_bytes); - TF_LITE_MICRO_EXPECT_GE(recorded_allocation.used_bytes, - expected_requested_bytes); + tensors_count * TF_LITE_EVAL_TENSOR_STRUCT_SIZE); } TF_LITE_MICRO_TEST(TestRecordsNodeAndRegistrationArrayData) { - TfLiteContext context; + TfLiteEvalTensor* eval_tensors = nullptr; tflite::AllOpsResolver all_ops_resolver; tflite::NodeAndRegistration* node_and_registration; const tflite::Model* model = tflite::GetModel(kTestConvModelData); @@ -145,11 +91,11 @@ TF_LITE_MICRO_TEST(TestRecordsNodeAndRegistrationArrayData) { TfLiteStatus status; status = micro_allocator->StartModelAllocation( - model, &context, all_ops_resolver, &node_and_registration); + model, all_ops_resolver, &node_and_registration, &eval_tensors); TF_LITE_MICRO_EXPECT_EQ(status, kTfLiteOk); if (status != kTfLiteOk) return 1; - status = micro_allocator->FinishModelAllocation(model, &context); + status = micro_allocator->FinishModelAllocation(model, eval_tensors); TF_LITE_MICRO_EXPECT_EQ(status, kTfLiteOk); if (status != kTfLiteOk) return 1; @@ -165,7 +111,7 @@ TF_LITE_MICRO_TEST(TestRecordsNodeAndRegistrationArrayData) { } TF_LITE_MICRO_TEST(TestRecordsMultiTenantAllocations) { - TfLiteContext context; + TfLiteEvalTensor* eval_tensors = nullptr; tflite::AllOpsResolver all_ops_resolver; tflite::NodeAndRegistration* node_and_registration; const tflite::Model* model = tflite::GetModel(kTestConvModelData); @@ -183,34 +129,108 @@ TF_LITE_MICRO_TEST(TestRecordsMultiTenantAllocations) { // First allocation with the model in the arena: status = micro_allocator->StartModelAllocation( - model, &context, all_ops_resolver, &node_and_registration); + model, all_ops_resolver, &node_and_registration, &eval_tensors); TF_LITE_MICRO_EXPECT_EQ(status, kTfLiteOk); if (status != kTfLiteOk) return 1; - status = micro_allocator->FinishModelAllocation(model, &context); + status = micro_allocator->FinishModelAllocation(model, eval_tensors); TF_LITE_MICRO_EXPECT_EQ(status, kTfLiteOk); if (status != kTfLiteOk) return 1; // Second allocation with the same model in the arena: status = micro_allocator->StartModelAllocation( - model, &context, all_ops_resolver, &node_and_registration); + model, all_ops_resolver, &node_and_registration, &eval_tensors); TF_LITE_MICRO_EXPECT_EQ(status, 
kTfLiteOk); if (status != kTfLiteOk) return 1; - status = kTfLiteOk, micro_allocator->FinishModelAllocation(model, &context); + status = kTfLiteOk, + micro_allocator->FinishModelAllocation(model, eval_tensors); TF_LITE_MICRO_EXPECT_EQ(status, kTfLiteOk); if (status != kTfLiteOk) return 1; + size_t tensors_count = tflite::testing::GetModelTensorCount(model); + tflite::RecordedAllocation recorded_allocation = micro_allocator->GetRecordedAllocation( - tflite::RecordedAllocationType::kTfLiteTensorArray); - TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.count, context.tensors_size * 2); - TF_LITE_MICRO_EXPECT_EQ( - recorded_allocation.requested_bytes, - context.tensors_size * TF_LITE_TENSOR_STRUCT_SIZE * 2); - TF_LITE_MICRO_EXPECT_GE( - recorded_allocation.used_bytes, - context.tensors_size * TF_LITE_TENSOR_STRUCT_SIZE * 2); + tflite::RecordedAllocationType::kTfLiteEvalTensorData); + TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.count, tensors_count * 2); + TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.requested_bytes, + tensors_count * TF_LITE_EVAL_TENSOR_STRUCT_SIZE * 2); + TF_LITE_MICRO_EXPECT_GE(recorded_allocation.used_bytes, + tensors_count * TF_LITE_EVAL_TENSOR_STRUCT_SIZE * 2); +} + +TF_LITE_MICRO_TEST(TestRecordsPersistentTfLiteTensorData) { + const tflite::Model* model = tflite::GetModel(kTestConvModelData); + uint8_t arena[kTestConvArenaSize]; + + tflite::RecordingMicroAllocator* micro_allocator = + tflite::RecordingMicroAllocator::Create(arena, kTestConvArenaSize, + micro_test::reporter); + TF_LITE_MICRO_EXPECT_NE(micro_allocator, nullptr); + if (micro_allocator == nullptr) return 1; + + TfLiteTensor* tensor = micro_allocator->AllocatePersistentTfLiteTensor( + model, /*eval_tensors=*/nullptr, 0); + TF_LITE_MICRO_EXPECT_NE(tensor, nullptr); + if (tensor == nullptr) return 1; + + tflite::RecordedAllocation recorded_allocation = + micro_allocator->GetRecordedAllocation( + tflite::RecordedAllocationType::kPersistentTfLiteTensorData); + + TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.count, static_cast(1)); + TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.requested_bytes, + TF_LITE_TENSOR_STRUCT_SIZE); + TF_LITE_MICRO_EXPECT_GE(recorded_allocation.used_bytes, + TF_LITE_TENSOR_STRUCT_SIZE); +} + +TF_LITE_MICRO_TEST(TestRecordsPersistentTfLiteTensorQuantizationData) { + const tflite::Model* model = tflite::GetModel(kTestConvModelData); + uint8_t arena[kTestConvArenaSize]; + + tflite::RecordingMicroAllocator* micro_allocator = + tflite::RecordingMicroAllocator::Create(arena, kTestConvArenaSize, + micro_test::reporter); + TF_LITE_MICRO_EXPECT_NE(micro_allocator, nullptr); + if (micro_allocator == nullptr) return 1; + + TfLiteTensor* tensor = micro_allocator->AllocatePersistentTfLiteTensor( + model, /*eval_tensors=*/nullptr, 0); + TF_LITE_MICRO_EXPECT_NE(tensor, nullptr); + if (tensor == nullptr) return 1; + + // Walk the model subgraph to find all tensors with quantization params and + // keep a tally. 
+ size_t quantized_channel_bytes = 0; + const tflite::Tensor* cur_tensor = + model->subgraphs()->Get(0)->tensors()->Get(0); + const tflite::QuantizationParameters* quantization_params = + cur_tensor->quantization(); + if (quantization_params && quantization_params->scale() && + quantization_params->scale()->size() > 0 && + quantization_params->zero_point() && + quantization_params->zero_point()->size() > 0) { + size_t num_channels = quantization_params->scale()->size(); + quantized_channel_bytes += TfLiteIntArrayGetSizeInBytes(num_channels); + } + + // Calculate the expected allocation bytes with subgraph quantization data: + size_t expected_requested_bytes = + TF_LITE_AFFINE_QUANTIZATION_SIZE + quantized_channel_bytes; + + tflite::RecordedAllocation recorded_allocation = + micro_allocator->GetRecordedAllocation( + tflite::RecordedAllocationType:: + kPersistentTfLiteTensorQuantizationData); + + // Each quantized tensors has 2 mallocs (quant struct, zero point dimensions): + TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.count, static_cast(2)); + TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.requested_bytes, + expected_requested_bytes); + TF_LITE_MICRO_EXPECT_GE(recorded_allocation.used_bytes, + expected_requested_bytes); } // TODO(b/158124094): Find a way to audit OpData allocations on diff --git a/tensorflow/lite/micro/test_helpers.cc b/tensorflow/lite/micro/test_helpers.cc index 6551683bfb2..5df5b34deaf 100644 --- a/tensorflow/lite/micro/test_helpers.cc +++ b/tensorflow/lite/micro/test_helpers.cc @@ -996,5 +996,13 @@ TfLiteTensor CreateSymmetricPerChannelQuantizedTensor( return result; } +size_t GetModelTensorCount(const Model* model) { + auto* subgraphs = model->subgraphs(); + if (subgraphs) { + return (*subgraphs)[0]->tensors()->size(); + } + return 0; +} + } // namespace testing } // namespace tflite diff --git a/tensorflow/lite/micro/test_helpers.h b/tensorflow/lite/micro/test_helpers.h index c2b489314d0..8941e394587 100644 --- a/tensorflow/lite/micro/test_helpers.h +++ b/tensorflow/lite/micro/test_helpers.h @@ -177,6 +177,9 @@ TfLiteTensor CreateSymmetricPerChannelQuantizedTensor( int* zero_points, TfLiteAffineQuantization* affine_quant, int quantized_dimension, bool is_variable = false); +// Returns the number of tensors in the default subgraph for a tflite::Model. +size_t GetModelTensorCount(const Model* model); + } // namespace testing } // namespace tflite From a5bbca201e40a3f91038d02257b8e11a6b4f79bf Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 20 Jul 2020 14:54:45 -0700 Subject: [PATCH 0865/2522] Update TPUUpdateEmbeddingEnqueueInput pass to create mode constants rather than depend on inputs from SelectV2 op. SelectV2 op may be constant folded away if the conditional value is from a const op. As so, create mode constant ("train" or "inference") based on presence of gradient op. 
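For illustration only (not part of the patch hunks below): a minimal C++ sketch of the new logic, based on the diff in this patch. The template arguments TF::ConstOp and TF::StringType are assumptions restored here, since the quoted diff has them stripped.

    // If a SendTPUEmbeddingGradients op shares this embedding attribute, the
    // graph is training; otherwise it is running inference.
    bool is_training = send_gradient_op_map.count(embedding_attr);
    llvm::SmallVector<llvm::StringRef, 1> mode{is_training ? "train" : "inference"};
    builder->setInsertionPoint(embedding_op);
    // Materialize the mode as a scalar string constant instead of reading it
    // from a SelectV2 result that may have been constant-folded away.
    auto enqueue_mode = builder->create<TF::ConstOp>(
        embedding_op->getLoc(),
        DenseStringElementsAttr::get(
            RankedTensorType::get({}, builder->getType<TF::StringType>()), mode));
    // The mode is always the last operand of the enqueue op.
    embedding_op->getOpOperand(embedding_op->getNumOperands() - 1)
        .set(enqueue_mode);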
PiperOrigin-RevId: 322231147 Change-Id: I6965dd9ca95566b4eaad6977f5c5e46563db03f6 --- ...pu_update_embedding_enqueue_op_inputs.mlir | 18 +++--- .../tpu_update_embedding_enqueue_op_inputs.cc | 56 +++++++++++++------ 2 files changed, 47 insertions(+), 27 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tpu_update_embedding_enqueue_op_inputs.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tpu_update_embedding_enqueue_op_inputs.mlir index 09e701e5dd3..47374b7f7d4 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tpu_update_embedding_enqueue_op_inputs.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tpu_update_embedding_enqueue_op_inputs.mlir @@ -9,16 +9,15 @@ // CHECK-SAME: %[[ARG_5:[a-z0-9]*]]: tensor // CHECK-SAME: %[[ARG_6:[a-z0-9]*]]: tensor // CHECK-SAME: %[[ARG_7:[a-z0-9]*]]: tensor -// CHECK-SAME: %[[ARG_8:[a-z0-9]*]]: tensor func @check_enqueue_ops_update_for_eval(%arg0: tensor, %arg1: tensor, %arg2 :tensor, %arg3: tensor, %arg4: tensor, %arg5: tensor, - %arg6: tensor, %arg7: tensor, %arg8: tensor) -> () { + %arg6: tensor, %arg7: tensor) -> () { // CHECK: %[[CONST_0:[a-z0-9]*]] = "tf.Const"() %0 = "tf.Const"() {value = dense<[]> : tensor<0xf32>} : () -> tensor<0xf32> - %1 = "tf.SelectV2"(%arg8, %arg6, %arg7) : (tensor, tensor, tensor) -> tensor - // CHECK: "tf.EnqueueTPUEmbeddingSparseTensorBatch"(%[[ARG_0]], %[[ARG_1]], %[[ARG_2]], %[[ARG_3]], %[[ARG_4]], %[[ARG_5]], %[[CONST_0]], %[[CONST_0]], %[[CONST_0]], %[[ARG_7]]) - "tf.EnqueueTPUEmbeddingSparseTensorBatch"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %0, %0, %0, %1) {_tpu_embedding_layer = "call1", _xla_outside_compilation = "0", combiners = ["mean", "sum"], device_ordinal = -1 : i64, max_sequence_lengths = [0, 0, 0], table_ids = [1, 1, 0]} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor<0xf32>, tensor<0xf32>, tensor<0xf32>, tensor) -> () + // CHECK: %[[CONST_MODE:[a-z0-9]*]] = "tf.Const"() {_xla_outside_compilation = "0", value = dense<"inference"> : tensor} : () -> tensor + // CHECK: "tf.EnqueueTPUEmbeddingSparseTensorBatch"(%[[ARG_0]], %[[ARG_1]], %[[ARG_2]], %[[ARG_3]], %[[ARG_4]], %[[ARG_5]], %[[CONST_0]], %[[CONST_0]], %[[CONST_0]], %[[CONST_MODE]]) + "tf.EnqueueTPUEmbeddingSparseTensorBatch"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %0, %0, %0, %arg7) {_tpu_embedding_layer = "call1", _xla_outside_compilation = "0", combiners = ["mean", "sum"], device_ordinal = -1 : i64, max_sequence_lengths = [0, 0, 0], table_ids = [1, 1, 0]} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor<0xf32>, tensor<0xf32>, tensor<0xf32>, tensor) -> () %2:2 = "tf.RecvTPUEmbeddingActivations"() {_tpu_embedding_layer = "call1", config = "\0A\0B\0C\0D"} : () -> (tensor<2x2xf32>, tensor<4x4xf32>) return } @@ -34,20 +33,19 @@ func @check_enqueue_ops_update_for_eval(%arg0: tensor, %arg1: tensor // CHECK-SAME: %[[ARG_6:[a-z0-9]*]]: tensor // CHECK-SAME: %[[ARG_7:[a-z0-9]*]]: tensor -// CHECK-SAME: %[[ARG_8:[a-z0-9]*]]: tensor func @check_enqueue_ops_update_for_training(%arg0: tensor, %arg1: tensor, %arg2 :tensor, %arg3: tensor, %arg4: tensor, %arg5: tensor, - %arg6: tensor, %arg7: tensor, %arg8: tensor) -> () { + %arg6: tensor, %arg7: tensor) -> () { // CHECK: %[[CONST_0:[a-z0-9]*]] = "tf.Const"() %0 = "tf.Const"() {value = dense<[]> : tensor<0xf32>} : () -> tensor<0xf32> - %1 = "tf.SelectV2"(%arg8, %arg6, %arg7) : (tensor, tensor, tensor) -> tensor %2 = "tf.Const"() {value = dense<0.0> : tensor<2x2xf32>} : () -> tensor<2x2xf32> %3 = "tf.Const"() {value = dense<0.0> : tensor<4x4xf32>} : () -> tensor<4x4xf32> 
"tf.SendTPUEmbeddingGradients"(%2, %3) {_tpu_embedding_layer = "call1", config = "\0A\0B\0C\0D", operand_segment_sizes = dense<[2, 0]> : vector<2xi32>} : (tensor<2x2xf32>, tensor<4x4xf32>) -> () - // CHECK: "tf.EnqueueTPUEmbeddingSparseTensorBatch"(%[[ARG_0]], %[[ARG_1]], %[[ARG_2]], %[[ARG_3]], %[[ARG_4]], %[[ARG_5]], %[[CONST_0]], %[[CONST_0]], %[[CONST_0]], %[[ARG_6]]) - "tf.EnqueueTPUEmbeddingSparseTensorBatch"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %0, %0, %0, %1) {_tpu_embedding_layer = "call1", _xla_outside_compilation = "0", combiners = ["mean", "sum"], device_ordinal = -1 : i64, max_sequence_lengths = [0, 0, 0], table_ids = [1, 1, 0]} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor<0xf32>, tensor<0xf32>, tensor<0xf32>, tensor) -> () + // CHECK: %[[CONST_MODE:[a-z0-9]*]] = "tf.Const"() {_xla_outside_compilation = "0", value = dense<"train"> : tensor} : () -> tensor + // CHECK: "tf.EnqueueTPUEmbeddingSparseTensorBatch"(%[[ARG_0]], %[[ARG_1]], %[[ARG_2]], %[[ARG_3]], %[[ARG_4]], %[[ARG_5]], %[[CONST_0]], %[[CONST_0]], %[[CONST_0]], %[[CONST_MODE]]) + "tf.EnqueueTPUEmbeddingSparseTensorBatch"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %0, %0, %0, %arg7) {_tpu_embedding_layer = "call1", _xla_outside_compilation = "0", combiners = ["mean", "sum"], device_ordinal = -1 : i64, max_sequence_lengths = [0, 0, 0], table_ids = [1, 1, 0]} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor<0xf32>, tensor<0xf32>, tensor<0xf32>, tensor) -> () %4:2 = "tf.RecvTPUEmbeddingActivations"() {_tpu_embedding_layer = "call1", config = "\0A\0B\0C\0D"} : () -> (tensor<2x2xf32>, tensor<4x4xf32>) return } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_update_embedding_enqueue_op_inputs.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_update_embedding_enqueue_op_inputs.cc index 820dec02b90..6cd9f763b87 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_update_embedding_enqueue_op_inputs.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_update_embedding_enqueue_op_inputs.cc @@ -13,24 +13,29 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/Casting.h" #include "mlir/IR/Attributes.h" // from @llvm-project #include "mlir/IR/Block.h" // from @llvm-project #include "mlir/IR/Builders.h" // from @llvm-project #include "mlir/IR/Operation.h" // from @llvm-project +#include "mlir/IR/StandardTypes.h" // from @llvm-project #include "mlir/IR/Value.h" // from @llvm-project #include "mlir/IR/Visitors.h" // from @llvm-project #include "mlir/Pass/Pass.h" // from @llvm-project #include "mlir/Pass/PassRegistry.h" // from @llvm-project +#include "mlir/Support/LLVM.h" // from @llvm-project #include "mlir/Support/LogicalResult.h" // from @llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" #include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" namespace mlir { namespace TFTPU { namespace { +constexpr char kXlaOutsideCompilationAttr[] = "_xla_outside_compilation"; constexpr char kTPUEmbeddingAttr[] = "_tpu_embedding_layer"; struct TPUUpdateEmbeddingEnqueueOpInputs @@ -86,7 +91,8 @@ LogicalResult FindTPUEmbeddingOps( LogicalResult UpdateEmbeddingEnqueueOpInput( const llvm::StringMap& enqueue_op_map, const llvm::StringMap& recv_activation_op_map, - const llvm::StringMap& send_gradient_op_map) { + const llvm::StringMap& send_gradient_op_map, + OpBuilder* builder) { for (const auto& it : enqueue_op_map) { const auto& embedding_attr = it.getKey(); Operation* embedding_op = it.second; @@ -96,21 +102,36 @@ LogicalResult UpdateEmbeddingEnqueueOpInput( << TF::RecvTPUEmbeddingActivationsOp::getOperationName() << "' op"; // TPU Embedding enqueue ops take different inputs depending on whether - // graph is in training mode or in eval/prediction mode. The inputs to the - // enqueue ops are present/listed as operands to SelectV2 op. Then branch - // operand of the SelectV2 op represents input to take during training - // and else branch operand represents input to take during - // prediction/evaluation. If SendTPUEmbeddingGradients op exists in the - // graph, then graph is in training mode, so correctly forward the input - // of SelectV2 op as operand to the TPU embedding enqueue op. + // graph is in training mode or in eval/prediction mode. During training, + // the mode parameter for TPUEmbeddingEnqueue op must be `train` and for + // evaluation or prediction, mode must be set to `inference`. + // If SendTPUEmbeddingGradients op exists in the graph, then graph is + // in training mode, so create a const op with value `train` use the + // output value of the constant as an operand to the TPU embedding + // enqueue op. bool is_training = send_gradient_op_map.count(embedding_attr); - for (auto enqueue_operand : embedding_op->getOperands()) { - if (auto select = llvm::dyn_cast_or_null( - enqueue_operand.getDefiningOp())) { - enqueue_operand.replaceAllUsesWith(is_training ? select.t() - : select.e()); - } - } + + // The last operand of TPUEmbeddingEnqueue ops is the mode which + // represents whether graph is in training mode or in evaluation mode. + auto& mode_enqueue_operand = + embedding_op->getOpOperand(embedding_op->getNumOperands() - 1); + + llvm::SmallVector mode_string_value; + mode_string_value.emplace_back(is_training ? 
"train" : "inference"); + builder->setInsertionPoint(embedding_op); + auto enqueue_mode = builder->create( + embedding_op->getLoc(), + DenseStringElementsAttr::get( + RankedTensorType::get({}, builder->getType()), + mode_string_value)); + + auto outside_compilation_attr = + embedding_op->getAttrOfType(kXlaOutsideCompilationAttr); + if (outside_compilation_attr) + enqueue_mode.setAttr(kXlaOutsideCompilationAttr, + outside_compilation_attr); + + mode_enqueue_operand.set(enqueue_mode); } return success(); @@ -140,8 +161,9 @@ void TPUUpdateEmbeddingEnqueueOpInputs::runOnFunction() { return signalPassFailure(); } - if (failed(UpdateEmbeddingEnqueueOpInput( - enqueue_op_map, recv_activation_op_map, send_gradient_op_map))) + if (failed(UpdateEmbeddingEnqueueOpInput(enqueue_op_map, + recv_activation_op_map, + send_gradient_op_map, &builder))) return signalPassFailure(); } From 886d44f5660a541efa9316dea995e3de6b4cb2d8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 20 Jul 2020 15:02:14 -0700 Subject: [PATCH 0866/2522] Fixes GitHub 34283 PiperOrigin-RevId: 322232817 Change-Id: I6940a3f452ec6e6974edcbb82420e73c0e8ca36b --- tensorflow/python/keras/layers/dense_attention.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/keras/layers/dense_attention.py b/tensorflow/python/keras/layers/dense_attention.py index d9a087ccb38..494f1c1c464 100644 --- a/tensorflow/python/keras/layers/dense_attention.py +++ b/tensorflow/python/keras/layers/dense_attention.py @@ -266,7 +266,7 @@ class Attention(BaseDenseAttention): value_input = tf.keras.Input(shape=(None,), dtype='int32') # Embedding lookup. - token_embedding = tf.keras.layers.Embedding(max_tokens, dimension) + token_embedding = tf.keras.layers.Embedding(input_dim=1000, output_dim=64) # Query embeddings of shape [batch_size, Tq, dimension]. query_embeddings = token_embedding(query_input) # Value embeddings of shape [batch_size, Tv, dimension]. From 1392800b57e7797c18113e7ed9ac5baa55a7eee2 Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Mon, 20 Jul 2020 22:09:45 +0000 Subject: [PATCH 0867/2522] api def TensorMapHasKey --- .../core/api_def/base_api/api_def_TensorMapHasKey.pbtxt | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 tensorflow/core/api_def/base_api/api_def_TensorMapHasKey.pbtxt diff --git a/tensorflow/core/api_def/base_api/api_def_TensorMapHasKey.pbtxt b/tensorflow/core/api_def/base_api/api_def_TensorMapHasKey.pbtxt new file mode 100644 index 00000000000..fc46a3abfd9 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_TensorMapHasKey.pbtxt @@ -0,0 +1,9 @@ +op { + graph_op_name: "TensorMapHasKey" + summary: "Returns whether the given key exists in the map." + description: < Date: Mon, 20 Jul 2020 15:03:46 -0700 Subject: [PATCH 0868/2522] Internal change PiperOrigin-RevId: 322233237 Change-Id: Ia0dc803f65dcc1f8304c0fd61a2a47f325b4fd53 --- tensorflow/core/kernels/cwise_op_exp.cc | 4 ++-- tensorflow/python/kernel_tests/cwise_ops_unary_test.py | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/kernels/cwise_op_exp.cc b/tensorflow/core/kernels/cwise_op_exp.cc index 48b6823cbdc..2b157f0e7a9 100644 --- a/tensorflow/core/kernels/cwise_op_exp.cc +++ b/tensorflow/core/kernels/cwise_op_exp.cc @@ -16,8 +16,8 @@ limitations under the License. 
#include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER6(UnaryOp, CPU, "Exp", functor::exp, float, Eigen::half, double, - bfloat16, complex64, complex128); +REGISTER5(UnaryOp, CPU, "Exp", functor::exp, float, Eigen::half, double, + complex64, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER5(UnaryOp, GPU, "Exp", functor::exp, float, Eigen::half, double, diff --git a/tensorflow/python/kernel_tests/cwise_ops_unary_test.py b/tensorflow/python/kernel_tests/cwise_ops_unary_test.py index 368f3509dc6..df848a653d4 100644 --- a/tensorflow/python/kernel_tests/cwise_ops_unary_test.py +++ b/tensorflow/python/kernel_tests/cwise_ops_unary_test.py @@ -389,7 +389,6 @@ class UnaryOpTest(test.TestCase): 2).reshape(1, 3, 2).astype(dtypes_lib.bfloat16.as_numpy_dtype) self._compareCpu(x, np.abs, math_ops.abs) self._compareCpu(x, np.abs, _ABS) - self._compareCpu(x, np.exp, math_ops.exp) self._compareBoth(x, np.negative, math_ops.negative) self._compareBoth(x, np.negative, _NEG) From cf3a2e3c5daf18b19dae42fae53c1d60649fae52 Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Mon, 20 Jul 2020 15:06:01 -0700 Subject: [PATCH 0869/2522] Remove the init_ops{_v2}.Initializer instance check in keras. PiperOrigin-RevId: 322233727 Change-Id: I6150bdd225be5d52c56995ee64a833986246477d --- tensorflow/python/keras/engine/BUILD | 2 -- tensorflow/python/keras/engine/base_layer_utils.py | 7 ++----- tensorflow/python/keras/layers/kernelized.py | 4 ++-- tensorflow/python/keras/layers/kernelized_test.py | 3 ++- 4 files changed, 6 insertions(+), 10 deletions(-) diff --git a/tensorflow/python/keras/engine/BUILD b/tensorflow/python/keras/engine/BUILD index 5f65923e6fe..c71069b3657 100644 --- a/tensorflow/python/keras/engine/BUILD +++ b/tensorflow/python/keras/engine/BUILD @@ -90,8 +90,6 @@ py_library( "//tensorflow/python:control_flow_v2_func_graphs", "//tensorflow/python:dtypes", "//tensorflow/python:framework_ops", - "//tensorflow/python:init_ops", - "//tensorflow/python:init_ops_v2", "//tensorflow/python:tf2", "//tensorflow/python:util", "//tensorflow/python:variables", diff --git a/tensorflow/python/keras/engine/base_layer_utils.py b/tensorflow/python/keras/engine/base_layer_utils.py index 4734fbb6ba9..597072e27f0 100644 --- a/tensorflow/python/keras/engine/base_layer_utils.py +++ b/tensorflow/python/keras/engine/base_layer_utils.py @@ -33,12 +33,11 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_util from tensorflow.python.ops import control_flow_util_v2 from tensorflow.python.ops import control_flow_v2_func_graphs -from tensorflow.python.ops import init_ops -from tensorflow.python.ops import init_ops_v2 from tensorflow.python.ops import variables as tf_variables from tensorflow.python.ops.ragged import ragged_tensor from tensorflow.python.training.tracking import base as tracking from tensorflow.python.util import nest +from tensorflow.python.util import tf_inspect from tensorflow.python.util.tf_export import keras_export _call_context = threading.local() @@ -118,9 +117,7 @@ def make_variable(name, variable_dtype = None else: # Instantiate initializer if provided initializer is a type object. 
- if isinstance( - initializer, - (type(init_ops.Initializer), type(init_ops_v2.Initializer))): + if tf_inspect.isclass(initializer): initializer = initializer() init_val = functools.partial(initializer, shape, dtype=dtype) variable_dtype = dtype.base_dtype diff --git a/tensorflow/python/keras/layers/kernelized.py b/tensorflow/python/keras/layers/kernelized.py index 5f401899bec..eac985e63bf 100644 --- a/tensorflow/python/keras/layers/kernelized.py +++ b/tensorflow/python/keras/layers/kernelized.py @@ -236,8 +236,8 @@ class RandomFourierFeatures(base_layer.Layer): def get_config(self): kernel_initializer = self.kernel_initializer - if isinstance(self.kernel_initializer, init_ops.Initializer): - kernel_initializer = initializers.serialize(self.kernel_initializer) + if not isinstance(kernel_initializer, six.string_types): + kernel_initializer = initializers.serialize(kernel_initializer) config = { 'output_dim': self.output_dim, 'kernel_initializer': kernel_initializer, diff --git a/tensorflow/python/keras/layers/kernelized_test.py b/tensorflow/python/keras/layers/kernelized_test.py index 3c836f1ccde..8ae3b2f31cb 100644 --- a/tensorflow/python/keras/layers/kernelized_test.py +++ b/tensorflow/python/keras/layers/kernelized_test.py @@ -25,6 +25,7 @@ import shutil from absl.testing import parameterized import numpy as np +import six from tensorflow.python.eager import context from tensorflow.python.framework import constant_op @@ -226,7 +227,7 @@ class RandomFourierFeaturesTest(test.TestCase, parameterized.TestCase): name='random_fourier_features', ) expected_initializer = initializer - if isinstance(initializer, init_ops.Initializer): + if not isinstance(initializer, six.string_types): expected_initializer = initializers.serialize(initializer) expected_dtype = ( From 5cb1025a172c23f2fc60d9396aa62ba8c2d13669 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Mon, 20 Jul 2020 15:06:53 -0700 Subject: [PATCH 0870/2522] Introduce initialize_platform parameter so that platforms can optionally be returned without forced initialization PiperOrigin-RevId: 322233924 Change-Id: I70a7d44887544d5b3030f4938d8d7fb0efe72bce --- .../stream_executor/multi_platform_manager.cc | 34 +++++++++++++++++-- .../stream_executor/multi_platform_manager.h | 7 ++++ 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/tensorflow/stream_executor/multi_platform_manager.cc b/tensorflow/stream_executor/multi_platform_manager.cc index 64543a8ae4d..6c767d1d66e 100644 --- a/tensorflow/stream_executor/multi_platform_manager.cc +++ b/tensorflow/stream_executor/multi_platform_manager.cc @@ -39,6 +39,14 @@ class MultiPlatformManagerImpl { port::StatusOr PlatformWithId(const Platform::Id& id) TF_LOCKS_EXCLUDED(mu_); + port::StatusOr PlatformWithName(absl::string_view target, + bool initialize_platform) + TF_LOCKS_EXCLUDED(mu_); + + port::StatusOr PlatformWithId(const Platform::Id& id, + bool initialize_platform) + TF_LOCKS_EXCLUDED(mu_); + port::StatusOr InitializePlatformWithName( absl::string_view target, const std::map& options) TF_LOCKS_EXCLUDED(mu_); @@ -104,10 +112,20 @@ port::Status MultiPlatformManagerImpl::RegisterPlatform( port::StatusOr MultiPlatformManagerImpl::PlatformWithName( absl::string_view target) { + return PlatformWithName(target, /*initialize_platform=*/true); +} + +port::StatusOr MultiPlatformManagerImpl::PlatformWithId( + const Platform::Id& id) { + return PlatformWithId(id, /*initialize_platform=*/true); +} + +port::StatusOr MultiPlatformManagerImpl::PlatformWithName( + absl::string_view target, bool 
initialize_platform) { absl::MutexLock lock(&mu_); SE_ASSIGN_OR_RETURN(Platform * platform, LookupByNameLocked(target)); - if (!platform->Initialized()) { + if (initialize_platform && !platform->Initialized()) { SE_RETURN_IF_ERROR(platform->Initialize({})); } @@ -115,11 +133,11 @@ port::StatusOr MultiPlatformManagerImpl::PlatformWithName( } port::StatusOr MultiPlatformManagerImpl::PlatformWithId( - const Platform::Id& id) { + const Platform::Id& id, bool initialize_platform) { absl::MutexLock lock(&mu_); SE_ASSIGN_OR_RETURN(Platform * platform, LookupByIdLocked(id)); - if (!platform->Initialized()) { + if (initialize_platform && !platform->Initialized()) { SE_RETURN_IF_ERROR(platform->Initialize({})); } @@ -250,6 +268,16 @@ MultiPlatformManagerImpl& Impl() { return Impl().PlatformWithId(id); } +/*static*/ port::StatusOr MultiPlatformManager::PlatformWithId( + const Platform::Id& id, bool initialize_platform) { + return Impl().PlatformWithId(id, initialize_platform); +} + +/*static*/ port::StatusOr MultiPlatformManager::PlatformWithName( + absl::string_view target, bool initialize_platform) { + return Impl().PlatformWithName(target, initialize_platform); +} + /*static*/ port::StatusOr MultiPlatformManager::InitializePlatformWithName( absl::string_view target, diff --git a/tensorflow/stream_executor/multi_platform_manager.h b/tensorflow/stream_executor/multi_platform_manager.h index 556015de790..fbb6effdf83 100644 --- a/tensorflow/stream_executor/multi_platform_manager.h +++ b/tensorflow/stream_executor/multi_platform_manager.h @@ -100,6 +100,13 @@ class MultiPlatformManager { static port::StatusOr PlatformWithName(absl::string_view target); static port::StatusOr PlatformWithId(const Platform::Id& id); + // Same functions as above, but allows platforms to be returned without + // initialization if initialize_platform == false. + static port::StatusOr PlatformWithName(absl::string_view target, + bool initialize_platform); + static port::StatusOr PlatformWithId(const Platform::Id& id, + bool initialize_platform); + // Retrieves the platform registered with the given platform name (e.g. // "CUDA", "OpenCL", ...) or id (an opaque, comparable value provided by the // Platform's Id() method). From e8f53d2144d9b50ee19563b9d3c67d0f0ead30c6 Mon Sep 17 00:00:00 2001 From: Haifeng Jin Date: Mon, 20 Jul 2020 15:08:44 -0700 Subject: [PATCH 0871/2522] export OperatorNotAllowedInGraphError to public for tf.keras to use public API of TensorFlow only. The change won't break any existing usages. PiperOrigin-RevId: 322234301 Change-Id: I7b09e7ce7afa293590f09adbd69d38b59afa6175 --- tensorflow/python/framework/errors_impl.py | 6 ++++++ ...errors.-operator-not-allowed-in-graph-error.pbtxt | 12 ++++++++++++ .../tools/api/golden/v2/tensorflow.errors.pbtxt | 4 ++++ 3 files changed, 22 insertions(+) create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.errors.-operator-not-allowed-in-graph-error.pbtxt diff --git a/tensorflow/python/framework/errors_impl.py b/tensorflow/python/framework/errors_impl.py index 48ed060556d..34daf43372a 100644 --- a/tensorflow/python/framework/errors_impl.py +++ b/tensorflow/python/framework/errors_impl.py @@ -48,7 +48,13 @@ class InaccessibleTensorError(ValueError): pass +@tf_export("errors.OperatorNotAllowedInGraphError", v1=[]) class OperatorNotAllowedInGraphError(TypeError): + """An error is raised for unsupported operator in Graph execution. + + For example, using a `tf.Tensor` as a Python `bool` in Graph execution + is not allowed. 
+ """ pass diff --git a/tensorflow/tools/api/golden/v2/tensorflow.errors.-operator-not-allowed-in-graph-error.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.errors.-operator-not-allowed-in-graph-error.pbtxt new file mode 100644 index 00000000000..29478c395a5 --- /dev/null +++ b/tensorflow/tools/api/golden/v2/tensorflow.errors.-operator-not-allowed-in-graph-error.pbtxt @@ -0,0 +1,12 @@ +path: "tensorflow.errors.OperatorNotAllowedInGraphError" +tf_class { + is_instance: "" + is_instance: "" + member { + name: "args" + mtype: "" + } + member_method { + name: "__init__" + } +} diff --git a/tensorflow/tools/api/golden/v2/tensorflow.errors.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.errors.pbtxt index 0a9ef10ef90..4933f33f30d 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.errors.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.errors.pbtxt @@ -84,6 +84,10 @@ tf_module { name: "OpError" mtype: "" } + member { + name: "OperatorNotAllowedInGraphError" + mtype: "" + } member { name: "OutOfRangeError" mtype: "" From b0e577259ac9e4e45fce65392b93dbb5c8d9780a Mon Sep 17 00:00:00 2001 From: Kibeom Kim Date: Mon, 20 Jul 2020 15:11:59 -0700 Subject: [PATCH 0872/2522] Add missing #include "third_party/absl/types/optional.h" PiperOrigin-RevId: 322234958 Change-Id: If0de8c153503386120f6273064deb6119222cb2c --- tensorflow/c/eager/BUILD | 1 + tensorflow/c/eager/immediate_execution_operation.h | 1 + 2 files changed, 2 insertions(+) diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index 0f728f1ebc3..fffd22f2698 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -309,6 +309,7 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core/util:abstract_stack_trace", + "@com_google_absl//absl/types:optional", "@com_google_absl//absl/types:span", ], ) diff --git a/tensorflow/c/eager/immediate_execution_operation.h b/tensorflow/c/eager/immediate_execution_operation.h index 8e22fb2d8b5..f599da6dadc 100644 --- a/tensorflow/c/eager/immediate_execution_operation.h +++ b/tensorflow/c/eager/immediate_execution_operation.h @@ -17,6 +17,7 @@ limitations under the License. #include +#include "absl/types/optional.h" #include "absl/types/span.h" #include "tensorflow/c/eager/abstract_operation.h" #include "tensorflow/c/eager/immediate_execution_tensor_handle.h" From de6c391468d031cc40f6a1bc2421a1f76ce99bbb Mon Sep 17 00:00:00 2001 From: Karim Nosir Date: Mon, 20 Jul 2020 15:16:09 -0700 Subject: [PATCH 0873/2522] NFC: Name patterns in optimize pass in TFL. PiperOrigin-RevId: 322235744 Change-Id: I55a0a6a7602bb8d34a3d254e55748f6ad4130b1c --- .../mlir/lite/transforms/optimize_patterns.td | 351 +++++++++--------- 1 file changed, 179 insertions(+), 172 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td b/tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td index 9746d9df1ea..0533e810377 100644 --- a/tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td +++ b/tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td @@ -37,22 +37,19 @@ class HasRankAtMost : Constraint< // Multi-pattern consisting of matching stand-alone convolution op followed by // activation op. 
multiclass FuseActFnIntoConvOpPat { - def : Pat<(ActFnOp (TFL_Conv2DOp:$conv_out $input, $filter, $bias, - $h_factor, $w_factor, TFL_AF_None, - $padding, $stride_h, $stride_w)), - (TFL_Conv2DOp $input, $filter, $bias, - $h_factor, $w_factor, ActFnAttr, - $padding, $stride_h, $stride_w), - [(HasOneUse $conv_out)]>; - def : Pat<(ActFnOp (TFL_DepthwiseConv2DOp:$conv_out $input, $filter, $bias, - $h_factor, $w_factor, TFL_AF_None, - $padding, $stride_h, $stride_w, - $multiplier)), - (TFL_DepthwiseConv2DOp $input, $filter, $bias, - $h_factor, $w_factor, ActFnAttr, - $padding, $stride_h, $stride_w, - $multiplier), - [(HasOneUse $conv_out)]>; + def FuseActivationFuncWithConv#ActFnOp#ActFnAttr : Pat< + (ActFnOp (TFL_Conv2DOp:$conv_out $input, $filter, $bias, $h_factor, + $w_factor, TFL_AF_None, $padding, $stride_h, $stride_w)), + (TFL_Conv2DOp $input, $filter, $bias, $h_factor, $w_factor, ActFnAttr, + $padding, $stride_h, $stride_w), + [(HasOneUse $conv_out)]>; + def FuseActivationFuncWithDepthwiseConv#ActFnOp#ActFnAttr : Pat< + (ActFnOp (TFL_DepthwiseConv2DOp:$conv_out $input, $filter, $bias, $h_factor, + $w_factor, TFL_AF_None, $padding, $stride_h, $stride_w, + $multiplier)), + (TFL_DepthwiseConv2DOp $input, $filter, $bias, $h_factor, $w_factor, + ActFnAttr, $padding, $stride_h, $stride_w, $multiplier), + [(HasOneUse $conv_out)]>; } // TODO(hinsu): Also fuse ops corresponding to SIGN_BIT fused @@ -73,33 +70,29 @@ class CanFuseConvOrDepthwiseConv : Constraint< // constant folding the bias and the binary op's constant operand. The following // pattern restricts to float constant values for now. multiclass FuseBinaryOpToPrecedingAffine { - def : Pat<(binaryOp (TFL_Conv2DOp:$output $input, $filter, - (ConstantOp F32ElementsAttr:$bias), - $h_factor, $w_factor, TFL_AF_None, - $padding, $stride_h, $stride_w), - (ConstantOp F32ElementsAttr:$value), $act_fn), - (TFL_Conv2DOp $input, $filter, - (binaryOp (ConstantOp $bias), - (ConstantOp $value), TFL_AF_None), - $h_factor, $w_factor, $act_fn, - $padding, $stride_h, $stride_w), - [(CanFuseConvOrDepthwiseConv<"false"> $filter, $value), - (HasOneUse $output)]>; - def : Pat<(binaryOp (TFL_DepthwiseConv2DOp:$output $input, $filter, - (ConstantOp F32ElementsAttr:$bias), - $h_factor, $w_factor, TFL_AF_None, - $padding, $stride_h, $stride_w, - $multiplier), - (ConstantOp F32ElementsAttr:$value), $act_fn), - (TFL_DepthwiseConv2DOp $input, $filter, - (binaryOp (ConstantOp $bias), - (ConstantOp $value), - TFL_AF_None), - $h_factor, $w_factor, $act_fn, - $padding, $stride_h, $stride_w, - $multiplier), - [(CanFuseConvOrDepthwiseConv<"true"> $filter, $value), - (HasOneUse $output)]>; + def FuseBinaryOpWithConv#binaryOp : Pat< + (binaryOp (TFL_Conv2DOp:$output $input, $filter, + (ConstantOp F32ElementsAttr:$bias), $h_factor, $w_factor, + TFL_AF_None, $padding, $stride_h, $stride_w), + (ConstantOp F32ElementsAttr:$value), $act_fn), + (TFL_Conv2DOp $input, $filter, + (binaryOp (ConstantOp $bias), + (ConstantOp $value), TFL_AF_None), + $h_factor, $w_factor, $act_fn, $padding, $stride_h, $stride_w), + [(CanFuseConvOrDepthwiseConv<"false"> $filter, $value), + (HasOneUse $output)]>; + def FuseBinaryOpWithDepthwiseConv#binaryOp : Pat< + (binaryOp (TFL_DepthwiseConv2DOp:$output $input, $filter, + (ConstantOp F32ElementsAttr:$bias), + $h_factor, $w_factor, TFL_AF_None, $padding, $stride_h, + $stride_w, $multiplier), + (ConstantOp F32ElementsAttr:$value), $act_fn), + (TFL_DepthwiseConv2DOp $input, $filter, + (binaryOp (ConstantOp $bias), (ConstantOp $value), TFL_AF_None), + 
$h_factor, $w_factor, $act_fn, $padding, $stride_h, $stride_w, + $multiplier), + [(CanFuseConvOrDepthwiseConv<"true"> $filter, $value), + (HasOneUse $output)]>; } foreach binaryOp = [TFL_AddOp, TFL_SubOp] in defm : FuseBinaryOpToPrecedingAffine; @@ -116,43 +109,43 @@ def ExpandTo4DForDepthwiseConv: NativeCodeCall< // The following pattern restricts to float constant values for now. multiclass FuseMulOrDivWithConv2dOrDepthwiseConv2d { - def : Pat<(BinaryOp (TFL_DepthwiseConv2DOp:$output $input, - (ConstantOp F32ElementsAttr:$filter), - (ConstantOp F32ElementsAttr:$bias), - $h_factor, $w_factor, TFL_AF_None, - $padding, $stride_h, $stride_w, - $multiplier), - (ConstantOp F32ElementsAttr:$value), $act_fn), - (TFL_DepthwiseConv2DOp $input, - (BinaryOp (ConstantOp $filter), - (ConstantOp - (ExpandTo4DForDepthwiseConv $value)), - TFL_AF_None), - (BinaryOp (ConstantOp $bias), - (ConstantOp $value), - TFL_AF_None), - $h_factor, $w_factor, $act_fn, - $padding, $stride_h, $stride_w, - $multiplier), - [(CanFuseConvOrDepthwiseConv<"true"> $filter, $value), - (HasOneUse $output)]>; - def : Pat<(BinaryOp (TFL_Conv2DOp:$conv_output $input, - (ConstantOp F32ElementsAttr:$filter), - (ConstantOp F32ElementsAttr:$bias), - $h_factor, $w_factor, TFL_AF_None, - $padding, $stride_h, $stride_w), - (ConstantOp F32ElementsAttr:$value), $act_fn), - (TFL_Conv2DOp $input, - (BinaryOp (ConstantOp $filter), - (ConstantOp (ExpandTo4DForConv $value)), - TFL_AF_None), - (BinaryOp (ConstantOp $bias), - (ConstantOp $value), - TFL_AF_None), - $h_factor, $w_factor, $act_fn, - $padding, $stride_h, $stride_w), - [(CanFuseConvOrDepthwiseConv<"false"> $filter, $value), - (HasOneUse $conv_output)]>; + def FuseMulOrDivWithDepthwiseConv#BinaryOp : Pat< + (BinaryOp (TFL_DepthwiseConv2DOp:$output $input, + (ConstantOp F32ElementsAttr:$filter), + (ConstantOp F32ElementsAttr:$bias), + $h_factor, $w_factor, TFL_AF_None, $padding, $stride_h, + $stride_w, $multiplier), + (ConstantOp F32ElementsAttr:$value), $act_fn), + (TFL_DepthwiseConv2DOp $input, + (BinaryOp + (ConstantOp $filter), + (ConstantOp (ExpandTo4DForDepthwiseConv $value)), + TFL_AF_None), + (BinaryOp + (ConstantOp $bias), + (ConstantOp $value), + TFL_AF_None), + $h_factor, $w_factor, $act_fn, $padding, $stride_h, + $stride_w, $multiplier), + [(CanFuseConvOrDepthwiseConv<"true"> $filter, $value), + (HasOneUse $output)]>; + def FuseMulOrDivWithConv#BinaryOp : Pat< + (BinaryOp (TFL_Conv2DOp:$conv_output $input, + (ConstantOp F32ElementsAttr:$filter), + (ConstantOp F32ElementsAttr:$bias), + $h_factor, $w_factor, TFL_AF_None, + $padding, $stride_h, $stride_w), + (ConstantOp F32ElementsAttr:$value), $act_fn), + (TFL_Conv2DOp $input, + (BinaryOp (ConstantOp $filter), + (ConstantOp (ExpandTo4DForConv $value)), + TFL_AF_None), + (BinaryOp (ConstantOp $bias), + (ConstantOp $value), + TFL_AF_None), + $h_factor, $w_factor, $act_fn, $padding, $stride_h, $stride_w), + [(CanFuseConvOrDepthwiseConv<"false"> $filter, $value), + (HasOneUse $conv_output)]>; } foreach BinaryOp = [TFL_DivOp, TFL_MulOp] in @@ -177,7 +170,7 @@ class OperandHasRank : Constraint< CPred<"$0.getType().cast().getRank() == " # n>>; // Matching HardSwish -def : Pat< +def MatchHardSwishPattern1 : Pat< (TFL_MulOp (TFL_MulOp $x, (TFL_AddOp @@ -190,7 +183,7 @@ def : Pat< (TFL_HardSwishOp $x), [(EqualOperands $x, $y)]>; -def : Pat< +def MatchHardSwishPattern2 : Pat< (TFL_MulOp $x, (TFL_MulOp @@ -207,7 +200,7 @@ def : Pat< // Matching HardSwish with extra FakeQuant. 
These FakeQuant ops were due to // incorrect placement in the quantization aware training. // TODO(b/149735743): We should make the placement automatically. -def : Pat< +def MatchHardSwishQuantized : Pat< (TFL_MulOp (TFL_DequantizeOp (TFL_QuantizeOp (TFL_MulOp $x, (TFL_DequantizeOp (TFL_QuantizeOp (TFL_AddOp @@ -238,7 +231,8 @@ multiclass L2NormalizePatterns { // This pattern constructs L2NormalizationOp from // Mul->Rsqrt->Sum->Square Or // Div->sqrt->Sum->Square - def : Pat<(FirstOp $operand1, + def L2NormalizePattern1#FirstOp#SecondOp : Pat< + (FirstOp $operand1, (SecondOp (TFL_SumOp (TFL_SquareOp:$sq_op $square_operand), @@ -251,7 +245,8 @@ multiclass L2NormalizePatterns { // Below patterns for L2Normalize when there is an Add or Maximum // adding or clamping to a small constant scalar. - def : Pat<(FirstOp $operand1, + def L2NormalizePattern2#FirstOp#SecondOp : Pat< + (FirstOp $operand1, (SecondOp (TFL_AddOp (TFL_SumOp @@ -265,7 +260,8 @@ multiclass L2NormalizePatterns { (L2NormValidReduceIndex $sq_op, $axis), (ConstDoubleValueLessThan<"1e-3"> $epsilon)]>; - def : Pat<(FirstOp $operand1, + def L2NormalizePattern3#FirstOp#SecondOp : Pat< + (FirstOp $operand1, (SecondOp (TFL_MaximumOp (TFL_SumOp @@ -302,14 +298,16 @@ def HaveSameType : Constraint>; // Pattern for skipping Tile if it is mainly for broadcasting and the // Op is already supporting broadcasting. multiclass FuseTileBroadcastIntoFollowingBinary { - def : Pat<(BinaryOp:$result (TFL_TileOp $input, (ConstantOp $tile)), - $operand, $act_func), - (BinaryOp $input, $operand, $act_func), + def FuseTileBroadcastToBinaryOp1#BinaryOp : Pat< + (BinaryOp:$result (TFL_TileOp $input, (ConstantOp $tile)), + $operand, $act_func), + (BinaryOp $input, $operand, $act_func), [(OperandsBroadcastToOutputType $input, $operand, $result)]>; - def : Pat<(BinaryOp:$result $operand, - (TFL_TileOp $input, (ConstantOp $tile)), $act_func), - (BinaryOp $operand, $input, $act_func), + def FuseTileBroadcastToBinaryOp2#BinaryOp : Pat< + (BinaryOp:$result $operand, + (TFL_TileOp $input, (ConstantOp $tile)), $act_func), + (BinaryOp $operand, $input, $act_func), [(OperandsBroadcastToOutputType $operand, $input, $result)]>; } @@ -318,9 +316,10 @@ multiclass FusedBinaryActivationFuncOpPat { foreach actFnPair = [[TFL_ReluOp, TFL_AF_Relu], [TFL_Relu6Op, TFL_AF_Relu6], [TFL_Relu1Op, TFL_AF_Relu1]] in { - def : Pat<(actFnPair[0] (BinaryOp:$binary_out $lhs, $rhs, TFL_AF_None)), - (BinaryOp $lhs, $rhs, actFnPair[1]), - [(HasOneUse $binary_out)]>; + def FuseBinaryWithActivation#BinaryOp#actFnPair[0] : Pat< + (actFnPair[0] (BinaryOp:$binary_out $lhs, $rhs, TFL_AF_None)), + (BinaryOp $lhs, $rhs, actFnPair[1]), + [(HasOneUse $binary_out)]>; } } @@ -340,21 +339,22 @@ foreach BinaryOp = [TFL_AddOp, TFL_SubOp, TFL_DivOp, TFL_MulOp] in { // transformation, the shape of the binary op result is [40x1600], which // couldn't be reshaped to [1,40,40]. `IsTailOfShape` constraint is added to // make sure $rhs is the tail shape of $lhs. - def : Pat<(BinaryOp (TFL_ReshapeOp:$lhs $input, (ConstantOp:$shape $s)), - (ConstantOp:$rhs $a), TFL_AF_None), - (TFL_ReshapeOp (BinaryOp $input, $rhs, TFL_AF_None), $shape), - // The broadcasting of "BinaryOp" only happens in the lower - // dimensions, and the higher dimensions are same, so we know the - // result and input of the "BinaryOp" in the source pattern have - // the same shape, which is defined by `shape`. - [(IsTailOfShape $rhs, $lhs), - (HasOneUse $lhs), - // The result of the new "BinaryOp" will have the same shape as - // `input`. 
In other words, the shape of the `Reshape` op are not - // changed after the transformation. - (IsTailOfShape $rhs, $input), - (HasRankAtMost<5> $input), - (HasRankAtMost<5> $rhs)]>; + def MoveBinaryOpBeforeReshape#BinaryOp : Pat< + (BinaryOp (TFL_ReshapeOp:$lhs $input, (ConstantOp:$shape $s)), + (ConstantOp:$rhs $a), TFL_AF_None), + (TFL_ReshapeOp (BinaryOp $input, $rhs, TFL_AF_None), $shape), + // The broadcasting of "BinaryOp" only happens in the lower + // dimensions, and the higher dimensions are same, so we know the + // result and input of the "BinaryOp" in the source pattern have + // the same shape, which is defined by `shape`. + [(IsTailOfShape $rhs, $lhs), + (HasOneUse $lhs), + // The result of the new "BinaryOp" will have the same shape as + // `input`. In other words, the shape of the `Reshape` op are not + // changed after the transformation. + (IsTailOfShape $rhs, $input), + (HasRankAtMost<5> $input), + (HasRankAtMost<5> $rhs)]>; } foreach BinaryOp = [TFL_FloorDivOp, TFL_FloorModOp, TFL_MinimumOp, @@ -370,19 +370,20 @@ foreach BinaryOp = [TFL_FloorDivOp, TFL_FloorModOp, TFL_MinimumOp, // transformation, the shape of the binary op result is [40x1600], which // couldn't be reshaped to [1,40,40]. `IsTailOfShape` constraint is added to // make sure $rhs is the tail shape of $lhs. - def : Pat<(BinaryOp (TFL_ReshapeOp:$lhs $input, (ConstantOp:$shape $s)), - (ConstantOp:$rhs $a)), - (TFL_ReshapeOp (BinaryOp $input, $rhs), $shape), - // The broadcasting of "BinaryOp" only happens in the lower - // dimensions, and the higher dimensions are same, so we know the - // result and input of the "BinaryOp" in the source pattern have - // the same shape, which is defined by `shape`. - [(IsTailOfShape $rhs, $lhs), - (HasOneUse $lhs), - // The result of the new "BinaryOp" will have the same shape as - // `input`. In other words, the shape of the `Reshape` op are not - // changed after the transformation. - (IsTailOfShape $rhs, $input)]>; + def MoveBinaryOpBeforeReshape#BinaryOp : Pat< + (BinaryOp (TFL_ReshapeOp:$lhs $input, (ConstantOp:$shape $s)), + (ConstantOp:$rhs $a)), + (TFL_ReshapeOp (BinaryOp $input, $rhs), $shape), + // The broadcasting of "BinaryOp" only happens in the lower + // dimensions, and the higher dimensions are same, so we know the + // result and input of the "BinaryOp" in the source pattern have + // the same shape, which is defined by `shape`. + [(IsTailOfShape $rhs, $lhs), + (HasOneUse $lhs), + // The result of the new "BinaryOp" will have the same shape as + // `input`. In other words, the shape of the `Reshape` op are not + // changed after the transformation. + (IsTailOfShape $rhs, $input)]>; } // Reorder the element-wise value operations and the element move operations, @@ -392,9 +393,10 @@ foreach ValueOp = [TFL_CeilOp, TFL_ExpOp, TFL_FloorOp, TFL_NegOp, TFL_TanhOp, TFL_SqrtOp, TFL_SquareOp] in { foreach MoveOp = [TFL_DepthToSpaceOp, TFL_ExpandDimsOp, TFL_SqueezeOp, TFL_ReshapeOp, TFL_TransposeOp] in { - def : Pat<(ValueOp:$value (MoveOp:$move $input, $move_def)), - (MoveOp (ValueOp $input), $move_def), - [(HasOneUse $move)]>; + def ReorderElementwiseAndMoveOperations#ValueOp#MoveOp : Pat< + (ValueOp:$value (MoveOp:$move $input, $move_def)), + (MoveOp (ValueOp $input), $move_def), + [(HasOneUse $move)]>; } } @@ -403,16 +405,16 @@ foreach ValueOp = [TFL_CeilOp, TFL_ExpOp, TFL_FloorOp, TFL_NegOp, def GetShape: NativeCodeCall<"GetShape($0)">; // Convert squeeze to reshape if possible. 
-def : Pat<(TFL_SqueezeOp:$squeeze_op $input, $squeeze_dims), - (TFL_ReshapeOp $input, - (ConstantOp (GetShape $squeeze_op))), - [(AnyStaticShapeTensor $squeeze_op)]>; +def ConvertSqueezeToReshape : Pat< + (TFL_SqueezeOp:$squeeze_op $input, $squeeze_dims), + (TFL_ReshapeOp $input, (ConstantOp (GetShape $squeeze_op))), + [(AnyStaticShapeTensor $squeeze_op)]>; // Convert expand_dims to reshape if possible. -def : Pat<(TFL_ExpandDimsOp:$expand_dims_op $input, $dim), - (TFL_ReshapeOp $input, - (ConstantOp (GetShape $expand_dims_op))), - [(AnyStaticShapeTensor $expand_dims_op)]>; +def ConvertExpandDimsToReshape : Pat< + (TFL_ExpandDimsOp:$expand_dims_op $input, $dim), + (TFL_ReshapeOp $input, (ConstantOp (GetShape $expand_dims_op))), + [(AnyStaticShapeTensor $expand_dims_op)]>; class FloatValueEquals : Constraint().getNumElements() == 1 &&" @@ -420,25 +422,27 @@ class FloatValueEquals : Constraint().getValues().begin() == " # val>>; // ReLU patterns -def : Pat<(TFL_MinimumOp (TFL_MaximumOp $input, - (ConstantOp $NegOne)), - (ConstantOp $One)), - (TFL_Relu1Op $input), - [(FloatValueEquals<"-1"> $NegOne), (FloatValueEquals<"1"> $One)]>; +def MatchRelu1Pattern1 : Pat< + (TFL_MinimumOp (TFL_MaximumOp $input, (ConstantOp $NegOne)), + (ConstantOp $One)), + (TFL_Relu1Op $input), + [(FloatValueEquals<"-1"> $NegOne), (FloatValueEquals<"1"> $One)]>; -def : Pat<(TFL_MaximumOp (TFL_MinimumOp $input, - (ConstantOp $One)), - (ConstantOp $NegOne)), - (TFL_Relu1Op $input), - [(FloatValueEquals<"-1"> $NegOne), (FloatValueEquals<"1"> $One)]>; +def MatchRelu1Pattern2 : Pat< + (TFL_MaximumOp (TFL_MinimumOp $input, (ConstantOp $One)), + (ConstantOp $NegOne)), + (TFL_Relu1Op $input), + [(FloatValueEquals<"-1"> $NegOne), (FloatValueEquals<"1"> $One)]>; -def : Pat<(TFL_MaximumOp (TFL_MulOp:$mul_out $input1, - (ConstantOp F32ElementsAttr:$alpha), TFL_AF_None), - $input2), - (TFL_LeakyReluOp $input1, ExtractSingleElementAsFloat:$alpha), - [(ConstDoubleValueLessThan<"1"> $alpha), - (EqualOperands $input1, $input2), - (HasOneUse $mul_out)]>; +def MatchLeakyRelu : Pat< + (TFL_MaximumOp + (TFL_MulOp:$mul_out $input1, + (ConstantOp F32ElementsAttr:$alpha), TFL_AF_None), + $input2), + (TFL_LeakyReluOp $input1, ExtractSingleElementAsFloat:$alpha), + [(ConstDoubleValueLessThan<"1"> $alpha), + (EqualOperands $input1, $input2), + (HasOneUse $mul_out)]>; def RemoveTrivialCast : Pat<(TFL_CastOp:$output $input), (replaceWithValue $input), @@ -451,23 +455,25 @@ def PReluAlphaRankCheck : Constraint< // PReLU pattern from Keras: // f(x) = Relu(x) + (-alpha * Relu(-x)) -def : Pat<(TFL_AddOp - (TFL_ReluOp:$relu_out $input1), - (TFL_MulOp:$mul_out - (TFL_ReluOp (TFL_NegOp:$input_neg_out $input2)), - $neg_alpha, - TFL_AF_None), - TFL_AF_None), - (TFL_PReluOp $input1, (TFL_NegOp $neg_alpha)), - [(EqualOperands $input1, $input2), - (PReluAlphaRankCheck $neg_alpha, $input1), - (HasOneUse $relu_out), - (HasOneUse $mul_out), - (HasOneUse $input_neg_out)]>; +def MatchPRelu : Pat< + (TFL_AddOp + (TFL_ReluOp:$relu_out $input1), + (TFL_MulOp:$mul_out + (TFL_ReluOp (TFL_NegOp:$input_neg_out $input2)), + $neg_alpha, + TFL_AF_None), + TFL_AF_None), + (TFL_PReluOp $input1, (TFL_NegOp $neg_alpha)), + [(EqualOperands $input1, $input2), + (PReluAlphaRankCheck $neg_alpha, $input1), + (HasOneUse $relu_out), + (HasOneUse $mul_out), + (HasOneUse $input_neg_out)]>; // The constant folding in this pass might produce constant in the tf dialect. // This rule is to legalize these constant to the tfl dialect. 
-def : Pat<(TF_ConstOp ElementsAttr:$value), (TFL_ConstOp $value)>; +def LegalizeConstOp : Pat< + (TF_ConstOp ElementsAttr:$value), (TFL_ConstOp $value)>; // Reorders adds to allow constant folding. // Add --> Add $input, $constantA @@ -476,13 +482,14 @@ def : Pat<(TF_ConstOp ElementsAttr:$value), (TFL_ConstOp $value)>; // Add --> $input // \--> Add ($constantA, $constantB) foreach ActFun = [TFL_AF_Relu, TFL_AF_Relu6, TFL_AF_Relu1, TFL_AF_None] in { - def : Pat<(TFL_AddOp - (TFL_AddOp:$first_output $input, (ConstantOp $a), TFL_AF_None), - (ConstantOp $b), ActFun), - (TFL_AddOp $input, - (TFL_AddOp (ConstantOp $a), (ConstantOp $b), TFL_AF_None), - ActFun), - [(HasOneUse $first_output)]>; + def ReorderAddToAllowConstFold_ActFunc_#ActFun : Pat< + (TFL_AddOp + (TFL_AddOp:$first_output $input, (ConstantOp $a), TFL_AF_None), + (ConstantOp $b), ActFun), + (TFL_AddOp $input, + (TFL_AddOp (ConstantOp $a), (ConstantOp $b), TFL_AF_None), + ActFun), + [(HasOneUse $first_output)]>; } // We can eliminate Relu from Relu(SquaredDifference(x, y)), From ad8cdb5e558d2274a1c9600c9d8d929744bc4ec3 Mon Sep 17 00:00:00 2001 From: Jaesung Chung Date: Mon, 20 Jul 2020 15:31:53 -0700 Subject: [PATCH 0874/2522] Enable upgrade_legacy flag in Lite's V2 saved model converter PiperOrigin-RevId: 322238994 Change-Id: I3e244afa9f1e785498b023e7bf5605555fdc395c --- .../lite/python/saved_model_to_tfl_flatbuffer.cc | 5 ++++- tensorflow/compiler/mlir/lite/tf_tfl_translate.cc | 12 +++++++----- .../compiler/mlir/lite/tf_to_tfl_flatbuffer.cc | 5 +++-- tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.h | 3 ++- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/python/saved_model_to_tfl_flatbuffer.cc b/tensorflow/compiler/mlir/lite/python/saved_model_to_tfl_flatbuffer.cc index ddd36fbd74c..dafcfd11147 100644 --- a/tensorflow/compiler/mlir/lite/python/saved_model_to_tfl_flatbuffer.cc +++ b/tensorflow/compiler/mlir/lite/python/saved_model_to_tfl_flatbuffer.cc @@ -151,10 +151,13 @@ Status ConvertSavedModelToTFLiteFlatBuffer( return errors::Unimplemented("Only support a single exported name."); } + tensorflow::GraphImportConfig specs; + specs.upgrade_legacy = true; + TF_ASSIGN_OR_RETURN(auto module, ImportSavedModel(model_flags.saved_model_dir(), model_flags.saved_model_version(), tags, - exported_names, &context)); + exported_names, specs, &context)); if (!model_flags.input_arrays().empty() || !model_flags.output_arrays().empty()) { diff --git a/tensorflow/compiler/mlir/lite/tf_tfl_translate.cc b/tensorflow/compiler/mlir/lite/tf_tfl_translate.cc index 963ab743a83..046c7bbbcf0 100644 --- a/tensorflow/compiler/mlir/lite/tf_tfl_translate.cc +++ b/tensorflow/compiler/mlir/lite/tf_tfl_translate.cc @@ -144,6 +144,10 @@ int main(int argc, char **argv) { StatusOr module; + tensorflow::GraphImportConfig specs; + specs.upgrade_legacy = upgrade_legacy; + specs.prune_unused_nodes = true; + // TODO(b/147435528): We need to test the e2e behavior once the graph freezing // inside mlir is done. 
if (import_saved_model_object_graph || import_saved_model_signature_defs) { @@ -168,12 +172,10 @@ int main(int argc, char **argv) { return kTrFailure; } - module = tensorflow::ImportSavedModel(input_file_name, saved_model_version, - tags, exported_names, &context); + module = + tensorflow::ImportSavedModel(input_file_name, saved_model_version, tags, + exported_names, specs, &context); } else { - tensorflow::GraphImportConfig specs; - specs.upgrade_legacy = upgrade_legacy; - specs.prune_unused_nodes = true; module = tensorflow::LoadFromGraphdefOrMlirSource( input_file_name, input_mlir, use_splatted_constant, custom_opdefs, specs, debug_info_file, input_arrays, input_dtypes, input_shapes, diff --git a/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.cc b/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.cc index 714bc493bed..414a0de0118 100644 --- a/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.cc +++ b/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.cc @@ -186,7 +186,8 @@ Status ConvertTFExecutorToTFLOrFlatbuffer( StatusOr ImportSavedModel( const std::string& input_filename, const int saved_model_version, const std::unordered_set& tags, - absl::Span exported_names, mlir::MLIRContext* context) { + absl::Span exported_names, const GraphImportConfig& specs, + mlir::MLIRContext* context) { if (saved_model_version == 2) { auto module_or = tensorflow::SavedModelObjectGraphToMlirImport( input_filename, tags, exported_names, context); @@ -194,7 +195,7 @@ StatusOr ImportSavedModel( return module_or.ConsumeValueOrDie(); } else if (saved_model_version == 1) { auto module_or = tensorflow::SavedModelSignatureDefsToMlirImport( - input_filename, tags, exported_names, context); + input_filename, tags, exported_names, context, specs.upgrade_legacy); if (!module_or.status().ok()) return module_or.status(); return module_or.ConsumeValueOrDie(); diff --git a/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.h b/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.h index 4ad58c4f8ef..8f1edec8879 100644 --- a/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.h +++ b/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.h @@ -48,7 +48,8 @@ LoadFromGraphdefOrMlirSource( stream_executor::port::StatusOr ImportSavedModel( const std::string& input_filename, const int saved_model_version, const std::unordered_set& tags, - absl::Span exported_names, mlir::MLIRContext* context); + absl::Span exported_names, const GraphImportConfig& specs, + mlir::MLIRContext* context); // Taking a MLIR module in TF executor dialect and a set of parameters, // applies a set of passes to convert the module to TF Lite dialect and From b645ed7fc059cef29d0577522d79aa99a85f81d9 Mon Sep 17 00:00:00 2001 From: Tomer Kaftan Date: Mon, 20 Jul 2020 15:33:58 -0700 Subject: [PATCH 0875/2522] Add a `convert_to_tensor` to the start of Tensor.__getitem__ (_slice_helper) to make sure it dispatches directly, rather than letting the nested tf.strided_slice trigger dispatching. This is important because `tensor.__getitem__` does some input arg manipulation before getting to the `tf.strided_slice`. So, when we try to run the traced code using the args provided to `strided_slice` (e.g. for KerasTensors), we lose information about constants that TPUs need to compile graphs involving shape manipulation. Tracing `__getitem__` and its input args directly does not seem to run into this problem. (Note: this TPU situation is separate from the shape value inferring we do in KerasTensors during Functional API construction/tracing time. 
This happens at model run-time when running the already-traced code) To get this all to work correctly in practice, this CL also has to: * Add tf.nest support for flattening & packing python `slice` objects, in case the slice object contains symbolic tensors/values to trace * Add serialization/deserialization support for `slice` and `ellipsis` objects in Keras PiperOrigin-RevId: 322239438 Change-Id: If9b72368dff8bd50b61a1adbc6162f0a8da684d3 --- .../keras/layers/tensorflow_op_layer_test.py | 109 ++++++++++++++++++ .../keras/saving/saved_model/json_utils.py | 7 ++ tensorflow/python/keras/utils/tf_utils.py | 2 + tensorflow/python/ops/array_ops.py | 2 + tensorflow/python/util/dispatch_test.py | 63 +++++++++- tensorflow/python/util/nest.py | 17 ++- tensorflow/python/util/nest_test.py | 21 ++++ tensorflow/python/util/serialization.py | 10 ++ tensorflow/python/util/util.cc | 42 +++++++ tensorflow/python/util/util.h | 9 ++ tensorflow/python/util/util_wrapper.cc | 18 +++ 11 files changed, 293 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/keras/layers/tensorflow_op_layer_test.py b/tensorflow/python/keras/layers/tensorflow_op_layer_test.py index cb044260106..7baaa6a4ddc 100644 --- a/tensorflow/python/keras/layers/tensorflow_op_layer_test.py +++ b/tensorflow/python/keras/layers/tensorflow_op_layer_test.py @@ -294,6 +294,115 @@ class AutoLambdaTest(keras_parameterized.TestCase): self.assertAllEqual([layer.name for layer in model.layers], [layer.name for layer in new_model.layers]) + def test_getitem_slice_with_step_only(self): + if not context.executing_eagerly(): + self.skipTest('Complex slicing like this fails in v1') + inp = keras.Input(shape=(4, 3, 8)) + slice_step = keras.Input(shape=(), dtype='int32') + + out = inp[..., ::slice_step[0]] + model = keras.Model( + inputs=[inp, slice_step], + outputs=out) + model.compile( + adam.Adam(0.001), + 'mse', + run_eagerly=testing_utils.should_run_eagerly()) + batch_size = 7 + step = 3 + x = array_ops.stack([ + math_ops.range(8) for _ in range(batch_size)]) + args = [x, constant_op.constant(step, shape=(batch_size,))] + expected = array_ops.stack([ + math_ops.range(8)[::step] for _ in range(batch_size)]) + + self.assertAllEqual(model(args), expected) + self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) + + # Make sure it can be successfully saved and loaded + config = model.get_config() + model = keras.Model.from_config(config) + + self.assertAllEqual(model(args), expected) + self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) + + def test_getitem_slice_with_stop_only(self): + if not context.executing_eagerly(): + self.skipTest('Complex slicing like this fails in v1') + inp = keras.Input(shape=(4, 3, 8)) + slice_stop = keras.Input(shape=(), dtype='int32') + + out = inp[..., :slice_stop[0]] + model = keras.Model( + inputs=[inp, slice_stop], + outputs=out) + model.compile( + adam.Adam(0.001), + 'mse', + run_eagerly=testing_utils.should_run_eagerly()) + batch_size = 7 + stop = 6 + x = array_ops.stack([ + math_ops.range(8) for _ in range(batch_size)]) + args = [x, constant_op.constant(stop, shape=(batch_size,))] + expected = array_ops.stack([ + math_ops.range(8)[:stop] for _ in range(batch_size)]) + + self.assertAllEqual(model(args), expected) + self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) + + # Make sure it can be successfully saved and loaded + config = model.get_config() + model = keras.Model.from_config(config) + + self.assertAllEqual(model(args), expected) + 
self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) + + def test_getitem_complex_slicing(self): + if not context.executing_eagerly(): + self.skipTest('Complex slicing like this fails in v1') + inp = keras.Input(shape=(4, 3, 8)) + first_dim = keras.Input(shape=(), dtype='int32') + slice_start = keras.Input(shape=(), dtype='int32') + slice_stop = keras.Input(shape=(), dtype='int32') + slice_stride = keras.Input(shape=(), dtype='int32') + + out = inp[..., first_dim[0], slice_start[0]:slice_stop[0]:slice_stride[0]] + model = keras.Model( + inputs=[inp, first_dim, slice_start, slice_stop, slice_stride], + outputs=out) + model.compile( + adam.Adam(0.001), + 'mse', + run_eagerly=testing_utils.should_run_eagerly()) + batch_size = 7 + start = 1 + stop = 6 + step = 2 + x = array_ops.stack([array_ops.stack([array_ops.stack([ + math_ops.range(8) + for _ in range(3)]) for _ in range(4)]) for _ in range(batch_size)]) + args = [x, + constant_op.constant(0, shape=(batch_size,)), + constant_op.constant(start, shape=(batch_size,)), + constant_op.constant(stop, shape=(batch_size,)), + constant_op.constant(step, shape=(batch_size,))] + # Slice the innermost dim. only grab one index from the second-to-innermost + # dim, removing that dim from the shape. + expected = array_ops.stack([array_ops.stack([ + math_ops.range(8)[start:stop:step] + for _ in range(4)]) for _ in range(batch_size)]) + + self.assertAllEqual(model(args), expected) + self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) + + # Make sure it can be successfully saved and loaded + config = model.get_config() + model = keras.Model.from_config(config) + + self.assertAllEqual(model(args), expected) + self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) + def test_numerical_correctness_simple(self): x = ops.convert_to_tensor_v2([[-1., 0., -2., 1.]]) inputs = keras.Input(shape=(4,)) diff --git a/tensorflow/python/keras/saving/saved_model/json_utils.py b/tensorflow/python/keras/saving/saved_model/json_utils.py index 0ac86d4e692..df198266899 100644 --- a/tensorflow/python/keras/saving/saved_model/json_utils.py +++ b/tensorflow/python/keras/saving/saved_model/json_utils.py @@ -61,9 +61,16 @@ def decode(json_string): def _decode_helper(obj): + """A decoding helper that is TF-object aware.""" if isinstance(obj, dict) and 'class_name' in obj: if obj['class_name'] == 'TensorShape': return tensor_shape.TensorShape(obj['items']) elif obj['class_name'] == '__tuple__': return tuple(_decode_helper(i) for i in obj['items']) + elif obj['class_name'] == '__slice__': + return slice(start=_decode_helper(obj['start']), + stop=_decode_helper(obj['stop']), + step=_decode_helper(obj['step'])) + elif obj['class_name'] == '__ellipsis__': + return Ellipsis return obj diff --git a/tensorflow/python/keras/utils/tf_utils.py b/tensorflow/python/keras/utils/tf_utils.py index c9ad96cd37b..8c3028fa8ef 100644 --- a/tensorflow/python/keras/utils/tf_utils.py +++ b/tensorflow/python/keras/utils/tf_utils.py @@ -181,6 +181,8 @@ def map_structure_with_atomic(is_atomic_fn, map_fn, nested): values = [nested[k] for k in nest._sorted(nested)] elif nest._is_attrs(nested): values = _astuple(nested) + elif nest._is_slice(nested): + values = (nested.start, nested.stop, nested.step) else: values = nested mapped_values = [ diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index b8711a444a8..fab2d7b8713 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -955,6 
+955,8 @@ def _slice_helper(tensor, slice_spec, var=None): TypeError: If the slice indices aren't int, slice, ellipsis, tf.newaxis or scalar int32/int64 tensors. """ + tensor = ops.convert_to_tensor(tensor) + if isinstance(slice_spec, bool) or \ (isinstance(slice_spec, ops.Tensor) and slice_spec.dtype == dtypes.bool) or \ (isinstance(slice_spec, np.ndarray) and slice_spec.dtype == bool): diff --git a/tensorflow/python/util/dispatch_test.py b/tensorflow/python/util/dispatch_test.py index cc4fed0abb7..db73dff57e0 100644 --- a/tensorflow/python/util/dispatch_test.py +++ b/tensorflow/python/util/dispatch_test.py @@ -20,6 +20,7 @@ from __future__ import print_function from tensorflow.python.framework import ops from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops.proto_ops import decode_proto @@ -28,6 +29,7 @@ from tensorflow.python.platform import test from tensorflow.python.platform import tf_logging from tensorflow.python.util import deprecation from tensorflow.python.util import dispatch +from tensorflow.python.util import nest from tensorflow.python.util.tf_export import get_canonical_name_for_symbol from tensorflow.python.util.tf_export import tf_export @@ -68,6 +70,25 @@ class TensorTracer(object): ["{}={}".format(name, x) for (name, x) in self.kwargs.items()]) return "{}({})".format(self.name, ", ".join(args)) + @classmethod + def _overload_all_operators(cls): # pylint: disable=invalid-name + """Register overloads for all operators.""" + for operator in ops.Tensor.OVERLOADABLE_OPERATORS: + cls._overload_operator(operator) + + @classmethod + def _overload_operator(cls, operator): # pylint: disable=invalid-name + """Overload an operator with the same overloading as `ops.Tensor`.""" + tensor_oper = getattr(ops.Tensor, operator) + + # Compatibility with Python 2: + # Python 2 unbound methods have type checks for the first arg, + # so we need to extract the underlying function + tensor_oper = getattr(tensor_oper, "__func__", tensor_oper) + setattr(cls, operator, tensor_oper) + +TensorTracer._overload_all_operators() # pylint: disable=protected-access + class TensorTracerOpDispatcher(dispatch.GlobalOpDispatcher): """Global op dispatcher for TensorTracer.""" @@ -82,11 +103,7 @@ class TensorTracerOpDispatcher(dispatch.GlobalOpDispatcher): return TensorTracer(symbol_name, args, kwargs) def is_tensor_tracer_arg(self, value): - if isinstance(value, TensorTracer): - return True - if isinstance(value, (list, tuple)): - if any(isinstance(x, TensorTracer) for x in value): - return True + return any(isinstance(x, TensorTracer) for x in nest.flatten(value)) @test_util.run_all_in_graph_and_eager_modes @@ -214,5 +231,41 @@ class DispatchTest(test_util.TensorFlowTestCase): # Clean up. 
dispatch._GLOBAL_DISPATCHERS = original_global_dispatchers + def testGlobalDispatcherGetItem(self): + original_global_dispatchers = dispatch._GLOBAL_DISPATCHERS + try: + TensorTracerOpDispatcher().register() + + x = TensorTracer("x") + trace = x[0] + self.assertEqual( + str(trace), + "__operators__.getitem(x, 0)") + + x = TensorTracer("x") + y = TensorTracer("y") + trace = x[y] + self.assertEqual( + str(trace), + "__operators__.getitem(x, y)") + + x = TensorTracer("x") + y = TensorTracer("y") + trace = x[:y] # pylint: disable=invalid-slice-index + self.assertEqual( + str(trace), + "__operators__.getitem(x, slice(None, y, None))") + + x = array_ops.ones(shape=(5, 5)) + y = TensorTracer("y") + trace = x[:y] # pylint: disable=invalid-slice-index + self.assertRegex( + str(trace).replace("\n", " "), + r"__operators__.getitem\(.*Tensor.*, slice\(None, y, None\)\)") + + finally: + # Clean up. + dispatch._GLOBAL_DISPATCHERS = original_global_dispatchers + if __name__ == "__main__": googletest.main() diff --git a/tensorflow/python/util/nest.py b/tensorflow/python/util/nest.py index 66f43a3d682..7ae288c8e15 100644 --- a/tensorflow/python/util/nest.py +++ b/tensorflow/python/util/nest.py @@ -26,10 +26,13 @@ nest recognizes the following types of collections: 4.orderedDict 5.MutableMapping 6.attr.s + 7.slice attr.s decorated classes (http://www.attrs.org) are also supported, in the same way as `namedtuple`. +Python slices get flattened into `[x.start, x.stop, x.step]` + The utilities here assume (and do not check) that the nested structures form a 'tree', i.e., no references in the structure of the input of these functions should be recursive. @@ -120,6 +123,7 @@ def _is_namedtuple(instance, strict=False): # See the swig file (util.i) for documentation. _is_mapping_view = _pywrap_utils.IsMappingView _is_attrs = _pywrap_utils.IsAttrs +_is_slice = _pywrap_utils.IsSlice _is_composite_tensor = _pywrap_utils.IsCompositeTensor _is_type_spec = _pywrap_utils.IsTypeSpec _is_mutable_mapping = _pywrap_utils.IsMutableMapping @@ -190,6 +194,8 @@ def _sequence_like(instance, args): # For object proxies, first create the underlying type and then re-wrap it # in the proxy type. return type(instance)(_sequence_like(instance.__wrapped__, args)) + elif _is_slice(instance): + return slice(*args) else: # Not a namedtuple return type(instance)(args) @@ -244,6 +250,10 @@ def _yield_sorted_items(iterable): # Note: to allow CompositeTensors and their TypeSpecs to have matching # structures, we need to use the same key string here. yield iterable.value_type.__name__, iterable._component_specs # pylint: disable=protected-access + elif _is_slice(iterable): + yield "start", iterable.start + yield "stop", iterable.stop + yield "step", iterable.step else: for item in enumerate(iterable): yield item @@ -275,7 +285,8 @@ def is_nested(seq): def flatten(structure, expand_composites=False): """Returns a flat list from a given nested structure. - If nest is not a structure , tuple (or a namedtuple), dict, or an attrs class, + If nest is not a structure , tuple (or a namedtuple), dict, slice, or an + attrs class, then returns a single-element list: [nest]. @@ -286,7 +297,8 @@ def flatten(structure, expand_composites=False): repacks dicts and OrderedDicts after they have been flattened, and also allows flattening an OrderedDict and then repacking it back using a corresponding plain dict, or vice-versa. Dictionaries with non-sortable keys cannot be - flattened. + flattened. 
`slice`s will get flattened into the form + `[x.start, x.stop, x.step]`. Users must not modify any collections used in nest while this function is running. @@ -1429,4 +1441,5 @@ _pywrap_utils.RegisterType("Mapping", _collections_abc.Mapping) _pywrap_utils.RegisterType("MutableMapping", _collections_abc.MutableMapping) _pywrap_utils.RegisterType("Sequence", _collections_abc.Sequence) _pywrap_utils.RegisterType("MappingView", _collections_abc.MappingView) +_pywrap_utils.RegisterType("Slice", slice) _pywrap_utils.RegisterType("ObjectProxy", _wrapt.ObjectProxy) diff --git a/tensorflow/python/util/nest_test.py b/tensorflow/python/util/nest_test.py index ca808ba9ff1..7d674519433 100644 --- a/tensorflow/python/util/nest_test.py +++ b/tensorflow/python/util/nest_test.py @@ -122,6 +122,27 @@ class NestTest(parameterized.TestCase, test.TestCase): new_structure = nest.map_structure(lambda x: x, structure) self.assertEqual(structure, new_structure) + @parameterized.parameters( + slice(4), + slice(None), + # Because slice is overloaded, it does not take keyword args + slice(None, None, None), + slice(6, None, None), + slice(None, 4, None), + slice(None, None, 2), + slice(6, 2, None), + slice(None, 4, 5), + slice(3, None, 5), + slice(3, 7, 5), + ) + @test_util.assert_no_new_pyobjects_executing_eagerly + def testFlattenAndPackSlice(self, value): + structure = [value] + flat = nest.flatten(structure) + self.assertAllEqual(flat, [value.start, value.stop, value.step]) + new_structure = nest.pack_sequence_as(structure, flat) + self.assertEqual(structure, new_structure) + @test_util.assert_no_new_pyobjects_executing_eagerly def testFlattenAndPack(self): structure = ((3, 4), 5, (6, 7, (9, 10), 8)) diff --git a/tensorflow/python/util/serialization.py b/tensorflow/python/util/serialization.py index 3b1713b4c61..3258d138b0c 100644 --- a/tensorflow/python/util/serialization.py +++ b/tensorflow/python/util/serialization.py @@ -70,6 +70,16 @@ def get_json_type(obj): if isinstance(obj, collections_abc.Mapping): return dict(obj) + if isinstance(obj, slice): + return { + 'class_name': '__slice__', + 'start': obj.start, + 'stop': obj.stop, + 'step': obj.step} + + if obj is Ellipsis: + return {'class_name': '__ellipsis__'} + if isinstance(obj, wrapt.ObjectProxy): return obj.__wrapped__ diff --git a/tensorflow/python/util/util.cc b/tensorflow/python/util/util.cc index cf8581443e7..5f794aa38b0 100644 --- a/tensorflow/python/util/util.cc +++ b/tensorflow/python/util/util.cc @@ -240,6 +240,17 @@ int IsMappingHelper(PyObject* o) { return check_cache->CachedLookup(o); } +// Returns 1 if `o` is considered a slice object for the purposes of Flatten(). +// Returns 0 otherwise. +// Returns -1 if an error occurred. +int IsSliceHelper(PyObject* o) { + static auto* const check_cache = new CachedTypeCheck([](PyObject* to_check) { + return IsInstanceOfRegisteredType(to_check, "Slice"); + }); + if (PyDict_Check(o)) return true; + return check_cache->CachedLookup(o); +} + // Returns 1 if `o` is considered a mutable mapping for the purposes of // Flatten(). Returns 0 otherwise. Returns -1 if an error occurred. 
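To show the new slice handling end to end, here is a small sketch using the public tf.nest wrappers, assuming they pick up the same behavior as the internal nest module changed above (the slice values are arbitrary):

    import tensorflow as tf

    s = slice(3, 7, 5)
    structure = [s]

    # Flattening yields [s.start, s.stop, s.step], matching the new
    # _yield_sorted_items / SliceValueIterator behavior.
    flat = tf.nest.flatten(structure)
    assert flat == [3, 7, 5]

    # pack_sequence_as rebuilds the slice via slice(*args).
    rebuilt = tf.nest.pack_sequence_as(structure, flat)
    assert rebuilt == [slice(3, 7, 5)]
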
int IsMutableMappingHelper(PyObject* o) { @@ -344,6 +355,7 @@ int IsSequenceHelper(PyObject* o) { if (IsMappingHelper(o)) return true; if (IsMappingViewHelper(o)) return true; if (IsAttrsHelper(o)) return true; + if (IsSliceHelper(o)) return true; if (PySet_Check(o) && !WarnedThatSetIsNotSequence) { LOG(WARNING) << "Sets are not currently considered sequences, " "but this may change in the future, " @@ -531,6 +543,31 @@ class AttrsValueIterator : public ValueIterator { Safe_PyObjectPtr iter_; }; +class SliceValueIterator : public ValueIterator { + public: + explicit SliceValueIterator(PyObject* slice) : slice_(slice), attr_(0) { + Py_INCREF(slice); + } + + Safe_PyObjectPtr next() override { + Safe_PyObjectPtr result; + if (attr_ == 0) { + result.reset(PyObject_GetAttrString(slice_.get(), "start")); + } else if (attr_ == 1) { + result.reset(PyObject_GetAttrString(slice_.get(), "stop")); + } else if (attr_ == 2) { + result.reset(PyObject_GetAttrString(slice_.get(), "step")); + } + attr_++; + + return result; + } + + private: + Safe_PyObjectPtr slice_; + int attr_; +}; + bool IsSparseTensorValueType(PyObject* o) { PyObject* sparse_tensor_value_type = GetRegisteredPyObject("SparseTensorValue"); @@ -593,6 +630,8 @@ ValueIteratorPtr GetValueIterator(PyObject* nested) { return absl::make_unique(nested); } else if (IsAttrsHelper(nested)) { return absl::make_unique(nested); + } else if (IsSliceHelper(nested)) { + return absl::make_unique(nested); } else { return absl::make_unique(nested); } @@ -606,6 +645,8 @@ ValueIteratorPtr GetValueIteratorForData(PyObject* nested) { return absl::make_unique(nested); } else if (IsAttrsHelper(nested)) { return absl::make_unique(nested); + } else if (IsSliceHelper(nested)) { + return absl::make_unique(nested); } else if (IsSparseTensorValueType(nested)) { return absl::make_unique(nested); } else { @@ -909,6 +950,7 @@ bool IsSequence(PyObject* o) { return IsSequenceHelper(o) == 1; } bool IsMapping(PyObject* o) { return IsMappingHelper(o) == 1; } bool IsMutableMapping(PyObject* o) { return IsMutableMappingHelper(o) == 1; } bool IsMappingView(PyObject* o) { return IsMappingViewHelper(o) == 1; } +bool IsSlice(PyObject* o) { return IsSliceHelper(o) == 1; } bool IsAttrs(PyObject* o) { return IsAttrsHelper(o) == 1; } bool IsTensor(PyObject* o) { return IsTensorHelper(o) == 1; } bool IsEagerTensorSlow(PyObject* o) { return IsEagerTensorHelper(o) == 1; } diff --git a/tensorflow/python/util/util.h b/tensorflow/python/util/util.h index fc0b864416e..2f8ff6f8093 100644 --- a/tensorflow/python/util/util.h +++ b/tensorflow/python/util/util.h @@ -115,6 +115,15 @@ bool IsTuple(PyObject* o); // True if the sequence subclasses mapping. bool IsMappingView(PyObject* o); +// Returns a true if its input is a python `slice` object. +// +// Args: +// seq: the input to be checked. +// +// Returns: +// True if the input object is a python `slice`. +bool IsSlice(PyObject* o); + // A version of PyMapping_Keys that works in C++11 // // Args: diff --git a/tensorflow/python/util/util_wrapper.cc b/tensorflow/python/util/util_wrapper.cc index 63c70d785cc..a6f421027af 100644 --- a/tensorflow/python/util/util_wrapper.cc +++ b/tensorflow/python/util/util_wrapper.cc @@ -198,6 +198,24 @@ PYBIND11_MODULE(_pywrap_utils, m) { Returns: True if `instance` is an instance of an `attr.s` decorated class. 
)pbdoc"); + m.def( + "IsSlice", + [](const py::handle& o) { + bool result = tensorflow::swig::IsSlice(o.ptr()); + if (PyErr_Occurred()) { + throw py::error_already_set(); + } + return result; + }, + R"pbdoc( + Returns True if `instance` is an instance of a python`slice` object. + + Args: + instance: An instance of a Python object. + + Returns: + True if `instance` is an instance of a python `slice` object. + )pbdoc"); m.def( "SameNamedtuples", [](const py::handle& o1, const py::handle& o2) { From 9f3c94aba6e5c2f6f514f3323a761111ada1807b Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Mon, 20 Jul 2020 15:48:36 -0700 Subject: [PATCH 0876/2522] Create bazel release configs for CPU Windows branches and propagate those changes to nightly release jobs. PiperOrigin-RevId: 322242369 Change-Id: Ied6e57ceb0f5783f59b8bd41903f35a79f0dde5a --- .bazelrc | 6 +++++- .../release/windows/cpu_py35_full/nightly_release.bat | 2 +- .../release/windows/cpu_py36_full/nightly_release.bat | 2 +- .../release/windows/cpu_py37_full/nightly_release.bat | 2 +- .../release/windows/cpu_py38_full/nightly_release.bat | 2 +- .../tools/ci_build/windows/cpu/pip/build_tf_windows.sh | 4 ++-- 6 files changed, 11 insertions(+), 7 deletions(-) diff --git a/.bazelrc b/.bazelrc index a7b768f1b16..d3cb67d08d3 100644 --- a/.bazelrc +++ b/.bazelrc @@ -84,7 +84,8 @@ # release_gpu_common: Common options for GPU builds on Linux and Windows. # release_cpu_linux: Toolchain and CUDA options for Linux CPU builds. # release_cpu_macos: Toolchain and CUDA options for MacOS CPU builds. -# release_gpu_linux: Toolchain and CUDA options for Linux PU builds. +# release_gpu_linux: Toolchain and CUDA options for Linux GPU builds. +# release_cpu_windows: Toolchain and CUDA options for Windows CPU builds. # Allow builds using libc++ as a linker library # This is mostly for OSSFuzz, so we also pass in the flags from environment to clean build file @@ -570,3 +571,6 @@ build:release_gpu_common --action_env=GCC_HOST_COMPILER_PATH="/usr/bin/gcc-5" build:release_gpu_linux --config=release_gpu_common build:release_gpu_linux --config=avx_linux build:release_gpu_linux --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain + +build:release_cpu_windows --config=release_common +build:release_cpu_windows --announce_rc diff --git a/tensorflow/tools/ci_build/release/windows/cpu_py35_full/nightly_release.bat b/tensorflow/tools/ci_build/release/windows/cpu_py35_full/nightly_release.bat index 5b254fcae5f..0f78ee006ff 100644 --- a/tensorflow/tools/ci_build/release/windows/cpu_py35_full/nightly_release.bat +++ b/tensorflow/tools/ci_build/release/windows/cpu_py35_full/nightly_release.bat @@ -17,4 +17,4 @@ SET PYTHON_DIRECTORY=Python35 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --tf_nightly --project_name "tf_nightly_cpu" --extra_build_flags "--config=v2" +call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --tf_nightly --project_name "tf_nightly_cpu" --extra_build_flags "--config=release_cpu_windows" diff --git a/tensorflow/tools/ci_build/release/windows/cpu_py36_full/nightly_release.bat b/tensorflow/tools/ci_build/release/windows/cpu_py36_full/nightly_release.bat index 745ba8ea4b6..a5ebfb21103 100644 --- a/tensorflow/tools/ci_build/release/windows/cpu_py36_full/nightly_release.bat +++ b/tensorflow/tools/ci_build/release/windows/cpu_py36_full/nightly_release.bat @@ -17,4 +17,4 @@ SET PYTHON_DIRECTORY=Python36 CALL 
tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --tf_nightly --project_name "tf_nightly_cpu" --extra_build_flags "--config=v2" +call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --tf_nightly --project_name "tf_nightly_cpu" --extra_build_flags "--config=release_cpu_windows" diff --git a/tensorflow/tools/ci_build/release/windows/cpu_py37_full/nightly_release.bat b/tensorflow/tools/ci_build/release/windows/cpu_py37_full/nightly_release.bat index 8596652f59d..9914c0235d1 100644 --- a/tensorflow/tools/ci_build/release/windows/cpu_py37_full/nightly_release.bat +++ b/tensorflow/tools/ci_build/release/windows/cpu_py37_full/nightly_release.bat @@ -17,4 +17,4 @@ SET PYTHON_DIRECTORY=Python37 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --tf_nightly --project_name "tf_nightly_cpu" --extra_build_flags "--config=v2" +call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --tf_nightly --project_name "tf_nightly_cpu" --extra_build_flags "--config=release_cpu_windows" diff --git a/tensorflow/tools/ci_build/release/windows/cpu_py38_full/nightly_release.bat b/tensorflow/tools/ci_build/release/windows/cpu_py38_full/nightly_release.bat index 1214812cc46..bee0bb4edea 100644 --- a/tensorflow/tools/ci_build/release/windows/cpu_py38_full/nightly_release.bat +++ b/tensorflow/tools/ci_build/release/windows/cpu_py38_full/nightly_release.bat @@ -17,4 +17,4 @@ SET PYTHON_DIRECTORY=Python38 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --tf_nightly --project_name "tf_nightly_cpu" --extra_build_flags "--config=v2" +call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --tf_nightly --project_name "tf_nightly_cpu" --extra_build_flags "--config=release_cpu_windows" diff --git a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh index 4e5d769cf6b..0abdb59fede 100644 --- a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh +++ b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh @@ -136,12 +136,12 @@ fi run_configure_for_cpu_build -bazel build --announce_rc --config=opt ${EXTRA_BUILD_FLAGS} \ +bazel build ${EXTRA_BUILD_FLAGS} \ --build_tag_filters=-no_pip,-no_windows,-no_oss,-gpu,-tpu \ --output_filter=^$ \ tensorflow/lite:framework tensorflow/lite/examples/minimal:minimal || exit $? -bazel build --announce_rc --config=opt ${EXTRA_BUILD_FLAGS} \ +bazel build ${EXTRA_BUILD_FLAGS} \ --output_filter=^$ \ tensorflow/tools/pip_package:build_pip_package || exit $? From 18ebe824d2f6f20b09839cb0a0073032a2d6c5fe Mon Sep 17 00:00:00 2001 From: Karmel Allison Date: Mon, 20 Jul 2020 15:49:15 -0700 Subject: [PATCH 0877/2522] Replace instances of "blacklist" with "denylist" where possible. See Google Developer guidelines at https://developers.google.com/style/word-list#blacklist for more information. 
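Two of the renamed knobs in this change are user-visible. A hedged usage sketch follows; the op names and file path are placeholders, not recommendations, and both settings must be in place before the relevant conversion or compilation runs:

    import os

    # Exclude specific op types from TF-TRT conversion
    # (previously TF_TRT_OP_BLACKLIST); the value is a comma-separated list.
    os.environ["TF_TRT_OP_DENYLIST"] = "Conv2D,MatMul"

    # Point XLA:GPU at a convolution-algorithm denylist text proto
    # (previously --xla_gpu_algorithm_blacklist_path).
    os.environ["XLA_FLAGS"] = "--xla_gpu_algorithm_denylist_path=/tmp/denylist.pbtxt"
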
PiperOrigin-RevId: 322242499 Change-Id: I66ea33f87811bbc734f538029d806240da91130f --- RELEASE.md | 6 +- .../jit/mark_for_compilation_pass_test.cc | 2 +- tensorflow/compiler/tests/eager_test.py | 2 +- .../compiler/tf2tensorrt/segment/segment.cc | 18 +- .../compiler/xla/debug_options_flags.cc | 8 +- tensorflow/compiler/xla/service/gpu/BUILD | 2 +- ...ist.pbtxt => hlo_algorithm_denylist.pbtxt} | 0 .../xla/service/gpu/gpu_autotuning.proto | 10 +- .../service/gpu/gpu_conv_algorithm_picker.cc | 24 +-- .../service/gpu/hlo_algorithm_blacklist.cc | 22 +- .../xla/service/gpu/hlo_algorithm_blacklist.h | 14 +- .../gpu/hlo_algorithm_blacklist_test.cc | 16 +- .../xla/service/hlo_rematerialization.cc | 30 +-- tensorflow/compiler/xla/xla.proto | 4 +- .../optimizers/auto_mixed_precision.cc | 160 +++++++------- .../optimizers/auto_mixed_precision_lists.h | 40 ++-- .../optimizers/auto_mixed_precision_test.cc | 199 +++++++++--------- .../optimizers/pin_to_host_optimizer.cc | 12 +- .../core/platform/cloud/gcs_dns_cache.cc | 2 +- .../core/profiler/internal/tfprof_stats.cc | 2 +- tensorflow/go/genop/internal/genop.go | 4 +- .../delegates/nnapi/acceleration_test_list.cc | 6 +- .../lite/g3doc/performance/coreml_delegate.md | 6 +- .../acceleration_test_util_internal.cc | 8 +- .../kernels/acceleration_test_util_internal.h | 14 +- .../acceleration_test_util_internal_test.cc | 34 +-- tensorflow/lite/toco/tflite/export_test.cc | 2 +- .../stages/image_classification_stage.cc | 31 ++- .../stages/image_classification_stage.h | 6 +- .../imagenet_image_classification/README.md | 6 +- .../imagenet_image_classification/run_eval.cc | 15 +- .../tensorrt/test/quantization_mnist_test.py | 2 +- .../python/compiler/tensorrt/trt_convert.py | 24 +-- .../compiler/tensorrt/trt_convert_test.py | 2 +- tensorflow/python/compiler/xla/xla.py | 4 +- tensorflow/python/debug/__init__.py | 4 +- tensorflow/python/debug/cli/analyzer_cli.py | 33 +-- .../lib/debug_graph_reconstruction_test.py | 15 +- tensorflow/python/debug/lib/debug_utils.py | 69 +++--- .../python/debug/lib/debug_utils_test.py | 34 +-- .../python/debug/lib/session_debug_testlib.py | 4 +- .../python/framework/auto_control_deps.py | 10 +- .../python/framework/convert_to_constants.py | 24 +-- .../framework/convert_to_constants_test.py | 18 +- .../python/framework/graph_util_impl.py | 4 +- tensorflow/python/framework/test_util.py | 32 +-- .../grappler/auto_mixed_precision_test.py | 12 +- tensorflow/python/tools/freeze_graph.py | 24 +-- tensorflow/python/tools/saved_model_cli.py | 21 +- .../python/tools/saved_model_cli_test.py | 10 +- tensorflow/python/tpu/feature_column.py | 16 +- tensorflow/python/tpu/feature_column_test.py | 4 +- tensorflow/python/tpu/tpu.py | 10 +- .../training/experimental/mixed_precision.py | 8 +- .../tools/ci_build/builds/builds_common.sh | 8 +- .../ci_build/builds/integration_tests.sh | 10 +- .../tools/ci_build/builds/run_pip_tests.sh | 2 +- .../tools/ci_build/builds/test_tutorials.sh | 10 +- .../tools/ci_build/builds/test_user_ops.sh | 2 +- tensorflow/tools/ci_build/ci_sanity.sh | 2 +- tensorflow/tools/ci_build/pylintrc | 2 +- tensorflow/tools/common/traverse.py | 2 +- third_party/sycl/crosstool/computecpp.tpl | 2 +- third_party/sycl/crosstool/trisycl.tpl | 2 +- 64 files changed, 572 insertions(+), 559 deletions(-) rename tensorflow/compiler/xla/service/gpu/data/{hlo_algorithm_blacklist.pbtxt => hlo_algorithm_denylist.pbtxt} (100%) diff --git a/RELEASE.md b/RELEASE.md index 2ca0b7013c1..c4fa615cf4d 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -66,9 +66,9 @@ * 
Tracing and Debugging: * * Other: - * We have replaced uses of "whitelist" with "allowlist" where possible. - Please see https://developers.google.com/style/word-list#blacklist for more - context. + * We have replaced uses of "whitelist" and "blacklist" with "allowlist" + and "denylist" where possible. Please see + https://developers.google.com/style/word-list#blacklist for more context. * ## Thanks to our Contributors diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc b/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc index 3ae72eb514c..e88319bb732 100644 --- a/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc +++ b/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc @@ -1829,7 +1829,7 @@ TEST(XlaCompilationTest, XLALiteAllowlist) { } EXPECT_TRUE(unknow_op.empty()) << "Someone added support for a new TF opeations inside XLA. They must " - "be included in the XLALite allowlist or blacklist:\n" + "be included in the XLALite allowlist or denylist:\n" << absl::StrJoin(unknow_op, "\n"); } } // namespace diff --git a/tensorflow/compiler/tests/eager_test.py b/tensorflow/compiler/tests/eager_test.py index 520348e0f8a..eef9d24766d 100644 --- a/tensorflow/compiler/tests/eager_test.py +++ b/tensorflow/compiler/tests/eager_test.py @@ -311,7 +311,7 @@ class EagerFunctionTest(xla_test.XLATestCase): if 'GPU' in self.device: # TODO(b/32333178) self.skipTest('Current implementation of RandomStandardNormal kernel ' - 'is very slow on GPU, and has been blacklisted.') + 'is very slow on GPU, and has been denylisted.') with self.test_scope(): data_format = 'channels_last' conv = convolutional.Conv2D( diff --git a/tensorflow/compiler/tf2tensorrt/segment/segment.cc b/tensorflow/compiler/tf2tensorrt/segment/segment.cc index 9f9711f6c48..1337a733f91 100644 --- a/tensorflow/compiler/tf2tensorrt/segment/segment.cc +++ b/tensorflow/compiler/tf2tensorrt/segment/segment.cc @@ -711,15 +711,15 @@ Status SegmentGraph(const Graph* tf_graph, std::unordered_set unsupported_ops; int num_unsupported_ops = 0; - // Getting the operations blacklisted for conversion - string tftrt_op_blacklist_str; + // Getting the operations denylisted for conversion + string tftrt_op_denylist_str; TF_CHECK_OK( - ReadStringFromEnvVar("TF_TRT_OP_BLACKLIST", "", &tftrt_op_blacklist_str)); + ReadStringFromEnvVar("TF_TRT_OP_DENYLIST", "", &tftrt_op_denylist_str)); - auto tftrt_op_blacklist = gtl::FlatSet{}; // non-absl ok + auto tftrt_op_denylist = gtl::FlatSet{}; // non-absl ok - for (const auto& x : str_util::Split(tftrt_op_blacklist_str, ",")) { - tftrt_op_blacklist.insert(x); + for (const auto& x : str_util::Split(tftrt_op_denylist_str, ",")) { + tftrt_op_denylist.insert(x); } // Parsing each node of the graph @@ -761,13 +761,13 @@ Status SegmentGraph(const Graph* tf_graph, const Status status = candidate_fn(node->tf_node()); if (!status.ok()) { exclude_node(status.error_message()); - } else if (tftrt_op_blacklist.count(node->tf_node()->type_string())) { + } else if (tftrt_op_denylist.count(node->tf_node()->type_string())) { // WARNING verbosity since the user explicitly requests this behavior. 
LOG_WARNING_WITH_PREFIX - << "Blacklisted as TF-TRT candidate, " + << "Denylisted as TF-TRT candidate, " << "(Op type: " << node->tf_node()->type_string() << "), " << "(Op name: " << node->name() << ")"; - exclude_node("Blacklisted with the env var TF_TRT_OP_BLACKLIST"); + exclude_node("Denylisted with the env var TF_TRT_OP_DENYLIST"); } else { VLOG(2) << "Accepted as a TF-TRT candidate, " << "(Op type: " << node->tf_node()->type_string() << "), " diff --git a/tensorflow/compiler/xla/debug_options_flags.cc b/tensorflow/compiler/xla/debug_options_flags.cc index 552dfcd15c3..2dd7acb2f67 100644 --- a/tensorflow/compiler/xla/debug_options_flags.cc +++ b/tensorflow/compiler/xla/debug_options_flags.cc @@ -535,10 +535,10 @@ static void AllocateFlags() { flag_values->xla_gpu_force_conv_nchw(), "For cuDNN convolutions, always NCHW layouts.")); flag_objects->push_back(tensorflow::Flag( - "xla_gpu_algorithm_blacklist_path", - string_setter_for(&DebugOptions::set_xla_gpu_algorithm_blacklist_path), - flag_values->xla_gpu_algorithm_blacklist_path(), - "An AlgorithmBlacklist text proto file as a blacklist of convolutions to " + "xla_gpu_algorithm_denylist_path", + string_setter_for(&DebugOptions::set_xla_gpu_algorithm_denylist_path), + flag_values->xla_gpu_algorithm_denylist_path(), + "An AlgorithmDenylist text proto file as a denylist of convolutions to " "avoid to use.")); flag_objects->push_back(tensorflow::Flag( "xla_gpu_deterministic_reductions", diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index 7b1d3e213ce..2a85c9f2908 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -1676,7 +1676,7 @@ cc_library( tf_cc_test( name = "hlo_algorithm_blacklist_test", srcs = ["hlo_algorithm_blacklist_test.cc"], - data = ["data/hlo_algorithm_blacklist.pbtxt"], + data = ["data/hlo_algorithm_denylist.pbtxt"], tags = ["no_pip"], deps = [ ":hlo_algorithm_blacklist", diff --git a/tensorflow/compiler/xla/service/gpu/data/hlo_algorithm_blacklist.pbtxt b/tensorflow/compiler/xla/service/gpu/data/hlo_algorithm_denylist.pbtxt similarity index 100% rename from tensorflow/compiler/xla/service/gpu/data/hlo_algorithm_blacklist.pbtxt rename to tensorflow/compiler/xla/service/gpu/data/hlo_algorithm_denylist.pbtxt diff --git a/tensorflow/compiler/xla/service/gpu/gpu_autotuning.proto b/tensorflow/compiler/xla/service/gpu/gpu_autotuning.proto index 35b5cfacb2d..563245da969 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_autotuning.proto +++ b/tensorflow/compiler/xla/service/gpu/gpu_autotuning.proto @@ -15,19 +15,19 @@ message ConvInstructionLog { repeated uint64 operand_addresses = 4; } -message BlacklistedAlgorithm { +message DenylistedAlgorithm { int64 id = 1; bool tensor_ops = 2; } -message AlgorithmBlacklistEntry { +message AlgorithmDenylistEntry { string hlo = 1; tensorflow.ComputeCapability cc = 2; tensorflow.CudnnVersion cudnn_version = 3; string blas_version = 5; - repeated BlacklistedAlgorithm algos = 4; + repeated DenylistedAlgorithm algos = 4; } -message AlgorithmBlacklist { - repeated AlgorithmBlacklistEntry entries = 1; +message AlgorithmDenylist { + repeated AlgorithmDenylistEntry entries = 1; } diff --git a/tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc b/tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc index 67255f02665..4ca113f6c99 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc +++ 
b/tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc @@ -438,10 +438,9 @@ GpuConvAlgorithmPicker::PickBestAlgorithmNoCacheCuda( (void)blas->GetVersion(&blas_version); } - absl::Span blacklisted_algos = - GetBlacklistedConvAlgorithms(GetComputeCapability(stream_exec_), - GetCudnnVersion(stream_exec_), blas_version, - canonical_hlo); + absl::Span disabled_algos = GetDisabledConvAlgorithms( + GetComputeCapability(stream_exec_), GetCudnnVersion(stream_exec_), + blas_version, canonical_hlo); for (const AlgorithmDesc& alg : GetAlgorithms(kind, stream_exec_)) { XLA_SCOPED_LOGGING_TIMER_LEVEL( @@ -449,7 +448,7 @@ GpuConvAlgorithmPicker::PickBestAlgorithmNoCacheCuda( AlgorithmToString(alg)), 2); - if (absl::c_linear_search(blacklisted_algos, alg)) { + if (absl::c_linear_search(disabled_algos, alg)) { LOG(INFO) << "Omitted potentially buggy algorithm " << AlgorithmToString(alg) << " for conv " << instr->ToString(); continue; @@ -503,7 +502,7 @@ GpuConvAlgorithmPicker::PickBestAlgorithmNoCacheCuda( if (!input_output_allocator_redzone_clear || !scratch_allocator_redzone_clear) { - AlgorithmBlacklist proto; + AlgorithmDenylist proto; auto entry = proto.add_entries(); entry->set_hlo(canonical_hlo); *entry->mutable_cc() = GetComputeCapability(stream_exec_); @@ -513,13 +512,12 @@ GpuConvAlgorithmPicker::PickBestAlgorithmNoCacheCuda( algo->set_id(alg.algo_id()); algo->set_tensor_ops(alg.tensor_ops_enabled()); - LOG(ERROR) - << "To blacklist this algorithm for this convolution, " - "copy-paste the following " - "proto to the blacklist file pointed by XLA_FLAGS " - "--xla_gpu_algorithm_blacklist_path=" - << GetDebugOptionsFromFlags().xla_gpu_algorithm_blacklist_path() - << " : " << proto.ShortDebugString(); + LOG(ERROR) << "To denylist this algorithm for this convolution, " + "copy-paste the following " + "proto to the denylist file pointed by XLA_FLAGS " + "--xla_gpu_algorithm_denylist_path=" + << GetDebugOptionsFromFlags().xla_gpu_algorithm_denylist_path() + << " : " << proto.ShortDebugString(); continue; } diff --git a/tensorflow/compiler/xla/service/gpu/hlo_algorithm_blacklist.cc b/tensorflow/compiler/xla/service/gpu/hlo_algorithm_blacklist.cc index 601c805ce16..a68d52cf832 100644 --- a/tensorflow/compiler/xla/service/gpu/hlo_algorithm_blacklist.cc +++ b/tensorflow/compiler/xla/service/gpu/hlo_algorithm_blacklist.cc @@ -24,7 +24,7 @@ limitations under the License. namespace xla { namespace gpu { -constexpr char kDefaultBlacklist[] = R"pb( +constexpr char kDefaultDenylist[] = R"pb( entries { hlo: "(f32[4,32,32,32]{2,1,3,0}, u8[0]{0}) custom-call(f32[4,32,32,32]{2,1,3,0}, f32[5,5,32,32]{1,0,2,3}), window={size=5x5 pad=2_2x2_2}, dim_labels=b01f_01io->b01f, custom_call_target=\"__cudnn$convForward\", backend_config=\"{conv_result_scale:1}\"" cc { major: 7 } @@ -41,28 +41,26 @@ constexpr char kDefaultBlacklist[] = R"pb( } )pb"; -absl::Span -GetBlacklistedConvAlgorithms(tensorflow::ComputeCapability cc, - tensorflow::CudnnVersion cudnn_version, - const std::string& blas_version, - const std::string& hlo) { +absl::Span GetDisabledConvAlgorithms( + tensorflow::ComputeCapability cc, tensorflow::CudnnVersion cudnn_version, + const std::string& blas_version, const std::string& hlo) { // Key is the tuple of canonicalized hlo, compute capability major/minor, // cudnn version major/minor/patch, blas version. 
using MapType = absl::flat_hash_map< std::tuple, std::vector>; - static MapType* blacklist = [] { + static MapType* denylist = [] { MapType* list = new MapType(); - AlgorithmBlacklist proto; + AlgorithmDenylist proto; std::string file_path = - GetDebugOptionsFromFlags().xla_gpu_algorithm_blacklist_path(); + GetDebugOptionsFromFlags().xla_gpu_algorithm_denylist_path(); if (!file_path.empty()) { TF_CHECK_OK(tensorflow::ReadTextProto(tensorflow::Env::Default(), file_path, &proto)); } else { CHECK(tensorflow::protobuf::TextFormat::ParseFromString( - std::string(kDefaultBlacklist), &proto)); + std::string(kDefaultDenylist), &proto)); } for (const auto& entry : proto.entries()) { for (const auto& algo : entry.algos()) { @@ -77,10 +75,10 @@ GetBlacklistedConvAlgorithms(tensorflow::ComputeCapability cc, return list; }(); - auto iter = blacklist->find(std::make_tuple( + auto iter = denylist->find(std::make_tuple( hlo, cc.major(), cc.minor(), cudnn_version.major(), cudnn_version.minor(), cudnn_version.patch(), std::string(blas_version))); - if (iter != blacklist->end()) { + if (iter != denylist->end()) { return iter->second; } return {}; diff --git a/tensorflow/compiler/xla/service/gpu/hlo_algorithm_blacklist.h b/tensorflow/compiler/xla/service/gpu/hlo_algorithm_blacklist.h index c1955a452aa..73d1219c1ab 100644 --- a/tensorflow/compiler/xla/service/gpu/hlo_algorithm_blacklist.h +++ b/tensorflow/compiler/xla/service/gpu/hlo_algorithm_blacklist.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_HLO_ALGORITHM_BLACKLIST_H_ -#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_HLO_ALGORITHM_BLACKLIST_H_ +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_HLO_ALGORITHM_DENYLIST_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_HLO_ALGORITHM_DENYLIST_H_ #include @@ -24,13 +24,11 @@ limitations under the License. 
namespace xla { namespace gpu { -absl::Span -GetBlacklistedConvAlgorithms(tensorflow::ComputeCapability cc, - tensorflow::CudnnVersion cudnn_version, - const std::string& blas_version, - const std::string& hlo); +absl::Span GetDisabledConvAlgorithms( + tensorflow::ComputeCapability cc, tensorflow::CudnnVersion cudnn_version, + const std::string& blas_version, const std::string& hlo); } // namespace gpu } // namespace xla -#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_HLO_ALGORITHM_BLACKLIST_H_ +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_HLO_ALGORITHM_DENYLIST_H_ diff --git a/tensorflow/compiler/xla/service/gpu/hlo_algorithm_blacklist_test.cc b/tensorflow/compiler/xla/service/gpu/hlo_algorithm_blacklist_test.cc index bc24f486668..c4529f855c8 100644 --- a/tensorflow/compiler/xla/service/gpu/hlo_algorithm_blacklist_test.cc +++ b/tensorflow/compiler/xla/service/gpu/hlo_algorithm_blacklist_test.cc @@ -26,22 +26,22 @@ namespace xla { namespace gpu { namespace { -class BlacklistTest : public testing::Test { +class DenylistTest : public testing::Test { protected: - BlacklistTest() { + DenylistTest() { tensorflow::setenv( "XLA_FLAGS", absl::StrCat( - "--xla_gpu_algorithm_blacklist_path=", + "--xla_gpu_algorithm_denylist_path=", tensorflow::GetDataDependencyFilepath(tensorflow::io::JoinPath( "tensorflow", "compiler", "xla", "service", "gpu", "data", - "hlo_algorithm_blacklist.pbtxt"))) + "hlo_algorithm_denylist.pbtxt"))) .data(), 0); } }; -TEST_F(BlacklistTest, DefaultTest) { +TEST_F(DenylistTest, DefaultTest) { tensorflow::ComputeCapability cc; cc.set_major(7); cc.set_minor(0); @@ -49,7 +49,7 @@ TEST_F(BlacklistTest, DefaultTest) { cudnn_version.set_major(7); cudnn_version.set_minor(6); cudnn_version.set_patch(2); - auto list = GetBlacklistedConvAlgorithms( + auto list = GetDisabledConvAlgorithms( cc, cudnn_version, /*blas_version=*/"9000", R"((f16[256,112,112,64]{3,2,1,0}, u8[0]{0}) custom-call(f16[256,224,224,4]{3,2,1,0}, f16[7,7,4,64]{2,1,0,3}), window={size=7x7 stride=2x2 pad=3_3x3_3}, dim_labels=b01f_01io->b01f, custom_call_target="__cudnn$convForward", backend_config="{conv_result_scale:1}")"); ASSERT_EQ(4, list.size()); @@ -59,7 +59,7 @@ TEST_F(BlacklistTest, DefaultTest) { EXPECT_EQ(stream_executor::dnn::AlgorithmDesc(1, true), list[3]); } -TEST_F(BlacklistTest, NegativeTest) { +TEST_F(DenylistTest, NegativeTest) { tensorflow::ComputeCapability cc; cc.set_major(7); cc.set_minor(0); @@ -68,7 +68,7 @@ TEST_F(BlacklistTest, NegativeTest) { cudnn_version.set_minor(6); cudnn_version.set_minor(2); auto list = - GetBlacklistedConvAlgorithms(cc, cudnn_version, "9000", R"(invalid hlo)"); + GetDisabledConvAlgorithms(cc, cudnn_version, "9000", R"(invalid hlo)"); ASSERT_EQ(0, list.size()); } diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.cc b/tensorflow/compiler/xla/service/hlo_rematerialization.cc index 2166ecdd890..e1defa313e8 100644 --- a/tensorflow/compiler/xla/service/hlo_rematerialization.cc +++ b/tensorflow/compiler/xla/service/hlo_rematerialization.cc @@ -121,9 +121,9 @@ struct Item { bool placed = false; // To avoid an infinite loop rematerializing the same set of - // instructions ad infinitum, keep a blacklist of instructions + // instructions ad infinitum, keep a denylist of instructions // which should not be rematerialized. - bool blacklisted = false; + bool denylisted = false; // The buffers defined by this instruction. 
BufferIdList buffers_defined; @@ -292,8 +292,8 @@ class InstructionList { InsertBeforeInstructions(to_insert, {max_position_item->next}); } - void Blacklist(const HloInstruction* inst) { - GetItem(inst)->blacklisted = true; + void Denylist(const HloInstruction* inst) { + GetItem(inst)->denylisted = true; } private: @@ -1158,13 +1158,13 @@ std::vector GetInitialBlock(const InstructionList& instruction_list, return item_block; } -// Returns whether any instruction in 'block' is blacklisted or +// Returns whether any instruction in 'block' is denylisted or // non-rematerializable. -bool AnyBlacklistedOrNonRematerializable( +bool AnyDenylistedOrNonRematerializable( const std::vector& block, absl::flat_hash_map* rematerializable_map) { for (auto* item : block) { - if (item->blacklisted) { + if (item->denylisted) { return true; } if (!CanBeRematerialized(item->instruction, rematerializable_map)) { @@ -1195,10 +1195,10 @@ MemoryUsageTracker::PickRematerializationCandidates( // instructions. break; } - // If any item in the starting block are blacklisted or non-rematable, then + // If any item in the starting block are denylisted or non-rematable, then // break and move on to next start_item (we can actually move to the last // invalid item in this block, but let's ignore that optimization for now). - if (AnyBlacklistedOrNonRematerializable(block, rematerializable_map)) { + if (AnyDenylistedOrNonRematerializable(block, rematerializable_map)) { continue; } while (block.size() <= max_block_size) { @@ -1289,8 +1289,8 @@ MemoryUsageTracker::PickRematerializationCandidates( // Time to update the block to include the next instruction. auto* last_item = block[block.size() - 1]; auto* next_item = instruction_list.next(last_item); - if (next_item == nullptr || next_item->blacklisted || - !next_item->placed || next_item == in_progress_item_ || + if (next_item == nullptr || next_item->denylisted || !next_item->placed || + next_item == in_progress_item_ || !CanBeRematerialized(next_item->instruction, rematerializable_map)) { break; } @@ -1404,7 +1404,7 @@ StatusOr RematerializeInstructions( // instruction it was a copying of. Now 'remat' is a rematerialization // of 'best' and kills 'best'. Stop rematerializing this instruction // to avoid an infinite loop. - instruction_list->Blacklist(remat); + instruction_list->Denylist(remat); } remat_move_instructions->insert(remat); } else { @@ -1460,8 +1460,8 @@ StatusOr CompressInstruction(MemoryUsageTracker* memory_tracker, place_before.push_back(instruction_list->GetItem(user)); } - instruction_list->Blacklist(compressed_item->instruction); - instruction_list->Blacklist(uncompressed_item->instruction); + instruction_list->Denylist(compressed_item->instruction); + instruction_list->Denylist(uncompressed_item->instruction); instruction_list->InsertBeforeInstructions(uncompressed_item, place_before); @@ -1583,7 +1583,7 @@ StatusOr HloRematerialization::RematerializeComputation( // rematerialization is added to 'remat_move_instructions' (the // rematerialization is essentially a move). If the next rematerialization of // the instruction is also a move then the rematerialization is added to the - // blacklist. + // denylist. absl::flat_hash_set remat_move_instructions; // The map from instructions to their rematerializable status. 
diff --git a/tensorflow/compiler/xla/xla.proto b/tensorflow/compiler/xla/xla.proto index 42b6ea6bd53..6b9917eac53 100644 --- a/tensorflow/compiler/xla/xla.proto +++ b/tensorflow/compiler/xla/xla.proto @@ -270,8 +270,8 @@ message DebugOptions { // Paths to files with ptx code. repeated string xla_gpu_ptx_file = 127; - // Blacklist for cuDNN convolutions. - string xla_gpu_algorithm_blacklist_path = 128; + // Denylist for cuDNN convolutions. + string xla_gpu_algorithm_denylist_path = 128; // Guarantee run-to-run determinism from reductions on XLA:GPU. bool xla_gpu_deterministic_reductions = 130; diff --git a/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc b/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc index 252eb3c885c..94907d2ee6c 100644 --- a/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc +++ b/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc @@ -293,7 +293,7 @@ class NodeTypeAttrMap { } // Note that the mappings generated here include inputs/outputs with fixed // types. This makes the mappings complete (all inputs and outputs are - // included), and allows the graph rewriter to propagate black paint + // included), and allows the graph rewriter to propagate deny paint // from/through ops with fixed types. io2type_entry.first.reserve(input_arg_inds.size()); for (int i = 0; i < static_cast(input_arg_inds.size()); ++i) { @@ -843,10 +843,10 @@ DataTypeSet AllowedDataTypes(const OpDef& op_def, const TypeAttrId& t_attr_id) { } Status ValidateLists(const gtl::FlatSet& allow_list, - const gtl::FlatSet& black_list, - const gtl::FlatSet& gray_list, + const gtl::FlatSet& deny_list, + const gtl::FlatSet& infer_list, const gtl::FlatSet& clear_list) { - std::vector> lists{allow_list, black_list, gray_list, + std::vector> lists{allow_list, deny_list, infer_list, clear_list}; std::multiset counts; for (const auto& list : lists) { @@ -967,23 +967,23 @@ class AutoMixedPrecisionImpl { bool SupportsF16(const NodeTypeId& node_type) const; const NodeTypeId* GetTensorListFloat32NodeTypeId(const NodeDef& node) const; bool IsSourceOrSinkOp(const string& op) const; - void FindFloat32TensorListOpClustersAndBlacklistUnsafe( + void FindFloat32TensorListOpClustersAndDenylistUnsafe( std::vector>* clusters, - absl::flat_hash_set* black_set) const; + absl::flat_hash_set* deny_set) const; void FindTensorListImplicitFloat32Edges( const absl::flat_hash_set& tensor_list_nodes, std::vector* implicit_data_edges) const; void AddAllowlistOps(absl::flat_hash_set* allow_set) const; - void PropagateBlackFwdThroughClearAndGray( - absl::flat_hash_set* black_set) const; + void PropagateDenyFwdThroughClearAndInfer( + absl::flat_hash_set* deny_set) const; void ForceColorMatchBetweenTensorListOps( const absl::flat_hash_set& tensor_list_nodes, absl::flat_hash_set* allow_set, - absl::flat_hash_set* black_set) const; - void AddClearAndGrayToAllowIfBetweenAllow( - const absl::flat_hash_set& black_set, + absl::flat_hash_set* deny_set) const; + void AddClearAndInferToAllowIfBetweenAllow( + const absl::flat_hash_set& deny_set, absl::flat_hash_set* allow_set) const; - void PropagateAllowThroughClear(const absl::flat_hash_set& black_set, + void PropagateAllowThroughClear(const absl::flat_hash_set& deny_set, absl::flat_hash_set* allow_set) const; Status ForceColorMatchOnRecurrentEdges( absl::flat_hash_set* allow_set) const; @@ -1006,8 +1006,8 @@ class AutoMixedPrecisionImpl { bool force_all_fp16_; AutoMixedPrecisionMode mode_; gtl::FlatSet f16_allowlist_; - gtl::FlatSet f16_blacklist_; - 
gtl::FlatSet f16_graylist_; + gtl::FlatSet f16_denylist_; + gtl::FlatSet f16_inferlist_; gtl::FlatSet f16_clearlist_; absl::flat_hash_set should_process_nodes_; DataType target_dtype_; // Either DT_HALF or DT_BFLOAT16 @@ -1083,12 +1083,12 @@ Status AutoMixedPrecisionImpl::PrintDebugLogs(bool preop, size_t timestamp) { for (const auto& x : mp_lists->AllowList()) { f << x << "\n"; } - f << "\nBlackList:\n"; - for (const auto& x : mp_lists->BlackList()) { + f << "\nDenyList:\n"; + for (const auto& x : mp_lists->DenyList()) { f << x << "\n"; } - f << "\nGrayList:\n"; - for (const auto& x : mp_lists->GrayList()) { + f << "\nInferList:\n"; + for (const auto& x : mp_lists->InferList()) { f << x << "\n"; } f << "\nClearList:\n"; @@ -1255,11 +1255,11 @@ Status AutoMixedPrecisionImpl::Optimize() { std::unique_ptr mp_lists = get_mixed_precision_lists(); f16_allowlist_ = mp_lists->AllowList(); - f16_blacklist_ = mp_lists->BlackList(); - f16_graylist_ = mp_lists->GrayList(); + f16_denylist_ = mp_lists->DenyList(); + f16_inferlist_ = mp_lists->InferList(); f16_clearlist_ = mp_lists->ClearList(); - TF_RETURN_IF_ERROR(ValidateLists(f16_allowlist_, f16_blacklist_, - f16_graylist_, f16_clearlist_)); + TF_RETURN_IF_ERROR(ValidateLists(f16_allowlist_, f16_denylist_, + f16_inferlist_, f16_clearlist_)); size_t timestamp = Env::Default()->NowMicros() / 1000; TF_RETURN_IF_ERROR(PrintDebugLogs(/* preop = */ true, timestamp)); @@ -1294,11 +1294,11 @@ Status AutoMixedPrecisionImpl::Optimize() { TF_RETURN_IF_ERROR( graph_type_view_.InitializeFromGraph(*graph_, node_type_map_)); - absl::flat_hash_set black_set; + absl::flat_hash_set deny_set; std::vector> tensor_list_clusters; - FindFloat32TensorListOpClustersAndBlacklistUnsafe(&tensor_list_clusters, - &black_set); + FindFloat32TensorListOpClustersAndDenylistUnsafe(&tensor_list_clusters, + &deny_set); std::vector ephemeral_edges; for (const auto& cluster : tensor_list_clusters) { VLOG(1) << "Found safe Tensor List cluster of size " << cluster.size(); @@ -1320,14 +1320,14 @@ Status AutoMixedPrecisionImpl::Optimize() { // This is done under the assumption that allowlist ops are always // numerically-safe in f16 and that they are the most important ops for // improving performance. - // 2) Add nodes to the black_set iff they are numerically-dangerous (aka - // "blacklist" ops) or they are on a forward path from a blacklist node to - // a black/gray node (including the node at the end of the path) through - // non-numerically-dangerous ops (aka "greylist" and "clearlist" ops). + // 2) Add nodes to the deny_set iff they are numerically-dangerous (aka + // "denylist" ops) or they are on a forward path from a denylist node to + // a deny/infer node (including the node at the end of the path) through + // non-numerically-dangerous ops (aka "inferlist" and "clearlist" ops). // This is done to prevent numerically-dangerous ops and their downstream // effects from being changed to f16, which would risk breaking the // numerical accuracy of the model. - // 3) For all remaining nodes that are not considered dangerous (greylist + // 3) For all remaining nodes that are not considered dangerous (inferlist // and clearlist ops), find those that are between (i.e., both upstream // and downstream of) allow nodes, and add them to the allow_set. // This is done to avoid unnecessary casts between allowlist ops. 
@@ -1346,29 +1346,29 @@ Status AutoMixedPrecisionImpl::Optimize() { return Status::OK(); } - VLOG(2) << "Beginning pass 2 to propagate black forwards from blacklist ops " - "through clear/graylist ops"; - PropagateBlackFwdThroughClearAndGray(&black_set); + VLOG(2) << "Beginning pass 2 to propagate deny forwards from denylist ops " + "through clear/inferlist ops"; + PropagateDenyFwdThroughClearAndInfer(&deny_set); VLOG(2) << "Finished pass 2"; VLOG(2) << "Forcing color match between data structure ops"; for (const auto& cluster : tensor_list_clusters) { - ForceColorMatchBetweenTensorListOps(cluster, &allow_set, &black_set); + ForceColorMatchBetweenTensorListOps(cluster, &allow_set, &deny_set); } - VLOG(2) << "Beginning pass 3 to set clear and gray nodes to allow if they " + VLOG(2) << "Beginning pass 3 to set clear and infer nodes to allow if they " "are between allow ops"; - AddClearAndGrayToAllowIfBetweenAllow(black_set, &allow_set); + AddClearAndInferToAllowIfBetweenAllow(deny_set, &allow_set); VLOG(2) << "Finished pass 3"; VLOG(2) << "Beginning pass 4 to propagate allow from allow nodes through " "clearlist ops"; - PropagateAllowThroughClear(black_set, &allow_set); + PropagateAllowThroughClear(deny_set, &allow_set); VLOG(2) << "Finished pass 4"; VLOG(2) << "Forcing color match between data structure ops"; for (const auto& cluster : tensor_list_clusters) { - ForceColorMatchBetweenTensorListOps(cluster, &allow_set, &black_set); + ForceColorMatchBetweenTensorListOps(cluster, &allow_set, &deny_set); } VLOG(2) << "Forcing color match on loop edges"; @@ -1426,11 +1426,11 @@ bool AutoMixedPrecisionImpl::IsSourceOrSinkOp(const string& op) const { // Finds all clusters of float32 Tensor List nodes that are connected via their // handle edges. Unsafe clusters (those with unprocessable nodes, or with edges // that cross untraversable boundaries via _Arg, _Ret, PartitionedCall etc. -// nodes) are added to black_set. The caller should paint all nodes in a cluster +// nodes) are added to deny_set. The caller should paint all nodes in a cluster // the same color, as they may all refer to the same Tensor List. -void AutoMixedPrecisionImpl::FindFloat32TensorListOpClustersAndBlacklistUnsafe( +void AutoMixedPrecisionImpl::FindFloat32TensorListOpClustersAndDenylistUnsafe( std::vector>* tensor_list_clusters, - absl::flat_hash_set* black_set) const { + absl::flat_hash_set* deny_set) const { absl::flat_hash_set tensor_list_prop_set; for (int root_idx = 0; root_idx < graph_type_view_.num_nodes(); ++root_idx) { const NodeTypeId& root = *graph_type_view_.GetNode(root_idx); @@ -1463,7 +1463,7 @@ void AutoMixedPrecisionImpl::FindFloat32TensorListOpClustersAndBlacklistUnsafe( cluster.insert(node); if (!ShouldProcess(*node)) { // The cluster contains an un-processable node. - black_set->insert(root_fp32_idx); + deny_set->insert(root_fp32_idx); } // TODO(benbarsdell): In a theoretical pathological // case of a Tensor List of Tensor List handles, the @@ -1471,7 +1471,7 @@ void AutoMixedPrecisionImpl::FindFloat32TensorListOpClustersAndBlacklistUnsafe( // sink. } else if (IsSourceOrSinkOp(node->op())) { // The cluster crosses an untraversable boundary. 
- black_set->insert(root_fp32_idx); + deny_set->insert(root_fp32_idx); } })); tensor_list_clusters->push_back(cluster); @@ -1534,21 +1534,21 @@ void AutoMixedPrecisionImpl::AddAllowlistOps( } } -// Adds nodes to black_set iff they are on the blacklist or they are on a -// forward path from a blacklist node to a black/gray node (including the node -// at the end of the path) through clear and gray nodes. -// E.g., black -> gray -> clear -> gray -> clear -> allow -> gray -// becomes: black -> black -> black -> black -> clear -> allow -> gray. -void AutoMixedPrecisionImpl::PropagateBlackFwdThroughClearAndGray( - absl::flat_hash_set* black_set) const { +// Adds nodes to deny_set iff they are on the denylist or they are on a +// forward path from a denylist node to a deny/infer node (including the node +// at the end of the path) through clear and infer nodes. +// E.g., deny -> infer -> clear -> infer -> clear -> allow -> infer +// becomes: deny -> deny -> deny -> deny -> clear -> allow -> infer. +void AutoMixedPrecisionImpl::PropagateDenyFwdThroughClearAndInfer( + absl::flat_hash_set* deny_set) const { if (force_all_fp16_) return; - // Find clear nodes that are upstream of black or gray. - absl::flat_hash_set upstream_of_black_or_gray_set; + // Find clear nodes that are upstream of deny or infer. + absl::flat_hash_set upstream_of_deny_or_infer_set; for (int root_idx = 0; root_idx < graph_type_view_.num_nodes(); ++root_idx) { const NodeTypeId& root = *graph_type_view_.GetNode(root_idx); - if (!(f16_blacklist_.count(root.node->op()) || - f16_graylist_.count(root.node->op()))) { + if (!(f16_denylist_.count(root.node->op()) || + f16_inferlist_.count(root.node->op()))) { continue; } DfsTypeTraversal(graph_type_view_, {&root}, @@ -1556,42 +1556,42 @@ void AutoMixedPrecisionImpl::PropagateBlackFwdThroughClearAndGray( DfsTypePredicates::Enter([&](int idx) -> bool { const NodeTypeId& item = *graph_type_view_.GetNode(idx); return idx == root_idx || - (!upstream_of_black_or_gray_set.count(idx) && + (!upstream_of_deny_or_infer_set.count(idx) && f16_clearlist_.count(item.node->op())); }), DfsTypeCallbacks::PreOrder([&](int idx) { - upstream_of_black_or_gray_set.insert(idx); + upstream_of_deny_or_infer_set.insert(idx); })); } - // Propagate black forward through nodes in upstream_of_black_or_gray_set. + // Propagate deny forward through nodes in upstream_of_deny_or_infer_set. 
for (int root_idx = 0; root_idx < graph_type_view_.num_nodes(); ++root_idx) { const NodeTypeId& root = *graph_type_view_.GetNode(root_idx); - if (black_set->count(root_idx) || !f16_blacklist_.count(root.node->op())) { + if (deny_set->count(root_idx) || !f16_denylist_.count(root.node->op())) { continue; } DfsTypeTraversal( graph_type_view_, {&root}, TypeTraversalDirection::kFollowOutputs, DfsTypePredicates::Enter([&](int idx) -> bool { - return idx == root_idx || (!black_set->count(idx) && - upstream_of_black_or_gray_set.count(idx)); + return idx == root_idx || (!deny_set->count(idx) && + upstream_of_deny_or_infer_set.count(idx)); }), DfsTypeCallbacks::PreOrder([&](int idx) { - bool inserted = black_set->insert(idx).second; + bool inserted = deny_set->insert(idx).second; if (VLOG_IS_ON(2) && inserted) { const NodeTypeId& item = *graph_type_view_.GetNode(idx); VLOG(2) << "Painting type " << item.type_attr.DebugString() << " of " << item.node->op() << " node " - << item.node->name() << " BLACK"; + << item.node->name() << " DENY"; } })); } } -void AutoMixedPrecisionImpl::AddClearAndGrayToAllowIfBetweenAllow( - const absl::flat_hash_set& black_set, +void AutoMixedPrecisionImpl::AddClearAndInferToAllowIfBetweenAllow( + const absl::flat_hash_set& deny_set, absl::flat_hash_set* allow_set) const { - // Find clear/graylist ops that are downstream of allow ops. + // Find clear/inferlist ops that are downstream of allow ops. absl::flat_hash_set downstream_of_allow_set; for (int root_idx = 0; root_idx < graph_type_view_.num_nodes(); ++root_idx) { const NodeTypeId& root = *graph_type_view_.GetNode(root_idx); @@ -1605,13 +1605,13 @@ void AutoMixedPrecisionImpl::AddClearAndGrayToAllowIfBetweenAllow( return idx == root_idx || (!downstream_of_allow_set.count(idx) && !f16_allowlist_.count(item.node->op()) && - !black_set.count(idx) && ShouldProcess(*item.node) && + !deny_set.count(idx) && ShouldProcess(*item.node) && // TODO(benbarsdell): Consider allowing propagation through // ops that are already float16 in order to reduce the number // of casts. IsFloat32(item) && SupportsF16(item) && (f16_clearlist_.count(item.node->op()) || - f16_graylist_.count(item.node->op()))); + f16_inferlist_.count(item.node->op()))); }), DfsTypeCallbacks::PreOrder( [&](int idx) { downstream_of_allow_set.insert(idx); })); @@ -1645,7 +1645,7 @@ void AutoMixedPrecisionImpl::AddClearAndGrayToAllowIfBetweenAllow( } void AutoMixedPrecisionImpl::PropagateAllowThroughClear( - const absl::flat_hash_set& black_set, + const absl::flat_hash_set& deny_set, absl::flat_hash_set* allow_set) const { // Propagate allow from allow nodes through clearlist ops. 
absl::flat_hash_set clear_prop_set; @@ -1661,7 +1661,7 @@ void AutoMixedPrecisionImpl::PropagateAllowThroughClear( DfsTypePredicates::Enter([&](int idx) -> bool { const NodeTypeId& item = *graph_type_view_.GetNode(idx); return idx == root_idx || - (!allow_set->count(idx) && !black_set.count(idx) && + (!allow_set->count(idx) && !deny_set.count(idx) && ShouldProcess(*item.node) && IsFloat32(item) && SupportsF16(item) && (f16_clearlist_.count(item.node->op())) && @@ -1727,14 +1727,14 @@ Status AutoMixedPrecisionImpl::ForceColorMatchOnRecurrentEdges( if (allow_set->erase(merge_idx)) { VLOG(2) << "Painting type T of Merge node " << graph_type_view_.GetNode(merge_idx)->node->name() - << " BLACK to match the color of its sibling Merge nodes " + << " DENY to match the color of its sibling Merge nodes " "with common NextIteration node " << node.name(); } } if (allow_set->erase(nextiter_idx)) { VLOG(2) << "Painting type T of NextIteration node " << node.name() - << " BLACK to match the color of its output Merge node(s)"; + << " DENY to match the color of its output Merge node(s)"; } } else { if (allow_set->insert(nextiter_idx).second) { @@ -1751,8 +1751,8 @@ Status AutoMixedPrecisionImpl::ForceColorMatchOnRecurrentEdges( void AutoMixedPrecisionImpl::ForceColorMatchBetweenTensorListOps( const absl::flat_hash_set& tensor_list_nodes, absl::flat_hash_set* allow_set, - absl::flat_hash_set* black_set) const { - bool any_black = false; + absl::flat_hash_set* deny_set) const { + bool any_deny = false; bool any_allow = false; std::vector node_type_idxs; node_type_idxs.reserve(tensor_list_nodes.size()); @@ -1766,24 +1766,24 @@ void AutoMixedPrecisionImpl::ForceColorMatchBetweenTensorListOps( node_type_idxs.push_back(maybe_node_type_idx.value()); } for (int node_type_idx : node_type_idxs) { - if (black_set->count(node_type_idx)) { - any_black = true; + if (deny_set->count(node_type_idx)) { + any_deny = true; break; } else if (allow_set->count(node_type_idx)) { any_allow = true; } } - if (!any_black && !any_allow) return; + if (!any_deny && !any_allow) return; for (int node_type_idx : node_type_idxs) { const NodeTypeId& node_type = *graph_type_view_.GetNode(node_type_idx); VLOG(2) << "Painting type " << node_type.type_attr.DebugString() << " of " << node_type.node->op() << " node " << node_type.node->name() << " " - << (any_black ? "BLACK" : "ALLOW") + << (any_deny ? "DENY" : "ALLOW") << " because at least one of its siblings is " - << (any_black ? "BLACK" : "ALLOW"); - if (any_black) { + << (any_deny ? "DENY" : "ALLOW"); + if (any_deny) { allow_set->erase(node_type_idx); - black_set->insert(node_type_idx); + deny_set->insert(node_type_idx); } else { allow_set->insert(node_type_idx); } diff --git a/tensorflow/core/grappler/optimizers/auto_mixed_precision_lists.h b/tensorflow/core/grappler/optimizers/auto_mixed_precision_lists.h index 6643149a6e5..805a7de9225 100644 --- a/tensorflow/core/grappler/optimizers/auto_mixed_precision_lists.h +++ b/tensorflow/core/grappler/optimizers/auto_mixed_precision_lists.h @@ -23,7 +23,7 @@ limitations under the License. namespace tensorflow { namespace grappler { -// Represents the four lists of ops: the allow list, gray list, black list, and +// Represents the four lists of ops: the allow list, infer list, deny list, and // clear list. These lists determine which ops are converted to fp16/bf16 // (referred to as 'f16' for short) and which ops stay as fp32. 
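One practical consequence of the UpdateList() hook shown further down is that each of these lists can be adjusted without rebuilding TensorFlow, through the TF_AUTO_MIXED_PRECISION_GRAPH_REWRITE_<LIST>_ADD / _REMOVE environment variables. A minimal sketch, assuming the comma-separated op-name format these variables conventionally take; "MyCustomOp" is a hypothetical op name used only for illustration.

#include <cstdlib>

int main() {
  // Add a custom op to the infer list (the list formerly called the gray list).
  setenv("TF_AUTO_MIXED_PRECISION_GRAPH_REWRITE_INFERLIST_ADD", "MyCustomOp",
         /*overwrite=*/1);
  // Example of the corresponding _REMOVE hook, here taking Relu off the clear list.
  setenv("TF_AUTO_MIXED_PRECISION_GRAPH_REWRITE_CLEARLIST_REMOVE", "Relu",
         /*overwrite=*/1);
  // ... construct and run the TensorFlow graph after this point ...
  return 0;
}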
class AutoMixedPrecisionLists { @@ -36,13 +36,13 @@ class AutoMixedPrecisionLists { virtual gtl::FlatSet AllowList() = 0; // Returns the set of ops that can run in f16 and are considered numerically- // safe (for execution in f16), but which may be made unsafe by an upstream - // blacklist op. - virtual gtl::FlatSet GrayList() = 0; + // denylist op. + virtual gtl::FlatSet InferList() = 0; // Returns the set of ops that are considered numerically-dangerous (i.e., // unsafe for execution in f16) and whose effects may also be observed in // downstream nodes (e.g. for f16, in Exp -> Add, the Add is unsafe due to // the Exp). - virtual gtl::FlatSet BlackList() = 0; + virtual gtl::FlatSet DenyList() = 0; // Returns the set of ops that do not have numerically-significant effects // (i.e., they are always considered safe for execution in f16 precision), and // can run in f16. @@ -51,10 +51,11 @@ class AutoMixedPrecisionLists { protected: // Adds or removes ops from list if certain environmental variables are set. static void UpdateList(const string& list_name, gtl::FlatSet* list) { - CHECK(list_name == "ALLOWLIST" || list_name == "GRAYLIST" || // Crash OK. - list_name == "BLACKLIST" || list_name == "CLEARLIST" || + CHECK(list_name == "ALLOWLIST" || list_name == "INFERLIST" || // Crash OK. + list_name == "DENYLIST" || list_name == "CLEARLIST" || // TODO(reedwm): for bkwds compat; remove when no longer necessary: - list_name == "WHITELIST"); + list_name == "WHITELIST" || list_name == "GRAYLIST" || + list_name == "BLACKLIST"); string add_env_var = "TF_AUTO_MIXED_PRECISION_GRAPH_REWRITE_" + list_name + "_ADD"; string remove_env_var = @@ -154,7 +155,7 @@ class AutoMixedPrecisionListsCuda : public AutoMixedPrecisionLists { return list; } - gtl::FlatSet GrayList() override { + gtl::FlatSet InferList() override { if (IsPseudoFastMath()) { return gtl::FlatSet{}; } @@ -204,11 +205,14 @@ class AutoMixedPrecisionListsCuda : public AutoMixedPrecisionLists { "Tanh", "TanhGrad", }; + UpdateList("INFERLIST", &list); + // For backwards compatibility, keeping the original env variable here. + // TODO(reedwm): This should be removed if we don't have active users. UpdateList("GRAYLIST", &list); return list; } - gtl::FlatSet BlackList() override { + gtl::FlatSet DenyList() override { if (IsPseudoFastMath()) { return gtl::FlatSet{}; } @@ -224,6 +228,9 @@ class AutoMixedPrecisionListsCuda : public AutoMixedPrecisionLists { "SparseSoftmaxCrossEntropyWithLogits", "Sum", }; + UpdateList("DENYLIST", &list); + // For backwards compatibility, keeping the original env variable here. + // TODO(reedwm): This should be removed if we don't have active users. UpdateList("BLACKLIST", &list); return list; } @@ -344,7 +351,7 @@ class AutoMixedPrecisionListsMkl : public AutoMixedPrecisionLists { AutoMixedPrecisionListsMkl() {} // Only ops which are supported by MKL in bfloat16 should be added to the - // allow list, gray list, or clear list. + // allow list, infer list, or clear list. gtl::FlatSet AllowList() override { auto list = gtl::FlatSet{"Conv2D", "Conv2DBackpropFilter", @@ -360,10 +367,13 @@ class AutoMixedPrecisionListsMkl : public AutoMixedPrecisionLists { "BatchMatMulV2"}; UpdateList("ALLOWLIST", &list); + // For backwards compatibility, keeping the original env variable here. + // TODO(reedwm): This should be removed if we don't have active users. 
+ UpdateList("WHITELIST", &list); return list; } - gtl::FlatSet GrayList() override { + gtl::FlatSet InferList() override { auto list = gtl::FlatSet{ "Add", "AddN", @@ -384,11 +394,14 @@ class AutoMixedPrecisionListsMkl : public AutoMixedPrecisionLists { "Mul", "Sub", }; + UpdateList("INFERLIST", &list); + // For backwards compatibility, keeping the original env variable here. + // TODO(reedwm): This should be removed if we don't have active users. UpdateList("GRAYLIST", &list); return list; } - gtl::FlatSet BlackList() override { + gtl::FlatSet DenyList() override { auto list = gtl::FlatSet{ "Exp", "Expm1", @@ -401,6 +414,9 @@ class AutoMixedPrecisionListsMkl : public AutoMixedPrecisionLists { "SparseSoftmaxCrossEntropyWithLogits", "Sum", }; + UpdateList("DENYLIST", &list); + // For backwards compatibility, keeping the original env variable here. + // TODO(reedwm): This should be removed if we don't have active users. UpdateList("BLACKLIST", &list); return list; } diff --git a/tensorflow/core/grappler/optimizers/auto_mixed_precision_test.cc b/tensorflow/core/grappler/optimizers/auto_mixed_precision_test.cc index eef1f4c499a..90c8bc82b70 100644 --- a/tensorflow/core/grappler/optimizers/auto_mixed_precision_test.cc +++ b/tensorflow/core/grappler/optimizers/auto_mixed_precision_test.cc @@ -160,7 +160,7 @@ class AutoMixedPrecisionTest : public GrapplerTest { return AddNode(name, op, inputs, attributes, graph); } - void TestSimpleUnaryGrayOp( + void TestSimpleUnaryInferOp( double input_min, double input_max, double atol, double rtol, const std::function& test_op_factory) { @@ -170,8 +170,8 @@ class AutoMixedPrecisionTest : public GrapplerTest { GenerateIdentityMatrix(size, size)); Output input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT); Output allow1 = ops::MatMul(s.WithOpName("allow1"), input, eye); - Output gry1 = test_op_factory(s.WithOpName("gry1"), allow1); - Output allow2 = ops::MatMul(s.WithOpName("allow2"), gry1, eye); + Output infer1 = test_op_factory(s.WithOpName("infer1"), allow1); + Output allow2 = ops::MatMul(s.WithOpName("allow2"), infer1, eye); Output fetch1 = ops::Identity(s.WithOpName("fetch1"), allow2); GrapplerItem item; item.fetch = {"fetch1"}; @@ -191,7 +191,7 @@ class AutoMixedPrecisionTest : public GrapplerTest { EXPECT_EQ(output_view.GetNode("input")->attr().at("dtype").type(), DT_FLOAT); EXPECT_EQ(output_view.GetNode("allow1")->attr().at("T").type(), DT_HALF); - EXPECT_EQ(output_view.GetNode("gry1")->attr().at("T").type(), DT_HALF); + EXPECT_EQ(output_view.GetNode("infer1")->attr().at("T").type(), DT_HALF); EXPECT_EQ(output_view.GetNode("allow2")->attr().at("T").type(), DT_HALF); auto tensors = EvaluateNodes(output, item.fetch, feed); @@ -209,10 +209,10 @@ class AutoMixedPrecisionTest : public GrapplerTest { TEST_F(AutoMixedPrecisionTest, NoOp) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); Output input = ops::Const(s.WithOpName("input"), 1.234f, {32}); - Output blk1 = ops::Exp(s.WithOpName("blk1"), input); - Output clr1 = ops::Relu(s.WithOpName("clr1"), blk1); - Output gry1 = ops::Sqrt(s.WithOpName("gry1"), clr1); - Output clr2 = ops::Relu(s.WithOpName("clr2"), gry1); + Output deny1 = ops::Exp(s.WithOpName("deny1"), input); + Output clr1 = ops::Relu(s.WithOpName("clr1"), deny1); + Output infer1 = ops::Sqrt(s.WithOpName("infer1"), clr1); + Output clr2 = ops::Relu(s.WithOpName("clr2"), infer1); Output fetch = ops::Identity(s.WithOpName("fetch"), clr2); GrapplerItem item; @@ -230,9 +230,9 @@ TEST_F(AutoMixedPrecisionTest, NoOp) { GraphView 
output_view(&output); EXPECT_EQ(output_view.GetNode("input")->attr().at("dtype").type(), DT_FLOAT); - EXPECT_EQ(output_view.GetNode("blk1")->attr().at("T").type(), DT_FLOAT); + EXPECT_EQ(output_view.GetNode("deny1")->attr().at("T").type(), DT_FLOAT); EXPECT_EQ(output_view.GetNode("clr1")->attr().at("T").type(), DT_FLOAT); - EXPECT_EQ(output_view.GetNode("gry1")->attr().at("T").type(), DT_FLOAT); + EXPECT_EQ(output_view.GetNode("infer1")->attr().at("T").type(), DT_FLOAT); EXPECT_EQ(output_view.GetNode("clr2")->attr().at("T").type(), DT_FLOAT); auto tensors = EvaluateNodes(output, item.fetch); @@ -284,16 +284,16 @@ TEST_F(AutoMixedPrecisionTest, AlreadyFp16) { TEST_F(AutoMixedPrecisionTest, Simple) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); Output input = ops::Const(s.WithOpName("input"), 1.f / 32, {32, 32}); - Output blk1 = ops::Exp(s.WithOpName("blk1"), input); - Output clr1 = ops::Relu(s.WithOpName("clr1"), blk1); - Output gry1 = ops::Sqrt(s.WithOpName("gry1"), clr1); - Output clr2 = ops::Relu(s.WithOpName("clr2"), gry1); + Output deny1 = ops::Exp(s.WithOpName("deny1"), input); + Output clr1 = ops::Relu(s.WithOpName("clr1"), deny1); + Output infer1 = ops::Sqrt(s.WithOpName("infer1"), clr1); + Output clr2 = ops::Relu(s.WithOpName("clr2"), infer1); Output allow1 = ops::MatMul(s.WithOpName("allow1"), clr2, clr2); Output clr3 = ops::Relu(s.WithOpName("clr3"), allow1); - Output gry2 = ops::Log(s.WithOpName("gry2"), clr3); - Output clr4 = ops::Relu(s.WithOpName("clr4"), gry2); - Output blk2 = ops::SparseMatMul(s.WithOpName("blk2"), clr4, clr4); - Output clr5 = ops::Relu(s.WithOpName("clr5"), blk2); + Output infer2 = ops::Log(s.WithOpName("infer2"), clr3); + Output clr4 = ops::Relu(s.WithOpName("clr4"), infer2); + Output deny2 = ops::SparseMatMul(s.WithOpName("deny2"), clr4, clr4); + Output clr5 = ops::Relu(s.WithOpName("clr5"), deny2); Output fetch = ops::Identity(s.WithOpName("fetch"), clr5); GrapplerItem item; @@ -310,16 +310,16 @@ TEST_F(AutoMixedPrecisionTest, Simple) { GraphView output_view(&output); EXPECT_EQ(output.node_size(), item.graph.node_size() + 2); EXPECT_EQ(output_view.GetNode("input")->attr().at("dtype").type(), DT_FLOAT); - EXPECT_EQ(output_view.GetNode("blk1")->attr().at("T").type(), DT_FLOAT); + EXPECT_EQ(output_view.GetNode("deny1")->attr().at("T").type(), DT_FLOAT); EXPECT_EQ(output_view.GetNode("clr1")->attr().at("T").type(), DT_FLOAT); - EXPECT_EQ(output_view.GetNode("gry1")->attr().at("T").type(), DT_FLOAT); + EXPECT_EQ(output_view.GetNode("infer1")->attr().at("T").type(), DT_FLOAT); EXPECT_EQ(output_view.GetNode("clr2")->attr().at("T").type(), DT_HALF); EXPECT_EQ(output_view.GetNode("allow1")->attr().at("T").type(), DT_HALF); EXPECT_EQ(output_view.GetNode("clr3")->attr().at("T").type(), DT_HALF); - EXPECT_EQ(output_view.GetNode("gry2")->attr().at("T").type(), DT_FLOAT); + EXPECT_EQ(output_view.GetNode("infer2")->attr().at("T").type(), DT_FLOAT); EXPECT_EQ(output_view.GetNode("clr4")->attr().at("T").type(), DT_FLOAT); - EXPECT_EQ(output_view.GetNode("blk2")->attr().at("Ta").type(), DT_FLOAT); - EXPECT_EQ(output_view.GetNode("blk2")->attr().at("Tb").type(), DT_FLOAT); + EXPECT_EQ(output_view.GetNode("deny2")->attr().at("Ta").type(), DT_FLOAT); + EXPECT_EQ(output_view.GetNode("deny2")->attr().at("Tb").type(), DT_FLOAT); EXPECT_EQ(output_view.GetNode("clr5")->attr().at("T").type(), DT_FLOAT); auto tensors = EvaluateNodes(output, item.fetch); @@ -374,13 +374,13 @@ TEST_F(AutoMixedPrecisionTest, PreserveFetches) { Output input = 
ops::Const(s.WithOpName("input"), 1.f / 32, {32, 32}); Output allow1 = ops::MatMul(s.WithOpName("allow1"), input, input); Output clr1 = ops::Relu(s.WithOpName("clr1"), allow1); - Output gry1 = ops::Sqrt(s.WithOpName("gry1"), clr1); - Output blk1 = ops::Exp(s.WithOpName("blk1"), gry1); - Output clr2 = ops::Relu(s.WithOpName("clr2"), blk1); + Output infer1 = ops::Sqrt(s.WithOpName("infer1"), clr1); + Output deny1 = ops::Exp(s.WithOpName("deny1"), infer1); + Output clr2 = ops::Relu(s.WithOpName("clr2"), deny1); Output allow2 = ops::MatMul(s.WithOpName("allow2"), clr2, clr2); Output clr3 = ops::Relu(s.WithOpName("clr3"), allow2); - Output blk2 = ops::Exp(s.WithOpName("blk2"), clr3); - Output clr4 = ops::Relu(s.WithOpName("clr4"), blk2); + Output deny2 = ops::Exp(s.WithOpName("deny2"), clr3); + Output clr4 = ops::Relu(s.WithOpName("clr4"), deny2); GrapplerItem item; item.fetch = {"allow1", "clr2", "clr3"}; @@ -398,12 +398,12 @@ TEST_F(AutoMixedPrecisionTest, PreserveFetches) { EXPECT_EQ(output_view.GetNode("input")->attr().at("dtype").type(), DT_FLOAT); EXPECT_EQ(output_view.GetNode("allow1")->attr().at("T").type(), DT_FLOAT); EXPECT_EQ(output_view.GetNode("clr1")->attr().at("T").type(), DT_FLOAT); - EXPECT_EQ(output_view.GetNode("gry1")->attr().at("T").type(), DT_FLOAT); - EXPECT_EQ(output_view.GetNode("blk1")->attr().at("T").type(), DT_FLOAT); + EXPECT_EQ(output_view.GetNode("infer1")->attr().at("T").type(), DT_FLOAT); + EXPECT_EQ(output_view.GetNode("deny1")->attr().at("T").type(), DT_FLOAT); EXPECT_EQ(output_view.GetNode("clr2")->attr().at("T").type(), DT_FLOAT); EXPECT_EQ(output_view.GetNode("allow2")->attr().at("T").type(), DT_HALF); EXPECT_EQ(output_view.GetNode("clr3")->attr().at("T").type(), DT_FLOAT); - EXPECT_EQ(output_view.GetNode("blk2")->attr().at("T").type(), DT_FLOAT); + EXPECT_EQ(output_view.GetNode("deny2")->attr().at("T").type(), DT_FLOAT); EXPECT_EQ(output_view.GetNode("clr4")->attr().at("T").type(), DT_FLOAT); auto tensors = EvaluateNodes(output, item.fetch); @@ -419,11 +419,11 @@ TEST_F(AutoMixedPrecisionTest, PreserveCPUNodes) { Output input = ops::Const(s.WithOpName("input"), 1.f / 32, {32, 32}); Output clr1 = ops::Relu(s.WithOpName("clr1"), input); Output allow1 = ops::MatMul(s.WithOpName("allow1"), clr1, clr1); - Output gry1 = ops::Tanh(s.WithOpName("gry1"), allow1); + Output infer1 = ops::Tanh(s.WithOpName("infer1"), allow1); Output allow2 = ops::MatMul(s.WithOpName("allow2").WithDevice( "/job:localhost/replica:0/task:0/device:CPU:0"), - gry1, gry1); + infer1, infer1); Output clr2 = ops::Relu(s.WithOpName("clr2"), allow2); Output fetch = ops::Identity(s.WithOpName("fetch"), clr2); @@ -443,7 +443,7 @@ TEST_F(AutoMixedPrecisionTest, PreserveCPUNodes) { EXPECT_EQ(output_view.GetNode("input")->attr().at("dtype").type(), DT_FLOAT); EXPECT_EQ(output_view.GetNode("clr1")->attr().at("T").type(), DT_HALF); EXPECT_EQ(output_view.GetNode("allow1")->attr().at("T").type(), DT_HALF); - EXPECT_EQ(output_view.GetNode("gry1")->attr().at("T").type(), DT_FLOAT); + EXPECT_EQ(output_view.GetNode("infer1")->attr().at("T").type(), DT_FLOAT); EXPECT_EQ(output_view.GetNode("allow2")->attr().at("T").type(), DT_FLOAT); EXPECT_EQ(output_view.GetNode("clr2")->attr().at("T").type(), DT_FLOAT); @@ -521,9 +521,9 @@ TEST_F(AutoMixedPrecisionTest, FusedBatchNorm) { s.WithOpName("bng1"), fbn1, allow1, scale, fbn1_rs1, fbn1_rs2, ops::FusedBatchNormGrad::DataFormat("NHWC")) .x_backprop; - Output gry1 = ops::Add(s.WithOpName("gry1"), fbn1, bng1); + Output infer1 = ops::Add(s.WithOpName("infer1"), fbn1, 
bng1); Output allow2 = - ops::Conv2D(s.WithOpName("allow2"), gry1, weight, {1, 1, 1, 1}, "SAME", + ops::Conv2D(s.WithOpName("allow2"), infer1, weight, {1, 1, 1, 1}, "SAME", ops::Conv2D::DataFormat("NHWC")); Output fetch = ops::Identity(s.WithOpName("fetch"), allow2); @@ -547,7 +547,7 @@ TEST_F(AutoMixedPrecisionTest, FusedBatchNorm) { EXPECT_EQ(output_view.GetNode("bng1")->op(), "FusedBatchNormGradV2"); EXPECT_EQ(output_view.GetNode("bng1")->attr().at("T").type(), DT_HALF); EXPECT_EQ(output_view.GetNode("bng1")->attr().at("U").type(), DT_FLOAT); - EXPECT_EQ(output_view.GetNode("gry1")->attr().at("T").type(), DT_HALF); + EXPECT_EQ(output_view.GetNode("infer1")->attr().at("T").type(), DT_HALF); EXPECT_EQ(output_view.GetNode("allow2")->attr().at("T").type(), DT_HALF); auto tensors = EvaluateNodes(output, item.fetch); @@ -563,10 +563,10 @@ TEST_F(AutoMixedPrecisionTest, RepeatedAndListTypeAttrs) { Output input = ops::Const(s.WithOpName("input"), 1.f / 32, {32, 32}); Output allow1 = ops::MatMul(s.WithOpName("allow1"), input, input); auto clr1_op = ops::IdentityN(s.WithOpName("clr1"), {allow1, allow1, allow1}); - Output gry1 = - ops::AddN(s.WithOpName("gry1"), + Output infer1 = + ops::AddN(s.WithOpName("infer1"), {clr1_op.output[0], clr1_op.output[1], clr1_op.output[2]}); - Output allow2 = ops::MatMul(s.WithOpName("allow2"), gry1, gry1); + Output allow2 = ops::MatMul(s.WithOpName("allow2"), infer1, infer1); Output fetch = ops::Identity(s.WithOpName("fetch"), allow2); GrapplerItem item; @@ -587,7 +587,7 @@ TEST_F(AutoMixedPrecisionTest, RepeatedAndListTypeAttrs) { for (auto type : output_view.GetNode("clr1")->attr().at("T").list().type()) { EXPECT_EQ(type, DT_HALF); } - EXPECT_EQ(output_view.GetNode("gry1")->attr().at("T").type(), DT_HALF); + EXPECT_EQ(output_view.GetNode("infer1")->attr().at("T").type(), DT_HALF); EXPECT_EQ(output_view.GetNode("allow2")->attr().at("T").type(), DT_HALF); auto tensors = EvaluateNodes(output, item.fetch); @@ -633,17 +633,17 @@ TEST_F(AutoMixedPrecisionTest, ExistingCast) { TEST_F(AutoMixedPrecisionTest, RecurrentEdgeColorMismatch) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); Output input = ops::Const(s.WithOpName("input"), 1.f / 32, {32, 32}); - Output blk1 = ops::Exp(s.WithOpName("blk1"), input); + Output deny1 = ops::Exp(s.WithOpName("deny1"), input); Output ent1 = - ops::internal::Enter(s.WithOpName("ent1"), blk1, "loop1").output; + ops::internal::Enter(s.WithOpName("ent1"), deny1, "loop1").output; // Note that the second input is later replaced with "nxt1". Output mrg1 = ops::Merge(s.WithOpName("mrg1"), {ent1, ent1}).output; // For simplicity, the loop condition is constant false. 
Output con1 = ops::Const(s.WithOpName("con1"), false, {}); Output lpc1 = ops::LoopCond(s.WithOpName("lpc1"), con1).output; auto swt1 = ops::Switch(s.WithOpName("swt1"), mrg1, lpc1); - Output gry1 = ops::Sqrt(s.WithOpName("gry1"), swt1.output_true); - Output allow1 = ops::MatMul(s.WithOpName("allow1"), gry1, gry1); + Output infer1 = ops::Sqrt(s.WithOpName("infer1"), swt1.output_true); + Output allow1 = ops::MatMul(s.WithOpName("allow1"), infer1, infer1); Output nxt1 = ops::NextIteration(s.WithOpName("nxt1"), allow1); Output ext1 = ops::internal::Exit(s.WithOpName("ext1"), swt1.output_false); Output fetch = ops::Identity(s.WithOpName("fetch"), ext1); @@ -671,14 +671,14 @@ TEST_F(AutoMixedPrecisionTest, RecurrentEdgeColorMismatch) { GraphView output_view(&output); EXPECT_EQ(output.node_size(), item.graph.node_size() + 2); - // Note that mrg1 gets painted black because it is between blk1 and gry1. This - // forces nxt1 and mrg2 to be painted black as well (they would otherwise be - // painted allow because they are clear and have a direct path to allow1). - EXPECT_EQ(output_view.GetNode("blk1")->attr().at("T").type(), DT_FLOAT); + // Note that mrg1 gets painted deny because it is between deny1 and infer1. + // This forces nxt1 and mrg2 to be painted deny as well (they would otherwise + // be painted allow because they are clear and have a direct path to allow1). + EXPECT_EQ(output_view.GetNode("deny1")->attr().at("T").type(), DT_FLOAT); EXPECT_EQ(output_view.GetNode("ent1")->attr().at("T").type(), DT_FLOAT); EXPECT_EQ(output_view.GetNode("mrg1")->attr().at("T").type(), DT_FLOAT); EXPECT_EQ(output_view.GetNode("swt1")->attr().at("T").type(), DT_FLOAT); - EXPECT_EQ(output_view.GetNode("gry1")->attr().at("T").type(), DT_FLOAT); + EXPECT_EQ(output_view.GetNode("infer1")->attr().at("T").type(), DT_FLOAT); EXPECT_EQ(output_view.GetNode("allow1")->attr().at("T").type(), DT_HALF); EXPECT_EQ(output_view.GetNode("nxt1")->attr().at("T").type(), DT_FLOAT); EXPECT_EQ(output_view.GetNode("ext1")->attr().at("T").type(), DT_FLOAT); @@ -711,8 +711,8 @@ TEST_F(AutoMixedPrecisionTest, TensorListSetGet) { Output tl1r1 = ops::TensorListGetItem(s.WithOpName("tl1r1"), tl1rs, idx2, shape, DT_FLOAT) .item; - Output gry1 = ops::Tanh(s.WithOpName("gry1"), tl1r1); - Output allow2 = ops::MatMul(s.WithOpName("allow2"), gry1, gry1); + Output infer1 = ops::Tanh(s.WithOpName("infer1"), tl1r1); + Output allow2 = ops::MatMul(s.WithOpName("allow2"), infer1, infer1); auto tl1w3 = ops::TensorListSetItem(s.WithOpName("tl1w3"), tl1.handle, idx3, allow2); Output tl1r2 = @@ -748,7 +748,7 @@ TEST_F(AutoMixedPrecisionTest, TensorListSetGet) { EXPECT_EQ(output_view.GetNode("allow1")->attr().at("T").type(), DT_HALF); EXPECT_EQ(output_view.GetNode("tl1w2")->attr().at(type_key).type(), DT_HALF); EXPECT_EQ(output_view.GetNode("tl1r1")->attr().at(type_key).type(), DT_HALF); - EXPECT_EQ(output_view.GetNode("gry1")->attr().at("T").type(), DT_HALF); + EXPECT_EQ(output_view.GetNode("infer1")->attr().at("T").type(), DT_HALF); EXPECT_EQ(output_view.GetNode("allow2")->attr().at("T").type(), DT_HALF); EXPECT_EQ(output_view.GetNode("tl1w3")->attr().at(type_key).type(), DT_HALF); EXPECT_EQ(output_view.GetNode("tl2")->attr().at(type_key).type(), DT_FLOAT); @@ -776,8 +776,8 @@ TEST_F(AutoMixedPrecisionTest, TensorListPushPop) { Output tl1r1 = ops::TensorListPopBack(s.WithOpName("tl1r1"), tl1w2.output_handle, shape, DT_FLOAT) .tensor; - Output gry1 = ops::Tanh(s.WithOpName("gry1"), tl1r1); - Output allow2 = ops::MatMul(s.WithOpName("allow2"), gry1, gry1); 
+ Output infer1 = ops::Tanh(s.WithOpName("infer1"), tl1r1); + Output allow2 = ops::MatMul(s.WithOpName("allow2"), infer1, infer1); auto tl1w3 = ops::TensorListPushBack(s.WithOpName("tl1w3"), tl1.handle, allow2); Output tl1r2 = ops::TensorListPopBack(s.WithOpName("tl1r2"), @@ -811,7 +811,7 @@ TEST_F(AutoMixedPrecisionTest, TensorListPushPop) { EXPECT_EQ(output_view.GetNode("allow1")->attr().at("T").type(), DT_HALF); EXPECT_EQ(output_view.GetNode("tl1w2")->attr().at(type_key).type(), DT_HALF); EXPECT_EQ(output_view.GetNode("tl1r1")->attr().at(type_key).type(), DT_HALF); - EXPECT_EQ(output_view.GetNode("gry1")->attr().at("T").type(), DT_HALF); + EXPECT_EQ(output_view.GetNode("infer1")->attr().at("T").type(), DT_HALF); EXPECT_EQ(output_view.GetNode("allow2")->attr().at("T").type(), DT_HALF); EXPECT_EQ(output_view.GetNode("tl1w3")->attr().at(type_key).type(), DT_HALF); EXPECT_EQ(output_view.GetNode("tl2")->attr().at(type_key).type(), DT_FLOAT); @@ -835,8 +835,8 @@ TEST_F(AutoMixedPrecisionTest, TensorListFromTensor) { Output tl1r1 = ops::TensorListStack(s.WithOpName("tl1r1"), tl1.output_handle, shape, DT_FLOAT) .tensor; - Output gry1 = ops::Tanh(s.WithOpName("gry1"), tl1r1); - Output allow2 = ops::MatMul(s.WithOpName("allow2"), gry1, gry1); + Output infer1 = ops::Tanh(s.WithOpName("infer1"), tl1r1); + Output allow2 = ops::MatMul(s.WithOpName("allow2"), infer1, infer1); Output fetch1 = ops::Identity(s.WithOpName("fetch1"), allow2); // This tests that a allow-painted object node (tl2) will force an unpainted @@ -863,7 +863,7 @@ TEST_F(AutoMixedPrecisionTest, TensorListFromTensor) { EXPECT_EQ(output_view.GetNode("allow1")->attr().at("T").type(), DT_HALF); EXPECT_EQ(output_view.GetNode("tl1")->attr().at(type_key).type(), DT_HALF); EXPECT_EQ(output_view.GetNode("tl1r1")->attr().at(type_key).type(), DT_HALF); - EXPECT_EQ(output_view.GetNode("gry1")->attr().at("T").type(), DT_HALF); + EXPECT_EQ(output_view.GetNode("infer1")->attr().at("T").type(), DT_HALF); EXPECT_EQ(output_view.GetNode("allow2")->attr().at("T").type(), DT_HALF); EXPECT_EQ(output_view.GetNode("tl2")->attr().at(type_key).type(), DT_HALF); EXPECT_EQ(output_view.GetNode("tl2w1")->attr().at(type_key).type(), DT_HALF); @@ -902,8 +902,8 @@ TEST_F(AutoMixedPrecisionTest, TensorListPushBackBatchAndConcatLists) { Output tl3r1 = ops::TensorListPopBack(s.WithOpName("tl3r1"), tl3, shape, DT_FLOAT) .tensor; - Output gry1 = ops::Tanh(s.WithOpName("gry1"), tl3r1); - Output allow2 = ops::MatMul(s.WithOpName("allow2"), gry1, gry1); + Output infer1 = ops::Tanh(s.WithOpName("infer1"), tl3r1); + Output allow2 = ops::MatMul(s.WithOpName("allow2"), infer1, infer1); Output fetch1 = ops::Identity(s.WithOpName("fetch1"), allow2); GrapplerItem item; @@ -922,7 +922,7 @@ TEST_F(AutoMixedPrecisionTest, TensorListPushBackBatchAndConcatLists) { const char* type_key = "element_dtype"; EXPECT_EQ(output_view.GetNode("allow1")->attr().at("T").type(), DT_HALF); EXPECT_EQ(output_view.GetNode("allow2")->attr().at("T").type(), DT_HALF); - EXPECT_EQ(output_view.GetNode("gry1")->attr().at("T").type(), DT_HALF); + EXPECT_EQ(output_view.GetNode("infer1")->attr().at("T").type(), DT_HALF); EXPECT_EQ(output_view.GetNode("tl1")->attr().at(type_key).type(), DT_HALF); EXPECT_EQ(output_view.GetNode("tl2")->attr().at(type_key).type(), DT_HALF); EXPECT_EQ(output_view.GetNode("tl3")->attr().at(type_key).type(), DT_HALF); @@ -967,22 +967,25 @@ TEST_F(AutoMixedPrecisionTest, TensorListThroughFunction) { tensorflow::Input shape = {32, 32}; Output input = ops::Const(s.WithOpName("input"), 
1.f / 32, {32, 32}); Output allow1 = ops::MatMul(s.WithOpName("allow1"), input, input); - Output gry1 = ops::Tanh(s.WithOpName("gry1"), allow1); + Output infer1 = ops::Tanh(s.WithOpName("infer1"), allow1); auto tl1 = ops::EmptyTensorList(s.WithOpName("tl1"), {32, 32}, 8, DT_FLOAT); - auto tl1w1 = ops::TensorListPushBack(s.WithOpName("tl1w1"), tl1.handle, gry1); - auto _gry1 = tensorflow::ops::AsNodeOut(s, gry1); + auto tl1w1 = + ops::TensorListPushBack(s.WithOpName("tl1w1"), tl1.handle, infer1); + auto _infer1 = tensorflow::ops::AsNodeOut(s, infer1); auto _tl1w1_handle = tensorflow::ops::AsNodeOut(s, tl1w1.output_handle); auto builder = tensorflow::NodeBuilder("Func1", "Func1", s.graph()->op_registry()); tensorflow::Node* func1_op; - TF_CHECK_OK( - builder.Input(_tl1w1_handle).Input(_gry1).Finalize(s.graph(), &func1_op)); + TF_CHECK_OK(builder.Input(_tl1w1_handle) + .Input(_infer1) + .Finalize(s.graph(), &func1_op)); Output func1_handle(func1_op, 0); Output tl1r1 = ops::TensorListPopBack(s.WithOpName("tl1r1"), func1_handle, shape, DT_FLOAT) .tensor; auto tl2 = ops::EmptyTensorList(s.WithOpName("tl2"), {32, 32}, 8, DT_FLOAT); - auto tl2w1 = ops::TensorListPushBack(s.WithOpName("tl2w1"), tl2.handle, gry1); + auto tl2w1 = + ops::TensorListPushBack(s.WithOpName("tl2w1"), tl2.handle, infer1); Output tl2r1 = ops::TensorListPopBack(s.WithOpName("tl2r1"), tl2w1.output_handle, shape, DT_FLOAT) .tensor; @@ -1004,7 +1007,7 @@ TEST_F(AutoMixedPrecisionTest, TensorListThroughFunction) { const char* type_key = "element_dtype"; EXPECT_EQ(output_view.GetNode("allow1")->attr().at("T").type(), DT_HALF); EXPECT_EQ(output_view.GetNode("allow2")->attr().at("T").type(), DT_HALF); - EXPECT_EQ(output_view.GetNode("gry1")->attr().at("T").type(), DT_HALF); + EXPECT_EQ(output_view.GetNode("infer1")->attr().at("T").type(), DT_HALF); EXPECT_EQ(output_view.GetNode("tl2")->attr().at(type_key).type(), DT_HALF); EXPECT_EQ(output_view.GetNode("tl2w1")->attr().at(type_key).type(), DT_HALF); EXPECT_EQ(output_view.GetNode("tl2r1")->attr().at(type_key).type(), DT_HALF); @@ -1069,7 +1072,7 @@ TEST_F(AutoMixedPrecisionTest, BatchMatMul) { } TEST_F(AutoMixedPrecisionTest, EluOp) { - TestSimpleUnaryGrayOp( + TestSimpleUnaryInferOp( -5, 5, 1.0e-3, 1.0e-3, [](const tensorflow::Scope& scope, Output input) -> Output { return ops::Elu(scope, input); @@ -1077,7 +1080,7 @@ TEST_F(AutoMixedPrecisionTest, EluOp) { } TEST_F(AutoMixedPrecisionTest, ErfOp) { - TestSimpleUnaryGrayOp( + TestSimpleUnaryInferOp( -5, 5, 1.0e-3, -1, [](const tensorflow::Scope& scope, Output input) -> Output { return ops::Erf(scope, input); @@ -1085,7 +1088,7 @@ TEST_F(AutoMixedPrecisionTest, ErfOp) { } TEST_F(AutoMixedPrecisionTest, ErfcOp) { - TestSimpleUnaryGrayOp( + TestSimpleUnaryInferOp( -5, 5, 1.0e-3, -1, [](const tensorflow::Scope& scope, Output input) -> Output { return ops::Erfc(scope, input); @@ -1093,7 +1096,7 @@ TEST_F(AutoMixedPrecisionTest, ErfcOp) { } TEST_F(AutoMixedPrecisionTest, InvOp) { - TestSimpleUnaryGrayOp( + TestSimpleUnaryInferOp( 0.01, 10, -1, 1.0e-3, [](const tensorflow::Scope& scope, Output input) -> Output { return ops::Inv(scope, input); @@ -1101,7 +1104,7 @@ TEST_F(AutoMixedPrecisionTest, InvOp) { } TEST_F(AutoMixedPrecisionTest, LogOp) { - TestSimpleUnaryGrayOp( + TestSimpleUnaryInferOp( 0.01, 10, 1.0e-3, 2.0e-3, [](const tensorflow::Scope& scope, Output input) -> Output { return ops::Log(scope, input); @@ -1109,7 +1112,7 @@ TEST_F(AutoMixedPrecisionTest, LogOp) { } TEST_F(AutoMixedPrecisionTest, Log1pOp) { - TestSimpleUnaryGrayOp( + 
TestSimpleUnaryInferOp( -0.99, 9, 1.0e-3, 5.0e-3, [](const tensorflow::Scope& scope, Output input) -> Output { return ops::Log1p(scope, input); @@ -1117,7 +1120,7 @@ TEST_F(AutoMixedPrecisionTest, Log1pOp) { } TEST_F(AutoMixedPrecisionTest, LogSoftmaxOp) { - TestSimpleUnaryGrayOp( + TestSimpleUnaryInferOp( -8, 8, -1, 1.0e-2, [](const tensorflow::Scope& scope, Output input) -> Output { return ops::LogSoftmax(scope, input); @@ -1125,7 +1128,7 @@ TEST_F(AutoMixedPrecisionTest, LogSoftmaxOp) { } TEST_F(AutoMixedPrecisionTest, ReciprocalOp) { - TestSimpleUnaryGrayOp( + TestSimpleUnaryInferOp( 0.01, 10, -1, 1.0e-3, [](const tensorflow::Scope& scope, Output input) -> Output { return ops::Reciprocal(scope, input); @@ -1133,7 +1136,7 @@ TEST_F(AutoMixedPrecisionTest, ReciprocalOp) { } TEST_F(AutoMixedPrecisionTest, SigmoidOp) { - TestSimpleUnaryGrayOp( + TestSimpleUnaryInferOp( -5, 5, 1.0e-3, -1, [](const tensorflow::Scope& scope, Output input) -> Output { return ops::Sigmoid(scope, input); @@ -1141,7 +1144,7 @@ TEST_F(AutoMixedPrecisionTest, SigmoidOp) { } TEST_F(AutoMixedPrecisionTest, SoftmaxOp) { - TestSimpleUnaryGrayOp( + TestSimpleUnaryInferOp( -8, 8, 2.0e-3, -1, [](const tensorflow::Scope& scope, Output input) -> Output { return ops::Softmax(scope, input); @@ -1149,7 +1152,7 @@ TEST_F(AutoMixedPrecisionTest, SoftmaxOp) { } TEST_F(AutoMixedPrecisionTest, SoftplusOp) { - TestSimpleUnaryGrayOp( + TestSimpleUnaryInferOp( -5, 5, 1.0e-3, 1.0e-3, [](const tensorflow::Scope& scope, Output input) -> Output { return ops::Softplus(scope, input); @@ -1157,7 +1160,7 @@ TEST_F(AutoMixedPrecisionTest, SoftplusOp) { } TEST_F(AutoMixedPrecisionTest, SqrtOp) { - TestSimpleUnaryGrayOp( + TestSimpleUnaryInferOp( 0, 10, 1.0e-3, 1.0e-3, [](const tensorflow::Scope& scope, Output input) -> Output { return ops::Sqrt(scope, input); @@ -1165,7 +1168,7 @@ TEST_F(AutoMixedPrecisionTest, SqrtOp) { } TEST_F(AutoMixedPrecisionTest, TanhOp) { - TestSimpleUnaryGrayOp( + TestSimpleUnaryInferOp( -5, 5, 1.0e-3, -1, [](const tensorflow::Scope& scope, Output input) -> Output { return ops::Tanh(scope, input); @@ -1229,16 +1232,16 @@ TEST_F(AutoMixedPrecisionMklTest, AlreadyBf16) { TEST_F(AutoMixedPrecisionMklTest, Simple) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); Output input = ops::Const(s.WithOpName("input"), 1.f / 32, {32, 32}); - Output blk1 = ops::Exp(s.WithOpName("blk1"), input); - Output clr1 = ops::Relu(s.WithOpName("clr1"), blk1); - Output gry1 = ops::Sqrt(s.WithOpName("gry1"), clr1); - Output clr2 = ops::Relu(s.WithOpName("clr2"), gry1); + Output deny1 = ops::Exp(s.WithOpName("deny1"), input); + Output clr1 = ops::Relu(s.WithOpName("clr1"), deny1); + Output infer1 = ops::Sqrt(s.WithOpName("infer1"), clr1); + Output clr2 = ops::Relu(s.WithOpName("clr2"), infer1); Output allow1 = ops::MatMul(s.WithOpName("allow1"), clr2, clr2); Output clr3 = ops::Relu(s.WithOpName("clr3"), allow1); - Output blk2 = ops::Log(s.WithOpName("blk2"), clr3); - Output clr4 = ops::Relu(s.WithOpName("clr4"), blk2); - Output blk3 = ops::SparseMatMul(s.WithOpName("blk3"), clr4, clr4); - Output clr5 = ops::Relu(s.WithOpName("clr5"), blk3); + Output deny2 = ops::Log(s.WithOpName("deny2"), clr3); + Output clr4 = ops::Relu(s.WithOpName("clr4"), deny2); + Output deny3 = ops::SparseMatMul(s.WithOpName("deny3"), clr4, clr4); + Output clr5 = ops::Relu(s.WithOpName("clr5"), deny3); Output fetch = ops::Identity(s.WithOpName("fetch"), clr5); GrapplerItem item; @@ -1255,16 +1258,16 @@ TEST_F(AutoMixedPrecisionMklTest, Simple) { GraphView 
output_view(&output); EXPECT_EQ(output.node_size(), item.graph.node_size() + 2); EXPECT_EQ(output_view.GetNode("input")->attr().at("dtype").type(), DT_FLOAT); - EXPECT_EQ(output_view.GetNode("blk1")->attr().at("T").type(), DT_FLOAT); + EXPECT_EQ(output_view.GetNode("deny1")->attr().at("T").type(), DT_FLOAT); EXPECT_EQ(output_view.GetNode("clr1")->attr().at("T").type(), DT_FLOAT); - EXPECT_EQ(output_view.GetNode("gry1")->attr().at("T").type(), DT_FLOAT); + EXPECT_EQ(output_view.GetNode("infer1")->attr().at("T").type(), DT_FLOAT); EXPECT_EQ(output_view.GetNode("clr2")->attr().at("T").type(), DT_BFLOAT16); EXPECT_EQ(output_view.GetNode("allow1")->attr().at("T").type(), DT_BFLOAT16); EXPECT_EQ(output_view.GetNode("clr3")->attr().at("T").type(), DT_BFLOAT16); - EXPECT_EQ(output_view.GetNode("blk2")->attr().at("T").type(), DT_FLOAT); + EXPECT_EQ(output_view.GetNode("deny2")->attr().at("T").type(), DT_FLOAT); EXPECT_EQ(output_view.GetNode("clr4")->attr().at("T").type(), DT_FLOAT); - EXPECT_EQ(output_view.GetNode("blk3")->attr().at("Ta").type(), DT_FLOAT); - EXPECT_EQ(output_view.GetNode("blk3")->attr().at("Tb").type(), DT_FLOAT); + EXPECT_EQ(output_view.GetNode("deny3")->attr().at("Ta").type(), DT_FLOAT); + EXPECT_EQ(output_view.GetNode("deny3")->attr().at("Tb").type(), DT_FLOAT); EXPECT_EQ(output_view.GetNode("clr5")->attr().at("T").type(), DT_FLOAT); auto tensors = EvaluateNodes(output, item.fetch); @@ -1294,8 +1297,8 @@ TEST_F(AutoMixedPrecisionMklTest, TensorListSetGet) { Output tl1r1 = ops::TensorListGetItem(s.WithOpName("tl1r1"), tl1rs, idx2, shape, DT_FLOAT) .item; - Output gry1 = ops::Mul(s.WithOpName("gry1"), tl1r1, tl1r1); - Output allow2 = ops::MatMul(s.WithOpName("allow2"), gry1, gry1); + Output infer1 = ops::Mul(s.WithOpName("infer1"), tl1r1, tl1r1); + Output allow2 = ops::MatMul(s.WithOpName("allow2"), infer1, infer1); auto tl1w3 = ops::TensorListSetItem(s.WithOpName("tl1w3"), tl1.handle, idx3, allow2); Output tl1r2 = @@ -1335,7 +1338,7 @@ TEST_F(AutoMixedPrecisionMklTest, TensorListSetGet) { DT_BFLOAT16); EXPECT_EQ(output_view.GetNode("tl1r1")->attr().at(type_key).type(), DT_BFLOAT16); - EXPECT_EQ(output_view.GetNode("gry1")->attr().at("T").type(), DT_BFLOAT16); + EXPECT_EQ(output_view.GetNode("infer1")->attr().at("T").type(), DT_BFLOAT16); EXPECT_EQ(output_view.GetNode("allow2")->attr().at("T").type(), DT_BFLOAT16); EXPECT_EQ(output_view.GetNode("tl1w3")->attr().at(type_key).type(), DT_BFLOAT16); diff --git a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc index 35d0c5b0e40..a043479789f 100644 --- a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer.cc @@ -36,8 +36,8 @@ namespace internal { // dynamically determined. constexpr int64 kTensorMaxSize = 64; -// All the nodes that should be blacklisted and not swapped. -bool IsBlacklisted(const NodeDef& node) { +// All the nodes that should be denylisted and not swapped. +bool IsDenylisted(const NodeDef& node) { return // Collective ops should not be swapped. IsCollective(node) || @@ -94,8 +94,8 @@ Status IsNodeOutputPortHostFriendly(const GraphView& graph, bool* is_candidate) { *is_candidate = false; - // Make sure we are not a blacklisted op. - if (IsBlacklisted(node)) { + // Make sure we are not a denylisted op. 
+ if (IsDenylisted(node)) { return Status::OK(); } @@ -215,7 +215,7 @@ bool IsNodeInputPortHostFriendly(const NodeDef& node, int port_id) { // Checks if a node is a candidate to pin to Host. // The rough algorithm is as follows: -// 1] Check if node is blacklisted. +// 1] Check if node is denylisted. // 2] Check if node can run on Host. // 3] Check all input/outputs are Host "friendly" (atm, friendly means small, // ints, and pinned to Host). @@ -230,7 +230,7 @@ Status IsNodeHostCandidate(const GraphView& graph, GraphProperties* properties, } // Skip these node types. - if (IsBlacklisted(node)) { + if (IsDenylisted(node)) { return Status::OK(); } diff --git a/tensorflow/core/platform/cloud/gcs_dns_cache.cc b/tensorflow/core/platform/cloud/gcs_dns_cache.cc index da499f6a8c3..7865c6011c3 100644 --- a/tensorflow/core/platform/cloud/gcs_dns_cache.cc +++ b/tensorflow/core/platform/cloud/gcs_dns_cache.cc @@ -64,7 +64,7 @@ GcsDnsCache::GcsDnsCache(Env* env, int64 refresh_rate_secs) : env_(env), refresh_rate_secs_(refresh_rate_secs) {} void GcsDnsCache::AnnotateRequest(HttpRequest* request) { - // TODO(saeta): Blacklist failing IP addresses. + // TODO(saeta): Denylist failing IP addresses. mutex_lock l(mu_); if (!started_) { VLOG(1) << "Starting GCS DNS cache."; diff --git a/tensorflow/core/profiler/internal/tfprof_stats.cc b/tensorflow/core/profiler/internal/tfprof_stats.cc index 56e6e2bcba3..bd105227449 100644 --- a/tensorflow/core/profiler/internal/tfprof_stats.cc +++ b/tensorflow/core/profiler/internal/tfprof_stats.cc @@ -33,7 +33,7 @@ namespace { const char* const kProfilePrefix = "Profile:\n"; bool CreateRunMetadataNode(const string& name, NodeDef* def) { - // TODO(xpan): Better solution than blacklisting this 2 nodes. They + // TODO(xpan): Better solution than denylisting this 2 nodes. They // actually cost some resources, maybe include them. Some nodes, such // as _SOURCE appear in multiple devices, which breaks tfprof's assumption. 
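Looping back to pin_to_host_optimizer.cc above: the candidate test there boils down to three checks (not denylisted, has a Host kernel, and all inputs/outputs are small integer tensors already pinned to Host). A condensed toy restatement follows; ToyHostCandidate and its fields are invented stand-ins for the real NodeDef/GraphProperties plumbing, and only the size cap of 64 elements mirrors the kTensorMaxSize constant above.

#include <cstdint>
#include <vector>

// Toy stand-in for the real NodeDef/GraphProperties-based checks.
struct ToyHostCandidate {
  bool denylisted;                       // 1] e.g. collective or stateful ops
  bool has_host_kernel;                  // 2] a CPU kernel is registered
  bool io_is_integer;                    // 3] all in/out tensors are integer typed
  std::vector<int64_t> io_tensor_sizes;  //    element counts of in/out tensors
};

constexpr int64_t kToyTensorMaxSize = 64;  // mirrors kTensorMaxSize above

bool IsPinToHostCandidate(const ToyHostCandidate& n) {
  if (n.denylisted) return false;        // 1] denylisted nodes are never swapped
  if (!n.has_host_kernel) return false;  // 2] must be runnable on Host
  if (!n.io_is_integer) return false;    // 3] only small integer tensors qualify...
  for (int64_t size : n.io_tensor_sizes) {
    if (size > kToyTensorMaxSize) return false;  // ...below the size cap
  }
  return true;
}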
if (name == "RecvTensor" || name == "_SOURCE" || diff --git a/tensorflow/go/genop/internal/genop.go b/tensorflow/go/genop/internal/genop.go index c4ea8abb543..f6bfdbbdf29 100644 --- a/tensorflow/go/genop/internal/genop.go +++ b/tensorflow/go/genop/internal/genop.go @@ -110,13 +110,13 @@ func generateFunctionsForOps(w io.Writer, ops *odpb.OpList, apimap *apiDefMap) e if err := tmplHeader.Execute(w, thisPackage); err != nil { return err } - blacklist := map[string]bool{ + denylist := map[string]bool{ "Const": true, "PyFunc": true, "PyFuncStateless": true, } for _, op := range ops.Op { - if blacklist[op.Name] { + if denylist[op.Name] { continue } apidef, err := apimap.Get(op.Name) diff --git a/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc b/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc index 15245173761..3fa692c62e1 100644 --- a/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc +++ b/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc @@ -18,7 +18,7 @@ namespace tflite { const constexpr char* NnapiAccelerationTestParams::kAccelerationTestConfig = R"( -## Every Test can be allowlisted or blacklisted using a regexp on its test_id +## Every Test can be allowlisted or denylisted using a regexp on its test_id ## Test_id # @@ -28,8 +28,8 @@ const constexpr char* NnapiAccelerationTestParams::kAccelerationTestConfig = # the ordinal is the position in the list of parameters generated by the # cardinal product of all the different parameter sets -# Blacklist/Allowlist -# To blacklist an element simply add - before the test_id regex +# Denylist/Allowlist +# To denylist an element simply add - before the test_id regex ## Rules evaluation # diff --git a/tensorflow/lite/g3doc/performance/coreml_delegate.md b/tensorflow/lite/g3doc/performance/coreml_delegate.md index 8c2bab96d23..d3f597eefcb 100644 --- a/tensorflow/lite/g3doc/performance/coreml_delegate.md +++ b/tensorflow/lite/g3doc/performance/coreml_delegate.md @@ -159,10 +159,10 @@ if (delegate == nullptr) { interpreter->ModifyGraphWithDelegate(delegate); ``` -The delegate creation logic reads device's machine id (e.g. iPhone11,1) -to determine its Neural Engine availability. See the +The delegate creation logic reads device's machine id (e.g. iPhone11,1) to +determine its Neural Engine availability. See the [code](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/experimental/delegates/coreml/coreml_delegate.mm) -for more detail. Alternatively, you can implement your own set of blacklist +for more detail. Alternatively, you can implement your own set of denylist devices using other libraries such as [DeviceKit](https://github.com/devicekit/DeviceKit). diff --git a/tensorflow/lite/kernels/acceleration_test_util_internal.cc b/tensorflow/lite/kernels/acceleration_test_util_internal.cc index a6ad8234f59..825d03f245d 100644 --- a/tensorflow/lite/kernels/acceleration_test_util_internal.cc +++ b/tensorflow/lite/kernels/acceleration_test_util_internal.cc @@ -46,7 +46,7 @@ void ReadAccelerationConfig( auto first_sep_pos = std::find(curr_config_line.begin(), curr_config_line.end(), ','); - bool is_blacklist = false; + bool is_denylist = false; std::string key = curr_config_line; std::string value{}; if (first_sep_pos != curr_config_line.end()) { @@ -54,13 +54,13 @@ void ReadAccelerationConfig( value = std::string(first_sep_pos + 1, curr_config_line.end()); } - // Regexps starting with '-'' are blacklist ones. + // Regexps starting with '-'' are denylist ones. 
if (key[0] == '-') { key = key.substr(1); - is_blacklist = true; + is_denylist = true; } - consumer(key, value, is_blacklist); + consumer(key, value, is_denylist); } } } diff --git a/tensorflow/lite/kernels/acceleration_test_util_internal.h b/tensorflow/lite/kernels/acceleration_test_util_internal.h index 24fc2383f9e..857a2da8749 100644 --- a/tensorflow/lite/kernels/acceleration_test_util_internal.h +++ b/tensorflow/lite/kernels/acceleration_test_util_internal.h @@ -39,15 +39,15 @@ template class ConfigurationEntry { public: ConfigurationEntry(const std::string& test_id_rex, T test_config, - bool is_blacklist) + bool is_denylist) : test_id_rex_(test_id_rex), test_config_(test_config), - is_blacklist_(is_blacklist) {} + is_denylist_(is_denylist) {} bool Matches(const std::string& test_id) { return RE2::FullMatch(test_id, test_id_rex_); } - bool IsBlacklistEntry() const { return is_blacklist_; } + bool IsDenylistEntry() const { return is_denylist_; } const T& TestConfig() const { return test_config_; } const std::string& TestIdRex() const { return test_id_rex_; } @@ -55,7 +55,7 @@ class ConfigurationEntry { private: std::string test_id_rex_; T test_config_; - bool is_blacklist_; + bool is_denylist_; }; // Returns the acceleration test configuration for the given test id and @@ -71,9 +71,9 @@ absl::optional GetAccelerationTestParam(std::string test_id) { auto config = new std::vector>(); auto consumer = [&config](std::string key, std::string value_str, - bool is_blacklist) mutable { + bool is_denylist) mutable { T value = T::ParseConfigurationLine(value_str); - config->push_back(ConfigurationEntry(key, value, is_blacklist)); + config->push_back(ConfigurationEntry(key, value, is_denylist)); }; ReadAccelerationConfig(T::kAccelerationTestConfig, consumer); @@ -91,7 +91,7 @@ absl::optional GetAccelerationTestParam(std::string test_id) { test_config->begin(), test_config->end(), [&test_id](ConfigurationEntry elem) { return elem.Matches(test_id); }); if (test_config_iter != test_config->end() && - !test_config_iter->IsBlacklistEntry()) { + !test_config_iter->IsDenylistEntry()) { return absl::optional(test_config_iter->TestConfig()); } else { return absl::optional(); diff --git a/tensorflow/lite/kernels/acceleration_test_util_internal_test.cc b/tensorflow/lite/kernels/acceleration_test_util_internal_test.cc index 6d6b7a722b8..400b366f33e 100644 --- a/tensorflow/lite/kernels/acceleration_test_util_internal_test.cc +++ b/tensorflow/lite/kernels/acceleration_test_util_internal_test.cc @@ -52,11 +52,11 @@ struct SimpleConfig { class ReadAccelerationConfigTest : public ::testing::Test { public: std::unordered_map allowlist_; - std::unordered_map blacklist_; + std::unordered_map denylist_; std::function consumer_ = - [this](std::string key, std::string value, bool is_blacklist) { - if (is_blacklist) { - blacklist_[key] = {value}; + [this](std::string key, std::string value, bool is_denylist) { + if (is_denylist) { + denylist_[key] = {value}; } else { allowlist_[key] = {value}; } @@ -67,13 +67,13 @@ TEST_F(ReadAccelerationConfigTest, ReadsAKeyOnlyLine) { ReadAccelerationConfig("key", consumer_); EXPECT_THAT(allowlist_.find("key"), Not(Eq(allowlist_.end()))); - EXPECT_TRUE(blacklist_.empty()); + EXPECT_TRUE(denylist_.empty()); } -TEST_F(ReadAccelerationConfigTest, ReadsABlacklistKeyOnlyLine) { +TEST_F(ReadAccelerationConfigTest, ReadsADenylistKeyOnlyLine) { ReadAccelerationConfig("-key", consumer_); - EXPECT_THAT(blacklist_.find("key"), Not(Eq(allowlist_.end()))); + EXPECT_THAT(denylist_.find("key"), 
Not(Eq(allowlist_.end()))); EXPECT_TRUE(allowlist_.empty()); } @@ -81,13 +81,13 @@ TEST_F(ReadAccelerationConfigTest, ReadsAKeyValueLine) { ReadAccelerationConfig("key,value", consumer_); EXPECT_THAT(allowlist_["key"].value, Eq("value")); - EXPECT_TRUE(blacklist_.empty()); + EXPECT_TRUE(denylist_.empty()); } -TEST_F(ReadAccelerationConfigTest, ReadsABlackListKeyValueLine) { +TEST_F(ReadAccelerationConfigTest, ReadsADenyListKeyValueLine) { ReadAccelerationConfig("-key,value", consumer_); - EXPECT_THAT(blacklist_["key"].value, Eq("value")); + EXPECT_THAT(denylist_["key"].value, Eq("value")); EXPECT_TRUE(allowlist_.empty()); } @@ -95,13 +95,13 @@ TEST_F(ReadAccelerationConfigTest, KeysAreLeftTrimmed) { ReadAccelerationConfig(" key,value", consumer_); EXPECT_THAT(allowlist_["key"].value, Eq("value")); - EXPECT_TRUE(blacklist_.empty()); + EXPECT_TRUE(denylist_.empty()); } TEST_F(ReadAccelerationConfigTest, BlKeysAreLeftTrimmed) { ReadAccelerationConfig(" -key,value", consumer_); - EXPECT_THAT(blacklist_["key"].value, Eq("value")); + EXPECT_THAT(denylist_["key"].value, Eq("value")); EXPECT_TRUE(allowlist_.empty()); } @@ -109,14 +109,14 @@ TEST_F(ReadAccelerationConfigTest, IgnoresCommentedLines) { ReadAccelerationConfig("#key,value", consumer_); EXPECT_TRUE(allowlist_.empty()); - EXPECT_TRUE(blacklist_.empty()); + EXPECT_TRUE(denylist_.empty()); } TEST_F(ReadAccelerationConfigTest, CommentCanHaveTrailingBlanks) { ReadAccelerationConfig(" #key,value", consumer_); EXPECT_TRUE(allowlist_.empty()); - EXPECT_TRUE(blacklist_.empty()); + EXPECT_TRUE(denylist_.empty()); } TEST_F(ReadAccelerationConfigTest, CommentsAreOnlyForTheFullLine) { @@ -129,7 +129,7 @@ TEST_F(ReadAccelerationConfigTest, IgnoresEmptyLines) { ReadAccelerationConfig("", consumer_); EXPECT_TRUE(allowlist_.empty()); - EXPECT_TRUE(blacklist_.empty()); + EXPECT_TRUE(denylist_.empty()); } TEST_F(ReadAccelerationConfigTest, ParsesMultipleLines) { @@ -137,7 +137,7 @@ TEST_F(ReadAccelerationConfigTest, ParsesMultipleLines) { EXPECT_THAT(allowlist_["key1"].value, Eq("value1")); EXPECT_THAT(allowlist_["key2"].value, Eq("value2")); - EXPECT_THAT(blacklist_["key3"].value, Eq("value3")); + EXPECT_THAT(denylist_["key3"].value, Eq("value3")); } TEST_F(ReadAccelerationConfigTest, ParsesMultipleLinesWithCommentsAndSpaces) { @@ -177,7 +177,7 @@ TEST(GetAccelerationTestParam, SupportsWildcards) { ASSERT_THAT(config_value_maybe.value().value, Eq("data-4")); } -TEST(GetAccelerationTestParam, SupportBlacklist) { +TEST(GetAccelerationTestParam, SupportDenylist) { const auto config_value_maybe = GetAccelerationTestParam("test-5"); ASSERT_FALSE(config_value_maybe.has_value()); diff --git a/tensorflow/lite/toco/tflite/export_test.cc b/tensorflow/lite/toco/tflite/export_test.cc index dd0b1273dca..ced55921e50 100644 --- a/tensorflow/lite/toco/tflite/export_test.cc +++ b/tensorflow/lite/toco/tflite/export_test.cc @@ -796,7 +796,7 @@ TEST(OperatorKeyTest, TestFlexWithUnsupportedOp) { EXPECT_EQ(key.version(), 1); // While HashTableV2 is excluded from the allowlisted flex op list, eventually // it won't be, and the following expectations will need to change as the op - // is explicitly blacklisted due to lack of asset support. + // is explicitly denylisted due to lack of asset support. 
EXPECT_FALSE(key.is_flex_op()); EXPECT_FALSE(key.is_unsupported_flex_op()); } diff --git a/tensorflow/lite/tools/evaluation/stages/image_classification_stage.cc b/tensorflow/lite/tools/evaluation/stages/image_classification_stage.cc index f0c1daeb06b..4f280c79143 100644 --- a/tensorflow/lite/tools/evaluation/stages/image_classification_stage.cc +++ b/tensorflow/lite/tools/evaluation/stages/image_classification_stage.cc @@ -150,32 +150,31 @@ EvaluationStageMetrics ImageClassificationStage::LatestMetrics() { return metrics; } -TfLiteStatus FilterBlackListedImages(const std::string& blacklist_file_path, - std::vector* image_labels) { - if (!blacklist_file_path.empty()) { +TfLiteStatus FilterDenyListedImages(const std::string& denylist_file_path, + std::vector* image_labels) { + if (!denylist_file_path.empty()) { std::vector lines; - if (!tflite::evaluation::ReadFileLines(blacklist_file_path, &lines)) { - LOG(ERROR) << "Could not read: " << blacklist_file_path; + if (!tflite::evaluation::ReadFileLines(denylist_file_path, &lines)) { + LOG(ERROR) << "Could not read: " << denylist_file_path; return kTfLiteError; } - std::vector blacklist_ids; - blacklist_ids.reserve(lines.size()); - // Populate blacklist_ids with indices of images. - std::transform(lines.begin(), lines.end(), - std::back_inserter(blacklist_ids), + std::vector denylist_ids; + denylist_ids.reserve(lines.size()); + // Populate denylist_ids with indices of images. + std::transform(lines.begin(), lines.end(), std::back_inserter(denylist_ids), [](const std::string& val) { return std::stoi(val) - 1; }); std::vector filtered_images; - std::sort(blacklist_ids.begin(), blacklist_ids.end()); + std::sort(denylist_ids.begin(), denylist_ids.end()); const size_t size_post_filtering = - image_labels->size() - blacklist_ids.size(); + image_labels->size() - denylist_ids.size(); filtered_images.reserve(size_post_filtering); - int blacklist_index = 0; + int denylist_index = 0; for (int image_index = 0; image_index < image_labels->size(); image_index++) { - if (blacklist_index < blacklist_ids.size() && - blacklist_ids[blacklist_index] == image_index) { - blacklist_index++; + if (denylist_index < denylist_ids.size() && + denylist_ids[denylist_index] == image_index) { + denylist_index++; continue; } filtered_images.push_back((*image_labels)[image_index]); diff --git a/tensorflow/lite/tools/evaluation/stages/image_classification_stage.h b/tensorflow/lite/tools/evaluation/stages/image_classification_stage.h index c3f8eb8f900..d468afbc359 100644 --- a/tensorflow/lite/tools/evaluation/stages/image_classification_stage.h +++ b/tensorflow/lite/tools/evaluation/stages/image_classification_stage.h @@ -80,10 +80,10 @@ struct ImageLabel { std::string label; }; -// Reads a file containing newline-separated blacklisted image indices and +// Reads a file containing newline-separated denylisted image indices and // filters them out from image_labels. 
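The header comment above summarizes the behavior implemented in FilterDenyListedImages: the denylist file holds one 1-based image index per line, and matching images are dropped before evaluation. A rough Python sketch of that logic, for illustration only (not the TFLite C++ code):

# Sketch only: assumes one 1-based index per line, mirroring the C++ filtering above.
def filter_denylisted_images(denylist_lines, image_labels):
    denylist_ids = {int(line) - 1 for line in denylist_lines if line.strip()}
    return [label for index, label in enumerate(image_labels)
            if index not in denylist_ids]

print(filter_denylisted_images(["2"], ["img0", "img1", "img2"]))  # ['img0', 'img2']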
-TfLiteStatus FilterBlackListedImages(const std::string& blacklist_file_path, - std::vector* image_labels); +TfLiteStatus FilterDenyListedImages(const std::string& denylist_file_path, + std::vector* image_labels); } // namespace evaluation } // namespace tflite diff --git a/tensorflow/lite/tools/evaluation/tasks/imagenet_image_classification/README.md b/tensorflow/lite/tools/evaluation/tasks/imagenet_image_classification/README.md index b775c3dc8e8..9cea895eff0 100644 --- a/tensorflow/lite/tools/evaluation/tasks/imagenet_image_classification/README.md +++ b/tensorflow/lite/tools/evaluation/tasks/imagenet_image_classification/README.md @@ -57,9 +57,9 @@ The binary takes the following parameters: and the following optional parameters: -* `blacklist_file_path`: `string` \ - Path to blacklist file. This file contains the indices of images that are - blacklisted for evaluation. 1762 images are blacklisted in ILSVRC dataset. +* `denylist_file_path`: `string` \ + Path to denylist file. This file contains the indices of images that are + denylisted for evaluation. 1762 images are denylisted in ILSVRC dataset. For details please refer to readme.txt of ILSVRC2014 devkit. * `num_images`: `int` (default=0) \ diff --git a/tensorflow/lite/tools/evaluation/tasks/imagenet_image_classification/run_eval.cc b/tensorflow/lite/tools/evaluation/tasks/imagenet_image_classification/run_eval.cc index fdc97d44abc..50e12be1c1c 100644 --- a/tensorflow/lite/tools/evaluation/tasks/imagenet_image_classification/run_eval.cc +++ b/tensorflow/lite/tools/evaluation/tasks/imagenet_image_classification/run_eval.cc @@ -35,7 +35,7 @@ constexpr char kGroundTruthImagesPathFlag[] = "ground_truth_images_path"; constexpr char kGroundTruthLabelsFlag[] = "ground_truth_labels"; constexpr char kOutputFilePathFlag[] = "output_file_path"; constexpr char kModelOutputLabelsFlag[] = "model_output_labels"; -constexpr char kBlacklistFilePathFlag[] = "blacklist_file_path"; +constexpr char kDenylistFilePathFlag[] = "denylist_file_path"; constexpr char kNumImagesFlag[] = "num_images"; constexpr char kInterpreterThreadsFlag[] = "num_interpreter_threads"; constexpr char kDelegateFlag[] = "delegate"; @@ -64,7 +64,7 @@ class ImagenetClassification : public TaskExecutor { std::string ground_truth_images_path_; std::string ground_truth_labels_path_; std::string model_output_labels_path_; - std::string blacklist_file_path_; + std::string denylist_file_path_; std::string output_file_path_; std::string delegate_; int num_images_; @@ -90,10 +90,10 @@ std::vector ImagenetClassification::GetFlags() { "Path to ground truth labels, corresponding to alphabetical ordering " "of ground truth images."), tflite::Flag::CreateFlag( - kBlacklistFilePathFlag, &blacklist_file_path_, - "Path to blacklist file (optional) where each line is a single " + kDenylistFilePathFlag, &denylist_file_path_, + "Path to denylist file (optional) where each line is a single " "integer that is " - "equal to index number of blacklisted image."), + "equal to index number of denylisted image."), tflite::Flag::CreateFlag(kOutputFilePathFlag, &output_file_path_, "File to output metrics proto to."), tflite::Flag::CreateFlag(kNumImagesFlag, &num_images_, @@ -131,9 +131,8 @@ absl::optional ImagenetClassification::RunImpl() { image_labels.push_back({image_files[i], ground_truth_image_labels[i]}); } - // Filter out blacklisted/unwanted images. - if (FilterBlackListedImages(blacklist_file_path_, &image_labels) != - kTfLiteOk) { + // Filter out denylisted/unwanted images. 
+ if (FilterDenyListedImages(denylist_file_path_, &image_labels) != kTfLiteOk) { return absl::nullopt; } if (num_images_ > 0) { diff --git a/tensorflow/python/compiler/tensorrt/test/quantization_mnist_test.py b/tensorflow/python/compiler/tensorrt/test/quantization_mnist_test.py index 92e44aa68a8..2716a933336 100644 --- a/tensorflow/python/compiler/tensorrt/test/quantization_mnist_test.py +++ b/tensorflow/python/compiler/tensorrt/test/quantization_mnist_test.py @@ -147,7 +147,7 @@ class QuantizationAwareTrainingMNISTTest(test_util.TensorFlowTestCase): len(graph_def.node)) converter = trt_convert.TrtGraphConverter( input_graph_def=graph_def, - nodes_blacklist=[OUTPUT_NODE_NAME], + nodes_denylist=[OUTPUT_NODE_NAME], max_batch_size=max_batch_size, precision_mode='INT8', # There is a 2GB GPU memory limit for each test, so we set diff --git a/tensorflow/python/compiler/tensorrt/trt_convert.py b/tensorflow/python/compiler/tensorrt/trt_convert.py index 255d65abda9..a0388c3630d 100644 --- a/tensorflow/python/compiler/tensorrt/trt_convert.py +++ b/tensorflow/python/compiler/tensorrt/trt_convert.py @@ -432,7 +432,7 @@ class TrtGraphConverter(object): input_saved_model_tags=None, input_saved_model_signature_key=None, input_graph_def=None, - nodes_blacklist=None, + nodes_denylist=None, session_config=None, max_batch_size=1, max_workspace_size_bytes=DEFAULT_TRT_MAX_WORKSPACE_SIZE_BYTES, @@ -452,7 +452,7 @@ class TrtGraphConverter(object): input_graph_def: a GraphDef object containing a model to be transformed. If set to None, the graph will be read from the SavedModel loaded from input_saved_model_dir. - nodes_blacklist: list of node names to prevent the converter from + nodes_denylist: list of node names to prevent the converter from touching. session_config: the ConfigProto used to create a Session. It's also used as a template to create a TRT-enabled ConfigProto for conversion. If not @@ -497,7 +497,7 @@ class TrtGraphConverter(object): _check_trt_version_compatibility() self._input_graph_def = input_graph_def - self._nodes_blacklist = nodes_blacklist + self._nodes_denylist = nodes_denylist self._input_saved_model_dir = input_saved_model_dir self._converted = False @@ -558,15 +558,15 @@ class TrtGraphConverter(object): graph_id=b"tf_graph") self._converted = True - def _add_nodes_blacklist(self): - if self._nodes_blacklist: + def _add_nodes_denylist(self): + if self._nodes_denylist: collection_def = self._grappler_meta_graph_def.collection_def["train_op"] - blacklist = collection_def.node_list.value - for i in self._nodes_blacklist: + denylist = collection_def.node_list.value + for i in self._nodes_denylist: if isinstance(i, ops.Tensor): - blacklist.append(_to_bytes(i.name)) + denylist.append(_to_bytes(i.name)) else: - blacklist.append(_to_bytes(i)) + denylist.append(_to_bytes(i)) def _convert_graph_def(self): """Convert the input GraphDef.""" @@ -575,7 +575,7 @@ class TrtGraphConverter(object): importer.import_graph_def(self._input_graph_def, name="") self._grappler_meta_graph_def = saver.export_meta_graph( graph_def=graph.as_graph_def(add_shapes=True), graph=graph) - self._add_nodes_blacklist() + self._add_nodes_denylist() self._run_conversion() @@ -629,7 +629,7 @@ class TrtGraphConverter(object): self._grappler_meta_graph_def.collection_def[collection_key].CopyFrom( input_meta_graph_def.collection_def[collection_key]) - self._add_nodes_blacklist() + self._add_nodes_denylist() # Copy other information. 
self._grappler_meta_graph_def.meta_info_def.CopyFrom( @@ -1342,7 +1342,7 @@ def create_inference_graph( input_saved_model_tags=input_saved_model_tags, input_saved_model_signature_key=input_saved_model_signature_key, input_graph_def=input_graph_def, - nodes_blacklist=outputs, + nodes_denylist=outputs, session_config=session_config, max_batch_size=max_batch_size, max_workspace_size_bytes=max_workspace_size_bytes, diff --git a/tensorflow/python/compiler/tensorrt/trt_convert_test.py b/tensorflow/python/compiler/tensorrt/trt_convert_test.py index 9052fc2b6ed..1aa53a5bc1b 100644 --- a/tensorflow/python/compiler/tensorrt/trt_convert_test.py +++ b/tensorflow/python/compiler/tensorrt/trt_convert_test.py @@ -280,7 +280,7 @@ class TrtConvertTest(test_util.TensorFlowTestCase, parameterized.TestCase): input_saved_model_signature_key=_SAVED_MODEL_SIGNATURE_KEY, input_graph_def=None if input_saved_model_dir else self._GetGraphDefForV1(device), - nodes_blacklist=None if input_saved_model_dir else ["output"], + nodes_denylist=None if input_saved_model_dir else ["output"], session_config=self._GetConfigProto(), max_batch_size=max_batch_size, max_workspace_size_bytes=TrtConvertTest._TRT_MAX_WORKSPACE_SIZE_BYTES, diff --git a/tensorflow/python/compiler/xla/xla.py b/tensorflow/python/compiler/xla/xla.py index b68640f9b42..51ad5569a30 100644 --- a/tensorflow/python/compiler/xla/xla.py +++ b/tensorflow/python/compiler/xla/xla.py @@ -44,7 +44,7 @@ _MAX_WARNING_LINES = 5 # Operations that indicate some error in the users graph. For example, XLA # computation should not have any Placeholder op. -_BLACKLISTED_OPS = set([ +_DENYLISTED_OPS = set([ 'Placeholder', ]) @@ -195,7 +195,7 @@ class XLACompileContext(control_flow_ops.XLAControlFlowContext): def AddOp(self, op): """Create op in XLACompileContext and notifies outer context recursively.""" # pylint: disable=protected-access - if op.type in _BLACKLISTED_OPS: + if op.type in _DENYLISTED_OPS: logging.error( 'Operation of type %s (%s) is not supported in XLA. Execution will ' 'fail if this op is used in the graph. ', op.type, op.name) diff --git a/tensorflow/python/debug/__init__.py b/tensorflow/python/debug/__init__.py index ffbdff8c47b..18e7379269f 100644 --- a/tensorflow/python/debug/__init__.py +++ b/tensorflow/python/debug/__init__.py @@ -18,7 +18,7 @@ See the [TFDBG](https://www.tensorflow.org/guide/debugger) guide. @@add_debug_tensor_watch @@watch_graph -@@watch_graph_with_blacklists +@@watch_graph_with_denylists @@DebugTensorDatum @@DebugDumpDir @@load_tensor_from_event @@ -57,7 +57,7 @@ from tensorflow.python.debug.lib.debug_graphs import reconstruct_non_debug_graph from tensorflow.python.debug.lib.debug_utils import add_debug_tensor_watch from tensorflow.python.debug.lib.debug_utils import watch_graph -from tensorflow.python.debug.lib.debug_utils import watch_graph_with_blacklists +from tensorflow.python.debug.lib.debug_utils import watch_graph_with_denylists from tensorflow.python.debug.wrappers.dumping_wrapper import DumpingDebugWrapperSession from tensorflow.python.debug.wrappers.framework import WatchOptions diff --git a/tensorflow/python/debug/cli/analyzer_cli.py b/tensorflow/python/debug/cli/analyzer_cli.py index 49b48fd2dcc..832ac548240 100644 --- a/tensorflow/python/debug/cli/analyzer_cli.py +++ b/tensorflow/python/debug/cli/analyzer_cli.py @@ -136,8 +136,8 @@ class DebugAnalyzer(object): _TENSOR_NAME_COLUMN_HEAD = "Tensor name" # Op types to be omitted when generating descriptions of graph structure. 
- _GRAPH_STRUCT_OP_TYPE_BLACKLIST = ( - "_Send", "_Recv", "_HostSend", "_HostRecv", "_Retval") + _GRAPH_STRUCT_OP_TYPE_DENYLIST = ("_Send", "_Recv", "_HostSend", "_HostRecv", + "_Retval") def __init__(self, debug_dump, config): """DebugAnalyzer constructor. @@ -795,16 +795,16 @@ class DebugAnalyzer(object): lines, font_attr_segs=font_attr_segs) # List node inputs (non-control and control). - inputs = self._exclude_blacklisted_ops( + inputs = self._exclude_denylisted_ops( self._debug_dump.node_inputs(node_name)) - ctrl_inputs = self._exclude_blacklisted_ops( + ctrl_inputs = self._exclude_denylisted_ops( self._debug_dump.node_inputs(node_name, is_control=True)) output.extend(self._format_neighbors("input", inputs, ctrl_inputs)) # List node output recipients (non-control and control). - recs = self._exclude_blacklisted_ops( + recs = self._exclude_denylisted_ops( self._debug_dump.node_recipients(node_name)) - ctrl_recs = self._exclude_blacklisted_ops( + ctrl_recs = self._exclude_denylisted_ops( self._debug_dump.node_recipients(node_name, is_control=True)) output.extend(self._format_neighbors("recipient", recs, ctrl_recs)) @@ -822,19 +822,20 @@ class DebugAnalyzer(object): _add_main_menu(output, node_name=node_name, enable_node_info=False) return output - def _exclude_blacklisted_ops(self, node_names): - """Exclude all nodes whose op types are in _GRAPH_STRUCT_OP_TYPE_BLACKLIST. + def _exclude_denylisted_ops(self, node_names): + """Exclude all nodes whose op types are in _GRAPH_STRUCT_OP_TYPE_DENYLIST. Args: node_names: An iterable of node or graph element names. Returns: - A list of node names that are not blacklisted. + A list of node names that are not denylisted. """ - return [node_name for node_name in node_names - if self._debug_dump.node_op_type( - debug_graphs.get_node_name(node_name)) not in - self._GRAPH_STRUCT_OP_TYPE_BLACKLIST] + return [ + node_name for node_name in node_names + if self._debug_dump.node_op_type(debug_graphs.get_node_name(node_name)) + not in self._GRAPH_STRUCT_OP_TYPE_DENYLIST + ] def _render_node_traceback(self, node_name): """Render traceback of a node's creation in Python, if available. @@ -1401,13 +1402,13 @@ class DebugAnalyzer(object): """ # Make a shallow copy of the list because it may be extended later. - all_inputs = self._exclude_blacklisted_ops( + all_inputs = self._exclude_denylisted_ops( copy.copy(tracker(node_name, is_control=False))) is_ctrl = [False] * len(all_inputs) if include_control: # Sort control inputs or recipients in alphabetical order of the node # names. 
- ctrl_inputs = self._exclude_blacklisted_ops( + ctrl_inputs = self._exclude_denylisted_ops( sorted(tracker(node_name, is_control=True))) all_inputs.extend(ctrl_inputs) is_ctrl.extend([True] * len(ctrl_inputs)) @@ -1440,7 +1441,7 @@ class DebugAnalyzer(object): for i, inp in enumerate(all_inputs): op_type = self._debug_dump.node_op_type(debug_graphs.get_node_name(inp)) - if op_type in self._GRAPH_STRUCT_OP_TYPE_BLACKLIST: + if op_type in self._GRAPH_STRUCT_OP_TYPE_DENYLIST: continue if is_ctrl[i]: diff --git a/tensorflow/python/debug/lib/debug_graph_reconstruction_test.py b/tensorflow/python/debug/lib/debug_graph_reconstruction_test.py index b3baa6e7bc2..60cdd6141ff 100644 --- a/tensorflow/python/debug/lib/debug_graph_reconstruction_test.py +++ b/tensorflow/python/debug/lib/debug_graph_reconstruction_test.py @@ -39,8 +39,7 @@ from tensorflow.python.training import gradient_descent class ReconstructNonDebugGraphTest(test_util.TensorFlowTestCase): - _OP_TYPE_BLACKLIST = ( - "_Send", "_Recv", "_HostSend", "_HostRecv", "_Retval") + _OP_TYPE_DENYLIST = ("_Send", "_Recv", "_HostSend", "_HostRecv", "_Retval") def _no_rewrite_session_config(self): rewriter_config = rewriter_config_pb2.RewriterConfig( @@ -60,10 +59,10 @@ class ReconstructNonDebugGraphTest(test_util.TensorFlowTestCase): file_io.delete_recursively(self._dump_dir) super(ReconstructNonDebugGraphTest, self).tearDown() - def _graphDefWithoutBlacklistedNodes(self, graph_def): + def _graphDefWithoutDenylistedNodes(self, graph_def): output_graph_def = graph_pb2.GraphDef() for node in graph_def.node: - if node.op not in self._OP_TYPE_BLACKLIST: + if node.op not in self._OP_TYPE_DENYLIST: new_node = output_graph_def.node.add() new_node.CopyFrom(node) @@ -110,16 +109,16 @@ class ReconstructNonDebugGraphTest(test_util.TensorFlowTestCase): for i, non_debug_graph_def in enumerate(non_debug_graph_defs): device_name = debug_graphs._infer_device_name(non_debug_graph_def) test_util.assert_equal_graph_def( - self._graphDefWithoutBlacklistedNodes(reconstructed[device_name]), - self._graphDefWithoutBlacklistedNodes(non_debug_graph_def)) + self._graphDefWithoutDenylistedNodes(reconstructed[device_name]), + self._graphDefWithoutDenylistedNodes(non_debug_graph_def)) # Test debug_graphs.reconstruct_non_debug_graph_def. reconstructed_again = ( debug_graphs.reconstruct_non_debug_graph_def( run_metadata.partition_graphs[i])) test_util.assert_equal_graph_def( - self._graphDefWithoutBlacklistedNodes(reconstructed_again), - self._graphDefWithoutBlacklistedNodes(non_debug_graph_def)) + self._graphDefWithoutDenylistedNodes(reconstructed_again), + self._graphDefWithoutDenylistedNodes(non_debug_graph_def)) def testReconstructSimpleGraph(self): with session.Session() as sess: diff --git a/tensorflow/python/debug/lib/debug_utils.py b/tensorflow/python/debug/lib/debug_utils.py index 61575cdef76..7b739fe472d 100644 --- a/tensorflow/python/debug/lib/debug_utils.py +++ b/tensorflow/python/debug/lib/debug_utils.py @@ -199,20 +199,20 @@ def watch_graph(run_options, run_options.debug_options.reset_disk_byte_usage = reset_disk_byte_usage -def watch_graph_with_blacklists(run_options, - graph, - debug_ops="DebugIdentity", - debug_urls=None, - node_name_regex_blacklist=None, - op_type_regex_blacklist=None, - tensor_dtype_regex_blacklist=None, - tolerate_debug_op_creation_failures=False, - global_step=-1, - reset_disk_byte_usage=False): - """Add debug tensor watches, blacklisting nodes and op types. 
+def watch_graph_with_denylists(run_options, + graph, + debug_ops="DebugIdentity", + debug_urls=None, + node_name_regex_denylist=None, + op_type_regex_denylist=None, + tensor_dtype_regex_denylist=None, + tolerate_debug_op_creation_failures=False, + global_step=-1, + reset_disk_byte_usage=False): + """Add debug tensor watches, denylisting nodes and op types. This is similar to `watch_graph()`, but the node names and op types are - blacklisted, instead of allowlisted. + denylisted, instead of allowlisted. N.B.: 1. Under certain circumstances, the `Tensor` may not get actually watched @@ -225,28 +225,25 @@ def watch_graph_with_blacklists(run_options, Args: run_options: An instance of `config_pb2.RunOptions` to be modified. graph: An instance of `ops.Graph`. - debug_ops: (`str` or `list` of `str`) name(s) of the debug op(s) to use. - See the documentation of `watch_graph` for more details. + debug_ops: (`str` or `list` of `str`) name(s) of the debug op(s) to use. See + the documentation of `watch_graph` for more details. debug_urls: URL(s) to send debug values to, e.g., `file:///tmp/tfdbg_dump_1`, `grpc://localhost:12345`. - node_name_regex_blacklist: Regular-expression blacklist for node_name. - This should be a string, e.g., `"(weight_[0-9]+|bias_.*)"`. - op_type_regex_blacklist: Regular-expression blacklist for the op type of - nodes, e.g., `"(Variable|Add)"`. - If both node_name_regex_blacklist and op_type_regex_blacklist - are set, the two filtering operations will occur in a logical `OR` - relation. In other words, a node will be excluded if it hits either of - the two blacklists; a node will be included if and only if it hits - neither of the blacklists. - tensor_dtype_regex_blacklist: Regular-expression blacklist for Tensor - data type, e.g., `"^int.*"`. - This blacklist operates in logical `OR` relations to the two allowlists - above. + node_name_regex_denylist: Regular-expression denylist for node_name. This + should be a string, e.g., `"(weight_[0-9]+|bias_.*)"`. + op_type_regex_denylist: Regular-expression denylist for the op type of + nodes, e.g., `"(Variable|Add)"`. If both node_name_regex_denylist and + op_type_regex_denylist are set, the two filtering operations will occur in + a logical `OR` relation. In other words, a node will be excluded if it + hits either of the two denylists; a node will be included if and only if + it hits neither of the denylists. + tensor_dtype_regex_denylist: Regular-expression denylist for Tensor data + type, e.g., `"^int.*"`. This denylist operates in logical `OR` relations + to the two allowlists above. tolerate_debug_op_creation_failures: (`bool`) whether debug op creation failures (e.g., due to dtype incompatibility) are to be tolerated by not throwing exceptions. - global_step: (`int`) Optional global_step count for this debug tensor - watch. + global_step: (`int`) Optional global_step count for this debug tensor watch. reset_disk_byte_usage: (`bool`) whether to reset the tracked disk byte usage to zero (default: `False`). 
""" @@ -254,12 +251,14 @@ def watch_graph_with_blacklists(run_options, if isinstance(debug_ops, str): debug_ops = [debug_ops] - node_name_pattern = (re.compile(node_name_regex_blacklist) if - node_name_regex_blacklist else None) - op_type_pattern = (re.compile(op_type_regex_blacklist) if - op_type_regex_blacklist else None) - tensor_dtype_pattern = (re.compile(tensor_dtype_regex_blacklist) if - tensor_dtype_regex_blacklist else None) + node_name_pattern = ( + re.compile(node_name_regex_denylist) + if node_name_regex_denylist else None) + op_type_pattern = ( + re.compile(op_type_regex_denylist) if op_type_regex_denylist else None) + tensor_dtype_pattern = ( + re.compile(tensor_dtype_regex_denylist) + if tensor_dtype_regex_denylist else None) ops = graph.get_operations() for op in ops: diff --git a/tensorflow/python/debug/lib/debug_utils_test.py b/tensorflow/python/debug/lib/debug_utils_test.py index 188b89debec..b76583a3d9d 100644 --- a/tensorflow/python/debug/lib/debug_utils_test.py +++ b/tensorflow/python/debug/lib/debug_utils_test.py @@ -291,12 +291,12 @@ class DebugUtilsTest(test_util.TensorFlowTestCase): ["DebugIdentity"], ["file:///tmp/tfdbg_1"]) self.assertItemsEqual(["a1", "a1/Assign"], node_names) - def testWatchGraph_nodeNameBlacklist(self): - debug_utils.watch_graph_with_blacklists( + def testWatchGraph_nodeNameDenylist(self): + debug_utils.watch_graph_with_denylists( self._run_options, self._graph, debug_urls="file:///tmp/tfdbg_1", - node_name_regex_blacklist="(a1$|a1_init$|a1/.*|p1$)") + node_name_regex_denylist="(a1$|a1_init$|a1/.*|p1$)") node_names = self._verify_watches( self._run_options.debug_options.debug_tensor_watch_opts, 0, @@ -305,37 +305,37 @@ class DebugUtilsTest(test_util.TensorFlowTestCase): sorted(["b_init", "b", "b/Assign", "b/read", "c", "s"]), sorted(node_names)) - def testWatchGraph_opTypeBlacklist(self): - debug_utils.watch_graph_with_blacklists( + def testWatchGraph_opTypeDenylist(self): + debug_utils.watch_graph_with_denylists( self._run_options, self._graph, debug_urls="file:///tmp/tfdbg_1", - op_type_regex_blacklist="(Variable|Identity|Assign|Const)") + op_type_regex_denylist="(Variable|Identity|Assign|Const)") node_names = self._verify_watches( self._run_options.debug_options.debug_tensor_watch_opts, 0, ["DebugIdentity"], ["file:///tmp/tfdbg_1"]) self.assertEqual(sorted(["p1", "s"]), sorted(node_names)) - def testWatchGraph_nodeNameAndOpTypeBlacklists(self): - debug_utils.watch_graph_with_blacklists( + def testWatchGraph_nodeNameAndOpTypeDenylists(self): + debug_utils.watch_graph_with_denylists( self._run_options, self._graph, debug_urls="file:///tmp/tfdbg_1", - node_name_regex_blacklist="p1$", - op_type_regex_blacklist="(Variable|Identity|Assign|Const)") + node_name_regex_denylist="p1$", + op_type_regex_denylist="(Variable|Identity|Assign|Const)") node_names = self._verify_watches( self._run_options.debug_options.debug_tensor_watch_opts, 0, ["DebugIdentity"], ["file:///tmp/tfdbg_1"]) self.assertEqual(["s"], node_names) - def testWatchGraph_tensorDTypeBlacklists(self): - debug_utils.watch_graph_with_blacklists( + def testWatchGraph_tensorDTypeDenylists(self): + debug_utils.watch_graph_with_denylists( self._run_options, self._graph, debug_urls="file:///tmp/tfdbg_1", - tensor_dtype_regex_blacklist=".*_ref") + tensor_dtype_regex_denylist=".*_ref") node_names = self._verify_watches( self._run_options.debug_options.debug_tensor_watch_opts, 0, @@ -346,13 +346,13 @@ class DebugUtilsTest(test_util.TensorFlowTestCase): self.assertNotIn("b/Assign", node_names) 
self.assertIn("s", node_names) - def testWatchGraph_nodeNameAndTensorDTypeBlacklists(self): - debug_utils.watch_graph_with_blacklists( + def testWatchGraph_nodeNameAndTensorDTypeDenylists(self): + debug_utils.watch_graph_with_denylists( self._run_options, self._graph, debug_urls="file:///tmp/tfdbg_1", - node_name_regex_blacklist="^s$", - tensor_dtype_regex_blacklist=".*_ref") + node_name_regex_denylist="^s$", + tensor_dtype_regex_denylist=".*_ref") node_names = self._verify_watches( self._run_options.debug_options.debug_tensor_watch_opts, 0, diff --git a/tensorflow/python/debug/lib/session_debug_testlib.py b/tensorflow/python/debug/lib/session_debug_testlib.py index 16f92085baa..a51d743e746 100644 --- a/tensorflow/python/debug/lib/session_debug_testlib.py +++ b/tensorflow/python/debug/lib/session_debug_testlib.py @@ -588,10 +588,10 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase): sess.run(variables.global_variables_initializer()) run_options = config_pb2.RunOptions(output_partition_graphs=True) - debug_utils.watch_graph_with_blacklists( + debug_utils.watch_graph_with_denylists( run_options, sess.graph, - node_name_regex_blacklist="(.*rnn/while/.*|.*TensorArray.*)", + node_name_regex_denylist="(.*rnn/while/.*|.*TensorArray.*)", debug_urls=self._debug_urls()) # b/36870549: Nodes with these name patterns need to be excluded from # tfdbg in order to prevent MSAN warnings of uninitialized Tensors diff --git a/tensorflow/python/framework/auto_control_deps.py b/tensorflow/python/framework/auto_control_deps.py index 0928661ef76..7b49f2e16e8 100644 --- a/tensorflow/python/framework/auto_control_deps.py +++ b/tensorflow/python/framework/auto_control_deps.py @@ -74,13 +74,13 @@ LEGACY_RANDOM_OPS = [ # random OpKernel instantiation is reused across multiple steps # of the loop. Since legacy Random OpKernels have an internal rng state, # automatic dependency tracking across loop steps would likely - # fix this race; and for that case this blacklist is problematic. + # fix this race; and for that case this denylist is problematic. # However, since automatic dependency tracking inside while loops is not # currently supported, and there are no other examples of OpKernel reuse # (each OpKernel is associated with a unique op in graph mode), - # this blacklist has no effect on the aforementioned behavior. + # this denylist has no effect on the aforementioned behavior. # - # TODO(ebrevdo,skyewm): Modify the check against this blacklist to + # TODO(ebrevdo,skyewm): Modify the check against this denylist to # only occur when the op is inside a "variable initialization scope"; and # add proper autodeps inside while_loops that respects this updated check. 
"RandomUniform", @@ -104,7 +104,7 @@ _ORDER_INSENSITIVE_STATEFUL_OPS = [ ] # LINT.ThenChange(//tensorflow/core/grappler/optimizers/function_optimizer.cc) -_ALL_BLACKLISTED_OPS = ( +_ALL_DENYLISTED_OPS = ( set(ASYNC_STATEFUL_OPS) | set(LEGACY_RANDOM_OPS) | set(_ORDER_INSENSITIVE_STATEFUL_OPS)) @@ -124,7 +124,7 @@ _ALLOWLIST_STATELESS_OPS = [ def op_is_stateful(op): # pylint: disable=protected-access - return (op._is_stateful and op.type not in _ALL_BLACKLISTED_OPS) or ( + return (op._is_stateful and op.type not in _ALL_DENYLISTED_OPS) or ( op.type in _ALLOWLIST_STATELESS_OPS) diff --git a/tensorflow/python/framework/convert_to_constants.py b/tensorflow/python/framework/convert_to_constants.py index 555004e0836..ea11b1c41dc 100644 --- a/tensorflow/python/framework/convert_to_constants.py +++ b/tensorflow/python/framework/convert_to_constants.py @@ -711,12 +711,12 @@ class _ConverterData(object): def __init__(self, graph_def, variable_names_allowlist=None, - variable_names_blacklist=None): + variable_names_denylist=None): self._graph_def = graph_def self._tensor_data = {} self._build_node_defs_list() self._variable_names_allowlist = variable_names_allowlist - self._variable_names_blacklist = variable_names_blacklist + self._variable_names_denylist = variable_names_denylist @property def graph_def(self): @@ -742,8 +742,8 @@ class _ConverterData(object): """Checks whether to convert the given variable name to a constant.""" return (self._variable_names_allowlist is None or name in self._variable_names_allowlist) and ( - self._variable_names_blacklist is None or - name not in self._variable_names_blacklist) + self._variable_names_denylist is None or + name not in self._variable_names_denylist) def _build_node_defs_list(self): """Builds the list of NodeDefs in the GraphDef. @@ -777,7 +777,7 @@ class _FunctionConverterData(_ConverterData): lower_control_flow, aggressive_inlining, variable_names_allowlist=None, - variable_names_blacklist=None): + variable_names_denylist=None): """Creates the conversion data for the given function. Args: @@ -789,7 +789,7 @@ class _FunctionConverterData(_ConverterData): properly connected to control outputs). variable_names_allowlist: The set of variable names to convert (by default, all variables are converted). - variable_names_blacklist: The set of variable names to omit converting to + variable_names_denylist: The set of variable names to omit converting to constants. 
""" @@ -800,7 +800,7 @@ class _FunctionConverterData(_ConverterData): super(_FunctionConverterData, self).__init__( graph_def, variable_names_allowlist=variable_names_allowlist, - variable_names_blacklist=variable_names_blacklist) + variable_names_denylist=variable_names_denylist) self._build_tensor_data() def _build_tensor_data(self): @@ -850,12 +850,12 @@ class _SessionConverterData(_ConverterData): graph_def, output_node_names, variable_names_allowlist=None, - variable_names_blacklist=None): + variable_names_denylist=None): graph_def = graph_util.extract_sub_graph(graph_def, output_node_names) super(_SessionConverterData, self).__init__( graph_def, variable_names_allowlist=variable_names_allowlist, - variable_names_blacklist=variable_names_blacklist) + variable_names_denylist=variable_names_denylist) nodes_to_convert = [] tensor_names_to_convert = [] @@ -1115,7 +1115,7 @@ def convert_variables_to_constants_from_session_graph( graph_def, output_node_names, variable_names_allowlist=None, - variable_names_blacklist=None): + variable_names_denylist=None): """Replaces all the variables in a graph with constants of the same values. This function works similarly to convert_variables_to_constants_v2, but it @@ -1131,7 +1131,7 @@ def convert_variables_to_constants_from_session_graph( output_node_names: List of name strings for the result nodes of the graph. variable_names_allowlist: The set of variable names to convert (by default, all variables are converted). - variable_names_blacklist: The set of variable names to omit converting to + variable_names_denylist: The set of variable names to omit converting to constants. Returns: @@ -1143,5 +1143,5 @@ def convert_variables_to_constants_from_session_graph( graph_def=graph_def, output_node_names=output_node_names, variable_names_allowlist=variable_names_allowlist, - variable_names_blacklist=variable_names_blacklist)) + variable_names_denylist=variable_names_denylist)) return graph_def diff --git a/tensorflow/python/framework/convert_to_constants_test.py b/tensorflow/python/framework/convert_to_constants_test.py index 7252082d084..3adabb00a3e 100644 --- a/tensorflow/python/framework/convert_to_constants_test.py +++ b/tensorflow/python/framework/convert_to_constants_test.py @@ -594,7 +594,7 @@ class ConvertVariablesToConstantsSessionTest(test.TestCase): output = self.evaluate(output_node) self.assertNear(2.0, output, 0.00001) - def test_resource_variable_can_be_written_after_blacklisting(self): + def test_resource_variable_can_be_written_after_denylisting(self): with ops.Graph().as_default(): with variable_scope.variable_scope("", use_resource=True): variable_node = variable_scope.get_variable( @@ -614,17 +614,17 @@ class ConvertVariablesToConstantsSessionTest(test.TestCase): # Test variable name black list. This should result in the variable # not being a const. Furthermore, the paths that read from and assign - # to the blacklisted variable should continue to be valid. - constant_graph_def_with_blacklist = ( + # to the denylisted variable should continue to be valid. 
+ constant_graph_def_with_denylist = ( convert_to_constants .convert_variables_to_constants_from_session_graph( session=sess, graph_def=variable_graph_def, output_node_names=["output_node", initializer_name], - variable_names_blacklist=set(["variable_node"]))) + variable_names_denylist=set(["variable_node"]))) variable_node = None - for node in constant_graph_def_with_blacklist.node: + for node in constant_graph_def_with_denylist.node: if node.name == "variable_node": variable_node = node self.assertIsNotNone(variable_node) @@ -634,7 +634,7 @@ class ConvertVariablesToConstantsSessionTest(test.TestCase): # variable is not, and that the graph can be executed and update the # variable can be updated with each execution. with ops.Graph().as_default(): - _ = importer.import_graph_def(constant_graph_def_with_blacklist, name="") + _ = importer.import_graph_def(constant_graph_def_with_denylist, name="") with session_lib.Session() as sess: output_node = sess.graph.get_tensor_by_name("output_node:0") self.evaluate(sess.graph.get_operation_by_name(initializer_name)) @@ -798,7 +798,7 @@ class ConvertVariablesToConstantsSessionTest(test.TestCase): .convert_variables_to_constants_from_session_graph( sess, variable_graph_def, ["out"], - variable_names_blacklist=["y"])) + variable_names_denylist=["y"])) self._assertGraphContains( constant_graph_def, """ node { @@ -840,7 +840,7 @@ class ConvertVariablesToConstantsSessionTest(test.TestCase): .convert_variables_to_constants_from_session_graph( sess, variable_graph_def, ["out"], - variable_names_blacklist=["y"])) + variable_names_denylist=["y"])) self._assertGraphContains( constant_graph_def, """ node { @@ -1086,7 +1086,7 @@ class ConvertVariablesToConstantsSessionTest(test.TestCase): .convert_variables_to_constants_from_session_graph( sess, variable_graph_def, ["case/cond"], - variable_names_blacklist=["y"])) + variable_names_denylist=["y"])) self._assertGraphContains( constant_graph_def, """ node {name: "x" op: "Const"} diff --git a/tensorflow/python/framework/graph_util_impl.py b/tensorflow/python/framework/graph_util_impl.py index 753584813f9..4ef26fc8539 100644 --- a/tensorflow/python/framework/graph_util_impl.py +++ b/tensorflow/python/framework/graph_util_impl.py @@ -270,14 +270,14 @@ def convert_variables_to_constants(sess, Raises: RuntimeError: if a DT_RESOURCE op is found whose ancestor Variables are both - blacklisted AND whitelisted for freezing. + denylisted AND whitelisted for freezing. """ ret = convert_to_constants.convert_variables_to_constants_from_session_graph( session=sess, graph_def=input_graph_def, output_node_names=output_node_names, variable_names_allowlist=variable_names_whitelist, - variable_names_blacklist=variable_names_blacklist) + variable_names_denylist=variable_names_blacklist) # The previous code logic generated an empty versions field, we clear it here # to maintain backwards compatibility. 
ret.versions.Clear() diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index 8ddbcf34f3b..9e49f4b02cc 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -773,34 +773,34 @@ def assert_no_new_tensors(f): def _find_reference_cycle(objects, idx): - def get_ignore_reason(obj, blacklist): + def get_ignore_reason(obj, denylist): """Tests whether an object should be omitted from the dependency graph.""" - if len(blacklist) > 100: + if len(denylist) > 100: return "" if tf_inspect.isframe(obj): if "test_util.py" in tf_inspect.getframeinfo(obj)[0]: return "" - for b in blacklist: + for b in denylist: if b is obj: return "" - if obj is blacklist: + if obj is denylist: return "" return None # Note: this function is meant to help with diagnostics. Its output is purely # a human-readable representation, so you may freely modify it to suit your # needs. - def describe(obj, blacklist, leaves_only=False): + def describe(obj, denylist, leaves_only=False): """Returns a custom human-readable summary of obj. Args: obj: the value to describe. - blacklist: same as blacklist in get_ignore_reason. + denylist: same as denylist in get_ignore_reason. leaves_only: boolean flag used when calling describe recursively. Useful for summarizing collections. """ - if get_ignore_reason(obj, blacklist): - return "{}{}".format(get_ignore_reason(obj, blacklist), type(obj)) + if get_ignore_reason(obj, denylist): + return "{}{}".format(get_ignore_reason(obj, denylist), type(obj)) if tf_inspect.isframe(obj): return "frame: {}".format(tf_inspect.getframeinfo(obj)) elif tf_inspect.ismodule(obj): @@ -810,10 +810,10 @@ def _find_reference_cycle(objects, idx): return "{}, {}".format(type(obj), id(obj)) elif isinstance(obj, list): return "list({}): {}".format( - id(obj), [describe(e, blacklist, leaves_only=True) for e in obj]) + id(obj), [describe(e, denylist, leaves_only=True) for e in obj]) elif isinstance(obj, tuple): return "tuple({}): {}".format( - id(obj), [describe(e, blacklist, leaves_only=True) for e in obj]) + id(obj), [describe(e, denylist, leaves_only=True) for e in obj]) elif isinstance(obj, dict): return "dict({}): {} keys".format(id(obj), len(obj.keys())) elif tf_inspect.isfunction(obj): @@ -822,7 +822,7 @@ def _find_reference_cycle(objects, idx): else: return "{}, {}".format(type(obj), id(obj)) - def build_ref_graph(obj, graph, reprs, blacklist): + def build_ref_graph(obj, graph, reprs, denylist): """Builds a reference graph as -> . Args: @@ -832,21 +832,21 @@ def _find_reference_cycle(objects, idx): references, the graph holds object IDs rather than actual objects. reprs: Auxiliary structure that maps object IDs to their human-readable description. - blacklist: List of objects to ignore. + denylist: List of objects to ignore. 
""" referrers = gc.get_referrers(obj) - blacklist = blacklist + (referrers,) + denylist = denylist + (referrers,) obj_id = id(obj) for r in referrers: - if get_ignore_reason(r, blacklist) is None: + if get_ignore_reason(r, denylist) is None: r_id = id(r) if r_id not in graph: graph[r_id] = [] if obj_id not in graph[r_id]: graph[r_id].append(obj_id) - build_ref_graph(r, graph, reprs, blacklist) - reprs[r_id] = describe(r, blacklist) + build_ref_graph(r, graph, reprs, denylist) + reprs[r_id] = describe(r, denylist) def find_cycle(el, graph, reprs, path): """Finds and prints a single cycle in the dependency graph.""" diff --git a/tensorflow/python/grappler/auto_mixed_precision_test.py b/tensorflow/python/grappler/auto_mixed_precision_test.py index 539c2bca9f3..567ff8c000d 100644 --- a/tensorflow/python/grappler/auto_mixed_precision_test.py +++ b/tensorflow/python/grappler/auto_mixed_precision_test.py @@ -59,8 +59,8 @@ def _input(shape): def _weight(shape): """Generates a weight of a given shape.""" # Note that the lambda is needed to allow construction inside loops. - return variables.Variable( - lambda: init_ops.glorot_uniform_initializer(seed=0)(shape)) + return variables.Variable(lambda: init_ops.glorot_uniform_initializer(seed=0) + (shape)) def _bias(shape): @@ -204,11 +204,11 @@ def _make_node_with_color(color, input_tensor, name=None): if color == 'w': # Allow node weights = _weight(input_tensor.get_shape().as_list()) return math_ops.matmul(input_tensor, weights, name=name) - if color == 'g': # Gray node + if color == 'g': # Infer node return math_ops.add(input_tensor, 0.1, name=name) if color == 'c': # Clear node return nn.relu(input_tensor, name=name) - if color == 'b': # Black node + if color == 'b': # Deny node return math_ops.pow(math_ops.pow(input_tensor, 2.), 0.5, name=name) raise ValueError('Invalid node color: ' + str(color)) @@ -371,8 +371,8 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase): The loop has different node colors in different sections of the graph. The arguments must be strings where each character represents the color of a - node in that section of the graph: w = allow, g = gray, c = clear, - b = black. CAPITALIZED characters indicate that the node is expected to be + node in that section of the graph: w = allow, g = infer, c = clear, + b = deny. CAPITALIZED characters indicate that the node is expected to be changed to DT_HALF during graph optimization. inp -> loop [ body ] -> out. diff --git a/tensorflow/python/tools/freeze_graph.py b/tensorflow/python/tools/freeze_graph.py index 561e998f6c3..33b5c78d982 100644 --- a/tensorflow/python/tools/freeze_graph.py +++ b/tensorflow/python/tools/freeze_graph.py @@ -84,7 +84,7 @@ def freeze_graph_with_def_protos(input_graph_def, clear_devices, initializer_nodes, variable_names_whitelist="", - variable_names_blacklist="", + variable_names_denylist="", input_meta_graph_def=None, input_saved_model_dir=None, saved_model_tags=None, @@ -107,7 +107,7 @@ def freeze_graph_with_def_protos(input_graph_def, freezing. variable_names_whitelist: The set of variable names to convert (optional, by default, all variables are converted). - variable_names_blacklist: The set of variable names to omit converting + variable_names_denylist: The set of variable names to omit converting to constants (optional). 
input_meta_graph_def: A `MetaGraphDef` (optional), input_saved_model_dir: Path to the dir with TensorFlow 'SavedModel' file @@ -213,9 +213,9 @@ def freeze_graph_with_def_protos(input_graph_def, variable_names_whitelist = ( variable_names_whitelist.replace(" ", "").split(",") if variable_names_whitelist else None) - variable_names_blacklist = ( - variable_names_blacklist.replace(" ", "").split(",") - if variable_names_blacklist else None) + variable_names_denylist = ( + variable_names_denylist.replace(" ", "").split(",") + if variable_names_denylist else None) if input_meta_graph_def: output_graph_def = graph_util.convert_variables_to_constants( @@ -223,14 +223,14 @@ def freeze_graph_with_def_protos(input_graph_def, input_meta_graph_def.graph_def, output_node_names.replace(" ", "").split(","), variable_names_whitelist=variable_names_whitelist, - variable_names_blacklist=variable_names_blacklist) + variable_names_blacklist=variable_names_denylist) else: output_graph_def = graph_util.convert_variables_to_constants( sess, input_graph_def, output_node_names.replace(" ", "").split(","), variable_names_whitelist=variable_names_whitelist, - variable_names_blacklist=variable_names_blacklist) + variable_names_blacklist=variable_names_denylist) # Write GraphDef to file if output path has been given. if output_graph: @@ -294,7 +294,7 @@ def freeze_graph(input_graph, clear_devices, initializer_nodes, variable_names_whitelist="", - variable_names_blacklist="", + variable_names_denylist="", input_meta_graph=None, input_saved_model_dir=None, saved_model_tags=tag_constants.SERVING, @@ -318,7 +318,7 @@ def freeze_graph(input_graph, freezing. variable_names_whitelist: The set of variable names to convert (optional, by default, all variables are converted), - variable_names_blacklist: The set of variable names to omit converting + variable_names_denylist: The set of variable names to omit converting to constants (optional). input_meta_graph: A `MetaGraphDef` file to load (optional). input_saved_model_dir: Path to the dir with TensorFlow 'SavedModel' file and @@ -354,7 +354,7 @@ def freeze_graph(input_graph, clear_devices, initializer_nodes, variable_names_whitelist, - variable_names_blacklist, + variable_names_denylist, input_meta_graph_def, input_saved_model_dir, [tag for tag in saved_model_tags.replace(" ", "").split(",") if tag], @@ -373,7 +373,7 @@ def main(unused_args, flags): flags.input_checkpoint, flags.output_node_names, flags.restore_op_name, flags.filename_tensor_name, flags.output_graph, flags.clear_devices, flags.initializer_nodes, - flags.variable_names_whitelist, flags.variable_names_blacklist, + flags.variable_names_whitelist, flags.variable_names_denylist, flags.input_meta_graph, flags.input_saved_model_dir, flags.saved_model_tags, checkpoint_version) @@ -456,7 +456,7 @@ def run_main(): only those variables will be converted to constants.\ """) parser.add_argument( - "--variable_names_blacklist", + "--variable_names_denylist", type=str, default="", help="""\ diff --git a/tensorflow/python/tools/saved_model_cli.py b/tensorflow/python/tools/saved_model_cli.py index 415220ad14e..bdbdd3499ad 100644 --- a/tensorflow/python/tools/saved_model_cli.py +++ b/tensorflow/python/tools/saved_model_cli.py @@ -58,8 +58,8 @@ _XLA_DEBUG_OPTIONS_URL = ( 'tensorflow/compiler/xla/debug_options_flags.cc') -# Set of ops to blacklist. -_OP_BLACKLIST = set(['WriteFile', 'ReadFile', 'PrintV2']) +# Set of ops to denylist. 
+_OP_DENYLIST = set(['WriteFile', 'ReadFile', 'PrintV2']) def _show_tag_sets(saved_model_dir): @@ -349,9 +349,9 @@ def get_signature_def_map(saved_model_dir, tag_set): def scan_meta_graph_def(meta_graph_def): - """Scans meta_graph_def and reports if there are ops on blacklist. + """Scans meta_graph_def and reports if there are ops on denylist. - Print ops if they are on black list, or print success if no blacklisted ops + Print ops if they are on black list, or print success if no denylisted ops found. Args: @@ -359,13 +359,14 @@ def scan_meta_graph_def(meta_graph_def): """ all_ops_set = set( meta_graph_lib.ops_used_by_graph_def(meta_graph_def.graph_def)) - blacklisted_ops = _OP_BLACKLIST & all_ops_set - if blacklisted_ops: + denylisted_ops = _OP_DENYLIST & all_ops_set + if denylisted_ops: # TODO(yifeif): print more warnings - print('MetaGraph with tag set %s contains the following blacklisted ops:' % - meta_graph_def.meta_info_def.tags, blacklisted_ops) + print( + 'MetaGraph with tag set %s contains the following denylisted ops:' % + meta_graph_def.meta_info_def.tags, denylisted_ops) else: - print('MetaGraph with tag set %s does not contain blacklisted ops.' % + print('MetaGraph with tag set %s does not contain denylisted ops.' % meta_graph_def.meta_info_def.tags) @@ -957,7 +958,7 @@ def add_run_subparser(subparsers): def add_scan_subparser(subparsers): """Add parser for `scan`.""" scan_msg = ('Usage example:\n' - 'To scan for blacklisted ops in SavedModel:\n' + 'To scan for denylisted ops in SavedModel:\n' '$saved_model_cli scan --dir /tmp/saved_model\n' 'To scan a specific MetaGraph, pass in --tag_set\n') parser_scan = subparsers.add_parser( diff --git a/tensorflow/python/tools/saved_model_cli_test.py b/tensorflow/python/tools/saved_model_cli_test.py index 0baca7fef55..84283ec7dd7 100644 --- a/tensorflow/python/tools/saved_model_cli_test.py +++ b/tensorflow/python/tools/saved_model_cli_test.py @@ -698,18 +698,18 @@ Defined Functions: with captured_output() as (out, _): saved_model_cli.scan(args) output = out.getvalue().strip() - self.assertTrue('does not contain blacklisted ops' in output) + self.assertTrue('does not contain denylisted ops' in output) - def testScanCommandFoundBlacklistedOp(self): + def testScanCommandFoundDenylistedOp(self): self.parser = saved_model_cli.create_parser() base_path = test.test_src_dir_path(SAVED_MODEL_PATH) args = self.parser.parse_args( ['scan', '--dir', base_path, '--tag_set', 'serve']) - op_blacklist = saved_model_cli._OP_BLACKLIST - saved_model_cli._OP_BLACKLIST = set(['VariableV2']) + op_denylist = saved_model_cli._OP_DENYLIST + saved_model_cli._OP_DENYLIST = set(['VariableV2']) with captured_output() as (out, _): saved_model_cli.scan(args) - saved_model_cli._OP_BLACKLIST = op_blacklist + saved_model_cli._OP_DENYLIST = op_denylist output = out.getvalue().strip() self.assertTrue('\'VariableV2\'' in output) diff --git a/tensorflow/python/tpu/feature_column.py b/tensorflow/python/tpu/feature_column.py index 6039a57ce90..924acc0ee0d 100644 --- a/tensorflow/python/tpu/feature_column.py +++ b/tensorflow/python/tpu/feature_column.py @@ -36,13 +36,13 @@ _SUPPORTED_SEQUENCE_COLUMNS = (fc._SequenceCategoricalColumn, # For V2 columns, we support anything that inherits from CategoricalColumn -# other than those in the blacklist. User-provided columns that inherit from +# other than those in the denylist. 
User-provided columns that inherit from # CategoricalColumn may or may not be compatible; it is up to the user to # manage TPU compatibility for custom columns. _SUPPORTED_CATEGORICAL_COLUMNS_V2 = (fc_lib.CategoricalColumn,) -_BLACKLISTED_CATEGORICAL_COLUMNS_V2 = (fc_lib.HashedCategoricalColumn, - fc_lib.BucketizedColumn, - fc_lib.CrossedColumn) +_DENYLISTED_CATEGORICAL_COLUMNS_V2 = (fc_lib.HashedCategoricalColumn, + fc_lib.BucketizedColumn, + fc_lib.CrossedColumn) _SUPPORTED_CATEGORICAL_COLUMNS = (fc._IdentityCategoricalColumn, fc._VocabularyFileCategoricalColumn, fc._VocabularyListCategoricalColumn, @@ -106,9 +106,9 @@ def embedding_column(categorical_column, ValueError: if `initializer` is specified but not callable. TypeError: if categorical_column is not a supported type. """ - if isinstance(categorical_column, _BLACKLISTED_CATEGORICAL_COLUMNS_V2): + if isinstance(categorical_column, _DENYLISTED_CATEGORICAL_COLUMNS_V2): raise TypeError('categorical_column for tpu ' - ' embedding_column was blacklisted type %s' % + ' embedding_column was denylisted type %s' % type(categorical_column)) if not isinstance(categorical_column, _SUPPORTED_CATEGORICAL_COLUMNS): raise TypeError( @@ -223,9 +223,9 @@ def shared_embedding_columns(categorical_columns, or 0 for a sequence column. """ for categorical_column in categorical_columns: - if isinstance(categorical_column, _BLACKLISTED_CATEGORICAL_COLUMNS_V2): + if isinstance(categorical_column, _DENYLISTED_CATEGORICAL_COLUMNS_V2): raise TypeError('categorical_column for tpu ' - ' embedding_column was blacklisted type %s' % + ' embedding_column was denylisted type %s' % type(categorical_column)) if not isinstance(categorical_column, _SUPPORTED_CATEGORICAL_COLUMNS): raise TypeError( diff --git a/tensorflow/python/tpu/feature_column_test.py b/tensorflow/python/tpu/feature_column_test.py index 74cfe27f006..5992e74972f 100644 --- a/tensorflow/python/tpu/feature_column_test.py +++ b/tensorflow/python/tpu/feature_column_test.py @@ -59,8 +59,8 @@ class EmbeddingColumnTest(test.TestCase): 'aaa': parsing_ops.VarLenFeature(dtypes.int64) }, embedding_column._parse_example_spec) - def test_blacklisted_column(self): - # HashedCategoricalColumn is blacklisted and so will raise an exception. + def test_denylisted_column(self): + # HashedCategoricalColumn is denylisted and so will raise an exception. categorical_column = fc_lib.categorical_column_with_hash_bucket( key='aaa', hash_bucket_size=3) embedding_dimension = 2 diff --git a/tensorflow/python/tpu/tpu.py b/tensorflow/python/tpu/tpu.py index 14ba164314e..97cb456f50f 100644 --- a/tensorflow/python/tpu/tpu.py +++ b/tensorflow/python/tpu/tpu.py @@ -57,7 +57,7 @@ ops.NotDifferentiable("TPUReplicatedInput") # Operations that indicate some error in the users graph, e.g. a placeholder # that's introduced outside of the infeed. -_BLACKLISTED_OPS = set([ +_DENYLISTED_OPS = set([ "Placeholder", ]) @@ -526,7 +526,7 @@ class TPUReplicateContext(control_flow_ops.XLAControlFlowContext): def AddOp(self, op): # pylint: disable=protected-access - if op.type in _BLACKLISTED_OPS: + if op.type in _DENYLISTED_OPS: logging.error("Operation of type %s (%s) is not supported on the TPU. " "Execution will fail if this op is used in the graph. " % (op.type, op.name)) @@ -1947,7 +1947,9 @@ def rewrite(computation, # pylint: enable=indexing-exception # Operations that indicate some error in the user's inference graph. 
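As a hedged usage sketch of the denylisted-column check above (the import aliases below are assumptions; the diff only shows the internal symbols), passing a hashed categorical column to the TPU embedding_column wrapper is expected to raise TypeError:

# Illustrative only; import paths are assumptions based on the modules touched above.
from tensorflow.python.feature_column import feature_column_lib as fc_lib
from tensorflow.python.tpu import feature_column as tpu_fc

categorical_column = fc_lib.categorical_column_with_hash_bucket(
    key='aaa', hash_bucket_size=3)
try:
    tpu_fc.embedding_column(categorical_column, dimension=2)
except TypeError as error:
    print(error)  # "... embedding_column was denylisted type ..."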
-_BLACKLISTED_INFERENCE_OPS = set([ + + +_DENYLISTED_INFERENCE_OPS = set([ "ReadVariableOp", "AssignVariableOp", "AssignAddVariableOp", @@ -1993,7 +1995,7 @@ class _TPUInferenceContext(control_flow_ops.XLAControlFlowContext): def _AddOpInternal(self, op): # pylint: disable=protected-access - if self._check_ops and op.type in _BLACKLISTED_INFERENCE_OPS: + if self._check_ops and op.type in _DENYLISTED_INFERENCE_OPS: raise NotImplementedError( "Operation of type %s (%s) is not supported on the TPU for inference." " Execution will fail if this op is used in the graph. Make sure your" diff --git a/tensorflow/python/training/experimental/mixed_precision.py b/tensorflow/python/training/experimental/mixed_precision.py index c41ec38ccef..af0e27dd860 100644 --- a/tensorflow/python/training/experimental/mixed_precision.py +++ b/tensorflow/python/training/experimental/mixed_precision.py @@ -124,10 +124,10 @@ def enable_mixed_precision_graph_rewrite(opt, loss_scale='dynamic'): E.g. `ArgMax` and `Floor`. * `AllowList`: Ops that are considered numerically safe for execution in float16, and thus are always converted. E.g. `Conv2D`. - * `BlackList`: Ops that are numerically unsafe to execute in float16 and + * `DenyList`: Ops that are numerically unsafe to execute in float16 and can negatively affect downstream nodes. E.g. `Softmax`. * `GrayList`: Ops that are considered numerically safe for execution in - float16 unless downstream from a BlackList Op. E.g. `Add` and `AvgPool`. + float16 unless downstream from a DenyList Op. E.g. `Add` and `AvgPool`. When this function is used, gradients should be computed and applied with the returned optimizer, either by calling `opt.minimize()` or @@ -269,10 +269,10 @@ def enable_mixed_precision_graph_rewrite_v1(opt, loss_scale='dynamic'): E.g. `ArgMax` and `Floor`. * `AllowList`: Ops that are considered numerically safe for execution in float16, and thus are always converted. E.g. `Conv2D`. - * `BlackList`: Ops that are numerically unsafe to execute in float16 and + * `DenyList`: Ops that are numerically unsafe to execute in float16 and can negatively affect downstream nodes. E.g. `Softmax`. * `GrayList`: Ops that are considered numerically safe for execution in - float16 unless downstream from a BlackList Op. E.g. `Add` and `AvgPool`. + float16 unless downstream from a DenyList Op. E.g. `Add` and `AvgPool`. When this function is used, gradients should only be computed and applied with the returned optimizer, either by calling `opt.minimize()` or diff --git a/tensorflow/tools/ci_build/builds/builds_common.sh b/tensorflow/tools/ci_build/builds/builds_common.sh index 8b0c065a9e3..c5698f1068e 100644 --- a/tensorflow/tools/ci_build/builds/builds_common.sh +++ b/tensorflow/tools/ci_build/builds/builds_common.sh @@ -126,7 +126,7 @@ test_runner() { # Run a suite of tests, print failure logs (if any), wall-time each test, # and show the summary at the end. 
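For the AllowList/DenyList/GrayList behavior documented in mixed_precision.py above, a hedged usage sketch of the graph-rewrite entry point (the tf.compat.v1 export path is an assumption; this diff only edits the internal module and its docstrings):

# Illustrative only; assumes the TF 1.x-style export of the rewrite API.
import tensorflow.compat.v1 as tf

opt = tf.train.GradientDescentOptimizer(learning_rate=0.01)
# The rewrite wraps the optimizer with loss scaling; AllowList ops (e.g. Conv2D)
# are cast to float16, DenyList ops (e.g. Softmax) stay in float32, and GrayList
# ops convert only when not downstream of a DenyList op.
opt = tf.train.experimental.enable_mixed_precision_graph_rewrite(
    opt, loss_scale='dynamic')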
# - # Usage: test_runner + # Usage: test_runner # e.g., test_runner "Tutorial test-on-install" \ # "test1 test2 test3" "test2 test3" "/tmp/log_dir" @@ -136,7 +136,7 @@ test_runner() { TEST_DESC=$1 ALL_TESTS_STR=$2 - TEST_BLACKLIST_SR=$3 + TEST_DENYLIST_SR=$3 LOGS_DIR=$4 NUM_TESTS=$(echo "${ALL_TESTS_STR}" | wc -w) @@ -152,9 +152,9 @@ test_runner() { ((COUNTER++)) STAT_STR="(${COUNTER} / ${NUM_TESTS})" - if [[ "${TEST_BLACKLIST_STR}" == *"${CURR_TEST}"* ]]; then + if [[ "${TEST_DENYLIST_STR}" == *"${CURR_TEST}"* ]]; then ((SKIPPED_COUNTER++)) - echo "${STAT_STR} Blacklisted ${TEST_DESC} SKIPPED: ${CURR_TEST}" + echo "${STAT_STR} Denylisted ${TEST_DESC} SKIPPED: ${CURR_TEST}" continue fi diff --git a/tensorflow/tools/ci_build/builds/integration_tests.sh b/tensorflow/tools/ci_build/builds/integration_tests.sh index 18dbd2a2363..6b036da0783 100755 --- a/tensorflow/tools/ci_build/builds/integration_tests.sh +++ b/tensorflow/tools/ci_build/builds/integration_tests.sh @@ -24,19 +24,19 @@ # the Python binary path. # # This script obeys the following environment variables (if exists): -# TF_BUILD_INTEG_TEST_BLACKLIST: Force skipping of specified integration tests +# TF_BUILD_INTEG_TEST_DENYLIST: Force skipping of specified integration tests # listed in INTEG_TESTS below. # # List of all integration tests to run, separated by spaces INTEG_TESTS="ffmpeg_lib" -if [[ -z "${TF_BUILD_INTEG_TEST_BLACKLIST}" ]]; then - TF_BUILD_INTEG_TEST_BLACKLIST="" +if [[ -z "${TF_BUILD_INTEG_TEST_DENYLIST}" ]]; then + TF_BUILD_INTEG_TEST_DENYLIST="" fi echo "" echo "=== Integration Tests ===" -echo "TF_BUILD_INTEG_TEST_BLACKLIST = \"${TF_BUILD_INTEG_TEST_BLACKLIST}\"" +echo "TF_BUILD_INTEG_TEST_DENYLIST = \"${TF_BUILD_INTEG_TEST_DENYLIST}\"" # Timeout (in seconds) for each integration test TIMEOUT=1800 @@ -121,4 +121,4 @@ test_ffmpeg_lib() { # Run the integration tests test_runner "integration test-on-install" \ - "${INTEG_TESTS}" "${TF_BUILD_INTEG_TEST_BLACKLIST}" "${LOGS_DIR}" + "${INTEG_TESTS}" "${TF_BUILD_INTEG_TEST_DENYLIST}" "${LOGS_DIR}" diff --git a/tensorflow/tools/ci_build/builds/run_pip_tests.sh b/tensorflow/tools/ci_build/builds/run_pip_tests.sh index a095633a22e..bf5c0043df0 100755 --- a/tensorflow/tools/ci_build/builds/run_pip_tests.sh +++ b/tensorflow/tools/ci_build/builds/run_pip_tests.sh @@ -25,7 +25,7 @@ # the Python binary path. # # The --gpu flag informs the script that this is a GPU build, so that the -# appropriate test blacklists can be applied accordingly. +# appropriate test denylists can be applied accordingly. # # The --mac flag informs the script that this is running on mac. Mac does not # have flock, so we should skip using parallel_gpu_execute on mac. diff --git a/tensorflow/tools/ci_build/builds/test_tutorials.sh b/tensorflow/tools/ci_build/builds/test_tutorials.sh index a12827a2d3c..b5219c47b6a 100755 --- a/tensorflow/tools/ci_build/builds/test_tutorials.sh +++ b/tensorflow/tools/ci_build/builds/test_tutorials.sh @@ -28,19 +28,19 @@ # the Python binary path. # # This script obeys the following environment variables (if exists): -# TUT_TESTS_BLACKLIST: Force skipping of specified tutorial tests listed +# TUT_TESTS_DENYLIST: Force skipping of specified tutorial tests listed # in TUT_TESTS below. 
# # List of all tutorial tests to run, separated by spaces TUT_TESTS="mnist_with_summaries word2vec" -if [[ -z "${TUT_TESTS_BLACKLIST}" ]]; then - TF_BUILD_TUT_TEST_BLACKLIST="" +if [[ -z "${TUT_TESTS_DENYLIST}" ]]; then + TF_BUILD_TUT_TEST_DENYLIST="" fi echo "" echo "=== Testing tutorials ===" -echo "TF_BUILD_TUT_TEST_BLACKLIST = \"${TF_BUILD_TUT_TEST_BLACKLIST}\"" +echo "TF_BUILD_TUT_TEST_DENYLIST = \"${TF_BUILD_TUT_TEST_DENYLIST}\"" # Timeout (in seconds) for each tutorial test TIMEOUT=1800 @@ -269,4 +269,4 @@ test_ptb_word_lm() { # Run the tutorial tests test_runner "tutorial test-on-install" \ - "${TUT_TESTS}" "${TF_BUILD_TUT_TEST_BLACKLIST}" "${LOGS_DIR}" + "${TUT_TESTS}" "${TF_BUILD_TUT_TEST_DENYLIST}" "${LOGS_DIR}" diff --git a/tensorflow/tools/ci_build/builds/test_user_ops.sh b/tensorflow/tools/ci_build/builds/test_user_ops.sh index 0fe5acfcd9a..08c236a1b19 100755 --- a/tensorflow/tools/ci_build/builds/test_user_ops.sh +++ b/tensorflow/tools/ci_build/builds/test_user_ops.sh @@ -23,7 +23,7 @@ # the Python binary path. # # The --gpu flag informs the script that this is a GPU build, so that the -# appropriate test blacklists can be applied accordingly. +# appropriate test denylists can be applied accordingly. # echo "" diff --git a/tensorflow/tools/ci_build/ci_sanity.sh b/tensorflow/tools/ci_build/ci_sanity.sh index eb9a5a2a96e..f4961e896ee 100755 --- a/tensorflow/tools/ci_build/ci_sanity.sh +++ b/tensorflow/tools/ci_build/ci_sanity.sh @@ -355,7 +355,7 @@ do_external_licenses_check(){ EXTERNAL_LICENSES_CHECK_END_TIME=$(date +'%s') - # Blacklist + # Denylist echo ${MISSING_LICENSES_FILE} grep \ -e "@bazel_tools//third_party/" \ diff --git a/tensorflow/tools/ci_build/pylintrc b/tensorflow/tools/ci_build/pylintrc index 68fdb617166..5d65c9644c7 100644 --- a/tensorflow/tools/ci_build/pylintrc +++ b/tensorflow/tools/ci_build/pylintrc @@ -10,7 +10,7 @@ # Profiled execution. profile=no -# Add files or directories to the blacklist. They should be base names, not +# Add files or directories to the denylist. They should be base names, not # paths. ignore=CVS diff --git a/tensorflow/tools/common/traverse.py b/tensorflow/tools/common/traverse.py index 1d9c98277b5..299fe5732c6 100644 --- a/tensorflow/tools/common/traverse.py +++ b/tensorflow/tools/common/traverse.py @@ -101,7 +101,7 @@ def traverse(root, visit): is already in the stack. Traversing system modules can take a long time, it is advisable to pass a - `visit` callable which blacklists such modules. + `visit` callable which denylists such modules. Args: root: A python object with which to start the traversal. 
diff --git a/third_party/sycl/crosstool/computecpp.tpl b/third_party/sycl/crosstool/computecpp.tpl index c699eabb6f3..ac27e81bc88 100755 --- a/third_party/sycl/crosstool/computecpp.tpl +++ b/third_party/sycl/crosstool/computecpp.tpl @@ -41,7 +41,7 @@ def main(): # compile for C return call([CPU_C_COMPILER] + compiler_flags) - # create a blacklist of folders that will be skipped when compiling with ComputeCpp + # create a denylist of folders that will be skipped when compiling with ComputeCpp skip_extensions = [".cu.cc"] skip_folders = ["tensorflow/compiler", "tensorflow/docs_src", "third_party", "external", "hexagon"] skip_folders = [(folder + '/') for folder in skip_folders] diff --git a/third_party/sycl/crosstool/trisycl.tpl b/third_party/sycl/crosstool/trisycl.tpl index 87a70d8f954..8206a1a94b1 100644 --- a/third_party/sycl/crosstool/trisycl.tpl +++ b/third_party/sycl/crosstool/trisycl.tpl @@ -57,7 +57,7 @@ def main(): ] + opt_flags if (compiling_cpp == 1): - # create a blacklist of folders that will be skipped when compiling + # create a denylist of folders that will be skipped when compiling # with triSYCL skip_extensions = ['.cu.cc'] skip_folders = [ From f5b9c2225584c79539ff6746b3417e8505443a4b Mon Sep 17 00:00:00 2001 From: Henry Tan Date: Mon, 20 Jul 2020 15:56:47 -0700 Subject: [PATCH 0878/2522] Add TpuCompileOp implementation. PiperOrigin-RevId: 322243843 Change-Id: Ibb4faa94cf5751668d71e3eb83f623988e39e115 --- tensorflow/core/tpu/kernels/BUILD | 43 +++++++++- tensorflow/core/tpu/kernels/tpu_compile_op.cc | 86 +++++++++++++++++++ tensorflow/core/tpu/kernels/tpu_compile_op.h | 31 ++----- .../core/tpu/kernels/tpu_compile_op_common.cc | 13 +++ .../core/tpu/kernels/tpu_compile_op_common.h | 25 +++++- .../core/tpu/kernels/tpu_compile_op_impl.cc | 39 +++++++++ .../tpu/kernels/tpu_compile_op_support.cc | 81 ++++++++++++++--- .../core/tpu/kernels/tpu_compile_op_support.h | 5 ++ 8 files changed, 284 insertions(+), 39 deletions(-) create mode 100644 tensorflow/core/tpu/kernels/tpu_compile_op.cc diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index 9da16032121..987feba9473 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -9,6 +9,11 @@ load( "tf_kernel_library", ) +# Config setting to enable go/libtpu support. 
+WITH_TPU_SUPPORT = "//tensorflow:with_tpu_support" + +DEFAULT = "//conditions:default" + package( default_visibility = [ "//tensorflow/core/tpu:__subpackages__", @@ -178,12 +183,14 @@ cc_library( "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:xla_data_proto_cc", "//tensorflow/compiler/xla/service:computation_layout", + "//tensorflow/compiler/xla/service:computation_placer", "//tensorflow/compiler/xla/service:dump", "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:hlo_module_config", "//tensorflow/compiler/xla/service:hlo_module_group", "//tensorflow/core:framework", "//tensorflow/core/framework:protos_all_cc", + "//tensorflow/core/platform:errors", "//tensorflow/core/protobuf/tpu:compile_metadata_proto_cc", "//tensorflow/stream_executor/tpu:proto_helper", "@com_google_absl//absl/strings", @@ -494,7 +501,10 @@ tf_proto_library_cc( cc_library( name = "tpu_compile_op_hdrs", hdrs = ["tpu_compile_op.h"], - deps = ["//tensorflow/core:framework"], + deps = [ + ":tpu_compile_op_common", + "//tensorflow/core:framework", + ], ) cc_library( @@ -532,6 +542,10 @@ cc_library( name = "tpu_compile_op_impl", srcs = ["tpu_compile_op_impl.cc"], hdrs = ["tpu_compile_op_impl.h"], + copts = select({ + WITH_TPU_SUPPORT: ["-DLIBTFTPU"], + DEFAULT: [], + }), deps = [ "//tensorflow/compiler/jit:shape_inference", "//tensorflow/compiler/tf2xla:xla_compiler", @@ -554,6 +568,33 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "tpu_compile_op_lib", + srcs = ["tpu_compile_op.cc"], + deps = [ + ":tpu_compile_op_hdrs", + ":tpu_compile_op_options", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/core/protobuf/tpu:compilation_result_proto_cc", + "//tensorflow/stream_executor/tpu:tpu_node_context", + ], + alwayslink = True, +) + +cc_library( + name = "tpu_compile_op", + deps = [ + ":tpu_compile_op_hdrs", + ":tpu_compile_op_impl", + ":tpu_compile_op_lib", + ":tpu_compile_op_options", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/core/protobuf/tpu:compilation_result_proto_cc", + "//tensorflow/stream_executor/tpu:tpu_node_context", + ], + alwayslink = True, +) + cc_library( name = "tpu_execute_op", srcs = ["tpu_execute_op.cc"], diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op.cc b/tensorflow/core/tpu/kernels/tpu_compile_op.cc new file mode 100644 index 00000000000..da158f01bb6 --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_compile_op.cc @@ -0,0 +1,86 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tensorflow/core/tpu/kernels/tpu_compile_op.h" + +#include + +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/core/protobuf/tpu/compilation_result.pb.h" +#include "tensorflow/core/tpu/kernels/tpu_compile_op_options.h" +#include "tensorflow/stream_executor/tpu/tpu_node_context.h" + +namespace tensorflow { +namespace tpu { +using ::stream_executor::port::StatusOr; + +TpuCompileOp::TpuCompileOp(OpKernelConstruction* ctx) : OpKernel(ctx) { + StatusOr> compile_op_impl = + CompileOpImplFactory::Get()->CreateNonMlirImpl(ctx); + OP_REQUIRES_OK(ctx, compile_op_impl.status()); + impl_ = std::move(compile_op_impl.ValueOrDie()); +} + +void TpuCompileOp::Compute(OpKernelContext* ctx) { impl_->Compute(ctx); } + +TpuCompileMlirOp::TpuCompileMlirOp(OpKernelConstruction* ctx) : OpKernel(ctx) { + StatusOr> compile_op_impl = + CompileOpImplFactory::Get()->CreateMlirImpl(ctx); + OP_REQUIRES_OK(ctx, compile_op_impl.status()); + impl_ = std::move(compile_op_impl.ValueOrDie()); +} + +void TpuCompileMlirOp::Compute(OpKernelContext* ctx) { impl_->Compute(ctx); } + +void TpuCompileSucceededAssertOp::Compute(OpKernelContext* ctx) { + const Tensor compilation_result = ctx->input(0); + CompilationResultProto proto; + Status status; + if (!proto.ParseFromString(compilation_result.scalar()())) { + status = + errors::InvalidArgument("Unable to parse compilation result proto"); + } + if (!status.ok() || proto.status_code() != error::Code::OK) { + status.Update(Status(proto.status_code(), proto.status_error_message())); + errors::AppendToMessage(&status, "TPU compilation failed"); + if (tensorflow::internal::TpuCompilationFailureClosesChips()) { + // At this point, if compilation fails we do not know if a task + // is already running that expects results from this compiled + // program to complete. So close the TPU driver to release all + // awaiting interactions (all awaiting interaction will fail and + // continue to fail until reinitialized). + LOG(ERROR) << "Cloud TPU: Closing chips. TPU compilation is considered " + "as part of device state, and a failed compilation results " + "in a device reset."; + + Status close_status = TpuNodeContext::CloseTpuHost(); + + if (!close_status.ok()) { + errors::AppendToMessage(&status, close_status.error_message()); + } + } + ctx->CtxFailureWithWarning(status); + } +} + +REGISTER_MODULE_INITIALIZER(register_tpu_compile_op_kernel, { + VLOG(1) << "Register TpuCompileOp kernel."; + REGISTER_KERNEL_BUILDER(Name("TPUCompile").Device(DEVICE_CPU), TpuCompileOp); + REGISTER_KERNEL_BUILDER(Name("_TPUCompileMlir").Device(DEVICE_CPU), + TpuCompileMlirOp); + REGISTER_KERNEL_BUILDER(Name("TPUCompileSucceededAssert").Device(DEVICE_CPU), + TpuCompileSucceededAssertOp); +}); +} // namespace tpu +} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op.h b/tensorflow/core/tpu/kernels/tpu_compile_op.h index 0bbf5695400..48c0d9de3b3 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op.h +++ b/tensorflow/core/tpu/kernels/tpu_compile_op.h @@ -18,19 +18,10 @@ limitations under the License. #include #include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/tpu/kernels/tpu_compile_op_common.h" namespace tensorflow { namespace tpu { -// Forward declaration. 
-#if defined(LIBTFTPU) -class TpuCompileOpKernelImpl; -#else -namespace internal { -class TpuCompileOpKernelImpl; -} -#endif -} // namespace tpu - // The TPUCompile operator compiles a Tensorflow function into a // TPU executable to be run by TPUExecute. // @@ -42,13 +33,9 @@ class TpuCompileOp : public OpKernel { void Compute(OpKernelContext* ctx) override; private: -#if defined(LIBTFTPU) - std::unique_ptr impl_; -#else - std::unique_ptr impl_; -#endif + std::unique_ptr impl_; - DISALLOW_COPY_AND_ASSIGN(TpuCompileOp); + TF_DISALLOW_COPY_AND_ASSIGN(TpuCompileOp); }; // The TPUCompile operator compiles a MLIR module into a @@ -62,13 +49,9 @@ class TpuCompileMlirOp : public OpKernel { void Compute(OpKernelContext* ctx) override; private: -#if defined(LIBTFTPU) - std::unique_ptr impl_; -#else - std::unique_ptr impl_; -#endif + std::unique_ptr impl_; - DISALLOW_COPY_AND_ASSIGN(TpuCompileMlirOp); + TF_DISALLOW_COPY_AND_ASSIGN(TpuCompileMlirOp); }; class TpuCompileSucceededAssertOp : public OpKernel { @@ -80,9 +63,9 @@ class TpuCompileSucceededAssertOp : public OpKernel { void Compute(OpKernelContext* ctx) override; private: - DISALLOW_COPY_AND_ASSIGN(TpuCompileSucceededAssertOp); + TF_DISALLOW_COPY_AND_ASSIGN(TpuCompileSucceededAssertOp); }; - +} // namespace tpu } // namespace tensorflow #endif // TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILE_OP_H_ diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc b/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc index f9e3507f2d8..43143e5d618 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc @@ -130,6 +130,19 @@ Status SetPerCoreArgShapes( } // namespace +CompileOpImplFactory* CompileOpImplFactory::factory_ = nullptr; + +/* static */ +CompileOpImplFactory* CompileOpImplFactory::Get() { return factory_; } + +/* static */ +void CompileOpImplFactory::Register(CompileOpImplFactory* factory) { + CHECK_EQ(factory_, nullptr) + << "CompileOpImplFactory can only be registered " + "once and there can only be one factory active and used."; + factory_ = factory; +} + Status TpuCompileOpKernelCommon::AssignReturnValueToCore( std::vector* retval_core_mapping) { std::vector per_core_retval_counts(metadata_.num_cores_per_replica(), 0); diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_common.h b/tensorflow/core/tpu/kernels/tpu_compile_op_common.h index 1bbe698cb37..3d3f0afcdb7 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_common.h +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_common.h @@ -23,6 +23,7 @@ limitations under the License. #include "tensorflow/compiler/jit/shape_inference.h" #include "tensorflow/compiler/tf2xla/xla_compiler.h" #include "tensorflow/compiler/xla/client/compile_only_client.h" +#include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/protobuf/tpu/compile_metadata.pb.h" @@ -34,6 +35,29 @@ limitations under the License. namespace tensorflow { namespace tpu { +// Forward declaration, defined below. +class TpuCompileOpKernelCommon; + +// A base factory class for creating a `TpuCompileOpKernelImpl` variant. +// By design, the actual factory can only be set once. 
+class CompileOpImplFactory { + public: + virtual ~CompileOpImplFactory() = default; + + virtual stream_executor::port::StatusOr< + std::unique_ptr> + CreateNonMlirImpl(OpKernelConstruction* ctx) = 0; + + virtual stream_executor::port::StatusOr< + std::unique_ptr> + CreateMlirImpl(OpKernelConstruction* ctx) = 0; + + static CompileOpImplFactory* Get(); + static void Register(CompileOpImplFactory* factory); + + private: + static CompileOpImplFactory* factory_; +}; // Abstract base class for TpuCompileOpKernel implementation. class TpuCompileOpKernelCommon { @@ -213,7 +237,6 @@ class TpuCompileOpKernelCommon { private: TF_DISALLOW_COPY_AND_ASSIGN(TpuCompileOpKernelCommon); }; - } // namespace tpu } // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_impl.cc b/tensorflow/core/tpu/kernels/tpu_compile_op_impl.cc index 3e684f97a88..8703dd818f5 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_impl.cc +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_impl.cc @@ -23,6 +23,8 @@ limitations under the License. namespace tensorflow { namespace tpu { +using stream_executor::port::StatusOr; + Status TpuCompileOpKernelImpl::Compile( const absl::variant& computation, const XLA_TpuMeshState* mesh_state, @@ -35,5 +37,42 @@ Status TpuCompileOpKernelImpl::Compile( return TpuProgramGroup::CompileAndBuild(compilation_request, mesh_state, tpu_program_group); } + +class TpuCompileOpImplFactory : public CompileOpImplFactory { + public: + StatusOr> CreateNonMlirImpl( + OpKernelConstruction* ctx) override { + NameAttrList function_name; + TPUCompileMetadataProto metadata; + TF_RETURN_IF_ERROR(CompileOpMetadataFromContext(ctx, &metadata, + &function_name, + /*mlir_module=*/nullptr)); + VLOG(1) << "Create tensorflow::tpu::TpuCompileOpKernelImpl"; + return {std::make_unique( + function_name, metadata, metadata.num_cores_per_replica(), + /*return_hlo_protos=*/false, + /*unload_cache_on_session_close=*/false)}; + } + + StatusOr> CreateMlirImpl( + OpKernelConstruction* ctx) override { + TPUCompileMetadataProto metadata; + std::string mlir_module; + TF_RETURN_IF_ERROR(CompileOpMetadataFromContext( + ctx, &metadata, /*function_name=*/nullptr, &mlir_module)); + VLOG(1) << "Create tensorflow::tpu::TpuCompileOpKernelImpl"; + return {std::make_unique( + mlir_module, metadata, metadata.num_cores_per_replica(), + /*return_hlo_protos=*/false, + /*unload_cache_on_session_close=*/false)}; + } +}; + +#if defined(LIBTFTPU) +REGISTER_MODULE_INITIALIZER(tpu_compile_op_impl_factory, { + VLOG(1) << "register TpuCompileOpImplFactory()"; + CompileOpImplFactory::Register(new TpuCompileOpImplFactory()); +}); +#endif // LIBTFTPU } // namespace tpu } // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_support.cc b/tensorflow/core/tpu/kernels/tpu_compile_op_support.cc index 41e81c6bca7..5cc35a07e66 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_support.cc +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_support.cc @@ -16,27 +16,28 @@ limitations under the License. 
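The factory indirection introduced above lets the generic TpuCompileOp kernel obtain its implementation without compile-time knowledge of the TPU-specific code: the implementation library registers a factory once at load time, and the op only calls CompileOpImplFactory::Get(). A minimal, self-contained sketch of the same register-once pattern follows; all names in it (KernelImpl, ImplFactory, TpuImplFactory) are invented for illustration and are not the TensorFlow classes shown in the hunks above.

#include <cassert>
#include <iostream>
#include <memory>

// Minimal stand-ins for the kernel implementation and its factory.
class KernelImpl {
 public:
  virtual ~KernelImpl() = default;
  virtual void Compute() = 0;
};

class ImplFactory {
 public:
  virtual ~ImplFactory() = default;
  virtual std::unique_ptr<KernelImpl> Create() = 0;

  // The factory is process-global and may be registered exactly once,
  // typically from the platform-specific library at load time.
  static ImplFactory* Get() { return factory_; }
  static void Register(ImplFactory* factory) {
    assert(factory_ == nullptr && "factory can only be registered once");
    factory_ = factory;
  }

 private:
  static ImplFactory* factory_;
};

ImplFactory* ImplFactory::factory_ = nullptr;

// Platform-specific side of the pattern: a concrete implementation plus a
// static registration object that runs when this translation unit is linked
// (analogous in spirit to the REGISTER_MODULE_INITIALIZER in the patch).
class TpuKernelImpl : public KernelImpl {
 public:
  void Compute() override { std::cout << "compiling for the device\n"; }
};

class TpuImplFactory : public ImplFactory {
 public:
  std::unique_ptr<KernelImpl> Create() override {
    return std::make_unique<TpuKernelImpl>();
  }
};

namespace {
const bool kRegistered = [] {
  ImplFactory::Register(new TpuImplFactory());
  return true;
}();
}  // namespace

int main() {
  // Platform-agnostic side: the op only talks to the factory interface.
  std::unique_ptr<KernelImpl> impl = ImplFactory::Get()->Create();
  impl->Compute();
}

This is also why the header no longer needs the #if defined(LIBTFTPU) forward declarations: the op depends only on the abstract factory, and the concrete factory is supplied by whichever implementation library is linked in.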
#include "tensorflow/compiler/xla/debug_options_flags.h" #include "tensorflow/compiler/xla/service/computation_layout.h" +#include "tensorflow/compiler/xla/service/computation_placer.h" #include "tensorflow/compiler/xla/service/dump.h" #include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_key.h" #include "tensorflow/core/tpu/kernels/tpu_executable_info.pb.h" #include "tensorflow/stream_executor/tpu/proto_helper.h" namespace tensorflow { namespace tpu { - -using stream_executor::port::Status; -using stream_executor::port::StatusOr; -using xla::ComputationLayout; -using xla::DebugOptions; -using xla::DeviceAssignment; -using xla::HloModuleConfig; -using xla::HloSharding; -using xla::InvalidArgument; -using xla::ProgramShape; -using xla::Shape; -using xla::ShapeTree; -using xla::ShapeUtil; +using ::stream_executor::port::Status; +using ::stream_executor::port::StatusOr; +using ::xla::ComputationLayout; +using ::xla::DebugOptions; +using ::xla::DeviceAssignment; +using ::xla::HloModuleConfig; +using ::xla::HloSharding; +using ::xla::InvalidArgument; +using ::xla::ProgramShape; +using ::xla::Shape; +using ::xla::ShapeTree; +using ::xla::ShapeUtil; Status ValidateResultShape(const Shape& client_shape, const Shape& result_shape) { @@ -485,5 +486,59 @@ StatusOr CreateTpuCompilationRequest( VLOG(1) << "TpuCompilationRequest:\n" << compilation_request.DebugString(); return compilation_request; } + +Status CompileOpMetadataFromContext(OpKernelConstruction* ctx, + TPUCompileMetadataProto* metadata, + NameAttrList* function_name, + std::string* mlir_module) { + CHECK_NE(metadata, nullptr); + + int num_computations; + TF_RETURN_IF_ERROR(ctx->GetAttr("num_computations", &num_computations)); + + std::string metadata_string; + TF_RETURN_IF_ERROR(ctx->GetAttr("metadata", &metadata_string)); + if (!metadata->ParsePartialFromString(metadata_string)) { + return errors::InvalidArgument("Unable to parse TPUCompileMetadataProto"); + } + + if (function_name != nullptr) { + TF_RETURN_IF_ERROR(ctx->GetAttr("function", function_name)); + } + + if (mlir_module != nullptr) { + TF_RETURN_IF_ERROR(ctx->GetAttr("mlir_module", mlir_module)); + } + + if (num_computations != metadata->num_cores_per_replica()) { + return errors::InvalidArgument( + "num_computations must be equal to " + "num_cores_per_replica in the 'metadata' " + "attribute (", + num_computations, " vs ", metadata->num_cores_per_replica(), ")"); + } + + if (metadata->has_device_assignment()) { + StatusOr> device_assignment_or_error = + DeviceAssignment::Deserialize(metadata->device_assignment()); + TF_RETURN_IF_ERROR(device_assignment_or_error.status()); + const DeviceAssignment& device_assignment = + *device_assignment_or_error.ValueOrDie(); + const int num_replicas = metadata->num_replicas(); + if (device_assignment.replica_count() != num_replicas) { + return errors::InvalidArgument( + "Device assignment replica_count != num_replicas; ", + device_assignment.replica_count(), " vs ", num_replicas); + } + if (device_assignment.computation_count() != + metadata->num_cores_per_replica()) { + return errors::InvalidArgument( + "Device assignment computation_count != num_cores_per_replica; ", + device_assignment.computation_count(), " vs ", + metadata->num_cores_per_replica()); + } + } + return Status::OK(); +} } // namespace tpu } // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_support.h 
b/tensorflow/core/tpu/kernels/tpu_compile_op_support.h index d6d407cb28f..bc60f64286a 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_support.h +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_support.h @@ -31,6 +31,7 @@ limitations under the License. #include "tensorflow/compiler/xla/shape_tree.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/framework/function.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor.pb.h" @@ -154,6 +155,10 @@ se::port::StatusOr CreateTpuCompilationRequest( const TPUCompileMetadataProto& metadata, const std::vector& arg_shapes); +se::port::Status CompileOpMetadataFromContext(OpKernelConstruction* ctx, + TPUCompileMetadataProto* metadata, + NameAttrList* function_name, + std::string* mlir_module); } // namespace tpu } // namespace tensorflow From 1f3a439c5d9788ecbaca61edca440760ef85fca0 Mon Sep 17 00:00:00 2001 From: Zhenyu Tan Date: Mon, 20 Jul 2020 15:59:53 -0700 Subject: [PATCH 0879/2522] fix for is_resource_variable PiperOrigin-RevId: 322244449 Change-Id: I8633c8b44cc112b3e9231a2c768390f61ffa7ec4 --- tensorflow/python/keras/layers/recurrent_v2.py | 5 +++-- .../keras/mixed_precision/experimental/autocast_variable.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/keras/layers/recurrent_v2.py b/tensorflow/python/keras/layers/recurrent_v2.py index bad9ecee7d6..878269dee5e 100644 --- a/tensorflow/python/keras/layers/recurrent_v2.py +++ b/tensorflow/python/keras/layers/recurrent_v2.py @@ -37,6 +37,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import state_ops +from tensorflow.python.ops import variables from tensorflow.python.platform import build_info from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util.tf_export import keras_export @@ -1664,7 +1665,7 @@ def _runtime(runtime_name): def _read_variable_value(v): - """Read the value of a resource variable if it is variable.""" - if resource_variable_ops.is_resource_variable(v): + """Read the value of a variable if it is variable.""" + if isinstance(v, variables.Variable): return v.read_value() return v diff --git a/tensorflow/python/keras/mixed_precision/experimental/autocast_variable.py b/tensorflow/python/keras/mixed_precision/experimental/autocast_variable.py index 06a4b92bb8e..caad08ce066 100644 --- a/tensorflow/python/keras/mixed_precision/experimental/autocast_variable.py +++ b/tensorflow/python/keras/mixed_precision/experimental/autocast_variable.py @@ -62,7 +62,7 @@ class AutoCastVariable(variables.Variable, core.Tensor): Raises: ValueError: If `variable` is not a floating-point resource variable """ - if not resource_variable_ops.is_resource_variable(variable): + if not isinstance(variable, variables.Variable): raise ValueError('variable must be of type tf.ResourceVariable, but got: ' '%s' % variable) if not variable.dtype.is_floating: From 1c6d807fa6e4ff046ffa9db318462fb5fcd70d8f Mon Sep 17 00:00:00 2001 From: Karim Nosir Date: Mon, 20 Jul 2020 16:02:25 -0700 Subject: [PATCH 0880/2522] Remove some unused dependencies. 
PiperOrigin-RevId: 322244923 Change-Id: I59126604b903ebc6f16d2849daf483e26c9031fd --- tensorflow/compiler/mlir/lite/BUILD | 15 --------------- tensorflow/compiler/mlir/lite/quantization/BUILD | 1 - 2 files changed, 16 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/BUILD b/tensorflow/compiler/mlir/lite/BUILD index a6b5327c15d..3b67ea3d846 100644 --- a/tensorflow/compiler/mlir/lite/BUILD +++ b/tensorflow/compiler/mlir/lite/BUILD @@ -220,18 +220,14 @@ cc_library( ], deps = [ ":tensorflow_lite_ops_inc_gen", - ":validators", - "//tensorflow/compiler/mlir/lite/experimental/estimators:cost_estimators", "//tensorflow/compiler/mlir/lite/quantization:quantization_lib", "//tensorflow/compiler/mlir/tensorflow:tensorflow_types", "//tensorflow/lite/schema:schema_fbs", "@llvm-project//llvm:Support", - "@llvm-project//mlir:Analysis", "@llvm-project//mlir:DerivedAttributeOpInterface", "@llvm-project//mlir:Dialect", "@llvm-project//mlir:IR", "@llvm-project//mlir:LoopLikeInterface", - "@llvm-project//mlir:Pass", "@llvm-project//mlir:QuantOps", "@llvm-project//mlir:SideEffects", "@llvm-project//mlir:StandardOps", @@ -349,7 +345,6 @@ cc_library( "transforms/passes.h", ], deps = [ - ":common", ":lstm_utils", ":stateful_ops_utils", ":tensorflow_lite", @@ -369,7 +364,6 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core/kernels:tensor_list", - "//tensorflow/core/platform:logging", "@com_google_absl//absl/container:inlined_vector", "@com_google_absl//absl/memory", "@llvm-project//llvm:Support", @@ -400,7 +394,6 @@ cc_library( "//tensorflow/compiler/mlir/lite/quantization:quantization_lib", "//tensorflow/compiler/mlir/tensorflow", "@llvm-project//llvm:Support", - "@llvm-project//mlir:Analysis", "@llvm-project//mlir:IR", "@llvm-project//mlir:Pass", "@llvm-project//mlir:StandardOps", @@ -434,7 +427,6 @@ cc_library( "//tensorflow/core:protos_all_cc", "@com_google_absl//absl/memory", "@llvm-project//llvm:Support", - "@llvm-project//mlir:Analysis", "@llvm-project//mlir:IR", "@llvm-project//mlir:Pass", "@llvm-project//mlir:QuantOps", @@ -457,7 +449,6 @@ cc_library( "//tensorflow/lite/tools/optimize/sparsity:format_converter", "@com_google_absl//absl/base", "@com_google_absl//absl/memory", - "@com_google_absl//absl/strings", "@llvm-project//llvm:Support", "@llvm-project//mlir:IR", "@llvm-project//mlir:Pass", @@ -609,8 +600,6 @@ cc_library( "//tensorflow/compiler/mlir/tensorflow", "//tensorflow/compiler/mlir/tensorflow:convert_tensor", "//tensorflow/compiler/mlir/tensorflow:export_tf_dialect_op", - "//tensorflow/compiler/mlir/tensorflow:mangling_util", - "//tensorflow/compiler/mlir/tensorflow:mlir_roundtrip_flags", "//tensorflow/compiler/mlir/tensorflow:tensorflow_dialect_registration", "//tensorflow/compiler/mlir/tensorflow:tensorflow_types", "//tensorflow/compiler/xla:statusor", @@ -651,7 +640,6 @@ cc_library( ":flatbuffer_tflite_operator_lib", ":tensorflow_lite", ":tensorflow_lite_dialect_registration", - "//tensorflow/compiler/mlir/lite/quantization:quantization_lib", "//tensorflow/compiler/mlir/tensorflow:mangling_util", "//tensorflow/compiler/mlir/tensorflow:tensorflow_types", "//tensorflow/compiler/xla:statusor", @@ -724,7 +712,6 @@ cc_library( "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", - "@com_google_absl//absl/strings", "@llvm-project//llvm:Support", "@llvm-project//mlir:IR", "@llvm-project//mlir:MlirTranslateMain", @@ -858,10 +845,8 @@ cc_library( 
"//tensorflow/core:core_cpu_base", "@llvm-project//llvm:Support", "@llvm-project//mlir:AllPassesAndDialects", - "@llvm-project//mlir:Analysis", "@llvm-project//mlir:IR", "@llvm-project//mlir:Pass", - "@llvm-project//mlir:QuantOps", "@llvm-project//mlir:Transforms", ], ) diff --git a/tensorflow/compiler/mlir/lite/quantization/BUILD b/tensorflow/compiler/mlir/lite/quantization/BUILD index de83a37b82e..aec0d8da34f 100644 --- a/tensorflow/compiler/mlir/lite/quantization/BUILD +++ b/tensorflow/compiler/mlir/lite/quantization/BUILD @@ -81,7 +81,6 @@ cc_library( "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@llvm-project//llvm:Support", - "@llvm-project//mlir:Analysis", "@llvm-project//mlir:IR", "@llvm-project//mlir:Pass", "@llvm-project//mlir:QuantOps", From 4155347a205ecccf98a2d8c952c69326456d432c Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Mon, 20 Jul 2020 16:12:04 -0700 Subject: [PATCH 0881/2522] Record multiple candidate in _colocation_stack for a variable. - In TF2.0 it's possible that we set the device for the variable, but not the reference op. - Previously when calling "colocate_with" we only track the device for reference op, but not the variable. - This cl also returns the device for the original variable as second candidate when recording a device in collocation_stack. PiperOrigin-RevId: 322246956 Change-Id: I329399a5b801b5b93623416a065fbcadcb26d023 --- tensorflow/python/framework/ops.py | 35 ++++++++++++++++++------- tensorflow/python/framework/ops_test.py | 12 +++++++++ 2 files changed, 37 insertions(+), 10 deletions(-) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 4483dadecc0..8d6e1aa3e7b 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -3601,12 +3601,17 @@ class Graph(object): if self._colocation_stack: all_colocation_groups = [] + is_device_set = False for colocation_op in self._colocation_stack.peek_objs(): - all_colocation_groups.extend(colocation_op.colocation_groups()) - if colocation_op.device: + try: + all_colocation_groups.extend(colocation_op.colocation_groups()) + except AttributeError: + pass + if colocation_op.device and not is_device_set: # pylint: disable=protected-access op._set_device(colocation_op.device) # pylint: enable=protected-access + is_device_set = True all_colocation_groups = sorted(set(all_colocation_groups)) # pylint: disable=protected-access @@ -4356,7 +4361,7 @@ class Graph(object): if op is None and not ignore_existing: raise ValueError("Trying to reset colocation (op is None) but " "ignore_existing is not True") - op = _op_to_colocate_with(op, self) + op, device_only_candidate = _op_to_colocate_with(op, self) # By default, colocate_with resets the device function stack, # since colocate_with is typically used in specific internal @@ -4376,8 +4381,12 @@ class Graph(object): # offset refers to the stack frame used for storing code location. # We use 4, the sum of 1 to use our caller's stack frame and 3 # to jump over layers of context managers above us. 
+ if device_only_candidate is not None: + self._colocation_stack.push_obj(device_only_candidate, offset=4) self._colocation_stack.push_obj(op, offset=4) - + elif not ignore_existing: + raise ValueError("Trying to reset colocation (op is None) but " + "ignore_existing is not True") try: yield finally: @@ -4385,6 +4394,8 @@ class Graph(object): self._device_function_stack = device_fn_tmp if op is not None: self._colocation_stack.pop_obj() + if device_only_candidate is not None: + self._colocation_stack.pop_obj() # Reset the colocation stack if requested. if ignore_existing: @@ -6812,26 +6823,30 @@ def _operation_conversion_error(op, dtype=None, name=None, as_ref=False): def _op_to_colocate_with(v, graph): """Operation object corresponding to v to use for colocation constraints.""" if v is None: - return None + return None, None if isinstance(v, Operation): - return v + return v, None + # We always want to colocate with the reference op. # When 'v' is a ResourceVariable, the reference op is the handle creating op. # # What this should be is: # if isinstance(v, ResourceVariable): - # return v.handle.op + # return v.handle.op, v # However, that would require a circular import dependency. # As of October 2018, there were attempts underway to remove # colocation constraints altogether. Assuming that will # happen soon, perhaps this hack to work around the circular # import dependency is acceptable. if hasattr(v, "handle") and isinstance(v.handle, Tensor): + device_only_candidate = lambda: None + device_only_candidate.device = v.device + device_only_candidate.name = v.name if graph.building_function: - return graph.capture(v.handle).op + return graph.capture(v.handle).op, device_only_candidate else: - return v.handle.op - return internal_convert_to_tensor_or_indexed_slices(v, as_ref=True).op + return v.handle.op, device_only_candidate + return internal_convert_to_tensor_or_indexed_slices(v, as_ref=True).op, None def _is_keras_symbolic_tensor(x): diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py index 09a192dea52..c5f556dc6ba 100644 --- a/tensorflow/python/framework/ops_test.py +++ b/tensorflow/python/framework/ops_test.py @@ -3231,6 +3231,18 @@ class ColocationGroupTest(test_util.TensorFlowTestCase): b = variables.Variable([3.0], name="b") self.assertEqual([b"loc:@a"], b.op.colocation_groups()) + @test_util.run_deprecated_v1 + def testColocateResourceVariablesInFunction(self): + with ops.device("/device:CPU:0"): + a = resource_variable_ops.ResourceVariable(1.0) + + @def_function.function + def f(): + with ops.colocate_with(a): + b = array_ops.ones([], name="output") + self.assertEqual("/device:CPU:0", b.op.device) + f() + @test_util.disable_tfrt("Graph is not supported yet. b/156187905") def testColocateWithVariableInFunction(self): v = variables.Variable(1.) From e645a91105b73894556c28576120c84fee546ae4 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 20 Jul 2020 16:48:20 -0700 Subject: [PATCH 0882/2522] Integrate LLVM at llvm/llvm-project@78f543e5a1cd Updates LLVM usage to match [78f543e5a1cd](https://github.com/llvm/llvm-project/commit/78f543e5a1cd) PiperOrigin-RevId: 322253918 Change-Id: I88f6b399876540047066125c0c46a28a68afe84f --- tensorflow/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 2f0ed2db863..20bd810d1a3 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -711,8 +711,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "f9595857b9f868fc7724ea767a8fd984d02848ff" - LLVM_SHA256 = "84e90b625094990dd319a436788656db195953cc823eb080e74ee20531bf872a" + LLVM_COMMIT = "78f543e5a1cd46b5232d5479dd513d2110f52e96" + LLVM_SHA256 = "9adea3b2b150d5a56ff87f630faa832b5736da0ba4be979814bdfc7ffe782dec" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From 7cbb2ecbf2c4e8d32c417376870bf12c741e6866 Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Mon, 20 Jul 2020 16:55:19 -0700 Subject: [PATCH 0883/2522] [MLIR][NFC] Eliminate empty ArrayRef argument when creating Ops - The MLIR build() methods for Ops now provide an empty default value for named attributes. So no need to pass this explicitly now. PiperOrigin-RevId: 322255266 Change-Id: Iae731e79e090676a249332d34a6dee0c2e891a83 --- .../lite/transforms/while_loop_outline.cc | 3 +- .../transforms/collection_ops_util.cc | 49 +++++++------------ .../transforms/executor_island_coarsening.cc | 4 +- .../executor_tpuv1_outline_tpu_island.cc | 4 +- .../transforms/parallel_execute_to_islands.cc | 2 +- .../readonly_references_to_resources.cc | 2 +- .../region_control_flow_to_functional.cc | 3 +- .../transforms/resource_op_lifting.cc | 6 +-- .../transforms/stack_ops_decomposition.cc | 12 ++--- .../tensor_array_ops_decomposition.cc | 15 ++---- .../tensor_list_ops_decomposition.cc | 14 ++---- .../transforms/tpu_dynamic_layout_pass.cc | 3 +- .../tpu_extract_outside_compilation.cc | 3 +- .../tensorflow/transforms/tpu_rewrite_pass.cc | 5 +- .../tpu_variable_runtime_reformatting.cc | 6 +-- 15 files changed, 48 insertions(+), 83 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/transforms/while_loop_outline.cc b/tensorflow/compiler/mlir/lite/transforms/while_loop_outline.cc index 106b0f9af83..3342981b75f 100644 --- a/tensorflow/compiler/mlir/lite/transforms/while_loop_outline.cc +++ b/tensorflow/compiler/mlir/lite/transforms/while_loop_outline.cc @@ -143,8 +143,7 @@ void WhileOutlinePass::OutlineWhile(WhileOp while_op) { type = FunctionType::get(types, result_types, &getContext()); } - auto outlined_func = builder.create(while_op.getLoc(), name, type, - ArrayRef{}); + auto outlined_func = builder.create(while_op.getLoc(), name, type); outlined_func.getBody().takeBody(region); Region& func_region = outlined_func.getBody(); diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/collection_ops_util.cc b/tensorflow/compiler/mlir/tensorflow/transforms/collection_ops_util.cc index 58c4eac5c95..96f50065886 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/collection_ops_util.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/collection_ops_util.cc @@ -77,8 +77,7 @@ Value 
GetIndicesForElement(Value index, Value buffer, OpBuilder builder, ArrayRef{RankedTensorType::get( {static_cast(buffer_type.getShape().size())}, getElementTypeOrSelf(index.getType()))}, - ArrayRef{index, zeros_tensor, CreateScalarConst(0, builder, loc)}, - ArrayRef{}); + ArrayRef{index, zeros_tensor, CreateScalarConst(0, builder, loc)}); } Value GetElement(Value index, Value buffer, OpBuilder builder, Location loc, @@ -95,15 +94,14 @@ Value GetElement(Value index, Value buffer, OpBuilder builder, Location loc, auto slice = builder.create( loc, ArrayRef{slice_type}, ArrayRef{buffer, GetIndicesForElement(index, buffer, builder, loc), - size_const}, - ArrayRef{}); + size_const}); if (keep_slice_shape) return slice; auto element_type = RankedTensorType::get(buffer_type.getShape().drop_front(), buffer_type.getElementType()); auto reshape = builder.create( loc, ArrayRef{element_type}, - ArrayRef{slice, GetR1Const(element_type.getShape(), builder, loc)}, - ArrayRef{}); + ArrayRef{slice, + GetR1Const(element_type.getShape(), builder, loc)}); return reshape.output(); } @@ -120,15 +118,13 @@ Value SetElement(Value index, Value buffer, Value element, OpBuilder builder, if (element.getType() != slice_type) { update_slice = builder.create( loc, ArrayRef{slice_type}, - ArrayRef{element, GetR1Const(slice_shape, builder, loc)}, - ArrayRef{}); + ArrayRef{element, GetR1Const(slice_shape, builder, loc)}); } return builder .create( loc, ArrayRef{buffer.getType()}, ArrayRef{buffer, update_slice, - GetIndicesForElement(index, buffer, builder, loc)}, - ArrayRef{}) + GetIndicesForElement(index, buffer, builder, loc)}) .output(); } @@ -140,8 +136,7 @@ Value ReshapeScalarToSizeType(OpBuilder builder, Value scalar, Location loc) { auto size_type = GetSizeType(builder); return builder.create( loc, ArrayRef{size_type}, - ArrayRef{scalar, GetR1Const(size_type.getShape(), builder, loc)}, - ArrayRef{}); + ArrayRef{scalar, GetR1Const(size_type.getShape(), builder, loc)}); } LogicalResult CreateInitBufferValue(ArrayRef element_shape, @@ -171,13 +166,12 @@ LogicalResult CreateInitBufferValue(ArrayRef element_shape, if (getElementTypeOrSelf(zero.getType()) != element_dtype) { zero = builder.create( op->getLoc(), ArrayRef{RankedTensorType::get({}, element_dtype)}, - ArrayRef{zero}, ArrayRef{}); + ArrayRef{zero}); } auto buffer_type = RankedTensorType::get(buffer_shape, element_dtype); auto broadcast = builder.create( op->getLoc(), ArrayRef{buffer_type}, - ArrayRef{zero, GetR1Const(buffer_shape, builder, op->getLoc())}, - ArrayRef{}); + ArrayRef{zero, GetR1Const(buffer_shape, builder, op->getLoc())}); *buffer = broadcast.output(); return success(); } @@ -241,27 +235,24 @@ Value ReadLocalVariable(Value local_var, OpBuilder builder, Location loc) { ArrayRef{getElementTypeOrSelf(local_var.getType()) .cast() .getSubtypes()[0]}, - ArrayRef{local_var}, ArrayRef{}) + ArrayRef{local_var}) .value(); } // Creates an AssignVariableOp on a local variable. 
TF::AssignVariableOp WriteLocalVariable(Value local_var, Value value, OpBuilder builder, Location loc) { - return builder.create(loc, ArrayRef{}, - ArrayRef{local_var, value}, - ArrayRef{}); + return builder.create( + loc, ArrayRef{}, ArrayRef{local_var, value}); } Value AccumulateBuffers(Value a, Value b, OpBuilder builder, Location loc) { if (getElementTypeOrSelf(a.getType()) == builder.getI1Type()) { return builder.create(loc, ArrayRef{a.getType()}, - ArrayRef{a, b}, - ArrayRef{}); + ArrayRef{a, b}); } return builder.create(loc, ArrayRef{a.getType()}, - ArrayRef{a, b}, - ArrayRef{}); + ArrayRef{a, b}); } namespace { @@ -303,15 +294,13 @@ Value GatherElements(Value indices, Value buffer, OpBuilder builder, return builder.create( loc, ArrayRef{slice_type}, ArrayRef{buffer, GetR1Const(slice_starts, builder, loc), - GetR1Const(result_shape, builder, loc)}, - ArrayRef{}); + GetR1Const(result_shape, builder, loc)}); } auto result_type = RankedTensorType::get(result_shape, buffer_type.getElementType()); return builder.create( loc, ArrayRef{result_type}, - ArrayRef{buffer, indices, CreateScalarConst(0, builder, loc)}, - ArrayRef{}); + ArrayRef{buffer, indices, CreateScalarConst(0, builder, loc)}); } Value ScatterAccumulateElements(Value indices, Value updates, Value buffer, @@ -334,8 +323,7 @@ Value ScatterAccumulateElements(Value indices, Value updates, Value buffer, auto index = builder.create( loc, ArrayRef{GetSizeType(builder)}, ArrayRef{indices, GetR1Const({i}, builder, loc), - GetR1Const({1}, builder, loc)}, - ArrayRef{}); + GetR1Const({1}, builder, loc)}); auto old_slice = GetElement(index, buffer, builder, loc, /*keep_slice_shape=*/true); starts_in_update[0] = i; @@ -344,8 +332,7 @@ Value ScatterAccumulateElements(Value indices, Value updates, Value buffer, builder .create( loc, ArrayRef{old_slice.getType()}, - ArrayRef{updates, update_slice_starts, slice_sizes}, - ArrayRef{}) + ArrayRef{updates, update_slice_starts, slice_sizes}) .output(); slice = AccumulateBuffers(old_slice, slice, builder, loc); buffer = SetElement(index, buffer, slice, builder, loc); diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/executor_island_coarsening.cc b/tensorflow/compiler/mlir/tensorflow/transforms/executor_island_coarsening.cc index 0d72a7638a3..02a2e7efa6f 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/executor_island_coarsening.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/executor_island_coarsening.cc @@ -185,8 +185,8 @@ IslandOp CreateNewIsland(IslandOp parent, IslandOp child, Operation* old_island = insert_position == kParentIsland ? 
parent : child; OpBuilder builder(old_island); - auto new_island = builder.create( - old_island->getLoc(), result_types, operands, ArrayRef{}); + auto new_island = + builder.create(old_island->getLoc(), result_types, operands); new_island.body().push_back(new Block); return new_island; } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/executor_tpuv1_outline_tpu_island.cc b/tensorflow/compiler/mlir/tensorflow/transforms/executor_tpuv1_outline_tpu_island.cc index e04f6bf3daa..d8e739ee949 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/executor_tpuv1_outline_tpu_island.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/executor_tpuv1_outline_tpu_island.cc @@ -105,8 +105,8 @@ void TPUBridgeExecutorIslandOutlining::runOnOperation() { // Create the outlined function SmallString<32> name = kOutlinedFuncPrefix; name += llvm::Twine(prefix_id++).str(); - auto outlined_func = OpBuilder(ctx).create( - island_op.getLoc(), name, func_type, ArrayRef()); + auto outlined_func = + OpBuilder(ctx).create(island_op.getLoc(), name, func_type); outlined_symbol_table.insert(outlined_func); // We will "steal" the body of the island and replace it with a call to the diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/parallel_execute_to_islands.cc b/tensorflow/compiler/mlir/tensorflow/transforms/parallel_execute_to_islands.cc index c13d7de754e..44205063266 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/parallel_execute_to_islands.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/parallel_execute_to_islands.cc @@ -190,7 +190,7 @@ tf_executor::IslandOp CreateOutputBarrierIsland( builder->setInsertionPoint(island_op); auto island_output_sink = builder->create( island_op.getLoc(), llvm::to_vector<8>(island_op.getResultTypes()), - island_operands, llvm::ArrayRef{}); + island_operands); island_output_sink.body().push_back(new Block); return island_output_sink; } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/readonly_references_to_resources.cc b/tensorflow/compiler/mlir/tensorflow/transforms/readonly_references_to_resources.cc index f8bbb8994f4..104f11e0cc0 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/readonly_references_to_resources.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/readonly_references_to_resources.cc @@ -160,7 +160,7 @@ void ConvertReadonlyReferenceVariablesToResourceVariablesPass::runOnFunction() { builder.setInsertionPoint(user); ReadVariableOp read_variable_op = builder.create( user->getLoc(), ArrayRef{tensor_type}, - ArrayRef{var_handle_op}, ArrayRef{}); + ArrayRef{var_handle_op}); user->getResult(0).replaceAllUsesWith(read_variable_op.getResult()); user->erase(); } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/region_control_flow_to_functional.cc b/tensorflow/compiler/mlir/tensorflow/transforms/region_control_flow_to_functional.cc index 46914653dbd..f1004fa049e 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/region_control_flow_to_functional.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/region_control_flow_to_functional.cc @@ -124,8 +124,7 @@ void ExtractSingleBlockRegion(Region& region, StringRef name, auto type = FunctionType::get(input_types, return_types, region.getContext()); // Create new function and extract region body into the function. 
- auto outlined_func = - builder.create(loc, name, type, ArrayRef{}); + auto outlined_func = builder.create(loc, name, type); Region& func_region = outlined_func.getBody(); func_region.takeBody(region); Block& first_block = func_region.front(); diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc b/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc index 100893e89b5..74679f19941 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc @@ -558,15 +558,13 @@ void AddLoadsStoresOutsideControlFlowOp( auto operand = caller->getOperand(index); builder.setInsertionPoint(caller); new_operands[index] = builder.create( - caller->getLoc(), ArrayRef{new_type}, ArrayRef{operand}, - ArrayRef{}); + caller->getLoc(), ArrayRef{new_type}, ArrayRef{operand}); caller->setOperand(index, new_operands[index]); if (updated_index < 0) continue; builder.setInsertionPointAfter(caller); builder.create( caller->getLoc(), ArrayRef{}, - ArrayRef{operand, caller->getResult(updated_index)}, - ArrayRef{}); + ArrayRef{operand, caller->getResult(updated_index)}); } } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/stack_ops_decomposition.cc b/tensorflow/compiler/mlir/tensorflow/transforms/stack_ops_decomposition.cc index 5e095a311ee..8d6e1c2ce30 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/stack_ops_decomposition.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/stack_ops_decomposition.cc @@ -409,11 +409,9 @@ LogicalResult HandleStackV2Op( ArrayRef{buffer.getType().cast()}, stack.getContext())); auto local_var = builder.create( - stack.getLoc(), ArrayRef{var_type}, ArrayRef{}, - ArrayRef{}); + stack.getLoc(), ArrayRef{var_type}, ArrayRef{}); auto local_size_var = builder.create( - stack.getLoc(), ArrayRef{size_var_type}, ArrayRef{}, - ArrayRef{}); + stack.getLoc(), ArrayRef{size_var_type}, ArrayRef{}); // Zero-initialize the local vars. cutil::WriteLocalVariable(local_size_var, cutil::GetR1Const({0LL}, builder, stack.getLoc()), @@ -446,8 +444,7 @@ LogicalResult HandleStackPushV2Op( cutil::WriteLocalVariable(push.handle(), stack_val, builder, push.getLoc()); index = builder.create( push.getLoc(), ArrayRef{index.getType()}, - ArrayRef{index, cutil::GetR1Const({1}, builder, push.getLoc())}, - ArrayRef{}); + ArrayRef{index, cutil::GetR1Const({1}, builder, push.getLoc())}); cutil::WriteLocalVariable(it->getSecond(), index, builder, push.getLoc()); push.erase(); return success(); @@ -467,8 +464,7 @@ LogicalResult HandleStackPopV2Op( auto size = cutil::ReadLocalVariable(it->getSecond(), builder, pop.getLoc()); auto new_size = builder.create( pop.getLoc(), ArrayRef{size.getType()}, - ArrayRef{size, cutil::GetR1Const({1}, builder, pop.getLoc())}, - ArrayRef{}); + ArrayRef{size, cutil::GetR1Const({1}, builder, pop.getLoc())}); auto pop_val = cutil::GetElement(new_size, stack_val, builder, pop.getLoc()); pop.replaceAllUsesWith(pop_val); // Update the size. 
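For context on the stack hunks just above: the decomposition pass replaces a TF stack resource with two local variables, a fixed-size zero-initialized buffer and a scalar size counter; a push writes at index `size` and then adds one, and a pop subtracts one and then reads. Below is a self-contained sketch of that runtime behaviour, with invented names and a plain std::vector standing in for the buffer; it is a mental model only, not the MLIR the pass emits.

#include <cstdint>
#include <iostream>
#include <vector>

// Stand-in for the pair of local variables the pass creates:
// a preallocated element buffer plus a separate "size" variable.
struct DecomposedStack {
  std::vector<float> buffer;  // zero-initialized, capacity = max_size
  int64_t size = 0;

  explicit DecomposedStack(int64_t max_size) : buffer(max_size, 0.0f) {}

  // StackPushV2: write at index `size`, then size += 1.
  void Push(float value) {
    buffer[size] = value;
    size += 1;
  }

  // StackPopV2: size -= 1, then read at the new index.
  float Pop() {
    size -= 1;
    return buffer[size];
  }
};

int main() {
  DecomposedStack stack(/*max_size=*/4);
  stack.Push(1.0f);
  stack.Push(2.0f);
  std::cout << stack.Pop() << "\n";  // prints 2
  std::cout << stack.Pop() << "\n";  // prints 1
}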
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tensor_array_ops_decomposition.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tensor_array_ops_decomposition.cc index 9c659a95078..cb30bc35a7a 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tensor_array_ops_decomposition.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tensor_array_ops_decomposition.cc @@ -166,8 +166,7 @@ LogicalResult HandleTensorArrayV3Op( ArrayRef{buffer.getType().cast()}, ta.getContext())); auto local_var = builder.create( - ta.getLoc(), ArrayRef{var_type}, ArrayRef{}, - ArrayRef{}); + ta.getLoc(), ArrayRef{var_type}, ArrayRef{}); cutil::WriteLocalVariable(local_var, buffer, builder, ta.getLoc()); ta.handle().replaceAllUsesWith(local_var); // The flow output is just a way for the front end to enforce ordering among @@ -227,8 +226,7 @@ LogicalResult HandleTensorArrayWriteV3Op( elem = builder.create( write.getLoc(), ArrayRef{slice_type}, ArrayRef{elem, cutil::GetR1Const(slice_type.getShape(), builder, - write.getLoc())}, - ArrayRef{}); + write.getLoc())}); elem = cutil::AccumulateBuffers(elem, original_elem, builder, write.getLoc()); } @@ -261,8 +259,7 @@ LogicalResult HandleTensorArrayConcatV3Op( ArrayRef{ RankedTensorType::get(shape, buffer_type.getElementType())}, ArrayRef{buffer, - cutil::GetR1Const(shape, builder, concat.getLoc())}, - ArrayRef{}); + cutil::GetR1Const(shape, builder, concat.getLoc())}); concat.value().replaceAllUsesWith(buffer); // Create the lengths as a list of the same value (element size). @@ -302,8 +299,7 @@ LogicalResult HandleTensorArraySplitV3Op( buffer_shape, elem_type.getElementType())}, ArrayRef{split.value(), cutil::GetR1Const(buffer_shape, builder, - split.getLoc())}, - ArrayRef{}) + split.getLoc())}) .output(); // Accumulate with the old buffer. 
auto old_buffer = @@ -339,8 +335,7 @@ LogicalResult CreateAndInitializeGradVariable(Type local_var_type, Operation* op, Value* var) { OpBuilder builder(op); *var = builder.create( - op->getLoc(), ArrayRef{local_var_type}, ArrayRef{}, - ArrayRef{}); + op->getLoc(), ArrayRef{local_var_type}, ArrayRef{}); Value buffer; auto buffer_type = getElementTypeOrSelf(local_var_type) .cast() diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tensor_list_ops_decomposition.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tensor_list_ops_decomposition.cc index 11153f0dfc3..5cbc42a862c 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tensor_list_ops_decomposition.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tensor_list_ops_decomposition.cc @@ -438,7 +438,7 @@ LogicalResult HandleTensorListFromTensorOp( OpBuilder builder(list); Value buffer = builder.create( list.getLoc(), ArrayRef{list.tensor().getType()}, - ArrayRef{list.tensor()}, ArrayRef{}); + ArrayRef{list.tensor()}); auto type = buffer.getType().cast(); if (!type.hasStaticShape()) { return list.emitOpError("TensorListFromTensorOp input has unknown shape."); @@ -468,8 +468,7 @@ LogicalResult HandleTensorListPushBackOp( cutil::SetElement(size, buffer, push.tensor(), builder, push.getLoc()); auto new_size = builder.create( push.getLoc(), ArrayRef{size.getType()}, - ArrayRef{size, cutil::GetR1Const({1LL}, builder, push.getLoc())}, - ArrayRef{}); + ArrayRef{size, cutil::GetR1Const({1LL}, builder, push.getLoc())}); push.output_handle().replaceAllUsesWith(new_buffer); (*buffer_to_size)[new_buffer] = {new_size, /*fixed=*/false}; push.erase(); @@ -491,12 +490,10 @@ LogicalResult HandleTensorListPopBackOp( auto size = it->getSecond().size; OpBuilder builder(pop); auto new_buffer = builder.create( - pop.getLoc(), ArrayRef{buffer.getType()}, ArrayRef{buffer}, - ArrayRef{}); + pop.getLoc(), ArrayRef{buffer.getType()}, ArrayRef{buffer}); auto new_size = builder.create( pop.getLoc(), ArrayRef{size.getType()}, - ArrayRef{size, cutil::GetR1Const({1LL}, builder, pop.getLoc())}, - ArrayRef{}); + ArrayRef{size, cutil::GetR1Const({1LL}, builder, pop.getLoc())}); auto element = cutil::GetElement(new_size, new_buffer, builder, pop.getLoc()); pop.output_handle().replaceAllUsesWith(new_buffer); pop.tensor().replaceAllUsesWith(element); @@ -567,8 +564,7 @@ LogicalResult HandleTensorListLengthOp( ArrayRef{RankedTensorType::get( {}, getElementTypeOrSelf(current_size.getType()))}, ArrayRef{current_size, - cutil::GetR1Const({}, builder, length.getLoc())}, - ArrayRef{}); + cutil::GetR1Const({}, builder, length.getLoc())}); length.length().replaceAllUsesWith(reshape); } length.erase(); diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_dynamic_layout_pass.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_dynamic_layout_pass.cc index e2f4fca1219..bd678afb1ad 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_dynamic_layout_pass.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_dynamic_layout_pass.cc @@ -154,8 +154,7 @@ TF::TPUCopyWithLayoutOp BuildCopyWithLayout(tf_device::LaunchOp execute_launch, Value input, OpBuilder* builder) { return builder->create( execute_launch.getLoc(), llvm::ArrayRef{input.getType()}, - llvm::ArrayRef{input, get_layout.layout()}, - llvm::ArrayRef{}); + llvm::ArrayRef{input, get_layout.layout()}); } // Performs transformation for a non-replicated input. 
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_outside_compilation.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_outside_compilation.cc index af0675197ac..6a750addd4e 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_outside_compilation.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_outside_compilation.cc @@ -206,8 +206,7 @@ TF::_HostComputeMlirOp CreateHostCompute( device_output_types.push_back(output.getType()); SetHostComputeInsertion(builder, cluster_ops, inputs); auto host_compute = builder->create( - tpu_cluster.getLoc(), device_output_types, inputs.getArrayRef(), - llvm::ArrayRef{}); + tpu_cluster.getLoc(), device_output_types, inputs.getArrayRef()); host_compute.setAttr(kAncestorsAttr, builder->getArrayAttr({})); host_compute.setAttr(kShapesAttr, builder->getArrayAttr({})); host_compute.setAttr(kKeyAttr, builder->getStringAttr(communication_key)); diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_rewrite_pass.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_rewrite_pass.cc index 050ba24417f..7e6f2915502 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_rewrite_pass.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_rewrite_pass.cc @@ -473,9 +473,8 @@ LogicalResult BuildExecuteOp( if (failed(result)) return failure(); // TPUExecute has same output types as cluster_func. - *execute_op = builder->create( - cluster_func.getLoc(), output_types, inputs, - llvm::ArrayRef{}); + *execute_op = builder->create(cluster_func.getLoc(), + output_types, inputs); return success(); } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_variable_runtime_reformatting.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_variable_runtime_reformatting.cc index 5bc6bd4e053..12ce8c57f73 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_variable_runtime_reformatting.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_variable_runtime_reformatting.cc @@ -521,8 +521,7 @@ void HandleReplicateOp(TF::WhileOp while_op, tf_device::ReplicateOp replicate, replicate.GetNumReplicatedBlockArguments() - 1)); builder.setInsertionPoint(execute_launch); auto reformat_op = builder.create( - execute_launch.getLoc(), llvm::ArrayRef{}, reformat_operands, - llvm::ArrayRef{}); + execute_launch.getLoc(), llvm::ArrayRef{}, reformat_operands); WrapOpInLaunch(&builder, execute_launch.getLoc(), reformat_op, execute_launch.device()); @@ -579,8 +578,7 @@ void HandleReplicateOp(TF::WhileOp while_op, tf_device::ReplicateOp replicate, default_state_key.getResult()); // Unformat op. auto unformat_op = builder.create( - while_op.getLoc(), llvm::ArrayRef{}, unformat_operands, - llvm::ArrayRef{}); + while_op.getLoc(), llvm::ArrayRef{}, unformat_operands); WrapOpInLaunch(&builder, execute_launch.getLoc(), unformat_op, execute_launch.device()); builder.create(while_op.getLoc(), ArrayRef{}); From 31ff47fb93ba63dbd750125df4ee1d66124c75fc Mon Sep 17 00:00:00 2001 From: Mingming Liu Date: Mon, 20 Jul 2020 16:55:20 -0700 Subject: [PATCH 0884/2522] In BasicBatchScheduler, mirror options from SharedBatchScheduler::Options and SharedBatchScheduler::QueueOptions. 
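The queue options mirrored by this change include the large-batch splitting knobs (`enable_large_batch_splitting`, `split_input_task_func`, `max_execution_batch_size`). As documented in the new comments, a task larger than the open batch's remaining slots is split into subtasks whose sizes add up to the input size, with later subtasks capped at the execution batch size. A self-contained sketch of that splitting contract follows, using an invented `Task` type rather than the real batching classes; it is an illustration of the documented contract, not the TensorFlow implementation.

#include <algorithm>
#include <iostream>
#include <memory>
#include <vector>

// Invented stand-in for a batching task; only its size matters here.
struct Task {
  int size = 0;
};

// Sketch of the splitting contract: the first output fills the remaining
// slots of the currently open batch, and every later output is capped at
// max_execution_batch_size. Output sizes add up to the input size.
std::vector<std::unique_ptr<Task>> SplitTask(const Task& input,
                                             int open_batch_remaining_slot,
                                             int max_execution_batch_size) {
  std::vector<std::unique_ptr<Task>> outputs;
  int remaining = input.size;
  int next_chunk = std::min(remaining, open_batch_remaining_slot);
  while (remaining > 0) {
    if (next_chunk > 0) {
      outputs.push_back(std::make_unique<Task>(Task{next_chunk}));
      remaining -= next_chunk;
    }
    next_chunk = std::min(remaining, max_execution_batch_size);
  }
  return outputs;
}

int main() {
  // An input of size 128 with 8 free slots in the open batch and an
  // execution limit of 32 becomes chunks of 8, 32, 32, 32, 24.
  Task input{128};
  for (const auto& t : SplitTask(input, /*open_batch_remaining_slot=*/8,
                                 /*max_execution_batch_size=*/32)) {
    std::cout << t->size << " ";
  }
  std::cout << "\n";
}

With numbers like those in the patch comment, an oversized input is filled into the open batch and then carved into execution-sized chunks instead of being padded or rejected.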
PiperOrigin-RevId: 322255271 Change-Id: Ifc27383dd230720656a9187e54db7bee05eb6856 --- .../batching_util/basic_batch_scheduler.h | 62 +++++++++++++++++++ .../batching_util/shared_batch_scheduler.h | 4 +- tensorflow/core/util/BUILD | 9 ++- 3 files changed, 70 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/kernels/batching_util/basic_batch_scheduler.h b/tensorflow/core/kernels/batching_util/basic_batch_scheduler.h index 26df1f82920..bd44115db22 100644 --- a/tensorflow/core/kernels/batching_util/basic_batch_scheduler.h +++ b/tensorflow/core/kernels/batching_util/basic_batch_scheduler.h @@ -17,6 +17,7 @@ limitations under the License. #define TENSORFLOW_CORE_KERNELS_BATCHING_UTIL_BASIC_BATCH_SCHEDULER_H_ #include + #include #include #include @@ -176,6 +177,61 @@ class BasicBatchScheduler : public BatchScheduler { // parameter. int max_enqueued_batches = 10; + // If true, an input task (i.e., input of `BasicBatchScheduler::Schedule`) + // with a large size (i.e., larger than the largest value of + // `allowed_batch_sizes`) will be split into multiple smaller batch tasks + // and possibly put into different batches for processing. If false, each + // input task is put into one batch as a whole for processing. + // + // API note: + // The value of this option doesn't affect processing output given the same + // input; it affects implementation details as stated below: + // 1. Improve batching efficiency by eliminating unnecessary padding in the + // following scenario: when an open batch has M slots while an input of size + // N is scheduled (M < N), the input can be split to fill remaining slots + // of an open batch as opposed to padding. + // 2.`max_batch_size` specifies the limit of input and + // `max_execution_batch_size` specifies the limit of a task to be processed. + // API user can give an input of size 128 when 'max_execution_batch_size' + // is 32 -> implementation can split input of 128 into 4 x 32, schedule + // concurrent processing, and then return concatenated results corresponding + // to 128. + bool enable_large_batch_splitting = false; + + // `split_input_task_func` specifies how to split `input_task` into + // `output_tasks`. + // + // `input_task`: a unit of task to be split. + // `first_output_task_size`: task size of first output. + // `max_batch_size`: Maximum size of each batch. + // `output_tasks`: A list of output tasks after split. + // + // REQUIRED: + // 1) All `output_tasks` should be non-empty tasks. + // 2) Sizes of `output_tasks` add up to size of `input_task`. + // + // NOTE: + // Instantiations of `TaskType` may vary, so it's up to caller to define + // how (e.g., which members to access) to split input tasks. + std::function* input_task, + int first_output_task_size, int input_batch_size_limit, + std::vector>* output_tasks)> + split_input_task_func; + + // The maximum size of each enqueued batch (i.e., in `batches_`). + // + // The scheduler may form batches of any size between 1 and this number + // (inclusive). If there is a need to quantize the batch sizes, i.e. only + // submit batches whose size is in a small set of allowed sizes, that can be + // done by adding padding in the process-batch callback. + // + // REQUIRES: + // - If enable_large_batch_splitting is true, `max_execution_batch_size` is + // less than or equal to `max_batch_size`. + // - If enable_large_batch_splitting is false, `max_execution_batch_size` is + // equal to `max_batch_size`. 
+ int max_execution_batch_size = 10; + // The following options are typically only overridden by test code. // The environment to use. @@ -231,6 +287,12 @@ Status BasicBatchScheduler::Create( options.batch_timeout_micros; shared_scheduler_queue_options.max_enqueued_batches = options.max_enqueued_batches; + shared_scheduler_queue_options.enable_large_batch_splitting = + options.enable_large_batch_splitting; + shared_scheduler_queue_options.split_input_task_func = + options.split_input_task_func; + shared_scheduler_queue_options.max_execution_batch_size = + options.max_execution_batch_size; std::unique_ptr> shared_scheduler_queue; TF_RETURN_IF_ERROR(shared_scheduler->AddQueue(shared_scheduler_queue_options, process_batch_callback, diff --git a/tensorflow/core/kernels/batching_util/shared_batch_scheduler.h b/tensorflow/core/kernels/batching_util/shared_batch_scheduler.h index 6763e366c76..ce7823a7aef 100644 --- a/tensorflow/core/kernels/batching_util/shared_batch_scheduler.h +++ b/tensorflow/core/kernels/batching_util/shared_batch_scheduler.h @@ -171,7 +171,7 @@ class SharedBatchScheduler // For usage of `split_input_task_func`, please see its comment. bool enable_large_batch_splitting = false; - // `input_task`: a unit of task to be splitted (raw pointer not owned). + // `input_task`: a unit of task to be split. // `first_output_task_size`: task size of first output. // `max_batch_size`: Maximum size of each batch. // `output_tasks`: A list of output tasks after split. @@ -711,7 +711,7 @@ Status Queue::ScheduleWithSplit(std::unique_ptr* task) { std::vector> output_tasks; if (input_task_size <= open_batch_remaining_slot) { - // This is the fast path when input doesn't need to be splitted. + // This is the fast path when input doesn't need to be split. output_tasks.push_back(std::move(*task)); } else { TF_RETURN_IF_ERROR(SplitInputBatchIntoSubtasks(task, &output_tasks)); diff --git a/tensorflow/core/util/BUILD b/tensorflow/core/util/BUILD index 78757bed13e..bb2b9ff429e 100644 --- a/tensorflow/core/util/BUILD +++ b/tensorflow/core/util/BUILD @@ -25,10 +25,12 @@ load( "if_static", ) +default_package_visibility = [ + "//tensorflow/core:__subpackages__", +] + package( - default_visibility = [ - "//tensorflow/core:__subpackages__", - ], + default_visibility = default_package_visibility, licenses = ["notice"], # Apache 2.0 ) @@ -558,6 +560,7 @@ cc_library( name = "incremental_barrier", srcs = ["incremental_barrier.cc"], hdrs = ["incremental_barrier.h"], + visibility = default_package_visibility + ["//tensorflow_serving:__subpackages__"], deps = [ "//tensorflow/core:lib", "@com_google_absl//absl/functional:bind_front", From 8ceb3ac1b2a85cd0b61fd9410b4e12cf04c992d7 Mon Sep 17 00:00:00 2001 From: Robert Suderman Date: Mon, 20 Jul 2020 16:58:31 -0700 Subject: [PATCH 0885/2522] Add a transform for Gathers to torch_index_select. Some gathers can be interpreted as torch index selects. Transforming these cases allow torch_index_select lowerings to be used for certain gathers. 
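To see why such a gather is just an index select, consider the shapes used in the new test added below: an operand of shape [5, 4], start_indices of shape [1, 3, 1], and slice_sizes [1, 4], so every gathered slice is a whole row. A plain C++ sketch of that equivalence (illustrative only, independent of the MLIR code in this patch):

#include <array>
#include <cstdio>
#include <vector>

int main() {
  // operand: shape [5, 4]
  std::vector<std::array<float, 4>> operand(5);
  for (int r = 0; r < 5; ++r)
    for (int c = 0; c < 4; ++c) operand[r][c] = r * 10 + c;

  // start_indices: shape [1, 3, 1], flattened; the trailing dimension of
  // size 1 is the index vector dimension.
  std::vector<int> indices = {4, 0, 2};

  // torch_index_select along dimension 0: pick whole rows by index.
  std::vector<std::array<float, 4>> selected;
  for (int idx : indices) selected.push_back(operand[idx]);

  // Reshaping the [3, 4] result to the gather's [1, 3, 4] output is a no-op
  // on the underlying data, which is what the emitted mhlo.reshape expresses.
  for (const auto& row : selected)
    std::printf("%g %g %g %g\n", row[0], row[1], row[2], row[3]);
  return 0;
}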
PiperOrigin-RevId: 322255835 Change-Id: I5d34295f48900b9f500e4e885b073fe629892fbb --- tensorflow/compiler/mlir/hlo/BUILD | 21 +++ .../Dialect/mhlo/transforms/rewriters.h | 4 + .../legalize_gather_to_torch_index_select.cc | 152 ++++++++++++++++++ ...legalize-gather-to-torch-index-select.mlir | 41 +++++ 4 files changed, 218 insertions(+) create mode 100644 tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_gather_to_torch_index_select.cc create mode 100644 tensorflow/compiler/mlir/hlo/tests/hlo-legalize-gather-to-torch-index-select.mlir diff --git a/tensorflow/compiler/mlir/hlo/BUILD b/tensorflow/compiler/mlir/hlo/BUILD index bc6393fa3f3..86af6f5ffec 100644 --- a/tensorflow/compiler/mlir/hlo/BUILD +++ b/tensorflow/compiler/mlir/hlo/BUILD @@ -568,6 +568,26 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "legalize_gather_to_torch_index_select", + srcs = ["lib/Dialect/mhlo/transforms/legalize_gather_to_torch_index_select.cc"], + hdrs = [ + "include/mlir-hlo/Dialect/mhlo/transforms/passes.h", + "include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h", + ], + deps = [ + ":hlo", + "@com_google_absl//absl/memory", + "@llvm-project//llvm:Support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:Support", + "@llvm-project//mlir:Transforms", + ], + alwayslink = 1, +) + cc_library( name = "legalize_tanh_to_approximation", srcs = ["lib/Dialect/mhlo/transforms/legalize_tanh_to_approximation.cc"], @@ -717,6 +737,7 @@ cc_library( ":hlo_dialect_registration", ":hlo_legalize_to_lhlo", ":legalize_control_flow", + ":legalize_gather_to_torch_index_select", ":legalize_tanh_to_approximation", ":legalize_to_linalg", ":legalize_to_standard", diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h index f3f4405ffa6..ddc6417b9ec 100644 --- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h @@ -41,6 +41,10 @@ void PopulateComplexLoweringPatterns(MLIRContext *context, void PopulateOptimizeMHLOPatterns(MLIRContext *context, OwningRewritePatternList *patterns); +// Rewrite patterns for gather to equivalent torch index select legalization. +void PopulateGatherToTorchIndexSelectPatterns( + mlir::MLIRContext *context, OwningRewritePatternList *patterns); + void PopulateMhloToStdPatterns(OwningRewritePatternList *patterns, MLIRContext *ctx); diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_gather_to_torch_index_select.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_gather_to_torch_index_select.cc new file mode 100644 index 00000000000..391829865c6 --- /dev/null +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_gather_to_torch_index_select.cc @@ -0,0 +1,152 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "absl/memory/memory.h" +#include "mlir/IR/Function.h" // from @llvm-project +#include "mlir/IR/PatternMatch.h" // from @llvm-project +#include "mlir/Pass/Pass.h" // from @llvm-project +#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" +#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/passes.h" +#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h" + +namespace mlir { + +namespace mhlo { +namespace { + +struct GatherIsTorchIndexSelect : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(GatherOp gather, + PatternRewriter &rewriter) const override { + auto start_indices = gather.start_indices(); + auto start_indices_ty = start_indices.getType().cast(); + if (!start_indices_ty.hasRank()) { + return failure(); + } + + auto operand = gather.operand(); + auto operand_ty = operand.getType().cast(); + if (!operand_ty.hasRank()) { + return failure(); + } + + int64_t index_vector_dim = + std::max(0, start_indices_ty.getRank() - 1); + + // We can use torch_index_select if the last dimension represents the + // gather indices. + auto dimension_numbers = gather.dimension_numbers(); + if (dimension_numbers.index_vector_dim().getValue().getSExtValue() != + index_vector_dim) { + return failure(); + } + + // Index select only works across a single dimension. + if (!start_indices_ty.getShape().empty() && + start_indices_ty.getShape().back() != 1) { + return failure(); + } + + // Only support the default case for start_index_map. + if (dimension_numbers.start_index_map().getType().getRank() != 1 || + dimension_numbers.start_index_map() + .getValue(0) + .cast() + .getValue() != 0) { + return failure(); + } + + auto result_ty = gather.getResult().getType().dyn_cast(); + if (!result_ty) { + return failure(); + } + + // Offset dimensions should be the defaults. + if (dimension_numbers.offset_dims().getType().getNumElements() != + result_ty.getRank() - index_vector_dim) { + return failure(); + } + + for (auto it : llvm::enumerate(dimension_numbers.offset_dims())) { + if ((it.index() + index_vector_dim) != it.value()) { + return failure(); + } + } + + for (auto it : llvm::enumerate(gather.slice_sizes().getIntValues())) { + // First shape value must be 1. + if (it.index() == 0) { + if (it.value().getSExtValue() != 1) { + return failure(); + } + continue; + } + + // The gather needs to index the entire slice for each other dimension. 
+ if (it.value().getSExtValue() != operand_ty.getDimSize(it.index())) { + return failure(); + } + } + + llvm::SmallVector index_select_shape = + llvm::to_vector<4>(start_indices_ty.getShape()); + + for (auto dim : operand_ty.getShape().drop_front()) { + index_select_shape.push_back(dim); + } + + if (!dimension_numbers.collapsed_slice_dims().getType().hasRank() || + dimension_numbers.collapsed_slice_dims().getType().getNumElements() != + 1 || + dimension_numbers.collapsed_slice_dims().getValue({0}) != 0) { + return failure(); + } + + auto torch_index_select = rewriter.create( + gather.getLoc(), + RankedTensorType::get(index_select_shape, operand_ty.getElementType()), + operand, gather.start_indices(), rewriter.getI64IntegerAttr(0), + rewriter.getI64IntegerAttr(0)); + + rewriter.replaceOpWithNewOp(gather, gather.getType(), + torch_index_select); + + return success(); + } +}; + +struct LegalizeGatherToTorchIndexSelect + : public PassWrapper { + /// Perform the lowering of standard dialect operations to approximations. + void runOnFunction() override { + OwningRewritePatternList patterns; + PopulateGatherToTorchIndexSelectPatterns(&getContext(), &patterns); + applyPatternsAndFoldGreedily(getFunction(), patterns); + } +}; +} // namespace + +void PopulateGatherToTorchIndexSelectPatterns( + mlir::MLIRContext *context, OwningRewritePatternList *patterns) { + patterns->insert(context); +} + +static PassRegistration legalize_hlo_pass( + "mhlo-legalize-gather-to-torch-index-select", + "Legalizes gathers to a torch index select."); + +} // namespace mhlo +} // namespace mlir diff --git a/tensorflow/compiler/mlir/hlo/tests/hlo-legalize-gather-to-torch-index-select.mlir b/tensorflow/compiler/mlir/hlo/tests/hlo-legalize-gather-to-torch-index-select.mlir new file mode 100644 index 00000000000..ca90a80aa6c --- /dev/null +++ b/tensorflow/compiler/mlir/hlo/tests/hlo-legalize-gather-to-torch-index-select.mlir @@ -0,0 +1,41 @@ +// RUN: mlir-hlo-opt -mhlo-legalize-gather-to-torch-index-select %s -o - | FileCheck %s + +// CHECK-LABEL: @gather_to_index_select +func @gather_to_index_select(%arg0 : tensor<5x4xf32>, %arg1 : tensor<1x3x1xi32>) -> tensor<1x3x4xf32> { + // CHECK: [[TIS:%.+]] = "mhlo.torch_index_select"(%arg0, %arg1) { + // CHECK-SAME: batch_dims = 0 : i64, + // CHECK-SAME: dim = 0 : i64 + // CHECK-SAME: } : (tensor<5x4xf32>, tensor<1x3x1xi32>) -> tensor<1x3x1x4xf32> + // CHECK: [[RES:%.+]] = "mhlo.reshape"([[TIS]]) + %0 = "mhlo.gather"(%arg0, %arg1) {dimension_numbers = {collapsed_slice_dims = dense<0> : tensor<1xi64>, index_vector_dim = 2 : i64, offset_dims = dense<2> : tensor<1xi64>, start_index_map = dense<0> : tensor<1xi64>}, indices_are_sorted = false, slice_sizes = dense<[1, 4]> : tensor<2xi64>} : (tensor<5x4xf32>, tensor<1x3x1xi32>) -> tensor<1x3x4xf32> + + // CHECK: return [[RES]] + return %0 : tensor<1x3x4xf32> +} + +// CHECK-LABEL: @scalar_gather_to_index_select +func @scalar_gather_to_index_select(%arg0 : tensor<5x4xf32>, %arg1 : tensor) -> tensor<1x4xf32> { + // CHECK: [[TIS:%.+]] = "mhlo.torch_index_select"(%arg0, %arg1) { + // CHECK-SAME: batch_dims = 0 : i64, + // CHECK-SAME: dim = 0 : i64 + // CHECK-SAME: } : (tensor<5x4xf32>, tensor) -> tensor<4xf32> + // CHECK: [[RES:%.+]] = "mhlo.reshape"([[TIS]]) + %0 = "mhlo.gather"(%arg0, %arg1) {dimension_numbers = {collapsed_slice_dims = dense<0> : tensor<1xi64>, index_vector_dim = 0 : i64, offset_dims = dense<[0, 1]> : tensor<2xi64>, start_index_map = dense<0> : tensor<1xi64>}, indices_are_sorted = false, slice_sizes = dense<[1, 4]> : 
tensor<2xi64>} : (tensor<5x4xf32>, tensor) -> tensor<1x4xf32> + + // CHECK: return [[RES]] + return %0 : tensor<1x4xf32> +} + +// CHECK-LABEL: @gather_no_lowering_subslice +func @gather_no_lowering_subslice(%arg0 : tensor<5x4xf32>, %arg1 : tensor<1x3x1xi32>) -> tensor<1x3x3xf32> { + // CHECK: "mhlo.gather" + %0 = "mhlo.gather"(%arg0, %arg1) {dimension_numbers = {collapsed_slice_dims = dense<0> : tensor<1xi64>, index_vector_dim = 2 : i64, offset_dims = dense<2> : tensor<1xi64>, start_index_map = dense<0> : tensor<1xi64>}, indices_are_sorted = false, slice_sizes = dense<[1, 3]> : tensor<2xi64>} : (tensor<5x4xf32>, tensor<1x3x1xi32>) -> tensor<1x3x3xf32> + return %0 : tensor<1x3x3xf32> +} + +// CHECK-LABEL: @gather_no_lowering_multidim +func @gather_no_lowering_multidim(%arg0 : tensor<5x4xf32>, %arg1 : tensor<1x3x2xi32>) -> tensor<1x3x4xf32> { + // CHECK: "mhlo.gather" + %0 = "mhlo.gather"(%arg0, %arg1) {dimension_numbers = {collapsed_slice_dims = dense<0> : tensor<1xi64>, index_vector_dim = 2 : i64, offset_dims = dense<2> : tensor<1xi64>, start_index_map = dense<0> : tensor<1xi64>}, indices_are_sorted = false, slice_sizes = dense<[1, 4]> : tensor<2xi64>} : (tensor<5x4xf32>, tensor<1x3x2xi32>) -> tensor<1x3x4xf32> + return %0 : tensor<1x3x4xf32> +} From 0a67d256531aeaf529ea990ea0aed127a446b39f Mon Sep 17 00:00:00 2001 From: Karim Nosir Date: Mon, 20 Jul 2020 17:01:40 -0700 Subject: [PATCH 0886/2522] Add a pass to raise unknown ops to new internal op type custom_tf op. This to allow doing some graph optimizations on the custom ops. PiperOrigin-RevId: 322256380 Change-Id: I62d12be6bea6eb92c442617256cc51ac2cbf8e2a --- tensorflow/compiler/mlir/lite/BUILD | 1 + .../compiler/mlir/lite/flatbuffer_export.cc | 31 ++++--- tensorflow/compiler/mlir/lite/ir/tfl_ops.td | 28 ++++++- .../lite/tests/end2end/custom_opdef.pbtxt | 16 ++-- tensorflow/compiler/mlir/lite/tests/ops.mlir | 10 +++ .../mlir/lite/tests/raise-custom-ops.mlir | 20 +++++ .../compiler/mlir/lite/tf_tfl_passes.cc | 1 + .../compiler/mlir/lite/transforms/passes.h | 3 + .../mlir/lite/transforms/raise_custom_ops.cc | 80 +++++++++++++++++++ 9 files changed, 172 insertions(+), 18 deletions(-) create mode 100644 tensorflow/compiler/mlir/lite/tests/raise-custom-ops.mlir create mode 100644 tensorflow/compiler/mlir/lite/transforms/raise_custom_ops.cc diff --git a/tensorflow/compiler/mlir/lite/BUILD b/tensorflow/compiler/mlir/lite/BUILD index 3b67ea3d846..8a60b292bc2 100644 --- a/tensorflow/compiler/mlir/lite/BUILD +++ b/tensorflow/compiler/mlir/lite/BUILD @@ -334,6 +334,7 @@ cc_library( "transforms/optimize_functional_ops.cc", "transforms/prepare_composite_functions_tf.cc", "transforms/prepare_tf.cc", + "transforms/raise_custom_ops.cc", "transforms/runtime_verify.cc", "transforms/split_merged_operands.cc", "transforms/trim_functions_tf.cc", diff --git a/tensorflow/compiler/mlir/lite/flatbuffer_export.cc b/tensorflow/compiler/mlir/lite/flatbuffer_export.cc index 09c79d90e26..2e69a1740db 100644 --- a/tensorflow/compiler/mlir/lite/flatbuffer_export.cc +++ b/tensorflow/compiler/mlir/lite/flatbuffer_export.cc @@ -1196,22 +1196,35 @@ Optional> Translator::BuildSubGraph( if (IsConst(&inst)) continue; // Fetch operand and result tensor indices. 
- std::vector operands; - operands.reserve(inst.getNumOperands()); - for (auto operand : inst.getOperands()) { - if (operand.getType().isa()) - operands.push_back(kTfLiteOptionalTensor); - else - operands.push_back(tensor_index_map.lookup(operand)); - } std::vector results; results.reserve(inst.getNumOperands()); for (auto result : inst.getResults()) { results.push_back(tensor_index_map.lookup(result)); } + Operation* real_inst = &inst; + // CustomTfOp is just a wrapper around a TF op, we export the custom Op + // not the wrapper, so we fetch the op from the region. + if (auto custom_op = dyn_cast(inst)) { + // If we have custom op with a region, then use the first op in the + // region, if it exists, otherwise just use params for custom op. + if (!custom_op.body().empty()) { + real_inst = &custom_op.body().front().front(); + } else { + module_.emitError( + "Invalid CustomTfOp: Custom TF Op have empty region."); + } + } + std::vector operands; + operands.reserve(real_inst->getNumOperands()); + for (auto operand : real_inst->getOperands()) { + if (operand.getType().isa()) + operands.push_back(kTfLiteOptionalTensor); + else + operands.push_back(tensor_index_map.lookup(operand)); + } if (auto tfl_operator = - BuildOperator(&inst, operands, results, intermediates)) + BuildOperator(real_inst, operands, results, intermediates)) operators.push_back(*tfl_operator); else failed_once = true; diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td index 66124ba9982..715d047f0bf 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td +++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td @@ -4296,7 +4296,8 @@ def TFL_WhileOp : Op { +def TFL_CustomOp : Op { let summary = "Custom op"; let description = [{ @@ -4319,4 +4320,29 @@ def TFL_CustomOp : Op { let verifier = [{ return Verify(*this); }]; } +def TFL_CustomTfOp : Op]> { + let summary = "Wrapper Op for TF custom ops."; + + let description = [{ + A wrapper op around any Custom TF op. These includes ops defined using + custom_opdefs or linked which are not defined in TF dialect. + This Op just wraps the custom op inside a region. + Note #1, this Op will not include TF Lite custom ops defined using CustomOp. + Note #2, this op is just internal representation inside the converter and + are not exposed/exported when the model is exported to Flatbuffer. 
+ }]; + + let arguments = (ins + Variadic>:$input + ); + let results = (outs Variadic:$output); + + let regions = (region SizedRegion<1>:$body); +} + #endif // TFL_OPS diff --git a/tensorflow/compiler/mlir/lite/tests/end2end/custom_opdef.pbtxt b/tensorflow/compiler/mlir/lite/tests/end2end/custom_opdef.pbtxt index 345468e609e..481be9d4deb 100644 --- a/tensorflow/compiler/mlir/lite/tests/end2end/custom_opdef.pbtxt +++ b/tensorflow/compiler/mlir/lite/tests/end2end/custom_opdef.pbtxt @@ -36,11 +36,11 @@ versions { producer: 27 } -# CHECK-LABEL: func @main -# CHECK-SAME: (%[[ARG_0:[a-z0-9]+]]: tensor<4xi32>, %[[ARG_1:[a-z0-9]+]]: tensor<4xi32>) -> tensor<*xi32> -# CHECK-SAME: control_outputs = "" -# CHECK-SAME: inputs = "input0,input1" -# CHECK-SAME: outputs = "output" -# CHECK-NEXT: %[[OP:[a-z0-9]+]] = "tf.BannaPotatoSaladWithColeslaw"(%[[ARG_0]], %[[ARG_1]]) {T = i32, device = ""} : (tensor<4xi32>, tensor<4xi32>) -> tensor<*xi32> -# CHECK-NEXT: return %[[OP]] : tensor<*xi32> -# CHECK-NEXT: } +# CHECK-LABEL: func @main(%arg0: tensor<4xi32>, %arg1: tensor<4xi32>) -> tensor<*xi32> +# CHECK: attributes {tf.entry_function = {control_outputs = "", inputs = "input0,input1", outputs = "output"}} { +# CHECK-NEXT: %[[CUSTOM:.*]] = "tfl.custom_tf"(%arg0, %arg1) ( { +# CHECK-NEXT: %[[OUTPUTS:.*]] = "tf.BannaPotatoSaladWithColeslaw"(%arg0, %arg1) {T = i32, device = ""} : (tensor<4xi32>, tensor<4xi32>) -> tensor<*xi32> +# CHECK-NEXT: "tfl.yield"(%[[OUTPUTS]]) : (tensor<*xi32>) -> () +# CHECK-NEXT: }) : (tensor<4xi32>, tensor<4xi32>) -> tensor<*xi32> +# CHECK-NEXT: return %[[CUSTOM]] : tensor<*xi32> +# CHECK-NEXT: } diff --git a/tensorflow/compiler/mlir/lite/tests/ops.mlir b/tensorflow/compiler/mlir/lite/tests/ops.mlir index 5f434e954c8..06e05987ee6 100644 --- a/tensorflow/compiler/mlir/lite/tests/ops.mlir +++ b/tensorflow/compiler/mlir/lite/tests/ops.mlir @@ -598,6 +598,16 @@ func @testMaxPool2DWrongOperandStorageType(tensor<1x7x7x16x!quant.uniform, %arg1: tensor<1x64x64x32xf32>, %arg2: tensor<1x64x64x32xf32>) -> (tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>) { + %0, %1, %2, %3 = "tfl.custom_tf"(%arg0, %arg1, %arg2) ({ + %4, %5, %6, %7 = "tf.TFLite_Detection_PostProcess"(%arg0, %arg1, %arg2) {_output_quantized = true, _output_types = [f32, f32, f32, f32], _support_output_type_float_in_quantized_op = true, detections_per_class = 100 : i64, device = "", h_scale = 5.000000e+00 : f32, max_classes_per_detection = 1 : i64, max_detections = 20 : i64, nms_iou_threshold = 6.000000e-01 : f32, nms_score_threshold = 3.000000e-01 : f32, num_classes = 90 : i64, use_regular_nms = false, w_scale = 5.000000e+00 : f32, x_scale = 1.000000e+01 : f32, y_scale = 1.000000e+01 : f32} : (tensor<1x64x64x32xf32>, tensor<1x64x64x32xf32>, tensor<1x64x64x32xf32>) -> (tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>) + "tfl.yield"(%4, %5, %6, %7) : (tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>) -> () + }) : (tensor<1x64x64x32xf32>, tensor<1x64x64x32xf32>, tensor<1x64x64x32xf32>) -> (tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>) + return %0, %1 : tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32> +} + +// ----- + func @testMaxPoolingWithArgMax2D(%arg0: tensor<1x64x64x32xf32>) -> (tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>) { // custom op for "tfl.max_pooling_with_argmax_2d"(%arg0) {filter_h = 2 : i32, filter_w = 2 : i32, padding = "SAME", stride_h = 2 : i32, stride_w = 2 : i32} : 
(tensor<1x64x64x32xf32>) -> (tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>) %0, %1 = "tfl.custom"(%arg0) {custom_option = opaque<"tfl", "0x01000000020000000200000002000000020000000000000000000000000000000000000000000000"> : tensor<40xi8>, custom_code = "MaxPoolingWithArgmax2D"} : (tensor<1x64x64x32xf32>) -> (tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>) diff --git a/tensorflow/compiler/mlir/lite/tests/raise-custom-ops.mlir b/tensorflow/compiler/mlir/lite/tests/raise-custom-ops.mlir new file mode 100644 index 00000000000..1bac8019a30 --- /dev/null +++ b/tensorflow/compiler/mlir/lite/tests/raise-custom-ops.mlir @@ -0,0 +1,20 @@ +// RUN: tf-opt -tfl-raise-custom-ops -canonicalize %s -o - | FileCheck %s + +// CHECK-LABEL: custom_op +func @custom_op(%arg0: tensor<4xf32>) -> tensor<4xf32> { + %0 = "tfl.pseudo_const" () {value = dense<1.0> : tensor<4xf32>} : () -> tensor<4xf32> + %1 = "tfl.mul"(%arg0, %0) {fused_activation_function = "NONE"} : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> + // will be preserved since it has uses. + %2 = "tf.MyCustomOp"(%1, %0) {fused_activation_function = "RELU", int_attr = 2 : i32} : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> + // will be removed since it doesn't have uses and doesn't have side effect. + "tf.MyCustomOp"(%1, %0) {fused_activation_function = "RELU", int_attr = 2 : i32} : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> + return %2 : tensor<4xf32> + +// CHECK-NEXT: %[[CST:.*]] = constant dense<1.000000e+00> +// CHECK-NEXT: %[[MUL:.*]] = tfl.mul %arg0, %[[CST]] {fused_activation_function = "NONE"} : tensor<4xf32> +// CHECK-NEXT: %[[CUSTOM:.*]] = "tfl.custom_tf"(%[[MUL]], %[[CST]]) ( { +// CHECK-NEXT: %[[MY_CUSTOM:.*]] = "tf.MyCustomOp"(%[[MUL]], %[[CST]]) {fused_activation_function = "RELU", int_attr = 2 : i32} : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> +// CHECK-NEXT: "tfl.yield"(%[[MY_CUSTOM]]) : (tensor<4xf32>) -> () +// CHECK-NEXT: }) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> +// CHECK-NEXT: return %[[CUSTOM]] : tensor<4xf32> +} diff --git a/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc b/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc index fc44e778b92..239d52530ec 100644 --- a/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc +++ b/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc @@ -187,6 +187,7 @@ void AddTFToTFLConversionPasses(const mlir::TFL::PassConfig& pass_config, // so that it can target constants introduced once TensorFlow Identity ops // are removed during legalization. pass_manager->addPass(mlir::TFL::CreateOptimizeFunctionalOpsPass()); + pass_manager->addPass(mlir::TFL::CreateRaiseCustomOpsPass()); pass_manager->addPass(mlir::createSymbolDCEPass()); pass_manager->addNestedPass(mlir::createCanonicalizerPass()); pass_manager->addNestedPass(mlir::createCSEPass()); diff --git a/tensorflow/compiler/mlir/lite/transforms/passes.h b/tensorflow/compiler/mlir/lite/transforms/passes.h index af97931b2a3..804a391231a 100644 --- a/tensorflow/compiler/mlir/lite/transforms/passes.h +++ b/tensorflow/compiler/mlir/lite/transforms/passes.h @@ -91,6 +91,9 @@ std::unique_ptr> CreateWhileOutlinePass(); // Verifies runtime constraints. 
std::unique_ptr> CreateRuntimeVerifyPass(); +// Creates raise custom ops pass, which legalize custom ops to TFL::CustomOp +std::unique_ptr> CreateRaiseCustomOpsPass(); + } // namespace TFL } // namespace mlir diff --git a/tensorflow/compiler/mlir/lite/transforms/raise_custom_ops.cc b/tensorflow/compiler/mlir/lite/transforms/raise_custom_ops.cc new file mode 100644 index 00000000000..40cca526951 --- /dev/null +++ b/tensorflow/compiler/mlir/lite/transforms/raise_custom_ops.cc @@ -0,0 +1,80 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project +#include "mlir/IR/Attributes.h" // from @llvm-project +#include "mlir/IR/Builders.h" // from @llvm-project +#include "mlir/IR/MLIRContext.h" // from @llvm-project +#include "mlir/IR/StandardTypes.h" // from @llvm-project +#include "mlir/Pass/Pass.h" // from @llvm-project +#include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h" +#include "tensorflow/compiler/mlir/lite/transforms/passes.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" + +namespace mlir { +namespace TFL { +namespace { +// This transformation pass takes an operation with unknown op properties and +// wrap it by a TFL::CustomTfOp. +struct RaiseCustomOpsPass + : public PassWrapper { + void runOnFunction() override; +}; + +void RaiseCustomOpsPass::runOnFunction() { + auto fn = getFunction(); + OpBuilder builder(fn.getContext()); + + llvm::SmallVector custom_ops; + for (Operation &op : fn.getOps()) { + // Skips the ops with known op property. + if (op.getAbstractOperation()) continue; + // Skips already imported ops that are imported as CustomTfOp. + if (op.getParentOfType()) continue; + if (llvm::isa(op) || llvm::isa(op)) + continue; + custom_ops.push_back(&op); + } + + for (auto *op : custom_ops) { + builder.setInsertionPoint(op); + auto custom_op = builder.create( + op->getLoc(), op->getResultTypes(), op->getOperands()); + Region region; + region.push_back(new Block); + + builder.setInsertionPointToEnd(®ion.front()); + Operation *inner_op = builder.clone(*op); + builder.create(op->getLoc(), inner_op->getResults()); + custom_op.body().takeBody(region); + + op->replaceAllUsesWith(custom_op); + op->erase(); + } +} +} // namespace + +// Creates an instance of the TensorFlow Lite dialect raise custom op pass. 
+std::unique_ptr> CreateRaiseCustomOpsPass() { + return std::make_unique(); +} + +static PassRegistration pass( + "tfl-raise-custom-ops", "Raise custom ops into tflite dialect."); + +} // namespace TFL +} // namespace mlir From d5097840b9cabbe37fb40c12fc050d96f4728882 Mon Sep 17 00:00:00 2001 From: Yanhua Sun Date: Mon, 20 Jul 2020 17:02:38 -0700 Subject: [PATCH 0887/2522] add benchmarks_test option to capture profiles PiperOrigin-RevId: 322256578 Change-Id: I895261966df5fcec4104299e4697e889624e08db --- .../python/eager/benchmarks_test_base.py | 46 ++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/eager/benchmarks_test_base.py b/tensorflow/python/eager/benchmarks_test_base.py index 3d81d08ccbf..1c56ebfd279 100644 --- a/tensorflow/python/eager/benchmarks_test_base.py +++ b/tensorflow/python/eager/benchmarks_test_base.py @@ -18,11 +18,37 @@ from __future__ import absolute_import as _absolute_import from __future__ import division as _division from __future__ import print_function as _print_function +import os +import uuid + from tensorflow.python.eager import test +from tensorflow.python.platform import flags +from tensorflow.python.profiler import profiler_v2 as profiler + +flags.DEFINE_bool("xprof", False, "Run and report benchmarks with xprof on") +flags.DEFINE_string("logdir", "/tmp/xprof/", "Directory to store xprof data") class MicroBenchmarksBase(test.Benchmark): - """Run and report benchmark results.""" + """Run and report benchmark results. + + The first run is without any profilng. + Second run is with xprof and python trace. Third run is with xprof without + python trace. Note: xprof runs are with fewer iterations. + """ + + def run_with_xprof(self, enable_python_trace, run_benchmark, func, + num_iters_xprof, execution_mode, suid): + if enable_python_trace: + options = profiler.ProfilerOptions(python_tracer_level=1) + logdir = os.path.join(flags.FLAGS.logdir, suid + "_with_python") + else: + options = profiler.ProfilerOptions(python_tracer_level=0) + logdir = os.path.join(flags.FLAGS.logdir, suid) + with profiler.Profile(logdir, options): + total_time = run_benchmark(func, num_iters_xprof, execution_mode) + us_per_example = float("{0:.3f}".format(total_time * 1e6 / num_iters_xprof)) + return logdir, us_per_example def run_report(self, run_benchmark, func, num_iters, execution_mode=None): """Run and report benchmark results.""" @@ -32,6 +58,24 @@ class MicroBenchmarksBase(test.Benchmark): "examples_per_sec": float("{0:.3f}".format(num_iters / total_time)), "us_per_example": float("{0:.3f}".format(total_time * 1e6 / num_iters)) } + + if flags.FLAGS.xprof: + suid = str(uuid.uuid4()) + # Re-run with xprof and python trace. + num_iters_xprof = min(100, num_iters) + xprof_link, us_per_example = self.run_with_xprof(True, run_benchmark, + func, num_iters_xprof, + execution_mode, suid) + extras["xprof link with python trace"] = xprof_link + extras["us_per_example with xprof and python"] = us_per_example + + # Re-run with xprof but no python trace. 
+ xprof_link, us_per_example = self.run_with_xprof(False, run_benchmark, + func, num_iters_xprof, + execution_mode, suid) + extras["xprof link"] = xprof_link + extras["us_per_example with xprof"] = us_per_example + benchmark_name = self._get_benchmark_name() self.report_benchmark( iters=num_iters, wall_time=mean_us, extras=extras, name=benchmark_name) From 536abb77e6659d89890f996d53681388770d68fc Mon Sep 17 00:00:00 2001 From: Juho Ha Date: Mon, 20 Jul 2020 17:19:15 -0700 Subject: [PATCH 0888/2522] Add Benchmark app for Firebase Game Loop test. New Android app org.tensorflow.lite.benchmark.firebase is added. This app is similar to org.tensorflow.lite.benchmark, but it uses some predefined agruments for each scenario of Firebase Test Lab Game Loop test. Output result is formatted as Firebase Test Lab Game Loop test and csv format is also saved. Refer to https://firebase.google.com/docs/test-lab/android/game-loop PiperOrigin-RevId: 322259423 Change-Id: I5ab7085cf1143c3513710022d1caf13b2f98c2fa --- .../firebase/android/AndroidManifest.xml | 51 ++++ .../experimental/firebase/android/BUILD | 44 +++ .../android/jni/benchmark_model_jni.cc | 257 ++++++++++++++++++ .../benchmark/firebase/BenchmarkModel.java | 31 +++ .../firebase/BenchmarkModelActivity.java | 74 +++++ 5 files changed, 457 insertions(+) create mode 100644 tensorflow/lite/tools/benchmark/experimental/firebase/android/AndroidManifest.xml create mode 100644 tensorflow/lite/tools/benchmark/experimental/firebase/android/BUILD create mode 100644 tensorflow/lite/tools/benchmark/experimental/firebase/android/jni/benchmark_model_jni.cc create mode 100644 tensorflow/lite/tools/benchmark/experimental/firebase/android/src/org/tensorflow/lite/benchmark/firebase/BenchmarkModel.java create mode 100644 tensorflow/lite/tools/benchmark/experimental/firebase/android/src/org/tensorflow/lite/benchmark/firebase/BenchmarkModelActivity.java diff --git a/tensorflow/lite/tools/benchmark/experimental/firebase/android/AndroidManifest.xml b/tensorflow/lite/tools/benchmark/experimental/firebase/android/AndroidManifest.xml new file mode 100644 index 00000000000..563e90eb93f --- /dev/null +++ b/tensorflow/lite/tools/benchmark/experimental/firebase/android/AndroidManifest.xml @@ -0,0 +1,51 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tensorflow/lite/tools/benchmark/experimental/firebase/android/BUILD b/tensorflow/lite/tools/benchmark/experimental/firebase/android/BUILD new file mode 100644 index 00000000000..a4ea258112b --- /dev/null +++ b/tensorflow/lite/tools/benchmark/experimental/firebase/android/BUILD @@ -0,0 +1,44 @@ +# Description: +# BenchmarkModel Android harness for TensorFlow Lite benchmarks. + +load("//tensorflow/lite:build_def.bzl", "tflite_jni_binary") +load("@build_bazel_rules_android//android:rules.bzl", "android_binary") + +package( + default_visibility = ["//visibility:private"], + licenses = ["notice"], # Apache 2.0 +) + +exports_files(["LICENSE"]) + +android_binary( + name = "benchmark_model_firebase", + srcs = glob([ + "src/**/*.java", + ]), + custom_package = "org.tensorflow.lite.benchmark.firebase", + manifest = "AndroidManifest.xml", + # In some platforms we don't have an Android SDK/NDK and this target + # can't be built. We need to prevent the build system from trying to + # use the target in that case. 
+ tags = ["manual"], + deps = [":tensorflowlite_benchmark_firebase_native"], +) + +tflite_jni_binary( + name = "libtensorflowlite_benchmark_firebase.so", + srcs = glob([ + "jni/**/*.cc", + "jni/**/*.h", + ]), + deps = [ + "//tensorflow/lite/java/jni", + "//tensorflow/lite/tools/benchmark:benchmark_tflite_model_lib", + ], +) + +cc_library( + name = "tensorflowlite_benchmark_firebase_native", + srcs = ["libtensorflowlite_benchmark_firebase.so"], + visibility = ["//visibility:private"], +) diff --git a/tensorflow/lite/tools/benchmark/experimental/firebase/android/jni/benchmark_model_jni.cc b/tensorflow/lite/tools/benchmark/experimental/firebase/android/jni/benchmark_model_jni.cc new file mode 100644 index 00000000000..2d4c70ccc2b --- /dev/null +++ b/tensorflow/lite/tools/benchmark/experimental/firebase/android/jni/benchmark_model_jni.cc @@ -0,0 +1,257 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "tensorflow/lite/tools/benchmark/benchmark_tflite_model.h" + +#ifdef __ANDROID__ +#include +#endif + +namespace tflite { +namespace benchmark { +namespace { + +const char kOutputDir[] = "/sdcard/benchmark_output"; + +class FirebaseReportingListener : public BenchmarkListener { + public: + explicit FirebaseReportingListener(std::string tag, int report_fd) + : tag_(tag), report_fd_(report_fd) { + if (report_fd < 0) { +#ifdef __ANDROID__ + __android_log_print( + ANDROID_LOG_ERROR, "tflite", + "Report would be streamed only to local log not to Firebase " + "since the Firebase log file is not opened."); +#else + fprintf(stderr, + "Report would be streamed only to local log not to Firebase " + "since the Firebase log file is not opened."); +#endif + } + } + + void OnBenchmarkEnd(const BenchmarkResults& results) override { + ReportResult(results); + } + + void ReportFailure(TfLiteStatus status) { + std::string status_msg = + status == kTfLiteError + ? "TFLite error" + : (status == kTfLiteDelegateError ? "TFLite delegate error" + : "Unknown error code"); + Report(status_msg, std::vector>()); + } + + private: + void Report( + const std::string& status, + const std::vector>& contents) { + // The output format of Firebase Game Loop test is json. 
+ // https://firebase.google.com/docs/test-lab/android/game-loop#output-example + std::stringstream report; + report << "{\n" + << " \"name\": \"TFLite benchmark\",\n" + << " \"benchmark config\": \"" << tag_ << "\",\n" + << " \"status\": \"" << status << "\""; + for (const auto& content : contents) { + report << ",\n" + << " \"" << content.first << "\": \"" << content.second << "\""; + } + report << "\n}\n"; + + auto report_str = report.str(); + if (report_fd_ >= 0) { + write(report_fd_, report_str.c_str(), report_str.size()); + } + +#ifdef __ANDROID__ + __android_log_print(ANDROID_LOG_ERROR, "tflite", "%s", report_str.c_str()); +#else + fprintf(stderr, "%s", report_str.c_str()); +#endif + } + + void ReportResult(const BenchmarkResults& results) { + std::vector> contents; + std::stringstream avg_time; + avg_time << "init: " << results.startup_latency_us() << ", " + << "warmup: " << results.warmup_time_us().avg() << ", " + << "inference: " << results.inference_time_us().avg(); + contents.emplace_back("average time in us", avg_time.str()); + std::stringstream overall_mem_usage; + overall_mem_usage << results.overall_mem_usage(); + contents.emplace_back("overall memory usage", overall_mem_usage.str()); + + Report("OK", contents); + } + + std::string tag_; + int report_fd_; +}; + +class CsvExportingListener : public BenchmarkListener { + public: + explicit CsvExportingListener(std::string tag) : tag_(tag) {} + + void OnBenchmarkEnd(const BenchmarkResults& results) override { + if (!CreateOutputDir()) { +#ifdef __ANDROID__ + __android_log_print(ANDROID_LOG_ERROR, "tflite", + "Failed to create output directory %s.", kOutputDir); +#else + fprintf(stderr, "Failed to create output directory %s.", kOutputDir); +#endif + return; + } + WriteBenchmarkResultCsv(results); + } + + private: + bool CreateOutputDir() { + struct stat st; + if (stat(kOutputDir, &st) != 0) { + if (mkdir(kOutputDir, 0777) != 0 && errno != EEXIST) { + return false; + } + } else if (!S_ISDIR(st.st_mode)) { + errno = ENOTDIR; + return false; + } + return true; + } + + void WriteBenchmarkResultCsv(const BenchmarkResults& results) { + auto init_us = results.startup_latency_us(); + auto warmup_us = results.warmup_time_us(); + auto inference_us = results.inference_time_us(); + auto init_mem_usage = results.init_mem_usage(); + auto overall_mem_usage = results.overall_mem_usage(); + + std::stringstream file_name; + file_name << kOutputDir << "/benchmark_result_" << tag_; + + std::ofstream file; + file.open(file_name.str().c_str()); + file << "config_key,model_size,init_time," + << "warmup_avg,warmup_min,warmup_max,warmup_stddev," + << "inference_avg,inference_min,inference_max,inference_stddev," + << "init_max_rss,init_total_alloc,init_in_use_alloc," + << "overall_max_rss,overall_total_alloc,overall_in_use_alloc\n"; + file << tag_ << "," << results.model_size_mb() << "," << init_us << "," + << warmup_us.avg() << "," << warmup_us.min() << "," << warmup_us.max() + << "," << warmup_us.std_deviation() << "," << inference_us.avg() << "," + << inference_us.min() << "," << inference_us.max() << "," + << inference_us.std_deviation() << "," + << (init_mem_usage.max_rss_kb / 1024.0) << "," + << (init_mem_usage.total_allocated_bytes / 1024.0 / 1024.0) << "," + << (init_mem_usage.in_use_allocated_bytes / 1024.0 / 1024.0) << "," + << (overall_mem_usage.max_rss_kb / 1024.0) << "," + << (overall_mem_usage.total_allocated_bytes / 1024.0 / 1024.0) << "," + << (overall_mem_usage.in_use_allocated_bytes / 1024.0 / 1024.0) + << "\n"; + file.close(); + } + + 
std::string tag_; +}; + +std::string GetScenarioConfig(int scenario, std::vector& args) { + // The number of scenarios should equal to the value specified in + // AndroidManifest.xml file. + std::unordered_map>> + all_scenarios = { + {1, {"cpu_1thread", {"--num_threads=1"}}}, + {2, {"cpu_2threads", {"--num_threads=2"}}}, + {3, {"cpu_4threads", {"--num_threads=4"}}}, + {4, {"xnnpack_1thread", {"--use_xnnpack=true", "--num_threads=1"}}}, + {5, {"xnnpack_2threads", {"--use_xnnpack=true", "--num_threads=2"}}}, + {6, {"xnnpack_4threads", {"--use_xnnpack=true", "--num_threads=4"}}}, + {7, + {"gpu_default", + {"--use_gpu=true", "--gpu_precision_loss_allowed=false"}}}, + {8, + {"gpu_fp16", + {"--use_gpu=true", "--gpu_precision_loss_allowed=true"}}}, + {9, {"dsp_hexagon", {"--use_hexagon=true"}}}, + {10, {"nnapi", {"--use_nnapi=true"}}}, + }; + + std::string tag; + args.emplace_back("(BenchmarkModelAndroid)"); + args.emplace_back("--graph=/data/local/tmp/graph"); + + auto it = all_scenarios.find(scenario); + if (it != all_scenarios.end()) { + const auto& scenario_info = it->second; + tag = scenario_info.first; + for (const auto& arg : scenario_info.second) { + args.push_back(arg); + } + } + return tag; +} + +void RunScenario(int scenario, int report_fd) { + std::vector args; + std::string tag = GetScenarioConfig(static_cast(scenario), args); + std::vector argv; + argv.reserve(args.size()); + for (auto& arg : args) { + argv.push_back(const_cast(arg.data())); + } + + BenchmarkTfLiteModel benchmark; + FirebaseReportingListener firebaseReporting(tag, report_fd); + benchmark.AddListener(&firebaseReporting); + CsvExportingListener csvExporting(tag); + benchmark.AddListener(&csvExporting); + auto status = benchmark.Run(static_cast(argv.size()), argv.data()); + if (status != kTfLiteOk) { + firebaseReporting.ReportFailure(status); + } +} + +} // namespace +} // namespace benchmark +} // namespace tflite + +#ifdef __cplusplus +extern "C" { +#endif + +JNIEXPORT void JNICALL +Java_org_tensorflow_lite_benchmark_firebase_BenchmarkModel_nativeRun( + JNIEnv* env, jclass clazz, jint scenario, jint report_fd) { + tflite::benchmark::RunScenario(static_cast(scenario), + static_cast(report_fd)); +} + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus diff --git a/tensorflow/lite/tools/benchmark/experimental/firebase/android/src/org/tensorflow/lite/benchmark/firebase/BenchmarkModel.java b/tensorflow/lite/tools/benchmark/experimental/firebase/android/src/org/tensorflow/lite/benchmark/firebase/BenchmarkModel.java new file mode 100644 index 00000000000..01a3aa7b7c6 --- /dev/null +++ b/tensorflow/lite/tools/benchmark/experimental/firebase/android/src/org/tensorflow/lite/benchmark/firebase/BenchmarkModel.java @@ -0,0 +1,31 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +package org.tensorflow.lite.benchmark.firebase; + +/** Helper class for running a native TensorFlow Lite benchmark. 
*/ +class BenchmarkModel { + static { + System.loadLibrary("tensorflowlite_benchmark_firebase"); + } + + // Executes a standard TensorFlow Lite benchmark with predefined args for each scenario. + // Result and status will be reported to the file with reportFd file descriptor. + public static void run(int scenario, int reportFd) { + nativeRun(scenario, reportFd); + } + + private static native void nativeRun(int scenario, int reportFd); +} diff --git a/tensorflow/lite/tools/benchmark/experimental/firebase/android/src/org/tensorflow/lite/benchmark/firebase/BenchmarkModelActivity.java b/tensorflow/lite/tools/benchmark/experimental/firebase/android/src/org/tensorflow/lite/benchmark/firebase/BenchmarkModelActivity.java new file mode 100644 index 00000000000..e0dd8904cde --- /dev/null +++ b/tensorflow/lite/tools/benchmark/experimental/firebase/android/src/org/tensorflow/lite/benchmark/firebase/BenchmarkModelActivity.java @@ -0,0 +1,74 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +package org.tensorflow.lite.benchmark.firebase; + +import android.app.Activity; +import android.content.Intent; +import android.net.Uri; +import android.os.Bundle; +import android.os.ParcelFileDescriptor; +import android.util.Log; +import java.io.FileNotFoundException; +import java.io.IOException; + +/** + * {@code Activity} class for Firebase Game Loop test. + * + *

This Activity receives and handles an {@code Intent} for Firebase Game Loop test. Refer to + * https://firebase.google.com/docs/test-lab/android/game-loop. + */ +public class BenchmarkModelActivity extends Activity { + + private static final String TAG = "tflite_BenchmarkModelActivity"; + + @Override + public void onCreate(Bundle savedInstanceState) { + super.onCreate(savedInstanceState); + + Intent intent = getIntent(); + if (!intent.getAction().equals("com.google.intent.action.TEST_LOOP")) { + Log.e(TAG, "Received non Firebase Game Loop test intent " + intent.getAction()); + finish(); + } + int scenario = intent.getIntExtra("scenario", 0); + Log.i(TAG, "Running TensorFlow Lite benchmark with scenario: " + scenario); + + ParcelFileDescriptor parcelFileDescriptor = null; + Uri reportFile = intent.getData(); + if (reportFile != null) { + Log.i(TAG, "Logging the result to " + reportFile.getEncodedPath()); + try { + parcelFileDescriptor = + getContentResolver().openAssetFileDescriptor(reportFile, "w").getParcelFileDescriptor(); + } catch (FileNotFoundException | NullPointerException e) { + Log.e(TAG, "Error while opening Firebase Test Lab report file", e); + } + } + + int reportFd = parcelFileDescriptor != null ? parcelFileDescriptor.getFd() : -1; + BenchmarkModel.run(scenario, reportFd); + + if (parcelFileDescriptor != null) { + try { + parcelFileDescriptor.close(); + } catch (IOException e) { + Log.e(TAG, "Failed to close Firebase Test Lab result file", e); + } + } + + finish(); + } +} From 08849be1f141627a74cb38d44e3b426553380841 Mon Sep 17 00:00:00 2001 From: Gaurav Jain Date: Mon, 20 Jul 2020 17:25:48 -0700 Subject: [PATCH 0889/2522] Rollforward: Ignore other graph inputs in custom gradient We revert to the original version of the change but are careful to only use the filtered inputs for variable search. PiperOrigin-RevId: 322260404 Change-Id: Ia6f8eedf0464684925bd1cf7b5cca1ea79c4b78c --- tensorflow/python/ops/custom_gradient.py | 22 ++++++++++++++++++++-- tensorflow/python/ops/gradients_test.py | 24 ++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/custom_gradient.py b/tensorflow/python/ops/custom_gradient.py index 58e5c3c9794..2437e050c50 100644 --- a/tensorflow/python/ops/custom_gradient.py +++ b/tensorflow/python/ops/custom_gradient.py @@ -354,9 +354,27 @@ def _graph_mode_decorator(f, args, kwargs): variables_in_tape = frozenset([ v.ref() for v in variable_watcher.watched_variables() ]) + + graphs = {getattr(o, "graph", None) for o in flat_result} + # Not all results may be tensors. 
However, we want to ensure all tensor + # outputs are from the same graph and get a list of captured inputs for + # variable search + graphs.discard(None) # Discard non-graph outputs + if graphs: + if len(graphs) > 1: + raise ValueError( + "All custom_gradient outputs should be from the same graph") + output_graph = graphs.pop() + filtered_input_tensors = [] + for i in args: + if i.graph == output_graph: + filtered_input_tensors.append(i) + else: + filtered_input_tensors = args + variables_in_subgraph = frozenset([ - v.ref() - for v in _get_dependent_variables(input_ops=args, output_ops=flat_result) + v.ref() for v in _get_dependent_variables( + input_ops=filtered_input_tensors, output_ops=flat_result) ]) variables = list( [v.deref() for v in variables_in_subgraph.union(variables_in_tape)]) diff --git a/tensorflow/python/ops/gradients_test.py b/tensorflow/python/ops/gradients_test.py index d179735b47a..7425e1e1522 100644 --- a/tensorflow/python/ops/gradients_test.py +++ b/tensorflow/python/ops/gradients_test.py @@ -1172,6 +1172,30 @@ class CustomGradientTest(test_util.TensorFlowTestCase, parameterized.TestCase): dw = self.evaluate(math_ops.reduce_sum(grads[1])) self.assertEqual(12., dw) + def testCustomGradientWithCapture(self): + with ops.Graph().as_default(): + x = constant(3.) + + @framework_function.Defun(dtypes.float32) + def F(y): + + @custom_gradient.custom_gradient + def MyMultiply(x1, x2): + result = x1 * x2 + + def Grad(dy): + # Switched the ordering here. + return [dy * x1, dy * x2] + + return result, Grad + + res = MyMultiply(x, y) + return gradients.gradients(res, [y]) + + y = constant(5.) + dy = F(y) + self.assertAllEqual(5., self.evaluate(dy)) + def testCustomGradientWithVariablesNoFalsePositives(self): @custom_gradient.custom_gradient From aab91905246710f2c11af91ef107425a2cf53364 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Mon, 20 Jul 2020 17:35:54 -0700 Subject: [PATCH 0890/2522] [XLA] Make computation sorting optional Sorting computations by content is very expensive, we should only do it if autotuning is requested. PiperOrigin-RevId: 322261925 Change-Id: I062fc6ebe202dc21c42342617577288eb2c99578 --- tensorflow/compiler/xla/service/hlo_module.cc | 25 +++++++++---------- .../compiler/xla/service/hlo_module_config.h | 10 ++++++++ 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc index c715d016c4f..308b8e8f095 100644 --- a/tensorflow/compiler/xla/service/hlo_module.cc +++ b/tensorflow/compiler/xla/service/hlo_module.cc @@ -15,6 +15,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/hlo_module.h" +#include #include #include #include @@ -650,30 +651,28 @@ bool CompareComputationsByContent(HloComputation* a, HloComputation* b) { } // anonymous namespace std::vector HloModule::MakeComputationSorted() const { - std::vector result; - result.reserve(computations_.size()); - for (const auto& computation : computations_) { - result.push_back(computation.get()); + std::vector result = MakeComputationPostOrder(); + if (config().content_aware_computation_sorting()) { + absl::c_sort(result, CompareComputationsByContent); } - std::sort(result.begin(), result.end(), CompareComputationsByContent); return result; } std::vector HloModule::MakeNonfusionComputations() const { - std::vector result; - for (auto* c : computations()) { - if (c->IsFusionComputation()) { - continue; - } - result.push_back(c); - } + std::vector result = MakeComputationPostOrder(); + result.erase(std::remove_if( + result.begin(), result.end(), + [](HloComputation* c) { return c->IsFusionComputation(); }), + result.end()); return result; } std::vector HloModule::MakeNonfusionComputationsSorted() const { auto result = MakeNonfusionComputations(); - std::sort(result.begin(), result.end(), CompareComputationsByContent); + if (config().content_aware_computation_sorting()) { + absl::c_sort(result, CompareComputationsByContent); + } return result; } diff --git a/tensorflow/compiler/xla/service/hlo_module_config.h b/tensorflow/compiler/xla/service/hlo_module_config.h index 0abf3a496f7..7ab0f24d06e 100644 --- a/tensorflow/compiler/xla/service/hlo_module_config.h +++ b/tensorflow/compiler/xla/service/hlo_module_config.h @@ -188,6 +188,14 @@ class HloModuleConfig { alias_passthrough_params_ = alias_passthrough_params; } + bool content_aware_computation_sorting() const { + return content_aware_computation_sorting_; + } + void set_content_aware_computation_sorting( + bool content_aware_computation_sorting) { + content_aware_computation_sorting_ = content_aware_computation_sorting; + } + FusionConfigCollection fusion_config_collection() const { return fusion_config_collection_; } @@ -251,6 +259,8 @@ class HloModuleConfig { bool alias_passthrough_params_ = false; + bool content_aware_computation_sorting_ = false; + FusionConfigCollection fusion_config_collection_ = FusionConfigCollection::kOff; From aace2ce85f88d0d828661703486a001a9c356669 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Mon, 20 Jul 2020 18:07:39 -0700 Subject: [PATCH 0891/2522] Fix the CPU Python tf-nightly 3.5 breakage. 
PiperOrigin-RevId: 322266144 Change-Id: Iba5c346ca2b1af9ffaed3447722178d9c9684708 --- .../ci_build/release/ubuntu_16/cpu_py35_full/nightly_release.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/nightly_release.sh index 06ae6e9de18..16f1e7524fd 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/nightly_release.sh @@ -32,7 +32,7 @@ export PYTHON_BIN_PATH=$(which python) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=release_cpu_linux tensorflow/tools/pip_package:build_pip_package +bazel build --config=release_cpu_linux --host_force_python=PY3 tensorflow/tools/pip_package:build_pip_package ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --cpu --nightly_flag From 4a2edaed8a76638d5f3a65ff6c1cb97b6a4b63e1 Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Mon, 20 Jul 2020 18:13:49 -0700 Subject: [PATCH 0892/2522] Fork the control_flow_util to keras. PiperOrigin-RevId: 322266959 Change-Id: Ib66d6c4971d214ae22e55dee6c0748a0ab4a386c --- tensorflow/python/keras/BUILD | 2 +- tensorflow/python/keras/backend.py | 2 +- .../python/keras/engine/base_layer_utils.py | 2 +- tensorflow/python/keras/layers/BUILD | 1 - tensorflow/python/keras/layers/recurrent.py | 2 +- tensorflow/python/keras/utils/BUILD | 8 ++ .../python/keras/utils/control_flow_util.py | 83 +++++++++++++++++++ 7 files changed, 95 insertions(+), 5 deletions(-) create mode 100644 tensorflow/python/keras/utils/control_flow_util.py diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index 4e88494a374..24c5b9de8ca 100755 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -55,7 +55,6 @@ py_library( "//tensorflow/python:composite_tensor_utils", "//tensorflow/python:constant_op", "//tensorflow/python:control_flow_ops", - "//tensorflow/python:control_flow_util", "//tensorflow/python:ctc_ops", "//tensorflow/python:dtypes", "//tensorflow/python:framework", @@ -87,6 +86,7 @@ py_library( "//tensorflow/python/distribute:distribute_lib", "//tensorflow/python/distribute:multi_worker_util", "//tensorflow/python/keras/engine:keras_tensor", + "//tensorflow/python/keras/utils:control_flow_util", ], ) diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py index 566aa75f2d9..07da09049c5 100644 --- a/tensorflow/python/keras/backend.py +++ b/tensorflow/python/keras/backend.py @@ -53,10 +53,10 @@ from tensorflow.python.framework import tensor_spec from tensorflow.python.framework import tensor_util from tensorflow.python.keras import backend_config from tensorflow.python.keras.engine import keras_tensor +from tensorflow.python.keras.utils import control_flow_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import clip_ops from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import control_flow_util from tensorflow.python.ops import ctc_ops as ctc from tensorflow.python.ops import functional_ops from tensorflow.python.ops import gradients as gradients_module diff --git a/tensorflow/python/keras/engine/base_layer_utils.py b/tensorflow/python/keras/engine/base_layer_utils.py index 597072e27f0..f393b73cf1d 100644 --- a/tensorflow/python/keras/engine/base_layer_utils.py +++ b/tensorflow/python/keras/engine/base_layer_utils.py @@ 
-29,8 +29,8 @@ from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_util from tensorflow.python.keras import backend +from tensorflow.python.keras.utils import control_flow_util from tensorflow.python.ops import array_ops -from tensorflow.python.ops import control_flow_util from tensorflow.python.ops import control_flow_util_v2 from tensorflow.python.ops import control_flow_v2_func_graphs from tensorflow.python.ops import variables as tf_variables diff --git a/tensorflow/python/keras/layers/BUILD b/tensorflow/python/keras/layers/BUILD index 4d1571583fe..fe46f580162 100644 --- a/tensorflow/python/keras/layers/BUILD +++ b/tensorflow/python/keras/layers/BUILD @@ -358,7 +358,6 @@ py_library( deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:control_flow_ops", - "//tensorflow/python:control_flow_util", "//tensorflow/python:framework_ops", "//tensorflow/python:math_ops", "//tensorflow/python:platform", diff --git a/tensorflow/python/keras/layers/recurrent.py b/tensorflow/python/keras/layers/recurrent.py index 72962eb8aef..388e7bf6a5f 100644 --- a/tensorflow/python/keras/layers/recurrent.py +++ b/tensorflow/python/keras/layers/recurrent.py @@ -33,11 +33,11 @@ from tensorflow.python.keras import regularizers from tensorflow.python.keras.engine.base_layer import Layer from tensorflow.python.keras.engine.input_spec import InputSpec from tensorflow.python.keras.saving.saved_model import layer_serialization +from tensorflow.python.keras.utils import control_flow_util from tensorflow.python.keras.utils import generic_utils from tensorflow.python.keras.utils import tf_utils from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import control_flow_util from tensorflow.python.ops import math_ops from tensorflow.python.ops import state_ops from tensorflow.python.platform import tf_logging as logging diff --git a/tensorflow/python/keras/utils/BUILD b/tensorflow/python/keras/utils/BUILD index 8e84a789c66..899701d624c 100644 --- a/tensorflow/python/keras/utils/BUILD +++ b/tensorflow/python/keras/utils/BUILD @@ -31,6 +31,7 @@ py_library( "all_utils.py", ], deps = [ + ":control_flow_util", ":engine_utils", ":generic_utils", ":layer_utils", @@ -40,6 +41,13 @@ py_library( ], ) +py_library( + name = "control_flow_util", + srcs = ["control_flow_util.py"], + srcs_version = "PY2AND3", + deps = [], +) + py_library( name = "data_utils", srcs = ["data_utils.py"], diff --git a/tensorflow/python/keras/utils/control_flow_util.py b/tensorflow/python/keras/utils/control_flow_util.py new file mode 100644 index 00000000000..d30856b0342 --- /dev/null +++ b/tensorflow/python/keras/utils/control_flow_util.py @@ -0,0 +1,83 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Utility functions for control flow. + +This file is copied from tensorflow/python/ops/control_flow_util.py. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + + +def InXlaContext(graph): + ctxt = graph._get_control_flow_context() # pylint: disable=protected-access + return GetContainingXLAContext(ctxt) is not None + + +def GraphOrParentsInXlaContext(graph): + while True: + if InXlaContext(graph): return True + try: + graph = graph.outer_graph + except AttributeError: + return False + + +def IsInWhileLoop(op): + ctxt = op._get_control_flow_context() # pylint: disable=protected-access + return GetContainingWhileContext(ctxt) is not None + + +def GetContainingWhileContext(ctxt, stop_ctxt=None): + """Returns the first ancestor WhileContext of `ctxt`. + + Returns `ctxt` if `ctxt` is a WhileContext, or None if `ctxt` is not in a + while loop. + + Args: + ctxt: ControlFlowContext + stop_ctxt: ControlFlowContext, optional. If provided, the search will end + if it sees stop_ctxt. + + Returns: + `ctxt` if `ctxt` is a WhileContext, the most nested WhileContext containing + `ctxt`, or None if `ctxt` is not in a while loop. If `stop_ctxt` is not + `None`, this returns `ctxt` if it matches `stop_ctxt` in its traversal. + """ + while ctxt: + if ctxt.IsWhileContext() or ctxt == stop_ctxt: return ctxt + ctxt = ctxt.outer_context + return None + + +def GetContainingXLAContext(ctxt): + """Returns the first ancestor XLAContext of `ctxt`. + + Returns `ctxt` if `ctxt` is a XLAContext, or None if `ctxt` is not in a + while loop. + + Args: + ctxt: ControlFlowContext + + Returns: + `ctxt` if `ctxt` is a XLAContext, the most nested XLAContext containing + `ctxt`, or None if `ctxt` is not in a while loop. + """ + while ctxt: + if ctxt.IsXLAContext(): return ctxt + ctxt = ctxt.outer_context + return None From 0e711195b9af87193e0bdde45b05dfe90f405799 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 20 Jul 2020 18:36:18 -0700 Subject: [PATCH 0893/2522] Add a pass to raise unknown ops to new internal op type custom_tf op. This to allow doing some graph optimizations on the custom ops. 
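Editor's note (not part of the original change description): models that still carry TF ops unknown to the TFLite dialect are normally converted with custom ops enabled. A minimal sketch using the public converter API; the saved-model path is a placeholder and nothing below is taken from this patch itself:

    # Hedged sketch: converting a model that contains an op TFLite does not
    # define, keeping it as a custom op in the flatbuffer. The path is a
    # placeholder.
    import tensorflow as tf

    converter = tf.lite.TFLiteConverter.from_saved_model("/tmp/saved_model")
    converter.allow_custom_ops = True
    tflite_model = converter.convert()
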
PiperOrigin-RevId: 322269741 Change-Id: Ib201da70efd2520829b05923e200f392ea35f19b --- tensorflow/compiler/mlir/lite/BUILD | 1 - .../compiler/mlir/lite/flatbuffer_export.cc | 29 ++----- tensorflow/compiler/mlir/lite/ir/tfl_ops.td | 28 +------ .../lite/tests/end2end/custom_opdef.pbtxt | 16 ++-- tensorflow/compiler/mlir/lite/tests/ops.mlir | 10 --- .../mlir/lite/tests/raise-custom-ops.mlir | 20 ----- .../compiler/mlir/lite/tf_tfl_passes.cc | 1 - .../compiler/mlir/lite/transforms/passes.h | 3 - .../mlir/lite/transforms/raise_custom_ops.cc | 80 ------------------- 9 files changed, 17 insertions(+), 171 deletions(-) delete mode 100644 tensorflow/compiler/mlir/lite/tests/raise-custom-ops.mlir delete mode 100644 tensorflow/compiler/mlir/lite/transforms/raise_custom_ops.cc diff --git a/tensorflow/compiler/mlir/lite/BUILD b/tensorflow/compiler/mlir/lite/BUILD index 8a60b292bc2..3b67ea3d846 100644 --- a/tensorflow/compiler/mlir/lite/BUILD +++ b/tensorflow/compiler/mlir/lite/BUILD @@ -334,7 +334,6 @@ cc_library( "transforms/optimize_functional_ops.cc", "transforms/prepare_composite_functions_tf.cc", "transforms/prepare_tf.cc", - "transforms/raise_custom_ops.cc", "transforms/runtime_verify.cc", "transforms/split_merged_operands.cc", "transforms/trim_functions_tf.cc", diff --git a/tensorflow/compiler/mlir/lite/flatbuffer_export.cc b/tensorflow/compiler/mlir/lite/flatbuffer_export.cc index 2e69a1740db..09c79d90e26 100644 --- a/tensorflow/compiler/mlir/lite/flatbuffer_export.cc +++ b/tensorflow/compiler/mlir/lite/flatbuffer_export.cc @@ -1196,35 +1196,22 @@ Optional> Translator::BuildSubGraph( if (IsConst(&inst)) continue; // Fetch operand and result tensor indices. - std::vector results; - results.reserve(inst.getNumOperands()); - for (auto result : inst.getResults()) { - results.push_back(tensor_index_map.lookup(result)); - } - Operation* real_inst = &inst; - // CustomTfOp is just a wrapper around a TF op, we export the custom Op - // not the wrapper, so we fetch the op from the region. - if (auto custom_op = dyn_cast(inst)) { - // If we have custom op with a region, then use the first op in the - // region, if it exists, otherwise just use params for custom op. 
- if (!custom_op.body().empty()) { - real_inst = &custom_op.body().front().front(); - } else { - module_.emitError( - "Invalid CustomTfOp: Custom TF Op have empty region."); - } - } std::vector operands; - operands.reserve(real_inst->getNumOperands()); - for (auto operand : real_inst->getOperands()) { + operands.reserve(inst.getNumOperands()); + for (auto operand : inst.getOperands()) { if (operand.getType().isa()) operands.push_back(kTfLiteOptionalTensor); else operands.push_back(tensor_index_map.lookup(operand)); } + std::vector results; + results.reserve(inst.getNumOperands()); + for (auto result : inst.getResults()) { + results.push_back(tensor_index_map.lookup(result)); + } if (auto tfl_operator = - BuildOperator(real_inst, operands, results, intermediates)) + BuildOperator(&inst, operands, results, intermediates)) operators.push_back(*tfl_operator); else failed_once = true; diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td index 715d047f0bf..66124ba9982 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td +++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td @@ -4296,8 +4296,7 @@ def TFL_WhileOp : Op { +def TFL_CustomOp : Op { let summary = "Custom op"; let description = [{ @@ -4320,29 +4319,4 @@ def TFL_CustomOp : Op]> { - let summary = "Wrapper Op for TF custom ops."; - - let description = [{ - A wrapper op around any Custom TF op. These includes ops defined using - custom_opdefs or linked which are not defined in TF dialect. - This Op just wraps the custom op inside a region. - Note #1, this Op will not include TF Lite custom ops defined using CustomOp. - Note #2, this op is just internal representation inside the converter and - are not exposed/exported when the model is exported to Flatbuffer. 
- }]; - - let arguments = (ins - Variadic>:$input - ); - let results = (outs Variadic:$output); - - let regions = (region SizedRegion<1>:$body); -} - #endif // TFL_OPS diff --git a/tensorflow/compiler/mlir/lite/tests/end2end/custom_opdef.pbtxt b/tensorflow/compiler/mlir/lite/tests/end2end/custom_opdef.pbtxt index 481be9d4deb..345468e609e 100644 --- a/tensorflow/compiler/mlir/lite/tests/end2end/custom_opdef.pbtxt +++ b/tensorflow/compiler/mlir/lite/tests/end2end/custom_opdef.pbtxt @@ -36,11 +36,11 @@ versions { producer: 27 } -# CHECK-LABEL: func @main(%arg0: tensor<4xi32>, %arg1: tensor<4xi32>) -> tensor<*xi32> -# CHECK: attributes {tf.entry_function = {control_outputs = "", inputs = "input0,input1", outputs = "output"}} { -# CHECK-NEXT: %[[CUSTOM:.*]] = "tfl.custom_tf"(%arg0, %arg1) ( { -# CHECK-NEXT: %[[OUTPUTS:.*]] = "tf.BannaPotatoSaladWithColeslaw"(%arg0, %arg1) {T = i32, device = ""} : (tensor<4xi32>, tensor<4xi32>) -> tensor<*xi32> -# CHECK-NEXT: "tfl.yield"(%[[OUTPUTS]]) : (tensor<*xi32>) -> () -# CHECK-NEXT: }) : (tensor<4xi32>, tensor<4xi32>) -> tensor<*xi32> -# CHECK-NEXT: return %[[CUSTOM]] : tensor<*xi32> -# CHECK-NEXT: } +# CHECK-LABEL: func @main +# CHECK-SAME: (%[[ARG_0:[a-z0-9]+]]: tensor<4xi32>, %[[ARG_1:[a-z0-9]+]]: tensor<4xi32>) -> tensor<*xi32> +# CHECK-SAME: control_outputs = "" +# CHECK-SAME: inputs = "input0,input1" +# CHECK-SAME: outputs = "output" +# CHECK-NEXT: %[[OP:[a-z0-9]+]] = "tf.BannaPotatoSaladWithColeslaw"(%[[ARG_0]], %[[ARG_1]]) {T = i32, device = ""} : (tensor<4xi32>, tensor<4xi32>) -> tensor<*xi32> +# CHECK-NEXT: return %[[OP]] : tensor<*xi32> +# CHECK-NEXT: } diff --git a/tensorflow/compiler/mlir/lite/tests/ops.mlir b/tensorflow/compiler/mlir/lite/tests/ops.mlir index 06e05987ee6..5f434e954c8 100644 --- a/tensorflow/compiler/mlir/lite/tests/ops.mlir +++ b/tensorflow/compiler/mlir/lite/tests/ops.mlir @@ -598,16 +598,6 @@ func @testMaxPool2DWrongOperandStorageType(tensor<1x7x7x16x!quant.uniform, %arg1: tensor<1x64x64x32xf32>, %arg2: tensor<1x64x64x32xf32>) -> (tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>) { - %0, %1, %2, %3 = "tfl.custom_tf"(%arg0, %arg1, %arg2) ({ - %4, %5, %6, %7 = "tf.TFLite_Detection_PostProcess"(%arg0, %arg1, %arg2) {_output_quantized = true, _output_types = [f32, f32, f32, f32], _support_output_type_float_in_quantized_op = true, detections_per_class = 100 : i64, device = "", h_scale = 5.000000e+00 : f32, max_classes_per_detection = 1 : i64, max_detections = 20 : i64, nms_iou_threshold = 6.000000e-01 : f32, nms_score_threshold = 3.000000e-01 : f32, num_classes = 90 : i64, use_regular_nms = false, w_scale = 5.000000e+00 : f32, x_scale = 1.000000e+01 : f32, y_scale = 1.000000e+01 : f32} : (tensor<1x64x64x32xf32>, tensor<1x64x64x32xf32>, tensor<1x64x64x32xf32>) -> (tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>) - "tfl.yield"(%4, %5, %6, %7) : (tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>) -> () - }) : (tensor<1x64x64x32xf32>, tensor<1x64x64x32xf32>, tensor<1x64x64x32xf32>) -> (tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>) - return %0, %1 : tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32> -} - -// ----- - func @testMaxPoolingWithArgMax2D(%arg0: tensor<1x64x64x32xf32>) -> (tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>) { // custom op for "tfl.max_pooling_with_argmax_2d"(%arg0) {filter_h = 2 : i32, filter_w = 2 : i32, padding = "SAME", stride_h = 2 : i32, stride_w = 2 : i32} : 
(tensor<1x64x64x32xf32>) -> (tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>) %0, %1 = "tfl.custom"(%arg0) {custom_option = opaque<"tfl", "0x01000000020000000200000002000000020000000000000000000000000000000000000000000000"> : tensor<40xi8>, custom_code = "MaxPoolingWithArgmax2D"} : (tensor<1x64x64x32xf32>) -> (tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>) diff --git a/tensorflow/compiler/mlir/lite/tests/raise-custom-ops.mlir b/tensorflow/compiler/mlir/lite/tests/raise-custom-ops.mlir deleted file mode 100644 index 1bac8019a30..00000000000 --- a/tensorflow/compiler/mlir/lite/tests/raise-custom-ops.mlir +++ /dev/null @@ -1,20 +0,0 @@ -// RUN: tf-opt -tfl-raise-custom-ops -canonicalize %s -o - | FileCheck %s - -// CHECK-LABEL: custom_op -func @custom_op(%arg0: tensor<4xf32>) -> tensor<4xf32> { - %0 = "tfl.pseudo_const" () {value = dense<1.0> : tensor<4xf32>} : () -> tensor<4xf32> - %1 = "tfl.mul"(%arg0, %0) {fused_activation_function = "NONE"} : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> - // will be preserved since it has uses. - %2 = "tf.MyCustomOp"(%1, %0) {fused_activation_function = "RELU", int_attr = 2 : i32} : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> - // will be removed since it doesn't have uses and doesn't have side effect. - "tf.MyCustomOp"(%1, %0) {fused_activation_function = "RELU", int_attr = 2 : i32} : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> - return %2 : tensor<4xf32> - -// CHECK-NEXT: %[[CST:.*]] = constant dense<1.000000e+00> -// CHECK-NEXT: %[[MUL:.*]] = tfl.mul %arg0, %[[CST]] {fused_activation_function = "NONE"} : tensor<4xf32> -// CHECK-NEXT: %[[CUSTOM:.*]] = "tfl.custom_tf"(%[[MUL]], %[[CST]]) ( { -// CHECK-NEXT: %[[MY_CUSTOM:.*]] = "tf.MyCustomOp"(%[[MUL]], %[[CST]]) {fused_activation_function = "RELU", int_attr = 2 : i32} : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> -// CHECK-NEXT: "tfl.yield"(%[[MY_CUSTOM]]) : (tensor<4xf32>) -> () -// CHECK-NEXT: }) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> -// CHECK-NEXT: return %[[CUSTOM]] : tensor<4xf32> -} diff --git a/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc b/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc index 239d52530ec..fc44e778b92 100644 --- a/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc +++ b/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc @@ -187,7 +187,6 @@ void AddTFToTFLConversionPasses(const mlir::TFL::PassConfig& pass_config, // so that it can target constants introduced once TensorFlow Identity ops // are removed during legalization. pass_manager->addPass(mlir::TFL::CreateOptimizeFunctionalOpsPass()); - pass_manager->addPass(mlir::TFL::CreateRaiseCustomOpsPass()); pass_manager->addPass(mlir::createSymbolDCEPass()); pass_manager->addNestedPass(mlir::createCanonicalizerPass()); pass_manager->addNestedPass(mlir::createCSEPass()); diff --git a/tensorflow/compiler/mlir/lite/transforms/passes.h b/tensorflow/compiler/mlir/lite/transforms/passes.h index 804a391231a..af97931b2a3 100644 --- a/tensorflow/compiler/mlir/lite/transforms/passes.h +++ b/tensorflow/compiler/mlir/lite/transforms/passes.h @@ -91,9 +91,6 @@ std::unique_ptr> CreateWhileOutlinePass(); // Verifies runtime constraints. 
std::unique_ptr> CreateRuntimeVerifyPass(); -// Creates raise custom ops pass, which legalize custom ops to TFL::CustomOp -std::unique_ptr> CreateRaiseCustomOpsPass(); - } // namespace TFL } // namespace mlir diff --git a/tensorflow/compiler/mlir/lite/transforms/raise_custom_ops.cc b/tensorflow/compiler/mlir/lite/transforms/raise_custom_ops.cc deleted file mode 100644 index 40cca526951..00000000000 --- a/tensorflow/compiler/mlir/lite/transforms/raise_custom_ops.cc +++ /dev/null @@ -1,80 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/StringRef.h" -#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project -#include "mlir/IR/Attributes.h" // from @llvm-project -#include "mlir/IR/Builders.h" // from @llvm-project -#include "mlir/IR/MLIRContext.h" // from @llvm-project -#include "mlir/IR/StandardTypes.h" // from @llvm-project -#include "mlir/Pass/Pass.h" // from @llvm-project -#include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h" -#include "tensorflow/compiler/mlir/lite/transforms/passes.h" -#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" - -namespace mlir { -namespace TFL { -namespace { -// This transformation pass takes an operation with unknown op properties and -// wrap it by a TFL::CustomTfOp. -struct RaiseCustomOpsPass - : public PassWrapper { - void runOnFunction() override; -}; - -void RaiseCustomOpsPass::runOnFunction() { - auto fn = getFunction(); - OpBuilder builder(fn.getContext()); - - llvm::SmallVector custom_ops; - for (Operation &op : fn.getOps()) { - // Skips the ops with known op property. - if (op.getAbstractOperation()) continue; - // Skips already imported ops that are imported as CustomTfOp. - if (op.getParentOfType()) continue; - if (llvm::isa(op) || llvm::isa(op)) - continue; - custom_ops.push_back(&op); - } - - for (auto *op : custom_ops) { - builder.setInsertionPoint(op); - auto custom_op = builder.create( - op->getLoc(), op->getResultTypes(), op->getOperands()); - Region region; - region.push_back(new Block); - - builder.setInsertionPointToEnd(®ion.front()); - Operation *inner_op = builder.clone(*op); - builder.create(op->getLoc(), inner_op->getResults()); - custom_op.body().takeBody(region); - - op->replaceAllUsesWith(custom_op); - op->erase(); - } -} -} // namespace - -// Creates an instance of the TensorFlow Lite dialect raise custom op pass. -std::unique_ptr> CreateRaiseCustomOpsPass() { - return std::make_unique(); -} - -static PassRegistration pass( - "tfl-raise-custom-ops", "Raise custom ops into tflite dialect."); - -} // namespace TFL -} // namespace mlir From f35eb71ee00c8b7ddd458cd9c90c777e10699aa2 Mon Sep 17 00:00:00 2001 From: Rick Chao Date: Mon, 20 Jul 2020 19:11:55 -0700 Subject: [PATCH 0894/2522] Update multi_worker_tutorial_test with checkpointing section. 
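Editor's note: the pattern the updated test exercises is the usual chief-writes checkpointing recipe for multi-worker training. A small, self-contained sketch using the public tf.train API; the is_chief/task_id values and directories are placeholders standing in for what the test derives from the cluster spec:

    import tensorflow as tf

    # Placeholders for the per-worker values the real test reads from TF_CONFIG.
    is_chief, task_id = True, 0
    model = tf.keras.Sequential([tf.keras.layers.Dense(1)])

    checkpoint = tf.train.Checkpoint(model=model)
    ckpt_dir = "/tmp/ckpt"
    # Non-chief workers write to a temporary directory and delete it afterwards,
    # so only the chief's checkpoint survives.
    write_dir = ckpt_dir if is_chief else "/tmp/workertemp_%d" % task_id
    manager = tf.train.CheckpointManager(checkpoint, directory=write_dir,
                                         max_to_keep=1)
    manager.save()
    if not is_chief:
        tf.io.gfile.rmtree(write_dir)
    # Every worker restores from the chief's directory.
    checkpoint.restore(tf.train.latest_checkpoint(ckpt_dir))
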
PiperOrigin-RevId: 322273832 Change-Id: I8617a48aada1f982e97d33d6272a3ddf4b8a8716 --- .../distribute/multi_worker_tutorial_test.py | 44 +++++++++++++++++-- 1 file changed, 40 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/keras/distribute/multi_worker_tutorial_test.py b/tensorflow/python/keras/distribute/multi_worker_tutorial_test.py index 35e2c7309fc..f7d64c2fc23 100644 --- a/tensorflow/python/keras/distribute/multi_worker_tutorial_test.py +++ b/tensorflow/python/keras/distribute/multi_worker_tutorial_test.py @@ -20,6 +20,7 @@ import contextlib import os import re import zipfile +from absl import logging from absl.testing import parameterized import numpy as np from tensorflow.python import keras @@ -35,6 +36,8 @@ from tensorflow.python.keras.datasets import mnist from tensorflow.python.keras.optimizer_v2 import gradient_descent from tensorflow.python.lib.io import file_io from tensorflow.python.platform import test +from tensorflow.python.training import checkpoint_management +from tensorflow.python.training.tracking import util as tracking_util from tensorflow.python.util import nest @@ -105,7 +108,7 @@ class MultiWorkerTutorialTest(parameterized.TestCase, test.TestCase): num_workers = 4 - def proc_func(model_path): + def proc_func(model_path, checkpoint_dir): global_batch_size = per_worker_batch_size * num_workers strategy = collective_all_reduce_strategy.CollectiveAllReduceStrategy() with strategy.scope(): @@ -129,7 +132,8 @@ class MultiWorkerTutorialTest(parameterized.TestCase, test.TestCase): callbacks=callbacks) def _is_chief(task_type, task_id): - return task_type == 'chief' or (task_type == 'worker' and task_id == 0) + return task_type is None or task_type == 'chief' or ( + task_type == 'worker' and task_id == 0) def _get_temp_dir(dirpath, task_id): base_dirpath = 'workertemp_' + str(task_id) @@ -163,14 +167,46 @@ class MultiWorkerTutorialTest(parameterized.TestCase, test.TestCase): loaded_model = keras.saving.save.load_model(model_path) loaded_model.fit(multi_worker_dataset, epochs=2, steps_per_epoch=20) - model_path = os.path.join(self.get_temp_dir(), 'ckpt.tf') + checkpoint = tracking_util.Checkpoint(model=multi_worker_model) + write_checkpoint_dir = write_filepath(checkpoint_dir, task_type, task_id) + checkpoint_manager = checkpoint_management.CheckpointManager( + checkpoint, directory=write_checkpoint_dir, max_to_keep=1) + + checkpoint_manager.save() + if not _is_chief(task_type, task_id): + file_io.delete_recursively_v2(write_checkpoint_dir) + + # Make sure chief finishes saving before non-chief's assertions. 
+ multi_process_runner.barrier().wait() + + if not file_io.file_exists(checkpoint_dir): + raise RuntimeError() + if file_io.file_exists(write_checkpoint_dir) != _is_chief( + task_type, task_id): + raise RuntimeError() + + latest_checkpoint = checkpoint_management.latest_checkpoint( + checkpoint_dir) + checkpoint.restore(latest_checkpoint) + multi_worker_model.fit(multi_worker_dataset, epochs=2, steps_per_epoch=20) + + logging.info('testMultiWorkerTutorial successfully ends') + + model_path = os.path.join(self.get_temp_dir(), 'model.tf') + checkpoint_dir = os.path.join(self.get_temp_dir(), 'ckpt') with test_util.skip_if_error(self, errors_impl.UnavailableError): mpr_result = multi_process_runner.run( proc_func, multi_worker_test_base.create_cluster_spec(num_workers=num_workers), - args=(model_path,), + args=(model_path, checkpoint_dir), list_stdout=True) + self.assertTrue( + any([ + 'testMultiWorkerTutorial successfully ends' in msg + for msg in mpr_result.stdout + ])) + def extract_accuracy(worker_id, input_string): match = re.match( r'\[worker\-{}\].*accuracy: (\d+\.\d+).*'.format(worker_id), From 0e2cb83c1c7e8ce1fd040fcca56a37155dde75a1 Mon Sep 17 00:00:00 2001 From: Haoyu Zhang Date: Mon, 20 Jul 2020 19:54:10 -0700 Subject: [PATCH 0895/2522] Do not invoke DeregisterCall when RendezvousMgr is already aborted. PiperOrigin-RevId: 322277723 Change-Id: I35494f617133f07deb99b00fe8548dd02b0f6b41 --- tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.cc b/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.cc index 512c17fcfcf..5de81341c65 100644 --- a/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.cc +++ b/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.cc @@ -277,6 +277,7 @@ void RpcRemoteRendezvous::RecvFromRemoteAsync( // RendezvousMgr already aborted, shouldn't send RPC call any more if (!call->status().ok()) { + DeregisterCall(call); // NOTE: `*sess` can potentially be deleted before we return from // `call->done()(...)`, so we must release the worker before calling the // callback. From 322791ebc84cb85897bad6bb0db85d350c0b4c2d Mon Sep 17 00:00:00 2001 From: Brian Zhao Date: Mon, 20 Jul 2020 20:01:40 -0700 Subject: [PATCH 0896/2522] Automated g4 rollback of changelist 322239438. 
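Editor's note: the rolled-back changelist had added support for slicing Keras symbolic tensors with other symbolic tensors inside functional models. Adapted from the tests deleted below, this is the usage pattern the rollback removes (it is not expected to work after this change):

    import tensorflow as tf

    inp = tf.keras.Input(shape=(4, 3, 8))
    slice_stop = tf.keras.Input(shape=(), dtype='int32')
    out = inp[..., :slice_stop[0]]   # slice bound taken from another symbolic tensor
    model = tf.keras.Model(inputs=[inp, slice_stop], outputs=out)
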
PiperOrigin-RevId: 322278474 Change-Id: I81c3ffdc2b4705e28a7dc6715dd8a5387e24b64c --- .../keras/layers/tensorflow_op_layer_test.py | 109 ------------------ .../keras/saving/saved_model/json_utils.py | 7 -- tensorflow/python/keras/utils/tf_utils.py | 2 - tensorflow/python/ops/array_ops.py | 2 - tensorflow/python/util/dispatch_test.py | 63 +--------- tensorflow/python/util/nest.py | 17 +-- tensorflow/python/util/nest_test.py | 21 ---- tensorflow/python/util/serialization.py | 10 -- tensorflow/python/util/util.cc | 42 ------- tensorflow/python/util/util.h | 9 -- tensorflow/python/util/util_wrapper.cc | 18 --- 11 files changed, 7 insertions(+), 293 deletions(-) diff --git a/tensorflow/python/keras/layers/tensorflow_op_layer_test.py b/tensorflow/python/keras/layers/tensorflow_op_layer_test.py index 7baaa6a4ddc..cb044260106 100644 --- a/tensorflow/python/keras/layers/tensorflow_op_layer_test.py +++ b/tensorflow/python/keras/layers/tensorflow_op_layer_test.py @@ -294,115 +294,6 @@ class AutoLambdaTest(keras_parameterized.TestCase): self.assertAllEqual([layer.name for layer in model.layers], [layer.name for layer in new_model.layers]) - def test_getitem_slice_with_step_only(self): - if not context.executing_eagerly(): - self.skipTest('Complex slicing like this fails in v1') - inp = keras.Input(shape=(4, 3, 8)) - slice_step = keras.Input(shape=(), dtype='int32') - - out = inp[..., ::slice_step[0]] - model = keras.Model( - inputs=[inp, slice_step], - outputs=out) - model.compile( - adam.Adam(0.001), - 'mse', - run_eagerly=testing_utils.should_run_eagerly()) - batch_size = 7 - step = 3 - x = array_ops.stack([ - math_ops.range(8) for _ in range(batch_size)]) - args = [x, constant_op.constant(step, shape=(batch_size,))] - expected = array_ops.stack([ - math_ops.range(8)[::step] for _ in range(batch_size)]) - - self.assertAllEqual(model(args), expected) - self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) - - # Make sure it can be successfully saved and loaded - config = model.get_config() - model = keras.Model.from_config(config) - - self.assertAllEqual(model(args), expected) - self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) - - def test_getitem_slice_with_stop_only(self): - if not context.executing_eagerly(): - self.skipTest('Complex slicing like this fails in v1') - inp = keras.Input(shape=(4, 3, 8)) - slice_stop = keras.Input(shape=(), dtype='int32') - - out = inp[..., :slice_stop[0]] - model = keras.Model( - inputs=[inp, slice_stop], - outputs=out) - model.compile( - adam.Adam(0.001), - 'mse', - run_eagerly=testing_utils.should_run_eagerly()) - batch_size = 7 - stop = 6 - x = array_ops.stack([ - math_ops.range(8) for _ in range(batch_size)]) - args = [x, constant_op.constant(stop, shape=(batch_size,))] - expected = array_ops.stack([ - math_ops.range(8)[:stop] for _ in range(batch_size)]) - - self.assertAllEqual(model(args), expected) - self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) - - # Make sure it can be successfully saved and loaded - config = model.get_config() - model = keras.Model.from_config(config) - - self.assertAllEqual(model(args), expected) - self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) - - def test_getitem_complex_slicing(self): - if not context.executing_eagerly(): - self.skipTest('Complex slicing like this fails in v1') - inp = keras.Input(shape=(4, 3, 8)) - first_dim = keras.Input(shape=(), dtype='int32') - slice_start = keras.Input(shape=(), dtype='int32') - slice_stop = 
keras.Input(shape=(), dtype='int32') - slice_stride = keras.Input(shape=(), dtype='int32') - - out = inp[..., first_dim[0], slice_start[0]:slice_stop[0]:slice_stride[0]] - model = keras.Model( - inputs=[inp, first_dim, slice_start, slice_stop, slice_stride], - outputs=out) - model.compile( - adam.Adam(0.001), - 'mse', - run_eagerly=testing_utils.should_run_eagerly()) - batch_size = 7 - start = 1 - stop = 6 - step = 2 - x = array_ops.stack([array_ops.stack([array_ops.stack([ - math_ops.range(8) - for _ in range(3)]) for _ in range(4)]) for _ in range(batch_size)]) - args = [x, - constant_op.constant(0, shape=(batch_size,)), - constant_op.constant(start, shape=(batch_size,)), - constant_op.constant(stop, shape=(batch_size,)), - constant_op.constant(step, shape=(batch_size,))] - # Slice the innermost dim. only grab one index from the second-to-innermost - # dim, removing that dim from the shape. - expected = array_ops.stack([array_ops.stack([ - math_ops.range(8)[start:stop:step] - for _ in range(4)]) for _ in range(batch_size)]) - - self.assertAllEqual(model(args), expected) - self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) - - # Make sure it can be successfully saved and loaded - config = model.get_config() - model = keras.Model.from_config(config) - - self.assertAllEqual(model(args), expected) - self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) - def test_numerical_correctness_simple(self): x = ops.convert_to_tensor_v2([[-1., 0., -2., 1.]]) inputs = keras.Input(shape=(4,)) diff --git a/tensorflow/python/keras/saving/saved_model/json_utils.py b/tensorflow/python/keras/saving/saved_model/json_utils.py index df198266899..0ac86d4e692 100644 --- a/tensorflow/python/keras/saving/saved_model/json_utils.py +++ b/tensorflow/python/keras/saving/saved_model/json_utils.py @@ -61,16 +61,9 @@ def decode(json_string): def _decode_helper(obj): - """A decoding helper that is TF-object aware.""" if isinstance(obj, dict) and 'class_name' in obj: if obj['class_name'] == 'TensorShape': return tensor_shape.TensorShape(obj['items']) elif obj['class_name'] == '__tuple__': return tuple(_decode_helper(i) for i in obj['items']) - elif obj['class_name'] == '__slice__': - return slice(start=_decode_helper(obj['start']), - stop=_decode_helper(obj['stop']), - step=_decode_helper(obj['step'])) - elif obj['class_name'] == '__ellipsis__': - return Ellipsis return obj diff --git a/tensorflow/python/keras/utils/tf_utils.py b/tensorflow/python/keras/utils/tf_utils.py index 8c3028fa8ef..c9ad96cd37b 100644 --- a/tensorflow/python/keras/utils/tf_utils.py +++ b/tensorflow/python/keras/utils/tf_utils.py @@ -181,8 +181,6 @@ def map_structure_with_atomic(is_atomic_fn, map_fn, nested): values = [nested[k] for k in nest._sorted(nested)] elif nest._is_attrs(nested): values = _astuple(nested) - elif nest._is_slice(nested): - values = (nested.start, nested.stop, nested.step) else: values = nested mapped_values = [ diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index fab2d7b8713..b8711a444a8 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -955,8 +955,6 @@ def _slice_helper(tensor, slice_spec, var=None): TypeError: If the slice indices aren't int, slice, ellipsis, tf.newaxis or scalar int32/int64 tensors. 
""" - tensor = ops.convert_to_tensor(tensor) - if isinstance(slice_spec, bool) or \ (isinstance(slice_spec, ops.Tensor) and slice_spec.dtype == dtypes.bool) or \ (isinstance(slice_spec, np.ndarray) and slice_spec.dtype == bool): diff --git a/tensorflow/python/util/dispatch_test.py b/tensorflow/python/util/dispatch_test.py index db73dff57e0..cc4fed0abb7 100644 --- a/tensorflow/python/util/dispatch_test.py +++ b/tensorflow/python/util/dispatch_test.py @@ -20,7 +20,6 @@ from __future__ import print_function from tensorflow.python.framework import ops from tensorflow.python.framework import test_util -from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops.proto_ops import decode_proto @@ -29,7 +28,6 @@ from tensorflow.python.platform import test from tensorflow.python.platform import tf_logging from tensorflow.python.util import deprecation from tensorflow.python.util import dispatch -from tensorflow.python.util import nest from tensorflow.python.util.tf_export import get_canonical_name_for_symbol from tensorflow.python.util.tf_export import tf_export @@ -70,25 +68,6 @@ class TensorTracer(object): ["{}={}".format(name, x) for (name, x) in self.kwargs.items()]) return "{}({})".format(self.name, ", ".join(args)) - @classmethod - def _overload_all_operators(cls): # pylint: disable=invalid-name - """Register overloads for all operators.""" - for operator in ops.Tensor.OVERLOADABLE_OPERATORS: - cls._overload_operator(operator) - - @classmethod - def _overload_operator(cls, operator): # pylint: disable=invalid-name - """Overload an operator with the same overloading as `ops.Tensor`.""" - tensor_oper = getattr(ops.Tensor, operator) - - # Compatibility with Python 2: - # Python 2 unbound methods have type checks for the first arg, - # so we need to extract the underlying function - tensor_oper = getattr(tensor_oper, "__func__", tensor_oper) - setattr(cls, operator, tensor_oper) - -TensorTracer._overload_all_operators() # pylint: disable=protected-access - class TensorTracerOpDispatcher(dispatch.GlobalOpDispatcher): """Global op dispatcher for TensorTracer.""" @@ -103,7 +82,11 @@ class TensorTracerOpDispatcher(dispatch.GlobalOpDispatcher): return TensorTracer(symbol_name, args, kwargs) def is_tensor_tracer_arg(self, value): - return any(isinstance(x, TensorTracer) for x in nest.flatten(value)) + if isinstance(value, TensorTracer): + return True + if isinstance(value, (list, tuple)): + if any(isinstance(x, TensorTracer) for x in value): + return True @test_util.run_all_in_graph_and_eager_modes @@ -231,41 +214,5 @@ class DispatchTest(test_util.TensorFlowTestCase): # Clean up. 
dispatch._GLOBAL_DISPATCHERS = original_global_dispatchers - def testGlobalDispatcherGetItem(self): - original_global_dispatchers = dispatch._GLOBAL_DISPATCHERS - try: - TensorTracerOpDispatcher().register() - - x = TensorTracer("x") - trace = x[0] - self.assertEqual( - str(trace), - "__operators__.getitem(x, 0)") - - x = TensorTracer("x") - y = TensorTracer("y") - trace = x[y] - self.assertEqual( - str(trace), - "__operators__.getitem(x, y)") - - x = TensorTracer("x") - y = TensorTracer("y") - trace = x[:y] # pylint: disable=invalid-slice-index - self.assertEqual( - str(trace), - "__operators__.getitem(x, slice(None, y, None))") - - x = array_ops.ones(shape=(5, 5)) - y = TensorTracer("y") - trace = x[:y] # pylint: disable=invalid-slice-index - self.assertRegex( - str(trace).replace("\n", " "), - r"__operators__.getitem\(.*Tensor.*, slice\(None, y, None\)\)") - - finally: - # Clean up. - dispatch._GLOBAL_DISPATCHERS = original_global_dispatchers - if __name__ == "__main__": googletest.main() diff --git a/tensorflow/python/util/nest.py b/tensorflow/python/util/nest.py index 7ae288c8e15..66f43a3d682 100644 --- a/tensorflow/python/util/nest.py +++ b/tensorflow/python/util/nest.py @@ -26,13 +26,10 @@ nest recognizes the following types of collections: 4.orderedDict 5.MutableMapping 6.attr.s - 7.slice attr.s decorated classes (http://www.attrs.org) are also supported, in the same way as `namedtuple`. -Python slices get flattened into `[x.start, x.stop, x.step]` - The utilities here assume (and do not check) that the nested structures form a 'tree', i.e., no references in the structure of the input of these functions should be recursive. @@ -123,7 +120,6 @@ def _is_namedtuple(instance, strict=False): # See the swig file (util.i) for documentation. _is_mapping_view = _pywrap_utils.IsMappingView _is_attrs = _pywrap_utils.IsAttrs -_is_slice = _pywrap_utils.IsSlice _is_composite_tensor = _pywrap_utils.IsCompositeTensor _is_type_spec = _pywrap_utils.IsTypeSpec _is_mutable_mapping = _pywrap_utils.IsMutableMapping @@ -194,8 +190,6 @@ def _sequence_like(instance, args): # For object proxies, first create the underlying type and then re-wrap it # in the proxy type. return type(instance)(_sequence_like(instance.__wrapped__, args)) - elif _is_slice(instance): - return slice(*args) else: # Not a namedtuple return type(instance)(args) @@ -250,10 +244,6 @@ def _yield_sorted_items(iterable): # Note: to allow CompositeTensors and their TypeSpecs to have matching # structures, we need to use the same key string here. yield iterable.value_type.__name__, iterable._component_specs # pylint: disable=protected-access - elif _is_slice(iterable): - yield "start", iterable.start - yield "stop", iterable.stop - yield "step", iterable.step else: for item in enumerate(iterable): yield item @@ -285,8 +275,7 @@ def is_nested(seq): def flatten(structure, expand_composites=False): """Returns a flat list from a given nested structure. - If nest is not a structure , tuple (or a namedtuple), dict, slice, or an - attrs class, + If nest is not a structure , tuple (or a namedtuple), dict, or an attrs class, then returns a single-element list: [nest]. @@ -297,8 +286,7 @@ def flatten(structure, expand_composites=False): repacks dicts and OrderedDicts after they have been flattened, and also allows flattening an OrderedDict and then repacking it back using a corresponding plain dict, or vice-versa. Dictionaries with non-sortable keys cannot be - flattened. `slice`s will get flattened into the form - `[x.start, x.stop, x.step]`. 
+ flattened. Users must not modify any collections used in nest while this function is running. @@ -1441,5 +1429,4 @@ _pywrap_utils.RegisterType("Mapping", _collections_abc.Mapping) _pywrap_utils.RegisterType("MutableMapping", _collections_abc.MutableMapping) _pywrap_utils.RegisterType("Sequence", _collections_abc.Sequence) _pywrap_utils.RegisterType("MappingView", _collections_abc.MappingView) -_pywrap_utils.RegisterType("Slice", slice) _pywrap_utils.RegisterType("ObjectProxy", _wrapt.ObjectProxy) diff --git a/tensorflow/python/util/nest_test.py b/tensorflow/python/util/nest_test.py index 7d674519433..ca808ba9ff1 100644 --- a/tensorflow/python/util/nest_test.py +++ b/tensorflow/python/util/nest_test.py @@ -122,27 +122,6 @@ class NestTest(parameterized.TestCase, test.TestCase): new_structure = nest.map_structure(lambda x: x, structure) self.assertEqual(structure, new_structure) - @parameterized.parameters( - slice(4), - slice(None), - # Because slice is overloaded, it does not take keyword args - slice(None, None, None), - slice(6, None, None), - slice(None, 4, None), - slice(None, None, 2), - slice(6, 2, None), - slice(None, 4, 5), - slice(3, None, 5), - slice(3, 7, 5), - ) - @test_util.assert_no_new_pyobjects_executing_eagerly - def testFlattenAndPackSlice(self, value): - structure = [value] - flat = nest.flatten(structure) - self.assertAllEqual(flat, [value.start, value.stop, value.step]) - new_structure = nest.pack_sequence_as(structure, flat) - self.assertEqual(structure, new_structure) - @test_util.assert_no_new_pyobjects_executing_eagerly def testFlattenAndPack(self): structure = ((3, 4), 5, (6, 7, (9, 10), 8)) diff --git a/tensorflow/python/util/serialization.py b/tensorflow/python/util/serialization.py index 3258d138b0c..3b1713b4c61 100644 --- a/tensorflow/python/util/serialization.py +++ b/tensorflow/python/util/serialization.py @@ -70,16 +70,6 @@ def get_json_type(obj): if isinstance(obj, collections_abc.Mapping): return dict(obj) - if isinstance(obj, slice): - return { - 'class_name': '__slice__', - 'start': obj.start, - 'stop': obj.stop, - 'step': obj.step} - - if obj is Ellipsis: - return {'class_name': '__ellipsis__'} - if isinstance(obj, wrapt.ObjectProxy): return obj.__wrapped__ diff --git a/tensorflow/python/util/util.cc b/tensorflow/python/util/util.cc index 5f794aa38b0..cf8581443e7 100644 --- a/tensorflow/python/util/util.cc +++ b/tensorflow/python/util/util.cc @@ -240,17 +240,6 @@ int IsMappingHelper(PyObject* o) { return check_cache->CachedLookup(o); } -// Returns 1 if `o` is considered a slice object for the purposes of Flatten(). -// Returns 0 otherwise. -// Returns -1 if an error occurred. -int IsSliceHelper(PyObject* o) { - static auto* const check_cache = new CachedTypeCheck([](PyObject* to_check) { - return IsInstanceOfRegisteredType(to_check, "Slice"); - }); - if (PyDict_Check(o)) return true; - return check_cache->CachedLookup(o); -} - // Returns 1 if `o` is considered a mutable mapping for the purposes of // Flatten(). Returns 0 otherwise. Returns -1 if an error occurred. 
int IsMutableMappingHelper(PyObject* o) { @@ -355,7 +344,6 @@ int IsSequenceHelper(PyObject* o) { if (IsMappingHelper(o)) return true; if (IsMappingViewHelper(o)) return true; if (IsAttrsHelper(o)) return true; - if (IsSliceHelper(o)) return true; if (PySet_Check(o) && !WarnedThatSetIsNotSequence) { LOG(WARNING) << "Sets are not currently considered sequences, " "but this may change in the future, " @@ -543,31 +531,6 @@ class AttrsValueIterator : public ValueIterator { Safe_PyObjectPtr iter_; }; -class SliceValueIterator : public ValueIterator { - public: - explicit SliceValueIterator(PyObject* slice) : slice_(slice), attr_(0) { - Py_INCREF(slice); - } - - Safe_PyObjectPtr next() override { - Safe_PyObjectPtr result; - if (attr_ == 0) { - result.reset(PyObject_GetAttrString(slice_.get(), "start")); - } else if (attr_ == 1) { - result.reset(PyObject_GetAttrString(slice_.get(), "stop")); - } else if (attr_ == 2) { - result.reset(PyObject_GetAttrString(slice_.get(), "step")); - } - attr_++; - - return result; - } - - private: - Safe_PyObjectPtr slice_; - int attr_; -}; - bool IsSparseTensorValueType(PyObject* o) { PyObject* sparse_tensor_value_type = GetRegisteredPyObject("SparseTensorValue"); @@ -630,8 +593,6 @@ ValueIteratorPtr GetValueIterator(PyObject* nested) { return absl::make_unique(nested); } else if (IsAttrsHelper(nested)) { return absl::make_unique(nested); - } else if (IsSliceHelper(nested)) { - return absl::make_unique(nested); } else { return absl::make_unique(nested); } @@ -645,8 +606,6 @@ ValueIteratorPtr GetValueIteratorForData(PyObject* nested) { return absl::make_unique(nested); } else if (IsAttrsHelper(nested)) { return absl::make_unique(nested); - } else if (IsSliceHelper(nested)) { - return absl::make_unique(nested); } else if (IsSparseTensorValueType(nested)) { return absl::make_unique(nested); } else { @@ -950,7 +909,6 @@ bool IsSequence(PyObject* o) { return IsSequenceHelper(o) == 1; } bool IsMapping(PyObject* o) { return IsMappingHelper(o) == 1; } bool IsMutableMapping(PyObject* o) { return IsMutableMappingHelper(o) == 1; } bool IsMappingView(PyObject* o) { return IsMappingViewHelper(o) == 1; } -bool IsSlice(PyObject* o) { return IsSliceHelper(o) == 1; } bool IsAttrs(PyObject* o) { return IsAttrsHelper(o) == 1; } bool IsTensor(PyObject* o) { return IsTensorHelper(o) == 1; } bool IsEagerTensorSlow(PyObject* o) { return IsEagerTensorHelper(o) == 1; } diff --git a/tensorflow/python/util/util.h b/tensorflow/python/util/util.h index 2f8ff6f8093..fc0b864416e 100644 --- a/tensorflow/python/util/util.h +++ b/tensorflow/python/util/util.h @@ -115,15 +115,6 @@ bool IsTuple(PyObject* o); // True if the sequence subclasses mapping. bool IsMappingView(PyObject* o); -// Returns a true if its input is a python `slice` object. -// -// Args: -// seq: the input to be checked. -// -// Returns: -// True if the input object is a python `slice`. -bool IsSlice(PyObject* o); - // A version of PyMapping_Keys that works in C++11 // // Args: diff --git a/tensorflow/python/util/util_wrapper.cc b/tensorflow/python/util/util_wrapper.cc index a6f421027af..63c70d785cc 100644 --- a/tensorflow/python/util/util_wrapper.cc +++ b/tensorflow/python/util/util_wrapper.cc @@ -198,24 +198,6 @@ PYBIND11_MODULE(_pywrap_utils, m) { Returns: True if `instance` is an instance of an `attr.s` decorated class. 
)pbdoc"); - m.def( - "IsSlice", - [](const py::handle& o) { - bool result = tensorflow::swig::IsSlice(o.ptr()); - if (PyErr_Occurred()) { - throw py::error_already_set(); - } - return result; - }, - R"pbdoc( - Returns True if `instance` is an instance of a python`slice` object. - - Args: - instance: An instance of a Python object. - - Returns: - True if `instance` is an instance of a python `slice` object. - )pbdoc"); m.def( "SameNamedtuples", [](const py::handle& o1, const py::handle& o2) { From 8dbdd0803fb0d18e30366a7abe356eb9d225c528 Mon Sep 17 00:00:00 2001 From: Revan Sopher Date: Mon, 20 Jul 2020 20:03:36 -0700 Subject: [PATCH 0897/2522] Create no-src target for expanding visibility of TF deps. PiperOrigin-RevId: 322278685 Change-Id: Id1295a5de3029ca874cd3908e3f09b5fef88bea7 --- tensorflow/python/tpu/BUILD | 7 +++++++ tensorflow/python/tpu/tpu_test_wrapper.bzl | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/tpu/BUILD b/tensorflow/python/tpu/BUILD index dbe99670b99..e1e71e62692 100644 --- a/tensorflow/python/tpu/BUILD +++ b/tensorflow/python/tpu/BUILD @@ -560,6 +560,13 @@ tpu_py_test( ], ) +# NOTE this target should only be depended on by the tpu_test_wrapper macro. +py_library( + name = "tpu_test_deps", + visibility = ["//visibility:public"], + deps = ["//tensorflow/python:client_testlib"], +) + tf_proto_library( name = "tensor_tracer_proto", srcs = ["tensor_tracer.proto"], diff --git a/tensorflow/python/tpu/tpu_test_wrapper.bzl b/tensorflow/python/tpu/tpu_test_wrapper.bzl index fda631701bc..48ccdc3b2d1 100644 --- a/tensorflow/python/tpu/tpu_test_wrapper.bzl +++ b/tensorflow/python/tpu/tpu_test_wrapper.bzl @@ -57,7 +57,7 @@ def get_kwargs_for_wrapping( kwargs["python_version"] = kwargs.get("python_version", "PY3") kwargs["srcs"] = [wrapper_src] + kwargs["srcs"] kwargs["deps"] = depset( - ["//tensorflow/python:client_testlib"], + ["//tensorflow/python/tpu:tpu_test_deps"], transitive = [deps], ) kwargs["main"] = wrapper_src From 918f876bf812fd744151fea29b2df4aa18acfa8f Mon Sep 17 00:00:00 2001 From: Renjie Liu Date: Mon, 20 Jul 2020 20:25:21 -0700 Subject: [PATCH 0898/2522] Add quantized int8 elu using LUT approach PiperOrigin-RevId: 322280954 Change-Id: I77a97819bb767a3152442d50858e81b33f401932 --- tensorflow/lite/kernels/activations.cc | 30 ++++++++++++++++----- tensorflow/lite/kernels/activations_test.cc | 23 ++++++++++++++++ 2 files changed, 47 insertions(+), 6 deletions(-) diff --git a/tensorflow/lite/kernels/activations.cc b/tensorflow/lite/kernels/activations.cc index c62b75962aa..654ccbc27ec 100644 --- a/tensorflow/lite/kernels/activations.cc +++ b/tensorflow/lite/kernels/activations.cc @@ -1313,6 +1313,20 @@ TfLiteStatus LeakyReluEval(TfLiteContext* context, TfLiteNode* node) { } } +TfLiteStatus EluPrepare(TfLiteContext* context, TfLiteNode* node) { + const TfLiteTensor* input = GetInput(context, node, 0); + TfLiteTensor* output = GetOutput(context, node, 0); + OpData* data = reinterpret_cast(node->user_data); + + // Use LUT to handle quantized elu path. + if (input->type == kTfLiteInt8) { + PopulateLookupTable(data, input, output, [](float value) { + return value < 0.0 ? 
std::exp(value) - 1.0f : value; + }); + } + return GenericPrepare(context, node); +} + TfLiteStatus EluEval(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* input = GetInput(context, node, 0); TfLiteTensor* output = GetOutput(context, node, 0); @@ -1322,10 +1336,15 @@ TfLiteStatus EluEval(TfLiteContext* context, TfLiteNode* node) { GetTensorShape(output), GetTensorData(output)); return kTfLiteOk; } break; + case kTfLiteInt8: { + OpData* data = reinterpret_cast(node->user_data); + EvalUsingLookupTable(data, input, output); + return kTfLiteOk; + } break; default: - TF_LITE_KERNEL_LOG(context, - "Only float32 is supported currently, got %s.", - TfLiteTypeGetName(input->type)); + TF_LITE_KERNEL_LOG( + context, "Only float32 and int8 is supported currently, got %s.", + TfLiteTypeGetName(input->type)); return kTfLiteError; } } @@ -1333,9 +1352,8 @@ TfLiteStatus EluEval(TfLiteContext* context, TfLiteNode* node) { } // namespace activations TfLiteRegistration* Register_ELU() { - static TfLiteRegistration r = {/*init=*/nullptr, /*free=*/nullptr, - activations::GenericPrepare, - activations::EluEval}; + static TfLiteRegistration r = {activations::Init, activations::Free, + activations::EluPrepare, activations::EluEval}; return &r; } diff --git a/tensorflow/lite/kernels/activations_test.cc b/tensorflow/lite/kernels/activations_test.cc index 50b1c041e34..d8f883b9c1d 100644 --- a/tensorflow/lite/kernels/activations_test.cc +++ b/tensorflow/lite/kernels/activations_test.cc @@ -253,6 +253,29 @@ TEST(FloatActivationsOpTest, Elu) { }))); } +TEST(QuantizedActivationsOpTest, EluInt8) { + const float kMin = -1; + const float kMax = 127.f / 128.f; + QuantizedActivationsOpModel model( + BuiltinOperator_ELU, + /*input=*/{TensorType_INT8, {1, 2, 4, 1}, 8 * kMin, 8 * kMax}, + /*output=*/{TensorType_INT8, {1, 2, 4, 1}, 8 * kMin, 8 * kMax}); + + model.SetInput({ + 0, -6, 2, -4, // + 3, -2, 6, -0.1, // + }); + + model.Invoke(); + EXPECT_THAT(model.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear( + { + 0, -1.0, 2.0, -1, // + 3.0, -0.875, 6.0, -0.125, // + }, + kQuantizedTolerance))); +} + TEST(FloatActivationsOpTest, Relu) { FloatActivationsOpModel m(BuiltinOperator_RELU, /*input=*/{TensorType_FLOAT32, {1, 2, 4, 1}}); From bc8bb3ba84087a86931f87226663eddb9fda7faf Mon Sep 17 00:00:00 2001 From: Thai Nguyen Date: Mon, 20 Jul 2020 20:34:46 -0700 Subject: [PATCH 0899/2522] Add builtin BroadcastTo Op to TFLite Converter support will be added in a follow-up CL. 
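Editor's note: the semantics the new kernel implements mirror the public tf.broadcast_to op, which tiles size-1 (or missing leading) dimensions up to the requested shape. A small illustration:

    import tensorflow as tf

    x = tf.constant([[1], [2], [3]])        # shape (3, 1)
    y = tf.broadcast_to(x, [3, 4])          # shape (3, 4)
    # y == [[1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3]]
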
PiperOrigin-RevId: 322281991 Change-Id: I9a96d0dff3a089a9b43b85c955cc416717e26aa9 --- RELEASE.md | 3 +- tensorflow/lite/builtin_ops.h | 1 + tensorflow/lite/c/common.c | 23 ++ tensorflow/lite/c/common.h | 3 + .../lite/core/api/flatbuffer_conversions.cc | 1 + tensorflow/lite/kernels/BUILD | 14 + tensorflow/lite/kernels/broadcast_to.cc | 136 ++++++++++ tensorflow/lite/kernels/broadcast_to_test.cc | 255 ++++++++++++++++++ tensorflow/lite/kernels/builtin_op_kernels.h | 1 + tensorflow/lite/kernels/internal/BUILD | 1 + tensorflow/lite/kernels/internal/common.h | 7 + .../kernels/internal/reference/broadcast_to.h | 90 +++++++ tensorflow/lite/kernels/register.cc | 1 + tensorflow/lite/kernels/register_ref.cc | 2 + tensorflow/lite/schema/schema.fbs | 9 +- tensorflow/lite/schema/schema_generated.h | 134 ++++++++- tensorflow/lite/toco/model.h | 1 + tensorflow/lite/toco/tflite/op_version.cc | 1 + .../benchmark/experimental/c/c_api_types.h | 3 + .../lite/tools/versioning/runtime_version.cc | 1 + .../tools/versioning/runtime_version_test.cc | 2 +- 21 files changed, 674 insertions(+), 15 deletions(-) create mode 100644 tensorflow/lite/kernels/broadcast_to.cc create mode 100644 tensorflow/lite/kernels/broadcast_to_test.cc create mode 100644 tensorflow/lite/kernels/internal/reference/broadcast_to.h diff --git a/RELEASE.md b/RELEASE.md index c4fa615cf4d..12b5168954b 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -54,7 +54,8 @@ * `tf.function`/AutoGraph: * * `tf.lite`: - * + * Better support for ops with high-dimensional broadcasting inputs by adding + `BroadcastTo` ops when necessary. * `tf.random`: * * Math and Linear Algebra: diff --git a/tensorflow/lite/builtin_ops.h b/tensorflow/lite/builtin_ops.h index 85140289ac1..c6440729738 100644 --- a/tensorflow/lite/builtin_ops.h +++ b/tensorflow/lite/builtin_ops.h @@ -153,6 +153,7 @@ typedef enum { kTfLiteBuiltinDensify = 124, kTfLiteBuiltinSegmentSum = 125, kTfLiteBuiltinBatchMatmul = 126, + kTfLiteBuiltinBroadcastTo = 127, } TfLiteBuiltinOperator; #ifdef __cplusplus diff --git a/tensorflow/lite/c/common.c b/tensorflow/lite/c/common.c index 0264f420b12..4bbbcbbf03c 100644 --- a/tensorflow/lite/c/common.c +++ b/tensorflow/lite/c/common.c @@ -219,6 +219,29 @@ const char* TfLiteTypeGetName(TfLiteType type) { return "Unknown type"; } +// Size of string is not constant, return 0 in such case. +int TfLiteTypeGetSize(TfLiteType type) { + switch (type) { + case kTfLiteUInt8: + case kTfLiteInt8: + return 1; + case kTfLiteBool: + return sizeof(bool); + case kTfLiteInt16: + case kTfLiteFloat16: + return 2; + case kTfLiteFloat32: + case kTfLiteInt32: + return 4; + case kTfLiteInt64: + case kTfLiteComplex64: + case kTfLiteFloat64: + return 8; + default: + return 0; + } +} + TfLiteDelegate TfLiteDelegateCreate() { TfLiteDelegate d = { .data_ = NULL, diff --git a/tensorflow/lite/c/common.h b/tensorflow/lite/c/common.h index 89b25892914..692a8eaf7a2 100644 --- a/tensorflow/lite/c/common.h +++ b/tensorflow/lite/c/common.h @@ -268,6 +268,9 @@ typedef enum { // Return the name of a given type, for error reporting purposes. const char* TfLiteTypeGetName(TfLiteType type); +// Return the size of given type in bytes. Return 0 in in case of string. +int TfLiteTypeGetSize(TfLiteType type); + // SupportedQuantizationTypes. typedef enum TfLiteQuantizationType { // No quantization. 
diff --git a/tensorflow/lite/core/api/flatbuffer_conversions.cc b/tensorflow/lite/core/api/flatbuffer_conversions.cc index 0652c64f6c2..059ad97f551 100644 --- a/tensorflow/lite/core/api/flatbuffer_conversions.cc +++ b/tensorflow/lite/core/api/flatbuffer_conversions.cc @@ -820,6 +820,7 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type, case BuiltinOperator_SCATTER_ND: case BuiltinOperator_DENSIFY: case BuiltinOperator_SEGMENT_SUM: + case BuiltinOperator_BROADCAST_TO: return kTfLiteOk; } return kTfLiteError; diff --git a/tensorflow/lite/kernels/BUILD b/tensorflow/lite/kernels/BUILD index 4351a2c93a2..3157081dd21 100644 --- a/tensorflow/lite/kernels/BUILD +++ b/tensorflow/lite/kernels/BUILD @@ -491,6 +491,7 @@ BUILTIN_KERNEL_SRCS = [ "batch_to_space_nd.cc", "bidirectional_sequence_lstm.cc", "bidirectional_sequence_rnn.cc", + "broadcast_to.cc", "cast.cc", "ceil.cc", "comparisons.cc", @@ -984,6 +985,19 @@ cc_test( ], ) +cc_test( + name = "broadcast_to_test", + size = "small", + srcs = ["broadcast_to_test.cc"], + deps = [ + ":builtin_ops", + ":test_main", + ":test_util", + "//tensorflow/lite:framework", + "@com_google_googletest//:gtest", + ], +) + cc_test( name = "cast_test", size = "small", diff --git a/tensorflow/lite/kernels/broadcast_to.cc b/tensorflow/lite/kernels/broadcast_to.cc new file mode 100644 index 00000000000..0e7baca2277 --- /dev/null +++ b/tensorflow/lite/kernels/broadcast_to.cc @@ -0,0 +1,136 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/lite/kernels/internal/reference/broadcast_to.h" + +#include + +#include +#include + +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/tensor.h" +#include "tensorflow/lite/kernels/kernel_util.h" + +namespace tflite { +namespace ops { +namespace builtin { +namespace broadcastto { + +constexpr int kInputTensor = 0; +constexpr int kShapeTensor = 1; +constexpr int kOutputTensor = 0; +constexpr int kMaxDims = 8; + +struct BroadcastToContext { + BroadcastToContext(TfLiteContext* context, TfLiteNode* node) { + input = GetInput(context, node, kInputTensor); + shape = GetInput(context, node, kShapeTensor); + output = GetOutput(context, node, kOutputTensor); + } + const TfLiteTensor* input; + const TfLiteTensor* shape; + TfLiteTensor* output; +}; + +TfLiteStatus ResizeOutputTensor(TfLiteContext* context, + BroadcastToContext* op_context) { + // Ensures the shape is 1D tensor. + TF_LITE_ENSURE_EQ(context, NumDimensions(op_context->shape), 1); + + // Ensure output dims is not less than input dims. 
+ int input_num_dims = NumDimensions(op_context->input); + int output_num_dims = SizeOfDimension(op_context->shape, 0); + TF_LITE_ENSURE_MSG(context, input_num_dims <= output_num_dims, + "Output shape must be broadcastable from input shape."); + TF_LITE_ENSURE_MSG(context, output_num_dims <= kMaxDims, + "BroadcastTo only supports 1-8D tensor."); + + // Check if output shape is broadcastable from input shape. + auto get_shape_data = [op_context](int i) -> int32_t { + if (op_context->shape->type == kTfLiteInt32) { + return GetTensorData(op_context->shape)[i]; + } else { + return GetTensorData(op_context->shape)[i]; + } + }; + + int extending_dims = output_num_dims - input_num_dims; + for (int idx = 0; idx < input_num_dims; ++idx) { + TF_LITE_ENSURE_MSG(context, + (SizeOfDimension(op_context->input, idx) == 1 || + SizeOfDimension(op_context->input, idx) == + get_shape_data(extending_dims + idx)), + "Output shape must be broadcastable from input shape."); + } + // Resizing the shape of the output tensor. + TfLiteIntArray* output_shape = TfLiteIntArrayCreate(output_num_dims); + std::unique_ptr + scoped_output_shape(output_shape, TfLiteIntArrayFree); + for (int idx = 0; idx < output_num_dims; ++idx) { + output_shape->data[idx] = get_shape_data(idx); + } + + return context->ResizeTensor(context, op_context->output, + scoped_output_shape.release()); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE(context, NumInputs(node) == 2); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + TF_LITE_ENSURE_MSG(context, + (NumDimensions(GetInput(context, node, 0)) <= kMaxDims), + "BroadcastTo only supports 1-8D tensor."); + + BroadcastToContext op_context(context, node); + TF_LITE_ENSURE(context, op_context.shape->type == kTfLiteInt32 || + op_context.shape->type == kTfLiteInt64); + TF_LITE_ENSURE_EQ(context, op_context.input->type, op_context.output->type); + + // Not yet support string type due to the use of memcopy with fixed size. + TF_LITE_ENSURE(context, op_context.input->type != kTfLiteString); + + if (IsConstantTensor(op_context.shape)) { + return ResizeOutputTensor(context, &op_context); + } + + SetTensorToDynamic(op_context.output); + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + BroadcastToContext op_context(context, node); + if (IsDynamicTensor(op_context.output)) { + TF_LITE_ENSURE_OK(context, ResizeOutputTensor(context, &op_context)); + } + + // BroadcastTo op support upto 8 dims, matching the support of Tensorflow. + reference_ops::BroadcastTo( + GetTensorShape(op_context.input), op_context.input->data.raw, + GetTensorShape(op_context.output), op_context.output->data.raw, + op_context.input->type); + return kTfLiteOk; +} + +} // namespace broadcastto + +TfLiteRegistration* Register_BROADCAST_TO() { + static TfLiteRegistration r = {nullptr, nullptr, broadcastto::Prepare, + broadcastto::Eval}; + return &r; +} + +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/lite/kernels/broadcast_to_test.cc b/tensorflow/lite/kernels/broadcast_to_test.cc new file mode 100644 index 00000000000..a36ed352055 --- /dev/null +++ b/tensorflow/lite/kernels/broadcast_to_test.cc @@ -0,0 +1,255 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include + +#include +#include "tensorflow/lite/interpreter.h" +#include "tensorflow/lite/kernels/register.h" +#include "tensorflow/lite/kernels/test_util.h" +#include "tensorflow/lite/model.h" + +namespace tflite { +namespace { +using ::testing::ElementsAreArray; + +template +class BroadcastToOpModel : public SingleOpModel { + public: + // BroadcastTo with dynamic shape. + BroadcastToOpModel(std::initializer_list input_shape, + std::initializer_list shape_shape) { + input_ = AddInput({GetTensorType(), input_shape}); + shape_ = AddInput({GetTensorType(), shape_shape}); + output_ = AddOutput(GetTensorType()); + SetBuiltinOp(BuiltinOperator_BROADCAST_TO, + BuiltinOptions_BroadcastToOptions, + CreateBroadcastToOptions(builder_).Union()); + BuildInterpreter({input_shape, shape_shape}); + } + + // BroadcastTo with const shape. + BroadcastToOpModel(std::initializer_list input_shape, + std::initializer_list shape_shape, + std::initializer_list shape_values) { + input_ = AddInput({GetTensorType(), input_shape}); + shape_ = + AddConstInput(GetTensorType(), shape_values, shape_shape); + output_ = AddOutput(GetTensorType()); + SetBuiltinOp(BuiltinOperator_BROADCAST_TO, + BuiltinOptions_BroadcastToOptions, + CreateBroadcastToOptions(builder_).Union()); + BuildInterpreter({input_shape, shape_shape}); + } + + void SetInput(std::initializer_list data) { + PopulateTensor(input_, data); + } + + void SetShape(std::initializer_list data) { + PopulateTensor(shape_, data); + } + + std::vector GetOutput() { + return ExtractVector(output_); + } + std::vector GetOutputShape() { return GetTensorShape(output_); } + + protected: + int input_; + int shape_; + int output_; +}; + +template +class BroadcastToOpTest : public ::testing::Test {}; + +using DataTypes = ::testing::Types; +TYPED_TEST_SUITE(BroadcastToOpTest, DataTypes); + +#ifdef GTEST_HAS_DEATH_TEST +TYPED_TEST(BroadcastToOpTest, ShapeMustBe1D) { + EXPECT_DEATH( + BroadcastToOpModel({2, 3, 4, 4}, {2, 2}, {2, 3, 4, 4}), ""); + // Non-constant Shape tensor. + BroadcastToOpModel m({2, 3, 4, 4}, {2, 2}); + m.SetShape({2, 3, 4, 4}); + EXPECT_THAT(m.InvokeUnchecked(), kTfLiteError); +} + +TYPED_TEST(BroadcastToOpTest, TooManyDimensions) { + EXPECT_DEATH(BroadcastToOpModel({1, 2, 3, 4, 5, 6, 7, 8, 9}, {9}, + {2, 2, 3, 4, 5, 6, 7, 8, 9}), + "BroadcastTo only supports 1-8D tensor."); + EXPECT_DEATH(BroadcastToOpModel({1, 2, 3, 4, 5, 6, 7, 8, 9}, {9}), + "BroadcastTo only supports 1-8D tensor."); +} + +TYPED_TEST(BroadcastToOpTest, MismatchDimension) { + EXPECT_DEATH(BroadcastToOpModel({2, 4, 1, 2}, {4}, {2, 4, 1, 3}), + "Output shape must be broadcastable from input shape."); + EXPECT_DEATH( + BroadcastToOpModel({2, 4, 1, 2, 3}, {4}, {2, 4, 1, 2}), + "Output shape must be broadcastable from input shape."); + + // Non-constant Shape tensor. 
+ BroadcastToOpModel m1({2, 4, 1, 2}, {4}); + m1.SetShape({2, 3, 4, 4}); + EXPECT_THAT(m1.InvokeUnchecked(), kTfLiteError); + BroadcastToOpModel m2({2, 4, 1, 2}, {5}); + m2.SetShape({1, 2, 3, 4, 4}); + EXPECT_THAT(m2.InvokeUnchecked(), kTfLiteError); +} +#endif + +TYPED_TEST(BroadcastToOpTest, BroadcastTo1DConstTest) { + BroadcastToOpModel m({1}, {1}, {4}); + m.SetInput({3}); + m.Invoke(); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({4})); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({3, 3, 3, 3})); +} + +TYPED_TEST(BroadcastToOpTest, BroadcastTo4DConstTest) { + BroadcastToOpModel m({1, 1, 1, 2}, {4}, {1, 1, 2, 2}); + m.SetInput({3, 4}); + m.Invoke(); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 1, 2, 2})); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({3, 4, 3, 4})); +} + +TYPED_TEST(BroadcastToOpTest, BroadcastTo8DConstTest) { + BroadcastToOpModel m({1, 1, 1, 1, 1, 1, 2, 1}, {8}, + {1, 1, 1, 1, 1, 1, 2, 2}); + m.SetInput({3, 4}); + m.Invoke(); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 1, 1, 1, 1, 1, 2, 2})); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({3, 3, 4, 4})); +} + +TYPED_TEST(BroadcastToOpTest, BroadcastTo1DDynamicTest) { + BroadcastToOpModel m({1}, {1}); + m.SetInput({3}); + m.SetShape({4}); + m.Invoke(); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({4})); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({3, 3, 3, 3})); +} + +TYPED_TEST(BroadcastToOpTest, BroadcastTo4DDynamicTest) { + BroadcastToOpModel m({1, 1, 1, 2}, {4}); + m.SetInput({3, 4}); + m.SetShape({1, 1, 2, 2}); + m.Invoke(); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 1, 2, 2})); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({3, 4, 3, 4})); +} + +TYPED_TEST(BroadcastToOpTest, BroadcastTo8DDynamicTest) { + BroadcastToOpModel m({1, 1, 1, 1, 1, 1, 2, 1}, {8}); + m.SetInput({3, 4}); + m.SetShape({1, 1, 1, 1, 1, 1, 2, 2}); + m.Invoke(); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 1, 1, 1, 1, 1, 2, 2})); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({3, 3, 4, 4})); +} + +TYPED_TEST(BroadcastToOpTest, ComplexBroadcast4DConstTest) { + BroadcastToOpModel m({1, 3, 1, 2}, {4}, {3, 3, 2, 2}); + m.SetInput({1, 2, 3, 4, 5, 6}); + m.Invoke(); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 3, 2, 2})); + EXPECT_THAT( + m.GetOutput(), + ElementsAreArray({1, 2, 1, 2, 3, 4, 3, 4, 5, 6, 5, 6, 1, 2, 1, 2, 3, 4, + 3, 4, 5, 6, 5, 6, 1, 2, 1, 2, 3, 4, 3, 4, 5, 6, 5, 6})); +} + +TYPED_TEST(BroadcastToOpTest, ComplexBroadcast4DDynamicTest) { + BroadcastToOpModel m({1, 3, 1, 2}, {4}); + m.SetInput({1, 2, 3, 4, 5, 6}); + m.SetShape({3, 3, 2, 2}); + m.Invoke(); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 3, 2, 2})); + EXPECT_THAT( + m.GetOutput(), + ElementsAreArray({1, 2, 1, 2, 3, 4, 3, 4, 5, 6, 5, 6, 1, 2, 1, 2, 3, 4, + 3, 4, 5, 6, 5, 6, 1, 2, 1, 2, 3, 4, 3, 4, 5, 6, 5, 6})); +} + +TYPED_TEST(BroadcastToOpTest, ComplexBroadcast6DConstTest) { + BroadcastToOpModel m({1, 2, 1, 3, 1, 2}, {6}, {2, 2, 1, 3, 2, 2}); + m.SetInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); + m.Invoke(); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2, 2, 1, 3, 2, 2})); + EXPECT_THAT(m.GetOutput(), + ElementsAreArray({1, 2, 1, 2, 3, 4, 3, 4, 5, 6, 5, 6, + 7, 8, 7, 8, 9, 10, 9, 10, 11, 12, 11, 12, + 1, 2, 1, 2, 3, 4, 3, 4, 5, 6, 5, 6, + 7, 8, 7, 8, 9, 10, 9, 10, 11, 12, 11, 12})); +} + +TYPED_TEST(BroadcastToOpTest, ComplexBroadcast6DDynamicTest) { + BroadcastToOpModel m({1, 2, 1, 3, 1, 2}, {6}); + m.SetInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); + m.SetShape({2, 2, 1, 3, 2, 2}); + 
m.Invoke(); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2, 2, 1, 3, 2, 2})); + EXPECT_THAT(m.GetOutput(), + ElementsAreArray({1, 2, 1, 2, 3, 4, 3, 4, 5, 6, 5, 6, + 7, 8, 7, 8, 9, 10, 9, 10, 11, 12, 11, 12, + 1, 2, 1, 2, 3, 4, 3, 4, 5, 6, 5, 6, + 7, 8, 7, 8, 9, 10, 9, 10, 11, 12, 11, 12})); +} + +TYPED_TEST(BroadcastToOpTest, ExtendingShape4DConstTest) { + BroadcastToOpModel m({3, 1, 2}, {4}, {3, 3, 2, 2}); + m.SetInput({1, 2, 3, 4, 5, 6}); + m.Invoke(); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 3, 2, 2})); + EXPECT_THAT( + m.GetOutput(), + ElementsAreArray({1, 2, 1, 2, 3, 4, 3, 4, 5, 6, 5, 6, 1, 2, 1, 2, 3, 4, + 3, 4, 5, 6, 5, 6, 1, 2, 1, 2, 3, 4, 3, 4, 5, 6, 5, 6})); +} + +TYPED_TEST(BroadcastToOpTest, NoBroadcastingConstTest) { + BroadcastToOpModel m({3, 1, 2}, {3}, {3, 1, 2}); + m.SetInput({1, 2, 3, 4, 5, 6}); + m.Invoke(); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 1, 2})); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({1, 2, 3, 4, 5, 6})); +} + +TYPED_TEST(BroadcastToOpTest, Int64ShapeConstTest) { + BroadcastToOpModel m({1, 1, 1, 1, 1, 1, 2, 1}, {8}, + {1, 1, 1, 1, 1, 1, 2, 2}); + m.SetInput({3, 4}); + m.Invoke(); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 1, 1, 1, 1, 1, 2, 2})); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({3, 3, 4, 4})); +} + +TYPED_TEST(BroadcastToOpTest, Int64ShapeDDynamicTest) { + BroadcastToOpModel m({1, 1, 1, 1, 1, 1, 2, 1}, {8}); + m.SetInput({3, 4}); + m.SetShape({1, 1, 1, 1, 1, 1, 2, 2}); + m.Invoke(); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 1, 1, 1, 1, 1, 2, 2})); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({3, 3, 4, 4})); +} + +} // namespace +} // namespace tflite diff --git a/tensorflow/lite/kernels/builtin_op_kernels.h b/tensorflow/lite/kernels/builtin_op_kernels.h index 1c73f06487b..fea25f8605c 100644 --- a/tensorflow/lite/kernels/builtin_op_kernels.h +++ b/tensorflow/lite/kernels/builtin_op_kernels.h @@ -39,6 +39,7 @@ TfLiteRegistration* Register_BATCH_TO_SPACE_ND(); TfLiteRegistration* Register_BATCH_MATMUL(); TfLiteRegistration* Register_BIDIRECTIONAL_SEQUENCE_LSTM(); TfLiteRegistration* Register_BIDIRECTIONAL_SEQUENCE_RNN(); +TfLiteRegistration* Register_BROADCAST_TO(); TfLiteRegistration* Register_CAST(); TfLiteRegistration* Register_CEIL(); TfLiteRegistration* Register_CONCATENATION(); diff --git a/tensorflow/lite/kernels/internal/BUILD b/tensorflow/lite/kernels/internal/BUILD index 5acabeb45cd..075c1da9865 100644 --- a/tensorflow/lite/kernels/internal/BUILD +++ b/tensorflow/lite/kernels/internal/BUILD @@ -441,6 +441,7 @@ cc_library( "reference/arg_min_max.h", "reference/batch_matmul.h", "reference/binary_function.h", + "reference/broadcast_to.h", "reference/ceil.h", "reference/comparisons.h", "reference/concatenation.h", diff --git a/tensorflow/lite/kernels/internal/common.h b/tensorflow/lite/kernels/internal/common.h index c45aff9e47b..10cb164e696 100644 --- a/tensorflow/lite/kernels/internal/common.h +++ b/tensorflow/lite/kernels/internal/common.h @@ -665,6 +665,13 @@ inline int SubscriptToIndex(const NdArrayDesc<5>& desc, int indexes[5]) { indexes[4] * desc.strides[4]; } +inline int SubscriptToIndex(const NdArrayDesc<8>& desc, int indexes[8]) { + return indexes[0] * desc.strides[0] + indexes[1] * desc.strides[1] + + indexes[2] * desc.strides[2] + indexes[3] * desc.strides[3] + + indexes[4] * desc.strides[4] + indexes[5] * desc.strides[5] + + indexes[6] * desc.strides[6] + indexes[7] * desc.strides[7]; +} + // Given the dimensions of the operands for an element-wise binary 
broadcast, // adjusts them so that they can be directly iterated over with simple loops. // Returns the adjusted dims as instances of NdArrayDesc in 'desc0_out' and diff --git a/tensorflow/lite/kernels/internal/reference/broadcast_to.h b/tensorflow/lite/kernels/internal/reference/broadcast_to.h new file mode 100644 index 00000000000..69f4531ba14 --- /dev/null +++ b/tensorflow/lite/kernels/internal/reference/broadcast_to.h @@ -0,0 +1,90 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_TO_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_TO_H_ + +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/common.h" + +namespace tflite { +namespace reference_ops { +template +void BroadcastImpl(const NdArrayDesc& input_desc, const char* input_data, + const NdArrayDesc& output_desc, char* output_data, + int indexes[N], int dim, const int last_broadcasting_dim, + const int type_size) { + // Copy data from input to output. + if (dim == last_broadcasting_dim) { + int copy_size = output_desc.strides[dim] * type_size; + const char* data_src = + input_data + SubscriptToIndex(input_desc, indexes) * type_size; + char* data_dst = + output_data + SubscriptToIndex(output_desc, indexes) * type_size; + for (int i = 0; i < output_desc.extents[dim]; ++i, data_dst += copy_size) { + memcpy(data_dst, data_src, copy_size); + } + return; + } + + // Recursive call to find the next broadcasting. + for (indexes[dim] = 0; indexes[dim] < input_desc.extents[dim]; + ++indexes[dim]) { + BroadcastImpl(input_desc, input_data, output_desc, output_data, indexes, + dim + 1, last_broadcasting_dim, type_size); + } + + // Duplicate data in output tensor. + indexes[dim] = 0; + if (input_desc.extents[dim] != output_desc.extents[dim]) { + int copy_size = output_desc.strides[dim] * type_size; + char* data_src = + output_data + SubscriptToIndex(output_desc, indexes) * type_size; + char* data_dst = data_src + copy_size; + for (int i = 1; i < output_desc.extents[dim]; ++i, data_dst += copy_size) { + memcpy(data_dst, data_src, copy_size); + } + } +} + +template +inline void BroadcastTo(const RuntimeShape& unextended_input_shape, + const char* input_data, + const RuntimeShape& unextended_output_shape, + char* output_data, TfLiteType data_type) { + NdArrayDesc input_desc; + NdArrayDesc output_desc; + CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_input_shape), + &input_desc); + CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape), + &output_desc); + + // Get the last dimension has broadcasting. At this dimension, the data is + // copied from input tensor to output tensor. + int last_broadcast_dim = 0; + for (int i = N - 1; i > 0; --i) { + if (input_desc.extents[i] != output_desc.extents[i]) { + last_broadcast_dim = i; + break; + } + } + + // Broadcasting using memcpy. 
+ int indexes[N] = {0}; + BroadcastImpl(input_desc, input_data, output_desc, output_data, indexes, 0, + last_broadcast_dim, TfLiteTypeGetSize(data_type)); +} +} // namespace reference_ops +} // namespace tflite +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_TO_H_ diff --git a/tensorflow/lite/kernels/register.cc b/tensorflow/lite/kernels/register.cc index 275340ec225..da7480d448d 100644 --- a/tensorflow/lite/kernels/register.cc +++ b/tensorflow/lite/kernels/register.cc @@ -292,6 +292,7 @@ BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_BATCH_MATMUL, Register_BATCH_MATMUL(), /* min_version = */ 1, /* max_version = */ 2); + AddBuiltin(BuiltinOperator_BROADCAST_TO, Register_BROADCAST_TO()); AddCustom("NumericVerify", tflite::ops::custom::Register_NUMERIC_VERIFY()); // TODO(andrewharp, ahentz): Move these somewhere more appropriate so that // custom ops aren't always included by default. diff --git a/tensorflow/lite/kernels/register_ref.cc b/tensorflow/lite/kernels/register_ref.cc index 233520e2165..b1dd31ab168 100644 --- a/tensorflow/lite/kernels/register_ref.cc +++ b/tensorflow/lite/kernels/register_ref.cc @@ -139,6 +139,7 @@ TfLiteRegistration* Register_DEPTH_TO_SPACE_REF(); TfLiteRegistration* Register_SELECT_V2(); TfLiteRegistration* Register_SEGMENT_SUM(); TfLiteRegistration* Register_BATCH_MATMUL_REF(); +TfLiteRegistration* Register_BROADCAST_TO(); namespace { @@ -207,6 +208,7 @@ BuiltinRefOpResolver::BuiltinRefOpResolver() { Register_SPACE_TO_BATCH_ND_REF()); AddBuiltin(BuiltinOperator_BATCH_TO_SPACE_ND, Register_BATCH_TO_SPACE_ND_REF()); + AddBuiltin(BuiltinOperator_BROADCAST_TO, Register_BROADCAST_TO()); AddBuiltin(BuiltinOperator_MUL, Register_MUL_REF()); AddBuiltin(BuiltinOperator_L2_NORMALIZATION, Register_L2NORM_REF()); AddBuiltin(BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION, diff --git a/tensorflow/lite/schema/schema.fbs b/tensorflow/lite/schema/schema.fbs index 878acde1e16..949d769b457 100644 --- a/tensorflow/lite/schema/schema.fbs +++ b/tensorflow/lite/schema/schema.fbs @@ -349,7 +349,8 @@ enum BuiltinOperator : byte { SELECT_V2 = 123, DENSIFY = 124, SEGMENT_SUM = 125, - BATCH_MATMUL = 126 + BATCH_MATMUL = 126, + BROADCAST_TO = 127 } @@ -455,7 +456,8 @@ union BuiltinOptions { SelectV2Options, DensifyOptions, SegmentSumOptions, - BatchMatMulOptions + BatchMatMulOptions, + BroadcastToOptions } enum Padding : byte { SAME, VALID } @@ -975,6 +977,9 @@ table BatchMatMulOptions { adj_y:bool; } +table BroadcastToOptions { +} + // An OperatorCode can be an enum value (BuiltinOperator) if the operator is a // builtin, or a string if the operator is custom. 
table OperatorCode { diff --git a/tensorflow/lite/schema/schema_generated.h b/tensorflow/lite/schema/schema_generated.h index a6117dc72ab..7bf79e52e27 100755 --- a/tensorflow/lite/schema/schema_generated.h +++ b/tensorflow/lite/schema/schema_generated.h @@ -349,6 +349,9 @@ struct SegmentSumOptionsT; struct BatchMatMulOptions; struct BatchMatMulOptionsT; +struct BroadcastToOptions; +struct BroadcastToOptionsT; + struct OperatorCode; struct OperatorCodeT; @@ -781,11 +784,12 @@ enum BuiltinOperator { BuiltinOperator_DENSIFY = 124, BuiltinOperator_SEGMENT_SUM = 125, BuiltinOperator_BATCH_MATMUL = 126, + BuiltinOperator_BROADCAST_TO = 127, BuiltinOperator_MIN = BuiltinOperator_ADD, - BuiltinOperator_MAX = BuiltinOperator_BATCH_MATMUL + BuiltinOperator_MAX = BuiltinOperator_BROADCAST_TO }; -inline const BuiltinOperator (&EnumValuesBuiltinOperator())[127] { +inline const BuiltinOperator (&EnumValuesBuiltinOperator())[128] { static const BuiltinOperator values[] = { BuiltinOperator_ADD, BuiltinOperator_AVERAGE_POOL_2D, @@ -913,13 +917,14 @@ inline const BuiltinOperator (&EnumValuesBuiltinOperator())[127] { BuiltinOperator_SELECT_V2, BuiltinOperator_DENSIFY, BuiltinOperator_SEGMENT_SUM, - BuiltinOperator_BATCH_MATMUL + BuiltinOperator_BATCH_MATMUL, + BuiltinOperator_BROADCAST_TO }; return values; } inline const char * const *EnumNamesBuiltinOperator() { - static const char * const names[128] = { + static const char * const names[129] = { "ADD", "AVERAGE_POOL_2D", "CONCATENATION", @@ -1047,13 +1052,14 @@ inline const char * const *EnumNamesBuiltinOperator() { "DENSIFY", "SEGMENT_SUM", "BATCH_MATMUL", + "BROADCAST_TO", nullptr }; return names; } inline const char *EnumNameBuiltinOperator(BuiltinOperator e) { - if (flatbuffers::IsOutRange(e, BuiltinOperator_ADD, BuiltinOperator_BATCH_MATMUL)) return ""; + if (flatbuffers::IsOutRange(e, BuiltinOperator_ADD, BuiltinOperator_BROADCAST_TO)) return ""; const size_t index = static_cast(e); return EnumNamesBuiltinOperator()[index]; } @@ -1161,11 +1167,12 @@ enum BuiltinOptions { BuiltinOptions_DensifyOptions = 99, BuiltinOptions_SegmentSumOptions = 100, BuiltinOptions_BatchMatMulOptions = 101, + BuiltinOptions_BroadcastToOptions = 102, BuiltinOptions_MIN = BuiltinOptions_NONE, - BuiltinOptions_MAX = BuiltinOptions_BatchMatMulOptions + BuiltinOptions_MAX = BuiltinOptions_BroadcastToOptions }; -inline const BuiltinOptions (&EnumValuesBuiltinOptions())[102] { +inline const BuiltinOptions (&EnumValuesBuiltinOptions())[103] { static const BuiltinOptions values[] = { BuiltinOptions_NONE, BuiltinOptions_Conv2DOptions, @@ -1268,13 +1275,14 @@ inline const BuiltinOptions (&EnumValuesBuiltinOptions())[102] { BuiltinOptions_SelectV2Options, BuiltinOptions_DensifyOptions, BuiltinOptions_SegmentSumOptions, - BuiltinOptions_BatchMatMulOptions + BuiltinOptions_BatchMatMulOptions, + BuiltinOptions_BroadcastToOptions }; return values; } inline const char * const *EnumNamesBuiltinOptions() { - static const char * const names[103] = { + static const char * const names[104] = { "NONE", "Conv2DOptions", "DepthwiseConv2DOptions", @@ -1377,13 +1385,14 @@ inline const char * const *EnumNamesBuiltinOptions() { "DensifyOptions", "SegmentSumOptions", "BatchMatMulOptions", + "BroadcastToOptions", nullptr }; return names; } inline const char *EnumNameBuiltinOptions(BuiltinOptions e) { - if (flatbuffers::IsOutRange(e, BuiltinOptions_NONE, BuiltinOptions_BatchMatMulOptions)) return ""; + if (flatbuffers::IsOutRange(e, BuiltinOptions_NONE, BuiltinOptions_BroadcastToOptions)) return ""; 
const size_t index = static_cast(e); return EnumNamesBuiltinOptions()[index]; } @@ -1796,6 +1805,10 @@ template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_BatchMatMulOptions; }; +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_BroadcastToOptions; +}; + struct BuiltinOptionsUnion { BuiltinOptions type; void *value; @@ -2636,6 +2649,14 @@ struct BuiltinOptionsUnion { return type == BuiltinOptions_BatchMatMulOptions ? reinterpret_cast(value) : nullptr; } + tflite::BroadcastToOptionsT *AsBroadcastToOptions() { + return type == BuiltinOptions_BroadcastToOptions ? + reinterpret_cast(value) : nullptr; + } + const tflite::BroadcastToOptionsT *AsBroadcastToOptions() const { + return type == BuiltinOptions_BroadcastToOptions ? + reinterpret_cast(value) : nullptr; + } }; bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type); @@ -9310,6 +9331,46 @@ inline flatbuffers::Offset CreateBatchMatMulOptions( flatbuffers::Offset CreateBatchMatMulOptions(flatbuffers::FlatBufferBuilder &_fbb, const BatchMatMulOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +struct BroadcastToOptionsT : public flatbuffers::NativeTable { + typedef BroadcastToOptions TableType; + BroadcastToOptionsT() { + } +}; + +struct BroadcastToOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef BroadcastToOptionsT NativeTableType; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + BroadcastToOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(BroadcastToOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const BroadcastToOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct BroadcastToOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit BroadcastToOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + BroadcastToOptionsBuilder &operator=(const BroadcastToOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateBroadcastToOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + BroadcastToOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateBroadcastToOptions(flatbuffers::FlatBufferBuilder &_fbb, const BroadcastToOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + struct OperatorCodeT : public flatbuffers::NativeTable { typedef OperatorCode TableType; tflite::BuiltinOperator builtin_code; @@ -9749,6 +9810,9 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { const tflite::BatchMatMulOptions *builtin_options_as_BatchMatMulOptions() const { return builtin_options_type() == tflite::BuiltinOptions_BatchMatMulOptions ? static_cast(builtin_options()) : nullptr; } + const tflite::BroadcastToOptions *builtin_options_as_BroadcastToOptions() const { + return builtin_options_type() == tflite::BuiltinOptions_BroadcastToOptions ? 
static_cast(builtin_options()) : nullptr; + } const flatbuffers::Vector *custom_options() const { return GetPointer *>(VT_CUSTOM_OPTIONS); } @@ -10189,6 +10253,10 @@ template<> inline const tflite::BatchMatMulOptions *Operator::builtin_options_as return builtin_options_as_BatchMatMulOptions(); } +template<> inline const tflite::BroadcastToOptions *Operator::builtin_options_as() const { + return builtin_options_as_BroadcastToOptions(); +} + struct OperatorBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; @@ -13656,6 +13724,29 @@ inline flatbuffers::Offset CreateBatchMatMulOptions(flatbuff _adj_y); } +inline BroadcastToOptionsT *BroadcastToOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new BroadcastToOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void BroadcastToOptions::UnPackTo(BroadcastToOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset BroadcastToOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BroadcastToOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateBroadcastToOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateBroadcastToOptions(flatbuffers::FlatBufferBuilder &_fbb, const BroadcastToOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const BroadcastToOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateBroadcastToOptions( + _fbb); +} + inline OperatorCodeT *OperatorCode::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new OperatorCodeT(); UnPackTo(_o, _resolver); @@ -14465,6 +14556,10 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob auto ptr = reinterpret_cast(obj); return verifier.VerifyTable(ptr); } + case BuiltinOptions_BroadcastToOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } default: return true; } } @@ -14887,6 +14982,10 @@ inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, c auto ptr = reinterpret_cast(obj); return ptr->UnPack(resolver); } + case BuiltinOptions_BroadcastToOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } default: return nullptr; } } @@ -15297,6 +15396,10 @@ inline flatbuffers::Offset BuiltinOptionsUnion::Pack(flatbuffers::FlatBuff auto ptr = reinterpret_cast(value); return CreateBatchMatMulOptions(_fbb, ptr, _rehasher).Union(); } + case BuiltinOptions_BroadcastToOptions: { + auto ptr = reinterpret_cast(value); + return CreateBroadcastToOptions(_fbb, ptr, _rehasher).Union(); + } default: return 0; } } @@ -15707,6 +15810,10 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FL value = new tflite::BatchMatMulOptionsT(*reinterpret_cast(u.value)); break; } + case BuiltinOptions_BroadcastToOptions: { + value = new tflite::BroadcastToOptionsT(*reinterpret_cast(u.value)); + break; + } default: break; } @@ -16219,6 +16326,11 @@ inline void BuiltinOptionsUnion::Reset() { delete ptr; break; } + case BuiltinOptions_BroadcastToOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } default: break; } value = nullptr; @@ -16282,4 +16394,4 @@ inline std::unique_ptr UnPackSizePrefixedModel( } // namespace tflite -#endif // 
FLATBUFFERS_GENERATED_SCHEMA_TFLITE_H_ +#endif // FLATBUFFERS_GENERATED_SCHEMA_TFLITE_H_ \ No newline at end of file diff --git a/tensorflow/lite/toco/model.h b/tensorflow/lite/toco/model.h index b42fed6fbc1..2478ca6f4a3 100644 --- a/tensorflow/lite/toco/model.h +++ b/tensorflow/lite/toco/model.h @@ -43,6 +43,7 @@ enum class OperatorType : uint8 { kAveragePool, kBatchMatMul, kBatchNormalization, + kBroadcastTo, kCeil, kConv, kConcatenation, diff --git a/tensorflow/lite/toco/tflite/op_version.cc b/tensorflow/lite/toco/tflite/op_version.cc index b16f282bedd..3793bb50c9f 100644 --- a/tensorflow/lite/toco/tflite/op_version.cc +++ b/tensorflow/lite/toco/tflite/op_version.cc @@ -63,6 +63,7 @@ std::string GetMinimumRuntimeVersionForModel(const Model& model) { {{OperatorType::kBatchToSpaceND, 1}, "1.6.0"}, {{OperatorType::kBatchToSpaceND, 2}, "1.14.0"}, {{OperatorType::kBatchMatMul, 1}, kPendingReleaseOpVersion}, + {{OperatorType::kBroadcastTo, 1}, kPendingReleaseOpVersion}, {{OperatorType::kCast, 1}, "1.5.0"}, {{OperatorType::kConcatenation, 1}, "1.5.0"}, {{OperatorType::kConcatenation, 2}, "1.14.0"}, diff --git a/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h b/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h index 89b25892914..692a8eaf7a2 100644 --- a/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h +++ b/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h @@ -268,6 +268,9 @@ typedef enum { // Return the name of a given type, for error reporting purposes. const char* TfLiteTypeGetName(TfLiteType type); +// Return the size of given type in bytes. Return 0 in in case of string. +int TfLiteTypeGetSize(TfLiteType type); + // SupportedQuantizationTypes. typedef enum TfLiteQuantizationType { // No quantization. diff --git a/tensorflow/lite/tools/versioning/runtime_version.cc b/tensorflow/lite/tools/versioning/runtime_version.cc index c2e3f279a90..8938c0b5d4b 100644 --- a/tensorflow/lite/tools/versioning/runtime_version.cc +++ b/tensorflow/lite/tools/versioning/runtime_version.cc @@ -59,6 +59,7 @@ std::string FindMinimumRuntimeVersionForOp(tflite::BuiltinOperator op_code, {{BuiltinOperator_AVERAGE_POOL_2D, 3}, "2.3.0"}, {{BuiltinOperator_BATCH_MATMUL, 1}, "2.3.0"}, {{BuiltinOperator_BATCH_MATMUL, 2}, "2.3.0"}, + {{BuiltinOperator_BROADCAST_TO, 1}, kPendingReleaseVersion}, {{BuiltinOperator_CONV_2D, 1}, "1.5.0"}, {{BuiltinOperator_CONV_2D, 2}, "1.14.0"}, {{BuiltinOperator_CONV_2D, 3}, "1.14.0"}, diff --git a/tensorflow/lite/tools/versioning/runtime_version_test.cc b/tensorflow/lite/tools/versioning/runtime_version_test.cc index c32de228cc3..df1ca46410c 100644 --- a/tensorflow/lite/tools/versioning/runtime_version_test.cc +++ b/tensorflow/lite/tools/versioning/runtime_version_test.cc @@ -47,7 +47,7 @@ TEST(OpVersionTest, OpversionMissing) { EXPECT_NE(runtime_version, "") << "Please add the version " << version << " of " << tflite::EnumNamesBuiltinOperator()[op_code] - << " runtime_version.cc"; + << " to runtime_version.cc"; } } } From 4c0a09fc302e193df54f127ca59f465e4966b8db Mon Sep 17 00:00:00 2001 From: Zhenyu Tan Date: Mon, 20 Jul 2020 21:20:33 -0700 Subject: [PATCH 0900/2522] fixit for resource_scatter_update. 
PiperOrigin-RevId: 322286887 Change-Id: I9c2293d00c371b9cab279366bc893e509e1ded3b --- tensorflow/python/keras/optimizer_v2/optimizer_v2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py index fb149999141..4e1aba1f3b4 100644 --- a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py +++ b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py @@ -46,7 +46,6 @@ from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gen_resource_variable_ops from tensorflow.python.ops import gradients from tensorflow.python.ops import math_ops -from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import variables as tf_variables from tensorflow.python.platform import tf_logging as logging from tensorflow.python.saved_model import revived_types @@ -1159,7 +1158,8 @@ class OptimizerV2(trackable.Trackable): def _resource_scatter_update(self, x, i, v): with ops.control_dependencies( - [resource_variable_ops.resource_scatter_update(x.handle, i, v)]): + [gen_resource_variable_ops.ResourceScatterUpdate( + resource=x.handle, indices=i, updates=v)]): return x.value() @property From 00efa42e11c4b680bd2ea4baf0b07f2cf71c8d31 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 20 Jul 2020 21:23:44 -0700 Subject: [PATCH 0901/2522] Internal change PiperOrigin-RevId: 322287162 Change-Id: Ie532abc0d8add6f28ce0d8646243bd120528daff --- tensorflow/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 20bd810d1a3..a515b3e8939 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -711,8 +711,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "78f543e5a1cd46b5232d5479dd513d2110f52e96" - LLVM_SHA256 = "9adea3b2b150d5a56ff87f630faa832b5736da0ba4be979814bdfc7ffe782dec" + LLVM_COMMIT = "fc24d1eaddd8c0618e3ef3ab395029a0238d4568" + LLVM_SHA256 = "da5ad5dcf9d5360bc5c3715419bf0e747dba37d61725f85765a6e24844354212" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From f3d0c101095784be8d397b0dbff378319aad6b15 Mon Sep 17 00:00:00 2001 From: WindQAQ Date: Mon, 20 Jul 2020 21:34:56 -0700 Subject: [PATCH 0902/2522] Enable eager test for GradientDescentOptimizerTest --- .../optimizer_v2/gradient_descent_test.py | 159 +++++++++--------- 1 file changed, 79 insertions(+), 80 deletions(-) diff --git a/tensorflow/python/keras/optimizer_v2/gradient_descent_test.py b/tensorflow/python/keras/optimizer_v2/gradient_descent_test.py index 0084f04bdd9..1f5a1004b48 100644 --- a/tensorflow/python/keras/optimizer_v2/gradient_descent_test.py +++ b/tensorflow/python/keras/optimizer_v2/gradient_descent_test.py @@ -149,29 +149,28 @@ class GradientDescentOptimizerTest(test.TestCase, parameterized.TestCase): self.evaluate(var0)) self.assertAllCloseAccordingToType([3.0 - 1.0], self.evaluate(var1)) + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testMinimizeSparseResourceVariable(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. 
- with ops.Graph().as_default(): - for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - var0 = variables.Variable([[1.0, 2.0]], dtype=dtype) - var1 = variables.Variable([3.0], dtype=dtype) - x = constant_op.constant([[4.0], [5.0]], dtype=dtype) + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + var0 = variables.Variable([[1.0, 2.0]], dtype=dtype) + var1 = variables.Variable([3.0], dtype=dtype) + x = constant_op.constant([[4.0], [5.0]], dtype=dtype) - def loss(): - pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x) # pylint: disable=cell-var-from-loop - pred += var1 # pylint: disable=cell-var-from-loop - return pred * pred + def loss(): + pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x) # pylint: disable=cell-var-from-loop + pred += var1 # pylint: disable=cell-var-from-loop + return pred * pred - sgd_op = gradient_descent.SGD(1.0).minimize(loss, [var0, var1]) - self.evaluate(variables.global_variables_initializer()) - # Run 1 step of sgd - self.evaluate(sgd_op) - # Validate updated params - np_pred = 1.0 * 4.0 + 2.0 * 5.0 + 3.0 - np_grad = 2 * np_pred - self.assertAllCloseAccordingToType( - [[1.0 - np_grad * 4.0, 2.0 - np_grad * 5.0]], self.evaluate(var0)) - self.assertAllCloseAccordingToType([3.0 - np_grad], self.evaluate(var1)) + sgd_op = gradient_descent.SGD(1.0).minimize(loss, [var0, var1]) + self.evaluate(variables.global_variables_initializer()) + # Run 1 step of sgd + self.evaluate(sgd_op) + # Validate updated params + np_pred = 1.0 * 4.0 + 2.0 * 5.0 + 3.0 + np_grad = 2 * np_pred + self.assertAllCloseAccordingToType( + [[1.0 - np_grad * 4.0, 2.0 - np_grad * 5.0]], self.evaluate(var0)) + self.assertAllCloseAccordingToType([3.0 - np_grad], self.evaluate(var1)) def testTensorLearningRate(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: @@ -191,72 +190,72 @@ class GradientDescentOptimizerTest(test.TestCase, parameterized.TestCase): self.assertAllCloseAccordingToType([3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01], self.evaluate(var1)) + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testGradWrtRef(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - with ops.Graph().as_default(): - for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - opt = gradient_descent.SGD(3.0) - values = [1.0, 3.0] - vars_ = [variables.Variable([v], dtype=dtype) for v in values] - loss = lambda: vars_[0] + vars_[1] # pylint: disable=cell-var-from-loop - grads_and_vars = opt._compute_gradients(loss, vars_) - self.evaluate(variables.global_variables_initializer()) - for grad, _ in grads_and_vars: - self.assertAllCloseAccordingToType([1.0], self.evaluate(grad)) + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + opt = gradient_descent.SGD(3.0) + values = [1.0, 3.0] + vars_ = [variables.Variable([v], dtype=dtype) for v in values] + loss = lambda: vars_[0] + vars_[1] # pylint: disable=cell-var-from-loop + grads_and_vars = opt._compute_gradients(loss, vars_) + self.evaluate(variables.global_variables_initializer()) + for grad, _ in grads_and_vars: + self.assertAllCloseAccordingToType([1.0], self.evaluate(grad)) + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testSparseBasic(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. 
- with ops.Graph().as_default(): - for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - var0 = variables.Variable([[1.0], [2.0]], dtype=dtype) - var1 = variables.Variable([[3.0], [4.0]], dtype=dtype) - grads0 = ops.IndexedSlices( - constant_op.constant([0.1], shape=[1, 1], dtype=dtype), - constant_op.constant([0]), constant_op.constant([2, 1])) - grads1 = ops.IndexedSlices( - constant_op.constant([0.01], shape=[1, 1], dtype=dtype), - constant_op.constant([1]), constant_op.constant([2, 1])) - sgd_op = gradient_descent.SGD(3.0).apply_gradients( - zip([grads0, grads1], [var0, var1])) - self.evaluate(variables.global_variables_initializer()) - # Run 1 step of sgd - self.evaluate(sgd_op) - # Validate updated params - self.assertAllCloseAccordingToType([[1.0 - 3.0 * 0.1], [2.0]], - self.evaluate(var0)) - self.assertAllCloseAccordingToType([[3.0], [4.0 - 3.0 * 0.01]], - self.evaluate(var1)) + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + var0 = variables.Variable([[1.0], [2.0]], dtype=dtype) + var1 = variables.Variable([[3.0], [4.0]], dtype=dtype) + grads0 = ops.IndexedSlices( + constant_op.constant([0.1], shape=[1, 1], dtype=dtype), + constant_op.constant([0]), constant_op.constant([2, 1])) + grads1 = ops.IndexedSlices( + constant_op.constant([0.01], shape=[1, 1], dtype=dtype), + constant_op.constant([1]), constant_op.constant([2, 1])) + sgd_op = gradient_descent.SGD(3.0).apply_gradients( + zip([grads0, grads1], [var0, var1])) + self.evaluate(variables.global_variables_initializer()) + # Run 1 step of sgd + self.evaluate(sgd_op) + # Validate updated params + self.assertAllCloseAccordingToType([[1.0 - 3.0 * 0.1], [2.0]], + self.evaluate(var0)) + self.assertAllCloseAccordingToType([[3.0], [4.0 - 3.0 * 0.01]], + self.evaluate(var1)) + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testSparseBasicWithLearningRateDecay(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. 
- with ops.Graph().as_default(): - for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - var0 = variables.Variable([[1.0], [2.0]], dtype=dtype) - var1 = variables.Variable([[3.0], [4.0]], dtype=dtype) - grads0 = ops.IndexedSlices( - constant_op.constant([0.1], shape=[1, 1], dtype=dtype), - constant_op.constant([0]), constant_op.constant([2, 1])) - grads1 = ops.IndexedSlices( - constant_op.constant([0.01], shape=[1, 1], dtype=dtype), - constant_op.constant([1]), constant_op.constant([2, 1])) - sgd_op = gradient_descent.SGD( - 3.0, decay=0.5).apply_gradients( - zip([grads0, grads1], [var0, var1])) - self.evaluate(variables.global_variables_initializer()) - # Run 2 steps of sgd - self.evaluate(sgd_op) - # Validate updated params - self.assertAllCloseAccordingToType([[1.0 - 3.0 * 0.1], [2.0]], - self.evaluate(var0)) - self.assertAllCloseAccordingToType([[3.0], [4.0 - 3.0 * 0.01]], - self.evaluate(var1)) + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + var0 = variables.Variable([[1.0], [2.0]], dtype=dtype) + var1 = variables.Variable([[3.0], [4.0]], dtype=dtype) + grads0 = ops.IndexedSlices( + constant_op.constant([0.1], shape=[1, 1], dtype=dtype), + constant_op.constant([0]), constant_op.constant([2, 1])) + grads1 = ops.IndexedSlices( + constant_op.constant([0.01], shape=[1, 1], dtype=dtype), + constant_op.constant([1]), constant_op.constant([2, 1])) - self.evaluate(sgd_op) - # Validate updated params - self.assertAllCloseAccordingToType( - [[1.0 - 3.0 * 0.1 - 2.0 * 0.1], [2.0]], self.evaluate(var0)) - self.assertAllCloseAccordingToType( - [[3.0], [4.0 - 3.0 * 0.01 - 2.0 * 0.01]], self.evaluate(var1)) + opt = gradient_descent.SGD(3.0, decay=0.5) + update_op = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + self.evaluate(variables.global_variables_initializer()) + # Run 2 steps of sgd + self.evaluate(update_op) + # Validate updated params + self.assertAllCloseAccordingToType([[1.0 - 3.0 * 0.1], [2.0]], + self.evaluate(var0)) + self.assertAllCloseAccordingToType([[3.0], [4.0 - 3.0 * 0.01]], + self.evaluate(var1)) + + if context.executing_eagerly(): + opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + else: + self.evaluate(update_op) + # Validate updated params + self.assertAllCloseAccordingToType( + [[1.0 - 3.0 * 0.1 - 2.0 * 0.1], [2.0]], self.evaluate(var0)) + self.assertAllCloseAccordingToType( + [[3.0], [4.0 - 3.0 * 0.01 - 2.0 * 0.01]], self.evaluate(var1)) def testCapturingInDefunWhileExecutingEagerly(self): with context.eager_mode(): From 12bcf150bd5252a676b6c90bdf11aff3823931ec Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 20 Jul 2020 21:41:00 -0700 Subject: [PATCH 0903/2522] Add `defines` propagation to `cc_header_only_library` This matches the behavior of the standard cc rules, and keeps us from needing to find header-only versions of dependencies that require `defines` propagation, just to put them in `extra_deps`. 
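To make the motivation concrete, the sketch below shows a header whose meaning depends on a -D flag that its owning cc_library sets through `defines`. The file name feature_x.h, the FEATURE_X_ENABLED macro, and FeatureXBufferSize are hypothetical, not taken from this change. If the header-only library gathers just the headers and drops those defines, code compiled against it silently takes the wrong branch relative to code linked against the real library.

    // feature_x.h -- hypothetical header owned by a cc_library that declares
    //   defines = ["FEATURE_X_ENABLED"]
    // Every target that includes this header must see the same define, or the
    // two sides disagree about which branch was compiled.
    #ifdef FEATURE_X_ENABLED
    inline int FeatureXBufferSize() { return 1024; }
    #else
    inline int FeatureXBufferSize() { return 0; }
    #endif

With this change, the gathered-parameters rule collects the transitive `defines` of `original_deps` and re-exports them through CcInfo, so a header-only consumer sees the same preprocessor state as a direct dependent.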
PiperOrigin-RevId: 322289122 Change-Id: I11710bd21ee5196d8e68a73340cc60317b0fdc54 --- tensorflow/tensorflow.bzl | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 6139fc9372a..6ce638d41ee 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -1765,6 +1765,29 @@ def transitive_hdrs(name, deps = [], **kwargs): _transitive_hdrs(name = name + "_gather", deps = deps) native.filegroup(name = name, srcs = [":" + name + "_gather"]) +# Bazel rule for collecting the transitive parameters from a set of dependencies into a library. +# Propagates defines. +def _transitive_parameters_library_impl(ctx): + defines = depset( + transitive = [dep[CcInfo].compilation_context.defines for dep in ctx.attr.original_deps], + ) + return CcInfo( + compilation_context = cc_common.create_compilation_context( + defines = depset(direct = defines.to_list()), + ), + ) + +_transitive_parameters_library = rule( + attrs = { + "original_deps": attr.label_list( + allow_empty = True, + allow_files = True, + providers = [CcInfo], + ), + }, + implementation = _transitive_parameters_library_impl, +) + # Create a header only library that includes all the headers exported by # the libraries in deps. # @@ -1783,11 +1806,15 @@ def transitive_hdrs(name, deps = [], **kwargs): # def cc_header_only_library(name, deps = [], includes = [], extra_deps = [], **kwargs): _transitive_hdrs(name = name + "_gather", deps = deps) + _transitive_parameters_library( + name = name + "_gathered_parameters", + original_deps = deps, + ) cc_library( name = name, hdrs = [":" + name + "_gather"], includes = includes, - deps = extra_deps, + deps = [":" + name + "_gathered_parameters"] + extra_deps, **kwargs ) From 1030be3e16b4609d0e0858a9180c49b1321c9b98 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 20 Jul 2020 21:53:32 -0700 Subject: [PATCH 0904/2522] Add `includes` propagation to `cc_header_only_library` This matches the behavior of the standard cc rules, and keeps us from needing to find header-only versions of dependencies that require `includes` propagation, just to put them in `extra_deps`. PiperOrigin-RevId: 322290260 Change-Id: I211abe5b68fd84b865adb72b8249dee8eae985d4 --- tensorflow/tensorflow.bzl | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 6ce638d41ee..1bf4b24559d 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -1766,14 +1766,30 @@ def transitive_hdrs(name, deps = [], **kwargs): native.filegroup(name = name, srcs = [":" + name + "_gather"]) # Bazel rule for collecting the transitive parameters from a set of dependencies into a library. -# Propagates defines. +# Propagates defines and includes. 
def _transitive_parameters_library_impl(ctx): defines = depset( transitive = [dep[CcInfo].compilation_context.defines for dep in ctx.attr.original_deps], ) + system_includes = depset( + transitive = [dep[CcInfo].compilation_context.system_includes for dep in ctx.attr.original_deps], + ) + includes = depset( + transitive = [dep[CcInfo].compilation_context.includes for dep in ctx.attr.original_deps], + ) + quote_includes = depset( + transitive = [dep[CcInfo].compilation_context.quote_includes for dep in ctx.attr.original_deps], + ) + framework_includes = depset( + transitive = [dep[CcInfo].compilation_context.framework_includes for dep in ctx.attr.original_deps], + ) return CcInfo( compilation_context = cc_common.create_compilation_context( defines = depset(direct = defines.to_list()), + system_includes = depset(direct = system_includes.to_list()), + includes = depset(direct = includes.to_list()), + quote_includes = depset(direct = quote_includes.to_list()), + framework_includes = depset(direct = framework_includes.to_list()), ), ) @@ -1792,12 +1808,9 @@ _transitive_parameters_library = rule( # the libraries in deps. # # **NOTE**: The headers brought in are **NOT** fully transitive; certain -# deep headers may be missing. Furthermore, the `includes` argument of -# cc_libraries in the dependencies are *not* going to be respected -# when you use cc_header_only_library. Some cases where this creates -# problems include: Eigen, grpc, MLIR. In cases such as these, you must -# find a header-only version of the cc_library rule you care about and -# link it *directly* in addition to your use of the cc_header_only_library +# deep headers may be missing. If this creates problems, you must find +# a header-only version of the cc_library rule you care about and link it +# *directly* in addition to your use of the cc_header_only_library # intermediary. # # For: From a3a2eaf9a01cb5ced0a6322d561c05708e052e4f Mon Sep 17 00:00:00 2001 From: WindQAQ Date: Mon, 20 Jul 2020 22:20:36 -0700 Subject: [PATCH 0905/2522] Enable eager test for MomentumOptimizerTest --- .../optimizer_v2/gradient_descent_test.py | 507 +++++++++--------- 1 file changed, 262 insertions(+), 245 deletions(-) diff --git a/tensorflow/python/keras/optimizer_v2/gradient_descent_test.py b/tensorflow/python/keras/optimizer_v2/gradient_descent_test.py index 1f5a1004b48..0872d6d3a29 100644 --- a/tensorflow/python/keras/optimizer_v2/gradient_descent_test.py +++ b/tensorflow/python/keras/optimizer_v2/gradient_descent_test.py @@ -358,90 +358,100 @@ class MomentumOptimizerTest(test.TestCase, parameterized.TestCase): 3.98 - ((0.9 * 0.01 + 0.01) * 2.0) ]), self.evaluate(var1)) + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testNesterovMomentum(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. 
- with ops.Graph().as_default(): - for dtype in [dtypes.float32, dtypes.float64]: - var0 = variables.Variable([1.0, 2.0], dtype=dtype, name="var0") - var1 = variables.Variable([3.0, 4.0], dtype=dtype, name="var1") - var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) - accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) - accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) - loss = lambda: 5 * var0 * var0 + 3 * var1 # pylint: disable=cell-var-from-loop - mom_op = gradient_descent.SGD( - learning_rate=2.0, momentum=0.9, nesterov=True) - opt_op = mom_op.minimize(loss, [var0, var1]) - self.evaluate(variables.global_variables_initializer()) - for _ in range(1, 5): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + var0 = variables.Variable([1.0, 2.0], dtype=dtype, name="var0") + var1 = variables.Variable([3.0, 4.0], dtype=dtype, name="var1") + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + loss = lambda: 5 * var0 * var0 + 3 * var1 # pylint: disable=cell-var-from-loop + mom_op = gradient_descent.SGD( + learning_rate=2.0, momentum=0.9, nesterov=True) + opt_op = mom_op.minimize(loss, [var0, var1]) + self.evaluate(variables.global_variables_initializer()) + for i in range(1, 5): + # already updated once in eager mode + if i != 1 and context.executing_eagerly(): + mom_op.minimize(loss, [var0, var1]) + else: self.evaluate(opt_op) - var0_np, accum0_np = self._update_nesterov_momentum_numpy( - var0_np, accum0_np, var0_np * 10, 2.0, 0.9) - var1_np, accum1_np = self._update_nesterov_momentum_numpy( - var1_np, accum1_np, 3, 2.0, 0.9) - self.assertAllClose(var0_np, self.evaluate(var0)) - self.assertAllClose(var1_np, self.evaluate(var1)) + var0_np, accum0_np = self._update_nesterov_momentum_numpy( + var0_np, accum0_np, var0_np * 10, 2.0, 0.9) + var1_np, accum1_np = self._update_nesterov_momentum_numpy( + var1_np, accum1_np, 3, 2.0, 0.9) + self.assertAllClose(var0_np, self.evaluate(var0)) + self.assertAllClose(var1_np, self.evaluate(var1)) + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testSparseNesterovMomentum(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. 
for dtype in [dtypes.float32, dtypes.float64]: - with ops.Graph().as_default(), self.cached_session() as sess: - var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) - accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) - accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) - grads = [] + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + grads = [] + for t in range(1, 5): + grads.append(var0_np * 10) + var0_np, accum0_np = self._update_nesterov_momentum_numpy( + var0_np, accum0_np, var0_np * 10, 2.0, 0.9) + var1_np, accum1_np = self._update_nesterov_momentum_numpy( + var1_np, accum1_np, 3, 2.0, 0.9) + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + var0 = variables.Variable(var0_np, dtype=dtype, name="var0") + var1 = variables.Variable(var1_np, dtype=dtype, name="var1") + mom_op = gradient_descent.SGD( + learning_rate=2.0, momentum=0.9, nesterov=True) + grads_and_vars = [] + for t in range(1, 5): + y = ops.IndexedSlices(constant_op.constant(grads[t - 1], dtype=dtype), + constant_op.constant([0, 1]), + constant_op.constant([2])) + grads_and_vars.append([ + (y, var0), + (constant_op.constant([3.0, 3.0], dtype=dtype), var1)]) + if not context.executing_eagerly(): + opt_update = [] for t in range(1, 5): - grads.append(var0_np * 10) - var0_np, accum0_np = self._update_nesterov_momentum_numpy( - var0_np, accum0_np, var0_np * 10, 2.0, 0.9) - var1_np, accum1_np = self._update_nesterov_momentum_numpy( - var1_np, accum1_np, 3, 2.0, 0.9) - var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) - accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) - accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) - var0 = variables.Variable(var0_np, dtype=dtype, name="var0") - var1 = variables.Variable(var1_np, dtype=dtype, name="var1") - mom_op = gradient_descent.SGD( - learning_rate=2.0, momentum=0.9, nesterov=True) - x_feed = array_ops.placeholder(dtype) - y_feed = ops.IndexedSlices(x_feed, constant_op.constant([0, 1]), - constant_op.constant([2])) - grads_and_vars = [(y_feed, var0), - (constant_op.constant([3.0, 3.0], dtype=dtype), var1)] - opt_update = mom_op.apply_gradients(grads_and_vars) - self.evaluate(variables.global_variables_initializer()) - for t in range(1, 5): - sess.run(opt_update, feed_dict={x_feed: grads[t - 1]}) - var0_np, accum0_np = self._update_nesterov_momentum_numpy( - var0_np, accum0_np, var0_np * 10, 2.0, 0.9) - var1_np, accum1_np = self._update_nesterov_momentum_numpy( - var1_np, accum1_np, 3, 2.0, 0.9) - self.assertAllClose(var0_np, self.evaluate(var0)) - self.assertAllClose(var1_np, self.evaluate(var1)) + opt_update.append(mom_op.apply_gradients(grads_and_vars[t - 1])) + self.evaluate(variables.global_variables_initializer()) + for t in range(1, 5): + if context.executing_eagerly(): + mom_op.apply_gradients(grads_and_vars[t - 1]) + else: + self.evaluate(opt_update[t - 1]) + var0_np, accum0_np = self._update_nesterov_momentum_numpy( + var0_np, accum0_np, var0_np * 10, 2.0, 0.9) + var1_np, accum1_np = 
self._update_nesterov_momentum_numpy( + var1_np, accum1_np, 3, 2.0, 0.9) + self.assertAllClose(var0_np, self.evaluate(var0)) + self.assertAllClose(var1_np, self.evaluate(var1)) + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testMinimizeSparseResourceVariable(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - with ops.Graph().as_default(): - for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - var0 = variables.Variable([[1.0, 2.0]], dtype=dtype) + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + var0 = variables.Variable([[1.0, 2.0]], dtype=dtype) - # pylint: disable=cell-var-from-loop - def loss(): - x = constant_op.constant([[4.0], [5.0]], dtype=dtype) - pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x) - return pred * pred + # pylint: disable=cell-var-from-loop + def loss(): + x = constant_op.constant([[4.0], [5.0]], dtype=dtype) + pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x) + return pred * pred - # pylint: enable=cell-var-from-loop + # pylint: enable=cell-var-from-loop - opt = gradient_descent.SGD(learning_rate=1.0, momentum=0.9) - sgd_op = opt.minimize(loss, [var0]) - self.evaluate(variables.global_variables_initializer()) - # Run 1 step of sgd - self.evaluate(sgd_op) - # Validate updated params - self.assertAllCloseAccordingToType([[-111, -138]], self.evaluate(var0)) + opt = gradient_descent.SGD(learning_rate=1.0, momentum=0.9) + sgd_op = opt.minimize(loss, [var0]) + self.evaluate(variables.global_variables_initializer()) + # Run 1 step of sgd + self.evaluate(sgd_op) + # Validate updated params + self.assertAllCloseAccordingToType([[-111, -138]], self.evaluate(var0)) @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testMinimizeWith2DIndicesForEmbeddingLookup(self): @@ -456,151 +466,149 @@ class MomentumOptimizerTest(test.TestCase, parameterized.TestCase): self.evaluate(sgd_op) self.assertAllCloseAccordingToType([[1, 1], [0, 0]], self.evaluate(var0)) + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testTensorLearningRateAndMomentum(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - with ops.Graph().as_default(): - for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - var0 = variables.Variable([1.0, 2.0], dtype=dtype) - var1 = variables.Variable([3.0, 4.0], dtype=dtype) - grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) - grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) - mom_opt = gradient_descent.SGD( - learning_rate=constant_op.constant(2.0), - momentum=constant_op.constant(0.9)) - mom_update = mom_opt.apply_gradients( + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + var0 = variables.Variable([1.0, 2.0], dtype=dtype) + var1 = variables.Variable([3.0, 4.0], dtype=dtype) + grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) + grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) + mom_opt = gradient_descent.SGD( + learning_rate=constant_op.constant(2.0), + momentum=constant_op.constant(0.9)) + mom_update = mom_opt.apply_gradients( + zip([grads0, grads1], [var0, var1])) + self.evaluate(variables.global_variables_initializer()) + # Check we have slots + slot0 = mom_opt.get_slot(var0, "momentum") + self.assertEqual(slot0.shape, var0.shape) + slot1 = mom_opt.get_slot(var1, "momentum") + self.assertEqual(slot1.shape, var1.shape) + + # Step 1: the momentum accumulators where 0. 
So we should see a normal + # update: v -= grad * learning_rate + self.evaluate(mom_update) + # Check that the momentum accumulators have been updated. + self.assertAllCloseAccordingToType( + np.array([-0.2, -0.2]), self.evaluate(slot0)) + self.assertAllCloseAccordingToType( + np.array([-0.02, -0.02]), self.evaluate(slot1)) + # Check that the parameters have been updated. + self.assertAllCloseAccordingToType( + np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), + self.evaluate(var0)) + self.assertAllCloseAccordingToType( + np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]), + self.evaluate(var1)) + # Step 2: the momentum accumulators contain the previous update. + if context.executing_eagerly(): + mom_opt.apply_gradients( zip([grads0, grads1], [var0, var1])) - self.evaluate(variables.global_variables_initializer()) - # Check we have slots - slot0 = mom_opt.get_slot(var0, "momentum") - self.assertEqual(slot0.shape, var0.shape) - slot1 = mom_opt.get_slot(var1, "momentum") - self.assertEqual(slot1.shape, var1.shape) - - # Fetch params to validate initial values - self.assertAllClose([1.0, 2.0], self.evaluate(var0)) - self.assertAllClose([3.0, 4.0], self.evaluate(var1)) - # Step 1: the momentum accumulators where 0. So we should see a normal - # update: v -= grad * learning_rate + else: self.evaluate(mom_update) - # Check that the momentum accumulators have been updated. - self.assertAllCloseAccordingToType( - np.array([-0.2, -0.2]), self.evaluate(slot0)) - self.assertAllCloseAccordingToType( - np.array([-0.02, -0.02]), self.evaluate(slot1)) - # Check that the parameters have been updated. - self.assertAllCloseAccordingToType( - np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), - self.evaluate(var0)) - self.assertAllCloseAccordingToType( - np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]), - self.evaluate(var1)) - # Step 2: the momentum accumulators contain the previous update. - self.evaluate(mom_update) - # Check that the momentum accumulators have been updated. - self.assertAllCloseAccordingToType( - np.array([(0.9 * (-0.2) - 2.0 * 0.1), (0.9 * (-0.2) - 2.0 * 0.1)]), - self.evaluate(slot0)) - self.assertAllCloseAccordingToType( - np.array([(0.9 * (-0.02) - 2.0 * 0.01), - (0.9 * (-0.02) - 2.0 * 0.01)]), self.evaluate(slot1)) - # Check that the parameters have been updated. - self.assertAllCloseAccordingToType( - np.array([ - 1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0), - 2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0) - ]), self.evaluate(var0)) - self.assertAllCloseAccordingToType( - np.array([ - 2.98 - ((0.9 * 0.01 + 0.01) * 2.0), - 3.98 - ((0.9 * 0.01 + 0.01) * 2.0) - ]), self.evaluate(var1)) + # Check that the momentum accumulators have been updated. + self.assertAllCloseAccordingToType( + np.array([(0.9 * (-0.2) - 2.0 * 0.1), (0.9 * (-0.2) - 2.0 * 0.1)]), + self.evaluate(slot0)) + self.assertAllCloseAccordingToType( + np.array([(0.9 * (-0.02) - 2.0 * 0.01), + (0.9 * (-0.02) - 2.0 * 0.01)]), self.evaluate(slot1)) + # Check that the parameters have been updated. + self.assertAllCloseAccordingToType( + np.array([ + 1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0), + 2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0) + ]), self.evaluate(var0)) + self.assertAllCloseAccordingToType( + np.array([ + 2.98 - ((0.9 * 0.01 + 0.01) * 2.0), + 3.98 - ((0.9 * 0.01 + 0.01) * 2.0) + ]), self.evaluate(var1)) + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testSparse(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. 
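The eager/graph split introduced throughout these tests follows a single pattern: in graph mode one update op is built up front and evaluated repeatedly, while in eager mode `apply_gradients` executes immediately and so must be called again on every step. A condensed sketch of that control flow (the function and parameter names are illustrative, not part of the test):

    def run_update_steps(opt, grads_and_vars, num_steps, evaluate, executing_eagerly):
      # Graph mode: build the op once, outside the loop.
      if not executing_eagerly:
        update_op = opt.apply_gradients(grads_and_vars)
      for _ in range(num_steps):
        if executing_eagerly:
          # Eager mode: each call applies the gradients immediately.
          opt.apply_gradients(grads_and_vars)
        else:
          # Graph mode: re-run the op that was already built.
          evaluate(update_op)

In graph mode, calling `apply_gradients` inside the loop would keep adding new ops to the graph rather than re-running the existing one, which is why the update ops are created before the variables are initialized (and, in the sparse test, collected into a list ahead of time).
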
- with ops.Graph().as_default(): - for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - var0 = variables.Variable(array_ops.zeros([4, 2], dtype=dtype)) - var1 = variables.Variable(constant_op.constant(1.0, dtype, [4, 2])) - grads0 = ops.IndexedSlices( - constant_op.constant([[.1, .1]], dtype=dtype), - constant_op.constant([1]), constant_op.constant([4, 2])) - grads1 = ops.IndexedSlices( - constant_op.constant([[.01, .01], [.01, .01]], dtype=dtype), - constant_op.constant([2, 3]), constant_op.constant([4, 2])) - mom_opt = gradient_descent.SGD(learning_rate=2.0, momentum=0.9) - mom_update = mom_opt.apply_gradients( + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + var0 = variables.Variable(array_ops.zeros([4, 2], dtype=dtype)) + var1 = variables.Variable(constant_op.constant(1.0, dtype, [4, 2])) + grads0 = ops.IndexedSlices( + constant_op.constant([[.1, .1]], dtype=dtype), + constant_op.constant([1]), constant_op.constant([4, 2])) + grads1 = ops.IndexedSlices( + constant_op.constant([[.01, .01], [.01, .01]], dtype=dtype), + constant_op.constant([2, 3]), constant_op.constant([4, 2])) + mom_opt = gradient_descent.SGD(learning_rate=2.0, momentum=0.9) + mom_update = mom_opt.apply_gradients( + zip([grads0, grads1], [var0, var1])) + self.evaluate(variables.global_variables_initializer()) + + # Check we have slots + slot0 = mom_opt.get_slot(var0, "momentum") + self.assertEqual(slot0.shape, var0.shape) + slot1 = mom_opt.get_slot(var1, "momentum") + self.assertEqual(slot1.shape, var1.shape) + + # Step 1: the momentum accumulators are 0. So we should see a normal + # update: v -= grad * learning_rate + self.evaluate(mom_update) + # Check that the momentum accumulators have been updated. + self.assertAllCloseAccordingToType( + np.array([0, 0]), + self.evaluate(slot0)[0]) + self.assertAllCloseAccordingToType( + np.array([-2.0 * .1, -2.0 * .1]), + self.evaluate(slot0)[1]) + self.assertAllCloseAccordingToType( + np.array([-2.0 * .01, -2.0 * .01]), + self.evaluate(slot1)[2]) + # Check that the parameters have been updated. + self.assertAllCloseAccordingToType( + np.array([0, 0]), + self.evaluate(var0)[0]) + self.assertAllCloseAccordingToType( + np.array([-(0.1 * 2.0), -(0.1 * 2.0)]), + self.evaluate(var0)[1]) + self.assertAllCloseAccordingToType( + np.array([1.0 - (0.01 * 2.0), 1.0 - (0.01 * 2.0)]), + self.evaluate(var1)[2]) + # Step 2: the momentum accumulators contain the previous update. + if context.executing_eagerly(): + mom_opt.apply_gradients( zip([grads0, grads1], [var0, var1])) - self.evaluate(variables.global_variables_initializer()) - - # Check we have slots - slot0 = mom_opt.get_slot(var0, "momentum") - self.assertEqual(slot0.shape, var0.shape) - slot1 = mom_opt.get_slot(var1, "momentum") - self.assertEqual(slot1.shape, var1.shape) - - # Fetch params to validate initial values - self.assertAllClose([0, 0], self.evaluate(var0)[0]) - self.assertAllClose([0, 0], self.evaluate(var0)[1]) - self.assertAllClose([1, 1], self.evaluate(var1)[2]) - - # Step 1: the momentum accumulators are 0. So we should see a normal - # update: v -= grad * learning_rate + else: self.evaluate(mom_update) - # Check that the momentum accumulators have been updated. - self.assertAllCloseAccordingToType( - np.array([0, 0]), - self.evaluate(slot0)[0]) - self.assertAllCloseAccordingToType( - np.array([-2.0 * .1, -2.0 * .1]), - self.evaluate(slot0)[1]) - self.assertAllCloseAccordingToType( - np.array([-2.0 * .01, -2.0 * .01]), - self.evaluate(slot1)[2]) - # Check that the parameters have been updated. 
- self.assertAllCloseAccordingToType( - np.array([0, 0]), - self.evaluate(var0)[0]) - self.assertAllCloseAccordingToType( - np.array([-(0.1 * 2.0), -(0.1 * 2.0)]), - self.evaluate(var0)[1]) - self.assertAllCloseAccordingToType( - np.array([1.0 - (0.01 * 2.0), 1.0 - (0.01 * 2.0)]), - self.evaluate(var1)[2]) - # Step 2: the momentum accumulators contain the previous update. - self.evaluate(mom_update) - # Check that the momentum accumulators have been updated. - self.assertAllClose(np.array([0, 0]), self.evaluate(slot0)[0]) - self.assertAllCloseAccordingToType( - np.array([(0.9 * (-0.2) - 2.0 * 0.1), (0.9 * (-0.2) - 2.0 * 0.1)]), - self.evaluate(slot0)[1]) - self.assertAllCloseAccordingToType( - np.array([(0.9 * (-0.02) - 2.0 * 0.01), - (0.9 * (-0.02) - 2.0 * 0.01)]), - self.evaluate(slot1)[2]) - # Check that the parameters have been updated. - self.assertAllClose(np.array([0, 0]), self.evaluate(var0)[0]) - self.assertAllCloseAccordingToType( - np.array([ - -(0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0), - -(0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0) - ]), - self.evaluate(var0)[1]) - self.assertAllCloseAccordingToType( - np.array([ - 0.98 - ((0.9 * 0.01 + 0.01) * 2.0), - 0.98 - ((0.9 * 0.01 + 0.01) * 2.0) - ]), - self.evaluate(var1)[2]) + # Check that the momentum accumulators have been updated. + self.assertAllClose(np.array([0, 0]), self.evaluate(slot0)[0]) + self.assertAllCloseAccordingToType( + np.array([(0.9 * (-0.2) - 2.0 * 0.1), (0.9 * (-0.2) - 2.0 * 0.1)]), + self.evaluate(slot0)[1]) + self.assertAllCloseAccordingToType( + np.array([(0.9 * (-0.02) - 2.0 * 0.01), + (0.9 * (-0.02) - 2.0 * 0.01)]), + self.evaluate(slot1)[2]) + # Check that the parameters have been updated. + self.assertAllClose(np.array([0, 0]), self.evaluate(var0)[0]) + self.assertAllCloseAccordingToType( + np.array([ + -(0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0), + -(0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0) + ]), + self.evaluate(var0)[1]) + self.assertAllCloseAccordingToType( + np.array([ + 0.98 - ((0.9 * 0.01 + 0.01) * 2.0), + 0.98 - ((0.9 * 0.01 + 0.01) * 2.0) + ]), + self.evaluate(var1)[2]) + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testSharing(self): - # TODO(tanzheny, omalleyt): Fix test in eager mode. - with ops.Graph().as_default(): - for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - var0 = variables.Variable([1.0, 2.0], dtype=dtype) - var1 = variables.Variable([3.0, 4.0], dtype=dtype) - grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) - grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) - mom_opt = gradient_descent.SGD(learning_rate=2.0, momentum=0.9) + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + var0 = variables.Variable([1.0, 2.0], dtype=dtype) + var1 = variables.Variable([3.0, 4.0], dtype=dtype) + grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) + grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) + mom_opt = gradient_descent.SGD(learning_rate=2.0, momentum=0.9) + if not context.executing_eagerly(): mom_update1 = mom_opt.apply_gradients( zip([grads0, grads1], [var0, var1])) mom_update2 = mom_opt.apply_gradients( @@ -612,44 +620,53 @@ class MomentumOptimizerTest(test.TestCase, parameterized.TestCase): slot1 = mom_opt.get_slot(var1, "momentum") self.assertEqual(slot1.shape, var1.shape) - # Fetch params to validate initial values - self.assertAllClose([1.0, 2.0], self.evaluate(var0)) - self.assertAllClose([3.0, 4.0], self.evaluate(var1)) - # Step 1: the momentum accumulators where 0. 
So we should see a normal - # update: v -= grad * learning_rate + # Step 1: the momentum accumulators where 0. So we should see a normal + # update: v -= grad * learning_rate + if context.executing_eagerly(): + mom_opt.apply_gradients( + zip([grads0, grads1], [var0, var1])) + slot0 = mom_opt.get_slot(var0, "momentum") + self.assertEqual(slot0.shape, var0.shape) + slot1 = mom_opt.get_slot(var1, "momentum") + self.assertEqual(slot1.shape, var1.shape) + else: self.evaluate(mom_update1) - # Check that the momentum accumulators have been updated. - self.assertAllCloseAccordingToType( - np.array([-0.2, -0.2]), self.evaluate(slot0)) - self.assertAllCloseAccordingToType( - np.array([-0.02, -0.02]), self.evaluate(slot1)) - # Check that the parameters have been updated. - self.assertAllCloseAccordingToType( - np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), - self.evaluate(var0)) - self.assertAllCloseAccordingToType( - np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]), - self.evaluate(var1)) - # Step 2: the second momentum accumulators contain the previous update. + # Check that the momentum accumulators have been updated. + self.assertAllCloseAccordingToType( + np.array([-0.2, -0.2]), self.evaluate(slot0)) + self.assertAllCloseAccordingToType( + np.array([-0.02, -0.02]), self.evaluate(slot1)) + # Check that the parameters have been updated. + self.assertAllCloseAccordingToType( + np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), + self.evaluate(var0)) + self.assertAllCloseAccordingToType( + np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]), + self.evaluate(var1)) + # Step 2: the second momentum accumulators contain the previous update. + if context.executing_eagerly(): + mom_update2 = mom_opt.apply_gradients( + zip([grads0, grads1], [var0, var1])) + else: self.evaluate(mom_update2) - # Check that the momentum accumulators have been updated. - self.assertAllCloseAccordingToType( - np.array([(0.9 * (-0.2) - 2.0 * 0.1), (0.9 * (-0.2) - 2.0 * 0.1)]), - self.evaluate(slot0)) - self.assertAllCloseAccordingToType( - np.array([(0.9 * (-0.02) - 2.0 * 0.01), - (0.9 * (-0.02) - 2.0 * 0.01)]), self.evaluate(slot1)) - # Check that the parameters have been updated. - self.assertAllCloseAccordingToType( - np.array([ - 1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0), - 2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0) - ]), self.evaluate(var0)) - self.assertAllCloseAccordingToType( - np.array([ - 2.98 - ((0.9 * 0.01 + 0.01) * 2.0), - 3.98 - ((0.9 * 0.01 + 0.01) * 2.0) - ]), self.evaluate(var1)) + # Check that the momentum accumulators have been updated. + self.assertAllCloseAccordingToType( + np.array([(0.9 * (-0.2) - 2.0 * 0.1), (0.9 * (-0.2) - 2.0 * 0.1)]), + self.evaluate(slot0)) + self.assertAllCloseAccordingToType( + np.array([(0.9 * (-0.02) - 2.0 * 0.01), + (0.9 * (-0.02) - 2.0 * 0.01)]), self.evaluate(slot1)) + # Check that the parameters have been updated. + self.assertAllCloseAccordingToType( + np.array([ + 1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0), + 2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0) + ]), self.evaluate(var0)) + self.assertAllCloseAccordingToType( + np.array([ + 2.98 - ((0.9 * 0.01 + 0.01) * 2.0), + 3.98 - ((0.9 * 0.01 + 0.01) * 2.0) + ]), self.evaluate(var1)) @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testConfig(self): From b4de69d524b49bc70993f5f57b5c5e2df95027a3 Mon Sep 17 00:00:00 2001 From: Terry Heo Date: Mon, 20 Jul 2020 22:22:24 -0700 Subject: [PATCH 0906/2522] Enable python:lite_flex_test for MacOS Use relative path of _pywrap_tensorflow_internal.so. 
PiperOrigin-RevId: 322294422 Change-Id: I96e5d52048bf0c5b6039d9f020ba88faa7c31130 --- tensorflow/lite/interpreter_builder.cc | 2 ++ tensorflow/lite/python/BUILD | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/lite/interpreter_builder.cc b/tensorflow/lite/interpreter_builder.cc index 996fc7e6b82..96871e740c0 100644 --- a/tensorflow/lite/interpreter_builder.cc +++ b/tensorflow/lite/interpreter_builder.cc @@ -121,6 +121,8 @@ TFLITE_ATTRIBUTE_WEAK Interpreter::TfLiteDelegatePtr AcquireFlexDelegate() { const char* filename_pywrap_tensorflow_internal = #if defined(_WIN32) "_pywrap_tensorflow_internal.pyd"; +#elif defined(__APPLE__) + "python/_pywrap_tensorflow_internal.so"; #else "_pywrap_tensorflow_internal.so"; #endif diff --git a/tensorflow/lite/python/BUILD b/tensorflow/lite/python/BUILD index dfcf46baa90..e26000c810a 100644 --- a/tensorflow/lite/python/BUILD +++ b/tensorflow/lite/python/BUILD @@ -195,9 +195,6 @@ py_test( srcs = ["lite_flex_test.py"], python_version = "PY3", srcs_version = "PY2AND3", - tags = [ - "no_mac", # TODO(b/159077703): Enable Python API Flex support on MacOS. - ], deps = [ ":lite", "//tensorflow/python:client_testlib", From c06c26f032d746b190adf3250d9c7985d1e161be Mon Sep 17 00:00:00 2001 From: WindQAQ Date: Mon, 20 Jul 2020 22:39:01 -0700 Subject: [PATCH 0907/2522] Move decorator to class level --- .../optimizer_v2/gradient_descent_test.py | 21 ++----------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/tensorflow/python/keras/optimizer_v2/gradient_descent_test.py b/tensorflow/python/keras/optimizer_v2/gradient_descent_test.py index 0872d6d3a29..4b05b3d2908 100644 --- a/tensorflow/python/keras/optimizer_v2/gradient_descent_test.py +++ b/tensorflow/python/keras/optimizer_v2/gradient_descent_test.py @@ -37,9 +37,9 @@ from tensorflow.python.ops import variables from tensorflow.python.platform import test +@combinations.generate(combinations.combine(mode=["graph", "eager"])) class GradientDescentOptimizerTest(test.TestCase, parameterized.TestCase): - @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testBasic(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: var0 = variables.Variable([1.0, 2.0], dtype=dtype) @@ -88,7 +88,6 @@ class GradientDescentOptimizerTest(test.TestCase, parameterized.TestCase): [3.0 - 3.0 * 0.01 - 2.0 * 0.01, 4.0 - 3.0 * 0.01 - 2.0 * 0.01], self.evaluate(var1)) - @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testBasicWithLearningRateDecay(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: learning_rate = 3.0 @@ -96,7 +95,6 @@ class GradientDescentOptimizerTest(test.TestCase, parameterized.TestCase): sgd = gradient_descent.SGD(learning_rate=learning_rate, decay=decay) self._test_basic_sgd_with_learning_rate_decay(sgd, dtype) - @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testBasicWithLearningRateInverseTimeDecay(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: learning_rate = learning_rate_schedule.InverseTimeDecay( @@ -104,7 +102,6 @@ class GradientDescentOptimizerTest(test.TestCase, parameterized.TestCase): sgd = gradient_descent.SGD(learning_rate=learning_rate) self._test_basic_sgd_with_learning_rate_decay(sgd, dtype) - @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testBasicWithLearningRateInverseTimeDecaySerializeAndDeserialize(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: learning_rate = 
learning_rate_schedule.InverseTimeDecay( @@ -113,7 +110,6 @@ class GradientDescentOptimizerTest(test.TestCase, parameterized.TestCase): sgd = gradient_descent.SGD.from_config(sgd.get_config()) self._test_basic_sgd_with_learning_rate_decay(sgd, dtype) - @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testBasicCallableParams(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: var0 = variables.Variable([1.0, 2.0], dtype=dtype) @@ -132,7 +128,6 @@ class GradientDescentOptimizerTest(test.TestCase, parameterized.TestCase): self.assertAllCloseAccordingToType([3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01], self.evaluate(var1)) - @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testMinimizeResourceVariable(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: var0 = variables.Variable([[1.0, 2.0]], dtype=dtype) @@ -149,7 +144,6 @@ class GradientDescentOptimizerTest(test.TestCase, parameterized.TestCase): self.evaluate(var0)) self.assertAllCloseAccordingToType([3.0 - 1.0], self.evaluate(var1)) - @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testMinimizeSparseResourceVariable(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: var0 = variables.Variable([[1.0, 2.0]], dtype=dtype) @@ -190,7 +184,6 @@ class GradientDescentOptimizerTest(test.TestCase, parameterized.TestCase): self.assertAllCloseAccordingToType([3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01], self.evaluate(var1)) - @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testGradWrtRef(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: opt = gradient_descent.SGD(3.0) @@ -202,7 +195,6 @@ class GradientDescentOptimizerTest(test.TestCase, parameterized.TestCase): for grad, _ in grads_and_vars: self.assertAllCloseAccordingToType([1.0], self.evaluate(grad)) - @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testSparseBasic(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: var0 = variables.Variable([[1.0], [2.0]], dtype=dtype) @@ -224,7 +216,6 @@ class GradientDescentOptimizerTest(test.TestCase, parameterized.TestCase): self.assertAllCloseAccordingToType([[3.0], [4.0 - 3.0 * 0.01]], self.evaluate(var1)) - @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testSparseBasicWithLearningRateDecay(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: var0 = variables.Variable([[1.0], [2.0]], dtype=dtype) @@ -291,6 +282,7 @@ class GradientDescentOptimizerTest(test.TestCase, parameterized.TestCase): self.assertAllClose(self.evaluate(opt_3.lr), (0.1)) +@combinations.generate(combinations.combine(mode=["graph", "eager"])) class MomentumOptimizerTest(test.TestCase, parameterized.TestCase): def _update_nesterov_momentum_numpy(self, var, accum, g, lr, momentum): @@ -298,7 +290,6 @@ class MomentumOptimizerTest(test.TestCase, parameterized.TestCase): var += (accum * momentum - g * lr) return var, accum - @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testBasic(self): for _, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]): var0 = variables.Variable([1.0, 2.0], dtype=dtype, name="var0") @@ -358,7 +349,6 @@ class MomentumOptimizerTest(test.TestCase, parameterized.TestCase): 3.98 - ((0.9 * 0.01 + 0.01) * 2.0) ]), self.evaluate(var1)) - @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testNesterovMomentum(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: var0 = 
variables.Variable([1.0, 2.0], dtype=dtype, name="var0") @@ -385,7 +375,6 @@ class MomentumOptimizerTest(test.TestCase, parameterized.TestCase): self.assertAllClose(var0_np, self.evaluate(var0)) self.assertAllClose(var1_np, self.evaluate(var1)) - @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testSparseNesterovMomentum(self): for dtype in [dtypes.float32, dtypes.float64]: var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) @@ -432,7 +421,6 @@ class MomentumOptimizerTest(test.TestCase, parameterized.TestCase): self.assertAllClose(var0_np, self.evaluate(var0)) self.assertAllClose(var1_np, self.evaluate(var1)) - @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testMinimizeSparseResourceVariable(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: var0 = variables.Variable([[1.0, 2.0]], dtype=dtype) @@ -453,7 +441,6 @@ class MomentumOptimizerTest(test.TestCase, parameterized.TestCase): # Validate updated params self.assertAllCloseAccordingToType([[-111, -138]], self.evaluate(var0)) - @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testMinimizeWith2DIndicesForEmbeddingLookup(self): var0 = variables.Variable(array_ops.ones([2, 2])) @@ -466,7 +453,6 @@ class MomentumOptimizerTest(test.TestCase, parameterized.TestCase): self.evaluate(sgd_op) self.assertAllCloseAccordingToType([[1, 1], [0, 0]], self.evaluate(var0)) - @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testTensorLearningRateAndMomentum(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: var0 = variables.Variable([1.0, 2.0], dtype=dtype) @@ -525,7 +511,6 @@ class MomentumOptimizerTest(test.TestCase, parameterized.TestCase): 3.98 - ((0.9 * 0.01 + 0.01) * 2.0) ]), self.evaluate(var1)) - @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testSparse(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: var0 = variables.Variable(array_ops.zeros([4, 2], dtype=dtype)) @@ -600,7 +585,6 @@ class MomentumOptimizerTest(test.TestCase, parameterized.TestCase): ]), self.evaluate(var1)[2]) - @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testSharing(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: var0 = variables.Variable([1.0, 2.0], dtype=dtype) @@ -668,7 +652,6 @@ class MomentumOptimizerTest(test.TestCase, parameterized.TestCase): 3.98 - ((0.9 * 0.01 + 0.01) * 2.0) ]), self.evaluate(var1)) - @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testConfig(self): opt = gradient_descent.SGD(learning_rate=1.0, momentum=0.9, nesterov=True) config = opt.get_config() From 836b99a27b930de801ecb17abf42e8aba3cac0b8 Mon Sep 17 00:00:00 2001 From: WindQAQ Date: Mon, 20 Jul 2020 22:42:27 -0700 Subject: [PATCH 0908/2522] Fix lint --- .../keras/optimizer_v2/gradient_descent_test.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/keras/optimizer_v2/gradient_descent_test.py b/tensorflow/python/keras/optimizer_v2/gradient_descent_test.py index 4b05b3d2908..bad83fd06cf 100644 --- a/tensorflow/python/keras/optimizer_v2/gradient_descent_test.py +++ b/tensorflow/python/keras/optimizer_v2/gradient_descent_test.py @@ -212,9 +212,9 @@ class GradientDescentOptimizerTest(test.TestCase, parameterized.TestCase): self.evaluate(sgd_op) # Validate updated params self.assertAllCloseAccordingToType([[1.0 - 3.0 * 0.1], [2.0]], - self.evaluate(var0)) + self.evaluate(var0)) 
self.assertAllCloseAccordingToType([[3.0], [4.0 - 3.0 * 0.01]], - self.evaluate(var1)) + self.evaluate(var1)) def testSparseBasicWithLearningRateDecay(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: @@ -234,9 +234,9 @@ class GradientDescentOptimizerTest(test.TestCase, parameterized.TestCase): self.evaluate(update_op) # Validate updated params self.assertAllCloseAccordingToType([[1.0 - 3.0 * 0.1], [2.0]], - self.evaluate(var0)) + self.evaluate(var0)) self.assertAllCloseAccordingToType([[3.0], [4.0 - 3.0 * 0.01]], - self.evaluate(var1)) + self.evaluate(var1)) if context.executing_eagerly(): opt.apply_gradients(zip([grads0, grads1], [var0, var1])) @@ -402,8 +402,8 @@ class MomentumOptimizerTest(test.TestCase, parameterized.TestCase): constant_op.constant([0, 1]), constant_op.constant([2])) grads_and_vars.append([ - (y, var0), - (constant_op.constant([3.0, 3.0], dtype=dtype), var1)]) + (y, var0), + (constant_op.constant([3.0, 3.0], dtype=dtype), var1)]) if not context.executing_eagerly(): opt_update = [] for t in range(1, 5): From f56973780fd53fb0f280c67b0d699bd7db864b88 Mon Sep 17 00:00:00 2001 From: Tim Shen Date: Mon, 20 Jul 2020 23:33:32 -0700 Subject: [PATCH 0909/2522] [XLA/GPU] Replace llvm_ir/alias_analysis with LLVM's own analysis Specifically, since GPU kernels don't benefit from any inter-procedural annotations, just annotation what's in the kernel functions. Since all kernels take full allocations as arguments, and XLA-created allocations by definition don't alias, mark all XLA-created allocations noalias. The result is much faster compilation for many modules with no performance regression (within noise margin). PiperOrigin-RevId: 322301923 Change-Id: I752ca807061d836fc75e69e6d045bbde3f14d9ba --- tensorflow/compiler/xla/service/gpu/BUILD | 1 - .../xla/service/gpu/hlo_to_ir_bindings.cc | 1 - .../xla/service/gpu/hlo_to_ir_bindings.h | 6 +-- .../xla/service/gpu/ir_emitter_unnested.cc | 19 +++++++++ .../service/gpu/tests/gpu_alignment_test.cc | 2 +- .../xla/service/gpu/tests/gpu_noalias_test.cc | 13 ++---- .../xla/service/gpu/tests/scatter.hlo | 40 +++++-------------- 7 files changed, 35 insertions(+), 47 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index 2a85c9f2908..753f6867066 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -170,7 +170,6 @@ cc_library( "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla/service:buffer_assignment", "//tensorflow/compiler/xla/service:hlo", - "//tensorflow/compiler/xla/service/llvm_ir:alias_analysis", "//tensorflow/compiler/xla/service/llvm_ir:buffer_assignment_util", "//tensorflow/compiler/xla/service/llvm_ir:ir_array", "//tensorflow/compiler/xla/service/llvm_ir:llvm_util", diff --git a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc index 23b29df6ec8..7b0686af910 100644 --- a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc +++ b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc @@ -231,7 +231,6 @@ llvm_ir::IrArray HloToIrBindings::GetIrArray(const HloInstruction& hlo, << " of " << hlo.ToString(); llvm_ir::IrArray ir_array(base_ptr, ShapeUtil::GetSubshape(hlo.shape(), shape_index)); - alias_analysis_.AddAliasingInformationToIrArray(hlo, &ir_array, shape_index); // The GPU backend emits one kernel per top-level HLO, and LLVM views // execution of one kernel as the "whole program" executed on the GPU. 
diff --git a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h index f57b594e9c1..5eef6727801 100644 --- a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h +++ b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h @@ -25,7 +25,6 @@ limitations under the License. #include "tensorflow/compiler/xla/map_util.h" #include "tensorflow/compiler/xla/service/buffer_assignment.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" -#include "tensorflow/compiler/xla/service/llvm_ir/alias_analysis.h" #include "tensorflow/compiler/xla/service/llvm_ir/ir_array.h" namespace xla { @@ -42,8 +41,7 @@ class HloToIrBindings { : buffer_assignment_(buffer_assignment), is_nested_(is_nested), b_(b), - module_(llvm_module), - alias_analysis_(module, *buffer_assignment_, &b_->getContext()) {} + module_(llvm_module) {} void EmitBasePointersForHlos( absl::Span io_hlos, @@ -116,8 +114,6 @@ class HloToIrBindings { // The address of the memory block that contains all temporary buffers. llvm::Value* temp_buffer_base_ = nullptr; - - llvm_ir::AliasAnalysis alias_analysis_; }; } // namespace gpu diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index a232bf7fce5..34cdfb4ecf0 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -1747,6 +1747,25 @@ std::unique_ptr IrEmitterUnnested::BuildKernelThunk( auto buffers_it = non_constant_buffers.begin(); for (; arg_it != kernel->arg_end(); ++arg_it, ++buffers_it) { kernel_args[*buffers_it] = arg_it; + + // Annotate all allocations with LLVM's `noalias`. + // There are three kinds of allocations: + // * Read-only allocations, aka input parameters that are not aliased with + // outputs. + // * Read-write allocations, including all output buffers, some of which + // may alias with input HLO parameters, but aliased HLO buffers are always + // assigned with the same allocation. + // * The temp buffer. + // + // Read-only allocations may overlap with each other, but since they are + // not mutated, they can always be annotated with `noalias` per LLVM + // semantics. + // + // Read-write allocations and the temp buffer don't overlap with any + // allocations, therefore they can also be annotated with `noalias`. 
+ kernel->addParamAttr( + arg_it->getArgNo(), + llvm::Attribute::get(arg_it->getContext(), llvm::Attribute::NoAlias)); } } diff --git a/tensorflow/compiler/xla/service/gpu/tests/gpu_alignment_test.cc b/tensorflow/compiler/xla/service/gpu/tests/gpu_alignment_test.cc index 914b81c632f..3ebac925886 100644 --- a/tensorflow/compiler/xla/service/gpu/tests/gpu_alignment_test.cc +++ b/tensorflow/compiler/xla/service/gpu/tests/gpu_alignment_test.cc @@ -45,7 +45,7 @@ ENTRY main { )"; CompileAndVerifyIr(hlo_string, R"( -CHECK: @fusion(i8* align 64 dereferenceable(600) %alloc0, i8* align 16 dereferenceable(400) %alloc1, i8* align 64 dereferenceable(864) %temp_buf) +CHECK: @fusion(i8* noalias align 64 dereferenceable(600) %alloc0, i8* noalias align 16 dereferenceable(400) %alloc1, i8* noalias align 64 dereferenceable(864) %temp_buf) )"); } diff --git a/tensorflow/compiler/xla/service/gpu/tests/gpu_noalias_test.cc b/tensorflow/compiler/xla/service/gpu/tests/gpu_noalias_test.cc index 38ff2da7161..1e39a4deaa7 100644 --- a/tensorflow/compiler/xla/service/gpu/tests/gpu_noalias_test.cc +++ b/tensorflow/compiler/xla/service/gpu/tests/gpu_noalias_test.cc @@ -51,16 +51,9 @@ TEST_F(GpuNoAliasTest, Concat) { hlo_module->AddEntryComputation(std::move(computation)); CompileAndVerifyIr(std::move(hlo_module), - R"( -; CHECK: %[[x_gep:.*]] = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %x{{.*}}, i32 0 -; CHECK: load float, float* %[[x_gep]], {{.*}}, !noalias ![[param_noalias:.*]] -; CHECK: %[[y_gep:.*]] = getelementptr inbounds [2 x [2 x float]], [2 x [2 x float]]* %y{{.*}}, i32 0 -; CHECK: load float, float* %[[y_gep]], {{.*}}, !noalias ![[param_noalias]] -; CHECK: %[[result_ptr:.*]] = bitcast [2 x [6 x float]]* %fusion{{.*}} to float* -; CHECK: %[[result_gep:.*]] = getelementptr inbounds float, float* %[[result_ptr]] -; CHECK: store float {{.*}}, float* %[[result_gep]], align 4, !alias.scope ![[param_noalias]] -; CHECK: ![[param_noalias]] = !{![[retval_buffer:.*]]} - )", + R"(CHECK-LABEL: define void @fusion + CHECK-SAME: i8* noalias align {{[0-9]*}} dereferenceable({{[0-9]*}}) %[[OUTPUT_ALLOC:[a-z0-9]*]] + CHECK: %fusion.raw = {{.*}} %[[OUTPUT_ALLOC]])", /*match_optimized_ir=*/false); } diff --git a/tensorflow/compiler/xla/service/gpu/tests/scatter.hlo b/tensorflow/compiler/xla/service/gpu/tests/scatter.hlo index 796c0adadd2..c5edec4b916 100644 --- a/tensorflow/compiler/xla/service/gpu/tests/scatter.hlo +++ b/tensorflow/compiler/xla/service/gpu/tests/scatter.hlo @@ -1,6 +1,6 @@ // RUN: hlo_to_llvm_ir %s | FileCheck %s -// CHECK-LABEL: define void @scatter_TensorFlowScatterV1(i8* align 64 dereferenceable(36) %alloc0, i8* align 16 dereferenceable(36) %alloc1, i8* align 16 dereferenceable(24) %alloc2, i8* align 16 dereferenceable(8) %alloc3) { +// CHECK-LABEL: define void @scatter_TensorFlowScatterV1(i8* noalias align 64 dereferenceable(36) %alloc0, i8* noalias align 16 dereferenceable(36) %alloc1, i8* noalias align 16 dereferenceable(24) %alloc2, i8* noalias align 16 dereferenceable(8) %alloc3) { // CHECK: entry: // CHECK: %[[VAL_32:.*]] = alloca i32, align 4 // CHECK: %[[VAL_0:.*]] = getelementptr inbounds i8, i8* %[[VAL_1:.*]], i64 0 @@ -26,7 +26,7 @@ // CHECK: ret void // CHECK: scatter_TensorFlowScatterV1.in_bounds-true: ; preds = %[[VAL_24]] // CHECK: %[[VAL_25:.*]] = getelementptr inbounds [2 x i32], [2 x i32]* %[[VAL_8]], i32 0, i32 %[[VAL_19]] -// CHECK: %[[VAL_26:.*]] = load i32, i32* %[[VAL_25]], align 4, !invariant.load !4, !noalias !5 +// CHECK: %[[VAL_26:.*]] = load i32, i32* %[[VAL_25]], 
align 4, !invariant.load !4 // CHECK: %[[VAL_27:.*]] = add i32 0, %[[VAL_26]] // CHECK: %[[VAL_28:.*]] = icmp ult i32 %[[VAL_26]], 3 // CHECK: %[[VAL_29:.*]] = and i1 true, %[[VAL_28]] @@ -37,7 +37,7 @@ // CHECK: %[[VAL_31:.*]] = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* %[[VAL_2]], i32 0, i32 %[[VAL_27]], i32 %[[VAL_18]] // CHECK: %[[VAL_33:.*]] = bitcast [2 x [3 x i32]]* %[[VAL_11]] to i32* // CHECK: %[[VAL_34:.*]] = getelementptr inbounds i32, i32* %[[VAL_33]], i32 %[[VAL_15]] -// CHECK: %[[VAL_35:.*]] = load i32, i32* %[[VAL_34]], align 4, !invariant.load !4, !noalias !5 +// CHECK: %[[VAL_35:.*]] = load i32, i32* %[[VAL_34]], align 4, !invariant.load !4 // CHECK: store i32 %[[VAL_35]], i32* %[[VAL_32]], align 4 // CHECK: %[[VAL_36:.*]] = load i32, i32* %[[VAL_32]], align 4 // CHECK: store atomic i32 %[[VAL_36]], i32* %[[VAL_31]] unordered, align 4 @@ -48,9 +48,6 @@ // CHECK: !2 = !{i32 0, i32 1} // CHECK: !3 = !{i32 0, i32 6} // CHECK: !4 = !{} -// CHECK: !5 = !{!6} -// CHECK: !6 = !{!"buffer: {index:0, offset:0, size:36}", !7} -// CHECK: !7 = !{!"XLA global AA domain"} HloModule TensorFlowScatterV1 @@ -75,7 +72,7 @@ ENTRY main { // ----- -// CHECK-LABEL: define void @scatter_ScatterIntoScalar(i8* align 64 dereferenceable(4) %alloc0, i8* align 16 dereferenceable(4) %alloc1, i8* align 16 dereferenceable(4) %alloc2, i8* align 16 %alloc3) { +// CHECK-LABEL: define void @scatter_ScatterIntoScalar(i8* noalias align 64 dereferenceable(4) %alloc0, i8* noalias align 16 dereferenceable(4) %alloc1, i8* noalias align 16 dereferenceable(4) %alloc2, i8* noalias align 16 %alloc3) { // CHECK: entry: // CHECK: %[[VAL_60:.*]] = alloca i32, align 4 // CHECK: %[[VAL_37:.*]] = getelementptr inbounds i8, i8* %[[VAL_38:.*]], i64 0 @@ -101,7 +98,7 @@ ENTRY main { // CHECK: scatter.in_bounds-after: ; preds = %[[VAL_59]], %[[VAL_55]] // CHECK: br label %[[VAL_56]] // CHECK: scatter.in_bounds-true: ; preds = %[[VAL_55]] -// CHECK: %[[VAL_61:.*]] = load i32, i32* %[[VAL_48]], align 4, !invariant.load !3, !noalias !4 +// CHECK: %[[VAL_61:.*]] = load i32, i32* %[[VAL_48]], align 4, !invariant.load !3 // CHECK: store i32 %[[VAL_61]], i32* %[[VAL_60]], align 4 // CHECK: %[[VAL_62:.*]] = load i32, i32* %[[VAL_60]], align 4 // CHECK: store atomic i32 %[[VAL_62]], i32* %[[VAL_39]] unordered, align 4 @@ -111,9 +108,6 @@ ENTRY main { // CHECK: !1 = !{void (i8*, i8*, i8*, i8*)* @scatter_ScatterIntoScalar, !"reqntidx", i32 1} // CHECK: !2 = !{i32 0, i32 1} // CHECK: !3 = !{} -// CHECK: !4 = !{!5} -// CHECK: !5 = !{!"buffer: {index:0, offset:0, size:4}", !6} -// CHECK: !6 = !{!"XLA global AA domain"} HloModule ScatterIntoScalar @@ -137,7 +131,7 @@ ENTRY main { // ----- -// CHECK-LABEL: define void @scatter_TensorFlowScatter_Mul(i8* align 64 dereferenceable(36) %alloc0, i8* align 16 dereferenceable(36) %alloc1, i8* align 16 dereferenceable(24) %alloc2, i8* align 16 dereferenceable(8) %alloc3) { +// CHECK-LABEL: define void @scatter_TensorFlowScatter_Mul(i8* noalias align 64 dereferenceable(36) %alloc0, i8* noalias align 16 dereferenceable(36) %alloc1, i8* noalias align 16 dereferenceable(24) %alloc2, i8* noalias align 16 dereferenceable(8) %alloc3) { // CHECK: %[[VAL_63:.*]] = alloca i32, align 4 // CHECK: %[[VAL_64:.*]] = alloca i32, align 4 // CHECK: %[[VAL_98:.*]] = alloca i32, align 4 @@ -164,7 +158,7 @@ ENTRY main { // CHECK: ret void // CHECK: scatter_TensorFlowScatter_Mul.in_bounds-true: ; preds = %[[VAL_89]] // CHECK: %[[VAL_90:.*]] = getelementptr inbounds [2 x i32], [2 x i32]* %[[VAL_73]], i32 0, i32 
%[[VAL_84]] -// CHECK: %[[VAL_91:.*]] = load i32, i32* %[[VAL_90]], align 4, !invariant.load !4, !noalias !5 +// CHECK: %[[VAL_91:.*]] = load i32, i32* %[[VAL_90]], align 4, !invariant.load !4 // CHECK: %[[VAL_92:.*]] = add i32 0, %[[VAL_91]] // CHECK: %[[VAL_93:.*]] = icmp ult i32 %[[VAL_91]], 3 // CHECK: %[[VAL_94:.*]] = and i1 true, %[[VAL_93]] @@ -175,7 +169,7 @@ ENTRY main { // CHECK: %[[VAL_97:.*]] = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* %[[VAL_67]], i32 0, i32 %[[VAL_92]], i32 %[[VAL_83]] // CHECK: %[[VAL_99:.*]] = bitcast [2 x [3 x i32]]* %[[VAL_76]] to i32* // CHECK: %[[VAL_100:.*]] = getelementptr inbounds i32, i32* %[[VAL_99]], i32 %[[VAL_80]] -// CHECK: %[[VAL_101:.*]] = load i32, i32* %[[VAL_100]], align 4, !invariant.load !4, !noalias !5 +// CHECK: %[[VAL_101:.*]] = load i32, i32* %[[VAL_100]], align 4, !invariant.load !4 // CHECK: store i32 %[[VAL_101]], i32* %[[VAL_98]], align 4 // CHECK: %[[VAL_102:.*]] = load i32, i32* %[[VAL_98]], align 4 // CHECK: %[[VAL_103:.*]] = load i32, i32* %[[VAL_97]], align 4 @@ -199,15 +193,6 @@ ENTRY main { // CHECK: !2 = !{i32 0, i32 1} // CHECK: !3 = !{i32 0, i32 6} // CHECK: !4 = !{} -// CHECK: !5 = !{!6} -// CHECK: !6 = !{!"buffer: {index:0, offset:0, size:36}", !7} -// CHECK: !7 = !{!"XLA global AA domain"} -// CHECK: !8 = !{!9} -// CHECK: !9 = !{!"buffer: {index:4, offset:0, size:4}", !7} -// CHECK: !10 = !{!11} -// CHECK: !11 = !{!"buffer: {index:6, offset:0, size:4}", !7} -// CHECK: !12 = !{!13} -// CHECK: !13 = !{!"buffer: {index:5, offset:0, size:4}", !7} HloModule TensorFlowScatter_Mul @@ -231,7 +216,7 @@ ENTRY main { // ----- -// CHECK-LABEL: define void @scatter_ScalarUpdate(i8* align 64 dereferenceable(16) %alloc0, i8* align 16 dereferenceable(16) %alloc1, i8* align 16 dereferenceable(4) %alloc2, i8* align 16 dereferenceable(4) %alloc3) { +// CHECK-LABEL: define void @scatter_ScalarUpdate(i8* noalias align 64 dereferenceable(16) %alloc0, i8* noalias align 16 dereferenceable(16) %alloc1, i8* noalias align 16 dereferenceable(4) %alloc2, i8* noalias align 16 dereferenceable(4) %alloc3) { // CHECK: entry: // CHECK: %[[VAL_146:.*]] = alloca i32, align 4 // CHECK: %[[VAL_118:.*]] = getelementptr inbounds i8, i8* %[[VAL_119:.*]], i64 0 @@ -253,7 +238,7 @@ ENTRY main { // CHECK: scatter_ScalarUpdate.in_bounds-after: ; preds = %[[VAL_138:.*]], %[[VAL_139:.*]] // CHECK: ret void // CHECK: scatter_ScalarUpdate.in_bounds-true: ; preds = %[[VAL_139]] -// CHECK: %[[VAL_140:.*]] = load i32, i32* %[[VAL_126]], align 4, !invariant.load !3, !noalias !4 +// CHECK: %[[VAL_140:.*]] = load i32, i32* %[[VAL_126]], align 4, !invariant.load !3 // CHECK: %[[VAL_141:.*]] = add i32 0, %[[VAL_140]] // CHECK: %[[VAL_142:.*]] = icmp ult i32 %[[VAL_140]], 4 // CHECK: %[[VAL_143:.*]] = and i1 true, %[[VAL_142]] @@ -262,7 +247,7 @@ ENTRY main { // CHECK: br label %[[VAL_137]] // CHECK: scatter.in_bounds-true: ; preds = %[[VAL_136]] // CHECK: %[[VAL_145:.*]] = getelementptr inbounds [4 x i32], [4 x i32]* %[[VAL_120]], i32 0, i32 %[[VAL_141]] -// CHECK: %[[VAL_147:.*]] = load i32, i32* %[[VAL_129]], align 4, !invariant.load !3, !noalias !4 +// CHECK: %[[VAL_147:.*]] = load i32, i32* %[[VAL_129]], align 4, !invariant.load !3 // CHECK: store i32 %[[VAL_147]], i32* %[[VAL_146]], align 4 // CHECK: %[[VAL_148:.*]] = load i32, i32* %[[VAL_146]], align 4 // CHECK: store atomic i32 %[[VAL_148]], i32* %[[VAL_145]] unordered, align 4 @@ -272,9 +257,6 @@ ENTRY main { // CHECK: !1 = !{void (i8*, i8*, i8*, i8*)* @scatter_ScalarUpdate, !"reqntidx", i32 1} // 
CHECK: !2 = !{i32 0, i32 1} // CHECK: !3 = !{} -// CHECK: !4 = !{!5} -// CHECK: !5 = !{!"buffer: {index:0, offset:0, size:16}", !6} -// CHECK: !6 = !{!"XLA global AA domain"} HloModule ScalarUpdate From 7fc4740b6fd1964729253c920e9b50579cc425d7 Mon Sep 17 00:00:00 2001 From: Taehee Jeong Date: Tue, 21 Jul 2020 00:10:02 -0700 Subject: [PATCH 0910/2522] [Core ML delegate] Add PAD and MIRROR_PAD op The initial support only comes with 4D support. Due to Core ML's constraint, the padding should be constant and is only allowed for H and W dimensions PiperOrigin-RevId: 322305274 Change-Id: I16fc9c78f952a9a15c2bf4d0f7e4a1c52a9b3b4e --- .../delegates/coreml/builders/op_builder.cc | 5 + .../delegates/coreml/builders/op_factory.h | 3 + .../coreml/builders/pad_op_builder.cc | 99 +++++++++++++++++++ .../coreml/builders/pad_op_builder.h | 55 +++++++++++ .../delegates/coreml/coreml_delegate.mm | 10 +- 5 files changed, 170 insertions(+), 2 deletions(-) create mode 100644 tensorflow/lite/experimental/delegates/coreml/builders/pad_op_builder.cc create mode 100644 tensorflow/lite/experimental/delegates/coreml/builders/pad_op_builder.h diff --git a/tensorflow/lite/experimental/delegates/coreml/builders/op_builder.cc b/tensorflow/lite/experimental/delegates/coreml/builders/op_builder.cc index 47a2eecb51b..c775f4fdb48 100644 --- a/tensorflow/lite/experimental/delegates/coreml/builders/op_builder.cc +++ b/tensorflow/lite/experimental/delegates/coreml/builders/op_builder.cc @@ -46,8 +46,13 @@ OpBuilder* GraphBuilder::AddBuilder(int builtin_code, const TfLiteNode* node) { return AddBuilder(CreateLogisticOpBuilder, node); case kTfLiteBuiltinMaxPool2d: return AddBuilder(CreateMaxPool2dOpBuilder, node); + case kTfLiteBuiltinMirrorPad: + return AddBuilder(CreateMirrorPadOpBuilder, node); case kTfLiteBuiltinMul: return AddBuilder(CreateMulOpBuilder, node); + case kTfLiteBuiltinPad: + case kTfLiteBuiltinPadv2: + return AddBuilder(CreatePadOpBuilder, node); case kTfLiteBuiltinRelu: return AddBuilder(CreateReluOpBuilder, node); case kTfLiteBuiltinReluN1To1: diff --git a/tensorflow/lite/experimental/delegates/coreml/builders/op_factory.h b/tensorflow/lite/experimental/delegates/coreml/builders/op_factory.h index bc275908d10..4245021fc2f 100644 --- a/tensorflow/lite/experimental/delegates/coreml/builders/op_factory.h +++ b/tensorflow/lite/experimental/delegates/coreml/builders/op_factory.h @@ -32,7 +32,10 @@ OpBuilder* CreateFullyConnectedOpBuilder(GraphBuilder* graph_builder); OpBuilder* CreateHardSwishOpBuilder(GraphBuilder* graph_builder); OpBuilder* CreateLogisticOpBuilder(GraphBuilder* graph_builder); OpBuilder* CreateMaxPool2dOpBuilder(GraphBuilder* graph_builder); +OpBuilder* CreateMirrorPadOpBuilder(GraphBuilder* graph_builder); OpBuilder* CreateMulOpBuilder(GraphBuilder* graph_builder); +// PAD handles PAD and PADV2 together. +OpBuilder* CreatePadOpBuilder(GraphBuilder* graph_builder); OpBuilder* CreateReluOpBuilder(GraphBuilder* graph_builder); OpBuilder* CreateReluN1To1OpBuilder(GraphBuilder* graph_builder); OpBuilder* CreateRelu6OpBuilder(GraphBuilder* graph_builder); diff --git a/tensorflow/lite/experimental/delegates/coreml/builders/pad_op_builder.cc b/tensorflow/lite/experimental/delegates/coreml/builders/pad_op_builder.cc new file mode 100644 index 00000000000..10ec8c32f4e --- /dev/null +++ b/tensorflow/lite/experimental/delegates/coreml/builders/pad_op_builder.cc @@ -0,0 +1,99 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/lite/experimental/delegates/coreml/builders/pad_op_builder.h" + +#include "tensorflow/lite/builtin_ops.h" +#include "tensorflow/lite/c/builtin_op_data.h" +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/experimental/delegates/coreml/builders/op_factory.h" +#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "tensorflow/lite/kernels/kernel_util.h" + +namespace tflite { +namespace delegates { +namespace coreml { + +const char* PadOpBuilder::DebugName() { + if (str_debug_name_[0]) return str_debug_name_; + GetDebugName(padding_type_ == PadType::kPad ? "PadOpBuilder (PAD)" + : "PadOpBuilder (MIRROR_PAD)", + node_id_, str_debug_name_); + return str_debug_name_; +} + +CoreML::Specification::NeuralNetworkLayer* PadOpBuilder::Build() { + layer_->set_name(DebugName()); + if (padding_type_ == PadType::kPad) { + layer_->mutable_padding()->mutable_constant(); + } else if (padding_type_ == PadType::kMirrorPad) { + layer_->mutable_padding()->mutable_reflection(); + } + return layer_.release(); +} + +// padding is d x 2 tensor, where d is the dimension of input. +// only paddings for width and height are considered. 
+void PadOpBuilder::SetPadding(const TfLiteTensor* padding) { + const int32_t* padding_data = GetTensorData(padding); + for (int i = 1; i <= 2; ++i) { + auto* borderamount = layer_->mutable_padding() + ->mutable_paddingamounts() + ->add_borderamounts(); + borderamount->set_startedgesize(padding_data[i * 2]); + borderamount->set_endedgesize(padding_data[i * 2 + 1]); + } +} + +void PadOpBuilder::SetConstantValue(const TfLiteTensor* constant_value) { + layer_->mutable_padding()->mutable_constant()->set_value( + GetTensorData(constant_value)[0]); +} + +TfLiteStatus PadOpBuilder::RegisterInputs(const TfLiteIntArray* inputs, + TfLiteContext* context) { + if (!(inputs->size == 2 || inputs->size == 3)) { + TF_LITE_KERNEL_LOG(context, "Wrong # of inputs to Padding!."); + return kTfLiteError; + } + AddInput(inputs->data[0]); + SetPadding(GetInput(context, tflite_node_, 1)); + if (inputs->size == 3) { + SetConstantValue(GetInput(context, tflite_node_, 2)); + } + + return kTfLiteOk; +} + +TfLiteStatus PadOpBuilder::RegisterOutputs(const TfLiteIntArray* outputs, + TfLiteContext* context) { + if (outputs->size != 1) { + TF_LITE_KERNEL_LOG(context, "Wrong # of outputs to Padding!."); + return kTfLiteError; + } + graph_builder_->AddTensorWithID(outputs->data[0], GetOutput(context)); + return kTfLiteOk; +} + +OpBuilder* CreatePadOpBuilder(GraphBuilder* graph_builder) { + return new PadOpBuilder(graph_builder, PadType::kPad); +} + +OpBuilder* CreateMirrorPadOpBuilder(GraphBuilder* graph_builder) { + return new PadOpBuilder(graph_builder, PadType::kMirrorPad); +} + +} // namespace coreml +} // namespace delegates +} // namespace tflite diff --git a/tensorflow/lite/experimental/delegates/coreml/builders/pad_op_builder.h b/tensorflow/lite/experimental/delegates/coreml/builders/pad_op_builder.h new file mode 100644 index 00000000000..3fb949a3fb7 --- /dev/null +++ b/tensorflow/lite/experimental/delegates/coreml/builders/pad_op_builder.h @@ -0,0 +1,55 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_LITE_EXPERIMENTAL_DELEGATES_COREML_BUILDERS_PAD_OP_BUILDER_H_ +#define TENSORFLOW_LITE_EXPERIMENTAL_DELEGATES_COREML_BUILDERS_PAD_OP_BUILDER_H_ + +#include "tensorflow/lite/builtin_ops.h" +#include "tensorflow/lite/experimental/delegates/coreml/builders/op_builder.h" + +namespace tflite { +namespace delegates { +namespace coreml { + +enum class PadType { kPad, kMirrorPad }; + +// Supports PAD, PADV2, MIRROR_PAD +class PadOpBuilder : public OpBuilder { + public: + explicit PadOpBuilder(GraphBuilder* graph_builder, PadType padding_type) + : OpBuilder(graph_builder), padding_type_(padding_type) {} + + const char* DebugName() override; + + CoreML::Specification::NeuralNetworkLayer* Build() override; + + TfLiteStatus RegisterInputs(const TfLiteIntArray* inputs, + TfLiteContext* context) override; + + TfLiteStatus RegisterOutputs(const TfLiteIntArray* outputs, + TfLiteContext* context) override; + + void SetPadding(const TfLiteTensor* padding); + + void SetConstantValue(const TfLiteTensor* constant_value); + + private: + PadType padding_type_; +}; + +} // namespace coreml +} // namespace delegates +} // namespace tflite + +#endif // TENSORFLOW_LITE_EXPERIMENTAL_DELEGATES_COREML_BUILDERS_PAD_OP_BUILDER_H_ diff --git a/tensorflow/lite/experimental/delegates/coreml/coreml_delegate.mm b/tensorflow/lite/experimental/delegates/coreml/coreml_delegate.mm index 3c6d6c57f5f..5b7e18dbe6b 100644 --- a/tensorflow/lite/experimental/delegates/coreml/coreml_delegate.mm +++ b/tensorflow/lite/experimental/delegates/coreml/coreml_delegate.mm @@ -106,10 +106,17 @@ bool IsNodeSupportedByDelegate(const TfLiteRegistration* registration, const TfL const auto* params = reinterpret_cast(node->builtin_data); return params != nullptr && params->activation == kTfLiteActNone; } + case kTfLiteBuiltinMirrorPad: { + return true; // TODO(b/153495339): will be updated in follow-up CL + } case kTfLiteBuiltinMul: { return node->builtin_data != nullptr && delegates::coreml::IsBinaryOpSupported(registration, node, context); } + case kTfLiteBuiltinPad: + case kTfLiteBuiltinPadv2: { + return true; // TODO(b/153495339): will be updated in follow-up CL + } case kTfLiteBuiltinRelu: { return true; } @@ -241,8 +248,7 @@ TfLiteStatus DelegatePrepare(TfLiteContext* context, TfLiteDelegate* delegate) { delegated_nodes.size(), partition_helper.num_total_nodes(), partition_helper.num_partitions()); return context->ReplaceNodeSubsetsWithDelegateKernels( - context, GetCoreMlKernelRegistration(), BuildTfLiteIntArray(delegated_nodes).get(), - delegate); + context, GetCoreMlKernelRegistration(), BuildTfLiteIntArray(delegated_nodes).get(), delegate); } TfLiteDelegate* CreateCoreMlDelegate(const TfLiteCoreMlDelegateOptions* options) { From 417cf2c35261c62bd736cda24fb9a09ff0af83dd Mon Sep 17 00:00:00 2001 From: Jing Pu Date: Tue, 21 Jul 2020 00:36:12 -0700 Subject: [PATCH 0911/2522] Internal change PiperOrigin-RevId: 322308044 Change-Id: I6ae5a489a783ff3fbb6888ba5e8557e2188ec944 --- third_party/mlir/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/third_party/mlir/BUILD b/third_party/mlir/BUILD index 2d091b04b85..12f73c54ec6 100644 --- a/third_party/mlir/BUILD +++ b/third_party/mlir/BUILD @@ -3271,6 +3271,7 @@ cc_library( ":QuantPassIncGen", ":SideEffectInterfaces", ":StandardOps", + ":TransformUtils", "@llvm-project//llvm:Support", ], ) From 3873154276b5d9f78fd82cedd658c808f4ea9c7b Mon Sep 17 00:00:00 2001 From: Taehee Jeong Date: Tue, 21 Jul 
2020 00:58:22 -0700 Subject: [PATCH 0912/2522] [Core ML delegate] Validation for padding ops The initial support only comes with 4D support. Due to Core ML's constraint, the padding should be constant and is only allowed for H and W dimensions PiperOrigin-RevId: 322310845 Change-Id: I8cd4d65c49f6300211899e678d1ac811aaaf0385 --- .../delegates/coreml/builders/op_validator.h | 4 ++ .../coreml/builders/pad_op_builder.cc | 44 +++++++++++++++++++ .../delegates/coreml/coreml_delegate.mm | 4 +- 3 files changed, 50 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/experimental/delegates/coreml/builders/op_validator.h b/tensorflow/lite/experimental/delegates/coreml/builders/op_validator.h index 501a304706c..b099fd7493a 100644 --- a/tensorflow/lite/experimental/delegates/coreml/builders/op_validator.h +++ b/tensorflow/lite/experimental/delegates/coreml/builders/op_validator.h @@ -31,6 +31,10 @@ bool IsDepthwiseConvolutionOpSupported(const TfLiteRegistration* registration, bool IsFullyConnectedOpSupported(const TfLiteRegistration* registration, const TfLiteNode* node, TfLiteContext* context); +bool IsMirrorPadOpSupported(const TfLiteRegistration* registration, + const TfLiteNode* node, TfLiteContext* context); +bool IsPadOpSupported(const TfLiteRegistration* registration, + const TfLiteNode* node, TfLiteContext* context); bool IsReshapeOpSupported(const TfLiteRegistration* registration, const TfLiteNode* node, TfLiteContext* context, int coreml_version); diff --git a/tensorflow/lite/experimental/delegates/coreml/builders/pad_op_builder.cc b/tensorflow/lite/experimental/delegates/coreml/builders/pad_op_builder.cc index 10ec8c32f4e..d8ef4f61ddb 100644 --- a/tensorflow/lite/experimental/delegates/coreml/builders/pad_op_builder.cc +++ b/tensorflow/lite/experimental/delegates/coreml/builders/pad_op_builder.cc @@ -94,6 +94,50 @@ OpBuilder* CreateMirrorPadOpBuilder(GraphBuilder* graph_builder) { return new PadOpBuilder(graph_builder, PadType::kMirrorPad); } +bool IsPadOpSupported(const TfLiteRegistration* registration, + const TfLiteNode* node, TfLiteContext* context) { + // padding is d x 2 tensor, where d is the dimension of input. 
+ const TfLiteTensor* padding = GetInput(context, node, 1); + if (!IsConstantTensor(padding)) { + TF_LITE_KERNEL_LOG(context, + "%s: Only constant padding is supported for PAD.", + padding->name); + return false; + } + if (padding->dims->data[0] != 4 || padding->dims->data[1] != 2) { + TF_LITE_KERNEL_LOG(context, "%s: Only 4D inputs are supported for PAD.", + padding->name); + return false; + } + const int32_t* padding_data = GetTensorData(padding); + if (!(padding_data[0] == 0 && padding_data[1] == 0)) { + TF_LITE_KERNEL_LOG( + context, "%s: Padding for batch dimension is not supported in PAD.", + padding->name); + return false; + } + + if (!(padding_data[6] == 0 && padding_data[7] == 0)) { + TF_LITE_KERNEL_LOG( + context, "%s: Padding for channel dimension is not supported in PAD.", + padding->name); + return false; + } + return true; +} + +bool IsMirrorPadOpSupported(const TfLiteRegistration* registration, + const TfLiteNode* node, TfLiteContext* context) { + auto* params = + reinterpret_cast(node->builtin_data); + if (params->mode != kTfLiteMirrorPaddingReflect) { + TF_LITE_KERNEL_LOG(context, + "Only REFLECT mode is supported for MIRROR_PAD."); + return false; + } + return IsPadOpSupported(registration, node, context); +} + } // namespace coreml } // namespace delegates } // namespace tflite diff --git a/tensorflow/lite/experimental/delegates/coreml/coreml_delegate.mm b/tensorflow/lite/experimental/delegates/coreml/coreml_delegate.mm index 5b7e18dbe6b..2cca58aa9fc 100644 --- a/tensorflow/lite/experimental/delegates/coreml/coreml_delegate.mm +++ b/tensorflow/lite/experimental/delegates/coreml/coreml_delegate.mm @@ -107,7 +107,7 @@ bool IsNodeSupportedByDelegate(const TfLiteRegistration* registration, const TfL return params != nullptr && params->activation == kTfLiteActNone; } case kTfLiteBuiltinMirrorPad: { - return true; // TODO(b/153495339): will be updated in follow-up CL + return delegates::coreml::IsMirrorPadOpSupported(registration, node, context); } case kTfLiteBuiltinMul: { return node->builtin_data != nullptr && @@ -115,7 +115,7 @@ bool IsNodeSupportedByDelegate(const TfLiteRegistration* registration, const TfL } case kTfLiteBuiltinPad: case kTfLiteBuiltinPadv2: { - return true; // TODO(b/153495339): will be updated in follow-up CL + return delegates::coreml::IsPadOpSupported(registration, node, context); } case kTfLiteBuiltinRelu: { return true; From 308f78ffb33b2831d76704094be7f49e4ce5de85 Mon Sep 17 00:00:00 2001 From: Taehee Jeong Date: Tue, 21 Jul 2020 01:05:38 -0700 Subject: [PATCH 0913/2522] [Core ML delegate] Update documentation regarding padding op support PiperOrigin-RevId: 322311827 Change-Id: I461c37c5d36912a0e649561fb9ab7f87a32abdae --- .../lite/experimental/delegates/coreml/README.md | 14 ++++++++++++++ .../lite/g3doc/performance/coreml_delegate.md | 6 ++++++ 2 files changed, 20 insertions(+) diff --git a/tensorflow/lite/experimental/delegates/coreml/README.md b/tensorflow/lite/experimental/delegates/coreml/README.md index fa2e2a8d68a..1353bf92e48 100644 --- a/tensorflow/lite/experimental/delegates/coreml/README.md +++ b/tensorflow/lite/experimental/delegates/coreml/README.md @@ -72,20 +72,34 @@ Following ops are supported by the Core ML delegate. * Weights and bias should be constant. * DepthwiseConv2D * Weights and bias should be constant. +* FullyConnected (aka Dense or InnerProduct) + * Weights and bias (if present) should be constant. + * Only supports single-batch case. Input dimensions should be 1, except + the last dimension. 
* Hardswish * Logistic (aka Sigmoid) * MaxPool2D +* MirrorPad + * Only 4D input with `REFLECT` mode is supported. Padding should be + constant, and is only allowed for H and W dimensions. * Mul * Only certain shapes are broadcastable. In Core ML tensor layout, following tensor shapes are broadcastable. `[B, C, H, W]`, `[B, C, 1, 1]`, `[B, 1, H, W]`, `[B, 1, 1, 1]`. +* Pad and PadV2 + * Only 4D input is supported. Padding should be constant, and is only + allowed for H and W dimensions. * Relu * ReluN1To1 * Relu6 * Reshape + * Only supported when target Core ML version is 2, not supported when + targeting Core ML 3. * ResizeBilinear * SoftMax * Tanh +* TransposeConv + * Weights should be constant. ## FAQ diff --git a/tensorflow/lite/g3doc/performance/coreml_delegate.md b/tensorflow/lite/g3doc/performance/coreml_delegate.md index d3f597eefcb..b9ae477fe14 100644 --- a/tensorflow/lite/g3doc/performance/coreml_delegate.md +++ b/tensorflow/lite/g3doc/performance/coreml_delegate.md @@ -195,10 +195,16 @@ Following ops are supported by the Core ML delegate. * Hardswish * Logistic (aka Sigmoid) * MaxPool2D +* MirrorPad + * Only 4D input with `REFLECT` mode is supported. Padding should be + constant, and is only allowed for H and W dimensions. * Mul * Only certain shapes are broadcastable. In Core ML tensor layout, following tensor shapes are broadcastable. `[B, C, H, W]`, `[B, C, 1, 1]`, `[B, 1, H, W]`, `[B, 1, 1, 1]`. +* Pad and PadV2 + * Only 4D input is supported. Padding should be constant, and is only + allowed for H and W dimensions. * Relu * ReluN1To1 * Relu6 From e6895cb883a63b4b6a46776cdcf43d160c80a5fd Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Mon, 20 Jul 2020 12:24:22 +0700 Subject: [PATCH 0914/2522] stat_cache for RandomAccessFile/LoadBufferFromGCS --- .../filesystem/plugins/gcs/gcs_filesystem.cc | 72 +++++++++++++++---- 1 file changed, 58 insertions(+), 14 deletions(-) diff --git a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc index 44a8ffb57bd..d693673c209 100644 --- a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc @@ -106,11 +106,12 @@ static void MaybeAppendSlash(std::string* name) { // A helper function to actually read the data from GCS. 
static int64_t LoadBufferFromGCS(const std::string& path, size_t offset, size_t buffer_size, char* buffer, - gcs::Client* gcs_client, TF_Status* status) { + tf_gcs_filesystem::GCSFile* gcs_file, + TF_Status* status) { std::string bucket, object; ParseGCSPath(path, false, &bucket, &object, status); if (TF_GetCode(status) != TF_OK) return -1; - auto stream = gcs_client->ReadObject( + auto stream = gcs_file->gcs_client.ReadObject( bucket, object, gcs::ReadRange(offset, offset + buffer_size)); TF_SetStatusFromGCSStatus(stream.status(), status); if ((TF_GetCode(status) != TF_OK) && @@ -120,11 +121,28 @@ static int64_t LoadBufferFromGCS(const std::string& path, size_t offset, int64_t read; if (!absl::SimpleAtoi(stream.headers().find("content-length")->second, &read)) { - TF_SetStatus(status, TF_UNKNOWN, "Could not get content-length header"); - return -1; + if (TF_GetCode(status) == TF_OUT_OF_RANGE) { + read = 0; + } else { + TF_SetStatus(status, TF_UNKNOWN, "Could not get content-length header"); + return -1; + } } stream.read(buffer, read); - return stream.gcount(); + read = stream.gcount(); + if (read < buffer_size) { + // Check stat cache to see if we encountered an interrupted read. + tf_gcs_filesystem::GcsFileStat stat; + if (gcs_file->stat_cache->Lookup(path, &stat)) { + if (offset + read < stat.base.length) { + TF_SetStatus(status, TF_INTERNAL, + absl::StrCat("File contents are inconsistent for file: ", + path, " @ ", offset) + .c_str()); + } + } + } + return read; } // SECTION 1. Implementation for `TF_RandomAccessFile` @@ -405,13 +423,12 @@ GCSFile::GCSFile(google::cloud::storage::Client&& gcs_client) max_staleness = value; } - auto gcs_client_ptr = &this->gcs_client; file_block_cache = std::make_unique( block_size, max_bytes, max_staleness, - [gcs_client_ptr](const std::string& filename, size_t offset, - size_t buffer_size, char* buffer, TF_Status* status) { - return LoadBufferFromGCS(filename, offset, buffer_size, buffer, - gcs_client_ptr, status); + [this](const std::string& filename, size_t offset, size_t buffer_size, + char* buffer, TF_Status* status) { + return LoadBufferFromGCS(filename, offset, buffer_size, buffer, this, + status); }); uint64_t stat_cache_max_age = kStatCacheDefaultMaxAge; @@ -443,6 +460,19 @@ void Cleanup(TF_Filesystem* filesystem) { delete gcs_file; } +static void UncachedStatForObject(const std::string& bucket, + const std::string& object, GcsFileStat* stat, + gcs::Client* gcs_client, TF_Status* status) { + auto metadata = gcs_client->GetObjectMetadata(bucket, object); + if (!metadata) return TF_SetStatusFromGCSStatus(metadata.status(), status); + stat->generation_number = metadata->generation(); + stat->base.length = metadata->size(); + stat->base.mtime_nsec = + metadata->time_storage_class_updated().time_since_epoch().count(); + stat->base.is_directory = object.back() == '/'; + return TF_SetStatus(status, TF_OK, ""); +} + // TODO(vnvo2409): Implement later void NewRandomAccessFile(const TF_Filesystem* filesystem, const char* path, TF_RandomAccessFile* file, TF_Status* status) { @@ -456,17 +486,31 @@ void NewRandomAccessFile(const TF_Filesystem* filesystem, const char* path, absl::MutexLock l(&gcs_file->block_cache_lock); is_cache_enabled = gcs_file->file_block_cache->IsCacheEnabled(); } - auto read_fn = [gcs_file, is_cache_enabled]( + auto read_fn = [gcs_file, is_cache_enabled, bucket, object]( const std::string& path, uint64_t offset, size_t n, char* buffer, TF_Status* status) -> int64_t { - // TODO(vnvo2409): Check for `stat_cache`. 
int64_t read = 0; if (is_cache_enabled) { absl::ReaderMutexLock l(&gcs_file->block_cache_lock); + GcsFileStat stat; + gcs_file->stat_cache->LookupOrCompute( + path, &stat, + [gcs_file, bucket, object](const std::string& path, GcsFileStat* stat, + TF_Status* status) { + UncachedStatForObject(bucket, object, stat, &gcs_file->gcs_client, + status); + }, + status); + if (TF_GetCode(status) != TF_OK) return -1; + if (!gcs_file->file_block_cache->ValidateAndUpdateFileSignature( + path, stat.generation_number)) { + std::cout + << "File signature has been changed. Refreshing the cache. Path: " + << path; + } read = gcs_file->file_block_cache->Read(path, offset, n, buffer, status); } else { - read = LoadBufferFromGCS(path, offset, n, buffer, &gcs_file->gcs_client, - status); + read = LoadBufferFromGCS(path, offset, n, buffer, gcs_file, status); } if (TF_GetCode(status) != TF_OK) return -1; if (read < n) From 4343d75c5153470198d970166497fb5b9c4961f5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 21 Jul 2020 02:01:45 -0700 Subject: [PATCH 0915/2522] compat: Update forward compatibility horizon to 2020-07-21 PiperOrigin-RevId: 322317384 Change-Id: I5d2077a8b252286cd518aa35408edd3b0ac91fa1 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index c804dbfc786..9d79a790cfc 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 20) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 21) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From 3f05eefa39d0d91d62099defe25d35baef6552d5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 21 Jul 2020 02:01:46 -0700 Subject: [PATCH 0916/2522] Update GraphDef version to 469. PiperOrigin-RevId: 322317389 Change-Id: I6a1e12c625a66b8c6654087048a3117de0fa4329 --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 28ead43a284..12c7579fa7d 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 468 // Updated: 2020/7/20 +#define TF_GRAPH_DEF_VERSION 469 // Updated: 2020/7/21 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From 8599f29645b1eb90a4a18d328e92c97976862f02 Mon Sep 17 00:00:00 2001 From: Tres Popp Date: Tue, 21 Jul 2020 02:06:21 -0700 Subject: [PATCH 0917/2522] Change cast to dyn_cast in hlo::ReshapeOp's verification. With cast, a failing verification results in an assertion error rather than returning a failing status. 
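As a minimal illustration of the distinction (a hedged sketch, not the actual mhlo verifier code; the helper name and include paths are assumptions for this LLVM revision):

```
#include "mlir/IR/StandardTypes.h"      // RankedTensorType (path assumed)
#include "mlir/IR/Value.h"
#include "mlir/Support/LogicalResult.h"

using namespace mlir;

// cast<RankedTensorType>() asserts (aborts the process) when the operand type
// is not a ranked tensor, so a malformed op crashes inside the verifier.
// dyn_cast<>() returns a null Type instead, which lets the verifier skip the
// check or return a failing status gracefully.
static LogicalResult VerifyOperandHasStaticShape(Value operand) {
  auto ranked_ty = operand.getType().dyn_cast<RankedTensorType>();
  if (!ranked_ty || !ranked_ty.hasStaticShape()) return success();
  // ... checks that require a static shape would go here ...
  return success();
}
```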
PiperOrigin-RevId: 322317937 Change-Id: Ia5bae056a2876a0cab28233d5523ae2137e0c7d8 --- tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc index ee898828b76..411c6583861 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc @@ -1468,7 +1468,7 @@ static LogicalResult Verify(PadOp op) { static LogicalResult Verify(ReshapeOp op) { // If the operand type is dynamically shaped there is nothing to verify. - auto operand_ty = op.operand().getType().cast(); + auto operand_ty = op.operand().getType().dyn_cast(); if (!operand_ty || !operand_ty.hasStaticShape()) return success(); // If the operand type is statically shaped (not required) the number of From b9b7a299310b6818c54fb27688859d1a9b950747 Mon Sep 17 00:00:00 2001 From: zilinzhu Date: Tue, 21 Jul 2020 17:34:06 +0800 Subject: [PATCH 0918/2522] modifies based on reviews --- tensorflow/core/lib/io/record_reader.cc | 17 +++++++- tensorflow/core/lib/io/record_reader.h | 11 +++-- .../core/lib/io/record_reader_writer_test.cc | 43 +++++++++++++++++-- 3 files changed, 62 insertions(+), 9 deletions(-) diff --git a/tensorflow/core/lib/io/record_reader.cc b/tensorflow/core/lib/io/record_reader.cc index f4fc2089065..3492d851a77 100644 --- a/tensorflow/core/lib/io/record_reader.cc +++ b/tensorflow/core/lib/io/record_reader.cc @@ -208,11 +208,13 @@ Status RecordReader::ReadRecord(uint64* offset, tstring* record) { return Status::OK(); } -Status RecordReader::SkipRecords(uint64* offset, int num_to_skip) { +Status RecordReader::SkipRecords(uint64* offset, int num_to_skip, + int* num_skipped) { TF_RETURN_IF_ERROR(PositionInputStream(*offset)); Status s; tstring record; + *num_skipped = 0; for (int i = 0; i < num_to_skip; ++i) { s = ReadChecksummed(*offset, sizeof(uint64), &record); if (!s.ok()) { @@ -220,10 +222,21 @@ Status RecordReader::SkipRecords(uint64* offset, int num_to_skip) { return s; } const uint64 length = core::DecodeFixed64(record.data()); - input_stream_->SkipNBytes(length + kFooterSize); + + // Skip data + s = input_stream_->SkipNBytes(length + kFooterSize); + if (!s.ok()) { + last_read_failed_ = true; + if (errors::IsOutOfRange(s)) { + s = errors::DataLoss("truncated record at ", *offset); + } + return s; + } *offset += kHeaderSize + length + kFooterSize; DCHECK_EQ(*offset, input_stream_->Tell()); + (*num_skipped)++; } + return Status::OK(); } SequentialRecordReader::SequentialRecordReader( diff --git a/tensorflow/core/lib/io/record_reader.h b/tensorflow/core/lib/io/record_reader.h index 3d27d7d26a4..34080b954e3 100644 --- a/tensorflow/core/lib/io/record_reader.h +++ b/tensorflow/core/lib/io/record_reader.h @@ -100,8 +100,9 @@ class RecordReader { // Skip num_to_skip record starting at "*offset" and update *offset // to point to the offset of the next num_to_skip + 1 record. // Return OK on success, OUT_OF_RANGE for end of file, or something - // else for an error. - Status SkipRecords(uint64* offset, int num_to_skip); + // else for an error. "*num_skipped" records the number of records that + // are actually skipped. It should be equal to num_to_skip on success. + Status SkipRecords(uint64* offset, int num_to_skip, int* num_skipped); // Return the metadata of the Record file. 
// @@ -148,8 +149,10 @@ class SequentialRecordReader { // Skip the next num_to_skip record in the file. Return OK on success, // OUT_OF_RANGE for end of file, or something else for an error. - Status SkipRecords(int num_to_skip) { - return underlying_.SkipRecords(&offset_, num_to_skip); + // "*num_skipped" records the number of records that are actually skipped. + // It should be equal to num_to_skip on success. + Status SkipRecords(int num_to_skip, int* num_skipped) { + return underlying_.SkipRecords(&offset_, num_to_skip, num_skipped); } // Return the current offset in the file. diff --git a/tensorflow/core/lib/io/record_reader_writer_test.cc b/tensorflow/core/lib/io/record_reader_writer_test.cc index 6a314388690..60f22cab6c5 100644 --- a/tensorflow/core/lib/io/record_reader_writer_test.cc +++ b/tensorflow/core/lib/io/record_reader_writer_test.cc @@ -158,9 +158,9 @@ TEST(RecordReaderWriterTest, TestBasics) { } } -TEST(RecordReaderWriterTest, TestSkip) { +TEST(RecordReaderWriterTest, TestSkipBasic) { Env* env = Env::Default(); - string fname = testing::TmpDir() + "/record_reader_writer_skip_est"; + string fname = testing::TmpDir() + "/record_reader_writer_skip_basic_test"; for (auto buf_size : BufferSizes()) { { @@ -184,14 +184,51 @@ TEST(RecordReaderWriterTest, TestSkip) { options.zlib_options.input_buffer_size = buf_size; io::RecordReader reader(read_file.get(), options); uint64 offset = 0; + int num_skipped; tstring record; - TF_CHECK_OK(reader.SkipRecords(&offset, 2)); + TF_CHECK_OK(reader.SkipRecords(&offset, 2, &num_skipped)); + EXPECT_EQ(2, num_skipped); TF_CHECK_OK(reader.ReadRecord(&offset, &record)); EXPECT_EQ("hij", record); } } } +TEST(RecordReaderWriterTest, TestSkipOutOfRange) { + Env* env = Env::Default(); + string fname = testing::TmpDir() + + "/record_reader_writer_skip_out_of_range_test"; + + for (auto buf_size : BufferSizes()) { + { + std::unique_ptr file; + TF_CHECK_OK(env->NewWritableFile(fname, &file)); + + io::RecordWriterOptions options; + options.zlib_options.output_buffer_size = buf_size; + io::RecordWriter writer(file.get(), options); + TF_EXPECT_OK(writer.WriteRecord("abc")); + TF_EXPECT_OK(writer.WriteRecord("defg")); + TF_CHECK_OK(writer.Flush()); + } + + { + std::unique_ptr read_file; + // Read it back with the RecordReader. + TF_CHECK_OK(env->NewRandomAccessFile(fname, &read_file)); + io::RecordReaderOptions options; + options.zlib_options.input_buffer_size = buf_size; + io::RecordReader reader(read_file.get(), options); + uint64 offset = 0; + int num_skipped; + tstring record; + Status s = reader.SkipRecords(&offset, 3, &num_skipped); + EXPECT_EQ(2, num_skipped); + EXPECT_EQ(error::OUT_OF_RANGE, s.code()); + } + } +} + TEST(RecordReaderWriterTest, TestSnappy) { Env* env = Env::Default(); string fname = testing::TmpDir() + "/record_reader_writer_snappy_test"; From 76e62bedc47716f02a7e3c3b19d8fe4461780aaa Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 21 Jul 2020 03:04:25 -0700 Subject: [PATCH 0919/2522] Integrate LLVM at llvm/llvm-project@30371df85f3e Updates LLVM usage to match [30371df85f3e](https://github.com/llvm/llvm-project/commit/30371df85f3e) PiperOrigin-RevId: 322324674 Change-Id: I373b6992e8e9672efc5dcdb31327f1711068ba59 --- tensorflow/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index a515b3e8939..a1ec3dcb0be 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -711,8 +711,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "fc24d1eaddd8c0618e3ef3ab395029a0238d4568" - LLVM_SHA256 = "da5ad5dcf9d5360bc5c3715419bf0e747dba37d61725f85765a6e24844354212" + LLVM_COMMIT = "30371df85f3e6fc2352647566f92c9079f77b2c7" + LLVM_SHA256 = "432d415dfa3de87f64e147aa0db7f432c707692bdd53d524f8d7185435380864" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From edeb44f86e89c617e9bd7a68129fde91d8d1dd50 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Tue, 21 Jul 2020 16:07:54 +0700 Subject: [PATCH 0920/2522] Fix problems related to `TF_OUT_OF_RANGE` --- .../filesystem/plugins/gcs/gcs_filesystem.cc | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc index d693673c209..d543a845524 100644 --- a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc @@ -121,6 +121,9 @@ static int64_t LoadBufferFromGCS(const std::string& path, size_t offset, int64_t read; if (!absl::SimpleAtoi(stream.headers().find("content-length")->second, &read)) { + // When we read a file with offset that is bigger than the actual file size. + // GCS will return an empty header (e.g no `content-length` header). In this + // case, we will set read to `0` and continue. if (TF_GetCode(status) == TF_OUT_OF_RANGE) { read = 0; } else { @@ -128,6 +131,8 @@ static int64_t LoadBufferFromGCS(const std::string& path, size_t offset, return -1; } } + // `TF_OUT_OF_RANGE` isn't considered as an error. So we clear it here. + TF_SetStatus(status, TF_OK, ""); stream.read(buffer, read); read = stream.gcount(); if (read < buffer_size) { @@ -216,13 +221,13 @@ int64_t Read(const TF_RandomAccessFile* file, uint64_t offset, size_t n, (std::min)(n - copy_size, gcs_file->buffer.size()); memcpy(buffer + copy_size, gcs_file->buffer.data(), remaining_copy); copy_size += remaining_copy; - if (copy_size < n) { - // Forget the end-of-file flag to allow for clients that poll on the - // same file. - gcs_file->buffer_end_is_past_eof = false; - TF_SetStatus(status, TF_OUT_OF_RANGE, "Read less bytes than requested"); - return copy_size; - } + } + if (copy_size < n) { + // Forget the end-of-file flag to allow for clients that poll on the + // same file. 
+ gcs_file->buffer_end_is_past_eof = false; + TF_SetStatus(status, TF_OUT_OF_RANGE, "Read less bytes than requested"); + return copy_size; } TF_SetStatus(status, TF_OK, ""); return copy_size; From 35c3d223d224395c1db1979d9ad8492fc693b29e Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Mon, 20 Jul 2020 13:48:20 +0700 Subject: [PATCH 0921/2522] Add init for test and remove init from setup. --- .../filesystem/plugins/gcs/gcs_filesystem.cc | 35 +++++++++++++++++++ .../filesystem/plugins/gcs/gcs_filesystem.h | 11 ++++++ .../plugins/gcs/gcs_filesystem_test.cc | 24 ++++++++++--- 3 files changed, 66 insertions(+), 4 deletions(-) diff --git a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc index d543a845524..e8b8f905d48 100644 --- a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc @@ -448,6 +448,41 @@ GCSFile::GCSFile(google::cloud::storage::Client&& gcs_client) stat_cache_max_age, stat_cache_max_entries); } +GCSFile::GCSFile(google::cloud::storage::Client&& gcs_client, bool compose, + uint64_t block_size, size_t max_bytes, uint64_t max_staleness, + uint64_t stat_cache_max_age, size_t stat_cache_max_entries) + : gcs_client(gcs_client), + compose(compose), + block_size(block_size), + block_cache_lock() { + file_block_cache = std::make_unique( + block_size, max_bytes, max_staleness, + [this](const std::string& filename, size_t offset, size_t buffer_size, + char* buffer, TF_Status* status) { + return LoadBufferFromGCS(filename, offset, buffer_size, buffer, this, + status); + }); + stat_cache = std::make_unique>( + stat_cache_max_age, stat_cache_max_entries); +} + +void InitTest(TF_Filesystem* filesystem, bool compose, uint64_t block_size, + size_t max_bytes, uint64_t max_staleness, + uint64_t stat_cache_max_age, size_t stat_cache_max_entries, + TF_Status* status) { + google::cloud::StatusOr client = + gcs::Client::CreateDefaultClient(); + if (!client) { + TF_SetStatusFromGCSStatus(client.status(), status); + return; + } + + filesystem->plugin_filesystem = + new GCSFile(std::move(client.value()), compose, block_size, max_bytes, + max_staleness, stat_cache_max_age, stat_cache_max_entries); + TF_SetStatus(status, TF_OK, ""); +} + void Init(TF_Filesystem* filesystem, TF_Status* status) { google::cloud::StatusOr client = gcs::Client::CreateDefaultClient(); diff --git a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.h b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.h index 93f5d99d5da..973ce9e9dc2 100644 --- a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.h +++ b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.h @@ -62,8 +62,19 @@ typedef struct GCSFile { // of block_size. std::unique_ptr> stat_cache; GCSFile(google::cloud::storage::Client&& gcs_client); + // This constructor is used for testing purpose only. + GCSFile(google::cloud::storage::Client&& gcs_client, bool compose, + uint64_t block_size, size_t max_bytes, uint64_t max_staleness, + uint64_t stat_cache_max_age, size_t stat_cache_max_entries); } GCSFile; +// This function is used to initialize a filesystem without the need of setting +// manually environement variables. 
+void InitTest(TF_Filesystem* filesystem, bool compose, uint64_t block_size, + size_t max_bytes, uint64_t max_staleness, + uint64_t stat_cache_max_age, size_t stat_cache_max_entries, + TF_Status* status); + void Init(TF_Filesystem* filesystem, TF_Status* status); void Cleanup(TF_Filesystem* filesystem); void NewRandomAccessFile(const TF_Filesystem* filesystem, const char* path, diff --git a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem_test.cc b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem_test.cc index 0e3c855d6c6..140dc719348 100644 --- a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem_test.cc +++ b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem_test.cc @@ -66,6 +66,9 @@ static std::string* GetTmpDir() { namespace tensorflow { namespace { +// TODO(vnvo2409): Refactor `gcs_filesystem_test` to remove unnecessary tests +// after porting all tests from +// `//tensorflow/core/platform/cloud:gcs_file_system_test`. class GCSFilesystemTest : public ::testing::Test { public: void SetUp() override { @@ -74,13 +77,14 @@ class GCSFilesystemTest : public ::testing::Test { ::testing::UnitTest::GetInstance()->current_test_info()->name()); status_ = TF_NewStatus(); filesystem_ = new TF_Filesystem; - tf_gcs_filesystem::Init(filesystem_, status_); - ASSERT_TF_OK(status_) << "Could not initialize filesystem. " - << TF_Message(status_); + filesystem_->plugin_filesystem = nullptr; + // Because different tests requires different setup for filesystem. We + // initialize filesystem in each testcase. } void TearDown() override { TF_DeleteStatus(status_); - tf_gcs_filesystem::Cleanup(filesystem_); + if (filesystem_->plugin_filesystem != nullptr) + tf_gcs_filesystem::Cleanup(filesystem_); delete filesystem_; } @@ -172,6 +176,9 @@ TEST_F(GCSFilesystemTest, ParseGCSPath) { } TEST_F(GCSFilesystemTest, RandomAccessFile) { + tf_gcs_filesystem::Init(filesystem_, status_); + ASSERT_TF_OK(status_) << "Could not initialize filesystem. " + << TF_Message(status_); std::string filepath = GetURIForPath("a_file"); TF_RandomAccessFile* file = new TF_RandomAccessFile; tf_gcs_filesystem::NewRandomAccessFile(filesystem_, filepath.c_str(), file, @@ -208,6 +215,9 @@ TEST_F(GCSFilesystemTest, RandomAccessFile) { } TEST_F(GCSFilesystemTest, WritableFile) { + tf_gcs_filesystem::Init(filesystem_, status_); + ASSERT_TF_OK(status_) << "Could not initialize filesystem. " + << TF_Message(status_); std::string filepath = GetURIForPath("a_file"); TF_WritableFile* file = new TF_WritableFile; tf_gcs_filesystem::NewWritableFile(filesystem_, filepath.c_str(), file, @@ -273,6 +283,9 @@ TEST_F(GCSFilesystemTest, WritableFile) { } TEST_F(GCSFilesystemTest, ReadOnlyMemoryRegion) { + tf_gcs_filesystem::Init(filesystem_, status_); + ASSERT_TF_OK(status_) << "Could not initialize filesystem. 
" + << TF_Message(status_); std::string path = GetURIForPath("a_file"); auto gcs_file = static_cast(filesystem_->plugin_filesystem); @@ -298,6 +311,9 @@ TEST_F(GCSFilesystemTest, ReadOnlyMemoryRegion) { delete region; } +// These tests below are ported from +// `//tensorflow/core/platform/cloud:gcs_file_system_test` + } // namespace } // namespace tensorflow From 72e7b50945f52e6d9e1ca4dfb0a5b361466091c3 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Tue, 21 Jul 2020 01:04:56 +0700 Subject: [PATCH 0922/2522] Add test RandomAccessFile_NoBlockCache --- .../plugins/gcs/gcs_filesystem_test.cc | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem_test.cc b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem_test.cc index 140dc719348..ff64ac52ee2 100644 --- a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem_test.cc +++ b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem_test.cc @@ -121,6 +121,21 @@ class GCSFilesystemTest : public ::testing::Test { } } +::testing::AssertionResult InsertObject(const std::string& path, + const std::string& content, + gcs::Client* gcs_client, + TF_Status* status) { + std::string bucket, object; + ParseGCSPath(path, false, &bucket, &object, status); + if (TF_GetCode(status) != TF_OK) + return ::testing::AssertionFailure() << TF_Message(status); + auto metadata = gcs_client->InsertObject(bucket, object, content); + if (metadata) + return ::testing::AssertionSuccess(); + else + return ::testing::AssertionFailure() << metadata.status().message(); +} + ::testing::AssertionResult CompareSubString(int64_t offset, size_t length, absl::string_view result, size_t read) { @@ -313,6 +328,33 @@ TEST_F(GCSFilesystemTest, ReadOnlyMemoryRegion) { // These tests below are ported from // `//tensorflow/core/platform/cloud:gcs_file_system_test` +TEST_F(GCSFilesystemTest, RandomAccessFile_NoBlockCache) { + tf_gcs_filesystem::InitTest(filesystem_, false, 0, 0, 0, 0, 0, status_); + ASSERT_TF_OK(status_) << "Could not initialize filesystem. 
" + << TF_Message(status_); + std::string path = GetURIForPath("a_file"); + auto gcs_file = + static_cast(filesystem_->plugin_filesystem); + ASSERT_TRUE(InsertObject(path, "0123456789", &gcs_file->gcs_client, status_)); + + TF_RandomAccessFile* file = new TF_RandomAccessFile; + tf_gcs_filesystem::NewRandomAccessFile(filesystem_, path.c_str(), file, + status_); + ASSERT_TF_OK(status_); + + std::string result; + result.resize(6); + int64_t read = tf_random_access_file::Read(file, 0, 6, &result[0], status_); + ASSERT_EQ(read, 6) << "Read: " << read << "\n"; + ASSERT_TF_OK(status_); + ASSERT_EQ(result, "012345") << "Result: " << result << "\n"; + + read = tf_random_access_file::Read(file, 6, 6, &result[0], status_); + ASSERT_EQ(read, 4) << "Read: " << read << "\n"; + ASSERT_EQ(TF_GetCode(status_), TF_OUT_OF_RANGE) << TF_Message(status_); + result.resize(read); + ASSERT_EQ(result, "6789") << "Result: " << result << "\n"; +} } // namespace } // namespace tensorflow From b6d9b11d41b7ab7dbeef4f8f59a38f2066b4cbd4 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Tue, 21 Jul 2020 01:13:24 +0700 Subject: [PATCH 0923/2522] Add test NewRandomAccessFile_Buffered --- .../plugins/gcs/gcs_filesystem_test.cc | 30 ++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem_test.cc b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem_test.cc index ff64ac52ee2..729f1e387dc 100644 --- a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem_test.cc +++ b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem_test.cc @@ -328,7 +328,7 @@ TEST_F(GCSFilesystemTest, ReadOnlyMemoryRegion) { // These tests below are ported from // `//tensorflow/core/platform/cloud:gcs_file_system_test` -TEST_F(GCSFilesystemTest, RandomAccessFile_NoBlockCache) { +TEST_F(GCSFilesystemTest, NewRandomAccessFile_NoBlockCache) { tf_gcs_filesystem::InitTest(filesystem_, false, 0, 0, 0, 0, 0, status_); ASSERT_TF_OK(status_) << "Could not initialize filesystem. " << TF_Message(status_); @@ -356,6 +356,34 @@ TEST_F(GCSFilesystemTest, RandomAccessFile_NoBlockCache) { ASSERT_EQ(result, "6789") << "Result: " << result << "\n"; } +TEST_F(GCSFilesystemTest, NewRandomAccessFile_Buffered) { + tf_gcs_filesystem::InitTest(filesystem_, false, 10, 0, 0, 0, 0, status_); + ASSERT_TF_OK(status_) << "Could not initialize filesystem. 
" + << TF_Message(status_); + std::string path = GetURIForPath("a_file"); + auto gcs_file = + static_cast(filesystem_->plugin_filesystem); + ASSERT_TRUE(InsertObject(path, "0123456789", &gcs_file->gcs_client, status_)); + + TF_RandomAccessFile* file = new TF_RandomAccessFile; + tf_gcs_filesystem::NewRandomAccessFile(filesystem_, path.c_str(), file, + status_); + ASSERT_TF_OK(status_); + + std::string result; + result.resize(6); + int64_t read = tf_random_access_file::Read(file, 0, 6, &result[0], status_); + ASSERT_EQ(read, 6) << "Read: " << read << "\n"; + ASSERT_TF_OK(status_); + ASSERT_EQ(result, "012345") << "Result: " << result << "\n"; + + read = tf_random_access_file::Read(file, 6, 6, &result[0], status_); + ASSERT_EQ(read, 4) << "Read: " << read << "\n"; + ASSERT_EQ(TF_GetCode(status_), TF_OUT_OF_RANGE) << TF_Message(status_); + result.resize(read); + ASSERT_EQ(result, "6789") << "Result: " << result << "\n"; +} + } // namespace } // namespace tensorflow From e8e12d5efc6e93fd243b99a0d938827712560933 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Tue, 21 Jul 2020 01:21:45 +0700 Subject: [PATCH 0924/2522] Add test NewRandomAccessFile_Buffered_ReadAtEOF --- .../plugins/gcs/gcs_filesystem_test.cc | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem_test.cc b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem_test.cc index 729f1e387dc..0944750284c 100644 --- a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem_test.cc +++ b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem_test.cc @@ -384,6 +384,34 @@ TEST_F(GCSFilesystemTest, NewRandomAccessFile_Buffered) { ASSERT_EQ(result, "6789") << "Result: " << result << "\n"; } +TEST_F(GCSFilesystemTest, NewRandomAccessFile_Buffered_ReadAtEOF) { + tf_gcs_filesystem::InitTest(filesystem_, false, 10, 0, 0, 0, 0, status_); + ASSERT_TF_OK(status_) << "Could not initialize filesystem. " + << TF_Message(status_); + std::string path = GetURIForPath("a_file"); + auto gcs_file = + static_cast(filesystem_->plugin_filesystem); + ASSERT_TRUE(InsertObject(path, "0123456789", &gcs_file->gcs_client, status_)); + + TF_RandomAccessFile* file = new TF_RandomAccessFile; + tf_gcs_filesystem::NewRandomAccessFile(filesystem_, path.c_str(), file, + status_); + ASSERT_TF_OK(status_); + + std::string result; + result.resize(10); + int64_t read = tf_random_access_file::Read(file, 0, result.length(), &result[0], status_); + ASSERT_EQ(read, 10) << "Read: " << read << "\n"; + ASSERT_TF_OK(status_); + ASSERT_EQ(result, "0123456789") << "Result: " << result << "\n"; + + read = tf_random_access_file::Read(file, result.length(), result.length(), &result[0], status_); + ASSERT_EQ(read, 0) << "Read: " << read << "\n"; + ASSERT_EQ(TF_GetCode(status_), TF_OUT_OF_RANGE) << TF_Message(status_); + result.resize(read); + ASSERT_EQ(result, "") << "Result: " << result << "\n"; +} + } // namespace } // namespace tensorflow From 9d3aad8797f2068974fb0269609fe644c15aadd5 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 21 Jul 2020 05:30:44 -0700 Subject: [PATCH 0925/2522] Integrate LLVM at llvm/llvm-project@becaa6803ab5 Updates LLVM usage to match [becaa6803ab5](https://github.com/llvm/llvm-project/commit/becaa6803ab5) PiperOrigin-RevId: 322341078 Change-Id: I9a57a9d3f8aecf932cac0a98f0c902912c5a315a --- tensorflow/workspace.bzl | 4 ++-- third_party/llvm/llvm.autogenerated.BUILD | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index a1ec3dcb0be..5d25b38b159 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -711,8 +711,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "30371df85f3e6fc2352647566f92c9079f77b2c7" - LLVM_SHA256 = "432d415dfa3de87f64e147aa0db7f432c707692bdd53d524f8d7185435380864" + LLVM_COMMIT = "becaa6803ab532d15506829f0551a5fa49c39d7e" + LLVM_SHA256 = "4dd3797959716010c355ee327b7649b98e1da491e198421d258b5c515d677a40" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), diff --git a/third_party/llvm/llvm.autogenerated.BUILD b/third_party/llvm/llvm.autogenerated.BUILD index b1b8d92a8a3..fb1eff30e06 100644 --- a/third_party/llvm/llvm.autogenerated.BUILD +++ b/third_party/llvm/llvm.autogenerated.BUILD @@ -3188,6 +3188,7 @@ cc_library( ]), copts = llvm_copts, deps = [ + ":BinaryFormat", ":DebugInfoCodeView", ":MC", ":Object", From 5484b4be2bd7e2019e4ec3e6d5a97dfa47aaeb4e Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Tue, 21 Jul 2020 19:44:03 +0700 Subject: [PATCH 0926/2522] Add NewRandomAccessFile_Buffered_CachedOutOfRange --- .../plugins/gcs/gcs_filesystem_test.cc | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem_test.cc b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem_test.cc index 0944750284c..dc49c067dfa 100644 --- a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem_test.cc +++ b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem_test.cc @@ -412,6 +412,40 @@ TEST_F(GCSFilesystemTest, NewRandomAccessFile_Buffered_ReadAtEOF) { ASSERT_EQ(result, "") << "Result: " << result << "\n"; } +TEST_F(GCSFilesystemTest, NewRandomAccessFile_Buffered_CachedOutOfRange) { + tf_gcs_filesystem::InitTest(filesystem_, false, 10, 0, 0, 0, 0, status_); + ASSERT_TF_OK(status_) << "Could not initialize filesystem. 
" + << TF_Message(status_); + std::string path = GetURIForPath("a_file"); + auto gcs_file = + static_cast(filesystem_->plugin_filesystem); + ASSERT_TRUE(InsertObject(path, "012345678", &gcs_file->gcs_client, status_)); + + TF_RandomAccessFile* file = new TF_RandomAccessFile; + tf_gcs_filesystem::NewRandomAccessFile(filesystem_, path.c_str(), file, + status_); + ASSERT_TF_OK(status_); + + std::string result; + result.resize(5); + int64_t read = tf_random_access_file::Read(file, 0, result.length(), &result[0], status_); + ASSERT_EQ(read, 5) << "Read: " << read << "\n"; + ASSERT_TF_OK(status_); + ASSERT_EQ(result, "01234") << "Result: " << result << "\n"; + + read = tf_random_access_file::Read(file, 4, result.length(), &result[0], status_); + ASSERT_EQ(read, 5) << "Read: " << read << "\n"; + ASSERT_TF_OK(status_); + result.resize(read); + ASSERT_EQ(result, "45678") << "Result: " << result << "\n"; + + read = tf_random_access_file::Read(file, 5, result.length(), &result[0], status_); + ASSERT_EQ(read, 4) << "Read: " << read << "\n"; + ASSERT_EQ(TF_GetCode(status_), TF_OUT_OF_RANGE) << TF_Message(status_); + result.resize(read); + ASSERT_EQ(result, "5678") << "Result: " << result << "\n"; +} + } // namespace } // namespace tensorflow From 73e38363a236f984f0aa20bbee82875f0a4eb204 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Tue, 21 Jul 2020 06:09:00 -0700 Subject: [PATCH 0927/2522] Add __array__ method to `tf.Variable`. `np.array` converts a `tf.Tensor`. It should work on a `tf.Variable` too. This seems like the right class to add it to since this is the class that implements `.numpy()` for Variable. ``` >>> np.array(tf.Variable([1.0])) array([1.], dtype=float32) ``` PiperOrigin-RevId: 322345276 Change-Id: I66a6e19f66ed444f0f0bdbd48e340ef76e443e20 --- .../python/kernel_tests/resource_variable_ops_test.py | 2 ++ tensorflow/python/ops/resource_variable_ops.py | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py index edcd8d7a05e..0bc61fc6dac 100644 --- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py +++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py @@ -1194,6 +1194,8 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase, v2 = resource_variable_ops.ResourceVariable( initial_value=callable_init, name="var7") self.assertEqual("var7:0", v2.name) + self.assertAllEqual(np.array(v2), v2.read_value().numpy()) + self.assertAllEqual(v2.numpy(), v2.read_value().numpy()) self.assertAllEqual(2 * init.numpy(), v2.read_value().numpy()) # Test assign_add. diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index 8f5d056807f..5921d9ac0f6 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -474,6 +474,17 @@ class BaseResourceVariable(variables.VariableV1, core.Tensor): else: yield + def __array__(self): + """Allows direct conversion to a numpy array. + + >>> np.array(tf.Variable([1.0])) + array([1.], dtype=float32) + + Returns: + The variable value as a numpy array. + """ + return self.numpy() + def __nonzero__(self): return self.__bool__() From 5f62654629d64b9e456143519dfca1ee9dbc5f1c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 21 Jul 2020 07:29:04 -0700 Subject: [PATCH 0928/2522] Add __array__ method to `tf.Variable`. `np.array` converts a `tf.Tensor`. It should work on a `tf.Variable` too. 
This seems like the right class to add it to since this is the class that implements `.numpy()` for Variable. ``` >>> np.array(tf.Variable([1.0])) array([1.], dtype=float32) ``` PiperOrigin-RevId: 322355667 Change-Id: I2ae98c426e54dd96da864dc85b12af0c63f21f72 --- .../python/kernel_tests/resource_variable_ops_test.py | 2 -- tensorflow/python/ops/resource_variable_ops.py | 11 ----------- 2 files changed, 13 deletions(-) diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py index 0bc61fc6dac..edcd8d7a05e 100644 --- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py +++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py @@ -1194,8 +1194,6 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase, v2 = resource_variable_ops.ResourceVariable( initial_value=callable_init, name="var7") self.assertEqual("var7:0", v2.name) - self.assertAllEqual(np.array(v2), v2.read_value().numpy()) - self.assertAllEqual(v2.numpy(), v2.read_value().numpy()) self.assertAllEqual(2 * init.numpy(), v2.read_value().numpy()) # Test assign_add. diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index 5921d9ac0f6..8f5d056807f 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -474,17 +474,6 @@ class BaseResourceVariable(variables.VariableV1, core.Tensor): else: yield - def __array__(self): - """Allows direct conversion to a numpy array. - - >>> np.array(tf.Variable([1.0])) - array([1.], dtype=float32) - - Returns: - The variable value as a numpy array. - """ - return self.numpy() - def __nonzero__(self): return self.__bool__() From 636c214203c4b07d048e9d3f0faee9c4386ac5fb Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Tue, 21 Jul 2020 08:06:44 -0700 Subject: [PATCH 0929/2522] [MLIR] IfOp/WhileOp: Convert output_shapes to a derived attribute - Also add canonicalization for IfOp and WhileOp to remove the output_shapes attribute from these ops if its present. - Fix shape inference for IfOp to not rely on tensorflow shape inference function and use a custom function instead to infer IfOp output types/shapes and update unit test to exercise the new path. - Update MLIR->Graphdef export test to verify that output_shapes is emitted in the generated graphdef. 
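A hedged sketch of the custom inference step (illustrative only, not the actual shape_inference.cc code; the helper name is hypothetical, and it leans on the then_func()/else_func() accessors added in this change, propagating a branch result type only when both branches agree):

```
#include "mlir/IR/Function.h"
#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h"

using namespace mlir;

// Refine tf.If result types from the two branch signatures instead of calling
// back into the TensorFlow shape inference function.
static void InferIfOpResultTypes(TF::IfOp if_op) {
  FuncOp then_fn = if_op.then_func();
  FuncOp else_fn = if_op.else_func();
  Operation *op = if_op.getOperation();
  for (unsigned i = 0, e = op->getNumResults(); i != e; ++i) {
    Type then_ty = then_fn.getType().getResult(i);
    Type else_ty = else_fn.getType().getResult(i);
    // When both branches agree on a type that is more refined than what the
    // op currently carries, propagate it; the derived output_shapes attribute
    // then follows from the refined result types.
    if (then_ty == else_ty) op->getResult(i).setType(then_ty);
  }
}
```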
PiperOrigin-RevId: 322360899 Change-Id: I1fc1d47f610f8ad915833c3ade43e845adbf252e --- .../compiler/mlir/tensorflow/ir/tf_ops.h | 1 + .../compiler/mlir/tensorflow/ir/tf_ops.td | 28 ++++++++- .../compiler/mlir/tensorflow/ir/tf_ops_a_m.cc | 6 +- .../compiler/mlir/tensorflow/ir/tf_ops_a_m.h | 1 + .../mlir/tensorflow/ir/tf_ops_helpers.inc | 20 ++++++ .../compiler/mlir/tensorflow/ir/tf_ops_n_z.cc | 8 +++ .../compiler/mlir/tensorflow/ir/tf_ops_n_z.h | 1 + .../mlir/tensorflow/tests/canonicalize.mlir | 33 ++++++++++ .../mlir2graphdef/functional-if-ops.mlir | 34 ++++++++-- .../mlir2graphdef/functional-while-ops.mlir | 36 +++++++++-- .../tests/resource-device-inference.mlir | 8 +-- .../tensorflow/tests/resource_op_lifting.mlir | 24 +++---- .../tensorflow/tests/shape_inference.mlir | 9 +-- .../tpu-variable-runtime-reformatting.mlir | 9 +-- .../tests/tpu_space_to_depth_pass.mlir | 2 +- .../region_control_flow_to_functional.cc | 3 +- .../transforms/resource_op_lifting.cc | 10 +-- .../tensorflow/transforms/shape_inference.cc | 63 ++++++++++++------- .../transforms/stack_ops_decomposition.cc | 8 --- .../tensor_array_ops_decomposition.cc | 4 -- .../tensor_list_ops_decomposition.cc | 8 --- .../tpu_variable_runtime_reformatting.cc | 10 --- 22 files changed, 220 insertions(+), 106 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h index d06dce81e09..039ed1bc3a8 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h @@ -23,6 +23,7 @@ limitations under the License. #include "mlir/IR/Attributes.h" // from @llvm-project #include "mlir/IR/Builders.h" // from @llvm-project #include "mlir/IR/Dialect.h" // from @llvm-project +#include "mlir/IR/Function.h" // from @llvm-project #include "mlir/IR/Matchers.h" // from @llvm-project #include "mlir/IR/Module.h" // from @llvm-project #include "mlir/IR/OpImplementation.h" // from @llvm-project diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td index 71b30ae8090..d230a24afdc 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td @@ -225,12 +225,25 @@ else_branch: A function that takes 'inputs' and returns a list of TF_DerivedOperandTypeAttr Tcond = TF_DerivedOperandTypeAttr<0>; TF_DerivedOperandTypeListAttr Tin = TF_DerivedOperandTypeListAttr<1>; TF_DerivedResultTypeListAttr Tout = TF_DerivedResultTypeListAttr<0>; + TF_DerivedResultShapeListAttr output_shapes = TF_DerivedResultShapeListAttr<0>; let verifier = [{ return Verify(*this); }]; let hasCanonicalizer = 1; + + let extraClassDeclaration = [{ + // Get the then branch function. + FuncOp then_func() { + return getParentOfType().lookupSymbol(then_branch()); + } + + // Get the else branch function. 
+ FuncOp else_func() { + return getParentOfType().lookupSymbol(else_branch()); + } + }]; } def TF_YieldOp : TF_Op<"Yield", @@ -612,7 +625,6 @@ body: A function that takes a list of tensors and returns another FlatSymbolRefAttr:$cond, FlatSymbolRefAttr:$body, - DefaultValuedAttr:$output_shapes, DefaultValuedAttr:$parallel_iterations, // Used to map StatelessWhile and While op defined in TensorFlow to a common @@ -625,10 +637,24 @@ body: A function that takes a list of tensors and returns another ); TF_DerivedOperandTypeListAttr T = TF_DerivedOperandTypeListAttr<0>; + TF_DerivedResultShapeListAttr output_shapes = TF_DerivedResultShapeListAttr<0>; let verifier = [{ return Verify(*this); }]; + let hasCanonicalizer = 1; + + let extraClassDeclaration = [{ + // Get the condition function. + FuncOp cond_func() { + return getParentOfType().lookupSymbol(cond()); + } + + // Get the body function. + FuncOp body_func() { + return getParentOfType().lookupSymbol(body()); + } + }]; } def TL_WhileRegionOp : TF_Op<"WhileRegion", diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc index f4f9ec42864..6183dde8581 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc @@ -1615,6 +1615,10 @@ static LogicalResult Verify(IfOp op) { return success(); } +//===----------------------------------------------------------------------===// +// IfOp canonicalization. +//===----------------------------------------------------------------------===// + class FoldConstantIfOp : public OpRewritePattern { public: explicit FoldConstantIfOp(MLIRContext *context) @@ -1662,7 +1666,7 @@ LogicalResult FoldConstantIfOp::matchAndRewrite( void IfOp::getCanonicalizationPatterns(OwningRewritePatternList &results, MLIRContext *context) { - results.insert(context); + results.insert>(context); } //===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.h index b2b78da8993..19a927a23d7 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.h +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.h @@ -20,6 +20,7 @@ limitations under the License. #include "mlir/IR/Attributes.h" // from @llvm-project #include "mlir/IR/Builders.h" // from @llvm-project #include "mlir/IR/Dialect.h" // from @llvm-project +#include "mlir/IR/Function.h" // from @llvm-project #include "mlir/IR/Matchers.h" // from @llvm-project #include "mlir/IR/Module.h" // from @llvm-project #include "mlir/IR/OpImplementation.h" // from @llvm-project diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_helpers.inc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_helpers.inc index 33d51301208..71f1560aa6c 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_helpers.inc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_helpers.inc @@ -578,3 +578,23 @@ LogicalResult VerifyRegionResults(Operation *op, Region ®ion, } return success(); } + +//===----------------------------------------------------------------------===// +// Function control flow canonicalization. +//===----------------------------------------------------------------------===// + +// Eliminate attributes that are not needed, but can get attached to Ops +// during import. +template +struct DropAttributes : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + // Drop the "output_shapes" attribute. 
+ LogicalResult matchAndRewrite(Op op, + PatternRewriter &rewriter) const override { + bool found = op.removeAttr("output_shapes") == + MutableDictionaryAttr::RemoveResult::Removed; + return success(found); + } +}; + diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc index 0d9b2610492..564db91eed7 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc @@ -2065,6 +2065,14 @@ static LogicalResult Verify(WhileOp op) { return success(); } +//===----------------------------------------------------------------------===// +// WhileOp canonicalization. +//===----------------------------------------------------------------------===// +void WhileOp::getCanonicalizationPatterns(OwningRewritePatternList &results, + MLIRContext *context) { + results.insert>(context); +} + //===----------------------------------------------------------------------===// // WhileRegionOp //===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.h index b6e9222a370..761c06a475c 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.h +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.h @@ -20,6 +20,7 @@ limitations under the License. #include "mlir/IR/Attributes.h" // from @llvm-project #include "mlir/IR/Builders.h" // from @llvm-project #include "mlir/IR/Dialect.h" // from @llvm-project +#include "mlir/IR/Function.h" // from @llvm-project #include "mlir/IR/Matchers.h" // from @llvm-project #include "mlir/IR/Module.h" // from @llvm-project #include "mlir/IR/OpImplementation.h" // from @llvm-project diff --git a/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir b/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir index 17a19c50998..6830f3e1e7e 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir @@ -985,3 +985,36 @@ func @testWhileRegionUnusedValue(%arg0 : tensor<*xf32>, %arg1 : tensor, %ar // CHECK: return %[[WHILE_OUT]]#0 : tensor<*xf32> return %0#0 : tensor<*xf32> } + +// Check that output_shapes attribute is removed for tf.If +func @testIfThen(tensor<*xf32>) -> tensor<*xf32> +func @testIfElse(tensor<*xf32>) -> tensor<*xf32> +// CHECK-LABEL: func @testIfDropOutputShapes +func @testIfDropOutputShapes(tensor, tensor<2xf32>) -> tensor<2xf32> { +^bb0(%arg0: tensor, %arg1: tensor<2xf32>): + // CHECK: "tf.If" + // CHECK-NOT: output_shapes + %1 = "tf.If"(%arg0, %arg1) { + then_branch = @testIfThen, else_branch = @testIfElse, is_stateless = false, output_shapes = [#tf.shape<>] + } : (tensor, tensor<2xf32>) -> tensor<2xf32> + + return %1 : tensor<2xf32> +} + +// Check that output_shapes attribute is removed for tf.Whileß +func @testWhileCond(tensor<*xf32>) -> (tensor) +func @testWhileBody(tensor<*xf32>) -> (tensor<*xf32>) +// CHECK-LABEL: func @testWhileDropOutputShapes +func @testWhileDropOutputShapes(tensor<*xf32>) -> (tensor<*xf32>) { +^bb0(%arg0: tensor<*xf32>): + // CHECK: "tf.While" + // CHECK-NOT: output_shapes + %1 = "tf.While"(%arg0) { + cond = @testWhileCond, + body = @testWhileBody, + is_stateless = false, + output_shapes = [#tf.shape<>] + } : (tensor<*xf32>) -> (tensor<*xf32>) + + return %1 : tensor<*xf32> +} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/functional-if-ops.mlir 
b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/functional-if-ops.mlir index d9ad36f2ce6..b6933459382 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/functional-if-ops.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/functional-if-ops.mlir @@ -1,13 +1,13 @@ // RUN: tf-mlir-translate -mlir-to-graphdef %s -o - | FileCheck %s -func @main(%arg0: tensor, %arg1: tensor) -> (tensor, tensor) { +func @main(%arg0: tensor, %arg1: tensor, %arg2: tensor<4xf32>, %arg3: tensor<4xf32>) -> (tensor<4xf32>, tensor<4xf32>) { %0:2 = tf_executor.graph { %outputs_2, %control_3 = tf_executor.island wraps "tf.Less"(%arg0, %arg1) : (tensor, tensor) -> tensor - %outputs_4, %control_5 = tf_executor.island wraps "tf.If"(%outputs_2, %arg0, %arg1) {else_branch = @cond_false, is_stateless = false, then_branch = @cond_true} : (tensor, tensor, tensor) -> tensor loc("StatefulIf") - %outputs_6, %control_7 = tf_executor.island wraps "tf.If"(%outputs_2, %arg0, %arg1) {else_branch = @cond_false, is_stateless = true, then_branch = @cond_true} : (tensor, tensor, tensor) -> tensor loc("StatelessIf") - tf_executor.fetch %outputs_4, %outputs_6 : tensor, tensor + %outputs_4, %control_5 = tf_executor.island wraps "tf.If"(%outputs_2, %arg2, %arg3) {else_branch = @cond_false, is_stateless = false, then_branch = @cond_true} : (tensor, tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> loc("StatefulIf") + %outputs_6, %control_7 = tf_executor.island wraps "tf.If"(%outputs_2, %arg2, %arg3) {else_branch = @cond_false, is_stateless = true, then_branch = @cond_true} : (tensor, tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> loc("StatelessIf") + tf_executor.fetch %outputs_4, %outputs_6 : tensor<4xf32>, tensor<4xf32> } - return %0#0, %0#1 : tensor, tensor + return %0#0, %0#1 : tensor<4xf32>, tensor<4xf32> } func @cond_true(%arg0: tensor<*xf32>, %arg1: tensor<*xf32>) -> tensor<*xf32> { @@ -34,8 +34,32 @@ func @cond_false(%arg0: tensor<*xf32>, %arg1: tensor<*xf32>) -> tensor<*xf32> { // CHECK-NOT: name: // CHECK: op: "If" // CHECK-NOT: is_stateless +// CHECK: attr { +// CHECK: key: "output_shapes" +// CHECK: value { +// CHECK: list { +// CHECK: shape { +// CHECK: dim { +// CHECK: size: 4 +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } // CHECK: name: "StatelessIf" // CHECK-NOT: name: // CHECK: op: "StatelessIf" // CHECK-NOT: is_stateless +// CHECK: attr { +// CHECK: key: "output_shapes" +// CHECK: value { +// CHECK: list { +// CHECK: shape { +// CHECK: dim { +// CHECK: size: 4 +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } diff --git a/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/functional-while-ops.mlir b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/functional-while-ops.mlir index 9f14a144d9d..c7a4630d985 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/functional-while-ops.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/functional-while-ops.mlir @@ -1,12 +1,12 @@ // RUN: tf-mlir-translate -mlir-to-graphdef %s -o - | FileCheck %s -func @main(%arg0: tensor, %arg1: tensor) -> (tensor, tensor) { +func @main(%arg0: tensor, %arg1: tensor<5xf32>) -> (tensor<5xf32>, tensor<5xf32>) { %0:2 = tf_executor.graph { - %outputs_2:2, %control_3 = tf_executor.island wraps "tf.While"(%arg0, %arg1) {body = @body, cond = @cond, is_stateless = false} : (tensor, tensor) -> (tensor, tensor) loc("StatefulWhile") - %outputs_4:2, %control_5 = tf_executor.island wraps "tf.While"(%arg0, %arg1) {body = @body, cond = @cond, is_stateless 
= true} : (tensor, tensor) -> (tensor, tensor) loc("StatelessWhile") - tf_executor.fetch %outputs_2#1, %outputs_4#1 : tensor, tensor + %outputs_2:2, %control_3 = tf_executor.island wraps "tf.While"(%arg0, %arg1) {body = @body, cond = @cond, is_stateless = false} : (tensor, tensor<5xf32>) -> (tensor, tensor<5xf32>) loc("StatefulWhile") + %outputs_4:2, %control_5 = tf_executor.island wraps "tf.While"(%arg0, %arg1) {body = @body, cond = @cond, is_stateless = true} : (tensor, tensor<5xf32>) -> (tensor, tensor<5xf32>) loc("StatelessWhile") + tf_executor.fetch %outputs_2#1, %outputs_4#1 : tensor<5xf32>, tensor<5xf32> } - return %0#0, %0#1 : tensor, tensor + return %0#0, %0#1 : tensor<5xf32>, tensor<5xf32> } func @cond(%arg0: tensor<*xi32>, %arg1: tensor<*xf32>) -> tensor { @@ -36,8 +36,34 @@ func @body(%arg0: tensor<*xi32>, %arg1: tensor<*xf32>) -> (tensor<*xi32>, tensor // CHECK-NOT: name: // CHECK: op: "While" // CHECK-NOT: is_stateless +// CHECK: attr { +// CHECK: key: "output_shapes" +// CHECK: value { +// CHECK: list { +// CHECK: shape { +// CHECK: dim { +// CHECK: size: 5 +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } + // CHECK: name: "StatelessWhile" // CHECK-NOT: name: // CHECK: op: "StatelessWhile" // CHECK-NOT: is_stateless +// CHECK: attr { +// CHECK: key: "output_shapes" +// CHECK: value { +// CHECK: list { +// CHECK: shape { +// CHECK: dim { +// CHECK: size: 5 +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } + diff --git a/tensorflow/compiler/mlir/tensorflow/tests/resource-device-inference.mlir b/tensorflow/compiler/mlir/tensorflow/tests/resource-device-inference.mlir index a9e814c647e..a4a7c1dad2e 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/resource-device-inference.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/resource-device-inference.mlir @@ -56,7 +56,7 @@ func @propagate_if_op( "tf.If"(%arg1, %id0, %var_handle) { then_branch = @if_then, else_branch = @if_else, - output_shapes = [], is_stateless = false} + is_stateless = false} : (tensor, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) -> () tf_executor.yield @@ -128,8 +128,7 @@ func @propagate_while_op( // CHECK-NEXT: "tf.While" "tf.While"(%arg1, %id0, %var_handle) { body = @while_body, - cond = @while_cond, - output_shapes = [], is_stateless = false} + cond = @while_cond, is_stateless = false} : (tensor, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) -> (tensor, tensor<*x!tf.resource>>, @@ -209,8 +208,7 @@ func @error_on_conflict_multiple_callers( : () -> tensor<*x!tf.resource>> "tf.If"(%arg1, %id0, %var_handle) { then_branch = @if_then_and_else, - else_branch = @if_then_and_else, - output_shapes = [], is_stateless = false} + else_branch = @if_then_and_else, is_stateless = false} : (tensor, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) -> () "tf.If"(%arg1, %var_handle, %id0) { diff --git a/tensorflow/compiler/mlir/tensorflow/tests/resource_op_lifting.mlir b/tensorflow/compiler/mlir/tensorflow/tests/resource_op_lifting.mlir index cd93e1423ea..d8a87b9bdb4 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/resource_op_lifting.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/resource_op_lifting.mlir @@ -147,8 +147,7 @@ func @cluster_with_loop() -> () { "tf_device.cluster"() ( { // CHECK: %[[WHILE:.*]]:2 = "tf.While"(%[[COUNT]], %[[READ]]) %2:3 = "tf.While"(%0, %1, %unused) - {body = @while_body, cond = @while_cond, device = "", is_stateless = false, - output_shapes = [#tf.shape<>, #tf.shape<>]} + {body = @while_body, cond = @while_cond, device = "", is_stateless 
= false} : (tensor, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) -> (tensor, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) // CHECK: tf_device.return %[[WHILE]]#1 : tensor @@ -197,8 +196,7 @@ func @cluster_with_loop() -> () { "tf_device.cluster"() ( { // CHECK: %[[WHILE:.*]] = "tf.While"(%[[READ]]) %1 = "tf.While"(%0) { - body = @while_body, cond = @while_cond, device = "", is_stateless = false, - output_shapes = [#tf.shape<>]} + body = @while_body, cond = @while_cond, device = "", is_stateless = false} : (tensor<*x!tf.resource>>) -> (tensor<*x!tf.resource>>) // CHECK: tf_device.return %[[WHILE]] : tensor @@ -239,8 +237,7 @@ func @cluster_with_loop() -> () { "tf_device.cluster"() ( { // CHECK: %[[WHILE:.*]] = "tf.While"(%[[READ]]) %1 = "tf.While"(%0) { - body = @while_body, cond = @while_cond, device = "", is_stateless = false, - output_shapes = [#tf.shape<>]} + body = @while_body, cond = @while_cond, device = "", is_stateless = false} : (tensor<*x!tf.resource>>) -> (tensor<*x!tf.resource>>) // CHECK: tf_device.return @@ -278,8 +275,7 @@ func @cluster_with_nested_loop() -> () { "tf_device.cluster"() ( { // CHECK: %[[WHILE:.*]] = "tf.While"(%[[READ]]) %2:2 = "tf.While"(%0, %1) { - body = @while_body, cond = @while_cond, device = "", is_stateless = false, - output_shapes = [#tf.shape<>, #tf.shape<>]} + body = @while_body, cond = @while_cond, device = "", is_stateless = false} : (tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) -> (tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) // CHECK: tf_device.return %[[WHILE]] : tensor @@ -295,8 +291,7 @@ func @while_body(%arg0: tensor<*x!tf.resource>>, %arg1: tensor<*x!tf -> (tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) { // CHECK: %[[WHILE:.*]] = "tf.While"(%[[BARG0]]) %0:2 = "tf.While"(%arg0, %arg1) { - body = @while_body1, cond = @while_cond1, device = "", is_stateless = false, - output_shapes = [#tf.shape<>, #tf.shape<>]} + body = @while_body1, cond = @while_cond1, device = "", is_stateless = false} : (tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) -> (tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) // CHECK-NEXT: return %[[WHILE]] @@ -334,8 +329,7 @@ func @cluster_with_loop() -> () { %0 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource>> "tf_device.cluster"() ( { %1 = "tf.While"(%0) { - body = @while_body, cond = @while_cond, device = "", is_stateless = false, - output_shapes = [#tf.shape<>]} + body = @while_body, cond = @while_cond, device = "", is_stateless = false} : (tensor<*x!tf.resource>>) -> (tensor<*x!tf.resource>>) tf_device.return }) {cluster_attr = "cluster_attr"} : () -> () @@ -359,8 +353,7 @@ func @cluster_with_loop() -> () { %0 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource>> "tf_device.cluster"() ( { %1 = "tf.While"(%0) { - body = @while_body, cond = @while_cond, device = "", is_stateless = false, - output_shapes = [#tf.shape<>]} + body = @while_body, cond = @while_cond, device = "", is_stateless = false} : (tensor<*x!tf.resource>>) -> (tensor<*x!tf.resource>>) tf_device.return }) {cluster_attr = "cluster_attr"} : () -> () @@ -384,8 +377,7 @@ func @cluster_with_loop() -> () { %0 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource>> "tf_device.cluster"() ( { %1 = "tf.While"(%0) { - body = @while_body, cond = @while_cond, device = "", is_stateless = false, - output_shapes = [#tf.shape<>]} + body = @while_body, cond = @while_cond, device = "", is_stateless = false} : (tensor<*x!tf.resource>>) -> 
(tensor<*x!tf.resource>>) tf_device.return }) {cluster_attr = "cluster_attr"} : () -> () diff --git a/tensorflow/compiler/mlir/tensorflow/tests/shape_inference.mlir b/tensorflow/compiler/mlir/tensorflow/tests/shape_inference.mlir index 5a8f63ec63d..4a5e3c8deaa 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/shape_inference.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/shape_inference.mlir @@ -100,10 +100,11 @@ func @multiple_blocks_one_return(%arg0: tensor) -> tensor<*xf32> { return %1 : tensor } - // CHECK-LABEL: func @shape_from_if_to_branch_functions - func @shape_from_if_to_branch_functions(%arg0: tensor, %arg1: tensor<1x2x3xf32>) -> tensor<1x2x3xf32> { - %0 = "tf.If"(%arg0, %arg1) {Tcond = i1, Tin = ["tfdtype$DT_FLOAT"], Tout = ["tfdtype$DT_FLOAT"], _xla_propagate_compile_time_consts = true, device = "", else_branch = @if_else_branch, is_stateless = true, name = "if", then_branch = @if_then_branch} : (tensor, tensor<1x2x3xf32>) -> tensor<1x2x3xf32> - return %0 : tensor<1x2x3xf32> + // CHECK-LABEL: func @shape_from_if_to_branch_functions_to_results + // CHECK-SAME: (%arg0: tensor, %arg1: tensor<1x2x3xf32>) -> tensor<1x2x3xf32> + func @shape_from_if_to_branch_functions_to_results(%arg0: tensor, %arg1: tensor<1x2x3xf32>) -> tensor<*xf32> { + %0 = "tf.If"(%arg0, %arg1) {Tcond = i1, Tin = ["tfdtype$DT_FLOAT"], Tout = ["tfdtype$DT_FLOAT"], else_branch = @if_else_branch, is_stateless = true, name = "if", then_branch = @if_then_branch} : (tensor, tensor<1x2x3xf32>) -> tensor<*xf32> + return %0 : tensor<*xf32> } // CHECK-LABEL: func @if_then_branch diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tpu-variable-runtime-reformatting.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tpu-variable-runtime-reformatting.mlir index 43be8743e51..1e308b42bfc 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tpu-variable-runtime-reformatting.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tpu-variable-runtime-reformatting.mlir @@ -20,8 +20,7 @@ module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, pr {T = ["tfdtype$DT_INT32", "tfdtype$DT_RESOURCE", "tfdtype$DT_RESOURCE", "tfdtype$DT_RESOURCE", "tfdtype$DT_RESOURCE"], body = @while_body_7560, - cond = @while_cond_7550, device = "", is_stateless = false, - output_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>]} + cond = @while_cond_7550, device = "", is_stateless = false} : (tensor, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) -> (tensor, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>, @@ -217,8 +216,7 @@ module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, pr {T = ["tfdtype$DT_INT32", "tfdtype$DT_RESOURCE", "tfdtype$DT_RESOURCE", "tfdtype$DT_RESOURCE", "tfdtype$DT_RESOURCE"], body = @while_body_7560, - cond = @while_cond_7550, device = "", is_stateless = false, - output_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>]} + cond = @while_cond_7550, device = "", is_stateless = false} : (tensor, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) -> (tensor, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>, @@ -305,8 +303,7 @@ module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, pr {T = ["tfdtype$DT_INT32", "tfdtype$DT_RESOURCE", "tfdtype$DT_RESOURCE", "tfdtype$DT_RESOURCE"], body = @while_body_7560, - cond = @while_cond_7550, device = "", is_stateless = false, - output_shapes = [#tf.shape<>, #tf.shape<>, 
#tf.shape<>, #tf.shape<>, #tf.shape<>]} + cond = @while_cond_7550, device = "", is_stateless = false} : (tensor, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) -> (tensor, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>, diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tpu_space_to_depth_pass.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tpu_space_to_depth_pass.mlir index 199426b1aa9..280986a7ee1 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tpu_space_to_depth_pass.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tpu_space_to_depth_pass.mlir @@ -7,7 +7,7 @@ module attributes {tf.devices = {"/job:localhost/replica:0/task:0/device:CPU:0" %0 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor %1 = "tf.Const"() {value = dense<2> : tensor} : () -> tensor %2 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor - %3:10 = "tf.While"(%2, %1, %2, %0, %1, %arg2, %arg4, %arg5, %arg6, %arg7) {_lower_using_switch_merge = true, _num_original_outputs = 10 : i64, _read_only_resource_inputs = [], body = @while_body_2710, cond = @while_cond_2700, device = "", is_stateless = false, output_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>], parallel_iterations = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor>>, tensor>>, tensor>>, tensor>>) -> (tensor, tensor, tensor, tensor, tensor, tensor, tensor>>, tensor>>, tensor>>, tensor>>) + %3:10 = "tf.While"(%2, %1, %2, %0, %1, %arg2, %arg4, %arg5, %arg6, %arg7) {_lower_using_switch_merge = true, _num_original_outputs = 10 : i64, _read_only_resource_inputs = [], body = @while_body_2710, cond = @while_cond_2700, device = "", is_stateless = false, parallel_iterations = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor>>, tensor>>, tensor>>, tensor>>) -> (tensor, tensor, tensor, tensor, tensor, tensor, tensor>>, tensor>>, tensor>>, tensor>>) return } // CHECK-LABEL: func @while_body_2710 diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/region_control_flow_to_functional.cc b/tensorflow/compiler/mlir/tensorflow/transforms/region_control_flow_to_functional.cc index f1004fa049e..2ae2a976767 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/region_control_flow_to_functional.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/region_control_flow_to_functional.cc @@ -373,8 +373,7 @@ LogicalResult RegionControlFlowToFunctional::ConvertWhileOp( OpBuilder builder(while_region); auto while_op = builder.create( while_region.getLoc(), new_result_types, new_inputs, cond_name, body_name, - builder.getArrayAttr({}), while_region.parallel_iterations(), - while_region.is_stateless()); + while_region.parallel_iterations(), while_region.is_stateless()); // Redirect old results to new results. for (auto it : llvm::zip( diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc b/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc index 74679f19941..a9caeaac50d 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc @@ -627,8 +627,6 @@ LogicalResult HandleWhileLoop(TF::WhileOp while_op, FuncOp body, FuncOp cond) { }); // Recreate the while op. 
OpBuilder builder(while_op); - auto new_output_shapes = FilterRange>( - while_op.output_shapes().getValue(), resource_arg_uses); // Now use the filtered original operands, which will be replaced by // AddLoadsStoresOutsideControlFlowOp(). auto new_while = builder.create( @@ -636,8 +634,7 @@ LogicalResult HandleWhileLoop(TF::WhileOp while_op, FuncOp body, FuncOp cond) { FilterRange(while_op.getOperands(), resource_arg_uses), while_op.getAttrs()); - // Prepare for AddLoadsStoresOutsideControlFlowOp() and update - // new_output_shapes. + // Prepare for AddLoadsStoresOutsideControlFlowOp(). llvm::SmallDenseMap> arg_data_type_and_updated_output_index; for (const auto& entry : remaining_resource_data_types) { @@ -647,14 +644,9 @@ LogicalResult HandleWhileLoop(TF::WhileOp while_op, FuncOp body, FuncOp cond) { : entry.getFirst(); arg_data_type_and_updated_output_index[entry.getFirst()] = { entry.getSecond(), update_index}; - if (!new_output_shapes.empty()) { - new_output_shapes[entry.getFirst()] = - tensorflow::ConvertTypeToTensorShapeAttr(entry.getSecond()); - } } AddLoadsStoresOutsideControlFlowOp(new_while, arg_data_type_and_updated_output_index); - new_while.setAttr("output_shapes", builder.getArrayAttr(new_output_shapes)); // Replace uses. for (int64_t i = 0; i < old_to_new_indices.size(); ++i) { if (old_to_new_indices[i] >= 0) { diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc index 2551e68dd74..104cce1fb89 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc @@ -262,22 +262,6 @@ bool InferShapeForCall(Operation* op) { return changed; } -// Infer the shape IfRegion outputs based on the shapes of the then and else -// yields. -bool InferShapeForIfRegion(IfRegionOp op) { - bool changed = false; - - Operation* then_yield = op.then_branch().front().getTerminator(); - Operation* else_yield = op.else_branch().front().getTerminator(); - for (auto result : zip(op.getResults(), then_yield->getOperandTypes(), - else_yield->getOperandTypes())) { - // If then and else types do not match, skip refinement for that result. - if (std::get<1>(result) != std::get<2>(result)) continue; - changed = RefineResultType(op, std::get<0>(result), std::get<1>(result)) || - changed; - } - return changed; -} bool InferShapeForCast(CastOp op, Dialect* tf_dialect) { Value result = op.getResult(); if (!CanBeRefined(result.getType())) return false; @@ -306,6 +290,37 @@ bool InferShapeForCast(CastOp op, Dialect* tf_dialect) { return true; } +// Infer the shape IfOp outputs based on the shapes of the then and else +// function result types. +bool InferShapeForIf(IfOp op) { + bool changed = false; + for (auto it : + llvm::zip(op.getResults(), op.then_func().getType().getResults(), + op.else_func().getType().getResults())) { + // If then and else types do not match, skip refinement for that result. + if (std::get<1>(it) != std::get<2>(it)) continue; + changed = RefineResultType(op, std::get<0>(it), std::get<1>(it)) || changed; + } + return changed; +} + +// Infer the shape IfRegion outputs based on the shapes of the then and else +// yields. 
+bool InferShapeForIfRegion(IfRegionOp op) { + bool changed = false; + + Operation* then_yield = op.then_branch().front().getTerminator(); + Operation* else_yield = op.else_branch().front().getTerminator(); + for (auto result : zip(op.getResults(), then_yield->getOperandTypes(), + else_yield->getOperandTypes())) { + // If then and else types do not match, skip refinement for that result. + if (std::get<1>(result) != std::get<2>(result)) continue; + changed = RefineResultType(op, std::get<0>(result), std::get<1>(result)) || + changed; + } + return changed; +} + bool RefineWithInferTypeOpInterface(InferTypeOpInterface infer_ti, Dialect* tf_dialect) { Operation* op = infer_ti.getOperation(); @@ -768,17 +783,23 @@ bool ShapeInference::InferShapeForSingleOperation(Operation* op) { op)) return InferShapeForCall(op); - // Handle IfRegion operations by infering return shape from the then and else - // branches. - if (auto if_region = dyn_cast(op)) - return InferShapeForIfRegion(if_region); - // tf.Cast are only inferred if they have at least one user in the TF dialect // or feeding into the function return. This is necessary to avoid inserting // casts which cannot be refined. if (auto cast_op = dyn_cast(op)) return InferShapeForCast(cast_op, tf_dialect_); + // Handle IfOp here by inferring the shape from the else/then function + // results. Since `output_shapes` is a derived attribute, avoid going down the + // TF InferenceContext path as IfOp shape inference is implemented as just + // a lookup of the output_shapes attribute. + if (auto if_op = dyn_cast(op)) return InferShapeForIf(if_op); + + // Handle IfRegion operations by infering return shape from the then and else + // branches. + if (auto if_region = dyn_cast(op)) + return InferShapeForIfRegion(if_region); + StringRef op_name = op->getName().getStringRef(); // Drop the `tf.` prefix to query TF registry. auto node_name = diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/stack_ops_decomposition.cc b/tensorflow/compiler/mlir/tensorflow/transforms/stack_ops_decomposition.cc index 8d6e1c2ce30..2dc45ee9816 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/stack_ops_decomposition.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/stack_ops_decomposition.cc @@ -197,24 +197,16 @@ LogicalResult HandleWhileOp( if (!signature_change) return success(); // Create the new while op. auto new_while_operands = llvm::to_vector<8>(while_op.getOperands()); - auto new_output_shapes = - llvm::to_vector<8>(while_op.output_shapes().getValue()); OpBuilder builder(while_op); assert(while_op.getNumOperands() == while_op.getNumResults()); for (int64_t i = 0; i < while_op.getNumResults(); ++i) { auto it = data_var_to_size_var.find(while_op.getOperand(i)); if (it == data_var_to_size_var.end()) continue; new_while_operands.push_back(it->getSecond()); - if (!new_output_shapes.empty()) { - // Size is a scalar shape. 
- new_output_shapes.push_back( - mlir::TF::ShapeAttr::get(builder.getContext(), ArrayRef())); - } } auto new_while = builder.create(while_op.getLoc(), body.getType().getInputs(), new_while_operands, while_op.getAttrs()); - new_while.setAttr("output_shapes", builder.getArrayAttr(new_output_shapes)); for (int64_t i = 0; i < while_op.getNumResults(); ++i) { if (!getElementTypeOrSelf(while_op.getOperand(i).getType()) .isa()) { diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tensor_array_ops_decomposition.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tensor_array_ops_decomposition.cc index cb30bc35a7a..2c3422e3e00 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tensor_array_ops_decomposition.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tensor_array_ops_decomposition.cc @@ -595,8 +595,6 @@ LogicalResult HandleWhileOp(TF::WhileOp while_op, ModuleOp module, auto new_while = builder.create(while_op.getLoc(), body.getType().getInputs(), operands, while_op.getAttrs()); - // Clear the output shapes as it is not needed for XLA lowering. - new_while.setAttr("output_shapes", builder.getArrayAttr({})); for (int64_t i = 0; i < while_op.getNumOperands(); ++i) { if (ta_arg_buffer_type(i)) { while_op.getResult(i).replaceAllUsesWith(while_op.getOperand(i)); @@ -663,8 +661,6 @@ LogicalResult HandleIfOp(TF::IfOp if_op, ModuleOp module, auto new_if = builder.create(if_op.getLoc(), then_branch.getType().getResults(), operands, if_op.getAttrs()); - // Clear the output shapes as it is not needed for XLA lowering. - new_if.setAttr("output_shapes", builder.getArrayAttr({})); auto ret_forwards_input = [](FuncOp f, int64_t ret_ind) -> int64_t { auto retval = f.front().getTerminator()->getOperand(ret_ind); auto arg = retval.dyn_cast(); diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tensor_list_ops_decomposition.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tensor_list_ops_decomposition.cc index 5cbc42a862c..cd055a8dc4a 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tensor_list_ops_decomposition.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tensor_list_ops_decomposition.cc @@ -190,22 +190,14 @@ LogicalResult HandleWhileOp( } // Create the new while op. auto new_while_operands = llvm::to_vector<8>(while_op.getOperands()); - auto new_output_shapes = - llvm::to_vector<8>(while_op.output_shapes().getValue()); for (int64_t i = 0; i < while_op.getNumResults(); ++i) { auto it = buffer_to_size->find(while_op.getOperand(i)); if (it == buffer_to_size->end()) continue; new_while_operands.push_back(it->getSecond().size); - if (!new_output_shapes.empty()) { - // Size is a scalar shape. 
- new_output_shapes.push_back( - mlir::TF::ShapeAttr::get(builder.getContext(), ArrayRef())); - } } auto new_while = builder.create(while_op.getLoc(), body.getType().getInputs(), new_while_operands, while_op.getAttrs()); - new_while.setAttr("output_shapes", builder.getArrayAttr(new_output_shapes)); for (const auto& entry : output_buffer_to_size) { (*buffer_to_size)[new_while.getResult(std::get<0>(entry))] = { new_while.getResult(std::get<1>(entry)), std::get<2>(entry)}; diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_variable_runtime_reformatting.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_variable_runtime_reformatting.cc index 12ce8c57f73..2b2a33b8bc2 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_variable_runtime_reformatting.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_variable_runtime_reformatting.cc @@ -365,16 +365,6 @@ TF::WhileOp AddStateVarsToWhileOp(TF::WhileOp while_op, FuncOp body, while_op.getLoc(), append_types(llvm::to_vector<4>(while_op.getResultTypes())), new_while_operands, while_op.getAttrs()); - if (new_while_op.output_shapes().size() != 0) { - auto new_output_shapes = llvm::to_vector<4>(new_while_op.output_shapes()); - // VarHandleOp is a scalar shape resource. - for (int64_t i = 0; i < state_vars.size(); ++i) { - new_output_shapes.push_back( - mlir::TF::ShapeAttr::get(builder.getContext(), ArrayRef())); - } - new_while_op.setAttr("output_shapes", - builder.getArrayAttr(new_output_shapes)); - } while_op.replaceAllUsesWith( new_while_op.getResults().take_front(while_op.getNumResults())); while_op.erase(); From e38db6885f36895f44094cc0e915a941fb9e9acd Mon Sep 17 00:00:00 2001 From: Vignesh Kothapalli Date: Tue, 21 Jul 2020 21:01:48 +0530 Subject: [PATCH 0930/2522] added tests for type mismatches in UniqueDataset --- .../experimental/kernel_tests/unique_test.py | 35 ++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/data/experimental/kernel_tests/unique_test.py b/tensorflow/python/data/experimental/kernel_tests/unique_test.py index 9a51c4224ff..04a33ea009d 100644 --- a/tensorflow/python/data/experimental/kernel_tests/unique_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/unique_test.py @@ -13,6 +13,7 @@ # limitations under the License. 
# ============================================================================== """Tests for `tf.data.experimental.unique()`.""" + from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -24,6 +25,7 @@ from tensorflow.python.data.kernel_tests import test_base from tensorflow.python.data.ops import dataset_ops from tensorflow.python.framework import combinations from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.platform import test from tensorflow.python.util import compat @@ -49,7 +51,8 @@ class UniqueTest(test_base.DatasetTestBase, parameterized.TestCase): for test_case, expected in test_cases: current_test_case = test_case self.assertDatasetProduces(dataset, [ - compat.as_bytes(element) if dtype == dtypes.string else element + compat.as_bytes( + element) if dtype == dtypes.string else element for element in expected ]) @@ -76,6 +79,36 @@ class UniqueTest(test_base.DatasetTestBase, parameterized.TestCase): (["foo", "bar", "baz", "baz", "bar", "foo"], ["foo", "bar", "baz"]), ]) + @combinations.generate(test_base.graph_only_combinations()) + def testTypeMismatch(self): + + # raises InternalError when dtypes don't match. + # NOTE: Generating the following expected outputs can be considered/taken up as an + # enhancement in the experimental API. + with self.assertRaises(errors.InternalError): + self._testSimpleHelper(dtypes.string, [ + (["hello", 1, 2, 1], ["hello"]), + (["hello", "world", 1], ["hello", "world"]), + (["hello", "hello", "world", 1, 2], ["hello", "world"]), + (["hello", "world", 1, 1, 2], ["hello", "world"]), + ([1, 2, "hello"], ["hello"]), + ([1, 1, 2, 3, 3, "hello"], ["hello"]), + ]) + + self._testSimpleHelper(dtypes.int32, [ + ([1, "hello", "world"], [1]), + ([1, 2, 1, "hello", "hello", "world"], [1, 2]), + (["hello", 1, 2], [1, 2]), + (["hello", 1, 1, 2, 3, 3], [1, 2, 3]), + ]) + + self._testSimpleHelper(dtypes.int64, [ + ([2, 3, "hello", "world"], [2, 3]), + ([2, 3, 3, "hello", "hello", "world"], [2, 3]), + (["hello", 2, 2], [2]), + (["hello", "hello", 1, 1, 2, 3], [1, 2, 3]), + ]) + if __name__ == "__main__": test.main() From 28ca3bf61ba54ef46c1906ac97d671af236c62a3 Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Tue, 21 Jul 2020 15:33:08 +0000 Subject: [PATCH 0931/2522] erase and minor --- .../base_api/api_def_TensorMapReplace.pbtxt | 10 -------- tensorflow/core/ops/map_ops.cc | 2 +- .../python/kernel_tests/map_ops_test.py | 25 ++++++++++++++++--- tensorflow/python/ops/map_ops.py | 14 +++++++---- 4 files changed, 31 insertions(+), 20 deletions(-) delete mode 100644 tensorflow/core/api_def/base_api/api_def_TensorMapReplace.pbtxt diff --git a/tensorflow/core/api_def/base_api/api_def_TensorMapReplace.pbtxt b/tensorflow/core/api_def/base_api/api_def_TensorMapReplace.pbtxt deleted file mode 100644 index 80c49cbbc25..00000000000 --- a/tensorflow/core/api_def/base_api/api_def_TensorMapReplace.pbtxt +++ /dev/null @@ -1,10 +0,0 @@ -op { - graph_op_name: "TensorMapReplace" - summary: "Returns a map that is the 'input_handle' after replacing the existing key value with the given value." 
- description: < Date: Tue, 21 Jul 2020 08:29:39 -0700 Subject: [PATCH 0932/2522] Add the flatbuffers python library to the Kokoro build VMs PiperOrigin-RevId: 322364519 Change-Id: Ifab2f499b7b3513ef96f500fc782b4bdb6a745ec --- tensorflow/tools/ci_build/release/common.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/tools/ci_build/release/common.sh b/tensorflow/tools/ci_build/release/common.sh index b533564e7a1..c3b5bd9a867 100644 --- a/tensorflow/tools/ci_build/release/common.sh +++ b/tensorflow/tools/ci_build/release/common.sh @@ -143,6 +143,7 @@ function install_pip_deps { ${SUDO_CMD} ${PIP_CMD} install scipy ${SUDO_CMD} ${PIP_CMD} install scikit-learn ${SUDO_CMD} ${PIP_CMD} install --upgrade tb-nightly + ${PIP_CMD} install --user --upgrade flatbuffers ${PIP_CMD} install --user --upgrade attrs ${PIP_CMD} install --user --upgrade tf-estimator-nightly ${PIP_CMD} install --user --upgrade "future>=0.17.1" @@ -166,6 +167,7 @@ function install_ubuntu_16_pip_deps { # LINT.IfChange(ubuntu_16_pip_installations) "${PIP_CMD}" install astunparse==1.6.3 --user "${PIP_CMD}" install --user --upgrade attrs + "${PIP_CMD}" install --user --upgrade flatbuffers "${PIP_CMD}" install keras_preprocessing==1.1.0 --no-deps --user "${PIP_CMD}" install numpy==1.16.0 --user "${PIP_CMD}" install --user --upgrade "future>=0.17.1" @@ -220,6 +222,7 @@ function install_macos_pip_deps { ${SUDO_CMD} ${PIP_CMD} install h5py==2.10.0 ${SUDO_CMD} ${PIP_CMD} install --upgrade grpcio ${SUDO_CMD} ${PIP_CMD} install --upgrade tb-nightly + ${PIP_CMD} install --user --upgrade flatbuffers ${PIP_CMD} install --user --upgrade attrs # b/156523241 ${PIP_CMD} install --force-reinstall --user --upgrade tf-estimator-nightly From bcbe3dc2bb8d88d35c99d9ad02fa516e1e6af962 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Fri, 17 Jul 2020 22:08:18 +0000 Subject: [PATCH 0933/2522] Improve error message in shape check This PR tries to address the issue raised in 41504 where the error message of: ``` InvalidArgumentError: Shape [2,2,2,2,2,2,2,2,2,2,2,2] would have more than 2**63 - 1 elements [Op:BroadcastTo] ``` is not intuitive. The issue is that the construction of error message in tensor_shape.cc did not copy the complete shape dims. This PR fixes the issue. This PR fixes 41504. 
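For illustration only (this snippet is not part of the change itself), a minimal repro sketch of the improved behavior; the shape below simply mirrors the one used in the new test, and the exception type observed can vary between eager and graph execution:

```python
# Hypothetical repro, assuming TF 2.x; the shape values are only an example.
import numpy as np
import tensorflow as tf

x = np.array([1, 2, 3], dtype=np.int32)
# Requested shape has more than 2**63 - 1 elements, so shape construction fails.
requested = [110, 53, 104, 147, 157, 123, 5, 24, 188, 40, 5, 2]

try:
  tf.broadcast_to(x, requested)
except (ValueError, tf.errors.InvalidArgumentError) as e:
  # After this change the message echoes the dims that were actually requested
  # (110,53,104,...), instead of repeating one dim value for every entry as in
  # the "[2,2,2,...]" message quoted above.
  print(e)
```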
Signed-off-by: Yong Tang --- tensorflow/core/framework/tensor_shape.cc | 2 +- .../python/kernel_tests/broadcast_to_ops_test.py | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/framework/tensor_shape.cc b/tensorflow/core/framework/tensor_shape.cc index 1e11c4ab0ce..565014d14b1 100644 --- a/tensorflow/core/framework/tensor_shape.cc +++ b/tensorflow/core/framework/tensor_shape.cc @@ -593,7 +593,7 @@ Status MakeShapeHelper(const T* dims, int64 n, Shape* out) { if (TF_PREDICT_FALSE(new_num_elements < 0)) { TensorShapeProto proto; for (int64 j = 0; j < n; ++j) { - proto.add_dim()->set_size(dim); + proto.add_dim()->set_size(internal::SubtleMustCopy(dims[j])); } return errors::InvalidArgument( "Shape ", TensorShape::DebugString(proto), diff --git a/tensorflow/python/kernel_tests/broadcast_to_ops_test.py b/tensorflow/python/kernel_tests/broadcast_to_ops_test.py index 9915b12c642..89c955e065e 100644 --- a/tensorflow/python/kernel_tests/broadcast_to_ops_test.py +++ b/tensorflow/python/kernel_tests/broadcast_to_ops_test.py @@ -192,5 +192,15 @@ class BroadcastToTest(test_util.TensorFlowTestCase): out, out.get_shape()) self.assertLess(err, 1e-4) + def testBroadcastToInvalidShape(self): + with self.assertRaisesRegex( + (ValueError, errors.InvalidArgumentError), + "110,53,104,147,157,123,5,24,188,40,5,2"): + output_shape = [110, 53, 104, 147, 157, 123, 5, 24, 188, 40, 5, 2] + x = np.array([1, 2, 3], dtype=np.int32) + v = array_ops.broadcast_to(constant_op.constant(x), output_shape) + self.evaluate(v) + + if __name__ == "__main__": test_lib.main() From 2fb8c36438505e155b782baaf848aa0e20f45951 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Tue, 21 Jul 2020 08:46:55 -0700 Subject: [PATCH 0934/2522] Add visibility for bazel_pip prefixed packages. This package prefix is used in open source Kokoro pip testing. This fixes errors like this one: target '//tensorflow/compiler/tests:xla_test' is not visible from target '//bazel_pip/tensorflow/compiler/tests:reshape_op_test_gpu' PiperOrigin-RevId: 322367493 Change-Id: I632d05c0e769da782c11dca2eebe5732f8889ba8 --- tensorflow/BUILD | 8 +++----- tensorflow/compiler/tests/BUILD | 5 ----- tensorflow/python/BUILD | 8 -------- 3 files changed, 3 insertions(+), 18 deletions(-) diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 8a0918b416f..c4963a8f106 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -532,16 +532,14 @@ selects.config_setting_group( package_group( name = "internal", packages = [ - # To pass open source testing in the pip Kokoros. - "//bazel_pip/tensorflow/...", "//learning/brain/swift/x10/...", "//perftools/accelerators/xprof/api/...", - "//third_party/py/autograph/...", - "//third_party/swift/tensorflow/x10/...", - "//third_party/swift/tensorflow_apis/...", "//tensorflow/...", "//tensorflow_estimator/python/estimator/...", "//tensorflow_models/official/...", + "//third_party/py/autograph/...", + "//third_party/swift/tensorflow/x10/...", + "//third_party/swift/tensorflow_apis/...", ], ) diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index 1dd61c235a8..16eb3afd6c5 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -21,10 +21,6 @@ package_group( includes = [ "//tensorflow/compiler/tf2xla:internal", ], - packages = [ - # To pass open source testing in the pip Kokoros. - "//bazel_pip/tensorflow/compiler/tests/...", - ], ) package_group( @@ -34,7 +30,6 @@ package_group( ], packages = [ # To pass open source testing in the pip Kokoros. 
- "//bazel_pip/tensorflow/compiler/tests/...", "//platforms/xla/tests/neural_nets", ], ) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 09d22aa203d..365ef58f796 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -2757,8 +2757,6 @@ tf_py_test( tf_gen_op_wrapper_private_py( name = "array_ops_gen", visibility = [ - # To pass open source testing in the pip Kokoros. - "//bazel_pip/tensorflow/compiler/tests:__pkg__", "//learning/brain/python/ops:__pkg__", "//tensorflow/compiler/tests:__pkg__", "//tensorflow/python/kernel_tests:__pkg__", @@ -2772,8 +2770,6 @@ tf_gen_op_wrapper_private_py( tf_gen_op_wrapper_private_py( name = "bitwise_ops_gen", visibility = [ - # To pass open source testing in the pip Kokoros. - "//bazel_pip/tensorflow/compiler/tests:__pkg__", "//learning/brain/python/ops:__pkg__", "//tensorflow/compiler/tests:__pkg__", "//tensorflow/contrib/quantization:__pkg__", @@ -2969,8 +2965,6 @@ tf_gen_op_wrapper_private_py( tf_gen_op_wrapper_private_py( name = "math_ops_gen", visibility = [ - # To pass open source testing in the pip Kokoros. - "//bazel_pip/tensorflow/compiler/tests:__pkg__", "//learning/brain/google/python/ops:__pkg__", "//learning/brain/python/ops:__pkg__", "//tensorflow/compiler/tests:__pkg__", @@ -2981,8 +2975,6 @@ tf_gen_op_wrapper_private_py( tf_gen_op_wrapper_private_py( name = "nn_ops_gen", visibility = [ - # To pass open source testing in the pip Kokoros. - "//bazel_pip/tensorflow/compiler/tests:__pkg__", "//learning/brain/python/ops:__pkg__", "//tensorflow/compiler/tests:__pkg__", "//tensorflow/python/kernel_tests:__pkg__", From c104766a18c43eaecd87b7c261db59c8e69c3e38 Mon Sep 17 00:00:00 2001 From: Taehee Jeong Date: Tue, 21 Jul 2020 08:59:38 -0700 Subject: [PATCH 0935/2522] [Core ML delegate] Add missing constraint of concat of PiperOrigin-RevId: 322369907 Change-Id: I03709deedd536b8f1add44df4b2650e256048788 --- tensorflow/lite/g3doc/performance/coreml_delegate.md | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/lite/g3doc/performance/coreml_delegate.md b/tensorflow/lite/g3doc/performance/coreml_delegate.md index b9ae477fe14..2803b080a13 100644 --- a/tensorflow/lite/g3doc/performance/coreml_delegate.md +++ b/tensorflow/lite/g3doc/performance/coreml_delegate.md @@ -184,6 +184,7 @@ Following ops are supported by the Core ML delegate. 1]`, `[B, 1, H, W]`, `[B, 1, 1, 1]`. * AveragePool2D * Concat + * Concatenation should be done along the channel axis. * Conv2D * Weights and bias should be constant. 
* DepthwiseConv2D From 526ead8f3df0b0df464fa425a99cc306b76bbb41 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Tue, 21 Jul 2020 16:09:26 +0000 Subject: [PATCH 0936/2522] Update tensorflow/core/platform/file_system.cc --- tensorflow/core/platform/file_system.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/platform/file_system.cc b/tensorflow/core/platform/file_system.cc index c96cf99291e..bfe4039ffb4 100644 --- a/tensorflow/core/platform/file_system.cc +++ b/tensorflow/core/platform/file_system.cc @@ -441,7 +441,7 @@ std::string FileSystem::DecodeTransaction(const TransactionToken* token){ // TODO(sami): Switch using StrCat when void* is supported if(token){ std::stringstream oss; - oss<<"Token= "<token<<", Owner="<owner; + oss << "Token= " << token->token << ", Owner=" << token->owner; return oss.str(); } return "No Transaction"; From 49ffb9a314481b61a197adc99f3dc2f992e517b9 Mon Sep 17 00:00:00 2001 From: Saurabh Saxena Date: Tue, 21 Jul 2020 09:14:47 -0700 Subject: [PATCH 0937/2522] Implement attribute setters for graph mode in unified API. PiperOrigin-RevId: 322373208 Change-Id: I0c083c0554ac49184d1c1fbcd32efa524c175dd2 --- .../eager/c_api_unified_experimental_graph.cc | 99 ++++++++++++++----- 1 file changed, 75 insertions(+), 24 deletions(-) diff --git a/tensorflow/c/eager/c_api_unified_experimental_graph.cc b/tensorflow/c/eager/c_api_unified_experimental_graph.cc index 6165a7d14a3..6c903560e52 100644 --- a/tensorflow/c/eager/c_api_unified_experimental_graph.cc +++ b/tensorflow/c/eager/c_api_unified_experimental_graph.cc @@ -33,6 +33,7 @@ limitations under the License. using tensorflow::dyn_cast; using tensorflow::string; +using tensorflow::gtl::ArraySlice; namespace tensorflow { namespace tracing { @@ -138,20 +139,23 @@ class GraphOperation : public TracingOperation { Status SetAttrString(const char* attr_name, const char* data, size_t length) override { - return tensorflow::errors::Unimplemented( - "SetAttrString has not been implemented yet."); + tensorflow::StringPiece s(data, length); + op_->node_builder.Attr(attr_name, s); + return Status::OK(); } Status SetAttrInt(const char* attr_name, int64_t value) override { - return tensorflow::errors::Unimplemented( - "SetAttrInt has not been implemented yet."); + static_assert(sizeof(int64_t) == sizeof(tensorflow::int64), + "64-bit int types should match in size"); + op_->node_builder.Attr(attr_name, static_cast(value)); + return Status::OK(); } Status SetAttrFloat(const char* attr_name, float value) override { - return tensorflow::errors::Unimplemented( - "SetAttrFloat has not been implemented yet."); + op_->node_builder.Attr(attr_name, value); + return Status::OK(); } Status SetAttrBool(const char* attr_name, bool value) override { - return tensorflow::errors::Unimplemented( - "SetAttrBool has not been implemented yet."); + op_->node_builder.Attr(attr_name, value); + return Status::OK(); } Status SetAttrType(const char* const attr_name, DataType value) override { if (!op_) { @@ -164,8 +168,15 @@ class GraphOperation : public TracingOperation { } Status SetAttrShape(const char* attr_name, const int64_t* dims, const int num_dims) override { - return tensorflow::errors::Unimplemented( - "SetAttrShape has not been implemented yet."); + PartialTensorShape shape; + if (num_dims >= 0) { + static_assert(sizeof(int64_t) == sizeof(tensorflow::int64), + "64-bit int types should match in size"); + shape = PartialTensorShape(ArraySlice( + reinterpret_cast(dims), num_dims)); + } + 
op_->node_builder.Attr(attr_name, shape); + return Status::OK(); } Status SetAttrFunction(const char* attr_name, const AbstractOperation* value) override { @@ -174,8 +185,10 @@ class GraphOperation : public TracingOperation { } Status SetAttrFunctionName(const char* attr_name, const char* value, size_t length) override { - return tensorflow::errors::Unimplemented( - "SetAttrFunctionName has not been implemented yet."); + tensorflow::NameAttrList func_name; + func_name.set_name(string(value, value + length)); + op_->node_builder.Attr(attr_name, func_name); + return Status::OK(); } Status SetAttrTensor(const char* attr_name, AbstractTensorInterface* tensor) override { @@ -184,33 +197,71 @@ class GraphOperation : public TracingOperation { } Status SetAttrStringList(const char* attr_name, const void* const* values, const size_t* lengths, int num_values) override { - return tensorflow::errors::Unimplemented( - "SetAttrStringList has not been implemented yet."); + if (strcmp(attr_name, tensorflow::kColocationAttrName) == 0) { + op_->colocation_constraints.clear(); + for (int i = 0; i < num_values; ++i) { + op_->colocation_constraints.emplace(static_cast(values[i]), + lengths[i]); + } + } else { + std::vector v; + v.reserve(num_values); + for (int i = 0; i < num_values; ++i) { + v.emplace_back(static_cast(values[i]), lengths[i]); + } + op_->node_builder.Attr(attr_name, v); + } + return Status::OK(); } Status SetAttrFloatList(const char* attr_name, const float* values, int num_values) override { - return tensorflow::errors::Unimplemented( - "SetAttrFloatList has not been implemented yet."); + op_->node_builder.Attr(attr_name, + ArraySlice(values, num_values)); + return Status::OK(); } Status SetAttrIntList(const char* attr_name, const int64_t* values, int num_values) override { - return tensorflow::errors::Unimplemented( - "SetAttrIntList has not been implemented yet."); + static_assert(sizeof(int64_t) == sizeof(tensorflow::int64), + "64-bit int types should match in size"); + op_->node_builder.Attr( + attr_name, + ArraySlice( + reinterpret_cast(values), num_values)); + return Status::OK(); } Status SetAttrTypeList(const char* attr_name, const DataType* values, int num_values) override { - return tensorflow::errors::Unimplemented( - "SetAttrTypeList has not been implemented yet."); + op_->node_builder.Attr(attr_name, + ArraySlice(values, num_values)); + return Status::OK(); } Status SetAttrBoolList(const char* attr_name, const unsigned char* values, int num_values) override { - return tensorflow::errors::Unimplemented( - "SetAttrBoolList has not been implemented yet."); + std::unique_ptr b(new bool[num_values]); + for (int i = 0; i < num_values; ++i) { + b[i] = values[i]; + } + op_->node_builder.Attr(attr_name, + ArraySlice(b.get(), num_values)); + + return Status::OK(); } Status SetAttrShapeList(const char* attr_name, const int64_t** dims, const int* num_dims, int num_values) override { - return tensorflow::errors::Unimplemented( - "SetAttrShapeList has not been implemented yet."); + std::vector shapes; + shapes.reserve(num_values); + for (int i = 0; i < num_values; ++i) { + if (num_dims[i] < 0) { + shapes.emplace_back(); + } else { + static_assert(sizeof(int64_t) == sizeof(tensorflow::int64), + "64-bit int types should match in size"); + shapes.emplace_back(ArraySlice( + reinterpret_cast(dims[i]), num_dims[i])); + } + } + op_->node_builder.Attr(attr_name, shapes); + return Status::OK(); } Status SetAttrFunctionList( const char* attr_name, From 7361a98958190fbcf6c9b61d258f0796439a856e Mon Sep 17 
00:00:00 2001 From: Gunhan Gulsoy Date: Tue, 21 Jul 2020 09:21:50 -0700 Subject: [PATCH 0938/2522] Remove all BUILD aliases to tensorflow/core/framework:bounds_check Use the actual rule directly everywhere. PiperOrigin-RevId: 322374754 Change-Id: I9be3ac9f16e372a74389a567acfbe175bf52b9ec --- tensorflow/core/BUILD | 14 ++--- tensorflow/core/kernels/BUILD | 99 ++++++++++++++++------------------- 2 files changed, 48 insertions(+), 65 deletions(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 9c6fd9f3632..83c3cf3b394 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -302,12 +302,6 @@ cc_library( ], ) -alias( - name = "framework_bounds_check", - actual = "//tensorflow/core/framework:bounds_check", - visibility = ["//tensorflow/core/kernels:friends"], -) - alias( name = "human_readable_json", actual = "//tensorflow/core/platform:human_readable_json", @@ -769,7 +763,7 @@ cc_library( ":lib", ":lib_internal", ":stream_executor", - "//tensorflow/core/kernels:bounds_check_lib", + "//tensorflow/core/framework:bounds_check", ], alwayslink = 1, ) @@ -2161,7 +2155,7 @@ cc_header_only_library( ":lib", ":lib_internal", ":version_lib", - "//tensorflow/core/kernels:bounds_check", + "//tensorflow/core/framework:bounds_check", "//tensorflow/core/platform/default/build_config:platformlib", ], ) @@ -2226,7 +2220,7 @@ tf_cuda_library( "//tensorflow/core/framework:shape_inference", "//tensorflow/core/framework:tensor", "//tensorflow/core/framework:tensor_shape", - "//tensorflow/core/kernels:bounds_check", + "//tensorflow/core/framework:bounds_check", "//tensorflow/core/platform/default/build_config:platformlib", "//tensorflow/core/profiler/lib:annotated_traceme", "//tensorflow/core/profiler/lib:traceme", @@ -2328,7 +2322,7 @@ tf_cuda_library( ":function_ops_op_lib", ":functional_grad", ":functional_ops_op_lib", - "//tensorflow/core/kernels:bounds_check", + "//tensorflow/core/framework:bounds_check", "//tensorflow/core/kernels:required", ]), alwayslink = 1, diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index c395f7d3e73..9501611f82e 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -164,7 +164,6 @@ tf_kernel_library( "strided_slice_op_gpu_number_types.cu.cc", ], deps = [ - ":bounds_check", ":dense_update_functor", ":inplace_ops", ":ops_util", @@ -173,6 +172,7 @@ tf_kernel_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", + "//tensorflow/core/framework:bounds_check", "//third_party/eigen3", ], ) @@ -283,9 +283,9 @@ tf_kernel_library( "gpu_device_array_gpu.h", ], deps = [ - ":bounds_check", "//tensorflow/core:framework", "//tensorflow/core:framework_internal", + "//tensorflow/core/framework:bounds_check", "//third_party/eigen3", ], alwayslink = 0, @@ -347,11 +347,11 @@ tf_kernel_library( name = "extract_image_patches_op", prefix = "extract_image_patches_op", deps = [ - ":bounds_check", ":eigen_helpers", ":ops_util", "//tensorflow/core:framework", "//tensorflow/core:lib", + "//tensorflow/core/framework:bounds_check", "//third_party/eigen3", ], ) @@ -360,11 +360,11 @@ tf_kernel_library( name = "extract_volume_patches_op", prefix = "extract_volume_patches_op", deps = [ - ":bounds_check", ":eigen_helpers", ":ops_util", "//tensorflow/core:framework", "//tensorflow/core:lib", + "//tensorflow/core/framework:bounds_check", "//third_party/eigen3", ], ) @@ -690,9 +690,9 @@ cc_library( hdrs = ["save_restore_tensor.h"], copts = if_not_windows(["-Wno-sign-compare"]), deps = [ - 
":bounds_check", "//tensorflow/core:framework", "//tensorflow/core:lib", + "//tensorflow/core/framework:bounds_check", "//tensorflow/core/util/tensor_bundle", ], ) @@ -748,19 +748,8 @@ cc_library( ], ) -alias( - name = "bounds_check", - actual = "//tensorflow/core:framework_bounds_check", - visibility = [":friends"], -) - # Private support libraries --------------------------------------------------- -cc_header_only_library( - name = "bounds_check_lib", - deps = [":bounds_check"], -) - cc_library( name = "gpu_device_array", hdrs = [ @@ -947,9 +936,9 @@ cc_library( hdrs = ["image_resizer_state.h"], visibility = ["//visibility:private"], deps = [ - ":bounds_check", "//tensorflow/core:framework", "//tensorflow/core:lib", + "//tensorflow/core/framework:bounds_check", "//third_party/eigen3", ], ) @@ -982,7 +971,7 @@ tf_cc_test( # OpKernel libraries ---------------------------------------------------------- ARRAY_DEPS = [ - ":bounds_check", + "//tensorflow/core/framework:bounds_check", ":concat_lib", ":fill_functor", ":gather_functor", @@ -1596,12 +1585,12 @@ tf_kernel_library( srcs = ["cudnn_rnn_ops.cc"], visibility = ["//visibility:public"], deps = [ - ":bounds_check_lib", ":gpu_utils", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:stream_executor", + "//tensorflow/core/framework:bounds_check", "//third_party/eigen3", ], ) @@ -2052,8 +2041,8 @@ tf_kernel_library( prefix = "gather_functor", visibility = [":friends"], deps = [ - ":bounds_check", "//tensorflow/core:framework", + "//tensorflow/core/framework:bounds_check", "//third_party/eigen3", ], ) @@ -2282,10 +2271,10 @@ tf_kernel_library( prefix = "scatter_functor", visibility = [":friends"], deps = [ - ":bounds_check", ":dense_update_functor", "//tensorflow/core:framework", "//tensorflow/core:lib", + "//tensorflow/core/framework:bounds_check", "//third_party/eigen3", ], ) @@ -2450,7 +2439,7 @@ tf_kernel_library( name = "ctc_ops", prefix = "ctc", deps = [ - ":bounds_check", + "//tensorflow/core/framework:bounds_check", ":ops_util", "//tensorflow/core:framework", "//tensorflow/core:lib", @@ -2519,7 +2508,7 @@ cc_header_only_library( ) DATA_FLOW_DEPS = [ - ":bounds_check", + "//tensorflow/core/framework:bounds_check", ":concat_lib", ":conditional_accumulator", ":conditional_accumulator_base", @@ -2663,7 +2652,7 @@ tf_kernel_library( ) DYNAMIC_DEPS = [ - ":bounds_check", + "//tensorflow/core/framework:bounds_check", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", "//tensorflow/core:lib", @@ -2716,7 +2705,7 @@ tf_cc_test( ) LOOKUP_DEPS = [ - ":bounds_check", + "//tensorflow/core/framework:bounds_check", ":initializable_lookup_table", ":lookup_util", "@com_google_absl//absl/container:flat_hash_map", @@ -2896,7 +2885,6 @@ tf_kernel_library( srcs = ["resource_variable_ops.cc"], hdrs = ["resource_variable_ops.h"], deps = [ - ":bounds_check", ":dense_update_functor", ":gather_functor", ":gather_nd_op", @@ -2907,6 +2895,7 @@ tf_kernel_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", + "//tensorflow/core/framework:bounds_check", "@com_google_absl//absl/strings", ], ) @@ -3025,7 +3014,7 @@ cc_library( ) IMAGE_DEPS = [ - ":bounds_check", + "//tensorflow/core/framework:bounds_check", ":eigen_helpers", ":image_resizer_state", "//third_party/eigen3", @@ -3177,11 +3166,11 @@ tf_kernel_library( name = "encode_wav_op", prefix = "encode_wav_op", deps = [ - ":bounds_check", "//tensorflow/core:framework", "//tensorflow/core:lib", 
"//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", + "//tensorflow/core/framework:bounds_check", ], ) @@ -3492,7 +3481,7 @@ tf_kernel_library( ) SAVE_RESTORE_DEPS = [ - ":bounds_check_lib", + "//tensorflow/core/framework:bounds_check", ":save_restore_tensor", "//tensorflow/core:framework", "//tensorflow/core:lib", @@ -3935,9 +3924,9 @@ tf_kernel_library( "roll_op.h", ], deps = [ - ":bounds_check", "//tensorflow/core:framework", "//tensorflow/core:lib", + "//tensorflow/core/framework:bounds_check", "//third_party/eigen3", ], ) @@ -3962,7 +3951,7 @@ tf_cc_test( ) MATH_DEPS = [ - ":bounds_check", + "//tensorflow/core/framework:bounds_check", ":fill_functor", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", @@ -4328,7 +4317,6 @@ tf_cuda_cc_test( size = "small", srcs = ["cwise_ops_test.cc"], deps = [ - ":bounds_check", ":cwise_op", ":nn", ":ops_testutil", @@ -4340,6 +4328,7 @@ tf_cuda_cc_test( "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core:testlib", + "//tensorflow/core/framework:bounds_check", ], ) @@ -4645,7 +4634,7 @@ tf_kernel_library( deps = [ ":conv_grad_shape_utils", ":conv_ops_3d_headers", - ":bounds_check", + "//tensorflow/core/framework:bounds_check", ":conv_2d", ":conv_3d", ":eigen_contraction_kernel", @@ -4712,7 +4701,7 @@ tf_kernel_library( "depthwise_conv_op_gpu_half.cu.cc", ], deps = [ - ":bounds_check", + "//tensorflow/core/framework:bounds_check", ":conv_ops", ":ops_util", "//tensorflow/core:core_cpu", @@ -4733,7 +4722,7 @@ tf_kernel_library( ], prefix = "depthwise_conv_grad_op", deps = [ - ":bounds_check", + "//tensorflow/core/framework:bounds_check", ":conv_ops", ":ops_util", "//tensorflow/core:core_cpu", @@ -4777,7 +4766,7 @@ cc_library( ) NN_DEPS = [ - ":bounds_check", + "//tensorflow/core/framework:bounds_check", ":conv_2d", ":eigen_contraction_kernel", ":ops_util", @@ -5034,7 +5023,6 @@ tf_kernel_library( "pooling_ops_3d_gpu.cu.cc", ], deps = [ - ":bounds_check", ":conv_2d", ":conv_3d", ":conv_ops", @@ -5045,6 +5033,7 @@ tf_kernel_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:stream_executor", + "//tensorflow/core/framework:bounds_check", "//third_party/eigen3", ], ) @@ -5105,9 +5094,9 @@ tf_kernel_library( ], visibility = [":friends"], deps = [ - ":bounds_check", "//tensorflow/core:framework", "//tensorflow/core:lib", + "//tensorflow/core/framework:bounds_check", "//third_party/eigen3", ], ) @@ -5289,7 +5278,6 @@ tf_kernel_library( name = "stateful_random_ops", prefix = "stateful_random_ops", deps = [ - ":bounds_check", ":dense_update_functor", ":gather_functor", ":mutex_ops", @@ -5305,6 +5293,7 @@ tf_kernel_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", + "//tensorflow/core/framework:bounds_check", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:variant", ], @@ -5314,11 +5303,11 @@ tf_kernel_library( name = "stateless_random_ops", prefix = "stateless_random_ops", deps = [ - ":bounds_check", ":random_op", ":random_poisson_op", "//tensorflow/core:framework", "//tensorflow/core:lib", + "//tensorflow/core/framework:bounds_check", ], ) @@ -5505,7 +5494,7 @@ tf_kernel_library( name = "sparse_tensor_dense_matmul_op", prefix = "sparse_tensor_dense_matmul_op", deps = SPARSE_DEPS + [ - ":bounds_check", + "//tensorflow/core/framework:bounds_check", ":fill_functor", "//third_party/eigen3", ], @@ -5523,7 +5512,7 @@ tf_kernel_library( name = "sparse_xent_op", prefix = "sparse_xent_op", deps = SPARSE_DEPS + [ - 
":bounds_check", + "//tensorflow/core/framework:bounds_check", "//third_party/eigen3", ] + if_cuda_or_rocm([ ":reduction_ops", @@ -5678,7 +5667,7 @@ cc_library( STATE_DEPS = [ ":assign_op", - ":bounds_check", + "//tensorflow/core/framework:bounds_check", ":fill_functor", ":scatter_functor", "//third_party/eigen3", @@ -5849,7 +5838,7 @@ cc_library( ) STRING_DEPS = [ - ":bounds_check", + "//tensorflow/core/framework:bounds_check", ":string_util", "//third_party/eigen3", "//tensorflow/core:framework", @@ -6048,11 +6037,11 @@ tf_kernel_library( name = "unicode_ops", prefix = "unicode_ops", deps = [ - ":bounds_check", ":string_util", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", + "//tensorflow/core/framework:bounds_check", "//third_party/eigen3", "//third_party/icu/data:conversion_data", "@icu//:common", @@ -6069,11 +6058,11 @@ tf_kernel_library( name = "training_ops", prefix = "training_ops", deps = [ - ":bounds_check", ":training_op_helpers", ":variable_ops", "//tensorflow/core:framework", "//tensorflow/core:lib", + "//tensorflow/core/framework:bounds_check", "//third_party/eigen3", ], ) @@ -7754,7 +7743,7 @@ tf_mkl_kernel_library( "reference_gemm.h", ], deps = [ - ":bounds_check", + "//tensorflow/core/framework:bounds_check", ":ops_util", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", @@ -8101,7 +8090,7 @@ tf_mkl_kernel_library( "no_op.h", ], deps = [ - ":bounds_check", + "//tensorflow/core/framework:bounds_check", ":fill_functor", ":matmul_op", ":ops_util", @@ -8124,7 +8113,7 @@ tf_mkl_kernel_library( ], prefix = "mkl_conv", deps = [ - ":bounds_check", + "//tensorflow/core/framework:bounds_check", ":conv_ops", ":ops_util", "@com_google_absl//absl/strings", @@ -8196,7 +8185,7 @@ tf_mkl_kernel_library( name = "mkl_tfconv_op", prefix = "mkl_tfconv", deps = [ - ":bounds_check", + "//tensorflow/core/framework:bounds_check", ":ops_util", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", @@ -8210,7 +8199,7 @@ tf_mkl_kernel_library( hdrs = ["mkl_tfconv_op.h"], prefix = "mkl_input_conversion", deps = [ - ":bounds_check", + "//tensorflow/core/framework:bounds_check", ":ops_util", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", @@ -8228,7 +8217,7 @@ tf_mkl_kernel_library( ], hdrs = ["mkl_pooling_ops_common.h"], deps = [ - ":bounds_check", + "//tensorflow/core/framework:bounds_check", ":ops_util", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", @@ -8298,7 +8287,7 @@ tf_mkl_kernel_library( name = "mkl_relu_op", prefix = "mkl_relu", deps = [ - ":bounds_check", + "//tensorflow/core/framework:bounds_check", ":ops_util", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", @@ -8312,7 +8301,7 @@ tf_mkl_kernel_library( name = "mkl_softmax_op", prefix = "mkl_softmax", deps = [ - ":bounds_check", + "//tensorflow/core/framework:bounds_check", ":ops_util", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", @@ -8635,10 +8624,10 @@ cc_library( "meta_support.h", ], deps = [ - ":bounds_check", ":quantization_utils", "//tensorflow/core:framework", "//tensorflow/core:lib", + "//tensorflow/core/framework:bounds_check", "//third_party/eigen3", "@gemmlowp", ], From 099cc6f9452bace63deafdba68e4a38ba9618a4c Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 21 Jul 2020 09:36:55 -0700 Subject: [PATCH 0939/2522] Integrate LLVM at llvm/llvm-project@61dd481f1105 Updates LLVM usage to match [61dd481f1105](https://github.com/llvm/llvm-project/commit/61dd481f1105) PiperOrigin-RevId: 322377858 Change-Id: I3fc71902fd3241d0754c7cee49295a2be72811fd --- tensorflow/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 5d25b38b159..2502f48e895 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -711,8 +711,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "becaa6803ab532d15506829f0551a5fa49c39d7e" - LLVM_SHA256 = "4dd3797959716010c355ee327b7649b98e1da491e198421d258b5c515d677a40" + LLVM_COMMIT = "61dd481f11051450522bcd2cfcb7222a90d3464e" + LLVM_SHA256 = "3604007894e3dc73e166b6d70cefe5ee06e1e4fc1b1ae33ca75077498200881c" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From fde689563d0e9f78e6dafc0defdf7cc4101f53cb Mon Sep 17 00:00:00 2001 From: Robert David Date: Tue, 21 Jul 2020 09:45:50 -0700 Subject: [PATCH 0940/2522] Cleanup LSTM tests: configuration defines tensor sizes, so don't require it in another parameter of LSTMOpModel constructor. This allows merging the two LSTMIntegerOpModels. PiperOrigin-RevId: 322379806 Change-Id: Iaf2f19c1090b3b39047bc4b781f10ebaac55bbd5 --- tensorflow/lite/kernels/lstm_test.cc | 1158 ++++---------------------- 1 file changed, 155 insertions(+), 1003 deletions(-) diff --git a/tensorflow/lite/kernels/lstm_test.cc b/tensorflow/lite/kernels/lstm_test.cc index 754aaba9319..cad57461f30 100644 --- a/tensorflow/lite/kernels/lstm_test.cc +++ b/tensorflow/lite/kernels/lstm_test.cc @@ -39,7 +39,6 @@ class LSTMOpModel : public SingleOpModel { LSTMOpModel(int n_batch, int n_input, int n_cell, int n_output, bool use_cifg, bool use_peephole, bool use_projection_weights, bool use_projection_bias, float cell_clip, float proj_clip, - const std::vector>& input_shapes, const TensorType weight_type, bool model_has_legacy_20_inputs, bool is_layer_norm, bool asymmetric_quantize_inputs) : n_batch_(n_batch), @@ -47,36 +46,43 @@ class LSTMOpModel : public SingleOpModel { n_cell_(n_cell), n_output_(n_output), weight_type_(weight_type) { - input_ = AddInput(TensorType_FLOAT32); + input_ = AddInput(TensorData{TensorType_FLOAT32, {n_batch, n_input}}); if (use_cifg) { input_to_input_weights_ = AddNullInput(); } else { - input_to_input_weights_ = AddInput(weight_type); + input_to_input_weights_ = + AddInput(TensorData{weight_type, {n_cell, n_input}}); } - input_to_forget_weights_ = AddInput(weight_type); - input_to_cell_weights_ = AddInput(weight_type); - input_to_output_weights_ = AddInput(weight_type); - + input_to_forget_weights_ = + AddInput(TensorData{weight_type, {n_cell, n_input}}); + input_to_cell_weights_ = + AddInput(TensorData{weight_type, {n_cell, n_input}}); + input_to_output_weights_ = + AddInput(TensorData{weight_type, {n_cell, n_input}}); if (use_cifg) { recurrent_to_input_weights_ = AddNullInput(); } else { - recurrent_to_input_weights_ = AddInput(weight_type); + recurrent_to_input_weights_ = + AddInput(TensorData{weight_type, {n_cell, n_output}}); } - recurrent_to_forget_weights_ = AddInput(weight_type); - recurrent_to_cell_weights_ 
= AddInput(weight_type); - recurrent_to_output_weights_ = AddInput(weight_type); + recurrent_to_forget_weights_ = + AddInput(TensorData{weight_type, {n_cell, n_output}}); + recurrent_to_cell_weights_ = + AddInput(TensorData{weight_type, {n_cell, n_output}}); + recurrent_to_output_weights_ = + AddInput(TensorData{weight_type, {n_cell, n_output}}); if (use_peephole) { if (use_cifg) { cell_to_input_weights_ = AddNullInput(); } else { - cell_to_input_weights_ = AddInput(weight_type); + cell_to_input_weights_ = AddInput(TensorData{weight_type, {n_cell}}); } - cell_to_forget_weights_ = AddInput(weight_type); - cell_to_output_weights_ = AddInput(weight_type); + cell_to_forget_weights_ = AddInput(TensorData{weight_type, {n_cell}}); + cell_to_output_weights_ = AddInput(TensorData{weight_type, {n_cell}}); } else { cell_to_input_weights_ = AddNullInput(); cell_to_forget_weights_ = AddNullInput(); @@ -86,16 +92,17 @@ class LSTMOpModel : public SingleOpModel { if (use_cifg) { input_gate_bias_ = AddNullInput(); } else { - input_gate_bias_ = AddInput(TensorType_FLOAT32); + input_gate_bias_ = AddInput(TensorData{TensorType_FLOAT32, {n_cell}}); } - forget_gate_bias_ = AddInput(TensorType_FLOAT32); - cell_gate_bias_ = AddInput(TensorType_FLOAT32); - output_gate_bias_ = AddInput(TensorType_FLOAT32); + forget_gate_bias_ = AddInput(TensorData{TensorType_FLOAT32, {n_cell}}); + cell_gate_bias_ = AddInput(TensorData{TensorType_FLOAT32, {n_cell}}); + output_gate_bias_ = AddInput(TensorData{TensorType_FLOAT32, {n_cell}}); if (use_projection_weights) { - projection_weights_ = AddInput(weight_type); + projection_weights_ = + AddInput(TensorData{weight_type, {n_output, n_cell}}); if (use_projection_bias) { - projection_bias_ = AddInput(TensorType_FLOAT32); + projection_bias_ = AddInput(TensorData{TensorType_FLOAT32, {n_output}}); } else { projection_bias_ = AddNullInput(); } @@ -106,9 +113,9 @@ class LSTMOpModel : public SingleOpModel { // Adding the 2 state tensors. output_state_ = - AddInput(TensorData{TensorType_FLOAT32, {n_batch_, n_output_}}, true); + AddInput(TensorData{TensorType_FLOAT32, {n_batch, n_output}}, true); cell_state_ = - AddInput(TensorData{TensorType_FLOAT32, {n_batch_, n_cell_}}, true); + AddInput(TensorData{TensorType_FLOAT32, {n_batch, n_cell}}, true); // Layer norm weights. if (!model_has_legacy_20_inputs) { @@ -116,17 +123,21 @@ class LSTMOpModel : public SingleOpModel { input_layer_norm_coefficients_ = AddNullInput(); } else { input_layer_norm_coefficients_ = - is_layer_norm ? AddInput(TensorType_FLOAT32) : AddNullInput(); + is_layer_norm ? AddInput(TensorData{TensorType_FLOAT32, {n_cell}}) + : AddNullInput(); } forget_layer_norm_coefficients_ = - is_layer_norm ? AddInput(TensorType_FLOAT32) : AddNullInput(); + is_layer_norm ? AddInput(TensorData{TensorType_FLOAT32, {n_cell}}) + : AddNullInput(); cell_layer_norm_coefficients_ = - is_layer_norm ? AddInput(TensorType_FLOAT32) : AddNullInput(); + is_layer_norm ? AddInput(TensorData{TensorType_FLOAT32, {n_cell}}) + : AddNullInput(); output_layer_norm_coefficients_ = - is_layer_norm ? AddInput(TensorType_FLOAT32) : AddNullInput(); + is_layer_norm ? 
AddInput(TensorData{TensorType_FLOAT32, {n_cell}}) + : AddNullInput(); } - output_ = AddOutput(TensorType_FLOAT32); + output_ = AddOutput(TensorData{TensorType_FLOAT32, {n_output}}); SetBuiltinOp( BuiltinOperator_LSTM, BuiltinOptions_LSTMOptions, @@ -135,7 +146,7 @@ class LSTMOpModel : public SingleOpModel { asymmetric_quantize_inputs) .Union()); - BuildInterpreter(input_shapes); + BuildInterpreter({}); // Input sizes are already set up. } void SetInputToInputWeights(const std::vector& f) { @@ -466,31 +477,6 @@ TEST_F(NoCifgNoPeepholeNoProjectionNoClippingLstmTest, LstmBlackBoxTest) { /*use_projection_weights=*/false, /*use_projection_bias=*/false, /*cell_clip=*/0.0, /*proj_clip=*/0.0, - { - {n_batch, n_input}, // input tensor - - {n_cell, n_input}, // input_to_input_weight tensor - {n_cell, n_input}, // input_to_forget_weight tensor - {n_cell, n_input}, // input_to_cell_weight tensor - {n_cell, n_input}, // input_to_output_weight tensor - - {n_cell, n_output}, // recurrent_to_input_weight_tensor - {n_cell, n_output}, // recurrent_to_forget_weight_tensor - {n_cell, n_output}, // recurrent_to_cell_weight_tensor - {n_cell, n_output}, // recurrent_to_output_weight_tensor - - {0}, // cell_to_input_weight tensor - {0}, // cell_to_forget_weight tensor - {0}, // cell_to_output_weight tensor - - {n_cell}, // input_gate_bias tensor - {n_cell}, // forget_gate_bias tensor - {n_cell}, // cell_gate_bias tensor - {n_cell}, // output_gate_bias tensor - - {0, 0}, // projection_weight tensor - {0}, // projection_bias tensor - }, /*weight_type=*/TensorType_FLOAT32, /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/false); @@ -514,39 +500,6 @@ TEST_F(NoCifgNoPeepholeNoProjectionNoClippingNoLayerNormLstmTest, /*use_projection_weights=*/false, /*use_projection_bias=*/false, /*cell_clip=*/0.0, /*proj_clip=*/0.0, - { - {n_batch, n_input}, // input tensor - - {n_cell, n_input}, // input_to_input_weight tensor - {n_cell, n_input}, // input_to_forget_weight tensor - {n_cell, n_input}, // input_to_cell_weight tensor - {n_cell, n_input}, // input_to_output_weight tensor - - {n_cell, n_output}, // recurrent_to_input_weight_tensor - {n_cell, n_output}, // recurrent_to_forget_weight_tensor - {n_cell, n_output}, // recurrent_to_cell_weight_tensor - {n_cell, n_output}, // recurrent_to_output_weight_tensor - - {0}, // cell_to_input_weight tensor - {0}, // cell_to_forget_weight tensor - {0}, // cell_to_output_weight tensor - - {n_cell}, // input_gate_bias tensor - {n_cell}, // forget_gate_bias tensor - {n_cell}, // cell_gate_bias tensor - {n_cell}, // output_gate_bias tensor - - {0, 0}, // projection_weight tensor - {0}, // projection_bias tensor - - {n_batch, n_output}, // output_state tensor - {n_batch, n_cell}, // cell_state tensor - - {0}, // input_layer_norm_coefficient tensor - {0}, // forget_layer_norm_coefficient tensor - {0}, // cell_layer_norm_coefficient tensor - {0}, // output_layer_norm_coefficient tensor - }, /*weight_type=*/TensorType_FLOAT32, /*model_has_legacy_20_inputs=*/false, /*is_layer_norm=*/false, @@ -572,31 +525,6 @@ TEST_P(NoCifgNoPeepholeNoProjectionNoClippingLstmTest, /*use_projection_weights=*/false, /*use_projection_bias=*/false, /*cell_clip=*/0.0, /*proj_clip=*/0.0, - { - {n_batch, n_input}, // input tensor - - {n_cell, n_input}, // input_to_input_weight tensor - {n_cell, n_input}, // input_to_forget_weight tensor - {n_cell, n_input}, // input_to_cell_weight tensor - {n_cell, n_input}, // input_to_output_weight tensor - - {n_cell, n_output}, // 
recurrent_to_input_weight tensor - {n_cell, n_output}, // recurrent_to_forget_weight tensor - {n_cell, n_output}, // recurrent_to_cell_weight tensor - {n_cell, n_output}, // recurrent_to_output_weight tensor - - {0}, // cell_to_input_weight tensor - {0}, // cell_to_forget_weight tensor - {0}, // cell_to_output_weight tensor - - {n_cell}, // input_gate_bias tensor - {n_cell}, // forget_gate_bias tensor - {n_cell}, // cell_gate_bias tensor - {n_cell}, // output_gate_bias tensor - - {0, 0}, // projection_weight tensor - {0}, // projection_bias tensor - }, /*weight_type=*/TensorType_UINT8, /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/GetParam()); @@ -624,31 +552,6 @@ TEST_P(NoCifgNoPeepholeNoProjectionNoClippingLstmInt8Test, /*use_projection_weights=*/false, /*use_projection_bias=*/false, /*cell_clip=*/0.0, /*proj_clip=*/0.0, - { - {n_batch, n_input}, // input tensor - - {n_cell, n_input}, // input_to_input_weight tensor - {n_cell, n_input}, // input_to_forget_weight tensor - {n_cell, n_input}, // input_to_cell_weight tensor - {n_cell, n_input}, // input_to_output_weight tensor - - {n_cell, n_output}, // recurrent_to_input_weight tensor - {n_cell, n_output}, // recurrent_to_forget_weight tensor - {n_cell, n_output}, // recurrent_to_cell_weight tensor - {n_cell, n_output}, // recurrent_to_output_weight tensor - - {0}, // cell_to_input_weight tensor - {0}, // cell_to_forget_weight tensor - {0}, // cell_to_output_weight tensor - - {n_cell}, // input_gate_bias tensor - {n_cell}, // forget_gate_bias tensor - {n_cell}, // cell_gate_bias tensor - {n_cell}, // output_gate_bias tensor - - {0, 0}, // projection_weight tensor - {0}, // projection_bias tensor - }, /*weight_type=*/TensorType_INT8, /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/GetParam()); @@ -716,31 +619,6 @@ TEST_F(CifgNoPeepholeNoProjectionNoClippingLstmTest, LstmBlackBoxTest) { /*use_projection_weights=*/false, /*use_projection_bias=*/false, /*cell_clip=*/0.0, /*proj_clip=*/0.0, - { - {n_batch, n_input}, // input tensor - - {0, 0}, // input_to_input_weight tensor - {n_cell, n_input}, // input_to_forget_weight tensor - {n_cell, n_input}, // input_to_cell_weight tensor - {n_cell, n_input}, // input_to_output_weight tensor - - {0, 0}, // recurrent_to_input_weight tensor - {n_cell, n_output}, // recurrent_to_forget_weight tensor - {n_cell, n_output}, // recurrent_to_cell_weight tensor - {n_cell, n_output}, // recurrent_to_output_weight tensor - - {0}, // cell_to_input_weight tensor - {n_cell}, // cell_to_forget_weight tensor - {n_cell}, // cell_to_output_weight tensor - - {0}, // input_gate_bias tensor - {n_cell}, // forget_gate_bias tensor - {n_cell}, // cell_gate_bias tensor - {n_cell}, // output_gate_bias tensor - - {0, 0}, // projection_weight tensor - {0}, // projection_bias tensor - }, /*weight_type=*/TensorType_FLOAT32, /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/false); @@ -765,31 +643,6 @@ TEST_P(CifgNoPeepholeNoProjectionNoClippingLstmTest, /*use_projection_weights=*/false, /*use_projection_bias=*/false, /*cell_clip=*/0.0, /*proj_clip=*/0.0, - { - {n_batch, n_input}, // input tensor - - {0, 0}, // input_to_input_weight tensor - {n_cell, n_input}, // input_to_forget_weight tensor - {n_cell, n_input}, // input_to_cell_weight tensor - {n_cell, n_input}, // input_to_output_weight tensor - - {0, 0}, // recurrent_to_input_weight tensor - {n_cell, n_output}, // recurrent_to_forget_weight tensor - 
{n_cell, n_output}, // recurrent_to_cell_weight tensor - {n_cell, n_output}, // recurrent_to_output_weight tensor - - {0}, // cell_to_input_weight tensor - {n_cell}, // cell_to_forget_weight tensor - {n_cell}, // cell_to_output_weight tensor - - {0}, // input_gate_bias tensor - {n_cell}, // forget_gate_bias tensor - {n_cell}, // cell_gate_bias tensor - {n_cell}, // output_gate_bias tensor - - {0, 0}, // projection_weight tensor - {0}, // projection_bias tensor - }, /*weight_type=*/TensorType_UINT8, /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/GetParam()); @@ -815,31 +668,6 @@ TEST_P(CifgNoPeepholeNoProjectionNoClippingLstmInt8Test, /*use_projection_weights=*/false, /*use_projection_bias=*/false, /*cell_clip=*/0.0, /*proj_clip=*/0.0, - { - {n_batch, n_input}, // input tensor - - {0, 0}, // input_to_input_weight tensor - {n_cell, n_input}, // input_to_forget_weight tensor - {n_cell, n_input}, // input_to_cell_weight tensor - {n_cell, n_input}, // input_to_output_weight tensor - - {0, 0}, // recurrent_to_input_weight tensor - {n_cell, n_output}, // recurrent_to_forget_weight tensor - {n_cell, n_output}, // recurrent_to_cell_weight tensor - {n_cell, n_output}, // recurrent_to_output_weight tensor - - {0}, // cell_to_input_weight tensor - {n_cell}, // cell_to_forget_weight tensor - {n_cell}, // cell_to_output_weight tensor - - {0}, // input_gate_bias tensor - {n_cell}, // forget_gate_bias tensor - {n_cell}, // cell_gate_bias tensor - {n_cell}, // output_gate_bias tensor - - {0, 0}, // projection_weight tensor - {0}, // projection_bias tensor - }, /*weight_type=*/TensorType_INT8, /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/GetParam()); @@ -1457,31 +1285,6 @@ TEST_F(NoCifgPeepholeProjectionNoClippingLstmTest, LstmBlackBoxTest) { /*use_projection_weights=*/true, /*use_projection_bias=*/false, /*cell_clip=*/0.0, /*proj_clip=*/0.0, - { - {n_batch, n_input}, // input tensor - - {n_cell, n_input}, // input_to_input_weight tensor - {n_cell, n_input}, // input_to_forget_weight tensor - {n_cell, n_input}, // input_to_cell_weight tensor - {n_cell, n_input}, // input_to_output_weight tensor - - {n_cell, n_output}, // recurrent_to_input_weight tensor - {n_cell, n_output}, // recurrent_to_forget_weight tensor - {n_cell, n_output}, // recurrent_to_cell_weight tensor - {n_cell, n_output}, // recurrent_to_output_weight tensor - - {n_cell}, // cell_to_input_weight tensor - {n_cell}, // cell_to_forget_weight tensor - {n_cell}, // cell_to_output_weight tensor - - {n_cell}, // input_gate_bias tensor - {n_cell}, // forget_gate_bias tensor - {n_cell}, // cell_gate_bias tensor - {n_cell}, // output_gate_bias tensor - - {n_output, n_cell}, // projection_weight tensor - {0}, // projection_bias tensor - }, /*weight_type=*/TensorType_FLOAT32, /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/false); @@ -1505,31 +1308,6 @@ TEST_P(NoCifgPeepholeProjectionNoClippingLstmTest, /*use_projection_weights=*/true, /*use_projection_bias=*/false, /*cell_clip=*/0.0, /*proj_clip=*/0.0, - { - {n_batch, n_input}, // input tensor - - {n_cell, n_input}, // input_to_input_weight tensor - {n_cell, n_input}, // input_to_forget_weight tensor - {n_cell, n_input}, // input_to_cell_weight tensor - {n_cell, n_input}, // input_to_output_weight tensor - - {n_cell, n_output}, // recurrent_to_input_weight tensor - {n_cell, n_output}, // recurrent_to_forget_weight tensor - {n_cell, n_output}, // 
recurrent_to_cell_weight tensor - {n_cell, n_output}, // recurrent_to_output_weight tensor - - {n_cell}, // cell_to_input_weight tensor - {n_cell}, // cell_to_forget_weight tensor - {n_cell}, // cell_to_output_weight tensor - - {n_cell}, // input_gate_bias tensor - {n_cell}, // forget_gate_bias tensor - {n_cell}, // cell_gate_bias tensor - {n_cell}, // output_gate_bias tensor - - {n_output, n_cell}, // projection_weight tensor - {0}, // projection_bias tensor - }, /*weight_type=*/TensorType_UINT8, /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/GetParam()); @@ -1555,31 +1333,6 @@ TEST_P(NoCifgPeepholeProjectionNoClippingLstmInt8Test, /*use_projection_weights=*/true, /*use_projection_bias=*/false, /*cell_clip=*/0.0, /*proj_clip=*/0.0, - { - {n_batch, n_input}, // input tensor - - {n_cell, n_input}, // input_to_input_weight tensor - {n_cell, n_input}, // input_to_forget_weight tensor - {n_cell, n_input}, // input_to_cell_weight tensor - {n_cell, n_input}, // input_to_output_weight tensor - - {n_cell, n_output}, // recurrent_to_input_weight tensor - {n_cell, n_output}, // recurrent_to_forget_weight tensor - {n_cell, n_output}, // recurrent_to_cell_weight tensor - {n_cell, n_output}, // recurrent_to_output_weight tensor - - {n_cell}, // cell_to_input_weight tensor - {n_cell}, // cell_to_forget_weight tensor - {n_cell}, // cell_to_output_weight tensor - - {n_cell}, // input_gate_bias tensor - {n_cell}, // forget_gate_bias tensor - {n_cell}, // cell_gate_bias tensor - {n_cell}, // output_gate_bias tensor - - {n_output, n_cell}, // projection_weight tensor - {0}, // projection_bias tensor - }, /*weight_type=*/TensorType_INT8, /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/GetParam()); @@ -1668,39 +1421,6 @@ TEST_F(NoCifgPeepholeProjectionNoClippingLayerNormLstmTest, /*use_cifg=*/false, /*use_peephole=*/true, /*use_projection_weights=*/true, /*use_projection_bias=*/false, cell_clip, proj_clip, - { - {n_batch, n_input}, // input tensor - - {n_cell, n_input}, // input_to_input_weight tensor - {n_cell, n_input}, // input_to_forget_weight tensor - {n_cell, n_input}, // input_to_cell_weight tensor - {n_cell, n_input}, // input_to_output_weight tensor - - {n_cell, n_output}, // recurrent_to_input_weight tensor - {n_cell, n_output}, // recurrent_to_forget_weight tensor - {n_cell, n_output}, // recurrent_to_cell_weight tensor - {n_cell, n_output}, // recurrent_to_output_weight tensor - - {n_cell}, // cell_to_input_weight tensor - {n_cell}, // cell_to_forget_weight tensor - {n_cell}, // cell_to_output_weight tensor - - {n_cell}, // input_gate_bias tensor - {n_cell}, // forget_gate_bias tensor - {n_cell}, // cell_gate_bias tensor - {n_cell}, // output_gate_bias tensor - - {n_output, n_cell}, // projection_weight tensor - {0}, // projection_bias tensor - - {n_batch, n_output}, // output_state tensor - {n_batch, n_cell}, // cell_state tensor - - {n_cell}, // input_layer_norm_coefficient tensor - {n_cell}, // forget_layer_norm_coefficient tensor - {n_cell}, // cell_layer_norm_coefficient tensor - {n_cell}, // output_layer_norm_coefficient tensor - }, /*weight_type=*/TensorType_FLOAT32, /*model_has_legacy_20_inputs=*/false, /*is_layer_norm=*/true, /*asymmetric_quantize_inputs=*/false); @@ -1739,39 +1459,6 @@ TEST_P(NoCifgPeepholeProjectionNoClippingLayerNormLstmTest, /*use_cifg=*/false, /*use_peephole=*/true, /*use_projection_weights=*/true, /*use_projection_bias=*/false, cell_clip, proj_clip, - { - {n_batch, n_input}, // 
input tensor - - {n_cell, n_input}, // input_to_input_weight tensor - {n_cell, n_input}, // input_to_forget_weight tensor - {n_cell, n_input}, // input_to_cell_weight tensor - {n_cell, n_input}, // input_to_output_weight tensor - - {n_cell, n_output}, // recurrent_to_input_weight tensor - {n_cell, n_output}, // recurrent_to_forget_weight tensor - {n_cell, n_output}, // recurrent_to_cell_weight tensor - {n_cell, n_output}, // recurrent_to_output_weight tensor - - {n_cell}, // cell_to_input_weight tensor - {n_cell}, // cell_to_forget_weight tensor - {n_cell}, // cell_to_output_weight tensor - - {n_cell}, // input_gate_bias tensor - {n_cell}, // forget_gate_bias tensor - {n_cell}, // cell_gate_bias tensor - {n_cell}, // output_gate_bias tensor - - {n_output, n_cell}, // projection_weight tensor - {0}, // projection_bias tensor - - {n_batch, n_output}, // output_state tensor - {n_batch, n_cell}, // cell_state tensor - - {n_cell}, // input_layer_norm_coefficient tensor - {n_cell}, // forget_layer_norm_coefficient tensor - {n_cell}, // cell_layer_norm_coefficient tensor - {n_cell}, // output_layer_norm_coefficient tensor - }, /*weight_type=*/TensorType_UINT8, /*model_has_legacy_20_inputs=*/false, /*is_layer_norm=*/true, /*asymmetric_quantize_inputs=*/GetParam()); @@ -1812,39 +1499,6 @@ TEST_P(NoCifgPeepholeProjectionNoClippingLayerNormLstmInt8Test, /*use_cifg=*/false, /*use_peephole=*/true, /*use_projection_weights=*/true, /*use_projection_bias=*/false, cell_clip, proj_clip, - { - {n_batch, n_input}, // input tensor - - {n_cell, n_input}, // input_to_input_weight tensor - {n_cell, n_input}, // input_to_forget_weight tensor - {n_cell, n_input}, // input_to_cell_weight tensor - {n_cell, n_input}, // input_to_output_weight tensor - - {n_cell, n_output}, // recurrent_to_input_weight tensor - {n_cell, n_output}, // recurrent_to_forget_weight tensor - {n_cell, n_output}, // recurrent_to_cell_weight tensor - {n_cell, n_output}, // recurrent_to_output_weight tensor - - {n_cell}, // cell_to_input_weight tensor - {n_cell}, // cell_to_forget_weight tensor - {n_cell}, // cell_to_output_weight tensor - - {n_cell}, // input_gate_bias tensor - {n_cell}, // forget_gate_bias tensor - {n_cell}, // cell_gate_bias tensor - {n_cell}, // output_gate_bias tensor - - {n_output, n_cell}, // projection_weight tensor - {0}, // projection_bias tensor - - {n_batch, n_output}, // output_state tensor - {n_batch, n_cell}, // cell_state tensor - - {n_cell}, // input_layer_norm_coefficient tensor - {n_cell}, // forget_layer_norm_coefficient tensor - {n_cell}, // cell_layer_norm_coefficient tensor - {n_cell}, // output_layer_norm_coefficient tensor - }, /*weight_type=*/TensorType_INT8, /*model_has_legacy_20_inputs=*/false, /*is_layer_norm=*/true, /*asymmetric_quantize_inputs=*/GetParam()); @@ -1926,39 +1580,6 @@ TEST_F(CifgPeepholeProjectionNoClippingLayerNormLstmTest, /*use_cifg=*/true, /*use_peephole=*/true, /*use_projection_weights=*/true, /*use_projection_bias=*/false, cell_clip, proj_clip, - { - {n_batch, n_input}, // input tensor - - {0, 0}, // input_to_input_weight tensor - {n_cell, n_input}, // input_to_forget_weight tensor - {n_cell, n_input}, // input_to_cell_weight tensor - {n_cell, n_input}, // input_to_output_weight tensor - - {0, 0}, // recurrent_to_input_weight tensor - {n_cell, n_output}, // recurrent_to_forget_weight tensor - {n_cell, n_output}, // recurrent_to_cell_weight tensor - {n_cell, n_output}, // recurrent_to_output_weight tensor - - {0}, // cell_to_input_weight tensor - {n_cell}, // cell_to_forget_weight 
tensor - {n_cell}, // cell_to_output_weight tensor - - {0}, // input_gate_bias tensor - {n_cell}, // forget_gate_bias tensor - {n_cell}, // cell_gate_bias tensor - {n_cell}, // output_gate_bias tensor - - {n_output, n_cell}, // projection_weight tensor - {0}, // projection_bias tensor - - {n_batch, n_output}, // output_state tensor - {n_batch, n_cell}, // cell_state tensor - - {0}, // input_layer_norm_coefficient tensor - {n_cell}, // forget_layer_norm_coefficient tensor - {n_cell}, // cell_layer_norm_coefficient tensor - {n_cell}, // output_layer_norm_coefficient tensor - }, /*weight_type=*/TensorType_FLOAT32, /*model_has_legacy_20_inputs=*/false, /*is_layer_norm=*/true, /*asymmetric_quantize_inputs=*/false); @@ -1997,39 +1618,6 @@ TEST_P(CifgPeepholeProjectionNoClippingLayerNormLstmTest, /*use_cifg=*/true, /*use_peephole=*/true, /*use_projection_weights=*/true, /*use_projection_bias=*/false, cell_clip, proj_clip, - { - {n_batch, n_input}, // input tensor - - {0, 0}, // input_to_input_weight tensor - {n_cell, n_input}, // input_to_forget_weight tensor - {n_cell, n_input}, // input_to_cell_weight tensor - {n_cell, n_input}, // input_to_output_weight tensor - - {0, 0}, // recurrent_to_input_weight tensor - {n_cell, n_output}, // recurrent_to_forget_weight tensor - {n_cell, n_output}, // recurrent_to_cell_weight tensor - {n_cell, n_output}, // recurrent_to_output_weight tensor - - {0}, // cell_to_input_weight tensor - {n_cell}, // cell_to_forget_weight tensor - {n_cell}, // cell_to_output_weight tensor - - {0}, // input_gate_bias tensor - {n_cell}, // forget_gate_bias tensor - {n_cell}, // cell_gate_bias tensor - {n_cell}, // output_gate_bias tensor - - {n_output, n_cell}, // projection_weight tensor - {0}, // projection_bias tensor - - {n_batch, n_output}, // output_state tensor - {n_batch, n_cell}, // cell_state tensor - - {0}, // input_layer_norm_coefficient tensor - {n_cell}, // forget_layer_norm_coefficient tensor - {n_cell}, // cell_layer_norm_coefficient tensor - {n_cell}, // output_layer_norm_coefficient tensor - }, /*weight_type=*/TensorType_UINT8, /*model_has_legacy_20_inputs=*/false, /*is_layer_norm=*/true, /*asymmetric_quantize_inputs=*/GetParam()); @@ -2069,39 +1657,6 @@ TEST_P(CifgPeepholeProjectionNoClippingLayerNormLstmInt8Test, /*use_cifg=*/true, /*use_peephole=*/true, /*use_projection_weights=*/true, /*use_projection_bias=*/false, cell_clip, proj_clip, - { - {n_batch, n_input}, // input tensor - - {0, 0}, // input_to_input_weight tensor - {n_cell, n_input}, // input_to_forget_weight tensor - {n_cell, n_input}, // input_to_cell_weight tensor - {n_cell, n_input}, // input_to_output_weight tensor - - {0, 0}, // recurrent_to_input_weight tensor - {n_cell, n_output}, // recurrent_to_forget_weight tensor - {n_cell, n_output}, // recurrent_to_cell_weight tensor - {n_cell, n_output}, // recurrent_to_output_weight tensor - - {0}, // cell_to_input_weight tensor - {n_cell}, // cell_to_forget_weight tensor - {n_cell}, // cell_to_output_weight tensor - - {0}, // input_gate_bias tensor - {n_cell}, // forget_gate_bias tensor - {n_cell}, // cell_gate_bias tensor - {n_cell}, // output_gate_bias tensor - - {n_output, n_cell}, // projection_weight tensor - {0}, // projection_bias tensor - - {n_batch, n_output}, // output_state tensor - {n_batch, n_cell}, // cell_state tensor - - {0}, // input_layer_norm_coefficient tensor - {n_cell}, // forget_layer_norm_coefficient tensor - {n_cell}, // cell_layer_norm_coefficient tensor - {n_cell}, // output_layer_norm_coefficient tensor - }, 
/*weight_type=*/TensorType_INT8, /*model_has_legacy_20_inputs=*/false, /*is_layer_norm=*/true, /*asymmetric_quantize_inputs=*/GetParam()); @@ -2129,56 +1684,71 @@ class LSTMIntegerOpModel : public SingleOpModel { bool use_cifg, bool use_peephole, bool use_projection_weights, bool use_projection_bias, bool use_layer_norm, float cell_clip, float proj_clip, - const std::vector>& input_shapes, + bool use_8x8_8_implementation, const std::vector>& ranges, const std::vector>& intermediates) : n_batch_(n_batch), n_input_(n_input), n_cell_(n_cell), n_output_(n_output) { - EXPECT_EQ(input_shapes.size() + 1, ranges.size()); - EXPECT_EQ(intermediates.size(), 5); - input_ = AddInput( - {TensorType_INT8, input_shapes[0], ranges[0].first, ranges[0].second}); + input_ = AddInput({TensorType_INT8, + {n_batch, n_input}, + ranges[0].first, + ranges[0].second}); if (use_cifg) { input_to_input_weights_ = AddNullInput(); } else { - input_to_input_weights_ = AddInput({TensorType_INT8, input_shapes[1], - ranges[1].first, ranges[1].second}); + input_to_input_weights_ = AddInput({TensorType_INT8, + {n_cell, n_input}, + ranges[1].first, + ranges[1].second}); } - input_to_forget_weights_ = AddInput( - {TensorType_INT8, input_shapes[2], ranges[2].first, ranges[2].second}); - input_to_cell_weights_ = AddInput( - {TensorType_INT8, input_shapes[3], ranges[3].first, ranges[3].second}); - input_to_output_weights_ = AddInput( - {TensorType_INT8, input_shapes[4], ranges[4].first, ranges[4].second}); + input_to_forget_weights_ = AddInput({TensorType_INT8, + {n_cell, n_input}, + ranges[2].first, + ranges[2].second}); + input_to_cell_weights_ = AddInput({TensorType_INT8, + {n_cell, n_input}, + ranges[3].first, + ranges[3].second}); + input_to_output_weights_ = AddInput({TensorType_INT8, + {n_cell, n_input}, + ranges[4].first, + ranges[4].second}); if (use_cifg) { recurrent_to_input_weights_ = AddNullInput(); } else { - recurrent_to_input_weights_ = - AddInput({TensorType_INT8, input_shapes[5], ranges[5].first, - ranges[5].second}); + recurrent_to_input_weights_ = AddInput({TensorType_INT8, + {n_cell, n_output}, + ranges[5].first, + ranges[5].second}); } - recurrent_to_forget_weights_ = AddInput( - {TensorType_INT8, input_shapes[6], ranges[6].first, ranges[6].second}); - recurrent_to_cell_weights_ = AddInput( - {TensorType_INT8, input_shapes[7], ranges[7].first, ranges[7].second}); - recurrent_to_output_weights_ = AddInput( - {TensorType_INT8, input_shapes[8], ranges[8].first, ranges[8].second}); + recurrent_to_forget_weights_ = AddInput({TensorType_INT8, + {n_cell, n_output}, + ranges[6].first, + ranges[6].second}); + recurrent_to_cell_weights_ = AddInput({TensorType_INT8, + {n_cell, n_output}, + ranges[7].first, + ranges[7].second}); + recurrent_to_output_weights_ = AddInput({TensorType_INT8, + {n_cell, n_output}, + ranges[8].first, + ranges[8].second}); if (use_peephole) { if (use_cifg) { cell_to_input_weights_ = AddNullInput(); } else { - cell_to_input_weights_ = AddInput({TensorType_INT16, input_shapes[9], - ranges[9].first, ranges[9].second}); + cell_to_input_weights_ = AddInput( + {TensorType_INT16, {n_cell}, ranges[9].first, ranges[9].second}); } - cell_to_forget_weights_ = AddInput({TensorType_INT16, input_shapes[10], - ranges[10].first, ranges[10].second}); - cell_to_output_weights_ = AddInput({TensorType_INT16, input_shapes[11], - ranges[11].first, ranges[11].second}); + cell_to_forget_weights_ = AddInput( + {TensorType_INT16, {n_cell}, ranges[10].first, ranges[10].second}); + cell_to_output_weights_ = AddInput( + 
{TensorType_INT16, {n_cell}, ranges[11].first, ranges[11].second}); } else { cell_to_input_weights_ = AddNullInput(); cell_to_forget_weights_ = AddNullInput(); @@ -2188,22 +1758,26 @@ class LSTMIntegerOpModel : public SingleOpModel { if (use_cifg) { input_gate_bias_ = AddNullInput(); } else { - input_gate_bias_ = AddInput({TensorType_INT32, input_shapes[12], - ranges[12].first, ranges[12].second}); + input_gate_bias_ = AddInput( + {TensorType_INT32, {n_cell}, ranges[12].first, ranges[12].second}); } - forget_gate_bias_ = AddInput({TensorType_INT32, input_shapes[13], - ranges[13].first, ranges[13].second}); - cell_gate_bias_ = AddInput({TensorType_INT32, input_shapes[14], - ranges[14].first, ranges[14].second}); - output_gate_bias_ = AddInput({TensorType_INT32, input_shapes[15], - ranges[15].first, ranges[15].second}); + forget_gate_bias_ = AddInput( + {TensorType_INT32, {n_cell}, ranges[13].first, ranges[13].second}); + cell_gate_bias_ = AddInput( + {TensorType_INT32, {n_cell}, ranges[14].first, ranges[14].second}); + output_gate_bias_ = AddInput( + {TensorType_INT32, {n_cell}, ranges[15].first, ranges[15].second}); if (use_projection_weights) { - projection_weights_ = AddInput({TensorType_INT8, input_shapes[16], - ranges[16].first, ranges[16].second}); + projection_weights_ = AddInput({TensorType_INT8, + {n_output, n_cell}, + ranges[16].first, + ranges[16].second}); if (use_projection_bias) { - projection_bias_ = AddInput({TensorType_INT32, input_shapes[17], - ranges[17].first, ranges[17].second}); + projection_bias_ = AddInput({TensorType_INT32, + {n_output}, + ranges[17].first, + ranges[17].second}); } else { projection_bias_ = AddNullInput(); } @@ -2213,11 +1787,15 @@ class LSTMIntegerOpModel : public SingleOpModel { } // Adding the 2 state tensors. - output_state_ = AddInput({TensorType_INT16, input_shapes[18], - ranges[18].first, ranges[18].second}, + output_state_ = AddInput({TensorType_INT16, + {n_batch, n_output}, + ranges[18].first, + ranges[18].second}, true); - cell_state_ = AddInput({TensorType_INT16, input_shapes[19], - ranges[19].first, ranges[19].second}, + cell_state_ = AddInput({TensorType_INT16, + {n_batch, n_cell}, + ranges[19].first, + ranges[19].second}, true); // Layer norm weights. 
@@ -2225,25 +1803,25 @@ class LSTMIntegerOpModel : public SingleOpModel { if (use_cifg) { input_layer_norm_coefficients_ = AddNullInput(); } else { - input_layer_norm_coefficients_ = - AddInput({TensorType_INT16, input_shapes[20], ranges[20].first, - ranges[20].second}); + input_layer_norm_coefficients_ = AddInput( + {TensorType_INT16, {n_cell}, ranges[20].first, ranges[20].second}); } - forget_layer_norm_coefficients_ = - AddInput({TensorType_INT16, input_shapes[21], ranges[21].first, - ranges[21].second}); - cell_layer_norm_coefficients_ = - AddInput({TensorType_INT16, input_shapes[22], ranges[22].first, - ranges[22].second}); - output_layer_norm_coefficients_ = - AddInput({TensorType_INT16, input_shapes[23], ranges[23].first, - ranges[23].second}); + forget_layer_norm_coefficients_ = AddInput( + {TensorType_INT16, {n_cell}, ranges[21].first, ranges[21].second}); + cell_layer_norm_coefficients_ = AddInput( + {TensorType_INT16, {n_cell}, ranges[22].first, ranges[22].second}); + output_layer_norm_coefficients_ = AddInput( + {TensorType_INT16, {n_cell}, ranges[23].first, ranges[23].second}); } + if (use_8x8_8_implementation) { + EXPECT_EQ(intermediates.size(), 12); + } else { + EXPECT_EQ(intermediates.size(), 5); + } for (int i = 0; i < intermediates.size(); ++i) { - intermediates_[i] = - AddIntermediate(TensorType_INT16, {intermediates[i].first}, - {intermediates[i].second}); + AddIntermediate(TensorType_INT16, {intermediates[i].first}, + {intermediates[i].second}); } output_ = AddOutput({TensorType_INT8, @@ -2256,7 +1834,7 @@ class LSTMIntegerOpModel : public SingleOpModel { cell_clip, proj_clip) .Union()); - BuildInterpreter(input_shapes); + BuildInterpreter({}); // Input sizes are already set } void SetInputToInputWeights(const std::vector& f) { @@ -2383,8 +1961,6 @@ class LSTMIntegerOpModel : public SingleOpModel { int projection_weights_; int projection_bias_; - int intermediates_[5]; - int output_; int output_state_; int cell_state_; @@ -2451,41 +2027,6 @@ TEST(LSTMIntegerOpModel, NoCifgYesLayerNormNoYesProjectionNoPeephole) { const std::vector projection_weights = { -0.1, 0.2, 0.01, -0.2, 0.1, 0.5, 0.3, 0.08, 0.07, 0.2, -0.4, 0.2}; - // Input shapes. - const std::vector> inputs = { - {n_batch, n_input}, // input tensor - - {n_cell, n_input}, // input_to_input_weight tensor - {n_cell, n_input}, // input_to_forget_weight tensor - {n_cell, n_input}, // input_to_cell_weight tensor - {n_cell, n_input}, // input_to_output_weight tensor - - {n_cell, n_output}, // recurrent_to_input_weight tensor - {n_cell, n_output}, // recurrent_to_forget_weight tensor - {n_cell, n_output}, // recurrent_to_cell_weight tensor - {n_cell, n_output}, // recurrent_to_output_weight tensor - - {0}, // cell_to_input_weight tensor - {0}, // cell_to_forget_weight tensor - {0}, // cell_to_output_weight tensor - - {n_cell}, // input_gate_bias tensor - {n_cell}, // forget_gate_bias tensor - {n_cell}, // cell_gate_bias tensor - {n_cell}, // output_gate_bias tensor - - {n_output, n_cell}, // projection_weight tensor - {0}, // projection_bias tensor - - {n_batch, n_output}, // output_state tensor - {n_batch, n_cell}, // cell_state tensor - - {n_cell}, // input_layer_norm_coefficient tensor - {n_cell}, // forget_layer_norm_coefficient tensor - {n_cell}, // cell_layer_norm_coefficient tensor - {n_cell}, // output_layer_norm_coefficient tensor - }; - // Input ranges. 
const std::vector> ranges = { {-1.0, 127.0 / 128}, // input tensor @@ -2532,8 +2073,9 @@ TEST(LSTMIntegerOpModel, NoCifgYesLayerNormNoYesProjectionNoPeephole) { /*use_cifg=*/false, /*use_peephole=*/false, /*use_projection_weights=*/true, /*use_projection_bias=*/false, - /*use_layer_norm=*/true, cell_clip, proj_clip, inputs, - ranges, intermediates); + /*use_layer_norm=*/true, cell_clip, proj_clip, + /*use_8x8_8_implementation=*/false, ranges, + intermediates); // Set weights. lstm.SetInputToInputWeights(input_to_input_weights); @@ -2653,41 +2195,6 @@ TEST(LSTMIntegerOpModel, NoCifgYesLayerNormNoYesProjectionYesPeephole) { const std::vector projection_weights = { -0.1, 0.2, 0.01, -0.2, 0.1, 0.5, 0.3, 0.08, 0.07, 0.2, -0.4, 0.2}; - // Input shapes. - const std::vector> inputs = { - {n_batch, n_input}, // input tensor - - {n_cell, n_input}, // input_to_input_weight tensor - {n_cell, n_input}, // input_to_forget_weight tensor - {n_cell, n_input}, // input_to_cell_weight tensor - {n_cell, n_input}, // input_to_output_weight tensor - - {n_cell, n_output}, // recurrent_to_input_weight tensor - {n_cell, n_output}, // recurrent_to_forget_weight tensor - {n_cell, n_output}, // recurrent_to_cell_weight tensor - {n_cell, n_output}, // recurrent_to_output_weight tensor - - {n_cell}, // cell_to_input_weight tensor - {n_cell}, // cell_to_forget_weight tensor - {n_cell}, // cell_to_output_weight tensor - - {n_cell}, // input_gate_bias tensor - {n_cell}, // forget_gate_bias tensor - {n_cell}, // cell_gate_bias tensor - {n_cell}, // output_gate_bias tensor - - {n_output, n_cell}, // projection_weight tensor - {0}, // projection_bias tensor - - {n_batch, n_output}, // output_state tensor - {n_batch, n_cell}, // cell_state tensor - - {n_cell}, // input_layer_norm_coefficient tensor - {n_cell}, // forget_layer_norm_coefficient tensor - {n_cell}, // cell_layer_norm_coefficient tensor - {n_cell}, // output_layer_norm_coefficient tensor - }; - // Input ranges. const std::vector> ranges = { {-1.0, 127.0 / 128}, // input tensor @@ -2734,8 +2241,9 @@ TEST(LSTMIntegerOpModel, NoCifgYesLayerNormNoYesProjectionYesPeephole) { /*use_cifg=*/false, /*use_peephole=*/true, /*use_projection_weights=*/true, /*use_projection_bias=*/false, - /*use_layer_norm=*/true, cell_clip, proj_clip, inputs, - ranges, intermediates); + /*use_layer_norm=*/true, cell_clip, proj_clip, + /*use_8x8_8_implementation=*/false, ranges, + intermediates); // Set weights. 
lstm.SetInputToInputWeights(input_to_input_weights); @@ -2797,279 +2305,7 @@ TEST(LSTMIntegerOpModel, NoCifgYesLayerNormNoYesProjectionYesPeephole) { } } -class LSTMIntegerOpModel8x8_8 : public SingleOpModel { - public: - LSTMIntegerOpModel8x8_8( - int n_batch, int n_input, int n_cell, int n_output, bool use_cifg, - bool use_peephole, bool use_projection_weights, bool use_projection_bias, - bool use_layer_norm, float cell_clip, float proj_clip, - const std::vector>& input_shapes, - const std::vector>& ranges, - const std::vector>& intermediates) - : n_batch_(n_batch), - n_input_(n_input), - n_cell_(n_cell), - n_output_(n_output) { - EXPECT_EQ(input_shapes.size() + 1, ranges.size()); - EXPECT_EQ(intermediates.size(), 12); - input_ = AddInput( - {TensorType_INT8, input_shapes[0], ranges[0].first, ranges[0].second}); - - if (use_cifg) { - input_to_input_weights_ = AddNullInput(); - } else { - input_to_input_weights_ = AddInput({TensorType_INT8, input_shapes[1], - ranges[1].first, ranges[1].second}); - } - input_to_forget_weights_ = AddInput( - {TensorType_INT8, input_shapes[2], ranges[2].first, ranges[2].second}); - input_to_cell_weights_ = AddInput( - {TensorType_INT8, input_shapes[3], ranges[3].first, ranges[3].second}); - input_to_output_weights_ = AddInput( - {TensorType_INT8, input_shapes[4], ranges[4].first, ranges[4].second}); - - if (use_cifg) { - recurrent_to_input_weights_ = AddNullInput(); - } else { - recurrent_to_input_weights_ = - AddInput({TensorType_INT8, input_shapes[5], ranges[5].first, - ranges[5].second}); - } - recurrent_to_forget_weights_ = AddInput( - {TensorType_INT8, input_shapes[6], ranges[6].first, ranges[6].second}); - recurrent_to_cell_weights_ = AddInput( - {TensorType_INT8, input_shapes[7], ranges[7].first, ranges[7].second}); - recurrent_to_output_weights_ = AddInput( - {TensorType_INT8, input_shapes[8], ranges[8].first, ranges[8].second}); - - if (use_peephole) { - if (use_cifg) { - cell_to_input_weights_ = AddNullInput(); - } else { - cell_to_input_weights_ = AddInput({TensorType_INT16, input_shapes[9], - ranges[9].first, ranges[9].second}); - } - cell_to_forget_weights_ = AddInput({TensorType_INT16, input_shapes[10], - ranges[10].first, ranges[10].second}); - cell_to_output_weights_ = AddInput({TensorType_INT16, input_shapes[11], - ranges[11].first, ranges[11].second}); - } else { - cell_to_input_weights_ = AddNullInput(); - cell_to_forget_weights_ = AddNullInput(); - cell_to_output_weights_ = AddNullInput(); - } - - if (use_cifg) { - input_gate_bias_ = AddNullInput(); - } else { - input_gate_bias_ = AddInput({TensorType_INT32, input_shapes[12], - ranges[12].first, ranges[12].second}); - } - forget_gate_bias_ = AddInput({TensorType_INT32, input_shapes[13], - ranges[13].first, ranges[13].second}); - cell_gate_bias_ = AddInput({TensorType_INT32, input_shapes[14], - ranges[14].first, ranges[14].second}); - output_gate_bias_ = AddInput({TensorType_INT32, input_shapes[15], - ranges[15].first, ranges[15].second}); - - if (use_projection_weights) { - projection_weights_ = AddInput({TensorType_INT8, input_shapes[16], - ranges[16].first, ranges[16].second}); - if (use_projection_bias) { - projection_bias_ = AddInput({TensorType_INT32, input_shapes[17], - ranges[17].first, ranges[17].second}); - } else { - projection_bias_ = AddNullInput(); - } - } else { - projection_weights_ = AddNullInput(); - projection_bias_ = AddNullInput(); - } - - // Adding the 2 state tensors. 
- output_state_ = AddInput({TensorType_INT16, input_shapes[18], - ranges[18].first, ranges[18].second}, - true); - cell_state_ = AddInput({TensorType_INT16, input_shapes[19], - ranges[19].first, ranges[19].second}, - true); - - // Layer norm weights. - if (use_layer_norm) { - if (use_cifg) { - input_layer_norm_coefficients_ = AddNullInput(); - } else { - input_layer_norm_coefficients_ = - AddInput({TensorType_INT16, input_shapes[20], ranges[20].first, - ranges[20].second}); - } - forget_layer_norm_coefficients_ = - AddInput({TensorType_INT16, input_shapes[21], ranges[21].first, - ranges[21].second}); - cell_layer_norm_coefficients_ = - AddInput({TensorType_INT16, input_shapes[22], ranges[22].first, - ranges[22].second}); - output_layer_norm_coefficients_ = - AddInput({TensorType_INT16, input_shapes[23], ranges[23].first, - ranges[23].second}); - } - - for (int i = 0; i < intermediates.size(); ++i) { - intermediates_[i] = - AddIntermediate(TensorType_INT16, {intermediates[i].first}, - {intermediates[i].second}); - } - - output_ = AddOutput({TensorType_INT8, - {n_batch, n_output}, - ranges[24].first, - ranges[24].second}); - - SetBuiltinOp(BuiltinOperator_LSTM, BuiltinOptions_LSTMOptions, - CreateLSTMOptions(builder_, ActivationFunctionType_TANH, - cell_clip, proj_clip) - .Union()); - - BuildInterpreter(input_shapes); - } - - void SetInputToInputWeights(const std::vector& f) { - QuantizeAndPopulate(input_to_input_weights_, f); - } - - void SetInputToForgetWeights(const std::vector& f) { - QuantizeAndPopulate(input_to_forget_weights_, f); - } - - void SetInputToCellWeights(const std::vector& f) { - QuantizeAndPopulate(input_to_cell_weights_, f); - } - - void SetInputToOutputWeights(const std::vector& f) { - QuantizeAndPopulate(input_to_output_weights_, f); - } - - void SetRecurrentToInputWeights(const std::vector& f) { - QuantizeAndPopulate(recurrent_to_input_weights_, f); - } - - void SetRecurrentToForgetWeights(const std::vector& f) { - QuantizeAndPopulate(recurrent_to_forget_weights_, f); - } - - void SetRecurrentToCellWeights(const std::vector& f) { - QuantizeAndPopulate(recurrent_to_cell_weights_, f); - } - - void SetRecurrentToOutputWeights(const std::vector& f) { - QuantizeAndPopulate(recurrent_to_output_weights_, f); - } - - void SetCellToInputWeights(const std::vector& f) { - QuantizeAndPopulate(cell_to_input_weights_, f); - } - - void SetCellToForgetWeights(const std::vector& f) { - QuantizeAndPopulate(cell_to_forget_weights_, f); - } - - void SetCellToOutputWeights(const std::vector& f) { - QuantizeAndPopulate(cell_to_output_weights_, f); - } - - void SetInputLayerNormCoefficients(const std::vector& f) { - QuantizeAndPopulate(input_layer_norm_coefficients_, f); - } - - void SetForgetLayerNormCoefficients(const std::vector& f) { - QuantizeAndPopulate(forget_layer_norm_coefficients_, f); - } - - void SetCellLayerNormCoefficients(const std::vector& f) { - QuantizeAndPopulate(cell_layer_norm_coefficients_, f); - } - - void SetOutputLayerNormCoefficients(const std::vector& f) { - QuantizeAndPopulate(output_layer_norm_coefficients_, f); - } - - void SetInputGateBias(const std::vector& f) { - QuantizeAndPopulate(input_gate_bias_, f); - } - - void SetForgetGateBias(const std::vector& f) { - QuantizeAndPopulate(forget_gate_bias_, f); - } - - void SetCellBias(const std::vector& f) { - QuantizeAndPopulate(cell_gate_bias_, f); - } - - void SetOutputGateBias(const std::vector& f) { - QuantizeAndPopulate(output_gate_bias_, f); - } - - void SetProjectionWeights(const std::vector& f) { - 
QuantizeAndPopulate(projection_weights_, f); - } - - void SetProjectionBias(const std::vector& f) { - QuantizeAndPopulate(projection_bias_, f); - } - - void SetInput(const std::vector& f) { - QuantizeAndPopulate(input_, f); - } - - std::vector GetOutput() { return ExtractVector(output_); } - - int num_inputs() { return n_input_; } - int num_outputs() { return n_output_; } - int num_cells() { return n_cell_; } - int num_batches() { return n_batch_; } - - protected: - int input_; - int input_to_input_weights_; - int input_to_forget_weights_; - int input_to_cell_weights_; - int input_to_output_weights_; - - int recurrent_to_input_weights_; - int recurrent_to_forget_weights_; - int recurrent_to_cell_weights_; - int recurrent_to_output_weights_; - - int cell_to_input_weights_; - int cell_to_forget_weights_; - int cell_to_output_weights_; - - int input_layer_norm_coefficients_; - int forget_layer_norm_coefficients_; - int cell_layer_norm_coefficients_; - int output_layer_norm_coefficients_; - - int input_gate_bias_; - int forget_gate_bias_; - int cell_gate_bias_; - int output_gate_bias_; - - int projection_weights_; - int projection_bias_; - - int intermediates_[12]; - - int output_; - int output_state_; - int cell_state_; - - int n_batch_; - int n_input_; - int n_cell_; - int n_output_; -}; - -TEST(LSTMIntegerOpModel8x8_8, CifgYesLayerNormNoYesProjectionNoPeephole) { +TEST(LSTMIntegerOpModel, CifgYesLayerNormNoYesProjectionNoPeephole_8x8_8) { // Hyper parameters. const int n_batch = 2; const int n_input = 5; @@ -3126,41 +2362,6 @@ TEST(LSTMIntegerOpModel8x8_8, CifgYesLayerNormNoYesProjectionNoPeephole) { -0.1, 0.2, 0.01, -0.2, 0.1, 0.5, 0.3, 0.08, 0.07, 0.2, -0.4, 0.2}; const std::vector projection_bias = {0.1, 0.3, 0.5}; - // Input shapes. - const std::vector> inputs = { - {n_batch, n_input}, // input tensor - - {0}, // input_to_input_weight tensor - {n_cell, n_input}, // input_to_forget_weight tensor - {n_cell, n_input}, // input_to_cell_weight tensor - {n_cell, n_input}, // input_to_output_weight tensor - - {0}, // recurrent_to_input_weight tensor - {n_cell, n_output}, // recurrent_to_forget_weight tensor - {n_cell, n_output}, // recurrent_to_cell_weight tensor - {n_cell, n_output}, // recurrent_to_output_weight tensor - - {0}, // cell_to_input_weight tensor - {0}, // cell_to_forget_weight tensor - {0}, // cell_to_output_weight tensor - - {0}, // input_gate_bias tensor - {n_cell}, // forget_gate_bias tensor - {n_cell}, // cell_gate_bias tensor - {n_cell}, // output_gate_bias tensor - - {n_output, n_cell}, // projection_weight tensor - {n_output}, // projection_bias tensor - - {n_batch, n_output}, // output_state tensor - {n_batch, n_cell}, // cell_state tensor - - {0}, // input_layer_norm_coefficient tensor - {n_cell}, // forget_layer_norm_coefficient tensor - {n_cell}, // cell_layer_norm_coefficient tensor - {n_cell}, // output_layer_norm_coefficient tensor - }; - // Input ranges. const std::vector> ranges = { {-1.0, 127.0 / 128}, // input tensor @@ -3205,12 +2406,13 @@ TEST(LSTMIntegerOpModel8x8_8, CifgYesLayerNormNoYesProjectionNoPeephole) { {0.007059, 0}, {0.007, 0}, {0.007, 0}, {0.3, 0}}; // Create model. 
- LSTMIntegerOpModel8x8_8 lstm(n_batch, n_input, n_cell, n_output, - /*use_cifg=*/true, /*use_peephole=*/false, - /*use_projection_weights=*/true, - /*use_projection_bias=*/true, - /*use_layer_norm=*/true, cell_clip, proj_clip, - inputs, ranges, intermediates); + LSTMIntegerOpModel lstm(n_batch, n_input, n_cell, n_output, + /*use_cifg=*/true, /*use_peephole=*/false, + /*use_projection_weights=*/true, + /*use_projection_bias=*/true, + /*use_layer_norm=*/true, cell_clip, proj_clip, + /*use_8x8_8_implementation=*/true, ranges, + intermediates); // Set weights. // lstm.SetInputToInputWeights(input_to_input_weights); @@ -3276,76 +2478,26 @@ TEST(LSTMOpModel, InvalidTypeTest) { const int n_cell = 4; const int n_output = 4; - EXPECT_DEATH( - LSTMOpModel lstm( - n_batch, n_input, n_cell, n_output, - /*use_cifg=*/false, /*use_peephole=*/false, - /*use_projection_weights=*/false, - /*use_projection_bias=*/false, - /*cell_clip=*/0.0, /*proj_clip=*/0.0, - { - {n_batch, n_input}, // input tensor + EXPECT_DEATH(LSTMOpModel lstm(n_batch, n_input, n_cell, n_output, + /*use_cifg=*/false, /*use_peephole=*/false, + /*use_projection_weights=*/false, + /*use_projection_bias=*/false, + /*cell_clip=*/0.0, /*proj_clip=*/0.0, + /*weight_type=*/TensorType_INT32, + /*model_has_legacy_20_inputs=*/true, + /*is_layer_norm=*/false, + /*asymmetric_quantize_inputs=*/false), + ""); - {n_cell, n_input}, // input_to_input_weight tensor - {n_cell, n_input}, // input_to_forget_weight tensor - {n_cell, n_input}, // input_to_cell_weight tensor - {n_cell, n_input}, // input_to_output_weight tensor - - {n_cell, n_output}, // recurrent_to_input_weight_tensor - {n_cell, n_output}, // recurrent_to_forget_weight_tensor - {n_cell, n_output}, // recurrent_to_cell_weight_tensor - {n_cell, n_output}, // recurrent_to_output_weight_tensor - - {0}, // cell_to_input_weight tensor - {0}, // cell_to_forget_weight tensor - {0}, // cell_to_output_weight tensor - - {n_cell}, // input_gate_bias tensor - {n_cell}, // forget_gate_bias tensor - {n_cell}, // cell_gate_bias tensor - {n_cell}, // output_gate_bias tensor - - {0, 0}, // projection_weight tensor - {0}, // projection_bias tensor - }, - /*weight_type=*/TensorType_INT32, /*model_has_legacy_20_inputs=*/true, - /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/false), - ""); - - EXPECT_DEATH(LSTMOpModel lstm( - n_batch, n_input, n_cell, n_output, - /*use_cifg=*/false, /*use_peephole=*/false, - /*use_projection_weights=*/false, - /*use_projection_bias=*/false, - /*cell_clip=*/0.0, /*proj_clip=*/0.0, - { - {n_batch, n_input}, // input tensor - - {n_cell, n_input}, // input_to_input_weight tensor - {n_cell, n_input}, // input_to_forget_weight tensor - {n_cell, n_input}, // input_to_cell_weight tensor - {n_cell, n_input}, // input_to_output_weight tensor - - {n_cell, n_output}, // recurrent_to_input_weight_tensor - {n_cell, n_output}, // recurrent_to_forget_weight_tensor - {n_cell, n_output}, // recurrent_to_cell_weight_tensor - {n_cell, n_output}, // recurrent_to_output_weight_tensor - - {0}, // cell_to_input_weight tensor - {0}, // cell_to_forget_weight tensor - {0}, // cell_to_output_weight tensor - - {n_cell}, // input_gate_bias tensor - {n_cell}, // forget_gate_bias tensor - {n_cell}, // cell_gate_bias tensor - {n_cell}, // output_gate_bias tensor - - {0, 0}, // projection_weight tensor - {0}, // projection_bias tensor - }, - /*weight_type=*/TensorType_COMPLEX64, - /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, - /*asymmetric_quantize_inputs=*/false), + 
EXPECT_DEATH(LSTMOpModel lstm(n_batch, n_input, n_cell, n_output,
+                                /*use_cifg=*/false, /*use_peephole=*/false,
+                                /*use_projection_weights=*/false,
+                                /*use_projection_bias=*/false,
+                                /*cell_clip=*/0.0, /*proj_clip=*/0.0,
+                                /*weight_type=*/TensorType_COMPLEX64,
+                                /*model_has_legacy_20_inputs=*/true,
+                                /*is_layer_norm=*/false,
+                                /*asymmetric_quantize_inputs=*/false),
               "");
 }
 #endif

From 0850be02ff74941435024d5652975be51738213e Mon Sep 17 00:00:00 2001
From: Russell Power
Date: Tue, 21 Jul 2020 09:47:48 -0700
Subject: [PATCH 0941/2522] Reorganize C++ <-> C conversions.

PiperOrigin-RevId: 322380237
Change-Id: I569f11fe9fdab5978b0c2e5708a0aa8cc3e87e9b
---
 tensorflow/core/tpu/kernels/BUILD             |   1 +
 tensorflow/core/tpu/kernels/tpu_util_c_api.h  |   9 +-
 tensorflow/core/tpu/tpu_execute.cc            |  25 +-
 tensorflow/core/tpu/tpu_on_demand_compiler.cc |  86 ++----
 tensorflow/stream_executor/tpu/BUILD          |  75 +++--
 .../stream_executor/tpu/c_api_conversions.cc  | 216 ++++++++++++++
 .../stream_executor/tpu/c_api_conversions.h   | 281 +++++-------------
 tensorflow/stream_executor/tpu/c_api_decl.h   | 253 ++++++++++++++++
 tensorflow/stream_executor/tpu/c_api_defn.h   |  70 +++++
 .../tpu/device_memory_base_helper.h           |  41 ---
 tensorflow/stream_executor/tpu/proto_helper.h |   6 +-
 .../stream_executor/tpu/status_helper.h       |   1 -
 tensorflow/stream_executor/tpu/tpu_event.h    |  33 ++
 .../stream_executor/tpu/tpu_executable.cc     |  12 +-
 .../stream_executor/tpu/tpu_executor.cc       |  24 +-
 .../stream_executor/tpu/tpu_executor_c_api.h  | 213 +------------
 .../tpu/tpu_node_context_c_api.h              |   2 +-
 tensorflow/stream_executor/tpu/tpu_stream.h   |  25 +-
 tensorflow/stream_executor/tpu/tpu_topology.h |   2 +-
 .../tpu/tpu_transfer_manager.cc               |  34 +--
 20 files changed, 779 insertions(+), 630 deletions(-)
 create mode 100644 tensorflow/stream_executor/tpu/c_api_conversions.cc
 create mode 100644 tensorflow/stream_executor/tpu/c_api_decl.h
 create mode 100644 tensorflow/stream_executor/tpu/c_api_defn.h
 delete mode 100644 tensorflow/stream_executor/tpu/device_memory_base_helper.h
 create mode 100644 tensorflow/stream_executor/tpu/tpu_event.h

diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD
index 987feba9473..b3ae4770cda 100644
--- a/tensorflow/core/tpu/kernels/BUILD
+++ b/tensorflow/core/tpu/kernels/BUILD
@@ -441,6 +441,7 @@ cc_library(
     deps = [
         ":tpu_mesh_state_c_api_hdrs",
         "//tensorflow/core/tpu:libtftpu_header",
+        "//tensorflow/stream_executor/tpu:c_api_decl",
         "//tensorflow/stream_executor/tpu:proto_helper",
     ],
     alwayslink = True,
diff --git a/tensorflow/core/tpu/kernels/tpu_util_c_api.h b/tensorflow/core/tpu/kernels/tpu_util_c_api.h
index a6cc2f01703..37c55518721 100644
--- a/tensorflow/core/tpu/kernels/tpu_util_c_api.h
+++ b/tensorflow/core/tpu/kernels/tpu_util_c_api.h
@@ -17,16 +17,9 @@ limitations under the License.

 #include "tensorflow/core/tpu/kernels/tpu_mesh_state_c_api.h"
 #include "tensorflow/core/tpu/libtftpu.h"
+#include "tensorflow/stream_executor/tpu/c_api_decl.h"
 #include "tensorflow/stream_executor/tpu/proto_helper.h"

-typedef struct SE_Status SE_Status;
-
-enum TpuCoreTypeEnum {
-  kTensorCore,
-  kEmbeddingV1,
-  kEmbeddingV2,
-};
-
 // Property for creating compilation cache key.
struct CompilationCacheKeyProperty { const char* config_prefix; diff --git a/tensorflow/core/tpu/tpu_execute.cc b/tensorflow/core/tpu/tpu_execute.cc index 022e8c2a07e..d897eb7ee9b 100644 --- a/tensorflow/core/tpu/tpu_execute.cc +++ b/tensorflow/core/tpu/tpu_execute.cc @@ -109,32 +109,32 @@ void ExitCountdown(Env* env) { xla::Shape HostShapeToDeviceShape(const xla::Shape& host_shape) { XLA_Shape c_host_shape; XLA_Shape c_device_shape; - TpuConversions::XlaShapeToCShape(host_shape, &c_host_shape); + ApiConverter::ToC(host_shape, &c_host_shape); tensorflow::tpu::ExecuteApiFn()->HardwareLayout_HostShapeToDeviceShapeFn( &c_host_shape, &c_device_shape); - xla::Shape device_shape = TpuConversions::CShapeToXlaShape(&c_device_shape); - TpuConversions::CShapeCleanup(&c_host_shape); - TpuConversions::CShapeCleanup(&c_device_shape); + xla::Shape device_shape = ApiConverter::FromC(&c_device_shape); + ApiConverter::Free(&c_host_shape); + ApiConverter::Free(&c_device_shape); return device_shape; } int64 ShapeSizeCompact(const xla::Shape& shape) { XLA_Shape c_shape; - TpuConversions::XlaShapeToCShape(shape, &c_shape); + ApiConverter::ToC(shape, &c_shape); int64 size = tensorflow::tpu::ExecuteApiFn()->HardwareLayout_ShapeSizeCompactFn( &c_shape); - TpuConversions::CShapeCleanup(&c_shape); + ApiConverter::Free(&c_shape); return size; } int64 ShapeSizeCompactRaw(const xla::Shape& shape) { XLA_Shape c_shape; - TpuConversions::XlaShapeToCShape(shape, &c_shape); + ApiConverter::ToC(shape, &c_shape); int64 size = tensorflow::tpu::ExecuteApiFn()->HardwareLayout_ShapeSizeCompactRawFn( &c_shape); - TpuConversions::CShapeCleanup(&c_shape); + ApiConverter::Free(&c_shape); return size; } @@ -241,17 +241,16 @@ xla::Status UpdateDynamicInputs( // After getting the data onto the host, transpose the data to // the correct layout by delinearizing it and linearizing it again. XLA_Shape c_runtime_shape, c_compile_time_shape; - TpuConversions::XlaShapeToCShape(runtime_shape, &c_runtime_shape); - TpuConversions::XlaShapeToCShape(compile_time_shape, - &c_compile_time_shape); + ApiConverter::ToC(runtime_shape, &c_runtime_shape); + ApiConverter::ToC(compile_time_shape, &c_compile_time_shape); StatusHelper status; tensorflow::tpu::ExecuteApiFn() ->TpuExecute_RuntimeInputToPaddedDataFn( raw_input_runtime->data(), raw_input_runtime->size(), padded_data->data(), padded_data->size(), &c_runtime_shape, &c_compile_time_shape, status.c_status); - TpuConversions::CShapeCleanup(&c_runtime_shape); - TpuConversions::CShapeCleanup(&c_compile_time_shape); + ApiConverter::Free(&c_runtime_shape); + ApiConverter::Free(&c_compile_time_shape); return status.status(); }); // Allocate new input and transfer the padded and transposed data to diff --git a/tensorflow/core/tpu/tpu_on_demand_compiler.cc b/tensorflow/core/tpu/tpu_on_demand_compiler.cc index 66cd6869700..0839304ce10 100644 --- a/tensorflow/core/tpu/tpu_on_demand_compiler.cc +++ b/tensorflow/core/tpu/tpu_on_demand_compiler.cc @@ -30,6 +30,20 @@ limitations under the License. 
#include "tensorflow/stream_executor/tpu/tpu_executor.h" #include "tensorflow/stream_executor/tpu/tpu_executor_c_api.h" #include "tensorflow/stream_executor/tpu/tpu_platform.h" +#include "tensorflow/stream_executor/tpu/tpu_stream.h" + +namespace ApiConverter { +static SE_ExecutableRunOptions ToC( + const xla::ServiceExecutableRunOptions& options) { + SE_ExecutableRunOptions se_options; + se_options.allocator = ApiConverter::ToC(options.run_options().allocator()); + se_options.device_ordinal = options.run_options().device_ordinal(); + auto impl = + const_cast(options.stream())->implementation(); + se_options.stream = static_cast(impl)->se_stream(); + return se_options; +} +} // namespace ApiConverter namespace xla { @@ -37,31 +51,6 @@ namespace { using ::tensorflow::tpu::ExecutorApiFn; -// TODO(power) -- dedup -inline xla::ShapedBuffer CShapedBufferToXLAShapedBuffer( - XLA_ShapedBuffer* c_buffer) { - xla::Shape xla_on_host_shape = - TpuConversions::CShapeToXlaShape(&c_buffer->on_host_shape); - xla::Shape xla_on_device_shape = - TpuConversions::CShapeToXlaShape(&c_buffer->on_device_shape); - - xla::ShapeTree xla_shape_tree( - xla_on_device_shape); - size_t i = 0; - for (auto& pair : xla_shape_tree) { - pair.second = TpuConversions::SE_DeviceMemoryBaseToDeviceMemoryBase( - c_buffer->bases[i]); - i++; - } - - xla::ShapedBuffer xla_shaped_buffer( - xla_on_host_shape, xla_on_device_shape, - tensorflow::tpu::TpuPlatformInterface::GetRegisteredPlatform(), - c_buffer->device_ordinal); - xla_shaped_buffer.set_buffers(xla_shape_tree); - return xla_shaped_buffer; -} - class TpuExecutable : public Executable { public: TpuExecutable(SE_Executable* se_executable, @@ -77,21 +66,17 @@ class TpuExecutable : public Executable { const ServiceExecutableRunOptions* run_options, std::vector arguments, HloExecutionProfile* hlo_execution_profile) override { - SE_ExecutableRunOptions se_run_options = - TpuConversions::ExecutableRunOptionsToSE_ExecutableRunOptions( - *run_options); + SE_ExecutableRunOptions se_run_options = ApiConverter::ToC(*run_options); SE_ExecutionInput** se_args = new SE_ExecutionInput*[arguments.size()]; for (int i = 0; i < arguments.size(); ++i) { auto& arg = arguments[i]; se_args[i] = new SE_ExecutionInput; - TpuConversions::XlaShapeToCShape(arg.shape(), - &se_args[i]->shape_tree.shape); + ApiConverter::ToC(arg.shape(), &se_args[i]->shape_tree.shape); auto* arg_buffers = arg.MutableBuffers(); absl::InlinedVector se_buffers; for (auto& pair : *arg_buffers) { - se_buffers.push_back( - TpuConversions::SEMaybeOwningDeviceMemoryToC(pair.second)); + se_buffers.push_back(ApiConverter::ToC(pair.second)); } se_args[i]->shape_tree.buffers = new SE_MaybeOwningDeviceMemory[se_buffers.size()]; @@ -99,16 +84,14 @@ class TpuExecutable : public Executable { se_args[i]->shape_tree.buffers[j] = se_buffers[j]; } - TpuConversions::XlaShapeToCShape(arg.shape(), &se_args[i]->dynamic_shape); - TpuConversions::XlaShapeToCShape(arg.host_shape(), - &se_args[i]->host_shape); + ApiConverter::ToC(arg.shape(), &se_args[i]->dynamic_shape); + ApiConverter::ToC(arg.host_shape(), &se_args[i]->host_shape); const auto& unowned_indices = arg.unowned_indices(); se_args[i]->unowned_indices_size = unowned_indices.size(); se_args[i]->unowned_indices = new XLA_ShapeIndex[unowned_indices.size()]; int j = 0; for (auto& idx : unowned_indices) { - se_args[i]->unowned_indices[j] = - TpuConversions::XlaShapeIndexToCShapeIndex(idx); + se_args[i]->unowned_indices[j] = ApiConverter::ToC(idx); ++j; } } @@ -122,20 +105,19 @@ class 
TpuExecutable : public Executable { } xla::ScopedShapedBuffer result( - CShapedBufferToXLAShapedBuffer(&se_execution_output.result), + ApiConverter::FromC(&se_execution_output.result), run_options->stream()->parent()->GetAllocator()); ExecutionOutput output(std::move(result)); for (int i = 0; i < se_execution_output.aliased_indices_size; ++i) { - output.AddAliasedIndex(TpuConversions::CShapeIndexToXlaShapeIndex( - &se_execution_output.aliased_indices[i])); + output.AddAliasedIndex( + ApiConverter::FromC(&se_execution_output.aliased_indices[i])); } for (int i = 0; i < se_execution_output.to_be_released_size; ++i) { output.AddToBeReleased( - TpuConversions::COwningDeviceMemToSEOwningDeviceMem( - &se_execution_output.to_be_released[i], - run_options->stream()->parent()->GetAllocator()) + ApiConverter::FromC(&se_execution_output.to_be_released[i], + run_options->stream()->parent()->GetAllocator()) .Release() .value()); } @@ -164,13 +146,12 @@ XLA_HloModuleConfig HloModuleConfigToC(const xla::HloModuleConfig& config) { } if (config.has_entry_computation_layout()) { auto layout = config.entry_computation_layout(); - TpuConversions::XlaShapeToCShape( - layout.result_layout().shape(), - &hlo_config.entry_computation_layout.result_layout); + ApiConverter::ToC(layout.result_layout().shape(), + &hlo_config.entry_computation_layout.result_layout); hlo_config.entry_computation_layout.parameter_layouts = new XLA_Shape[layout.parameter_count()]; for (int i = 0; i < layout.parameter_count(); ++i) { - TpuConversions::XlaShapeToCShape( + ApiConverter::ToC( layout.parameter_layout(i).shape(), &hlo_config.entry_computation_layout.parameter_layouts[i]); } @@ -196,7 +177,7 @@ class TpuCompiler : public Compiler { XLA_HloModule hlo_module; hlo_module.module_config = HloModuleConfigToC(module->config()); hlo_module.proto = stream_executor::tpu::SerializeProto(module->ToProto()); - auto allocator = TpuConversions::AllocatorToSE_Allocator(device_allocator); + auto allocator = ApiConverter::ToC(device_allocator); XLA_HloModule result; StatusHelper status; ExecutorApiFn()->TpuCompiler_RunHloPassesFn( @@ -229,7 +210,7 @@ class TpuCompiler : public Compiler { XLA_HloModule hlo_module; hlo_module.module_config = HloModuleConfigToC(module->config()); hlo_module.proto = stream_executor::tpu::SerializeProto(module->ToProto()); - auto allocator = TpuConversions::AllocatorToSE_Allocator(device_allocator); + auto allocator = ApiConverter::ToC(device_allocator); SE_Executable* result; StatusHelper status; @@ -272,8 +253,7 @@ class TpuCompiler : public Compiler { } } - SE_DeviceMemoryAllocator allocator = - TpuConversions::AllocatorToSE_Allocator(device_allocator); + SE_DeviceMemoryAllocator allocator = ApiConverter::ToC(device_allocator); SE_Executable** se_executables = new SE_Executable*[module_group->size()]; @@ -311,10 +291,10 @@ class TpuCompiler : public Compiler { HloCostAnalysis::ShapeSizeFunction ShapeSizeBytesFunction() const override { return [this](const xla::Shape& shape) { XLA_Shape c_shape; - TpuConversions::XlaShapeToCShape(shape, &c_shape); + ApiConverter::ToC(shape, &c_shape); int64 bytes = ExecutorApiFn()->TpuCompiler_ShapeSizeFn(compiler_, &c_shape); - TpuConversions::CShapeCleanup(&c_shape); + ApiConverter::Free(&c_shape); return bytes; }; } diff --git a/tensorflow/stream_executor/tpu/BUILD b/tensorflow/stream_executor/tpu/BUILD index 6e00542ddf4..7fa46ebd8d1 100644 --- a/tensorflow/stream_executor/tpu/BUILD +++ b/tensorflow/stream_executor/tpu/BUILD @@ -6,47 +6,39 @@ package( ) cc_library( - name = 
"tpu_executor_c_api_hdrs", - hdrs = ["tpu_executor_c_api.h"], - visibility = ["//visibility:public"], + name = "c_api_decl", + hdrs = [ + "c_api_decl.h", + "c_api_defn.h", + ], deps = [ "//tensorflow/c:tf_attrtype", "//tensorflow/c:tf_status", "//tensorflow/core/tpu:libtftpu_header", - "//tensorflow/core/tpu/kernels:tpu_util_c_api_hdrs", + "//tensorflow/stream_executor:stream_executor_headers", ], - alwayslink = True, ) cc_library( - name = "tpu_node_context_c_api_hdrs", - hdrs = ["tpu_node_context_c_api.h"], + name = "tpu_executor_c_api_hdrs", + hdrs = ["tpu_executor_c_api.h"], visibility = ["//visibility:public"], deps = [ + ":c_api_decl", + "//tensorflow/c:tf_attrtype", + "//tensorflow/c:tf_status", "//tensorflow/core/tpu:libtftpu_header", - "//tensorflow/core/tpu/kernels:tpu_util_c_api_hdrs", - ], - alwayslink = True, -) - -cc_library( - name = "status_helper", - hdrs = ["status_helper.h"], - deps = [ - ":tpu_executor_c_api_hdrs", - "//tensorflow/core/platform:status", - "//tensorflow/core/tpu:tpu_api", - "//tensorflow/core/tpu/kernels:tpu_util_c_api_hdrs", ], ) cc_library( name = "c_api_conversions", + srcs = ["c_api_conversions.cc"], hdrs = ["c_api_conversions.h"], deps = [ - ":device_memory_base_helper", + ":c_api_decl", ":tpu_executor_c_api_hdrs", - ":tpu_executor_hdrs", + ":tpu_platform_interface", "//tensorflow/compiler/xla:executable_run_options", "//tensorflow/compiler/xla:literal", "//tensorflow/compiler/xla:shape_util", @@ -61,18 +53,33 @@ cc_library( ) cc_library( - name = "proto_helper", - srcs = ["proto_helper.cc"], - hdrs = ["proto_helper.h"], - deps = ["//tensorflow/core:lib"], + name = "tpu_node_context_c_api_hdrs", + hdrs = ["tpu_node_context_c_api.h"], + visibility = ["//visibility:public"], + deps = [ + ":c_api_decl", + "//tensorflow/core/tpu:libtftpu_header", + ], ) cc_library( - name = "device_memory_base_helper", - hdrs = ["device_memory_base_helper.h"], + name = "status_helper", + hdrs = ["status_helper.h"], deps = [ + ":c_api_decl", ":tpu_executor_c_api_hdrs", - "//tensorflow/stream_executor:device_memory", + "//tensorflow/core/platform:status", + "//tensorflow/core/tpu:tpu_api", + ], +) + +cc_library( + name = "proto_helper", + srcs = ["proto_helper.cc"], + hdrs = ["proto_helper.h"], + deps = [ + ":c_api_decl", + "//tensorflow/core:lib", ], ) @@ -89,7 +96,7 @@ cc_library( ], visibility = ["//visibility:public"], deps = [ - ":device_memory_base_helper", + ":c_api_conversions", ":status_helper", ":tpu_executor_base", ":tpu_executor_c_api_hdrs", @@ -120,7 +127,7 @@ cc_library( "tpu_timer.h", ], deps = [ - ":device_memory_base_helper", + ":c_api_conversions", ":status_helper", ":tpu_executor_c_api_hdrs", "//tensorflow/core:lib", @@ -140,13 +147,15 @@ cc_library( "tpu_platform.cc", ], hdrs = [ + "tpu_event.h", "tpu_executor.h", "tpu_platform.h", "tpu_stream.h", "tpu_timer.h", ], deps = [ - ":device_memory_base_helper", + ":c_api_conversions", + ":c_api_decl", ":status_helper", ":tpu_executor_c_api_hdrs", ":tpu_executor_interface", @@ -323,8 +332,8 @@ cc_library( srcs = ["tpu_topology.cc"], hdrs = ["tpu_topology.h"], deps = [ + ":c_api_decl", "//tensorflow/core/platform:types", "//tensorflow/core/tpu:tpu_api", - "//tensorflow/core/tpu/kernels:tpu_util_c_api_hdrs", ], ) diff --git a/tensorflow/stream_executor/tpu/c_api_conversions.cc b/tensorflow/stream_executor/tpu/c_api_conversions.cc new file mode 100644 index 00000000000..76046c514ad --- /dev/null +++ b/tensorflow/stream_executor/tpu/c_api_conversions.cc @@ -0,0 +1,216 @@ +/* Copyright 2020 The TensorFlow Authors. 
All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/stream_executor/tpu/c_api_conversions.h" + +#include "tensorflow/stream_executor/tpu/c_api_defn.h" +#include "tensorflow/stream_executor/tpu/tpu_platform_interface.h" + +namespace ApiConverter { +xla::ShapedBuffer FromC(XLA_ShapedBuffer* c_buffer) { + xla::Shape xla_on_host_shape = ApiConverter::FromC(&c_buffer->on_host_shape); + xla::Shape xla_on_device_shape = + ApiConverter::FromC(&c_buffer->on_device_shape); + + xla::ShapeTree xla_shape_tree( + xla_on_device_shape); + size_t i = 0; + for (auto& pair : xla_shape_tree) { + pair.second = ApiConverter::FromC(c_buffer->bases[i]); + i++; + } + + xla::ShapedBuffer xla_shaped_buffer( + xla_on_host_shape, xla_on_device_shape, + tensorflow::tpu::TpuPlatformInterface::GetRegisteredPlatform(), + c_buffer->device_ordinal); + xla_shaped_buffer.set_buffers(xla_shape_tree); + return xla_shaped_buffer; +} + +SE_MaybeOwningDeviceMemory ToC(xla::MaybeOwningDeviceMemory& mem) { + SE_MaybeOwningDeviceMemory se_mem; + se_mem.owned = mem.HasOwnership(); + se_mem.memory = ApiConverter::ToC(mem.AsDeviceMemoryBase()); + if (mem.HasOwnership()) { + auto owned = mem.Release().value(); + se_mem.device_ordinal = owned.device_ordinal(); + se_mem.allocator = ApiConverter::ToC(owned.allocator()); + } else { + se_mem.allocator = + ToC(static_cast(nullptr)); + se_mem.device_ordinal = -1; + } + return se_mem; +} + +xla::MaybeOwningDeviceMemory FromC( + SE_MaybeOwningDeviceMemory* se_mem, + stream_executor::DeviceMemoryAllocator* allocator) { + if (se_mem->owned) { + return xla::MaybeOwningDeviceMemory( + stream_executor::OwningDeviceMemory(ApiConverter::FromC(se_mem->memory), + se_mem->device_ordinal, allocator)); + } else { + return xla::MaybeOwningDeviceMemory(ApiConverter::FromC(se_mem->memory)); + } +} + +SE_DeviceMemoryAllocator ToC( + stream_executor::DeviceMemoryAllocator* allocator) { + SE_DeviceMemoryAllocator se_allocator; + if (allocator == nullptr) { + se_allocator.ctx = nullptr; + se_allocator.platform = nullptr; + se_allocator.allocate = nullptr; + se_allocator.deallocate = nullptr; + return se_allocator; + } + // N.B. Platform is assumed to be the registered backend platform. 
+ se_allocator.platform = nullptr; + se_allocator.ctx = allocator; + se_allocator.allocate = [](void* ctx, int device_ordinal, uint64_t size, + bool retry_on_failure, int64_t memory_space, + SE_ScopedDeviceMemory* memory, + SE_Status* se_status) { + auto allocation = + reinterpret_cast(ctx) + ->Allocate(device_ordinal, size, retry_on_failure, memory_space); + if (!allocation.ok()) { + auto status = allocation.status(); + TpuStatus_Set(se_status, status.code(), status.error_message().data(), + status.error_message().size()); + } else { + auto& scoped_memory = allocation.ValueOrDie(); + memory->wrapped = ApiConverter::ToC(scoped_memory.Release()); + memory->device_ordinal = scoped_memory.device_ordinal(); + } + }; + + se_allocator.deallocate = [](void* ctx, SE_DeviceMemoryBase* base, + int device_ordinal, SE_Status* se_status) { + auto status = reinterpret_cast(ctx) + ->Deallocate(device_ordinal, ApiConverter::FromC(*base)); + if (!status.ok()) { + TpuStatus_Set(se_status, status.code(), status.error_message().data(), + status.error_message().size()); + } + }; + return se_allocator; +} +SE_MaybeOwningDeviceMemory ToC(stream_executor::OwningDeviceMemory* mem) { + SE_MaybeOwningDeviceMemory se_mem; + se_mem.device_ordinal = mem->device_ordinal(); + se_mem.memory = ApiConverter::ToC(mem->Release()); + se_mem.allocator = ApiConverter::ToC(mem->allocator()); + se_mem.owned = true; + return se_mem; +} + +SE_DeviceMemoryBase ToC(const stream_executor::DeviceMemoryBase& base) { + SE_DeviceMemoryBase se_base; + se_base.opaque = const_cast(base.opaque()); + se_base.payload = base.payload(); + se_base.size = base.size(); + return se_base; +} + +stream_executor::DeviceMemoryBase FromC(const SE_DeviceMemoryBase& se_base) { + stream_executor::DeviceMemoryBase base(se_base.opaque, se_base.size); + base.SetPayload(se_base.payload); + return base; +} + +xla::Shape FromC(XLA_Shape* shape) { + xla::ShapeProto p; + p.ParseFromArray(shape->bytes, shape->size); + return xla::Shape(p); +} + +void ToC(const xla::Shape& xla_shape, XLA_Shape* c_shape) { + xla::ShapeProto p = xla_shape.ToProto(); + std::string p_str = p.SerializeAsString(); + c_shape->bytes = new char[p_str.size()]; + c_shape->size = p_str.size(); + memcpy(c_shape->bytes, p_str.data(), p_str.size()); +} + +XLA_ShapeIndex ToC(const xla::ShapeIndex& xla_shape) { + XLA_ShapeIndex c_shape; + CHECK_LT(xla_shape.size(), 8); + c_shape.count = xla_shape.size(); + for (int i = 0; i < xla_shape.size(); ++i) { + c_shape.indices[i] = xla_shape[i]; + } + return c_shape; +} + +xla::ShapeIndex FromC(XLA_ShapeIndex* c_shape) { + return xla::ShapeIndex(&c_shape->indices[0], + &c_shape->indices[c_shape->count]); +} + +void ToC(const xla::LiteralSlice& literal, XLA_Literal* c_literal) { + ApiConverter::ToC(literal.shape(), &c_literal->shape); + auto shapes = xla::ShapeUtil::GetLeafShapes(literal.shape()); + c_literal->buffers = new char*[shapes.size()]; + c_literal->sizes = new size_t[shapes.size()]; + c_literal->count = shapes.size(); + for (int i = 0; i < shapes.size(); ++i) { + c_literal->buffers[i] = reinterpret_cast( + const_cast(literal.untyped_data(shapes[i].index))); + c_literal->sizes[i] = literal.size_bytes(shapes[i].index); + } +} + +xla::MutableBorrowingLiteral FromC(XLA_Literal* c_literal) { + xla::Shape shape = ApiConverter::FromC(&c_literal->shape); + return xla::MutableBorrowingLiteral( + absl::MakeSpan(c_literal->buffers, c_literal->count), shape); +} + +void ToC(const xla::ShapedBuffer& buffer, XLA_ShapedBuffer* c_device_buffer) { + 
ApiConverter::ToC(buffer.on_host_shape(), &c_device_buffer->on_host_shape); + ApiConverter::ToC(buffer.on_device_shape(), + &c_device_buffer->on_device_shape); + c_device_buffer->device_ordinal = buffer.device_ordinal(); + absl::InlinedVector bases; + for (auto& pair : buffer.buffers()) { + bases.push_back(ApiConverter::ToC(pair.second)); + } + c_device_buffer->count = bases.size(); + c_device_buffer->bases = new SE_DeviceMemoryBase[bases.size()]; + for (int i = 0; i < bases.size(); ++i) { + c_device_buffer->bases[i] = bases[i]; + } +} + +void Free(XLA_Shape* shape) { delete[] shape->bytes; } +void Free(XLA_ShapeIndex*) {} +void Free(SE_DeviceMemoryBase*) {} + +void Free(XLA_Literal* c_literal) { + delete[] c_literal->buffers; + delete[] c_literal->sizes; + ApiConverter::Free(&c_literal->shape); +} + +void Free(XLA_ShapedBuffer* c_buffer) { + ApiConverter::Free(&c_buffer->on_device_shape); + ApiConverter::Free(&c_buffer->on_host_shape); + delete[] c_buffer->bases; +} + +} // namespace ApiConverter diff --git a/tensorflow/stream_executor/tpu/c_api_conversions.h b/tensorflow/stream_executor/tpu/c_api_conversions.h index 8052d0f3154..15b16388f8c 100644 --- a/tensorflow/stream_executor/tpu/c_api_conversions.h +++ b/tensorflow/stream_executor/tpu/c_api_conversions.h @@ -24,213 +24,90 @@ limitations under the License. #include "tensorflow/compiler/xla/service/shaped_buffer.h" #include "tensorflow/compiler/xla/shape.h" #include "tensorflow/compiler/xla/shape_util.h" -#include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/stream_executor/device_memory.h" #include "tensorflow/stream_executor/device_memory_allocator.h" +#include "tensorflow/stream_executor/tpu/c_api_decl.h" #include "tensorflow/stream_executor/tpu/tpu_executor_c_api.h" -#include "tensorflow/stream_executor/tpu/tpu_platform.h" -#include "tensorflow/stream_executor/tpu/tpu_stream.h" -class TpuConversions { - public: - static stream_executor::DeviceMemoryBase - SE_DeviceMemoryBaseToDeviceMemoryBase(SE_DeviceMemoryBase se_base) { - stream_executor::DeviceMemoryBase base(se_base.opaque, se_base.size); - base.SetPayload(se_base.payload); - return base; +// APIs for converting between internal and external versions of +// XLA/StreamExecutor data structures. 
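For illustration, a minimal sketch of the ToC/FromC/Free round trip that call sites in this patch are converted to (the variable names are assumed, not taken from the patch):

    // Convert an xla::Shape across the C ABI and back.
    XLA_Shape c_shape;
    ApiConverter::ToC(xla_shape, &c_shape);                    // C++ -> C: serializes the ShapeProto into c_shape.bytes
    xla::Shape round_tripped = ApiConverter::FromC(&c_shape);  // C -> C++: parses the proto back
    ApiConverter::Free(&c_shape);                              // releases the bytes allocated by ToC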
+namespace ApiConverter { + +// se::DeviceMemoryBase +SE_DeviceMemoryBase ToC(const stream_executor::DeviceMemoryBase& base); +stream_executor::DeviceMemoryBase FromC(const SE_DeviceMemoryBase& se_base); +void Free(SE_DeviceMemoryBase*); + +// xla::Shape +xla::Shape FromC(XLA_Shape* shape); +void ToC(const xla::Shape& xla_shape, XLA_Shape* c_shape); +void Free(XLA_Shape* shape); + +// xla::ShapeIndex +XLA_ShapeIndex ToC(const xla::ShapeIndex& xla_shape); +xla::ShapeIndex FromC(XLA_ShapeIndex* c_shape); +void Free(XLA_ShapeIndex*); + +// Literal +void ToC(const xla::LiteralSlice& literal, XLA_Literal* c_literal); +xla::MutableBorrowingLiteral FromC(XLA_Literal* c_literal); +void Free(XLA_Literal* c_literal); + +// ShapedBuffer +void ToC(const xla::ShapedBuffer& buffer, XLA_ShapedBuffer* c_device_buffer); +xla::ShapedBuffer FromC(XLA_ShapedBuffer* c_buffer); +void Free(XLA_ShapedBuffer* c_buffer); + +// se::DeviceMemoryBase +SE_DeviceMemoryBase ToC(const stream_executor::DeviceMemoryBase& base); +stream_executor::DeviceMemoryBase FromC(const SE_DeviceMemoryBase& se_base); +void Free(SE_DeviceMemoryBase*); + +// xla::Shape +xla::Shape FromC(XLA_Shape* shape); +void ToC(const xla::Shape& xla_shape, XLA_Shape* c_shape); +void Free(XLA_Shape* shape); + +// Literal +void ToC(const xla::LiteralSlice& literal, XLA_Literal* c_literal); +xla::MutableBorrowingLiteral FromC(XLA_Literal* c_literal); +void Free(XLA_Literal* c_literal); + +// ShapedBuffer +void ToC(const xla::ShapedBuffer& buffer, XLA_ShapedBuffer* c_device_buffer); +xla::ShapedBuffer FromC(XLA_ShapedBuffer* c_buffer); +void Free(XLA_ShapedBuffer* c_buffer); + +xla::MaybeOwningDeviceMemory FromC( + SE_MaybeOwningDeviceMemory* se_mem, + stream_executor::DeviceMemoryAllocator* allocator); + +// DeviceMemoryAllocator +SE_DeviceMemoryAllocator ToC(stream_executor::DeviceMemoryAllocator* allocator); + +// OwningDeviceMemory +SE_MaybeOwningDeviceMemory ToC(stream_executor::OwningDeviceMemory* mem); +SE_MaybeOwningDeviceMemory ToC(xla::MaybeOwningDeviceMemory& mem); + +// Helper for managing stack based C -> C++ conversions. 
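A hedged sketch of how the stack helper declared immediately below can be used, assuming StackHelper is parameterized on the C-side type; the C call in the middle is hypothetical:

    // StackHelper pairs ToC (constructor) with Free (destructor).
    ApiConverter::StackHelper<XLA_Shape> c_shape(xla_shape);  // runs ToC(xla_shape, &c_shape.value)
    SomeTpuCApiFn(&c_shape.value);                            // hypothetical call across the C boundary
    xla::Shape back = c_shape.AsCpp<xla::Shape>();            // runs FromC on demand
    // Free(&c_shape.value) runs when c_shape goes out of scope.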
+template +struct StackHelper { + explicit StackHelper() {} + + template + explicit StackHelper(const CppType& t) { + ::ApiConverter::ToC(t, &value); + } + ~StackHelper() { ::ApiConverter::Free(&value); } + + template + CppType AsCpp() const { + return ::ApiConverter::FromC(&value); } - static SE_DeviceMemoryBase DeviceMemoryBaseToSE_DeviceMemoryBase( - const stream_executor::DeviceMemoryBase& base) { - SE_DeviceMemoryBase se_base; - se_base.opaque = const_cast(base.opaque()); - se_base.payload = base.payload(); - se_base.size = base.size(); - return se_base; - } - - static xla::Shape CShapeToXlaShape(XLA_Shape* shape) { - xla::ShapeProto p; - p.ParseFromArray(shape->bytes, shape->size); - return xla::Shape(p); - } - - static void XlaShapeToCShape(const xla::Shape& xla_shape, - XLA_Shape* c_shape) { - xla::ShapeProto p = xla_shape.ToProto(); - std::string p_str = p.SerializeAsString(); - c_shape->bytes = new char[p_str.size()]; - c_shape->size = p_str.size(); - memcpy(c_shape->bytes, p_str.data(), p_str.size()); - } - - static XLA_ShapeIndex XlaShapeIndexToCShapeIndex( - const xla::ShapeIndex& xla_shape) { - XLA_ShapeIndex c_shape; - CHECK_LT(xla_shape.size(), 8); - c_shape.count = xla_shape.size(); - for (int i = 0; i < xla_shape.size(); ++i) { - c_shape.indices[i] = xla_shape[i]; - } - return c_shape; - } - - static xla::ShapeIndex CShapeIndexToXlaShapeIndex(XLA_ShapeIndex* c_shape) { - return xla::ShapeIndex(&c_shape->indices[0], - &c_shape->indices[c_shape->count]); - } - - static void XLAShapedBufferToCShapedBuffer( - const xla::ShapedBuffer& buffer, XLA_ShapedBuffer* c_device_buffer) { - XlaShapeToCShape(buffer.on_host_shape(), &c_device_buffer->on_host_shape); - XlaShapeToCShape(buffer.on_device_shape(), - &c_device_buffer->on_device_shape); - c_device_buffer->device_ordinal = buffer.device_ordinal(); - absl::InlinedVector bases; - for (auto& pair : buffer.buffers()) { - bases.push_back(DeviceMemoryBaseToSE_DeviceMemoryBase(pair.second)); - } - c_device_buffer->count = bases.size(); - c_device_buffer->bases = new SE_DeviceMemoryBase[bases.size()]; - for (int i = 0; i < bases.size(); ++i) { - c_device_buffer->bases[i] = bases[i]; - } - } - - static void XLALiteralToCLiteral(const xla::LiteralSlice& literal, - XLA_Literal* c_literal) { - XlaShapeToCShape(literal.shape(), &c_literal->shape); - auto shapes = xla::ShapeUtil::GetLeafShapes(literal.shape()); - c_literal->buffers = new char*[shapes.size()]; - c_literal->sizes = new size_t[shapes.size()]; - c_literal->count = shapes.size(); - for (int i = 0; i < shapes.size(); ++i) { - c_literal->buffers[i] = reinterpret_cast( - const_cast(literal.untyped_data(shapes[i].index))); - c_literal->sizes[i] = literal.size_bytes(shapes[i].index); - } - } - - static xla::MutableBorrowingLiteral CLiteralToXLALiteral( - XLA_Literal* c_literal) { - xla::Shape shape = CShapeToXlaShape(&c_literal->shape); - return xla::MutableBorrowingLiteral( - absl::MakeSpan(c_literal->buffers, c_literal->count), shape); - } - - static void CShapeCleanup(XLA_Shape* c_shape) { delete[] c_shape->bytes; } - - static void CLiteralCleanup(XLA_Literal* c_literal) { - delete[] c_literal->buffers; - delete[] c_literal->sizes; - CShapeCleanup(&c_literal->shape); - } - - static void CShapedBufferCleanup(XLA_ShapedBuffer* c_buffer) { - CShapeCleanup(&c_buffer->on_device_shape); - CShapeCleanup(&c_buffer->on_host_shape); - delete[] c_buffer->bases; - } - - static SE_DeviceMemoryAllocator AllocatorToSE_Allocator( - stream_executor::DeviceMemoryAllocator* allocator) { - 
SE_DeviceMemoryAllocator se_allocator; - if (allocator == nullptr) { - se_allocator.ctx = nullptr; - se_allocator.platform = nullptr; - se_allocator.allocate = nullptr; - se_allocator.deallocate = nullptr; - return se_allocator; - } - se_allocator.platform = - static_cast(allocator->platform()) - ->se_platform(); - se_allocator.ctx = allocator; - se_allocator.allocate = [](void* ctx, int device_ordinal, uint64_t size, - bool retry_on_failure, int64_t memory_space, - SE_ScopedDeviceMemory* memory, - SE_Status* se_status) { - auto allocation = - reinterpret_cast(ctx) - ->Allocate(device_ordinal, size, retry_on_failure, memory_space); - if (!allocation.ok()) { - auto status = allocation.status(); - TpuStatus_Set(se_status, status.code(), status.error_message().data(), - status.error_message().size()); - } else { - auto& scoped_memory = allocation.ValueOrDie(); - memory->wrapped = - DeviceMemoryBaseToSE_DeviceMemoryBase(scoped_memory.Release()); - memory->device_ordinal = scoped_memory.device_ordinal(); - } - }; - - se_allocator.deallocate = [](void* ctx, SE_DeviceMemoryBase* base, - int device_ordinal, SE_Status* se_status) { - auto status = - reinterpret_cast(ctx) - ->Deallocate(device_ordinal, - SE_DeviceMemoryBaseToDeviceMemoryBase(*base)); - if (!status.ok()) { - TpuStatus_Set(se_status, status.code(), status.error_message().data(), - status.error_message().size()); - } - }; - return se_allocator; - } - - static SE_ExecutableRunOptions ExecutableRunOptionsToSE_ExecutableRunOptions( - const xla::ServiceExecutableRunOptions& options) { - SE_ExecutableRunOptions se_options; - se_options.allocator = - AllocatorToSE_Allocator(options.run_options().allocator()); - se_options.device_ordinal = options.run_options().device_ordinal(); - se_options.stream = - static_cast(options.stream()->implementation()) - ->se_stream(); - return se_options; - } - - static SE_MaybeOwningDeviceMemory SEOwningDeviceMemoryToC( - stream_executor::OwningDeviceMemory* mem) { - SE_MaybeOwningDeviceMemory se_mem; - se_mem.device_ordinal = mem->device_ordinal(); - se_mem.memory = DeviceMemoryBaseToSE_DeviceMemoryBase(mem->Release()); - se_mem.allocator = AllocatorToSE_Allocator(mem->allocator()); - se_mem.owned = true; - return se_mem; - } - - static SE_MaybeOwningDeviceMemory SEMaybeOwningDeviceMemoryToC( - xla::MaybeOwningDeviceMemory& mem) { - SE_MaybeOwningDeviceMemory se_mem; - se_mem.owned = mem.HasOwnership(); - se_mem.memory = - DeviceMemoryBaseToSE_DeviceMemoryBase(mem.AsDeviceMemoryBase()); - if (mem.HasOwnership()) { - auto owned = mem.Release().value(); - se_mem.device_ordinal = owned.device_ordinal(); - se_mem.allocator = - TpuConversions::AllocatorToSE_Allocator(owned.allocator()); - } else { - se_mem.allocator = AllocatorToSE_Allocator(nullptr); - se_mem.device_ordinal = -1; - } - return se_mem; - } - - static xla::MaybeOwningDeviceMemory COwningDeviceMemToSEOwningDeviceMem( - SE_MaybeOwningDeviceMemory* se_mem, - stream_executor::DeviceMemoryAllocator* allocator) { - if (se_mem->owned) { - return xla::MaybeOwningDeviceMemory(stream_executor::OwningDeviceMemory( - SE_DeviceMemoryBaseToDeviceMemoryBase(se_mem->memory), - se_mem->device_ordinal, allocator)); - } else { - return xla::MaybeOwningDeviceMemory( - SE_DeviceMemoryBaseToDeviceMemoryBase(se_mem->memory)); - } - } + mutable CType value; }; -#endif // THIRD_PARTY_TENSORFLOW_STREAM_EXECUTOR_TPU_C_API_CONVERSIONS_H_ +} // namespace ApiConverter + +#endif diff --git a/tensorflow/stream_executor/tpu/c_api_decl.h b/tensorflow/stream_executor/tpu/c_api_decl.h 
new file mode 100644 index 00000000000..1989ae01c25 --- /dev/null +++ b/tensorflow/stream_executor/tpu/c_api_decl.h @@ -0,0 +1,253 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_STREAM_EXECUTOR_TPU_C_API_DECL_H_ +#define TENSORFLOW_STREAM_EXECUTOR_TPU_C_API_DECL_H_ + +#include +#include + +#include "tensorflow/c/tf_attrtype.h" +#include "tensorflow/c/tf_status.h" +#include "tensorflow/core/tpu/libtftpu.h" + +extern "C" { + +enum TpuCoreTypeEnum { + kTensorCore, + kEmbeddingV1, + kEmbeddingV2, +}; + +typedef struct SE_Status SE_Status; + +typedef struct SE_Platform SE_Platform; +typedef struct SE_StreamExecutor SE_StreamExecutor; +typedef struct SE_Stream SE_Stream; +typedef struct SE_Event SE_Event; +typedef struct SE_Timer SE_Timer; + +typedef struct TpuSerializedProto { + const char* bytes; + size_t size; +} TpuSerializedProto; + +typedef struct SE_PlatformId { + void* id; // aka stream_executor::Platform::Id +} SE_PlatformId; +typedef struct SE_StreamExecutorConfig SE_StreamExecutorConfig; +typedef struct SE_DeviceOptions SE_DeviceOptions; +typedef SE_Status* (*SE_StatusCallbackFn)(void*); + +typedef struct SE_DeviceMemoryBase { + void* opaque; + uint64_t size; + uint64_t payload; +} SE_DeviceMemoryBase; + +typedef struct SE_ScopedDeviceMemory { + SE_DeviceMemoryBase wrapped; + int device_ordinal; +} SE_ScopedDeviceMemory; + +typedef struct SE_AllocatorStats { + int64_t num_allocs; + int64_t bytes_in_use; + int64_t peak_bytes_in_use; + int64_t largest_alloc_size; + + bool has_bytes_limit; + int64_t bytes_limit; + + int64_t bytes_reserved; + int64_t peak_bytes_reserved; + + bool has_bytes_reservable_limit; + int64_t bytes_reservable_limit; + + int64_t largest_free_block_bytes; +} SE_AllocatorStats; + +// Note, due to the... odd way in which DeviceMemoryAllocator is used in TF, we +// cannot simply wrap an underlying pointer. Instead, we reverse the call +// direction and request memory via a callback. 
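A hedged sketch of the reversed call direction described above: the runtime requests memory through the callbacks carried by SE_DeviceMemoryAllocator (declared just below) rather than through a wrapped allocator pointer. Everything here other than the struct fields is illustrative:

    SE_DeviceMemoryAllocator alloc = ...;  // e.g. produced by ApiConverter::ToC(cpp_allocator)
    SE_ScopedDeviceMemory mem;
    SE_Status* status = ...;               // status object obtained from the C status API
    alloc.allocate(alloc.ctx, /*device_ordinal=*/0, /*size=*/1024,
                   /*retry_on_failure=*/true, /*memory_space=*/0, &mem, status);
    // ... use mem.wrapped as an SE_DeviceMemoryBase ...
    alloc.deallocate(alloc.ctx, &mem.wrapped, mem.device_ordinal, status);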
+typedef void (*SE_AllocateFn)(void* ctx, int device_ordinal, uint64_t size, + bool retry_on_failure, int64_t memory_space, + SE_ScopedDeviceMemory* result, SE_Status* status); + +typedef void (*SE_DeallocateFn)(void* ctx, SE_DeviceMemoryBase* base, + int device_ordinal, SE_Status* status); + +typedef struct SE_DeviceMemoryAllocator { + SE_Platform* platform; + void* ctx; + SE_AllocateFn allocate; + SE_DeallocateFn deallocate; +} SE_DeviceMemoryAllocator; + +typedef struct SE_DeviceDescription { + char* device_vendor; + char* platform_version; + char* driver_version; + char* runtime_version; + char* pci_bus_id; + char* name; + + int64_t thread_dim_limit_x; + int64_t thread_dim_limit_y; + int64_t thread_dim_limit_z; + int64_t block_dim_limit_x; + int64_t block_dim_limit_y; + int64_t block_dim_limit_z; + + int64_t threads_per_core_limit; + int64_t threads_per_block_limit; + int64_t threads_per_warp; + + int64_t registers_per_core_limit; + int64_t registers_per_block_limit; + + int64_t device_address_bits; + int64_t device_memory_size; + int64_t memory_bandwidth; + + int64_t shared_memory_per_core; + int64_t shared_memory_per_block; + + float clock_rate_ghz; + + int cuda_compute_capability_major; + int cuda_compute_capability_minor; + + int rocm_amdgpu_isa_version; + + int numa_node; + int core_count; + bool ecc_enabled; +} SE_DeviceDescription; + +typedef struct Tpu_Compiler Tpu_Compiler; +typedef struct SE_Executable SE_Executable; + +typedef struct SE_ExecutableRunOptions { + SE_DeviceMemoryAllocator allocator; + int device_ordinal; + SE_Stream* stream; +} SE_ExecutableRunOptions; + +typedef struct SE_MaybeOwningDeviceMemory { + SE_DeviceMemoryBase memory; + bool owned; + + // Set if owned + int device_ordinal; + SE_DeviceMemoryAllocator allocator; +} SE_MaybeOwningDeviceMemory; + +// Represents an XLA shape tree. +// Shapes are flattened in default traversal order. +typedef struct XLA_Shape { + char* bytes; + size_t size; +} XLA_Shape; + +// Represents a leaf node for a XLA shaped buffer. +typedef struct XLA_ShapedBuffer { + XLA_Shape on_host_shape; + XLA_Shape on_device_shape; + int device_ordinal; + + SE_DeviceMemoryBase* bases; + size_t count; +} XLA_ShapedBuffer; + +// Represents a leaf XLA literal. 
+typedef struct XLA_Literal { + char** buffers; + size_t* sizes; + size_t count; + XLA_Shape shape; +} XLA_Literal; + +typedef struct XLA_MaybeOwningDeviceMemoryShapeTree { + XLA_Shape shape; + SE_MaybeOwningDeviceMemory* buffers; +} XLA_MaybeOwningDeviceMemoryShapeTree; + +typedef struct XLA_ShapeIndex { + int64_t indices[8]; + int64_t count; +} XLA_ShapeIndex; + +typedef struct SE_ExecutionInput { + XLA_MaybeOwningDeviceMemoryShapeTree shape_tree; + XLA_ShapeIndex* unowned_indices; + int unowned_indices_size; + XLA_Shape dynamic_shape; + XLA_Shape host_shape; +} SE_ExecutionInput; + +typedef struct SE_ExecutionOutput { + XLA_ShapedBuffer result; + SE_MaybeOwningDeviceMemory* to_be_released; + int to_be_released_size; + XLA_ShapeIndex* aliased_indices; + int aliased_indices_size; +} SE_ExecutionOutput; + +typedef struct XLA_ComputationLayout { + int parameter_count; + XLA_Shape* parameter_layouts; + XLA_Shape result_layout; +} XLA_ComputationLayout; + +typedef struct XLA_HloModuleConfig { + uint64_t seed; + int32_t launch_id; + int64_t replica_count; + int64_t num_partitions; + bool use_spmd_partitioning; + bool has_static_device_assignment; + TpuSerializedProto static_device_assignment; + bool has_entry_computation_layout; + XLA_ComputationLayout entry_computation_layout; +} XLA_HloModuleConfig; + +typedef struct SE_HloExecutionProfile SE_HloExecutionProfile; + +struct SE_StreamExecutorList { + SE_StreamExecutor** exec; + int count; +}; + +typedef struct XLA_HloModuleGroup { + TpuSerializedProto proto; + XLA_HloModuleConfig* module_config; +} XLA_HloModuleGroup; + +typedef struct XLA_HloModule { + TpuSerializedProto proto; + XLA_HloModuleConfig module_config; +} XLA_HloModule; + +typedef struct XLA_TransferManager XLA_TransferManager; + +typedef struct XLA_ComputationPlacer XLA_ComputationPlacer; + +typedef void (*XLA_CallbackFn)(void*); +typedef void (*XLA_StatusCallbackFn)(void*, SE_Status*); +} + +#endif // TENSORFLOW_STREAM_EXECUTOR_TPU_C_API_DECL_H_ diff --git a/tensorflow/stream_executor/tpu/c_api_defn.h b/tensorflow/stream_executor/tpu/c_api_defn.h new file mode 100644 index 00000000000..1599f1f266a --- /dev/null +++ b/tensorflow/stream_executor/tpu/c_api_defn.h @@ -0,0 +1,70 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_STREAM_EXECUTOR_TPU_C_API_DEFN_H_ +#define TENSORFLOW_STREAM_EXECUTOR_TPU_C_API_DEFN_H_ + +#include "tensorflow/stream_executor/device_options.h" +#include "tensorflow/stream_executor/event.h" +#include "tensorflow/stream_executor/stream.h" +#include "tensorflow/stream_executor/stream_executor.h" +#include "tensorflow/stream_executor/timer.h" + +// Definitions for XLA API data structures. Any underlying C++ data structures +// are implementation details and should only be used from within the stream +// executor implementation. 
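The structs that follow illustrate the wrapping pattern; a minimal sketch, assuming the C++ object is only touched on the TensorFlow side of the boundary (the executor variable is assumed):

    // The opaque handle owns the C++ object; C callers never see the inner type.
    SE_Stream* handle = new SE_Stream(executor);     // wraps a stream_executor::Stream
    stream_executor::Stream& impl = handle->stream;  // used only inside this layer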
+ +namespace stream_executor { +class Platform; +class StreamExecutor; +} // namespace stream_executor + +struct SE_Platform { + stream_executor::Platform* platform; +}; + +struct SE_StreamExecutor { + stream_executor::StreamExecutor* executor; +}; + +struct SE_Stream { + explicit SE_Stream(stream_executor::StreamExecutor* parent) + : stream(parent) {} + stream_executor::Stream stream; +}; + +struct SE_Event { + explicit SE_Event(stream_executor::StreamExecutor* parent) : event(parent) {} + stream_executor::Event event; +}; + +struct SE_Timer { + explicit SE_Timer(stream_executor::StreamExecutor* parent) : timer(parent) {} + stream_executor::Timer timer; +}; + +struct SE_StreamExecutorConfig { + stream_executor::StreamExecutorConfig config; +}; + +struct SE_DeviceOptions { + stream_executor::DeviceOptions options; +}; + +struct XLA_TransferManager {}; + +struct XLA_ComputationPlacer {}; + +#endif // TENSORFLOW_STREAM_EXECUTOR_TPU_C_API_DEFN_H_ diff --git a/tensorflow/stream_executor/tpu/device_memory_base_helper.h b/tensorflow/stream_executor/tpu/device_memory_base_helper.h deleted file mode 100644 index 9937dc29642..00000000000 --- a/tensorflow/stream_executor/tpu/device_memory_base_helper.h +++ /dev/null @@ -1,41 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_STREAM_EXECUTOR_TPU_DEVICE_MEMORY_BASE_HELPER_H_ -#define TENSORFLOW_STREAM_EXECUTOR_TPU_DEVICE_MEMORY_BASE_HELPER_H_ - -#include "tensorflow/stream_executor/device_memory.h" -#include "tensorflow/stream_executor/tpu/tpu_executor_c_api.h" - -class DeviceMemoryBaseHelper { - public: - static stream_executor::DeviceMemoryBase - SE_DeviceMemoryBaseToDeviceMemoryBase(SE_DeviceMemoryBase se_base) { - stream_executor::DeviceMemoryBase base(se_base.opaque, se_base.size); - base.SetPayload(se_base.payload); - return base; - } - - static SE_DeviceMemoryBase DeviceMemoryBaseToSE_DeviceMemoryBase( - const stream_executor::DeviceMemoryBase& base) { - SE_DeviceMemoryBase se_base; - se_base.opaque = const_cast(base.opaque()); - se_base.payload = base.payload(); - se_base.size = base.size(); - return se_base; - } -}; - -#endif // TENSORFLOW_STREAM_EXECUTOR_TPU_DEVICE_MEMORY_BASE_HELPER_H_ diff --git a/tensorflow/stream_executor/tpu/proto_helper.h b/tensorflow/stream_executor/tpu/proto_helper.h index 3bd2b09f95e..29c322b0e9e 100644 --- a/tensorflow/stream_executor/tpu/proto_helper.h +++ b/tensorflow/stream_executor/tpu/proto_helper.h @@ -19,14 +19,10 @@ limitations under the License. 
#include #include "tensorflow/core/platform/logging.h" +#include "tensorflow/stream_executor/tpu/c_api_decl.h" extern "C" { -typedef struct TpuSerializedProto { - const char* bytes; - size_t size; -} TpuSerializedProto; - void StreamExecutor_Tpu_FreeSerializedProto(const TpuSerializedProto* proto); } // extern "C" diff --git a/tensorflow/stream_executor/tpu/status_helper.h b/tensorflow/stream_executor/tpu/status_helper.h index 0129abb0815..85c7bf526a9 100644 --- a/tensorflow/stream_executor/tpu/status_helper.h +++ b/tensorflow/stream_executor/tpu/status_helper.h @@ -17,7 +17,6 @@ limitations under the License. #define TENSORFLOW_STREAM_EXECUTOR_TPU_STATUS_HELPER_H_ #include "tensorflow/core/platform/status.h" -#include "tensorflow/core/tpu/kernels/tpu_util_c_api.h" #include "tensorflow/core/tpu/tpu_api.h" #include "tensorflow/stream_executor/tpu/tpu_executor_c_api.h" diff --git a/tensorflow/stream_executor/tpu/tpu_event.h b/tensorflow/stream_executor/tpu/tpu_event.h new file mode 100644 index 00000000000..af53d730ecf --- /dev/null +++ b/tensorflow/stream_executor/tpu/tpu_event.h @@ -0,0 +1,33 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_EVENT_H_ +#define TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_EVENT_H_ + +#include "tensorflow/core/tpu/tpu_api.h" +#include "tensorflow/stream_executor/stream_executor_internal.h" +#include "tensorflow/stream_executor/tpu/c_api_decl.h" + +class TpuEvent : public ::stream_executor::internal::EventInterface { + public: + explicit TpuEvent(SE_Event* event) : event_(event) {} + ~TpuEvent() override { + tensorflow::tpu::ExecutorApiFn()->TpuEvent_FreeFn(event_); + } + + private: + SE_Event* event_; +}; + +#endif // TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_EVENT_H_ diff --git a/tensorflow/stream_executor/tpu/tpu_executable.cc b/tensorflow/stream_executor/tpu/tpu_executable.cc index e8ff3a54db8..dd28f21455e 100644 --- a/tensorflow/stream_executor/tpu/tpu_executable.cc +++ b/tensorflow/stream_executor/tpu/tpu_executable.cc @@ -95,21 +95,21 @@ Status TpuExecutable::LoadProgramAndEnqueueToStream( Shape TpuExecutable::HostShapeToDeviceShape(const Shape& host_shape) { XLA_Shape c_host_shape; XLA_Shape c_device_shape; - TpuConversions::XlaShapeToCShape(host_shape, &c_host_shape); + ApiConverter::ToC(host_shape, &c_host_shape); tensorflow::tpu::ExecuteApiFn()->HardwareLayout_HostShapeToDeviceShapeFn( &c_host_shape, &c_device_shape); - Shape device_shape = TpuConversions::CShapeToXlaShape(&c_device_shape); - TpuConversions::CShapeCleanup(&c_host_shape); - TpuConversions::CShapeCleanup(&c_device_shape); + Shape device_shape = ApiConverter::FromC(&c_device_shape); + ApiConverter::Free(&c_host_shape); + ApiConverter::Free(&c_device_shape); return device_shape; } int64 TpuExecutable::ShapeSize(const Shape& shape) { XLA_Shape c_shape; - TpuConversions::XlaShapeToCShape(shape, &c_shape); + ApiConverter::ToC(shape, 
&c_shape); int64 size = tensorflow::tpu::ExecuteApiFn()->HardwareLayout_ShapeSizeFn(&c_shape); - TpuConversions::CShapeCleanup(&c_shape); + ApiConverter::Free(&c_shape); return size; } diff --git a/tensorflow/stream_executor/tpu/tpu_executor.cc b/tensorflow/stream_executor/tpu/tpu_executor.cc index 8386653da82..851fb3ec4e7 100644 --- a/tensorflow/stream_executor/tpu/tpu_executor.cc +++ b/tensorflow/stream_executor/tpu/tpu_executor.cc @@ -18,8 +18,8 @@ limitations under the License. #include "tensorflow/c/tf_status.h" #include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/tpu/tpu_api.h" -#include "tensorflow/stream_executor/tpu/device_memory_base_helper.h" #include "tensorflow/stream_executor/tpu/status_helper.h" +#include "tensorflow/stream_executor/tpu/tpu_event.h" #include "tensorflow/stream_executor/tpu/tpu_stream.h" #include "tensorflow/stream_executor/tpu/tpu_timer.h" @@ -187,18 +187,16 @@ TpuExecutor::CreateEventImplementation() { DeviceMemoryBase TpuExecutor::Allocate(uint64 size, int64 memory_space) { SE_DeviceMemoryBase se_base = tpu::ExecutorApiFn()->TpuExecutor_AllocateFn( executor_, size, memory_space); - return DeviceMemoryBaseHelper::SE_DeviceMemoryBaseToDeviceMemoryBase(se_base); + return ApiConverter::FromC(se_base); } void TpuExecutor::Deallocate(const DeviceMemoryBase& memory) { - SE_DeviceMemoryBase se_base = - DeviceMemoryBaseHelper::DeviceMemoryBaseToSE_DeviceMemoryBase(memory); + SE_DeviceMemoryBase se_base = ApiConverter::ToC(memory); tpu::ExecutorApiFn()->TpuExecutor_DeallocateFn(executor_, &se_base); } void TpuExecutor::Deallocate(DeviceMemoryBase* memory) { - SE_DeviceMemoryBase se_base = - DeviceMemoryBaseHelper::DeviceMemoryBaseToSE_DeviceMemoryBase(*memory); + SE_DeviceMemoryBase se_base = ApiConverter::ToC(*memory); tpu::ExecutorApiFn()->TpuExecutor_DeallocateFn(executor_, &se_base); } @@ -273,8 +271,7 @@ Status TpuExecutor::EnqueueInfeed(int32 infeed_queue_index, bool TpuExecutor::Memcpy(Stream* stream, void* host_dst, const ::stream_executor::DeviceMemoryBase& device_src, uint64 size) { - SE_DeviceMemoryBase se_base = - DeviceMemoryBaseHelper::DeviceMemoryBaseToSE_DeviceMemoryBase(device_src); + SE_DeviceMemoryBase se_base = ApiConverter::ToC(device_src); return tpu::ExecutorApiFn()->TpuExecutor_MemcpyToHostFn( executor_, stream_map().at(stream->implementation()), host_dst, &se_base, size); @@ -283,9 +280,7 @@ bool TpuExecutor::Memcpy(Stream* stream, void* host_dst, bool TpuExecutor::Memcpy(Stream* stream, ::stream_executor::DeviceMemoryBase* device_dst, const void* host_src, uint64 size) { - SE_DeviceMemoryBase se_base = - DeviceMemoryBaseHelper::DeviceMemoryBaseToSE_DeviceMemoryBase( - *device_dst); + SE_DeviceMemoryBase se_base = ApiConverter::ToC(*device_dst); return tpu::ExecutorApiFn()->TpuExecutor_MemcpyFromHostFn( executor_, stream_map().at(stream->implementation()), &se_base, host_src, size); @@ -295,9 +290,7 @@ Status TpuExecutor::SynchronousMemcpy( ::stream_executor::DeviceMemoryBase* device_dst, const void* host_src, uint64 size) { StatusHelper status; - SE_DeviceMemoryBase se_base = - DeviceMemoryBaseHelper::DeviceMemoryBaseToSE_DeviceMemoryBase( - *device_dst); + SE_DeviceMemoryBase se_base = ApiConverter::ToC(*device_dst); tpu::ExecutorApiFn()->TpuExecutor_SynchronousMemcpyFromHostFn( executor_, &se_base, host_src, size, status.c_status); return status.status(); @@ -307,8 +300,7 @@ Status TpuExecutor::SynchronousMemcpy( void* host_dst, const ::stream_executor::DeviceMemoryBase& device_src, uint64 size) { StatusHelper status; - 
SE_DeviceMemoryBase se_base = - DeviceMemoryBaseHelper::DeviceMemoryBaseToSE_DeviceMemoryBase(device_src); + SE_DeviceMemoryBase se_base = ApiConverter::ToC(device_src); tpu::ExecutorApiFn()->TpuExecutor_SynchronousMemcpyToHostFn( executor_, host_dst, &se_base, size, status.c_status); return status.status(); diff --git a/tensorflow/stream_executor/tpu/tpu_executor_c_api.h b/tensorflow/stream_executor/tpu/tpu_executor_c_api.h index 3795bdd0cf6..c1e68cf03a5 100644 --- a/tensorflow/stream_executor/tpu/tpu_executor_c_api.h +++ b/tensorflow/stream_executor/tpu/tpu_executor_c_api.h @@ -21,123 +21,8 @@ limitations under the License. #include "tensorflow/c/tf_attrtype.h" #include "tensorflow/c/tf_status.h" -#include "tensorflow/core/tpu/kernels/tpu_util_c_api.h" #include "tensorflow/core/tpu/libtftpu.h" - -typedef struct SE_Platform SE_Platform; -typedef struct SE_StreamExecutor SE_StreamExecutor; -typedef struct SE_Stream SE_Stream; -typedef struct SE_Event SE_Event; -typedef struct SE_Timer SE_Timer; - -typedef struct SE_PlatformId { - void* id; // aka stream_executor::Platform::Id -} SE_PlatformId; -typedef struct SE_StreamExecutorConfig SE_StreamExecutorConfig; -typedef struct SE_DeviceOptions SE_DeviceOptions; -typedef SE_Status* (*SE_StatusCallbackFn)(void*); - -typedef struct SE_DeviceMemoryBase { - void* opaque; - uint64_t size; - uint64_t payload; -} SE_DeviceMemoryBase; - -typedef struct SE_ScopedDeviceMemory { - SE_DeviceMemoryBase wrapped; - int device_ordinal; -} SE_ScopedDeviceMemory; - -typedef struct SE_AllocatorStats { - int64_t num_allocs; - int64_t bytes_in_use; - int64_t peak_bytes_in_use; - int64_t largest_alloc_size; - - bool has_bytes_limit; - int64_t bytes_limit; - - int64_t bytes_reserved; - int64_t peak_bytes_reserved; - - bool has_bytes_reservable_limit; - int64_t bytes_reservable_limit; - - int64_t largest_free_block_bytes; -} SE_AllocatorStats; - -typedef struct SE_DeviceDescription { - char* device_vendor; - char* platform_version; - char* driver_version; - char* runtime_version; - char* pci_bus_id; - char* name; - - int64_t thread_dim_limit_x; - int64_t thread_dim_limit_y; - int64_t thread_dim_limit_z; - int64_t block_dim_limit_x; - int64_t block_dim_limit_y; - int64_t block_dim_limit_z; - - int64_t threads_per_core_limit; - int64_t threads_per_block_limit; - int64_t threads_per_warp; - - int64_t registers_per_core_limit; - int64_t registers_per_block_limit; - - int64_t device_address_bits; - int64_t device_memory_size; - int64_t memory_bandwidth; - - int64_t shared_memory_per_core; - int64_t shared_memory_per_block; - - float clock_rate_ghz; - - int cuda_compute_capability_major; - int cuda_compute_capability_minor; - - int rocm_amdgpu_isa_version; - - int numa_node; - int core_count; - bool ecc_enabled; -} SE_DeviceDescription; - -typedef struct XLA_TransferManager XLA_TransferManager; - -typedef struct XLA_ComputationPlacer XLA_ComputationPlacer; - -// Represents an XLA shape tree. -// Shapes are flattened in default traversal order. -typedef struct XLA_Shape { - char* bytes; - size_t size; -} XLA_Shape; - -// Represents a leaf node for a XLA shaped buffer. -typedef struct XLA_ShapedBuffer { - XLA_Shape on_host_shape; - XLA_Shape on_device_shape; - int device_ordinal; - - SE_DeviceMemoryBase* bases; - size_t count; -} XLA_ShapedBuffer; - -// Represents a leaf XLA literal. 
-typedef struct XLA_Literal { - char** buffers; - size_t* sizes; - size_t count; - XLA_Shape shape; -} XLA_Literal; - -typedef void (*XLA_CallbackFn)(void*); -typedef void (*XLA_StatusCallbackFn)(void*, SE_Status*); +#include "tensorflow/stream_executor/tpu/c_api_decl.h" extern "C" { @@ -318,105 +203,9 @@ int TpuCoreLocation_Id(void* tpu_core_location); // C API for XLA::Compiler interface -// Note, due to the... odd way in which DeviceMemoryAllocator is used in TF, we -// cannot simply wrap an underlying pointer. Instead, we reverse the call -// direction and request memory via a callback. -typedef void (*SE_AllocateFn)(void* ctx, int device_ordinal, uint64_t size, - bool retry_on_failure, int64_t memory_space, - SE_ScopedDeviceMemory* result, SE_Status* status); - -typedef void (*SE_DeallocateFn)(void* ctx, SE_DeviceMemoryBase* base, - int device_ordinal, SE_Status* status); - -typedef struct SE_DeviceMemoryAllocator { - SE_Platform* platform; - void* ctx; - SE_AllocateFn allocate; - SE_DeallocateFn deallocate; -} SE_DeviceMemoryAllocator; - -typedef struct Tpu_Compiler Tpu_Compiler; -typedef struct SE_Executable SE_Executable; - -typedef struct SE_ExecutableRunOptions { - SE_DeviceMemoryAllocator allocator; - int device_ordinal; - SE_Stream* stream; -} SE_ExecutableRunOptions; - -typedef struct SE_MaybeOwningDeviceMemory { - SE_DeviceMemoryBase memory; - bool owned; - - // Set if owned - int device_ordinal; - SE_DeviceMemoryAllocator allocator; -} SE_MaybeOwningDeviceMemory; - -typedef struct XLA_MaybeOwningDeviceMemoryShapeTree { - XLA_Shape shape; - SE_MaybeOwningDeviceMemory* buffers; -} XLA_MaybeOwningDeviceMemoryShapeTree; - -typedef struct XLA_ShapeIndex { - int64_t indices[8]; - int64_t count; -} XLA_ShapeIndex; - -typedef struct SE_ExecutionInput { - XLA_MaybeOwningDeviceMemoryShapeTree shape_tree; - XLA_ShapeIndex* unowned_indices; - int unowned_indices_size; - XLA_Shape dynamic_shape; - XLA_Shape host_shape; -} SE_ExecutionInput; - -typedef struct SE_ExecutionOutput { - XLA_ShapedBuffer result; - SE_MaybeOwningDeviceMemory* to_be_released; - int to_be_released_size; - XLA_ShapeIndex* aliased_indices; - int aliased_indices_size; -} SE_ExecutionOutput; - -typedef struct XLA_ComputationLayout { - int parameter_count; - XLA_Shape* parameter_layouts; - XLA_Shape result_layout; -} XLA_ComputationLayout; - -typedef struct XLA_HloModuleConfig { - uint64_t seed; - int32_t launch_id; - int64_t replica_count; - int64_t num_partitions; - bool use_spmd_partitioning; - bool has_static_device_assignment; - TpuSerializedProto static_device_assignment; - bool has_entry_computation_layout; - XLA_ComputationLayout entry_computation_layout; -} XLA_HloModuleConfig; - -typedef struct SE_HloExecutionProfile SE_HloExecutionProfile; - TFTPU_CAPI_EXPORT Tpu_Compiler* TpuCompiler_New(); TFTPU_CAPI_EXPORT void TpuCompiler_Free(Tpu_Compiler* compiler); -struct SE_StreamExecutorList { - SE_StreamExecutor** exec; - int count; -}; - -typedef struct XLA_HloModuleGroup { - TpuSerializedProto proto; - XLA_HloModuleConfig* module_config; -} XLA_HloModuleGroup; - -typedef struct XLA_HloModule { - TpuSerializedProto proto; - XLA_HloModuleConfig module_config; -} XLA_HloModule; - TFTPU_CAPI_EXPORT void TpuCompiler_RunHloPasses( Tpu_Compiler* compiler, XLA_HloModule* se_hlo_module, SE_StreamExecutor* stream_executor, SE_DeviceMemoryAllocator* allocator, diff --git a/tensorflow/stream_executor/tpu/tpu_node_context_c_api.h b/tensorflow/stream_executor/tpu/tpu_node_context_c_api.h index e7ca506df72..009671ef985 100644 
--- a/tensorflow/stream_executor/tpu/tpu_node_context_c_api.h +++ b/tensorflow/stream_executor/tpu/tpu_node_context_c_api.h @@ -15,8 +15,8 @@ limitations under the License. #ifndef TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_NODE_CONTEXT_C_API_H_ #define TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_NODE_CONTEXT_C_API_H_ -#include "tensorflow/core/tpu/kernels/tpu_util_c_api.h" #include "tensorflow/core/tpu/libtftpu.h" +#include "tensorflow/stream_executor/tpu/c_api_decl.h" typedef struct XLA_TpuNodeContext XLA_TpuNodeContext; diff --git a/tensorflow/stream_executor/tpu/tpu_stream.h b/tensorflow/stream_executor/tpu/tpu_stream.h index 223341a42e5..ab84005c718 100644 --- a/tensorflow/stream_executor/tpu/tpu_stream.h +++ b/tensorflow/stream_executor/tpu/tpu_stream.h @@ -18,7 +18,7 @@ limitations under the License. #include "tensorflow/core/tpu/tpu_api.h" #include "tensorflow/stream_executor/stream_executor_internal.h" -#include "tensorflow/stream_executor/tpu/device_memory_base_helper.h" +#include "tensorflow/stream_executor/tpu/c_api_conversions.h" #include "tensorflow/stream_executor/tpu/status_helper.h" #include "tensorflow/stream_executor/tpu/tpu_executor_c_api.h" #include "tensorflow/stream_executor/tpu/tpu_stream_interface.h" @@ -45,30 +45,15 @@ class TpuStream : public tensorflow::tpu::TpuStreamInterface { StatusHelper status; tensorflow::tpu::ExecutorApiFn() ->TpuStream_TpuEnqueueOnDeviceSendRecvLocalFn( - stream_, - DeviceMemoryBaseHelper::DeviceMemoryBaseToSE_DeviceMemoryBase( - send_buffer), - DeviceMemoryBaseHelper::DeviceMemoryBaseToSE_DeviceMemoryBase( - recv_buffer), - status.c_status); + stream_, ApiConverter::ToC(send_buffer), + ApiConverter::ToC(recv_buffer), status.c_status); return status.status(); } - SE_Stream* se_stream() { return stream_; } + SE_Stream* se_stream() const { return stream_; } private: - SE_Stream* stream_; -}; - -class TpuEvent : public ::stream_executor::internal::EventInterface { - public: - explicit TpuEvent(SE_Event* event) : event_(event) {} - ~TpuEvent() override { - tensorflow::tpu::ExecutorApiFn()->TpuEvent_FreeFn(event_); - } - - private: - SE_Event* event_; + mutable SE_Stream* stream_; }; #endif // TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_STREAM_H_ diff --git a/tensorflow/stream_executor/tpu/tpu_topology.h b/tensorflow/stream_executor/tpu/tpu_topology.h index b7d462804c9..48371b6e008 100644 --- a/tensorflow/stream_executor/tpu/tpu_topology.h +++ b/tensorflow/stream_executor/tpu/tpu_topology.h @@ -17,7 +17,7 @@ limitations under the License. 
#define TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_TOPOLOGY_H_ #include "tensorflow/core/platform/types.h" -#include "tensorflow/core/tpu/kernels/tpu_util_c_api.h" +#include "tensorflow/stream_executor/tpu/c_api_decl.h" namespace tensorflow { namespace tpu { diff --git a/tensorflow/stream_executor/tpu/tpu_transfer_manager.cc b/tensorflow/stream_executor/tpu/tpu_transfer_manager.cc index 48d3be61b5e..29781c81cac 100644 --- a/tensorflow/stream_executor/tpu/tpu_transfer_manager.cc +++ b/tensorflow/stream_executor/tpu/tpu_transfer_manager.cc @@ -46,13 +46,13 @@ xla::Shape TpuTransferManager::HostShapeToDeviceShape( XLA_Shape c_host_shape; XLA_Shape c_device_shape; - TpuConversions::XlaShapeToCShape(host_shape, &c_host_shape); + ApiConverter::ToC(host_shape, &c_host_shape); tpu::ExecutorApiFn()->TpuTransferManager_HostShapeToDeviceShapeFn( manager_, &c_host_shape, &c_device_shape); - xla::Shape device_shape = TpuConversions::CShapeToXlaShape(&c_device_shape); - TpuConversions::CShapeCleanup(&c_host_shape); - TpuConversions::CShapeCleanup(&c_device_shape); + xla::Shape device_shape = ApiConverter::FromC(&c_device_shape); + ApiConverter::Free(&c_host_shape); + ApiConverter::Free(&c_device_shape); return device_shape; } @@ -63,19 +63,18 @@ Status TpuTransferManager::TransferLiteralToDeviceAsync( StatusHelper status; XLA_Literal c_literal; - TpuConversions::XLALiteralToCLiteral(literal, &c_literal); + ApiConverter::ToC(literal, &c_literal); XLA_ShapedBuffer c_device_buffer; - TpuConversions::XLAShapedBufferToCShapedBuffer(device_buffer, - &c_device_buffer); + ApiConverter::ToC(device_buffer, &c_device_buffer); tpu::ExecutorApiFn()->TpuTransferManager_TransferLiteralToDeviceAsyncFn( manager_, TpuPlatform::GetRegisteredPlatform()->stream_map()->at( stream->implementation()), &c_literal, &c_device_buffer, status.c_status); - TpuConversions::CShapedBufferCleanup(&c_device_buffer); - TpuConversions::CLiteralCleanup(&c_literal); + ApiConverter::Free(&c_device_buffer); + ApiConverter::Free(&c_literal); return status.status(); } @@ -110,30 +109,29 @@ void TpuTransferManager::TransferLiteralFromDevice( state->remaining_transfers = 1; state->done = done; XLA_ShapedBuffer c_device_buffer; - TpuConversions::XLAShapedBufferToCShapedBuffer(device_buffer, - &c_device_buffer); + ApiConverter::ToC(device_buffer, &c_device_buffer); XLA_Literal c_literal; - TpuConversions::XLALiteralToCLiteral(literal, &c_literal); + ApiConverter::ToC(literal, &c_literal); tpu::ExecutorApiFn()->TpuTransferManager_TransferLiteralFromDeviceFn( manager_, TpuPlatform::GetRegisteredPlatform()->LookupStream( stream->implementation()), &c_device_buffer, &c_literal, TransferLiteralFromDeviceTrampoline, state); - TpuConversions::CShapedBufferCleanup(&c_device_buffer); - TpuConversions::CLiteralCleanup(&c_literal); + ApiConverter::Free(&c_device_buffer); + ApiConverter::Free(&c_literal); } int64 TpuTransferManager::GetByteSizeRequirement( const xla::Shape& shape) const { XLA_Shape c_shape; - TpuConversions::XlaShapeToCShape(shape, &c_shape); + ApiConverter::ToC(shape, &c_shape); int64 size_in_bytes = tpu::ExecutorApiFn()->TpuTransferManager_GetByteSizeRequirementFn( manager_, &c_shape); - TpuConversions::CShapeCleanup(&c_shape); + ApiConverter::Free(&c_shape); return size_in_bytes; } @@ -150,7 +148,7 @@ Status TpuTransferManager::WriteSingleTupleIndexTable( elements[i].size(), elements[i].payload()}; } XLA_Shape c_shape; - TpuConversions::XlaShapeToCShape(shape, &c_shape); + ApiConverter::ToC(shape, &c_shape); SE_DeviceMemoryBase 
region_base{region->opaque(), region->size(), region->payload()}; StatusHelper status; @@ -162,7 +160,7 @@ Status TpuTransferManager::WriteSingleTupleIndexTable( elements_bases, elements.size(), &c_shape, ®ion_base, status.c_status); delete[] elements_bases; - TpuConversions::CShapeCleanup(&c_shape); + ApiConverter::Free(&c_shape); return status.status(); } From ba990dd7e81490a98b98a4ca6c63b76a2cf572b0 Mon Sep 17 00:00:00 2001 From: Yanhui Liang Date: Tue, 21 Jul 2020 10:14:31 -0700 Subject: [PATCH 0942/2522] Update run_v1_only tests with proper reasons. PiperOrigin-RevId: 322386004 Change-Id: I9b292be8ce51118f6537853c14a22da748fcea9d --- .../python/training/sync_replicas_optimizer_test.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/training/sync_replicas_optimizer_test.py b/tensorflow/python/training/sync_replicas_optimizer_test.py index 7ff31d61c9a..32ad339d712 100644 --- a/tensorflow/python/training/sync_replicas_optimizer_test.py +++ b/tensorflow/python/training/sync_replicas_optimizer_test.py @@ -89,7 +89,8 @@ class SyncReplicasOptimizerTest(test.TestCase): def _run(self, train_op, sess): sess.run(train_op) - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only( + "This exercises tensor lookup via names which is not supported in V2.") def test2Workers(self): num_workers = 2 replicas_to_aggregate = 2 @@ -180,7 +181,8 @@ class SyncReplicasOptimizerTest(test.TestCase): sessions[1].run(var_1_g_1)) # 3 workers and one of them is backup. - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only( + "This exercises tensor lookup via names which is not supported in V2.") def test3Workers1Backup(self): num_workers = 3 replicas_to_aggregate = 2 @@ -268,7 +270,9 @@ class SyncReplicasOptimizerHookTest(test.TestCase): with self.assertRaisesRegex(ValueError, "apply_gradient should be called"): hook.begin() - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only( + "train.SyncReplicasOptimizer and train.GradientDescentOptimizer " + "are V1 only APIs.") def testCanCreatedBeforeMinimizeCalled(self): """This behavior is required to be integrated with Estimators.""" opt = training.SyncReplicasOptimizer( @@ -281,7 +285,8 @@ class SyncReplicasOptimizerHookTest(test.TestCase): opt.minimize(v, global_step=global_step) hook.begin() - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only( + "train.SyncReplicasOptimizer and train.AdamOptimizer are V1 only APIs.") def testFetchVariableList(self): opt = training.SyncReplicasOptimizer( opt=adam.AdamOptimizer(0.01), From 08a7e4f4179a6a3333c24871570205a2425dd2ab Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 21 Jul 2020 10:23:52 -0700 Subject: [PATCH 0943/2522] Internal change PiperOrigin-RevId: 322388229 Change-Id: Id48ee62df25cced913937d62cce4757753098217 --- .../compiler/mlir/tensorflow/ir/tf_ops.h | 1 - .../compiler/mlir/tensorflow/ir/tf_ops.td | 28 +-------- .../compiler/mlir/tensorflow/ir/tf_ops_a_m.cc | 6 +- .../compiler/mlir/tensorflow/ir/tf_ops_a_m.h | 1 - .../mlir/tensorflow/ir/tf_ops_helpers.inc | 20 ------ .../compiler/mlir/tensorflow/ir/tf_ops_n_z.cc | 8 --- .../compiler/mlir/tensorflow/ir/tf_ops_n_z.h | 1 - .../mlir/tensorflow/tests/canonicalize.mlir | 33 ---------- .../mlir2graphdef/functional-if-ops.mlir | 34 ++-------- .../mlir2graphdef/functional-while-ops.mlir | 36 ++--------- .../tests/resource-device-inference.mlir | 8 ++- .../tensorflow/tests/resource_op_lifting.mlir | 24 ++++--- .../tensorflow/tests/shape_inference.mlir | 9 ++- .../tpu-variable-runtime-reformatting.mlir | 9 ++- .../tests/tpu_space_to_depth_pass.mlir | 2 +- .../region_control_flow_to_functional.cc | 3 +- .../transforms/resource_op_lifting.cc | 10 ++- .../tensorflow/transforms/shape_inference.cc | 63 +++++++------------ .../transforms/stack_ops_decomposition.cc | 8 +++ .../tensor_array_ops_decomposition.cc | 4 ++ .../tensor_list_ops_decomposition.cc | 8 +++ .../tpu_variable_runtime_reformatting.cc | 10 +++ 22 files changed, 106 insertions(+), 220 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h index 039ed1bc3a8..d06dce81e09 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h @@ -23,7 +23,6 @@ limitations under the License. #include "mlir/IR/Attributes.h" // from @llvm-project #include "mlir/IR/Builders.h" // from @llvm-project #include "mlir/IR/Dialect.h" // from @llvm-project -#include "mlir/IR/Function.h" // from @llvm-project #include "mlir/IR/Matchers.h" // from @llvm-project #include "mlir/IR/Module.h" // from @llvm-project #include "mlir/IR/OpImplementation.h" // from @llvm-project diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td index d230a24afdc..71b30ae8090 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td @@ -225,25 +225,12 @@ else_branch: A function that takes 'inputs' and returns a list of TF_DerivedOperandTypeAttr Tcond = TF_DerivedOperandTypeAttr<0>; TF_DerivedOperandTypeListAttr Tin = TF_DerivedOperandTypeListAttr<1>; TF_DerivedResultTypeListAttr Tout = TF_DerivedResultTypeListAttr<0>; - TF_DerivedResultShapeListAttr output_shapes = TF_DerivedResultShapeListAttr<0>; let verifier = [{ return Verify(*this); }]; let hasCanonicalizer = 1; - - let extraClassDeclaration = [{ - // Get the then branch function. - FuncOp then_func() { - return getParentOfType().lookupSymbol(then_branch()); - } - - // Get the else branch function. 
- FuncOp else_func() { - return getParentOfType().lookupSymbol(else_branch()); - } - }]; } def TF_YieldOp : TF_Op<"Yield", @@ -625,6 +612,7 @@ body: A function that takes a list of tensors and returns another FlatSymbolRefAttr:$cond, FlatSymbolRefAttr:$body, + DefaultValuedAttr:$output_shapes, DefaultValuedAttr:$parallel_iterations, // Used to map StatelessWhile and While op defined in TensorFlow to a common @@ -637,24 +625,10 @@ body: A function that takes a list of tensors and returns another ); TF_DerivedOperandTypeListAttr T = TF_DerivedOperandTypeListAttr<0>; - TF_DerivedResultShapeListAttr output_shapes = TF_DerivedResultShapeListAttr<0>; let verifier = [{ return Verify(*this); }]; - let hasCanonicalizer = 1; - - let extraClassDeclaration = [{ - // Get the condition function. - FuncOp cond_func() { - return getParentOfType().lookupSymbol(cond()); - } - - // Get the body function. - FuncOp body_func() { - return getParentOfType().lookupSymbol(body()); - } - }]; } def TL_WhileRegionOp : TF_Op<"WhileRegion", diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc index 6183dde8581..f4f9ec42864 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc @@ -1615,10 +1615,6 @@ static LogicalResult Verify(IfOp op) { return success(); } -//===----------------------------------------------------------------------===// -// IfOp canonicalization. -//===----------------------------------------------------------------------===// - class FoldConstantIfOp : public OpRewritePattern { public: explicit FoldConstantIfOp(MLIRContext *context) @@ -1666,7 +1662,7 @@ LogicalResult FoldConstantIfOp::matchAndRewrite( void IfOp::getCanonicalizationPatterns(OwningRewritePatternList &results, MLIRContext *context) { - results.insert>(context); + results.insert(context); } //===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.h index 19a927a23d7..b2b78da8993 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.h +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.h @@ -20,7 +20,6 @@ limitations under the License. #include "mlir/IR/Attributes.h" // from @llvm-project #include "mlir/IR/Builders.h" // from @llvm-project #include "mlir/IR/Dialect.h" // from @llvm-project -#include "mlir/IR/Function.h" // from @llvm-project #include "mlir/IR/Matchers.h" // from @llvm-project #include "mlir/IR/Module.h" // from @llvm-project #include "mlir/IR/OpImplementation.h" // from @llvm-project diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_helpers.inc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_helpers.inc index 71f1560aa6c..33d51301208 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_helpers.inc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_helpers.inc @@ -578,23 +578,3 @@ LogicalResult VerifyRegionResults(Operation *op, Region ®ion, } return success(); } - -//===----------------------------------------------------------------------===// -// Function control flow canonicalization. -//===----------------------------------------------------------------------===// - -// Eliminate attributes that are not needed, but can get attached to Ops -// during import. -template -struct DropAttributes : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - // Drop the "output_shapes" attribute. 
- LogicalResult matchAndRewrite(Op op, - PatternRewriter &rewriter) const override { - bool found = op.removeAttr("output_shapes") == - MutableDictionaryAttr::RemoveResult::Removed; - return success(found); - } -}; - diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc index 564db91eed7..0d9b2610492 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc @@ -2065,14 +2065,6 @@ static LogicalResult Verify(WhileOp op) { return success(); } -//===----------------------------------------------------------------------===// -// WhileOp canonicalization. -//===----------------------------------------------------------------------===// -void WhileOp::getCanonicalizationPatterns(OwningRewritePatternList &results, - MLIRContext *context) { - results.insert>(context); -} - //===----------------------------------------------------------------------===// // WhileRegionOp //===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.h index 761c06a475c..b6e9222a370 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.h +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.h @@ -20,7 +20,6 @@ limitations under the License. #include "mlir/IR/Attributes.h" // from @llvm-project #include "mlir/IR/Builders.h" // from @llvm-project #include "mlir/IR/Dialect.h" // from @llvm-project -#include "mlir/IR/Function.h" // from @llvm-project #include "mlir/IR/Matchers.h" // from @llvm-project #include "mlir/IR/Module.h" // from @llvm-project #include "mlir/IR/OpImplementation.h" // from @llvm-project diff --git a/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir b/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir index 6830f3e1e7e..17a19c50998 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir @@ -985,36 +985,3 @@ func @testWhileRegionUnusedValue(%arg0 : tensor<*xf32>, %arg1 : tensor, %ar // CHECK: return %[[WHILE_OUT]]#0 : tensor<*xf32> return %0#0 : tensor<*xf32> } - -// Check that output_shapes attribute is removed for tf.If -func @testIfThen(tensor<*xf32>) -> tensor<*xf32> -func @testIfElse(tensor<*xf32>) -> tensor<*xf32> -// CHECK-LABEL: func @testIfDropOutputShapes -func @testIfDropOutputShapes(tensor, tensor<2xf32>) -> tensor<2xf32> { -^bb0(%arg0: tensor, %arg1: tensor<2xf32>): - // CHECK: "tf.If" - // CHECK-NOT: output_shapes - %1 = "tf.If"(%arg0, %arg1) { - then_branch = @testIfThen, else_branch = @testIfElse, is_stateless = false, output_shapes = [#tf.shape<>] - } : (tensor, tensor<2xf32>) -> tensor<2xf32> - - return %1 : tensor<2xf32> -} - -// Check that output_shapes attribute is removed for tf.Whileß -func @testWhileCond(tensor<*xf32>) -> (tensor) -func @testWhileBody(tensor<*xf32>) -> (tensor<*xf32>) -// CHECK-LABEL: func @testWhileDropOutputShapes -func @testWhileDropOutputShapes(tensor<*xf32>) -> (tensor<*xf32>) { -^bb0(%arg0: tensor<*xf32>): - // CHECK: "tf.While" - // CHECK-NOT: output_shapes - %1 = "tf.While"(%arg0) { - cond = @testWhileCond, - body = @testWhileBody, - is_stateless = false, - output_shapes = [#tf.shape<>] - } : (tensor<*xf32>) -> (tensor<*xf32>) - - return %1 : tensor<*xf32> -} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/functional-if-ops.mlir 
b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/functional-if-ops.mlir index b6933459382..d9ad36f2ce6 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/functional-if-ops.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/functional-if-ops.mlir @@ -1,13 +1,13 @@ // RUN: tf-mlir-translate -mlir-to-graphdef %s -o - | FileCheck %s -func @main(%arg0: tensor, %arg1: tensor, %arg2: tensor<4xf32>, %arg3: tensor<4xf32>) -> (tensor<4xf32>, tensor<4xf32>) { +func @main(%arg0: tensor, %arg1: tensor) -> (tensor, tensor) { %0:2 = tf_executor.graph { %outputs_2, %control_3 = tf_executor.island wraps "tf.Less"(%arg0, %arg1) : (tensor, tensor) -> tensor - %outputs_4, %control_5 = tf_executor.island wraps "tf.If"(%outputs_2, %arg2, %arg3) {else_branch = @cond_false, is_stateless = false, then_branch = @cond_true} : (tensor, tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> loc("StatefulIf") - %outputs_6, %control_7 = tf_executor.island wraps "tf.If"(%outputs_2, %arg2, %arg3) {else_branch = @cond_false, is_stateless = true, then_branch = @cond_true} : (tensor, tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> loc("StatelessIf") - tf_executor.fetch %outputs_4, %outputs_6 : tensor<4xf32>, tensor<4xf32> + %outputs_4, %control_5 = tf_executor.island wraps "tf.If"(%outputs_2, %arg0, %arg1) {else_branch = @cond_false, is_stateless = false, then_branch = @cond_true} : (tensor, tensor, tensor) -> tensor loc("StatefulIf") + %outputs_6, %control_7 = tf_executor.island wraps "tf.If"(%outputs_2, %arg0, %arg1) {else_branch = @cond_false, is_stateless = true, then_branch = @cond_true} : (tensor, tensor, tensor) -> tensor loc("StatelessIf") + tf_executor.fetch %outputs_4, %outputs_6 : tensor, tensor } - return %0#0, %0#1 : tensor<4xf32>, tensor<4xf32> + return %0#0, %0#1 : tensor, tensor } func @cond_true(%arg0: tensor<*xf32>, %arg1: tensor<*xf32>) -> tensor<*xf32> { @@ -34,32 +34,8 @@ func @cond_false(%arg0: tensor<*xf32>, %arg1: tensor<*xf32>) -> tensor<*xf32> { // CHECK-NOT: name: // CHECK: op: "If" // CHECK-NOT: is_stateless -// CHECK: attr { -// CHECK: key: "output_shapes" -// CHECK: value { -// CHECK: list { -// CHECK: shape { -// CHECK: dim { -// CHECK: size: 4 -// CHECK: } -// CHECK: } -// CHECK: } -// CHECK: } -// CHECK: } // CHECK: name: "StatelessIf" // CHECK-NOT: name: // CHECK: op: "StatelessIf" // CHECK-NOT: is_stateless -// CHECK: attr { -// CHECK: key: "output_shapes" -// CHECK: value { -// CHECK: list { -// CHECK: shape { -// CHECK: dim { -// CHECK: size: 4 -// CHECK: } -// CHECK: } -// CHECK: } -// CHECK: } -// CHECK: } diff --git a/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/functional-while-ops.mlir b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/functional-while-ops.mlir index c7a4630d985..9f14a144d9d 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/functional-while-ops.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/functional-while-ops.mlir @@ -1,12 +1,12 @@ // RUN: tf-mlir-translate -mlir-to-graphdef %s -o - | FileCheck %s -func @main(%arg0: tensor, %arg1: tensor<5xf32>) -> (tensor<5xf32>, tensor<5xf32>) { +func @main(%arg0: tensor, %arg1: tensor) -> (tensor, tensor) { %0:2 = tf_executor.graph { - %outputs_2:2, %control_3 = tf_executor.island wraps "tf.While"(%arg0, %arg1) {body = @body, cond = @cond, is_stateless = false} : (tensor, tensor<5xf32>) -> (tensor, tensor<5xf32>) loc("StatefulWhile") - %outputs_4:2, %control_5 = tf_executor.island wraps "tf.While"(%arg0, %arg1) {body = @body, cond = @cond, 
is_stateless = true} : (tensor, tensor<5xf32>) -> (tensor, tensor<5xf32>) loc("StatelessWhile") - tf_executor.fetch %outputs_2#1, %outputs_4#1 : tensor<5xf32>, tensor<5xf32> + %outputs_2:2, %control_3 = tf_executor.island wraps "tf.While"(%arg0, %arg1) {body = @body, cond = @cond, is_stateless = false} : (tensor, tensor) -> (tensor, tensor) loc("StatefulWhile") + %outputs_4:2, %control_5 = tf_executor.island wraps "tf.While"(%arg0, %arg1) {body = @body, cond = @cond, is_stateless = true} : (tensor, tensor) -> (tensor, tensor) loc("StatelessWhile") + tf_executor.fetch %outputs_2#1, %outputs_4#1 : tensor, tensor } - return %0#0, %0#1 : tensor<5xf32>, tensor<5xf32> + return %0#0, %0#1 : tensor, tensor } func @cond(%arg0: tensor<*xi32>, %arg1: tensor<*xf32>) -> tensor { @@ -36,34 +36,8 @@ func @body(%arg0: tensor<*xi32>, %arg1: tensor<*xf32>) -> (tensor<*xi32>, tensor // CHECK-NOT: name: // CHECK: op: "While" // CHECK-NOT: is_stateless -// CHECK: attr { -// CHECK: key: "output_shapes" -// CHECK: value { -// CHECK: list { -// CHECK: shape { -// CHECK: dim { -// CHECK: size: 5 -// CHECK: } -// CHECK: } -// CHECK: } -// CHECK: } -// CHECK: } - // CHECK: name: "StatelessWhile" // CHECK-NOT: name: // CHECK: op: "StatelessWhile" // CHECK-NOT: is_stateless -// CHECK: attr { -// CHECK: key: "output_shapes" -// CHECK: value { -// CHECK: list { -// CHECK: shape { -// CHECK: dim { -// CHECK: size: 5 -// CHECK: } -// CHECK: } -// CHECK: } -// CHECK: } -// CHECK: } - diff --git a/tensorflow/compiler/mlir/tensorflow/tests/resource-device-inference.mlir b/tensorflow/compiler/mlir/tensorflow/tests/resource-device-inference.mlir index a4a7c1dad2e..a9e814c647e 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/resource-device-inference.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/resource-device-inference.mlir @@ -56,7 +56,7 @@ func @propagate_if_op( "tf.If"(%arg1, %id0, %var_handle) { then_branch = @if_then, else_branch = @if_else, - is_stateless = false} + output_shapes = [], is_stateless = false} : (tensor, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) -> () tf_executor.yield @@ -128,7 +128,8 @@ func @propagate_while_op( // CHECK-NEXT: "tf.While" "tf.While"(%arg1, %id0, %var_handle) { body = @while_body, - cond = @while_cond, is_stateless = false} + cond = @while_cond, + output_shapes = [], is_stateless = false} : (tensor, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) -> (tensor, tensor<*x!tf.resource>>, @@ -208,7 +209,8 @@ func @error_on_conflict_multiple_callers( : () -> tensor<*x!tf.resource>> "tf.If"(%arg1, %id0, %var_handle) { then_branch = @if_then_and_else, - else_branch = @if_then_and_else, is_stateless = false} + else_branch = @if_then_and_else, + output_shapes = [], is_stateless = false} : (tensor, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) -> () "tf.If"(%arg1, %var_handle, %id0) { diff --git a/tensorflow/compiler/mlir/tensorflow/tests/resource_op_lifting.mlir b/tensorflow/compiler/mlir/tensorflow/tests/resource_op_lifting.mlir index d8a87b9bdb4..cd93e1423ea 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/resource_op_lifting.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/resource_op_lifting.mlir @@ -147,7 +147,8 @@ func @cluster_with_loop() -> () { "tf_device.cluster"() ( { // CHECK: %[[WHILE:.*]]:2 = "tf.While"(%[[COUNT]], %[[READ]]) %2:3 = "tf.While"(%0, %1, %unused) - {body = @while_body, cond = @while_cond, device = "", is_stateless = false} + {body = @while_body, cond = @while_cond, device = "", is_stateless = false, + output_shapes = [#tf.shape<>, 
#tf.shape<>]} : (tensor, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) -> (tensor, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) // CHECK: tf_device.return %[[WHILE]]#1 : tensor @@ -196,7 +197,8 @@ func @cluster_with_loop() -> () { "tf_device.cluster"() ( { // CHECK: %[[WHILE:.*]] = "tf.While"(%[[READ]]) %1 = "tf.While"(%0) { - body = @while_body, cond = @while_cond, device = "", is_stateless = false} + body = @while_body, cond = @while_cond, device = "", is_stateless = false, + output_shapes = [#tf.shape<>]} : (tensor<*x!tf.resource>>) -> (tensor<*x!tf.resource>>) // CHECK: tf_device.return %[[WHILE]] : tensor @@ -237,7 +239,8 @@ func @cluster_with_loop() -> () { "tf_device.cluster"() ( { // CHECK: %[[WHILE:.*]] = "tf.While"(%[[READ]]) %1 = "tf.While"(%0) { - body = @while_body, cond = @while_cond, device = "", is_stateless = false} + body = @while_body, cond = @while_cond, device = "", is_stateless = false, + output_shapes = [#tf.shape<>]} : (tensor<*x!tf.resource>>) -> (tensor<*x!tf.resource>>) // CHECK: tf_device.return @@ -275,7 +278,8 @@ func @cluster_with_nested_loop() -> () { "tf_device.cluster"() ( { // CHECK: %[[WHILE:.*]] = "tf.While"(%[[READ]]) %2:2 = "tf.While"(%0, %1) { - body = @while_body, cond = @while_cond, device = "", is_stateless = false} + body = @while_body, cond = @while_cond, device = "", is_stateless = false, + output_shapes = [#tf.shape<>, #tf.shape<>]} : (tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) -> (tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) // CHECK: tf_device.return %[[WHILE]] : tensor @@ -291,7 +295,8 @@ func @while_body(%arg0: tensor<*x!tf.resource>>, %arg1: tensor<*x!tf -> (tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) { // CHECK: %[[WHILE:.*]] = "tf.While"(%[[BARG0]]) %0:2 = "tf.While"(%arg0, %arg1) { - body = @while_body1, cond = @while_cond1, device = "", is_stateless = false} + body = @while_body1, cond = @while_cond1, device = "", is_stateless = false, + output_shapes = [#tf.shape<>, #tf.shape<>]} : (tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) -> (tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) // CHECK-NEXT: return %[[WHILE]] @@ -329,7 +334,8 @@ func @cluster_with_loop() -> () { %0 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource>> "tf_device.cluster"() ( { %1 = "tf.While"(%0) { - body = @while_body, cond = @while_cond, device = "", is_stateless = false} + body = @while_body, cond = @while_cond, device = "", is_stateless = false, + output_shapes = [#tf.shape<>]} : (tensor<*x!tf.resource>>) -> (tensor<*x!tf.resource>>) tf_device.return }) {cluster_attr = "cluster_attr"} : () -> () @@ -353,7 +359,8 @@ func @cluster_with_loop() -> () { %0 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource>> "tf_device.cluster"() ( { %1 = "tf.While"(%0) { - body = @while_body, cond = @while_cond, device = "", is_stateless = false} + body = @while_body, cond = @while_cond, device = "", is_stateless = false, + output_shapes = [#tf.shape<>]} : (tensor<*x!tf.resource>>) -> (tensor<*x!tf.resource>>) tf_device.return }) {cluster_attr = "cluster_attr"} : () -> () @@ -377,7 +384,8 @@ func @cluster_with_loop() -> () { %0 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource>> "tf_device.cluster"() ( { %1 = "tf.While"(%0) { - body = @while_body, cond = @while_cond, device = "", is_stateless = false} + body = @while_body, cond = @while_cond, device = "", is_stateless = false, + output_shapes = [#tf.shape<>]} : (tensor<*x!tf.resource>>) -> 
(tensor<*x!tf.resource>>) tf_device.return }) {cluster_attr = "cluster_attr"} : () -> () diff --git a/tensorflow/compiler/mlir/tensorflow/tests/shape_inference.mlir b/tensorflow/compiler/mlir/tensorflow/tests/shape_inference.mlir index 4a5e3c8deaa..5a8f63ec63d 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/shape_inference.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/shape_inference.mlir @@ -100,11 +100,10 @@ func @multiple_blocks_one_return(%arg0: tensor) -> tensor<*xf32> { return %1 : tensor } - // CHECK-LABEL: func @shape_from_if_to_branch_functions_to_results - // CHECK-SAME: (%arg0: tensor, %arg1: tensor<1x2x3xf32>) -> tensor<1x2x3xf32> - func @shape_from_if_to_branch_functions_to_results(%arg0: tensor, %arg1: tensor<1x2x3xf32>) -> tensor<*xf32> { - %0 = "tf.If"(%arg0, %arg1) {Tcond = i1, Tin = ["tfdtype$DT_FLOAT"], Tout = ["tfdtype$DT_FLOAT"], else_branch = @if_else_branch, is_stateless = true, name = "if", then_branch = @if_then_branch} : (tensor, tensor<1x2x3xf32>) -> tensor<*xf32> - return %0 : tensor<*xf32> + // CHECK-LABEL: func @shape_from_if_to_branch_functions + func @shape_from_if_to_branch_functions(%arg0: tensor, %arg1: tensor<1x2x3xf32>) -> tensor<1x2x3xf32> { + %0 = "tf.If"(%arg0, %arg1) {Tcond = i1, Tin = ["tfdtype$DT_FLOAT"], Tout = ["tfdtype$DT_FLOAT"], _xla_propagate_compile_time_consts = true, device = "", else_branch = @if_else_branch, is_stateless = true, name = "if", then_branch = @if_then_branch} : (tensor, tensor<1x2x3xf32>) -> tensor<1x2x3xf32> + return %0 : tensor<1x2x3xf32> } // CHECK-LABEL: func @if_then_branch diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tpu-variable-runtime-reformatting.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tpu-variable-runtime-reformatting.mlir index 1e308b42bfc..43be8743e51 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tpu-variable-runtime-reformatting.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tpu-variable-runtime-reformatting.mlir @@ -20,7 +20,8 @@ module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, pr {T = ["tfdtype$DT_INT32", "tfdtype$DT_RESOURCE", "tfdtype$DT_RESOURCE", "tfdtype$DT_RESOURCE", "tfdtype$DT_RESOURCE"], body = @while_body_7560, - cond = @while_cond_7550, device = "", is_stateless = false} + cond = @while_cond_7550, device = "", is_stateless = false, + output_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>]} : (tensor, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) -> (tensor, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>, @@ -216,7 +217,8 @@ module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, pr {T = ["tfdtype$DT_INT32", "tfdtype$DT_RESOURCE", "tfdtype$DT_RESOURCE", "tfdtype$DT_RESOURCE", "tfdtype$DT_RESOURCE"], body = @while_body_7560, - cond = @while_cond_7550, device = "", is_stateless = false} + cond = @while_cond_7550, device = "", is_stateless = false, + output_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>]} : (tensor, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) -> (tensor, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>, @@ -303,7 +305,8 @@ module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, pr {T = ["tfdtype$DT_INT32", "tfdtype$DT_RESOURCE", "tfdtype$DT_RESOURCE", "tfdtype$DT_RESOURCE"], body = @while_body_7560, - cond = @while_cond_7550, device = "", is_stateless = false} + cond = @while_cond_7550, device = "", 
is_stateless = false, + output_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>]} : (tensor, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) -> (tensor, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>, diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tpu_space_to_depth_pass.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tpu_space_to_depth_pass.mlir index 280986a7ee1..199426b1aa9 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tpu_space_to_depth_pass.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tpu_space_to_depth_pass.mlir @@ -7,7 +7,7 @@ module attributes {tf.devices = {"/job:localhost/replica:0/task:0/device:CPU:0" %0 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor %1 = "tf.Const"() {value = dense<2> : tensor} : () -> tensor %2 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor - %3:10 = "tf.While"(%2, %1, %2, %0, %1, %arg2, %arg4, %arg5, %arg6, %arg7) {_lower_using_switch_merge = true, _num_original_outputs = 10 : i64, _read_only_resource_inputs = [], body = @while_body_2710, cond = @while_cond_2700, device = "", is_stateless = false, parallel_iterations = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor>>, tensor>>, tensor>>, tensor>>) -> (tensor, tensor, tensor, tensor, tensor, tensor, tensor>>, tensor>>, tensor>>, tensor>>) + %3:10 = "tf.While"(%2, %1, %2, %0, %1, %arg2, %arg4, %arg5, %arg6, %arg7) {_lower_using_switch_merge = true, _num_original_outputs = 10 : i64, _read_only_resource_inputs = [], body = @while_body_2710, cond = @while_cond_2700, device = "", is_stateless = false, output_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>], parallel_iterations = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor>>, tensor>>, tensor>>, tensor>>) -> (tensor, tensor, tensor, tensor, tensor, tensor, tensor>>, tensor>>, tensor>>, tensor>>) return } // CHECK-LABEL: func @while_body_2710 diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/region_control_flow_to_functional.cc b/tensorflow/compiler/mlir/tensorflow/transforms/region_control_flow_to_functional.cc index 2ae2a976767..f1004fa049e 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/region_control_flow_to_functional.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/region_control_flow_to_functional.cc @@ -373,7 +373,8 @@ LogicalResult RegionControlFlowToFunctional::ConvertWhileOp( OpBuilder builder(while_region); auto while_op = builder.create( while_region.getLoc(), new_result_types, new_inputs, cond_name, body_name, - while_region.parallel_iterations(), while_region.is_stateless()); + builder.getArrayAttr({}), while_region.parallel_iterations(), + while_region.is_stateless()); // Redirect old results to new results. for (auto it : llvm::zip( diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc b/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc index a9caeaac50d..74679f19941 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc @@ -627,6 +627,8 @@ LogicalResult HandleWhileLoop(TF::WhileOp while_op, FuncOp body, FuncOp cond) { }); // Recreate the while op. 
OpBuilder builder(while_op); + auto new_output_shapes = FilterRange>( + while_op.output_shapes().getValue(), resource_arg_uses); // Now use the filtered original operands, which will be replaced by // AddLoadsStoresOutsideControlFlowOp(). auto new_while = builder.create( @@ -634,7 +636,8 @@ LogicalResult HandleWhileLoop(TF::WhileOp while_op, FuncOp body, FuncOp cond) { FilterRange(while_op.getOperands(), resource_arg_uses), while_op.getAttrs()); - // Prepare for AddLoadsStoresOutsideControlFlowOp(). + // Prepare for AddLoadsStoresOutsideControlFlowOp() and update + // new_output_shapes. llvm::SmallDenseMap> arg_data_type_and_updated_output_index; for (const auto& entry : remaining_resource_data_types) { @@ -644,9 +647,14 @@ LogicalResult HandleWhileLoop(TF::WhileOp while_op, FuncOp body, FuncOp cond) { : entry.getFirst(); arg_data_type_and_updated_output_index[entry.getFirst()] = { entry.getSecond(), update_index}; + if (!new_output_shapes.empty()) { + new_output_shapes[entry.getFirst()] = + tensorflow::ConvertTypeToTensorShapeAttr(entry.getSecond()); + } } AddLoadsStoresOutsideControlFlowOp(new_while, arg_data_type_and_updated_output_index); + new_while.setAttr("output_shapes", builder.getArrayAttr(new_output_shapes)); // Replace uses. for (int64_t i = 0; i < old_to_new_indices.size(); ++i) { if (old_to_new_indices[i] >= 0) { diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc index 104cce1fb89..2551e68dd74 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc @@ -262,6 +262,22 @@ bool InferShapeForCall(Operation* op) { return changed; } +// Infer the shape IfRegion outputs based on the shapes of the then and else +// yields. +bool InferShapeForIfRegion(IfRegionOp op) { + bool changed = false; + + Operation* then_yield = op.then_branch().front().getTerminator(); + Operation* else_yield = op.else_branch().front().getTerminator(); + for (auto result : zip(op.getResults(), then_yield->getOperandTypes(), + else_yield->getOperandTypes())) { + // If then and else types do not match, skip refinement for that result. + if (std::get<1>(result) != std::get<2>(result)) continue; + changed = RefineResultType(op, std::get<0>(result), std::get<1>(result)) || + changed; + } + return changed; +} bool InferShapeForCast(CastOp op, Dialect* tf_dialect) { Value result = op.getResult(); if (!CanBeRefined(result.getType())) return false; @@ -290,37 +306,6 @@ bool InferShapeForCast(CastOp op, Dialect* tf_dialect) { return true; } -// Infer the shape IfOp outputs based on the shapes of the then and else -// function result types. -bool InferShapeForIf(IfOp op) { - bool changed = false; - for (auto it : - llvm::zip(op.getResults(), op.then_func().getType().getResults(), - op.else_func().getType().getResults())) { - // If then and else types do not match, skip refinement for that result. - if (std::get<1>(it) != std::get<2>(it)) continue; - changed = RefineResultType(op, std::get<0>(it), std::get<1>(it)) || changed; - } - return changed; -} - -// Infer the shape IfRegion outputs based on the shapes of the then and else -// yields. 
-bool InferShapeForIfRegion(IfRegionOp op) { - bool changed = false; - - Operation* then_yield = op.then_branch().front().getTerminator(); - Operation* else_yield = op.else_branch().front().getTerminator(); - for (auto result : zip(op.getResults(), then_yield->getOperandTypes(), - else_yield->getOperandTypes())) { - // If then and else types do not match, skip refinement for that result. - if (std::get<1>(result) != std::get<2>(result)) continue; - changed = RefineResultType(op, std::get<0>(result), std::get<1>(result)) || - changed; - } - return changed; -} - bool RefineWithInferTypeOpInterface(InferTypeOpInterface infer_ti, Dialect* tf_dialect) { Operation* op = infer_ti.getOperation(); @@ -783,23 +768,17 @@ bool ShapeInference::InferShapeForSingleOperation(Operation* op) { op)) return InferShapeForCall(op); + // Handle IfRegion operations by infering return shape from the then and else + // branches. + if (auto if_region = dyn_cast(op)) + return InferShapeForIfRegion(if_region); + // tf.Cast are only inferred if they have at least one user in the TF dialect // or feeding into the function return. This is necessary to avoid inserting // casts which cannot be refined. if (auto cast_op = dyn_cast(op)) return InferShapeForCast(cast_op, tf_dialect_); - // Handle IfOp here by inferring the shape from the else/then function - // results. Since `output_shapes` is a derived attribute, avoid going down the - // TF InferenceContext path as IfOp shape inference is implemented as just - // a lookup of the output_shapes attribute. - if (auto if_op = dyn_cast(op)) return InferShapeForIf(if_op); - - // Handle IfRegion operations by infering return shape from the then and else - // branches. - if (auto if_region = dyn_cast(op)) - return InferShapeForIfRegion(if_region); - StringRef op_name = op->getName().getStringRef(); // Drop the `tf.` prefix to query TF registry. auto node_name = diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/stack_ops_decomposition.cc b/tensorflow/compiler/mlir/tensorflow/transforms/stack_ops_decomposition.cc index 2dc45ee9816..8d6e1c2ce30 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/stack_ops_decomposition.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/stack_ops_decomposition.cc @@ -197,16 +197,24 @@ LogicalResult HandleWhileOp( if (!signature_change) return success(); // Create the new while op. auto new_while_operands = llvm::to_vector<8>(while_op.getOperands()); + auto new_output_shapes = + llvm::to_vector<8>(while_op.output_shapes().getValue()); OpBuilder builder(while_op); assert(while_op.getNumOperands() == while_op.getNumResults()); for (int64_t i = 0; i < while_op.getNumResults(); ++i) { auto it = data_var_to_size_var.find(while_op.getOperand(i)); if (it == data_var_to_size_var.end()) continue; new_while_operands.push_back(it->getSecond()); + if (!new_output_shapes.empty()) { + // Size is a scalar shape. 
+ new_output_shapes.push_back( + mlir::TF::ShapeAttr::get(builder.getContext(), ArrayRef())); + } } auto new_while = builder.create(while_op.getLoc(), body.getType().getInputs(), new_while_operands, while_op.getAttrs()); + new_while.setAttr("output_shapes", builder.getArrayAttr(new_output_shapes)); for (int64_t i = 0; i < while_op.getNumResults(); ++i) { if (!getElementTypeOrSelf(while_op.getOperand(i).getType()) .isa()) { diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tensor_array_ops_decomposition.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tensor_array_ops_decomposition.cc index 2c3422e3e00..cb30bc35a7a 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tensor_array_ops_decomposition.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tensor_array_ops_decomposition.cc @@ -595,6 +595,8 @@ LogicalResult HandleWhileOp(TF::WhileOp while_op, ModuleOp module, auto new_while = builder.create(while_op.getLoc(), body.getType().getInputs(), operands, while_op.getAttrs()); + // Clear the output shapes as it is not needed for XLA lowering. + new_while.setAttr("output_shapes", builder.getArrayAttr({})); for (int64_t i = 0; i < while_op.getNumOperands(); ++i) { if (ta_arg_buffer_type(i)) { while_op.getResult(i).replaceAllUsesWith(while_op.getOperand(i)); @@ -661,6 +663,8 @@ LogicalResult HandleIfOp(TF::IfOp if_op, ModuleOp module, auto new_if = builder.create(if_op.getLoc(), then_branch.getType().getResults(), operands, if_op.getAttrs()); + // Clear the output shapes as it is not needed for XLA lowering. + new_if.setAttr("output_shapes", builder.getArrayAttr({})); auto ret_forwards_input = [](FuncOp f, int64_t ret_ind) -> int64_t { auto retval = f.front().getTerminator()->getOperand(ret_ind); auto arg = retval.dyn_cast(); diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tensor_list_ops_decomposition.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tensor_list_ops_decomposition.cc index cd055a8dc4a..5cbc42a862c 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tensor_list_ops_decomposition.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tensor_list_ops_decomposition.cc @@ -190,14 +190,22 @@ LogicalResult HandleWhileOp( } // Create the new while op. auto new_while_operands = llvm::to_vector<8>(while_op.getOperands()); + auto new_output_shapes = + llvm::to_vector<8>(while_op.output_shapes().getValue()); for (int64_t i = 0; i < while_op.getNumResults(); ++i) { auto it = buffer_to_size->find(while_op.getOperand(i)); if (it == buffer_to_size->end()) continue; new_while_operands.push_back(it->getSecond().size); + if (!new_output_shapes.empty()) { + // Size is a scalar shape. 
+ new_output_shapes.push_back( + mlir::TF::ShapeAttr::get(builder.getContext(), ArrayRef())); + } } auto new_while = builder.create(while_op.getLoc(), body.getType().getInputs(), new_while_operands, while_op.getAttrs()); + new_while.setAttr("output_shapes", builder.getArrayAttr(new_output_shapes)); for (const auto& entry : output_buffer_to_size) { (*buffer_to_size)[new_while.getResult(std::get<0>(entry))] = { new_while.getResult(std::get<1>(entry)), std::get<2>(entry)}; diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_variable_runtime_reformatting.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_variable_runtime_reformatting.cc index 2b2a33b8bc2..12ce8c57f73 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_variable_runtime_reformatting.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_variable_runtime_reformatting.cc @@ -365,6 +365,16 @@ TF::WhileOp AddStateVarsToWhileOp(TF::WhileOp while_op, FuncOp body, while_op.getLoc(), append_types(llvm::to_vector<4>(while_op.getResultTypes())), new_while_operands, while_op.getAttrs()); + if (new_while_op.output_shapes().size() != 0) { + auto new_output_shapes = llvm::to_vector<4>(new_while_op.output_shapes()); + // VarHandleOp is a scalar shape resource. + for (int64_t i = 0; i < state_vars.size(); ++i) { + new_output_shapes.push_back( + mlir::TF::ShapeAttr::get(builder.getContext(), ArrayRef())); + } + new_while_op.setAttr("output_shapes", + builder.getArrayAttr(new_output_shapes)); + } while_op.replaceAllUsesWith( new_while_op.getResults().take_front(while_op.getNumResults())); while_op.erase(); From 66709ac555d296af056d5cb5c67162589465a830 Mon Sep 17 00:00:00 2001 From: Saurabh Saxena Date: Tue, 21 Jul 2020 10:29:35 -0700 Subject: [PATCH 0944/2522] Create experimental dirs c/experimental/{ops|gradients} for adding ops and gradient functions. We will eventually machine generate ops in experimental/ops. 
PiperOrigin-RevId: 322389665 Change-Id: I6ceaefa1c94e4f276c888a1a31c1033e2b7b8f80 --- tensorflow/c/eager/BUILD | 2 + tensorflow/c/eager/gradients_test.cc | 47 +--------------- tensorflow/c/experimental/gradients/BUILD | 23 ++++++++ .../c/experimental/gradients/math_grad.cc | 54 +++++++++++++++++++ .../c/experimental/gradients/math_grad.h | 26 +++++++++ tensorflow/c/experimental/ops/BUILD | 24 +++++++++ tensorflow/c/experimental/ops/array_ops.cc | 38 +++++++++++++ tensorflow/c/experimental/ops/array_ops.h | 31 +++++++++++ 8 files changed, 200 insertions(+), 45 deletions(-) create mode 100644 tensorflow/c/experimental/gradients/BUILD create mode 100644 tensorflow/c/experimental/gradients/math_grad.cc create mode 100644 tensorflow/c/experimental/gradients/math_grad.h create mode 100644 tensorflow/c/experimental/ops/BUILD create mode 100644 tensorflow/c/experimental/ops/array_ops.cc create mode 100644 tensorflow/c/experimental/ops/array_ops.h diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index fffd22f2698..dca35b78e0f 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -240,6 +240,8 @@ tf_cuda_cc_test( "//tensorflow/c:c_api", "//tensorflow/c:c_test_util", "//tensorflow/c:tf_status_helper", + "//tensorflow/c/experimental/gradients:math_grad", + "//tensorflow/c/experimental/ops:array_ops", "//tensorflow/cc/profiler", "//tensorflow/compiler/mlir/tensorflow/c:mlir_c_api_registration", "//tensorflow/core:lib", diff --git a/tensorflow/c/eager/gradients_test.cc b/tensorflow/c/eager/gradients_test.cc index 5820058f3e2..0a3d267e937 100644 --- a/tensorflow/c/eager/gradients_test.cc +++ b/tensorflow/c/eager/gradients_test.cc @@ -23,6 +23,8 @@ limitations under the License. #include "tensorflow/c/eager/c_api_unified_experimental.h" #include "tensorflow/c/eager/c_api_unified_experimental_internal.h" #include "tensorflow/c/eager/gradients_internal.h" +#include "tensorflow/c/experimental/gradients/math_grad.h" +#include "tensorflow/c/experimental/ops/array_ops.h" #include "tensorflow/c/tf_status_helper.h" #include "tensorflow/c/tf_tensor.h" #include "tensorflow/core/lib/llvm_rtti/llvm_rtti.h" @@ -42,55 +44,10 @@ class CppGradients } }; -// Creates an Identity op. 
-Status Identity(AbstractContext* ctx, - absl::Span inputs, - absl::Span outputs, const char* name) { - AbstractOperationPtr identity_op(ctx->CreateOperation()); - TF_RETURN_IF_ERROR( - identity_op->Reset("Identity", /*raw_device_name=*/nullptr)); - if (isa(identity_op.get())) { - TF_RETURN_IF_ERROR(dyn_cast(identity_op.get()) - ->SetOpName(name)); - } - TF_RETURN_IF_ERROR(identity_op->AddInput(inputs[0])); - int num_retvals = 1; - TF_RETURN_IF_ERROR(identity_op->Execute(outputs, &num_retvals)); - return Status::OK(); -} - -// =================== Register gradients for Add ============================ -class AddGradientFunction : public GradientFunction { - public: - explicit AddGradientFunction(AbstractContext* ctx) : ctx_(ctx) {} - Status Compute(absl::Span grad_inputs, - std::vector* grad_outputs) override { - grad_outputs->resize(2); - std::vector identity_outputs(1); - TF_RETURN_IF_ERROR(Identity(ctx_, {grad_inputs[0]}, - absl::MakeSpan(identity_outputs), "Id0")); - (*grad_outputs)[0] = identity_outputs[0]; - TF_RETURN_IF_ERROR(Identity(ctx_, {grad_inputs[0]}, - absl::MakeSpan(identity_outputs), "Id1")); - (*grad_outputs)[1] = identity_outputs[0]; - return Status::OK(); - } - ~AddGradientFunction() override {} - - private: - AbstractContext* ctx_; -}; - -GradientFunction* AddRegisterer(const ForwardOperation& op) { - return new AddGradientFunction(op.ctx); -} - Status RegisterGradients(GradientRegistry* registry) { return registry->Register("Add", AddRegisterer); } -// =================== End gradient registrations ============================ - // Computes `inputs[0] + inputs[1]` and records it on the tape. Status Add(AbstractContext* ctx, Tape* tape, absl::Span inputs, diff --git a/tensorflow/c/experimental/gradients/BUILD b/tensorflow/c/experimental/gradients/BUILD new file mode 100644 index 00000000000..e3acdf7e2c3 --- /dev/null +++ b/tensorflow/c/experimental/gradients/BUILD @@ -0,0 +1,23 @@ +# Library of gradient functions. +package( + licenses = ["notice"], # Apache 2.0 +) + +cc_library( + name = "math_grad", + srcs = ["math_grad.cc"], + hdrs = [ + "math_grad.h", + ], + visibility = [ + "//tensorflow:internal", + ], + deps = [ + "//tensorflow/c/eager:abstract_operation", + "//tensorflow/c/eager:abstract_tensor_handle", + "//tensorflow/c/eager:c_api_unified_internal", + "//tensorflow/c/eager:gradients", + "//tensorflow/c/experimental/ops:array_ops", + "//tensorflow/core/lib/llvm_rtti", + ], +) diff --git a/tensorflow/c/experimental/gradients/math_grad.cc b/tensorflow/c/experimental/gradients/math_grad.cc new file mode 100644 index 00000000000..e27cbb2e02f --- /dev/null +++ b/tensorflow/c/experimental/gradients/math_grad.cc @@ -0,0 +1,54 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tensorflow/c/experimental/gradients/math_grad.h" + +#include "tensorflow/c/experimental/ops/array_ops.h" + +using tensorflow::ops::Identity; + +namespace tensorflow { +namespace gradients { +namespace { + +class AddGradientFunction : public GradientFunction { + public: + explicit AddGradientFunction(AbstractContext* ctx) : ctx_(ctx) {} + Status Compute(absl::Span grad_inputs, + std::vector* grad_outputs) override { + grad_outputs->resize(2); + std::vector identity_outputs(1); + // TODO(b/145674566): Handle name unification in tracing code. + // TODO(b/161805092): Support broadcasting. + TF_RETURN_IF_ERROR(ops::Identity( + ctx_, {grad_inputs[0]}, absl::MakeSpan(identity_outputs), "Identity0")); + (*grad_outputs)[0] = identity_outputs[0]; + TF_RETURN_IF_ERROR(ops::Identity( + ctx_, {grad_inputs[0]}, absl::MakeSpan(identity_outputs), "Identity1")); + (*grad_outputs)[1] = identity_outputs[0]; + return Status::OK(); + } + ~AddGradientFunction() override {} + + private: + AbstractContext* ctx_; +}; + +} // namespace + +GradientFunction* AddRegisterer(const ForwardOperation& op) { + return new AddGradientFunction(op.ctx); +} +} // namespace gradients +} // namespace tensorflow diff --git a/tensorflow/c/experimental/gradients/math_grad.h b/tensorflow/c/experimental/gradients/math_grad.h new file mode 100644 index 00000000000..473253f9b27 --- /dev/null +++ b/tensorflow/c/experimental/gradients/math_grad.h @@ -0,0 +1,26 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_C_EXPERIMENTAL_GRADIENTS_MATH_GRAD_H_ +#define TENSORFLOW_C_EXPERIMENTAL_GRADIENTS_MATH_GRAD_H_ + +#include "tensorflow/c/eager/gradients.h" + +namespace tensorflow { +namespace gradients { +GradientFunction* AddRegisterer(const ForwardOperation& op); +} // namespace gradients +} // namespace tensorflow + +#endif // TENSORFLOW_C_EXPERIMENTAL_GRADIENTS_MATH_GRAD_H_ diff --git a/tensorflow/c/experimental/ops/BUILD b/tensorflow/c/experimental/ops/BUILD new file mode 100644 index 00000000000..312709f4332 --- /dev/null +++ b/tensorflow/c/experimental/ops/BUILD @@ -0,0 +1,24 @@ +# Experimental ops. These will eventually be replaced by machine-generated versions. 
+package( + licenses = ["notice"], # Apache 2.0 +) + +cc_library( + name = "array_ops", + srcs = [ + "array_ops.cc", + ], + hdrs = [ + "array_ops.h", + ], + visibility = [ + "//tensorflow:internal", + ], + deps = [ + "//tensorflow/c/eager:abstract_operation", + "//tensorflow/c/eager:abstract_tensor_handle", + "//tensorflow/c/eager:c_api_unified_internal", + "//tensorflow/core/lib/llvm_rtti", + "//tensorflow/core/platform:errors", + ], +) diff --git a/tensorflow/c/experimental/ops/array_ops.cc b/tensorflow/c/experimental/ops/array_ops.cc new file mode 100644 index 00000000000..e38b00088cf --- /dev/null +++ b/tensorflow/c/experimental/ops/array_ops.cc @@ -0,0 +1,38 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/c/experimental/ops/array_ops.h" + +#include "tensorflow/core/platform/errors.h" + +namespace tensorflow { +namespace ops { +// Creates an Identity op. +Status Identity(AbstractContext* ctx, + absl::Span inputs, + absl::Span outputs, const char* name) { + AbstractOperationPtr identity_op(ctx->CreateOperation()); + TF_RETURN_IF_ERROR( + identity_op->Reset("Identity", /*raw_device_name=*/nullptr)); + if (isa(identity_op.get())) { + TF_RETURN_IF_ERROR(dyn_cast(identity_op.get()) + ->SetOpName(name)); + } + TF_RETURN_IF_ERROR(identity_op->AddInput(inputs[0])); + int num_retvals = 1; + return identity_op->Execute(outputs, &num_retvals); +} + +} // namespace ops +} // namespace tensorflow diff --git a/tensorflow/c/experimental/ops/array_ops.h b/tensorflow/c/experimental/ops/array_ops.h new file mode 100644 index 00000000000..8a9db484c2e --- /dev/null +++ b/tensorflow/c/experimental/ops/array_ops.h @@ -0,0 +1,31 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_C_EXPERIMENTAL_OPS_ARRAY_OPS_H_ +#define TENSORFLOW_C_EXPERIMENTAL_OPS_ARRAY_OPS_H_ + +#include "tensorflow/c/eager/abstract_operation.h" +#include "tensorflow/c/eager/abstract_tensor_handle.h" +#include "tensorflow/c/eager/c_api_unified_experimental_internal.h" +#include "tensorflow/core/lib/llvm_rtti/llvm_rtti.h" + +namespace tensorflow { +namespace ops { +Status Identity(AbstractContext* ctx, + absl::Span inputs, + absl::Span outputs, const char* name); +} // namespace ops +} // namespace tensorflow + +#endif // TENSORFLOW_C_EXPERIMENTAL_OPS_ARRAY_OPS_H_ From dca2939a9b1828780338ae1275c74ee141b1210e Mon Sep 17 00:00:00 2001 From: Sayed Hadi Hashemi Date: Tue, 21 Jul 2020 11:04:27 -0700 Subject: [PATCH 0945/2522] Update CUDA11+cuDNN8 RBE Dockerfile with CUDA 11 GA. PiperOrigin-RevId: 322398193 Change-Id: I03e2f8e089e9660c87fcf3984c4707f1070c74e2 --- ....rbe.cuda11.0-cudnn8-ubuntu18.04-manylinux2010-multipython | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda11.0-cudnn8-ubuntu18.04-manylinux2010-multipython b/tensorflow/tools/ci_build/Dockerfile.rbe.cuda11.0-cudnn8-ubuntu18.04-manylinux2010-multipython index cd841a77aba..4092346dc7f 100644 --- a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda11.0-cudnn8-ubuntu18.04-manylinux2010-multipython +++ b/tensorflow/tools/ci_build/Dockerfile.rbe.cuda11.0-cudnn8-ubuntu18.04-manylinux2010-multipython @@ -8,7 +8,7 @@ # --tag "gcr.io/tensorflow-testing/nosla-cuda11.0-cudnn8-ubuntu18.04-manylinux2010-multipython" . # $ docker push gcr.io/tensorflow-testing/nosla-cuda11.0-cudnn8-ubuntu18.04-manylinux2010-multipython -FROM nvidia/cuda:11.0-cudnn8-devel-ubuntu18.04-rc as devtoolset +FROM nvidia/cuda:11.0-cudnn8-devel-ubuntu18.04 as devtoolset ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y \ @@ -35,7 +35,7 @@ RUN /build_devtoolset.sh devtoolset-7 /dt7 RUN /build_devtoolset.sh devtoolset-8 /dt8 # TODO(klimek): Split up into two different docker images. 
-FROM nvidia/cuda:11.0-cudnn8-devel-ubuntu18.04-rc +FROM nvidia/cuda:11.0-cudnn8-devel-ubuntu18.04 COPY --from=devtoolset /dt7 /dt7 COPY --from=devtoolset /dt8 /dt8 From e01e164d5b1bb45923db17c17290e982fe9102ef Mon Sep 17 00:00:00 2001 From: Gaurav Jain Date: Tue, 21 Jul 2020 11:29:09 -0700 Subject: [PATCH 0946/2522] Allocate test tensor on CPU to avoid OOM on GPU PiperOrigin-RevId: 322403954 Change-Id: Ia4abe61fbad2e3c3f02ae1a6998446ed8bc5386b --- .../python/kernel_tests/reshape_op_test.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/kernel_tests/reshape_op_test.py b/tensorflow/python/kernel_tests/reshape_op_test.py index c711d7a6a88..e84458067b5 100644 --- a/tensorflow/python/kernel_tests/reshape_op_test.py +++ b/tensorflow/python/kernel_tests/reshape_op_test.py @@ -22,6 +22,7 @@ import numpy as np from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops @@ -202,13 +203,14 @@ class ReshapeTest(test.TestCase): self.assertEqual([100, 1], y.get_shape().as_list()) def testInt64Shape(self): - x = array_ops.zeros([50000, 50000], dtype=dtypes.bool) - # Provide dimension larger than int32 - y = array_ops.reshape(x, [50000**2]) - self.assertEqual([50000**2], y.get_shape().as_list()) - # Even if first dimension is within int32, ensure we correctly go to int64 - y = array_ops.reshape(x, [1, 50000**2]) - self.assertEqual([1, 50000**2], y.get_shape().as_list()) + with ops.device("/device:CPU:0"): + x = array_ops.zeros([50000, 50000], dtype=dtypes.bool) + # Provide dimension larger than int32 + y = array_ops.reshape(x, [50000**2]) + self.assertEqual([50000**2], y.get_shape().as_list()) + # Even if first dimension is within int32, ensure we correctly go to int64 + y = array_ops.reshape(x, [1, 50000**2]) + self.assertEqual([1, 50000**2], y.get_shape().as_list()) if __name__ == "__main__": From 0f3763e26dea978bf51c8e5170f2dbe03a973d3c Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Tue, 21 Jul 2020 18:45:46 +0000 Subject: [PATCH 0947/2522] Fix issue in tf.ones where tf.quint8/quint16 does not work in graph mode This PR tries to address the issue where tf.ones with tf.quint8/quint16 does not work in graph mode: ``` >>> @tf.function(autograph=False) ... def f(): ... return tf.ones([], tf.qint16) ... >>> f() ... ... allow_broadcast=allow_broadcast)) File "/Library/Python/3.7/site-packages/tensorflow/python/framework/tensor_util.py", line 456, in make_tensor_proto _AssertCompatible(values, dtype) File "/Library/Python/3.7/site-packages/tensorflow/python/framework/tensor_util.py", line 336, in _AssertCompatible (dtype.name, repr(mismatch), type(mismatch).__name__)) TypeError: Expected qint16, got 1 of type 'int' instead. ``` The reason is similar to the internal error encountered in 41421.
Signed-off-by: Yong Tang --- tensorflow/python/kernel_tests/constant_op_test.py | 10 ++++++++++ tensorflow/python/ops/array_ops.py | 7 ++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/kernel_tests/constant_op_test.py b/tensorflow/python/kernel_tests/constant_op_test.py index a1316df34f8..d23874c2097 100644 --- a/tensorflow/python/kernel_tests/constant_op_test.py +++ b/tensorflow/python/kernel_tests/constant_op_test.py @@ -24,6 +24,7 @@ from google.protobuf import text_format from tensorflow.core.framework import graph_pb2 from tensorflow.core.framework import tensor_pb2 +from tensorflow.python.eager import def_function from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes as dtypes_lib from tensorflow.python.framework import errors_impl @@ -629,6 +630,15 @@ class OnesTest(test.TestCase): self.assertEqual([2, 3], z.get_shape()) self.assertAllEqual(z, np.ones([2, 3])) + def testQintDtype(self): + @def_function.function(autograph=False) + def f(): + return math_ops.cast( + array_ops.ones([2, 3], dtype=dtypes_lib.quint8), dtypes_lib.int32) + + value = self.evaluate(f()) + self.assertTrue(np.all(value)) + class OnesLikeTest(test.TestCase): diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index cc41f5f1a3c..c2f4f4182d8 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -3053,7 +3053,12 @@ def ones(shape, dtype=dtypes.float32, name=None): """ dtype = dtypes.as_dtype(dtype).base_dtype with ops.name_scope(name, "ones", [shape]) as name: - one = True if dtype == dtypes.bool else 1 + if dtype == dtypes.bool: + one = True + elif dtype.is_quantized: + one = np.ones([]).astype(dtype.as_numpy_dtype) + else: + one = 1 if not isinstance(shape, ops.Tensor): try: if not context.executing_eagerly(): From 60fa5d18665b6809add7d0a0f1f84d1f22e7ea34 Mon Sep 17 00:00:00 2001 From: Kibeom Kim Date: Tue, 21 Jul 2020 11:51:17 -0700 Subject: [PATCH 0948/2522] Remove frame->f_trace DCHECK, which is not always nullptr. PiperOrigin-RevId: 322408718 Change-Id: I6ca3caba213e28ffca64df281ff54b970a07cf98 --- tensorflow/python/util/stack_trace.h | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/python/util/stack_trace.h b/tensorflow/python/util/stack_trace.h index 732d40c92d2..9e66ac3c8d0 100644 --- a/tensorflow/python/util/stack_trace.h +++ b/tensorflow/python/util/stack_trace.h @@ -50,7 +50,6 @@ class StackTrace final { int i = 0; for (; i < kMaxDepth && frame != nullptr; frame = frame->f_back, ++i) { PyCodeObject* code_obj = frame->f_code; - DCHECK(frame->f_trace == nullptr); DCHECK(code_obj != nullptr); Py_INCREF(code_obj); From 4626b11187bb098b71acf6364045f78ac68ec9a8 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Tue, 21 Jul 2020 11:54:01 -0700 Subject: [PATCH 0949/2522] Remove all BUILD aliases to tensorflow/core/framework:bounds_check Use the actual rule directly everywhere. 
PiperOrigin-RevId: 322409304 Change-Id: I42d22f8d5254d967a364faf5b6e4f6ab53244e11 --- tensorflow/core/BUILD | 14 +++-- tensorflow/core/kernels/BUILD | 99 +++++++++++++++++++---------------- 2 files changed, 65 insertions(+), 48 deletions(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 83c3cf3b394..9c6fd9f3632 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -302,6 +302,12 @@ cc_library( ], ) +alias( + name = "framework_bounds_check", + actual = "//tensorflow/core/framework:bounds_check", + visibility = ["//tensorflow/core/kernels:friends"], +) + alias( name = "human_readable_json", actual = "//tensorflow/core/platform:human_readable_json", @@ -763,7 +769,7 @@ cc_library( ":lib", ":lib_internal", ":stream_executor", - "//tensorflow/core/framework:bounds_check", + "//tensorflow/core/kernels:bounds_check_lib", ], alwayslink = 1, ) @@ -2155,7 +2161,7 @@ cc_header_only_library( ":lib", ":lib_internal", ":version_lib", - "//tensorflow/core/framework:bounds_check", + "//tensorflow/core/kernels:bounds_check", "//tensorflow/core/platform/default/build_config:platformlib", ], ) @@ -2220,7 +2226,7 @@ tf_cuda_library( "//tensorflow/core/framework:shape_inference", "//tensorflow/core/framework:tensor", "//tensorflow/core/framework:tensor_shape", - "//tensorflow/core/framework:bounds_check", + "//tensorflow/core/kernels:bounds_check", "//tensorflow/core/platform/default/build_config:platformlib", "//tensorflow/core/profiler/lib:annotated_traceme", "//tensorflow/core/profiler/lib:traceme", @@ -2322,7 +2328,7 @@ tf_cuda_library( ":function_ops_op_lib", ":functional_grad", ":functional_ops_op_lib", - "//tensorflow/core/framework:bounds_check", + "//tensorflow/core/kernels:bounds_check", "//tensorflow/core/kernels:required", ]), alwayslink = 1, diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 9501611f82e..c395f7d3e73 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -164,6 +164,7 @@ tf_kernel_library( "strided_slice_op_gpu_number_types.cu.cc", ], deps = [ + ":bounds_check", ":dense_update_functor", ":inplace_ops", ":ops_util", @@ -172,7 +173,6 @@ tf_kernel_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", - "//tensorflow/core/framework:bounds_check", "//third_party/eigen3", ], ) @@ -283,9 +283,9 @@ tf_kernel_library( "gpu_device_array_gpu.h", ], deps = [ + ":bounds_check", "//tensorflow/core:framework", "//tensorflow/core:framework_internal", - "//tensorflow/core/framework:bounds_check", "//third_party/eigen3", ], alwayslink = 0, @@ -347,11 +347,11 @@ tf_kernel_library( name = "extract_image_patches_op", prefix = "extract_image_patches_op", deps = [ + ":bounds_check", ":eigen_helpers", ":ops_util", "//tensorflow/core:framework", "//tensorflow/core:lib", - "//tensorflow/core/framework:bounds_check", "//third_party/eigen3", ], ) @@ -360,11 +360,11 @@ tf_kernel_library( name = "extract_volume_patches_op", prefix = "extract_volume_patches_op", deps = [ + ":bounds_check", ":eigen_helpers", ":ops_util", "//tensorflow/core:framework", "//tensorflow/core:lib", - "//tensorflow/core/framework:bounds_check", "//third_party/eigen3", ], ) @@ -690,9 +690,9 @@ cc_library( hdrs = ["save_restore_tensor.h"], copts = if_not_windows(["-Wno-sign-compare"]), deps = [ + ":bounds_check", "//tensorflow/core:framework", "//tensorflow/core:lib", - "//tensorflow/core/framework:bounds_check", "//tensorflow/core/util/tensor_bundle", ], ) @@ -748,8 +748,19 @@ cc_library( ], ) +alias( + name 
= "bounds_check", + actual = "//tensorflow/core:framework_bounds_check", + visibility = [":friends"], +) + # Private support libraries --------------------------------------------------- +cc_header_only_library( + name = "bounds_check_lib", + deps = [":bounds_check"], +) + cc_library( name = "gpu_device_array", hdrs = [ @@ -936,9 +947,9 @@ cc_library( hdrs = ["image_resizer_state.h"], visibility = ["//visibility:private"], deps = [ + ":bounds_check", "//tensorflow/core:framework", "//tensorflow/core:lib", - "//tensorflow/core/framework:bounds_check", "//third_party/eigen3", ], ) @@ -971,7 +982,7 @@ tf_cc_test( # OpKernel libraries ---------------------------------------------------------- ARRAY_DEPS = [ - "//tensorflow/core/framework:bounds_check", + ":bounds_check", ":concat_lib", ":fill_functor", ":gather_functor", @@ -1585,12 +1596,12 @@ tf_kernel_library( srcs = ["cudnn_rnn_ops.cc"], visibility = ["//visibility:public"], deps = [ + ":bounds_check_lib", ":gpu_utils", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:stream_executor", - "//tensorflow/core/framework:bounds_check", "//third_party/eigen3", ], ) @@ -2041,8 +2052,8 @@ tf_kernel_library( prefix = "gather_functor", visibility = [":friends"], deps = [ + ":bounds_check", "//tensorflow/core:framework", - "//tensorflow/core/framework:bounds_check", "//third_party/eigen3", ], ) @@ -2271,10 +2282,10 @@ tf_kernel_library( prefix = "scatter_functor", visibility = [":friends"], deps = [ + ":bounds_check", ":dense_update_functor", "//tensorflow/core:framework", "//tensorflow/core:lib", - "//tensorflow/core/framework:bounds_check", "//third_party/eigen3", ], ) @@ -2439,7 +2450,7 @@ tf_kernel_library( name = "ctc_ops", prefix = "ctc", deps = [ - "//tensorflow/core/framework:bounds_check", + ":bounds_check", ":ops_util", "//tensorflow/core:framework", "//tensorflow/core:lib", @@ -2508,7 +2519,7 @@ cc_header_only_library( ) DATA_FLOW_DEPS = [ - "//tensorflow/core/framework:bounds_check", + ":bounds_check", ":concat_lib", ":conditional_accumulator", ":conditional_accumulator_base", @@ -2652,7 +2663,7 @@ tf_kernel_library( ) DYNAMIC_DEPS = [ - "//tensorflow/core/framework:bounds_check", + ":bounds_check", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", "//tensorflow/core:lib", @@ -2705,7 +2716,7 @@ tf_cc_test( ) LOOKUP_DEPS = [ - "//tensorflow/core/framework:bounds_check", + ":bounds_check", ":initializable_lookup_table", ":lookup_util", "@com_google_absl//absl/container:flat_hash_map", @@ -2885,6 +2896,7 @@ tf_kernel_library( srcs = ["resource_variable_ops.cc"], hdrs = ["resource_variable_ops.h"], deps = [ + ":bounds_check", ":dense_update_functor", ":gather_functor", ":gather_nd_op", @@ -2895,7 +2907,6 @@ tf_kernel_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", - "//tensorflow/core/framework:bounds_check", "@com_google_absl//absl/strings", ], ) @@ -3014,7 +3025,7 @@ cc_library( ) IMAGE_DEPS = [ - "//tensorflow/core/framework:bounds_check", + ":bounds_check", ":eigen_helpers", ":image_resizer_state", "//third_party/eigen3", @@ -3166,11 +3177,11 @@ tf_kernel_library( name = "encode_wav_op", prefix = "encode_wav_op", deps = [ + ":bounds_check", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", - "//tensorflow/core/framework:bounds_check", ], ) @@ -3481,7 +3492,7 @@ tf_kernel_library( ) SAVE_RESTORE_DEPS = [ - 
"//tensorflow/core/framework:bounds_check", + ":bounds_check_lib", ":save_restore_tensor", "//tensorflow/core:framework", "//tensorflow/core:lib", @@ -3924,9 +3935,9 @@ tf_kernel_library( "roll_op.h", ], deps = [ + ":bounds_check", "//tensorflow/core:framework", "//tensorflow/core:lib", - "//tensorflow/core/framework:bounds_check", "//third_party/eigen3", ], ) @@ -3951,7 +3962,7 @@ tf_cc_test( ) MATH_DEPS = [ - "//tensorflow/core/framework:bounds_check", + ":bounds_check", ":fill_functor", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", @@ -4317,6 +4328,7 @@ tf_cuda_cc_test( size = "small", srcs = ["cwise_ops_test.cc"], deps = [ + ":bounds_check", ":cwise_op", ":nn", ":ops_testutil", @@ -4328,7 +4340,6 @@ tf_cuda_cc_test( "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core:testlib", - "//tensorflow/core/framework:bounds_check", ], ) @@ -4634,7 +4645,7 @@ tf_kernel_library( deps = [ ":conv_grad_shape_utils", ":conv_ops_3d_headers", - "//tensorflow/core/framework:bounds_check", + ":bounds_check", ":conv_2d", ":conv_3d", ":eigen_contraction_kernel", @@ -4701,7 +4712,7 @@ tf_kernel_library( "depthwise_conv_op_gpu_half.cu.cc", ], deps = [ - "//tensorflow/core/framework:bounds_check", + ":bounds_check", ":conv_ops", ":ops_util", "//tensorflow/core:core_cpu", @@ -4722,7 +4733,7 @@ tf_kernel_library( ], prefix = "depthwise_conv_grad_op", deps = [ - "//tensorflow/core/framework:bounds_check", + ":bounds_check", ":conv_ops", ":ops_util", "//tensorflow/core:core_cpu", @@ -4766,7 +4777,7 @@ cc_library( ) NN_DEPS = [ - "//tensorflow/core/framework:bounds_check", + ":bounds_check", ":conv_2d", ":eigen_contraction_kernel", ":ops_util", @@ -5023,6 +5034,7 @@ tf_kernel_library( "pooling_ops_3d_gpu.cu.cc", ], deps = [ + ":bounds_check", ":conv_2d", ":conv_3d", ":conv_ops", @@ -5033,7 +5045,6 @@ tf_kernel_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:stream_executor", - "//tensorflow/core/framework:bounds_check", "//third_party/eigen3", ], ) @@ -5094,9 +5105,9 @@ tf_kernel_library( ], visibility = [":friends"], deps = [ + ":bounds_check", "//tensorflow/core:framework", "//tensorflow/core:lib", - "//tensorflow/core/framework:bounds_check", "//third_party/eigen3", ], ) @@ -5278,6 +5289,7 @@ tf_kernel_library( name = "stateful_random_ops", prefix = "stateful_random_ops", deps = [ + ":bounds_check", ":dense_update_functor", ":gather_functor", ":mutex_ops", @@ -5293,7 +5305,6 @@ tf_kernel_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", - "//tensorflow/core/framework:bounds_check", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:variant", ], @@ -5303,11 +5314,11 @@ tf_kernel_library( name = "stateless_random_ops", prefix = "stateless_random_ops", deps = [ + ":bounds_check", ":random_op", ":random_poisson_op", "//tensorflow/core:framework", "//tensorflow/core:lib", - "//tensorflow/core/framework:bounds_check", ], ) @@ -5494,7 +5505,7 @@ tf_kernel_library( name = "sparse_tensor_dense_matmul_op", prefix = "sparse_tensor_dense_matmul_op", deps = SPARSE_DEPS + [ - "//tensorflow/core/framework:bounds_check", + ":bounds_check", ":fill_functor", "//third_party/eigen3", ], @@ -5512,7 +5523,7 @@ tf_kernel_library( name = "sparse_xent_op", prefix = "sparse_xent_op", deps = SPARSE_DEPS + [ - "//tensorflow/core/framework:bounds_check", + ":bounds_check", "//third_party/eigen3", ] + if_cuda_or_rocm([ ":reduction_ops", @@ -5667,7 +5678,7 @@ cc_library( STATE_DEPS = [ ":assign_op", - 
"//tensorflow/core/framework:bounds_check", + ":bounds_check", ":fill_functor", ":scatter_functor", "//third_party/eigen3", @@ -5838,7 +5849,7 @@ cc_library( ) STRING_DEPS = [ - "//tensorflow/core/framework:bounds_check", + ":bounds_check", ":string_util", "//third_party/eigen3", "//tensorflow/core:framework", @@ -6037,11 +6048,11 @@ tf_kernel_library( name = "unicode_ops", prefix = "unicode_ops", deps = [ + ":bounds_check", ":string_util", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", - "//tensorflow/core/framework:bounds_check", "//third_party/eigen3", "//third_party/icu/data:conversion_data", "@icu//:common", @@ -6058,11 +6069,11 @@ tf_kernel_library( name = "training_ops", prefix = "training_ops", deps = [ + ":bounds_check", ":training_op_helpers", ":variable_ops", "//tensorflow/core:framework", "//tensorflow/core:lib", - "//tensorflow/core/framework:bounds_check", "//third_party/eigen3", ], ) @@ -7743,7 +7754,7 @@ tf_mkl_kernel_library( "reference_gemm.h", ], deps = [ - "//tensorflow/core/framework:bounds_check", + ":bounds_check", ":ops_util", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", @@ -8090,7 +8101,7 @@ tf_mkl_kernel_library( "no_op.h", ], deps = [ - "//tensorflow/core/framework:bounds_check", + ":bounds_check", ":fill_functor", ":matmul_op", ":ops_util", @@ -8113,7 +8124,7 @@ tf_mkl_kernel_library( ], prefix = "mkl_conv", deps = [ - "//tensorflow/core/framework:bounds_check", + ":bounds_check", ":conv_ops", ":ops_util", "@com_google_absl//absl/strings", @@ -8185,7 +8196,7 @@ tf_mkl_kernel_library( name = "mkl_tfconv_op", prefix = "mkl_tfconv", deps = [ - "//tensorflow/core/framework:bounds_check", + ":bounds_check", ":ops_util", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", @@ -8199,7 +8210,7 @@ tf_mkl_kernel_library( hdrs = ["mkl_tfconv_op.h"], prefix = "mkl_input_conversion", deps = [ - "//tensorflow/core/framework:bounds_check", + ":bounds_check", ":ops_util", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", @@ -8217,7 +8228,7 @@ tf_mkl_kernel_library( ], hdrs = ["mkl_pooling_ops_common.h"], deps = [ - "//tensorflow/core/framework:bounds_check", + ":bounds_check", ":ops_util", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", @@ -8287,7 +8298,7 @@ tf_mkl_kernel_library( name = "mkl_relu_op", prefix = "mkl_relu", deps = [ - "//tensorflow/core/framework:bounds_check", + ":bounds_check", ":ops_util", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", @@ -8301,7 +8312,7 @@ tf_mkl_kernel_library( name = "mkl_softmax_op", prefix = "mkl_softmax", deps = [ - "//tensorflow/core/framework:bounds_check", + ":bounds_check", ":ops_util", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", @@ -8624,10 +8635,10 @@ cc_library( "meta_support.h", ], deps = [ + ":bounds_check", ":quantization_utils", "//tensorflow/core:framework", "//tensorflow/core:lib", - "//tensorflow/core/framework:bounds_check", "//third_party/eigen3", "@gemmlowp", ], From b3e23cebd01f822aba0c0b6658db7abc0eddf445 Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Tue, 21 Jul 2020 11:54:09 -0700 Subject: [PATCH 0950/2522] [XLA:SPMD][NFC] Move dot/conv handling in separate files PiperOrigin-RevId: 322409334 Change-Id: I7640414b86f496c78a93f4aa3dde5231a98d234e --- tensorflow/compiler/xla/service/spmd/BUILD | 2 + .../xla/service/spmd/convolution_handler.cc | 695 ++++++ .../compiler/xla/service/spmd/dot_handler.cc | 1211 +++++++++++ .../xla/service/spmd/spmd_partitioner.cc | 1876 ----------------- 
.../xla/service/spmd/spmd_partitioner_util.cc | 41 + .../xla/service/spmd/spmd_partitioner_util.h | 9 + 6 files changed, 1958 insertions(+), 1876 deletions(-) create mode 100644 tensorflow/compiler/xla/service/spmd/convolution_handler.cc create mode 100644 tensorflow/compiler/xla/service/spmd/dot_handler.cc diff --git a/tensorflow/compiler/xla/service/spmd/BUILD b/tensorflow/compiler/xla/service/spmd/BUILD index 4433078472d..e41b89f6dff 100644 --- a/tensorflow/compiler/xla/service/spmd/BUILD +++ b/tensorflow/compiler/xla/service/spmd/BUILD @@ -17,6 +17,8 @@ package_group( cc_library( name = "spmd_partitioner", srcs = [ + "convolution_handler.cc", + "dot_handler.cc", "spmd_partitioner.cc", "spmd_partitioner_util.cc", ], diff --git a/tensorflow/compiler/xla/service/spmd/convolution_handler.cc b/tensorflow/compiler/xla/service/spmd/convolution_handler.cc new file mode 100644 index 00000000000..1204df59080 --- /dev/null +++ b/tensorflow/compiler/xla/service/spmd/convolution_handler.cc @@ -0,0 +1,695 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "absl/algorithm/container.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/service/dot_as_convolution_util.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_instructions.h" +#include "tensorflow/compiler/xla/service/hlo_sharding.h" +#include "tensorflow/compiler/xla/service/hlo_sharding_util.h" +#include "tensorflow/compiler/xla/service/shape_inference.h" +#include "tensorflow/compiler/xla/service/spmd/spmd_partitioner.h" +#include "tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/util.h" +#include "tensorflow/compiler/xla/window_util.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/numbers.h" + +namespace xla { +namespace spmd { + +Status SpmdPartitioningVisitor::HandleConvolutionTiledLhsAndRhs( + HloInstruction* hlo) { + TF_RET_CHECK(hlo->opcode() == HloOpcode::kConvolution); + + auto lhs = GetPartitionedHlo(hlo->operand(0)); + auto rhs = GetPartitionedHlo(hlo->operand(1)); + TF_RET_CHECK(!lhs.sharding().IsTileMaximal() && + !rhs.sharding().IsTileMaximal()); + + const auto& dnums = hlo->convolution_dimension_numbers(); + + // Check if the operand shardings are aligned. Also we currently don't + // support partitioning non-spatial dimensions. 
+ std::vector rhs_to_lhs_indices(hlo->shape().rank()); + rhs_to_lhs_indices[dnums.kernel_output_feature_dimension()] = + dnums.input_batch_dimension(); + rhs_to_lhs_indices[dnums.kernel_input_feature_dimension()] = + dnums.input_feature_dimension(); + for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { + rhs_to_lhs_indices[dnums.kernel_spatial_dimensions(i)] = + dnums.input_spatial_dimensions(i); + } + std::vector lhs_to_rhs_indices(hlo->shape().rank()); + for (int64 i = 0; i < rhs_to_lhs_indices.size(); ++i) { + lhs_to_rhs_indices[rhs_to_lhs_indices[i]] = i; + } + + Window window = hlo->window(); + std::vector reversed_rhs_dims; + for (int64 i = 0; i < window.dimensions_size(); ++i) { + if (window.dimensions(i).window_reversal()) { + reversed_rhs_dims.push_back(dnums.kernel_spatial_dimensions(i)); + } + } + if (!reversed_rhs_dims.empty()) { + // Make the reversed dims left-padded to prepare for window reversal. + auto left_padded_rhs = HaloExchangeToPadOnLeft(rhs, reversed_rhs_dims); + if (left_padded_rhs == nullptr) { + return DefaultAction(hlo); + } + left_padded_rhs->set_sharding(rhs.sharding()); + rhs = PartitionedHlo(left_padded_rhs, rhs.base_shape(), rhs.state()); + } + // Consider window reversal when resharding RHS or LHS. Note: this will not + // reverse the data in the shard. We use window reversal to do that. + auto aligned_rhs_sharding = hlo_sharding_util::ReverseSharding( + hlo_sharding_util::TransposeSharding(lhs.sharding(), rhs_to_lhs_indices), + reversed_rhs_dims); + auto aligned_lhs_sharding = hlo_sharding_util::TransposeSharding( + hlo_sharding_util::ReverseSharding(rhs.sharding(), reversed_rhs_dims), + lhs_to_rhs_indices); + + auto unsupported_sharding = [&](const HloSharding& lhs_sharding, + const HloSharding& rhs_sharding) { + return lhs_sharding.tile_assignment().dim(dnums.input_batch_dimension()) != + 1 || + rhs_sharding.tile_assignment().dim( + dnums.kernel_output_feature_dimension()) != 1; + }; + + auto zero = b_.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::Zero(hlo->shape().element_type()))); + if (ShapeSizeInBytes(lhs.base_shape()) < ShapeSizeInBytes(rhs.base_shape())) { + if (unsupported_sharding(aligned_lhs_sharding, rhs.sharding())) { + return DefaultAction(hlo); + } + lhs = lhs.Reshard(aligned_lhs_sharding).PadWithValue(zero); + rhs = rhs.PadWithValue(zero, reversed_rhs_dims); + } else { + if (unsupported_sharding(lhs.sharding(), aligned_rhs_sharding)) { + return DefaultAction(hlo); + } + lhs = lhs.PadWithValue(zero); + rhs = + rhs.Reshard(aligned_rhs_sharding).PadWithValue(zero, reversed_rhs_dims); + } + + // Reshard LHS by exchanging halo such that each shard computes the partial + // sum of the full shape result, and add AllReduce. + // + // The size of halo on each dimension can be calculated from the projection + // onto the LHS that each RHS shard i needs to read. RHS and LHS below refers + // to the shard size of RHS and LHS, WC is the number of windows, and D is the + // window dilation. 
+ // + // * offset(i): RHS * D * i - low_padding + // * limit(i): {(RHS - 1) * D + 1} * (i + 1) + (WC - 1) * stride - low_padding + // + // Since shard i has LHS of range [i * LHS, (i + 1) * LHS) + // * left-halo: i * LHS - offset(i) + // = (LHS - RHS) * i + low_padding + // * right-halo: limit(i) - (i + 1) * LHS + // = [{(RHS - 1) * D + 1} - LHS] * (i + 1) + (WC - 1) * stride - low_padding + std::vector shard_counts(dnums.input_spatial_dimensions_size()); + std::vector lhs_shard_sizes(dnums.input_spatial_dimensions_size()); + std::vector rhs_shard_sizes(dnums.input_spatial_dimensions_size()); + for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { + int64 lhs_dimension = dnums.input_spatial_dimensions(i); + int64 rhs_dimension = dnums.kernel_spatial_dimensions(i); + int64 shard_count = lhs.sharding().tile_assignment().dim(lhs_dimension); + auto wd = window.dimensions(i); + if (wd.base_dilation() != 1) { + return DefaultAction(hlo); + } + + int64 lhs_shard_size = + CeilOfRatio(lhs.base_shape().dimensions(lhs_dimension), shard_count); + int64 rhs_shard_size = + CeilOfRatio(rhs.base_shape().dimensions(rhs_dimension), shard_count); + shard_counts[i] = shard_count; + lhs_shard_sizes[i] = lhs_shard_size; + rhs_shard_sizes[i] = rhs_shard_size; + } + + std::vector left_halo_size_functions(hlo->shape().rank()); + std::vector right_halo_size_functions(hlo->shape().rank()); + Window new_window = window; + + auto partition_ordinals = + MakeTiledPartitionOrdinals(lhs.sharding(), partition_id_, &b_); + HloInstruction* lhs_with_halo = lhs.hlo(); + for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { + int64 lhs_dimension = dnums.input_spatial_dimensions(i); + int64 lhs_shard_size = lhs_shard_sizes[i]; + int64 rhs_shard_size = rhs_shard_sizes[i]; + + if (shard_counts[i] == 1) { + continue; + } + + // Calculate the left and right halo sizes as described in the comments + // above. + auto wd = window.dimensions(i); + int64 padding_low = wd.padding_low(); + int64 padding_high = wd.padding_high(); + int64 base = lhs.base_shape().dimensions(lhs_dimension); + int64 window_count = 1 + (padding_low + padding_high + base - + (1 + (wd.size() - 1) * wd.window_dilation())) / + wd.stride(); + int64 rhs_shard_size_dilated = + (rhs_shard_size - 1) * wd.window_dilation() + 1; + + left_halo_size_functions[lhs_dimension] = + OffsetCalculation(MultiplyAddDivideOffsetCalculation( + lhs_shard_size - rhs_shard_size * wd.window_dilation(), padding_low, + 1)); + right_halo_size_functions[lhs_dimension] = + OffsetCalculation(MultiplyAddDivideOffsetCalculation( + rhs_shard_size_dilated - lhs_shard_size, + rhs_shard_size_dilated - lhs_shard_size + + wd.stride() * (window_count - 1) - padding_low, + 1)); + + // Exchange halo and concatenate. + int64 dim = dnums.input_spatial_dimensions(i); + int64 explicit_left_padding_on_full_shape = padding_low; + int64 shard_size_with_halo = + wd.stride() * (window_count - 1) + rhs_shard_size_dilated; + + new_window.mutable_dimensions(i)->set_padding_low(0); + new_window.mutable_dimensions(i)->set_padding_high(0); + new_window.mutable_dimensions(i)->set_size(rhs_shard_size); + + // offset_on_padded_shape and padded_full_shape_size are needed only if + // we want to mask out-of-range values in ExchangeHaloAndGetValidData(). + // Since the default value for both the collective-permute is zero and + // also we call PadWithValue() on both operands at the beginning, we + // don't need to mask here. 
+ // + // TODO(hyoulkee): Consider removing one of the two PadWithValue() calls + // if it's always safe. + auto offset_on_padded_shape = + OffsetCalculation(MultiplyAddDivideOffsetCalculation()); + int64 padded_full_shape_size = 0; + auto concat = ExchangeHaloAndGetValidData( + lhs_with_halo, lhs.base_shape(), left_halo_size_functions[dim], + right_halo_size_functions[dim], explicit_left_padding_on_full_shape, + padded_full_shape_size, shard_size_with_halo, dim, lhs.sharding(), + offset_on_padded_shape.Calculate(partition_ordinals[dim], &b_), zero, + partition_ordinals[dim], collective_ops_creator_, next_channel_id_, &b_, + /*mask_invalid_region=*/false); + if (!concat) { + return DefaultAction(hlo); + } + lhs_with_halo = *concat; + } + + SetPartitionedHlo(hlo, [&]() { + auto conv = b_.AddInstruction(HloInstruction::CreateConvolve( + hlo->shape(), lhs_with_halo, rhs.hlo(), hlo->feature_group_count(), + hlo->batch_group_count(), new_window, + hlo->convolution_dimension_numbers(), hlo->precision_config())); + auto ar = collective_ops_creator_.create_cross_partition_all_reduce( + &b_, conv, MakeBinaryAdd(hlo->shape().element_type(), module_), + NewChannel()); + ar->set_sharding(HloSharding::Replicate()); + return PartitionedHlo(ar, hlo->shape(), MakePartitioningState()) + .Reshard(hlo->sharding()) + .hlo(); + }); + return Status::OK(); +} + +Status SpmdPartitioningVisitor::HandleConvolution(HloInstruction* hlo) { + auto dot_dnums = dot_as_convolution_util::ParseDotGeneralFromConvolution(hlo); + if (dot_dnums) { + // Use HandleDotHelper() for convs that are actually einsums. + spmd::DotGeneralDimsMapping mapping; + for (const auto& dims : dot_dnums->batch_dims) { + mapping.batch_dims.emplace_back(); + mapping.batch_dims.back().lhs = dims.lhs; + mapping.batch_dims.back().rhs = dims.rhs; + mapping.batch_dims.back().output = dims.output; + } + for (const auto& dims : dot_dnums->contracting_dims) { + mapping.contracting_dims.emplace_back(); + mapping.contracting_dims.back().lhs = dims.lhs; + mapping.contracting_dims.back().rhs = dims.rhs; + mapping.contracting_dims.back().output = dims.output; + } + for (const auto& dims : dot_dnums->lhs_non_contracting_dims) { + mapping.lhs_non_contracting_dims.emplace_back(); + mapping.lhs_non_contracting_dims.back().lhs = dims.lhs; + mapping.lhs_non_contracting_dims.back().rhs = dims.rhs; + mapping.lhs_non_contracting_dims.back().output = dims.output; + } + for (const auto& dims : dot_dnums->rhs_non_contracting_dims) { + mapping.rhs_non_contracting_dims.emplace_back(); + mapping.rhs_non_contracting_dims.back().lhs = dims.lhs; + mapping.rhs_non_contracting_dims.back().rhs = dims.rhs; + mapping.rhs_non_contracting_dims.back().output = dims.output; + } + auto create_sharded_conv = + [&](HloInstruction* lhs_hlo, HloInstruction* rhs_hlo, + spmd::SpmdBuilder* b) -> StatusOr { + TF_ASSIGN_OR_RETURN( + auto sharded_conv, + dot_as_convolution_util::CreateShardedConvForDotGeneralConvolution( + *hlo, *dot_dnums, lhs_hlo, rhs_hlo)); + return b->AddInstruction(std::move(sharded_conv)); + }; + return HandleDotHelper(hlo, mapping, create_sharded_conv); + } + + auto lhs = GetPartitionedHlo(hlo->operand(0)); + auto rhs = GetPartitionedHlo(hlo->operand(1)); + const HloSharding& sharding = hlo->sharding(); + const auto& dnums = hlo->convolution_dimension_numbers(); + std::vector rhs_to_lhs_indices(hlo->shape().rank()); + rhs_to_lhs_indices[dnums.kernel_output_feature_dimension()] = + dnums.input_batch_dimension(); + rhs_to_lhs_indices[dnums.kernel_input_feature_dimension()] = + 
dnums.input_feature_dimension(); + for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { + rhs_to_lhs_indices[dnums.kernel_spatial_dimensions(i)] = + dnums.input_spatial_dimensions(i); + } + std::vector lhs_to_rhs_indices(hlo->shape().rank()); + for (int64 i = 0; i < rhs_to_lhs_indices.size(); ++i) { + lhs_to_rhs_indices[rhs_to_lhs_indices[i]] = i; + } + auto aligned_rhs_sharding = + hlo_sharding_util::TransposeSharding(lhs.sharding(), rhs_to_lhs_indices); + auto aligned_lhs_sharding = + hlo_sharding_util::TransposeSharding(rhs.sharding(), lhs_to_rhs_indices); + + // Handling cases where all the partitioned dimensions are parallel + // dimensions. + int64 lhs_parallel_dim_partitions = 1; + int64 rhs_parallel_dim_partitions = 1; + std::vector parallel_spatial_dims; + for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { + int64 lhs_dim = dnums.input_spatial_dimensions(i); + int64 lhs_size = lhs.base_shape().dimensions(lhs_dim); + const auto& wd = hlo->window().dimensions(i); + int64 rhs_dim = dnums.kernel_spatial_dimensions(i); + // Only non reversal window is supported right now. + if (!wd.window_reversal() && + dot_as_convolution_util::ConvSpatialDimensionIsParallel(wd, lhs_size)) { + parallel_spatial_dims.emplace_back(i); + lhs_parallel_dim_partitions *= ShardCountAtDim(lhs.sharding(), lhs_dim); + rhs_parallel_dim_partitions *= ShardCountAtDim(rhs.sharding(), rhs_dim); + } + } + bool lhs_partition_dims_are_parallel = + (lhs_parallel_dim_partitions == num_partitions_); + bool rhs_partition_dims_are_parallel = + (rhs_parallel_dim_partitions == num_partitions_); + + // If there is a parallel dim and all the partitioned dimensions are parallel + // dimensions in either LHS or RHS, simply create partitioned convolutions. + if (!parallel_spatial_dims.empty() && + (lhs_partition_dims_are_parallel || rhs_partition_dims_are_parallel)) { + // Reshard LHS or RHS to partition at parallel dimensions as the other + // operand. + if (lhs_partition_dims_are_parallel) { + rhs = rhs.Reshard(aligned_rhs_sharding); + } else { + lhs = lhs.Reshard(aligned_lhs_sharding); + } + auto lhs_shard_shape = + MakePartitionedShape(lhs.base_shape(), lhs.sharding()); + auto rhs_shard_shape = + MakePartitionedShape(rhs.base_shape(), rhs.sharding()); + // Update convolution window. + auto new_window = hlo->window(); + for (const auto& spatial_dim : parallel_spatial_dims) { + auto wd = new_window.mutable_dimensions(spatial_dim); + wd->set_size(lhs_shard_shape.dimensions( + dnums.input_spatial_dimensions(spatial_dim))); + wd->set_stride(std::max(1, wd->size() - 1)); + wd->set_base_dilation(wd->size()); + } + TF_ASSIGN_OR_RETURN( + Shape sharded_conv_shape, + ShapeInference::InferConvolveShape( + lhs_shard_shape, rhs_shard_shape, hlo->feature_group_count(), + hlo->batch_group_count(), new_window, dnums)); + *sharded_conv_shape.mutable_layout() = hlo->shape().layout(); + SetPartitionedHlo(hlo, [&]() { + auto sharded_conv = b_.AddInstruction(HloInstruction::CreateConvolve( + sharded_conv_shape, lhs.hlo(), rhs.hlo(), hlo->feature_group_count(), + hlo->batch_group_count(), new_window, dnums, + hlo->precision_config())); + sharded_conv->set_sharding(hlo->sharding()); + return PartitionedHlo(sharded_conv, hlo->shape(), MakePartitioningState()) + .Reshard(hlo->sharding()) + .hlo(); + }); + return Status::OK(); + } + + // Handling cases where both operands' shardings are aligned. 
We check that + // the LHS batch dimension is not partitioned because it is mapped to the + // output feature dimension in aligned_rhs_sharding, which are not the same + // dimension. + if (!lhs.sharding().IsTileMaximal() && !rhs.sharding().IsTileMaximal()) { + if (options_.conv_halo_exchange_always_on_lhs) { + return HandleConvolutionTiledLhsAndRhs(hlo); + } else { + // Reshard RHS so that each shard computes the partial sum of the full + // shape result, and add AllReduce. See HandleConvolutionTiledLhsAndRhs() + // that reshards LHS. + // + // The size of halo on each dimension can be calculated from the + // projection onto the RHS that shard i needs to read. RHS and LHS below + // refers to the shard size of RHS and LHS, WC is the number of windows, + // and D is the window dilation. + // + // * offset(i): LHS * i + low_padding - (WC - 1) * stride + // * limit(i): LHS * (i + 1) + low_padding + // + // Since shard i has RHS of range [i * RHS * D, (i + 1) * RHS * D) + // * left-halo: i * RHS - offset(i) + // = i * (RHS * D - LHS) + (WC - 1) * stride - low_padding + // * right-halo: limit(i) - (i + 1) * RHS + // = (i + 1) * (LHS - RHS * D) + low_pading + + auto unsupported_sharding = [&](const HloSharding& lhs_sharding, + const HloSharding& rhs_sharding) { + // We currently don't support partitioning input batch or output feature + // dimensions. + return lhs_sharding.tile_assignment().dim( + dnums.input_batch_dimension()) != 1 || + rhs_sharding.tile_assignment().dim( + dnums.kernel_output_feature_dimension()) != 1; + }; + auto zero = b_.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::Zero(hlo->shape().element_type()))); + if (ShapeSizeInBytes(lhs.base_shape()) < + ShapeSizeInBytes(rhs.base_shape())) { + if (unsupported_sharding(aligned_lhs_sharding, rhs.sharding())) { + return DefaultAction(hlo); + } + lhs = lhs.Reshard(aligned_lhs_sharding).PadWithValue(zero); + rhs = rhs.PadWithValue(zero); + } else { + if (unsupported_sharding(lhs.sharding(), aligned_rhs_sharding)) { + return DefaultAction(hlo); + } + lhs = lhs.PadWithValue(zero); + rhs = rhs.Reshard(aligned_rhs_sharding).PadWithValue(zero); + } + + Window window = hlo->window(); + std::vector shard_counts(dnums.input_spatial_dimensions_size()); + std::vector lhs_shard_sizes(dnums.input_spatial_dimensions_size()); + std::vector rhs_shard_sizes(dnums.input_spatial_dimensions_size()); + for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { + int64 lhs_dimension = dnums.input_spatial_dimensions(i); + int64 rhs_dimension = dnums.kernel_spatial_dimensions(i); + int64 shard_count = rhs.sharding().tile_assignment().dim(rhs_dimension); + auto wd = window.dimensions(i); + if (wd.base_dilation() != 1 || wd.window_reversal()) { + return DefaultAction(hlo); + } + + int64 lhs_shard_size = CeilOfRatio( + lhs.base_shape().dimensions(lhs_dimension), shard_count); + int64 rhs_shard_size = CeilOfRatio( + rhs.base_shape().dimensions(rhs_dimension), shard_count); + shard_counts[i] = shard_count; + lhs_shard_sizes[i] = lhs_shard_size; + rhs_shard_sizes[i] = rhs_shard_size; + } + + std::vector left_halo_size_functions( + hlo->shape().rank()); + std::vector right_halo_size_functions( + hlo->shape().rank()); + Window new_window = window; + + // Data structures needed for Pad and DynamicSlice on LHS if needed. 
+ bool need_dynamic_slice_lhs = false; + auto partition_ordinals = + MakeTiledPartitionOrdinals(lhs.sharding(), partition_id_, &b_); + std::vector zero_padding(hlo->shape().rank()); + PaddingConfig pad_config = + window_util::MakeSymmetricPadding(zero_padding); + auto zero_s32 = b_.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::Zero(S32))); + std::vector dynamic_slice_start_indices( + hlo->shape().rank(), zero_s32); + Shape dynamic_slice_shape = lhs.hlo()->shape(); + Shape pad_shape = lhs.hlo()->shape(); + + for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { + int64 lhs_dimension = dnums.input_spatial_dimensions(i); + int64 rhs_dimension = dnums.kernel_spatial_dimensions(i); + int64 lhs_shard_size = lhs_shard_sizes[i]; + int64 rhs_shard_size = rhs_shard_sizes[i]; + + if (shard_counts[i] == 1) { + continue; + } + + // Calculate the left and right halo sizes as described in the comments + // above. It calculcates the halo sizes with dilation, so we apply + // CeilOfRatio({left,right}_halo_size, window_dilation). + auto wd = window.dimensions(i); + int64 padding_low = wd.padding_low(); + int64 padding_high = wd.padding_high(); + int64 base = lhs.base_shape().dimensions(lhs_dimension); + int64 window_count = + 1 + (padding_low + padding_high + base - + (1 + (wd.size() - 1) * wd.window_dilation())) / + wd.stride(); + left_halo_size_functions[rhs_dimension] = + OffsetCalculation(MultiplyAddDivideOffsetCalculation( + rhs_shard_size * wd.window_dilation() - lhs_shard_size, + (window_count - 1) * wd.stride() - padding_low + + wd.window_dilation() - 1, + wd.window_dilation())); + right_halo_size_functions[rhs_dimension] = + OffsetCalculation(MultiplyAddDivideOffsetCalculation( + lhs_shard_size - rhs_shard_size * wd.window_dilation(), + lhs_shard_size - rhs_shard_size * wd.window_dilation() + + padding_low + wd.window_dilation() - 1, + wd.window_dilation())); + + // New RHS window size includes the maximum of both left and right + // halos. + int64 halo_size = left_halo_size_functions[rhs_dimension].MaxInRange( + 1, shard_counts[i]) + + right_halo_size_functions[rhs_dimension].MaxInRange( + 0, shard_counts[i] - 1); + int64 new_window_size = + rhs.hlo()->shape().dimensions(rhs_dimension) + halo_size; + + // The amount of new low padding could be dynamic (e.g., window_dilation + // != 1), which requires pad (to the maximum) and dynamic slice on LHS. + // + // If we consider the first window, the offset of the dilated RHS that + // aligns with the first valid LHS element for shard i is 'padding_low + + // LHS * i'. When the left halo is added to RHS, the offset of the first + // RHS element is (RHS * i - left_halo) * window_dilation. The + // difference between the two values is the amount of padding_low we + // need on LHS. + auto new_padding_low_function = + OffsetCalculation( + HloOpcode::kMultiply, left_halo_size_functions[rhs_dimension], + OffsetCalculation(MultiplyAddDivideOffsetCalculation( + 0, wd.window_dilation(), 1))) - + OffsetCalculation(MultiplyAddDivideOffsetCalculation( + rhs_shard_size * wd.window_dilation() - lhs_shard_size, + -padding_low, 1)); + + int64 new_padding_low_max = + new_padding_low_function.MaxInRange(0, shard_counts[i]); + int64 new_padding_low = new_padding_low_max; + int64 new_padding_high = window_count * wd.stride() + + (new_window_size - 1) * wd.window_dilation() - + new_padding_low - lhs_shard_size; + + // We do pad/dynamic-slice only when the padding is dynamic. 
+ if (!new_padding_low_function.IsConstant()) { + need_dynamic_slice_lhs = true; + new_padding_low = 0; + pad_config.mutable_dimensions(lhs_dimension) + ->set_edge_padding_low(new_padding_low_max); + pad_config.mutable_dimensions(lhs_dimension) + ->set_edge_padding_high(new_padding_low_max); + pad_shape.set_dimensions(lhs_dimension, + lhs_shard_size + 2 * new_padding_low_max); + dynamic_slice_start_indices[lhs_dimension] = + (OffsetCalculation(MultiplyAddDivideOffsetCalculation( + 0, new_padding_low_max, 1)) - + new_padding_low_function) + .Calculate(partition_ordinals[lhs_dimension], &b_); + dynamic_slice_shape.set_dimensions( + lhs_dimension, lhs_shard_size + new_padding_low_max); + } + + // Since the convolution RHS operand size increased with halos, adjust + // the window config accordingly. + new_window.mutable_dimensions(i)->set_padding_low(new_padding_low); + new_window.mutable_dimensions(i)->set_padding_high(new_padding_high); + new_window.mutable_dimensions(i)->set_size( + rhs.hlo()->shape().dimensions(rhs_dimension) + halo_size); + } + + HloInstruction* conv_lhs = lhs.hlo(); + if (need_dynamic_slice_lhs) { + auto pad = b_.AddInstruction( + HloInstruction::CreatePad(pad_shape, lhs.hlo(), zero, pad_config)); + conv_lhs = b_.AddInstruction(HloInstruction::CreateDynamicSlice( + dynamic_slice_shape, pad, dynamic_slice_start_indices, + dynamic_slice_shape.dimensions())); + } + + // Exchange halo and concatenate. + HloInstruction* rhs_with_halo = rhs.hlo(); + for (int i = 0; i < dnums.kernel_spatial_dimensions_size(); ++i) { + int64 dim = dnums.kernel_spatial_dimensions(i); + int64 explicit_left_padding_on_full_shape = + left_halo_size_functions[dim].Calculate(0); + int64 shard_size_with_halo = new_window.dimensions(i).size(); + + // offset_on_padded_shape and padded_full_shape_size are needed only if + // we want to mask out-of-range values in ExchangeHaloAndGetValidData(). + // Since the default value for both the collective-permute is zero and + // also we call PadWithValue() on both operands at the beginning, we + // don't need to mask here. + // + // TODO(hyoulkee): Consider removing one of the two PadWithValue() calls + // if it's always safe. 
+ auto offset_on_padded_shape = + OffsetCalculation(MultiplyAddDivideOffsetCalculation( + rhs_shard_sizes[i], explicit_left_padding_on_full_shape, 1)) - + left_halo_size_functions[dim]; + int64 padded_full_shape_size = + offset_on_padded_shape.Calculate(shard_counts[i] - 1) + + new_window.dimensions(i).size(); + auto concat = ExchangeHaloAndGetValidData( + rhs_with_halo, rhs.base_shape(), left_halo_size_functions[dim], + right_halo_size_functions[dim], explicit_left_padding_on_full_shape, + padded_full_shape_size, shard_size_with_halo, dim, rhs.sharding(), + offset_on_padded_shape.Calculate(partition_ordinals[dim], &b_), + zero, partition_ordinals[dim], collective_ops_creator_, + next_channel_id_, &b_, /*mask_invalid_region=*/false); + if (!concat) { + return DefaultAction(hlo); + } + rhs_with_halo = *concat; + } + + SetPartitionedHlo(hlo, [&]() { + auto conv = b_.AddInstruction(HloInstruction::CreateConvolve( + hlo->shape(), conv_lhs, rhs_with_halo, hlo->feature_group_count(), + hlo->batch_group_count(), new_window, dnums, + hlo->precision_config())); + auto ar = collective_ops_creator_.create_cross_partition_all_reduce( + &b_, conv, MakeBinaryAdd(hlo->shape().element_type(), module_), + NewChannel()); + ar->set_sharding(HloSharding::Replicate()); + return PartitionedHlo(ar, hlo->shape(), MakePartitioningState()) + .Reshard(hlo->sharding()) + .hlo(); + }); + return Status::OK(); + } + } + + if (!sharding.IsTileMaximal()) { + // We don't currently support sharding on output feature dimension. + if (sharding.tile_assignment().dim(dnums.output_feature_dimension()) > 1) { + return DefaultAction(hlo); + } + + // Check if the operand and the output sharding are aligned. + std::vector input_to_output_indices(hlo->shape().rank()); + input_to_output_indices[dnums.input_batch_dimension()] = + dnums.output_batch_dimension(); + input_to_output_indices[dnums.input_feature_dimension()] = + dnums.output_feature_dimension(); + for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { + input_to_output_indices[dnums.input_spatial_dimensions(i)] = + dnums.output_spatial_dimensions(i); + } + auto target_operand_sharding = + hlo_sharding_util::TransposeSharding(sharding, input_to_output_indices); + lhs = lhs.Reshard(target_operand_sharding); + + // Replicate the RHS. + rhs = rhs.Reshard(HloSharding::Replicate()); + + // Convolution window config does not include batch and feature dimensions, + // whereas ReshardAsWindowedInput() expects the same number of window + // dimensions as the rank of the operand. So add two more trivial + // dimensions. 
+ std::vector ones(hlo->shape().rank(), 1); + auto operand_window = window_util::MakeWindow(ones); + for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { + *operand_window.mutable_dimensions(dnums.input_spatial_dimensions(i)) = + hlo->window().dimensions(i); + } + + auto zero = b_.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::Zero(hlo->shape().element_type()))); + auto resharded_operand_and_window = lhs.ReshardAsWindowedInput( + operand_window, target_operand_sharding, zero); + if (!resharded_operand_and_window.has_value()) { + return DefaultAction(hlo); + } + Window new_window; + for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { + *new_window.add_dimensions() = + resharded_operand_and_window->shard_window.dimensions( + dnums.input_spatial_dimensions(i)); + } + TF_ASSIGN_OR_RETURN( + Shape sharded_conv_shape, + ShapeInference::InferConvolveShape( + resharded_operand_and_window->sharded_input->shape(), + rhs.hlo()->shape(), hlo->feature_group_count(), + hlo->batch_group_count(), new_window, dnums)); + auto shard_shape = MakePartitionedShape(hlo->shape(), hlo->sharding()); + *sharded_conv_shape.mutable_layout() = shard_shape.layout(); + SetPartitionedHlo(hlo, [&]() { + auto sharded_conv = b_.AddInstruction(HloInstruction::CreateConvolve( + sharded_conv_shape, resharded_operand_and_window->sharded_input, + rhs.hlo(), hlo->feature_group_count(), hlo->batch_group_count(), + new_window, dnums, hlo->precision_config())); + if (!resharded_operand_and_window->dynamic_slice_index_on_output + .has_value()) { + CHECK(ShapeUtil::Compatible(shard_shape, sharded_conv->shape())); + return sharded_conv; + } + return b_.AddInstruction(HloInstruction::CreateDynamicSlice( + shard_shape, sharded_conv, + *resharded_operand_and_window->dynamic_slice_index_on_output, + shard_shape.dimensions())); + }); + return Status::OK(); + } + return DefaultAction(hlo); +} + +} // namespace spmd +} // namespace xla diff --git a/tensorflow/compiler/xla/service/spmd/dot_handler.cc b/tensorflow/compiler/xla/service/spmd/dot_handler.cc new file mode 100644 index 00000000000..9ecf21f5841 --- /dev/null +++ b/tensorflow/compiler/xla/service/spmd/dot_handler.cc @@ -0,0 +1,1211 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "absl/algorithm/container.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_instructions.h" +#include "tensorflow/compiler/xla/service/hlo_sharding.h" +#include "tensorflow/compiler/xla/service/hlo_sharding_util.h" +#include "tensorflow/compiler/xla/service/shape_inference.h" +#include "tensorflow/compiler/xla/service/spmd/spmd_partitioner.h" +#include "tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/util.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/numbers.h" + +namespace xla { +namespace spmd { + +Status SpmdPartitioningVisitor::HandleDot(HloInstruction* hlo) { + DotGeneralDimsMapping mapping; + const auto& dnums = hlo->dot_dimension_numbers(); + int64 next_output_dim = 0; + for (int64 i = 0; i < dnums.lhs_batch_dimensions_size(); ++i) { + mapping.batch_dims.emplace_back(); + mapping.batch_dims.back().lhs = dnums.lhs_batch_dimensions(i); + mapping.batch_dims.back().rhs = dnums.rhs_batch_dimensions(i); + mapping.batch_dims.back().output = next_output_dim++; + } + for (int64 i = 0; i < dnums.lhs_contracting_dimensions_size(); ++i) { + mapping.contracting_dims.emplace_back(); + mapping.contracting_dims.back().lhs = dnums.lhs_contracting_dimensions(i); + mapping.contracting_dims.back().rhs = dnums.rhs_contracting_dimensions(i); + mapping.contracting_dims.back().output = -1; + } + for (int64 i = 0; i < hlo->operand(0)->shape().rank(); ++i) { + if (absl::c_linear_search(dnums.lhs_batch_dimensions(), i) || + absl::c_linear_search(dnums.lhs_contracting_dimensions(), i)) { + continue; + } + mapping.lhs_non_contracting_dims.emplace_back(); + mapping.lhs_non_contracting_dims.back().lhs = i; + mapping.lhs_non_contracting_dims.back().rhs = -1; + mapping.lhs_non_contracting_dims.back().output = next_output_dim++; + } + for (int64 i = 0; i < hlo->operand(1)->shape().rank(); ++i) { + if (absl::c_linear_search(dnums.rhs_batch_dimensions(), i) || + absl::c_linear_search(dnums.rhs_contracting_dimensions(), i)) { + continue; + } + mapping.rhs_non_contracting_dims.emplace_back(); + mapping.rhs_non_contracting_dims.back().lhs = -1; + mapping.rhs_non_contracting_dims.back().rhs = i; + mapping.rhs_non_contracting_dims.back().output = next_output_dim++; + } + auto create_sharded_dot = [&](HloInstruction* l, HloInstruction* r, + SpmdBuilder* b) -> StatusOr { + TF_ASSIGN_OR_RETURN( + auto sharded_dot_shape, + ShapeInference::InferDotOpShape(l->shape(), r->shape(), + hlo->dot_dimension_numbers())); + return b->AddInstruction(HloInstruction::CreateDot( + sharded_dot_shape, l, r, hlo->dot_dimension_numbers(), + hlo->precision_config())); + }; + return HandleDotHelper(hlo, mapping, create_sharded_dot); +} + +Status SpmdPartitioningVisitor::HandleDotHelper( + HloInstruction* hlo, const DotGeneralDimsMapping& dims_mapping, + const std::function( + HloInstruction*, HloInstruction*, SpmdBuilder*)>& create_sharded_dot) { + const HloSharding& lhs_sharding = hlo->operand(0)->sharding(); + const HloSharding& rhs_sharding = hlo->operand(1)->sharding(); + + // Similar to hlo_sharding_util::TransposeSharding(), but allows + // removing/adding non-partitioned dimensions. 
+ auto transpose_sharding = + [&](const HloSharding& source, absl::Span src_to_tgt, + absl::Span tgt_to_src) -> absl::optional { + if (source.IsTileMaximal()) { + return source; + } + std::vector tgt_dims_skipping_new(tgt_to_src.size(), -1); + int64 skipped_tgt_dims = 0; + for (int64 i = 0; i < tgt_to_src.size(); ++i) { + if (tgt_to_src[i] < 0) { + skipped_tgt_dims++; + } else { + tgt_dims_skipping_new[i] = i - skipped_tgt_dims; + } + } + int64 skipped_src_dims = absl::c_count(src_to_tgt, -1); + std::vector perm(src_to_tgt.size()); + for (int64 i = 0; i < src_to_tgt.size(); ++i) { + if (src_to_tgt[i] < 0) { + if (source.tile_assignment().dim(i) > 1) { + return absl::nullopt; + } + perm[src_to_tgt.size() - skipped_src_dims] = i; + skipped_src_dims--; + } else { + perm[tgt_dims_skipping_new[src_to_tgt[i]]] = i; + } + } + auto tgt_sharding = hlo_sharding_util::TransposeSharding(source, perm); + if (skipped_tgt_dims == 0) { + return tgt_sharding; + } + auto reshape_tiles = tgt_sharding.tile_assignment(); + std::vector tgt_tiles(tgt_to_src.size(), 1); + for (int64 i = 0; i < tgt_tiles.size(); ++i) { + if (tgt_to_src[i] >= 0) { + tgt_tiles[i] = reshape_tiles.dim(tgt_dims_skipping_new[i]); + } + } + reshape_tiles.Reshape(tgt_tiles); + return HloSharding::Tile(reshape_tiles); + }; + + std::vector lhs_to_rhs_indices(hlo->operand(0)->shape().rank(), -1); + std::vector lhs_to_output_indices(hlo->operand(0)->shape().rank(), -1); + std::vector rhs_to_lhs_indices(hlo->operand(1)->shape().rank(), -1); + std::vector rhs_to_output_indices(hlo->operand(1)->shape().rank(), -1); + std::vector output_to_lhs_indices(hlo->shape().rank(), -1); + std::vector output_to_rhs_indices(hlo->shape().rank(), -1); + auto populate_indices_mapping = + [&](const DotGeneralDimsMapping::DimsMapping& mapping) { + if (mapping.lhs >= 0) { + lhs_to_rhs_indices[mapping.lhs] = mapping.rhs; + lhs_to_output_indices[mapping.lhs] = mapping.output; + } + if (mapping.rhs >= 0) { + rhs_to_lhs_indices[mapping.rhs] = mapping.lhs; + rhs_to_output_indices[mapping.rhs] = mapping.output; + } + if (mapping.output >= 0) { + output_to_lhs_indices[mapping.output] = mapping.lhs; + output_to_rhs_indices[mapping.output] = mapping.rhs; + } + }; + for (const auto& mapping : dims_mapping.batch_dims) { + populate_indices_mapping(mapping); + } + for (const auto& mapping : dims_mapping.contracting_dims) { + populate_indices_mapping(mapping); + } + for (const auto& mapping : dims_mapping.lhs_non_contracting_dims) { + populate_indices_mapping(mapping); + } + for (const auto& mapping : dims_mapping.rhs_non_contracting_dims) { + populate_indices_mapping(mapping); + } + auto lhs_sharding_transposed_to_match_rhs = + transpose_sharding(lhs_sharding, lhs_to_rhs_indices, rhs_to_lhs_indices); + auto rhs_sharding_transposed_to_match_lhs = + transpose_sharding(rhs_sharding, rhs_to_lhs_indices, lhs_to_rhs_indices); + auto lhs_sharding_transposed_to_match_output = transpose_sharding( + lhs_sharding, lhs_to_output_indices, output_to_lhs_indices); + auto rhs_sharding_transposed_to_match_output = transpose_sharding( + rhs_sharding, rhs_to_output_indices, output_to_rhs_indices); + auto output_sharding_transposed_to_match_lhs = transpose_sharding( + hlo->sharding(), output_to_lhs_indices, lhs_to_output_indices); + auto output_sharding_transposed_to_match_rhs = transpose_sharding( + hlo->sharding(), output_to_rhs_indices, rhs_to_output_indices); + + // lhs_rhs_or_output: 0 lhs, 1 rhs, 2 output. 
+ auto get_partitions_for_dims = + [&](const HloSharding& sharding, + absl::Span dims, + int lhs_rhs_or_output) { + int64 partitions = 1; + if (sharding.IsTileMaximal()) { + return partitions; + } + for (const auto& dim : dims) { + if (lhs_rhs_or_output == 0) { + partitions *= sharding.tile_assignment().dim(dim.lhs); + } else if (lhs_rhs_or_output == 1) { + partitions *= sharding.tile_assignment().dim(dim.rhs); + } else { + CHECK_EQ(lhs_rhs_or_output, 2); + partitions *= sharding.tile_assignment().dim(dim.output); + } + } + return partitions; + }; + const int64 lhs_batch_partitions = + get_partitions_for_dims(lhs_sharding, dims_mapping.batch_dims, 0); + const int64 rhs_batch_partitions = + get_partitions_for_dims(rhs_sharding, dims_mapping.batch_dims, 1); + const int64 output_batch_partitions = + get_partitions_for_dims(hlo->sharding(), dims_mapping.batch_dims, 2); + const int64 lhs_contracting_partitions = + get_partitions_for_dims(lhs_sharding, dims_mapping.contracting_dims, 0); + const int64 rhs_contracting_partitions = + get_partitions_for_dims(rhs_sharding, dims_mapping.contracting_dims, 1); + const int64 lhs_non_contracting_partitions = get_partitions_for_dims( + lhs_sharding, dims_mapping.lhs_non_contracting_dims, 0); + const int64 rhs_non_contracting_partitions = get_partitions_for_dims( + rhs_sharding, dims_mapping.rhs_non_contracting_dims, 1); + const int64 output_lhs_non_contracting_partitions = get_partitions_for_dims( + hlo->sharding(), dims_mapping.lhs_non_contracting_dims, 2); + const int64 output_rhs_non_contracting_partitions = get_partitions_for_dims( + hlo->sharding(), dims_mapping.rhs_non_contracting_dims, 2); + + auto& lhs = GetPartitionedHlo(hlo->operand(0)); + auto& rhs = GetPartitionedHlo(hlo->operand(1)); + // LHS and RHS are partitioned the same way and only partitioned in batch + // dimensions. + if (lhs_batch_partitions == rhs_batch_partitions && + rhs_batch_partitions == num_partitions_ && + lhs_sharding_transposed_to_match_rhs == rhs_sharding) { + TF_ASSIGN_OR_RETURN(auto dot, + create_sharded_dot(lhs.hlo(), rhs.hlo(), &b_)); + SetPartitionedHlo(hlo, [&] { + dot->set_sharding(*lhs_sharding_transposed_to_match_output); + return PartitionedHlo(dot, hlo->shape(), MakePartitioningState()) + .Reshard(hlo->sharding()) + .hlo(); + }); + return Status::OK(); + } + + // Try emit batch-partitioned einsum with one operand resharded. Returns + // whether the attempt succeeds. If may_reshard_with_allreduce is false, + // reshard must be done using all-to-all; otherwise this attempt fails. + auto try_emit_output_batch_partitioned_einsum_with_reshard = + [&](bool may_reshard_with_allreduce) -> StatusOr { + // LHS and output are batch partitioned in the same way. + if (lhs_batch_partitions == num_partitions_ && + output_batch_partitions == num_partitions_ && + lhs_sharding_transposed_to_match_output == hlo->sharding()) { + if (!may_reshard_with_allreduce && + !GetReshardAllToAllSourceTargetDims( + rhs.sharding(), *lhs_sharding_transposed_to_match_rhs)) { + return false; + } + auto resharded_rhs = rhs.Reshard(*lhs_sharding_transposed_to_match_rhs); + TF_ASSIGN_OR_RETURN( + auto dot, create_sharded_dot(lhs.hlo(), resharded_rhs.hlo(), &b_)); + SetPartitionedHlo(hlo, [&] { return dot; }); + return true; + } + // RHS and output are batch partitioned in the same way. 
+ if (rhs_batch_partitions == num_partitions_ && + output_batch_partitions == num_partitions_ && + rhs_sharding_transposed_to_match_output == hlo->sharding()) { + if (!may_reshard_with_allreduce && + !GetReshardAllToAllSourceTargetDims( + lhs.sharding(), *rhs_sharding_transposed_to_match_lhs)) { + return false; + } + auto resharded_lhs = lhs.Reshard(*rhs_sharding_transposed_to_match_lhs); + TF_ASSIGN_OR_RETURN( + auto dot, create_sharded_dot(resharded_lhs.hlo(), rhs.hlo(), &b_)); + SetPartitionedHlo(hlo, [&] { return dot; }); + return true; + } + return false; + }; + + { + // Try batch-parallel by resharding one operand, and not using all-reduce. + TF_ASSIGN_OR_RETURN( + bool emitted, + try_emit_output_batch_partitioned_einsum_with_reshard(false)); + if (emitted) { + return Status::OK(); + } + } + + // Try to emit windowed DotGeneral when one operand is partitioned in the same + // way as the output along non-contracting dimensions, but the other operand + // is tiled in other dimensions. + auto emit_windowed_dot_general = [&](int64 matching_operand, + int64 windowing_operand, + bool windowed_at_contracting_dims, + bool windowed_at_batch_dims) { + CHECK_EQ(matching_operand + windowing_operand, 1); + CHECK(!windowed_at_batch_dims || !windowed_at_contracting_dims); + auto unpadded_result_buffer_shape = + MakePartitionedShape(hlo->shape(), hlo->sharding()); + auto padded_result_buffer_shape = unpadded_result_buffer_shape; + // For windowing at batch/non-contracting dims, we produce the result one + // partition at a time, so we need to pad the shape in case of uneven + // partitioning in order to make dynamic-update-slice in-bound. + if (!windowed_at_contracting_dims) { + padded_result_buffer_shape = GetPaddedShapeForUnevenPartitioning( + padded_result_buffer_shape, + windowing_operand == 0 ? *lhs_sharding_transposed_to_match_output + : *rhs_sharding_transposed_to_match_output); + } + // Mask the padding area of the windowed operand with zero if there is + // uneven partitioning. + if (windowed_at_contracting_dims) { + auto& to_mask = windowing_operand == 0 ? lhs : rhs; + to_mask = + to_mask.PadWithValue(b_.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::Zero(hlo->shape().element_type())))); + } + auto result_buffer = CreateZero(padded_result_buffer_shape, &b_); + auto iteration = b_.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0(0))); + + // Create a while loop that computes one window per iteration. During each + // iteration, each partition sends its input window to its neighbor using + // collective-permute for the next iteration. 
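To make that schedule concrete: with this rotation, at iteration i partition p works on the shard that originally lived on partition (p + i) mod num_partitions, which is exactly how data_partition_id is computed in the loop body below. A standalone sketch of the schedule (illustrative only; the constant n is arbitrary):

  // Sketch: which data shard each partition holds at iteration i when every
  // partition sends its current shard to partition (p - 1 + n) % n each step.
  #include <cstdio>

  int main() {
    const int n = 4;  // stands in for num_partitions_
    for (int p = 0; p < n; ++p) {
      for (int i = 0; i < n; ++i) {
        int data_partition_id = (i + p) % n;  // matches the loop body below
        std::printf("iteration %d: partition %d works on shard %d\n",
                    i, p, data_partition_id);
      }
    }
    return 0;
  }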
+ SpmdBuilder body_b("windowed_dot_general_body", visiting_hlo_); + auto param = body_b.AddInstruction(HloInstruction::CreateParameter( + /*parameter_number=*/0, + ShapeUtil::MakeTupleShape({lhs.hlo()->shape(), rhs.hlo()->shape(), + result_buffer->shape(), iteration->shape()}), + "param")); + auto l = body_b.AddInstruction( + HloInstruction::CreateGetTupleElement(lhs.hlo()->shape(), param, 0)); + auto r = body_b.AddInstruction( + HloInstruction::CreateGetTupleElement(rhs.hlo()->shape(), param, 1)); + auto o = body_b.AddInstruction(HloInstruction::CreateGetTupleElement( + result_buffer->shape(), param, 2)); + auto i = body_b.AddInstruction( + HloInstruction::CreateGetTupleElement(iteration->shape(), param, 3)); + + auto partition_id = collective_ops_creator_.create_partition_id(&body_b); + auto data_partition_id = body_b.AddInstruction(HloInstruction::CreateBinary( + i->shape(), HloOpcode::kAdd, i, partition_id)); + auto partition_count = body_b.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR0(num_partitions_))); + data_partition_id = body_b.AddInstruction(HloInstruction::CreateBinary( + i->shape(), HloOpcode::kRemainder, data_partition_id, partition_count)); + auto dot_lhs = l; + auto dot_rhs = r; + if (windowed_at_contracting_dims || windowed_at_batch_dims) { + // Slice the matching operand according to the partitioned contracting + // dimensions on the windowed operand. We do this by treating the matching + // operand as replicated, and resharding it to match the windowed operand. + auto slice_operand = matching_operand == 0 ? l : r; + slice_operand->set_sharding(HloSharding::Replicate()); + auto state = MakePartitioningState(); + state.b = &body_b; + state.partition_id = data_partition_id; + auto slice = PartitionedHlo(slice_operand, slice_operand->shape(), state) + .Reshard(windowing_operand == 0 + ? *lhs_sharding_transposed_to_match_rhs + : *rhs_sharding_transposed_to_match_lhs) + .hlo(); + slice_operand->clear_sharding(); + if (matching_operand == 0) { + dot_lhs = slice; + } else { + dot_rhs = slice; + } + } + TF_ASSIGN_OR_RETURN(auto dot, + create_sharded_dot(dot_lhs, dot_rhs, &body_b)); + if (windowed_at_contracting_dims) { + // Accumulate the partial output to the result buffer. + o = body_b.AddInstruction( + HloInstruction::CreateBinary(o->shape(), HloOpcode::kAdd, o, dot)); + } else { + // The windowing operand is partitioned along batch/non-contracting + // dimensions, so we need a dynamic-update-slice to save the partial + // output in the result buffer. + auto offsets = MakePartitionOffsets( + o->shape(), + windowing_operand == 0 ? *lhs_sharding_transposed_to_match_output + : *rhs_sharding_transposed_to_match_output, + data_partition_id, &body_b); + o = body_b.AddInstruction(HloInstruction::CreateDynamicUpdateSlice( + o->shape(), o, dot, offsets)); + } + + // ++i + i = body_b.AddInstruction(HloInstruction::CreateBinary( + i->shape(), HloOpcode::kAdd, i, + body_b.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0(1))))); + auto has_more = body_b.AddInstruction(HloInstruction::CreateCompare( + ShapeUtil::MakeShape(PRED, {}), i, + body_b.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR0(num_partitions_))), + ComparisonDirection::kLt)); + // Collective-permute for the next window. We don't need it for the last + // iteration, so we use a conditional around the collective-permute. 
+ HloInstruction* conditional; + { + SpmdBuilder cp_b("window_collective_permute", visiting_hlo_); + { + auto p = cp_b.AddInstruction(HloInstruction::CreateParameter( + 0, windowing_operand == 0 ? l->shape() : r->shape(), "window")); + std::vector> sd_pairs(num_partitions_); + for (int64 source = 0; source < num_partitions_; ++source) { + // 0 -> n-1, 1 -> 0, 2 -> 1, ... + sd_pairs[source] = {source, + (source - 1 + num_partitions_) % num_partitions_}; + } + collective_ops_creator_.create_cross_partition_collective_permute( + &cp_b, p, sd_pairs, (*next_channel_id_)++); + } + SpmdBuilder ncp_b("last_iteration_noop", visiting_hlo_); + { + ncp_b.AddInstruction(HloInstruction::CreateParameter( + 0, windowing_operand == 0 ? l->shape() : r->shape(), "window")); + } + conditional = body_b.AddInstruction(HloInstruction::CreateConditional( + windowing_operand == 0 ? l->shape() : r->shape(), has_more, + windowing_operand == 0 ? l : r, + module_->AddEmbeddedComputation(cp_b.Build()), + windowing_operand == 0 ? l : r, + module_->AddEmbeddedComputation(ncp_b.Build()))); + } + if (windowing_operand == 0) { + l = conditional; + } else { + r = conditional; + } + body_b.AddInstruction(HloInstruction::CreateTuple({l, r, o, i})); + + SpmdBuilder cond_b("windowed_dot_general_cond", visiting_hlo_); + auto cond_param = cond_b.AddInstruction(HloInstruction::CreateParameter( + /*parameter_number=*/0, + ShapeUtil::MakeTupleShape({lhs.hlo()->shape(), rhs.hlo()->shape(), + result_buffer->shape(), iteration->shape()}), + "param")); + auto cond_i = cond_b.AddInstruction(HloInstruction::CreateGetTupleElement( + iteration->shape(), cond_param, 3)); + cond_b.AddInstruction(HloInstruction::CreateCompare( + ShapeUtil::MakeShape(PRED, {}), cond_i, + cond_b.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR0(num_partitions_))), + ComparisonDirection::kLt)); + auto while_loop = b_.AddInstruction(HloInstruction::CreateWhile( + cond_param->shape(), module_->AddEmbeddedComputation(cond_b.Build()), + module_->AddEmbeddedComputation(body_b.Build()), + b_.AddInstruction(HloInstruction::CreateTuple( + {lhs.hlo(), rhs.hlo(), result_buffer, iteration})))); + windowed_dot_general_loops_.push_back({while_loop, windowing_operand, + windowed_at_contracting_dims, + windowed_at_batch_dims}); + SetPartitionedHlo(hlo, [&] { + auto result = b_.AddInstruction(HloInstruction::CreateGetTupleElement( + result_buffer->shape(), while_loop, 2)); + if (!ShapeUtil::Compatible(padded_result_buffer_shape, + unpadded_result_buffer_shape)) { + result = b_.AddInstruction(HloInstruction::CreateSlice( + unpadded_result_buffer_shape, result, + std::vector(padded_result_buffer_shape.rank(), 0), + unpadded_result_buffer_shape.dimensions(), + std::vector(padded_result_buffer_shape.rank(), 1))); + } + return result; + }); + return Status::OK(); + }; + if (output_lhs_non_contracting_partitions == num_partitions_ && + output_sharding_transposed_to_match_lhs == lhs_sharding && + ShapeSizeInBytes(hlo->operand(1)->shape()) >= + options_.threshold_for_windowed_einsum_mib * 1024 * 1024) { + if (rhs_contracting_partitions == num_partitions_) { + return emit_windowed_dot_general(0, 1, true, false); + } + if (rhs_non_contracting_partitions == num_partitions_) { + return emit_windowed_dot_general(0, 1, false, false); + } + if (rhs_batch_partitions == num_partitions_) { + return emit_windowed_dot_general(0, 1, false, true); + } + } + if (output_rhs_non_contracting_partitions == num_partitions_ && + output_sharding_transposed_to_match_rhs == rhs_sharding && 
+ ShapeSizeInBytes(hlo->operand(0)->shape()) >= + options_.threshold_for_windowed_einsum_mib * 1024 * 1024) { + if (lhs_contracting_partitions == num_partitions_) { + return emit_windowed_dot_general(1, 0, true, false); + } + if (lhs_non_contracting_partitions == num_partitions_) { + return emit_windowed_dot_general(1, 0, false, false); + } + if (lhs_batch_partitions == num_partitions_) { + return emit_windowed_dot_general(1, 0, false, true); + } + } + + { + // Try batch-parallel by resharding one operand, and allowing all-reduce. + TF_ASSIGN_OR_RETURN( + bool emitted, + try_emit_output_batch_partitioned_einsum_with_reshard(true)); + if (emitted) { + return Status::OK(); + } + } + + // LHS and RHS have the same partitioned contracting dimensions. + if (lhs_contracting_partitions == rhs_contracting_partitions && + lhs_contracting_partitions == num_partitions_) { + auto zero = b_.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::Zero(hlo->shape().element_type()))); + // Pad both sides with zero, since NaN at one side cannot be masked by zero + // on the other side. + if (ShapeSizeInBytes(lhs.base_shape()) < + ShapeSizeInBytes(rhs.base_shape())) { + lhs = + lhs.Reshard(*rhs_sharding_transposed_to_match_lhs).PadWithValue(zero); + rhs = rhs.PadWithValue(zero); + } else { + lhs = lhs.PadWithValue(zero); + rhs = + rhs.Reshard(*lhs_sharding_transposed_to_match_rhs).PadWithValue(zero); + } + TF_ASSIGN_OR_RETURN(auto dot, + create_sharded_dot(lhs.hlo(), rhs.hlo(), &b_)); + SetPartitionedHlo(hlo, [&] { + auto ar = collective_ops_creator_.create_cross_partition_all_reduce( + &b_, dot, MakeBinaryAdd(hlo->shape().element_type(), module_), + NewChannel()); + ar->set_sharding(HloSharding::Replicate()); + return PartitionedHlo(ar, hlo->shape(), MakePartitioningState()) + .Reshard(hlo->sharding()) + .hlo(); + }); + return Status::OK(); + } + + // LHS and output have the same partitioned non-contracting dimensions. + if (lhs_non_contracting_partitions == num_partitions_ && + output_lhs_non_contracting_partitions == num_partitions_ && + lhs_sharding_transposed_to_match_output == hlo->sharding()) { + auto rhs_replicated = rhs.Reshard(HloSharding::Replicate()).hlo(); + TF_ASSIGN_OR_RETURN(auto dot, + create_sharded_dot(lhs.hlo(), rhs_replicated, &b_)); + SetPartitionedHlo(hlo, [&] { return dot; }); + return Status::OK(); + } + + // RHS and output have the same partitioned non-contracting dimensions. + if (rhs_non_contracting_partitions == num_partitions_ && + output_rhs_non_contracting_partitions == num_partitions_ && + rhs_sharding_transposed_to_match_output == hlo->sharding()) { + auto lhs_replicated = lhs.Reshard(HloSharding::Replicate()).hlo(); + TF_ASSIGN_OR_RETURN(auto dot, + create_sharded_dot(lhs_replicated, rhs.hlo(), &b_)); + SetPartitionedHlo(hlo, [&] { return dot; }); + return Status::OK(); + } + + // Output is batch partitioned. + if (output_batch_partitions == num_partitions_) { + auto resharded_lhs = lhs.Reshard(*output_sharding_transposed_to_match_lhs); + auto resharded_rhs = rhs.Reshard(*output_sharding_transposed_to_match_rhs); + TF_ASSIGN_OR_RETURN(auto dot, create_sharded_dot(resharded_lhs.hlo(), + resharded_rhs.hlo(), &b_)); + SetPartitionedHlo(hlo, [&] { return dot; }); + return Status::OK(); + } + // Output is partitioned along LHS non-contracting dimensions. 
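The "same partitioned contracting dimensions" branch just above leans on the identity that a dot whose contracting dimension is split across partitions equals the sum of the per-partition partial dots, which is what the cross-partition all-reduce computes. A tiny standalone check of that identity (not part of the patch):

  // Sketch: dot(a, b) over k == sum over partitions of the per-shard dots.
  #include <cassert>

  int main() {
    const int k = 4;
    const double a[k] = {1, 2, 3, 4};
    const double b[k] = {5, 6, 7, 8};
    double full = 0;
    for (int i = 0; i < k; ++i) full += a[i] * b[i];
    // Two partitions, each holding half of the contracting dimension.
    const double partial0 = a[0] * b[0] + a[1] * b[1];
    const double partial1 = a[2] * b[2] + a[3] * b[3];
    const double all_reduced = partial0 + partial1;  // what the all-reduce does
    assert(all_reduced == full);
    return 0;
  }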
+  if (output_lhs_non_contracting_partitions == num_partitions_) {
+    auto resharded_lhs = lhs.Reshard(*output_sharding_transposed_to_match_lhs);
+    auto replicated_rhs = rhs.Reshard(HloSharding::Replicate());
+    TF_ASSIGN_OR_RETURN(
+        auto dot,
+        create_sharded_dot(resharded_lhs.hlo(), replicated_rhs.hlo(), &b_));
+    SetPartitionedHlo(hlo, [&] { return dot; });
+    return Status::OK();
+  }
+  // Output is partitioned along RHS non-contracting dimensions.
+  if (output_rhs_non_contracting_partitions == num_partitions_) {
+    auto replicated_lhs = lhs.Reshard(HloSharding::Replicate());
+    auto resharded_rhs = rhs.Reshard(*output_sharding_transposed_to_match_rhs);
+    TF_ASSIGN_OR_RETURN(auto dot, create_sharded_dot(replicated_lhs.hlo(),
+                                                     resharded_rhs.hlo(), &b_));
+    SetPartitionedHlo(hlo, [&] { return dot; });
+    return Status::OK();
+  }
+
+  // Returns true if it is beneficial to reshard the operand at `operand_idx`
+  // across the contracting dimension.
+  const auto should_partition_contracting_dim = [&](int64 operand_idx) {
+    if (!hlo->sharding().IsReplicated()) {
+      return false;
+    }
+
+    if (operand_idx == 0) {
+      // If LHS and output are replicated, we compare the cost of all-gather
+      // on RHS vs all-reduce on the output.
+      return (rhs_contracting_partitions == num_partitions_) &&
+             lhs.sharding().IsReplicated() &&
+             ShapeUtil::ElementsIn(hlo->operand(1)->shape()) >
+                 ShapeUtil::ElementsIn(hlo->shape());
+    } else {
+      return (lhs_contracting_partitions == num_partitions_) &&
+             rhs.sharding().IsReplicated() &&
+             ShapeUtil::ElementsIn(hlo->operand(0)->shape()) >
+                 ShapeUtil::ElementsIn(hlo->shape());
+    }
+  };
+
+  // When the output is replicated and one of the operands is partitioned along
+  // the contracting dimension, align the other operand to be partitioned along
+  // the contracting dimensions.
+  if (hlo->sharding().IsReplicated() && (should_partition_contracting_dim(0) ||
+                                         should_partition_contracting_dim(1))) {
+    auto zero = b_.AddInstruction(HloInstruction::CreateConstant(
+        LiteralUtil::Zero(hlo->shape().element_type())));
+    if (should_partition_contracting_dim(0)) {
+      lhs =
+          lhs.Reshard(*rhs_sharding_transposed_to_match_lhs).PadWithValue(zero);
+      rhs = rhs.PadWithValue(zero);
+    } else {
+      lhs = lhs.PadWithValue(zero);
+      rhs =
+          rhs.Reshard(*lhs_sharding_transposed_to_match_rhs).PadWithValue(zero);
+    }
+    TF_ASSIGN_OR_RETURN(auto dot,
+                        create_sharded_dot(lhs.hlo(), rhs.hlo(), &b_));
+    SetPartitionedHlo(hlo, [&] {
+      auto ar = collective_ops_creator_.create_cross_partition_all_reduce(
+          &b_, dot, MakeBinaryAdd(hlo->shape().element_type(), module_),
+          NewChannel());
+      ar->set_sharding(HloSharding::Replicate());
+      return PartitionedHlo(ar, hlo->shape(), MakePartitioningState()).hlo();
+    });
+    return Status::OK();
+  }
+
+  return DefaultAction(hlo);
+}
+
+namespace {
+
+// Finds a cluster of nodes that produce the inputs for `hlo` which only depend
+// on small operands, which means the cluster should start with broadcasts,
+// constants and iotas. All other internal nodes must be non-side-effecting
+// elementwise ops. Returns the set of nodes, and the small operands. E.g., for
+// the following graph,
+//
+//     a -> broadcast -> multiply
+//     iota ---> add--/
+//     constant/
+//
+// FindInputNodesIfOnlyDependOnSmallOperands(multiply) will return
+// <{broadcast, iota, constant, add, multiply}, [a]>.
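As a rough model of FindInputNodesIfOnlyDependOnSmallOperands (declared right after this note), the toy snippet below runs the same worklist logic on plain structs and reproduces the example from the comment: broadcast/constant/iota stop the walk and record their operands, elementwise nodes keep it going, and anything else aborts the whole search. It deliberately omits the side-effect, all-reduce, and shape-compatibility checks of the real code.

  #include <string>
  #include <unordered_set>
  #include <utility>
  #include <vector>

  struct Node {
    std::string op;                // "broadcast", "constant", "iota", "add", ...
    std::vector<Node*> operands;
    bool elementwise;
  };

  // Mirrors the structure of the traversal on plain structs.
  std::pair<std::unordered_set<Node*>, std::vector<Node*>> FindCluster(Node* root) {
    std::unordered_set<Node*> found;
    std::vector<Node*> small_operands;
    std::unordered_set<Node*> small_set;
    std::vector<Node*> worklist = {root};
    while (!worklist.empty()) {
      Node* n = worklist.back();
      worklist.pop_back();
      if (found.count(n)) continue;
      if (n->op == "broadcast" || n->op == "constant" || n->op == "iota") {
        found.insert(n);
        for (Node* o : n->operands) {
          if (small_set.insert(o).second) small_operands.push_back(o);
        }
      } else if (n->elementwise) {
        found.insert(n);
        for (Node* o : n->operands) worklist.push_back(o);
      } else {
        return {};  // a non-qualifying node aborts the search
      }
    }
    return {found, small_operands};
  }

  int main() {
    // a -> broadcast -> multiply <- add <- {iota, constant}
    Node a{"parameter", {}, false};
    Node broadcast{"broadcast", {&a}, false};
    Node iota{"iota", {}, false};
    Node constant{"constant", {}, false};
    Node add{"add", {&iota, &constant}, true};
    Node multiply{"multiply", {&broadcast, &add}, true};
    auto result = FindCluster(&multiply);
    // result.first  == {multiply, broadcast, add, iota, constant}
    // result.second == {a}, the only "small operand" feeding the cluster.
    return result.second.size() == 1 ? 0 : 1;
  }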
+std::pair, std::vector> +FindInputNodesIfOnlyDependOnSmallOperands(HloInstruction* hlo) { + std::unordered_set nodes_found; + std::vector new_operands; + std::unordered_set new_operands_set; + std::vector worklist; + worklist.push_back(hlo); + while (!worklist.empty()) { + auto inst = worklist.back(); + worklist.pop_back(); + if (nodes_found.count(inst) > 0) { + continue; + } + if (inst->opcode() == HloOpcode::kBroadcast || + inst->opcode() == HloOpcode::kConstant || + inst->opcode() == HloOpcode::kIota) { + nodes_found.insert(inst); + for (auto o : inst->operands()) { + auto res = new_operands_set.emplace(o); + if (res.second) { + new_operands.push_back(o); + } + } + } else if (inst->IsElementwise() && !inst->HasSideEffectNoRecurse() && + inst->opcode() != HloOpcode::kAllReduce && + absl::c_all_of(inst->operands(), + [inst](const HloInstruction* o) { + return ShapeUtil::CompatibleIgnoringElementType( + o->shape(), inst->shape()); + })) { + nodes_found.insert(inst); + for (auto o : inst->operands()) { + worklist.push_back(o); + } + } else { + nodes_found.clear(); + new_operands.clear(); + break; + } + } + return {std::move(nodes_found), std::move(new_operands)}; +} + +// Moves a cluster of memory-reducing nodes into the windowed dot-general loop +// on contracting dimensions. Such a loop has a dynamic slice on the +// non-windowed operand. If we move the input nodes into the loop, the +// dynamic-slice could be merged with them by later optimization passes, which +// reduces memory. +// +// small_operands small_operands +// | | +// input_nodes loop { | +// | => input_nodes +// loop { | | +// dynamic-slice dynamic-slice +// ... ... +// } } +// +// Later optimization passes (TpuPadSliceMover) will merge the dynamic slice +// with the input nodes. +Status SinkInputNodesIntoWindowedDotGeneralLoopOnContractingDimensions( + HloInstruction* loop, int64 non_windowed_operand_index) { + auto input_tuple = loop->mutable_operand(0); + auto old_operand = input_tuple->mutable_operand(non_windowed_operand_index); + auto input_nodes = FindInputNodesIfOnlyDependOnSmallOperands(old_operand); + auto to_sink = std::move(input_nodes.first); + auto new_operands = std::move(input_nodes.second); + if (to_sink.empty()) { + return Status::OK(); + } + auto computation = loop->parent(); + // Replace the old operand with a tuple of the found small operands. + auto new_input_subtuple = + computation->AddInstruction(HloInstruction::CreateTuple(new_operands)); + TF_RETURN_IF_ERROR(input_tuple->ReplaceOperandWithDifferentShape( + non_windowed_operand_index, new_input_subtuple)); + + auto body = loop->while_body(); + auto body_param = body->parameter_instruction(0); + auto old_body_param_users = body_param->users(); + // Update all tuple shapes. + for (auto tuple : std::vector{ + input_tuple, loop, loop->while_condition()->parameter_instruction(0), + body_param, body->root_instruction()}) { + *ShapeUtil::GetMutableSubshape(tuple->mutable_shape(), + {non_windowed_operand_index}) = + new_input_subtuple->shape(); + } + // Now update the loop body. + auto new_operand_tuple_inside = + body->AddInstruction(HloInstruction::CreateGetTupleElement( + new_input_subtuple->shape(), body_param, non_windowed_operand_index)); + TF_RETURN_IF_ERROR(body->root_instruction()->ReplaceOperandWithDifferentShape( + non_windowed_operand_index, new_operand_tuple_inside)); + + // Create nodes inside the loop body. 
+ std::vector worklist; + std::unordered_map outside_to_inside; + auto add_users_if_available = [&](HloInstruction* inst) { + for (auto u : inst->users()) { + if (outside_to_inside.count(u) == 0 && to_sink.count(u) > 0 && + absl::c_all_of(u->operands(), [&](const HloInstruction* o) { + return outside_to_inside.count(o) > 0; + })) { + worklist.push_back(u); + } + } + }; + for (int64 i = 0; i < new_operands.size(); ++i) { + outside_to_inside[new_operands[i]] = + body->AddInstruction(HloInstruction::CreateGetTupleElement( + new_operands[i]->shape(), new_operand_tuple_inside, i)); + add_users_if_available(new_operands[i]); + } + // HLOs to sink without operands. + std::vector nullaries_to_sink; + for (auto inst : to_sink) { + if (inst->operand_count() == 0) { + nullaries_to_sink.push_back(inst); + } + } + // Sort nullaries_to_sink to make it deterministic. + absl::c_sort(nullaries_to_sink, + [](const HloInstruction* a, const HloInstruction* b) { + return a->unique_id() < b->unique_id(); + }); + for (auto inst : nullaries_to_sink) { + worklist.push_back(inst); + } + while (!worklist.empty()) { + auto inst = worklist.back(); + worklist.pop_back(); + std::vector inst_new_operands(inst->operand_count()); + for (int64 i = 0; i < inst->operand_count(); ++i) { + inst_new_operands[i] = outside_to_inside[inst->operand(i)]; + } + outside_to_inside[inst] = body->AddInstruction( + inst->CloneWithNewOperands(inst->shape(), inst_new_operands)); + add_users_if_available(inst); + } + TF_RET_CHECK(outside_to_inside.count(old_operand) > 0); + for (auto ou : old_body_param_users) { + if (ou->opcode() == HloOpcode::kGetTupleElement && + ou->tuple_index() == non_windowed_operand_index) { + TF_RETURN_IF_ERROR( + ou->ReplaceAllUsesWith(outside_to_inside[old_operand])); + TF_RETURN_IF_ERROR(body->RemoveInstruction(ou)); + } + } + return Status::OK(); +} + +// Moves a cluster of memory-reducing nodes (with reduce nodes at the end) into +// the windowed dot-general loop on non-contracting dimensions. Such a loop has +// a dynamic-update-slice at the output. If we move the user nodes into the loop +// and before the dynamic-update-slice, the user nodes can operate on smaller +// shapes, which reduces memory. +// +// small_operands small_operands +// | | => | | +// | | loop { loop { | | +// | | conv | broadcast conv +// | | | | | / +// | | dynamic-update-slice | dynamic-slice / +// | | | | | / +// | | } | | multiply----- +// |broadcast / | / +// | | / reduce +// |multiply-- | +// \ | dynamic-update-slice +// reduce } +// +// Later optimization passes (TpuPadSliceMover) will merge the dynamic slice +// with the input nodes (broadcast). +Status MoveUsersIntoWindowedDotGeneralLoopOnNonContractingDimensions( + HloInstruction* loop) { + CHECK_EQ(loop->user_count(), 1); + // There should be a single direct user of the while loop, which is the + // gte for element 2, i.e., the dot output. + auto user_gte = loop->users().front(); + CHECK_EQ(user_gte->opcode(), HloOpcode::kGetTupleElement); + CHECK_EQ(user_gte->tuple_index(), 2); + auto computation = loop->parent(); + + // Find the reduce outputs and the input nodes they depend on, if input nodes + // only have small operands. 
+  std::unordered_set<HloInstruction*> to_move;
+  std::vector<HloInstruction*> new_operands;
+  std::unordered_set<HloInstruction*> new_operands_set;
+  std::vector<HloInstruction*> reduce_outputs;
+  std::vector<HloInstruction*> worklist;
+  Shape padded_shape = user_gte->shape();
+  Shape unpadded_shape = user_gte->shape();
+  auto original_output = user_gte;
+
+  if (user_gte->user_count() == 1 &&
+      user_gte->users().back()->opcode() == HloOpcode::kSlice) {
+    original_output = user_gte->users().back();
+    unpadded_shape = original_output->shape();
+  }
+  for (auto u : original_output->users()) {
+    worklist.push_back(u);
+  }
+  to_move.insert(original_output);
+  while (!worklist.empty()) {
+    auto inst = worklist.back();
+    worklist.pop_back();
+    if (to_move.count(inst) > 0) {
+      continue;
+    }
+    // We only support reduces with a simple reduction function, since we may
+    // need to accumulate across iterations manually.
+    if (inst->opcode() == HloOpcode::kReduce &&
+        inst->to_apply()->instruction_count() == 3 &&
+        inst->to_apply()->num_parameters() == 2 &&
+        inst->to_apply()->root_instruction()->IsElementwise()) {
+      to_move.insert(inst);
+      auto other_operand = inst->mutable_operand(1);
+      auto res = new_operands_set.emplace(other_operand);
+      if (res.second) {
+        new_operands.push_back(other_operand);
+      }
+      reduce_outputs.push_back(inst);
+    } else if (inst != computation->root_instruction() &&
+               inst->user_count() > 0 && inst->IsElementwise() &&
+               !inst->HasSideEffectNoRecurse() &&
+               inst->opcode() != HloOpcode::kAllReduce &&
+               absl::c_all_of(inst->operands(),
+                              [inst](const HloInstruction* o) {
+                                return ShapeUtil::CompatibleIgnoringElementType(
+                                    o->shape(), inst->shape());
+                              })) {
+      // For an elementwise op, we need to make sure that it depends only on
+      // nodes already in to_move and nodes with small operands.
+      bool can_include = true;
+      for (auto operand : inst->operands()) {
+        if (to_move.count(operand) > 0) {
+          continue;
+        }
+        auto find_result = FindInputNodesIfOnlyDependOnSmallOperands(operand);
+        if (find_result.first.empty()) {
+          can_include = false;
+          break;
+        }
+        for (auto n : find_result.first) {
+          to_move.insert(n);
+        }
+        for (auto new_operand : find_result.second) {
+          auto res = new_operands_set.insert(new_operand);
+          if (res.second) {
+            new_operands.push_back(new_operand);
+          }
+        }
+      }
+      if (!can_include) {
+        to_move.clear();
+        break;
+      }
+      to_move.insert(inst);
+      for (auto u : inst->users()) {
+        worklist.push_back(u);
+      }
+    } else {
+      to_move.clear();
+      break;
+    }
+  }
+  // If nothing is found, to_move could contain only original_output, or have
+  // been cleared by the above code.
+  if (to_move.size() <= 1) {
+    return Status::OK();
+  }
+
+  // We will replace the original loop output with reduce-shape outputs. Create
+  // the initial buffers before the loop.
+  for (auto out : reduce_outputs) {
+    auto padded_out_shape = out->shape();
+    int64 operand_dim = 0;
+    int64 output_dim = 0;
+    while (output_dim < padded_out_shape.rank()) {
+      if (absl::c_linear_search(out->dimensions(), operand_dim)) {
+        // Dimension collapsed.
+        ++operand_dim;
+        continue;
+      }
+      // Kept dimensions have the same size as the padded shape.
+ padded_out_shape.set_dimensions(output_dim, + padded_shape.dimensions(operand_dim)); + ++operand_dim; + ++output_dim; + } + auto broadcast = + computation->AddInstruction(HloInstruction::CreateBroadcast( + padded_out_shape, + computation->AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::Zero(out->shape().element_type()))), + {})); + new_operands.push_back(broadcast); + } + + auto input_tuple = loop->mutable_operand(0); + // Create the new input subtuple that contains the small operands and the + // reduce-shape result buffers. + auto new_input_subtuple = + computation->AddInstruction(HloInstruction::CreateTuple(new_operands)); + TF_RETURN_IF_ERROR( + input_tuple->ReplaceOperandWithDifferentShape(2, new_input_subtuple)); + auto body = loop->while_body(); + auto body_param = body->parameter_instruction(0); + auto body_root = body->root_instruction(); + CHECK_EQ(body_root->opcode(), HloOpcode::kTuple); + // Update tuple shapes. + for (auto tuple : std::vector{ + input_tuple, loop, loop->while_condition()->parameter_instruction(0), + body_param, body_root}) { + *ShapeUtil::GetMutableSubshape(tuple->mutable_shape(), {2}) = + new_input_subtuple->shape(); + } + auto new_loop_input = + body->AddInstruction(HloInstruction::CreateGetTupleElement( + new_input_subtuple->shape(), body_param, 2)); + + // Now create the moved nodes inside the loop body. + std::unordered_map outside_to_inside; + worklist.clear(); + auto add_users_if_available = [&](HloInstruction* inst) { + for (auto u : inst->users()) { + if (outside_to_inside.count(u) == 0 && to_move.count(u) > 0 && + absl::c_all_of(u->operands(), [&](const HloInstruction* o) { + return outside_to_inside.count(o) > 0; + })) { + worklist.push_back(u); + } + } + }; + for (int64 i = 0; i < new_operands.size(); ++i) { + outside_to_inside[new_operands[i]] = + body->AddInstruction(HloInstruction::CreateGetTupleElement( + new_operands[i]->shape(), new_loop_input, i)); + add_users_if_available(new_operands[i]); + } + // The elementwise nodes will be created with sliced shape. The original loop + // output corresponds to the dynamic-update-slice's update slice. + auto dus = body_root->mutable_operand(2); + CHECK_EQ(dus->opcode(), HloOpcode::kDynamicUpdateSlice); + outside_to_inside[original_output] = dus->mutable_operand(1); + add_users_if_available(original_output); + std::vector slice_offsets(padded_shape.rank()); + for (int64 i = 0; i < slice_offsets.size(); ++i) { + slice_offsets[i] = dus->mutable_operand(i + 2); + } + auto get_slice = [&](HloInstruction* padded) { + return body->AddInstruction(HloInstruction::CreateDynamicSlice( + ShapeUtil::ChangeElementType(dus->operand(1)->shape(), + padded->shape().element_type()), + padded, slice_offsets, dus->operand(1)->shape().dimensions())); + }; + // Helper functions to create nodes with small operands. 
+  auto add_broadcast = [&](const HloInstruction* broadcast) {
+    auto padded_operand_shape = broadcast->operand(0)->shape();
+    for (int64 i = 0; i < broadcast->dimensions().size(); ++i) {
+      padded_operand_shape.set_dimensions(
+          i, padded_shape.dimensions(broadcast->dimensions(i)));
+    }
+    auto padded_operand = PadToShape(outside_to_inside[broadcast->operand(0)],
+                                     padded_operand_shape, nullptr, body);
+    outside_to_inside[broadcast] =
+        get_slice(body->AddInstruction(broadcast->CloneWithNewOperands(
+            ShapeUtil::ChangeElementType(padded_shape,
+                                         padded_operand_shape.element_type()),
+            {padded_operand})));
+  };
+  auto add_iota = [&](const HloInstruction* iota) {
+    outside_to_inside[iota] =
+        get_slice(body->AddInstruction(iota->CloneWithNewOperands(
+            ShapeUtil::ChangeElementType(padded_shape,
+                                         iota->shape().element_type()),
+            {})));
+  };
+  auto add_constant = [&](const HloInstruction* constant) {
+    outside_to_inside[constant] = body->AddInstruction(constant->Clone());
+    outside_to_inside[constant] = get_slice(
+        PadToShape(outside_to_inside[constant],
+                   ShapeUtil::ChangeElementType(
+                       padded_shape, constant->shape().element_type()),
+                   nullptr, body));
+  };
+  while (!worklist.empty()) {
+    auto inst = worklist.back();
+    worklist.pop_back();
+    if (outside_to_inside.count(inst) > 0) {
+      continue;
+    }
+    if (inst->opcode() == HloOpcode::kBroadcast) {
+      add_broadcast(inst);
+    } else if (inst->opcode() == HloOpcode::kIota) {
+      add_iota(inst);
+    } else if (inst->opcode() == HloOpcode::kConstant) {
+      add_constant(inst);
+    } else if (inst->opcode() == HloOpcode::kReduce) {
+      // This is an output, for which we have special handling later.
+    } else {
+      std::vector<HloInstruction*> operands_inside(inst->operand_count());
+      for (int64 i = 0; i < operands_inside.size(); ++i) {
+        operands_inside[i] = outside_to_inside[inst->operand(i)];
+      }
+      outside_to_inside[inst] = body->AddInstruction(inst->CloneWithNewOperands(
+          ShapeUtil::ChangeElementType(dus->operand(1)->shape(),
+                                       inst->shape().element_type()),
+          operands_inside));
+    }
+    add_users_if_available(inst);
+  }
+  std::vector<HloInstruction*> new_outputs_inside(new_operands.size());
+  for (int64 i = 0; i < new_outputs_inside.size(); ++i) {
+    new_outputs_inside[i] = outside_to_inside[new_operands[i]];
+  }
+  // Now create the reduce outputs inside the loop.
+  for (int64 i = 0; i < reduce_outputs.size(); ++i) {
+    auto reduce_outside = reduce_outputs[i];
+    CHECK_EQ(reduce_outside->opcode(), HloOpcode::kReduce);
+    int64 index_in_operand = new_operands.size() - reduce_outputs.size() + i;
+    auto last_iter_result = outside_to_inside[new_operands[index_in_operand]];
+    auto operand0 = outside_to_inside[reduce_outside->operand(0)];
+    auto operand1 = outside_to_inside[reduce_outside->operand(1)];
+    TF_ASSIGN_OR_RETURN(auto reduce_shape,
+                        ShapeInference::InferReduceShape(
+                            {&operand0->shape(), &operand1->shape()},
+                            reduce_outside->dimensions(),
+                            reduce_outside->to_apply()->ComputeProgramShape()));
+    *reduce_shape.mutable_layout() = reduce_outside->shape().layout();
+    std::vector<HloInstruction*> reduce_dus_offsets;
+    // If any collapsed dimension is windowed, we need to accumulate with the
+    // last iteration's result. If such a dimension has padding, we also need
+    // to mask off invalid data.
+ bool needs_accumulate = false; + std::vector dims_to_mask; + for (int64 i = 0; i < slice_offsets.size(); ++i) { + if (absl::c_linear_search(reduce_outside->dimensions(), i)) { + if (reduce_outside->operand(0)->shape().dimensions(i) != + operand0->shape().dimensions(i)) { + needs_accumulate = true; + if (unpadded_shape.dimensions(i) != padded_shape.dimensions(i)) { + dims_to_mask.push_back(i); + } + } + continue; + } + reduce_dus_offsets.push_back(slice_offsets[i]); + } + // Mask off invalid data in collapsed dimensions. + for (int64 dim : dims_to_mask) { + auto iota = body->AddInstruction(HloInstruction::CreateIota( + ShapeUtil::ChangeElementType(operand0->shape(), S32), dim)); + auto add = body->AddInstruction(HloInstruction::CreateBinary( + iota->shape(), HloOpcode::kAdd, iota, + body->AddInstruction(HloInstruction::CreateBroadcast( + iota->shape(), slice_offsets[dim], {})))); + auto limit = body->AddInstruction(HloInstruction::CreateBroadcast( + iota->shape(), + body->AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0( + reduce_outside->operand(0)->shape().dimensions(dim)))), + {})); + auto compare = body->AddInstruction(HloInstruction::CreateCompare( + ShapeUtil::ChangeElementType(iota->shape(), PRED), add, limit, + ComparisonDirection::kLt)); + operand0 = body->AddInstruction(HloInstruction::CreateTernary( + operand0->shape(), HloOpcode::kSelect, compare, operand0, + body->AddInstruction(HloInstruction::CreateBroadcast( + operand0->shape(), operand1, {})))); + } + auto output_inside = + body->AddInstruction(reduce_outside->CloneWithNewOperands( + reduce_shape, {operand0, operand1})); + // Accumulate with previous results if needed. + if (needs_accumulate) { + auto input_slice = + body->AddInstruction(HloInstruction::CreateDynamicSlice( + output_inside->shape(), last_iter_result, reduce_dus_offsets, + output_inside->shape().dimensions())); + output_inside = body->AddInstruction(HloInstruction::CreateBinary( + output_inside->shape(), + reduce_outside->to_apply()->root_instruction()->opcode(), + output_inside, input_slice)); + } + // Dynamic-update-slice if needed. + if (!ShapeUtil::Compatible(output_inside->shape(), + last_iter_result->shape())) { + output_inside = + body->AddInstruction(HloInstruction::CreateDynamicUpdateSlice( + last_iter_result->shape(), last_iter_result, output_inside, + reduce_dus_offsets)); + } + new_outputs_inside[index_in_operand] = output_inside; + } + // Body output. + auto new_output_inside = + body->AddInstruction(HloInstruction::CreateTuple(new_outputs_inside)); + TF_RETURN_IF_ERROR( + body_root->ReplaceOperandWithDifferentShape(2, new_output_inside)); + TF_RETURN_IF_ERROR(body->RemoveInstructionAndUnusedOperands(dus)); + // Replace uses of the reduces outside the loop. 
+ auto new_output_gte = + computation->AddInstruction(HloInstruction::CreateGetTupleElement( + new_output_inside->shape(), loop, 2)); + for (int64 i = 0; i < reduce_outputs.size(); ++i) { + int64 index_in_operand = new_operands.size() - reduce_outputs.size() + i; + auto new_output = + computation->AddInstruction(HloInstruction::CreateGetTupleElement( + new_outputs_inside[index_in_operand]->shape(), new_output_gte, + index_in_operand)); + if (!ShapeUtil::Compatible(new_output->shape(), + reduce_outputs[i]->shape())) { + new_output = computation->AddInstruction(HloInstruction::CreateSlice( + reduce_outputs[i]->shape(), new_output, + std::vector(new_output->shape().rank(), 0), + reduce_outputs[i]->shape().dimensions(), + std::vector(new_output->shape().rank(), 1))); + } + TF_RETURN_IF_ERROR(reduce_outputs[i]->ReplaceAllUsesWith(new_output)); + TF_RETURN_IF_ERROR( + computation->RemoveInstructionAndUnusedOperands(reduce_outputs[i])); + } + return Status::OK(); +} + +} // namespace + +Status SpmdPartitioningVisitor::DoCodeMotionForWindowedDotGeneralLoops( + HloComputation* computation) { + for (auto& loop : windowed_dot_general_loops_) { + if (loop.windowed_in_contracting_dims || loop.windowed_in_batch_dims) { + // We have a dynamic-slice for the non-windowed operand in + // batch/contracting-dim windowed dot-general. So moving the + // broadcast/iota/elementwise ops into the loop could help reduce memory + // via fusion. + TF_RETURN_IF_ERROR( + SinkInputNodesIntoWindowedDotGeneralLoopOnContractingDimensions( + loop.while_loop, 1 - loop.windowed_operand)); + } + if (!loop.windowed_in_contracting_dims) { + // We have a dynamic-update-slice for the output in + // batch/non-contracting-dim windowed dot-general. So moving reduce ops + // into the loop could help reduce memory. + TF_RETURN_IF_ERROR( + MoveUsersIntoWindowedDotGeneralLoopOnNonContractingDimensions( + loop.while_loop)); + } + } + return Status::OK(); +} + +} // namespace spmd +} // namespace xla diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc index 76014c83340..ceead32f530 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc @@ -31,7 +31,6 @@ limitations under the License. 
#include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/protobuf_util.h" #include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h" -#include "tensorflow/compiler/xla/service/dot_as_convolution_util.h" #include "tensorflow/compiler/xla/service/flatten_call_graph.h" #include "tensorflow/compiler/xla/service/hlo_casting_utils.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" @@ -176,47 +175,6 @@ std::vector CreateReplicaGroups(int64 num_replicas) { return groups; } -absl::optional> GetReshardAllToAllSourceTargetDims( - const HloSharding& source, const HloSharding& target) { - if (source.IsTileMaximal() || target.IsTileMaximal() || - source.tile_assignment().num_dimensions() != - target.tile_assignment().num_dimensions()) { - return absl::nullopt; - } - int64 source_dim = -1; - int64 target_dim = -1; - for (int64 i = 0; i < source.tile_assignment().num_dimensions(); ++i) { - if (source.tile_assignment().dim(i) > 1 && - target.tile_assignment().dim(i) == 1) { - if (source_dim != -1) { - return absl::nullopt; - } - source_dim = i; - } else if (source.tile_assignment().dim(i) == 1 && - target.tile_assignment().dim(i) > 1) { - if (target_dim != -1) { - return absl::nullopt; - } - target_dim = i; - } else if (source.tile_assignment().dim(i) != - target.tile_assignment().dim(i)) { - return absl::nullopt; - } - } - if (source_dim == -1 || target_dim == -1 || source_dim == target_dim) { - return absl::nullopt; - } - return std::pair(source_dim, target_dim); -} - -bool CanReshardWithCollectivePermute(const HloSharding& source, - const HloSharding& target) { - return !source.IsTileMaximal() && !target.IsTileMaximal() && - source.tile_assignment().dimensions() == - target.tile_assignment().dimensions() && - source.tile_assignment() != target.tile_assignment(); -} - // Clears all sharding attributes from instructions in the module. This must be // called only after all SPMD transformation is complete. Status ClearShardingAttributes(HloModule* module) { @@ -2883,1840 +2841,6 @@ Status SpmdPartitioningVisitor::HandleTuple(HloInstruction* hlo) { return Status::OK(); } -Status SpmdPartitioningVisitor::HandleConvolutionTiledLhsAndRhs( - HloInstruction* hlo) { - TF_RET_CHECK(hlo->opcode() == HloOpcode::kConvolution); - - auto lhs = GetPartitionedHlo(hlo->operand(0)); - auto rhs = GetPartitionedHlo(hlo->operand(1)); - TF_RET_CHECK(!lhs.sharding().IsTileMaximal() && - !rhs.sharding().IsTileMaximal()); - - const auto& dnums = hlo->convolution_dimension_numbers(); - - // Check if the operand shardings are aligned. Also we currently don't - // support partitioning non-spatial dimensions. 
- std::vector rhs_to_lhs_indices(hlo->shape().rank()); - rhs_to_lhs_indices[dnums.kernel_output_feature_dimension()] = - dnums.input_batch_dimension(); - rhs_to_lhs_indices[dnums.kernel_input_feature_dimension()] = - dnums.input_feature_dimension(); - for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { - rhs_to_lhs_indices[dnums.kernel_spatial_dimensions(i)] = - dnums.input_spatial_dimensions(i); - } - std::vector lhs_to_rhs_indices(hlo->shape().rank()); - for (int64 i = 0; i < rhs_to_lhs_indices.size(); ++i) { - lhs_to_rhs_indices[rhs_to_lhs_indices[i]] = i; - } - - Window window = hlo->window(); - std::vector reversed_rhs_dims; - for (int64 i = 0; i < window.dimensions_size(); ++i) { - if (window.dimensions(i).window_reversal()) { - reversed_rhs_dims.push_back(dnums.kernel_spatial_dimensions(i)); - } - } - if (!reversed_rhs_dims.empty()) { - // Make the reversed dims left-padded to prepare for window reversal. - auto left_padded_rhs = HaloExchangeToPadOnLeft(rhs, reversed_rhs_dims); - if (left_padded_rhs == nullptr) { - return DefaultAction(hlo); - } - left_padded_rhs->set_sharding(rhs.sharding()); - rhs = PartitionedHlo(left_padded_rhs, rhs.base_shape(), rhs.state()); - } - // Consider window reversal when resharding RHS or LHS. Note: this will not - // reverse the data in the shard. We use window reversal to do that. - auto aligned_rhs_sharding = hlo_sharding_util::ReverseSharding( - hlo_sharding_util::TransposeSharding(lhs.sharding(), rhs_to_lhs_indices), - reversed_rhs_dims); - auto aligned_lhs_sharding = hlo_sharding_util::TransposeSharding( - hlo_sharding_util::ReverseSharding(rhs.sharding(), reversed_rhs_dims), - lhs_to_rhs_indices); - - auto unsupported_sharding = [&](const HloSharding& lhs_sharding, - const HloSharding& rhs_sharding) { - return lhs_sharding.tile_assignment().dim(dnums.input_batch_dimension()) != - 1 || - rhs_sharding.tile_assignment().dim( - dnums.kernel_output_feature_dimension()) != 1; - }; - - auto zero = b_.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::Zero(hlo->shape().element_type()))); - if (ShapeSizeInBytes(lhs.base_shape()) < ShapeSizeInBytes(rhs.base_shape())) { - if (unsupported_sharding(aligned_lhs_sharding, rhs.sharding())) { - return DefaultAction(hlo); - } - lhs = lhs.Reshard(aligned_lhs_sharding).PadWithValue(zero); - rhs = rhs.PadWithValue(zero, reversed_rhs_dims); - } else { - if (unsupported_sharding(lhs.sharding(), aligned_rhs_sharding)) { - return DefaultAction(hlo); - } - lhs = lhs.PadWithValue(zero); - rhs = - rhs.Reshard(aligned_rhs_sharding).PadWithValue(zero, reversed_rhs_dims); - } - - // Reshard LHS by exchanging halo such that each shard computes the partial - // sum of the full shape result, and add AllReduce. - // - // The size of halo on each dimension can be calculated from the projection - // onto the LHS that each RHS shard i needs to read. RHS and LHS below refers - // to the shard size of RHS and LHS, WC is the number of windows, and D is the - // window dilation. 
- // - // * offset(i): RHS * D * i - low_padding - // * limit(i): {(RHS - 1) * D + 1} * (i + 1) + (WC - 1) * stride - low_padding - // - // Since shard i has LHS of range [i * LHS, (i + 1) * LHS) - // * left-halo: i * LHS - offset(i) - // = (LHS - RHS) * i + low_padding - // * right-halo: limit(i) - (i + 1) * LHS - // = [{(RHS - 1) * D + 1} - LHS] * (i + 1) + (WC - 1) * stride - low_padding - std::vector shard_counts(dnums.input_spatial_dimensions_size()); - std::vector lhs_shard_sizes(dnums.input_spatial_dimensions_size()); - std::vector rhs_shard_sizes(dnums.input_spatial_dimensions_size()); - for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { - int64 lhs_dimension = dnums.input_spatial_dimensions(i); - int64 rhs_dimension = dnums.kernel_spatial_dimensions(i); - int64 shard_count = lhs.sharding().tile_assignment().dim(lhs_dimension); - auto wd = window.dimensions(i); - if (wd.base_dilation() != 1) { - return DefaultAction(hlo); - } - - int64 lhs_shard_size = - CeilOfRatio(lhs.base_shape().dimensions(lhs_dimension), shard_count); - int64 rhs_shard_size = - CeilOfRatio(rhs.base_shape().dimensions(rhs_dimension), shard_count); - shard_counts[i] = shard_count; - lhs_shard_sizes[i] = lhs_shard_size; - rhs_shard_sizes[i] = rhs_shard_size; - } - - std::vector left_halo_size_functions(hlo->shape().rank()); - std::vector right_halo_size_functions(hlo->shape().rank()); - Window new_window = window; - - auto partition_ordinals = - MakeTiledPartitionOrdinals(lhs.sharding(), partition_id_, &b_); - HloInstruction* lhs_with_halo = lhs.hlo(); - for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { - int64 lhs_dimension = dnums.input_spatial_dimensions(i); - int64 lhs_shard_size = lhs_shard_sizes[i]; - int64 rhs_shard_size = rhs_shard_sizes[i]; - - if (shard_counts[i] == 1) { - continue; - } - - // Calculate the left and right halo sizes as described in the comments - // above. - auto wd = window.dimensions(i); - int64 padding_low = wd.padding_low(); - int64 padding_high = wd.padding_high(); - int64 base = lhs.base_shape().dimensions(lhs_dimension); - int64 window_count = 1 + (padding_low + padding_high + base - - (1 + (wd.size() - 1) * wd.window_dilation())) / - wd.stride(); - int64 rhs_shard_size_dilated = - (rhs_shard_size - 1) * wd.window_dilation() + 1; - - left_halo_size_functions[lhs_dimension] = - OffsetCalculation(MultiplyAddDivideOffsetCalculation( - lhs_shard_size - rhs_shard_size * wd.window_dilation(), padding_low, - 1)); - right_halo_size_functions[lhs_dimension] = - OffsetCalculation(MultiplyAddDivideOffsetCalculation( - rhs_shard_size_dilated - lhs_shard_size, - rhs_shard_size_dilated - lhs_shard_size + - wd.stride() * (window_count - 1) - padding_low, - 1)); - - // Exchange halo and concatenate. - int64 dim = dnums.input_spatial_dimensions(i); - int64 explicit_left_padding_on_full_shape = padding_low; - int64 shard_size_with_halo = - wd.stride() * (window_count - 1) + rhs_shard_size_dilated; - - new_window.mutable_dimensions(i)->set_padding_low(0); - new_window.mutable_dimensions(i)->set_padding_high(0); - new_window.mutable_dimensions(i)->set_size(rhs_shard_size); - - // offset_on_padded_shape and padded_full_shape_size are needed only if - // we want to mask out-of-range values in ExchangeHaloAndGetValidData(). - // Since the default value for both the collective-permute is zero and - // also we call PadWithValue() on both operands at the beginning, we - // don't need to mask here. 
- // - // TODO(hyoulkee): Consider removing one of the two PadWithValue() calls - // if it's always safe. - auto offset_on_padded_shape = - OffsetCalculation(MultiplyAddDivideOffsetCalculation()); - int64 padded_full_shape_size = 0; - auto concat = ExchangeHaloAndGetValidData( - lhs_with_halo, lhs.base_shape(), left_halo_size_functions[dim], - right_halo_size_functions[dim], explicit_left_padding_on_full_shape, - padded_full_shape_size, shard_size_with_halo, dim, lhs.sharding(), - offset_on_padded_shape.Calculate(partition_ordinals[dim], &b_), zero, - partition_ordinals[dim], collective_ops_creator_, next_channel_id_, &b_, - /*mask_invalid_region=*/false); - if (!concat) { - return DefaultAction(hlo); - } - lhs_with_halo = *concat; - } - - SetPartitionedHlo(hlo, [&]() { - auto conv = b_.AddInstruction(HloInstruction::CreateConvolve( - hlo->shape(), lhs_with_halo, rhs.hlo(), hlo->feature_group_count(), - hlo->batch_group_count(), new_window, - hlo->convolution_dimension_numbers(), hlo->precision_config())); - auto ar = collective_ops_creator_.create_cross_partition_all_reduce( - &b_, conv, MakeBinaryAdd(hlo->shape().element_type(), module_), - NewChannel()); - ar->set_sharding(HloSharding::Replicate()); - return PartitionedHlo(ar, hlo->shape(), MakePartitioningState()) - .Reshard(hlo->sharding()) - .hlo(); - }); - return Status::OK(); -} - -Status SpmdPartitioningVisitor::HandleConvolution(HloInstruction* hlo) { - auto dot_dnums = dot_as_convolution_util::ParseDotGeneralFromConvolution(hlo); - if (dot_dnums) { - // Use HandleDotHelper() for convs that are actually einsums. - spmd::DotGeneralDimsMapping mapping; - for (const auto& dims : dot_dnums->batch_dims) { - mapping.batch_dims.emplace_back(); - mapping.batch_dims.back().lhs = dims.lhs; - mapping.batch_dims.back().rhs = dims.rhs; - mapping.batch_dims.back().output = dims.output; - } - for (const auto& dims : dot_dnums->contracting_dims) { - mapping.contracting_dims.emplace_back(); - mapping.contracting_dims.back().lhs = dims.lhs; - mapping.contracting_dims.back().rhs = dims.rhs; - mapping.contracting_dims.back().output = dims.output; - } - for (const auto& dims : dot_dnums->lhs_non_contracting_dims) { - mapping.lhs_non_contracting_dims.emplace_back(); - mapping.lhs_non_contracting_dims.back().lhs = dims.lhs; - mapping.lhs_non_contracting_dims.back().rhs = dims.rhs; - mapping.lhs_non_contracting_dims.back().output = dims.output; - } - for (const auto& dims : dot_dnums->rhs_non_contracting_dims) { - mapping.rhs_non_contracting_dims.emplace_back(); - mapping.rhs_non_contracting_dims.back().lhs = dims.lhs; - mapping.rhs_non_contracting_dims.back().rhs = dims.rhs; - mapping.rhs_non_contracting_dims.back().output = dims.output; - } - auto create_sharded_conv = - [&](HloInstruction* lhs_hlo, HloInstruction* rhs_hlo, - spmd::SpmdBuilder* b) -> StatusOr { - TF_ASSIGN_OR_RETURN( - auto sharded_conv, - dot_as_convolution_util::CreateShardedConvForDotGeneralConvolution( - *hlo, *dot_dnums, lhs_hlo, rhs_hlo)); - return b->AddInstruction(std::move(sharded_conv)); - }; - return HandleDotHelper(hlo, mapping, create_sharded_conv); - } - - auto lhs = GetPartitionedHlo(hlo->operand(0)); - auto rhs = GetPartitionedHlo(hlo->operand(1)); - const HloSharding& sharding = hlo->sharding(); - const auto& dnums = hlo->convolution_dimension_numbers(); - std::vector rhs_to_lhs_indices(hlo->shape().rank()); - rhs_to_lhs_indices[dnums.kernel_output_feature_dimension()] = - dnums.input_batch_dimension(); - rhs_to_lhs_indices[dnums.kernel_input_feature_dimension()] = - 
dnums.input_feature_dimension(); - for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { - rhs_to_lhs_indices[dnums.kernel_spatial_dimensions(i)] = - dnums.input_spatial_dimensions(i); - } - std::vector lhs_to_rhs_indices(hlo->shape().rank()); - for (int64 i = 0; i < rhs_to_lhs_indices.size(); ++i) { - lhs_to_rhs_indices[rhs_to_lhs_indices[i]] = i; - } - auto aligned_rhs_sharding = - hlo_sharding_util::TransposeSharding(lhs.sharding(), rhs_to_lhs_indices); - auto aligned_lhs_sharding = - hlo_sharding_util::TransposeSharding(rhs.sharding(), lhs_to_rhs_indices); - - // Handling cases where all the partitioned dimensions are parallel - // dimensions. - int64 lhs_parallel_dim_partitions = 1; - int64 rhs_parallel_dim_partitions = 1; - std::vector parallel_spatial_dims; - for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { - int64 lhs_dim = dnums.input_spatial_dimensions(i); - int64 lhs_size = lhs.base_shape().dimensions(lhs_dim); - const auto& wd = hlo->window().dimensions(i); - int64 rhs_dim = dnums.kernel_spatial_dimensions(i); - // Only non reversal window is supported right now. - if (!wd.window_reversal() && - dot_as_convolution_util::ConvSpatialDimensionIsParallel(wd, lhs_size)) { - parallel_spatial_dims.emplace_back(i); - lhs_parallel_dim_partitions *= ShardCountAtDim(lhs.sharding(), lhs_dim); - rhs_parallel_dim_partitions *= ShardCountAtDim(rhs.sharding(), rhs_dim); - } - } - bool lhs_partition_dims_are_parallel = - (lhs_parallel_dim_partitions == num_partitions_); - bool rhs_partition_dims_are_parallel = - (rhs_parallel_dim_partitions == num_partitions_); - - // If there is a parallel dim and all the partitioned dimensions are parallel - // dimensions in either LHS or RHS, simply create partitioned convolutions. - if (!parallel_spatial_dims.empty() && - (lhs_partition_dims_are_parallel || rhs_partition_dims_are_parallel)) { - // Reshard LHS or RHS to partition at parallel dimensions as the other - // operand. - if (lhs_partition_dims_are_parallel) { - rhs = rhs.Reshard(aligned_rhs_sharding); - } else { - lhs = lhs.Reshard(aligned_lhs_sharding); - } - auto lhs_shard_shape = - MakePartitionedShape(lhs.base_shape(), lhs.sharding()); - auto rhs_shard_shape = - MakePartitionedShape(rhs.base_shape(), rhs.sharding()); - // Update convolution window. - auto new_window = hlo->window(); - for (const auto& spatial_dim : parallel_spatial_dims) { - auto wd = new_window.mutable_dimensions(spatial_dim); - wd->set_size(lhs_shard_shape.dimensions( - dnums.input_spatial_dimensions(spatial_dim))); - wd->set_stride(std::max(1, wd->size() - 1)); - wd->set_base_dilation(wd->size()); - } - TF_ASSIGN_OR_RETURN( - Shape sharded_conv_shape, - ShapeInference::InferConvolveShape( - lhs_shard_shape, rhs_shard_shape, hlo->feature_group_count(), - hlo->batch_group_count(), new_window, dnums)); - *sharded_conv_shape.mutable_layout() = hlo->shape().layout(); - SetPartitionedHlo(hlo, [&]() { - auto sharded_conv = b_.AddInstruction(HloInstruction::CreateConvolve( - sharded_conv_shape, lhs.hlo(), rhs.hlo(), hlo->feature_group_count(), - hlo->batch_group_count(), new_window, dnums, - hlo->precision_config())); - sharded_conv->set_sharding(hlo->sharding()); - return PartitionedHlo(sharded_conv, hlo->shape(), MakePartitioningState()) - .Reshard(hlo->sharding()) - .hlo(); - }); - return Status::OK(); - } - - // Handling cases where both operands' shardings are aligned. 
We check that - // the LHS batch dimension is not partitioned because it is mapped to the - // output feature dimension in aligned_rhs_sharding, which are not the same - // dimension. - if (!lhs.sharding().IsTileMaximal() && !rhs.sharding().IsTileMaximal()) { - if (options_.conv_halo_exchange_always_on_lhs) { - return HandleConvolutionTiledLhsAndRhs(hlo); - } else { - // Reshard RHS so that each shard computes the partial sum of the full - // shape result, and add AllReduce. See HandleConvolutionTiledLhsAndRhs() - // that reshards LHS. - // - // The size of halo on each dimension can be calculated from the - // projection onto the RHS that shard i needs to read. RHS and LHS below - // refers to the shard size of RHS and LHS, WC is the number of windows, - // and D is the window dilation. - // - // * offset(i): LHS * i + low_padding - (WC - 1) * stride - // * limit(i): LHS * (i + 1) + low_padding - // - // Since shard i has RHS of range [i * RHS * D, (i + 1) * RHS * D) - // * left-halo: i * RHS - offset(i) - // = i * (RHS * D - LHS) + (WC - 1) * stride - low_padding - // * right-halo: limit(i) - (i + 1) * RHS - // = (i + 1) * (LHS - RHS * D) + low_pading - - auto unsupported_sharding = [&](const HloSharding& lhs_sharding, - const HloSharding& rhs_sharding) { - // We currently don't support partitioning input batch or output feature - // dimensions. - return lhs_sharding.tile_assignment().dim( - dnums.input_batch_dimension()) != 1 || - rhs_sharding.tile_assignment().dim( - dnums.kernel_output_feature_dimension()) != 1; - }; - auto zero = b_.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::Zero(hlo->shape().element_type()))); - if (ShapeSizeInBytes(lhs.base_shape()) < - ShapeSizeInBytes(rhs.base_shape())) { - if (unsupported_sharding(aligned_lhs_sharding, rhs.sharding())) { - return DefaultAction(hlo); - } - lhs = lhs.Reshard(aligned_lhs_sharding).PadWithValue(zero); - rhs = rhs.PadWithValue(zero); - } else { - if (unsupported_sharding(lhs.sharding(), aligned_rhs_sharding)) { - return DefaultAction(hlo); - } - lhs = lhs.PadWithValue(zero); - rhs = rhs.Reshard(aligned_rhs_sharding).PadWithValue(zero); - } - - Window window = hlo->window(); - std::vector shard_counts(dnums.input_spatial_dimensions_size()); - std::vector lhs_shard_sizes(dnums.input_spatial_dimensions_size()); - std::vector rhs_shard_sizes(dnums.input_spatial_dimensions_size()); - for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { - int64 lhs_dimension = dnums.input_spatial_dimensions(i); - int64 rhs_dimension = dnums.kernel_spatial_dimensions(i); - int64 shard_count = rhs.sharding().tile_assignment().dim(rhs_dimension); - auto wd = window.dimensions(i); - if (wd.base_dilation() != 1 || wd.window_reversal()) { - return DefaultAction(hlo); - } - - int64 lhs_shard_size = CeilOfRatio( - lhs.base_shape().dimensions(lhs_dimension), shard_count); - int64 rhs_shard_size = CeilOfRatio( - rhs.base_shape().dimensions(rhs_dimension), shard_count); - shard_counts[i] = shard_count; - lhs_shard_sizes[i] = lhs_shard_size; - rhs_shard_sizes[i] = rhs_shard_size; - } - - std::vector left_halo_size_functions( - hlo->shape().rank()); - std::vector right_halo_size_functions( - hlo->shape().rank()); - Window new_window = window; - - // Data structures needed for Pad and DynamicSlice on LHS if needed. 
- bool need_dynamic_slice_lhs = false; - auto partition_ordinals = - MakeTiledPartitionOrdinals(lhs.sharding(), partition_id_, &b_); - std::vector zero_padding(hlo->shape().rank()); - PaddingConfig pad_config = - window_util::MakeSymmetricPadding(zero_padding); - auto zero_s32 = b_.AddInstruction( - HloInstruction::CreateConstant(LiteralUtil::Zero(S32))); - std::vector dynamic_slice_start_indices( - hlo->shape().rank(), zero_s32); - Shape dynamic_slice_shape = lhs.hlo()->shape(); - Shape pad_shape = lhs.hlo()->shape(); - - for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { - int64 lhs_dimension = dnums.input_spatial_dimensions(i); - int64 rhs_dimension = dnums.kernel_spatial_dimensions(i); - int64 lhs_shard_size = lhs_shard_sizes[i]; - int64 rhs_shard_size = rhs_shard_sizes[i]; - - if (shard_counts[i] == 1) { - continue; - } - - // Calculate the left and right halo sizes as described in the comments - // above. It calculcates the halo sizes with dilation, so we apply - // CeilOfRatio({left,right}_halo_size, window_dilation). - auto wd = window.dimensions(i); - int64 padding_low = wd.padding_low(); - int64 padding_high = wd.padding_high(); - int64 base = lhs.base_shape().dimensions(lhs_dimension); - int64 window_count = - 1 + (padding_low + padding_high + base - - (1 + (wd.size() - 1) * wd.window_dilation())) / - wd.stride(); - left_halo_size_functions[rhs_dimension] = - OffsetCalculation(MultiplyAddDivideOffsetCalculation( - rhs_shard_size * wd.window_dilation() - lhs_shard_size, - (window_count - 1) * wd.stride() - padding_low + - wd.window_dilation() - 1, - wd.window_dilation())); - right_halo_size_functions[rhs_dimension] = - OffsetCalculation(MultiplyAddDivideOffsetCalculation( - lhs_shard_size - rhs_shard_size * wd.window_dilation(), - lhs_shard_size - rhs_shard_size * wd.window_dilation() + - padding_low + wd.window_dilation() - 1, - wd.window_dilation())); - - // New RHS window size includes the maximum of both left and right - // halos. - int64 halo_size = left_halo_size_functions[rhs_dimension].MaxInRange( - 1, shard_counts[i]) + - right_halo_size_functions[rhs_dimension].MaxInRange( - 0, shard_counts[i] - 1); - int64 new_window_size = - rhs.hlo()->shape().dimensions(rhs_dimension) + halo_size; - - // The amount of new low padding could be dynamic (e.g., window_dilation - // != 1), which requires pad (to the maximum) and dynamic slice on LHS. - // - // If we consider the first window, the offset of the dilated RHS that - // aligns with the first valid LHS element for shard i is 'padding_low + - // LHS * i'. When the left halo is added to RHS, the offset of the first - // RHS element is (RHS * i - left_halo) * window_dilation. The - // difference between the two values is the amount of padding_low we - // need on LHS. - auto new_padding_low_function = - OffsetCalculation( - HloOpcode::kMultiply, left_halo_size_functions[rhs_dimension], - OffsetCalculation(MultiplyAddDivideOffsetCalculation( - 0, wd.window_dilation(), 1))) - - OffsetCalculation(MultiplyAddDivideOffsetCalculation( - rhs_shard_size * wd.window_dilation() - lhs_shard_size, - -padding_low, 1)); - - int64 new_padding_low_max = - new_padding_low_function.MaxInRange(0, shard_counts[i]); - int64 new_padding_low = new_padding_low_max; - int64 new_padding_high = window_count * wd.stride() + - (new_window_size - 1) * wd.window_dilation() - - new_padding_low - lhs_shard_size; - - // We do pad/dynamic-slice only when the padding is dynamic. 
- if (!new_padding_low_function.IsConstant()) { - need_dynamic_slice_lhs = true; - new_padding_low = 0; - pad_config.mutable_dimensions(lhs_dimension) - ->set_edge_padding_low(new_padding_low_max); - pad_config.mutable_dimensions(lhs_dimension) - ->set_edge_padding_high(new_padding_low_max); - pad_shape.set_dimensions(lhs_dimension, - lhs_shard_size + 2 * new_padding_low_max); - dynamic_slice_start_indices[lhs_dimension] = - (OffsetCalculation(MultiplyAddDivideOffsetCalculation( - 0, new_padding_low_max, 1)) - - new_padding_low_function) - .Calculate(partition_ordinals[lhs_dimension], &b_); - dynamic_slice_shape.set_dimensions( - lhs_dimension, lhs_shard_size + new_padding_low_max); - } - - // Since the convolution RHS operand size increased with halos, adjust - // the window config accordingly. - new_window.mutable_dimensions(i)->set_padding_low(new_padding_low); - new_window.mutable_dimensions(i)->set_padding_high(new_padding_high); - new_window.mutable_dimensions(i)->set_size( - rhs.hlo()->shape().dimensions(rhs_dimension) + halo_size); - } - - HloInstruction* conv_lhs = lhs.hlo(); - if (need_dynamic_slice_lhs) { - auto pad = b_.AddInstruction( - HloInstruction::CreatePad(pad_shape, lhs.hlo(), zero, pad_config)); - conv_lhs = b_.AddInstruction(HloInstruction::CreateDynamicSlice( - dynamic_slice_shape, pad, dynamic_slice_start_indices, - dynamic_slice_shape.dimensions())); - } - - // Exchange halo and concatenate. - HloInstruction* rhs_with_halo = rhs.hlo(); - for (int i = 0; i < dnums.kernel_spatial_dimensions_size(); ++i) { - int64 dim = dnums.kernel_spatial_dimensions(i); - int64 explicit_left_padding_on_full_shape = - left_halo_size_functions[dim].Calculate(0); - int64 shard_size_with_halo = new_window.dimensions(i).size(); - - // offset_on_padded_shape and padded_full_shape_size are needed only if - // we want to mask out-of-range values in ExchangeHaloAndGetValidData(). - // Since the default value for both the collective-permute is zero and - // also we call PadWithValue() on both operands at the beginning, we - // don't need to mask here. - // - // TODO(hyoulkee): Consider removing one of the two PadWithValue() calls - // if it's always safe. 
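// A minimal sketch of the pad + dynamic-slice trick used above, on a plain
// 1-D buffer: pad the shard to the worst-case low padding on both sides, then
// take a per-shard slice whose start index encodes that shard's actual
// (dynamic) low padding. Sizes below are illustrative assumptions.
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  const int64_t shard_size = 4;
  const int64_t max_low_pad = 2;  // plays the role of new_padding_low_max
  std::vector<float> shard = {1, 2, 3, 4};
  // Pad with zeros to shard_size + 2 * max_low_pad elements.
  std::vector<float> padded(shard_size + 2 * max_low_pad, 0.0f);
  for (int64_t i = 0; i < shard_size; ++i) padded[max_low_pad + i] = shard[i];
  // Suppose this shard actually needs a low padding of 1: slice of length
  // shard_size + max_low_pad starting at max_low_pad - 1.
  const int64_t actual_low_pad = 1;
  const int64_t start = max_low_pad - actual_low_pad;
  for (int64_t i = 0; i < shard_size + max_low_pad; ++i) {
    std::cout << padded[start + i] << " ";
  }
  std::cout << "\n";  // prints: 0 1 2 3 4 0
  return 0;
}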
- auto offset_on_padded_shape = - OffsetCalculation(MultiplyAddDivideOffsetCalculation( - rhs_shard_sizes[i], explicit_left_padding_on_full_shape, 1)) - - left_halo_size_functions[dim]; - int64 padded_full_shape_size = - offset_on_padded_shape.Calculate(shard_counts[i] - 1) + - new_window.dimensions(i).size(); - auto concat = ExchangeHaloAndGetValidData( - rhs_with_halo, rhs.base_shape(), left_halo_size_functions[dim], - right_halo_size_functions[dim], explicit_left_padding_on_full_shape, - padded_full_shape_size, shard_size_with_halo, dim, rhs.sharding(), - offset_on_padded_shape.Calculate(partition_ordinals[dim], &b_), - zero, partition_ordinals[dim], collective_ops_creator_, - next_channel_id_, &b_, /*mask_invalid_region=*/false); - if (!concat) { - return DefaultAction(hlo); - } - rhs_with_halo = *concat; - } - - SetPartitionedHlo(hlo, [&]() { - auto conv = b_.AddInstruction(HloInstruction::CreateConvolve( - hlo->shape(), conv_lhs, rhs_with_halo, hlo->feature_group_count(), - hlo->batch_group_count(), new_window, dnums, - hlo->precision_config())); - auto ar = collective_ops_creator_.create_cross_partition_all_reduce( - &b_, conv, MakeBinaryAdd(hlo->shape().element_type(), module_), - NewChannel()); - ar->set_sharding(HloSharding::Replicate()); - return PartitionedHlo(ar, hlo->shape(), MakePartitioningState()) - .Reshard(hlo->sharding()) - .hlo(); - }); - return Status::OK(); - } - } - - if (!sharding.IsTileMaximal()) { - // We don't currently support sharding on output feature dimension. - if (sharding.tile_assignment().dim(dnums.output_feature_dimension()) > 1) { - return DefaultAction(hlo); - } - - // Check if the operand and the output sharding are aligned. - std::vector input_to_output_indices(hlo->shape().rank()); - input_to_output_indices[dnums.input_batch_dimension()] = - dnums.output_batch_dimension(); - input_to_output_indices[dnums.input_feature_dimension()] = - dnums.output_feature_dimension(); - for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { - input_to_output_indices[dnums.input_spatial_dimensions(i)] = - dnums.output_spatial_dimensions(i); - } - auto target_operand_sharding = - hlo_sharding_util::TransposeSharding(sharding, input_to_output_indices); - lhs = lhs.Reshard(target_operand_sharding); - - // Replicate the RHS. - rhs = rhs.Reshard(HloSharding::Replicate()); - - // Convolution window config does not include batch and feature dimensions, - // whereas ReshardAsWindowedInput() expects the same number of window - // dimensions as the rank of the operand. So add two more trivial - // dimensions. 
- std::vector ones(hlo->shape().rank(), 1); - auto operand_window = window_util::MakeWindow(ones); - for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { - *operand_window.mutable_dimensions(dnums.input_spatial_dimensions(i)) = - hlo->window().dimensions(i); - } - - auto zero = b_.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::Zero(hlo->shape().element_type()))); - auto resharded_operand_and_window = lhs.ReshardAsWindowedInput( - operand_window, target_operand_sharding, zero); - if (!resharded_operand_and_window.has_value()) { - return DefaultAction(hlo); - } - Window new_window; - for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { - *new_window.add_dimensions() = - resharded_operand_and_window->shard_window.dimensions( - dnums.input_spatial_dimensions(i)); - } - TF_ASSIGN_OR_RETURN( - Shape sharded_conv_shape, - ShapeInference::InferConvolveShape( - resharded_operand_and_window->sharded_input->shape(), - rhs.hlo()->shape(), hlo->feature_group_count(), - hlo->batch_group_count(), new_window, dnums)); - auto shard_shape = MakePartitionedShape(hlo->shape(), hlo->sharding()); - *sharded_conv_shape.mutable_layout() = shard_shape.layout(); - SetPartitionedHlo(hlo, [&]() { - auto sharded_conv = b_.AddInstruction(HloInstruction::CreateConvolve( - sharded_conv_shape, resharded_operand_and_window->sharded_input, - rhs.hlo(), hlo->feature_group_count(), hlo->batch_group_count(), - new_window, dnums, hlo->precision_config())); - if (!resharded_operand_and_window->dynamic_slice_index_on_output - .has_value()) { - CHECK(ShapeUtil::Compatible(shard_shape, sharded_conv->shape())); - return sharded_conv; - } - return b_.AddInstruction(HloInstruction::CreateDynamicSlice( - shard_shape, sharded_conv, - *resharded_operand_and_window->dynamic_slice_index_on_output, - shard_shape.dimensions())); - }); - return Status::OK(); - } - return DefaultAction(hlo); -} - -Status SpmdPartitioningVisitor::HandleDot(HloInstruction* hlo) { - DotGeneralDimsMapping mapping; - const auto& dnums = hlo->dot_dimension_numbers(); - int64 next_output_dim = 0; - for (int64 i = 0; i < dnums.lhs_batch_dimensions_size(); ++i) { - mapping.batch_dims.emplace_back(); - mapping.batch_dims.back().lhs = dnums.lhs_batch_dimensions(i); - mapping.batch_dims.back().rhs = dnums.rhs_batch_dimensions(i); - mapping.batch_dims.back().output = next_output_dim++; - } - for (int64 i = 0; i < dnums.lhs_contracting_dimensions_size(); ++i) { - mapping.contracting_dims.emplace_back(); - mapping.contracting_dims.back().lhs = dnums.lhs_contracting_dimensions(i); - mapping.contracting_dims.back().rhs = dnums.rhs_contracting_dimensions(i); - mapping.contracting_dims.back().output = -1; - } - for (int64 i = 0; i < hlo->operand(0)->shape().rank(); ++i) { - if (absl::c_linear_search(dnums.lhs_batch_dimensions(), i) || - absl::c_linear_search(dnums.lhs_contracting_dimensions(), i)) { - continue; - } - mapping.lhs_non_contracting_dims.emplace_back(); - mapping.lhs_non_contracting_dims.back().lhs = i; - mapping.lhs_non_contracting_dims.back().rhs = -1; - mapping.lhs_non_contracting_dims.back().output = next_output_dim++; - } - for (int64 i = 0; i < hlo->operand(1)->shape().rank(); ++i) { - if (absl::c_linear_search(dnums.rhs_batch_dimensions(), i) || - absl::c_linear_search(dnums.rhs_contracting_dimensions(), i)) { - continue; - } - mapping.rhs_non_contracting_dims.emplace_back(); - mapping.rhs_non_contracting_dims.back().lhs = -1; - mapping.rhs_non_contracting_dims.back().rhs = i; - 
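// A minimal sketch, independent of the XLA types, of the DotGeneralDimsMapping
// the loops above would produce for a batch matmul lhs[B,M,K] * rhs[B,K,N]
// with output [B,M,N] (the shapes are assumed for illustration):
#include <cstdint>
#include <iostream>
#include <vector>

struct DimMapping { int64_t lhs, rhs, output; };

int main() {
  std::vector<DimMapping> mapping = {
      {0, 0, 0},   // batch dim: lhs 0, rhs 0 -> output 0
      {2, 1, -1},  // contracting dim: lhs 2, rhs 1, absent from the output
      {1, -1, 1},  // LHS non-contracting (M) -> output 1
      {-1, 2, 2},  // RHS non-contracting (N) -> output 2
  };
  for (const auto& m : mapping) {
    std::cout << m.lhs << " " << m.rhs << " " << m.output << "\n";
  }
  return 0;
}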
mapping.rhs_non_contracting_dims.back().output = next_output_dim++; - } - auto create_sharded_dot = [&](HloInstruction* l, HloInstruction* r, - SpmdBuilder* b) -> StatusOr { - TF_ASSIGN_OR_RETURN( - auto sharded_dot_shape, - ShapeInference::InferDotOpShape(l->shape(), r->shape(), - hlo->dot_dimension_numbers())); - return b->AddInstruction(HloInstruction::CreateDot( - sharded_dot_shape, l, r, hlo->dot_dimension_numbers(), - hlo->precision_config())); - }; - return HandleDotHelper(hlo, mapping, create_sharded_dot); -} - -Status SpmdPartitioningVisitor::HandleDotHelper( - HloInstruction* hlo, const DotGeneralDimsMapping& dims_mapping, - const std::function( - HloInstruction*, HloInstruction*, SpmdBuilder*)>& create_sharded_dot) { - const HloSharding& lhs_sharding = hlo->operand(0)->sharding(); - const HloSharding& rhs_sharding = hlo->operand(1)->sharding(); - - // Similar to hlo_sharding_util::TransposeSharding(), but allows - // removing/adding non-partitioned dimensions. - auto transpose_sharding = - [&](const HloSharding& source, absl::Span src_to_tgt, - absl::Span tgt_to_src) -> absl::optional { - if (source.IsTileMaximal()) { - return source; - } - std::vector tgt_dims_skipping_new(tgt_to_src.size(), -1); - int64 skipped_tgt_dims = 0; - for (int64 i = 0; i < tgt_to_src.size(); ++i) { - if (tgt_to_src[i] < 0) { - skipped_tgt_dims++; - } else { - tgt_dims_skipping_new[i] = i - skipped_tgt_dims; - } - } - int64 skipped_src_dims = absl::c_count(src_to_tgt, -1); - std::vector perm(src_to_tgt.size()); - for (int64 i = 0; i < src_to_tgt.size(); ++i) { - if (src_to_tgt[i] < 0) { - if (source.tile_assignment().dim(i) > 1) { - return absl::nullopt; - } - perm[src_to_tgt.size() - skipped_src_dims] = i; - skipped_src_dims--; - } else { - perm[tgt_dims_skipping_new[src_to_tgt[i]]] = i; - } - } - auto tgt_sharding = hlo_sharding_util::TransposeSharding(source, perm); - if (skipped_tgt_dims == 0) { - return tgt_sharding; - } - auto reshape_tiles = tgt_sharding.tile_assignment(); - std::vector tgt_tiles(tgt_to_src.size(), 1); - for (int64 i = 0; i < tgt_tiles.size(); ++i) { - if (tgt_to_src[i] >= 0) { - tgt_tiles[i] = reshape_tiles.dim(tgt_dims_skipping_new[i]); - } - } - reshape_tiles.Reshape(tgt_tiles); - return HloSharding::Tile(reshape_tiles); - }; - - std::vector lhs_to_rhs_indices(hlo->operand(0)->shape().rank(), -1); - std::vector lhs_to_output_indices(hlo->operand(0)->shape().rank(), -1); - std::vector rhs_to_lhs_indices(hlo->operand(1)->shape().rank(), -1); - std::vector rhs_to_output_indices(hlo->operand(1)->shape().rank(), -1); - std::vector output_to_lhs_indices(hlo->shape().rank(), -1); - std::vector output_to_rhs_indices(hlo->shape().rank(), -1); - auto populate_indices_mapping = - [&](const DotGeneralDimsMapping::DimsMapping& mapping) { - if (mapping.lhs >= 0) { - lhs_to_rhs_indices[mapping.lhs] = mapping.rhs; - lhs_to_output_indices[mapping.lhs] = mapping.output; - } - if (mapping.rhs >= 0) { - rhs_to_lhs_indices[mapping.rhs] = mapping.lhs; - rhs_to_output_indices[mapping.rhs] = mapping.output; - } - if (mapping.output >= 0) { - output_to_lhs_indices[mapping.output] = mapping.lhs; - output_to_rhs_indices[mapping.output] = mapping.rhs; - } - }; - for (const auto& mapping : dims_mapping.batch_dims) { - populate_indices_mapping(mapping); - } - for (const auto& mapping : dims_mapping.contracting_dims) { - populate_indices_mapping(mapping); - } - for (const auto& mapping : dims_mapping.lhs_non_contracting_dims) { - populate_indices_mapping(mapping); - } - for (const auto& mapping : 
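// For the same assumed batch-matmul example (lhs [B,M,K], rhs [B,K,N],
// output [B,M,N]), the six index arrays populated above come out as shown
// below; transpose_sharding() uses such pairs of maps to re-express one
// operand's sharding in another operand's (or the output's) dimension order,
// giving up when a dimension that would be dropped is actually partitioned.
#include <cstdint>
#include <iostream>
#include <vector>

void Print(const char* name, const std::vector<int64_t>& v) {
  std::cout << name << ":";
  for (int64_t d : v) std::cout << " " << d;
  std::cout << "\n";
}

int main() {
  Print("lhs_to_rhs_indices", {0, -1, 1});     // B->B, M->(none), K->K
  Print("lhs_to_output_indices", {0, 1, -1});  // B->B, M->M, K->(none)
  Print("rhs_to_lhs_indices", {0, 2, -1});
  Print("rhs_to_output_indices", {0, -1, 2});
  Print("output_to_lhs_indices", {0, 1, -1});
  Print("output_to_rhs_indices", {0, -1, 2});
  return 0;
}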
dims_mapping.rhs_non_contracting_dims) { - populate_indices_mapping(mapping); - } - auto lhs_sharding_transposed_to_match_rhs = - transpose_sharding(lhs_sharding, lhs_to_rhs_indices, rhs_to_lhs_indices); - auto rhs_sharding_transposed_to_match_lhs = - transpose_sharding(rhs_sharding, rhs_to_lhs_indices, lhs_to_rhs_indices); - auto lhs_sharding_transposed_to_match_output = transpose_sharding( - lhs_sharding, lhs_to_output_indices, output_to_lhs_indices); - auto rhs_sharding_transposed_to_match_output = transpose_sharding( - rhs_sharding, rhs_to_output_indices, output_to_rhs_indices); - auto output_sharding_transposed_to_match_lhs = transpose_sharding( - hlo->sharding(), output_to_lhs_indices, lhs_to_output_indices); - auto output_sharding_transposed_to_match_rhs = transpose_sharding( - hlo->sharding(), output_to_rhs_indices, rhs_to_output_indices); - - // lhs_rhs_or_output: 0 lhs, 1 rhs, 2 output. - auto get_partitions_for_dims = - [&](const HloSharding& sharding, - absl::Span dims, - int lhs_rhs_or_output) { - int64 partitions = 1; - if (sharding.IsTileMaximal()) { - return partitions; - } - for (const auto& dim : dims) { - if (lhs_rhs_or_output == 0) { - partitions *= sharding.tile_assignment().dim(dim.lhs); - } else if (lhs_rhs_or_output == 1) { - partitions *= sharding.tile_assignment().dim(dim.rhs); - } else { - CHECK_EQ(lhs_rhs_or_output, 2); - partitions *= sharding.tile_assignment().dim(dim.output); - } - } - return partitions; - }; - const int64 lhs_batch_partitions = - get_partitions_for_dims(lhs_sharding, dims_mapping.batch_dims, 0); - const int64 rhs_batch_partitions = - get_partitions_for_dims(rhs_sharding, dims_mapping.batch_dims, 1); - const int64 output_batch_partitions = - get_partitions_for_dims(hlo->sharding(), dims_mapping.batch_dims, 2); - const int64 lhs_contracting_partitions = - get_partitions_for_dims(lhs_sharding, dims_mapping.contracting_dims, 0); - const int64 rhs_contracting_partitions = - get_partitions_for_dims(rhs_sharding, dims_mapping.contracting_dims, 1); - const int64 lhs_non_contracting_partitions = get_partitions_for_dims( - lhs_sharding, dims_mapping.lhs_non_contracting_dims, 0); - const int64 rhs_non_contracting_partitions = get_partitions_for_dims( - rhs_sharding, dims_mapping.rhs_non_contracting_dims, 1); - const int64 output_lhs_non_contracting_partitions = get_partitions_for_dims( - hlo->sharding(), dims_mapping.lhs_non_contracting_dims, 2); - const int64 output_rhs_non_contracting_partitions = get_partitions_for_dims( - hlo->sharding(), dims_mapping.rhs_non_contracting_dims, 2); - - auto& lhs = GetPartitionedHlo(hlo->operand(0)); - auto& rhs = GetPartitionedHlo(hlo->operand(1)); - // LHS and RHS are partitioned the same way and only partitioned in batch - // dimensions. - if (lhs_batch_partitions == rhs_batch_partitions && - rhs_batch_partitions == num_partitions_ && - lhs_sharding_transposed_to_match_rhs == rhs_sharding) { - TF_ASSIGN_OR_RETURN(auto dot, - create_sharded_dot(lhs.hlo(), rhs.hlo(), &b_)); - SetPartitionedHlo(hlo, [&] { - dot->set_sharding(*lhs_sharding_transposed_to_match_output); - return PartitionedHlo(dot, hlo->shape(), MakePartitioningState()) - .Reshard(hlo->sharding()) - .hlo(); - }); - return Status::OK(); - } - - // Try emit batch-partitioned einsum with one operand resharded. Returns - // whether the attempt succeeds. If may_reshard_with_allreduce is false, - // reshard must be done using all-to-all; otherwise this attempt fails. 
- auto try_emit_output_batch_partitioned_einsum_with_reshard = - [&](bool may_reshard_with_allreduce) -> StatusOr { - // LHS and output are batch partitioned in the same way. - if (lhs_batch_partitions == num_partitions_ && - output_batch_partitions == num_partitions_ && - lhs_sharding_transposed_to_match_output == hlo->sharding()) { - if (!may_reshard_with_allreduce && - !GetReshardAllToAllSourceTargetDims( - rhs.sharding(), *lhs_sharding_transposed_to_match_rhs)) { - return false; - } - auto resharded_rhs = rhs.Reshard(*lhs_sharding_transposed_to_match_rhs); - TF_ASSIGN_OR_RETURN( - auto dot, create_sharded_dot(lhs.hlo(), resharded_rhs.hlo(), &b_)); - SetPartitionedHlo(hlo, [&] { return dot; }); - return true; - } - // RHS and output are batch partitioned in the same way. - if (rhs_batch_partitions == num_partitions_ && - output_batch_partitions == num_partitions_ && - rhs_sharding_transposed_to_match_output == hlo->sharding()) { - if (!may_reshard_with_allreduce && - !GetReshardAllToAllSourceTargetDims( - lhs.sharding(), *rhs_sharding_transposed_to_match_lhs)) { - return false; - } - auto resharded_lhs = lhs.Reshard(*rhs_sharding_transposed_to_match_lhs); - TF_ASSIGN_OR_RETURN( - auto dot, create_sharded_dot(resharded_lhs.hlo(), rhs.hlo(), &b_)); - SetPartitionedHlo(hlo, [&] { return dot; }); - return true; - } - return false; - }; - - { - // Try batch-parallel by resharding one operand, and not using all-reduce. - TF_ASSIGN_OR_RETURN( - bool emitted, - try_emit_output_batch_partitioned_einsum_with_reshard(false)); - if (emitted) { - return Status::OK(); - } - } - - // Try to emit windowed DotGeneral when one operand is partitioned in the same - // way as the output along non-contracting dimensions, but the other operand - // is tiled in other dimensions. - auto emit_windowed_dot_general = [&](int64 matching_operand, - int64 windowing_operand, - bool windowed_at_contracting_dims, - bool windowed_at_batch_dims) { - CHECK_EQ(matching_operand + windowing_operand, 1); - CHECK(!windowed_at_batch_dims || !windowed_at_contracting_dims); - auto unpadded_result_buffer_shape = - MakePartitionedShape(hlo->shape(), hlo->sharding()); - auto padded_result_buffer_shape = unpadded_result_buffer_shape; - // For windowing at batch/non-contracting dims, we produce the result one - // partition at a time, so we need to pad the shape in case of uneven - // partitioning in order to make dynamic-update-slice in-bound. - if (!windowed_at_contracting_dims) { - padded_result_buffer_shape = GetPaddedShapeForUnevenPartitioning( - padded_result_buffer_shape, - windowing_operand == 0 ? *lhs_sharding_transposed_to_match_output - : *rhs_sharding_transposed_to_match_output); - } - // Mask the padding area of the windowed operand with zero if there is - // uneven partitioning. - if (windowed_at_contracting_dims) { - auto& to_mask = windowing_operand == 0 ? lhs : rhs; - to_mask = - to_mask.PadWithValue(b_.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::Zero(hlo->shape().element_type())))); - } - auto result_buffer = CreateZero(padded_result_buffer_shape, &b_); - auto iteration = b_.AddInstruction( - HloInstruction::CreateConstant(LiteralUtil::CreateR0(0))); - - // Create a while loop that computes one window per iteration. During each - // iteration, each partition sends its input window to its neighbor using - // collective-permute for the next iteration. 
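// A minimal sketch of the contracting-dimension windowed einsum loop built
// below, reduced to 1-D vectors and a single partition's point of view (all
// sizes assumed for illustration): each of the num_partitions_ iterations
// contributes one contracting-dim window, and the windows rotate between
// partitions via collective-permute, so after the loop every partition holds
// the full contraction for its output shard.
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  const int64_t n = 4;            // stands in for num_partitions_
  const int64_t k_per_shard = 2;  // contracting elements per shard
  std::vector<float> lhs = {1, 2, 3, 4, 5, 6, 7, 8};  // matching operand
  std::vector<float> rhs = {8, 7, 6, 5, 4, 3, 2, 1};  // windowed operand
  const int64_t partition_id = 1;  // simulate one partition
  float acc = 0.0f;                // the result buffer
  for (int64_t i = 0; i < n; ++i) {
    // data_partition_id = (iteration + partition_id) % num_partitions_
    const int64_t data_partition_id = (i + partition_id) % n;
    for (int64_t j = 0; j < k_per_shard; ++j) {
      const int64_t k = data_partition_id * k_per_shard + j;
      acc += lhs[k] * rhs[k];  // dot of the matching slice with this window
    }
    // ...collective-permute would now pass this partition's rhs window to its
    // neighbor before iteration i + 1.
  }
  std::cout << acc << "\n";  // 120 on every partition: the full dot(lhs, rhs)
  return 0;
}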
- SpmdBuilder body_b("windowed_dot_general_body", visiting_hlo_); - auto param = body_b.AddInstruction(HloInstruction::CreateParameter( - /*parameter_number=*/0, - ShapeUtil::MakeTupleShape({lhs.hlo()->shape(), rhs.hlo()->shape(), - result_buffer->shape(), iteration->shape()}), - "param")); - auto l = body_b.AddInstruction( - HloInstruction::CreateGetTupleElement(lhs.hlo()->shape(), param, 0)); - auto r = body_b.AddInstruction( - HloInstruction::CreateGetTupleElement(rhs.hlo()->shape(), param, 1)); - auto o = body_b.AddInstruction(HloInstruction::CreateGetTupleElement( - result_buffer->shape(), param, 2)); - auto i = body_b.AddInstruction( - HloInstruction::CreateGetTupleElement(iteration->shape(), param, 3)); - - auto partition_id = collective_ops_creator_.create_partition_id(&body_b); - auto data_partition_id = body_b.AddInstruction(HloInstruction::CreateBinary( - i->shape(), HloOpcode::kAdd, i, partition_id)); - auto partition_count = body_b.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::CreateR0(num_partitions_))); - data_partition_id = body_b.AddInstruction(HloInstruction::CreateBinary( - i->shape(), HloOpcode::kRemainder, data_partition_id, partition_count)); - auto dot_lhs = l; - auto dot_rhs = r; - if (windowed_at_contracting_dims || windowed_at_batch_dims) { - // Slice the matching operand according to the partitioned contracting - // dimensions on the windowed operand. We do this by treating the matching - // operand as replicated, and resharding it to match the windowed operand. - auto slice_operand = matching_operand == 0 ? l : r; - slice_operand->set_sharding(HloSharding::Replicate()); - auto state = MakePartitioningState(); - state.b = &body_b; - state.partition_id = data_partition_id; - auto slice = PartitionedHlo(slice_operand, slice_operand->shape(), state) - .Reshard(windowing_operand == 0 - ? *lhs_sharding_transposed_to_match_rhs - : *rhs_sharding_transposed_to_match_lhs) - .hlo(); - slice_operand->clear_sharding(); - if (matching_operand == 0) { - dot_lhs = slice; - } else { - dot_rhs = slice; - } - } - TF_ASSIGN_OR_RETURN(auto dot, - create_sharded_dot(dot_lhs, dot_rhs, &body_b)); - if (windowed_at_contracting_dims) { - // Accumulate the partial output to the result buffer. - o = body_b.AddInstruction( - HloInstruction::CreateBinary(o->shape(), HloOpcode::kAdd, o, dot)); - } else { - // The windowing operand is partitioned along batch/non-contracting - // dimensions, so we need a dynamic-update-slice to save the partial - // output in the result buffer. - auto offsets = MakePartitionOffsets( - o->shape(), - windowing_operand == 0 ? *lhs_sharding_transposed_to_match_output - : *rhs_sharding_transposed_to_match_output, - data_partition_id, &body_b); - o = body_b.AddInstruction(HloInstruction::CreateDynamicUpdateSlice( - o->shape(), o, dot, offsets)); - } - - // ++i - i = body_b.AddInstruction(HloInstruction::CreateBinary( - i->shape(), HloOpcode::kAdd, i, - body_b.AddInstruction( - HloInstruction::CreateConstant(LiteralUtil::CreateR0(1))))); - auto has_more = body_b.AddInstruction(HloInstruction::CreateCompare( - ShapeUtil::MakeShape(PRED, {}), i, - body_b.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::CreateR0(num_partitions_))), - ComparisonDirection::kLt)); - // Collective-permute for the next window. We don't need it for the last - // iteration, so we use a conditional around the collective-permute. 
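// A companion sketch for the batch/non-contracting windowed case handled
// above: instead of accumulating, each iteration produces the output slice
// that belongs to data_partition_id and writes it with a dynamic-update-slice
// into the (possibly padded) result buffer. This is a 1-D stand-in with
// assumed sizes, not the actual HLO construction.
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  const int64_t n = 4;      // stands in for num_partitions_
  const int64_t chunk = 2;  // output elements produced per iteration
  const int64_t partition_id = 1;
  std::vector<float> result(n * chunk, 0.0f);  // zero-initialized buffer
  for (int64_t i = 0; i < n; ++i) {
    const int64_t data_partition_id = (i + partition_id) % n;
    for (int64_t j = 0; j < chunk; ++j) {
      // dynamic-update-slice at an offset derived from data_partition_id
      result[data_partition_id * chunk + j] =
          static_cast<float>(data_partition_id);  // stand-in partial output
    }
  }
  for (float v : result) std::cout << v << " ";
  std::cout << "\n";  // 0 0 1 1 2 2 3 3 : every slice written exactly once
  return 0;
}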
- HloInstruction* conditional; - { - SpmdBuilder cp_b("window_collective_permute", visiting_hlo_); - { - auto p = cp_b.AddInstruction(HloInstruction::CreateParameter( - 0, windowing_operand == 0 ? l->shape() : r->shape(), "window")); - std::vector> sd_pairs(num_partitions_); - for (int64 source = 0; source < num_partitions_; ++source) { - // 0 -> n-1, 1 -> 0, 2 -> 1, ... - sd_pairs[source] = {source, - (source - 1 + num_partitions_) % num_partitions_}; - } - collective_ops_creator_.create_cross_partition_collective_permute( - &cp_b, p, sd_pairs, (*next_channel_id_)++); - } - SpmdBuilder ncp_b("last_iteration_noop", visiting_hlo_); - { - ncp_b.AddInstruction(HloInstruction::CreateParameter( - 0, windowing_operand == 0 ? l->shape() : r->shape(), "window")); - } - conditional = body_b.AddInstruction(HloInstruction::CreateConditional( - windowing_operand == 0 ? l->shape() : r->shape(), has_more, - windowing_operand == 0 ? l : r, - module_->AddEmbeddedComputation(cp_b.Build()), - windowing_operand == 0 ? l : r, - module_->AddEmbeddedComputation(ncp_b.Build()))); - } - if (windowing_operand == 0) { - l = conditional; - } else { - r = conditional; - } - body_b.AddInstruction(HloInstruction::CreateTuple({l, r, o, i})); - - SpmdBuilder cond_b("windowed_dot_general_cond", visiting_hlo_); - auto cond_param = cond_b.AddInstruction(HloInstruction::CreateParameter( - /*parameter_number=*/0, - ShapeUtil::MakeTupleShape({lhs.hlo()->shape(), rhs.hlo()->shape(), - result_buffer->shape(), iteration->shape()}), - "param")); - auto cond_i = cond_b.AddInstruction(HloInstruction::CreateGetTupleElement( - iteration->shape(), cond_param, 3)); - cond_b.AddInstruction(HloInstruction::CreateCompare( - ShapeUtil::MakeShape(PRED, {}), cond_i, - cond_b.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::CreateR0(num_partitions_))), - ComparisonDirection::kLt)); - auto while_loop = b_.AddInstruction(HloInstruction::CreateWhile( - cond_param->shape(), module_->AddEmbeddedComputation(cond_b.Build()), - module_->AddEmbeddedComputation(body_b.Build()), - b_.AddInstruction(HloInstruction::CreateTuple( - {lhs.hlo(), rhs.hlo(), result_buffer, iteration})))); - windowed_dot_general_loops_.push_back({while_loop, windowing_operand, - windowed_at_contracting_dims, - windowed_at_batch_dims}); - SetPartitionedHlo(hlo, [&] { - auto result = b_.AddInstruction(HloInstruction::CreateGetTupleElement( - result_buffer->shape(), while_loop, 2)); - if (!ShapeUtil::Compatible(padded_result_buffer_shape, - unpadded_result_buffer_shape)) { - result = b_.AddInstruction(HloInstruction::CreateSlice( - unpadded_result_buffer_shape, result, - std::vector(padded_result_buffer_shape.rank(), 0), - unpadded_result_buffer_shape.dimensions(), - std::vector(padded_result_buffer_shape.rank(), 1))); - } - return result; - }); - return Status::OK(); - }; - if (output_lhs_non_contracting_partitions == num_partitions_ && - output_sharding_transposed_to_match_lhs == lhs_sharding && - ShapeSizeInBytes(hlo->operand(1)->shape()) >= - options_.threshold_for_windowed_einsum_mib * 1024 * 1024) { - if (rhs_contracting_partitions == num_partitions_) { - return emit_windowed_dot_general(0, 1, true, false); - } - if (rhs_non_contracting_partitions == num_partitions_) { - return emit_windowed_dot_general(0, 1, false, false); - } - if (rhs_batch_partitions == num_partitions_) { - return emit_windowed_dot_general(0, 1, false, true); - } - } - if (output_rhs_non_contracting_partitions == num_partitions_ && - output_sharding_transposed_to_match_rhs == rhs_sharding && 
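// The source->target pairs built above implement a simple rotation of the
// windowed operand; a standalone check for an assumed num_partitions_ of 4:
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

int main() {
  const int64_t n = 4;
  std::vector<std::pair<int64_t, int64_t>> sd_pairs(n);
  for (int64_t source = 0; source < n; ++source) {
    sd_pairs[source] = {source, (source - 1 + n) % n};
  }
  for (const auto& p : sd_pairs) {
    std::cout << p.first << " -> " << p.second << "\n";  // 0->3, 1->0, 2->1, 3->2
  }
  return 0;
}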
- ShapeSizeInBytes(hlo->operand(0)->shape()) >= - options_.threshold_for_windowed_einsum_mib * 1024 * 1024) { - if (lhs_contracting_partitions == num_partitions_) { - return emit_windowed_dot_general(1, 0, true, false); - } - if (lhs_non_contracting_partitions == num_partitions_) { - return emit_windowed_dot_general(1, 0, false, false); - } - if (lhs_batch_partitions == num_partitions_) { - return emit_windowed_dot_general(1, 0, false, true); - } - } - - { - // Try batch-parallel by resharding one operand, and allowing all-reduce. - TF_ASSIGN_OR_RETURN( - bool emitted, - try_emit_output_batch_partitioned_einsum_with_reshard(true)); - if (emitted) { - return Status::OK(); - } - } - - // LHS and RHS have the same partitioned contracting dimensions. - if (lhs_contracting_partitions == rhs_contracting_partitions && - lhs_contracting_partitions == num_partitions_) { - auto zero = b_.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::Zero(hlo->shape().element_type()))); - // Pad both sides with zero, since NaN at one side cannot be masked by zero - // on the other side. - if (ShapeSizeInBytes(lhs.base_shape()) < - ShapeSizeInBytes(rhs.base_shape())) { - lhs = - lhs.Reshard(*rhs_sharding_transposed_to_match_lhs).PadWithValue(zero); - rhs = rhs.PadWithValue(zero); - } else { - lhs = lhs.PadWithValue(zero); - rhs = - rhs.Reshard(*lhs_sharding_transposed_to_match_rhs).PadWithValue(zero); - } - TF_ASSIGN_OR_RETURN(auto dot, - create_sharded_dot(lhs.hlo(), rhs.hlo(), &b_)); - SetPartitionedHlo(hlo, [&] { - auto ar = collective_ops_creator_.create_cross_partition_all_reduce( - &b_, dot, MakeBinaryAdd(hlo->shape().element_type(), module_), - NewChannel()); - ar->set_sharding(HloSharding::Replicate()); - return PartitionedHlo(ar, hlo->shape(), MakePartitioningState()) - .Reshard(hlo->sharding()) - .hlo(); - }); - return Status::OK(); - } - - // LHS and output have the same partitioned non-contracting dimensions. - if (lhs_non_contracting_partitions == num_partitions_ && - output_lhs_non_contracting_partitions == num_partitions_ && - lhs_sharding == hlo->sharding()) { - auto rhs_replicated = rhs.Reshard(HloSharding::Replicate()).hlo(); - TF_ASSIGN_OR_RETURN(auto dot, - create_sharded_dot(lhs.hlo(), rhs_replicated, &b_)); - SetPartitionedHlo(hlo, [&] { return dot; }); - return Status::OK(); - } - - // RHS and output have the same partitioned non-contracting dimensions. - if (rhs_non_contracting_partitions == num_partitions_ && - output_rhs_non_contracting_partitions == num_partitions_ && - rhs_sharding_transposed_to_match_output == hlo->sharding()) { - auto lhs_replicated = lhs.Reshard(HloSharding::Replicate()).hlo(); - TF_ASSIGN_OR_RETURN(auto dot, - create_sharded_dot(lhs_replicated, rhs.hlo(), &b_)); - SetPartitionedHlo(hlo, [&] { return dot; }); - return Status::OK(); - } - - // Output is batch partitioned. - if (output_batch_partitions == num_partitions_) { - auto resharded_lhs = lhs.Reshard(*output_sharding_transposed_to_match_lhs); - auto resharded_rhs = rhs.Reshard(*output_sharding_transposed_to_match_rhs); - TF_ASSIGN_OR_RETURN(auto dot, create_sharded_dot(resharded_lhs.hlo(), - resharded_rhs.hlo(), &b_)); - SetPartitionedHlo(hlo, [&] { return dot; }); - return Status::OK(); - } - // Output is partitioned along LHS non-contracting dimensions. 
- if (output_lhs_non_contracting_partitions == num_partitions_) { - auto resharded_lhs = lhs.Reshard(*output_sharding_transposed_to_match_lhs); - auto replicated_rhs = rhs.Reshard(HloSharding::Replicate()); - TF_ASSIGN_OR_RETURN( - auto dot, - create_sharded_dot(resharded_lhs.hlo(), replicated_rhs.hlo(), &b_)); - SetPartitionedHlo(hlo, [&] { return dot; }); - return Status::OK(); - } - // Output is partitioned along RHS non-contracting dimensions. - if (output_rhs_non_contracting_partitions == num_partitions_) { - auto replicated_lhs = lhs.Reshard(HloSharding::Replicate()); - auto resharded_rhs = rhs.Reshard(*output_sharding_transposed_to_match_rhs); - TF_ASSIGN_OR_RETURN(auto dot, create_sharded_dot(replicated_lhs.hlo(), - resharded_rhs.hlo(), &b_)); - SetPartitionedHlo(hlo, [&] { return dot; }); - return Status::OK(); - } - - // Returns true if it is beneficial to reshard the operand at `operand_idx` - // across the contracting dimension. - const auto should_partition_contracting_dim = [&](int64 operand_idx) { - if (!hlo->sharding().IsReplicated()) { - return false; - } - - if (operand_idx == 0) { - // If LHS and output are replicated, we compare the cost of all-gather - // on RHS vs all-reduce on the output. - return (rhs_contracting_partitions == num_partitions_) && - lhs.sharding().IsReplicated() && - ShapeUtil::ElementsIn(hlo->operand(1)->shape()) > - ShapeUtil::ElementsIn(hlo->shape()); - } else { - return (lhs_contracting_partitions == num_partitions_) && - rhs.sharding().IsReplicated() && - ShapeUtil::ElementsIn(hlo->operand(0)->shape()) > - ShapeUtil::ElementsIn(hlo->shape()); - } - }; - - // When the output is replicated and one of the operands is partitioned along - // contracting dimension, align the other operand to be partitioned along - // the contracting dimensions. - if (hlo->sharding().IsReplicated() && (should_partition_contracting_dim(0) || - should_partition_contracting_dim(1))) { - auto zero = b_.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::Zero(hlo->shape().element_type()))); - if (should_partition_contracting_dim(0)) { - lhs = - lhs.Reshard(*rhs_sharding_transposed_to_match_lhs).PadWithValue(zero); - rhs = rhs.PadWithValue(zero); - } else { - lhs = lhs.PadWithValue(zero); - rhs = - rhs.Reshard(*lhs_sharding_transposed_to_match_rhs).PadWithValue(zero); - } - TF_ASSIGN_OR_RETURN(auto dot, - create_sharded_dot(lhs.hlo(), rhs.hlo(), &b_)); - SetPartitionedHlo(hlo, [&] { - auto ar = collective_ops_creator_.create_cross_partition_all_reduce( - &b_, dot, MakeBinaryAdd(hlo->shape().element_type(), module_), - NewChannel()); - ar->set_sharding(HloSharding::Replicate()); - return PartitionedHlo(ar, hlo->shape(), MakePartitioningState()).hlo(); - }); - return Status::OK(); - } - - return DefaultAction(hlo); -} - -namespace { - -// Finds a cluster of nodes that produce the inputs for `hlo` which only depend -// on small operands, which means the cluster should start with broadcasts, -// constants and iotas. All other internal nodes must be non-side-effecting -// elemntwise ops. Returns the set of nodes, and the small operands. E.g., for -// the following graph, -// -// a -> broadcast -> multiply -// iota ---> add--/ -// constant/ -// -// FindInputNodesIfOnlyDependOnSmallOperands(multiply) will return -// <{broadcast, iota, constant, add, multiply}, [a]>. 
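// The heuristic above trades replicating (all-gathering) one operand against
// all-reducing the output. A quick size comparison with assumed shapes
// (illustrative numbers only): lhs [1024,4096] replicated, rhs [4096,256]
// partitioned along the contracting dimension, output [1024,256] replicated.
#include <cstdint>
#include <iostream>

int main() {
  const int64_t rhs_elements = 4096 * 256;  // operand(1), contracting dim sharded
  const int64_t out_elements = 1024 * 256;  // replicated output
  // should_partition_contracting_dim(0): keep rhs sharded and reshard lhs to
  // match, because all-reducing the smaller output beats all-gathering rhs.
  std::cout << std::boolalpha << (rhs_elements > out_elements) << "\n";  // true
  return 0;
}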
-std::pair, std::vector> -FindInputNodesIfOnlyDependOnSmallOperands(HloInstruction* hlo) { - std::unordered_set nodes_found; - std::vector new_operands; - std::unordered_set new_operands_set; - std::vector worklist; - worklist.push_back(hlo); - while (!worklist.empty()) { - auto inst = worklist.back(); - worklist.pop_back(); - if (nodes_found.count(inst) > 0) { - continue; - } - if (inst->opcode() == HloOpcode::kBroadcast || - inst->opcode() == HloOpcode::kConstant || - inst->opcode() == HloOpcode::kIota) { - nodes_found.insert(inst); - for (auto o : inst->operands()) { - auto res = new_operands_set.emplace(o); - if (res.second) { - new_operands.push_back(o); - } - } - } else if (inst->IsElementwise() && !inst->HasSideEffectNoRecurse() && - inst->opcode() != HloOpcode::kAllReduce && - absl::c_all_of(inst->operands(), - [inst](const HloInstruction* o) { - return ShapeUtil::CompatibleIgnoringElementType( - o->shape(), inst->shape()); - })) { - nodes_found.insert(inst); - for (auto o : inst->operands()) { - worklist.push_back(o); - } - } else { - nodes_found.clear(); - new_operands.clear(); - break; - } - } - return {std::move(nodes_found), std::move(new_operands)}; -} - -// Moves a cluster of memory-reducing nodes into the windowed dot-general loop -// on contracting dimensions. Such a loop has a dynamic slice on the -// non-windowed operand. If we move the input nodes into the loop, the -// dynamic-slice could be merged with them by later optimization passes, which -// reduces memory. -// -// small_operands small_operands -// | | -// input_nodes loop { | -// | => input_nodes -// loop { | | -// dynamic-slice dynamic-slice -// ... ... -// } } -// -// Later optimization passes (TpuPadSliceMover) will merge the dynamic slice -// with the input nodes. -Status SinkInputNodesIntoWindowedDotGeneralLoopOnContractingDimensions( - HloInstruction* loop, int64 non_windowed_operand_index) { - auto input_tuple = loop->mutable_operand(0); - auto old_operand = input_tuple->mutable_operand(non_windowed_operand_index); - auto input_nodes = FindInputNodesIfOnlyDependOnSmallOperands(old_operand); - auto to_sink = std::move(input_nodes.first); - auto new_operands = std::move(input_nodes.second); - if (to_sink.empty()) { - return Status::OK(); - } - auto computation = loop->parent(); - // Replace the old operand with a tuple of the found small operands. - auto new_input_subtuple = - computation->AddInstruction(HloInstruction::CreateTuple(new_operands)); - TF_RETURN_IF_ERROR(input_tuple->ReplaceOperandWithDifferentShape( - non_windowed_operand_index, new_input_subtuple)); - - auto body = loop->while_body(); - auto body_param = body->parameter_instruction(0); - auto old_body_param_users = body_param->users(); - // Update all tuple shapes. - for (auto tuple : std::vector{ - input_tuple, loop, loop->while_condition()->parameter_instruction(0), - body_param, body->root_instruction()}) { - *ShapeUtil::GetMutableSubshape(tuple->mutable_shape(), - {non_windowed_operand_index}) = - new_input_subtuple->shape(); - } - // Now update the loop body. - auto new_operand_tuple_inside = - body->AddInstruction(HloInstruction::CreateGetTupleElement( - new_input_subtuple->shape(), body_param, non_windowed_operand_index)); - TF_RETURN_IF_ERROR(body->root_instruction()->ReplaceOperandWithDifferentShape( - non_windowed_operand_index, new_operand_tuple_inside)); - - // Create nodes inside the loop body. 
- std::vector worklist; - std::unordered_map outside_to_inside; - auto add_users_if_available = [&](HloInstruction* inst) { - for (auto u : inst->users()) { - if (outside_to_inside.count(u) == 0 && to_sink.count(u) > 0 && - absl::c_all_of(u->operands(), [&](const HloInstruction* o) { - return outside_to_inside.count(o) > 0; - })) { - worklist.push_back(u); - } - } - }; - for (int64 i = 0; i < new_operands.size(); ++i) { - outside_to_inside[new_operands[i]] = - body->AddInstruction(HloInstruction::CreateGetTupleElement( - new_operands[i]->shape(), new_operand_tuple_inside, i)); - add_users_if_available(new_operands[i]); - } - // HLOs to sink without operands. - std::vector nullaries_to_sink; - for (auto inst : to_sink) { - if (inst->operand_count() == 0) { - nullaries_to_sink.push_back(inst); - } - } - // Sort nullaries_to_sink to make it deterministic. - absl::c_sort(nullaries_to_sink, - [](const HloInstruction* a, const HloInstruction* b) { - return a->unique_id() < b->unique_id(); - }); - for (auto inst : nullaries_to_sink) { - worklist.push_back(inst); - } - while (!worklist.empty()) { - auto inst = worklist.back(); - worklist.pop_back(); - std::vector inst_new_operands(inst->operand_count()); - for (int64 i = 0; i < inst->operand_count(); ++i) { - inst_new_operands[i] = outside_to_inside[inst->operand(i)]; - } - outside_to_inside[inst] = body->AddInstruction( - inst->CloneWithNewOperands(inst->shape(), inst_new_operands)); - add_users_if_available(inst); - } - TF_RET_CHECK(outside_to_inside.count(old_operand) > 0); - for (auto ou : old_body_param_users) { - if (ou->opcode() == HloOpcode::kGetTupleElement && - ou->tuple_index() == non_windowed_operand_index) { - TF_RETURN_IF_ERROR( - ou->ReplaceAllUsesWith(outside_to_inside[old_operand])); - TF_RETURN_IF_ERROR(body->RemoveInstruction(ou)); - } - } - return Status::OK(); -} - -// Moves a cluster of memory-reducing nodes (with reduce nodes at the end) into -// the windowed dot-general loop on non-contracting dimensions. Such a loop has -// a dynamic-update-slice at the output. If we move the user nodes into the loop -// and before the dynamic-update-slice, the user nodes can operate on smaller -// shapes, which reduces memory. -// -// small_operands small_operands -// | | => | | -// | | loop { loop { | | -// | | conv | broadcast conv -// | | | | | / -// | | dynamic-update-slice | dynamic-slice / -// | | | | | / -// | | } | | multiply----- -// |broadcast / | / -// | | / reduce -// |multiply-- | -// \ | dynamic-update-slice -// reduce } -// -// Later optimization passes (TpuPadSliceMover) will merge the dynamic slice -// with the input nodes (broadcast). -Status MoveUsersIntoWindowedDotGeneralLoopOnNonContractingDimensions( - HloInstruction* loop) { - CHECK_EQ(loop->user_count(), 1); - // There should be a single direct user of the while loop, which is the - // gte for element 2, i.e., the dot output. - auto user_gte = loop->users().front(); - CHECK_EQ(user_gte->opcode(), HloOpcode::kGetTupleElement); - CHECK_EQ(user_gte->tuple_index(), 2); - auto computation = loop->parent(); - - // Find the reduce outputs and the input nodes they depend on, if input nodes - // only have small operands. 
- std::unordered_set to_move; - std::vector new_operands; - std::unordered_set new_operands_set; - std::vector reduce_outputs; - std::vector worklist; - Shape padded_shape = user_gte->shape(); - Shape unpadded_shape = user_gte->shape(); - auto original_output = user_gte; - - if (user_gte->user_count() == 1 && - user_gte->users().back()->opcode() == HloOpcode::kSlice) { - original_output = user_gte->users().back(); - unpadded_shape = original_output->shape(); - } - for (auto u : original_output->users()) { - worklist.push_back(u); - } - to_move.insert(original_output); - while (!worklist.empty()) { - auto inst = worklist.back(); - worklist.pop_back(); - if (to_move.count(inst) > 0) { - continue; - } - // We only support reduces with simple reduction function, since we may need - // to accumulate across iterations manually. - if (inst->opcode() == HloOpcode::kReduce && - inst->to_apply()->instruction_count() == 3 && - inst->to_apply()->num_parameters() == 2 && - inst->to_apply()->root_instruction()->IsElementwise()) { - to_move.insert(inst); - auto other_operand = inst->mutable_operand(1); - auto res = new_operands_set.emplace(other_operand); - if (res.second) { - new_operands.push_back(other_operand); - } - reduce_outputs.push_back(inst); - } else if (inst != computation->root_instruction() && - inst->user_count() > 0 && inst->IsElementwise() && - !inst->HasSideEffectNoRecurse() && - inst->opcode() != HloOpcode::kAllReduce && - absl::c_all_of(inst->operands(), - [inst](const HloInstruction* o) { - return ShapeUtil::CompatibleIgnoringElementType( - o->shape(), inst->shape()); - })) { - // For an elementwise op, we need to make sure that they depend on only - // nodes already in to_move and nodes with small operands. - bool can_include = true; - for (auto operand : inst->operands()) { - if (to_move.count(operand) > 0) { - continue; - } - auto find_result = FindInputNodesIfOnlyDependOnSmallOperands(operand); - if (find_result.first.empty()) { - can_include = false; - break; - } - for (auto n : find_result.first) { - to_move.insert(n); - } - for (auto new_operand : find_result.second) { - auto res = new_operands_set.insert(new_operand); - if (res.second) { - new_operands.push_back(new_operand); - } - } - } - if (!can_include) { - to_move.clear(); - break; - } - to_move.insert(inst); - for (auto u : inst->users()) { - worklist.push_back(u); - } - } else { - to_move.clear(); - break; - } - } - // If nothing is found, to_move could contain only original_output, or cleared - // by the above code. - if (to_move.size() <= 1) { - return Status::OK(); - } - - // We will replace the original loop output with reduce-shape outputs. Create - // the initial buffers before the loop. - for (auto out : reduce_outputs) { - auto padded_out_shape = out->shape(); - int64 operand_dim = 0; - int64 output_dim = 0; - while (output_dim < padded_out_shape.rank()) { - if (absl::c_linear_search(out->dimensions(), operand_dim)) { - // Dimension colapsed. - ++operand_dim; - continue; - } - // Kept dimensions have the same size of the padded shape. 
- padded_out_shape.set_dimensions(output_dim, - padded_shape.dimensions(operand_dim)); - ++operand_dim; - ++output_dim; - } - auto broadcast = - computation->AddInstruction(HloInstruction::CreateBroadcast( - padded_out_shape, - computation->AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::Zero(out->shape().element_type()))), - {})); - new_operands.push_back(broadcast); - } - - auto input_tuple = loop->mutable_operand(0); - // Create the new input subtuple that contains the small operands and the - // reduce-shape result buffers. - auto new_input_subtuple = - computation->AddInstruction(HloInstruction::CreateTuple(new_operands)); - TF_RETURN_IF_ERROR( - input_tuple->ReplaceOperandWithDifferentShape(2, new_input_subtuple)); - auto body = loop->while_body(); - auto body_param = body->parameter_instruction(0); - auto body_root = body->root_instruction(); - CHECK_EQ(body_root->opcode(), HloOpcode::kTuple); - // Update tuple shapes. - for (auto tuple : std::vector{ - input_tuple, loop, loop->while_condition()->parameter_instruction(0), - body_param, body_root}) { - *ShapeUtil::GetMutableSubshape(tuple->mutable_shape(), {2}) = - new_input_subtuple->shape(); - } - auto new_loop_input = - body->AddInstruction(HloInstruction::CreateGetTupleElement( - new_input_subtuple->shape(), body_param, 2)); - - // Now create the moved nodes inside the loop body. - std::unordered_map outside_to_inside; - worklist.clear(); - auto add_users_if_available = [&](HloInstruction* inst) { - for (auto u : inst->users()) { - if (outside_to_inside.count(u) == 0 && to_move.count(u) > 0 && - absl::c_all_of(u->operands(), [&](const HloInstruction* o) { - return outside_to_inside.count(o) > 0; - })) { - worklist.push_back(u); - } - } - }; - for (int64 i = 0; i < new_operands.size(); ++i) { - outside_to_inside[new_operands[i]] = - body->AddInstruction(HloInstruction::CreateGetTupleElement( - new_operands[i]->shape(), new_loop_input, i)); - add_users_if_available(new_operands[i]); - } - // The elementwise nodes will be created with sliced shape. The original loop - // output corresponds to the dynamic-update-slice's update slice. - auto dus = body_root->mutable_operand(2); - CHECK_EQ(dus->opcode(), HloOpcode::kDynamicUpdateSlice); - outside_to_inside[original_output] = dus->mutable_operand(1); - add_users_if_available(original_output); - std::vector slice_offsets(padded_shape.rank()); - for (int64 i = 0; i < slice_offsets.size(); ++i) { - slice_offsets[i] = dus->mutable_operand(i + 2); - } - auto get_slice = [&](HloInstruction* padded) { - return body->AddInstruction(HloInstruction::CreateDynamicSlice( - ShapeUtil::ChangeElementType(dus->operand(1)->shape(), - padded->shape().element_type()), - padded, slice_offsets, dus->operand(1)->shape().dimensions())); - }; - // Helper functions to create nodes with small operands. 
- auto add_broadcast = [&](const HloInstruction* broadcast) { - auto padded_operand_shape = broadcast->operand(0)->shape(); - for (int64 i = 0; i < broadcast->dimensions().size(); ++i) { - padded_operand_shape.set_dimensions( - i, padded_shape.dimensions(broadcast->dimensions(i))); - } - auto padded_operand = PadToShape(outside_to_inside[broadcast->operand(0)], - padded_operand_shape, nullptr, body); - outside_to_inside[broadcast] = - get_slice(body->AddInstruction(broadcast->CloneWithNewOperands( - ShapeUtil::ChangeElementType(padded_shape, - padded_operand_shape.element_type()), - {padded_operand}))); - }; - auto add_iota = [&](const HloInstruction* iota) { - outside_to_inside[iota] = - get_slice(body->AddInstruction(iota->CloneWithNewOperands( - ShapeUtil::ChangeElementType(padded_shape, - iota->shape().element_type()), - {}))); - }; - auto add_constant = [&](const HloInstruction* constant) { - outside_to_inside[constant] = body->AddInstruction(constant->Clone()); - outside_to_inside[constant] = get_slice( - PadToShape(outside_to_inside[constant], - ShapeUtil::ChangeElementType( - padded_shape, constant->shape().element_type()), - nullptr, body)); - }; - while (!worklist.empty()) { - auto inst = worklist.back(); - worklist.pop_back(); - if (outside_to_inside.count(inst) > 0) { - continue; - } - if (inst->opcode() == HloOpcode::kBroadcast) { - add_broadcast(inst); - } else if (inst->opcode() == HloOpcode::kIota) { - add_iota(inst); - } else if (inst->opcode() == HloOpcode::kConstant) { - add_constant(inst); - } else if (inst->opcode() == HloOpcode::kReduce) { - // This is an output, for which we has special handling later. - } else { - std::vector operands_inside(inst->operand_count()); - for (int64 i = 0; i < operands_inside.size(); ++i) { - operands_inside[i] = outside_to_inside[inst->operand(i)]; - } - outside_to_inside[inst] = body->AddInstruction(inst->CloneWithNewOperands( - ShapeUtil::ChangeElementType(dus->operand(1)->shape(), - inst->shape().element_type()), - operands_inside)); - } - add_users_if_available(inst); - } - std::vector new_outputs_inside(new_operands.size()); - for (int64 i = 0; i < new_outputs_inside.size(); ++i) { - new_outputs_inside[i] = outside_to_inside[new_operands[i]]; - } - // Now create the reduce outpus inside of the loop. - for (int64 i = 0; i < reduce_outputs.size(); ++i) { - auto reduce_outside = reduce_outputs[i]; - CHECK_EQ(reduce_outside->opcode(), HloOpcode::kReduce); - int64 index_in_operand = new_operands.size() - reduce_outputs.size() + i; - auto last_iter_result = outside_to_inside[new_operands[index_in_operand]]; - auto operand0 = outside_to_inside[reduce_outside->operand(0)]; - auto operand1 = outside_to_inside[reduce_outside->operand(1)]; - TF_ASSIGN_OR_RETURN(auto reduce_shape, - ShapeInference::InferReduceShape( - {&operand0->shape(), &operand1->shape()}, - reduce_outside->dimensions(), - reduce_outside->to_apply()->ComputeProgramShape())); - *reduce_shape.mutable_layout() = reduce_outside->shape().layout(); - std::vector reduce_dus_offsets; - // If any collapsed dimension is windowed, we need to accumulate with last - // iteration's result. If such a dimension has padding, we also need to mask - // off invalid data. 
- bool needs_accumulate = false; - std::vector dims_to_mask; - for (int64 i = 0; i < slice_offsets.size(); ++i) { - if (absl::c_linear_search(reduce_outside->dimensions(), i)) { - if (reduce_outside->operand(0)->shape().dimensions(i) != - operand0->shape().dimensions(i)) { - needs_accumulate = true; - if (unpadded_shape.dimensions(i) != padded_shape.dimensions(i)) { - dims_to_mask.push_back(i); - } - } - continue; - } - reduce_dus_offsets.push_back(slice_offsets[i]); - } - // Mask off invalid data in collapsed dimensions. - for (int64 dim : dims_to_mask) { - auto iota = body->AddInstruction(HloInstruction::CreateIota( - ShapeUtil::ChangeElementType(operand0->shape(), S32), dim)); - auto add = body->AddInstruction(HloInstruction::CreateBinary( - iota->shape(), HloOpcode::kAdd, iota, - body->AddInstruction(HloInstruction::CreateBroadcast( - iota->shape(), slice_offsets[dim], {})))); - auto limit = body->AddInstruction(HloInstruction::CreateBroadcast( - iota->shape(), - body->AddInstruction( - HloInstruction::CreateConstant(LiteralUtil::CreateR0( - reduce_outside->operand(0)->shape().dimensions(dim)))), - {})); - auto compare = body->AddInstruction(HloInstruction::CreateCompare( - ShapeUtil::ChangeElementType(iota->shape(), PRED), add, limit, - ComparisonDirection::kLt)); - operand0 = body->AddInstruction(HloInstruction::CreateTernary( - operand0->shape(), HloOpcode::kSelect, compare, operand0, - body->AddInstruction(HloInstruction::CreateBroadcast( - operand0->shape(), operand1, {})))); - } - auto output_inside = - body->AddInstruction(reduce_outside->CloneWithNewOperands( - reduce_shape, {operand0, operand1})); - // Accumulate with previous results if needed. - if (needs_accumulate) { - auto input_slice = - body->AddInstruction(HloInstruction::CreateDynamicSlice( - output_inside->shape(), last_iter_result, reduce_dus_offsets, - output_inside->shape().dimensions())); - output_inside = body->AddInstruction(HloInstruction::CreateBinary( - output_inside->shape(), - reduce_outside->to_apply()->root_instruction()->opcode(), - output_inside, input_slice)); - } - // Dynamic-update-slice if needed. - if (!ShapeUtil::Compatible(output_inside->shape(), - last_iter_result->shape())) { - output_inside = - body->AddInstruction(HloInstruction::CreateDynamicUpdateSlice( - last_iter_result->shape(), last_iter_result, output_inside, - reduce_dus_offsets)); - } - new_outputs_inside[index_in_operand] = output_inside; - } - // Body output. - auto new_output_inside = - body->AddInstruction(HloInstruction::CreateTuple(new_outputs_inside)); - TF_RETURN_IF_ERROR( - body_root->ReplaceOperandWithDifferentShape(2, new_output_inside)); - TF_RETURN_IF_ERROR(body->RemoveInstructionAndUnusedOperands(dus)); - // Replace uses of the reduces outside the loop. 
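// The masking above (iota + slice offset compared against the valid limit,
// then a select against the reduce's init value) on a plain array, with
// assumed, illustrative sizes: elements past the unpadded size are replaced
// by the init value so they cannot affect the accumulated reduction.
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  const int64_t slice_offset = 4;  // this shard's offset in the full dimension
  const int64_t valid_size = 6;    // unpadded size of the dimension
  const float init = 0.0f;         // reduce init value (operand1)
  std::vector<float> shard = {10, 20, 30, 40};  // padded shard data
  for (int64_t i = 0; i < static_cast<int64_t>(shard.size()); ++i) {
    const bool in_range = slice_offset + i < valid_size;  // compare kLt
    shard[i] = in_range ? shard[i] : init;                // select
  }
  for (float v : shard) std::cout << v << " ";
  std::cout << "\n";  // 10 20 0 0
  return 0;
}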
- auto new_output_gte = - computation->AddInstruction(HloInstruction::CreateGetTupleElement( - new_output_inside->shape(), loop, 2)); - for (int64 i = 0; i < reduce_outputs.size(); ++i) { - int64 index_in_operand = new_operands.size() - reduce_outputs.size() + i; - auto new_output = - computation->AddInstruction(HloInstruction::CreateGetTupleElement( - new_outputs_inside[index_in_operand]->shape(), new_output_gte, - index_in_operand)); - if (!ShapeUtil::Compatible(new_output->shape(), - reduce_outputs[i]->shape())) { - new_output = computation->AddInstruction(HloInstruction::CreateSlice( - reduce_outputs[i]->shape(), new_output, - std::vector(new_output->shape().rank(), 0), - reduce_outputs[i]->shape().dimensions(), - std::vector(new_output->shape().rank(), 1))); - } - TF_RETURN_IF_ERROR(reduce_outputs[i]->ReplaceAllUsesWith(new_output)); - TF_RETURN_IF_ERROR( - computation->RemoveInstructionAndUnusedOperands(reduce_outputs[i])); - } - return Status::OK(); -} - -} // namespace - -Status SpmdPartitioningVisitor::DoCodeMotionForWindowedDotGeneralLoops( - HloComputation* computation) { - for (auto& loop : windowed_dot_general_loops_) { - if (loop.windowed_in_contracting_dims || loop.windowed_in_batch_dims) { - // We have a dynamic-slice for the non-windowed operand in - // batch/contracting-dim windowed dot-general. So moving the - // broadcast/iota/elementwise ops into the loop could help reduce memory - // via fusion. - TF_RETURN_IF_ERROR( - SinkInputNodesIntoWindowedDotGeneralLoopOnContractingDimensions( - loop.while_loop, 1 - loop.windowed_operand)); - } - if (!loop.windowed_in_contracting_dims) { - // We have a dynamic-update-slice for the output in - // batch/non-contracting-dim windowed dot-general. So moving reduce ops - // into the loop could help reduce memory. 
- TF_RETURN_IF_ERROR( - MoveUsersIntoWindowedDotGeneralLoopOnNonContractingDimensions( - loop.while_loop)); - } - } - return Status::OK(); -} - StatusOr SpmdPartitioningVisitor::DoPartition( HloComputation* computation, const HloSharding& root_sharding) { VLOG(2) << "Partitioning computation " << computation->name() << " for " diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc index 7c4d816fd66..65f066db629 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc @@ -885,5 +885,46 @@ int64 ShardCountAtDim(const HloSharding& sharding, int64 dim) { return sharding.tile_assignment().dim(dim); } +absl::optional> GetReshardAllToAllSourceTargetDims( + const HloSharding& source, const HloSharding& target) { + if (source.IsTileMaximal() || target.IsTileMaximal() || + source.tile_assignment().num_dimensions() != + target.tile_assignment().num_dimensions()) { + return absl::nullopt; + } + int64 source_dim = -1; + int64 target_dim = -1; + for (int64 i = 0; i < source.tile_assignment().num_dimensions(); ++i) { + if (source.tile_assignment().dim(i) > 1 && + target.tile_assignment().dim(i) == 1) { + if (source_dim != -1) { + return absl::nullopt; + } + source_dim = i; + } else if (source.tile_assignment().dim(i) == 1 && + target.tile_assignment().dim(i) > 1) { + if (target_dim != -1) { + return absl::nullopt; + } + target_dim = i; + } else if (source.tile_assignment().dim(i) != + target.tile_assignment().dim(i)) { + return absl::nullopt; + } + } + if (source_dim == -1 || target_dim == -1 || source_dim == target_dim) { + return absl::nullopt; + } + return std::pair(source_dim, target_dim); +} + +bool CanReshardWithCollectivePermute(const HloSharding& source, + const HloSharding& target) { + return !source.IsTileMaximal() && !target.IsTileMaximal() && + source.tile_assignment().dimensions() == + target.tile_assignment().dimensions() && + source.tile_assignment() != target.tile_assignment(); +} + } // namespace spmd } // namespace xla diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h index 8389c2f666a..d924a5c7151 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h @@ -265,6 +265,15 @@ HloInstruction* SliceFirstK(HloInstruction* hlo, SpmdBuilder* builder, // Check if a dimension is sharded. int64 ShardCountAtDim(const HloSharding& sharding, int64 dim); +// Returns the pair of source and target dimensions is the resharding can be +// done via all-to-all. +absl::optional> GetReshardAllToAllSourceTargetDims( + const HloSharding& source, const HloSharding& target); + +// Returns whether the resharding can be done via collective-permute. +bool CanReshardWithCollectivePermute(const HloSharding& source, + const HloSharding& target); + } // namespace spmd } // namespace xla From f045dd855f6391304b5252905bad1a079032cfdf Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Tue, 21 Jul 2020 12:03:18 -0700 Subject: [PATCH 0951/2522] Do not recommend using the denylist to fix pip smoke test. Using denylist will fix the pip smoke test, but the test will fail during pip testing. 
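// A standalone sketch of the decision GetReshardAllToAllSourceTargetDims
// (added above) makes, restated over plain vectors of tile-assignment
// dimensions: it looks for exactly one dimension that loses its partitioning
// and exactly one that gains it, e.g. [2,1] -> [1,2] can be resharded with an
// all-to-all between dimensions 0 and 1.
#include <cstdint>
#include <iostream>
#include <optional>
#include <utility>
#include <vector>

std::optional<std::pair<int64_t, int64_t>> SourceTargetDims(
    const std::vector<int64_t>& source, const std::vector<int64_t>& target) {
  if (source.size() != target.size()) return std::nullopt;
  int64_t source_dim = -1, target_dim = -1;
  for (size_t i = 0; i < source.size(); ++i) {
    if (source[i] > 1 && target[i] == 1) {
      if (source_dim != -1) return std::nullopt;
      source_dim = i;
    } else if (source[i] == 1 && target[i] > 1) {
      if (target_dim != -1) return std::nullopt;
      target_dim = i;
    } else if (source[i] != target[i]) {
      return std::nullopt;
    }
  }
  if (source_dim == -1 || target_dim == -1 || source_dim == target_dim) {
    return std::nullopt;
  }
  return std::make_pair(source_dim, target_dim);
}

int main() {
  auto r = SourceTargetDims({2, 1}, {1, 2});
  if (r) std::cout << r->first << " -> " << r->second << "\n";  // 0 -> 1
  std::cout << std::boolalpha << SourceTargetDims({2, 2}, {2, 2}).has_value()
            << "\n";  // false: nothing to move
  return 0;
}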
PiperOrigin-RevId: 322411394 Change-Id: I708936382182c3f3d882c08bb707dda82da30a47 --- tensorflow/tools/pip_package/pip_smoke_test.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/tools/pip_package/pip_smoke_test.py b/tensorflow/tools/pip_package/pip_smoke_test.py index 40d2cff56b4..d2002b58598 100644 --- a/tensorflow/tools/pip_package/pip_smoke_test.py +++ b/tensorflow/tools/pip_package/pip_smoke_test.py @@ -174,8 +174,7 @@ def main(): raise RuntimeError(""" One or more added test dependencies are not in the pip package. If these test dependencies need to be in TensorFlow pip package, please add them to //tensorflow/tools/pip_package/BUILD. -Else either denylist the dependencies in //tensorflow/tools/pip_package/pip_smoke_test.py -or add no_pip tag to the test.""") +Else add no_pip tag to the test.""") else: print("TEST PASSED") From 459a4d3a527f37c9e53a47cd8df93e13a5b8f756 Mon Sep 17 00:00:00 2001 From: Yuefeng Zhou Date: Tue, 21 Jul 2020 12:21:58 -0700 Subject: [PATCH 0952/2522] Enable disabled tests. PiperOrigin-RevId: 322415558 Change-Id: I62cf1855341a611be2518b73dee961cf7503b8c5 --- tensorflow/python/keras/distribute/keras_optimizer_v2_test.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tensorflow/python/keras/distribute/keras_optimizer_v2_test.py b/tensorflow/python/keras/distribute/keras_optimizer_v2_test.py index 35e355c093c..75c7ce833c5 100644 --- a/tensorflow/python/keras/distribute/keras_optimizer_v2_test.py +++ b/tensorflow/python/keras/distribute/keras_optimizer_v2_test.py @@ -52,7 +52,6 @@ class MirroredStrategyOptimizerV2Test(test.TestCase, parameterized.TestCase): ], mode=['graph', 'eager'])) def testKerasOptimizerWithUnequalInput(self, distribution): - self.skipTest('b/130309197') with distribution.scope(): var = variables.Variable( 2.0, name='var', aggregation=variable_scope.VariableAggregation.SUM) @@ -109,7 +108,6 @@ class MirroredStrategyOptimizerV2Test(test.TestCase, parameterized.TestCase): ], mode=['graph', 'eager'])) def testOptimizerWithKerasModelAndNumpyArrays(self, distribution): - self.skipTest('b/130309197') with self.cached_session(): with distribution.scope(): model = get_model() From e45d52e03932b6aca5ce8aac136b1b688fe2a47a Mon Sep 17 00:00:00 2001 From: Robert David Date: Tue, 21 Jul 2020 12:38:56 -0700 Subject: [PATCH 0953/2522] Change MaybeFuseActivation to only support a single output - that is its only use-case. PiperOrigin-RevId: 322419198 Change-Id: I4307683446795de77be4b0d3dc06396cfa3347c4 --- .../delegates/gpu/common/model_builder.cc | 74 +++++++------------ 1 file changed, 26 insertions(+), 48 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/common/model_builder.cc b/tensorflow/lite/delegates/gpu/common/model_builder.cc index d09a8133770..d2fefe5f552 100644 --- a/tensorflow/lite/delegates/gpu/common/model_builder.cc +++ b/tensorflow/lite/delegates/gpu/common/model_builder.cc @@ -129,16 +129,15 @@ absl::Status IsActivationSupported(TfLiteFusedActivation fused_activation) { // that will have identical output as the given node. New operation node will // depend on the given node output. 
absl::Status MaybeFuseActivation(TfLiteFusedActivation fused_activation, - const std::vector& output_indices, GraphFloat32* graph, Node* node) { - if (fused_activation == kTfLiteActNone) { - return absl::OkStatus(); - } const auto outputs = graph->FindOutputs(node->id); - if (outputs.empty()) { - return absl::InternalError("Empty outputs in fused node"); + if (outputs.size() != 1) { + return absl::InternalError("Number of outputs != 1"); } switch (fused_activation) { + case kTfLiteActNone: + // Nothing to do here + return absl::OkStatus(); case kTfLiteActRelu: case kTfLiteActReluN1To1: case kTfLiteActRelu6: { @@ -146,36 +145,24 @@ absl::Status MaybeFuseActivation(TfLiteFusedActivation fused_activation, attr.clip = fused_activation == kTfLiteActRelu ? 0.0f : (fused_activation == kTfLiteActReluN1To1 ? 1.0f : 6.0f); - for (auto index : output_indices) { - Node* activation_node; - RETURN_IF_ERROR( - NewPassthroughNode(graph, node, outputs[index], &activation_node)); - activation_node->operation.type = ToString(OperationType::RELU); - activation_node->operation.attributes = attr; - } - break; + Node* activation_node; + RETURN_IF_ERROR( + NewPassthroughNode(graph, node, outputs[0], &activation_node)); + activation_node->operation.type = ToString(OperationType::RELU); + activation_node->operation.attributes = attr; + return absl::OkStatus(); + } + case kTfLiteActTanh: { + Node* activation_node; + RETURN_IF_ERROR( + NewPassthroughNode(graph, node, outputs[0], &activation_node)); + activation_node->operation.type = ToString(OperationType::TANH); + return absl::OkStatus(); } - case kTfLiteActTanh: - for (auto index : output_indices) { - Node* activation_node; - RETURN_IF_ERROR( - NewPassthroughNode(graph, node, outputs[index], &activation_node)); - activation_node->operation.type = ToString(OperationType::TANH); - } - break; default: return absl::NotFoundError( absl::StrCat("Unsupported fused activation: ", fused_activation)); } - return absl::OkStatus(); -} - -absl::Status MaybeFuseActivationToTheSingleOutput( - TfLiteFusedActivation fused_activation, GraphFloat32* graph, Node* node) { - if (graph->FindOutputs(node->id).size() != 1) { - return absl::InternalError("Number of outputs exceeds 1"); - } - return MaybeFuseActivation(fused_activation, {0}, graph, node); } HW ToHW(int32_t h, int32_t w) { return HW(h > 0 ? h : 1, w > 0 ? 
w : 1); } @@ -389,8 +376,7 @@ class AddOperationParser : public TFLiteOperationParser { node->operation.attributes = std::move(attr); const TfLiteAddParams* tf_options; RETURN_IF_ERROR(RetrieveBuiltinData(tflite_node, &tf_options)); - return MaybeFuseActivationToTheSingleOutput(tf_options->activation, graph, - node); + return MaybeFuseActivation(tf_options->activation, graph, node); } }; @@ -463,8 +449,7 @@ class ConcatenationOperationParser : public TFLiteOperationParser { } const TfLiteConcatenationParams* tf_options; RETURN_IF_ERROR(RetrieveBuiltinData(tflite_node, &tf_options)); - RETURN_IF_ERROR(MaybeFuseActivationToTheSingleOutput(tf_options->activation, - graph, node)); + RETURN_IF_ERROR(MaybeFuseActivation(tf_options->activation, graph, node)); node->operation.attributes = attr; return absl::OkStatus(); } @@ -566,8 +551,7 @@ class Conv2DOperationParser : public TFLiteOperationParser { tf_options->dilation_width_factor); UpdatePadding(tf_options->padding, graph->FindInputs(node->id)[0]->tensor.shape, &attr); - RETURN_IF_ERROR(MaybeFuseActivationToTheSingleOutput(tf_options->activation, - graph, node)); + RETURN_IF_ERROR(MaybeFuseActivation(tf_options->activation, graph, node)); node->operation.attributes = std::move(attr); return absl::OkStatus(); } @@ -684,8 +668,7 @@ class DepthwiseConvolutionOperationParser : public TFLiteOperationParser { std::max(1, tf_options->dilation_width_factor)); UpdatePadding(tf_options->padding, graph->FindInputs(node->id)[0]->tensor.shape, &attr); - RETURN_IF_ERROR(MaybeFuseActivationToTheSingleOutput(tf_options->activation, - graph, node)); + RETURN_IF_ERROR(MaybeFuseActivation(tf_options->activation, graph, node)); const int depth_multiplier = tf_options->depth_multiplier; if (depth_multiplier != 1) { const TfLiteTensor* input = reader->GetInputTensor(0); @@ -850,8 +833,7 @@ class ElementwiseOperationParser : public TFLiteOperationParser { } if (activation) { - RETURN_IF_ERROR( - MaybeFuseActivationToTheSingleOutput(activation, graph, node)); + RETURN_IF_ERROR(MaybeFuseActivation(activation, graph, node)); } } else if (IsTwoArgumentOperationWithConst()) { RETURN_IF_ERROR(reader->VerifyInputsConstsOutputs(tflite_node, @@ -997,8 +979,7 @@ class FullyConnectedOperationParser : public TFLiteOperationParser { conv->operation.type = ToString(OperationType::FULLY_CONNECTED); conv->operation.attributes = std::move(attr); absl::Status result = reader->AddOutputs(conv); - RETURN_IF_ERROR(MaybeFuseActivationToTheSingleOutput(tf_options->activation, - graph, conv)); + RETURN_IF_ERROR(MaybeFuseActivation(tf_options->activation, graph, conv)); return result; } @@ -1252,8 +1233,7 @@ class MulOperationParser : public TFLiteOperationParser { const TfLiteMulParams* tf_options; RETURN_IF_ERROR(RetrieveBuiltinData(tflite_node, &tf_options)); - return MaybeFuseActivationToTheSingleOutput(tf_options->activation, graph, - node); + return MaybeFuseActivation(tf_options->activation, graph, node); } private: @@ -1454,9 +1434,7 @@ class Pooling2DOperationParser : public TFLiteOperationParser { RETURN_IF_ERROR(RetrieveBuiltinData(tflite_node, &tf_options)); } - std::vector max_tensor_id{0}; - RETURN_IF_ERROR(MaybeFuseActivation(tf_options->activation, max_tensor_id, - graph, node)); + RETURN_IF_ERROR(MaybeFuseActivation(tf_options->activation, graph, node)); // Second output is optional. 
It is not required, it but must be added after // MaybeAddFusedActivation function is called reader->AddOutput(node, 1).IgnoreError(); From 2623d9a80992128db89a536248a74679bfbe7517 Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Tue, 21 Jul 2020 19:54:07 +0000 Subject: [PATCH 0954/2522] added TF_GetName and tests for the funciton --- tensorflow/c/kernels.cc | 10 +++++++++- tensorflow/c/kernels.h | 17 +++++++++++++++++ tensorflow/c/kernels_test.cc | 16 ++++++++++++++-- 3 files changed, 40 insertions(+), 3 deletions(-) diff --git a/tensorflow/c/kernels.cc b/tensorflow/c/kernels.cc index 8fa50711a8d..42aa2bce54a 100644 --- a/tensorflow/c/kernels.cc +++ b/tensorflow/c/kernels.cc @@ -239,6 +239,14 @@ void TF_OpKernelContext_Failure(TF_OpKernelContext* ctx, TF_Status* status) { DEFINE_TF_GETATTR(Type, TF_DataType, tensorflow::DataType) DEFINE_TF_GETATTR(Int32, tensorflow::int32, int32_t) +string_view TF_OpKernelConstruction_GetName(TF_OpKernelConstruction* ctx) { + auto* cc_ctx = reinterpret_cast(ctx); + string_view string_view_of_name = {.data = cc_ctx->def().name().data(), + .len = cc_ctx->def().name().length()}; + return string_view_of_name; +} + + TF_DataType TF_ExpectedOutputDataType(TF_OpKernelContext* ctx, int i) { auto* cc_ctx = reinterpret_cast<::tensorflow::OpKernelContext*>(ctx); return static_cast(cc_ctx->expected_output_dtype(i)); @@ -271,4 +279,4 @@ TF_Tensor* TF_AllocateOutput(TF_OpKernelContext* context, int index, return nullptr; } return tf_tensor; -} +} \ No newline at end of file diff --git a/tensorflow/c/kernels.h b/tensorflow/c/kernels.h index 1428f7ab928..e146e896485 100644 --- a/tensorflow/c/kernels.h +++ b/tensorflow/c/kernels.h @@ -111,6 +111,10 @@ TF_CAPI_EXPORT extern void TF_KernelBuilder_HostMemory( TF_CAPI_EXPORT extern void TF_KernelBuilder_Priority( TF_KernelBuilder* kernel_builder, int32_t priority_number); +typedef struct string_view string_view; + +TF_CAPI_EXPORT extern string_view TF_GetName(TF_KernelBuilder* kernel_builder); + // Register the given kernel builder with the TensorFlow runtime. If // registration fails, the given status will be populated. // @@ -184,6 +188,19 @@ TF_CAPI_EXPORT extern void TF_OpKernelConstruction_GetAttrInt32( TF_OpKernelConstruction* ctx, const char* attr_name, int32_t* val, TF_Status* status); +// Used to pass strings across the C API. The caller does not take ownership +// of the underlying data pointer and is not responsible for freeing it. +struct string_view { + const char* data; + size_t len; +}; + +typedef struct string_view string_view; + +// Returns the name of the user-defined NodeDef for this OpKernel. +TF_CAPI_EXPORT extern string_view TF_OpKernelConstruction_GetName( + TF_OpKernelConstruction* ctx); + // Allocates Tensor for output at given index. Caller takes ownership of // returned TF_Tensor and should deallocate it using TF_DeleteTensor(tensor). // diff --git a/tensorflow/c/kernels_test.cc b/tensorflow/c/kernels_test.cc index 423302741de..ddbb86d2f31 100644 --- a/tensorflow/c/kernels_test.cc +++ b/tensorflow/c/kernels_test.cc @@ -53,6 +53,7 @@ limitations under the License. 
#include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/types.h" +#include struct MyCustomKernel { bool created; bool compute_called; @@ -73,6 +74,12 @@ static void* MyCreateFunc(TF_OpKernelConstruction* ctx) { EXPECT_EQ(TF_FLOAT, type); TF_DeleteStatus(status); + // Exercise kernel NodeDef name read + string_view name_string_view = TF_OpKernelConstruction_GetName(ctx); + const char* kernel_name = "SomeKernelName"; + const char* candidate_kernel_name = std::string(name_string_view.data, + name_string_view.len).c_str(); + EXPECT_EQ(0, strcmp(kernel_name, candidate_kernel_name)); return s; } @@ -96,9 +103,11 @@ namespace tensorflow { static std::unique_ptr GetFakeKernel(const char* device_name, const char* op_name, + const char* kernel_name, Status* status) { NodeDef def; def.set_op(op_name); + def.set_name(kernel_name); def.set_device(device_name); def.add_input("input1"); def.add_input("input2"); @@ -144,7 +153,7 @@ TEST(TestKernel, TestRegisterKernelBuilder) { { Status status; std::unique_ptr kernel = - GetFakeKernel(device_name, op_name, &status); + GetFakeKernel(device_name, op_name, kernel_name, &status); TF_EXPECT_OK(status); ASSERT_NE(nullptr, kernel.get()); kernel->Compute(nullptr); @@ -153,6 +162,9 @@ TEST(TestKernel, TestRegisterKernelBuilder) { ASSERT_TRUE(delete_called); } +TEST(TestKernel, TestGetKernelName) { +} + class DummyDevice : public DeviceBase { public: explicit DummyDevice(Env* env) : DeviceBase(env) {} @@ -233,7 +245,7 @@ TEST(TestKernel, TestInputAndOutputCount) { Status status; std::unique_ptr kernel = - GetFakeKernel(device_name, op_name, &status); + GetFakeKernel(device_name, op_name, kernel_name, &status); TF_EXPECT_OK(status); ASSERT_NE(nullptr, kernel.get()); From e5a84bd0caf5b5e2e45f9fe47b16e29f844de3a7 Mon Sep 17 00:00:00 2001 From: Robert David Date: Tue, 21 Jul 2020 12:49:19 -0700 Subject: [PATCH 0955/2522] Cleanup LSTM tests: Remove explicit TensorData constructor calls, just pass initializer list instead. 
PiperOrigin-RevId: 322421391 Change-Id: Iaf667514ce1cd7773ab46bc1a3a521f277708969 --- tensorflow/lite/kernels/lstm_test.cc | 61 ++++++++++++---------------- 1 file changed, 25 insertions(+), 36 deletions(-) diff --git a/tensorflow/lite/kernels/lstm_test.cc b/tensorflow/lite/kernels/lstm_test.cc index cad57461f30..24682de46ad 100644 --- a/tensorflow/lite/kernels/lstm_test.cc +++ b/tensorflow/lite/kernels/lstm_test.cc @@ -46,43 +46,35 @@ class LSTMOpModel : public SingleOpModel { n_cell_(n_cell), n_output_(n_output), weight_type_(weight_type) { - input_ = AddInput(TensorData{TensorType_FLOAT32, {n_batch, n_input}}); + input_ = AddInput({TensorType_FLOAT32, {n_batch, n_input}}); if (use_cifg) { input_to_input_weights_ = AddNullInput(); } else { - input_to_input_weights_ = - AddInput(TensorData{weight_type, {n_cell, n_input}}); + input_to_input_weights_ = AddInput({weight_type, {n_cell, n_input}}); } - input_to_forget_weights_ = - AddInput(TensorData{weight_type, {n_cell, n_input}}); - input_to_cell_weights_ = - AddInput(TensorData{weight_type, {n_cell, n_input}}); - input_to_output_weights_ = - AddInput(TensorData{weight_type, {n_cell, n_input}}); + input_to_forget_weights_ = AddInput({weight_type, {n_cell, n_input}}); + input_to_cell_weights_ = AddInput({weight_type, {n_cell, n_input}}); + input_to_output_weights_ = AddInput({weight_type, {n_cell, n_input}}); if (use_cifg) { recurrent_to_input_weights_ = AddNullInput(); } else { - recurrent_to_input_weights_ = - AddInput(TensorData{weight_type, {n_cell, n_output}}); + recurrent_to_input_weights_ = AddInput({weight_type, {n_cell, n_output}}); } - recurrent_to_forget_weights_ = - AddInput(TensorData{weight_type, {n_cell, n_output}}); - recurrent_to_cell_weights_ = - AddInput(TensorData{weight_type, {n_cell, n_output}}); - recurrent_to_output_weights_ = - AddInput(TensorData{weight_type, {n_cell, n_output}}); + recurrent_to_forget_weights_ = AddInput({weight_type, {n_cell, n_output}}); + recurrent_to_cell_weights_ = AddInput({weight_type, {n_cell, n_output}}); + recurrent_to_output_weights_ = AddInput({weight_type, {n_cell, n_output}}); if (use_peephole) { if (use_cifg) { cell_to_input_weights_ = AddNullInput(); } else { - cell_to_input_weights_ = AddInput(TensorData{weight_type, {n_cell}}); + cell_to_input_weights_ = AddInput({weight_type, {n_cell}}); } - cell_to_forget_weights_ = AddInput(TensorData{weight_type, {n_cell}}); - cell_to_output_weights_ = AddInput(TensorData{weight_type, {n_cell}}); + cell_to_forget_weights_ = AddInput({weight_type, {n_cell}}); + cell_to_output_weights_ = AddInput({weight_type, {n_cell}}); } else { cell_to_input_weights_ = AddNullInput(); cell_to_forget_weights_ = AddNullInput(); @@ -92,17 +84,16 @@ class LSTMOpModel : public SingleOpModel { if (use_cifg) { input_gate_bias_ = AddNullInput(); } else { - input_gate_bias_ = AddInput(TensorData{TensorType_FLOAT32, {n_cell}}); + input_gate_bias_ = AddInput({TensorType_FLOAT32, {n_cell}}); } - forget_gate_bias_ = AddInput(TensorData{TensorType_FLOAT32, {n_cell}}); - cell_gate_bias_ = AddInput(TensorData{TensorType_FLOAT32, {n_cell}}); - output_gate_bias_ = AddInput(TensorData{TensorType_FLOAT32, {n_cell}}); + forget_gate_bias_ = AddInput({TensorType_FLOAT32, {n_cell}}); + cell_gate_bias_ = AddInput({TensorType_FLOAT32, {n_cell}}); + output_gate_bias_ = AddInput({TensorType_FLOAT32, {n_cell}}); if (use_projection_weights) { - projection_weights_ = - AddInput(TensorData{weight_type, {n_output, n_cell}}); + projection_weights_ = AddInput({weight_type, {n_output, 
n_cell}}); if (use_projection_bias) { - projection_bias_ = AddInput(TensorData{TensorType_FLOAT32, {n_output}}); + projection_bias_ = AddInput({TensorType_FLOAT32, {n_output}}); } else { projection_bias_ = AddNullInput(); } @@ -112,10 +103,8 @@ class LSTMOpModel : public SingleOpModel { } // Adding the 2 state tensors. - output_state_ = - AddInput(TensorData{TensorType_FLOAT32, {n_batch, n_output}}, true); - cell_state_ = - AddInput(TensorData{TensorType_FLOAT32, {n_batch, n_cell}}, true); + output_state_ = AddInput({TensorType_FLOAT32, {n_batch, n_output}}, true); + cell_state_ = AddInput({TensorType_FLOAT32, {n_batch, n_cell}}, true); // Layer norm weights. if (!model_has_legacy_20_inputs) { @@ -123,21 +112,21 @@ class LSTMOpModel : public SingleOpModel { input_layer_norm_coefficients_ = AddNullInput(); } else { input_layer_norm_coefficients_ = - is_layer_norm ? AddInput(TensorData{TensorType_FLOAT32, {n_cell}}) + is_layer_norm ? AddInput({TensorType_FLOAT32, {n_cell}}) : AddNullInput(); } forget_layer_norm_coefficients_ = - is_layer_norm ? AddInput(TensorData{TensorType_FLOAT32, {n_cell}}) + is_layer_norm ? AddInput({TensorType_FLOAT32, {n_cell}}) : AddNullInput(); cell_layer_norm_coefficients_ = - is_layer_norm ? AddInput(TensorData{TensorType_FLOAT32, {n_cell}}) + is_layer_norm ? AddInput({TensorType_FLOAT32, {n_cell}}) : AddNullInput(); output_layer_norm_coefficients_ = - is_layer_norm ? AddInput(TensorData{TensorType_FLOAT32, {n_cell}}) + is_layer_norm ? AddInput({TensorType_FLOAT32, {n_cell}}) : AddNullInput(); } - output_ = AddOutput(TensorData{TensorType_FLOAT32, {n_output}}); + output_ = AddOutput({TensorType_FLOAT32, {n_output}}); SetBuiltinOp( BuiltinOperator_LSTM, BuiltinOptions_LSTMOptions, From 6c42f4aae394636898491194a56b8bc7564adb04 Mon Sep 17 00:00:00 2001 From: Karim Nosir Date: Tue, 21 Jul 2020 12:52:53 -0700 Subject: [PATCH 0956/2522] Add a pass to raise unknown ops to new internal op type custom_tf op. This to allow doing some graph optimizations on the custom ops. 
PiperOrigin-RevId: 322422151 Change-Id: I360593084cf82ee9c5c13bb7c1ae6c22718187e0 --- tensorflow/compiler/mlir/lite/BUILD | 1 + .../compiler/mlir/lite/flatbuffer_export.cc | 31 ++++--- tensorflow/compiler/mlir/lite/ir/tfl_ops.td | 28 ++++++- .../lite/tests/end2end/custom_opdef.pbtxt | 16 ++-- tensorflow/compiler/mlir/lite/tests/ops.mlir | 10 +++ .../mlir/lite/tests/raise-custom-ops.mlir | 20 +++++ .../compiler/mlir/lite/tf_tfl_passes.cc | 1 + .../compiler/mlir/lite/transforms/passes.h | 3 + .../mlir/lite/transforms/raise_custom_ops.cc | 80 +++++++++++++++++++ 9 files changed, 172 insertions(+), 18 deletions(-) create mode 100644 tensorflow/compiler/mlir/lite/tests/raise-custom-ops.mlir create mode 100644 tensorflow/compiler/mlir/lite/transforms/raise_custom_ops.cc diff --git a/tensorflow/compiler/mlir/lite/BUILD b/tensorflow/compiler/mlir/lite/BUILD index 3b67ea3d846..8a60b292bc2 100644 --- a/tensorflow/compiler/mlir/lite/BUILD +++ b/tensorflow/compiler/mlir/lite/BUILD @@ -334,6 +334,7 @@ cc_library( "transforms/optimize_functional_ops.cc", "transforms/prepare_composite_functions_tf.cc", "transforms/prepare_tf.cc", + "transforms/raise_custom_ops.cc", "transforms/runtime_verify.cc", "transforms/split_merged_operands.cc", "transforms/trim_functions_tf.cc", diff --git a/tensorflow/compiler/mlir/lite/flatbuffer_export.cc b/tensorflow/compiler/mlir/lite/flatbuffer_export.cc index 09c79d90e26..2e69a1740db 100644 --- a/tensorflow/compiler/mlir/lite/flatbuffer_export.cc +++ b/tensorflow/compiler/mlir/lite/flatbuffer_export.cc @@ -1196,22 +1196,35 @@ Optional> Translator::BuildSubGraph( if (IsConst(&inst)) continue; // Fetch operand and result tensor indices. - std::vector operands; - operands.reserve(inst.getNumOperands()); - for (auto operand : inst.getOperands()) { - if (operand.getType().isa()) - operands.push_back(kTfLiteOptionalTensor); - else - operands.push_back(tensor_index_map.lookup(operand)); - } std::vector results; results.reserve(inst.getNumOperands()); for (auto result : inst.getResults()) { results.push_back(tensor_index_map.lookup(result)); } + Operation* real_inst = &inst; + // CustomTfOp is just a wrapper around a TF op, we export the custom Op + // not the wrapper, so we fetch the op from the region. + if (auto custom_op = dyn_cast(inst)) { + // If we have custom op with a region, then use the first op in the + // region, if it exists, otherwise just use params for custom op. 
+ if (!custom_op.body().empty()) { + real_inst = &custom_op.body().front().front(); + } else { + module_.emitError( + "Invalid CustomTfOp: Custom TF Op have empty region."); + } + } + std::vector operands; + operands.reserve(real_inst->getNumOperands()); + for (auto operand : real_inst->getOperands()) { + if (operand.getType().isa()) + operands.push_back(kTfLiteOptionalTensor); + else + operands.push_back(tensor_index_map.lookup(operand)); + } if (auto tfl_operator = - BuildOperator(&inst, operands, results, intermediates)) + BuildOperator(real_inst, operands, results, intermediates)) operators.push_back(*tfl_operator); else failed_once = true; diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td index 66124ba9982..715d047f0bf 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td +++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td @@ -4296,7 +4296,8 @@ def TFL_WhileOp : Op { +def TFL_CustomOp : Op { let summary = "Custom op"; let description = [{ @@ -4319,4 +4320,29 @@ def TFL_CustomOp : Op { let verifier = [{ return Verify(*this); }]; } +def TFL_CustomTfOp : Op]> { + let summary = "Wrapper Op for TF custom ops."; + + let description = [{ + A wrapper op around any Custom TF op. These includes ops defined using + custom_opdefs or linked which are not defined in TF dialect. + This Op just wraps the custom op inside a region. + Note #1, this Op will not include TF Lite custom ops defined using CustomOp. + Note #2, this op is just internal representation inside the converter and + are not exposed/exported when the model is exported to Flatbuffer. + }]; + + let arguments = (ins + Variadic>:$input + ); + let results = (outs Variadic:$output); + + let regions = (region SizedRegion<1>:$body); +} + #endif // TFL_OPS diff --git a/tensorflow/compiler/mlir/lite/tests/end2end/custom_opdef.pbtxt b/tensorflow/compiler/mlir/lite/tests/end2end/custom_opdef.pbtxt index 345468e609e..481be9d4deb 100644 --- a/tensorflow/compiler/mlir/lite/tests/end2end/custom_opdef.pbtxt +++ b/tensorflow/compiler/mlir/lite/tests/end2end/custom_opdef.pbtxt @@ -36,11 +36,11 @@ versions { producer: 27 } -# CHECK-LABEL: func @main -# CHECK-SAME: (%[[ARG_0:[a-z0-9]+]]: tensor<4xi32>, %[[ARG_1:[a-z0-9]+]]: tensor<4xi32>) -> tensor<*xi32> -# CHECK-SAME: control_outputs = "" -# CHECK-SAME: inputs = "input0,input1" -# CHECK-SAME: outputs = "output" -# CHECK-NEXT: %[[OP:[a-z0-9]+]] = "tf.BannaPotatoSaladWithColeslaw"(%[[ARG_0]], %[[ARG_1]]) {T = i32, device = ""} : (tensor<4xi32>, tensor<4xi32>) -> tensor<*xi32> -# CHECK-NEXT: return %[[OP]] : tensor<*xi32> -# CHECK-NEXT: } +# CHECK-LABEL: func @main(%arg0: tensor<4xi32>, %arg1: tensor<4xi32>) -> tensor<*xi32> +# CHECK: attributes {tf.entry_function = {control_outputs = "", inputs = "input0,input1", outputs = "output"}} { +# CHECK-NEXT: %[[CUSTOM:.*]] = "tfl.custom_tf"(%arg0, %arg1) ( { +# CHECK-NEXT: %[[OUTPUTS:.*]] = "tf.BannaPotatoSaladWithColeslaw"(%arg0, %arg1) {T = i32, device = ""} : (tensor<4xi32>, tensor<4xi32>) -> tensor<*xi32> +# CHECK-NEXT: "tfl.yield"(%[[OUTPUTS]]) : (tensor<*xi32>) -> () +# CHECK-NEXT: }) : (tensor<4xi32>, tensor<4xi32>) -> tensor<*xi32> +# CHECK-NEXT: return %[[CUSTOM]] : tensor<*xi32> +# CHECK-NEXT: } diff --git a/tensorflow/compiler/mlir/lite/tests/ops.mlir b/tensorflow/compiler/mlir/lite/tests/ops.mlir index 5f434e954c8..06e05987ee6 100644 --- a/tensorflow/compiler/mlir/lite/tests/ops.mlir +++ b/tensorflow/compiler/mlir/lite/tests/ops.mlir @@ -598,6 +598,16 @@ func 
@testMaxPool2DWrongOperandStorageType(tensor<1x7x7x16x!quant.uniform, %arg1: tensor<1x64x64x32xf32>, %arg2: tensor<1x64x64x32xf32>) -> (tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>) { + %0, %1, %2, %3 = "tfl.custom_tf"(%arg0, %arg1, %arg2) ({ + %4, %5, %6, %7 = "tf.TFLite_Detection_PostProcess"(%arg0, %arg1, %arg2) {_output_quantized = true, _output_types = [f32, f32, f32, f32], _support_output_type_float_in_quantized_op = true, detections_per_class = 100 : i64, device = "", h_scale = 5.000000e+00 : f32, max_classes_per_detection = 1 : i64, max_detections = 20 : i64, nms_iou_threshold = 6.000000e-01 : f32, nms_score_threshold = 3.000000e-01 : f32, num_classes = 90 : i64, use_regular_nms = false, w_scale = 5.000000e+00 : f32, x_scale = 1.000000e+01 : f32, y_scale = 1.000000e+01 : f32} : (tensor<1x64x64x32xf32>, tensor<1x64x64x32xf32>, tensor<1x64x64x32xf32>) -> (tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>) + "tfl.yield"(%4, %5, %6, %7) : (tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>) -> () + }) : (tensor<1x64x64x32xf32>, tensor<1x64x64x32xf32>, tensor<1x64x64x32xf32>) -> (tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>) + return %0, %1 : tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32> +} + +// ----- + func @testMaxPoolingWithArgMax2D(%arg0: tensor<1x64x64x32xf32>) -> (tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>) { // custom op for "tfl.max_pooling_with_argmax_2d"(%arg0) {filter_h = 2 : i32, filter_w = 2 : i32, padding = "SAME", stride_h = 2 : i32, stride_w = 2 : i32} : (tensor<1x64x64x32xf32>) -> (tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>) %0, %1 = "tfl.custom"(%arg0) {custom_option = opaque<"tfl", "0x01000000020000000200000002000000020000000000000000000000000000000000000000000000"> : tensor<40xi8>, custom_code = "MaxPoolingWithArgmax2D"} : (tensor<1x64x64x32xf32>) -> (tensor<1x32x32x32xf32>, tensor<1x32x32x32xf32>) diff --git a/tensorflow/compiler/mlir/lite/tests/raise-custom-ops.mlir b/tensorflow/compiler/mlir/lite/tests/raise-custom-ops.mlir new file mode 100644 index 00000000000..1bac8019a30 --- /dev/null +++ b/tensorflow/compiler/mlir/lite/tests/raise-custom-ops.mlir @@ -0,0 +1,20 @@ +// RUN: tf-opt -tfl-raise-custom-ops -canonicalize %s -o - | FileCheck %s + +// CHECK-LABEL: custom_op +func @custom_op(%arg0: tensor<4xf32>) -> tensor<4xf32> { + %0 = "tfl.pseudo_const" () {value = dense<1.0> : tensor<4xf32>} : () -> tensor<4xf32> + %1 = "tfl.mul"(%arg0, %0) {fused_activation_function = "NONE"} : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> + // will be preserved since it has uses. + %2 = "tf.MyCustomOp"(%1, %0) {fused_activation_function = "RELU", int_attr = 2 : i32} : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> + // will be removed since it doesn't have uses and doesn't have side effect. 
+ "tf.MyCustomOp"(%1, %0) {fused_activation_function = "RELU", int_attr = 2 : i32} : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> + return %2 : tensor<4xf32> + +// CHECK-NEXT: %[[CST:.*]] = constant dense<1.000000e+00> +// CHECK-NEXT: %[[MUL:.*]] = tfl.mul %arg0, %[[CST]] {fused_activation_function = "NONE"} : tensor<4xf32> +// CHECK-NEXT: %[[CUSTOM:.*]] = "tfl.custom_tf"(%[[MUL]], %[[CST]]) ( { +// CHECK-NEXT: %[[MY_CUSTOM:.*]] = "tf.MyCustomOp"(%[[MUL]], %[[CST]]) {fused_activation_function = "RELU", int_attr = 2 : i32} : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> +// CHECK-NEXT: "tfl.yield"(%[[MY_CUSTOM]]) : (tensor<4xf32>) -> () +// CHECK-NEXT: }) : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> +// CHECK-NEXT: return %[[CUSTOM]] : tensor<4xf32> +} diff --git a/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc b/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc index fc44e778b92..239d52530ec 100644 --- a/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc +++ b/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc @@ -187,6 +187,7 @@ void AddTFToTFLConversionPasses(const mlir::TFL::PassConfig& pass_config, // so that it can target constants introduced once TensorFlow Identity ops // are removed during legalization. pass_manager->addPass(mlir::TFL::CreateOptimizeFunctionalOpsPass()); + pass_manager->addPass(mlir::TFL::CreateRaiseCustomOpsPass()); pass_manager->addPass(mlir::createSymbolDCEPass()); pass_manager->addNestedPass(mlir::createCanonicalizerPass()); pass_manager->addNestedPass(mlir::createCSEPass()); diff --git a/tensorflow/compiler/mlir/lite/transforms/passes.h b/tensorflow/compiler/mlir/lite/transforms/passes.h index af97931b2a3..804a391231a 100644 --- a/tensorflow/compiler/mlir/lite/transforms/passes.h +++ b/tensorflow/compiler/mlir/lite/transforms/passes.h @@ -91,6 +91,9 @@ std::unique_ptr> CreateWhileOutlinePass(); // Verifies runtime constraints. std::unique_ptr> CreateRuntimeVerifyPass(); +// Creates raise custom ops pass, which legalize custom ops to TFL::CustomOp +std::unique_ptr> CreateRaiseCustomOpsPass(); + } // namespace TFL } // namespace mlir diff --git a/tensorflow/compiler/mlir/lite/transforms/raise_custom_ops.cc b/tensorflow/compiler/mlir/lite/transforms/raise_custom_ops.cc new file mode 100644 index 00000000000..40cca526951 --- /dev/null +++ b/tensorflow/compiler/mlir/lite/transforms/raise_custom_ops.cc @@ -0,0 +1,80 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project +#include "mlir/IR/Attributes.h" // from @llvm-project +#include "mlir/IR/Builders.h" // from @llvm-project +#include "mlir/IR/MLIRContext.h" // from @llvm-project +#include "mlir/IR/StandardTypes.h" // from @llvm-project +#include "mlir/Pass/Pass.h" // from @llvm-project +#include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h" +#include "tensorflow/compiler/mlir/lite/transforms/passes.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" + +namespace mlir { +namespace TFL { +namespace { +// This transformation pass takes an operation with unknown op properties and +// wrap it by a TFL::CustomTfOp. +struct RaiseCustomOpsPass + : public PassWrapper { + void runOnFunction() override; +}; + +void RaiseCustomOpsPass::runOnFunction() { + auto fn = getFunction(); + OpBuilder builder(fn.getContext()); + + llvm::SmallVector custom_ops; + for (Operation &op : fn.getOps()) { + // Skips the ops with known op property. + if (op.getAbstractOperation()) continue; + // Skips already imported ops that are imported as CustomTfOp. + if (op.getParentOfType()) continue; + if (llvm::isa(op) || llvm::isa(op)) + continue; + custom_ops.push_back(&op); + } + + for (auto *op : custom_ops) { + builder.setInsertionPoint(op); + auto custom_op = builder.create( + op->getLoc(), op->getResultTypes(), op->getOperands()); + Region region; + region.push_back(new Block); + + builder.setInsertionPointToEnd(®ion.front()); + Operation *inner_op = builder.clone(*op); + builder.create(op->getLoc(), inner_op->getResults()); + custom_op.body().takeBody(region); + + op->replaceAllUsesWith(custom_op); + op->erase(); + } +} +} // namespace + +// Creates an instance of the TensorFlow Lite dialect raise custom op pass. +std::unique_ptr> CreateRaiseCustomOpsPass() { + return std::make_unique(); +} + +static PassRegistration pass( + "tfl-raise-custom-ops", "Raise custom ops into tflite dialect."); + +} // namespace TFL +} // namespace mlir From e6e0d403a05daee05fced3cf381b3e9b657f3c81 Mon Sep 17 00:00:00 2001 From: Ken Franko Date: Tue, 21 Jul 2020 13:05:02 -0700 Subject: [PATCH 0957/2522] Add SavedModel.LoadOptions to hub.KerasLayer API to pass to load_v2. 
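The plumbing added here is a thread-local load context: `keras.models.load_model` enters it with the caller's options, and downstream code (such as `hub.KerasLayer` calling `load_v2` in a companion change) can read them back without threading an `options` argument through every intermediate API. A rough usage sketch, assuming the `LoadOptions` class from load_options.py:

    from tensorflow.python.saved_model import load_context
    from tensorflow.python.saved_model import load_options as load_options_lib

    options = load_options_lib.LoadOptions()
    with load_context.load_context(options):
      # Anything running inside the block can recover the caller's options
      # from the thread-local context.
      assert load_context.get_load_options() is options
    # Outside the block the context is cleared again.
    assert load_context.get_load_options() is None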
PiperOrigin-RevId: 322425049 Change-Id: I4fd626c4a7e470bcb647a3d76d1ece16fcb63b28 --- tensorflow/python/keras/saving/save.py | 17 +++--- tensorflow/python/saved_model/BUILD | 10 ++++ tensorflow/python/saved_model/load_context.py | 56 +++++++++++++++++++ 3 files changed, 76 insertions(+), 7 deletions(-) create mode 100644 tensorflow/python/saved_model/load_context.py diff --git a/tensorflow/python/keras/saving/save.py b/tensorflow/python/keras/saving/save.py index 9c83914d380..c0c69c4e715 100644 --- a/tensorflow/python/keras/saving/save.py +++ b/tensorflow/python/keras/saving/save.py @@ -27,6 +27,7 @@ from tensorflow.python.keras.saving.saved_model import load as saved_model_load from tensorflow.python.keras.saving.saved_model import save as saved_model_save from tensorflow.python.keras.utils import generic_utils from tensorflow.python.keras.utils.io_utils import path_to_string +from tensorflow.python.saved_model import load_context from tensorflow.python.saved_model import loader_impl from tensorflow.python.util.tf_export import keras_export @@ -177,14 +178,16 @@ def load_model(filepath, custom_objects=None, compile=True, options=None): # py IOError: In case of an invalid savefile. """ with generic_utils.CustomObjectScope(custom_objects or {}): - if (h5py is not None and ( - isinstance(filepath, h5py.File) or h5py.is_hdf5(filepath))): - return hdf5_format.load_model_from_hdf5(filepath, custom_objects, compile) + with load_context.load_context(options): + if (h5py is not None and + (isinstance(filepath, h5py.File) or h5py.is_hdf5(filepath))): + return hdf5_format.load_model_from_hdf5(filepath, custom_objects, + compile) - filepath = path_to_string(filepath) - if isinstance(filepath, six.string_types): - loader_impl.parse_saved_model(filepath) - return saved_model_load.load(filepath, compile, options) + filepath = path_to_string(filepath) + if isinstance(filepath, six.string_types): + loader_impl.parse_saved_model(filepath) + return saved_model_load.load(filepath, compile, options) raise IOError( 'Unable to load model. Filepath is not an hdf5 file (or h5py is not ' diff --git a/tensorflow/python/saved_model/BUILD b/tensorflow/python/saved_model/BUILD index 858fa10a1eb..5e96ea596bf 100644 --- a/tensorflow/python/saved_model/BUILD +++ b/tensorflow/python/saved_model/BUILD @@ -380,6 +380,15 @@ tf_py_test( ) py_strict_library( + name = "load_context", + srcs = [ + "load_context.py", + ], + srcs_version = "PY2AND3", + deps = [], +) + +py_library( name = "load", srcs = [ "load.py", @@ -387,6 +396,7 @@ py_strict_library( srcs_version = "PY2AND3", deps = [ ":function_deserialization", + ":load_context", ":load_options", ":load_v1_in_v2", ":loader", diff --git a/tensorflow/python/saved_model/load_context.py b/tensorflow/python/saved_model/load_context.py new file mode 100644 index 00000000000..e5988ff2833 --- /dev/null +++ b/tensorflow/python/saved_model/load_context.py @@ -0,0 +1,56 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Context for storing options for loading a SavedModel.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import contextlib +import threading + + +class LoadContext(threading.local): + """A context for loading a model.""" + + def __init__(self): + super(LoadContext, self).__init__() + self._load_options = None + + def set_load_options(self, load_options): + self._load_options = load_options + + def clear_load_options(self): + self._load_options = None + + def load_options(self): + return self._load_options + + +_load_context = LoadContext() + + +@contextlib.contextmanager +def load_context(load_options): + _load_context.set_load_options(load_options) + try: + yield + finally: + _load_context.clear_load_options() + + +def get_load_options(): + """Returns whether under a load context.""" + return _load_context.load_options() From 7b6e259f0c4bd47edc37401b4524e73d43a5da7b Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Tue, 21 Jul 2020 13:08:13 -0700 Subject: [PATCH 0958/2522] Remove unused cache_recursive_attribute PiperOrigin-RevId: 322425784 Change-Id: Ib9b13395c21135969d3b74237fd6f3385891861f --- .../python/training/tracking/layer_utils.py | 93 ------------------- 1 file changed, 93 deletions(-) diff --git a/tensorflow/python/training/tracking/layer_utils.py b/tensorflow/python/training/tracking/layer_utils.py index ab0480c2228..c63abebd219 100644 --- a/tensorflow/python/training/tracking/layer_utils.py +++ b/tensorflow/python/training/tracking/layer_utils.py @@ -49,99 +49,6 @@ def has_weights(obj): return has_weight and not isinstance(obj, type) -def cache_recursive_attribute(key): - """Decorator to cache Layer properties which recursively depend on sub-layers. - - A number of attributes in Keras Layers take the form: - - ``` - @property - def thing(self): - return self._thing or any(layer.thing for layer in self.layers) - ``` - - This means that checking these properties (e.g. dynamic, stateful, etc) must - traverse the entire graph of layers to determine whether any descent has - changed its state. This decorator adds a mechanism for Layers and trackable - data structures to broadcast mutations (including the addition or deletion - of layers) and allows the top level layer to safely cache results. In general, - if computing an attribute triggers a depth first search it is a good candidate - for this caching mechanism. - - The architecture is optimized for safety and correctness rather than absolute - optimality. This manifests in two ways: - 1) Parents are never removed. It is possible for layer A to depend on layer - B but subsequently remove that dependency. In that case, layer B will - continue to broadcast its mutations to layer A until either A or B is - deleted. However because the only effect is to invalidate a cache this - does not affect correctness. (And robustly removing dependencies is - difficult and error prone.) - - 2) Layers aggressively invalidate their caches when there is any ambiguity - of whether or not it is necessary. For instance, consider the following: - ``` - class MyLayer(tf.keras.layers.Layer): - def __init__(self): - super(MyLayer, self).__init__() - - sub_layer = tf.keras.layers.Dense(1) - self.sub_layers = [ - sub_layer # This will be picked up, converted to a ListWrapper, - # and added to self._layers - ] - - # Include the layer twice. 
- self.sub_layers.append(sub_layer) - - # Remove one copy, but one copy remains. - self.sub_layers.pop() - ``` - In the example layer above, the set of tracked layers actually doesn't - change; however to know that in the general case the Layer needs - significant machinery to reason about what, if anything, has changed. - By invalidating on every mutation we don't need to concern ourselves - with the many types of mutations (append, pop, in-place replacement) - and their specific semantics. - - Because mutations to layers are expected to be infrequent, this very - conservative approach captures the vast majority of the performance gains from - caching recursive properties while still remaining quite lightweight and easy - to reason about. - - `tracking.cached_per_instance` provides a more detailed performance analysis - of the WeakKeyDictionary cache pattern. - - Args: - key: A string indicating which field is being cached. While not strictly - necessary (since it could be obtained from f.__name__), it forces - deliberate behavior when caching an attribute. - - Returns: - A caching decorater specialized to `key`. - """ - cache = weakref.WeakKeyDictionary() - def outer(f): - """Attribute cache which has been specialized.""" - - @functools.wraps(f) - def wrapped(self): - """Cache aware version of `f`.""" - - # Sentinels are unique per Layer/Trackable, but can be hashed. (Unlike - # some trackable data structures.) Consequently it makes sense to use the - # sentinel as a cache key rather than `self`. - sentinel = getattr(self, "_attribute_sentinel") # type: AttributeSentinel - - if not sentinel.get(key) or sentinel not in cache: - cache[sentinel] = f(self) - sentinel.mark_cached(key) - output = cache[sentinel] - return output - - return wrapped - return outer - - def invalidate_recursive_cache(key): """Convenience decorator to invalidate the cache when setting attributes.""" def outer(f): From a7a1fba0907e78672cecbdf024dd21cf74e2f187 Mon Sep 17 00:00:00 2001 From: Ran Chen Date: Tue, 21 Jul 2020 13:18:37 -0700 Subject: [PATCH 0959/2522] Add a python binding to abort collective executor This will be used by a later change to abort the collectives when a peer failure is detected. Currently only on-going RING collectives can be aborted. Collectives that are pending on param resolution and NCCL collectives support will be added in a later change. We use CollectiveExecutor::Abort instead of CancellationManager because 1) we would like to abort all collectives, including subsequent launches since they're likely going to hang as well. 2) we would like to abort with a specific error (Unavailable) so that the user can catch and handle it. 3) There's no easy way to inject a cancellation manager if the collectives are inside tf.function. This will be used after we detect a peer failures to avoid hanging. After abortion no collectives can be launched. The only way to reset now is to restart the program. Supporting reliable reset is not trivial, we may support it in the future but the current priority is to be able to report peer failures instead of hang. 
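Put differently, the intended call pattern is for a failure detector to abort the collective executor as soon as a dead peer is noticed, turning would-be hangs into catchable errors. A minimal sketch, assuming the detector runs in the same process (the function name and message are hypothetical):

    from tensorflow.python.eager import context
    from tensorflow.python.framework import errors

    def on_peer_failure(task_name):
      # In-flight ring collectives and any later launches now fail with
      # errors.UnavailableError instead of hanging; restarting the program
      # is currently the only way to recover.
      context.context().abort_collective_ops(
          errors.UNAVAILABLE, 'peer %s is down' % task_name)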
PiperOrigin-RevId: 322428082 Change-Id: If2e630437fa9367520f858cfb0b06cf7c134b5cf --- tensorflow/c/c_api_experimental.cc | 9 +++ tensorflow/c/c_api_experimental.h | 8 +++ tensorflow/python/eager/context.py | 15 +++++ tensorflow/python/ops/collective_ops_test.py | 58 ++++++++++++++++++++ tensorflow/python/tfe_wrapper.cc | 7 +++ 5 files changed, 97 insertions(+) diff --git a/tensorflow/c/c_api_experimental.cc b/tensorflow/c/c_api_experimental.cc index 531dcd367de..b4297033b6d 100644 --- a/tensorflow/c/c_api_experimental.cc +++ b/tensorflow/c/c_api_experimental.cc @@ -29,6 +29,7 @@ limitations under the License. #include "tensorflow/core/common_runtime/eager/context.h" #include "tensorflow/core/common_runtime/eager/eager_operation.h" #include "tensorflow/core/distributed_runtime/rpc/grpc_server_lib.h" +#include "tensorflow/core/framework/collective.h" #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/shape_inference.h" #include "tensorflow/core/framework/tensor.pb.h" @@ -551,6 +552,14 @@ TF_CAPI_EXPORT extern void TFE_EnableCollectiveOps(TFE_Context* ctx, status->status = EnableCollectiveOps(server_def, ctx); } +TF_CAPI_EXPORT extern void TFE_AbortCollectiveOps(TFE_Context* ctx, + TF_Status* status) { + tensorflow::EagerContext* context = + tensorflow::ContextFromInterface(tensorflow::unwrap(ctx)); + auto collective_executor_handle = context->GetCollectiveExecutorHandle(); + collective_executor_handle->get()->StartAbort(status->status); +} + TF_ShapeAndTypeList* TF_NewShapeAndTypeList(int num_items) { TF_ShapeAndTypeList* result = new TF_ShapeAndTypeList; result->num_items = num_items; diff --git a/tensorflow/c/c_api_experimental.h b/tensorflow/c/c_api_experimental.h index d0ffbf125fb..ebd14b4b571 100644 --- a/tensorflow/c/c_api_experimental.h +++ b/tensorflow/c/c_api_experimental.h @@ -230,6 +230,14 @@ TF_CAPI_EXPORT extern void TFE_EnableCollectiveOps(TFE_Context* ctx, size_t proto_len, TF_Status* status); +// Aborts all ongoing collectives with the specified status. After abortion, +// subsequent collectives will error with this status immediately. +// +// This is intended to be used when a peer failure is detected. There's yet no +// way to reset the collectives other than restarting the program. +TF_CAPI_EXPORT extern void TFE_AbortCollectiveOps(TFE_Context* ctx, + TF_Status* status); + // Information about the shape of a Tensor and its type. struct TF_ShapeAndType { // Number of dimensions. -1 indicates unknown rank. diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py index a83b0ee1f77..4920df18e86 100644 --- a/tensorflow/python/eager/context.py +++ b/tensorflow/python/eager/context.py @@ -756,6 +756,21 @@ class Context(object): self._collective_use_nccl_communication = use_nccl_communication self._collective_device_filters = device_filters + def abort_collective_ops(self, code, message): + """Abort the collective ops. + + This is intended to be used when a peer failure is detected, which allows + the user to handle the case instead of hanging. This aborts all on-going + collectives. After all subsequent collectives error immediately. The only + way to recovery now is to restart the program. + + Args: + code: a `tf.errors` error code. + message: a string. The error message. 
+ """ + self.ensure_initialized() + pywrap_tfe.TFE_AbortCollectiveOps(self._handle, code, message) + @property def _handle(self): if self._context_handle is None: diff --git a/tensorflow/python/ops/collective_ops_test.py b/tensorflow/python/ops/collective_ops_test.py index 6e238c40de8..da553a0012c 100644 --- a/tensorflow/python/ops/collective_ops_test.py +++ b/tensorflow/python/ops/collective_ops_test.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import threading import time from tensorflow.core.protobuf import config_pb2 @@ -530,6 +531,7 @@ class CollectiveOpTest(test.TestCase): @test_util.run_v2_only def testCollectiveGroupSizeMismatch(self): + context._reset_context() cpus = config.list_physical_devices('CPU') self.assertEqual(len(cpus), 1) config.set_logical_device_configuration(cpus[0], [ @@ -688,6 +690,62 @@ class CollectiveOpTest(test.TestCase): run_and_assert(group_size=2, group_key=1) run_and_assert(group_size=3, group_key=2) + @test_util.run_v2_only + def testAbortRing(self): + cpus = config.list_physical_devices('CPU') + config.set_logical_device_configuration(cpus[0], [ + context.LogicalDeviceConfiguration(), + context.LogicalDeviceConfiguration() + ]) + group_size = 2 + group_key = 100 + instance_key = 100 + in_tensor = constant_op.constant(1.) + + # First perform a normal collective to finish resolution. + def collective_fn(): + for device in ['CPU:0', 'CPU:1']: + with ops.device(device): + collective_ops.all_reduce( + in_tensor, + group_size, + group_key, + instance_key, + 'Add', + 'Id', + communication_hint='ring') + + def_function.function(collective_fn)() + + # Launch a collective that hangs, and abort the collective executor after + # the launch. + def abort_fn(): + time.sleep(2) + context.context().abort_collective_ops(errors.UNAVAILABLE, 'peer down') + + t = threading.Thread(target=abort_fn) + t.start() + + with self.assertRaisesRegex(errors.UnavailableError, 'peer down'): + collective_ops.all_reduce(in_tensor, group_size, group_key, instance_key, + 'Add', 'Id') + + # After abortion, subsequent collectives should fail immediately. + with self.assertRaisesRegex(errors.UnavailableError, 'peer down'): + collective_ops.all_reduce(in_tensor, group_size, group_key, instance_key, + 'Add', 'Id') + + # Reset the context in order to reset the collective executor. + t.join() + context._reset_context() # pylint: disable=protected-access + # After reset non-NCCL collectives should work. 
+ cpus = config.list_physical_devices('CPU') + config.set_logical_device_configuration(cpus[0], [ + context.LogicalDeviceConfiguration(), + context.LogicalDeviceConfiguration() + ]) + def_function.function(collective_fn)() + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/tfe_wrapper.cc b/tensorflow/python/tfe_wrapper.cc index bf11faaf89d..e654ef3070b 100644 --- a/tensorflow/python/tfe_wrapper.cc +++ b/tensorflow/python/tfe_wrapper.cc @@ -828,6 +828,13 @@ PYBIND11_MODULE(_pywrap_tfe, m) { buf.get()->length, status.get()); tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); }); + m.def("TFE_AbortCollectiveOps", [](const py::handle& ctx, int code, + const char* message) { + tensorflow::Safe_TF_StatusPtr status = + tensorflow::make_safe(TF_NewStatus()); + TF_SetStatus(status.get(), static_cast(code), message); + TFE_AbortCollectiveOps(tensorflow::InputTFE_Context(ctx), status.get()); + }); m.def("TF_ListPhysicalDevices", &tensorflow::TF_ListPhysicalDevices); m.def("TF_GetDeviceDetails", &tensorflow::TF_GetDeviceDetails); m.def("TF_DeleteDeviceList", &TF_DeleteDeviceList, From 3496a6f69073bd902a17026bdd616110c62b0961 Mon Sep 17 00:00:00 2001 From: Robert David Date: Tue, 21 Jul 2020 13:48:36 -0700 Subject: [PATCH 0960/2522] Cleanup LSTM tests: Remove unused class members. PiperOrigin-RevId: 322433842 Change-Id: I79afc136c1c6c215204a8f465f11d13fd88d23df --- tensorflow/lite/kernels/lstm_test.cc | 47 +++++++++------------------- 1 file changed, 14 insertions(+), 33 deletions(-) diff --git a/tensorflow/lite/kernels/lstm_test.cc b/tensorflow/lite/kernels/lstm_test.cc index 24682de46ad..ff023459463 100644 --- a/tensorflow/lite/kernels/lstm_test.cc +++ b/tensorflow/lite/kernels/lstm_test.cc @@ -41,11 +41,7 @@ class LSTMOpModel : public SingleOpModel { bool use_projection_bias, float cell_clip, float proj_clip, const TensorType weight_type, bool model_has_legacy_20_inputs, bool is_layer_norm, bool asymmetric_quantize_inputs) - : n_batch_(n_batch), - n_input_(n_input), - n_cell_(n_cell), - n_output_(n_output), - weight_type_(weight_type) { + : n_input_(n_input), n_output_(n_output), weight_type_(weight_type) { input_ = AddInput({TensorType_FLOAT32, {n_batch, n_input}}); if (use_cifg) { @@ -103,8 +99,8 @@ class LSTMOpModel : public SingleOpModel { } // Adding the 2 state tensors. - output_state_ = AddInput({TensorType_FLOAT32, {n_batch, n_output}}, true); - cell_state_ = AddInput({TensorType_FLOAT32, {n_batch, n_cell}}, true); + AddInput({TensorType_FLOAT32, {n_batch, n_output}}, true); + AddInput({TensorType_FLOAT32, {n_batch, n_cell}}, true); // Layer norm weights. 
if (!model_has_legacy_20_inputs) { @@ -231,8 +227,6 @@ class LSTMOpModel : public SingleOpModel { int num_inputs() { return n_input_; } int num_outputs() { return n_output_; } - int num_cells() { return n_cell_; } - int num_batches() { return n_batch_; } protected: int input_; @@ -262,14 +256,10 @@ class LSTMOpModel : public SingleOpModel { int projection_weights_; int projection_bias_; - int output_state_; - int cell_state_; int output_; - int n_batch_; int n_input_; - int n_cell_; int n_output_; private: @@ -1676,10 +1666,7 @@ class LSTMIntegerOpModel : public SingleOpModel { bool use_8x8_8_implementation, const std::vector>& ranges, const std::vector>& intermediates) - : n_batch_(n_batch), - n_input_(n_input), - n_cell_(n_cell), - n_output_(n_output) { + : n_input_(n_input), n_output_(n_output) { input_ = AddInput({TensorType_INT8, {n_batch, n_input}, ranges[0].first, @@ -1776,16 +1763,16 @@ class LSTMIntegerOpModel : public SingleOpModel { } // Adding the 2 state tensors. - output_state_ = AddInput({TensorType_INT16, - {n_batch, n_output}, - ranges[18].first, - ranges[18].second}, - true); - cell_state_ = AddInput({TensorType_INT16, - {n_batch, n_cell}, - ranges[19].first, - ranges[19].second}, - true); + AddInput({TensorType_INT16, + {n_batch, n_output}, + ranges[18].first, + ranges[18].second}, + true); + AddInput({TensorType_INT16, + {n_batch, n_cell}, + ranges[19].first, + ranges[19].second}, + true); // Layer norm weights. if (use_layer_norm) { @@ -1918,8 +1905,6 @@ class LSTMIntegerOpModel : public SingleOpModel { int num_inputs() { return n_input_; } int num_outputs() { return n_output_; } - int num_cells() { return n_cell_; } - int num_batches() { return n_batch_; } protected: int input_; @@ -1951,12 +1936,8 @@ class LSTMIntegerOpModel : public SingleOpModel { int projection_bias_; int output_; - int output_state_; - int cell_state_; - int n_batch_; int n_input_; - int n_cell_; int n_output_; }; From 0f65643d1bf219620fa8021e32074d8ba6777383 Mon Sep 17 00:00:00 2001 From: Tomer Kaftan Date: Tue, 21 Jul 2020 13:50:16 -0700 Subject: [PATCH 0961/2522] Flesh out the KerasTensor docstrings in preparation for enabling KerasTensors by default. This change also adds a `__str__` in addition to the `__repr__`, and updates the error messages raised when passing KerasTensors to APIs that don't have dispatch registered. PiperOrigin-RevId: 322434205 Change-Id: Iecfe135e1463eecdb408cf436ee3a68edcd8cc4b --- .../python/keras/engine/keras_tensor.py | 204 +++++++++++++----- .../python/keras/engine/keras_tensor_test.py | 40 +++- 2 files changed, 175 insertions(+), 69 deletions(-) diff --git a/tensorflow/python/keras/engine/keras_tensor.py b/tensorflow/python/keras/engine/keras_tensor.py index 6bdd11d8ec3..cd141809cfc 100644 --- a/tensorflow/python/keras/engine/keras_tensor.py +++ b/tensorflow/python/keras/engine/keras_tensor.py @@ -28,6 +28,8 @@ from tensorflow.python.ops import array_ops from tensorflow.python.util import nest from tensorflow.python.util import object_identity +# pylint: disable=g-classes-have-attributes + _KERAS_TENSORS_ENABLED = False @@ -53,64 +55,98 @@ class KerasTensor(object): `KerasTensor`s are tensor-like objects that represent the symbolic inputs and outputs of Keras layers during Functional model construction. They are - compromised of the `tf.TypeSpec` of the Tensor that will be - consumed/produced in the corresponding position of the model. 
+ comprised of the `tf.TypeSpec` of the (Composite)Tensor that will be + consumed/produced in the corresponding location of the Functional model. - They implement `tf.Tensor`'s attributes and methods, and also overload - the same operators as `tf.Tensor`. Passing a KerasTensor to a TF API that - supports dispatching will automatically turn that API call into a lambda - layer in the Functional model. + KerasTensors are intended as a private API, so users should never need to + directly instantiate `KerasTensor`s. - `KerasTensor`s are still internal-only and are a work in progress, but they - have several advantages over using a graph `tf.Tensor` to represent - symbolic values in functional models. - - Unlike symbolic tensors, they do not need to refer to a graph. This means - Keras does not need to maintain a never-deleted global background graph - containing all layers ever called during functional model construction when - constructing Functional Models with KerasTensors. These memory savings - can be significant. + **Building Functional Models with KerasTensors** + `tf.keras.Input` produces `KerasTensor`s that represent the symbolic inputs + to your model. - - Triggering Keras functional model construction is simpler - when it just has to check whether something is a KerasTensor, rather - than trying to infer if a tensor was meant to be a symbolic keras - representation or just a value produced during function tracing. + Passing a `KerasTensor` to a `tf.keras.Layer` `__call__` lets the layer know + that you are building a Functional model. The layer __call__ will + infer the output signature and return `KerasTensor`s with `tf.TypeSpec`s + corresponding to the symbolic outputs of that layer call. These output + `KerasTensor`s will have all of the internal KerasHistory metadata attached + to them that Keras needs to construct a Functional Model. - - Autolambda layers (converting tf ops on symbolic Keras tensors to lambda - Keras layers in the model) use TF's internal dispatching mechanism, instead - of trying to manually walk a graph and extract nodes from it. - The dispatching mechanism is simpler, works more reliably, and is less - likely to run into issues with composite tensors or strange tf ops/nodes. + Currently, layers infer the output signature by: + * creating a scratch `FuncGraph` + * making placeholders in the scratch graph that match the input typespecs + * Calling `layer.call` on these placeholders + * extracting the signatures of the outputs before clearing the scratch graph - (And when it fails, it's by design: because dispatch is explicitly not - supported on the op & it's more obvious that dispatch doesn't support the - setting). + (Note: names assigned to KerasTensors by this process are not guaranteed to + be unique, and are subject to implementation details). - - Because they support arbitrary typespecs, models/layers that use - KerasTensors are generally more friendly to composite tensors of different - types than using symbolic graph tensors (which must have a TensorSpec and - can't have arbitrary typespecs) + `tf.nest` methods are used to insure all of the inputs/output data + structures get maintained, with elements swapped between KerasTensors and + placeholders. 
- To experiment with using KerasTensors instead of symbolic graph `tf.Tensors`, - import keras_tensor directly and call `keras_tensor.enable_keras_tensors()` + In rare cases (such as when directly manipulating shapes using Keras layers), + the layer may be able to partially infer the value of of the output in + addition to just inferring the signature. + When this happens, the returned KerasTensor will also contain the inferred + value information. Follow-on layers can use this information. + during their own output signature inference. + E.g. if one layer produces a symbolic `KerasTensor` that the next layer uses + as the shape of its outputs, partially knowing the value helps infer the + output shape. + + **Automatically converting TF APIs to layers**: + If you passing a `KerasTensor` to a TF API that supports dispatching, + Keras will automatically turn that API call into a lambda + layer in the Functional model, and return KerasTensors representing the + symbolic outputs. + + Most TF APIs that take only tensors as input and produce output tensors + will support dispatching. + + Calling a `tf.function` does not support dispatching, so you cannot pass + `KerasTensor`s as inputs to a `tf.function`. + + Higher-order apis that take methods which produce tensors (e.g. `tf.while`, + `tf.map_fn`, `tf.cond`) also do not currently support dispatching. So, you + cannot directly pass KerasTensors as inputs to these APIs either. If you + want to use these APIs inside of a Functional model, you must put them inside + of a custom layer. + + Args: + type_spec: The `tf.TypeSpec` for the symbolic input created by + `tf.keras.Input`, or symbolically inferred for the output + during a symbolic layer `__call__`. + inferred_value: (Optional) a non-symbolic static value, possibly partially + specified, that could be symbolically inferred for the outputs during + a symbolic layer `__call__`. This will generally only happen when + grabbing and manipulating `tf.int32` shapes directly as tensors. + Statically inferring values in this way and storing them in the + KerasTensor allows follow-on layers to infer output signatures + more effectively. (e.g. when using a symbolic shape tensor to later + construct a tensor with that shape). + name: (optional) string name for this KerasTensor. Names automatically + generated by symbolic layer `__call__`s are not guaranteed to be unique, + and are subject to implementation details. """ - def __init__(self, type_spec, inferred_shape_value=None, name=None): - """Construct a KerasTensor from a type_spec and an optional name.""" + def __init__(self, type_spec, inferred_value=None, name=None): + """Constructs a KerasTensor.""" if not isinstance(type_spec, type_spec_module.TypeSpec): raise ValueError('KerasTensors must be constructed with a `tf.TypeSpec`.') self._type_spec = type_spec - self._inferred_shape_value = inferred_shape_value + self._inferred_value = inferred_value self._name = name @property def type_spec(self): - """Returns the `TypeSpec` that represents this Tensor.""" + """Returns the `tf.TypeSpec` symbolically inferred for this Keras output.""" return self._type_spec @property def shape(self): - """Returns the `TensorShape` that represents the shape of the tensor.""" + """Returns the `TensorShape` symbolically inferred for this Keras output.""" # TODO(kaftan): This is only valid for normal/sparse/ragged tensors. # may need to raise an error when it's not valid for a type_spec, # but some keras code (e.g. 
build-related stuff) will likely fail when @@ -121,11 +157,39 @@ class KerasTensor(object): return self.shape def __len__(self): - raise TypeError('Keras Functional inputs/outputs do not ' + raise TypeError('Keras symbolic inputs/outputs do not ' 'implement `__len__`. You may be ' - 'seeing this error if you are passing them ' - 'to a TF API that Keras cannot automatically ' - 'convert to a lambda layer.') + 'trying to pass Keras symbolic inputs/outputs ' + 'to a TF API that does not register dispatching, ' + 'preventing Keras from automatically ' + 'converting the API call to a lambda layer ' + 'in the Functional Model. This error will also get raised ' + 'if you try asserting a symbolic input/output directly.') + + @property + def op(self): + raise TypeError('Keras symbolic inputs/outputs do not ' + 'implement `op`. You may be ' + 'trying to pass Keras symbolic inputs/outputs ' + 'to a TF API that does not register dispatching, ' + 'preventing Keras from automatically ' + 'converting the API call to a lambda layer ' + 'in the Functional Model.') + + def __hash__(self): + raise TypeError('Tensors are unhashable. (%s)' + 'Instead, use tensor.ref() as the key.' % self) + + def __array__(self): + raise TypeError( + 'Cannot convert a symbolic Keras input/output to a numpy array. ' + 'This error may indicate that you\'re trying to pass a symbolic value ' + 'to a NumPy call, which is not supported. Or, ' + 'you may be trying to pass Keras symbolic inputs/outputs ' + 'to a TF API that does not register dispatching, ' + 'preventing Keras from automatically ' + 'converting the API call to a lambda layer ' + 'in the Functional Model.') @property def is_tensor_like(self): @@ -143,12 +207,34 @@ class KerasTensor(object): shape = tensor_shape.TensorShape(dim_list) if not self.shape.is_compatible_with(shape): raise ValueError( - "Keras Intermediate Value's shape %s is not" + "Keras symbolic input/output's shape %s is not" "compatible with supplied shape %s" % (self.shape, shape)) else: self._type_spec._shape = shape # pylint: disable=protected-access + def __str__(self): + symbolic_description = '' + inferred_value_string = '' + name_string = '' + + if hasattr(self, '_keras_history'): + layer = self._keras_history.layer + node_index = self._keras_history.node_index + tensor_index = self._keras_history.tensor_index + symbolic_description = ( + ', description="Symbolic value %s from ' + 'symbolic call %s of layer \'%s\'"' % ( + tensor_index, node_index, layer.name)) + if self._inferred_value is not None: + inferred_value_string = ( + ', inferred_value=%s' % self._inferred_value) + if self.name is not None: + name_string = ', name=\'%s\'' % self._name + return 'KerasTensor(type_spec=%s%s%s%s)' % ( + self.type_spec, inferred_value_string, + name_string, symbolic_description) + def __repr__(self): symbolic_description = '' inferred_value_string = '' @@ -164,15 +250,15 @@ class KerasTensor(object): symbolic_description = ( ' (Symbolic value %s from symbolic call %s of layer \'%s\')' % ( tensor_index, node_index, layer.name)) - if self._inferred_shape_value is not None: + if self._inferred_value is not None: inferred_value_string = ( - ' inferred_value=\'%s\'' % self._inferred_shape_value) + ' inferred_value=%s' % self._inferred_value) return '' % ( type_spec_string, inferred_value_string, symbolic_description) @property def dtype(self): - """Returns the `dtype` of elements in the tensor.""" + """Returns the `dtype` symbolically inferred for this Keras output.""" # TODO(kaftan): This is only valid for 
normal/sparse/ragged tensors. # may need to raise an error when it's not valid for a type_spec, # but some keras code (e.g. build-related stuff) will likely fail when @@ -197,17 +283,17 @@ class KerasTensor(object): shape = [dim.value for dim in self.shape.dims] if shape is None: - raise TypeError('Cannot iterate over a KerasTensor with unknown shape.') + raise TypeError('Cannot iterate over a Tensor with unknown shape.') if not shape: raise TypeError('Cannot iterate over a scalar.') if shape[0] is None: raise TypeError( - 'Cannot iterate over a KerasTensor with unknown first dimension.') + 'Cannot iterate over a Tensor with unknown first dimension.') return _KerasTensorIterator(self, shape[0]) @property def name(self): - """Returns the (optionally provided) name of the described tensor.""" + """Returns the (non-unique, optional) name of this symbolic Keras value.""" return self._name @classmethod @@ -272,7 +358,7 @@ def keras_tensor_to_placeholder(x): if isinstance(x, KerasTensor): spec = x.type_spec - if x._inferred_shape_value is not None: # pylint: disable=protected-access + if x._inferred_value is not None: # pylint: disable=protected-access # If we suspect this KerasTensor might be representing a shape tensor, # and we were able to extract value information with TensorFlow's shape # handling when making the KerasTensor, we construct the placeholder by @@ -288,14 +374,14 @@ def keras_tensor_to_placeholder(x): # manipulated w/ floating point numbers then converted back # * cases where int32 tensors w/ rank > 2 are manipulated before being # used as a shape tensor - inferred_shape_value = array_ops.shape( + inferred_value = array_ops.shape( array_ops.placeholder( - shape=x._inferred_shape_value, dtype=dtypes.int32)) # pylint: disable=protected-access + shape=x._inferred_value, dtype=dtypes.int32)) # pylint: disable=protected-access if spec.shape.rank == 0: # `tf.shape` always returns a rank-1, we may need to turn it back to a # scalar. - inferred_shape_value = inferred_shape_value[0] - return inferred_shape_value # pylint: disable=protected-access + inferred_value = inferred_value[0] + return inferred_value # pylint: disable=protected-access if isinstance(spec, sparse_tensor.SparseTensorSpec): # nest.map_structure loses dense shape information for sparse tensors. 
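To illustrate the shape-tensor case this inferred-value handling targets, here is a small sketch under the same assumptions as the example above (illustrative only; the layer shapes are made up):

import tensorflow as tf

inputs = tf.keras.Input(shape=(None, 8))
# `tf.shape` on a symbolic input yields an int32 KerasTensor; Keras can often
# partially infer its static value and store it as `inferred_value`.
dynamic_shape = tf.shape(inputs)
# A follow-on op can consume that symbolic shape tensor, e.g. to flatten the
# inner dimensions while keeping the (unknown) batch dimension.
outputs = tf.reshape(inputs, [dynamic_shape[0], -1])
model = tf.keras.Model(inputs, outputs)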
@@ -342,7 +428,7 @@ class UserRegisteredSpec(type_spec_module.TypeSpec): def keras_tensor_from_tensor(x): """Convert a traced (composite)tensor to a representative KerasTensor.""" name = getattr(x, 'name', None) - inferred_shape_value = None + inferred_value = None # TODO(b/161487382): # Special-case user-registered symbolic objects (registered by the @@ -381,14 +467,14 @@ def keras_tensor_from_tensor(x): # manipulated w/ floating point numbers then converted back # * cases where int32 tensors w/ rank > 2 are manipulated before being # used as a shape tensor - inferred_shape_value = array_ops.ones(shape=x).shape - if inferred_shape_value.dims: - inferred_shape_value = inferred_shape_value.as_list() + inferred_value = array_ops.ones(shape=x).shape + if inferred_value.dims: + inferred_value = inferred_value.as_list() else: - inferred_shape_value = None + inferred_value = None out = KerasTensor(type_spec, - inferred_shape_value=inferred_shape_value, name=name) + inferred_value=inferred_value, name=name) if user_registered_symbolic: out._user_registered_symbolic_object = x # pylint: disable=protected-access diff --git a/tensorflow/python/keras/engine/keras_tensor_test.py b/tensorflow/python/keras/engine/keras_tensor_test.py index 63e117effec..374b89202a1 100644 --- a/tensorflow/python/keras/engine/keras_tensor_test.py +++ b/tensorflow/python/keras/engine/keras_tensor_test.py @@ -32,51 +32,71 @@ from tensorflow.python.platform import test class KerasTensorTest(test.TestCase): - def test_repr(self): + def test_repr_and_string(self): kt = keras_tensor.KerasTensor( type_spec=tensor_spec.TensorSpec(shape=(1, 2, 3), dtype=dtypes.float32)) + expected_str = ("KerasTensor(type_spec=TensorSpec(shape=(1, 2, 3), " + "dtype=tf.float32, name=None))") expected_repr = "" - self.assertEqual(expected_repr, str(kt)) + self.assertEqual(expected_str, str(kt)) self.assertEqual(expected_repr, repr(kt)) kt = keras_tensor.KerasTensor( type_spec=tensor_spec.TensorSpec(shape=(2,), dtype=dtypes.int32), - inferred_shape_value=[2, 3]) + inferred_value=[2, 3]) + expected_str = ("KerasTensor(type_spec=TensorSpec(shape=(2,), " + "dtype=tf.int32, name=None), inferred_value=[2, 3])") expected_repr = ( - "") - self.assertEqual(expected_repr, str(kt)) + "") + self.assertEqual(expected_str, str(kt)) self.assertEqual(expected_repr, repr(kt)) kt = keras_tensor.KerasTensor( type_spec=sparse_tensor.SparseTensorSpec( shape=(1, 2, 3), dtype=dtypes.float32)) + expected_str = ("KerasTensor(type_spec=SparseTensorSpec(" + "TensorShape([1, 2, 3]), tf.float32))") expected_repr = ( "") - self.assertEqual(expected_repr, str(kt)) + self.assertEqual(expected_str, str(kt)) self.assertEqual(expected_repr, repr(kt)) with testing_utils.use_keras_tensors_scope(True): inp = layers.Input(shape=(3, 5)) kt = layers.Dense(10)(inp) + expected_str = ( + "KerasTensor(type_spec=TensorSpec(shape=(None, 3, 10), " + "dtype=tf.float32, name=None), name='dense/BiasAdd:0', " + "description=\"Symbolic value 0 from symbolic call 0 " + "of layer 'dense'\")") expected_repr = ( "") - self.assertEqual(expected_repr, str(kt)) + self.assertEqual(expected_str, str(kt)) self.assertEqual(expected_repr, repr(kt)) kt = array_ops.reshape(kt, shape=(3, 5, 2)) + expected_str = ( + "KerasTensor(type_spec=TensorSpec(shape=(3, 5, 2), dtype=tf.float32, " + "name=None), name='tf.reshape/Reshape:0', description=\"Symbolic " + "value 0 from symbolic call 0 of layer 'tf.reshape'\")") expected_repr = ("") - self.assertEqual(expected_repr, str(kt)) + self.assertEqual(expected_str, str(kt)) 
self.assertEqual(expected_repr, repr(kt)) kts = array_ops.unstack(kt) for i in range(3): + expected_str = ( + "KerasTensor(type_spec=TensorSpec(shape=(5, 2), dtype=tf.float32, " + "name=None), name='tf.unstack/unstack:%s', description=\"Symbolic " + "value %s from symbolic call 0 of layer 'tf.unstack'\")" + ) % (i, i) expected_repr = ("" % i) - self.assertEqual(expected_repr, str(kts[i])) + "of layer 'tf.unstack')>") % i + self.assertEqual(expected_str, str(kts[i])) self.assertEqual(expected_repr, repr(kts[i])) if __name__ == "__main__": From 5d7c87bcafaae501945a902e3ea43a5de8b9dd88 Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Tue, 21 Jul 2020 13:52:52 -0700 Subject: [PATCH 0962/2522] [tf.data] Fixing type mismatch, which could lead to loss of precision sanitizer errors. PiperOrigin-RevId: 322434758 Change-Id: I949e3a53b943dfa63d76fc9f99110a19b7112e94 --- tensorflow/core/framework/model.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/framework/model.cc b/tensorflow/core/framework/model.cc index bc72ca08034..7e70c0eab75 100644 --- a/tensorflow/core/framework/model.cc +++ b/tensorflow/core/framework/model.cc @@ -304,7 +304,7 @@ class AsyncInterleaveMany : public Node { class KnownRatio : public Node { public: - KnownRatio(Node::Args args, int64 ratio) : Node(args), ratio_(ratio) {} + KnownRatio(Node::Args args, double ratio) : Node(args), ratio_(ratio) {} virtual ~KnownRatio() {} From 4807acc07e7a51c61a0da6a2f6dd4d4558fa08a5 Mon Sep 17 00:00:00 2001 From: Rick Chao Date: Tue, 21 Jul 2020 14:02:42 -0700 Subject: [PATCH 0963/2522] Keras TF API usage cleanup: Copy test_util.use_gpu into Keras and replace the usage within Keras. PiperOrigin-RevId: 322436993 Change-Id: Ic5ed83a643999afec71054aba9484a4a5e4caf88 --- .../python/keras/engine/training_gpu_test.py | 5 +- .../python/keras/layers/convolutional_test.py | 8 +- .../python/keras/layers/preprocessing/BUILD | 1 + .../preprocessing/image_preprocessing_test.py | 84 +++++++++---------- .../keras/optimizer_v2/optimizer_v2_test.py | 34 ++++---- .../python/keras/optimizer_v2/rmsprop_test.py | 5 +- tensorflow/python/keras/testing_utils.py | 20 +++++ 7 files changed, 89 insertions(+), 68 deletions(-) diff --git a/tensorflow/python/keras/engine/training_gpu_test.py b/tensorflow/python/keras/engine/training_gpu_test.py index 996e281bf0c..0498a03a1ed 100644 --- a/tensorflow/python/keras/engine/training_gpu_test.py +++ b/tensorflow/python/keras/engine/training_gpu_test.py @@ -20,10 +20,9 @@ from __future__ import print_function from absl.testing import parameterized import numpy as np - -from tensorflow.python.framework import test_util from tensorflow.python.keras import backend as K from tensorflow.python.keras import combinations +from tensorflow.python.keras import testing_utils from tensorflow.python.keras.engine import input_layer from tensorflow.python.keras.engine import training from tensorflow.python.keras.layers.convolutional import Conv2D @@ -71,7 +70,7 @@ class TrainingGPUTest(test.TestCase, parameterized.TestCase): return simple_model if test.is_gpu_available(cuda_only=True): - with test_util.use_gpu(): + with testing_utils.use_gpu(): losses_to_test = ['sparse_categorical_crossentropy', 'categorical_crossentropy', 'binary_crossentropy'] diff --git a/tensorflow/python/keras/layers/convolutional_test.py b/tensorflow/python/keras/layers/convolutional_test.py index 18e37a9d6a4..3ab3acd0ff9 100644 --- a/tensorflow/python/keras/layers/convolutional_test.py +++ 
b/tensorflow/python/keras/layers/convolutional_test.py @@ -435,7 +435,7 @@ class GroupedConvTest(keras_parameterized.TestCase): ) def disable_test_group_conv(self, layer_cls, input_shape): if test.is_gpu_available(cuda_only=True): - with test_util.use_gpu(): + with testing_utils.use_gpu(): inputs = random_ops.random_uniform(shape=input_shape) layer = layer_cls(16, 3, groups=4, use_bias=False) @@ -453,7 +453,7 @@ class GroupedConvTest(keras_parameterized.TestCase): def test_group_conv_depthwise(self): if test.is_gpu_available(cuda_only=True): - with test_util.use_gpu(): + with testing_utils.use_gpu(): inputs = random_ops.random_uniform(shape=(3, 27, 27, 32)) layer = keras.layers.Conv2D(32, 3, groups=32, use_bias=False) @@ -474,7 +474,7 @@ class Conv1DTransposeTest(keras_parameterized.TestCase): stack_size = 3 num_col = 6 - with test_util.use_gpu(): + with testing_utils.use_gpu(): testing_utils.layer_test( keras.layers.Conv1DTranspose, kwargs=kwargs, @@ -509,7 +509,7 @@ class Conv3DTransposeTest(keras_parameterized.TestCase): num_col = 6 depth = 5 - with test_util.use_gpu(): + with testing_utils.use_gpu(): testing_utils.layer_test( keras.layers.Conv3DTranspose, kwargs=kwargs, diff --git a/tensorflow/python/keras/layers/preprocessing/BUILD b/tensorflow/python/keras/layers/preprocessing/BUILD index adf10787f1e..1fa6deb8cd9 100644 --- a/tensorflow/python/keras/layers/preprocessing/BUILD +++ b/tensorflow/python/keras/layers/preprocessing/BUILD @@ -473,6 +473,7 @@ cuda_py_test( deps = [ ":image_preprocessing", "//tensorflow/python:client_testlib", + "//tensorflow/python/keras:testing_utils", "//third_party/py/numpy", "@absl_py//absl/testing:parameterized", ], diff --git a/tensorflow/python/keras/layers/preprocessing/image_preprocessing_test.py b/tensorflow/python/keras/layers/preprocessing/image_preprocessing_test.py index a039ec644e3..b51e948baea 100644 --- a/tensorflow/python/keras/layers/preprocessing/image_preprocessing_test.py +++ b/tensorflow/python/keras/layers/preprocessing/image_preprocessing_test.py @@ -47,7 +47,7 @@ class ResizingTest(keras_parameterized.TestCase): orig_width = 8 channels = 3 kwargs.update({'height': expected_height, 'width': expected_width}) - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): testing_utils.layer_test( image_preprocessing.Resizing, kwargs=kwargs, @@ -79,7 +79,7 @@ class ResizingTest(keras_parameterized.TestCase): def test_down_sampling_numeric(self): for dtype in (np.int64, np.float32): - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): input_image = np.reshape(np.arange(0, 16), (1, 4, 4, 1)).astype(dtype) layer = image_preprocessing.Resizing( height=2, width=2, interpolation='nearest') @@ -95,7 +95,7 @@ class ResizingTest(keras_parameterized.TestCase): def test_up_sampling_numeric(self): for dtype in (np.int64, np.float32): - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): input_image = np.reshape(np.arange(0, 4), (1, 2, 2, 1)).astype(dtype) layer = image_preprocessing.Resizing( height=4, width=4, interpolation='nearest') @@ -152,7 +152,7 @@ class CenterCropTest(keras_parameterized.TestCase): (num_samples, orig_height, orig_width, channels)).astype(np.float32) expected_output = get_numpy_center_crop( input_images, expected_height, expected_width) - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): testing_utils.layer_test( image_preprocessing.CenterCrop, kwargs=kwargs, @@ -209,7 +209,7 @@ class RandomCropTest(keras_parameterized.TestCase): orig_width = 8 channels = 3 kwargs = {'height': expected_height, 
'width': expected_width} - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): testing_utils.layer_test( image_preprocessing.RandomCrop, kwargs=kwargs, @@ -240,7 +240,7 @@ class RandomCropTest(keras_parameterized.TestCase): with test.mock.patch.object( stateless_random_ops, 'stateless_random_uniform', return_value=mock_offset): - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): layer = image_preprocessing.RandomCrop(height, width) inp = np.random.random((12, 5, 8, 3)) actual_output = layer(inp, training=1) @@ -270,7 +270,7 @@ class RandomCropTest(keras_parameterized.TestCase): np.random.seed(1337) height, width = 8, 16 inp = np.random.random((12, 8, 16, 3)) - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): layer = image_preprocessing.RandomCrop(height, width) actual_output = layer(inp, training=0) self.assertAllClose(inp, actual_output) @@ -279,7 +279,7 @@ class RandomCropTest(keras_parameterized.TestCase): np.random.seed(1337) height, width = 3, 3 inp = np.random.random((12, 10, 6, 3)) - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): layer = image_preprocessing.RandomCrop(height, width) actual_output = layer(inp, training=0) resized_inp = image_ops.resize_images_v2( @@ -291,7 +291,7 @@ class RandomCropTest(keras_parameterized.TestCase): np.random.seed(1337) height, width = 4, 6 inp = np.random.random((12, 8, 16, 3)) - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): layer = image_preprocessing.RandomCrop(height, width) actual_output = layer(inp, training=0) resized_inp = image_ops.resize_images_v2(inp, size=[4, 8]) @@ -359,7 +359,7 @@ class RandomFlipTest(keras_parameterized.TestCase): expected_output = np.flip(expected_output, axis=1) with test.mock.patch.object( random_ops, 'random_uniform', return_value=mock_random): - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): layer = image_preprocessing.RandomFlip(mode) actual_output = layer(inp, training=1) self.assertAllClose(expected_output, actual_output) @@ -396,7 +396,7 @@ class RandomFlipTest(keras_parameterized.TestCase): with CustomObjectScope({'RandomFlip': image_preprocessing.RandomFlip}): input_images = np.random.random((2, 5, 8, 3)).astype(np.float32) expected_output = input_images - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): layer = image_preprocessing.RandomFlip() actual_output = layer(input_images, training=0) self.assertAllClose(expected_output, actual_output) @@ -446,7 +446,7 @@ class RandomContrastTest(keras_parameterized.TestCase): expected_output = (inp - inp_mean) * mock_random + inp_mean with test.mock.patch.object( random_ops, 'random_uniform', return_value=mock_random): - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): layer = image_preprocessing.RandomContrast((lower, upper)) actual_output = layer(inp, training=True) self.assertAllClose(expected_output, actual_output) @@ -467,7 +467,7 @@ class RandomContrastTest(keras_parameterized.TestCase): with CustomObjectScope( {'RandomContrast': image_preprocessing.RandomContrast}): input_images = np.random.random((2, 5, 8, 3)) - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): layer = image_preprocessing.RandomContrast(amplitude) layer(input_images) @@ -476,7 +476,7 @@ class RandomContrastTest(keras_parameterized.TestCase): {'RandomContrast': image_preprocessing.RandomContrast}): input_images = np.random.random((2, 5, 8, 3)).astype(np.float32) expected_output = input_images - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): layer = 
image_preprocessing.RandomContrast((0.1, 0.2)) actual_output = layer(input_images, training=False) self.assertAllClose(expected_output, actual_output) @@ -485,7 +485,7 @@ class RandomContrastTest(keras_parameterized.TestCase): with CustomObjectScope( {'RandomContrast': image_preprocessing.RandomContrast}): input_images = np.random.randint(low=0, high=255, size=(2, 5, 8, 3)) - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): layer = image_preprocessing.RandomContrast((0.1, 0.2)) layer(input_images) @@ -517,7 +517,7 @@ class RandomTranslationTest(keras_parameterized.TestCase): orig_width = 8 channels = 3 kwargs = {'height_factor': height_factor, 'width_factor': width_factor} - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): testing_utils.layer_test( image_preprocessing.RandomTranslation, kwargs=kwargs, @@ -532,7 +532,7 @@ class RandomTranslationTest(keras_parameterized.TestCase): def test_random_translation_up_numeric_reflect(self): for dtype in (np.int64, np.float32): - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(dtype) # Shifting by -.2 * 5 = 1 pixel. layer = image_preprocessing.RandomTranslation( @@ -552,7 +552,7 @@ class RandomTranslationTest(keras_parameterized.TestCase): def test_random_translation_up_numeric_constant(self): for dtype in (np.int64, np.float32): - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(dtype) # Shifting by -.2 * 5 = 1 pixel. layer = image_preprocessing.RandomTranslation( @@ -572,7 +572,7 @@ class RandomTranslationTest(keras_parameterized.TestCase): def test_random_translation_down_numeric_reflect(self): for dtype in (np.int64, np.float32): - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(dtype) # Shifting by .2 * 5 = 1 pixel. layer = image_preprocessing.RandomTranslation( @@ -592,7 +592,7 @@ class RandomTranslationTest(keras_parameterized.TestCase): def test_random_translation_asymmetric_size_numeric_reflect(self): for dtype in (np.int64, np.float32): - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): input_image = np.reshape(np.arange(0, 16), (1, 8, 2, 1)).astype(dtype) # Shifting by .5 * 8 = 1 pixel. layer = image_preprocessing.RandomTranslation( @@ -615,7 +615,7 @@ class RandomTranslationTest(keras_parameterized.TestCase): def test_random_translation_down_numeric_constant(self): for dtype in (np.int64, np.float32): - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(dtype) # Shifting by -.2 * 5 = 1 pixel. layer = image_preprocessing.RandomTranslation( @@ -635,7 +635,7 @@ class RandomTranslationTest(keras_parameterized.TestCase): def test_random_translation_left_numeric_reflect(self): for dtype in (np.int64, np.float32): - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(dtype) # Shifting by .2 * 5 = 1 pixel. layer = image_preprocessing.RandomTranslation( @@ -655,7 +655,7 @@ class RandomTranslationTest(keras_parameterized.TestCase): def test_random_translation_left_numeric_constant(self): for dtype in (np.int64, np.float32): - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(dtype) # Shifting by -.2 * 5 = 1 pixel. 
layer = image_preprocessing.RandomTranslation( @@ -678,7 +678,7 @@ class RandomTranslationTest(keras_parameterized.TestCase): {'RandomTranslation': image_preprocessing.RandomTranslation}): input_images = np.random.random((2, 5, 8, 3)).astype(np.float32) expected_output = input_images - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): layer = image_preprocessing.RandomTranslation(.5, .5) actual_output = layer(input_images, training=0) self.assertAllClose(expected_output, actual_output) @@ -996,7 +996,7 @@ class RandomRotationTest(keras_parameterized.TestCase): orig_width = 8 channels = 3 kwargs = {'factor': factor} - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): testing_utils.layer_test( image_preprocessing.RandomRotation, kwargs=kwargs, @@ -1014,7 +1014,7 @@ class RandomRotationTest(keras_parameterized.TestCase): {'RandomTranslation': image_preprocessing.RandomRotation}): input_images = np.random.random((2, 5, 8, 3)).astype(np.float32) expected_output = input_images - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): layer = image_preprocessing.RandomRotation(.5) actual_output = layer(input_images, training=0) self.assertAllClose(expected_output, actual_output) @@ -1025,7 +1025,7 @@ class RandomRotationTest(keras_parameterized.TestCase): And that replicas got the same random result. """ input_images = np.random.random((2, 5, 8, 3)).astype(np.float32) - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): strat = MirroredStrategy(devices=['cpu', 'gpu']) with strat.scope(): layer = image_preprocessing.RandomRotation(.5) @@ -1052,7 +1052,7 @@ class RandomZoomTest(keras_parameterized.TestCase): orig_width = 8 channels = 3 kwargs = {'height_factor': height_factor, 'width_factor': width_factor} - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): testing_utils.layer_test( image_preprocessing.RandomZoom, kwargs=kwargs, @@ -1073,7 +1073,7 @@ class RandomZoomTest(keras_parameterized.TestCase): def test_random_zoom_in_numeric(self): for dtype in (np.int64, np.float32): - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): input_image = np.reshape(np.arange(0, 25), (5, 5, 1)).astype(dtype) layer = image_preprocessing.RandomZoom((-.5, -.5), (-.5, -.5), interpolation='nearest') @@ -1092,7 +1092,7 @@ class RandomZoomTest(keras_parameterized.TestCase): def test_random_zoom_out_numeric(self): for dtype in (np.int64, np.float32): - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): input_image = np.reshape(np.arange(0, 25), (5, 5, 1)).astype(dtype) layer = image_preprocessing.RandomZoom((.5, .5), (.8, .8), fill_mode='constant', @@ -1112,7 +1112,7 @@ class RandomZoomTest(keras_parameterized.TestCase): def test_random_zoom_out_numeric_preserve_aspect_ratio(self): for dtype in (np.int64, np.float32): - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): input_image = np.reshape(np.arange(0, 25), (5, 5, 1)).astype(dtype) layer = image_preprocessing.RandomZoom((.5, .5), fill_mode='constant', @@ -1135,7 +1135,7 @@ class RandomZoomTest(keras_parameterized.TestCase): {'RandomZoom': image_preprocessing.RandomZoom}): input_images = np.random.random((2, 5, 8, 3)).astype(np.float32) expected_output = input_images - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): layer = image_preprocessing.RandomZoom(.5, .5) actual_output = layer(input_images, training=0) self.assertAllClose(expected_output, actual_output) @@ -1157,7 +1157,7 @@ class RandomHeightTest(keras_parameterized.TestCase): orig_height = 5 orig_width = 8 channels = 
3 - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): img = np.random.random((num_samples, orig_height, orig_width, channels)) layer = image_preprocessing.RandomHeight(factor) img_out = layer(img, training=True) @@ -1176,7 +1176,7 @@ class RandomHeightTest(keras_parameterized.TestCase): mock_factor = 0 with test.mock.patch.object( gen_stateful_random_ops, 'stateful_uniform', return_value=mock_factor): - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): img = np.random.random((12, 5, 8, 3)) layer = image_preprocessing.RandomHeight(.4) img_out = layer(img, training=True) @@ -1184,7 +1184,7 @@ class RandomHeightTest(keras_parameterized.TestCase): def test_random_height_longer_numeric(self): for dtype in (np.int64, np.float32): - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): input_image = np.reshape(np.arange(0, 6), (2, 3, 1)).astype(dtype) layer = image_preprocessing.RandomHeight(factor=(1., 1.)) # Return type of RandomHeight() is float32 if `interpolation` is not @@ -1204,7 +1204,7 @@ class RandomHeightTest(keras_parameterized.TestCase): def test_random_height_shorter_numeric(self): for dtype in (np.int64, np.float32): - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): input_image = np.reshape(np.arange(0, 8), (4, 2, 1)).astype(dtype) layer = image_preprocessing.RandomHeight( factor=(-.5, -.5), interpolation='nearest') @@ -1226,7 +1226,7 @@ class RandomHeightTest(keras_parameterized.TestCase): with CustomObjectScope({'RandomHeight': image_preprocessing.RandomHeight}): input_images = np.random.random((2, 5, 8, 3)).astype(np.float32) expected_output = input_images - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): layer = image_preprocessing.RandomHeight(.5) actual_output = layer(input_images, training=0) self.assertAllClose(expected_output, actual_output) @@ -1248,7 +1248,7 @@ class RandomWidthTest(keras_parameterized.TestCase): orig_height = 5 orig_width = 8 channels = 3 - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): img = np.random.random((num_samples, orig_height, orig_width, channels)) layer = image_preprocessing.RandomWidth(factor) img_out = layer(img, training=True) @@ -1267,7 +1267,7 @@ class RandomWidthTest(keras_parameterized.TestCase): mock_factor = 0 with test.mock.patch.object( gen_stateful_random_ops, 'stateful_uniform', return_value=mock_factor): - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): img = np.random.random((12, 8, 5, 3)) layer = image_preprocessing.RandomWidth(.4) img_out = layer(img, training=True) @@ -1275,7 +1275,7 @@ class RandomWidthTest(keras_parameterized.TestCase): def test_random_width_longer_numeric(self): for dtype in (np.int64, np.float32): - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): input_image = np.reshape(np.arange(0, 6), (3, 2, 1)).astype(dtype) layer = image_preprocessing.RandomWidth(factor=(1., 1.)) # Return type of RandomWidth() is float32 if `interpolation` is not @@ -1294,7 +1294,7 @@ class RandomWidthTest(keras_parameterized.TestCase): def test_random_width_shorter_numeric(self): for dtype in (np.int64, np.float32): - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): input_image = np.reshape(np.arange(0, 8), (2, 4, 1)).astype(dtype) layer = image_preprocessing.RandomWidth( factor=(-.5, -.5), interpolation='nearest') @@ -1316,7 +1316,7 @@ class RandomWidthTest(keras_parameterized.TestCase): with CustomObjectScope({'RandomWidth': image_preprocessing.RandomWidth}): input_images = np.random.random((2, 5, 8, 3)).astype(np.float32) 
expected_output = input_images - with tf_test_util.use_gpu(): + with testing_utils.use_gpu(): layer = image_preprocessing.RandomWidth(.5) actual_output = layer(input_images, training=0) self.assertAllClose(expected_output, actual_output) diff --git a/tensorflow/python/keras/optimizer_v2/optimizer_v2_test.py b/tensorflow/python/keras/optimizer_v2/optimizer_v2_test.py index 1e242256c7b..e994a6e1e44 100644 --- a/tensorflow/python/keras/optimizer_v2/optimizer_v2_test.py +++ b/tensorflow/python/keras/optimizer_v2/optimizer_v2_test.py @@ -73,7 +73,7 @@ class OptimizerTest(test.TestCase, parameterized.TestCase): @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def testBasic(self): for dtype in _DATA_TYPES: - with test_util.use_gpu(): + with testing_utils.use_gpu(): var0 = variables.Variable([1.0, 2.0], dtype=dtype) var1 = variables.Variable([3.0, 4.0], dtype=dtype) loss = lambda: 5 * var0 + 3 * var1 # pylint: disable=cell-var-from-loop @@ -138,7 +138,7 @@ class OptimizerTest(test.TestCase, parameterized.TestCase): @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def testPrecomputedGradient(self): for dtype in _DATA_TYPES: - with test_util.use_gpu(): + with testing_utils.use_gpu(): var0 = variables.Variable([1.0, 2.0], dtype=dtype) var1 = variables.Variable([3.0, 4.0], dtype=dtype) loss = lambda: 5 * var0 + 3 * var1 # pylint: disable=cell-var-from-loop @@ -162,7 +162,7 @@ class OptimizerTest(test.TestCase, parameterized.TestCase): @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def testNoGradients(self): for dtype in _DATA_TYPES: - with test_util.use_gpu(): + with testing_utils.use_gpu(): var0 = variables.Variable([1.0, 2.0], dtype=dtype) var1 = variables.Variable([3.0, 4.0], dtype=dtype) loss = lambda: 5 * var0 # pylint: disable=cell-var-from-loop @@ -174,7 +174,7 @@ class OptimizerTest(test.TestCase, parameterized.TestCase): @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def testNoGradientsForAnyVariables_Minimize(self): for dtype in _DATA_TYPES: - with test_util.use_gpu(): + with testing_utils.use_gpu(): var0 = variables.Variable([1.0, 2.0], dtype=dtype) var1 = variables.Variable([3.0, 4.0], dtype=dtype) loss = lambda: constant_op.constant(5.0) @@ -187,7 +187,7 @@ class OptimizerTest(test.TestCase, parameterized.TestCase): @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def testNoGradientsForAnyVariables_ApplyGradients(self): for dtype in _DATA_TYPES: - with test_util.use_gpu(): + with testing_utils.use_gpu(): var0 = variables.Variable([1.0, 2.0], dtype=dtype) var1 = variables.Variable([3.0, 4.0], dtype=dtype) sgd_op = gradient_descent.SGD(3.0) @@ -198,7 +198,7 @@ class OptimizerTest(test.TestCase, parameterized.TestCase): @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def testGradientsAsVariables(self): for i, dtype in enumerate(_DATA_TYPES): - with test_util.use_gpu(): + with testing_utils.use_gpu(): var0 = variables.Variable([1.0, 2.0], dtype=dtype) var1 = variables.Variable([3.0, 4.0], dtype=dtype) loss = lambda: 5 * var0 + 3 * var1 # pylint: disable=cell-var-from-loop @@ -236,7 +236,7 @@ class OptimizerTest(test.TestCase, parameterized.TestCase): @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def testComputeGradientsWithTensors(self): - with test_util.use_gpu(): + with testing_utils.use_gpu(): x = ops.convert_to_tensor_v2(1.0) def f(): @@ -256,7 +256,7 @@ class OptimizerTest(test.TestCase, parameterized.TestCase): def 
testConstraint(self): constraint_01 = lambda x: clip_ops.clip_by_value(x, -0.1, 0.) constraint_0 = lambda x: clip_ops.clip_by_value(x, 0., 1.) - with test_util.use_gpu(): + with testing_utils.use_gpu(): var0 = variables.Variable([1.0, 2.0], constraint=constraint_01) var1 = variables.Variable([3.0, 4.0], @@ -278,14 +278,14 @@ class OptimizerTest(test.TestCase, parameterized.TestCase): @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def testIterationWithoutMinimize(self): - with test_util.use_gpu(): + with testing_utils.use_gpu(): sgd = gradient_descent.SGD(3.0) self.evaluate(sgd.iterations.initializer) self.assertEqual(0, self.evaluate(sgd.iterations)) @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def testConfig(self): - with test_util.use_gpu(): + with testing_utils.use_gpu(): opt = gradient_descent.SGD(learning_rate=1.0) config = opt.get_config() opt2 = gradient_descent.SGD.from_config(config) @@ -305,7 +305,7 @@ class OptimizerTest(test.TestCase, parameterized.TestCase): @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def testConfigWithLearningRateDecay(self): - with test_util.use_gpu(): + with testing_utils.use_gpu(): var0 = variables.Variable([[1.0], [2.0]], dtype=dtypes.float32) for decay_schedule in [ learning_rate_schedule.InverseTimeDecay( @@ -336,7 +336,7 @@ class OptimizerTest(test.TestCase, parameterized.TestCase): @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def testGradClipValue(self): - with test_util.use_gpu(): + with testing_utils.use_gpu(): var = variables.Variable([1.0, 2.0]) loss = lambda: 3 * var opt = gradient_descent.SGD(learning_rate=1.0, clipvalue=1.0) @@ -347,7 +347,7 @@ class OptimizerTest(test.TestCase, parameterized.TestCase): @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def testGradClipNorm(self): - with test_util.use_gpu(): + with testing_utils.use_gpu(): var = variables.Variable([1.0]) loss = lambda: 3 * var opt = gradient_descent.SGD(learning_rate=1.0, clipnorm=1.0) @@ -368,7 +368,7 @@ class OptimizerTest(test.TestCase, parameterized.TestCase): @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def testWeights(self): - with test_util.use_gpu(): + with testing_utils.use_gpu(): opt1 = adam.Adam(learning_rate=1.0) var1 = variables.Variable([1.0, 2.0], dtype=dtypes.float32) loss1 = lambda: 3 * var1 @@ -671,7 +671,7 @@ class OptimizersCompatibilityTest(keras_parameterized.TestCase): self.skipTest( 'v1 optimizer does not run in eager mode') np.random.seed(1331) - with test_util.use_gpu(): + with testing_utils.use_gpu(): train_samples = 20 input_dim = 3 num_classes = 2 @@ -757,7 +757,7 @@ class OptimizersCompatibilityTest(keras_parameterized.TestCase): self.skipTest( 'v1 optimizer does not run in eager mode') np.random.seed(1331) - with test_util.use_gpu(): + with testing_utils.use_gpu(): train_samples = 20 input_dim = 3 num_classes = 2 @@ -814,7 +814,7 @@ class OptimizersCompatibilityTest(keras_parameterized.TestCase): self.skipTest( 'v1 optimizer does not run in eager mode') np.random.seed(1331) - with test_util.use_gpu(): + with testing_utils.use_gpu(): train_samples = 20 input_dim = 3 num_classes = 2 diff --git a/tensorflow/python/keras/optimizer_v2/rmsprop_test.py b/tensorflow/python/keras/optimizer_v2/rmsprop_test.py index 5fd91588227..35f795edb53 100644 --- a/tensorflow/python/keras/optimizer_v2/rmsprop_test.py +++ b/tensorflow/python/keras/optimizer_v2/rmsprop_test.py @@ -31,6 +31,7 @@ from tensorflow.python.framework 
import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.keras import combinations +from tensorflow.python.keras import testing_utils from tensorflow.python.keras.optimizer_v2 import learning_rate_schedule from tensorflow.python.keras.optimizer_v2 import rmsprop from tensorflow.python.ops import embedding_ops @@ -104,7 +105,7 @@ class RMSpropOptimizerTest(test.TestCase): def testDense(self): # TODO(tanzheny, omalleyt): Fix test in eager mode. for (dtype, learning_rate, rho, momentum, epsilon, centered) in _TESTPARAMS: - with ops.get_default_graph().as_default(), test_util.use_gpu(): + with ops.get_default_graph().as_default(), testing_utils.use_gpu(): # Initialize variables for numpy implementation. var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) grads0_np = np.array([0.1, 0.2], dtype=dtype.as_numpy_dtype) @@ -379,7 +380,7 @@ class RMSpropOptimizerTest(test.TestCase): def testSparse(self): # TODO(tanzheny, omalleyt): Fix test in eager mode. for (dtype, learning_rate, rho, momentum, epsilon, centered) in _TESTPARAMS: - with ops.get_default_graph().as_default(), test_util.use_gpu(): + with ops.get_default_graph().as_default(), testing_utils.use_gpu(): # Initialize variables for numpy implementation. var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) grads0_np = np.array([0.1], dtype=dtype.as_numpy_dtype) diff --git a/tensorflow/python/keras/testing_utils.py b/tensorflow/python/keras/testing_utils.py index cceaabe37a5..550ff664823 100644 --- a/tensorflow/python/keras/testing_utils.py +++ b/tensorflow/python/keras/testing_utils.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import contextlib import functools import threading @@ -26,6 +27,7 @@ import numpy as np from tensorflow.python import tf2 from tensorflow.python.eager import context from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_spec from tensorflow.python.framework import test_util @@ -917,3 +919,21 @@ def _set_v2_dtype_behavior(fn, enabled): base_layer_utils.V2_DTYPE_BEHAVIOR = v2_dtype_behavior return tf_decorator.make_decorator(fn, wrapper) + + +@contextlib.contextmanager +def device(should_use_gpu): + """Uses gpu when requested and available.""" + if should_use_gpu and test_util.is_gpu_available(): + dev = '/device:GPU:0' + else: + dev = '/device:CPU:0' + with ops.device(dev): + yield + + +@contextlib.contextmanager +def use_gpu(): + """Uses gpu when requested and available.""" + with device(should_use_gpu=True): + yield From 305003dc7b621f897cd76825622b974b1e555cec Mon Sep 17 00:00:00 2001 From: Brian Zhao Date: Tue, 21 Jul 2020 14:07:27 -0700 Subject: [PATCH 0964/2522] Include regexp.h in the parallel mobile build graph. 
PiperOrigin-RevId: 322438087 Change-Id: Ie2ad5c1816cede7bddef9fd7f056dc96ba56e083 --- tensorflow/core/platform/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/core/platform/BUILD b/tensorflow/core/platform/BUILD index 9722ba8f897..823a12dfc57 100644 --- a/tensorflow/core/platform/BUILD +++ b/tensorflow/core/platform/BUILD @@ -1540,6 +1540,7 @@ filegroup( "raw_coding.h", "refcount.h", "resource.h", + "regexp.h", "scanner.cc", "scanner.h", "setround.cc", From 89a8f8b2928793300da69105686b942c8200187d Mon Sep 17 00:00:00 2001 From: Karim Nosir Date: Tue, 21 Jul 2020 14:10:13 -0700 Subject: [PATCH 0965/2522] Update hexagon delegate to check for library version PiperOrigin-RevId: 322438763 Change-Id: Ice9390e2216ec958f16f3c92713606dba66f59ac --- .../delegates/hexagon/hexagon_delegate.cc | 42 ++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/tensorflow/lite/delegates/hexagon/hexagon_delegate.cc b/tensorflow/lite/delegates/hexagon/hexagon_delegate.cc index ef3162739a9..f03f300e380 100644 --- a/tensorflow/lite/delegates/hexagon/hexagon_delegate.cc +++ b/tensorflow/lite/delegates/hexagon/hexagon_delegate.cc @@ -74,6 +74,39 @@ class HexagonDelegate : public SimpleDelegateInterface { return options; } + bool VerifyDelegate() { + auto* hexagon_nn = HexagonNNImplementation(); + if (hexagon_nn == nullptr) { + return false; + } + if (hexagon_nn->hexagon_nn_version != nullptr && + hexagon_nn->hexagon_nn_hexagon_interface_version) { + int hexagon_nn_version = -1; + int hexagon_interface_version = + hexagon_nn->hexagon_nn_hexagon_interface_version(); + if (hexagon_nn->hexagon_nn_version(&hexagon_nn_version) != 0) { + TFLITE_LOG_PROD(tflite::TFLITE_LOG_WARNING, + "Failed to fetch Hexagon NN version. This might be " + "because you're using incompatible versions of " + "libhexagon_interface and libhexagon_nn_skel. " + "You must use compatible versions. " + "Refer to Tensorflow Lite Hexagon Delegate Guide."); + return false; + } + if (hexagon_nn_version != hexagon_interface_version) { + TFLITE_LOG_PROD( + tflite::TFLITE_LOG_WARNING, + "Incompatible versions between interface library and " + "libhexagon_skel %d vs %d. You must use compatible versions. " + "Refer to Tensorflow Lite Hexagon Delegate Guide.", + hexagon_interface_version, hexagon_nn_version); + return false; + } + } + return hexagon_nn->hexagon_nn_is_device_supported && + hexagon_nn->hexagon_nn_is_device_supported(); + } + private: TfLiteHexagonDelegateOptions params_; }; @@ -83,9 +116,16 @@ class HexagonDelegate : public SimpleDelegateInterface { TfLiteDelegate* TfLiteHexagonDelegateCreate( const TfLiteHexagonDelegateOptions* options) { + auto hexagon_delegate_interface = + std::make_unique(options); + if (!hexagon_delegate_interface->VerifyDelegate()) { + TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO, + "Hexagon Delegate is not supported.\n"); + return nullptr; + } auto* initialized_delegate = tflite::TfLiteDelegateFactory::CreateSimpleDelegate( - std::make_unique(options)); + std::move(hexagon_delegate_interface)); if (options->enable_dynamic_batch_size) { initialized_delegate->flags |= kTfLiteDelegateFlagsAllowDynamicTensors; } From 0f940d4e1bba06c474571ac4f5edbbe2a4929373 Mon Sep 17 00:00:00 2001 From: Rachel Lim Date: Tue, 21 Jul 2020 14:13:24 -0700 Subject: [PATCH 0966/2522] Disable tsan on barrier_ops_test due to failing test. 
PiperOrigin-RevId: 322439403 Change-Id: I2c43575424de8a5d0fd9a81eefcf277ac362b92e --- tensorflow/python/kernel_tests/BUILD | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 49928980c41..44428988273 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -47,8 +47,10 @@ tf_py_test( size = "medium", # NOTE(ebrevdo): This test is NOT small. srcs = ["barrier_ops_test.py"], shard_count = 20, - # TODO(b/129706424): Re-enable this test on Mac. - tags = ["no_mac"], + tags = [ + "no_mac", # TODO(b/129706424): Re-enable this test on Mac. + "notsan", # TODO(b/161829717): Re-enable. + ], deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:data_flow_ops", From 26c5ae7dcbaefe0a745a7700a865ce2c8ca3e713 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Tue, 21 Jul 2020 14:14:49 -0700 Subject: [PATCH 0967/2522] Add a way to benchmark ops to TFNP and compare with numpy. Also add a simple single layer benchmark, and some unary op and matmul benchmarks. PiperOrigin-RevId: 322439672 Change-Id: I9a3c9ba472ced3acb7f17e2341c5b864d4d02567 --- .../integration_test/benchmarks/BUILD | 45 +++++ .../benchmarks/micro_benchmarks.py | 165 ++++++++++++++++++ .../integration_test/benchmarks/numpy_mlp.py | 51 ++++++ .../benchmarks/tf_numpy_mlp.py | 53 ++++++ 4 files changed, 314 insertions(+) create mode 100644 tensorflow/python/ops/numpy_ops/integration_test/benchmarks/BUILD create mode 100644 tensorflow/python/ops/numpy_ops/integration_test/benchmarks/micro_benchmarks.py create mode 100644 tensorflow/python/ops/numpy_ops/integration_test/benchmarks/numpy_mlp.py create mode 100644 tensorflow/python/ops/numpy_ops/integration_test/benchmarks/tf_numpy_mlp.py diff --git a/tensorflow/python/ops/numpy_ops/integration_test/benchmarks/BUILD b/tensorflow/python/ops/numpy_ops/integration_test/benchmarks/BUILD new file mode 100644 index 00000000000..31f8d4575b3 --- /dev/null +++ b/tensorflow/python/ops/numpy_ops/integration_test/benchmarks/BUILD @@ -0,0 +1,45 @@ +load("//tensorflow:tensorflow.bzl", "cuda_py_test") + +package( + default_visibility = [ + "//tensorflow:internal", + ], + licenses = ["notice"], # Apache 2.0 +) + +cuda_py_test( + name = "micro_benchmarks", + srcs = ["micro_benchmarks.py"], + python_version = "PY3", + shard_count = 5, + deps = [ + ":numpy_mlp", + ":tf_numpy_mlp", + "//tensorflow:tensorflow_py", + "//tensorflow/python:extra_py_tests_deps", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:platform", + "//tensorflow/python:platform_benchmark", + "//tensorflow/python/ops/numpy_ops:numpy", + "//third_party/py/numpy", + "@absl_py//absl/flags", + "@absl_py//absl/logging", + ], +) + +py_library( + name = "numpy_mlp", + srcs = ["numpy_mlp.py"], + deps = [ + "//third_party/py/numpy", + ], +) + +py_library( + name = "tf_numpy_mlp", + srcs = ["tf_numpy_mlp.py"], + deps = [ + "//tensorflow:tensorflow_py", + "//tensorflow/python/ops/numpy_ops:numpy", + ], +) diff --git a/tensorflow/python/ops/numpy_ops/integration_test/benchmarks/micro_benchmarks.py b/tensorflow/python/ops/numpy_ops/integration_test/benchmarks/micro_benchmarks.py new file mode 100644 index 00000000000..bff4db17242 --- /dev/null +++ b/tensorflow/python/ops/numpy_ops/integration_test/benchmarks/micro_benchmarks.py @@ -0,0 +1,165 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# Lint as: python3 +r"""Micro benchmark. + +bazel run -c opt --config=cuda \ + //third_party/tensorflow/python/ops/numpy_ops/benchmarks:micro_benchmarks -- \ + --number=100 --repeat=100 \ + --benchmarks=. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import gc +import time + +from absl import flags +from absl import logging + +import numpy as np # pylint: disable=unused-import +import tensorflow.compat.v2 as tf + +from tensorflow.python.ops import numpy_ops as tfnp # pylint: disable=g-direct-tensorflow-import +from tensorflow.python.ops.numpy_ops.integration_test.benchmarks import numpy_mlp +from tensorflow.python.ops.numpy_ops.integration_test.benchmarks import tf_numpy_mlp + +FLAGS = flags.FLAGS + +flags.DEFINE_integer('repeat', 100, '#Measurements per benchmark.') +flags.DEFINE_integer('number', 100, '#Runs per a measure.') + + +class MicroBenchmarks(tf.test.Benchmark): + """Main micro benchmark class.""" + + def _benchmark_and_report( + self, + name, + fn, + repeat=None, + number=None): + """Run fn repeat * number times, report time, and return fastest time.""" + # Can't make these default above since the flags may not have been parsed + # at module import time. + repeat = repeat or int(FLAGS.repeat) + number = number or int(FLAGS.number) + + # Warmup + fn() + + times = [] + for _ in range(repeat): + gc.disable() + start = time.time() + for _ in range(number): + fn() + times.append(time.time() - start) + gc.enable() + gc.collect() + + # Regular benchmark to report numbers. 
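# The fastest per-call time, in microseconds, is taken from the best of the
# `repeat` measurements; using the minimum damps noise from GC pauses and OS
# scheduling jitter so benchmark runs are easier to compare.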
+ fastest_time_us = min(times) * 1e6 / number + total_time = sum(times) + self.report_benchmark(name=name, + wall_time=total_time, + extras={'fastest_time_us': fastest_time_us}) + + return fastest_time_us + + def benchmark_tf_np_mlp_inference_batch_1_cpu(self): + with tf.device('/CPU:0'): + model = tf_numpy_mlp.MLP() + x = tfnp.ones(shape=(1, 10)).astype(np.float32) + self._benchmark_and_report(self._get_name(), lambda: model.inference(x)) + + def benchmark_tf_np_tf_function_mlp_inference_batch_1_cpu(self): + with tf.device('/CPU:0'): + model = tf_numpy_mlp.MLP() + x = tfnp.ones(shape=(1, 10)).astype(np.float32) + self._benchmark_and_report( + self._get_name(), tf.function(lambda: model.inference(x))) + + def benchmark_numpy_mlp_inference_batch_1_cpu(self): + model = numpy_mlp.MLP() + x = np.random.uniform(size=(1, 10)).astype(np.float32, copy=False) + self._benchmark_and_report(self._get_name(), lambda: model.inference(x)) + + def _benchmark_np_and_tf_np(self, name, op, args, repeat=None): # pylint: disable=redefined-builtin + fn = getattr(np, op) + assert fn is not None + + np_time = self._benchmark_and_report( + '{}_numpy'.format(name), lambda: fn(*args), repeat=repeat) + + fn = getattr(tfnp, op) + assert fn is not None + + with tf.device('CPU:0'): + tf_time = self._benchmark_and_report( + '{}_tfnp_cpu'.format(name), lambda: fn(*args), repeat=repeat) + + return np_time, tf_time + + def _print_times(self, op, sizes, times): + # For easy reporting. + print('For np.{}:'.format(op)) + print('{:<15} {:>11} {:>11}'.format('Size', 'NP time', 'TF NP Time')) + for size, (np_time, tf_time) in zip(sizes, times): + print('{:<15} {:>10.5}us {:>10.5}us'.format( + str(size), np_time, tf_time)) + print() + + def _benchmark_np_and_tf_np_unary(self, op): + sizes = [(100,), (10000,), (1000000,)] + repeats = [FLAGS.repeat] * 2 + [10] + times = [] + for size, repeat in zip(sizes, repeats): + x = np.random.uniform(size=size).astype(np.float32, copy=False) + name = '{}_{}'.format(self._get_name(), size) + times.append(self._benchmark_np_and_tf_np(name, op, (x,), repeat)) + self._print_times(op, sizes, times) + + def benchmark_count_nonzero(self): + self._benchmark_np_and_tf_np_unary('count_nonzero') + + def benchmark_log(self): + self._benchmark_np_and_tf_np_unary('log') + + def benchmark_exp(self): + self._benchmark_np_and_tf_np_unary('exp') + + def benchmark_tanh(self): + self._benchmark_np_and_tf_np_unary('tanh') + + def benchmark_matmul(self): + sizes = [(2, 2), (10, 10), (100, 100), (200, 200), (1000, 1000)] + # Override repeat flag since this can be very slow. + repeats = [FLAGS.repeat] * 3 + [50, 10] + times = [] + for size, repeat in zip(sizes, repeats): + x = np.random.uniform(size=size).astype(np.float32, copy=False) + name = '{}_{}'.format(self._get_name(), size) + times.append( + self._benchmark_np_and_tf_np(name, 'matmul', (x, x), repeat=repeat)) + + self._print_times('matmul', sizes, times) + + +if __name__ == '__main__': + logging.set_verbosity(logging.WARNING) + tf.enable_v2_behavior() + tf.test.main() diff --git a/tensorflow/python/ops/numpy_ops/integration_test/benchmarks/numpy_mlp.py b/tensorflow/python/ops/numpy_ops/integration_test/benchmarks/numpy_mlp.py new file mode 100644 index 00000000000..7e801d96f3a --- /dev/null +++ b/tensorflow/python/ops/numpy_ops/integration_test/benchmarks/numpy_mlp.py @@ -0,0 +1,51 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Builds the MLP network.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +NUM_CLASSES = 3 +INPUT_SIZE = 10 +HIDDEN_UNITS = 10 + + +class MLP(object): + """MLP model. + + T = Relu(Add(MatMul(A, B), C)) + R = Relu(Add(MatMul(T, D), E)) + """ + + def __init__(self, num_classes=NUM_CLASSES, input_size=INPUT_SIZE, + hidden_units=HIDDEN_UNITS): + self.w1 = np.random.uniform(size=[input_size, hidden_units]).astype( + np.float32, copy=False) + self.w2 = np.random.uniform(size=[hidden_units, num_classes]).astype( + np.float32, copy=False) + self.b1 = np.random.uniform(size=[1, hidden_units]).astype( + np.float32, copy=False) + self.b2 = np.random.uniform(size=[1, num_classes]).astype( + np.float32, copy=False) + + def inference(self, inputs): + return self._forward(inputs, self.w1, self.w2, self.b1, self.b2) + + def _forward(self, x, w1, w2, b1, b2): + x = np.maximum(np.matmul(x, w1) + b1, 0.) + x = np.maximum(np.matmul(x, w2) + b2, 0.) + return x diff --git a/tensorflow/python/ops/numpy_ops/integration_test/benchmarks/tf_numpy_mlp.py b/tensorflow/python/ops/numpy_ops/integration_test/benchmarks/tf_numpy_mlp.py new file mode 100644 index 00000000000..f3c4727e18f --- /dev/null +++ b/tensorflow/python/ops/numpy_ops/integration_test/benchmarks/tf_numpy_mlp.py @@ -0,0 +1,53 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Builds the MLP network.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow.compat.v2 as tf +np = tf.experimental.numpy + + +NUM_CLASSES = 3 +INPUT_SIZE = 10 +HIDDEN_UNITS = 10 + + +class MLP(object): + """MLP model. 
+ + T = Relu(Add(MatMul(A, B), C)) + R = Relu(Add(MatMul(T, D), E)) + """ + + def __init__(self, num_classes=NUM_CLASSES, input_size=INPUT_SIZE, + hidden_units=HIDDEN_UNITS): + self.w1 = np.random.uniform(size=[input_size, hidden_units]).astype( + np.float32) + self.w2 = np.random.uniform(size=[hidden_units, num_classes]).astype( + np.float32) + self.b1 = np.random.uniform(size=[1, hidden_units]).astype( + np.float32) + self.b2 = np.random.uniform(size=[1, num_classes]).astype( + np.float32) + + def inference(self, inputs): + return self._forward(inputs, self.w1, self.w2, self.b1, self.b2) + + def _forward(self, x, w1, w2, b1, b2): + x = np.maximum(np.matmul(x, w1) + b1, 0.) + x = np.maximum(np.matmul(x, w2) + b2, 0.) + return x From b8a74b8941af8e6205643058247e181fc80a194c Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: Tue, 21 Jul 2020 14:16:46 -0700 Subject: [PATCH 0968/2522] Add tests for running tf.data service with sparse and ragged tensors PiperOrigin-RevId: 322440067 Change-Id: I5edcc526f35211213478609fc460b597ca7fcb0b --- .../kernel_tests/data_service_ops_test.py | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tensorflow/python/data/kernel_tests/data_service_ops_test.py b/tensorflow/python/data/kernel_tests/data_service_ops_test.py index 98db4fb0d4b..ae847e3924c 100644 --- a/tensorflow/python/data/kernel_tests/data_service_ops_test.py +++ b/tensorflow/python/data/kernel_tests/data_service_ops_test.py @@ -21,6 +21,7 @@ import time from absl.testing import parameterized +from tensorflow.python.data.experimental.ops import batching from tensorflow.python.data.experimental.ops import data_service_ops from tensorflow.python.data.experimental.ops import distribute_options from tensorflow.python.data.experimental.ops import testing @@ -29,11 +30,14 @@ from tensorflow.python.data.kernel_tests import test_base from tensorflow.python.data.ops import dataset_ops from tensorflow.python.eager import def_function from tensorflow.python.framework import combinations +from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import random_seed +from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops +from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import tensor_array_ops from tensorflow.python.platform import test @@ -81,6 +85,30 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): results = [elem.numpy() for elem in ds] self.assertEqual(list(range(num_elements)), results) + @combinations.generate(test_base.eager_only_combinations()) + def testDistributeSparse(self): + dispatcher_address = self.create_cluster(1) + element = sparse_tensor.SparseTensor( + indices=[[0]], + values=constant_op.constant([0], dtype=dtypes.int32), + dense_shape=[1]) + ds = dataset_ops.Dataset.from_tensors(element) + ds = _make_distributed_dataset(ds, dispatcher_address) + results = [sparse_ops.sparse_tensor_to_dense(elem) for elem in ds] + self.assertAllEqual(results, [[0]]) + + @combinations.generate(test_base.eager_only_combinations()) + def testDistributeRagged(self): + dispatcher_address = self.create_cluster(1) + ds = dataset_ops.Dataset.from_tensor_slices([1, 5, 3, 2, 8]) + ds = ds.map(math_ops.range) + ds = ds.apply(batching.dense_to_ragged_batch(2)) + ds = _make_distributed_dataset(ds, dispatcher_address) + results = 
[elem.to_tensor() for elem in ds] + self.assertAllEqual(results[0], [[0, 0, 0, 0, 0], [0, 1, 2, 3, 4]]) + self.assertAllEqual(results[1], [[0, 1, 2], [0, 1, 0]]) + self.assertAllEqual(results[2], [[0, 1, 2, 3, 4, 5, 6, 7]]) + @combinations.generate(test_base.eager_only_combinations()) def testDifferentShuffleOrders(self): random_seed.set_random_seed(None) From cdc531263a33e8c42c2e9116c44643d83dabb9b2 Mon Sep 17 00:00:00 2001 From: Robert David Date: Tue, 21 Jul 2020 14:18:47 -0700 Subject: [PATCH 0969/2522] Cleanup LSTM tests: simplify some logic. PiperOrigin-RevId: 322440478 Change-Id: I29af023579b9ad667a4357ee757d7e6f0784ce7e --- tensorflow/lite/kernels/lstm_test.cc | 61 ++++++++++++++-------------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/tensorflow/lite/kernels/lstm_test.cc b/tensorflow/lite/kernels/lstm_test.cc index ff023459463..d55cbea2ca6 100644 --- a/tensorflow/lite/kernels/lstm_test.cc +++ b/tensorflow/lite/kernels/lstm_test.cc @@ -49,16 +49,15 @@ class LSTMOpModel : public SingleOpModel { } else { input_to_input_weights_ = AddInput({weight_type, {n_cell, n_input}}); } - input_to_forget_weights_ = AddInput({weight_type, {n_cell, n_input}}); input_to_cell_weights_ = AddInput({weight_type, {n_cell, n_input}}); input_to_output_weights_ = AddInput({weight_type, {n_cell, n_input}}); + if (use_cifg) { recurrent_to_input_weights_ = AddNullInput(); } else { recurrent_to_input_weights_ = AddInput({weight_type, {n_cell, n_output}}); } - recurrent_to_forget_weights_ = AddInput({weight_type, {n_cell, n_output}}); recurrent_to_cell_weights_ = AddInput({weight_type, {n_cell, n_output}}); recurrent_to_output_weights_ = AddInput({weight_type, {n_cell, n_output}}); @@ -88,13 +87,13 @@ class LSTMOpModel : public SingleOpModel { if (use_projection_weights) { projection_weights_ = AddInput({weight_type, {n_output, n_cell}}); - if (use_projection_bias) { - projection_bias_ = AddInput({TensorType_FLOAT32, {n_output}}); - } else { - projection_bias_ = AddNullInput(); - } } else { projection_weights_ = AddNullInput(); + } + if (use_projection_bias) { + CHECK(use_projection_weights); + projection_bias_ = AddInput({TensorType_FLOAT32, {n_output}}); + } else { projection_bias_ = AddNullInput(); } @@ -104,22 +103,25 @@ class LSTMOpModel : public SingleOpModel { // Layer norm weights. if (!model_has_legacy_20_inputs) { - if (use_cifg) { - input_layer_norm_coefficients_ = AddNullInput(); + if (is_layer_norm) { + if (use_cifg) { + input_layer_norm_coefficients_ = AddNullInput(); + } else { + input_layer_norm_coefficients_ = + AddInput({TensorType_FLOAT32, {n_cell}}); + } + forget_layer_norm_coefficients_ = + AddInput({TensorType_FLOAT32, {n_cell}}); + cell_layer_norm_coefficients_ = + AddInput({TensorType_FLOAT32, {n_cell}}); + output_layer_norm_coefficients_ = + AddInput({TensorType_FLOAT32, {n_cell}}); } else { - input_layer_norm_coefficients_ = - is_layer_norm ? AddInput({TensorType_FLOAT32, {n_cell}}) - : AddNullInput(); + input_layer_norm_coefficients_ = AddNullInput(); + forget_layer_norm_coefficients_ = AddNullInput(); + cell_layer_norm_coefficients_ = AddNullInput(); + output_layer_norm_coefficients_ = AddNullInput(); } - forget_layer_norm_coefficients_ = - is_layer_norm ? AddInput({TensorType_FLOAT32, {n_cell}}) - : AddNullInput(); - cell_layer_norm_coefficients_ = - is_layer_norm ? AddInput({TensorType_FLOAT32, {n_cell}}) - : AddNullInput(); - output_layer_norm_coefficients_ = - is_layer_norm ? 
AddInput({TensorType_FLOAT32, {n_cell}}) - : AddNullInput(); } output_ = AddOutput({TensorType_FLOAT32, {n_output}}); @@ -263,8 +265,7 @@ class LSTMOpModel : public SingleOpModel { int n_output_; private: - template - void PopulateTensor(int index, const std::vector& data) { + void PopulateTensor(int index, const std::vector& data) { // Nothing to do if tensor is an optional input or if data vector is empty. if ((index == kTfLiteOptionalTensor) || data.empty()) return; SingleOpModel::PopulateTensor(index, data); @@ -1749,16 +1750,14 @@ class LSTMIntegerOpModel : public SingleOpModel { {n_output, n_cell}, ranges[16].first, ranges[16].second}); - if (use_projection_bias) { - projection_bias_ = AddInput({TensorType_INT32, - {n_output}, - ranges[17].first, - ranges[17].second}); - } else { - projection_bias_ = AddNullInput(); - } } else { projection_weights_ = AddNullInput(); + } + if (use_projection_bias) { + CHECK(use_projection_weights); + projection_bias_ = AddInput( + {TensorType_INT32, {n_output}, ranges[17].first, ranges[17].second}); + } else { projection_bias_ = AddNullInput(); } From be88c5f8a7a1d23a97af45015a845988eda18f27 Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Tue, 21 Jul 2020 14:33:30 -0700 Subject: [PATCH 0970/2522] Internal cleanup: set up boilerplate for tests which require running inside a tf.function. The boilerplate helps keep the tests clean, although its mechanism may be tricky. PiperOrigin-RevId: 322444035 Change-Id: I56bbb1e1eb2feb65e16b8171d4a1bd20e1b190a5 --- .../autograph/operators/control_flow_test.py | 286 ++++++++---------- tensorflow/python/autograph/utils/__init__.py | 1 - tensorflow/python/autograph/utils/testing.py | 88 +++++- 3 files changed, 202 insertions(+), 173 deletions(-) diff --git a/tensorflow/python/autograph/operators/control_flow_test.py b/tensorflow/python/autograph/operators/control_flow_test.py index ce9b1181e05..5f0629a163f 100644 --- a/tensorflow/python/autograph/operators/control_flow_test.py +++ b/tensorflow/python/autograph/operators/control_flow_test.py @@ -31,26 +31,22 @@ import six from tensorflow.python.autograph.operators import control_flow from tensorflow.python.autograph.operators import variables as variable_operators from tensorflow.python.autograph.utils import ag_logging +from tensorflow.python.autograph.utils import testing from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.eager import def_function from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes -from tensorflow.python.framework import func_graph from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape -from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops -from tensorflow.python.ops import variables from tensorflow.python.ops.ragged import ragged_factory_ops from tensorflow.python.platform import test -@test_util.run_all_in_graph_and_eager_modes -class ForLoopTest(test.TestCase): +class ForLoopTest(testing.AutoGraphTestCase): def test_tensor(self): def body(i): @@ -70,7 +66,7 @@ class ForLoopTest(test.TestCase): set_state=set_state, symbol_names=('s',), opts={}) - self.assertEqual(self.evaluate(s), (1234,)) + self.assertEqual(s, (1234,)) def test_range_tensor(self): def body(i): @@ -90,7 +86,7 @@ class ForLoopTest(test.TestCase): 
set_state=set_state, symbol_names=('s',), opts={'iterate_names': 'i'}) - self.assertEqual(self.evaluate(s), (1234,)) + self.assertEqual(s, (1234,)) def test_range_tensor_explicit_limit_delta(self): def body(i): @@ -110,7 +106,7 @@ class ForLoopTest(test.TestCase): set_state=set_state, symbol_names=('s',), opts={'iterate_names': 'i'}) - self.assertEqual(self.evaluate(s), (-171207,)) + self.assertEqual(s, (-171207,)) def test_range_tensor_explicit_limit_negative_delta(self): def body(i): @@ -130,7 +126,7 @@ class ForLoopTest(test.TestCase): set_state=set_state, symbol_names=('s',), opts={'iterate_names': 'i'}) - self.assertEqual(self.evaluate(s), (171207,)) + self.assertEqual(s, (171207,)) def test_range_tensor_random_delta(self): def body(i): @@ -151,7 +147,7 @@ class ForLoopTest(test.TestCase): set_state=set_state, symbol_names=('s',), opts={'iterate_names': 'i'}) - self.assertEqual(self.evaluate(s), (1234,)) + self.assertEqual(s, (1234,)) def test_range_tensor_random_negative_delta(self): def body(i): @@ -172,7 +168,7 @@ class ForLoopTest(test.TestCase): set_state=set_state, symbol_names=('s',), opts={'iterate_names': 'i'}) - self.assertEqual(self.evaluate(s), (171207,)) + self.assertEqual(s, (171207,)) def test_tensor_with_extra_test_object_vars(self): class MutableObject(object): @@ -198,7 +194,7 @@ class ForLoopTest(test.TestCase): set_state=set_state, symbol_names=('state.field_1', 'state.field_2'), opts={}) - self.assertEqual(self.evaluate((state.field_1, state.field_2)), (6, 6)) + self.assertEqual((state.field_1, state.field_2), (6, 6)) def test_python(self): def body(i): @@ -297,7 +293,7 @@ class ForLoopTest(test.TestCase): set_state=set_state, symbol_names=('s',), opts={}) - self.assertEqual(self.evaluate(s), (1234,)) + self.assertEqual(s, (1234,)) def test_dataset_with_extra_test(self): def body(i): @@ -317,7 +313,7 @@ class ForLoopTest(test.TestCase): set_state=set_state, symbol_names=('s',), opts={}) - self.assertEqual(self.evaluate(s), (12,)) + self.assertEqual(s, (12,)) def test_dataset_with_extra_test_collection_vars(self): def body(i): @@ -339,7 +335,7 @@ class ForLoopTest(test.TestCase): set_state=set_state, symbol_names=('l[0]', 's'), opts={}) - self.assertEqual(self.evaluate((l[0], s)), (3, 3)) + self.assertEqual((l[0], s), (3, 3)) def test_dataset_with_extra_test_iteration_limiting(self): def body(it): @@ -360,100 +356,83 @@ class ForLoopTest(test.TestCase): set_state=set_state, symbol_names=('i',), opts={}) - self.assertEqual(self.evaluate(i), (3,)) + self.assertEqual(i, (3,)) def test_tf_dataset_no_loop_vars(self): def body(i): v.assign(v.read_value() * 10 + i) - v = variables.Variable(0, dtype=dtypes.int64) - self.evaluate(v.initializer) + v = self.variable('v', 0, dtypes.int64) - # tf.function required for the automatic control dependencies, and because - # ops test for its presence. - @def_function.function - def test_fn(): - control_flow.for_stmt( - dataset_ops.Dataset.range(5), - extra_test=None, - body=body, - get_state=lambda: (), - set_state=lambda _: None, - symbol_names=(), - opts={}) + control_flow.for_stmt( + dataset_ops.Dataset.range(5), + extra_test=None, + body=body, + get_state=lambda: (), + set_state=lambda _: None, + symbol_names=(), + opts={}) - self.evaluate(test_fn()) - self.assertEqual(self.evaluate(v.read_value()), 1234) + self.assertEqual(v.read_value(), 1234) def test_tf_iterator(self): - # graph-mode iterators are only supported inside tf.function. 
- @def_function.function - def test_fn(): - def body(i): - nonlocal s - s = s * 10 + i + def body(i): + nonlocal s + s = s * 10 + i - def set_state(loop_vars): - nonlocal s - s, = loop_vars + def set_state(loop_vars): + nonlocal s + s, = loop_vars - s = constant_op.constant(0, dtype=dtypes.int64) - control_flow.for_stmt( - iter(dataset_ops.Dataset.range(5)), - extra_test=None, - body=body, - get_state=lambda: (s,), - set_state=set_state, - symbol_names=('s',), - opts={}) - return s - self.assertAllEqual(test_fn(), 1234) + s = constant_op.constant(0, dtype=dtypes.int64) + control_flow.for_stmt( + iter(dataset_ops.Dataset.range(5)), + extra_test=None, + body=body, + get_state=lambda: (s,), + set_state=set_state, + symbol_names=('s',), + opts={}) + + self.assertEqual(s, 1234) def test_tf_iterator_shape_invariants(self): - # graph-mode iterators are only supported inside tf.function. - @def_function.function - def test_fn(): - def body(i): - nonlocal s - s = array_ops.concat([s, [i]], 0) + def body(i): + nonlocal s + s = array_ops.concat([s, [i]], 0) - def set_state(loop_vars): - nonlocal s - s, = loop_vars + def set_state(loop_vars): + nonlocal s + s, = loop_vars - s = constant_op.constant([], dtype=dtypes.int64) - control_flow.for_stmt( - iter(dataset_ops.Dataset.range(5)), - extra_test=None, - body=body, - get_state=lambda: (s,), - set_state=set_state, - symbol_names=('s',), - opts={'shape_invariants': [(s, tensor_shape.TensorShape([None]))]}) - return s - self.assertAllEqual(test_fn(), [0, 1, 2, 3, 4]) + s = constant_op.constant([], dtype=dtypes.int64) + control_flow.for_stmt( + iter(dataset_ops.Dataset.range(5)), + extra_test=None, + body=body, + get_state=lambda: (s,), + set_state=set_state, + symbol_names=('s',), + opts={'shape_invariants': [(s, tensor_shape.TensorShape([None]))]}) + + self.assertAllEqual(s, [0, 1, 2, 3, 4]) def test_tf_iterator_no_loop_vars(self): def body(i): v.assign(v.read_value() * 10 + i) - v = variables.Variable(0, dtype=dtypes.int64) - self.evaluate(v.initializer) + v = self.variable('v', 0, dtypes.int64) - # tf.function required for the automatic control dependencies. - @def_function.function - def test_fn(): - control_flow.for_stmt( - iter(dataset_ops.Dataset.range(5)), - extra_test=None, - body=body, - get_state=lambda: (), - set_state=lambda _: None, - symbol_names=(), - opts={}) + control_flow.for_stmt( + iter(dataset_ops.Dataset.range(5)), + extra_test=None, + body=body, + get_state=lambda: (), + set_state=lambda _: None, + symbol_names=(), + opts={}) - self.evaluate(test_fn()) - self.assertEqual(self.evaluate(v.read_value()), 1234) + self.assertEqual(v.read_value(), 1234) def test_tf_ragged_tensor(self): def body(i): @@ -473,7 +452,8 @@ class ForLoopTest(test.TestCase): set_state=set_state, symbol_names=('s',), opts={}) - self.assertEqual(self.evaluate(s), (123,)) + + self.assertEqual(s, (123,)) def test_tf_ragged_tensor_higher_dimensional(self): def body(i): @@ -497,30 +477,26 @@ class ForLoopTest(test.TestCase): set_state=set_state, symbol_names=('s',), opts={}) - self.assertEqual(self.evaluate(s), (12,)) + + self.assertEqual(s, (12,)) def test_tf_ragged_tensor_no_loop_vars(self): - v = variables.Variable(0, dtype=dtypes.int32) - self.evaluate(v.initializer) + v = self.variable('v', 0, dtypes.int32) def body(i): v.assign(v.read_value() * 10 + i[0]) - # tf.function required for the automatic control dependencies. 
- @def_function.function(autograph=False) - def test_fn(): - control_flow.for_stmt( - ragged_factory_ops.constant([[1], [2, 4], [3]]), - extra_test=None, - body=body, - get_state=lambda: (), - set_state=lambda _: None, - symbol_names=(), - opts={}) + control_flow.for_stmt( + ragged_factory_ops.constant([[1], [2, 4], [3]]), + extra_test=None, + body=body, + get_state=lambda: (), + set_state=lambda _: None, + symbol_names=(), + opts={}) - self.evaluate(test_fn()) # Note: 123 = ((0*10 + 1)*10+2)*10+3 (first element of each row). - self.assertEqual(self.evaluate(v.read_value()), 123) + self.assertEqual(v.read_value(), 123) def _basic_loop(self, init_value, body_fn): def body(i): @@ -561,8 +537,7 @@ class ForLoopTest(test.TestCase): self._basic_loop(0, lambda i, s: np.array([1], dtype=np.int32)) -@test_util.run_all_in_graph_and_eager_modes -class WhileLoopTest(test.TestCase): +class WhileLoopTest(testing.AutoGraphTestCase): def test_tensor(self): def body(): @@ -584,40 +559,36 @@ class WhileLoopTest(test.TestCase): set_state=set_state, symbol_names=('i', 's'), opts={}) - self.assertEqual(self.evaluate((i, s)), (5, 1234)) + self.assertEqual(i, 5) + self.assertEqual(s, 1234) def test_tensor_with_side_effecting_condition(self): - v = variables.Variable(0) + v = self.variable('v', 0, dtypes.int32) - # tf.function required for the automatic control dependencies. - @def_function.function - def test_fn(): - def cond(): - v.assign(v.read_value() * 10 + i) - return i < n + def cond(): + v.assign(v.read_value() * 10 + i) + return i < n - def body(): - nonlocal i - i += 1 + def body(): + nonlocal i + i += 1 - def set_state(loop_vars): - nonlocal i - i, = loop_vars + def set_state(loop_vars): + nonlocal i + i, = loop_vars - i = 0 - n = constant_op.constant(5) - control_flow.while_stmt( - test=cond, - body=body, - get_state=lambda: (i,), - set_state=set_state, - symbol_names=('i',), - opts={}) - return i + i = 0 + n = constant_op.constant(5) + control_flow.while_stmt( + test=cond, + body=body, + get_state=lambda: (i,), + set_state=set_state, + symbol_names=('i',), + opts={}) - self.evaluate(v.initializer) - self.assertEqual(self.evaluate(test_fn()), (5,)) - self.assertEqual(self.evaluate(v), (12345,)) + self.assertEqual(i, (5,)) + self.assertEqual(v, (12345,)) def test_tensor_with_python_state(self): class MutableObject(object): @@ -642,7 +613,8 @@ class WhileLoopTest(test.TestCase): set_state=set_state, symbol_names=('i', 'state.field'), opts={}) - self.assertEqual(self.evaluate((i, state.field)), (5, 1234)) + self.assertEqual(i, 5) + self.assertEqual(state.field, 1234) def test_python(self): def body(): @@ -679,7 +651,7 @@ class WhileLoopTest(test.TestCase): symbol_names=('i', 's'), opts={}) self.assertEqual(i, 5) - self.assertEqual(self.evaluate(s), 1234) + self.assertEqual(s, 1234) def test_python_while_infinite(self): if not __debug__: @@ -800,8 +772,7 @@ class WhileLoopTest(test.TestCase): self._basic_loop(0, lambda i, s: np.array([1], dtype=np.int32)) -@test_util.run_all_in_graph_and_eager_modes -class IfStmtTest(test.TestCase): +class IfStmtTest(testing.AutoGraphTestCase): def test_tensor(self): @@ -829,8 +800,8 @@ class IfStmtTest(test.TestCase): nouts=1) return i - self.assertEqual(1, self.evaluate(test_fn(constant_op.constant(True)))) - self.assertEqual(-1, self.evaluate(test_fn(constant_op.constant(False)))) + self.assertEqual(test_fn(constant_op.constant(True)), 1) + self.assertEqual(test_fn(constant_op.constant(False)), -1) def test_tensor_no_outputs(self): @@ -858,8 +829,8 @@ class 
IfStmtTest(test.TestCase): nouts=0) return i - self.assertEqual(None, test_fn(constant_op.constant(True))) - self.assertEqual(None, test_fn(constant_op.constant(False))) + self.assertIsNone(test_fn(constant_op.constant(True))) + self.assertIsNone(test_fn(constant_op.constant(False))) def test_tensor_multiple_returns(self): @@ -889,9 +860,8 @@ class IfStmtTest(test.TestCase): nouts=2) return i, j - self.assertEqual((1, 2), self.evaluate(test_fn(constant_op.constant(True)))) - self.assertEqual((-1, -2), - self.evaluate(test_fn(constant_op.constant(False)))) + self.assertEqual(test_fn(constant_op.constant(True)), (1, 2)) + self.assertEqual(test_fn(constant_op.constant(False)), (-1, -2)) def test_python(self): @@ -915,8 +885,8 @@ class IfStmtTest(test.TestCase): nouts=1) return i - self.assertEqual(1, test_fn(True)) - self.assertEqual(-1, test_fn(False)) + self.assertEqual(test_fn(True), 1) + self.assertEqual(test_fn(False), -1) def test_python_multiple_returns(self): @@ -942,8 +912,8 @@ class IfStmtTest(test.TestCase): nouts=2) return i, j - self.assertEqual((1, 2), test_fn(True)) - self.assertEqual((-1, -2), test_fn(False)) + self.assertEqual(test_fn(True), (1, 2)) + self.assertEqual(test_fn(False), (-1, -2)) def _basic_cond(self, body_fn, else_fn): def body(): @@ -959,16 +929,14 @@ class IfStmtTest(test.TestCase): x, = cond_vars x = 0 - # Eager cond had different semantics, we don't test those here. - with func_graph.FuncGraph('tmp').as_default(): - control_flow.if_stmt( - cond=constant_op.constant(True), - body=body, - orelse=orelse, - get_state=lambda: (x,), - set_state=set_state, - symbol_names=('x',), - nouts=1) + control_flow.if_stmt( + cond=constant_op.constant(True), + body=body, + orelse=orelse, + get_state=lambda: (x,), + set_state=set_state, + symbol_names=('x',), + nouts=1) return x def test_tensor_none_output(self): diff --git a/tensorflow/python/autograph/utils/__init__.py b/tensorflow/python/autograph/utils/__init__.py index 270f9b9d14f..295d6674e2c 100644 --- a/tensorflow/python/autograph/utils/__init__.py +++ b/tensorflow/python/autograph/utils/__init__.py @@ -22,4 +22,3 @@ from tensorflow.python.autograph.utils.context_managers import control_dependenc from tensorflow.python.autograph.utils.misc import alias_tensors from tensorflow.python.autograph.utils.py_func import wrap_py_func from tensorflow.python.autograph.utils.tensor_list import dynamic_list_append -from tensorflow.python.autograph.utils.testing import fake_tf diff --git a/tensorflow/python/autograph/utils/testing.py b/tensorflow/python/autograph/utils/testing.py index a59642c9577..f4238bea397 100644 --- a/tensorflow/python/autograph/utils/testing.py +++ b/tensorflow/python/autograph/utils/testing.py @@ -18,20 +18,82 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import imp +import types +import unittest +from tensorflow.python.eager import def_function from tensorflow.python.framework import ops -from tensorflow.python.ops import gen_math_ops -from tensorflow.python.ops import math_ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import test -def fake_tf(): - """Creates a fake module that looks like TensorFlow, for testing.""" - mod = imp.new_module('tensorflow') - mod_contents = {} - mod_contents.update(gen_math_ops.__dict__) - mod_contents.update(math_ops.__dict__) - mod_contents.update(ops.__dict__) - mod_contents.update(mod.__dict__) - mod.__dict__.update(mod_contents) - return mod +class 
AutoGraphTestCase(test.TestCase): + """Tests specialized for AutoGraph, which run as tf.functions. + + These tests use a staged programming-like approach: most of the test code runs + as-is inside a tf.function, but the assertions are lifted outside the + function, and run with the corresponding function values instead. + + For example, the test: + + def test_foo(self): + baz = bar(); + self.assertEqual(baz, value) + + is equivalent to writing: + + def test_foo(self): + @tf.function + def test_fn(): + baz = bar(); + return baz, value + + baz_actual, value_actual = test_fn() + self.assertEqual(baz_actual, value_actual) + """ + + def __new__(cls, *args): + obj = super().__new__(cls) + + for name in cls.__dict__: + if not name.startswith(unittest.TestLoader.testMethodPrefix): + continue + m = getattr(obj, name) + if callable(m): + wrapper = obj._run_as_tf_function(m) + setattr(obj, name, types.MethodType(wrapper, obj)) + + return obj + + def _run_as_tf_function(self, fn): + + def wrapper(self): + @def_function.function(autograph=False) # Testing autograph itself. + def fn_wrapper(): + self.assertions = [] + fn() + targets = [args for _, args in self.assertions] + return targets + actuals = self.evaluate(fn_wrapper()) + for (_, args), value in zip(self.assertions, actuals): + args[:] = value + return wrapper + + def variable(self, name, value, dtype): + with ops.init_scope(): + if name not in self.variables: + self.variables[name] = variables.Variable(value, dtype=dtype) + self.evaluate(self.variables[name].initializer) + return self.variables[name] + + def setUp(self): + super().setUp() + self.variables = {} + + def tearDown(self): + for fn, args in self.assertions: + fn(*args) + super().tearDown() + + def assertEqual(self, *args): + self.assertions.append((super().assertEqual, list(args))) From 332adf10ab7e2b575896edfad2d03570c6863ba6 Mon Sep 17 00:00:00 2001 From: Rachel Lim Date: Tue, 21 Jul 2020 14:36:45 -0700 Subject: [PATCH 0971/2522] [tf.data] Add an op that computes the statically-known batch size of a dataset where possible. 
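(Editorial sketch, not part of the change itself: a minimal usage example assembled
from the Python wrapper and tests added in this diff; the import paths below are
the ones the diff introduces.)

    from tensorflow.python.data.experimental.ops import distribute
    from tensorflow.python.data.ops import dataset_ops

    # The batch size 4 is recovered even though drop_remainder defaults to False,
    # so the leading dimension is not visible in the dataset's element_spec.
    dataset = dataset_ops.Dataset.range(32).batch(4)
    batch_size = distribute.compute_batch_size(dataset)  # tf.int64 scalar, == 4

    # Zipping datasets with contradictory batch sizes (e.g. batch(4) with
    # batch(8)) cannot be resolved statically; the result then evaluates to -1.
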
PiperOrigin-RevId: 322444756 Change-Id: Ib628e9463bf34e5a607fd023d7c065389fff599c --- .../base_api/api_def_ComputeBatchSize.pbtxt | 5 + tensorflow/core/kernels/data/BUILD | 1 + tensorflow/core/kernels/data/dataset_utils.cc | 7 + tensorflow/core/kernels/data/dataset_utils.h | 8 + .../core/kernels/data/experimental/BUILD | 22 +- .../experimental/assert_next_dataset_op.cc | 15 +- .../experimental/compute_batch_size_op.cc | 191 ++++++++++++++++++ tensorflow/core/kernels/data/rewrite_utils.cc | 18 +- .../core/kernels/data/serialization_utils.cc | 23 +++ .../core/kernels/data/serialization_utils.h | 9 + .../core/ops/experimental_dataset_ops.cc | 5 + .../kernel_tests/rebatch_dataset_test.py | 54 +++++ .../data/experimental/ops/distribute.py | 52 +++++ .../api/golden/v1/tensorflow.raw_ops.pbtxt | 4 + .../api/golden/v2/tensorflow.raw_ops.pbtxt | 4 + 15 files changed, 392 insertions(+), 26 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_ComputeBatchSize.pbtxt create mode 100644 tensorflow/core/kernels/data/experimental/compute_batch_size_op.cc diff --git a/tensorflow/core/api_def/base_api/api_def_ComputeBatchSize.pbtxt b/tensorflow/core/api_def/base_api/api_def_ComputeBatchSize.pbtxt new file mode 100644 index 00000000000..b92d02e256d --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_ComputeBatchSize.pbtxt @@ -0,0 +1,5 @@ +op { + graph_op_name: "ComputeBatchSize" + visibility: HIDDEN + summary: "Computes the static batch size of a dataset sans partial batches." +} diff --git a/tensorflow/core/kernels/data/BUILD b/tensorflow/core/kernels/data/BUILD index efce4fb0cf5..f0a58f3cdfe 100644 --- a/tensorflow/core/kernels/data/BUILD +++ b/tensorflow/core/kernels/data/BUILD @@ -54,6 +54,7 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", + "//tensorflow/core/platform:regexp", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", ], diff --git a/tensorflow/core/kernels/data/dataset_utils.cc b/tensorflow/core/kernels/data/dataset_utils.cc index 5f0068445a9..1bd29638df6 100644 --- a/tensorflow/core/kernels/data/dataset_utils.cc +++ b/tensorflow/core/kernels/data/dataset_utils.cc @@ -33,6 +33,7 @@ limitations under the License. #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/hash/hash.h" #include "tensorflow/core/lib/strings/proto_serialization.h" +#include "tensorflow/core/platform/regexp.h" #include "tensorflow/core/util/work_sharder.h" namespace tensorflow { @@ -898,5 +899,11 @@ std::string DeterminismPolicy::String() const { } } +bool MatchesAnyVersionRE(StringPiece op_prefix, StringPiece op_to_match) { + // Matches all versions of an op by appending an optional version suffix + auto expected_re = strings::StrCat(RE2::QuoteMeta(op_prefix), "(V\\d+)?"); + return RE2::FullMatch(op_to_match, expected_re); +} + } // namespace data } // namespace tensorflow diff --git a/tensorflow/core/kernels/data/dataset_utils.h b/tensorflow/core/kernels/data/dataset_utils.h index 9a7e274714a..5c6b14a8782 100644 --- a/tensorflow/core/kernels/data/dataset_utils.h +++ b/tensorflow/core/kernels/data/dataset_utils.h @@ -296,6 +296,14 @@ class DummyResourceOp : public OpKernel { } }; +// Given an op prefix and an op to match, returns whether the op to match +// is a regex match for any version of the op prefix. 
For example, +// MatchesAnyVersionRE("BatchDataset", "BatchDataset") == true +// MatchesAnyVersionRE("BatchDataset", "BatchDatasetV2") == true +// MatchesAnyVersionRE("BatchDataset", "BatchDatasetV3") == true +// MatchesAnyVersionRE("PaddedBatchDataset", "BatchDataset") == false +bool MatchesAnyVersionRE(StringPiece op_prefix, StringPiece op_to_match); + } // namespace data } // namespace tensorflow diff --git a/tensorflow/core/kernels/data/experimental/BUILD b/tensorflow/core/kernels/data/experimental/BUILD index 35446bdfbea..56220b7bd85 100644 --- a/tensorflow/core/kernels/data/experimental/BUILD +++ b/tensorflow/core/kernels/data/experimental/BUILD @@ -32,7 +32,7 @@ tf_kernel_library( deps = [ "//tensorflow/core:experimental_dataset_ops_op_lib", "//tensorflow/core:framework", - "//tensorflow/core:regexp_internal", + "//tensorflow/core/kernels/data:dataset_utils", "//tensorflow/core/kernels/data:name_utils", ], ) @@ -124,6 +124,25 @@ tf_kernel_library( ], ) +tf_kernel_library( + name = "compute_batch_size_op", + srcs = ["compute_batch_size_op.cc"], + deps = [ + "//tensorflow/core:core_cpu_internal", + "//tensorflow/core:dataset_ops_op_lib", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:regexp_internal", + "//tensorflow/core/grappler:graph_view", + "//tensorflow/core/grappler/optimizers/data:graph_utils", + "//tensorflow/core/kernels/data:dataset_utils", + "//tensorflow/core/kernels/data:name_utils", + "//tensorflow/core/kernels/data:serialization_utils", + ], +) + tf_kernel_library( name = "csv_dataset_op", srcs = ["csv_dataset_op.cc"], @@ -736,6 +755,7 @@ tf_kernel_library( ":choose_fastest_branch_dataset_op", ":choose_fastest_dataset_op", ":compression_ops", + ":compute_batch_size_op", ":csv_dataset_op", ":dense_to_sparse_batch_dataset_op", ":directed_interleave_dataset_op", diff --git a/tensorflow/core/kernels/data/experimental/assert_next_dataset_op.cc b/tensorflow/core/kernels/data/experimental/assert_next_dataset_op.cc index adda54a0cd9..cb8dc67d6dd 100644 --- a/tensorflow/core/kernels/data/experimental/assert_next_dataset_op.cc +++ b/tensorflow/core/kernels/data/experimental/assert_next_dataset_op.cc @@ -18,8 +18,8 @@ limitations under the License. 
#include "tensorflow/core/framework/partial_tensor_shape.h" #include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/kernels/data/dataset_utils.h" #include "tensorflow/core/kernels/data/name_utils.h" -#include "tensorflow/core/platform/regexp.h" namespace tensorflow { namespace data { @@ -97,15 +97,12 @@ class AssertNextDatasetOp::Dataset : public DatasetBase { } int n = tokens.size(); for (size_t i = 0; i < dataset()->transformations_.size(); ++i) { - std::string transformation_escaped = - RE2::QuoteMeta(dataset()->transformations_[i]); - std::string version_suffix = "(V\\d+)?"; - std::string expected_re = - absl::StrCat(transformation_escaped, version_suffix); - if (!RE2::FullMatch(tokens[n - 2 - i], expected_re)) { + if (!MatchesAnyVersionRE(dataset()->transformations_[i], + tokens[n - 2 - i])) { return errors::InvalidArgument("Asserted transformation matching ", - expected_re, " at offset ", i, - " but encountered ", tokens[n - 2 - i], + dataset()->transformations_[i], + " at offset ", i, " but encountered ", + tokens[n - 2 - i], " transformation instead."); } } diff --git a/tensorflow/core/kernels/data/experimental/compute_batch_size_op.cc b/tensorflow/core/kernels/data/experimental/compute_batch_size_op.cc new file mode 100644 index 00000000000..1c4c5dea248 --- /dev/null +++ b/tensorflow/core/kernels/data/experimental/compute_batch_size_op.cc @@ -0,0 +1,191 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/framework/dataset.h" +#include "tensorflow/core/framework/tensor_util.h" +#include "tensorflow/core/grappler/graph_view.h" +#include "tensorflow/core/grappler/optimizers/data/graph_utils.h" +#include "tensorflow/core/kernels/data/dataset_utils.h" +#include "tensorflow/core/kernels/data/name_utils.h" +#include "tensorflow/core/kernels/data/serialization_utils.h" +#include "tensorflow/core/platform/stringprintf.h" + +namespace tensorflow { +namespace data { +namespace experimental { +namespace { + +using grappler::graph_utils::GetScalarConstNodeValue; + +constexpr char kMapAndBatchOp[] = "MapAndBatchDataset"; +constexpr char kExperimentalMapAndBatchOp[] = "ExperimentalMapAndBatchDataset"; + +constexpr std::array kBatchDatasetOps = { + "BatchDataset", + "PaddedBatchDataset", + kMapAndBatchOp, + kExperimentalMapAndBatchOp, +}; + +constexpr std::array kMultipleInputDatasetOps = { + "ConcatenateDataset", + "ZipDataset", +}; + +constexpr std::array kPassThroughOps = { + "AssertCardinalityDataset", + "CacheDataset", + "FilterDataset", + "Identity", + "ModelDataset", + "OptimizeDataset", + "ParseExampleDataset", + "PrefetchDataset", + "RepeatDataset", + "ShardDataset", + "ShuffleAndRepeatDataset", + "ShuffleDataset", + "SkipDataset", + "TakeDataset", +}; + +template +bool IsDatasetNodeOfType(const NodeDef& node, + const std::array& arr) { + for (const auto& dataset_op : arr) { + if (MatchesAnyVersionRE(dataset_op, node.op())) return true; + } + return false; +} + +const NodeDef* GetInputNode(const NodeDef& node, + const grappler::GraphView& graph, + int64 input_index) { + if (node.input_size() == 0) return nullptr; + grappler::GraphView::InputPort input_port = + graph.GetInputPort(node.name(), input_index); + return graph.GetRegularFanin(input_port).node; +} + +// TODO(rachelim): This op traverses the dataset graph using a allowlist-based +// approach. As an alternative, we could instead rewrite all batching datasets' +// drop_remainder parameter to True, then rerun the dataset graph to derive +// new output shapes using C++ shape inference. This is more robust in cases +// where datasets have shape inference implemented in C++. If this allowlist- +// based approach proves hard to maintain, consider doing the alternative. +class ComputeBatchSizeOp : public OpKernel { + public: + explicit ComputeBatchSizeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override { + DatasetBase* dataset; + OP_REQUIRES_OK(ctx, GetDatasetFromVariantTensor(ctx->input(0), &dataset)); + + std::vector> input_list; + GraphDef graph_def; + string dataset_node_name; + OP_REQUIRES_OK(ctx, AsGraphDefMinimal(ctx, dataset, &input_list, &graph_def, + &dataset_node_name)); + + // Create GraphView for easier traversal of graph. 
+ grappler::GraphView graph_view(&graph_def); + + const NodeDef* node = graph_view.GetNode(dataset_node_name); + OP_REQUIRES(ctx, node != nullptr, + errors::InvalidArgument("Node does not exist in graph")); + int64 batch_size = GetBatchSize(*node, graph_view); + Tensor* result; + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({}), &result)); + result->scalar()() = batch_size; + } + + private: + int64 GetBatchSizeFromBatchNode(const NodeDef& node, + const grappler::GraphView& graph) { + int64 arg_index; + if (node.op() == kMapAndBatchOp || + node.op() == kExperimentalMapAndBatchOp) { + arg_index = node.input_size() - 3; + } else { + arg_index = 1; + } + + auto batch_size_node = GetInputNode(node, graph, arg_index); + int64 batch_size; + auto s = GetScalarConstNodeValue(*batch_size_node, &batch_size); + if (!s.ok()) { + VLOG(1) << "Could not compute static batch size. Found batching dataset (" + << node.name() << "), but failed to get its input batch size: " + << s.error_message(); + return -1; + } + return batch_size; + } + + // Helper function that returns the static 0th dimension of a given dataset + // node in the graph. It starts from a node in the graph and recursively + // traverses its inputs until it finds a valid BatchDataset operation, + // and returns its batch size. If the batch size cannot be determined, + // returns -1. + // + // During recursion, it handles four kinds of cases: + // 1. BatchDataset type ops: Returns the value from its batch_size input node. + // 2. Zip / Concatenate dataset ops: Recurses into all inputs to these ops, + // which are themselves all datasets, and returns the batch sizes computed + // by the inputs if they are all the same. + // 3. Core dataset ops which cannot change the size of the 0th dimension of + // dataset output elements: Recurses into the first input parameter. + // 4. All other ops: Fail, returning -1 for unknown. + // TODO(rachelim): For FlatMap type mapping dataset ops, recurse into the + // function definition. + int64 GetBatchSize(const NodeDef& node, const grappler::GraphView& graph) { + if (IsDatasetNodeOfType(node, kBatchDatasetOps)) { + return GetBatchSizeFromBatchNode(node, graph); + } + if (IsDatasetNodeOfType(node, kMultipleInputDatasetOps)) { + const NodeDef* input_0 = GetInputNode(node, graph, 0); + int64 batch_size_0 = GetBatchSize(*input_0, graph); + for (int i = 1; i < node.input_size(); ++i) { + const NodeDef* input = GetInputNode(node, graph, i); + auto batch_size_i = GetBatchSize(*input, graph); + if (batch_size_i != batch_size_0) { + VLOG(1) << "Could not compute batch size: inputs to " << node.name() + << " (" << node.op() << ") had different batch sizes." 
+ << " Namely, input 0 had batch size " << batch_size_0 + << " while input " << i << " had batch size " << batch_size_i + << "."; + return -1; + } + } + return batch_size_0; + } + if (IsDatasetNodeOfType(node, kPassThroughOps)) { + const NodeDef* input = GetInputNode(node, graph, 0); + return GetBatchSize(*input, graph); + } + VLOG(1) << "Encountered dataset node " << node.name() << " (" << node.op() + << ") that prevented further static batch size analysis."; + + return -1; + } +}; + +REGISTER_KERNEL_BUILDER(Name("ComputeBatchSize").Device(DEVICE_CPU), + ComputeBatchSizeOp); + +} // anonymous namespace +} // namespace experimental +} // namespace data +} // namespace tensorflow diff --git a/tensorflow/core/kernels/data/rewrite_utils.cc b/tensorflow/core/kernels/data/rewrite_utils.cc index 0ea708abbc7..dd9bfdb5143 100644 --- a/tensorflow/core/kernels/data/rewrite_utils.cc +++ b/tensorflow/core/kernels/data/rewrite_utils.cc @@ -144,25 +144,11 @@ Status ApplyRewrites(OpKernelContext* ctx, Status RewriteDataset(OpKernelContext* ctx, const DatasetBase* input, std::function config_factory, bool record_fingerprint, DatasetBase** rewritten_input) { - SerializationContext::Params params; std::vector> input_list; - params.input_list = &input_list; - params.external_state_policy = - SerializationContext::ExternalStatePolicy::kIgnore; - params.fail_if_unimplemented = false; - params.serialize_data_tensors = false; - params.preserve_random_seeds = false; - SerializationContext serialization_ctx(params); GraphDef graph_def; - TF_RETURN_IF_ERROR( - AsGraphDef(ctx, input, std::move(serialization_ctx), &graph_def)); - string output_node; - for (const auto& node : graph_def.node()) { - if (node.op() == "_Retval") { - output_node = node.input(0); - } - } + TF_RETURN_IF_ERROR( + AsGraphDefMinimal(ctx, input, &input_list, &graph_def, &output_node)); VLOG(3) << "Before graph rewrites: " << graph_def.DebugString(); TF_RETURN_IF_ERROR( diff --git a/tensorflow/core/kernels/data/serialization_utils.cc b/tensorflow/core/kernels/data/serialization_utils.cc index 5965c9b3295..628d6952c6d 100644 --- a/tensorflow/core/kernels/data/serialization_utils.cc +++ b/tensorflow/core/kernels/data/serialization_utils.cc @@ -53,6 +53,29 @@ Status FindStatefulOps(const GraphDef& graph_def, } // namespace +Status AsGraphDefMinimal(OpKernelContext* ctx, const DatasetBase* input, + std::vector>* input_list, + GraphDef* result, string* dataset_node) { + SerializationContext::Params params; + params.input_list = input_list; + params.external_state_policy = + SerializationContext::ExternalStatePolicy::kIgnore; + params.fail_if_unimplemented = false; + params.serialize_data_tensors = false; + params.preserve_random_seeds = false; + SerializationContext serialization_ctx(params); + TF_RETURN_IF_ERROR( + AsGraphDef(ctx, input, std::move(serialization_ctx), result)); + + // Symbolic `_Retval` node indicates which node corresponds to the dataset. 
+ for (const auto& node : result->node()) { + if (node.op() == "_Retval") { + *dataset_node = node.input(0); + } + } + return Status::OK(); +} + Status AsGraphDef(OpKernelContext* ctx, const DatasetBase* dataset, SerializationContext&& serialization_ctx, GraphDef* graph_def) { diff --git a/tensorflow/core/kernels/data/serialization_utils.h b/tensorflow/core/kernels/data/serialization_utils.h index 2e580ec7fdc..5702919b556 100644 --- a/tensorflow/core/kernels/data/serialization_utils.h +++ b/tensorflow/core/kernels/data/serialization_utils.h @@ -27,6 +27,15 @@ Status AsGraphDef(OpKernelContext* ctx, const DatasetBase* dataset, SerializationContext&& serialization_ctx, GraphDef* graph_def); +// Returns a GraphDef representation of the given dataset using the minimal +// serialization parameters (i.e. ignoring external state, not serializing +// data tensors, not failing if there are datasets which do not have AsGraphDef +// implemented). Sets the `dataset_node` parameter to the dataset's +// node name in the resulting GraphDef. +Status AsGraphDefMinimal(OpKernelContext* ctx, const DatasetBase* input, + std::vector>* input_list, + GraphDef* result, string* dataset_node); + } // namespace data } // namespace tensorflow diff --git a/tensorflow/core/ops/experimental_dataset_ops.cc b/tensorflow/core/ops/experimental_dataset_ops.cc index 5e869a2f0be..2d4b2f43746 100644 --- a/tensorflow/core/ops/experimental_dataset_ops.cc +++ b/tensorflow/core/ops/experimental_dataset_ops.cc @@ -145,6 +145,11 @@ REGISTER_OP("UncompressElement") .Attr("output_shapes: list(shape) >= 1") .SetShapeFn(shape_inference::DatasetIteratorShape); +REGISTER_OP("ComputeBatchSize") + .Input("input_dataset : variant") + .Output("batch_size : int64") + .SetShapeFn(shape_inference::ScalarShape); + REGISTER_OP("CSVDataset") .Input("filenames: string") .Input("compression_type: string") diff --git a/tensorflow/python/data/experimental/kernel_tests/rebatch_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/rebatch_dataset_test.py index 841c25b6856..c9d0d14dead 100644 --- a/tensorflow/python/data/experimental/kernel_tests/rebatch_dataset_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/rebatch_dataset_test.py @@ -230,5 +230,59 @@ class RebatchDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): _ = distribute._RebatchDataset(dataset, num_replicas=2) +class ComputeBatchSizeTest(test_base.DatasetTestBase, parameterized.TestCase): + + @combinations.generate(test_base.default_test_combinations()) + def testComputeBatchSizeKnown(self): + # When drop_remainder=True, batch size can be inferred from the type spec. + dataset = dataset_ops.Dataset.range(32).batch(4, drop_remainder=True) + dataset = dataset_ops.Dataset.zip((dataset, dataset)) + batch_size = distribute.compute_batch_size(dataset) + self.assertEqual(4, self.evaluate(batch_size)) + + @combinations.generate(test_base.default_test_combinations()) + def testComputeBatchSizeKnownAndMismatched(self): + # Return -1 when different components have different batch sizes. 
+ dataset = dataset_ops.Dataset.range(32) + dataset = dataset_ops.Dataset.zip((dataset.batch(4, drop_remainder=True), + dataset.batch(8, drop_remainder=True))) + batch_size = distribute.compute_batch_size(dataset) + self.assertEqual(-1, self.evaluate(batch_size)) + + @combinations.generate(test_base.default_test_combinations()) + def testComputeBatchSizeUnknown(self): + dataset = dataset_ops.Dataset.range(32).batch(4) + batch_size = distribute.compute_batch_size(dataset) + self.assertEqual(4, self.evaluate(batch_size)) + + @combinations.generate(test_base.default_test_combinations()) + def testComputeBatchSizeWithPassthrough(self): + dataset = dataset_ops.Dataset.range(32).batch(4) + dataset = dataset.take(5) + batch_size = distribute.compute_batch_size(dataset) + self.assertEqual(4, self.evaluate(batch_size)) + + @combinations.generate(test_base.default_test_combinations()) + def testComputeBatchSizeWithPassthroughInvalid(self): + dataset = dataset_ops.Dataset.range(32).batch(4) + dataset = dataset.map(lambda x: x + 1) + batch_size = distribute.compute_batch_size(dataset) + self.assertEqual(-1, self.evaluate(batch_size)) + + @combinations.generate(test_base.default_test_combinations()) + def testComputeBatchSizeWithZip(self): + dataset = dataset_ops.Dataset.range(32).batch(4) + dataset = dataset_ops.Dataset.zip((dataset, dataset)) + batch_size = distribute.compute_batch_size(dataset) + self.assertEqual(4, self.evaluate(batch_size)) + + @combinations.generate(test_base.default_test_combinations()) + def testComputeBatchSizeWithZipMismatched(self): + dataset = dataset_ops.Dataset.range(32) + dataset = dataset_ops.Dataset.zip((dataset.batch(4), dataset.batch(8))) + batch_size = distribute.compute_batch_size(dataset) + self.assertEqual(-1, self.evaluate(batch_size)) + + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/data/experimental/ops/distribute.py b/tensorflow/python/data/experimental/ops/distribute.py index ae3c13ecc97..9f274201e78 100644 --- a/tensorflow/python/data/experimental/ops/distribute.py +++ b/tensorflow/python/data/experimental/ops/distribute.py @@ -20,6 +20,8 @@ from __future__ import print_function from tensorflow.python.data.experimental.ops.distribute_options import ExternalStatePolicy from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import gen_experimental_dataset_ops as ged_ops @@ -169,4 +171,54 @@ def replicate(dataset, devices): return datasets +def compute_batch_size(dataset): + """An operation that returns the batch size of the dataset. + + This op tries to infer the batch size statically by walking up the dataset + tree from the final dataset node and returning the batch size of the first + batching dataset (such as from .batch() and .padded_batch()) that it + encounters. This differs from using the `element_spec` of a dataset in that it + does not account for partial batches. + + This operation may fail if it encounters contradictory batch sizes (for + example, if the dataset is created by zipping together two datasets with + different batch sizes), if there are no explicit batching transformations, or + if there are operations downstream from the batching transformation that may + modify its batch size. In these cases, it returns a -1. 
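(Editorial aside, not part of this diff; it restates the two paths described above
using the names this change introduces:

    # drop_remainder=True makes the leading dimension static, so the Python
    # wrapper reads it from the element_spec and never runs the new op.
    ds = dataset_ops.Dataset.range(32).batch(4, drop_remainder=True)

    # Without drop_remainder the leading dimension is unknown, so the wrapper
    # falls back to the ComputeBatchSize op, which walks up the dataset graph;
    # contradictory inputs, e.g. zipping batch(4) with batch(8), make it -1.
    ds = dataset_ops.Dataset.range(32).batch(4)

    batch_size = distribute.compute_batch_size(ds)  # tf.int64 scalar
)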
+ + Args: + dataset: A `tf.data.Dataset` object. + + Returns: + A `tf.int64` Tensor representing the batch size of the dataset sans partial + batches. If this cannot be inferred statically, the value of this tensor + will be -1. + """ + + def get_static_batch_dim(output_shape): + if output_shape.rank is None: + return None + return output_shape.dims[0].value + + batch_dims = [ + get_static_batch_dim(ts._to_legacy_output_shapes()) # pylint: disable=protected-access + for ts in nest.flatten(dataset_ops.get_structure(dataset)) + ] + + if all(d is not None for d in batch_dims): + + if all(d == batch_dims[0] for d in batch_dims): + # If all batch dimensions are known and equal, return that directly. + batch_dim = batch_dims[0] + else: + # If all batch dimensions are known but not all equal, return -1. + batch_dim = -1 + + return constant_op.constant( + batch_dim, dtype=dtypes.int64, name="static_batch_size") + + # If any batch dimensions are unknown, use compute_batch_size op. + return ged_ops.compute_batch_size(dataset._variant_tensor) # pylint: disable=protected-access + + _AutoShardDatasetV1.__doc__ = _AutoShardDataset.__doc__ diff --git a/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt index 0db6da3dad2..c597bc2f8f1 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt @@ -792,6 +792,10 @@ tf_module { name: "ComputeAccidentalHits" argspec: "args=[\'true_classes\', \'sampled_candidates\', \'num_true\', \'seed\', \'seed2\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'0\', \'None\'], " } + member_method { + name: "ComputeBatchSize" + argspec: "args=[\'input_dataset\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "Concat" argspec: "args=[\'concat_dim\', \'values\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt index 0db6da3dad2..c597bc2f8f1 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt @@ -792,6 +792,10 @@ tf_module { name: "ComputeAccidentalHits" argspec: "args=[\'true_classes\', \'sampled_candidates\', \'num_true\', \'seed\', \'seed2\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'0\', \'None\'], " } + member_method { + name: "ComputeBatchSize" + argspec: "args=[\'input_dataset\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "Concat" argspec: "args=[\'concat_dim\', \'values\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " From e48b48ffd2954bbb5da0ce9887736afef4faeef2 Mon Sep 17 00:00:00 2001 From: Hye Soo Yang Date: Tue, 21 Jul 2020 14:42:49 -0700 Subject: [PATCH 0972/2522] Extend forward compatibility date. PiperOrigin-RevId: 322446074 Change-Id: I13a7720b156a36e7723616adcb07b41df9a3cc64 --- tensorflow/python/ops/image_ops_impl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index 1b13f25639b..9cc6a6d9c26 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -2673,7 +2673,7 @@ def decode_image(contents, ValueError: On incorrect number of channels. 
""" with ops.name_scope(name, 'decode_image'): - if compat.forward_compatible(2020, 7, 14): + if compat.forward_compatible(2020, 8, 14): channels = 0 if channels is None else channels if dtype not in [dtypes.float32, dtypes.uint8, dtypes.uint16]: dest_dtype = dtype From cc532b863fad1b408b2d9aad6caf2d839b15b110 Mon Sep 17 00:00:00 2001 From: Jaesung Chung Date: Tue, 21 Jul 2020 14:44:51 -0700 Subject: [PATCH 0973/2522] Make string tensor accept byte array inputs for ParseExample op in Java API PiperOrigin-RevId: 322446486 Change-Id: I1fec0d4f1dd645972fbf04f0647c07731cfc87fa --- tensorflow/lite/java/BUILD | 1 + .../main/java/org/tensorflow/lite/Tensor.java | 21 +++++++++-- .../lite/java/src/main/native/tensor_jni.cc | 35 +++++++++++++------ .../java/org/tensorflow/lite/TensorTest.java | 28 +++++++++++---- 4 files changed, 65 insertions(+), 20 deletions(-) diff --git a/tensorflow/lite/java/BUILD b/tensorflow/lite/java/BUILD index 89be932ab4d..f7a0c892bcf 100644 --- a/tensorflow/lite/java/BUILD +++ b/tensorflow/lite/java/BUILD @@ -325,6 +325,7 @@ java_test( "src/testdata/int32.bin", "src/testdata/int64.bin", "src/testdata/quantized.bin", + "src/testdata/string.bin", ], javacopts = JAVACOPTS, tags = [ diff --git a/tensorflow/lite/java/src/main/java/org/tensorflow/lite/Tensor.java b/tensorflow/lite/java/src/main/java/org/tensorflow/lite/Tensor.java index 558200a7da7..b69ab2072c1 100644 --- a/tensorflow/lite/java/src/main/java/org/tensorflow/lite/Tensor.java +++ b/tensorflow/lite/java/src/main/java/org/tensorflow/lite/Tensor.java @@ -302,7 +302,7 @@ public final class Tensor { } /** Returns the type of the data. */ - static DataType dataTypeOf(Object o) { + DataType dataTypeOf(Object o) { if (o != null) { Class c = o.getClass(); // For arrays, the data elements must be a *primitive* type, e.g., an @@ -316,6 +316,10 @@ public final class Tensor { } else if (int.class.equals(c)) { return DataType.INT32; } else if (byte.class.equals(c)) { + // Byte array can be used for storing string tensors, especially for ParseExample op. + if (dtype == DataType.STRING) { + return DataType.STRING; + } return DataType.UINT8; } else if (long.class.equals(c)) { return DataType.INT64; @@ -345,8 +349,21 @@ public final class Tensor { } /** Returns the shape of an object as an int array. */ - static int[] computeShapeOf(Object o) { + int[] computeShapeOf(Object o) { int size = computeNumDimensions(o); + if (dtype == DataType.STRING) { + Class c = o.getClass(); + if (c.isArray()) { + while (c.isArray()) { + c = c.getComponentType(); + } + // If the given string data is stored in byte streams, the last array dimension should be + // treated as a value. + if (byte.class.equals(c)) { + --size; + } + } + } int[] dimensions = new int[size]; fillShape(o, 0, dimensions); return dimensions; diff --git a/tensorflow/lite/java/src/main/native/tensor_jni.cc b/tensorflow/lite/java/src/main/native/tensor_jni.cc index dfa4e22162a..1f6fa3ed249 100644 --- a/tensorflow/lite/java/src/main/native/tensor_jni.cc +++ b/tensorflow/lite/java/src/main/native/tensor_jni.cc @@ -28,6 +28,9 @@ using tflite::jni::ThrowException; namespace { +static const char* kByteArrayClassPath = "[B"; +static const char* kStringClassPath = "java/lang/String"; + // Convenience handle for obtaining a TfLiteTensor given an interpreter and // tensor index. 
// @@ -271,13 +274,24 @@ size_t WriteMultiDimensionalArray(JNIEnv* env, jobject src, TfLiteType type, } } -void AddStringDynamicBuffer(JNIEnv* env, jstring src, +void AddStringDynamicBuffer(JNIEnv* env, jobject src, tflite::DynamicBuffer* dst_buffer) { - const char* chars = env->GetStringUTFChars(src, nullptr); - // + 1 for terminating character. - const int byte_len = env->GetStringUTFLength(src) + 1; - dst_buffer->AddString(chars, byte_len); - env->ReleaseStringUTFChars(src, chars); + if (env->IsInstanceOf(src, env->FindClass(kStringClassPath))) { + jstring str = static_cast(src); + const char* chars = env->GetStringUTFChars(str, nullptr); + // + 1 for terminating character. + const int byte_len = env->GetStringUTFLength(str) + 1; + dst_buffer->AddString(chars, byte_len); + env->ReleaseStringUTFChars(str, chars); + } + if (env->IsInstanceOf(src, env->FindClass(kByteArrayClassPath))) { + jbyteArray byte_array = static_cast(src); + jsize byte_array_length = env->GetArrayLength(byte_array); + jbyte* bytes = env->GetByteArrayElements(byte_array, nullptr); + dst_buffer->AddString(reinterpret_cast(bytes), + byte_array_length); + env->ReleaseByteArrayElements(byte_array, bytes, JNI_ABORT); + } } void PopulateStringDynamicBuffer(JNIEnv* env, jobject src, @@ -290,10 +304,9 @@ void PopulateStringDynamicBuffer(JNIEnv* env, jobject src, // recursively call populateStringDynamicBuffer over sub-dimensions. if (dims_left <= 1) { for (int i = 0; i < num_elements; ++i) { - jstring string_obj = - static_cast(env->GetObjectArrayElement(object_array, i)); - AddStringDynamicBuffer(env, string_obj, dst_buffer); - env->DeleteLocalRef(string_obj); + jobject obj = env->GetObjectArrayElement(object_array, i); + AddStringDynamicBuffer(env, obj, dst_buffer); + env->DeleteLocalRef(obj); } } else { for (int i = 0; i < num_elements; ++i) { @@ -358,7 +371,7 @@ void WriteScalar(JNIEnv* env, jobject src, TfLiteType type, void* dst, void WriteScalarString(JNIEnv* env, jobject src, TfLiteTensor* tensor) { tflite::DynamicBuffer dst_buffer; - AddStringDynamicBuffer(env, static_cast(src), &dst_buffer); + AddStringDynamicBuffer(env, src, &dst_buffer); if (!env->ExceptionCheck()) { dst_buffer.WriteToTensor(tensor, /*new_shape=*/nullptr); } diff --git a/tensorflow/lite/java/src/test/java/org/tensorflow/lite/TensorTest.java b/tensorflow/lite/java/src/test/java/org/tensorflow/lite/TensorTest.java index be09bd5b8fe..4305de8000d 100644 --- a/tensorflow/lite/java/src/test/java/org/tensorflow/lite/TensorTest.java +++ b/tensorflow/lite/java/src/test/java/org/tensorflow/lite/TensorTest.java @@ -46,6 +46,9 @@ public final class TensorTest { private static final String LONG_MODEL_PATH = "tensorflow/lite/java/src/testdata/int64.bin"; + private static final String STRING_MODEL_PATH = + "tensorflow/lite/java/src/testdata/string.bin"; + private static final String QUANTIZED_MODEL_PATH = "tensorflow/lite/java/src/testdata/quantized.bin"; @@ -412,30 +415,30 @@ public final class TensorTest { @Test public void testDataTypeOf() { float[] testEmptyArray = {}; - DataType dataType = Tensor.dataTypeOf(testEmptyArray); + DataType dataType = tensor.dataTypeOf(testEmptyArray); assertThat(dataType).isEqualTo(DataType.FLOAT32); float[] testFloatArray = {0.783f, 0.251f}; - dataType = Tensor.dataTypeOf(testFloatArray); + dataType = tensor.dataTypeOf(testFloatArray); assertThat(dataType).isEqualTo(DataType.FLOAT32); float[][] testMultiDimArray = {testFloatArray, testFloatArray, testFloatArray}; - dataType = Tensor.dataTypeOf(testMultiDimArray); + dataType = 
tensor.dataTypeOf(testMultiDimArray); assertThat(dataType).isEqualTo(DataType.FLOAT32); FloatBuffer testFloatBuffer = FloatBuffer.allocate(1); - dataType = Tensor.dataTypeOf(testFloatBuffer); + dataType = tensor.dataTypeOf(testFloatBuffer); assertThat(dataType).isEqualTo(DataType.FLOAT32); float testFloat = 1.0f; - dataType = Tensor.dataTypeOf(testFloat); + dataType = tensor.dataTypeOf(testFloat); assertThat(dataType).isEqualTo(DataType.FLOAT32); try { double[] testDoubleArray = {0.783, 0.251}; - Tensor.dataTypeOf(testDoubleArray); + tensor.dataTypeOf(testDoubleArray); fail(); } catch (IllegalArgumentException e) { assertThat(e).hasMessageThat().contains("cannot resolve DataType of"); } try { Float[] testBoxedArray = {0.783f, 0.251f}; - Tensor.dataTypeOf(testBoxedArray); + tensor.dataTypeOf(testBoxedArray); fail(); } catch (IllegalArgumentException e) { assertThat(e).hasMessageThat().contains("cannot resolve DataType of [Ljava.lang.Float;"); @@ -528,4 +531,15 @@ public final class TensorTest { assertThat(scale).isWithin(1e-6f).of(0.25f); assertThat(zeroPoint).isEqualTo(127); } + + @Test + public void testByteArrayStringTensorInput() { + NativeInterpreterWrapper wrapper = new NativeInterpreterWrapper(STRING_MODEL_PATH); + wrapper.resizeInput(0, new int[] {1}); + Tensor stringTensor = wrapper.getInputTensor(0); + + byte[][] byteArray = new byte[][] {new byte[1]}; + assertThat(stringTensor.dataTypeOf(byteArray)).isEqualTo(DataType.STRING); + assertThat(stringTensor.shape()).isEqualTo(new int[] {1}); + } } From 33d842c295717fc07d76f6c4d6e16c83bea88313 Mon Sep 17 00:00:00 2001 From: Zhenyu Tan Date: Tue, 21 Jul 2020 14:45:02 -0700 Subject: [PATCH 0974/2522] fixit for get_json_type. PiperOrigin-RevId: 322446524 Change-Id: I4301abc31713637cdc1a5992a92b4322d405806f --- tensorflow/python/keras/engine/node.py | 4 +- tensorflow/python/keras/engine/training.py | 4 +- .../keras/feature_column/dense_features.py | 4 +- tensorflow/python/keras/saving/hdf5_format.py | 3 +- .../keras/saving/saved_model/json_utils.py | 56 +++++++++++++++++++ .../keras/tests/serialization_util_test.py | 8 +-- .../training/tracking/data_structures_test.py | 8 +-- 7 files changed, 71 insertions(+), 16 deletions(-) diff --git a/tensorflow/python/keras/engine/node.py b/tensorflow/python/keras/engine/node.py index 30771a181f1..c61f9ff5fda 100644 --- a/tensorflow/python/keras/engine/node.py +++ b/tensorflow/python/keras/engine/node.py @@ -28,9 +28,9 @@ from tensorflow.python.framework import tensor_util from tensorflow.python.keras import backend from tensorflow.python.keras.engine import base_layer_utils from tensorflow.python.keras.engine import keras_tensor +from tensorflow.python.keras.saving.saved_model import json_utils from tensorflow.python.keras.utils import tf_utils from tensorflow.python.util import nest -from tensorflow.python.util import serialization _CONSTANT_VALUE = '_CONSTANT_VALUE' @@ -171,7 +171,7 @@ class Node(object): kwargs = nest.map_structure(_serialize_keras_tensor, kwargs) try: - json.dumps(kwargs, default=serialization.get_json_type) + json.dumps(kwargs, default=json_utils.get_json_type) except TypeError: kwarg_types = nest.map_structure(type, kwargs) raise TypeError('Layer ' + self.layer.name + diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index bbab6fc7f98..b51212ca060 100644 --- a/tensorflow/python/keras/engine/training.py +++ b/tensorflow/python/keras/engine/training.py @@ -52,6 +52,7 @@ from tensorflow.python.keras.engine import training_utils 
from tensorflow.python.keras.mixed_precision.experimental import loss_scale_optimizer as lso from tensorflow.python.keras.saving import hdf5_format from tensorflow.python.keras.saving import save +from tensorflow.python.keras.saving.saved_model import json_utils from tensorflow.python.keras.saving.saved_model import model_serialization from tensorflow.python.keras.utils import generic_utils from tensorflow.python.keras.utils import layer_utils @@ -77,7 +78,6 @@ from tensorflow.python.training.tracking import layer_utils as trackable_layer_u from tensorflow.python.training.tracking import util as trackable_utils from tensorflow.python.util import deprecation from tensorflow.python.util import nest -from tensorflow.python.util import serialization from tensorflow.python.util import tf_decorator from tensorflow.python.util.tf_export import keras_export from tensorflow.tools.docs import doc_controls @@ -2262,7 +2262,7 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector): """ model_config = self._updated_config() return json.dumps( - model_config, default=serialization.get_json_type, **kwargs) + model_config, default=json_utils.get_json_type, **kwargs) def to_yaml(self, **kwargs): """Returns a yaml string containing the network configuration. diff --git a/tensorflow/python/keras/feature_column/dense_features.py b/tensorflow/python/keras/feature_column/dense_features.py index ef533b71fe7..132343835b3 100644 --- a/tensorflow/python/keras/feature_column/dense_features.py +++ b/tensorflow/python/keras/feature_column/dense_features.py @@ -24,7 +24,7 @@ from tensorflow.python.feature_column import feature_column_v2 as fc from tensorflow.python.framework import ops from tensorflow.python.keras import backend from tensorflow.python.keras.feature_column import base_feature_layer as kfc -from tensorflow.python.util import serialization +from tensorflow.python.keras.saving.saved_model import json_utils from tensorflow.python.util.tf_export import keras_export @@ -112,7 +112,7 @@ class DenseFeatures(kfc._BaseFeaturesLayer): # pylint: disable=protected-access """ metadata = json.loads(super(DenseFeatures, self)._tracking_metadata) metadata['_is_feature_layer'] = True - return json.dumps(metadata, default=serialization.get_json_type) + return json.dumps(metadata, default=json_utils.get_json_type) def _target_shape(self, input_shape, total_elements): return (input_shape[0], total_elements) diff --git a/tensorflow/python/keras/saving/hdf5_format.py b/tensorflow/python/keras/saving/hdf5_format.py index 7f6dac0cbc0..31c9a6e14e0 100644 --- a/tensorflow/python/keras/saving/hdf5_format.py +++ b/tensorflow/python/keras/saving/hdf5_format.py @@ -35,7 +35,6 @@ from tensorflow.python.keras.utils.generic_utils import LazyLoader from tensorflow.python.keras.utils.io_utils import ask_to_proceed_with_overwrite from tensorflow.python.ops import variables as variables_module from tensorflow.python.platform import tf_logging as logging -from tensorflow.python.util import serialization # pylint: disable=g-import-not-at-top try: @@ -111,7 +110,7 @@ def save_model_to_hdf5(model, filepath, overwrite=True, include_optimizer=True): for k, v in model_metadata.items(): if isinstance(v, (dict, list, tuple)): f.attrs[k] = json.dumps( - v, default=serialization.get_json_type).encode('utf8') + v, default=json_utils.get_json_type).encode('utf8') else: f.attrs[k] = v diff --git a/tensorflow/python/keras/saving/saved_model/json_utils.py b/tensorflow/python/keras/saving/saved_model/json_utils.py index 0ac86d4e692..cf7e2300852 
100644 --- a/tensorflow/python/keras/saving/saved_model/json_utils.py +++ b/tensorflow/python/keras/saving/saved_model/json_utils.py @@ -26,10 +26,19 @@ from __future__ import division from __future__ import print_function import json +import numpy as np +import wrapt +from tensorflow.python.framework import dtypes from tensorflow.python.framework import tensor_shape from tensorflow.python.util import serialization +try: + # This import only works on python 3.3 and above. + import collections.abc as collections_abc # pylint: disable=unused-import, g-import-not-at-top +except ImportError: + import collections as collections_abc # pylint: disable=unused-import, g-import-not-at-top + class Encoder(json.JSONEncoder): """JSON encoder and decoder that handles TensorShapes and tuples.""" @@ -67,3 +76,50 @@ def _decode_helper(obj): elif obj['class_name'] == '__tuple__': return tuple(_decode_helper(i) for i in obj['items']) return obj + + +def get_json_type(obj): + """Serializes any object to a JSON-serializable structure. + + Arguments: + obj: the object to serialize + + Returns: + JSON-serializable structure representing `obj`. + + Raises: + TypeError: if `obj` cannot be serialized. + """ + # if obj is a serializable Keras class instance + # e.g. optimizer, layer + if hasattr(obj, 'get_config'): + return {'class_name': obj.__class__.__name__, 'config': obj.get_config()} + + # if obj is any numpy type + if type(obj).__module__ == np.__name__: + if isinstance(obj, np.ndarray): + return obj.tolist() + else: + return obj.item() + + # misc functions (e.g. loss function) + if callable(obj): + return obj.__name__ + + # if obj is a python 'type' + if type(obj).__name__ == type.__name__: + return obj.__name__ + + if isinstance(obj, tensor_shape.TensorShape): + return obj.as_list() + + if isinstance(obj, dtypes.DType): + return obj.name + + if isinstance(obj, collections_abc.Mapping): + return dict(obj) + + if isinstance(obj, wrapt.ObjectProxy): + return obj.__wrapped__ + + raise TypeError('Not JSON Serializable:', obj) diff --git a/tensorflow/python/keras/tests/serialization_util_test.py b/tensorflow/python/keras/tests/serialization_util_test.py index 0736d2217c8..f24d24ceacb 100644 --- a/tensorflow/python/keras/tests/serialization_util_test.py +++ b/tensorflow/python/keras/tests/serialization_util_test.py @@ -27,8 +27,8 @@ from tensorflow.python.keras.engine import input_layer from tensorflow.python.keras.engine import sequential from tensorflow.python.keras.engine import training from tensorflow.python.keras.layers import core +from tensorflow.python.keras.saving.saved_model import json_utils from tensorflow.python.platform import test -from tensorflow.python.util import serialization @combinations.generate(combinations.combine(mode=["graph", "eager"])) @@ -38,7 +38,7 @@ class SerializationTests(keras_parameterized.TestCase): dense = core.Dense(3) dense(constant_op.constant([[4.]])) round_trip = json.loads(json.dumps( - dense, default=serialization.get_json_type)) + dense, default=json_utils.get_json_type)) self.assertEqual(3, round_trip["config"]["units"]) def test_serialize_sequential(self): @@ -47,7 +47,7 @@ class SerializationTests(keras_parameterized.TestCase): model.add(core.Dense(5)) model(constant_op.constant([[1.]])) sequential_round_trip = json.loads( - json.dumps(model, default=serialization.get_json_type)) + json.dumps(model, default=json_utils.get_json_type)) self.assertEqual( # Note that `config['layers'][0]` will be an InputLayer in V2 # (but not in V1) @@ -59,7 +59,7 @@ class 
SerializationTests(keras_parameterized.TestCase): model = training.Model(x, y) model(constant_op.constant([[1., 1., 1.]])) model_round_trip = json.loads( - json.dumps(model, default=serialization.get_json_type)) + json.dumps(model, default=json_utils.get_json_type)) self.assertEqual( 10, model_round_trip["config"]["layers"][1]["config"]["units"]) diff --git a/tensorflow/python/training/tracking/data_structures_test.py b/tensorflow/python/training/tracking/data_structures_test.py index 90f8fbdef64..f87bcc8e4d1 100644 --- a/tensorflow/python/training/tracking/data_structures_test.py +++ b/tensorflow/python/training/tracking/data_structures_test.py @@ -30,6 +30,7 @@ from tensorflow.python.eager import def_function from tensorflow.python.eager import test from tensorflow.python.framework import constant_op from tensorflow.python.framework import tensor_shape +from tensorflow.python.keras.saving.saved_model import json_utils from tensorflow.python.layers import core as non_keras_core from tensorflow.python.module import module from tensorflow.python.ops import array_ops @@ -39,7 +40,6 @@ from tensorflow.python.training.tracking import data_structures from tensorflow.python.training.tracking import tracking from tensorflow.python.training.tracking import util from tensorflow.python.util import nest -from tensorflow.python.util import serialization class ListTests(test.TestCase): @@ -47,7 +47,7 @@ class ListTests(test.TestCase): def testJSONSerialization(self): obj = tracking.AutoTrackable() obj.l = [1] - json.dumps(obj.l, default=serialization.get_json_type) + json.dumps(obj.l, default=json_utils.get_json_type) def testNotTrackable(self): class NotTrackable(object): @@ -337,7 +337,7 @@ class MappingTests(test.TestCase): def testJSONSerialization(self): obj = tracking.AutoTrackable() obj.d = {"a": 2} - json.dumps(obj.d, default=serialization.get_json_type) + json.dumps(obj.d, default=json_utils.get_json_type) def testNoOverwrite(self): mapping = data_structures.Mapping() @@ -519,7 +519,7 @@ class TupleTests(test.TestCase, parameterized.TestCase): def testJSONSerialization(self): obj = tracking.AutoTrackable() obj.l = (1,) - json.dumps(obj.l, default=serialization.get_json_type) + json.dumps(obj.l, default=json_utils.get_json_type) def testNonLayerVariables(self): v = resource_variable_ops.ResourceVariable([1.]) From f1e43cfe4080148888943fd33551d27903df8547 Mon Sep 17 00:00:00 2001 From: Jay Shi Date: Tue, 21 Jul 2020 14:47:49 -0700 Subject: [PATCH 0975/2522] [tf.data] Rename the input for `SnapshotHelper` function. Current name may be a bit misleading. Also add some tests to verify the parent node pointer is correctly copied in `SnapshotTest`. 
PiperOrigin-RevId: 322447091 Change-Id: Ia27da3a16f6907da6f099fc8fd63aab26eb1dbbb --- tensorflow/core/framework/model.cc | 40 ++++++++++++++----------- tensorflow/core/framework/model.h | 2 +- tensorflow/core/framework/model_test.cc | 40 +++++++++++++++++-------- 3 files changed, 50 insertions(+), 32 deletions(-) diff --git a/tensorflow/core/framework/model.cc b/tensorflow/core/framework/model.cc index 7e70c0eab75..94355cc6ea5 100644 --- a/tensorflow/core/framework/model.cc +++ b/tensorflow/core/framework/model.cc @@ -979,10 +979,10 @@ std::shared_ptr Node::Snapshot() const { while (!node_pairs.empty()) { auto node_pair = node_pairs.front(); node_pairs.pop_front(); - std::shared_ptr input_node = node_pair.first, - parent_node_copy = node_pair.second; - parent_node_copy->add_input( - input_node->SnapshotHelper(parent_node_copy, &node_pairs)); + std::shared_ptr current = node_pair.first, + cloned_output = node_pair.second; + cloned_output->add_input( + current->SnapshotHelper(cloned_output, &node_pairs)); } return result; } @@ -1185,26 +1185,30 @@ void Node::DebugStringHelper(absl::flat_hash_map* debug_strings) } std::shared_ptr Node::SnapshotHelper( - std::shared_ptr clone_base, Node::NodePairList* node_pairs) const { + std::shared_ptr cloned_output, Node::NodePairList* node_pairs) const { tf_shared_lock l(mu_); - std::shared_ptr result_node = Clone(clone_base); + + // Clone current node(`this`), also set clone of its output node + // (`cloned_output`) to be the output node of the cloned node + // (`cloned_current`). + std::shared_ptr cloned_current = Clone(cloned_output); { - result_node->autotune_.store(autotune_); - result_node->buffered_bytes_.store(buffered_bytes_); - result_node->buffered_elements_.store(buffered_elements_); - result_node->bytes_consumed_.store(bytes_consumed_); - result_node->bytes_produced_.store(bytes_produced_); - result_node->num_elements_.store(num_elements_); - result_node->record_metrics_.store(false); - result_node->processing_time_.store(processing_time_); - mutex_lock l2(result_node->mu_); - result_node->parameters_ = parameters_; + cloned_current->autotune_.store(autotune_); + cloned_current->buffered_bytes_.store(buffered_bytes_); + cloned_current->buffered_elements_.store(buffered_elements_); + cloned_current->bytes_consumed_.store(bytes_consumed_); + cloned_current->bytes_produced_.store(bytes_produced_); + cloned_current->num_elements_.store(num_elements_); + cloned_current->record_metrics_.store(false); + cloned_current->processing_time_.store(processing_time_); + mutex_lock l2(cloned_current->mu_); + cloned_current->parameters_ = parameters_; } for (auto& input : inputs_) { - node_pairs->push_back(std::make_pair(input, result_node)); + node_pairs->push_back(std::make_pair(input, cloned_current)); } - return result_node; + return cloned_current; } void Node::TotalBufferedBytesHelper( diff --git a/tensorflow/core/framework/model.h b/tensorflow/core/framework/model.h index 5ddd64853a8..71c4010ae40 100644 --- a/tensorflow/core/framework/model.h +++ b/tensorflow/core/framework/model.h @@ -492,7 +492,7 @@ class Node { const TF_SHARED_LOCKS_REQUIRED(mu_); // Copy the node and add the (input, copy) pairs to the NodePairList. - std::shared_ptr SnapshotHelper(std::shared_ptr clone_base, + std::shared_ptr SnapshotHelper(std::shared_ptr cloned_output, NodePairList* node_pairs) const; // Compute total buffered bytes for the node and store in the total bytes map. 
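For readers skimming the hunks above, the following is a deliberately simplified, self-contained C++ sketch of the breadth-first cloning pattern that Snapshot()/SnapshotHelper() implement, written with the renamed terminology: `current` is the original node being cloned, and `cloned_output` is the already-cloned consumer it gets attached to. The Node struct below is a hypothetical stand-in for illustration only, not the real model::Node from model.h.

#include <deque>
#include <memory>
#include <string>
#include <utility>
#include <vector>

// Minimal stand-in node: just enough structure to show the cloning order.
struct Node {
  explicit Node(std::string n) : name(std::move(n)) {}
  std::string name;
  std::vector<std::shared_ptr<Node>> inputs;
  Node* output = nullptr;  // Raw back-pointer to the consumer node.
};

// Breadth-first snapshot: clone the root, then repeatedly take a
// (current, cloned_output) pair off the work list, clone `current`, point the
// clone's `output` at `cloned_output`, and enqueue `current`'s inputs.
std::shared_ptr<Node> Snapshot(const std::shared_ptr<Node>& root) {
  auto cloned_root = std::make_shared<Node>(root->name);
  std::deque<std::pair<std::shared_ptr<Node>, std::shared_ptr<Node>>> work;
  for (const auto& input : root->inputs) work.emplace_back(input, cloned_root);
  while (!work.empty()) {
    auto [current, cloned_output] = work.front();
    work.pop_front();
    auto cloned_current = std::make_shared<Node>(current->name);
    cloned_current->output = cloned_output.get();
    cloned_output->inputs.push_back(cloned_current);
    for (const auto& input : current->inputs) {
      work.emplace_back(input, cloned_current);
    }
  }
  return cloned_root;
}

int main() {
  auto root = std::make_shared<Node>("root");
  root->inputs.push_back(std::make_shared<Node>("map"));
  root->inputs.front()->output = root.get();
  auto copy = Snapshot(root);
  // The clone's output pointer refers to the cloned graph, not the original.
  return copy->inputs.front()->output == copy.get() ? 0 : 1;
}

The invariant the rename makes explicit is that every work-list pair holds an original node together with the clone of its output, so output pointers in the copy never reference the original graph.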
diff --git a/tensorflow/core/framework/model_test.cc b/tensorflow/core/framework/model_test.cc index 5a4d0da374c..3fe965217b3 100644 --- a/tensorflow/core/framework/model_test.cc +++ b/tensorflow/core/framework/model_test.cc @@ -747,27 +747,41 @@ TEST(UnknownGradientTest, Model) { TEST(SnapshotTest, Model) { std::shared_ptr root = model::MakeUnknownNode({0, std::to_string(0), nullptr}); - std::shared_ptr cur_node = root; + std::shared_ptr current = root; - int64 num_nodes = 100; + int64 num_nodes = 20; for (int64 i = 1; i < num_nodes; i++) { - cur_node->add_input( - model::MakeUnknownNode({i, std::to_string(i), cur_node})); - cur_node = cur_node->inputs().front(); + std::shared_ptr input = + model::MakeUnknownNode({i, std::to_string(i), current}); + input->set_autotune(std::rand() % 2 == 1); + current->add_input(input); + current = input; } - std::shared_ptr root_copy = root->Snapshot(); - cur_node = root; - std::shared_ptr cur_node_copy = root_copy; + std::shared_ptr cloned_root = root->Snapshot(); + current = root; + std::shared_ptr cloned_current = cloned_root; for (int64 i = 0; i < num_nodes; i++) { - EXPECT_EQ(cur_node->id(), cur_node_copy->id()); - EXPECT_EQ(cur_node->name(), cur_node_copy->name()); - EXPECT_NE(cur_node.get(), cur_node_copy.get()); + EXPECT_EQ(current->id(), cloned_current->id()); + EXPECT_EQ(current->name(), cloned_current->name()); + EXPECT_EQ(current->autotune(), cloned_current->autotune()); + EXPECT_NE(current.get(), cloned_current.get()); + + if (i > 0) { + EXPECT_EQ(current->output()->long_name(), + cloned_current->output()->long_name()); + EXPECT_EQ(current->output()->autotune(), + cloned_current->output()->autotune()); + EXPECT_NE(current->output(), cloned_current->output()); + } else { + EXPECT_EQ(current->output(), nullptr); + EXPECT_EQ(cloned_current->output(), nullptr); + } if (i < num_nodes - 1) { - cur_node = cur_node->inputs().front(); - cur_node_copy = cur_node_copy->inputs().front(); + current = current->inputs().front(); + cloned_current = cloned_current->inputs().front(); } } } From 2a9b7f290e16a604c05ebafb43b9f6483c17f892 Mon Sep 17 00:00:00 2001 From: Robert David Date: Tue, 21 Jul 2020 14:56:38 -0700 Subject: [PATCH 0976/2522] Cleanup LSTM tests: Remove clipping parameters, they are always 0. 
PiperOrigin-RevId: 322448946 Change-Id: I6cf7c60f47c2adc6e88a8ea78961ab3f2e5338cb --- tensorflow/lite/kernels/lstm_test.cc | 77 +++++++++------------------- 1 file changed, 24 insertions(+), 53 deletions(-) diff --git a/tensorflow/lite/kernels/lstm_test.cc b/tensorflow/lite/kernels/lstm_test.cc index d55cbea2ca6..c7b86b1f78c 100644 --- a/tensorflow/lite/kernels/lstm_test.cc +++ b/tensorflow/lite/kernels/lstm_test.cc @@ -38,9 +38,9 @@ class LSTMOpModel : public SingleOpModel { public: LSTMOpModel(int n_batch, int n_input, int n_cell, int n_output, bool use_cifg, bool use_peephole, bool use_projection_weights, - bool use_projection_bias, float cell_clip, float proj_clip, - const TensorType weight_type, bool model_has_legacy_20_inputs, - bool is_layer_norm, bool asymmetric_quantize_inputs) + bool use_projection_bias, const TensorType weight_type, + bool model_has_legacy_20_inputs, bool is_layer_norm, + bool asymmetric_quantize_inputs) : n_input_(n_input), n_output_(n_output), weight_type_(weight_type) { input_ = AddInput({TensorType_FLOAT32, {n_batch, n_input}}); @@ -126,11 +126,12 @@ class LSTMOpModel : public SingleOpModel { output_ = AddOutput({TensorType_FLOAT32, {n_output}}); + // TODO(b/161825581): Add tests where cell_clip and/or proj_clip is not the + // default 0. SetBuiltinOp( BuiltinOperator_LSTM, BuiltinOptions_LSTMOptions, - CreateLSTMOptions(builder_, ActivationFunctionType_TANH, cell_clip, - proj_clip, ::tflite::LSTMKernelType_FULL, - asymmetric_quantize_inputs) + CreateLSTMOptions(builder_, ActivationFunctionType_TANH, + LSTMKernelType_FULL, asymmetric_quantize_inputs) .Union()); BuildInterpreter({}); // Input sizes are already set up. @@ -456,7 +457,6 @@ TEST_F(NoCifgNoPeepholeNoProjectionNoClippingLstmTest, LstmBlackBoxTest) { /*use_cifg=*/false, /*use_peephole=*/false, /*use_projection_weights=*/false, /*use_projection_bias=*/false, - /*cell_clip=*/0.0, /*proj_clip=*/0.0, /*weight_type=*/TensorType_FLOAT32, /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/false); @@ -479,7 +479,6 @@ TEST_F(NoCifgNoPeepholeNoProjectionNoClippingNoLayerNormLstmTest, /*use_cifg=*/false, /*use_peephole=*/false, /*use_projection_weights=*/false, /*use_projection_bias=*/false, - /*cell_clip=*/0.0, /*proj_clip=*/0.0, /*weight_type=*/TensorType_FLOAT32, /*model_has_legacy_20_inputs=*/false, /*is_layer_norm=*/false, @@ -503,8 +502,7 @@ TEST_P(NoCifgNoPeepholeNoProjectionNoClippingLstmTest, LSTMOpModel lstm(n_batch, n_input, n_cell, n_output, /*use_cifg=*/false, /*use_peephole=*/false, /*use_projection_weights=*/false, - /*use_projection_bias=*/false, /*cell_clip=*/0.0, - /*proj_clip=*/0.0, + /*use_projection_bias=*/false, /*weight_type=*/TensorType_UINT8, /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/GetParam()); @@ -530,8 +528,7 @@ TEST_P(NoCifgNoPeepholeNoProjectionNoClippingLstmInt8Test, LSTMOpModel lstm(n_batch, n_input, n_cell, n_output, /*use_cifg=*/false, /*use_peephole=*/false, /*use_projection_weights=*/false, - /*use_projection_bias=*/false, /*cell_clip=*/0.0, - /*proj_clip=*/0.0, + /*use_projection_bias=*/false, /*weight_type=*/TensorType_INT8, /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/GetParam()); @@ -598,7 +595,6 @@ TEST_F(CifgNoPeepholeNoProjectionNoClippingLstmTest, LstmBlackBoxTest) { /*use_cifg=*/true, /*use_peephole=*/true, /*use_projection_weights=*/false, /*use_projection_bias=*/false, - /*cell_clip=*/0.0, /*proj_clip=*/0.0, 
/*weight_type=*/TensorType_FLOAT32, /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/false); @@ -622,7 +618,6 @@ TEST_P(CifgNoPeepholeNoProjectionNoClippingLstmTest, /*use_cifg=*/true, /*use_peephole=*/true, /*use_projection_weights=*/false, /*use_projection_bias=*/false, - /*cell_clip=*/0.0, /*proj_clip=*/0.0, /*weight_type=*/TensorType_UINT8, /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/GetParam()); @@ -647,7 +642,6 @@ TEST_P(CifgNoPeepholeNoProjectionNoClippingLstmInt8Test, /*use_cifg=*/true, /*use_peephole=*/true, /*use_projection_weights=*/false, /*use_projection_bias=*/false, - /*cell_clip=*/0.0, /*proj_clip=*/0.0, /*weight_type=*/TensorType_INT8, /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/GetParam()); @@ -1264,7 +1258,6 @@ TEST_F(NoCifgPeepholeProjectionNoClippingLstmTest, LstmBlackBoxTest) { /*use_cifg=*/false, /*use_peephole=*/true, /*use_projection_weights=*/true, /*use_projection_bias=*/false, - /*cell_clip=*/0.0, /*proj_clip=*/0.0, /*weight_type=*/TensorType_FLOAT32, /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/false); @@ -1287,7 +1280,6 @@ TEST_P(NoCifgPeepholeProjectionNoClippingLstmTest, /*use_cifg=*/false, /*use_peephole=*/true, /*use_projection_weights=*/true, /*use_projection_bias=*/false, - /*cell_clip=*/0.0, /*proj_clip=*/0.0, /*weight_type=*/TensorType_UINT8, /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/GetParam()); @@ -1312,7 +1304,6 @@ TEST_P(NoCifgPeepholeProjectionNoClippingLstmInt8Test, /*use_cifg=*/false, /*use_peephole=*/true, /*use_projection_weights=*/true, /*use_projection_bias=*/false, - /*cell_clip=*/0.0, /*proj_clip=*/0.0, /*weight_type=*/TensorType_INT8, /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/GetParam()); @@ -1393,14 +1384,12 @@ TEST_F(NoCifgPeepholeProjectionNoClippingLayerNormLstmTest, const int n_input = 5; const int n_cell = 4; const int n_output = 3; - const float cell_clip = 0.0; - const float proj_clip = 0.0; LSTMOpModel layer_norm_lstm( n_batch, n_input, n_cell, n_output, /*use_cifg=*/false, /*use_peephole=*/true, /*use_projection_weights=*/true, - /*use_projection_bias=*/false, cell_clip, proj_clip, + /*use_projection_bias=*/false, /*weight_type=*/TensorType_FLOAT32, /*model_has_legacy_20_inputs=*/false, /*is_layer_norm=*/true, /*asymmetric_quantize_inputs=*/false); @@ -1431,14 +1420,12 @@ TEST_P(NoCifgPeepholeProjectionNoClippingLayerNormLstmTest, const int n_input = 5; const int n_cell = 4; const int n_output = 3; - const float cell_clip = 0.0; - const float proj_clip = 0.0; LSTMOpModel layer_norm_lstm( n_batch, n_input, n_cell, n_output, /*use_cifg=*/false, /*use_peephole=*/true, /*use_projection_weights=*/true, - /*use_projection_bias=*/false, cell_clip, proj_clip, + /*use_projection_bias=*/false, /*weight_type=*/TensorType_UINT8, /*model_has_legacy_20_inputs=*/false, /*is_layer_norm=*/true, /*asymmetric_quantize_inputs=*/GetParam()); @@ -1471,14 +1458,12 @@ TEST_P(NoCifgPeepholeProjectionNoClippingLayerNormLstmInt8Test, const int n_input = 5; const int n_cell = 4; const int n_output = 3; - const float cell_clip = 0.0; - const float proj_clip = 0.0; LSTMOpModel layer_norm_lstm( n_batch, n_input, n_cell, n_output, /*use_cifg=*/false, /*use_peephole=*/true, /*use_projection_weights=*/true, - /*use_projection_bias=*/false, cell_clip, proj_clip, + 
/*use_projection_bias=*/false, /*weight_type=*/TensorType_INT8, /*model_has_legacy_20_inputs=*/false, /*is_layer_norm=*/true, /*asymmetric_quantize_inputs=*/GetParam()); @@ -1552,14 +1537,12 @@ TEST_F(CifgPeepholeProjectionNoClippingLayerNormLstmTest, const int n_input = 5; const int n_cell = 4; const int n_output = 3; - const float cell_clip = 0.0; - const float proj_clip = 0.0; LSTMOpModel layer_norm_lstm( n_batch, n_input, n_cell, n_output, /*use_cifg=*/true, /*use_peephole=*/true, /*use_projection_weights=*/true, - /*use_projection_bias=*/false, cell_clip, proj_clip, + /*use_projection_bias=*/false, /*weight_type=*/TensorType_FLOAT32, /*model_has_legacy_20_inputs=*/false, /*is_layer_norm=*/true, /*asymmetric_quantize_inputs=*/false); @@ -1590,14 +1573,12 @@ TEST_P(CifgPeepholeProjectionNoClippingLayerNormLstmTest, const int n_input = 5; const int n_cell = 4; const int n_output = 3; - const float cell_clip = 0.0; - const float proj_clip = 0.0; LSTMOpModel layer_norm_lstm( n_batch, n_input, n_cell, n_output, /*use_cifg=*/true, /*use_peephole=*/true, /*use_projection_weights=*/true, - /*use_projection_bias=*/false, cell_clip, proj_clip, + /*use_projection_bias=*/false, /*weight_type=*/TensorType_UINT8, /*model_has_legacy_20_inputs=*/false, /*is_layer_norm=*/true, /*asymmetric_quantize_inputs=*/GetParam()); @@ -1629,14 +1610,12 @@ TEST_P(CifgPeepholeProjectionNoClippingLayerNormLstmInt8Test, const int n_input = 5; const int n_cell = 4; const int n_output = 3; - const float cell_clip = 0.0; - const float proj_clip = 0.0; LSTMOpModel layer_norm_lstm( n_batch, n_input, n_cell, n_output, /*use_cifg=*/true, /*use_peephole=*/true, /*use_projection_weights=*/true, - /*use_projection_bias=*/false, cell_clip, proj_clip, + /*use_projection_bias=*/false, /*weight_type=*/TensorType_INT8, /*model_has_legacy_20_inputs=*/false, /*is_layer_norm=*/true, /*asymmetric_quantize_inputs=*/GetParam()); @@ -1663,8 +1642,7 @@ class LSTMIntegerOpModel : public SingleOpModel { LSTMIntegerOpModel(int n_batch, int n_input, int n_cell, int n_output, bool use_cifg, bool use_peephole, bool use_projection_weights, bool use_projection_bias, - bool use_layer_norm, float cell_clip, float proj_clip, - bool use_8x8_8_implementation, + bool use_layer_norm, bool use_8x8_8_implementation, const std::vector>& ranges, const std::vector>& intermediates) : n_input_(n_input), n_output_(n_output) { @@ -1804,10 +1782,11 @@ class LSTMIntegerOpModel : public SingleOpModel { ranges[24].first, ranges[24].second}); - SetBuiltinOp(BuiltinOperator_LSTM, BuiltinOptions_LSTMOptions, - CreateLSTMOptions(builder_, ActivationFunctionType_TANH, - cell_clip, proj_clip) - .Union()); + // TODO(b/161825581): Add tests where cell_clip and/or proj_clip is not the + // default 0. + SetBuiltinOp( + BuiltinOperator_LSTM, BuiltinOptions_LSTMOptions, + CreateLSTMOptions(builder_, ActivationFunctionType_TANH).Union()); BuildInterpreter({}); // Input sizes are already set } @@ -1946,8 +1925,6 @@ TEST(LSTMIntegerOpModel, NoCifgYesLayerNormNoYesProjectionNoPeephole) { const int n_input = 5; const int n_cell = 4; const int n_output = 3; - const float cell_clip = 0.0; - const float proj_clip = 0.0; // Model related weights. 
const std::vector input_to_input_weights = { @@ -2042,7 +2019,7 @@ TEST(LSTMIntegerOpModel, NoCifgYesLayerNormNoYesProjectionNoPeephole) { /*use_cifg=*/false, /*use_peephole=*/false, /*use_projection_weights=*/true, /*use_projection_bias=*/false, - /*use_layer_norm=*/true, cell_clip, proj_clip, + /*use_layer_norm=*/true, /*use_8x8_8_implementation=*/false, ranges, intermediates); @@ -2108,8 +2085,6 @@ TEST(LSTMIntegerOpModel, NoCifgYesLayerNormNoYesProjectionYesPeephole) { const int n_input = 5; const int n_cell = 4; const int n_output = 3; - const float cell_clip = 0.0; - const float proj_clip = 0.0; // Model related weights. const std::vector input_to_input_weights = { @@ -2210,7 +2185,7 @@ TEST(LSTMIntegerOpModel, NoCifgYesLayerNormNoYesProjectionYesPeephole) { /*use_cifg=*/false, /*use_peephole=*/true, /*use_projection_weights=*/true, /*use_projection_bias=*/false, - /*use_layer_norm=*/true, cell_clip, proj_clip, + /*use_layer_norm=*/true, /*use_8x8_8_implementation=*/false, ranges, intermediates); @@ -2280,8 +2255,6 @@ TEST(LSTMIntegerOpModel, CifgYesLayerNormNoYesProjectionNoPeephole_8x8_8) { const int n_input = 5; const int n_cell = 4; const int n_output = 3; - const float cell_clip = 0.0; - const float proj_clip = 0.0; // Model related weights. const std::vector input_to_input_weights = { @@ -2379,7 +2352,7 @@ TEST(LSTMIntegerOpModel, CifgYesLayerNormNoYesProjectionNoPeephole_8x8_8) { /*use_cifg=*/true, /*use_peephole=*/false, /*use_projection_weights=*/true, /*use_projection_bias=*/true, - /*use_layer_norm=*/true, cell_clip, proj_clip, + /*use_layer_norm=*/true, /*use_8x8_8_implementation=*/true, ranges, intermediates); @@ -2451,7 +2424,6 @@ TEST(LSTMOpModel, InvalidTypeTest) { /*use_cifg=*/false, /*use_peephole=*/false, /*use_projection_weights=*/false, /*use_projection_bias=*/false, - /*cell_clip=*/0.0, /*proj_clip=*/0.0, /*weight_type=*/TensorType_INT32, /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, @@ -2462,7 +2434,6 @@ TEST(LSTMOpModel, InvalidTypeTest) { /*use_cifg=*/false, /*use_peephole=*/false, /*use_projection_weights=*/false, /*use_projection_bias=*/false, - /*cell_clip=*/0.0, /*proj_clip=*/0.0, /*weight_type=*/TensorType_COMPLEX64, /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, From 8d46f31b43de2148b7faf0eabdf64d28bed12f14 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 21 Jul 2020 15:14:57 -0700 Subject: [PATCH 0977/2522] Change AllocatePersistentBuffer API to just return a pointer if succeed, or nullptr upon failure Changes the canonical usage pattern from: void* data = nullptr; if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) == kTfLiteError) { return nullptr; } return data; to: return context->AllocatePersistentBuffer(context, sizeof(OpData); PiperOrigin-RevId: 322452971 Change-Id: If5de6a44978ce464b33605b8ed186d9767e0716d --- tensorflow/lite/c/common.h | 7 +++---- tensorflow/lite/micro/kernels/add.cc | 7 +------ tensorflow/lite/micro/kernels/cmsis-nn/add.cc | 7 +------ .../lite/micro/kernels/cmsis-nn/conv.cc | 5 ++--- .../micro/kernels/cmsis-nn/depthwise_conv.cc | 7 +------ .../micro/kernels/cmsis-nn/fully_connected.cc | 7 +------ .../lite/micro/kernels/cmsis-nn/pooling.cc | 7 +------ tensorflow/lite/micro/kernels/comparisons.cc | 7 +------ .../lite/micro/kernels/concatenation.cc | 21 +++++++------------ tensorflow/lite/micro/kernels/conv.cc | 19 +++++++---------- .../lite/micro/kernels/depthwise_conv.cc | 19 +++++++---------- tensorflow/lite/micro/kernels/dequantize.cc | 7 +------ .../lite/micro/kernels/fully_connected.cc | 7 +------ tensorflow/lite/micro/kernels/hard_swish.cc | 8 ++----- tensorflow/lite/micro/kernels/logistic.cc | 7 +------ tensorflow/lite/micro/kernels/quantize.cc | 7 +------ tensorflow/lite/micro/kernels/softmax.cc | 7 +------ tensorflow/lite/micro/kernels/svdf.cc | 7 +------ .../micro/kernels/xtensa_hifimini/conv.cc | 19 +++++++---------- .../kernels/xtensa_hifimini/depthwise_conv.cc | 19 +++++++---------- .../xtensa_hifimini/fully_connected.cc | 7 +------ .../micro/kernels/xtensa_hifimini/quantize.cc | 7 +------ .../micro/kernels/xtensa_hifimini/softmax.cc | 7 +------ .../micro/kernels/xtensa_hifimini/svdf.cc | 8 +------ .../fully_connected.cc | 7 +------ .../xtensa_hifimini_staging/quantize.cc | 7 +------ .../xtensa_hifimini_staging/softmax.cc | 7 +------ .../kernels/xtensa_hifimini_staging/svdf.cc | 7 +------ tensorflow/lite/micro/memory_helpers.cc | 6 +++--- tensorflow/lite/micro/memory_helpers_test.cc | 16 +++++++++----- tensorflow/lite/micro/micro_allocator.cc | 13 ++---------- tensorflow/lite/micro/micro_allocator.h | 2 +- tensorflow/lite/micro/micro_interpreter.cc | 6 +++--- tensorflow/lite/micro/micro_interpreter.h | 3 +-- tensorflow/lite/micro/test_helpers.cc | 4 +--- tensorflow/lite/micro/testing/test_utils.cc | 10 ++------- .../benchmark/experimental/c/c_api_types.h | 7 +++---- 37 files changed, 89 insertions(+), 236 deletions(-) diff --git a/tensorflow/lite/c/common.h b/tensorflow/lite/c/common.h index 692a8eaf7a2..0979c4dddf8 100644 --- a/tensorflow/lite/c/common.h +++ b/tensorflow/lite/c/common.h @@ -703,12 +703,11 @@ typedef struct TfLiteContext { void* profiler; // Allocate persistent buffer which has the same life time as the interpreter. + // Returns nullptr on failure. // The memory is allocated from heap for TFL, and from tail in TFLM. - // If *ptr is not nullptr, the pointer will be reallocated. - // This method is only available in Prepare stage. + // This method is only available in Init or Prepare stage. // WARNING: This is an experimental interface that is subject to change. - TfLiteStatus (*AllocatePersistentBuffer)(struct TfLiteContext* ctx, - size_t bytes, void** ptr); + void* (*AllocatePersistentBuffer)(struct TfLiteContext* ctx, size_t bytes); // Allocate a buffer which will be deallocated right after invoke phase. 
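// Hedged sketch of the canonical kernel-side usage under the new contract,
// not an excerpt of any particular kernel in this patch. `OpData` is a
// hypothetical per-op struct; TFLITE_DCHECK is used exactly as in the kernel
// diffs below. Requires "tensorflow/lite/c/common.h" for TfLiteContext.
//
// Old contract (status plus out-parameter):
//   void* data = nullptr;
//   if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) ==
//       kTfLiteError) {
//     return nullptr;
//   }
//   return data;
//
// New contract: the returned pointer itself signals failure via nullptr, so
// Init collapses to a single call.
struct OpData {
  int multiplier;
  int shift;
};

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
  // Returns nullptr on allocation failure, which Init propagates unchanged.
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}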
// The memory is allocated from heap in TFL, and from volatile arena in TFLM. diff --git a/tensorflow/lite/micro/kernels/add.cc b/tensorflow/lite/micro/kernels/add.cc index 8d2ea6c5167..be089dace88 100644 --- a/tensorflow/lite/micro/kernels/add.cc +++ b/tensorflow/lite/micro/kernels/add.cc @@ -163,12 +163,7 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node, void* Init(TfLiteContext* context, const char* buffer, size_t length) { TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - void* data = nullptr; - if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) == - kTfLiteError) { - return nullptr; - } - return data; + return context->AllocatePersistentBuffer(context, sizeof(OpData)); } TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { diff --git a/tensorflow/lite/micro/kernels/cmsis-nn/add.cc b/tensorflow/lite/micro/kernels/cmsis-nn/add.cc index 0ccdd16428d..4190e041d9e 100644 --- a/tensorflow/lite/micro/kernels/cmsis-nn/add.cc +++ b/tensorflow/lite/micro/kernels/cmsis-nn/add.cc @@ -173,12 +173,7 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node, void* Init(TfLiteContext* context, const char* buffer, size_t length) { TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - void* data = nullptr; - if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) == - kTfLiteError) { - return nullptr; - } - return data; + return context->AllocatePersistentBuffer(context, sizeof(OpData)); } TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { diff --git a/tensorflow/lite/micro/kernels/cmsis-nn/conv.cc b/tensorflow/lite/micro/kernels/cmsis-nn/conv.cc index 909b37c957e..da608411387 100644 --- a/tensorflow/lite/micro/kernels/cmsis-nn/conv.cc +++ b/tensorflow/lite/micro/kernels/cmsis-nn/conv.cc @@ -109,9 +109,8 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, } void* Init(TfLiteContext* context, const char* buffer, size_t length) { - void* raw; - context->AllocatePersistentBuffer(context, sizeof(int), &raw); - return raw; + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(int)); } TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { diff --git a/tensorflow/lite/micro/kernels/cmsis-nn/depthwise_conv.cc b/tensorflow/lite/micro/kernels/cmsis-nn/depthwise_conv.cc index 889193a8784..53d2d5692ec 100644 --- a/tensorflow/lite/micro/kernels/cmsis-nn/depthwise_conv.cc +++ b/tensorflow/lite/micro/kernels/cmsis-nn/depthwise_conv.cc @@ -102,12 +102,7 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, void* Init(TfLiteContext* context, const char* buffer, size_t length) { TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - void* data = nullptr; - if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) == - kTfLiteError) { - return nullptr; - } - return data; + return context->AllocatePersistentBuffer(context, sizeof(OpData)); } TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { diff --git a/tensorflow/lite/micro/kernels/cmsis-nn/fully_connected.cc b/tensorflow/lite/micro/kernels/cmsis-nn/fully_connected.cc index c7e64dd11f4..1ea7f98ea1b 100644 --- a/tensorflow/lite/micro/kernels/cmsis-nn/fully_connected.cc +++ b/tensorflow/lite/micro/kernels/cmsis-nn/fully_connected.cc @@ -77,12 +77,7 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, void* Init(TfLiteContext* context, const char* buffer, size_t length) { 
TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - void* data = nullptr; - if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) == - kTfLiteError) { - return nullptr; - } - return data; + return context->AllocatePersistentBuffer(context, sizeof(OpData)); } TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { diff --git a/tensorflow/lite/micro/kernels/cmsis-nn/pooling.cc b/tensorflow/lite/micro/kernels/cmsis-nn/pooling.cc index 77883f48d7e..d0babb4b98d 100644 --- a/tensorflow/lite/micro/kernels/cmsis-nn/pooling.cc +++ b/tensorflow/lite/micro/kernels/cmsis-nn/pooling.cc @@ -249,12 +249,7 @@ TfLiteStatus MaxEvalInt8(TfLiteContext* context, const TfLiteNode* node, void* Init(TfLiteContext* context, const char* buffer, size_t length) { TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - void* data = nullptr; - if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) == - kTfLiteError) { - return nullptr; - } - return data; + return context->AllocatePersistentBuffer(context, sizeof(OpData)); } TfLiteStatus MaxPrepare(TfLiteContext* context, TfLiteNode* node) { diff --git a/tensorflow/lite/micro/kernels/comparisons.cc b/tensorflow/lite/micro/kernels/comparisons.cc index e63a1f602e9..8f6a8305eb5 100644 --- a/tensorflow/lite/micro/kernels/comparisons.cc +++ b/tensorflow/lite/micro/kernels/comparisons.cc @@ -528,12 +528,7 @@ TfLiteStatus LessEqualEval(TfLiteContext* context, TfLiteNode* node) { void* Init(TfLiteContext* context, const char* buffer, size_t length) { TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - void* data = nullptr; - if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) == - kTfLiteError) { - return nullptr; - } - return data; + return context->AllocatePersistentBuffer(context, sizeof(OpData)); } TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { diff --git a/tensorflow/lite/micro/kernels/concatenation.cc b/tensorflow/lite/micro/kernels/concatenation.cc index 9b5515a3e14..e6e56a5c32c 100644 --- a/tensorflow/lite/micro/kernels/concatenation.cc +++ b/tensorflow/lite/micro/kernels/concatenation.cc @@ -119,12 +119,7 @@ void EvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node) { void* Init(TfLiteContext* context, const char* buffer, size_t length) { TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - void* data = nullptr; - if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) == - kTfLiteError) { - return nullptr; - } - return data; + return context->AllocatePersistentBuffer(context, sizeof(OpData)); } TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { @@ -184,15 +179,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { data->params.axis = CalculatePositiveAxis(params->axis, output); data->params.inputs_count = node->inputs->size; - float* input_scales = nullptr; - TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer( - context, node->inputs->size * sizeof(float), - reinterpret_cast(&input_scales))); + float* input_scales = + reinterpret_cast(context->AllocatePersistentBuffer( + context, node->inputs->size * sizeof(float))); - int32_t* input_zero_points = nullptr; - TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer( - context, node->inputs->size * sizeof(int32_t), - reinterpret_cast(&input_zero_points))); + int32_t* input_zero_points = + reinterpret_cast(context->AllocatePersistentBuffer( + context, node->inputs->size * sizeof(int32_t))); // Allocate persistent scale and zeropoint buffers. 
// Store input scale and zero point values in OpParams: diff --git a/tensorflow/lite/micro/kernels/conv.cc b/tensorflow/lite/micro/kernels/conv.cc index fec6f1e3c12..1f286dd30b8 100644 --- a/tensorflow/lite/micro/kernels/conv.cc +++ b/tensorflow/lite/micro/kernels/conv.cc @@ -109,12 +109,7 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, void* Init(TfLiteContext* context, const char* buffer, size_t length) { TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - void* data = nullptr; - if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) == - kTfLiteError) { - return nullptr; - } - return data; + return context->AllocatePersistentBuffer(context, sizeof(OpData)); } TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { @@ -137,12 +132,12 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { // Dynimically allocate per-channel quantization parameters. const int num_channels = filter->dims->data[kConvQuantizedDimension]; - TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer( - context, num_channels * sizeof(int32_t), - reinterpret_cast(&data->per_channel_output_multiplier))); - TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer( - context, num_channels * sizeof(int32_t), - reinterpret_cast(&data->per_channel_output_shift))); + data->per_channel_output_multiplier = + reinterpret_cast(context->AllocatePersistentBuffer( + context, num_channels * sizeof(int32_t))); + data->per_channel_output_shift = + reinterpret_cast(context->AllocatePersistentBuffer( + context, num_channels * sizeof(int32_t))); // All per-channel quantized tensors need valid zero point and scale arrays. if (input->type == kTfLiteInt8) { diff --git a/tensorflow/lite/micro/kernels/depthwise_conv.cc b/tensorflow/lite/micro/kernels/depthwise_conv.cc index f85323b62bb..c75e0d9db54 100644 --- a/tensorflow/lite/micro/kernels/depthwise_conv.cc +++ b/tensorflow/lite/micro/kernels/depthwise_conv.cc @@ -95,12 +95,7 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, void* Init(TfLiteContext* context, const char* buffer, size_t length) { TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - void* data = nullptr; - if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) == - kTfLiteError) { - return nullptr; - } - return data; + return context->AllocatePersistentBuffer(context, sizeof(OpData)); } TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { @@ -124,12 +119,12 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { // quantized types, only a single scale and zero point is needed. const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension]; // Dynimically allocate per-channel quantization parameters. - TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer( - context, num_channels * sizeof(int32_t), - reinterpret_cast(&data->per_channel_output_multiplier))); - TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer( - context, num_channels * sizeof(int32_t), - reinterpret_cast(&data->per_channel_output_shift))); + data->per_channel_output_multiplier = + reinterpret_cast(context->AllocatePersistentBuffer( + context, num_channels * sizeof(int32_t))); + data->per_channel_output_shift = + reinterpret_cast(context->AllocatePersistentBuffer( + context, num_channels * sizeof(int32_t))); // All per-channel quantized tensors need valid zero point and scale arrays. 
if (input->type == kTfLiteInt8) { diff --git a/tensorflow/lite/micro/kernels/dequantize.cc b/tensorflow/lite/micro/kernels/dequantize.cc index 55a41bf0a1b..607ada6a605 100644 --- a/tensorflow/lite/micro/kernels/dequantize.cc +++ b/tensorflow/lite/micro/kernels/dequantize.cc @@ -38,12 +38,7 @@ struct OpData { void* Init(TfLiteContext* context, const char* buffer, size_t length) { TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - void* data = nullptr; - if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) == - kTfLiteError) { - return nullptr; - } - return data; + return context->AllocatePersistentBuffer(context, sizeof(OpData)); } TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { diff --git a/tensorflow/lite/micro/kernels/fully_connected.cc b/tensorflow/lite/micro/kernels/fully_connected.cc index 88c150dc224..376626710dc 100644 --- a/tensorflow/lite/micro/kernels/fully_connected.cc +++ b/tensorflow/lite/micro/kernels/fully_connected.cc @@ -72,12 +72,7 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, void* Init(TfLiteContext* context, const char* buffer, size_t length) { TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - void* data = nullptr; - if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) == - kTfLiteError) { - return nullptr; - } - return data; + return context->AllocatePersistentBuffer(context, sizeof(OpData)); } TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { diff --git a/tensorflow/lite/micro/kernels/hard_swish.cc b/tensorflow/lite/micro/kernels/hard_swish.cc index d4b46c7d63a..fecb8bda409 100644 --- a/tensorflow/lite/micro/kernels/hard_swish.cc +++ b/tensorflow/lite/micro/kernels/hard_swish.cc @@ -34,12 +34,8 @@ constexpr int kInputTensor = 0; constexpr int kOutputTensor = 0; void* HardSwishInit(TfLiteContext* context, const char* buffer, size_t length) { - void* data = nullptr; - if (context->AllocatePersistentBuffer(context, sizeof(HardSwishParams), - &data) == kTfLiteError) { - return nullptr; - } - return data; + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(HardSwishParams)); } TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node) { diff --git a/tensorflow/lite/micro/kernels/logistic.cc b/tensorflow/lite/micro/kernels/logistic.cc index 312594592d7..6fcf60f7d78 100644 --- a/tensorflow/lite/micro/kernels/logistic.cc +++ b/tensorflow/lite/micro/kernels/logistic.cc @@ -68,12 +68,7 @@ TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node, void* LogisticInit(TfLiteContext* context, const char* buffer, size_t length) { TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - void* data = nullptr; - if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) == - kTfLiteError) { - return nullptr; - } - return data; + return context->AllocatePersistentBuffer(context, sizeof(OpData)); } TfLiteStatus LogisticPrepare(TfLiteContext* context, TfLiteNode* node) { diff --git a/tensorflow/lite/micro/kernels/quantize.cc b/tensorflow/lite/micro/kernels/quantize.cc index 243ed9688ac..2817697919f 100644 --- a/tensorflow/lite/micro/kernels/quantize.cc +++ b/tensorflow/lite/micro/kernels/quantize.cc @@ -36,12 +36,7 @@ struct OpData { void* Init(TfLiteContext* context, const char* buffer, size_t length) { TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - void* data = nullptr; - if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) == - 
kTfLiteError) { - return nullptr; - } - return data; + return context->AllocatePersistentBuffer(context, sizeof(OpData)); } TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { diff --git a/tensorflow/lite/micro/kernels/softmax.cc b/tensorflow/lite/micro/kernels/softmax.cc index 6dbae3b0b54..881efdae3e1 100644 --- a/tensorflow/lite/micro/kernels/softmax.cc +++ b/tensorflow/lite/micro/kernels/softmax.cc @@ -101,12 +101,7 @@ void SoftmaxQuantized(const TfLiteTensor* input, TfLiteTensor* output, void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length) { TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - void* data = nullptr; - if (context->AllocatePersistentBuffer(context, sizeof(SoftmaxParams), - &data) == kTfLiteError) { - return nullptr; - } - return data; + return context->AllocatePersistentBuffer(context, sizeof(SoftmaxParams)); } TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) { diff --git a/tensorflow/lite/micro/kernels/svdf.cc b/tensorflow/lite/micro/kernels/svdf.cc index fde5269fe63..c0bae4acc48 100644 --- a/tensorflow/lite/micro/kernels/svdf.cc +++ b/tensorflow/lite/micro/kernels/svdf.cc @@ -338,12 +338,7 @@ constexpr int kOutputTensor = 0; void* Init(TfLiteContext* context, const char* buffer, size_t length) { TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - void* data = nullptr; - if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) == - kTfLiteError) { - return nullptr; - } - return data; + return context->AllocatePersistentBuffer(context, sizeof(OpData)); } TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/conv.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/conv.cc index 26cc0f03d73..dc39cc44e61 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/conv.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/conv.cc @@ -303,12 +303,7 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, void* Init(TfLiteContext* context, const char* buffer, size_t length) { TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - void* data = nullptr; - if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) == - kTfLiteError) { - return nullptr; - } - return data; + return context->AllocatePersistentBuffer(context, sizeof(OpData)); } TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { @@ -333,12 +328,12 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { // quantized types, only a single scale and zero point is needed. const int num_channels = filter->dims->data[kConvQuantizedDimension]; // Dynimically allocate per-channel quantization parameters. - TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer( - context, num_channels * sizeof(int32_t), - reinterpret_cast(&op_data->per_channel_output_multiplier))); - TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer( - context, num_channels * sizeof(int32_t), - reinterpret_cast(&op_data->per_channel_output_shift))); + op_data->per_channel_output_multiplier = + reinterpret_cast(context->AllocatePersistentBuffer( + context, num_channels * sizeof(int32_t))); + op_data->per_channel_output_shift = + reinterpret_cast(context->AllocatePersistentBuffer( + context, num_channels * sizeof(int32_t))); // All per-channel quantized tensors need valid zero point and scale arrays. 
if (input->type == kTfLiteInt8) { diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/depthwise_conv.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/depthwise_conv.cc index a3aff598fcb..e7a37b6901d 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/depthwise_conv.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/depthwise_conv.cc @@ -352,12 +352,7 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, void* Init(TfLiteContext* context, const char* buffer, size_t length) { TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - void* data = nullptr; - if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) == - kTfLiteError) { - return nullptr; - } - return data; + return context->AllocatePersistentBuffer(context, sizeof(OpData)); } TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { @@ -381,12 +376,12 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { // quantized types, only a single scale and zero point is needed. const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension]; // Dynimically allocate per-channel quantization parameters. - TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer( - context, num_channels * sizeof(int32_t), - reinterpret_cast(&op_data->per_channel_output_multiplier))); - TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer( - context, num_channels * sizeof(int32_t), - reinterpret_cast(&op_data->per_channel_output_shift))); + op_data->per_channel_output_multiplier = + reinterpret_cast(context->AllocatePersistentBuffer( + context, num_channels * sizeof(int32_t))); + op_data->per_channel_output_shift = + reinterpret_cast(context->AllocatePersistentBuffer( + context, num_channels * sizeof(int32_t))); // All per-channel quantized tensors need valid zero point and scale arrays. 
if (input->type == kTfLiteInt8) { diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/fully_connected.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/fully_connected.cc index a89c53f73e7..6ebfbe75067 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/fully_connected.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/fully_connected.cc @@ -163,12 +163,7 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, void* Init(TfLiteContext* context, const char* buffer, size_t length) { TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - void* data = nullptr; - if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) == - kTfLiteError) { - return nullptr; - } - return data; + return context->AllocatePersistentBuffer(context, sizeof(OpData)); } TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/quantize.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/quantize.cc index 9e612cf5ae7..e735214dd38 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/quantize.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/quantize.cc @@ -109,12 +109,7 @@ struct OpData { void* Init(TfLiteContext* context, const char* buffer, size_t length) { TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - void* data = nullptr; - if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) == - kTfLiteError) { - return nullptr; - } - return data; + return context->AllocatePersistentBuffer(context, sizeof(OpData)); } TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/softmax.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/softmax.cc index 3a4b157d265..0fb3646e3e8 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/softmax.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/softmax.cc @@ -148,12 +148,7 @@ TfLiteStatus CalculateSoftmaxOpData(TfLiteContext* context, void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length) { TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - void* data = nullptr; - if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) == - kTfLiteError) { - return nullptr; - } - return data; + return context->AllocatePersistentBuffer(context, sizeof(OpData)); } TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) { diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/svdf.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/svdf.cc index dc071b221bd..8520dc2db72 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/svdf.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/svdf.cc @@ -259,13 +259,7 @@ void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node, void* Init(TfLiteContext* context, const char* buffer, size_t length) { TFLITE_DCHECK(context != nullptr); - TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - void* data = nullptr; - if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) == - kTfLiteError) { - return nullptr; - } - return data; + return context->AllocatePersistentBuffer(context, sizeof(OpData)); } TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini_staging/fully_connected.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini_staging/fully_connected.cc index 6264373f2c6..f9b49a2f1ae 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini_staging/fully_connected.cc +++ 
b/tensorflow/lite/micro/kernels/xtensa_hifimini_staging/fully_connected.cc @@ -98,12 +98,7 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, void* Init(TfLiteContext* context, const char* buffer, size_t length) { TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - void* data = nullptr; - if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) == - kTfLiteError) { - return nullptr; - } - return data; + return context->AllocatePersistentBuffer(context, sizeof(OpData)); } TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini_staging/quantize.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini_staging/quantize.cc index 7c521e7d2aa..513f926fae9 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini_staging/quantize.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini_staging/quantize.cc @@ -110,12 +110,7 @@ struct OpData { void* Init(TfLiteContext* context, const char* buffer, size_t length) { TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - void* data = nullptr; - if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) == - kTfLiteError) { - return nullptr; - } - return data; + return context->AllocatePersistentBuffer(context, sizeof(OpData)); } TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini_staging/softmax.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini_staging/softmax.cc index 6588dff6ec7..90fc2cd9903 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini_staging/softmax.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini_staging/softmax.cc @@ -98,12 +98,7 @@ TfLiteStatus CalculateSoftmaxOpData(TfLiteContext* context, void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length) { TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - void* data = nullptr; - if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) == - kTfLiteError) { - return nullptr; - } - return data; + return context->AllocatePersistentBuffer(context, sizeof(OpData)); } TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) { diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini_staging/svdf.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini_staging/svdf.cc index 6da87687be3..537b48db8eb 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini_staging/svdf.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini_staging/svdf.cc @@ -190,12 +190,7 @@ TfLiteStatus EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node, void* Init(TfLiteContext* context, const char* buffer, size_t length) { TFLITE_DCHECK(context != nullptr); TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - void* data = nullptr; - if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) == - kTfLiteError) { - return nullptr; - } - return data; + return context->AllocatePersistentBuffer(context, sizeof(OpData)); } TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { diff --git a/tensorflow/lite/micro/memory_helpers.cc b/tensorflow/lite/micro/memory_helpers.cc index 20e6e10c2ed..d1e0392a3bc 100644 --- a/tensorflow/lite/micro/memory_helpers.cc +++ b/tensorflow/lite/micro/memory_helpers.cc @@ -140,9 +140,9 @@ TfLiteStatus AllocateOutputDimensionsFromInput(TfLiteContext* context, output->bytes = size; - TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer( - context, TfLiteIntArrayGetSizeInBytes(size), - reinterpret_cast(&output->dims))); + 
output->dims = + reinterpret_cast(context->AllocatePersistentBuffer( + context, TfLiteIntArrayGetSizeInBytes(size))); output->dims->size = input->dims->size; for (int i = 0; i < dimensions_count; i++) { diff --git a/tensorflow/lite/micro/memory_helpers_test.cc b/tensorflow/lite/micro/memory_helpers_test.cc index 25ade769b01..5000a880638 100644 --- a/tensorflow/lite/micro/memory_helpers_test.cc +++ b/tensorflow/lite/micro/memory_helpers_test.cc @@ -20,9 +20,15 @@ limitations under the License. namespace { -TfLiteStatus FakeAllocatePersistentBuffer(TfLiteContext* context, size_t bytes, - void** ptr) { - return kTfLiteOk; +// This just needs to be big enough to handle the array of 5 ints allocated +// in TestAllocateOutputDimensionsFromInput below. +const int kGlobalPersistentBufferLength = 100; +char global_persistent_buffer[kGlobalPersistentBufferLength]; + +// Only need to handle a single allocation at a time for output dimensions +// in TestAllocateOutputDimensionsFromInput. +void* FakeAllocatePersistentBuffer(TfLiteContext* context, size_t bytes) { + return reinterpret_cast(global_persistent_buffer); } } // namespace @@ -181,8 +187,8 @@ TF_LITE_MICRO_TEST(TestAllocateOutputDimensionsFromInput) { TfLiteTensor output_tensor = tflite::testing::CreateInt32Tensor( nullptr, tflite::testing::IntArrayFromInts(output_dims)); TfLiteContext context; - // Set allocator to no-op to avoid segfault. Memory is already allocated for - // output dims. + // Only need to allocate space for output_tensor.dims. Use a simple + // fake allocator. context.AllocatePersistentBuffer = FakeAllocatePersistentBuffer; TF_LITE_MICRO_EXPECT_EQ( diff --git a/tensorflow/lite/micro/micro_allocator.cc b/tensorflow/lite/micro/micro_allocator.cc index 8ad6db362d7..73e7cd88bc0 100644 --- a/tensorflow/lite/micro/micro_allocator.cc +++ b/tensorflow/lite/micro/micro_allocator.cc @@ -681,17 +681,8 @@ TfLiteStatus MicroAllocator::FinishModelAllocation( return kTfLiteOk; } -TfLiteStatus MicroAllocator::AllocatePersistentBuffer(size_t bytes, - void** ptr) { - uint8_t* data = memory_allocator_->AllocateFromTail(bytes, kBufferAlignment); - if (data == nullptr) { - TF_LITE_REPORT_ERROR(error_reporter_, - "Failed to allocate persistent buffer of size %d", - bytes); - return kTfLiteError; - } - (*ptr) = data; - return kTfLiteOk; +void* MicroAllocator::AllocatePersistentBuffer(size_t bytes) { + return memory_allocator_->AllocateFromTail(bytes, kBufferAlignment); } TfLiteStatus MicroAllocator::RequestScratchBufferInArena(int node_id, diff --git a/tensorflow/lite/micro/micro_allocator.h b/tensorflow/lite/micro/micro_allocator.h index 47dad629944..efd11b8b230 100644 --- a/tensorflow/lite/micro/micro_allocator.h +++ b/tensorflow/lite/micro/micro_allocator.h @@ -154,7 +154,7 @@ class MicroAllocator { // Allocates persistent buffer which has the same life time as the allocator. // The memory is immediately available and is allocated from the tail of the // arena. - TfLiteStatus AllocatePersistentBuffer(size_t bytes, void** ptr); + void* AllocatePersistentBuffer(size_t bytes); // Register a scratch buffer of size `bytes` for Node with `node_id`. 
// This method only allocates a BufferHandle holding information for memory diff --git a/tensorflow/lite/micro/micro_interpreter.cc b/tensorflow/lite/micro/micro_interpreter.cc index 87c8da7da42..41efe9ecf51 100644 --- a/tensorflow/lite/micro/micro_interpreter.cc +++ b/tensorflow/lite/micro/micro_interpreter.cc @@ -47,10 +47,10 @@ ContextHelper::ContextHelper(ErrorReporter* error_reporter, MicroAllocator* allocator, const Model* model) : allocator_(allocator), error_reporter_(error_reporter), model_(model) {} -TfLiteStatus ContextHelper::AllocatePersistentBuffer(TfLiteContext* ctx, - size_t bytes, void** ptr) { +void* ContextHelper::AllocatePersistentBuffer(TfLiteContext* ctx, + size_t bytes) { return reinterpret_cast(ctx->impl_) - ->allocator_->AllocatePersistentBuffer(bytes, ptr); + ->allocator_->AllocatePersistentBuffer(bytes); } TfLiteStatus ContextHelper::RequestScratchBufferInArena(TfLiteContext* ctx, diff --git a/tensorflow/lite/micro/micro_interpreter.h b/tensorflow/lite/micro/micro_interpreter.h index df70514c8d3..67d74574e61 100644 --- a/tensorflow/lite/micro/micro_interpreter.h +++ b/tensorflow/lite/micro/micro_interpreter.h @@ -42,8 +42,7 @@ class ContextHelper { MicroAllocator* allocator, const Model* model); // Functions that will be assigned to function pointers on TfLiteContext: - static TfLiteStatus AllocatePersistentBuffer(TfLiteContext* ctx, size_t bytes, - void** ptr); + static void* AllocatePersistentBuffer(TfLiteContext* ctx, size_t bytes); static TfLiteStatus RequestScratchBufferInArena(TfLiteContext* ctx, size_t bytes, int* buffer_idx); diff --git a/tensorflow/lite/micro/test_helpers.cc b/tensorflow/lite/micro/test_helpers.cc index 5df5b34deaf..7278fea48b3 100644 --- a/tensorflow/lite/micro/test_helpers.cc +++ b/tensorflow/lite/micro/test_helpers.cc @@ -574,9 +574,7 @@ void* SimpleStatefulOp::Init(TfLiteContext* context, const char* buffer, TFLITE_DCHECK(context->GetScratchBuffer == nullptr); TFLITE_DCHECK(context->RequestScratchBufferInArena == nullptr); - void* raw; - TFLITE_DCHECK(context->AllocatePersistentBuffer(context, sizeof(OpData), - &raw) == kTfLiteOk); + void* raw = context->AllocatePersistentBuffer(context, sizeof(OpData)); OpData* data = reinterpret_cast(raw); *data = {}; return raw; diff --git a/tensorflow/lite/micro/testing/test_utils.cc b/tensorflow/lite/micro/testing/test_utils.cc index 5aa73df44a6..ec5396e15e8 100644 --- a/tensorflow/lite/micro/testing/test_utils.cc +++ b/tensorflow/lite/micro/testing/test_utils.cc @@ -44,15 +44,9 @@ int scratch_buffer_count_ = 0; // signature of TfLiteContext::AllocatePersistentBuffer and isn't needed in the // implementation because we are assuming a single global // simple_memory_allocator_ -TfLiteStatus AllocatePersistentBuffer(TfLiteContext* context, size_t bytes, - void** ptr) { +void* AllocatePersistentBuffer(TfLiteContext* context, size_t bytes) { TFLITE_DCHECK(simple_memory_allocator_ != nullptr); - TFLITE_DCHECK(ptr != nullptr); - *ptr = simple_memory_allocator_->AllocateFromTail(bytes, kBufferAlignment); - if (*ptr == nullptr) { - return kTfLiteError; - } - return kTfLiteOk; + return simple_memory_allocator_->AllocateFromTail(bytes, kBufferAlignment); } TfLiteStatus RequestScratchBufferInArena(TfLiteContext* context, size_t bytes, diff --git a/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h b/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h index 692a8eaf7a2..0979c4dddf8 100644 --- a/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h +++ 
b/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h @@ -703,12 +703,11 @@ typedef struct TfLiteContext { void* profiler; // Allocate persistent buffer which has the same life time as the interpreter. + // Returns nullptr on failure. // The memory is allocated from heap for TFL, and from tail in TFLM. - // If *ptr is not nullptr, the pointer will be reallocated. - // This method is only available in Prepare stage. + // This method is only available in Init or Prepare stage. // WARNING: This is an experimental interface that is subject to change. - TfLiteStatus (*AllocatePersistentBuffer)(struct TfLiteContext* ctx, - size_t bytes, void** ptr); + void* (*AllocatePersistentBuffer)(struct TfLiteContext* ctx, size_t bytes); // Allocate a buffer which will be deallocated right after invoke phase. // The memory is allocated from heap in TFL, and from volatile arena in TFLM. From c53ef0e292202793fff861f854b8d7957323ad25 Mon Sep 17 00:00:00 2001 From: Qiao Zhang Date: Tue, 21 Jul 2020 15:20:41 -0700 Subject: [PATCH 0978/2522] Internal dependency changes. PiperOrigin-RevId: 322454123 Change-Id: I48c72e0d7a61b1d977f19dcf49ae7f83acca69db --- tensorflow/core/platform/BUILD | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/core/platform/BUILD b/tensorflow/core/platform/BUILD index 823a12dfc57..5d805a79549 100644 --- a/tensorflow/core/platform/BUILD +++ b/tensorflow/core/platform/BUILD @@ -357,6 +357,8 @@ filegroup( cc_library( name = "mutex", textual_hdrs = ["mutex.h"], + # TODO(b/161569340): Short-term fix. Remove this visibility rule. + visibility = ["//tensorflow:__subpackages__"], deps = tf_platform_deps("mutex"), ) @@ -773,6 +775,8 @@ filegroup( cc_library( name = "types", hdrs = ["types.h"], + # TODO(b/161569340): Short-term fix. Remove this visibility rule. + visibility = ["//tensorflow:__subpackages__"], deps = [ ":platform", ":tstring", From a17f14a936e9975a0e24ad31bda96e4512f7f3b3 Mon Sep 17 00:00:00 2001 From: Ran Chen Date: Tue, 21 Jul 2020 15:29:01 -0700 Subject: [PATCH 0979/2522] Add a StartAbort() method to CollectiveParamResolver with local implementation This is part of changes towards enabling aborting collectives when the group is unhealthy. BaseCollectiveExecutor used to only abort the BufRendezvous, but the collectives may be waiting on param resolution as well. This change only contains implementation for CollectiveParamResolverLocal, not CollectiveParamResolverDistributed. This means we don't abort the RPCs to the collective leader, nor the RPCs to get device attributes of members of the group. For the former the abortion is delayed until the collective leader is offline (those RPCs are fail_fast), for the latter the abortion is delayed until it gets all device attributes (those RPCs are not fail fast). These are not ideal but should be fine in a deployment where workers will eventually come back. I'll implement CollectiveParamResolverDistributed::StartAbort() in a later change. 
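As a rough usage sketch of the new abort path (following the unit tests added below; MakeCollectiveParams is the test helper introduced in this change, and the group size of 2, the single-host CPU device string, and the `param_resolver` pointer are assumptions borrowed from those tests, not new API): a CompleteParamsAsync call that is blocked on parameter resolution is unblocked by StartAbort() with the abort status, and any later call on the same resolver fails immediately with that status.

    CancellationManager cancel_mgr;
    CollectiveParams cp = MakeCollectiveParams(/*group_key=*/100,
                                               /*instance_key=*/100,
                                               /*is_source=*/true);
    // Only one participant of a group of size 2 is launched, so parameter
    // resolution blocks until the resolver is aborted.
    param_resolver->CompleteParamsAsync(
        "/job:localhost/replica:0/task:0/device:CPU:0", &cp, &cancel_mgr,
        [](const Status& s) {
          // Fires with error::ABORTED once StartAbort() below runs.
        });
    param_resolver->StartAbort(Status(error::ABORTED, "__aborted__"));
    // Subsequent CompleteParamsAsync calls on this resolver now fail
    // immediately with error::ABORTED.
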
PiperOrigin-RevId: 322455670 Change-Id: I494d6b367e90a7a14a10dd20e111b37e892e45b9 --- .../base_collective_executor.cc | 1 + .../collective_param_resolver_local.cc | 77 ++++++- .../collective_param_resolver_local.h | 8 + .../collective_param_resolver_local_test.cc | 207 +++++++++++++++++- .../test_collective_executor_mgr.h | 28 ++- .../collective_param_resolver_distributed.cc | 50 +++-- tensorflow/core/framework/collective.h | 3 + tensorflow/python/ops/collective_ops_test.py | 118 +++++++++- 8 files changed, 456 insertions(+), 36 deletions(-) diff --git a/tensorflow/core/common_runtime/base_collective_executor.cc b/tensorflow/core/common_runtime/base_collective_executor.cc index 5d5100e7f2e..ff9c67f0eb0 100644 --- a/tensorflow/core/common_runtime/base_collective_executor.cc +++ b/tensorflow/core/common_runtime/base_collective_executor.cc @@ -214,6 +214,7 @@ BaseCollectiveExecutor::~BaseCollectiveExecutor() {} void BaseCollectiveExecutor::StartAbort(const Status& s) { VLOG(1) << "BaseCollectiveExecutor::StartAbort " << s; + cem_->GetParamResolver()->StartAbort(s); remote_access_->StartAbort(s); } diff --git a/tensorflow/core/common_runtime/collective_param_resolver_local.cc b/tensorflow/core/common_runtime/collective_param_resolver_local.cc index f3dea5c606a..a0153a5fff0 100644 --- a/tensorflow/core/common_runtime/collective_param_resolver_local.cc +++ b/tensorflow/core/common_runtime/collective_param_resolver_local.cc @@ -30,6 +30,7 @@ limitations under the License. #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/status.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/protobuf/config.pb.h" #include "tensorflow/core/util/device_name_utils.h" @@ -123,6 +124,14 @@ void CollectiveParamResolverLocal::CompleteGroupLocal( gr = it->second.get(); } } + { + mutex_lock l(status_mu_); + status = status_; + } + if (!status.ok()) { + done(status, nullptr); + return; + } { mutex_lock gr_lock(gr->mu); // If there is ever an error associated with a group key, we store the error @@ -598,6 +607,16 @@ void CollectiveParamResolverLocal::FindInstanceRec( instance_table_[cp->instance.instance_key].reset(irec); } } + Status status; + { + mutex_lock l(status_mu_); + status = status_; + } + if (!status.ok()) { + mutex_lock il(irec->out_mu); + irec->WaitForOutMu(il); + irec->status = status; + } if (exit_outside_locks) { CallbackWithStatus(done, irec); return; @@ -790,9 +809,12 @@ void CollectiveParamResolverLocal::WaitForGroup(InstanceRec* ir, bool is_source, const IRConsumer& f) { std::vector ready_waiters; - { + do { mutex_lock l(ir->out_mu); ir->WaitForOutMu(l); + if (!ir->status.ok()) { + break; + } CHECK_EQ(cp->group.group_size, ir->known.size()); CHECK_GE(cp->default_rank, 0); if (!ir->known[cp->default_rank]) { @@ -828,11 +850,62 @@ void CollectiveParamResolverLocal::WaitForGroup(InstanceRec* ir, if (!ir->known_waiters.empty()) { ready_waiters = std::move(ir->known_waiters); } - } + } while (false); f(ir); for (auto& f : ready_waiters) { f(ir); } } +void CollectiveParamResolverLocal::StartAbort(const Status& s) { + { + mutex_lock l(status_mu_); + if (!status_.ok()) { + VLOG(1) << "CollectiveParamResolverLocal already aborted. 
Ignoring " + "subsequent abortion with status: " + << s; + return; + } + status_ = s; + } + StartAbortLocal(s); +} + +void CollectiveParamResolverLocal::StartAbortLocal(const Status& s) { + { + mutex_lock l(group_mu_); + for (const auto& item : group_table_) { + GroupRec* gr = item.second.get(); + std::vector waiting; + { + mutex_lock gl(gr->mu); + gr->status = s; + waiting.swap(gr->waiting); + } + for (const StatusCallback& done : waiting) { + done(s); + } + } + } + std::vector instances; + { + mutex_lock l(instance_mu_); + for (const auto& item : instance_table_) { + instances.push_back(item.second.get()); + } + } + for (InstanceRec* ir : instances) { + std::vector known_waiters; + { + mutex_lock il(ir->out_mu); + ir->WaitForOutMu(il); + ir->status = s; + known_waiters.swap(ir->known_waiters); + } + for (const IRConsumer& done : known_waiters) { + done(ir); + } + } +} + } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/collective_param_resolver_local.h b/tensorflow/core/common_runtime/collective_param_resolver_local.h index c724ed93b7e..2b7528d6377 100644 --- a/tensorflow/core/common_runtime/collective_param_resolver_local.h +++ b/tensorflow/core/common_runtime/collective_param_resolver_local.h @@ -23,6 +23,7 @@ limitations under the License. #include "tensorflow/core/framework/collective.h" #include "tensorflow/core/lib/gtl/flatmap.h" +#include "tensorflow/core/platform/thread_annotations.h" namespace tensorflow { class CompleteGroupRequest; @@ -58,6 +59,8 @@ class CollectiveParamResolverLocal : public ParamResolverInterface { CancellationManager* cancel_mgr, const StatusCallback& done) override; + void StartAbort(const Status& s) override; + protected: // For access to InstanceRec and CompleteDefaultRanking. friend class CollectiveParamResolverLocalTest; @@ -227,6 +230,9 @@ class CollectiveParamResolverLocal : public ParamResolverInterface { void CallbackWithStatus(const InstanceRecCallback& done, InstanceRec* irec) TF_LOCKS_EXCLUDED(irec->out_mu); + void StartAbortLocal(const Status& s) + TF_LOCKS_EXCLUDED(status_mu_, group_mu_, instance_mu_); + const bool nccl_; const DeviceMgr* dev_mgr_; DeviceResolverInterface* dev_resolver_; // Not owned. @@ -237,6 +243,8 @@ class CollectiveParamResolverLocal : public ParamResolverInterface { mutex instance_mu_; gtl::FlatMap> instance_table_ TF_GUARDED_BY(instance_mu_); + mutex status_mu_; + Status status_ TF_GUARDED_BY(status_mu_); }; } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/collective_param_resolver_local_test.cc b/tensorflow/core/common_runtime/collective_param_resolver_local_test.cc index ef85a1eed93..a998b5b1e48 100644 --- a/tensorflow/core/common_runtime/collective_param_resolver_local_test.cc +++ b/tensorflow/core/common_runtime/collective_param_resolver_local_test.cc @@ -14,15 +14,22 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/core/common_runtime/collective_param_resolver_local.h" +#include + #include "tensorflow/core/common_runtime/collective_executor_mgr.h" #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/common_runtime/device_factory.h" #include "tensorflow/core/common_runtime/device_mgr.h" #include "tensorflow/core/common_runtime/device_resolver_local.h" +#include "tensorflow/core/framework/cancellation.h" +#include "tensorflow/core/framework/collective.h" #include "tensorflow/core/lib/core/notification.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/blocking_counter.h" +#include "tensorflow/core/platform/random.h" #include "tensorflow/core/platform/test.h" +#include "tensorflow/core/protobuf/error_codes.pb.h" #include "tensorflow/core/public/session_options.h" namespace tensorflow { @@ -34,15 +41,20 @@ class CollectiveParamResolverLocalTest : public ::testing::Test { CollectiveParamResolverLocalTest() { ConfigProto cp; SessionOptions options; - string task_name = "/job:localhost/replica:0/task:0"; + task_name_ = "/job:localhost/replica:0/task:0"; auto* device_count = options.config.mutable_device_count(); device_count->insert({"CPU", NUM_DEVS}); std::vector> devices; - TF_CHECK_OK(DeviceFactory::AddDevices(options, task_name, &devices)); + TF_CHECK_OK(DeviceFactory::AddDevices(options, task_name_, &devices)); device_mgr_ = absl::make_unique(std::move(devices)); drl_.reset(new DeviceResolverLocal(device_mgr_.get())); + ResetParamResolver(); + } + + void ResetParamResolver() { + ConfigProto cp; prl_.reset(new CollectiveParamResolverLocal(cp, device_mgr_.get(), - drl_.get(), task_name)); + drl_.get(), task_name_)); } void RunCompleteDefaultRanking( @@ -74,6 +86,7 @@ class CollectiveParamResolverLocalTest : public ::testing::Test { } } + string task_name_; std::unique_ptr device_mgr_; std::unique_ptr drl_; std::unique_ptr prl_; @@ -287,4 +300,192 @@ TEST_F(CollectiveParamResolverLocalTest, CompleteParamsBroadcastForgotSender) { } } +CollectiveParams MakeCollectiveParams(int group_key, int instance_key, + bool is_source) { + CollectiveParams cp; + cp.group.group_key = group_key; + cp.group.group_size = NUM_DEVS; + cp.group.device_type = DeviceType("CPU"); + cp.group.num_tasks = 1; + cp.instance.instance_key = instance_key; + // CompleteInstanceLocal only waits for the group for broadcasts. + // Testing with broadcasts yields better coverage. 
+ cp.instance.type = BROADCAST_COLLECTIVE; + cp.is_source = is_source; + return cp; +} + +TEST_F(CollectiveParamResolverLocalTest, AbortPendingGroup) { + CancellationManager cancel_mgr; + std::vector cp(NUM_DEVS - 1); + BlockingCounter start(NUM_DEVS - 1); + BlockingCounter done(NUM_DEVS - 1); + for (int i = 0; i < NUM_DEVS - 1; ++i) { + Env::Default()->SchedClosure([this, i, &cancel_mgr, &cp, &start, &done] { + string device = + strings::StrCat("/job:localhost/replica:0/task:0/device:CPU:", i); + cp[i] = MakeCollectiveParams(/*group_key*/ 100, /*instance_key*/ 100, + /*is_source*/ i == 0); + prl_->CompleteParamsAsync(device, &cp[i], &cancel_mgr, + [&done](const Status& s) { + EXPECT_EQ(s.code(), error::ABORTED); + EXPECT_EQ(s.error_message(), "__aborted__"); + done.DecrementCount(); + }); + start.DecrementCount(); + }); + } + start.Wait(); + prl_->StartAbort(Status(error::ABORTED, "__aborted__")); + done.Wait(); +} + +TEST_F(CollectiveParamResolverLocalTest, AbortPendingInstance) { + CancellationManager cancel_mgr; + std::vector cp(NUM_DEVS); + int group_key = 100; + int instance_key = 100; + // First do a normal CompleteParamsAsync to complete the group; + { + BlockingCounter done(NUM_DEVS); + for (int i = 0; i < NUM_DEVS; ++i) { + Env::Default()->SchedClosure([this, group_key, instance_key, i, + &cancel_mgr, &cp, &done] { + string device = + strings::StrCat("/job:localhost/replica:0/task:0/device:CPU:", i); + cp[i] = MakeCollectiveParams(group_key, instance_key, + /*is_source*/ i == 0); + prl_->CompleteParamsAsync(device, &cp[i], &cancel_mgr, + [&done](const Status& s) { + EXPECT_EQ(s.code(), error::OK); + done.DecrementCount(); + }); + }); + } + done.Wait(); + } + BlockingCounter start(NUM_DEVS - 1); + BlockingCounter done(NUM_DEVS - 1); + for (int i = 0; i < NUM_DEVS - 1; ++i) { + Env::Default()->SchedClosure( + [this, group_key, instance_key, i, &cancel_mgr, &cp, &start, &done] { + string device = + strings::StrCat("/job:localhost/replica:0/task:0/device:CPU:", i); + cp[i] = MakeCollectiveParams(group_key, instance_key + 1, + /*is_source*/ i == 0); + prl_->CompleteParamsAsync( + device, &cp[i], &cancel_mgr, [&done](const Status& s) { + EXPECT_EQ(s.code(), error::ABORTED); + EXPECT_EQ(s.error_message(), "__aborted__"); + done.DecrementCount(); + }); + start.DecrementCount(); + }); + } + start.Wait(); + prl_->StartAbort(Status(error::ABORTED, "__aborted__")); + done.Wait(); +} + +TEST_F(CollectiveParamResolverLocalTest, CompleteParamsAfterAbortion) { + CancellationManager cancel_mgr; + int group_key = 100; + int instance_key = 100; + // First do a normal CompleteParamsAsync to complete the group; + { + std::vector cp(NUM_DEVS); + BlockingCounter done(NUM_DEVS); + for (int i = 0; i < NUM_DEVS; ++i) { + Env::Default()->SchedClosure([this, group_key, instance_key, i, + &cancel_mgr, &cp, &done] { + string device = + strings::StrCat("/job:localhost/replica:0/task:0/device:CPU:", i); + cp[i] = MakeCollectiveParams(group_key, instance_key, + /*is_source*/ i == 0); + prl_->CompleteParamsAsync(device, &cp[i], &cancel_mgr, + [&done](const Status& s) { + EXPECT_EQ(s.code(), error::OK); + done.DecrementCount(); + }); + }); + } + done.Wait(); + } + prl_->StartAbort(Status(error::ABORTED, "__aborted__")); + + auto complete_params = [this, &cancel_mgr](int group_key, int instance_key) { + string device = "/job:localhost/replica:0/task:0/device:CPU:0"; + Notification done; + auto cp = MakeCollectiveParams(group_key, instance_key, + /*is_source*/ true); + prl_->CompleteParamsAsync(device, &cp, 
&cancel_mgr, + [&done](const Status& s) { + EXPECT_EQ(s.code(), error::ABORTED); + EXPECT_EQ(s.error_message(), "__aborted__"); + done.Notify(); + }); + done.WaitForNotification(); + }; + // It should error without waiting for the all following combinations: + // - existing group, existing instance + complete_params(group_key, instance_key); + // - existing group, new instance + complete_params(group_key, instance_key + 1); + // - new group, new instance + complete_params(group_key + 1, instance_key + 1); +} + +TEST_F(CollectiveParamResolverLocalTest, AbortNormalCompleteParamsAsync) { + // The concurrent nature makes it hard to test abortion, which can happen at + // any moment. We don't have good options to inject control points into the + // code to explicitly test every possible scenarios, so we run the test for + // many times to have a better chance to cover different cases. + CancellationManager cancel_mgr; + std::atomic num_ok = 0; + for (int cnt = 0; cnt < 100; ++cnt) { + // Launching threads that keep doing CompleteInstanceLocal. + BlockingCounter done(NUM_DEVS); + for (int i = 0; i < NUM_DEVS; ++i) { + string device = + strings::StrCat("/job:localhost/replica:0/task:0/device:CPU:", i); + Env::Default()->SchedClosure( + [this, i, device, &num_ok, &cancel_mgr, &done] { + int key = 100; + while (true) { + Status status; + Notification n; + auto cp = + MakeCollectiveParams(/* group_key*/ key, /*instance_key*/ key, + /*is_source*/ i == 0); + prl_->CompleteParamsAsync(device, &cp, &cancel_mgr, + [&status, &n](const Status& s) { + status = s; + n.Notify(); + }); + n.WaitForNotification(); + // The status should be either OK or the aborted status. + if (!status.ok()) { + EXPECT_EQ(status.code(), error::ABORTED); + EXPECT_EQ(status.error_message(), "__aborted__"); + done.DecrementCount(); + return; + } + ++num_ok; + ++key; + } + }); + } + // Introduce a random delay up to 50ms, so that we're more likely to abort + // on different code points each time. + int64 delay_ms = random::New64() % 50000; + Env::Default()->SleepForMicroseconds(delay_ms); + prl_->StartAbort(Status(error::ABORTED, "__aborted__")); + done.Wait(); + ResetParamResolver(); + } + // There should be at least a few successes, otherwise the delay may be too + // short and may not cover certain stages of param resolution. 
+ EXPECT_GT(num_ok.load(), 50); +} + } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/test_collective_executor_mgr.h b/tensorflow/core/common_runtime/test_collective_executor_mgr.h index be99be3a738..22694120403 100644 --- a/tensorflow/core/common_runtime/test_collective_executor_mgr.h +++ b/tensorflow/core/common_runtime/test_collective_executor_mgr.h @@ -53,6 +53,30 @@ class TestCollectiveExecutor : public CollectiveExecutor { } }; +class TestParamResolver : public ParamResolverInterface { + void CompleteParamsAsync(const string& device, CollectiveParams* cp, + CancellationManager* cancel_mgr, + const StatusCallback& done) override { + done(errors::Internal("Unimplemented")); + } + + void CompleteGroupAsync(const CompleteGroupRequest* request, + CompleteGroupResponse* response, + CancellationManager* cancel_mgr, + const StatusCallback& done) override { + done(errors::Internal("Unimplemented")); + } + + void CompleteInstanceAsync(const CompleteInstanceRequest* request, + CompleteInstanceResponse* response, + CancellationManager* cancel_mgr, + const StatusCallback& done) override { + done(errors::Internal("Unimplemented")); + } + + void StartAbort(const Status& s) override { return; } +}; + class TestCollectiveExecutorMgr : public CollectiveExecutorMgrInterface { public: TestCollectiveExecutorMgr() {} @@ -87,8 +111,7 @@ class TestCollectiveExecutorMgr : public CollectiveExecutorMgrInterface { } ParamResolverInterface* GetParamResolver() const override { - LOG(FATAL); - return nullptr; + return ¶m_resolver_; } DeviceResolverInterface* GetDeviceResolver() const override { @@ -115,6 +138,7 @@ class TestCollectiveExecutorMgr : public CollectiveExecutorMgrInterface { mutex mu_; gtl::FlatMap table_ TF_GUARDED_BY(mu_); + mutable TestParamResolver param_resolver_; }; } // namespace tensorflow diff --git a/tensorflow/core/distributed_runtime/collective_param_resolver_distributed.cc b/tensorflow/core/distributed_runtime/collective_param_resolver_distributed.cc index a8738291e5d..91878593fc9 100644 --- a/tensorflow/core/distributed_runtime/collective_param_resolver_distributed.cc +++ b/tensorflow/core/distributed_runtime/collective_param_resolver_distributed.cc @@ -220,31 +220,33 @@ Status CollectiveParamResolverDistributed::UpdateGroupCache( const CompleteGroupResponse& resp) { // Build a new record from resp. 
std::unique_ptr gr(new GroupRec); - mutex_lock grl(gr->mu); - gr->group.device_type = DeviceType(resp.device_type()); - gr->group.group_key = resp.group_key(); - gr->group.group_size = resp.group_size(); - gr->group.num_tasks = resp.num_tasks(); - if (resp.device_name_size() != gr->group.group_size) { - return errors::Internal( - "CompleteGroupResponse group_size doesn't match device_name list"); + { + mutex_lock grl(gr->mu); + gr->group.device_type = DeviceType(resp.device_type()); + gr->group.group_key = resp.group_key(); + gr->group.group_size = resp.group_size(); + gr->group.num_tasks = resp.num_tasks(); + if (resp.device_name_size() != gr->group.group_size) { + return errors::Internal( + "CompleteGroupResponse group_size doesn't match device_name list"); + } + for (const string& dn : resp.device_name()) { + gr->device_set.insert(dn); + gr->device_list.push_back(dn); + } + if (resp.task_name_size() != gr->group.group_size) { + return errors::Internal( + "CompleteGroupResponse group_size doesn't match task_name list"); + } + for (const string& tn : resp.task_name()) { + gr->task_list.push_back(tn); + gr->task_set.insert(tn); + } + CHECK_EQ(gr->task_set.size(), gr->group.num_tasks); + gr->group.runtime_details.communicator_key = resp.communicator_key(); + VLOG(2) << "Group communicator_key=" + << absl::CEscape(gr->group.runtime_details.communicator_key); } - for (const string& dn : resp.device_name()) { - gr->device_set.insert(dn); - gr->device_list.push_back(dn); - } - if (resp.task_name_size() != gr->group.group_size) { - return errors::Internal( - "CompleteGroupResponse group_size doesn't match task_name list"); - } - for (const string& tn : resp.task_name()) { - gr->task_list.push_back(tn); - gr->task_set.insert(tn); - } - CHECK_EQ(gr->task_set.size(), gr->group.num_tasks); - gr->group.runtime_details.communicator_key = resp.communicator_key(); - VLOG(2) << "Group communicator_key=" - << absl::CEscape(gr->group.runtime_details.communicator_key); { // Group membership should never change. Once a record is in group_table_ // it never gets removed. diff --git a/tensorflow/core/framework/collective.h b/tensorflow/core/framework/collective.h index 24507b901a7..72920cfaa08 100644 --- a/tensorflow/core/framework/collective.h +++ b/tensorflow/core/framework/collective.h @@ -188,6 +188,9 @@ class ParamResolverInterface { CompleteInstanceResponse* response, CancellationManager* cancel_mgr, const StatusCallback& done) = 0; + + // Aborts the resolver. After abortion the resolver can no longer be used. 
+ virtual void StartAbort(const Status& s) = 0; }; // Graphs which utilize Collective Ops in a common instance must diff --git a/tensorflow/python/ops/collective_ops_test.py b/tensorflow/python/ops/collective_ops_test.py index da553a0012c..dd788e911f7 100644 --- a/tensorflow/python/ops/collective_ops_test.py +++ b/tensorflow/python/ops/collective_ops_test.py @@ -43,6 +43,10 @@ from tensorflow.python.platform import tf_logging as logging class CollectiveOpTest(test.TestCase): + def setUp(self): + context._reset_context() # pylint: disable=protected-access + super(CollectiveOpTest, self).setUp() + def _testCollectiveReduce(self, inputs, expected, @@ -165,7 +169,6 @@ class CollectiveOpTest(test.TestCase): @test_util.run_v2_only def testCollectiveTimeoutV2(self): - context._reset_context() timeout = 4.5 cpus = config.list_physical_devices('CPU') self.assertEqual(len(cpus), 1) @@ -208,7 +211,6 @@ class CollectiveOpTest(test.TestCase): @test_util.run_v2_only def testParamResolutionAfterTimeoutV2(self): - context._reset_context() timeout = 1.5 cpus = config.list_physical_devices('CPU') self.assertEqual(len(cpus), 1) @@ -531,7 +533,6 @@ class CollectiveOpTest(test.TestCase): @test_util.run_v2_only def testCollectiveGroupSizeMismatch(self): - context._reset_context() cpus = config.list_physical_devices('CPU') self.assertEqual(len(cpus), 1) config.set_logical_device_configuration(cpus[0], [ @@ -564,7 +565,6 @@ class CollectiveOpTest(test.TestCase): @test_util.run_v2_only def testCollectiveTensorsHaveNoDeviceSpecified(self): - context._reset_context() cpus = config.list_physical_devices('CPU') self.assertEqual(len(cpus), 1) config.set_logical_device_configuration(cpus[0], [ @@ -657,7 +657,6 @@ class CollectiveOpTest(test.TestCase): @test_util.run_v2_only def testMultipleGroups(self): - context._reset_context() cpus = config.list_physical_devices('CPU') self.assertEqual(len(cpus), 1) config.set_logical_device_configuration(cpus[0], [ @@ -690,6 +689,115 @@ class CollectiveOpTest(test.TestCase): run_and_assert(group_size=2, group_key=1) run_and_assert(group_size=3, group_key=2) + @test_util.run_v2_only + def testAbortGroupParamsResolution(self): + group_size = 2 + group_key = 100 + instance_key = 100 + in_tensor = constant_op.constant(1.) + + def abort_fn(): + time.sleep(2) + context.context().abort_collective_ops(errors.UNAVAILABLE, 'peer down') + + t = threading.Thread(target=abort_fn) + t.start() + + with self.assertRaisesRegex(errors.UnavailableError, 'peer down'): + # This hangs on params resolution since we're only launching one + # collective for a group size of 2. + collective_ops.all_reduce(in_tensor, group_size, group_key, instance_key, + 'Add', 'Id') + + # After abortion, subsequent collectives should fail immediately. + with self.assertRaisesRegex(errors.UnavailableError, 'peer down'): + collective_ops.all_reduce(in_tensor, group_size, group_key, instance_key, + 'Add', 'Id') + + # Reset the context in order to reset the collective executor. + context._reset_context() # pylint: disable=protected-access + t.join() + + # After reset non-NCCL collectives should work. 
+ cpus = config.list_physical_devices('CPU') + config.set_logical_device_configuration(cpus[0], [ + context.LogicalDeviceConfiguration(), + context.LogicalDeviceConfiguration() + ]) + + def collective_fn(): + for device in ['CPU:0', 'CPU:1']: + with ops.device(device): + collective_ops.all_reduce( + in_tensor, + group_size, + group_key, + instance_key, + 'Add', + 'Id', + communication_hint='ring') + + def_function.function(collective_fn)() + + @test_util.run_v2_only + def testAbortInstanceParamsResolution(self): + cpus = config.list_physical_devices('CPU') + config.set_logical_device_configuration(cpus[0], [ + context.LogicalDeviceConfiguration(), + context.LogicalDeviceConfiguration() + ]) + group_size = 2 + group_key = 100 + instance_key = 100 + in_tensor = constant_op.constant(1.) + + def collective_fn(): + for device in ['CPU:0', 'CPU:1']: + with ops.device(device): + collective_ops.all_reduce( + in_tensor, + group_size, + group_key, + instance_key, + 'Add', + 'Id', + communication_hint='ring') + + # First perform a normal all-reduce to complete the group resolution. + def_function.function(collective_fn)() + + def abort_fn(): + time.sleep(2) + context.context().abort_collective_ops(errors.UNAVAILABLE, 'peer down') + + t = threading.Thread(target=abort_fn) + t.start() + + # Use a different instance key to trigger another instance resolution. + instance_key = 101 + with self.assertRaisesRegex(errors.UnavailableError, 'peer down'): + # This hangs on params resolution since we're only launching one + # collective for a group size of 2. + collective_ops.all_reduce(in_tensor, group_size, group_key, instance_key, + 'Add', 'Id') + + # After abortion, subsequent collectives should fail immediately. + with self.assertRaisesRegex(errors.UnavailableError, 'peer down'): + collective_ops.all_reduce(in_tensor, group_size, group_key, instance_key, + 'Add', 'Id') + + # Reset the context in order to reset the collective executor. + context._reset_context() # pylint: disable=protected-access + t.join() + + # After reset non-NCCL collectives should work. + cpus = config.list_physical_devices('CPU') + config.set_logical_device_configuration(cpus[0], [ + context.LogicalDeviceConfiguration(), + context.LogicalDeviceConfiguration() + ]) + def_function.function(collective_fn)() + @test_util.run_v2_only def testAbortRing(self): cpus = config.list_physical_devices('CPU') From 7851937693936a068339da83734d956b80361b83 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 21 Jul 2020 15:29:24 -0700 Subject: [PATCH 0980/2522] Integrate LLVM at llvm/llvm-project@8a268bec1b02 Updates LLVM usage to match [8a268bec1b02](https://github.com/llvm/llvm-project/commit/8a268bec1b02) PiperOrigin-RevId: 322455753 Change-Id: Ibe7b366b29711f8a2761a9876fbb66655eb691a8 --- tensorflow/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 2502f48e895..9585dfa3d95 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -711,8 +711,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. 
- LLVM_COMMIT = "61dd481f11051450522bcd2cfcb7222a90d3464e" - LLVM_SHA256 = "3604007894e3dc73e166b6d70cefe5ee06e1e4fc1b1ae33ca75077498200881c" + LLVM_COMMIT = "8a268bec1b02dd446fbc36e20d0a9af45d764f67" + LLVM_SHA256 = "95fd17a9235584e6ea17d7733458557846f901f904fa2ca1d751ea1ad6b49635" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From f111bc3762ca1223efe0da5faeb098e2d6f4d5b6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 21 Jul 2020 15:31:11 -0700 Subject: [PATCH 0981/2522] Update ops-related pbtxt files. PiperOrigin-RevId: 322456097 Change-Id: I9b5e60c24548993fbe2c7bffbad34fc9dbf8b579 --- .../ops/compat/ops_history_v2/ComputeBatchSize.pbtxt | 11 +++++++++++ tensorflow/core/ops/ops.pbtxt | 11 +++++++++++ 2 files changed, 22 insertions(+) create mode 100644 tensorflow/core/ops/compat/ops_history_v2/ComputeBatchSize.pbtxt diff --git a/tensorflow/core/ops/compat/ops_history_v2/ComputeBatchSize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/ComputeBatchSize.pbtxt new file mode 100644 index 00000000000..13ab4eef4d0 --- /dev/null +++ b/tensorflow/core/ops/compat/ops_history_v2/ComputeBatchSize.pbtxt @@ -0,0 +1,11 @@ +op { + name: "ComputeBatchSize" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + output_arg { + name: "batch_size" + type: DT_INT64 + } +} diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index c9ad5aa2dc6..eebf19918b4 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -7891,6 +7891,17 @@ op { } } } +op { + name: "ComputeBatchSize" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + output_arg { + name: "batch_size" + type: DT_INT64 + } +} op { name: "Concat" input_arg { From 318340f1cf091b3ebae2c24c58094f52ccd2fc5d Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Tue, 21 Jul 2020 15:44:27 -0700 Subject: [PATCH 0982/2522] Internal CI changes PiperOrigin-RevId: 322458526 Change-Id: Iac4fdf69f8f27ad86d663cb22272f5a93b71f65a --- .../macos/{cpu_libtensorflow_release.sh => cpu_libtensorflow.sh} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tensorflow/tools/ci_build/per_release/macos/{cpu_libtensorflow_release.sh => cpu_libtensorflow.sh} (100%) diff --git a/tensorflow/tools/ci_build/per_release/macos/cpu_libtensorflow_release.sh b/tensorflow/tools/ci_build/per_release/macos/cpu_libtensorflow.sh similarity index 100% rename from tensorflow/tools/ci_build/per_release/macos/cpu_libtensorflow_release.sh rename to tensorflow/tools/ci_build/per_release/macos/cpu_libtensorflow.sh From 145d21a90dc65b49a190e85e5478a8b2124f2ecd Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Tue, 21 Jul 2020 15:58:05 -0700 Subject: [PATCH 0983/2522] Open source distributed_tpu_rewrite_pass.cc and associated helper methods PiperOrigin-RevId: 322460893 Change-Id: I8ca6164e8c4ce2b6d6e79db66fbb028305634ca5 --- tensorflow/core/tpu/graph_rewrite/BUILD | 118 + .../core/tpu/graph_rewrite/cond_builder.cc | 83 + .../core/tpu/graph_rewrite/cond_builder.h | 74 + .../distributed_tpu_rewrite_pass.cc | 4105 +++++++++++++++++ .../distributed_tpu_rewrite_pass.h | 589 +++ .../distributed_tpu_rewrite_pass_internal.cc | 45 + .../distributed_tpu_rewrite_pass_internal.h | 38 + .../host_training_loop_optimization_util.cc | 629 +++ .../host_training_loop_optimization_util.h | 80 + .../incomplete_nodedef_builder.cc | 73 + 
.../incomplete_nodedef_builder.h | 58 + .../tpu_rewrite_pass_registration.cc | 4 +- .../stream_executor/multi_platform_manager.cc | 18 +- .../stream_executor/multi_platform_manager.h | 4 + tensorflow/stream_executor/tpu/BUILD | 1 + .../tpu/tpu_platform_interface.cc | 27 +- .../tpu/tpu_platform_interface.h | 3 + tensorflow/stream_executor/tpu/tpu_topology.h | 1 + 18 files changed, 5939 insertions(+), 11 deletions(-) create mode 100644 tensorflow/core/tpu/graph_rewrite/cond_builder.cc create mode 100644 tensorflow/core/tpu/graph_rewrite/cond_builder.h create mode 100644 tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.cc create mode 100644 tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.h create mode 100644 tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.cc create mode 100644 tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.h create mode 100644 tensorflow/core/tpu/graph_rewrite/host_training_loop_optimization_util.cc create mode 100644 tensorflow/core/tpu/graph_rewrite/host_training_loop_optimization_util.h create mode 100644 tensorflow/core/tpu/graph_rewrite/incomplete_nodedef_builder.cc create mode 100644 tensorflow/core/tpu/graph_rewrite/incomplete_nodedef_builder.h diff --git a/tensorflow/core/tpu/graph_rewrite/BUILD b/tensorflow/core/tpu/graph_rewrite/BUILD index 69238456d57..bffb44c1b97 100644 --- a/tensorflow/core/tpu/graph_rewrite/BUILD +++ b/tensorflow/core/tpu/graph_rewrite/BUILD @@ -13,6 +13,7 @@ cc_library( srcs = ["tpu_rewrite_pass_registration.cc"], deps = [ ":distributed_tpu_configuration_rewrite_pass", + ":distributed_tpu_rewrite_pass", ":encapsulate_tpu_computations_pass", ":variable_merger_pass", "//tensorflow/core:core_cpu", @@ -101,3 +102,120 @@ cc_library( "@com_google_absl//absl/strings", ], ) + +cc_library( + name = "distributed_tpu_rewrite_pass_internal", + srcs = ["distributed_tpu_rewrite_pass_internal.cc"], + hdrs = ["distributed_tpu_rewrite_pass_internal.h"], + deps = [ + "//tensorflow/core:framework", + "@com_google_absl//absl/random", + ], +) + +cc_library( + name = "distributed_tpu_rewrite_pass", + srcs = [ + "distributed_tpu_rewrite_pass.cc", + ], + hdrs = [ + "distributed_tpu_rewrite_pass.h", + ], + deps = [ + ":cond_builder", + ":distributed_tpu_rewrite_helpers", + ":distributed_tpu_rewrite_pass_internal", + ":host_training_loop_optimization_util", + ":incomplete_nodedef_builder", + "//tensorflow/compiler/jit:encapsulate_util", + "//tensorflow/compiler/jit:shape_inference", + "//tensorflow/compiler/tf2xla:resource_operation_table", + "//tensorflow/compiler/tf2xla:sharding_util", + "//tensorflow/compiler/tf2xla:side_effect_util", + "//tensorflow/compiler/tf2xla:tf2xla_util", + "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/xla:array3d", + "//tensorflow/compiler/xla:array4d", + "//tensorflow/compiler/xla:xla_proto_cc", + "//tensorflow/compiler/xla/client:sharding_builder", + "//tensorflow/compiler/xla/service:computation_placer", + "//tensorflow/core:framework", + "//tensorflow/core:graph", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:session_options", + "//tensorflow/core/common_runtime:function", + "//tensorflow/core/common_runtime:graph_constructor", + "//tensorflow/core/common_runtime:lower_function_call_op", + "//tensorflow/core/common_runtime:lower_functional_ops", + "//tensorflow/core/common_runtime:lower_if_op", + "//tensorflow/core/common_runtime:lower_while_op", + 
"//tensorflow/core/common_runtime:optimization_registry", + "//tensorflow/core/protobuf/tpu:compile_metadata_proto_cc", + "//tensorflow/core/protobuf/tpu:dynamic_padding_proto_cc", + "//tensorflow/core/protobuf/tpu:topology_proto_cc", + "//tensorflow/core/tpu:tpu_compile_interface", + "//tensorflow/core/tpu:tpu_defs", + "//tensorflow/core/tpu/kernels:tpu_util_c_api_hdrs", + "//tensorflow/stream_executor/tpu:tpu_platform_interface", + "//tensorflow/stream_executor/tpu:tpu_topology_external", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:node_hash_map", + "@com_google_absl//absl/types:span", + ], +) + +cc_library( + name = "incomplete_nodedef_builder", + srcs = ["incomplete_nodedef_builder.cc"], + hdrs = ["incomplete_nodedef_builder.h"], + deps = [ + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/core:core_cpu", + "//tensorflow/core:core_cpu_lib", + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + ], +) + +cc_library( + name = "cond_builder", + srcs = ["cond_builder.cc"], + hdrs = ["cond_builder.h"], + deps = [ + ":incomplete_nodedef_builder", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/core:core_cpu", + "//tensorflow/core:core_cpu_lib", + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + ], +) + +cc_library( + name = "host_training_loop_optimization_util", + srcs = [ + "host_training_loop_optimization_util.cc", + ], + hdrs = [ + "host_training_loop_optimization_util.h", + ], + visibility = ["//visibility:public"], + deps = [ + ":distributed_tpu_rewrite_pass_internal", + "//tensorflow/compiler/tf2xla:functionalize_control_flow_util", + "//tensorflow/compiler/tf2xla:tf2xla_util", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework_internal", + "//tensorflow/core:graph", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core/protobuf/tpu:compile_metadata_proto_cc", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/container:node_hash_set", + "@com_google_absl//absl/types:optional", + ], +) diff --git a/tensorflow/core/tpu/graph_rewrite/cond_builder.cc b/tensorflow/core/tpu/graph_rewrite/cond_builder.cc new file mode 100644 index 00000000000..e16ae08aec3 --- /dev/null +++ b/tensorflow/core/tpu/graph_rewrite/cond_builder.cc @@ -0,0 +1,83 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/tpu/graph_rewrite/cond_builder.h" + +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/tpu/graph_rewrite/incomplete_nodedef_builder.h" + +namespace tensorflow { + +CondBuilder::CondBuilder(string name, string device, const NodeDebugInfo& debug, + Graph* graph) + : graph_(graph), name_(std::move(name)), device_(std::move(device)) { + auto new_name = [graph, this](string suffix) { + return graph->NewName(strings::StrCat(name_, "/", suffix)); + }; + TF_CHECK_OK( + IncompleteNodeDefBuilder::Identity(new_name("pred"), DT_BOOL, debug) + .Device(device_) + .Build(graph_, &pred_)); + Node* switch_pred; + TF_CHECK_OK( + IncompleteNodeDefBuilder::Switch(new_name("switch_pred"), DT_BOOL, debug) + .Device(device_) + .Build(graph_, &switch_pred)); + graph_->AddEdge(pred(), 0, switch_pred, 0); + graph_->AddEdge(pred(), 0, switch_pred, 1); + TF_CHECK_OK( + IncompleteNodeDefBuilder::Identity(new_name("switch_f"), DT_BOOL, debug) + .Device(device_) + .Build(graph_, &switch_f_)); + TF_CHECK_OK( + IncompleteNodeDefBuilder::Identity(new_name("switch_t"), DT_BOOL, debug) + .Device(device_) + .Build(graph_, &switch_t_)); + graph_->AddEdge(switch_pred, kElseBranch, switch_f_, 0); + graph_->AddEdge(switch_pred, kThenBranch, switch_t_, 0); + Node* merge_pred; + TF_CHECK_OK(IncompleteNodeDefBuilder::Merge(new_name("merge_pred"), DT_BOOL, + debug, /*n=*/2) + .Device(device_) + .Build(graph_, &merge_pred)); + graph_->AddEdge(switch_f_, 0, merge_pred, kElseBranch); + graph_->AddEdge(switch_t_, 0, merge_pred, kThenBranch); + // Note: when additional return values are added then there should be a + // control dependency between those merge nodes and control_successor_ to + // ensure that it is control successor of conditional. + control_successor_ = merge_pred; +} + +Node* CondBuilder::pred() { return pred_; } + +Node* CondBuilder::switch_f() { return switch_f_; } + +Node* CondBuilder::switch_t() { return switch_t_; } + +Node* CondBuilder::control_successor() { return control_successor_; } + +Status CondBuilder::AddInput(const string& input_name, const DataType& type, + const string& device, const NodeDebugInfo& debug, + Node** input) { + auto b = IncompleteNodeDefBuilder::Switch( + graph_->NewName(strings::StrCat(name_, "/", input_name)), type, debug); + TF_RETURN_IF_ERROR(b.Device(device).Build(graph_, input)); + graph_->AddEdge(pred(), 0, *input, 1); + return Status::OK(); +} + +} // namespace tensorflow diff --git a/tensorflow/core/tpu/graph_rewrite/cond_builder.h b/tensorflow/core/tpu/graph_rewrite/cond_builder.h new file mode 100644 index 00000000000..29e264dfc0a --- /dev/null +++ b/tensorflow/core/tpu/graph_rewrite/cond_builder.h @@ -0,0 +1,74 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CORE_TPU_GRAPH_REWRITE_COND_BUILDER_H_ +#define TENSORFLOW_CORE_TPU_GRAPH_REWRITE_COND_BUILDER_H_ + +#include + +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { + +// Conditional builder. +// Convenience builder to make it easy to construct a conditional. E.g., +// Node* pred = ...; +// CondBuilder cb("cond", g); +// auto switch_var = cb.AddInput("var", DT_RESOURCE); +// g->AddEdge(pred, 0, cb.pred(), 0); +// Will create the nodes of a conditional that takes as input a resource +// variable ("var") as input and that switches on pred. +// +// This currently only handles the case needed by distributed_tpu_rewrite_pass +// and is not completely general. +class CondBuilder { + public: + enum Branch { kElseBranch = 0, kThenBranch = 1 }; + + CondBuilder(string name, string device, const NodeDebugInfo& debug, + Graph* graph); + + // Returns node corresponding to the predicate input. + Node* pred(); + + // Returns node corresponding to switch_f branch of predicate switch. + Node* switch_f(); + + // Returns node corresponding to switch_t branch of predicate switch. + Node* switch_t(); + + // Returns node corresponding to control successor. + Node* control_successor(); + + // Returns the Switch node to feed a value of the given type into the + // conditional. + Status AddInput(const string& input_name, const DataType& type, + const string& device, const NodeDebugInfo& debug, + Node** input); + + private: + Node* control_successor_; + Node* switch_f_; + Node* switch_t_; + Node* pred_; + Graph* const graph_; + const string name_; + const string device_; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_TPU_GRAPH_REWRITE_COND_BUILDER_H_ diff --git a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.cc b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.cc new file mode 100644 index 00000000000..f0032f5dfd9 --- /dev/null +++ b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.cc @@ -0,0 +1,4105 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Compilation for distributed TPU (TPU_REPLICATED_CORE devices). 
+ +#include "tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.h" + +#include +#include + +#include "absl/algorithm/container.h" +#include "absl/container/flat_hash_map.h" +#include "tensorflow/compiler/jit/encapsulate_util.h" +#include "tensorflow/compiler/tf2xla/resource_operation_table.h" +#include "tensorflow/compiler/tf2xla/sharding_util.h" +#include "tensorflow/compiler/tf2xla/side_effect_util.h" +#include "tensorflow/compiler/tf2xla/tf2xla_util.h" +#include "tensorflow/compiler/xla/array3d.h" +#include "tensorflow/compiler/xla/array4d.h" +#include "tensorflow/compiler/xla/client/sharding_builder.h" +#include "tensorflow/compiler/xla/service/computation_placer.h" +#include "tensorflow/compiler/xla/xla.pb.h" +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/common_runtime/graph_constructor.h" +#include "tensorflow/core/common_runtime/lower_function_call_op.h" +#include "tensorflow/core/common_runtime/lower_functional_ops.h" +#include "tensorflow/core/common_runtime/lower_if_op.h" +#include "tensorflow/core/common_runtime/lower_while_op.h" +#include "tensorflow/core/common_runtime/optimization_registry.h" +#include "tensorflow/core/framework/function.h" +#include "tensorflow/core/framework/graph_to_functiondef.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/framework/partial_tensor_shape.h" +#include "tensorflow/core/framework/tensor.pb.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/framework/versions.pb.h" +#include "tensorflow/core/graph/algorithm.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/gtl/cleanup.h" +#include "tensorflow/core/lib/strings/proto_serialization.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/fingerprint.h" +#include "tensorflow/core/protobuf/tpu/compile_metadata.pb.h" +#include "tensorflow/core/protobuf/tpu/dynamic_padding.pb.h" +#include "tensorflow/core/protobuf/tpu/topology.pb.h" +#include "tensorflow/core/public/session_options.h" +#include "tensorflow/core/tpu/graph_rewrite/cond_builder.h" +#include "tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_helpers.h" +#include "tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.h" +#include "tensorflow/core/tpu/graph_rewrite/host_training_loop_optimization_util.h" +#include "tensorflow/core/tpu/graph_rewrite/incomplete_nodedef_builder.h" +#include "tensorflow/core/tpu/kernels/tpu_util_c_api.h" +#include "tensorflow/core/tpu/tpu_compile_interface.h" +#include "tensorflow/core/tpu/tpu_defs.h" +#include "tensorflow/core/util/device_name_utils.h" +#include "tensorflow/core/util/dump_graph.h" +#include "tensorflow/stream_executor/tpu/tpu_platform_interface.h" + +namespace tensorflow { + +namespace { + +// Device coordinates are defined as (x, y, z, core), thus resulting in a rank 4 +// topology. +constexpr int kTPUTopologyRank = 4; + +// An upper bound on how many cores may be present in the topology. +static constexpr int kTPUMaxTopologySize = 4096; + +// Attribute containing the serialized xla::OpSharding to be passed to the +// corresponding XLA HLO operation, which represents how a shape is distributed +// across logical cores, e.g., replication, single-device, or partitioning. 
+const char kShardingAttribute[] = "_XlaSharding"; + +const char kTPUPartitionedInput[] = "TPUPartitionedInput"; +const char kTPUPartitionedOutput[] = "TPUPartitionedOutput"; + +static const char* const kTPUCompilationResultAttr = "_tpu_compilation_status"; +static const char* const kPostDeviceRewriteAttr = "_post_device_rewrite"; + +class IntrusiveHeapLink { + public: + using size_type = size_t; + static constexpr size_type kNotMember = -1; + + IntrusiveHeapLink() = default; + + // Only IntrusiveHeap and LinkAccess objects should make these objects. + explicit IntrusiveHeapLink(size_type pos) : pos_{pos} {} + + // Only IntrusiveHeap and LinkAccess should get the value. + size_type get() const { return pos_; } + + private: + size_type pos_{kNotMember}; +}; + +template +struct IntrusiveHeapDataMemberLinkAccess { + IntrusiveHeapLink Get(const T* elem) const { return elem->*M; } + void Set(T* elem, IntrusiveHeapLink link) const { elem->*M = link; } +}; + +template +struct DefaultIntrusiveHeapLinkAccess { + IntrusiveHeapLink Get(const T* elem) const { return elem->heap; } + void Set(T* elem, IntrusiveHeapLink link) const { elem->heap = link; } +}; + +template , + typename Alloc = std::allocator> +class IntrusiveHeap { + public: + typedef typename IntrusiveHeapLink::size_type size_type; + typedef T value_type; + typedef T* pointer; + typedef const T* const_pointer; + typedef PtrCompare pointer_compare_type; + typedef LinkAccess link_access_type; + typedef Alloc allocator_type; + + explicit IntrusiveHeap( + const pointer_compare_type& comp = pointer_compare_type(), + const link_access_type& link_access = link_access_type(), + const allocator_type& alloc = allocator_type()) + : rep_(comp, link_access, alloc) {} + + size_type size() const { return heap().size(); } + + bool empty() const { return heap().empty(); } + + // Return the top element, but don't remove it. + pointer top() const { + DCHECK(!empty()); + return heap()[0]; + } + + // Remove the top() pointer from the heap and return it. + pointer Pop() { + pointer t = top(); + Remove(t); + return t; + } + + // Insert 't' into the heap. + void Push(pointer t) { + SetPositionOf(t, heap().size()); + heap().push_back(t); + FixHeapUp(t); + } + + // Adjust the heap to accommodate changes in '*t'. + void Adjust(pointer t) { + DCHECK(Contains(t)); + size_type h = GetPositionOf(t); + if (h != 0 && compare()(t, heap()[(h - 1) >> 1])) { + FixHeapUp(t); + } else { + FixHeapDown(t); + } + } + + // Remove the specified pointer from the heap. + void Remove(pointer t) { + DCHECK(Contains(t)); + size_type h = GetPositionOf(t); + SetPositionOf(t, IntrusiveHeapLink::kNotMember); + if (h == heap().size() - 1) { + // Fast path for removing from back of heap. + heap().pop_back(); + return; + } + // Move the element from the back of the heap to overwrite 't'. + pointer& elem = heap()[h]; + elem = heap().back(); + SetPositionOf(elem, h); // Element has moved, so update its link. + heap().pop_back(); + Adjust(elem); // Restore the heap invariant. + } + + void Clear() { heap().clear(); } + + bool Contains(const_pointer t) const { + size_type h = GetPositionOf(t); + return (h != IntrusiveHeapLink::kNotMember) && (h < size()) && + heap()[h] == t; + } + + void reserve(size_type n) { heap().reserve(n); } + + size_type capacity() const { return heap().capacity(); } + + allocator_type get_allocator() const { return rep_.heap_.get_allocator(); } + + private: + typedef std::vector heap_type; + + // Empty base class optimization for pointer_compare and link_access. 
+ // The heap_ data member retains a copy of the allocator, so it is not + // stored explicitly. + struct Rep : pointer_compare_type, link_access_type { + explicit Rep(const pointer_compare_type& cmp, + const link_access_type& link_access, + const allocator_type& alloc) + : pointer_compare_type(cmp), + link_access_type(link_access), + heap_(alloc) {} + heap_type heap_; // NOLINT + }; + + const pointer_compare_type& compare() const { return rep_; } + + const link_access_type& link_access() const { return rep_; } + + const heap_type& heap() const { return rep_.heap_; } + heap_type& heap() { return rep_.heap_; } + + size_type GetPositionOf(const_pointer t) const { + return link_access().Get(t).get(); + } + + void SetPositionOf(pointer t, size_type pos) const { + return link_access().Set(t, IntrusiveHeapLink(pos)); + } + + void FixHeapUp(pointer t) { + size_type h = GetPositionOf(t); + while (h != 0) { + size_type parent = (h - 1) >> 1; + if (compare()(heap()[parent], t)) { + break; + } + heap()[h] = heap()[parent]; + SetPositionOf(heap()[h], h); + h = parent; + } + heap()[h] = t; + SetPositionOf(t, h); + } + + void FixHeapDown(pointer t) { + size_type h = GetPositionOf(t); + for (;;) { + size_type kid = (h << 1) + 1; + if (kid >= heap().size()) { + break; + } + if (kid + 1 < heap().size() && compare()(heap()[kid + 1], heap()[kid])) { + ++kid; + } + if (compare()(t, heap()[kid])) { + break; + } + heap()[h] = heap()[kid]; + SetPositionOf(heap()[h], h); + h = kid; + } + + heap()[h] = t; + SetPositionOf(t, h); + } + + Rep rep_; +}; + +string CoreDeviceLabel(int core) { + return strings::StrCat("/device:", DEVICE_TPU_REPLICATED_CORE, ":", core); +} + +// Creates a unique node name with a particular prefix. +string UniqueNodeName(const StringPiece prefix, Graph* graph) { + return graph->NewName(strings::StrCat(prefix, "/_", internal::GetNodeId())); +} + +Status SetNodeDeviceForTPUCommunication(DeviceNameUtils::ParsedName device, + const string& target_device_type, + Node* node) { + TF_RET_CHECK(device.has_type && device.type == DEVICE_TPU_NODE); + TF_RET_CHECK(device.has_id); + TF_RET_CHECK(HasNodeAttr(node->def(), kXlaHasHostTransferAttrName)); + + // Store the device instance as an attr on the Node. + TF_RETURN_IF_ERROR(SetDeviceOrdinalAttributeForNode(node, device.id)); + + // Place the execute Op on the TPU_SYSTEM device so it can access the cache of + // compiled protos in the resource manager. + device.type = target_device_type; + device.id = 0; + + node->set_assigned_device_name(DeviceNameUtils::ParsedNameToString(device)); + return Status::OK(); +} + +// Iterate over the nodes in the original graph and find all the TPUReplicate +// nodes, and all the nodes that are part of outside_compilation clusters. 
+Status FindTaggedNodes( + Graph* graph, std::vector* replicate_nodes, + std::map* + outside_compilation_nodes, + std::map>* head_tail_outside_compilation_nodes) { + for (Node* node : graph->op_nodes()) { + if (node->type_string() == "_TPUReplicate") { + replicate_nodes->push_back(node); + const AttrValue* cluster_attr = node->attrs().Find(kTPUReplicateAttr); + if (cluster_attr == nullptr) { + return errors::Internal("TPUReplicate node ", node->name(), " has no ", + kTPUReplicateAttr, " attr."); + } else { + const string& cluster = cluster_attr->s(); + if (cluster.empty()) { + return errors::Internal("Attr ", kTPUReplicateAttr, " on node ", + node->name(), " has no string value."); + } + if (outside_compilation_nodes->find(cluster) != + outside_compilation_nodes->end()) { + return errors::Internal( + "TPUReplicate node ", node->name(), " has ", kTPUReplicateAttr, + " attr value '", cluster, + "' which is a duplicate of another TPUReplicate node in the " + "graph."); + } + (*outside_compilation_nodes)[cluster] = + DistributedTPURewritePass::OutsideCompilationNodeMap(); + (*head_tail_outside_compilation_nodes)[cluster] = std::vector(); + } + } + } + for (Node* node : graph->op_nodes()) { + if (node->type_string() != "_TPUReplicate") { + const AttrValue* cluster_attr = node->attrs().Find(kTPUReplicateAttr); + const AttrValue* outside_compilation_attr = + node->attrs().Find(kOutsideCompilationAttr); + if (cluster_attr == nullptr) { + if (outside_compilation_attr != nullptr) { + return errors::Internal("Node ", node->name(), " has ", + kOutsideCompilationAttr, " attr but no ", + kTPUReplicateAttr, " attr."); + } + } else { + const string& cluster = cluster_attr->s(); + if (cluster.empty()) { + return errors::Internal("Attr ", kTPUReplicateAttr, " on node ", + node->name(), " has no string value."); + } + const auto iter = outside_compilation_nodes->find(cluster); + if (iter == outside_compilation_nodes->end()) { + return errors::Internal( + "Attr ", kTPUReplicateAttr, " on node ", node->name(), + " does not correspond to a TPUReplicate node."); + } + if (outside_compilation_attr == nullptr) { + return errors::Internal("Node ", node->name(), " has ", + kTPUReplicateAttr, " attr but no ", + kOutsideCompilationAttr, " attr."); + } + const string& oc_cluster = outside_compilation_attr->s(); + if (oc_cluster.empty()) { + return errors::Internal("Attr ", kOutsideCompilationAttr, " on node ", + node->name(), " has no string value."); + } + + // Outside compilation cluster at head and tail of TPU computation has + // already been moved to host and is already replicated. As so, do not + // replicate outside compilation nodes with replica id attribute. + int replica_id; + if (TryGetNodeAttr(node->def(), kXlaReplicaIdAttrName, &replica_id)) { + const AttrValue* head_attr = + node->attrs().Find("_xla_only_arg_or_oc_input"); + const AttrValue* tail_attr = + node->attrs().Find("_xla_only_ret_or_oc_output"); + if (((head_attr != nullptr) && (head_attr->b())) || + ((tail_attr != nullptr) && (tail_attr->b()))) { + // This is safe as this has the same keys as + // outside_compilation_nodes which we already know has this key. + (*head_tail_outside_compilation_nodes)[cluster].push_back(node); + } + continue; + } + iter->second[oc_cluster].push_back(node); + } + } + } + return Status::OK(); +} + +// Helper class to spread TPU computation arguments and return values +// across cores. +// If all shapes are fully defined, balance by their size. 
+// If some of them are not fully defined, the undefined shapes size will +// be estimated with the average size of the fully defined ones. +// If none are defined, fall back to round-robin. +class TensorDevicePlacer { + public: + // Creates a TensorDevicePlacer object to distribute arguments or + // return values to a set of num_devices devices, where the types and + // the inferred shapes of the inputs (arguments or return values) are + // passed in types and shapes. + TensorDevicePlacer(int64 num_devices, const DataTypeVector& types, + const std::vector& shapes) + : index_nodes_(num_devices), sizes_(types.size()) { + int64 total_size = 0; + int64 num_defined = 0; + for (int64 i = 0; i < types.size(); ++i) { + sizes_[i] = GetInferredShapeSize(shapes[i], types[i]); + if (sizes_[i] >= 0) { + total_size += sizes_[i]; + ++num_defined; + } + } + // If a shape is undefined, select a size for it which is the average + // of the defined shapes. If no shapes are defined, assign 1 so that we + // get round-robin behavior. + int64 undefined_shape_size = + (num_defined > 0) ? total_size / num_defined : 1; + for (int64 i = 0; i < sizes_.size(); ++i) { + if (sizes_[i] < 0) { + sizes_[i] = undefined_shape_size; + } + } + + for (int64 i = 0; i < num_devices; ++i) { + heap_.Push(&index_nodes_[i]); + } + } + + // Reports that the argument/return-value at index has been assigned + // by the user to a given device. + void ReportDeviceAssigned(int64 device, int64 index) { + DeviceNode* node = &index_nodes_.at(device); + node->size += sizes_.at(index); + heap_.Adjust(node); + } + + // Retrieves the device at which the argument/return-value at index + // should be assigned to. + int64 RetrieveAssignment(int64 index) { + DeviceNode* node = heap_.top(); + int64 device = node - index_nodes_.data(); + node->size += sizes_.at(index); + heap_.Adjust(node); + return device; + } + + private: + struct DeviceNode { + struct Compare { + // Compare functor to implement a min heap using the ::gtl::IntrusiveHeap + // infrastructure. + bool operator()(const DeviceNode* lhs, const DeviceNode* rhs) const { + return lhs->size < rhs->size; + } + }; + + IntrusiveHeapLink heap; + int64 size = 0; + }; + + static int64 GetInferredShapeSize(const InferredShape& ishape, + DataType dtype) { + return ishape.shape.IsFullyDefined() + ? ishape.shape.num_elements() * DataTypeSize(dtype) + : -1; + } + + std::vector index_nodes_; + IntrusiveHeap heap_; + std::vector sizes_; +}; + +Status ValidateCoreNumber(int64 core, int64 num_cores_per_replica) { + if (core < 0 || core >= num_cores_per_replica) { + return tensorflow::errors::InvalidArgument("Invalid core ID: ", core, + ". 
The valid core IDs are [0..", + num_cores_per_replica, ")"); + } + return Status::OK(); +} + +Status FindHostComputeKeyPlaceholderNodes( + const Graph* graph, const std::vector& replicate_nodes, + std::unordered_map* host_compute_key_placeholder_map) { + host_compute_key_placeholder_map->clear(); + for (const auto node : replicate_nodes) { + (*host_compute_key_placeholder_map)[node->name()] = nullptr; + } + + for (Node* node : graph->op_nodes()) { + if (node->type_string() == "Placeholder" && + str_util::EndsWith(node->name(), "_key_placeholder")) { + const AttrValue* call_node_attr = + node->attrs().Find("_host_compute_call_node"); + if (call_node_attr != nullptr) { + auto iter = host_compute_key_placeholder_map->find(call_node_attr->s()); + if (iter == host_compute_key_placeholder_map->end()) { + return errors::InvalidArgument( + "Node ", node->name(), " has _host_compute_call_node attribute '", + call_node_attr->s(), "' that doesn't correspond to a call node"); + } + if (iter->second != nullptr) { + return errors::InvalidArgument( + "Key placeholder node ", iter->second->name(), " for call node ", + call_node_attr->s(), " previously found as ", + iter->second->name()); + } + iter->second = node; + } + } + } + + return Status::OK(); +} + +Status ReplaceCompilationResultNodeWithIdentity(Graph* graph, Node** node) { + Node* old_node = *node; + // We want to replace the node with an identity node with the same name. + const string& node_name = old_node->name(); + + // Create identity node. + TF_ASSIGN_OR_RETURN( + Node * id_node, + BuildIdentityNode(graph, node_name, DT_STRING, + /*input=*/nullptr, /*requested_device=*/"")); + + // No incoming edges are copied as a new one will be added from compile node + // to id_node. + + // Copy outgoing edges to the id node. 
+ std::vector out_edges(old_node->out_edges().begin(), + old_node->out_edges().end()); + for (const Edge* edge : out_edges) { + Node* dst = edge->dst(); + int src_output = edge->src_output(); + int dst_input = edge->dst_input(); + + if (src_output == Graph::kControlSlot) { + graph->AddControlEdge(id_node, dst); + } else { + graph->AddEdge(id_node, src_output, dst, dst_input); + } + graph->RemoveEdge(edge); + } + graph->RemoveNode(old_node); + + *node = id_node; + return Status::OK(); +} + +Status FillPaddingMap( + const Node& replicate_node, + protobuf::RepeatedPtrField* padding_maps) { + std::vector padding_map_strs; + TF_RETURN_IF_ERROR( + GetNodeAttr(replicate_node.attrs(), "padding_map", &padding_map_strs)); + padding_maps->Reserve(padding_map_strs.size()); + for (const string& padding_map_str : padding_map_strs) { + tpu::PaddingMap* padding_map = padding_maps->Add(); + if (!padding_map->ParseFromString(padding_map_str)) { + return errors::InvalidArgument( + "Malformed padding_map serialized string: ", padding_map_str); + } + } + return Status::OK(); +} + +Status GetStepMarkerLocation(const Node& replicate_node, + xla::DebugOptions::StepMarkerLocation* location) { + string step_marker_location_attr; + TF_RETURN_IF_ERROR(GetNodeAttr(replicate_node.attrs(), "step_marker_location", + &step_marker_location_attr)); + if (step_marker_location_attr.empty()) { + *location = xla::DebugOptions::STEP_MARK_AT_ENTRY; + } else { + if (!xla::DebugOptions::StepMarkerLocation_Parse(step_marker_location_attr, + location)) { + return errors::InvalidArgument("Malformed step_marker_location: ", + step_marker_location_attr); + } + } + return Status::OK(); +} + +// Extracts a map of dimension and number of splits for tiled input from xla +// sharding attribute. +Status GetDimensionIndicesAndNumSplitsFromSharding( + const xla::OpSharding& sharding, std::map* split_dimension_map) { + for (int dim_index = 0; + dim_index < sharding.tile_assignment_dimensions_size(); dim_index++) { + if (sharding.tile_assignment_dimensions(dim_index) > 1) { + split_dimension_map->emplace( + dim_index, sharding.tile_assignment_dimensions(dim_index)); + } + } + + if (split_dimension_map->empty()) { + return errors::InvalidArgument("Arg has unnecessary tiled sharding: ", + sharding.DebugString()); + } + return Status::OK(); +} + +// Updates contents of the function with `function_name` in function library +// definition `flib_def` to `new_graph`. This is required when graph +// transformation happens inside a function call body. +Status UpdateFunctionLibDefinition(const Graph& new_graph, + const std::string& function_name, + FunctionLibraryDefinition* flib_def) { + FunctionDef graph_fdef; + TF_RETURN_IF_ERROR(GraphToFunctionDef(new_graph, function_name, &graph_fdef)); + TF_RETURN_IF_ERROR(flib_def->ReplaceFunction(function_name, graph_fdef)); + return Status::OK(); +} + +struct NodeOut { + Node* node; + int index; +}; + +struct ShardedInputIndex { + int replica_id; + int argument_index; + + bool operator<(const ShardedInputIndex& rhs) const { + return std::tie(replica_id, argument_index) < + std::tie(rhs.replica_id, rhs.argument_index); + } +}; + +struct ShardedInputInfo { + // Split node that would be connected to tiled input Node. + Node* split_node; + // List of splits nodes and output index of the split node from which sharded + // input will be connected to the TPUExecute node. The inputs are ordered by + // logical core ids. 
+ std::vector sharded_inputs; +}; + +// Adds split node and split dimension node to graph for sharding tiled inputs. +// |graph| owns the returned Node* instance. +xla::StatusOr CreateSplitNode(int num_splits, int dim, + int orig_src_output, DataType dtype, + absl::string_view name_prefix, + Node* control_predecessor, Node* orig_src, + Graph* graph) { + const std::string input_assigned_device = orig_src->assigned_device_name(); + + // Add a split dimension node. + NodeDef split_dim_def; + split_dim_def.set_name( + graph->NewName(absl::StrCat(name_prefix, "/split_dim"))); + split_dim_def.set_op("Const"); + split_dim_def.set_device(input_assigned_device); + AddNodeAttr("dtype", DT_INT32, &split_dim_def); + TensorProto tensor_proto; + tensor_proto.set_dtype(DT_INT32); + tensor_proto.add_int_val(dim); + TensorShape shape({}); + shape.AsProto(tensor_proto.mutable_tensor_shape()); + AddNodeAttr("value", tensor_proto, &split_dim_def); + Status s; + Node* split_dim_node = graph->AddNode(split_dim_def, &s); + TF_RETURN_IF_ERROR(s); + // Add a split node. + NodeDef split_def; + split_def.set_name(graph->NewName(absl::StrCat(name_prefix, "/split"))); + split_def.set_op("Split"); + split_def.set_device(input_assigned_device); + AddNodeAttr("num_split", num_splits, &split_def); + AddNodeAttr("T", dtype, &split_def); + split_def.add_input(absl::StrCat(split_dim_node->name(), ":0")); + split_def.add_input(absl::StrCat(orig_src->name(), ":", orig_src_output)); + Node* split_node = graph->AddNode(split_def, &s); + TF_RETURN_IF_ERROR(s); + + graph->AddEdge(split_dim_node, 0, split_node, 0); + graph->AddEdge(orig_src, orig_src_output, split_node, 1); + + // Add a control dependency from `control_predecessor` to newly created + // constant node. This ensures that newly added split/split dim + // nodes are placed inside correct while loop frames when TPUExecute + // node is inside a host training loop. + graph->AddControlEdge(control_predecessor, split_dim_node); + + return split_node; +} + +// Creates a set of splits nodes that shards tiled input node in graph. +xla::StatusOr CreateOrGetSplitNodesForInputSharding( + const xla::OpSharding& sharding, int orig_arg_num, DataType dtype, + int replica_id, int orig_src_output, Node* orig_src, + Node* control_predecessor, Graph* graph, + std::map* + arg_index_to_sharded_input_map) { + ShardedInputIndex input_index{replica_id, orig_arg_num}; + auto iter = arg_index_to_sharded_input_map->find(input_index); + if (iter != arg_index_to_sharded_input_map->end()) { + return iter->second; + } + // Maps input dimension and number of splits with which the + // dimension sharded. + std::map split_dimension_map; + TF_RETURN_IF_ERROR(GetDimensionIndicesAndNumSplitsFromSharding( + sharding, &split_dimension_map)); + TF_RET_CHECK(!split_dimension_map.empty()) + << "Unnecessary sharding attribute found."; + + // For v1 while loop, nodes inside the loop body must either + // 1) Have data edges from while loop input node. + // or + // 2) Have direct control dependency from while loop input control + // node. + // + // As so, if we are adding Split node inside, while loop body, + // we must manually add a control dependency to a node inside + // a while loop (i.e. `control_predecessor`) to constant nodes + // without data in-edges to make sure that added split nodes + // have correct frame name. Else, placer will complain when + // `BuildControlFlow()` is invoked. 
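  // A rough worked example of the tree built below (illustrative only): for a
  // sharding with tile_assignment_dimensions [2, 2], split_dimension_map is
  // {0: 2, 1: 2}. The root Split node splits the input along dim 0 into two
  // halves; each of its outputs then feeds a second-level Split along dim 1,
  // giving four shards in row-major order:
  //   shard 0 -> tile (0, 0)    shard 1 -> tile (0, 1)
  //   shard 2 -> tile (1, 0)    shard 3 -> tile (1, 1)
  // Those four NodeOuts are what ends up in ShardedInputInfo::sharded_inputs.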
+ + auto sharding_it = split_dimension_map.begin(); + std::queue split_nodes_for_dimension; + int split_dimension = sharding_it->first; + int num_split = sharding_it->second; + + // Creates a tree of split nodes for sharding tiled inputs. Splits nodes + // are created such that input data is sharded in row major order. + // Split nodes at ith depth from the original input node represent nodes + // that split the input data at ith dimension. + TF_ASSIGN_OR_RETURN( + Node * root_split_node, + CreateSplitNode(num_split, split_dimension, orig_src_output, dtype, + absl::StrCat("sharded_input/replica_", replica_id, + "_dim_", split_dimension), + control_predecessor, orig_src, graph)); + sharding_it++; + + split_nodes_for_dimension.emplace(root_split_node); + + while (sharding_it != split_dimension_map.end()) { + split_dimension = sharding_it->first; + num_split = sharding_it->second; + int num_split_nodes_in_dimension = split_nodes_for_dimension.size(); + for (int i = 0; i < num_split_nodes_in_dimension; ++i) { + Node* input_split_node = split_nodes_for_dimension.front(); + split_nodes_for_dimension.pop(); + for (int src_output_index = 0; + src_output_index < input_split_node->num_outputs(); + ++src_output_index) { + TF_ASSIGN_OR_RETURN( + Node * split_node, + CreateSplitNode(num_split, split_dimension, src_output_index, dtype, + absl::StrCat("sharded_input/replica_", replica_id, + "_dim_", split_dimension), + control_predecessor, input_split_node, graph)); + split_nodes_for_dimension.emplace(split_node); + } + } + sharding_it++; + } + + // `split_nodes_for_dimension` now includes final split nodes + // from which sharded data will be fed into TPUExcute nodes -- sorted by + // row major order. + std::vector sharded_inputs_list; + sharded_inputs_list.reserve(split_nodes_for_dimension.size()); + while (!split_nodes_for_dimension.empty()) { + Node* split_node = split_nodes_for_dimension.front(); + split_nodes_for_dimension.pop(); + int num_splits; + TF_RETURN_IF_ERROR( + GetNodeAttr(split_node->def(), "num_split", &num_splits)); + for (int out_index = 0; out_index < num_splits; ++out_index) { + sharded_inputs_list.emplace_back(NodeOut{split_node, out_index}); + } + } + + ShardedInputInfo sharded_input_info{root_split_node, + std::move(sharded_inputs_list)}; + (*arg_index_to_sharded_input_map)[input_index] = sharded_input_info; + return sharded_input_info; +} + +// Creates a concat node to be used for aggregating sharded retvals across +// logical cores. +xla::StatusOr CreateConcatNode(int dim, int num_splits, DataType dtype, + absl::string_view name_prefix, + const std::vector& inputs, + Graph* graph, absl::string_view device) { + // Add a Concat dim node. + NodeDef concat_dim_def; + concat_dim_def.set_name( + graph->NewName(absl::StrCat(name_prefix, "/concat_dim"))); + concat_dim_def.set_op("Const"); + AddNodeAttr("dtype", DT_INT32, &concat_dim_def); + concat_dim_def.set_device(std::string(device)); + TensorProto tensor_proto; + tensor_proto.set_dtype(DT_INT32); + tensor_proto.add_int_val(dim); + TensorShape shape({}); + shape.AsProto(tensor_proto.mutable_tensor_shape()); + AddNodeAttr("value", tensor_proto, &concat_dim_def); + Status s; + Node* concat_dim_node = graph->AddNode(concat_dim_def, &s); + TF_RETURN_IF_ERROR(s); + + // Add a Concat node. 
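  // Roughly, the resulting op is wired as (sketch, assuming num_splits shards):
  //   Concat(concat_dim, shard_0, ..., shard_{num_splits-1})
  // i.e. input 0 is the scalar dimension constant created above, inputs
  // 1..num_splits are the sharded values, and the "N" attr set below records
  // how many shards are being merged back together along `dim`.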
+ NodeDef concat_def; + concat_def.set_name(graph->NewName(absl::StrCat(name_prefix, "/concat"))); + concat_def.set_op("Concat"); + AddNodeAttr("N", num_splits, &concat_def); + AddNodeAttr("T", dtype, &concat_def); + concat_def.add_input(absl::StrCat(concat_dim_node->name(), ":0")); + concat_def.set_device(std::string(device)); + for (const auto& i : inputs) { + concat_def.add_input(absl::StrCat(i.node->name(), ":", i.index)); + } + Node* concat_node = graph->AddNode(concat_def, &s); + TF_RETURN_IF_ERROR(s); + + graph->AddEdge(concat_dim_node, 0, concat_node, 0); + + // 0th input to concat node is a concat dim node. So we start from 1st input + // and add all input edges. + int dst_input = 1; + for (const auto& i : inputs) { + graph->AddEdge(i.node, i.index, concat_node, dst_input); + ++dst_input; + } + return concat_node; +} + +// Creates a set of Concat nodes that aggregates sharded outputs from TPUExecute +// nodes into a single output. Sharded outputs are concatenated along row major +// order. That is, tiled output along 0th dimension will be concatenated last. +xla::StatusOr CreateConcatNodesForRetval( + const xla::OpSharding& sharding, DataType dtype, int replica_id, + const std::vector& orig_inputs, Graph* graph, + absl::string_view device) { + std::map split_dimension_map; + TF_RETURN_IF_ERROR(GetDimensionIndicesAndNumSplitsFromSharding( + sharding, &split_dimension_map)); + + std::vector inputs_to_sharded_retval = orig_inputs; + + for (auto it = split_dimension_map.rbegin(); it != split_dimension_map.rend(); + it++) { + auto dim = it->first; + auto num_splits = it->second; + + int num_concat_nodes = inputs_to_sharded_retval.size() / num_splits; + int input_index_to_concat_node = 0; + + std::vector new_concat_nodes; + for (int i = 0; i < num_concat_nodes; ++i) { + auto concat_input_it = + inputs_to_sharded_retval.begin() + input_index_to_concat_node; + std::vector inputs(concat_input_it, + concat_input_it + num_splits); + input_index_to_concat_node += num_splits; + + TF_ASSIGN_OR_RETURN( + Node * concat_node, + CreateConcatNode( + dim, num_splits, dtype, + absl::StrCat("sharded_output/replica_", replica_id, "_dim_", dim), + inputs, graph, device)); + new_concat_nodes.emplace_back(NodeOut{concat_node, 0}); + } + inputs_to_sharded_retval = new_concat_nodes; + } + + TF_RET_CHECK(inputs_to_sharded_retval.size() == 1); + return inputs_to_sharded_retval.at(0).node; +} + +absl::optional GetCoreIndexInSharding(const xla::OpSharding& sharding, + int64 core) { + absl::optional output_index; + for (int i = 0; i < sharding.tile_assignment_devices_size(); i++) { + int64 assigned_core = sharding.tile_assignment_devices(i); + if (assigned_core == core) { + output_index = i; + break; + } + } + return output_index; +} + +// Set the padding ops the same devices as the original inputs. If the original +// inputs are on TPUs, the padding ops will be placed on TPUs and XLA on demand +// mode will be triggered, so we don't need to copy the data back to the host +// to do the padding. 
+Status SetPaddingNodesDevices(Graph* graph) { + for (Node* n : graph->op_nodes()) { + bool tpu_padding_attr; + if (n->type_string() == "Pad" && + GetNodeAttr(n->attrs(), kPostDeviceRewriteAttr, &tpu_padding_attr) + .ok()) { + Node* unpadded_input; + TF_RETURN_IF_ERROR(n->input_node(0, &unpadded_input)); + + const string& requested_device = unpadded_input->requested_device(); + const string& assigned_device = unpadded_input->assigned_device_name(); + if (!requested_device.empty() || !assigned_device.empty()) { + // The output nodes of the original unpadded inputs include the padded + // inputs and real shapes of inputs, we assign those to the same device + // as the original inputs. + for (Node* out : unpadded_input->out_nodes()) { + if (GetNodeAttr(out->attrs(), kPostDeviceRewriteAttr, + &tpu_padding_attr) + .ok()) { + out->set_requested_device(requested_device); + out->set_assigned_device_name(assigned_device); + } + } + // There might be a tf.shape node added before TPUCompileOp, we need to + // set its device as well. + for (Node* out : n->out_nodes()) { + if (n->type_string() == "Shape") { + out->set_requested_device(requested_device); + out->set_assigned_device_name(assigned_device); + } + } + } + } + } + return Status::OK(); +} + +const string& AssignedOrRequestedDevice(const Node* node) { + if (!node->assigned_device_name().empty()) { + return node->assigned_device_name(); + } + return node->requested_device(); +} + +bool IsTpuDevice(const string& device_string) { + DeviceNameUtils::ParsedName device; + return DeviceNameUtils::ParseFullName(device_string, &device) && + device.type == DEVICE_TPU_NODE; +} + +// Returns a set of device ops can be placed on TPU. There is no strict rule of +// thumb to decide which ops should be in the list, but empirically they are +// mostly dummy ops like Identity-like ops or control flow related ops. However +// people can add also add other ops like Pad to allow data stay on TPU. +const absl::flat_hash_set& PlaceOnTPUOpList() { + static const auto place_on_tpu_ops = new absl::flat_hash_set( + {"Identity", "IdentityN", "Enter", "Exit", "Switch", "Merge", + "NextIteration", "Shape"}); + return *place_on_tpu_ops; +} + +// If an op satisfies the following conditions, it will be placed on the same +// TPU device as its inputs: +// (1) The op can be placed on TPU (in the PlaceOnTPUOpList) +// (2) The op itself has no requested or assigned devices. +// (3) All the data inputs of this op are placed on the same device on TPUs. +// There are exceptions like the NextIterations input of Switch node can +// be placed on CPU as it is just a boolean. +// +// Returns true if the node device has been changed, otherwise returns false. +bool PlaceOpsOnTPU(Node* node) { + if (!AssignedOrRequestedDevice(node).empty() || + !PlaceOnTPUOpList().contains(node->type_string())) { + return false; + } + string src_tpu_device = ""; + Node* src_node; + for (const Edge* e : node->in_edges()) { + if (e->IsControlEdge()) { + continue; + } + Node* src = e->src(); + const string& src_device = AssignedOrRequestedDevice(src); + + // Make exceptions that we don't force the some inputs to place on TPUs. 
+ if (node->IsSwitch() && src->IsLoopCond()) { + continue; + } + + if (!IsTpuDevice(src_device) || + (!src_tpu_device.empty() && src_device != src_tpu_device)) { + return false; + } + if (src_tpu_device.empty()) { + src_tpu_device = src_device; + src_node = src; + } + } + node->set_assigned_device_name(src_node->assigned_device_name()); + node->set_requested_device(src_node->requested_device()); + return true; +} + +// Validate sharding configuration derived from XlaSharding attribute. +// Infer the core id from the OpSharding, if necessary. +Status ParseAndValidateSharding(const xla::OpSharding& sharding, + const int num_cores_per_replica, + int64* inferred_core_id, + absl::optional* result) { + if (sharding.type() == xla::OpSharding::MAXIMAL) { + int64 core_annotation = sharding.tile_assignment_devices(0); + TF_RETURN_IF_ERROR( + ValidateCoreNumber(core_annotation, num_cores_per_replica)); + if (*inferred_core_id == -1 || *inferred_core_id > core_annotation) { + *inferred_core_id = core_annotation; + result->emplace(sharding); + } + } else { + if (sharding.type() == xla::OpSharding::OTHER) { + for (int64 core : sharding.tile_assignment_devices()) { + TF_RETURN_IF_ERROR(ValidateCoreNumber(core, num_cores_per_replica)); + } + } + + if (!result->has_value()) { + *result = sharding; + } else { + std::string result_value_serialized; + std::string sharding_serialized; + SerializeToStringDeterministic(result->value(), &result_value_serialized); + SerializeToStringDeterministic(sharding, &sharding_serialized); + + if (result_value_serialized != sharding_serialized) { + // We see different shardings, assign to core 0. + result->emplace(xla::sharding_builder::AssignDevice(0)); + } + } + } + return Status::OK(); +} + +// As XlaSharding node may be followed by Cast op or an Identity op, +// recursively walk the graph and aggregate nodes connectd to +// |input_node| or Cast/Identity op following the |input_node|. +void FindNodesMaybeContainingShardingInfo(const Node& input_node, + std::vector* nodes) { + if (input_node.IsIdentity() || input_node.type_string() == "Cast") { + for (const Node* connected_node : input_node.out_nodes()) + FindNodesMaybeContainingShardingInfo(*connected_node, nodes); + } + nodes->emplace_back(&input_node); +} + +// Parse sharding configuration from |node| or it's adjacent nodes. +// XlaSharding configuration may be derived from +// a) Connected Identity op node. +// b) Connected Cast op node. +xla::StatusOr> +ParseInputShardingFromAdjacentNode(const int num_cores_per_replica, + const Node& node) { + // If |node| has `device` attribute or is a XlaSharding op, + // return the parsed OpSharding. + TF_ASSIGN_OR_RETURN(absl::optional sharding, + ParseShardingFromDevice(node, num_cores_per_replica)); + if (sharding.has_value()) return sharding; + + // XlaShardingOp may be followed by an identity or followed by identity + // and a Cast op. + std::vector potential_nodes_with_input_sharding; + FindNodesMaybeContainingShardingInfo(node, + &potential_nodes_with_input_sharding); + for (const Node* maybe_node_with_sharding_info : + potential_nodes_with_input_sharding) { + if (maybe_node_with_sharding_info->type_string() != "XlaSharding") continue; + + TF_ASSIGN_OR_RETURN(absl::optional sharding_config, + ParseShardingFromDevice(*maybe_node_with_sharding_info, + num_cores_per_replica)); + if (sharding_config.has_value()) return sharding_config; + } + return sharding; +} + +// Walk the graph from an argument node to find OpSharding configuration +// from its neighbor nodes. 
Sharding configuration may be inferred from +// 1) Parsing XlaSharding attribute from neighboring node. +// 2) If argument node is a resource, then by parsing adjacent nodes +// of the connected ReadVariable op. +Status ParseAndValidateShardingFromNeighbors( + const int num_cores_per_replica, const std::string& arg_node_name, + const Node& neighbor_node, int64* inferred_core_id, bool* is_fast_mem, + absl::optional* result) { + if (neighbor_node.attrs().Find(TPU_FAST_MEM_ATTR) != nullptr) { + *is_fast_mem = true; + VLOG(2) << "place " << neighbor_node.name() << " on fast memory because " + << arg_node_name << " has " << TPU_FAST_MEM_ATTR << " attribute"; + } + + // XlaSharding information may be encoded on node directly connected to the + // argument node. + TF_ASSIGN_OR_RETURN( + absl::optional sharding, + ParseInputShardingFromAdjacentNode(num_cores_per_replica, neighbor_node)); + if (sharding.has_value()) { + TF_RETURN_IF_ERROR(ParseAndValidateSharding( + *sharding, num_cores_per_replica, inferred_core_id, result)); + return Status::OK(); + } + + // When we use variable in TPU computation, we always have a + // XlaSharding op followed by a ReadVariableOp. As so, correctly parse + // the users of ReadVariableOp for potential sharding configuration. + if (neighbor_node.type_string() == "ReadVariableOp") { + for (const Edge* e : neighbor_node.out_edges()) { + if (e->IsControlEdge()) continue; + + if (e->dst()->attrs().Find(TPU_FAST_MEM_ATTR) != nullptr) { + *is_fast_mem = true; + VLOG(2) << "place " << arg_node_name << " on fast memory because " + << e->dst()->name() << TPU_FAST_MEM_ATTR << " attribute"; + } + + TF_ASSIGN_OR_RETURN( + absl::optional sharding, + ParseInputShardingFromAdjacentNode(num_cores_per_replica, *e->dst())); + if (sharding.has_value()) { + TF_RETURN_IF_ERROR(ParseAndValidateSharding( + *sharding, num_cores_per_replica, inferred_core_id, result)); + return Status::OK(); + } + } + } + return Status::OK(); +} + +} // namespace + +// Inputs: +// replication_spec_string: the device to which the TPUReplicate node was +// assigned. +// device_set: the set of TF devices. +// Outputs: +// tpu_compilation_device: the name of the TPU compilation device. +// num_tpus_per_task: the number of TPUs in each task. Verifies that all tasks +// have the same number of TPU devices. +// tpu_devices: the TPU devices, indexed by [task][device]. +static Status GetTPUDeviceNames( + const string& replication_spec_string, const DeviceSet& device_set, + string* tpu_compilation_device, int* num_tpus_per_task, + std::vector>* tpu_devices) { + // TODO(b/110910013) GetSystemDevice parses the spec and returns the name of + // the tpu_system device, which we replace by the cpu device. We do this + // replacement because we want to place the TPUCompileOp (and the compile + // assert op) explicitly on cpu devices on the same job as the tpu_system + // device. + DeviceNameUtils::ParsedName replication_spec; + Device* replication_device; + TF_RETURN_IF_ERROR(DistributedTPURewriteHelpers::GetSystemDevice( + replication_spec_string, device_set, &replication_spec, + &replication_device)); + *tpu_compilation_device = + str_util::StringReplace(replication_device->name(), DEVICE_TPU_SYSTEM, + DEVICE_CPU, /*replace_all=*/true); + + // Finds the set of TPU devices attached to the tasks in the job. 
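  // As a rough example (device names are illustrative): for a 2-task job with
  // 8 TPU cores per task, GetTPUDevices is expected to set num_tpus_per_task
  // to 8 and fill tpu_devices so that, e.g., (*tpu_devices)[1][3] is the
  // Device* named something like "/job:worker/replica:0/task:1/device:TPU:3",
  // while the tpu_compilation_device computed above is the matching CPU
  // device on the same job (DEVICE_TPU_SYSTEM replaced by DEVICE_CPU).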
+ TF_RETURN_IF_ERROR(DistributedTPURewriteHelpers::GetTPUDevices( + replication_spec, device_set, num_tpus_per_task, tpu_devices)); + + return Status::OK(); +} + +// Parses the topology attribute of TPUReplicate, and populates *topology with +// a physical mesh coordinate to (task, device) mapping. +static Status ParseTopologyAttr(const string& topology_attr, + const tpu::TpuTopologyExternal& tpu_topology, + int num_tasks, int num_tpus_per_task, + xla::Array4D>* topology) { + static_assert(4 == kTPUTopologyRank, "Assumes the topology rank is 4"); + tpu::TopologyProto proto; + proto.ParseFromString(topology_attr); + if (proto.mesh_shape_size() != kTPUTopologyRank) { + return errors::InvalidArgument("TPU topology must be rank ", + kTPUTopologyRank); + } + if (proto.num_tasks() != num_tasks) { + return errors::InvalidArgument("Mismatched number of TPU tasks"); + } + if (proto.num_tpu_devices_per_task() != num_tpus_per_task) { + return errors::InvalidArgument("Mismatched number of TPUs per task (", + proto.num_tpu_devices_per_task(), + " != ", num_tpus_per_task, ")."); + } + if (proto.device_coordinates_size() != + num_tasks * num_tpus_per_task * kTPUTopologyRank) { + return errors::InvalidArgument( + "device coordinates should be ", num_tasks, "x", num_tpus_per_task, "x", + kTPUTopologyRank, "; got ", proto.device_coordinates_size()); + } + + int devices_per_chip = tpu_topology.LogicalDevicesPerChip(kTensorCore); + *topology = xla::Array4D>( + tpu_topology.chip_bounds().x, tpu_topology.chip_bounds().y, + tpu_topology.chip_bounds().z, devices_per_chip, {-1, -1}); + int pos = 0; + for (int task = 0; task < num_tasks; ++task) { + for (int device = 0; device < num_tpus_per_task; ++device) { + int32 x = proto.device_coordinates(pos++); + int32 y = proto.device_coordinates(pos++); + int32 z = proto.device_coordinates(pos++); + int32 core = proto.device_coordinates(pos++); + + if (!tpu_topology.HasChip(x, y, z) || core < 0 || + core >= devices_per_chip) { + return errors::InvalidArgument( + "Mesh coordinates (", x, ",", y, ",", z, ",", core, + ") are not valid for the current TPU topology"); + } + if ((*topology)(x, y, z, core).first != -1) { + return errors::InvalidArgument("Duplicate coordinates (", x, ",", y, + ",", z, ",", core, ") in TPU topology"); + } + (*topology)(x, y, z, core) = {task, device}; + } + } + return Status::OK(); +} + +// Parses the value of the device_assignment attribute to TPUReplicate. +// Populates *device_assignment; *device_assignment must be a 2D array with +// shape (num_replicas, num_cores_per_replica). 
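// A small worked example of the attribute layout parsed below (illustrative
// values): with num_replicas = 2 and num_cores_per_replica = 1 the attribute
// must contain 2 * 1 * 4 = 8 integers, read as consecutive (x, y, z, core)
// tuples in replica-major order, e.g.
//   device_assignment_attr = [0,0,0,0,  1,0,0,0]
// places replica 0 on core 0 of the chip at mesh position (0,0,0) and
// replica 1 on core 0 of the chip at (1,0,0).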
+static Status ParseDeviceAssignmentAttr( + absl::Span device_assignment_attr, + const tpu::TpuTopologyExternal& tpu_topology, int num_replicas, + int num_cores_per_replica, + xla::Array2D* device_assignment) { + static_assert(4 == kTPUTopologyRank, "Assumes the topology rank is 4"); + + const int64 device_assignment_attr_size = + num_replicas * num_cores_per_replica * kTPUTopologyRank; + if (device_assignment_attr.size() != device_assignment_attr_size) { + return errors::InvalidArgument( + "Length of device_assignment attribute must be equal to num_replicas (", + num_replicas, ") * num_cores_per_replica (", num_cores_per_replica, + ") * ", kTPUTopologyRank, " got ", device_assignment_attr.size()); + } + for (int core : device_assignment_attr) { + if (core < 0 || core >= kTPUMaxTopologySize) { + return errors::InvalidArgument( + "Invalid core number in device assignment: ", core); + } + } + + *device_assignment = xla::Array2D( + num_replicas, num_cores_per_replica); + int devices_per_chip = tpu_topology.LogicalDevicesPerChip(kTensorCore); + xla::Array4D replica_assignment( + tpu_topology.chip_bounds().x, tpu_topology.chip_bounds().y, + tpu_topology.chip_bounds().z, devices_per_chip, -1); + int pos = 0; + for (int replica = 0; replica < num_replicas; ++replica) { + for (int logical_core = 0; logical_core < num_cores_per_replica; + ++logical_core) { + int32 x = device_assignment_attr[pos++]; + int32 y = device_assignment_attr[pos++]; + int32 z = device_assignment_attr[pos++]; + int32 core = device_assignment_attr[pos++]; + + if (!tpu_topology.HasChip(x, y, z) || core < 0 || + core >= devices_per_chip) { + return errors::InvalidArgument( + "Mesh coordinates (", x, ",", y, ",", core, + ") are not valid for the current TPU topology"); + } + tpu::TpuCoreLocationExternal core_location = + tpu_topology.Core(x, y, z, kTensorCore, core); + + if (replica_assignment(x, y, z, core) != -1) { + return errors::InvalidArgument("Duplicate coordinates (", x, ",", y, + ",", z, ",", core, + ") in TPU device assignment"); + } + replica_assignment(x, y, z, core) = replica; + (*device_assignment)(replica, logical_core) = core_location; + } + } + return Status::OK(); +} + +// Builds TensorFlow device assignments for the special case of a single core +// computation that is replicated to every core in the mesh. +// LINT.IfChange +static Status BuildFullMeshDeviceAssignment( + int num_replicas, const std::vector>& tpu_devices, + int num_tasks, int num_tpus_per_task, + std::vector>* tf_device_assignment) { + // Assign TensorFlow devices to replicas arbitrarily. + for (int i = 0; i < num_replicas; ++i) { + int task = i / num_tpus_per_task; + int device = i % num_tpus_per_task; + TF_RET_CHECK(task >= 0 && task < num_tasks); + TF_RET_CHECK(device >= 0 && device < num_tpus_per_task); + + // We don't actually know which TF device corresponds to which physical + // device, but it doesn't matter—they're all identical. + (*tf_device_assignment)[i] = {tpu_devices[task][device]->name()}; + } + return Status::OK(); +} +// LINT.ThenChange(//tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.cc) + +// Builds TensorFlow device assignments for a replicated computation and convert +// device_assignment into xla_device_assignment. 
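// For contrast with the general case below, the full-mesh helper above is
// just modular arithmetic over the flat replica index. A rough example with
// num_tasks = 2 and num_tpus_per_task = 4 (so 8 replicas):
//   replica 5 -> task 5 / 4 = 1, device 5 % 4 = 1
// i.e. (*tf_device_assignment)[5] = { tpu_devices[1][1]->name() }.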
+static Status BuildGeneralDeviceAssignment( + int num_replicas, int num_cores_per_replica, + const std::vector>& tpu_devices, + const xla::Array2D& device_assignment, + const xla::Array4D>& topology, + std::vector>* tf_device_assignment, + std::unique_ptr* xla_device_assignment) { + // Assign TensorFlow devices to each computation's replicas according to + // device_assignment and 'topology'. + *xla_device_assignment = absl::make_unique( + num_replicas, num_cores_per_replica); + for (int replica = 0; replica < num_replicas; ++replica) { + for (int computation = 0; computation < num_cores_per_replica; + ++computation) { + const tpu::TpuCoreLocationExternal& core_location = + device_assignment(replica, computation); + + int task; + int device; + std::tie(task, device) = + topology(core_location.chip_coordinates().x, + core_location.chip_coordinates().y, + core_location.chip_coordinates().z, core_location.index()); + + CHECK_LT(computation, num_cores_per_replica); + (**xla_device_assignment)(replica, computation) = core_location.Id(); + + // The communication pattern between replicas will be determined later by + // BuildAllReduceRing. + TF_RET_CHECK(task >= 0 && task < tpu_devices.size()); + TF_RET_CHECK(device >= 0 && device < tpu_devices[task].size()); + (*tf_device_assignment)[replica].push_back( + tpu_devices[task][device]->name()); + } + } + return Status::OK(); +} + +/*static*/ Status DistributedTPURewritePass::BuildDeviceAssignment( + const tpu::TpuTopologyExternal& tpu_topology, int num_tpus_per_task, + const std::vector>& tpu_devices, int num_replicas, + int num_cores_per_replica, const string& topology_attr, + absl::Span device_assignment_attr, + std::vector>* tf_device_assignment, + std::unique_ptr* xla_device_assignment) { + const int num_tasks = tpu_devices.size(); + const int num_tpu_devices = num_tasks * num_tpus_per_task; + VLOG(2) << "num_tasks=" << num_tasks + << " num_tpus_per_task=" << num_tpus_per_task; + + // Checks num_replicas is sane first to avoid integer overflow. + if (num_replicas > num_tpu_devices) { +#ifdef PLATFORM_CLOUD_TPU + return errors::InvalidArgument("Requested num_replicas=", num_replicas, + " but there are only ", num_tpu_devices, + " cores in the TPU topology."); +#else + return errors::InvalidArgument("Requested num_replicas=", num_replicas, + " but there are only ", num_tpu_devices, + " cores in the TPU topology."); +#endif + } + if (num_replicas * num_cores_per_replica > num_tpu_devices) { + return errors::InvalidArgument( + "Requested num_replicas=", num_replicas, " with ", + num_cores_per_replica, " cores per replica, but there are only ", + num_tpu_devices, " cores in the TPU topology"); + } + + tf_device_assignment->clear(); + tf_device_assignment->resize(num_replicas); + + // Special case: we allow the user to omit the topology and device assignment + // information in two cases: + // * there is only one replica and one core per replica. In this case, we + // don't need to know topology information because we don't communicate with + // other cores. + // * the number of replicas is equal to the number of cores in the slice. In + // this case, all cores are running the same program so we don't need to + // know which is which. 
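  // Concretely (illustrative numbers): with num_tpu_devices = 8, the topology
  // and device assignment attributes may be omitted only when
  // num_replicas == 1 (single core, no cross-core communication) or
  // num_replicas == 8 (one replica per core, all cores interchangeable); both
  // cases also require num_cores_per_replica == 1 and an empty
  // device_assignment_attr, which is what the branch below enforces.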
+ if (topology_attr.empty()) { + // LINT.IfChange + if (num_replicas != 1 && num_replicas != num_tpu_devices) { + return errors::InvalidArgument( + "TPUReplicate asked to create ", num_replicas, + " replicas, but the number of cores in the TPU topology is ", + num_tpu_devices, + " and no TPU device assignment was supplied. " + "A TPU device assignment is required if the number of replicas is " + "not 1 or the number of cores in the topology (", + num_tpu_devices, ")"); + } + + if (num_cores_per_replica != 1) { + return errors::InvalidArgument( + "A TPU topology must be provided if num_cores_per_replica != 1"); + } + + if (!device_assignment_attr.empty()) { + return errors::InvalidArgument( + "A TPU topology must be provided if device_assignment_attr is " + "non-empty"); + } + + // If there is only one replica, assign the Tensorflow computation to task 0 + // device 0, and leave the XLA device assignment empty. We don't know which + // core this is in the TPU topology, but it doesn't matter—we don't need to + // communicate with any other cores. + if (num_replicas == 1) { + (*tf_device_assignment)[0] = {tpu_devices[0][0]->name()}; + return Status::OK(); + } + + // Otherwise, num_replicas is equal to the number of cores, and we build a + // device assignment that covers the entire mesh. We do not need to know + // the topology to do so because all cores are identical. + return BuildFullMeshDeviceAssignment(num_replicas, tpu_devices, num_tasks, + num_tpus_per_task, + tf_device_assignment); + // LINT.ThenChange(//tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.cc) + } + + // Array that maps mesh coordinates to {TF task, TF TPU device #} pairs. + xla::Array4D> topology; + TF_RETURN_IF_ERROR(ParseTopologyAttr(topology_attr, tpu_topology, num_tasks, + num_tpus_per_task, &topology)); + + // Array that maps logical (replica, core) pairs to physical mesh coordinates. + xla::Array2D device_assignment; + TF_RETURN_IF_ERROR(ParseDeviceAssignmentAttr( + device_assignment_attr, tpu_topology, num_replicas, num_cores_per_replica, + &device_assignment)); + + return BuildGeneralDeviceAssignment( + num_replicas, num_cores_per_replica, tpu_devices, device_assignment, + topology, tf_device_assignment, xla_device_assignment); +} + +Status DistributedTPURewritePass::GetComputationForTPUReplicateOp( + const NameAttrList& function, FunctionLibraryRuntime* flr, + Graph* computation, DataTypeVector* arg_types, + DataTypeVector* retval_types) { + FunctionLibraryRuntime::Handle handle; + + TF_RETURN_IF_ERROR( + flr->Instantiate(function.name(), AttrSlice(&function.attr()), &handle)); + + const FunctionBody* fbody = flr->GetFunctionBody(handle); + + CopyGraph(*fbody->graph, computation); + *arg_types = fbody->arg_types; + *retval_types = fbody->ret_types; + return Status::OK(); +} + +// Grab the InferredShape corresponding to an edge input. 
+static Status GetEdgeShape(const GraphShapeInfo& shape_info, const Edge& edge, + const InferredShape** info) { + auto it = shape_info.find(edge.src()->name()); + if (it == shape_info.end()) { + return errors::InvalidArgument( + "Input to replicated TPU computation is missing InferredShape: ", + edge.src()->name()); + } + TF_RET_CHECK(it->second.size() > edge.src_output()); + *info = &it->second[edge.src_output()]; + return Status::OK(); +} + +Status DistributedTPURewritePass::GetArgAndRetvalShapes( + const GraphShapeInfo& shape_info, const Node& node, + const ParameterInfo& params_info, std::vector* arg_shapes, + std::vector* retval_shapes) { + std::vector input_edges; + TF_RETURN_IF_ERROR(node.input_edges(&input_edges)); + + // If any replica's arg shape is unknown, we will mark the computation's arg + // shape as being unknown. If the shapes differ the TpuExecute Op will raise a + // runtime error. + std::vector any_replica_shape_unknown( + params_info.NumInputsToEachReplica()); + arg_shapes->clear(); + arg_shapes->resize(params_info.NumInputsToEachReplica()); + TF_RET_CHECK(input_edges.size() == params_info.NumInputsFromHost()); + // Determines the shapes of the per-replica arguments and checks that all + // replicas have identical shapes. + int64 edge_pos = 0; + auto check_shape = [&](int input_index) -> Status { + const InferredShape* info; + TF_RETURN_IF_ERROR(GetEdgeShape(shape_info, *input_edges[edge_pos], &info)); + ++edge_pos; + + if ((info->handle_type == DT_INVALID && !info->shape.IsFullyDefined()) || + (info->handle_type != DT_INVALID && + !info->handle_shape.IsFullyDefined())) { + any_replica_shape_unknown[input_index] = true; + } + xla::StatusOr status = + MergeInferredShapes((*arg_shapes)[input_index], *info); + if (!status.ok()) { + return errors::InvalidArgument( + "Mismatched shapes for input ", input_index, ": ", + (*arg_shapes)[input_index].shape.DebugString(), " vs. ", + info->shape.DebugString()); + } + (*arg_shapes)[input_index] = status.ValueOrDie(); + return Status::OK(); + }; + + for (int64 i = 0; i < params_info.NumReplicas(); ++i) { + for (int64 j = 0; j < params_info.NumPerReplicaArgs(); ++j) { + TF_RETURN_IF_ERROR(check_shape(j)); + } + } + + for (int64 i = 0; i < params_info.NumDistributedArgs(); ++i) { + TF_RETURN_IF_ERROR(check_shape(params_info.NumPerReplicaArgs() + i)); + } + + for (int64 i = 0; + i < params_info.NumPerReplicaArgs() + params_info.NumDistributedArgs(); + ++i) { + if (any_replica_shape_unknown[i]) { + (*arg_shapes)[i].shape = PartialTensorShape(); + (*arg_shapes)[i].handle_shape = PartialTensorShape(); + } + } + + // Determines the shape of the broadcast arguments. + for (int64 i = 0; i < params_info.NumBroadcastArgs(); ++i) { + TF_RET_CHECK(node.input_type(edge_pos) != DT_RESOURCE); + const InferredShape* info; + TF_RETURN_IF_ERROR(GetEdgeShape(shape_info, *input_edges[edge_pos], &info)); + (*arg_shapes)[i + params_info.NumPerReplicaArgs() + + params_info.NumDistributedArgs()] + .shape = info->shape; + ++edge_pos; + } + + // Determines the handle shape and handle type of the resource variable + // arguments. 
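  // The flattened argument order assumed throughout this function is
  //   [ per-replica args | distributed args | broadcast args | variables |
  //     guaranteed constants ]
  // so, as a rough example, with 2 per-replica args, 1 distributed arg and
  // 1 broadcast arg, the first variable lands at arg_shapes index
  // 2 + 1 + 1 = 4, which is the offset computed in the loop below.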
+ for (int64 i = 0; i < params_info.NumVariables(); ++i) { + TF_RET_CHECK(node.input_type(edge_pos) == DT_RESOURCE); + const InferredShape* info; + TF_RETURN_IF_ERROR(GetEdgeShape(shape_info, *input_edges[edge_pos], &info)); + InferredShape& arg_shape = + (*arg_shapes)[i + params_info.NumPerReplicaArgs() + + params_info.NumDistributedArgs() + + params_info.NumBroadcastArgs()]; + arg_shape.shape = TensorShape(); // Variables are always scalars. + arg_shape.handle_shape = info->handle_shape; + arg_shape.handle_type = info->handle_type; + TF_RET_CHECK(arg_shape.handle_type != DT_INVALID); + ++edge_pos; + } + + // Determines the shape of the guaranteed constants. + // TODO(vinuraja): Can be removed because they are not required for any + // calculations. Leaving them here for symmetry with other structures like + // arg_types, arg_sharding, etc. + for (int64 i = 0; i < params_info.NumGuaranteedConstants(); ++i) { + TF_RET_CHECK(node.input_type(edge_pos) != DT_RESOURCE); + const InferredShape* info; + TF_RETURN_IF_ERROR(GetEdgeShape(shape_info, *input_edges[edge_pos], &info)); + (*arg_shapes)[i + params_info.NumPerReplicaArgs() + + params_info.NumDistributedArgs() + + params_info.NumBroadcastArgs() + params_info.NumVariables()] + .shape = info->shape; + ++edge_pos; + } + + // Extract the return value shapes. + auto it = shape_info.find(node.name()); + retval_shapes->clear(); + if (it != shape_info.end()) { + TF_RET_CHECK(it->second.size() >= node.num_outputs()); + retval_shapes->resize(node.num_outputs()); + for (int i = 0; i < node.num_outputs(); ++i) { + (*retval_shapes)[i].shape = it->second[i].shape; + } + } else if (node.num_outputs() > 0) { + return errors::InvalidArgument( + "Replicated TPU computation is missing InferredShape: ", + FormatNodeForError(node)); + } + return Status::OK(); +} + +// Verifies that all nodes have legal sharding. +static Status ValidateCoreNumbers(const Graph& graph, + int num_cores_per_replica) { + for (Node* n : graph.nodes()) { + TF_ASSIGN_OR_RETURN(absl::optional sharding, + ParseShardingFromDevice(*n, num_cores_per_replica)); + } + return Status::OK(); +} + +static Status InferXlaShardingFromNeighbors( + const Node& n, int num_cores_per_replica, FunctionLibraryRuntime* flr, + CachedFunctionHandles* cached_function_handles, + absl::optional* output_sharding, bool* is_fast_mem) { + int64 core = -1; + absl::optional result; + // We assume the variable has been allocated on fast memory if any consuming + // op has TPU_FAST_MEM_ATTR attribute. This is a protocol between runtime and + // compiler. + *is_fast_mem = false; + for (const Edge* edge : n.out_edges()) { + if (edge->IsControlEdge()) continue; + + TF_RETURN_IF_ERROR(ParseAndValidateShardingFromNeighbors( + num_cores_per_replica, n.name(), *edge->dst(), &core, is_fast_mem, + &result)); + + if (!flr) continue; + + // The nodes deciding this arg's device assignment might be in + // FunctionDef. Instantiate FunctionDefs associated with this node + // and check nodes using this arg. 
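    // Roughly, the recursion below walks: arg -> consuming call op -> the
    // corresponding _Arg (matching "index") inside each associated function
    // body -> that _Arg's consumers, parsing sharding from each consumer and
    // recursing again whenever a consumer is itself a function call. The
    // CachedFunctionHandles instance avoids re-instantiating the same
    // function body repeatedly during this walk.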
+ std::function parse_sharding_from_function = + [&](const Edge* call_edge) { + auto associated_functions = GetAssociatedFunctions( + *call_edge->dst(), flr->GetFunctionLibraryDefinition()); + for (auto& associated_function : associated_functions) { + FunctionLibraryRuntime::Handle handle; + TF_RETURN_IF_ERROR(cached_function_handles->GetOrInstantiate( + associated_function.func_name(), + AttrSlice(&associated_function.attrs()), &handle)); + const FunctionBody* body = flr->GetFunctionBody(handle); + Graph* g = body->graph; + + for (Node* body_node : g->nodes()) { + if (!body_node->IsArg()) continue; + + int index; + TF_RETURN_IF_ERROR( + GetNodeAttr(body_node->attrs(), "index", &index)); + if (index != call_edge->dst_input()) continue; + + for (const Edge* out_edge : body_node->out_edges()) { + if (out_edge->IsControlEdge()) continue; + + TF_RETURN_IF_ERROR(ParseAndValidateShardingFromNeighbors( + num_cores_per_replica, n.name(), *out_edge->dst(), &core, + is_fast_mem, &result)); + + TF_RETURN_IF_ERROR(parse_sharding_from_function(out_edge)); + } + } + } + return Status::OK(); + }; + TF_RETURN_IF_ERROR(parse_sharding_from_function(edge)); + } + *output_sharding = result; + return Status::OK(); +} + +bool UseSpmdForXlaPartitioning(const Node* replicate_node) { + bool spmd_attr; + if (!replicate_node || + !TryGetNodeAttr(replicate_node->attrs(), "use_spmd_for_xla_partitioning", + &spmd_attr)) { + spmd_attr = false; + } + return spmd_attr; +} + +Status DistributedTPURewritePass::AssignArgsAndRetvalsToCores( + int num_cores_per_replica, const ParameterInfo& params_info, + const DataTypeVector& arg_types, + const std::vector& arg_shapes, + const DataTypeVector& retval_types, + const std::vector& retval_shapes, const Graph& graph, + const Node* replicate_node, FunctionLibraryRuntime* flr, + std::vector* arg_sharding, std::vector* arg_fast_mem, + std::vector* retval_sharding) { + // Builds vectors of the argument and return nodes. + std::vector args(arg_types.size()); + std::vector retvals(retval_types.size()); + absl::flat_hash_map partitioned_output_nodes; + for (Node* node : graph.op_nodes()) { + if (node->IsArg()) { + int index; + TF_RETURN_IF_ERROR(GetNodeAttr(node->attrs(), "index", &index)); + TF_RET_CHECK(index >= 0 && index < args.size()); + args[index] = node; + } else if (node->IsRetval()) { + int index; + TF_RETURN_IF_ERROR(GetNodeAttr(node->attrs(), "index", &index)); + TF_RET_CHECK(index >= 0 && index < retvals.size()); + retvals[index] = node; + } + } + for (const Edge* edge : replicate_node->out_edges()) { + int num_partitioned_outputs = 0; + for (const Edge* out_edge : edge->dst()->out_edges()) { + if (out_edge->dst()->type_string() == kTPUPartitionedOutput) { + partitioned_output_nodes[edge->src_output()] = out_edge->dst(); + num_partitioned_outputs++; + } + } + if (num_partitioned_outputs > 1) { + return errors::InvalidArgument( + "More than one TPUPartitionedOutput per replciated output."); + } + } + + // Verifies there are no missing arguments/return values. + for (int i = 0; i < args.size(); ++i) { + if (args[i] == nullptr) { + return errors::Internal("Missing function argument: ", i); + } + } + for (int i = 0; i < retvals.size(); ++i) { + if (retvals[i] == nullptr) { + return errors::Internal("Missing function return value: ", i); + } + } + + // Assigns a core to each _Arg. Chooses the lowest-numbered core that + // consumes the argument. We choose the lowest-numbered core so the + // assignment is deterministic. 
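  // A rough sketch of how the placer below spreads arguments when no explicit
  // sharding is found (illustrative sizes): with num_cores_per_replica = 2 and
  // three args of 4KB, 4KB and 8KB, RetrieveAssignment always pops the
  // least-loaded core from the intrusive min-heap, so the args land roughly as
  //   arg0 -> core 0 (load 4KB), arg1 -> core 1 (load 4KB),
  //   arg2 -> whichever of the two cores is popped first at the 4KB tie,
  // keeping per-core byte counts balanced. Arguments with explicit device
  // annotations instead go through ReportDeviceAssigned so their size still
  // counts toward that core's load.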
+ TensorDevicePlacer args_device_selector(num_cores_per_replica, arg_types, + arg_shapes); + arg_sharding->resize(args.size()); + arg_fast_mem->resize(args.size()); + CachedFunctionHandles cached_function_handles(flr); + const bool use_spmd = UseSpmdForXlaPartitioning(replicate_node) || + replicate_inputs_outputs_by_default_for_xla_spmd_; + for (int i = 0; i < args.size(); ++i) { + const Node* n = args[i]; + absl::optional assigned_core; + absl::optional sharding; + bool is_fast_mem; + TF_RETURN_IF_ERROR(InferXlaShardingFromNeighbors( + *n, num_cores_per_replica, flr, &cached_function_handles, &sharding, + &is_fast_mem)); + + if (params_info.IsPerReplicaArg(i) || params_info.IsDistributedArg(i)) { + Node* input_node; + TF_RETURN_IF_ERROR(replicate_node->input_node(i, &input_node)); + if (input_node->type_string() == kTPUPartitionedInput) { + TF_ASSIGN_OR_RETURN(absl::optional parsed_sharding, + GetShardingFromNodeDef(input_node->def())); + if (!parsed_sharding.has_value()) + return errors::InvalidArgument("Missing _XlaSharding attr from: ", + input_node->DebugString()); + sharding = parsed_sharding; + VLOG(1) << "Arg " << i << " parsed sharding information from " + << input_node->name() << " : " + << parsed_sharding->DebugString(); + } + } + + if (sharding.has_value() && enable_automatic_model_parallelism_) { + return tensorflow::errors::InvalidArgument( + "Specifying manual sharding is not allowed when automatic " + "model parallelism is enabled.", + sharding->DebugString()); + } + + if (!sharding.has_value()) { + if (use_spmd && + (params_info.IsVariableArg(i) || params_info.IsBroadcastArg(i) || + ((params_info.IsPerReplicaArg(i) || + params_info.IsDistributedArg(i)) && + arg_types[i] != DT_RESOURCE))) { + // Use replication for host variables or non-variable per-replica + // inputs. + sharding = xla::sharding_builder::Replicate(); + } else { + // TODO(dlibenzi): Distributing variables to cores other than 0 makes + // learning/brain/research/babelfish/trainer:trainer_tpu_test fail. + // For now distribute only per replica arguments, unless + // tf_jf_distribute_vars is set, to allow debugging the issue. 
+ if (((params_info.IsPerReplicaArg(i) || + params_info.IsDistributedArg(i)) && + arg_types[i] != DT_RESOURCE) || + (distribute_vars_ && params_info.IsVariableArg(i))) { + assigned_core = args_device_selector.RetrieveAssignment(i); + } else { + assigned_core = 0; + } + sharding = xla::sharding_builder::AssignDevice(*assigned_core); + } + } else if (sharding->type() == xla::OpSharding::MAXIMAL) { + assigned_core = sharding->tile_assignment_devices(0); + } else if (sharding->type() != xla::OpSharding::REPLICATED && + sharding->type() != xla::OpSharding::OTHER) { + return tensorflow::errors::InvalidArgument( + "Unsupported argument sharding: ", sharding->DebugString()); + } + if (assigned_core.has_value()) { + args_device_selector.ReportDeviceAssigned(*assigned_core, i); + VLOG(3) << "Assigning argument " << i << " (" << n->DebugString() + << ") to core " << *assigned_core; + args[i]->set_assigned_device_name(CoreDeviceLabel(*assigned_core)); + } else if (sharding->type() == xla::OpSharding::OTHER) { + for (int64 core : sharding->tile_assignment_devices()) { + args_device_selector.ReportDeviceAssigned(core, i); + VLOG(3) << "Assigning argument " << i << " (" << n->DebugString() + << ") with tiled sharding to core " << core; + } + } else { + CHECK_EQ(sharding->type(), xla::OpSharding::REPLICATED); + for (int64 core = 0; core < num_cores_per_replica; ++core) { + args_device_selector.ReportDeviceAssigned(core, i); + } + VLOG(3) << "Assigning argument " << i << " (" << n->DebugString() + << ") to all cores"; + } + (*arg_sharding)[i] = *sharding; + (*arg_fast_mem)[i] = is_fast_mem; + if (is_fast_mem) { + VLOG(3) << "Add " << TPU_FAST_MEM_ATTR << " attribute to " + << args[i]->name(); + } + args[i]->AddAttr(kShardingAttribute, sharding->SerializeAsString()); + } + TF_RETURN_IF_ERROR(cached_function_handles.ReleaseAllHandles()); + + // Assigns each _Retval node to the core that produces its value. 
+ TensorDevicePlacer retvals_device_selector(num_cores_per_replica, + retval_types, retval_shapes); + retval_sharding->resize(retvals.size()); + for (int i = 0; i < retvals.size(); ++i) { + const Edge* edge; + TF_RETURN_IF_ERROR(retvals[i]->input_edge(0, &edge)); + + TF_ASSIGN_OR_RETURN( + absl::optional sharding, + ParseShardingFromDevice(*edge->src(), num_cores_per_replica)); + + if (partitioned_output_nodes.contains(i)) { + Node* output_node = partitioned_output_nodes[i]; + TF_ASSIGN_OR_RETURN(absl::optional parsed_sharding, + GetShardingFromNodeDef(output_node->def())); + if (parsed_sharding.has_value()) { + sharding = parsed_sharding; + VLOG(1) << "Retval " << i << " parsed sharding information from " + << output_node->name() << " : " << sharding->DebugString(); + } + } + absl::optional assigned_core; + if (sharding.has_value()) { + if (enable_automatic_model_parallelism_) { + return tensorflow::errors::InvalidArgument( + "Specifying manual sharding is not allowed when automatic " + "model parallelism is enabled.", + sharding->DebugString()); + } + + if (sharding.value().type() == xla::OpSharding::MAXIMAL) { + assigned_core = sharding.value().tile_assignment_devices(0); + TF_RETURN_IF_ERROR( + ValidateCoreNumber(*assigned_core, num_cores_per_replica)); + } else if (sharding.value().type() != xla::OpSharding::REPLICATED && + sharding.value().type() != xla::OpSharding::OTHER) { + return tensorflow::errors::InvalidArgument( + "Unsupported argument sharding: ", sharding->DebugString()); + } + } else { + if (use_spmd) { + sharding = xla::sharding_builder::Replicate(); + } else { + if (distribute_vars_) { + assigned_core = retvals_device_selector.RetrieveAssignment(i); + } else { + assigned_core = 0; + } + sharding = xla::sharding_builder::AssignDevice(*assigned_core); + } + } + if (assigned_core.has_value()) { + retvals[i]->set_assigned_device_name(CoreDeviceLabel(*assigned_core)); + retvals_device_selector.ReportDeviceAssigned(*assigned_core, i); + VLOG(3) << "Assigning return value " << i << " (" + << retvals[i]->DebugString() << ") to core " << *assigned_core; + } else if (sharding->type() == xla::OpSharding::OTHER) { + for (int64 core : sharding->tile_assignment_devices()) { + retvals_device_selector.ReportDeviceAssigned(core, i); + VLOG(3) << "Assigning return value " << i << " (" + << retvals[i]->DebugString() << ") with tiled sharding to core " + << core; + } + } else { + CHECK_EQ(sharding->type(), xla::OpSharding::REPLICATED); + for (int64 core = 0; core < num_cores_per_replica; ++core) { + retvals_device_selector.ReportDeviceAssigned(core, i); + } + VLOG(3) << "Assigning return value " << i << " (" + << retvals[i]->DebugString() << ") to all cores."; + } + retvals[i]->AddAttr(kShardingAttribute, sharding->SerializeAsString()); + (*retval_sharding)[i] = *sharding; + } + return Status::OK(); +} + +// Builds Shape nodes that compute the shapes of arguments whose shapes are not +// statically known. +/* static */ Status DistributedTPURewritePass::BuildDynamicShapeNodes( + const Node& replicate_node, const std::vector& arg_shapes, + const ParameterInfo& params_info, const std::vector& variable_reads, + Graph* graph, std::vector* dynamic_shape_nodes) { + dynamic_shape_nodes->clear(); + + std::vector replicate_input_edges; + TF_RETURN_IF_ERROR(replicate_node.input_edges(&replicate_input_edges)); + + // The compiler determines the shape of each constant by inspecting the value + // of its corresponding host-memory tensor; this happens when a step is run. 
+ // As a result, the shapes of constants are not needed at graph rewrite time. + const int num_args = arg_shapes.size() - params_info.NumGuaranteedConstants(); + TF_RET_CHECK(num_args == params_info.NumPerReplicaArgs() + + params_info.NumDistributedArgs() + + params_info.NumBroadcastArgs() + + params_info.NumVariables()); + + for (int i = 0; i < num_args; ++i) { + const PartialTensorShape* shape = arg_shapes[i].handle_type == DT_INVALID + ? &arg_shapes[i].shape + : &arg_shapes[i].handle_shape; + if (!shape->IsFullyDefined()) { + Node* src; + int src_output; + if (params_info.IsPerReplicaArg(i)) { + TF_RET_CHECK(i < replicate_input_edges.size()); + // All replicas must have the same input shapes. Uses the shape of the + // inputs from the first replica. + src = replicate_input_edges[i]->src(); + src_output = replicate_input_edges[i]->src_output(); + } else if (params_info.IsDistributedArg(i) || + params_info.IsBroadcastArg(i)) { + int64 input_num = + params_info.NumPerReplicaArgs() * params_info.NumReplicas() + i - + params_info.NumPerReplicaArgs(); + TF_RET_CHECK(0 <= input_num && + input_num < replicate_input_edges.size()); + src = replicate_input_edges[input_num]->src(); + src_output = replicate_input_edges[input_num]->src_output(); + } else { + int64 var_num = i - params_info.NumPerReplicaArgs() - + params_info.NumDistributedArgs() - + params_info.NumBroadcastArgs(); + TF_RET_CHECK(0 <= var_num && var_num < variable_reads.size()); + src = variable_reads[var_num]; + src_output = 0; + } + + NodeDef def; + def.set_name(graph->NewName(strings::StrCat(src->name(), "/shape"))); + def.set_op("Shape"); + def.set_device(src->assigned_device_name()); + AddNodeAttr("T", src->output_type(src_output), &def); + AddNodeAttr("out_type", DT_INT64, &def); + MergeDebugInfo(NodeDebugInfo(replicate_node.def()), &def); + + Status status; + Node* shape_node = graph->AddNode(def, &status); + if (!status.ok()) return status; + dynamic_shape_nodes->push_back(shape_node); + + shape_node->set_assigned_device_name(src->assigned_device_name()); + graph->AddEdge(src, src_output, shape_node, 0); + } + } + return Status::OK(); +} + +// Builds a TPUCompile node that compiles the bodies of the function call +// `nodes`. +Status DistributedTPURewritePass::BuildCompileNode( + const Node* replicate_node, const NameAttrList& function, + uint64 library_fingerprint, const ParameterInfo& params_info, + const std::vector& arg_shapes, + const DataTypeVector& arg_types, + const std::vector& guaranteed_constant_nodes, + const string& session_handle, + const std::vector& arg_sharding, + const std::vector& arg_fast_mem, + const std::vector& retval_sharding, + int num_cores_per_replica, const string& compile_device, + const xla::DeviceAssignment* xla_device_assignment, + const std::vector& dynamic_shape_nodes, Graph* graph, + Node** compile_node, int64 autotuner_thresh) { + VLOG(1) << "BuildCompileNode"; + + tpu::TPUCompileMetadataProto proto; + proto.set_num_replicas(params_info.NumReplicas()); + proto.set_num_cores_per_replica(num_cores_per_replica); + proto.set_function_library_fingerprint(library_fingerprint); + proto.set_enable_automatic_model_parallelism( + enable_cross_replica_sharding_mirrored_variables_); + const bool use_spmd = UseSpmdForXlaPartitioning(replicate_node); + proto.set_use_spmd_for_xla_partitioning(use_spmd); + + // Get and fill padding map. 
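Both the shape-node construction above and the compile-metadata construction below depend on the flat ordering of TPUReplicate inputs: per-replica arguments first (repeated once per replica on the node itself), then distributed, broadcast, variable, and guaranteed-constant arguments. A minimal sketch of that index arithmetic, with a hypothetical ArgLayout struct standing in for ParameterInfo:

    #include <cassert>
    #include <cstdint>

    // Simplified mirror of the argument layout assumed by the rewrite pass.
    struct ArgLayout {
      int64_t num_replicas;
      int64_t num_per_replica;
      int64_t num_distributed;
      int64_t num_broadcast;
      int64_t num_variables;
    };

    // Which input edge of the flattened TPUReplicate node feeds argument `i`,
    // using replica 0's copy for per-replica arguments.
    int64_t ReplicateInputIndex(const ArgLayout& L, int64_t i) {
      if (i < L.num_per_replica) return i;  // replica 0's per-replica block
      // Skip the remaining replicas' per-replica blocks.
      return L.num_per_replica * L.num_replicas + (i - L.num_per_replica);
    }

    // Which variable read supplies argument `i`, for arguments past the
    // broadcast block.
    int64_t VariableIndex(const ArgLayout& L, int64_t i) {
      return i - L.num_per_replica - L.num_distributed - L.num_broadcast;
    }

    int main() {
      ArgLayout L{/*replicas=*/2, /*per_replica=*/3, /*distributed=*/1,
                  /*broadcast=*/2, /*variables=*/4};
      assert(ReplicateInputIndex(L, 2) == 2);  // 3rd per-replica arg, replica 0
      assert(ReplicateInputIndex(L, 3) == 6);  // 1st distributed arg
      assert(VariableIndex(L, 6) == 0);        // 1st variable
    }
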
+ if (replicate_node != nullptr) { + TF_RETURN_IF_ERROR( + FillPaddingMap(*replicate_node, proto.mutable_padding_maps())); + xla::DebugOptions::StepMarkerLocation location; + TF_RETURN_IF_ERROR(GetStepMarkerLocation(*replicate_node, &location)); + proto.set_step_marker_location(location); + } + + if (xla_device_assignment != nullptr) { + TF_RETURN_IF_ERROR( + xla_device_assignment->Serialize(proto.mutable_device_assignment())); + } + + const int num_args = arg_types.size(); + const int num_guaranteed_constants = guaranteed_constant_nodes.size(); + const int guaranteed_const_start_index = num_args - num_guaranteed_constants; + TF_RET_CHECK(num_args == arg_shapes.size()); + TF_RET_CHECK(num_args == arg_sharding.size()) + << num_args << " != " << arg_sharding.size(); + + for (int i = 0; i < num_args; ++i) { + tpu::TPUCompileMetadataProto::Arg* arg = proto.add_args(); + DataType type = arg_types[i]; + const InferredShape& arg_shape = arg_shapes[i]; + if (type == DT_RESOURCE) { + TF_RET_CHECK(arg_shape.handle_type != DT_INVALID) << i; + arg->set_dtype(arg_shape.handle_type); + arg_shape.handle_shape.AsProto(arg->mutable_shape()); + arg->set_kind(tpu::TPUCompileMetadataProto::Arg::VARIABLE); + arg->set_fast_mem(arg_fast_mem[i]); + } else { + arg->set_dtype(type); + arg_shape.shape.AsProto(arg->mutable_shape()); + if (i >= guaranteed_const_start_index) { + const DataType edge_type = + guaranteed_constant_nodes[i - guaranteed_const_start_index] + ->output_type(0); + TF_RET_CHECK(type == edge_type) + << "Arg type: " << type << " but edge type: " << edge_type; + arg->set_kind(tpu::TPUCompileMetadataProto::Arg::GUARANTEED_CONSTANT); + } else { + arg->set_kind(tpu::TPUCompileMetadataProto::Arg::PARAMETER); + } + } + // As long as the argument is not a per-replica one, it should have the same + // value for all replicas. For clarity, we keep the (redundant) checks for + // variable, broadcast and constant types, to prevent bugs in case new types + // with different semantics are introduced in the future. + arg->set_is_same_data_across_replicas( + !params_info.IsPerReplicaArg(i) && !params_info.IsDistributedArg(i) && + (params_info.IsVariableArg(i) || params_info.IsBroadcastArg(i) || + params_info.IsConstantArg(i))); + if (params_info.mirrored_variable_indices().count(i) > 0) { + CHECK_EQ(type, DT_RESOURCE); + arg->set_is_same_data_across_replicas(true); + // 64-bit type is not shardable by XLA:TPU yet. + bool sharding_enabled = (arg_shape.handle_type != DT_COMPLEX64 && + arg_shape.handle_type != DT_INT64 && + arg_shape.handle_type != DT_UINT64 && + arg_shape.handle_type != DT_DOUBLE); + arg->set_enable_xla_sharding( + sharding_enabled ? 
tpu::TPUCompileMetadataProto::Arg::TENTATIVE + : tpu::TPUCompileMetadataProto::Arg::DISALLOWED); + } + *arg->mutable_sharding() = arg_sharding[i]; + } + + const int num_retvals = retval_sharding.size(); + for (int i = 0; i < num_retvals; ++i) { + *proto.add_retvals()->mutable_sharding() = retval_sharding[i]; + } + proto.set_session_handle(session_handle); + + DataTypeVector constant_arg_types; + constant_arg_types.reserve(num_guaranteed_constants); + for (int i = 0; i < num_guaranteed_constants; ++i) { + constant_arg_types.push_back(arg_types[guaranteed_const_start_index + i]); + } + proto.set_xla_fusion_autotuner_thresh(autotuner_thresh); + + string metadata; + proto.SerializeToString(&metadata); + + NodeDef def; + def.set_name(UniqueNodeName("TPUReplicate/_compile", graph)); + def.set_op("TPUCompile"); + def.set_device(compile_device); + if (replicate_node) { + MergeDebugInfo(NodeDebugInfo(replicate_node->def()), &def); + } + + AddNodeAttr("function", function, &def); + AddNodeAttr("num_computations", num_cores_per_replica, &def); + AddNodeAttr("NumDynamicShapes", static_cast(dynamic_shape_nodes.size()), + &def); + AddNodeAttr("metadata", metadata, &def); + AddNodeAttr("Tguaranteed_constants", constant_arg_types, &def); + + Status status; + *compile_node = graph->AddNode(def, &status); + TF_RETURN_IF_ERROR(status); + + (*compile_node)->set_assigned_device_name(compile_device); + + for (int i = 0; i < dynamic_shape_nodes.size(); ++i) { + graph->AddEdge(dynamic_shape_nodes[i], 0, *compile_node, i); + } + + for (int i = 0; i < num_guaranteed_constants; ++i) { + graph->AddEdge(guaranteed_constant_nodes[i], 0, *compile_node, + dynamic_shape_nodes.size() + i); + } + VLOG(1) << "BuildCompileNode(): " << status; + return status; +} + +Status DistributedTPURewritePass::FindGuaranteedConstantInputs( + const Node& node, const NameRangeMap& input_range_map, + std::vector* guaranteed_constants) { + std::vector input_edges; + TF_RETURN_IF_ERROR(node.input_edges(&input_edges)); + std::pair variables_limits = + input_range_map.at("guaranteed_constants"); + for (int i = variables_limits.first; i < variables_limits.second; ++i) { + guaranteed_constants->push_back(input_edges[i]->src()); + } + return Status::OK(); +} + +Status DistributedTPURewritePass::FindVariableInputs( + const Node& node, const NameRangeMap& input_range_map, + std::vector* variables) { + std::vector input_edges; + TF_RETURN_IF_ERROR(node.input_edges(&input_edges)); + std::pair variables_limits = input_range_map.at("variables"); + for (int i = variables_limits.first; i < variables_limits.second; ++i) { + Node* node = input_edges[i]->src(); + + // Find the type of the VarHandleOp that feeds this node, looking through + // any wrapping Enter or Switch nodes. + while (node->IsEnter() || node->IsSwitch()) { + TF_RETURN_IF_ERROR(node->input_node(0, &node)); + } + // Fix the variable device assignment if it is requested with a full name. 
+ if (!node->has_assigned_device_name() && + !node->requested_device().empty()) { + DeviceNameUtils::ParsedName var_device; + TF_RET_CHECK(DeviceNameUtils::ParseFullName(node->requested_device(), + &var_device)); + if (var_device.has_job && var_device.has_replica && var_device.has_task && + var_device.has_type && var_device.has_id) { + node->set_assigned_device_name(node->requested_device()); + if (node != input_edges[i]->src() && + !input_edges[i]->src()->has_assigned_device_name()) { + input_edges[i]->src()->set_assigned_device_name( + node->requested_device()); + } + } + } + if (node->type_string() == "VarHandleOp") { + DataType dtype; + TF_RETURN_IF_ERROR(GetNodeAttr(node->attrs(), "dtype", &dtype)); + variables->push_back(VariableInput{input_edges[i]->src(), + input_edges[i]->src_output(), dtype}); + } else if (node->type_string() == "_Arg") { + std::vector dtypes; + TF_RETURN_IF_ERROR(GetNodeAttr(node->attrs(), "_handle_dtypes", &dtypes)); + if (dtypes.empty()) { + return errors::Internal( + "_Arg node with resource output must have non-empty _handle_dtypes " + "attribute: ", + node->DebugString()); + } + variables->push_back(VariableInput{ + input_edges[i]->src(), input_edges[i]->src_output(), dtypes[0]}); + } else { + return errors::Internal( + "Cannot handle variable input with node type other than VarHandleOp " + "and _Arg: ", + node->DebugString()); + } + } + return Status::OK(); +} + +// Builds a NoOp node, used for building control dependencies. +static Status BuildNoopNode(const Node& source, StringPiece name, + const string& device, Graph* graph, Node** node) { + NodeDefBuilder builder(name, "NoOp", NodeDebugInfo(source)); + if (!device.empty()) { + builder.Device(device); + } + NodeDef def; + TF_RETURN_IF_ERROR(builder.Finalize(&def)); + + Status status; + *node = graph->AddNode(def, &status); + if (!device.empty()) { + (*node)->set_assigned_device_name(device); + } + return status; +} + +Status DistributedTPURewritePass::ConnectHostComputeNodes( + Node* compile_node, Node* key_placeholder_node, Graph* graph) { + // First find all the downstream nodes of the key placeholder node, since we + // want to delete the connecting edges from key_placeholder_node which would + // invalidate the out_nodes iterator. 
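The comment above describes a pattern used repeatedly in this pass: snapshot a node's fan-out before mutating the graph, so the iterator being walked is never invalidated. A generic, standalone illustration with plain integers standing in for Node* and Edge*:

    #include <iostream>
    #include <set>
    #include <vector>

    // Removing elements from a container while iterating over it directly
    // would invalidate the iterator, so we snapshot the items first and then
    // mutate the original container.
    int main() {
      std::set<int> fan_out = {2, 3, 5, 7, 11};

      // 1) Snapshot the current fan-out.
      std::vector<int> snapshot(fan_out.begin(), fan_out.end());

      // 2) Now it is safe to erase from `fan_out` while walking the snapshot.
      for (int n : snapshot) {
        if (n % 2 == 1) fan_out.erase(n);  // e.g. "remove this edge"
      }

      for (int n : fan_out) std::cout << n << "\n";  // prints: 2
    }
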
+ std::vector host_transfer_nodes; + for (Node* node : key_placeholder_node->out_nodes()) { + host_transfer_nodes.push_back(node); + } + for (Node* node : host_transfer_nodes) { + int input_index = -1; + for (int i = 0; i < node->num_inputs(); i++) { + const Edge* e; + TF_RETURN_IF_ERROR(node->input_edge(i, &e)); + if (e->src() == key_placeholder_node) { + if (input_index != -1) { + return errors::Internal( + "Node ", node->name(), + " has multiple input edges from key placeholder node"); + } + input_index = e->dst_input(); + } + } + if (input_index == -1) { + return errors::Internal("Node ", node->name(), + " has no input edge from key placeholder node"); + } + const Edge* key_edge; + TF_RETURN_IF_ERROR(node->input_edge(input_index, &key_edge)); + graph->RemoveEdge(key_edge); + graph->AddEdge(compile_node, 1, node, input_index); + } + graph->RemoveNode(key_placeholder_node); + return Status::OK(); +} + +Status DistributedTPURewritePass::BuildVariableReads( + absl::Span variables, Node* control_predecessor, + Graph* graph, std::vector* variable_reads) { + variable_reads->resize(variables.size()); + for (int i = 0; i < variables.size(); ++i) { + string name = + graph->NewName(strings::StrCat(variables[i].node->name(), "/read")); + NodeDefBuilder builder(name, "ReadVariableOp", + NodeDebugInfo(*variables[i].node)); + + builder.Attr("dtype", variables[i].dtype); + builder.Device(variables[i].node->assigned_device_name()); + builder.Input(variables[i].node->name(), 0, DT_RESOURCE); + NodeDef def; + TF_RETURN_IF_ERROR(builder.Finalize(&def)); + + Status status; + Node* read_node; + (*variable_reads)[i] = read_node = graph->AddNode(def, &status); + if (!status.ok()) return status; + + read_node->set_requested_device(variables[i].node->requested_device()); + read_node->set_assigned_device_name( + variables[i].node->assigned_device_name()); + graph->AddEdge(variables[i].node, variables[i].index, read_node, 0); + + graph->AddControlEdge(control_predecessor, read_node); + } + return Status::OK(); +} + +bool DistributedTPURewritePass::ContainsResourceWriteOp( + const Graph& graph, const FunctionLibraryDefinition& fld) { + for (const Node* n : graph.nodes()) { + const XlaResourceOpInfo* op_info = GetResourceOpInfoForOp(n->type_string()); + if (op_info && op_info->kind() != XlaResourceOpKind::kRead) { + VLOG(2) << "Found write resource op inside computation"; + return true; + } + } + for (const string& func_name : fld.ListFunctionNames()) { + const FunctionDef* func_def = fld.Find(func_name); + for (const NodeDef& n : func_def->node_def()) { + const XlaResourceOpInfo* op_info = GetResourceOpInfoForOp(n.op()); + if (op_info && op_info->kind() != XlaResourceOpKind::kRead) { + VLOG(2) << "Found write resource op inside " << func_name; + return true; + } + } + } + return false; +} + +Status DistributedTPURewritePass::BuildVariableWrites( + absl::Span variables, Node* control_successor, + absl::Span variable_writes, Graph* graph) { + CHECK_EQ(variables.size(), variable_writes.size()); + for (int i = 0; i < variables.size(); ++i) { + const VariableWrite& write = variable_writes[i]; + NodeDebugInfo debug_info(*variables[i].node); + + auto name = [&](string suffix) { + return graph->NewName( + strings::StrCat(variables[i].node->name(), "/", suffix)); + }; + + Node* write_node; + TF_RETURN_IF_ERROR( + IncompleteNodeDefBuilder(name("assign"), "AssignVariableOp", debug_info) + .AddAttr("dtype", variables[i].dtype) + .Device(variables[i].node->assigned_device_name()) + .Build(graph, &write_node)); + + // Colocate 
the control flow with the variable. + CondBuilder cb(variables[i].node->name(), + variables[i].node->assigned_device_name(), debug_info, + graph); + + // Inputs to conditional. + Node* switch_val; + TF_RETURN_IF_ERROR( + cb.AddInput("switch_val", variables[i].dtype, + /*device=*/write.value->assigned_device_name(), debug_info, + &switch_val)); + Node* switch_var; + TF_RETURN_IF_ERROR( + cb.AddInput("switch_var", DT_RESOURCE, + /*device=*/variables[i].node->assigned_device_name(), + debug_info, &switch_var)); + // Conditionally write the value back. + graph->AddEdge(variables[i].node, variables[i].index, switch_var, 0); + graph->AddEdge(switch_var, CondBuilder::kThenBranch, write_node, 0); + graph->AddEdge(switch_val, CondBuilder::kThenBranch, write_node, 1); + // Add control edge from the write to value that will be merged. There is no + // output from the write so this control edge ensures the write completes. + graph->AddControlEdge(write_node, cb.switch_t()); + + graph->AddControlEdge(cb.control_successor(), control_successor); + + graph->AddEdge(write.predicate, write.predicate_output, cb.pred(), 0); + graph->AddEdge(write.value, write.value_output, switch_val, 0); + } + return Status::OK(); +} + +namespace { + +// Helper that creates an IdentityN node containing all of the variables +// values on CPU device 'device', except for those that will be split across +// cores. (For split variables, this may cause additional cross-host data +// transfers if more than 1 devices share the same variable partition on a +// remote host.) +// +// A previous iteration of this code built one Identity node per TPU core per +// variable, but this can rapidly become hundreds of thousands of nodes. This +// formulation creates a single IdentityN node containing all of the variables +// on each host. This may cause some unnecessary variable copies if only a +// subset of hosts consume a given variable, but has the virtue of being +// simple, and most models use pure replication where all cores want all the +// variables. +// +// Returns the node and its output index to be consumed by TPUExecute for the +// requested variable index. +xla::StatusOr CreateOrGetPerHostVariableCopy( + const string& host_cpu_device, int64 var_index, + const std::vector& variable_reads, + const DistributedTPURewritePass::ParameterInfo& params_info, + const std::vector& arg_shardings, + const Node& replicate_node, + absl::flat_hash_map>* per_host_var_copies, + Graph* graph) { + auto it = per_host_var_copies->find(host_cpu_device); + if (it != per_host_var_copies->end()) { + return it->second[var_index]; + } + + DataTypeVector dtypes; + // Per-variable data source for TPUExecute. + std::vector index_mapping; + index_mapping.reserve(variable_reads.size()); + dtypes.reserve(variable_reads.size()); + for (int64 i = 0; i < variable_reads.size(); ++i) { + Node* read = variable_reads[i]; + int64 orig_arg_num = + i + params_info.NumPerReplicaArgs() + params_info.NumBroadcastArgs(); + if (arg_shardings[orig_arg_num].type() != xla::OpSharding::OTHER) { + // We haven't built the IdentityN node yet, so temporarily use nullptr. + index_mapping.push_back( + NodeOut{nullptr, static_cast(dtypes.size())}); + dtypes.push_back(read->output_type(0)); + } else { + // Do not copy the full tensor of partitioned variables. 
+ index_mapping.push_back(NodeOut{read, 0}); + } + } + NodeDef ndef; + ndef.set_name( + graph->NewName(absl::StrCat(replicate_node.name(), "/_variable_copy"))); + ndef.set_op("IdentityN"); + ndef.set_device(host_cpu_device); + AddNodeAttr("T", dtypes, &ndef); + Status s; + Node* id_node = graph->AddNode(ndef, &s); + TF_RETURN_IF_ERROR(s); + id_node->set_assigned_device_name(host_cpu_device); + + for (int64 i = 0; i < variable_reads.size(); ++i) { + if (index_mapping[i].node == nullptr) { + // Fill index_mapping with the actual IdentityN node. + index_mapping[i].node = id_node; + // Add the edge to id_node. + graph->AddEdge(variable_reads[i], 0, id_node, index_mapping[i].index); + } + } + + auto result = index_mapping[var_index]; + (*per_host_var_copies)[host_cpu_device] = std::move(index_mapping); + return result; +} + +} // namespace + +Status DistributedTPURewritePass::BuildExecuteNodes( + const ParameterInfo& params_info, int num_tasks, int num_cores_per_replica, + const Node& replicate_node, const DataTypeVector& arg_types, + const std::vector& arg_shapes, + const DataTypeVector& retval_types, + const std::vector& arg_shardings, + const std::vector& retval_shardings, + const std::vector>& tpu_device_names, + Node* compile_node, const std::vector& variable_reads, + Node* control_predecessor, Node* control_successor, + std::vector* variable_writes, Graph* graph) { + VLOG(1) << "BuildExecuteNodes " << replicate_node.DebugString(); + TF_RET_CHECK(params_info.NumReplicas() == tpu_device_names.size()); + + const int num_variables = variable_reads.size(); + const int num_retvals_per_replica = retval_types.size(); + + variable_writes->resize(num_variables); + + std::vector replicate_input_edges; + TF_RETURN_IF_ERROR(replicate_node.input_edges(&replicate_input_edges)); + + // Map from replicate input index to the fan_in node; + absl::flat_hash_map> replicate_input_fan_in_nodes; + absl::flat_hash_map> replicate_output_fan_out_nodes; + absl::flat_hash_map> + replicate_output_fan_out_dst_inputs; + std::vector to_be_removed_nodes; + + for (const Edge* e : replicate_input_edges) { + if (e->src()->type_string() == kTPUPartitionedInput) { + int num_users = 0; + for (const auto& ue : e->src()->out_edges()) { + if (!ue->IsControlEdge()) ++num_users; + } + if (num_users != 1) { + return tensorflow::errors::InvalidArgument( + e->src()->name(), " must only have one user. Found ", num_users); + } + to_be_removed_nodes.push_back(e->src()); + std::vector& nodes = replicate_input_fan_in_nodes[e->dst_input()]; + nodes.resize(num_cores_per_replica, nullptr); + VLOG(2) << "allocate " << num_cores_per_replica + << " for replicate_input_fan_in_nodes[" << e->dst_input() << "]"; + std::vector fan_in_edges; + TF_RETURN_IF_ERROR(e->src()->input_edges(&fan_in_edges)); + TF_RET_CHECK(fan_in_edges.size() == num_cores_per_replica); + + for (const Edge* fe : fan_in_edges) { + nodes[fe->dst_input()] = fe->src(); + VLOG(2) << "replicate_input_fan_in_nodes[" << e->dst_input() << "][" + << fe->dst_input() << "] = " << fe->src()->name(); + } + } + } + + // Replicate output edges are sorted by replica id and then by outputs for + // each replica. For example, if TPU Computation has outputs (output_1, + // output_2, and output_3) and number of replicas is 2, then + // replicate_output_edges order would be: + // output_1_replica_1, output_2_replica_1, output_3_replica_1, + // output_1_replica_2, output_2_replica_2, output_3_replica_2. 
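The ordering spelled out in the comment above reduces to a single flat-index formula, and the same formula is used later in this hunk when wiring TPUExecute outputs (output_num = replica * num_retvals_per_replica + output index). A tiny self-checking sketch:

    #include <cassert>

    // Flat index of output `output_index` of replica `replica` in the
    // TPUReplicate node's output list, matching the ordering described above.
    constexpr int FlatOutputIndex(int replica, int num_outputs_per_replica,
                                  int output_index) {
      return replica * num_outputs_per_replica + output_index;
    }

    int main() {
      // 2 replicas, 3 outputs each: output_2 of the second replica is slot 4.
      static_assert(FlatOutputIndex(1, 3, 1) == 4);
      assert(FlatOutputIndex(0, 3, 2) == 2);
    }
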
+  std::vector<const Edge*> replicate_output_edges(replicate_node.num_outputs(),
+                                                  nullptr);
+  for (const Edge* edge : replicate_node.out_edges()) {
+    if (edge->IsControlEdge()) continue;
+
+    int num_partitioned_outputs = 0;
+
+    for (const Edge* out_edge : edge->dst()->out_edges()) {
+      if (out_edge->dst()->type_string() == kTPUPartitionedOutput) {
+        num_partitioned_outputs++;
+        // Paths between replicate_node and replicate_output_fan_out_nodes:
+        // ReplicateNode->TpuOutIdentity->kTPUPartitionedOutput->fan-out-nodes
+        TF_RET_CHECK(edge->dst()->out_edges().size() == 1);
+        to_be_removed_nodes.push_back(edge->dst());
+        to_be_removed_nodes.push_back(out_edge->dst());
+        // Get the right replicated id from the replicate_output_edge.
+        std::vector<Node*>& nodes =
+            replicate_output_fan_out_nodes[edge->src_output()];
+        std::vector<int>& dst_inputs =
+            replicate_output_fan_out_dst_inputs[edge->src_output()];
+        nodes.resize(num_cores_per_replica, nullptr);
+        dst_inputs.resize(num_cores_per_replica, 0);
+        TF_RET_CHECK(out_edge->dst()->out_edges().size() ==
+                     num_cores_per_replica);
+
+        for (const Edge* fe : out_edge->dst()->out_edges()) {
+          nodes[fe->src_output()] = fe->dst();
+          dst_inputs[fe->src_output()] = fe->dst_input();
+          VLOG(2) << "replicate_output_fan_out_nodes[" << out_edge->src_output()
+                  << "][" << fe->src_output()
+                  << "] = " << fe->dst()->DebugString() << " with dst_input "
+                  << fe->dst_input();
+        }
+      }
+    }
+    replicate_output_edges[edge->src_output()] = edge;
+    if (num_partitioned_outputs > 1) {
+      return errors::InvalidArgument(
+          "More than one TPUPartitionedOutput per replicated output.");
+    }
+  }
+
+  const int num_execute_args =
+      arg_shardings.size() - params_info.NumGuaranteedConstants();
+  // Inverts the arg_shardings and retval_shardings mappings to
+  // form core -> {argument number} maps.
+  std::vector<std::vector<int>> core_arg_nums(num_cores_per_replica);
+  for (int i = 0; i < num_execute_args; ++i) {
+    const auto& sharding = arg_shardings[i];
+    if (sharding.type() == xla::OpSharding::MAXIMAL) {
+      int core = sharding.tile_assignment_devices(0);
+      TF_RETURN_IF_ERROR(ValidateCoreNumber(core, num_cores_per_replica));
+      core_arg_nums[core].push_back(i);
+    } else if (sharding.type() == xla::OpSharding::OTHER) {
+      for (int64 core : sharding.tile_assignment_devices()) {
+        core_arg_nums[core].push_back(i);
+      }
+    } else if (sharding.type() == xla::OpSharding::REPLICATED) {
+      for (int core = 0; core < num_cores_per_replica; ++core) {
+        core_arg_nums[core].push_back(i);
+      }
+    } else {
+      return tensorflow::errors::InvalidArgument(
+          "Unsupported argument sharding: ", sharding.DebugString());
+    }
+  }
+  std::vector<std::vector<int>> core_retval_nums(num_cores_per_replica);
+  for (int i = 0; i < retval_shardings.size(); ++i) {
+    const auto& sharding = retval_shardings[i];
+    if (sharding.type() == xla::OpSharding::MAXIMAL) {
+      int core = sharding.tile_assignment_devices(0);
+      TF_RETURN_IF_ERROR(ValidateCoreNumber(core, num_cores_per_replica));
+      core_retval_nums[core].push_back(i);
+    } else if (sharding.type() == xla::OpSharding::REPLICATED) {
+      for (int core = 0; core < num_cores_per_replica; ++core) {
+        core_retval_nums[core].push_back(i);
+      }
+    } else if (sharding.type() == xla::OpSharding::OTHER) {
+      for (int64 core : sharding.tile_assignment_devices()) {
+        core_retval_nums[core].push_back(i);
+      }
+    } else {
+      return tensorflow::errors::InvalidArgument(
+          "Unsupported argument sharding: ", sharding.DebugString());
+    }
+  }
+
+  // Maps host device name to a list of per-variable pairs (variable_copy_node,
+  // output_index_of_copy_node).
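Before moving on to the per-host copy bookkeeping introduced by the comment above: the two loops just shown invert the per-argument (and per-return-value) shardings into per-core index lists. The same inversion can be sketched standalone, with a simplified Sharding struct standing in for xla::OpSharding:

    #include <cassert>
    #include <vector>

    // Simplified stand-in for xla::OpSharding: pinned to one core (MAXIMAL),
    // present on every core (REPLICATED), or tiled over a list of cores (OTHER).
    struct Sharding {
      enum Kind { kMaximal, kReplicated, kTiled } kind;
      std::vector<int> cores;  // used by kMaximal (size 1) and kTiled
    };

    // For each core, the list of argument numbers that core consumes.
    std::vector<std::vector<int>> InvertShardings(
        const std::vector<Sharding>& shardings, int num_cores) {
      std::vector<std::vector<int>> core_args(num_cores);
      for (int i = 0; i < static_cast<int>(shardings.size()); ++i) {
        const Sharding& s = shardings[i];
        switch (s.kind) {
          case Sharding::kMaximal:
            core_args[s.cores[0]].push_back(i);
            break;
          case Sharding::kTiled:
            for (int core : s.cores) core_args[core].push_back(i);
            break;
          case Sharding::kReplicated:
            for (int core = 0; core < num_cores; ++core)
              core_args[core].push_back(i);
            break;
        }
      }
      return core_args;
    }

    int main() {
      std::vector<Sharding> shardings = {
          {Sharding::kMaximal, {0}},    // arg 0 lives on core 0 only
          {Sharding::kReplicated, {}},  // arg 1 goes to every core
          {Sharding::kTiled, {0, 1}},   // arg 2 is split across cores 0 and 1
      };
      auto core_args = InvertShardings(shardings, /*num_cores=*/2);
      assert((core_args[0] == std::vector<int>{0, 1, 2}));
      assert((core_args[1] == std::vector<int>{1, 2}));
    }
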
+ absl::flat_hash_map> per_host_var_copies; + + // Mapping from original resource arg number to a second level map. Second + // level map is from core id to output index of updated variable value. + absl::flat_hash_map> + orig_arg_num_to_output_index_mapping; + // Mapping from retval index to a second level map. Second level map is from + // core id to output index of sharded output value. + std::unordered_map> + retval_index_to_output_index_mapping; + + // Represents mapping of argument index of sharded input to each + // TPUExecute node to its corresponding Split node and its output index + // from which sharded input will be fed into TPUExecute node. + std::map input_index_to_sharded_inputs; + + // Builds one TPUExecute node per core per replica. + std::vector> execute_nodes(params_info.NumReplicas()); + for (int core = 0; core < num_cores_per_replica; ++core) { + DataTypeVector core_retval_types; + for (int output : core_retval_nums[core]) { + core_retval_types.push_back(retval_types[output]); + } + DataTypeVector core_arg_types; + std::vector core_variable_writes; + for (int input : core_arg_nums[core]) { + // Resource variables can be passed either by reference (as a DT_RESOURCE) + // tensor or by value (as the variable's current value). Per-replica or + // distributed resource arguments are always passed by reference and + // broadcast variables are always passed by value. + if (arg_types[input] == DT_RESOURCE && + !params_info.IsPerReplicaArg(input) && + !params_info.IsDistributedArg(input)) { + DataType handle_type = arg_shapes[input].handle_type; + TF_RET_CHECK(handle_type != DT_INVALID) << DataTypeString(handle_type); + core_arg_types.push_back(handle_type); + int base = input - params_info.NumPerReplicaArgs() - + params_info.NumDistributedArgs() - + params_info.NumBroadcastArgs(); + // Variables passed by value will have a corresponding additional output + // containing an updated value for the variable. + core_variable_writes.push_back(base); + core_retval_types.push_back(handle_type); + } else { + core_arg_types.push_back(arg_types[input]); + } + } + + NodeDef def; + def.set_op("TPUExecute"); + MergeDebugInfo(NodeDebugInfo(replicate_node.def()), &def); + AddNodeAttr("Targs", core_arg_types, &def); + AddNodeAttr("Tresults", core_retval_types, &def); + + for (int64 replica = 0; replica < params_info.NumReplicas(); ++replica) { + def.set_name(strings::StrCat(replicate_node.name(), "/_execute_", replica, + "_", core)); + + Status status; + Node* node = graph->AddNode(def, &status); + if (!status.ok()) return status; + execute_nodes[replica].push_back(node); + + node->set_assigned_device_name(tpu_device_names[replica][core]); + + // Add control edges to ensure that execution happens after + // `control_predecessor`, happens before `control_successor`, and is + // triggered by evaluating any operator that depends on the original + // TPUReplicate operator. See the comment at the top of the header file + // for more details. + graph->AddControlEdge(control_predecessor, node); + graph->AddControlEdge(node, control_successor); + + // Add data input edges. + for (int64 i = 0; i < core_arg_nums[core].size(); ++i) { + int64 orig_arg_num = core_arg_nums[core][i]; + VLOG(2) << " replica " << replica << " core " << core << " i " << i + << " orig_arg_num " << orig_arg_num; + if (params_info.IsPerReplicaArg(orig_arg_num) || + params_info.IsDistributedArg(orig_arg_num)) { + // Per-replica input and distributed input + int64 input_num = params_info.IsPerReplicaArg(orig_arg_num) + ? 
replica * params_info.NumPerReplicaArgs() + + core_arg_nums[core][i] + : params_info.NumReplicas() * + params_info.NumPerReplicaArgs() + + core_arg_nums[core][i] - + params_info.NumPerReplicaArgs(); + + const Edge* edge = replicate_input_edges[input_num]; + VLOG(2) << "replicate_input_edges[" << input_num << "]"; + DataType dtype = edge->src()->output_type(edge->src_output()); + if (dtype == DT_RESOURCE) { + DataType handle_dtype = arg_shapes[orig_arg_num].handle_type; + if (std::find(kTpuAllTypes.begin(), kTpuAllTypes.end(), + handle_dtype) == kTpuAllTypes.end()) { + return errors::InvalidArgument( + "Unsupported resource variable data type for TPU: ", + DataTypeString(handle_dtype), ", caused by output ", + edge->src()->name(), ":", edge->src_output()); + } + } else { + if (std::find(kTpuAllTypes.begin(), kTpuAllTypes.end(), dtype) == + kTpuAllTypes.end()) { + return errors::InvalidArgument( + "Unsupported data type for TPU: ", DataTypeString(dtype), + ", caused by output ", edge->src()->name(), ":", + edge->src_output()); + } + } + if (arg_shardings[orig_arg_num].type() == xla::OpSharding::OTHER) { + // Don't automatically add a split node when input node is + // kTPUPartitionedInput + if (edge->src()->type_string() == kTPUPartitionedInput) { + VLOG(2) << "Connect " + << replicate_input_fan_in_nodes[input_num][core]->name() + << " to " << node->name() << " at " << i; + graph->AddEdge(replicate_input_fan_in_nodes[input_num][core], 0, + node, i); + } else { + if (dtype == DT_RESOURCE) { + return errors::InvalidArgument( + "Tiled sharding for per-replica DT_RESOURCE input must", + "be TPUPartitionedInput. Here got ", + edge->src()->type_string()); + } + const xla::OpSharding& sharding = arg_shardings[orig_arg_num]; + + // Create or get the Split node. + TF_ASSIGN_OR_RETURN( + ShardedInputInfo sharded_input_info, + CreateOrGetSplitNodesForInputSharding( + sharding, orig_arg_num, dtype, replica, + edge->src_output(), edge->src(), control_predecessor, + graph, &input_index_to_sharded_inputs)); + + // Calculate which output we should receive from the Split node. + absl::optional output_index = + GetCoreIndexInSharding(sharding, core); + TF_RET_CHECK(output_index); + + NodeOut split_node_and_index = + sharded_input_info.sharded_inputs.at(output_index.value()); + // Connect with Split node output. + graph->AddEdge(split_node_and_index.node, + split_node_and_index.index, node, i); + } + } else if (edge->src()->type_string() == kTPUPartitionedInput && + arg_shardings[orig_arg_num].type() == + xla::OpSharding::REPLICATED) { + graph->AddEdge(replicate_input_fan_in_nodes[input_num][core], 0, + node, i); + } else { + graph->AddEdge(edge->src(), edge->src_output(), node, i); + } + } else if (params_info.IsBroadcastArg(orig_arg_num)) { + // Broadcast input. + int64 input_num = params_info.FirstBroadcastArgFromHost() + + core_arg_nums[core][i] - + params_info.NumPerReplicaArgs() - + params_info.NumDistributedArgs(); + const Edge* edge = replicate_input_edges[input_num]; + DataType dtype = edge->src()->output_type(edge->src_output()); + if (std::find(kTpuAllTypes.begin(), kTpuAllTypes.end(), dtype) == + kTpuAllTypes.end()) { + return errors::InvalidArgument( + "Unsupported data type for TPU: ", DataTypeString(dtype), + ", caused by output ", edge->src()->name(), ":", + edge->src_output()); + } + graph->AddEdge(edge->src(), edge->src_output(), node, i); + } else { + // Variable input. 
+ int64 variable_num = orig_arg_num - params_info.NumPerReplicaArgs() - + params_info.NumDistributedArgs() - + params_info.NumBroadcastArgs(); + TF_RET_CHECK(variable_num < num_variables); + + Node* variable_read = variable_reads[variable_num]; + DataType dtype = variable_read->output_type(0); + if (std::find(kTpuAllTypes.begin(), kTpuAllTypes.end(), dtype) == + kTpuAllTypes.end()) { + return errors::InvalidArgument( + "Unsupported resource variable data type for TPU: ", + DataTypeString(dtype), ", caused by ReadVariableOp ", + variable_read->DebugString()); + } + DeviceNameUtils::ParsedName requested_device; + string requested = variable_read->requested_device(); + TF_RET_CHECK( + DeviceNameUtils::ParseFullName(requested, &requested_device)); + if (requested_device.type != "TPU") { + // Stage the value via the CPU device on the remote host. The graph + // partitioner will introduce an intermediate copy rather than + // copying the same tensor multiple times across the network, and we + // would prefer that intermediate copy to be in host memory to avoid + // running out of memory if the TPUExecute op on the staging device + // starts running before the _Send ops to the other TPU devices on + // the same host complete. We don't do this if the variables are + // already placed on TPU, otherwise it will cause an unnecessary + // round trip copy. + // TODO(b/79580121): give each replica its own on-device variable + // replica and then delete this code. + string device; + TF_RETURN_IF_ERROR(DeviceNameUtils::DeviceNameToCpuDeviceName( + tpu_device_names[replica][core], &device)); + TF_ASSIGN_OR_RETURN(auto var_data, + CreateOrGetPerHostVariableCopy( + device, variable_num, variable_reads, + params_info, arg_shardings, replicate_node, + &per_host_var_copies, graph)); + + if (arg_shardings[orig_arg_num].type() == xla::OpSharding::OTHER) { + const xla::OpSharding& sharding = arg_shardings[orig_arg_num]; + // Create or get the Split node. + TF_ASSIGN_OR_RETURN( + ShardedInputInfo sharded_input_info, + CreateOrGetSplitNodesForInputSharding( + sharding, orig_arg_num, + arg_shapes[orig_arg_num].handle_type, replica, + var_data.index, var_data.node, control_predecessor, graph, + &input_index_to_sharded_inputs)); + + // Calculate which output we should receive from the Split node. + absl::optional output_index = + GetCoreIndexInSharding(sharding, core); + TF_RET_CHECK(output_index); + NodeOut split_node_and_index = + sharded_input_info.sharded_inputs[output_index.value()]; + // Connect with Split node output. + graph->AddEdge(split_node_and_index.node, + split_node_and_index.index, node, i); + + } else { + graph->AddEdge(var_data.node, var_data.index, node, i); + } + } else { + graph->AddEdge(variable_reads[variable_num], 0, node, i); + } + } + } + + // Adds a program input edge from the compiler. + graph->AddEdge(compile_node, core + 1, node, node->num_inputs() - 1); + + // Add data output edges. 
+ int num_outputs = core_retval_nums[core].size(); + for (int i = 0; i < num_outputs; ++i) { + int output_num = + replica * num_retvals_per_replica + core_retval_nums[core][i]; + const auto& sharding = retval_shardings[core_retval_nums[core][i]]; + if (sharding.type() == xla::OpSharding::OTHER) { + int retval_index = core_retval_nums[core][i]; + retval_index_to_output_index_mapping[retval_index][core] = i; + bool is_last_core = + core == + *std::max_element(sharding.tile_assignment_devices().begin(), + sharding.tile_assignment_devices().end()); + bool isPartitionOutNode = false; + + const Edge* e = replicate_output_edges[output_num]; + const Edge* e_out; + for (const Edge* out_edge : e->dst()->out_edges()) { + if (out_edge->dst()->type_string() == kTPUPartitionedOutput) { + isPartitionOutNode = true; + e_out = out_edge; + } + } + if (isPartitionOutNode) { + graph->AddEdge( + node, i, replicate_output_fan_out_nodes[output_num][core], + replicate_output_fan_out_dst_inputs[output_num][core]); + VLOG(2) << "Connect " << node->name() << " at " << i << " to " + << replicate_output_fan_out_nodes[output_num][core]->name() + << " at " + << replicate_output_fan_out_dst_inputs[output_num][core]; + if (is_last_core) { + graph->RemoveEdge(e); + graph->RemoveEdge(e_out); + } + continue; + } + + // Do this in the iteration of last core in tile assignment, so all + // TPUExecute nodes have been created. + if (!is_last_core) { + continue; + } + + // Add a Concat node. + std::vector orig_inputs; + for (int64 core_id : sharding.tile_assignment_devices()) { + int core_retval_index = + retval_index_to_output_index_mapping[retval_index][core_id]; + orig_inputs.push_back( + NodeOut{execute_nodes[replica][core_id], + static_cast( + core_retval_nums[core_id][core_retval_index])}); + } + DataType dtype = e->src()->output_type(e->src_output()); + TF_ASSIGN_OR_RETURN( + Node * concat_node, + CreateConcatNodesForRetval(sharding, dtype, replica, orig_inputs, + graph, /*device=*/"")); + + const Edge* edge = replicate_output_edges[output_num]; + Node* dst = edge->dst(); + int dst_input = edge->dst_input(); + graph->RemoveEdge(edge); + graph->AddEdge(concat_node, 0, dst, dst_input); + + continue; + } + + // If this is a replicated output, outputs on all cores will be the + // same, and we only take the output from core 0. + if (sharding.type() == xla::OpSharding::REPLICATED && core != 0) { + continue; + } + + // If output has maximal sharding, make sure we only use output from + // TPUExecute node with logical core id equal to core id defined by the + // xla sharding. + if (sharding.type() == xla::OpSharding::MAXIMAL && + core != sharding.tile_assignment_devices(0)) { + continue; + } + + const Edge* replicate_edge_to_replace = + replicate_output_edges[output_num]; + Node* dst = replicate_edge_to_replace->dst(); + int dst_input = replicate_edge_to_replace->dst_input(); + graph->RemoveEdge(replicate_edge_to_replace); + graph->AddEdge(node, i, dst, dst_input); + } + + // Feed the updated variable values from the first replica to the + // variable write nodes. + if (replica == 0) { + for (int i = 0; i < core_variable_writes.size(); ++i) { + int orig_arg_num = + core_variable_writes[i] + params_info.NumPerReplicaArgs() + + params_info.NumDistributedArgs() + params_info.NumBroadcastArgs(); + const auto& sharding = arg_shardings[orig_arg_num]; + // If this is a tiling sharded variable, concat variable updates from + // all cores. 
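Both the tiled-output handling above and the tiled variable-update handling below defer their Concat to the highest core id in the tile assignment, so that every contributing TPUExecute node already exists when the Concat is built. The guard is just a max_element comparison; a minimal sketch:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    // True only for the highest core id in the tile assignment, i.e. the last
    // of the tiled cores to be visited by the per-core loop, at which point
    // every producer needed by the Concat has already been created.
    bool IsLastCoreInTileAssignment(const std::vector<int>& tile_devices,
                                    int core) {
      return core == *std::max_element(tile_devices.begin(), tile_devices.end());
    }

    int main() {
      std::vector<int> tiles = {0, 2, 1, 3};
      assert(!IsLastCoreInTileAssignment(tiles, 1));
      assert(IsLastCoreInTileAssignment(tiles, 3));
    }
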
+ if (sharding.type() == xla::OpSharding::OTHER) { + orig_arg_num_to_output_index_mapping[orig_arg_num][core] = i; + + // Do this in the iteration of last core in tile assignment, so all + // TPUExecute nodes have been created. + if (core != + *std::max_element(sharding.tile_assignment_devices().begin(), + sharding.tile_assignment_devices().end())) { + continue; + } + + // Add a Concat node. + std::vector orig_inputs; + for (int64 core_id : sharding.tile_assignment_devices()) { + int core_retval_num = + orig_arg_num_to_output_index_mapping[orig_arg_num][core_id]; + orig_inputs.push_back( + NodeOut{execute_nodes[0][core_id], + static_cast(core_retval_nums[core_id].size() + + core_retval_num)}); + } + + // Use the variable read's device for the concat. They should both + // be collocated with the variable. + absl::string_view device = + variable_reads[core_variable_writes[i]]->assigned_device_name(); + TF_ASSIGN_OR_RETURN( + Node * concat_node, + CreateConcatNodesForRetval( + sharding, arg_shapes[orig_arg_num].handle_type, replica, + orig_inputs, graph, device)); + // Populate VariableWrite. + VariableWrite& write = variable_writes->at(core_variable_writes[i]); + write.value = concat_node; + write.value_output = 0; + write.predicate = compile_node; + write.predicate_output = num_cores_per_replica + core + 1; + + continue; + } + + // If this is a replicated variable, outputs on all cores will be the + // same, and we only take the output from core 0 for the varialbe + // update. + if (sharding.type() == xla::OpSharding::REPLICATED && core != 0) { + continue; + } + VariableWrite& write = variable_writes->at(core_variable_writes[i]); + write.value = node; + write.value_output = num_outputs + i; + write.predicate = compile_node; + write.predicate_output = num_cores_per_replica + core + 1; + } + } + } + } + + for (Node* node : to_be_removed_nodes) { + graph->RemoveNode(node); + } + return Status::OK(); +} + +/* static */ Status DistributedTPURewritePass::CopyOutsideCompilationNodes( + int replica_index, const std::vector& outside_compilation_nodes, + const DeviceNameUtils::ParsedName& tpu_device, + const DeviceNameUtils::ParsedName& partial_device, + NodeToNodeReplicasMap* node_images, Graph* graph) { + for (Node* node : outside_compilation_nodes) { + NodeDef image_def = node->def(); + MergeDebugInfo(NodeDebugInfo(node->def()), &image_def); + const string suffix = strings::StrCat("/R", replica_index); + // In addition to node name, make the frame name unique to avoid multiple + // LoopCond nodes in one frame. + TF_RETURN_IF_ERROR( + AddPrefixAndSuffixToNode("" /* prefix */, suffix, &image_def)); + Status status; + Node* image = graph->AddNode(image_def, &status); + image->AddAttr(kXlaReplicaIdAttrName, replica_index); + TF_RETURN_IF_ERROR(status); + if (HasNodeAttr(image->def(), kXlaHasHostTransferAttrName)) { + TF_RETURN_IF_ERROR( + SetNodeDeviceForTPUCommunication(tpu_device, DEVICE_CPU, image)); + } else { + const string& original_device_string = + node->assigned_device_name().empty() ? node->requested_device() + : node->assigned_device_name(); + DeviceNameUtils::ParsedName device; + TF_RET_CHECK( + DeviceNameUtils::ParseFullName(original_device_string, &device)); + // If the requested device can be merged with the replica's host device, + // then do so. For example, if the requested device is "/CPU:0" or + // "/GPU:0" then it will be placed on the CPU/GPU of the host where this + // replica is running. 
But if the requested device is + // "/task:3/replica:2/CPU:0" then it will be placed on that task/replica. + if (DeviceNameUtils::IsSpecification(device, partial_device)) { + TF_RETURN_IF_ERROR( + DeviceNameUtils::MergeDevNames(&device, partial_device)); + } + image->set_requested_device(DeviceNameUtils::ParsedNameToString(device)); + } + std::vector& node_image_vector = (*node_images)[node]; + node_image_vector.resize(replica_index + 1); + node_image_vector[replica_index] = image; + } + return Status::OK(); +} + +/* static */ Status DistributedTPURewritePass::ReplicateOutsideCompilationNodes( + const std::vector>& tf_device_assignment, + const HostComputeCoreMap& host_compute_core, + const OutsideCompilationNodeMap& outside_compilation_nodes, + NodeToNodeReplicasMap* node_images, Graph* graph) { + // Iterate over replicas. + for (int i = 0; i < tf_device_assignment.size(); ++i) { + const auto& core_devices = tf_device_assignment[i]; + for (const auto& oc_cluster_iter : outside_compilation_nodes) { + const string& oc_cluster_name = oc_cluster_iter.first; + const auto& oc_cluster_nodes = oc_cluster_iter.second; + // We previously validated that host_compute_core contains an entry for + // each cluster. + int core = host_compute_core.at(oc_cluster_name); + TF_RET_CHECK(core >= 0 && core < core_devices.size()); + // tpu_device is the device the HostCompute XLA Op for this cluster runs + // on. + DeviceNameUtils::ParsedName tpu_device; + TF_RET_CHECK( + DeviceNameUtils::ParseFullName(core_devices[core], &tpu_device)); + // partial_device contains the replica and task but not the type. + DeviceNameUtils::ParsedName partial_device = tpu_device; + partial_device.has_type = false; + partial_device.has_id = false; + + if (tf_device_assignment.size() == 1) { + // With a single replica don't copy any nodes just put the original + // nodes into the image map. We leave the device placement alone, except + // that we have to fill in the correct core for the host send and + // receive nodes. + for (Node* node : oc_cluster_nodes) { + (*node_images)[node] = {node}; + node->AddAttr(kXlaReplicaIdAttrName, 0); + if (HasNodeAttr(node->def(), kXlaHasHostTransferAttrName)) { + TF_RETURN_IF_ERROR( + SetNodeDeviceForTPUCommunication(tpu_device, DEVICE_CPU, node)); + } + } + } else { + // Iterate over outside_compilation clusters in this computation, adding + // all the nodes with appropriate device assignments. + TF_RETURN_IF_ERROR( + CopyOutsideCompilationNodes(i, oc_cluster_nodes, tpu_device, + partial_device, node_images, graph)); + } + } + } + return Status::OK(); +} + +/* static */ Status DistributedTPURewritePass::CopyOutsideCompilationEdges( + const std::vector& outside_compilation_nodes, + const NodeToNodeReplicasMap& node_images, + const std::unordered_map outside_compilation_inputs, + Graph* graph) { + for (Node* node : outside_compilation_nodes) { + const auto& images = node_images.at(node); + // Make a copy of all edges and iterate on "in_edges", because we might + // remove edges when iteratating through them. + std::vector in_edges(node->in_edges().begin(), + node->in_edges().end()); + for (const Edge* edge : in_edges) { + Node* src = edge->src(); + const auto iter = node_images.find(src); + if (iter == node_images.end()) { + if (images.size() > 1) { + // The source node is a 'normal' node not part of any + // rewrite. Broadcast the value to all replicas. (If images.size() == + // 1 the cluster is not replicated and we can leave the original edge + // in place.) 
+ for (Node* dst : images) { + graph->AddEdge(src, edge->src_output(), dst, edge->dst_input()); + } + } + continue; + } + + // The source node is a replicated outside_compilation node. + const auto& src_images = iter->second; + if (src_images.size() != images.size()) { + return errors::InvalidArgument( + "Graph contains an edge from node ", src->name(), + " in an outside_compilation block replicated ", src_images.size(), + " ways to node ", node->name(), + " in an outside_compilation block replicated ", images.size(), + " ways. Replication factors must match. Leave a comment on " + "tracking bug b/76419636 if you need this to be supported."); + } + bool is_lifted_arg; + string outside_compilation_cluster; + if (GetNodeAttr(src->def(), kXlaIsLiftedArgAttrName, &is_lifted_arg) + .ok() && + GetNodeAttr(src->def(), kOutsideCompilationAttr, + &outside_compilation_cluster) + .ok()) { + const auto input_iter = + outside_compilation_inputs.find(outside_compilation_cluster); + TF_RET_CHECK(input_iter != outside_compilation_inputs.end()); + TF_RET_CHECK(input_iter->second->type_string() == "IdentityN"); + int dst_input = edge->dst_input(); + if (src_images.size() == 1) { + graph->RemoveEdge(edge); + } + for (int i = 0; i < src_images.size(); ++i) { + graph->AddEdge(input_iter->second, i, images[i], dst_input); + } + continue; + } + + bool is_placeholder_for_arg; + string outside_compilation_input_attr; + if (GetNodeAttr(src->def(), kXlaIsPlaceholderForArg, + &is_placeholder_for_arg) + .ok() && + GetNodeAttr(src->def(), kXlaOutsideCompilationInputsAttrName, + &outside_compilation_input_attr) + .ok()) { + const auto input_iter = + outside_compilation_inputs.find(outside_compilation_input_attr); + TF_RET_CHECK(input_iter != outside_compilation_inputs.end()); + TF_RET_CHECK(input_iter->second->type_string() == "IdentityN"); + int dst_input = edge->dst_input(); + if (src_images.size() == 1) { + graph->RemoveEdge(edge); + } + for (int i = 0; i < src_images.size(); ++i) { + graph->AddEdge(input_iter->second, i, images[i], dst_input); + } + continue; + } + + if (images.size() > 1) { + // If images.size() == 1 neither cluster is replicated and we can + // leave the original edges in place. + for (int i = 0; i < src_images.size(); ++i) { + graph->AddEdge(src_images[i], edge->src_output(), images[i], + edge->dst_input()); + } + } + } + for (const Edge* edge : node->out_edges()) { + Node* dst = edge->dst(); + const auto iter = node_images.find(dst); + if (iter == node_images.end()) { + // The source node is a 'normal' node not part of any rewrite. + if (edge->IsControlEdge()) { + // Make the dst node have a control dependency on every replica. + if (images.size() > 1) { + for (int i = 0; i < images.size(); ++i) { + graph->AddControlEdge(images[i], dst); + } + } + // else the cluster is not replicated so we can leave the original + // edge in place. + } else { + // The edge + // is only valid if the outside_compilation block is not replicated. + if (images.size() > 1) { + return errors::InvalidArgument( + "Graph contains an edge from node ", node->name(), + " in an outside_compilation block replicated ", images.size(), + " ways to node ", dst->name(), + " that is not part of an outside_compilation block. Edges from " + "outside_compilation to regular graph nodes are only supported " + "for replication factors of 1. Leave a comment on tracking bug " + "b/76419636 if you need this to be supported."); + } + // else the cluster is not replicated so we can leave the original + // edge in place. 
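Setting aside the lifted-argument and placeholder special cases, the edge-copying rules above and below reduce to a classification on the replication factors of the two endpoints. A minimal restatement (plain ints for the replication counts, an enum in place of errors::InvalidArgument):

    #include <iostream>

    enum class EdgeAction { kKeepOriginal, kBroadcastToImages, kOneToOne, kError };

    // src_images / dst_images are the replication factors of the edge
    // endpoints (1 means the node was not replicated): a non-replicated
    // source is broadcast to every destination image, two replicated
    // clusters must have matching factors, and a factor of 1 on both sides
    // keeps the original edge in place.
    EdgeAction ClassifyEdge(int src_images, int dst_images) {
      if (src_images == 1 && dst_images == 1) return EdgeAction::kKeepOriginal;
      if (src_images == 1) return EdgeAction::kBroadcastToImages;
      if (src_images == dst_images) return EdgeAction::kOneToOne;
      return EdgeAction::kError;  // replication factors must match
    }

    int main() {
      std::cout << static_cast<int>(ClassifyEdge(1, 4)) << "\n";  // broadcast
      std::cout << static_cast<int>(ClassifyEdge(4, 4)) << "\n";  // one-to-one
      std::cout << static_cast<int>(ClassifyEdge(2, 4)) << "\n";  // error
    }
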
+ } + } + // The case where src and dst are both in node_images is covered elsewhere + // when iterating over in_edges of dst. + } + } + return Status::OK(); +} + +/* static */ Status DistributedTPURewritePass::ReplicateOutsideCompilationEdges( + const OutsideCompilationNodeMap& outside_compilation_nodes, + const NodeToNodeReplicasMap& node_images, + const std::unordered_map outside_compilation_inputs, + Graph* graph) { + for (const auto& oc_cluster_iter : outside_compilation_nodes) { + TF_RETURN_IF_ERROR( + CopyOutsideCompilationEdges(oc_cluster_iter.second, node_images, + outside_compilation_inputs, graph)); + } + return Status::OK(); +} + +/* static */ Status DistributedTPURewritePass::RemoveOutsideCompilationNodes( + const NodeToNodeReplicasMap& node_images, Graph* graph) { + for (const auto& iter : node_images) { + if (iter.second.size() > 1) { + // The cluster was replicated so remove the original node. + Node* node = iter.first; + graph->RemoveNode(node); + } + } + return Status::OK(); +} + +/* static */ Status +DistributedTPURewritePass::LowerOutsideCompilationFunctionalNodes( + Graph* g, const FunctionLibraryDefinition& flib_def, + const TPUReplicateDeviceNamesMapping& tpu_replicate_device_names_mapping) { + bool modified = false; + do { + std::vector nodes_to_lower; + for (Node* n : g->op_nodes()) { + if (!HasNodeAttr(n->def(), kOutsideCompilationAttr)) { + continue; + } + + if (n->IsWhileNode() || n->IsIfNode() || IsFunctionCall(flib_def, *n)) { + // Only lower functional ops with DT_RESOURCE input, because otherwise + // placer will complain. For normal cases, lowering will cause slowdown + // when related functions are huge (b/139037679). + bool has_resource_input = false; + for (const Edge* e : n->in_edges()) { + if (!e->IsControlEdge() && + e->src()->output_type(e->src_output()) == DT_RESOURCE) { + has_resource_input = true; + break; + } + } + if (has_resource_input) { + nodes_to_lower.push_back(n); + } + } + } + + modified = !nodes_to_lower.empty(); + + auto lower_functional_node = [&flib_def, &g](Node* n) -> Status { + // Clear device assignment. Otherwise all lowered nodes will have + // device assignment, which is not what we want. + n->set_requested_device(""); + + int replica_id; + TF_RETURN_IF_ERROR( + GetNodeAttr(n->def(), kXlaReplicaIdAttrName, &replica_id)); + + string outside_compilation_attr; + TF_RETURN_IF_ERROR(GetNodeAttr(n->def(), kOutsideCompilationAttr, + &outside_compilation_attr)); + + // There are two different kinds of functional outside compilation nodes: + // 1. Nodes that are in outside compilation blocks already. They are + // generated by FunctionalizeControlFlowForXlaPass, and only have + // attribute kOutsideCompilationAttr. + // 2. Mirrored control flow built for outside compilation in functional + // nodes. They are generated by ExtractOutsideCompilationPass, and have + // both kOutsideCompilationAttr and kXlaHasHostTransferAttrName. + // When lowering them, they need to be treated differently. + // For 1), their body functions are always V1 functions written by users, + // and their "control outputs" are control inputs of _Retval nodes. They + // should be lowered as V1 functions. + // For 2), we always add necessary "control outputs" + // (_XlaRecvAtHost/_XlaSendAtHost nodes) to "control_ret" field in their + // FunctionDef's. They should be lowered as V2 functions. 
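The V1-versus-V2 distinction drawn in the comment paragraph above turns on whether the node also carries the host-transfer marker. A tiny sketch of that decision; the attribute strings below are assumed illustrative values, not taken from this hunk:

    #include <iostream>
    #include <set>
    #include <string>

    enum class LowerMode { kAsV1Function, kAsMultiDeviceV2Function };

    // Hypothetical stand-ins for the attribute names used by the pass.
    constexpr char kOutsideCompilation[] = "_xla_outside_compilation";
    constexpr char kHasHostTransfer[] = "_xla_has_host_transfer";

    // Case 1 above (a plain outside-compilation body) lowers as a V1
    // function; case 2 (mirrored host-side control flow) lowers as a
    // multi-device V2 function so its control_ret outputs are preserved.
    LowerMode ChooseLowerMode(const std::set<std::string>& attrs) {
      return attrs.count(kHasHostTransfer) ? LowerMode::kAsMultiDeviceV2Function
                                           : LowerMode::kAsV1Function;
    }

    int main() {
      std::set<std::string> plain = {kOutsideCompilation};
      std::set<std::string> mirrored = {kOutsideCompilation, kHasHostTransfer};
      std::cout << static_cast<int>(ChooseLowerMode(plain)) << "\n";     // 0
      std::cout << static_cast<int>(ChooseLowerMode(mirrored)) << "\n";  // 1
    }
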
+ bool is_host_side_mirrored_control_flow = + HasNodeAttr(n->def(), kXlaHasHostTransferAttrName); + + int num_node_ids = g->num_node_ids(); + bool is_call_node = IsFunctionCall(flib_def, *n); + if (n->IsWhileNode()) { + TF_RETURN_IF_ERROR(RewriteWhileNode(n, g, + /*keep_node_fetchable=*/false)); + } else if (n->IsIfNode()) { + TF_RETURN_IF_ERROR(RewriteIfNode(n, g, /*keep_node_fetchable=*/false)); + } else { + TF_RET_CHECK(is_call_node); + // See comments for "is_host_side_mirrored_control_flow" above. + // If this is a node that's in outside compilation block, lower it as + // V1 function. This is controlled by removing + // kLowerAsMultiDeviceFunctionAttr from the node. + if (!is_host_side_mirrored_control_flow) { + n->ClearAttr(LowerFunctionalOpsPass::kLowerAsMultiDeviceFunctionAttr); + } else { + n->ClearAttr(LowerFunctionalOpsPass::kLowerAsMultiDeviceFunctionAttr); + n->AddAttr(LowerFunctionalOpsPass::kLowerAsMultiDeviceFunctionAttr, + true); + } + TF_RETURN_IF_ERROR( + RewriteFunctionCallNode(n, g, flib_def, + /*keep_caller_fetchable=*/false)); + } + + for (int i = num_node_ids; i < g->num_node_ids(); i++) { + Node* node = g->FindNodeId(i); + if (!node) { + continue; + } + + if (!is_call_node && is_host_side_mirrored_control_flow && + IsFunctionCall(flib_def, *node)) { + // For If/While nodes, if they are host side mirrored control flow, + // mark their body function calls with kXlaHasHostTransferAttrName + // attribute to make sure we lower them as V2 function. + node->AddAttr(kXlaHasHostTransferAttrName, true); + } + + if (IsFunctionCall(flib_def, *node) || node->IsWhileNode() || + node->IsIfNode()) { + // Set kOutsideCompilationAttr attribute so we lower these + // nested function call nodes later. + node->AddAttr(kOutsideCompilationAttr, outside_compilation_attr); + // Set kXlaReplicaIdAttrName attribute so we know replica id when we + // lower this function call node. + node->AddAttr(kXlaReplicaIdAttrName, replica_id); + } else if (node->type_string() == "_XlaRecvAtHost" || + node->type_string() == "_XlaSendFromHost") { + // For "_XlaRecvAtHost" and "_XlaSendFromHost" nodes, make sure they + // have kXlaReplicaIdAttrName attribute so later we know which host + // device to assign. + node->AddAttr(kXlaReplicaIdAttrName, replica_id); + } + } + return Status::OK(); + }; + + for (Node* n : nodes_to_lower) { + TF_RETURN_IF_ERROR(lower_functional_node(n)); + } + } while (modified); + + // Set device for all _XlaRecvAtHost and _XlaSendFromHost nodes. + for (Node* n : g->op_nodes()) { + if (n->type_string() != "_XlaRecvAtHost" && + n->type_string() != "_XlaSendFromHost") { + continue; + } + + string replicate; + TF_RETURN_IF_ERROR(GetNodeAttr(n->def(), kTPUReplicateAttr, &replicate)); + auto iter = tpu_replicate_device_names_mapping.find(replicate); + TF_RET_CHECK(iter != tpu_replicate_device_names_mapping.end()); + const auto& tpu_device_names = iter->second; + + int replica_id; + TF_RETURN_IF_ERROR( + GetNodeAttr(n->def(), kXlaReplicaIdAttrName, &replica_id)); + TF_RET_CHECK(replica_id < tpu_device_names.size()); + const string& tpu_device_name = tpu_device_names[replica_id][0]; + string host_device_name; + TF_RETURN_IF_ERROR(DeviceNameUtils::DeviceNameToCpuDeviceName( + tpu_device_name, &host_device_name)); + n->set_assigned_device_name(host_device_name); + // We may run TPU rewrite passes again on the subgraphs of the resulting + // graph. 
Clear kTPUReplicateAttr and kOutsideCompilationAttr for + // "_XlaRecvAtHost" nodes and "_XlaSendFromHost" nodes, in order to make + // sure that TPU rewrite passes take no effect on host-side subgraphs for + // outside compilation. + n->ClearAttr(kTPUReplicateAttr); + n->ClearAttr(kOutsideCompilationAttr); + } + + // Remove IdentityN nodes generated for outside compilation. IdentityN is + // exempt from resource edge colocation, but here we do need input and output + // for these IdentityN nodes to be colocated. + std::vector identityn_nodes; + for (Node* n : g->op_nodes()) { + if (n->type_string() == "IdentityN" && + HasNodeAttr(n->def(), kXlaOutsideCompilationInputsAttrName)) { + identityn_nodes.push_back(n); + } + } + for (Node* n : identityn_nodes) { + std::vector out_edges(n->out_edges().begin(), + n->out_edges().end()); + for (const Edge* e : out_edges) { + if (e->IsControlEdge()) { + continue; + } + + int src_output = e->src_output(); + const Edge* input_edge; + TF_RETURN_IF_ERROR(n->input_edge(src_output, &input_edge)); + Node* dst = e->dst(); + int dst_input = e->dst_input(); + g->RemoveEdge(e); + g->AddEdge(input_edge->src(), input_edge->src_output(), dst, dst_input); + } + g->RemoveNode(n); + } + + return Status::OK(); +} + +/* static */ Status DistributedTPURewritePass::ParseHostComputeCores( + const Node& replicate_node, + const OutsideCompilationNodeMap& outside_compilation_nodes, + HostComputeCoreMap* host_compute_core) { + std::vector hc_core_string; + TF_RETURN_IF_ERROR(GetNodeAttr(replicate_node.attrs(), "host_compute_core", + &hc_core_string)); + TF_RETURN_IF_ERROR( + ParseHostComputeCoreList(hc_core_string, host_compute_core)); + for (const auto& iter : outside_compilation_nodes) { + const string& oc_cluster_name = iter.first; + if (host_compute_core->find(oc_cluster_name) == host_compute_core->end()) { + // By default put host compute Ops on replicated core 0. + (*host_compute_core)[oc_cluster_name] = 0; + } + } + return Status::OK(); +} + +/* static */ Status DistributedTPURewritePass::GetDeviceTopology( + const DeviceSet& device_set, const Node& replicate_node, int* num_replicas, + int* num_cores_per_replica, int* num_tasks, + std::vector>* tf_device_assignment, + std::unique_ptr* xla_device_assignment, + string* tpu_compilation_device) { + TF_RETURN_IF_ERROR( + GetNodeAttr(replicate_node.attrs(), "num_replicas", num_replicas)); + if (*num_replicas < 1) { + return errors::InvalidArgument("num_replicas must be >= 1, got ", + *num_replicas); + } + + // Find the set of TPU devices in the TF job. + // Indexed by [task number][tpu device number]. + std::vector> tpu_devices; + int num_tpus_per_task; + TF_RETURN_IF_ERROR(GetTPUDeviceNames(replicate_node.requested_device(), + device_set, tpu_compilation_device, + &num_tpus_per_task, &tpu_devices)); + + string topology; + TF_RETURN_IF_ERROR( + GetNodeAttr(replicate_node.attrs(), "topology", &topology)); + TF_RETURN_IF_ERROR(GetNodeAttr( + replicate_node.attrs(), "num_cores_per_replica", num_cores_per_replica)); + std::vector device_assignment; + TF_RETURN_IF_ERROR(GetNodeAttr(replicate_node.attrs(), "device_assignment", + &device_assignment)); + + // TODO(cwhipkey): since we can control multiple pods of different shapes + // from a single worker, it may be desirable to propagate the remote device + // information around (e.g., in DeviceAttributes). This can lead to the mesh + // topology proto being leaked to cloud TPU users (e.g. 
through GetStatus + // calls); this may be okay, but to be conservative, just assume that the + // master session has the proper flags set. + + // We do not initialize platform right now, but we can still retrieve the + // TPU topology even with an uninitialized platform. + auto* tpu_platform = tpu::TpuPlatformInterface::GetRegisteredPlatform( + /*initialize_platform=*/false); + TF_RET_CHECK(tpu_platform); + tpu::TpuTopologyExternal tpu_topology(tpu_platform->GetTopologyPtr()); + TF_RET_CHECK(num_tpus_per_task == + tpu_topology.LogicalDevicesPerHost(kTensorCore)); + TF_RETURN_IF_ERROR(BuildDeviceAssignment( + tpu_topology, num_tpus_per_task, tpu_devices, *num_replicas, + *num_cores_per_replica, topology, device_assignment, tf_device_assignment, + xla_device_assignment)); + + return Status::OK(); +} + +/* static */ Status DistributedTPURewritePass::GetIOTypes( + int num_replicas, const Node& replicate_node, FunctionLibraryRuntime* flr, + Graph* graph, NameRangeMap* input_name_map, const NameAttrList** function, + std::unique_ptr* computation, DataTypeVector* arg_types, + DataTypeVector* retval_types, ParameterInfo* params_info) { + DataTypeVector input_types, broadcast_input_types, guaranteed_constant_types; + TF_RETURN_IF_ERROR( + GetNodeAttr(replicate_node.attrs(), "Tinputs", &input_types)); + TF_RETURN_IF_ERROR(GetNodeAttr(replicate_node.attrs(), "Tbroadcast_inputs", + &broadcast_input_types)); + TF_RETURN_IF_ERROR(GetNodeAttr(replicate_node.attrs(), + "Tguaranteed_constants", + &guaranteed_constant_types)); + int num_distributed_vars; + TF_RETURN_IF_ERROR(GetNodeAttr(replicate_node.attrs(), + "num_distributed_variables", + &num_distributed_vars)); + const int num_per_replica_inputs = input_types.size() - num_distributed_vars; + + if (num_per_replica_inputs % num_replicas != 0) { + return errors::InvalidArgument( + "Number of inputs to TPUReplicate (", num_per_replica_inputs, + ") is not divisible by the number of replicas (", num_replicas, ")."); + } + + int num_variables; + TF_RETURN_IF_ERROR( + GetNodeAttr(replicate_node.attrs(), "NumVariables", &num_variables)); + + NameRangeMap output_name_map; + TF_RETURN_IF_ERROR(NameRangesForNode(replicate_node, replicate_node.op_def(), + input_name_map, &output_name_map)); + + TF_RETURN_IF_ERROR( + GetNodeAttr(replicate_node.attrs(), "computation", function)); + + *computation = absl::make_unique(graph->op_registry()); + TF_RETURN_IF_ERROR(GetComputationForTPUReplicateOp( + **function, flr, computation->get(), arg_types, retval_types)); + + *params_info = ParameterInfo( + num_replicas, num_per_replica_inputs / num_replicas, num_distributed_vars, + broadcast_input_types.size(), num_variables, + guaranteed_constant_types.size(), retval_types->size()); + + if (arg_types->size() != params_info->NumInputsToEachReplica()) { + return errors::InvalidArgument( + "Computation argument to TPUReplicate has wrong number of " + "arguments. Expected ", + params_info->NumInputsToEachReplica(), " inputs, got ", + arg_types->size()); + } + if (replicate_node.num_outputs() != params_info->NumOutputsToHost()) { + return errors::InvalidArgument( + "Wrong number of outputs from TPUReplicate. 
Expected ", + params_info->NumOutputsToHost(), " outputs, got ", + replicate_node.num_outputs()); + } + if (enable_cross_replica_sharding_mirrored_variables_) { + std::vector mirrored_variable_indices; + TF_RETURN_IF_ERROR(GetNodeAttr(replicate_node.attrs(), + TPUREPLICATE_MIRRORED_VAR_INDICES_ATTR, + &mirrored_variable_indices)); + for (int index : mirrored_variable_indices) { + TF_RET_CHECK(params_info->IsPerReplicaArg(index) || + params_info->IsDistributedArg(index)) + << "Mirrored variables not categorized as per-replica arguments, " + "index: " + << index; + params_info->mutable_mirrored_variable_indices()->insert(index); + } + } + return Status::OK(); +} + +/* static */ Status DistributedTPURewritePass::BuildSequencingNodes( + const string& tpu_compilation_device, const Node& replicate_node, + Graph* graph, Node** host_transfer_sequencer, Node** control_before, + Node** control_after) { + *host_transfer_sequencer = nullptr; + + TF_RETURN_IF_ERROR( + BuildNoopNode(replicate_node, + graph->NewName(strings::StrCat(replicate_node.name(), "/", + "control_before")), + /*device=*/"", graph, control_before)); + for (const Edge* e : replicate_node.in_edges()) { + if (!e->IsControlEdge()) { + continue; + } + Node* predecessor = e->src(); + if (predecessor->IsSource()) continue; + if (predecessor->type_string() == "NoOp" && + predecessor->attrs().Find("_xla_host_transfer_sequencer") != nullptr) { + // The node is the sequencer for host transfer operations. Its control + // dependency needs to be placed after the execute node, not before. + if (*host_transfer_sequencer != nullptr) { + return errors::Internal("Replicate node ", replicate_node.name(), + " has two transfer sequencer nodes: ", + (*host_transfer_sequencer)->name(), " and ", + predecessor->name()); + } + // Set the correct device to match the other sequencing nodes. + predecessor->set_assigned_device_name(tpu_compilation_device); + *host_transfer_sequencer = predecessor; + } else { + graph->AddControlEdge(predecessor, *control_before); + } + } + + TF_RETURN_IF_ERROR( + BuildNoopNode(replicate_node, + graph->NewName(strings::StrCat(replicate_node.name(), "/", + "control_after")), + /*device=*/tpu_compilation_device, graph, control_after)); + for (Node* successor : replicate_node.out_nodes()) { + if (successor->attrs().Find("_xla_tail_outside_compilation") != nullptr) { + graph->AddControlEdge(successor, *control_after); + } else { + graph->AddControlEdge(*control_after, successor); + } + } + return Status::OK(); +} + +/* static */ Status DistributedTPURewritePass::DealWithConstantsAndVariables( + const Node& replicate_node, const NameRangeMap& input_name_map, + Graph* graph, Node* host_transfer_sequencer, Node* control_before, + Node* control_after, absl::Span variable_nodes, + std::vector* guaranteed_constant_nodes, + std::vector* variable_reads) { + TF_RETURN_IF_ERROR(FindGuaranteedConstantInputs( + replicate_node, input_name_map, guaranteed_constant_nodes)); + + TF_RETURN_IF_ERROR(BuildVariableReads(variable_nodes, control_before, graph, + variable_reads)); + // Add the control dependency from host transfer nodes. 
+ if (host_transfer_sequencer != nullptr) { + graph->AddControlEdge(host_transfer_sequencer, control_after); + } + return Status::OK(); +} + +/* static */ Status +DistributedTPURewritePass::BuildCompilationStatusReturnNodes( + Node* replicate_node, Node* compile_node, Node** control_after_compilation, + Graph* graph) { + const Edge* compilation_edge = nullptr; + for (const auto* e : replicate_node->out_edges()) { + if (e->IsControlEdge() && + e->dst()->type_string() == "TPUCompilationResult") { + TF_RET_CHECK(compilation_edge == nullptr) + << "Multiple compilation result nodes attached to the same replicate " + "cluster."; + compilation_edge = e; + } + } + + // TODO(jpienaar): This should be checked by default, current tests not using + // this are ones that use the "abort upon successful compile flag" which will + // be removed. Leaving this in until then. + if (compilation_edge != nullptr) { + Node* compilation_status = compilation_edge->dst(); + const AttrValue* compile_status_cluster_attr = + compilation_status->attrs().Find(kTPUCompilationResultAttr); + TF_RET_CHECK(compile_status_cluster_attr != nullptr); + const string& compile_status_cluster = compile_status_cluster_attr->s(); + TF_RET_CHECK(!compile_status_cluster.empty()); + const AttrValue* replicate_cluster_attr = + replicate_node->attrs().Find(kTPUReplicateAttr); + TF_RET_CHECK(replicate_cluster_attr != nullptr); + const string& replicate_cluster = replicate_cluster_attr->s(); + TF_RET_CHECK(!replicate_cluster.empty()); + TF_RET_CHECK(compile_status_cluster == replicate_cluster); + + TF_RETURN_IF_ERROR( + ReplaceCompilationResultNodeWithIdentity(graph, &compilation_status)); + graph->AddEdge(compile_node, 0, compilation_status, 0); + } + + NodeDef def; + def.set_name(UniqueNodeName("tpu_compile_succeeded_assert", graph)); + // Create an op to assert that compilation succeeded. The alternative would + // have been to have each execute op check and return an error. + def.set_op("TPUCompileSucceededAssert"); + MergeDebugInfo(NodeDebugInfo(replicate_node->def()), &def); + Status status; + Node* compile_succeeded = graph->AddNode(def, &status); + compile_succeeded->set_assigned_device_name( + compile_node->assigned_device_name()); + TF_RETURN_IF_ERROR(status); + graph->AddEdge(compile_node, 0, compile_succeeded, 0); + + // Build a sequencing node for when compilation has completed. + TF_RETURN_IF_ERROR( + BuildNoopNode(*replicate_node, + graph->NewName(strings::StrCat(compile_node->name(), "/", + "after_compilation")), + /*device=*/"", graph, control_after_compilation)); + graph->AddControlEdge(compile_succeeded, *control_after_compilation); + + return Status::OK(); +} + +// Updates the head and tail outside compiled nodes so that nodes have the +// correct device and removes the replication and outside compilation attributes +// so that these nodes do not trigger further graph optimization passes. +/* static */ Status DistributedTPURewritePass::UpdateHeadTailOutsideCompilation( + const std::vector>& tf_device_assignment, + const std::vector& head_tail_outside_compilation_nodes) { + for (Node* node : head_tail_outside_compilation_nodes) { + int replica_id; + TF_RETURN_IF_ERROR( + GetNodeAttr(node->def(), kXlaReplicaIdAttrName, &replica_id)); + // Since we set the device, this will now run on a task other than 0. We + // clear the two following attributes so that we don't trigger encapsulation + // again on the remote host (which will fail due to a missing + // _TPUReplicateMetadata node for the cluster). 
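+    // For example (device names illustrative only): if
+    // tf_device_assignment[replica_id][0] is
+    // "/job:worker/replica:0/task:1/device:TPU:0", a head/tail node that
+    // consumes a DT_RESOURCE arg is requested on that TPU device, and any
+    // remaining head/tail node is requested on the corresponding host device
+    // (e.g. "/job:worker/replica:0/task:1/device:CPU:0") derived below via
+    // DeviceNameUtils::DeviceNameToCpuDeviceName.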
+ for (const Edge* e : node->in_edges()) { + // Resource consuming ops should colocate with its resource input. + if (e->src()->IsArg() && + e->src()->output_type(e->src_output()) == DT_RESOURCE) { + node->set_requested_device(tf_device_assignment[replica_id][0]); + } + } + if (node->requested_device().empty()) { + string cpu_device; + TF_RETURN_IF_ERROR(DeviceNameUtils::DeviceNameToCpuDeviceName( + tf_device_assignment[replica_id][0], &cpu_device)); + node->set_requested_device(cpu_device); + } + node->ClearAttr(kTPUReplicateAttr); + node->ClearAttr(kOutsideCompilationAttr); + } + return Status::OK(); +} + +/* static */ +Status DistributedTPURewritePass::FingerprintFunctionLibrary( + const FunctionLibraryDefinition& library, uint64* fingerprint) { + // TODO(phawkins): rather than fingerprinting the entire function library, + // consider fingerprinting just the transitive dependencies of a + // computation. + std::string serialized; + FunctionDefLibrary library_proto = library.ToProto(); + if (library_proto.ByteSizeLong() >= 1.5 * 1024 * 1024 * 1024) { + LOG(WARNING) << "Serializing large proto, size: " + << library_proto.ByteSizeLong(); + } + TF_RET_CHECK(SerializeToStringDeterministic(library_proto, &serialized)); + *fingerprint = TpuCompileInterface::Get()->FingerprintString(serialized); + return Status::OK(); +} + +// Performs the rewrite on a single TPUReplicate node. +/* static */ Status DistributedTPURewritePass::RewriteTPUReplicateNode( + const string& session_handle, const DeviceSet& device_set, + Node* replicate_node, FunctionLibraryDefinition* flib_def, + FunctionLibraryRuntime* flr, Node* host_compute_key_placeholder_node, + const OutsideCompilationNodeMap& outside_compilation_nodes, + const std::vector& head_tail_outside_compilation_nodes, + NodeToNodeReplicasMap* outside_compilation_node_images, Graph* graph, + const GraphShapeInfo& shape_info, + TPUReplicateDeviceNamesMapping* tpu_replicate_device_names_mapping, + int64 autotuner_thresh) { + VLOG(2) << "Rewriting node " << replicate_node->name(); + + // num_replicas and num_cores_per_replica are the 'virtual' replicas (copies + // of the computation) and cores (virtual cores within computations) specified + // by the user. They will be mapped to physical TPU cores below. + int num_replicas; + int num_cores_per_replica; + int num_tasks; // Number of tasks. 
+ std::vector> tf_device_assignment; + std::unique_ptr xla_device_assignment; + string tpu_compilation_device; + TF_RETURN_IF_ERROR(GetDeviceTopology( + device_set, *replicate_node, &num_replicas, &num_cores_per_replica, + &num_tasks, &tf_device_assignment, &xla_device_assignment, + &tpu_compilation_device)); + + TF_RETURN_IF_ERROR(UpdateHeadTailOutsideCompilation( + tf_device_assignment, head_tail_outside_compilation_nodes)); + + string replicate; + TF_RETURN_IF_ERROR( + GetNodeAttr(replicate_node->def(), kTPUReplicateAttr, &replicate)); + tpu_replicate_device_names_mapping->emplace(replicate, tf_device_assignment); + + NameRangeMap input_name_map; + const NameAttrList* function; + std::unique_ptr computation; + DataTypeVector arg_types, retval_types; + ParameterInfo params_info; + TF_RETURN_IF_ERROR(GetIOTypes(num_replicas, *replicate_node, flr, graph, + &input_name_map, &function, &computation, + &arg_types, &retval_types, ¶ms_info)); + + std::vector arg_shapes, retval_shapes; + TF_RETURN_IF_ERROR(GetArgAndRetvalShapes( + shape_info, *replicate_node, params_info, &arg_shapes, &retval_shapes)); + + TF_RETURN_IF_ERROR(ValidateCoreNumbers(*computation, num_cores_per_replica)); + + std::vector arg_sharding; + std::vector arg_fast_mem; + std::vector retval_sharding; + TF_RETURN_IF_ERROR(AssignArgsAndRetvalsToCores( + num_cores_per_replica, params_info, arg_types, arg_shapes, retval_types, + retval_shapes, *computation, replicate_node, flr, &arg_sharding, + &arg_fast_mem, &retval_sharding)); + + VLOG(1) << DumpGraphToFile("distributed_tpu_graph_to_replicate", *computation, + flib_def); + + GraphDef graph_def; + graph->ToGraphDef(&graph_def); + FunctionLibraryDefinition reachable_functions = + flib_def->ReachableDefinitions(graph_def); + uint64 library_fingerprint; + + TF_RETURN_IF_ERROR( + FingerprintFunctionLibrary(reachable_functions, &library_fingerprint)); + VLOG(1) << "Fingerprint functions: " + << absl::StrJoin(reachable_functions.ListFunctionNames(), ", "); + VLOG(1) << "library_fingerprint: " << library_fingerprint; + + // Builds trigger nodes that put barriers around the expansion of + // TPUReplicate. In particular, we must guarantee: + // a) variable reads happen after all predecessors of the original + // TPUReplicate. + // b) variable writes happen before all successors of the original + // TPUReplicate. + // c) all replicas execute, even if output tensors are only requested from + // a subset of replicas. This is necessary both to ensure that variable + // updates happen, but also Send/Recv will deadlock if only one half of + // the communicating pair runs. + Node* host_transfer_sequencer; + Node* control_before; + Node* control_after; + TF_RETURN_IF_ERROR(BuildSequencingNodes( + tpu_compilation_device, *replicate_node, graph, &host_transfer_sequencer, + &control_before, &control_after)); + + // Build a vector of variable nodes that are inputs. + std::vector variable_inputs; + TF_RETURN_IF_ERROR( + FindVariableInputs(*replicate_node, input_name_map, &variable_inputs)); + + std::vector guaranteed_constant_nodes; + std::vector variable_reads; + TF_RETURN_IF_ERROR(DealWithConstantsAndVariables( + *replicate_node, input_name_map, graph, host_transfer_sequencer, + control_before, control_after, variable_inputs, + &guaranteed_constant_nodes, &variable_reads)); + + // Builds Shape nodes that compute the dynamic shapes of arguments whose + // shapes are not statically known. 
+ std::vector dynamic_shape_nodes; + TF_RETURN_IF_ERROR(BuildDynamicShapeNodes(*replicate_node, arg_shapes, + params_info, variable_reads, graph, + &dynamic_shape_nodes)); + + // Builds a TPUCompile node that compiles `clusters` on `compile_device`. + Node* compile_node; + TF_RETURN_IF_ERROR(BuildCompileNode( + replicate_node, *function, library_fingerprint, params_info, arg_shapes, + arg_types, guaranteed_constant_nodes, session_handle, arg_sharding, + arg_fast_mem, retval_sharding, num_cores_per_replica, + /*compile_device=*/tpu_compilation_device, xla_device_assignment.get(), + dynamic_shape_nodes, graph, &compile_node, autotuner_thresh)); + + // Compilation must be sequenced after the control node if the TPU computation + // in a control-flow construct, such as a loop. + graph->AddControlEdge(control_before, compile_node); + + Node* control_after_compilation; + TF_RETURN_IF_ERROR(BuildCompilationStatusReturnNodes( + replicate_node, compile_node, &control_after_compilation, graph)); + + std::vector variable_writes; + TF_RETURN_IF_ERROR(BuildExecuteNodes( + params_info, num_tasks, num_cores_per_replica, *replicate_node, arg_types, + arg_shapes, retval_types, arg_sharding, retval_sharding, + tf_device_assignment, compile_node, variable_reads, + control_after_compilation, control_after, &variable_writes, graph)); + bool contains_resource_write_op = + ContainsResourceWriteOp(*graph, reachable_functions); + + VLOG(2) << "contains_resource_write_op: " << contains_resource_write_op; + // Skip conditional write if there is no resource writing op inside TPU + // computation. + if (contains_resource_write_op) { + TF_RETURN_IF_ERROR(BuildVariableWrites(variable_inputs, control_after, + variable_writes, graph)); + } + + if (host_compute_key_placeholder_node != nullptr) { + TF_RETURN_IF_ERROR(ConnectHostComputeNodes( + compile_node, host_compute_key_placeholder_node, graph)); + } + + HostComputeCoreMap host_compute_core; + TF_RETURN_IF_ERROR(ParseHostComputeCores( + *replicate_node, outside_compilation_nodes, &host_compute_core)); + TF_RETURN_IF_ERROR(ReplicateOutsideCompilationNodes( + tf_device_assignment, host_compute_core, outside_compilation_nodes, + outside_compilation_node_images, graph)); + + graph->RemoveNode(replicate_node); + return Status::OK(); +} + +// Adds sharded weight update optimization for each host training loop. +// +// For any host training loop found in the graph, TPUVariableReshard ops +// are inserted to match the best layout chosen by the XLA. +/* static */ Status +DistributedTPURewritePass::PerformHostTrainingLoopOptimization( + Graph* graph, FunctionLibraryDefinition* flib_def, + FunctionLibraryRuntime* flr) { + std::vector host_training_loops_info; + Status s = tpu::DetectHostTrainingLoop( + /*current_function_name=*/nullptr, + /*current_function_attr=*/nullptr, flib_def, graph, flr, + &host_training_loops_info); + if (!s.ok()) { + VLOG(2) << "No valid host training loop found. Skipping sharded weight " + << "update optimization."; + return Status::OK(); + } + + for (const auto& host_loop : host_training_loops_info) { + const auto& function_name = host_loop.encapsulating_function_name; + // `function_name` has value when host training loop is inside a + // function call node. When host training loop is found inside a function + // call node, then, in addition to adding TPUVariableReshard ops, function + // library definition needs to be updated as well. 
+ if (function_name.has_value()) { + const auto& function_attr = host_loop.encapsulating_function_attrs; + TF_RET_CHECK(function_attr.has_value()) + << "Unable to find function attribute for function: " + << *function_name; + + const FunctionDef* function_def = flib_def->Find(*function_name); + TF_RET_CHECK(function_def) + << "Unable to find function : " << *function_name; + + std::unique_ptr fbody; + TF_RETURN_IF_ERROR(FunctionDefToBodyHelper( + *function_def, AttrSlice(&function_attr.value()), flib_def, &fbody)); + Graph* function_graph = fbody->graph; + TF_RETURN_IF_ERROR(tpu::AddReshardOp(function_graph, host_loop)); + TF_RETURN_IF_ERROR(UpdateFunctionLibDefinition(*function_graph, + *function_name, flib_def)); + } else { + TF_RETURN_IF_ERROR(tpu::AddReshardOp(graph, host_loop)); + } + } + return Status::OK(); +} + +Status DistributedTPURewritePass::PlaceUnassignedDeviceNodesOnTPUIfPossible( + Graph* graph) { + ReverseDFS(*graph, {}, PlaceOpsOnTPU); + return Status::OK(); +} + +Status DistributedTPURewritePass::Run( + const GraphOptimizationPassOptions& options) { + VLOG(1) << "DistributedTPURewritePass::Run"; + + Graph* graph = options.graph->get(); + + VLOG(1) << DumpGraphToFile("distributed_tpu_compilation_before", *graph, + options.flib_def); + + const auto* config = &options.session_options->config; + std::unique_ptr pflr( + new ProcessFunctionLibraryRuntime( + nullptr, options.session_options->env, config, + graph->versions().producer(), options.flib_def, + config ? config->graph_options().optimizer_options() + : OptimizerOptions())); + + FunctionLibraryRuntime* flr = + pflr->GetFLR(ProcessFunctionLibraryRuntime::kDefaultFLRDevice); + + // This pass can only run in the session master, which should fill + // in the device_set field to the options. + TF_RET_CHECK(options.device_set != nullptr); + + // Find all the replicate nodes before mutating the graph. + std::vector replicate_nodes; + // Map from compiled subgraph cluster name to the outside_compilation nodes in + // that cluster. + std::map outside_compilation_nodes; + std::map> head_tail_outside_compilation_nodes; + TF_RETURN_IF_ERROR(FindTaggedNodes(graph, &replicate_nodes, + &outside_compilation_nodes, + &head_tail_outside_compilation_nodes)); + + if (replicate_nodes.empty()) { + // Remove unused TPUPartitionedInput nodes. + for (Node* n : graph->nodes()) { + if (n->type_string() == kTPUPartitionedInput) graph->RemoveNode(n); + } + return Status::OK(); + } + + std::unordered_map host_compute_key_placeholder_map; + TF_RETURN_IF_ERROR(FindHostComputeKeyPlaceholderNodes( + graph, replicate_nodes, &host_compute_key_placeholder_map)); + + GraphShapeInfo shape_info; + TF_RETURN_IF_ERROR(InferShapes(graph, /*arg_shapes=*/{}, + flr->GetFunctionLibraryDefinition(), + &shape_info)); + int64 autotuner_thresh = options.session_options->config.experimental() + .xla_fusion_autotuner_thresh(); + + NodeToNodeReplicasMap outside_compilation_node_images; + TPUReplicateDeviceNamesMapping tpu_replicate_device_names_mapping; + for (Node* node : replicate_nodes) { + TF_RETURN_IF_ERROR(RewriteTPUReplicateNode( + options.session_handle, *options.device_set, node, options.flib_def, + flr, host_compute_key_placeholder_map[node->name()], + outside_compilation_nodes[node->name()], + head_tail_outside_compilation_nodes[node->name()], + &outside_compilation_node_images, graph, shape_info, + &tpu_replicate_device_names_mapping, autotuner_thresh)); + } + + // Place the padding nodes generated by dynamic padder on the correct devices. 
+ // TODO(rxsang): Place padding ops on TPUs in + // PlaceUnassignedDeviceNodesOnTPUIfPossible function. + TF_RETURN_IF_ERROR(SetPaddingNodesDevices(graph)); + + std::unordered_map outside_compilation_inputs; + for (Node* n : graph->op_nodes()) { + string lifted_arg_inputs_attr; + if (n->type_string() == "IdentityN" && + GetNodeAttr(n->def(), kXlaOutsideCompilationInputsAttrName, + &lifted_arg_inputs_attr) + .ok()) { + outside_compilation_inputs[lifted_arg_inputs_attr] = n; + } + } + for (const auto& iter : outside_compilation_nodes) { + TF_RETURN_IF_ERROR(ReplicateOutsideCompilationEdges( + iter.second, outside_compilation_node_images, + outside_compilation_inputs, graph)); + } + TF_RETURN_IF_ERROR( + RemoveOutsideCompilationNodes(outside_compilation_node_images, graph)); + TF_RETURN_IF_ERROR(LowerOutsideCompilationFunctionalNodes( + graph, *options.flib_def, tpu_replicate_device_names_mapping)); + + TF_RETURN_IF_ERROR(PlaceUnassignedDeviceNodesOnTPUIfPossible(graph)); + VLOG(1) << DumpGraphToFile("distributed_tpu_compilation_after", *graph, + options.flib_def); + VLOG(1) << "DistributedTPURewritePass::Run() finished"; + + if (enable_cross_replica_sharding_mirrored_variables_) { + VLOG(1) << "Starting host training loop optimization."; + VLOG(1) << DumpGraphToFile("host_loop_optimization_before", *graph, + options.flib_def); + TF_RETURN_IF_ERROR( + PerformHostTrainingLoopOptimization(graph, options.flib_def, flr)); + VLOG(1) << DumpGraphToFile("host_loop_optimization_after", *graph, + options.flib_def); + VLOG(1) << "Host training loop optimization finished."; + } + + return Status::OK(); +} + +bool DistributedTPURewritePass::distribute_vars_ = false; +bool DistributedTPURewritePass:: + replicate_inputs_outputs_by_default_for_xla_spmd_ = false; +bool DistributedTPURewritePass:: + enable_cross_replica_sharding_mirrored_variables_ = true; +bool DistributedTPURewritePass::enable_automatic_model_parallelism_ = false; + +/*static*/ void DistributedTPURewritePass::SetDistributedTpuRewritePassOptions( + bool distribute_vars, bool replicate_inputs_outputs_by_default_for_xla_spmd, + bool enable_cross_replica_sharding_mirrored_variables, + bool enable_automatic_model_parallelism) { + distribute_vars_ = distribute_vars; + replicate_inputs_outputs_by_default_for_xla_spmd_ = + replicate_inputs_outputs_by_default_for_xla_spmd; + enable_cross_replica_sharding_mirrored_variables_ = + enable_cross_replica_sharding_mirrored_variables; + enable_automatic_model_parallelism_ = enable_automatic_model_parallelism; +} + +} // namespace tensorflow diff --git a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.h b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.h new file mode 100644 index 00000000000..52fae7a7c13 --- /dev/null +++ b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.h @@ -0,0 +1,589 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +// Rewrites TPUReplicate nodes into replicated computations on TPU. +// +// To represent a distributed TPU computation, we use the +// TPUReplicate operator, that describes a subgraph (represented as a +// Tensorflow function) to replicate across a TPU pod. +// +// Model parallelism and data parallelism: +// --------------------------------------- +// We support two different kinds of parallelism on TPU: +// * data parallelism (replication), or parallelization across batches, and +// * model parallelism, or parallelization within a batch. +// +// The function passed to a TPUReplicate operator is replicated many +// times across a TPU pod (data parallelism). The `num_replicas` attribute +// controls how many replicas of the computation to create. Replicas are mostly +// independent; replicas can only communicate using the CrossReplicaSum +// operator, which is typically used to communicate gradients during training. +// +// Each replica may optionally use more than one TPU core (model +// parallelism). The `num_cores_per_replica` attribute controls how many cores +// there are per replica. For each core, there is a virtual TPU_REPLICATED_CORE +// device that is only valid within replicated TPU computations (e.g., +// TPU_REPLICATED_CORE:0, TPU_REPLICATED_CORE:1, etc.); each TPU_REPLICATED_CORE +// device corresponds to one TPU core in every replica. +// Each replica has runs its own copy of the computation assigned to each +// TPU_REPLICATED_CORE device. +// +// The Python code is responsible for providing a device_assignment that +// describes how the replicated logical cores map to physical cores on the TPU +// topology. +// +// Inputs to TPUReplicate: +// ------------------------------ +// The TPUReplicate operator takes three kinds of inputs, in the +// following order: +// * per-replica inputs. If there are three per-replica inputs (A, B, C) and two +// replicas, the first six arguments to TPUReplicate will be: +// A0 B0 C0 A1 B1 C1 +// where Ai is the A input to the i-th replica. +// * distributed inputs. These inputs follow the per-replica inputs. +// If there are two distributed inputs (E, F) and two replicas, the following +// arguments to TPUReplicate will be: E F. +// But there is local E and F on each replica. +// * broadcast inputs. These inputs follow the distributed inputs. All +// replicas receive a copy of each of these inputs. +// * variables. Resource variables accessed by the computation follow the +// broadcast inputs. +// +// For example, for a computation with two replicas, three per-replica inputs +// (A, B, C), two distributed inputs(E, F), two broadcast inputs (X, Y), and two +// variables (V, W), the arguments to TPUReplicate will be: +// A0 B0 C0 A1 B1 C1 E F X Y V W +// and each replica will receive the following arguments: +// A B C E F X Y V W +// +// Distributed TPU compilation requires that the shapes of all operators +// be known statically at compilation time, before any nodes have executed. +// Shapes are determined using shape information emitted by InferShapes. It +// is not possible to replicate Tensorflow operators with unknown or dynamic +// shapes for TPU at present. 
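+//
+// As a sketch (the helper below is illustrative, not part of this interface),
+// the flattened position of per-replica input `i` for replica `r` in the
+// example above is:
+//
+//   int PerReplicaInputIndex(int r, int i, int num_per_replica_args) {
+//     return r * num_per_replica_args + i;  // A1 sits at 1 * 3 + 0 == 3
+//   }
+//
+// The distributed, broadcast, and variable inputs then appear exactly once,
+// after all num_replicas * num_per_replica_args per-replica inputs.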
+// +// Graph rewrite: +// -------------- +// Compilation replaces TPUReplicate operators with: +// * a single TPUCompile node that compiles the computations, +// * one TPUExecute node for each TPU device in the system that +// executes the relevant computation, +// * one ReadVariableOp for each variable accessed by the replicated +// computation, +// * one AssignVariableOp for each variable accessed by the replicated +// computation. An assignment is built even if a variable is only read by the +// computation. We do not know which variables are written until we apply the +// XlaCompiler to the computation, but that does not happen until after the +// rewrite. Conservatively, we write back the values of all variables after +// the computation completes. +// TODO(phawkins): only write back variables that the computation may write. +// * one Shape node for each Tensor or Variable input to the computation whose +// shape is not statically known at rewrite time. The input shapes are fed +// to the TPUCompile node. +// +// To ensure that the reads and writes seem to happen at the right time in the +// graph execution, we add control edges from all predecessors of the original +// TPUReplicate operator to each of the ReadVariableOp operators. +// Similarly, we add control edges from all of the AssignVariableOp operators to +// all of the successors of the TPUReplicate operator. +// +// The TPUReplicate rewrite must run before placement, since resource +// variable inputs will have DT_RESOURCE, which cannot be sent across devices, +// leading to objections from the placer. The rewrite rewrites the resource +// accesses into explicit ReadVariableOp and AssignVariableOp operators that the +// placer is free to colocate with the variables. + +#ifndef TENSORFLOW_CORE_TPU_GRAPH_REWRITE_DISTRIBUTED_TPU_REWRITE_PASS_H_ +#define TENSORFLOW_CORE_TPU_GRAPH_REWRITE_DISTRIBUTED_TPU_REWRITE_PASS_H_ + +#include +#include + +#include "absl/container/node_hash_map.h" +#include "absl/types/span.h" +#include "tensorflow/compiler/jit/shape_inference.h" +#include "tensorflow/compiler/xla/service/computation_placer.h" +#include "tensorflow/core/common_runtime/optimization_registry.h" +#include "tensorflow/core/framework/function.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/stream_executor/tpu/tpu_topology.h" + +namespace tensorflow { + +// Replaces clusters assigned to TPU_SYSTEM devices with +// TPUCompile and TPUExecute nodes assigned to the corresponding +// TPU devices. +class DistributedTPURewritePass : public GraphOptimizationPass { + public: + static void SetDistributedTpuRewritePassOptions( + bool distribute_vars, + bool replicate_inputs_outputs_by_default_for_xla_spmd, + bool enable_cross_replica_sharding_mirrored_variables, + bool enable_automatic_model_parallelism); + + Status Run(const GraphOptimizationPassOptions& options) override; + + // The following methods are public only for the use of unit tests. + + // See comment at the top of the file for how the inputs are ordered. + // Encapsulates the different TPU replicated node input and output + // information, and provide common APIs over them. 
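+  // For instance (a sketch using the example from the file header: 2 replicas,
+  // 3 per-replica inputs, 2 distributed inputs, 2 broadcast inputs,
+  // 2 variables, 0 guaranteed constants, 1 return value per replica):
+  //
+  //   ParameterInfo info(/*num_replicas=*/2, /*num_per_replica_args=*/3,
+  //                      /*num_distributed_args=*/2, /*num_broadcast_args=*/2,
+  //                      /*num_variables=*/2, /*num_guaranteed_constants=*/0,
+  //                      /*num_retvals_per_replica=*/1);
+  //   info.NumInputsFromHost();          // 2*3 + 2 + 2 + 2 + 0 == 12
+  //   info.NumInputsToEachReplica();     // 3 + 2 + 2 + 2 + 0 == 9
+  //   info.NumOutputsToHost();           // 2*1 == 2
+  //   info.FirstBroadcastArgFromHost();  // 2*3 + 2 == 8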
+ class ParameterInfo { + public: + ParameterInfo() {} + ParameterInfo(int64 num_replicas, int64 num_per_replica_args, + int64 num_distributed_args, int64 num_broadcast_args, + int64 num_variables, int64 num_guaranteed_constants, + int64 num_retvals_per_replica) + : num_replicas_(num_replicas), + num_per_replica_args_(num_per_replica_args), + num_distributed_args_(num_distributed_args), + num_broadcast_args_(num_broadcast_args), + num_variables_(num_variables), + num_guaranteed_constants_(num_guaranteed_constants), + num_retvals_per_replica_(num_retvals_per_replica) {} + + int64 NumReplicas() const { return num_replicas_; } + + int64 NumPerReplicaArgs() const { return num_per_replica_args_; } + + int64 NumDistributedArgs() const { return num_distributed_args_; } + + int64 NumBroadcastArgs() const { return num_broadcast_args_; } + + int64 NumVariables() const { return num_variables_; } + + int64 NumGuaranteedConstants() const { return num_guaranteed_constants_; } + + int64 NumRetvalsPerReplica() const { return num_retvals_per_replica_; } + + bool IsPerReplicaArg(int64 index) const { + return index < num_per_replica_args_; + } + + bool IsDistributedArg(int64 index) const { + return index >= num_per_replica_args_ && + index < (num_per_replica_args_ + num_distributed_args_); + } + + bool IsBroadcastArg(int64 index) const { + return index >= num_per_replica_args_ && + index < (num_per_replica_args_ + num_distributed_args_ + + num_broadcast_args_); + } + + bool IsVariableArg(int64 index) const { + return index >= (num_per_replica_args_ + num_broadcast_args_) && + index < (num_per_replica_args_ + num_distributed_args_ + + num_broadcast_args_ + num_variables_); + } + + bool IsConstantArg(int64 index) const { + return index >= (num_per_replica_args_ + num_distributed_args_ + + num_broadcast_args_ + num_variables_) && + index < (num_per_replica_args_ + num_distributed_args_ + + num_broadcast_args_ + num_variables_ + + num_guaranteed_constants_); + } + + // Returns the number of inputs which has been received by the host. + int64 NumInputsFromHost() const { + return num_replicas_ * num_per_replica_args_ + num_distributed_args_ + + num_broadcast_args_ + num_variables_ + num_guaranteed_constants_; + } + + // Returns the number of inputs which will be sent to each replica. + int64 NumInputsToEachReplica() const { + return num_per_replica_args_ + num_distributed_args_ + + num_broadcast_args_ + num_variables_ + num_guaranteed_constants_; + } + + // Returns the total number of output values returned to the host (for all + // replicas). + int64 NumOutputsToHost() const { + return num_replicas_ * num_retvals_per_replica_; + } + + // Returns the position of the first per-replica argument, within the set + // of all hosts arguments. + // Broadcast arguments follow the distributed arguments. + int64 FirstBroadcastArgFromHost() const { + return num_replicas_ * num_per_replica_args_ + num_distributed_args_; + } + + // Indices of mirrored variables across replicas, which should be + // categorized as per_replica_args. 
+ const std::set& mirrored_variable_indices() const { + return mirrored_variable_indices_; + } + std::set* mutable_mirrored_variable_indices() { + return &mirrored_variable_indices_; + } + + private: + int64 num_replicas_ = 1; + int64 num_per_replica_args_ = 0; + int64 num_distributed_args_ = 0; + int64 num_broadcast_args_ = 0; + int64 num_variables_ = 0; + int64 num_guaranteed_constants_ = 0; + int64 num_retvals_per_replica_ = 0; + std::set mirrored_variable_indices_; + }; + + // Mapping from TPUReplicate cluster name to tpu device names. Value is a + // mapping from [replica][core] to a TF device name. + typedef absl::flat_hash_map>> + TPUReplicateDeviceNamesMapping; + + // Determines which devices to use to run the computation. + // Inputs: + // * num_tpus_per_task: the number of TPU devices attached to each task + // * tpu_devices: a [task][device] collection of TPU devices + // * num_replicas: the number of replicas requested + // * num_cores_per_replica: the number of cores in each computation instance + // * topology_attr: the topology TPUReplicate attribute + // * device_assignment_attr: the device_assignment TPUReplicate attribute + // Outputs: + // * tf_device_assignment: a mapping from [replica][core] to a TF device name + // * xla_device_assignment: a mapping from [replica][core] to a linearized TPU + // coordinate. + // TODO(phawkins): change tf_device_assignment to an xla::Array2D. + static Status BuildDeviceAssignment( + const tpu::TpuTopologyExternal& topology, int num_tpus_per_task, + const std::vector>& tpu_devices, int num_replicas, + int num_cores_per_replica, const string& topology_attr, + absl::Span device_assignment_attr, + std::vector>* tf_device_assignment, + std::unique_ptr* xla_device_assignment); + + // Returns the `computation` graph attached to TPUReplicate operator + // `node`. `flr` is a FunctionLibraryRuntime to use when + // instantiating the function body. Sets `*arg_types` and + // `*retval_types` to the argument/return types of the function. + static Status GetComputationForTPUReplicateOp(const NameAttrList& function, + FunctionLibraryRuntime* flr, + Graph* computation, + DataTypeVector* arg_types, + DataTypeVector* retval_types); + + // Returns the shapes of the argument tensors and return values of the + // TPUReplicate operator `node` using the _output_shapes, + // _output_handle_shapes, and _output_handle_types annotations on the input + // nodes. Expects inputs in the following order (see comment at top of file): + // * num_replicas * num_per_replica_args per-replica inputs, + // * num_broadcast_args broadcast inputs, + // * num_variables variable inputs. + // Returns an error if the input shapes to `node` are not statically known. + // Also verifies that all replicas have identical input shapes for their + // per-replica inputs. + static Status GetArgAndRetvalShapes( + const GraphShapeInfo& shape_info, const Node& node, + const ParameterInfo& params_info, std::vector* arg_shapes, + std::vector* retval_shapes); + + // Assigns arguments and return values to cores. The assignment is represented + // as an XLA op sharding, so that an argument can be replicated across cores. + // `arg_sharding` and `retval_sharding` are vectors of shardings indexed by + // argument/retval number. + // `arg_fast_mem` is vector of fast_mem indication which is indexed by + // argument number. 
+ static Status AssignArgsAndRetvalsToCores( + int num_cores_per_replica, const ParameterInfo& params_info, + const DataTypeVector& arg_types, + const std::vector& arg_shapes, + const DataTypeVector& retval_types, + const std::vector& retval_shapes, const Graph& graph, + const Node* replicate_node, FunctionLibraryRuntime* flr, + std::vector<::xla::OpSharding>* arg_sharding, + std::vector* arg_fast_mem, + std::vector<::xla::OpSharding>* retval_sharding); + + // Computes a fingerprint of the contents of `library`. + static Status FingerprintFunctionLibrary( + const FunctionLibraryDefinition& library, uint64* fingerprint); + + // Populates `*variables` with the "variables" inputs to `index`-th output of + // `node`. + struct VariableInput { + Node* node; + int index; + + // Type of the variable's value. Note that this is different to the type of + // the output of 'variable', which is always DT_RESOURCE. + DataType dtype; + }; + static Status FindVariableInputs(const Node& node, + const NameRangeMap& input_range_map, + std::vector* variables); + + // Populates '*guaranteed_constants' with the "guaranteed_constants" inputs + // to 'node'. + static Status FindGuaranteedConstantInputs( + const Node& node, const NameRangeMap& input_range_map, + std::vector* guaranteed_constants); + + // Builds Shape nodes that compute the shapes of arguments whose shapes are + // not statically known. + static Status BuildDynamicShapeNodes( + const Node& replicate_node, const std::vector& arg_shapes, + const ParameterInfo& params_info, + const std::vector& variable_reads, Graph* graph, + std::vector* dynamic_shape_nodes); + + // Builds a TPUCompile node that compiles the computation in + // `function_names`. calls `nodes`. + // TODO(b/33943292): at present, for model parallelism with Send/Recv to work + // the `nodes` must correspond to the computations assigned to TPU:0, + // TPU:1, ... in order since XLA hard-codes the chip IDs in the generated + // executables. + static Status BuildCompileNode( + const Node* replicate_node, const NameAttrList& function, + uint64 library_fingerprint, const ParameterInfo& params_info, + const std::vector& arg_shapes, + const DataTypeVector& arg_types, + const std::vector& guaranteed_constant_nodes, + const string& session_handle, + const std::vector<::xla::OpSharding>& arg_sharding, + const std::vector& arg_fast_mem, + const std::vector<::xla::OpSharding>& retval_sharding, + int num_cores_per_replica, const string& compile_device, + const xla::DeviceAssignment* xla_device_assignment, + const std::vector& dynamic_shape_nodes, Graph* graph, + Node** compile_node, int64 autotuner_thresh); + + // Builds a TPUCompileSucceededAssert node that verifies that compilation + // succeeded and replaces the TPUCompilationStatus node in the graph. + static Status BuildCompilationStatusReturnNodes( + Node* replicate_node, Node* compile_node, + Node** control_after_compilation, Graph* graph); + + // Builds ReadVariableOp nodes that read `variables`, with a control + // edges that ensure they happen after `control_predecessor`. + static Status BuildVariableReads(absl::Span variables, + Node* control_predecessor, Graph* graph, + std::vector* variable_reads); + + // Returns true if graph or functions contain resource write op, otherwise + // return false. + // TODO(b/137048563): Recognize unused resource rewrite op. + static bool ContainsResourceWriteOp(const Graph& graph, + const FunctionLibraryDefinition& fld); + // Struct that describes a variable value to be written back from TPUExecute. 
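+  // For example (field values illustrative): {value, value_output} would
+  // typically name a TPUExecute output holding the updated variable value,
+  // while {predicate, predicate_output} names a boolean tensor that gates
+  // whether BuildVariableWrites actually writes that value back.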
+ struct VariableWrite { + // A node:output pair containing a boolean tensor that determines whether + // the value should be written back. + Node* predicate; + int predicate_output; + + // A node:output pair containing the value to be written back. + Node* value; + int value_output; + }; + + // Builds AssignVariableOp nodes that write `variables` with the values from + // `variable_writes`, with control edges that ensure the writes happen before + // `control_successor`. + static Status BuildVariableWrites( + absl::Span variables, Node* control_successor, + absl::Span variable_writes, Graph* graph); + + // Builds TPUExecute operators assigned to each TPU device + // involved in the computation. + // Arguments: + // * `params_info` is the structure containing the information about the + // TPUReplicate node inputs and outputs. + // * `num_tasks` is the number of TensorFlow tasks in the slice. + // * `num_cores_per_replica` is the number of cores which are dedicated to + // each replica. + // * `replicate_node` is the original TPUReplicate node. + // * `arg_types` are the types of the arguments to the computation function + // passed as argument to TPUReplicate, including per-replica, + // broadcast, and variable arguments. + // * `arg_shapes` are the corresponding shapes (and handle types/shapes, if + // applicable). + // * `arg_shardings` and `retval_shardings` are mappings from + // arguments/return indices to shardings, as returned by + // `AssignArgsAndRetvalsToCores`. + // * `pod_devices` lists the devices to assign to each core of each replica. + // * `variable_reads` is a vectors of ReadVariableOp operators, one for each + // variable argument to the computation. + // * The execute operators will have a control edge from + // `control_predecessor` and another control edge to `control_successor`. + // Populates '*variable_writes' with information about variable values to + // write back. + static Status BuildExecuteNodes( + const ParameterInfo& params_info, int num_tasks, + int num_cores_per_replica, const Node& replicate_node, + const DataTypeVector& arg_types, + const std::vector& arg_shapes, + const DataTypeVector& retval_types, + const std::vector<::xla::OpSharding>& arg_shardings, + const std::vector<::xla::OpSharding>& retval_shardings, + const std::vector>& tpu_device_names, + Node* compile_node, const std::vector& variable_reads, + Node* control_predecessor, Node* control_successor, + std::vector* variable_writes, Graph* graph); + + // Connects the compile node to all the host transfer nodes, and removes the + // key placeholder node that was previously standing in for it. + // Arguments: + // * `compile_node` is the TPUCompile node that has been added to the graph. + // * `key_placeholder_node` is the placeholder node to send the key to all the + // host + // * transfer nodes in the original graph. + // * `graph` is the graph being rewritten. + static Status ConnectHostComputeNodes(Node* compile_node, + Node* key_placeholder_node, + Graph* graph); + + // Map from a Node in an outside_compilation cluster in the original graph to + // the list of Nodes, one for each replica, that it is expanded into during + // replication. + typedef absl::node_hash_map> NodeToNodeReplicasMap; + + // Map from the name of an outside_compilation cluster to the model-parallel + // core index that the HostCompute Op should be placed on in that cluster. 
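+  // For example (cluster names illustrative):
+  //
+  //   HostComputeCoreMap host_compute_core =
+  //       {{"cluster_0", 0}, {"cluster_1", 1}};
+  //
+  // places the HostCompute Op for cluster_1 on model-parallel core 1; clusters
+  // not listed in the replicate node's "host_compute_core" attribute default
+  // to core 0 (see ParseHostComputeCores).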
+ typedef std::map HostComputeCoreMap; + + // Map from the name of an outside_compilation cluster to the list of Nodes + // that should run on the host for that cluster. + typedef std::map> OutsideCompilationNodeMap; + + // Copies the outside_compilation nodes in a cluster to create replica + // replica_index. + static Status CopyOutsideCompilationNodes( + int replica_index, const std::vector& outside_compilation_nodes, + const DeviceNameUtils::ParsedName& tpu_device, + const DeviceNameUtils::ParsedName& partial_device, + NodeToNodeReplicasMap* node_images, Graph* graph); + + // Replicates all the nodes in outside_compilation clusters in a compiled + // computation. + static Status ReplicateOutsideCompilationNodes( + const std::vector>& tf_device_assignment, + const HostComputeCoreMap& host_compute_core, + const OutsideCompilationNodeMap& outside_compilation_nodes, + NodeToNodeReplicasMap* node_images, Graph* graph); + + // Lifts the edges between original outside_compilation nodes in a cluster + // onto their replicas. + static Status CopyOutsideCompilationEdges( + const std::vector& outside_compilation_nodes, + const NodeToNodeReplicasMap& node_images, + const std::unordered_map outside_compilation_inputs, + Graph* graph); + + // Lifts all the edges in outside_compilation clusters in a compiled + // computation to their replicas. + static Status ReplicateOutsideCompilationEdges( + const OutsideCompilationNodeMap& outside_compilation_nodes, + const NodeToNodeReplicasMap& node_images, + const std::unordered_map outside_compilation_inputs, + Graph* graph); + + // Removes all the original outside_compilation nodes from the graph, + // following replication. + static Status RemoveOutsideCompilationNodes( + const NodeToNodeReplicasMap& node_images, Graph* graph); + + // Lowers outside compilation functional nodes (If/While/function call). + // Otherwise, when we have multiple workers, device placer will not be able to + // place nodes if outside compilation has DT_RESOURCE inputs (e.g. a + // DT_RESOURCE input fed into multiple While nodes on different devices). + static Status LowerOutsideCompilationFunctionalNodes( + Graph* g, const FunctionLibraryDefinition& flib_def, + const TPUReplicateDeviceNamesMapping& tpu_replicate_device_names_mapping); + + // Parses the 'host_compute_core' attribute on replicate_node to get the + // replicated core id of each outside_compilation cluster. + static Status ParseHostComputeCores( + const Node& replicate_node, + const OutsideCompilationNodeMap& outside_compilation_nodes, + HostComputeCoreMap* host_compute_core); + + // Gets the physical topology information about the TPU system. + static Status GetDeviceTopology( + const DeviceSet& device_set, const Node& replicate_node, + int* num_replicas, int* num_cores_per_replica, int* num_tasks, + std::vector>* tf_device_assignment, + std::unique_ptr* xla_device_assignment, + string* tpu_compilation_device); + + // Gets the types of args, retvals, and parameters. + static Status GetIOTypes( + int num_replicas, const Node& replicate_node, FunctionLibraryRuntime* flr, + Graph* graph, NameRangeMap* input_name_map, const NameAttrList** function, + std::unique_ptr* computation, DataTypeVector* arg_types, + DataTypeVector* retval_types, ParameterInfo* params_info); + + // Find known constants and deals with variable reads. 
+ static Status DealWithConstantsAndVariables( + const Node& replicate_node, const NameRangeMap& input_name_map, + Graph* graph, Node* host_transfer_sequencer, Node* control_before, + Node* control_after, absl::Span variable_nodes, + std::vector* guaranteed_constant_nodes, + std::vector* variable_reads); + + // Adds NoOp nodes for sequencing computation and variable reads/writes. + static Status BuildSequencingNodes(const string& tpu_compilation_device, + const Node& replicate_node, Graph* graph, + Node** host_transfer_sequencer, + Node** control_before, + Node** control_after); + + // Performs the pass's rewrite on a TPUReplicate node `node`. + static Status RewriteTPUReplicateNode( + const string& session_handle, const DeviceSet& device_set, + Node* replicate_node, FunctionLibraryDefinition* flib_def, + FunctionLibraryRuntime* flr, Node* host_compute_key_placeholder_node, + const OutsideCompilationNodeMap& outside_compilation_nodes, + const std::vector& head_tail_outside_compilation_nodes, + NodeToNodeReplicasMap* outside_compilation_node_images, Graph* graph, + const GraphShapeInfo& shape_info, + TPUReplicateDeviceNamesMapping* tpu_replicate_device_names_mapping, + int64 autotuner_thresh); + + // Performs host training loop optimization. For example, when TPUExecute + // node is inside a while loop, then model weight variables can be sharded + // in XLA preferred layout and then unsharded only at the very last iteration + // to reduce the number of all_gather. + static Status PerformHostTrainingLoopOptimization( + Graph* graph, FunctionLibraryDefinition* flib_def, + FunctionLibraryRuntime* flr); + + // Heuristically place some nodes with unassigned devices on TPUs for + // performance reasons. + static Status PlaceUnassignedDeviceNodesOnTPUIfPossible(Graph* graph); + + // Updates the head and tail outside compiled nodes so that nodes have the + // correct device and removes the replication and outside compilation + // attributes so that these nodes do not trigger further graph optimization + // passes. + static Status UpdateHeadTailOutsideCompilation( + const std::vector>& tf_device_assignment, + const std::vector& head_tail_outside_compilation_nodes); + + private: + static bool distribute_vars_; + static bool replicate_inputs_outputs_by_default_for_xla_spmd_; + static bool enable_cross_replica_sharding_mirrored_variables_; + static bool enable_automatic_model_parallelism_; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_TPU_GRAPH_REWRITE_DISTRIBUTED_TPU_REWRITE_PASS_H_ diff --git a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.cc b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.cc new file mode 100644 index 00000000000..18b158c0335 --- /dev/null +++ b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.cc @@ -0,0 +1,45 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/
+
+#include "tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.h"
+
+#include <limits>
+
+#include "absl/random/random.h"
+
+namespace tensorflow {
+namespace {
+
+static int64 overridden_node_id = -1;
+
+}  // namespace
+
+namespace internal {
+
+void OverrideNodeIdForTesting(const int64 node_id) {
+  overridden_node_id = node_id;
+}
+
+uint64 GetNodeId() {
+  if (overridden_node_id > -1) {
+    return overridden_node_id;
+  } else {
+    return absl::Uniform(absl::SharedBitGen(), uint64{0},
+                         std::numeric_limits<uint64>::max());
+  }
+}
+
+}  // namespace internal
+}  // namespace tensorflow
diff --git a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.h b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.h
new file mode 100644
index 00000000000..ce80249c30f
--- /dev/null
+++ b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.h
@@ -0,0 +1,38 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_TPU_GRAPH_REWRITE_DISTRIBUTED_TPU_REWRITE_PASS_INTERNAL_H_
+#define TENSORFLOW_CORE_TPU_GRAPH_REWRITE_DISTRIBUTED_TPU_REWRITE_PASS_INTERNAL_H_
+
+#include "tensorflow/core/framework/types.h"
+
+namespace tensorflow {
+
+// Implementation details of distributed_tpu_rewrite_pass.cc, please DO NOT
+// depend on these.
+namespace internal {
+
+// When set to a value >= 0, overrides the node_id. Used for getting
+// deterministic node_ids during testing.
+void OverrideNodeIdForTesting(int64 node_id);
+
+// Retrieves the node id, used to make some node names unique in the rewrite
+// pass.
+uint64 GetNodeId();
+
+}  // namespace internal
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_TPU_GRAPH_REWRITE_DISTRIBUTED_TPU_REWRITE_PASS_INTERNAL_H_
diff --git a/tensorflow/core/tpu/graph_rewrite/host_training_loop_optimization_util.cc b/tensorflow/core/tpu/graph_rewrite/host_training_loop_optimization_util.cc
new file mode 100644
index 00000000000..fad8e22399c
--- /dev/null
+++ b/tensorflow/core/tpu/graph_rewrite/host_training_loop_optimization_util.cc
@@ -0,0 +1,629 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/ + +#include "tensorflow/core/tpu/graph_rewrite/host_training_loop_optimization_util.h" + +#include +#include +#include + +#include "absl/container/flat_hash_set.h" +#include "absl/container/node_hash_set.h" +#include "tensorflow/compiler/tf2xla/functionalize_control_flow_util.h" +#include "tensorflow/compiler/tf2xla/tf2xla_util.h" +#include "tensorflow/core/graph/algorithm.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/gtl/cleanup.h" +#include "tensorflow/core/protobuf/tpu/compile_metadata.pb.h" +#include "tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.h" + +namespace tensorflow { +namespace tpu { + +namespace { + +constexpr char kDefaultShardingValue[] = ""; + +const Edge* FindEdgeConnecting(const Node* src, const Node* dst) { + for (const auto e : src->out_edges()) { + if (e->dst()->name() == dst->name()) return &(*e); + } + return nullptr; +} + +// Contains TPUExecute node and its DT_RESOURCE input nodes that +// correspond to model weights. +struct ExecuteNodeInfo { + Node* execute_node; + std::vector var_inputs; +}; + +// Returns whether `node` is in `execute_nodes` or `(identity) -> execute`. +bool IsExecuteNodeOrIdentityToExecuteNode( + const Graph& graph, const std::unordered_set& loop_nodes, // NOLINT + const absl::flat_hash_set& execute_nodes, Node* node) { + if (execute_nodes.find(node) != execute_nodes.end()) return true; + if (loop_nodes.find(node) == loop_nodes.end()) return false; + if (node->IsNextIteration()) return true; + if (!node->IsIdentity()) return false; + + for (const Edge* e : node->out_edges()) { + if (e->IsControlEdge()) continue; + + Node* node = e->dst(); + if (!IsExecuteNodeOrIdentityToExecuteNode(graph, loop_nodes, execute_nodes, + node)) { + return false; + } + } + + return true; +} + +// From input node to the TPUExecute op, finds the corresponding Enter node +// by searching/traversing nodes in below pattern of nodes: +// Enter ----> (identity) ---> While body input +// Returns nullptr if the Enter node is not found. +xla::StatusOr FindEnterNodeFromTPUExecuteNodeInput(Node* input_node) { + Node* node = input_node; + while (node->IsIdentity()) { + TF_RETURN_IF_ERROR(node->input_node(0, &node)); + } + + if (node->IsEnter()) { + return node; + } + return nullptr; +} + +xla::StatusOr ResourceOnlyUsedForTPUExecuteInLoop( + const Graph& graph, const std::unordered_set& loop_nodes, // NOLINT + const Node* enter_node, const absl::flat_hash_set execute_nodes) { + for (const Edge* output_edge : enter_node->out_edges()) { + Node* output_node = output_edge->dst(); + if (output_edge->IsControlEdge() || output_node->IsExit()) continue; + + // If output node is not execute node, it must be output node + // to the while loop body. + if (!IsExecuteNodeOrIdentityToExecuteNode(graph, loop_nodes, execute_nodes, + output_node)) { + return false; + } + } + return true; +} + +// Given a TPUCompile node, find all TPUExecute nodes that executes the compiled +// program and its model weight variable inputs as well. +// TPUCompileMetadataProto of TPUCompile node must be reset to `new_metadata` +// if new reshard ops are added. 
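[Editor's aside, not part of this patch.] A hypothetical call site for the helper defined next, using the template arguments that the surrounding code implies (ExecuteNodeInfo values, Node* loop members). The wrapper function name is invented for illustration and would have to appear after the definition below:

    Status CollectExecuteNodesForCompile(
        const Node* compile_node, const Graph& graph,
        const std::unordered_set<Node*>& loop_nodes) {
      std::vector<ExecuteNodeInfo> execute_nodes_info;
      TPUCompileMetadataProto new_metadata;
      TF_RETURN_IF_ERROR(ExtractExecuteNodeInfo(compile_node, graph, loop_nodes,
                                                &execute_nodes_info,
                                                &new_metadata));
      if (execute_nodes_info.empty()) return Status::OK();  // nothing to reshard
      // Otherwise the caller must serialize `new_metadata` back into the
      // TPUCompile node's "metadata" attr, as AddReshardOp does further below.
      return Status::OK();
    }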
+Status ExtractExecuteNodeInfo(const Node* compile_node, const Graph& graph, + const std::unordered_set& loop_nodes, // NOLINT + std::vector* execute_node_info, + TPUCompileMetadataProto* new_metadata) { + string metadata_string; + TF_RETURN_IF_ERROR( + GetNodeAttr(compile_node->attrs(), "metadata", &metadata_string)); + new_metadata->ParsePartialFromString(metadata_string); + if (new_metadata->num_cores_per_replica() != 1) { + // We do not support model parallelism yet. + return Status::OK(); + } + + execute_node_info->clear(); + for (Node* node : compile_node->out_nodes()) { + if (node->type_string() == "TPUExecute") { + execute_node_info->push_back({node}); + } + } + if (execute_node_info->empty()) { + return Status::OK(); + } + TF_RET_CHECK(execute_node_info->size() == new_metadata->num_replicas()) + << "Number of replicas does not equal number of execute nodes: " + << new_metadata->num_replicas() << " vs " << execute_node_info->size(); + DataTypeVector arg_types; + TF_RETURN_IF_ERROR(GetNodeAttr((*execute_node_info)[0].execute_node->attrs(), + "Targs", &arg_types)); + for (int64 i = 0; i < arg_types.size(); ++i) { + if (arg_types[i] != DT_RESOURCE) { + continue; + } + const auto sharding_config = new_metadata->args(i).enable_xla_sharding(); + if (sharding_config != TPUCompileMetadataProto::Arg::TENTATIVE && + sharding_config != TPUCompileMetadataProto::Arg::ALLOWED) { + continue; + } + std::vector edges(execute_node_info->size()); + bool is_supported = true; + std::unordered_map> + enter_to_execute_nodes; + for (int64 j = 0; j < edges.size(); ++j) { + auto execute = (*execute_node_info)[j].execute_node; + TF_RETURN_IF_ERROR(execute->input_edge(i, &edges[j])); + TF_RET_CHECK(edges[j]->src()->output_type(edges[j]->src_output()) == + arg_types[i]) + << "Execute op has an unexpected input type."; + // Traverse backwards to find the Enter node from which the input is + // passed. + // This makes sure that we are checking the usages of all potential + // aliases of the input node as well. + TF_ASSIGN_OR_RETURN(auto enter_node, FindEnterNodeFromTPUExecuteNodeInput( + edges[j]->src())); + if (enter_node == nullptr) { + is_supported = false; + enter_to_execute_nodes.clear(); + break; + } + enter_to_execute_nodes[enter_node].insert(edges[j]->dst()); + } + + for (const auto& it : enter_to_execute_nodes) { + // Size of execute nodes should be either 1 (per-replica variables) or + // num_replicas (distributed variables). + if ((it.second.size() != 1) && + (it.second.size() != new_metadata->num_replicas())) { + is_supported = false; + break; + } + TF_ASSIGN_OR_RETURN(bool no_other_use, + ResourceOnlyUsedForTPUExecuteInLoop( + graph, loop_nodes, it.first, it.second)); + if (!no_other_use) { + is_supported = false; + break; + } + } + + // Add the variable input edges only when they are supported for all + // executes. + if (is_supported) { + for (int64 j = 0; j < edges.size(); ++j) { + (*execute_node_info)[j].var_inputs.push_back(edges[j]); + } + new_metadata->mutable_args(i)->set_enable_xla_sharding( + TPUCompileMetadataProto::Arg::ALLOWED); + } + } + + int64 total = 0; + for (const auto& a : new_metadata->args()) { + if (a.enable_xla_sharding() == TPUCompileMetadataProto::Arg::ALLOWED) { + total++; + } + } + TF_RET_CHECK(total == (*execute_node_info)[0].var_inputs.size()) + << " total " << total << " var_inputs " + << (*execute_node_info)[0].var_inputs.size(); + if (total == 0) { + // We don't need to process anything if no input is added. 
+    execute_node_info->clear();
+  }
+  return Status::OK();
+}
+
+bool IsTPUCompileOp(const Node& n) { return n.type_string() == "TPUCompile"; }
+
+void FindTPUCompileNodes(
+    const std::string* current_function_name,
+    const AttrValueMap* current_function_attr,
+    const std::unordered_map<string, WhileLoopFrame>& frames,
+    std::vector<HostTrainingLoopInfo>* host_training_loops_info) {
+  // Adds frames with no children (i.e., the innermost frames) to a worklist.
+  std::deque<const WhileLoopFrame*> worklist;
+
+  for (auto& frame : frames) {
+    if (frame.second.num_children == 0) {
+      worklist.push_back(&frame.second);
+    }
+  }
+
+  // Check for TPUCompile nodes from the innermost while loop to the outermost
+  // while loop.
+  while (!worklist.empty()) {
+    const WhileLoopFrame* frame = worklist.front();
+    worklist.pop_front();
+
+    for (const auto& n : frame->nodes) {
+      if (!IsTPUCompileOp(*n)) continue;
+
+      HostTrainingLoopInfo host_training_loop_info;
+      host_training_loop_info.compile_node_name = n->name();
+      host_training_loop_info.loop_cond_node_name = frame->loop_cond->name();
+      host_training_loop_info.while_loop_name = frame->name;
+
+      for (const auto arg : frame->args) {
+        LoopArgInfo arg_info;
+        arg_info.enter_node_name = arg.enter->name();
+        if (arg.exit) arg_info.exit_node_name = arg.exit->name();
+
+        host_training_loop_info.loop_arguments.push_back(std::move(arg_info));
+      }
+      host_training_loop_info.loop_nodes = frame->nodes;
+
+      if (current_function_name) {
+        host_training_loop_info.encapsulating_function_name =
+            *current_function_name;
+      }
+      if (current_function_attr) {
+        host_training_loop_info.encapsulating_function_attrs =
+            *current_function_attr;
+      }
+
+      host_training_loops_info->emplace_back(
+          std::move(host_training_loop_info));
+    }
+
+    // If the parent has no remaining children, add it to the worklist.
+    --frame->parent->num_children;
+    if (frame->parent->num_children == 0) {
+      worklist.push_back(frame->parent);
+    }
+  }
+}
+
+// From the while loop cond node, finds all loop exit nodes by traversing
+// the following pattern of nodes:
+//   LoopCond -----> Switch -----> Exit
+std::vector<Node*> FindLoopExitNodes(const Node& loop_cond) {
+  std::vector<Node*> loop_exit_nodes;
+  for (const auto e_cond : loop_cond.out_edges()) {
+    if (e_cond->IsControlEdge() || !e_cond->dst()->IsSwitch()) continue;
+    auto switch_node = e_cond->dst();
+
+    for (const auto e_switch : switch_node->out_edges()) {
+      if (e_switch->IsControlEdge() || !e_switch->dst()->IsExit()) continue;
+
+      loop_exit_nodes.push_back(e_switch->dst());
+    }
+  }
+  return loop_exit_nodes;
+}
+
+// Finds any one of the switch nodes in the while loop by traversing the graph
+// from the while loop condition node.
+xla::StatusOr<Node*> GetLoopSwitchNode(const Node& loop_cond_node) {
+  Node* loop_switch_node = nullptr;
+  for (auto n : loop_cond_node.out_nodes()) {
+    if (n->IsSwitch()) {
+      loop_switch_node = n;
+      break;
+    }
+  }
+
+  TF_RET_CHECK(loop_switch_node != nullptr && loop_switch_node->IsSwitch())
+      << "Unable to find any switch nodes.";
+  return loop_switch_node;
+}
+
+// Returns or creates a node that is executed before each loop iteration
+// in the while loop.
+Status GetOrCreateBeforeEachIterationNode(Graph* graph, Node* loop_switch_node,
+                                          Node** node_out) {
+  // If the while loop switch node already has an outgoing data edge to the
+  // true branch of the switch op, then reuse that node.
+  for (const auto out_edge : loop_switch_node->out_edges()) {
+    if (out_edge->src_output() == 1) {
+      *node_out = out_edge->dst();
+      return Status::OK();
+    }
+  }
+
+  // Create an Identity node that represents execution at every loop iteration.
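[Editor's aside, not part of this patch.] For context on the Switch wiring used here and in AddNoOpAfterLastIteration below: output port 1 of a Switch node is the true branch, taken on every loop iteration, while port 0 is the false branch, taken once when the loop terminates. The two helper nodes therefore hang off different ports, roughly as follows (node names shortened for illustration; the actual calls appear in the code below):

    // Port 1 (true branch): executed on every iteration of the loop body.
    graph->AddEdge(loop_switch_node, 1, before_each_iteration_node, 0);
    // Port 0 (false branch): executed once after the last iteration, before Exit.
    graph->AddEdge(loop_switch_node, 0, after_last_iteration_node, 0);
    graph->AddControlEdge(after_last_iteration_node, exit_node);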
+ NodeDef at_loop_iteration_nodedef; + at_loop_iteration_nodedef.set_op("Identity"); + DataType dtype; + TF_RETURN_IF_ERROR(GetNodeAttr(loop_switch_node->def(), "T", &dtype)); + + AddNodeAttr("T", dtype, &at_loop_iteration_nodedef); + at_loop_iteration_nodedef.set_name(graph->NewName(strings::StrCat( + "TPUVariableReshard/before_iteration", "/_", internal::GetNodeId()))); + + Status status; + Node* at_loop_iteration_node = + graph->AddNode(at_loop_iteration_nodedef, &status); + TF_RETURN_IF_ERROR(status); + + graph->AddEdge(loop_switch_node, 1, at_loop_iteration_node, 0); + *node_out = at_loop_iteration_node; + return Status::OK(); +} + +// Injects NoOp node in that is executed after the very last iteration +// of the while loop but before the while loop exit node. +Status AddNoOpAfterLastIteration(Graph* graph, Node* loop_switch_node, + Node** node_out) { + // Find the exit node from loop switch node. + Node* exit_node; + for (const auto out_node : loop_switch_node->out_nodes()) { + if (out_node->IsExit()) { + exit_node = out_node; + break; + } + } + + TF_RET_CHECK(exit_node != nullptr) + << "Cannot find exit node connected to switch node :" + << loop_switch_node->name(); + + // Create NoOp that represents execution at the end of while loop + // last iteration. + NodeDef after_last_loop_iteration; + after_last_loop_iteration.set_op("Identity"); + DataType dtype; + TF_RETURN_IF_ERROR(GetNodeAttr(loop_switch_node->def(), "T", &dtype)); + + AddNodeAttr("T", dtype, &after_last_loop_iteration); + after_last_loop_iteration.set_name(graph->NewName(strings::StrCat( + "TPUVariableReshard/last_iteration", "/_", internal::GetNodeId()))); + + Status status; + Node* after_last_iteration_node = + graph->AddNode(after_last_loop_iteration, &status); + TF_RETURN_IF_ERROR(status); + + // Newly created node must be executed once after last iteration of the while + // loop and before while loop exits. + graph->AddEdge(loop_switch_node, 0, after_last_iteration_node, 0); + graph->AddControlEdge(after_last_iteration_node, exit_node); + *node_out = after_last_iteration_node; + return Status::OK(); +} + +} // namespace + +Status DetectHostTrainingLoop( + const std::string* current_function_name, + const AttrValueMap* current_function_attr, + const FunctionLibraryDefinition* library, Graph* graph, + FunctionLibraryRuntime* flr, + std::vector* host_training_loops_info) { + std::vector associated_function_list; + for (const auto* n : graph->nodes()) { + const auto associated_functions = GetAssociatedFunctions(*n, library); + if (associated_functions.empty()) continue; + + associated_function_list.insert(associated_function_list.end(), + associated_functions.begin(), + associated_functions.end()); + } + + Status ret_status = Status::OK(); + for (const auto& function : associated_function_list) { + if (function.type() != AssociatedFunctionInfo::kFunctionAttr) continue; + + // Convert the function to Graph. 
+ FunctionLibraryRuntime::Handle handle; + TF_RETURN_IF_ERROR(flr->Instantiate(function.func_name(), + AttrSlice(&function.attrs()), &handle)); + auto cleanup_handle = gtl::MakeCleanup([&]() { + auto s = flr->ReleaseHandle(handle); + if (!s.ok()) { + ret_status.Update(s); + } + }); + const FunctionBody* body = flr->GetFunctionBody(handle); + Graph* function_graph = body->graph; + TF_RETURN_IF_ERROR(DetectHostTrainingLoop( + &function.func_name(), &function.attrs(), library, function_graph, flr, + host_training_loops_info)); + } + + // BuildControlFlowInfo() requires that the graph's source node is connected + // to all source nodes in the graph. Many graphs violate this invariant. + // As so, add edges to source/sink nodes so that this invariant is kept. + FixupSourceAndSinkEdges(graph); + std::vector cf_info; + TF_RETURN_IF_ERROR( + BuildControlFlowInfo(graph, &cf_info, /*unreachable_nodes=*/nullptr)); + + std::unordered_map frames; + TF_RETURN_IF_ERROR(ExtractWhileLoopFrames(cf_info, graph, &frames)); + FindTPUCompileNodes(current_function_name, current_function_attr, frames, + host_training_loops_info); + return ret_status; +} + +Status AddReshardOp(Graph* graph, const HostTrainingLoopInfo& host_loop_info) { + const auto& compile_node_name = host_loop_info.compile_node_name; + const auto node_name_map = graph->BuildNodeNameIndex(); + const auto node_it = node_name_map.find(compile_node_name); + TF_RET_CHECK(node_it != node_name_map.end()) + << "Unable to find compile node : " << compile_node_name; + + const auto compile_node = node_it->second; + std::vector execute_nodes_info; + + Status status; + TPUCompileMetadataProto metadata; + status = + ExtractExecuteNodeInfo(compile_node, *graph, host_loop_info.loop_nodes, + &execute_nodes_info, &metadata); + if (!status.ok()) { + LOG(ERROR) << "Encountered error when trying to extract execute nodes, " + "skipping host loop optimization. Status: " + << status.ToString(); + return Status::OK(); + } + + if (execute_nodes_info.empty()) { + return Status::OK(); + } + + // Update the TPUCompileMetadata such that sharding config of the + // sharded resource variable inputs is set to ALLOWED instead of + // TENTATIVE. + string new_metadata_string; + metadata.SerializeToString(&new_metadata_string); + compile_node->ClearAttr("metadata"); + compile_node->AddAttr("metadata", new_metadata_string); + + // Unsharding of the model weight variables must happen only at the very + // last loop iteration. As so, add while loop condition predicate as an + // input to the sharding switch node. If loop condition is true, we do not + // unshard. + const auto& cond_node_name = host_loop_info.loop_cond_node_name; + auto loop_cond_node_it = node_name_map.find(cond_node_name); + TF_RET_CHECK(loop_cond_node_it != node_name_map.end()) + << "Cannot find loop condition node : " << cond_node_name; + auto* loop_condition_node = loop_cond_node_it->second; + + // In order to make sure that shard/unshard operations are invoked + // at the start of every loop body and at the end of last iteration + // of the loop, respectively, traverse the graph and find a switch node + // of the host training loop. 
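[Editor's aside, not part of this patch.] Summarizing the control edges that the remainder of AddReshardOp builds for each TPUExecute node; all of these calls appear verbatim further below, the sketch only gathers them in one place:

    // Shard before every iteration, immediately ahead of the TPUExecute node.
    graph->AddControlEdge(before_loop_iteration_node, reshard_op_node);
    graph->AddControlEdge(reshard_op_node, execute_node);
    // Unshard exactly once, after the last iteration and before the loop exits.
    graph->AddControlEdge(after_last_iteration_node, unshard_op_node);
    graph->AddControlEdge(unshard_op_node, after_unshard_node);
    graph->AddControlEdge(after_unshard_node, exit);  // for every loop Exit node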
+ TF_ASSIGN_OR_RETURN(Node * switch_node, + GetLoopSwitchNode(*loop_condition_node)); + + Node* after_last_iteration_node; + TF_RETURN_IF_ERROR(AddNoOpAfterLastIteration(graph, switch_node, + &after_last_iteration_node)); + + Node* before_loop_iteration_node; + TF_RETURN_IF_ERROR(GetOrCreateBeforeEachIterationNode( + graph, switch_node, &before_loop_iteration_node)); + + // Create const op that represents default sharding value + // (i.e. no-op sharding). + NodeDef default_sharding; + default_sharding.set_op("Const"); + default_sharding.set_name(graph->NewName(strings::StrCat( + "TPUVariableReshard/default_shard_state", "/_", internal::GetNodeId()))); + AddNodeAttr("dtype", DT_STRING, &default_sharding); + + Tensor t(DT_STRING, {2}); + t.vec()(0) = kDefaultShardingValue; + t.vec()(1) = kDefaultShardingValue; + t.AsProtoTensorContent( + (*default_sharding.mutable_attr())["value"].mutable_tensor()); + + Node* default_sharding_node = graph->AddNode(default_sharding, &status); + TF_RETURN_IF_ERROR(status); + // Add control edge between loop condition to make sure that + // default_sharding_node node is inside the while loop frame. + graph->AddControlEdge(loop_condition_node, default_sharding_node); + + // Build a no-op node used to add control edges after unshard nodes. + NodeDef after_unshard; + after_unshard.set_op("NoOp"); + after_unshard.set_name(graph->NewName(strings::StrCat( + "TPUVariableReshard/last_iteration", "/_", internal::GetNodeId()))); + auto after_unshard_node = graph->AddNode(after_unshard, &status); + TF_RETURN_IF_ERROR(status); + + for (auto info : execute_nodes_info) { + auto execute_node = info.execute_node; + // Create Reshard op that optionally shards model weight variables + // prior to program execution. + NodeDef reshard_node_def; + reshard_node_def.set_name(graph->NewName(strings::StrCat( + "TPUVariableReshard/reshard", "/_", internal::GetNodeId()))); + reshard_node_def.set_op("TPUReshardVariables"); + AddNodeAttr("N", static_cast(info.var_inputs.size()), + &reshard_node_def); + Node* reshard_op_node = graph->AddNode(reshard_node_def, &status); + if (!status.ok()) return status; + + reshard_op_node->set_assigned_device_name( + execute_node->assigned_device_name()); + + // Reshard op must execute at every loop iteration prior to + // TPUExecute node. + graph->AddControlEdge(before_loop_iteration_node, reshard_op_node); + graph->AddControlEdge(reshard_op_node, execute_node); + + for (int i = 0; i < info.var_inputs.size(); ++i) { + const auto variable_edge = info.var_inputs[i]; + graph->AddEdge(variable_edge->src(), variable_edge->src_output(), + reshard_op_node, i); + } + + const int new_key_input = info.var_inputs.size(); + // Add program input edge from the compiler(i.e. compilation key). + const auto compilation_key_edge = + FindEdgeConnecting(compile_node, execute_node); + graph->AddEdge(compile_node, compilation_key_edge->src_output(), + reshard_op_node, new_key_input); + + // Create VarHandleOp to store sharding state. Sharding state holds string + // compilation key that identifies whether the graph is re-compiled and the + // variables need to be sharded again. 
+ NodeDef var_handle_def; + var_handle_def.set_op("VarHandleOp"); + var_handle_def.set_name(graph->NewName(strings::StrCat( + "TPUVariableReshard/reshard_state", "/_", internal::GetNodeId()))); + AddNodeAttr("dtype", DT_STRING, &var_handle_def); + AddNodeAttr("shape", TensorShape({}), &var_handle_def); + Node* var_handle_node = graph->AddNode(var_handle_def, &status); + if (!status.ok()) return status; + + // Add control edge between `var_handle_def` node and while loop + // loop condition so that `var_handle_def` is inside the same while loop + // frame. + // TODO(hongjunchoi): Consider adding control edge from another node--such + // as input control node. + graph->AddControlEdge(loop_condition_node, var_handle_node); + + // Connect data edge between var handle op and reshard op. + const int format_state_input = new_key_input + 1; + graph->AddEdge(var_handle_node, 0, reshard_op_node, format_state_input); + + // Create Reshard op that represents unsharding after TPUExecute. + NodeDef unshard_node_def; + unshard_node_def.set_name(graph->NewName(strings::StrCat( + "TPUVariableReshard/unshard", "/_", internal::GetNodeId()))); + unshard_node_def.set_op("TPUReshardVariables"); + AddNodeAttr("N", static_cast(info.var_inputs.size()), + &unshard_node_def); + Node* unshard_op_node = graph->AddNode(unshard_node_def, &status); + TF_RETURN_IF_ERROR(status); + + unshard_op_node->set_assigned_device_name( + execute_node->assigned_device_name()); + + for (int i = 0; i < info.var_inputs.size(); ++i) { + const auto variable_edge = info.var_inputs[i]; + // Connect model weight resource variables to unshard op. Since unshard op + // must be only invoked after the very last loop iteration, for each while + // loop inputs, we traverse backwards to find the switch node of the host + // training loop and connect `output_false` field of the switch node with + // unshard op. + TF_ASSIGN_OR_RETURN( + Node * enter_node, + FindEnterNodeFromTPUExecuteNodeInput(variable_edge->src())); + graph->AddEdge(enter_node, 0, unshard_op_node, i); + } + + // Add control dependency before/after unshard node and the control nodes. + graph->AddControlEdge(after_last_iteration_node, unshard_op_node); + graph->AddControlEdge(unshard_op_node, after_unshard_node); + + graph->AddEdge(default_sharding_node, 0, unshard_op_node, new_key_input); + + // Add data edge from sharding state var handle op to unshard op. + graph->AddEdge(var_handle_node, 0, unshard_op_node, format_state_input); + } + // Add control dependency from after_unshard_node to all exits nodes. This is + // to make sure that the unshard ops will be executed as long as any of the + // exits are used. + for (auto exit : FindLoopExitNodes(*loop_condition_node)) { + graph->AddControlEdge(after_unshard_node, exit); + } + return Status::OK(); +} + +} // namespace tpu +} // namespace tensorflow diff --git a/tensorflow/core/tpu/graph_rewrite/host_training_loop_optimization_util.h b/tensorflow/core/tpu/graph_rewrite/host_training_loop_optimization_util.h new file mode 100644 index 00000000000..822dc9edd51 --- /dev/null +++ b/tensorflow/core/tpu/graph_rewrite/host_training_loop_optimization_util.h @@ -0,0 +1,80 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_TPU_GRAPH_REWRITE_HOST_TRAINING_LOOP_OPTIMIZATION_UTIL_H_
+#define TENSORFLOW_CORE_TPU_GRAPH_REWRITE_HOST_TRAINING_LOOP_OPTIMIZATION_UTIL_H_
+
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "tensorflow/compiler/tf2xla/functionalize_control_flow_util.h"
+#include "tensorflow/core/common_runtime/function.h"
+#include "tensorflow/core/graph/graph.h"
+
+namespace tensorflow {
+namespace tpu {
+
+struct LoopArgInfo {
+  std::string enter_node_name;
+  // Exit nodes are optional for loop invariant while loop args.
+  absl::optional<std::string> exit_node_name;
+};
+
+struct HostTrainingLoopInfo {
+  // Name and attribute information about the function in which
+  // the host training loop is included. If the host training loop is not
+  // inside a function call, then `function_name` and `function_attrs`
+  // are nullopt.
+  absl::optional<std::string> encapsulating_function_name;
+  absl::optional<AttrValueMap> encapsulating_function_attrs;
+
+  // Name of the TPU Compile node within the host training loop.
+  std::string compile_node_name;
+
+  // Name of the while loop in which the TPU compile op is located.
+  std::string while_loop_name;
+
+  // Name of the node that represents the loop condition.
+  std::string loop_cond_node_name;
+
+  // Exit and Enter node names for each loop argument.
+  std::vector<LoopArgInfo> loop_arguments;
+
+  std::unordered_set<Node*> loop_nodes;  // NOLINT
+};
+
+// Walks through the `graph`, recursively if functional nodes exist, and
+// identifies all host training loops. Host training loops are the innermost
+// while loops that encapsulate a TPUCompileOp node. This is later
+// used/analyzed to introduce host-loop-specific optimizations such as
+// adding a sharded weight update.
+Status DetectHostTrainingLoop(
+    const std::string* current_function_name,
+    const AttrValueMap* current_function_attr,
+    const FunctionLibraryDefinition* library, Graph* graph,
+    FunctionLibraryRuntime* flr,
+    std::vector<HostTrainingLoopInfo>* host_training_loops_info);
+
+// Injects VariableReshardOps before and after the TPUExecute op inside the
+// host training loop body. This effectively applies a sharded weight update
+// on model weight variables.
+Status AddReshardOp(Graph* graph, const HostTrainingLoopInfo& host_loop_info);
+
+}  // namespace tpu
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_TPU_GRAPH_REWRITE_HOST_TRAINING_LOOP_OPTIMIZATION_UTIL_H_
diff --git a/tensorflow/core/tpu/graph_rewrite/incomplete_nodedef_builder.cc b/tensorflow/core/tpu/graph_rewrite/incomplete_nodedef_builder.cc
new file mode 100644
index 00000000000..47187204f69
--- /dev/null
+++ b/tensorflow/core/tpu/graph_rewrite/incomplete_nodedef_builder.cc
@@ -0,0 +1,73 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/tpu/graph_rewrite/incomplete_nodedef_builder.h" + +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/core/common_runtime/function.h" + +namespace tensorflow { + +IncompleteNodeDefBuilder::IncompleteNodeDefBuilder(const string& name, + const string& op, + const NodeDebugInfo& debug) { + nodedef_.set_name(name); + nodedef_.set_op(op); + MergeDebugInfo(debug, &nodedef_); +} + +IncompleteNodeDefBuilder& IncompleteNodeDefBuilder::AddAttr( + const string& attr, const DataType& type) { + AddNodeAttr(attr, type, &nodedef_); + return *this; +} + +IncompleteNodeDefBuilder& IncompleteNodeDefBuilder::AddAttr(const string& attr, + int val) { + AddNodeAttr(attr, val, &nodedef_); + return *this; +} + +IncompleteNodeDefBuilder& IncompleteNodeDefBuilder::Device( + const string& device) { + nodedef_.set_device(device); + return *this; +} + +Status IncompleteNodeDefBuilder::Build(Graph* graph, Node** n) { + Status status; + *n = graph->AddNode(nodedef_, &status); + return status; +} + +IncompleteNodeDefBuilder IncompleteNodeDefBuilder::Identity( + const string& name, const DataType& type, const NodeDebugInfo& debug) { + return IncompleteNodeDefBuilder(name, "Identity", debug).AddAttr("T", type); +} + +IncompleteNodeDefBuilder IncompleteNodeDefBuilder::Merge( + const string& name, const DataType& type, const NodeDebugInfo& debug, + int n) { + return IncompleteNodeDefBuilder(name, "Merge", debug) + .AddAttr("T", type) + .AddAttr("N", n); +} + +IncompleteNodeDefBuilder IncompleteNodeDefBuilder::Switch( + const string& name, const DataType& type, const NodeDebugInfo& debug) { + return IncompleteNodeDefBuilder(name, "Switch", debug).AddAttr("T", type); +} + +} // namespace tensorflow diff --git a/tensorflow/core/tpu/graph_rewrite/incomplete_nodedef_builder.h b/tensorflow/core/tpu/graph_rewrite/incomplete_nodedef_builder.h new file mode 100644 index 00000000000..88e484f00cf --- /dev/null +++ b/tensorflow/core/tpu/graph_rewrite/incomplete_nodedef_builder.h @@ -0,0 +1,58 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CORE_TPU_GRAPH_REWRITE_NODEDEF_BUILDER_H_ +#define TENSORFLOW_CORE_TPU_GRAPH_REWRITE_NODEDEF_BUILDER_H_ + +#include + +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { + +// Convenience builder to build NodeDefs without specifying the inputs. This is +// similar to NodeDefBuilder except inputs are not specified. +// TODO(jpienaar): Clean up NodeDefBuilder and remove this class. +class IncompleteNodeDefBuilder { + public: + IncompleteNodeDefBuilder(const string& name, const string& op, + const NodeDebugInfo& debug); + + IncompleteNodeDefBuilder& AddAttr(const string& attr, const DataType& type); + IncompleteNodeDefBuilder& AddAttr(const string& attr, int val); + + IncompleteNodeDefBuilder& Device(const string& device); + + Status Build(Graph* graph, Node** n); + + static IncompleteNodeDefBuilder Identity(const string& name, + const DataType& type, + const NodeDebugInfo& debug); + static IncompleteNodeDefBuilder Merge(const string& name, + const DataType& type, + const NodeDebugInfo& debug, int n); + static IncompleteNodeDefBuilder Switch(const string& name, + const DataType& type, + const NodeDebugInfo& debug); + + private: + NodeDef nodedef_; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_TPU_GRAPH_REWRITE_NODEDEF_BUILDER_H_ diff --git a/tensorflow/core/tpu/graph_rewrite/tpu_rewrite_pass_registration.cc b/tensorflow/core/tpu/graph_rewrite/tpu_rewrite_pass_registration.cc index ef1831464e2..83a652d7aaa 100644 --- a/tensorflow/core/tpu/graph_rewrite/tpu_rewrite_pass_registration.cc +++ b/tensorflow/core/tpu/graph_rewrite/tpu_rewrite_pass_registration.cc @@ -15,6 +15,7 @@ limitations under the License. 
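[Editor's aside, not part of this patch.] A possible use of the IncompleteNodeDefBuilder declared above; the helper name, node name, and device string are invented for illustration, while the builder calls themselves (Switch, Device, Build) are exactly the API shown in the header:

    Status AddBoolSwitch(Graph* graph, const Node& original, const string& device,
                         Node** switch_node) {
      return IncompleteNodeDefBuilder::Switch(graph->NewName("rewrite/switch"),
                                              DT_BOOL, NodeDebugInfo(original))
          .Device(device)
          .Build(graph, switch_node);
    }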
#include "tensorflow/core/common_runtime/optimization_registry.h" #include "tensorflow/core/tpu/graph_rewrite/distributed_tpu_configuration_rewrite_pass.h" +#include "tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.h" #include "tensorflow/core/tpu/graph_rewrite/encapsulate_tpu_computations_pass.h" #include "tensorflow/core/tpu/graph_rewrite/variable_merger_pass.h" @@ -30,8 +31,9 @@ REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 34, EncapsulateTPUComputationsPass); REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 39, ExtractOutsideCompilationPass); +REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 40, + DistributedTPURewritePass); REGISTER_OPTIMIZATION(OptimizationPassRegistry::POST_REWRITE_FOR_EXEC, 0, VariableMergerPass); - } // namespace } // namespace tensorflow diff --git a/tensorflow/stream_executor/multi_platform_manager.cc b/tensorflow/stream_executor/multi_platform_manager.cc index 6c767d1d66e..120245e34b7 100644 --- a/tensorflow/stream_executor/multi_platform_manager.cc +++ b/tensorflow/stream_executor/multi_platform_manager.cc @@ -55,8 +55,8 @@ class MultiPlatformManagerImpl { TF_LOCKS_EXCLUDED(mu_); port::StatusOr> PlatformsWithFilter( - const std::function& filter) - TF_LOCKS_EXCLUDED(mu_); + const std::function& filter, + bool initialize_platform) TF_LOCKS_EXCLUDED(mu_); using Listener = MultiPlatformManager::Listener; port::Status RegisterListener(std::unique_ptr listener) @@ -188,7 +188,8 @@ port::Status MultiPlatformManagerImpl::RegisterListener( port::StatusOr> MultiPlatformManagerImpl::PlatformsWithFilter( - const std::function& filter) { + const std::function& filter, + bool initialize_platform) { absl::MutexLock lock(&mu_); CHECK_EQ(id_map_.size(), name_map_.size()); std::vector platforms; @@ -196,7 +197,7 @@ MultiPlatformManagerImpl::PlatformsWithFilter( for (const auto& entry : id_map_) { Platform* platform = entry.second; if (filter(platform)) { - if (!platform->Initialized()) { + if (initialize_platform && !platform->Initialized()) { SE_RETURN_IF_ERROR(platform->Initialize({})); } platforms.push_back(platform); @@ -299,7 +300,14 @@ MultiPlatformManager::InitializePlatformWithId( /*static*/ port::StatusOr> MultiPlatformManager::PlatformsWithFilter( const std::function& filter) { - return Impl().PlatformsWithFilter(filter); + return PlatformsWithFilter(filter, /*initialize_platform=*/true); +} + +/*static*/ port::StatusOr> +MultiPlatformManager::PlatformsWithFilter( + const std::function& filter, + bool initialize_platform) { + return Impl().PlatformsWithFilter(filter, initialize_platform); } } // namespace stream_executor diff --git a/tensorflow/stream_executor/multi_platform_manager.h b/tensorflow/stream_executor/multi_platform_manager.h index fbb6effdf83..4fa2d819520 100644 --- a/tensorflow/stream_executor/multi_platform_manager.h +++ b/tensorflow/stream_executor/multi_platform_manager.h @@ -130,6 +130,10 @@ class MultiPlatformManager { static port::StatusOr> PlatformsWithFilter( const std::function& filter); + static port::StatusOr> PlatformsWithFilter( + const std::function& filter, + bool initialize_platform); + // Although the MultiPlatformManager "owns" its platforms, it holds them as // undecorated pointers to prevent races during program exit (between this // object's data and the underlying platforms (e.g., CUDA, OpenCL). 
diff --git a/tensorflow/stream_executor/tpu/BUILD b/tensorflow/stream_executor/tpu/BUILD index 7fa46ebd8d1..a8557aada48 100644 --- a/tensorflow/stream_executor/tpu/BUILD +++ b/tensorflow/stream_executor/tpu/BUILD @@ -331,6 +331,7 @@ cc_library( name = "tpu_topology_external", srcs = ["tpu_topology.cc"], hdrs = ["tpu_topology.h"], + visibility = ["//visibility:public"], deps = [ ":c_api_decl", "//tensorflow/core/platform:types", diff --git a/tensorflow/stream_executor/tpu/tpu_platform_interface.cc b/tensorflow/stream_executor/tpu/tpu_platform_interface.cc index 7580e709bdf..fa9062c217c 100644 --- a/tensorflow/stream_executor/tpu/tpu_platform_interface.cc +++ b/tensorflow/stream_executor/tpu/tpu_platform_interface.cc @@ -23,10 +23,11 @@ namespace tensorflow { namespace tpu { namespace { -TpuPlatformInterface* GetRegisteredPlatformStatic() { +TpuPlatformInterface* GetRegisteredPlatformStatic(bool initialize_platform) { // Prefer TpuPlatform if it's registered. auto status_or_tpu_platform = - stream_executor::MultiPlatformManager::PlatformWithName("TPU"); + stream_executor::MultiPlatformManager::PlatformWithName( + "TPU", initialize_platform); if (status_or_tpu_platform.ok()) { return static_cast( status_or_tpu_platform.ValueOrDie()); @@ -43,7 +44,8 @@ TpuPlatformInterface* GetRegisteredPlatformStatic() { [](const stream_executor::Platform* platform) { return dynamic_cast(platform) != nullptr; - }); + }, + initialize_platform); if (!status_or_other_tpu_platforms.ok()) { LOG(WARNING) << "Error when getting other TPU platforms: " << status_or_tpu_platform.status(); @@ -64,9 +66,24 @@ TpuPlatformInterface* GetRegisteredPlatformStatic() { /* static */ TpuPlatformInterface* TpuPlatformInterface::GetRegisteredPlatform() { - // Use a local static variable to avoid data races during initialization. + return GetRegisteredPlatform(/*initialize_platform=*/true); +} + +/* static */ +TpuPlatformInterface* TpuPlatformInterface::GetRegisteredPlatform( + bool initialize_platform) { + static bool requested_initialize_platform = initialize_platform; static TpuPlatformInterface* tpu_registered_platform = - GetRegisteredPlatformStatic(); + GetRegisteredPlatformStatic(initialize_platform); + + if (!requested_initialize_platform && initialize_platform) { + // If the first time this function is called, we did not request + // initializing the platform, but the next caller wants the platform + // initialized, we will call GetRegisteredPlatformStatic again to initialize + // the platform. + tpu_registered_platform = GetRegisteredPlatformStatic(initialize_platform); + } + return tpu_registered_platform; } diff --git a/tensorflow/stream_executor/tpu/tpu_platform_interface.h b/tensorflow/stream_executor/tpu/tpu_platform_interface.h index da9e91ffc1c..889375245a8 100644 --- a/tensorflow/stream_executor/tpu/tpu_platform_interface.h +++ b/tensorflow/stream_executor/tpu/tpu_platform_interface.h @@ -33,6 +33,9 @@ class TpuPlatformInterface : public stream_executor::Platform { // is registered or an error occurred. static TpuPlatformInterface* GetRegisteredPlatform(); + // Option to not initialize a platform if not necessary. 
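[Editor's aside, not part of this patch; the declaration follows below.] An illustrative call of the new overload. Note the caveat visible in the .cc change above: the result is cached in a function-local static, and a later call with initialize_platform=true re-runs the lookup so that the cached platform gets initialized:

    // A cheap query that must not trigger TPU system initialization:
    tpu::TpuPlatformInterface* tpu_platform =
        tpu::TpuPlatformInterface::GetRegisteredPlatform(
            /*initialize_platform=*/false);
    if (tpu_platform != nullptr && !tpu_platform->Initialized()) {
      // The platform is registered but has not been initialized yet.
    }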
+ static TpuPlatformInterface* GetRegisteredPlatform(bool initialize_platform); + virtual Status Reset() { return Reset(false); } virtual Status Reset(bool only_tear_down) = 0; diff --git a/tensorflow/stream_executor/tpu/tpu_topology.h b/tensorflow/stream_executor/tpu/tpu_topology.h index 48371b6e008..b49b1e24386 100644 --- a/tensorflow/stream_executor/tpu/tpu_topology.h +++ b/tensorflow/stream_executor/tpu/tpu_topology.h @@ -30,6 +30,7 @@ struct TpuChipCoordinatesExternal { class TpuCoreLocationExternal { public: + TpuCoreLocationExternal() : core_location_(nullptr) {} explicit TpuCoreLocationExternal(void* core_location) : core_location_(core_location) {} TpuChipCoordinatesExternal chip_coordinates() const; From 4e4cfe7e65668c4d75c5fbbea9349a2445647186 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 21 Jul 2020 15:59:36 -0700 Subject: [PATCH 0984/2522] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 322461142 Change-Id: Idfc03fde2f11d9cf21ece08f252a6971a8955b85 --- tensorflow/go/op/wrappers.go | 201 +++++++++++++++++++---------------- 1 file changed, 108 insertions(+), 93 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 843ef2fb7e1..eb0a853ba95 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -8508,99 +8508,6 @@ func IteratorGetNextSync(scope *Scope, iterator tf.Output, output_types []tf.Dat return components } -// RaggedCountSparseOutputAttr is an optional argument to RaggedCountSparseOutput. -type RaggedCountSparseOutputAttr func(optionalAttr) - -// RaggedCountSparseOutputMinlength sets the optional minlength attribute to value. -// -// value: Minimum value to count. Can be set to -1 for no minimum. -// If not specified, defaults to -1 -// -// REQUIRES: value >= -1 -func RaggedCountSparseOutputMinlength(value int64) RaggedCountSparseOutputAttr { - return func(m optionalAttr) { - m["minlength"] = value - } -} - -// RaggedCountSparseOutputMaxlength sets the optional maxlength attribute to value. -// -// value: Maximum value to count. Can be set to -1 for no maximum. -// If not specified, defaults to -1 -// -// REQUIRES: value >= -1 -func RaggedCountSparseOutputMaxlength(value int64) RaggedCountSparseOutputAttr { - return func(m optionalAttr) { - m["maxlength"] = value - } -} - -// Performs sparse-output bin counting for a ragged tensor input. -// -// Counts the number of times each value occurs in the input. -// -// Arguments: -// splits: Tensor containing the row splits of the ragged tensor to count. -// values: Tensor containing values of the sparse tensor to count. -// weights: A Tensor of the same shape as indices containing per-index weight values. -// May also be the empty tensor if no weights are used. -// binary_output: Whether to output the number of occurrences of each value or 1. -// -// Returns: -// output_indices: Indices tensor for the resulting sparse tensor object. -// output_values: Values tensor for the resulting sparse tensor object. -// output_dense_shape: Shape tensor for the resulting sparse tensor object. -// END -// } -// attr { -// name: "T" -// description: <= -1 +func RaggedCountSparseOutputMinlength(value int64) RaggedCountSparseOutputAttr { + return func(m optionalAttr) { + m["minlength"] = value + } +} + +// RaggedCountSparseOutputMaxlength sets the optional maxlength attribute to value. +// +// value: Maximum value to count. Can be set to -1 for no maximum. 
+// If not specified, defaults to -1 +// +// REQUIRES: value >= -1 +func RaggedCountSparseOutputMaxlength(value int64) RaggedCountSparseOutputAttr { + return func(m optionalAttr) { + m["maxlength"] = value + } +} + +// Performs sparse-output bin counting for a ragged tensor input. +// +// Counts the number of times each value occurs in the input. +// +// Arguments: +// splits: Tensor containing the row splits of the ragged tensor to count. +// values: Tensor containing values of the sparse tensor to count. +// weights: A Tensor of the same shape as indices containing per-index weight values. +// May also be the empty tensor if no weights are used. +// binary_output: Whether to output the number of occurrences of each value or 1. +// +// Returns: +// output_indices: Indices tensor for the resulting sparse tensor object. +// output_values: Values tensor for the resulting sparse tensor object. +// output_dense_shape: Shape tensor for the resulting sparse tensor object. +// END +// } +// attr { +// name: "T" +// description: < Date: Tue, 21 Jul 2020 16:03:24 -0700 Subject: [PATCH 0985/2522] Create bazel release configs for GPU Windows branches and propagate those changes to nightly release jobs. PiperOrigin-RevId: 322461877 Change-Id: Ie6d23f22d4246b0d2ea6ce4805e0873a11366f17 --- .bazelrc | 11 +++++++++-- .../release/windows/gpu_py35_full/nightly_release.bat | 2 +- .../release/windows/gpu_py36_full/nightly_release.bat | 2 +- .../release/windows/gpu_py37_full/nightly_release.bat | 2 +- .../release/windows/gpu_py38_full/nightly_release.bat | 2 +- .../ci_build/windows/gpu/pip/build_tf_windows.sh | 2 +- 6 files changed, 14 insertions(+), 7 deletions(-) diff --git a/.bazelrc b/.bazelrc index d3cb67d08d3..82bb0605b08 100644 --- a/.bazelrc +++ b/.bazelrc @@ -81,11 +81,13 @@ # # Release build options (for all operating systems) # release_common: Common options for all builds on all operating systems. +# release_windows_common: Common options for all builds on Windows. # release_gpu_common: Common options for GPU builds on Linux and Windows. # release_cpu_linux: Toolchain and CUDA options for Linux CPU builds. # release_cpu_macos: Toolchain and CUDA options for MacOS CPU builds. # release_gpu_linux: Toolchain and CUDA options for Linux GPU builds. # release_cpu_windows: Toolchain and CUDA options for Windows CPU builds. +# release_gpu_windows: Toolchain and CUDA options for Windows GPU builds. 
# Allow builds using libc++ as a linker library # This is mostly for OSSFuzz, so we also pass in the flags from environment to clean build file @@ -572,5 +574,10 @@ build:release_gpu_linux --config=release_gpu_common build:release_gpu_linux --config=avx_linux build:release_gpu_linux --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain -build:release_cpu_windows --config=release_common -build:release_cpu_windows --announce_rc +build:release_windows_common --config=release_common +build:release_windows_common --define=no_tensorflow_py_deps=true +build:release_windows_common --announce_rc + +build:release_cpu_windows --config=release_windows_common + +build:release_gpu_windows --config=release_windows_common diff --git a/tensorflow/tools/ci_build/release/windows/gpu_py35_full/nightly_release.bat b/tensorflow/tools/ci_build/release/windows/gpu_py35_full/nightly_release.bat index 6727f08726b..cae34610b12 100644 --- a/tensorflow/tools/ci_build/release/windows/gpu_py35_full/nightly_release.bat +++ b/tensorflow/tools/ci_build/release/windows/gpu_py35_full/nightly_release.bat @@ -17,4 +17,4 @@ SET PYTHON_DIRECTORY=Python35 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --tf_nightly --extra_build_flags "--config=v2" +call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --tf_nightly --extra_build_flags "--config=release_gpu_windows" diff --git a/tensorflow/tools/ci_build/release/windows/gpu_py36_full/nightly_release.bat b/tensorflow/tools/ci_build/release/windows/gpu_py36_full/nightly_release.bat index b1f4b707eb5..e82d65714c4 100644 --- a/tensorflow/tools/ci_build/release/windows/gpu_py36_full/nightly_release.bat +++ b/tensorflow/tools/ci_build/release/windows/gpu_py36_full/nightly_release.bat @@ -17,4 +17,4 @@ SET PYTHON_DIRECTORY=Python36 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --tf_nightly --extra_build_flags "--config=v2" +call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --tf_nightly --extra_build_flags "--config=release_gpu_windows" diff --git a/tensorflow/tools/ci_build/release/windows/gpu_py37_full/nightly_release.bat b/tensorflow/tools/ci_build/release/windows/gpu_py37_full/nightly_release.bat index c283752947c..d152ab709a6 100644 --- a/tensorflow/tools/ci_build/release/windows/gpu_py37_full/nightly_release.bat +++ b/tensorflow/tools/ci_build/release/windows/gpu_py37_full/nightly_release.bat @@ -17,4 +17,4 @@ SET PYTHON_DIRECTORY=Python37 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --tf_nightly --extra_build_flags "--config=v2" +call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --tf_nightly --extra_build_flags "--config=release_gpu_windows" diff --git a/tensorflow/tools/ci_build/release/windows/gpu_py38_full/nightly_release.bat b/tensorflow/tools/ci_build/release/windows/gpu_py38_full/nightly_release.bat index 0a208440148..1ab90bc575d 100644 --- a/tensorflow/tools/ci_build/release/windows/gpu_py38_full/nightly_release.bat +++ b/tensorflow/tools/ci_build/release/windows/gpu_py38_full/nightly_release.bat @@ -17,4 +17,4 @@ SET PYTHON_DIRECTORY=Python38 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --tf_nightly --extra_build_flags "--config=v2" +call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --tf_nightly --extra_build_flags "--config=release_gpu_windows" diff --git 
a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh index 6dd183ceb87..fb195c19ce7 100644 --- a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh +++ b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh @@ -138,7 +138,7 @@ fi run_configure_for_gpu_build -bazel build --announce_rc --config=opt --define=no_tensorflow_py_deps=true \ +bazel \ --output_filter=^$ \ ${EXTRA_BUILD_FLAGS} \ tensorflow/tools/pip_package:build_pip_package || exit $? From 834a47f99e04e16f0a5facb44b1edba02c21722a Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Tue, 21 Jul 2020 16:06:35 -0700 Subject: [PATCH 0986/2522] Internal CI changes, for better exposing our release build scripts PiperOrigin-RevId: 322462536 Change-Id: I41cee5996f02217599b2f217791f2d257d948108 --- .../ci_build/{per_release => rel}/macos/cpu_libtensorflow.sh | 0 .../tools/ci_build/{per_release => rel}/macos/cpu_py35_nonpip.sh | 0 .../tools/ci_build/{per_release => rel}/macos/cpu_py35_pip.sh | 0 .../tools/ci_build/{per_release => rel}/macos/cpu_py36_nonpip.sh | 0 .../tools/ci_build/{per_release => rel}/macos/cpu_py36_pip.sh | 0 .../tools/ci_build/{per_release => rel}/macos/cpu_py37_nonpip.sh | 0 .../tools/ci_build/{per_release => rel}/macos/cpu_py37_pip.sh | 0 .../tools/ci_build/{per_release => rel}/macos/cpu_py38_nonpip.sh | 0 .../tools/ci_build/{per_release => rel}/macos/cpu_py38_pip.sh | 0 .../ci_build/{per_release => rel}/ubuntu/cpu_libtensorflow.sh | 0 .../tools/ci_build/{per_release => rel}/ubuntu/cpu_py35_nonpip.sh | 0 .../tools/ci_build/{per_release => rel}/ubuntu/cpu_py35_pip.sh | 0 .../tools/ci_build/{per_release => rel}/ubuntu/cpu_py36_nonpip.sh | 0 .../tools/ci_build/{per_release => rel}/ubuntu/cpu_py36_pip.sh | 0 .../tools/ci_build/{per_release => rel}/ubuntu/cpu_py37_nonpip.sh | 0 .../tools/ci_build/{per_release => rel}/ubuntu/cpu_py37_pip.sh | 0 .../tools/ci_build/{per_release => rel}/ubuntu/cpu_py38_nonpip.sh | 0 .../tools/ci_build/{per_release => rel}/ubuntu/cpu_py38_pip.sh | 0 .../ci_build/{per_release => rel}/ubuntu/gpu_libtensorflow.sh | 0 .../tools/ci_build/{per_release => rel}/ubuntu/gpu_pip_on_cpu.sh | 0 .../tools/ci_build/{per_release => rel}/ubuntu/gpu_py35_nonpip.sh | 0 .../tools/ci_build/{per_release => rel}/ubuntu/gpu_py35_pip.sh | 0 .../tools/ci_build/{per_release => rel}/ubuntu/gpu_py36_nonpip.sh | 0 .../tools/ci_build/{per_release => rel}/ubuntu/gpu_py36_pip.sh | 0 .../tools/ci_build/{per_release => rel}/ubuntu/gpu_py37_nonpip.sh | 0 .../tools/ci_build/{per_release => rel}/ubuntu/gpu_py37_pip.sh | 0 .../tools/ci_build/{per_release => rel}/ubuntu/gpu_py38_nonpip.sh | 0 .../tools/ci_build/{per_release => rel}/ubuntu/gpu_py38_pip.sh | 0 tensorflow/tools/ci_build/{per_release => rel}/ubuntu/sanity.sh | 0 .../ci_build/{per_release => rel}/windows/cpu_libtensorflow.bat | 0 .../tools/ci_build/{per_release => rel}/windows/cpu_py35.bat | 0 .../tools/ci_build/{per_release => rel}/windows/cpu_py36.bat | 0 .../tools/ci_build/{per_release => rel}/windows/cpu_py37.bat | 0 .../tools/ci_build/{per_release => rel}/windows/cpu_py38.bat | 0 .../ci_build/{per_release => rel}/windows/gpu_libtensorflow.bat | 0 .../ci_build/{per_release => rel}/windows/gpu_pip_on_cpu.bat | 0 .../tools/ci_build/{per_release => rel}/windows/gpu_py35.bat | 0 .../tools/ci_build/{per_release => rel}/windows/gpu_py36.bat | 0 .../tools/ci_build/{per_release => rel}/windows/gpu_py37.bat | 0 .../tools/ci_build/{per_release => rel}/windows/gpu_py38.bat | 0 40 files 
changed, 0 insertions(+), 0 deletions(-) rename tensorflow/tools/ci_build/{per_release => rel}/macos/cpu_libtensorflow.sh (100%) rename tensorflow/tools/ci_build/{per_release => rel}/macos/cpu_py35_nonpip.sh (100%) rename tensorflow/tools/ci_build/{per_release => rel}/macos/cpu_py35_pip.sh (100%) rename tensorflow/tools/ci_build/{per_release => rel}/macos/cpu_py36_nonpip.sh (100%) rename tensorflow/tools/ci_build/{per_release => rel}/macos/cpu_py36_pip.sh (100%) rename tensorflow/tools/ci_build/{per_release => rel}/macos/cpu_py37_nonpip.sh (100%) rename tensorflow/tools/ci_build/{per_release => rel}/macos/cpu_py37_pip.sh (100%) rename tensorflow/tools/ci_build/{per_release => rel}/macos/cpu_py38_nonpip.sh (100%) rename tensorflow/tools/ci_build/{per_release => rel}/macos/cpu_py38_pip.sh (100%) rename tensorflow/tools/ci_build/{per_release => rel}/ubuntu/cpu_libtensorflow.sh (100%) rename tensorflow/tools/ci_build/{per_release => rel}/ubuntu/cpu_py35_nonpip.sh (100%) rename tensorflow/tools/ci_build/{per_release => rel}/ubuntu/cpu_py35_pip.sh (100%) rename tensorflow/tools/ci_build/{per_release => rel}/ubuntu/cpu_py36_nonpip.sh (100%) rename tensorflow/tools/ci_build/{per_release => rel}/ubuntu/cpu_py36_pip.sh (100%) rename tensorflow/tools/ci_build/{per_release => rel}/ubuntu/cpu_py37_nonpip.sh (100%) rename tensorflow/tools/ci_build/{per_release => rel}/ubuntu/cpu_py37_pip.sh (100%) rename tensorflow/tools/ci_build/{per_release => rel}/ubuntu/cpu_py38_nonpip.sh (100%) rename tensorflow/tools/ci_build/{per_release => rel}/ubuntu/cpu_py38_pip.sh (100%) rename tensorflow/tools/ci_build/{per_release => rel}/ubuntu/gpu_libtensorflow.sh (100%) rename tensorflow/tools/ci_build/{per_release => rel}/ubuntu/gpu_pip_on_cpu.sh (100%) rename tensorflow/tools/ci_build/{per_release => rel}/ubuntu/gpu_py35_nonpip.sh (100%) rename tensorflow/tools/ci_build/{per_release => rel}/ubuntu/gpu_py35_pip.sh (100%) rename tensorflow/tools/ci_build/{per_release => rel}/ubuntu/gpu_py36_nonpip.sh (100%) rename tensorflow/tools/ci_build/{per_release => rel}/ubuntu/gpu_py36_pip.sh (100%) rename tensorflow/tools/ci_build/{per_release => rel}/ubuntu/gpu_py37_nonpip.sh (100%) rename tensorflow/tools/ci_build/{per_release => rel}/ubuntu/gpu_py37_pip.sh (100%) rename tensorflow/tools/ci_build/{per_release => rel}/ubuntu/gpu_py38_nonpip.sh (100%) rename tensorflow/tools/ci_build/{per_release => rel}/ubuntu/gpu_py38_pip.sh (100%) rename tensorflow/tools/ci_build/{per_release => rel}/ubuntu/sanity.sh (100%) rename tensorflow/tools/ci_build/{per_release => rel}/windows/cpu_libtensorflow.bat (100%) rename tensorflow/tools/ci_build/{per_release => rel}/windows/cpu_py35.bat (100%) rename tensorflow/tools/ci_build/{per_release => rel}/windows/cpu_py36.bat (100%) rename tensorflow/tools/ci_build/{per_release => rel}/windows/cpu_py37.bat (100%) rename tensorflow/tools/ci_build/{per_release => rel}/windows/cpu_py38.bat (100%) rename tensorflow/tools/ci_build/{per_release => rel}/windows/gpu_libtensorflow.bat (100%) rename tensorflow/tools/ci_build/{per_release => rel}/windows/gpu_pip_on_cpu.bat (100%) rename tensorflow/tools/ci_build/{per_release => rel}/windows/gpu_py35.bat (100%) rename tensorflow/tools/ci_build/{per_release => rel}/windows/gpu_py36.bat (100%) rename tensorflow/tools/ci_build/{per_release => rel}/windows/gpu_py37.bat (100%) rename tensorflow/tools/ci_build/{per_release => rel}/windows/gpu_py38.bat (100%) diff --git a/tensorflow/tools/ci_build/per_release/macos/cpu_libtensorflow.sh 
b/tensorflow/tools/ci_build/rel/macos/cpu_libtensorflow.sh similarity index 100% rename from tensorflow/tools/ci_build/per_release/macos/cpu_libtensorflow.sh rename to tensorflow/tools/ci_build/rel/macos/cpu_libtensorflow.sh diff --git a/tensorflow/tools/ci_build/per_release/macos/cpu_py35_nonpip.sh b/tensorflow/tools/ci_build/rel/macos/cpu_py35_nonpip.sh similarity index 100% rename from tensorflow/tools/ci_build/per_release/macos/cpu_py35_nonpip.sh rename to tensorflow/tools/ci_build/rel/macos/cpu_py35_nonpip.sh diff --git a/tensorflow/tools/ci_build/per_release/macos/cpu_py35_pip.sh b/tensorflow/tools/ci_build/rel/macos/cpu_py35_pip.sh similarity index 100% rename from tensorflow/tools/ci_build/per_release/macos/cpu_py35_pip.sh rename to tensorflow/tools/ci_build/rel/macos/cpu_py35_pip.sh diff --git a/tensorflow/tools/ci_build/per_release/macos/cpu_py36_nonpip.sh b/tensorflow/tools/ci_build/rel/macos/cpu_py36_nonpip.sh similarity index 100% rename from tensorflow/tools/ci_build/per_release/macos/cpu_py36_nonpip.sh rename to tensorflow/tools/ci_build/rel/macos/cpu_py36_nonpip.sh diff --git a/tensorflow/tools/ci_build/per_release/macos/cpu_py36_pip.sh b/tensorflow/tools/ci_build/rel/macos/cpu_py36_pip.sh similarity index 100% rename from tensorflow/tools/ci_build/per_release/macos/cpu_py36_pip.sh rename to tensorflow/tools/ci_build/rel/macos/cpu_py36_pip.sh diff --git a/tensorflow/tools/ci_build/per_release/macos/cpu_py37_nonpip.sh b/tensorflow/tools/ci_build/rel/macos/cpu_py37_nonpip.sh similarity index 100% rename from tensorflow/tools/ci_build/per_release/macos/cpu_py37_nonpip.sh rename to tensorflow/tools/ci_build/rel/macos/cpu_py37_nonpip.sh diff --git a/tensorflow/tools/ci_build/per_release/macos/cpu_py37_pip.sh b/tensorflow/tools/ci_build/rel/macos/cpu_py37_pip.sh similarity index 100% rename from tensorflow/tools/ci_build/per_release/macos/cpu_py37_pip.sh rename to tensorflow/tools/ci_build/rel/macos/cpu_py37_pip.sh diff --git a/tensorflow/tools/ci_build/per_release/macos/cpu_py38_nonpip.sh b/tensorflow/tools/ci_build/rel/macos/cpu_py38_nonpip.sh similarity index 100% rename from tensorflow/tools/ci_build/per_release/macos/cpu_py38_nonpip.sh rename to tensorflow/tools/ci_build/rel/macos/cpu_py38_nonpip.sh diff --git a/tensorflow/tools/ci_build/per_release/macos/cpu_py38_pip.sh b/tensorflow/tools/ci_build/rel/macos/cpu_py38_pip.sh similarity index 100% rename from tensorflow/tools/ci_build/per_release/macos/cpu_py38_pip.sh rename to tensorflow/tools/ci_build/rel/macos/cpu_py38_pip.sh diff --git a/tensorflow/tools/ci_build/per_release/ubuntu/cpu_libtensorflow.sh b/tensorflow/tools/ci_build/rel/ubuntu/cpu_libtensorflow.sh similarity index 100% rename from tensorflow/tools/ci_build/per_release/ubuntu/cpu_libtensorflow.sh rename to tensorflow/tools/ci_build/rel/ubuntu/cpu_libtensorflow.sh diff --git a/tensorflow/tools/ci_build/per_release/ubuntu/cpu_py35_nonpip.sh b/tensorflow/tools/ci_build/rel/ubuntu/cpu_py35_nonpip.sh similarity index 100% rename from tensorflow/tools/ci_build/per_release/ubuntu/cpu_py35_nonpip.sh rename to tensorflow/tools/ci_build/rel/ubuntu/cpu_py35_nonpip.sh diff --git a/tensorflow/tools/ci_build/per_release/ubuntu/cpu_py35_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu/cpu_py35_pip.sh similarity index 100% rename from tensorflow/tools/ci_build/per_release/ubuntu/cpu_py35_pip.sh rename to tensorflow/tools/ci_build/rel/ubuntu/cpu_py35_pip.sh diff --git a/tensorflow/tools/ci_build/per_release/ubuntu/cpu_py36_nonpip.sh 
b/tensorflow/tools/ci_build/rel/ubuntu/cpu_py36_nonpip.sh similarity index 100% rename from tensorflow/tools/ci_build/per_release/ubuntu/cpu_py36_nonpip.sh rename to tensorflow/tools/ci_build/rel/ubuntu/cpu_py36_nonpip.sh diff --git a/tensorflow/tools/ci_build/per_release/ubuntu/cpu_py36_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu/cpu_py36_pip.sh similarity index 100% rename from tensorflow/tools/ci_build/per_release/ubuntu/cpu_py36_pip.sh rename to tensorflow/tools/ci_build/rel/ubuntu/cpu_py36_pip.sh diff --git a/tensorflow/tools/ci_build/per_release/ubuntu/cpu_py37_nonpip.sh b/tensorflow/tools/ci_build/rel/ubuntu/cpu_py37_nonpip.sh similarity index 100% rename from tensorflow/tools/ci_build/per_release/ubuntu/cpu_py37_nonpip.sh rename to tensorflow/tools/ci_build/rel/ubuntu/cpu_py37_nonpip.sh diff --git a/tensorflow/tools/ci_build/per_release/ubuntu/cpu_py37_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu/cpu_py37_pip.sh similarity index 100% rename from tensorflow/tools/ci_build/per_release/ubuntu/cpu_py37_pip.sh rename to tensorflow/tools/ci_build/rel/ubuntu/cpu_py37_pip.sh diff --git a/tensorflow/tools/ci_build/per_release/ubuntu/cpu_py38_nonpip.sh b/tensorflow/tools/ci_build/rel/ubuntu/cpu_py38_nonpip.sh similarity index 100% rename from tensorflow/tools/ci_build/per_release/ubuntu/cpu_py38_nonpip.sh rename to tensorflow/tools/ci_build/rel/ubuntu/cpu_py38_nonpip.sh diff --git a/tensorflow/tools/ci_build/per_release/ubuntu/cpu_py38_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu/cpu_py38_pip.sh similarity index 100% rename from tensorflow/tools/ci_build/per_release/ubuntu/cpu_py38_pip.sh rename to tensorflow/tools/ci_build/rel/ubuntu/cpu_py38_pip.sh diff --git a/tensorflow/tools/ci_build/per_release/ubuntu/gpu_libtensorflow.sh b/tensorflow/tools/ci_build/rel/ubuntu/gpu_libtensorflow.sh similarity index 100% rename from tensorflow/tools/ci_build/per_release/ubuntu/gpu_libtensorflow.sh rename to tensorflow/tools/ci_build/rel/ubuntu/gpu_libtensorflow.sh diff --git a/tensorflow/tools/ci_build/per_release/ubuntu/gpu_pip_on_cpu.sh b/tensorflow/tools/ci_build/rel/ubuntu/gpu_pip_on_cpu.sh similarity index 100% rename from tensorflow/tools/ci_build/per_release/ubuntu/gpu_pip_on_cpu.sh rename to tensorflow/tools/ci_build/rel/ubuntu/gpu_pip_on_cpu.sh diff --git a/tensorflow/tools/ci_build/per_release/ubuntu/gpu_py35_nonpip.sh b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py35_nonpip.sh similarity index 100% rename from tensorflow/tools/ci_build/per_release/ubuntu/gpu_py35_nonpip.sh rename to tensorflow/tools/ci_build/rel/ubuntu/gpu_py35_nonpip.sh diff --git a/tensorflow/tools/ci_build/per_release/ubuntu/gpu_py35_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py35_pip.sh similarity index 100% rename from tensorflow/tools/ci_build/per_release/ubuntu/gpu_py35_pip.sh rename to tensorflow/tools/ci_build/rel/ubuntu/gpu_py35_pip.sh diff --git a/tensorflow/tools/ci_build/per_release/ubuntu/gpu_py36_nonpip.sh b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py36_nonpip.sh similarity index 100% rename from tensorflow/tools/ci_build/per_release/ubuntu/gpu_py36_nonpip.sh rename to tensorflow/tools/ci_build/rel/ubuntu/gpu_py36_nonpip.sh diff --git a/tensorflow/tools/ci_build/per_release/ubuntu/gpu_py36_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py36_pip.sh similarity index 100% rename from tensorflow/tools/ci_build/per_release/ubuntu/gpu_py36_pip.sh rename to tensorflow/tools/ci_build/rel/ubuntu/gpu_py36_pip.sh diff --git a/tensorflow/tools/ci_build/per_release/ubuntu/gpu_py37_nonpip.sh 
b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py37_nonpip.sh similarity index 100% rename from tensorflow/tools/ci_build/per_release/ubuntu/gpu_py37_nonpip.sh rename to tensorflow/tools/ci_build/rel/ubuntu/gpu_py37_nonpip.sh diff --git a/tensorflow/tools/ci_build/per_release/ubuntu/gpu_py37_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py37_pip.sh similarity index 100% rename from tensorflow/tools/ci_build/per_release/ubuntu/gpu_py37_pip.sh rename to tensorflow/tools/ci_build/rel/ubuntu/gpu_py37_pip.sh diff --git a/tensorflow/tools/ci_build/per_release/ubuntu/gpu_py38_nonpip.sh b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py38_nonpip.sh similarity index 100% rename from tensorflow/tools/ci_build/per_release/ubuntu/gpu_py38_nonpip.sh rename to tensorflow/tools/ci_build/rel/ubuntu/gpu_py38_nonpip.sh diff --git a/tensorflow/tools/ci_build/per_release/ubuntu/gpu_py38_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py38_pip.sh similarity index 100% rename from tensorflow/tools/ci_build/per_release/ubuntu/gpu_py38_pip.sh rename to tensorflow/tools/ci_build/rel/ubuntu/gpu_py38_pip.sh diff --git a/tensorflow/tools/ci_build/per_release/ubuntu/sanity.sh b/tensorflow/tools/ci_build/rel/ubuntu/sanity.sh similarity index 100% rename from tensorflow/tools/ci_build/per_release/ubuntu/sanity.sh rename to tensorflow/tools/ci_build/rel/ubuntu/sanity.sh diff --git a/tensorflow/tools/ci_build/per_release/windows/cpu_libtensorflow.bat b/tensorflow/tools/ci_build/rel/windows/cpu_libtensorflow.bat similarity index 100% rename from tensorflow/tools/ci_build/per_release/windows/cpu_libtensorflow.bat rename to tensorflow/tools/ci_build/rel/windows/cpu_libtensorflow.bat diff --git a/tensorflow/tools/ci_build/per_release/windows/cpu_py35.bat b/tensorflow/tools/ci_build/rel/windows/cpu_py35.bat similarity index 100% rename from tensorflow/tools/ci_build/per_release/windows/cpu_py35.bat rename to tensorflow/tools/ci_build/rel/windows/cpu_py35.bat diff --git a/tensorflow/tools/ci_build/per_release/windows/cpu_py36.bat b/tensorflow/tools/ci_build/rel/windows/cpu_py36.bat similarity index 100% rename from tensorflow/tools/ci_build/per_release/windows/cpu_py36.bat rename to tensorflow/tools/ci_build/rel/windows/cpu_py36.bat diff --git a/tensorflow/tools/ci_build/per_release/windows/cpu_py37.bat b/tensorflow/tools/ci_build/rel/windows/cpu_py37.bat similarity index 100% rename from tensorflow/tools/ci_build/per_release/windows/cpu_py37.bat rename to tensorflow/tools/ci_build/rel/windows/cpu_py37.bat diff --git a/tensorflow/tools/ci_build/per_release/windows/cpu_py38.bat b/tensorflow/tools/ci_build/rel/windows/cpu_py38.bat similarity index 100% rename from tensorflow/tools/ci_build/per_release/windows/cpu_py38.bat rename to tensorflow/tools/ci_build/rel/windows/cpu_py38.bat diff --git a/tensorflow/tools/ci_build/per_release/windows/gpu_libtensorflow.bat b/tensorflow/tools/ci_build/rel/windows/gpu_libtensorflow.bat similarity index 100% rename from tensorflow/tools/ci_build/per_release/windows/gpu_libtensorflow.bat rename to tensorflow/tools/ci_build/rel/windows/gpu_libtensorflow.bat diff --git a/tensorflow/tools/ci_build/per_release/windows/gpu_pip_on_cpu.bat b/tensorflow/tools/ci_build/rel/windows/gpu_pip_on_cpu.bat similarity index 100% rename from tensorflow/tools/ci_build/per_release/windows/gpu_pip_on_cpu.bat rename to tensorflow/tools/ci_build/rel/windows/gpu_pip_on_cpu.bat diff --git a/tensorflow/tools/ci_build/per_release/windows/gpu_py35.bat b/tensorflow/tools/ci_build/rel/windows/gpu_py35.bat similarity index 100% 
rename from tensorflow/tools/ci_build/per_release/windows/gpu_py35.bat rename to tensorflow/tools/ci_build/rel/windows/gpu_py35.bat diff --git a/tensorflow/tools/ci_build/per_release/windows/gpu_py36.bat b/tensorflow/tools/ci_build/rel/windows/gpu_py36.bat similarity index 100% rename from tensorflow/tools/ci_build/per_release/windows/gpu_py36.bat rename to tensorflow/tools/ci_build/rel/windows/gpu_py36.bat diff --git a/tensorflow/tools/ci_build/per_release/windows/gpu_py37.bat b/tensorflow/tools/ci_build/rel/windows/gpu_py37.bat similarity index 100% rename from tensorflow/tools/ci_build/per_release/windows/gpu_py37.bat rename to tensorflow/tools/ci_build/rel/windows/gpu_py37.bat diff --git a/tensorflow/tools/ci_build/per_release/windows/gpu_py38.bat b/tensorflow/tools/ci_build/rel/windows/gpu_py38.bat similarity index 100% rename from tensorflow/tools/ci_build/per_release/windows/gpu_py38.bat rename to tensorflow/tools/ci_build/rel/windows/gpu_py38.bat From 777bdc36a2eeb99974cc0e70c4c973e56f9e9bc8 Mon Sep 17 00:00:00 2001 From: Robert David Date: Tue, 21 Jul 2020 16:18:37 -0700 Subject: [PATCH 0987/2522] Cleanup LSTM tests: Change names for better readability. Also fix a case where the name was wrong: it was testing with peephole connections, but was called 'NoPeephole'. PiperOrigin-RevId: 322464815 Change-Id: I8e10846507cec7b62398138eb1c95027a5f3b409 --- .../delegates/nnapi/acceleration_test_list.cc | 7 +- tensorflow/lite/kernels/lstm_test.cc | 102 ++++++------------ 2 files changed, 38 insertions(+), 71 deletions(-) diff --git a/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc b/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc index 3fa692c62e1..71ae50b0094 100644 --- a/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc +++ b/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc @@ -243,9 +243,10 @@ CifgPeepholeNoProjectionNoClippingUnidirectionalLstmTest/NonLayerNormLstmBlackBo -.+UnidirectionalLstmTest/.+ # lstm_test --.+LstmTest/Hybrid.+Int8 --LSTMOpModel/InvalidTypeTest -.+LstmTest/.+,29 +-LstmOpTest/InvalidTypes +.+LstmOpTest/Float,29 +-.+LstmOpTest/HybridInt8 +.+LstmOpTest/HybridUint8,29 # maximum_minimum_test MaxMinOpTest/.+nt8Test,29 diff --git a/tensorflow/lite/kernels/lstm_test.cc b/tensorflow/lite/kernels/lstm_test.cc index c7b86b1f78c..4c77c0c1202 100644 --- a/tensorflow/lite/kernels/lstm_test.cc +++ b/tensorflow/lite/kernels/lstm_test.cc @@ -294,7 +294,7 @@ class LSTMOpModel : public SingleOpModel { const TensorType weight_type_; }; -class BaseLstmTest : public ::testing::TestWithParam { +class BaseLstmOpTest : public ::testing::TestWithParam { protected: // Weights of the LSTM model. Some are optional. std::vector input_to_input_weights_; @@ -397,7 +397,8 @@ class BaseLstmTest : public ::testing::TestWithParam { } }; -class NoCifgNoPeepholeNoProjectionNoClippingLstmTest : public BaseLstmTest { +class NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest + : public BaseLstmOpTest { void SetUp() override { input_to_input_weights_ = {-0.45018822, -0.02338299, -0.0870589, -0.34550029, 0.04266912, -0.15680569, @@ -446,7 +447,7 @@ class NoCifgNoPeepholeNoProjectionNoClippingLstmTest : public BaseLstmTest { } }; -TEST_F(NoCifgNoPeepholeNoProjectionNoClippingLstmTest, LstmBlackBoxTest) { +TEST_F(NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest, Float) { const int n_batch = 1; const int n_input = 2; // n_cell and n_output have the same size when there is no projection. 
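// (Editor's sketch, not part of the patch.) For readers scanning the renames in
// this commit: the fixture name now encodes the LSTM configuration and the test
// name the execution path, so the "<fixture>/<test>" patterns added to
// acceleration_test_list.cc (".+LstmOpTest/Float", "-.+LstmOpTest/HybridInt8",
// ".+LstmOpTest/HybridUint8", etc.) can select whole families of cases. Below is
// a minimal, self-contained illustration of that convention; only the
// fixture/test names are taken from this patch and the bodies are elided.
#include <gtest/gtest.h>

class BaseLstmOpTest : public ::testing::TestWithParam<bool> {};

class NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest
    : public BaseLstmOpTest {};

// Full gtest name: NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest.Float
TEST_F(NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest, Float) {}

// Hybrid variant, parameterized over ::testing::Bool() the same way the
// QUANTIZE_PARAMETER_TEST macro at the end of lstm_test.cc instantiates it.
TEST_P(NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest, HybridUint8) {
  const bool param = GetParam();  // toggles the quantization mode under test
  (void)param;
}

INSTANTIATE_TEST_SUITE_P(
    NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest,
    NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest, ::testing::Bool());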
@@ -464,11 +465,7 @@ TEST_F(NoCifgNoPeepholeNoProjectionNoClippingLstmTest, LstmBlackBoxTest) { VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm); } -class NoCifgNoPeepholeNoProjectionNoClippingNoLayerNormLstmTest - : public NoCifgNoPeepholeNoProjectionNoClippingLstmTest {}; - -TEST_F(NoCifgNoPeepholeNoProjectionNoClippingNoLayerNormLstmTest, - LstmBlackBoxTest) { +TEST_F(NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest, With24Inputs) { const int n_batch = 1; const int n_input = 2; // n_cell and n_output have the same size when there is no projection. @@ -487,8 +484,7 @@ TEST_F(NoCifgNoPeepholeNoProjectionNoClippingNoLayerNormLstmTest, VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm); } -TEST_P(NoCifgNoPeepholeNoProjectionNoClippingLstmTest, - HybridLstmBlackBoxTestUint8) { +TEST_P(NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest, HybridUint8) { // TODO(b/158205028): Fix this test if GetForceUseNnapi() && !GetParam(). if (SingleOpModel::GetForceUseNnapi()) { return; @@ -511,11 +507,7 @@ TEST_P(NoCifgNoPeepholeNoProjectionNoClippingLstmTest, /*tolerance=*/0.0157651); } -class NoCifgNoPeepholeNoProjectionNoClippingLstmInt8Test - : public NoCifgNoPeepholeNoProjectionNoClippingLstmTest {}; - -TEST_P(NoCifgNoPeepholeNoProjectionNoClippingLstmInt8Test, - HybridLstmBlackBoxTestInt8) { +TEST_P(NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest, HybridInt8) { if (SingleOpModel::GetForceUseNnapi() && GetParam()) { return; } @@ -537,7 +529,8 @@ TEST_P(NoCifgNoPeepholeNoProjectionNoClippingLstmInt8Test, /*tolerance=*/0.0157651); } -class CifgNoPeepholeNoProjectionNoClippingLstmTest : public BaseLstmTest { +class Cifg_Peephole_NoProjection_NoLayerNorm_LstmOpTest + : public BaseLstmOpTest { void SetUp() override { input_to_cell_weights_ = {-0.49770179, -0.27711356, -0.09624726, 0.05100781, 0.04717243, 0.48944736, @@ -584,7 +577,7 @@ class CifgNoPeepholeNoProjectionNoClippingLstmTest : public BaseLstmTest { } }; -TEST_F(CifgNoPeepholeNoProjectionNoClippingLstmTest, LstmBlackBoxTest) { +TEST_F(Cifg_Peephole_NoProjection_NoLayerNorm_LstmOpTest, Float) { const int n_batch = 1; const int n_input = 2; // n_cell and n_output have the same size when there is no projection. @@ -602,8 +595,7 @@ TEST_F(CifgNoPeepholeNoProjectionNoClippingLstmTest, LstmBlackBoxTest) { VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm); } -TEST_P(CifgNoPeepholeNoProjectionNoClippingLstmTest, - HybridLstmBlackBoxTestUint8) { +TEST_P(Cifg_Peephole_NoProjection_NoLayerNorm_LstmOpTest, HybridUint8) { // TODO(b/158205028): Fix this test if GetForceUseNnapi() && !GetParam(). 
if (SingleOpModel::GetForceUseNnapi()) { return; @@ -624,11 +616,8 @@ TEST_P(CifgNoPeepholeNoProjectionNoClippingLstmTest, VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm, /*tolerance=*/0.03573); } -class CifgNoPeepholeNoProjectionNoClippingLstmInt8Test - : public CifgNoPeepholeNoProjectionNoClippingLstmTest {}; -TEST_P(CifgNoPeepholeNoProjectionNoClippingLstmInt8Test, - HybridLstmBlackBoxTestInt8) { +TEST_P(Cifg_Peephole_NoProjection_NoLayerNorm_LstmOpTest, HybridInt8) { if (SingleOpModel::GetForceUseNnapi() && GetParam()) { return; } @@ -649,7 +638,8 @@ TEST_P(CifgNoPeepholeNoProjectionNoClippingLstmInt8Test, VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm, /*tolerance=*/0.03573); } -class NoCifgPeepholeProjectionNoClippingLstmTest : public BaseLstmTest { +class NoCifg_Peephole_Projection_NoLayerNorm_LstmOpTest + : public BaseLstmOpTest { void SetUp() override { input_to_input_weights_ = { 0.021393683, 0.06124551, 0.046905167, -0.014657677, -0.03149463, @@ -1248,7 +1238,7 @@ class NoCifgPeepholeProjectionNoClippingLstmTest : public BaseLstmTest { } }; -TEST_F(NoCifgPeepholeProjectionNoClippingLstmTest, LstmBlackBoxTest) { +TEST_F(NoCifg_Peephole_Projection_NoLayerNorm_LstmOpTest, Float) { const int n_batch = 2; const int n_input = 5; const int n_cell = 20; @@ -1265,8 +1255,7 @@ TEST_F(NoCifgPeepholeProjectionNoClippingLstmTest, LstmBlackBoxTest) { VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm); } -TEST_P(NoCifgPeepholeProjectionNoClippingLstmTest, - HybridLstmBlackBoxTestUint8) { +TEST_P(NoCifg_Peephole_Projection_NoLayerNorm_LstmOpTest, HybridUint8) { // TODO(b/158205028): Fix this test if GetForceUseNnapi() && !GetParam(). if (SingleOpModel::GetForceUseNnapi()) { return; @@ -1287,11 +1276,7 @@ TEST_P(NoCifgPeepholeProjectionNoClippingLstmTest, VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm, /*tolerance=*/0.00467); } -class NoCifgPeepholeProjectionNoClippingLstmInt8Test - : public NoCifgPeepholeProjectionNoClippingLstmTest {}; - -TEST_P(NoCifgPeepholeProjectionNoClippingLstmInt8Test, - HybridLstmBlackBoxTestInt8) { +TEST_P(NoCifg_Peephole_Projection_NoLayerNorm_LstmOpTest, HybridInt8) { if (SingleOpModel::GetForceUseNnapi() && GetParam()) { return; } @@ -1311,8 +1296,7 @@ TEST_P(NoCifgPeepholeProjectionNoClippingLstmInt8Test, VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm, /*tolerance=*/0.0015); } -class NoCifgPeepholeProjectionNoClippingLayerNormLstmTest - : public BaseLstmTest { +class NoCifg_Peephole_Projection_LayerNorm_LstmOpTest : public BaseLstmOpTest { void SetUp() override { input_to_input_weights_ = {0.5, 0.6, 0.7, -0.8, -0.9, 0.1, 0.2, 0.3, -0.4, 0.5, -0.8, 0.7, -0.6, 0.5, @@ -1378,8 +1362,7 @@ class NoCifgPeepholeProjectionNoClippingLayerNormLstmTest } }; -TEST_F(NoCifgPeepholeProjectionNoClippingLayerNormLstmTest, - LayerNormLstmBlackBoxTest) { +TEST_F(NoCifg_Peephole_Projection_LayerNorm_LstmOpTest, Float) { const int n_batch = 2; const int n_input = 5; const int n_cell = 4; @@ -1410,8 +1393,7 @@ TEST_F(NoCifgPeepholeProjectionNoClippingLayerNormLstmTest, VerifyGoldens(lstm_input_, lstm_golden_output_, &layer_norm_lstm); } -TEST_P(NoCifgPeepholeProjectionNoClippingLayerNormLstmTest, - HybridLayerNormLstmBlackBoxTestUint8) { +TEST_P(NoCifg_Peephole_Projection_LayerNorm_LstmOpTest, HybridUint8) { // TODO(b/158205028): Fix this test if GetForceUseNnapi() && !GetParam(). 
if (SingleOpModel::GetForceUseNnapi()) { return; @@ -1446,11 +1428,7 @@ TEST_P(NoCifgPeepholeProjectionNoClippingLayerNormLstmTest, /*tolerance=*/0.0010907); } -class NoCifgPeepholeProjectionNoClippingLayerNormLstmInt8Test - : public NoCifgPeepholeProjectionNoClippingLayerNormLstmTest {}; - -TEST_P(NoCifgPeepholeProjectionNoClippingLayerNormLstmInt8Test, - HybridLayerNormLstmBlackBoxTestInt8) { +TEST_P(NoCifg_Peephole_Projection_LayerNorm_LstmOpTest, HybridInt8) { if (SingleOpModel::GetForceUseNnapi() && GetParam()) { return; } @@ -1485,7 +1463,7 @@ TEST_P(NoCifgPeepholeProjectionNoClippingLayerNormLstmInt8Test, /*tolerance=*/1.06e-3); } -class CifgPeepholeProjectionNoClippingLayerNormLstmTest : public BaseLstmTest { +class Cifg_Peephole_Projection_LayerNorm_LstmOpTest : public BaseLstmOpTest { void SetUp() override { input_to_forget_weights_ = {-0.6, -0.1, 0.3, 0.2, 0.9, -0.5, -0.2, -0.4, 0.3, -0.8, -0.4, 0.3, -0.5, -0.4, @@ -1531,8 +1509,7 @@ class CifgPeepholeProjectionNoClippingLayerNormLstmTest : public BaseLstmTest { } }; -TEST_F(CifgPeepholeProjectionNoClippingLayerNormLstmTest, - LayerNormLstmBlackBoxTest) { +TEST_F(Cifg_Peephole_Projection_LayerNorm_LstmOpTest, Float) { const int n_batch = 2; const int n_input = 5; const int n_cell = 4; @@ -1564,8 +1541,7 @@ TEST_F(CifgPeepholeProjectionNoClippingLayerNormLstmTest, VerifyGoldens(lstm_input_, lstm_golden_output_, &layer_norm_lstm); } -TEST_P(CifgPeepholeProjectionNoClippingLayerNormLstmTest, - HybridLayerNormLstmBlackBoxTestUint8) { +TEST_P(Cifg_Peephole_Projection_LayerNorm_LstmOpTest, HybridUint8) { if (SingleOpModel::GetForceUseNnapi()) { return; } @@ -1601,11 +1577,7 @@ TEST_P(CifgPeepholeProjectionNoClippingLayerNormLstmTest, /*tolerance=*/0.0009021); } -class CifgPeepholeProjectionNoClippingLayerNormLstmInt8Test - : public CifgPeepholeProjectionNoClippingLayerNormLstmTest {}; - -TEST_P(CifgPeepholeProjectionNoClippingLayerNormLstmInt8Test, - HybridLayerNormLstmBlackBoxTestInt8) { +TEST_P(Cifg_Peephole_Projection_LayerNorm_LstmOpTest, HybridInt8) { const int n_batch = 2; const int n_input = 5; const int n_cell = 4; @@ -1919,7 +1891,7 @@ class LSTMIntegerOpModel : public SingleOpModel { int n_output_; }; -TEST(LSTMIntegerOpModel, NoCifgYesLayerNormNoYesProjectionNoPeephole) { +TEST(IntegerLstm, NoCifg_NoPeephole_Projection_LayerNorm) { // Hyper parameters. const int n_batch = 2; const int n_input = 5; @@ -2079,7 +2051,7 @@ TEST(LSTMIntegerOpModel, NoCifgYesLayerNormNoYesProjectionNoPeephole) { } } -TEST(LSTMIntegerOpModel, NoCifgYesLayerNormNoYesProjectionYesPeephole) { +TEST(IntegerLstm, NoCifg_Peephole_Projection_LayerNorm) { // Hyper parameters. const int n_batch = 2; const int n_input = 5; @@ -2249,7 +2221,7 @@ TEST(LSTMIntegerOpModel, NoCifgYesLayerNormNoYesProjectionYesPeephole) { } } -TEST(LSTMIntegerOpModel, CifgYesLayerNormNoYesProjectionNoPeephole_8x8_8) { +TEST(IntegerLstm, Cifg_NoPeephole_Projection_LayerNorm_8x8_8) { // Hyper parameters. 
const int n_batch = 2; const int n_input = 5; @@ -2414,7 +2386,7 @@ TEST(LSTMIntegerOpModel, CifgYesLayerNormNoYesProjectionNoPeephole_8x8_8) { } #ifdef GTEST_HAS_DEATH_TEST -TEST(LSTMOpModel, InvalidTypeTest) { +TEST(LstmOpTest, InvalidTypes) { const int n_batch = 1; const int n_input = 2; const int n_cell = 4; @@ -2446,17 +2418,11 @@ TEST(LSTMOpModel, InvalidTypeTest) { #define QUANTIZE_PARAMETER_TEST(test) \ INSTANTIATE_TEST_SUITE_P(test, test, ::testing::Bool()) -QUANTIZE_PARAMETER_TEST(NoCifgNoPeepholeNoProjectionNoClippingLstmTest); -QUANTIZE_PARAMETER_TEST(NoCifgNoPeepholeNoProjectionNoClippingLstmInt8Test); -QUANTIZE_PARAMETER_TEST(CifgNoPeepholeNoProjectionNoClippingLstmTest); -QUANTIZE_PARAMETER_TEST(CifgNoPeepholeNoProjectionNoClippingLstmInt8Test); -QUANTIZE_PARAMETER_TEST(NoCifgPeepholeProjectionNoClippingLstmTest); -QUANTIZE_PARAMETER_TEST(NoCifgPeepholeProjectionNoClippingLstmInt8Test); -QUANTIZE_PARAMETER_TEST(NoCifgPeepholeProjectionNoClippingLayerNormLstmTest); -QUANTIZE_PARAMETER_TEST( - NoCifgPeepholeProjectionNoClippingLayerNormLstmInt8Test); -QUANTIZE_PARAMETER_TEST(CifgPeepholeProjectionNoClippingLayerNormLstmTest); -QUANTIZE_PARAMETER_TEST(CifgPeepholeProjectionNoClippingLayerNormLstmInt8Test); +QUANTIZE_PARAMETER_TEST(NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest); +QUANTIZE_PARAMETER_TEST(Cifg_Peephole_NoProjection_NoLayerNorm_LstmOpTest); +QUANTIZE_PARAMETER_TEST(NoCifg_Peephole_Projection_NoLayerNorm_LstmOpTest); +QUANTIZE_PARAMETER_TEST(NoCifg_Peephole_Projection_LayerNorm_LstmOpTest); +QUANTIZE_PARAMETER_TEST(Cifg_Peephole_Projection_LayerNorm_LstmOpTest); #undef QUANTIZE_PARAMETER_TEST } // namespace From 2a150a026a04f7a66eefb5fae4b1587d4665f4e0 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Tue, 21 Jul 2020 16:20:04 -0700 Subject: [PATCH 0988/2522] Experimental feature. Using special kernels for inference improvements. Added new hint to control behavior. 
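[Editor's note: a minimal sketch of how the new hint is enabled, not part of the
change itself. The field names and calls below are taken from the
performance_profiling.cc hunk at the end of this patch; the
InferenceContext::CreateInferenceInfo type name and the surrounding environment
setup (`env`) are assumed context.]

#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
#include "tensorflow/lite/delegates/gpu/cl/model_hints.h"

// Assumed: an initialized OpenCL Environment `env` is available.
InferenceContext::CreateInferenceInfo create_info;
create_info.precision = CalculationsPrecision::F16;  // or F32 where F16 is unsupported
create_info.storage_type = GetFastestStorageType(env.device());
create_info.hints.Add(ModelHints::kAllowSpecialKernels);  // opt in to the fused kernels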
PiperOrigin-RevId: 322465118 Change-Id: I62c2a3ddc75907f2d9e455b7454e1de8c54a9881 --- tensorflow/lite/delegates/gpu/cl/BUILD | 1 + .../delegates/gpu/cl/inference_context.cc | 95 +++++++++------ .../lite/delegates/gpu/cl/model_hints.h | 11 +- .../lite/delegates/gpu/cl/selectors/BUILD | 20 ++++ .../gpu/cl/selectors/special_selector.cc | 111 ++++++++++++++++++ .../gpu/cl/selectors/special_selector.h | 43 +++++++ .../delegates/gpu/cl/selectors/subgraph.cc | 4 +- .../gpu/cl/testing/performance_profiling.cc | 1 + 8 files changed, 242 insertions(+), 44 deletions(-) create mode 100644 tensorflow/lite/delegates/gpu/cl/selectors/special_selector.cc create mode 100644 tensorflow/lite/delegates/gpu/cl/selectors/special_selector.h diff --git a/tensorflow/lite/delegates/gpu/cl/BUILD b/tensorflow/lite/delegates/gpu/cl/BUILD index 9155bc1166a..36cafdb4d3b 100644 --- a/tensorflow/lite/delegates/gpu/cl/BUILD +++ b/tensorflow/lite/delegates/gpu/cl/BUILD @@ -366,6 +366,7 @@ cc_library( ":tensor_type", "//tensorflow/lite/delegates/gpu/cl/kernels:gpu_operation", "//tensorflow/lite/delegates/gpu/cl/selectors:operation_selector", + "//tensorflow/lite/delegates/gpu/cl/selectors:special_selector", "//tensorflow/lite/delegates/gpu/common:data_type", "//tensorflow/lite/delegates/gpu/common:memory_management", "//tensorflow/lite/delegates/gpu/common:model", diff --git a/tensorflow/lite/delegates/gpu/cl/inference_context.cc b/tensorflow/lite/delegates/gpu/cl/inference_context.cc index 9e57dd175bc..3067c81ec94 100644 --- a/tensorflow/lite/delegates/gpu/cl/inference_context.cc +++ b/tensorflow/lite/delegates/gpu/cl/inference_context.cc @@ -30,6 +30,7 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/cl/model_hints.h" #include "tensorflow/lite/delegates/gpu/cl/precision.h" #include "tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.h" +#include "tensorflow/lite/delegates/gpu/cl/selectors/special_selector.h" #include "tensorflow/lite/delegates/gpu/cl/storage_type_util.h" #include "tensorflow/lite/delegates/gpu/cl/tensor_type.h" #include "tensorflow/lite/delegates/gpu/common/data_type.h" @@ -261,6 +262,12 @@ void InferenceContext::ReserveGraphTensors( absl::Status InferenceContext::ConvertOperations( const CreationContext& creation_context, const GraphFloat32& graph, ModelHints hints) { + std::map tensor_descriptors; + const auto values = graph.values(); + for (auto value : values) { + tensor_descriptors[value->id] = tensor_reserver_.Get(value->id).descriptor; + } + std::set consumed_nodes; std::vector graph_nodes = graph.nodes(); std::map tensor_usages; // keeps latest index of operation that updated tensor @@ -270,45 +277,54 @@ absl::Status InferenceContext::ConvertOperations( } for (int i = 0; i < graph_nodes.size(); ++i) { const Node& node = *graph_nodes[i]; - auto inputs = graph.FindInputs(node.id); - auto outputs = graph.FindOutputs(node.id); - - // Reordering of input ids and updating of temporary tensors_usage struct. - // This stage is necessary because we are building OperationDef that rely on - // order of input ids. But we also should have input id on first position - // that potentially can be "linking" tensor and as result eliminated(unused) - // We apply it only for ADD operation, because of ADD associativity and - // ADD can be linked. - // In current approach "linking" tensor can be only latest written - // tensor(during linear order of execution) among input tensors. 
- if (IsGenericAdd(node, inputs, outputs)) { - int latest_written_tensor_index = 0; - int last_usage = tensor_usages[inputs[0]->id]; - for (int j = 1; j < inputs.size(); ++j) { - if (tensor_usages[inputs[j]->id] > last_usage) { - last_usage = tensor_usages[inputs[j]->id]; - latest_written_tensor_index = j; - } - } - std::swap(inputs[0], inputs[latest_written_tensor_index]); - } - for (const auto& out_id : outputs) { - tensor_usages[out_id->id] = i; - } - - OperationDef op_def; - op_def.precision = precision_; - for (int j = 0; j < inputs.size(); ++j) { - op_def.src_tensors.push_back( - tensor_reserver_.Get(inputs[j]->id).descriptor); - } - for (int j = 0; j < outputs.size(); ++j) { - op_def.dst_tensors.push_back( - tensor_reserver_.Get(outputs[j]->id).descriptor); + if (consumed_nodes.find(node.id) != consumed_nodes.end()) { + continue; } GPUOperationsSubgraph gpu_subgraph; - RETURN_IF_ERROR(GPUOperationFromNode(creation_context, op_def, hints, - inputs, outputs, node, &gpu_subgraph)); + if (hints.Check(ModelHints::kAllowSpecialKernels) && + GPUSubgraphFromGraph(creation_context, precision_, graph, node.id, + tensor_descriptors, &consumed_nodes, &gpu_subgraph) + .ok()) { + // Mapping of subgraph (set of nodes) to GPU operations. Should happen + // before straigtforward mapping. + } else { + // Straigtforward mapping of one graph node to GPU operations. + auto inputs = graph.FindInputs(node.id); + auto outputs = graph.FindOutputs(node.id); + // Reordering of input ids and updating of temporary tensors_usage struct. + // This stage is necessary because we are building OperationDef that rely + // on order of input ids. But we also should have input id on first + // position that potentially can be "linking" tensor and as result + // eliminated(unused) We apply it only for ADD operation, because of ADD + // associativity and ADD can be linked. In current approach "linking" + // tensor can be only latest written tensor(during linear order of + // execution) among input tensors. 
+ if (IsGenericAdd(node, inputs, outputs)) { + int latest_written_tensor_index = 0; + int last_usage = tensor_usages[inputs[0]->id]; + for (int j = 1; j < inputs.size(); ++j) { + if (tensor_usages[inputs[j]->id] > last_usage) { + last_usage = tensor_usages[inputs[j]->id]; + latest_written_tensor_index = j; + } + } + std::swap(inputs[0], inputs[latest_written_tensor_index]); + } + consumed_nodes.insert(node.id); + OperationDef op_def; + op_def.precision = precision_; + for (int j = 0; j < inputs.size(); ++j) { + op_def.src_tensors.push_back( + tensor_reserver_.Get(inputs[j]->id).descriptor); + } + for (int j = 0; j < outputs.size(); ++j) { + op_def.dst_tensors.push_back( + tensor_reserver_.Get(outputs[j]->id).descriptor); + } + RETURN_IF_ERROR(GPUOperationFromNode(creation_context, op_def, hints, + inputs, outputs, node, + &gpu_subgraph)); + } std::unordered_map mapping_to_global_ids; for (int j = 0; j < gpu_subgraph.new_tensors.size(); ++j) { const auto& t = gpu_subgraph.new_tensors[j]; @@ -324,7 +340,7 @@ absl::Status InferenceContext::ConvertOperations( for (int j = 0; j < gpu_op.input_ids.size(); ++j) { int id = gpu_op.input_ids[j]; if (id >= 0) { - cl_node.inputs[j] = inputs[id]->id; + cl_node.inputs[j] = id; } else { cl_node.inputs[j] = mapping_to_global_ids[-(id + 1)]; } @@ -333,7 +349,8 @@ absl::Status InferenceContext::ConvertOperations( for (int j = 0; j < gpu_op.output_ids.size(); ++j) { int id = gpu_op.output_ids[j]; if (id >= 0) { - cl_node.outputs[j] = outputs[id]->id; + cl_node.outputs[j] = id; + tensor_usages[id] = i; } else { cl_node.outputs[j] = mapping_to_global_ids[-(id + 1)]; } diff --git a/tensorflow/lite/delegates/gpu/cl/model_hints.h b/tensorflow/lite/delegates/gpu/cl/model_hints.h index 7661cc0dacb..7c0f4b55b1d 100644 --- a/tensorflow/lite/delegates/gpu/cl/model_hints.h +++ b/tensorflow/lite/delegates/gpu/cl/model_hints.h @@ -25,13 +25,18 @@ namespace cl { struct ModelHints { using ModelHint = uint64_t; - // By default we want the fastest inference + // By default we want the fastest inference. static constexpr ModelHint kFastestInference = 0x00000000; - // Can improve compilation time, but inference can be slower + // Can improve compilation time, but inference can be slower. static constexpr ModelHint kReduceKernelsCount = 0x00000001; - // Can improve tuning time, but inference can be slower + // Can improve tuning time, but inference can be slower. static constexpr ModelHint kFastTuning = 0x00000002; + // Experimental. + // Can improve performance and memory consumption, but slow down + // initialization a lot and create more kernels. 
+ static constexpr ModelHint kAllowSpecialKernels = 0x00000004; + void Add(ModelHint hint) { if (hint == kFastestInference) { hints = kFastestInference; diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/BUILD b/tensorflow/lite/delegates/gpu/cl/selectors/BUILD index ff196cfaf71..bf4c7df8651 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/BUILD +++ b/tensorflow/lite/delegates/gpu/cl/selectors/BUILD @@ -152,6 +152,26 @@ cc_library( ], ) +cc_library( + name = "special_selector", + srcs = ["special_selector.cc"], + hdrs = ["special_selector.h"], + deps = [ + ":subgraph", + "//tensorflow/lite/delegates/gpu/cl:cl_device", + "//tensorflow/lite/delegates/gpu/cl:tensor_type", + "//tensorflow/lite/delegates/gpu/cl/kernels:gpu_operation", + "//tensorflow/lite/delegates/gpu/cl/kernels/special:depthwise_conv_plus_1x1_conv", + "//tensorflow/lite/delegates/gpu/common:data_type", + "//tensorflow/lite/delegates/gpu/common:model", + "//tensorflow/lite/delegates/gpu/common:operations", + "//tensorflow/lite/delegates/gpu/common:shape", + "//tensorflow/lite/delegates/gpu/common:status", + "//tensorflow/lite/delegates/gpu/common:tensor", + "@com_google_absl//absl/types:any", + ], +) + cc_library( name = "subgraph", srcs = ["subgraph.cc"], diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/special_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/special_selector.cc new file mode 100644 index 00000000000..8a801b460d1 --- /dev/null +++ b/tensorflow/lite/delegates/gpu/cl/selectors/special_selector.cc @@ -0,0 +1,111 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/lite/delegates/gpu/cl/selectors/special_selector.h" + +#include "absl/types/any.h" +#include "tensorflow/lite/delegates/gpu/cl/cl_device.h" +#include "tensorflow/lite/delegates/gpu/cl/kernels/special/depthwise_conv_plus_1x1_conv.h" +#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h" +#include "tensorflow/lite/delegates/gpu/common/data_type.h" +#include "tensorflow/lite/delegates/gpu/common/operations.h" +#include "tensorflow/lite/delegates/gpu/common/shape.h" +#include "tensorflow/lite/delegates/gpu/common/status.h" +#include "tensorflow/lite/delegates/gpu/common/tensor.h" + +namespace tflite { +namespace gpu { +namespace cl { +namespace { +absl::Status TryDepthwiseConvPlus1x1Conv( + const CreationContext& creation_context, CalculationsPrecision precision, + const GraphFloat32& graph, NodeId first_node_id, + const std::map& tensor_descriptors, + std::set* consumed_nodes, GPUOperationsSubgraph* gpu_subgraph) { + auto* dw_node = graph.GetNode(first_node_id); + if (OperationTypeFromString(dw_node->operation.type) != + OperationType::DEPTHWISE_CONVOLUTION) { + return absl::NotFoundError("DepthwiseConvPlus1x1Conv not suitable."); + } + auto dw_outputs = graph.FindOutputs(dw_node->id); + auto consumers = graph.FindConsumers(dw_outputs[0]->id); + if (consumers.size() != 1) { + return absl::NotFoundError("DepthwiseConvPlus1x1Conv not suitable."); + } + auto* conv_node = consumers[0]; + if (consumed_nodes->find(conv_node->id) != consumed_nodes->end()) { + return absl::NotFoundError("DepthwiseConvPlus1x1Conv not suitable."); + } + if (OperationTypeFromString(conv_node->operation.type) != + OperationType::CONVOLUTION_2D) { + return absl::NotFoundError("DepthwiseConvPlus1x1Conv not suitable."); + } + if (graph.FindInputs(conv_node->id).size() != 1) { + return absl::NotFoundError("DepthwiseConvPlus1x1Conv not suitable."); + } + auto dw_attr = absl::any_cast( + dw_node->operation.attributes); + auto conv_attr = + absl::any_cast(conv_node->operation.attributes); + auto dw_inputs = graph.FindInputs(dw_node->id); + auto conv_outputs = graph.FindOutputs(conv_node->id); + OperationDef op_def; + op_def.precision = precision; + auto it = tensor_descriptors.find(dw_inputs[0]->id); + if (it != tensor_descriptors.end()) { + op_def.src_tensors.push_back(it->second); + } + it = tensor_descriptors.find(conv_outputs[0]->id); + if (it != tensor_descriptors.end()) { + op_def.dst_tensors.push_back(it->second); + } + if (!IsDepthwiseConvPlus1x1ConvSupported(*creation_context.device, op_def, + dw_attr, conv_attr)) { + return absl::NotFoundError("DepthwiseConvPlus1x1Conv not suitable."); + } + std::unique_ptr* gpu_op = + InitSingleOpSubgraph(dw_inputs, conv_outputs, gpu_subgraph); + DepthwiseConvPlus1x1Conv operation; + RETURN_IF_ERROR(CreateDepthwiseConvPlus1x1Conv( + creation_context, op_def, dw_attr, conv_attr, &operation)); + *gpu_op = absl::make_unique(std::move(operation)); + consumed_nodes->insert(dw_node->id); + consumed_nodes->insert(conv_node->id); + return absl::OkStatus(); +} +} // namespace + +absl::Status GPUSubgraphFromGraph( + const CreationContext& creation_context, CalculationsPrecision precision, + const GraphFloat32& graph, NodeId first_node_id, + const std::map& tensor_descriptors, + std::set* consumed_nodes, GPUOperationsSubgraph* gpu_subgraph) { + if (!creation_context.device->IsNvidia()) { + return absl::NotFoundError( + "Experimental feature, enabled for NVidia only, but device is not " 
+ "nvidia gpu."); + } + if (TryDepthwiseConvPlus1x1Conv(creation_context, precision, graph, + first_node_id, tensor_descriptors, + consumed_nodes, gpu_subgraph) + .ok()) { + return absl::OkStatus(); + } + return absl::NotFoundError("No special combination."); +} + +} // namespace cl +} // namespace gpu +} // namespace tflite diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/special_selector.h b/tensorflow/lite/delegates/gpu/cl/selectors/special_selector.h new file mode 100644 index 00000000000..687d221aac6 --- /dev/null +++ b/tensorflow/lite/delegates/gpu/cl/selectors/special_selector.h @@ -0,0 +1,43 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_SELECTORS_SPECIAL_SELECTOR_H_ +#define TENSORFLOW_LITE_DELEGATES_GPU_CL_SELECTORS_SPECIAL_SELECTOR_H_ + +#include +#include +#include + +#include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h" +#include "tensorflow/lite/delegates/gpu/cl/selectors/subgraph.h" +#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h" +#include "tensorflow/lite/delegates/gpu/common/model.h" +#include "tensorflow/lite/delegates/gpu/common/status.h" + +namespace tflite { +namespace gpu { +namespace cl { + +absl::Status GPUSubgraphFromGraph( + const CreationContext& creation_context, CalculationsPrecision precision, + const GraphFloat32& graph, NodeId first_node_id, + const std::map& tensor_descriptors, + std::set* consumed_nodes, GPUOperationsSubgraph* gpu_subgraph); + +} // namespace cl +} // namespace gpu +} // namespace tflite + +#endif // TENSORFLOW_LITE_DELEGATES_GPU_CL_SELECTORS_SPECIAL_SELECTOR_H_ diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/subgraph.cc b/tensorflow/lite/delegates/gpu/cl/selectors/subgraph.cc index 0f18a4b7be5..27a40886497 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/subgraph.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/subgraph.cc @@ -32,10 +32,10 @@ std::unique_ptr* InitSingleOpSubgraph( gpu_subgraph->new_tensors.clear(); gpu_subgraph->operations.push_back({}); for (int i = 0; i < inputs.size(); ++i) { - gpu_subgraph->operations[0].input_ids.push_back(i); + gpu_subgraph->operations[0].input_ids.push_back(inputs[i]->id); } for (int i = 0; i < outputs.size(); ++i) { - gpu_subgraph->operations[0].output_ids.push_back(i); + gpu_subgraph->operations[0].output_ids.push_back(outputs[i]->id); } return &gpu_subgraph->operations[0].operation; diff --git a/tensorflow/lite/delegates/gpu/cl/testing/performance_profiling.cc b/tensorflow/lite/delegates/gpu/cl/testing/performance_profiling.cc index 0c500cd0bbe..ab2e52f14ed 100644 --- a/tensorflow/lite/delegates/gpu/cl/testing/performance_profiling.cc +++ b/tensorflow/lite/delegates/gpu/cl/testing/performance_profiling.cc @@ -44,6 +44,7 @@ absl::Status RunModelSample(const std::string& model_name) { ? 
CalculationsPrecision::F16 : CalculationsPrecision::F32; create_info.storage_type = GetFastestStorageType(env.device()); + create_info.hints.Add(ModelHints::kAllowSpecialKernels); std::cout << "Precision: " << ToString(create_info.precision) << std::endl; std::cout << "Storage type: " << ToString(create_info.storage_type) << std::endl; From 00059bf29c8579e3c434490ed1c5b98ed3739cdb Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Tue, 21 Jul 2020 16:26:28 -0700 Subject: [PATCH 0989/2522] Store zero point values in conv and depthwise-conv kernels. Upcoming changes to TFLM will not allow optimal access to quantization data during the TfLiteRegistration::Eval phase of a kernel. To ensure that conv and depthwise conv are ready for this change, cache zero point values in OpData allocations. Additionally, ensure that the style of passing OpData in depthwise conv matches the current conv implementation. PiperOrigin-RevId: 322466387 Change-Id: If9e8ccd59316a480c7a67e30f8f096e3d46332a4 --- tensorflow/lite/micro/kernels/conv.cc | 28 +++++--- .../lite/micro/kernels/depthwise_conv.cc | 64 +++++++++++-------- 2 files changed, 59 insertions(+), 33 deletions(-) diff --git a/tensorflow/lite/micro/kernels/conv.cc b/tensorflow/lite/micro/kernels/conv.cc index 1f286dd30b8..ef4cb9fbff7 100644 --- a/tensorflow/lite/micro/kernels/conv.cc +++ b/tensorflow/lite/micro/kernels/conv.cc @@ -42,6 +42,12 @@ constexpr int kConvQuantizedDimension = 0; struct OpData { TfLitePaddingValues padding; + + // Cached tensor zero point values for quantized operations. + int32_t input_zero_point; + int32_t filter_zero_point; + int32_t output_zero_point; + // The scaling factor from input to output (aka the 'real multiplier') can // be represented as a fixed point multiplier plus a left shift. int32_t output_multiplier; @@ -158,9 +164,15 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { affine_quantization->zero_point->size); } - return CalculateOpData(context, node, params, input_width, input_height, - filter_width, filter_height, output_width, - output_height, input->type, data); + TF_LITE_ENSURE_STATUS(CalculateOpData( + context, node, params, input_width, input_height, filter_width, + filter_height, output_width, output_height, input->type, data)); + + data->input_zero_point = input->params.zero_point; + data->filter_zero_point = filter->params.zero_point; + data->output_zero_point = output->params.zero_point; + + return kTfLiteOk; } // namespace conv void EvalQuantized(TfLiteContext* context, TfLiteNode* node, @@ -168,9 +180,9 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node, const TfLiteTensor* input, const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* im2col, TfLiteTensor* hwcn_weights, TfLiteTensor* output) { - const int32_t input_offset = -input->params.zero_point; - const int32_t filter_offset = -filter->params.zero_point; - const int32_t output_offset = output->params.zero_point; + const int32_t input_offset = -data.input_zero_point; + const int32_t filter_offset = -data.filter_zero_point; + const int32_t output_offset = data.output_zero_point; // TODO(b/154032858): Investigate removing extra copies. ConvParams op_params; @@ -204,8 +216,8 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, TfLiteTensor* im2col) { // TODO(b/154032858): Investigate removing extra copies. 
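// (Editor's sketch, not part of the patch.) The same caching pattern is applied
// in both conv.cc and depthwise_conv.cc: Prepare() copies the tensor zero points
// into the per-node OpData, and the Eval paths read only the cached values,
// never TfLiteTensor::params. A simplified, self-contained version follows;
// the tensor indices and the trimmed-down OpData are hypothetical.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/kernel_util.h"

namespace sketch {

struct OpData {
  int32_t input_zero_point;
  int32_t filter_zero_point;
  int32_t output_zero_point;
};

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  OpData* data = static_cast<OpData*>(node->user_data);
  const TfLiteTensor* input = tflite::GetInput(context, node, /*index=*/0);
  const TfLiteTensor* filter = tflite::GetInput(context, node, /*index=*/1);
  TfLiteTensor* output = tflite::GetOutput(context, node, /*index=*/0);
  // Cache the quantization zero points once, at prepare time.
  data->input_zero_point = input->params.zero_point;
  data->filter_zero_point = filter->params.zero_point;
  data->output_zero_point = output->params.zero_point;
  return kTfLiteOk;
}

void EvalQuantized(const OpData& data) {
  // Offsets come from the cached OpData, so Eval no longer touches the tensors.
  const int32_t input_offset = -data.input_zero_point;
  const int32_t filter_offset = -data.filter_zero_point;
  const int32_t output_offset = data.output_zero_point;
  (void)input_offset; (void)filter_offset; (void)output_offset;
}

}  // namespace sketch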
ConvParams op_params; - op_params.input_offset = -input->params.zero_point; - op_params.output_offset = output->params.zero_point; + op_params.input_offset = -data.input_zero_point; + op_params.output_offset = data.output_zero_point; op_params.stride_height = params->stride_height; op_params.stride_width = params->stride_width; op_params.dilation_height_factor = params->dilation_height_factor; diff --git a/tensorflow/lite/micro/kernels/depthwise_conv.cc b/tensorflow/lite/micro/kernels/depthwise_conv.cc index c75e0d9db54..a296c172309 100644 --- a/tensorflow/lite/micro/kernels/depthwise_conv.cc +++ b/tensorflow/lite/micro/kernels/depthwise_conv.cc @@ -42,6 +42,12 @@ constexpr int kDepthwiseConvQuantizedDimension = 3; struct OpData { TfLitePaddingValues padding; + + // Cached tensor zero point values for quantized operations. + int32_t input_zero_point; + int32_t filter_zero_point; + int32_t output_zero_point; + // The scaling factor from input to output (aka the 'real multiplier') can // be represented as a fixed point multiplier plus a left shift. int32_t output_multiplier; @@ -106,6 +112,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { reinterpret_cast(node->builtin_data); OpData* data = static_cast(node->user_data); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); const TfLiteTensor* input = GetInput(context, node, kInputTensor); const TfLiteTensor* filter = GetInput(context, node, kFilterTensor); @@ -145,12 +152,19 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { affine_quantization->zero_point->size); } - return CalculateOpData(context, node, params, width, height, filter_width, - filter_height, data_type, data); + TF_LITE_ENSURE_STATUS(CalculateOpData(context, node, params, width, height, + filter_width, filter_height, data_type, + data)); + + data->input_zero_point = input->params.zero_point; + data->filter_zero_point = filter->params.zero_point; + data->output_zero_point = output->params.zero_point; + + return kTfLiteOk; } void EvalFloat(TfLiteContext* context, TfLiteNode* node, - TfLiteDepthwiseConvParams* params, const OpData* data, + TfLiteDepthwiseConvParams* params, const OpData& data, const TfLiteTensor* input, const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output) { float output_activation_min, output_activation_max; @@ -160,8 +174,8 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node, tflite::DepthwiseParams op_params; // Padding type is ignored, but still set. 
op_params.padding_type = PaddingType::kSame; - op_params.padding_values.width = data->padding.width; - op_params.padding_values.height = data->padding.height; + op_params.padding_values.width = data.padding.width; + op_params.padding_values.height = data.padding.height; op_params.stride_width = params->stride_width; op_params.stride_height = params->stride_height; op_params.dilation_width_factor = params->dilation_width_factor; @@ -179,28 +193,28 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node, void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, TfLiteDepthwiseConvParams* params, - const OpData* data, const TfLiteTensor* input, + const OpData& data, const TfLiteTensor* input, const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output) { DepthwiseParams op_params; op_params.padding_type = PaddingType::kSame; - op_params.padding_values.width = data->padding.width; - op_params.padding_values.height = data->padding.height; + op_params.padding_values.width = data.padding.width; + op_params.padding_values.height = data.padding.height; op_params.stride_width = params->stride_width; op_params.stride_height = params->stride_height; op_params.dilation_width_factor = params->dilation_width_factor; op_params.dilation_height_factor = params->dilation_height_factor; op_params.depth_multiplier = params->depth_multiplier; - op_params.input_offset = -input->params.zero_point; + op_params.input_offset = -data.input_zero_point; op_params.weights_offset = 0; - op_params.output_offset = output->params.zero_point; + op_params.output_offset = data.output_zero_point; // TODO(b/130439627): Use calculated value for clamping. op_params.quantized_activation_min = std::numeric_limits::min(); op_params.quantized_activation_max = std::numeric_limits::max(); reference_integer_ops::DepthwiseConvPerChannel( - op_params, data->per_channel_output_multiplier, - data->per_channel_output_shift, GetTensorShape(input), + op_params, data.per_channel_output_multiplier, + data.per_channel_output_shift, GetTensorShape(input), GetTensorData(input), GetTensorShape(filter), GetTensorData(filter), GetTensorShape(bias), GetTensorData(bias), GetTensorShape(output), @@ -208,31 +222,31 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, } void EvalQuantized(TfLiteContext* context, TfLiteNode* node, - TfLiteDepthwiseConvParams* params, const OpData* data, + TfLiteDepthwiseConvParams* params, const OpData& data, const TfLiteTensor* input, const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output) { - const int32_t input_offset = -input->params.zero_point; - const int32_t filter_offset = -filter->params.zero_point; - const int32_t output_offset = output->params.zero_point; + const int32_t input_offset = -data.input_zero_point; + const int32_t filter_offset = -data.filter_zero_point; + const int32_t output_offset = data.output_zero_point; tflite::DepthwiseParams op_params; // Padding type is ignored, but still set. 
op_params.padding_type = PaddingType::kSame; - op_params.padding_values.width = data->padding.width; - op_params.padding_values.height = data->padding.height; + op_params.padding_values.width = data.padding.width; + op_params.padding_values.height = data.padding.height; op_params.stride_width = params->stride_width; op_params.stride_height = params->stride_height; op_params.dilation_width_factor = params->dilation_width_factor; op_params.dilation_height_factor = params->dilation_height_factor; op_params.depth_multiplier = params->depth_multiplier; - op_params.quantized_activation_min = data->output_activation_min; - op_params.quantized_activation_max = data->output_activation_max; + op_params.quantized_activation_min = data.output_activation_min; + op_params.quantized_activation_max = data.output_activation_max; op_params.input_offset = input_offset; op_params.weights_offset = filter_offset; op_params.output_offset = output_offset; - op_params.output_multiplier = data->output_multiplier; + op_params.output_multiplier = data.output_multiplier; // Legacy ops used mixed left and right shifts. Now all are +ve-means-left. - op_params.output_shift = -data->output_shift; + op_params.output_shift = -data.output_shift; tflite::reference_ops::DepthwiseConv( op_params, GetTensorShape(input), GetTensorData(input), @@ -259,14 +273,14 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { // separate ops to avoid dispatch overhead here. switch (input->type) { // Already know in/out types are same. case kTfLiteFloat32: - EvalFloat(context, node, params, &data, input, filter, bias, output); + EvalFloat(context, node, params, data, input, filter, bias, output); break; case kTfLiteInt8: - EvalQuantizedPerChannel(context, node, params, &data, input, filter, bias, + EvalQuantizedPerChannel(context, node, params, data, input, filter, bias, output); break; case kTfLiteUInt8: - EvalQuantized(context, node, params, &data, input, filter, bias, output); + EvalQuantized(context, node, params, data, input, filter, bias, output); break; default: TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", From d378ee85b6fe98ecc7afe4909626caf55f3c206b Mon Sep 17 00:00:00 2001 From: Ran Chen Date: Tue, 21 Jul 2020 16:37:32 -0700 Subject: [PATCH 0990/2522] Implement experimental_distribute_values_from_function for CentralStorage PiperOrigin-RevId: 322468547 Change-Id: I596d0d33f378299a74b853ac322d44e21df4b023 --- .../python/distribute/parameter_server_strategy.py | 10 ++++++---- tensorflow/python/distribute/values_test.py | 6 ++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tensorflow/python/distribute/parameter_server_strategy.py b/tensorflow/python/distribute/parameter_server_strategy.py index 5bef04f1ae6..1d4c593d48b 100644 --- a/tensorflow/python/distribute/parameter_server_strategy.py +++ b/tensorflow/python/distribute/parameter_server_strategy.py @@ -410,10 +410,12 @@ class ParameterServerStrategyExtended(distribute_lib.StrategyExtendedV1): self._container_strategy()) def _experimental_distribute_values_from_function(self, value_fn): - # TODO(b/137795644): Implement this method for ParameterServerStrategy if - # needed. 
- raise NotImplementedError("_experimental_distribute_values_from_function " - "not yet implemented in ParameterServerStrategy.") + per_replica_values = [] + for replica_id in range(self._num_replicas_in_sync): + per_replica_values.append( + value_fn(distribute_lib.ValueContext(replica_id, + self._num_replicas_in_sync))) + return distribute_utils.regroup(per_replica_values, always_wrap=True) def _broadcast_to(self, tensor, destinations): # This is both a fast path for Python constants, and a way to delay diff --git a/tensorflow/python/distribute/values_test.py b/tensorflow/python/distribute/values_test.py index 73d15ce4aea..83f5d73a6fa 100644 --- a/tensorflow/python/distribute/values_test.py +++ b/tensorflow/python/distribute/values_test.py @@ -249,8 +249,7 @@ class DistributedValuesTest(test.TestCase, parameterized.TestCase): strategy_combinations.mirrored_strategy_with_gpu_and_cpu, strategy_combinations.tpu_strategy, strategy_combinations.tpu_strategy_packed_var, - # TODO(b/137795644): support CentralStroageStrategy - # strategy_combinations.central_storage_strategy_with_two_gpus, + strategy_combinations.central_storage_strategy_with_two_gpus, ] + strategy_combinations.multiworker_strategies, mode=["eager"])) def testMakeDistributedValueDefaultDevicePlacement(self, distribution): @@ -271,8 +270,7 @@ class DistributedValuesTest(test.TestCase, parameterized.TestCase): strategy_combinations.mirrored_strategy_with_gpu_and_cpu, strategy_combinations.tpu_strategy, strategy_combinations.tpu_strategy_packed_var, - # TODO(b/137795644): support CentralStroageStrategy - # strategy_combinations.central_storage_strategy_with_two_gpus, + strategy_combinations.central_storage_strategy_with_two_gpus, ] + strategy_combinations.multiworker_strategies, mode=["eager"])) def testMakeDistributedValueExplicitDevicePlacement(self, distribution): From a344ee9bc6060a89f8fa4a4efc6466ea5866366e Mon Sep 17 00:00:00 2001 From: Thomas O'Malley Date: Tue, 21 Jul 2020 16:39:15 -0700 Subject: [PATCH 0991/2522] Allow Model.train_function to be changed during Model.fit PiperOrigin-RevId: 322468882 Change-Id: I89c06a6ecb5a25a529fe6072958b801093478cd8 --- tensorflow/python/keras/callbacks_test.py | 25 ++++++++++++++++++++++ tensorflow/python/keras/engine/training.py | 24 ++++++++++----------- 2 files changed, 37 insertions(+), 12 deletions(-) diff --git a/tensorflow/python/keras/callbacks_test.py b/tensorflow/python/keras/callbacks_test.py index aca01ca58ad..f103d7506b9 100644 --- a/tensorflow/python/keras/callbacks_test.py +++ b/tensorflow/python/keras/callbacks_test.py @@ -1722,6 +1722,31 @@ class KerasCallbacksTest(keras_parameterized.TestCase): self.assertFalse(cb_list._should_call_test_batch_hooks) self.assertFalse(cb_list._should_call_predict_batch_hooks) + @keras_parameterized.run_all_keras_modes(always_skip_v1=True) + def test_change_tf_functions_during_fit(self): + + class ChangeFunctions(keras.callbacks.Callback): + + def on_epoch_end(self, epochs, logs=None): + + def new_fn(iterator): + raise ValueError('New function substituted successfully.') + + self.model.train_function = new_fn + self.model.test_function = new_fn + self.model.predict_function = new_fn + + model = keras.Sequential([keras.layers.Dense(1)]) + model.compile('sgd', 'mse') + + x, y = np.ones((10, 10)), np.ones((10, 1)) + with self.assertRaisesRegexp(ValueError, 'New function '): + model.fit(x, y, batch_size=2, epochs=2, callbacks=[ChangeFunctions()]) + with self.assertRaisesRegexp(ValueError, 'New function '): + model.evaluate(x, y, batch_size=2) + 
with self.assertRaisesRegexp(ValueError, 'New function '): + model.predict(x, batch_size=2) + # A summary that was emitted during a test. Fields: # logdir: str. The logdir of the FileWriter to which the summary was diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index b51212ca060..221c8676bb5 100644 --- a/tensorflow/python/keras/engine/training.py +++ b/tensorflow/python/keras/engine/training.py @@ -1076,7 +1076,7 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector): steps=data_handler.inferred_steps) self.stop_training = False - train_function = self.make_train_function() + self.train_function = self.make_train_function() self._train_counter.assign(0) callbacks.on_train_begin() training_logs = None @@ -1098,7 +1098,7 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector): batch_size=batch_size, _r=1): callbacks.on_train_batch_begin(step) - tmp_logs = train_function(iterator) + tmp_logs = self.train_function(iterator) if data_handler.should_sync: context.async_wait() logs = tmp_logs # No error, now safe to assign to logs. @@ -1373,7 +1373,7 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector): steps=data_handler.inferred_steps) logs = {} - test_function = self.make_test_function() + self.test_function = self.make_test_function() self._test_counter.assign(0) callbacks.on_test_begin() for _, iterator in data_handler.enumerate_epochs(): # Single epoch. @@ -1382,7 +1382,7 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector): for step in data_handler.steps(): with trace.Trace('test', step_num=step, _r=1): callbacks.on_test_batch_begin(step) - tmp_logs = test_function(iterator) + tmp_logs = self.test_function(iterator) if data_handler.should_sync: context.async_wait() logs = tmp_logs # No error, now safe to assign to logs. @@ -1595,7 +1595,7 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector): epochs=1, steps=data_handler.inferred_steps) - predict_function = self.make_predict_function() + self.predict_function = self.make_predict_function() self._predict_counter.assign(0) callbacks.on_predict_begin() batch_outputs = None @@ -1603,7 +1603,7 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector): with data_handler.catch_stop_iteration(): for step in data_handler.steps(): callbacks.on_predict_batch_begin(step) - tmp_batch_outputs = predict_function(iterator) + tmp_batch_outputs = self.predict_function(iterator) if data_handler.should_sync: context.async_wait() batch_outputs = tmp_batch_outputs # No error, now safe to assign. 
@@ -1700,8 +1700,8 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector): iterator = data_adapter.single_batch_iterator(self.distribute_strategy, x, y, sample_weight, class_weight) - train_function = self.make_train_function() - logs = train_function(iterator) + self.train_function = self.make_train_function() + logs = self.train_function(iterator) if reset_metrics: self.reset_metrics() @@ -1759,8 +1759,8 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector): with self.distribute_strategy.scope(): iterator = data_adapter.single_batch_iterator(self.distribute_strategy, x, y, sample_weight) - test_function = self.make_test_function() - logs = test_function(iterator) + self.test_function = self.make_test_function() + logs = self.test_function(iterator) if reset_metrics: self.reset_metrics() @@ -1793,8 +1793,8 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector): _disallow_inside_tf_function('predict_on_batch') with self.distribute_strategy.scope(): iterator = data_adapter.single_batch_iterator(self.distribute_strategy, x) - predict_function = self.make_predict_function() - outputs = predict_function(iterator) + self.predict_function = self.make_predict_function() + outputs = self.predict_function(iterator) return tf_utils.to_numpy_or_python_type(outputs) @deprecation.deprecated( From 07b58e0820cdd23ef147c1233b750764df1b182a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 21 Jul 2020 16:42:01 -0700 Subject: [PATCH 0992/2522] Generate unique node names in AddEmptyNode by appending a suffix instead of crashing. PiperOrigin-RevId: 322469383 Change-Id: I8062711c8d2bfb6e8c1afec44b28e7d3625a54cb --- .../core/grappler/optimizers/graph_optimizer_stage.cc | 11 +++++++---- .../grappler/optimizers/graph_optimizer_stage_test.cc | 4 ++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/graph_optimizer_stage.cc b/tensorflow/core/grappler/optimizers/graph_optimizer_stage.cc index 4e955db2f5a..f584b8d1548 100644 --- a/tensorflow/core/grappler/optimizers/graph_optimizer_stage.cc +++ b/tensorflow/core/grappler/optimizers/graph_optimizer_stage.cc @@ -84,11 +84,14 @@ NodeDef* AddCopyNode(const GraphOptimizerContext& ctx, const string& name, } NodeDef* AddEmptyNode(const GraphOptimizerContext& ctx, const string& name) { - CHECK(!ctx.node_map->NodeExists(name)) - << "Node " << name << " already exists in a graph"; + std::string new_name = name; + for (int count = 0; ctx.node_map->NodeExists(new_name); ++count) { + LOG(WARNING) << name << " already exists in the graph."; + new_name = absl::StrCat(name, "_", count); + } NodeDef* new_node = ctx.optimized_graph->add_node(); - new_node->set_name(name); - ctx.node_map->AddNode(name, new_node); + new_node->set_name(new_name); + ctx.node_map->AddNode(new_name, new_node); return new_node; } diff --git a/tensorflow/core/grappler/optimizers/graph_optimizer_stage_test.cc b/tensorflow/core/grappler/optimizers/graph_optimizer_stage_test.cc index 678db7be83f..b0e923803d6 100644 --- a/tensorflow/core/grappler/optimizers/graph_optimizer_stage_test.cc +++ b/tensorflow/core/grappler/optimizers/graph_optimizer_stage_test.cc @@ -228,6 +228,10 @@ TEST_F(GraphOptimizerStageTest, AddNodes) { NodeDef* empty_node_by_name; TF_CHECK_OK(stage.GetInputNode("Add_2", &empty_node_by_name)); EXPECT_EQ(empty_node, empty_node_by_name); + + // Check that AddEmptyNode adds a unique suffix if the node already exists. 
+ NodeDef* unique_empty_node = stage.AddEmptyNode("Add_2"); + EXPECT_EQ(unique_empty_node->name(), "Add_2_0"); } } // namespace From 574239c71b4127462804e9b2b083a2cd1bdf5625 Mon Sep 17 00:00:00 2001 From: Yanhui Liang Date: Tue, 21 Jul 2020 16:52:17 -0700 Subject: [PATCH 0993/2522] Update `run_v1_only` tests in saver_test with proper reasons. PiperOrigin-RevId: 322471183 Change-Id: I2e6896a382d9a8996a766388ee6b8ce718eadacc --- tensorflow/python/training/saver_test.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py index 32cdc2ed5e4..2770a490de8 100644 --- a/tensorflow/python/training/saver_test.py +++ b/tensorflow/python/training/saver_test.py @@ -389,7 +389,7 @@ class SaverTest(test.TestCase): ValueError, "The passed save_path is not a valid checkpoint:"): save.restore(sess, "invalid path") - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only("train.Saver is V1 only API.") def testInt64(self): save_path = os.path.join(self.get_temp_dir(), "int64") @@ -465,7 +465,7 @@ class SaverTest(test.TestCase): # Verify non-duplicate names work. saver_module.Saver({"v0": v0, "v2": v2.saveable}) - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only("train.Saver and VariableV1 are V1 only APIs.") def testBasicsWithListOfVariables(self): save_path = os.path.join(self.get_temp_dir(), "basics_with_list") @@ -666,7 +666,7 @@ class SaverTest(test.TestCase): self.assertAllClose(1.0, self.evaluate(one)) self.assertAllClose([2.0, 2.0, 2.0], self.evaluate(twos)) - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only("train.Saver is V1 only API.") def testReshape(self): save_path = os.path.join(self.get_temp_dir(), "variables_reshape") with session.Session("", graph=ops_lib.Graph()) as sess: @@ -1807,7 +1807,9 @@ class MetaGraphTest(test.TestCase): gfile.MakeDirs(test_dir) return test_dir - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only( + "Queue-based input pipelines have been replaced by `tf.data` " + "and not supported in V2.") def testAddCollectionDef(self): test_dir = self._get_test_dir("good_collection") filename = os.path.join(test_dir, "metafile") @@ -1958,13 +1960,15 @@ class MetaGraphTest(test.TestCase): v1 = sess.graph.get_tensor_by_name("v1:0") self.assertEqual(11.0, self.evaluate(v1)) - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only( + "Exporting/importing meta graphs is only supported in V1.") def testMultiSaverCollection(self): test_dir = self._get_test_dir("saver_collection") self._testMultiSaverCollectionSave(test_dir) self._testMultiSaverCollectionRestore(test_dir) - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only( + "Exporting/importing meta graphs is only supported in V1.") def testClearExtraneousSavers(self): test_dir = self._get_test_dir("clear_extraneous_savers") filename = os.path.join(test_dir, "metafile") @@ -2052,7 +2056,8 @@ class MetaGraphTest(test.TestCase): lambda e: "does not exist"): saver_module.import_meta_graph(filename) - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only( + "Exporting/importing meta graphs is only supported in V1.") def testSliceVariable(self): test_dir = self._get_test_dir("slice_saver") filename = os.path.join(test_dir, "metafile") @@ -2308,7 +2313,7 @@ class MetaGraphTest(test.TestCase): lambda: math_ops.multiply(x, -1.0)))) # pylint: enable=g-long-lambda - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only("This exercises 
Tensor.op which is meaningless in V2.") def testStrippedOpListDef(self): with self.cached_session(): # Creates a graph. @@ -3115,7 +3120,7 @@ class TrackableCompatibilityTests(test.TestCase): # exception" block in Python 3. self.assertNotIn("NewCheckpointReader", cs.exception.message) - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only("train.Saver is V1 only API.") def testGraphChangedForRestoreErrorRaised(self): checkpoint_directory = self.get_temp_dir() checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") From dc3f6e60e5ccfb7207a461ee8899412721d67729 Mon Sep 17 00:00:00 2001 From: Jiho Choi Date: Tue, 21 Jul 2020 16:55:32 -0700 Subject: [PATCH 0994/2522] Process tf.data events. PiperOrigin-RevId: 322471777 Change-Id: Ied4eb894ecbbde1861dcd8d189ecd717e7ceb6cb --- .../core/profiler/utils/group_events.cc | 88 +++++++++++++++++-- tensorflow/core/profiler/utils/group_events.h | 6 ++ tensorflow/core/profiler/utils/tf_op_utils.cc | 5 ++ tensorflow/core/profiler/utils/tf_op_utils.h | 3 + .../core/profiler/utils/xplane_schema.cc | 11 +++ .../core/profiler/utils/xplane_schema.h | 11 +++ 6 files changed, 119 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/profiler/utils/group_events.cc b/tensorflow/core/profiler/utils/group_events.cc index 38ad6cd43ee..a9da3b543c6 100644 --- a/tensorflow/core/profiler/utils/group_events.cc +++ b/tensorflow/core/profiler/utils/group_events.cc @@ -66,10 +66,10 @@ absl::optional GetKernelEventType(bool is_host_plane, return absl::nullopt; } -bool IsTfOpEvent(const XPlaneVisitor& visitor, const XEvent& event) { +Category GetTfEventCategory(const XPlaneVisitor& visitor, const XEvent& event) { TfOp tf_op = ParseTfOpFullname(visitor.GetEventMetadata(event.metadata_id())->name()); - return tf_op.category == Category::kTensorFlow; + return tf_op.category; } int64 GetEventType(bool is_host_plane, const XPlaneVisitor& visitor, @@ -83,10 +83,16 @@ int64 GetEventType(bool is_host_plane, const XPlaneVisitor& visitor, // TODO(b/148346217): Make XPlaneVisitor support KernelLaunch and // KernelExecute event types. 
return *kernel_event_type; - } else if (IsTfOpEvent(visitor, event)) { - return HostEventType::kTfOpRun; } else { - return HostEventType::kUnknownHostEventType; + Category category = GetTfEventCategory(visitor, event); + switch (category) { + case Category::kTensorFlow: + return HostEventType::kTfOpRun; + case Category::kTfData: + return HostEventType::kIterator; + default: + return HostEventType::kUnknownHostEventType; + } } } @@ -641,6 +647,67 @@ void EventForest::ProcessModelIds() { } } +void EventForest::ProcessTfDataEvents() { + absl::flat_hash_map, + EventNode*> + produce_iterators; + for (HostEventType event_type : {HostEventType::kPrefetchProduce, + HostEventType::kParallelInterleaveProduce, + HostEventType::kParallelMapProduce, + HostEventType::kMapAndBatchProduce}) { + auto produce_event_list = gtl::FindOrNull(event_node_map_, event_type); + if (!produce_event_list) continue; + VLOG(1) << produce_event_list->size() << " " + << GetHostEventTypeStr(event_type) << " events found."; + for (auto& produce_event : *produce_event_list) { + absl::optional element_id = + produce_event->GetEventVisitor().GetStat(StatType::kElementId); + if (!element_id.has_value()) continue; + for (EventNode* produce_iterator : produce_event->GetChildren()) { + if (IsIteratorEventName(produce_iterator->GetEventVisitor().Name())) { + absl::optional iterator_id = + produce_iterator->GetEventVisitor().GetStat(StatType::kParentId); + if (!iterator_id.has_value()) break; + produce_iterators[{iterator_id->IntValue(), element_id->IntValue()}] = + produce_iterator; + break; + } + } + } + } + VLOG(1) << produce_iterators.size() << " producer iterators found."; + uint64 num_matched = 0; + for (HostEventType event_type : {HostEventType::kPrefetchConsume, + HostEventType::kParallelInterleaveConsume, + HostEventType::kParallelMapConsume, + HostEventType::kMapAndBatchConsume}) { + auto consume_event_list = gtl::FindOrNull(event_node_map_, event_type); + if (!consume_event_list) continue; + VLOG(1) << consume_event_list->size() << " " + << GetHostEventTypeStr(event_type) << " events found."; + for (auto& consume_event : *consume_event_list) { + absl::optional element_id = + consume_event->GetEventVisitor().GetStat(StatType::kElementId); + if (!element_id.has_value()) continue; + EventNode* consume_iterator = consume_event->GetParent(); + if (!consume_iterator || + !IsIteratorEventName(consume_iterator->GetEventVisitor().Name())) { + continue; + } + absl::optional iterator_id = + consume_iterator->GetEventVisitor().GetStat(StatType::kStepId); + if (!iterator_id.has_value()) continue; + if (auto produce_iterator = gtl::FindOrNull( + produce_iterators, std::make_pair(iterator_id->IntValue(), + element_id->IntValue()))) { + consume_iterator->AddChild(*produce_iterator); + ++num_matched; + } + } + } + VLOG(1) << num_matched << " consumer iterators matched."; +} + EventForest::EventForest( const std::vector& connect_info_list, const std::vector& root_event_types, @@ -664,6 +731,17 @@ EventForest::EventForest( ProcessModelIds(); } +EventForest::EventForest( + const std::function visitor_factory, + XPlane* plane) { + ContextGroupMap context_groups; + visitors_.reserve(1); + CreateStatMetadata(plane); + visitors_.push_back(visitor_factory(plane)); + ConnectIntraThread(visitors_.back(), plane, &context_groups); + ConnectContextGroups(context_groups); +} + std::vector CreateInterThreadConnectInfoList() { std::vector connect_info_list = { {HostEventType::kExecutorStateProcess, diff --git 
a/tensorflow/core/profiler/utils/group_events.h b/tensorflow/core/profiler/utils/group_events.h index c3c156a85a5..e03acf3a37f 100644 --- a/tensorflow/core/profiler/utils/group_events.h +++ b/tensorflow/core/profiler/utils/group_events.h @@ -156,12 +156,18 @@ class EventForest { const std::function visitor_factory, XSpace* space); + EventForest(const std::function visitor_factory, + XPlane* plane); + const EventNodeMap& GetEventNodeMap() const { return event_node_map_; } const GroupMetadataMap& GetGroupMetadataMap() const { return group_metadata_map_; } + // Connects tf.data events across threads. + void ProcessTfDataEvents(); + private: // Creates an EventNode for each event in event_node_map and connect events // according to the nesting relationship within the thread. diff --git a/tensorflow/core/profiler/utils/tf_op_utils.cc b/tensorflow/core/profiler/utils/tf_op_utils.cc index eeafd8e6525..4f1395d9233 100644 --- a/tensorflow/core/profiler/utils/tf_op_utils.cc +++ b/tensorflow/core/profiler/utils/tf_op_utils.cc @@ -32,6 +32,7 @@ namespace { const absl::string_view kIterator = "Iterator"; const absl::string_view kSeparator = "::"; +const absl::string_view kIteratorPrefix = "Iterator::"; } // namespace @@ -55,6 +56,10 @@ bool IsJaxOpType(absl::string_view op_type) { return RE2::FullMatch(op_type, *kJaxOpTypeRegEx); } +bool IsIteratorEventName(absl::string_view event_name) { + return absl::StartsWith(event_name, kIteratorPrefix); +} + TfOp ParseTfOpFullname(absl::string_view tf_op_fullname) { // TF Op names have the format "name:type". TfOp tf_op = {Category::kUnknown, tf_op_fullname, kUnknownOp}; diff --git a/tensorflow/core/profiler/utils/tf_op_utils.h b/tensorflow/core/profiler/utils/tf_op_utils.h index 4a63d68bffb..999ca91dee3 100644 --- a/tensorflow/core/profiler/utils/tf_op_utils.h +++ b/tensorflow/core/profiler/utils/tf_op_utils.h @@ -95,6 +95,9 @@ bool IsTfOpType(absl::string_view op_type); // Returns true if the given string matches JAX pattern. bool IsJaxOpType(absl::string_view op_type); +// Returns true if the given string matches tf.data iterator pattern. +bool IsIteratorEventName(absl::string_view event_name); + } // namespace profiler } // namespace tensorflow diff --git a/tensorflow/core/profiler/utils/xplane_schema.cc b/tensorflow/core/profiler/utils/xplane_schema.cc index 0cc839681a7..d3b007dbf6c 100644 --- a/tensorflow/core/profiler/utils/xplane_schema.cc +++ b/tensorflow/core/profiler/utils/xplane_schema.cc @@ -94,6 +94,15 @@ const HostEventTypeMap& GetHostEventTypeMap() { // tf.data related. {"IteratorGetNextOp::DoCompute", kIteratorGetNextOp}, {"IteratorGetNextAsOptionalOp::DoCompute", kIteratorGetNextAsOptionalOp}, + {"Iterator", kIterator}, + {"PrefetchProduce", kPrefetchProduce}, + {"PrefetchConsume", kPrefetchConsume}, + {"ParallelInterleaveProduce", kParallelInterleaveProduce}, + {"ParallelInterleaveConsume", kParallelInterleaveConsume}, + {"ParallelMapProduce", kParallelMapProduce}, + {"ParallelMapConsume", kParallelMapConsume}, + {"MapAndBatchProduce", kMapAndBatchProduce}, + {"MapAndBatchConsume", kMapAndBatchConsume}, // JAX related. {"LocalExecutable::ExecuteOnLocalDevices", kExecuteOnLocalDevices}, // GPU related. @@ -136,6 +145,8 @@ const StatTypeMap& GetStatTypeMap() { {"shape", kTensorShapes}, {"kpi_name", kKpiName}, {"kpi_value", kKpiValue}, + {"element_id", kElementId}, + {"parent_id", kParentId}, // XPlane semantics related. 
{"_pt", kProducerType}, {"_ct", kConsumerType}, diff --git a/tensorflow/core/profiler/utils/xplane_schema.h b/tensorflow/core/profiler/utils/xplane_schema.h index 2f2fea880f6..ac66a4ac8bc 100644 --- a/tensorflow/core/profiler/utils/xplane_schema.h +++ b/tensorflow/core/profiler/utils/xplane_schema.h @@ -84,6 +84,15 @@ enum HostEventType { // tf.data related. kIteratorGetNextOp, kIteratorGetNextAsOptionalOp, + kIterator, + kPrefetchProduce, + kPrefetchConsume, + kParallelInterleaveProduce, + kParallelInterleaveConsume, + kParallelMapProduce, + kParallelMapConsume, + kMapAndBatchProduce, + kMapAndBatchConsume, // JAX related. kExecuteOnLocalDevices, // GPU related. @@ -124,6 +133,8 @@ enum StatType { kTensorShapes, kKpiName, kKpiValue, + kElementId, + kParentId, // XPlane semantics related. kProducerType, kConsumerType, From e286f3d5c5cf7b3466b4b1053f711d4ae754f7eb Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Wed, 22 Jul 2020 00:17:20 +0000 Subject: [PATCH 0995/2522] simplify --- tensorflow/python/kernel_tests/map_ops_test.py | 18 ------------------ tensorflow/python/ops/map_ops.py | 7 ------- 2 files changed, 25 deletions(-) diff --git a/tensorflow/python/kernel_tests/map_ops_test.py b/tensorflow/python/kernel_tests/map_ops_test.py index bdf06fc01d4..a80bab228d6 100644 --- a/tensorflow/python/kernel_tests/map_ops_test.py +++ b/tensorflow/python/kernel_tests/map_ops_test.py @@ -197,23 +197,5 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): self.assertAllClose(g2, array_ops.zeros_like(v)) self.assertAllClose(g3, 7) - def testEraseGrad(self): - with backprop.GradientTape(persistent=True) as tape: - m = map_ops.empty_tensor_map() - k = constant_op.constant(1.0) - v = constant_op.constant(2.0) - tape.watch(v) - k2 = constant_op.constant(12.0) - v2 = constant_op.constant(22.0) - tape.watch(v2) - m = map_ops.tensor_map_insert(m, k, v) - m = map_ops.tensor_map_insert(m, k2, v2) - m, e = map_ops.tensor_map_erase(m, k2, v2.dtype) - l = map_ops.tensor_map_lookup(m, k, v.dtype) - self.assertAllClose(l, v) - self.assertAllClose(e, v2) - g = tape.gradient(l * 5, v) - self.assertAllClose(g, 5) - if __name__ == '__main__': test.main() diff --git a/tensorflow/python/ops/map_ops.py b/tensorflow/python/ops/map_ops.py index c28bc5754df..3d37247988c 100644 --- a/tensorflow/python/ops/map_ops.py +++ b/tensorflow/python/ops/map_ops.py @@ -65,10 +65,3 @@ def InsertGrad(op, dmap): lambda: tensor_map_erase(dmap, k, v.dtype)[0], lambda: dmap) return map_grad, key_grad, value_grad - -@ops.RegisterGradient("TensorMapErase") -def EraseGrad(op, dmap, dval): - _, k = op.inputs - key_grad = None - map_grad = dmap - return map_grad, key_grad From d56a14c3d347fd75d295fedeb26230652d14e561 Mon Sep 17 00:00:00 2001 From: Yanhui Liang Date: Tue, 21 Jul 2020 16:58:42 -0700 Subject: [PATCH 0996/2522] Update `run_v1_only` test with proper reasons. 
PiperOrigin-RevId: 322472295 Change-Id: If842ffe4d95d4d9a124f26e15c8b6f03738e2450 --- tensorflow/python/training/checkpoint_ops_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/training/checkpoint_ops_test.py b/tensorflow/python/training/checkpoint_ops_test.py index 47ce8d14e88..6f6fe1c18fe 100644 --- a/tensorflow/python/training/checkpoint_ops_test.py +++ b/tensorflow/python/training/checkpoint_ops_test.py @@ -35,7 +35,8 @@ from tensorflow.python.training import checkpoint_ops from tensorflow.python.training import saver as saver_lib -@test_util.run_v1_only('b/120545219') +@test_util.run_v1_only( + 'This is to test V1 name-based checkpoints which is not supported in V2.') class LoadAndRemapWrappersTest(test.TestCase): """Tests for the functionality of the Python wrappers.""" From 88769859323fcbd96c4ebe384513dec6bebd0634 Mon Sep 17 00:00:00 2001 From: Robert David Date: Tue, 21 Jul 2020 17:01:02 -0700 Subject: [PATCH 0997/2522] GPU delegate: Add support for Sigmoid as fused activation. PiperOrigin-RevId: 322472695 Change-Id: Id16daccd009e60308139482653f425b36148a454 --- tensorflow/lite/delegates/gpu/common/model_builder.cc | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/common/model_builder.cc b/tensorflow/lite/delegates/gpu/common/model_builder.cc index d2fefe5f552..b62eb12aa7a 100644 --- a/tensorflow/lite/delegates/gpu/common/model_builder.cc +++ b/tensorflow/lite/delegates/gpu/common/model_builder.cc @@ -112,13 +112,11 @@ absl::Status IsActivationSupported(TfLiteFusedActivation fused_activation) { case kTfLiteActReluN1To1: case kTfLiteActRelu6: case kTfLiteActTanh: + case kTfLiteActSigmoid: return absl::OkStatus(); case kTfLiteActSignBit: return absl::UnimplementedError( "TfLiteFusedActivation.kTfLiteActSignBit"); - case kTfLiteActSigmoid: - return absl::UnimplementedError( - "TfLiteFusedActivation.kTfLiteActSigmoid"); // Do not add default; we want compilation error rather than run-time // error. @@ -159,6 +157,13 @@ absl::Status MaybeFuseActivation(TfLiteFusedActivation fused_activation, activation_node->operation.type = ToString(OperationType::TANH); return absl::OkStatus(); } + case kTfLiteActSigmoid: { + Node* activation_node; + RETURN_IF_ERROR( + NewPassthroughNode(graph, node, outputs[0], &activation_node)); + activation_node->operation.type = ToString(OperationType::SIGMOID); + return absl::OkStatus(); + } break; default: return absl::NotFoundError( absl::StrCat("Unsupported fused activation: ", fused_activation)); From f91a833df2defa0b03e364cba4c22acc63238b25 Mon Sep 17 00:00:00 2001 From: Jaesung Chung Date: Tue, 21 Jul 2020 17:01:44 -0700 Subject: [PATCH 0998/2522] Fix broken tflite support project's links. closes #41612 PiperOrigin-RevId: 322472811 Change-Id: I7b665b5dd1e82b30623d31cbc165cdc47228a60b --- tensorflow/lite/g3doc/guide/codegen.md | 2 +- tensorflow/lite/g3doc/guide/lite_support.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/g3doc/guide/codegen.md b/tensorflow/lite/g3doc/guide/codegen.md index cceb40b1d74..39abc5d7679 100644 --- a/tensorflow/lite/g3doc/guide/codegen.md +++ b/tensorflow/lite/g3doc/guide/codegen.md @@ -173,7 +173,7 @@ generated by the Android Studio ML Model Binding. 
The Metadata Extractor library is a convinient tool to read the metadata and associated files from a models across different platforms (see the -[Java version](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/experimental/support/metadata) +[Java version](https://github.com/tensorflow/tflite-support/tree/master/tensorflow_lite_support/metadata) and the C++ version is coming soon). Users can also build their own metadata extractor tool in other languages using the Flatbuffers library. diff --git a/tensorflow/lite/g3doc/guide/lite_support.md b/tensorflow/lite/g3doc/guide/lite_support.md index 826979efb19..39eeeee3684 100644 --- a/tensorflow/lite/g3doc/guide/lite_support.md +++ b/tensorflow/lite/g3doc/guide/lite_support.md @@ -6,7 +6,7 @@ Mobile application developers typically interact with typed objects such as bitmaps or primitives such as integers. However, the TensorFlow Lite Interpreter that runs the on-device machine learning model uses tensors in the form of ByteBuffer, which can be difficult to debug and manipulate. The -[TensorFlow Lite Android Support Library](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/experimental/support/java) +[TensorFlow Lite Android Support Library](https://github.com/tensorflow/tflite-support/tree/master/tensorflow_lite_support/java) is designed to help process the input and output of TensorFlow Lite models, and make the TensorFlow Lite interpreter easier to use. From c4dd26b614943da37f8a892fbc20d162d0d8544d Mon Sep 17 00:00:00 2001 From: Bruce Fontaine Date: Tue, 21 Jul 2020 17:02:48 -0700 Subject: [PATCH 0999/2522] Add option to strategy_test_lib to wrap functions run in tf.function. Add more unit tests to tpu_strategy_test. PiperOrigin-RevId: 322473054 Change-Id: I098934856f7349f6a5b5caccdf0ebba713becea0 --- tensorflow/python/distribute/BUILD | 1 + .../python/distribute/strategy_test_lib.py | 76 ++-- .../python/distribute/tpu_strategy_test.py | 346 ++++++++++++++++-- 3 files changed, 377 insertions(+), 46 deletions(-) diff --git a/tensorflow/python/distribute/BUILD b/tensorflow/python/distribute/BUILD index 0468c90292d..63c4d27956d 100644 --- a/tensorflow/python/distribute/BUILD +++ b/tensorflow/python/distribute/BUILD @@ -648,6 +648,7 @@ tpu_py_test( python_version = "PY3", tags = ["no_oss"], deps = [ + ":strategy_test_lib", ":tpu_strategy", "//tensorflow/python/distribute/cluster_resolver:cluster_resolver_lib", "//tensorflow/python/eager:remote", diff --git a/tensorflow/python/distribute/strategy_test_lib.py b/tensorflow/python/distribute/strategy_test_lib.py index 06913db5c72..9ffbe9424b5 100644 --- a/tensorflow/python/distribute/strategy_test_lib.py +++ b/tensorflow/python/distribute/strategy_test_lib.py @@ -59,6 +59,15 @@ class _TestException(Exception): pass +# Conditionally wrap the fn in a def_function.function (so it runs in graph +# mode). 
+def _maybe_run_in_function(fn, run_in_function=False): + if not run_in_function or not context.executing_eagerly(): + return fn + else: + return def_function.function()(fn) + + # May be the argument to either distribution.extended.call_for_each_replica() or # get_replica_context().merge_call() def _raise_exception_fn(_=None): @@ -596,50 +605,61 @@ class OneDeviceDistributionTestBase(test.TestCase): class TwoDeviceDistributionTestBase(test.TestCase): """Some tests that should work with any two-device DistributionStrategy.""" - def _test_run(self, strategy): - out1 = strategy.run( - lambda: ds_context.get_replica_context().replica_id_in_sync_group + 1) + def _test_run(self, strategy, run_in_function=False): + out1 = strategy.run(_maybe_run_in_function( + lambda: ds_context.get_replica_context().replica_id_in_sync_group + 1, + run_in_function)) self.assertAllEqual([1, 2], self.evaluate(strategy.unwrap(out1))) - out2 = strategy.run(lambda x: {"a": x * 2, "b": x * x}, args=(out1,)) + out2 = strategy.run(_maybe_run_in_function( + lambda x: {"a": x * 2, "b": x * x}, run_in_function), args=(out1,)) out2_vals = self.evaluate(nest.map_structure(strategy.unwrap, out2)) self.assertAllEqual([2, 4], out2_vals["a"]) self.assertAllEqual([1, 4], out2_vals["b"]) - out3 = strategy.run(lambda b, a: a + 2 * b + 2, kwargs=out2) + out3 = strategy.run(_maybe_run_in_function( + lambda b, a: a + 2 * b + 2, run_in_function), kwargs=out2) self.assertAllEqual([6, 14], self.evaluate(strategy.unwrap(out3))) - def _test_all_reduce_sum(self, strategy): + def _test_all_reduce_sum(self, strategy, run_in_function=False): self._test_collective_comms( strategy, _all_sum, inputs=([1., 3.], [[39., 2.], [3., 41.]]), - expected=(4., [42., 43.])) + expected=(4., [42., 43.]), + run_in_function=run_in_function) - def _test_all_reduce_sum_gradients(self, strategy): + def _test_all_reduce_sum_gradients(self, strategy, run_in_function=False): self._test_collective_comms_gradients( - strategy, _all_sum, inputs=[1., 3.], expected_grads=[4., 4.]) + strategy, _all_sum, inputs=[1., 3.], expected_grads=[4., 4.], + run_in_function=run_in_function) - def _test_all_reduce_sum_gradient_tape(self, strategy): + def _test_all_reduce_sum_gradient_tape(self, strategy, run_in_function=False): self._test_collective_comms_gradient_tape( - strategy, _all_sum, inputs=[1., 3.], expected_grads=[4., 4.]) + strategy, _all_sum, inputs=[1., 3.], expected_grads=[4., 4.], + run_in_function=run_in_function) - def _test_all_reduce_mean(self, strategy): + def _test_all_reduce_mean(self, strategy, run_in_function=False): self._test_collective_comms( strategy, _all_mean, inputs=([1., 3.], [[39., 2.], [3., 41.]]), - expected=(2., [21., 21.5])) + expected=(2., [21., 21.5]), + run_in_function=run_in_function) - def _test_all_reduce_mean_gradients(self, strategy): + def _test_all_reduce_mean_gradients(self, strategy, run_in_function=False): self._test_collective_comms_gradients( - strategy, _all_mean, inputs=[1., 3.], expected_grads=[2., 2.]) + strategy, _all_mean, inputs=[1., 3.], expected_grads=[2., 2.], + run_in_function=run_in_function) - def _test_all_reduce_mean_gradient_tape(self, strategy): + def _test_all_reduce_mean_gradient_tape(self, strategy, + run_in_function=False): self._test_collective_comms_gradient_tape( - strategy, _all_mean, inputs=[1., 3.], expected_grads=[2., 2.]) + strategy, _all_mean, inputs=[1., 3.], expected_grads=[2., 2.], + run_in_function=run_in_function) - def _test_collective_comms(self, strategy, comm_fn, inputs, expected): + def 
_test_collective_comms(self, strategy, comm_fn, inputs, expected, + run_in_function=False): inputs = strategy.make_input_fn_iterator( lambda _: dataset_ops.Dataset.from_tensor_slices(inputs)) @@ -647,14 +667,16 @@ class TwoDeviceDistributionTestBase(test.TestCase): outputs = self.evaluate( list( map(strategy.experimental_local_results, - strategy.experimental_run(comm_fn, inputs)))) + strategy.experimental_run( + _maybe_run_in_function(comm_fn, run_in_function), inputs)))) self.assertAllEqual([expected[0], expected[0]], outputs[0]) self.assertAllEqual([expected[1], expected[1]], outputs[1]) def _test_collective_comms_gradients(self, strategy, comm_fn, inputs, - expected_grads): - if context.executing_eagerly(): - self.skipTest("`tf.gradients` is not supported with eager execution.") + expected_grads, run_in_function=False): + if context.executing_eagerly() and not run_in_function: + self.skipTest("`tf.gradients` is not supported with eager execution " + "without using tf.functions.") def step(c): x = array_ops.identity(42.) @@ -669,10 +691,12 @@ class TwoDeviceDistributionTestBase(test.TestCase): expected_grads, self.evaluate( strategy.experimental_local_results( - strategy.experimental_run(step, inputs)))) + strategy.experimental_run( + _maybe_run_in_function(step, run_in_function), inputs)))) def _test_collective_comms_gradient_tape(self, strategy, comm_fn, inputs, - expected_grads): + expected_grads, + run_in_function=False): def step(c): x = array_ops.identity(42.) @@ -689,7 +713,9 @@ class TwoDeviceDistributionTestBase(test.TestCase): expected_grads, self.evaluate( strategy.experimental_local_results( - strategy.experimental_run(step, inputs)))) + strategy.experimental_run( + _maybe_run_in_function(step, run_in_function), + inputs)))) class RemoteSingleWorkerMirroredStrategyBase(DistributionTestBase): diff --git a/tensorflow/python/distribute/tpu_strategy_test.py b/tensorflow/python/distribute/tpu_strategy_test.py index 86b375aedf3..7fa927dde52 100644 --- a/tensorflow/python/distribute/tpu_strategy_test.py +++ b/tensorflow/python/distribute/tpu_strategy_test.py @@ -18,12 +18,16 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os + from absl.testing import parameterized +from tensorflow.core.protobuf import config_pb2 from tensorflow.python.data.ops import dataset_ops from tensorflow.python.distribute import distribute_lib from tensorflow.python.distribute import distribution_strategy_context from tensorflow.python.distribute import reduce_util +from tensorflow.python.distribute import strategy_test_lib from tensorflow.python.distribute import tpu_strategy as tpu_lib from tensorflow.python.distribute.cluster_resolver import tpu_cluster_resolver from tensorflow.python.eager import def_function @@ -38,6 +42,7 @@ from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_spec +from tensorflow.python.module import module from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import embedding_ops @@ -50,6 +55,9 @@ from tensorflow.python.platform import tf_logging as logging from tensorflow.python.tpu import device_assignment as device_assignment_lib from tensorflow.python.tpu import tpu from tensorflow.python.tpu import tpu_strategy_util +from tensorflow.python.training import checkpoint_management +from 
tensorflow.python.training import server_lib +from tensorflow.python.training.tracking import util FLAGS = flags.FLAGS @@ -71,7 +79,7 @@ def get_tpu_strategy(enable_packed_var=False): resolver = get_tpu_cluster_resolver() remote.connect_to_cluster(resolver) tpu_strategy_util.initialize_tpu_system(resolver) - strategy = tpu_lib.TPUStrategy(resolver) + strategy = tpu_lib.TPUStrategyV2(resolver) strategy._enable_packed_variable_in_eager_mode = enable_packed_var return strategy @@ -155,15 +163,15 @@ class TPUStrategyTest(test.TestCase, parameterized.TestCase): # Computation replicated to all cores. device_assignment = device_assignment_lib.DeviceAssignment.build( topology, num_replicas=2) - strategy = tpu_lib.TPUStrategy( - resolver, device_assignment=device_assignment) + strategy = tpu_lib.TPUStrategyV2( + resolver, experimental_device_assignment=device_assignment) strategy._enable_packed_variable_in_eager_mode = enable_packed_var # Computation on the 1st core. device_assignment2 = device_assignment_lib.DeviceAssignment.build( topology, num_replicas=1) - strategy2 = tpu_lib.TPUStrategy( - resolver, device_assignment=device_assignment2) + strategy2 = tpu_lib.TPUStrategyV2( + resolver, experimental_device_assignment=device_assignment2) def computation(x): return math_ops.square(x) @@ -264,7 +272,7 @@ class TPUStrategyTest(test.TestCase, parameterized.TestCase): resolver = get_tpu_cluster_resolver() remote.connect_to_cluster(resolver) topology = tpu_strategy_util.initialize_tpu_system(resolver) - all_core_strategy = tpu_lib.TPUStrategy(resolver) + all_core_strategy = tpu_lib.TPUStrategyV2(resolver) all_core_strategy._enable_packed_variable_in_eager_mode = enable_packed_var with all_core_strategy.scope(): @@ -274,16 +282,16 @@ class TPUStrategyTest(test.TestCase, parameterized.TestCase): # Computation on the 1st core. device_assignment = device_assignment_lib.DeviceAssignment.build( topology, num_replicas=1) - first_core_strategy = tpu_lib.TPUStrategy( - resolver, device_assignment=device_assignment) + first_core_strategy = tpu_lib.TPUStrategyV2( + resolver, experimental_device_assignment=device_assignment) first_core_strategy._enable_packed_variable_in_eager_mode = ( enable_packed_var) # Computation on the 2nd core. device_assignment2 = device_assignment_lib.DeviceAssignment( topology, [[[0, 0, 0, 1]]]) - second_core_strategy = tpu_lib.TPUStrategy( - resolver, device_assignment=device_assignment2) + second_core_strategy = tpu_lib.TPUStrategyV2( + resolver, experimental_device_assignment=device_assignment2) second_core_strategy._enable_packed_variable_in_eager_mode = ( enable_packed_var) @@ -309,16 +317,16 @@ class TPUStrategyTest(test.TestCase, parameterized.TestCase): # Strategy for the 1st core. device_assignment = device_assignment_lib.DeviceAssignment.build( topology, num_replicas=1) - first_core_strategy = tpu_lib.TPUStrategy( - resolver, device_assignment=device_assignment) + first_core_strategy = tpu_lib.TPUStrategyV2( + resolver, experimental_device_assignment=device_assignment) first_core_strategy._enable_packed_variable_in_eager_mode = ( enable_packed_var) # Strategy for the 2nd core. 
device_assignment2 = device_assignment_lib.DeviceAssignment( topology, [[[0, 0, 0, 1]]]) - second_core_strategy = tpu_lib.TPUStrategy( - resolver, device_assignment=device_assignment2) + second_core_strategy = tpu_lib.TPUStrategyV2( + resolver, experimental_device_assignment=device_assignment2) second_core_strategy._enable_packed_variable_in_eager_mode = ( enable_packed_var) @@ -576,13 +584,6 @@ class TPUStrategyTest(test.TestCase, parameterized.TestCase): update_variable.get_concrete_function() self.assertLen(strategy.extended.worker_devices, trace_count[0]) - def test_cluster_resolver_available(self, enable_packed_var): - resolver = get_tpu_cluster_resolver() - remote.connect_to_cluster(resolver) - tpu_strategy_util.initialize_tpu_system(resolver) - strategy = tpu_lib.TPUStrategy(resolver) - self.assertIs(strategy.cluster_resolver, resolver) - class TPUStrategyDataPrefetchTest(test.TestCase): @@ -683,5 +684,308 @@ class TPUStrategyDataPrefetchTest(test.TestCase): with self.assertRaisesRegex(ValueError, "TPUStrategy does not support"): iter(strategy.experimental_distribute_datasets_from_function(dataset_fn)) + +class TPUStrategyDistributionTest( + strategy_test_lib.DistributionTestBase, + strategy_test_lib.TwoDeviceDistributionTestBase): + + def test_update_config_proto(self): + resolver = get_tpu_cluster_resolver() + remote.connect_to_cluster(resolver) + tpu_strategy_util.initialize_tpu_system(resolver) + strategy = tpu_lib.TPUStrategyV2(resolver) + + config_proto = config_pb2.ConfigProto() + cluster_spec = server_lib.ClusterSpec({"worker": ["fake1", "fake2"]}) + with test.mock.patch.object( + resolver, "cluster_spec", return_value=cluster_spec): + new_config = strategy.update_config_proto(config_proto) + + # Verify cluster_def. + self.assertProtoEquals(cluster_spec.as_cluster_def(), + new_config.cluster_def) + + # Verify isolate_session_state + self.assertTrue(new_config.isolate_session_state) + + # TODO(b/158110684): enable this test. 
+ def disable_test_numpy_dataset(self): + strategy = get_tpu_strategy() + self._test_numpy_dataset(strategy) + + def test_make_input_fn_iterable(self): + dataset_fn = lambda: dataset_ops.Dataset.range(10) + expected_values = [[i, i+1] for i in range(0, 10, 2)] + distribution = get_tpu_strategy() + input_fn = self._input_fn_to_test_input_context( + dataset_fn, + expected_num_replicas_in_sync=2, + expected_num_input_pipelines=1, + expected_input_pipeline_id=0) + self._test_input_fn_iterable(distribution, input_fn, expected_values) + + def test_make_input_fn_iterator(self): + dataset_fn = lambda: dataset_ops.Dataset.range(10) + expected_values = [[i, i+1] for i in range(0, 10, 2)] + distribution = get_tpu_strategy() + input_fn = self._input_fn_to_test_input_context( + dataset_fn, + expected_num_replicas_in_sync=2, + expected_num_input_pipelines=1, + expected_input_pipeline_id=0) + iterator = distribution.make_input_fn_iterator(input_fn) + self._test_input_fn_iterator( + iterator, + distribution.extended.worker_devices, + expected_values) + + def test_run(self): + strategy = get_tpu_strategy() + self._test_run(strategy, run_in_function=True) + + def test_all_reduce_sum(self): + strategy = get_tpu_strategy() + self._test_all_reduce_sum(strategy, run_in_function=True) + + def test_all_reduce_sum_gradients(self): + strategy = get_tpu_strategy() + self._test_all_reduce_sum_gradients(strategy, run_in_function=True) + + def test_all_reduce_sum_gradient_tape(self): + strategy = get_tpu_strategy() + self._test_all_reduce_sum_gradient_tape(strategy, run_in_function=True) + + def test_all_reduce_mean(self): + strategy = get_tpu_strategy() + self._test_all_reduce_mean(strategy, run_in_function=True) + + def test_all_reduce_mean_gradients(self): + strategy = get_tpu_strategy() + self._test_all_reduce_mean_gradients(strategy, run_in_function=True) + + def test_all_reduce_mean_gradient_tape(self): + strategy = get_tpu_strategy() + self._test_all_reduce_mean_gradient_tape(strategy, run_in_function=True) + + def test_reduce(self): + strategy = get_tpu_strategy() + + inputs = strategy.make_input_fn_iterator( + lambda _: dataset_ops.Dataset.from_tensor_slices([2., 3.])) + + self.evaluate(inputs.initialize()) + per_replica_outputs = strategy.run( + def_function.function(math_ops.square), args=(next(inputs),)) + + with strategy.scope(): + mean = strategy.reduce(reduce_util.ReduceOp.MEAN, per_replica_outputs, + axis=None) + self.assertEqual(6.5, self.evaluate(mean)) + + def test_constraint(self): + strategy = get_tpu_strategy() + + with strategy.scope(): + variable = variables.Variable(initial_value=2., + constraint=lambda x: 0. * x + 1.) + self.assertEqual(variable.value().numpy(), 2) + + @def_function.function + def update_variable(): + variable.assign_add(1) + variable.assign(variable.constraint(variable)) + + update_variable() + self.assertEqual(variable.value().numpy(), 1) + + def test_trainable_variables(self): + strategy = get_tpu_strategy() + self._test_trainable_variable(strategy) + + def test_model_parallelism(self): + resolver = get_tpu_cluster_resolver() + remote.connect_to_cluster(resolver) + topology = tpu_strategy_util.initialize_tpu_system(resolver) + device_assignment = device_assignment_lib.DeviceAssignment( + topology, core_assignment=[[[0, 0, 0, 0], [0, 0, 0, 1]]]) + strategy = tpu_lib.TPUStrategyV2( + resolver, + experimental_device_assignment=device_assignment) + + with strategy.scope(): + v = variables.Variable(2.) 
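+      # v is created on logical device 0 (the default); w below is pinned to logical device 1,
+      # matching the per-device assertions that follow.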
+ with strategy.extended.experimental_logical_device(1): + w = variables.Variable(3.) + + self.assertLen(strategy.experimental_local_results(v), 1) + self.assertLen(strategy.experimental_local_results(w), 1) + self.assertEqual("/job:localhost/replica:0/task:0/device:TPU:0", + strategy.experimental_local_results(v)[0].device) + self.assertEqual("/job:localhost/replica:0/task:0/device:TPU:1", + strategy.experimental_local_results(w)[0].device) + + logical_devices = [] + @def_function.function + def f(x): + replica_ctx = distribution_strategy_context.get_replica_context() + with replica_ctx.experimental_logical_device(0): + y = v * x + with replica_ctx.experimental_logical_device(1): + z = w * y + logical_devices.append((y.device, z.device)) + return z + + result = strategy.run(f, args=(5.,)) + + self.assertEqual( + [("/device:TPU_REPLICATED_CORE:0", "/device:TPU_REPLICATED_CORE:1")], + logical_devices) + + with self.cached_session(): + self.evaluate(variables.global_variables_initializer()) + self.assertEqual(30., self.evaluate(result)) + + def test_model_parallelism_checkpointing(self): + + class PartitionedModel(module.Module): + + def __init__(self, v, w): + super(PartitionedModel, self).__init__() + + assert distribution_strategy_context.has_strategy() + strategy = distribution_strategy_context.get_strategy() + + with strategy.extended.experimental_logical_device(0): + self.v = variables.Variable(v) + with strategy.extended.experimental_logical_device(1): + self.w = variables.Variable(w) + + def __call__(self, x): + replica_ctx = distribution_strategy_context.get_replica_context() + with replica_ctx.experimental_logical_device(0): + y = self.v * x + with replica_ctx.experimental_logical_device(1): + z = self.w * y + return z + + def change_weights_op(self, v_new, w_new): + return control_flow_ops.group([self.v.assign(v_new), + self.w.assign(w_new)]) + + resolver = get_tpu_cluster_resolver() + remote.connect_to_cluster(resolver) + topology = tpu_strategy_util.initialize_tpu_system(resolver) + device_assignment = device_assignment_lib.DeviceAssignment( + topology, core_assignment=[[[0, 0, 0, 0], [0, 0, 0, 1]]]) + strategy = tpu_lib.TPUStrategyV2( + resolver, + experimental_device_assignment=device_assignment) + + with strategy.scope(): + model = PartitionedModel(2., 3.) + + checkpoint_dir = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt") + checkpoint = util.Checkpoint(model=model) + + with self.cached_session() as sess: + self.evaluate(variables.global_variables_initializer()) + checkpoint.save(file_prefix=checkpoint_prefix) + + self.evaluate(model.change_weights_op(1., 4.)) + result = strategy.run(def_function.function(model), args=(5.0,)) + self.assertEqual(20., self.evaluate(result)) + + status = checkpoint.restore( + checkpoint_management.latest_checkpoint(checkpoint_dir)) + status.run_restore_ops(sess) # must run restore op in non-eager mode. 
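+      # Restore returns v and w to their saved values (2., 3.), so the next run yields 2 * 5 * 3 == 30.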
+ status.assert_consumed() + status.assert_existing_objects_matched() + result = strategy.run(def_function.function(model), args=(5.0,)) + self.assertEqual(30., self.evaluate(result)) + + +class DeviceAssignmentTest(test.TestCase): + + def test_core_assignment(self): + resolver = get_tpu_cluster_resolver() + remote.connect_to_cluster(resolver) + topology = tpu_strategy_util.initialize_tpu_system(resolver) + device_assignment = device_assignment_lib.DeviceAssignment( + topology, core_assignment=[[[0, 0, 0, 0]]]) + self.assertAllEqual([[[0, 0, 0, 0]]], device_assignment.core_assignment) + self.assertEqual(1, device_assignment.num_cores_per_replica) + self.assertEqual(1, device_assignment.num_replicas) + self.assertEqual("/task:0/device:TPU:0", device_assignment.tpu_device()) + self.assertEqual("/task:0/device:CPU:0", device_assignment.host_device()) + + def test_device_assignment_strategy_properties(self): + resolver = get_tpu_cluster_resolver() + remote.connect_to_cluster(resolver) + topology = tpu_strategy_util.initialize_tpu_system(resolver) + device_assignment = device_assignment_lib.DeviceAssignment( + topology, core_assignment=[[[0, 0, 0, 0]]]) + strategy = tpu_lib.TPUStrategyV2( + resolver, + experimental_device_assignment=device_assignment) + self.assertEqual(strategy.extended.num_hosts, 1) + self.assertEqual(strategy.num_replicas_in_sync, 1) + self.assertEqual(strategy.extended.num_replicas_per_host, 1) # pylint: disable=protected-access + + def test_device_assignment_constants(self): + resolver = get_tpu_cluster_resolver() + remote.connect_to_cluster(resolver) + topology = tpu_strategy_util.initialize_tpu_system(resolver) + device_assignment = device_assignment_lib.DeviceAssignment( + topology, + core_assignment=device_assignment_lib.SINGLE_CORE_ASSIGNMENT) + self.assertAllEqual([[[0, 0, 0, 0]]], device_assignment.core_assignment) + self.assertEqual(1, device_assignment.num_cores_per_replica) + self.assertEqual(1, device_assignment.num_replicas) + self.assertEqual("/task:0/device:TPU:0", device_assignment.tpu_device()) + self.assertEqual("/task:0/device:CPU:0", device_assignment.host_device()) + + def test_variables_mismatched_device_assignment(self): + resolver = get_tpu_cluster_resolver() + remote.connect_to_cluster(resolver) + topology = tpu_strategy_util.initialize_tpu_system(resolver) + + strategy0 = tpu_lib.TPUStrategyV2(resolver) + self.assertEqual( + ("/job:localhost/replica:0/task:0/device:TPU:0", + "/job:localhost/replica:0/task:0/device:TPU:1"), + strategy0.extended.worker_devices) + + with strategy0.scope(): + v = variables.Variable(1.) + + v1_assign_op = strategy0.experimental_local_results(v)[1].assign(42.) + + with self.cached_session(): + self.evaluate(variables.global_variables_initializer()) + self.evaluate(v1_assign_op) + self.assertAllEqual([1., 42.], + self.evaluate( + strategy0.experimental_local_results(v))) + + # Second strategy has devices reversed relative to the first. 
+ device_assignment = device_assignment_lib.DeviceAssignment( + topology, core_assignment=[[[0, 0, 0, 1]], [[0, 0, 0, 0]]]) + strategy1 = tpu_lib.TPUStrategyV2( + resolver, + experimental_device_assignment=device_assignment) + self.assertEqual( + ("/job:localhost/replica:0/task:0/device:TPU:1", + "/job:localhost/replica:0/task:0/device:TPU:0"), + strategy1.extended.worker_devices) + + v_read = strategy1.run(def_function.function(v.read_value)) + + with self.cached_session(): + self.assertAllEqual([42., 1.], + self.evaluate( + strategy0.experimental_local_results(v_read))) + + if __name__ == "__main__": test.main() From 540e9cf481aed11012e6803df7fb415bc2dcca2a Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Wed, 22 Jul 2020 00:28:36 +0000 Subject: [PATCH 1000/2522] moved TF_StringView to c_api.h and manually declared TF_StringView members in GetName --- tensorflow/c/c_api.h | 8 ++++++++ tensorflow/c/kernels.cc | 8 ++++---- tensorflow/c/kernels.h | 13 +++--------- tensorflow/c/kernels_test.cc | 38 ++++++++++++++++++------------------ 4 files changed, 34 insertions(+), 33 deletions(-) diff --git a/tensorflow/c/c_api.h b/tensorflow/c/c_api.h index 808bcf3bd80..c6ca319c40d 100644 --- a/tensorflow/c/c_api.h +++ b/tensorflow/c/c_api.h @@ -125,6 +125,14 @@ TF_CAPI_EXPORT extern void TF_DeleteBuffer(TF_Buffer*); TF_CAPI_EXPORT extern TF_Buffer TF_GetBuffer(TF_Buffer* buffer); +// -------------------------------------------------------------------------- +// Used to pass strings across the C API. The caller does not take ownership +// of the underlying data pointer and is not responsible for freeing it. +struct TF_StringView { + const char* data; + size_t len; +}; + // -------------------------------------------------------------------------- // TF_SessionOptions holds options that can be passed during session creation. typedef struct TF_SessionOptions TF_SessionOptions; diff --git a/tensorflow/c/kernels.cc b/tensorflow/c/kernels.cc index 42aa2bce54a..6b8348aaa3d 100644 --- a/tensorflow/c/kernels.cc +++ b/tensorflow/c/kernels.cc @@ -239,14 +239,14 @@ void TF_OpKernelContext_Failure(TF_OpKernelContext* ctx, TF_Status* status) { DEFINE_TF_GETATTR(Type, TF_DataType, tensorflow::DataType) DEFINE_TF_GETATTR(Int32, tensorflow::int32, int32_t) -string_view TF_OpKernelConstruction_GetName(TF_OpKernelConstruction* ctx) { +TF_StringView TF_OpKernelConstruction_GetName(TF_OpKernelConstruction* ctx) { auto* cc_ctx = reinterpret_cast(ctx); - string_view string_view_of_name = {.data = cc_ctx->def().name().data(), - .len = cc_ctx->def().name().length()}; + TF_StringView string_view_of_name; + string_view_of_name.data = cc_ctx->def().name().data(); + string_view_of_name.len = cc_ctx->def().name().length(); return string_view_of_name; } - TF_DataType TF_ExpectedOutputDataType(TF_OpKernelContext* ctx, int i) { auto* cc_ctx = reinterpret_cast<::tensorflow::OpKernelContext*>(ctx); return static_cast(cc_ctx->expected_output_dtype(i)); diff --git a/tensorflow/c/kernels.h b/tensorflow/c/kernels.h index 7e2a3aa5067..cec0ca7b6a2 100644 --- a/tensorflow/c/kernels.h +++ b/tensorflow/c/kernels.h @@ -20,6 +20,7 @@ limitations under the License. #include "tensorflow/c/tf_datatype.h" #include "tensorflow/c/tf_status.h" +#include "tensorflow/c/c_api.h" // Macro to control visibility of exported symbols in the shared library (.so, // .dylib, .dll). 
@@ -45,6 +46,7 @@ extern "C" { #endif typedef struct TF_Tensor TF_Tensor; +typedef struct TF_StringView TF_StringView; // -------------------------------------------------------------------------- // C API for TensorFlow Kernels. @@ -184,17 +186,8 @@ TF_CAPI_EXPORT extern void TF_OpKernelConstruction_GetAttrInt32( TF_OpKernelConstruction* ctx, const char* attr_name, int32_t* val, TF_Status* status); -// Used to pass strings across the C API. The caller does not take ownership -// of the underlying data pointer and is not responsible for freeing it. -struct string_view { - const char* data; - size_t len; -}; - -typedef struct string_view string_view; - // Returns the name of the user-defined NodeDef for this OpKernel. -TF_CAPI_EXPORT extern string_view TF_OpKernelConstruction_GetName( +TF_CAPI_EXPORT extern TF_StringView TF_OpKernelConstruction_GetName( TF_OpKernelConstruction* ctx); // Allocates Tensor for output at given index. Caller takes ownership of diff --git a/tensorflow/c/kernels_test.cc b/tensorflow/c/kernels_test.cc index b01e3ea3fac..4a3b83e240c 100644 --- a/tensorflow/c/kernels_test.cc +++ b/tensorflow/c/kernels_test.cc @@ -74,11 +74,11 @@ static void* MyCreateFunc(TF_OpKernelConstruction* ctx) { TF_DeleteStatus(status); // Exercise kernel NodeDef name read - string_view name_string_view = TF_OpKernelConstruction_GetName(ctx); - const char* kernel_name = "SomeKernelName"; - const char* candidate_kernel_name = std::string(name_string_view.data, - name_string_view.len).c_str(); - EXPECT_EQ(0, strcmp(kernel_name, candidate_kernel_name)); + TF_StringView name_string_view = TF_OpKernelConstruction_GetName(ctx); + std::string node_name = "SomeNodeName"; + std::string candidate_node_name = std::string(name_string_view.data, + name_string_view.len); + EXPECT_EQ(node_name, candidate_node_name); return s; } @@ -102,11 +102,11 @@ namespace tensorflow { static std::unique_ptr GetFakeKernel(const char* device_name, const char* op_name, - const char* kernel_name, + const char* node_name, Status* status) { NodeDef def; def.set_op(op_name); - def.set_name(kernel_name); + def.set_name(node_name); def.set_device(device_name); def.add_input("input1"); def.add_input("input2"); @@ -122,7 +122,7 @@ static std::unique_ptr GetFakeKernel(const char* device_name, // Tests registration of a single C kernel and checks that calls through the // C/C++ boundary are being made. 
TEST(TestKernel, TestRegisterKernelBuilder) { - const char* kernel_name = "SomeKernelName"; + const char* node_name = "SomeNodeName"; const char* op_name = "FooOp"; const char* device_name = "FakeDeviceName1"; @@ -137,7 +137,7 @@ TEST(TestKernel, TestRegisterKernelBuilder) { { TF_Status* status = TF_NewStatus(); - TF_RegisterKernelBuilder(kernel_name, builder, status); + TF_RegisterKernelBuilder(node_name, builder, status); EXPECT_EQ(TF_OK, TF_GetCode(status)); TF_Buffer* buf = TF_GetRegisteredKernelsForOp(op_name, status); EXPECT_EQ(TF_OK, TF_GetCode(status)); @@ -152,7 +152,7 @@ TEST(TestKernel, TestRegisterKernelBuilder) { { Status status; std::unique_ptr kernel = - GetFakeKernel(device_name, op_name, kernel_name, &status); + GetFakeKernel(device_name, op_name, node_name, &status); TF_EXPECT_OK(status); ASSERT_NE(nullptr, kernel.get()); kernel->Compute(nullptr); @@ -170,7 +170,7 @@ class DummyDevice : public DeviceBase { }; TEST(TestKernel, TestInputAndOutputCount) { - const char* kernel_name = "InputOutputCounterKernel"; + const char* node_name = "InputOutputCounterKernel"; const char* op_name = "BarOp"; const char* device_name = "FakeDeviceName2"; @@ -220,7 +220,7 @@ TEST(TestKernel, TestInputAndOutputCount) { { TF_Status* status = TF_NewStatus(); - TF_RegisterKernelBuilder(kernel_name, builder, status); + TF_RegisterKernelBuilder(node_name, builder, status); EXPECT_EQ(TF_OK, TF_GetCode(status)); TF_DeleteStatus(status); } @@ -241,7 +241,7 @@ TEST(TestKernel, TestInputAndOutputCount) { Status status; std::unique_ptr kernel = - GetFakeKernel(device_name, op_name, kernel_name, &status); + GetFakeKernel(device_name, op_name, node_name, &status); TF_EXPECT_OK(status); ASSERT_NE(nullptr, kernel.get()); @@ -260,7 +260,7 @@ TEST(TestKernel, DeleteKernelBuilderIsOkOnNull) { } TEST(TestKernel, TestTypeConstraint) { - const char* kernel_name = "SomeKernelName"; + const char* node_name = "SomeNodeName"; const char* op_name = "TypeOp"; const char* device_name = "FakeDeviceName1"; @@ -275,7 +275,7 @@ TEST(TestKernel, TestTypeConstraint) { TF_Status* status = TF_NewStatus(); TF_KernelBuilder_TypeConstraint(builder, "T", TF_DataType::TF_INT32, status); EXPECT_EQ(TF_OK, TF_GetCode(status)); - TF_RegisterKernelBuilder(kernel_name, builder, status); + TF_RegisterKernelBuilder(node_name, builder, status); EXPECT_EQ(TF_OK, TF_GetCode(status)); TF_Buffer* buf = TF_GetRegisteredKernelsForOp(op_name, status); @@ -304,7 +304,7 @@ TEST(TestKernel, TestTypeConstraint) { } TEST(TestKernel, TestHostMemory) { - const char* kernel_name = "SomeKernelName"; + const char* node_name = "SomeNodeName"; const char* op_name = "HostMemoryOp"; const char* device_name = "FakeDeviceName1"; @@ -319,7 +319,7 @@ TEST(TestKernel, TestHostMemory) { TF_KernelBuilder_HostMemory(builder, "input2"); TF_KernelBuilder_HostMemory(builder, "output1"); TF_Status* status = TF_NewStatus(); - TF_RegisterKernelBuilder(kernel_name, builder, status); + TF_RegisterKernelBuilder(node_name, builder, status); EXPECT_EQ(TF_OK, TF_GetCode(status)); TF_Buffer* buf = TF_GetRegisteredKernelsForOp(op_name, status); @@ -343,12 +343,12 @@ TEST(TestKernel, TestHostMemory) { class DeviceKernelOpTest : public OpsTestBase { protected: - void SetupOp(const char* op_name, const char* kernel_name, + void SetupOp(const char* op_name, const char* node_name, void (*compute_func)(void*, TF_OpKernelContext*)) { TF_KernelBuilder* builder = TF_NewKernelBuilder( op_name, device_name_, nullptr, compute_func, nullptr); TF_Status* status = TF_NewStatus(); - 
TF_RegisterKernelBuilder(kernel_name, builder, status); + TF_RegisterKernelBuilder(node_name, builder, status); EXPECT_EQ(TF_OK, TF_GetCode(status)); TF_DeleteStatus(status); From 87c2f2dc3b263f90b79c4d31b6d9dbc410d8145d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 21 Jul 2020 17:03:19 -0700 Subject: [PATCH 1001/2522] Allowing a slice to move through a reverse (i.e., slice(reverse) is reverse(slice)). PiperOrigin-RevId: 322473168 Change-Id: Ia8c8563f121cfb3aac52464336a03642c7ae6b2a --- .../xla/service/algebraic_simplifier.cc | 58 +++++++++++++++ .../xla/service/algebraic_simplifier_test.cc | 74 +++++++++++++++++++ 2 files changed, 132 insertions(+) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index c0adef44087..db1c86d9fe3 100755 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -509,6 +509,9 @@ class AlgebraicSimplifierVisitor : public DfsHloRewriteVisitor { // Tries to convert slice(reshape(X)) into reshape(slice(X)) StatusOr TryToReorderSliceAndReshape(HloInstruction* slice); + // Tries to convert slice(reverse(X)) into reverse(slice(X)) + StatusOr TryToReorderSliceAndReverse(HloInstruction* slice); + // Tries to simplify `(and (< a N) (< a K))` in cases where `N <= K` into // `(< a N)`. This is crucial for being able to figure out the loop trip // count. @@ -3574,6 +3577,52 @@ StatusOr AlgebraicSimplifierVisitor::TryToReorderSliceAndReshape( return false; } +// Allowing a slice to move through a reverse with any necessary updates to the +// slice config. +StatusOr AlgebraicSimplifierVisitor::TryToReorderSliceAndReverse( + HloInstruction* slice) { + VLOG(2) << "Entered TryToReorderSliceAndReverse for slice:" + << slice->ToString(); + if (Match(slice, m::Slice(m::Reverse()))) { + HloInstruction* reverse = slice->mutable_operand(0); + HloInstruction* reverse_operand = reverse->mutable_operand(0); + std::vector new_starts = slice->slice_starts(); + std::vector new_limits = slice->slice_limits(); + std::vector new_strides = slice->slice_strides(); + for (auto rdim : reverse->dimensions()) { + int64 start = slice->slice_starts(rdim); + int64 limit = slice->slice_limits(rdim); + int64 stride = slice->slice_strides(rdim); + // find_nth allows us to compute the appropriate index to begin + // with during reverse even in the presence of non-unit strides + int64 find_nth = (limit - start - 1) / stride; + find_nth = start + find_nth * stride; + limit = find_nth + 1; + new_starts[rdim] = + (reverse->shape().dimensions(rdim) - start) - (limit - start); + new_limits[rdim] = reverse->shape().dimensions(rdim) - start; + VLOG(2) << "Analyzing dim:" << rdim << " (start,limit):" << start << "," + << limit << " and new (start, limit):" << new_starts[rdim] << "," + << new_limits[rdim]; + } + // New slice formed from the reverse_operand, but strides and shape of the + // slice output remains the same. New slice's starts and limits are updated + // for ONLY the reversed dimensions as indicated above. 
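+  // As a concrete illustration of the arithmetic above (numbers taken from
+  // the SliceReverseNonUnitEvenOddStrides test added below): for a reversed
+  // dimension of size 7 sliced as [0:7:4], find_nth = (7 - 0 - 1) / 4 = 1, so
+  // the last element actually read is index 0 + 1 * 4 = 4 and the effective
+  // limit is 5; the reordered slice therefore becomes [7-5 : 7-0 : 4], i.e.
+  // [2:7:4], which touches the same elements of the un-reversed operand.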
+ HloInstruction* new_slice = computation_->AddInstruction( + HloInstruction::CreateSlice(slice->shape(), reverse_operand, new_starts, + new_limits, new_strides)); + simplifier_->UpdateLayout(new_slice->mutable_shape()); + TF_RETURN_IF_ERROR(ReplaceWithNewInstruction( + slice, HloInstruction::CreateReverse(new_slice->shape(), new_slice, + reverse->dimensions()))); + // We do not delete the old reverse, since there might be another + // consumer of that reverse (i.e., full reverse output). DCE should take + // care of any deletion that is necessary if there was no use of reverse. + return true; + } + return false; +} + Status AlgebraicSimplifierVisitor::HandleSlice(HloInstruction* slice) { // Delete no-op slices, i.e. where shape = operand shape. if (ReplaceInstructionIfSameShape(slice, slice->mutable_operand(0))) { @@ -3728,6 +3777,15 @@ Status AlgebraicSimplifierVisitor::HandleSlice(HloInstruction* slice) { if (replaced) { return Status::OK(); } + + bool reversed = false; + if (Match(slice, m::Slice(m::Reverse(m::Op())))) { + TF_ASSIGN_OR_RETURN(reversed, TryToReorderSliceAndReverse(slice)); + } + if (reversed) { + return Status::OK(); + } + return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index d2c32d79a91..ea21c7c1d21 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -2014,6 +2014,80 @@ TEST_F(AlgebraicSimplifierTest, RemoveUnaryConcatenate) { EXPECT_THAT(computation->root_instruction(), param0); } +TEST_F(AlgebraicSimplifierTest, SliceReverse) { + const char* const hlo_string = R"( +HloModule module + +ENTRY test { + param = f32[6,7,32] parameter(0) + constant = f32[] constant(0) + pad = f32[8,7,32] pad(param, constant), padding=1_1x0_0x0_0 + rev = f32[8,7,32] reverse(pad), dimensions={0,2} + slice = f32[1,7,32] slice(rev), slice={[2:3:1], [0:7:1], [0:32:1]} + ROOT tuple = (f32[1,7,32]) tuple(slice) +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + AlgebraicSimplifier simplifier(default_options_); + ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + HloComputation* computation = module->entry_computation(); + EXPECT_THAT(computation->root_instruction(), + GmockMatch(m::Tuple(m::Reverse(m::Slice(m::Pad()))))); + const HloInstruction* slice = + computation->root_instruction()->operand(0)->operand(0); + EXPECT_TRUE( + ShapeUtil::Equal(slice->shape(), ShapeUtil::MakeShape(F32, {1, 7, 32}))); + // slice start,limit of 0th and 2nd dimensions are changed + // while 1st dimension's slice start, limit remains the same since + // it is not reversed. 
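+  // Working through the numbers: dimension 0 has size 8 and is sliced as
+  // [2:3:1] after the reverse, so the reordered slice gets
+  // new_start = 8 - 2 - (3 - 2) = 5 and new_limit = 8 - 2 = 6, while the
+  // un-reversed dimension 1 keeps its original [0:7] bounds.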
+ EXPECT_EQ(slice->slice_starts(0), 5); + EXPECT_EQ(slice->slice_limits(0), 6); + EXPECT_EQ(slice->slice_starts(1), 0); + EXPECT_EQ(slice->slice_limits(1), 7); + EXPECT_EQ(slice->slice_starts(2), 0); + EXPECT_EQ(slice->slice_limits(2), 32); + EXPECT_EQ(slice->slice_strides(0), 1); + EXPECT_EQ(slice->slice_strides(1), 1); + EXPECT_EQ(slice->slice_strides(2), 1); +} + +TEST_F(AlgebraicSimplifierTest, SliceReverseNonUnitEvenOddStrides) { + const char* const hlo_string = R"( +HloModule module + +ENTRY test { + param = f32[6,7,32] parameter(0) + constant = f32[] constant(0) + pad = f32[8,7,32] pad(param, constant), padding=1_1x0_0x0_0 + rev = f32[8,7,32] reverse(pad), dimensions={0,1,2} + slice = f32[1,2,7] slice(rev), slice={[2:3:2], [0:7:4], [0:32:5]} + ROOT tuple = (f32[1,2,7]) tuple(slice) +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + + AlgebraicSimplifier simplifier(default_options_); + ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + HloComputation* computation = module->entry_computation(); + EXPECT_THAT(computation->root_instruction(), + GmockMatch(m::Tuple(m::Reverse(m::Slice(m::Pad()))))); + const HloInstruction* slice = + computation->root_instruction()->operand(0)->operand(0); + EXPECT_TRUE( + ShapeUtil::Equal(slice->shape(), ShapeUtil::MakeShape(F32, {1, 2, 7}))); + // slice start,limit of all dimensions are changed + EXPECT_EQ(slice->slice_starts(0), 5); + EXPECT_EQ(slice->slice_limits(0), 6); + EXPECT_EQ(slice->slice_starts(1), 2); + EXPECT_EQ(slice->slice_limits(1), 7); + EXPECT_EQ(slice->slice_starts(2), 1); + EXPECT_EQ(slice->slice_limits(2), 32); + EXPECT_EQ(slice->slice_strides(0), 2); + EXPECT_EQ(slice->slice_strides(1), 4); + EXPECT_EQ(slice->slice_strides(2), 5); +} + // Test that empty operands of concatenates are removed. TEST_F(AlgebraicSimplifierTest, RemoveEmptyConcatenateOperands) { auto m = CreateNewVerifiedModule(); From 84e49eb656534976f37533882c4f533281492570 Mon Sep 17 00:00:00 2001 From: Russell Power Date: Tue, 21 Jul 2020 17:04:07 -0700 Subject: [PATCH 1002/2522] Adjust TPU shape allocation functions. 
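In short: the TPU shape-representation and padded-shape helpers move out of
tensorflow/core/tpu/kernels/tpu_util and into tpu_node_device.cc, they now go
through the executor C API (XlaShapeToTpuShapeRepresentation /
XlaShapeToTpuPaddedShape) instead of the old TpuCompile_ToTpuShapeRepresentation
util entry point, and they are wired into the XLA device options
(shape_representation_fn / padded_shape_fn) that were previously left as TODOs.
A condensed sketch of the new call pattern, from the TpuShapeRepresentation
implementation added in the diff below (error handling elided):

    ApiConverter::StackHelper<XLA_Shape> se_shape(xla_shape);
    ApiConverter::StackHelper<XLA_Shape> tpu_shape;
    StatusHelper status;
    tpu::ExecutorApiFn()->XlaShapeToTpuShapeRepresentationFn(
        &se_shape.value, type, use_fast_memory, &tpu_shape.value,
        status.c_status);
    return tpu_shape.AsCpp<xla::Shape>();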
PiperOrigin-RevId: 322473324 Change-Id: Ib7e55cfd2a25e3dccee33fc3208aa75b739e69b6 --- tensorflow/core/tpu/BUILD | 4 ++ tensorflow/core/tpu/kernels/BUILD | 5 ++ .../tpu_compilation_cache_interface.cc | 3 +- tensorflow/core/tpu/kernels/tpu_op_util.cc | 39 ++++++------ .../core/tpu/kernels/tpu_program_group.cc | 10 ++-- tensorflow/core/tpu/kernels/tpu_util.h | 11 ---- tensorflow/core/tpu/kernels/tpu_util_c_api.h | 8 --- tensorflow/core/tpu/tpu_library_init_fns.inc | 5 +- tensorflow/core/tpu/tpu_node_device.cc | 60 +++++++++++++++++-- .../stream_executor/tpu/c_api_conversions.cc | 12 +++- .../stream_executor/tpu/c_api_conversions.h | 2 + .../stream_executor/tpu/tpu_executor_c_api.h | 12 ++++ 12 files changed, 120 insertions(+), 51 deletions(-) diff --git a/tensorflow/core/tpu/BUILD b/tensorflow/core/tpu/BUILD index c781c5f67ac..d9d0bc440e7 100644 --- a/tensorflow/core/tpu/BUILD +++ b/tensorflow/core/tpu/BUILD @@ -179,10 +179,12 @@ cc_library( hdrs = ["tpu_node_device.h"], visibility = ["//visibility:public"], deps = [ + ":tpu_api", ":tpu_defs", ":tpu_node_device_util", "//tensorflow/compiler/jit:xla_device", "//tensorflow/compiler/jit/kernels:xla_ops", + "//tensorflow/compiler/tf2xla:common", "//tensorflow/compiler/tf2xla:tf2xla_util", "//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/core:framework_internal", @@ -196,6 +198,8 @@ cc_library( "//tensorflow/core/platform:status", "//tensorflow/core/tpu/kernels:tpu_configuration_ops", "//tensorflow/core/tpu/kernels:tpu_util", + "//tensorflow/stream_executor/tpu:c_api_conversions", + "//tensorflow/stream_executor/tpu:status_helper", "//tensorflow/stream_executor/tpu:tpu_node_context", "//tensorflow/stream_executor/tpu:tpu_platform_interface", "//tensorflow/stream_executor/tpu:tpu_stream_interface", diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index b3ae4770cda..9cb6e8cbb2b 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -349,6 +349,7 @@ cc_library( "//tensorflow/core:protos_all_cc", "//tensorflow/core/platform:casts", # buildcleaner: keep "//tensorflow/core/profiler/lib:traceme", + "//tensorflow/core/tpu:tpu_api", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/container:node_hash_map", "@com_google_absl//absl/strings", @@ -405,6 +406,10 @@ cc_library( cc_library( name = "tpu_compilation_metrics", srcs = ["tpu_compilation_metrics.cc"], + copts = select({ + WITH_TPU_SUPPORT: ["-DLIBTFTPU"], + DEFAULT: [], + }), deps = [ ":tpu_compilation_metrics_hdrs", ], diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.cc b/tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.cc index 2631dccbc21..9e1aedf92ce 100644 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.cc +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/core/platform/casts.h" #include "tensorflow/core/tpu/kernels/tpu_util.h" +#include "tensorflow/core/tpu/tpu_api.h" namespace tensorflow { namespace tpu { @@ -362,7 +363,7 @@ Status TpuCompilationCacheInterface::CompileIfKeyAbsentHelper( // Check if caller has disabled compilation. Set using // internal::ScopedTpuCompileDisabler. 
- if (!IsTpuCompilationEnabled()) { + if (!UtilApiFn()->TpuCompile_IsTpuCompilationEnabledFn()) { const string error_msg = strings::StrCat( "[TpuCompilationDisabled]: Compilation cache miss, but compilation " "disabled, session_name(", diff --git a/tensorflow/core/tpu/kernels/tpu_op_util.cc b/tensorflow/core/tpu/kernels/tpu_op_util.cc index b3b675e2734..0d02cac7377 100644 --- a/tensorflow/core/tpu/kernels/tpu_op_util.cc +++ b/tensorflow/core/tpu/kernels/tpu_op_util.cc @@ -76,9 +76,10 @@ std::string GuaranteedConstFingerprint( if (fingerprint_in_metadata.empty()) { uint64_t fingerprint = 0; for (const Tensor& constant : guaranteed_constants) { - fingerprint = TpuCompile_CreateGuaranteedConstFingerprint( - fingerprint, constant.tensor_data().data(), - constant.tensor_data().size()); + fingerprint = + tpu::UtilApiFn()->TpuCompile_CreateGuaranteedConstFingerprintFn( + fingerprint, constant.tensor_data().data(), + constant.tensor_data().size()); } return std::to_string(fingerprint); } else { @@ -109,21 +110,23 @@ TpuCompilationCacheKey CreateCompilationCacheKey( } } CompilationCacheKeyResult result = - TpuCompile_CreateCompilationCacheKey(CompilationCacheKeyProperty{ - config_prefix.data(), - shapes_prefix.data(), - function_name.data(), - mlir_module.data(), - flattened_device_ids.data(), - flattened_device_ids.size(), - guaranteed_constants.size(), - function_library_fingerprint, - metadata.num_cores_per_replica(), - metadata.num_replicas(), - mesh_state.data(), - }); - auto buffer_cleanup = gtl::MakeCleanup( - [result]() { TpuCompile_DestroyCompilationCacheKey(result); }); + tpu::UtilApiFn()->TpuCompile_CreateCompilationCacheKeyFn( + CompilationCacheKeyProperty{ + config_prefix.data(), + shapes_prefix.data(), + function_name.data(), + mlir_module.data(), + flattened_device_ids.data(), + flattened_device_ids.size(), + guaranteed_constants.size(), + function_library_fingerprint, + metadata.num_cores_per_replica(), + metadata.num_replicas(), + mesh_state.data(), + }); + auto buffer_cleanup = gtl::MakeCleanup([result]() { + tpu::UtilApiFn()->TpuCompile_DestroyCompilationCacheKeyFn(result); + }); TpuCompilationCacheKey key; key.prefix = result.key; key.debug_string = result.debug_string; diff --git a/tensorflow/core/tpu/kernels/tpu_program_group.cc b/tensorflow/core/tpu/kernels/tpu_program_group.cc index be27b7709dd..e22175af270 100644 --- a/tensorflow/core/tpu/kernels/tpu_program_group.cc +++ b/tensorflow/core/tpu/kernels/tpu_program_group.cc @@ -62,8 +62,8 @@ StatusOr> CompileAheadOfTime( size_t count = 0; StatusHelper status; VLOG(1) << "Run TpuCompile_CompileAheadOfTime."; - TpuCompile_CompileAheadOfTime(serialized_aot_request, &xla_tpu_programs, - &count, status.c_status); + CompileApiFn()->TpuCompile_CompileAheadOfTimeFn( + serialized_aot_request, &xla_tpu_programs, &count, status.c_status); VLOG(1) << "Run CompileAheadOfTime completed."; if (!status.status().ok()) { return status.status(); @@ -159,7 +159,8 @@ int64_t TpuProgramGroup::program_size() const { bool TpuProgramGroup::LogProgramMemorySummary() { bool success = true; for (const XLA_TpuProgram* tpu_program : tpu_programs_) { - success &= TpuProgram_LogProgramMemorySummary(tpu_program); + success &= + TpuProgramApiFn()->TpuProgram_LogProgramMemorySummaryFn(tpu_program); } return success; } @@ -167,7 +168,8 @@ bool TpuProgramGroup::LogProgramMemorySummary() { void TpuProgramGroup::UnloadAndDestroyPrograms() { for (XLA_TpuProgram* tpu_program : tpu_programs_) { StatusHelper status; - TpuProgram_UnloadAndDestroy(tpu_program, 
status.c_status); + TpuProgramApiFn()->TpuProgram_UnloadAndDestroyFn(tpu_program, + status.c_status); auto s = status.status(); if (!s.ok()) { LOG(ERROR) << "TpuProgramGroup::UnloadPrograms(): " << s.ToString(); diff --git a/tensorflow/core/tpu/kernels/tpu_util.h b/tensorflow/core/tpu/kernels/tpu_util.h index f404ca4a8b7..90eef621b95 100644 --- a/tensorflow/core/tpu/kernels/tpu_util.h +++ b/tensorflow/core/tpu/kernels/tpu_util.h @@ -55,17 +55,6 @@ Status DynamicShapesToTensorShapes(const OpInputList& dynamic_shapes, Status DynamicShapesToTensorShapes(const InputList& dynamic_shapes, std::vector* shapes); -// Given a tensor of `shape` and `type`, as what shape should it be stored on -// the TPU device? This function tranposes or flattens the excessively-padded -// tensors to rank 1, but leaves other tensor shapes alone. -xla::StatusOr TpuShapeRepresentation(const TensorShape& shape, - DataType type, - bool use_fast_memory); - -// Given a tensor, returns the shape of its representation on device, -// fully padded. Contents of `shape` are undefined on error. -Status TpuPaddedShapeFn(const Tensor& tensor, xla::Shape* shape); - // A callback called on exit. void LogAndExit(int code); } // namespace tpu diff --git a/tensorflow/core/tpu/kernels/tpu_util_c_api.h b/tensorflow/core/tpu/kernels/tpu_util_c_api.h index 37c55518721..ddc7a842f49 100644 --- a/tensorflow/core/tpu/kernels/tpu_util_c_api.h +++ b/tensorflow/core/tpu/kernels/tpu_util_c_api.h @@ -17,7 +17,6 @@ limitations under the License. #include "tensorflow/core/tpu/kernels/tpu_mesh_state_c_api.h" #include "tensorflow/core/tpu/libtftpu.h" -#include "tensorflow/stream_executor/tpu/c_api_decl.h" #include "tensorflow/stream_executor/tpu/proto_helper.h" // Property for creating compilation cache key. @@ -47,12 +46,6 @@ extern "C" { // Checks if whether a TPU compilation is enabled. TFTPU_CAPI_EXPORT bool TpuCompile_IsTpuCompilationEnabled(); -// Converts an XLA `Shape` into its equivalent TPU `Shape` representation. -TFTPU_CAPI_EXPORT void TpuCompile_ToTpuShapeRepresentation( - TpuSerializedProto serialized_xla_shape, int data_type, - bool use_fast_memory, TpuSerializedProto* serialized_tensor_shape, - SE_Status* status); - // XLA compilation cannot be cancelled. To avoid hanging the TF worker will exit // when cancellation is requested for an XLA compile op. 
Some tests require this // behavior to be disabled, and we test for this condition with the following @@ -85,7 +78,6 @@ TFTPU_CAPI_EXPORT uint64_t TpuCompile_CreateGuaranteedConstFingerprint( struct TfTpu_UtilApiFn { TFTPU_ADD_FN_IN_STRUCT(TpuCompile_IsTpuCompilationEnabled); TFTPU_ADD_FN_IN_STRUCT(TpuCompile_ShouldTpuCompileOpIgnoreCancellation); - TFTPU_ADD_FN_IN_STRUCT(TpuCompile_ToTpuShapeRepresentation); TFTPU_ADD_FN_IN_STRUCT(TpuTopology_AvailableCoreCount); TFTPU_ADD_FN_IN_STRUCT(TpuCompile_CreateCompilationCacheKey); TFTPU_ADD_FN_IN_STRUCT(TpuCompile_DestroyCompilationCacheKey); diff --git a/tensorflow/core/tpu/tpu_library_init_fns.inc b/tensorflow/core/tpu/tpu_library_init_fns.inc index f811769b364..f7f247596fe 100644 --- a/tensorflow/core/tpu/tpu_library_init_fns.inc +++ b/tensorflow/core/tpu/tpu_library_init_fns.inc @@ -1,3 +1,4 @@ +#include "third_party/tensorflow/stream_executor/tpu/tpu_executor_c_api.h" namespace { tensorflow::Status SetTpuConfigStructFns(void* library_handle) { @@ -181,6 +182,9 @@ tensorflow::Status SetExecutorStructFn(void* library_handle) { TFTPU_SET_FN(executor_fn, TpuExecutable_ExecuteAsyncOnStream); TFTPU_SET_FN(executor_fn, TpuExecutable_Free); + TFTPU_SET_FN(executor_fn, XlaShapeToTpuShapeRepresentation); + TFTPU_SET_FN(executor_fn, XlaShapeToTpuPaddedShape); + return tensorflow::Status::OK(); } @@ -202,7 +206,6 @@ tensorflow::Status SetTpuUtilStructFns(void* library_handle) { TFTPU_SET_FN(util_fn, TpuTopology_AvailableCoreCount); TFTPU_SET_FN(util_fn, TpuCompile_IsTpuCompilationEnabled); TFTPU_SET_FN(util_fn, TpuCompile_ShouldTpuCompileOpIgnoreCancellation); - TFTPU_SET_FN(util_fn, TpuCompile_ToTpuShapeRepresentation); TFTPU_SET_FN(util_fn, TpuCompile_CreateCompilationCacheKey); TFTPU_SET_FN(util_fn, TpuCompile_DestroyCompilationCacheKey); TFTPU_SET_FN(util_fn, TpuCompile_CreateGuaranteedConstFingerprint); diff --git a/tensorflow/core/tpu/tpu_node_device.cc b/tensorflow/core/tpu/tpu_node_device.cc index 071d53632ee..979bfab47db 100644 --- a/tensorflow/core/tpu/tpu_node_device.cc +++ b/tensorflow/core/tpu/tpu_node_device.cc @@ -18,6 +18,7 @@ limitations under the License. #include "tensorflow/compiler/jit/kernels/xla_ops.h" #include "tensorflow/compiler/jit/xla_device.h" #include "tensorflow/compiler/jit/xla_device_ops.h" +#include "tensorflow/compiler/tf2xla/shape_util.h" #include "tensorflow/compiler/tf2xla/tf2xla_util.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" #include "tensorflow/core/common_runtime/copy_tensor.h" @@ -28,10 +29,11 @@ limitations under the License. 
#include "tensorflow/core/framework/tensor_reference.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/public/session_options.h" -#include "tensorflow/core/tpu/kernels/tpu_configuration_ops.h" -#include "tensorflow/core/tpu/kernels/tpu_util.h" +#include "tensorflow/core/tpu/tpu_api.h" #include "tensorflow/core/tpu/tpu_defs.h" #include "tensorflow/core/tpu/tpu_node_device_util.h" +#include "tensorflow/stream_executor/tpu/c_api_conversions.h" +#include "tensorflow/stream_executor/tpu/status_helper.h" #include "tensorflow/stream_executor/tpu/tpu_node_context.h" #include "tensorflow/stream_executor/tpu/tpu_platform_interface.h" #include "tensorflow/stream_executor/tpu/tpu_stream_interface.h" @@ -43,6 +45,55 @@ static bool tpu_autoclustering_flag = false; static bool tpu_xla_device_failure_closes_chips_flag = true; static bool tpu_use_substreams_for_cross_tpu_device_transfers_flag = true; +// Given a tensor of `shape` and `type`, as what shape should it be stored on +// the TPU device? This function tranposes or flattens the excessively-padded +// tensors to rank 1, but leaves other tensor shapes alone. +xla::StatusOr TpuShapeRepresentation(const TensorShape& shape, + DataType type, + bool use_fast_memory) { + xla::Shape xla_shape; + TF_RETURN_IF_ERROR( + tensorflow::TensorShapeToXLAShape(type, shape, &xla_shape)); + ApiConverter::StackHelper se_shape(xla_shape); + ApiConverter::StackHelper tpu_shape; + StatusHelper status; + tpu::ExecutorApiFn()->XlaShapeToTpuShapeRepresentationFn( + &se_shape.value, type, use_fast_memory, &tpu_shape.value, + status.c_status); + if (!status.status().ok()) { + return status.status(); + } + return tpu_shape.AsCpp(); +} + +// Given a tensor, returns the shape of its representation on device, +// fully padded. Contents of `shape` are undefined on error. +Status TpuPaddedShapeFn(const Tensor& tensor, xla::Shape* shape) { + const tensorflow::XlaTensor* xla_tensor = + tensorflow::XlaTensor::FromTensor(&tensor); + if (xla_tensor == nullptr) { + return errors::InvalidArgument( + "Expected an XlaTensor when computing padded shape"); + } + + if (!xla_tensor->has_shaped_buffer()) { + return errors::InvalidArgument( + "XlaTensor is expected to have device memory allocated when " + "computing padded shape"); + } + + const xla::Shape& on_device_shape = + xla_tensor->shaped_buffer().on_device_shape(); + + StatusHelper status; + ApiConverter::StackHelper se_shape(on_device_shape); + ApiConverter::StackHelper tpu_shape; + tpu::ExecutorApiFn()->XlaShapeToTpuPaddedShapeFn( + &se_shape.value, &tpu_shape.value, status.c_status); + *shape = tpu_shape.AsCpp(); + return Status::OK(); +} + // Check if TPU has been initialized. TPU initialization is not necessary // for 1x1. Status CheckIfTPUInitialized() { @@ -315,9 +366,8 @@ Status TpuNodeDeviceFactory::CreateDevices( options.device_ordinal = i; options.compilation_device_name = DEVICE_TPU_XLA_JIT; options.use_multiple_streams = true; - // TODO(jiawenhao): Implement and enable these. 
- // options.shape_representation_fn = tpu::TpuShapeRepresentation; - // options.padded_shape_fn = tpu::TpuPaddedShapeFn; + options.shape_representation_fn = &TpuShapeRepresentation; + options.padded_shape_fn = &TpuPaddedShapeFn; auto device = absl::make_unique(session_options, options); // The GpuDeviceInfo actually provides information not only for GPU diff --git a/tensorflow/stream_executor/tpu/c_api_conversions.cc b/tensorflow/stream_executor/tpu/c_api_conversions.cc index 76046c514ad..599233a04b2 100644 --- a/tensorflow/stream_executor/tpu/c_api_conversions.cc +++ b/tensorflow/stream_executor/tpu/c_api_conversions.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/stream_executor/tpu/c_api_conversions.h" #include "tensorflow/stream_executor/tpu/c_api_defn.h" +#include "tensorflow/stream_executor/tpu/tpu_executor_c_api.h" #include "tensorflow/stream_executor/tpu/tpu_platform_interface.h" namespace ApiConverter { @@ -119,11 +120,16 @@ SE_MaybeOwningDeviceMemory ToC(stream_executor::OwningDeviceMemory* mem) { return se_mem; } +void ToC(const stream_executor::DeviceMemoryBase& base, + SE_DeviceMemoryBase* se_base) { + se_base->opaque = const_cast(base.opaque()); + se_base->payload = base.payload(); + se_base->size = base.size(); +} + SE_DeviceMemoryBase ToC(const stream_executor::DeviceMemoryBase& base) { SE_DeviceMemoryBase se_base; - se_base.opaque = const_cast(base.opaque()); - se_base.payload = base.payload(); - se_base.size = base.size(); + ToC(base, &se_base); return se_base; } diff --git a/tensorflow/stream_executor/tpu/c_api_conversions.h b/tensorflow/stream_executor/tpu/c_api_conversions.h index 15b16388f8c..bfe5f37204c 100644 --- a/tensorflow/stream_executor/tpu/c_api_conversions.h +++ b/tensorflow/stream_executor/tpu/c_api_conversions.h @@ -35,6 +35,8 @@ namespace ApiConverter { // se::DeviceMemoryBase SE_DeviceMemoryBase ToC(const stream_executor::DeviceMemoryBase& base); +void ToC(const stream_executor::DeviceMemoryBase& base, + SE_DeviceMemoryBase* se_base); stream_executor::DeviceMemoryBase FromC(const SE_DeviceMemoryBase& se_base); void Free(SE_DeviceMemoryBase*); diff --git a/tensorflow/stream_executor/tpu/tpu_executor_c_api.h b/tensorflow/stream_executor/tpu/tpu_executor_c_api.h index c1e68cf03a5..0714b73a85b 100644 --- a/tensorflow/stream_executor/tpu/tpu_executor_c_api.h +++ b/tensorflow/stream_executor/tpu/tpu_executor_c_api.h @@ -233,6 +233,15 @@ TFTPU_CAPI_EXPORT void TpuExecutable_ExecuteAsyncOnStream( TFTPU_CAPI_EXPORT void TpuExecutable_Free(SE_Executable*); +// Converts an XLA `Shape` into its equivalent TPU `Shape` representation. +TFTPU_CAPI_EXPORT void XlaShapeToTpuShapeRepresentation( + XLA_Shape* serialized_xla_shape, int data_type, bool use_fast_memory, + XLA_Shape* serialized_tpu_shape, SE_Status* status); + +TFTPU_CAPI_EXPORT void XlaShapeToTpuPaddedShape(XLA_Shape* serialized_xla_shape, + XLA_Shape* serialized_tpu_shape, + SE_Status* status); + struct TfTpu_ExecutorApiFn { TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_New); TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_Free); @@ -343,6 +352,9 @@ struct TfTpu_ExecutorApiFn { TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_ShapeSize); TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_ExecuteAsyncOnStream); TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_Free); + + TFTPU_ADD_FN_IN_STRUCT(XlaShapeToTpuShapeRepresentation); + TFTPU_ADD_FN_IN_STRUCT(XlaShapeToTpuPaddedShape); }; } From 4da55605f05fd7f94ba081f6f7ca10cbe57a56de Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 21 Jul 2020 17:11:25 -0700 Subject: [PATCH 1003/2522] Update tensorboard dependency to 2.3.x TensorBoard release: https://pypi.org/project/tensorboard/2.3.0/ PiperOrigin-RevId: 322474571 Change-Id: I7cb6bcbb101cb9b10d04b832279e62ea9066abca --- tensorflow/tools/pip_package/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index b1337b9070c..67d0e138517 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -63,7 +63,7 @@ REQUIRED_PACKAGES = [ 'numpy >= 1.16.0, < 1.19.0', 'opt_einsum >= 2.3.2', 'protobuf >= 3.9.2', - 'tensorboard >= 2.2.0, < 2.3.0', + 'tensorboard >= 2.3.0, < 3', 'tensorflow_estimator >= 2.2.0, < 2.3.0', 'termcolor >= 1.1.0', 'wrapt >= 1.11.1', From 147843127e8edb81cc2922bc6e1d16a43f6b4478 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Tue, 21 Jul 2020 17:34:25 -0700 Subject: [PATCH 1004/2522] Update benchmark to be a binary instead of test PiperOrigin-RevId: 322477922 Change-Id: I1fd810f37e26e5141f96bbdd7dbb14bd84d8c1f6 --- .../python/ops/numpy_ops/integration_test/benchmarks/BUILD | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tensorflow/python/ops/numpy_ops/integration_test/benchmarks/BUILD b/tensorflow/python/ops/numpy_ops/integration_test/benchmarks/BUILD index 31f8d4575b3..012e1fbfcf3 100644 --- a/tensorflow/python/ops/numpy_ops/integration_test/benchmarks/BUILD +++ b/tensorflow/python/ops/numpy_ops/integration_test/benchmarks/BUILD @@ -1,5 +1,3 @@ -load("//tensorflow:tensorflow.bzl", "cuda_py_test") - package( default_visibility = [ "//tensorflow:internal", @@ -7,11 +5,10 @@ package( licenses = ["notice"], # Apache 2.0 ) -cuda_py_test( +py_binary( name = "micro_benchmarks", srcs = ["micro_benchmarks.py"], python_version = "PY3", - shard_count = 5, deps = [ ":numpy_mlp", ":tf_numpy_mlp", From d8bbfa5d53fcb9691d2c8712513ad0027ccd9432 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 21 Jul 2020 17:47:36 -0700 Subject: [PATCH 1005/2522] Integrate LLVM at llvm/llvm-project@b2589200957a Updates LLVM usage to match [b2589200957a](https://github.com/llvm/llvm-project/commit/b2589200957a) PiperOrigin-RevId: 322479556 Change-Id: I32993fde65a91ea8e869882b3c7546b51695a81c --- tensorflow/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 9585dfa3d95..4c1847a8c6b 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -711,8 +711,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "8a268bec1b02dd446fbc36e20d0a9af45d764f67" - LLVM_SHA256 = "95fd17a9235584e6ea17d7733458557846f901f904fa2ca1d751ea1ad6b49635" + LLVM_COMMIT = "b2589200957af50e7d166afca9df6ad46d7418c6" + LLVM_SHA256 = "24c69e3a036d0ec6650b4e05f544aff8a9df65759baf24b13b7fcd4d0948478a" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From 2df63a4fa10b6da8c4f92bb55fe14bb30b0e4b73 Mon Sep 17 00:00:00 2001 From: Russell Power Date: Tue, 21 Jul 2020 17:48:06 -0700 Subject: [PATCH 1006/2522] Fix missing status check. 
PiperOrigin-RevId: 322479633 Change-Id: I6452af8b6f106d6681307caab1c9b41e514d4694 --- tensorflow/core/tpu/tpu_library_init_fns.inc | 1 + tensorflow/core/tpu/tpu_node_device.cc | 3 +++ 2 files changed, 4 insertions(+) diff --git a/tensorflow/core/tpu/tpu_library_init_fns.inc b/tensorflow/core/tpu/tpu_library_init_fns.inc index f7f247596fe..b88705e4100 100644 --- a/tensorflow/core/tpu/tpu_library_init_fns.inc +++ b/tensorflow/core/tpu/tpu_library_init_fns.inc @@ -1,4 +1,5 @@ #include "third_party/tensorflow/stream_executor/tpu/tpu_executor_c_api.h" + namespace { tensorflow::Status SetTpuConfigStructFns(void* library_handle) { diff --git a/tensorflow/core/tpu/tpu_node_device.cc b/tensorflow/core/tpu/tpu_node_device.cc index 979bfab47db..42a1533a97c 100644 --- a/tensorflow/core/tpu/tpu_node_device.cc +++ b/tensorflow/core/tpu/tpu_node_device.cc @@ -90,6 +90,9 @@ Status TpuPaddedShapeFn(const Tensor& tensor, xla::Shape* shape) { ApiConverter::StackHelper tpu_shape; tpu::ExecutorApiFn()->XlaShapeToTpuPaddedShapeFn( &se_shape.value, &tpu_shape.value, status.c_status); + if (!status.ok()) { + return status.status(); + } *shape = tpu_shape.AsCpp(); return Status::OK(); } From cdbae30cd46c62a0684947ad563128a99c835d44 Mon Sep 17 00:00:00 2001 From: Yanhui Liang Date: Tue, 21 Jul 2020 18:32:35 -0700 Subject: [PATCH 1007/2522] Run memory_profiler on Keras model. PiperOrigin-RevId: 322485342 Change-Id: Ief2694aa2c8e8e69d99eea42f5eb94e080ef1ca6 --- tensorflow/python/keras/benchmarks/BUILD | 15 ++++ .../keras/benchmarks/model_memory_profile.py | 80 +++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 tensorflow/python/keras/benchmarks/model_memory_profile.py diff --git a/tensorflow/python/keras/benchmarks/BUILD b/tensorflow/python/keras/benchmarks/BUILD index 11d3257f5d2..d86e9479259 100644 --- a/tensorflow/python/keras/benchmarks/BUILD +++ b/tensorflow/python/keras/benchmarks/BUILD @@ -179,3 +179,18 @@ py_library( "//tensorflow:tensorflow_py", ], ) + +# Run memory profiler on Keras model. +# Please make sure `meomry_profiler` is installed. +# To run the memory profiler: +# With CPU: +# bazel run -c opt model_memory_profile -- --model=YOUR_MODEL_NAME +# With GPU: +# bazel run -c opt --config=cuda model_memory_profile -- --model=YOUR_MODEL_NAME +py_binary( + name = "model_memory_profile", + srcs = ["model_memory_profile.py"], + python_version = "PY3", + tags = ["no_oss"], + deps = ["//tensorflow:tensorflow_py"], +) diff --git a/tensorflow/python/keras/benchmarks/model_memory_profile.py b/tensorflow/python/keras/benchmarks/model_memory_profile.py new file mode 100644 index 00000000000..eb548a033d5 --- /dev/null +++ b/tensorflow/python/keras/benchmarks/model_memory_profile.py @@ -0,0 +1,80 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Memory profile on Keras model. + +To add a new model for memory profile: +1. Create the model. +2. 
Decorate it with `@memory_profiler.profile`. +3. Add the model function to the dict `models`. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl import app +from absl import flags + +from absl import logging +import numpy as np + +import tensorflow as tf + +try: + import memory_profiler # pylint:disable=g-import-not-at-top +except ImportError: + memory_profiler = None + + +FLAGS = flags.FLAGS +flags.DEFINE_string('model', None, + 'The model to run memory profiler.') + + +@memory_profiler.profile +def _imdb_lstm_model(): + """LSTM model.""" + x_train = np.random.randint(0, 1999, size=(2500, 100)) + y_train = np.random.random((2500, 1)) + + # IMDB LSTM model. + model = tf.keras.Sequential() + model.add(tf.keras.layers.Embedding(20000, 128)) + model.add(tf.keras.layers.LSTM(128, dropout=0.2, recurrent_dropout=0.2)) + model.add(tf.keras.layers.Dense(1, activation='sigmoid')) + + model.compile('sgd', 'mse') + # Warm up the model with one epoch. + model.fit(x_train, y_train, batch_size=512, epochs=3) + + +def main(_): + # Add the model for memory profile. + models = { + 'lstm': _imdb_lstm_model, + } + + if FLAGS.model in models: + logging.info('Run memory profile on %s.', FLAGS.model) + run_model = models[FLAGS.model] + run_model() + else: + logging.info('The model does not exist. Please verify the model name.') + + +if __name__ == '__main__': + flags.mark_flags_as_required(['model']) + if memory_profiler: + app.run(main) + From 4c886a5cfaa1bc397b674fc09907829d7e0e9879 Mon Sep 17 00:00:00 2001 From: Terry Heo Date: Tue, 21 Jul 2020 18:37:38 -0700 Subject: [PATCH 1008/2522] pi-python37: Install pip package with install_pip_packages_by_version.sh In pi-python37 container, apt-get install doesn't install correct version. Update Dockerfile to use install_pip_packages_by_version.sh to install proper PIP packages for Python 3.7. This changes fixes issue #35116. Test: Use the following command to build RPI3 PIP wheel for Python 3.7. $ CI_DOCKER_EXTRA_PARAMS="-e CI_BUILD_PYTHON=python3.7 -e CROSSTOOL_PYTHON_INCLUDE_PATH=/usr/include/python3.7" \ tensorflow/tools/ci_build/ci_build.sh PI-PYTHON37 \ tensorflow/tools/ci_build/pi/build_raspberry_pi.sh PiperOrigin-RevId: 322486001 Change-Id: Ide83818522223dcbe43b16b971c3c5759d8c55ca --- tensorflow/tools/ci_build/Dockerfile.pi-python37 | 12 +++++++----- .../install/install_pi_python37_toolchain.sh | 4 +++- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/tensorflow/tools/ci_build/Dockerfile.pi-python37 b/tensorflow/tools/ci_build/Dockerfile.pi-python37 index 2c1cd2f8942..2432b727bc7 100644 --- a/tensorflow/tools/ci_build/Dockerfile.pi-python37 +++ b/tensorflow/tools/ci_build/Dockerfile.pi-python37 @@ -8,17 +8,19 @@ RUN /install/install_bootstrap_deb_packages.sh RUN add-apt-repository -y ppa:openjdk-r/ppa && \ add-apt-repository -y ppa:george-edison55/cmake-3.x RUN /install/install_deb_packages.sh -RUN /install/install_pip_packages.sh + +# The following line installs the Python 3.7 cross-compilation toolchain. +RUN /install/install_pi_python37_toolchain.sh + +RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.7" +RUN ln -sf /usr/local/lib/python3.7/dist-packages/numpy/core/include/numpy /usr/include/python3.7/numpy + RUN /install/install_bazel.sh RUN /install/install_proto3.sh RUN /install/install_buildifier.sh RUN /install/install_auditwheel.sh RUN /install/install_golang.sh -# The following line installs the Python cross-compilation toolchain. 
All the -# preceding dependencies should be kept in sync with the main CPU docker file. -RUN /install/install_pi_python37_toolchain.sh - # Set up the master bazelrc configuration file. COPY install/.bazelrc /etc/bazel.bazelrc diff --git a/tensorflow/tools/ci_build/install/install_pi_python37_toolchain.sh b/tensorflow/tools/ci_build/install/install_pi_python37_toolchain.sh index 3bda56af648..446a7ffab2a 100755 --- a/tensorflow/tools/ci_build/install/install_pi_python37_toolchain.sh +++ b/tensorflow/tools/ci_build/install/install_pi_python37_toolchain.sh @@ -23,6 +23,8 @@ echo 'deb [arch=arm64,armhf] http://ports.ubuntu.com/ xenial-backports main rest sed -i 's#deb http://archive.ubuntu.com/ubuntu/#deb [arch=amd64] http://archive.ubuntu.com/ubuntu/#g' /etc/apt/sources.list yes | add-apt-repository ppa:deadsnakes/ppa apt-get update -apt-get install -y python3.7 python3-numpy python3.7-dev python3-pip +apt-get install -y python3.7 python3.7-dev +#/usr/local/bin/python3.7 is needed to use /install/install_pip_packages_by_version.sh +ln -sf /usr/bin/python3.7 /usr/local/bin/python3.7 apt-get install -y libpython3.7-dev:armhf apt-get install -y libpython3.7-dev:arm64 From ad37d15d8ed30fc228e11fa37b463aa3a1826939 Mon Sep 17 00:00:00 2001 From: Marat Dukhan Date: Tue, 21 Jul 2020 18:47:57 -0700 Subject: [PATCH 1009/2522] Document how to enable XNNPACK in pre-built TFLite binaries PiperOrigin-RevId: 322487183 Change-Id: I30dedcf665cb8ccef36855b82162c27e6b1dc728 --- tensorflow/lite/delegates/xnnpack/README.md | 42 ++++++++++++++++++++- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/delegates/xnnpack/README.md b/tensorflow/lite/delegates/xnnpack/README.md index 47c5d7db907..b5c090d56ea 100644 --- a/tensorflow/lite/delegates/xnnpack/README.md +++ b/tensorflow/lite/delegates/xnnpack/README.md @@ -8,9 +8,47 @@ to use the XNNPACK library as an inference engine for TensorFlow Lite. ## Using XNNPACK engine with TensorFlow Lite interpreter XNNPACK integrates with TensorFlow Lite interpreter through the delegation -mechanism. There are three methods to enable XNNPACK engine in TensorFlow Lite. +mechanism. TensorFlow Lite supports several methods to enable XNNPACK +for floating-point inference. -### Enable XNNPACK via Bazel build flags (recommended) +### Enable XNNPACK via Java API on Android (recommended on Android) + +Pre-built [nightly TensorFlow Lite binaries for Android](https://www.tensorflow.org/lite/guide/android#use_the_tensorflow_lite_aar_from_jcenter) +include XNNPACK, albeit it is disabled by default. Use the `setUseXNNPACK` +method in `Interpreter.Options` class to enable it: + +```java +Interpreter.Options interpreterOptions = new Interpreter.Options(); +interpreterOptions.setUseXNNPACK(true); +Interpreter interpreter = new Interpreter(model, interpreterOptions); +``` + +### Enable XNNPACK via Swift/Objective-C API on iOS (recommended on iOS) + +Pre-built [nightly TensorFlow Lite CocoaPods](https://www.tensorflow.org/lite/guide/ios#specifying_versions) +include XNNPACK, but do not enable it by default. 
Swift developers can use +`InterpreterOptions` object to enable XNNPACK: + +```swift +var options = InterpreterOptions() +options.isXNNPackEnabled = true +var interpreter = try Interpreter(modelPath: "model/path", options: options) +``` + +Objective-C developers can enable XNNPACK via a new property in the +`TFLInterpreterOptions` class: + +```objc +TFLInterpreterOptions *options = [[TFLInterpreterOptions alloc] init]; +options.useXNNPACK = YES; +NSError *error; +TFLInterpreter *interpreter = + [[TFLInterpreter alloc] initWithModelPath:@"model/path" + options:options + error:&error]; +``` + +### Enable XNNPACK via Bazel build flags (recommended on desktop) When building TensorFlow Lite with Bazel, add `--define tflite_with_xnnpack=true`, and the TensorFlow Lite interpreter will From 49199ce36070d75dafb24b063f3912958f620f00 Mon Sep 17 00:00:00 2001 From: Jaesung Chung Date: Tue, 21 Jul 2020 19:36:37 -0700 Subject: [PATCH 1010/2522] Fix broken tflite support project's links in the guide documents. PiperOrigin-RevId: 322492499 Change-Id: Id8f760a195c271aa5c2dca99ea075d517deec96a --- tensorflow/lite/g3doc/convert/metadata.md | 24 +++++++++++------------ tensorflow/lite/g3doc/guide/android.md | 4 ++-- tensorflow/lite/g3doc/guide/codegen.md | 2 +- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/tensorflow/lite/g3doc/convert/metadata.md b/tensorflow/lite/g3doc/convert/metadata.md index 6cc3e4aad84..48345d2756a 100644 --- a/tensorflow/lite/g3doc/convert/metadata.md +++ b/tensorflow/lite/g3doc/convert/metadata.md @@ -34,17 +34,17 @@ TensorFlow Lite metadata tooling supports both Python 2 and Python 3. ## Adding metadata There are three parts to the model metadata in the -[schema](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/experimental/support/metadata/metadata_schema.fbs): +[schema](https://github.com/tensorflow/tflite-support/blob/master/tensorflow_lite_support/metadata/metadata_schema.fbs): 1. **Model information** - Overall description of the model as well as items such as licence terms. See - [ModelMetadata](https://github.com/tensorflow/tensorflow/blob/268853ee81edab09e07f455cc918f7ef9a421485/tensorflow/lite/experimental/support/metadata/metadata_schema.fbs#L464). + [ModelMetadata](https://github.com/tensorflow/tflite-support/blob/4cd0551658b6e26030e0ba7fc4d3127152e0d4ae/tensorflow_lite_support/metadata/metadata_schema.fbs#L640). 2. **Input information** - Description of the inputs and pre-processing required such as normalization. See - [SubGraphMetadata.input_tensor_metadata](https://github.com/tensorflow/tensorflow/blob/268853ee81edab09e07f455cc918f7ef9a421485/tensorflow/lite/experimental/support/metadata/metadata_schema.fbs#L452). + [SubGraphMetadata.input_tensor_metadata](https://github.com/tensorflow/tflite-support/blob/4cd0551658b6e26030e0ba7fc4d3127152e0d4ae/tensorflow_lite_support/metadata/metadata_schema.fbs#L590). 3. **Output information** - Description of the output and post-processing required such as mapping to labels. See - [SubGraphMetadata.output_tensor_metadata](https://github.com/tensorflow/tensorflow/blob/268853ee81edab09e07f455cc918f7ef9a421485/tensorflow/lite/experimental/support/metadata/metadata_schema.fbs#L458). + [SubGraphMetadata.output_tensor_metadata](https://github.com/tensorflow/tflite-support/blob/4cd0551658b6e26030e0ba7fc4d3127152e0d4ae/tensorflow_lite_support/metadata/metadata_schema.fbs#L599). 
Since TensorFlow Lite only supports single subgraph at this point, the [TensorFlow Lite code generator](../guide/codegen.md#generate-code-with-tensorflow-lite-android-code-generator) @@ -65,7 +65,7 @@ Lite metadata: * Feature - Numbers which are unsigned integers or float32. * Image - Metadata currently supports RGB and greyscale images. * Bounding box - Rectangular shape bounding boxes. The schema supports - [a variety of numbering schemes](https://github.com/tensorflow/tensorflow/blob/268853ee81edab09e07f455cc918f7ef9a421485/tensorflow/lite/experimental/support/metadata/metadata_schema.fbs#L165). + [a variety of numbering schemes](https://github.com/tensorflow/tflite-support/blob/4cd0551658b6e26030e0ba7fc4d3127152e0d4ae/tensorflow_lite_support/metadata/metadata_schema.fbs#L214). ### Pack the associated files @@ -87,7 +87,7 @@ file type and where the file is attached to (i.e. `ModelMetadata`, `SubGraphMetadata`, and `TensorMetadata`), [the TensorFlow Lite Android code generator](../guide/codegen.md) may apply corresponding pre/post processing automatically to the object. See -[the \ section of each associate file type](https://github.com/tensorflow/tensorflow/blob/268853ee81edab09e07f455cc918f7ef9a421485/tensorflow/lite/experimental/support/metadata/metadata_schema.fbs#L37-L77) +[the \ section of each associate file type](https://github.com/tensorflow/tflite-support/blob/4cd0551658b6e26030e0ba7fc4d3127152e0d4ae/tensorflow_lite_support/metadata/metadata_schema.fbs#L77-L127) in the schema for more details. ### Normalization and quantization parameters @@ -351,7 +351,7 @@ with open(export_json_file, "w") as f: ## Metadata versioning The -[metadata schema](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/experimental/support/metadata/metadata_schema.fbs) +[metadata schema](https://github.com/tensorflow/tflite-support/blob/master/tensorflow_lite_support/metadata/metadata_schema.fbs) is versioned both by the Semantic versioning number, which tracks the changes of the schema file, and by the Flatbuffers file identification, which indicates the true version compatibility. @@ -359,11 +359,11 @@ true version compatibility. ### The Semantic versioning number The metadata schema is versioned by the -[Semantic versioning number](https://github.com/tensorflow/tensorflow/blob/72d30dfb8bc58be931604f853bd161a11b7c9fcc/tensorflow/lite/experimental/support/metadata/metadata_schema.fbs#L53), +[Semantic versioning number](https://github.com/tensorflow/tflite-support/blob/4cd0551658b6e26030e0ba7fc4d3127152e0d4ae/tensorflow_lite_support/metadata/metadata_schema.fbs#L53), such as MAJOR.MINOR.PATCH. It tracks schema changes according to the rules -[here](https://github.com/tensorflow/tensorflow/blob/72d30dfb8bc58be931604f853bd161a11b7c9fcc/tensorflow/lite/experimental/support/metadata/metadata_schema.fbs#L32-L44). +[here](https://github.com/tensorflow/tflite-support/blob/4cd0551658b6e26030e0ba7fc4d3127152e0d4ae/tensorflow_lite_support/metadata/metadata_schema.fbs#L32-L44). See the -[history of fields](https://github.com/tensorflow/tensorflow/blob/72d30dfb8bc58be931604f853bd161a11b7c9fcc/tensorflow/lite/experimental/support/metadata/metadata_schema.fbs#L63) +[history of fields](https://github.com/tensorflow/tflite-support/blob/4cd0551658b6e26030e0ba7fc4d3127152e0d4ae/tensorflow_lite_support/metadata/metadata_schema.fbs#L63) added after version `1.0.0`. ### The Flatbuffers file identification @@ -373,7 +373,7 @@ does not imply the true incompatibility. 
When bumping up the MAJOR number, it does not necessarily mean the backwards compatibility is broken. Therefore, we use the [Flatbuffers file identification](https://google.github.io/flatbuffers/md__schemas.html), -[file_identifiler](https://github.com/tensorflow/tensorflow/blob/72d30dfb8bc58be931604f853bd161a11b7c9fcc/tensorflow/lite/experimental/support/metadata/metadata_schema.fbs#L61), +[file_identifiler](https://github.com/tensorflow/tflite-support/blob/4cd0551658b6e26030e0ba7fc4d3127152e0d4ae/tensorflow_lite_support/metadata/metadata_schema.fbs#L61), to denote the true compatibility of the metadata schema. The file identifier is exactly 4 characters long. It is fixed to a certain metadata schema and not subject to change by users. If the backward compatibility of the metadata schema @@ -384,7 +384,7 @@ frequently than the metadata_version. ### The minimum necessary metadata parser version The -[minimum necessary metadata parser version](https://github.com/tensorflow/tensorflow/blob/72d30dfb8bc58be931604f853bd161a11b7c9fcc/tensorflow/lite/experimental/support/metadata/metadata_schema.fbs#L565) +[minimum necessary metadata parser version](https://github.com/tensorflow/tflite-support/blob/4cd0551658b6e26030e0ba7fc4d3127152e0d4ae/tensorflow_lite_support/metadata/metadata_schema.fbs#L681) is the minimum version of metadata parser (the Flatbuffers generated code) that can read the metadata Flatbuffers in full. The version is effectively the largest version number among the versions of all the fields populated and the diff --git a/tensorflow/lite/g3doc/guide/android.md b/tensorflow/lite/g3doc/guide/android.md index 2c148ecbe7d..41b4c213504 100644 --- a/tensorflow/lite/g3doc/guide/android.md +++ b/tensorflow/lite/g3doc/guide/android.md @@ -16,7 +16,7 @@ to continuously classify whatever it sees from the device's rear-facing camera. The application can run either on device or emulator. Inference is performed using the TensorFlow Lite Java API and the -[TensorFlow Lite Android Support Library](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/experimental/support/java/README.md). +[TensorFlow Lite Android Support Library](https://github.com/tensorflow/tflite-support/blob/master/tensorflow_lite_support/java/README.md). The demo app classifies frames in real-time, displaying the top most probable classifications. It allows the user to choose between a floating point or [quantized](https://www.tensorflow.org/lite/performance/post_training_quantization) @@ -53,7 +53,7 @@ arrays. It also provides pre- and post-processing units that perform tasks such as image resizing and cropping. To get started, follow the instructions in the -[TensorFlow Lite Android Support Library README.md](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/experimental/support/java/README.md). +[TensorFlow Lite Android Support Library README.md](https://github.com/tensorflow/tflite-support/blob/master/tensorflow_lite_support/java/README.md). ### Use the TensorFlow Lite AAR from JCenter diff --git a/tensorflow/lite/g3doc/guide/codegen.md b/tensorflow/lite/g3doc/guide/codegen.md index 39abc5d7679..b74bfc5ed40 100644 --- a/tensorflow/lite/g3doc/guide/codegen.md +++ b/tensorflow/lite/g3doc/guide/codegen.md @@ -27,7 +27,7 @@ Lite model with typed objects such as `Bitmap` and `Rect`. The usefulness of the code generator depend on the completeness of the TensorFlow Lite model's metadata entry. 
Refer to the `` section under relevant fields in -[metadata_schema.fbs](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/experimental/support/metadata/metadata_schema.fbs), +[metadata_schema.fbs](https://github.com/tensorflow/tflite-support/blob/master/tensorflow_lite_support/metadata/metadata_schema.fbs), to see how the codegen tool parses each field. ### Generate Wrapper Code From 6a9a8f301f1f3110027090ce3aa0e39b3fe08bc0 Mon Sep 17 00:00:00 2001 From: Marat Dukhan Date: Tue, 21 Jul 2020 19:58:23 -0700 Subject: [PATCH 1011/2522] Document RESHAPE support in XNNPACK delegate PiperOrigin-RevId: 322494306 Change-Id: Ic209a43baae4193934d5f9b3089abd095c0d7686 --- tensorflow/lite/delegates/xnnpack/README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tensorflow/lite/delegates/xnnpack/README.md b/tensorflow/lite/delegates/xnnpack/README.md index b5c090d56ea..47ed79033cf 100644 --- a/tensorflow/lite/delegates/xnnpack/README.md +++ b/tensorflow/lite/delegates/xnnpack/README.md @@ -253,6 +253,13 @@ Below is the list of current operators and limitations: * Inputs and outputs must be in 32-bit floating-point format. +### `RESHAPE` + +* The first input and the output must be in 32-bit floating-point format. +* The second input (the input with the new shape specification) must be either + static (use `kTfLiteMmapRo` allocation type), or absent (with the new shape + specified via `ReshapeOptions` table). + ### `ROUND` * Inputs and outputs must be in 32-bit floating-point format. From c0b209175d87e367c92f256143ff4d1182a86e66 Mon Sep 17 00:00:00 2001 From: Smit Hinsu Date: Tue, 21 Jul 2020 20:00:19 -0700 Subject: [PATCH 1012/2522] Create entry barrier in parallel_execute_to_islands pass only if it isn't no-op Filter out island inputs that are block arguments and if after that the island doesn't have any inputs and control dependency, do not create the entry barrier island op. 
PiperOrigin-RevId: 322494456 Change-Id: Ibcb07c7999ee91396d8f459be61627189d5e2b1f --- .../tests/parallel_execute_to_islands.mlir | 40 +++++++++++++++++-- .../transforms/parallel_execute_to_islands.cc | 24 +++++++---- 2 files changed, 53 insertions(+), 11 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/parallel_execute_to_islands.mlir b/tensorflow/compiler/mlir/tensorflow/tests/parallel_execute_to_islands.mlir index 31ca7b28fe7..99e029d52c2 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/parallel_execute_to_islands.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/parallel_execute_to_islands.mlir @@ -17,11 +17,9 @@ func @check_regions_to_islands() { return } -// CHECK: %[[ISLAND_INPUT_CTL:[a-z_0-9]*]] = tf_executor.island { -// CHECK-NEXT: tf_executor.yield -// CHECK: %[[ISLAND_1_CTL:[a-z_0-9]*]] = tf_executor.island(%[[ISLAND_INPUT_CTL]]) { +// CHECK: %[[ISLAND_1_CTL:[a-z_0-9]*]] = tf_executor.island { // CHECK: tf_executor.yield -// CHECK: %[[ISLAND_2_CTL:[a-z_0-9]*]] = tf_executor.island(%[[ISLAND_INPUT_CTL]]) { +// CHECK: %[[ISLAND_2_CTL:[a-z_0-9]*]] = tf_executor.island { // CHECK: tf_executor.yield // CHECK: %{{.*}} = tf_executor.island(%[[ISLAND_1_CTL]], %[[ISLAND_2_CTL]]) { // CHECK-NEXT: tf_executor.yield @@ -192,3 +190,37 @@ func @check_output_barrier_correctly_forwards_outputs(%arg0 : tensor) -> ten // CHECK: tf_executor.yield %[[OP_C_OUTPUT]] : tensor // CHECK: %[[OUTPUT_SINK_OUTPUT:[a-z_0-9]*]]:2, %[[OUTPUT_SINK_CTL:[a-z_0-9]*]] = tf_executor.island { // CHECK-NEXT: tf_executor.yield %[[ISLAND_1_OUTPUT]], %[[ISLAND_2_OUTPUT]] : tensor, tensor + +// CHECK-LABEL: func @check_parallel_execute_using_args +// CHECK-SAME: (%[[ARG_0:[a-z0-9]*]]: tensor) +func @check_parallel_execute_using_args(%arg0 : tensor) { + tf_executor.graph { + %1:2 = tf_executor.island { + %2 = "tf.opA"(%arg0) : (tensor) -> tensor + tf_executor.yield %2 : tensor + } + %2:2 = tf_executor.island { + %3 = "tf.opB"(%arg0) : (tensor) -> tensor + tf_executor.yield %3 : tensor + } + tf_executor.island() { + "tf_device.parallel_execute"() ({ + %4 = "tf.opC"(%arg0, %1#0) : (tensor, tensor) -> tensor + tf_device.return %4 : tensor + }, + { + %5 = "tf.opD"(%arg0, %2#0) : (tensor, tensor) -> tensor + tf_device.return %5 : tensor + }) {} : () -> (tensor, tensor) + tf_executor.yield + } + tf_executor.fetch + } + return +} + +// Verify that args are directly accessed in newly created island without alias +// through entry barrier. + +// CHECK: "tf.opC"(%[[ARG_0]] +// CHECK: "tf.opD"(%[[ARG_0]] diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/parallel_execute_to_islands.cc b/tensorflow/compiler/mlir/tensorflow/transforms/parallel_execute_to_islands.cc index 44205063266..1332c8b6e59 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/parallel_execute_to_islands.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/parallel_execute_to_islands.cc @@ -71,6 +71,7 @@ limitations under the License. #include "llvm/ADT/SmallVector.h" #include "mlir/IR/Block.h" // from @llvm-project #include "mlir/IR/Builders.h" // from @llvm-project +#include "mlir/IR/Value.h" // from @llvm-project #include "mlir/Pass/Pass.h" // from @llvm-project #include "mlir/Support/LLVM.h" // from @llvm-project #include "mlir/Support/LogicalResult.h" // from @llvm-project @@ -111,8 +112,8 @@ LogicalResult ExpandParallelExecuteToIslands( // executed. 
llvm::SetVector region_inputs; getUsedValuesDefinedAbove(*execute_region, region_inputs); - llvm::SmallVector execution_control_inputs; - if (region_inputs.empty()) + llvm::SmallVector execution_control_inputs; + if (region_inputs.empty() && input_sink_island) execution_control_inputs.emplace_back(input_sink_island.control()); // Collect result types and operands. @@ -147,13 +148,22 @@ tf_executor::IslandOp CreateInputBarrierIsland( OpBuilder* builder, tf_executor::IslandOp island_op) { builder->setInsertionPoint(island_op); - llvm::SetVector island_inputs; - getUsedValuesDefinedAbove(island_op.body(), island_inputs); + llvm::SetVector all_inputs; + getUsedValuesDefinedAbove(island_op.body(), all_inputs); + // Filter out values that are arguments and doesn't need to be part of the + // entry barrier. + llvm::SmallVector island_inputs; llvm::SmallVector input_types; - input_types.reserve(island_inputs.size()); - for (const auto& input_val : island_inputs) - input_types.emplace_back(input_val.getType()); + island_inputs.reserve(all_inputs.size()); + input_types.reserve(all_inputs.size()); + for (Value val : all_inputs) { + if (!val.isa()) { + island_inputs.push_back(val); + input_types.push_back(val.getType()); + } + } + if (island_inputs.empty() && island_op.controlInputs().empty()) return {}; // Create new island for that forwards all inputs. auto control_type = tf_executor::ControlType::get(island_op.getContext()); From e10e5f540c3fd2b66078aa8fc05cba2147faeb37 Mon Sep 17 00:00:00 2001 From: Ayush Dubey Date: Tue, 21 Jul 2020 20:22:36 -0700 Subject: [PATCH 1013/2522] Fix Kokoro breakage by using initializer list. PiperOrigin-RevId: 322496760 Change-Id: I1ed2845e4a6b6e92323e0122c6a919fa9a6cb962 --- .../core/common_runtime/collective_param_resolver_local_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/common_runtime/collective_param_resolver_local_test.cc b/tensorflow/core/common_runtime/collective_param_resolver_local_test.cc index a998b5b1e48..f23f03dc406 100644 --- a/tensorflow/core/common_runtime/collective_param_resolver_local_test.cc +++ b/tensorflow/core/common_runtime/collective_param_resolver_local_test.cc @@ -441,7 +441,7 @@ TEST_F(CollectiveParamResolverLocalTest, AbortNormalCompleteParamsAsync) { // code to explicitly test every possible scenarios, so we run the test for // many times to have a better chance to cover different cases. CancellationManager cancel_mgr; - std::atomic num_ok = 0; + std::atomic num_ok{0}; for (int cnt = 0; cnt < 100; ++cnt) { // Launching threads that keep doing CompleteInstanceLocal. BlockingCounter done(NUM_DEVS); From cf9bc158f414d2d0c1fb4da91eac91f92c587ff8 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 21 Jul 2020 20:36:25 -0700 Subject: [PATCH 1014/2522] Fix data race in tpu_platform_interface PiperOrigin-RevId: 322498158 Change-Id: I56f5e8dbd0aedb7c37d6ef7eb943ec86fef03120 --- tensorflow/stream_executor/tpu/BUILD | 1 + tensorflow/stream_executor/tpu/tpu_platform_interface.cc | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/tensorflow/stream_executor/tpu/BUILD b/tensorflow/stream_executor/tpu/BUILD index a8557aada48..931cfde0cc2 100644 --- a/tensorflow/stream_executor/tpu/BUILD +++ b/tensorflow/stream_executor/tpu/BUILD @@ -275,6 +275,7 @@ cc_library( hdrs = ["tpu_platform_interface.h"], visibility = ["//visibility:public"], deps = [ + "//tensorflow/core/platform:mutex", "//tensorflow/core/platform:types", "//tensorflow/stream_executor:multi_platform_manager", "//tensorflow/stream_executor:stream_executor_headers", diff --git a/tensorflow/stream_executor/tpu/tpu_platform_interface.cc b/tensorflow/stream_executor/tpu/tpu_platform_interface.cc index fa9062c217c..28430392117 100644 --- a/tensorflow/stream_executor/tpu/tpu_platform_interface.cc +++ b/tensorflow/stream_executor/tpu/tpu_platform_interface.cc @@ -17,6 +17,7 @@ limitations under the License. #include +#include "tensorflow/core/platform/mutex.h" #include "tensorflow/stream_executor/multi_platform_manager.h" namespace tensorflow { @@ -72,16 +73,19 @@ TpuPlatformInterface* TpuPlatformInterface::GetRegisteredPlatform() { /* static */ TpuPlatformInterface* TpuPlatformInterface::GetRegisteredPlatform( bool initialize_platform) { + static auto* mu = new mutex; static bool requested_initialize_platform = initialize_platform; static TpuPlatformInterface* tpu_registered_platform = GetRegisteredPlatformStatic(initialize_platform); + mutex_lock lock(*mu); if (!requested_initialize_platform && initialize_platform) { // If the first time this function is called, we did not request // initializing the platform, but the next caller wants the platform // initialized, we will call GetRegisteredPlatformStatic again to initialize // the platform. 
tpu_registered_platform = GetRegisteredPlatformStatic(initialize_platform); + requested_initialize_platform = true; } return tpu_registered_platform; From 07fcaaa00e8bc01396007379566f4e590884b0e4 Mon Sep 17 00:00:00 2001 From: zilinzhu Date: Wed, 22 Jul 2020 11:57:41 +0800 Subject: [PATCH 1015/2522] richen error messages --- tensorflow/core/lib/io/record_reader.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/lib/io/record_reader.cc b/tensorflow/core/lib/io/record_reader.cc index 3492d851a77..ba01e4b8b9c 100644 --- a/tensorflow/core/lib/io/record_reader.cc +++ b/tensorflow/core/lib/io/record_reader.cc @@ -198,7 +198,8 @@ Status RecordReader::ReadRecord(uint64* offset, tstring* record) { if (!s.ok()) { last_read_failed_ = true; if (errors::IsOutOfRange(s)) { - s = errors::DataLoss("truncated record at ", *offset); + s = errors::DataLoss("truncated record at ", *offset, + "' failed with ", s.error_message()); } return s; } @@ -228,7 +229,8 @@ Status RecordReader::SkipRecords(uint64* offset, int num_to_skip, if (!s.ok()) { last_read_failed_ = true; if (errors::IsOutOfRange(s)) { - s = errors::DataLoss("truncated record at ", *offset); + s = errors::DataLoss("truncated record at ", *offset, + "' failed with ", s.error_message()); } return s; } From 04c82b598b6d5a14a4c2beb230c8d7ddd0047802 Mon Sep 17 00:00:00 2001 From: Chen Chen Date: Tue, 21 Jul 2020 21:17:20 -0700 Subject: [PATCH 1016/2522] Fix input_lib_type_spec_test when TF2_BEHAVIOR=1. PiperOrigin-RevId: 322502827 Change-Id: I75d4d7086cbbb33fc12cb04d25e990f30b77897c --- tensorflow/python/distribute/BUILD | 1 + .../distribute/input_lib_type_spec_test.py | 25 +++++++++++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/distribute/BUILD b/tensorflow/python/distribute/BUILD index 63c4d27956d..11cb725ef57 100644 --- a/tensorflow/python/distribute/BUILD +++ b/tensorflow/python/distribute/BUILD @@ -984,6 +984,7 @@ distribute_py_test( ":multi_worker_test_base", ":reduce_util", ":strategy_combinations", + ":tpu_strategy", ":values", "//tensorflow/python:control_flow_ops", "//tensorflow/python:errors", diff --git a/tensorflow/python/distribute/input_lib_type_spec_test.py b/tensorflow/python/distribute/input_lib_type_spec_test.py index 7f5b0e09f2c..691b29202e1 100644 --- a/tensorflow/python/distribute/input_lib_type_spec_test.py +++ b/tensorflow/python/distribute/input_lib_type_spec_test.py @@ -27,6 +27,7 @@ from tensorflow.python.data.ops import dataset_ops from tensorflow.python.distribute import combinations from tensorflow.python.distribute import distribute_lib from tensorflow.python.distribute import strategy_combinations +from tensorflow.python.distribute import tpu_strategy from tensorflow.python.distribute import values from tensorflow.python.eager import def_function from tensorflow.python.eager import test @@ -340,7 +341,17 @@ class RaggedTensorDistributedIteratorTest(test.TestCase, distribution.extended.experimental_enable_get_next_as_optional = ( enable_get_next_as_optional) - dist_dataset = distribution.experimental_distribute_dataset(dataset) + if isinstance(distribution, + (tpu_strategy.TPUStrategyV2, tpu_strategy.TPUStrategy)): + # TPUStrategy does not support distributed datasets with device prefetch + # when using sparse or ragged tensors. 
+ options = distribute_lib.InputOptions( + experimental_prefetch_to_device=False) + else: + options = None + + dist_dataset = distribution.experimental_distribute_dataset( + dataset, options) with distribution.scope(): iterator = iter(dist_dataset) _check_type_spec_structure(iterator) @@ -395,7 +406,17 @@ class RaggedTensorDistributedIteratorTest(test.TestCase, distribution.extended.experimental_enable_get_next_as_optional = ( enable_get_next_as_optional) - dist_dataset = distribution.experimental_distribute_dataset(dataset) + if isinstance(distribution, + (tpu_strategy.TPUStrategyV2, tpu_strategy.TPUStrategy)): + # TPUStrategy does not support distributed datasets with device prefetch + # when using sparse or ragged tensors. + options = distribute_lib.InputOptions( + experimental_prefetch_to_device=False) + else: + options = None + + dist_dataset = distribution.experimental_distribute_dataset( + dataset, options) with distribution.scope(): for _ in range(3): iterator = iter(dist_dataset) From 5e3cabe3616205dca0e18e3ece49039f422df287 Mon Sep 17 00:00:00 2001 From: Zhenyu Tan Date: Tue, 21 Jul 2020 21:27:24 -0700 Subject: [PATCH 1017/2522] add json serialization for dimension value. PiperOrigin-RevId: 322503787 Change-Id: Ia96822adff98d2317e0c5c382d8f250b48339af8 --- tensorflow/python/keras/saving/saved_model/json_utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/python/keras/saving/saved_model/json_utils.py b/tensorflow/python/keras/saving/saved_model/json_utils.py index cf7e2300852..4e4b671697a 100644 --- a/tensorflow/python/keras/saving/saved_model/json_utils.py +++ b/tensorflow/python/keras/saving/saved_model/json_utils.py @@ -110,6 +110,9 @@ def get_json_type(obj): if type(obj).__name__ == type.__name__: return obj.__name__ + if isinstance(obj, tensor_shape.Dimension): + return obj.value + if isinstance(obj, tensor_shape.TensorShape): return obj.as_list() From f4c8a6ff504dfc1a5d6f327fde8e3095dc307b7f Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Wed, 22 Jul 2020 12:42:38 +0700 Subject: [PATCH 1018/2522] Add DeleteFile --- .../filesystem/plugins/s3/s3_filesystem.cc | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc index d8939db568e..859805f5db7 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc @@ -24,6 +24,7 @@ limitations under the License. 
#include #include #include +#include #include #include #include @@ -961,6 +962,24 @@ void CopyFile(const TF_Filesystem* filesystem, const char* src, const char* dst, s3_file, status); } +void DeleteFile(const TF_Filesystem* filesystem, const char* path, + TF_Status* status) { + Aws::String bucket, object; + ParseS3Path(path, false, &bucket, &object, status); + if (TF_GetCode(status) != TF_OK) return; + auto s3_file = static_cast(filesystem->plugin_filesystem); + GetS3Client(s3_file); + + Aws::S3::Model::DeleteObjectRequest delete_object_request; + delete_object_request.WithBucket(bucket).WithKey(object); + auto delete_object_outcome = + s3_file->s3_client->DeleteObject(delete_object_request); + if (!delete_object_outcome.IsSuccess()) + TF_SetStatusFromAWSError(delete_object_outcome.GetError(), status); + else + TF_SetStatus(status, TF_OK, ""); +} + // TODO(vnvo2409): Implement later } // namespace tf_s3_filesystem From 4d80d85812a6f09497f045d589164afb42d2f6f2 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Wed, 22 Jul 2020 13:03:01 +0700 Subject: [PATCH 1019/2522] Add CreateDir --- .../filesystem/plugins/s3/s3_filesystem.cc | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc index 859805f5db7..854aff7bcf0 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc @@ -980,6 +980,47 @@ void DeleteFile(const TF_Filesystem* filesystem, const char* path, TF_SetStatus(status, TF_OK, ""); } +void CreateDir(const TF_Filesystem* filesystem, const char* path, + TF_Status* status) { + Aws::String bucket, object; + ParseS3Path(path, true, &bucket, &object, status); + if (TF_GetCode(status) != TF_OK) return; + auto s3_file = static_cast(filesystem->plugin_filesystem); + GetS3Client(s3_file); + + if (object.empty()) { + Aws::S3::Model::HeadBucketRequest head_bucket_request; + head_bucket_request.WithBucket(bucket); + auto head_bucket_outcome = + s3_file->s3_client->HeadBucket(head_bucket_request); + if (!head_bucket_outcome.IsSuccess()) + TF_SetStatusFromAWSError(head_bucket_outcome.GetError(), status); + else + TF_SetStatus(status, TF_OK, ""); + return; + } + + Aws::String dir_path = path; + if (dir_path.back() != '/') dir_path.push_back('/'); + + PathExists(filesystem, dir_path.c_str(), status); + if (TF_GetCode(status) == TF_OK) { + std::unique_ptr file( + new TF_WritableFile, [](TF_WritableFile* file) { + if (file != nullptr) { + if (file->plugin_file != nullptr) tf_writable_file::Cleanup(file); + delete file; + } + }); + file->plugin_file = nullptr; + NewWritableFile(filesystem, dir_path.c_str(), file.get(), status); + if (TF_GetCode(status) != TF_OK) return; + tf_writable_file::Close(file.get(), status); + if (TF_GetCode(status) != TF_OK) return; + } + TF_SetStatus(status, TF_OK, ""); +} + // TODO(vnvo2409): Implement later } // namespace tf_s3_filesystem From 310ae4db8511ead68f2312554b80ca1f836bf670 Mon Sep 17 00:00:00 2001 From: Brian Zhao Date: Tue, 21 Jul 2020 23:27:16 -0700 Subject: [PATCH 1020/2522] Disabling failing gpu test. 
PiperOrigin-RevId: 322516153 Change-Id: Iddfabd8c51635e6f5041f764f6800dbfa56e843d --- tensorflow/python/kernel_tests/BUILD | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 44428988273..53294fb427f 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -3520,6 +3520,9 @@ cuda_py_test( size = "medium", srcs = ["tensordot_op_test.py"], shard_count = 20, + tags = [ + "no_gpu", # TODO(b/161856380): Re-enable when fix lands. + ], deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", From 4e2300bad95ed70dbc575957acf8e3a1616b770f Mon Sep 17 00:00:00 2001 From: Ce Zheng Date: Tue, 21 Jul 2020 23:37:38 -0700 Subject: [PATCH 1021/2522] Split reusable logic from BatchResource to a base class BatchResourceBase, and make the FunctionLibraryRuntime related code overridable. PiperOrigin-RevId: 322517008 Change-Id: Ia4d3db903c4f0e7f81ce1571f9aea6bd6c54a294 --- tensorflow/core/kernels/BUILD | 8 +- tensorflow/core/kernels/batch_kernels.cc | 961 +----------------- tensorflow/core/kernels/batching_util/BUILD | 29 + .../batching_util/batch_resource_base.cc | 638 ++++++++++++ .../batching_util/batch_resource_base.h | 197 ++++ .../kernels/batching_util/concat_split_util.h | 247 +++++ 6 files changed, 1148 insertions(+), 932 deletions(-) create mode 100644 tensorflow/core/kernels/batching_util/batch_resource_base.cc create mode 100644 tensorflow/core/kernels/batching_util/batch_resource_base.h create mode 100644 tensorflow/core/kernels/batching_util/concat_split_util.h diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index c395f7d3e73..a58f598d322 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -655,17 +655,13 @@ cc_library( name = "batch_kernels", srcs = ["batch_kernels.cc"], deps = [ - ":concat_lib_hdrs", ":ops_util_hdrs", - ":split_lib_hdrs", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", + "//tensorflow/core/kernels/batching_util:batch_resource_base", + "//tensorflow/core/kernels/batching_util:concat_split_util", "//tensorflow/core/kernels/batching_util:periodic_function_dynamic", - "//tensorflow/core/kernels/batching_util:shared_batch_scheduler_hdrs", - "//tensorflow/core/kernels/batching_util:threadsafe_status", - "//tensorflow/core/util:incremental_barrier", - "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/strings", ], alwayslink = 1, diff --git a/tensorflow/core/kernels/batch_kernels.cc b/tensorflow/core/kernels/batch_kernels.cc index 0dacaf30443..1f430039b40 100644 --- a/tensorflow/core/kernels/batch_kernels.cc +++ b/tensorflow/core/kernels/batch_kernels.cc @@ -13,794 +13,68 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "absl/container/flat_hash_map.h" #include "absl/strings/str_cat.h" #include "tensorflow/core/framework/function.h" #include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/resource_mgr.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_util.h" #include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/batching_util/batch_resource_base.h" +#include "tensorflow/core/kernels/batching_util/concat_split_util.h" #include "tensorflow/core/kernels/batching_util/periodic_function.h" -#include "tensorflow/core/kernels/batching_util/shared_batch_scheduler.h" -#include "tensorflow/core/kernels/batching_util/threadsafe_status.h" -#include "tensorflow/core/kernels/concat_lib.h" #include "tensorflow/core/kernels/ops_util.h" -#include "tensorflow/core/kernels/split_lib.h" -#include "tensorflow/core/lib/gtl/cleanup.h" -#include "tensorflow/core/lib/monitoring/percentile_sampler.h" #include "tensorflow/core/lib/random/random.h" -#include "tensorflow/core/platform/context.h" #include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" -#include "tensorflow/core/util/incremental_barrier.h" -#include "tensorflow/core/util/ptr_util.h" namespace tensorflow { -namespace { - -void RecordPaddingSize(int32 padding_size, const string& model_name, - int32 execution_batch_size) { - static auto* cell = tensorflow::monitoring::PercentileSampler<2>::New( - {"/tensorflow/serving/batching/padding_size", - "Tracks the padding size distribution on batches by model_name (if " - "available).", - "model_name", "execution_batch_size"}, - /*percentiles=*/{25.0, 50.0, 75.0, 90.0, 95.0, 99.0}, - /*max_samples=*/1024, tensorflow::monitoring::UnitOfMeasure::kNumber); - cell->GetCell(model_name, absl::StrCat(execution_batch_size)) - ->Add(static_cast(padding_size)); -} - -void RecordInputBatchSize(int32 batch_size, const string& model_name) { - static auto* cell = tensorflow::monitoring::PercentileSampler<1>::New( - {"/tensorflow/serving/batching/input_batch_size", - "Tracks the batch size distribution on the inputs by model_name (if " - "available).", - "model_name"}, - /*percentiles=*/{25.0, 50.0, 75.0, 90.0, 95.0, 99.0}, - /*max_samples=*/1024, tensorflow::monitoring::UnitOfMeasure::kNumber); - cell->GetCell(model_name)->Add(static_cast(batch_size)); -} - -void RecordProcessedBatchSize(int32 batch_size, const string& model_name) { - static auto* cell = tensorflow::monitoring::PercentileSampler<1>::New( - {"/tensorflow/serving/batching/processed_batch_size", - "Tracks the batch size distribution on processing by model_name (if " - "available).", - "model_name"}, - /*percentiles=*/{25.0, 50.0, 75.0, 90.0, 95.0, 99.0}, - /*max_samples=*/1024, tensorflow::monitoring::UnitOfMeasure::kNumber); - cell->GetCell(model_name)->Add(static_cast(batch_size)); -} - -void RecordBatchDelayMs(int64 batch_delay_ms, const string& model_name) { - static auto* cell = monitoring::PercentileSampler<1>::New( - {"/tensorflow/serving/batching/batch_delay_ms", - "Tracks the batching delay for inputs by model_name (if " - "available).", - "model_name"}, - /*percentiles=*/{25.0, 50.0, 75.0, 90.0, 95.0, 99.0}, - /*max_samples=*/1024, monitoring::UnitOfMeasure::kTime); - cell->GetCell(model_name)->Add(static_cast(batch_delay_ms)); -} - -const string& 
GetModelName(OpKernelContext* ctx) { - static string* kModelNameUnset = new string("model_name_unset"); - if (!ctx->session_metadata()) return *kModelNameUnset; - if (ctx->session_metadata()->name().empty()) return *kModelNameUnset; - return ctx->session_metadata()->name(); -} - -} // namespace - -typedef Eigen::ThreadPoolDevice CPUDevice; -typedef Eigen::GpuDevice GPUDevice; -#ifdef TENSORFLOW_USE_SYCL -typedef Eigen::SyclDevice SYCLDevice; -#endif // TENSORFLOW_USE_SYCL - -// Concatenates 'inputs' into a single tensor along the zeroth dimension. -// Requires that all elements of 'inputs' have element type T. Writes to -// 'output' using 'context' for the allocation to ensure proper device -// placement. -template -Status Concat(OpKernelContext* context, const gtl::ArraySlice inputs, - Tensor* output) { - const int input_dims = inputs[0].dims(); - const TensorShape& input_shape = inputs[0].shape(); - - // Note that we reduce the concat of k-dimensional tensors into a two - // dimensional concat. Assuming the dimensions of any input tensor are - // {y0, y1,...,ym-1}, we flatten it to {1, y}, where y = Prod_i(yi). - std::vector::ConstMatrix>> inputs_flat; - inputs_flat.reserve(inputs.size()); - int64 output_dim0 = 0; - for (size_t i = 0; i < inputs.size(); ++i) { - const Tensor& input = inputs[i]; - if (input.dims() != input_dims) { - return errors::InvalidArgument( - "Ranks of all input tensors should match: shape[0] = ", - input_shape.DebugString(), " vs. shape[", i, - "] = ", input.shape().DebugString()); - } - for (int j = 1; j < input_dims; ++j) { - if (input.dim_size(j) != input_shape.dim_size(j)) { - return errors::InvalidArgument( - "Dimensions of inputs should match: shape[0] = ", - input_shape.DebugString(), " vs. shape[", i, - "] = ", input.shape().DebugString()); - } - } - if (input.NumElements() > 0) { - inputs_flat.emplace_back(new typename TTypes::ConstMatrix( - input.shaped({1, input.NumElements()}))); - } - output_dim0 += input.dim_size(0); - } - - TensorShape output_shape(input_shape); - output_shape.set_dim(0, output_dim0); - TF_RETURN_IF_ERROR( - context->allocate_temp(DataTypeToEnum::value, output_shape, output)); - if (output->NumElements() > 0) { - auto output_flat = output->shaped({1, output->NumElements()}); -#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ - (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) - if (std::is_same::value) { - ConcatGPU(context, inputs_flat, output, &output_flat); - return Status::OK(); - } -#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM - ConcatCPU(context->device(), inputs_flat, &output_flat); - } - - return Status::OK(); -} - -// Same as 'Concat' above, but handles Tensor dtype deduction automatically. -Status Concat(OpKernelContext* context, const gtl::ArraySlice inputs, - Tensor* output) { - const DataType type = inputs[0].dtype(); - Status concat_status; - switch (type) { -#define CASE(type) \ - case DataTypeToEnum::value: \ - concat_status = Concat(context, inputs, output); \ - break; - TF_CALL_ALL_TYPES(CASE); -#undef CASE - default: - concat_status = errors::InvalidArgument("Unsupported data type: ", type); - break; - } - return concat_status; -} - -// The Split*() functions split 'input' with element type T into 'sizes.size()' -// tensors along the zeroth dimension, with the ith split having zeroth- -// dimension size 'sizes[i]'. They allocate the output tensors using 'context', -// for proper device placement. - -// Handles special cases that are cheap. 
Sets 'done==true' iff it found an -// applicable special case and wrote to the outputs. Otherwise acts as a no-op. -template -Status SplitEasyCases(OpKernelContext* context, const Tensor& input, - const gtl::ArraySlice sizes, - std::vector* outputs, bool* done) { - *done = false; - - int64 total_size = 0; - for (const int64 size : sizes) { - total_size += size; - } - if (total_size > input.shape().dim_size(0)) { - return errors::InvalidArgument( - "Sum of split sizes must not exceed dim0-size of input tensor"); - } - - // Special case 0: trivial 1-way split. - if (sizes.size() == 1 && sizes.at(0) == input.shape().dim_size(0)) { - outputs->push_back(input); - *done = true; - return Status::OK(); - } - - // Special case 1: input is aligned. - if (IsInnerDimsSizeAligned(input.shape())) { - int64 position = 0; - for (const int64 size : sizes) { - outputs->emplace_back(input.Slice(position, position + size)); - position += size; - } - *done = true; - return Status::OK(); - } - - return Status::OK(); -} - -// Handles the general case, on CPU. -template -Status SplitCPU(OpKernelContext* context, const Tensor& input, - const gtl::ArraySlice sizes, - std::vector* outputs) { - int64 suffix_dim_size = 1; - for (int i = 1; i < input.shape().dims(); ++i) { - suffix_dim_size *= input.shape().dim_size(i); - } - auto input_reshaped = - input.shaped({input.shape().dim_size(0), suffix_dim_size}); - - int64 position = 0; - for (const int64 size : sizes) { - TensorShape output_shape = input.shape(); - output_shape.set_dim(0, size); - Tensor output; - TF_RETURN_IF_ERROR( - context->allocate_temp(input.dtype(), output_shape, &output)); - auto output_shaped = output.shaped({size, suffix_dim_size}); - - Eigen::DSizes slice_indices{position, 0}; - Eigen::DSizes slice_sizes{size, suffix_dim_size}; - functor::Split()(context->eigen_device(), - output_shaped, input_reshaped, - slice_indices, slice_sizes); - - outputs->emplace_back(output); - - position += size; - } - - return Status::OK(); -} - -#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ - (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) - -// Handles the general case, on GPU. -template -Status SplitGPU(OpKernelContext* context, const Tensor& input, - const gtl::ArraySlice& sizes, - std::vector* outputs) { - // TODO(olston, apassos): Implement this. - LOG(FATAL) << "Not yet implemented"; // Crash ok -} - -#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM - -// The outer function that dispatches to the various Split*() functions above. -template -Status Split(OpKernelContext* context, const Tensor& input, - const gtl::ArraySlice sizes, std::vector* outputs) { - bool easy_cases_done; - TF_RETURN_IF_ERROR( - SplitEasyCases(context, input, sizes, outputs, &easy_cases_done)); - if (easy_cases_done) { - return Status::OK(); - } - -#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ - (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) -// TODO(olston, apassos): Handle non-CPU cases. -// return SplitGPU(context, input, sizes, outputs); -#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM - return SplitCPU(context, input, sizes, outputs); -} - -// Same as 'Split' above, but handles Tensor dtype automatically. 
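These helpers are what the batching kernels use to stitch per-request tensors into one batch and to carve results back apart along dimension 0. An illustrative round trip, assuming an OpKernelContext* ctx and three per-task tensors of shapes [2, D], [3, D] and [1, D] (a fragment from inside a Status-returning function, not code from this patch):

    std::vector<Tensor> parts = {task0, task1, task2};
    Tensor batched;
    TF_RETURN_IF_ERROR(Concat(ctx, parts, &batched));  // shape [6, D]

    // ... run the batched computation on `batched` ...

    std::vector<int64> sizes = {2, 3, 1};
    std::vector<Tensor> outputs;
    TF_RETURN_IF_ERROR(Split(ctx, batched, sizes, &outputs));  // back to [2,D], [3,D], [1,D]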
-Status Split(OpKernelContext* context, const Tensor& input, - const gtl::ArraySlice sizes, std::vector* outputs) { - const DataType type = input.dtype(); - Status split_status; - switch (type) { -#define CASE(type) \ - case DataTypeToEnum::value: \ - split_status = Split(context, input, sizes, outputs); \ - break; - TF_CALL_ALL_TYPES(CASE); -#undef CASE - default: - split_status = errors::InvalidArgument("Unsupported data type: ", type); - break; - } - return split_status; -} +using ::tensorflow::concat_split_util::Concat; +using ::tensorflow::concat_split_util::Split; // A class encapsulating the state and logic for batching tensors. -class BatchResource : public ResourceBase { +class BatchResource : public serving::BatchResourceBase { public: - // Given a BatchTask (from one op invocation) with 'num_outputs'== M and - // splitted into N sub tasks, TensorMatrix is a N X M matrix. - // Namely, TensorMatrix[i][j] indicates the i-th split tensor of j-th output; - // concatenating tensors along the 2nd dimension gives a output tensor. - typedef std::vector> TensorMatrix; - static Status Create(int32 num_batch_threads, int32 max_batch_size, int32 batch_timeout_micros, int32 max_enqueued_batches, const std::vector& allowed_batch_sizes, FunctionLibraryRuntime::Handle fhandle, bool enable_large_batch_splitting, std::unique_ptr* resource) { - std::unique_ptr new_resource(new BatchResource); - - Batcher::Options batcher_options; + BatcherT::Options batcher_options; batcher_options.num_batch_threads = num_batch_threads; - TF_RETURN_IF_ERROR( - Batcher::Create(batcher_options, &new_resource->batcher_)); + std::shared_ptr batcher; + TF_RETURN_IF_ERROR(BatcherT::Create(batcher_options, &batcher)); - new_resource->batcher_queue_options_.max_batch_size = max_batch_size; - new_resource->batcher_queue_options_.max_enqueued_batches = - max_enqueued_batches; - new_resource->batcher_queue_options_.batch_timeout_micros = - batch_timeout_micros; - // Support for splitting large batch is still in progress. - new_resource->batcher_queue_options_.enable_large_batch_splitting = - enable_large_batch_splitting; - new_resource->allowed_batch_sizes_ = allowed_batch_sizes; - if (enable_large_batch_splitting) { - new_resource->batcher_queue_options_.split_input_task_func = - [](std::unique_ptr* input_task, - int open_batch_remaining_slot, int max_batch_size, - std::vector>* output_tasks) -> Status { - return SplitInputTask(input_task, open_batch_remaining_slot, - max_batch_size, output_tasks); - }; - - if (allowed_batch_sizes.empty()) { - new_resource->batcher_queue_options_.max_execution_batch_size = - max_batch_size; - } else { - new_resource->batcher_queue_options_.max_execution_batch_size = - *allowed_batch_sizes.rbegin(); - } - } - - new_resource->fhandle_ = fhandle; - - *resource = std::move(new_resource); + resource->reset(new BatchResource( + fhandle, std::move(batcher), + GetBatcherQueueOptions(num_batch_threads, max_batch_size, + batch_timeout_micros, max_enqueued_batches, + allowed_batch_sizes, + enable_large_batch_splitting), + allowed_batch_sizes)); return Status::OK(); } string DebugString() const final { return "BatchResource"; } - // Ingests data from one invocation of the batch op. The data is enqueued to - // be combined with others into a batch, asynchronously. 
- Status RegisterInput(int64 guid, OpKernelContext* context, - const string& batcher_queue_name, - AsyncOpKernel::DoneCallback done_callback) { - auto batch_components = MakeUnique(); - batch_components->start_time = EnvTime::NowNanos(); - batch_components->guid = guid; - batch_components->propagated_context = Context(ContextKind::kThread); - OpInputList tensors; - TF_RETURN_IF_ERROR(context->input_list("in_tensors", &tensors)); - batch_components->inputs.reserve(tensors.size()); - for (const Tensor& tensor : tensors) { - if (tensor.shape().dims() == 0) { - return errors::InvalidArgument( - "Batching input tensors must have at least one dimension"); - } - if (tensors.size() >= 2 && - tensor.shape().dim_size(0) != tensors[0].shape().dim_size(0)) { - return errors::InvalidArgument( - "Batching input tensors supplied in a given op invocation must " - "have equal 0th-dimension size"); - } - batch_components->inputs.push_back(tensor); - } - RecordInputBatchSize(tensors[0].shape().dim_size(0), GetModelName(context)); - OpInputList captured_tensors; - const auto captured_status = - context->input_list("captured_tensors", &captured_tensors); - if (captured_status.ok()) { - batch_components->captured_inputs.reserve(captured_tensors.size()); - for (const Tensor& captured_tensor : captured_tensors) { - batch_components->captured_inputs.push_back(captured_tensor); - } - } - batch_components->context = context; - batch_components->done_callback = std::move(done_callback); - batch_components->split_index = 0; - batch_components->output = std::make_shared(); - batch_components->status = std::make_shared(); - - BatcherQueue* batcher_queue; - TF_RETURN_IF_ERROR( - LookupOrCreateBatcherQueue(batcher_queue_name, &batcher_queue)); - return batcher_queue->Schedule(&batch_components); - } - private: - BatchResource() = default; + BatchResource(FunctionLibraryRuntime::Handle fhandle, + std::shared_ptr batcher, + const BatcherT::QueueOptions& batcher_queue_options, + std::vector allowed_batch_sizes) + : BatchResourceBase( + /*has_process_batch_function=*/fhandle != kInvalidHandle, + std::move(batcher), batcher_queue_options, + std::move(allowed_batch_sizes)), + fhandle_(fhandle) {} - // One task to be batched, corresponds to a `slice` of input from one batch-op - // invocation. - // - // Given input from one batch-op invocation, a `slice` of this input is: - // 1) Split each Tensor in `BatchTask::inputs` along the 0th dimension. - // 2) 'split_index' is calculated along the 0-th dimension. - // - // Note input from one batch-op invocation is valid and considered a - // specialized `slice`. - struct BatchTask : public serving::BatchTask { - // A unique ID to identify this invocation of Batch. - int64 guid; - - Context propagated_context; - - std::vector inputs; - std::vector captured_inputs; - OpKernelContext* context; - AsyncOpKernel::DoneCallback done_callback; - - // The index of this split, along the 0-th dimension of input from op - // invocation. - int split_index = 0; - - // Two-dimensional tensor matrix, ownership shared by: - // 1) each split of task (to fill one row in this matrix) - // and - // 2) callback that runs to merge output of individual splits for an op - // invocation, after all splits complete. - std::shared_ptr output; - - // 'status' records error (could be from any split) if at least one split - // returns error, OK otherwise. - // Ownership is shared by individual splits and callback. 
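In other words, when one op invocation is split into several tasks, the splits share two pieces of state: the output matrix (one row per split, one column per op output) and a thread-safe status. A condensed sketch of how the merge fires exactly once, after every split finishes, built on the IncrementalBarrier this code already uses; names other than IncrementalBarrier are placeholders:

    tensorflow::IncrementalBarrier barrier([output, status, ctx, done]() {
      // Runs once all barrier callbacks have been invoked: concatenate
      // column j of every row into the j-th op output, then propagate the
      // combined status and signal completion.
      MergeSplitOutputsInto(ctx, *output);
      ctx->SetStatus(status->status());
      done();
    });
    // Each split task takes barrier.Inc() as its done callback.
    for (auto& task : output_tasks) task->done_callback = barrier.Inc();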
- std::shared_ptr status; - - bool is_partial = false; - - size_t size() const override { return inputs[0].shape().dim_size(0); } - - uint64 start_time; - }; - - using Batcher = serving::SharedBatchScheduler; - using BatcherQueue = serving::BatchScheduler; - using Batch = serving::Batch; - - // Validates that it's legal to combine the tasks in 'batch' into a batch. - // Assumes the batch is non-empty. - static Status ValidateBatch(const Batch& batch) { - for (int task_idx = 0; task_idx < batch.num_tasks(); ++task_idx) { - const BatchTask& task = batch.task(task_idx); - - if (task.inputs.size() != batch.task(0).inputs.size()) { - return errors::InvalidArgument( - "Batching inputs must have equal number of edges"); - } - } - - return Status::OK(); - } - - // Returns the smallest entry in 'allowed_batch_sizes_' that is greater than - // or equal to 'batch_size'. If 'allowed_batch_sizes_' is empty, simply - // returns 'batch_size'. - int RoundToLowestAllowedBatchSize(int batch_size) const { - if (allowed_batch_sizes_.empty()) { - return batch_size; - } - for (int allowed_size : allowed_batch_sizes_) { - if (allowed_size >= batch_size) { - return allowed_size; - } - } - LOG(ERROR) << "Maximum batch size greater than largest allowed size; " - "ignoring allowed sizes constraint"; - return batch_size; - } - - Status ConcatInputTensors(const Batch& batch, OpKernelContext* context, - std::vector* concatenated_tensors) const { - if (batch.num_tasks() == 0) { - return errors::InvalidArgument("Empty batch."); - } - - const int padded_batch_size = RoundToLowestAllowedBatchSize(batch.size()); - const int padding_amount = padded_batch_size - batch.size(); - RecordPaddingSize(padding_amount, GetModelName(context), padded_batch_size); - RecordProcessedBatchSize(padded_batch_size, GetModelName(context)); - - // All tasks should have the same number of input edges. - const int num_inputs = batch.task(0).inputs.size(); - concatenated_tensors->reserve(num_inputs); - - // Process each input one at a time (the typical case has just one). - for (int i = 0; i < num_inputs; ++i) { - // Concatenate the tasks ith input tensors into a big output tensor. - std::vector to_concatenate; - to_concatenate.reserve(batch.num_tasks()); - for (int task_idx = 0; task_idx < batch.num_tasks(); ++task_idx) { - to_concatenate.push_back(batch.task(task_idx).inputs.at(i)); - } - - // Add padding as needed. Use the first row of the first task's tensor as - // the data for padding. - if (padding_amount > 0) { - const Tensor& padding_source = batch.task(0).inputs.at(i); - Tensor padding; - if (padding_source.shape().dim_size(0) == 0) { - return errors::InvalidArgument( - "Cannot use an empty tensor with zero rows as padding when " - "batching. (Input ", - i, " got shape ", padding_source.shape().DebugString(), ".)"); - } - if (padding_source.shape().dim_size(0) == 1) { - padding = padding_source; - } else { - padding = padding_source.Slice(0, 1); - } - for (int i = 0; i < padding_amount; ++i) { - to_concatenate.push_back(padding); - } - } - - Tensor concatenated_tensor; - Status concat_status = - Concat(context, to_concatenate, &concatenated_tensor); - TF_RETURN_IF_ERROR(concat_status); - concatenated_tensors->push_back(concatenated_tensor); - } - return Status::OK(); - } - - // Split 'input' of 'input_task_ptr' along 0th dimension, into a list of - // 'output_tasks'. 
- // Task sizes are determined by - // 1) open_batch_remaining_slot - // 2) max_batch_size - // 3) size-of-input-task - // in a way that - // 1) Task sizes add up to `size-of-input-task`. - // 2) Task sizes from left to right are like - // [open_batch_remaining_slot, max_batch_size, max_batch_size, ..., - // `size-of-input-task` - `sum-of-previous-elements`]. - // - // REQUIRES: - // Caller should make sure size-of-input-task is greater than - // open_batch_remaining_slot. - static Status SplitInputTask( - std::unique_ptr* input_task_ptr, int open_batch_remaining_slot, - int max_batch_size, - std::vector>* output_tasks) { - BatchTask& input_task = *(*input_task_ptr); - const int64 input_task_size = input_task.size(); - - DCHECK_GT(input_task_size, open_batch_remaining_slot); - - std::shared_ptr shared_status = input_task.status; - - // `split_task_done_callback` runs only after all splitted tasks are - // complete. - std::function split_task_done_callback = - [done_callback = input_task.done_callback, output = input_task.output, - op_kernel_context = input_task.context, status = shared_status]() { - const int num_output = op_kernel_context->num_outputs(); - for (int i = 0; i < num_output; ++i) { - Tensor output_tensor; - - // Concat would memcpy each input tensor to one output tensor. - // In this context, Concat can be further optimized to get rid of - // some (probably all) memcpy when input tensors are slices of - // another copy. - // TODO(b/154140947): - // Add a custom implementation of Split and then optimize Concat. - std::vector to_concatenate; - to_concatenate.reserve(output->size()); - for (int j = 0; j < output->size(); ++j) { - to_concatenate.push_back(std::move((*output)[j][i])); - } - const auto concat_status = - Concat(op_kernel_context, to_concatenate, &output_tensor); - if (!concat_status.ok()) { - status->Update(concat_status); - } - - op_kernel_context->set_output(i, std::move(output_tensor)); - } - op_kernel_context->SetStatus(status->status()); - done_callback(); - }; - IncrementalBarrier barrier(split_task_done_callback); - - std::vector output_task_sizes; - - if (open_batch_remaining_slot > 0) { - output_task_sizes.push_back(open_batch_remaining_slot); - } - - for (int left_task_size = input_task_size - open_batch_remaining_slot; - left_task_size > 0; left_task_size -= max_batch_size) { - int next_task_size = std::min(left_task_size, max_batch_size); - output_task_sizes.push_back(next_task_size); - } - - const int output_task_num = output_task_sizes.size(); - input_task.output->resize(output_task_num); - - for (int i = 0; i < output_task_num; ++i) { - (*input_task.output)[i].resize(input_task.context->num_outputs()); - } - - output_tasks->reserve(output_task_num); - for (int i = 0; i < output_task_num; i++) { - auto task = absl::make_unique(); - task->guid = input_task.guid; - task->propagated_context = Context(ContextKind::kThread); - task->captured_inputs = input_task.captured_inputs; - task->context = input_task.context; - task->done_callback = barrier.Inc(); - task->start_time = input_task.start_time; - task->split_index = i; - task->inputs.reserve(input_task.inputs.size()); - task->is_partial = true; - task->status = input_task.status; - - task->output = input_task.output; - output_tasks->push_back(std::move(task)); - } - - const int num_input_tensors = input_task.inputs.size(); - - // Splits each input tensor according to `output_task_sizes`, and - // initializes input of `output_tasks` with split results. 
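The size arithmetic spelled out in the comment above is easy to get wrong, so here is the same rule pulled out into a small free function plus a worked example (illustration only, not part of the patch):

    #include <algorithm>
    #include <vector>

    std::vector<int> ComputeOutputTaskSizes(int input_task_size,
                                            int open_batch_remaining_slot,
                                            int max_batch_size) {
      std::vector<int> sizes;
      if (open_batch_remaining_slot > 0)
        sizes.push_back(open_batch_remaining_slot);
      for (int left = input_task_size - open_batch_remaining_slot; left > 0;
           left -= max_batch_size) {
        sizes.push_back(std::min(left, max_batch_size));
      }
      return sizes;
    }

    // ComputeOutputTaskSizes(10, /*open_batch_remaining_slot=*/3,
    //                        /*max_batch_size=*/4) returns {3, 4, 3}:
    // 3 rows top up the open batch, one full batch of 4 follows, and the
    // remaining 3 rows form the last task. The sizes always sum to 10.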
- for (int i = 0; i < num_input_tensors; ++i) { - std::vector split_tensors; - const Tensor& input_tensor = input_task.inputs[i]; - // TODO(b/154140947): - // Figure out the optimal implementation of Split, by using - // 'Tensor::Slice' and eliminating unnecessary memcpy as much as possible. - const Status split_status = Split(input_task.context, input_tensor, - output_task_sizes, &split_tensors); - if (!split_status.ok()) { - return errors::Internal( - "When splitting input, Tensor split operation failed: ", - split_status.ToString()); - } - if (split_tensors.size() != output_task_sizes.size()) { - return errors::Internal( - "When splitting input, tensor split operation did not work as " - "expected; got ", - split_tensors.size(), " splits; expected ", - output_task_sizes.size()); - } - for (int j = 0; j < output_tasks->size(); ++j) { - BatchTask& output_task = *((*output_tasks)[j]); - auto moved_tensor_iter = std::next(split_tensors.begin(), j); - std::move(moved_tensor_iter, moved_tensor_iter + 1, - std::back_inserter(output_task.inputs)); - } - } - return Status::OK(); - } - - Status SplitOutputTensors(const std::vector& combined_outputs, - Batch* batch) const { - DCHECK_GE(batch->num_tasks(), 1); - if (batch->num_tasks() < 1) { - return errors::Internal("Batch size expected to be positive; was ", - batch->num_tasks()); - } - - std::vector task_sizes_plus_optional_padding; - task_sizes_plus_optional_padding.reserve(batch->num_tasks()); - for (int i = 0; i < batch->num_tasks(); ++i) { - task_sizes_plus_optional_padding.push_back(batch->task(i).size()); - } - const int padding_size = - RoundToLowestAllowedBatchSize(batch->size()) - batch->size(); - if (padding_size > 0) { - task_sizes_plus_optional_padding.push_back(padding_size); - } - - // For each output tensor name, a divided-up tensor with one entry per task. - std::map> split_tensors; - - DCHECK_EQ(batch->task(0).context->num_outputs(), combined_outputs.size()); - int combined_outputs_size = combined_outputs.size(); - if (combined_outputs_size != batch->task(0).context->num_outputs()) { - return errors::Internal("Wrong number of batched output tensors"); - } - - // Generate 'split_tensors' and populate the context outputs. - for (int i = 0, iter_limit = combined_outputs.size(); i < iter_limit; ++i) { - const Tensor& output_tensor = combined_outputs[i]; - if (output_tensor.shape().dims() == 0) { - return errors::FailedPrecondition( - "Batched output tensor has 0 dimensions"); - } - if (output_tensor.shape().dim_size(0) != - static_cast(batch->size() + padding_size)) { - return errors::FailedPrecondition( - "Batched output tensor's 0th dimension does not equal the sum of " - "the 0th dimension sizes of the input tensors"); - } - - std::vector split_tensor; - const Status split_status = tensor::Split( - output_tensor, task_sizes_plus_optional_padding, &split_tensor); - DCHECK(split_status.ok()) << split_status.ToString(); - if (!split_status.ok()) { - return errors::Internal("Tensor split operation failed: ", - split_status.ToString()); - } - DCHECK_EQ(split_tensor.size(), task_sizes_plus_optional_padding.size()); - if (split_tensor.size() != task_sizes_plus_optional_padding.size()) { - return errors::Internal( - "Tensor split operation did not work as expected; got ", - split_tensor.size(), " splits; expected ", - task_sizes_plus_optional_padding.size()); - } - - // Ignore a possible final split_tensors entry containing the padding. 
- for (int j = 0; j < batch->num_tasks(); ++j) { - BatchTask& task = *(batch->mutable_task(j)); - if (task.is_partial) { - std::vector& tensor_vector = (*task.output)[task.split_index]; - tensor_vector[i] = std::move(split_tensor[j]); - } else { - task.context->set_output(i, split_tensor[j]); - } - } - } - - return Status::OK(); - } - - void ProcessFuncBatch(std::unique_ptr batch) const { - if (batch->empty()) { - return; - } - - // We use the 'propagated_context' from one of the threads which setup one - // of the tasks. This will propagate any common context over all the threads - // which are running this Session, of which this BatchOp is a part. - WithContext wc(batch->task(batch->num_tasks() - 1).propagated_context); - - OpKernelContext* last_task_context = - batch->task(batch->num_tasks() - 1).context; - - // Regardless of the outcome, we need to propagate the status to the - // individual tasks and signal that they are done. We use MakeCleanup() to - // ensure that this happens no matter how we exit the method below. - Status status; - bool cleanup_done = false; - auto cleanup_fn = [&cleanup_done, &batch](const Status& status) { - if (cleanup_done) { - return; - } - for (int i = 0; i < batch->num_tasks(); ++i) { - if (batch->task(i).is_partial) { - batch->mutable_task(i)->status->Update(status); - } else { - batch->mutable_task(i)->context->SetStatus(status); - } - - batch->mutable_task(i)->done_callback(); - } - cleanup_done = true; - }; - - auto finally = - gtl::MakeCleanup([&cleanup_fn, &status] { cleanup_fn(status); }); - - status = ValidateBatch(*batch); - if (!status.ok()) { - return; - } - - std::vector concatenated_tensors; - status = - ConcatInputTensors(*batch, last_task_context, &concatenated_tensors); - if (!status.ok()) { - return; - } + void ProcessFuncBatchImpl( + OpKernelContext* last_task_context, absl::Span inputs, + std::vector* combined_outputs, + std::function done) const override { FunctionLibraryRuntime::Options opts; opts.step_container = last_task_context->step_container(); opts.cancellation_manager = last_task_context->cancellation_manager(); @@ -809,185 +83,20 @@ class BatchResource : public ResourceBase { opts.rendezvous = last_task_context->rendezvous(); opts.runner = last_task_context->runner(); opts.run_all_kernels_inline = last_task_context->run_all_kernels_inline(); - auto* flib = last_task_context->function_library(); - std::vector combined_outputs; - Notification done; - std::vector args(concatenated_tensors.begin(), - concatenated_tensors.end()); - const auto& captured_inputs = - batch->task(batch->num_tasks() - 1).captured_inputs; - args.insert(args.end(), captured_inputs.begin(), captured_inputs.end()); - - uint64 current_time = EnvTime::NowNanos(); - const string& model_name = GetModelName(last_task_context); - for (int i = 0; i < batch->num_tasks(); ++i) { - RecordBatchDelayMs((current_time - batch->task(i).start_time) * 1e-6, - model_name); - } - // Releases the cleanup method here, because the callback of the function - // library runtime will handle it now. - finally.release(); - flib->Run( - opts, fhandle_, args, &combined_outputs, [&](const Status& run_status) { - Status final_status; - auto run_finally = gtl::MakeCleanup([&]() { - // We do the cleanup here as an optimization, so that it runs in - // the underlying TF inter-op threadpool. Running it in the - // threadpool, let's the ensuing ops be scheduled faster, - // because the executor will add them to the front of the - // threadpool's task queue rather than the end. 
- cleanup_fn(final_status); - done.Notify(); - }); - final_status = run_status; - if (!final_status.ok()) { - return; - } - final_status = SplitOutputTensors(combined_outputs, batch.get()); - }); + Notification done_notif; + flib->Run(opts, fhandle_, inputs, combined_outputs, + [&](const Status& run_status) { + done(run_status); + done_notif.Notify(); + }); // By waiting for the notification we are ensuring that this thread isn't // used for processing other batches, which gives the batches time to // coalesce upstream. So overall the number of batches going through the // devices goes down, improving latency and throughput in most cases. - done.WaitForNotification(); + done_notif.WaitForNotification(); } - // Processes a batch of one or more BatchTask entries. - void ProcessBatch(std::unique_ptr batch) const { - if (batch->empty()) { - return; - } - - WithContext wc(batch->task(batch->num_tasks() - 1).propagated_context); - - OpKernelContext* last_task_context = - batch->task(batch->num_tasks() - 1).context; - AsyncOpKernel::DoneCallback last_task_callback = - batch->task(batch->num_tasks() - 1).done_callback; - - OP_REQUIRES_OK_ASYNC(last_task_context, ValidateBatch(*batch), - last_task_callback); - - // All tasks should have the same number of input edges. - const int num_input_edges = batch->task(0).inputs.size(); - std::vector concatenated_tensors; - const Status concat_status = - ConcatInputTensors(*batch, last_task_context, &concatenated_tensors); - OP_REQUIRES_OK_ASYNC(last_task_context, concat_status, last_task_callback); - - // Process each input edge one at a time (the typical case has just one). - for (int i = 0; i < num_input_edges; ++i) { - last_task_context->set_output(i, concatenated_tensors[i]); - - // Emit batch->num_tasks() - 1 empty output tensors. - for (int task_idx = 0; task_idx < batch->num_tasks() - 1; ++task_idx) { - const BatchTask& task = batch->task(task_idx); - TensorShape output_shape(task.inputs[i].shape()); - output_shape.set_dim(0, 0); - Tensor* output = nullptr; - OP_REQUIRES_OK_ASYNC( - task.context, - task.context->allocate_output(i, output_shape, &output), - task.done_callback); - } - } - // Emit batch->num_tasks() - 1 empty index tensors. - for (int task_idx = 0; task_idx < batch->num_tasks() - 1; ++task_idx) { - const BatchTask& task = batch->task(task_idx); - TensorShape index_shape({0, 3}); - Tensor* output = nullptr; - OP_REQUIRES_OK_ASYNC( - task.context, - task.context->allocate_output(num_input_edges, index_shape, &output), - task.done_callback); - } - // Emit all ID tensors. - for (int task_idx = 0; task_idx < batch->num_tasks(); ++task_idx) { - const BatchTask& task = batch->task(task_idx); - Tensor* id; - OP_REQUIRES_OK_ASYNC(task.context, - task.context->allocate_output(num_input_edges + 1, - TensorShape({}), &id), - task.done_callback); - id->scalar()() = task.guid; - } - OP_REQUIRES_OK_ASYNC( - last_task_context, - EmitIndexTensor(last_task_context, *batch, num_input_edges), - last_task_callback); - - // Signal done for each element of the batch. (At this point, the contexts - // are no longer guaranteed to remain live.) - for (int task_idx = 0; task_idx < batch->num_tasks(); ++task_idx) { - batch->mutable_task(task_idx)->done_callback(); - } - } - - // Emits an index tensor, which the Unbatch op will use to un-concatenate - // the tensor and attribute the pieces to the right batch keys. 
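The Notification dance in the new ProcessFuncBatchImpl is worth calling out: the function-library Run() is asynchronous, so the batching thread parks on a Notification until the callback fires, both to propagate status through `done` and to keep this thread from picking up another batch while the current one executes (which is what lets requests coalesce upstream). A stripped-down sketch, with RunBatchedFunction as a hypothetical stand-in for flib->Run:

    tensorflow::Notification done_notif;
    RunBatchedFunction(args, &outputs,
                       [&](const tensorflow::Status& run_status) {
                         done(run_status);   // fan status out to the per-task callbacks
                         done_notif.Notify();
                       });
    done_notif.WaitForNotification();  // block until the batched call finishes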
The index - // tensor contains, for each input: [batch_key, start_offset, end_offset] - // where start_offset and end_offset represent the range of entries in the - // concatenated tensors that belong to that input. - // - // Emits the result to the output at 'output_index' using 'context'. - static Status EmitIndexTensor(OpKernelContext* context, const Batch& batch, - int output_index) { - const TensorShape index_shape({batch.num_tasks(), 3}); - Tensor* index = nullptr; - TF_RETURN_IF_ERROR( - context->allocate_output(output_index, index_shape, &index)); - auto index_flat = index->shaped({batch.num_tasks(), 3}); - size_t offset = 0; - for (int task_idx = 0; task_idx < batch.num_tasks(); ++task_idx) { - const BatchTask& task = batch.task(task_idx); - index_flat(task_idx, 0) = task.guid; - index_flat(task_idx, 1) = offset; - index_flat(task_idx, 2) = offset + task.size(); - offset += task.size(); - } - return Status::OK(); - } - - // Looks up the batcher queue for 'queue_name'. If it did't previously exist, - // creates it. - Status LookupOrCreateBatcherQueue(const string& queue_name, - BatcherQueue** queue) { - mutex_lock l(batcher_queues_mu_); - - auto it = batcher_queues_.find(queue_name); - if (it != batcher_queues_.end()) { - *queue = it->second.get(); - return Status::OK(); - } - - std::unique_ptr new_queue; - auto process_batch_callback = [this](std::unique_ptr batch) { - if (fhandle_ == kInvalidHandle) { - ProcessBatch(std::move(batch)); - } else { - ProcessFuncBatch(std::move(batch)); - } - }; - TF_RETURN_IF_ERROR(batcher_->AddQueue(batcher_queue_options_, - process_batch_callback, &new_queue)); - *queue = new_queue.get(); - batcher_queues_[queue_name] = std::move(new_queue); - return Status::OK(); - } - - // A batch scheduler, and options for creating queues. - std::shared_ptr batcher_; - Batcher::QueueOptions batcher_queue_options_; - - // A collection of batcher queues, keyed on queue name. - // TODO(olston): Garbage-collect unused queues (perhaps simply remove empty - // ones (with a time delay?); it's okay if they get recreated later). 
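To make the index-tensor contract above concrete: for a batch of three tasks with sizes 2, 5 and 1 and guids g0, g1, g2, EmitIndexTensor writes an int64 tensor of shape [3, 3]:

    // [ g0, 0, 2 ]   rows 0..2 of the concatenated output belong to task 0
    // [ g1, 2, 7 ]   rows 2..7 belong to task 1
    // [ g2, 7, 8 ]   rows 7..8 belong to task 2

The Unbatch op later uses these [batch_key, start_offset, end_offset) rows to slice each caller's result back out of the concatenated tensor.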
- mutable mutex batcher_queues_mu_; - std::map> batcher_queues_ - TF_GUARDED_BY(batcher_queues_mu_); - - std::vector allowed_batch_sizes_; FunctionLibraryRuntime::Handle fhandle_; }; diff --git a/tensorflow/core/kernels/batching_util/BUILD b/tensorflow/core/kernels/batching_util/BUILD index e92764712c8..b662e2e066a 100644 --- a/tensorflow/core/kernels/batching_util/BUILD +++ b/tensorflow/core/kernels/batching_util/BUILD @@ -221,3 +221,32 @@ cc_library( "//tensorflow/core:tensorflow", ], ) + +cc_library( + name = "concat_split_util", + hdrs = ["concat_split_util.h"], + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core/kernels:concat_lib", + "//tensorflow/core/kernels:split_lib", + "//tensorflow/core/platform:status", + ], +) + +cc_library( + name = "batch_resource_base", + srcs = ["batch_resource_base.cc"], + hdrs = ["batch_resource_base.h"], + deps = [ + ":batch_scheduler", + ":concat_split_util", + ":shared_batch_scheduler", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core/kernels/batching_util:threadsafe_status", + "//tensorflow/core/platform:status", + "//tensorflow/core/platform:thread_annotations", + "//tensorflow/core/util:incremental_barrier", + ], +) diff --git a/tensorflow/core/kernels/batching_util/batch_resource_base.cc b/tensorflow/core/kernels/batching_util/batch_resource_base.cc new file mode 100644 index 00000000000..b372f446f7a --- /dev/null +++ b/tensorflow/core/kernels/batching_util/batch_resource_base.cc @@ -0,0 +1,638 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/kernels/batching_util/batch_resource_base.h" + +#include "tensorflow/core/framework/ops_util.h" +#include "tensorflow/core/framework/tensor_util.h" +#include "tensorflow/core/kernels/batching_util/concat_split_util.h" +#include "tensorflow/core/lib/gtl/cleanup.h" +#include "tensorflow/core/lib/monitoring/percentile_sampler.h" +#include "tensorflow/core/util/incremental_barrier.h" + +namespace tensorflow { +namespace serving { +namespace { + +void RecordPaddingSize(int32 padding_size, const string& model_name, + int32 execution_batch_size) { + static auto* cell = tensorflow::monitoring::PercentileSampler<2>::New( + {"/tensorflow/serving/batching/padding_size", + "Tracks the padding size distribution on batches by model_name (if " + "available).", + "model_name", "execution_batch_size"}, + /*percentiles=*/{25.0, 50.0, 75.0, 90.0, 95.0, 99.0}, + /*max_samples=*/1024, tensorflow::monitoring::UnitOfMeasure::kNumber); + cell->GetCell(model_name, absl::StrCat(execution_batch_size)) + ->Add(static_cast(padding_size)); +} + +void RecordInputBatchSize(int32 batch_size, const string& model_name) { + static auto* cell = tensorflow::monitoring::PercentileSampler<1>::New( + {"/tensorflow/serving/batching/input_batch_size", + "Tracks the batch size distribution on the inputs by model_name (if " + "available).", + "model_name"}, + /*percentiles=*/{25.0, 50.0, 75.0, 90.0, 95.0, 99.0}, + /*max_samples=*/1024, tensorflow::monitoring::UnitOfMeasure::kNumber); + cell->GetCell(model_name)->Add(static_cast(batch_size)); +} + +void RecordProcessedBatchSize(int32 batch_size, const string& model_name) { + static auto* cell = tensorflow::monitoring::PercentileSampler<1>::New( + {"/tensorflow/serving/batching/processed_batch_size", + "Tracks the batch size distribution on processing by model_name (if " + "available).", + "model_name"}, + /*percentiles=*/{25.0, 50.0, 75.0, 90.0, 95.0, 99.0}, + /*max_samples=*/1024, tensorflow::monitoring::UnitOfMeasure::kNumber); + cell->GetCell(model_name)->Add(static_cast(batch_size)); +} + +void RecordBatchDelayMs(int64 batch_delay_ms, const string& model_name) { + static auto* cell = monitoring::PercentileSampler<1>::New( + {"/tensorflow/serving/batching/batch_delay_ms", + "Tracks the batching delay for inputs by model_name (if " + "available).", + "model_name"}, + /*percentiles=*/{25.0, 50.0, 75.0, 90.0, 95.0, 99.0}, + /*max_samples=*/1024, monitoring::UnitOfMeasure::kTime); + cell->GetCell(model_name)->Add(static_cast(batch_delay_ms)); +} + +const string& GetModelName(OpKernelContext* ctx) { + static string* kModelNameUnset = new string("model_name_unset"); + if (!ctx->session_metadata()) return *kModelNameUnset; + if (ctx->session_metadata()->name().empty()) return *kModelNameUnset; + return ctx->session_metadata()->name(); +} + +} // namespace + +using ::tensorflow::concat_split_util::Concat; +using ::tensorflow::concat_split_util::Split; +using TensorMatrix = std::vector>; + +Status BatchResourceBase::RegisterInput( + int64 guid, OpKernelContext* context, const string& batcher_queue_name, + AsyncOpKernel::DoneCallback done_callback) { + auto batch_components = absl::make_unique(); + batch_components->start_time = EnvTime::NowNanos(); + batch_components->guid = guid; + batch_components->propagated_context = Context(ContextKind::kThread); + OpInputList tensors; + TF_RETURN_IF_ERROR(context->input_list("in_tensors", &tensors)); + 
batch_components->inputs.reserve(tensors.size()); + for (const Tensor& tensor : tensors) { + if (tensor.shape().dims() == 0) { + return errors::InvalidArgument( + "Batching input tensors must have at least one dimension"); + } + if (tensors.size() >= 2 && + tensor.shape().dim_size(0) != tensors[0].shape().dim_size(0)) { + return errors::InvalidArgument( + "Batching input tensors supplied in a given op invocation must " + "have equal 0th-dimension size"); + } + batch_components->inputs.push_back(tensor); + } + RecordInputBatchSize(tensors[0].shape().dim_size(0), GetModelName(context)); + OpInputList captured_tensors; + const auto captured_status = + context->input_list("captured_tensors", &captured_tensors); + if (captured_status.ok()) { + batch_components->captured_inputs.reserve(captured_tensors.size()); + for (const Tensor& captured_tensor : captured_tensors) { + batch_components->captured_inputs.push_back(captured_tensor); + } + } + batch_components->context = context; + batch_components->done_callback = std::move(done_callback); + batch_components->split_index = 0; + batch_components->output = std::make_shared(); + batch_components->status = std::make_shared(); + + BatcherQueueT* batcher_queue; + TF_RETURN_IF_ERROR( + LookupOrCreateBatcherQueue(batcher_queue_name, &batcher_queue)); + return batcher_queue->Schedule(&batch_components); +} + +/*static*/ BatchResourceBase::BatcherT::QueueOptions +BatchResourceBase::GetBatcherQueueOptions( + int32 num_batch_threads, int32 max_batch_size, int32 batch_timeout_micros, + int32 max_enqueued_batches, const std::vector& allowed_batch_sizes, + bool enable_large_batch_splitting) { + BatcherT::QueueOptions batcher_queue_options; + batcher_queue_options.max_batch_size = max_batch_size; + batcher_queue_options.max_enqueued_batches = max_enqueued_batches; + batcher_queue_options.batch_timeout_micros = batch_timeout_micros; + // Support for splitting large batch is still in progress. + batcher_queue_options.enable_large_batch_splitting = + enable_large_batch_splitting; + if (enable_large_batch_splitting) { + batcher_queue_options.split_input_task_func = + [](std::unique_ptr* input_task, + int open_batch_remaining_slot, int max_batch_size, + std::vector>* output_tasks) -> Status { + return SplitInputTask(input_task, open_batch_remaining_slot, + max_batch_size, output_tasks); + }; + + if (allowed_batch_sizes.empty()) { + batcher_queue_options.max_execution_batch_size = max_batch_size; + } else { + batcher_queue_options.max_execution_batch_size = + *allowed_batch_sizes.rbegin(); + } + } + + return batcher_queue_options; +} + +/*static*/ Status BatchResourceBase::ValidateBatch(const BatchT& batch) { + for (int task_idx = 0; task_idx < batch.num_tasks(); ++task_idx) { + const BatchResourceBase::BatchTask& task = batch.task(task_idx); + + if (task.inputs.size() != batch.task(0).inputs.size()) { + return errors::InvalidArgument( + "Batching inputs must have equal number of edges"); + } + } + + return Status::OK(); +} + +// Returns the smallest entry in 'allowed_batch_sizes_' that is greater than +// or equal to 'batch_size'. If 'allowed_batch_sizes_' is empty, simply +// returns 'batch_size'. 
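A quick worked example of this rounding, assuming allowed_batch_sizes_ = {8, 16, 32}:

    // RoundToLowestAllowedBatchSize(5)  -> 8   (pad with 3 rows)
    // RoundToLowestAllowedBatchSize(16) -> 16  (no padding)
    // RoundToLowestAllowedBatchSize(40) -> 40  (larger than every allowed
    //                                           size: log an error and keep
    //                                           the raw batch size)

The gap between the padded size and the real batch size is what ConcatInputTensors fills by repeating the first row of the first task's input, and what RecordPaddingSize reports to monitoring.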
+int BatchResourceBase::RoundToLowestAllowedBatchSize(int batch_size) const { + if (allowed_batch_sizes_.empty()) { + return batch_size; + } + for (int allowed_size : allowed_batch_sizes_) { + if (allowed_size >= batch_size) { + return allowed_size; + } + } + LOG(ERROR) << "Maximum batch size greater than largest allowed size; " + "ignoring allowed sizes constraint"; + return batch_size; +} + +Status BatchResourceBase::ConcatInputTensors( + const BatchT& batch, OpKernelContext* context, + std::vector* concatenated_tensors) const { + if (batch.num_tasks() == 0) { + return errors::InvalidArgument("Empty batch."); + } + + const int padded_batch_size = RoundToLowestAllowedBatchSize(batch.size()); + const int padding_amount = padded_batch_size - batch.size(); + RecordPaddingSize(padding_amount, GetModelName(context), padded_batch_size); + RecordProcessedBatchSize(padded_batch_size, GetModelName(context)); + + // All tasks should have the same number of input edges. + const int num_inputs = batch.task(0).inputs.size(); + concatenated_tensors->reserve(num_inputs); + + // Process each input one at a time (the typical case has just one). + for (int i = 0; i < num_inputs; ++i) { + // Concatenate the tasks ith input tensors into a big output tensor. + std::vector to_concatenate; + to_concatenate.reserve(batch.num_tasks()); + for (int task_idx = 0; task_idx < batch.num_tasks(); ++task_idx) { + to_concatenate.push_back(batch.task(task_idx).inputs.at(i)); + } + + // Add padding as needed. Use the first row of the first task's tensor as + // the data for padding. + if (padding_amount > 0) { + const Tensor& padding_source = batch.task(0).inputs.at(i); + Tensor padding; + if (padding_source.shape().dim_size(0) == 0) { + return errors::InvalidArgument( + "Cannot use an empty tensor with zero rows as padding when " + "batching. (Input ", + i, " got shape ", padding_source.shape().DebugString(), ".)"); + } + if (padding_source.shape().dim_size(0) == 1) { + padding = padding_source; + } else { + padding = padding_source.Slice(0, 1); + } + for (int i = 0; i < padding_amount; ++i) { + to_concatenate.push_back(padding); + } + } + + Tensor concatenated_tensor; + Status concat_status = + Concat(context, to_concatenate, &concatenated_tensor); + TF_RETURN_IF_ERROR(concat_status); + concatenated_tensors->push_back(concatenated_tensor); + } + return Status::OK(); +} + +/*static*/ Status BatchResourceBase::SplitInputTask( + std::unique_ptr* input_task_ptr, int open_batch_remaining_slot, + int max_batch_size, std::vector>* output_tasks) { + BatchTask& input_task = *(*input_task_ptr); + const int64 input_task_size = input_task.size(); + + DCHECK_GT(input_task_size, open_batch_remaining_slot); + + std::shared_ptr shared_status = input_task.status; + + // `split_task_done_callback` runs only after all splitted tasks are + // complete. + std::function split_task_done_callback = + [done_callback = input_task.done_callback, output = input_task.output, + op_kernel_context = input_task.context, status = shared_status]() { + const int num_output = op_kernel_context->num_outputs(); + for (int i = 0; i < num_output; ++i) { + Tensor output_tensor; + + // Concat would memcpy each input tensor to one output tensor. + // In this context, Concat can be further optimized to get rid of + // some (probably all) memcpy when input tensors are slices of + // another copy. + // TODO(b/154140947): + // Add a custom implementation of Split and then optimize Concat. 
+ std::vector to_concatenate; + to_concatenate.reserve(output->size()); + for (int j = 0; j < output->size(); ++j) { + to_concatenate.push_back(std::move((*output)[j][i])); + } + const auto concat_status = + Concat(op_kernel_context, to_concatenate, &output_tensor); + if (!concat_status.ok()) { + status->Update(concat_status); + } + + op_kernel_context->set_output(i, std::move(output_tensor)); + } + op_kernel_context->SetStatus(status->status()); + done_callback(); + }; + IncrementalBarrier barrier(split_task_done_callback); + + std::vector output_task_sizes; + + if (open_batch_remaining_slot > 0) { + output_task_sizes.push_back(open_batch_remaining_slot); + } + + for (int left_task_size = input_task_size - open_batch_remaining_slot; + left_task_size > 0; left_task_size -= max_batch_size) { + int next_task_size = std::min(left_task_size, max_batch_size); + output_task_sizes.push_back(next_task_size); + } + + const int output_task_num = output_task_sizes.size(); + input_task.output->resize(output_task_num); + + for (int i = 0; i < output_task_num; ++i) { + (*input_task.output)[i].resize(input_task.context->num_outputs()); + } + + output_tasks->reserve(output_task_num); + for (int i = 0; i < output_task_num; i++) { + auto task = absl::make_unique(); + task->guid = input_task.guid; + task->propagated_context = Context(ContextKind::kThread); + task->captured_inputs = input_task.captured_inputs; + task->context = input_task.context; + task->done_callback = barrier.Inc(); + task->start_time = input_task.start_time; + task->split_index = i; + task->inputs.reserve(input_task.inputs.size()); + task->is_partial = true; + task->status = input_task.status; + + task->output = input_task.output; + output_tasks->push_back(std::move(task)); + } + + const int num_input_tensors = input_task.inputs.size(); + + // Splits each input tensor according to `output_task_sizes`, and + // initializes input of `output_tasks` with split results. + for (int i = 0; i < num_input_tensors; ++i) { + std::vector split_tensors; + const Tensor& input_tensor = input_task.inputs[i]; + // TODO(b/154140947): + // Figure out the optimal implementation of Split, by using + // 'Tensor::Slice' and eliminating unnecessary memcpy as much as possible. 
+ const Status split_status = Split(input_task.context, input_tensor, + output_task_sizes, &split_tensors); + if (!split_status.ok()) { + return errors::Internal( + "When splitting input, Tensor split operation failed: ", + split_status.ToString()); + } + if (split_tensors.size() != output_task_sizes.size()) { + return errors::Internal( + "When splitting input, tensor split operation did not work as " + "expected; got ", + split_tensors.size(), " splits; expected ", output_task_sizes.size()); + } + for (int j = 0; j < output_tasks->size(); ++j) { + BatchTask& output_task = *((*output_tasks)[j]); + auto moved_tensor_iter = std::next(split_tensors.begin(), j); + std::move(moved_tensor_iter, moved_tensor_iter + 1, + std::back_inserter(output_task.inputs)); + } + } + return Status::OK(); +} + +Status BatchResourceBase::SplitOutputTensors( + const std::vector& combined_outputs, BatchT* batch) const { + DCHECK_GE(batch->num_tasks(), 1); + if (batch->num_tasks() < 1) { + return errors::Internal("Batch size expected to be positive; was ", + batch->num_tasks()); + } + + std::vector task_sizes_plus_optional_padding; + task_sizes_plus_optional_padding.reserve(batch->num_tasks()); + for (int i = 0; i < batch->num_tasks(); ++i) { + task_sizes_plus_optional_padding.push_back(batch->task(i).size()); + } + const int padding_size = + RoundToLowestAllowedBatchSize(batch->size()) - batch->size(); + if (padding_size > 0) { + task_sizes_plus_optional_padding.push_back(padding_size); + } + + // For each output tensor name, a divided-up tensor with one entry per task. + std::map> split_tensors; + + DCHECK_EQ(batch->task(0).context->num_outputs(), combined_outputs.size()); + int combined_outputs_size = combined_outputs.size(); + if (combined_outputs_size != batch->task(0).context->num_outputs()) { + return errors::Internal("Wrong number of batched output tensors"); + } + + // Generate 'split_tensors' and populate the context outputs. + for (int i = 0, iter_limit = combined_outputs.size(); i < iter_limit; ++i) { + const Tensor& output_tensor = combined_outputs[i]; + if (output_tensor.shape().dims() == 0) { + return errors::FailedPrecondition( + "Batched output tensor has 0 dimensions"); + } + if (output_tensor.shape().dim_size(0) != + static_cast(batch->size() + padding_size)) { + return errors::FailedPrecondition( + "Batched output tensor's 0th dimension does not equal the sum of " + "the 0th dimension sizes of the input tensors"); + } + + std::vector split_tensor; + const Status split_status = tensor::Split( + output_tensor, task_sizes_plus_optional_padding, &split_tensor); + DCHECK(split_status.ok()) << split_status.ToString(); + if (!split_status.ok()) { + return errors::Internal("Tensor split operation failed: ", + split_status.ToString()); + } + DCHECK_EQ(split_tensor.size(), task_sizes_plus_optional_padding.size()); + if (split_tensor.size() != task_sizes_plus_optional_padding.size()) { + return errors::Internal( + "Tensor split operation did not work as expected; got ", + split_tensor.size(), " splits; expected ", + task_sizes_plus_optional_padding.size()); + } + + // Ignore a possible final split_tensors entry containing the padding. 
+ for (int j = 0; j < batch->num_tasks(); ++j) { + BatchTask& task = *(batch->mutable_task(j)); + if (task.is_partial) { + std::vector& tensor_vector = (*task.output)[task.split_index]; + tensor_vector[i] = std::move(split_tensor[j]); + } else { + task.context->set_output(i, split_tensor[j]); + } + } + } + + return Status::OK(); +} + +void BatchResourceBase::ProcessFuncBatch(std::unique_ptr batch) const { + if (batch->empty()) { + return; + } + + // We use the 'propagated_context' from one of the threads which setup one + // of the tasks. This will propagate any common context over all the threads + // which are running this Session, of which this BatchOp is a part. + WithContext wc(batch->task(batch->num_tasks() - 1).propagated_context); + + OpKernelContext* last_task_context = + batch->task(batch->num_tasks() - 1).context; + + // Regardless of the outcome, we need to propagate the status to the + // individual tasks and signal that they are done. We use MakeCleanup() to + // ensure that this happens no matter how we exit the method below. + Status status; + bool cleanup_done = false; + auto cleanup_fn = [&cleanup_done, &batch](const Status& status) { + if (cleanup_done) { + return; + } + for (int i = 0; i < batch->num_tasks(); ++i) { + if (batch->task(i).is_partial) { + batch->mutable_task(i)->status->Update(status); + } else { + batch->mutable_task(i)->context->SetStatus(status); + } + + batch->mutable_task(i)->done_callback(); + } + cleanup_done = true; + }; + + auto finally = + gtl::MakeCleanup([&cleanup_fn, &status] { cleanup_fn(status); }); + + status = ValidateBatch(*batch); + if (!status.ok()) { + return; + } + + std::vector concatenated_tensors; + status = ConcatInputTensors(*batch, last_task_context, &concatenated_tensors); + if (!status.ok()) { + return; + } + + std::vector combined_outputs; + std::vector args(concatenated_tensors.begin(), + concatenated_tensors.end()); + const auto& captured_inputs = + batch->task(batch->num_tasks() - 1).captured_inputs; + args.insert(args.end(), captured_inputs.begin(), captured_inputs.end()); + + uint64 current_time = EnvTime::NowNanos(); + const string& model_name = GetModelName(last_task_context); + for (int i = 0; i < batch->num_tasks(); ++i) { + RecordBatchDelayMs((current_time - batch->task(i).start_time) * 1e-6, + model_name); + } + // Releases the cleanup method here, because the callback of the function + // library runtime will handle it now. + finally.release(); + ProcessFuncBatchImpl(last_task_context, args, &combined_outputs, + [&](const Status& run_status) { + Status final_status; + auto run_finally = gtl::MakeCleanup([&]() { + // We do the cleanup here as an optimization, so that + // it runs in the underlying TF inter-op threadpool. + // Running it in the threadpool, let's the ensuing + // ops be scheduled faster, because the executor will + // add them to the front of the threadpool's task + // queue rather than the end. + cleanup_fn(final_status); + }); + final_status = run_status; + if (!final_status.ok()) { + return; + } + final_status = + SplitOutputTensors(combined_outputs, batch.get()); + }); +} + +// Processes a batch of one or more BatchTask entries. 
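ProcessFuncBatch above leans on gtl::MakeCleanup to guarantee that every task's done_callback fires exactly once, whether the method returns early on a validation or concat error or hands responsibility to the asynchronous completion callback. A minimal standalone sketch of that scope-guard pattern follows; the Cleanup class and Validate function are illustrative stand-ins, not the gtl implementation:

  #include <functional>
  #include <iostream>
  #include <utility>

  // A minimal scope guard in the spirit of gtl::MakeCleanup: runs the stored
  // callable on destruction unless release() was called first.
  class Cleanup {
   public:
    explicit Cleanup(std::function<void()> f) : f_(std::move(f)) {}
    ~Cleanup() { if (f_) f_(); }
    void release() { f_ = nullptr; }  // hand responsibility to someone else
   private:
    std::function<void()> f_;
  };

  bool Validate() { return false; }  // pretend validation fails

  int main() {
    bool callbacks_signaled = false;
    {
      Cleanup finally([&] { callbacks_signaled = true; });  // always signal tasks
      if (!Validate()) {
        // Early-exit path in the real code: the guard still fires, so every
        // task's done_callback is invoked with the error status.
        std::cout << "validation failed, guard will run\n";
      }
      // On the success path the real code calls finally.release() and defers
      // signaling to the async completion callback of the batch function.
    }
    std::cout << "callbacks_signaled=" << std::boolalpha << callbacks_signaled << "\n";
  }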
+void BatchResourceBase::ProcessBatch(std::unique_ptr batch) const { + if (batch->empty()) { + return; + } + + WithContext wc(batch->task(batch->num_tasks() - 1).propagated_context); + + OpKernelContext* last_task_context = + batch->task(batch->num_tasks() - 1).context; + AsyncOpKernel::DoneCallback last_task_callback = + batch->task(batch->num_tasks() - 1).done_callback; + + OP_REQUIRES_OK_ASYNC(last_task_context, ValidateBatch(*batch), + last_task_callback); + + // All tasks should have the same number of input edges. + const int num_input_edges = batch->task(0).inputs.size(); + std::vector concatenated_tensors; + const Status concat_status = + ConcatInputTensors(*batch, last_task_context, &concatenated_tensors); + OP_REQUIRES_OK_ASYNC(last_task_context, concat_status, last_task_callback); + + // Process each input edge one at a time (the typical case has just one). + for (int i = 0; i < num_input_edges; ++i) { + last_task_context->set_output(i, concatenated_tensors[i]); + + // Emit batch->num_tasks() - 1 empty output tensors. + for (int task_idx = 0; task_idx < batch->num_tasks() - 1; ++task_idx) { + const BatchTask& task = batch->task(task_idx); + TensorShape output_shape(task.inputs[i].shape()); + output_shape.set_dim(0, 0); + Tensor* output = nullptr; + OP_REQUIRES_OK_ASYNC( + task.context, task.context->allocate_output(i, output_shape, &output), + task.done_callback); + } + } + // Emit batch->num_tasks() - 1 empty index tensors. + for (int task_idx = 0; task_idx < batch->num_tasks() - 1; ++task_idx) { + const BatchTask& task = batch->task(task_idx); + TensorShape index_shape({0, 3}); + Tensor* output = nullptr; + OP_REQUIRES_OK_ASYNC( + task.context, + task.context->allocate_output(num_input_edges, index_shape, &output), + task.done_callback); + } + // Emit all ID tensors. + for (int task_idx = 0; task_idx < batch->num_tasks(); ++task_idx) { + const BatchTask& task = batch->task(task_idx); + Tensor* id; + OP_REQUIRES_OK_ASYNC(task.context, + task.context->allocate_output(num_input_edges + 1, + TensorShape({}), &id), + task.done_callback); + id->scalar()() = task.guid; + } + OP_REQUIRES_OK_ASYNC( + last_task_context, + EmitIndexTensor(last_task_context, *batch, num_input_edges), + last_task_callback); + + // Signal done for each element of the batch. (At this point, the contexts + // are no longer guaranteed to remain live.) + for (int task_idx = 0; task_idx < batch->num_tasks(); ++task_idx) { + batch->mutable_task(task_idx)->done_callback(); + } +} + +/*static*/ Status BatchResourceBase::EmitIndexTensor(OpKernelContext* context, + const BatchT& batch, + int output_index) { + const TensorShape index_shape({batch.num_tasks(), 3}); + Tensor* index = nullptr; + TF_RETURN_IF_ERROR( + context->allocate_output(output_index, index_shape, &index)); + auto index_flat = index->shaped({batch.num_tasks(), 3}); + size_t offset = 0; + for (int task_idx = 0; task_idx < batch.num_tasks(); ++task_idx) { + const BatchTask& task = batch.task(task_idx); + index_flat(task_idx, 0) = task.guid; + index_flat(task_idx, 1) = offset; + index_flat(task_idx, 2) = offset + task.size(); + offset += task.size(); + } + return Status::OK(); +} + +// Looks up the batcher queue for 'queue_name'. If it did't previously exist, +// creates it. 
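To see what EmitIndexTensor above produces, consider two tasks that contributed 2 and 3 rows respectively: the index tensor holds one [batch_key, start_offset, end_offset] row per task. The following standalone sketch uses made-up guids (101, 202) purely for illustration:

  #include <cstdint>
  #include <iostream>
  #include <vector>

  struct TaskInfo {
    int64_t guid;  // batch key of the originating op invocation
    int size;      // number of rows the task contributed
  };

  int main() {
    // Illustrative tasks: the first contributed 2 rows, the second 3 rows.
    std::vector<TaskInfo> tasks = {{101, 2}, {202, 3}};

    // Build the num_tasks x 3 index: [batch_key, start_offset, end_offset].
    int64_t offset = 0;
    for (const TaskInfo& t : tasks) {
      std::cout << "[" << t.guid << ", " << offset << ", " << offset + t.size
                << "]\n";
      offset += t.size;
    }
    // Output:
    // [101, 0, 2]
    // [202, 2, 5]
    // The Unbatch op uses these ranges to slice rows 0..2 and 2..5 out of the
    // concatenated result and route them back to the right callers.
  }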
+Status BatchResourceBase::LookupOrCreateBatcherQueue(const string& queue_name,
+                                                     BatcherQueueT** queue) {
+  mutex_lock l(batcher_queues_mu_);
+
+  auto it = batcher_queues_.find(queue_name);
+  if (it != batcher_queues_.end()) {
+    *queue = it->second.get();
+    return Status::OK();
+  }
+
+  std::unique_ptr<BatcherQueueT> new_queue;
+  auto process_batch_callback = [this](std::unique_ptr<BatchT> batch) {
+    if (!has_process_batch_function_) {
+      ProcessBatch(std::move(batch));
+    } else {
+      ProcessFuncBatch(std::move(batch));
+    }
+  };
+  TF_RETURN_IF_ERROR(batcher_->AddQueue(batcher_queue_options_,
+                                        process_batch_callback, &new_queue));
+  *queue = new_queue.get();
+  batcher_queues_[queue_name] = std::move(new_queue);
+  return Status::OK();
+}
+
+}  // namespace serving
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/batching_util/batch_resource_base.h b/tensorflow/core/kernels/batching_util/batch_resource_base.h
new file mode 100644
index 00000000000..0471207c951
--- /dev/null
+++ b/tensorflow/core/kernels/batching_util/batch_resource_base.h
@@ -0,0 +1,197 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_KERNELS_BATCHING_UTIL_BATCH_RESOURCE_BASE_H_
+#define TENSORFLOW_CORE_KERNELS_BATCHING_UTIL_BATCH_RESOURCE_BASE_H_
+
+#include
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/resource_mgr.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/kernels/batching_util/batch_scheduler.h"
+#include "tensorflow/core/kernels/batching_util/shared_batch_scheduler.h"
+#include "tensorflow/core/kernels/batching_util/threadsafe_status.h"
+#include "tensorflow/core/platform/context.h"
+#include "tensorflow/core/platform/status.h"
+#include "tensorflow/core/platform/thread_annotations.h"
+
+namespace tensorflow {
+namespace serving {
+
+// Base class for a resource that encapsulates the state and logic for batching
+// tensors.
+class BatchResourceBase : public ResourceBase {
+ public:
+  // Given a BatchTask (from one op invocation) with 'num_outputs' == M that is
+  // split into N sub-tasks, TensorMatrix is an N x M matrix.
+  // Namely, TensorMatrix[i][j] is the i-th split tensor of the j-th output;
+  // concatenating the tensors in column j along the 0th dimension gives an
+  // output tensor.
+  typedef std::vector<std::vector<Tensor>> TensorMatrix;
+
+  // Ingests data from one invocation of the batch op. The data is enqueued to
+  // be combined with others into a batch, asynchronously.
+  Status RegisterInput(int64 guid, OpKernelContext* context,
+                       const string& batcher_queue_name,
+                       AsyncOpKernel::DoneCallback done_callback);
+
+ protected:
+  // One task to be batched; corresponds to a `slice` of input from one
+  // batch-op invocation.
+  //
+  // Given input from one batch-op invocation, a `slice` of this input is:
+  // 1) Split each Tensor in `BatchTask::inputs` along the 0th dimension.
+ // 2) 'split_index' is calculated along the 0-th dimension. + // + // Note input from one batch-op invocation is valid and considered a + // specialized `slice`. + struct BatchTask : public tensorflow::serving::BatchTask { + // A unique ID to identify this invocation of Batch. + int64 guid; + + Context propagated_context; + + std::vector inputs; + std::vector captured_inputs; + OpKernelContext* context; + AsyncOpKernel::DoneCallback done_callback; + + // The index of this split, along the 0-th dimension of input from op + // invocation. + int split_index = 0; + + // Two-dimensional tensor matrix, ownership shared by: + // 1) each split of task (to fill one row in this matrix) + // and + // 2) callback that runs to merge output of individual splits for an op + // invocation, after all splits complete. + std::shared_ptr output; + + // 'status' records error (could be from any split) if at least one split + // returns error, OK otherwise. + // Ownership is shared by individual splits and callback. + std::shared_ptr status; + + bool is_partial = false; + + size_t size() const override { return inputs[0].shape().dim_size(0); } + + uint64 start_time; + }; + + // Appending a T suffix to make the type alias different to those in + // tensorflow::serving namespace, because some versions of compiler complain + // about changing meaning of the symbols. + using BatcherT = SharedBatchScheduler; + using BatcherQueueT = BatchScheduler; + using BatchT = Batch; + + BatchResourceBase(bool has_process_batch_function, + std::shared_ptr batcher, + const BatcherT::QueueOptions& batcher_queue_options, + std::vector allowed_batch_sizes) + : has_process_batch_function_(has_process_batch_function), + batcher_(std::move(batcher)), + batcher_queue_options_(batcher_queue_options), + allowed_batch_sizes_(std::move(allowed_batch_sizes)) {} + + static BatcherT::QueueOptions GetBatcherQueueOptions( + int32 num_batch_threads, int32 max_batch_size, int32 batch_timeout_micros, + int32 max_enqueued_batches, const std::vector& allowed_batch_sizes, + bool enable_large_batch_splitting); + + private: + // Implementation of calling the process batch function. + virtual void ProcessFuncBatchImpl( + OpKernelContext* last_task_context, absl::Span inputs, + std::vector* combined_outputs, + std::function done) const = 0; + + // Validates that it's legal to combine the tasks in 'batch' into a batch. + // Assumes the batch is non-empty. + static Status ValidateBatch(const BatchT& batch); + + // Returns the smallest entry in 'allowed_batch_sizes_' that is greater than + // or equal to 'batch_size'. If 'allowed_batch_sizes_' is empty, simply + // returns 'batch_size'. + int RoundToLowestAllowedBatchSize(int batch_size) const; + + Status ConcatInputTensors(const BatchT& batch, OpKernelContext* context, + std::vector* concatenated_tensors) const; + + // Split 'input' of 'input_task_ptr' along 0th dimension, into a list of + // 'output_tasks'. + // Task sizes are determined by + // 1) open_batch_remaining_slot + // 2) max_batch_size + // 3) size-of-input-task + // in a way that + // 1) Task sizes add up to `size-of-input-task`. + // 2) Task sizes from left to right are like + // [open_batch_remaining_slot, max_batch_size, max_batch_size, ..., + // `size-of-input-task` - `sum-of-previous-elements`]. + // + // REQUIRES: + // Caller should make sure size-of-input-task is greater than + // open_batch_remaining_slot. 
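The task-size scheme described in the comment above is easy to verify with a standalone sketch; ComputeOutputTaskSizes and the sample values below are illustrative and not part of the patch. An input task of 10 rows, with 3 free slots in the open batch and a max batch size of 4, is split into sub-tasks of sizes {3, 4, 3}:

  #include <algorithm>
  #include <iostream>
  #include <vector>

  // Computes the sizes of the sub-tasks an input task is split into, following
  // the scheme documented above: first fill the open batch, then full batches,
  // then the remainder.
  std::vector<int> ComputeOutputTaskSizes(int input_task_size,
                                          int open_batch_remaining_slot,
                                          int max_batch_size) {
    std::vector<int> sizes;
    if (open_batch_remaining_slot > 0) sizes.push_back(open_batch_remaining_slot);
    for (int left = input_task_size - open_batch_remaining_slot; left > 0;
         left -= max_batch_size) {
      sizes.push_back(std::min(left, max_batch_size));
    }
    return sizes;
  }

  int main() {
    // Illustrative values: a task of 10 rows, 3 free slots in the open batch,
    // and a maximum execution batch size of 4 -> sub-task sizes {3, 4, 3}.
    for (int s : ComputeOutputTaskSizes(/*input_task_size=*/10,
                                        /*open_batch_remaining_slot=*/3,
                                        /*max_batch_size=*/4)) {
      std::cout << s << " ";
    }
    std::cout << "\n";
  }

The sizes always add up to the input task size, so no rows are lost or duplicated by the split.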
+ static Status SplitInputTask( + std::unique_ptr* input_task_ptr, int open_batch_remaining_slot, + int max_batch_size, + std::vector>* output_tasks); + + Status SplitOutputTensors(const std::vector& combined_outputs, + BatchT* batch) const; + + void ProcessFuncBatch(std::unique_ptr batch) const; + + // Processes a batch of one or more BatchTask entries. + void ProcessBatch(std::unique_ptr batch) const; + + // Emits an index tensor, which the Unbatch op will use to un-concatenate + // the tensor and attribute the pieces to the right batch keys. The index + // tensor contains, for each input: [batch_key, start_offset, end_offset] + // where start_offset and end_offset represent the range of entries in the + // concatenated tensors that belong to that input. + // + // Emits the result to the output at 'output_index' using 'context'. + static Status EmitIndexTensor(OpKernelContext* context, const BatchT& batch, + int output_index); + + // Looks up the batcher queue for 'queue_name'. If it did't previously exist, + // creates it. + Status LookupOrCreateBatcherQueue(const string& queue_name, + BatcherQueueT** queue); + + // True if user specified a batch processing function for this resource. + const bool has_process_batch_function_; + // A batch scheduler, and options for creating queues. + std::shared_ptr batcher_; + BatcherT::QueueOptions batcher_queue_options_; + + // A collection of batcher queues, keyed on queue name. + // TODO(olston): Garbage-collect unused queues (perhaps simply remove empty + // ones (with a time delay?); it's okay if they get recreated later). + mutable mutex batcher_queues_mu_; + std::map> batcher_queues_ + TF_GUARDED_BY(batcher_queues_mu_); + + std::vector allowed_batch_sizes_; +}; + +} // namespace serving +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_KERNELS_BATCHING_UTIL_BATCH_RESOURCE_BASE_H_ diff --git a/tensorflow/core/kernels/batching_util/concat_split_util.h b/tensorflow/core/kernels/batching_util/concat_split_util.h new file mode 100644 index 00000000000..50ffc664452 --- /dev/null +++ b/tensorflow/core/kernels/batching_util/concat_split_util.h @@ -0,0 +1,247 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CORE_KERNELS_BATCHING_UTIL_CONCAT_SPLIT_UTIL_H_ +#define TENSORFLOW_CORE_KERNELS_BATCHING_UTIL_CONCAT_SPLIT_UTIL_H_ + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/ops_util.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/kernels/concat_lib.h" +#include "tensorflow/core/kernels/split_lib.h" +#include "tensorflow/core/platform/status.h" + +namespace tensorflow { +namespace concat_split_util { + +typedef Eigen::ThreadPoolDevice CPUDevice; +typedef Eigen::GpuDevice GPUDevice; +#ifdef TENSORFLOW_USE_SYCL +typedef Eigen::SyclDevice SYCLDevice; +#endif // TENSORFLOW_USE_SYCL + +// Concatenates 'inputs' into a single tensor along the zeroth dimension. +// Requires that all elements of 'inputs' have element type T. Writes to +// 'output' using 'context' for the allocation to ensure proper device +// placement. +template +Status Concat(OpKernelContext* context, const gtl::ArraySlice inputs, + Tensor* output) { + const int input_dims = inputs[0].dims(); + const TensorShape& input_shape = inputs[0].shape(); + + // Note that we reduce the concat of k-dimensional tensors into a two + // dimensional concat. Assuming the dimensions of any input tensor are + // {y0, y1,...,ym-1}, we flatten it to {1, y}, where y = Prod_i(yi). + std::vector::ConstMatrix>> inputs_flat; + inputs_flat.reserve(inputs.size()); + int64 output_dim0 = 0; + for (size_t i = 0; i < inputs.size(); ++i) { + const Tensor& input = inputs[i]; + if (input.dims() != input_dims) { + return errors::InvalidArgument( + "Ranks of all input tensors should match: shape[0] = ", + input_shape.DebugString(), " vs. shape[", i, + "] = ", input.shape().DebugString()); + } + for (int j = 1; j < input_dims; ++j) { + if (input.dim_size(j) != input_shape.dim_size(j)) { + return errors::InvalidArgument( + "Dimensions of inputs should match: shape[0] = ", + input_shape.DebugString(), " vs. shape[", i, + "] = ", input.shape().DebugString()); + } + } + if (input.NumElements() > 0) { + inputs_flat.emplace_back(new typename TTypes::ConstMatrix( + input.shaped({1, input.NumElements()}))); + } + output_dim0 += input.dim_size(0); + } + + TensorShape output_shape(input_shape); + output_shape.set_dim(0, output_dim0); + TF_RETURN_IF_ERROR( + context->allocate_temp(DataTypeToEnum::value, output_shape, output)); + if (output->NumElements() > 0) { + auto output_flat = output->shaped({1, output->NumElements()}); +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) + if (std::is_same::value) { + ConcatGPU(context, inputs_flat, output, &output_flat); + return Status::OK(); + } +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM + ConcatCPU(context->device(), inputs_flat, &output_flat); + } + + return Status::OK(); +} + +// Same as 'Concat' above, but handles Tensor dtype deduction automatically. 
+inline Status Concat(OpKernelContext* context, + const gtl::ArraySlice inputs, Tensor* output) { + const DataType type = inputs[0].dtype(); + Status concat_status; + switch (type) { +#define CASE(type) \ + case DataTypeToEnum::value: \ + concat_status = Concat(context, inputs, output); \ + break; + TF_CALL_ALL_TYPES(CASE); +#undef CASE + default: + concat_status = errors::InvalidArgument("Unsupported data type: ", type); + break; + } + return concat_status; +} + +// The Split*() functions split 'input' with element type T into 'sizes.size()' +// tensors along the zeroth dimension, with the ith split having zeroth- +// dimension size 'sizes[i]'. They allocate the output tensors using 'context', +// for proper device placement. + +// Handles special cases that are cheap. Sets 'done==true' iff it found an +// applicable special case and wrote to the outputs. Otherwise acts as a no-op. +template +Status SplitEasyCases(OpKernelContext* context, const Tensor& input, + const gtl::ArraySlice sizes, + std::vector* outputs, bool* done) { + *done = false; + + int64 total_size = 0; + for (const int64 size : sizes) { + total_size += size; + } + if (total_size > input.shape().dim_size(0)) { + return errors::InvalidArgument( + "Sum of split sizes must not exceed dim0-size of input tensor"); + } + + // Special case 0: trivial 1-way split. + if (sizes.size() == 1 && sizes.at(0) == input.shape().dim_size(0)) { + outputs->push_back(input); + *done = true; + return Status::OK(); + } + + // Special case 1: input is aligned. + if (IsInnerDimsSizeAligned(input.shape())) { + int64 position = 0; + for (const int64 size : sizes) { + outputs->emplace_back(input.Slice(position, position + size)); + position += size; + } + *done = true; + return Status::OK(); + } + + return Status::OK(); +} + +// Handles the general case, on CPU. +template +Status SplitCPU(OpKernelContext* context, const Tensor& input, + const gtl::ArraySlice sizes, + std::vector* outputs) { + int64 suffix_dim_size = 1; + for (int i = 1; i < input.shape().dims(); ++i) { + suffix_dim_size *= input.shape().dim_size(i); + } + auto input_reshaped = + input.shaped({input.shape().dim_size(0), suffix_dim_size}); + + int64 position = 0; + for (const int64 size : sizes) { + TensorShape output_shape = input.shape(); + output_shape.set_dim(0, size); + Tensor output; + TF_RETURN_IF_ERROR( + context->allocate_temp(input.dtype(), output_shape, &output)); + auto output_shaped = output.shaped({size, suffix_dim_size}); + + Eigen::DSizes slice_indices{position, 0}; + Eigen::DSizes slice_sizes{size, suffix_dim_size}; + functor::Split()(context->eigen_device(), + output_shaped, input_reshaped, + slice_indices, slice_sizes); + + outputs->emplace_back(output); + + position += size; + } + + return Status::OK(); +} + +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) + +// Handles the general case, on GPU. +template +Status SplitGPU(OpKernelContext* context, const Tensor& input, + const gtl::ArraySlice& sizes, + std::vector* outputs) { + // TODO(olston, apassos): Implement this. + LOG(FATAL) << "Not yet implemented"; // Crash ok +} + +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM + +// The outer function that dispatches to the various Split*() functions above. 
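The Split*() helpers above all honor the same contract: the i-th output owns sizes[i] rows of the input, taken in order along the zeroth dimension. A standalone sketch of that contract on a plain row-major buffer may help; SplitRows and the sample data are illustrative only and do not use the TensorFlow types:

  #include <iostream>
  #include <vector>

  // Splits a row-major [rows x cols] buffer into pieces with sizes[i] rows each,
  // mirroring the contract of the Split*() helpers above (sizes must not sum to
  // more than the number of rows).
  std::vector<std::vector<float>> SplitRows(const std::vector<float>& data,
                                            int cols,
                                            const std::vector<int>& sizes) {
    std::vector<std::vector<float>> pieces;
    int row = 0;
    for (int size : sizes) {
      pieces.emplace_back(data.begin() + row * cols,
                          data.begin() + (row + size) * cols);
      row += size;
    }
    return pieces;
  }

  int main() {
    // A 5x2 "tensor" split into 2 + 3 rows, as the batch scheduler does when
    // attributing a concatenated output back to individual tasks.
    std::vector<float> data = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
    auto pieces = SplitRows(data, /*cols=*/2, /*sizes=*/{2, 3});
    for (const auto& piece : pieces) {
      std::cout << piece.size() / 2 << " rows\n";  // prints 2 then 3
    }
  }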
+template +Status Split(OpKernelContext* context, const Tensor& input, + const gtl::ArraySlice sizes, std::vector* outputs) { + bool easy_cases_done; + TF_RETURN_IF_ERROR( + SplitEasyCases(context, input, sizes, outputs, &easy_cases_done)); + if (easy_cases_done) { + return Status::OK(); + } + +#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ + (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM) +// TODO(olston, apassos): Handle non-CPU cases. +// return SplitGPU(context, input, sizes, outputs); +#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM + return SplitCPU(context, input, sizes, outputs); +} + +// Same as 'Split' above, but handles Tensor dtype automatically. +inline Status Split(OpKernelContext* context, const Tensor& input, + const gtl::ArraySlice sizes, + std::vector* outputs) { + const DataType type = input.dtype(); + Status split_status; + switch (type) { +#define CASE(type) \ + case DataTypeToEnum::value: \ + split_status = Split(context, input, sizes, outputs); \ + break; + TF_CALL_ALL_TYPES(CASE); +#undef CASE + default: + split_status = errors::InvalidArgument("Unsupported data type: ", type); + break; + } + return split_status; +} + +} // namespace concat_split_util +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_KERNELS_BATCHING_UTIL_CONCAT_SPLIT_UTIL_H_ From a6cd18a1334be6b54784f1d65fe5d435d31d2bb8 Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Wed, 22 Jul 2020 00:07:40 -0700 Subject: [PATCH 1022/2522] Add MLIR generated abs kernel for the GPU backend Extract a common base class from the Tanh op that is used for both Tanh and Abs. The abs kernel is also behind the tensorflow_enable_mlir_generated_gpu_kernels flag. PiperOrigin-RevId: 322519985 Change-Id: I4d90b8cb75743496ae3e443b7467c36bbed62487 --- tensorflow/core/kernels/cwise_op_abs.cc | 4 + tensorflow/core/kernels/mlir_generated/BUILD | 42 ++++- .../mlir_generated/cwise_op_gpu_abs.cu.cc | 40 +++++ .../mlir_generated/cwise_op_gpu_base.cu.cc | 129 +++++++++++++++ .../mlir_generated/cwise_op_gpu_base.cu.h | 80 +++++++++ .../mlir_generated/cwise_op_gpu_tanh.cu.cc | 152 +----------------- .../kernels/mlir_generated/gpu_abs_test.cc | 95 +++++++++++ .../op_definitions/abs.mlir.tmpl | 5 + .../op_definitions/tanh.mlir.tmpl | 2 +- 9 files changed, 401 insertions(+), 148 deletions(-) create mode 100644 tensorflow/core/kernels/mlir_generated/cwise_op_gpu_abs.cu.cc create mode 100644 tensorflow/core/kernels/mlir_generated/cwise_op_gpu_base.cu.cc create mode 100644 tensorflow/core/kernels/mlir_generated/cwise_op_gpu_base.cu.h create mode 100644 tensorflow/core/kernels/mlir_generated/gpu_abs_test.cc create mode 100644 tensorflow/core/kernels/mlir_generated/op_definitions/abs.mlir.tmpl diff --git a/tensorflow/core/kernels/cwise_op_abs.cc b/tensorflow/core/kernels/cwise_op_abs.cc index e4f01cf6c90..d3b09f7078a 100644 --- a/tensorflow/core/kernels/cwise_op_abs.cc +++ b/tensorflow/core/kernels/cwise_op_abs.cc @@ -21,12 +21,15 @@ REGISTER8(UnaryOp, CPU, "Abs", functor::abs, Eigen::half, bfloat16, float, REGISTER2(UnaryOp, CPU, "ComplexAbs", functor::abs, complex64, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#ifndef MLIR_GENERATED_GPU_KERNELS_ENABLED REGISTER4(UnaryOp, GPU, "Abs", functor::abs, Eigen::half, float, double, int64); +#endif REGISTER2(UnaryOp, GPU, "ComplexAbs", functor::abs, complex64, complex128); // A special GPU kernel for int32. // TODO(b/25387198): Also enable int32 in device memory. This kernel // registration requires all int32 inputs and outputs to be in host memory. 
+#ifndef MLIR_GENERATED_GPU_KERNELS_ENABLED REGISTER_KERNEL_BUILDER(Name("Abs") .Device(DEVICE_GPU) .HostMemory("x") @@ -34,6 +37,7 @@ REGISTER_KERNEL_BUILDER(Name("Abs") .TypeConstraint("T"), UnaryOp>); #endif +#endif #if TENSORFLOW_USE_SYCL REGISTER3(UnaryOp, SYCL, "Abs", functor::abs, float, double, int64); diff --git a/tensorflow/core/kernels/mlir_generated/BUILD b/tensorflow/core/kernels/mlir_generated/BUILD index fed63ce8433..79ccda50c87 100644 --- a/tensorflow/core/kernels/mlir_generated/BUILD +++ b/tensorflow/core/kernels/mlir_generated/BUILD @@ -26,11 +26,18 @@ config_setting( tf_kernel_library( name = "cwise_op", - gpu_srcs = ["cwise_op_gpu_tanh.cu.cc"], + gpu_srcs = [ + "cwise_op_gpu_base.cu.cc", + "cwise_op_gpu_base.cu.h", + "cwise_op_gpu_abs.cu.cc", + "cwise_op_gpu_tanh.cu.cc", + ], tags = ["manual"], deps = if_cuda([ + ":abs_kernels", ":tanh_kernels", "@com_google_absl//absl/strings", + "//third_party/eigen3", "@com_google_absl//absl/types:span", "//tensorflow/core:framework", "//tensorflow/core:lib", @@ -57,6 +64,25 @@ tf_cuda_cc_test( ], ) +tf_cuda_cc_test( + name = "gpu_abs_test", + size = "small", + srcs = if_mlir_generated_gpu_kernels_enabled(["gpu_abs_test.cc"]), + tags = tf_cuda_tests_tags() + ["no_rocm"], + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:tensorflow", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/common_runtime:device", + "//tensorflow/core/common_runtime:device_factory", + "//tensorflow/core/kernels:cwise_op", + "//tensorflow/core/kernels:ops_testutil", + ], +) + # TODO(b/160731748): Re-enable when it works again. # gen_kernel_library( # name = "bias_add", @@ -92,3 +118,17 @@ gen_kernel_library( ], unroll_factors = "4", ) + +gen_kernel_library( + name = "abs", + same_shape = "0,1", + tile_size = "256", + types = [ + "f16", + "f32", + "f64", + "i32", + "i64", + ], + unroll_factors = "4", +) diff --git a/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_abs.cu.cc b/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_abs.cu.cc new file mode 100644 index 00000000000..1920317a7ae --- /dev/null +++ b/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_abs.cu.cc @@ -0,0 +1,40 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include +#include + +#include "absl/types/span.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/kernels/mlir_generated/abs_f16_kernel.h" +#include "tensorflow/core/kernels/mlir_generated/abs_f32_kernel.h" +#include "tensorflow/core/kernels/mlir_generated/abs_f64_kernel.h" +#include "tensorflow/core/kernels/mlir_generated/abs_i32_kernel.h" +#include "tensorflow/core/kernels/mlir_generated/abs_i64_kernel.h" +#include "tensorflow/core/kernels/mlir_generated/cwise_op_gpu_base.cu.h" + +namespace tensorflow { +namespace { +GENERATE_OP_KERNEL_BASE(Abs); +} // namespace + +REGISTER_AND_GENERATE_KERNEL(Abs, F16, Eigen::half); +REGISTER_AND_GENERATE_KERNEL(Abs, F32, float); +REGISTER_AND_GENERATE_KERNEL(Abs, F64, double); +REGISTER_AND_GENERATE_KERNEL(Abs, I32, int32); +REGISTER_AND_GENERATE_KERNEL(Abs, I64, int64); +} // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_base.cu.cc b/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_base.cu.cc new file mode 100644 index 00000000000..6287b93d964 --- /dev/null +++ b/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_base.cu.cc @@ -0,0 +1,129 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/kernels/mlir_generated/cwise_op_gpu_base.cu.h" + +#include +#include +#include + +#include "absl/strings/string_view.h" +#include "absl/synchronization/mutex.h" +#include "absl/types/span.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/stream_executor.h" + +namespace tensorflow { +namespace { +Status CreateKernel(absl::string_view kernel_name, uint64_t num_args, + absl::string_view ptx, absl::Span cubin_data, + se::StreamExecutor* stream_exec, + std::unique_ptr& kernel_base) { + se::MultiKernelLoaderSpec loader_spec(num_args); + + if (!cubin_data.empty()) { + loader_spec.AddCudaCubinInMemory( + reinterpret_cast(cubin_data.data()), kernel_name); + } + + kernel_base.reset(new se::KernelBase(stream_exec)); + return stream_exec->GetKernel(loader_spec, kernel_base.get()); +} + +struct LaunchConfig { + se::BlockDim blockDim; + se::ThreadDim threadDim; +}; + +LaunchConfig GetLaunchConfiguration(std::vector tile_sizes, + std::vector unrolling_factors, + std::vector shape) { + LaunchConfig result; + // Ensure the vectors are length 3 and pad with ones. 
+ tile_sizes.resize(3, 1); + unrolling_factors.resize(3, 1); + shape.resize(3, 1); + // The number of threads is given by the tiling size. + result.threadDim = se::ThreadDim(tile_sizes[0], tile_sizes[1], tile_sizes[2]); + // We know that the kernel was generated by mapping the three outer-most + // dimensions to x,y,z dimensions. So we only need to compute those. + std::vector block_dims(3); + for (int i = 0; i < 3; ++i) { + // Compute the number of grids. We use ceildiv here as we have to allocate + // an extra thread/block if the division is not even. The kernel contains + // code to handle the boundaries. + uint64 number_of_threads = Eigen::divup(shape[i], unrolling_factors[i]); + int number_of_grids = Eigen::divup(number_of_threads, tile_sizes[i]); + block_dims[i] = number_of_grids; + } + result.blockDim = se::BlockDim(block_dims[0], block_dims[1], block_dims[2]); + return result; +} +} // namespace + +void MlirGeneratedUnaryOp::Compute(OpKernelContext* ctx) { + auto* stream = ctx->op_device_context()->stream(); + se::KernelBase* kernel; + { + absl::MutexLock l(&mu_); + if (!kernel_) { + OP_REQUIRES_OK(ctx, CreateKernel(name(), 10, "", cubin_data(), + stream->parent(), kernel_)); + } + kernel = kernel_.get(); + } + + const Tensor& inp = ctx->input(0); + Tensor* out = nullptr; + OP_REQUIRES_OK( + ctx, ctx->forward_input_or_allocate_output({0}, 0, inp.shape(), &out)); + + if (inp.NumElements() == 0) { + return; + } + + se::KernelArgsArray<10> args; + + args.add_device_memory_argument( + stream_executor::DeviceMemoryBase(inp.data(), inp.TotalBytes())); + args.add_device_memory_argument( + stream_executor::DeviceMemoryBase(inp.data(), inp.TotalBytes())); + args.add_argument(0); + args.add_argument(inp.NumElements()); + args.add_argument(1); + + args.add_device_memory_argument( + stream_executor::DeviceMemoryBase(out->data(), out->TotalBytes())); + args.add_device_memory_argument( + stream_executor::DeviceMemoryBase(out->data(), out->TotalBytes())); + args.add_argument(0); + args.add_argument(inp.NumElements()); + args.add_argument(1); + + // This has to be aligned with the configuration that was used when building + // the kernels. See the corresponding build rules in the `BUILD` file. + LaunchConfig config = GetLaunchConfiguration( + {256}, {4}, {static_cast(inp.NumElements())}); + OP_REQUIRES_OK(ctx, stream->parent()->Launch(stream, config.threadDim, + config.blockDim, *kernel, args)); +} + +} // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_base.cu.h b/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_base.cu.h new file mode 100644 index 00000000000..aef8164be57 --- /dev/null +++ b/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_base.cu.h @@ -0,0 +1,80 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CORE_KERNELS_MLIR_GENERATED_CWISE_OP_GPU_BASE_CU_H_ +#define TENSORFLOW_CORE_KERNELS_MLIR_GENERATED_CWISE_OP_GPU_BASE_CU_H_ + +#include +#include + +#include "absl/synchronization/mutex.h" +#include "absl/types/span.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/platform/stream_executor.h" + +namespace tensorflow { +class MlirGeneratedUnaryOp : public OpKernel { + public: + explicit MlirGeneratedUnaryOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override; + + protected: + virtual std::string name() const = 0; + virtual absl::Span cubin_data() const = 0; + + private: + std::unique_ptr kernel_; + absl::Mutex mu_; +}; + +#define GENERATE_OP_KERNEL_BASE(kernel_name) \ + class MlirGenerated##kernel_name##Op : public MlirGeneratedUnaryOp { \ + public: \ + explicit MlirGenerated##kernel_name##Op(OpKernelConstruction* ctx) \ + : MlirGeneratedUnaryOp(ctx) {} \ + \ + protected: \ + std::string name() const override { return #kernel_name "_kernel"; } \ + }; + +#define GENERATE_OP_KERNEL_FOR(kernel_name, data_type) \ + class MlirGenerated##kernel_name##data_type##Op \ + : public MlirGenerated##kernel_name##Op { \ + public: \ + explicit MlirGenerated##kernel_name##data_type##Op( \ + OpKernelConstruction* ctx) \ + : MlirGenerated##kernel_name##Op(ctx) {} \ + \ + private: \ + absl::Span cubin_data() const override { \ + return k##kernel_name##data_type##Kernel; \ + } \ + }; + +#define REGISTER_AND_GENERATE_KERNEL(kernel_name, data_type, native_data_type) \ + namespace { \ + GENERATE_OP_KERNEL_FOR(kernel_name, data_type) \ + } \ + REGISTER_KERNEL_BUILDER(Name(#kernel_name) \ + .Device(DEVICE_GPU) \ + .TypeConstraint("T"), \ + MlirGenerated##kernel_name##data_type##Op); + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_KERNELS_MLIR_GENERATED_CWISE_OP_GPU_BASE_CU_H_ diff --git a/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_tanh.cu.cc b/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_tanh.cu.cc index a33008b9549..b113c4cad34 100644 --- a/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_tanh.cu.cc +++ b/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_tanh.cu.cc @@ -13,164 +13,24 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include +#include #include -#include "absl/strings/string_view.h" #include "absl/types/span.h" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/kernels/mlir_generated/cwise_op_gpu_base.cu.h" #include "tensorflow/core/kernels/mlir_generated/tanh_f16_kernel.h" #include "tensorflow/core/kernels/mlir_generated/tanh_f32_kernel.h" #include "tensorflow/core/kernels/mlir_generated/tanh_f64_kernel.h" -#include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/platform/logging.h" -#include "tensorflow/core/platform/stream_executor.h" namespace tensorflow { namespace { -Status CreateKernel(absl::string_view kernel_name, uint64_t num_args, - absl::string_view ptx, absl::Span cubin_data, - se::StreamExecutor* stream_exec, - std::unique_ptr& kernel_base) { - se::MultiKernelLoaderSpec loader_spec(num_args); - - if (!cubin_data.empty()) { - loader_spec.AddCudaCubinInMemory( - reinterpret_cast(cubin_data.data()), kernel_name); - } - - kernel_base.reset(new se::KernelBase(stream_exec)); - return stream_exec->GetKernel(loader_spec, kernel_base.get()); -} - -struct LaunchConfig { - se::BlockDim blockDim; - se::ThreadDim threadDim; -}; - -LaunchConfig GetLaunchConfiguration(std::vector tile_sizes, - std::vector unrolling_factors, - std::vector shape) { - LaunchConfig result; - // Ensure the vectors are length 3 and pad with ones. - tile_sizes.resize(3, 1); - unrolling_factors.resize(3, 1); - shape.resize(3, 1); - // The number of threads is given by the tiling size. - result.threadDim = se::ThreadDim(tile_sizes[0], tile_sizes[1], tile_sizes[2]); - // We know that the kernel was generated by mapping the three outer-most - // dimensions to x,y,z dimensions. So we only need to compute those. - std::vector block_dims(3); - for (int i = 0; i < 3; ++i) { - // Compute the number of grids. We use ceildiv here as we have to allocate - // an extra thread/block if the division is not even. The kernel contains - // code to handle the boundaries. 
- int number_of_threads = - (shape[i] + unrolling_factors[i] - 1) / unrolling_factors[i]; - int number_of_grids = - (number_of_threads + tile_sizes[i] - 1) / tile_sizes[i]; - block_dims[i] = number_of_grids; - } - result.blockDim = se::BlockDim(block_dims[0], block_dims[1], block_dims[2]); - return result; -} - -class MlirGeneratedTanhOp : public OpKernel { - public: - explicit MlirGeneratedTanhOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} - - void Compute(OpKernelContext* ctx) override { - auto* stream = ctx->op_device_context()->stream(); - se::KernelBase* kernel; - { - std::lock_guard l(mu_); - if (!kernel_) { - OP_REQUIRES_OK(ctx, CreateKernel("tanh_kernel", 10, "", cubin_data_, - stream->parent(), kernel_)); - } - kernel = kernel_.get(); - } - - const Tensor& inp = ctx->input(0); - Tensor* out = nullptr; - OP_REQUIRES_OK( - ctx, ctx->forward_input_or_allocate_output({0}, 0, inp.shape(), &out)); - - if (inp.NumElements() == 0) { - return; - } - - se::KernelArgsArray<10> args; - - args.add_device_memory_argument( - stream_executor::DeviceMemoryBase(inp.data(), inp.TotalBytes())); - args.add_device_memory_argument( - stream_executor::DeviceMemoryBase(inp.data(), inp.TotalBytes())); - args.add_argument(0); - args.add_argument(inp.NumElements()); - args.add_argument(1); - - args.add_device_memory_argument( - stream_executor::DeviceMemoryBase(out->data(), out->TotalBytes())); - args.add_device_memory_argument( - stream_executor::DeviceMemoryBase(out->data(), out->TotalBytes())); - args.add_argument(0); - args.add_argument(inp.NumElements()); - args.add_argument(1); - - // This has to be aligned with the configuration that was used when - // generating the kernels. See the corresponding build rules in the `BUILD` - // file. - LaunchConfig config = GetLaunchConfiguration( - {256}, {4}, {static_cast(inp.NumElements())}); - OP_REQUIRES_OK( - ctx, stream->parent()->Launch(stream, config.threadDim, config.blockDim, - *kernel, args)); - } - - protected: - absl::Span cubin_data_; - - private: - std::unique_ptr kernel_; - std::mutex mu_; -}; - -class MlirGeneratedTanhF16Op : public MlirGeneratedTanhOp { - public: - explicit MlirGeneratedTanhF16Op(OpKernelConstruction* ctx) - : MlirGeneratedTanhOp(ctx) { - cubin_data_ = kTanhF16Kernel; - } -}; - -class MlirGeneratedTanhF32Op : public MlirGeneratedTanhOp { - public: - explicit MlirGeneratedTanhF32Op(OpKernelConstruction* ctx) - : MlirGeneratedTanhOp(ctx) { - cubin_data_ = kTanhF32Kernel; - } -}; - -class MlirGeneratedTanhF64Op : public MlirGeneratedTanhOp { - public: - explicit MlirGeneratedTanhF64Op(OpKernelConstruction* ctx) - : MlirGeneratedTanhOp(ctx) { - cubin_data_ = kTanhF64Kernel; - } -}; +GENERATE_OP_KERNEL_BASE(Tanh); } // namespace -REGISTER_KERNEL_BUILDER( - Name("Tanh").Device(DEVICE_GPU).TypeConstraint("T"), - MlirGeneratedTanhF16Op); -REGISTER_KERNEL_BUILDER( - Name("Tanh").Device(DEVICE_GPU).TypeConstraint("T"), - MlirGeneratedTanhF32Op); -REGISTER_KERNEL_BUILDER( - Name("Tanh").Device(DEVICE_GPU).TypeConstraint("T"), - MlirGeneratedTanhF64Op); +REGISTER_AND_GENERATE_KERNEL(Tanh, F16, Eigen::half) +REGISTER_AND_GENERATE_KERNEL(Tanh, F32, float) +REGISTER_AND_GENERATE_KERNEL(Tanh, F64, double) } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/gpu_abs_test.cc b/tensorflow/core/kernels/mlir_generated/gpu_abs_test.cc new file mode 100644 index 00000000000..ae76c023440 --- /dev/null +++ b/tensorflow/core/kernels/mlir_generated/gpu_abs_test.cc @@ -0,0 +1,95 @@ +/* Copyright 2020 The TensorFlow Authors. 
All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include +#include +#include + +#include "tensorflow/core/common_runtime/device.h" +#include "tensorflow/core/common_runtime/device_factory.h" +#include "tensorflow/core/framework/fake_input.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/kernels/ops_testutil.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace { + +class GpuAbsTest : public OpsTestBase { + protected: + void SetUp() override { + std::unique_ptr device_gpu( + tensorflow::DeviceFactory::NewDevice("GPU", {}, + "/job:a/replica:0/task:0")); + SetDevice(tensorflow::DEVICE_GPU, std::move(device_gpu)); + } + template + void RunAbsOp(std::initializer_list input) { + TensorShape shape({2, 3}); + TF_ASSERT_OK(NodeDefBuilder("abs_op", "Abs") + .Input(FakeInput(DataTypeToEnum::v())) + .Attr("T", DataTypeToEnum::v()) + .Finalize(node_def())); + + TF_ASSERT_OK(InitOp()); + AddInputFromArray(shape, input); + TF_ASSERT_OK(RunOpKernel()); + + Tensor expected_tensor(allocator(), DataTypeToEnum::value, shape); + std::vector expected; + expected.reserve(input.size()); + for (const T& inp : input) { + expected.push_back(static_cast(std::abs(static_cast(inp)))); + } + test::FillValues(&expected_tensor, expected); + test::ExpectEqual(expected_tensor, *GetOutput(0)); + } +}; + +TEST_F(GpuAbsTest, AbsFloat) { + RunAbsOp({-std::numeric_limits::infinity(), -0.1f, -0.0f, 0.0f, + 0.1f, std::numeric_limits::infinity()}); +} + +TEST_F(GpuAbsTest, AbsDouble) { + RunAbsOp({-std::numeric_limits::infinity(), -0.1, -0.0, 0.0, + 0.1, std::numeric_limits::infinity()}); +} + +TEST_F(GpuAbsTest, AbsHalf) { + RunAbsOp( + {static_cast(-std::numeric_limits::infinity()), + static_cast(-0.1), static_cast(-0.0), + static_cast(0.0), static_cast(0.1), + static_cast(std::numeric_limits::infinity())}); +} + +TEST_F(GpuAbsTest, AbsInt32) { + RunAbsOp({std::numeric_limits::min(), + std::numeric_limits::min() + 1, -1, 0, 1, + std::numeric_limits::max()}); +} + +TEST_F(GpuAbsTest, AbsInt64) { + RunAbsOp({std::numeric_limits::min(), + std::numeric_limits::min() + 1, -1, 0, 1, + std::numeric_limits::max()}); +} + +} // namespace +} // end namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/op_definitions/abs.mlir.tmpl b/tensorflow/core/kernels/mlir_generated/op_definitions/abs.mlir.tmpl new file mode 100644 index 00000000000..ed58f6279ce --- /dev/null +++ b/tensorflow/core/kernels/mlir_generated/op_definitions/abs.mlir.tmpl @@ -0,0 +1,5 @@ +func @Abs(%arg0: tensor) -> tensor { + %0 = "tf.Abs"(%arg0) { } + : (tensor) -> tensor + return %0 : tensor +} diff --git a/tensorflow/core/kernels/mlir_generated/op_definitions/tanh.mlir.tmpl b/tensorflow/core/kernels/mlir_generated/op_definitions/tanh.mlir.tmpl index 73e32155e4b..3188e86a233 100644 --- 
a/tensorflow/core/kernels/mlir_generated/op_definitions/tanh.mlir.tmpl +++ b/tensorflow/core/kernels/mlir_generated/op_definitions/tanh.mlir.tmpl @@ -1,4 +1,4 @@ -func @tanh(%arg0: tensor) -> tensor { +func @Tanh(%arg0: tensor) -> tensor { %0 = "tf.Tanh"(%arg0) { } : (tensor) -> tensor return %0 : tensor From d4d06a0c28044326ad9acd0e38947bc021b3b7f0 Mon Sep 17 00:00:00 2001 From: abhichou4 Date: Wed, 22 Jul 2020 13:10:40 +0530 Subject: [PATCH 1023/2522] add batch accumulator --- tensorflow/c/eager/tape.h | 111 +++++++++++++++++++++- tensorflow/python/eager/pywrap_tfe_src.cc | 13 ++- 2 files changed, 121 insertions(+), 3 deletions(-) diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h index 40cfa87dd66..8f08b212f87 100644 --- a/tensorflow/c/eager/tape.h +++ b/tensorflow/c/eager/tape.h @@ -252,7 +252,7 @@ class ForwardAccumulator { // // This method is not thread-safe (and in general ForwardAccumulator is not // thread-safe). - Status Accumulate( + virtual Status Accumulate( const string& op_type, const std::vector& input_tensors, const std::vector& output_tensors, gtl::ArraySlice input_tensor_id, @@ -335,6 +335,49 @@ class ForwardAccumulator { std::stack call_state_; }; +template +class ForwardBatchAccumulator + : public ForwardAccumulator { + public: + + bool ShouldRecord(gtl::ArraySlice tensor_ids, + gtl::ArraySlice dtypes); + + Status Accumulate( + const string& op_type, const std::vector& input_tensors, + const std::vector& output_tensors, + gtl::ArraySlice input_tensor_id, + gtl::ArraySlice input_dtypes, + const ForwardFunction* forward_function, + const std::function& backward_function_getter, + const std::function& backward_function_deleter) override; + + private: + Status ForwardpropFromTape( + const std::vector& output_tensors, + const std::function& backward_function_getter, + const std::function& backward_function_deleter, + const std::vector& in_grads, + std::vector* out_grads); + + std::unordered_map accumulated_gradients_; + + const VSpace& vspace_; + + struct AccumulatorCallState { + AccumulatorCallState( + GradientTape* backward_tape, + bool accumulating) + : backward_tape(backward_tape), accumulating(accumulating) {} + + GradientTape* backward_tape; + + bool accumulating; + }; + + std::stack call_state_; +}; + // Template instantiations here inline bool IsDtypeTrainable(DataType dtype) { @@ -1083,6 +1126,72 @@ Status ForwardAccumulator::Accumulate( return Status::OK(); } +template +Status ForwardBatchAccumulator::Accumulate( + const string& op_type, const std::vector& input_tensors, + const std::vector& output_tensors, + gtl::ArraySlice input_tensor_id, + gtl::ArraySlice input_dtypes, + const ForwardFunction* forward_function, + const std::function& backward_function_getter, + const std::function& backward_function_deleter) { + if (!ShouldRecord(input_tensor_id, input_dtypes)) { + return Status::OK(); + } + std::vector new_zeros; + auto delete_new_zeros = gtl::MakeCleanup([&new_zeros, this] { + for (Gradient* tensor : new_zeros) { + this->vspace_.DeleteGradient(tensor); + } + }); + std::vector in_grads; + in_grads.reserve(input_tensors.size()); + // is the shape of zero tensors fine here? 
+ for (int target_index = 0; target_index < input_tensors.size(); + ++target_index) { + const auto current_grad = + accumulated_gradients_.find(input_tensors[target_index].GetID()); + if (current_grad == accumulated_gradients_.end()) { + if (IsDtypeTrainable(input_tensors[target_index].GetDType())) { + // ForwardAccumulator defaults to zeros for unwatched Tensors, unlike + // GradientTape which uses ones. + Gradient* zero = input_tensors[target_index].ZerosLike(); + new_zeros.push_back(zero); + in_grads.push_back(zero); + } else { + in_grads.push_back(nullptr); + } + } else { + in_grads.push_back(current_grad->second); + } + } + + std::vector forward_grads; + if (forward_function==nullptr) { + //Raise apt error + } + else { + TF_RETURN_IF_ERROR((*forward_function)(in_grads, &forward_grads)); + } + for (int i = 0; i < forward_grads.size(); ++i) { + if (forward_grads[i] != nullptr) { + int64 tensor_id = output_tensors[i].GetID(); + auto existing = accumulated_gradients_.find(tensor_id); + if (existing != accumulated_gradients_.end()) { + // This is a somewhat odd case to be in, since it means we have two + // operations which supposedly both created the same Tensor. It comes up + // in recompute_grad, where the gradients have the same value. However, + // only the original gradient is connected to everything else, so we + // should still use that. + vspace_.DeleteGradient(forward_grads[i]); + } else { + accumulated_gradients_[output_tensors[i].GetID()] = forward_grads[i]; + } + } + } + return Status::OK(); +} + template void ForwardAccumulator::Watch( int64 tensor_id, Gradient* tangent) { diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index dcaaafeda5c..de931433b98 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -1535,6 +1535,10 @@ typedef tensorflow::eager::ForwardAccumulator ForwardAccumulator; +typedef tensorflow::eager::ForwardAccumulator + ForwardBatchAccumulator; + // Incremented when a GradientTape or accumulator is newly added to a set, and // used to enforce an ordering between them. 
std::atomic_uint_fast64_t tape_nesting_id_counter(0); @@ -2797,7 +2801,7 @@ PyObject* TFE_Py_TapeGradient(PyObject* tape, PyObject* target, return PyList_New(0); } -PyObject* TFE_Py_ForwardAccumulatorNew() { +PyObject* TFE_Py_ForwardAccumulatorNew(PyObject* use_batch) { TFE_Py_ForwardAccumulator_Type.tp_new = PyType_GenericNew; if (PyType_Ready(&TFE_Py_ForwardAccumulator_Type) < 0) return nullptr; TFE_Py_ForwardAccumulator* accumulator = @@ -2808,7 +2812,12 @@ PyObject* TFE_Py_ForwardAccumulatorNew() { "ForwardAccumulator requires a PyVSpace to be registered."), nullptr); } - accumulator->accumulator = new ForwardAccumulator(*py_vspace); + if (PyObject_IsTrue(use_batch)) { + accumulator->accumulator = new ForwardBatchAccumulator(*py_vspace); + } + else { + accumulator->accumulator = new ForwardAccumulator(*py_vspace); + } return reinterpret_cast(accumulator); } From 53c536bb4d24aa2bad123778095fa514943c7425 Mon Sep 17 00:00:00 2001 From: abhichou4 Date: Wed, 22 Jul 2020 13:44:41 +0530 Subject: [PATCH 1024/2522] change ForwardFunction def --- tensorflow/c/eager/tape.h | 8 ++++---- tensorflow/python/eager/pywrap_tfe_src.cc | 6 ++++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h index 8f08b212f87..a1dd9666ee0 100644 --- a/tensorflow/c/eager/tape.h +++ b/tensorflow/c/eager/tape.h @@ -177,12 +177,12 @@ class GradientTape { template class ForwardFunction : public std::function&, - std::vector*)> { + std::vector*, bool)> { public: template explicit ForwardFunction(lambda_type lambda) : std::function&, - std::vector*)>(lambda) {} + std::vector*, bool)>(lambda) {} }; // Computes Jacobian-vector products using forward-mode automatic @@ -1105,7 +1105,7 @@ Status ForwardAccumulator::Accumulate( output_tensors, backward_function_getter, backward_function_deleter, in_grads, &forward_grads)); } else { - TF_RETURN_IF_ERROR((*forward_function)(in_grads, &forward_grads)); + TF_RETURN_IF_ERROR((*forward_function)(in_grads, &forward_grads, false)); } for (int i = 0; i < forward_grads.size(); ++i) { if (forward_grads[i] != nullptr) { @@ -1171,7 +1171,7 @@ Status ForwardBatchAccumulator::Accumula //Raise apt error } else { - TF_RETURN_IF_ERROR((*forward_function)(in_grads, &forward_grads)); + TF_RETURN_IF_ERROR((*forward_function)(in_grads, &forward_grads, true)); } for (int i = 0; i < forward_grads.size(); ++i) { if (forward_grads[i] != nullptr) { diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index de931433b98..f2f11b4775d 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -2559,7 +2559,8 @@ PyObject* TFE_Py_TapeSetRecordOperation(PyObject* op_type, } else { tensorflow::eager::ForwardFunction wrapped_forward_function( [forward_function](const std::vector& input_tangents, - std::vector* output_tangents) { + std::vector* output_tangents, + bool use_batch = false) { return CallOpSpecificJVPFunction(forward_function, input_tangents, output_tangents); }); @@ -3175,7 +3176,8 @@ PyObject* RecordGradient(PyObject* op_name, PyObject* inputs, PyObject* attrs, tensorflow::eager::ForwardFunction py_forward_function( [op_name, attrs, inputs, results]( const std::vector& input_tangents, - std::vector* output_tangents) { + std::vector* output_tangents, + bool use_batch = false) { return CallJVPFunction(op_name, attrs, inputs, results, input_tangents, output_tangents); }); From 7c1f22635898f3ac09ec1edae7aad27eddc792aa Mon Sep 17 00:00:00 2001 From: Vo 
Van Nghia Date: Wed, 22 Jul 2020 15:41:56 +0700 Subject: [PATCH 1025/2522] Add DeleteDir --- .../filesystem/plugins/s3/s3_filesystem.cc | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc index 854aff7bcf0..0c92bd3dd61 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc @@ -1021,6 +1021,40 @@ void CreateDir(const TF_Filesystem* filesystem, const char* path, TF_SetStatus(status, TF_OK, ""); } +void DeleteDir(const TF_Filesystem* filesystem, const char* path, + TF_Status* status) { + Aws::String bucket, object; + ParseS3Path(path, false, &bucket, &object, status); + if (TF_GetCode(status) != TF_OK) return; + auto s3_file = static_cast(filesystem->plugin_filesystem); + GetS3Client(s3_file); + + if (object.back() != '/') object.push_back('/'); + Aws::S3::Model::ListObjectsRequest list_objects_request; + list_objects_request.WithBucket(bucket).WithPrefix(object).WithMaxKeys(2); + list_objects_request.SetResponseStreamFactory( + []() { return Aws::New(kS3FileSystemAllocationTag); }); + auto list_objects_outcome = + s3_file->s3_client->ListObjects(list_objects_request); + if (list_objects_outcome.IsSuccess()) { + auto contents = list_objects_outcome.GetResult().GetContents(); + if (contents.size() > 1 || + (contents.size() == 1 && contents[0].GetKey() != object)) { + TF_SetStatus(status, TF_UNKNOWN, + "Cannot delete a non-empty directory. " + "This operation will be retried in case this " + "is due to S3's eventual consistency."); + } + if (contents.size() == 1 && contents[0].GetKey() == object) { + Aws::String dir_path = path; + if (dir_path.back() != '/') dir_path.push_back('/'); + DeleteFile(filesystem, dir_path.c_str(), status); + } + } else { + TF_SetStatusFromAWSError(list_objects_outcome.GetError(), status); + } +} + // TODO(vnvo2409): Implement later } // namespace tf_s3_filesystem From 0e0bc42831fe37e9738a329c9cea2615dd0473aa Mon Sep 17 00:00:00 2001 From: abhichou4 Date: Wed, 22 Jul 2020 14:13:56 +0530 Subject: [PATCH 1026/2522] fix import error --- tensorflow/python/eager/forwardprop.py | 5 +++-- tensorflow/python/eager/pywrap_tfe.h | 2 +- tensorflow/python/tfe_wrapper.cc | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/eager/forwardprop.py b/tensorflow/python/eager/forwardprop.py index cd91295caab..2b6b355f100 100644 --- a/tensorflow/python/eager/forwardprop.py +++ b/tensorflow/python/eager/forwardprop.py @@ -326,7 +326,8 @@ class ForwardAccumulator(object): """ - def __init__(self, primals, tangents): + def __init__(self, primals, tangents, use_batch=False): + """Specify tensors to watch and their Jacobian-vector products. Mathematically, `tangents` is a vector right-multiplying the Jacobian matrix @@ -348,7 +349,7 @@ class ForwardAccumulator(object): ValueError: If the same tensor or variable is specified multiple times in `primals`. 
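To make the behaviour documented above concrete, here is a minimal forward-mode example using the public Python API (illustrative only; the new `use_batch` flag threaded through below is not yet part of the public signature):

    import tensorflow as tf

    # Compute a Jacobian-vector product (JVP) of y = sum(x**2) in the
    # direction `tangents`; the accumulator records ops on the watched primals.
    primals = tf.constant([1.0, 2.0, 3.0])
    tangents = tf.constant([1.0, 0.0, 0.0])

    with tf.autodiff.ForwardAccumulator(primals, tangents) as acc:
      y = tf.reduce_sum(primals ** 2)

    print(acc.jvp(y))  # 2 * primals[0] * tangents[0] = 2.0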
""" - self._accumulator = pywrap_tfe.TFE_Py_ForwardAccumulatorNew() + self._accumulator = pywrap_tfe.TFE_Py_ForwardAccumulatorNew(use_batch) self._recording = False primal_ids = set() for primal in nest.flatten(primals): diff --git a/tensorflow/python/eager/pywrap_tfe.h b/tensorflow/python/eager/pywrap_tfe.h index a5c9c181539..8ab51138d72 100755 --- a/tensorflow/python/eager/pywrap_tfe.h +++ b/tensorflow/python/eager/pywrap_tfe.h @@ -284,7 +284,7 @@ PyObject* TFE_Py_RecordGradient(PyObject* op_name, PyObject* inputs, PyObject* TFE_Py_TapeWatchedVariables(PyObject* tape); // Creates a new forward accumulator. Does not add it to the active set. -PyObject* TFE_Py_ForwardAccumulatorNew(); +PyObject* TFE_Py_ForwardAccumulatorNew(PyObject* use_batch); // Adds a ForwardAccumulator to the active set, meaning it will watch executed // operations. It must not already be in the active set. diff --git a/tensorflow/python/tfe_wrapper.cc b/tensorflow/python/tfe_wrapper.cc index bf11faaf89d..ee5ee02b8bd 100644 --- a/tensorflow/python/tfe_wrapper.cc +++ b/tensorflow/python/tfe_wrapper.cc @@ -730,8 +730,8 @@ PYBIND11_MODULE(_pywrap_tfe, m) { }); // TFE_Py_ForwardAccumulator logic. - m.def("TFE_Py_ForwardAccumulatorNew", []() { - return tensorflow::PyoOrThrow(TFE_Py_ForwardAccumulatorNew()); + m.def("TFE_Py_ForwardAccumulatorNew", [](const py::handle& use_batch) { + return tensorflow::PyoOrThrow(TFE_Py_ForwardAccumulatorNew(use_batch.ptr())); }); m.def("TFE_Py_ForwardAccumulatorSetAdd", [](const py::handle& accumulator) { From 822b68d7413236dcdc8cec6b11973accb4fe948a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Jul 2020 02:01:30 -0700 Subject: [PATCH 1027/2522] Update GraphDef version to 470. PiperOrigin-RevId: 322531876 Change-Id: Ic420e6d683661a7066b009e8db725dc10d13cc1f --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 12c7579fa7d..2bd7ef2307d 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 469 // Updated: 2020/7/21 +#define TF_GRAPH_DEF_VERSION 470 // Updated: 2020/7/22 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From 8be37f687ba3d4b182a421e05edc6adc9f527688 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Jul 2020 02:01:32 -0700 Subject: [PATCH 1028/2522] compat: Update forward compatibility horizon to 2020-07-22 PiperOrigin-RevId: 322531879 Change-Id: Ifecb3bccadf1b6cb40393386b1e4dca08dd19148 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 9d79a790cfc..1f0dc8d3c02 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 21) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 22) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From d42d8c538d4d5a0f9ed570018e0e5bacc2b2c6b5 Mon Sep 17 00:00:00 2001 From: Terry Heo Date: Wed, 22 Jul 2020 02:07:52 -0700 Subject: [PATCH 1029/2522] tflite: Refactor core/macros.h, c/c_api.h and c/common.h - Move TFL_CAPI_EXPORT to c/common.h to make them available for delegates. - Create lite:macros module to make core/macros.h available for delegates. PiperOrigin-RevId: 322532576 Change-Id: I8cef05e5f85592e9b29c778a6c91e87b09f6f7cd --- tensorflow/lite/BUILD | 6 +++++- tensorflow/lite/c/c_api.h | 14 -------------- tensorflow/lite/c/common.h | 16 ++++++++++++++++ tensorflow/lite/delegates/flex/BUILD | 1 + tensorflow/lite/delegates/flex/delegate.cc | 3 ++- .../lite/delegates/gpu/cl/gpu_api_delegate.h | 14 -------------- tensorflow/lite/delegates/gpu/delegate.h | 14 -------------- tensorflow/lite/delegates/gpu/gl_delegate.h | 14 -------------- tensorflow/lite/delegates/gpu/metal_delegate.h | 14 +------------- .../lite/delegates/hexagon/hexagon_delegate.h | 14 -------------- .../tools/benchmark/experimental/c/c_api_types.h | 16 ++++++++++++++++ 11 files changed, 41 insertions(+), 85 deletions(-) diff --git a/tensorflow/lite/BUILD b/tensorflow/lite/BUILD index 1c0882ef0aa..61b9972c4d9 100644 --- a/tensorflow/lite/BUILD +++ b/tensorflow/lite/BUILD @@ -650,10 +650,14 @@ cc_test( cc_library( name = "shared_library", hdrs = ["shared_library.h"], - copts = TFLITE_DEFAULT_COPTS, linkopts = if_not_windows(["-ldl"]), ) +cc_library( + name = "macros", + hdrs = ["core/macros.h"], +) + # Shared lib target for convenience, pulls in the core runtime and builtin ops. # Note: This target is not yet finalized, and the exact set of exported (C/C++) # APIs is subject to change. The output library name is platform dependent: diff --git a/tensorflow/lite/c/c_api.h b/tensorflow/lite/c/c_api.h index 754fc3b8bbd..880b80e69b4 100644 --- a/tensorflow/lite/c/c_api.h +++ b/tensorflow/lite/c/c_api.h @@ -66,20 +66,6 @@ limitations under the License. /// TfLiteInterpreterOptionsDelete(options); /// TfLiteModelDelete(model); -#ifdef SWIG -#define TFL_CAPI_EXPORT -#else -#if defined(_WIN32) -#ifdef TFL_COMPILE_LIBRARY -#define TFL_CAPI_EXPORT __declspec(dllexport) -#else -#define TFL_CAPI_EXPORT __declspec(dllimport) -#endif // TFL_COMPILE_LIBRARY -#else -#define TFL_CAPI_EXPORT __attribute__((visibility("default"))) -#endif // _WIN32 -#endif // SWIG - #ifdef __cplusplus extern "C" { #endif // __cplusplus diff --git a/tensorflow/lite/c/common.h b/tensorflow/lite/c/common.h index 0979c4dddf8..615b5fbaa45 100644 --- a/tensorflow/lite/c/common.h +++ b/tensorflow/lite/c/common.h @@ -233,6 +233,22 @@ void TfLiteFloatArrayFree(TfLiteFloatArray* a); } \ } while (0) +// Define TFL_CAPI_EXPORT macro to export a function properly with a shared +// library. +#ifdef SWIG +#define TFL_CAPI_EXPORT +#else +#if defined(_WIN32) +#ifdef TFL_COMPILE_LIBRARY +#define TFL_CAPI_EXPORT __declspec(dllexport) +#else +#define TFL_CAPI_EXPORT __declspec(dllimport) +#endif // TFL_COMPILE_LIBRARY +#else +#define TFL_CAPI_EXPORT __attribute__((visibility("default"))) +#endif // _WIN32 +#endif // SWIG + // Single-precision complex data type compatible with the C99 definition. typedef struct TfLiteComplex64 { float re, im; // real and imaginary parts, respectively. 
diff --git a/tensorflow/lite/delegates/flex/BUILD b/tensorflow/lite/delegates/flex/BUILD index 8320ecebf9a..7039a9fa952 100644 --- a/tensorflow/lite/delegates/flex/BUILD +++ b/tensorflow/lite/delegates/flex/BUILD @@ -92,6 +92,7 @@ cc_library( "//tensorflow/lite/core/api", "//tensorflow/lite/c:common", "//tensorflow/lite:kernel_api", + "//tensorflow/lite:macros", "//tensorflow/lite:minimal_logging", "//tensorflow/lite:string", "//tensorflow/lite:string_util", diff --git a/tensorflow/lite/delegates/flex/delegate.cc b/tensorflow/lite/delegates/flex/delegate.cc index f85b5e60f91..4664ab34700 100644 --- a/tensorflow/lite/delegates/flex/delegate.cc +++ b/tensorflow/lite/delegates/flex/delegate.cc @@ -20,6 +20,7 @@ limitations under the License. #include "absl/strings/str_cat.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/lite/context_util.h" +#include "tensorflow/lite/core/macros.h" #include "tensorflow/lite/delegates/flex/buffer_map.h" #include "tensorflow/lite/delegates/flex/kernel.h" #include "tensorflow/lite/delegates/flex/util.h" @@ -30,7 +31,7 @@ limitations under the License. namespace tflite { // Corresponding weak declaration found in lite/interpreter_builder.cc. -#if !defined(_WIN32) +#if TFLITE_HAS_ATTRIBUTE_WEAK // If weak symbol is not supported (Windows), it can use // TF_AcquireFlexDelegate() path instead. TfLiteDelegateUniquePtr AcquireFlexDelegate() { diff --git a/tensorflow/lite/delegates/gpu/cl/gpu_api_delegate.h b/tensorflow/lite/delegates/gpu/cl/gpu_api_delegate.h index e10489cc99b..81d03a9a32d 100644 --- a/tensorflow/lite/delegates/gpu/cl/gpu_api_delegate.h +++ b/tensorflow/lite/delegates/gpu/cl/gpu_api_delegate.h @@ -28,20 +28,6 @@ limitations under the License. #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/delegates/gpu/delegate.h" -#ifdef SWIG -#define TFL_CAPI_EXPORT -#else -#if defined(_WIN32) -#ifdef TFL_COMPILE_LIBRARY -#define TFL_CAPI_EXPORT __declspec(dllexport) -#else -#define TFL_CAPI_EXPORT __declspec(dllimport) -#endif // TFL_COMPILE_LIBRARY -#else -#define TFL_CAPI_EXPORT __attribute__((visibility("default"))) -#endif // _WIN32 -#endif // SWIG - #ifdef __cplusplus extern "C" { #endif // __cplusplus diff --git a/tensorflow/lite/delegates/gpu/delegate.h b/tensorflow/lite/delegates/gpu/delegate.h index f03392d9a3c..9af586bfd75 100644 --- a/tensorflow/lite/delegates/gpu/delegate.h +++ b/tensorflow/lite/delegates/gpu/delegate.h @@ -20,20 +20,6 @@ limitations under the License. #include "tensorflow/lite/c/common.h" -#ifdef SWIG -#define TFL_CAPI_EXPORT -#else -#if defined(_WIN32) -#ifdef TFL_COMPILE_LIBRARY -#define TFL_CAPI_EXPORT __declspec(dllexport) -#else -#define TFL_CAPI_EXPORT __declspec(dllimport) -#endif // TFL_COMPILE_LIBRARY -#else -#define TFL_CAPI_EXPORT __attribute__((visibility("default"))) -#endif // _WIN32 -#endif // SWIG - #ifdef __cplusplus extern "C" { #endif // __cplusplus diff --git a/tensorflow/lite/delegates/gpu/gl_delegate.h b/tensorflow/lite/delegates/gpu/gl_delegate.h index fa8eec2ad6b..e6efd646fc3 100644 --- a/tensorflow/lite/delegates/gpu/gl_delegate.h +++ b/tensorflow/lite/delegates/gpu/gl_delegate.h @@ -22,20 +22,6 @@ limitations under the License. 
#include "absl/base/macros.h" #include "tensorflow/lite/c/common.h" -#ifdef SWIG -#define TFL_CAPI_EXPORT -#else -#if defined(_WIN32) -#ifdef TFL_COMPILE_LIBRARY -#define TFL_CAPI_EXPORT __declspec(dllexport) -#else -#define TFL_CAPI_EXPORT __declspec(dllimport) -#endif // TFL_COMPILE_LIBRARY -#else -#define TFL_CAPI_EXPORT __attribute__((visibility("default"))) -#endif // _WIN32 -#endif // SWIG - #ifdef __cplusplus extern "C" { #endif // __cplusplus diff --git a/tensorflow/lite/delegates/gpu/metal_delegate.h b/tensorflow/lite/delegates/gpu/metal_delegate.h index 1cb660c42d0..e4bdba36799 100644 --- a/tensorflow/lite/delegates/gpu/metal_delegate.h +++ b/tensorflow/lite/delegates/gpu/metal_delegate.h @@ -16,19 +16,7 @@ limitations under the License. #ifndef TENSORFLOW_LITE_DELEGATES_GPU_METAL_DELEGATE_H_ #define TENSORFLOW_LITE_DELEGATES_GPU_METAL_DELEGATE_H_ -#ifdef SWIG -#define TFL_CAPI_EXPORT -#else -#if defined(_WIN32) -#ifdef TFL_COMPILE_LIBRARY -#define TFL_CAPI_EXPORT __declspec(dllexport) -#else -#define TFL_CAPI_EXPORT __declspec(dllimport) -#endif // TFL_COMPILE_LIBRARY -#else -#define TFL_CAPI_EXPORT __attribute__((visibility("default"))) -#endif // _WIN32 -#endif // SWIG +#include "tensorflow/lite/c/common.h" #ifdef __cplusplus extern "C" { diff --git a/tensorflow/lite/delegates/hexagon/hexagon_delegate.h b/tensorflow/lite/delegates/hexagon/hexagon_delegate.h index 264a132b995..931b02e4fa5 100644 --- a/tensorflow/lite/delegates/hexagon/hexagon_delegate.h +++ b/tensorflow/lite/delegates/hexagon/hexagon_delegate.h @@ -17,20 +17,6 @@ limitations under the License. #include "tensorflow/lite/c/common.h" -#ifdef SWIG -#define TFL_CAPI_EXPORT -#else -#if defined(_WIN32) -#ifdef TFL_COMPILE_LIBRARY -#define TFL_CAPI_EXPORT __declspec(dllexport) -#else -#define TFL_CAPI_EXPORT __declspec(dllimport) -#endif // TFL_COMPILE_LIBRARY -#else -#define TFL_CAPI_EXPORT __attribute__((visibility("default"))) -#endif // _WIN32 -#endif // SWIG - #ifdef __cplusplus extern "C" { #endif // __cplusplus diff --git a/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h b/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h index 0979c4dddf8..615b5fbaa45 100644 --- a/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h +++ b/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h @@ -233,6 +233,22 @@ void TfLiteFloatArrayFree(TfLiteFloatArray* a); } \ } while (0) +// Define TFL_CAPI_EXPORT macro to export a function properly with a shared +// library. +#ifdef SWIG +#define TFL_CAPI_EXPORT +#else +#if defined(_WIN32) +#ifdef TFL_COMPILE_LIBRARY +#define TFL_CAPI_EXPORT __declspec(dllexport) +#else +#define TFL_CAPI_EXPORT __declspec(dllimport) +#endif // TFL_COMPILE_LIBRARY +#else +#define TFL_CAPI_EXPORT __attribute__((visibility("default"))) +#endif // _WIN32 +#endif // SWIG + // Single-precision complex data type compatible with the C99 definition. typedef struct TfLiteComplex64 { float re, im; // real and imaginary parts, respectively. From 2e1efbaf206a044dbc3164e72525a5b047edc1c5 Mon Sep 17 00:00:00 2001 From: Jaesung Chung Date: Wed, 22 Jul 2020 03:11:12 -0700 Subject: [PATCH 1030/2522] Convert TF Broadcast op to the corresponding TFL Broadcast op when 5+ dimension inputs are given. Low dimension, where its rank is at most 4, will be handled by the Mul op as usual in order not to break acceleration support. 
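Before the converter changes below, a minimal Python-level sketch of the equivalence the low-dimension path relies on (assumption: this only illustrates the Mul-based lowering described above, not the converter code itself):

    import tensorflow as tf

    # broadcast_to is equivalent to multiplying by a filled tensor of ones,
    # which is the Mul/Fill form kept for inputs of rank <= 4.
    x = tf.constant([1, 2, 3])
    shape = tf.constant([3, 3])

    direct = tf.broadcast_to(x, shape)
    lowered = x * tf.fill(shape, tf.constant(1, dtype=x.dtype))

    assert bool(tf.reduce_all(direct == lowered))  # both are 3 rows of [1, 2, 3]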
PiperOrigin-RevId: 322539601 Change-Id: Ic4b249b64ef02aecef57f0ac7a0354e7c67a8799 --- tensorflow/compiler/mlir/lite/BUILD | 23 ++++ tensorflow/compiler/mlir/lite/ir/tfl_ops.td | 51 ++++++++ .../legalize-tf-no-runtime-verification.mlir | 6 +- .../compiler/mlir/lite/tests/legalize-tf.mlir | 12 +- tensorflow/compiler/mlir/lite/tests/ops.mlir | 18 +++ .../compiler/mlir/lite/tests/prepare-tf.mlir | 67 +++++++++++ .../mlir/lite/transforms/legalize_patterns.td | 3 + .../mlir/lite/transforms/legalize_tf.cc | 112 +----------------- .../mlir/lite/transforms/prepare_tf.cc | 43 ++++++- .../mlir/lite/utils/constant_utils.cc | 112 ++++++++++++++++++ .../compiler/mlir/lite/utils/constant_utils.h | 35 ++++++ 11 files changed, 359 insertions(+), 123 deletions(-) create mode 100644 tensorflow/compiler/mlir/lite/utils/constant_utils.cc create mode 100644 tensorflow/compiler/mlir/lite/utils/constant_utils.h diff --git a/tensorflow/compiler/mlir/lite/BUILD b/tensorflow/compiler/mlir/lite/BUILD index 8a60b292bc2..8a9704e21a8 100644 --- a/tensorflow/compiler/mlir/lite/BUILD +++ b/tensorflow/compiler/mlir/lite/BUILD @@ -237,6 +237,28 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "constant_utils", + srcs = [ + "utils/constant_utils.cc", + ], + hdrs = [ + "utils/constant_utils.h", + ], + copts = ["-std=c++14"], + deps = [ + "//tensorflow/compiler/mlir/tensorflow", + "//tensorflow/compiler/mlir/tensorflow:mangling_util", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core/platform:status", + "@llvm-project//llvm:Support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:Support", + ], +) + cc_library( name = "lstm_utils", srcs = [ @@ -346,6 +368,7 @@ cc_library( "transforms/passes.h", ], deps = [ + ":constant_utils", ":lstm_utils", ":stateful_ops_utils", ":tensorflow_lite", diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td index 715d047f0bf..3dbfdfc5e04 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td +++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td @@ -4345,4 +4345,55 @@ def TFL_CustomTfOp : Op:$body); } +def TFL_BroadcastToOp : TFL_Op<"broadcast_to", [ + PredOpTrait<"input and output must have same element type", + TFL_TCresVTEtIsSameAsOp<0, 0>>, + TFL_OperandHasRankAtMost<0, 8>, + TFL_OperandHasRank<1, 1>, + PredOpTrait<"output dimension count must be at most 8", + Or<[TFL_OperandIsUnrankedPred<1>, + TFL_OperandDimIsAtMost<1, 0, 8>]>>, + NoSideEffect]> { + let summary = "Broadcast an array for a compatible shape."; + + let description = [{ +Broadcasting is the process of making arrays to have compatible shapes +for arithmetic operations. Two shapes are compatible if for each +dimension pair they are either equal or one of them is one. When trying +to broadcast a Tensor to a shape, it starts with the trailing dimensions, +and works its way forward. + +For example, + +>>> x = tf.constant([1, 2, 3]) +>>> y = tf.broadcast_to(x, [3, 3]) +>>> print(y) +tf.Tensor( + [[1 2 3] + [1 2 3] + [1 2 3]], shape=(3, 3), dtype=int32) + +In the above example, the input Tensor with the shape of `[1, 3]` +is broadcasted to output Tensor with shape of `[3, 3]`. + +When doing broadcasted operations such as multiplying a tensor +by a scalar, broadcasting (usually) confers some time or space +benefit, as the broadcasted tensor is never materialized. + +However, `broadcast_to` does not carry with it any such benefits. 
+The newly-created tensor takes the full memory of the broadcasted +shape. (In a graph context, `broadcast_to` might be fused to +subsequent operation and then be optimized away, however.) + }]; + + let arguments = (ins + TFL_TensorOf<[F32, I32, I1, I8, QI8, UI8, QUI8, I16, QI16, I64, Complex>]>:$input, + TFL_I32OrI64Tensor:$shape + ); + + let results = (outs + TFL_TensorOf<[F32, I32, I1, I8, QI8, UI8, QUI8, I16, QI16, I64, Complex>]>:$output + ); +} + #endif // TFL_OPS diff --git a/tensorflow/compiler/mlir/lite/tests/legalize-tf-no-runtime-verification.mlir b/tensorflow/compiler/mlir/lite/tests/legalize-tf-no-runtime-verification.mlir index 90266b4e78e..1e1e9b365de 100644 --- a/tensorflow/compiler/mlir/lite/tests/legalize-tf-no-runtime-verification.mlir +++ b/tensorflow/compiler/mlir/lite/tests/legalize-tf-no-runtime-verification.mlir @@ -5,8 +5,6 @@ func @broadcast_to_bf16(%arg0: tensor<3xbf16>, %arg1: tensor<2xi64>) -> tensor<3 return %0: tensor<3x3xbf16> // CHECK-LABEL: broadcast_to_bf16 -// CHECK: [[CST:%.*]] = constant dense<1.000000e+00> : tensor -// CHECK: [[FILL:%.*]] = "tfl.fill"(%arg1, [[CST]]) : (tensor<2xi64>, tensor) -> tensor<3x3xbf16> -// CHECK: [[MUL:%.*]] = "tfl.mul"(%arg0, [[FILL]]) {fused_activation_function = "NONE"} : (tensor<3xbf16>, tensor<3x3xbf16>) -> tensor<3x3xbf16> -// CHECK: return [[MUL]] : tensor<3x3xbf16> +// CHECK: [[BCT:%.*]] = "tfl.broadcast_to"(%arg0, %arg1) : (tensor<3xbf16>, tensor<2xi64>) -> tensor<3x3xbf16> +// CHECK: return [[BCT]] : tensor<3x3xbf16> } diff --git a/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir b/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir index 7cb9c4dd22c..74a33817d32 100644 --- a/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir +++ b/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir @@ -1487,10 +1487,8 @@ func @broadcast_to_f32(%arg0: tensor<3xf32>, %arg1: tensor<2xi32>) -> tensor<3x3 return %0: tensor<3x3xf32> // CHECK-LABEL: broadcast_to_f32 -// CHECK: [[CST:%.*]] = constant dense<1.000000e+00> : tensor -// CHECK: [[FILL:%.*]] = "tfl.fill"(%arg1, [[CST]]) : (tensor<2xi32>, tensor) -> tensor<3x3xf32> -// CHECK: [[MUL:%.*]] = "tfl.mul"(%arg0, [[FILL]]) {fused_activation_function = "NONE"} : (tensor<3xf32>, tensor<3x3xf32>) -> tensor<3x3xf32> -// CHECK: return [[MUL]] : tensor<3x3xf32> +// CHECK: [[BCT:%.*]] = "tfl.broadcast_to"(%arg0, %arg1) : (tensor<3xf32>, tensor<2xi32>) -> tensor<3x3xf32> +// CHECK: return [[BCT]] : tensor<3x3xf32> } func @broadcast_to_i32(%input: tensor<3xi32>, %shape: tensor<2xi32>) -> tensor<3x3xi32> { @@ -1498,10 +1496,8 @@ func @broadcast_to_i32(%input: tensor<3xi32>, %shape: tensor<2xi32>) -> tensor<3 return %0: tensor<3x3xi32> // CHECK-LABEL: broadcast_to_i32 -// CHECK: [[CST:%.*]] = constant dense<1> : tensor -// CHECK: [[FILL:%.*]] = "tfl.fill"(%arg1, [[CST]]) : (tensor<2xi32>, tensor) -> tensor<3x3xi32> -// CHECK: [[MUL:%.*]] = "tfl.mul"(%arg0, [[FILL]]) {fused_activation_function = "NONE"} : (tensor<3xi32>, tensor<3x3xi32>) -> tensor<3x3xi32> -// CHECK: return [[MUL]] : tensor<3x3xi32> +// CHECK: [[BCT:%.*]] = "tfl.broadcast_to"(%arg0, %arg1) : (tensor<3xi32>, tensor<2xi32>) -> tensor<3x3xi32> +// CHECK: return [[BCT]] : tensor<3x3xi32> } func @matmul_batch(%arg0: tensor<10x15xf32>, %arg1: tensor<15x17xf32>) -> tensor<10x17xf32> { diff --git a/tensorflow/compiler/mlir/lite/tests/ops.mlir b/tensorflow/compiler/mlir/lite/tests/ops.mlir index 06e05987ee6..c10bd26e50a 100644 --- a/tensorflow/compiler/mlir/lite/tests/ops.mlir +++ b/tensorflow/compiler/mlir/lite/tests/ops.mlir @@ 
-2310,3 +2310,21 @@ func @main(%arg0: tensor, %arg1: tensor<1xf32>) -> tensor { }) : (tensor, tensor<1xf32>) -> (tensor) return %0#0 : tensor } + +// ----- + +// CHECK-LABEL: testBroadcastToWithI32ShapeTensor +func @testBroadcastToWithI32ShapeTensor(tensor, tensor<8xi32>) -> tensor { +^bb0(%arg0: tensor, %arg1: tensor<8xi32>): + // CHECK: "tfl.broadcast_to"(%arg0, %arg1) + %0 = "tfl.broadcast_to"(%arg0, %arg1): (tensor, tensor<8xi32>) -> tensor + return %0 : tensor +} + +// CHECK-LABEL: testBroadcastToWithI64ShapeTensor +func @testBroadcastToWithI64ShapeTensor(tensor, tensor<8xi64>) -> tensor { +^bb0(%arg0: tensor, %arg1: tensor<8xi64>): + // CHECK: "tfl.broadcast_to"(%arg0, %arg1) + %0 = "tfl.broadcast_to"(%arg0, %arg1): (tensor, tensor<8xi64>) -> tensor + return %0 : tensor +} diff --git a/tensorflow/compiler/mlir/lite/tests/prepare-tf.mlir b/tensorflow/compiler/mlir/lite/tests/prepare-tf.mlir index 7b51ec32f89..e8cbcc8d3b3 100644 --- a/tensorflow/compiler/mlir/lite/tests/prepare-tf.mlir +++ b/tensorflow/compiler/mlir/lite/tests/prepare-tf.mlir @@ -578,3 +578,70 @@ func @MatrixSetDiagV3Conversion(%arg0: tensor<3x3xi32>, %arg1: tensor<3xi32>) -> // CHECK: %[[RES:.*]] = "tf.MatrixSetDiag"(%arg0, %arg1) : (tensor<3x3xi32>, tensor<3xi32>) -> tensor<3x3xi32> // CHECK: return %[[RES]] } + +func @broadcast_to_f32_low_dim(%arg0: tensor<3xf32>, %arg1: tensor<2xi32>) -> tensor<3x3xf32> { + %0 = "tf.BroadcastTo"(%arg0, %arg1) : (tensor<3xf32>, tensor<2xi32>) -> tensor<3x3xf32> + return %0: tensor<3x3xf32> + +// CHECK-LABEL: broadcast_to_f32_low_dim +// CHECK: [[CST:%.*]] = constant dense<1.000000e+00> : tensor<3x3xf32> +// CHECK: [[MUL:%.*]] = "tf.Mul"(%arg0, [[CST]]) : (tensor<3xf32>, tensor<3x3xf32>) -> tensor<3x3xf32> +// CHECK: return [[MUL]] : tensor<3x3xf32> +} + +func @broadcast_to_i32_low_dim(%input: tensor<3xi32>, %shape: tensor<2xi32>) -> tensor<3x3xi32> { + %0 = "tf.BroadcastTo"(%input, %shape) : (tensor<3xi32>, tensor<2xi32>) -> tensor<3x3xi32> + return %0: tensor<3x3xi32> + +// CHECK-LABEL: broadcast_to_i32_low_dim +// CHECK: [[CST:%.*]] = constant dense<1> : tensor<3x3xi32> +// CHECK: [[MUL:%.*]] = "tf.Mul"(%arg0, [[CST]]) : (tensor<3xi32>, tensor<3x3xi32>) -> tensor<3x3xi32> +// CHECK: return [[MUL]] : tensor<3x3xi32> +} + +func @broadcast_to_low_dim_with_unknown_shape(%arg0: tensor<3xf32>, %arg1: tensor<*xi32>) -> tensor<3x3xf32> { + %0 = "tf.BroadcastTo"(%arg0, %arg1) : (tensor<3xf32>, tensor<*xi32>) -> tensor<3x3xf32> + return %0: tensor<3x3xf32> + +// CHECK-LABEL: broadcast_to_low_dim_with_unknown_shape +// CHECK: [[CST:%.*]] = constant dense<1.000000e+00> : tensor<3x3xf32> +// CHECK: [[MUL:%.*]] = "tf.Mul"(%arg0, [[CST]]) : (tensor<3xf32>, tensor<3x3xf32>) -> tensor<3x3xf32> +// CHECK: return [[MUL]] : tensor<3x3xf32> +} + +func @broadcast_to_i32_low_dim_with_unknown_output(%input: tensor<3xi32>, %shape: tensor<2xi32>) -> tensor<*xi32> { + %0 = "tf.BroadcastTo"(%input, %shape) : (tensor<3xi32>, tensor<2xi32>) -> tensor<*xi32> + return %0: tensor<*xi32> + +// CHECK-LABEL: broadcast_to_i32_low_dim_with_unknown_output +// CHECK: [[CST:%.*]] = constant dense<1> : tensor +// CHECK: [[FILL:%.*]] = "tf.Fill"(%arg1, [[CST]]) : (tensor<2xi32>, tensor) -> tensor<*xi32> +// CHECK: [[MUL:%.*]] = "tf.Mul"(%arg0, [[FILL]]) : (tensor<3xi32>, tensor<*xi32>) -> tensor<*xi32> +// CHECK: return [[MUL]] : tensor<*xi32> +} + +func @broadcast_to_high_dim_with_unknown_shape(%arg0: tensor<1x2x3x4x5x6xf32>, %arg1: tensor<*xi32>) -> tensor<7x8x1x2x3x4x5x6xf32> { + %0 = "tf.BroadcastTo"(%arg0, %arg1) : 
(tensor<1x2x3x4x5x6xf32>, tensor<*xi32>) -> tensor<7x8x1x2x3x4x5x6xf32> + return %0: tensor<7x8x1x2x3x4x5x6xf32> + +// CHECK-LABEL: broadcast_to_high_dim_with_unknown_shape +// CHECK: [[BCT:%.*]] = "tf.BroadcastTo"(%arg0, %arg1) : (tensor<1x2x3x4x5x6xf32>, tensor<*xi32>) -> tensor<7x8x1x2x3x4x5x6xf32> +// CHECK: return [[BCT]] : tensor<7x8x1x2x3x4x5x6xf32> +} + +func @broadcast_to_high_dim_with_unknown_output(%arg0: tensor<1x2x3x4x5x6xf32>, %arg1: tensor<8xi32>) -> tensor<*xf32> { + %0 = "tf.BroadcastTo"(%arg0, %arg1) : (tensor<1x2x3x4x5x6xf32>, tensor<8xi32>) -> tensor<*xf32> + return %0: tensor<*xf32> + +// CHECK-LABEL: broadcast_to_high_dim_with_unknown_output +// CHECK: [[BCT:%.*]] = "tf.BroadcastTo"(%arg0, %arg1) : (tensor<1x2x3x4x5x6xf32>, tensor<8xi32>) -> tensor<*xf32> +// CHECK: return [[BCT]] : tensor<*xf32> +} + +func @broadcast_to_with_unknown_shape_and_output(%arg0: tensor<1x2x3x4x5x6xf32>, %arg1: tensor<*xi32>) -> tensor<*xf32> { + %0 = "tf.BroadcastTo"(%arg0, %arg1) : (tensor<1x2x3x4x5x6xf32>, tensor<*xi32>) -> tensor<*xf32> + return %0: tensor<*xf32> + +// CHECK-LABEL: broadcast_to_with_unknown_shape_and_output +// CHECK: "tf.BroadcastTo"(%arg0, %arg1) +} diff --git a/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td b/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td index 47cfaecd3fb..a6adb8f4a61 100644 --- a/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td +++ b/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td @@ -109,6 +109,9 @@ def LegalizeArgMax : Pat<(TF_ArgMaxOp $input, $dim), def LegalizeArgMin : Pat<(TF_ArgMinOp $input, $dim), (TFL_ArgMinOp $input, $dim)>; +def LegalizeBroadcastTo : Pat<(TF_BroadcastToOp $input, $dim), + (TFL_BroadcastToOp $input, $dim)>; + def LegalizeCeil : Pat<(TF_CeilOp $arg), (TFL_CeilOp $arg)>; def LegalizeCos : Pat<(TF_CosOp $arg), (TFL_CosOp $arg)>; diff --git a/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc index 1328a2baf5d..7d6866dc570 100644 --- a/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc +++ b/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc @@ -45,6 +45,7 @@ limitations under the License. 
#include "tensorflow/compiler/mlir/lite/quantization/quantization_utils.h" #include "tensorflow/compiler/mlir/lite/transforms/passes.h" #include "tensorflow/compiler/mlir/lite/utils/attribute_utils.h" +#include "tensorflow/compiler/mlir/lite/utils/constant_utils.h" #include "tensorflow/compiler/mlir/lite/utils/validators.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #include "tensorflow/compiler/mlir/tensorflow/utils/mangling_util.h" @@ -137,7 +138,6 @@ DECL_CONVERT_OP(StridedSlice); DECL_CONVERT_OP(Unpack); DECL_CONVERT_OP(Reciprocal); DECL_CONVERT_OP(RandomUniform); -DECL_CONVERT_OP(BroadcastTo); #undef DECL_CONVERT_OP @@ -483,89 +483,6 @@ LogicalResult ConvertTFAssertOp::matchAndRewrite( return success(); } -StatusOr CreateConstOpWithSingleValue(PatternRewriter* rewriter, - Location loc, - ShapedType shaped_type, - int value) { - Type element_type = shaped_type.getElementType(); - ShapedType scalar_type = RankedTensorType::get({}, element_type); - Attribute attr; - switch (element_type.getKind()) { - case mlir::StandardTypes::F16: { - auto floatType = mlir::FloatType::getF16(element_type.getContext()); - auto floatAttr = - mlir::FloatAttr::get(floatType, static_cast(value)); - std::vector floatValues({floatAttr}); - attr = DenseElementsAttr::get(scalar_type, floatValues); - break; - } - case mlir::StandardTypes::BF16: { - auto floatType = mlir::FloatType::getBF16(element_type.getContext()); - auto floatAttr = - mlir::FloatAttr::get(floatType, static_cast(value)); - std::vector floatValues({floatAttr}); - attr = DenseElementsAttr::get(scalar_type, floatValues); - break; - } - case mlir::StandardTypes::F32: { - attr = - DenseElementsAttr::get(scalar_type, static_cast(value)); - break; - } - case mlir::StandardTypes::Complex: { - auto etype = element_type.cast().getElementType(); - if (etype.isF32()) { - auto dialect = etype.getContext()->getRegisteredDialect("tf"); - tensorflow::TensorProto repr; - repr.set_dtype(tensorflow::DT_COMPLEX64); - - tensorflow::TensorShapeProto* shape = repr.mutable_tensor_shape(); - shape->set_unknown_rank(false); - shape->add_dim()->set_size(int64_t{1}); - std::string content; - auto complex_value = - std::complex(static_cast(value), 0.0f); - content.assign(reinterpret_cast(&complex_value), - sizeof(complex_value)); - repr.set_tensor_content(content); - std::string mangled = tensorflow::mangling_util::MangleTensor(repr); - - attr = mlir::OpaqueElementsAttr::get(dialect, scalar_type, mangled); - break; - } - return Status(tensorflow::error::INVALID_ARGUMENT, "Unsupported type"); - } - case mlir::StandardTypes::Integer: { - const auto& itype = element_type.cast(); - switch (itype.getWidth()) { - case 8: - attr = DenseElementsAttr::get(scalar_type, - static_cast(value)); - break; - case 16: - attr = DenseElementsAttr::get(scalar_type, - static_cast(value)); - break; - case 32: - attr = DenseElementsAttr::get(scalar_type, - static_cast(value)); - break; - case 64: - attr = DenseElementsAttr::get(scalar_type, - static_cast(value)); - break; - default: - return Status(tensorflow::error::INVALID_ARGUMENT, - "Unsupported type"); - } - break; - } - default: - return Status(tensorflow::error::INVALID_ARGUMENT, "Unsupported type"); - } - return rewriter->create(loc, scalar_type, attr); -} - LogicalResult ConvertTFReciprocalOp::matchAndRewrite( Operation* op, PatternRewriter& rewriter) const { auto tf_reciprocal_op = cast(op); @@ -586,31 +503,6 @@ LogicalResult ConvertTFReciprocalOp::matchAndRewrite( return success(); } -LogicalResult 
ConvertTFBroadcastToOp::matchAndRewrite( - Operation* op, PatternRewriter& rewriter) const { - auto tf_broadcast_to_op = cast(op); - auto element_type = tf_broadcast_to_op.input().getType().cast(); - auto output_type = tf_broadcast_to_op.output().getType(); - - auto status_or_const_op = - CreateConstOpWithSingleValue(&rewriter, op->getLoc(), element_type, 1); - if (!status_or_const_op.ok()) { - return failure(); - } - - auto tfl_fill_op = rewriter.create( - op->getLoc(), output_type, tf_broadcast_to_op.shape(), - status_or_const_op.ValueOrDie()); - - StringAttr fused_activation_function = - StringAttr::get("NONE", rewriter.getContext()); - - rewriter.replaceOpWithNewOp( - op, output_type, tf_broadcast_to_op.input(), tfl_fill_op, - fused_activation_function); - return success(); -} - // Legalize unidirectional sequence lstm. struct LegalizeUnidirectionalSequenceLstm : public RewritePattern { explicit LegalizeUnidirectionalSequenceLstm(MLIRContext* context) @@ -751,7 +643,7 @@ void LegalizeTF::runOnFunction() { ConvertTFMatrixDiagV3Op, ConvertTFPackOp, ConvertTFReshapeOp, ConvertTFSplitOp, ConvertTFSplitVOp, ConvertTFStridedSliceOp, ConvertTFUnpackOp, ConvertTFAssertOp, ConvertTFReciprocalOp, - ConvertTFRandomUniformOp, ConvertTFBroadcastToOp>(context); + ConvertTFRandomUniformOp>(context); // Ophint python converter converted tf node pattern. patterns.insert(op); + auto input_type = tf_broadcast_to_op.input().getType().cast(); + auto output_type = tf_broadcast_to_op.output().getType().cast(); + auto shape_type = tf_broadcast_to_op.shape().getType().cast(); + Type element_type = input_type.getElementType(); + + // Allow lowering when low dimension inputs are given and its type is F32 or + // I32. + if (!((output_type.hasRank() && output_type.getRank() <= 4) || + (shape_type.hasStaticShape() && shape_type.getRank() == 1 && + shape_type.getDimSize(0) <= 4))) + return failure(); + if (!((element_type.getKind() == mlir::StandardTypes::F32) || + (element_type.getKind() == mlir::StandardTypes::Integer && + element_type.cast().getWidth() == 32))) + return failure(); + + auto status_or_const_op = + CreateConstOpWithSingleValue(&rewriter, op->getLoc(), input_type, 1); + if (!status_or_const_op.ok()) { + return failure(); + } + + auto tf_fill_op = rewriter.create( + op->getLoc(), output_type, tf_broadcast_to_op.shape(), + status_or_const_op.ValueOrDie()); + + auto mul_op = rewriter.create( + op->getLoc(), output_type, tf_broadcast_to_op.input(), tf_fill_op); + rewriter.replaceOp(op, mul_op.getResult()); + return success(); + } +}; + #include "tensorflow/compiler/mlir/lite/transforms/generated_prepare_tf.inc" // Returns success if all the operations in the `op`'s regions including `op` @@ -739,7 +780,7 @@ void PrepareTFPass::runOnFunction() { patterns.insert, TF::ConvertTFBatchMatMulOp>(ctx); } - patterns.insert(ctx); applyPatternsAndFoldGreedily(func, patterns); } diff --git a/tensorflow/compiler/mlir/lite/utils/constant_utils.cc b/tensorflow/compiler/mlir/lite/utils/constant_utils.cc new file mode 100644 index 00000000000..d244fa9d6e4 --- /dev/null +++ b/tensorflow/compiler/mlir/lite/utils/constant_utils.cc @@ -0,0 +1,112 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/mlir/lite/utils/constant_utils.h" + +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" +#include "tensorflow/compiler/mlir/tensorflow/utils/mangling_util.h" +#include "tensorflow/core/framework/tensor.pb.h" +#include "tensorflow/core/framework/tensor_shape.pb.h" +#include "tensorflow/core/platform/status.h" + +namespace mlir { +namespace TFL { + +xla::StatusOr CreateConstOpWithSingleValue( + PatternRewriter* rewriter, Location loc, ShapedType shaped_type, + int value) { + Type element_type = shaped_type.getElementType(); + ShapedType scalar_type = RankedTensorType::get({}, element_type); + Attribute attr; + switch (element_type.getKind()) { + case mlir::StandardTypes::F16: { + auto floatType = mlir::FloatType::getF16(element_type.getContext()); + auto floatAttr = + mlir::FloatAttr::get(floatType, static_cast(value)); + std::vector floatValues({floatAttr}); + attr = DenseElementsAttr::get(scalar_type, floatValues); + break; + } + case mlir::StandardTypes::BF16: { + auto floatType = mlir::FloatType::getBF16(element_type.getContext()); + auto floatAttr = + mlir::FloatAttr::get(floatType, static_cast(value)); + std::vector floatValues({floatAttr}); + attr = DenseElementsAttr::get(scalar_type, floatValues); + break; + } + case mlir::StandardTypes::F32: { + attr = + DenseElementsAttr::get(scalar_type, static_cast(value)); + break; + } + case mlir::StandardTypes::Complex: { + auto etype = element_type.cast().getElementType(); + if (etype.isF32()) { + auto dialect = etype.getContext()->getRegisteredDialect("tf"); + tensorflow::TensorProto repr; + repr.set_dtype(tensorflow::DT_COMPLEX64); + + tensorflow::TensorShapeProto* shape = repr.mutable_tensor_shape(); + shape->set_unknown_rank(false); + shape->add_dim()->set_size(int64_t{1}); + std::string content; + auto complex_value = + std::complex(static_cast(value), 0.0f); + content.assign(reinterpret_cast(&complex_value), + sizeof(complex_value)); + repr.set_tensor_content(content); + std::string mangled = tensorflow::mangling_util::MangleTensor(repr); + + attr = mlir::OpaqueElementsAttr::get(dialect, scalar_type, mangled); + break; + } + return tensorflow::Status(tensorflow::error::INVALID_ARGUMENT, + "Unsupported type"); + } + case mlir::StandardTypes::Integer: { + const auto& itype = element_type.cast(); + switch (itype.getWidth()) { + case 8: + attr = DenseElementsAttr::get(scalar_type, + static_cast(value)); + break; + case 16: + attr = DenseElementsAttr::get(scalar_type, + static_cast(value)); + break; + case 32: + attr = DenseElementsAttr::get(scalar_type, + static_cast(value)); + break; + case 64: + attr = DenseElementsAttr::get(scalar_type, + static_cast(value)); + break; + default: + return tensorflow::Status(tensorflow::error::INVALID_ARGUMENT, + "Unsupported type"); + } + break; + } + default: + return tensorflow::Status(tensorflow::error::INVALID_ARGUMENT, + "Unsupported type"); + } + return rewriter->create(loc, scalar_type, attr); +} + +} // namespace TFL +} // namespace mlir diff --git 
a/tensorflow/compiler/mlir/lite/utils/constant_utils.h b/tensorflow/compiler/mlir/lite/utils/constant_utils.h new file mode 100644 index 00000000000..308fbbc3ee5 --- /dev/null +++ b/tensorflow/compiler/mlir/lite/utils/constant_utils.h @@ -0,0 +1,35 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_MLIR_LITE_UTILS_CONSTANT_UTILS_H_ +#define TENSORFLOW_COMPILER_MLIR_LITE_UTILS_CONSTANT_UTILS_H_ + +#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project +#include "mlir/IR/Location.h" // from @llvm-project +#include "mlir/IR/Operation.h" // from @llvm-project +#include "mlir/IR/PatternMatch.h" // from @llvm-project +#include "mlir/IR/StandardTypes.h" // from @llvm-project +#include "tensorflow/compiler/xla/statusor.h" + +namespace mlir { +namespace TFL { + +// Returns a Constant op with a single value. +xla::StatusOr CreateConstOpWithSingleValue( + PatternRewriter* rewriter, Location loc, ShapedType shaped_type, int value); + +} // namespace TFL +} // namespace mlir +#endif // TENSORFLOW_COMPILER_MLIR_LITE_UTILS_CONSTANT_UTILS_H_ From 5816f8234404a546b28f5bf6d942e84ac3a3cd06 Mon Sep 17 00:00:00 2001 From: ShengYang1 Date: Wed, 22 Jul 2020 18:25:41 +0800 Subject: [PATCH 1031/2522] Fix MaxPoolGrad --- .../core/kernels/mkl_pooling_ops_common.cc | 16 ++++++++++------ tensorflow/core/kernels/mkl_pooling_ops_common.h | 4 ++++ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.cc b/tensorflow/core/kernels/mkl_pooling_ops_common.cc index 5f1c9129ec3..e7a9593dfae 100644 --- a/tensorflow/core/kernels/mkl_pooling_ops_common.cc +++ b/tensorflow/core/kernels/mkl_pooling_ops_common.cc @@ -186,7 +186,9 @@ void MklPoolingBwdPrimitive::Setup(const MklPoolingParams& bwdParams) { context_.diff_dst_md.reset(new memory::desc( {bwdParams.dst_dims}, MklDnnType(), bwdParams.src_format)); #else - context_.diff_dst_md.reset(new memory::desc(bwdParams.diff_dst_md.data)); + context_.src_md.reset(new memory::desc(bwdParams.src_md.data)); + context_.dst_md.reset(new memory::desc({bwdParams.dst_dims}, MklDnnType(), + MEMORY_FORMAT::any)); #endif // !ENABLE_MKLDNN_V1 #ifndef ENABLE_MKLDNN_V1 @@ -202,15 +204,17 @@ void MklPoolingBwdPrimitive::Setup(const MklPoolingParams& bwdParams) { *context_.diff_dst_md, bwdParams.strides, bwdParams.filter_dims, bwdParams.padding_left, bwdParams.padding_right, padding_kind::zero)); #else + // Create a backward primitive. The implementation for backward must comply to + // the workspace format it gets from forward pass, so we directly use src_md + // and dst_md here. 
context_.bwd_desc.reset(new pooling_backward::desc( - bwdParams.alg_kind, *context_.diff_src_md, *context_.diff_dst_md, - bwdParams.strides, bwdParams.filter_dims, bwdParams.padding_left, - bwdParams.padding_right)); + bwdParams.alg_kind, *context_.src_md, *context_.dst_md, bwdParams.strides, + bwdParams.filter_dims, bwdParams.padding_left, bwdParams.padding_right)); // Create a forward primitive, // which will be used as a hint for creating backward primitive. context_.fwd_desc.reset(new pooling_forward::desc( - bwdParams.prop_kind, bwdParams.alg_kind, *context_.diff_src_md, - *context_.diff_dst_md, bwdParams.strides, bwdParams.filter_dims, + bwdParams.prop_kind, bwdParams.alg_kind, *context_.src_md, + *context_.dst_md, bwdParams.strides, bwdParams.filter_dims, bwdParams.padding_left, bwdParams.padding_right)); #endif // !ENABLE_MKLDNN_V1 context_.fwd_pd.reset( diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.h b/tensorflow/core/kernels/mkl_pooling_ops_common.h index cb3674b2dd4..ec19b8a0398 100644 --- a/tensorflow/core/kernels/mkl_pooling_ops_common.h +++ b/tensorflow/core/kernels/mkl_pooling_ops_common.h @@ -280,6 +280,8 @@ class MklPoolingBwdPrimitive : public MklPrimitive { std::shared_ptr diff_dst_mem; // Memory descriptors. + std::shared_ptr src_md; + std::shared_ptr dst_md; std::shared_ptr diff_src_md; std::shared_ptr diff_dst_md; @@ -306,6 +308,8 @@ class MklPoolingBwdPrimitive : public MklPrimitive { ws_mem(nullptr), diff_src_mem(nullptr), diff_dst_mem(nullptr), + src_md(nullptr), + dst_md(nullptr), diff_src_md(nullptr), diff_dst_md(nullptr), fwd_desc(nullptr), From 6be0e40016b01fbe4d56c981205e4ae6c5281864 Mon Sep 17 00:00:00 2001 From: ngc92 <7938269+ngc92@users.noreply.github.com> Date: Wed, 22 Jul 2020 13:41:36 +0300 Subject: [PATCH 1032/2522] updated goldens --- .../tools/api/golden/v1/tensorflow.-sparse-tensor.pbtxt | 4 ++++ .../api/golden/v1/tensorflow.sparse.-sparse-tensor.pbtxt | 8 ++++---- .../tools/api/golden/v2/tensorflow.-sparse-tensor.pbtxt | 4 ++++ .../api/golden/v2/tensorflow.sparse.-sparse-tensor.pbtxt | 8 ++++---- 4 files changed, 16 insertions(+), 8 deletions(-) diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-sparse-tensor.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-sparse-tensor.pbtxt index aa89308999c..fe3a8222353 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.-sparse-tensor.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.-sparse-tensor.pbtxt @@ -52,4 +52,8 @@ tf_class { name: "get_shape" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "with_values" + argspec: "args=[\'self\', \'new_values\'], varargs=None, keywords=None, defaults=None" + } } diff --git a/tensorflow/tools/api/golden/v1/tensorflow.sparse.-sparse-tensor.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.sparse.-sparse-tensor.pbtxt index e13dad8be69..f0efebb3c8b 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.sparse.-sparse-tensor.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.sparse.-sparse-tensor.pbtxt @@ -48,12 +48,12 @@ tf_class { name: "from_value" argspec: "args=[\'cls\', \'sparse_tensor_value\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "with_values" - argspec: "args=[\'self\', \'new_values\'], varargs=None, keywords=None, defaults=None" - } member_method { name: "get_shape" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "with_values" + argspec: "args=[\'self\', \'new_values\'], varargs=None, 
keywords=None, defaults=None" + } } diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-sparse-tensor.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-sparse-tensor.pbtxt index aa89308999c..fe3a8222353 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.-sparse-tensor.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.-sparse-tensor.pbtxt @@ -52,4 +52,8 @@ tf_class { name: "get_shape" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "with_values" + argspec: "args=[\'self\', \'new_values\'], varargs=None, keywords=None, defaults=None" + } } diff --git a/tensorflow/tools/api/golden/v2/tensorflow.sparse.-sparse-tensor.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.sparse.-sparse-tensor.pbtxt index e13dad8be69..f0efebb3c8b 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.sparse.-sparse-tensor.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.sparse.-sparse-tensor.pbtxt @@ -48,12 +48,12 @@ tf_class { name: "from_value" argspec: "args=[\'cls\', \'sparse_tensor_value\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "with_values" - argspec: "args=[\'self\', \'new_values\'], varargs=None, keywords=None, defaults=None" - } member_method { name: "get_shape" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "with_values" + argspec: "args=[\'self\', \'new_values\'], varargs=None, keywords=None, defaults=None" + } } From 0fb2791e0f1148d47718e2e4d638120cbf9b541b Mon Sep 17 00:00:00 2001 From: ngc92 <7938269+ngc92@users.noreply.github.com> Date: Wed, 22 Jul 2020 14:08:13 +0300 Subject: [PATCH 1033/2522] fixed indentation --- .../python/framework/sparse_tensor_test.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/framework/sparse_tensor_test.py b/tensorflow/python/framework/sparse_tensor_test.py index 736543f669b..aee0894d85d 100644 --- a/tensorflow/python/framework/sparse_tensor_test.py +++ b/tensorflow/python/framework/sparse_tensor_test.py @@ -99,16 +99,16 @@ class SparseTensorTest(test_util.TensorFlowTestCase): self.assertIn(out.op, sp.consumers()) def testWithValues(self): - source = sparse_tensor.SparseTensor( - indices=[[0, 0], [1, 2]], values=[1., 2], dense_shape=[3, 4]) - new_tensor = source.with_values([5.0, 1.0]) - self.assertAllEqual(new_tensor.indices, source.indices) - self.assertAllEqual(new_tensor.values, [5.0, 1.0]) - self.assertAllEqual(new_tensor.dense_shape, source.dense_shape) + source = sparse_tensor.SparseTensor( + indices=[[0, 0], [1, 2]], values=[1., 2], dense_shape=[3, 4]) + new_tensor = source.with_values([5.0, 1.0]) + self.assertAllEqual(new_tensor.indices, source.indices) + self.assertAllEqual(new_tensor.values, [5.0, 1.0]) + self.assertAllEqual(new_tensor.dense_shape, source.dense_shape) - # ensure new value's shape is checked - with self.assertRaises((errors.InvalidArgumentError, ValueError)): - source.with_values([[5.0, 1.0]]) + # ensure new value's shape is checked + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + source.with_values([[5.0, 1.0]]) class ConvertToTensorOrSparseTensorTest(test_util.TensorFlowTestCase): From cc15628121e56e9fc4269d350320c1cd3c0e47ac Mon Sep 17 00:00:00 2001 From: Elena Zhelezina Date: Wed, 22 Jul 2020 12:18:39 +0100 Subject: [PATCH 1034/2522] Addressed reviewer's comments. 
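For reference, the API exercised by the golden and test updates above amounts to the following (a small sketch mirroring sparse_tensor_test.py):

    import tensorflow as tf

    # with_values keeps the sparsity pattern (indices and dense_shape) and only
    # replaces the values tensor; a value tensor of the wrong shape is rejected.
    st = tf.sparse.SparseTensor(indices=[[0, 0], [1, 2]],
                                values=[1.0, 2.0],
                                dense_shape=[3, 4])
    st2 = st.with_values([5.0, 1.0])  # same indices and dense_shape, new values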
Change-Id: Ida9f64eb66ee8e96840fdb84723286fccf0c2170 --- tensorflow/lite/python/lite.py | 2 +- tensorflow/lite/python/lite_v2_test.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tensorflow/lite/python/lite.py b/tensorflow/lite/python/lite.py index a679cdc72dd..0c27b9541e8 100644 --- a/tensorflow/lite/python/lite.py +++ b/tensorflow/lite/python/lite.py @@ -558,7 +558,7 @@ class TFLiteConverterBaseV2(TFLiteConverterBase): # We only support integer types for post training integer quantization # as we have statistical information to quantize the input and output. if quant_mode.is_post_training_integer_quantize(): - all_types = default_types + [constants.INT8, constants.INT16, constants.QUANTIZED_UINT8] + all_types = default_types + [constants.INT8, constants.QUANTIZED_UINT8, constants.INT16] if self.inference_input_type not in all_types or \ self.inference_output_type not in all_types: all_types_names = ["tf." + t.name for t in all_types] diff --git a/tensorflow/lite/python/lite_v2_test.py b/tensorflow/lite/python/lite_v2_test.py index 2462ea24ca8..cdce0e8c6d9 100644 --- a/tensorflow/lite/python/lite_v2_test.py +++ b/tensorflow/lite/python/lite_v2_test.py @@ -73,7 +73,8 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): @parameterized.named_parameters( ('_INT8InputOutput', lite.constants.INT8), - ('_UINT8InputOutput', lite.constants.QUANTIZED_UINT8)) + ('_UINT8InputOutput', lite.constants.QUANTIZED_UINT8), + ('_INT16InputOutput', lite.constants.INT16)) @test_util.run_v2_only def testInvalidFloat(self, inference_input_output_type): root = self._getSimpleVariableModel() @@ -194,7 +195,8 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): @parameterized.named_parameters( ('_INT8InputOutput', lite.constants.INT8), - ('_UINT8InputOutput', lite.constants.QUANTIZED_UINT8)) + ('_UINT8InputOutput', lite.constants.QUANTIZED_UINT8), + ('_INT16InputOutput', lite.constants.INT16)) @test_util.run_v2_only def testInvalidPostTrainingDynamicRangeQuantization( self, inference_input_output_type): From a617aa6bb7ab80d31c8a4a775feba5f3a0e82f6b Mon Sep 17 00:00:00 2001 From: Elena Zhelezina Date: Wed, 22 Jul 2020 13:16:58 +0100 Subject: [PATCH 1035/2522] Addressed reviewer's comment. Change-Id: Iae72065849ac149c24ac3db35d5babc5616aa649 --- tensorflow/lite/python/lite.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/lite/python/lite.py b/tensorflow/lite/python/lite.py index 0c27b9541e8..d134712d5cf 100644 --- a/tensorflow/lite/python/lite.py +++ b/tensorflow/lite/python/lite.py @@ -558,7 +558,8 @@ class TFLiteConverterBaseV2(TFLiteConverterBase): # We only support integer types for post training integer quantization # as we have statistical information to quantize the input and output. if quant_mode.is_post_training_integer_quantize(): - all_types = default_types + [constants.INT8, constants.QUANTIZED_UINT8, constants.INT16] + all_types = default_types + [constants.INT8, constants.QUANTIZED_UINT8,\ + constants.INT16] if self.inference_input_type not in all_types or \ self.inference_output_type not in all_types: all_types_names = ["tf." + t.name for t in all_types] From 73170e5b17ef11c35b69de9d22c976d2c36cb46f Mon Sep 17 00:00:00 2001 From: Tres Popp Date: Wed, 22 Jul 2020 05:25:26 -0700 Subject: [PATCH 1036/2522] Support CHLO broadcasting operations between scalar and unranked tensors. 
This is done through reshaping the unranked tensor into a 1D ranked tensor which will result in a safe broadcast/indexing logic when the other operand is a scalar. PiperOrigin-RevId: 322553661 Change-Id: I0e5edda2a5abe37f0dd47b070295f6a951efc3d4 --- tensorflow/compiler/mlir/hlo/BUILD | 3 + .../mhlo/transforms/chlo_legalize_to_hlo.cc | 72 +++++++++++++++++- .../transforms/chlo_legalize_to_hlo_pass.cc | 2 + .../chlo_legalize_to_hlo_broadcasts.mlir | 74 +++++++++++++++++++ 4 files changed, 147 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/mlir/hlo/BUILD b/tensorflow/compiler/mlir/hlo/BUILD index 86af6f5ffec..9e835979829 100644 --- a/tensorflow/compiler/mlir/hlo/BUILD +++ b/tensorflow/compiler/mlir/hlo/BUILD @@ -690,7 +690,9 @@ cc_library( deps = [ ":hlo", "@llvm-project//mlir:IR", + "@llvm-project//mlir:SCFDialect", "@llvm-project//mlir:Shape", + "@llvm-project//mlir:StandardOps", "@llvm-project//mlir:Transforms", ], ) @@ -720,6 +722,7 @@ cc_library( "@llvm-project//mlir:LLVMDialect", "@llvm-project//mlir:LLVMTransforms", "@llvm-project//mlir:Pass", + "@llvm-project//mlir:SCFDialect", "@llvm-project//mlir:Shape", "@llvm-project//mlir:StandardOps", "@llvm-project//mlir:Transforms", diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo.cc index 06e95e04c76..a5923f270d3 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include "mlir/Dialect/SCF/SCF.h" // from @llvm-project #include "mlir/Dialect/Shape/IR/Shape.h" // from @llvm-project +#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project #include "mlir/IR/Attributes.h" // from @llvm-project #include "mlir/IR/MLIRContext.h" // from @llvm-project #include "mlir/IR/OperationSupport.h" // from @llvm-project @@ -22,6 +24,7 @@ limitations under the License. #include "mlir/Transforms/DialectConversion.h" // from @llvm-project #include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.h" #include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" +#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h" #include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/broadcast_utils.h" namespace mlir { @@ -74,10 +77,6 @@ struct ConvertTrivialNonBroadcastBinaryOp : public OpRewritePattern { // - Legal combinations of degenerate (1-dim) implicit broadcasting. // The restriction on broadcast_dims derives from the definition of the // `shape.broadcast` op, which only supports prefix-padding. -// -// It may be possible to expand this pattern to operate on unranked tensors in -// the future by emitting more code to dynamically differentiate based on rank. -// Whether that is of any practical benefit remains to be seen. template struct ConvertRankedDynamicBroadcastBinaryOp : public OpRewritePattern { @@ -160,6 +159,68 @@ struct ConvertRankedDynamicBroadcastBinaryOp } }; +// Converts a broadcasting binary operation with a scalar operand and an +// unranked operand to a ranked broadcasting operation by dynamically reshaping +// the unranked operand to a 1D tensor. 
This will always be safe because +// broadcasting from a scalar to another shape always works. +template +struct ConvertUnrankedScalarDynamicBroadcastBinaryOp + : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + LogicalResult matchAndRewrite(ChloOpTy op, + PatternRewriter &rewriter) const override { + auto loc = op.getLoc(); + Value lhs = op.lhs(); + Value rhs = op.rhs(); + + auto lhs_ranked_type = lhs.getType().dyn_cast(); + auto lhs_unranked_type = lhs.getType().dyn_cast(); + + auto rhs_ranked_type = rhs.getType().dyn_cast(); + auto rhs_unranked_type = rhs.getType().dyn_cast(); + + bool lhs_is_scalar = lhs_ranked_type && + lhs_ranked_type.getShape().empty() && + rhs_unranked_type; + bool rhs_is_scalar = rhs_ranked_type && + rhs_ranked_type.getShape().empty() && + lhs_unranked_type; + + // Only support the case where exactly one operand is scalar and the other + // is unranked. Other patterns in this file will create more efficient + // lowerings for cases where both ranks are known or will handle the more + // generic case of both inputs being unranked. + if (!(lhs_is_scalar ^ rhs_is_scalar)) return failure(); + + auto result_type = op.getResult().getType().template dyn_cast(); + + // Reshape the non-scalar value into a dynamically sized, rank-1 tensor + Value shape = + rewriter.create(loc, lhs_is_scalar ? rhs : lhs); + Value num_elements = rewriter.create(loc, shape); + Value size = rewriter.create(loc, num_elements); + Value size_tensor = rewriter.create(loc, size); + Value reshaped = rewriter.create( + loc, RankedTensorType::get({-1}, result_type.getElementType()), + lhs_is_scalar ? rhs : lhs, size_tensor); + + // Create a new ranked Chlo op that will be further lowered by other + // patterns into Mhlo. + SmallVector operands{lhs_is_scalar ? lhs : reshaped, + rhs_is_scalar ? rhs : reshaped}; + Value computed = rewriter.create( + loc, SmallVector{reshaped.getType()}, operands, op.getAttrs()); + + // Reshape the result back into an unranked tensor. + Value shape_tensor = rewriter.create( + loc, RankedTensorType::get({-1}, rewriter.getIndexType()), shape); + rewriter.replaceOpWithNewOp(op, result_type, + computed, shape_tensor); + + return success(); + } +}; + template void PopulateForBinaryOp(MLIRContext *context, OwningRewritePatternList *patterns) { @@ -169,6 +230,9 @@ void PopulateForBinaryOp(MLIRContext *context, patterns->insert< ConvertRankedDynamicBroadcastBinaryOp>( context, 5); + patterns->insert< + ConvertUnrankedScalarDynamicBroadcastBinaryOp>( + context); } template diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo_pass.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo_pass.cc index 48749c7d43d..89aa9bad997 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo_pass.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo_pass.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include "mlir/Dialect/SCF/SCF.h" // from @llvm-project #include "mlir/Dialect/Shape/IR/Shape.h" // from @llvm-project #include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project #include "mlir/Pass/Pass.h" // from @llvm-project @@ -37,6 +38,7 @@ struct TestChloLegalizeToHloPass // The conversion uses helpers from the Standard dialect. 
conversionTarget.addLegalDialect(); conversionTarget.addLegalDialect(); + conversionTarget.addLegalDialect(); PopulateLegalizeChloToHloPatterns(&getContext(), &conversionPatterns); diff --git a/tensorflow/compiler/mlir/hlo/tests/chlo_legalize_to_hlo_broadcasts.mlir b/tensorflow/compiler/mlir/hlo/tests/chlo_legalize_to_hlo_broadcasts.mlir index 20ad579c9cf..7782b4dcf6b 100644 --- a/tensorflow/compiler/mlir/hlo/tests/chlo_legalize_to_hlo_broadcasts.mlir +++ b/tensorflow/compiler/mlir/hlo/tests/chlo_legalize_to_hlo_broadcasts.mlir @@ -237,3 +237,77 @@ func @xorWithoutBroadcast(%arg0: tensor<4xi1>, %arg1: tensor<4xi1>) -> tensor<4x %0 = chlo.broadcast_xor %arg0, %arg1 : (tensor<4xi1>, tensor<4xi1>) -> tensor<4xi1> return %0 : tensor<4xi1> } + +// ----- +func @addScalarUnranked(%arg0: tensor, %arg1: tensor<*xf32>) -> tensor<*xf32> { + %0 = chlo.broadcast_add %arg0, %arg1 : (tensor, tensor<*xf32>) + -> tensor<*xf32> + return %0 : tensor<*xf32> +} + +// CHECK-LABEL: func @addScalarUnranked( +// CHECK-SAME: %[[ARG_0:.*]]: tensor, +// CHECK-SAME: %[[ARG_1:.*]]: tensor<*xf32> +// CHECK-SAME: ) -> tensor<*xf32> { +// First handle the dynamic reshaping of the unranked operand +// to a 1D tensor. +// CHECK: %[[SHAPE_1:.*]] = shape.shape_of %[[ARG_1]] : tensor<*xf32> +// CHECK: %[[NUM_ELEMENTS:.*]] = shape.num_elements %[[SHAPE_1]] +// CHECK: %[[SIZE:.*]] = shape.size_to_index %[[NUM_ELEMENTS]] +// CHECK: %[[SIZE_TENSOR:.*]] = tensor_from_elements(%[[SIZE]]) : tensor<1xindex> +// CHECK: %[[RESHAPED:.*]] = "mhlo.dynamic_reshape"(%[[ARG_1]], %[[SIZE_TENSOR]]) : (tensor<*xf32>, tensor<1xindex>) -> tensor +// The assuming region is part of the second stage of lowering +// with ranked broadcasting logic. +// CHECK: %[[SHAPE_0:.*]] = shape.shape_of %[[ARG_0]] : tensor +// CHECK: %[[SHAPE_RESHAPED:.*]] = shape.shape_of %[[RESHAPED]] : tensor +// CHECK: %[[WITNESS:.*]] = shape.cstr_broadcastable %[[SHAPE_0]], %[[SHAPE_RESHAPED]] +// CHECK: %[[ASSUMING_RESULT:.*]] = shape.assuming %[[WITNESS]] -> (tensor) { +// CHECK: %[[SCALAR_SHAPE:.*]] = shape.const_shape [] +// CHECK: %[[BROADCASTED_SHAPE:.*]] = shape.broadcast %[[SCALAR_SHAPE]], %[[SHAPE_RESHAPED]] +// CHECK: %[[SHAPE_TENSOR:.*]] = shape.to_extent_tensor %[[BROADCASTED_SHAPE]] : tensor<1xindex> +// CHECK: %[[BROADCASTED_LHS:.*]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG_0]], %[[SHAPE_TENSOR]]) {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor, tensor<1xindex>) -> tensor +// CHECK: %[[BROADCASTED_RHS:.*]] = "mhlo.dynamic_broadcast_in_dim"(%[[RESHAPED]], %[[SHAPE_TENSOR]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} : (tensor, tensor<1xindex>) -> tensor +// CHECK: %[[BROADCASTED_RESULT:.*]] = mhlo.add %[[BROADCASTED_LHS]], %[[BROADCASTED_RHS]] : tensor +// CHECK: shape.assuming_yield %[[BROADCASTED_RESULT]] : tensor +// CHECK: } +// As part of the unranked logic, the result is reshaped back +// to an unranked tensor. 
+// CHECK: %[[PROPER_SHAPE_TENSOR:.*]] = shape.to_extent_tensor %[[SHAPE_1]] : tensor +// CHECK: %[[RESHAPED_RESULT:.*]] = "mhlo.dynamic_reshape"(%[[VAL_19:.*]], %[[PROPER_SHAPE_TENSOR]]) : (tensor, tensor) -> tensor<*xf32> +// CHECK: return %[[RESHAPED_RESULT]] : tensor<*xf32> +// CHECK: } + +// ----- +func @addUnrankedScalar(%arg0: tensor<*xf32>, %arg1: tensor) -> tensor<*xf32> { + %0 = chlo.broadcast_add %arg0, %arg1 : (tensor<*xf32>, tensor) + -> tensor<*xf32> + return %0 : tensor<*xf32> +} +// CHECK-LABEL: func @addUnrankedScalar( +// CHECK-SAME: %[[ARG_0:.*]]: tensor<*xf32>, +// CHECK-SAME: %[[ARG_1:.*]]: tensor) -> tensor<*xf32> { +// First handle the dynamic reshaping of the unranked operand +// to a 1D tensor. +// CHECK: %[[SHAPE_0:.*]] = shape.shape_of %[[ARG_0]] : tensor<*xf32> +// CHECK: %[[NUM_ELEMENTS:.*]] = shape.num_elements %[[SHAPE_0]] +// CHECK: %[[SIZE:.*]] = shape.size_to_index %[[NUM_ELEMENTS]] +// CHECK: %[[SIZE_TENSOR:.*]] = tensor_from_elements(%[[SIZE]]) : tensor<1xindex> +// CHECK: %[[RESHAPED:.*]] = "mhlo.dynamic_reshape"(%[[ARG_0]], %[[SIZE_TENSOR]]) : (tensor<*xf32>, tensor<1xindex>) -> tensor +// The assuming region is part of the second stage of lowering +// with ranked broadcasting logic. +// CHECK: %[[SHAPE_RESHAPED:.*]] = shape.shape_of %[[RESHAPED]] : tensor +// CHECK: %[[SHAPE_1:.*]] = shape.shape_of %[[ARG_1]] : tensor +// CHECK: %[[WITNESS:.*]] = shape.cstr_broadcastable %[[SHAPE_RESHAPED]], %[[SHAPE_1]] +// CHECK: %[[ASSUMING_RESULT:.*]] = shape.assuming %[[WITNESS]] -> (tensor) { +// CHECK: %[[SHAPE_TENSOR:.*]] = shape.to_extent_tensor %[[SHAPE_RESHAPED]] : tensor<1xindex> +// CHECK: %[[BROADCASTED_LHS:.*]] = "mhlo.dynamic_broadcast_in_dim"(%[[RESHAPED]], %[[SHAPE_TENSOR]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} : (tensor, tensor<1xindex>) -> tensor +// CHECK: %[[BROADCASTED_RHS:.*]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG_1]], %[[SHAPE_TENSOR]]) {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor, tensor<1xindex>) -> tensor +// CHECK: %[[BROADCASTED_RESULT:.*]] = mhlo.add %[[BROADCASTED_LHS]], %[[BROADCASTED_RHS]] : tensor +// CHECK: shape.assuming_yield %[[BROADCASTED_RESULT]] : tensor +// CHECK: } +// As part of the unranked logic, the result is reshaped back +// to an unranked tensor. 
+// CHECK: %[[PROPER_SHAPE_TENSOR:.*]] = shape.to_extent_tensor %[[SHAPE_0]] : tensor +// CHECK: %[[RESHAPED_RESULT:.*]] = "mhlo.dynamic_reshape"(%[[VAL_19:.*]], %[[PROPER_SHAPE_TENSOR]]) : (tensor, tensor) -> tensor<*xf32> +// CHECK: return %[[RESHAPED_RESULT]] : tensor<*xf32> +// CHECK: } From de3213b3889075f9e74e3880ceff04b799a16c8b Mon Sep 17 00:00:00 2001 From: abhichou4 Date: Wed, 22 Jul 2020 19:01:32 +0530 Subject: [PATCH 1037/2522] reformat --- tensorflow/c/eager/tape.h | 36 +- tensorflow/python/eager/forwardprop.py | 3 +- tensorflow/python/eager/pywrap_tfe_src.cc | 39 +-- tensorflow/python/tfe_wrapper.cc | 385 +++++++++++----------- 4 files changed, 221 insertions(+), 242 deletions(-) diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h index a1dd9666ee0..a2ecc460977 100644 --- a/tensorflow/c/eager/tape.h +++ b/tensorflow/c/eager/tape.h @@ -335,14 +335,13 @@ class ForwardAccumulator { std::stack call_state_; }; -template +template class ForwardBatchAccumulator - : public ForwardAccumulator { + : public ForwardAccumulator { public: - bool ShouldRecord(gtl::ArraySlice tensor_ids, gtl::ArraySlice dtypes); - + Status Accumulate( const string& op_type, const std::vector& input_tensors, const std::vector& output_tensors, @@ -350,8 +349,9 @@ class ForwardBatchAccumulator gtl::ArraySlice input_dtypes, const ForwardFunction* forward_function, const std::function& backward_function_getter, - const std::function& backward_function_deleter) override; - + const std::function& backward_function_deleter) + override; + private: Status ForwardpropFromTape( const std::vector& output_tensors, @@ -362,20 +362,20 @@ class ForwardBatchAccumulator std::unordered_map accumulated_gradients_; - const VSpace& vspace_; + const VSpace& vspace_; struct AccumulatorCallState { AccumulatorCallState( GradientTape* backward_tape, bool accumulating) : backward_tape(backward_tape), accumulating(accumulating) {} - - GradientTape* backward_tape; - - bool accumulating; + + GradientTape* backward_tape; + + bool accumulating; }; - - std::stack call_state_; + + std::stack call_state_; }; // Template instantiations here @@ -1127,7 +1127,8 @@ Status ForwardAccumulator::Accumulate( } template -Status ForwardBatchAccumulator::Accumulate( +Status +ForwardBatchAccumulator::Accumulate( const string& op_type, const std::vector& input_tensors, const std::vector& output_tensors, gtl::ArraySlice input_tensor_id, @@ -1167,10 +1168,9 @@ Status ForwardBatchAccumulator::Accumula } std::vector forward_grads; - if (forward_function==nullptr) { - //Raise apt error - } - else { + if (forward_function == nullptr) { + // Raise apt error + } else { TF_RETURN_IF_ERROR((*forward_function)(in_grads, &forward_grads, true)); } for (int i = 0; i < forward_grads.size(); ++i) { diff --git a/tensorflow/python/eager/forwardprop.py b/tensorflow/python/eager/forwardprop.py index 2b6b355f100..9f55a896512 100644 --- a/tensorflow/python/eager/forwardprop.py +++ b/tensorflow/python/eager/forwardprop.py @@ -218,7 +218,7 @@ pywrap_tfe.TFE_Py_RegisterJVPFunction(_jvp_dispatch) @tf_export("autodiff.ForwardAccumulator", v1=[]) -class ForwardAccumulator(object): +class ForwardAccumulator(): """Computes Jacobian-vector products ("JVP"s) using forward-mode autodiff. Compare to `tf.GradientTape` which computes vector-Jacobian products ("VJP"s) @@ -327,7 +327,6 @@ class ForwardAccumulator(object): """ def __init__(self, primals, tangents, use_batch=False): - """Specify tensors to watch and their Jacobian-vector products. 
Mathematically, `tangents` is a vector right-multiplying the Jacobian matrix diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index f2f11b4775d..101b13f614e 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -393,7 +393,7 @@ bool SetOpAttrList( } if (type == TF_ATTR_STRING) { - std::unique_ptr values(new const void*[num_values]); + std::unique_ptr values(new const void*[num_values]); std::unique_ptr lengths(new size_t[num_values]); for (int i = 0; i < num_values; ++i) { tensorflow::StringPiece value; @@ -443,7 +443,7 @@ bool SetOpAttrList( std::unique_ptr buffer(new int64_t[total_dims]); // Copy the input dims into the buffer and set dims to point to // the start of each list's dims. - std::unique_ptr dims(new const int64_t*[num_values]); + std::unique_ptr dims(new const int64_t*[num_values]); std::unique_ptr num_dims(new int[num_values]); int64_t* offset = buffer.get(); for (int i = 0; i < num_values; ++i) { @@ -477,7 +477,7 @@ bool SetOpAttrList( status); if (!status->status.ok()) return false; } else if (type == TF_ATTR_FUNC) { - std::unique_ptr funcs(new const TFE_Op*[num_values]); + std::unique_ptr funcs(new const TFE_Op*[num_values]); for (int i = 0; i < num_values; ++i) { tensorflow::Safe_PyObjectPtr py_value(PySequence_ITEM(py_list, i)); // Allow: @@ -539,7 +539,7 @@ void SetOpAttrListDefault( TF_Status* status) { if (type == TF_ATTR_STRING) { int num_values = attr.default_value().list().s_size(); - std::unique_ptr values(new const void*[num_values]); + std::unique_ptr values(new const void*[num_values]); std::unique_ptr lengths(new size_t[num_values]); (*attr_list_sizes)[key] = num_values; for (int i = 0; i < num_values; i++) { @@ -595,7 +595,7 @@ void SetOpAttrListDefault( std::unique_ptr buffer(new int64_t[total_dims]); // Copy the input dims into the buffer and set dims to point to // the start of each list's dims. - std::unique_ptr dims(new const int64_t*[num_values]); + std::unique_ptr dims(new const int64_t*[num_values]); std::unique_ptr num_dims(new int[num_values]); int64_t* offset = buffer.get(); for (int i = 0; i < num_values; ++i) { @@ -615,7 +615,7 @@ void SetOpAttrListDefault( } else if (type == TF_ATTR_FUNC) { int num_values = attr.default_value().list().func_size(); (*attr_list_sizes)[key] = num_values; - std::unique_ptr funcs(new const TFE_Op*[num_values]); + std::unique_ptr funcs(new const TFE_Op*[num_values]); for (int i = 0; i < num_values; i++) { funcs[i] = GetFunc(ctx, attr.default_value().list().func(i), status); } @@ -1535,9 +1535,9 @@ typedef tensorflow::eager::ForwardAccumulator ForwardAccumulator; -typedef tensorflow::eager::ForwardAccumulator - ForwardBatchAccumulator; +typedef tensorflow::eager::ForwardAccumulator + ForwardBatchAccumulator; // Incremented when a GradientTape or accumulator is newly added to a set, and // used to enforce an ordering between them. 
@@ -2560,7 +2560,7 @@ PyObject* TFE_Py_TapeSetRecordOperation(PyObject* op_type, tensorflow::eager::ForwardFunction wrapped_forward_function( [forward_function](const std::vector& input_tangents, std::vector* output_tangents, - bool use_batch = false) { + bool use_batch = false) { return CallOpSpecificJVPFunction(forward_function, input_tangents, output_tangents); }); @@ -2814,11 +2814,10 @@ PyObject* TFE_Py_ForwardAccumulatorNew(PyObject* use_batch) { nullptr); } if (PyObject_IsTrue(use_batch)) { - accumulator->accumulator = new ForwardBatchAccumulator(*py_vspace); - } - else { - accumulator->accumulator = new ForwardAccumulator(*py_vspace); - } + accumulator->accumulator = new ForwardBatchAccumulator(*py_vspace); + } else { + accumulator->accumulator = new ForwardAccumulator(*py_vspace); + } return reinterpret_cast(accumulator); } @@ -3176,8 +3175,7 @@ PyObject* RecordGradient(PyObject* op_name, PyObject* inputs, PyObject* attrs, tensorflow::eager::ForwardFunction py_forward_function( [op_name, attrs, inputs, results]( const std::vector& input_tangents, - std::vector* output_tangents, - bool use_batch = false) { + std::vector* output_tangents, bool use_batch = false) { return CallJVPFunction(op_name, attrs, inputs, results, input_tangents, output_tangents); }); @@ -3741,10 +3739,9 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject* args) { for (Py_ssize_t j = 0; j < len; j++) { PyObject* py_input = fast_input_array[j]; tensorflow::Safe_PyObjectPtr py_eager_tensor; - if (!ConvertToTensor( - op_exec_info, py_input, &py_eager_tensor, - []() { return tensorflow::DT_INVALID; }, - [](const tensorflow::DataType dtype) {}, status)) { + if (!ConvertToTensor(op_exec_info, py_input, &py_eager_tensor, + []() { return tensorflow::DT_INVALID; }, + [](const tensorflow::DataType dtype) {}, status)) { return nullptr; } diff --git a/tensorflow/python/tfe_wrapper.cc b/tensorflow/python/tfe_wrapper.cc index ee5ee02b8bd..8003af2dbbe 100644 --- a/tensorflow/python/tfe_wrapper.cc +++ b/tensorflow/python/tfe_wrapper.cc @@ -369,30 +369,28 @@ PYBIND11_MODULE(_pywrap_tfe, m) { }); // // TFE_Context Logic - m.def( - "TFE_NewContext", - [](const TFE_ContextOptions* opts) { - tensorflow::Safe_TF_StatusPtr status = - tensorflow::make_safe(TF_NewStatus()); - TFE_Context* context = TFE_NewContext(opts, status.get()); - tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); - return tensorflow::PyoOrThrow(tensorflow::OutputTFE_Context(context)); - }, - py::return_value_policy::reference); + m.def("TFE_NewContext", + [](const TFE_ContextOptions* opts) { + tensorflow::Safe_TF_StatusPtr status = + tensorflow::make_safe(TF_NewStatus()); + TFE_Context* context = TFE_NewContext(opts, status.get()); + tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); + return tensorflow::PyoOrThrow(tensorflow::OutputTFE_Context(context)); + }, + py::return_value_policy::reference); m.def("TFE_DeleteContext", [](py::handle& o) { TFE_DeleteContext(tensorflow::InputTFE_Context(o)); }); - m.def( - "TFE_ContextListDevices", - [](py::handle& o) { - tensorflow::Safe_TF_StatusPtr status = - tensorflow::make_safe(TF_NewStatus()); - auto output = TFE_ContextListDevices(tensorflow::InputTFE_Context(o), - status.get()); - tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); - return output; - }, - py::return_value_policy::reference); + m.def("TFE_ContextListDevices", + [](py::handle& o) { + tensorflow::Safe_TF_StatusPtr status = + tensorflow::make_safe(TF_NewStatus()); + auto output = 
TFE_ContextListDevices(tensorflow::InputTFE_Context(o), + status.get()); + tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); + return output; + }, + py::return_value_policy::reference); m.def("TFE_HostAddressSpace", [](py::handle& o, TF_Buffer& buf) { TFE_HostAddressSpace(tensorflow::InputTFE_Context(o), &buf); }); @@ -535,13 +533,12 @@ PYBIND11_MODULE(_pywrap_tfe, m) { }); // TFE_Executor logic - m.def( - "TFE_NewExecutor", - [](const bool is_async) { - TFE_Executor* exc = TFE_NewExecutor(is_async); - return exc; - }, - py::return_value_policy::reference); + m.def("TFE_NewExecutor", + [](const bool is_async) { + TFE_Executor* exc = TFE_NewExecutor(is_async); + return exc; + }, + py::return_value_policy::reference); m.def("TFE_DeleteExecutor", &TFE_DeleteExecutor); m.def("TFE_ExecutorIsAsync", &TFE_ExecutorIsAsync); m.def("TFE_ExecutorWaitForAllPendingNodes", [](TFE_Executor& exc) { @@ -731,7 +728,8 @@ PYBIND11_MODULE(_pywrap_tfe, m) { // TFE_Py_ForwardAccumulator logic. m.def("TFE_Py_ForwardAccumulatorNew", [](const py::handle& use_batch) { - return tensorflow::PyoOrThrow(TFE_Py_ForwardAccumulatorNew(use_batch.ptr())); + return tensorflow::PyoOrThrow( + TFE_Py_ForwardAccumulatorNew(use_batch.ptr())); }); m.def("TFE_Py_ForwardAccumulatorSetAdd", [](const py::handle& accumulator) { @@ -854,48 +852,45 @@ PYBIND11_MODULE(_pywrap_tfe, m) { m.def("TFE_MonitoringCounterCellIncrementBy", &TFE_MonitoringCounterCellIncrementBy); m.def("TFE_MonitoringCounterCellValue", &TFE_MonitoringCounterCellValue); - m.def( - "TFE_MonitoringNewCounter0", - [](const char* name, const char* description) { - tensorflow::Safe_TF_StatusPtr status = - tensorflow::make_safe(TF_NewStatus()); - auto output = - TFE_MonitoringNewCounter0(name, status.get(), description); - tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); - return output; - }, - py::return_value_policy::reference); + m.def("TFE_MonitoringNewCounter0", + [](const char* name, const char* description) { + tensorflow::Safe_TF_StatusPtr status = + tensorflow::make_safe(TF_NewStatus()); + auto output = + TFE_MonitoringNewCounter0(name, status.get(), description); + tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); + return output; + }, + py::return_value_policy::reference); m.def("TFE_MonitoringDeleteCounter0", &TFE_MonitoringDeleteCounter0, py::return_value_policy::reference); m.def("TFE_MonitoringGetCellCounter0", &TFE_MonitoringGetCellCounter0, py::return_value_policy::reference); - m.def( - "TFE_MonitoringNewCounter1", - [](const char* name, const char* description, const char* label1) { - tensorflow::Safe_TF_StatusPtr status = - tensorflow::make_safe(TF_NewStatus()); - auto output = - TFE_MonitoringNewCounter1(name, status.get(), description, label1); - tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); - return output; - }, - py::return_value_policy::reference); + m.def("TFE_MonitoringNewCounter1", + [](const char* name, const char* description, const char* label1) { + tensorflow::Safe_TF_StatusPtr status = + tensorflow::make_safe(TF_NewStatus()); + auto output = TFE_MonitoringNewCounter1(name, status.get(), + description, label1); + tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); + return output; + }, + py::return_value_policy::reference); m.def("TFE_MonitoringDeleteCounter1", &TFE_MonitoringDeleteCounter1, py::return_value_policy::reference); m.def("TFE_MonitoringGetCellCounter1", &TFE_MonitoringGetCellCounter1, py::return_value_policy::reference); - m.def( - "TFE_MonitoringNewCounter2", - [](const char* 
name, const char* description, const char* label1, - const char* label2) { - tensorflow::Safe_TF_StatusPtr status = - tensorflow::make_safe(TF_NewStatus()); - auto output = TFE_MonitoringNewCounter2(name, status.get(), description, - label1, label2); - tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); - return output; - }, - py::return_value_policy::reference); + m.def("TFE_MonitoringNewCounter2", + [](const char* name, const char* description, const char* label1, + const char* label2) { + tensorflow::Safe_TF_StatusPtr status = + tensorflow::make_safe(TF_NewStatus()); + auto output = TFE_MonitoringNewCounter2(name, status.get(), + description, label1, label2); + tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); + return output; + }, + py::return_value_policy::reference); m.def("TFE_MonitoringDeleteCounter2", &TFE_MonitoringDeleteCounter2, py::return_value_policy::reference); m.def("TFE_MonitoringGetCellCounter2", &TFE_MonitoringGetCellCounter2, @@ -904,48 +899,45 @@ PYBIND11_MODULE(_pywrap_tfe, m) { // TFE_MonitoringIntGauge Logic m.def("TFE_MonitoringIntGaugeCellSet", &TFE_MonitoringIntGaugeCellSet); m.def("TFE_MonitoringIntGaugeCellValue", &TFE_MonitoringIntGaugeCellValue); - m.def( - "TFE_MonitoringNewIntGauge0", - [](const char* name, const char* description) { - tensorflow::Safe_TF_StatusPtr status = - tensorflow::make_safe(TF_NewStatus()); - auto output = - TFE_MonitoringNewIntGauge0(name, status.get(), description); - tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); - return output; - }, - py::return_value_policy::reference); + m.def("TFE_MonitoringNewIntGauge0", + [](const char* name, const char* description) { + tensorflow::Safe_TF_StatusPtr status = + tensorflow::make_safe(TF_NewStatus()); + auto output = + TFE_MonitoringNewIntGauge0(name, status.get(), description); + tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); + return output; + }, + py::return_value_policy::reference); m.def("TFE_MonitoringDeleteIntGauge0", &TFE_MonitoringDeleteIntGauge0, py::return_value_policy::reference); m.def("TFE_MonitoringGetCellIntGauge0", &TFE_MonitoringGetCellIntGauge0, py::return_value_policy::reference); - m.def( - "TFE_MonitoringNewIntGauge1", - [](const char* name, const char* description, const char* label1) { - tensorflow::Safe_TF_StatusPtr status = - tensorflow::make_safe(TF_NewStatus()); - auto output = - TFE_MonitoringNewIntGauge1(name, status.get(), description, label1); - tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); - return output; - }, - py::return_value_policy::reference); + m.def("TFE_MonitoringNewIntGauge1", + [](const char* name, const char* description, const char* label1) { + tensorflow::Safe_TF_StatusPtr status = + tensorflow::make_safe(TF_NewStatus()); + auto output = TFE_MonitoringNewIntGauge1(name, status.get(), + description, label1); + tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); + return output; + }, + py::return_value_policy::reference); m.def("TFE_MonitoringDeleteIntGauge1", &TFE_MonitoringDeleteIntGauge1, py::return_value_policy::reference); m.def("TFE_MonitoringGetCellIntGauge1", &TFE_MonitoringGetCellIntGauge1, py::return_value_policy::reference); - m.def( - "TFE_MonitoringNewIntGauge2", - [](const char* name, const char* description, const char* label1, - const char* label2) { - tensorflow::Safe_TF_StatusPtr status = - tensorflow::make_safe(TF_NewStatus()); - auto output = TFE_MonitoringNewIntGauge2(name, status.get(), - description, label1, label2); - 
tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); - return output; - }, - py::return_value_policy::reference); + m.def("TFE_MonitoringNewIntGauge2", + [](const char* name, const char* description, const char* label1, + const char* label2) { + tensorflow::Safe_TF_StatusPtr status = + tensorflow::make_safe(TF_NewStatus()); + auto output = TFE_MonitoringNewIntGauge2(name, status.get(), + description, label1, label2); + tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); + return output; + }, + py::return_value_policy::reference); m.def("TFE_MonitoringDeleteIntGauge2", &TFE_MonitoringDeleteIntGauge2, py::return_value_policy::reference); m.def("TFE_MonitoringGetCellIntGauge2", &TFE_MonitoringGetCellIntGauge2, @@ -953,48 +945,45 @@ PYBIND11_MODULE(_pywrap_tfe, m) { m.def("TFE_MonitoringStringGaugeCellSet", &TFE_MonitoringStringGaugeCellSet); m.def("TFE_MonitoringStringGaugeCellValue", &TFE_MonitoringStringGaugeCellValue); - m.def( - "TFE_MonitoringNewStringGauge0", - [](const char* name, const char* description) { - tensorflow::Safe_TF_StatusPtr status = - tensorflow::make_safe(TF_NewStatus()); - auto output = - TFE_MonitoringNewStringGauge0(name, status.get(), description); - tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); - return output; - }, - py::return_value_policy::reference); + m.def("TFE_MonitoringNewStringGauge0", + [](const char* name, const char* description) { + tensorflow::Safe_TF_StatusPtr status = + tensorflow::make_safe(TF_NewStatus()); + auto output = + TFE_MonitoringNewStringGauge0(name, status.get(), description); + tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); + return output; + }, + py::return_value_policy::reference); // TFE_MonitoringStringGauge Logic m.def("TFE_MonitoringDeleteStringGauge0", &TFE_MonitoringDeleteStringGauge0); m.def("TFE_MonitoringGetCellStringGauge0", &TFE_MonitoringGetCellStringGauge0, py::return_value_policy::reference); - m.def( - "TFE_MonitoringNewStringGauge1", - [](const char* name, const char* description, const char* label1) { - tensorflow::Safe_TF_StatusPtr status = - tensorflow::make_safe(TF_NewStatus()); - auto output = TFE_MonitoringNewStringGauge1(name, status.get(), - description, label1); - tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); - return output; - }, - py::return_value_policy::reference); + m.def("TFE_MonitoringNewStringGauge1", + [](const char* name, const char* description, const char* label1) { + tensorflow::Safe_TF_StatusPtr status = + tensorflow::make_safe(TF_NewStatus()); + auto output = TFE_MonitoringNewStringGauge1(name, status.get(), + description, label1); + tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); + return output; + }, + py::return_value_policy::reference); m.def("TFE_MonitoringDeleteStringGauge1", &TFE_MonitoringDeleteStringGauge1); m.def("TFE_MonitoringGetCellStringGauge1", &TFE_MonitoringGetCellStringGauge1, py::return_value_policy::reference); - m.def( - "TFE_MonitoringNewStringGauge2", - [](const char* name, const char* description, const char* label1, - const char* label2) { - tensorflow::Safe_TF_StatusPtr status = - tensorflow::make_safe(TF_NewStatus()); - auto output = TFE_MonitoringNewStringGauge2( - name, status.get(), description, label1, label2); - tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); - return output; - }, - py::return_value_policy::reference); + m.def("TFE_MonitoringNewStringGauge2", + [](const char* name, const char* description, const char* label1, + const char* label2) { + tensorflow::Safe_TF_StatusPtr 
status = + tensorflow::make_safe(TF_NewStatus()); + auto output = TFE_MonitoringNewStringGauge2( + name, status.get(), description, label1, label2); + tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); + return output; + }, + py::return_value_policy::reference); m.def("TFE_MonitoringDeleteStringGauge2", &TFE_MonitoringDeleteStringGauge2); m.def("TFE_MonitoringGetCellStringGauge2", &TFE_MonitoringGetCellStringGauge2, py::return_value_policy::reference); @@ -1002,48 +991,45 @@ PYBIND11_MODULE(_pywrap_tfe, m) { // TFE_MonitoringBoolGauge Logic m.def("TFE_MonitoringBoolGaugeCellSet", &TFE_MonitoringBoolGaugeCellSet); m.def("TFE_MonitoringBoolGaugeCellValue", &TFE_MonitoringBoolGaugeCellValue); - m.def( - "TFE_MonitoringNewBoolGauge0", - [](const char* name, const char* description) { - tensorflow::Safe_TF_StatusPtr status = - tensorflow::make_safe(TF_NewStatus()); - auto output = - TFE_MonitoringNewBoolGauge0(name, status.get(), description); - tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); - return output; - }, - py::return_value_policy::reference); + m.def("TFE_MonitoringNewBoolGauge0", + [](const char* name, const char* description) { + tensorflow::Safe_TF_StatusPtr status = + tensorflow::make_safe(TF_NewStatus()); + auto output = + TFE_MonitoringNewBoolGauge0(name, status.get(), description); + tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); + return output; + }, + py::return_value_policy::reference); m.def("TFE_MonitoringDeleteBoolGauge0", &TFE_MonitoringDeleteBoolGauge0, py::return_value_policy::reference); m.def("TFE_MonitoringGetCellBoolGauge0", &TFE_MonitoringGetCellBoolGauge0, py::return_value_policy::reference); - m.def( - "TFE_MonitoringNewBoolGauge1", - [](const char* name, const char* description, const char* label1) { - tensorflow::Safe_TF_StatusPtr status = - tensorflow::make_safe(TF_NewStatus()); - auto output = TFE_MonitoringNewBoolGauge1(name, status.get(), - description, label1); - tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); - return output; - }, - py::return_value_policy::reference); + m.def("TFE_MonitoringNewBoolGauge1", + [](const char* name, const char* description, const char* label1) { + tensorflow::Safe_TF_StatusPtr status = + tensorflow::make_safe(TF_NewStatus()); + auto output = TFE_MonitoringNewBoolGauge1(name, status.get(), + description, label1); + tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); + return output; + }, + py::return_value_policy::reference); m.def("TFE_MonitoringDeleteBoolGauge1", &TFE_MonitoringDeleteBoolGauge1, py::return_value_policy::reference); m.def("TFE_MonitoringGetCellBoolGauge1", &TFE_MonitoringGetCellBoolGauge1, py::return_value_policy::reference); - m.def( - "TFE_MonitoringNewBoolGauge2", - [](const char* name, const char* description, const char* label1, - const char* label2) { - tensorflow::Safe_TF_StatusPtr status = - tensorflow::make_safe(TF_NewStatus()); - auto output = TFE_MonitoringNewBoolGauge2(name, status.get(), - description, label1, label2); - tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); - return output; - }, - py::return_value_policy::reference); + m.def("TFE_MonitoringNewBoolGauge2", + [](const char* name, const char* description, const char* label1, + const char* label2) { + tensorflow::Safe_TF_StatusPtr status = + tensorflow::make_safe(TF_NewStatus()); + auto output = TFE_MonitoringNewBoolGauge2( + name, status.get(), description, label1, label2); + tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); + return output; + }, + 
py::return_value_policy::reference); m.def("TFE_MonitoringDeleteBoolGauge2", &TFE_MonitoringDeleteBoolGauge2, py::return_value_policy::reference); m.def("TFE_MonitoringGetCellBoolGauge2", &TFE_MonitoringGetCellBoolGauge2, @@ -1057,50 +1043,47 @@ PYBIND11_MODULE(_pywrap_tfe, m) { py::return_value_policy::reference); m.def("TFE_MonitoringDeleteBuckets", &TFE_MonitoringDeleteBuckets, py::return_value_policy::reference); - m.def( - "TFE_MonitoringNewSampler0", - [](const char* name, TFE_MonitoringBuckets* buckets, - const char* description) { - tensorflow::Safe_TF_StatusPtr status = - tensorflow::make_safe(TF_NewStatus()); - auto output = - TFE_MonitoringNewSampler0(name, buckets, status.get(), description); - tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); - return output; - }, - py::return_value_policy::reference); + m.def("TFE_MonitoringNewSampler0", + [](const char* name, TFE_MonitoringBuckets* buckets, + const char* description) { + tensorflow::Safe_TF_StatusPtr status = + tensorflow::make_safe(TF_NewStatus()); + auto output = TFE_MonitoringNewSampler0(name, buckets, status.get(), + description); + tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); + return output; + }, + py::return_value_policy::reference); m.def("TFE_MonitoringDeleteSampler0", &TFE_MonitoringDeleteSampler0, py::return_value_policy::reference); m.def("TFE_MonitoringGetCellSampler0", &TFE_MonitoringGetCellSampler0, py::return_value_policy::reference); - m.def( - "TFE_MonitoringNewSampler1", - [](const char* name, TFE_MonitoringBuckets* buckets, - const char* description, const char* label1) { - tensorflow::Safe_TF_StatusPtr status = - tensorflow::make_safe(TF_NewStatus()); - auto output = TFE_MonitoringNewSampler1(name, buckets, status.get(), - description, label1); - tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); - return output; - }, - py::return_value_policy::reference); + m.def("TFE_MonitoringNewSampler1", + [](const char* name, TFE_MonitoringBuckets* buckets, + const char* description, const char* label1) { + tensorflow::Safe_TF_StatusPtr status = + tensorflow::make_safe(TF_NewStatus()); + auto output = TFE_MonitoringNewSampler1(name, buckets, status.get(), + description, label1); + tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); + return output; + }, + py::return_value_policy::reference); m.def("TFE_MonitoringDeleteSampler1", &TFE_MonitoringDeleteSampler1, py::return_value_policy::reference); m.def("TFE_MonitoringGetCellSampler1", &TFE_MonitoringGetCellSampler1, py::return_value_policy::reference); - m.def( - "TFE_MonitoringNewSampler2", - [](const char* name, TFE_MonitoringBuckets* buckets, - const char* description, const char* label1, const char* label2) { - tensorflow::Safe_TF_StatusPtr status = - tensorflow::make_safe(TF_NewStatus()); - auto output = TFE_MonitoringNewSampler2(name, buckets, status.get(), - description, label1, label2); - tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); - return output; - }, - py::return_value_policy::reference); + m.def("TFE_MonitoringNewSampler2", + [](const char* name, TFE_MonitoringBuckets* buckets, + const char* description, const char* label1, const char* label2) { + tensorflow::Safe_TF_StatusPtr status = + tensorflow::make_safe(TF_NewStatus()); + auto output = TFE_MonitoringNewSampler2(name, buckets, status.get(), + description, label1, label2); + tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get()); + return output; + }, + py::return_value_policy::reference); m.def("TFE_MonitoringDeleteSampler2", 
&TFE_MonitoringDeleteSampler2, py::return_value_policy::reference); m.def("TFE_MonitoringGetCellSampler2", &TFE_MonitoringGetCellSampler2, From 57748fc041155be81e29cfdfca3cd8f8a3bf0458 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 22 Jul 2020 06:52:37 -0700 Subject: [PATCH 1038/2522] [XLA][MLIR] Emit memref strides in the right order Before f32[130,8]{0,1} becomes memref<130x8xf32, affine_map<(d0, d1) -> (d0 * 130 + d1)> but should become memref<130x8xf32, affine_map<(d0, d1) -> (d0 + d1 * 130)> PiperOrigin-RevId: 322565293 Change-Id: Ida7bbbe836f3896c5407c44c072524d00c2aed2f --- tensorflow/compiler/mlir/xla/hlo_utils.cc | 13 +++++++------ .../xla/service/mlir_gpu/tests/copy_transpose.hlo | 2 +- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/mlir/xla/hlo_utils.cc b/tensorflow/compiler/mlir/xla/hlo_utils.cc index 84c574139e9..cf78c81908d 100644 --- a/tensorflow/compiler/mlir/xla/hlo_utils.cc +++ b/tensorflow/compiler/mlir/xla/hlo_utils.cc @@ -77,13 +77,14 @@ StatusOr> GetPermutationIfAvailable( return tensorflow::errors::Internal( "Permutations for dynamic shapes are not yet supported"); } - llvm::SmallVector permuted_sizes; - for (auto dim : llvm::reverse(shape.layout().minor_to_major())) { - permuted_sizes.push_back(shape.dimensions(dim)); + int64_t accumulated_stride = 1; + llvm::SmallVector strides(shape.rank(), 1); + for (int64 dim : LayoutUtil::MinorToMajor(shape)) { + strides[dim] = accumulated_stride; + accumulated_stride *= shape.dimensions(dim); } - return llvm::SmallVector{AffineMap::get( - permuted_sizes.size(), 0, - makeCanonicalStridedLayoutExpr(permuted_sizes, builder.getContext()))}; + return llvm::SmallVector{ + makeStridedLinearLayoutMap(strides, /*offset=*/0, builder.getContext())}; } } // namespace diff --git a/tensorflow/compiler/xla/service/mlir_gpu/tests/copy_transpose.hlo b/tensorflow/compiler/xla/service/mlir_gpu/tests/copy_transpose.hlo index 3a3dd22b338..8656b4edeb7 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/tests/copy_transpose.hlo +++ b/tensorflow/compiler/xla/service/mlir_gpu/tests/copy_transpose.hlo @@ -6,7 +6,7 @@ ENTRY %CopyTranspose (x: f32[2,4]) -> f32[2,4]{0,1} { ROOT %copy = f32[2,4]{0,1} copy(f32[2,4] %x) } -// CHECK: #[[MAP0:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1)> +// CHECK: #[[MAP0:.*]] = affine_map<(d0, d1) -> (d0 + d1 * 2)> // CHECK: func @copy(%[[OPERAND:.*]]: memref<2x4xf32>, // CHECK-SAME: %[[RESULT:.*]]: memref<2x4xf32, #[[MAP0]]>) // CHECK: "lmhlo.copy"(%[[OPERAND]], %[[RESULT]]) From b2a98aea17c546abfe2aca6d0b9eac68b1bd840b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Jul 2020 06:59:04 -0700 Subject: [PATCH 1039/2522] Integrate LLVM at llvm/llvm-project@82dbb1b2b4f1 Updates LLVM usage to match [82dbb1b2b4f1](https://github.com/llvm/llvm-project/commit/82dbb1b2b4f1) PiperOrigin-RevId: 322566150 Change-Id: I33d9d9fe741067cebde5ad73e21871cd5cbafe5a --- tensorflow/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 4c1847a8c6b..8cd81be8689 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -711,8 +711,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. 
- LLVM_COMMIT = "b2589200957af50e7d166afca9df6ad46d7418c6" - LLVM_SHA256 = "24c69e3a036d0ec6650b4e05f544aff8a9df65759baf24b13b7fcd4d0948478a" + LLVM_COMMIT = "82dbb1b2b4f1e70ca453cca60a4ba5b856058fc0" + LLVM_SHA256 = "d010981ef3a2531f106b213eb7c27e227b8b49bb87db63346210ec919ebbe392" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From 3ebd3f8ff4889e80fad43056a8079d421da2d4a3 Mon Sep 17 00:00:00 2001 From: Chuanhao Zhuge Date: Wed, 22 Jul 2020 09:09:06 -0700 Subject: [PATCH 1040/2522] Set the default device for converting function to MLIR. Enabling some defun Python benchmarks for TFRT. PiperOrigin-RevId: 322586704 Change-Id: Ib51c805bf3410f8387f17e53b4201a9bc9b06489 --- tensorflow/python/eager/benchmarks_test.py | 31 ++++------------------ 1 file changed, 5 insertions(+), 26 deletions(-) diff --git a/tensorflow/python/eager/benchmarks_test.py b/tensorflow/python/eager/benchmarks_test.py index 24e86c77a14..a7e2423aa76 100644 --- a/tensorflow/python/eager/benchmarks_test.py +++ b/tensorflow/python/eager/benchmarks_test.py @@ -570,7 +570,6 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): self._benchmark_tfe_py_execute_matmul( m, transpose_b=False, num_iters=self._num_iters_2_by_2) - @test_util.disable_tfrt("Mutex corrupt: waiting writer with no waiters") def benchmark_defun_matmul_2_by_2_CPU(self): with context.device(CPU): m = self._m_2_by_2.cpu() @@ -587,7 +586,6 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): num_iters=self._num_iters_2_by_2, execution_mode=context.ASYNC) - @test_util.disable_tfrt("Mutex corrupt: waiting writer with no waiters") def benchmark_defun_matmul_forward_backward_2_by_2_CPU(self): with context.device(CPU): m = self._m_2_by_2.cpu() @@ -643,7 +641,7 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): self._benchmark_tfe_py_execute_matmul( m, transpose_b=False, num_iters=self._num_iters_2_by_2) - @test_util.disable_tfrt("Graph is not supported yet. b/156187905") + @test_util.disable_tfrt("copy to GPU not supported") def benchmark_defun_matmul_2_by_2_GPU(self): if not context.num_gpus(): return @@ -652,7 +650,7 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): self._benchmark_defun_matmul( m, transpose_b=False, num_iters=self._num_iters_2_by_2) - @test_util.disable_tfrt("Graph is not supported yet. b/156187905") + @test_util.disable_tfrt("copy to GPU not supported") def benchmark_defun_args_matmul_2_by_2_GPU(self): if not context.num_gpus(): return @@ -672,7 +670,6 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): num_iters=self._num_iters_2_by_2, execution_mode=context.ASYNC) - @test_util.disable_tfrt("Graph is not supported yet. 
b/156187905") def benchmark_nested_defun_matmul_2_by_2(self): m = self._m_2_by_2.cpu() self._benchmark_nested_defun_matmul( @@ -765,7 +762,7 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): self._benchmark_tfe_py_execute_matmul( m, transpose_b=True, num_iters=self._num_iters_100_by_784) - @test_util.disable_tfrt("defun not supported") + @test_util.disable_tfrt("copy to GPU not supported") def benchmark_defun_matmul_100_by_784_GPU(self): if not context.num_gpus(): return @@ -774,8 +771,8 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): self._benchmark_defun_matmul( m, transpose_b=True, num_iters=self._num_iters_100_by_784) - @test_util.disable_tfrt("defun not supported") - def benchmark_nested_defun_matmul_100_by_784(self): + @test_util.disable_tfrt("copy to GPU not supported") + def benchmark_nested_defun_matmul_100_by_784_GPU(self): m = self._m_100_by_784.gpu() self._benchmark_nested_defun_matmul( m, transpose_b=True, num_iters=self._num_iters_100_by_784) @@ -847,35 +844,27 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): func() self._run(func, 3000) - @test_util.disable_tfrt("Graph is not supported yet. b/156187905") def benchmark_forwardprop_matmul_256_by_2096_CPU(self): self._benchmark_forwardprop_matmul_CPU(shape=(256, 2096)) - @test_util.disable_tfrt("Graph is not supported yet. b/156187905") def benchmark_forwardprop_in_defun_matmul_256_by_2096_CPU(self): self._benchmark_forwardprop_in_defun_matmul_CPU(shape=(256, 2096)) - @test_util.disable_tfrt("Graph is not supported yet. b/156187905") def benchmark_forwardprop_in_defun_of_defun_matmul_256_by_2096_CPU(self): self._benchmark_forwardprop_in_defun_of_defun_matmul_CPU(shape=(256, 2096)) - @test_util.disable_tfrt("Graph is not supported yet. b/156187905") def benchmark_forwardprop_of_defun_matmul_256_by_2096_CPU(self): self._benchmark_forwardprop_of_defun_matmul_CPU(shape=(256, 2096)) - @test_util.disable_tfrt("Graph is not supported yet. b/156187905") def benchmark_forwardprop_matmul_100_by_784_CPU(self): self._benchmark_forwardprop_matmul_CPU(shape=(100, 784)) - @test_util.disable_tfrt("Graph is not supported yet. b/156187905") def benchmark_forwardprop_in_defun_matmul_100_by_784_CPU(self): self._benchmark_forwardprop_in_defun_matmul_CPU(shape=(100, 784)) - @test_util.disable_tfrt("Graph is not supported yet. b/156187905") def benchmark_forwardprop_in_defun_of_defun_matmul_100_by_784_CPU(self): self._benchmark_forwardprop_in_defun_of_defun_matmul_CPU(shape=(100, 784)) - @test_util.disable_tfrt("Graph is not supported yet. 
b/156187905") def benchmark_forwardprop_of_defun_matmul_100_by_784_CPU(self): self._benchmark_forwardprop_of_defun_matmul_CPU(shape=(100, 784)) @@ -1117,7 +1106,6 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): func = lambda: array_ops.transpose(m, perm, conjugate) self._run(func, num_iters, execution_mode=execution_mode) - @test_util.disable_tfrt("ConvertToEagerTensorUncached error") def benchmark_tf_transpose_2_by_2_CPU(self): with context.device(CPU): m = self._m_2_by_2.cpu() @@ -1129,7 +1117,6 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): m = self._m_2_by_2.gpu() self._benchmark_transpose(m, num_iters=self._num_iters_2_by_2) - @test_util.disable_tfrt("ConvertToEagerTensorUncached error") def benchmark_tf_transpose_variable_2_by_2_CPU(self): with context.device(CPU): m = resource_variable_ops.ResourceVariable(self._m_2_by_2) @@ -1141,7 +1128,6 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): m = resource_variable_ops.ResourceVariable(self._m_2_by_2) self._benchmark_transpose(m, num_iters=self._num_iters_2_by_2) - @test_util.disable_tfrt("Graph is not supported yet. b/156187905") def benchmark_defun_without_signature(self): def func(t1, t2, t3, t4, t5, t6, t7, t8): @@ -1153,7 +1139,6 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): cache_computation = lambda: defined(t, t, t, t, t, t, t, t) self._run(cache_computation, 30000) - @test_util.disable_tfrt("Graph is not supported yet. b/156187905") def benchmark_defun_without_signature_and_with_kwargs(self): def func(t1, t2, t3, t4, t5, t6, t7, t8): @@ -1166,7 +1151,6 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): return defined(t1=t, t2=t, t3=t, t4=t, t5=t, t6=t, t7=t, t8=t) self._run(cache_computation, 30000) - @test_util.disable_tfrt("Graph is not supported yet. b/156187905") def benchmark_defun_with_signature(self): def func(t1, t2, t3, t4, t5, t6, t7, t8): @@ -1179,7 +1163,6 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): signature_computation = lambda: defined(t, t, t, t, t, t, t, t) self._run(signature_computation, 30000) - @test_util.disable_tfrt("Graph is not supported yet. b/156187905") def benchmark_defun_with_signature_and_kwargs(self): def func(t1, t2, t3, t4, t5, t6, t7, t8): @@ -1232,7 +1215,6 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): self._benchmark_read_variable_with_tape( m, num_iters=self._num_iters_2_by_2) - @test_util.disable_tfrt("Scan, loops need fallback") def benchmarkScan(self): elems = math_ops.range(1600) @@ -1242,7 +1224,6 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): self._run(scan, 100) - @test_util.disable_tfrt("Scan, loops need fallback") def benchmarkScanDefun(self): elems = math_ops.range(1600) @@ -1357,11 +1338,9 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): resources.append(resource_variable_ops.ResourceVariable(self._m_2)) self._run(lambda: add_all(resources), num_iters) - @test_util.disable_tfrt("Graph is not supported yet. b/156187905") def benchmarkFunctionWithFiveResourceInputs(self): self._benchmarkFunctionWithResourceInputs(5, 1000) - @test_util.disable_tfrt("Graph is not supported yet. b/156187905") def benchmarkFunctionWithFiveHundredResourceInputs(self): self._benchmarkFunctionWithResourceInputs(500, 100) From 23f6e460f37d474d2224255789b22da153ac446a Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 22 Jul 2020 09:46:33 -0700 Subject: [PATCH 1041/2522] The non-android ASharedMemory_create has a bug, where if two calls to create memory regions use the same 'name', they will be mapped to the to same /dev/shm file and trip over each other (particularly if they're different sizes). This CL ensures that each created memory region is unique. PiperOrigin-RevId: 322593774 Change-Id: Ib1137045604955871dd2e33aae8205275201d4b1 --- tensorflow/lite/nnapi/nnapi_implementation.cc | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/nnapi/nnapi_implementation.cc b/tensorflow/lite/nnapi/nnapi_implementation.cc index 52def4de8fd..f18e3b33240 100644 --- a/tensorflow/lite/nnapi/nnapi_implementation.cc +++ b/tensorflow/lite/nnapi/nnapi_implementation.cc @@ -20,6 +20,7 @@ limitations under the License. #include #include +#include #include #ifdef __ANDROID__ @@ -64,8 +65,20 @@ void* LoadFunction(void* handle, const char* name, bool optional) { #ifndef __ANDROID__ // Add /dev/shm implementation of shared memory for non-Android platforms -int ASharedMemory_create(const char* name, size_t size) { - int fd = shm_open(name, O_RDWR | O_CREAT, 0644); +int ASharedMemory_create(const char* /* name */, size_t size) { + // name should not be used to identify the memory region (hence + // 'anonymous' shared memory). Generate a unique name for every create call. + char _tmpname[L_tmpnam]; + if (tmpnam_r(_tmpname) == nullptr) { + return -1; + } + + // tmpnam will produce a string containing with slashes, but shm_open + // won't like that. + std::string _name = std::string(_tmpname); + std::replace(_name.begin(), _name.end(), '/', '-'); + + int fd = shm_open(_name.c_str(), O_RDWR | O_CREAT, 0644); if (fd < 0) { return fd; } From 89f8d5562ed4bf9e884f07711ecbbb1af79f4386 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Wed, 22 Jul 2020 09:50:55 -0700 Subject: [PATCH 1042/2522] Fix a typo in the Windows GPU build. PiperOrigin-RevId: 322594657 Change-Id: I6a504bf758dcf7992d432ef5104672c4f2c672f3 --- tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh index fb195c19ce7..4bba7797556 100644 --- a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh +++ b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh @@ -138,9 +138,8 @@ fi run_configure_for_gpu_build -bazel \ +bazel build ${EXTRA_BUILD_FLAGS} \ --output_filter=^$ \ - ${EXTRA_BUILD_FLAGS} \ tensorflow/tools/pip_package:build_pip_package || exit $? if [[ "$SKIP_TEST" == 1 ]]; then From 19589460b9f311844e26f05bdad9af8994d6c4cc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Jul 2020 10:05:17 -0700 Subject: [PATCH 1043/2522] Integrate LLVM at llvm/llvm-project@5623da56d07b Updates LLVM usage to match [5623da56d07b](https://github.com/llvm/llvm-project/commit/5623da56d07b) PiperOrigin-RevId: 322597831 Change-Id: I1fee3b4b4fb0f1105c216a09e4f9480f5e5b8d0d --- tensorflow/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 8cd81be8689..2cce1bd0dfa 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -711,8 +711,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. 
- LLVM_COMMIT = "82dbb1b2b4f1e70ca453cca60a4ba5b856058fc0" - LLVM_SHA256 = "d010981ef3a2531f106b213eb7c27e227b8b49bb87db63346210ec919ebbe392" + LLVM_COMMIT = "5623da56d07b2fa434825af0f3e8494afacf3c52" + LLVM_SHA256 = "dce02df09be24922304218e55fb5d4688ec835dcf7a9f9050d61cd70c0ed8706" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From a1cec7e4420c8720cc48acdbcf61873425288188 Mon Sep 17 00:00:00 2001 From: Henry Tan Date: Wed, 22 Jul 2020 10:14:51 -0700 Subject: [PATCH 1044/2522] Code refactoring and cleanup. PiperOrigin-RevId: 322600036 Change-Id: I27a79a6da45b40f1562c38d0f670bf007b458803 --- tensorflow/core/tpu/kernels/tpu_compile_op_common.cc | 4 +++- tensorflow/core/tpu/kernels/tpu_util.h | 3 --- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc b/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc index 43143e5d618..a24aa4cd665 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc @@ -390,10 +390,12 @@ Status TpuCompileOpKernelCommon::CompileTFFunctionToHlo( << " seconds to give time for TPUCompileOp to finished."; env->SleepForMicroseconds(kSleepSeconds * 1000000); if (done->load()) { - // If the TPUCompileOp has finished, then terminate peacefully. + // If the TpuCompileOp has finished, then terminate peacefully. return; } + LOG(ERROR) << "Aborting process due to cancelled TpuCompileOp. This " + << "termination is to ensure a consistent state."; std::exit(42); } diff --git a/tensorflow/core/tpu/kernels/tpu_util.h b/tensorflow/core/tpu/kernels/tpu_util.h index 90eef621b95..579fbdf5e85 100644 --- a/tensorflow/core/tpu/kernels/tpu_util.h +++ b/tensorflow/core/tpu/kernels/tpu_util.h @@ -54,9 +54,6 @@ Status DynamicShapesToTensorShapes(const OpInputList& dynamic_shapes, std::vector* shapes); Status DynamicShapesToTensorShapes(const InputList& dynamic_shapes, std::vector* shapes); - -// A callback called on exit. -void LogAndExit(int code); } // namespace tpu } // namespace tensorflow From 177790aafca2d550ffbbf2efcc541ff0b20298d1 Mon Sep 17 00:00:00 2001 From: "Jeffrey A. Dean" Date: Wed, 22 Jul 2020 10:21:35 -0700 Subject: [PATCH 1045/2522] Turn relatively expensive CHECK on Shape::rank() into a DCHECK. Eliminates 2.6% of compilation time in profiling one compile-intensive workload. PiperOrigin-RevId: 322601531 Change-Id: I8786323b318d54cdaf9baaea318a7abf4bccdd59 --- tensorflow/compiler/xla/shape.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/shape.h b/tensorflow/compiler/xla/shape.h index dfaac677724..6a19a1fac09 100644 --- a/tensorflow/compiler/xla/shape.h +++ b/tensorflow/compiler/xla/shape.h @@ -49,7 +49,7 @@ class Shape { // Returns the rank (number of dimensions) of the given shape. Shape must be // an array. 
int64 rank() const { - CHECK(IsArray()) << "Non-arrays do not have a rank, shape: " << ToString(); + DCHECK(IsArray()) << "Non-arrays do not have a rank, shape: " << ToString(); return dimensions_.size(); } From f97ced1b20799c83698a10e80e4254f8ac1ad5ac Mon Sep 17 00:00:00 2001 From: James Qin Date: Wed, 22 Jul 2020 10:22:06 -0700 Subject: [PATCH 1046/2522] Update kMaxListSummarySize to 50 PiperOrigin-RevId: 322601670 Change-Id: I30335a012b66366afc17dceeb19253f17bc666b8 --- tensorflow/core/framework/attr_value_util.cc | 2 +- tensorflow/core/framework/attr_value_util_test.cc | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/framework/attr_value_util.cc b/tensorflow/core/framework/attr_value_util.cc index a307c8a18c1..712e205c587 100644 --- a/tensorflow/core/framework/attr_value_util.cc +++ b/tensorflow/core/framework/attr_value_util.cc @@ -278,7 +278,7 @@ string SummarizeAttrValue(const AttrValue& attr_value) { pieces.push_back(SummarizeFunc(attr_value.list().func(i))); } } - constexpr int kMaxListSummarySize = 15; + constexpr int kMaxListSummarySize = 50; if (pieces.size() >= kMaxListSummarySize) { pieces.erase(pieces.begin() + 5, pieces.begin() + (pieces.size() - 6)); pieces[5] = "..."; diff --git a/tensorflow/core/framework/attr_value_util_test.cc b/tensorflow/core/framework/attr_value_util_test.cc index 4ffd732f8e1..6d2273fd4b6 100644 --- a/tensorflow/core/framework/attr_value_util_test.cc +++ b/tensorflow/core/framework/attr_value_util_test.cc @@ -160,12 +160,12 @@ TEST(AttrValueUtil, SummarizeAttrValueDoesNotElideShortLists) { } TEST(AttrValueUtil, SummarizeAttrValueElidesLongLists) { - std::vector alist(30); + std::vector alist(60); std::iota(alist.begin(), alist.end(), 0); AttrValue attr_value; SetAttrValue(alist, &attr_value); - EXPECT_EQ("[0, 1, 2, 3, 4, ..., 25, 26, 27, 28, 29]", + EXPECT_EQ("[0, 1, 2, 3, 4, ..., 55, 56, 57, 58, 59]", SummarizeAttrValue(attr_value)); } From 3f324403a59e312ee9a428bdd1b6c4baa529c127 Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Wed, 22 Jul 2020 17:49:59 +0000 Subject: [PATCH 1047/2522] fixed mem leak in ops_test --- tensorflow/c/ops_test.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/c/ops_test.cc b/tensorflow/c/ops_test.cc index 63105b1ad00..5d6ec8df1e3 100644 --- a/tensorflow/c/ops_test.cc +++ b/tensorflow/c/ops_test.cc @@ -323,7 +323,8 @@ TEST(OpsTest, ShapeInferenceScalarShape) { TF_ShapeHandle* TF_scalar_shape = TF_ShapeInferenceContextScalar(C_CTX(&c)); shape_inference::ShapeHandle* scalar_shape = reinterpret_cast(TF_scalar_shape); - ASSERT_EQ("[]", c.DebugString(*scalar_shape)); + ASSERT_EQ("[]", c.DebugString(*scalar_shape)); + TF_DeleteShapeHandle(TF_scalar_shape); } } // namespace From 503defb219c271c23897371fb837daae25c507bb Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Wed, 22 Jul 2020 17:53:56 +0000 Subject: [PATCH 1048/2522] moved typedef to c_api.h --- tensorflow/c/c_api.h | 4 ++-- tensorflow/c/kernels.h | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/c/c_api.h b/tensorflow/c/c_api.h index c6ca319c40d..e49bf3601ec 100644 --- a/tensorflow/c/c_api.h +++ b/tensorflow/c/c_api.h @@ -128,10 +128,10 @@ TF_CAPI_EXPORT extern TF_Buffer TF_GetBuffer(TF_Buffer* buffer); // -------------------------------------------------------------------------- // Used to pass strings across the C API. The caller does not take ownership // of the underlying data pointer and is not responsible for freeing it. 
-struct TF_StringView { +typedef struct TF_StringView { const char* data; size_t len; -}; +} TF_StringView; // -------------------------------------------------------------------------- // TF_SessionOptions holds options that can be passed during session creation. diff --git a/tensorflow/c/kernels.h b/tensorflow/c/kernels.h index cec0ca7b6a2..764f9066faa 100644 --- a/tensorflow/c/kernels.h +++ b/tensorflow/c/kernels.h @@ -46,7 +46,6 @@ extern "C" { #endif typedef struct TF_Tensor TF_Tensor; -typedef struct TF_StringView TF_StringView; // -------------------------------------------------------------------------- // C API for TensorFlow Kernels. From 43045a560b4215e82541f9e01ae63f1d1e1ffbd1 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Wed, 22 Jul 2020 10:55:48 -0700 Subject: [PATCH 1049/2522] Slightly increase CPU binary sizes. PiperOrigin-RevId: 322609511 Change-Id: Ic4f3c69ab27a8ad72dd259ad8491c50e6a055e6f --- .../tools/ci_build/builds/nightly_release_smoke_test.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/tools/ci_build/builds/nightly_release_smoke_test.sh b/tensorflow/tools/ci_build/builds/nightly_release_smoke_test.sh index 69ce63c13cb..19e1232cd92 100644 --- a/tensorflow/tools/ci_build/builds/nightly_release_smoke_test.sh +++ b/tensorflow/tools/ci_build/builds/nightly_release_smoke_test.sh @@ -19,8 +19,8 @@ set -e set -x # CPU size -MAC_CPU_MAX_WHL_SIZE=160M -LINUX_CPU_MAX_WHL_SIZE=133M +MAC_CPU_MAX_WHL_SIZE=165M +LINUX_CPU_MAX_WHL_SIZE=138M WIN_CPU_MAX_WHL_SIZE=113M # GPU size LINUX_GPU_MAX_WHL_SIZE=337M From 698fb7a6d2bcd79ffdc7a0e0b8787ef50259edd0 Mon Sep 17 00:00:00 2001 From: Brian Zhao Date: Wed, 22 Jul 2020 11:00:29 -0700 Subject: [PATCH 1050/2522] Disabling failing msan test. PiperOrigin-RevId: 322610573 Change-Id: I6c478f0a4dcf7dd2a8266fdcb37fc43750cef21a --- tensorflow/python/kernel_tests/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 53294fb427f..e7aebb044fa 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -49,6 +49,7 @@ tf_py_test( shard_count = 20, tags = [ "no_mac", # TODO(b/129706424): Re-enable this test on Mac. + "nomsan", # TODO(b/161902335): Re-enable. "notsan", # TODO(b/161829717): Re-enable. ], deps = [ From 7cd0adacea9ca4bd4e3defb1c6c5d69b1194648e Mon Sep 17 00:00:00 2001 From: Saurabh Saxena Date: Wed, 22 Jul 2020 11:02:28 -0700 Subject: [PATCH 1051/2522] Change cpp gradient function signature to accept a `Context` which contains the local context when calling the gradient function e.g. the default graph. In python gradients, this state is managed via singletons(global context manager stacks) and is implicitly captured. 
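For illustration only (not part of this change): a minimal sketch of a user-defined gradient
function written against the new Context-based signature, modeled on the AddGradientFunction
example in the diff below. The names MyGradientFunction/MyRegisterer are hypothetical, and the
element types of absl::Span/std::vector are assumed to be AbstractTensorHandle pointers.

  class MyGradientFunction : public GradientFunction {
   public:
    Status Compute(Context* ctx,
                   absl::Span<AbstractTensorHandle* const> grad_inputs,
                   std::vector<AbstractTensorHandle*>* grad_outputs) override {
      // The AbstractContext now arrives per call via `ctx->ctx` instead of
      // being captured when the gradient function is constructed.
      grad_outputs->resize(1);
      std::vector<AbstractTensorHandle*> outs(1);
      TF_RETURN_IF_ERROR(ops::Identity(ctx->ctx, {grad_inputs[0]},
                                       absl::MakeSpan(outs), "MyGrad"));
      (*grad_outputs)[0] = outs[0];
      return Status::OK();
    }
  };

  // Registration no longer needs to capture op.ctx:
  GradientFunction* MyRegisterer(const ForwardOperation& op) {
    return new MyGradientFunction;
  }
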
PiperOrigin-RevId: 322611049 Change-Id: I26fe086a687e4989a96f18baed1ccfe2ec7b7c1e --- tensorflow/c/eager/gradients.cc | 3 ++- tensorflow/c/eager/gradients.h | 10 ++++++++-- .../c/experimental/gradients/math_grad.cc | 19 +++++++++---------- 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/tensorflow/c/eager/gradients.cc b/tensorflow/c/eager/gradients.cc index 3a7a6282192..f5085fdb926 100644 --- a/tensorflow/c/eager/gradients.cc +++ b/tensorflow/c/eager/gradients.cc @@ -175,7 +175,8 @@ Status TapeVSpace::CallBackwardFunction( gtl::ArraySlice output_gradients, std::vector* result) const { if (backward_function == nullptr) return Status::OK(); - return backward_function->Compute(output_gradients, result); + Context ctx = {ctx_}; + return backward_function->Compute(&ctx, output_gradients, result); } // Looks up the ID of a Gradient. diff --git a/tensorflow/c/eager/gradients.h b/tensorflow/c/eager/gradients.h index e09b6ff8613..267ee5b7ab2 100644 --- a/tensorflow/c/eager/gradients.h +++ b/tensorflow/c/eager/gradients.h @@ -31,7 +31,8 @@ namespace gradients { // // class AddGradientFunction : public GradientFunction { // public: -// Status Compute(absl::Span grad_inputs, +// Status Compute(Context* ctx, +// absl::Span grad_inputs, // std::vector* grad_outputs) override { // grad_outputs->resize(2); // (*grad_outputs)[0] = grad_inputs[0]; @@ -50,11 +51,16 @@ namespace gradients { // Status RegisterGradients(GradientRegistry* registry) { // return registry->Register("Add", AddRegisterer); // } +struct Context { + public: + AbstractContext* ctx; +}; class GradientFunction { public: // TODO(srbs): How we support CompositeTensors e.g. IndexedSlices in // `grad_inputs`. - virtual Status Compute(absl::Span grad_inputs, + virtual Status Compute(Context* ctx, + absl::Span grad_inputs, std::vector* grad_outputs) = 0; virtual ~GradientFunction() {} }; diff --git a/tensorflow/c/experimental/gradients/math_grad.cc b/tensorflow/c/experimental/gradients/math_grad.cc index e27cbb2e02f..47bd8cce23d 100644 --- a/tensorflow/c/experimental/gradients/math_grad.cc +++ b/tensorflow/c/experimental/gradients/math_grad.cc @@ -24,31 +24,30 @@ namespace { class AddGradientFunction : public GradientFunction { public: - explicit AddGradientFunction(AbstractContext* ctx) : ctx_(ctx) {} - Status Compute(absl::Span grad_inputs, + Status Compute(Context* ctx, + absl::Span grad_inputs, std::vector* grad_outputs) override { grad_outputs->resize(2); std::vector identity_outputs(1); // TODO(b/145674566): Handle name unification in tracing code. // TODO(b/161805092): Support broadcasting. 
- TF_RETURN_IF_ERROR(ops::Identity( - ctx_, {grad_inputs[0]}, absl::MakeSpan(identity_outputs), "Identity0")); + TF_RETURN_IF_ERROR(ops::Identity(ctx->ctx, {grad_inputs[0]}, + absl::MakeSpan(identity_outputs), + "Identity0")); (*grad_outputs)[0] = identity_outputs[0]; - TF_RETURN_IF_ERROR(ops::Identity( - ctx_, {grad_inputs[0]}, absl::MakeSpan(identity_outputs), "Identity1")); + TF_RETURN_IF_ERROR(ops::Identity(ctx->ctx, {grad_inputs[0]}, + absl::MakeSpan(identity_outputs), + "Identity1")); (*grad_outputs)[1] = identity_outputs[0]; return Status::OK(); } ~AddGradientFunction() override {} - - private: - AbstractContext* ctx_; }; } // namespace GradientFunction* AddRegisterer(const ForwardOperation& op) { - return new AddGradientFunction(op.ctx); + return new AddGradientFunction; } } // namespace gradients } // namespace tensorflow From e5e495db7bee77cd0fd5dda3b06bd743cbcf1ef8 Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Wed, 22 Jul 2020 11:32:06 -0700 Subject: [PATCH 1052/2522] Change the go_package name from core_protos_go_proto to for_protos_go_proto to match the new BUILD rule name. PiperOrigin-RevId: 322617692 Change-Id: I7f5da54b0a5b6141b0c0899d12a4bc025f10c837 --- tensorflow/core/protobuf/autotuning.proto | 2 +- tensorflow/core/protobuf/bfc_memory_map.proto | 2 +- tensorflow/core/protobuf/cluster.proto | 2 +- tensorflow/core/protobuf/config.proto | 2 +- tensorflow/core/protobuf/control_flow.proto | 2 +- tensorflow/core/protobuf/conv_autotuning.proto | 2 +- tensorflow/core/protobuf/critical_section.proto | 2 +- tensorflow/core/protobuf/debug.proto | 2 +- tensorflow/core/protobuf/debug_event.proto | 2 +- tensorflow/core/protobuf/device_filters.proto | 2 +- tensorflow/core/protobuf/device_properties.proto | 2 +- tensorflow/core/protobuf/eager_service.proto | 2 +- tensorflow/core/protobuf/error_codes.proto | 2 +- tensorflow/core/protobuf/graph_debug_info.proto | 2 +- tensorflow/core/protobuf/master.proto | 2 +- tensorflow/core/protobuf/master_service.proto | 2 +- tensorflow/core/protobuf/meta_graph.proto | 2 +- tensorflow/core/protobuf/named_tensor.proto | 2 +- tensorflow/core/protobuf/queue_runner.proto | 2 +- tensorflow/core/protobuf/remote_tensor_handle.proto | 2 +- tensorflow/core/protobuf/replay_log.proto | 2 +- tensorflow/core/protobuf/rewriter_config.proto | 2 +- tensorflow/core/protobuf/saved_model.proto | 2 +- tensorflow/core/protobuf/saved_object_graph.proto | 2 +- tensorflow/core/protobuf/saver.proto | 2 +- tensorflow/core/protobuf/struct.proto | 2 +- tensorflow/core/protobuf/tensor_bundle.proto | 2 +- tensorflow/core/protobuf/tensorflow_server.proto | 2 +- tensorflow/core/protobuf/trackable_object_graph.proto | 2 +- tensorflow/core/protobuf/transport_options.proto | 2 +- tensorflow/core/protobuf/verifier_config.proto | 2 +- tensorflow/core/protobuf/worker.proto | 2 +- tensorflow/core/protobuf/worker_service.proto | 2 +- 33 files changed, 33 insertions(+), 33 deletions(-) diff --git a/tensorflow/core/protobuf/autotuning.proto b/tensorflow/core/protobuf/autotuning.proto index 44ce088ff41..083a04dba33 100644 --- a/tensorflow/core/protobuf/autotuning.proto +++ b/tensorflow/core/protobuf/autotuning.proto @@ -10,7 +10,7 @@ package tensorflow; import "google/protobuf/any.proto"; import "google/protobuf/duration.proto"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/core_protos_go_proto"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; message CudnnVersion { int32 major = 1; diff --git 
a/tensorflow/core/protobuf/bfc_memory_map.proto b/tensorflow/core/protobuf/bfc_memory_map.proto index 6f7a5301af1..6e2614c79d2 100644 --- a/tensorflow/core/protobuf/bfc_memory_map.proto +++ b/tensorflow/core/protobuf/bfc_memory_map.proto @@ -2,7 +2,7 @@ syntax = "proto3"; package tensorflow; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/core_protos_go_proto"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; // Some of the data from AllocatorStats message MemAllocatorStats { diff --git a/tensorflow/core/protobuf/cluster.proto b/tensorflow/core/protobuf/cluster.proto index 6c7162fe433..41a55e06d6e 100644 --- a/tensorflow/core/protobuf/cluster.proto +++ b/tensorflow/core/protobuf/cluster.proto @@ -21,7 +21,7 @@ option cc_enable_arenas = true; option java_outer_classname = "ClusterProtos"; option java_multiple_files = true; option java_package = "org.tensorflow.distruntime"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/core_protos_go_proto"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; // This file contains protos to be used when defining a TensorFlow // cluster. diff --git a/tensorflow/core/protobuf/config.proto b/tensorflow/core/protobuf/config.proto index 3d20d004d46..fec929a0b03 100644 --- a/tensorflow/core/protobuf/config.proto +++ b/tensorflow/core/protobuf/config.proto @@ -13,7 +13,7 @@ option cc_enable_arenas = true; option java_outer_classname = "ConfigProtos"; option java_multiple_files = true; option java_package = "org.tensorflow.framework"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/core_protos_go_proto"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; message GPUOptions { // Fraction of the available GPU memory to allocate for each process. diff --git a/tensorflow/core/protobuf/control_flow.proto b/tensorflow/core/protobuf/control_flow.proto index 24f862029dc..8890af6916d 100644 --- a/tensorflow/core/protobuf/control_flow.proto +++ b/tensorflow/core/protobuf/control_flow.proto @@ -6,7 +6,7 @@ option cc_enable_arenas = true; option java_outer_classname = "ControlFlowProtos"; option java_multiple_files = true; option java_package = "org.tensorflow.framework"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/core_protos_go_proto"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; // Control flow context related protocol buffers. diff --git a/tensorflow/core/protobuf/conv_autotuning.proto b/tensorflow/core/protobuf/conv_autotuning.proto index b3af2f0769e..17d31770a59 100644 --- a/tensorflow/core/protobuf/conv_autotuning.proto +++ b/tensorflow/core/protobuf/conv_autotuning.proto @@ -6,7 +6,7 @@ package tensorflow; import "tensorflow/stream_executor/dnn.proto"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/core_protos_go_proto"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; // A convolution. Currently it's only used for logging. In the future, we may // want to use it in the API as well. 
diff --git a/tensorflow/core/protobuf/critical_section.proto b/tensorflow/core/protobuf/critical_section.proto index 991d7ef1280..830f0560a03 100644 --- a/tensorflow/core/protobuf/critical_section.proto +++ b/tensorflow/core/protobuf/critical_section.proto @@ -6,7 +6,7 @@ option cc_enable_arenas = true; option java_outer_classname = "CriticalSectionProtos"; option java_multiple_files = true; option java_package = "org.tensorflow.framework"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/core_protos_go_proto"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; // Protocol buffer representing a CriticalSection. message CriticalSectionDef { diff --git a/tensorflow/core/protobuf/debug.proto b/tensorflow/core/protobuf/debug.proto index 21df4a12e7d..2fabd0319fe 100644 --- a/tensorflow/core/protobuf/debug.proto +++ b/tensorflow/core/protobuf/debug.proto @@ -6,7 +6,7 @@ option cc_enable_arenas = true; option java_outer_classname = "DebugProtos"; option java_multiple_files = true; option java_package = "org.tensorflow.framework"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/core_protos_go_proto"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; // Option for watching a node in TensorFlow Debugger (tfdbg). message DebugTensorWatch { diff --git a/tensorflow/core/protobuf/debug_event.proto b/tensorflow/core/protobuf/debug_event.proto index 5541c397fb8..5530004d725 100644 --- a/tensorflow/core/protobuf/debug_event.proto +++ b/tensorflow/core/protobuf/debug_event.proto @@ -9,7 +9,7 @@ option cc_enable_arenas = true; option java_outer_classname = "DebugEventProtos"; option java_multiple_files = true; option java_package = "org.tensorflow.util"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/core_protos_go_proto"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; // Available modes for extracting debugging information from a Tensor. // TODO(cais): Document the detailed column names and semantics in a separate diff --git a/tensorflow/core/protobuf/device_filters.proto b/tensorflow/core/protobuf/device_filters.proto index 62dd427e03a..8fd8e2ec143 100644 --- a/tensorflow/core/protobuf/device_filters.proto +++ b/tensorflow/core/protobuf/device_filters.proto @@ -21,7 +21,7 @@ option cc_enable_arenas = true; option java_outer_classname = "DeviceFiltersProtos"; option java_multiple_files = true; option java_package = "org.tensorflow.distruntime"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/core_protos_go_proto"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; // This file contains protos to be used when defining a TensorFlow // cluster. 
diff --git a/tensorflow/core/protobuf/device_properties.proto b/tensorflow/core/protobuf/device_properties.proto index f6587c8aef2..b892bdcc854 100644 --- a/tensorflow/core/protobuf/device_properties.proto +++ b/tensorflow/core/protobuf/device_properties.proto @@ -19,7 +19,7 @@ package tensorflow; option cc_enable_arenas = true; option java_outer_classname = "DevicePropertiesProtos"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/core_protos_go_proto"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; message DeviceProperties { // Device type (CPU, GPU, ...) diff --git a/tensorflow/core/protobuf/eager_service.proto b/tensorflow/core/protobuf/eager_service.proto index 179ef19f805..57bbf48ac67 100644 --- a/tensorflow/core/protobuf/eager_service.proto +++ b/tensorflow/core/protobuf/eager_service.proto @@ -11,7 +11,7 @@ import "tensorflow/core/framework/versions.proto"; import "tensorflow/core/protobuf/remote_tensor_handle.proto"; import "tensorflow/core/protobuf/tensorflow_server.proto"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/core_protos_go_proto"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; // A proto representation of an eager operation. message Operation { diff --git a/tensorflow/core/protobuf/error_codes.proto b/tensorflow/core/protobuf/error_codes.proto index a880fdfd8c9..4f94d7dedc6 100644 --- a/tensorflow/core/protobuf/error_codes.proto +++ b/tensorflow/core/protobuf/error_codes.proto @@ -6,7 +6,7 @@ option cc_enable_arenas = true; option java_outer_classname = "ErrorCodesProtos"; option java_multiple_files = true; option java_package = "org.tensorflow.framework"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/core_protos_go_proto"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; // The canonical error codes for TensorFlow APIs. // diff --git a/tensorflow/core/protobuf/graph_debug_info.proto b/tensorflow/core/protobuf/graph_debug_info.proto index 3d8d7733c64..7af52628cd8 100644 --- a/tensorflow/core/protobuf/graph_debug_info.proto +++ b/tensorflow/core/protobuf/graph_debug_info.proto @@ -6,7 +6,7 @@ option cc_enable_arenas = true; option java_outer_classname = "GraphDebugInfoProtos"; option java_multiple_files = true; option java_package = "org.tensorflow.framework"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/core_protos_go_proto"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; message GraphDebugInfo { // This represents a file/line location in the source code. 
diff --git a/tensorflow/core/protobuf/master.proto b/tensorflow/core/protobuf/master.proto index 0bd21fd8bd7..e1732a932c9 100644 --- a/tensorflow/core/protobuf/master.proto +++ b/tensorflow/core/protobuf/master.proto @@ -28,7 +28,7 @@ option cc_enable_arenas = true; option java_outer_classname = "DistributedRuntimeProtos"; option java_multiple_files = true; option java_package = "org.tensorflow.distruntime"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/core_protos_go_proto"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; //////////////////////////////////////////////////////////////////////////////// // diff --git a/tensorflow/core/protobuf/master_service.proto b/tensorflow/core/protobuf/master_service.proto index aa8d13f2b86..f9ec50aa5a3 100644 --- a/tensorflow/core/protobuf/master_service.proto +++ b/tensorflow/core/protobuf/master_service.proto @@ -22,7 +22,7 @@ import "tensorflow/core/protobuf/master.proto"; option java_outer_classname = "MasterServiceProtos"; option java_multiple_files = true; option java_package = "org.tensorflow.distruntime"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/core_protos_go_proto"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; //////////////////////////////////////////////////////////////////////////////// // diff --git a/tensorflow/core/protobuf/meta_graph.proto b/tensorflow/core/protobuf/meta_graph.proto index c560451f9db..167e32973eb 100644 --- a/tensorflow/core/protobuf/meta_graph.proto +++ b/tensorflow/core/protobuf/meta_graph.proto @@ -15,7 +15,7 @@ option cc_enable_arenas = true; option java_outer_classname = "MetaGraphProtos"; option java_multiple_files = true; option java_package = "org.tensorflow.framework"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/core_protos_go_proto"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; // NOTE: This protocol buffer is evolving, and will go through revisions in the // coming months. diff --git a/tensorflow/core/protobuf/named_tensor.proto b/tensorflow/core/protobuf/named_tensor.proto index 4210f040e82..8d401a0c481 100644 --- a/tensorflow/core/protobuf/named_tensor.proto +++ b/tensorflow/core/protobuf/named_tensor.proto @@ -8,7 +8,7 @@ option cc_enable_arenas = true; option java_outer_classname = "NamedTensorProtos"; option java_multiple_files = true; option java_package = "org.tensorflow.framework"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/core_protos_go_proto"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; // A pair of tensor name and tensor values. message NamedTensorProto { diff --git a/tensorflow/core/protobuf/queue_runner.proto b/tensorflow/core/protobuf/queue_runner.proto index 981ae0d463e..c3225d42fd2 100644 --- a/tensorflow/core/protobuf/queue_runner.proto +++ b/tensorflow/core/protobuf/queue_runner.proto @@ -8,7 +8,7 @@ option cc_enable_arenas = true; option java_outer_classname = "QueueRunnerProtos"; option java_multiple_files = true; option java_package = "org.tensorflow.framework"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/core_protos_go_proto"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; // Protocol buffer representing a QueueRunner. 
message QueueRunnerDef { diff --git a/tensorflow/core/protobuf/remote_tensor_handle.proto b/tensorflow/core/protobuf/remote_tensor_handle.proto index 36e3f810b73..904d6a3b209 100644 --- a/tensorflow/core/protobuf/remote_tensor_handle.proto +++ b/tensorflow/core/protobuf/remote_tensor_handle.proto @@ -9,7 +9,7 @@ option cc_enable_arenas = true; option java_outer_classname = "RemoteTensorHandleProtos"; option java_multiple_files = true; option java_package = "org.tensorflow.framework"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/core_protos_go_proto"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; message ResourceDtypeAndShape { DataType dtype = 1; diff --git a/tensorflow/core/protobuf/replay_log.proto b/tensorflow/core/protobuf/replay_log.proto index bf0155946b6..1fe4fd65853 100644 --- a/tensorflow/core/protobuf/replay_log.proto +++ b/tensorflow/core/protobuf/replay_log.proto @@ -5,7 +5,7 @@ package tensorflow; import "tensorflow/core/protobuf/master.proto"; option cc_enable_arenas = true; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/core_protos_go_proto"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; // Records the creation of a new replay session. We record the device listing // here to capture the state of the cluster. diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto index 9520db92742..2b7830f8bef 100644 --- a/tensorflow/core/protobuf/rewriter_config.proto +++ b/tensorflow/core/protobuf/rewriter_config.proto @@ -9,7 +9,7 @@ option cc_enable_arenas = true; option java_outer_classname = "RewriterConfigProtos"; option java_multiple_files = true; option java_package = "org.tensorflow.framework"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/core_protos_go_proto"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; message AutoParallelOptions { bool enable = 1; diff --git a/tensorflow/core/protobuf/saved_model.proto b/tensorflow/core/protobuf/saved_model.proto index 57f018bb249..47a707c8139 100644 --- a/tensorflow/core/protobuf/saved_model.proto +++ b/tensorflow/core/protobuf/saved_model.proto @@ -8,7 +8,7 @@ option cc_enable_arenas = true; option java_outer_classname = "SavedModelProtos"; option java_multiple_files = true; option java_package = "org.tensorflow.framework"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/core_protos_go_proto"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; // SavedModel is the high level serialization format for TensorFlow Models. // See [todo: doc links, similar to session_bundle] for more information. 
diff --git a/tensorflow/core/protobuf/saved_object_graph.proto b/tensorflow/core/protobuf/saved_object_graph.proto index 981908cfa3c..c756644f7ec 100644 --- a/tensorflow/core/protobuf/saved_object_graph.proto +++ b/tensorflow/core/protobuf/saved_object_graph.proto @@ -10,7 +10,7 @@ import "tensorflow/core/protobuf/struct.proto"; import "tensorflow/core/protobuf/trackable_object_graph.proto"; option cc_enable_arenas = true; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/core_protos_go_proto"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; // A SavedObjectGraph is part of object-based SavedModels in TF 2.0. It // describes the directed graph of Python objects (or equivalent in other diff --git a/tensorflow/core/protobuf/saver.proto b/tensorflow/core/protobuf/saver.proto index 5ba79447750..208468b2b2c 100644 --- a/tensorflow/core/protobuf/saver.proto +++ b/tensorflow/core/protobuf/saver.proto @@ -6,7 +6,7 @@ option cc_enable_arenas = true; option java_outer_classname = "SaverProtos"; option java_multiple_files = true; option java_package = "org.tensorflow.util"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/core_protos_go_proto"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; // Protocol buffer representing the configuration of a Saver. message SaverDef { diff --git a/tensorflow/core/protobuf/struct.proto b/tensorflow/core/protobuf/struct.proto index 0158c4be85f..ee0f089f2a3 100644 --- a/tensorflow/core/protobuf/struct.proto +++ b/tensorflow/core/protobuf/struct.proto @@ -6,7 +6,7 @@ import "tensorflow/core/framework/tensor.proto"; import "tensorflow/core/framework/tensor_shape.proto"; import "tensorflow/core/framework/types.proto"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/core_protos_go_proto"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; // `StructuredValue` represents a dynamically typed value representing various // data structures that are inspired by Python data structures typically used in diff --git a/tensorflow/core/protobuf/tensor_bundle.proto b/tensorflow/core/protobuf/tensor_bundle.proto index 04ccc0faf36..999195cc95d 100644 --- a/tensorflow/core/protobuf/tensor_bundle.proto +++ b/tensorflow/core/protobuf/tensor_bundle.proto @@ -11,7 +11,7 @@ option cc_enable_arenas = true; option java_outer_classname = "TensorBundleProtos"; option java_multiple_files = true; option java_package = "org.tensorflow.util"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/core_protos_go_proto"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; // Protos used in the tensor bundle module (tf/core/util/tensor_bundle/). 
diff --git a/tensorflow/core/protobuf/tensorflow_server.proto b/tensorflow/core/protobuf/tensorflow_server.proto index 7136f29d58a..5374172df83 100644 --- a/tensorflow/core/protobuf/tensorflow_server.proto +++ b/tensorflow/core/protobuf/tensorflow_server.proto @@ -25,7 +25,7 @@ option cc_enable_arenas = true; option java_outer_classname = "ServerProtos"; option java_multiple_files = true; option java_package = "org.tensorflow.distruntime"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/core_protos_go_proto"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; // Defines the configuration of a single TensorFlow server. message ServerDef { diff --git a/tensorflow/core/protobuf/trackable_object_graph.proto b/tensorflow/core/protobuf/trackable_object_graph.proto index 48dbd92a181..4be996bb3c4 100644 --- a/tensorflow/core/protobuf/trackable_object_graph.proto +++ b/tensorflow/core/protobuf/trackable_object_graph.proto @@ -3,7 +3,7 @@ syntax = "proto3"; package tensorflow; option cc_enable_arenas = true; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/core_protos_go_proto"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; // A TensorBundle addition which saves extra information about the objects which // own variables, allowing for more robust checkpoint loading into modified diff --git a/tensorflow/core/protobuf/transport_options.proto b/tensorflow/core/protobuf/transport_options.proto index 23f92c3529f..8d540315e06 100644 --- a/tensorflow/core/protobuf/transport_options.proto +++ b/tensorflow/core/protobuf/transport_options.proto @@ -2,7 +2,7 @@ syntax = "proto3"; package tensorflow; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/core_protos_go_proto"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; // Extra data needed on a non-RDMA RecvBufResponse. message RecvBufRespExtra { diff --git a/tensorflow/core/protobuf/verifier_config.proto b/tensorflow/core/protobuf/verifier_config.proto index 4440aad6ac9..21885ffef83 100644 --- a/tensorflow/core/protobuf/verifier_config.proto +++ b/tensorflow/core/protobuf/verifier_config.proto @@ -6,7 +6,7 @@ option cc_enable_arenas = true; option java_outer_classname = "VerifierConfigProtos"; option java_multiple_files = true; option java_package = "org.tensorflow.framework"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/core_protos_go_proto"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; // The config for graph verifiers. 
message VerifierConfig { diff --git a/tensorflow/core/protobuf/worker.proto b/tensorflow/core/protobuf/worker.proto index b6bbcfc3431..f10283531da 100644 --- a/tensorflow/core/protobuf/worker.proto +++ b/tensorflow/core/protobuf/worker.proto @@ -35,7 +35,7 @@ option cc_enable_arenas = true; option java_outer_classname = "WorkerProtos"; option java_multiple_files = true; option java_package = "org.tensorflow.distruntime"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/core_protos_go_proto"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; //////////////////////////////////////////////////////////////////////////////// // diff --git a/tensorflow/core/protobuf/worker_service.proto b/tensorflow/core/protobuf/worker_service.proto index 38d8bc1da6b..d1a53845850 100644 --- a/tensorflow/core/protobuf/worker_service.proto +++ b/tensorflow/core/protobuf/worker_service.proto @@ -22,7 +22,7 @@ import "tensorflow/core/protobuf/worker.proto"; option java_outer_classname = "WorkerServiceProtos"; option java_multiple_files = true; option java_package = "org.tensorflow.distruntime"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/core_protos_go_proto"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; //////////////////////////////////////////////////////////////////////////////// // From a9b7e06aa8db8b54a472ab8a1a258cbdc8f83475 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Wed, 22 Jul 2020 11:34:07 -0700 Subject: [PATCH 1053/2522] Set the python version properly. PiperOrigin-RevId: 322618173 Change-Id: I8b7e5b91fe63282ceda201979a5945604e36c2bd --- .../release/ubuntu_16/cpu_py35_full/nightly_release.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/nightly_release.sh index 16f1e7524fd..200f3c41725 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/nightly_release.sh @@ -28,11 +28,11 @@ python2.7 tensorflow/tools/ci_build/update_version.py --nightly # Run configure. export CC_OPT_FLAGS='-mavx' -export PYTHON_BIN_PATH=$(which python) +export PYTHON_BIN_PATH=$(which python3.5) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=release_cpu_linux --host_force_python=PY3 tensorflow/tools/pip_package:build_pip_package +bazel build --config=release_cpu_linux tensorflow/tools/pip_package:build_pip_package ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --cpu --nightly_flag From 232a4118c83e4253d40700ccf55b679fc1a303d5 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Wed, 22 Jul 2020 11:39:09 -0700 Subject: [PATCH 1054/2522] Fixed bugs introduced by recent CL. 
PiperOrigin-RevId: 322619249 Change-Id: Id16e10d8f5fb8d77d7a213a69b3d3409b4e609aa --- tensorflow/lite/delegates/gpu/cl/api.cc | 12 ++++++------ .../gpu/cl/selectors/operation_selector.cc | 18 ++++++++++-------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/api.cc b/tensorflow/lite/delegates/gpu/cl/api.cc index 503b04543b4..2a3c84a67cf 100644 --- a/tensorflow/lite/delegates/gpu/cl/api.cc +++ b/tensorflow/lite/delegates/gpu/cl/api.cc @@ -491,14 +491,14 @@ class InferenceRunnerImpl : public InferenceRunner { absl::Status SetInputObject(int index, TensorObject object) override { if (index < 0 || index >= inputs_.size()) { - return absl::OutOfRangeError("Index is out of range"); + return absl::OutOfRangeError("Input index is out of range"); } return inputs_[index]->SetExternalObject(object); } absl::Status SetOutputObject(int index, TensorObject object) override { if (index < 0 || index >= outputs_.size()) { - return absl::OutOfRangeError("Index is out of range"); + return absl::OutOfRangeError("Output index is out of range"); } return outputs_[index]->SetExternalObject(object); } @@ -623,13 +623,13 @@ class InferenceBuilderImpl : public InferenceBuilder { absl::Status SetInputObjectDef(int index, ObjectDef new_def) override { if (index < 0 || index >= inputs_.size()) { - return absl::OutOfRangeError("Index is out of range"); + return absl::OutOfRangeError("Input index is out of range"); } auto def = inputs_[index]; def.external_def.object_def = new_def; if (!tie_factory_->IsSupported(def)) { return absl::InvalidArgumentError( - "New object definition is not supported."); + "New input object definition is not supported."); } inputs_[index] = def; return absl::OkStatus(); @@ -637,13 +637,13 @@ class InferenceBuilderImpl : public InferenceBuilder { absl::Status SetOutputObjectDef(int index, ObjectDef new_def) override { if (index < 0 || index >= outputs_.size()) { - return absl::OutOfRangeError("Index is out of range"); + return absl::OutOfRangeError("Output index is out of range"); } auto def = outputs_[index]; def.external_def.object_def = new_def; if (!tie_factory_->IsSupported(def)) { return absl::InvalidArgumentError( - "New object definition is not supported."); + "New output object definition is not supported."); } outputs_[index] = def; return absl::OkStatus(); diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc index 1863cedb793..3aefbb8d652 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc @@ -57,6 +57,8 @@ bool IsSuitableForWinograd4x4To6x6(const Convolution2DAttributes& attr, } absl::Status WinogradFromNode(const CreationContext& creation_context, + const std::vector& inputs, + const std::vector& outputs, const OperationDef& op_def, ModelHints hints, const BHWC& input_shape, const BHWC& output_shape, const Convolution2DAttributes& attr, @@ -95,7 +97,7 @@ absl::Status WinogradFromNode(const CreationContext& creation_context, auto& winograd_up = gpu_subgraph->operations[0]; RETURN_IF_ERROR(SelectWinograd4x4To36( creation_context, attr.padding, winograd_up_def, &winograd_up.operation)); - winograd_up.input_ids = {0}; + winograd_up.input_ids = {static_cast(inputs[0]->id)}; winograd_up.output_ids = {-1}; OperationDef conv_def; @@ -114,7 +116,7 @@ absl::Status WinogradFromNode(const CreationContext& creation_context, 
winograd_down_def.dst_tensors.push_back(op_def.dst_tensors[0]); auto& winograd_down = gpu_subgraph->operations[2]; winograd_down.input_ids = {-2}; - winograd_down.output_ids = {0}; + winograd_down.output_ids = {static_cast(outputs[0]->id)}; auto bias_copy = attr.bias; if (bias_copy.shape.v < attr.weights.shape.o) { bias_copy.shape = Linear(attr.weights.shape.o); @@ -202,8 +204,8 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context, auto input_shape = inputs[0]->tensor.shape; auto output_shape = outputs[0]->tensor.shape; if (inputs.size() == 1) { - if (WinogradFromNode(creation_context, op_def, hints, input_shape, - output_shape, attr, gpu_subgraph) + if (WinogradFromNode(creation_context, inputs, outputs, op_def, hints, + input_shape, output_shape, attr, gpu_subgraph) .ok()) { return absl::OkStatus(); } else { @@ -215,13 +217,13 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context, auto weights_shape = inputs[1]->tensor.shape; TensorDescriptor weights_desc = {op_def.src_tensors[1].data_type, TensorStorageType::BUFFER, - Layout::UNKNOWN}; + Layout::BHWC}; gpu_subgraph->operations.clear(); gpu_subgraph->operations.resize(2); auto& converter_op = gpu_subgraph->operations[0]; auto& conv_op = gpu_subgraph->operations[1]; - conv_op.input_ids = {0, -1}; - conv_op.output_ids = {0}; + conv_op.input_ids = {static_cast(inputs[0]->id), -1}; + conv_op.output_ids = {static_cast(outputs[0]->id)}; OperationDef conv_def = op_def; conv_def.src_tensors[1] = weights_desc; ConvWeightsDescription conv_weights_desc; @@ -242,7 +244,7 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context, converter_def.src_tensors.push_back(op_def.src_tensors[1]); converter_def.dst_tensors.push_back(weights_desc); - converter_op.input_ids = {1}; + converter_op.input_ids = {static_cast(inputs[1]->id)}; converter_op.output_ids = {-1}; return SelectConverterToConvWeights(conv_weights_desc, creation_context, converter_def, hints, From 30885b432e7fce4df88c0496970a4ba12a7c776c Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Wed, 22 Jul 2020 11:39:58 -0700 Subject: [PATCH 1055/2522] Remove invalid visibility PiperOrigin-RevId: 322619428 Change-Id: Id83693d12617858a49de3fecfb2343ab1f98616a --- tensorflow/python/BUILD | 2 -- 1 file changed, 2 deletions(-) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 365ef58f796..6399f76741f 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -58,8 +58,6 @@ visibility = [ "//third_party/py/tensorflow_examples:__subpackages__", "//third_party/py/tf_agents:__subpackages__", # For benchmarks. "//third_party/py/tf_slim:__subpackages__", - # TODO(aselle): to pass open source test. - "//bazel_pip/tensorflow/lite/toco/python:__pkg__", "//third_party/py/tensorflow_docs:__subpackages__", ] From 70edbdb6c79f0e3a2d521703ab006e37c504b86e Mon Sep 17 00:00:00 2001 From: Berkin Ilbeyi Date: Wed, 22 Jul 2020 11:50:46 -0700 Subject: [PATCH 1056/2522] [XLA] Allow copy-done to be scheduled earlier to avoid copy ordering issues. We previously required that copy-dones to be scheduled right before the use. We also require asynchronous copies to maintain pipelining order (no nested copy-start/copy-done pairs). This could mean that a smaller buffer that has shorter copy-start/copy-done duration may block a larger buffer due to copy ordering. E.g. 
this situation might arise which is not allowed: small tensor in default mem---------------------->CS----->CD->use large tensor in default mem------------------>CS----------------->CD->use ====================================================================> time This CL checks if there is an already committed asynchronous copy that violates the pipelining behavior. If so, we attempt to move the copy-done earlier: small tensor in default mem---------------------->CS----->CD->use large tensor in default mem-------->CS----------------->CD----------->use ====================================================================> time PiperOrigin-RevId: 322621813 Change-Id: I8287c11c96a6d71de86a5f2e22cf1846c26ef4f3 --- .../xla/service/memory_space_assignment.cc | 134 +++++++-- .../xla/service/memory_space_assignment.h | 43 ++- .../service/memory_space_assignment_test.cc | 254 ++++++++++++++---- 3 files changed, 354 insertions(+), 77 deletions(-) diff --git a/tensorflow/compiler/xla/service/memory_space_assignment.cc b/tensorflow/compiler/xla/service/memory_space_assignment.cc index 874200e7692..aa978a922e6 100644 --- a/tensorflow/compiler/xla/service/memory_space_assignment.cc +++ b/tensorflow/compiler/xla/service/memory_space_assignment.cc @@ -235,6 +235,11 @@ int64 InstructionCountPrefetchIntervalPicker::PreferredEvictionEndTime( return std::min(start_time + min_overlap_count_, latest_end_time); } +int64 InstructionCountPrefetchIntervalPicker::LatestPrefetchStartTime( + const HloUse& use, int64 start_time, int64 end_time) const { + return end_time_ - min_overlap_count_; +} + void InstructionCountPrefetchIntervalPicker::Begin(const HloUse& use, int64 start_time, int64 end_time) { @@ -355,6 +360,49 @@ int64 CostAnalysisPrefetchIntervalPicker::PreferredEvictionEndTime( return end_time; } +int64 CostAnalysisPrefetchIntervalPicker::LatestPrefetchStartTime( + const HloUse& use, int64 start_time, int64 end_time) const { + const Shape& shape = ShapeUtil::GetSubshape( + use.instruction->operand(use.operand_number)->shape(), use.operand_index); + // Find the earliest time that satisfies max_async_copy_to_overlap_ratio_. + float async_copy_elapsed = cost_analysis_.GetAsyncCopyElapsed(shape); + // Estimate the time we would save by having this op in alternate memory. + float elapsed_time = cost_analysis_.GetInstructionElapsed(*use.instruction); + float elapsed_time_in_alternate_mem = + cost_analysis_.GetInstructionElapsedInAlternateMemory( + *use.instruction, use.operand_number, + /*output_in_alternate_mem=*/false); + float inst_elapsed_reduction = elapsed_time - elapsed_time_in_alternate_mem; + int end_nest_level = while_nest_level_[end_time]; + + // Find the latest time we're allowed to start prefetching. + float min_interval = min_async_copy_to_overlap_ratio_ * async_copy_elapsed; + int latest_prefetch_time; + for (latest_prefetch_time = end_time - 1; + latest_prefetch_time >= start_time && + (while_nest_level_[latest_prefetch_time] != end_nest_level || + min_interval > + GetLogicalIntervalElapsed(latest_prefetch_time, end_time) + + inst_elapsed_reduction); + --latest_prefetch_time) { + } + + return latest_prefetch_time; +} + +int64 CostAnalysisPrefetchIntervalPicker::LatestPrefetchEndTime( + int64 original_prefetch_end_time, int64 proposed_prefetch_end_time) const { + // Iterate towards the beginning until we find a suitable end time that is the + // same while nest level as the original prefetch end time. 
+ int64 original_nest_level = while_nest_level_[original_prefetch_end_time]; + int64 new_prefetch_end_time; + for (new_prefetch_end_time = proposed_prefetch_end_time; + while_nest_level_[new_prefetch_end_time] != original_nest_level; + --new_prefetch_end_time) { + } + return new_prefetch_end_time; +} + void CostAnalysisPrefetchIntervalPicker::Begin(const HloUse& use, int64 start_time, int64 end_time) { @@ -374,14 +422,7 @@ void CostAnalysisPrefetchIntervalPicker::Begin(const HloUse& use, // Find the latest time we're allowed to start prefetching. float min_interval = min_async_copy_to_overlap_ratio_ * async_copy_elapsed_; - for (latest_prefetch_time_ = end_logical_time_ - 1; - latest_prefetch_time_ >= start_time && - (while_nest_level_[latest_prefetch_time_] != end_nest_level || - min_interval > GetLogicalIntervalElapsed(latest_prefetch_time_, - end_logical_time_) + - inst_elapsed_reduction_); - --latest_prefetch_time_) { - } + latest_prefetch_time_ = LatestPrefetchStartTime(use, start_time, end_time); // Find the earliest time we're allowed to start prefetching. float max_interval = max_async_copy_to_overlap_ratio_ * @@ -1229,15 +1270,21 @@ void AsynchronousCopyOrdering::RemoveCopy(const AsynchronousCopy& copy) { ranges_.erase(copy_it); } -bool AsynchronousCopyOrdering::ViolatesOrdering(int64 start_time, - int64 end_time) const { +absl::optional AsynchronousCopyOrdering::ViolatesOrdering( + int64 start_time, int64 end_time) const { // We allow identical start and end times. It is enough to check for just the // start time in case we find a match in ranges_ because the found value will // either be identical to {start_time, end_time} (and this doesn't violate) or // its start_time will be smaller and end_time will be larger (this violates). auto copy_it = ranges_.find( {start_time, end_time, MemorySpaceAssignment::MemorySpace::kAlternate}); - return copy_it != ranges_.end() && copy_it->start_time != start_time; + if (copy_it != ranges_.end() && copy_it->start_time != start_time) { + VLOG(4) << "Violates ordering: (" << start_time << ", " << end_time + << ") and (" << copy_it->start_time << ", " << copy_it->end_time + << ")"; + return *copy_it; + } + return absl::nullopt; } /*static*/ MemorySpaceAssignment::Allocation* @@ -1734,8 +1781,9 @@ bool AlternateMemoryBestFitHeap::ViolatesMaximumOutstandingAsyncCopies( } } -bool AlternateMemoryBestFitHeap::ViolatesAsyncCopyOrdering( - int64 start_time, int64 end_time) const { +absl::optional +AlternateMemoryBestFitHeap::ViolatesAsyncCopyOrdering(int64 start_time, + int64 end_time) const { return async_copy_ordering_.ViolatesOrdering(start_time, end_time); } @@ -1945,6 +1993,50 @@ bool AlternateMemoryBestFitHeap::Evict(const AllocationRequest& request) { return true; } +int64 AlternateMemoryBestFitHeap::FindPrefetchEndTime( + const AllocationRequest& request, int64 earliest_prefetch_time) const { + int64 prefetch_end_time = request.latest_prefetch_time; + + for (int retry_number = 0; + retry_number < options_.prefetch_copy_done_reorder_max_retries; + ++retry_number) { + int64 latest_prefetch_time = + options_.prefetch_interval_picker->LatestPrefetchStartTime( + request.use->hlo_use, earliest_prefetch_time, prefetch_end_time); + // Return if we couldn't find a suitable prefetch start time. 
+ if (latest_prefetch_time < earliest_prefetch_time) { + break; + } + + // Return either if there is no other violating asynchronous copy (since we + // don't need to change the prefetch end time) or if the violating + // asynchronous copy ends after the prefetch end time. + auto violating_async_copy = + ViolatesAsyncCopyOrdering(latest_prefetch_time, prefetch_end_time); + if (!violating_async_copy || + violating_async_copy->end_time >= prefetch_end_time) { + break; + } + VLOG(4) << "Violating async copy: (" << violating_async_copy->start_time + << ", " << violating_async_copy->end_time << ")"; + + int64 new_prefetch_end_time = + options_.prefetch_interval_picker->LatestPrefetchEndTime( + prefetch_end_time, violating_async_copy->end_time); + if (new_prefetch_end_time > earliest_prefetch_time) { + VLOG(3) << "Update prefetch end time = " << new_prefetch_end_time; + prefetch_end_time = new_prefetch_end_time; + } else { + VLOG(3) << "Can't update prefetch end time = " << new_prefetch_end_time + << " because earliest prefetch start time = " + << earliest_prefetch_time; + break; + } + } + + return prefetch_end_time; +} + bool AlternateMemoryBestFitHeap::Prefetch( const AllocationRequest& request, const MemorySpaceAssignment::Allocation& prev_allocation_in_default_mem) { @@ -1966,9 +2058,11 @@ bool AlternateMemoryBestFitHeap::Prefetch( earliest_prefetch_time = std::max(earliest_prefetch_time, *request.earliest_prefetch_time); } - options_.prefetch_interval_picker->Begin(request.use->hlo_use, - earliest_prefetch_time, - request.latest_prefetch_time); + int64 prefetch_end_time = + FindPrefetchEndTime(request, earliest_prefetch_time); + + options_.prefetch_interval_picker->Begin( + request.use->hlo_use, earliest_prefetch_time, prefetch_end_time); VLOG(3) << "Trying prefetch picker = " << options_.prefetch_interval_picker->ToDebugString(); @@ -1988,19 +2082,19 @@ bool AlternateMemoryBestFitHeap::Prefetch( : 0; while (!options_.prefetch_interval_picker->Done()) { alternate_mem_interval.start = options_.prefetch_interval_picker->Next(); - CHECK_LT(alternate_mem_interval.start, request.latest_prefetch_time); + CHECK_LT(alternate_mem_interval.start, prefetch_end_time); VLOG(4) << "Trying alternate memory allocation (" << alternate_mem_interval.start << ", " << request.end_time << ")"; // If this additional asynchronous copy would violate the limit, try a // different interval. 
if (ViolatesAsyncCopyOrdering(alternate_mem_interval.start, - request.latest_prefetch_time)) { + prefetch_end_time)) { VLOG(4) << "This would violate asynchronous copy ordering."; prefetch_failed_due_to_async_copy_ = true; continue; } if (ViolatesMaximumOutstandingAsyncCopies( - alternate_mem_interval.start, request.latest_prefetch_time, + alternate_mem_interval.start, prefetch_end_time, /*is_prefetch=*/true, extra_async_copy_limit)) { VLOG(4) << "This would violate the outstanding async copy limit."; prefetch_failed_due_to_async_copy_ = true; @@ -2022,7 +2116,7 @@ bool AlternateMemoryBestFitHeap::Prefetch( AddAsyncCopy(prev_allocation_in_default_mem, MemorySpace::kAlternate, chunk_candidate->chunk, alternate_mem_interval.start, - request.end_time, request.latest_prefetch_time, + request.end_time, prefetch_end_time, request.allocation_value->allocation_sequence()); request.allocation_value->allocation_sequence()->back()->AddUse( diff --git a/tensorflow/compiler/xla/service/memory_space_assignment.h b/tensorflow/compiler/xla/service/memory_space_assignment.h index d1b508a6a85..4bb1632e5c9 100644 --- a/tensorflow/compiler/xla/service/memory_space_assignment.h +++ b/tensorflow/compiler/xla/service/memory_space_assignment.h @@ -198,6 +198,17 @@ class PrefetchIntervalPicker { virtual int64 PreferredEvictionEndTime(const Shape& shape, int64 start_time, int64 latest_end_time) const = 0; + // Returns the latest time that a prefetch can start. + virtual int64 LatestPrefetchStartTime(const HloUse& use, int64 start_time, + int64 end_time) const = 0; + + // Returns the latest time that a prefetch can end that is less than or equal + // to proposed_prefetch_end_time. + virtual int64 LatestPrefetchEndTime(int64 original_prefetch_end_time, + int64 proposed_prefetch_end_time) const { + return proposed_prefetch_end_time; + } + // Begins the iterator for the first start time of the prefetch. virtual void Begin(const HloUse& use, int64 start_time, int64 end_time) = 0; @@ -256,6 +267,9 @@ class InstructionCountPrefetchIntervalPicker : public PrefetchIntervalPicker { int64 PreferredEvictionEndTime(const Shape& shape, int64 start_time, int64 latest_end_time) const override; + int64 LatestPrefetchStartTime(const HloUse& use, int64 start_time, + int64 end_time) const override; + void Begin(const HloUse& use, int64 start_time, int64 end_time) override; int64 Next() override; @@ -292,6 +306,11 @@ class CostAnalysisPrefetchIntervalPicker : public PrefetchIntervalPicker { int64 PreferredEvictionEndTime(const Shape& shape, int64 start_time, int64 latest_end_time) const override; + int64 LatestPrefetchStartTime(const HloUse& use, int64 start_time, + int64 end_time) const override; + int64 LatestPrefetchEndTime(int64 original_prefetch_end_time, + int64 proposed_prefetch_end_time) const override; + void Begin(const HloUse& use, int64 start_time, int64 end_time) override; int64 Next() override; @@ -395,6 +414,11 @@ class MemorySpaceAssignment { // max_outstanding_prefetches). int64 while_use_extra_outstanding_prefetch_limit = 0; + // Specifies the maximum number of times we are willing to move a copy + // done of a prefetch earlier due to an asynchronous copy ordering + // violation. + int64 prefetch_copy_done_reorder_max_retries = 1; + // Specifies the maximum number of retries that will be performed for each // value in case prefetching failed due to running out of asynchronous // copies or asynchronous copy ordering. @@ -850,9 +874,9 @@ class AsynchronousCopyOrdering { // Removes an asynchronous copy. 
CHECKs that it is removed. void RemoveCopy(const AsynchronousCopy& copy); - // Returns true if the addition of an asynchronous copy in the the given time - // interval would violate the asynchronous copy ordering. E.g., consider the - // following scenario: + // If the addition of an asynchronous copy in the given time interval would + // violate the asynchronous copy ordering, returns the violating + // already-committed asynchronous copy. E.g., consider the following scenario: // CS CD // already committed async copy: +-----------+ // new async copy: +--------+ @@ -860,7 +884,8 @@ class AsynchronousCopyOrdering { // The new asynchronous copy would violate the ordering guarantee because the // copy start is after an already committed asynchronous copy while its copy // done is before the committed copy. - bool ViolatesOrdering(int64 start_time, int64 end_time) const; + absl::optional ViolatesOrdering(int64 start_time, + int64 end_time) const; private: // Stores asynchronous copies in a tree set respecting the pipelining order. @@ -981,6 +1006,10 @@ class AlternateMemoryBestFitHeap : public GlobalDecreasingSizeBestFitHeap { // Try evicting to default memory space. Returns true if successful. bool Evict(const AllocationRequest& request); + // Returns the time a copy done of a prefetch should be scheduled. + int64 FindPrefetchEndTime(const AllocationRequest& request, + int64 earliest_prefetch_time) const; + // Try prefetching to alternate memory space. Returns true if successful. bool Prefetch( const AllocationRequest& request, @@ -1045,8 +1074,10 @@ class AlternateMemoryBestFitHeap : public GlobalDecreasingSizeBestFitHeap { int64 start_time, int64 end_time, bool is_prefetch, int64 extra_async_copy_limit = 0) const; - // Return true if the asynchronous copy would violate the pipelining order. - bool ViolatesAsyncCopyOrdering(int64 start_time, int64 end_time) const; + // If the asynchronous copy would violate the pipelining order, returns the + // violating asynchronous copy. + absl::optional ViolatesAsyncCopyOrdering( + int64 start_time, int64 end_time) const; // Adds an asynchronous copy to the allocations. void AddAsyncCopy(const MemorySpaceAssignment::Allocation& prev_allocation, diff --git a/tensorflow/compiler/xla/service/memory_space_assignment_test.cc b/tensorflow/compiler/xla/service/memory_space_assignment_test.cc index a92b73cfeb4..d609f7edd1d 100644 --- a/tensorflow/compiler/xla/service/memory_space_assignment_test.cc +++ b/tensorflow/compiler/xla/service/memory_space_assignment_test.cc @@ -286,6 +286,92 @@ class MemorySpaceAssignmentTest : public HloTestBase, MemorySpaceAssignmentCostAnalysis::Cache cache_; }; +// For testing purposes, we define a cost analysis where we can control the +// elapsed times of each HLO and asynchronous copy. 
+class FakeMemorySpaceAssignmentCostAnalysis + : public MemorySpaceAssignmentCostAnalysis { + public: + static StatusOr> + Create(const HloCostAnalysis& cost_analysis, const HloModule& module) { + TF_ASSIGN_OR_RETURN(auto alias_analysis, HloAliasAnalysis::Run(&module)); + TF_ASSIGN_OR_RETURN(auto hlo_live_range, + HloLiveRange::Run(module.schedule(), *alias_analysis, + module.entry_computation())); + auto call_graph = CallGraph::Build(&module); + return absl::WrapUnique(new FakeMemorySpaceAssignmentCostAnalysis( + cost_analysis, /*async_copy_bandwidth_bytes_per_second=*/1, + /*alternate_mem_bandwidth_bytes_per_second=*/1, + std::move(alias_analysis), std::move(hlo_live_range), + std::move(call_graph))); + } + + float GetInstructionElapsed( + const HloInstruction& instruction) const override { + if (get_instruction_elapsed_override_) { + return get_instruction_elapsed_override_(instruction); + } + return 1.0; + } + + float GetInstructionElapsedInAlternateMemory( + const HloInstruction& instruction, + absl::optional operand_in_alternate_mem, + bool output_in_alternate_mem) const override { + if (get_instruction_elapsed_in_alternate_memory_override_) { + return get_instruction_elapsed_in_alternate_memory_override_( + instruction, operand_in_alternate_mem, output_in_alternate_mem); + } + if (operand_in_alternate_mem) { + return 0.5; + } else { + return 1.0; + } + } + + float GetAsyncCopyElapsed(const Shape& shape) const override { + if (get_async_copy_elapsed_override_) { + return get_async_copy_elapsed_override_(shape); + } + return 3.0; + } + + // The following methods can be used to override what the above API calls + // return. + void SetOverrideForGetInstructionElapsed( + std::function function) { + get_instruction_elapsed_override_ = function; + } + void SetOverrideForGetInstructionElapsedInAlternateMemory( + std::function, bool)> + function) { + get_instruction_elapsed_in_alternate_memory_override_ = function; + } + void SetOverrideForGetAsyncCopyElapsed( + std::function function) { + get_async_copy_elapsed_override_ = function; + } + + protected: + FakeMemorySpaceAssignmentCostAnalysis( + const HloCostAnalysis& cost_analysis, + float async_copy_bandwidth_bytes_per_second, + float alternate_mem_bandwidth_bytes_per_second, + std::unique_ptr alias_analysis, + std::unique_ptr hlo_live_range, + std::unique_ptr call_graph) + : MemorySpaceAssignmentCostAnalysis( + cost_analysis, async_copy_bandwidth_bytes_per_second, + alternate_mem_bandwidth_bytes_per_second, std::move(alias_analysis), + std::move(hlo_live_range), std::move(call_graph)) {} + + private: + std::function + get_instruction_elapsed_override_ = nullptr; + std::function, bool)> + get_instruction_elapsed_in_alternate_memory_override_ = nullptr; + std::function get_async_copy_elapsed_override_ = nullptr; +}; + TEST_P(MemorySpaceAssignmentTest, ParameterOnly) { // A module consisting of a single parameter. Inputs/outputs are currently // excluded from memory space assignment. @@ -3750,6 +3836,123 @@ TEST_P(MemorySpaceAssignmentTest, PendingChunkMemoryCorruptionBug) { buffer_interval_compare, &prefetch_interval_picker); } +TEST_P(MemorySpaceAssignmentTest, MoveCopyDoneEarlier) { + // This tests the case where an earlier placed smaller buffer may block a + // larger buffer due to asynchronous copy ordering. The smaller buffer (the + // operand of sin) will be placed first. The cos, whose operand is 3 times + // larger than sin's, needs longer time for the asynhronous copy. 
The cos is + // placed right after sin, leading to a copy ordering violation: + // + // param1------------------>CS----->CD->sin + // param0------------->CS------------------->CD->cos + // + // To fix this, we need to move copy done for cos earlier and ensure both of + // these buffers get alternate memory allocations: + // + // param1------------------>CS----->CD->sin + // param0-->CS------------------->CD------------>cos + absl::string_view hlo_string = R"( + HloModule module, is_scheduled=true + + ENTRY Entry { + param0 = f32[8,3] parameter(0) + param1 = f32[2,4] parameter(1) + a = f32[2,4] negate(param1) + b = f32[2,4] negate(a) + c = f32[2,4] negate(b) + d = f32[2,4] negate(c) + e = f32[2,4] negate(d) + f = f32[2,4] negate(e) + g = f32[2,4] negate(f) + h = f32[2,4] negate(g) + i = f32[2,4] negate(h) + j = f32[2,4] negate(i) + k = f32[2,4] negate(j) + l = f32[2,4] negate(k) + m = f32[2,4] negate(l) + n = f32[2,4] negate(m) + sin = f32[2,4] sine(param1) + o = f32[2,4] negate(n) + cos = f32[8,3] cosine(param0) + ROOT tuple = (f32[8,3], f32[2,4], f32[2,4]) tuple(cos, sin, o) + } + )"; + + MemorySpaceAssignment::BufferIntervalCompare buffer_interval_compare = + [](const MemorySpaceAssignment::BufferInterval& a, + const MemorySpaceAssignment::BufferInterval& b) { + auto get_opcode_priority = [](const HloOpcode& opcode) { + switch (opcode) { + case HloOpcode::kSin: + return 0; + case HloOpcode::kCos: + return 1; + case HloOpcode::kTanh: + return 2; + default: + return 3; + } + }; + + auto get_user_priority = [&](const HloValue& value) { + int priority = INT_MAX; + for (const auto& use : value.uses()) { + priority = std::min(priority, + get_opcode_priority(use.instruction->opcode())); + } + return priority; + }; + + return get_user_priority(*a.buffer) < get_user_priority(*b.buffer); + }; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + + HloCostAnalysis hlo_cost_analysis(ShapeSize); + TF_ASSERT_OK_AND_ASSIGN(auto cost_analysis, + FakeMemorySpaceAssignmentCostAnalysis::Create( + hlo_cost_analysis, *module)); + cost_analysis->SetOverrideForGetAsyncCopyElapsed([](const Shape& shape) { + // This should return 2 for f32[2,4] and 6 for f32[8,3]. + return ShapeSize(shape) / 16; + }); + CostAnalysisPrefetchIntervalPicker interval_picker( + *cost_analysis, + /*min_async_copy_to_overlap_ratio=*/1.0, + /*max_async_copy_to_overlap_ratio=*/4.0, + /*preferred_async_copy_to_overlap_ratio=*/1.5); + AssignMemorySpace(module.get(), /*max_outstanding_async_copies=*/-1, + buffer_interval_compare, &interval_picker); + + // Check that both cos and sin could get their operands prefetched. + const HloInstruction* cos = + module->entry_computation()->GetInstructionWithName("cos"); + const HloInstruction* sin = + module->entry_computation()->GetInstructionWithName("sin"); + EXPECT_THAT(sin->operand(0), + op::AsyncCopy(kAlternateMemorySpace, kDefaultMemorySpace, + op::Parameter(1))); + EXPECT_THAT(cos->operand(0), + op::AsyncCopy(kAlternateMemorySpace, kDefaultMemorySpace, + op::Parameter(0))); + + // Sanity check that the cos' operand copy-done is scheduled earlier than + // sin's operand. 
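  // (find_schedule_index below returns an instruction's position in the entry
  // computation's flattened instruction sequence, so the EXPECT_GT at the end
  // asserts that the copy-done feeding cos occupies an earlier schedule slot
  // than the one feeding sin.)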
+ auto find_schedule_index = [&](const HloInstruction* instruction) { + const auto& instructions = + module->schedule().sequence(module->entry_computation()).instructions(); + for (int i = 0; i < instructions.size(); ++i) { + if (instruction == instructions[i]) { + return i; + } + } + CHECK(false); + return -1; + }; + EXPECT_GT(find_schedule_index(sin->operand(0)), + find_schedule_index(cos->operand(0))); +} + TEST_P(MemorySpaceAssignmentTest, Determinism) { // Run memory space assignment a few times to make sure every time it compiles // to the same thing. @@ -4046,57 +4249,6 @@ TEST_P(MemorySpaceAssignmentTest, CrossProgramPrefetchFusionTest) { EXPECT_EQ(cross_program_prefetches.size(), 0); } -// For testing purposes, we define a cost analysis where we can control the -// elapsed times of each HLO and asynchronous copy. -class FakeMemorySpaceAssignmentCostAnalysis - : public MemorySpaceAssignmentCostAnalysis { - public: - static StatusOr> - Create(const HloCostAnalysis& cost_analysis, const HloModule& module) { - TF_ASSIGN_OR_RETURN(auto alias_analysis, HloAliasAnalysis::Run(&module)); - TF_ASSIGN_OR_RETURN(auto hlo_live_range, - HloLiveRange::Run(module.schedule(), *alias_analysis, - module.entry_computation())); - auto call_graph = CallGraph::Build(&module); - return absl::WrapUnique(new FakeMemorySpaceAssignmentCostAnalysis( - cost_analysis, /*async_copy_bandwidth_bytes_per_second=*/1, - /*alternate_mem_bandwidth_bytes_per_second=*/1, - std::move(alias_analysis), std::move(hlo_live_range), - std::move(call_graph))); - } - - float GetInstructionElapsed( - const HloInstruction& instruction) const override { - return 1.0; - } - - float GetInstructionElapsedInAlternateMemory( - const HloInstruction& instruction, - absl::optional operand_in_alternate_mem, - bool output_in_alternate_mem) const override { - if (operand_in_alternate_mem) { - return 0.5; - } else { - return 1.0; - } - } - - float GetAsyncCopyElapsed(const Shape& shape) const override { return 3.0; } - - protected: - FakeMemorySpaceAssignmentCostAnalysis( - const HloCostAnalysis& cost_analysis, - float async_copy_bandwidth_bytes_per_second, - float alternate_mem_bandwidth_bytes_per_second, - std::unique_ptr alias_analysis, - std::unique_ptr hlo_live_range, - std::unique_ptr call_graph) - : MemorySpaceAssignmentCostAnalysis( - cost_analysis, async_copy_bandwidth_bytes_per_second, - alternate_mem_bandwidth_bytes_per_second, std::move(alias_analysis), - std::move(hlo_live_range), std::move(call_graph)) {} -}; - using CostAnalysisPrefetchIntervalPickerTest = HloTestBase; TEST_F(CostAnalysisPrefetchIntervalPickerTest, PrefetchIntervalOrder) { From fb9072b473672500bef0c8e0c262d859f95f535b Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Wed, 22 Jul 2020 11:56:10 -0700 Subject: [PATCH 1057/2522] Adding a macro for marking unused variables and removing compiler warnings --- tensorflow/core/platform/macros.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tensorflow/core/platform/macros.h b/tensorflow/core/platform/macros.h index 57773c54e3e..b83c72a4ad6 100644 --- a/tensorflow/core/platform/macros.h +++ b/tensorflow/core/platform/macros.h @@ -120,5 +120,13 @@ limitations under the License. 
do { \ } while (0) #endif +namespace tensorflow { +namespace internal { +template +void remove_unused_variable_compiler_warning(const T&){}; +} +} // namespace tensorflow +#define TF_UNUSED_VARIABLE(x) \ + tensorflow::internal::remove_unused_variable_compiler_warning(x) #endif // TENSORFLOW_CORE_PLATFORM_MACROS_H_ From b7fb9affc88b6eb2da112625053e7aa9a26a9646 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Wed, 22 Jul 2020 11:59:20 -0700 Subject: [PATCH 1058/2522] Remove references to private name_scope endpoint. PiperOrigin-RevId: 322623654 Change-Id: Id82f764b7768fdf40db43bea1e32404cc220a850 --- tensorflow/python/keras/backend_test.py | 2 +- tensorflow/python/keras/feature_column/dense_features.py | 3 +-- .../python/keras/feature_column/dense_features_v2.py | 2 +- .../keras/feature_column/sequence_feature_column.py | 4 ++-- .../python/keras/layers/legacy_rnn/rnn_cell_impl.py | 5 +++-- .../keras/layers/legacy_rnn/rnn_cell_wrapper_impl.py | 8 ++++---- tensorflow/python/keras/layers/normalization_v2.py | 4 ++-- .../keras/layers/preprocessing/image_preprocessing.py | 8 ++++---- tensorflow/python/keras/legacy_tf_layers/base.py | 2 +- tensorflow/python/keras/legacy_tf_layers/base_test.py | 9 +++++---- .../keras/tests/tracking_util_with_v1_optimizers_test.py | 2 +- tensorflow/python/keras/utils/losses_utils.py | 3 +-- 12 files changed, 26 insertions(+), 26 deletions(-) diff --git a/tensorflow/python/keras/backend_test.py b/tensorflow/python/keras/backend_test.py index 48bbedbd4fc..2e0274a509b 100644 --- a/tensorflow/python/keras/backend_test.py +++ b/tensorflow/python/keras/backend_test.py @@ -169,7 +169,7 @@ class BackendUtilsTest(test.TestCase): sess.run(y, feed_dict={x: np.random.random((2, 3))}) def test_learning_phase_name(self): - with ops.name_scope('test_scope'): + with backend.name_scope('test_scope'): # Test that outer name scopes do not affect the learning phase's name. 
lp = backend.symbolic_learning_phase() self.assertEqual(lp.name, 'keras_learning_phase:0') diff --git a/tensorflow/python/keras/feature_column/dense_features.py b/tensorflow/python/keras/feature_column/dense_features.py index 132343835b3..50403e5459d 100644 --- a/tensorflow/python/keras/feature_column/dense_features.py +++ b/tensorflow/python/keras/feature_column/dense_features.py @@ -21,7 +21,6 @@ from __future__ import print_function import json from tensorflow.python.feature_column import feature_column_v2 as fc -from tensorflow.python.framework import ops from tensorflow.python.keras import backend from tensorflow.python.keras.feature_column import base_feature_layer as kfc from tensorflow.python.keras.saving.saved_model import json_utils @@ -161,7 +160,7 @@ class DenseFeatures(kfc._BaseFeaturesLayer): # pylint: disable=protected-access transformation_cache = fc.FeatureTransformationCache(features) output_tensors = [] for column in self._feature_columns: - with ops.name_scope(column.name): + with backend.name_scope(column.name): try: tensor = column.get_dense_tensor( transformation_cache, self._state_manager, training=training) diff --git a/tensorflow/python/keras/feature_column/dense_features_v2.py b/tensorflow/python/keras/feature_column/dense_features_v2.py index 40c71ce7bd6..ae1294c6fca 100644 --- a/tensorflow/python/keras/feature_column/dense_features_v2.py +++ b/tensorflow/python/keras/feature_column/dense_features_v2.py @@ -89,7 +89,7 @@ class DenseFeatures(dense_features.DenseFeatures): def build(self, _): for column in self._feature_columns: - with ops.name_scope(column.name): + with ops.name_scope_v2(column.name): column.create_state(self._state_manager) # We would like to call Layer.build and not _DenseFeaturesHelper.build. # pylint: disable=protected-access diff --git a/tensorflow/python/keras/feature_column/sequence_feature_column.py b/tensorflow/python/keras/feature_column/sequence_feature_column.py index 5f64ca9642e..cb60bac22eb 100644 --- a/tensorflow/python/keras/feature_column/sequence_feature_column.py +++ b/tensorflow/python/keras/feature_column/sequence_feature_column.py @@ -143,7 +143,7 @@ class SequenceFeatures(kfc._BaseFeaturesLayer): sequence_lengths = [] for column in self._feature_columns: - with ops.name_scope(column.name): + with backend.name_scope(column.name): try: dense_tensor, sequence_length = column.get_sequence_dense_tensor( transformation_cache, self._state_manager, training=training) @@ -164,7 +164,7 @@ class SequenceFeatures(kfc._BaseFeaturesLayer): def _assert_all_equal_and_return(tensors, name=None): """Asserts that all tensors are equal and returns the first one.""" - with ops.name_scope(name, 'assert_all_equal', values=tensors): + with backend.name_scope(name or 'assert_all_equal'): if len(tensors) == 1: return tensors[0] assert_equal_ops = [] diff --git a/tensorflow/python/keras/layers/legacy_rnn/rnn_cell_impl.py b/tensorflow/python/keras/layers/legacy_rnn/rnn_cell_impl.py index 422e7afb31e..96a33280d54 100644 --- a/tensorflow/python/keras/layers/legacy_rnn/rnn_cell_impl.py +++ b/tensorflow/python/keras/layers/legacy_rnn/rnn_cell_impl.py @@ -33,6 +33,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_util from tensorflow.python.keras import activations +from tensorflow.python.keras import backend from tensorflow.python.keras import initializers from tensorflow.python.keras.engine import base_layer_utils from tensorflow.python.keras.engine import 
input_spec @@ -334,7 +335,7 @@ class RNNCell(base_layer.Layer): if (last_batch_size == batch_size and last_dtype == dtype and last_state_size == state_size): return last_output - with ops.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]): + with backend.name_scope(type(self).__name__ + "ZeroState"): output = _zero_state_tensors(state_size, batch_size, dtype) if is_eager: self._last_zero_state = (state_size, batch_size, dtype, output) @@ -1269,7 +1270,7 @@ class MultiRNNCell(RNNCell): return self._cells[-1].output_size def zero_state(self, batch_size, dtype): - with ops.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]): + with backend.name_scope(type(self).__name__ + "ZeroState"): if self._state_is_tuple: return tuple(cell.zero_state(batch_size, dtype) for cell in self._cells) else: diff --git a/tensorflow/python/keras/layers/legacy_rnn/rnn_cell_wrapper_impl.py b/tensorflow/python/keras/layers/legacy_rnn/rnn_cell_wrapper_impl.py index 62a6baa5640..2e3923918a0 100644 --- a/tensorflow/python/keras/layers/legacy_rnn/rnn_cell_wrapper_impl.py +++ b/tensorflow/python/keras/layers/legacy_rnn/rnn_cell_wrapper_impl.py @@ -113,7 +113,7 @@ class DropoutWrapperBase(object): raise TypeError("dropout_state_filter_visitor must be callable") self._dropout_state_filter = ( dropout_state_filter_visitor or _default_dropout_state_filter_visitor) - with ops.name_scope("DropoutWrapperInit"): + with ops.name_scope_v2("DropoutWrapperInit"): def tensor_and_const_value(v): tensor_value = ops.convert_to_tensor(v) @@ -199,7 +199,7 @@ class DropoutWrapperBase(object): self.built = True def zero_state(self, batch_size, dtype): - with ops.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]): + with ops.name_scope_v2(type(self).__name__ + "ZeroState"): return self.cell.zero_state(batch_size, dtype) def _variational_recurrent_dropout_value( @@ -346,7 +346,7 @@ class ResidualWrapperBase(object): return self.cell.output_size def zero_state(self, batch_size, dtype): - with ops.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]): + with ops.name_scope_v2(type(self).__name__ + "ZeroState"): return self.cell.zero_state(batch_size, dtype) def _call_wrapped_cell(self, inputs, state, cell_call_fn, **kwargs): @@ -433,7 +433,7 @@ class DeviceWrapperBase(object): return self.cell.output_size def zero_state(self, batch_size, dtype): - with ops.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]): + with ops.name_scope_v2(type(self).__name__ + "ZeroState"): with ops.device(self._device): return self.cell.zero_state(batch_size, dtype) diff --git a/tensorflow/python/keras/layers/normalization_v2.py b/tensorflow/python/keras/layers/normalization_v2.py index 48af6b97ce1..969f8f1299e 100644 --- a/tensorflow/python/keras/layers/normalization_v2.py +++ b/tensorflow/python/keras/layers/normalization_v2.py @@ -21,7 +21,7 @@ from __future__ import print_function from tensorflow.python.distribute import distribution_strategy_context as ds from tensorflow.python.distribute import reduce_util from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops +from tensorflow.python.keras import backend from tensorflow.python.keras.layers import normalization from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops @@ -158,7 +158,7 @@ class SyncBatchNormalization(normalization.BatchNormalizationBase): def _calculate_mean_and_var(self, x, axes, keep_dims): - with ops.name_scope('moments', values=[x, axes]): + with 
backend.name_scope('moments'): # The dynamic range of fp16 is too limited to support the collection of # sufficient statistics. As a workaround we simply perform the operations # on 32-bit floats before converting the mean and variance back to fp16 diff --git a/tensorflow/python/keras/layers/preprocessing/image_preprocessing.py b/tensorflow/python/keras/layers/preprocessing/image_preprocessing.py index 25558b7c0fb..d36e192d873 100644 --- a/tensorflow/python/keras/layers/preprocessing/image_preprocessing.py +++ b/tensorflow/python/keras/layers/preprocessing/image_preprocessing.py @@ -585,7 +585,7 @@ def get_translation_matrix(translations, name=None): A tensor of shape (num_images, 8) projective transforms which can be given to `transform`. """ - with ops.name_scope(name, 'translation_matrix'): + with K.name_scope(name or 'translation_matrix'): num_translations = array_ops.shape(translations)[0] # The translation matrix looks like: # [[1 0 -dx] @@ -665,7 +665,7 @@ def transform(images, TypeError: If `image` is an invalid type. ValueError: If output shape is not 1-D int32 Tensor. """ - with ops.name_scope(name, 'transform'): + with K.name_scope(name or 'transform'): if output_shape is None: output_shape = array_ops.shape(images)[1:3] if not context.executing_eagerly(): @@ -708,7 +708,7 @@ def get_rotation_matrix(angles, image_height, image_width, name=None): `(x', y') = ((a0 x + a1 y + a2) / k, (b0 x + b1 y + b2) / k)`, where `k = c0 x + c1 y + 1`. """ - with ops.name_scope(name, 'rotation_matrix'): + with K.name_scope(name or 'rotation_matrix'): x_offset = ((image_width - 1) - (math_ops.cos(angles) * (image_width - 1) - math_ops.sin(angles) * (image_height - 1))) / 2.0 @@ -1015,7 +1015,7 @@ def get_zoom_matrix(zooms, image_height, image_width, name=None): `(x', y') = ((a0 x + a1 y + a2) / k, (b0 x + b1 y + b2) / k)`, where `k = c0 x + c1 y + 1`. 
""" - with ops.name_scope(name, 'zoom_matrix'): + with K.name_scope(name or 'zoom_matrix'): num_zooms = array_ops.shape(zooms)[0] # The zoom matrix looks like: # [[zx 0 0] diff --git a/tensorflow/python/keras/legacy_tf_layers/base.py b/tensorflow/python/keras/legacy_tf_layers/base.py index e5a37e854ac..25b9ddca65e 100644 --- a/tensorflow/python/keras/legacy_tf_layers/base.py +++ b/tensorflow/python/keras/legacy_tf_layers/base.py @@ -440,7 +440,7 @@ class Layer(base_layer.Layer): with vs.variable_scope( self._scope, reuse=reuse, auxiliary_name_scope=False) as scope: self._current_scope = scope - with ops.name_scope(self._name_scope(), skip_on_eager=False): + with backend.name_scope(self._name_scope()): use_resource = (use_resource or self._use_resource_variables or scope.use_resource) diff --git a/tensorflow/python/keras/legacy_tf_layers/base_test.py b/tensorflow/python/keras/legacy_tf_layers/base_test.py index b3d6789d4dc..36be60f7657 100644 --- a/tensorflow/python/keras/legacy_tf_layers/base_test.py +++ b/tensorflow/python/keras/legacy_tf_layers/base_test.py @@ -28,6 +28,7 @@ from tensorflow.python.eager import def_function from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.keras import backend from tensorflow.python.keras import combinations from tensorflow.python.keras.engine import base_layer as keras_base_layer from tensorflow.python.keras.engine import input_spec @@ -68,12 +69,12 @@ class BaseLayerTest(test.TestCase, parameterized.TestCase): @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def testKerasStyleAddWeight(self): keras_layer = keras_base_layer.Layer(name='keras_layer') - with ops.name_scope('foo', skip_on_eager=False): + with backend.name_scope('foo'): keras_variable = keras_layer.add_variable( 'my_var', [2, 2], initializer=init_ops.zeros_initializer()) self.assertEqual(keras_variable.name, 'foo/my_var:0') - with ops.name_scope('baz', skip_on_eager=False): + with backend.name_scope('baz'): old_style_layer = base_layers.Layer(name='my_layer') # Test basic variable creation. variable = old_style_layer.add_variable( @@ -83,7 +84,7 @@ class BaseLayerTest(test.TestCase, parameterized.TestCase): with base_layers.keras_style_scope(): layer = base_layers.Layer(name='my_layer') # Test basic variable creation. - with ops.name_scope('bar', skip_on_eager=False): + with backend.name_scope('bar'): variable = layer.add_variable( 'my_var', [2, 2], initializer=init_ops.zeros_initializer()) self.assertEqual(variable.name, 'bar/my_var:0') @@ -241,7 +242,7 @@ class BaseLayerTest(test.TestCase, parameterized.TestCase): my_layer2.apply(inputs) self.assertEqual(my_layer2._scope.name, 'my_layer_2') # Name scope shouldn't affect names. 
- with ops.name_scope('some_name_scope'): + with backend.name_scope('some_name_scope'): default_layer2 = PrivateLayer() default_layer2.apply(inputs) self.assertEqual(default_layer2._scope.name, 'private_layer_2') diff --git a/tensorflow/python/keras/tests/tracking_util_with_v1_optimizers_test.py b/tensorflow/python/keras/tests/tracking_util_with_v1_optimizers_test.py index 1ba76c19866..01ad9a727bf 100644 --- a/tensorflow/python/keras/tests/tracking_util_with_v1_optimizers_test.py +++ b/tensorflow/python/keras/tests/tracking_util_with_v1_optimizers_test.py @@ -465,7 +465,7 @@ class CheckpointingTests(keras_parameterized.TestCase): root, name=name, shape=[1, 2], dtype=dtypes.float64) (named_variable,), _, _ = trackable_utils._serialize_object_graph( root, saveables_cache=None) - with ops.name_scope("root/" + named_variable.name): + with ops.name_scope_v2("root/" + named_variable.name): pass # Make sure we can use this as an op name if we prefix it. return named_variable.name diff --git a/tensorflow/python/keras/utils/losses_utils.py b/tensorflow/python/keras/utils/losses_utils.py index bd8aeb4065f..b8a063e3b42 100644 --- a/tensorflow/python/keras/utils/losses_utils.py +++ b/tensorflow/python/keras/utils/losses_utils.py @@ -61,8 +61,7 @@ def remove_squeezable_dimensions( Returns: Tuple of `labels` and `predictions`, possibly with last dim squeezed. """ - with ops.name_scope(name, 'remove_squeezable_dimensions', - [labels, predictions]): + with K.name_scope(name or 'remove_squeezable_dimensions'): predictions = ops.convert_to_tensor_v2_with_dispatch(predictions) labels = ops.convert_to_tensor_v2_with_dispatch(labels) predictions_shape = predictions.get_shape() From 93f4d3dfe6ac23a8e513a79bf810925d9037b5f6 Mon Sep 17 00:00:00 2001 From: Chenkai Kuang Date: Wed, 22 Jul 2020 12:03:37 -0700 Subject: [PATCH 1059/2522] Fix a tf.function + tf.distribute read variable test. The test is not failing now because we don't correctly clear `run_functions_eagerly` state in each test, so the test ends up inheriting the state from last test where run_functions_eagerly == True. PiperOrigin-RevId: 322624670 Change-Id: I2ff19dcd4bed453a7f746b6a1734875a4cb6dd2f --- .../python/distribute/tf_function_test.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/distribute/tf_function_test.py b/tensorflow/python/distribute/tf_function_test.py index 6621f51cf32..337a5a39f26 100644 --- a/tensorflow/python/distribute/tf_function_test.py +++ b/tensorflow/python/distribute/tf_function_test.py @@ -36,7 +36,8 @@ from tensorflow.python.ops import variables class TFFunctionTest(test.TestCase, parameterized.TestCase): - def setup(self): + def setUp(self): + super().setUp() # Clear the state for every test. def_function.run_functions_eagerly(False) @@ -105,6 +106,8 @@ class TFFunctionTest(test.TestCase, parameterized.TestCase): )) def testReadVariableInsideFunction(self, distribution, run_functions_eagerly): + def_function.run_functions_eagerly(run_functions_eagerly) + # Get devices on which variables will be placed. Default strategy does not # define this, so assume cpu:0 in that case. try: @@ -127,11 +130,15 @@ class TFFunctionTest(test.TestCase, parameterized.TestCase): def read(): return v.read_value() - for i, d in enumerate(devices): - with ops.device(d): - # Verify that the value from each device is read, when in that device - # scope. 
- self.assertEqual(math_ops.cast(i, dtypes.float32), read()) + # Verify that the value from each device is read, when in that device + # scope. Doing this inside strategy scope is needed to force function + # retracing on each device, otherwise `read()` will only be traced once + # on the first device and following variable read will always read the value + # on the first replica. + with distribution.scope(): + for i, d in enumerate(devices): + with ops.device(d): + self.assertEqual(math_ops.cast(i, dtypes.float32), read()) if __name__ == "__main__": From 82be9d5dbf0988c6bd9e9103991a0b5f5828d7e8 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Wed, 22 Jul 2020 19:12:21 +0000 Subject: [PATCH 1060/2522] Update tensorflow/core/platform/macros.h --- tensorflow/core/platform/macros.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/core/platform/macros.h b/tensorflow/core/platform/macros.h index b83c72a4ad6..a38c57d1d04 100644 --- a/tensorflow/core/platform/macros.h +++ b/tensorflow/core/platform/macros.h @@ -120,6 +120,7 @@ limitations under the License. do { \ } while (0) #endif + namespace tensorflow { namespace internal { template From 9b5f67e3d4e4ef4795b2bda1de02ce674b071ddb Mon Sep 17 00:00:00 2001 From: Yanhua Sun Date: Wed, 22 Jul 2020 12:06:19 -0700 Subject: [PATCH 1061/2522] add CudnnRNN/CudnnRNNbackprop to ORDER_INSENSITIVE_STATEFUL_OPS list This is to avoid unnecessary dependency due to stateful CudnnRNN/CudnnRNNBackprop ops PiperOrigin-RevId: 322625371 Change-Id: I2b99c77f20b4de27c9306dd65e45815d359b8172 --- tensorflow/core/grappler/optimizers/function_optimizer.cc | 3 ++- tensorflow/python/framework/auto_control_deps.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/function_optimizer.cc b/tensorflow/core/grappler/optimizers/function_optimizer.cc index 0e156aaa84c..6312bd0880c 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer.cc @@ -844,7 +844,8 @@ const bool IsExemptFromSideEffectsExecutionValidation(const string& op) { // CudnnRNN ops are stateful but they can't generate any observable // side-effect. - "CudnnRNNV2", "CudnnRNNV3", "CudnnRNNBackpropV2", "CudnnRNNBackpropV3", + "CudnnRNN", "CudnnRNNBackprop", "CudnnRNNV2", "CudnnRNNV3", + "CudnnRNNBackpropV2", "CudnnRNNBackpropV3", // TPUEmbedding EnqueueOps are stateful but this is only between ops with // the same device_ordinal on the same host. diff --git a/tensorflow/python/framework/auto_control_deps.py b/tensorflow/python/framework/auto_control_deps.py index 7b49f2e16e8..ccf9877b08d 100644 --- a/tensorflow/python/framework/auto_control_deps.py +++ b/tensorflow/python/framework/auto_control_deps.py @@ -97,7 +97,8 @@ LEGACY_RANDOM_OPS = [ ] _ORDER_INSENSITIVE_STATEFUL_OPS = [ - "CudnnRNNV2", "CudnnRNNV3", "CudnnRNNBackpropV2", "CudnnRNNBackpropV3", + "CudnnRNN", "CudnnRNNBackprop", "CudnnRNNV2", "CudnnRNNV3", + "CudnnRNNBackpropV2", "CudnnRNNBackpropV3", "EnqueueTPUEmbeddingSparseBatch", "EnqueueTPUEmbeddingIntegerBatch", "EnqueueTPUEmbeddingSparseTensorBatch", "EnqueueTPUEmbeddingRaggedTensorBatch", "RestoreV2", "SaveV2" From 803ce808dd349df10c5b639f929a75036efa4c4b Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 22 Jul 2020 12:18:58 -0700 Subject: [PATCH 1062/2522] The non-android ASharedMemory_create has a bug, where if two calls to create memory regions use the same 'name', they will be mapped to the to same /dev/shm file and trip over each other (particularly if they're different sizes). This CL ensures that each created memory region is unique. PiperOrigin-RevId: 322628204 Change-Id: I13b3b59cd87107844dcdbb26ed86f337c761d94f --- tensorflow/lite/nnapi/nnapi_implementation.cc | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/tensorflow/lite/nnapi/nnapi_implementation.cc b/tensorflow/lite/nnapi/nnapi_implementation.cc index f18e3b33240..52def4de8fd 100644 --- a/tensorflow/lite/nnapi/nnapi_implementation.cc +++ b/tensorflow/lite/nnapi/nnapi_implementation.cc @@ -20,7 +20,6 @@ limitations under the License. #include #include -#include #include #ifdef __ANDROID__ @@ -65,20 +64,8 @@ void* LoadFunction(void* handle, const char* name, bool optional) { #ifndef __ANDROID__ // Add /dev/shm implementation of shared memory for non-Android platforms -int ASharedMemory_create(const char* /* name */, size_t size) { - // name should not be used to identify the memory region (hence - // 'anonymous' shared memory). Generate a unique name for every create call. - char _tmpname[L_tmpnam]; - if (tmpnam_r(_tmpname) == nullptr) { - return -1; - } - - // tmpnam will produce a string containing with slashes, but shm_open - // won't like that. - std::string _name = std::string(_tmpname); - std::replace(_name.begin(), _name.end(), '/', '-'); - - int fd = shm_open(_name.c_str(), O_RDWR | O_CREAT, 0644); +int ASharedMemory_create(const char* name, size_t size) { + int fd = shm_open(name, O_RDWR | O_CREAT, 0644); if (fd < 0) { return fd; } From eac142afc034c4e5df3e1d1bd4363d82eca5fb3a Mon Sep 17 00:00:00 2001 From: Meghna Natraj Date: Wed, 22 Jul 2020 12:45:37 -0700 Subject: [PATCH 1063/2522] Add the flatbuffers python library to the Kokoro build WindowsOS VMs PiperOrigin-RevId: 322633962 Change-Id: I03c51ea1489bd6cd7aa951e3716a7221d82c50ed --- tensorflow/tools/ci_build/release/common_win.bat | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/tools/ci_build/release/common_win.bat b/tensorflow/tools/ci_build/release/common_win.bat index fa577fcfc33..e460ec8b0e1 100644 --- a/tensorflow/tools/ci_build/release/common_win.bat +++ b/tensorflow/tools/ci_build/release/common_win.bat @@ -26,6 +26,7 @@ SET PATH=%PATH%;C:\%PYTHON_DIRECTORY% @REM TODO(amitpatankar): Make an image with these packages and remove this. +%PIP_EXE% install flatbuffers --upgrade --no-deps %PIP_EXE% install setuptools --upgrade %PIP_EXE% install future>=0.17.1 --no-deps %PIP_EXE% install --ignore-installed --force-reinstall --upgrade tf-estimator-nightly --no-deps From 0443213f2472b53877fd9230194dcc0eead5a155 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Wed, 22 Jul 2020 12:50:31 -0700 Subject: [PATCH 1064/2522] Do not run lit_tests with pip tests. the data dependencies wont be available in the installed pip package. 
PiperOrigin-RevId: 322634880 Change-Id: Ic18165ff5bad456841a2af88f1f3e3d0cc7810ac --- tensorflow/compiler/mlir/glob_lit_test.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/mlir/glob_lit_test.bzl b/tensorflow/compiler/mlir/glob_lit_test.bzl index 9f6856f3636..edbf3663a89 100644 --- a/tensorflow/compiler/mlir/glob_lit_test.bzl +++ b/tensorflow/compiler/mlir/glob_lit_test.bzl @@ -52,7 +52,7 @@ def _run_lit_test(name, data, size, tags, driver, features, exec_properties): native.py_test( name = name, srcs = ["@llvm-project//llvm:lit"], - tags = tags + ["no_windows"], + tags = tags + ["no_pip", "no_windows"], args = [ "tensorflow/compiler/mlir/" + paths.basename(data[-1]) + " --config-prefix=runlit -v", ] + features, From 83ea067f42129aae15f8e9a36c40f62ffee9e5bb Mon Sep 17 00:00:00 2001 From: Advait Jain Date: Wed, 22 Jul 2020 13:04:02 -0700 Subject: [PATCH 1065/2522] Add some micro-specific flatbuffer manipulation functions. PiperOrigin-RevId: 322637827 Change-Id: I4bb00dff3cd8360a71b227b1e3d7a1dc5b1e11b3 --- tensorflow/lite/micro/BUILD | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tensorflow/lite/micro/BUILD b/tensorflow/lite/micro/BUILD index c42335c9772..803c7718e77 100644 --- a/tensorflow/lite/micro/BUILD +++ b/tensorflow/lite/micro/BUILD @@ -13,6 +13,11 @@ package( licenses = ["notice"], # Apache 2.0 ) +package_group( + name = "micro", + packages = ["//tensorflow/lite/micro/..."], +) + cc_library( name = "micro_compatibility", hdrs = [ From 0cd7ae3cbf590e1391541dea5e530b6d5f0839a3 Mon Sep 17 00:00:00 2001 From: Bixia Zheng Date: Wed, 22 Jul 2020 13:14:39 -0700 Subject: [PATCH 1066/2522] [TF:TRT] The converter should only access a cuda device when creating static engines. When creating a static engine for an TRTEngineOp, the converter needs to use the memory allocator associated with the cuda device that will be used to execute the engine. As such, the converter saves the current cuda device id before doing the conversion and restores the cuda device id before returning. These steps are not necessary for dynamic engines because the creation of dynamic engines happens at graph execution time and outside the converter. PiperOrigin-RevId: 322640028 Change-Id: Ib0a1f8af1a333dc310822eda24881cab74c0845e --- .../tf2tensorrt/convert/convert_graph.cc | 71 +++++++++++-------- 1 file changed, 41 insertions(+), 30 deletions(-) diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc index c9210a1a1e7..c4fc3e4f5da 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc @@ -45,6 +45,7 @@ limitations under the License. 
#include "tensorflow/core/grappler/optimizers/meta_optimizer.h" #include "tensorflow/core/grappler/utils.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/protobuf/config.pb.h" // NOLINT @@ -332,7 +333,6 @@ void UpdateToEngineNode(const std::vector& infos, Status CreateTRTNode(const ConversionParams& params, const std::vector& infos, int pos, int max_batch_size, Graph* graph, - nvinfer1::IGpuAllocator* alloc, std::vector* engine_nodes) { const auto& info = infos.at(pos); std::vector input_shape_protos; @@ -428,16 +428,30 @@ Status CreateTRTNode(const ConversionParams& params, // Build the engine and get its serialized representation. string segment_string; if (info.engine_type == EngineInfo::EngineType::TRTStatic) { + std::pair device_allocator = + GetDeviceAndAllocator(params, info); + int cuda_device_id = 0; + std::unique_ptr trt_allocator; + if (device_allocator.first >= 0) { + cuda_device_id = device_allocator.first; + trt_allocator.reset(new TRTDeviceAllocator(device_allocator.second)); + } else { + // The value in trt_allocator is a nullptr and cudamalloc will be used. + LOG_WARNING_WITH_PREFIX << "Can't identify the cuda device. Running on " + "device 0 and use cudamalloc as an allocator"; + } + cudaSetDevice(cuda_device_id); + auto trt_logger = GetLoggerRegistry()->LookUp(params.trt_logger_name); - // Create static engine for fp32/fp16 mode. + // Create static engines with precision_mode fp32/fp16. TrtUniquePtrType engine; - // TODO(sami): What happens if 1st dim is not batch? TF_RETURN_IF_ERROR(ConvertGraphDefToEngine( info.segment_graph_def, calibrate_int8 ? TrtPrecisionMode::FP32 : info.precision_mode, max_batch_size, info.max_workspace_size_bytes, input_shapes, trt_logger, - alloc, /*calibrator=*/nullptr, &engine, info.use_calibration, - params.use_implicit_batch, /*convert_successfully=*/nullptr, + trt_allocator.get(), /*calibrator=*/nullptr, &engine, + info.use_calibration, params.use_implicit_batch, + /*convert_successfully=*/nullptr, /*profile=*/nullptr)); TrtUniquePtrType engine_data(engine->serialize()); segment_string = string(static_cast(engine_data->data()), @@ -793,13 +807,27 @@ Status ConvertAfterShapes(const ConversionParams& params) { } } - // Create a TRT node for each segment using its EngineInfo. - int old_cuda_device = 0; - auto err = cudaGetDevice(&old_cuda_device); - if (err != cudaSuccess) { - LOG(ERROR) << "Couldn't get current device: " << cudaGetErrorString(err); + // Save the cuda device if we may need to switch to another cuda device to + // build static engines. 
+ absl::optional old_cuda_device = absl::nullopt; + if (!params.is_dyn_op) { + int cuda_device_id; + cudaError_t cuda_error = cudaGetDevice(&cuda_device_id); + if (cuda_error != cudaSuccess) { + LOG_WARNING_WITH_PREFIX << "Couldn't get current device: " + << cudaGetErrorString(cuda_error); + } else { + VLOG(1) << "Current cuda device is " << cuda_device_id; + old_cuda_device = cuda_device_id; + } } - VLOG(1) << "Current cuda device is " << old_cuda_device; + + auto restore_cuda_device = gtl::MakeCleanup([old_cuda_device] { + if (old_cuda_device.has_value()) { + cudaSetDevice(old_cuda_device.value()); + } + }); + std::vector engine_nodes; engine_nodes.resize(engine_segments.size()); for (int i = 0; i < engine_segments.size(); ++i) { @@ -813,24 +841,8 @@ Status ConvertAfterShapes(const ConversionParams& params) { 2.0; VLOG(1) << "Assigned " << engine.max_workspace_size_bytes << " bytes to " << engine.engine_name; - // The allocator is used to build the engine. The build and the built engine - // will be destroyed after we get the serialized engine string, so it's fine - // to use unique_ptr here. - std::unique_ptr alloc; - auto device_alloc = GetDeviceAndAllocator(params, engine); - int cuda_device_id = 0; - if (device_alloc.first >= 0) { - cuda_device_id = device_alloc.first; - alloc.reset(new TRTDeviceAllocator(device_alloc.second)); - } else { - // Setting allocator as nullptr should get revert to the cudamalloc - LOG_WARNING_WITH_PREFIX - << "Can't identify the cuda device. Running on device 0 "; - } - cudaSetDevice(cuda_device_id); - auto status = - CreateTRTNode(params, engine_segments, i, params.max_batch_size, &graph, - alloc.get(), &engine_nodes); + auto status = CreateTRTNode(params, engine_segments, i, + params.max_batch_size, &graph, &engine_nodes); string msg = StrCat("segment ", i, " consisting of ", converted_segments.at(i).size(), " nodes by ", @@ -859,7 +871,6 @@ Status ConvertAfterShapes(const ConversionParams& params) { } } } - cudaSetDevice(old_cuda_device); graph.ToGraphDef(params.output_graph_def); VLOG(1) << "Returning from conversion"; return Status::OK(); From 9cc3ecf6c36807d0cd23ada6f0e93d055bf56655 Mon Sep 17 00:00:00 2001 From: Bixia Zheng Date: Wed, 22 Jul 2020 13:15:07 -0700 Subject: [PATCH 1067/2522] [TF:TRT] Reorder two members for OpConverterTest. In OpConverterTest, the tensor buffer allocator should be destructed after the graph scope. Reorder two class members to reflect this, to fix the flakiness of the test. PiperOrigin-RevId: 322640117 Change-Id: I2c3262ac03eff882e9e53060a4961304792a83b9 --- .../tf2tensorrt/convert/convert_nodes_test.cc | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc index 4af83292c03..21ef06a9b79 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc @@ -1309,7 +1309,8 @@ std::vector GetDataAsFloat(InputOutputData& data) { class OpConverterTest : public ::testing::Test { public: OpConverterTest() - : scope_(Scope::NewRootScope()), allocator_(new GpuManagedAllocator()) { + : tensor_buffer_allocator_(new GpuManagedAllocator()), + scope_(Scope::NewRootScope()) { QCHECK_EQ(0, cudaStreamCreate(&stream_)); Reset(); } @@ -1341,7 +1342,7 @@ class OpConverterTest : public ::testing::Test { // Constructs a flat tensor with 'vals' in Unified Memory. 
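  // (The unified memory comes from GpuManagedAllocator, i.e. CUDA managed
  // memory, so values the test writes on the host are also readable on the
  // device. Because tensor_buffer_allocator_ owns these buffers, it is now
  // declared before scope_: C++ destroys members in reverse declaration
  // order, so the allocator outlives the graph that still references them.)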
template Tensor AsTensor(gtl::ArraySlice vals) { // non-absl ok - Tensor ret(allocator_.get(), DataTypeToEnum::value, + Tensor ret(tensor_buffer_allocator_.get(), DataTypeToEnum::value, {static_cast(vals.size())}); std::copy_n(vals.data(), vals.size(), ret.flat().data()); return ret; @@ -1351,7 +1352,7 @@ class OpConverterTest : public ::testing::Test { template Tensor AsTensor(gtl::ArraySlice vals, // non-absl ok const TensorShape& shape) { - Tensor ret(allocator_.get(), DataTypeToEnum::value, + Tensor ret(tensor_buffer_allocator_.get(), DataTypeToEnum::value, {static_cast(vals.size())}); CHECK(ret.CopyFrom(AsTensor(vals), shape)); return ret; @@ -1363,7 +1364,8 @@ class OpConverterTest : public ::testing::Test { template Tensor AsTensor(std::vector vals, const std::vector input_dims, DataType tf_type) { - Tensor ret(allocator_.get(), tf_type, {static_cast(vals.size())}); + Tensor ret(tensor_buffer_allocator_.get(), tf_type, + {static_cast(vals.size())}); if (tf_type == DT_FLOAT) { auto conv_vals = CastTestVector(vals); std::copy_n(conv_vals.data(), conv_vals.size(), ret.flat().data()); @@ -1646,13 +1648,15 @@ class OpConverterTest : public ::testing::Test { Logger logger_; TrtUniquePtrType engine_; cudaStream_t stream_; - // Used to create placeholders with shape and data type information. The - // created placeholders will be used as inputs to the node to be verified, - // thus we need the shape and data type information to get a non-empty - // GraphProperties. + std::unique_ptr tensor_buffer_allocator_; + // The scope that contains the graph being converted. Because + // tensor_buffer_allocator_ provides the storage for tensor contents that are + // represented as attributes for graph nodes within scope_, + // tensor_buffer_allocator_ needs to be available when destructing scope_. + // Therefore, scope_ comes after tensor_buffer_allocator_ in the class member + // field list. Scope scope_; std::unordered_map node_inputs_; - std::unique_ptr allocator_; }; // General test parameters to be used with ops that take a single input tensor. From a3acac4846bc13d3360db722a5df71ee9099f210 Mon Sep 17 00:00:00 2001 From: Thomas O'Malley Date: Wed, 22 Jul 2020 13:27:31 -0700 Subject: [PATCH 1068/2522] Replace private TF API RUN_FUNCTIONS_EAGERLY with existing public tf.config.functions_run_eagerly function. PiperOrigin-RevId: 322642404 Change-Id: I0c0aac5e9d9a561d445ff1c5f7ead1e0b6a044a9 --- tensorflow/python/keras/engine/training.py | 3 ++- tensorflow/python/keras/engine/training_v1.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index 221c8676bb5..0a4e8551232 100644 --- a/tensorflow/python/keras/engine/training.py +++ b/tensorflow/python/keras/engine/training.py @@ -709,7 +709,8 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector): # (2) Explicitly setting run_eagerly causes a Model to be run eagerly. # (3) Not explicitly setting run_eagerly defaults to TF's global setting. 
return (self.dynamic or self._run_eagerly or - (def_function.RUN_FUNCTIONS_EAGERLY and self._run_eagerly is None)) + (def_function.functions_run_eagerly() and + self._run_eagerly is None)) @run_eagerly.setter def run_eagerly(self, value): diff --git a/tensorflow/python/keras/engine/training_v1.py b/tensorflow/python/keras/engine/training_v1.py index b7e1d21326b..29591e8ffb7 100644 --- a/tensorflow/python/keras/engine/training_v1.py +++ b/tensorflow/python/keras/engine/training_v1.py @@ -548,7 +548,7 @@ class Model(training_lib.Model): if self._run_eagerly is None: # Respect `tf.config.run_functions_eagerly` unless # `run_eagerly` was explicitly passed to `compile`. - return def_function.RUN_FUNCTIONS_EAGERLY + return def_function.functions_run_eagerly() else: return self._run_eagerly else: From a1e6bf86ec54ebb81e77a2af324dd5ae159e0523 Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Wed, 22 Jul 2020 13:46:58 -0700 Subject: [PATCH 1069/2522] Port the Fully Connected kernel to the new TF Micro TfLiteEvalTensor API. This changes also introduces a new kernel (e.g. TfLiteRegistration) runner that helps hide the verbose details of running a kernel test today. All existing kernel unit tests continue to mock out data with TfLiteTensor. The new TfLiteRegistrationTestRunner class will automatically handle conversion to TfLiteEvalTensor and setup all internal allocations off of TfLiteContext. This new class should eventually replace the code that exists in 'testing/test_utils.cc' (b/141330728). PiperOrigin-RevId: 322646454 Change-Id: I656071782d37e1eb2cbeb62e45aad3c76deac865 --- tensorflow/lite/micro/kernels/BUILD | 30 ++++ .../lite/micro/kernels/fully_connected.cc | 93 ++++++---- .../micro/kernels/fully_connected_test.cc | 86 +++------ .../lite/micro/kernels/kernel_runner.cc | 166 ++++++++++++++++++ tensorflow/lite/micro/kernels/kernel_runner.h | 83 +++++++++ tensorflow/lite/micro/kernels/kernel_util.cc | 46 +++++ tensorflow/lite/micro/kernels/kernel_util.h | 52 ++++++ 7 files changed, 464 insertions(+), 92 deletions(-) create mode 100644 tensorflow/lite/micro/kernels/kernel_runner.cc create mode 100644 tensorflow/lite/micro/kernels/kernel_runner.h create mode 100644 tensorflow/lite/micro/kernels/kernel_util.cc create mode 100644 tensorflow/lite/micro/kernels/kernel_util.h diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index 50e95690d83..fac356b2925 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD @@ -103,6 +103,7 @@ cc_library( ], deps = [ ":activation_utils", + ":kernel_util", ":micro_utils", "//tensorflow/lite/c:common", "//tensorflow/lite/kernels:kernel_util", @@ -170,11 +171,13 @@ tflite_micro_cc_test( "fully_connected_test.cc", ], deps = [ + ":kernel_runner", "//tensorflow/lite/c:common", "//tensorflow/lite/micro:micro_framework", "//tensorflow/lite/micro:micro_utils", "//tensorflow/lite/micro:op_resolvers", "//tensorflow/lite/micro:test_helpers", + "//tensorflow/lite/micro/kernels:micro_ops", "//tensorflow/lite/micro/testing:micro_test", ], ) @@ -473,6 +476,33 @@ tflite_micro_cc_test( ], ) +cc_library( + name = "kernel_runner", + srcs = [ + "kernel_runner.cc", + ], + hdrs = ["kernel_runner.h"], + build_for_embedded = True, + deps = [ + "//tensorflow/lite/c:common", + "//tensorflow/lite/kernels/internal:compatibility", + "//tensorflow/lite/micro:micro_framework", + ], +) + +cc_library( + name = "kernel_util", + srcs = [ + "kernel_util.cc", + ], + hdrs = ["kernel_util.h"], + build_for_embedded = True, + deps = [ 
+ "//tensorflow/lite/c:common", + "//tensorflow/lite/kernels/internal:types", + ], +) + cc_library( name = "micro_utils", hdrs = ["micro_utils.h"], diff --git a/tensorflow/lite/micro/kernels/fully_connected.cc b/tensorflow/lite/micro/kernels/fully_connected.cc index 376626710dc..03078f893fb 100644 --- a/tensorflow/lite/micro/kernels/fully_connected.cc +++ b/tensorflow/lite/micro/kernels/fully_connected.cc @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { namespace ops { @@ -40,6 +41,10 @@ struct OpData { int32_t output_activation_max; // The index of the temporary tensor where the quantized inputs are cached. int input_quantized_index; + // Cached zero point values of tensors. + int32_t input_zero_point; + int32_t filter_zero_point; + int32_t output_zero_point; }; constexpr int kInputTensor = 0; @@ -64,6 +69,10 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized( context, activation, output, &data->output_activation_min, &data->output_activation_max)); + + data->input_zero_point = input->params.zero_point; + data->filter_zero_point = filter->params.zero_point; + data->output_zero_point = output->params.zero_point; } return status; } @@ -97,13 +106,15 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { } TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node, - const OpData& data, const TfLiteTensor* input, - const TfLiteTensor* filter, - const TfLiteTensor* bias, TfLiteTensor* output) { + const OpData& data, + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output) { tflite::FullyConnectedParams op_params; - op_params.input_offset = -input->params.zero_point; - op_params.weights_offset = -filter->params.zero_point; - op_params.output_offset = output->params.zero_point; + op_params.input_offset = -data.input_zero_point; + op_params.weights_offset = -data.filter_zero_point; + op_params.output_offset = data.output_zero_point; op_params.output_multiplier = data.output_multiplier; // TODO(b/138810107): Figure out whether output shift should be inverted op_params.output_shift = -data.output_shift; @@ -111,20 +122,25 @@ TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node, op_params.quantized_activation_max = data.output_activation_max; reference_integer_ops::FullyConnected( - op_params, GetTensorShape(input), GetTensorData(input), - GetTensorShape(filter), GetTensorData(filter), - GetTensorShape(bias), GetTensorData(bias), - GetTensorShape(output), GetTensorData(output)); + op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); return kTfLiteOk; } TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, - const OpData& data, const TfLiteTensor* input, - const TfLiteTensor* filter, const TfLiteTensor* bias, - TfLiteTensor* output) { - const int32_t input_offset = -input->params.zero_point; - const int32_t filter_offset = -filter->params.zero_point; - const int32_t output_offset 
= output->params.zero_point; + const OpData& data, const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output) { + const int32_t input_offset = -data.input_zero_point; + const int32_t filter_offset = -data.filter_zero_point; + const int32_t output_offset = data.output_zero_point; tflite::FullyConnectedParams op_params; op_params.input_offset = input_offset; @@ -136,12 +152,16 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, op_params.quantized_activation_min = data.output_activation_min; op_params.quantized_activation_max = data.output_activation_max; -#define TF_LITE_FULLY_CONNECTED(output_data_type) \ - reference_ops::FullyConnected( \ - op_params, GetTensorShape(input), GetTensorData(input), \ - GetTensorShape(filter), GetTensorData(filter), \ - GetTensorShape(bias), GetTensorData(bias), \ - GetTensorShape(output), GetTensorData(output)) +#define TF_LITE_FULLY_CONNECTED(output_data_type) \ + reference_ops::FullyConnected( \ + op_params, tflite::micro::GetTensorShape(input), \ + tflite::micro::GetTensorData(input), \ + tflite::micro::GetTensorShape(filter), \ + tflite::micro::GetTensorData(filter), \ + tflite::micro::GetTensorShape(bias), \ + tflite::micro::GetTensorData(bias), \ + tflite::micro::GetTensorShape(output), \ + tflite::micro::GetTensorData(output)) switch (output->type) { case kTfLiteUInt8: TF_LITE_FULLY_CONNECTED(uint8_t); @@ -160,8 +180,9 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, TfLiteFusedActivation activation, - const TfLiteTensor* input, const TfLiteTensor* filter, - const TfLiteTensor* bias, TfLiteTensor* output) { + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, TfLiteEvalTensor* output) { float output_activation_min, output_activation_max; CalculateActivationRange(activation, &output_activation_min, &output_activation_max); @@ -169,10 +190,14 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, op_params.float_activation_min = output_activation_min; op_params.float_activation_max = output_activation_max; tflite::reference_ops::FullyConnected( - op_params, GetTensorShape(input), GetTensorData(input), - GetTensorShape(filter), GetTensorData(filter), - GetTensorShape(bias), GetTensorData(bias), GetTensorShape(output), - GetTensorData(output)); + op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); return kTfLiteOk; } @@ -181,10 +206,14 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const auto* params = static_cast(node->builtin_data); - const TfLiteTensor* input = GetInput(context, node, kInputTensor); - const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor); - const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + const TfLiteEvalTensor* filter = + tflite::micro::GetEvalInput(context, node, kWeightsTensor); + const TfLiteEvalTensor* bias = + tflite::micro::GetEvalInput(context, node, kBiasTensor); + TfLiteEvalTensor* output = + 
tflite::micro::GetEvalOutput(context, node, kOutputTensor); TFLITE_DCHECK(node->user_data != nullptr); const OpData& data = *(static_cast(node->user_data)); diff --git a/tensorflow/lite/micro/kernels/fully_connected_test.cc b/tensorflow/lite/micro/kernels/fully_connected_test.cc index f977904a37c..95892f5e1d0 100644 --- a/tensorflow/lite/micro/kernels/fully_connected_test.cc +++ b/tensorflow/lite/micro/kernels/fully_connected_test.cc @@ -19,6 +19,8 @@ limitations under the License. #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/micro/all_ops_resolver.h" +#include "tensorflow/lite/micro/kernels/kernel_runner.h" +#include "tensorflow/lite/micro/kernels/micro_ops.h" #include "tensorflow/lite/micro/micro_utils.h" #include "tensorflow/lite/micro/test_helpers.h" #include "tensorflow/lite/micro/testing/micro_test.h" @@ -50,46 +52,28 @@ TfLiteStatus TestFullyConnectedFloat( CreateFloatTensor(output_data, output_dims), }; - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_FULLY_CONNECTED); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - TfLiteFullyConnectedParams builtin_data = { activation, kTfLiteFullyConnectedWeightsFormatDefault, false, false}; - const char* init_data = reinterpret_cast(&builtin_data); - size_t init_data_size = 0; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, init_data, init_data_size); - } int inputs_array_data[] = {3, 0, 1, 2}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 3}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = reinterpret_cast(&builtin_data); - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; - if (registration->prepare) { - TF_LITE_ENSURE_OK(&context, registration->prepare(&context, &node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TfLiteStatus invoke_status = registration->invoke(&context, &node); + const TfLiteRegistration registration = + ops::micro::Register_FULLY_CONNECTED(); + micro::KernelRunner runner( + registration, tensors, tensors_size, inputs_array, outputs_array, + reinterpret_cast(&builtin_data), micro_test::reporter); - if (registration->free) { - registration->free(&context, user_data); + TfLiteStatus status = runner.InitAndPrepare(); + if (status != kTfLiteOk) { + return status; } - if (invoke_status != kTfLiteOk) { - return invoke_status; + + status = runner.Invoke(); + if (status != kTfLiteOk) { + return status; } for (int i = 0; i < output_dims_count; ++i) { TF_LITE_MICRO_EXPECT_NEAR(expected_output_data[i], output_data[i], 1e-5f); @@ -123,46 +107,28 @@ TfLiteStatus TestFullyConnectedQuantized( CreateQuantizedTensor(output_data, output_dims, output_min, output_max), }; - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_FULLY_CONNECTED); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - TfLiteFullyConnectedParams builtin_data = { activation, kTfLiteFullyConnectedWeightsFormatDefault, false, false}; - const char* init_data = 
reinterpret_cast(&builtin_data); - size_t init_data_size = 0; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, init_data, init_data_size); - } int inputs_array_data[] = {3, 0, 1, 2}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 3}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = reinterpret_cast(&builtin_data); - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; + const TfLiteRegistration registration = + ops::micro::Register_FULLY_CONNECTED(); + micro::KernelRunner runner( + registration, tensors, tensors_size, inputs_array, outputs_array, + reinterpret_cast(&builtin_data), micro_test::reporter); - if (registration->prepare) { - TF_LITE_ENSURE_OK(&context, registration->prepare(&context, &node)); + TfLiteStatus status = runner.InitAndPrepare(); + if (status != kTfLiteOk) { + return status; } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TfLiteStatus invoke_status = registration->invoke(&context, &node); - if (registration->free) { - registration->free(&context, user_data); - } - if (invoke_status != kTfLiteOk) { - return invoke_status; + + status = runner.Invoke(); + if (status != kTfLiteOk) { + return status; } for (int i = 0; i < output_dims_count; ++i) { TF_LITE_MICRO_EXPECT_EQ(expected_output_data[i], output_data[i]); diff --git a/tensorflow/lite/micro/kernels/kernel_runner.cc b/tensorflow/lite/micro/kernels/kernel_runner.cc new file mode 100644 index 00000000000..d754fc31377 --- /dev/null +++ b/tensorflow/lite/micro/kernels/kernel_runner.cc @@ -0,0 +1,166 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/lite/micro/kernels/kernel_runner.h" + +namespace tflite { +namespace micro { + +namespace { +constexpr size_t kBufferAlignment = 16; +} // namespace + +// TODO(b/161841696): Consider moving away from global arena buffers: +constexpr int KernelRunner::kNumScratchBuffers_; +constexpr int KernelRunner::kKernelRunnerBufferSize_; +uint8_t KernelRunner::kKernelRunnerBuffer_[]; + +KernelRunner::KernelRunner(const TfLiteRegistration& registration, + TfLiteTensor* tensors, int tensors_size, + TfLiteIntArray* inputs, TfLiteIntArray* outputs, + void* builtin_data, ErrorReporter* error_reporter) + : allocator_(SimpleMemoryAllocator::Create( + error_reporter, kKernelRunnerBuffer_, kKernelRunnerBufferSize_)), + registration_(registration), + tensors_(tensors), + error_reporter_(error_reporter) { + // Prepare TfLiteContext: + context_.impl_ = static_cast(this); + context_.ReportError = ReportOpError; + context_.recommended_num_threads = 1; + context_.GetTensor = GetTensor; + context_.GetEvalTensor = GetEvalTensor; + context_.AllocatePersistentBuffer = AllocatePersistentBuffer; + context_.RequestScratchBufferInArena = RequestScratchBufferInArena; + context_.GetScratchBuffer = GetScratchBuffer; + + // Prepare TfLiteNode: + node_.inputs = inputs; + node_.outputs = outputs; + node_.builtin_data = builtin_data; +} + +TfLiteStatus KernelRunner::InitAndPrepare() { + if (registration_.init) { + node_.user_data = + registration_.init(&context_, /*buffer=*/nullptr, /*length=*/0); + } + if (registration_.prepare) { + TF_LITE_ENSURE_STATUS(registration_.prepare(&context_, &node_)); + } + return kTfLiteOk; +} + +TfLiteStatus KernelRunner::Invoke() { + if (registration_.invoke == nullptr) { + TF_LITE_REPORT_ERROR(error_reporter_, + "TfLiteRegistration missing invoke function pointer!"); + return kTfLiteError; + } + return registration_.invoke(&context_, &node_); +} + +TfLiteTensor* KernelRunner::GetTensor(const struct TfLiteContext* context, + int tensor_index) { + TFLITE_DCHECK(context != nullptr); + KernelRunner* runner = reinterpret_cast(context->impl_); + TFLITE_DCHECK(runner != nullptr); + + return &runner->tensors_[tensor_index]; +} + +TfLiteEvalTensor* KernelRunner::GetEvalTensor( + const struct TfLiteContext* context, int tensor_index) { + TFLITE_DCHECK(context != nullptr); + KernelRunner* runner = reinterpret_cast(context->impl_); + TFLITE_DCHECK(runner != nullptr); + + TfLiteEvalTensor* eval_tensor = + reinterpret_cast(runner->allocator_->AllocateTemp( + sizeof(TfLiteEvalTensor), alignof(TfLiteEvalTensor))); + TFLITE_DCHECK(eval_tensor != nullptr); + + // In unit tests, the TfLiteTensor pointer contains the source of truth for + // buffers and values: + eval_tensor->data = runner->tensors_[tensor_index].data; + eval_tensor->dims = runner->tensors_[tensor_index].dims; + eval_tensor->type = runner->tensors_[tensor_index].type; + return eval_tensor; +} + +void* KernelRunner::AllocatePersistentBuffer(TfLiteContext* context, + size_t bytes) { + TFLITE_DCHECK(context != nullptr); + KernelRunner* runner = reinterpret_cast(context->impl_); + TFLITE_DCHECK(runner != nullptr); + + return runner->allocator_->AllocateFromTail(bytes, kBufferAlignment); +} + +TfLiteStatus KernelRunner::RequestScratchBufferInArena(TfLiteContext* context, + size_t bytes, + int* buffer_index) { + TFLITE_DCHECK(context != nullptr); + TFLITE_DCHECK(buffer_index != nullptr); + + KernelRunner* runner = reinterpret_cast(context->impl_); + 
TFLITE_DCHECK(runner != nullptr); + + if (runner->scratch_buffer_count_ == kNumScratchBuffers_) { + TF_LITE_REPORT_ERROR( + runner->error_reporter_, + "Exceeded the maximum number of scratch tensors allowed (%d).", + kNumScratchBuffers_); + return kTfLiteError; + } + + // For tests, we allocate scratch buffers from the tail and keep them around + // for the lifetime of model. This means that the arena size in the tests will + // be more than what we would have if the scratch buffers could share memory. + runner->scratch_buffers_[runner->scratch_buffer_count_] = + runner->allocator_->AllocateFromTail(bytes, kBufferAlignment); + TFLITE_DCHECK(runner->scratch_buffers_[runner->scratch_buffer_count_] != + nullptr); + + *buffer_index = runner->scratch_buffer_count_++; + return kTfLiteOk; +} + +void* KernelRunner::GetScratchBuffer(TfLiteContext* context, int buffer_index) { + TFLITE_DCHECK(context != nullptr); + KernelRunner* runner = reinterpret_cast(context->impl_); + TFLITE_DCHECK(runner != nullptr); + + TFLITE_DCHECK(runner->scratch_buffer_count_ <= kNumScratchBuffers_); + if (buffer_index >= runner->scratch_buffer_count_) { + return nullptr; + } + return runner->scratch_buffers_[buffer_index]; +} + +void KernelRunner::ReportOpError(struct TfLiteContext* context, + const char* format, ...) { + TFLITE_DCHECK(context != nullptr); + KernelRunner* runner = reinterpret_cast(context->impl_); + TFLITE_DCHECK(runner != nullptr); + + va_list args; + va_start(args, format); + TF_LITE_REPORT_ERROR(runner->error_reporter_, format, args); + va_end(args); +} + +} // namespace micro +} // namespace tflite diff --git a/tensorflow/lite/micro/kernels/kernel_runner.h b/tensorflow/lite/micro/kernels/kernel_runner.h new file mode 100644 index 00000000000..2ae7d09d530 --- /dev/null +++ b/tensorflow/lite/micro/kernels/kernel_runner.h @@ -0,0 +1,83 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_RUNNER_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_RUNNER_H_ + +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/compatibility.h" +#include "tensorflow/lite/micro/simple_memory_allocator.h" + +namespace tflite { +namespace micro { + +// Helper class to perform a simulated kernel (i.e. TfLiteRegistration) lifecyle +// (init, prepare, invoke). All internal allocations are handled by this class. +// Simply pass in the registration, list of required tensors, inputs array, +// outputs array, and any pre-builtin data. Calling Invoke() will automatically +// walk the kernl and outputs will be ready on the the TfLiteTensor output +// provided during construction. 
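+//
+// A minimal usage sketch (illustrative only; it mirrors the updated
+// fully_connected_test.cc above and assumes `tensors`, `tensors_size`,
+// `inputs_array`, `outputs_array` and `builtin_data` have already been
+// set up by the test):
+//
+//   const TfLiteRegistration registration =
+//       ops::micro::Register_FULLY_CONNECTED();
+//   micro::KernelRunner runner(registration, tensors, tensors_size,
+//                              inputs_array, outputs_array,
+//                              reinterpret_cast<void*>(&builtin_data),
+//                              micro_test::reporter);
+//   TfLiteStatus status = runner.InitAndPrepare();
+//   if (status == kTfLiteOk) {
+//     status = runner.Invoke();
+//   }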
+class KernelRunner { + public: + KernelRunner(const TfLiteRegistration& registration, TfLiteTensor* tensors, + int tensors_size, TfLiteIntArray* inputs, + TfLiteIntArray* outputs, void* builtin_data, + ErrorReporter* error_reporter); + + // Calls init and prepare on the kernel (i.e. TfLiteRegistration) struct. Any + // exceptions will be reported through the error_reporter and returned as a + // status code here. + TfLiteStatus InitAndPrepare(); + + // Calls init, prepare, and invoke on a given TfLiteRegistration pointer. + // After successful invoke, results will be available in the output tensor as + // passed into the constructor of this class. + TfLiteStatus Invoke(); + + protected: + static TfLiteTensor* GetTensor(const struct TfLiteContext* context, + int tensor_index); + static TfLiteEvalTensor* GetEvalTensor(const struct TfLiteContext* context, + int tensor_index); + static void* AllocatePersistentBuffer(TfLiteContext* context, size_t bytes); + static TfLiteStatus RequestScratchBufferInArena(TfLiteContext* context, + size_t bytes, + int* buffer_index); + static void* GetScratchBuffer(TfLiteContext* context, int buffer_index); + static void ReportOpError(struct TfLiteContext* context, const char* format, + ...); + + private: + static constexpr int kNumScratchBuffers_ = 5; + + static constexpr int kKernelRunnerBufferSize_ = 10000; + static uint8_t kKernelRunnerBuffer_[kKernelRunnerBufferSize_]; + + SimpleMemoryAllocator* allocator_ = nullptr; + const TfLiteRegistration& registration_; + TfLiteTensor* tensors_ = nullptr; + ErrorReporter* error_reporter_ = nullptr; + + TfLiteContext context_ = {}; + TfLiteNode node_ = {}; + + int scratch_buffer_count_ = 0; + uint8_t* scratch_buffers_[kNumScratchBuffers_]; +}; + +} // namespace micro +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_RUNNER_H_ diff --git a/tensorflow/lite/micro/kernels/kernel_util.cc b/tensorflow/lite/micro/kernels/kernel_util.cc new file mode 100644 index 00000000000..074de732be6 --- /dev/null +++ b/tensorflow/lite/micro/kernels/kernel_util.cc @@ -0,0 +1,46 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/micro/kernels/kernel_util.h" + +namespace tflite { +namespace micro { + +// Returns the TfLiteEvalTensor struct for a given input index in a node. +const TfLiteEvalTensor* GetEvalInput(const TfLiteContext* context, + const TfLiteNode* node, int index) { + TFLITE_DCHECK(context != nullptr); + TFLITE_DCHECK(node != nullptr); + return context->GetEvalTensor(context, node->inputs->data[index]); +} + +// Returns the TfLiteEvalTensor struct for a given output index in a node. 
+TfLiteEvalTensor* GetEvalOutput(const TfLiteContext* context, + const TfLiteNode* node, int index) { + TFLITE_DCHECK(context != nullptr); + TFLITE_DCHECK(node != nullptr); + return context->GetEvalTensor(context, node->outputs->data[index]); +} + +const RuntimeShape GetTensorShape(const TfLiteEvalTensor* tensor) { + TFLITE_DCHECK(tensor != nullptr); + TfLiteIntArray* dims = tensor->dims; + const int dims_size = dims->size; + const int32_t* dims_data = reinterpret_cast(dims->data); + return RuntimeShape(dims_size, dims_data); +} + +} // namespace micro +} // namespace tflite diff --git a/tensorflow/lite/micro/kernels/kernel_util.h b/tensorflow/lite/micro/kernels/kernel_util.h new file mode 100644 index 00000000000..baf3d2464bb --- /dev/null +++ b/tensorflow/lite/micro/kernels/kernel_util.h @@ -0,0 +1,52 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_UTIL_H_ +#define TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_UTIL_H_ + +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/types.h" + +namespace tflite { +namespace micro { + +// Returns the TfLiteEvalTensor struct for a given input index in a node. +const TfLiteEvalTensor* GetEvalInput(const TfLiteContext* context, + const TfLiteNode* node, int index); + +// Returns the TfLiteEvalTensor struct for a given output index in a node. +TfLiteEvalTensor* GetEvalOutput(const TfLiteContext* context, + const TfLiteNode* node, int index); + +// Returns data for a TfLiteEvalTensor struct. +template +T* GetTensorData(TfLiteEvalTensor* tensor) { + return tensor != nullptr ? reinterpret_cast(tensor->data.raw) : nullptr; +} + +// Returns const data for a TfLiteEvalTensor struct. +template +const T* GetTensorData(const TfLiteEvalTensor* tensor) { + TFLITE_DCHECK(tensor != nullptr); + return reinterpret_cast(tensor->data.raw); +} + +// Returns the shape of a TfLiteEvalTensor struct. +const RuntimeShape GetTensorShape(const TfLiteEvalTensor* tensor); + +} // namespace micro +} // namespace tflite + +#endif // TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_UTIL_H_ From d90e30fbe429d86bfc1960d2f524963d0e6bbfe0 Mon Sep 17 00:00:00 2001 From: Chenkai Kuang Date: Wed, 22 Jul 2020 13:47:11 -0700 Subject: [PATCH 1070/2522] Add `save_context_key` to function cache. This allows retracing of functions during saving time to accommodate cases where different behaviors in training and saving are desired, for example: 1. DistributedVariable/PackedVariable wants to resolve to the primary component when building function graphs in saving time. 2. ShardedVariable wants to resolve to a single-shard variable in saved graph for better serving performance. Function retracing will only happen if it has already been traced under a distribution strategy, i.e, cases without distribution strategy is not affected. Note that if `ConcreteFunction` is passed to signatures, we will use it as it is. 
A TODO item is to raise an error if the `ConcreteFunction` is associated with distributed properties. PiperOrigin-RevId: 322646508 Change-Id: I027edc7aede9a418ff5816234653fc17c44b9c27 --- tensorflow/python/distribute/BUILD | 10 +++++++ .../python/distribute/tf_function_test.py | 27 +++++++++++++++++++ tensorflow/python/eager/BUILD | 4 +++ tensorflow/python/eager/def_function_test.py | 18 +++++++++++++ tensorflow/python/eager/function.py | 27 ++++++++++++++++++- 5 files changed, 85 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/distribute/BUILD b/tensorflow/python/distribute/BUILD index 11cb725ef57..f3f98fe50de 100644 --- a/tensorflow/python/distribute/BUILD +++ b/tensorflow/python/distribute/BUILD @@ -1686,10 +1686,20 @@ distribute_py_test( ], deps = [ ":combinations", + ":device_util", ":strategy_combinations", + ":values", "//tensorflow/python:array_ops", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python:math_ops", + "//tensorflow/python:variables", + "//tensorflow/python/compat:v2_compat", "//tensorflow/python/eager:def_function", "//tensorflow/python/eager:test", + "//tensorflow/python/saved_model:save_context", + "//tensorflow/python/saved_model:save_options", + "@absl_py//absl/testing:parameterized", ], ) diff --git a/tensorflow/python/distribute/tf_function_test.py b/tensorflow/python/distribute/tf_function_test.py index 337a5a39f26..967abebdfb3 100644 --- a/tensorflow/python/distribute/tf_function_test.py +++ b/tensorflow/python/distribute/tf_function_test.py @@ -32,6 +32,8 @@ from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import variables +from tensorflow.python.saved_model import save_context +from tensorflow.python.saved_model import save_options class TFFunctionTest(test.TestCase, parameterized.TestCase): @@ -140,6 +142,31 @@ class TFFunctionTest(test.TestCase, parameterized.TestCase): with ops.device(d): self.assertEqual(math_ops.cast(i, dtypes.float32), read()) + @combinations.generate( + combinations.combine( + distribution=strategy_combinations.all_strategies, mode=["eager"])) + def testRetraceOnSaving(self, distribution): + with distribution.scope(): + v = variables.Variable(0.) + + tracing_count = [0] + + @def_function.function + def func(): + tracing_count[0] += 1 + return v + 1. 
+ + distribution.run(func) + prev_tracing_count = tracing_count[0] + with save_context.save_context(save_options.SaveOptions()): + func() + self.assertEqual(prev_tracing_count + 1, tracing_count[0]) + + prev_tracing_count = tracing_count[0] + with save_context.save_context(save_options.SaveOptions()): + func() + self.assertEqual(prev_tracing_count, tracing_count[0]) + if __name__ == "__main__": v2_compat.enable_v2_behavior() diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD index a96d2322b88..b9c43c2e4e9 100644 --- a/tensorflow/python/eager/BUILD +++ b/tensorflow/python/eager/BUILD @@ -564,6 +564,8 @@ py_library( "//tensorflow/python:pywrap_tf_session", "//tensorflow/python:util", "//tensorflow/python/ops/numpy_ops:numpy", + "//tensorflow/python/saved_model:save_context", + "//tensorflow/python/saved_model:save_options", "//third_party/py/numpy", "@six_archive//:six", ], @@ -817,6 +819,8 @@ cuda_py_test( "//tensorflow/python:constant_op", "//tensorflow/python:framework_ops", "//tensorflow/python/autograph/core", + "//tensorflow/python/saved_model:save_context", + "//tensorflow/python/saved_model:save_options", "@absl_py//absl/testing:parameterized", ], ) diff --git a/tensorflow/python/eager/def_function_test.py b/tensorflow/python/eager/def_function_test.py index 0ae69fa0b8c..8784fb1cd0a 100644 --- a/tensorflow/python/eager/def_function_test.py +++ b/tensorflow/python/eager/def_function_test.py @@ -45,6 +45,8 @@ from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.platform import test +from tensorflow.python.saved_model import save_context +from tensorflow.python.saved_model import save_options def undecorated_function(x): @@ -586,6 +588,22 @@ class DefFunctionTest(test.TestCase, parameterized.TestCase): self.assertIs(func_a, func_b) + def testCacheWithinSaveContext(self): + + @def_function.function + def func(x): + return 2 * x + + func_a = func.get_concrete_function(constant_op.constant(2.)) + func_b = func.get_concrete_function(constant_op.constant(2.)) + + self.assertIs(func_a, func_b) + + with save_context.save_context(save_options.SaveOptions()): + func_c = func.get_concrete_function(constant_op.constant(2.)) + + self.assertIs(func_a, func_c) + @test_util.disable_tfrt('Nested function is not supported') def testInitializationInNestedCall(self): v_holder = [] diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index be8fcb82610..2497b4302f1 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -66,6 +66,8 @@ from tensorflow.python.ops import resource_variable_ops from tensorflow.python.platform import tf_logging as logging from tensorflow.python.profiler import trace +from tensorflow.python.saved_model import save_context +from tensorflow.python.saved_model import save_options from tensorflow.python.util import compat from tensorflow.python.util import function_utils from tensorflow.python.util import lazy_loader @@ -150,6 +152,7 @@ CacheKey = collections.namedtuple("CacheKey", [ "device_functions", "colocation_stack", "in_cross_replica_context", + "variable_policy", "xla_context_id", ]) @@ -2851,6 +2854,10 @@ class Function(object): self._descriptor_cache = weakref.WeakKeyDictionary() self._experimental_compile = experimental_compile + # A boolean indicating whether the function has been traced with + # distribution strategy. 
+ self._traced_with_distribution_strategy = False + def __call__(self, *args, **kwargs): """Calls a graph function specialized to the inputs.""" with self._lock: @@ -3067,10 +3074,24 @@ class Function(object): except (AttributeError, IndexError): pass + # If the function has been traced with a distribution strategy, it might + # need to be retraced at saving time as DistributedVariable created under + # distribution strategy may want different tracing behavior at training and + # saving, e.g, it wants to resolve to the primary component at saving time, + # but wants resolve to the component residing in the current device at + # training time. We achieve this by adding variable_policy to the function + # cache key. + if save_context.in_save_context( + ) and self._traced_with_distribution_strategy: + variable_policy = ( + save_context.get_save_options().experimental_variable_policy) + else: + variable_policy = save_options.VariablePolicy.EXPAND_DISTRIBUTED_VARIABLES + return CacheKey( _make_input_signature_hashable(input_signature), parent_graph, device_functions, colocation_stack, in_cross_replica_context, - xla_context_id) + variable_policy, xla_context_id) def _create_graph_function(self, args, kwargs, override_flat_arg_shapes=None): """Create a `ConcreteFunction` from `args` and `kwargs`.""" @@ -3240,6 +3261,10 @@ class Function(object): self._function_cache.missed.add(call_context_key) graph_function = self._create_graph_function(args, kwargs) self._function_cache.primary[cache_key] = graph_function + + if ops.get_default_graph()._distribution_strategy_stack: + self._traced_with_distribution_strategy = True + return graph_function, args, kwargs From ecd200ec389d5c0ce5db9dad3c945a5ad04bb078 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Wed, 22 Jul 2020 14:05:48 -0700 Subject: [PATCH 1071/2522] Fix the Windows continuous build. PiperOrigin-RevId: 322650627 Change-Id: I46afca9d5bebcdd4a5acfd655a40fdbecc8493c6 --- tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh | 2 +- tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh index 0abdb59fede..e4258f0408e 100644 --- a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh +++ b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh @@ -141,7 +141,7 @@ bazel build ${EXTRA_BUILD_FLAGS} \ --output_filter=^$ \ tensorflow/lite:framework tensorflow/lite/examples/minimal:minimal || exit $? -bazel build ${EXTRA_BUILD_FLAGS} \ +bazel build --config=release_cpu_windows ${EXTRA_BUILD_FLAGS} \ --output_filter=^$ \ tensorflow/tools/pip_package:build_pip_package || exit $? diff --git a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh index 4bba7797556..1874a23df6d 100644 --- a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh +++ b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh @@ -138,7 +138,7 @@ fi run_configure_for_gpu_build -bazel build ${EXTRA_BUILD_FLAGS} \ +bazel build --config=release_gpu_windows ${EXTRA_BUILD_FLAGS} \ --output_filter=^$ \ tensorflow/tools/pip_package:build_pip_package || exit $? 
From a0b45cb107c5c42165b70e9e073280c138cfcd44 Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Wed, 22 Jul 2020 14:12:38 -0700 Subject: [PATCH 1072/2522] [TF2XLA] Fix the test docstring PiperOrigin-RevId: 322652093 Change-Id: I350d983763a351ddd6d0ed6a898182c36baaefbb --- tensorflow/compiler/tests/case_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/tests/case_test.py b/tensorflow/compiler/tests/case_test.py index 3b2dff537da..4da9c4fac7a 100644 --- a/tensorflow/compiler/tests/case_test.py +++ b/tensorflow/compiler/tests/case_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Tests for while loops in XLA.""" +"""Tests for case statements in XLA.""" from __future__ import absolute_import from __future__ import division From a225c7700807350cf022b8256ed36b01acefe4d9 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Wed, 22 Jul 2020 14:16:26 -0700 Subject: [PATCH 1073/2522] Change tf_xla_py_test to use tf's implementation of py_test. Otherwise, any pip tests we run becomes a normal tests, and our pip tests do not verify any behaviour for xla_py_tests PiperOrigin-RevId: 322652875 Change-Id: Icb56f419ab632870f7b0bcee23b57ee66eff0971 --- tensorflow/compiler/tests/build_defs.bzl | 3 ++- tensorflow/tensorflow.bzl | 8 ++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/tests/build_defs.bzl b/tensorflow/compiler/tests/build_defs.bzl index 19a1d62cddd..9c941e791ee 100644 --- a/tensorflow/compiler/tests/build_defs.bzl +++ b/tensorflow/compiler/tests/build_defs.bzl @@ -8,6 +8,7 @@ load( "tf_cuda_tests_tags", "tf_exec_properties", ) +load("//tensorflow:tensorflow.bzl", "py_test") def all_backends(): b = ["cpu"] + plugins.keys() @@ -121,7 +122,7 @@ def tf_xla_py_test( updated_name = updated_name[:-5] updated_name += "_mlir_bridge_test" - native.py_test( + py_test( name = updated_name, srcs = srcs, srcs_version = "PY2AND3", diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 1bf4b24559d..3edf2a417ba 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -2198,10 +2198,14 @@ def pywrap_tensorflow_macro( # Note that this only works on Windows. See the definition of # //third_party/tensorflow/tools/pip_package:win_pip_package_marker for specific reasons. # 2. When --define=no_tensorflow_py_deps=false (by default), it's a normal py_test. -def py_test(deps = [], data = [], kernels = [], **kwargs): +def py_test(deps = [], data = [], kernels = [], exec_properties = None, **kwargs): # Python version placeholder if kwargs.get("python_version", None) == "PY3": kwargs["tags"] = kwargs.get("tags", []) + ["no_oss_py2"] + + if not exec_properties: + exec_properties = tf_exec_properties(kwargs) + native.py_test( # TODO(jlebar): Ideally we'd use tcmalloc here., deps = select({ @@ -2212,7 +2216,7 @@ def py_test(deps = [], data = [], kernels = [], **kwargs): "//conditions:default": kernels, clean_dep("//tensorflow:no_tensorflow_py_deps"): ["//tensorflow/tools/pip_package:win_pip_package_marker"], }), - exec_properties = tf_exec_properties(kwargs), + exec_properties = exec_properties, **kwargs ) From 37fd86e0ff6dbac37de6e30c7bca9dd0bf4083e9 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Wed, 22 Jul 2020 14:18:19 -0700 Subject: [PATCH 1074/2522] Use the correct implementation of py_test for numpy_ops tests. 
Otherwise, pip tests become invalid PiperOrigin-RevId: 322653366 Change-Id: I16d02122c31d796b00d0685677cd625a39da6142 --- tensorflow/python/ops/numpy_ops/integration_test/BUILD | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/python/ops/numpy_ops/integration_test/BUILD b/tensorflow/python/ops/numpy_ops/integration_test/BUILD index 05162a4e26a..e5483166406 100644 --- a/tensorflow/python/ops/numpy_ops/integration_test/BUILD +++ b/tensorflow/python/ops/numpy_ops/integration_test/BUILD @@ -1,3 +1,5 @@ +load("//tensorflow:tensorflow.bzl", "py_test") + licenses(["notice"]) py_test( From 392dfaaa9b636469b453777372df879e5852874f Mon Sep 17 00:00:00 2001 From: Russell Power Date: Wed, 22 Jul 2020 14:18:32 -0700 Subject: [PATCH 1075/2522] Add declarations for TPU ops.declarations for TPU ops.declarations for TPU ops.declarations for TPU ops. PiperOrigin-RevId: 322653418 Change-Id: I8338c98d8c5d80b828c04610c8c605aa94514a42 --- tensorflow/core/tpu/BUILD | 3 + tensorflow/core/tpu/ops/BUILD | 44 ++++++ tensorflow/core/tpu/ops/tpu_compile_op.cc | 129 ++++++++++++++++++ tensorflow/core/tpu/ops/tpu_execute_op.cc | 71 ++++++++++ .../core/tpu/ops/tpu_partitioned_input_op.cc | 107 +++++++++++++++ .../core/tpu/ops/tpu_partitioned_output_op.cc | 66 +++++++++ 6 files changed, 420 insertions(+) create mode 100644 tensorflow/core/tpu/ops/BUILD create mode 100644 tensorflow/core/tpu/ops/tpu_compile_op.cc create mode 100644 tensorflow/core/tpu/ops/tpu_execute_op.cc create mode 100644 tensorflow/core/tpu/ops/tpu_partitioned_input_op.cc create mode 100644 tensorflow/core/tpu/ops/tpu_partitioned_output_op.cc diff --git a/tensorflow/core/tpu/BUILD b/tensorflow/core/tpu/BUILD index d9d0bc440e7..8be9fe12d69 100644 --- a/tensorflow/core/tpu/BUILD +++ b/tensorflow/core/tpu/BUILD @@ -159,6 +159,9 @@ cc_library( "//tensorflow:oss": [ ":tpu_node_device", ":tpu_system_device", + "//tensorflow/core/tpu/ops:tpu_compile_op", + "//tensorflow/core/tpu/ops:tpu_execute_op", + "//tensorflow/core/tpu/ops:tpu_partitioned_ops", "//tensorflow/stream_executor/tpu:tpu_executor", "//tensorflow/stream_executor/tpu:tpu_transfer_manager", "//tensorflow/core/tpu:tpu_on_demand_compiler", diff --git a/tensorflow/core/tpu/ops/BUILD b/tensorflow/core/tpu/ops/BUILD new file mode 100644 index 00000000000..63268d6aab1 --- /dev/null +++ b/tensorflow/core/tpu/ops/BUILD @@ -0,0 +1,44 @@ +package( + default_visibility = ["//visibility:public"], + licenses = ["notice"], # Apache 2.0 +) + +cc_library( + name = "tpu_partitioned_ops", + srcs = [ + "tpu_partitioned_input_op.cc", + "tpu_partitioned_output_op.cc", + ], + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:graph", + "//tensorflow/core:lib", + ], + alwayslink = 1, +) + +cc_library( + name = "tpu_compile_op", + srcs = [ + "tpu_compile_op.cc", + ], + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:graph", + "//tensorflow/core:lib", + ], + alwayslink = 1, +) + +cc_library( + name = "tpu_execute_op", + srcs = [ + "tpu_execute_op.cc", + ], + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:graph", + "//tensorflow/core:lib", + ], + alwayslink = 1, +) diff --git a/tensorflow/core/tpu/ops/tpu_compile_op.cc b/tensorflow/core/tpu/ops/tpu_compile_op.cc new file mode 100644 index 00000000000..9f68406dbba --- /dev/null +++ b/tensorflow/core/tpu/ops/tpu_compile_op.cc @@ -0,0 +1,129 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { + +REGISTER_OP("_TPUCompileMlir") + .Attr("num_computations: int >= 0") + .Attr("mlir_module: string=\"\"") + .Attr("metadata: string") + .Attr("NumDynamicShapes: int >= 0") + // Do not try to optimize me away. We would like the compilation-op to be + // invoked for every step, and not be constant-folded away, in case the + // program is evicted from the compilation cache. + .SetIsStateful() + .Input("dynamic_shapes: NumDynamicShapes * int64") + .Output("compilation_status: string") + .Output("program: num_computations * string") + .SetShapeFn([](shape_inference::InferenceContext* c) { + int num_computations; + TF_RETURN_IF_ERROR( + GetNodeAttr(c->attrs(), "num_computations", &num_computations)); + // Compilation status. + c->set_output(0, c->Scalar()); + // Programs. + for (int i = 0; i < num_computations; ++i) { + c->set_output(i + 1, c->Vector(2)); + } + return Status::OK(); + }) + .Doc( + R"( +Compiles a computations for execution on one or more TPU devices. +For the internal use of the distributed TPU compiler. Note that currently only +single TPU device is supported. + +'mlir_module' is a serialized MLIR module with a `main` function that contains +target computation. +'dynamic_shapes' contains dynamic shapes of arguments whose shapes were not +known statically at TPUReplication rewrite time. +'metadata' is a serialized TPUCompileMetadataProto describing +the shapes and types of the inputs to the computation, as well as a mapping onto +the TPU pod topology. +'program' output is a string key that is passed to the _TPUExecute op and +used to look up the program in the compilation cache. +)"); + +REGISTER_OP("TPUCompile") + .Attr("num_computations: int >= 0") + .Attr("function: func") + .Attr("metadata: string") + .Attr("NumDynamicShapes: int >= 0") + .Attr("Tguaranteed_constants: list(type) >= 0") + // Do not try to optimize me away. We would like the compilation-op to be + // invoked for every step, and not be constant-folded away, in case the + // program is evicted from the compilation cache. + .SetIsStateful() + .Input("dynamic_shapes: NumDynamicShapes * int64") + .Input("guaranteed_constants: Tguaranteed_constants") + .Output("compilation_status: string") + .Output("program: num_computations * string") + .Output("may_modify_variables: num_computations * bool") + .SetShapeFn([](shape_inference::InferenceContext* c) { + int num_computations; + TF_RETURN_IF_ERROR( + GetNodeAttr(c->attrs(), "num_computations", &num_computations)); + // Compilation status. + c->set_output(0, c->Scalar()); + // Programs. + for (int i = 0; i < num_computations; ++i) { + c->set_output(i + 1, c->Vector(2)); + } + // May modify variables. 
+ for (int i = 0; i < num_computations; ++i) { + c->set_output(num_computations + i + 1, c->Scalar()); + } + return Status::OK(); + }) + .Doc( + R"( +Compiles a computations for execution on one or more TPU devices. +For the internal use of the distributed TPU compiler. + +'num_computations' is the number of computations to be compiled. +'function' is a function containing the computation to compile. +'dynamic_shapes' contains dynamic shapes of arguments whose shapes were not +known statically at TPUReplication rewrite time. +'guaranteed_constants' is a list of tensors which have been guaranteed to not +change their values during the session lifetime. These contain tensors marked as +constant using the GuaranteeConstOp. +'metadata' is a serialized TPUCompileMetadataProto describing +the shapes and types of the inputs to the computation, as well as a mapping onto +the TPU pod topology. +Each 'program' output is a string key that is passed to the _TPUExecute op and +used to look up the program in the compilation cache. +'may_modify_variables' indicates whether variables may be modified. +)"); + +REGISTER_OP("TPUCompileSucceededAssert") + .Input("compilation_status: string") + // Do not optimize me away. Read the comment on TPUCompileOp for more + // details. + .SetIsStateful() + .SetShapeFn(shape_inference::NoOutputs) + .Doc( + R"( +Asserts that compilation succeeded. This op produces no output and closes the +device during failure to ensure all pending device interactions fail. + +'compilation_status' is a serialized CompilationResultProto. + )"); + +} // namespace tensorflow diff --git a/tensorflow/core/tpu/ops/tpu_execute_op.cc b/tensorflow/core/tpu/ops/tpu_execute_op.cc new file mode 100644 index 00000000000..6d42c35fc7b --- /dev/null +++ b/tensorflow/core/tpu/ops/tpu_execute_op.cc @@ -0,0 +1,71 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { + +REGISTER_OP("TPUExecute") + .Input("args: Targs") + .Attr("Targs: list(type) >= 0") + .Input("key: string") + .Output("results: Tresults") + .Attr("Tresults: list(type) >= 0") + .SetIsStateful() + .SetShapeFn([](shape_inference::InferenceContext* c) { + shape_inference::ShapeHandle key; + TF_RETURN_IF_ERROR(c->WithRank(c->input(c->num_inputs() - 1), 1, &key)); + shape_inference::DimensionHandle unused; + TF_RETURN_IF_ERROR(c->WithValue(c->Dim(key, 0), 2, &unused)); + for (int i = 0; i < c->num_outputs(); ++i) { + c->set_output(i, c->UnknownShape()); + } + return Status::OK(); + }) + .Doc(R"( +Op that loads and executes a TPU program on a TPU device. 
+For the internal use of the distributed TPU compiler.)"); + +REGISTER_OP("TPUExecuteAndUpdateVariables") + .Input("args: Targs") + .Attr("Targs: list(type) >= 0") + .Input("key: string") + .Output("results: Tresults") + .Attr("Tresults: list(type) >= 0") + .Attr("device_var_reads_indices: list(int) >= 0") + .Attr("device_var_updates_indices: list(int) >= 0") + .SetIsStateful() + .SetShapeFn([](shape_inference::InferenceContext* c) { + shape_inference::ShapeHandle key; + TF_RETURN_IF_ERROR(c->WithRank(c->input(c->num_inputs() - 1), 1, &key)); + shape_inference::DimensionHandle unused; + TF_RETURN_IF_ERROR(c->WithValue(c->Dim(key, 0), 2, &unused)); + for (int i = 0; i < c->num_outputs(); ++i) { + c->set_output(i, c->UnknownShape()); + } + return Status::OK(); + }) + .Doc(R"(Op that executes a program with optional in-place variable updates. +It (optionally) reads device variables, loads and executes a TPU program on a +TPU device, and then (optionally) in-place updates variables using the program +outputs, as specified in attributes device_var_reads_indices (program input +indices from directly reading variables) and device_var_updates_indices (program +output indices used to update variables, -1 means no-update/read-only). Such +program outputs are consumed by these variables will not appear in the op +output. For the internal use of the distributed TPU compiler.)"); + +} // namespace tensorflow diff --git a/tensorflow/core/tpu/ops/tpu_partitioned_input_op.cc b/tensorflow/core/tpu/ops/tpu_partitioned_input_op.cc new file mode 100644 index 00000000000..acaed3d072c --- /dev/null +++ b/tensorflow/core/tpu/ops/tpu_partitioned_input_op.cc @@ -0,0 +1,107 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { + +using shape_inference::InferenceContext; +using shape_inference::ShapeHandle; + +ShapeHandle _UpdatePartitionDim(InferenceContext* c, const ShapeHandle handle, + const int partition_dim) { + ShapeHandle newoutput0; + shape_inference::DimensionHandle new_dim; + TF_CHECK_OK( + c->Multiply(c->Dim(handle, partition_dim), c->num_inputs(), &new_dim)); + TF_CHECK_OK(c->ReplaceDim(handle, partition_dim, new_dim, &newoutput0)); + return newoutput0; +} + +REGISTER_OP("TPUPartitionedInput") + .Input("inputs: N * T") + .Output("output: T") + .Attr("N: int >= 1") + .Attr("T: type") + .Attr("partition_dim: int = 0") + .SetShapeFn([](InferenceContext* c) { + DataType dtype; + TF_RETURN_IF_ERROR(c->GetAttr("T", &dtype)); + int partition_dim; + TF_RETURN_IF_ERROR(c->GetAttr("partition_dim", &partition_dim)); + + ShapeHandle cur = c->input(c->num_inputs() - 1); + for (int i = c->num_inputs() - 2; i >= 0; --i) { + TF_RETURN_WITH_CONTEXT_IF_ERROR(c->Merge(c->input(i), cur, &cur), + "From merging shape ", i, + " with other shapes."); + } + if (partition_dim == -1 || dtype == DT_RESOURCE) { + c->set_output(0, cur); + } else { + ShapeHandle newoutput0 = _UpdatePartitionDim(c, cur, partition_dim); + c->set_output(0, newoutput0); + } + + // If this is a resource, unify the resource shapes. + if (dtype == DT_RESOURCE) { + ShapeHandle previous_shape_handle; + for (int i = c->num_inputs() - 1; i >= 0; --i) { + ShapeHandle shape_handle = + c->input_handle_shapes_and_types(i)->at(0).shape; + if (!c->FullyDefined(shape_handle)) { + return errors::InvalidArgument("Inputs must have static shape,", + "input[", i, + "] has unknown dimension."); + } + if (i != c->num_inputs() - 1) { + ShapeHandle tmp; + if (!c->Merge(shape_handle, previous_shape_handle, &tmp).ok()) { + return errors::InvalidArgument( + "Inputs must have the same shape."); + } + } else { + previous_shape_handle = shape_handle; + } + } + if (partition_dim == -1) { + c->set_output_handle_shapes_and_types( + 0, *c->input_handle_shapes_and_types(0)); + } else { + ShapeHandle newoutput0 = + _UpdatePartitionDim(c, previous_shape_handle, partition_dim); + + std::vector output_shapes_and_types; + output_shapes_and_types.push_back(shape_inference::ShapeAndType( + newoutput0, c->input_handle_shapes_and_types(0)->at(0).dtype)); + c->set_output_handle_shapes_and_types(0, output_shapes_and_types); + } + } + + return Status::OK(); + }) + .Doc(R"doc( +An op that groups a list of partitioned inputs together. This op + +inputs: A list of partitioned inputs which must have the same shape. +output: A handle which represents the full shape of partitioned tensors. +partition_dim: An integer describles which dimension is partitioned. -1 means + those inputs are replicated. +)doc"); + +} // namespace tensorflow diff --git a/tensorflow/core/tpu/ops/tpu_partitioned_output_op.cc b/tensorflow/core/tpu/ops/tpu_partitioned_output_op.cc new file mode 100644 index 00000000000..69ea48d242a --- /dev/null +++ b/tensorflow/core/tpu/ops/tpu_partitioned_output_op.cc @@ -0,0 +1,66 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { + +using shape_inference::InferenceContext; +using shape_inference::ShapeHandle; + + +REGISTER_OP("TPUPartitionedOutput") + .Input("inputs: T") + .Output("output: num_splits * T") + .Attr("T: type") + .Attr("num_splits: int >= 1") + .Attr("partition_dim: int = 0") + .SetShapeFn([](InferenceContext* c) { + DataType dtype; + TF_RETURN_IF_ERROR(c->GetAttr("T", &dtype)); + int partition_dim; + TF_RETURN_IF_ERROR(c->GetAttr("partition_dim", &partition_dim)); + int num_splits; + TF_RETURN_IF_ERROR(c->GetAttr("num_splits", &num_splits)); + if (dtype == DT_RESOURCE) { + return errors::Unimplemented("Not implemented."); + } + + ShapeHandle input = c->input(0); + ShapeHandle newoutput0; + shape_inference::DimensionHandle new_dim; + TF_RETURN_WITH_CONTEXT_IF_ERROR( + c->Divide(c->Dim(input, partition_dim), num_splits, + true /* evenly_divisible */, &new_dim), + "Number of ways to split should evenly divide the split dimension"); + TF_CHECK_OK(c->ReplaceDim(input, partition_dim, new_dim, &newoutput0)); + for (int i = num_splits - 1; i >= 0; --i) { + c->set_output(i, newoutput0); + } + return Status::OK(); + }) + .Doc(R"doc( +An op that demultiplexes a tensor to be sharded by XLA to a list of partitioned +outputs outside the XLA computation. + +inputs: A tensor which represents the full shape of partitioned tensors. +output: A list of partitioned inputs which must have the same shape. +partition_dim: An integer describles which dimension is partitioned. +)doc"); + +} // namespace tensorflow From d57541184dae517b32fb8a20a2de8f861db2645e Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Wed, 22 Jul 2020 14:21:35 -0700 Subject: [PATCH 1076/2522] Wrap/rewrap ndarrays in batch_jacobian PiperOrigin-RevId: 322654079 Change-Id: I1ac1d2154c3b82f0714b460b3f002fb68e6b7ca5 --- tensorflow/python/eager/backprop.py | 17 +++++++++++++++-- .../python/ops/numpy_ops/np_interop_test.py | 16 ++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index b6e3661fc1d..3c6ffc99fa4 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -1260,6 +1260,12 @@ class GradientTape(object): ValueError: If vectorization of jacobian computation fails or if first dimension of `target` and `source` do not match. 
""" + rewrap_as_ndarray = False + if isinstance(target, np_arrays.ndarray): + target = target.data + rewrap_as_ndarray = True + if isinstance(source, np_arrays.ndarray): + source = source.data target_shape = target.shape if target_shape.rank is None: dim = tensor_shape.Dimension(None) @@ -1317,9 +1323,16 @@ class GradientTape(object): parallel_iterations=parallel_iterations) new_shape = array_ops.concat([target_shape, source_shape[1:]], axis=0) if output is None: - return array_ops.zeros(new_shape) + output = array_ops.zeros(new_shape) + if rewrap_as_ndarray: + output = np_arrays.tensor_to_ndarray(output) + return output else: output = array_ops.reshape(output, [target_row_size, batch_size, -1]) output = array_ops.transpose(output, [1, 0, 2]) - return array_ops.reshape(output, new_shape) + + output = array_ops.reshape(output, new_shape) + if rewrap_as_ndarray: + output = np_arrays.tensor_to_ndarray(output) + return output diff --git a/tensorflow/python/ops/numpy_ops/np_interop_test.py b/tensorflow/python/ops/numpy_ops/np_interop_test.py index c66d40f54a8..20b8a7118a5 100644 --- a/tensorflow/python/ops/numpy_ops/np_interop_test.py +++ b/tensorflow/python/ops/numpy_ops/np_interop_test.py @@ -300,6 +300,22 @@ class InteropTest(tf.test.TestCase): self.assertIsInstance(jacobian[1], np.ndarray) self.assertAllClose(jacobian, answer) + def testBatchJacobian(self): + with tf.GradientTape() as g: + x = np.asarray([[1., 2.], [3., 4.]]) + y = np.asarray([[3., 4.], [5., 6.]]) + g.watch(x) + g.watch(y) + z = x * x * y + + batch_jacobian = g.batch_jacobian(z, x) + answer = tf.stack( + [tf.linalg.diag(2 * x[0] * y[0]), + tf.linalg.diag(2 * x[1] * y[1])]) + + self.assertIsInstance(batch_jacobian, np.ndarray) + self.assertAllClose(batch_jacobian, answer) + class FunctionTest(InteropTest): From 6e067728911e6c6ce842b784ce251fff62bb4faf Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Wed, 22 Jul 2020 14:22:07 -0700 Subject: [PATCH 1077/2522] [TF] Clean up conv2d/conv3d input ndims check. PiperOrigin-RevId: 322654214 Change-Id: Ide4e2076f34f170a3eb06e6ca483df764c4f6395 --- tensorflow/python/ops/nn_ops.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 74a062fbb07..136d174b49e 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -2256,11 +2256,12 @@ def conv2d( # pylint: disable=redefined-builtin,dangerous-default-value strides = _get_sequence(strides, 2, channel_index, "strides") dilations = _get_sequence(dilations, 2, channel_index, "dilations") - # Try really hard to avoid modifying the legacy name scopes - return early. - shape = getattr(input, "shape", None) - if shape is not None: - ndims = getattr(shape, "ndims", -1) - if ndims == -1: ndims = len(shape) + shape = input.shape + # shape object may lack ndims, e.g., if input is an np.ndarray. In that case, + # we fall back to len(shape). + ndims = getattr(shape, "ndims", -1) + if ndims == -1: + ndims = len(shape) if ndims in (4, 3, 2, 1, 0, None): # We avoid calling squeeze_batch_dims to reduce extra python function # call slowdown in eager mode. This branch doesn't require reshapes. @@ -2989,12 +2990,12 @@ def _conv3d_expanded_batch( dilations=None, name=None): """Helper function for `conv3d`; handles expanded batches.""" - # Try really hard to avoid modifying the legacy name sceops - return early. 
- shape = getattr(input, "shape", None) - if shape is not None: - ndims = getattr(shape, "ndims", -1) - if ndims == -1: - ndims = len(shape) + shape = input.shape + # shape object may lack ndims, e.g., if input is an np.ndarray. In that case, + # we fall back to len(shape). + ndims = getattr(shape, "ndims", -1) + if ndims == -1: + ndims = len(shape) if ndims in (5, 4, 3, 2, 1, 0, None): # We avoid calling squeeze_batch_dims to reduce extra python function # call slowdown in eager mode. This branch doesn't require reshapes. From 2035a1b0c30a836a7546c9a7fa7f18ba6ab603e5 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Wed, 22 Jul 2020 14:25:23 -0700 Subject: [PATCH 1078/2522] Remove Windows nightly release duplicate flag. PiperOrigin-RevId: 322654899 Change-Id: Id933689e174f1abe38d149274daa7d7d3c6779c3 --- .../ci_build/release/windows/cpu_py35_full/nightly_release.bat | 2 +- .../ci_build/release/windows/cpu_py36_full/nightly_release.bat | 2 +- .../ci_build/release/windows/cpu_py37_full/nightly_release.bat | 2 +- .../ci_build/release/windows/cpu_py38_full/nightly_release.bat | 2 +- .../ci_build/release/windows/gpu_py35_full/nightly_release.bat | 2 +- .../ci_build/release/windows/gpu_py36_full/nightly_release.bat | 2 +- .../ci_build/release/windows/gpu_py37_full/nightly_release.bat | 2 +- .../ci_build/release/windows/gpu_py38_full/nightly_release.bat | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tensorflow/tools/ci_build/release/windows/cpu_py35_full/nightly_release.bat b/tensorflow/tools/ci_build/release/windows/cpu_py35_full/nightly_release.bat index 0f78ee006ff..6ed1088893f 100644 --- a/tensorflow/tools/ci_build/release/windows/cpu_py35_full/nightly_release.bat +++ b/tensorflow/tools/ci_build/release/windows/cpu_py35_full/nightly_release.bat @@ -17,4 +17,4 @@ SET PYTHON_DIRECTORY=Python35 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --tf_nightly --project_name "tf_nightly_cpu" --extra_build_flags "--config=release_cpu_windows" +call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --tf_nightly --project_name "tf_nightly_cpu" diff --git a/tensorflow/tools/ci_build/release/windows/cpu_py36_full/nightly_release.bat b/tensorflow/tools/ci_build/release/windows/cpu_py36_full/nightly_release.bat index a5ebfb21103..3af98dddeae 100644 --- a/tensorflow/tools/ci_build/release/windows/cpu_py36_full/nightly_release.bat +++ b/tensorflow/tools/ci_build/release/windows/cpu_py36_full/nightly_release.bat @@ -17,4 +17,4 @@ SET PYTHON_DIRECTORY=Python36 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --tf_nightly --project_name "tf_nightly_cpu" --extra_build_flags "--config=release_cpu_windows" +call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --tf_nightly --project_name "tf_nightly_cpu" diff --git a/tensorflow/tools/ci_build/release/windows/cpu_py37_full/nightly_release.bat b/tensorflow/tools/ci_build/release/windows/cpu_py37_full/nightly_release.bat index 9914c0235d1..850c21ee962 100644 --- a/tensorflow/tools/ci_build/release/windows/cpu_py37_full/nightly_release.bat +++ b/tensorflow/tools/ci_build/release/windows/cpu_py37_full/nightly_release.bat @@ -17,4 +17,4 @@ SET PYTHON_DIRECTORY=Python37 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --tf_nightly --project_name "tf_nightly_cpu" --extra_build_flags "--config=release_cpu_windows" +call tensorflow\tools\ci_build\windows\cpu\pip\run.bat 
--tf_nightly --project_name "tf_nightly_cpu" diff --git a/tensorflow/tools/ci_build/release/windows/cpu_py38_full/nightly_release.bat b/tensorflow/tools/ci_build/release/windows/cpu_py38_full/nightly_release.bat index bee0bb4edea..2456b1e26bb 100644 --- a/tensorflow/tools/ci_build/release/windows/cpu_py38_full/nightly_release.bat +++ b/tensorflow/tools/ci_build/release/windows/cpu_py38_full/nightly_release.bat @@ -17,4 +17,4 @@ SET PYTHON_DIRECTORY=Python38 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --tf_nightly --project_name "tf_nightly_cpu" --extra_build_flags "--config=release_cpu_windows" +call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --tf_nightly --project_name "tf_nightly_cpu" diff --git a/tensorflow/tools/ci_build/release/windows/gpu_py35_full/nightly_release.bat b/tensorflow/tools/ci_build/release/windows/gpu_py35_full/nightly_release.bat index cae34610b12..43e6414a74b 100644 --- a/tensorflow/tools/ci_build/release/windows/gpu_py35_full/nightly_release.bat +++ b/tensorflow/tools/ci_build/release/windows/gpu_py35_full/nightly_release.bat @@ -17,4 +17,4 @@ SET PYTHON_DIRECTORY=Python35 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --tf_nightly --extra_build_flags "--config=release_gpu_windows" +call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --tf_nightly diff --git a/tensorflow/tools/ci_build/release/windows/gpu_py36_full/nightly_release.bat b/tensorflow/tools/ci_build/release/windows/gpu_py36_full/nightly_release.bat index e82d65714c4..15ec83c054e 100644 --- a/tensorflow/tools/ci_build/release/windows/gpu_py36_full/nightly_release.bat +++ b/tensorflow/tools/ci_build/release/windows/gpu_py36_full/nightly_release.bat @@ -17,4 +17,4 @@ SET PYTHON_DIRECTORY=Python36 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --tf_nightly --extra_build_flags "--config=release_gpu_windows" +call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --tf_nightly diff --git a/tensorflow/tools/ci_build/release/windows/gpu_py37_full/nightly_release.bat b/tensorflow/tools/ci_build/release/windows/gpu_py37_full/nightly_release.bat index d152ab709a6..1eb65d8a284 100644 --- a/tensorflow/tools/ci_build/release/windows/gpu_py37_full/nightly_release.bat +++ b/tensorflow/tools/ci_build/release/windows/gpu_py37_full/nightly_release.bat @@ -17,4 +17,4 @@ SET PYTHON_DIRECTORY=Python37 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --tf_nightly --extra_build_flags "--config=release_gpu_windows" +call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --tf_nightly diff --git a/tensorflow/tools/ci_build/release/windows/gpu_py38_full/nightly_release.bat b/tensorflow/tools/ci_build/release/windows/gpu_py38_full/nightly_release.bat index 1ab90bc575d..670793340e8 100644 --- a/tensorflow/tools/ci_build/release/windows/gpu_py38_full/nightly_release.bat +++ b/tensorflow/tools/ci_build/release/windows/gpu_py38_full/nightly_release.bat @@ -17,4 +17,4 @@ SET PYTHON_DIRECTORY=Python38 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --tf_nightly --extra_build_flags "--config=release_gpu_windows" +call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --tf_nightly From e2f7c83bcbf3704d4e2aa53ce5d9f5208da78f31 Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Wed, 22 Jul 2020 14:25:37 -0700 Subject: [PATCH 
1079/2522] Cleanup of the activations (relu/relu6) kernel. Port the activations kernel to TfLiteEvalTensor, cache quantized data, and use the new TfLiteRegistrationRunner helper class. PiperOrigin-RevId: 322654958 Change-Id: I7b89a6dcca4b90192b07ec470fa18a14ceef0566 --- tensorflow/lite/micro/kernels/BUILD | 1 + tensorflow/lite/micro/kernels/activations.cc | 176 ++++++++++---- .../lite/micro/kernels/activations_test.cc | 219 ++++-------------- 3 files changed, 169 insertions(+), 227 deletions(-) diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index fac356b2925..b2544aa3af3 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD @@ -531,6 +531,7 @@ tflite_micro_cc_test( "activations_test.cc", ], deps = [ + ":kernel_runner", "//tensorflow/lite/c:common", "//tensorflow/lite/micro:op_resolvers", "//tensorflow/lite/micro/testing:micro_test", diff --git a/tensorflow/lite/micro/kernels/activations.cc b/tensorflow/lite/micro/kernels/activations.cc index 8b6e19610b9..128ab8ecbd8 100644 --- a/tensorflow/lite/micro/kernels/activations.cc +++ b/tensorflow/lite/micro/kernels/activations.cc @@ -21,20 +21,53 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/types.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/op_macros.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" #include "tensorflow/lite/micro/micro_utils.h" namespace tflite { namespace ops { namespace micro { namespace activations { +namespace { + +struct ReluOpData { + ReluParams params; +}; + +struct Relu6OpData { + int8_t six_int8; + int8_t zero_int8; + uint8_t six_uint8; + uint8_t zero_uint8; +}; + +} // namespace constexpr int kInputTensor = 0; constexpr int kOutputTensor = 0; -template -inline void ReluQuantized(const TfLiteTensor* input, TfLiteTensor* output, - const Q* input_data, Q* output_data) { - ReluParams params; +template +inline void ReluQuantized(const ReluOpData& data, + const RuntimeShape& input_shape, + const RuntimeShape& output_shape, const T* input_data, + T* output_data) { + const int flat_size = MatchingFlatSize(input_shape, output_shape); + for (int i = 0; i < flat_size; ++i) { + const int32 val = static_cast(input_data[i]); + int32_t clamped = + data.params.output_offset + + MultiplyByQuantizedMultiplier(val - data.params.input_offset, + data.params.output_multiplier, + data.params.output_shift); + clamped = std::max(data.params.quantized_activation_min, clamped); + clamped = std::min(data.params.quantized_activation_max, clamped); + output_data[i] = static_cast(clamped); + } +} + +template +inline void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output, + ReluOpData* data) { float act_min = 0.0; float act_max = std::numeric_limits::infinity(); double real_multiplier = @@ -43,34 +76,22 @@ inline void ReluQuantized(const TfLiteTensor* input, TfLiteTensor* output, const RuntimeShape input_shape = GetTensorShape(input); const RuntimeShape output_shape = GetTensorShape(output); - QuantizeMultiplier(real_multiplier, ¶ms.output_multiplier, - ¶ms.output_shift); + QuantizeMultiplier(real_multiplier, &data->params.output_multiplier, + &data->params.output_shift); - params.quantized_activation_min = - std::max(static_cast(std::numeric_limits::min()), + data->params.quantized_activation_min = + std::max(static_cast(std::numeric_limits::min()), output->params.zero_point + static_cast(roundf(act_min / output->params.scale))); - params.quantized_activation_max = + 
data->params.quantized_activation_max = act_max == std::numeric_limits::infinity() - ? static_cast(std::numeric_limits::max()) + ? static_cast(std::numeric_limits::max()) : std::min( - static_cast(std::numeric_limits::max()), + static_cast(std::numeric_limits::max()), output->params.zero_point + static_cast(roundf(act_max / output->params.scale))); - params.input_offset = input->params.zero_point; - params.output_offset = output->params.zero_point; - - const int flat_size = MatchingFlatSize(input_shape, output_shape); - for (int i = 0; i < flat_size; ++i) { - const int32 val = static_cast(input_data[i]); - int32 clamped = params.output_offset + - MultiplyByQuantizedMultiplier(val - params.input_offset, - params.output_multiplier, - params.output_shift); - clamped = std::max(params.quantized_activation_min, clamped); - clamped = std::min(params.quantized_activation_max, clamped); - output_data[i] = static_cast(clamped); - } + data->params.input_offset = input->params.zero_point; + data->params.output_offset = output->params.zero_point; } inline void ReluFloat(const RuntimeShape& input_shape, const float* input_data, @@ -108,29 +129,57 @@ inline void Relu6Quantized(Q lower, Q upper, const RuntimeShape& input_shape, } } +void* ReluInit(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(ReluOpData)); +} + TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + ReluOpData* data = static_cast(node->user_data); + + const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + if (input->type == kTfLiteInt8) { + CalculateReluOpData(input, output, data); + } else if (input->type == kTfLiteUInt8) { + CalculateReluOpData(input, output, data); + } + return kTfLiteOk; } TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) { - const TfLiteTensor* input = GetInput(context, node, kInputTensor); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TFLITE_DCHECK(node->user_data != nullptr); + const ReluOpData& data = *(static_cast(node->user_data)); + + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); switch (input->type) { case kTfLiteFloat32: { - ReluFloat(GetTensorShape(input), GetTensorData(input), - GetTensorShape(output), GetTensorData(output)); + ReluFloat(tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); return kTfLiteOk; } case kTfLiteInt8: { - ReluQuantized(input, output, GetTensorData(input), - GetTensorData(output)); + ReluQuantized(data, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorData(output)); return kTfLiteOk; } case kTfLiteUInt8: { - ReluQuantized(input, output, GetTensorData(input), - GetTensorData(output)); + ReluQuantized(data, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorData(output)); return kTfLiteOk; } default: { @@ -141,37 +190,62 @@ TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) { } } +void* Relu6Init(TfLiteContext* context, const char* buffer, 
size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(Relu6OpData)); +} + TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + Relu6OpData* data = static_cast(node->user_data); + + const TfLiteTensor* input = GetInput(context, node, kInputTensor); + + if (input->type == kTfLiteInt8) { + data->six_int8 = FloatToAsymmetricQuantizedInt8(6.0f, input->params.scale, + input->params.zero_point); + data->zero_int8 = input->params.zero_point; + } else if (input->type == kTfLiteUInt8) { + data->six_uint8 = FloatToAsymmetricQuantizedUInt8(6.0f, input->params.scale, + input->params.zero_point); + data->zero_uint8 = input->params.zero_point; + } + return kTfLiteOk; } TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) { - const TfLiteTensor* input = GetInput(context, node, kInputTensor); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TFLITE_DCHECK(node->user_data != nullptr); + const Relu6OpData& data = *(static_cast(node->user_data)); + + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); switch (input->type) { case kTfLiteFloat32: { - Relu6Float(GetTensorShape(input), GetTensorData(input), - GetTensorShape(output), GetTensorData(output)); + Relu6Float(tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); return kTfLiteOk; } case kTfLiteInt8: { - const int8_t six = FloatToAsymmetricQuantizedInt8( - 6.0f, input->params.scale, input->params.zero_point); - const int8_t zero = input->params.zero_point; - Relu6Quantized( - zero, six, GetTensorShape(input), GetTensorData(input), - GetTensorShape(output), GetTensorData(output)); + Relu6Quantized(data.zero_int8, data.six_int8, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); return kTfLiteOk; } case kTfLiteUInt8: { - const uint8_t six = FloatToAsymmetricQuantizedUInt8( - 6.0f, input->params.scale, input->params.zero_point); - const uint8_t zero = input->params.zero_point; - Relu6Quantized( - zero, six, GetTensorShape(input), GetTensorData(input), - GetTensorShape(output), GetTensorData(output)); + Relu6Quantized(data.zero_uint8, data.six_uint8, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); return kTfLiteOk; } default: { @@ -185,7 +259,7 @@ TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace activations TfLiteRegistration Register_RELU() { - return {/*init=*/nullptr, + return {/*init=*/activations::ReluInit, /*free=*/nullptr, /*prepare=*/activations::ReluPrepare, /*invoke=*/activations::ReluEval, @@ -196,7 +270,7 @@ TfLiteRegistration Register_RELU() { } TfLiteRegistration Register_RELU6() { - return {/*init=*/nullptr, + return {/*init=*/activations::Relu6Init, /*free=*/nullptr, /*prepare=*/activations::Relu6Prepare, /*invoke=*/activations::Relu6Eval, diff --git a/tensorflow/lite/micro/kernels/activations_test.cc b/tensorflow/lite/micro/kernels/activations_test.cc index 85556d10406..db23bdec475 100644 --- a/tensorflow/lite/micro/kernels/activations_test.cc +++ 
b/tensorflow/lite/micro/kernels/activations_test.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/micro/all_ops_resolver.h" +#include "tensorflow/lite/micro/kernels/kernel_runner.h" #include "tensorflow/lite/micro/testing/micro_test.h" #include "tensorflow/lite/micro/testing/test_utils.h" @@ -38,40 +39,19 @@ void TestReluFloat(const int* input_dims_data, const float* input_data, CreateFloatTensor(output_data, output_dims), }; - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_RELU); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - - const char* init_data = nullptr; - size_t init_data_size = 0; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, init_data, init_data_size); - } int inputs_array_data[] = {1, 0}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 1}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = nullptr; - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - if (registration->free) { - registration->free(&context, user_data); - } + const TfLiteRegistration registration = ops::micro::Register_RELU(); + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, + /*builtin_data=*/nullptr, micro_test::reporter); + + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); + for (int i = 0; i < output_elements_count; ++i) { TF_LITE_MICRO_EXPECT_NEAR(golden[i], output_data[i], 1e-5f); } @@ -92,40 +72,19 @@ void TestRelu6Float(const int* input_dims_data, const float* input_data, CreateFloatTensor(output_data, output_dims), }; - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_RELU6); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - - const char* init_data = nullptr; - size_t init_data_size = 0; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, init_data, init_data_size); - } int inputs_array_data[] = {1, 0}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 1}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = nullptr; - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - if (registration->free) { - 
registration->free(&context, user_data); - } + const TfLiteRegistration registration = ops::micro::Register_RELU6(); + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, + /*builtin_data=*/nullptr, micro_test::reporter); + + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); + for (int i = 0; i < output_elements_count; ++i) { TF_LITE_MICRO_EXPECT_NEAR(golden[i], output_data[i], 1e-5f); } @@ -151,40 +110,18 @@ void TestReluUint8(const int* input_dims_data, const float* input_data, output_zero_point), }; - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_RELU); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - - const char* init_data = nullptr; - size_t init_data_size = 0; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, init_data, init_data_size); - } int inputs_array_data[] = {1, 0}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 1}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = nullptr; - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - if (registration->free) { - registration->free(&context, user_data); - } + const TfLiteRegistration registration = ops::micro::Register_RELU(); + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, + /*builtin_data=*/nullptr, micro_test::reporter); + + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); AsymmetricQuantize(golden, golden_quantized, output_elements_count, output_scale, output_zero_point); @@ -214,40 +151,18 @@ void TestRelu6Uint8(const int* input_dims_data, const float* input_data, output_zero_point), }; - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_RELU6); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - - const char* init_data = nullptr; - size_t init_data_size = 0; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, init_data, init_data_size); - } int inputs_array_data[] = {1, 0}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 1}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = nullptr; - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - 
if (registration->free) { - registration->free(&context, user_data); - } + const TfLiteRegistration registration = ops::micro::Register_RELU6(); + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, + /*builtin_data=*/nullptr, micro_test::reporter); + + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); AsymmetricQuantize(golden, golden_quantized, output_elements_count, output_scale, output_zero_point); @@ -276,42 +191,18 @@ void TestReluInt8(const int* input_dims_data, const float* input_data, output_zero_point), }; - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_RELU); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - - const char* init_data = nullptr; - size_t init_data_size = 0; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, init_data, init_data_size); - } int inputs_array_data[] = {1, 0}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 1}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = nullptr; - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } + const TfLiteRegistration registration = ops::micro::Register_RELU(); + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, + /*builtin_data=*/nullptr, micro_test::reporter); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - - if (registration->free) { - registration->free(&context, user_data); - } + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); AsymmetricQuantize(golden, golden_quantized, output_elements_count, output_scale, output_zero_point); @@ -340,42 +231,18 @@ void TestRelu6Int8(const int* input_dims_data, const float* input_data, output_zero_point), }; - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_RELU6); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - - const char* init_data = nullptr; - size_t init_data_size = 0; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, init_data, init_data_size); - } int inputs_array_data[] = {1, 0}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 1}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = nullptr; - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } + const TfLiteRegistration registration = ops::micro::Register_RELU6(); + micro::KernelRunner runner(registration, tensors, 
tensors_size, inputs_array, + outputs_array, + /*builtin_data=*/nullptr, micro_test::reporter); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - - if (registration->free) { - registration->free(&context, user_data); - } + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); AsymmetricQuantize(golden, golden_quantized, output_elements_count, output_scale, output_zero_point); From e3be70aa9d472acaeeeea161bc609dd959decc9b Mon Sep 17 00:00:00 2001 From: Tomer Kaftan Date: Wed, 22 Jul 2020 14:30:00 -0700 Subject: [PATCH 1080/2522] Add a `convert_to_tensor` to the start of Tensor.__getitem__ (_slice_helper) to make sure it dispatches directly, rather than letting the nested tf.strided_slice trigger dispatching. This is important because `tensor.__getitem__` does some input arg manipulation before getting to the `tf.strided_slice`. So, when we try to run the traced code using the args provided to `strided_slice` (e.g. for KerasTensors), we lose information about constants that TPUs need to compile graphs involving shape manipulation. Tracing `__getitem__` and its input args directly does not seem to run into this problem. (Note: this TPU situation is separate from the shape value inferring we do in KerasTensors during Functional API construction/tracing time. This happens at model run-time when running the already-traced code) To get this all to work correctly in practice when dispatching KerasTensors + serializing/deserializing Keras models, this CL also has to: * Add special KerasTensor dispatchers for APIs that may take `slices` as inputs, to make sure they can trigger dispatching & serialize/deserialize correctly. This specialized dispatcher makes sure to unpack any `slices` in the args/kwargs into a namedtuple, before passing it to a specialized Keras TFOpLambda subclass that re-packs any slices. * Add serialization/deserialization support for `ellipsis` objects in Keras ------------------------ Other considered alternatives to get the dispatching/serialization to work correctly for KerasTensors: * add flatten/pack support for slices to `tf.nest`/`tree`. This can be revisited in the future (especially re: dispatchv2), but tree is critical path code and it's not obvious if we should always be flattening/packing slices or not. * Make the dispatched __operators__.getitem method expect slices to have already been unwrapped, and add a step to the __getitem__ overriding that unwraps the slices. This would be somewhat clunky in practice because there are other TF apis that take `slice`s in their args as well, and it might be surprising to dispatch users that the __operators__.getitem dispatch doesn't actually match the standard __getitem__ api. Likewise it's unclear what the performance implication of doing extra packing/unpacking even when not dispatching would be. 
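For context, a minimal sketch of the slice pack/unpack idea described above. The helper names to_named_tuple/to_slice are illustrative stand-ins; the actual helpers this change adds in keras/layers/core.py are _slice_to_named_tuple and _named_tuple_to_slice, and the real dispatcher applies them via nest.map_structure over args/kwargs.

    import collections

    # Slices are opaque to tf.nest, so represent them as namedtuples that
    # nest can flatten and Keras can serialize (illustrative stand-ins).
    SliceTuple = collections.namedtuple('SliceTuple', ['start', 'stop', 'step'])

    def to_named_tuple(x):
      # Pack a slice into a flattenable, serializable namedtuple.
      return SliceTuple(x.start, x.stop, x.step) if isinstance(x, slice) else x

    def to_slice(x):
      # Restore a real slice object before calling the wrapped TF op.
      return slice(*x) if isinstance(x, SliceTuple) else x

    # e.g. the slice_spec produced by  inp[..., :6]
    spec = (Ellipsis, slice(None, 6, None))
    packed = tuple(to_named_tuple(s) for s in spec)   # form seen by the dispatcher
    unpacked = tuple(to_slice(s) for s in packed)     # restored before strided_slice
    assert unpacked == spec

Packing this way lets nest.flatten see the individual start/stop/step entries (so a KerasTensor inside a slice still triggers dispatch), while SlicingOpLambda rebuilds real slice objects before invoking the traced op.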
PiperOrigin-RevId: 322655930 Change-Id: I35417577199393c016f753be685bf2926d62e753 --- tensorflow/python/keras/layers/core.py | 94 +++++++ .../keras/layers/tensorflow_op_layer_test.py | 233 ++++++++++++++++++ .../keras/saving/saved_model/json_utils.py | 6 + tensorflow/python/ops/array_ops.py | 2 + tensorflow/python/util/dispatch_test.py | 78 +++++- tensorflow/python/util/serialization.py | 3 + 6 files changed, 411 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/keras/layers/core.py b/tensorflow/python/keras/layers/core.py index 155af8d2398..2d69782a1cf 100644 --- a/tensorflow/python/keras/layers/core.py +++ b/tensorflow/python/keras/layers/core.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import collections import copy import functools import operator @@ -1422,3 +1423,96 @@ class KerasOpDispatcher(dispatch.GlobalOpDispatcher): return self.NOT_SUPPORTED KerasOpDispatcher().register() + +SliceTuple = collections.namedtuple('SliceTuple', ['start', 'stop', 'step']) + + +def _slice_to_named_tuple(x): + if isinstance(x, slice): + return SliceTuple(x.start, x.stop, x.step) + return x + + +def _named_tuple_to_slice(x): + if type(x).__name__ == 'SliceTuple': + return slice(x[0], x[1], x[2]) + return x + + +class SlicingOpLambda(TFOpLambda): + """Wraps TF API symbols in a `Layer` object. + + It is inserted by the Functional API construction whenever users call + a supported TF symbol on KerasTensors. + + Like Lambda layers, this layer tries to raise warnings when it detects users + explicitly use variables in the call. (To let them know + that the layer will not capture the variables). + + This is useful in the case where users do something like: + x = keras.Input(...) + y = tf.Variable(...) + out = x * tf_variable + """ + + @trackable.no_automatic_dependency_tracking + def __init__(self, function, **kwargs): + super(SlicingOpLambda, self).__init__(function, **kwargs) + + original_call = self.call + # Decorate the function to produce this layer's call method + def _call_wrapper(*args, **kwargs): + # Turn any slice nametuples in the args back into `slice` objects. + # This conversion cannot use nest.flatten/map_structure, + # because namedtuples are flattened by nest while slices aren't. + # So, map_structure would only see the individual elements in the + # namedtuple. + # This can't use map_structure_up_to either because the 'shallowness' of + # the shallow tree would have to vary depending on if only one dim or + # multiple are being sliced. + new_args = [] + for arg in args: + arg = _named_tuple_to_slice(arg) + if isinstance(arg, (list, tuple)): + new_arg = [] + for sub_arg in arg: + new_arg.append(_named_tuple_to_slice(sub_arg)) + arg = new_arg + new_args.append(arg) + + # Handle the kwargs too. 
+ new_kwargs = {} + for key, value in kwargs.items(): + value = _named_tuple_to_slice(value) + if isinstance(value, (list, tuple)): + new_value = [] + for v in value: + new_value.append(_named_tuple_to_slice(v)) + value = new_value + new_kwargs[key] = value + + return original_call(*new_args, **new_kwargs) + self.call = tf_decorator.make_decorator(original_call, _call_wrapper) + + +class TFSlicingOpDispatcher(dispatch.OpDispatcher): + """A global dispatcher that allows building a functional model with TF Ops.""" + + def __init__(self, op): + self.op = op + + def handle(self, args, kwargs): + """Handle the specified operation with the specified arguments.""" + args = nest.map_structure(_slice_to_named_tuple, args) + kwargs = nest.map_structure(_slice_to_named_tuple, kwargs) + if any( + isinstance(x, keras_tensor.KerasTensor) + for x in nest.flatten([args, kwargs])): + return SlicingOpLambda(self.op)(*args, **kwargs) + else: + return self.NOT_SUPPORTED + +for slicing_op in [array_ops._slice_helper, # pylint: disable=protected-access + array_ops.boolean_mask, + array_ops.boolean_mask_v2]: + TFSlicingOpDispatcher(slicing_op).register(slicing_op) diff --git a/tensorflow/python/keras/layers/tensorflow_op_layer_test.py b/tensorflow/python/keras/layers/tensorflow_op_layer_test.py index cb044260106..817e746bc70 100644 --- a/tensorflow/python/keras/layers/tensorflow_op_layer_test.py +++ b/tensorflow/python/keras/layers/tensorflow_op_layer_test.py @@ -30,6 +30,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import ops from tensorflow.python.keras import keras_parameterized from tensorflow.python.keras import testing_utils +from tensorflow.python.keras.engine import keras_tensor from tensorflow.python.keras.optimizer_v2 import adam from tensorflow.python.keras.saving import model_config from tensorflow.python.ops import array_ops @@ -294,6 +295,238 @@ class AutoLambdaTest(keras_parameterized.TestCase): self.assertAllEqual([layer.name for layer in model.layers], [layer.name for layer in new_model.layers]) + def test_getitem_slice_with_step_only(self): + if not context.executing_eagerly(): + self.skipTest('Complex slicing like this fails in v1') + inp = keras.Input(shape=(4, 3, 8)) + slice_step = keras.Input(shape=(), dtype='int32') + + out = inp[..., ::slice_step[0]] + model = keras.Model( + inputs=[inp, slice_step], + outputs=out) + model.compile( + adam.Adam(0.001), + 'mse', + run_eagerly=testing_utils.should_run_eagerly()) + batch_size = 7 + step = 3 + x = array_ops.stack([ + math_ops.range(8) for _ in range(batch_size)]) + args = [x, constant_op.constant(step, shape=(batch_size,))] + expected = array_ops.stack([ + math_ops.range(8)[::step] for _ in range(batch_size)]) + + if keras_tensor.keras_tensors_enabled(): + self.assertIn('tf.__operators__.getitem', ( + x.name for x in model.layers)) + self.assertNotIn('tf.strided_slice', ( + x.name for x in model.layers)) + self.assertAllEqual(model(args), expected) + self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) + + # Make sure it can be successfully saved and loaded + config = model.get_config() + model = keras.Model.from_config(config) + + self.assertAllEqual(model(args), expected) + self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) + + def test_getitem_slice_real_tensor(self): + if not context.executing_eagerly(): + self.skipTest('Complex slicing like this fails in v1') + x = math_ops.range(10.0) + slice_stop = keras.Input(shape=(), dtype='int32') + + out = 
x[:slice_stop[0]] + model = keras.Model( + inputs=slice_stop, + outputs=out) + model.compile( + adam.Adam(0.001), + 'mse', + run_eagerly=testing_utils.should_run_eagerly()) + batch_size = 7 + stop = 6 + args = constant_op.constant(stop, shape=(batch_size,)) + expected = x[:stop] + + if keras_tensor.keras_tensors_enabled(): + self.assertIn('tf.__operators__.getitem', ( + x.name for x in model.layers)) + # TODO(b/161925288): Fix the dispatch triggering then uncomment: + # self.assertNotIn('tf.strided_slice', ( + # x.name for x in model.layers)) + self.assertAllEqual(model(args), expected) + self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) + + # TODO(b/161925288): Fix the bug then uncomment: + # # Make sure it can be successfully saved and loaded + # config = model.get_config() + # model = keras.Model.from_config(config) + + self.assertAllEqual(model(args), expected) + self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) + + def test_getitem_index_real_tensor(self): + if not context.executing_eagerly(): + self.skipTest('Complex slicing like this fails in v1') + x = math_ops.range(10.0) + slice_stop = keras.Input(shape=(), dtype='int32') + + out = x[slice_stop[0]] + model = keras.Model( + inputs=slice_stop, + outputs=out) + model.compile( + adam.Adam(0.001), + 'mse', + run_eagerly=testing_utils.should_run_eagerly()) + batch_size = 7 + index = 6 + args = constant_op.constant(index, shape=(batch_size,)) + expected = x[index] + + if keras_tensor.keras_tensors_enabled(): + self.assertIn('tf.__operators__.getitem', ( + x.name for x in model.layers)) + # TODO(b/161925288): Fix the bug then uncomment: + # self.assertNotIn('tf.strided_slice', ( + # x.name for x in model.layers)) + self.assertAllEqual(model(args), expected) + self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) + + # TODO(b/161925288): Fix the bug then uncomment: + # # Make sure it can be successfully saved and loaded + # config = model.get_config() + # model = keras.Model.from_config(config) + + self.assertAllEqual(model(args), expected) + self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) + + def test_getitem_slice_with_stop_only(self): + if not context.executing_eagerly(): + self.skipTest('Complex slicing like this fails in v1') + inp = keras.Input(shape=(4, 3, 8)) + slice_stop = keras.Input(shape=(), dtype='int32') + + out = inp[:slice_stop[0]] + model = keras.Model( + inputs=[inp, slice_stop], + outputs=out) + model.compile( + adam.Adam(0.001), + 'mse', + run_eagerly=testing_utils.should_run_eagerly()) + batch_size = 7 + stop = 6 + x = array_ops.stack([ + math_ops.range(8) for _ in range(batch_size)]) + args = [x, constant_op.constant(stop, shape=(batch_size,))] + expected = x[:stop] + + if keras_tensor.keras_tensors_enabled(): + self.assertIn('tf.__operators__.getitem', ( + x.name for x in model.layers)) + self.assertNotIn('tf.strided_slice', ( + x.name for x in model.layers)) + self.assertAllEqual(model(args), expected) + self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) + + # Make sure it can be successfully saved and loaded + config = model.get_config() + model = keras.Model.from_config(config) + + self.assertAllEqual(model(args), expected) + self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) + + def test_getitem_slice_with_stop_and_ellipsis_only(self): + if not context.executing_eagerly(): + self.skipTest('Complex slicing like this fails in v1') + inp = keras.Input(shape=(4, 3, 8)) + 
slice_stop = keras.Input(shape=(), dtype='int32') + + out = inp[..., :slice_stop[0]] + model = keras.Model( + inputs=[inp, slice_stop], + outputs=out) + model.compile( + adam.Adam(0.001), + 'mse', + run_eagerly=testing_utils.should_run_eagerly()) + batch_size = 7 + stop = 6 + x = array_ops.stack([ + math_ops.range(8) for _ in range(batch_size)]) + args = [x, constant_op.constant(stop, shape=(batch_size,))] + expected = array_ops.stack([ + math_ops.range(8)[:stop] for _ in range(batch_size)]) + + if keras_tensor.keras_tensors_enabled(): + self.assertIn('tf.__operators__.getitem', ( + x.name for x in model.layers)) + self.assertNotIn('tf.strided_slice', ( + x.name for x in model.layers)) + self.assertAllEqual(model(args), expected) + self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) + + # Make sure it can be successfully saved and loaded + config = model.get_config() + model = keras.Model.from_config(config) + + self.assertAllEqual(model(args), expected) + self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) + + def test_getitem_complex_slicing(self): + if not context.executing_eagerly(): + self.skipTest('Complex slicing like this fails in v1') + inp = keras.Input(shape=(4, 3, 8)) + first_dim = keras.Input(shape=(), dtype='int32') + slice_start = keras.Input(shape=(), dtype='int32') + slice_stop = keras.Input(shape=(), dtype='int32') + slice_stride = keras.Input(shape=(), dtype='int32') + + out = inp[..., first_dim[0], slice_start[0]:slice_stop[0]:slice_stride[0]] + model = keras.Model( + inputs=[inp, first_dim, slice_start, slice_stop, slice_stride], + outputs=out) + model.compile( + adam.Adam(0.001), + 'mse', + run_eagerly=testing_utils.should_run_eagerly()) + batch_size = 7 + start = 1 + stop = 6 + step = 2 + x = array_ops.stack([array_ops.stack([array_ops.stack([ + math_ops.range(8) + for _ in range(3)]) for _ in range(4)]) for _ in range(batch_size)]) + args = [x, + constant_op.constant(0, shape=(batch_size,)), + constant_op.constant(start, shape=(batch_size,)), + constant_op.constant(stop, shape=(batch_size,)), + constant_op.constant(step, shape=(batch_size,))] + # Slice the innermost dim. only grab one index from the second-to-innermost + # dim, removing that dim from the shape. 
+ expected = array_ops.stack([array_ops.stack([ + math_ops.range(8)[start:stop:step] + for _ in range(4)]) for _ in range(batch_size)]) + + if keras_tensor.keras_tensors_enabled(): + self.assertIn('tf.__operators__.getitem', ( + x.name for x in model.layers)) + self.assertNotIn('tf.strided_slice', ( + x.name for x in model.layers)) + self.assertAllEqual(model(args), expected) + self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) + + # Make sure it can be successfully saved and loaded + config = model.get_config() + model = keras.Model.from_config(config) + + self.assertAllEqual(model(args), expected) + self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) + def test_numerical_correctness_simple(self): x = ops.convert_to_tensor_v2([[-1., 0., -2., 1.]]) inputs = keras.Input(shape=(4,)) diff --git a/tensorflow/python/keras/saving/saved_model/json_utils.py b/tensorflow/python/keras/saving/saved_model/json_utils.py index 4e4b671697a..d06e4180564 100644 --- a/tensorflow/python/keras/saving/saved_model/json_utils.py +++ b/tensorflow/python/keras/saving/saved_model/json_utils.py @@ -70,11 +70,14 @@ def decode(json_string): def _decode_helper(obj): + """A decoding helper that is TF-object aware.""" if isinstance(obj, dict) and 'class_name' in obj: if obj['class_name'] == 'TensorShape': return tensor_shape.TensorShape(obj['items']) elif obj['class_name'] == '__tuple__': return tuple(_decode_helper(i) for i in obj['items']) + elif obj['class_name'] == '__ellipsis__': + return Ellipsis return obj @@ -122,6 +125,9 @@ def get_json_type(obj): if isinstance(obj, collections_abc.Mapping): return dict(obj) + if obj is Ellipsis: + return {'class_name': '__ellipsis__'} + if isinstance(obj, wrapt.ObjectProxy): return obj.__wrapped__ diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 8e9bc1ef4d3..e9f32dec6b8 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -955,6 +955,8 @@ def _slice_helper(tensor, slice_spec, var=None): TypeError: If the slice indices aren't int, slice, ellipsis, tf.newaxis or scalar int32/int64 tensors. 
""" + tensor = ops.convert_to_tensor(tensor) + if isinstance(slice_spec, bool) or \ (isinstance(slice_spec, ops.Tensor) and slice_spec.dtype == dtypes.bool) or \ (isinstance(slice_spec, np.ndarray) and slice_spec.dtype == bool): diff --git a/tensorflow/python/util/dispatch_test.py b/tensorflow/python/util/dispatch_test.py index cc4fed0abb7..2b3946ce9f7 100644 --- a/tensorflow/python/util/dispatch_test.py +++ b/tensorflow/python/util/dispatch_test.py @@ -20,6 +20,7 @@ from __future__ import print_function from tensorflow.python.framework import ops from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops.proto_ops import decode_proto @@ -28,6 +29,7 @@ from tensorflow.python.platform import test from tensorflow.python.platform import tf_logging from tensorflow.python.util import deprecation from tensorflow.python.util import dispatch +from tensorflow.python.util import nest from tensorflow.python.util.tf_export import get_canonical_name_for_symbol from tensorflow.python.util.tf_export import tf_export @@ -68,10 +70,38 @@ class TensorTracer(object): ["{}={}".format(name, x) for (name, x) in self.kwargs.items()]) return "{}({})".format(self.name, ", ".join(args)) + @classmethod + def _overload_all_operators(cls): # pylint: disable=invalid-name + """Register overloads for all operators.""" + for operator in ops.Tensor.OVERLOADABLE_OPERATORS: + cls._overload_operator(operator) + + @classmethod + def _overload_operator(cls, operator): # pylint: disable=invalid-name + """Overload an operator with the same overloading as `ops.Tensor`.""" + tensor_oper = getattr(ops.Tensor, operator) + + # Compatibility with Python 2: + # Python 2 unbound methods have type checks for the first arg, + # so we need to extract the underlying function + tensor_oper = getattr(tensor_oper, "__func__", tensor_oper) + setattr(cls, operator, tensor_oper) + +TensorTracer._overload_all_operators() # pylint: disable=protected-access + class TensorTracerOpDispatcher(dispatch.GlobalOpDispatcher): """Global op dispatcher for TensorTracer.""" + def _flatten_with_slice_flattening(self, x): + flat = [] + for val in nest.flatten(x): + if isinstance(val, slice): + flat.extend((val.start, val.stop, val.step)) + else: + flat.append(val) + return flat + def handle(self, op, args, kwargs): # Dispatcher only applies if at least one arg is a TensorTracer. if not (any(self.is_tensor_tracer_arg(x) for x in args) or @@ -82,11 +112,8 @@ class TensorTracerOpDispatcher(dispatch.GlobalOpDispatcher): return TensorTracer(symbol_name, args, kwargs) def is_tensor_tracer_arg(self, value): - if isinstance(value, TensorTracer): - return True - if isinstance(value, (list, tuple)): - if any(isinstance(x, TensorTracer) for x in value): - return True + return any(isinstance(x, TensorTracer) for x in + self._flatten_with_slice_flattening(value)) @test_util.run_all_in_graph_and_eager_modes @@ -214,5 +241,46 @@ class DispatchTest(test_util.TensorFlowTestCase): # Clean up. 
dispatch._GLOBAL_DISPATCHERS = original_global_dispatchers + def testGlobalDispatcherGetItem(self): + original_global_dispatchers = dispatch._GLOBAL_DISPATCHERS + try: + TensorTracerOpDispatcher().register() + + x = TensorTracer("x") + trace = x[0] + self.assertEqual( + str(trace), + "__operators__.getitem(x, 0)") + + x = TensorTracer("x") + y = TensorTracer("y") + trace = x[y] + self.assertEqual( + str(trace), + "__operators__.getitem(x, y)") + + x = TensorTracer("x") + y = TensorTracer("y") + trace = x[:y] # pylint: disable=invalid-slice-index + self.assertEqual( + str(trace), + "__operators__.getitem(x, slice(None, y, None))") + + x = array_ops.ones(shape=(3, 3)) + y = TensorTracer("y") + trace = x[y] + self.assertEqual( + str(trace), + "__operators__.getitem(%s, y)" % x) + + trace = x[:y] # pylint: disable=invalid-slice-index + self.assertEqual( + str(trace), + "__operators__.getitem(%s, slice(None, y, None))" % x) + + finally: + # Clean up. + dispatch._GLOBAL_DISPATCHERS = original_global_dispatchers + if __name__ == "__main__": googletest.main() diff --git a/tensorflow/python/util/serialization.py b/tensorflow/python/util/serialization.py index 3b1713b4c61..e35d5ff5d5d 100644 --- a/tensorflow/python/util/serialization.py +++ b/tensorflow/python/util/serialization.py @@ -70,6 +70,9 @@ def get_json_type(obj): if isinstance(obj, collections_abc.Mapping): return dict(obj) + if obj is Ellipsis: + return {'class_name': '__ellipsis__'} + if isinstance(obj, wrapt.ObjectProxy): return obj.__wrapped__ From a0c12335d3f2ecc114cc756a69656032283d144d Mon Sep 17 00:00:00 2001 From: Meghna Natraj Date: Wed, 22 Jul 2020 14:40:56 -0700 Subject: [PATCH 1081/2522] Support integer input and output type for Quantize-Aware Trained models PiperOrigin-RevId: 322658564 Change-Id: I388d625fe22df0099dc2ed5a5e87db30a4a9d647 --- RELEASE.md | 4 +- tensorflow/lite/python/BUILD | 15 ++ tensorflow/lite/python/lite.py | 29 ++- tensorflow/lite/python/lite_v2_test.py | 45 ++--- tensorflow/lite/python/util.py | 265 +++++++++++++++++++++++++ tensorflow/lite/python/util_test.py | 163 +++++++++++++++ 6 files changed, 490 insertions(+), 31 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 12b5168954b..7895a0ba113 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -56,6 +56,8 @@ * `tf.lite`: * Better support for ops with high-dimensional broadcasting inputs by adding `BroadcastTo` ops when necessary. + * `TFLiteConverter`: + * Support optional flags `inference_input_type` and `inference_output_type` for full integer quantized models. This allows users to modify the model input and output type to integer types (tf.int8, tf.uint8) instead of defaulting to float type (tf.float32). * `tf.random`: * * Math and Linear Algebra: @@ -68,7 +70,7 @@ * * Other: * We have replaced uses of "whitelist" and "blacklist" with "allowlist" - and "denylist" where possible. Please see + and "denylist" where possible. Please see https://developers.google.com/style/word-list#blacklist for more context. 
* diff --git a/tensorflow/lite/python/BUILD b/tensorflow/lite/python/BUILD index e26000c810a..55a2a69675d 100644 --- a/tensorflow/lite/python/BUILD +++ b/tensorflow/lite/python/BUILD @@ -212,8 +212,11 @@ py_library( deps = [ ":lite_constants", ":op_hint", + ":schema_py", "//tensorflow/python:tf_optimizer", "//tensorflow/python/eager:wrap_function", + "@absl_py//absl/logging", + "@flatbuffers//:runtime_py", "@six_archive//:six", ], ) @@ -224,12 +227,24 @@ py_test( python_version = "PY3", srcs_version = "PY2AND3", tags = [ + "no_mac", "no_windows", ], deps = [ + ":lite_constants", ":util", + "//tensorflow:tensorflow_py", + "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", + "//tensorflow/python:control_flow_ops", + "//tensorflow/python:convert_to_constants", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", "//tensorflow/python:framework_test_lib", + "//tensorflow/python:math_ops", + "//tensorflow/python:session", + "//third_party/py/numpy", + "@absl_py//absl/testing:parameterized", "@six_archive//:six", ], ) diff --git a/tensorflow/lite/python/lite.py b/tensorflow/lite/python/lite.py index e919aa4b00f..a08b40bbed6 100644 --- a/tensorflow/lite/python/lite.py +++ b/tensorflow/lite/python/lite.py @@ -61,6 +61,7 @@ from tensorflow.lite.python.util import get_grappler_config as _get_grappler_con from tensorflow.lite.python.util import get_tensor_name as _get_tensor_name from tensorflow.lite.python.util import get_tensors_from_tensor_names as _get_tensors_from_tensor_names from tensorflow.lite.python.util import is_frozen_graph as _is_frozen_graph +from tensorflow.lite.python.util import modify_integer_quantized_model_io_type as _modify_integer_quantized_model_io_type from tensorflow.lite.python.util import run_graph_optimizations as _run_graph_optimizations from tensorflow.lite.python.util import set_tensor_shapes as _set_tensor_shapes from tensorflow.python import keras as _keras @@ -314,6 +315,23 @@ class QuantizationMode(object): else: return False, None + def flags_modify_model_io_type( + self, input_type=constants.FLOAT, output_type=constants.FLOAT): + """Flags for modifying the input and output type of a tflite model.""" + is_post_training_quantize = self.quantizer_flags(input_type, output_type)[0] + is_training_time_only_quantize = self.training_time_int8_allow_float() and \ + not is_post_training_quantize + + # TODO(b/153576658): Consolidate post/during training quantization workflows + # to modify model input/output type after MLIR conversion. + if is_training_time_only_quantize: + return { + "inference_input_type": input_type, + "inference_output_type": output_type, + } + else: + return None + # Below are helpers for the above functions. def _validate_int8_required(self): @@ -557,9 +575,8 @@ class TFLiteConverterBaseV2(TFLiteConverterBase): def _validate_inference_input_output_types(self, quant_mode): """Validate inference_input_type and inference_output_type flags.""" default_types = [constants.FLOAT, None] - # We only support integer types for post training integer quantization - # as we have statistical information to quantize the input and output. - if quant_mode.is_post_training_integer_quantize(): + # We support integer input/output for integer quantized models only. 
+ if quant_mode.training_time_int8_allow_float(): all_types = default_types + [constants.INT8, constants.QUANTIZED_UINT8] if self.inference_input_type not in all_types or \ self.inference_output_type not in all_types: @@ -643,6 +660,12 @@ class TFLiteConverterBaseV2(TFLiteConverterBase): if calibrate_and_quantize: result = self._calibrate_quantize_model(result, **flags) + flags_modify_model_io_type = quant_mode.flags_modify_model_io_type( + self.inference_input_type, self.inference_output_type) + if flags_modify_model_io_type: + result = _modify_integer_quantized_model_io_type( + result, **flags_modify_model_io_type) + if self._experimental_sparsify_model: result = _mlir_sparsify(result) diff --git a/tensorflow/lite/python/lite_v2_test.py b/tensorflow/lite/python/lite_v2_test.py index 6fab4fd6086..4093a9d5bb4 100644 --- a/tensorflow/lite/python/lite_v2_test.py +++ b/tensorflow/lite/python/lite_v2_test.py @@ -374,8 +374,12 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): return tf.keras.Sequential(QLinear(3, input_shape=(2,))) + @parameterized.named_parameters( + ('_DefaultFLOAT32InputOutput', lite.constants.FLOAT), + ('_INT8InputOutput', lite.constants.INT8), + ('_UINT8InputOutput', lite.constants.QUANTIZED_UINT8)) @test_util.run_v2_only - def testTrainingTimeQuantization(self): + def testTrainingTimeQuantization(self, inference_input_output_type): model = self._getTrainingTimeQuantizedModel() float_converter = lite.TFLiteConverterV2.from_keras_model(model) @@ -384,37 +388,24 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): quantized_converter = lite.TFLiteConverterV2.from_keras_model(model) quantized_converter.optimizations = [lite.Optimize.DEFAULT] + quantized_converter.inference_input_type = inference_input_output_type + quantized_converter.inference_output_type = inference_input_output_type quantized_tflite = quantized_converter.convert() self.assertTrue(quantized_tflite) - # Ensure that the quantized weights tflite model is smaller. - self.assertLess(len(quantized_tflite), len(float_tflite)) - interpreter = Interpreter(model_content=quantized_tflite) - self.assertEqual(np.float32, interpreter.get_input_details()[0]['dtype']) + interpreter.allocate_tensors() + input_details = interpreter.get_input_details() + self.assertLen(input_details, 1) + self.assertEqual(inference_input_output_type.as_numpy_dtype, + input_details[0]['dtype']) + output_details = interpreter.get_output_details() + self.assertLen(output_details, 1) + self.assertEqual(inference_input_output_type.as_numpy_dtype, + output_details[0]['dtype']) - @parameterized.named_parameters( - ('_INT8InputOutput', lite.constants.INT8), - ('_UINT8InputOutput', lite.constants.QUANTIZED_UINT8)) - def testInvalidTrainingTimeQuantization(self, inference_input_output_type): - # We currently don't support integer inference_input_type and - # inference_output_type flags for training time quantization. 
- - model = self._getTrainingTimeQuantizedModel() - - converter = lite.TFLiteConverterV2.from_keras_model(model) - tflite_model = converter.convert() - self.assertTrue(tflite_model) - - quantized_converter = lite.TFLiteConverterV2.from_keras_model(model) - quantized_converter.optimizations = [lite.Optimize.DEFAULT] - with self.assertRaises(ValueError) as error: - quantized_converter.inference_input_type = inference_input_output_type - quantized_converter.inference_output_type = inference_input_output_type - quantized_converter.convert() - self.assertEqual( - 'The inference_input_type and inference_output_type ' - 'must be tf.float32.', str(error.exception)) + # Ensure that the quantized tflite model is smaller. + self.assertLess(len(quantized_tflite), len(float_tflite)) @test_util.run_v2_only def testNewQuantizer(self): diff --git a/tensorflow/lite/python/util.py b/tensorflow/lite/python/util.py index ff7caad0f88..9f84681c12b 100644 --- a/tensorflow/lite/python/util.py +++ b/tensorflow/lite/python/util.py @@ -19,15 +19,21 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import copy import datetime import sys +from absl import logging + import six from six.moves import range +from flatbuffers.python import flatbuffers from tensorflow.core.protobuf import config_pb2 as _config_pb2 from tensorflow.core.protobuf import graph_debug_info_pb2 from tensorflow.core.protobuf import meta_graph_pb2 as _meta_graph_pb2 +from tensorflow.lite.python import lite_constants as _lite_constants +from tensorflow.lite.python import schema_py_generated as _schema_fb from tensorflow.lite.python.op_hint import convert_op_hints_to_stubs from tensorflow.lite.python.op_hint import find_all_hinted_output_nodes from tensorflow.lite.toco import types_pb2 as _types_pb2 @@ -55,6 +61,25 @@ _MAP_TF_TO_TFLITE_TYPES = { dtypes.bool: _types_pb2.BOOL, } +_MAP_TFLITE_ENUM_TO_TF_TYPES = { + 0: dtypes.float32, + 1: dtypes.float16, + 2: dtypes.int32, + 3: dtypes.uint8, + 4: dtypes.int64, + 5: dtypes.string, + 6: dtypes.bool, + 7: dtypes.int16, + 8: dtypes.complex64, + 9: dtypes.int8, + 10: dtypes.float64, +} + +_TFLITE_FILE_IDENTIFIER = b"TFL3" + +_TFLITE_MODEL_INPUT_OUTPUT_TYPES = (_lite_constants.FLOAT, _lite_constants.INT8, + _lite_constants.QUANTIZED_UINT8) + def convert_dtype_to_tflite_type(tf_dtype): """Converts tf.dtype to TFLite proto type. @@ -74,6 +99,31 @@ def convert_dtype_to_tflite_type(tf_dtype): return result +def _convert_tflite_enum_type_to_tf_type(tflite_enum_type): + """Converts tflite enum type (eg: 0) to tf type (eg: tf.float32). + + Args: + tflite_enum_type: tflite enum type (eg: 0, that corresponds to float32) + + Raises: + ValueError: If an invalid tflite enum type is provided. + + Returns: + tf type (eg: tf.float32) + """ + tf_type = _MAP_TFLITE_ENUM_TO_TF_TYPES.get(tflite_enum_type) + if tf_type is None: + raise ValueError( + "Unsupported enum {}. The valid map of enum to tf.dtypes is : {}" + .format(tflite_enum_type, _MAP_TFLITE_ENUM_TO_TF_TYPES)) + return tf_type + + +def _get_dtype_name(tf_type): + """Converts tf.dtype (eg: tf.float32) to str (eg: "tf.float32").""" + return "tf." + tf_type.name + + def get_tensor_name(tensor): """Returns name of the input tensor. 
@@ -514,3 +564,218 @@ extern const int {array_name}_len; license_text=license_text) return source_text, header_text + + +def _convert_model_from_bytearray_to_object(model_bytearray): + """Converts a tflite model from a bytearray into a parsable object.""" + model_object = _schema_fb.Model.GetRootAsModel(model_bytearray, 0) + model_object = _schema_fb.ModelT.InitFromObj(model_object) + model_object = copy.deepcopy(model_object) + model_object.subgraphs[0].inputs[0] = model_object.subgraphs[0].inputs[0] + return model_object + + +def _convert_model_from_object_to_bytearray(model_object): + """Converts a tflite model from a parsable object into a bytearray.""" + # Initial size of the buffer, which will grow automatically if needed + builder = flatbuffers.Builder(1024) + model_offset = model_object.Pack(builder) + builder.Finish(model_offset, file_identifier=_TFLITE_FILE_IDENTIFIER) + return bytes(builder.Output()) + + +def _remove_tensors_from_model(model, remove_tensors_idxs): + """Remove tensors from model.""" + if not remove_tensors_idxs: + return + if len(model.subgraphs) > 1: + raise ValueError("Model must only have one subgraph. Instead, it has " + "{} subgraphs.".format(len(model.subgraphs))) + subgraph = model.subgraphs[0] + tensors = subgraph.tensors + operators = subgraph.operators + + logging.debug("Removing tensors at indices : %s", remove_tensors_idxs) + # An optimized check to validate if "remove_tensors_idxs" (eg: [4,5,6]) is an + # exact subset, with ordering, of "tensors" indices (eg: [0,1,2,3,4,5,6]). + if min(remove_tensors_idxs) == len(tensors) - len(remove_tensors_idxs): + logging.debug("Removing tensors only at the end of the tensor list") + del tensors[min(remove_tensors_idxs):] + else: + logging.debug("Removing tensors requires updating the model") + # Map the old tensor indices to new tensor indices + d_old_to_new_tensors = {} + left_shift_by = 0 + for idx in range(len(tensors)): + if idx in remove_tensors_idxs: + left_shift_by += 1 + else: + d_old_to_new_tensors[idx] = idx - left_shift_by + logging.debug("Old to new tensors map: %s", d_old_to_new_tensors.__str__()) + # Update tensor indices referenced throughout the model + def update_tensors(tensor_idxs): + for i, ti in enumerate(tensor_idxs): + tensor_idxs[i] = d_old_to_new_tensors.get(ti, -1) + update_tensors(subgraph.inputs) + update_tensors(subgraph.outputs) + for op in operators: + update_tensors(op.inputs) + update_tensors(op.outputs) + # Delete the tensors + for idx in sorted(remove_tensors_idxs, reverse=True): + tensors.pop(idx) + logging.debug("Removed tensors marked for deletion") + + +def _validate_and_find_int8_quantized_inputs_outputs(model): + """Validate that model input is quantized and output is dequantized.""" + if len(model.subgraphs) > 1: + raise ValueError("Model must only have one subgraph. 
Instead, it has " + "{} subgraphs.".format(len(model.subgraphs))) + subgraph = model.subgraphs[0] + tensors = subgraph.tensors + operators = subgraph.operators + + # Ensure model has atleast one quantize and dequantize operator + quant_opcode_idx, dequant_opcode_idx = None, None + for idx, opcode in enumerate(model.operatorCodes): + if opcode.builtinCode == _schema_fb.BuiltinOperator.QUANTIZE: + quant_opcode_idx = idx + elif opcode.builtinCode == _schema_fb.BuiltinOperator.DEQUANTIZE: + dequant_opcode_idx = idx + if quant_opcode_idx is not None and dequant_opcode_idx is not None: + break + if quant_opcode_idx is None and dequant_opcode_idx is None: + raise ValueError("Model is not integer quantized as it does not " + "contain quantize/dequantize operators.") + + # Ensure model inputs and outputs are integer quantized + input_quant_ops, output_dequant_ops = [], [] + for op in operators: + # Find input quantize operator + if op.opcodeIndex == quant_opcode_idx and op.inputs[0] in subgraph.inputs: + pos, float_tensor, int_tensor = \ + "input", tensors[op.inputs[0]], tensors[op.outputs[0]] + input_quant_ops.append(op) + # Find output dequantize operator + elif op.opcodeIndex == dequant_opcode_idx and \ + op.outputs[0] in subgraph.outputs: + pos, float_tensor, int_tensor = \ + "output", tensors[op.outputs[0]], tensors[op.inputs[0]] + output_dequant_ops.append(op) + # Otherwise, ignore + else: + continue + # If found, validate the input/output tensor type + if float_tensor.type != _schema_fb.TensorType.FLOAT32: + raise ValueError( + "Model {} type must be tf.float32. Expected type for tensor with " + "name '{}' is tf.float32, instead type is tf.{}".format( + pos, float_tensor.name, + _convert_tflite_enum_type_to_tf_type(float_tensor.type).name)) + if int_tensor.type != _schema_fb.TensorType.INT8: + raise ValueError( + "Model is not integer quantized. Expected type for tensor with " + "name '{}' is tf.int8, instead type is tf.{}".format( + int_tensor.name, + _convert_tflite_enum_type_to_tf_type(int_tensor.type).name)) + + return input_quant_ops, output_dequant_ops + + +def modify_integer_quantized_model_io_type( + model, inference_input_type=_lite_constants.FLOAT, + inference_output_type=_lite_constants.FLOAT): + """Modify the float input/output type of an integer quantized model. + + Args: + model: An int8 quantized tflite model with float input and output. + inference_input_type: tf.DType representing final input type. + (default tf.float32) + inference_output_type: tf.DType representing final output type. + (default tf.float32) + + Returns: + An int8 quantized tflite model with modified input and/or output type. + + Raises: + ValueError: If the model is not int8 quantized or the inference_input_type + and/or inference_input_type is unsupported. + RuntimeError: If the modification was unsuccessful. 
+ + """ + # Return if input and output types default to float + if inference_input_type == _lite_constants.FLOAT and \ + inference_output_type == _lite_constants.FLOAT: + return model + + # Validate input and output types + if inference_input_type not in _TFLITE_MODEL_INPUT_OUTPUT_TYPES: + raise ValueError("The `inference_input_type` should be in {}".format( + tuple(_get_dtype_name(t) for t in _TFLITE_MODEL_INPUT_OUTPUT_TYPES))) + if inference_output_type not in _TFLITE_MODEL_INPUT_OUTPUT_TYPES: + raise ValueError("The `inference_output_type` should be in {}".format( + tuple(_get_dtype_name(t) for t in _TFLITE_MODEL_INPUT_OUTPUT_TYPES))) + + logging.debug(("Attempting to modify the model input from tf.float32 to %s " + "and output from tf.float32 to %s"), + _get_dtype_name(inference_input_type), + _get_dtype_name(inference_output_type)) + # Convert the model to an object + model = _convert_model_from_bytearray_to_object(model) + + # Validate the integer quantized model + input_quant_ops, output_dequant_ops = \ + _validate_and_find_int8_quantized_inputs_outputs(model) + + # Initialize references and variables + if len(model.subgraphs) > 1: + raise ValueError("Model must only have one subgraph. Instead, it has " + "{} subgraphs.".format(len(model.subgraphs))) + subgraph = model.subgraphs[0] + tensors = subgraph.tensors + operators = subgraph.operators + remove_tensors_idxs = set() + + # Modify model input type + if inference_input_type == _lite_constants.QUANTIZED_UINT8: + # Change quant op (float to int8) to quant op (uint8 to int8) + for op in input_quant_ops: + int8_quantization = tensors[op.outputs[0]].quantization + uint8_quantization = _schema_fb.QuantizationParametersT() + uint8_quantization.scale = [int8_quantization.scale[0]] + uint8_quantization.zeroPoint = [int8_quantization.zeroPoint[0] + 128] + tensors[op.inputs[0]].quantization = uint8_quantization + tensors[op.inputs[0]].type = _schema_fb.TensorType.UINT8 + elif inference_input_type == _lite_constants.INT8: + # Remove the inputs and the quant operator + for op in input_quant_ops: + subgraph.inputs[subgraph.inputs == op.inputs[0]] = op.outputs[0] + remove_tensors_idxs.add(op.inputs[0]) + operators.remove(op) + + # Modify model output type + if inference_output_type == _lite_constants.QUANTIZED_UINT8: + # Change dequant op (int8 to float) to quant op (int8 to uint8) + for op in output_dequant_ops: + op.opcodeIndex = input_quant_ops[0].opcodeIndex + int8_quantization = tensors[op.inputs[0]].quantization + uint8_quantization = _schema_fb.QuantizationParametersT() + uint8_quantization.scale = [int8_quantization.scale[0]] + uint8_quantization.zeroPoint = [int8_quantization.zeroPoint[0] + 128] + tensors[op.outputs[0]].quantization = uint8_quantization + tensors[op.outputs[0]].type = _schema_fb.TensorType.UINT8 + elif inference_output_type == _lite_constants.INT8: + # Remove the outputs and the dequant operator + for op in output_dequant_ops: + subgraph.outputs[subgraph.outputs == op.outputs[0]] = op.inputs[0] + remove_tensors_idxs.add(op.outputs[0]) + operators.remove(op) + + # Remove tensors marked for deletion. 
+ _remove_tensors_from_model(model, remove_tensors_idxs) + + # Convert the model to a bytearray + model = _convert_model_from_object_to_bytearray(model) + + return model diff --git a/tensorflow/lite/python/util_test.py b/tensorflow/lite/python/util_test.py index f3c287dd7fc..0e9cbc1e58a 100644 --- a/tensorflow/lite/python/util_test.py +++ b/tensorflow/lite/python/util_test.py @@ -19,7 +19,10 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from absl.testing import parameterized +import numpy as np from six.moves import range +import tensorflow as tf from tensorflow.lite.python import lite_constants from tensorflow.lite.python import util @@ -61,6 +64,31 @@ class UtilTest(test_util.TensorFlowTestCase): self.assertEqual( util.convert_dtype_to_tflite_type(dtypes.bool), _types_pb2.BOOL) + def testConvertEnumToDtype(self): + self.assertEqual( + util._convert_tflite_enum_type_to_tf_type(0), dtypes.float32) + self.assertEqual( + util._convert_tflite_enum_type_to_tf_type(1), dtypes.float16) + self.assertEqual(util._convert_tflite_enum_type_to_tf_type(2), dtypes.int32) + self.assertEqual(util._convert_tflite_enum_type_to_tf_type(3), dtypes.uint8) + self.assertEqual(util._convert_tflite_enum_type_to_tf_type(4), dtypes.int64) + self.assertEqual( + util._convert_tflite_enum_type_to_tf_type(5), dtypes.string) + self.assertEqual(util._convert_tflite_enum_type_to_tf_type(6), dtypes.bool) + self.assertEqual(util._convert_tflite_enum_type_to_tf_type(7), dtypes.int16) + self.assertEqual( + util._convert_tflite_enum_type_to_tf_type(8), dtypes.complex64) + self.assertEqual(util._convert_tflite_enum_type_to_tf_type(9), dtypes.int8) + self.assertEqual( + util._convert_tflite_enum_type_to_tf_type(10), dtypes.float64) + with self.assertRaises(ValueError) as error: + util._convert_tflite_enum_type_to_tf_type(11) + self.assertEqual( + "Unsupported enum 11. The valid map of enum to tf.dtypes is : " + "{0: tf.float32, 1: tf.float16, 2: tf.int32, 3: tf.uint8, 4: tf.int64, " + "5: tf.string, 6: tf.bool, 7: tf.int16, 8: tf.complex64, 9: tf.int8, " + "10: tf.float64}", str(error.exception)) + def testTensorName(self): with ops.Graph().as_default(): in_tensor = array_ops.placeholder(shape=[4], dtype=dtypes.float32) @@ -195,5 +223,140 @@ class TensorFunctionsTest(test_util.TensorFlowTestCase): self.assertEqual([None, 3, 5], tensor.shape.as_list()) +def _generate_integer_tflite_model(): + """Define an integer post-training quantized tflite model.""" + # Load MNIST dataset + n = 10 # Number of samples + (train_images, train_labels), (test_images, test_labels) = \ + tf.keras.datasets.mnist.load_data() + train_images, train_labels, test_images, test_labels = \ + train_images[:n], train_labels[:n], test_images[:n], test_labels[:n] + + # Normalize the input image so that each pixel value is between 0 to 1. 
+ train_images = train_images / 255.0 + test_images = test_images / 255.0 + + # Define TF model + model = tf.keras.Sequential([ + tf.keras.layers.InputLayer(input_shape=(28, 28)), + tf.keras.layers.Reshape(target_shape=(28, 28, 1)), + tf.keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation="relu"), + tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), + tf.keras.layers.Flatten(), + tf.keras.layers.Dense(10) + ]) + + # Train + model.compile( + optimizer="adam", + loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), + metrics=["accuracy"]) + + model.fit( + train_images, + train_labels, + epochs=1, + validation_split=0.1, + ) + + # Convert TF Model to an Integer Quantized TFLite Model + converter = tf.lite.TFLiteConverter.from_keras_model(model) + converter.optimizations = {tf.lite.Optimize.DEFAULT} + def representative_dataset_gen(): + for _ in range(2): + yield [ + np.random.uniform(low=0, high=1, size=(1, 28, 28)).astype( + np.float32) + ] + converter.representative_dataset = representative_dataset_gen + converter.target_spec.supported_ops = {tf.lite.OpsSet.TFLITE_BUILTINS_INT8} + tflite_model = converter.convert() + + return tflite_model + + +def _test_param_modify_integer_model_io_type(): + """Function to generate parameterized inputs for testing.""" + params = [] + str_template = "_{}{}{}" + map_model_type = { + "PostTraining": True, + # "DuringTraining": False, + } + map_types = { + "": lite_constants.FLOAT, + "INT8": lite_constants.INT8, + "UINT8": lite_constants.QUANTIZED_UINT8 + } + for k1, v1 in map_model_type.items(): + for k2, v2 in map_types.items(): + istr = "_Input{}".format(k2) if k2 else "" + for k3, v3 in map_types.items(): + ostr = "_Output{}".format(k3) if k3 else "" if istr else "_NoUpdate" + params.append((str_template.format(k1, istr, ostr), v1, v2, v3)) + return params + + +# TODO(b/161174063): Merge tests for integer input/output type +class UtilModifyIntegerQuantizedModelIOTypeTest( + test_util.TensorFlowTestCase, parameterized.TestCase): + + @classmethod + def setUpClass(cls): + super(UtilModifyIntegerQuantizedModelIOTypeTest, cls).setUpClass() + cls.post_train_integer_model = _generate_integer_tflite_model() + + @parameterized.named_parameters(_test_param_modify_integer_model_io_type()) + def test(self, is_post_train, in_tftype, out_tftype): + """Modify the float input/output type of an integer quantized model.""" + + def _run_tflite_inference(model, in_tftype, out_tftype): + """Run inference on a model with a specific input/output type.""" + # Load TFLite model and allocate tensors. 
+ interpreter = tf.lite.Interpreter(model_content=model) + interpreter.allocate_tensors() + input_details = interpreter.get_input_details()[0] + output_details = interpreter.get_output_details()[0] + + # Validate TFLite model input and output types + self.assertEqual(input_details["dtype"], in_tftype.as_numpy_dtype) + self.assertEqual(output_details["dtype"], out_tftype.as_numpy_dtype) + + # Define Input + np.random.seed(0) + input_data = np.random.uniform(low=0, high=1, size=(1, 28, 28)) + input_data = input_data.astype(np.float32) + if input_details["dtype"] != np.float32: + # quantize float to int + scale, zero_point = input_details["quantization"] + input_data = input_data / scale + zero_point + input_data = input_data.astype(input_details["dtype"]) + + # Run Inference + interpreter.set_tensor(input_details["index"], input_data) + interpreter.invoke() + + # Get output + output_data = interpreter.get_tensor(output_details["index"])[0] + if output_details["dtype"] != np.float32: + # dequantize int to float + scale, zero_point = output_details["quantization"] + output_data = output_data.astype(np.float32) + output_data = (output_data - zero_point) * scale + + return output_data + + model = self.__class__.post_train_integer_model if is_post_train else None + # Run model inference with float input output type + output_data = _run_tflite_inference(model, tf.float32, tf.float32) + # Run model inference with modified integer input output type + model_io = util.modify_integer_quantized_model_io_type( + model, in_tftype, out_tftype) + output_io_data = _run_tflite_inference(model_io, in_tftype, out_tftype) + + # Validate that both the outputs are the same + self.assertTrue(np.allclose(output_data, output_io_data, atol=1.0)) + + if __name__ == "__main__": test.main() From be699e9701716c248d937108cd20cff5c1aa1331 Mon Sep 17 00:00:00 2001 From: Karim Nosir Date: Wed, 22 Jul 2020 14:48:34 -0700 Subject: [PATCH 1082/2522] Update hexagon NN to use v1.20.0 PiperOrigin-RevId: 322660166 Change-Id: Ia941abdda0f4d99b2be8d072a8da206d72c2661d --- tensorflow/lite/g3doc/performance/hexagon_delegate.md | 7 +++++-- third_party/hexagon/workspace.bzl | 6 +++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/tensorflow/lite/g3doc/performance/hexagon_delegate.md b/tensorflow/lite/g3doc/performance/hexagon_delegate.md index 30d108d6990..309d3021f97 100644 --- a/tensorflow/lite/g3doc/performance/hexagon_delegate.md +++ b/tensorflow/lite/g3doc/performance/hexagon_delegate.md @@ -75,10 +75,11 @@ dependencies { * [v1.10.3](https://storage.cloud.google.com/download.tensorflow.org/tflite/hexagon_nn_skel_1_10_3_1.run) * [v1.14](https://storage.cloud.google.com/download.tensorflow.org/tflite/hexagon_nn_skel_v1.14.run) * [v1.17](https://storage.cloud.google.com/download.tensorflow.org/tflite/hexagon_nn_skel_v1.17.0.0.run) + * [v1.20](https://storage.cloud.google.com/download.tensorflow.org/tflite/hexagon_nn_skel_v1.20.0.0.run) Note: You will need to accept the license agreement. -Note: As of 04/28/2020 you should use v1.17. +Note: As of 07/22/2020 you should use v1.20. Note: You must use the hexagon_nn libraries with the compatible version of interface library. 
Interface library is part of the AAR and fetched by bazel @@ -180,10 +181,12 @@ dependencies { “libhexagon_nn_skel_v66.so” * [v1.10.3](https://storage.cloud.google.com/download.tensorflow.org/tflite/hexagon_nn_skel_1_10_3_1.run) * [v1.14](https://storage.cloud.google.com/download.tensorflow.org/tflite/hexagon_nn_skel_v1.14.run) + * [v1.17](https://storage.cloud.google.com/download.tensorflow.org/tflite/hexagon_nn_skel_v1.17.0.0.run) + * [v1.20](https://storage.cloud.google.com/download.tensorflow.org/tflite/hexagon_nn_skel_v1.20.0.0.run) Note: You will need to accept the license agreement. -Note: As of 03/03/2020 you should use v1.14. +Note: As of 07/22/2020 you should use v1.20. Note: You must use the hexagon_nn libraries with the compatible version of interface library. Interface library is part of the AAR and fetched by bazel diff --git a/third_party/hexagon/workspace.bzl b/third_party/hexagon/workspace.bzl index 1a682f0e8ad..a22e2dbe87e 100644 --- a/third_party/hexagon/workspace.bzl +++ b/third_party/hexagon/workspace.bzl @@ -2,14 +2,14 @@ load("//third_party:repo.bzl", "third_party_http_archive") -# Note: Use libhexagon_nn_skel version 1.17 Only with the current version. +# Note: Use libhexagon_nn_skel version 1.20 Only with the current version. # This comment will be updated with compatible version. def repo(): third_party_http_archive( name = "hexagon_nn", - sha256 = "a0c011f7795e1a09eb7355be295d6442718b8565cc0e3c58a91671dde2bc99fb", + sha256 = "2b0e29a061f389ad52054c12fcae38991b5f731d7a05770c7ac421433ed17cc2", urls = [ - "https://storage.googleapis.com/mirror.tensorflow.org/storage.cloud.google.com/download.tensorflow.org/tflite/hexagon_nn_headers_v1.17.0.0.tgz", + "https://storage.googleapis.com/mirror.tensorflow.org/storage.cloud.google.com/download.tensorflow.org/tflite/hexagon_nn_headers_v1.20.0.0.tgz", ], build_file = "//third_party/hexagon:BUILD", ) From cc4adc5c12df9c7cde95a1012410a5a51d169030 Mon Sep 17 00:00:00 2001 From: Vignesh Kothapalli Date: Thu, 23 Jul 2020 03:33:51 +0530 Subject: [PATCH 1083/2522] updated test comment and addressed lint issue --- .../python/data/experimental/kernel_tests/unique_test.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tensorflow/python/data/experimental/kernel_tests/unique_test.py b/tensorflow/python/data/experimental/kernel_tests/unique_test.py index 04a33ea009d..107faf8f0b9 100644 --- a/tensorflow/python/data/experimental/kernel_tests/unique_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/unique_test.py @@ -51,8 +51,7 @@ class UniqueTest(test_base.DatasetTestBase, parameterized.TestCase): for test_case, expected in test_cases: current_test_case = test_case self.assertDatasetProduces(dataset, [ - compat.as_bytes( - element) if dtype == dtypes.string else element + compat.as_bytes(element) if dtype == dtypes.string else element for element in expected ]) @@ -83,8 +82,6 @@ class UniqueTest(test_base.DatasetTestBase, parameterized.TestCase): def testTypeMismatch(self): # raises InternalError when dtypes don't match. - # NOTE: Generating the following expected outputs can be considered/taken up as an - # enhancement in the experimental API. with self.assertRaises(errors.InternalError): self._testSimpleHelper(dtypes.string, [ (["hello", 1, 2, 1], ["hello"]), From 57511d5dcccba2b917de5920c7e3fec6eefa5841 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Wed, 22 Jul 2020 14:50:19 -0700 Subject: [PATCH 1084/2522] Added support of ELU activation for GPU backend. 
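
[Illustration, not part of the patch] The element-wise mapping implemented by the OpenCL, GLSL, and Metal kernels below is ELU: f(x) = x for x >= 0 and exp(x) - 1 for x < 0. A NumPy reference, useful for checking the expected values in the new tests, might look like this:

    # NumPy reference for the ELU values the new kernels and tests expect.
    import numpy as np

    def elu_reference(x):
        x = np.asarray(x, dtype=np.float64)
        # exp(x) - 1 on the negative side, identity on the non-negative side.
        return np.where(x < 0.0, np.exp(x) - 1.0, x)

    print(elu_reference([0.0, 1.0, -1.0, 100.0, -100.0, 0.01, -0.01]))
    # approximately [0.0, 1.0, -0.6321, 100.0, -1.0, 0.01, -0.00995]
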
PiperOrigin-RevId: 322660529 Change-Id: Icb285d475197ea487839c76e84a70f0f9ba6a755 --- .../delegates/gpu/cl/kernels/elementwise.cc | 6 +++++ .../gpu/cl/kernels/elementwise_test.cc | 26 +++++++++++++++++++ .../gpu/cl/selectors/operation_selector.cc | 1 + .../delegates/gpu/common/model_builder.cc | 4 +++ .../lite/delegates/gpu/common/operations.cc | 3 +++ .../lite/delegates/gpu/common/operations.h | 1 + .../delegates/gpu/gl/kernels/elementwise.cc | 9 +++++++ .../gpu/gl/kernels/elementwise_test.cc | 15 +++++++++++ .../lite/delegates/gpu/gl/kernels/registry.cc | 1 + tensorflow/lite/delegates/gpu/metal/api.cc | 1 + .../gpu/metal/kernels/elementwise.cc | 5 ++++ .../gpu/metal/kernels/elementwise_test.mm | 15 +++++++++++ 12 files changed, 87 insertions(+) diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc index 47d7dababeb..babf564039b 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc @@ -37,6 +37,12 @@ std::string GetOneInputCode(const OperationType& op_type, case OperationType::COS: result = "$0 = cos($0);\n"; break; + case OperationType::ELU: + result = "$0.x = $0.x < (FLT)(0.0f) ? exp($0.x) - (FLT)(1.0f) : $0.x;\n"; + result += "$0.y = $0.y < (FLT)(0.0f) ? exp($0.y) - (FLT)(1.0f) : $0.y;\n"; + result += "$0.z = $0.z < (FLT)(0.0f) ? exp($0.z) - (FLT)(1.0f) : $0.z;\n"; + result += "$0.w = $0.w < (FLT)(0.0f) ? exp($0.w) - (FLT)(1.0f) : $0.w;\n"; + break; case OperationType::EXP: result = "$0 = exp($0);\n"; break; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise_test.cc index 7c3bdbe66e7..ea7e1fa0541 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise_test.cc @@ -81,6 +81,32 @@ TEST_F(OpenCLOperationTest, Cos) { } } +TEST_F(OpenCLOperationTest, Elu) { + TensorFloat32 src_tensor; + src_tensor.shape = BHWC(1, 1, 1, 7); + src_tensor.data = {0.0f, 1.0f, -1.0f, 100.0f, -100.0f, 0.01f, -0.01f}; + + for (auto storage : env_.GetSupportedStorages()) { + for (auto precision : env_.GetSupportedPrecisions()) { + const float eps = precision == CalculationsPrecision::F32 ? 
1e-6f : 1e-2f; + OperationDef op_def; + op_def.precision = precision; + auto data_type = DeduceDataTypeFromPrecision(precision); + op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); + op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); + TensorFloat32 dst_tensor; + ElementwiseOneInput operation = + CreateElementwiseOneInput(op_def, OperationType::ELU); + ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, + BHWC(1, 1, 1, 7), &dst_tensor)); + EXPECT_THAT(dst_tensor.data, + Pointwise(FloatNear(eps), {0.0f, 1.0f, std::exp(-1.0f) - 1.0f, + 100.0f, std::exp(-100.0f) - 1.0f, + 0.01f, std::exp(-0.01f) - 1.0f})); + } + } +} + TEST_F(OpenCLOperationTest, Exp) { TensorFloat32 src_tensor; src_tensor.shape = BHWC(1, 1, 1, 7); diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc index 3aefbb8d652..d16eb982b35 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc @@ -383,6 +383,7 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context, } case OperationType::ABS: case OperationType::COS: + case OperationType::ELU: case OperationType::EXP: case OperationType::HARD_SWISH: case OperationType::LOG: diff --git a/tensorflow/lite/delegates/gpu/common/model_builder.cc b/tensorflow/lite/delegates/gpu/common/model_builder.cc index b62eb12aa7a..e789f1ec980 100644 --- a/tensorflow/lite/delegates/gpu/common/model_builder.cc +++ b/tensorflow/lite/delegates/gpu/common/model_builder.cc @@ -2641,6 +2641,10 @@ std::unique_ptr NewOperationParser( break; case kTfLiteBuiltinDiv: return std::make_unique(OperationType::DIV); + case kTfLiteBuiltinElu: + return std::make_unique(OperationType::ELU); + case kTfLiteBuiltinExp: + return std::make_unique(OperationType::EXP); case kTfLiteBuiltinFullyConnected: return std::make_unique(); case kTfLiteBuiltinHardSwish: diff --git a/tensorflow/lite/delegates/gpu/common/operations.cc b/tensorflow/lite/delegates/gpu/common/operations.cc index c3861ca2baa..33e7844bbe9 100644 --- a/tensorflow/lite/delegates/gpu/common/operations.cc +++ b/tensorflow/lite/delegates/gpu/common/operations.cc @@ -90,6 +90,8 @@ std::string ToString(enum OperationType op) { return "depthwise_convolution"; case OperationType::DIV: return "div"; + case OperationType::ELU: + return "elu"; case OperationType::EXP: return "exp"; case OperationType::FULLY_CONNECTED: @@ -171,6 +173,7 @@ OperationType OperationTypeFromString(const std::string& name) { {"cos", OperationType::COS}, {"depthwise_convolution", OperationType::DEPTHWISE_CONVOLUTION}, {"div", OperationType::DIV}, + {"elu", OperationType::ELU}, {"exp", OperationType::EXP}, {"fully_connected", OperationType::FULLY_CONNECTED}, {"hard_swish", OperationType::HARD_SWISH}, diff --git a/tensorflow/lite/delegates/gpu/common/operations.h b/tensorflow/lite/delegates/gpu/common/operations.h index 3ad54dd0118..ec9a78ae747 100644 --- a/tensorflow/lite/delegates/gpu/common/operations.h +++ b/tensorflow/lite/delegates/gpu/common/operations.h @@ -43,6 +43,7 @@ enum class OperationType { COS, DEPTHWISE_CONVOLUTION, DIV, + ELU, EXP, FULLY_CONNECTED, HARD_SWISH, diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/elementwise.cc b/tensorflow/lite/delegates/gpu/gl/kernels/elementwise.cc index b5971c59667..876d67db351 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/elementwise.cc +++ 
b/tensorflow/lite/delegates/gpu/gl/kernels/elementwise.cc @@ -41,6 +41,14 @@ class ElementwiseOneArgument : public NodeShader { case OperationType::COS: source = "value_0 = cos(value_0);"; break; + case OperationType::ELU: + source = R"( + value_0.x = value_0.x < 0.0 ? exp(value_0.x) - 1.0 : value_0.x; + value_0.y = value_0.y < 0.0 ? exp(value_0.y) - 1.0 : value_0.y; + value_0.z = value_0.z < 0.0 ? exp(value_0.z) - 1.0 : value_0.z; + value_0.w = value_0.w < 0.0 ? exp(value_0.w) - 1.0 : value_0.w; + )"; + break; case OperationType::EXP: source = "value_0 = exp(value_0);"; break; @@ -212,6 +220,7 @@ std::unique_ptr NewElementwiseNodeShader( switch (operation_type) { case OperationType::ABS: case OperationType::COS: + case OperationType::ELU: case OperationType::EXP: case OperationType::LOG: case OperationType::HARD_SWISH: diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/elementwise_test.cc b/tensorflow/lite/delegates/gpu/gl/kernels/elementwise_test.cc index 625a09eebf4..60fd9174f90 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/elementwise_test.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/elementwise_test.cc @@ -60,6 +60,21 @@ TEST(ElementwiseOneArgumentTest, Cos) { Pointwise(FloatNear(1e-6), {1.0, -1.0, -1.0, 0.540302})); } +TEST(ElementwiseOneArgumentTest, Elu) { + OperationType op_type = OperationType::ELU; + const BHWC shape(1, 1, 1, 7); + SingleOpModel model({/*type=*/ToString(op_type), /*attributes=*/{}}, + /*inputs=*/{GetTensorRef(0, shape)}, + /*outputs=*/{GetTensorRef(1, shape)}); + ASSERT_TRUE(model.PopulateTensor( + 0, {0.0f, 1.0f, -1.0f, 100.0f, -100.0f, 0.01f, -0.01f})); + ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); + EXPECT_THAT(model.GetOutput(0), + Pointwise(FloatNear(1e-6), {0.0f, 1.0f, std::exp(-1.0f) - 1.0f, + 100.0f, std::exp(-100.0f) - 1.0f, + 0.01f, std::exp(-0.01f) - 1.0f})); +} + TEST(ElementwiseOneArgumentTest, Exp) { OperationType op_type = OperationType::EXP; const BHWC shape(1, 1, 1, 7); diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/registry.cc b/tensorflow/lite/delegates/gpu/gl/kernels/registry.cc index 913eebdabbe..b4bfbcd8f56 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/registry.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/registry.cc @@ -97,6 +97,7 @@ class Registry : public NodeShader { insert_elementwise_op(Type::ABS); insert_elementwise_op(Type::COS); insert_elementwise_op(Type::DIV); + insert_elementwise_op(Type::ELU); insert_elementwise_op(Type::EXP); insert_elementwise_op(Type::HARD_SWISH); insert_elementwise_op(Type::LOG); diff --git a/tensorflow/lite/delegates/gpu/metal/api.cc b/tensorflow/lite/delegates/gpu/metal/api.cc index 6a54e4e26bd..cbaa00246af 100644 --- a/tensorflow/lite/delegates/gpu/metal/api.cc +++ b/tensorflow/lite/delegates/gpu/metal/api.cc @@ -365,6 +365,7 @@ absl::Status RegisterPrimaryOps(const GraphFloat32& graph, const Node* node, break; case OperationType::ABS: case OperationType::COS: + case OperationType::ELU: case OperationType::EXP: case OperationType::HARD_SWISH: case OperationType::LOG: diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/elementwise.cc b/tensorflow/lite/delegates/gpu/metal/kernels/elementwise.cc index 963bc1cd5ab..3b07b42afb4 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/elementwise.cc +++ b/tensorflow/lite/delegates/gpu/metal/kernels/elementwise.cc @@ -38,6 +38,11 @@ std::string OneInputFunctor(OperationType op_type, const std::string& value) { {OperationType::HARD_SWISH, "$0 * clamp($0 / 6.0f + FLT4(0.5f), FLT4(0.0f), FLT4(1.0f))"}, 
{OperationType::COS, "cos($0)"}, + {OperationType::ELU, + "FLT4($0.x < FLT(0.0f) ? exp($0.x) - FLT(1.0f) : $0.x," + "$0.y < FLT(0.0f) ? exp($0.y) - FLT(1.0f) : $0.y," + "$0.z < FLT(0.0f) ? exp($0.z) - FLT(1.0f) : $0.z," + "$0.w < FLT(0.0f) ? exp($0.w) - FLT(1.0f) : $0.w)"}, {OperationType::EXP, "exp($0)"}, {OperationType::LOG, "log($0)"}, {OperationType::SQRT, "sqrt($0)"}, diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/elementwise_test.mm b/tensorflow/lite/delegates/gpu/metal/kernels/elementwise_test.mm index d3327e9ec2c..a95cbf38f02 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/elementwise_test.mm +++ b/tensorflow/lite/delegates/gpu/metal/kernels/elementwise_test.mm @@ -93,6 +93,21 @@ TensorRef GetTensorRef(int ref, const BHWC& shape) { XCTAssertTrue(status.ok(), @"%s", std::string(status.message()).c_str()); } +- (void)testElu { + OperationType op_type = OperationType::ELU; + const BHWC shape(1, 1, 1, 7); + SingleOpModel model({/*type=*/ToString(op_type), /*attributes=*/{}}, + /*inputs=*/{GetTensorRef(0, shape)}, + /*outputs=*/{GetTensorRef(1, shape)}); + XCTAssertTrue(model.PopulateTensor(0, {0.0f, 1.0f, -1.0f, 100.0f, -100.0f, 0.01f, -0.01f})); + auto status = model.Invoke(); + XCTAssertTrue(status.ok(), @"%s", std::string(status.message()).c_str()); + status = CompareVectors({0.0f, 1.0f, std::exp(-1.0f) - 1.0f, 100.0f, + std::exp(-100.0f) - 1.0f, 0.01f, std::exp(-0.01f) - 1.0f}, + model.GetOutput(0), 1e-6f); + XCTAssertTrue(status.ok(), @"%s", std::string(status.message()).c_str()); +} + - (void)testExp { OperationType op_type = OperationType::EXP; const BHWC shape(1, 1, 1, 7); From 37cd17ec00673d4e71ce8a600e78d866fc17eba4 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Wed, 22 Jul 2020 14:50:21 -0700 Subject: [PATCH 1085/2522] Fixed NewRemoveSingleInputAdd/MergePaddingWithAddOperation to support HWC tensor parameter. 
PiperOrigin-RevId: 322660539 Change-Id: I930f4dd832f39872c4ccf1fa77c1e8321fb2549b --- .../transformations/merge_padding_with.cc | 8 ++-- .../merge_padding_with_test.cc | 42 ++++++++++++++++++- .../gpu/common/transformations/remove_noop.cc | 4 +- .../transformations/remove_noop_test.cc | 29 ++++++++++++- 4 files changed, 77 insertions(+), 6 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/common/transformations/merge_padding_with.cc b/tensorflow/lite/delegates/gpu/common/transformations/merge_padding_with.cc index 2f1621eb34b..5d328cac803 100644 --- a/tensorflow/lite/delegates/gpu/common/transformations/merge_padding_with.cc +++ b/tensorflow/lite/delegates/gpu/common/transformations/merge_padding_with.cc @@ -146,13 +146,15 @@ class MergePaddingWithAddOperation : public NodeTransformation { AddAttributes add_attr = absl::any_cast(add_node->operation.attributes); - const bool is_add_broadcast = + const bool is_add_hwc = + absl::holds_alternative>(add_attr.param); + const bool is_add_linear = absl::holds_alternative>( add_attr.param); const bool is_add_scalar = absl::holds_alternative(add_attr.param); - if (is_add_broadcast || is_add_scalar) { + if (is_add_hwc || is_add_linear || is_add_scalar) { return {TransformStatus::SKIPPED, - "Cannot remove padding when this broadcast/scalar ADD"}; + "Cannot remove padding when ADD has constant argument."}; } absl::Status status = RemovePrecedingNode(graph, node, add_node); diff --git a/tensorflow/lite/delegates/gpu/common/transformations/merge_padding_with_test.cc b/tensorflow/lite/delegates/gpu/common/transformations/merge_padding_with_test.cc index 01aade9812d..6952187364e 100644 --- a/tensorflow/lite/delegates/gpu/common/transformations/merge_padding_with_test.cc +++ b/tensorflow/lite/delegates/gpu/common/transformations/merge_padding_with_test.cc @@ -108,7 +108,7 @@ TEST(MergePaddingWith, MergeTwo) { EXPECT_EQ(HW(2, 2), conv_attr.padding.appended); } -TEST(MergePaddingWithAdd, MergeOne) { +TEST(MergePaddingWithAdd, MergeAlignedPadding) { GraphFloat32 graph; auto input0 = graph.NewValue(); input0->tensor.shape = BHWC(1, 4, 4, 8); @@ -146,6 +146,46 @@ TEST(MergePaddingWithAdd, MergeOne) { EXPECT_EQ(add_node, graph.nodes()[0]); } +TEST(MergePaddingWithAdd, DoNotTrigger_AddWithAttributes) { + GraphFloat32 graph; + auto input0 = graph.NewValue(); + input0->tensor.shape = BHWC(1, 4, 4, 8); + auto input1 = graph.NewValue(); + auto padded = graph.NewValue(); + auto output = graph.NewValue(); + + auto pad_node = graph.NewNode(); + pad_node->operation.type = ToString(OperationType::PAD); + PadAttributes pad_attr; + pad_attr.prepended = BHWC(0, 0, 0, 0); + pad_attr.appended = BHWC(0, 0, 0, 32); + pad_node->operation.attributes = pad_attr; + + ASSERT_TRUE(graph.AddConsumer(pad_node->id, input0->id).ok()); + ASSERT_TRUE(graph.SetProducer(pad_node->id, padded->id).ok()); + + auto add_node = graph.NewNode(); + AddAttributes add_attr; + add_attr.param = Tensor(); + ASSERT_TRUE(graph.AddConsumer(add_node->id, padded->id).ok()); + ASSERT_TRUE(graph.AddConsumer(add_node->id, input1->id).ok()); + ASSERT_TRUE(graph.SetProducer(add_node->id, output->id).ok()); + add_node->operation.type = ToString(OperationType::ADD); + add_node->operation.attributes = add_attr; + + ASSERT_EQ(2, graph.nodes().size()); + ASSERT_EQ(4, graph.values().size()); + + auto transformation = NewMergePaddingWithAdd(); + ModelTransformer transformer(&graph, nullptr); + transformer.Apply("merge_padding", transformation.get()); + + ASSERT_EQ(2, graph.nodes().size()); + ASSERT_EQ(4, 
graph.values().size()); + EXPECT_EQ(pad_node, graph.nodes()[0]); + EXPECT_EQ(add_node, graph.nodes()[1]); +} + } // namespace } // namespace gpu } // namespace tflite diff --git a/tensorflow/lite/delegates/gpu/common/transformations/remove_noop.cc b/tensorflow/lite/delegates/gpu/common/transformations/remove_noop.cc index b4cdd87109a..2e3cdb0332e 100644 --- a/tensorflow/lite/delegates/gpu/common/transformations/remove_noop.cc +++ b/tensorflow/lite/delegates/gpu/common/transformations/remove_noop.cc @@ -77,7 +77,9 @@ std::unique_ptr NewRemoveSingleInputAdd() { } auto& attr = absl::any_cast(node->operation.attributes); - return !absl::holds_alternative>( + return !absl::holds_alternative>( + attr.param) && + !absl::holds_alternative>( attr.param) && !absl::holds_alternative(attr.param); }); diff --git a/tensorflow/lite/delegates/gpu/common/transformations/remove_noop_test.cc b/tensorflow/lite/delegates/gpu/common/transformations/remove_noop_test.cc index 183b7cdbe13..559140d2852 100644 --- a/tensorflow/lite/delegates/gpu/common/transformations/remove_noop_test.cc +++ b/tensorflow/lite/delegates/gpu/common/transformations/remove_noop_test.cc @@ -56,7 +56,34 @@ TEST(RemoveSingleInputAdd, Smoke) { ASSERT_EQ(output, graph.values()[1]); } -TEST(RemoveSingleInputAdd, DoNotTrigger_Tensor) { +TEST(RemoveSingleInputAdd, DoNotTrigger_TensorHWC) { + GraphFloat32 graph; + auto input = graph.NewValue(); + auto first_node = graph.NewNode(); + ASSERT_TRUE(graph.AddConsumer(first_node->id, input->id).ok()); + + auto add_node = graph.NewNode(); + Value* output; + ASSERT_TRUE(AddOutput(&graph, add_node, &output).ok()); + add_node->operation.type = ToString(OperationType::ADD); + AddAttributes attr; + attr.param = Tensor(); + add_node->operation.attributes = attr; + + Value* temp; + ASSERT_TRUE(ConnectTwoNodes(&graph, first_node, add_node, &temp).ok()); + ASSERT_EQ(2, graph.nodes().size()); + ASSERT_EQ(3, graph.values().size()); + + auto transformation = NewRemoveSingleInputAdd(); + ModelTransformer transformer(&graph, nullptr); + transformer.Apply("noop", transformation.get()); + + EXPECT_EQ(2, graph.nodes().size()); + ASSERT_EQ(3, graph.values().size()); +} + +TEST(RemoveSingleInputAdd, DoNotTrigger_LinearTensor) { GraphFloat32 graph; auto input = graph.NewValue(); auto first_node = graph.NewNode(); From 2ff0762c246494e37e2ca61389fc76a8a77e8abc Mon Sep 17 00:00:00 2001 From: Ken Franko Date: Wed, 22 Jul 2020 14:54:55 -0700 Subject: [PATCH 1086/2522] Add boilerplate skeleton for MarkOpsForOutsideCompilation pass. This pass will mark unsupported(uncompilable) ops in a device cluster with outside compilation attribute. These ops will be extracted to run on CPU instead of cluster device in a later pass. 
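
[Illustration, not part of the patch] The `_xla_outside_compilation` marking this pass is intended to add automatically appears to be the same mechanism users reach today by wrapping host-only ops explicitly; a rough TF2 Python sketch, assuming a working TPU runtime and an initialized TPUStrategy named `strategy` (both hypothetical here), is:

    # Rough sketch, assuming a TPU runtime and an initialized TPUStrategy `strategy`.
    import tensorflow as tf

    @tf.function
    def train_step(x):
      y = x * 2.0
      # tf.print has no TPU kernel; outside_compilation runs it on the host CPU,
      # which is the effect the automatic marking pass aims to get for unsupported ops.
      tf.tpu.outside_compilation(tf.print, "y =", y)
      return y

    # strategy.run(train_step, args=(tf.ones([8]),))
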
PiperOrigin-RevId: 322661511 Change-Id: I772aa9d3c815fcbff77159d8d6c90c89c1cc1820 --- tensorflow/compiler/mlir/tensorflow/BUILD | 1 + .../mark_ops_for_outside_compilation.cc | 58 +++++++++++++++++++ .../mlir/tensorflow/transforms/passes.h | 5 ++ 3 files changed, 64 insertions(+) create mode 100644 tensorflow/compiler/mlir/tensorflow/transforms/mark_ops_for_outside_compilation.cc diff --git a/tensorflow/compiler/mlir/tensorflow/BUILD b/tensorflow/compiler/mlir/tensorflow/BUILD index 593e0d90172..6cb0922fa75 100644 --- a/tensorflow/compiler/mlir/tensorflow/BUILD +++ b/tensorflow/compiler/mlir/tensorflow/BUILD @@ -729,6 +729,7 @@ cc_library( "transforms/graph_pruning.cc", "transforms/launch_to_device_attribute.cc", "transforms/layout_optimization.cc", + "transforms/mark_ops_for_outside_compilation.cc", "transforms/materialize_mlir_passthrough_op.cc", "transforms/optimize.cc", "transforms/parallel_execute_to_islands.cc", diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/mark_ops_for_outside_compilation.cc b/tensorflow/compiler/mlir/tensorflow/transforms/mark_ops_for_outside_compilation.cc new file mode 100644 index 00000000000..7d6ede2e613 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/transforms/mark_ops_for_outside_compilation.cc @@ -0,0 +1,58 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include +#include + +#include "mlir/Pass/Pass.h" // from @llvm-project +#include "mlir/Pass/PassRegistry.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_device.h" +#include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" + +namespace mlir { +namespace TF { + +namespace { + +// This pass marks unsupported ops in a device cluster with +// `_xla_outside_compilation` attribute so the operations will run on the host +// instead of the device. Unsupported ops are ops that can not be code +// generated to run on the device for the cluster. 
+struct MarkOpsForOutsideCompilation + : public PassWrapper> { + void runOnOperation() override; +}; + +void MarkOpsForOutsideCompilation::runOnOperation() { + auto module = getOperation(); + + module.walk([&](tf_device::ClusterOp cluster) {}); +} + +} // namespace + +std::unique_ptr> +CreateMarkOpsForOutsideCompilationPass() { + return std::make_unique(); +} + +static PassRegistration pass( + "tf-mark-ops-for-outside-compilation", + "Marks unsupported ops a device cluster for outside compilation."); + +} // namespace TF +} // namespace mlir diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/passes.h b/tensorflow/compiler/mlir/tensorflow/transforms/passes.h index 5af8a0195a4..8e8cb929f8e 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/passes.h +++ b/tensorflow/compiler/mlir/tensorflow/transforms/passes.h @@ -292,6 +292,11 @@ std::unique_ptr> CreateTPUHostComputationExpansionPass(); std::unique_ptr> CreateTPUUpdateEmbeddingEnqueueOpInputsPass(); +// Creates a pass that marks unsupported ops in device cluster for outside +// compilation. +std::unique_ptr> +CreateMarkOpsForOutsideCompilationPass(); + // Creates a pass that extract outside compilation (CPU ops inside TPU cluster) // ops to a separate parallel_execute region to run on CPU. std::unique_ptr> From e60dbe2091ed0af79a605ac95fb27ced81a901d5 Mon Sep 17 00:00:00 2001 From: Andy Ly Date: Wed, 22 Jul 2020 15:13:16 -0700 Subject: [PATCH 1087/2522] Sync TF MLIR ODS (NFC). - Swapped tf.AvgPool3D and tf.AvgPool3DGrad - Updated TensorFlow op registry description for tf.AvgPool3D - Moved tf.InplaceUpdate back into tf_generated_ops.td - Updated tf.InplaceUpdate to have TF_AllTypesMatch trait on `x`, `v`, and `y` - Replaced `value` with `input` in tf.AvgPool3D to match TensorFlow op registry PiperOrigin-RevId: 322665577 Change-Id: I72b3648df1384e3a93cb3adfd684c02a552c7c5a --- .../mlir/tensorflow/ir/tf_generated_ops.td | 75 ++++++++++++------- .../compiler/mlir/tensorflow/ir/tf_ops.td | 25 ------- .../mlir/xla/transforms/legalize_tf.cc | 8 +- .../api_def/base_api/api_def_AvgPool3D.pbtxt | 4 + 4 files changed, 58 insertions(+), 54 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td index 863de28df51..836950526c2 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td @@ -87,7 +87,7 @@ tf.math.acosh(x) ==> [nan nan 0. 0.62236255 5.9914584 9.903487 inf] TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } -def TF_AddOp : TF_Op<"Add", [NoSideEffect, ResultsBroadcastableShape, TF_LayoutAgnostic, SameOperandsAndResultElementType]>, +def TF_AddOp : TF_Op<"Add", [NoSideEffect, ResultsBroadcastableShape, SameOperandsAndResultElementType, TF_LayoutAgnostic]>, WithBroadcastableBinOpBuilder { let summary = "Returns x + y element-wise."; @@ -136,7 +136,7 @@ Inputs must be of same size and shape. let hasFolder = 1; } -def TF_AddV2Op : TF_Op<"AddV2", [Commutative, NoSideEffect, ResultsBroadcastableShape, TF_LayoutAgnostic, SameOperandsAndResultElementType]>, +def TF_AddV2Op : TF_Op<"AddV2", [Commutative, NoSideEffect, ResultsBroadcastableShape, SameOperandsAndResultElementType, TF_LayoutAgnostic]>, WithBroadcastableBinOpBuilder { let summary = "Returns x + y element-wise."; @@ -725,6 +725,30 @@ window in `value`. 
TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } +def TF_AvgPool3DOp : TF_Op<"AvgPool3D", [NoSideEffect]> { + let summary = "Performs 3D average pooling on the input."; + + let description = [{ +Each entry in `output` is the mean of the corresponding size `ksize` window in +`value`. + }]; + + let arguments = (ins + TF_FpTensor:$input, + + Confined]>:$ksize, + Confined]>:$strides, + TF_AnyStrAttrOf<["SAME", "VALID"]>:$padding, + DefaultValuedAttr, "NDHWC">:$data_format + ); + + let results = (outs + TF_FpTensor:$output + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; +} + def TF_AvgPool3DGradOp : TF_Op<"AvgPool3DGrad", [NoSideEffect]> { let summary = "Computes gradients of average pooling function."; @@ -745,30 +769,6 @@ def TF_AvgPool3DGradOp : TF_Op<"AvgPool3DGrad", [NoSideEffect]> { TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<1>; } -def TF_AvgPool3DOp : TF_Op<"AvgPool3D", [NoSideEffect]> { - let summary = "Performs 3D average pooling on the input."; - - let description = [{ -Each entry in `output` is the mean of the corresponding size `ksize` -window in `value`. - }]; - - let arguments = (ins - TF_FpTensor:$value, - - Confined]>:$ksize, - Confined]>:$strides, - TF_AnyStrAttrOf<["SAME", "VALID"]>:$padding, - DefaultValuedAttr, "NDHWC">:$data_format - ); - - let results = (outs - TF_FpTensor:$output - ); - - TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; -} - def TF_AvgPoolGradOp : TF_Op<"AvgPoolGrad", [NoSideEffect]> { let summary = "Computes gradients of the average pooling function."; @@ -4231,6 +4231,29 @@ Where to extract the key and value from a line is specified by `key_index` and let results = (outs); } +def TF_InplaceUpdateOp : TF_Op<"InplaceUpdate", [NoSideEffect]> { + let summary = "Updates specified rows 'i' with values 'v'."; + + let description = [{ +Computes `x[i, :] = v; return x`. + +Originally this function is mutative however for compilation we make this +operation create / operate on a copy of `x`. + }]; + + let arguments = (ins + TF_Tensor:$x, + I32Tensor:$i, + TF_Tensor:$v + ); + + let results = (outs + TF_Tensor:$y + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; +} + def TF_InvOp : TF_Op<"Inv", [NoSideEffect, SameOperandsAndResultType]> { let summary = "Computes the reciprocal of x element-wise."; diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td index 71b30ae8090..2b64b4595cf 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td @@ -1070,31 +1070,6 @@ def TF_TensorSliceDatasetOp : TF_Op<"TensorSliceDataset", []> { TF_DerivedOperandTypeListAttr Toutput_types = TF_DerivedOperandTypeListAttr<0>; } -// TODO(b/156507832): Move tf.InplaceUpdate to tf_generated_ops.td once -// autogenerated op def matches. -def TF_InplaceUpdateOp : TF_Op<"InplaceUpdate", [NoSideEffect]> { - let summary = "Updates specified rows 'i' with values 'v'."; - - let description = [{ -Computes `x[i, :] = v; return x`. - -Originally this function is mutative however for compilation we make this -operation create / operate on a copy of `x`. 
- }]; - - let arguments = (ins - TF_Tensor:$x, - I32Tensor:$i, - TF_Tensor:$v - ); - - let results = (outs - TF_Tensor:$y - ); - - TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; -} - def TF_BesselI0eOp : TF_Op<"BesselI0e", [NoSideEffect, SameOperandsAndResultType]> { let summary = "Computes the Bessel i0e function of `x` element-wise."; diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc index 4549386ce16..b724c1b08e0 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc @@ -1836,6 +1836,9 @@ Operation *AvgPoolDivideByCount( return result; } +Value GetAvgPoolInput(TF::AvgPoolOp op) { return op.value(); } +Value GetAvgPoolInput(TF::AvgPool3DOp op) { return op.input(); } + // Converts AvgPool op to HLO ReduceWindow op by setting appropriate window // dimensions with add as the reduction function. The reduction result is // then divided by the number of elements in the window. @@ -1846,8 +1849,9 @@ class ConvertAvgPoolOp : public OpRewritePattern { LogicalResult matchAndRewrite(OpTy op, PatternRewriter &rewriter) const override { + Value input_value = GetAvgPoolInput(op); auto input_type = - op.value().getType().template dyn_cast(); + input_value.getType().template dyn_cast(); if (!input_type) return failure(); // We will do accumulation first; use a larger bitwidth if suitable. @@ -1862,8 +1866,6 @@ class ConvertAvgPoolOp : public OpRewritePattern { else result_type = UnrankedTensorType::get(sum_element_type); - Value input_value = op.value(); - // Convert if we need enlarge the element type's bitwidth. if (input_element_type != sum_element_type) input_value = rewriter.create(op.getLoc(), input_value, diff --git a/tensorflow/core/api_def/base_api/api_def_AvgPool3D.pbtxt b/tensorflow/core/api_def/base_api/api_def_AvgPool3D.pbtxt index 8171566a212..fcaa93acac1 100644 --- a/tensorflow/core/api_def/base_api/api_def_AvgPool3D.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_AvgPool3D.pbtxt @@ -43,4 +43,8 @@ Alternatively, the format could be "NCDHW", the data storage order is: END } summary: "Performs 3D average pooling on the input." + description: < Date: Wed, 22 Jul 2020 15:22:48 -0700 Subject: [PATCH 1088/2522] Plumb TF node name into xla's argument's op metadata. PiperOrigin-RevId: 322667361 Change-Id: Ifcd875d428ce92628fc13354be9d0b4829a65f67 --- tensorflow/compiler/tf2xla/xla_compiler.cc | 5 +++++ tensorflow/compiler/xla/client/xla_builder.cc | 7 ++++++- tensorflow/compiler/xla/client/xla_builder.h | 8 ++++++++ tensorflow/core/protobuf/tpu/compile_metadata.proto | 3 +++ .../graph_rewrite/distributed_tpu_rewrite_pass.cc | 12 +++++++++--- .../tpu/graph_rewrite/distributed_tpu_rewrite_pass.h | 4 +++- tensorflow/core/tpu/kernels/tpu_compile_op_common.cc | 1 + 7 files changed, 35 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/tf2xla/xla_compiler.cc b/tensorflow/compiler/tf2xla/xla_compiler.cc index 333fa53790d..0722c30787f 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.cc +++ b/tensorflow/compiler/tf2xla/xla_compiler.cc @@ -1030,6 +1030,11 @@ Status XlaCompiler::BuildArguments( xla::XlaScopedShardingAssignment assign_sharding( builder, it == arg_shardings.end() ? 
absl::optional() : it->second); + auto& arg = args[input_to_args->at(i)]; + + xla::OpMetadata arg_metadata; + arg_metadata.set_op_name(arg.node_name); + builder->SetOneShotOpMetadata(arg_metadata); arg_handles[i] = xla::GetTupleElement(tuple, i); } } else { diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc index cc6a680c4e9..701ce312b70 100644 --- a/tensorflow/compiler/xla/client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_builder.cc @@ -3021,7 +3021,12 @@ StatusOr XlaBuilder::AddInstruction(HloInstructionProto&& instr, instr.add_operand_ids(operand.handle()); } - *instr.mutable_metadata() = metadata_; + if (one_shot_metadata_.has_value()) { + *instr.mutable_metadata() = one_shot_metadata_.value(); + one_shot_metadata_.reset(); + } else { + *instr.mutable_metadata() = metadata_; + } if (sharding_) { *instr.mutable_sharding() = *sharding_; } diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h index 60bdc32e68d..b43a5a2c06e 100644 --- a/tensorflow/compiler/xla/client/xla_builder.h +++ b/tensorflow/compiler/xla/client/xla_builder.h @@ -153,6 +153,11 @@ class XlaBuilder { // OpMetadata attached until a call to ClearOpMetadata. void SetOpMetadata(OpMetadata metadata) { metadata_ = std::move(metadata); } + // Similar to SetOpMetadata, but only set the metadata for the next op. + void SetOneShotOpMetadata(OpMetadata metadata) { + metadata_ = std::move(metadata); + } + // Clears the HloMetadata state. void ClearOpMetadata() { metadata_.Clear(); } @@ -842,6 +847,9 @@ class XlaBuilder { // throughout the TensorFlow op kernel implementations). OpMetadata metadata_; + // A temporary metadata that will only be applied to the next op created. + absl::optional one_shot_metadata_; + // Sharding for this operator. This is structured as a "model"-like operation, // in order to simplify client code, similar to metadata_. absl::optional sharding_; diff --git a/tensorflow/core/protobuf/tpu/compile_metadata.proto b/tensorflow/core/protobuf/tpu/compile_metadata.proto index 29593bb896f..2b29e8468b2 100644 --- a/tensorflow/core/protobuf/tpu/compile_metadata.proto +++ b/tensorflow/core/protobuf/tpu/compile_metadata.proto @@ -59,6 +59,9 @@ message TPUCompileMetadataProto { // Whether to let XLA to decide the layout during compilation, as opposed to // using a fixed layout determined by the shape. bool unrestricted_layout = 9; + + // Name of the node that the arg comes from. + string name = 10; } repeated Arg args = 1; diff --git a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.cc b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.cc index f0032f5dfd9..e7c0c2e04b3 100644 --- a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.cc +++ b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.cc @@ -1694,7 +1694,8 @@ Status DistributedTPURewritePass::AssignArgsAndRetvalsToCores( const std::vector& retval_shapes, const Graph& graph, const Node* replicate_node, FunctionLibraryRuntime* flr, std::vector* arg_sharding, std::vector* arg_fast_mem, - std::vector* retval_sharding) { + std::vector* retval_sharding, + std::vector* arg_names) { // Builds vectors of the argument and return nodes. 
std::vector args(arg_types.size()); std::vector retvals(retval_types.size()); @@ -1744,6 +1745,7 @@ Status DistributedTPURewritePass::AssignArgsAndRetvalsToCores( TensorDevicePlacer args_device_selector(num_cores_per_replica, arg_types, arg_shapes); arg_sharding->resize(args.size()); + arg_names->resize(args.size()); arg_fast_mem->resize(args.size()); CachedFunctionHandles cached_function_handles(flr); const bool use_spmd = UseSpmdForXlaPartitioning(replicate_node) || @@ -1832,6 +1834,7 @@ Status DistributedTPURewritePass::AssignArgsAndRetvalsToCores( } (*arg_sharding)[i] = *sharding; (*arg_fast_mem)[i] = is_fast_mem; + (*arg_names)[i] = n->name(); if (is_fast_mem) { VLOG(3) << "Add " << TPU_FAST_MEM_ATTR << " attribute to " << args[i]->name(); @@ -2000,6 +2003,7 @@ Status DistributedTPURewritePass::BuildCompileNode( const string& session_handle, const std::vector& arg_sharding, const std::vector& arg_fast_mem, + const std::vector& arg_names, const std::vector& retval_sharding, int num_cores_per_replica, const string& compile_device, const xla::DeviceAssignment* xla_device_assignment, @@ -2041,6 +2045,7 @@ Status DistributedTPURewritePass::BuildCompileNode( tpu::TPUCompileMetadataProto::Arg* arg = proto.add_args(); DataType type = arg_types[i]; const InferredShape& arg_shape = arg_shapes[i]; + arg->set_name(arg_names[i]); if (type == DT_RESOURCE) { TF_RET_CHECK(arg_shape.handle_type != DT_INVALID) << i; arg->set_dtype(arg_shape.handle_type); @@ -3812,11 +3817,12 @@ Status DistributedTPURewritePass::FingerprintFunctionLibrary( std::vector arg_sharding; std::vector arg_fast_mem; + std::vector arg_names; std::vector retval_sharding; TF_RETURN_IF_ERROR(AssignArgsAndRetvalsToCores( num_cores_per_replica, params_info, arg_types, arg_shapes, retval_types, retval_shapes, *computation, replicate_node, flr, &arg_sharding, - &arg_fast_mem, &retval_sharding)); + &arg_fast_mem, &retval_sharding, &arg_names)); VLOG(1) << DumpGraphToFile("distributed_tpu_graph_to_replicate", *computation, flib_def); @@ -3874,7 +3880,7 @@ Status DistributedTPURewritePass::FingerprintFunctionLibrary( TF_RETURN_IF_ERROR(BuildCompileNode( replicate_node, *function, library_fingerprint, params_info, arg_shapes, arg_types, guaranteed_constant_nodes, session_handle, arg_sharding, - arg_fast_mem, retval_sharding, num_cores_per_replica, + arg_fast_mem, arg_names, retval_sharding, num_cores_per_replica, /*compile_device=*/tpu_compilation_device, xla_device_assignment.get(), dynamic_shape_nodes, graph, &compile_node, autotuner_thresh)); diff --git a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.h b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.h index 52fae7a7c13..ac1a3c38690 100644 --- a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.h +++ b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.h @@ -310,7 +310,8 @@ class DistributedTPURewritePass : public GraphOptimizationPass { const Node* replicate_node, FunctionLibraryRuntime* flr, std::vector<::xla::OpSharding>* arg_sharding, std::vector* arg_fast_mem, - std::vector<::xla::OpSharding>* retval_sharding); + std::vector<::xla::OpSharding>* retval_sharding, + std::vector* arg_names); // Computes a fingerprint of the contents of `library`. 
static Status FingerprintFunctionLibrary( @@ -359,6 +360,7 @@ class DistributedTPURewritePass : public GraphOptimizationPass { const string& session_handle, const std::vector<::xla::OpSharding>& arg_sharding, const std::vector& arg_fast_mem, + const std::vector& arg_names, const std::vector<::xla::OpSharding>& retval_sharding, int num_cores_per_replica, const string& compile_device, const xla::DeviceAssignment* xla_device_assignment, diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc b/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc index a24aa4cd665..4f10b4761e3 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc @@ -189,6 +189,7 @@ Status TpuCompileOpKernelCommon::BuildComputationArgumentDescriptions( XlaCompiler::Argument& arg = args->back(); arg.type = proto_arg.dtype(); arg.shape = arg_shapes[i]; + arg.node_name = proto_arg.name(); switch (proto_arg.kind()) { case tpu::TPUCompileMetadataProto::Arg::PARAMETER: arg.kind = XlaCompiler::Argument::kParameter; From 62ad3554a348369309a36fc73088dda7a40059af Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Wed, 22 Jul 2020 15:27:56 -0700 Subject: [PATCH 1089/2522] Migrate official release and nightly testing builds to use the new bazel configs. PiperOrigin-RevId: 322668400 Change-Id: Ie60026a1ab63ba57b2b18085d3899d45cc484fd8 --- .../tools/ci_build/release/ubuntu_16/cpu_py35_full/pip.sh | 7 +------ .../ci_build/release/ubuntu_16/cpu_py35_full/pip_v1.sh | 7 +------ .../tools/ci_build/release/ubuntu_16/cpu_py36_full/pip.sh | 7 +------ .../ci_build/release/ubuntu_16/cpu_py36_full/pip_v1.sh | 7 +------ .../tools/ci_build/release/ubuntu_16/cpu_py37_full/pip.sh | 7 +------ .../ci_build/release/ubuntu_16/cpu_py37_full/pip_v1.sh | 7 +------ .../tools/ci_build/release/ubuntu_16/cpu_py38_full/pip.sh | 7 +------ 7 files changed, 7 insertions(+), 42 deletions(-) diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/pip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/pip.sh index 5d0cbacb0b7..0dac1c72898 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/pip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/pip.sh @@ -28,11 +28,6 @@ export CONTAINER_TYPE="CPU" export TF_PYTHON_VERSION='python3.5' # Run configure. -export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=0 -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) yes "" | "$PYTHON_BIN_PATH" configure.py @@ -40,7 +35,7 @@ yes "" | "$PYTHON_BIN_PATH" configure.py source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh # Export optional variables for running pip.sh -export TF_BUILD_FLAGS="--config=opt --config=v2 --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain" +export TF_BUILD_FLAGS="--config=release_cpu_linux" export TF_TEST_FLAGS="--define=no_tensorflow_py_deps=true --test_lang_filters=py --test_output=errors --verbose_failures=true --keep_going --test_env=TF2_BEHAVIOR=1" export TF_TEST_TARGETS="${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... 
" export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/pip_v1.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/pip_v1.sh index 1e2665f4120..0cd81c50940 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/pip_v1.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/pip_v1.sh @@ -28,16 +28,11 @@ export CONTAINER_TYPE="CPU" export TF_PYTHON_VERSION='python3.5' # Run configure. -export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=0 -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) yes "" | "$PYTHON_BIN_PATH" configure.py # Export optional variables for running pip.sh -export TF_BUILD_FLAGS="--config=opt --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain" +export TF_BUILD_FLAGS="--config=release_cpu_linux" export TF_TEST_FLAGS="--define=no_tensorflow_py_deps=true --test_lang_filters=py --test_output=errors --verbose_failures=true --keep_going" export TF_TEST_TARGETS="//tensorflow/python/... " export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/pip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/pip.sh index 25c4de88cdd..52f299ee6db 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/pip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/pip.sh @@ -28,11 +28,6 @@ export CONTAINER_TYPE="CPU" export TF_PYTHON_VERSION='python3.6' # Run configure. -export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=0 -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) yes "" | "$PYTHON_BIN_PATH" configure.py @@ -40,7 +35,7 @@ yes "" | "$PYTHON_BIN_PATH" configure.py source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh # Export optional variables for running pip.sh -export TF_BUILD_FLAGS="--config=opt --config=v2 --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain" +export TF_BUILD_FLAGS="--config=release_cpu_linux" export TF_TEST_FLAGS="--define=no_tensorflow_py_deps=true --test_lang_filters=py --test_output=errors --verbose_failures=true --keep_going --test_env=TF2_BEHAVIOR=1" export TF_TEST_TARGETS="${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... " export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/pip_v1.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/pip_v1.sh index c4d78dc3fe5..96f62c9b228 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/pip_v1.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/pip_v1.sh @@ -28,16 +28,11 @@ export CONTAINER_TYPE="CPU" export TF_PYTHON_VERSION='python3.6' # Run configure. 
-export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=0 -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) yes "" | "$PYTHON_BIN_PATH" configure.py # Export optional variables for running pip.sh -export TF_BUILD_FLAGS="--config=opt --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain" +export TF_BUILD_FLAGS="--config=release_cpu_linux" export TF_TEST_FLAGS="--define=no_tensorflow_py_deps=true --test_lang_filters=py --test_output=errors --verbose_failures=true --keep_going" export TF_TEST_TARGETS="//tensorflow/python/... " export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py37_full/pip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py37_full/pip.sh index 940cef32ef8..4d21f5a28a9 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py37_full/pip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py37_full/pip.sh @@ -28,11 +28,6 @@ export CONTAINER_TYPE="CPU" export TF_PYTHON_VERSION='python3.7' # Run configure. -export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=0 -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) yes "" | "$PYTHON_BIN_PATH" configure.py @@ -40,7 +35,7 @@ yes "" | "$PYTHON_BIN_PATH" configure.py source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh # Export optional variables for running pip.sh -export TF_BUILD_FLAGS="--config=opt --config=v2 --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain" +export TF_BUILD_FLAGS="--config=release_cpu_linux" export TF_TEST_FLAGS="--define=no_tensorflow_py_deps=true --test_lang_filters=py --test_output=errors --verbose_failures=true --keep_going --test_env=TF2_BEHAVIOR=1" export TF_TEST_TARGETS="${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... " export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py37_full/pip_v1.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py37_full/pip_v1.sh index 2208327388f..19aa6175302 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py37_full/pip_v1.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py37_full/pip_v1.sh @@ -28,16 +28,11 @@ export CONTAINER_TYPE="CPU" export TF_PYTHON_VERSION='python3.7' # Run configure. -export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=0 -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) yes "" | "$PYTHON_BIN_PATH" configure.py # Export optional variables for running pip.sh -export TF_BUILD_FLAGS="--config=opt --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain" +export TF_BUILD_FLAGS="--config=release_cpu_linux" export TF_TEST_FLAGS="--define=no_tensorflow_py_deps=true --test_lang_filters=py --test_output=errors --verbose_failures=true --keep_going" export TF_TEST_TARGETS="//tensorflow/python/... 
" export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py38_full/pip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py38_full/pip.sh index a27d1f863d6..3a4116faa13 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py38_full/pip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py38_full/pip.sh @@ -28,11 +28,6 @@ export CONTAINER_TYPE="CPU" export TF_PYTHON_VERSION='python3.8' # Run configure. -export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=0 -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) yes "" | "$PYTHON_BIN_PATH" configure.py @@ -40,7 +35,7 @@ yes "" | "$PYTHON_BIN_PATH" configure.py source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh # Export optional variables for running pip.sh -export TF_BUILD_FLAGS="--config=opt --config=v2 --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain" +export TF_BUILD_FLAGS="--config=release_cpu_linux" export TF_TEST_FLAGS="--define=no_tensorflow_py_deps=true --test_lang_filters=py --test_output=errors --verbose_failures=true --keep_going --test_env=TF2_BEHAVIOR=1" export TF_TEST_TARGETS="${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... " export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" From 39e13608cb38bca093ce92943a72c6d1d2eb252c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Jul 2020 15:31:54 -0700 Subject: [PATCH 1090/2522] Add TensorCore eligibility to overview. PiperOrigin-RevId: 322669191 Change-Id: Iaf651846869a1ea3d9881a08b0b36f48defcfb3d --- tensorflow/core/profiler/convert/BUILD | 2 ++ .../convert/op_stats_to_overview_page.cc | 10 ++++++++++ .../core/profiler/protobuf/overview_page.proto | 2 ++ tensorflow/core/profiler/utils/BUILD | 1 + .../core/profiler/utils/kernel_stats_utils.cc | 16 ++++++++++++++++ .../core/profiler/utils/kernel_stats_utils.h | 5 +++++ .../python/profiler/internal/profiler_wrapper.cc | 3 ++- 7 files changed, 38 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/profiler/convert/BUILD b/tensorflow/core/profiler/convert/BUILD index e24addeb83a..5e0682fc031 100644 --- a/tensorflow/core/profiler/convert/BUILD +++ b/tensorflow/core/profiler/convert/BUILD @@ -96,6 +96,7 @@ cc_library( "//tensorflow/core:lib_internal", "//tensorflow/core/profiler/protobuf:hardware_types_proto_cc", "//tensorflow/core/profiler/protobuf:input_pipeline_proto_cc", + "//tensorflow/core/profiler/protobuf:kernel_stats_proto_cc", "//tensorflow/core/profiler/protobuf:op_metrics_proto_cc", "//tensorflow/core/profiler/protobuf:op_stats_proto_cc", "//tensorflow/core/profiler/protobuf:overview_page_proto_cc", @@ -104,6 +105,7 @@ cc_library( "//tensorflow/core/profiler/utils:diagnostics", "//tensorflow/core/profiler/utils:hardware_type_utils", "//tensorflow/core/profiler/utils:html_utils", + "//tensorflow/core/profiler/utils:kernel_stats_utils", "//tensorflow/core/profiler/utils:math_utils", "//tensorflow/core/profiler/utils:op_metrics_db_utils", "//tensorflow/core/profiler/utils:time_utils", diff --git a/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc b/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc index 9f71175bcea..da1fcb7cc6a 100644 --- a/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc +++ b/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc @@ -25,6 +25,7 @@ limitations under the License. 
#include "tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h" #include "tensorflow/core/profiler/protobuf/hardware_types.pb.h" #include "tensorflow/core/profiler/protobuf/input_pipeline.pb.h" +#include "tensorflow/core/profiler/protobuf/kernel_stats.pb.h" #include "tensorflow/core/profiler/protobuf/op_metrics.pb.h" #include "tensorflow/core/profiler/protobuf/op_stats.pb.h" #include "tensorflow/core/profiler/protobuf/overview_page.pb.h" @@ -33,6 +34,7 @@ limitations under the License. #include "tensorflow/core/profiler/utils/diagnostics.h" #include "tensorflow/core/profiler/utils/hardware_type_utils.h" #include "tensorflow/core/profiler/utils/html_utils.h" +#include "tensorflow/core/profiler/utils/kernel_stats_utils.h" #include "tensorflow/core/profiler/utils/math_utils.h" #include "tensorflow/core/profiler/utils/op_metrics_db_utils.h" #include "tensorflow/core/profiler/utils/time_utils.h" @@ -163,6 +165,9 @@ OverviewPageAnalysis ComputeAnalysisResult(const OpStats& op_stats) { OverviewPageAnalysis analysis; OpMetricsDb device_tf_op_metrics_db = CreateTfMetricsDbFromDeviceOpMetricsDb( op_stats.device_op_metrics_db(), /*with_idle=*/false); + absl::flat_hash_map> + grouped_kernel_reports = + GroupKernelReportsByOpName(op_stats.kernel_stats_db()); uint64 total_device_time_ps = device_tf_op_metrics_db.total_time_ps(); constexpr int kNumTopOpsShown = 10; double device_cumulative_fraction = 0.0; @@ -177,6 +182,11 @@ OverviewPageAnalysis ComputeAnalysisResult(const OpStats& op_stats) { op->set_cumulative_time_fraction(device_cumulative_fraction); op->set_flop_rate( SafeDivide(metrics->flops(), PicosToNanos(metrics->time_ps()))); + auto iter = grouped_kernel_reports.find(op->name()); + if (iter != grouped_kernel_reports.end()) { + op->set_is_op_tensorcore_eligible( + iter->second.front()->is_op_tensor_core_eligible()); + } } uint64 total_device_compute_ps = op_stats.device_op_metrics_db().precision_stats().compute_16bit_ps() + diff --git a/tensorflow/core/profiler/protobuf/overview_page.proto b/tensorflow/core/profiler/protobuf/overview_page.proto index 5621ad92a0d..f7219e6153c 100644 --- a/tensorflow/core/profiler/protobuf/overview_page.proto +++ b/tensorflow/core/profiler/protobuf/overview_page.proto @@ -19,6 +19,8 @@ message OverviewTfOp { double cumulative_time_fraction = 4; // How many GFlops/sec that this Op achieves. double flop_rate = 5; + // Whether the Op is eligible to use TensorCores. + bool is_op_tensorcore_eligible = 6; } // Overview result for general analysis. 
diff --git a/tensorflow/core/profiler/utils/BUILD b/tensorflow/core/profiler/utils/BUILD index d7046f1c214..5a60bf3f2f3 100644 --- a/tensorflow/core/profiler/utils/BUILD +++ b/tensorflow/core/profiler/utils/BUILD @@ -401,6 +401,7 @@ cc_library( deps = [ "//tensorflow/core:lib", "//tensorflow/core/profiler/protobuf:kernel_stats_proto_cc", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/strings", ], ) diff --git a/tensorflow/core/profiler/utils/kernel_stats_utils.cc b/tensorflow/core/profiler/utils/kernel_stats_utils.cc index c40c3a89c9c..2f53b51d7a8 100644 --- a/tensorflow/core/profiler/utils/kernel_stats_utils.cc +++ b/tensorflow/core/profiler/utils/kernel_stats_utils.cc @@ -242,5 +242,21 @@ void GroupKernelReports(std::vector* reports, } } +absl::flat_hash_map> +GroupKernelReportsByOpName(const KernelStatsDb& kernel_stats_db) { + absl::flat_hash_map> + grouped_kernel_reports; + for (const KernelReport& kernel_report : kernel_stats_db.reports()) { + std::vector& kernel_reports = + grouped_kernel_reports[kernel_report.op_name()]; + kernel_reports.push_back(&kernel_report); + // Verifies operations with the same name have the same TensorCore + // eligibility. + DCHECK_EQ(kernel_reports.front()->is_op_tensor_core_eligible(), + kernel_reports.back()->is_op_tensor_core_eligible()); + } + return grouped_kernel_reports; +} + } // namespace profiler } // namespace tensorflow diff --git a/tensorflow/core/profiler/utils/kernel_stats_utils.h b/tensorflow/core/profiler/utils/kernel_stats_utils.h index 5b66596d683..ce9208fba1f 100644 --- a/tensorflow/core/profiler/utils/kernel_stats_utils.h +++ b/tensorflow/core/profiler/utils/kernel_stats_utils.h @@ -18,6 +18,7 @@ limitations under the License. #include +#include "absl/container/flat_hash_map.h" #include "absl/strings/string_view.h" #include "tensorflow/core/profiler/protobuf/kernel_stats.pb.h" @@ -53,6 +54,10 @@ void SortKernelsByTotalDurationDesc(KernelStatsDb* kernel_stats_db); // Groups and aggregate common reports into destination KernelStatsDb. void GroupKernelReports(std::vector* reports, KernelStatsDb* dst); +// Groups KernelReport in by tensorflow operation name. +absl::flat_hash_map> +GroupKernelReportsByOpName(const KernelStatsDb& kernel_stats_db); + } // namespace profiler } // namespace tensorflow diff --git a/tensorflow/python/profiler/internal/profiler_wrapper.cc b/tensorflow/python/profiler/internal/profiler_wrapper.cc index 16e986ac2e6..a8799a5f247 100644 --- a/tensorflow/python/profiler/internal/profiler_wrapper.cc +++ b/tensorflow/python/profiler/internal/profiler_wrapper.cc @@ -188,7 +188,8 @@ PYBIND11_MODULE(_pywrap_profiler, m) { xspace.ParseFromString(serialized_xspace_proto); tensorflow::profiler::OverviewPage overview_page = tensorflow::profiler::ConvertOpStatsToOverviewPage( - ConvertXSpaceToOpStats(xspace, {OP_METRICS_DB, STEP_DB})); + ConvertXSpaceToOpStats( + xspace, {OP_METRICS_DB, STEP_DB, KERNEL_STATS_DB})); return py::bytes(overview_page.SerializeAsString()); }); From ff95948cebdadc82e101aa7232f073390304e682 Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Wed, 22 Jul 2020 15:45:59 -0700 Subject: [PATCH 1091/2522] Fix error which results in 2 different strategy instances being used (one is used for creating variables under a given scope and the second is used to call `run`). Add a second regex assertion since we will only have one kind of tf.distribute variable (DistributedVariable) going forward.
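An illustrative sketch (not part of this patch) of the pattern the updated tests now follow, namely building the strategy once and reusing that same instance for both the variable-creating scope and `run`:

import tensorflow as tf

strategy = tf.distribute.MirroredStrategy(["/cpu:0"])  # construct the strategy once

with strategy.scope():
  v = tf.Variable(1.0)  # variable is created under this strategy's scope

@tf.function
def step():
  return v + 1.0

# Reuse the same instance for run(); constructing a second strategy here is
# exactly the mismatch this change removes from the tests.
result = strategy.run(step)
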
PiperOrigin-RevId: 322671804 Change-Id: Ie9e992669e44486f3217845221577b0535516184 --- .../experimental/autocast_variable_test.py | 18 +++++++++++++----- .../loss_scaling_gradient_tape_test.py | 4 ++-- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/keras/mixed_precision/experimental/autocast_variable_test.py b/tensorflow/python/keras/mixed_precision/experimental/autocast_variable_test.py index 48fa93459a7..c3162e0e80a 100644 --- a/tensorflow/python/keras/mixed_precision/experimental/autocast_variable_test.py +++ b/tensorflow/python/keras/mixed_precision/experimental/autocast_variable_test.py @@ -432,13 +432,21 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase): ) def test_repr_distributed(self): - with mirrored_strategy.MirroredStrategy(['/cpu:1', '/cpu:2']).scope(): + strategy = mirrored_strategy.MirroredStrategy(['/cpu:1', '/cpu:2']) + with strategy.scope(): x = get_var(1., dtypes.float32) x = autocast_variable.create_autocast_variable(x) - self.assertRegex( - repr(x).replace('\n', ' '), - '') + use_policy = getattr(strategy.extended, '_use_policy', False) + if use_policy: + self.assertRegex( + repr(x).replace('\n', ' '), + '') + else: + self.assertRegex( + repr(x).replace('\n', ' '), + '') @parameterized.named_parameters( ('v1', gradient_descent_v1.GradientDescentOptimizer), diff --git a/tensorflow/python/training/experimental/loss_scaling_gradient_tape_test.py b/tensorflow/python/training/experimental/loss_scaling_gradient_tape_test.py index 5c6b4d71649..ec07d215f4f 100644 --- a/tensorflow/python/training/experimental/loss_scaling_gradient_tape_test.py +++ b/tensorflow/python/training/experimental/loss_scaling_gradient_tape_test.py @@ -114,7 +114,7 @@ class LossScaleGradientTapeTest(test.TestCase, parameterized.TestCase): with lsgt.LossScaleGradientTape(loss_scale) as g: y = x * x return g.gradient(y, x, output_gradients=constant_op.constant(2.0)) - dy_dx_list = self._run_with_strategy(run_fn, strategy_fn(), use_tf_function) + dy_dx_list = self._run_with_strategy(run_fn, strategy, use_tf_function) self.assertEqual(loss_scale(), 32) for dy_dx in dy_dx_list: self.assertEqual(dy_dx, 12.0) @@ -236,7 +236,7 @@ class LossScaleGradientTapeTest(test.TestCase, parameterized.TestCase): dy_dx = g.gradient(y, x) return dz_dx, dy_dx - dz_dx_list, dy_dx_list = self._run_with_strategy(run_fn, strategy_fn(), + dz_dx_list, dy_dx_list = self._run_with_strategy(run_fn, strategy, use_tf_function) for dz_dx in dz_dx_list: self.assertEqual(dz_dx, 108.0) From 97765dc98fe9d79910899ddf4c8312eade550198 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Jul 2020 15:46:36 -0700 Subject: [PATCH 1092/2522] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 322671922 Change-Id: I4913e47ad9e4b6a7d8c477f0d6e2eb318c1bc9b6 --- tensorflow/go/op/wrappers.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index eb0a853ba95..504e6ba3b47 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -28338,6 +28338,9 @@ func AvgPool3DDataFormat(value string) AvgPool3DAttr { // Performs 3D average pooling on the input. // +// Each entry in `output` is the mean of the corresponding size `ksize` window in +// `value`. +// // Arguments: // input: Shape `[batch, depth, rows, cols, channels]` tensor to pool over. // ksize: 1-D tensor of length 5. 
The size of the window for each dimension of From e7710bf25d3f59db73e6bbd6e6ffe98569d95741 Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Wed, 22 Jul 2020 15:53:34 -0700 Subject: [PATCH 1093/2522] Add MWMS test to ctl_correctness_test. Fixed an issue with reducing int values on GPUs. PiperOrigin-RevId: 322673156 Change-Id: I60a85c0689d86ce74e233d3d8f8103b6817b7d28 --- tensorflow/python/distribute/input_lib.py | 4 ++-- tensorflow/python/keras/distribute/BUILD | 2 +- .../keras/distribute/ctl_correctness_test.py | 19 ++++++++++++++++++- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/distribute/input_lib.py b/tensorflow/python/distribute/input_lib.py index 708d5ebca75..23792f69efa 100644 --- a/tensorflow/python/distribute/input_lib.py +++ b/tensorflow/python/distribute/input_lib.py @@ -527,8 +527,8 @@ def _get_next_as_optional(iterator, strategy, name=None): # TODO(b/131423105): we should be able to short-cut the all-reduce in some # cases. if getattr(strategy.extended, "_support_per_replica_values", True): - # Slight hack: `reduce` expects a `PerReplica`, so we pass it one, even - # though it doesn't actually have a value per replica. + # `reduce` expects a `PerReplica`, so we pass it one, even + # though it doesn't actually have a value per replica worker_has_values = values.PerReplica(worker_has_values) global_has_value = strategy.reduce( reduce_util.ReduceOp.SUM, worker_has_values, axis=None) diff --git a/tensorflow/python/keras/distribute/BUILD b/tensorflow/python/keras/distribute/BUILD index 83c25562323..bdd4cbc58aa 100644 --- a/tensorflow/python/keras/distribute/BUILD +++ b/tensorflow/python/keras/distribute/BUILD @@ -198,7 +198,7 @@ distribute_py_test( name = "ctl_correctness_test", srcs = ["ctl_correctness_test.py"], main = "ctl_correctness_test.py", - shard_count = 5, + shard_count = 10, tags = [ "multi_and_single_gpu", ], diff --git a/tensorflow/python/keras/distribute/ctl_correctness_test.py b/tensorflow/python/keras/distribute/ctl_correctness_test.py index eade27ee57c..a55f80e4bf2 100644 --- a/tensorflow/python/keras/distribute/ctl_correctness_test.py +++ b/tensorflow/python/keras/distribute/ctl_correctness_test.py @@ -33,6 +33,7 @@ from tensorflow.python.eager import def_function from tensorflow.python.eager import test from tensorflow.python.framework import dtypes from tensorflow.python.framework import random_seed +from tensorflow.python.framework import test_util from tensorflow.python.keras.distribute import optimizer_combinations from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn @@ -230,6 +231,14 @@ class TestDistributionStrategyDnnCorrectness(test.TestCase, mode=['eager'], iteration_type=['iterator', 'dataset'], inside_func=[False, True], + sync_batchnorm=[True, False]) + + combinations.combine( + distribution=strategy_combinations.multiworker_strategies, + optimizer_fn= + optimizer_combinations.gradient_descent_optimizer_keras_v2_fn, + mode=['eager'], + iteration_type=['iterator', 'dataset'], + inside_func=[False, True], sync_batchnorm=[True, False] )) def test_dnn_correctness_minus_tpus(self, distribution, optimizer_fn, @@ -238,6 +247,14 @@ class TestDistributionStrategyDnnCorrectness(test.TestCase, # TODO(anjs): Identify why this particular V1 optimizer needs a higher tol. 
if 'FtrlV1' in optimizer_fn._name and 'TPU' in type(distribution).__name__: self.skipTest('Reduced tolerance of the order of 1e-1 required.') + if ('CollectiveAllReduce' in type(distribution).__name__ and + test_util.is_xla_enabled()): + self.skipTest('XLA tests fail with MWMS.') + # Unable to use required_gpus to check if this is a multiGPU combination + # since required_gpus and NamedDistribution cannot be used together. + if ('CollectiveAllReduce' in type(distribution).__name__ + and not inside_func and iteration_type == 'dataset'): + self.skipTest('MWMS tests fail with multiple GPUs.') self.dnn_correctness(distribution, optimizer_fn, iteration_type, inside_func, sync_batchnorm) @@ -263,4 +280,4 @@ class TestDistributionStrategyDnnCorrectness(test.TestCase, if __name__ == '__main__': - test.main() + combinations.main() From 8f278b5c183b485f038b0504308929b4e3bde5a7 Mon Sep 17 00:00:00 2001 From: Jonah Kohn <51345541+jonah-kohn@users.noreply.github.com> Date: Wed, 22 Jul 2020 16:16:29 -0700 Subject: [PATCH 1094/2522] Raise error when calling .fit() w/ batch_size and a tf dataset --- tensorflow/python/keras/engine/data_adapter.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/keras/engine/data_adapter.py b/tensorflow/python/keras/engine/data_adapter.py index 469355dd722..6ad87bd80e2 100644 --- a/tensorflow/python/keras/engine/data_adapter.py +++ b/tensorflow/python/keras/engine/data_adapter.py @@ -690,6 +690,7 @@ class DatasetAdapter(DataAdapter): y=None, sample_weights=None, steps=None, + batch_size=None, **kwargs): super(DatasetAdapter, self).__init__(x, y, **kwargs) # Note that the dataset instance is immutable, its fine to reuse the user @@ -699,7 +700,7 @@ class DatasetAdapter(DataAdapter): # The user-provided steps. self._user_steps = steps - self._validate_args(y, sample_weights, steps) + self._validate_args(y, sample_weights, steps, batch_size) def get_dataset(self): return self._dataset @@ -728,7 +729,7 @@ class DatasetAdapter(DataAdapter): return (self._user_steps is None or cardinality.cardinality(self._dataset).numpy() == self._user_steps) - def _validate_args(self, y, sample_weights, steps): + def _validate_args(self, y, sample_weights, steps, batch_size): """Validates `__init__` arguments.""" # Arguments that shouldn't be passed. if not is_none_or_empty(y): @@ -737,6 +738,10 @@ class DatasetAdapter(DataAdapter): if not is_none_or_empty(sample_weights): raise ValueError("`sample_weight` argument is not supported when using " "dataset as input.") + + if batch_size is not None: + raise ValueError("`batch_size` argument must not be specified when " + "using dataset as input.") if steps is None: if _is_distributed_dataset(self._dataset): From 957502c91a49141a9d494257946769a7a3df0ae3 Mon Sep 17 00:00:00 2001 From: Ken Franko Date: Wed, 22 Jul 2020 16:10:40 -0700 Subject: [PATCH 1095/2522] Test experimental_io_device option. 
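For context, the option exercised by the new tests is used roughly as follows; a minimal sketch assuming the public `tf.train.CheckpointOptions` API, with placeholder paths:

import tensorflow as tf

ckpt = tf.train.Checkpoint(v=tf.Variable(1.0))
options = tf.train.CheckpointOptions(experimental_io_device="/job:localhost")

# Checkpoint file I/O is routed through the given job/device.
path = ckpt.save("/tmp/ckpt", options=options)
ckpt.restore(path, options=options)
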
PiperOrigin-RevId: 322676506 Change-Id: I79d1ebbeceba5feaf70dcf308e03c41fe180ec80 --- .../keras/distribute/checkpointing_test.py | 41 +++++++++++++++++++ .../distribute/saved_model_save_load_test.py | 29 +++++++++++++ 2 files changed, 70 insertions(+) diff --git a/tensorflow/python/keras/distribute/checkpointing_test.py b/tensorflow/python/keras/distribute/checkpointing_test.py index 77c335fe46d..b9689adede9 100644 --- a/tensorflow/python/keras/distribute/checkpointing_test.py +++ b/tensorflow/python/keras/distribute/checkpointing_test.py @@ -29,6 +29,7 @@ from tensorflow.python.eager import test from tensorflow.python.keras.optimizer_v2 import adam from tensorflow.python.ops import random_ops from tensorflow.python.ops import variables as variables_lib +from tensorflow.python.training.saving import checkpoint_options from tensorflow.python.training.tracking import util as trackable_utils @@ -93,6 +94,46 @@ class TrainingCheckpointTests(test.TestCase, parameterized.TestCase): ValueError, "optimizer slot variable under the scope"): checkpoint.restore(save_path) + @combinations.generate( + combinations.combine( + distribution=[ + strategy_combinations.mirrored_strategy_with_one_cpu, + strategy_combinations.mirrored_strategy_with_gpu_and_cpu, + strategy_combinations.cloud_tpu_strategy, + strategy_combinations.tpu_strategy, + strategy_combinations.tpu_strategy_packed_var, + strategy_combinations.central_storage_strategy_with_two_gpus, + ], + mode=["eager"])) + def testCheckpointSaveRestoreIoDevice(self, distribution): + + def state(): + with distribution.scope(): + v = variables_lib.Variable(random_ops.random_normal([])) + return v + + ckpt_options = checkpoint_options.CheckpointOptions( + experimental_io_device="/job:localhost") + + def checkpoint(): + v = state() + # Save random weights into checkpoint. + checkpoint = trackable_utils.Checkpoint(v=v) + prefix = os.path.join(self.get_temp_dir(), "ckpt") + with self.test_session(): + save_path = checkpoint.save(prefix, options=ckpt_options) + return save_path + + save_path = checkpoint() + + v = state() + checkpoint = trackable_utils.Checkpoint(v=v) + # Restore from the checkpoint inside a distribution.scope(). + # Check that restore works without error. 
+ with self.test_session(): + with distribution.scope(): + checkpoint.restore(save_path, options=ckpt_options) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/keras/distribute/saved_model_save_load_test.py b/tensorflow/python/keras/distribute/saved_model_save_load_test.py index c8f47386783..39856af2a20 100644 --- a/tensorflow/python/keras/distribute/saved_model_save_load_test.py +++ b/tensorflow/python/keras/distribute/saved_model_save_load_test.py @@ -18,11 +18,17 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os + from tensorflow.python.distribute import combinations +from tensorflow.python.distribute import strategy_combinations from tensorflow.python.eager import test from tensorflow.python.framework import tensor_spec +from tensorflow.python.keras.distribute import model_combinations from tensorflow.python.keras.distribute import saved_model_test_base as test_base from tensorflow.python.ops import array_ops +from tensorflow.python.saved_model import load_options as load_options_lib +from tensorflow.python.saved_model import save_options as save_options_lib from tensorflow.python.saved_model import saved_model @@ -146,6 +152,29 @@ class SavedModelTFModuleTest(test_base.TestSavedModelBase): distribution_for_restoring, save_in_scope) + @combinations.generate( + combinations.combine( + model_and_input=[model_combinations.simple_tfmodule_model], + distribution=test_base.strategies + + [strategy_combinations.cloud_tpu_strategy])) + def test_save_load_io_device(self, model_and_input, distribution): + saved_dir = os.path.join(self.get_temp_dir(), 'io_device') + with distribution.scope(): + model = model_and_input.get_model() + x_train, y_train, _ = model_and_input.get_data() + batch_size = model_and_input.get_batch_size() + self._train_model(model, x_train, y_train, batch_size) + call = model.__call__.get_concrete_function(tensor_spec.TensorSpec(None)) + save_options = save_options_lib.SaveOptions( + experimental_io_device='/job:localhost') + saved_model.save(model, saved_dir, signatures=call, options=save_options) + load_options = load_options_lib.LoadOptions( + experimental_io_device='/job:localhost') + # Check that the model can be loaded and training continued without error. + with distribution.scope(): + loaded_model = saved_model.load(saved_dir, options=load_options) + self._train_model(loaded_model, x_train, y_train, batch_size) + if __name__ == '__main__': test.main() From f9dd45444add0f0333df136a716726bec5277ea0 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Wed, 22 Jul 2020 16:10:48 -0700 Subject: [PATCH 1096/2522] By default, hide all compiler warnings. Also create a new option to show the warnings. Yes, warnings are useful, but pip package build log is now 85 MB. the logs are becoming unloadable on the browser. PiperOrigin-RevId: 322676534 Change-Id: I080b113f62c75c26997c5525391f8ccfad0ef99c --- .bazelrc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.bazelrc b/.bazelrc index 82bb0605b08..6fe60261538 100644 --- a/.bazelrc +++ b/.bazelrc @@ -28,6 +28,7 @@ # # Other build options: # short_logs: Only log errors during build, skip warnings. +# verbose_logs: Show all compiler warnings during build. # monolithic: Build all TF C++ code into a single shared object. # dynamic_kernels: Try to link all kernels dynamically (experimental). 
# libc++: Link against libc++ instead of stdlibc++ @@ -331,6 +332,8 @@ build:windows --distinct_host_configuration=false # Suppress all warning messages. build:short_logs --output_filter=DONT_MATCH_ANYTHING +build:verbose_logs --output_filter= +build --config=short_logs # Instruction set optimizations # TODO(gunan): Create a feature in toolchains for avx/avx2 to From ac9c8e2db5eebc43c57f37056569ac04277d3615 Mon Sep 17 00:00:00 2001 From: Saurabh Saxena Date: Wed, 22 Jul 2020 16:16:08 -0700 Subject: [PATCH 1097/2522] Make AbstractTensorHandle RefCounted so that we can use it for refcounting tensors under a GradientTape. PiperOrigin-RevId: 322677493 Change-Id: I054d6127d6ec159be197f524ee6190c2537b1662 --- tensorflow/c/eager/BUILD | 1 + tensorflow/c/eager/abstract_tensor_handle.h | 13 +++---------- .../c/eager/c_api_unified_experimental.cc | 2 +- .../eager/c_api_unified_experimental_graph.cc | 1 - tensorflow/c/eager/gradients.cc | 19 ++++--------------- tensorflow/c/eager/gradients_test.cc | 14 +++++++------- .../eager/immediate_execution_tensor_handle.h | 8 ++++++++ .../c/c_api_unified_experimental_mlir.cc | 2 -- .../core/common_runtime/eager/tensor_handle.h | 3 +-- 9 files changed, 25 insertions(+), 38 deletions(-) diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index dca35b78e0f..61701bc8b21 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -262,6 +262,7 @@ cc_library( ], deps = [ "//tensorflow/core:protos_all_cc", + "//tensorflow/core/platform:refcount", ], ) diff --git a/tensorflow/c/eager/abstract_tensor_handle.h b/tensorflow/c/eager/abstract_tensor_handle.h index de041690420..37e6d1bf29c 100644 --- a/tensorflow/c/eager/abstract_tensor_handle.h +++ b/tensorflow/c/eager/abstract_tensor_handle.h @@ -18,11 +18,12 @@ limitations under the License. #include #include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/platform/refcount.h" namespace tensorflow { // Abstract interface to a Tensor handle in either tracing or immediate // execution mode. -class AbstractTensorHandle { +class AbstractTensorHandle : public core::RefCounted { protected: enum AbstractTensorHandleKind { kGraph, kMlir, kEager, kTfrt }; explicit AbstractTensorHandle(AbstractTensorHandleKind kind) : kind_(kind) {} @@ -34,14 +35,6 @@ class AbstractTensorHandle { AbstractTensorHandleKind getKind() const { return kind_; } - // Release any underlying resources, including the interface object. - // - // WARNING: The destructor of this class is marked as protected to disallow - // clients from directly destroying this object since it may manage it's own - // lifetime through ref counting. Thus this must be allocated on the heap and - // clients MUST call Release() in order to destroy an instance of this class. 
- virtual void Release() = 0; - private: const AbstractTensorHandleKind kind_; }; @@ -50,7 +43,7 @@ namespace internal { struct AbstractTensorHandleDeleter { void operator()(AbstractTensorHandle* p) const { if (p != nullptr) { - p->Release(); + p->Unref(); } } }; diff --git a/tensorflow/c/eager/c_api_unified_experimental.cc b/tensorflow/c/eager/c_api_unified_experimental.cc index 605a60c186c..8408f7ef60f 100644 --- a/tensorflow/c/eager/c_api_unified_experimental.cc +++ b/tensorflow/c/eager/c_api_unified_experimental.cc @@ -147,7 +147,7 @@ TF_AbstractOp* TF_NewAbstractOp(TF_ExecutionContext* c) { void TF_DeleteAbstractOp(TF_AbstractOp* op) { unwrap(op)->Release(); } -void TF_DeleteAbstractTensor(TF_AbstractTensor* t) { unwrap(t)->Release(); } +void TF_DeleteAbstractTensor(TF_AbstractTensor* t) { unwrap(t)->Unref(); } TF_OutputList* TF_NewOutputList() { return wrap(new OutputList); } void TF_DeleteOutputList(TF_OutputList* o) { delete unwrap(o); } diff --git a/tensorflow/c/eager/c_api_unified_experimental_graph.cc b/tensorflow/c/eager/c_api_unified_experimental_graph.cc index 6c903560e52..7bda3aed76d 100644 --- a/tensorflow/c/eager/c_api_unified_experimental_graph.cc +++ b/tensorflow/c/eager/c_api_unified_experimental_graph.cc @@ -49,7 +49,6 @@ class GraphTensor : public TracingTensorHandle { public: explicit GraphTensor(TF_Output output) : TracingTensorHandle(kGraph), output_(output) {} - void Release() override { delete this; } tensorflow::DataType DataType() const override { return static_cast(TF_OperationOutputType(output_)); diff --git a/tensorflow/c/eager/gradients.cc b/tensorflow/c/eager/gradients.cc index f5085fdb926..cf62dcea926 100644 --- a/tensorflow/c/eager/gradients.cc +++ b/tensorflow/c/eager/gradients.cc @@ -51,25 +51,14 @@ int64 ToId(AbstractTensorHandle* t) { TapeTensor::TapeTensor(AbstractTensorHandle* handle, AbstractContext* ctx) : handle_(handle), ctx_(ctx) { - // TODO(b/160888114): Make AbstractTensorHandle RefCounted. Right now we rely - // on the client to keep this tensor live for the duration of the gradient - // computation. - // handle_->Ref(); + handle_->Ref(); } TapeTensor::TapeTensor(const TapeTensor& other) { handle_ = other.handle_; - // TODO(b/160888114): Make AbstractTensorHandle RefCounted. Right now we rely - // on the client to keep this tensor live for the duration of the gradient - // computation. - // handle_->Ref(); + handle_->Ref(); ctx_ = other.ctx_; } -TapeTensor::~TapeTensor() { - // TODO(b/160888114): Make AbstractTensorHandle RefCounted. Right now we rely - // on the client to keep this tensor live for the duration of the gradient - // computation. 
- // handle_->Unref(); -} +TapeTensor::~TapeTensor() { handle_->Unref(); } tensorflow::int64 TapeTensor::GetID() const { return ToId(handle_); } @@ -192,7 +181,7 @@ TapeTensor TapeVSpace::TapeTensorFromGradient(AbstractTensorHandle* g) const { void TapeVSpace::MarkAsResult(AbstractTensorHandle* gradient) const {} void TapeVSpace::DeleteGradient(AbstractTensorHandle* gradient) const { - gradient->Release(); + gradient->Unref(); } // Helper functions which delegate to `AbstractOperation`, update diff --git a/tensorflow/c/eager/gradients_test.cc b/tensorflow/c/eager/gradients_test.cc index 0a3d267e937..e02f189c3d2 100644 --- a/tensorflow/c/eager/gradients_test.cc +++ b/tensorflow/c/eager/gradients_test.cc @@ -93,7 +93,7 @@ Status AddGradModel(AbstractContext* ctx, source_tensors_that_are_targets, /*output_gradients=*/{}, &out_grads)); for (auto add_output : add_outputs) { - add_output->Release(); + add_output->Unref(); } outputs[0] = out_grads[0]; outputs[1] = out_grads[1]; @@ -144,14 +144,14 @@ Status RunModel(Model model, AbstractContext* ctx, TF_RETURN_IF_ERROR(model(func_ctx.get(), absl::MakeSpan(func_inputs), absl::MakeSpan(output_list.outputs), registry)); for (auto func_input : func_inputs) { - func_input->Release(); + func_input->Unref(); } AbstractFunction* func = nullptr; TF_RETURN_IF_ERROR(dyn_cast(func_ctx.get()) ->Finalize(&output_list, &func)); scoped_func.reset(func); - output_list.outputs[0]->Release(); - output_list.outputs[1]->Release(); + output_list.outputs[0]->Unref(); + output_list.outputs[1]->Unref(); TF_RETURN_IF_ERROR(ctx->RegisterFunction(func)); } @@ -252,7 +252,7 @@ TEST_P(CppGradients, TestAddGrad) { ASSERT_EQ(errors::OK, s.code()) << s.error_message(); auto result_value = static_cast(TF_TensorData(result_tensor)); EXPECT_EQ(*result_value, 1.0); - outputs[0]->Release(); + outputs[0]->Unref(); TF_DeleteTensor(result_tensor); result_tensor = nullptr; @@ -260,7 +260,7 @@ TEST_P(CppGradients, TestAddGrad) { ASSERT_EQ(errors::OK, s.code()) << s.error_message(); result_value = static_cast(TF_TensorData(result_tensor)); EXPECT_EQ(*result_value, 1.0); - outputs[1]->Release(); + outputs[1]->Unref(); TF_DeleteTensor(result_tensor); } @@ -270,7 +270,7 @@ TEST_P(CppGradients, TestAddGrad) { INSTANTIATE_TEST_SUITE_P( UnifiedCAPI, CppGradients, ::testing::Combine(::testing::Values("graphdef"), - /*tfrt*/ ::testing::Values(false), + /*tfrt*/ ::testing::Values(true, false), /*executing_eagerly*/ ::testing::Values(true, false))); #else INSTANTIATE_TEST_SUITE_P( diff --git a/tensorflow/c/eager/immediate_execution_tensor_handle.h b/tensorflow/c/eager/immediate_execution_tensor_handle.h index f7c77aa06db..6d32d482747 100644 --- a/tensorflow/c/eager/immediate_execution_tensor_handle.h +++ b/tensorflow/c/eager/immediate_execution_tensor_handle.h @@ -50,6 +50,14 @@ class ImmediateExecutionTensorHandle : public AbstractTensorHandle { // Return a copy of the handle. virtual ImmediateExecutionTensorHandle* Copy() = 0; + // Release any underlying resources, including the interface object. + // + // WARNING: The destructor of this class is marked as protected to disallow + // clients from directly destroying this object since it may manage it's own + // lifetime through ref counting. Thus this must be allocated on the heap and + // clients MUST call Release() in order to destroy an instance of this class. + virtual void Release() = 0; + // For LLVM style RTTI. 
static bool classof(const AbstractTensorHandle* ptr) { return ptr->getKind() == kEager || ptr->getKind() == kTfrt; diff --git a/tensorflow/compiler/mlir/tensorflow/c/c_api_unified_experimental_mlir.cc b/tensorflow/compiler/mlir/tensorflow/c/c_api_unified_experimental_mlir.cc index ffd9c149d2d..51890c1e9ee 100644 --- a/tensorflow/compiler/mlir/tensorflow/c/c_api_unified_experimental_mlir.cc +++ b/tensorflow/compiler/mlir/tensorflow/c/c_api_unified_experimental_mlir.cc @@ -102,8 +102,6 @@ class MlirTensor : public TracingTensorHandle { return type; } - void Release() override { delete this; } - Value getValue() { return value_; } // For LLVM style RTTI. diff --git a/tensorflow/core/common_runtime/eager/tensor_handle.h b/tensorflow/core/common_runtime/eager/tensor_handle.h index 007ba33f231..99f88fe886a 100644 --- a/tensorflow/core/common_runtime/eager/tensor_handle.h +++ b/tensorflow/core/common_runtime/eager/tensor_handle.h @@ -53,8 +53,7 @@ class EagerContext; // Associates a Tensor and a Device, used in the eager runtime. Internal version // of the TFE_TensorHandle struct and the python EagerTensor class // (unrelated to python TensorHandle). -class TensorHandle : public ImmediateExecutionTensorHandle, - public core::RefCounted { +class TensorHandle : public ImmediateExecutionTensorHandle { // TensorHandle for dtype != DT_RESOURCE TensorHandle(tensorflow::Tensor&& t, Device* d, Device* op_device, Device* resource_device, EagerContext* ctx); From d75d7d19afad259ca26b114c5dd1549b59ebb5d0 Mon Sep 17 00:00:00 2001 From: Anna R Date: Wed, 22 Jul 2020 16:18:02 -0700 Subject: [PATCH 1098/2522] Support integer input and output type for Quantize-Aware Trained models PiperOrigin-RevId: 322677886 Change-Id: Ife33cb21cf928905007c48b301c967cdecdb2866 --- RELEASE.md | 4 +- tensorflow/lite/python/BUILD | 15 -- tensorflow/lite/python/lite.py | 29 +-- tensorflow/lite/python/lite_v2_test.py | 47 +++-- tensorflow/lite/python/util.py | 265 ------------------------- tensorflow/lite/python/util_test.py | 163 --------------- 6 files changed, 32 insertions(+), 491 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 15512c8d4e5..243807b705b 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -58,8 +58,6 @@ * `tf.lite`: * Better support for ops with high-dimensional broadcasting inputs by adding `BroadcastTo` ops when necessary. - * `TFLiteConverter`: - * Support optional flags `inference_input_type` and `inference_output_type` for full integer quantized models. This allows users to modify the model input and output type to integer types (tf.int8, tf.uint8) instead of defaulting to float type (tf.float32). * `tf.random`: * * Math and Linear Algebra: @@ -72,7 +70,7 @@ * * Other: * We have replaced uses of "whitelist" and "blacklist" with "allowlist" - and "denylist" where possible. Please see + and "denylist" where possible. Please see https://developers.google.com/style/word-list#blacklist for more context. 
* diff --git a/tensorflow/lite/python/BUILD b/tensorflow/lite/python/BUILD index 55a2a69675d..e26000c810a 100644 --- a/tensorflow/lite/python/BUILD +++ b/tensorflow/lite/python/BUILD @@ -212,11 +212,8 @@ py_library( deps = [ ":lite_constants", ":op_hint", - ":schema_py", "//tensorflow/python:tf_optimizer", "//tensorflow/python/eager:wrap_function", - "@absl_py//absl/logging", - "@flatbuffers//:runtime_py", "@six_archive//:six", ], ) @@ -227,24 +224,12 @@ py_test( python_version = "PY3", srcs_version = "PY2AND3", tags = [ - "no_mac", "no_windows", ], deps = [ - ":lite_constants", ":util", - "//tensorflow:tensorflow_py", - "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", - "//tensorflow/python:control_flow_ops", - "//tensorflow/python:convert_to_constants", - "//tensorflow/python:dtypes", - "//tensorflow/python:framework_ops", "//tensorflow/python:framework_test_lib", - "//tensorflow/python:math_ops", - "//tensorflow/python:session", - "//third_party/py/numpy", - "@absl_py//absl/testing:parameterized", "@six_archive//:six", ], ) diff --git a/tensorflow/lite/python/lite.py b/tensorflow/lite/python/lite.py index a08b40bbed6..e919aa4b00f 100644 --- a/tensorflow/lite/python/lite.py +++ b/tensorflow/lite/python/lite.py @@ -61,7 +61,6 @@ from tensorflow.lite.python.util import get_grappler_config as _get_grappler_con from tensorflow.lite.python.util import get_tensor_name as _get_tensor_name from tensorflow.lite.python.util import get_tensors_from_tensor_names as _get_tensors_from_tensor_names from tensorflow.lite.python.util import is_frozen_graph as _is_frozen_graph -from tensorflow.lite.python.util import modify_integer_quantized_model_io_type as _modify_integer_quantized_model_io_type from tensorflow.lite.python.util import run_graph_optimizations as _run_graph_optimizations from tensorflow.lite.python.util import set_tensor_shapes as _set_tensor_shapes from tensorflow.python import keras as _keras @@ -315,23 +314,6 @@ class QuantizationMode(object): else: return False, None - def flags_modify_model_io_type( - self, input_type=constants.FLOAT, output_type=constants.FLOAT): - """Flags for modifying the input and output type of a tflite model.""" - is_post_training_quantize = self.quantizer_flags(input_type, output_type)[0] - is_training_time_only_quantize = self.training_time_int8_allow_float() and \ - not is_post_training_quantize - - # TODO(b/153576658): Consolidate post/during training quantization workflows - # to modify model input/output type after MLIR conversion. - if is_training_time_only_quantize: - return { - "inference_input_type": input_type, - "inference_output_type": output_type, - } - else: - return None - # Below are helpers for the above functions. def _validate_int8_required(self): @@ -575,8 +557,9 @@ class TFLiteConverterBaseV2(TFLiteConverterBase): def _validate_inference_input_output_types(self, quant_mode): """Validate inference_input_type and inference_output_type flags.""" default_types = [constants.FLOAT, None] - # We support integer input/output for integer quantized models only. - if quant_mode.training_time_int8_allow_float(): + # We only support integer types for post training integer quantization + # as we have statistical information to quantize the input and output. 
+ if quant_mode.is_post_training_integer_quantize(): all_types = default_types + [constants.INT8, constants.QUANTIZED_UINT8] if self.inference_input_type not in all_types or \ self.inference_output_type not in all_types: @@ -660,12 +643,6 @@ class TFLiteConverterBaseV2(TFLiteConverterBase): if calibrate_and_quantize: result = self._calibrate_quantize_model(result, **flags) - flags_modify_model_io_type = quant_mode.flags_modify_model_io_type( - self.inference_input_type, self.inference_output_type) - if flags_modify_model_io_type: - result = _modify_integer_quantized_model_io_type( - result, **flags_modify_model_io_type) - if self._experimental_sparsify_model: result = _mlir_sparsify(result) diff --git a/tensorflow/lite/python/lite_v2_test.py b/tensorflow/lite/python/lite_v2_test.py index 4093a9d5bb4..6fab4fd6086 100644 --- a/tensorflow/lite/python/lite_v2_test.py +++ b/tensorflow/lite/python/lite_v2_test.py @@ -374,12 +374,8 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): return tf.keras.Sequential(QLinear(3, input_shape=(2,))) - @parameterized.named_parameters( - ('_DefaultFLOAT32InputOutput', lite.constants.FLOAT), - ('_INT8InputOutput', lite.constants.INT8), - ('_UINT8InputOutput', lite.constants.QUANTIZED_UINT8)) @test_util.run_v2_only - def testTrainingTimeQuantization(self, inference_input_output_type): + def testTrainingTimeQuantization(self): model = self._getTrainingTimeQuantizedModel() float_converter = lite.TFLiteConverterV2.from_keras_model(model) @@ -388,25 +384,38 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): quantized_converter = lite.TFLiteConverterV2.from_keras_model(model) quantized_converter.optimizations = [lite.Optimize.DEFAULT] - quantized_converter.inference_input_type = inference_input_output_type - quantized_converter.inference_output_type = inference_input_output_type quantized_tflite = quantized_converter.convert() self.assertTrue(quantized_tflite) - interpreter = Interpreter(model_content=quantized_tflite) - interpreter.allocate_tensors() - input_details = interpreter.get_input_details() - self.assertLen(input_details, 1) - self.assertEqual(inference_input_output_type.as_numpy_dtype, - input_details[0]['dtype']) - output_details = interpreter.get_output_details() - self.assertLen(output_details, 1) - self.assertEqual(inference_input_output_type.as_numpy_dtype, - output_details[0]['dtype']) - - # Ensure that the quantized tflite model is smaller. + # Ensure that the quantized weights tflite model is smaller. self.assertLess(len(quantized_tflite), len(float_tflite)) + interpreter = Interpreter(model_content=quantized_tflite) + self.assertEqual(np.float32, interpreter.get_input_details()[0]['dtype']) + + @parameterized.named_parameters( + ('_INT8InputOutput', lite.constants.INT8), + ('_UINT8InputOutput', lite.constants.QUANTIZED_UINT8)) + def testInvalidTrainingTimeQuantization(self, inference_input_output_type): + # We currently don't support integer inference_input_type and + # inference_output_type flags for training time quantization. 
+ + model = self._getTrainingTimeQuantizedModel() + + converter = lite.TFLiteConverterV2.from_keras_model(model) + tflite_model = converter.convert() + self.assertTrue(tflite_model) + + quantized_converter = lite.TFLiteConverterV2.from_keras_model(model) + quantized_converter.optimizations = [lite.Optimize.DEFAULT] + with self.assertRaises(ValueError) as error: + quantized_converter.inference_input_type = inference_input_output_type + quantized_converter.inference_output_type = inference_input_output_type + quantized_converter.convert() + self.assertEqual( + 'The inference_input_type and inference_output_type ' + 'must be tf.float32.', str(error.exception)) + @test_util.run_v2_only def testNewQuantizer(self): """Test the model quantized by the new converter.""" diff --git a/tensorflow/lite/python/util.py b/tensorflow/lite/python/util.py index 9f84681c12b..ff7caad0f88 100644 --- a/tensorflow/lite/python/util.py +++ b/tensorflow/lite/python/util.py @@ -19,21 +19,15 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import copy import datetime import sys -from absl import logging - import six from six.moves import range -from flatbuffers.python import flatbuffers from tensorflow.core.protobuf import config_pb2 as _config_pb2 from tensorflow.core.protobuf import graph_debug_info_pb2 from tensorflow.core.protobuf import meta_graph_pb2 as _meta_graph_pb2 -from tensorflow.lite.python import lite_constants as _lite_constants -from tensorflow.lite.python import schema_py_generated as _schema_fb from tensorflow.lite.python.op_hint import convert_op_hints_to_stubs from tensorflow.lite.python.op_hint import find_all_hinted_output_nodes from tensorflow.lite.toco import types_pb2 as _types_pb2 @@ -61,25 +55,6 @@ _MAP_TF_TO_TFLITE_TYPES = { dtypes.bool: _types_pb2.BOOL, } -_MAP_TFLITE_ENUM_TO_TF_TYPES = { - 0: dtypes.float32, - 1: dtypes.float16, - 2: dtypes.int32, - 3: dtypes.uint8, - 4: dtypes.int64, - 5: dtypes.string, - 6: dtypes.bool, - 7: dtypes.int16, - 8: dtypes.complex64, - 9: dtypes.int8, - 10: dtypes.float64, -} - -_TFLITE_FILE_IDENTIFIER = b"TFL3" - -_TFLITE_MODEL_INPUT_OUTPUT_TYPES = (_lite_constants.FLOAT, _lite_constants.INT8, - _lite_constants.QUANTIZED_UINT8) - def convert_dtype_to_tflite_type(tf_dtype): """Converts tf.dtype to TFLite proto type. @@ -99,31 +74,6 @@ def convert_dtype_to_tflite_type(tf_dtype): return result -def _convert_tflite_enum_type_to_tf_type(tflite_enum_type): - """Converts tflite enum type (eg: 0) to tf type (eg: tf.float32). - - Args: - tflite_enum_type: tflite enum type (eg: 0, that corresponds to float32) - - Raises: - ValueError: If an invalid tflite enum type is provided. - - Returns: - tf type (eg: tf.float32) - """ - tf_type = _MAP_TFLITE_ENUM_TO_TF_TYPES.get(tflite_enum_type) - if tf_type is None: - raise ValueError( - "Unsupported enum {}. The valid map of enum to tf.dtypes is : {}" - .format(tflite_enum_type, _MAP_TFLITE_ENUM_TO_TF_TYPES)) - return tf_type - - -def _get_dtype_name(tf_type): - """Converts tf.dtype (eg: tf.float32) to str (eg: "tf.float32").""" - return "tf." + tf_type.name - - def get_tensor_name(tensor): """Returns name of the input tensor. 
@@ -564,218 +514,3 @@ extern const int {array_name}_len; license_text=license_text) return source_text, header_text - - -def _convert_model_from_bytearray_to_object(model_bytearray): - """Converts a tflite model from a bytearray into a parsable object.""" - model_object = _schema_fb.Model.GetRootAsModel(model_bytearray, 0) - model_object = _schema_fb.ModelT.InitFromObj(model_object) - model_object = copy.deepcopy(model_object) - model_object.subgraphs[0].inputs[0] = model_object.subgraphs[0].inputs[0] - return model_object - - -def _convert_model_from_object_to_bytearray(model_object): - """Converts a tflite model from a parsable object into a bytearray.""" - # Initial size of the buffer, which will grow automatically if needed - builder = flatbuffers.Builder(1024) - model_offset = model_object.Pack(builder) - builder.Finish(model_offset, file_identifier=_TFLITE_FILE_IDENTIFIER) - return bytes(builder.Output()) - - -def _remove_tensors_from_model(model, remove_tensors_idxs): - """Remove tensors from model.""" - if not remove_tensors_idxs: - return - if len(model.subgraphs) > 1: - raise ValueError("Model must only have one subgraph. Instead, it has " - "{} subgraphs.".format(len(model.subgraphs))) - subgraph = model.subgraphs[0] - tensors = subgraph.tensors - operators = subgraph.operators - - logging.debug("Removing tensors at indices : %s", remove_tensors_idxs) - # An optimized check to validate if "remove_tensors_idxs" (eg: [4,5,6]) is an - # exact subset, with ordering, of "tensors" indices (eg: [0,1,2,3,4,5,6]). - if min(remove_tensors_idxs) == len(tensors) - len(remove_tensors_idxs): - logging.debug("Removing tensors only at the end of the tensor list") - del tensors[min(remove_tensors_idxs):] - else: - logging.debug("Removing tensors requires updating the model") - # Map the old tensor indices to new tensor indices - d_old_to_new_tensors = {} - left_shift_by = 0 - for idx in range(len(tensors)): - if idx in remove_tensors_idxs: - left_shift_by += 1 - else: - d_old_to_new_tensors[idx] = idx - left_shift_by - logging.debug("Old to new tensors map: %s", d_old_to_new_tensors.__str__()) - # Update tensor indices referenced throughout the model - def update_tensors(tensor_idxs): - for i, ti in enumerate(tensor_idxs): - tensor_idxs[i] = d_old_to_new_tensors.get(ti, -1) - update_tensors(subgraph.inputs) - update_tensors(subgraph.outputs) - for op in operators: - update_tensors(op.inputs) - update_tensors(op.outputs) - # Delete the tensors - for idx in sorted(remove_tensors_idxs, reverse=True): - tensors.pop(idx) - logging.debug("Removed tensors marked for deletion") - - -def _validate_and_find_int8_quantized_inputs_outputs(model): - """Validate that model input is quantized and output is dequantized.""" - if len(model.subgraphs) > 1: - raise ValueError("Model must only have one subgraph. 
Instead, it has " - "{} subgraphs.".format(len(model.subgraphs))) - subgraph = model.subgraphs[0] - tensors = subgraph.tensors - operators = subgraph.operators - - # Ensure model has atleast one quantize and dequantize operator - quant_opcode_idx, dequant_opcode_idx = None, None - for idx, opcode in enumerate(model.operatorCodes): - if opcode.builtinCode == _schema_fb.BuiltinOperator.QUANTIZE: - quant_opcode_idx = idx - elif opcode.builtinCode == _schema_fb.BuiltinOperator.DEQUANTIZE: - dequant_opcode_idx = idx - if quant_opcode_idx is not None and dequant_opcode_idx is not None: - break - if quant_opcode_idx is None and dequant_opcode_idx is None: - raise ValueError("Model is not integer quantized as it does not " - "contain quantize/dequantize operators.") - - # Ensure model inputs and outputs are integer quantized - input_quant_ops, output_dequant_ops = [], [] - for op in operators: - # Find input quantize operator - if op.opcodeIndex == quant_opcode_idx and op.inputs[0] in subgraph.inputs: - pos, float_tensor, int_tensor = \ - "input", tensors[op.inputs[0]], tensors[op.outputs[0]] - input_quant_ops.append(op) - # Find output dequantize operator - elif op.opcodeIndex == dequant_opcode_idx and \ - op.outputs[0] in subgraph.outputs: - pos, float_tensor, int_tensor = \ - "output", tensors[op.outputs[0]], tensors[op.inputs[0]] - output_dequant_ops.append(op) - # Otherwise, ignore - else: - continue - # If found, validate the input/output tensor type - if float_tensor.type != _schema_fb.TensorType.FLOAT32: - raise ValueError( - "Model {} type must be tf.float32. Expected type for tensor with " - "name '{}' is tf.float32, instead type is tf.{}".format( - pos, float_tensor.name, - _convert_tflite_enum_type_to_tf_type(float_tensor.type).name)) - if int_tensor.type != _schema_fb.TensorType.INT8: - raise ValueError( - "Model is not integer quantized. Expected type for tensor with " - "name '{}' is tf.int8, instead type is tf.{}".format( - int_tensor.name, - _convert_tflite_enum_type_to_tf_type(int_tensor.type).name)) - - return input_quant_ops, output_dequant_ops - - -def modify_integer_quantized_model_io_type( - model, inference_input_type=_lite_constants.FLOAT, - inference_output_type=_lite_constants.FLOAT): - """Modify the float input/output type of an integer quantized model. - - Args: - model: An int8 quantized tflite model with float input and output. - inference_input_type: tf.DType representing final input type. - (default tf.float32) - inference_output_type: tf.DType representing final output type. - (default tf.float32) - - Returns: - An int8 quantized tflite model with modified input and/or output type. - - Raises: - ValueError: If the model is not int8 quantized or the inference_input_type - and/or inference_input_type is unsupported. - RuntimeError: If the modification was unsuccessful. 
- - """ - # Return if input and output types default to float - if inference_input_type == _lite_constants.FLOAT and \ - inference_output_type == _lite_constants.FLOAT: - return model - - # Validate input and output types - if inference_input_type not in _TFLITE_MODEL_INPUT_OUTPUT_TYPES: - raise ValueError("The `inference_input_type` should be in {}".format( - tuple(_get_dtype_name(t) for t in _TFLITE_MODEL_INPUT_OUTPUT_TYPES))) - if inference_output_type not in _TFLITE_MODEL_INPUT_OUTPUT_TYPES: - raise ValueError("The `inference_output_type` should be in {}".format( - tuple(_get_dtype_name(t) for t in _TFLITE_MODEL_INPUT_OUTPUT_TYPES))) - - logging.debug(("Attempting to modify the model input from tf.float32 to %s " - "and output from tf.float32 to %s"), - _get_dtype_name(inference_input_type), - _get_dtype_name(inference_output_type)) - # Convert the model to an object - model = _convert_model_from_bytearray_to_object(model) - - # Validate the integer quantized model - input_quant_ops, output_dequant_ops = \ - _validate_and_find_int8_quantized_inputs_outputs(model) - - # Initialize references and variables - if len(model.subgraphs) > 1: - raise ValueError("Model must only have one subgraph. Instead, it has " - "{} subgraphs.".format(len(model.subgraphs))) - subgraph = model.subgraphs[0] - tensors = subgraph.tensors - operators = subgraph.operators - remove_tensors_idxs = set() - - # Modify model input type - if inference_input_type == _lite_constants.QUANTIZED_UINT8: - # Change quant op (float to int8) to quant op (uint8 to int8) - for op in input_quant_ops: - int8_quantization = tensors[op.outputs[0]].quantization - uint8_quantization = _schema_fb.QuantizationParametersT() - uint8_quantization.scale = [int8_quantization.scale[0]] - uint8_quantization.zeroPoint = [int8_quantization.zeroPoint[0] + 128] - tensors[op.inputs[0]].quantization = uint8_quantization - tensors[op.inputs[0]].type = _schema_fb.TensorType.UINT8 - elif inference_input_type == _lite_constants.INT8: - # Remove the inputs and the quant operator - for op in input_quant_ops: - subgraph.inputs[subgraph.inputs == op.inputs[0]] = op.outputs[0] - remove_tensors_idxs.add(op.inputs[0]) - operators.remove(op) - - # Modify model output type - if inference_output_type == _lite_constants.QUANTIZED_UINT8: - # Change dequant op (int8 to float) to quant op (int8 to uint8) - for op in output_dequant_ops: - op.opcodeIndex = input_quant_ops[0].opcodeIndex - int8_quantization = tensors[op.inputs[0]].quantization - uint8_quantization = _schema_fb.QuantizationParametersT() - uint8_quantization.scale = [int8_quantization.scale[0]] - uint8_quantization.zeroPoint = [int8_quantization.zeroPoint[0] + 128] - tensors[op.outputs[0]].quantization = uint8_quantization - tensors[op.outputs[0]].type = _schema_fb.TensorType.UINT8 - elif inference_output_type == _lite_constants.INT8: - # Remove the outputs and the dequant operator - for op in output_dequant_ops: - subgraph.outputs[subgraph.outputs == op.outputs[0]] = op.inputs[0] - remove_tensors_idxs.add(op.outputs[0]) - operators.remove(op) - - # Remove tensors marked for deletion. 
- _remove_tensors_from_model(model, remove_tensors_idxs) - - # Convert the model to a bytearray - model = _convert_model_from_object_to_bytearray(model) - - return model diff --git a/tensorflow/lite/python/util_test.py b/tensorflow/lite/python/util_test.py index 0e9cbc1e58a..f3c287dd7fc 100644 --- a/tensorflow/lite/python/util_test.py +++ b/tensorflow/lite/python/util_test.py @@ -19,10 +19,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from absl.testing import parameterized -import numpy as np from six.moves import range -import tensorflow as tf from tensorflow.lite.python import lite_constants from tensorflow.lite.python import util @@ -64,31 +61,6 @@ class UtilTest(test_util.TensorFlowTestCase): self.assertEqual( util.convert_dtype_to_tflite_type(dtypes.bool), _types_pb2.BOOL) - def testConvertEnumToDtype(self): - self.assertEqual( - util._convert_tflite_enum_type_to_tf_type(0), dtypes.float32) - self.assertEqual( - util._convert_tflite_enum_type_to_tf_type(1), dtypes.float16) - self.assertEqual(util._convert_tflite_enum_type_to_tf_type(2), dtypes.int32) - self.assertEqual(util._convert_tflite_enum_type_to_tf_type(3), dtypes.uint8) - self.assertEqual(util._convert_tflite_enum_type_to_tf_type(4), dtypes.int64) - self.assertEqual( - util._convert_tflite_enum_type_to_tf_type(5), dtypes.string) - self.assertEqual(util._convert_tflite_enum_type_to_tf_type(6), dtypes.bool) - self.assertEqual(util._convert_tflite_enum_type_to_tf_type(7), dtypes.int16) - self.assertEqual( - util._convert_tflite_enum_type_to_tf_type(8), dtypes.complex64) - self.assertEqual(util._convert_tflite_enum_type_to_tf_type(9), dtypes.int8) - self.assertEqual( - util._convert_tflite_enum_type_to_tf_type(10), dtypes.float64) - with self.assertRaises(ValueError) as error: - util._convert_tflite_enum_type_to_tf_type(11) - self.assertEqual( - "Unsupported enum 11. The valid map of enum to tf.dtypes is : " - "{0: tf.float32, 1: tf.float16, 2: tf.int32, 3: tf.uint8, 4: tf.int64, " - "5: tf.string, 6: tf.bool, 7: tf.int16, 8: tf.complex64, 9: tf.int8, " - "10: tf.float64}", str(error.exception)) - def testTensorName(self): with ops.Graph().as_default(): in_tensor = array_ops.placeholder(shape=[4], dtype=dtypes.float32) @@ -223,140 +195,5 @@ class TensorFunctionsTest(test_util.TensorFlowTestCase): self.assertEqual([None, 3, 5], tensor.shape.as_list()) -def _generate_integer_tflite_model(): - """Define an integer post-training quantized tflite model.""" - # Load MNIST dataset - n = 10 # Number of samples - (train_images, train_labels), (test_images, test_labels) = \ - tf.keras.datasets.mnist.load_data() - train_images, train_labels, test_images, test_labels = \ - train_images[:n], train_labels[:n], test_images[:n], test_labels[:n] - - # Normalize the input image so that each pixel value is between 0 to 1. 
- train_images = train_images / 255.0 - test_images = test_images / 255.0 - - # Define TF model - model = tf.keras.Sequential([ - tf.keras.layers.InputLayer(input_shape=(28, 28)), - tf.keras.layers.Reshape(target_shape=(28, 28, 1)), - tf.keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation="relu"), - tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), - tf.keras.layers.Flatten(), - tf.keras.layers.Dense(10) - ]) - - # Train - model.compile( - optimizer="adam", - loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), - metrics=["accuracy"]) - - model.fit( - train_images, - train_labels, - epochs=1, - validation_split=0.1, - ) - - # Convert TF Model to an Integer Quantized TFLite Model - converter = tf.lite.TFLiteConverter.from_keras_model(model) - converter.optimizations = {tf.lite.Optimize.DEFAULT} - def representative_dataset_gen(): - for _ in range(2): - yield [ - np.random.uniform(low=0, high=1, size=(1, 28, 28)).astype( - np.float32) - ] - converter.representative_dataset = representative_dataset_gen - converter.target_spec.supported_ops = {tf.lite.OpsSet.TFLITE_BUILTINS_INT8} - tflite_model = converter.convert() - - return tflite_model - - -def _test_param_modify_integer_model_io_type(): - """Function to generate parameterized inputs for testing.""" - params = [] - str_template = "_{}{}{}" - map_model_type = { - "PostTraining": True, - # "DuringTraining": False, - } - map_types = { - "": lite_constants.FLOAT, - "INT8": lite_constants.INT8, - "UINT8": lite_constants.QUANTIZED_UINT8 - } - for k1, v1 in map_model_type.items(): - for k2, v2 in map_types.items(): - istr = "_Input{}".format(k2) if k2 else "" - for k3, v3 in map_types.items(): - ostr = "_Output{}".format(k3) if k3 else "" if istr else "_NoUpdate" - params.append((str_template.format(k1, istr, ostr), v1, v2, v3)) - return params - - -# TODO(b/161174063): Merge tests for integer input/output type -class UtilModifyIntegerQuantizedModelIOTypeTest( - test_util.TensorFlowTestCase, parameterized.TestCase): - - @classmethod - def setUpClass(cls): - super(UtilModifyIntegerQuantizedModelIOTypeTest, cls).setUpClass() - cls.post_train_integer_model = _generate_integer_tflite_model() - - @parameterized.named_parameters(_test_param_modify_integer_model_io_type()) - def test(self, is_post_train, in_tftype, out_tftype): - """Modify the float input/output type of an integer quantized model.""" - - def _run_tflite_inference(model, in_tftype, out_tftype): - """Run inference on a model with a specific input/output type.""" - # Load TFLite model and allocate tensors. 
- interpreter = tf.lite.Interpreter(model_content=model) - interpreter.allocate_tensors() - input_details = interpreter.get_input_details()[0] - output_details = interpreter.get_output_details()[0] - - # Validate TFLite model input and output types - self.assertEqual(input_details["dtype"], in_tftype.as_numpy_dtype) - self.assertEqual(output_details["dtype"], out_tftype.as_numpy_dtype) - - # Define Input - np.random.seed(0) - input_data = np.random.uniform(low=0, high=1, size=(1, 28, 28)) - input_data = input_data.astype(np.float32) - if input_details["dtype"] != np.float32: - # quantize float to int - scale, zero_point = input_details["quantization"] - input_data = input_data / scale + zero_point - input_data = input_data.astype(input_details["dtype"]) - - # Run Inference - interpreter.set_tensor(input_details["index"], input_data) - interpreter.invoke() - - # Get output - output_data = interpreter.get_tensor(output_details["index"])[0] - if output_details["dtype"] != np.float32: - # dequantize int to float - scale, zero_point = output_details["quantization"] - output_data = output_data.astype(np.float32) - output_data = (output_data - zero_point) * scale - - return output_data - - model = self.__class__.post_train_integer_model if is_post_train else None - # Run model inference with float input output type - output_data = _run_tflite_inference(model, tf.float32, tf.float32) - # Run model inference with modified integer input output type - model_io = util.modify_integer_quantized_model_io_type( - model, in_tftype, out_tftype) - output_io_data = _run_tflite_inference(model_io, in_tftype, out_tftype) - - # Validate that both the outputs are the same - self.assertTrue(np.allclose(output_data, output_io_data, atol=1.0)) - - if __name__ == "__main__": test.main() From d2ab722e3f123fa9abf0914ae8066752d212432b Mon Sep 17 00:00:00 2001 From: Rachel Lim Date: Wed, 22 Jul 2020 16:23:43 -0700 Subject: [PATCH 1099/2522] [tf.data] Add RebatchDatasetV2. Also updates RebatchDataset(V1) tests so that they're more easily readable. PiperOrigin-RevId: 322678990 Change-Id: Ibfd75d270e3951524fbbba9569061f55d3cfa39d --- .../base_api/api_def_RebatchDatasetV2.pbtxt | 22 + .../data/experimental/rebatch_dataset_op.cc | 328 +++++++++++++++ .../core/ops/experimental_dataset_ops.cc | 9 + .../kernel_tests/auto_shard_dataset_test.py | 6 +- .../kernel_tests/rebatch_dataset_test.py | 377 +++++++++++++----- .../rebatch_dataset_serialization_test.py | 6 +- .../data/experimental/ops/distribute.py | 165 +++++++- tensorflow/python/distribute/input_lib.py | 2 +- .../api/golden/v1/tensorflow.raw_ops.pbtxt | 4 + .../api/golden/v2/tensorflow.raw_ops.pbtxt | 4 + 10 files changed, 817 insertions(+), 106 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_RebatchDatasetV2.pbtxt diff --git a/tensorflow/core/api_def/base_api/api_def_RebatchDatasetV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_RebatchDatasetV2.pbtxt new file mode 100644 index 00000000000..3abdff980f3 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_RebatchDatasetV2.pbtxt @@ -0,0 +1,22 @@ +op { + graph_op_name: "RebatchDatasetV2" + visibility: HIDDEN + in_arg { + name: "input_dataset" + description: < output_shapes_; }; +// This dataset rebatches its input batches into batches of different size(s). +// +// This differs from RebatchDatasetOp. 
Namely, RebatchDatasetV2 rebatches +// incoming batches into batches whose new sizes are specified by the +// `batch_sizes` argument, while RebatchDataset splits its batches based +// on the (dynamic) input batch size and the given number of splits to make (its +// `num_replicas` argument). When used in tf.distribute, this allows +// RebatchDataset to split batches more correctly when the splits are +// distributed across multiple workers and replicas. +class RebatchDatasetV2Op : public UnaryDatasetOpKernel { + public: + explicit RebatchDatasetV2Op(OpKernelConstruction* ctx) + : UnaryDatasetOpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &output_shapes_)); + } + + protected: + void MakeDataset(OpKernelContext* ctx, DatasetBase* input, + DatasetBase** output) override { + const Tensor* batch_sizes_tensor; + OP_REQUIRES_OK(ctx, ctx->input("batch_sizes", &batch_sizes_tensor)); + OP_REQUIRES( + ctx, batch_sizes_tensor->dims() <= 1, + errors::InvalidArgument("`batch_sizes` must be a scalar or a vector.")); + + std::vector batch_sizes; + batch_sizes.reserve(batch_sizes_tensor->NumElements()); + for (int i = 0; i < batch_sizes_tensor->NumElements(); ++i) { + batch_sizes.push_back(batch_sizes_tensor->flat()(i)); + } + + bool drop_remainder; + OP_REQUIRES_OK( + ctx, ParseScalarArgument(ctx, "drop_remainder", &drop_remainder)); + + *output = new Dataset(ctx, input, std::move(batch_sizes), drop_remainder, + output_types_, output_shapes_); + } + + private: + class Dataset : public DatasetBase { + public: + Dataset(OpKernelContext* ctx, const DatasetBase* input, + std::vector&& batch_sizes, bool drop_remainder, + const DataTypeVector& output_types, + const std::vector& output_shapes) + : DatasetBase(DatasetContext(ctx)), + input_(input), + batch_sizes_(std::move(batch_sizes)), + drop_remainder_(drop_remainder), + output_types_(output_types), + output_shapes_(output_shapes), + traceme_metadata_( + {{"batch_sizes", absl::StrJoin(batch_sizes, ",")}}) { + input_->Ref(); + } + + ~Dataset() override { input_->Unref(); } + + std::unique_ptr MakeIteratorInternal( + const string& prefix) const override { + name_utils::IteratorPrefixParams params; + return absl::make_unique(Iterator::Params{ + this, name_utils::IteratorPrefix(kDatasetType, prefix, params)}); + } + + const DataTypeVector& output_dtypes() const override { + return output_types_; + } + + const std::vector& output_shapes() const override { + return output_shapes_; + } + + string DebugString() const override { + return name_utils::DatasetDebugString(kDatasetType); + } + + Status CheckExternalState() const override { + return input_->CheckExternalState(); + } + + protected: + Status AsGraphDefInternal(SerializationContext* ctx, + DatasetGraphDefBuilder* b, + Node** output) const override { + Node* input_graph_node = nullptr; + TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input_, &input_graph_node)); + Node* batch_sizes = nullptr; + TF_RETURN_IF_ERROR(b->AddVector(batch_sizes_, &batch_sizes)); + Node* drop_remainder = nullptr; + TF_RETURN_IF_ERROR(b->AddScalar(drop_remainder_, &drop_remainder)); + TF_RETURN_IF_ERROR(b->AddDataset( + this, {input_graph_node, batch_sizes, drop_remainder}, output)); + return Status::OK(); + } + + private: + class Iterator : public DatasetIterator { + public: + explicit Iterator(const Params& params) + : DatasetIterator(params) {} + + ~Iterator() override {} + + Status Initialize(IteratorContext* ctx) override { + return 
dataset()->input_->MakeIterator(ctx, this, prefix(), + &input_impl_); + } + + Status GetNextInternal(IteratorContext* ctx, + std::vector* out_tensors, + bool* end_of_sequence) override { + mutex_lock l(mu_); + if (end_of_sequence_) { + *end_of_sequence = true; + return Status::OK(); + } + + *end_of_sequence = false; + + auto desired_batch_size = dataset()->batch_sizes_[batch_sizes_index_]; + // Tracks the size of the current batch as it's built up, possibly from + // different input tensors. + int64 batch_size = 0; + + std::vector> slices_to_concatenate; + // Get slices from input tensors until they make up the whole batch + // size or we run out of input. + while (batch_size < desired_batch_size) { + if (offset_ == -1) { + // Get new input tensors. + tensors_.clear(); + TF_RETURN_IF_ERROR( + input_impl_->GetNext(ctx, &tensors_, &end_of_sequence_)); + if (end_of_sequence_) { + // Break and return partial batch, if any. + break; + } + TF_RETURN_IF_ERROR(ValidateInputTensors()); + offset_ = 0; + } + + int64 slice_end = std::min(offset_ + desired_batch_size - batch_size, + tensors_[0].dim_size(0)); + + std::vector slices; + slices.reserve(tensors_.size()); + for (const auto& tensor : tensors_) { + Tensor slice = tensor.Slice(offset_, slice_end); + slices.push_back(tensor.Slice(offset_, slice_end)); + } + slices_to_concatenate.push_back(std::move(slices)); + + batch_size += (slice_end - offset_); + offset_ = slice_end; + if (offset_ == tensors_[0].dim_size(0)) { + // Exhausted current input tensors, reset. + offset_ = -1; + } + } + + batch_sizes_index_++; + batch_sizes_index_ %= dataset()->batch_sizes_.size(); + + // Return end_of_sequence if GetNext is expected to produce a non-empty + // batch and there are no more inputs, or if drop_remainder is true and + // we can't make a full batch. + if ((batch_size == 0 && desired_batch_size > 0) || + (dataset()->drop_remainder_ && batch_size < desired_batch_size)) { + DCHECK(end_of_sequence_); + *end_of_sequence = true; + return Status::OK(); + } + + const size_t num_components = dataset()->output_dtypes().size(); + out_tensors->reserve(num_components); + + // Special case: desired batch size == 0. This may be the case when, + // with distribution strategies, one of replicas expects an empty batch + // so that the global batch size adds up correctly. + if (desired_batch_size == 0) { + DCHECK_EQ(batch_size, 0); + DCHECK_EQ(slices_to_concatenate.size(), 0); + for (const auto& dtype : dataset()->output_dtypes()) { + out_tensors->push_back(Tensor(dtype)); + } + return Status::OK(); + } + + // Special case: when there's only one slice, we return the slice + // directly where possible instead of copying the tensor data. + if (slices_to_concatenate.size() == 1) { + auto tensors = std::move(slices_to_concatenate[0]); + for (size_t i = 0; i < num_components; ++i) { + // If the slice is aligned, we return it directly. + if (!tensors[i].IsAligned()) { + tensors[i] = tensor::DeepCopy(std::move(tensors[i])); + } + } + *out_tensors = std::move(tensors); + return Status::OK(); + } + + // For each component, concatenate slices into one tensor. 
+ for (size_t i = 0; i < num_components; ++i) { + TensorShape component_shape({batch_size}); + TensorShape remaining_shape = slices_to_concatenate[0][i].shape(); + remaining_shape.RemoveDim(0); + component_shape.AppendShape(remaining_shape); + out_tensors->emplace_back(ctx->allocator({}), + dataset()->output_dtypes()[i], + component_shape); + if (!out_tensors->back().IsInitialized()) { + return errors::ResourceExhausted( + "Failed to allocate memory for the batch of component ", i); + } + int64 dst_offset = 0; + for (size_t j = 0; j < slices_to_concatenate.size(); ++j) { + auto num_slices = slices_to_concatenate[j][i].shape().dim_size(0); + TF_RETURN_IF_ERROR(batch_util::CopyContiguousSlices( + slices_to_concatenate[j][i], 0, dst_offset, num_slices, + &(*out_tensors)[i])); + dst_offset += num_slices; + } + } + + return Status::OK(); + } + + protected: + Status SaveInternal(SerializationContext* ctx, + IteratorStateWriter* writer) override { + mutex_lock l(mu_); + if (!input_impl_) { + TF_RETURN_IF_ERROR( + writer->WriteScalar(full_name("input_impl_empty"), "")); + } else { + TF_RETURN_IF_ERROR(SaveInput(ctx, writer, input_impl_)); + } + TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("batch_sizes_index"), + batch_sizes_index_)); + TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("offset"), offset_)); + if (batch_sizes_index_ != 0) { + for (int i = 0; i < tensors_.size(); ++i) { + TF_RETURN_IF_ERROR(writer->WriteTensor( + full_name(strings::StrCat("tensors[", i, "]")), tensors_[i])); + } + } + return Status::OK(); + } + + Status RestoreInternal(IteratorContext* ctx, + IteratorStateReader* reader) override { + mutex_lock l(mu_); + if (!reader->Contains(full_name("input_impl_empty"))) { + TF_RETURN_IF_ERROR(RestoreInput(ctx, reader, input_impl_)); + } else { + input_impl_.reset(); + } + TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("batch_sizes_index"), + &batch_sizes_index_)); + TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("offset"), &offset_)); + + tensors_.clear(); + if (batch_sizes_index_ > 0) { + tensors_.resize(dataset()->output_dtypes().size()); + for (int i = 0; i < tensors_.size(); ++i) { + TF_RETURN_IF_ERROR(reader->ReadTensor( + full_name(strings::StrCat("tensors[", i, "]")), &tensors_[i])); + } + } + return Status::OK(); + } + + TraceMeMetadata GetTraceMeMetadata() const override { + return dataset()->traceme_metadata_; + } + + private: + Status ValidateInputTensors() TF_EXCLUSIVE_LOCKS_REQUIRED(mu_) { + for (size_t i = 0; i < tensors_.size(); ++i) { + if (tensors_[i].dims() == 0) { + return errors::InvalidArgument( + "Input element must have a non-scalar value in each " + "component."); + } + if (tensors_[i].dim_size(0) != tensors_[0].dim_size(0)) { + return errors::InvalidArgument( + "Input element must have the same batch size in each " + "component. Component 0 had size ", + tensors_[0].dim_size(0), " but component ", i, " had size, ", + tensors_[i].dim_size(0), "."); + } + } + return Status::OK(); + } + + mutex mu_; + std::unique_ptr input_impl_; + // Whether we have reached the end of the input. + bool end_of_sequence_ TF_GUARDED_BY(mu_) = false; + // Represents the current input tensor(s). + std::vector tensors_ TF_GUARDED_BY(mu_); + // Represents the offset into the current input tensor(s). + // An offset of -1 indicates that there is no data left in the current + // slice. + int64 offset_ TF_GUARDED_BY(mu_) = -1; + // Represents the current index into the batch_sizes list. 
+ int64 batch_sizes_index_ TF_GUARDED_BY(mu_) = 0; + }; + + const DatasetBase* const input_; + const std::vector batch_sizes_; + const bool drop_remainder_; + const DataTypeVector output_types_; + const std::vector output_shapes_; + const TraceMeMetadata traceme_metadata_; + }; + + DataTypeVector output_types_; + std::vector output_shapes_; +}; + REGISTER_KERNEL_BUILDER(Name("RebatchDataset").Device(DEVICE_CPU), RebatchDatasetOp); REGISTER_KERNEL_BUILDER(Name("ExperimentalRebatchDataset").Device(DEVICE_CPU), RebatchDatasetOp); +REGISTER_KERNEL_BUILDER(Name("RebatchDatasetV2").Device(DEVICE_CPU), + RebatchDatasetV2Op); + } // anonymous namespace } // namespace experimental } // namespace data diff --git a/tensorflow/core/ops/experimental_dataset_ops.cc b/tensorflow/core/ops/experimental_dataset_ops.cc index 2d4b2f43746..dd75f99bb70 100644 --- a/tensorflow/core/ops/experimental_dataset_ops.cc +++ b/tensorflow/core/ops/experimental_dataset_ops.cc @@ -792,6 +792,15 @@ REGISTER_OP("RebatchDataset") .Attr("use_fallback: bool = true") .SetShapeFn(shape_inference::ScalarShape); +REGISTER_OP("RebatchDatasetV2") + .Input("input_dataset: variant") + .Input("batch_sizes: int64") + .Input("drop_remainder: bool") + .Output("handle: variant") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetShapeFn(shape_inference::ScalarShape); + REGISTER_OP("SamplingDataset") .Input("input_dataset: variant") .Input("rate: float32") diff --git a/tensorflow/python/data/experimental/kernel_tests/auto_shard_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/auto_shard_dataset_test.py index c2aa406d260..eced1fdea18 100644 --- a/tensorflow/python/data/experimental/kernel_tests/auto_shard_dataset_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/auto_shard_dataset_test.py @@ -394,14 +394,14 @@ class AutoShardDatasetTest(reader_dataset_ops_test_base.TFRecordDatasetTestBase, self.evaluate(self.getNext(dataset)()) @combinations.generate(test_base.default_test_combinations()) - def testShardWithRebatch(self): - # Tests that Rebatch is a passthrough op. + def testShardWithLegacyRebatch(self): + # Tests that RebatchDatasetV1 is a passthrough op. dataset = dataset_ops.Dataset.list_files(self.test_filenames, shuffle=False) dataset = dataset.apply( testing.assert_next(["Shard", "FlatMap", "Batch", "Rebatch"])) dataset = dataset.flat_map(core_readers.TFRecordDataset) dataset = dataset.batch(5) - dataset = distribute._RebatchDataset(dataset, num_replicas=1) + dataset = distribute._LegacyRebatchDataset(dataset, num_replicas=1) dataset = distribute._AutoShardDataset(dataset, 5, 3) nxt = self.getNext(dataset) self.evaluate(nxt()) diff --git a/tensorflow/python/data/experimental/kernel_tests/rebatch_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/rebatch_dataset_test.py index c9d0d14dead..ce91ae2ea46 100644 --- a/tensorflow/python/data/experimental/kernel_tests/rebatch_dataset_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/rebatch_dataset_test.py @@ -33,22 +33,233 @@ from tensorflow.python.platform import test def _flat_shapes(dataset): - return nest.flatten(dataset_ops.get_legacy_output_shapes(dataset)) + return [ + ts.as_list() + for ts in nest.flatten(dataset_ops.get_legacy_output_shapes(dataset)) + ] class RebatchDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): + ############################################################################## + # The following tests exercise our static computation of output_shapes. 
+ ############################################################################## + + @combinations.generate(test_base.default_test_combinations()) + def testShapeInferenceNotAllBatchSizesEqual(self): + dataset = dataset_ops.Dataset.range(8).batch(4, drop_remainder=True) + rebatched_dataset = distribute._RebatchDataset( + dataset, batch_sizes=[2, 1, 1]) + expected_shapes = [[None]] + self.assertEqual(expected_shapes, _flat_shapes(rebatched_dataset)) + + @combinations.generate( + combinations.times(test_base.default_test_combinations(), + combinations.combine(drop_remainder=[True, False]))) + def testShapeInferenceInputBatchDimDivisible(self, drop_remainder): + dataset = dataset_ops.Dataset.range(8).batch(4, drop_remainder=True) + rebatched_dataset = distribute._RebatchDataset( + dataset, batch_sizes=[2, 2], drop_remainder=drop_remainder) + expected_shapes = [[2]] + self.assertEqual(expected_shapes, _flat_shapes(rebatched_dataset)) + + @combinations.generate( + combinations.times(test_base.default_test_combinations())) + def testShapeInferenceInputBatchDimUnknown(self): + dataset = dataset_ops.Dataset.range(8).batch(4, drop_remainder=False) + rebatched_dataset = distribute._RebatchDataset( + dataset, batch_sizes=[2, 2], drop_remainder=False) + expected_shapes = [[None]] + self.assertEqual(expected_shapes, _flat_shapes(rebatched_dataset)) + + @combinations.generate( + combinations.times(test_base.default_test_combinations())) + def testShapeInferenceInputBatchDimUnknownWithDropRemainder(self): + dataset = dataset_ops.Dataset.range(8).batch(4, drop_remainder=False) + rebatched_dataset = distribute._RebatchDataset( + dataset, batch_sizes=[2, 2], drop_remainder=True) + expected_shapes = [[2]] + self.assertEqual(expected_shapes, _flat_shapes(rebatched_dataset)) + + @combinations.generate( + combinations.times(test_base.default_test_combinations())) + def testShapeInferenceInputBatchDimIndivisible(self): + dataset = dataset_ops.Dataset.range(10).batch(5, drop_remainder=True) + rebatched_dataset = distribute._RebatchDataset( + dataset, batch_sizes=[2, 2], drop_remainder=False) + expected_shapes = [[None]] + self.assertEqual(expected_shapes, _flat_shapes(rebatched_dataset)) + + @combinations.generate( + combinations.times(test_base.default_test_combinations())) + def testShapeInferenceInputBatchDimIndivisibleWithDropRemainder(self): + dataset = dataset_ops.Dataset.range(10).batch(5, drop_remainder=True) + rebatched_dataset = distribute._RebatchDataset( + dataset, batch_sizes=[2, 2], drop_remainder=True) + expected_shapes = [[2]] + self.assertEqual(expected_shapes, _flat_shapes(rebatched_dataset)) + + ############################################################################## + # The following tests check _RebatchDataset's output. 
+ ############################################################################## + @combinations.generate( + combinations.times(test_base.default_test_combinations(), + combinations.combine(drop_remainder=[True, False]))) + def testBasic(self, drop_remainder): + dataset = dataset_ops.Dataset.range(8).batch(4, drop_remainder=True) + rebatched_dataset = distribute._RebatchDataset( + dataset, batch_sizes=[2, 2], drop_remainder=drop_remainder) + + expected_shapes = [[2]] + self.assertEqual(expected_shapes, _flat_shapes(rebatched_dataset)) + + expected_output = [[0, 1], [2, 3], [4, 5], [6, 7]] + self.assertDatasetProduces(rebatched_dataset, expected_output) + + @combinations.generate( + combinations.times(test_base.default_test_combinations())) + def testPartialBatch(self): + dataset = dataset_ops.Dataset.range(5).batch(4, drop_remainder=False) + rebatched_dataset = distribute._RebatchDataset( + dataset, batch_sizes=[2, 2], drop_remainder=False) + + expected_shapes = [[None]] + self.assertEqual(expected_shapes, _flat_shapes(rebatched_dataset)) + expected_output = [[0, 1], [2, 3], [4]] + self.assertDatasetProduces(rebatched_dataset, expected_output) + + @combinations.generate( + combinations.times(test_base.default_test_combinations())) + def testPartialBatchWithDropRemainder(self): + dataset = dataset_ops.Dataset.range(5).batch(4, drop_remainder=False) + rebatched_dataset = distribute._RebatchDataset( + dataset, batch_sizes=[2, 2], drop_remainder=True) + + expected_shapes = [[2]] + self.assertEqual(expected_shapes, _flat_shapes(rebatched_dataset)) + expected_output = [[0, 1], [2, 3]] + self.assertDatasetProduces(rebatched_dataset, expected_output) + + @combinations.generate( + combinations.times(test_base.default_test_combinations(), + combinations.combine(drop_remainder=[True, False]))) + def testBatchSizeGreaterThanOriginal(self, drop_remainder): + dataset = dataset_ops.Dataset.range(12).batch( + 4, drop_remainder=False) + rebatched_dataset = distribute._RebatchDataset( + dataset, batch_sizes=[6], drop_remainder=drop_remainder) + + expected_output = [[0, 1, 2, 3, 4, 5], [6, 7, 8, 9, 10, 11]] + self.assertDatasetProduces(rebatched_dataset, expected_output) + + @combinations.generate( + combinations.times(test_base.default_test_combinations(), + combinations.combine(drop_remainder=[True, False]))) + def testEmptySplits(self, drop_remainder): + # It's possible for splits to be empty if the batch size is smaller than + # the number of replicas. Here, we use an example with batch_size == 4 + # and num_replicas == 5. 
+ dataset = dataset_ops.Dataset.range(8).batch(4, drop_remainder=True) + rebatched_dataset = distribute._RebatchDataset( + dataset, batch_sizes=[1, 1, 1, 1, 0], drop_remainder=drop_remainder) + + expected_shapes = [[None]] + self.assertEqual(expected_shapes, _flat_shapes(rebatched_dataset)) + + expected_output = [[0], [1], [2], [3], [], [4], [5], [6], [7], []] + self.assertDatasetProduces(rebatched_dataset, expected_output) + + @combinations.generate( + combinations.times(test_base.default_test_combinations(), + combinations.combine(drop_remainder=[True, False]))) + def testScalarBatchSizeInput(self, drop_remainder): + dataset = dataset_ops.Dataset.range(8).batch( + 4, drop_remainder=True) + rebatched_dataset = distribute._RebatchDataset( + dataset, batch_sizes=2, drop_remainder=drop_remainder) + + expected_shapes = [[2]] + self.assertEqual(expected_shapes, _flat_shapes(rebatched_dataset)) + + expected_output = [[0, 1], [2, 3], [4, 5], [6, 7]] + self.assertDatasetProduces(rebatched_dataset, expected_output) + + @combinations.generate(test_base.default_test_combinations()) + def testMultipleBatches(self): + dataset = dataset_ops.Dataset.range(16).batch( + 2, drop_remainder=True).batch( + 4, drop_remainder=True) + self.assertEqual([[4, 2]], _flat_shapes(dataset)) + + rebatched_dataset = distribute._RebatchDataset(dataset, [2, 2]) + self.assertEqual([[2, 2]], _flat_shapes(rebatched_dataset)) + # Each element is a list of 2 elements where each element is a list of 2. + expected_output = [[[0, 1], [2, 3]], [[4, 5], [6, 7]], [[8, 9], [10, 11]], + [[12, 13], [14, 15]]] + self.assertDatasetProduces(rebatched_dataset, expected_output) + + @combinations.generate(test_base.default_test_combinations()) + def testNestedDictionaryOutput(self): + dataset = dataset_ops.Dataset.range(8).map( + lambda x: {"a": x, "b": {"c": x + 1}}).batch(4, drop_remainder=True) + rebatched_dataset = distribute._RebatchDataset(dataset, [2, 2]) + self.assertEqual([[2], [2]], _flat_shapes(rebatched_dataset)) + + expected_output = [{"a": [0, 1], "b": {"c": [1, 2]}}, + {"a": [2, 3], "b": {"c": [3, 4]}}, + {"a": [4, 5], "b": {"c": [5, 6]}}, + {"a": [6, 7], "b": {"c": [7, 8]}}] + self.assertDatasetProduces(rebatched_dataset, expected_output) + + @combinations.generate( + combinations.times(test_base.default_test_combinations(), + combinations.combine(drop_remainder=[True, False]))) + def testRaggedDataset(self, drop_remainder): + # Set up a dataset that produces ragged tensors with a static batch size. + dataset = dataset_ops.Dataset.from_tensor_slices( + ragged_tensor.RaggedTensor.from_row_lengths( + list(range(10)), [1, 2, 3, 4])) + # The map changes the internal representation of the ragged tensor. + # This test will fail if we don't normalize the tensor representation. + dataset = dataset.batch(4, drop_remainder=True).map(lambda x: x) + + rebatched_dataset = distribute._RebatchDataset( + dataset, batch_sizes=[2, 2]) + + expected_output = [ + ragged_tensor.RaggedTensor.from_row_lengths(list(range(3)), [1, 2]), + ragged_tensor.RaggedTensor.from_row_lengths(list(range(3, 10)), + [3, 4]), + ] + self.assertDatasetProduces(rebatched_dataset, expected_output) + + @combinations.generate(test_base.default_test_combinations()) + def testNoneDataset(self): + # Some datasets, e.g. datasets with None tensors, have components without + # output shapes. Test that this doesn't break rebatching shape inference + # logic. 
+ dataset = dataset_ops.Dataset.range(4) + dataset = dataset.map(lambda x: (x, None)) + dataset = dataset.batch(4, drop_remainder=True) + _ = distribute._RebatchDataset(dataset, batch_sizes=[2, 2]) + + +class LegacyRebatchDatasetTest(test_base.DatasetTestBase, + parameterized.TestCase): + @combinations.generate( combinations.times(test_base.default_test_combinations(), combinations.combine(drop_remainder=[True, False]))) def testBasic(self, drop_remainder): - dataset = dataset_ops.Dataset.range(1024).batch( - 32, drop_remainder=drop_remainder) - rebatched_dataset = distribute._RebatchDataset(dataset, num_replicas=4) - self.assertEqual([[8] if drop_remainder else [None]], - [ts.as_list() for ts in _flat_shapes(rebatched_dataset)]) + dataset = dataset_ops.Dataset.range(8).batch( + 4, drop_remainder=drop_remainder) + rebatched_dataset = distribute._LegacyRebatchDataset( + dataset, num_replicas=2) - expected_output = [[k for k in range(i, i + 8)] for i in range(0, 1024, 8)] # pylint: disable=g-complex-comprehension + expected_shapes = [[2]] if drop_remainder else [[None]] + self.assertEqual(expected_shapes, _flat_shapes(rebatched_dataset)) + + expected_output = [[0, 1], [2, 3], [4, 5], [6, 7]] self.assertDatasetProduces(rebatched_dataset, expected_output) @combinations.generate(test_base.default_test_combinations()) @@ -57,71 +268,54 @@ class RebatchDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): # decode_image results in a tensor of completely unknown shape (i.e. unknown # rank) dataset = dataset.map(image_ops.decode_image) - self.assertEqual([tensor_shape.TensorShape(None)], _flat_shapes(dataset)) - rebatched_dataset = distribute._RebatchDataset(dataset, num_replicas=4) - # Note that we are just testing the dataset shapes, not the actual output. self.assertEqual([tensor_shape.TensorShape(None)], - _flat_shapes(rebatched_dataset)) + nest.flatten( + dataset_ops.get_legacy_output_shapes(dataset))) + rebatched_dataset = distribute._LegacyRebatchDataset( + dataset, num_replicas=4) + # Note that we are just testing the dataset shapes, not the actual output. + self.assertEqual( + [tensor_shape.TensorShape(None)], + nest.flatten(dataset_ops.get_legacy_output_shapes(rebatched_dataset))) @combinations.generate(test_base.default_test_combinations()) def testCanHandleUnknownDims(self): dataset = dataset_ops.Dataset.range(1000) dataset = dataset.batch(10, drop_remainder=False) dataset = dataset.batch(10, drop_remainder=False) - self.assertEqual([[None, None]], - [ts.as_list() for ts in _flat_shapes(dataset)]) - rebatched_dataset = distribute._RebatchDataset(dataset, num_replicas=4) + self.assertEqual([[None, None]], _flat_shapes(dataset)) + rebatched_dataset = distribute._LegacyRebatchDataset( + dataset, num_replicas=4) # Note that we are just testing the dataset shapes, not the actual output. 
- self.assertEqual([[None, None]], - [ts.as_list() for ts in _flat_shapes(rebatched_dataset)]) + self.assertEqual([[None, None]], _flat_shapes(rebatched_dataset)) @combinations.generate(test_base.default_test_combinations()) def testScalarInputError(self): dataset = dataset_ops.Dataset.range(1024) - distribute._RebatchDataset(dataset.batch(4), num_replicas=4) + distribute._LegacyRebatchDataset(dataset.batch(4), num_replicas=4) with self.assertRaisesRegex(ValueError, ("You can fix the issue " "by adding the `batch`")): - distribute._RebatchDataset(dataset, num_replicas=4) + distribute._LegacyRebatchDataset(dataset, num_replicas=4) @combinations.generate( combinations.times(test_base.default_test_combinations(), combinations.combine(drop_remainder=[True, False]))) def testBatchNotDivisibleByNumReplicas(self, drop_remainder): - dataset = dataset_ops.Dataset.range(1024).batch( - 32, drop_remainder=drop_remainder) - rebatched_dataset = distribute._RebatchDataset(dataset, num_replicas=5) - self.assertEqual([[None]], - [ts.as_list() for ts in _flat_shapes(rebatched_dataset)]) - expected_output = [] - i = 0 - for _ in range(32): # number of steps - # first four minibatches have seven elements - for _ in range(4): - expected_output.append([k for k in range(i, i + 7)]) - i += 7 - # last minibatch has four elements - expected_output.append([k for k in range(i, i + 4)]) - i += 4 - self.assertDatasetProduces(rebatched_dataset, expected_output) - - @combinations.generate(test_base.default_test_combinations()) - def testBatchSizeNotDivisibleByNumReplicas2(self): - dataset = dataset_ops.Dataset.range(32).batch(16, drop_remainder=True) - rebatched_dataset = distribute._RebatchDataset(dataset, num_replicas=5) - # This will rebatch into sub-batches of size 4, since - # ceil(16 / 5) = 4. However, that means only the first 4 replicas will get - # data. - expected_output = [[k for k in range(i, i + 4)] for i in range(0, 16, 4)] - expected_output.extend([[]]) # Last replica gets an empty batch - expected_output.extend( - [[k for k in range(i, i + 4)] for i in range(16, 32, 4)]) - expected_output.extend([[]]) # Last replica gets an empty batch + dataset = dataset_ops.Dataset.range(8).batch( + 4, drop_remainder=drop_remainder) + rebatched_dataset = distribute._LegacyRebatchDataset( + dataset, num_replicas=3) + self.assertEqual([[None]], _flat_shapes(rebatched_dataset)) + # This rebatches into sub-batches of size 2, since ceil(4 / 3) = 2. However, + # this means that only the first 2 replicas will get data. 
+ expected_output = [[0, 1], [2, 3], [], [4, 5], [6, 7], []] self.assertDatasetProduces(rebatched_dataset, expected_output) @combinations.generate(test_base.default_test_combinations()) def testTupleOutput(self): dataset = dataset_ops.Dataset.range(1024).map(lambda x: (x, x)).batch(32) - rebatched_dataset = distribute._RebatchDataset(dataset, num_replicas=4) + rebatched_dataset = distribute._LegacyRebatchDataset( + dataset, num_replicas=4) expected_output = [([k for k in range(i, i + 8)], # pylint: disable=g-complex-comprehension [k for k in range(i, i + 8)]) for i in range(0, 1024, 8)] @@ -129,68 +323,63 @@ class RebatchDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): @combinations.generate(test_base.default_test_combinations()) def testNestedDictionaryOutput(self): - dataset = dataset_ops.Dataset.range(1024).map( - lambda x: {"a": x, "b": {"c": x}}).batch(32) - rebatched_dataset = distribute._RebatchDataset(dataset, num_replicas=4) - expected_output = [{"a": [k for k in range(i, i + 8)], # pylint: disable=g-complex-comprehension - "b": {"c": [k for k in range(i, i + 8)]}} - for i in range(0, 1024, 8)] + dataset = dataset_ops.Dataset.range(8).map( + lambda x: {"a": x, "b": {"c": x + 1}}).batch(4) + rebatched_dataset = distribute._LegacyRebatchDataset( + dataset, num_replicas=2) + expected_output = [{"a": [0, 1], "b": {"c": [1, 2]}}, + {"a": [2, 3], "b": {"c": [3, 4]}}, + {"a": [4, 5], "b": {"c": [5, 6]}}, + {"a": [6, 7], "b": {"c": [7, 8]}}] self.assertDatasetProduces(rebatched_dataset, expected_output) @combinations.generate( combinations.times(test_base.default_test_combinations(), combinations.combine(drop_remainder=[True, False]))) def testFinalPartialBatch(self, drop_remainder): - dataset = dataset_ops.Dataset.range(1032).batch( - 32, drop_remainder=drop_remainder) - rebatched_dataset = distribute._RebatchDataset(dataset, num_replicas=4) - self.assertEqual([[8] if drop_remainder else [None]], - [ts.as_list() for ts in _flat_shapes(rebatched_dataset)]) - - # if drop_remainder, the final partial batch is dropped, even though it - # makes up a complete minibatch. 
- expected_output = [[k for k in range(i, i + 8)] for i in range(0, 1024, 8)] # pylint: disable=g-complex-comprehension - if not drop_remainder: - # The last partial batch of size 8 is split over 4 replicas - expected_output.extend( - [[k for k in range(i, i + 2)] for i in range(1024, 1032, 2)]) + dataset = dataset_ops.Dataset.range(10).batch( + 4, drop_remainder=drop_remainder) + rebatched_dataset = distribute._LegacyRebatchDataset( + dataset, num_replicas=2) + self.assertEqual([[2] if drop_remainder else [None]], + _flat_shapes(rebatched_dataset)) + if drop_remainder: + expected_output = [[0, 1], [2, 3], [4, 5], [6, 7]] + else: + expected_output = [[0, 1], [2, 3], [4, 5], [6, 7], [8], [9]] self.assertDatasetProduces(rebatched_dataset, expected_output) @combinations.generate( combinations.times(test_base.default_test_combinations(), combinations.combine(drop_remainder=[True, False]))) def testFinalPartialBatchAfterRebatch(self, drop_remainder): - dataset = dataset_ops.Dataset.range(34).batch( - 32, drop_remainder=drop_remainder) - rebatched_dataset = distribute._RebatchDataset(dataset, num_replicas=4) - self.assertEqual([[8] if drop_remainder else [None]], - [ts.as_list() for ts in _flat_shapes(rebatched_dataset)]) - - expected_output = [[k for k in range(i, i + 8)] for i in range(0, 32, 8)] # pylint: disable=g-complex-comprehension - if not drop_remainder: - # The last partial batch of size 2 is split over 4 replicas - expected_output += [[32], [33], [], []] + dataset = dataset_ops.Dataset.range(9).batch( + 4, drop_remainder=drop_remainder) + rebatched_dataset = distribute._LegacyRebatchDataset( + dataset, num_replicas=2) + self.assertEqual([[2] if drop_remainder else [None]], + _flat_shapes(rebatched_dataset)) + if drop_remainder: + expected_output = [[0, 1], [2, 3], [4, 5], [6, 7]] + else: + expected_output = [[0, 1], [2, 3], [4, 5], [6, 7], [8], []] self.assertDatasetProduces(rebatched_dataset, expected_output) @combinations.generate(test_base.default_test_combinations()) def testMultipleBatches(self): - dataset = dataset_ops.Dataset.range(128).batch(4).batch(8) - self.assertEqual([[None, None]], - [ts.as_list() for ts in _flat_shapes(dataset)]) + dataset = dataset_ops.Dataset.range(16).batch(2).batch(4) + self.assertEqual([[None, None]], _flat_shapes(dataset)) - # Each element is a list of 8 elements where each element is a list of 4. - expected_output = [[[j, j + 1, j + 2, j + 3] # pylint: disable=g-complex-comprehension - for j in range(i, i + 32, 4)] # generates 8 elements - for i in range(0, 128, 32)] + # Each element is a list of 4 elements where each element is a list of 2. + expected_output = [[[0, 1], [2, 3], [4, 5], [6, 7]], + [[8, 9], [10, 11], [12, 13], [14, 15]]] self.assertDatasetProduces(dataset, expected_output) - rebatched_dataset = distribute._RebatchDataset(dataset, 4) - self.assertEqual([[None, None]], - [ts.as_list() for ts in _flat_shapes(rebatched_dataset)]) - # Each element is a list of 2 elements where each element is a list of 4. - expected_output = [[[j, j + 1, j + 2, j + 3] # pylint: disable=g-complex-comprehension - for j in range(i, i + 8, 4)] # generates 2 elements - for i in range(0, 128, 8)] + rebatched_dataset = distribute._LegacyRebatchDataset(dataset, 2) + self.assertEqual([[None, None]], _flat_shapes(rebatched_dataset)) + # Each element is a list of 2 elements where each element is a list of 2. 
+ expected_output = [[[0, 1], [2, 3]], [[4, 5], [6, 7]], [[8, 9], [10, 11]], + [[12, 13], [14, 15]]] self.assertDatasetProduces(rebatched_dataset, expected_output) @combinations.generate(test_base.default_test_combinations()) @@ -206,7 +395,7 @@ class RebatchDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): # This test will fail if we don't normalize the tensor representation. dataset = dataset.map(lambda x: x) - dataset = distribute._RebatchDataset(dataset, num_replicas=8) + dataset = distribute._LegacyRebatchDataset(dataset, num_replicas=8) # After rebatching, batch size is now 4. expected_output = [] value_index = 0 @@ -220,14 +409,14 @@ class RebatchDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): self.assertDatasetProduces(dataset, expected_output) @combinations.generate(test_base.default_test_combinations()) - def testNoOutputShapes(self): + def testNoneDataset(self): # Some datasets, e.g. datasets with None tensors, have components without # output shapes. Test that this doesn't break rebatching shape inference # logic. dataset = dataset_ops.Dataset.range(4) dataset = dataset.map(lambda x: (x, None)) dataset = dataset.batch(4, drop_remainder=True) - _ = distribute._RebatchDataset(dataset, num_replicas=2) + _ = distribute._LegacyRebatchDataset(dataset, num_replicas=2) class ComputeBatchSizeTest(test_base.DatasetTestBase, parameterized.TestCase): diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization/rebatch_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/rebatch_dataset_serialization_test.py index 8fa7fcbd10f..e99e1f99a9c 100644 --- a/tensorflow/python/data/experimental/kernel_tests/serialization/rebatch_dataset_serialization_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/serialization/rebatch_dataset_serialization_test.py @@ -27,7 +27,7 @@ from tensorflow.python.framework import combinations from tensorflow.python.platform import test -class RebatchDatasetSerializationTest( +class LegacyRebatchDatasetSerializationTest( dataset_serialization_test_base.DatasetSerializationTestBase, parameterized.TestCase): @@ -35,12 +35,12 @@ class RebatchDatasetSerializationTest( def testCore(self): def build_dataset(num_elements, batch_size): - return distribute._RebatchDataset( + return distribute._LegacyRebatchDataset( dataset_ops.Dataset.range(num_elements).batch( 4 * batch_size, drop_remainder=True), num_replicas=4) - self.run_core_tests(lambda: build_dataset(200, 10), 20) + self.run_core_tests(lambda: build_dataset(64, 8), 8) if __name__ == "__main__": diff --git a/tensorflow/python/data/experimental/ops/distribute.py b/tensorflow/python/data/experimental/ops/distribute.py index 9f274201e78..e9e9c052d63 100644 --- a/tensorflow/python/data/experimental/ops/distribute.py +++ b/tensorflow/python/data/experimental/ops/distribute.py @@ -17,6 +17,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import numpy as np + from tensorflow.python.data.experimental.ops.distribute_options import ExternalStatePolicy from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest @@ -24,6 +26,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import tensor_util from tensorflow.python.ops import gen_experimental_dataset_ops as 
ged_ops @@ -70,13 +73,165 @@ def _AutoShardDatasetV1(input_dataset, num_workers, index): # pylint: disable=i class _RebatchDataset(dataset_ops.UnaryDataset): - """A `Dataset` that divides the batch size by `num_replicas`. + """A `Dataset` that rebatches elements from its input into new batch sizes. - For each batch in the input dataset, the resulting dataset will produce - `num_replicas` minibatches whose sizes add up to the original batch size. + `_RebatchDataset(input_dataset, batch_sizes)` is functionally equivalent to + `input_dataset.unbatch().batch(N)`, where the value of N cycles through the + `batch_sizes` input list. The elements produced by this dataset have the same + rank as the elements of the input dataset. + + For example: + + ```python + ds = tf.data.Dataset.range(8) + ds = ds.batch(4) + ds = _RebatchDataset(ds, batch_sizes=[2, 1, 1]) + for elem in ds: + print(elem) + >> [0, 1], [2], [3], [4, 5], [6], [7] + + ds = tf.data.Dataset.range(16) + ds = ds.batch(4) + ds = _RebatchDataset(ds, batch_sizes=[6]) + for elem in ds: + print(elem) + >> [0, 1, 2, 3, 4, 5], [6, 7, 8, 9, 10, 11], [12, 13, 14, 15] + ``` """ - def __init__(self, input_dataset, num_replicas, use_fallback=True): + def __init__(self, input_dataset, batch_sizes, drop_remainder=False): + """Creates a _RebatchDataset. + + Args: + input_dataset: `Dataset` to rebatch. + batch_sizes: A `tf.int64` scalar or vector, representing the size of + batches to produce. If this argument is a vector, these values are + cycled through in order. + drop_remainder: (Optional.) A `tf.bool` scalar `tf.Tensor`, representing + whether the last batch should be dropped in the case it has fewer than + `batch_sizes[cycle_index] elements; the default behavior is not to drop + the smaller batch. + """ + self._input_dataset = input_dataset + self._batch_sizes = ops.convert_to_tensor( + batch_sizes, dtype=dtypes.int64, name="batch_sizes") + self._drop_remainder = ops.convert_to_tensor( + drop_remainder, dtype=dtypes.bool, name="drop_remainder") + new_batch_dim = self._compute_static_batch_dim() + + # pylint: disable=protected-access + self._element_spec = nest.map_structure( + lambda ts: ts._unbatch()._batch(new_batch_dim), + dataset_ops.get_structure(input_dataset)) + # pylint: enable=protected-access + + input_dataset = dataset_ops.normalize_to_dense(input_dataset) + variant_tensor = ged_ops.rebatch_dataset_v2( + input_dataset._variant_tensor, # pylint: disable=protected-access + batch_sizes=batch_sizes, + drop_remainder=drop_remainder, + **self._flat_structure) + super(_RebatchDataset, self).__init__(input_dataset, variant_tensor) + + def _compute_static_batch_dim(self): + """Computes the static batch dimension of a dataset if it can be determined. + + Given the _RebatchDataset parameters, determines the batch dimension of this + dataset statically. Returns None if this cannot be determined or is + variable. + + Returns: + An integer representing the batch dimension of the dataset. If it cannot + be determined statically, returns None. + + Raises: + ValueError: The batch_sizes parameter is malformed, input_dataset is + not batched, or input_dataset batch sizes are incompatible with each + other. 
+ """ + new_batch_dim = tensor_util.constant_value(self._batch_sizes) + if new_batch_dim is None: + return None + + if isinstance(new_batch_dim, np.ndarray): + if len(new_batch_dim.shape) == 1: + if np.all(new_batch_dim == new_batch_dim[0]): + new_batch_dim = new_batch_dim[0] + else: + return None + elif len(new_batch_dim.shape) > 1: + raise ValueError("Expected batch_sizes to be a scalar or vector.") + + if self._may_form_partial_batches(new_batch_dim): + return None + + return new_batch_dim + + def _may_form_partial_batches(self, desired_batch_size): + """Returns whether this dataset may form partial batches.""" + if tensor_util.constant_value(self._drop_remainder): + return False + + def get_batch_dim(type_spec): + shape = type_spec._to_legacy_output_shapes() # pylint: disable=protected-access + if not isinstance(shape, tensor_shape.TensorShape): + return None + if shape.rank is None: + return None + if len(shape) < 1: + raise ValueError("Expected a dataset whose elements have rank >= 1 " + "but found a dataset whose elements are scalars. " + "You can fix the issue by adding the `batch` " + "transformation to the dataset.") + return shape.dims[0].value + + input_batch_dims = [ + get_batch_dim(ts) + for ts in nest.flatten(dataset_ops.get_structure(self._input_dataset)) + ] + known_input_batch_dims = [d for d in input_batch_dims if d is not None] + + if not known_input_batch_dims: + return True + + known_input_batch_dims = np.asarray(known_input_batch_dims) + if not np.all(known_input_batch_dims == known_input_batch_dims[0]): + raise ValueError( + "Batch dimensions of input dataset are not compatible.") + + return known_input_batch_dims[0] % desired_batch_size != 0 + + @property + def element_spec(self): + return self._element_spec + + +class _LegacyRebatchDataset(dataset_ops.UnaryDataset): + """A `Dataset` that divides its input batches into `num_replicas` sub-batches. + + For each batch in the input dataset, _LegacyRebatchDataset will produce + `num_replicas` smaller batches whose sizes add up to the original batch size. + + For example: + + ```python + ds = tf.data.Dataset.range(8) + ds = ds.batch(4) + ds = _LegacyRebatchDataset(ds, num_replicas=3) + for elem in ds: + print(elem) + >> [0, 1], [2, 3], [], [4, 5], [6, 7], [] + ``` + """ + + def __init__(self, input_dataset, num_replicas): + """Creates a _LegacyRebatchDataset. + + Args: + input_dataset: `Dataset` to rebatch. + num_replicas: A `tf.int64` scalar, representing the number of sub-batches + to split each batch from `input_dataset` into. 
+ """ def recalculate_batch_size(type_spec): """Recalculates the output_shape after dividing it by num_replicas.""" @@ -115,7 +270,7 @@ class _RebatchDataset(dataset_ops.UnaryDataset): input_dataset._variant_tensor, # pylint: disable=protected-access num_replicas=num_replicas, **self._flat_structure) - super(_RebatchDataset, self).__init__(input_dataset, variant_tensor) + super(_LegacyRebatchDataset, self).__init__(input_dataset, variant_tensor) @property def element_spec(self): diff --git a/tensorflow/python/distribute/input_lib.py b/tensorflow/python/distribute/input_lib.py index 23792f69efa..6484cf14e42 100644 --- a/tensorflow/python/distribute/input_lib.py +++ b/tensorflow/python/distribute/input_lib.py @@ -968,7 +968,7 @@ class DistributedDataset(_IterableInput): try: # pylint: disable=protected-access with ops.colocate_with(dataset._variant_tensor): - dataset = distribute._RebatchDataset(dataset, split_batch_by) + dataset = distribute._LegacyRebatchDataset(dataset, split_batch_by) # Add a prefetch to pipeline rebatching for performance. # TODO(rachelim): Instead of inserting an extra prefetch stage here, # leverage static graph rewrites to insert _RebatchDataset before diff --git a/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt index c597bc2f8f1..4ad0c0d4448 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt @@ -3312,6 +3312,10 @@ tf_module { name: "RebatchDataset" argspec: "args=[\'input_dataset\', \'num_replicas\', \'output_types\', \'output_shapes\', \'use_fallback\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], " } + member_method { + name: "RebatchDatasetV2" + argspec: "args=[\'input_dataset\', \'batch_sizes\', \'drop_remainder\', \'output_types\', \'output_shapes\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "Reciprocal" argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt index c597bc2f8f1..4ad0c0d4448 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt @@ -3312,6 +3312,10 @@ tf_module { name: "RebatchDataset" argspec: "args=[\'input_dataset\', \'num_replicas\', \'output_types\', \'output_shapes\', \'use_fallback\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], " } + member_method { + name: "RebatchDatasetV2" + argspec: "args=[\'input_dataset\', \'batch_sizes\', \'drop_remainder\', \'output_types\', \'output_shapes\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "Reciprocal" argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " From 05767520b1314b3574195fe88faa4baaa06b6987 Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: Wed, 22 Jul 2020 16:47:57 -0700 Subject: [PATCH 1100/2522] Add APIs for using different processes to register and read from tf.data service. 
PiperOrigin-RevId: 322683532 Change-Id: I27be53d304c4611bf699eca4a7891d8831d7ce9a --- RELEASE.md | 6 +- .../data/experimental/ops/data_service_ops.py | 355 +++++++++++++----- .../data/experimental/service/__init__.py | 2 + .../kernel_tests/data_service_ops_test.py | 167 +++++--- ...tensorflow.data.experimental.service.pbtxt | 8 + ...tensorflow.data.experimental.service.pbtxt | 8 + 6 files changed, 410 insertions(+), 136 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 243807b705b..509b6aa6c84 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -40,8 +40,12 @@ truncating inputs such as from int64 to int32. * Added `tf.sparse.map_values` to apply a function to the `.value`s of `SparseTensror` arguments. * `tf.data`: + * Added new `tf.data.experimental.service.register_dataset` and + `tf.data.experimental.service.from_dataset_id` APIs to enable one process + to register a dataset with the tf.data service, and another process to + consume data from the dataset. * Added optional `exclude_cols` parameter to CsvDataset. This parameter is - the complement of `select_cols`; at most one of these should be specified. + the complement of `select_cols`; at most one of these should be specified. * We have implemented an optimization which reorders data-discarding transformations such as `take` and `shard` to happen earlier in the dataset when it is safe to do so. The optimization can be disabled via diff --git a/tensorflow/python/data/experimental/ops/data_service_ops.py b/tensorflow/python/data/experimental/ops/data_service_ops.py index c564212949f..b5dd6bba5d8 100644 --- a/tensorflow/python/data/experimental/ops/data_service_ops.py +++ b/tensorflow/python/data/experimental/ops/data_service_ops.py @@ -28,6 +28,7 @@ from tensorflow.python.data.experimental.ops.distribute_options import ExternalS from tensorflow.python.data.ops import dataset_ops from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_spec from tensorflow.python.ops import gen_experimental_dataset_ops from tensorflow.python.util.tf_export import tf_export @@ -49,7 +50,6 @@ class _DataServiceDatasetV2(dataset_ops.DatasetSource): """A `Dataset` that reads elements from the tf.data service.""" def __init__(self, - input_dataset, dataset_id, processing_mode, address, @@ -60,8 +60,6 @@ class _DataServiceDatasetV2(dataset_ops.DatasetSource): """Constructs a _DataServiceDatasetV2. Args: - input_dataset: The input dataset, which should be registered with the - tf.data service under `dataset_id`. dataset_id: The dataset id for the dataset to read from. processing_mode: A string specifying the policy for how data should be processed by tf.data workers. Currently, the only supported value is @@ -69,9 +67,9 @@ class _DataServiceDatasetV2(dataset_ops.DatasetSource): address: The tf.data service address, e.g. "localhost:5000". protocol: The protocol to use for communicating with the tf.data service, e.g. "grpc". - job_name: (Optional.) The name of the job. This argument makes it - possible for multiple datasets to share the same job. The default - behavior is that the dataset creates anonymous, exclusively owned jobs. + job_name: (Optional.) The name of the job. This argument makes it possible + for multiple datasets to share the same job. The default behavior is + that the dataset creates anonymous, exclusively owned jobs. max_outstanding_requests: (Optional.) A limit on how many elements may be requested at the same time. 
You can use this option to control the amount of memory used, since `distribute` won't use more than @@ -87,7 +85,6 @@ class _DataServiceDatasetV2(dataset_ops.DatasetSource): if task_refresh_interval_hint_ms is None: task_refresh_interval_hint_ms = dataset_ops.AUTOTUNE - self._input_dataset = input_dataset self._dataset_id = ops.convert_to_tensor( dataset_id, dtype=dtypes.int64, name="dataset_id") self._processing_mode = ops.convert_to_tensor( @@ -102,7 +99,9 @@ class _DataServiceDatasetV2(dataset_ops.DatasetSource): max_outstanding_requests, dtype=dtypes.int64, name="max_outstanding_requests") - self._element_spec = input_dataset.element_spec + # Datasets executed by the tf.data service produce compressed elements + # represented by scalar DT_VARIANTs. + self._element_spec = tensor_spec.TensorSpec(shape=(), dtype=dtypes.variant) variant_tensor = gen_experimental_dataset_ops.data_service_dataset( dataset_id=self._dataset_id, @@ -126,12 +125,10 @@ class _DataServiceDatasetV1(dataset_ops.DatasetV1Adapter): """A `Dataset` that executes its input through the tf.data service.""" @functools.wraps(_DataServiceDatasetV2.__init__) - def __init__(self, input_dataset, dataset_id, processing_mode, address, - protocol, job_name, max_outstanding_requests, - task_refresh_interval_hint_ms): + def __init__(self, dataset_id, processing_mode, address, protocol, job_name, + max_outstanding_requests, task_refresh_interval_hint_ms): self._wrapped = _DataServiceDatasetV2( - input_dataset=input_dataset, dataset_id=dataset_id, processing_mode=processing_mode, address=address, @@ -148,6 +145,106 @@ else: _DataServiceDataset = _DataServiceDatasetV1 +def _parse_service(service): + """Parses a tf.data service string into a (protocol, address) tuple. + + Args: + service: A string in the format "protocol://address". + + Returns: + The parsed (protocol, address) tuple + """ + if not isinstance(service, six.string_types): + raise ValueError( + "service must be a string, but service was of type {0}. service={1}" + .format(type(service), service)) + if not service: + raise ValueError("service must not be empty") + parts = service.split("://") + if len(parts) == 1: + raise ValueError("service string %s does not begin with a protocol. " + "The service should be in the format " + "://
<address>
, e.g. grpc://localhost:5000" % + service) + if len(parts) > 2: + raise ValueError("malformed service string has multiple '://': %s" % + service) + return parts + + +def _from_dataset_id(processing_mode, + service, + dataset_id, + element_spec, + job_name=None, + max_outstanding_requests=None, + task_refresh_interval_hint_ms=None): + """Creates a dataset which reads data from the tf.data service. + + This transformation is similar to `from_dataset_id`, but supports additional + parameters which we do not yet want to add to the public Python API. + + Args: + processing_mode: A string specifying the policy for how data should be + processed by tf.data workers. Currently, the only supported value is + "parallel_epochs". + service: A string indicating how to connect to the tf.data service. The + string should be in the format "://
", e.g. + "grpc://localhost:5000". + dataset_id: The id of the dataset to read from. This id is returned by + `register_dataset` when the dataset is registered with the tf.data + service. + element_spec: A nested structure of `tf.TypeSpec`s representing the type of + elements produced by the dataset. Use `tf.data.Dataset.element_spec` to + see the element spec for a given dataset. + job_name: (Optional.) The name of the job. This argument makes it possible + for multiple datasets to share the same job. The default behavior is that + the dataset creates anonymous, exclusively owned jobs. + max_outstanding_requests: (Optional.) A limit on how many elements may be + requested at the same time. You can use this option to control the amount + of memory used, since `distribute` won't use more than `element_size` * + `max_outstanding_requests` of memory. + task_refresh_interval_hint_ms: (Optional.) A hint for how often to query the + dispatcher for task changes. + + Returns: + A `tf.data.Dataset` which reads from the tf.data service. + """ + ProcessingMode.validate(processing_mode) + if job_name is not None: + if not isinstance(job_name, six.string_types): + raise ValueError("job_name must be a string, but job_name was of type " + "{0}. job_name={1}".format(type(job_name), job_name)) + if not job_name: + raise ValueError("job_name must not be empty") + if element_spec is None: + raise ValueError("element_spec must not be None") + protocol, address = _parse_service(service) + + dataset = _DataServiceDataset( + dataset_id=dataset_id, + processing_mode=processing_mode, + address=address, + protocol=protocol, + job_name=job_name, + max_outstanding_requests=max_outstanding_requests, + task_refresh_interval_hint_ms=task_refresh_interval_hint_ms) + # TODO(b/157105111): Make this an autotuned parallel map when we have a way + # to limit memory usage. + # The value 16 is chosen based on experience with pipelines that require + # more than 8 parallel calls to prevent this stage from being a bottleneck. + dataset = dataset.map( + lambda x: compression_ops.uncompress(x, output_spec=element_spec), + num_parallel_calls=16) + + # Disable autosharding for shared jobs. + if job_name: + options = dataset_ops.Options() + options.experimental_distribute.auto_shard_policy = AutoShardPolicy.OFF + dataset = dataset.with_options(options) + return dataset + + def _distribute(processing_mode, service, job_name=None, @@ -163,11 +260,11 @@ def _distribute(processing_mode, processed by tf.data workers. Currently, the only supported value is "parallel_epochs". service: A string indicating how to connect to the tf.data service. The - string should be in the format ://
<address>, e.g. - grpc://localhost:5000. - job_name: (Optional.) The name of the job. This argument makes it - possible for multiple datasets to share the same job. The default behavior - is that the dataset creates anonymous, exclusively owned jobs. + string should be in the format "<protocol>://<address>
", e.g. + "grpc://localhost:5000". + job_name: (Optional.) The name of the job. This argument makes it possible + for multiple datasets to share the same job. The default behavior is that + the dataset creates anonymous, exclusively owned jobs. max_outstanding_requests: (Optional.) A limit on how many elements may be requested at the same time. You can use this option to control the amount of memory used, since `distribute` won't use more than `element_size` * @@ -179,79 +276,17 @@ def _distribute(processing_mode, Dataset: A `Dataset` of the elements produced by the data service. """ ProcessingMode.validate(processing_mode) - if job_name is not None: - if not isinstance(job_name, six.string_types): - raise ValueError("job_name must be a string, but job_name was of type " - "{0}. job_name={1}".format(type(job_name), job_name)) - if not job_name: - raise ValueError("job_name must not be empty") - if not isinstance(service, six.string_types): - raise ValueError( - "service must be a string, but service was of type {0}. service={1}" - .format(type(service), service)) - if not service: - raise ValueError("service must not be empty") - parts = service.split("://") - if len(parts) == 1: - raise ValueError("service string %s does not begin with a protocol. " - "The service should be in the format " - "://
, e.g. grpc://localhost:5000" % - service) - if len(parts) > 2: - raise ValueError("malformed service string has multiple '://': %s" % - service) - protocol, address = parts - address = ops.convert_to_tensor(address, dtype=dtypes.string, name="address") - protocol = ops.convert_to_tensor( - protocol, dtype=dtypes.string, name="protocol") def _apply_fn(dataset): # pylint: disable=missing-docstring - external_state_policy = dataset.options().experimental_external_state_policy - if external_state_policy is None: - external_state_policy = ExternalStatePolicy.WARN - - uncompressed_spec = dataset.element_spec - # Compress the dataset elements to reduce the amount of data that needs to - # be sent over the network. - # TODO(b/157105111): Make this an autotuned parallel map when we have a way - # to limit memory usage. - dataset = dataset.map(lambda *x: compression_ops.compress(x)) - # Prefetch one compressed element to reduce latency when requesting data - # from tf.data workers. - # TODO(b/157105111): Set this to autotune when we have a way to limit - # memory usage - dataset = dataset.prefetch(1) - # Apply options so that the dataset executed in the tf.data service will - # be optimized and support autotuning. - dataset = dataset._apply_options() # pylint: disable=protected-access - dataset_id = gen_experimental_dataset_ops.register_dataset( - dataset._variant_tensor, # pylint: disable=protected-access - address=address, - protocol=protocol, - external_state_policy=external_state_policy.value) - dataset = _DataServiceDataset( - input_dataset=dataset, - dataset_id=dataset_id, - processing_mode=processing_mode, - address=address, - protocol=protocol, + dataset_id = register_dataset(service, dataset) + return _from_dataset_id( + processing_mode, + service, + dataset_id, + dataset.element_spec, job_name=job_name, max_outstanding_requests=max_outstanding_requests, task_refresh_interval_hint_ms=task_refresh_interval_hint_ms) - # TODO(b/157105111): Make this an autotuned parallel map when we have a way - # to limit memory usage. - # The value 16 is chosen based on experience with pipelines that require - # more than 8 parallel calls to prevent this stage from being a bottleneck. - dataset = dataset.map( - lambda x: compression_ops.uncompress(x, output_spec=uncompressed_spec), - num_parallel_calls=16) - - # Disable autosharding for shared jobs. - if job_name: - options = dataset_ops.Options() - options.experimental_distribute.auto_shard_policy = AutoShardPolicy.OFF - dataset = dataset.with_options(options) - return dataset return _apply_fn @@ -365,8 +400,8 @@ def distribute(processing_mode, processed by tf.data workers. Currently, the only supported value is "parallel_epochs". service: A string indicating how to connect to the tf.data service. The - string should be in the format protocol://address, e.g. - grpc://localhost:5000. + string should be in the format "protocol://address", e.g. + "grpc://localhost:5000". job_name: (Optional.) The name of the job. This argument makes it possible for multiple datasets to share the same job. The default behavior is that the dataset creates anonymous, exclusively owned jobs. @@ -383,3 +418,149 @@ def distribute(processing_mode, service=service, job_name=job_name, max_outstanding_requests=max_outstanding_requests) + + +@tf_export("data.experimental.service.register_dataset") +def register_dataset(service, dataset): + """Registers a dataset with the tf.data service. 
+ + `register_dataset` registers a dataset with the tf.data service so that + datasets can be created later with + `tf.data.experimental.service.from_dataset_id`. This is useful when the + dataset + is registered by one process, then used in another process. When the same + process is both registering and reading from the dataset, it is simpler to use + `tf.data.experimental.service.distribute` instead. + + If the dataset is already registered with the tf.data service, + `register_dataset` returns the already-registered dataset's id. + + >>> dispatcher = tf.data.experimental.service.DispatchServer(port=0) + >>> dispatcher_address = dispatcher.target.split("://")[1] + >>> worker = tf.data.experimental.service.WorkerServer( + ... port=0, dispatcher_address=dispatcher_address) + >>> dataset = tf.data.Dataset.range(10) + >>> dataset_id = tf.data.experimental.service.register_dataset( + ... dispatcher.target, dataset) + >>> dataset = tf.data.experimental.service.from_dataset_id( + ... processing_mode="parallel_epochs", + ... service=dispatcher.target, + ... dataset_id=dataset_id, + ... element_spec=dataset.element_spec) + >>> print(list(dataset.as_numpy_iterator())) + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + + Args: + service: A string indicating how to connect to the tf.data service. The + string should be in the format "protocol://address", e.g. + "grpc://localhost:5000". + dataset: A `tf.data.Dataset` to register with the tf.data service. + + Returns: + A scalar int64 tensor of the registered dataset's id. + """ + protocol, address = _parse_service(service) + external_state_policy = dataset.options().experimental_external_state_policy + if external_state_policy is None: + external_state_policy = ExternalStatePolicy.WARN + + # Compress the dataset elements to reduce the amount of data that needs to + # be sent over the network. + # TODO(b/157105111): Make this an autotuned parallel map when we have a way + # to limit memory usage. + dataset = dataset.map(lambda *x: compression_ops.compress(x)) + # Prefetch one compressed element to reduce latency when requesting data + # from tf.data workers. + # TODO(b/157105111): Set this to autotune when we have a way to limit + # memory usage + dataset = dataset.prefetch(1) + # Apply options so that the dataset executed in the tf.data service will + # be optimized and support autotuning. + dataset = dataset._apply_options() # pylint: disable=protected-access + + dataset_id = gen_experimental_dataset_ops.register_dataset( + dataset._variant_tensor, # pylint: disable=protected-access + address=address, + protocol=protocol, + external_state_policy=external_state_policy.value) + + return dataset_id + + +@tf_export("data.experimental.service.from_dataset_id") +def from_dataset_id(processing_mode, + service, + dataset_id, + element_spec=None, + job_name=None, + max_outstanding_requests=None): + """Creates a dataset which reads data from the tf.data service. + + This is useful when the dataset is registered by one process, then used in + another process. When the same process is both registering and reading from + the dataset, it is simpler to use `tf.data.experimental.service.distribute` + instead. + + Before using `from_dataset_id`, the dataset must have been registered with the + tf.data service using `tf.data.experimental.service.register_dataset`. + `register_dataset` returns a dataset id for the registered dataset. That is + the `dataset_id` which should be passed to `from_dataset_id`. 
+ + The `element_spec` argument indicates the `tf.TypeSpec`s for the elements + produced by the dataset. Currently `element_spec` must be explicitly + specified, and match the dataset registered under `dataset_id`. `element_spec` + defaults to `None` so that in the future we can support automatically + discovering the `element_spec` by querying the tf.data service. + + `tf.data.experimental.service.distribute` is a convenience method which + combines `register_dataset` and `from_dataset_id` into a dataset + transformation. + See the documentation for `tf.data.experimental.service.distribute` for more + detail about how `from_dataset_id` works. + + >>> dispatcher = tf.data.experimental.service.DispatchServer(port=0) + >>> dispatcher_address = dispatcher.target.split("://")[1] + >>> worker = tf.data.experimental.service.WorkerServer( + ... port=0, dispatcher_address=dispatcher_address) + >>> dataset = tf.data.Dataset.range(10) + >>> dataset_id = tf.data.experimental.service.register_dataset( + ... dispatcher.target, dataset) + >>> dataset = tf.data.experimental.service.from_dataset_id( + ... processing_mode="parallel_epochs", + ... service=dispatcher.target, + ... dataset_id=dataset_id, + ... element_spec=dataset.element_spec) + >>> print(list(dataset.as_numpy_iterator())) + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + + Args: + processing_mode: A string specifying the policy for how data should be + processed by tf.data workers. Currently, the only supported value is + "parallel_epochs". + service: A string indicating how to connect to the tf.data service. The + string should be in the format "protocol://address", e.g. + "grpc://localhost:5000". + dataset_id: The id of the dataset to read from. This id is returned by + `register_dataset` when the dataset is registered with the tf.data + service. + element_spec: A nested structure of `tf.TypeSpec`s representing the type of + elements produced by the dataset. Use `tf.data.Dataset.element_spec` to + see the element spec for a given dataset. + job_name: (Optional.) The name of the job. This argument makes it possible + for multiple datasets to share the same job. The default behavior is that + the dataset creates anonymous, exclusively owned jobs. + max_outstanding_requests: (Optional.) A limit on how many elements may be + requested at the same time. You can use this option to control the amount + of memory used, since `distribute` won't use more than `element_size` * + `max_outstanding_requests` of memory. + + Returns: + A `tf.data.Dataset` which reads from the tf.data service. 
+ """ + return _from_dataset_id( + processing_mode=processing_mode, + service=service, + dataset_id=dataset_id, + element_spec=element_spec, + job_name=job_name, + max_outstanding_requests=max_outstanding_requests) diff --git a/tensorflow/python/data/experimental/service/__init__.py b/tensorflow/python/data/experimental/service/__init__.py index e249fb02c19..74ced1a8eb6 100644 --- a/tensorflow/python/data/experimental/service/__init__.py +++ b/tensorflow/python/data/experimental/service/__init__.py @@ -121,5 +121,7 @@ from __future__ import division from __future__ import print_function from tensorflow.python.data.experimental.ops.data_service_ops import distribute +from tensorflow.python.data.experimental.ops.data_service_ops import from_dataset_id +from tensorflow.python.data.experimental.ops.data_service_ops import register_dataset from tensorflow.python.data.experimental.service.server_lib import DispatchServer from tensorflow.python.data.experimental.service.server_lib import WorkerServer diff --git a/tensorflow/python/data/kernel_tests/data_service_ops_test.py b/tensorflow/python/data/kernel_tests/data_service_ops_test.py index ae847e3924c..c0df85427eb 100644 --- a/tensorflow/python/data/kernel_tests/data_service_ops_test.py +++ b/tensorflow/python/data/kernel_tests/data_service_ops_test.py @@ -35,6 +35,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import random_seed from tensorflow.python.framework import sparse_tensor +from tensorflow.python.framework import tensor_spec from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import sparse_ops @@ -44,12 +45,12 @@ from tensorflow.python.platform import test PROTOCOL = "grpc" -def _make_distributed_dataset(dataset, address, job_name=None): +def _make_distributed_dataset(dataset, service, job_name=None): """Creates a distributed dataset with a short task refresh interval.""" return dataset.apply( data_service_ops._distribute( "parallel_epochs", - "{0}://{1}".format(PROTOCOL, address), + service, job_name=job_name, task_refresh_interval_hint_ms=20)) @@ -63,7 +64,7 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): num_workers: The number of workers in the cluster. Returns: - The address of the dispatcher. + A string for connecting to the tf.data service. 
""" self._dispatcher = server_lib.DispatchServer(port=0, protocol=PROTOCOL) self._servers = [] @@ -74,36 +75,36 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): dispatcher_address=self._dispatcher._address, protocol=PROTOCOL)) - return self._dispatcher._address + return "{0}://{1}".format(PROTOCOL, self._dispatcher._address) @combinations.generate(test_base.eager_only_combinations()) def testDistributeBasic(self): num_elements = 10 - dispatcher_address = self.create_cluster(1) + service = self.create_cluster(1) ds = dataset_ops.Dataset.range(num_elements) - ds = _make_distributed_dataset(ds, dispatcher_address) + ds = _make_distributed_dataset(ds, service) results = [elem.numpy() for elem in ds] self.assertEqual(list(range(num_elements)), results) @combinations.generate(test_base.eager_only_combinations()) def testDistributeSparse(self): - dispatcher_address = self.create_cluster(1) + service = self.create_cluster(1) element = sparse_tensor.SparseTensor( indices=[[0]], values=constant_op.constant([0], dtype=dtypes.int32), dense_shape=[1]) ds = dataset_ops.Dataset.from_tensors(element) - ds = _make_distributed_dataset(ds, dispatcher_address) + ds = _make_distributed_dataset(ds, service) results = [sparse_ops.sparse_tensor_to_dense(elem) for elem in ds] self.assertAllEqual(results, [[0]]) @combinations.generate(test_base.eager_only_combinations()) def testDistributeRagged(self): - dispatcher_address = self.create_cluster(1) + service = self.create_cluster(1) ds = dataset_ops.Dataset.from_tensor_slices([1, 5, 3, 2, 8]) ds = ds.map(math_ops.range) ds = ds.apply(batching.dense_to_ragged_batch(2)) - ds = _make_distributed_dataset(ds, dispatcher_address) + ds = _make_distributed_dataset(ds, service) results = [elem.to_tensor() for elem in ds] self.assertAllEqual(results[0], [[0, 0, 0, 0, 0], [0, 1, 2, 3, 4]]) self.assertAllEqual(results[1], [[0, 1, 2], [0, 1, 0]]) @@ -113,10 +114,10 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): def testDifferentShuffleOrders(self): random_seed.set_random_seed(None) num_elements = 100 - dispatcher_address = self.create_cluster(2) + service = self.create_cluster(2) ds = dataset_ops.Dataset.range(num_elements) ds = ds.shuffle(num_elements) - ds = _make_distributed_dataset(ds, dispatcher_address) + ds = _make_distributed_dataset(ds, service) output = [elem.numpy() for elem in ds] # The output will be two sequences of range(num_elements) @@ -134,9 +135,9 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): @combinations.generate(test_base.eager_only_combinations()) def testMultipleEpochs(self): num_elements = 3 - dispatcher_address = self.create_cluster(1) + service = self.create_cluster(1) ds = dataset_ops.Dataset.range(num_elements) - ds = _make_distributed_dataset(ds, dispatcher_address) + ds = _make_distributed_dataset(ds, service) for _ in range(10): self.assertEqual(list(range(num_elements)), [elem.numpy() for elem in ds]) @@ -144,9 +145,9 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): def testRepeatedDataset(self): num_elements = 10 num_repetitions = 5 - dispatcher_address = self.create_cluster(1) + service = self.create_cluster(1) ds = dataset_ops.Dataset.range(num_elements) - ds = _make_distributed_dataset(ds, dispatcher_address) + ds = _make_distributed_dataset(ds, service) ds = ds.repeat(num_repetitions) self.assertDatasetProduces( ds, expected_output=num_repetitions * list(range(num_elements))) @@ -155,12 +156,12 @@ class 
DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): def testConcurrentEpoch(self): num_elements = 10 num_datasets = 3 - dispatcher_address = self.create_cluster(1) + service = self.create_cluster(1) iterators = [] results = [] for _ in range(num_datasets): ds = dataset_ops.Dataset.range(num_elements) - ds = _make_distributed_dataset(ds, dispatcher_address) + ds = _make_distributed_dataset(ds, service) iterators.append(iter(ds)) results.append([]) @@ -176,9 +177,9 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): self.skipTest("Not yet implemented") num_elements = 10 num_iterators = 3 - dispatcher_address = self.create_cluster(1) + service = self.create_cluster(1) ds = dataset_ops.Dataset.range(num_elements) - ds = _make_distributed_dataset(ds, dispatcher_address) + ds = _make_distributed_dataset(ds, service) result = [] iterators = [] for _ in range(num_iterators): @@ -200,9 +201,9 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): def testMultiWorker(self): num_workers = 3 num_elements = 10 - dispatcher_address = self.create_cluster(num_workers) + service = self.create_cluster(num_workers) ds = dataset_ops.Dataset.range(num_elements) - ds = _make_distributed_dataset(ds, dispatcher_address) + ds = _make_distributed_dataset(ds, service) results = [elem.numpy() for elem in ds] self.assertCountEqual(num_workers * list(range(num_elements)), results) @@ -213,7 +214,8 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): port=0, dispatcher_address=self._dispatcher._address, protocol=PROTOCOL) num_elements = 100 ds = dataset_ops.Dataset.range(num_elements) - ds = _make_distributed_dataset(ds, self._dispatcher._address) + ds = _make_distributed_dataset( + ds, "{}://{}".format(PROTOCOL, self._dispatcher._address)) iterator = iter(ds) results = [] # Read halfway through the dataset. @@ -241,7 +243,8 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): port=0, dispatcher_address=self._dispatcher._address, protocol=PROTOCOL) num_elements = 100 ds = dataset_ops.Dataset.range(num_elements) - ds = _make_distributed_dataset(ds, self._dispatcher._address) + ds = _make_distributed_dataset( + ds, "{}://{}".format(PROTOCOL, self._dispatcher._address)) iterator = iter(ds) # Read halfway through the dataset. 
midpoint = num_elements // 2 @@ -276,12 +279,12 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): def testMaxOutstandingRequests(self): num_elements = 10 num_workers = 3 - address = self.create_cluster(num_workers) + service = self.create_cluster(num_workers) ds = dataset_ops.Dataset.range(num_elements) ds = ds.apply( data_service_ops._distribute( "parallel_epochs", - "{0}://{1}".format(PROTOCOL, address), + service, max_outstanding_requests=1, task_refresh_interval_hint_ms=20)) self.assertCountEqual(num_workers * list(range(num_elements)), @@ -291,12 +294,12 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): def testInsideFunction(self): num_workers = 3 num_elements = 10 - dispatcher_address = self.create_cluster(num_workers) + service = self.create_cluster(num_workers) @def_function.function def f(): ds = dataset_ops.Dataset.range(num_elements) - ds = _make_distributed_dataset(ds, dispatcher_address) + ds = _make_distributed_dataset(ds, service) result = tensor_array_ops.TensorArray( dtypes.int64, size=num_workers * num_elements, dynamic_size=True) i = 0 @@ -311,10 +314,10 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): @combinations.generate(test_base.eager_only_combinations()) def testSharedJobName(self): num_elements = 100 - dispatcher_address = self.create_cluster(1) + service = self.create_cluster(1) ds = dataset_ops.Dataset.range(num_elements) - ds1 = _make_distributed_dataset(ds, dispatcher_address, job_name="job_name") - ds2 = _make_distributed_dataset(ds, dispatcher_address, job_name="job_name") + ds1 = _make_distributed_dataset(ds, service, job_name="job_name") + ds2 = _make_distributed_dataset(ds, service, job_name="job_name") iter1 = iter(ds1) iter2 = iter(ds2) results = [] @@ -330,22 +333,20 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): @combinations.generate(test_base.eager_only_combinations()) def testDifferentJobNames(self): num_elements = 10 - dispatcher_address = self.create_cluster(1) + service = self.create_cluster(1) ds = dataset_ops.Dataset.range(num_elements) - ds1 = _make_distributed_dataset( - ds, dispatcher_address, job_name="job_name1") - ds2 = _make_distributed_dataset( - ds, dispatcher_address, job_name="job_name2") + ds1 = _make_distributed_dataset(ds, service, job_name="job_name1") + ds2 = _make_distributed_dataset(ds, service, job_name="job_name2") self.assertDatasetProduces(ds1, list(range(num_elements))) self.assertDatasetProduces(ds2, list(range(num_elements))) @combinations.generate(test_base.eager_only_combinations()) def testSharedJobNameMultiIteration(self): num_elements = 10 - dispatcher_address = self.create_cluster(1) + service = self.create_cluster(1) ds = dataset_ops.Dataset.range(num_elements) - ds1 = _make_distributed_dataset(ds, dispatcher_address, job_name="job_name") - ds2 = _make_distributed_dataset(ds, dispatcher_address, job_name="job_name") + ds1 = _make_distributed_dataset(ds, service, job_name="job_name") + ds2 = _make_distributed_dataset(ds, service, job_name="job_name") # iteration 1 self.assertDatasetProduces(ds1, list(range(num_elements))) self.assertDatasetProduces(ds2, []) @@ -357,11 +358,11 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): def testSharedJobNameRepeat(self): num_elements = 100 num_repetitions = 3 - dispatcher_address = self.create_cluster(1) + service = self.create_cluster(1) ds = dataset_ops.Dataset.range(num_elements) - ds1 = _make_distributed_dataset(ds, 
dispatcher_address, job_name="job_name") + ds1 = _make_distributed_dataset(ds, service, job_name="job_name") ds1 = ds1.repeat(num_repetitions) - ds2 = _make_distributed_dataset(ds, dispatcher_address, job_name="job_name") + ds2 = _make_distributed_dataset(ds, service, job_name="job_name") ds2 = ds2.repeat(num_repetitions) results = [] iter1 = iter(ds1) @@ -379,7 +380,7 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): @combinations.generate(test_base.eager_only_combinations()) def testApplyDeterminismOption(self): elements = list(range(10)) - dispatcher_address = self.create_cluster(1) + service = self.create_cluster(1) def dataset_fn(delay_ms): @@ -396,7 +397,7 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): opts = dataset_ops.Options() opts.experimental_deterministic = False ds = ds.with_options(opts) - ds = _make_distributed_dataset(ds, dispatcher_address) + ds = _make_distributed_dataset(ds, service) return ds self.checkDeterminism( @@ -413,8 +414,8 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): options.experimental_external_state_policy = external_state_policy ds = ds.with_options(options) - dispatcher_address = self.create_cluster(3) - ds = _make_distributed_dataset(ds, dispatcher_address) + service = self.create_cluster(3) + ds = _make_distributed_dataset(ds, service) next(iter(ds)) @combinations.generate( @@ -434,12 +435,12 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): @combinations.generate(test_base.eager_only_combinations()) def testDistributeFromInterleave(self): - dispatcher_address = self.create_cluster(1) + service = self.create_cluster(1) ds = dataset_ops.Dataset.range(2) def interleave_fn(_): ds = dataset_ops.Dataset.range(2) - _make_distributed_dataset(ds, dispatcher_address) + _make_distributed_dataset(ds, service) return ds with self.assertRaisesRegex( @@ -473,6 +474,76 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): data_service_ops.distribute( processing_mode="invalid", service="grpc://localhost:5000")) + @combinations.generate(test_base.eager_only_combinations()) + def testFromDatasetId(self): + num_elements = 10 + service = self.create_cluster(1) + + ds = dataset_ops.Dataset.range(num_elements) + dataset_id = data_service_ops.register_dataset(service, ds) + from_dataset_id_ds = data_service_ops.from_dataset_id( + "parallel_epochs", service, dataset_id, ds.element_spec) + self.assertDatasetProduces(from_dataset_id_ds, list(range(num_elements))) + + @combinations.generate(test_base.eager_only_combinations()) + def testFromDatasetIdMultipleComponents(self): + num_elements = 10 + service = self.create_cluster(1) + + ds = dataset_ops.Dataset.range(num_elements) + ds = dataset_ops.Dataset.zip({"a": (ds, ds), "b": ds}) + dataset_id = data_service_ops.register_dataset(service, ds) + from_dataset_id_ds = data_service_ops.from_dataset_id( + "parallel_epochs", service, dataset_id, ds.element_spec) + output = self.getDatasetOutput(from_dataset_id_ds) + for i in range(num_elements): + self.assertEqual(i, output[i]["a"][0]) + self.assertEqual(i, output[i]["a"][1]) + self.assertEqual(i, output[i]["b"]) + + @combinations.generate(test_base.eager_only_combinations()) + def testFromDatasetIdWrongElementSpec(self): + num_elements = 10 + service = self.create_cluster(1) + + ds = dataset_ops.Dataset.range(num_elements) + dataset_id = data_service_ops.register_dataset(service, ds) + wrong_spec = tensor_spec.TensorSpec(shape=(), 
dtype=dtypes.variant) + from_dataset_id_ds = data_service_ops.from_dataset_id( + "parallel_epochs", service, dataset_id, wrong_spec) + with self.assertRaisesRegex(errors.FailedPreconditionError, + "Expected a tensor of type variant"): + self.evaluate(self.getNext(from_dataset_id_ds)()) + + @combinations.generate(test_base.eager_only_combinations()) + def testFromDatasetIdNotRegistered(self): + service = self.create_cluster(1) + + dataset_id = 0 + element_spec = tensor_spec.TensorSpec(shape=(), dtype=dtypes.variant) + from_dataset_id_ds = data_service_ops.from_dataset_id( + "parallel_epochs", service, dataset_id, element_spec) + with self.assertRaisesRegex(errors.NotFoundError, "Dataset id"): + self.evaluate(self.getNext(from_dataset_id_ds)()) + + @combinations.generate(test_base.eager_only_combinations()) + def testRegisterEquivalentDatasets(self): + ds_1 = dataset_ops.Dataset.range(10) + ds_2 = dataset_ops.Dataset.range(10) + service = self.create_cluster(1) + id_1 = data_service_ops.register_dataset(service, ds_1) + id_2 = data_service_ops.register_dataset(service, ds_2) + self.assertEqual(id_1.numpy(), id_2.numpy()) + + @combinations.generate(test_base.eager_only_combinations()) + def testRegisterDifferentDatasets(self): + ds_1 = dataset_ops.Dataset.range(10) + ds_2 = dataset_ops.Dataset.range(20) + service = self.create_cluster(1) + id_1 = data_service_ops.register_dataset(service, ds_1) + id_2 = data_service_ops.register_dataset(service, ds_2) + self.assertNotEqual(id_1.numpy(), id_2.numpy()) + if __name__ == "__main__": test.main() diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.service.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.service.pbtxt index 12f4f3c2b08..3630c97da93 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.service.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.service.pbtxt @@ -4,4 +4,12 @@ tf_module { name: "distribute" argspec: "args=[\'processing_mode\', \'service\', \'job_name\', \'max_outstanding_requests\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } + member_method { + name: "from_dataset_id" + argspec: "args=[\'processing_mode\', \'service\', \'dataset_id\', \'element_spec\', \'job_name\', \'max_outstanding_requests\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + } + member_method { + name: "register_dataset" + argspec: "args=[\'service\', \'dataset\'], varargs=None, keywords=None, defaults=None" + } } diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.service.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.service.pbtxt index 00f0035e082..3ec5cd90ff8 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.service.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.service.pbtxt @@ -12,4 +12,12 @@ tf_module { name: "distribute" argspec: "args=[\'processing_mode\', \'service\', \'job_name\', \'max_outstanding_requests\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } + member_method { + name: "from_dataset_id" + argspec: "args=[\'processing_mode\', \'service\', \'dataset_id\', \'element_spec\', \'job_name\', \'max_outstanding_requests\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + } + member_method { + name: "register_dataset" + argspec: "args=[\'service\', \'dataset\'], varargs=None, keywords=None, defaults=None" + } } From 969fdb057dda75cfdc764eda91e7a9ae4fe78a69 Mon Sep 17 00:00:00 2001 
From: Jay Shi Date: Wed, 22 Jul 2020 17:02:21 -0700 Subject: [PATCH 1101/2522] [tf.data] Calculate the average input time for the root node of the data input pipeline. Also add a unit test for the `SelfProcessingTime` function. PiperOrigin-RevId: 322686110 Change-Id: I72c0bddb2ed32570325c7d8023aaaffbeab87378 --- tensorflow/core/framework/model.cc | 43 +++++++++---------- tensorflow/core/framework/model.h | 16 ++++--- tensorflow/core/framework/model_test.cc | 38 ++++++++++++---- .../core/kernels/data/model_dataset_op.cc | 40 +++++++++++++++-- 4 files changed, 97 insertions(+), 40 deletions(-) diff --git a/tensorflow/core/framework/model.cc b/tensorflow/core/framework/model.cc index 94355cc6ea5..3d54ffd51d8 100644 --- a/tensorflow/core/framework/model.cc +++ b/tensorflow/core/framework/model.cc @@ -52,8 +52,7 @@ class InterleaveMany : public Node { if (output_) { inherited_input_time = (*input_times)[output_->long_name()]; } else { - inherited_input_time = - gtl::FindWithDefault(*input_times, kInputTimeKey, 0.0L); + inherited_input_time = (*input_times)[kModelInputTimeKey]; } if (num_inputs() <= 1) { @@ -176,8 +175,7 @@ class AsyncInterleaveMany : public Node { if (output_) { inherited_input_time = (*input_times)[output_->long_name()]; } else { - inherited_input_time = - gtl::FindWithDefault(*input_times, kInputTimeKey, 0.0L); + inherited_input_time = (*input_times)[kModelInputTimeKey]; } if (num_inputs() <= 1) { @@ -323,8 +321,7 @@ class KnownRatio : public Node { if (output_) { inherited_input_time = (*input_times)[output_->long_name()]; } else { - inherited_input_time = - gtl::FindWithDefault(*input_times, kInputTimeKey, 0.0L); + inherited_input_time = (*input_times)[kModelInputTimeKey]; } if (ratio_ == 0) { @@ -423,8 +420,7 @@ class AsyncKnownRatio : public Node { if (output_) { inherited_input_time = (*input_times)[output_->long_name()]; } else { - inherited_input_time = - gtl::FindWithDefault(*input_times, kInputTimeKey, 0.0L); + inherited_input_time = (*input_times)[kModelInputTimeKey]; } double parallelism = 1.0; auto* parallelism_parameter = gtl::FindOrNull(parameters_, kParallelism); @@ -593,8 +589,7 @@ class UnknownRatio : public Node { if (output_) { inherited_input_time = (*input_times)[output_->long_name()]; } else { - inherited_input_time = - gtl::FindWithDefault(*input_times, kInputTimeKey, 0.0L); + inherited_input_time = (*input_times)[kModelInputTimeKey]; } if (num_elements_ == 0 || inputs_.empty() || @@ -692,8 +687,7 @@ class Unknown : public Node { if (output_) { inherited_input_time = (*input_times)[output_->long_name()]; } else { - inherited_input_time = - gtl::FindWithDefault(*input_times, kInputTimeKey, 0.0L); + inherited_input_time = (*input_times)[kModelInputTimeKey]; } (*input_times)[long_name()] = inherited_input_time; } @@ -1295,13 +1289,13 @@ void Model::FlushMetrics() { } void Model::Optimize(AutotuneAlgorithm algorithm, int64 cpu_budget, - int64 ram_budget) { + int64 ram_budget, double model_input_time) { switch (algorithm) { case AutotuneAlgorithm::HILL_CLIMB: - OptimizeHillClimb(cpu_budget, ram_budget); + OptimizeHillClimb(cpu_budget, ram_budget, model_input_time); break; case AutotuneAlgorithm::GRADIENT_DESCENT: - OptimizeGradientDescent(cpu_budget, ram_budget); + OptimizeGradientDescent(cpu_budget, ram_budget, model_input_time); break; } } @@ -1346,7 +1340,8 @@ Model::CollectEssentialParallelism( return essential_parameters; } -void Model::OptimizeGradientDescent(int64 cpu_budget, int64 ram_budget) { +void Model::OptimizeGradientDescent(int64 cpu_budget, 
int64 ram_budget, + double model_input_time) { std::shared_ptr snapshot; { tf_shared_lock lock(mu_); @@ -1376,7 +1371,7 @@ void Model::OptimizeGradientDescent(int64 cpu_budget, int64 ram_budget) { double new_value; for (int i = 0; i < kMaxIterations; ++i) { absl::flat_hash_map gradients; - new_output_time = OutputTime(snapshot, &gradients); + new_output_time = OutputTime(snapshot, model_input_time, &gradients); int64 model_parallelism = 0; for (auto& pair : essential_parameters) { model_parallelism += std::round(pair.second->value); @@ -1422,7 +1417,8 @@ void Model::OptimizeGradientDescent(int64 cpu_budget, int64 ram_budget) { } } -void Model::OptimizeHillClimb(int64 cpu_budget, int64 ram_budget) { +void Model::OptimizeHillClimb(int64 cpu_budget, int64 ram_budget, + double model_input_time) { std::shared_ptr snapshot; { tf_shared_lock lock(mu_); @@ -1442,7 +1438,8 @@ void Model::OptimizeHillClimb(int64 cpu_budget, int64 ram_budget) { pair.second->value = pair.second->min; } while (true) { - const double output_time = OutputTime(snapshot, /*gradients=*/nullptr); + const double output_time = + OutputTime(snapshot, model_input_time, /*gradients=*/nullptr); bool all_max = true; for (auto& pair : parameters) { if (pair.second->value < pair.second->max) { @@ -1461,7 +1458,8 @@ void Model::OptimizeHillClimb(int64 cpu_budget, int64 ram_budget) { continue; } pair.second->value++; - double new_output_time = OutputTime(snapshot, /*gradients=*/nullptr); + double new_output_time = + OutputTime(snapshot, model_input_time, /*gradients=*/nullptr); double delta = output_time - new_output_time; if (delta > best_delta && (delta > kBufferSizeMinDelta || pair.second->name != kBufferSize)) { @@ -1490,10 +1488,11 @@ void Model::OptimizeHillClimb(int64 cpu_budget, int64 ram_budget) { } } -double Model::OutputTime(std::shared_ptr node, +double Model::OutputTime(std::shared_ptr node, double model_input_time, absl::flat_hash_map* gradients) { // To store the input time for each node. - absl::flat_hash_map input_times; + absl::flat_hash_map input_times = { + {kModelInputTimeKey, model_input_time}}; // TODO(jsimsa): Now that we are accounting for buffer size in wait time // computation, assuming that the input is infinitely fast will result in diff --git a/tensorflow/core/framework/model.h b/tensorflow/core/framework/model.h index 71c4010ae40..bfa6e31209a 100644 --- a/tensorflow/core/framework/model.h +++ b/tensorflow/core/framework/model.h @@ -42,8 +42,8 @@ constexpr int64 kAutotune = -1; constexpr char kParallelism[] = "parallelism"; constexpr char kBufferSize[] = "buffer_size"; -// A key used to identify input time gradient. -constexpr char kInputTimeKey[] = "input_time"; +// A key used to identify the input time of the model. +constexpr char kModelInputTimeKey[] = "model_input_time"; enum class AutotuneAlgorithm { HILL_CLIMB = 0, @@ -609,8 +609,8 @@ class Model { void FlushMetrics() TF_LOCKS_EXCLUDED(mu_); // Uses the given algorithm to perform the autotuning optimization. - void Optimize(AutotuneAlgorithm algorithm, int64 cpu_budget, int64 ram_budget) - TF_LOCKS_EXCLUDED(mu_); + void Optimize(AutotuneAlgorithm algorithm, int64 cpu_budget, int64 ram_budget, + double model_input_time) TF_LOCKS_EXCLUDED(mu_); // Removes the given node. 
void RemoveNode(std::shared_ptr node) TF_LOCKS_EXCLUDED(mu_); @@ -638,7 +638,8 @@ class Model { // This process is repeated until all parameters reach their maximum values or // the projected output time is less than or equal to the processing time // needed to produce an element divided by CPU budget. - void OptimizeHillClimb(int64 cpu_budget, int64 ram_budget); + void OptimizeHillClimb(int64 cpu_budget, int64 ram_budget, + double model_input_time); // This optimization algorithm starts by setting all tunable parallelism // parameters to the minimum value. It then improves current parameters by @@ -647,12 +648,13 @@ class Model { // repeated until either the output time improvement is smaller than threshold // value or the output time is less than the processing time needed to produce // an element divided by CPU budget. - void OptimizeGradientDescent(int64 cpu_budget, int64 ram_budget); + void OptimizeGradientDescent(int64 cpu_budget, int64 ram_budget, + double model_input_time); // Collects the output time and if `gradients` is not `nullptr`, the output // time gradient w.r.t. tunable parameters of the subtree rooted in the given // node. - double OutputTime(std::shared_ptr node, + double OutputTime(std::shared_ptr node, double model_input_time, absl::flat_hash_map* gradients); // Collects the processing time for the given node. diff --git a/tensorflow/core/framework/model_test.cc b/tensorflow/core/framework/model_test.cc index 3fe965217b3..bdfd2c4df2d 100644 --- a/tensorflow/core/framework/model_test.cc +++ b/tensorflow/core/framework/model_test.cc @@ -56,7 +56,7 @@ TEST_P(AsyncInterleaveManyTest, Model) { async_interleave_many->remove_input(source2); }); absl::flat_hash_map input_times; - input_times[kInputTimeKey] = input_time; + input_times[kModelInputTimeKey] = input_time; EXPECT_EQ(async_interleave_many->TotalBufferedBytes(), 0); EXPECT_EQ(async_interleave_many->TotalMaximumBufferedBytes(), 0); async_interleave_many->record_buffer_event(110, 10); @@ -125,7 +125,7 @@ TEST_P(AsyncKnownRatioTest, Model) { model::MakeSourceNode({2, "source2", async_known_many}); async_known_many->add_input(source2); absl::flat_hash_map input_times; - input_times[kInputTimeKey] = input_time; + input_times[kModelInputTimeKey] = input_time; EXPECT_EQ(async_known_many->TotalBufferedBytes(), 0); EXPECT_EQ(async_known_many->TotalMaximumBufferedBytes(), 0); async_known_many->record_buffer_event(110, 10); @@ -202,6 +202,7 @@ TEST(InterleaveManyTest, Model) { model::MakeSourceNode({3, "source2", interleave_many}); interleave_many->add_input(source2); absl::flat_hash_map input_times; + input_times[kModelInputTimeKey] = 0.0; interleave_many->add_processing_time(100); EXPECT_EQ(interleave_many->processing_time(), 100); EXPECT_EQ(interleave_many->TotalProcessingTime(/*processing_times=*/nullptr), @@ -241,6 +242,7 @@ TEST_P(KnownRatioTest, Model) { model::MakeSourceNode({2, "source2", known_many}); known_many->add_input(source2); absl::flat_hash_map input_times; + input_times[kModelInputTimeKey] = 0.0; source1->add_processing_time(100); EXPECT_EQ(known_many->TotalProcessingTime(/*processing_times=*/nullptr), 0); EXPECT_EQ(known_many->OutputTime(&input_times, nullptr), 0); @@ -289,6 +291,7 @@ INSTANTIATE_TEST_SUITE_P(Test, KnownRatioTest, ::testing::Values(0, 1, 2, 4)); TEST(SourceTest, Model) { std::shared_ptr source = model::MakeSourceNode({0, "source", nullptr}); absl::flat_hash_map input_times; + input_times[kModelInputTimeKey] = 0.0; source->add_processing_time(100); EXPECT_EQ(source->processing_time(), 100); 
EXPECT_EQ(source->TotalProcessingTime(/*processing_times=*/nullptr), 0); @@ -313,6 +316,7 @@ TEST(UnknownRatioTest, Model) { model::MakeSourceNode({2, "source2", unknown_many}); unknown_many->add_input(source2); absl::flat_hash_map input_times; + input_times[kModelInputTimeKey] = 0.0; unknown_many->add_processing_time(100); EXPECT_EQ(unknown_many->processing_time(), 100); EXPECT_EQ(unknown_many->TotalProcessingTime(/*processing_times=*/nullptr), 0); @@ -348,6 +352,7 @@ TEST(UnknownTest, Model) { model::MakeSourceNode({2, "source2", unknown}); unknown->add_input(source2); absl::flat_hash_map input_times; + input_times[kModelInputTimeKey] = 0.0; source1->add_processing_time(100); EXPECT_EQ(unknown->TotalProcessingTime(/*processing_times=*/nullptr), 0); EXPECT_EQ(unknown->OutputTime(&input_times, nullptr), 0); @@ -528,7 +533,7 @@ TEST(AsyncInterleaveManyGradientTest, Model) { async_interleave_many->remove_input(source2); }); absl::flat_hash_map input_times; - input_times[kInputTimeKey] = input_time; + input_times[kModelInputTimeKey] = input_time; absl::flat_hash_map> parameters; async_interleave_many->CollectTunableParameters(¶meters); async_interleave_many->record_element(); @@ -583,7 +588,7 @@ TEST_P(AsyncKnownRatioGradientTest, Model) { std::shared_ptr source2 = model::MakeSourceNode({2, "source2", async_known_many}); absl::flat_hash_map input_times; - input_times[kInputTimeKey] = input_time; + input_times[kModelInputTimeKey] = input_time; async_known_many->add_input(source2); source1->record_element(); source1->add_processing_time(100); @@ -638,7 +643,7 @@ TEST(InterleaveManyGradientTest, Model) { async_known_many->record_element(); async_known_many->add_processing_time(300); absl::flat_hash_map input_times; - input_times[kInputTimeKey] = input_time; + input_times[kModelInputTimeKey] = input_time; absl::flat_hash_map> parameters; absl::flat_hash_map gradients; interleave_many->CollectTunableParameters(¶meters); @@ -669,7 +674,7 @@ TEST(KnownRatioGradientTest, Model) { async_known_many->record_element(); async_known_many->add_processing_time(300); absl::flat_hash_map input_times; - input_times[kInputTimeKey] = input_time; + input_times[kModelInputTimeKey] = input_time; absl::flat_hash_map> parameters; absl::flat_hash_map gradients; known_many->CollectTunableParameters(¶meters); @@ -700,7 +705,7 @@ TEST(UnknownRatioGradientTest, Model) { async_known_many->record_element(); async_known_many->add_processing_time(300); absl::flat_hash_map input_times; - input_times[kInputTimeKey] = input_time; + input_times[kModelInputTimeKey] = input_time; absl::flat_hash_map> parameters; absl::flat_hash_map gradients; unknown_many->CollectTunableParameters(¶meters); @@ -731,7 +736,7 @@ TEST(UnknownGradientTest, Model) { async_known_many->record_element(); async_known_many->add_processing_time(300); absl::flat_hash_map input_times; - input_times[kInputTimeKey] = input_time; + input_times[kModelInputTimeKey] = input_time; absl::flat_hash_map> parameters; absl::flat_hash_map gradients; unknown->CollectTunableParameters(¶meters); @@ -857,6 +862,23 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Values(0, 20, 40, 80, 100), ::testing::Values(0, 1, 2, 4, 10, 20, 40))); +class SelfProcessingTimeTest : public ::testing::TestWithParam {}; + +TEST_P(SelfProcessingTimeTest, Model) { + const int64 add_times = GetParam(); + std::shared_ptr source = model::MakeSourceNode({0, "source", nullptr}); + for (int i = 0; i < add_times; i++) { + source->add_processing_time(i); + source->record_element(); + } + double self_processing_time = 
+ (add_times == 0 ? 0.0 : (static_cast(add_times) - 1.0) / 2.0); + EXPECT_EQ(source->SelfProcessingTime(), self_processing_time); +} + +INSTANTIATE_TEST_SUITE_P(Test, SelfProcessingTimeTest, + ::testing::Values(0, 1, 2, 5, 10, 20, 40)); + } // namespace } // namespace model } // namespace data diff --git a/tensorflow/core/kernels/data/model_dataset_op.cc b/tensorflow/core/kernels/data/model_dataset_op.cc index 8c630fd9646..af9e1e59a35 100644 --- a/tensorflow/core/kernels/data/model_dataset_op.cc +++ b/tensorflow/core/kernels/data/model_dataset_op.cc @@ -136,9 +136,15 @@ class ModelDatasetOp : public UnaryDatasetOpKernel { mutex_lock l(mu_); TF_RETURN_IF_ERROR(EnsureOptimizeThreadStarted(ctx)); params.model = model_; + int64 now_nanos = EnvTime::NowNanos(); + RecordInput(now_nanos); } - return input_impl_->GetNext(IteratorContext(std::move(params)), - out_tensors, end_of_sequence); + Status s = input_impl_->GetNext(IteratorContext(std::move(params)), + out_tensors, end_of_sequence); + int64 now_nanos = EnvTime::NowNanos(); + mutex_lock l(mu_); + RecordOutput(now_nanos); + return s; } protected: @@ -192,8 +198,13 @@ class ModelDatasetOp : public UnaryDatasetOpKernel { } if (cancelled_) return; } + double model_input_time; + { + tf_shared_lock l(mu_); + model_input_time = SelfInputTime(); + } model_->Optimize(dataset()->algorithm_, dataset()->cpu_budget_, - dataset()->ram_budget_); + dataset()->ram_budget_, model_input_time); // Exponentially increase the period of running the optimization // until a threshold is reached. if (optimization_period_ms != kOptimizationPeriodThresholdMs) { @@ -206,12 +217,35 @@ class ModelDatasetOp : public UnaryDatasetOpKernel { } } + void RecordInput(int64 time_nanos) TF_EXCLUSIVE_LOCKS_REQUIRED(mu_) { + if (last_output_time_ != 0) { + DCHECK_LE(last_output_time_, time_nanos); + input_time_ += time_nanos - last_output_time_; + num_input_events_++; + } + } + + void RecordOutput(int64 time_nanos) TF_EXCLUSIVE_LOCKS_REQUIRED(mu_) { + last_output_time_ = time_nanos; + } + + double SelfInputTime() const TF_SHARED_LOCKS_REQUIRED(mu_) { + if (num_input_events_ == 0) { + return 0; + } + return static_cast(input_time_) / + static_cast(num_input_events_); + } + mutex mu_; condition_variable cond_var_; std::shared_ptr model_; std::unique_ptr model_thread_ TF_GUARDED_BY(mu_); bool cancelled_ TF_GUARDED_BY(mu_) = false; std::unique_ptr input_impl_; + int64 num_input_events_ TF_GUARDED_BY(mu_) = 0; + int64 input_time_ TF_GUARDED_BY(mu_) = 0; + int64 last_output_time_ TF_GUARDED_BY(mu_) = 0; }; const DatasetBase* input_; From 9c433fe87c544526629782bfed56d71470cce3e2 Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Wed, 22 Jul 2020 17:07:16 -0700 Subject: [PATCH 1102/2522] Port the add kernel to the new TfLiteEvalTensor API. 
PiperOrigin-RevId: 322687146 Change-Id: I31da4eca82855466d5b9ec6324e7cfb9df9d3506 --- tensorflow/lite/micro/kernels/BUILD | 1 + tensorflow/lite/micro/kernels/add.cc | 61 ++++++++++++++--------- tensorflow/lite/micro/kernels/add_test.cc | 39 +++------------ 3 files changed, 46 insertions(+), 55 deletions(-) diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index b2544aa3af3..c4d3a691402 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD @@ -420,6 +420,7 @@ tflite_micro_cc_test( "add_test.cc", ], deps = [ + ":kernel_runner", "//tensorflow/lite/c:common", "//tensorflow/lite/micro:op_resolvers", "//tensorflow/lite/micro/testing:micro_test", diff --git a/tensorflow/lite/micro/kernels/add.cc b/tensorflow/lite/micro/kernels/add.cc index be089dace88..7190f2af548 100644 --- a/tensorflow/lite/micro/kernels/add.cc +++ b/tensorflow/lite/micro/kernels/add.cc @@ -23,6 +23,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/op_macros.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" #include "tensorflow/lite/micro/memory_helpers.h" namespace tflite { @@ -53,6 +54,10 @@ struct OpData { int32 input1_offset; int32 input2_offset; int32 output_offset; + + // Used only for float evals: + float output_activation_min_f32; + float output_activation_max_f32; }; TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params, @@ -90,24 +95,28 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params, TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized( context, params->activation, output, &data->output_activation_min, &data->output_activation_max)); + } else if (output->type == kTfLiteFloat32) { + CalculateActivationRange(params->activation, + &data->output_activation_min_f32, + &data->output_activation_max_f32); } return kTfLiteOk; } void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params, - const OpData* data, const TfLiteTensor* input1, - const TfLiteTensor* input2, TfLiteTensor* output) { - float output_activation_min, output_activation_max; - CalculateActivationRange(params->activation, &output_activation_min, - &output_activation_max); + const OpData* data, const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) { tflite::ArithmeticParams op_params; - SetActivationParams(output_activation_min, output_activation_max, &op_params); -#define TF_LITE_ADD(opname) \ - reference_ops::opname(op_params, GetTensorShape(input1), \ - GetTensorData(input1), GetTensorShape(input2), \ - GetTensorData(input2), GetTensorShape(output), \ - GetTensorData(output)) + SetActivationParams(data->output_activation_min_f32, + data->output_activation_max_f32, &op_params); +#define TF_LITE_ADD(opname) \ + reference_ops::opname(op_params, tflite::micro::GetTensorShape(input1), \ + tflite::micro::GetTensorData(input1), \ + tflite::micro::GetTensorShape(input2), \ + tflite::micro::GetTensorData(input2), \ + tflite::micro::GetTensorShape(output), \ + tflite::micro::GetTensorData(output)) if (data->requires_broadcast) { TF_LITE_ADD(BroadcastAdd4DSlow); } else { @@ -118,9 +127,9 @@ void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params, TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params, const OpData* data, - const TfLiteTensor* input1, - const TfLiteTensor* input2, - 
TfLiteTensor* output) { + const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, + TfLiteEvalTensor* output) { if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) { tflite::ArithmeticParams op_params; op_params.left_shift = data->left_shift; @@ -136,12 +145,15 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node, SetActivationParams(data->output_activation_min, data->output_activation_max, &op_params); bool need_broadcast = reference_ops::ProcessBroadcastShapes( - GetTensorShape(input1), GetTensorShape(input2), &op_params); -#define TF_LITE_ADD(type, opname, dtype) \ - type::opname(op_params, GetTensorShape(input1), \ - GetTensorData(input1), GetTensorShape(input2), \ - GetTensorData(input2), GetTensorShape(output), \ - GetTensorData(output)); + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorShape(input2), &op_params); +#define TF_LITE_ADD(type, opname, dtype) \ + type::opname(op_params, tflite::micro::GetTensorShape(input1), \ + tflite::micro::GetTensorData(input1), \ + tflite::micro::GetTensorShape(input2), \ + tflite::micro::GetTensorData(input2), \ + tflite::micro::GetTensorShape(output), \ + tflite::micro::GetTensorData(output)); if (output->type == kTfLiteInt8) { if (need_broadcast) { TF_LITE_ADD(reference_integer_ops, BroadcastAdd4DSlow, int8_t); @@ -189,9 +201,12 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { TFLITE_DCHECK(node->user_data != nullptr); const OpData* data = static_cast(node->user_data); - const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); - const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kInputTensor1); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kInputTensor2); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); if (output->type == kTfLiteFloat32) { EvalAdd(context, node, params, data, input1, input2, output); diff --git a/tensorflow/lite/micro/kernels/add_test.cc b/tensorflow/lite/micro/kernels/add_test.cc index 332f3edf865..5ea9daee621 100644 --- a/tensorflow/lite/micro/kernels/add_test.cc +++ b/tensorflow/lite/micro/kernels/add_test.cc @@ -18,6 +18,7 @@ limitations under the License. 
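The Eval() hunk above is representative of the TfLiteEvalTensor migration. Stripped of the add-specific arithmetic, the access pattern looks roughly like the sketch below; it assumes the tflite::micro helpers included in the hunks above, uses made-up tensor indices and a placeholder pass-through body, and is not a complete kernel.

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace {

constexpr int kSketchInputTensor = 0;
constexpr int kSketchOutputTensor = 0;

// Sketch only: fetch eval tensors through the tflite::micro helpers and
// read/write typed data. A real kernel dispatches on tensor type and does
// the op-specific math in the loop body.
TfLiteStatus EvalSketch(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, kSketchInputTensor);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, kSketchOutputTensor);

  const int flat_size = tflite::micro::GetTensorShape(input).FlatSize();
  const float* in = tflite::micro::GetTensorData<float>(input);
  float* out = tflite::micro::GetTensorData<float>(output);
  for (int i = 0; i < flat_size; ++i) {
    out[i] = in[i];  // placeholder for the op-specific computation
  }
  return kTfLiteOk;
}

}  // namespace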
#include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/micro/all_ops_resolver.h" +#include "tensorflow/lite/micro/kernels/kernel_runner.h" #include "tensorflow/lite/micro/testing/micro_test.h" #include "tensorflow/lite/micro/testing/test_utils.h" @@ -66,47 +67,21 @@ void ValidateAddGoldens(TfLiteTensor* tensors, int tensors_size, const T* golden, T* output, int output_size, TfLiteFusedActivation activation, float tolerance = 1e-5) { - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(::tflite::BuiltinOperator_ADD); - - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - TfLiteAddParams builtin_data; builtin_data.activation = activation; - const char* init_data = reinterpret_cast(&builtin_data); - const size_t init_data_size = 0; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, init_data, init_data_size); - } - int inputs_array_data[] = {2, 0, 1}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 2}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = reinterpret_cast(&builtin_data); - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; + const TfLiteRegistration registration = ops::micro::Register_ADD(); + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, &builtin_data, + micro_test::reporter); - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - - if (registration->free) { - registration->free(&context, user_data); - } + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); for (int i = 0; i < output_size; ++i) { TF_LITE_MICRO_EXPECT_NEAR(golden[i], output[i], tolerance); From 983d0003ad09a2f989ab3266d076400d0e8d3bdd Mon Sep 17 00:00:00 2001 From: Advait Jain Date: Wed, 22 Jul 2020 17:15:20 -0700 Subject: [PATCH 1103/2522] sanity -> consistency / smoke. PiperOrigin-RevId: 322688516 Change-Id: I4d4bdc45f45063ec1a67b812107583899a101cf3 --- .../lite/micro/examples/micro_speech/audio_provider_test.cc | 2 +- tensorflow/lite/micro/kernels/cmsis-nn/conv.cc | 2 +- tensorflow/lite/micro/micro_allocator.cc | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/lite/micro/examples/micro_speech/audio_provider_test.cc b/tensorflow/lite/micro/examples/micro_speech/audio_provider_test.cc index 8e32c92b8a2..9249c42ae7c 100644 --- a/tensorflow/lite/micro/examples/micro_speech/audio_provider_test.cc +++ b/tensorflow/lite/micro/examples/micro_speech/audio_provider_test.cc @@ -46,7 +46,7 @@ TF_LITE_MICRO_TEST(TestAudioProvider) { TF_LITE_MICRO_TEST(TestTimer) { // Make sure that the technically-undefined overflow behavior we rely on below // works on this platform. It's still not guaranteed, but at least this is a - // sanity check. Turn off when running with ASan, as it will complain about + // smoke check. Turn off when running with ASan, as it will complain about // the following undefined behavior. 
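The rewritten ValidateAddGoldens above is the shape most kernel tests take after the KernelRunner migration. A condensed sketch of that flow follows; the helper name is made up, tensor construction is left to the caller exactly as in the real test, and the include list mirrors the test's own.

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/all_ops_resolver.h"
#include "tensorflow/lite/micro/kernels/kernel_runner.h"
#include "tensorflow/lite/micro/testing/micro_test.h"
#include "tensorflow/lite/micro/testing/test_utils.h"

namespace tflite {
namespace testing {

// Condensed sketch of the KernelRunner flow: the index arrays describe which
// entries of `tensors` are inputs/outputs, the runner wraps the registration,
// and InitAndPrepare()/Invoke() replace the hand-rolled
// init/prepare/invoke/free sequence from the old test body.
inline void RunAddOnceSketch(TfLiteTensor* tensors, int tensors_size,
                             TfLiteAddParams* builtin_data) {
  int inputs_array_data[] = {2, 0, 1};
  TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data);
  int outputs_array_data[] = {1, 2};
  TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data);

  const TfLiteRegistration registration = ops::micro::Register_ADD();
  micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array,
                             outputs_array, builtin_data,
                             micro_test::reporter);

  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare());
  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke());
}

}  // namespace testing
}  // namespace tflite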
#ifndef ADDRESS_SANITIZER int32_t overflow_value = std::numeric_limits::max(); diff --git a/tensorflow/lite/micro/kernels/cmsis-nn/conv.cc b/tensorflow/lite/micro/kernels/cmsis-nn/conv.cc index da608411387..64e0b22a5f5 100644 --- a/tensorflow/lite/micro/kernels/cmsis-nn/conv.cc +++ b/tensorflow/lite/micro/kernels/cmsis-nn/conv.cc @@ -245,7 +245,7 @@ TfLiteStatus EvalQuantizedPerChannel( RuntimeShape output_shape = GetTensorShape(output); RuntimeShape bias_shape = GetTensorShape(bias); - // Sanity check. + // Consistency check. TFLITE_DCHECK_LE(conv_params.activation.min, conv_params.activation.max); TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); diff --git a/tensorflow/lite/micro/micro_allocator.cc b/tensorflow/lite/micro/micro_allocator.cc index 73e7cd88bc0..76cd617fe99 100644 --- a/tensorflow/lite/micro/micro_allocator.cc +++ b/tensorflow/lite/micro/micro_allocator.cc @@ -688,7 +688,7 @@ void* MicroAllocator::AllocatePersistentBuffer(size_t bytes) { TfLiteStatus MicroAllocator::RequestScratchBufferInArena(int node_id, size_t bytes, int* buffer_idx) { - // A sanity check to make sure scratch_buffer_handles_ is contiguous i.e. + // A consistency check to make sure scratch_buffer_handles_ is contiguous i.e. // scratch_buffer_handles_ is pointing to the last allocation from memory // allocator. if (scratch_buffer_handles_ != nullptr && From 4b5edace978df70550e271a240e7e60ff9d23878 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Jul 2020 17:17:50 -0700 Subject: [PATCH 1104/2522] Update ops-related pbtxt files. PiperOrigin-RevId: 322688932 Change-Id: Ic6b8575abd64b13f8ce8060226e953a4a74b5d0d --- .../ops_history_v2/RebatchDatasetV2.pbtxt | 31 +++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 31 +++++++++++++++++++ 2 files changed, 62 insertions(+) create mode 100644 tensorflow/core/ops/compat/ops_history_v2/RebatchDatasetV2.pbtxt diff --git a/tensorflow/core/ops/compat/ops_history_v2/RebatchDatasetV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/RebatchDatasetV2.pbtxt new file mode 100644 index 00000000000..7cc91dbd8e9 --- /dev/null +++ b/tensorflow/core/ops/compat/ops_history_v2/RebatchDatasetV2.pbtxt @@ -0,0 +1,31 @@ +op { + name: "RebatchDatasetV2" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "batch_sizes" + type: DT_INT64 + } + input_arg { + name: "drop_remainder" + type: DT_BOOL + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } +} diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index eebf19918b4..440b06b9465 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -35301,6 +35301,37 @@ op { } } } +op { + name: "RebatchDatasetV2" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "batch_sizes" + type: DT_INT64 + } + input_arg { + name: "drop_remainder" + type: DT_BOOL + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } +} op { name: "Reciprocal" input_arg { From 30ff851fd0dcba29130259ff7907fe97087eb3fa Mon Sep 17 00:00:00 2001 From: Advait Jain Date: Wed, 22 Jul 2020 17:20:13 -0700 Subject: [PATCH 1105/2522] 
Remove unused functions. PiperOrigin-RevId: 322689294 Change-Id: I0056a29b09eb5b61cc349b55cefc5eb7691482d2 --- .../person_detection/himax_driver/HM01B0.c | 39 ------------------- .../person_detection/himax_driver/HM01B0.h | 17 -------- .../himax_driver/HM01B0.c | 39 ------------------- .../himax_driver/HM01B0.h | 17 -------- 4 files changed, 112 deletions(-) diff --git a/tensorflow/lite/micro/examples/person_detection/himax_driver/HM01B0.c b/tensorflow/lite/micro/examples/person_detection/himax_driver/HM01B0.c index 8e457ec4ca8..4fc673a1d38 100644 --- a/tensorflow/lite/micro/examples/person_detection/himax_driver/HM01B0.c +++ b/tensorflow/lite/micro/examples/person_detection/himax_driver/HM01B0.c @@ -503,45 +503,6 @@ uint32_t hm01b0_test_walking1s(hm01b0_cfg_t* psCfg) { return hm01b0_load_script(psCfg, psScript, ui32ScriptCmdNum); } -//***************************************************************************** -// -//! @brief Check the data read from HM01B0 in the walking 1s test mode -//! -//! @param pui8Buffer - Pointer to data buffer. -//! @param ui32BufferLen - Buffer length -//! @param ui32PrintCnt - Number of mismatched data to be printed out -//! -//! This function sets HM01B0 in the walking 1s test mode. -//! -//! @return Error code. -// -//***************************************************************************** -void hm01b0_test_walking1s_check_data_sanity(uint8_t* pui8Buffer, - uint32_t ui32BufferLen, - uint32_t ui32PrintCnt) { - uint8_t ui8ByteData = *pui8Buffer; - uint32_t ui32MismatchCnt = 0x00; - - for (uint32_t ui32Idx = 0; ui32Idx < ui32BufferLen; ui32Idx++) { - if (*(pui8Buffer + ui32Idx) != ui8ByteData) { - if (ui32PrintCnt) { - am_util_stdio_printf("[0x%08X] actual 0x%02X expected 0x%02X\n", - ui32Idx, *(pui8Buffer + ui32Idx), ui8ByteData); - am_util_delay_ms(1); - ui32PrintCnt--; - } - ui32MismatchCnt++; - } - - if (ui8ByteData) - ui8ByteData = ui8ByteData << 1; - else - ui8ByteData = 0x01; - } - - am_util_stdio_printf("Mismatch Rate %d/%d\n", ui32MismatchCnt, ui32BufferLen); -} - //***************************************************************************** // //! @brief Software reset HM01B0 diff --git a/tensorflow/lite/micro/examples/person_detection/himax_driver/HM01B0.h b/tensorflow/lite/micro/examples/person_detection/himax_driver/HM01B0.h index e2561da6d10..c7ec4e6676e 100644 --- a/tensorflow/lite/micro/examples/person_detection/himax_driver/HM01B0.h +++ b/tensorflow/lite/micro/examples/person_detection/himax_driver/HM01B0.h @@ -297,23 +297,6 @@ uint32_t hm01b0_init_system(hm01b0_cfg_t *psCfg, hm_script_t *psScript, //***************************************************************************** uint32_t hm01b0_test_walking1s(hm01b0_cfg_t *psCfg); -//***************************************************************************** -// -//! @brief Check the data read from HM01B0 in the walking 1s test mode -//! -//! @param pui8Buffer - Pointer to data buffer. -//! @param ui32BufferLen - Buffer length -//! @param ui32PrintCnt - Number of mismatched data to be printed out -//! -//! This function sets HM01B0 in the walking 1s test mode. -//! -//! @return Error code. -// -//***************************************************************************** -void hm01b0_test_walking1s_check_data_sanity(uint8_t *pui8Buffer, - uint32_t ui32BufferLen, - uint32_t ui32PrintCnt); - //***************************************************************************** // //! 
@brief Software reset HM01B0 diff --git a/tensorflow/lite/micro/examples/person_detection_experimental/himax_driver/HM01B0.c b/tensorflow/lite/micro/examples/person_detection_experimental/himax_driver/HM01B0.c index 4c89b8e5d76..3ec481a5cd4 100644 --- a/tensorflow/lite/micro/examples/person_detection_experimental/himax_driver/HM01B0.c +++ b/tensorflow/lite/micro/examples/person_detection_experimental/himax_driver/HM01B0.c @@ -497,45 +497,6 @@ uint32_t hm01b0_test_walking1s(hm01b0_cfg_t* psCfg) { return hm01b0_load_script(psCfg, psScript, ui32ScriptCmdNum); } -//***************************************************************************** -// -//! @brief Check the data read from HM01B0 in the walking 1s test mode -//! -//! @param pui8Buffer - Pointer to data buffer. -//! @param ui32BufferLen - Buffer length -//! @param ui32PrintCnt - Number of mismatched data to be printed out -//! -//! This function sets HM01B0 in the walking 1s test mode. -//! -//! @return Error code. -// -//***************************************************************************** -void hm01b0_test_walking1s_check_data_sanity(uint8_t* pui8Buffer, - uint32_t ui32BufferLen, - uint32_t ui32PrintCnt) { - uint8_t ui8ByteData = *pui8Buffer; - uint32_t ui32MismatchCnt = 0x00; - - for (uint32_t ui32Idx = 0; ui32Idx < ui32BufferLen; ui32Idx++) { - if (*(pui8Buffer + ui32Idx) != ui8ByteData) { - if (ui32PrintCnt) { - am_util_stdio_printf("[0x%08X] actual 0x%02X expected 0x%02X\n", - ui32Idx, *(pui8Buffer + ui32Idx), ui8ByteData); - am_util_delay_ms(1); - ui32PrintCnt--; - } - ui32MismatchCnt++; - } - - if (ui8ByteData) - ui8ByteData = ui8ByteData << 1; - else - ui8ByteData = 0x01; - } - - am_util_stdio_printf("Mismatch Rate %d/%d\n", ui32MismatchCnt, ui32BufferLen); -} - //***************************************************************************** // //! @brief Software reset HM01B0 diff --git a/tensorflow/lite/micro/examples/person_detection_experimental/himax_driver/HM01B0.h b/tensorflow/lite/micro/examples/person_detection_experimental/himax_driver/HM01B0.h index 46dcb583122..f95ee7bd76c 100644 --- a/tensorflow/lite/micro/examples/person_detection_experimental/himax_driver/HM01B0.h +++ b/tensorflow/lite/micro/examples/person_detection_experimental/himax_driver/HM01B0.h @@ -286,23 +286,6 @@ uint32_t hm01b0_init_system(hm01b0_cfg_t *psCfg, hm_script_t *psScript, //***************************************************************************** uint32_t hm01b0_test_walking1s(hm01b0_cfg_t *psCfg); -//***************************************************************************** -// -//! @brief Check the data read from HM01B0 in the walking 1s test mode -//! -//! @param pui8Buffer - Pointer to data buffer. -//! @param ui32BufferLen - Buffer length -//! @param ui32PrintCnt - Number of mismatched data to be printed out -//! -//! This function sets HM01B0 in the walking 1s test mode. -//! -//! @return Error code. -// -//***************************************************************************** -void hm01b0_test_walking1s_check_data_sanity(uint8_t *pui8Buffer, - uint32_t ui32BufferLen, - uint32_t ui32PrintCnt); - //***************************************************************************** // //! @brief Software reset HM01B0 From c8bfbd38896747515ceb6bdb303dec5085246345 Mon Sep 17 00:00:00 2001 From: Yuqi Li Date: Wed, 22 Jul 2020 17:24:45 -0700 Subject: [PATCH 1106/2522] fix the failure in notebook for model maker. 
PiperOrigin-RevId: 322689954 Change-Id: I44939c723477b52c231c43223a63c5164575c4d4 --- .../model_maker_image_classification.ipynb | 6 --- .../model_maker_text_classification.ipynb | 51 +++++++++---------- 2 files changed, 25 insertions(+), 32 deletions(-) diff --git a/tensorflow/lite/g3doc/tutorials/model_maker_image_classification.ipynb b/tensorflow/lite/g3doc/tutorials/model_maker_image_classification.ipynb index 37b2395dec6..99ebb7087f2 100644 --- a/tensorflow/lite/g3doc/tutorials/model_maker_image_classification.ipynb +++ b/tensorflow/lite/g3doc/tutorials/model_maker_image_classification.ipynb @@ -657,10 +657,6 @@ "with tf.io.gfile.GFile('model.tflite', 'rb') as f:\n", " model_content = f.read()\n", "\n", - "# Read label names from label file.\n", - "with tf.io.gfile.GFile('labels.txt', 'r') as f:\n", - " label_names = f.read().split('\\n')\n", - "\n", "# Initialze TensorFlow Lite inpterpreter.\n", "interpreter = tf.lite.Interpreter(model_content=model_content)\n", "interpreter.allocate_tensors()\n", @@ -683,8 +679,6 @@ " # Post-processing: remove batch dimension and find the label with highest\n", " # probability.\n", " predict_label = np.argmax(output()[0])\n", - " # Get label name with label index.\n", - " predict_label_name = label_names[predict_label]\n", "\n", " accurate_count += (predict_label == label.numpy())\n", "\n", diff --git a/tensorflow/lite/g3doc/tutorials/model_maker_text_classification.ipynb b/tensorflow/lite/g3doc/tutorials/model_maker_text_classification.ipynb index e10507ccac7..4a620960899 100644 --- a/tensorflow/lite/g3doc/tutorials/model_maker_text_classification.ipynb +++ b/tensorflow/lite/g3doc/tutorials/model_maker_text_classification.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "cellView": "form", "colab": {}, @@ -93,7 +93,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -116,7 +116,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -159,7 +159,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -219,7 +219,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -242,7 +242,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -266,7 +266,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -289,7 +289,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -313,7 +313,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -360,7 +360,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -403,7 +403,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -433,7 +433,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -460,7 +460,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { 
"colab": {}, "colab_type": "code", @@ -483,7 +483,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -510,7 +510,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -535,7 +535,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -570,7 +570,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -598,7 +598,6 @@ " # Add batch dimension and convert to float32 to match with the model's input\n", " # data format.\n", " text = tf.expand_dims(text, 0)\n", - " text = tf.cast(text, tf.float32)\n", "\n", " # Run inference.\n", " interpreter.set_tensor(input_index, text)\n", @@ -671,7 +670,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -694,7 +693,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -718,7 +717,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -747,7 +746,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -770,7 +769,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -797,7 +796,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -822,7 +821,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -846,7 +845,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", From 2f32eed7839ced3250419ab3c3c2523ff952c235 Mon Sep 17 00:00:00 2001 From: Rachel Lim Date: Wed, 22 Jul 2020 17:36:28 -0700 Subject: [PATCH 1107/2522] [tf.data] Have auto_shard use regex match instead of exact match to be more robust to different versions of an op. Also adds test for MatchesAnyVersionRE utility function. 
PiperOrigin-RevId: 322691658 Change-Id: I5a97e5038c1afc15eca5389a01021660e4574dc8 --- .../core/grappler/optimizers/data/BUILD | 1 + .../grappler/optimizers/data/auto_shard.cc | 26 +++++++------------ .../core/kernels/data/dataset_utils_test.cc | 8 ++++++ .../kernel_tests/auto_shard_dataset_test.py | 13 ++++++++++ 4 files changed, 32 insertions(+), 16 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/data/BUILD b/tensorflow/core/grappler/optimizers/data/BUILD index 88cb11b83ce..1daf7e9b76e 100644 --- a/tensorflow/core/grappler/optimizers/data/BUILD +++ b/tensorflow/core/grappler/optimizers/data/BUILD @@ -52,6 +52,7 @@ cc_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core/grappler/utils:functions", + "//tensorflow/core/kernels/data:dataset_utils", ] + tf_protos_all(), alwayslink = 1, ) diff --git a/tensorflow/core/grappler/optimizers/data/auto_shard.cc b/tensorflow/core/grappler/optimizers/data/auto_shard.cc index f688c1f359f..535938d4cf1 100644 --- a/tensorflow/core/grappler/optimizers/data/auto_shard.cc +++ b/tensorflow/core/grappler/optimizers/data/auto_shard.cc @@ -29,6 +29,7 @@ limitations under the License. #include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h" #include "tensorflow/core/grappler/optimizers/data/graph_utils.h" #include "tensorflow/core/grappler/utils/functions.h" +#include "tensorflow/core/kernels/data/dataset_utils.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/errors.h" @@ -50,9 +51,8 @@ constexpr char kOutputShapes[] = "output_shapes"; constexpr char kOutputTypes[] = "output_types"; // clang-format off -constexpr std::array kReaderDatasetOps = { +constexpr std::array kReaderDatasetOps = { "FixedLengthRecordDataset", - "FixedLengthRecordDatasetV2", "RecordIODataset", "SSTableDataset", "TextLineDataset", @@ -64,17 +64,14 @@ constexpr std::array kMultipleInputsDatasetOps = { "ZipDataset" }; -constexpr std::array kPassThroughOps = { +constexpr std::array kPassThroughOps = { "_Retval", "AssertNextDataset", "BatchDataset", - "BatchDatasetV2", "ExperimentalMapAndBatchDataset", "ExperimentalRebatchDataset", "PaddedBatchDataset", - "PaddedBatchDatasetV2", "CacheDataset", - "CacheDatasetV2", "FilterDataset", "Identity", "MapAndBatchDataset", @@ -82,8 +79,7 @@ constexpr std::array kPassThroughOps = { "ModelDataset", "OptimizeDataset", "ParallelMapDataset", - "ParallelMapDatasetV2", - "ParseExampleDatasetV2", + "ParseExampleDataset", "PrefetchDataset", "ReduceDataset", "RebatchDataset", @@ -91,23 +87,18 @@ constexpr std::array kPassThroughOps = { "ShardDataset", "ShuffleAndRepeatDataset", "ShuffleDataset", - "ShuffleDatasetV2", - "ShuffleDatasetV3", "SkipDataset", "TakeDataset", "WindowDataset", }; // TODO(frankchn): Process functions within kFuncDatasetOps as well. 
-constexpr std::array kFuncDatasetOps = { +constexpr std::array kFuncDatasetOps = { "ExperimentalParallelInterleaveDataset", "FlatMapDataset", "InterleaveDataset", - "LegacyParallelInterleaveDatasetV2", + "LegacyParallelInterleaveDataset", "ParallelInterleaveDataset", - "ParallelInterleaveDatasetV2", - "ParallelInterleaveDatasetV3", - "ParallelInterleaveDatasetV4" }; constexpr std::array kUnshardableSourceDatasetOps = { @@ -126,7 +117,10 @@ template bool IsDatasetNodeOfType(const NodeDef& node, const std::array& arr) { for (const auto& dataset_op_name : arr) { - if (node.op() == dataset_op_name) return true; + if (tensorflow::data::MatchesAnyVersionRE(/*op_prefix=*/dataset_op_name, + /*op_to_match=*/node.op())) { + return true; + } } return false; } diff --git a/tensorflow/core/kernels/data/dataset_utils_test.cc b/tensorflow/core/kernels/data/dataset_utils_test.cc index 588624a36cc..1a6e673c3f3 100644 --- a/tensorflow/core/kernels/data/dataset_utils_test.cc +++ b/tensorflow/core/kernels/data/dataset_utils_test.cc @@ -63,6 +63,14 @@ string full_name(string key) { return strings::StrCat(kFullNameRandomHex, kPipe, "Iterator:", key); } +TEST(DatasetUtilsTest, MatchesAnyVersion) { + EXPECT_TRUE(MatchesAnyVersionRE("BatchDataset", "BatchDataset")); + EXPECT_TRUE(MatchesAnyVersionRE("BatchDataset", "BatchDatasetV2")); + EXPECT_TRUE(MatchesAnyVersionRE("BatchDataset", "BatchDatasetV3")); + EXPECT_FALSE(MatchesAnyVersionRE("BatchDataset", "BatchV2Dataset")); + EXPECT_FALSE(MatchesAnyVersionRE("BatchDataset", "PaddedBatchDataset")); +} + TEST(DatasetUtilsTest, VariantTensorDataRoundtrip) { VariantTensorDataWriter writer; TF_ASSERT_OK(writer.WriteScalar(full_name("Int64"), 24)); diff --git a/tensorflow/python/data/experimental/kernel_tests/auto_shard_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/auto_shard_dataset_test.py index eced1fdea18..36587d97ea0 100644 --- a/tensorflow/python/data/experimental/kernel_tests/auto_shard_dataset_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/auto_shard_dataset_test.py @@ -406,6 +406,19 @@ class AutoShardDatasetTest(reader_dataset_ops_test_base.TFRecordDatasetTestBase, nxt = self.getNext(dataset) self.evaluate(nxt()) + @combinations.generate(test_base.default_test_combinations()) + def testShardWithRebatch(self): + # Tests that RebatchDatasetV2 is a passthrough op. + dataset = dataset_ops.Dataset.list_files(self.test_filenames, shuffle=False) + dataset = dataset.apply( + testing.assert_next(["Shard", "FlatMap", "Batch", "Rebatch"])) + dataset = dataset.flat_map(core_readers.TFRecordDataset) + dataset = dataset.batch(5) + dataset = distribute._RebatchDataset(dataset, batch_sizes=5) + dataset = distribute._AutoShardDataset(dataset, 5, 3) + nxt = self.getNext(dataset) + self.evaluate(nxt()) + @combinations.generate(test_base.default_test_combinations()) def testNoReaderPipelines(self): dataset = dataset_ops.Dataset.range(1024) From d7f7908beb64b4fdc19a8eb28f6893e04145069c Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Wed, 22 Jul 2020 17:39:46 -0700 Subject: [PATCH 1108/2522] Port the arg min/max kernel to the new TfLiteEvalTensor API. 
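The MatchesAnyVersion test added above pins down the matching contract used by auto_shard's table lookups: an op name matches its base name plus any "V<digits>" suffix, but not prefix or substring hits. A standalone approximation of that contract, using std::regex rather than the TensorFlow implementation, would look like the sketch below.

#include <regex>
#include <string>

// Sketch of the contract only, not the TF implementation. Assumes op names
// contain no regex metacharacters (true for the dataset op names above).
bool MatchesAnyVersionSketch(const std::string& op_prefix,
                             const std::string& op_to_match) {
  const std::regex re(op_prefix + "(V\\d+)?");
  return std::regex_match(op_to_match, re);
}

// MatchesAnyVersionSketch("BatchDataset", "BatchDatasetV2")    -> true
// MatchesAnyVersionSketch("BatchDataset", "BatchV2Dataset")    -> false
// MatchesAnyVersionSketch("BatchDataset", "PaddedBatchDataset") -> false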
PiperOrigin-RevId: 322692137 Change-Id: Idc42c69a519ce264ca9b6f3d53f681ced09bcfe5 --- tensorflow/lite/micro/kernels/BUILD | 1 + tensorflow/lite/micro/kernels/arg_min_max.cc | 21 ++++++--- .../lite/micro/kernels/arg_min_max_test.cc | 43 ++++++------------- 3 files changed, 27 insertions(+), 38 deletions(-) diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index c4d3a691402..b691472720c 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD @@ -321,6 +321,7 @@ tflite_micro_cc_test( "arg_min_max_test.cc", ], deps = [ + ":kernel_runner", "//tensorflow/lite/c:common", "//tensorflow/lite/micro:op_resolvers", "//tensorflow/lite/micro/testing:micro_test", diff --git a/tensorflow/lite/micro/kernels/arg_min_max.cc b/tensorflow/lite/micro/kernels/arg_min_max.cc index 86abc1d7a4b..3baf9f04e61 100644 --- a/tensorflow/lite/micro/kernels/arg_min_max.cc +++ b/tensorflow/lite/micro/kernels/arg_min_max.cc @@ -19,6 +19,7 @@ limitations under the License. #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" #include "tensorflow/lite/micro/kernels/micro_utils.h" namespace tflite { @@ -45,14 +46,20 @@ inline void ArgMinMaxHelper(const RuntimeShape& input1_shape, } TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node, bool is_arg_max) { - const TfLiteTensor* input = GetInput(context, node, kInputTensor); - const TfLiteTensor* axis = GetInput(context, node, kAxis); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + const TfLiteEvalTensor* axis = + tflite::micro::GetEvalInput(context, node, kAxis); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); -#define TF_LITE_ARG_MIN_MAX(data_type, axis_type, output_type) \ - ArgMinMaxHelper(GetTensorShape(input), GetTensorData(input), \ - GetTensorData(axis), GetTensorShape(output), \ - GetTensorData(output), is_arg_max) +#define TF_LITE_ARG_MIN_MAX(data_type, axis_type, output_type) \ + ArgMinMaxHelper(tflite::micro::GetTensorShape(input), \ + tflite::micro::GetTensorData(input), \ + tflite::micro::GetTensorData(axis), \ + tflite::micro::GetTensorShape(output), \ + tflite::micro::GetTensorData(output), \ + is_arg_max) if (axis->type == kTfLiteInt32) { if (output->type == kTfLiteInt32) { switch (input->type) { diff --git a/tensorflow/lite/micro/kernels/arg_min_max_test.cc b/tensorflow/lite/micro/kernels/arg_min_max_test.cc index fa46badfc27..dfd04bf74b9 100644 --- a/tensorflow/lite/micro/kernels/arg_min_max_test.cc +++ b/tensorflow/lite/micro/kernels/arg_min_max_test.cc @@ -16,6 +16,7 @@ limitations under the License. 
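For reference, the reduction the arg min/max kernel above performs, specialized to a 2-D float input reduced over its last axis, is sketched below. The real ArgMinMaxHelper handles arbitrary ranks and input/axis/output types; the function name here is only illustrative.

#include <cstdint>

// For each row, emit the index of the min or max element; ties resolve to
// the first occurrence, and `is_arg_max` selects the comparator.
void ArgMinMaxLastAxisSketch(const float* input, int rows, int cols,
                             int32_t* output, bool is_arg_max) {
  for (int r = 0; r < rows; ++r) {
    int32_t best = 0;
    for (int c = 1; c < cols; ++c) {
      const float candidate = input[r * cols + c];
      const float current_best = input[r * cols + best];
      if (is_arg_max ? (candidate > current_best)
                     : (candidate < current_best)) {
        best = c;
      }
    }
    output[r] = best;
  }
}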
#include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/micro/all_ops_resolver.h" +#include "tensorflow/lite/micro/kernels/kernel_runner.h" #include "tensorflow/lite/micro/testing/micro_test.h" #include "tensorflow/lite/micro/testing/test_utils.h" @@ -26,41 +27,21 @@ namespace { void ValidateArgMinMaxGoldens(TfLiteTensor* tensors, int tensors_size, const int32_t* golden, int32_t* output, int output_size, bool using_min) { - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration; - if (using_min) { - registration = resolver.FindOp(tflite::BuiltinOperator_ARG_MIN); - } else { - registration = resolver.FindOp(tflite::BuiltinOperator_ARG_MAX); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - - size_t init_data_size = 0; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, nullptr, init_data_size); - } int inputs_array_data[] = {2, 0, 1}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 2}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = nullptr; - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - if (registration->free) { - registration->free(&context, user_data); - } + + const TfLiteRegistration registration = using_min + ? ops::micro::Register_ARG_MIN() + : ops::micro::Register_ARG_MAX(); + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, + /*builtin_data=*/nullptr, micro_test::reporter); + + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); + for (int i = 0; i < output_size; ++i) { TF_LITE_MICRO_EXPECT_EQ(golden[i], output[i]); } From aa30205d22e622d73b98bd6259a80265cb17fb61 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Jul 2020 17:45:41 -0700 Subject: [PATCH 1109/2522] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 322692957 Change-Id: Icb306b53dd9697f19aeee8b5efd0f6ac99eeaca2 --- tensorflow/go/op/wrappers.go | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 504e6ba3b47..64f6298d645 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -10140,6 +10140,34 @@ func TensorArraySizeV2(scope *Scope, handle tf.Output, flow_in tf.Output) (size return op.Output(0) } +// Creates a dataset that changes the batch size. +// +// Creates a dataset that rebatches elements from `input_dataset` into new batch +// sizes. +// +// Arguments: +// input_dataset: A variant tensor representing the input dataset. +// batch_sizes: A vector of integers representing the size of batches to produce. These values +// are cycled through in order. 
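The "cycled through in order" behavior of batch_sizes mentioned above can be illustrated with the small sketch below. It only demonstrates the cycling and drop_remainder semantics over a flat stream of values; it is not the RebatchDatasetV2 kernel, and it assumes every batch size is positive.

#include <algorithm>
#include <cstdint>
#include <vector>

// Rebatching {1..7} with batch_sizes {2, 3} yields {1,2}, {3,4,5}, {6,7};
// with drop_remainder=true the trailing short batch {6,7} is dropped.
std::vector<std::vector<int64_t>> RebatchSketch(
    const std::vector<int64_t>& elements,
    const std::vector<int64_t>& batch_sizes, bool drop_remainder) {
  std::vector<std::vector<int64_t>> batches;
  std::size_t pos = 0;
  std::size_t which = 0;
  while (pos < elements.size()) {
    const std::size_t want = static_cast<std::size_t>(batch_sizes[which]);
    const std::size_t end = std::min(elements.size(), pos + want);
    if (end - pos < want && drop_remainder) break;
    batches.emplace_back(elements.begin() + pos, elements.begin() + end);
    pos = end;
    which = (which + 1) % batch_sizes.size();
  }
  return batches;
}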
+// +// +// +func RebatchDatasetV2(scope *Scope, input_dataset tf.Output, batch_sizes tf.Output, drop_remainder tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "RebatchDatasetV2", + Input: []tf.Input{ + input_dataset, batch_sizes, drop_remainder, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // RebatchDatasetAttr is an optional argument to RebatchDataset. type RebatchDatasetAttr func(optionalAttr) From 6a98324e3638e22e12f896963cffcf2791cd006a Mon Sep 17 00:00:00 2001 From: Chuanhao Zhuge Date: Wed, 22 Jul 2020 17:47:19 -0700 Subject: [PATCH 1110/2522] Implement a function cache that caches the lowered BEF for tf.function (graph execution). Observed ~22x performance improvement in microbenchmark compared to without cache. PiperOrigin-RevId: 322693183 Change-Id: I2c2b17e1d31d17a525140e7d507fd1f540af8b05 --- .../python/eager/benchmarks/resnet50/resnet50_test.py | 8 ++++++-- .../eager/benchmarks/resnet50/resnet50_test_util.py | 4 +++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/eager/benchmarks/resnet50/resnet50_test.py b/tensorflow/python/eager/benchmarks/resnet50/resnet50_test.py index b2e57c11e3c..e034cf0e296 100644 --- a/tensorflow/python/eager/benchmarks/resnet50/resnet50_test.py +++ b/tensorflow/python/eager/benchmarks/resnet50/resnet50_test.py @@ -287,6 +287,9 @@ class ResNet50Benchmarks(tf.test.Benchmark): # which isn't useful. if 'K20' in device.physical_device_desc: return (16,) + # Quardro P1000. + if 'P1000' in device.physical_device_desc: + return (16,) if 'P100' in device.physical_device_desc: return (16, 32, 64) @@ -355,7 +358,9 @@ class ResNet50Benchmarks(tf.test.Benchmark): (images, labels) = resnet50_test_util.random_batch( batch_size, data_format) model = resnet50.ResNet50(data_format) - optimizer = tf.keras.optimizers.SGD(0.1) + # TODO(b/161911585): tf_to_corert MLIR lowering pipeline should handle + # case when momentum is not set. + optimizer = tf.keras.optimizers.SGD(0.1, 0.1) apply_grads = apply_gradients if defun: model.call = tf.function(model.call) @@ -397,7 +402,6 @@ class ResNet50Benchmarks(tf.test.Benchmark): defun=False, execution_mode=context.ASYNC) - @test_util.disable_tfrt('Graph is not supported yet. 
b/156187905') def benchmark_eager_train_with_defun(self): self._benchmark_eager_train( 'eager_train_with_defun', MockIterator, diff --git a/tensorflow/python/eager/benchmarks/resnet50/resnet50_test_util.py b/tensorflow/python/eager/benchmarks/resnet50/resnet50_test_util.py index 3c1f73ec304..4f76b788490 100644 --- a/tensorflow/python/eager/benchmarks/resnet50/resnet50_test_util.py +++ b/tensorflow/python/eager/benchmarks/resnet50/resnet50_test_util.py @@ -29,8 +29,10 @@ def device_and_data_format(): return ('/cpu:0', 'channels_last') -def random_batch(batch_size, data_format): +def random_batch(batch_size, data_format, seed=None): """Create synthetic resnet50 images and labels for testing.""" + if seed: + tf.random.set_seed(seed) shape = (3, 224, 224) if data_format == 'channels_first' else (224, 224, 3) shape = (batch_size,) + shape From c72c903472af3124d8c42bff92bc627ab71c7427 Mon Sep 17 00:00:00 2001 From: Tomer Kaftan Date: Wed, 22 Jul 2020 17:48:44 -0700 Subject: [PATCH 1111/2522] Change Tensorboard callback to directly write out the train_function graphs at the end of the first training batch, rather than trying to write the potentially-incorrect Keras backend graph when the callback is first initialized. Also add tests to verify that a graph containing the model actually gets written to tensorboard. It turns out it wasn't getting written for some of the sequential models in the test before this CL, because the model had yet to be called when the callback graph writing triggered, so the Keras backend graph was empty. ----------- Note: I was looking to write the test graph too, but I started seeing errors about default callbacks not being allowed to have test batch hooks + problems w/ the validation writer being closed. PiperOrigin-RevId: 322693354 Change-Id: I119b51841ca26c9dca14938b3e49e8f85efdb57c --- tensorflow/python/keras/callbacks.py | 21 ++++++++++++++++----- tensorflow/python/keras/callbacks_test.py | 21 +++++++++++++++++---- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/keras/callbacks.py b/tensorflow/python/keras/callbacks.py index f918a754eb9..88dc1d84129 100644 --- a/tensorflow/python/keras/callbacks.py +++ b/tensorflow/python/keras/callbacks.py @@ -2014,8 +2014,10 @@ class TensorBoard(Callback, version_utils.TensorBoardVersionSelector): self._writers = {} # Resets writers. 
+ self._should_write_train_graph = False if self.write_graph: - self._write_keras_model_graph() + self._write_keras_model_summary() + self._should_write_train_graph = True if self.embeddings_freq: self._configure_embeddings() @@ -2042,13 +2044,19 @@ class TensorBoard(Callback, version_utils.TensorBoardVersionSelector): distributed_file_utils.remove_temp_dirpath(self.log_dir, self.model.distribute_strategy) - def _write_keras_model_graph(self): - """Writes Keras graph networks to TensorBoard.""" + def _write_keras_model_train_graph(self): + """Writes Keras model train_function graph to TensorBoard.""" with self._train_writer.as_default(): with summary_ops_v2.always_record_summaries(): - if not self.model.run_eagerly: - summary_ops_v2.graph(K.get_graph(), step=0) + train_fn = self.model.train_function + # If the train_function is a `tf.function`, we can write out a graph + if hasattr(train_fn, 'function_spec'): + summary_ops_v2.graph(train_fn._concrete_stateful_fn.graph, step=0) # pylint: disable=protected-access + def _write_keras_model_summary(self): + """Writes Keras graph network summary to TensorBoard.""" + with self._train_writer.as_default(): + with summary_ops_v2.always_record_summaries(): summary_writable = ( self.model._is_graph_network or # pylint: disable=protected-access self.model.__class__.__name__ == 'Sequential') # pylint: disable=protected-access @@ -2207,6 +2215,9 @@ class TensorBoard(Callback, version_utils.TensorBoardVersionSelector): self._start_trace() def on_train_batch_end(self, batch, logs=None): + if self._should_write_train_graph: + self._write_keras_model_train_graph() + self._should_write_train_graph = False if not self._should_trace: return diff --git a/tensorflow/python/keras/callbacks_test.py b/tensorflow/python/keras/callbacks_test.py index f103d7506b9..0992deae7b6 100644 --- a/tensorflow/python/keras/callbacks_test.py +++ b/tensorflow/python/keras/callbacks_test.py @@ -1767,6 +1767,7 @@ class _SummaryFile(object): self.images = set() self.histograms = set() self.tensors = set() + self.graph_defs = [] def list_summaries(logdir): @@ -1793,6 +1794,8 @@ def list_summaries(logdir): continue path = os.path.join(dirpath, filename) for event in summary_iterator.summary_iterator(path): + if event.graph_def: + result.graph_defs.append(event.graph_def) if not event.summary: # (e.g., it's a `graph_def` event) continue for value in event.summary.value: @@ -2217,7 +2220,7 @@ class TestTensorBoardV2NonParameterizedTest(keras_parameterized.TestCase): x, y, batch_size=2, - epochs=2, + epochs=3, validation_data=(x, y), callbacks=[tb_cbk]) summary_file = list_summaries(self.logdir) @@ -2227,6 +2230,16 @@ class TestTensorBoardV2NonParameterizedTest(keras_parameterized.TestCase): _ObservedSummary(logdir=self.train_dir, tag='keras'), }, ) + if not model.run_eagerly: + # There should be one train graph + self.assertLen(summary_file.graph_defs, 1) + for graph_def in summary_file.graph_defs: + graph_def_str = str(graph_def) + + # All the model layers should appear in the graphs + for layer in model.layers: + if 'input' not in layer.name: + self.assertIn(layer.name, graph_def_str) def test_TensorBoard_writeSequentialModel_noInputShape(self): model = keras.models.Sequential([ @@ -2234,7 +2247,7 @@ class TestTensorBoardV2NonParameterizedTest(keras_parameterized.TestCase): keras.layers.Flatten(), keras.layers.Dense(1), ]) - model.compile('sgd', 'mse', run_eagerly=False) + model.compile('sgd', 'mse', run_eagerly=testing_utils.should_run_eagerly()) 
self.fitModelAndAssertKerasModelWritten(model) def test_TensorBoard_writeSequentialModel_withInputShape(self): @@ -2243,7 +2256,7 @@ class TestTensorBoardV2NonParameterizedTest(keras_parameterized.TestCase): keras.layers.Flatten(), keras.layers.Dense(1), ]) - model.compile('sgd', 'mse', run_eagerly=False) + model.compile('sgd', 'mse', run_eagerly=testing_utils.should_run_eagerly()) self.fitModelAndAssertKerasModelWritten(model) def test_TensorBoard_writeModel(self): @@ -2252,7 +2265,7 @@ class TestTensorBoardV2NonParameterizedTest(keras_parameterized.TestCase): x = keras.layers.Flatten()(x) x = keras.layers.Dense(1)(x) model = keras.models.Model(inputs=inputs, outputs=[x]) - model.compile('sgd', 'mse', run_eagerly=False) + model.compile('sgd', 'mse', run_eagerly=testing_utils.should_run_eagerly()) self.fitModelAndAssertKerasModelWritten(model) def test_TensorBoard_autoTrace(self): From abba54ad2fb4d7c2c08069eacad505f38fab2637 Mon Sep 17 00:00:00 2001 From: "ag.ramesh" Date: Wed, 22 Jul 2020 18:10:21 -0700 Subject: [PATCH 1112/2522] Modified oneDNN build so as to not include the MKL blob when building with Eigen threadpool. --- .bazelrc | 10 +++++ .../core/kernels/mkl_batch_matmul_op.cc | 40 +++++++++++-------- .../core/kernels/mkl_matmul_ops_common.h | 7 +++- tensorflow/tensorflow.bzl | 10 ++--- third_party/mkl/build_defs.bzl | 22 ++-------- third_party/mkl_dnn/BUILD | 12 ++++++ third_party/mkl_dnn/build_defs.bzl | 4 +- third_party/mkl_dnn/mkldnn.BUILD | 6 +-- third_party/mkl_dnn/mkldnn_v1.BUILD | 23 +++-------- 9 files changed, 69 insertions(+), 65 deletions(-) diff --git a/.bazelrc b/.bazelrc index 82bb0605b08..a331ae12804 100644 --- a/.bazelrc +++ b/.bazelrc @@ -164,8 +164,18 @@ build:mkl -c opt # config to build OneDNN backend with a user specified threadpool. build:mkl_threadpool --define=build_with_mkl=true --define=enable_mkl=true build:mkl_threadpool --define=tensorflow_mkldnn_contraction_kernel=0 +build:mkl_threadpool --define=build_with_mkl_dnn_v1_only=true +build:mkl_threadpool --define=build_with_mkl_opensource=true build:mkl_threadpool --define=build_with_mkldnn_threadpool=true build:mkl_threadpool -c opt + +# Config setting to build with oneDNN and without the binary blob +build:mkl_opensource_only --define=build_with_mkl=true --define=enable_mkl=true +build:mkl_opensource_only --define=tensorflow_mkldnn_contraction_kernel=0 +build:mkl_opensource_only --define=build_with_mkl_dnn_v1_only=true +build:mkl_opensource_only --define=build_with_mkl_opensource=true +build:mkl_opensource_only -c opt + # This config refers to building with CUDA available. It does not necessarily # mean that we build CUDA op kernels. build:using_cuda --define=using_cuda=true diff --git a/tensorflow/core/kernels/mkl_batch_matmul_op.cc b/tensorflow/core/kernels/mkl_batch_matmul_op.cc index 87e6002d9cb..1a5821bc5af 100644 --- a/tensorflow/core/kernels/mkl_batch_matmul_op.cc +++ b/tensorflow/core/kernels/mkl_batch_matmul_op.cc @@ -15,21 +15,26 @@ limitations under the License. // See docs in ../ops/math_ops.cc. -// This file uses MKL CBLAS batched xGEMM for acceleration of TF Batch -// Matrix-Matrix Multiplication (MatMul) operations. -// We currently register this kernel only for MKL supported data -// types (float, double, complex64, complex128). 
The macro INTEL_MKL is defined -// by the build system only when MKL is chosen as an option at configure stage -// and when it is undefined at build time, this file becomes an empty -// compilation unit +// This file uses both oneDNN and MKL CBLAS batched xGEMM for acceleration of +// Batch Matrix-Matrix Multiplication (MatMul) operations. +// We currently register this kernel only for oneDNN supported data +// types (float, bfloat16). This file can be built with and without the use of +// the binary MKL CBLAS calls, controlled by the macro INTEL_MKL_DNN_ONLY. +// If INTEL_MKL_DNN_ONLY is defined, only oneDNN is used. For cases not +// supported by oneDNN (ex. Batchmatmul with broadcasting) we fall back to the +// default CPU implementation. +// if INTEL_MKL_DNN_ONLY is not defined, both oneDNN and MKL CBLAS +// implementations are used. This is only temporary, once we are able handle all +// cases with oneDNN, CBLAS calls will be removed. #define EIGEN_USE_THREADS #if defined(INTEL_MKL) #include +#if !defined(INTEL_MKL_DNN_ONLY) #include "mkl_cblas.h" -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#endif // INTEL_MKL_DNN_ONLY #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" @@ -44,6 +49,7 @@ limitations under the License. #include "tensorflow/core/platform/types.h" #include "tensorflow/core/util/matmul_bcast.h" #include "tensorflow/core/util/mkl_util.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" namespace tensorflow { @@ -105,14 +111,14 @@ class BatchMatMulMkl : public OpKernel { "In[0] and In[1] must have compatible batch dimensions: ", lhs.shape().DebugString(), " vs. ", rhs.shape().DebugString())); -#ifdef ENABLE_MKLDNN_THREADPOOL +#if defined(INTEL_MKL_DNN_ONLY) if (bcast.IsBroadcastingRequired()) { // Calling Eigen Kernel for broadcasting case and return. Eigen does // not have BF16 support, so we have to fail graciously in that case. eigen_batch_mm_v2_.Compute(ctx); return; } -#endif // ENABLE_MKLDNN_THREADPOOL +#endif // INTEL_MKL_DNN_ONLY TensorShape out_shape = bcast.output_batch_shape(); auto batch_size = bcast.output_batch_size(); @@ -158,11 +164,11 @@ class BatchMatMulMkl : public OpKernel { std::vector ldc_array(batch_size, N); std::vector group_size(1, batch_size); - bool threadpool_enabled = false; -#ifdef ENABLE_MKLDNN_THREADPOOL - threadpool_enabled = true; -#endif // ENABLE_MKLDNN_THREADPOOL - if (std::is_same::value || threadpool_enabled) { + bool bcast_not_supported = false; +#if defined(INTEL_MKL_DNN_ONLY) + bcast_not_supported = true; +#endif // INTEL_MKL_DNN_ONLY + if (std::is_same::value || bcast_not_supported) { // DNNL bfloat16 API requires a, b, and c as pointers to tensors // represented as flat-byte array. const Scalar* a = nullptr; @@ -227,7 +233,7 @@ class BatchMatMulMkl : public OpKernel { const std::vector& ldb_Array, float** C_Array, const std::vector& ldc_Array, const MKL_INT group_count, const std::vector& group_size, OpKernelContext* ctx) { -#ifndef ENABLE_MKLDNN_THREADPOOL +#if !defined(INTEL_MKL_DNN_ONLY) std::vector TransA_Array( group_size[0], TransA ? 
CblasTrans : CblasNoTrans); std::vector TransB_Array( @@ -249,7 +255,7 @@ class BatchMatMulMkl : public OpKernel { dnnl_gemm_batch(TransA_Array, TransB_Array, M_Array, N_Array, K_Array, alpha_Array, *A_Array, *B_Array, beta_Array, *C_Array, group_count, group_size, ctx); -#endif // !ENABLE_MKLDNN_THREADPOOL +#endif // INTEL_MKL_DNN_ONLY } // BatchMatMul BFloat16 support only exists in DNNL 1.2 onwards. #if defined(ENABLE_MKLDNN_V1) && defined(ENABLE_INTEL_MKL_BFLOAT16) diff --git a/tensorflow/core/kernels/mkl_matmul_ops_common.h b/tensorflow/core/kernels/mkl_matmul_ops_common.h index d7af614ad04..f8242d06fa6 100644 --- a/tensorflow/core/kernels/mkl_matmul_ops_common.h +++ b/tensorflow/core/kernels/mkl_matmul_ops_common.h @@ -35,7 +35,12 @@ using mkldnn::stream; namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; - +#ifdef INTEL_MKL_DNN_ONLY +// Temporarily copying some definitions from mkl_cblas.h so the same code can +// be used when calling oneDNN or CBLAS batchmatmul in mkl_batch_matmul_op.cc. +typedef enum { CblasRowMajor, CblasColumnMajor } CBLAS_LAYOUT; +#define MKL_INT int +#endif // This structure aggregates multiple inputs to MklDnnMatMul* methods. struct MklDnnMatMulFwdParams { memory::dims src_dims; diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 1bf4b24559d..349c1e1532b 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -47,7 +47,7 @@ load( load( "//third_party/mkl_dnn:build_defs.bzl", "if_mkl_open_source_only", - "if_mkl_v1_open_source_only", + "if_mkl_v1", "if_mkldnn_threadpool", ) load( @@ -327,12 +327,8 @@ def tf_copts( if_tensorrt(["-DGOOGLE_TENSORRT=1"]) + if_mkl(["-DINTEL_MKL=1", "-DEIGEN_USE_VML"]) + if_mkl_open_source_only(["-DINTEL_MKL_DNN_ONLY"]) + - if_mkl_v1_open_source_only(["-DENABLE_MKLDNN_V1", "-DENABLE_INTEL_MKL_BFLOAT16"]) + - if_mkldnn_threadpool([ - "-DENABLE_MKLDNN_THREADPOOL", - "-DENABLE_MKLDNN_V1", - "-DINTEL_MKL_DNN_ONLY", - ]) + + if_mkl_v1(["-DENABLE_MKLDNN_V1", "-DENABLE_INTEL_MKL_BFLOAT16"]) + + if_mkldnn_threadpool(["-DENABLE_MKLDNN_THREADPOOL", "-DINTEL_MKL_DNN_ONLY"]) + if_enable_mkl(["-DENABLE_MKL"]) + if_ngraph(["-DINTEL_NGRAPH=1"]) + if_android_arm(["-mfpu=neon"]) + diff --git a/third_party/mkl/build_defs.bzl b/third_party/mkl/build_defs.bzl index bd0686523bc..c1ab9f29686 100644 --- a/third_party/mkl/build_defs.bzl +++ b/third_party/mkl/build_defs.bzl @@ -41,24 +41,9 @@ def if_mkl_ml(if_true, if_false = []): a select evaluating to either if_true or if_false as appropriate. """ return select({ - "@org_tensorflow//third_party/mkl_dnn:build_with_mkl_dnn_only": if_false, - "@org_tensorflow//third_party/mkl:build_with_mkl": if_true, - "//conditions:default": if_false, - }) - -def if_mkl_ml_only(if_true, if_false = []): - """Shorthand for select()'ing on whether we're building with MKL-ML only. - - Args: - if_true: expression to evaluate if building with MKL-ML only. - if_false: expression to evaluate if building without MKL, or with MKL-DNN. - - Returns: - a select evaluating to either if_true or if_false as appropriate. 
- """ - return select({ - "@org_tensorflow//third_party/mkl:build_with_mkl_ml_only": if_true, - "//conditions:default": if_false, + "@org_tensorflow//third_party/mkl_dnn:build_with_mkl_opensource": if_false, + "@org_tensorflow//third_party/mkl_dnn:build_with_mkldnn_threadpool": if_false, + "//conditions:default": if_true, }) def if_mkl_lnx_x64(if_true, if_false = []): @@ -108,6 +93,7 @@ def mkl_deps(): "@org_tensorflow//third_party/mkl_dnn:build_with_mkl_dnn_only": ["@mkl_dnn"], "@org_tensorflow//third_party/mkl_dnn:build_with_mkl_dnn_v1_only": ["@mkl_dnn_v1//:mkl_dnn"], "@org_tensorflow//third_party/mkl_dnn:build_with_mkldnn_threadpool": ["@mkl_dnn_v1//:mkl_dnn"], + "@org_tensorflow//third_party/mkl_dnn:build_with_mkl_opensource": ["@mkl_dnn_v1//:mkl_dnn"], "@org_tensorflow//third_party/mkl:build_with_mkl_ml_only": ["@org_tensorflow//third_party/mkl:intel_binary_blob"], "@org_tensorflow//third_party/mkl:build_with_mkl": [ "@org_tensorflow//third_party/mkl:intel_binary_blob", diff --git a/third_party/mkl_dnn/BUILD b/third_party/mkl_dnn/BUILD index fe558322916..c3059a3dc5c 100644 --- a/third_party/mkl_dnn/BUILD +++ b/third_party/mkl_dnn/BUILD @@ -18,6 +18,16 @@ config_setting( visibility = ["//visibility:public"], ) +config_setting( + name = "build_with_mkl_opensource", + define_values = { + "build_with_mkl": "true", + "build_with_mkl_dnn_v1_only": "true", + "build_with_mkl_opensource": "true", + }, + visibility = ["//visibility:public"], +) + config_setting( name = "build_with_mkl_dnn_v1_only", define_values = { @@ -31,6 +41,8 @@ config_setting( name = "build_with_mkldnn_threadpool", define_values = { "build_with_mkl": "true", + "build_with_mkl_dnn_v1_only": "true", + "build_with_mkl_opensource": "true", "build_with_mkldnn_threadpool": "true", }, visibility = ["//visibility:public"], diff --git a/third_party/mkl_dnn/build_defs.bzl b/third_party/mkl_dnn/build_defs.bzl index bd3b4b94f29..6a3e4f827ce 100644 --- a/third_party/mkl_dnn/build_defs.bzl +++ b/third_party/mkl_dnn/build_defs.bzl @@ -10,11 +10,11 @@ def if_mkl_open_source_only(if_true, if_false = []): """ return select({ - "@org_tensorflow//third_party/mkl_dnn:build_with_mkl_dnn_only": if_true, + "@org_tensorflow//third_party/mkl_dnn:build_with_mkl_opensource": if_true, "//conditions:default": if_false, }) -def if_mkl_v1_open_source_only(if_true, if_false = []): +def if_mkl_v1(if_true, if_false = []): """Returns `if_true` if MKL-DNN v1.x is used. 
Shorthand for select()'ing on whether we're building with diff --git a/third_party/mkl_dnn/mkldnn.BUILD b/third_party/mkl_dnn/mkldnn.BUILD index 71dde75e2e0..5279043ad29 100644 --- a/third_party/mkl_dnn/mkldnn.BUILD +++ b/third_party/mkl_dnn/mkldnn.BUILD @@ -3,7 +3,7 @@ exports_files(["LICENSE"]) load( "@org_tensorflow//third_party/mkl_dnn:build_defs.bzl", "if_mkl_open_source_only", - "if_mkl_v1_open_source_only", + "if_mkl_v1", ) load( "@org_tensorflow//third_party:common.bzl", @@ -60,7 +60,7 @@ cc_library( "src/cpu/**/*.cpp", "src/cpu/**/*.hpp", "src/cpu/xbyak/*.h", - ]) + if_mkl_v1_open_source_only([ + ]) + if_mkl_v1([ ":mkldnn_config_h", ]) + [":mkldnn_version_h"], hdrs = glob(["include/*"]), @@ -71,7 +71,7 @@ cc_library( ] + if_mkl_open_source_only([ "-UUSE_MKL", "-UUSE_CBLAS", - ]) + if_mkl_v1_open_source_only([ + ]) + if_mkl_v1([ "-UUSE_MKL", "-UUSE_CBLAS", ]) + select({ diff --git a/third_party/mkl_dnn/mkldnn_v1.BUILD b/third_party/mkl_dnn/mkldnn_v1.BUILD index 7bdec138b99..438aa8dc03d 100644 --- a/third_party/mkl_dnn/mkldnn_v1.BUILD +++ b/third_party/mkl_dnn/mkldnn_v1.BUILD @@ -3,7 +3,7 @@ exports_files(["LICENSE"]) load( "@org_tensorflow//third_party/mkl_dnn:build_defs.bzl", "if_mkl_open_source_only", - "if_mkl_v1_open_source_only", + "if_mkl_v1", "if_mkldnn_threadpool", ) load( @@ -85,7 +85,7 @@ cc_library( ] + if_mkl_open_source_only([ "-UUSE_MKL", "-UUSE_CBLAS", - ]) + if_mkl_v1_open_source_only([ + ]) + if_mkl_v1([ "-UUSE_MKL", "-UUSE_CBLAS", ]) + if_mkldnn_threadpool([ @@ -109,21 +109,10 @@ cc_library( "src/cpu/xbyak", ], visibility = ["//visibility:public"], - deps = select({ - "@org_tensorflow//tensorflow:linux_x86_64": [ - "@mkl_linux//:mkl_headers", - "@mkl_linux//:mkl_libs_linux", - ], - "@org_tensorflow//tensorflow:macos": [ - "@mkl_darwin//:mkl_headers", - "@mkl_darwin//:mkl_libs_darwin", - ], - "@org_tensorflow//tensorflow:windows": [ - "@mkl_windows//:mkl_headers", - "@mkl_windows//:mkl_libs_windows", - ], - "//conditions:default": [], - }), + deps = if_mkl_open_source_only( + [], + ["@org_tensorflow//third_party/mkl:intel_binary_blob"], + ), ) cc_library( From dfa281c6867c9bdbe147bd349bee4f7ceaad5a61 Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Wed, 22 Jul 2020 18:14:45 -0700 Subject: [PATCH 1113/2522] Port the ceil kernel to the new TfLiteEvalTensor API. PiperOrigin-RevId: 322696884 Change-Id: I31146183495d55bb5e462f724dca4fad8001bea6 --- tensorflow/lite/micro/kernels/BUILD | 1 + tensorflow/lite/micro/kernels/ceil.cc | 13 +++++++---- tensorflow/lite/micro/kernels/ceil_test.cc | 25 +++++++++------------- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index b691472720c..a9b371b2b6a 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD @@ -346,6 +346,7 @@ tflite_micro_cc_test( "ceil_test.cc", ], deps = [ + ":kernel_runner", "//tensorflow/lite/c:common", "//tensorflow/lite/micro:op_resolvers", "//tensorflow/lite/micro/testing:micro_test", diff --git a/tensorflow/lite/micro/kernels/ceil.cc b/tensorflow/lite/micro/kernels/ceil.cc index f6e4abdc6f5..3bce8a73f55 100644 --- a/tensorflow/lite/micro/kernels/ceil.cc +++ b/tensorflow/lite/micro/kernels/ceil.cc @@ -18,6 +18,7 @@ limitations under the License. 
#include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { namespace ops { @@ -43,11 +44,15 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { } TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - const TfLiteTensor* input = GetInput(context, node, kInputTensor); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); - reference_ops::Ceil(GetTensorShape(input), GetTensorData(input), - GetTensorShape(output), GetTensorData(output)); + reference_ops::Ceil(tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); return kTfLiteOk; } diff --git a/tensorflow/lite/micro/kernels/ceil_test.cc b/tensorflow/lite/micro/kernels/ceil_test.cc index 7261d1c76a6..27caa507c00 100644 --- a/tensorflow/lite/micro/kernels/ceil_test.cc +++ b/tensorflow/lite/micro/kernels/ceil_test.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/micro/all_ops_resolver.h" +#include "tensorflow/lite/micro/kernels/kernel_runner.h" #include "tensorflow/lite/micro/testing/micro_test.h" #include "tensorflow/lite/micro/testing/test_utils.h" @@ -35,26 +36,20 @@ void TestCeil(const int* input_dims_data, const float* input_data, CreateFloatTensor(input_data, input_dims), CreateFloatTensor(output_data, output_dims), }; - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_CEIL); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); int inputs_array_data[] = {1, 0}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 1}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = nullptr; - node.builtin_data = nullptr; - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); + + const TfLiteRegistration registration = ops::micro::Register_CEIL(); + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, + /*builtin_data=*/nullptr, micro_test::reporter); + + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); + for (int i = 0; i < output_dims_count; ++i) { TF_LITE_MICRO_EXPECT_NEAR(expected_output_data[i], output_data[i], 1e-5f); } From 82e7ce307dbbce3dce65a1c3ed7cab72cda12b50 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Wed, 22 Jul 2020 18:20:54 -0700 Subject: [PATCH 1114/2522] Fix GCC build error with tpu_on_demand_compiler.cc and erroneous inclusion PiperOrigin-RevId: 322697661 Change-Id: I259d6ea26f8d8f7f930d2cefd76c7598da705ad4 --- .../core/tpu/tpu_api_dlsym_initializer.cc | 1 + tensorflow/core/tpu/tpu_library_init_fns.inc | 2 -- tensorflow/core/tpu/tpu_on_demand_compiler.cc | 
20 +++++++++++-------- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/tensorflow/core/tpu/tpu_api_dlsym_initializer.cc b/tensorflow/core/tpu/tpu_api_dlsym_initializer.cc index 320dd8c34d4..47d517270dc 100644 --- a/tensorflow/core/tpu/tpu_api_dlsym_initializer.cc +++ b/tensorflow/core/tpu/tpu_api_dlsym_initializer.cc @@ -23,6 +23,7 @@ limitations under the License. #include "tensorflow/core/tpu/tpu_api.h" #include "tensorflow/core/tpu/tpu_node_device.h" #include "tensorflow/core/tpu/tpu_system_device.h" +#include "tensorflow/stream_executor/tpu/tpu_executor_c_api.h" #include "tensorflow/stream_executor/tpu/tpu_platform.h" #endif diff --git a/tensorflow/core/tpu/tpu_library_init_fns.inc b/tensorflow/core/tpu/tpu_library_init_fns.inc index b88705e4100..89ba0fa82b3 100644 --- a/tensorflow/core/tpu/tpu_library_init_fns.inc +++ b/tensorflow/core/tpu/tpu_library_init_fns.inc @@ -1,5 +1,3 @@ -#include "third_party/tensorflow/stream_executor/tpu/tpu_executor_c_api.h" - namespace { tensorflow::Status SetTpuConfigStructFns(void* library_handle) { diff --git a/tensorflow/core/tpu/tpu_on_demand_compiler.cc b/tensorflow/core/tpu/tpu_on_demand_compiler.cc index 0839304ce10..61637f5fd0a 100644 --- a/tensorflow/core/tpu/tpu_on_demand_compiler.cc +++ b/tensorflow/core/tpu/tpu_on_demand_compiler.cc @@ -130,14 +130,18 @@ class TpuExecutable : public Executable { }; XLA_HloModuleConfig HloModuleConfigToC(const xla::HloModuleConfig& config) { - XLA_HloModuleConfig hlo_config{ - .seed = config.seed(), - .launch_id = config.launch_id(), - .replica_count = config.replica_count(), - .num_partitions = config.num_partitions(), - .use_spmd_partitioning = config.use_spmd_partitioning(), - .has_static_device_assignment = config.has_static_device_assignment(), - .has_entry_computation_layout = config.has_entry_computation_layout()}; + XLA_HloModuleConfig hlo_config; + + hlo_config.seed = config.seed(); + hlo_config.launch_id = config.launch_id(); + hlo_config.replica_count = config.replica_count(); + hlo_config.num_partitions = config.num_partitions(); + hlo_config.use_spmd_partitioning = config.use_spmd_partitioning(); + hlo_config.has_static_device_assignment = + config.has_static_device_assignment(); + hlo_config.has_entry_computation_layout = + config.has_entry_computation_layout(); + if (config.has_static_device_assignment()) { DeviceAssignmentProto dev_proto; config.static_device_assignment().Serialize(&dev_proto).IgnoreError(); From aedf43fb3cf44ec248fefa4a227c98210d956b95 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Wed, 22 Jul 2020 18:22:08 -0700 Subject: [PATCH 1115/2522] Use standard absl::BitGen instead of function unavailable in open-source absl PiperOrigin-RevId: 322697824 Change-Id: I98242ddd7368d4d302e0ddea52ea5626bd064d45 --- .../graph_rewrite/distributed_tpu_rewrite_pass_internal.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.cc b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.cc index 18b158c0335..46c10b90dc1 100644 --- a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.cc +++ b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass_internal.cc @@ -33,11 +33,11 @@ void OverrideNodeIdForTesting(const int64 node_id) { } uint64 GetNodeId() { + static absl::BitGen bitgen; if (overridden_node_id > -1) { return overridden_node_id; } else { - return absl::Uniform(absl::SharedBitGen(), uint64{0}, - std::numeric_limits::max()); + return 
absl::Uniform(bitgen, uint64{0}, std::numeric_limits::max()); } } From eecdbbaa288f7307b6c8e7d3e986078195feda85 Mon Sep 17 00:00:00 2001 From: ShengYang1 Date: Thu, 23 Jul 2020 09:42:23 +0800 Subject: [PATCH 1116/2522] Refine --- tensorflow/core/kernels/mkl_avgpooling_op.cc | 3 +-- tensorflow/core/kernels/mkl_maxpooling_op.cc | 3 +-- .../core/kernels/mkl_pooling_ops_common.h | 24 ++++++------------- 3 files changed, 9 insertions(+), 21 deletions(-) diff --git a/tensorflow/core/kernels/mkl_avgpooling_op.cc b/tensorflow/core/kernels/mkl_avgpooling_op.cc index 8de3e327f96..a238f51860b 100644 --- a/tensorflow/core/kernels/mkl_avgpooling_op.cc +++ b/tensorflow/core/kernels/mkl_avgpooling_op.cc @@ -249,8 +249,7 @@ class MklAvgPoolingGradOp : public MklPoolingBackwardOpBase { orig_input_dims_mkl_order, output_dims_mkl_order, filter_dims, strides, padding_left, padding_right, ALGORITHM::pooling_avg_exclude_padding, prop_kind::forward_training, - static_cast(this->data_format_mkldnn_), src_md, - diff_dst_md); + static_cast(this->data_format_mkldnn_), src_md); #else MklPoolingParams bwdParams( orig_input_dims_mkl_order, output_dims_mkl_order, filter_dims, diff --git a/tensorflow/core/kernels/mkl_maxpooling_op.cc b/tensorflow/core/kernels/mkl_maxpooling_op.cc index ac6a1046507..3ed6b9d02a2 100644 --- a/tensorflow/core/kernels/mkl_maxpooling_op.cc +++ b/tensorflow/core/kernels/mkl_maxpooling_op.cc @@ -312,8 +312,7 @@ class MklMaxPoolingGradOp : public MklPoolingBackwardOpBase { orig_input_dims_mkl_order, output_dims_mkl_order, filter_dims, strides, padding_left, padding_right, ALGORITHM::pooling_max, prop_kind::forward_training, - static_cast(this->data_format_mkldnn_), src_md, - diff_dst_md); + static_cast(this->data_format_mkldnn_), src_md); #else MklPoolingParams bwdParams( orig_input_dims_mkl_order, output_dims_mkl_order, filter_dims, diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.h b/tensorflow/core/kernels/mkl_pooling_ops_common.h index ec19b8a0398..3d5498ed77b 100644 --- a/tensorflow/core/kernels/mkl_pooling_ops_common.h +++ b/tensorflow/core/kernels/mkl_pooling_ops_common.h @@ -49,20 +49,12 @@ struct MklPoolingParams { mkldnn::prop_kind prop_kind; MEMORY_FORMAT src_format; memory::desc src_md; -#ifdef ENABLE_MKLDNN_V1 - memory::desc diff_dst_md; -#endif // ENABLE_MKLDNN_V1 MklPoolingParams(memory::dims src_dims, memory::dims dst_dims, memory::dims filter_dims, memory::dims strides, memory::dims padding_left, memory::dims padding_right, mkldnn::algorithm alg_kind, mkldnn::prop_kind prop_kind, -#ifdef ENABLE_MKLDNN_V1 - MEMORY_FORMAT src_format, memory::desc src_md, - memory::desc diff_dst_md = memory::desc()) -#else MEMORY_FORMAT src_format, memory::desc src_md) -#endif // ENABLE_MKLDNN_V1 : src_dims(src_dims), dst_dims(dst_dims), filter_dims(filter_dims), @@ -72,14 +64,7 @@ struct MklPoolingParams { alg_kind(alg_kind), prop_kind(prop_kind), src_format(src_format), -#ifdef ENABLE_MKLDNN_V1 - src_md(src_md), - diff_dst_md(diff_dst_md) { - } -#else - src_md(src_md) { - } -#endif // ENABLE_MKLDNN_V1 + src_md(src_md) {} }; template @@ -282,8 +267,10 @@ class MklPoolingBwdPrimitive : public MklPrimitive { // Memory descriptors. std::shared_ptr src_md; std::shared_ptr dst_md; +#ifndef ENABLE_MKLDNN_V1 std::shared_ptr diff_src_md; std::shared_ptr diff_dst_md; +#endif // Forward and backward pooling descriptors and primitive descriptors. 
std::shared_ptr fwd_desc; @@ -310,13 +297,16 @@ class MklPoolingBwdPrimitive : public MklPrimitive { diff_dst_mem(nullptr), src_md(nullptr), dst_md(nullptr), +#ifndef ENABLE_MKLDNN_V1 diff_src_md(nullptr), diff_dst_md(nullptr), +#endif fwd_desc(nullptr), bwd_desc(nullptr), fwd_pd(nullptr), bwd_pd(nullptr), - bwd(nullptr) {} + bwd(nullptr) { + } }; struct PoolingBwdContext context_; From 4fde36c19e612fb2def9edef308628455d567c68 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Jul 2020 18:53:11 -0700 Subject: [PATCH 1117/2522] Internal change PiperOrigin-RevId: 322701699 Change-Id: Ibd54a9d310e885ab870f23bfaf65d01349fe15ed --- tensorflow/python/keras/callbacks.py | 21 +++++---------------- tensorflow/python/keras/callbacks_test.py | 21 ++++----------------- 2 files changed, 9 insertions(+), 33 deletions(-) diff --git a/tensorflow/python/keras/callbacks.py b/tensorflow/python/keras/callbacks.py index 88dc1d84129..f918a754eb9 100644 --- a/tensorflow/python/keras/callbacks.py +++ b/tensorflow/python/keras/callbacks.py @@ -2014,10 +2014,8 @@ class TensorBoard(Callback, version_utils.TensorBoardVersionSelector): self._writers = {} # Resets writers. - self._should_write_train_graph = False if self.write_graph: - self._write_keras_model_summary() - self._should_write_train_graph = True + self._write_keras_model_graph() if self.embeddings_freq: self._configure_embeddings() @@ -2044,19 +2042,13 @@ class TensorBoard(Callback, version_utils.TensorBoardVersionSelector): distributed_file_utils.remove_temp_dirpath(self.log_dir, self.model.distribute_strategy) - def _write_keras_model_train_graph(self): - """Writes Keras model train_function graph to TensorBoard.""" + def _write_keras_model_graph(self): + """Writes Keras graph networks to TensorBoard.""" with self._train_writer.as_default(): with summary_ops_v2.always_record_summaries(): - train_fn = self.model.train_function - # If the train_function is a `tf.function`, we can write out a graph - if hasattr(train_fn, 'function_spec'): - summary_ops_v2.graph(train_fn._concrete_stateful_fn.graph, step=0) # pylint: disable=protected-access + if not self.model.run_eagerly: + summary_ops_v2.graph(K.get_graph(), step=0) - def _write_keras_model_summary(self): - """Writes Keras graph network summary to TensorBoard.""" - with self._train_writer.as_default(): - with summary_ops_v2.always_record_summaries(): summary_writable = ( self.model._is_graph_network or # pylint: disable=protected-access self.model.__class__.__name__ == 'Sequential') # pylint: disable=protected-access @@ -2215,9 +2207,6 @@ class TensorBoard(Callback, version_utils.TensorBoardVersionSelector): self._start_trace() def on_train_batch_end(self, batch, logs=None): - if self._should_write_train_graph: - self._write_keras_model_train_graph() - self._should_write_train_graph = False if not self._should_trace: return diff --git a/tensorflow/python/keras/callbacks_test.py b/tensorflow/python/keras/callbacks_test.py index 0992deae7b6..f103d7506b9 100644 --- a/tensorflow/python/keras/callbacks_test.py +++ b/tensorflow/python/keras/callbacks_test.py @@ -1767,7 +1767,6 @@ class _SummaryFile(object): self.images = set() self.histograms = set() self.tensors = set() - self.graph_defs = [] def list_summaries(logdir): @@ -1794,8 +1793,6 @@ def list_summaries(logdir): continue path = os.path.join(dirpath, filename) for event in summary_iterator.summary_iterator(path): - if event.graph_def: - result.graph_defs.append(event.graph_def) if not event.summary: # (e.g., it's a `graph_def` event) 
continue for value in event.summary.value: @@ -2220,7 +2217,7 @@ class TestTensorBoardV2NonParameterizedTest(keras_parameterized.TestCase): x, y, batch_size=2, - epochs=3, + epochs=2, validation_data=(x, y), callbacks=[tb_cbk]) summary_file = list_summaries(self.logdir) @@ -2230,16 +2227,6 @@ class TestTensorBoardV2NonParameterizedTest(keras_parameterized.TestCase): _ObservedSummary(logdir=self.train_dir, tag='keras'), }, ) - if not model.run_eagerly: - # There should be one train graph - self.assertLen(summary_file.graph_defs, 1) - for graph_def in summary_file.graph_defs: - graph_def_str = str(graph_def) - - # All the model layers should appear in the graphs - for layer in model.layers: - if 'input' not in layer.name: - self.assertIn(layer.name, graph_def_str) def test_TensorBoard_writeSequentialModel_noInputShape(self): model = keras.models.Sequential([ @@ -2247,7 +2234,7 @@ class TestTensorBoardV2NonParameterizedTest(keras_parameterized.TestCase): keras.layers.Flatten(), keras.layers.Dense(1), ]) - model.compile('sgd', 'mse', run_eagerly=testing_utils.should_run_eagerly()) + model.compile('sgd', 'mse', run_eagerly=False) self.fitModelAndAssertKerasModelWritten(model) def test_TensorBoard_writeSequentialModel_withInputShape(self): @@ -2256,7 +2243,7 @@ class TestTensorBoardV2NonParameterizedTest(keras_parameterized.TestCase): keras.layers.Flatten(), keras.layers.Dense(1), ]) - model.compile('sgd', 'mse', run_eagerly=testing_utils.should_run_eagerly()) + model.compile('sgd', 'mse', run_eagerly=False) self.fitModelAndAssertKerasModelWritten(model) def test_TensorBoard_writeModel(self): @@ -2265,7 +2252,7 @@ class TestTensorBoardV2NonParameterizedTest(keras_parameterized.TestCase): x = keras.layers.Flatten()(x) x = keras.layers.Dense(1)(x) model = keras.models.Model(inputs=inputs, outputs=[x]) - model.compile('sgd', 'mse', run_eagerly=testing_utils.should_run_eagerly()) + model.compile('sgd', 'mse', run_eagerly=False) self.fitModelAndAssertKerasModelWritten(model) def test_TensorBoard_autoTrace(self): From b7ac6f45c5bdb350bd07f16d1fd85a446d812aad Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Wed, 22 Jul 2020 18:54:06 -0700 Subject: [PATCH 1118/2522] Port the circular buffer kernel to the new TfLiteEvalTensor API. PiperOrigin-RevId: 322701794 Change-Id: Ib3d39f12747791760ed03d9ad39694660d05522d --- tensorflow/lite/micro/kernels/BUILD | 1 + .../lite/micro/kernels/circular_buffer.cc | 11 +- .../micro/kernels/circular_buffer_test.cc | 182 ++++++++---------- 3 files changed, 89 insertions(+), 105 deletions(-) diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index a9b371b2b6a..c98e23a54f3 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD @@ -585,6 +585,7 @@ tflite_micro_cc_test( "circular_buffer_test.cc", ], deps = [ + ":kernel_runner", ":micro_ops", "//tensorflow/lite/c:common", "//tensorflow/lite/micro:op_resolvers", diff --git a/tensorflow/lite/micro/kernels/circular_buffer.cc b/tensorflow/lite/micro/kernels/circular_buffer.cc index f588d64dcd5..876ea569196 100644 --- a/tensorflow/lite/micro/kernels/circular_buffer.cc +++ b/tensorflow/lite/micro/kernels/circular_buffer.cc @@ -22,6 +22,7 @@ limitations under the License. 
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/op_macros.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" /* * The circular buffer custom operator is used to implement strided streaming @@ -121,8 +122,10 @@ void EvalInt8(const int8_t* input, int num_slots, int depth, int8_t* output) { } TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - const TfLiteTensor* input = GetInput(context, node, kInputTensor); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); OpData* data = reinterpret_cast(node->user_data); @@ -130,8 +133,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { int depth = output->dims->data[3]; if (input->type == kTfLiteInt8) { - EvalInt8(GetTensorData(input), num_slots, depth, - GetTensorData(output)); + EvalInt8(tflite::micro::GetTensorData(input), num_slots, depth, + tflite::micro::GetTensorData(output)); } else { TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", TfLiteTypeGetName(input->type), input->type); diff --git a/tensorflow/lite/micro/kernels/circular_buffer_test.cc b/tensorflow/lite/micro/kernels/circular_buffer_test.cc index 4c48060a0a9..770f4565670 100644 --- a/tensorflow/lite/micro/kernels/circular_buffer_test.cc +++ b/tensorflow/lite/micro/kernels/circular_buffer_test.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/micro/all_ops_resolver.h" +#include "tensorflow/lite/micro/kernels/kernel_runner.h" #include "tensorflow/lite/micro/kernels/micro_ops.h" #include "tensorflow/lite/micro/testing/micro_test.h" #include "tensorflow/lite/micro/testing/test_utils.h" @@ -29,93 +30,6 @@ constexpr int kRunPeriod = 2; // TODO(b/149795762): Add this to TfLiteStatus enum. constexpr int kTfLiteAbort = -9; -TfLiteNode PrepareCircularBufferInt8(const int* input_dims_data, - const int8_t* input_data, - const int* output_dims_data, - const int8_t* expected_output_data, - int8_t* output_data) { - const TfLiteRegistration* registration = - ops::micro::Register_CIRCULAR_BUFFER(); - - TfLiteNode node; - TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); - TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); - constexpr int inputs_size = 2; - constexpr int outputs_size = 1; - constexpr int tensors_size = inputs_size + outputs_size; - TfLiteTensor tensors[tensors_size] = { - CreateQuantizedTensor(input_data, input_dims, 1, 0), - CreateQuantizedTensor(output_data, output_dims, 1, 0), - }; - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - - // There is one input - tensor 0. - const int inputs_array_data[] = {1, 0}; - TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); - // There is one output - tensor 1. 
- const int outputs_array_data[] = {1, 1}; - TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - - node.inputs = inputs_array; - node.outputs = outputs_array; - node.builtin_data = nullptr; - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; - - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->prepare); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - return node; -} - -// Run invoke cycles_until_output times with the supplied input, expecting -// invoke to return kTfLiteAbort until the last iteration, at which point the -// output should match expected_output_data. -TfLiteStatus InvokeCircularBufferInt8(const int* input_dims_data, - const int8_t* input_data, - const int* output_dims_data, - const int8_t* expected_output_data, - int8_t* output_data, TfLiteNode* node) { - TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); - TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); - - const int output_dims_count = ElementCount(*output_dims); - const TfLiteRegistration* registration = - ops::micro::Register_CIRCULAR_BUFFER(); - - constexpr int inputs_size = 2; - constexpr int outputs_size = 1; - constexpr int tensors_size = inputs_size + outputs_size; - TfLiteTensor tensors[tensors_size] = { - CreateQuantizedTensor(input_data, input_dims, 1, 0), - CreateQuantizedTensor(output_data, output_dims, 1, 0), - }; - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - - // There is one input - tensor 0. - const int inputs_array_data[] = {1, 0}; - TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); - // There is one output - tensor 1. - const int outputs_array_data[] = {1, 1}; - TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - - node->inputs = inputs_array; - node->outputs = outputs_array; - node->builtin_data = nullptr; - node->custom_initial_data = nullptr; - node->custom_initial_data_size = 0; - - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - - TfLiteStatus status = registration->invoke(&context, node); - - for (int i = 0; i < output_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_EQ(expected_output_data[i], output_data[i]); - } - return status; -} - } // namespace } // namespace testing } // namespace tflite @@ -125,30 +39,65 @@ TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(OutputTensorLength4) { constexpr int depth = 3; constexpr int num_slots = 4; + int8_t input_data[depth]; int8_t output_data[depth * num_slots]; memset(output_data, 0, sizeof(output_data)); + // There are four input dimensions - [1, 1, 1, depth]. const int input_dims[] = {4, 1, 1, 1, depth}; // There are four output dimensions - [1, num_slots, 1, depth]. const int output_dims[] = {4, 1, num_slots, 1, depth}; + TfLiteIntArray* input_tensor_dims = + tflite::testing::IntArrayFromInts(input_dims); + TfLiteIntArray* output_tensor_dims = + tflite::testing::IntArrayFromInts(output_dims); + + const int output_dims_count = tflite::ElementCount(*output_tensor_dims); + + constexpr int inputs_size = 2; + constexpr int outputs_size = 1; + constexpr int tensors_size = inputs_size + outputs_size; + TfLiteTensor tensors[tensors_size] = { + tflite::testing::CreateQuantizedTensor(input_data, input_tensor_dims, 1, + 0), + tflite::testing::CreateQuantizedTensor(output_data, output_tensor_dims, 1, + 0), + }; + + // There is one input - tensor 0. 
+ const int inputs_array_data[] = {1, 0}; + TfLiteIntArray* inputs_array = + tflite::testing::IntArrayFromInts(inputs_array_data); + // There is one output - tensor 1. + const int outputs_array_data[] = {1, 1}; + TfLiteIntArray* outputs_array = + tflite::testing::IntArrayFromInts(outputs_array_data); + + const TfLiteRegistration* registration = + tflite::ops::micro::Register_CIRCULAR_BUFFER(); + tflite::micro::KernelRunner runner = tflite::micro::KernelRunner( + *registration, tensors, tensors_size, inputs_array, outputs_array, + /*builtin_data=*/nullptr, micro_test::reporter); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + const int8_t goldens[5][16] = {{0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3}, {0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6}, {0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, {4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}}; - int8_t input[depth]; - TfLiteNode node = tflite::testing::PrepareCircularBufferInt8( - input_dims, input, output_dims, goldens[0], output_data); // Expect the circular buffer to run every other invoke for 4xN output. for (int i = 0; i < 5; i++) { for (int j = 0; j < depth; j++) { - input[j] = i * depth + j + 1; + input_data[j] = i * depth + j + 1; + } + TfLiteStatus status = runner.Invoke(); + + for (int j = 0; j < output_dims_count; ++j) { + TF_LITE_MICRO_EXPECT_EQ(goldens[i][j], output_data[j]); } - TfLiteStatus status = tflite::testing::InvokeCircularBufferInt8( - input_dims, input, output_dims, goldens[i], output_data, &node); // Every kRunPeriod iterations, the circular buffer should return kTfLiteOk. if (i % tflite::testing::kRunPeriod == tflite::testing::kRunPeriod - 1) { @@ -162,11 +111,44 @@ TF_LITE_MICRO_TEST(OutputTensorLength4) { TF_LITE_MICRO_TEST(OutputTensorLength5) { constexpr int depth = 4; constexpr int num_slots = 5; + int8_t input_data[depth]; int8_t output_data[depth * num_slots]; memset(output_data, 0, sizeof(output_data)); const int input_dims[] = {4, 1, 1, 1, depth}; const int output_dims[] = {4, 1, num_slots, 1, depth}; + TfLiteIntArray* input_tensor_dims = + tflite::testing::IntArrayFromInts(input_dims); + TfLiteIntArray* output_tensor_dims = + tflite::testing::IntArrayFromInts(output_dims); + + const int output_dims_count = tflite::ElementCount(*output_tensor_dims); + + constexpr int inputs_size = 2; + constexpr int outputs_size = 1; + constexpr int tensors_size = inputs_size + outputs_size; + TfLiteTensor tensors[tensors_size] = { + tflite::testing::CreateQuantizedTensor(input_data, input_tensor_dims, 1, + 0), + tflite::testing::CreateQuantizedTensor(output_data, output_tensor_dims, 1, + 0), + }; + + // There is one input - tensor 0. + const int inputs_array_data[] = {1, 0}; + TfLiteIntArray* inputs_array = + tflite::testing::IntArrayFromInts(inputs_array_data); + // There is one output - tensor 1. 
+ const int outputs_array_data[] = {1, 1}; + TfLiteIntArray* outputs_array = + tflite::testing::IntArrayFromInts(outputs_array_data); + + const TfLiteRegistration* registration = + tflite::ops::micro::Register_CIRCULAR_BUFFER(); + tflite::micro::KernelRunner runner = tflite::micro::KernelRunner( + *registration, tensors, tensors_size, inputs_array, outputs_array, + /*builtin_data=*/nullptr, micro_test::reporter); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); const int8_t goldens[6][20] = { {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4}, @@ -177,18 +159,16 @@ TF_LITE_MICRO_TEST(OutputTensorLength5) { {5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}}; - int8_t input[depth]; - TfLiteNode node = tflite::testing::PrepareCircularBufferInt8( - input_dims, input, output_dims, goldens[0], output_data); // Expect circular buffer to run every cycle for 5xN output. for (int i = 0; i < 6; i++) { for (int j = 0; j < depth; j++) { - input[j] = i * depth + j + 1; + input_data[j] = i * depth + j + 1; + } + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); + + for (int j = 0; j < output_dims_count; ++j) { + TF_LITE_MICRO_EXPECT_EQ(goldens[i][j], output_data[j]); } - TF_LITE_MICRO_EXPECT_EQ( - kTfLiteOk, - tflite::testing::InvokeCircularBufferInt8( - input_dims, input, output_dims, goldens[i], output_data, &node)); } } From f112d4bd7dc60a1582ab3de8ce12f2f92c5f806a Mon Sep 17 00:00:00 2001 From: Anna R Date: Wed, 22 Jul 2020 18:56:35 -0700 Subject: [PATCH 1119/2522] Disable collective ops abort tests on windows since they seem to be flaky. PiperOrigin-RevId: 322702070 Change-Id: I490a411be4bba5b112bdb3bf748297edbe41e02e --- tensorflow/python/ops/collective_ops_test.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/python/ops/collective_ops_test.py b/tensorflow/python/ops/collective_ops_test.py index dd788e911f7..6ddcdb3f31e 100644 --- a/tensorflow/python/ops/collective_ops_test.py +++ b/tensorflow/python/ops/collective_ops_test.py @@ -18,8 +18,10 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os import threading import time +import unittest from tensorflow.core.protobuf import config_pb2 from tensorflow.core.protobuf import rewriter_config_pb2 @@ -740,6 +742,7 @@ class CollectiveOpTest(test.TestCase): def_function.function(collective_fn)() @test_util.run_v2_only + @unittest.skipIf(os.name == 'nt', 'b/161922535: Flaky on Windows') def testAbortInstanceParamsResolution(self): cpus = config.list_physical_devices('CPU') config.set_logical_device_configuration(cpus[0], [ @@ -799,6 +802,7 @@ class CollectiveOpTest(test.TestCase): def_function.function(collective_fn)() @test_util.run_v2_only + @unittest.skipIf(os.name == 'nt', 'b/161922535: Flaky on Windows') def testAbortRing(self): cpus = config.list_physical_devices('CPU') config.set_logical_device_configuration(cpus[0], [ From fffa1e6548eb3be96d27695b0f6e9de42a0c6817 Mon Sep 17 00:00:00 2001 From: Meghna Natraj Date: Wed, 22 Jul 2020 19:20:04 -0700 Subject: [PATCH 1120/2522] Fix documentation for int16x8 quantization PiperOrigin-RevId: 322704757 Change-Id: Id3c785de088c04ce5c261e7152fcdefeb6d7db29 --- .../lite/g3doc/performance/post_training_quantization.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/g3doc/performance/post_training_quantization.md b/tensorflow/lite/g3doc/performance/post_training_quantization.md index bcc67184b4c..6198798978f 100644 --- 
a/tensorflow/lite/g3doc/performance/post_training_quantization.md +++ b/tensorflow/lite/g3doc/performance/post_training_quantization.md @@ -161,7 +161,7 @@ def representative_dataset_gen(): yield [input] converter.representative_dataset = representative_dataset_gen converter.optimizations = [tf.lite.Optimize.DEFAULT] -converter.target_spec.supported_ops = [tf.lite.constants.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8] +converter.target_spec.supported_ops = [tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8] tflite_quant_model = converter.convert() @@ -177,7 +177,7 @@ def representative_dataset_gen(): yield [input] converter.representative_dataset = representative_dataset_gen converter.optimizations = [tf.lite.Optimize.DEFAULT] -converter.target_spec.supported_ops = [tf.lite.constants.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8, +converter.target_spec.supported_ops = [tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8, tf.lite.OpsSet.TFLITE_BUILTINS] tflite_quant_model = converter.convert() From 055461803630ad1a4461f2ffd6e488e9a3effbc1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Jul 2020 19:32:01 -0700 Subject: [PATCH 1121/2522] Include Ops that are used via PartitionedCalls to MetaGraphDef.MetaInfoDef.stripped_op_list PiperOrigin-RevId: 322706036 Change-Id: I3f307d07a9d38aeca34f7c550d857c76aed37005 --- tensorflow/python/framework/meta_graph.py | 9 ++++++-- .../python/framework/meta_graph_test.py | 23 +++++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/framework/meta_graph.py b/tensorflow/python/framework/meta_graph.py index 327b476c576..dbc2a894d65 100644 --- a/tensorflow/python/framework/meta_graph.py +++ b/tensorflow/python/framework/meta_graph.py @@ -161,12 +161,17 @@ def ops_used_by_graph_def(graph_def): functions_to_process.append(name_to_function[op]) used_ops.add(op) - for node in graph_def.node: + def process_node(node): mark_op_as_used(node.op) + if node.op in ["PartitionedCall", "StatefulPartitionedCall"]: + mark_op_as_used(node.attr["f"].func.name) + + for node in graph_def.node: + process_node(node) while functions_to_process: fun = functions_to_process.pop() for node in fun.node_def: - mark_op_as_used(node.op) + process_node(node) return [op for op in used_ops if op not in name_to_function] diff --git a/tensorflow/python/framework/meta_graph_test.py b/tensorflow/python/framework/meta_graph_test.py index ae44fbce0f0..36acd81fe26 100644 --- a/tensorflow/python/framework/meta_graph_test.py +++ b/tensorflow/python/framework/meta_graph_test.py @@ -161,6 +161,29 @@ class SimpleMetaGraphTest(test.TestCase): op_list = meta_graph.stripped_op_list_for_graph(graph) self.assertEqual(["Const"], [op.name for op in op_list.op]) + def testStrippedOpListPartitionedCalls(self): + # Function A calls B via StatefulPartitionedCall. + graph = graph_pb2.GraphDef() + a = graph.library.function.add() + b = graph.library.function.add() + a.signature.name = "A" + b.signature.name = "B" + node_in_a = a.node_def.add() + node_in_a.op = "StatefulPartitionedCall" + node_in_a.attr["f"].func.name = "B" + b.node_def.add().op = "Const" + b.node_def.add().op = "A" + + # Use A in the graph via PartitionedCall. 
+ node = graph.node.add() + node.op = "PartitionedCall" + node.attr["f"].func.name = "A" + + op_list = meta_graph.stripped_op_list_for_graph(graph) + self.assertSameElements( + ["Const", "PartitionedCall", "StatefulPartitionedCall"], + [op.name for op in op_list.op]) + @test_util.run_deprecated_v1 def testDefaultAttrStripping(self): """Verifies that default attributes are stripped from a graph def.""" From a236b79295c930b48bfd5171d9b2658851f48b6e Mon Sep 17 00:00:00 2001 From: Vignesh Kothapalli Date: Thu, 23 Jul 2020 08:21:29 +0530 Subject: [PATCH 1122/2522] refactored test cases and added details --- .../experimental/kernel_tests/unique_test.py | 71 +++++++++++++------ 1 file changed, 49 insertions(+), 22 deletions(-) diff --git a/tensorflow/python/data/experimental/kernel_tests/unique_test.py b/tensorflow/python/data/experimental/kernel_tests/unique_test.py index 107faf8f0b9..67b7fa08ad1 100644 --- a/tensorflow/python/data/experimental/kernel_tests/unique_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/unique_test.py @@ -81,30 +81,57 @@ class UniqueTest(test_base.DatasetTestBase, parameterized.TestCase): @combinations.generate(test_base.graph_only_combinations()) def testTypeMismatch(self): - # raises InternalError when dtypes don't match. - with self.assertRaises(errors.InternalError): - self._testSimpleHelper(dtypes.string, [ - (["hello", 1, 2, 1], ["hello"]), - (["hello", "world", 1], ["hello", "world"]), - (["hello", "hello", "world", 1, 2], ["hello", "world"]), - (["hello", "world", 1, 1, 2], ["hello", "world"]), - ([1, 2, "hello"], ["hello"]), - ([1, 1, 2, 3, 3, "hello"], ["hello"]), - ]) + # Placeholder values are needed to fill in the expected array with dummy value so that, + # when the dataset generates the element and observes that there is a type mismatch, + # it raises the proper error and not an OutOfRangeError which occurs when it is unable + # to fetch an element to compare from the expected array in the first place. + string_placeholder = "" + int32_placeholder = 0 + int64_placeholder = 0 - self._testSimpleHelper(dtypes.int32, [ - ([1, "hello", "world"], [1]), - ([1, 2, 1, "hello", "hello", "world"], [1, 2]), - (["hello", 1, 2], [1, 2]), - (["hello", 1, 1, 2, 3, 3], [1, 2, 3]), - ]) + # raises InternalError when element type doesn't match with dtypes.string. + string_cases = [ + (["hello", 1, 2, 1], ["hello"]), + (["hello", "world", 1], ["hello", "world"]), + (["hello", "hello", "world", 1, 2], ["hello", "world"]), + (["hello", "world", 1, 1, 2], ["hello", "world"]), + # In the following cases, when the first element (i.e 1) of the dataset is generated, + # it validates the type and immediately raises the error. This is unlike the above cases, + # wherein the dtype of the starting elements are as expected to start with, + # and the dataset has to loop until it reaches the incorrect dtype element. + # Until then we need to make sure that data with correct type has to match + # for testing purposes. Similar logic applies to dtype.int32 and dtype.64 as well. + ([1, 2, "hello"], [string_placeholder]), + ([1, 1, 2, 3, 3, "hello"], [string_placeholder]), + ] - self._testSimpleHelper(dtypes.int64, [ - ([2, 3, "hello", "world"], [2, 3]), - ([2, 3, 3, "hello", "hello", "world"], [2, 3]), - (["hello", 2, 2], [2]), - (["hello", "hello", 1, 1, 2, 3], [1, 2, 3]), - ]) + # handle each case independently so that an error raised by a single case doesn't interfere + # with the other ones. As per self._testSimpleHelper functionality. 
+ for case in string_cases: + with self.assertRaises(errors.InternalError): + self._testSimpleHelper(dtypes.string, [case]) + + # raises InvalidArgumentError when element type doesn't match with dtypes.int32. + int32_cases = [ + ([1, "hello", "world"], [1]), + ([1, 2, 1, "hello", "hello", "world"], [1, 2]), + (["hello", 1, 2], [int32_placeholder]), + (["hello", 1, 1, 2, 3, 3], [int32_placeholder]), + ] + for case in int32_cases: + with self.assertRaises(errors.InvalidArgumentError): + self._testSimpleHelper(dtypes.int32, [case]) + + # raises InvalidArgumentError when element type doesn't match with dtypes.int64. + int64_cases = [ + ([2, 3, "hello", "world"], [2, 3]), + ([2, 3, 3, "hello", "hello", "world"], [2, 3]), + (["hello", 2, 2], [int64_placeholder]), + (["hello", "hello", 1, 1, 2, 3], [int64_placeholder]), + ] + for case in int64_cases: + with self.assertRaises(errors.InvalidArgumentError): + self._testSimpleHelper(dtypes.int64, [case]) if __name__ == "__main__": From 663cd759ad117bb841b39be644fe5be3ba152a8b Mon Sep 17 00:00:00 2001 From: Robert David Date: Wed, 22 Jul 2020 20:16:06 -0700 Subject: [PATCH 1123/2522] Small LSTM test cleanup: Move setting lstm_golden_output_ to SetUp() for a few tests, consistent with other tests. Drop the input/output parameters of VerifyGoldens, it already sees the member variables. PiperOrigin-RevId: 322710419 Change-Id: I8035f241db6de5934c47b573ef3dea8ddd25d0b9 --- tensorflow/lite/kernels/lstm_test.cc | 174 +++++++++------------------ 1 file changed, 54 insertions(+), 120 deletions(-) diff --git a/tensorflow/lite/kernels/lstm_test.cc b/tensorflow/lite/kernels/lstm_test.cc index 4c77c0c1202..015fbc7050e 100644 --- a/tensorflow/lite/kernels/lstm_test.cc +++ b/tensorflow/lite/kernels/lstm_test.cc @@ -324,9 +324,7 @@ class BaseLstmOpTest : public ::testing::TestWithParam { std::vector> lstm_golden_output_; // Compares output up to tolerance to the result of the lstm given the input. 
- void VerifyGoldens(const std::vector>& input, - const std::vector>& output, - LSTMOpModel* lstm, float tolerance = 1e-5) { + void VerifyGoldens(LSTMOpModel* lstm, float tolerance = 1e-5) { // Weights are set twice: // - The delegate, if used, needs to know the scales and zero-points of // quantized tensors, which are computed dynamically when weights are set, @@ -337,15 +335,15 @@ class BaseLstmOpTest : public ::testing::TestWithParam { lstm->ApplyDelegate(); SetAllWeightsAndBiases(lstm); - const int num_batches = input.size(); + const int num_batches = lstm_input_.size(); EXPECT_GT(num_batches, 0); const int num_inputs = lstm->num_inputs(); EXPECT_GT(num_inputs, 0); - const int input_sequence_size = input[0].size() / num_inputs; + const int input_sequence_size = lstm_input_[0].size() / num_inputs; EXPECT_GT(input_sequence_size, 0); for (int i = 0; i < input_sequence_size; ++i) { for (int b = 0; b < num_batches; ++b) { - const float* batch_start = input[b].data() + i * num_inputs; + const float* batch_start = lstm_input_[b].data() + i * num_inputs; const float* batch_end = batch_start + num_inputs; lstm->SetInput(b * lstm->num_inputs(), batch_start, batch_end); @@ -356,7 +354,8 @@ class BaseLstmOpTest : public ::testing::TestWithParam { const int num_outputs = lstm->num_outputs(); std::vector expected; for (int b = 0; b < num_batches; ++b) { - const float* golden_start_batch = output[b].data() + i * num_outputs; + const float* golden_start_batch = + lstm_golden_output_[b].data() + i * num_outputs; const float* golden_end_batch = golden_start_batch + num_outputs; expected.insert(expected.end(), golden_start_batch, golden_end_batch); } @@ -462,7 +461,7 @@ TEST_F(NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest, Float) { /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/false); - VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm); + VerifyGoldens(&lstm); } TEST_F(NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest, With24Inputs) { @@ -481,7 +480,7 @@ TEST_F(NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest, With24Inputs) { /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/false); - VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm); + VerifyGoldens(&lstm); } TEST_P(NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest, HybridUint8) { @@ -503,8 +502,7 @@ TEST_P(NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest, HybridUint8) { /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/GetParam()); - VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm, - /*tolerance=*/0.0157651); + VerifyGoldens(&lstm, /*tolerance=*/0.0157651); } TEST_P(NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest, HybridInt8) { @@ -525,8 +523,7 @@ TEST_P(NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest, HybridInt8) { /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/GetParam()); - VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm, - /*tolerance=*/0.0157651); + VerifyGoldens(&lstm, /*tolerance=*/0.0157651); } class Cifg_Peephole_NoProjection_NoLayerNorm_LstmOpTest @@ -592,7 +589,7 @@ TEST_F(Cifg_Peephole_NoProjection_NoLayerNorm_LstmOpTest, Float) { /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/false); - VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm); + VerifyGoldens(&lstm); } TEST_P(Cifg_Peephole_NoProjection_NoLayerNorm_LstmOpTest, HybridUint8) { @@ -614,7 +611,7 @@ 
TEST_P(Cifg_Peephole_NoProjection_NoLayerNorm_LstmOpTest, HybridUint8) { /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/GetParam()); - VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm, /*tolerance=*/0.03573); + VerifyGoldens(&lstm, /*tolerance=*/0.03573); } TEST_P(Cifg_Peephole_NoProjection_NoLayerNorm_LstmOpTest, HybridInt8) { @@ -635,7 +632,7 @@ TEST_P(Cifg_Peephole_NoProjection_NoLayerNorm_LstmOpTest, HybridInt8) { /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/GetParam()); - VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm, /*tolerance=*/0.03573); + VerifyGoldens(&lstm, /*tolerance=*/0.03573); } class NoCifg_Peephole_Projection_NoLayerNorm_LstmOpTest @@ -1252,7 +1249,7 @@ TEST_F(NoCifg_Peephole_Projection_NoLayerNorm_LstmOpTest, Float) { /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/false); - VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm); + VerifyGoldens(&lstm); } TEST_P(NoCifg_Peephole_Projection_NoLayerNorm_LstmOpTest, HybridUint8) { @@ -1273,7 +1270,7 @@ TEST_P(NoCifg_Peephole_Projection_NoLayerNorm_LstmOpTest, HybridUint8) { /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/GetParam()); - VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm, /*tolerance=*/0.00467); + VerifyGoldens(&lstm, /*tolerance=*/0.00467); } TEST_P(NoCifg_Peephole_Projection_NoLayerNorm_LstmOpTest, HybridInt8) { @@ -1293,7 +1290,7 @@ TEST_P(NoCifg_Peephole_Projection_NoLayerNorm_LstmOpTest, HybridInt8) { /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, /*asymmetric_quantize_inputs=*/GetParam()); - VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm, /*tolerance=*/0.0015); + VerifyGoldens(&lstm, /*tolerance=*/0.0015); } class NoCifg_Peephole_Projection_LayerNorm_LstmOpTest : public BaseLstmOpTest { @@ -1359,6 +1356,19 @@ class NoCifg_Peephole_Projection_LayerNorm_LstmOpTest : public BaseLstmOpTest { 0.1, 0.5, 0.2, 0.4, 0.2, // seq 1 0.6, 0.9, 0.2, 0.5, 0.7}, // seq 2 }; + + lstm_golden_output_ = {{ + // Batch0: 3 (input_sequence_size) * 3 (n_output) + 0.0244077, 0.128027, -0.00170918, // seq 0 + 0.0137642, 0.140751, 0.0395835, // seq 1 + -0.00459231, 0.155278, 0.0837377, // seq 2 + }, + { + // Batch1: 3 (input_sequence_size) * 3 (n_output) + -0.00692428, 0.0848741, 0.063445, // seq 0 + -0.00403912, 0.139963, 0.072681, // seq 1 + 0.00752706, 0.161903, 0.0561371, // seq 2 + }}; } }; @@ -1368,7 +1378,7 @@ TEST_F(NoCifg_Peephole_Projection_LayerNorm_LstmOpTest, Float) { const int n_cell = 4; const int n_output = 3; - LSTMOpModel layer_norm_lstm( + LSTMOpModel lstm( n_batch, n_input, n_cell, n_output, /*use_cifg=*/false, /*use_peephole=*/true, /*use_projection_weights=*/true, @@ -1376,21 +1386,7 @@ TEST_F(NoCifg_Peephole_Projection_LayerNorm_LstmOpTest, Float) { /*weight_type=*/TensorType_FLOAT32, /*model_has_legacy_20_inputs=*/false, /*is_layer_norm=*/true, /*asymmetric_quantize_inputs=*/false); - // Verify the final output. 
- lstm_golden_output_ = {{ - // Batch0: 3 (input_sequence_size) * 3 (n_output) - 0.0244077, 0.128027, -0.00170918, // seq 0 - 0.0137642, 0.140751, 0.0395835, // seq 1 - -0.00459231, 0.155278, 0.0837377, // seq 2 - }, - { - // Batch1: 3 (input_sequence_size) * 3 (n_output) - -0.00692428, 0.0848741, 0.063445, // seq 0 - -0.00403912, 0.139963, 0.072681, // seq 1 - 0.00752706, 0.161903, 0.0561371, // seq 2 - }}; - - VerifyGoldens(lstm_input_, lstm_golden_output_, &layer_norm_lstm); + VerifyGoldens(&lstm); } TEST_P(NoCifg_Peephole_Projection_LayerNorm_LstmOpTest, HybridUint8) { @@ -1403,7 +1399,7 @@ TEST_P(NoCifg_Peephole_Projection_LayerNorm_LstmOpTest, HybridUint8) { const int n_cell = 4; const int n_output = 3; - LSTMOpModel layer_norm_lstm( + LSTMOpModel lstm( n_batch, n_input, n_cell, n_output, /*use_cifg=*/false, /*use_peephole=*/true, /*use_projection_weights=*/true, @@ -1411,21 +1407,7 @@ TEST_P(NoCifg_Peephole_Projection_LayerNorm_LstmOpTest, HybridUint8) { /*weight_type=*/TensorType_UINT8, /*model_has_legacy_20_inputs=*/false, /*is_layer_norm=*/true, /*asymmetric_quantize_inputs=*/GetParam()); - lstm_golden_output_ = {{ - // Batch0: 3 (input_sequence_size) * 3 (n_output) - 0.0244576, 0.127847, -0.00181765, // seq 0 - 0.0137518, 0.140892, 0.0402234, // seq 1 - -0.0048839, 0.155096, 0.0840309, // seq 2 - }, - { - // Batch1: 3 (input_sequence_size) * 3 (n_output) - -0.00728636, 0.0843957, 0.0634786, // seq 0 - -0.00448382, 0.139278, 0.0737372, // seq 1 - 0.00734616, 0.161793, 0.0560238, // seq 2 - }}; - - VerifyGoldens(lstm_input_, lstm_golden_output_, &layer_norm_lstm, - /*tolerance=*/0.0010907); + VerifyGoldens(&lstm, /*tolerance=*/0.0010907); } TEST_P(NoCifg_Peephole_Projection_LayerNorm_LstmOpTest, HybridInt8) { @@ -1437,7 +1419,7 @@ TEST_P(NoCifg_Peephole_Projection_LayerNorm_LstmOpTest, HybridInt8) { const int n_cell = 4; const int n_output = 3; - LSTMOpModel layer_norm_lstm( + LSTMOpModel lstm( n_batch, n_input, n_cell, n_output, /*use_cifg=*/false, /*use_peephole=*/true, /*use_projection_weights=*/true, @@ -1445,22 +1427,7 @@ TEST_P(NoCifg_Peephole_Projection_LayerNorm_LstmOpTest, HybridInt8) { /*weight_type=*/TensorType_INT8, /*model_has_legacy_20_inputs=*/false, /*is_layer_norm=*/true, /*asymmetric_quantize_inputs=*/GetParam()); - // Goldens are calculated from weight_type=TensorType_FLOAT32. 
- lstm_golden_output_ = {{ - // Batch0: 3 (input_sequence_size) * 3 (n_output) - 0.0244077, 0.128027, -0.00170918, // seq 0 - 0.0137642, 0.140751, 0.0395835, // seq 1 - -0.00459233, 0.155278, 0.0837378, // seq 2 - }, - { - // Batch1: 3 (input_sequence_size) * 3 (n_output) - -0.00692428, 0.0848741, 0.063445, // seq 0 - -0.00403911, 0.139963, 0.072681, // seq 1 - 0.00752708, 0.161903, 0.0561371, // seq 2 - }}; - - VerifyGoldens(lstm_input_, lstm_golden_output_, &layer_norm_lstm, - /*tolerance=*/1.06e-3); + VerifyGoldens(&lstm, /*tolerance=*/1.06e-3); } class Cifg_Peephole_Projection_LayerNorm_LstmOpTest : public BaseLstmOpTest { @@ -1506,6 +1473,19 @@ class Cifg_Peephole_Projection_LayerNorm_LstmOpTest : public BaseLstmOpTest { 0.1, 0.5, 0.2, 0.4, 0.2, // seq 1 0.6, 0.9, 0.2, 0.5, 0.7}, // seq 2 }; + lstm_golden_output_ = { + { + // Batch0: 3 (input_sequence_size) * 3 (n_output) + 0.02129706, 0.140816242, 0.0112733059, // seq 0 + 0.0132302344, 0.152308047, 0.0346313119, // seq 1 + -0.0123688057, 0.165790111, 0.0893077999, // seq 2 + }, + { + // Batch1: 3 (input_sequence_size) * 3 (n_output) + -0.0226350538, 0.0916948169, 0.0769175813, // seq 0 + -0.0269966982, 0.149707705, 0.094149217, // seq 1 + -0.0103429332, 0.173016444, 0.0720508844, // seq 2 + }}; } }; @@ -1515,7 +1495,7 @@ TEST_F(Cifg_Peephole_Projection_LayerNorm_LstmOpTest, Float) { const int n_cell = 4; const int n_output = 3; - LSTMOpModel layer_norm_lstm( + LSTMOpModel lstm( n_batch, n_input, n_cell, n_output, /*use_cifg=*/true, /*use_peephole=*/true, /*use_projection_weights=*/true, @@ -1523,22 +1503,7 @@ TEST_F(Cifg_Peephole_Projection_LayerNorm_LstmOpTest, Float) { /*weight_type=*/TensorType_FLOAT32, /*model_has_legacy_20_inputs=*/false, /*is_layer_norm=*/true, /*asymmetric_quantize_inputs=*/false); - // Verify the final output. - lstm_golden_output_ = { - { - // Batch0: 3 (input_sequence_size) * 3 (n_output) - 0.02129706, 0.140816242, 0.0112733059, // seq 0 - 0.0132302344, 0.152308047, 0.0346313119, // seq 1 - -0.0123688057, 0.165790111, 0.0893077999, // seq 2 - }, - { - // Batch1: 3 (input_sequence_size) * 3 (n_output) - -0.0226350538, 0.0916948169, 0.0769175813, // seq 0 - -0.0269966982, 0.149707705, 0.094149217, // seq 1 - -0.0103429332, 0.173016444, 0.0720508844, // seq 2 - }}; - - VerifyGoldens(lstm_input_, lstm_golden_output_, &layer_norm_lstm); + VerifyGoldens(&lstm); } TEST_P(Cifg_Peephole_Projection_LayerNorm_LstmOpTest, HybridUint8) { @@ -1550,7 +1515,7 @@ TEST_P(Cifg_Peephole_Projection_LayerNorm_LstmOpTest, HybridUint8) { const int n_cell = 4; const int n_output = 3; - LSTMOpModel layer_norm_lstm( + LSTMOpModel lstm( n_batch, n_input, n_cell, n_output, /*use_cifg=*/true, /*use_peephole=*/true, /*use_projection_weights=*/true, @@ -1558,23 +1523,7 @@ TEST_P(Cifg_Peephole_Projection_LayerNorm_LstmOpTest, HybridUint8) { /*weight_type=*/TensorType_UINT8, /*model_has_legacy_20_inputs=*/false, /*is_layer_norm=*/true, /*asymmetric_quantize_inputs=*/GetParam()); - // Verify the final output. 
- lstm_golden_output_ = { - { - // Batch0: 3 (input_sequence_size) * 3 (n_output) - 0.0212250091, 0.140474007, 0.0115012666, // seq 0 - 0.0130806509, 0.152660668, 0.0347516984, // seq 1 - -0.0124010444, 0.166042402, 0.0898982584, // seq 2 - }, - { - // Batch1: 3 (input_sequence_size) * 3 (n_output) - -0.0228835996, 0.0917588323, 0.0778886303, // seq 0 - -0.0275101066, 0.148769245, 0.0938384682, // seq 1 - -0.0103605557, 0.172605693, 0.0728750974, // seq 2 - }}; - - VerifyGoldens(lstm_input_, lstm_golden_output_, &layer_norm_lstm, - /*tolerance=*/0.0009021); + VerifyGoldens(&lstm, /*tolerance=*/0.000971057); } TEST_P(Cifg_Peephole_Projection_LayerNorm_LstmOpTest, HybridInt8) { @@ -1583,7 +1532,7 @@ TEST_P(Cifg_Peephole_Projection_LayerNorm_LstmOpTest, HybridInt8) { const int n_cell = 4; const int n_output = 3; - LSTMOpModel layer_norm_lstm( + LSTMOpModel lstm( n_batch, n_input, n_cell, n_output, /*use_cifg=*/true, /*use_peephole=*/true, /*use_projection_weights=*/true, @@ -1591,22 +1540,7 @@ TEST_P(Cifg_Peephole_Projection_LayerNorm_LstmOpTest, HybridInt8) { /*weight_type=*/TensorType_INT8, /*model_has_legacy_20_inputs=*/false, /*is_layer_norm=*/true, /*asymmetric_quantize_inputs=*/GetParam()); - // Goldens are results using FLOAT32 inference. - lstm_golden_output_ = {{ - // Batch0: 3 (input_sequence_size) * 3 (n_output) - 0.0212971, 0.140816, 0.0112733, // seq 0 - 0.0132302, 0.152308, 0.0346313, // seq 1 - -0.0123688, 0.16579, 0.0893078, // seq 2 - }, - { - // Batch1: 3 (input_sequence_size) * 3 (n_output) - -0.0226351, 0.0916948, 0.0769176, // seq 0 - -0.0269967, 0.149708, 0.0941492, // seq 1 - -0.0103429, 0.173016, 0.0720509, // seq 2 - }}; - - VerifyGoldens(lstm_input_, lstm_golden_output_, &layer_norm_lstm, - /*tolerance=*/1e-3); + VerifyGoldens(&lstm, /*tolerance=*/1e-3); } class LSTMIntegerOpModel : public SingleOpModel { From f942f4a240785cd11205c483c57cd37879b00516 Mon Sep 17 00:00:00 2001 From: Wenhao Jia Date: Wed, 22 Jul 2020 20:23:13 -0700 Subject: [PATCH 1124/2522] Remove two unused LOG(FATAL) methods from TpuNodeContext. PiperOrigin-RevId: 322711148 Change-Id: Id928e49c8227e4cbf0982443ed08dd46f767eaea --- tensorflow/core/tpu/kernels/tpu_execute_op.cc | 46 ++++++++-------- tensorflow/core/tpu/tpu_execute.cc | 26 +++++----- tensorflow/stream_executor/tpu/BUILD | 3 +- .../stream_executor/tpu/tpu_node_context.cc | 52 ++++++------------- .../stream_executor/tpu/tpu_node_context.h | 36 +++++-------- .../tpu/tpu_node_context_c_api.h | 7 +-- 6 files changed, 69 insertions(+), 101 deletions(-) diff --git a/tensorflow/core/tpu/kernels/tpu_execute_op.cc b/tensorflow/core/tpu/kernels/tpu_execute_op.cc index 8060aa95f17..cbfe789c252 100644 --- a/tensorflow/core/tpu/kernels/tpu_execute_op.cc +++ b/tensorflow/core/tpu/kernels/tpu_execute_op.cc @@ -198,8 +198,8 @@ struct InputBuffers { // Builds an InputBuffers object that describes the inputs to the computation. 
xla::StatusOr> BuildComputationInputs( OpKernelContext* context, const xla::Shape& input_host_shape, - const VariableUpdateMap& variable_updates, TpuNodeContext* node_context, - se::Stream* stream) { + const VariableUpdateMap& variable_updates, xla::Backend* backend, + int device_ordinal, se::Stream* stream) { profiler::TraceMe trace_me("BuildComputationInputs", /*level=*/2); OpInputList arg_list; TF_RETURN_IF_ERROR(context->input_list("args", &arg_list)); @@ -274,10 +274,8 @@ xla::StatusOr> BuildComputationInputs( validate_shape(variables[i].index(), *variables[i].var()->tensor())); } - se::DeviceMemoryAllocator* const allocator = node_context->memory_allocator(); - xla::TransferManager* const transfer_manager = - node_context->transfer_manager(); - const int device_ordinal = node_context->device_ordinal(); + se::DeviceMemoryAllocator* const allocator = backend->memory_allocator(); + xla::TransferManager* const transfer_manager = backend->transfer_manager(); auto input_buffers = absl::make_unique( transfer_manager->HostShapeToDeviceShape(input_host_shape)); @@ -411,7 +409,7 @@ xla::StatusOr> AllocateOutputTensors( } xla::TransferManager* const transfer_manager = - node_context->transfer_manager(); + node_context->backend()->transfer_manager(); std::vector output_tensor_shapes; output_tensor_shapes.reserve(sub_elements); @@ -434,7 +432,8 @@ xla::StatusOr> AllocateOutputTensors( TF_RET_CHECK(scoped_buffers.on_host_shape().IsTuple()); TF_RET_CHECK(!xla::ShapeUtil::IsNestedTuple(scoped_buffers.on_host_shape())); - se::DeviceMemoryAllocator* const allocator = node_context->memory_allocator(); + se::DeviceMemoryAllocator* const allocator = + node_context->backend()->memory_allocator(); auto output_buffers = absl::make_unique(std::move(scoped_buffers), allocator); @@ -633,10 +632,11 @@ Status TPUExecuteOp::DoWork(OpKernelContext* context) { TpuNodeContext::Create(device_ordinal)); profiler::TraceMe trace_me( - [&, device_ordinal] { - return absl::StrCat("TpuExecuteOp#device_ordinal=", device_ordinal, - ",id=", context->step_id(), - ",iter_num=", context->frame_iter().iter_id, "#"); + [device_ordinal, context] { + return profiler::TraceMeEncode( + "TpuExecuteOp", {{"device_ordinal", device_ordinal}, + {"id", context->step_id()}, + {"iter_num", context->frame_iter().iter_id}}); }, /*level=*/2); profiler::TraceMe trace_me_init("TPUExecuteOp::Init", /*level=*/2); @@ -649,9 +649,9 @@ Status TPUExecuteOp::DoWork(OpKernelContext* context) { // Shapes of the inputs and outputs, in xla::Shape form. const TPUExecutableInfoProto* proto = entry->get().get_executable_info(); - xla::TransferManager* const transfer_manager = - node_context->transfer_manager(); - CHECK(context->op_device_context()); + xla::Backend* const backend = node_context->backend(); + xla::TransferManager* const transfer_manager = backend->transfer_manager(); + TF_RET_CHECK(context->op_device_context()); se::Stream* stream = context->op_device_context()->stream(); TF_RET_CHECK(proto->input_shapes_size() == 1); @@ -666,8 +666,8 @@ Status TPUExecuteOp::DoWork(OpKernelContext* context) { proto->output_tensor_shapes().size())); TF_ASSIGN_OR_RETURN( std::unique_ptr input_buffers, - BuildComputationInputs(context, host_shape, variable_update_map, - node_context.get(), stream)); + BuildComputationInputs(context, host_shape, variable_update_map, backend, + device_ordinal, stream)); // Ideally this should be the host-to-device stream from XlaDeviceContext. 
// The particular anti-dependency this is avoiding (why we need a separate @@ -680,11 +680,11 @@ Status TPUExecuteOp::DoWork(OpKernelContext* context) { // TODO(jmolloy): Add the necessary plumbing to obtain the proper // host-to-device stream here. TF_ASSIGN_OR_RETURN(auto transfer_stream_ptr, - node_context->BorrowStream(device_ordinal)); + backend->BorrowStream(device_ordinal)); - se::DeviceMemoryAllocator* const allocator = node_context->memory_allocator(); - auto shaped_buffer = - input_buffers->ToShapedBuffer(host_shape, allocator, device_ordinal); + se::DeviceMemoryAllocator* const allocator = backend->memory_allocator(); + auto shaped_buffer = input_buffers->ToShapedBuffer(std::move(host_shape), + allocator, device_ordinal); if (transfer_manager->CanShapedBufferBeAccessedNow(stream->parent(), shaped_buffer)) { TF_RETURN_IF_ERROR(transfer_manager->WriteRootTupleIndexTable( @@ -733,8 +733,8 @@ Status TPUExecuteOp::DoWork(OpKernelContext* context) { << shaped_buffer.ToString(); std::vector input; - input.emplace_back( - xla::ExecutionInput(std::move(input_buffers->buffers), host_shape)); + input.emplace_back(xla::ExecutionInput(std::move(input_buffers->buffers), + shaped_buffer.on_host_shape())); // The buffers to be freed are in the `output` and will be automatically // freed when it goes out of the scope. In async mode, this means the buffers diff --git a/tensorflow/core/tpu/tpu_execute.cc b/tensorflow/core/tpu/tpu_execute.cc index d897eb7ee9b..99547cafc82 100644 --- a/tensorflow/core/tpu/tpu_execute.cc +++ b/tensorflow/core/tpu/tpu_execute.cc @@ -62,12 +62,12 @@ static bool tpu_cancellation_terminates_process = false; static bool tpu_cancellation_closes_chips = true; // Host-side runtime for transfers between TPU and host. +// TODO(b/161940519): Implement this class. class HostTransferManager { public: - using HostCommmandHandler = xla::TpuExecutable::HostCommandHandler; + explicit HostTransferManager(TpuNodeContext*, xla::Backend*) {} - explicit HostTransferManager(TpuNodeContext* node_context) - : node_context_(node_context) {} + using HostCommmandHandler = xla::TpuExecutable::HostCommandHandler; // Returns a function to be called when the TPU triggers a host command // interrupt while executing the current program. @@ -76,8 +76,6 @@ class HostTransferManager { const std::string& rendezvous_key_base, OpKernelContext* ctx); private: - TpuNodeContext* node_context_; // not owned - TF_DISALLOW_COPY_AND_ASSIGN(HostTransferManager); }; @@ -417,7 +415,9 @@ xla::StatusOr TPUExecute( profiler::TraceMe traceme("TPUExecute", 2); TF_RET_CHECK(tpu::TpuPlatformInterface::GetRegisteredPlatform() != nullptr); TF_RET_CHECK(tpu_program != nullptr); - VLOG(1) << "TPUExecute on device " << node_context->tensor_core_location(); + VLOG(1) << "TPUExecute on device " << node_context->device_ordinal(); + + xla::Backend* backend = node_context->backend(); XlaDevice* device = tensorflow::down_cast(ctx->device()->UnderlyingDevice()); @@ -425,19 +425,19 @@ xla::StatusOr TPUExecute( // Create a HostTransferManager to handle Send/Recv operations from the TPU. 
std::shared_ptr host_transfer_manager = - std::make_shared(node_context); + std::make_shared(node_context, backend); TF_ASSIGN_OR_RETURN(HostTransferManager::HostCommmandHandler handler, host_transfer_manager->Initialize( host_transfers, rendezvous_key_base, ctx)); VLOG(2) << "Cloud TPU: Executing computation on device " - << node_context->index_on_host(); + << node_context->device_ordinal(); xla::ExecutableRunOptions run_options; run_options.set_stream(stream); run_options.set_device_assignment(device_assignment); run_options.set_rng_seed(rng_seed); - run_options.set_allocator(node_context->memory_allocator()); + run_options.set_allocator(backend->memory_allocator()); run_options.set_host_to_device_stream(host_to_device_stream); const xla::ServiceExecutableRunOptions service_run_options(run_options); @@ -460,7 +460,7 @@ xla::StatusOr TPUExecute( TF_ASSIGN_OR_RETURN( module->input_output_alias_config(), xla::HloInputOutputAliasConfig::CreateFromProto( - node_context->transfer_manager()->HostShapeToDeviceShape( + backend->transfer_manager()->HostShapeToDeviceShape( module->config().entry_computation_layout().result_shape()), hlo_metadata.hlo_module().input_output_alias())); TF_RET_CHECK(executable.input_shapes().size() == arguments.size()); @@ -471,11 +471,11 @@ xla::StatusOr TPUExecute( xla::ShapeIndex(prefetch.index().begin(), prefetch.index().end())); } - TF_RETURN_IF_ERROR(UpdateDynamicInputs( - stream, node_context->memory_allocator(), &arguments, input_shapes)); + TF_RETURN_IF_ERROR(UpdateDynamicInputs(stream, backend->memory_allocator(), + &arguments, input_shapes)); auto tpu_executable = absl::make_unique( - tpu_program, std::move(module), handler); + tpu_program, std::move(module), /*host_command_handler=*/handler); const int32 device_ordinal = node_context->device_ordinal(); CancellationToken token; diff --git a/tensorflow/stream_executor/tpu/BUILD b/tensorflow/stream_executor/tpu/BUILD index 931cfde0cc2..17d4490a8f8 100644 --- a/tensorflow/stream_executor/tpu/BUILD +++ b/tensorflow/stream_executor/tpu/BUILD @@ -182,13 +182,12 @@ cc_library( ":tpu_executor_c_api_hdrs", ":tpu_node_context_c_api_hdrs", ":tpu_platform_interface", - ":tpu_transfer_manager_base", "//tensorflow/compiler/xla/service", "//tensorflow/compiler/xla/service:backend", - "//tensorflow/compiler/xla/service:platform_util", "//tensorflow/compiler/xla/service:stream_pool", "//tensorflow/compiler/xla/service:transfer_manager", "//tensorflow/core:framework", + "//tensorflow/core:lib", "//tensorflow/core/tpu:tpu_api", "//tensorflow/stream_executor:device_memory_allocator", "//tensorflow/stream_executor/lib", diff --git a/tensorflow/stream_executor/tpu/tpu_node_context.cc b/tensorflow/stream_executor/tpu/tpu_node_context.cc index b502264cfc7..2d7b73ca526 100644 --- a/tensorflow/stream_executor/tpu/tpu_node_context.cc +++ b/tensorflow/stream_executor/tpu/tpu_node_context.cc @@ -12,13 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ + #include "tensorflow/stream_executor/tpu/tpu_node_context.h" -#include "tensorflow/compiler/xla/service/backend.h" -#include "tensorflow/compiler/xla/service/platform_util.h" -#include "tensorflow/compiler/xla/service/transfer_manager.h" #include "tensorflow/core/tpu/tpu_api.h" -#include "tensorflow/stream_executor/device_memory_allocator.h" #include "tensorflow/stream_executor/tpu/tpu_executor_c_api.h" #include "tensorflow/stream_executor/tpu/tpu_node_context_c_api.h" @@ -36,6 +33,8 @@ StatusOr> TpuNodeContext::Create( tpu::NodeContextApiFn()->TpuNodeContext_CreateFn(device_ordinal, status.c_status); if (!status.status().ok()) { + // TpuNodeContext_CreateFn allocates a new XLA_TpuNodeContext regardless of + // status. It needs to be freed if it's not given to a TpuNodeContext below. tpu::NodeContextApiFn()->TpuNodeContext_FreeFn(node_context); return status.status(); } @@ -46,13 +45,6 @@ TpuNodeContext::~TpuNodeContext() { tpu::NodeContextApiFn()->TpuNodeContext_FreeFn(node_context_); } -/* static */ -Status TpuNodeContext::Initialize(int device_ordinal) { - StatusHelper status; - TpuNodeContext_Initialize(device_ordinal, status.c_status); - return status.status(); -} - /* static */ Status TpuNodeContext::StopChipHeartbeats() { StatusHelper status; @@ -68,21 +60,20 @@ Status TpuNodeContext::CloseTpuHost() { } /* static */ -tensorflow::tpu::TpuPlatformInterface* TpuNodeContext::platform() { +Status TpuNodeContext::Initialize(int device_ordinal) { + StatusHelper status; + TpuNodeContext_Initialize(device_ordinal, status.c_status); + return status.status(); +} + +/* static */ +TpuPlatformInterface* TpuNodeContext::platform() { return TpuPlatformInterface::GetRegisteredPlatform(); } -/* static */ -stream_executor::DeviceMemoryAllocator* TpuNodeContext::memory_allocator() { - static stream_executor::StreamExecutorMemoryAllocator* memory_allocator = - new stream_executor::StreamExecutorMemoryAllocator( - platform(), - xla::PlatformUtil::GetStreamExecutors(platform()).ValueOrDie()); - return memory_allocator; -} +int TpuNodeContext::device_ordinal() const { return device_ordinal_; } -/* static */ -xla::Backend* TpuNodeContext::backend() { +xla::Backend* TpuNodeContext::backend() const { static xla::Backend* backend = xla::Backend::CreateBackend( xla::BackendOptions().set_platform(platform())) @@ -91,21 +82,8 @@ xla::Backend* TpuNodeContext::backend() { return backend; } -/* static */ -StatusOr TpuNodeContext::BorrowStream( - int device_ordinal) { - return backend()->BorrowStream(device_ordinal); -} - -/* static */ -StatusOr TpuNodeContext::BorrowStream( - stream_executor::StreamExecutor* executor) { - return backend()->BorrowStream(executor); -} - -/* static */ -xla::TransferManager* TpuNodeContext::transfer_manager() { - return xla::TransferManager::GetForPlatform(platform()).ValueOrDie(); +stream_executor::StreamExecutor* TpuNodeContext::stream_executor() const { + return backend()->stream_executor(device_ordinal_).ValueOrDie(); } } // namespace tpu diff --git a/tensorflow/stream_executor/tpu/tpu_node_context.h b/tensorflow/stream_executor/tpu/tpu_node_context.h index 5f68bc677cc..27cf32f854f 100644 --- a/tensorflow/stream_executor/tpu/tpu_node_context.h +++ b/tensorflow/stream_executor/tpu/tpu_node_context.h @@ -23,6 +23,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/stream_pool.h" #include "tensorflow/compiler/xla/service/transfer_manager.h" #include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/platform/macros.h" #include "tensorflow/stream_executor/device_memory_allocator.h" #include "tensorflow/stream_executor/lib/status.h" #include "tensorflow/stream_executor/lib/statusor.h" @@ -33,6 +34,11 @@ limitations under the License. namespace tensorflow { namespace tpu { +// A TpuNodeContext object represents a specific TPU node (core). The static +// class methods represent host-wide actions. +// +// First call Initialize in a freshly reset system. Then call Create to talk to +// individual nodes. class TpuNodeContext final { public: using Status = stream_executor::port::Status; @@ -47,41 +53,25 @@ class TpuNodeContext final { } ~TpuNodeContext(); - TpuNodeContext(const TpuNodeContext&) = delete; - TpuNodeContext& operator=(const TpuNodeContext&) = delete; - - static Status Initialize(int device_ordinal); - static Status StopChipHeartbeats(); static Status CloseTpuHost(); - static tensorflow::tpu::TpuPlatformInterface* platform(); + static Status Initialize(int device_ordinal); - static stream_executor::DeviceMemoryAllocator* memory_allocator(); + static TpuPlatformInterface* platform(); - static xla::TransferManager* transfer_manager(); + int device_ordinal() const; - static xla::Backend* backend(); + xla::Backend* backend() const; - static StatusOr BorrowStream(int device_ordinal); - - static StatusOr BorrowStream( - stream_executor::StreamExecutor* executor); - - stream_executor::StreamExecutor* stream_executor() { - LOG(FATAL) << "Not implemented yet."; - } - - std::string tensor_core_location() { LOG(FATAL) << "Not implemented yet."; } - - int index_on_host() { LOG(FATAL) << "Not implemented yet."; } - - int device_ordinal() const { return device_ordinal_; } + stream_executor::StreamExecutor* stream_executor() const; private: const int device_ordinal_; XLA_TpuNodeContext* const node_context_; + + TF_DISALLOW_COPY_AND_ASSIGN(TpuNodeContext); }; } // namespace tpu diff --git a/tensorflow/stream_executor/tpu/tpu_node_context_c_api.h b/tensorflow/stream_executor/tpu/tpu_node_context_c_api.h index 009671ef985..55288d2ba38 100644 --- a/tensorflow/stream_executor/tpu/tpu_node_context_c_api.h +++ b/tensorflow/stream_executor/tpu/tpu_node_context_c_api.h @@ -26,19 +26,20 @@ XLA_TpuNodeContext* TpuNodeContext_Create(int device_ordinal, SE_Status* status); void TpuNodeContext_Free(XLA_TpuNodeContext* node_context); -void TpuNodeContext_Initialize(int device_ordinal, SE_Status* status); - void TpuNodeContext_StopChipHeartbeats(SE_Status* status); + void TpuNodeContext_CloseTpuHost(SE_Status* status); +void TpuNodeContext_Initialize(int device_ordinal, SE_Status* status); + } // extern "C" struct TfTpu_NodeContextApiFn { TFTPU_ADD_FN_IN_STRUCT(TpuNodeContext_Create); TFTPU_ADD_FN_IN_STRUCT(TpuNodeContext_Free); - TFTPU_ADD_FN_IN_STRUCT(TpuNodeContext_Initialize); TFTPU_ADD_FN_IN_STRUCT(TpuNodeContext_StopChipHeartbeats); TFTPU_ADD_FN_IN_STRUCT(TpuNodeContext_CloseTpuHost); + TFTPU_ADD_FN_IN_STRUCT(TpuNodeContext_Initialize); }; #endif // TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_NODE_CONTEXT_C_API_H_ From ce62fb78de0ebe2b9a862629b61044ef47fccfe9 Mon Sep 17 00:00:00 2001 From: Hannes Achleitner Date: Fri, 1 May 2020 10:34:09 +0200 Subject: [PATCH 1125/2522] Give Android sample a meaningfull name --- tensorflow/examples/android/settings.gradle | 1 + 1 file changed, 1 insertion(+) create mode 100644 
tensorflow/examples/android/settings.gradle diff --git a/tensorflow/examples/android/settings.gradle b/tensorflow/examples/android/settings.gradle new file mode 100644 index 00000000000..c3e96d78369 --- /dev/null +++ b/tensorflow/examples/android/settings.gradle @@ -0,0 +1 @@ +rootProject.name = "TensorFlow sample" From 4bc27730247d2dab6f51da558f563fc9cf95427b Mon Sep 17 00:00:00 2001 From: Hannes Achleitner Date: Thu, 23 Jul 2020 05:43:59 +0200 Subject: [PATCH 1126/2522] Update Gradle --- .../examples/android/gradle/wrapper/gradle-wrapper.properties | 4 ++-- tensorflow/examples/android/gradlew | 0 2 files changed, 2 insertions(+), 2 deletions(-) mode change 100644 => 100755 tensorflow/examples/android/gradlew diff --git a/tensorflow/examples/android/gradle/wrapper/gradle-wrapper.properties b/tensorflow/examples/android/gradle/wrapper/gradle-wrapper.properties index bd9ee87db37..4a0bf945ec9 100644 --- a/tensorflow/examples/android/gradle/wrapper/gradle-wrapper.properties +++ b/tensorflow/examples/android/gradle/wrapper/gradle-wrapper.properties @@ -1,6 +1,6 @@ -#Sat Nov 18 15:06:47 CET 2017 +#Thu Jul 23 05:42:16 CEST 2020 distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-4.1-all.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-6.5.1-all.zip diff --git a/tensorflow/examples/android/gradlew b/tensorflow/examples/android/gradlew old mode 100644 new mode 100755 From 2889981f87c199eb7fc7d8fe98357dd9cff0fd88 Mon Sep 17 00:00:00 2001 From: Hannes Achleitner Date: Thu, 23 Jul 2020 05:44:22 +0200 Subject: [PATCH 1127/2522] Android Studio 4.0 --- tensorflow/examples/android/build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/examples/android/build.gradle b/tensorflow/examples/android/build.gradle index c499b935223..ab715c5ca39 100644 --- a/tensorflow/examples/android/build.gradle +++ b/tensorflow/examples/android/build.gradle @@ -29,7 +29,7 @@ buildscript { } dependencies { - classpath 'com.android.tools.build:gradle:3.3.1' + classpath 'com.android.tools.build:gradle:4.0.1' classpath 'org.apache.httpcomponents:httpclient:4.5.4' } } From ee74b70ee522c5490e7acc73c3349d62c36f634b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Jul 2020 20:45:35 -0700 Subject: [PATCH 1128/2522] Integrate LLVM at llvm/llvm-project@99ad956fdaee Updates LLVM usage to match [99ad956fdaee](https://github.com/llvm/llvm-project/commit/99ad956fdaee) PiperOrigin-RevId: 322713350 Change-Id: Ie7ac795ce9dde327a1bd41db99d352b83cbf0b1f --- tensorflow/workspace.bzl | 4 ++-- third_party/llvm/llvm.autogenerated.BUILD | 4 ++-- third_party/mlir/BUILD | 1 + 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 2cce1bd0dfa..586bda6a3a2 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -711,8 +711,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. 
- LLVM_COMMIT = "5623da56d07b2fa434825af0f3e8494afacf3c52" - LLVM_SHA256 = "dce02df09be24922304218e55fb5d4688ec835dcf7a9f9050d61cd70c0ed8706" + LLVM_COMMIT = "99ad956fdaee5398fdcf46fa49cb433cf52dc461" + LLVM_SHA256 = "e48f529063cc31e5f5844f7395fbecb0a3e9cba0bcbeafa40f5001273bad3c51" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), diff --git a/third_party/llvm/llvm.autogenerated.BUILD b/third_party/llvm/llvm.autogenerated.BUILD index fb1eff30e06..befc20c4fab 100644 --- a/third_party/llvm/llvm.autogenerated.BUILD +++ b/third_party/llvm/llvm.autogenerated.BUILD @@ -155,10 +155,10 @@ gentbl( name = "InstCombineTableGen", tbl_outs = [( "-gen-searchable-tables", - "lib/Transforms/InstCombine/InstCombineTables.inc", + "lib/Target/AMDGPU/InstCombineTables.inc", )], tblgen = ":llvm-tblgen", - td_file = "lib/Transforms/InstCombine/InstCombineTables.td", + td_file = "lib/Target/AMDGPU/InstCombineTables.td", td_srcs = glob([ "include/llvm/CodeGen/*.td", "include/llvm/IR/Intrinsics*.td", diff --git a/third_party/mlir/BUILD b/third_party/mlir/BUILD index 12f73c54ec6..ae413a160d9 100644 --- a/third_party/mlir/BUILD +++ b/third_party/mlir/BUILD @@ -500,6 +500,7 @@ cc_library( deps = [ ":Affine", ":IR", + ":Support", "@llvm-project//llvm:Support", ], ) From 488448c742e99663a46946a40df477180b2c6935 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Wed, 22 Jul 2020 20:56:00 -0700 Subject: [PATCH 1129/2522] Wrap/rewrap ndarrays in tf.vectorized_map PiperOrigin-RevId: 322714372 Change-Id: I59a66d8d60674800df36712ab53902037642bf2f --- .../python/ops/numpy_ops/np_interop_test.py | 3 +-- .../ops/parallel_for/control_flow_ops.py | 23 ++++++++++++++++--- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/ops/numpy_ops/np_interop_test.py b/tensorflow/python/ops/numpy_ops/np_interop_test.py index 20b8a7118a5..ec350804e02 100644 --- a/tensorflow/python/ops/numpy_ops/np_interop_test.py +++ b/tensorflow/python/ops/numpy_ops/np_interop_test.py @@ -281,8 +281,7 @@ class InteropTest(tf.test.TestCase): a = np.ones((batch_size, 32, 32)) c = tf.vectorized_map(outer_product, a) - # # TODO(nareshmodi): vectorized_map doesn't rewrap tensors in ndarray. - # self.assertIsInstance(c, np.ndarray) + self.assertIsInstance(c, np.ndarray) self.assertEqual(c.shape, (batch_size, 32, 32, 32, 32)) def testJacobian(self): diff --git a/tensorflow/python/ops/parallel_for/control_flow_ops.py b/tensorflow/python/ops/parallel_for/control_flow_ops.py index 8507d9e30e3..deb41873347 100644 --- a/tensorflow/python/ops/parallel_for/control_flow_ops.py +++ b/tensorflow/python/ops/parallel_for/control_flow_ops.py @@ -31,6 +31,7 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import tensor_array_ops +from tensorflow.python.ops.numpy_ops import np_arrays from tensorflow.python.ops.parallel_for.pfor import PFor from tensorflow.python.ops.parallel_for.pfor import PForConfig from tensorflow.python.platform import tf_logging as logging @@ -246,6 +247,7 @@ def _pfor_impl(loop_fn, loop_fn_outputs = loop_fn(loop_var) # Convert outputs to Tensor if needed. 
+ rewrap_as_ndarray = False tmp_loop_fn_outputs = [] for loop_fn_output in nest.flatten(loop_fn_outputs): if (loop_fn_output is not None and not isinstance( @@ -256,7 +258,12 @@ def _pfor_impl(loop_fn, " Alternatively, output the indices and values of the" " IndexedSlices separately, and handle the vectorized" " outputs directly." % loop_fn_output) - loop_fn_output = ops.convert_to_tensor(loop_fn_output) + loop_fn_output = ops.convert_to_tensor(loop_fn_output) + elif isinstance(loop_fn_output, np_arrays.ndarray): + loop_fn_output = loop_fn_output.data + rewrap_as_ndarray = True + else: + loop_fn_output = ops.convert_to_tensor(loop_fn_output) tmp_loop_fn_outputs.append(loop_fn_output) loop_fn_outputs = nest.pack_sequence_as(loop_fn_outputs, tmp_loop_fn_outputs) @@ -277,7 +284,10 @@ def _pfor_impl(loop_fn, pfor_config=pfor_config) outputs = [] for loop_fn_output in nest.flatten(loop_fn_outputs): - outputs.append(converter.convert(loop_fn_output)) + output = converter.convert(loop_fn_output) + if rewrap_as_ndarray: + output = np_arrays.tensor_to_ndarray(output) + outputs.append(output) return nest.pack_sequence_as(loop_fn_outputs, outputs) else: if pfor_config is not None and pfor_config._has_reductions(): # pylint: disable=protected-access @@ -294,7 +304,10 @@ def _pfor_impl(loop_fn, remaining_outputs = [] flattened_loop_fn_outputs = nest.flatten(loop_fn_outputs) for loop_fn_output in flattened_loop_fn_outputs: - remaining_outputs.append(converter.convert(loop_fn_output)) + output = converter.convert(loop_fn_output) + if rewrap_as_ndarray: + output = np_arrays.tensor_to_ndarray(output) + remaining_outputs.append(output) with ops.name_scope("pfor_tiled"): loop_fn_dtypes = [ops.convert_to_tensor(x).dtype @@ -329,6 +342,10 @@ def _pfor_impl(loop_fn, for x, y in zip(remaining_outputs, tiled_outputs)]) else: outputs = tiled_outputs + flattened_outputs = nest.flatten(outputs) + if rewrap_as_ndarray: + flattened_outputs = [ + np_arrays.tensor_to_ndarray(x) for x in flattened_outputs] return nest.pack_sequence_as(loop_fn_outputs, nest.flatten(outputs)) From 4771a3f138817966af3b268f2e1135080f89dc15 Mon Sep 17 00:00:00 2001 From: Hannes Achleitner Date: Thu, 23 Jul 2020 06:11:59 +0200 Subject: [PATCH 1130/2522] ignore .cxx directory --- tensorflow/examples/android/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/examples/android/.gitignore b/tensorflow/examples/android/.gitignore index d245ab61095..fbd0a2dc7d7 100644 --- a/tensorflow/examples/android/.gitignore +++ b/tensorflow/examples/android/.gitignore @@ -27,3 +27,4 @@ out/ .navigation/ /captures .externalNativeBuild +.cxx From 0efc20fc940777e35f56e94965857fcde31fe912 Mon Sep 17 00:00:00 2001 From: Hannes Achleitner Date: Thu, 23 Jul 2020 06:12:28 +0200 Subject: [PATCH 1131/2522] Update libraries --- tensorflow/examples/android/build.gradle | 2 +- tensorflow/examples/android/download-models.gradle | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/examples/android/build.gradle b/tensorflow/examples/android/build.gradle index ab715c5ca39..499f0141a06 100644 --- a/tensorflow/examples/android/build.gradle +++ b/tensorflow/examples/android/build.gradle @@ -30,7 +30,7 @@ buildscript { dependencies { classpath 'com.android.tools.build:gradle:4.0.1' - classpath 'org.apache.httpcomponents:httpclient:4.5.4' + classpath 'org.apache.httpcomponents:httpclient:4.5.12' } } diff --git a/tensorflow/examples/android/download-models.gradle b/tensorflow/examples/android/download-models.gradle index 
d3b67eab52b..727ef2cc850 100644 --- a/tensorflow/examples/android/download-models.gradle +++ b/tensorflow/examples/android/download-models.gradle @@ -23,7 +23,7 @@ buildscript { jcenter() } dependencies { - classpath 'de.undercouch:gradle-download-task:3.2.0' + classpath 'de.undercouch:gradle-download-task:4.0.2' } } From 8deeb07947a6ae4dfac5b6ee752b15cec78bee3e Mon Sep 17 00:00:00 2001 From: Hannes Achleitner Date: Thu, 23 Jul 2020 06:12:44 +0200 Subject: [PATCH 1132/2522] targetSdkVersion 29 --- tensorflow/examples/android/AndroidManifest.xml | 4 +++- tensorflow/examples/android/build.gradle | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tensorflow/examples/android/AndroidManifest.xml b/tensorflow/examples/android/AndroidManifest.xml index a3b53da6a35..01aea08708e 100644 --- a/tensorflow/examples/android/AndroidManifest.xml +++ b/tensorflow/examples/android/AndroidManifest.xml @@ -16,6 +16,7 @@ --> @@ -28,7 +29,8 @@ android:debuggable="true" android:label="@string/app_name" android:icon="@drawable/ic_launcher" - android:theme="@style/MaterialTheme"> + android:theme="@style/MaterialTheme" + android:banner="@drawable/ic_launcher"> Date: Thu, 23 Jul 2020 06:25:01 +0200 Subject: [PATCH 1133/2522] fix manifest lint error --- tensorflow/examples/android/AndroidManifest.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/examples/android/AndroidManifest.xml b/tensorflow/examples/android/AndroidManifest.xml index 01aea08708e..7ddf9e08bf4 100644 --- a/tensorflow/examples/android/AndroidManifest.xml +++ b/tensorflow/examples/android/AndroidManifest.xml @@ -26,7 +26,6 @@ Date: Wed, 22 Jul 2020 23:05:10 -0700 Subject: [PATCH 1134/2522] Cleanup of oneDNN build files and removing obsolete code. --- tensorflow/tensorflow.bzl | 2 +- third_party/mkl/BUILD | 9 --------- third_party/mkl/build_defs.bzl | 4 ---- third_party/mkl_dnn/mkldnn_v1.BUILD | 10 ++++++++-- 4 files changed, 9 insertions(+), 16 deletions(-) diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 349c1e1532b..278bf1abfef 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -328,7 +328,7 @@ def tf_copts( if_mkl(["-DINTEL_MKL=1", "-DEIGEN_USE_VML"]) + if_mkl_open_source_only(["-DINTEL_MKL_DNN_ONLY"]) + if_mkl_v1(["-DENABLE_MKLDNN_V1", "-DENABLE_INTEL_MKL_BFLOAT16"]) + - if_mkldnn_threadpool(["-DENABLE_MKLDNN_THREADPOOL", "-DINTEL_MKL_DNN_ONLY"]) + + if_mkldnn_threadpool(["-DENABLE_MKLDNN_THREADPOOL"]) + if_enable_mkl(["-DENABLE_MKL"]) + if_ngraph(["-DINTEL_NGRAPH=1"]) + if_android_arm(["-mfpu=neon"]) + diff --git a/third_party/mkl/BUILD b/third_party/mkl/BUILD index bbbec855ab7..470b3d50ea5 100644 --- a/third_party/mkl/BUILD +++ b/third_party/mkl/BUILD @@ -10,15 +10,6 @@ config_setting( visibility = ["//visibility:public"], ) -config_setting( - name = "build_with_mkl_ml_only", - define_values = { - "build_with_mkl": "true", - "build_with_mkl_ml_only": "true", - }, - visibility = ["//visibility:public"], -) - config_setting( name = "build_with_mkl_lnx_x64", define_values = { diff --git a/third_party/mkl/build_defs.bzl b/third_party/mkl/build_defs.bzl index c1ab9f29686..7708aa387d9 100644 --- a/third_party/mkl/build_defs.bzl +++ b/third_party/mkl/build_defs.bzl @@ -42,7 +42,6 @@ def if_mkl_ml(if_true, if_false = []): """ return select({ "@org_tensorflow//third_party/mkl_dnn:build_with_mkl_opensource": if_false, - "@org_tensorflow//third_party/mkl_dnn:build_with_mkldnn_threadpool": if_false, "//conditions:default": if_true, }) @@ -92,9 +91,6 @@ def mkl_deps(): return 
select({ "@org_tensorflow//third_party/mkl_dnn:build_with_mkl_dnn_only": ["@mkl_dnn"], "@org_tensorflow//third_party/mkl_dnn:build_with_mkl_dnn_v1_only": ["@mkl_dnn_v1//:mkl_dnn"], - "@org_tensorflow//third_party/mkl_dnn:build_with_mkldnn_threadpool": ["@mkl_dnn_v1//:mkl_dnn"], - "@org_tensorflow//third_party/mkl_dnn:build_with_mkl_opensource": ["@mkl_dnn_v1//:mkl_dnn"], - "@org_tensorflow//third_party/mkl:build_with_mkl_ml_only": ["@org_tensorflow//third_party/mkl:intel_binary_blob"], "@org_tensorflow//third_party/mkl:build_with_mkl": [ "@org_tensorflow//third_party/mkl:intel_binary_blob", "@mkl_dnn", diff --git a/third_party/mkl_dnn/mkldnn_v1.BUILD b/third_party/mkl_dnn/mkldnn_v1.BUILD index 438aa8dc03d..adbf1161781 100644 --- a/third_party/mkl_dnn/mkldnn_v1.BUILD +++ b/third_party/mkl_dnn/mkldnn_v1.BUILD @@ -6,6 +6,12 @@ load( "if_mkl_v1", "if_mkldnn_threadpool", ) + +load( + "@org_tensorflow//third_party/mkl:build_defs.bzl", + "if_mkl_ml", +) + load( "@org_tensorflow//third_party:common.bzl", "template_rule", @@ -109,9 +115,9 @@ cc_library( "src/cpu/xbyak", ], visibility = ["//visibility:public"], - deps = if_mkl_open_source_only( - [], + deps = if_mkl_ml( ["@org_tensorflow//third_party/mkl:intel_binary_blob"], + [], ), ) From f59ff5d44e15c421a170eaf7ec38b1a1cf2a97de Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Wed, 22 Jul 2020 23:12:08 -0700 Subject: [PATCH 1135/2522] [XLA:SPMD] Improve resharding 1. Fix collective-permute bug. Should not skip i -> i pairs since that would result in 0 in the output. 2. All-to-all sharding for divisible partition counts, i.e., from [4,2] to [2,4] can be done as a subgroup all-to-all, since 4 % 2 == 0 3. Multi-step all-to-all resharding. E.g., resharding from [16,8,1] to [1,16,8] can be done via an intermediate sharding to [16,1,8]. 4. Allow more ReshapeSharding cases. PiperOrigin-RevId: 322729324 Change-Id: Ica2cf164e3c2bd15953ce37d6223723501be5b87 --- .../compiler/xla/service/hlo_sharding_util.cc | 19 ++- .../xla/service/hlo_sharding_util_test.cc | 14 +++ .../xla/service/spmd/spmd_partitioner.cc | 110 ++++++++++++------ .../xla/service/spmd/spmd_partitioner.h | 5 +- .../xla/service/spmd/spmd_partitioner_test.cc | 50 ++++++++ .../xla/service/spmd/spmd_partitioner_util.cc | 94 +++++++++++---- .../xla/service/spmd/spmd_partitioner_util.h | 10 +- 7 files changed, 236 insertions(+), 66 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_sharding_util.cc b/tensorflow/compiler/xla/service/hlo_sharding_util.cc index 7fc05608800..11a24b30ac9 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding_util.cc +++ b/tensorflow/compiler/xla/service/hlo_sharding_util.cc @@ -190,13 +190,22 @@ absl::optional ReshapeSharding(const Shape& source_shape, target_dims_stack.push_back(t_size); } else if (s_size > t_size) { // Dimension split. - if (s_size % t_size != 0 || t_size % s_partitions != 0) { + if (s_size % t_size != 0 || s_size % s_partitions != 0) { + return absl::nullopt; + } + if (t_size % s_partitions == 0) { + target_tile_assignment_dimensions.push_back(s_partitions); + // We have part of the s_size unprocessed, so put it back to stack. + source_dims_stack.push_back(s_size / t_size); + sharding_tile_dims_stack.push_back(1); + } else if (s_partitions % t_size == 0) { + target_tile_assignment_dimensions.push_back(t_size); + // We have part of the s_size unprocessed, so put it back to stack. 
+ source_dims_stack.push_back(s_size / t_size); + sharding_tile_dims_stack.push_back(s_partitions / t_size); + } else { return absl::nullopt; } - target_tile_assignment_dimensions.push_back(s_partitions); - // We have part of the s_size unprocessed, so put it back to stack. - source_dims_stack.push_back(s_size / t_size); - sharding_tile_dims_stack.push_back(1); } else { // Dimension merge. Also merge the source dimension with the next, and // process it next time. diff --git a/tensorflow/compiler/xla/service/hlo_sharding_util_test.cc b/tensorflow/compiler/xla/service/hlo_sharding_util_test.cc index 02496c75965..08f136b2e45 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding_util_test.cc +++ b/tensorflow/compiler/xla/service/hlo_sharding_util_test.cc @@ -76,6 +76,20 @@ TEST(HloShardingUtilTest, ReshapeShardingTiledSplit) { EXPECT_EQ(result.value(), output_sharding); } +TEST(HloShardingUtilTest, ReshapeShardingTiledSplit2) { + Shape input_shape = ShapeUtil::MakeShape(F32, {16, 7}); + Shape output_shape = ShapeUtil::MakeShape(F32, {4, 4, 7}); + Array2D tile(16, 1); + tile.FillIota(0); + HloSharding input_sharding = HloSharding::Tile(tile); + tile.Reshape({4, 4, 1}); + HloSharding output_sharding = HloSharding::Tile(tile); + absl::optional result = + ReshapeSharding(input_shape, output_shape, input_sharding); + EXPECT_TRUE(result.has_value()); + EXPECT_EQ(result.value(), output_sharding); +} + TEST(HloShardingUtilTest, ReshapeShardingTiledSplitThenMerge) { Shape input_shape = ShapeUtil::MakeShape(F32, {16, 4, 7}); Shape output_shape = ShapeUtil::MakeShape(F32, {4, 16, 7}); diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc index ceead32f530..bac5c812814 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc @@ -267,8 +267,7 @@ PartitionedHlo PartitionedHlo::ReshardNoCache(const HloSharding& target) { if (auto src_tgt_dims = GetReshardAllToAllSourceTargetDims(sharding(), target)) { - return ReshardWithAllToAll(target, src_tgt_dims->first, - src_tgt_dims->second); + return ReshardWithAllToAll(target, *src_tgt_dims); } // If not replicated yet, first replicate and then reshard to use one of the @@ -734,40 +733,82 @@ PartitionedHlo PartitionedHlo::Broadcast() const { return PartitionedHlo(result, base_shape_, state_); } -PartitionedHlo PartitionedHlo::ReshardWithAllToAll(const HloSharding& target, - int64 source_dim, - int64 target_dim) const { - const int64 group_size = sharding().tile_assignment().dim(source_dim); - - // If the device order is different in the target, fix the order with - // ReshardWithCollectivePermute. 
- std::vector xpose_dims(target.tile_assignment().num_dimensions()); - std::iota(xpose_dims.begin(), xpose_dims.end(), 0); - xpose_dims[source_dim] = target_dim; - xpose_dims[target_dim] = source_dim; - auto input_sharding_fixed_device_order = - hlo_sharding_util::TransposeSharding(target, xpose_dims); - if (input_sharding_fixed_device_order != sharding()) { - auto fixed_order = - ReshardWithCollectivePermute(input_sharding_fixed_device_order); - return fixed_order.ReshardWithAllToAll(target, source_dim, target_dim); +PartitionedHlo PartitionedHlo::ReshardWithAllToAll( + const HloSharding& target, + absl::Span> source_target_dims) const { + if (source_target_dims.empty()) { + if (target == sharding()) { + return *this; + } + // If the device order is different in the target, fix the order with + // ReshardWithCollectivePermute. + return ReshardWithCollectivePermute(target); } - auto padded_hlo = - PadBaseShapeBeforeUnevenTiledSharding(hlo_, target, state_.b); + // Swap one pair of dimensions. + int64 source_dim = source_target_dims[0].first; + int64 target_dim = source_target_dims[0].second; + const int64 group_size = sharding().tile_assignment().dim(source_dim) / + sharding().tile_assignment().dim(target_dim); - // The order of ids in the group must follow the target sharding. - std::vector groups(target.tile_assignment().num_elements() / - group_size); - target.tile_assignment().Each( + auto temp_target_tile = sharding().tile_assignment(); + { + std::vector reshape_tile_dims(temp_target_tile.num_dimensions() + 2); + int64 i = 0; + int64 added_source_dim = -1; + int64 added_target_dim = -1; + for (int64 j = 0; j < temp_target_tile.num_dimensions(); ++j) { + if (source_dim == j) { + reshape_tile_dims[i] = temp_target_tile.dim(j) / group_size; + reshape_tile_dims[++i] = group_size; + added_source_dim = i; + } else if (target_dim == j) { + reshape_tile_dims[i] = temp_target_tile.dim(j); + reshape_tile_dims[++i] = 1; + added_target_dim = i; + } else { + reshape_tile_dims[i] = temp_target_tile.dim(j); + } + ++i; + } + temp_target_tile.Reshape(reshape_tile_dims); + std::vector xpose_dims(temp_target_tile.num_dimensions()); + std::iota(xpose_dims.begin(), xpose_dims.end(), 0); + xpose_dims[added_source_dim] = added_target_dim; + xpose_dims[added_target_dim] = added_source_dim; + temp_target_tile = hlo_sharding_util::TransposeSharding( + HloSharding::Tile(temp_target_tile), xpose_dims) + .tile_assignment(); + auto temp_target_tile_dims = sharding().tile_assignment().dimensions(); + temp_target_tile_dims[source_dim] = + sharding().tile_assignment().dim(target_dim); + temp_target_tile_dims[target_dim] = + sharding().tile_assignment().dim(source_dim); + temp_target_tile.Reshape(temp_target_tile_dims); + } + auto temp_target = HloSharding::Tile(temp_target_tile); + + auto padded_shape = hlo_->shape(); + padded_shape.set_dimensions( + target_dim, + RoundUpToNearest(padded_shape.dimensions(target_dim), + temp_target.tile_assignment().dim(target_dim))); + auto padded_hlo = PadToShape(hlo_, padded_shape, state_.b); + + // The order of ids in the group must follow the temp_target sharding. 
+ std::vector groups( + temp_target.tile_assignment().num_elements() / group_size); + temp_target.tile_assignment().Each( [&](absl::Span indices, int64 device) { int64 group_id = 0; for (int64 dim = 0; dim < indices.size(); ++dim) { if (dim == target_dim) { - continue; + group_id *= temp_target.tile_assignment().dim(dim) / group_size; + group_id += indices[dim] / group_size; + } else { + group_id *= temp_target.tile_assignment().dim(dim); + group_id += indices[dim]; } - group_id *= target.tile_assignment().dim(dim); - group_id += indices[dim]; } groups[group_id].add_replica_ids(device); }); @@ -819,14 +860,17 @@ PartitionedHlo PartitionedHlo::ReshardWithAllToAll(const HloSharding& target, result = state_.b->AddInstruction( HloInstruction::CreateReshape(new_shape, transpose)); - const Shape result_shape = MakePartitionedShape(base_shape_, target); + const Shape result_shape = MakePartitionedShape(base_shape_, temp_target); if (result_shape != result->shape()) { result = state_.b->AddInstruction(HloInstruction::CreateSlice( result_shape, result, std::vector(result_shape.rank(), 0), result_shape.dimensions(), std::vector(result_shape.rank(), 1))); } - result->set_sharding(target); - return PartitionedHlo(result, base_shape_, state_); + result->set_sharding(temp_target); + auto remaining_source_target_dims = source_target_dims; + remaining_source_target_dims.remove_prefix(1); + return PartitionedHlo(result, base_shape_, state_) + .ReshardWithAllToAll(target, remaining_source_target_dims); } PartitionedHlo PartitionedHlo::ReshardWithCollectivePermute( @@ -837,9 +881,7 @@ PartitionedHlo PartitionedHlo::ReshardWithCollectivePermute( sharding().tile_assignment().Each( [&](absl::Span indices, int64 src_device) { int64 dst_device = target.tile_assignment()(indices); - if (dst_device != src_device) { - src_dst_pairs.emplace_back(src_device, dst_device); - } + src_dst_pairs.emplace_back(src_device, dst_device); }); auto cp = state_.collective_ops_creator.create_cross_partition_collective_permute( diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h index 40881b4b91c..606a7ae5f14 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h @@ -284,8 +284,9 @@ class PartitionedHlo { // Helper function to reshard the tensor using AllToAll (instead of the // default of Replicate followed by Slice). - PartitionedHlo ReshardWithAllToAll(const HloSharding& target, - int64 source_dim, int64 target_dim) const; + PartitionedHlo ReshardWithAllToAll( + const HloSharding& target, + absl::Span> source_target_dims) const; // Helper function to reshard the tensor using CollectivePermute. 
PartitionedHlo ReshardWithCollectivePermute(const HloSharding& target) const; diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc index 5b6c869c5fa..1045d1187b8 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc @@ -3792,6 +3792,56 @@ ENTRY entry { 4); } +TEST_F(SpmdPartitioningTest, SubgroupAllToAllReshard2) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %param0 = f32[8,8] parameter(0), + sharding={devices=[2,4]0,1,2,3,4,5,6,7} + ROOT %copy = f32[8,8] copy(%param0), + sharding={devices=[4,2]0,1,4,5,2,3,6,7} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/8)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto all_to_all = op::AllToAll( + AllOf(op::Shape("f32[2,2,2]"), op::Reshape(op::Parameter(0)))); + auto reshape = + AllOf(op::Shape("f32[2,4]"), op::Reshape(op::Transpose(all_to_all))); + EXPECT_THAT(root, op::Copy(op::CollectivePermute(reshape))); +} + +TEST_F(SpmdPartitioningTest, SubgroupAllToAllReshard3) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %param0 = f32[8,8,8] parameter(0), + sharding={devices=[2,4,1]0,1,2,3,4,5,6,7} + ROOT %copy = f32[8,8,8] copy(%param0), + sharding={devices=[1,2,4]0,1,4,5,2,3,6,7} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/8)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto all_to_all = op::AllToAll( + AllOf(op::Shape("f32[4,2,4,2]"), op::Reshape(op::Parameter(0)))); + auto reshape = + AllOf(op::Shape("f32[4,8,2]"), op::Reshape(op::Transpose(all_to_all))); + auto all_to_all2 = + op::AllToAll(AllOf(op::Shape("f32[4,2,4,2]"), op::Reshape(reshape))); + auto reshape2 = + AllOf(op::Shape("f32[8,4,2]"), op::Reshape(op::Transpose(all_to_all2))); + EXPECT_THAT(root, op::Copy(op::CollectivePermute(reshape2))); +} + } // namespace } // namespace spmd } // namespace xla diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc index 65f066db629..6beed5a15e5 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc @@ -885,37 +885,89 @@ int64 ShardCountAtDim(const HloSharding& sharding, int64 dim) { return sharding.tile_assignment().dim(dim); } -absl::optional> GetReshardAllToAllSourceTargetDims( - const HloSharding& source, const HloSharding& target) { +absl::optional>> +GetReshardAllToAllSourceTargetDims(const HloSharding& source, + const HloSharding& target) { if (source.IsTileMaximal() || target.IsTileMaximal() || source.tile_assignment().num_dimensions() != target.tile_assignment().num_dimensions()) { return absl::nullopt; } - int64 source_dim = -1; - int64 target_dim = -1; + // Record partition count to index for indices that have different partition + // counts on source and target. 
+ std::map> source_size_to_dim; + std::map> target_size_to_dim; for (int64 i = 0; i < source.tile_assignment().num_dimensions(); ++i) { - if (source.tile_assignment().dim(i) > 1 && - target.tile_assignment().dim(i) == 1) { - if (source_dim != -1) { - return absl::nullopt; - } - source_dim = i; - } else if (source.tile_assignment().dim(i) == 1 && - target.tile_assignment().dim(i) > 1) { - if (target_dim != -1) { - return absl::nullopt; - } - target_dim = i; - } else if (source.tile_assignment().dim(i) != - target.tile_assignment().dim(i)) { + if (source.tile_assignment().dim(i) == target.tile_assignment().dim(i)) { + continue; + } + source_size_to_dim[source.tile_assignment().dim(i)].push_back(i); + target_size_to_dim[target.tile_assignment().dim(i)].push_back(i); + } + // In order to shard via AllToAll, source_size_to_dim and target_size_to_dim + // must have the same distribution. + if (source_size_to_dim.empty() || + source_size_to_dim.size() != target_size_to_dim.size()) { + return absl::nullopt; + } + for (const auto& entry : source_size_to_dim) { + auto target_it = target_size_to_dim.find(entry.first); + if (target_it == target_size_to_dim.end() || + target_it->second.size() != entry.second.size()) { return absl::nullopt; } } - if (source_dim == -1 || target_dim == -1 || source_dim == target_dim) { - return absl::nullopt; + std::vector> result; + auto remove_entry = [](int64 size, int64 dim, + std::map>& size_to_dim) { + size_to_dim[size].erase( + std::remove_if(size_to_dim[size].begin(), size_to_dim[size].end(), + [dim](int64 a) { return a == dim; }), + size_to_dim[size].end()); + if (size_to_dim[size].empty()) { + size_to_dim.erase(size); + } + }; + // Find one pair of dimensions to swap at a time. + while (!source_size_to_dim.empty()) { + int64 source_size = source_size_to_dim.begin()->first; + int64 i = source_size_to_dim.begin()->second.back(); + int64 target_i_size = target.tile_assignment().dim(i); + if (target_i_size == source_size) { + remove_entry(source_size, i, source_size_to_dim); + remove_entry(source_size, i, target_size_to_dim); + continue; + } + auto j_it = source_size_to_dim[target_i_size].begin(); + int64 j = *j_it; + if (source_size == 1) { + // If possible, find a j where the target partition count is not one, so + // that when we swap, the resulting size-1 dimension will still be useful + // to other dimensions. + while (target.tile_assignment().dim(j) == 1) { + if (++j_it == source_size_to_dim[target_i_size].end()) { + break; + } + j = *j_it; + } + } else if (target_i_size % source_size == 0) { + // If possible, find a j where the target partition count is source_size, + // so that we can do a single swap. 
+ while (target.tile_assignment().dim(j) != source_size) { + if (++j_it == source_size_to_dim[target_i_size].end()) { + break; + } + j = *j_it; + } + } else { + return absl::nullopt; + } + result.emplace_back(j, i); + remove_entry(target_i_size, i, target_size_to_dim); + source_size_to_dim.begin()->second.back() = j; + remove_entry(target_i_size, j, source_size_to_dim); } - return std::pair(source_dim, target_dim); + return result; } bool CanReshardWithCollectivePermute(const HloSharding& source, diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h index d924a5c7151..7b737daf78c 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h @@ -265,10 +265,12 @@ HloInstruction* SliceFirstK(HloInstruction* hlo, SpmdBuilder* builder, // Check if a dimension is sharded. int64 ShardCountAtDim(const HloSharding& sharding, int64 dim); -// Returns the pair of source and target dimensions is the resharding can be -// done via all-to-all. -absl::optional> GetReshardAllToAllSourceTargetDims( - const HloSharding& source, const HloSharding& target); +// Returns the list of source-target pairs of dimensions to swap during +// resharding via all-to-all. Reshard can be done by swapping each pair at a +// time. +absl::optional>> +GetReshardAllToAllSourceTargetDims(const HloSharding& source, + const HloSharding& target); // Returns whether the resharding can be done via collective-permute. bool CanReshardWithCollectivePermute(const HloSharding& source, From e543b6842abe399d0b8bf967aa3a3f72ce2b7e19 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 22 Jul 2020 23:22:16 -0700 Subject: [PATCH 1136/2522] Add module comments for tf.experimental.numpy. Point Github README.md to tensorflow.org API documentation. PiperOrigin-RevId: 322730187 Change-Id: I2f192d3f132798cbfc743069a995b49491d026a4 --- tensorflow/python/ops/numpy_ops/README.md | 147 +------------------ tensorflow/python/ops/numpy_ops/__init__.py | 148 +++++++++++++++++++- 2 files changed, 148 insertions(+), 147 deletions(-) diff --git a/tensorflow/python/ops/numpy_ops/README.md b/tensorflow/python/ops/numpy_ops/README.md index 111de75044f..c12f0670a21 100644 --- a/tensorflow/python/ops/numpy_ops/README.md +++ b/tensorflow/python/ops/numpy_ops/README.md @@ -1,144 +1,5 @@ -# NumPy API on TensorFlow +This module implements `tf.experimental.numpy` APIs, which provide NumPy APIs +implemented on top of TensorFlow. -This module provides a subset of NumPy API, built on top of TensorFlow -operations. APIs are based on and have been tested with NumPy 1.16 version. - -The set of supported APIs may be expanded over time. Also future releases may -change the baseline version of NumPy API being supported. A list of some -systematic differences with NumPy are listed later in the "Differences with -NumPy" section. - -## Getting Started - -```python -import tensorflow as tf -from tensorflow.python.ops import numpy_ops as np -print(np.ones([2,1]) + np.ones([1, 2])) -``` - -## Types - -The module provides an `ndarray` class which wraps an immutable `tf.Tensor`. -Additional functions are provided which accept array-like objects. Here -array-like objects includes `ndarrays` as defined by this module, as well as -`tf.Tensor`, in addition to types accepted by NumPy. - -A subset of NumPy dtypes are supported. Type promotion follows NumPy -semantics. 
- -```python -print(np.ones([1, 2], dtype=np.int16) + np.ones([2, 1], dtype=np.uint8)) -``` - -## Array Interface - -The `ndarray` class implements the `__array__` interface. This should allow -these objects to be passed into contexts that expect a NumPy or array-like -object (e.g. matplotlib). - -```python -import numpy as onp -onp.sum(np.ones([1, 2]) + onp.ones([2, 1])) -``` - - -## TF Interoperability - -The TF-NumPy API calls can be interleaved with TensorFlow calls -without incurring Tensor data copies. This is true even if the `ndarray` or -`tf.Tensor` is placed on a non-CPU device. - -In general, the expected behavior should be on par with that of code involving -`tf.Tensor` and running stateless TensorFlow functions on them. - -```python -np.sum(np.ones([1, 2]) + tf.ones([2, 1])) -``` - -Note that the `__array_priority__` is currently chosen to be lower than -`tf.Tensor`. Hence the `+` operator above returns a `tf.Tensor`. - -Additional examples of interopability include: - -* using `with tf.GradientTape()` scope to compute gradients through the - TF-NumPy API calls. -* using `tf.distribution.Strategy` scope for distributed execution -* using `tf.vectorized_map()` for speeding up code using auto-vectorization - - - -## Device Support - -Given that `ndarray` and functions wrap TensorFlow constructs, the code will -have GPU and TPU support on par with TensorFlow. Device placement can be -controlled by using `with tf.device` scopes. Note that these devices could -be local or remote. - -```python -with tf.device("GPU:0"): - x = np.ones([1, 2]) -print(tf.convert_to_tensor(x).device) -``` - -## Graph and Eager Modes - -Eager mode execution should typically match NumPy semantics of executing -op-by-op. However the same code can be executed in graph mode, by putting it -inside a `tf.function`. The function body can contain NumPy code, and the inputs -can be `ndarray` as well. - -```python -@tf.function -def f(x, y): - return np.sum(x + y) - -f(np.ones([1, 2]), tf.ones([2, 1])) -``` -Python control flow based on `ndarray` values will be translated by -[autograph](https://www.tensorflow.org/code/tensorflow/python/autograph/g3doc/reference/index.md) -into `tf.cond` and `tf.while_loop` constructs. The code can be XLA compiled -for further optimizations. - -However, note that graph mode execution can change behavior of certain -operations since symbolic execution may not have information that is computed -during runtime. Some differences are: - -* Shapes can be incomplete or unknown in graph mode. This means that - `ndarray.shape`, `ndarray.size` and `ndarray.ndim` can return `ndarray` - objects instead of returning integer (or tuple of integer) values. -* `__len__`, `__iter__` and `__index__` properties of `ndarray` - may similarly not be supported in graph mode. Code using these - may need to change to explicit shape operations or control flow - constructs. -* Also note the [autograph limitations](https://www.tensorflow.org/code/tensorflow/python/autograph/g3doc/reference/limitations.md). - - -## Mutation and Variables - -`ndarrays` currently wrap immutable `tf.Tensor`. Hence mutation -operations like slice assigns are not supported. This may change in the future. -Note however that one can directly construct a `tf.Variable` and use that with -the TF-NumPy APIs. - -```python -tf_var = tf.Variable(2.0) -tf_var.assign_add(np.square(tf_var)) -``` - -## Differences with NumPy - -Here is a non-exhaustive list of differences: - -* Not all dtypes are currently supported. e.g. 
`np.float96`, `np.float128`. - `np.object`, `np.str`, `np.recarray` types are not supported. -* `ndarray` storage is in C order only. Fortran order, views, `stride_tricks` - are not supported. -* Only a subset of functions and modules are supported. This set will be - expanded over time. For supported functions, some arguments or argument - values may not be supported. This differences are generally provide in the - function comments. Full `ufunc` support is also not provided. -* Buffer mutation is currently not supported. `ndarrays` wrap immutable - tensors. This means that output buffer arguments (e..g `out` in ufuncs) are - not supported -* NumPy C API is not supported. NumPy's Cython and Swig integration are not - supported. +Please see [TensorFlow NumPy API +Documentation](https://www.tensorflow.org/api_docs/python/tf/experimental/numpy). diff --git a/tensorflow/python/ops/numpy_ops/__init__.py b/tensorflow/python/ops/numpy_ops/__init__.py index aa45b78946e..d2b1264b752 100644 --- a/tensorflow/python/ops/numpy_ops/__init__.py +++ b/tensorflow/python/ops/numpy_ops/__init__.py @@ -12,11 +12,151 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""tf.experimental.numpy: Numpy API on top of TensorFlow. +"""# tf.experimental.numpy: NumPy API on TensorFlow. -This module provides a subset of numpy APIs, built on top of TensorFlow -operations. Please see documentation here: -https://github.com/tensorflow/tensorflow/tree/master/tensorflow/python/ops/numpy_ops/README.md +This module provides a subset of NumPy API, built on top of TensorFlow +operations. APIs are based on and have been tested with NumPy 1.16 version. + +The set of supported APIs may be expanded over time. Also future releases may +change the baseline version of NumPy API being supported. A list of some +systematic differences with NumPy are listed later in the "Differences with +NumPy" section. + +## Getting Started + +In the code snippets below, we will assume that `tf.experimental.numpy` is +imported as `tnp` and NumPy is imported as `np` + +```python +print(tnp.ones([2,1]) + tnp.ones([1, 2])) +``` + +## Types + +The module provides an `ndarray` class which wraps an immutable `tf.Tensor`. +Additional functions are provided which accept array-like objects. Here +array-like objects includes `ndarrays` as defined by this module, as well as +`tf.Tensor`, in addition to types accepted by NumPy. + +A subset of NumPy dtypes are supported. Type promotion follows NumPy +semantics. + +```python +print(tnp.ones([1, 2], dtype=tnp.int16) + tnp.ones([2, 1], dtype=tnp.uint8)) +``` + +## Array Interface + +The `ndarray` class implements the `__array__` interface. This should allow +these objects to be passed into contexts that expect a NumPy or array-like +object (e.g. matplotlib). + +```python +np.sum(tnp.ones([1, 2]) + np.ones([2, 1])) +``` + + +## TF Interoperability + +The TF-NumPy API calls can be interleaved with TensorFlow calls +without incurring Tensor data copies. This is true even if the `ndarray` or +`tf.Tensor` is placed on a non-CPU device. + +In general, the expected behavior should be on par with that of code involving +`tf.Tensor` and running stateless TensorFlow functions on them. + +```python +tnp.sum(tnp.ones([1, 2]) + tf.ones([2, 1])) +``` + +Note that the `__array_priority__` is currently chosen to be lower than +`tf.Tensor`. Hence the `+` operator above returns a `tf.Tensor`. 
+ +Additional examples of interopability include: + +* using `with tf.GradientTape()` scope to compute gradients through the + TF-NumPy API calls. +* using `tf.distribution.Strategy` scope for distributed execution +* using `tf.vectorized_map()` for speeding up code using auto-vectorization + + + +## Device Support + +Given that `ndarray` and functions wrap TensorFlow constructs, the code will +have GPU and TPU support on par with TensorFlow. Device placement can be +controlled by using `with tf.device` scopes. Note that these devices could +be local or remote. + +```python +with tf.device("GPU:0"): + x = tnp.ones([1, 2]) +print(tf.convert_to_tensor(x).device) +``` + +## Graph and Eager Modes + +Eager mode execution should typically match NumPy semantics of executing +op-by-op. However the same code can be executed in graph mode, by putting it +inside a `tf.function`. The function body can contain NumPy code, and the inputs +can be `ndarray` as well. + +```python +@tf.function +def f(x, y): + return tnp.sum(x + y) + +f(tnp.ones([1, 2]), tf.ones([2, 1])) +``` +Python control flow based on `ndarray` values will be translated by +[autograph](https://www.tensorflow.org/code/tensorflow/python/autograph/g3doc/reference/index.md) +into `tf.cond` and `tf.while_loop` constructs. The code can be XLA compiled +for further optimizations. + +However, note that graph mode execution can change behavior of certain +operations since symbolic execution may not have information that is computed +during runtime. Some differences are: + +* Shapes can be incomplete or unknown in graph mode. This means that + `ndarray.shape`, `ndarray.size` and `ndarray.ndim` can return `ndarray` + objects instead of returning integer (or tuple of integer) values. +* `__len__`, `__iter__` and `__index__` properties of `ndarray` + may similarly not be supported in graph mode. Code using these + may need to change to explicit shape operations or control flow + constructs. +* Also note the [autograph limitations]( +https://www.tensorflow.org/code/tensorflow/python/autograph/g3doc/reference/limitations.md). + + +## Mutation and Variables + +`ndarrays` currently wrap immutable `tf.Tensor`. Hence mutation +operations like slice assigns are not supported. This may change in the future. +Note however that one can directly construct a `tf.Variable` and use that with +the TF-NumPy APIs. + +```python +tf_var = tf.Variable(2.0) +tf_var.assign_add(tnp.square(tf_var)) +``` + +## Differences with NumPy + +Here is a non-exhaustive list of differences: + +* Not all dtypes are currently supported. e.g. `np.float96`, `np.float128`. + `np.object`, `np.str`, `np.recarray` types are not supported. +* `ndarray` storage is in C order only. Fortran order, views, `stride_tricks` + are not supported. +* Only a subset of functions and modules are supported. This set will be + expanded over time. For supported functions, some arguments or argument + values may not be supported. This differences are generally provide in the + function comments. Full `ufunc` support is also not provided. +* Buffer mutation is currently not supported. `ndarrays` wrap immutable + tensors. This means that output buffer arguments (e..g `out` in ufuncs) are + not supported +* NumPy C API is not supported. NumPy's Cython and Swig integration are not + supported. """ # TODO(wangpeng): Append `np_export`ed symbols to the comments above. 
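The docstring above lists `tf.vectorized_map()` as an interoperability example, and PATCH 1129 earlier in this series is the change that makes its outputs round-trip as `ndarray`. Below is a minimal sketch of that behavior, assuming a TensorFlow build new enough to expose the module as `tf.experimental.numpy` (in this series it still lives under `tensorflow/python/ops/numpy_ops`); the helper name `per_example` and the shapes are illustrative only:

```python
import tensorflow as tf
import tensorflow.experimental.numpy as tnp

def per_example(x):
  # Built purely from tnp ops, so each mapped call returns an ndarray.
  return tnp.sum(tnp.square(x))

a = tnp.ones((100, 32, 32))
c = tf.vectorized_map(per_example, a)
# With the rewrapping added in PATCH 1129, `c` comes back as a tnp.ndarray
# of shape (100,) rather than a bare tf.Tensor.
```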
From 537f3ec52f5e12b89294c051bf6ab3f71e3cadbd Mon Sep 17 00:00:00 2001 From: Thai Nguyen Date: Thu, 23 Jul 2020 00:07:13 -0700 Subject: [PATCH 1137/2522] Support conversion of tf.text ops to Flex ops PiperOrigin-RevId: 322733983 Change-Id: Ie32a3912e7575ff84318de8e6aa2d087eaac8fbe --- tensorflow/lite/delegates/flex/BUILD | 11 +++++++ .../delegates/flex/allowlisted_flex_ops.cc | 31 ++++++++++++++++++- .../flex/allowlisted_flex_ops_internal.h | 3 ++ .../flex/allowlisted_flex_ops_test.cc | 10 ++++++ 4 files changed, 54 insertions(+), 1 deletion(-) diff --git a/tensorflow/lite/delegates/flex/BUILD b/tensorflow/lite/delegates/flex/BUILD index 7039a9fa952..a6d71881a3d 100644 --- a/tensorflow/lite/delegates/flex/BUILD +++ b/tensorflow/lite/delegates/flex/BUILD @@ -239,6 +239,17 @@ cc_library( "allowlisted_flex_ops.h", "allowlisted_flex_ops_internal.h", ], + deps = select({ + "//tensorflow:android": [ + "//tensorflow/core:portable_tensorflow_lib_lite", + ], + "//tensorflow:ios": [ + "//tensorflow/core:portable_tensorflow_lib_lite", + ], + "//conditions:default": [ + "//tensorflow/core:framework", + ], + }), ) tf_cc_test( diff --git a/tensorflow/lite/delegates/flex/allowlisted_flex_ops.cc b/tensorflow/lite/delegates/flex/allowlisted_flex_ops.cc index b8cc4ca56fe..e06410485ba 100644 --- a/tensorflow/lite/delegates/flex/allowlisted_flex_ops.cc +++ b/tensorflow/lite/delegates/flex/allowlisted_flex_ops.cc @@ -16,6 +16,7 @@ limitations under the License. #include +#include "tensorflow/core/framework/op.h" #include "tensorflow/lite/delegates/flex/allowlisted_flex_ops_internal.h" namespace tflite { @@ -547,8 +548,36 @@ const std::set& GetFlexAllowlist() { // NOLINTNEXTLINE } +// Allow the tf.text ops if they are registered in the global op registry. +bool IsAllowedTFTextOpForFlex(const std::string& op_name) { + static const std::set* tftext_flex_ops = + new std::set({ + "CaseFoldUTF8", + "ConstrainedSequence", + "MaxSpanningTree", + "NormalizeUTF8", + "NormalizeUTF8WithOffsetsMap", + "RegexSplitWithOffsets", + "RougeL", + "SentenceFragments", + "SentencepieceOp", + "SentencepieceTokenizeOp", + "SentencepieceTokenizeWithOffsetsOp", + "SentencepieceDetokenizeOp", + "SentencepieceVocabSizeOp", + "SplitMergeTokenizeWithOffsets", + "UnicodeScriptTokenizeWithOffsets", + "WhitespaceTokenizeWithOffsets", + "WordpieceTokenizeWithOffsets", + }); + if (tftext_flex_ops->count(op_name) == 0) return false; + return tensorflow::OpRegistry::Global()->LookUp(op_name) != nullptr; +} + bool IsAllowlistedFlexOp(const std::string& tensorflow_op_name) { - return GetFlexAllowlist().count(tensorflow_op_name) != 0; + if (GetFlexAllowlist().count(tensorflow_op_name) != 0) return true; + // Check if the op is an allowlisted tf.text op. + return IsAllowedTFTextOpForFlex(tensorflow_op_name); } } // namespace flex diff --git a/tensorflow/lite/delegates/flex/allowlisted_flex_ops_internal.h b/tensorflow/lite/delegates/flex/allowlisted_flex_ops_internal.h index 8ecb7e4dc99..59392ad2a58 100644 --- a/tensorflow/lite/delegates/flex/allowlisted_flex_ops_internal.h +++ b/tensorflow/lite/delegates/flex/allowlisted_flex_ops_internal.h @@ -24,6 +24,9 @@ namespace flex { // Return the list of allowlisted flex ops. const std::set& GetFlexAllowlist(); +// Return true if op_name is a tf.text op need to be supported by flex delegate. 
+bool IsAllowedTFTextOpForFlex(const std::string& op_name); + } // namespace flex } // namespace tflite diff --git a/tensorflow/lite/delegates/flex/allowlisted_flex_ops_test.cc b/tensorflow/lite/delegates/flex/allowlisted_flex_ops_test.cc index 67cb07769a8..2e0ced9662b 100644 --- a/tensorflow/lite/delegates/flex/allowlisted_flex_ops_test.cc +++ b/tensorflow/lite/delegates/flex/allowlisted_flex_ops_test.cc @@ -52,6 +52,16 @@ TEST(AllowlistedFlexOpsTest, EveryOpHasKernel) { << "but its kernel is not found."; } } + +TEST(TfTextUtilsTest, TestFlexOpAllowed) { + // Expect false since ConstrainedSequence kernel is not registered. + EXPECT_FALSE(IsAllowedTFTextOpForFlex("ConstrainedSequence")); +} + +TEST(TfTextUtilsTest, TestFlexOpNotAllowed) { + EXPECT_FALSE(IsAllowedTFTextOpForFlex("ngrams")); +} + } // namespace flex } // namespace tflite From d309d1a2f3de12f16afa6d1b33031814428c8b70 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 23 Jul 2020 02:02:12 -0700 Subject: [PATCH 1138/2522] Update GraphDef version to 471. PiperOrigin-RevId: 322745286 Change-Id: I82e0e1a4ec1dda9ea151d54f568b40d94c414ff0 --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 2bd7ef2307d..a5f5593c7aa 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 470 // Updated: 2020/7/22 +#define TF_GRAPH_DEF_VERSION 471 // Updated: 2020/7/23 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From 289b2220395d0a7e5900722a327bb74ac130166b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 23 Jul 2020 02:02:13 -0700 Subject: [PATCH 1139/2522] compat: Update forward compatibility horizon to 2020-07-23 PiperOrigin-RevId: 322745288 Change-Id: Ia7b7d07eab6dfecee5c76e53472944d04fd676b4 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 1f0dc8d3c02..ed32f4cb277 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 22) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 23) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From 60c6a583399421e74aeca7bcac6e3fff3fa6917c Mon Sep 17 00:00:00 2001 From: Jaesung Chung Date: Thu, 23 Jul 2020 02:25:04 -0700 Subject: [PATCH 1140/2522] Add new pattern matching rules for TFL Broadcastable ops, which have high dimenion inputs. - Low dimensions <= 4, for example, should avoid adding BroadcastTo op as usual, which means we need to lower broadcast-able ops as before without BroadcastTo op and lower the BroadcastTo op to the hardware accelerator supported ops as well. - This explicit BroadcastTo op needs to be inserted only when a higher dimension is needed, which will unlock the new opportunity. 
- There are the broadcast-able 20 TFLite ops (about 15 % of TFLite op set) as the followings: Comparison: LessEqual, GreaterEqual, NotEqual, Greater, Less, Equal (up to four dim.) Activation: PRelu (up to four dim.) Arithmetic: Add, Mul, Div, Sub, FloorDiv, FloorMod, Pow, Maximum, Minimum, SquaredDifference (up to four or five dim.) Dimension: SelectV2 (up to four dim.), BroadcastTo (supported via lowering) PiperOrigin-RevId: 322747836 Change-Id: Ife6563fd677e13b23f985bddbccb515e4b3354ea --- tensorflow/compiler/mlir/lite/ir/tfl_ops.cc | 29 +-- .../compiler/mlir/lite/tests/legalize-tf.mlir | 183 ++++++++++++++++- .../mlir/lite/transforms/legalize_tf.cc | 186 +++++++++++++++++- tensorflow/lite/testing/op_tests/binary_op.py | 13 ++ tensorflow/lite/testing/op_tests/where.py | 10 + 5 files changed, 379 insertions(+), 42 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc b/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc index 427b9c692a7..99894ede5f0 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc +++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc @@ -147,18 +147,10 @@ bool IsI64Type(Type element_type) { bool VerifyAddOpShapeConstraints(AddOp op) { auto element_type = getElementTypeOrSelf(op.output().getType()); - // Allows F32, QI8, and QUI8 outputs when the operands have valid shapes, + // Allows F32, QI8, QUI8 and I32 outputs when the operands have valid shapes, // which are broadcastable shapes up to five dimension or have same shapes. if (element_type.isF32() || IsQI8Type(element_type) || - IsQUI8Type(element_type)) { - return VerifyOperandsHaveSameShapesOrBroadcastableShape( - /*op=*/op.getOperation(), /*indices=*/ArrayRef{0, 1}, - /*max_bcast_rank=*/5); - } - - // Allows I32 output when the operands have valid shapes, which are - // broadcastable shapes up to four dimension or have same shapes. - if (IsI32Type(element_type)) { + IsQUI8Type(element_type) || IsI32Type(element_type)) { return VerifyOperandsHaveSameShapesOrBroadcastableShape( /*op=*/op.getOperation(), /*indices=*/ArrayRef{0, 1}, /*max_bcast_rank=*/4); @@ -210,20 +202,13 @@ bool VerifyMulOpShapeConstraints(MulOp op) { } return VerifyOperandsHaveSameShapesOrBroadcastableShape( /*op=*/op.getOperation(), /*indices=*/ArrayRef{0, 1}, - /*max_bcast_rank=*/5); + /*max_bcast_rank=*/4); } - // Allows F32 output when the operands have valid shapes, which are - // broadcastable shapes up to five dimension or have same shapes. - if (element_type.isF32()) { - return VerifyOperandsHaveSameShapesOrBroadcastableShape( - /*op=*/op.getOperation(), /*indices=*/ArrayRef{0, 1}, - /*max_bcast_rank=*/5); - } - - // Allows I32 and QI16 outputs when the operands have valid shapes, which are - // broadcastable shapes up to four dimension or have same shapes. - if (IsI32Type(element_type) || IsQI16Type(element_type)) { + // Allows I32, QI16 and F32 outputs when the operands have valid shapes, which + // are broadcastable shapes up to four dimension or have same shapes. 
+ if (IsI32Type(element_type) || IsQI16Type(element_type) || + element_type.isF32()) { return VerifyOperandsHaveSameShapesOrBroadcastableShape( /*op=*/op.getOperation(), /*indices=*/ArrayRef{0, 1}, /*max_bcast_rank=*/4); diff --git a/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir b/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir index 74a33817d32..a5174e7c438 100644 --- a/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir +++ b/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir @@ -25,13 +25,6 @@ func @testAddHighDimsHaveSameShape(%arg0: tensor<1x2x3x4x5x6x7x8xi32>, %arg1: te return %0 : tensor<1x2x3x4x5x6x7x8xi32> } -// CHECK-LABEL: testAddTooHighBroadcastableDims -func @testAddTooHighBroadcastableDims(%arg0: tensor<1x2x3x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { - // expected-error @+1 {{'tfl.add' op failed to verify that operand #0 and operand #1 have the same shape or broadcastable shapes within the rank 4}} - %0 = "tf.Add"(%arg0, %arg1) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> - return %0 : tensor<1x2x3x4x5x6xi32> -} - func @LeakyRelu(%arg0: tensor<1xf32>) -> tensor<1xf32> { %2 = "tf.LeakyRelu"(%arg0) {alpha = 0.1 : f32} : (tensor<1xf32>) -> tensor<1xf32> return %2: tensor<1xf32> @@ -1530,7 +1523,11 @@ func @select_v2_with_6d_broadcasting(%arg0: tensor<1x1x1x1x3x1xi1>, %arg1 : tens %0 = "tf.SelectV2"(%arg0, %arg1, %arg2): (tensor<1x1x1x1x3x1xi1>, tensor<1x1x1x1x1x4xf32>, tensor<1x1x1x2x1x1xf32>) -> tensor<1x1x1x2x3x4xf32> return %0 : tensor<1x1x1x2x3x4xf32> // CHECK-LABEL: select_v2_with_6d_broadcasting -// CHECK: "tf.SelectV2"(%arg0, %arg1, %arg2) +// CHECK: [[CST:%.*]] = constant dense<[1, 1, 1, 2, 3, 4]> : tensor<6xi64> +// CHECK: [[BCT:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) +// CHECK: [[BCT_0:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) +// CHECK: [[BCT_1:%.*]] = "tfl.broadcast_to"(%arg2, [[CST]]) +// CHECK: "tfl.select"([[BCT]], [[BCT_0]], [[BCT_1]]) } // ----- @@ -1540,7 +1537,9 @@ func @maximum_with_6d_broadcasting(%arg0: tensor<1x1x1x1x8x16xf32>, %arg1: tenso return %0 : tensor<1x1x1x1x8x16xf32> // CHECK-LABEL: maximum_with_6d_broadcasting -// CHECK: "tf.Maximum"(%arg0, %arg1) +// CHECK: [[CST:%.*]] = constant dense<[1, 1, 1, 1, 8, 16]> : tensor<6xi64> +// CHECK: [[BCT:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) +// CHECK: "tfl.maximum"(%arg0, [[BCT]]) } // ----- @@ -1549,5 +1548,169 @@ func @add_with_int32_5d_inputs(%arg0: tensor<1x1x1x3x1xi32>, %arg1 : tensor<1x1x %0 = "tf.Add"(%arg0, %arg1): (tensor<1x1x1x3x1xi32>, tensor<1x1x1x1x4xi32>) -> tensor<1x1x1x3x4xi32> return %0 : tensor<1x1x1x3x4xi32> // CHECK-LABEL: add_with_int32_5d_inputs -// CHECK: "tf.Add"(%arg0, %arg1) +// CHECK: [[CST:%.*]] = constant dense<[1, 1, 1, 3, 4]> : tensor<5xi64> +// CHECK: [[BCT:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) +// CHECK: [[BCT_0:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) +// CHECK: tfl.add [[BCT]], [[BCT_0]] +} + +// CHECK-LABEL: testAddWithBroadcastToOps +func @testAddWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { + // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> + // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) + // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) + // CHECK: tfl.add [[BCAST]], [[BCAST_1]] {fused_activation_function = "NONE"} : tensor<1x2x3x4x5x6xi32> + %0 = "tf.Add"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> + return 
%0 : tensor<1x2x3x4x5x6xi32> +} + +// CHECK-LABEL: testSubWithBroadcastToOps +func @testSubWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { + // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> + // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) + // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) + // CHECK: tfl.sub [[BCAST]], [[BCAST_1]] {fused_activation_function = "NONE"} : tensor<1x2x3x4x5x6xi32> + %0 = "tf.Sub"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> + return %0 : tensor<1x2x3x4x5x6xi32> +} + +// CHECK-LABEL: testMulWithBroadcastToOps +func @testMulWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { + // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> + // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) + // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) + // CHECK: tfl.mul [[BCAST]], [[BCAST_1]] {fused_activation_function = "NONE"} : tensor<1x2x3x4x5x6xi32> + %0 = "tf.Mul"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> + return %0 : tensor<1x2x3x4x5x6xi32> +} + +// CHECK-LABEL: testDivWithBroadcastToOps +func @testDivWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { + // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> + // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) + // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) + // CHECK: tfl.div [[BCAST]], [[BCAST_1]] {fused_activation_function = "NONE"} : tensor<1x2x3x4x5x6xi32> + %0 = "tf.Div"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> + return %0 : tensor<1x2x3x4x5x6xi32> +} + +// CHECK-LABEL: testFloorDivWithBroadcastToOps +func @testFloorDivWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { + // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> + // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) + // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) + // CHECK: tfl.floor_div [[BCAST]], [[BCAST_1]] : tensor<1x2x3x4x5x6xi32> + %0 = "tf.FloorDiv"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> + return %0 : tensor<1x2x3x4x5x6xi32> +} + +// CHECK-LABEL: testFloorModWithBroadcastToOps +func @testFloorModWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { + // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> + // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) + // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) + // CHECK: "tfl.floor_mod"([[BCAST]], [[BCAST_1]]) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x6xi32>) -> tensor<1x2x3x4x5x6xi32> + %0 = "tf.FloorMod"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> + return %0 : tensor<1x2x3x4x5x6xi32> +} + +// CHECK-LABEL: testPowWithBroadcastToOps +func @testPowWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { + // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> + // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) + // CHECK: 
[[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) + // CHECK: tfl.pow [[BCAST]], [[BCAST_1]] : tensor<1x2x3x4x5x6xi32> + %0 = "tf.Pow"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> + return %0 : tensor<1x2x3x4x5x6xi32> +} + +// CHECK-LABEL: testMaximumWithBroadcastToOps +func @testMaximumWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { + // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> + // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) + // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) + // CHECK: "tfl.maximum"([[BCAST]], [[BCAST_1]]) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x6xi32>) -> tensor<1x2x3x4x5x6xi32> + %0 = "tf.Maximum"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> + return %0 : tensor<1x2x3x4x5x6xi32> +} + +// CHECK-LABEL: testMinimumWithBroadcastToOps +func @testMinimumWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { + // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> + // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) + // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) + // CHECK: "tfl.minimum"([[BCAST]], [[BCAST_1]]) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x6xi32>) -> tensor<1x2x3x4x5x6xi32> + %0 = "tf.Minimum"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> + return %0 : tensor<1x2x3x4x5x6xi32> +} + +// CHECK-LABEL: testSelectV2WithBroadcastToOps +func @testSelectV2WithBroadcastToOps(%arg0: tensor<1x2x1x4x1x6xi1>, %arg1: tensor<1x2x3x4x1x1xi32>, %arg2: tensor<1x2x1x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { + // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> + // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) + // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) + // CHECK: [[BCAST_2:%.*]] = "tfl.broadcast_to"(%arg2, [[CST]]) + // CHECK: "tfl.select"([[BCAST]], [[BCAST_1]], [[BCAST_2]]) + %0 = "tf.SelectV2"(%arg0, %arg1, %arg2) : (tensor<1x2x1x4x1x6xi1>, tensor<1x2x3x4x1x1xi32>, tensor<1x2x1x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> + return %0 : tensor<1x2x3x4x5x6xi32> +} + +// CHECK-LABEL: testLessEqualWithBroadcastToOps +func @testLessEqualWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> { + // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> + // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) + // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) + // CHECK: "tfl.less_equal"([[BCAST]], [[BCAST_1]]) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x6xi32>) -> tensor<1x2x3x4x5x6xi1> + %0 = "tf.LessEqual"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> + return %0 : tensor<1x2x3x4x5x6xi1> +} + +// CHECK-LABEL: testGreaterEqualWithBroadcastToOps +func @testGreaterEqualWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> { + // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> + // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) + // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) + // CHECK: "tfl.greater_equal"([[BCAST]], [[BCAST_1]]) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x6xi32>) -> tensor<1x2x3x4x5x6xi1> + %0 = 
"tf.GreaterEqual"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> + return %0 : tensor<1x2x3x4x5x6xi1> +} + +// CHECK-LABEL: testEqualWithBroadcastToOps +func @testEqualWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> { + // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> + // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) + // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) + // CHECK: "tfl.equal"([[BCAST]], [[BCAST_1]]) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x6xi32>) -> tensor<1x2x3x4x5x6xi1> + %0 = "tf.Equal"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> + return %0 : tensor<1x2x3x4x5x6xi1> +} + +// CHECK-LABEL: testNotEqualWithBroadcastToOps +func @testNotEqualWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> { + // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> + // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) + // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) + // CHECK: "tfl.not_equal"([[BCAST]], [[BCAST_1]]) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x6xi32>) -> tensor<1x2x3x4x5x6xi1> + %0 = "tf.NotEqual"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> + return %0 : tensor<1x2x3x4x5x6xi1> +} + +// CHECK-LABEL: testLessWithBroadcastToOps +func @testLessWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> { + // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> + // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) + // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) + // CHECK: "tfl.less"([[BCAST]], [[BCAST_1]]) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x6xi32>) -> tensor<1x2x3x4x5x6xi1> + %0 = "tf.Less"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> + return %0 : tensor<1x2x3x4x5x6xi1> +} + +// CHECK-LABEL: testGreaterWithBroadcastToOps +func @testGreaterWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> { + // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> + // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) + // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) + // CHECK: "tfl.greater"([[BCAST]], [[BCAST_1]]) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x6xi32>) -> tensor<1x2x3x4x5x6xi1> + %0 = "tf.Greater"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> + return %0 : tensor<1x2x3x4x5x6xi1> } diff --git a/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc index 7d6866dc570..2f8370e2b96 100644 --- a/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc +++ b/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc @@ -631,6 +631,156 @@ struct LegalizeUnidirectionalSequenceRnn : public RewritePattern { } }; +// Put two TFL BroadcastTo ops in front of the given TF binary broadcast op to +// to make binary broadcast-able op conversion always successful and does not +// require flex delegate. 
+template +class ApplyExplicitBroadcasting : public OpRewritePattern { + public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(SourceOp src_op, + PatternRewriter& rewriter) const override { + Operation* op = static_cast(src_op); + auto lhs = op->getOperand(0); + auto rhs = op->getOperand(1); + + // Should have static shapes to calculate the broadcasted shape. + if (!lhs.getType().cast().hasStaticShape() || + !rhs.getType().cast().hasStaticShape()) { + return failure(); + } + + // Calculate the broadcasted shape. + SmallVector result_shape; + if (!OpTrait::util::getBroadcastedShape( + lhs.getType().cast().getShape(), + rhs.getType().cast().getShape(), result_shape)) { + return failure(); + } + + RankedTensorType result_type = RankedTensorType::get( + result_shape, getElementTypeOrSelf(op->getResult(0).getType())); + + // Create a const op, that stores the above broadcasted shape. + auto new_shape_attr = mlir::DenseIntElementsAttr::get( + RankedTensorType::get(result_shape.size(), rewriter.getIntegerType(64)), + result_shape); + auto new_shape = rewriter.create(op->getLoc(), new_shape_attr); + + // Apply BroadcastTo ops to each input. + auto broadcast_type = RankedTensorType::get( + result_shape, getElementTypeOrSelf(lhs.getType())); + + if (result_type.getShape() != lhs.getType().cast().getShape()) { + lhs = rewriter + .create(op->getLoc(), broadcast_type, lhs, + new_shape) + .output(); + } + if (result_type.getShape() != rhs.getType().cast().getShape()) { + rhs = rewriter + .create(op->getLoc(), broadcast_type, rhs, + new_shape) + .output(); + } + + // Recreate an op with the above Broadcast op results. + rewriter.replaceOpWithNewOp(op, result_type, lhs, rhs); + return success(); + } +}; + +// This specialization is for TF SelectV2 op. SelectV2 op have three inputs and +// they should have broadcastable shapes. +template <> +class ApplyExplicitBroadcasting + : public OpRewritePattern { + public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(TF::SelectV2Op src_op, + PatternRewriter& rewriter) const override { + Operation* op = static_cast(src_op); + auto cond = op->getOperand(0); + auto lhs = op->getOperand(1); + auto rhs = op->getOperand(2); + + // Should have static shapes to calculate the broadcasted shape. + if (!lhs.getType().cast().hasStaticShape() || + !rhs.getType().cast().hasStaticShape() || + !cond.getType().cast().hasStaticShape()) { + return failure(); + } + + // Calculate the broadcasted shape. + SmallVector broadcasted_shape; + if (!OpTrait::util::getBroadcastedShape( + lhs.getType().cast().getShape(), + rhs.getType().cast().getShape(), broadcasted_shape)) { + return failure(); + } + + SmallVector result_shape; + if (!OpTrait::util::getBroadcastedShape( + broadcasted_shape, cond.getType().cast().getShape(), + result_shape)) { + return failure(); + } + + // Create a const op, that stores the above broadcasted shape. + auto shape_type = + RankedTensorType::get(result_shape.size(), rewriter.getIntegerType(64)); + auto new_shape_attr = + mlir::DenseIntElementsAttr::get(shape_type, result_shape); + auto new_shape = rewriter.create(op->getLoc(), new_shape_attr); + + // Apply BroadcastTo ops to each input. 
+ auto cond_result_type = + RankedTensorType::get(result_shape, rewriter.getIntegerType(1)); + auto result_type = RankedTensorType::get( + result_shape, getElementTypeOrSelf(lhs.getType())); + + if (result_shape != cond.getType().cast().getShape()) { + cond = rewriter + .create(op->getLoc(), cond_result_type, + cond, new_shape) + .output(); + } + if (result_shape != lhs.getType().cast().getShape()) { + lhs = rewriter + .create(op->getLoc(), result_type, lhs, + new_shape) + .output(); + } + if (result_shape != rhs.getType().cast().getShape()) { + rhs = rewriter + .create(op->getLoc(), result_type, rhs, + new_shape) + .output(); + } + + // Recreate an op with the above Broadcast op results. + rewriter.replaceOpWithNewOp(op, result_type, cond, lhs, + rhs); + return success(); + } +}; + +void applyPatterns(FuncOp func, ConversionTarget& target, + const OwningRewritePatternList& patterns) { + // Keep trying to convert. + // TODO(karimnosseir): This is similar to what apply greedy patterns does. + // Look if there is a function that tries until it converge. + // Currently unit-test doesn't do multiple tries, so we need this. + const int max_iterations = 15; + for (int i = 0; i < max_iterations; ++i) { + if (failed(applyPartialConversion(func, target, patterns))) { + return; + } + } +} + void LegalizeTF::runOnFunction() { OwningRewritePatternList patterns; auto* context = &getContext(); @@ -681,16 +831,32 @@ void LegalizeTF::runOnFunction() { return success(current_thread_id == llvm::get_threadid()); }); - // Keep trying to convert. - // TODO(karimnosseir): This is similar to what apply greedy patterns does. - // Look if there is a function that tries until it converge. - // Currently unit-test doesn't do multiple tries, so we need this. - const int max_iterations = 15; - for (int i = 0; i < max_iterations; ++i) { - if (failed(applyPartialConversion(func, target, patterns))) { - return; - } - } + applyPatterns(func, target, patterns); + + // Explict BroadcastTo addition for left-over broadcast-able ops. + // The following pattern matchings should be done after the other legalization + // rules in order not to add unnecessary BroadcastTo ops. + patterns.insert, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting>(context); + + applyPatterns(func, target, patterns); } } // namespace diff --git a/tensorflow/lite/testing/op_tests/binary_op.py b/tensorflow/lite/testing/op_tests/binary_op.py index 17ed2f3522d..936563cc63d 100644 --- a/tensorflow/lite/testing/op_tests/binary_op.py +++ b/tensorflow/lite/testing/op_tests/binary_op.py @@ -178,6 +178,19 @@ def make_binary_op_tests(options, }, ] + # High dimension broadcasting support in MLIR converter. + if options.use_experimental_converter: + test_parameters = test_parameters + [ + { + "dtype": [tf.float32], + "input_shape_1": [[8, 7, 6, 5, 4, 3, 2, 1]], + "input_shape_2": [[4, 3, 2, 1]], + "activation": [False], + "fully_quantize": [False], + "dynamic_range_quantize": [False], + }, + ] + # test_parameters include fully_quantize option only when # allow_fully_quantize is True. 
if not allow_fully_quantize: diff --git a/tensorflow/lite/testing/op_tests/where.py b/tensorflow/lite/testing/op_tests/where.py index 49802422e3f..df91e195820 100644 --- a/tensorflow/lite/testing/op_tests/where.py +++ b/tensorflow/lite/testing/op_tests/where.py @@ -35,6 +35,16 @@ def make_where_tests(options): }, ] + # High dimension broadcasting support in MLIR converter. + if options.use_experimental_converter: + test_parameters = test_parameters + [ + { + "input_dtype": [tf.float32, tf.int32], + "input_shape_set": [([8, 7, 6, 5, 4, 3, 2, 1], [4, 3, 2, 1]),], + "use_where_v2": [True], + }, + ] + def build_graph(parameters): """Build the where op testing graph.""" input_value1 = tf.compat.v1.placeholder( From 1dccb05678ccefb5f32f1a9b37b6c5e4cb93d8fd Mon Sep 17 00:00:00 2001 From: Marat Dukhan Date: Thu, 23 Jul 2020 02:41:23 -0700 Subject: [PATCH 1141/2522] Support RESIZE_BILINEAR operator in XNNPACK delegate PiperOrigin-RevId: 322749575 Change-Id: Ifa2ec971fc18f4877976f2d9970ac45203a43029 --- tensorflow/lite/delegates/xnnpack/BUILD | 31 +++ tensorflow/lite/delegates/xnnpack/README.md | 7 + .../delegates/xnnpack/resize_bilinear_test.cc | 119 ++++++++++++ .../xnnpack/resize_bilinear_tester.cc | 183 ++++++++++++++++++ .../xnnpack/resize_bilinear_tester.h | 115 +++++++++++ .../delegates/xnnpack/xnnpack_delegate.cc | 83 ++++++++ tensorflow/workspace.bzl | 8 +- 7 files changed, 542 insertions(+), 4 deletions(-) create mode 100644 tensorflow/lite/delegates/xnnpack/resize_bilinear_test.cc create mode 100644 tensorflow/lite/delegates/xnnpack/resize_bilinear_tester.cc create mode 100644 tensorflow/lite/delegates/xnnpack/resize_bilinear_tester.h diff --git a/tensorflow/lite/delegates/xnnpack/BUILD b/tensorflow/lite/delegates/xnnpack/BUILD index e2efac24243..3c580edae10 100644 --- a/tensorflow/lite/delegates/xnnpack/BUILD +++ b/tensorflow/lite/delegates/xnnpack/BUILD @@ -229,6 +229,22 @@ cc_library( ], ) +cc_library( + name = "resize_bilinear_tester", + testonly = 1, + srcs = ["resize_bilinear_tester.cc"], + hdrs = ["resize_bilinear_tester.h"], + deps = [ + "//tensorflow/lite:framework", + "//tensorflow/lite:schema_fbs_version", + "//tensorflow/lite/c:common", + "//tensorflow/lite/kernels:builtin_ops", + "//tensorflow/lite/schema:schema_fbs", + "@com_google_googletest//:gtest", + "@flatbuffers", + ], +) + cc_library( name = "softmax_tester", testonly = 1, @@ -635,6 +651,21 @@ cc_test( ], ) +cc_test( + name = "resize_bilinear_test", + srcs = ["resize_bilinear_test.cc"], + linkopts = select({ + "//tensorflow:emscripten": EMSCRIPTEN_LINKOPTS, + "//conditions:default": [], + }), + deps = [ + ":resize_bilinear_tester", + ":test_main", + ":xnnpack_delegate_test_mode", + "@com_google_googletest//:gtest", + ], +) + cc_test( name = "round_test", srcs = ["round_test.cc"], diff --git a/tensorflow/lite/delegates/xnnpack/README.md b/tensorflow/lite/delegates/xnnpack/README.md index 47ed79033cf..6f597006c1b 100644 --- a/tensorflow/lite/delegates/xnnpack/README.md +++ b/tensorflow/lite/delegates/xnnpack/README.md @@ -260,6 +260,13 @@ Below is the list of current operators and limitations: static (use `kTfLiteMmapRo` allocation type), or absent (with the new shape specified via `ReshapeOptions` table). +### `RESIZE_BILINEAR` + +* The first input and the output must be 4D tensors in 32-bit floating-point + format. +* The second input (the input with the new shape specification) must be + static (use `kTfLiteMmapRo` allocation type). + ### `ROUND` * Inputs and outputs must be in 32-bit floating-point format. 
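As a rough sketch of a model that satisfies the RESIZE_BILINEAR constraints listed above (4-D 32-bit floating-point tensors and a static output size) — the layer choice, shapes, and converter behaviour here are illustrative assumptions, not part of the patch:

```python
import tensorflow as tf

# UpSampling2D with bilinear interpolation typically lowers to a
# RESIZE_BILINEAR op whose target size is a constant, i.e. a static
# (kTfLiteMmapRo) tensor, which is what the XNNPACK delegate expects.
model = tf.keras.Sequential([
    tf.keras.layers.UpSampling2D(size=(2, 2), interpolation="bilinear",
                                 input_shape=(8, 8, 3)),
])

converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Depending on the TensorFlow build, the interpreter may apply the XNNPACK
# delegate automatically; otherwise the op runs on the reference kernels.
interpreter = tf.lite.Interpreter(model_content=tflite_model)
interpreter.allocate_tensors()
```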
diff --git a/tensorflow/lite/delegates/xnnpack/resize_bilinear_test.cc b/tensorflow/lite/delegates/xnnpack/resize_bilinear_test.cc new file mode 100644 index 00000000000..e4ff3e63388 --- /dev/null +++ b/tensorflow/lite/delegates/xnnpack/resize_bilinear_test.cc @@ -0,0 +1,119 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include +#include +#include +#include + +#include +#include "tensorflow/lite/delegates/xnnpack/resize_bilinear_tester.h" +#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h" + +namespace tflite { +namespace xnnpack { + +TEST(ResizeBilinear, AlignCenters) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto size_rng = + std::bind(std::uniform_int_distribution(2, 10), std::ref(rng)); + auto channel_rng = + std::bind(std::uniform_int_distribution(2, 16), std::ref(rng)); + + ResizeBilinearTester() + .HalfPixelCenters(true) + .InputHeight(size_rng()) + .InputWidth(size_rng()) + .OutputHeight(size_rng()) + .OutputWidth(size_rng()) + .Channels(channel_rng()) + .Test(xnnpack_delegate.get()); +} + +TEST(ResizeBilinear, AlignCentersTF1X) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto size_rng = + std::bind(std::uniform_int_distribution(2, 10), std::ref(rng)); + auto channel_rng = + std::bind(std::uniform_int_distribution(2, 16), std::ref(rng)); + + ResizeBilinearTester() + .InputHeight(size_rng()) + .InputWidth(size_rng()) + .OutputHeight(size_rng()) + .OutputWidth(size_rng()) + .Channels(channel_rng()) + .Test(xnnpack_delegate.get()); +} + +TEST(ResizeBilinear, AlignCorners) { + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto size_rng = + std::bind(std::uniform_int_distribution(2, 10), std::ref(rng)); + auto channel_rng = + std::bind(std::uniform_int_distribution(2, 16), std::ref(rng)); + + ResizeBilinearTester() + .AlignCorners(true) + .InputHeight(size_rng()) + .InputWidth(size_rng()) + .OutputHeight(size_rng()) + .OutputWidth(size_rng()) + .Channels(channel_rng()) + .Test(xnnpack_delegate.get()); +} + +TEST(ResizeBilinear, MultiThreading) { + TfLiteXNNPackDelegateOptions delegate_options = + TfLiteXNNPackDelegateOptionsDefault(); + delegate_options.num_threads = 2; + std::unique_ptr + xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options), + TfLiteXNNPackDelegateDelete); + + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto size_rng = + std::bind(std::uniform_int_distribution(2, 10), std::ref(rng)); + auto channel_rng = + 
std::bind(std::uniform_int_distribution(2, 16), std::ref(rng)); + + ResizeBilinearTester() + .InputHeight(size_rng()) + .InputWidth(size_rng()) + .OutputHeight(size_rng()) + .OutputWidth(size_rng()) + .Channels(channel_rng()) + .Test(xnnpack_delegate.get()); +} + +} // namespace xnnpack +} // namespace tflite diff --git a/tensorflow/lite/delegates/xnnpack/resize_bilinear_tester.cc b/tensorflow/lite/delegates/xnnpack/resize_bilinear_tester.cc new file mode 100644 index 00000000000..34730c05719 --- /dev/null +++ b/tensorflow/lite/delegates/xnnpack/resize_bilinear_tester.cc @@ -0,0 +1,183 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/delegates/xnnpack/resize_bilinear_tester.h" + +#include +#include +#include +#include +#include +#include + +#include +#include "flatbuffers/flatbuffers.h" // from @flatbuffers +#include "tensorflow/lite/interpreter.h" +#include "tensorflow/lite/kernels/register.h" +#include "tensorflow/lite/model.h" +#include "tensorflow/lite/schema/schema_generated.h" +#include "tensorflow/lite/version.h" + +namespace tflite { +namespace xnnpack { + +void ResizeBilinearTester::Test(TfLiteDelegate* delegate) const { + std::random_device random_device; + auto rng = std::mt19937(random_device()); + auto input_rng = + std::bind(std::uniform_real_distribution(), std::ref(rng)); + + std::vector buffer = CreateTfLiteModel(); + const Model* model = GetModel(buffer.data()); + + std::unique_ptr delegate_interpreter; + ASSERT_EQ( + InterpreterBuilder(model, ::tflite::ops::builtin::BuiltinOpResolver())( + &delegate_interpreter), + kTfLiteOk); + std::unique_ptr default_interpreter; + ASSERT_EQ( + InterpreterBuilder(model, ::tflite::ops::builtin::BuiltinOpResolver())( + &default_interpreter), + kTfLiteOk); + + ASSERT_TRUE(delegate_interpreter); + ASSERT_TRUE(default_interpreter); + + ASSERT_EQ(delegate_interpreter->inputs().size(), 1); + ASSERT_EQ(default_interpreter->inputs().size(), 1); + + ASSERT_EQ(delegate_interpreter->outputs().size(), 1); + ASSERT_EQ(default_interpreter->outputs().size(), 1); + + ASSERT_EQ(delegate_interpreter->AllocateTensors(), kTfLiteOk); + ASSERT_EQ(default_interpreter->AllocateTensors(), kTfLiteOk); + + ASSERT_EQ(delegate_interpreter->ModifyGraphWithDelegate(delegate), kTfLiteOk); + + float* default_input_data = default_interpreter->typed_tensor( + default_interpreter->inputs()[0]); + std::generate(default_input_data, + default_input_data + + BatchSize() * InputHeight() * InputWidth() * Channels(), + std::ref(input_rng)); + + float* delegate_input_data = delegate_interpreter->typed_tensor( + delegate_interpreter->inputs()[0]); + std::copy(default_input_data, + default_input_data + + BatchSize() * InputHeight() * InputWidth() * Channels(), + delegate_input_data); + + ASSERT_EQ(default_interpreter->Invoke(), kTfLiteOk); + ASSERT_EQ(delegate_interpreter->Invoke(), kTfLiteOk); + + float* default_output_data = 
default_interpreter->typed_tensor( + default_interpreter->outputs()[0]); + float* delegate_output_data = delegate_interpreter->typed_tensor( + delegate_interpreter->outputs()[0]); + + for (int i = 0; i < BatchSize(); i++) { + for (int y = 0; y < OutputHeight(); y++) { + for (int x = 0; x < OutputWidth(); x++) { + for (int c = 0; c < Channels(); c++) { + const int index = + ((i * OutputHeight() + y) * OutputWidth() + x) * Channels() + c; + ASSERT_NEAR(default_output_data[index], delegate_output_data[index], + std::max(std::abs(default_output_data[index]) * 1.0e-4f, + 10.0f * std::numeric_limits::epsilon())) + << "batch " << i << " / " << BatchSize() << ", y position " << y + << " / " << OutputHeight() << ", x position " << x << " / " + << OutputWidth() << ", channel " << c << " / " << Channels(); + } + } + } + } +} + +std::vector ResizeBilinearTester::CreateTfLiteModel() const { + flatbuffers::FlatBufferBuilder builder; + flatbuffers::Offset operator_code = + CreateOperatorCode(builder, BuiltinOperator_RESIZE_BILINEAR); + + flatbuffers::Offset resize_bilinear_options = + CreateResizeBilinearOptions(builder, AlignCorners(), HalfPixelCenters()); + + const std::array size_data{{OutputHeight(), OutputWidth()}}; + + const std::array, 2> buffers{{ + CreateBuffer(builder, builder.CreateVector({})), + CreateBuffer(builder, + builder.CreateVector( + reinterpret_cast(size_data.data()), + size_data.size() * sizeof(int32_t))), + }}; + + const std::array input_shape{ + {BatchSize(), InputHeight(), InputWidth(), Channels()}}; + const std::array output_shape{ + {BatchSize(), OutputHeight(), OutputWidth(), Channels()}}; + const std::array size_shape{ + {static_cast(size_data.size())}}; + + const std::array, 3> tensors{{ + CreateTensor( + builder, + builder.CreateVector(input_shape.data(), input_shape.size()), + TensorType_FLOAT32), + CreateTensor( + builder, + builder.CreateVector(size_shape.data(), size_shape.size()), + TensorType_INT32, /*buffer=*/1), + CreateTensor(builder, + builder.CreateVector(output_shape.data(), + output_shape.size()), + TensorType_FLOAT32), + }}; + + const std::array op_inputs{{0, 1}}; + const std::array op_outputs{{2}}; + flatbuffers::Offset op = CreateOperator( + builder, /*opcode_index=*/0, + builder.CreateVector(op_inputs.data(), op_inputs.size()), + builder.CreateVector(op_outputs.data(), op_outputs.size()), + BuiltinOptions_ResizeBilinearOptions, resize_bilinear_options.Union()); + + const std::array subgraph_inputs{{0}}; + const std::array subgraph_outputs{{2}}; + flatbuffers::Offset subgraph = CreateSubGraph( + builder, builder.CreateVector(tensors.data(), tensors.size()), + builder.CreateVector(subgraph_inputs.data(), + subgraph_inputs.size()), + builder.CreateVector(subgraph_outputs.data(), + subgraph_outputs.size()), + builder.CreateVector(&op, 1)); + + flatbuffers::Offset description = + builder.CreateString("Resize Bilinear model"); + + flatbuffers::Offset model_buffer = CreateModel( + builder, TFLITE_SCHEMA_VERSION, builder.CreateVector(&operator_code, 1), + builder.CreateVector(&subgraph, 1), description, + builder.CreateVector(buffers.data(), buffers.size())); + + builder.Finish(model_buffer); + + return std::vector(builder.GetBufferPointer(), + builder.GetBufferPointer() + builder.GetSize()); +} + +} // namespace xnnpack +} // namespace tflite diff --git a/tensorflow/lite/delegates/xnnpack/resize_bilinear_tester.h b/tensorflow/lite/delegates/xnnpack/resize_bilinear_tester.h new file mode 100644 index 00000000000..6885fcf9033 --- /dev/null +++ 
b/tensorflow/lite/delegates/xnnpack/resize_bilinear_tester.h @@ -0,0 +1,115 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_DELEGATES_XNNPACK_RESIZE_BILINEAR_TESTER_H_ +#define TENSORFLOW_LITE_DELEGATES_XNNPACK_RESIZE_BILINEAR_TESTER_H_ + +#include +#include + +#include +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/schema/schema_generated.h" + +namespace tflite { +namespace xnnpack { + +class ResizeBilinearTester { + public: + ResizeBilinearTester() = default; + ResizeBilinearTester(const ResizeBilinearTester&) = delete; + ResizeBilinearTester& operator=(const ResizeBilinearTester&) = delete; + + inline ResizeBilinearTester& BatchSize(int32_t batch_size) { + EXPECT_GT(batch_size, 0); + batch_size_ = batch_size; + return *this; + } + + inline int32_t BatchSize() const { return batch_size_; } + + inline ResizeBilinearTester& Channels(int32_t channels) { + EXPECT_GT(channels, 0); + channels_ = channels; + return *this; + } + + inline int32_t Channels() const { return channels_; } + + inline ResizeBilinearTester& InputHeight(int32_t input_height) { + EXPECT_GT(input_height, 0); + input_height_ = input_height; + return *this; + } + + inline int32_t InputHeight() const { return input_height_; } + + inline ResizeBilinearTester& InputWidth(int32_t input_width) { + EXPECT_GT(input_width, 0); + input_width_ = input_width; + return *this; + } + + inline int32_t InputWidth() const { return input_width_; } + + inline ResizeBilinearTester& OutputHeight(int32_t output_height) { + EXPECT_GT(output_height, 0); + output_height_ = output_height; + return *this; + } + + inline int32_t OutputHeight() const { return output_height_; } + + inline ResizeBilinearTester& OutputWidth(int32_t output_width) { + EXPECT_GT(output_width, 0); + output_width_ = output_width; + return *this; + } + + inline int32_t OutputWidth() const { return output_width_; } + + ResizeBilinearTester& AlignCorners(bool align_corners) { + align_corners_ = align_corners; + return *this; + } + + bool AlignCorners() const { return align_corners_; } + + ResizeBilinearTester& HalfPixelCenters(bool half_pixel_centers) { + half_pixel_centers_ = half_pixel_centers; + return *this; + } + + bool HalfPixelCenters() const { return half_pixel_centers_; } + + void Test(TfLiteDelegate* delegate) const; + + private: + std::vector CreateTfLiteModel() const; + + int32_t batch_size_ = 1; + int32_t channels_ = 1; + int32_t input_height_ = 1; + int32_t input_width_ = 1; + int32_t output_height_ = 1; + int32_t output_width_ = 1; + bool align_corners_ = false; + bool half_pixel_centers_ = false; +}; + +} // namespace xnnpack +} // namespace tflite + +#endif // TENSORFLOW_LITE_DELEGATES_XNNPACK_RESIZE_BILINEAR_TESTER_H_ diff --git a/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc b/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc index c7aea59b231..eec223597cb 100644 --- 
a/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc +++ b/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc @@ -157,6 +157,7 @@ class Subgraph { case kTfLiteBuiltinMean: case kTfLiteBuiltinPad: case kTfLiteBuiltinReshape: + case kTfLiteBuiltinResizeBilinear: // Ignore the second input (axes, static padding, or new shape), // because it is represented as parameters of the XNNPACK operator // rather than extra input. @@ -930,6 +931,14 @@ class Subgraph { context->tensors, reshape_params, xnnpack_tensors); } + case kTfLiteBuiltinResizeBilinear: { + const TfLiteResizeBilinearParams* resize_params = + static_cast(node->builtin_data); + + return VisitResizeBilinearNode(subgraph, logging_context, node_index, + node, context->tensors, resize_params, + xnnpack_tensors); + } case kTfLiteBuiltinRound: return VisitRoundNode(subgraph, logging_context, node_index, node, context->tensors, xnnpack_tensors); @@ -2460,6 +2469,80 @@ class Subgraph { return kTfLiteOk; } + static TfLiteStatus VisitResizeBilinearNode( + xnn_subgraph_t subgraph, TfLiteContext* logging_context, int node_index, + TfLiteNode* node, const TfLiteTensor* tensors, + const TfLiteResizeBilinearParams* resize_params, + const std::vector& xnnpack_tensors) { + TF_LITE_ENSURE_STATUS( + CheckNumInputsAndOutputs(logging_context, node, 2, 1, node_index)); + + const TfLiteTensor& input_tensor = tensors[node->inputs->data[0]]; + TF_LITE_ENSURE_STATUS(CheckTensorFloatType( + logging_context, input_tensor, node->inputs->data[0], node_index)); + TF_LITE_ENSURE_STATUS(CheckTensorShape(logging_context, input_tensor, 4, + node->inputs->data[0])); + TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation( + logging_context, input_tensor, node->inputs->data[0], node_index)); + + const TfLiteTensor& shape_tensor = tensors[node->inputs->data[1]]; + TF_LITE_ENSURE_STATUS(CheckTensorType(logging_context, shape_tensor, + kTfLiteInt32, node->inputs->data[1], + node_index)); + TF_LITE_ENSURE_STATUS(CheckShapeTensorShape( + logging_context, shape_tensor, node->inputs->data[1], node_index)); + if (shape_tensor.dims->data[0] != 2) { + TF_LITE_MAYBE_KERNEL_LOG( + logging_context, + "unexpected number of dimensions %d in the output shape in node %d", + shape_tensor.dims->data[0], node_index); + } + TF_LITE_ENSURE_STATUS(CheckTensorStaticAllocation( + logging_context, shape_tensor, node->inputs->data[1], node_index)); + + const TfLiteTensor& output_tensor = tensors[node->outputs->data[0]]; + TF_LITE_ENSURE_STATUS(CheckTensorFloatType( + logging_context, output_tensor, node->outputs->data[0], node_index)); + TF_LITE_ENSURE_STATUS(CheckTensorShape(logging_context, output_tensor, 4, + node->outputs->data[0])); + TF_LITE_ENSURE_STATUS(CheckTensorNonDynamicAllocation( + logging_context, output_tensor, node->outputs->data[0], node_index)); + + const int32_t* shape_data = + reinterpret_cast(shape_tensor.data.data); + for (int i = 0; i < shape_tensor.dims->size; i++) { + const int32_t dim = shape_data[i]; + if (dim <= 0) { + TF_LITE_MAYBE_KERNEL_LOG( + logging_context, "invalid output dimension #%d value %d in node %d", + i, dim, node_index); + return kTfLiteError; + } + } + + if (subgraph != nullptr) { + uint32_t flags = 0; + if (resize_params->align_corners) { + flags |= XNN_FLAG_ALIGN_CORNERS; + } else if (!resize_params->half_pixel_centers) { + flags |= XNN_FLAG_TENSORFLOW_LEGACY_MODE; + } + const xnn_status status = xnn_define_static_resize_bilinear_2d( + subgraph, static_cast(shape_data[0]), + static_cast(shape_data[1]), + 
/*input_id=*/xnnpack_tensors[node->inputs->data[0]], + /*output_id=*/xnnpack_tensors[node->outputs->data[0]], flags); + if (status != xnn_status_success) { + TF_LITE_KERNEL_LOG(logging_context, + "failed to delegate RESIZE_BILINEAR node #%d", + node_index); + return kTfLiteError; + } + } + + return kTfLiteOk; + } + static TfLiteStatus VisitRoundNode( xnn_subgraph_t subgraph, TfLiteContext* logging_context, int node_index, TfLiteNode* node, const TfLiteTensor* tensors, diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 586bda6a3a2..0d4f64cf0c2 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -164,11 +164,11 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): tf_http_archive( name = "XNNPACK", - sha256 = "bd4278ebbe3f6b104f46548717b00bdba95acaab3cbac3de4015c65d868259f8", - strip_prefix = "XNNPACK-d27202dfeaa8d3a96670ba47f3dce2f19305a092", + sha256 = "c6eae589a4af7785da467162acd339bae359842e14c93bddc8fbe84ffd361c70", + strip_prefix = "XNNPACK-aff24e26a760552ee98a036f2a6e95b123e1bc6d", urls = [ - "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/XNNPACK/archive/d27202dfeaa8d3a96670ba47f3dce2f19305a092.zip", - "https://github.com/google/XNNPACK/archive/d27202dfeaa8d3a96670ba47f3dce2f19305a092.zip", + "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/XNNPACK/archive/aff24e26a760552ee98a036f2a6e95b123e1bc6d.zip", + "https://github.com/google/XNNPACK/archive/aff24e26a760552ee98a036f2a6e95b123e1bc6d.zip", ], ) From 5515d47210c9d51c877d8b1b6e55a1fe9a4aad2c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 23 Jul 2020 02:42:29 -0700 Subject: [PATCH 1142/2522] Internal change PiperOrigin-RevId: 322749690 Change-Id: Ia8eeb2c38a1a9d73a5786511bc4dd69e0da0b951 --- tensorflow/compiler/mlir/lite/ir/tfl_ops.cc | 29 ++- .../compiler/mlir/lite/tests/legalize-tf.mlir | 183 +---------------- .../mlir/lite/transforms/legalize_tf.cc | 186 +----------------- tensorflow/lite/testing/op_tests/binary_op.py | 13 -- tensorflow/lite/testing/op_tests/where.py | 10 - 5 files changed, 42 insertions(+), 379 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc b/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc index 99894ede5f0..427b9c692a7 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc +++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc @@ -147,10 +147,18 @@ bool IsI64Type(Type element_type) { bool VerifyAddOpShapeConstraints(AddOp op) { auto element_type = getElementTypeOrSelf(op.output().getType()); - // Allows F32, QI8, QUI8 and I32 outputs when the operands have valid shapes, + // Allows F32, QI8, and QUI8 outputs when the operands have valid shapes, // which are broadcastable shapes up to five dimension or have same shapes. if (element_type.isF32() || IsQI8Type(element_type) || - IsQUI8Type(element_type) || IsI32Type(element_type)) { + IsQUI8Type(element_type)) { + return VerifyOperandsHaveSameShapesOrBroadcastableShape( + /*op=*/op.getOperation(), /*indices=*/ArrayRef{0, 1}, + /*max_bcast_rank=*/5); + } + + // Allows I32 output when the operands have valid shapes, which are + // broadcastable shapes up to four dimension or have same shapes. 
+ if (IsI32Type(element_type)) { return VerifyOperandsHaveSameShapesOrBroadcastableShape( /*op=*/op.getOperation(), /*indices=*/ArrayRef{0, 1}, /*max_bcast_rank=*/4); @@ -202,13 +210,20 @@ bool VerifyMulOpShapeConstraints(MulOp op) { } return VerifyOperandsHaveSameShapesOrBroadcastableShape( /*op=*/op.getOperation(), /*indices=*/ArrayRef{0, 1}, - /*max_bcast_rank=*/4); + /*max_bcast_rank=*/5); } - // Allows I32, QI16 and F32 outputs when the operands have valid shapes, which - // are broadcastable shapes up to four dimension or have same shapes. - if (IsI32Type(element_type) || IsQI16Type(element_type) || - element_type.isF32()) { + // Allows F32 output when the operands have valid shapes, which are + // broadcastable shapes up to five dimension or have same shapes. + if (element_type.isF32()) { + return VerifyOperandsHaveSameShapesOrBroadcastableShape( + /*op=*/op.getOperation(), /*indices=*/ArrayRef{0, 1}, + /*max_bcast_rank=*/5); + } + + // Allows I32 and QI16 outputs when the operands have valid shapes, which are + // broadcastable shapes up to four dimension or have same shapes. + if (IsI32Type(element_type) || IsQI16Type(element_type)) { return VerifyOperandsHaveSameShapesOrBroadcastableShape( /*op=*/op.getOperation(), /*indices=*/ArrayRef{0, 1}, /*max_bcast_rank=*/4); diff --git a/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir b/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir index a5174e7c438..74a33817d32 100644 --- a/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir +++ b/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir @@ -25,6 +25,13 @@ func @testAddHighDimsHaveSameShape(%arg0: tensor<1x2x3x4x5x6x7x8xi32>, %arg1: te return %0 : tensor<1x2x3x4x5x6x7x8xi32> } +// CHECK-LABEL: testAddTooHighBroadcastableDims +func @testAddTooHighBroadcastableDims(%arg0: tensor<1x2x3x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { + // expected-error @+1 {{'tfl.add' op failed to verify that operand #0 and operand #1 have the same shape or broadcastable shapes within the rank 4}} + %0 = "tf.Add"(%arg0, %arg1) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> + return %0 : tensor<1x2x3x4x5x6xi32> +} + func @LeakyRelu(%arg0: tensor<1xf32>) -> tensor<1xf32> { %2 = "tf.LeakyRelu"(%arg0) {alpha = 0.1 : f32} : (tensor<1xf32>) -> tensor<1xf32> return %2: tensor<1xf32> @@ -1523,11 +1530,7 @@ func @select_v2_with_6d_broadcasting(%arg0: tensor<1x1x1x1x3x1xi1>, %arg1 : tens %0 = "tf.SelectV2"(%arg0, %arg1, %arg2): (tensor<1x1x1x1x3x1xi1>, tensor<1x1x1x1x1x4xf32>, tensor<1x1x1x2x1x1xf32>) -> tensor<1x1x1x2x3x4xf32> return %0 : tensor<1x1x1x2x3x4xf32> // CHECK-LABEL: select_v2_with_6d_broadcasting -// CHECK: [[CST:%.*]] = constant dense<[1, 1, 1, 2, 3, 4]> : tensor<6xi64> -// CHECK: [[BCT:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) -// CHECK: [[BCT_0:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) -// CHECK: [[BCT_1:%.*]] = "tfl.broadcast_to"(%arg2, [[CST]]) -// CHECK: "tfl.select"([[BCT]], [[BCT_0]], [[BCT_1]]) +// CHECK: "tf.SelectV2"(%arg0, %arg1, %arg2) } // ----- @@ -1537,9 +1540,7 @@ func @maximum_with_6d_broadcasting(%arg0: tensor<1x1x1x1x8x16xf32>, %arg1: tenso return %0 : tensor<1x1x1x1x8x16xf32> // CHECK-LABEL: maximum_with_6d_broadcasting -// CHECK: [[CST:%.*]] = constant dense<[1, 1, 1, 1, 8, 16]> : tensor<6xi64> -// CHECK: [[BCT:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) -// CHECK: "tfl.maximum"(%arg0, [[BCT]]) +// CHECK: "tf.Maximum"(%arg0, %arg1) } // ----- @@ -1548,169 +1549,5 @@ func @add_with_int32_5d_inputs(%arg0: 
tensor<1x1x1x3x1xi32>, %arg1 : tensor<1x1x %0 = "tf.Add"(%arg0, %arg1): (tensor<1x1x1x3x1xi32>, tensor<1x1x1x1x4xi32>) -> tensor<1x1x1x3x4xi32> return %0 : tensor<1x1x1x3x4xi32> // CHECK-LABEL: add_with_int32_5d_inputs -// CHECK: [[CST:%.*]] = constant dense<[1, 1, 1, 3, 4]> : tensor<5xi64> -// CHECK: [[BCT:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) -// CHECK: [[BCT_0:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) -// CHECK: tfl.add [[BCT]], [[BCT_0]] -} - -// CHECK-LABEL: testAddWithBroadcastToOps -func @testAddWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { - // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> - // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) - // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) - // CHECK: tfl.add [[BCAST]], [[BCAST_1]] {fused_activation_function = "NONE"} : tensor<1x2x3x4x5x6xi32> - %0 = "tf.Add"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> - return %0 : tensor<1x2x3x4x5x6xi32> -} - -// CHECK-LABEL: testSubWithBroadcastToOps -func @testSubWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { - // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> - // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) - // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) - // CHECK: tfl.sub [[BCAST]], [[BCAST_1]] {fused_activation_function = "NONE"} : tensor<1x2x3x4x5x6xi32> - %0 = "tf.Sub"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> - return %0 : tensor<1x2x3x4x5x6xi32> -} - -// CHECK-LABEL: testMulWithBroadcastToOps -func @testMulWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { - // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> - // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) - // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) - // CHECK: tfl.mul [[BCAST]], [[BCAST_1]] {fused_activation_function = "NONE"} : tensor<1x2x3x4x5x6xi32> - %0 = "tf.Mul"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> - return %0 : tensor<1x2x3x4x5x6xi32> -} - -// CHECK-LABEL: testDivWithBroadcastToOps -func @testDivWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { - // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> - // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) - // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) - // CHECK: tfl.div [[BCAST]], [[BCAST_1]] {fused_activation_function = "NONE"} : tensor<1x2x3x4x5x6xi32> - %0 = "tf.Div"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> - return %0 : tensor<1x2x3x4x5x6xi32> -} - -// CHECK-LABEL: testFloorDivWithBroadcastToOps -func @testFloorDivWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { - // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> - // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) - // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) - // CHECK: tfl.floor_div [[BCAST]], [[BCAST_1]] : tensor<1x2x3x4x5x6xi32> - %0 = "tf.FloorDiv"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> 
tensor<1x2x3x4x5x6xi32> - return %0 : tensor<1x2x3x4x5x6xi32> -} - -// CHECK-LABEL: testFloorModWithBroadcastToOps -func @testFloorModWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { - // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> - // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) - // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) - // CHECK: "tfl.floor_mod"([[BCAST]], [[BCAST_1]]) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x6xi32>) -> tensor<1x2x3x4x5x6xi32> - %0 = "tf.FloorMod"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> - return %0 : tensor<1x2x3x4x5x6xi32> -} - -// CHECK-LABEL: testPowWithBroadcastToOps -func @testPowWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { - // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> - // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) - // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) - // CHECK: tfl.pow [[BCAST]], [[BCAST_1]] : tensor<1x2x3x4x5x6xi32> - %0 = "tf.Pow"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> - return %0 : tensor<1x2x3x4x5x6xi32> -} - -// CHECK-LABEL: testMaximumWithBroadcastToOps -func @testMaximumWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { - // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> - // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) - // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) - // CHECK: "tfl.maximum"([[BCAST]], [[BCAST_1]]) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x6xi32>) -> tensor<1x2x3x4x5x6xi32> - %0 = "tf.Maximum"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> - return %0 : tensor<1x2x3x4x5x6xi32> -} - -// CHECK-LABEL: testMinimumWithBroadcastToOps -func @testMinimumWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { - // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> - // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) - // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) - // CHECK: "tfl.minimum"([[BCAST]], [[BCAST_1]]) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x6xi32>) -> tensor<1x2x3x4x5x6xi32> - %0 = "tf.Minimum"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> - return %0 : tensor<1x2x3x4x5x6xi32> -} - -// CHECK-LABEL: testSelectV2WithBroadcastToOps -func @testSelectV2WithBroadcastToOps(%arg0: tensor<1x2x1x4x1x6xi1>, %arg1: tensor<1x2x3x4x1x1xi32>, %arg2: tensor<1x2x1x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { - // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> - // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) - // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) - // CHECK: [[BCAST_2:%.*]] = "tfl.broadcast_to"(%arg2, [[CST]]) - // CHECK: "tfl.select"([[BCAST]], [[BCAST_1]], [[BCAST_2]]) - %0 = "tf.SelectV2"(%arg0, %arg1, %arg2) : (tensor<1x2x1x4x1x6xi1>, tensor<1x2x3x4x1x1xi32>, tensor<1x2x1x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> - return %0 : tensor<1x2x3x4x5x6xi32> -} - -// CHECK-LABEL: testLessEqualWithBroadcastToOps -func @testLessEqualWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: 
tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> { - // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> - // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) - // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) - // CHECK: "tfl.less_equal"([[BCAST]], [[BCAST_1]]) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x6xi32>) -> tensor<1x2x3x4x5x6xi1> - %0 = "tf.LessEqual"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> - return %0 : tensor<1x2x3x4x5x6xi1> -} - -// CHECK-LABEL: testGreaterEqualWithBroadcastToOps -func @testGreaterEqualWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> { - // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> - // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) - // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) - // CHECK: "tfl.greater_equal"([[BCAST]], [[BCAST_1]]) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x6xi32>) -> tensor<1x2x3x4x5x6xi1> - %0 = "tf.GreaterEqual"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> - return %0 : tensor<1x2x3x4x5x6xi1> -} - -// CHECK-LABEL: testEqualWithBroadcastToOps -func @testEqualWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> { - // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> - // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) - // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) - // CHECK: "tfl.equal"([[BCAST]], [[BCAST_1]]) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x6xi32>) -> tensor<1x2x3x4x5x6xi1> - %0 = "tf.Equal"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> - return %0 : tensor<1x2x3x4x5x6xi1> -} - -// CHECK-LABEL: testNotEqualWithBroadcastToOps -func @testNotEqualWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> { - // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> - // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) - // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) - // CHECK: "tfl.not_equal"([[BCAST]], [[BCAST_1]]) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x6xi32>) -> tensor<1x2x3x4x5x6xi1> - %0 = "tf.NotEqual"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> - return %0 : tensor<1x2x3x4x5x6xi1> -} - -// CHECK-LABEL: testLessWithBroadcastToOps -func @testLessWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> { - // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> - // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) - // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) - // CHECK: "tfl.less"([[BCAST]], [[BCAST_1]]) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x6xi32>) -> tensor<1x2x3x4x5x6xi1> - %0 = "tf.Less"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> - return %0 : tensor<1x2x3x4x5x6xi1> -} - -// CHECK-LABEL: testGreaterWithBroadcastToOps -func @testGreaterWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> { - // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> - // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) - // CHECK: 
[[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) - // CHECK: "tfl.greater"([[BCAST]], [[BCAST_1]]) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x6xi32>) -> tensor<1x2x3x4x5x6xi1> - %0 = "tf.Greater"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> - return %0 : tensor<1x2x3x4x5x6xi1> +// CHECK: "tf.Add"(%arg0, %arg1) } diff --git a/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc index 2f8370e2b96..7d6866dc570 100644 --- a/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc +++ b/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc @@ -631,156 +631,6 @@ struct LegalizeUnidirectionalSequenceRnn : public RewritePattern { } }; -// Put two TFL BroadcastTo ops in front of the given TF binary broadcast op to -// to make binary broadcast-able op conversion always successful and does not -// require flex delegate. -template -class ApplyExplicitBroadcasting : public OpRewritePattern { - public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(SourceOp src_op, - PatternRewriter& rewriter) const override { - Operation* op = static_cast(src_op); - auto lhs = op->getOperand(0); - auto rhs = op->getOperand(1); - - // Should have static shapes to calculate the broadcasted shape. - if (!lhs.getType().cast().hasStaticShape() || - !rhs.getType().cast().hasStaticShape()) { - return failure(); - } - - // Calculate the broadcasted shape. - SmallVector result_shape; - if (!OpTrait::util::getBroadcastedShape( - lhs.getType().cast().getShape(), - rhs.getType().cast().getShape(), result_shape)) { - return failure(); - } - - RankedTensorType result_type = RankedTensorType::get( - result_shape, getElementTypeOrSelf(op->getResult(0).getType())); - - // Create a const op, that stores the above broadcasted shape. - auto new_shape_attr = mlir::DenseIntElementsAttr::get( - RankedTensorType::get(result_shape.size(), rewriter.getIntegerType(64)), - result_shape); - auto new_shape = rewriter.create(op->getLoc(), new_shape_attr); - - // Apply BroadcastTo ops to each input. - auto broadcast_type = RankedTensorType::get( - result_shape, getElementTypeOrSelf(lhs.getType())); - - if (result_type.getShape() != lhs.getType().cast().getShape()) { - lhs = rewriter - .create(op->getLoc(), broadcast_type, lhs, - new_shape) - .output(); - } - if (result_type.getShape() != rhs.getType().cast().getShape()) { - rhs = rewriter - .create(op->getLoc(), broadcast_type, rhs, - new_shape) - .output(); - } - - // Recreate an op with the above Broadcast op results. - rewriter.replaceOpWithNewOp(op, result_type, lhs, rhs); - return success(); - } -}; - -// This specialization is for TF SelectV2 op. SelectV2 op have three inputs and -// they should have broadcastable shapes. -template <> -class ApplyExplicitBroadcasting - : public OpRewritePattern { - public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(TF::SelectV2Op src_op, - PatternRewriter& rewriter) const override { - Operation* op = static_cast(src_op); - auto cond = op->getOperand(0); - auto lhs = op->getOperand(1); - auto rhs = op->getOperand(2); - - // Should have static shapes to calculate the broadcasted shape. - if (!lhs.getType().cast().hasStaticShape() || - !rhs.getType().cast().hasStaticShape() || - !cond.getType().cast().hasStaticShape()) { - return failure(); - } - - // Calculate the broadcasted shape. 
- SmallVector broadcasted_shape; - if (!OpTrait::util::getBroadcastedShape( - lhs.getType().cast().getShape(), - rhs.getType().cast().getShape(), broadcasted_shape)) { - return failure(); - } - - SmallVector result_shape; - if (!OpTrait::util::getBroadcastedShape( - broadcasted_shape, cond.getType().cast().getShape(), - result_shape)) { - return failure(); - } - - // Create a const op, that stores the above broadcasted shape. - auto shape_type = - RankedTensorType::get(result_shape.size(), rewriter.getIntegerType(64)); - auto new_shape_attr = - mlir::DenseIntElementsAttr::get(shape_type, result_shape); - auto new_shape = rewriter.create(op->getLoc(), new_shape_attr); - - // Apply BroadcastTo ops to each input. - auto cond_result_type = - RankedTensorType::get(result_shape, rewriter.getIntegerType(1)); - auto result_type = RankedTensorType::get( - result_shape, getElementTypeOrSelf(lhs.getType())); - - if (result_shape != cond.getType().cast().getShape()) { - cond = rewriter - .create(op->getLoc(), cond_result_type, - cond, new_shape) - .output(); - } - if (result_shape != lhs.getType().cast().getShape()) { - lhs = rewriter - .create(op->getLoc(), result_type, lhs, - new_shape) - .output(); - } - if (result_shape != rhs.getType().cast().getShape()) { - rhs = rewriter - .create(op->getLoc(), result_type, rhs, - new_shape) - .output(); - } - - // Recreate an op with the above Broadcast op results. - rewriter.replaceOpWithNewOp(op, result_type, cond, lhs, - rhs); - return success(); - } -}; - -void applyPatterns(FuncOp func, ConversionTarget& target, - const OwningRewritePatternList& patterns) { - // Keep trying to convert. - // TODO(karimnosseir): This is similar to what apply greedy patterns does. - // Look if there is a function that tries until it converge. - // Currently unit-test doesn't do multiple tries, so we need this. - const int max_iterations = 15; - for (int i = 0; i < max_iterations; ++i) { - if (failed(applyPartialConversion(func, target, patterns))) { - return; - } - } -} - void LegalizeTF::runOnFunction() { OwningRewritePatternList patterns; auto* context = &getContext(); @@ -831,32 +681,16 @@ void LegalizeTF::runOnFunction() { return success(current_thread_id == llvm::get_threadid()); }); - applyPatterns(func, target, patterns); - - // Explict BroadcastTo addition for left-over broadcast-able ops. - // The following pattern matchings should be done after the other legalization - // rules in order not to add unnecessary BroadcastTo ops. - patterns.insert, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting>(context); - - applyPatterns(func, target, patterns); + // Keep trying to convert. + // TODO(karimnosseir): This is similar to what apply greedy patterns does. + // Look if there is a function that tries until it converge. + // Currently unit-test doesn't do multiple tries, so we need this. 
+ const int max_iterations = 15; + for (int i = 0; i < max_iterations; ++i) { + if (failed(applyPartialConversion(func, target, patterns))) { + return; + } + } } } // namespace diff --git a/tensorflow/lite/testing/op_tests/binary_op.py b/tensorflow/lite/testing/op_tests/binary_op.py index 936563cc63d..17ed2f3522d 100644 --- a/tensorflow/lite/testing/op_tests/binary_op.py +++ b/tensorflow/lite/testing/op_tests/binary_op.py @@ -178,19 +178,6 @@ def make_binary_op_tests(options, }, ] - # High dimension broadcasting support in MLIR converter. - if options.use_experimental_converter: - test_parameters = test_parameters + [ - { - "dtype": [tf.float32], - "input_shape_1": [[8, 7, 6, 5, 4, 3, 2, 1]], - "input_shape_2": [[4, 3, 2, 1]], - "activation": [False], - "fully_quantize": [False], - "dynamic_range_quantize": [False], - }, - ] - # test_parameters include fully_quantize option only when # allow_fully_quantize is True. if not allow_fully_quantize: diff --git a/tensorflow/lite/testing/op_tests/where.py b/tensorflow/lite/testing/op_tests/where.py index df91e195820..49802422e3f 100644 --- a/tensorflow/lite/testing/op_tests/where.py +++ b/tensorflow/lite/testing/op_tests/where.py @@ -35,16 +35,6 @@ def make_where_tests(options): }, ] - # High dimension broadcasting support in MLIR converter. - if options.use_experimental_converter: - test_parameters = test_parameters + [ - { - "input_dtype": [tf.float32, tf.int32], - "input_shape_set": [([8, 7, 6, 5, 4, 3, 2, 1], [4, 3, 2, 1]),], - "use_where_v2": [True], - }, - ] - def build_graph(parameters): """Build the where op testing graph.""" input_value1 = tf.compat.v1.placeholder( From bd6b557c02a5cc1d094a7bb180b9779121a58520 Mon Sep 17 00:00:00 2001 From: Thai Nguyen Date: Thu, 23 Jul 2020 03:38:17 -0700 Subject: [PATCH 1143/2522] Support multiple models in TFLite selective build PiperOrigin-RevId: 322756668 Change-Id: Ib09472f83b9b9a2865560c4df00a24f2961b38f0 --- tensorflow/lite/build_def.bzl | 6 +- tensorflow/lite/java/src/main/native/BUILD | 1 + tensorflow/lite/testing/BUILD | 26 ++++++ .../lite/testing/selective_build_test.cc | 80 +++++++++++++++++++ 4 files changed, 112 insertions(+), 1 deletion(-) create mode 100644 tensorflow/lite/testing/selective_build_test.cc diff --git a/tensorflow/lite/build_def.bzl b/tensorflow/lite/build_def.bzl index ad43b56743a..a3eecb4e35f 100644 --- a/tensorflow/lite/build_def.bzl +++ b/tensorflow/lite/build_def.bzl @@ -755,7 +755,7 @@ def tflite_custom_cc_library(name, models = [], srcs = [], deps = [], visibility if models: gen_selected_ops( name = "%s_registration" % name, - model = models[0], + model = models, ) real_srcs.append(":%s_registration" % name) real_deps.append("//tensorflow/lite/java/src/main/native:selected_ops_jni") @@ -766,6 +766,10 @@ def tflite_custom_cc_library(name, models = [], srcs = [], deps = [], visibility native.cc_library( name = name, srcs = real_srcs, + hdrs = [ + # TODO(b/161323860) replace this by generated header. 
+ "//tensorflow/lite/java/src/main/native:op_resolver.h", + ], copts = tflite_copts(), linkopts = select({ "//tensorflow:windows": [], diff --git a/tensorflow/lite/java/src/main/native/BUILD b/tensorflow/lite/java/src/main/native/BUILD index 52f79615a9f..aba288a314d 100644 --- a/tensorflow/lite/java/src/main/native/BUILD +++ b/tensorflow/lite/java/src/main/native/BUILD @@ -71,5 +71,6 @@ exports_files( [ "exported_symbols.lds", "version_script.lds", + "op_resolver.h", ], ) diff --git a/tensorflow/lite/testing/BUILD b/tensorflow/lite/testing/BUILD index 6452d511acc..b2055a9904d 100644 --- a/tensorflow/lite/testing/BUILD +++ b/tensorflow/lite/testing/BUILD @@ -4,6 +4,7 @@ load( "gen_zipped_test_file", "generated_test_models_all", "merged_test_models", + "tflite_custom_cc_library", ) load("//tensorflow/lite:special_rules.bzl", "tflite_portable_test_suite") load("//tensorflow/lite/testing:tflite_model_test.bzl", "tflite_model_test") @@ -516,6 +517,31 @@ cc_library( ], ) +# A selective built tflite for testing. +tflite_custom_cc_library( + name = "test_tflite_lib", + models = [ + "//tensorflow/lite:testdata/add.bin", + "//tensorflow/lite:testdata/lstm.bin", + ], +) + +cc_test( + name = "selective_build_test", + srcs = ["selective_build_test.cc"], + data = [ + "//tensorflow/lite:testdata/add.bin", + "//tensorflow/lite:testdata/lstm.bin", + ], + deps = [ + ":test_tflite_lib", + "//tensorflow/core:tflite_portable_logging", + "//tensorflow/lite:framework", + "//tensorflow/lite/c:common", + "@com_google_googletest//:gtest", + ], +) + pybind_extension( name = "_pywrap_string_util", srcs = [ diff --git a/tensorflow/lite/testing/selective_build_test.cc b/tensorflow/lite/testing/selective_build_test.cc new file mode 100644 index 00000000000..ad23e382a8d --- /dev/null +++ b/tensorflow/lite/testing/selective_build_test.cc @@ -0,0 +1,80 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include + +#include +#include +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/interpreter.h" +#include "tensorflow/lite/java/src/main/native/op_resolver.h" +#include "tensorflow/lite/model.h" +#include "tensorflow/lite/model_builder.h" + +namespace tflite { +bool RunWithRandomInputs(const std::string& filename) { + std::unique_ptr model = + tflite::FlatBufferModel::BuildFromFile(filename.c_str()); + + // Build the interpreter + std::unique_ptr resolver = CreateOpResolver(); + std::unique_ptr interpreter; + if (tflite::InterpreterBuilder(*model, *resolver)(&interpreter) != + kTfLiteOk) { + LOG(ERROR) << "Could not initialize interpreter for TFLite model."; + return false; + } + + // Resize input tensors, if desired. + if (interpreter->AllocateTensors() != kTfLiteOk) { + LOG(ERROR) << "Could not allocate tensor."; + return false; + } + + // Fill the random data. 
+ std::vector> sample; + for (int tensor_idx : interpreter->inputs()) { + auto tensor = interpreter->tensor(tensor_idx); + std::vector data(tensor->bytes); + for (auto it = data.begin(); it != data.end(); ++it) { + *it = random(); + } + tensor->data.raw = reinterpret_cast(data.data()); + sample.push_back(data); + } + + // Running inference. + if (interpreter->Invoke() != kTfLiteOk) { + LOG(ERROR) << "Failed to run the model."; + return false; + } + return true; +} + +TEST(SelectiveBuiltTest, AddModel) { + std::string model = "third_party/tensorflow/lite/testdata/add.bin"; + EXPECT_THAT(RunWithRandomInputs(model), true); +} + +TEST(SelectiveBuiltTest, LGTMModel) { + std::string model = "third_party/tensorflow/lite/testdata/lstm.bin"; + EXPECT_THAT(RunWithRandomInputs(model), true); +} +} // namespace tflite + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} From e4a48da690fac3443825b535ec02cb31c5625337 Mon Sep 17 00:00:00 2001 From: Jaesung Chung Date: Thu, 23 Jul 2020 04:52:08 -0700 Subject: [PATCH 1144/2522] Add new pattern matching rules for TFL Broadcastable ops, which have high dimenion inputs. - Low dimensions <= 4, for example, should avoid adding BroadcastTo op as usual, which means we need to lower broadcast-able ops as before without BroadcastTo op and lower the BroadcastTo op to the hardware accelerator supported ops as well. - This explicit BroadcastTo op needs to be inserted only when a higher dimension is needed, which will unlock the new opportunity. - There are the broadcast-able 20 TFLite ops (about 15 % of TFLite op set) as the followings: Comparison: LessEqual, GreaterEqual, NotEqual, Greater, Less, Equal (up to four dim.) Activation: PRelu (up to four dim.) Arithmetic: Add, Mul, Div, Sub, FloorDiv, FloorMod, Pow, Maximum, Minimum, SquaredDifference (up to four or five dim.) Dimension: SelectV2 (up to four dim.), BroadcastTo (supported via lowering) PiperOrigin-RevId: 322763761 Change-Id: I637e12f05ac8d4b9e61c1355d3bc00dc8ffe4756 --- tensorflow/compiler/mlir/lite/ir/tfl_ops.cc | 29 +-- .../compiler/mlir/lite/tests/legalize-tf.mlir | 183 ++++++++++++++++- .../mlir/lite/transforms/legalize_patterns.td | 2 +- .../mlir/lite/transforms/legalize_tf.cc | 186 +++++++++++++++++- tensorflow/lite/testing/op_tests/binary_op.py | 13 ++ tensorflow/lite/testing/op_tests/where.py | 10 + 6 files changed, 380 insertions(+), 43 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc b/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc index 427b9c692a7..99894ede5f0 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc +++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc @@ -147,18 +147,10 @@ bool IsI64Type(Type element_type) { bool VerifyAddOpShapeConstraints(AddOp op) { auto element_type = getElementTypeOrSelf(op.output().getType()); - // Allows F32, QI8, and QUI8 outputs when the operands have valid shapes, + // Allows F32, QI8, QUI8 and I32 outputs when the operands have valid shapes, // which are broadcastable shapes up to five dimension or have same shapes. if (element_type.isF32() || IsQI8Type(element_type) || - IsQUI8Type(element_type)) { - return VerifyOperandsHaveSameShapesOrBroadcastableShape( - /*op=*/op.getOperation(), /*indices=*/ArrayRef{0, 1}, - /*max_bcast_rank=*/5); - } - - // Allows I32 output when the operands have valid shapes, which are - // broadcastable shapes up to four dimension or have same shapes. 
- if (IsI32Type(element_type)) { + IsQUI8Type(element_type) || IsI32Type(element_type)) { return VerifyOperandsHaveSameShapesOrBroadcastableShape( /*op=*/op.getOperation(), /*indices=*/ArrayRef{0, 1}, /*max_bcast_rank=*/4); @@ -210,20 +202,13 @@ bool VerifyMulOpShapeConstraints(MulOp op) { } return VerifyOperandsHaveSameShapesOrBroadcastableShape( /*op=*/op.getOperation(), /*indices=*/ArrayRef{0, 1}, - /*max_bcast_rank=*/5); + /*max_bcast_rank=*/4); } - // Allows F32 output when the operands have valid shapes, which are - // broadcastable shapes up to five dimension or have same shapes. - if (element_type.isF32()) { - return VerifyOperandsHaveSameShapesOrBroadcastableShape( - /*op=*/op.getOperation(), /*indices=*/ArrayRef{0, 1}, - /*max_bcast_rank=*/5); - } - - // Allows I32 and QI16 outputs when the operands have valid shapes, which are - // broadcastable shapes up to four dimension or have same shapes. - if (IsI32Type(element_type) || IsQI16Type(element_type)) { + // Allows I32, QI16 and F32 outputs when the operands have valid shapes, which + // are broadcastable shapes up to four dimension or have same shapes. + if (IsI32Type(element_type) || IsQI16Type(element_type) || + element_type.isF32()) { return VerifyOperandsHaveSameShapesOrBroadcastableShape( /*op=*/op.getOperation(), /*indices=*/ArrayRef{0, 1}, /*max_bcast_rank=*/4); diff --git a/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir b/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir index 74a33817d32..a5174e7c438 100644 --- a/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir +++ b/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir @@ -25,13 +25,6 @@ func @testAddHighDimsHaveSameShape(%arg0: tensor<1x2x3x4x5x6x7x8xi32>, %arg1: te return %0 : tensor<1x2x3x4x5x6x7x8xi32> } -// CHECK-LABEL: testAddTooHighBroadcastableDims -func @testAddTooHighBroadcastableDims(%arg0: tensor<1x2x3x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { - // expected-error @+1 {{'tfl.add' op failed to verify that operand #0 and operand #1 have the same shape or broadcastable shapes within the rank 4}} - %0 = "tf.Add"(%arg0, %arg1) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> - return %0 : tensor<1x2x3x4x5x6xi32> -} - func @LeakyRelu(%arg0: tensor<1xf32>) -> tensor<1xf32> { %2 = "tf.LeakyRelu"(%arg0) {alpha = 0.1 : f32} : (tensor<1xf32>) -> tensor<1xf32> return %2: tensor<1xf32> @@ -1530,7 +1523,11 @@ func @select_v2_with_6d_broadcasting(%arg0: tensor<1x1x1x1x3x1xi1>, %arg1 : tens %0 = "tf.SelectV2"(%arg0, %arg1, %arg2): (tensor<1x1x1x1x3x1xi1>, tensor<1x1x1x1x1x4xf32>, tensor<1x1x1x2x1x1xf32>) -> tensor<1x1x1x2x3x4xf32> return %0 : tensor<1x1x1x2x3x4xf32> // CHECK-LABEL: select_v2_with_6d_broadcasting -// CHECK: "tf.SelectV2"(%arg0, %arg1, %arg2) +// CHECK: [[CST:%.*]] = constant dense<[1, 1, 1, 2, 3, 4]> : tensor<6xi64> +// CHECK: [[BCT:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) +// CHECK: [[BCT_0:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) +// CHECK: [[BCT_1:%.*]] = "tfl.broadcast_to"(%arg2, [[CST]]) +// CHECK: "tfl.select"([[BCT]], [[BCT_0]], [[BCT_1]]) } // ----- @@ -1540,7 +1537,9 @@ func @maximum_with_6d_broadcasting(%arg0: tensor<1x1x1x1x8x16xf32>, %arg1: tenso return %0 : tensor<1x1x1x1x8x16xf32> // CHECK-LABEL: maximum_with_6d_broadcasting -// CHECK: "tf.Maximum"(%arg0, %arg1) +// CHECK: [[CST:%.*]] = constant dense<[1, 1, 1, 1, 8, 16]> : tensor<6xi64> +// CHECK: [[BCT:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) +// CHECK: "tfl.maximum"(%arg0, [[BCT]]) } // ----- @@ -1549,5 
+1548,169 @@ func @add_with_int32_5d_inputs(%arg0: tensor<1x1x1x3x1xi32>, %arg1 : tensor<1x1x %0 = "tf.Add"(%arg0, %arg1): (tensor<1x1x1x3x1xi32>, tensor<1x1x1x1x4xi32>) -> tensor<1x1x1x3x4xi32> return %0 : tensor<1x1x1x3x4xi32> // CHECK-LABEL: add_with_int32_5d_inputs -// CHECK: "tf.Add"(%arg0, %arg1) +// CHECK: [[CST:%.*]] = constant dense<[1, 1, 1, 3, 4]> : tensor<5xi64> +// CHECK: [[BCT:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) +// CHECK: [[BCT_0:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) +// CHECK: tfl.add [[BCT]], [[BCT_0]] +} + +// CHECK-LABEL: testAddWithBroadcastToOps +func @testAddWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { + // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> + // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) + // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) + // CHECK: tfl.add [[BCAST]], [[BCAST_1]] {fused_activation_function = "NONE"} : tensor<1x2x3x4x5x6xi32> + %0 = "tf.Add"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> + return %0 : tensor<1x2x3x4x5x6xi32> +} + +// CHECK-LABEL: testSubWithBroadcastToOps +func @testSubWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { + // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> + // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) + // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) + // CHECK: tfl.sub [[BCAST]], [[BCAST_1]] {fused_activation_function = "NONE"} : tensor<1x2x3x4x5x6xi32> + %0 = "tf.Sub"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> + return %0 : tensor<1x2x3x4x5x6xi32> +} + +// CHECK-LABEL: testMulWithBroadcastToOps +func @testMulWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { + // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> + // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) + // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) + // CHECK: tfl.mul [[BCAST]], [[BCAST_1]] {fused_activation_function = "NONE"} : tensor<1x2x3x4x5x6xi32> + %0 = "tf.Mul"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> + return %0 : tensor<1x2x3x4x5x6xi32> +} + +// CHECK-LABEL: testDivWithBroadcastToOps +func @testDivWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { + // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> + // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) + // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) + // CHECK: tfl.div [[BCAST]], [[BCAST_1]] {fused_activation_function = "NONE"} : tensor<1x2x3x4x5x6xi32> + %0 = "tf.Div"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> + return %0 : tensor<1x2x3x4x5x6xi32> +} + +// CHECK-LABEL: testFloorDivWithBroadcastToOps +func @testFloorDivWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { + // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> + // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) + // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) + // CHECK: tfl.floor_div [[BCAST]], [[BCAST_1]] : tensor<1x2x3x4x5x6xi32> + %0 = 
"tf.FloorDiv"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> + return %0 : tensor<1x2x3x4x5x6xi32> +} + +// CHECK-LABEL: testFloorModWithBroadcastToOps +func @testFloorModWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { + // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> + // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) + // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) + // CHECK: "tfl.floor_mod"([[BCAST]], [[BCAST_1]]) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x6xi32>) -> tensor<1x2x3x4x5x6xi32> + %0 = "tf.FloorMod"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> + return %0 : tensor<1x2x3x4x5x6xi32> +} + +// CHECK-LABEL: testPowWithBroadcastToOps +func @testPowWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { + // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> + // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) + // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) + // CHECK: tfl.pow [[BCAST]], [[BCAST_1]] : tensor<1x2x3x4x5x6xi32> + %0 = "tf.Pow"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> + return %0 : tensor<1x2x3x4x5x6xi32> +} + +// CHECK-LABEL: testMaximumWithBroadcastToOps +func @testMaximumWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { + // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> + // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) + // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) + // CHECK: "tfl.maximum"([[BCAST]], [[BCAST_1]]) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x6xi32>) -> tensor<1x2x3x4x5x6xi32> + %0 = "tf.Maximum"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> + return %0 : tensor<1x2x3x4x5x6xi32> +} + +// CHECK-LABEL: testMinimumWithBroadcastToOps +func @testMinimumWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { + // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> + // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) + // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) + // CHECK: "tfl.minimum"([[BCAST]], [[BCAST_1]]) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x6xi32>) -> tensor<1x2x3x4x5x6xi32> + %0 = "tf.Minimum"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> + return %0 : tensor<1x2x3x4x5x6xi32> +} + +// CHECK-LABEL: testSelectV2WithBroadcastToOps +func @testSelectV2WithBroadcastToOps(%arg0: tensor<1x2x1x4x1x6xi1>, %arg1: tensor<1x2x3x4x1x1xi32>, %arg2: tensor<1x2x1x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { + // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> + // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) + // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) + // CHECK: [[BCAST_2:%.*]] = "tfl.broadcast_to"(%arg2, [[CST]]) + // CHECK: "tfl.select"([[BCAST]], [[BCAST_1]], [[BCAST_2]]) + %0 = "tf.SelectV2"(%arg0, %arg1, %arg2) : (tensor<1x2x1x4x1x6xi1>, tensor<1x2x3x4x1x1xi32>, tensor<1x2x1x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> + return %0 : tensor<1x2x3x4x5x6xi32> +} + +// CHECK-LABEL: testLessEqualWithBroadcastToOps +func 
@testLessEqualWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> { + // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> + // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) + // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) + // CHECK: "tfl.less_equal"([[BCAST]], [[BCAST_1]]) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x6xi32>) -> tensor<1x2x3x4x5x6xi1> + %0 = "tf.LessEqual"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> + return %0 : tensor<1x2x3x4x5x6xi1> +} + +// CHECK-LABEL: testGreaterEqualWithBroadcastToOps +func @testGreaterEqualWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> { + // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> + // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) + // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) + // CHECK: "tfl.greater_equal"([[BCAST]], [[BCAST_1]]) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x6xi32>) -> tensor<1x2x3x4x5x6xi1> + %0 = "tf.GreaterEqual"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> + return %0 : tensor<1x2x3x4x5x6xi1> +} + +// CHECK-LABEL: testEqualWithBroadcastToOps +func @testEqualWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> { + // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> + // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) + // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) + // CHECK: "tfl.equal"([[BCAST]], [[BCAST_1]]) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x6xi32>) -> tensor<1x2x3x4x5x6xi1> + %0 = "tf.Equal"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> + return %0 : tensor<1x2x3x4x5x6xi1> +} + +// CHECK-LABEL: testNotEqualWithBroadcastToOps +func @testNotEqualWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> { + // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> + // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) + // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) + // CHECK: "tfl.not_equal"([[BCAST]], [[BCAST_1]]) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x6xi32>) -> tensor<1x2x3x4x5x6xi1> + %0 = "tf.NotEqual"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> + return %0 : tensor<1x2x3x4x5x6xi1> +} + +// CHECK-LABEL: testLessWithBroadcastToOps +func @testLessWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> { + // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> + // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) + // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) + // CHECK: "tfl.less"([[BCAST]], [[BCAST_1]]) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x6xi32>) -> tensor<1x2x3x4x5x6xi1> + %0 = "tf.Less"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> + return %0 : tensor<1x2x3x4x5x6xi1> +} + +// CHECK-LABEL: testGreaterWithBroadcastToOps +func @testGreaterWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> { + // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> + // CHECK: 
[[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) + // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) + // CHECK: "tfl.greater"([[BCAST]], [[BCAST_1]]) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x6xi32>) -> tensor<1x2x3x4x5x6xi1> + %0 = "tf.Greater"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> + return %0 : tensor<1x2x3x4x5x6xi1> } diff --git a/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td b/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td index a6adb8f4a61..235a5c65e96 100644 --- a/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td +++ b/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td @@ -256,7 +256,7 @@ def LegalizeAddv2 : Pat<(TF_AddV2Op $lhs, $rhs), (TFL_AddOp $lhs, $rhs, TFL_AF_None)>; def LegalizeBiasAdd : Pat< (TF_BiasAddOp F32Tensor:$l, F32Tensor:$r, IsDataFormatNHWC:$data_format), - (TFL_AddOp $l, $r, TFL_AF_None)>; + (TF_AddV2Op $l, $r)>; def LegalizeSub : Pat<(TF_SubOp $lhs, $rhs), (TFL_SubOp $lhs, $rhs, TFL_AF_None)>; def LegalizeMul : Pat<(TF_MulOp $lhs, $rhs), diff --git a/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc index 7d6866dc570..2f8370e2b96 100644 --- a/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc +++ b/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc @@ -631,6 +631,156 @@ struct LegalizeUnidirectionalSequenceRnn : public RewritePattern { } }; +// Put two TFL BroadcastTo ops in front of the given TF binary broadcast op to +// to make binary broadcast-able op conversion always successful and does not +// require flex delegate. +template +class ApplyExplicitBroadcasting : public OpRewritePattern { + public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(SourceOp src_op, + PatternRewriter& rewriter) const override { + Operation* op = static_cast(src_op); + auto lhs = op->getOperand(0); + auto rhs = op->getOperand(1); + + // Should have static shapes to calculate the broadcasted shape. + if (!lhs.getType().cast().hasStaticShape() || + !rhs.getType().cast().hasStaticShape()) { + return failure(); + } + + // Calculate the broadcasted shape. + SmallVector result_shape; + if (!OpTrait::util::getBroadcastedShape( + lhs.getType().cast().getShape(), + rhs.getType().cast().getShape(), result_shape)) { + return failure(); + } + + RankedTensorType result_type = RankedTensorType::get( + result_shape, getElementTypeOrSelf(op->getResult(0).getType())); + + // Create a const op, that stores the above broadcasted shape. + auto new_shape_attr = mlir::DenseIntElementsAttr::get( + RankedTensorType::get(result_shape.size(), rewriter.getIntegerType(64)), + result_shape); + auto new_shape = rewriter.create(op->getLoc(), new_shape_attr); + + // Apply BroadcastTo ops to each input. + auto broadcast_type = RankedTensorType::get( + result_shape, getElementTypeOrSelf(lhs.getType())); + + if (result_type.getShape() != lhs.getType().cast().getShape()) { + lhs = rewriter + .create(op->getLoc(), broadcast_type, lhs, + new_shape) + .output(); + } + if (result_type.getShape() != rhs.getType().cast().getShape()) { + rhs = rewriter + .create(op->getLoc(), broadcast_type, rhs, + new_shape) + .output(); + } + + // Recreate an op with the above Broadcast op results. + rewriter.replaceOpWithNewOp(op, result_type, lhs, rhs); + return success(); + } +}; + +// This specialization is for TF SelectV2 op. SelectV2 op have three inputs and +// they should have broadcastable shapes. 
+template <> +class ApplyExplicitBroadcasting + : public OpRewritePattern { + public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(TF::SelectV2Op src_op, + PatternRewriter& rewriter) const override { + Operation* op = static_cast(src_op); + auto cond = op->getOperand(0); + auto lhs = op->getOperand(1); + auto rhs = op->getOperand(2); + + // Should have static shapes to calculate the broadcasted shape. + if (!lhs.getType().cast().hasStaticShape() || + !rhs.getType().cast().hasStaticShape() || + !cond.getType().cast().hasStaticShape()) { + return failure(); + } + + // Calculate the broadcasted shape. + SmallVector broadcasted_shape; + if (!OpTrait::util::getBroadcastedShape( + lhs.getType().cast().getShape(), + rhs.getType().cast().getShape(), broadcasted_shape)) { + return failure(); + } + + SmallVector result_shape; + if (!OpTrait::util::getBroadcastedShape( + broadcasted_shape, cond.getType().cast().getShape(), + result_shape)) { + return failure(); + } + + // Create a const op, that stores the above broadcasted shape. + auto shape_type = + RankedTensorType::get(result_shape.size(), rewriter.getIntegerType(64)); + auto new_shape_attr = + mlir::DenseIntElementsAttr::get(shape_type, result_shape); + auto new_shape = rewriter.create(op->getLoc(), new_shape_attr); + + // Apply BroadcastTo ops to each input. + auto cond_result_type = + RankedTensorType::get(result_shape, rewriter.getIntegerType(1)); + auto result_type = RankedTensorType::get( + result_shape, getElementTypeOrSelf(lhs.getType())); + + if (result_shape != cond.getType().cast().getShape()) { + cond = rewriter + .create(op->getLoc(), cond_result_type, + cond, new_shape) + .output(); + } + if (result_shape != lhs.getType().cast().getShape()) { + lhs = rewriter + .create(op->getLoc(), result_type, lhs, + new_shape) + .output(); + } + if (result_shape != rhs.getType().cast().getShape()) { + rhs = rewriter + .create(op->getLoc(), result_type, rhs, + new_shape) + .output(); + } + + // Recreate an op with the above Broadcast op results. + rewriter.replaceOpWithNewOp(op, result_type, cond, lhs, + rhs); + return success(); + } +}; + +void applyPatterns(FuncOp func, ConversionTarget& target, + const OwningRewritePatternList& patterns) { + // Keep trying to convert. + // TODO(karimnosseir): This is similar to what apply greedy patterns does. + // Look if there is a function that tries until it converge. + // Currently unit-test doesn't do multiple tries, so we need this. + const int max_iterations = 15; + for (int i = 0; i < max_iterations; ++i) { + if (failed(applyPartialConversion(func, target, patterns))) { + return; + } + } +} + void LegalizeTF::runOnFunction() { OwningRewritePatternList patterns; auto* context = &getContext(); @@ -681,16 +831,32 @@ void LegalizeTF::runOnFunction() { return success(current_thread_id == llvm::get_threadid()); }); - // Keep trying to convert. - // TODO(karimnosseir): This is similar to what apply greedy patterns does. - // Look if there is a function that tries until it converge. - // Currently unit-test doesn't do multiple tries, so we need this. - const int max_iterations = 15; - for (int i = 0; i < max_iterations; ++i) { - if (failed(applyPartialConversion(func, target, patterns))) { - return; - } - } + applyPatterns(func, target, patterns); + + // Explict BroadcastTo addition for left-over broadcast-able ops. + // The following pattern matchings should be done after the other legalization + // rules in order not to add unnecessary BroadcastTo ops. 
+ patterns.insert, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting>(context); + + applyPatterns(func, target, patterns); } } // namespace diff --git a/tensorflow/lite/testing/op_tests/binary_op.py b/tensorflow/lite/testing/op_tests/binary_op.py index 17ed2f3522d..936563cc63d 100644 --- a/tensorflow/lite/testing/op_tests/binary_op.py +++ b/tensorflow/lite/testing/op_tests/binary_op.py @@ -178,6 +178,19 @@ def make_binary_op_tests(options, }, ] + # High dimension broadcasting support in MLIR converter. + if options.use_experimental_converter: + test_parameters = test_parameters + [ + { + "dtype": [tf.float32], + "input_shape_1": [[8, 7, 6, 5, 4, 3, 2, 1]], + "input_shape_2": [[4, 3, 2, 1]], + "activation": [False], + "fully_quantize": [False], + "dynamic_range_quantize": [False], + }, + ] + # test_parameters include fully_quantize option only when # allow_fully_quantize is True. if not allow_fully_quantize: diff --git a/tensorflow/lite/testing/op_tests/where.py b/tensorflow/lite/testing/op_tests/where.py index 49802422e3f..df91e195820 100644 --- a/tensorflow/lite/testing/op_tests/where.py +++ b/tensorflow/lite/testing/op_tests/where.py @@ -35,6 +35,16 @@ def make_where_tests(options): }, ] + # High dimension broadcasting support in MLIR converter. + if options.use_experimental_converter: + test_parameters = test_parameters + [ + { + "input_dtype": [tf.float32, tf.int32], + "input_shape_set": [([8, 7, 6, 5, 4, 3, 2, 1], [4, 3, 2, 1]),], + "use_where_v2": [True], + }, + ] + def build_graph(parameters): """Build the where op testing graph.""" input_value1 = tf.compat.v1.placeholder( From a3250240d411ad44932994a28c26e475a27fe880 Mon Sep 17 00:00:00 2001 From: Frederic Bastien Date: Wed, 22 Jul 2020 07:36:37 -0700 Subject: [PATCH 1145/2522] [XLA] Extend the Algebraic Simplier to convert Pow(x, 3) -> x*x*x. This is faster. --- .../xla/service/algebraic_simplifier.cc | 11 ++++++++ .../xla/service/algebraic_simplifier_test.cc | 26 +++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index db1c86d9fe3..051ed8125aa 100755 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -3088,6 +3088,17 @@ Status AlgebraicSimplifierVisitor::HandlePower(HloInstruction* power) { HloOpcode::kMultiply, lhs, lhs)); } + // Pow(A, 3) is used in GELU. 
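+  // For example, the tanh-based GELU approximation
+  //   0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))
+  // contains an x^3 term, so rewriting pow(A, 3) as two multiplies avoids a
+  // comparatively expensive pow call on that path.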
+ VLOG(10) << "trying transform [pow(A, 3) => A*A*A]: " << power->ToString(); + if (IsAll(rhs, 3)) { + HloInstruction * tmp = computation_->AddInstruction( + HloInstruction::CreateBinary( + power->shape(), HloOpcode::kMultiply, lhs, lhs)); + return ReplaceWithNewInstruction( + power, HloInstruction::CreateBinary(power->shape(), + HloOpcode::kMultiply, lhs, tmp)); + } + VLOG(10) << "trying transform [pow(A, -1) => 1/A]: " << power->ToString(); if (IsAll(rhs, -1)) { return ReplaceWithNewInstruction( diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index ea21c7c1d21..6a43fc8cccd 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -1568,6 +1568,32 @@ TEST_F(AlgebraicSimplifierTest, Pow2) { GmockMatch(m::Multiply(m::Parameter(0), m::Parameter(0)))); } +// Test that pow(A, 3) is simplified to A*A*A. +TEST_F(AlgebraicSimplifierTest, Pow3) { + auto m = CreateNewVerifiedModule(); + Shape r0f32 = ShapeUtil::MakeShape(F32, {}); + HloComputation::Builder builder(TestName()); + HloInstruction* param0 = builder.AddInstruction( + HloInstruction::CreateParameter(0, r0f32, "param0")); + HloInstruction* three = builder.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0(3))); + builder.AddInstruction( + HloInstruction::CreateBinary(r0f32, HloOpcode::kPower, param0, three)); + + auto computation = m->AddEntryComputation(builder.Build()); + + EXPECT_THAT(computation->root_instruction(), + GmockMatch(m::Power(m::Parameter(0), m::Op().Is(three)))); + + AlgebraicSimplifier simplifier(default_options_); + ASSERT_TRUE(simplifier.Run(m.get()).ValueOrDie()); + + EXPECT_THAT(computation->root_instruction(), + GmockMatch(m::Multiply(m::Parameter(0), + m::Multiply(m::Parameter(0), + m::Parameter(0))))); +} + // Test that pow(A, -1) is simplified to 1/A. TEST_F(AlgebraicSimplifierTest, PowNegative1) { auto m = CreateNewVerifiedModule(); From dfe6a8ea3725f57be1eaffc2d38c55c18cbd287a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 23 Jul 2020 06:25:46 -0700 Subject: [PATCH 1146/2522] Support legalizing TF.XlaConv op in the TFLite converter XlaConv is a TF op with the same semantics as the Conv HLO in XLA. This change adds logic for legalizing it via existing passes as well as sets up the infrastructure for legalizing other HLO equivalent TF ops. 
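As a rough illustration (simplified from the prepare-tf test added below; operand names are placeholders and attributes are elided), the intended end-to-end effect in the converter is:

  %0 = "tf.XlaConv"(%input, %filter, %window_strides, %padding, %lhs_dilation, %rhs_dilation, %feature_group_count)
    ==> after prepare-tf and the usual TF->TFL legalization:
  %0 = "tfl.conv_2d"(%input, %transposed_filter, %zero_bias) {padding = "SAME", stride_h = 1 : i32, stride_w = 1 : i32, ...}

where the transposed filter and the all-zero bias are constants materialized during the conversion.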
PiperOrigin-RevId: 322773729 Change-Id: I3d16cf509f7ab80979893391745d18137495a48b --- tensorflow/compiler/mlir/lite/BUILD | 4 +++ .../compiler/mlir/lite/tests/prepare-tf.mlir | 19 +++++++++++++ .../mlir/lite/transforms/prepare_tf.cc | 27 +++++++++++++++++++ .../tensorflow/transforms/legalize_hlo.cc | 12 ++++++--- .../mlir/tensorflow/transforms/passes.h | 6 +++++ .../xla/transforms/legalize_tf_with_tf2xla.cc | 26 ++++++++++++------ .../compiler/mlir/xla/transforms/passes.h | 7 +++++ 7 files changed, 90 insertions(+), 11 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/BUILD b/tensorflow/compiler/mlir/lite/BUILD index 8a9704e21a8..46c1d6f533d 100644 --- a/tensorflow/compiler/mlir/lite/BUILD +++ b/tensorflow/compiler/mlir/lite/BUILD @@ -375,13 +375,17 @@ cc_library( ":tftext_utils", ":validators", "//tensorflow/compiler/mlir:op_or_arg_name_mapper", + "//tensorflow/compiler/mlir/hlo", "//tensorflow/compiler/mlir/lite/quantization:quantization_lib", "//tensorflow/compiler/mlir/tensorflow", "//tensorflow/compiler/mlir/tensorflow:convert_tensor", "//tensorflow/compiler/mlir/tensorflow:mangling_util", "//tensorflow/compiler/mlir/tensorflow:tensorflow_attributes", "//tensorflow/compiler/mlir/tensorflow:tensorflow_types", + "//tensorflow/compiler/mlir/tensorflow:tf_legalize_hlo", "//tensorflow/compiler/mlir/tensorflow:unroll_batch_matmul_pass", + "//tensorflow/compiler/mlir/xla:xla_legalize_tf", + "//tensorflow/compiler/mlir/xla:xla_legalize_tf_with_tf2xla", "//tensorflow/compiler/xla:status", "//tensorflow/compiler/xla:statusor", "//tensorflow/core:framework", diff --git a/tensorflow/compiler/mlir/lite/tests/prepare-tf.mlir b/tensorflow/compiler/mlir/lite/tests/prepare-tf.mlir index e8cbcc8d3b3..c82c557d22d 100644 --- a/tensorflow/compiler/mlir/lite/tests/prepare-tf.mlir +++ b/tensorflow/compiler/mlir/lite/tests/prepare-tf.mlir @@ -1,5 +1,7 @@ // RUN: tf-opt -tfl-prepare-tf %s | FileCheck %s +module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, producer = 268 : i32}} { + func @conv(tensor<256x32x32x3xf32>, tensor<3x3x3x16xf32>, tensor<256x3x32x32xf32>) -> (tensor<256x30x30x16xf32>, tensor<256x16x30x30xf32>, tensor<256x30x30x16xf32>, tensor<256x30x30x16xf32>, tensor<256x30x30x16xf32>) { ^bb0(%arg0: tensor<256x32x32x3xf32>, %arg1: tensor<3x3x3x16xf32>, %arg2: tensor<256x3x32x32xf32>) : // OK @@ -645,3 +647,20 @@ func @broadcast_to_with_unknown_shape_and_output(%arg0: tensor<1x2x3x4x5x6xf32>, // CHECK-LABEL: broadcast_to_with_unknown_shape_and_output // CHECK: "tf.BroadcastTo"(%arg0, %arg1) } + +// CHECK-LABEL: xla_conv +func @xla_conv(%arg0: tensor<4x8x8x16xf32>) -> tensor<4x8x8x16xf32> { + %0 = "tf.Const"() {value = dense<1.000000e+00> : tensor<3x3x16x16xf32>} : () -> tensor<3x3x16x16xf32> loc("Const_1") + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor loc("XlaConv/feature_group_count") + %2 = "tf.Const"() {value = dense<1> : tensor<2x2xi32>} : () -> tensor<2x2xi32> loc("XlaConv/padding") + %3 = "tf.Const"() {value = dense<1> : tensor<2xi32>} : () -> tensor<2xi32> loc("XlaConv/window_strides") + %4 = "tf.XlaConv"(%arg0, %0, %3, %2, %3, %3, %1) {device = "", dimension_numbers = "\18\02 \032\02\00\01@\03P\03Z\02\01\02b\02\01\02", precision_config = ""} : (tensor<4x8x8x16xf32>, tensor<3x3x16x16xf32>, tensor<2xi32>, tensor<2x2xi32>, tensor<2xi32>, tensor<2xi32>, tensor) -> tensor<4x8x8x16xf32> + return %4 : tensor<4x8x8x16xf32> + // CHECK: %[[CST:.*]] = constant dense<0.000000e+00> : tensor<16xf32> + // CHECK: %[[CST0:.*]] = constant dense<1.000000e+00> : 
tensor<16x3x3x16xf32> + // CHECK: %[[RES:.*]] = "tfl.conv_2d"(%arg0, %[[CST0]], %[[CST]]) {dilation_h_factor = 1 : i32, dilation_w_factor = 1 : i32, fused_activation_function = "NONE", padding = "SAME", stride_h = 1 : i32, stride_w = 1 : i32} : (tensor<4x8x8x16xf32>, tensor<16x3x3x16xf32>, tensor<16xf32>) -> tensor<4x8x8x16xf32> + // CHECK: return %[[RES]] +} + +} + diff --git a/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc b/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc index 9a883a3790e..65500d896c5 100644 --- a/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc +++ b/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc @@ -41,7 +41,9 @@ limitations under the License. #include "mlir/Analysis/LoopAnalysis.h" // from @llvm-project #include "mlir/Dialect/Quant/FakeQuantSupport.h" // from @llvm-project #include "mlir/Dialect/Quant/UniformSupport.h" // from @llvm-project +#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project #include "mlir/IR/Attributes.h" // from @llvm-project +#include "mlir/IR/Function.h" // from @llvm-project #include "mlir/IR/MLIRContext.h" // from @llvm-project #include "mlir/IR/PatternMatch.h" // from @llvm-project #include "mlir/IR/StandardTypes.h" // from @llvm-project @@ -49,6 +51,7 @@ limitations under the License. #include "mlir/Support/LLVM.h" // from @llvm-project #include "mlir/Support/LogicalResult.h" // from @llvm-project #include "mlir/Transforms/DialectConversion.h" // from @llvm-project +#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h" #include "tensorflow/compiler/mlir/lite/quantization/quantization_utils.h" #include "tensorflow/compiler/mlir/lite/transforms/dilated_conv.h" @@ -58,7 +61,9 @@ limitations under the License. #include "tensorflow/compiler/mlir/lite/utils/validators.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #include "tensorflow/compiler/mlir/tensorflow/transforms/einsum.h" +#include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" #include "tensorflow/compiler/mlir/tensorflow/transforms/unroll_batch_matmul.h" +#include "tensorflow/compiler/mlir/xla/transforms/passes.h" #define DEBUG_TYPE "tf-tfl-legalization" @@ -737,6 +742,23 @@ LogicalResult ValidateOp(Operation *op) { return failure(has_illegal_ops); } +// Converts a set of TF2XLA ops into pure TF ops for future legalizations as +// TF2XLA ops aren't supported by later stages. 
+LogicalResult ConvertTf2XlaOps(FuncOp func, MLIRContext *context) { + ConversionTarget target(*context); + target.addLegalDialect(); + target.addLegalDialect(); + target.addLegalOp(); + target.addLegalOp(); + target.addIllegalOp(); + + OwningRewritePatternList patterns; + mhlo::PopulateLegalizeTfWithTf2XlaPatterns("XLA_CPU_JIT", patterns); + TF::PopulateLegalizeHloToTfPatterns(&patterns, context); + + return applyPartialConversion(func, target, patterns); +} + void PrepareTFPass::runOnFunction() { OwningRewritePatternList patterns; auto func = getFunction(); @@ -752,6 +774,11 @@ void PrepareTFPass::runOnFunction() { return; } + if (failed(ConvertTf2XlaOps(func, ctx))) { + signalPassFailure(); + return; + } + // This pattern was intented to uses TFL QDQs to preserve the quantization // parameters from the TF Quant ops, thus this pattern should run with the // first `applyPatternsGreedily` method, which would otherwise removes the diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/legalize_hlo.cc b/tensorflow/compiler/mlir/tensorflow/transforms/legalize_hlo.cc index c263dcc75d1..ad241ef9488 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/legalize_hlo.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/legalize_hlo.cc @@ -41,6 +41,7 @@ limitations under the License. #include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.h" #include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" +#include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" #include "tensorflow/core/framework/kernel_shape_util.h" namespace mlir { @@ -744,9 +745,7 @@ void LegalizeHloToTf::runOnFunction() { // Add legalization patterns to the list. OwningRewritePatternList patterns; - populateWithGenerated(&context, &patterns); - patterns.insert(&context); + PopulateLegalizeHloToTfPatterns(&patterns, &context); ConversionTarget target(context); target.addLegalDialect(); @@ -762,6 +761,13 @@ static PassRegistration pass( } // end namespace +void PopulateLegalizeHloToTfPatterns(OwningRewritePatternList *patterns, + MLIRContext *context) { + populateWithGenerated(context, patterns); + patterns->insert(context); +} + std::unique_ptr> CreateLegalizeHloToTfPass() { return std::make_unique(); } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/passes.h b/tensorflow/compiler/mlir/tensorflow/transforms/passes.h index 8e8cb929f8e..f5ca47d7455 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/passes.h +++ b/tensorflow/compiler/mlir/tensorflow/transforms/passes.h @@ -18,6 +18,8 @@ limitations under the License. #include +#include "mlir/IR/MLIRContext.h" // from @llvm-project +#include "mlir/IR/PatternMatch.h" // from @llvm-project #include "mlir/Pass/Pass.h" // from @llvm-project namespace mlir { @@ -148,6 +150,10 @@ CreateTensorArrayOpsDecompositionPass(); // Create a pass that legalize HLO to TF dialect. std::unique_ptr> CreateLegalizeHloToTfPass(); +// Addds the HLO to TF rewrite patterns to the specified pattern list. +void PopulateLegalizeHloToTfPatterns(OwningRewritePatternList* patterns, + MLIRContext* context); + // Matches sequence of ops to TensorFlow fused kernels. This pass should not be // generally used beyond exporting to runtimes that supports these ops. In the // future these fusions may be codegen'd automatically. 
diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc index c9ccfe90535..34e12d3300e 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc @@ -24,6 +24,7 @@ limitations under the License. #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project #include "mlir/IR/Builders.h" // from @llvm-project #include "mlir/IR/Diagnostics.h" // from @llvm-project @@ -223,18 +224,20 @@ static std::unique_ptr CreateDeviceMgr( class Tf2XlaRewriter { public: - static LogicalResult RewriteOp(Operation* op, OpBuilder& builder, + static LogicalResult RewriteOp(Operation* op, PatternRewriter& rewriter, const std::string& device_type) { - Tf2XlaRewriter rewriter(op, builder, device_type); - return rewriter.LegalizeOp(); + Tf2XlaRewriter tf2xla_rewriter(op, rewriter, device_type); + return tf2xla_rewriter.LegalizeOp(); } private: - Tf2XlaRewriter(Operation* op, OpBuilder builder, + Tf2XlaRewriter(Operation* op, PatternRewriter& rewriter, const std::string& device_type) : op_(op), device_type_(device_type), - hlo_builder_(op->getName().getStringRef().str(), builder, op->getLoc()), + rewriter_(rewriter), + hlo_builder_(op->getName().getStringRef().str(), rewriter_, + op->getLoc()), context_(nullptr) {} ~Tf2XlaRewriter() { @@ -259,6 +262,7 @@ class Tf2XlaRewriter { Operation* op_; std::string device_type_; + PatternRewriter& rewriter_; ::xla::MlirHloBuilder hlo_builder_; tensorflow::OpOrArgLocNameMapper name_mapper_; @@ -429,6 +433,8 @@ LogicalResult Tf2XlaRewriter::LegalizeOp() { // Replace uses of old results using the corresponding value after the // lowering. + llvm::SmallVector values; + values.reserve(op_->getNumResults()); for (int i = 0, e = op_->getNumResults(); i < e; i++) { tensorflow::Tensor* output = op_context.mutable_output(i); const tensorflow::XlaExpression* expr = @@ -442,10 +448,9 @@ LogicalResult Tf2XlaRewriter::LegalizeOp() { value = hlo_builder_.create(value, old_result.getType()); } - old_result.replaceAllUsesWith(value); + values.push_back(value); } - - op_->erase(); + rewriter_.replaceOp(op_, values); return success(); } @@ -529,6 +534,11 @@ static PassRegistration pass( } // end namespace +void PopulateLegalizeTfWithTf2XlaPatterns(llvm::StringRef device_type, + OwningRewritePatternList& patterns) { + patterns.insert(device_type.str()); +} + std::unique_ptr> createLegalizeTfWithTf2XlaPass( llvm::StringRef device_type) { return std::make_unique(device_type); diff --git a/tensorflow/compiler/mlir/xla/transforms/passes.h b/tensorflow/compiler/mlir/xla/transforms/passes.h index bc261324055..b72c8d66e8a 100644 --- a/tensorflow/compiler/mlir/xla/transforms/passes.h +++ b/tensorflow/compiler/mlir/xla/transforms/passes.h @@ -18,6 +18,9 @@ limitations under the License. #include +#include "llvm/ADT/StringRef.h" +#include "mlir/IR/MLIRContext.h" // from @llvm-project +#include "mlir/IR/PatternMatch.h" // from @llvm-project #include "mlir/Support/LogicalResult.h" // from @llvm-project namespace mlir { @@ -41,6 +44,10 @@ std::unique_ptr> createLegalizeTFPass( std::unique_ptr> createLegalizeTfWithTf2XlaPass( llvm::StringRef device_type); +/// Adds the TF to XLA via TF2XLA rewrite patterns to the pattern list. 
+void PopulateLegalizeTfWithTf2XlaPatterns(llvm::StringRef device_type, + OwningRewritePatternList& patterns); + /// Lowers from TF dialect's control flow to HLO dialect's control flow. std::unique_ptr> createLegalizeTFControlFlowPass(); From 1cdfc401e43b919efbeb02dff03c8a5aa2b57913 Mon Sep 17 00:00:00 2001 From: Elena Zhelezina Date: Thu, 23 Jul 2020 14:53:41 +0100 Subject: [PATCH 1147/2522] Added error when we have mismatch in inference types + tests. Change-Id: I8488748318be91f4367fc33aa8e7a01e4ff15729 --- tensorflow/lite/python/lite.py | 18 +++++--- tensorflow/lite/python/lite_v2_test.py | 60 ++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 5 deletions(-) diff --git a/tensorflow/lite/python/lite.py b/tensorflow/lite/python/lite.py index d134712d5cf..f154267ec94 100644 --- a/tensorflow/lite/python/lite.py +++ b/tensorflow/lite/python/lite.py @@ -202,8 +202,11 @@ class QuantizationMode(object): def is_post_training_integer_quantize(self): """Post training integer quantization.""" return (self.post_training_int8_no_float() or - self.post_training_int8_allow_float() or - self.post_training_int16x8_no_float() or + self.post_training_int8_allow_float()) + + def is_post_training_integer_quantize_16x8(self): + """Post training 16x8 integer quantization.""" + return (self.post_training_int16x8_no_float() or self.post_training_int16x8_allow_float()) def training_time_int8_allow_float(self): @@ -253,7 +256,8 @@ class QuantizationMode(object): def converter_flags(self, inference_ty=None, inference_input_ty=None): """Flags to the converter.""" - if self.is_post_training_integer_quantize(): + if self.is_post_training_integer_quantize() or \ + self.is_post_training_integer_quantize_16x8(): # The inference_input_type is for the quantizer, then we need to keep the # converter inference_input_type to float. inference_input_ty = constants.FLOAT @@ -558,13 +562,17 @@ class TFLiteConverterBaseV2(TFLiteConverterBase): # We only support integer types for post training integer quantization # as we have statistical information to quantize the input and output. if quant_mode.is_post_training_integer_quantize(): - all_types = default_types + [constants.INT8, constants.QUANTIZED_UINT8,\ - constants.INT16] + all_types = default_types + [constants.INT8, constants.QUANTIZED_UINT8] if self.inference_input_type not in all_types or \ self.inference_output_type not in all_types: all_types_names = ["tf." + t.name for t in all_types] raise ValueError("The inference_input_type and inference_output_type " "must be in {}.".format(all_types_names)) + elif quant_mode.is_post_training_integer_quantize_16x8(): + if self.inference_input_type != constants.INT16 or \ + self.inference_output_type != constants.INT16: + raise ValueError("The inference_input_type and inference_output_type " + "must be constants.INT16.") elif self.inference_input_type not in default_types or \ self.inference_output_type not in default_types: raise ValueError("The inference_input_type and inference_output_type " diff --git a/tensorflow/lite/python/lite_v2_test.py b/tensorflow/lite/python/lite_v2_test.py index cdce0e8c6d9..8585bd8a1d4 100644 --- a/tensorflow/lite/python/lite_v2_test.py +++ b/tensorflow/lite/python/lite_v2_test.py @@ -256,6 +256,66 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): # Ensure that the quantized tflite model is smaller. 
self.assertLess(len(quantized_tflite_model), len(tflite_model)) + def testPostTrainingIntegerAllowFloatQuantizationINT16InputOutput(self): + func, calibration_gen = self._getCalibrationQuantizeModel() + + # Convert float model. + converter = lite.TFLiteConverterV2.from_concrete_functions([func]) + tflite_model = converter.convert() + self.assertTrue(tflite_model) + + # Post-training quantization 16x8 with float fallback allowed. + quantized_converter = lite.TFLiteConverterV2.from_concrete_functions([func]) + quantized_converter.optimizations = [lite.Optimize.DEFAULT] + quantized_converter.representative_dataset = calibration_gen + quantized_converter.target_spec.supported_ops = [ + lite.OpsSet.\ + EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8, + lite.OpsSet.TFLITE_BUILTINS + ] + inference_input_output_type = lite.constants.INT16 + quantized_converter.inference_input_type = inference_input_output_type + quantized_converter.inference_output_type = inference_input_output_type + quantized_tflite_model = quantized_converter.convert() + self.assertTrue(quantized_tflite_model) + + interpreter = Interpreter(model_content=quantized_tflite_model) + interpreter.allocate_tensors() + input_details = interpreter.get_input_details() + self.assertLen(input_details, 1) + self.assertEqual(inference_input_output_type.as_numpy_dtype, + input_details[0]['dtype']) + output_details = interpreter.get_output_details() + self.assertLen(output_details, 1) + self.assertEqual(inference_input_output_type.as_numpy_dtype, + output_details[0]['dtype']) + + # Ensure that the quantized tflite model is smaller. + self.assertLess(len(quantized_tflite_model), len(tflite_model)) + + def testPostTrainingIntegerQuant16x8MismatchInferenceParams(self): + # In this test we check that when we do 16x8 post-training + # quantization and set inference_input(output)_type to + # constants.INT8, we have an error. + func, calibration_gen = self._getCalibrationQuantizeModel() + + # Convert quantized model. 
+ quantized_converter = lite.TFLiteConverterV2.from_concrete_functions([func]) + quantized_converter.optimizations = [lite.Optimize.DEFAULT] + quantized_converter.representative_dataset = calibration_gen + quantized_converter.target_spec.supported_ops = [ + lite.OpsSet.\ + EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8 + ] + + with self.assertRaises(ValueError) as error: + quantized_converter.inference_input_type = lite.constants.INT8 + quantized_converter.inference_output_type = lite.constants.INT8 + quantized_converter.convert() + self.assertEqual( + 'The inference_input_type and inference_output_type ' + 'must be constants.INT16.', str(error.exception)) + @parameterized.named_parameters( ('_DefaultFLOAT32InputOutput_UseTargetTypesFlag', lite.constants.FLOAT, False, False), From b806191a117990a479944b40ec7a4b79843287a2 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Thu, 23 Jul 2020 23:17:28 +0900 Subject: [PATCH 1148/2522] fix trivial typo --- RELEASE.md | 6 +++--- .../Dialect/mhlo/IR/infer_fusibility_op_interface.td | 2 +- tensorflow/core/framework/BUILD | 2 +- tensorflow/lite/delegates/utils/dummy_delegate/README.md | 4 ++-- tensorflow/lite/g3doc/convert/metadata.md | 2 +- tensorflow/lite/g3doc/performance/gpu.md | 2 +- .../lite/micro/tools/make/targets/xtensa_hifi/README.md | 2 +- tensorflow/lite/tools/benchmark/README.md | 4 ++-- tensorflow/lite/tools/delegates/README.md | 2 +- 9 files changed, 13 insertions(+), 13 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 509b6aa6c84..5f791b113fe 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -225,7 +225,7 @@ Coinciding with this change, new releases of [TensorFlow's Docker images](https: `Strategy.extended.update` and `Strategy.extended.update_non_slot`. * Experimental support for shape invariants has been enabled in `tf.function`. See the API docs for - `tf.autograph.experimental.set_loop_options` for additonal info. + `tf.autograph.experimental.set_loop_options` for additional info. * AutoGraph error messages now exclude frames corresponding to APIs internal to AutoGraph. * Improve shape inference for `tf.function` input arguments to unlock more @@ -308,7 +308,7 @@ Coinciding with this change, new releases of [TensorFlow's Docker images](https: also deterministic back-prop of bias-addition in Keras layers) to include when XLA JIT compilation is enabled. * Fix problem, when running on a CUDA GPU and when either environment - variable `TF_DETERMINSTIC_OPS` or environment variable + variable `TF_DETERMINISTIC_OPS` or environment variable `TF_CUDNN_DETERMINISTIC` is set to "true" or "1", in which some layer configurations led to an exception with the message "No algorithm worked!" @@ -372,7 +372,7 @@ TensorFlow 2.1 will be the last TF release supporting Python 2. Python 2 support * `tf.debugging` * Add `tf.debugging.enable_check_numerics()` and `tf.debugging.disable_check_numerics()` to help debugging the root causes of issues involving infinities and `NaN`s. * `tf.distribute` - * Custom training loop support on TPUs and TPU pods is avaiable through `strategy.experimental_distribute_dataset`, `strategy.experimental_distribute_datasets_from_function`, `strategy.experimental_run_v2`, `strategy.reduce`. + * Custom training loop support on TPUs and TPU pods is available through `strategy.experimental_distribute_dataset`, `strategy.experimental_distribute_datasets_from_function`, `strategy.experimental_run_v2`, `strategy.reduce`. 
* Support for a global distribution strategy through `tf.distribute.experimental_set_strategy(),` in addition to `strategy.scope()`. * `TensorRT` * [TensorRT 6.0](https://developer.nvidia.com/tensorrt#tensorrt-whats-new) is now supported and enabled by default. This adds support for more TensorFlow ops including Conv3D, Conv3DBackpropInputV2, AvgPool3D, MaxPool3D, ResizeBilinear, and ResizeNearestNeighbor. In addition, the TensorFlow-TensorRT python conversion API is exported as `tf.experimental.tensorrt.Converter`. diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.td b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.td index eb2c1ba3ffe..f8e02d413e9 100644 --- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.td +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.td @@ -140,7 +140,7 @@ def InferFusibilityOpInterface : OpInterface<"InferFusibilityOpInterface"> { Here the effective workload shape roughly represents the maximum parallelism can be used during the codegen stage. It's used to check the shape-compatibility of the operation. During fusion, we only - try to fuse shape-compatible ops for performace. + try to fuse shape-compatible ops for performance. For example, the effective workload shape of an elementwise op is its output shape, while the effective workload shape of a reduction op may be its operand shape. diff --git a/tensorflow/core/framework/BUILD b/tensorflow/core/framework/BUILD index 8de5f74823e..d6d604e072e 100644 --- a/tensorflow/core/framework/BUILD +++ b/tensorflow/core/framework/BUILD @@ -428,7 +428,7 @@ filegroup( ], ) -# Individual targets. These should be prefered over tensorflow/core:framework +# Individual targets. These should be preferred over tensorflow/core:framework # whenever possible. # This is redundant with the "tensorflow/core:framework" target. It's useful for diff --git a/tensorflow/lite/delegates/utils/dummy_delegate/README.md b/tensorflow/lite/delegates/utils/dummy_delegate/README.md index be3ccae8810..7e08f00e2b8 100644 --- a/tensorflow/lite/delegates/utils/dummy_delegate/README.md +++ b/tensorflow/lite/delegates/utils/dummy_delegate/README.md @@ -2,8 +2,8 @@ A dummy delegate implementation to illustrate * How to use [SimpleDelegateInterface and SimpleDelegateKernelInterface](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/utils/simple_delegate.h) - to faciliate a TFLite delgate creation. A more sophisticated example could be - refered to the [Flex delegate](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/delegates/flex) + to faciliate a TFLite delegate creation. A more sophisticated example could be + referred to the [Flex delegate](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/delegates/flex) * How to leverage the [delegate registrar](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/tools/delegates) to plug in a delegate in TFLite benchmark and task evaluation tools. diff --git a/tensorflow/lite/g3doc/convert/metadata.md b/tensorflow/lite/g3doc/convert/metadata.md index 48345d2756a..4279e409416 100644 --- a/tensorflow/lite/g3doc/convert/metadata.md +++ b/tensorflow/lite/g3doc/convert/metadata.md @@ -389,7 +389,7 @@ is the minimum version of metadata parser (the Flatbuffers generated code) that can read the metadata Flatbuffers in full. 
The version is effectively the largest version number among the versions of all the fields populated and the smallest compatible version indicated by the file identifier. The minimum -necessary metadata parser version is automaticaly populated by the +necessary metadata parser version is automatically populated by the `MetadataPopulator` when the metadata is populated into a TFLite model. See the [metadata extractor](../guide/codegen.md#read-the-metadata-from-models) about how the minimum necessary metadata parser version is used. diff --git a/tensorflow/lite/g3doc/performance/gpu.md b/tensorflow/lite/g3doc/performance/gpu.md index b8f7c419e5b..3cea6febb21 100644 --- a/tensorflow/lite/g3doc/performance/gpu.md +++ b/tensorflow/lite/g3doc/performance/gpu.md @@ -93,7 +93,7 @@ target 'YourProjectName' -From TensorFlow Lite 2.1.0, GPU delegate is inlcuded in the `TensorFlowLiteC` +From TensorFlow Lite 2.1.0, GPU delegate is included in the `TensorFlowLiteC` pod. You can choose between `TensorFlowLiteC` and `TensorFlowLiteSwift` depending on the language. diff --git a/tensorflow/lite/micro/tools/make/targets/xtensa_hifi/README.md b/tensorflow/lite/micro/tools/make/targets/xtensa_hifi/README.md index fd606a7f96b..6c88ce394c5 100644 --- a/tensorflow/lite/micro/tools/make/targets/xtensa_hifi/README.md +++ b/tensorflow/lite/micro/tools/make/targets/xtensa_hifi/README.md @@ -28,7 +28,7 @@ tensorflow/lite/micro/kernels/xtensa_hifi/ A scratch memory allocation is needed for the HiFi optimized kernels. This allocation is currently done on stack and it's size can be controlled by -defining 'XTENSA_NNLIB_MAX_SCRATCH_SIZE' approproately in the file +defining 'XTENSA_NNLIB_MAX_SCRATCH_SIZE' appropriately in the file 'tensorflow/lite/micro/tools/make/ext_libs/xtensa_hifi_nn_library.inc The files containing the HiFi optimized NN kernels are present in this folder: diff --git a/tensorflow/lite/tools/benchmark/README.md b/tensorflow/lite/tools/benchmark/README.md index 413dd8c6181..cbc57ac0d46 100644 --- a/tensorflow/lite/tools/benchmark/README.md +++ b/tensorflow/lite/tools/benchmark/README.md @@ -82,9 +82,9 @@ Note when `use_legacy_nnapi` is selected, this parameter won't work. `/data/local/tmp/` and this benchmark tool will not correctly use NNAPI. * `nnapi_execution_preference`: `str` (default="") * `nnapi_execution_priority`: `str` (default="") \ - Note this requires Anroid 11+. + Note this requires Android 11+. * `nnapi_accelerator_name`: `str` (default="") \ - Note this requires Anroid 10+. + Note this requires Android 10+. * `disable_nnapi_cpu`: `bool` (default=false) * `nnapi_allow_fp16`: `bool` (default=false) diff --git a/tensorflow/lite/tools/delegates/README.md b/tensorflow/lite/tools/delegates/README.md index 26bf1bcd8fd..aa9d1969e1e 100644 --- a/tensorflow/lite/tools/delegates/README.md +++ b/tensorflow/lite/tools/delegates/README.md @@ -4,7 +4,7 @@ [A TFLite delegate registrar](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/tools/delegates/delegate_provider.h) is provided here. The registrar keeps a list of TFLite delegate providers, each of which defines a list parameters that could be initialized from commandline -argumenents and provides a TFLite delegate instance creation based on those +arguments and provides a TFLite delegate instance creation based on those parameters. This delegate registrar has been used in TFLite evaluation tools and the benchmark model tool. 
From 9ce9e779f1510f688771090527fcd45de41691ac Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Thu, 23 Jul 2020 08:04:42 -0700 Subject: [PATCH 1149/2522] [XLA:Python] Add has_side_effect to CustomCall bindings. Will fix https://github.com/google/jax/issues/3829 when incorporated into a jaxlib release. PiperOrigin-RevId: 322787456 Change-Id: If2ade6a15875c476d0e160b6ef17a4fb0b2d37fe --- tensorflow/compiler/xla/python/ops.cc | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/tensorflow/compiler/xla/python/ops.cc b/tensorflow/compiler/xla/python/ops.cc index 9362a367dfc..3ac4709b160 100644 --- a/tensorflow/compiler/xla/python/ops.cc +++ b/tensorflow/compiler/xla/python/ops.cc @@ -114,24 +114,26 @@ void BuildOpsSubmodule(py::module* m) { "CustomCall", [](XlaBuilder* builder, const py::bytes& call_target_name, absl::Span operands, const Shape& shape, - const py::bytes& opaque) -> XlaOp { - return CustomCall(builder, call_target_name, operands, shape, opaque); + const py::bytes& opaque, bool has_side_effect) -> XlaOp { + return CustomCall(builder, call_target_name, operands, shape, opaque, + has_side_effect); }, py::arg("builder"), py::arg("call_target_name"), py::arg("operands"), - py::arg("shape"), py::arg("opaque") = py::bytes("")); + py::arg("shape"), py::arg("opaque") = py::bytes(""), + py::arg("has_side_effect") = false); ops.def( "CustomCallWithLayout", [](XlaBuilder* builder, const py::bytes& call_target_name, absl::Span operands, const Shape& shape_with_layout, absl::Span operand_shapes_with_layout, - const py::bytes& opaque) -> XlaOp { - return CustomCallWithLayout(builder, call_target_name, operands, - shape_with_layout, - operand_shapes_with_layout, opaque); + const py::bytes& opaque, bool has_side_effect) -> XlaOp { + return CustomCallWithLayout( + builder, call_target_name, operands, shape_with_layout, + operand_shapes_with_layout, opaque, has_side_effect); }, py::arg("builder"), py::arg("call_target_name"), py::arg("operands"), py::arg("shape_with_layout"), py::arg("operand_shapes_with_layout"), - py::arg("opaque") = py::bytes("")); + py::arg("opaque") = py::bytes(""), py::arg("has_side_effect") = false); ops.def("Dot", &Dot, py::arg("lhs"), py::arg("rhs"), py::arg("precision_config") = nullptr); ops.def("DotGeneral", &DotGeneral, py::arg("lhs"), py::arg("rhs"), From 7074d7326ccb26404685fc84bb2d5ea66489bb1a Mon Sep 17 00:00:00 2001 From: Vignesh Kothapalli Date: Thu, 23 Jul 2020 21:22:37 +0530 Subject: [PATCH 1150/2522] refactored tests and added unsupported type test --- .../experimental/kernel_tests/unique_test.py | 125 +++++++++++------- 1 file changed, 79 insertions(+), 46 deletions(-) diff --git a/tensorflow/python/data/experimental/kernel_tests/unique_test.py b/tensorflow/python/data/experimental/kernel_tests/unique_test.py index 67b7fa08ad1..b3fd7936fc9 100644 --- a/tensorflow/python/data/experimental/kernel_tests/unique_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/unique_test.py @@ -78,60 +78,93 @@ class UniqueTest(test_base.DatasetTestBase, parameterized.TestCase): (["foo", "bar", "baz", "baz", "bar", "foo"], ["foo", "bar", "baz"]), ]) + def _checkDatasetRaises(self, dtype, test_cases, error): + """Test whether the dataset raises the appropriate errors + while generating the outputs. + + Args: + dtype: The actual `dtype` of the elements in each test case. + test_cases: A list of lists. The dataset will be created from the list items. + error: The expected error to be raised when a corrupted item in encountered. 
+ """ + + current_test_case = [] + dataset = dataset_ops.Dataset.from_generator(lambda: current_test_case, + dtype).apply(unique.unique()) + + for test_case in test_cases: + current_test_case = test_case + with self.assertRaises(error): + _ = self.getDatasetOutput(dataset) + @combinations.generate(test_base.graph_only_combinations()) - def testTypeMismatch(self): + def testStringTypeMismatch(self): + """Should raise InternalError when element type doesn't match + with dtypes.string.""" - # Placeholder values are needed to fill in the expected array with dummy value so that, - # when the dataset generates the element and observes that there is a type mismatch, - # it raises the proper error and not an OutOfRangeError which occurs when it is unable - # to fetch an element to compare from the expected array in the first place. - string_placeholder = "" - int32_placeholder = 0 - int64_placeholder = 0 - - # raises InternalError when element type doesn't match with dtypes.string. - string_cases = [ - (["hello", 1, 2, 1], ["hello"]), - (["hello", "world", 1], ["hello", "world"]), - (["hello", "hello", "world", 1, 2], ["hello", "world"]), - (["hello", "world", 1, 1, 2], ["hello", "world"]), - # In the following cases, when the first element (i.e 1) of the dataset is generated, - # it validates the type and immediately raises the error. This is unlike the above cases, - # wherein the dtype of the starting elements are as expected to start with, - # and the dataset has to loop until it reaches the incorrect dtype element. - # Until then we need to make sure that data with correct type has to match - # for testing purposes. Similar logic applies to dtype.int32 and dtype.64 as well. - ([1, 2, "hello"], [string_placeholder]), - ([1, 1, 2, 3, 3, "hello"], [string_placeholder]), + test_cases = [ + ["hello", 1], + ["hello", "hello", "world", 3], + ["hello", 1, 1], + ["hello", "world", 1, 2], + [1, "hello"], + [1, 2, "hello"], + [1, 3, "hello", "world"], + [1, 1, "hello", "hello"] ] + self._checkDatasetRaises(dtype=dtypes.string, test_cases=test_cases, + error=errors.InternalError) - # handle each case independently so that an error raised by a single case doesn't interfere - # with the other ones. As per self._testSimpleHelper functionality. - for case in string_cases: - with self.assertRaises(errors.InternalError): - self._testSimpleHelper(dtypes.string, [case]) + @combinations.generate(test_base.graph_only_combinations()) + def testInt32TypeMismatch(self): + """Should raise InvalidArgumentError when element type doesn't + match with dtypes.int32""" - # raises InvalidArgumentError when element type doesn't match with dtypes.int32. - int32_cases = [ - ([1, "hello", "world"], [1]), - ([1, 2, 1, "hello", "hello", "world"], [1, 2]), - (["hello", 1, 2], [int32_placeholder]), - (["hello", 1, 1, 2, 3, 3], [int32_placeholder]), + test_cases = [ + [1, "foo"], + [1, 2, "bar"], + [1, 3, "foo", "bar"], + [1, 4, "foo", "foo"], + ["bar", 1], + ["bar", "foo", 2], + ["bar", "bar", "foo", 3], + ["foo", 1, 1], + ["bar", "bar", 1, 1], ] - for case in int32_cases: - with self.assertRaises(errors.InvalidArgumentError): - self._testSimpleHelper(dtypes.int32, [case]) + self._checkDatasetRaises(dtype=dtypes.int32, test_cases=test_cases, + error=errors.InvalidArgumentError) - # raises InvalidArgumentError when element type doesn't match with dtypes.int64. 
-    int64_cases = [
-        ([2, 3, "hello", "world"], [2, 3]),
-        ([2, 3, 3, "hello", "hello", "world"], [2, 3]),
-        (["hello", 2, 2], [int64_placeholder]),
-        (["hello", "hello", 1, 1, 2, 3], [int64_placeholder]),
+  @combinations.generate(test_base.graph_only_combinations())
+  def testInt64TypeMismatch(self):
+    """Should raise InvalidArgumentError when element type doesn't
+    match with dtypes.int64."""
+
+    test_cases = [
+        [2, "hello"],
+        [3, 2, "hello"],
+        [5, 3, "hello", "world"],
+        [6, 7, "hello", "hello"],
+        ["hello", 6],
+        ["hello", "world", 8],
+        ["hello", "hello", "world", 8],
+        ["hello", 9, 9],
+        ["hello", "world", 10, 10],
     ]
-    for case in int64_cases:
-      with self.assertRaises(errors.InvalidArgumentError):
-        self._testSimpleHelper(dtypes.int64, [case])
+    self._checkDatasetRaises(dtype=dtypes.int64, test_cases=test_cases,
+                             error=errors.InvalidArgumentError)
+
+  @combinations.generate(test_base.graph_only_combinations())
+  def testUnsupportedTypes(self):
+    """Should raise TypeError when element type doesn't match with the
+    dtypes.int64, dtypes.int32 or dtypes.string (supported types)."""
+
+    sample_unsupported_types = [dtypes.bool, dtypes.double, dtypes.complex64,
+        dtypes.float32, dtypes.float64, dtypes.qint16, dtypes.qint32]
+    current_test_case = []
+    for dtype in sample_unsupported_types:
+      with self.assertRaises(TypeError):
+        _ = dataset_ops.Dataset.from_generator(lambda: current_test_case,
+                                               dtype).apply(unique.unique())
 
 
 if __name__ == "__main__":
From 7b66d0397b5d70e73a71dad4001e0c82c34cacf6 Mon Sep 17 00:00:00 2001
From: Yong Tang
Date: Thu, 23 Jul 2020 15:53:43 +0000
Subject: [PATCH 1151/2522] Fix messy formatting in tf.linalg.inv's docstring

This PR fixes the incorrect formatting of tf.linalg.inv's docstring
where the summary spans into the description.

This PR fixes 41656.

Signed-off-by: Yong Tang
---
 tensorflow/core/api_def/base_api/api_def_MatrixInverse.pbtxt | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tensorflow/core/api_def/base_api/api_def_MatrixInverse.pbtxt b/tensorflow/core/api_def/base_api/api_def_MatrixInverse.pbtxt
index 25eca0c766b..16c6e0a9d34 100644
--- a/tensorflow/core/api_def/base_api/api_def_MatrixInverse.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_MatrixInverse.pbtxt
@@ -16,9 +16,8 @@ Equivalent to np.linalg.inv
 @end_compatibility
 END
   }
-  summary: "Computes the inverse of one or more square invertible matrices or their"
+  summary: "Computes the inverse of one or more square invertible matrices or their adjoints (conjugate transposes)."
   description: < Date: Thu, 23 Jul 2020 09:18:01 -0700
Subject: [PATCH 1152/2522] Add support for lowering mhlo.iota to Linalg.
PiperOrigin-RevId: 322799853 Change-Id: I77aa951ebbd707c54af7dd2d6b031b5f22f75178 --- .../mhlo/transforms/legalize_to_linalg.cc | 91 ++++++++++--------- .../hlo/tests/hlo-legalize-to-linalg.mlir | 15 +++ 2 files changed, 62 insertions(+), 44 deletions(-) diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_to_linalg.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_to_linalg.cc index fd6a7617344..223baf420f9 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_to_linalg.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_to_linalg.cc @@ -640,25 +640,25 @@ class ReshapeOpConverter : public OpConversionPattern { } }; -class IotaConverter : public OpConversionPattern { +template +class IotaConverter : public OpConversionPattern { public: - using OpConversionPattern::OpConversionPattern; + using OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite( - lmhlo::IotaOp iotaOp, ArrayRef args, + OpTy iotaOp, ArrayRef args, ConversionPatternRewriter& rewriter) const final { - auto resultMemrefType = - iotaOp.getOperand().getType().dyn_cast(); - if (!resultMemrefType) return failure(); + ShapedType resultShapedType = getHloOpResultType(iotaOp); + if (!resultShapedType) return failure(); - auto resultElementType = resultMemrefType.getElementType(); + auto resultElementType = resultShapedType.getElementType(); if (!resultElementType.isSignlessIntOrFloat()) return failure(); // Construct the indexing maps needed for linalg.generic ops. - unsigned nloops = resultMemrefType.getRank(); + unsigned nloops = resultShapedType.getRank(); - rewriter.create( - iotaOp.getLoc(), ArrayRef{}, args, + auto linalgOp = rewriter.create( + iotaOp.getLoc(), isLHLO ? ArrayRef{} : resultShapedType, args, 0, // args_in 1, // args_out llvm::makeArrayRef(rewriter.getMultiDimIdentityMap(nloops)), @@ -669,14 +669,16 @@ class IotaConverter : public OpConversionPattern { nestedLoc, ivs[iotaOp.iota_dimension().getZExtValue()], nestedBuilder.getIntegerType( resultElementType.getIntOrFloatBitWidth())); - if (resultElementType.isa()) { + if (resultElementType.template isa()) { castOp = nestedBuilder.create(nestedLoc, castOp, resultElementType); } nestedBuilder.create(nestedLoc, castOp); }); - - rewriter.replaceOp(iotaOp, llvm::None); + if (isLHLO) + rewriter.replaceOp(iotaOp, llvm::None); + else + rewriter.replaceOp(iotaOp, linalgOp.output_tensors()); return success(); } }; @@ -768,7 +770,7 @@ void populateLHLOToLinalgConversionPattern(MLIRContext* context, patterns->insert, ConstConverter, ConvToLinalgConverter, - IotaConverter, + IotaConverter, LhloBroadcastInDimConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, @@ -870,36 +872,37 @@ namespace mhlo { void populateHLOToLinalgConversionPattern(MLIRContext* context, OwningRewritePatternList* patterns) { - patterns->insert, - HloBroadcastInDimConverter, - PointwiseToLinalgConverter, - PointwiseToLinalgConverter, - PointwiseToLinalgConverter, - PointwiseToLinalgConverter, - PointwiseToLinalgConverter, - PointwiseToLinalgConverter, - PointwiseToLinalgConverter, - PointwiseToLinalgConverter, - PointwiseToLinalgConverter, - PointwiseToLinalgConverter, - PointwiseToLinalgConverter, - PointwiseToLinalgConverter, - PointwiseToLinalgConverter, - PointwiseToLinalgConverter, - PointwiseToLinalgConverter, - PointwiseToLinalgConverter, - PointwiseToLinalgConverter, - PointwiseToLinalgConverter, - PointwiseToLinalgConverter, - PointwiseToLinalgConverter, - 
PointwiseToLinalgConverter, - PointwiseToLinalgConverter, - PointwiseToLinalgConverter, - PointwiseToLinalgConverter, - PointwiseToLinalgConverter, - ReshapeOpConverter, - ReverseConverter, - TransposeConverter>(context); + patterns + ->insert, + HloBroadcastInDimConverter, IotaConverter, + PointwiseToLinalgConverter, + PointwiseToLinalgConverter, + PointwiseToLinalgConverter, + PointwiseToLinalgConverter, + PointwiseToLinalgConverter, + PointwiseToLinalgConverter, + PointwiseToLinalgConverter, + PointwiseToLinalgConverter, + PointwiseToLinalgConverter, + PointwiseToLinalgConverter, + PointwiseToLinalgConverter, + PointwiseToLinalgConverter, + PointwiseToLinalgConverter, + PointwiseToLinalgConverter, + PointwiseToLinalgConverter, + PointwiseToLinalgConverter, + PointwiseToLinalgConverter, + PointwiseToLinalgConverter, + PointwiseToLinalgConverter, + PointwiseToLinalgConverter, + PointwiseToLinalgConverter, + PointwiseToLinalgConverter, + PointwiseToLinalgConverter, + PointwiseToLinalgConverter, + PointwiseToLinalgConverter, + ReshapeOpConverter, + ReverseConverter, + TransposeConverter>(context); } std::unique_ptr> createLegalizeHloToLinalgPass() { diff --git a/tensorflow/compiler/mlir/hlo/tests/hlo-legalize-to-linalg.mlir b/tensorflow/compiler/mlir/hlo/tests/hlo-legalize-to-linalg.mlir index 320ce069ac0..db06708bce7 100644 --- a/tensorflow/compiler/mlir/hlo/tests/hlo-legalize-to-linalg.mlir +++ b/tensorflow/compiler/mlir/hlo/tests/hlo-legalize-to-linalg.mlir @@ -557,3 +557,18 @@ func @reverse(%input: tensor<2x3xf32>) -> tensor<2x3xf32> { } // CHECK: linalg.generic // CHECK-SAME: indexing_maps = [#[[OPERAND_MAP]], #[[RESULT_MAP]]] + +// ----- + +// CHECK: #[[RESULT_MAP:.*]] = affine_map<(d0, d1) -> (d0, d1)> +// CHECK-LABEL: func @iota +func @iota() -> tensor<7x10xf32> { + %result = "mhlo.iota"() {iota_dimension = 1 : i64} : () -> (tensor<7x10xf32>) + return %result : tensor<7x10xf32> +} +// CHECK: linalg.indexed_generic +// CHECK-SAME: indexing_maps = [#[[RESULT_MAP]]] +// CHECK-NEXT: ^bb0(%[[D0:.*]]: index, %[[D1:.*]]: index): +// CHECK-NEXT: %[[INT_CAST:.*]] = index_cast %[[D1]] : index to i32 +// CHECK-NEXT: %[[FLOAT_CAST:.*]] = sitofp %[[INT_CAST]] : i32 to f32 +// CHECK-NEXT: linalg.yield %[[FLOAT_CAST]] : f32 From 20ca4bac37b1f05065822a38c4be917427e30f66 Mon Sep 17 00:00:00 2001 From: RJ Skerry-Ryan Date: Thu, 23 Jul 2020 10:00:41 -0700 Subject: [PATCH 1153/2522] Allow CompositeTensors to be returned from functions run on TPU with TpuStrategy. TESTED: - extend composite input unit tests to verify that "flat" and non-"flat" outputs from functions run on TPU can return CompositeTensors. 
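A minimal sketch of the usage this enables (not code from this change). It
assumes `strategy` is an already-created tf.distribute.TPUStrategy and
`iterator` is a distributed iterator whose per-replica elements are
tf.SparseTensor values, set up as in the tests below (with prefetching to the
device disabled for the sparse input):

    import tensorflow as tf

    @tf.function
    def sparse_step(iterator):
      def step(sparse):
        dense = tf.sparse.to_dense(sparse)
        # Composite tensors may now appear anywhere in the (possibly nested)
        # return value of the replicated function.
        return {"sparse": sparse, "row_sum": tf.reduce_sum(dense, axis=1)}
      return strategy.run(step, args=(next(iterator),))

    per_replica = sparse_step(iterator)
    # per_replica["sparse"] holds the per-replica tf.SparseTensor results.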
PiperOrigin-RevId: 322807866 Change-Id: I31ea844ac371f0e539bcad6c5d7a6abd9e14d398 --- .../python/distribute/tpu_strategy_test.py | 76 +++++++++++++++++-- tensorflow/python/tpu/tpu.py | 53 ++++++++----- 2 files changed, 105 insertions(+), 24 deletions(-) diff --git a/tensorflow/python/distribute/tpu_strategy_test.py b/tensorflow/python/distribute/tpu_strategy_test.py index 7fa927dde52..2f5be9c211d 100644 --- a/tensorflow/python/distribute/tpu_strategy_test.py +++ b/tensorflow/python/distribute/tpu_strategy_test.py @@ -58,6 +58,7 @@ from tensorflow.python.tpu import tpu_strategy_util from tensorflow.python.training import checkpoint_management from tensorflow.python.training import server_lib from tensorflow.python.training.tracking import util +from tensorflow.python.util import nest FLAGS = flags.FLAGS @@ -473,7 +474,7 @@ class TPUStrategyTest(test.TestCase, parameterized.TestCase): self.assertAllEqual("/job:localhost/replica:0/task:0/device:TPU:1", results[1].backing_device) - def test_composite_input(self, enable_packed_var): + def test_composite_input_output(self, enable_packed_var): strategy = get_tpu_strategy(enable_packed_var) if strategy.num_replicas_in_sync != 2: self.skipTest("Test assumes two replicas.") @@ -488,9 +489,12 @@ class TPUStrategyTest(test.TestCase, parameterized.TestCase): def tpu_function(sparse): # Assumes dense_shape is (2, *) looked_up = array_ops.gather(table, sparse.values) - return math_ops.unsorted_segment_sum(looked_up, sparse.indices[:, 0], 2) + segment_sum = math_ops.unsorted_segment_sum( + looked_up, sparse.indices[:, 0], 2) + return sparse, segment_sum - return strategy.experimental_local_results( + return nest.map_structure( + strategy.experimental_local_results, strategy.run(tpu_function, args=(next(iterator),))) def dataset_fn(_): @@ -511,9 +515,69 @@ class TPUStrategyTest(test.TestCase, parameterized.TestCase): distribute_lib.InputOptions( experimental_prefetch_to_device=False))) - result = sparse_lookup(dataset) - self.assertAllEqual(result, - [[[0.0, 1.0], [3.0, 8.0]], [[0.0, 1.0], [3.0, 8.0]]]) + sparse, result = sparse_lookup(dataset) + + # All replicas return identical reults. 
+ for replica in range(strategy.num_replicas_in_sync): + self.assertIsInstance(sparse[replica], sparse_tensor.SparseTensor) + self.assertAllEqual(sparse[replica].indices, [[0, 0], [1, 0], [1, 1]]) + self.assertAllEqual(sparse[replica].values, [0, 0, 1]) + self.assertAllEqual(sparse[replica].dense_shape, [2, 2]) + self.assertAllEqual(result[replica], [[0.0, 1.0], [3.0, 8.0]]) + + def test_composite_input_non_flat_output(self, enable_packed_var): + strategy = get_tpu_strategy(enable_packed_var) + if strategy.num_replicas_in_sync != 2: + self.skipTest("Test assumes two replicas.") + + with strategy.scope(): + table = variables.Variable( + initial_value=[[0.0, 1.0], [3.0, 7.0]], dtype=dtypes.float32) + + @def_function.function + def sparse_lookup(iterator): + + def tpu_function(sparse): + # Assumes dense_shape is (2, *) + looked_up = array_ops.gather(table, sparse.values) + segment_sum = math_ops.unsorted_segment_sum( + looked_up, sparse.indices[:, 0], 2) + return {"sparse": sparse, "segment_sum": segment_sum} + + return nest.map_structure( + strategy.experimental_local_results, + strategy.run(tpu_function, args=(next(iterator),))) + + def dataset_fn(_): + dataset = dataset_ops.Dataset.range(2) + + def make_sparse(_): + return sparse_tensor.SparseTensor( + indices=array_ops.constant([[0, 0], [1, 0], [1, 1]], + dtype=dtypes.int64), + values=array_ops.constant([0, 0, 1], dtype=dtypes.int32), + dense_shape=array_ops.constant([2, 2], dtype=dtypes.int64)) + + return dataset.map(make_sparse) + + dataset = iter( + strategy.experimental_distribute_datasets_from_function( + dataset_fn, + distribute_lib.InputOptions( + experimental_prefetch_to_device=False))) + + output = sparse_lookup(dataset) + + # All replicas return identical reults. + for replica in range(strategy.num_replicas_in_sync): + self.assertIsInstance(output["sparse"][replica], + sparse_tensor.SparseTensor) + self.assertAllEqual(output["sparse"][replica].indices, + [[0, 0], [1, 0], [1, 1]]) + self.assertAllEqual(output["sparse"][replica].values, [0, 0, 1]) + self.assertAllEqual(output["sparse"][replica].dense_shape, [2, 2]) + self.assertAllEqual(output["segment_sum"][replica], + [[0.0, 1.0], [3.0, 8.0]]) def test_composite_input_dynamic_shapes_outside_compilation( self, enable_packed_var): diff --git a/tensorflow/python/tpu/tpu.py b/tensorflow/python/tpu/tpu.py index 97cb456f50f..5a2f7ba4454 100644 --- a/tensorflow/python/tpu/tpu.py +++ b/tensorflow/python/tpu/tpu.py @@ -50,7 +50,6 @@ from tensorflow.python.tpu import tpu_function from tensorflow.python.tpu.ops import tpu_ops from tensorflow.python.util import compat from tensorflow.python.util import nest -from tensorflow.python.util.compat import collections_abc from tensorflow.python.util.tf_export import tf_export ops.NotDifferentiable("TPUReplicatedInput") @@ -1227,7 +1226,7 @@ def split_compile_and_replicate(computation, nest.flatten(per_replica_input, expand_composites=True) for per_replica_input in inputs ] - # Mask parallel to one replicat's inputs with True for tensors coming from + # Mask parallel to one replica's inputs with True for tensors coming from # composites. 
is_composite = nest.flatten(nest.map_structure( lambda x: _flatten_and_filter_composite(x, False, True), inputs[0])) @@ -1412,9 +1411,11 @@ def split_compile_and_replicate(computation, outputs_is_flat = xla.is_flat(outputs) if outputs_is_flat: - output_tensors, control_deps = _postprocess_flat_outputs(outputs) + output_tensors, control_deps, pack_template = _postprocess_flat_outputs( + outputs) else: - output_tensors, control_deps = _postprocess_non_flat_outputs(outputs) + output_tensors, control_deps, pack_template = ( + _postprocess_non_flat_outputs(outputs)) # tensor_tracer imports tpu.py. Local import to tensor_tracer to avoid # import-cycle @@ -1473,11 +1474,10 @@ def split_compile_and_replicate(computation, array_ops.identity( ys[replica], name="output_%d_shard_%d" % (i, replica))) - if not outputs_is_flat: - replicated_outputs = [ - nest.pack_sequence_as(outputs, replica_outs) - for replica_outs in replicated_outputs - ] + replicated_outputs = [ + nest.pack_sequence_as(pack_template, replica_outs, expand_composites=True) + for replica_outs in replicated_outputs + ] return [compile_status, replicated_outputs] @@ -1489,7 +1489,9 @@ def _postprocess_flat_outputs(outputs): outputs: Output from `computation` inside `tpu.rewrite`. Returns: - Tensors and Operations extracted from outputs. + - Tensors extracted from outputs. + - Operations extracted from outputs. + - A pack template for use with nest.pack_sequence_as to pack the tensors. """ # Following code segment is to preserve legacy behavior. Previously we only # supported flat outputs and thus for consistency it was nice to convert even @@ -1500,9 +1502,17 @@ def _postprocess_flat_outputs(outputs): # If the computation returns `None`, make it an empty tuple. if outputs is None: outputs = tuple() - # If the computation only returned one value, makes it a tuple. - if not isinstance(outputs, collections_abc.Sequence): - outputs = (outputs,) + + # For legacy / backwards compatibility reasons we return a list for "flat" + # output values (even if the user's flat return value was a different type or + # even just a scalar value) so use nest.flatten to compute a flat list pack + # template. + pack_template = nest.flatten(outputs, expand_composites=False) + + # Even though outputs is already "flat", we flatten any composites so their + # component tensors can be tagged and replicated. The pack_template will be + # used by the caller to repack the composite tensors. + outputs = nest.flatten(outputs, expand_composites=True) # Append `no_op` here so that fetching any return value of this function # will trigger TPUExecute node. @@ -1527,6 +1537,11 @@ def _postprocess_flat_outputs(outputs): "TPU functions must return zero-or more Tensor values followed by " "zero or more Operations.") + # Trim operations off the end of the pack template. output_operations has 1 + # extra element due to the no-op that is added. + if len(output_operations) > 1: + pack_template = pack_template[:1 - len(output_operations)] + # Wraps outputs in Identity ops. Otherwise a replicated input copied # straight to an output would bypass the replicate(). 
This would be bad # because the TPUReplicatedInput/TPUReplicatedOutput operator would not @@ -1540,7 +1555,7 @@ def _postprocess_flat_outputs(outputs): o.op._set_attr("_tpu_output_identity", attr_value_pb2.AttrValue(b=True)) # pylint: enable=protected-access new_output_tensors.append(o) - return new_output_tensors, output_operations + return new_output_tensors, output_operations, pack_template def _postprocess_non_flat_outputs(outputs): @@ -1550,12 +1565,14 @@ def _postprocess_non_flat_outputs(outputs): outputs: Output from `computation` inside `tpu.rewrite`. Returns: - Tensors extracted from outputs and an empty list because Operations are not - allowed in non-flat outputs.. + - Tensors extracted from outputs. + - An empty Operations list because Operations are not allowed in non-flat + outputs. + - A pack template for use with nest.pack_sequence_as to pack the tensors. """ # Flatten output items. - flat_outputs = nest.flatten(outputs) + flat_outputs = nest.flatten(outputs, expand_composites=True) # Convert all non-Operation outputs to Tensors. for i, o in enumerate(flat_outputs): @@ -1586,7 +1603,7 @@ def _postprocess_non_flat_outputs(outputs): flat_outputs[i] = array_ops.identity(o) # All flat_outputs are Tensors, and no Operations. - return flat_outputs, [] + return flat_outputs, [], outputs def split_compile_and_shard(computation, From 1ab0b9388675eaa5b280ca0ab1e4754fff96bb8b Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Thu, 23 Jul 2020 10:06:16 -0700 Subject: [PATCH 1154/2522] [tf.data] minor fix PiperOrigin-RevId: 322809155 Change-Id: I5489a021e2904ac0573bfdd5c79721e71486eaf1 --- tensorflow/core/grappler/optimizers/data/auto_shard.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/data/auto_shard.cc b/tensorflow/core/grappler/optimizers/data/auto_shard.cc index 535938d4cf1..4ad9cec4fe4 100644 --- a/tensorflow/core/grappler/optimizers/data/auto_shard.cc +++ b/tensorflow/core/grappler/optimizers/data/auto_shard.cc @@ -68,16 +68,16 @@ constexpr std::array kPassThroughOps = { "_Retval", "AssertNextDataset", "BatchDataset", + "CacheDataset", "ExperimentalMapAndBatchDataset", "ExperimentalRebatchDataset", - "PaddedBatchDataset", - "CacheDataset", "FilterDataset", "Identity", "MapAndBatchDataset", "MapDataset", "ModelDataset", "OptimizeDataset", + "PaddedBatchDataset", "ParallelMapDataset", "ParseExampleDataset", "PrefetchDataset", From 5cf9a5de8d0f3ba047aa42b6c7073bad476a2b7b Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: Thu, 23 Jul 2020 10:20:17 -0700 Subject: [PATCH 1155/2522] Remove an unused proto include PiperOrigin-RevId: 322812216 Change-Id: Ic6ac2a5df5cb63627801c623533cd779504cba16 --- tensorflow/core/data/service/common.proto | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/core/data/service/common.proto b/tensorflow/core/data/service/common.proto index 4bde56fe1ca..b9a6f72ddf4 100644 --- a/tensorflow/core/data/service/common.proto +++ b/tensorflow/core/data/service/common.proto @@ -3,7 +3,6 @@ syntax = "proto3"; package tensorflow.data; import "tensorflow/core/framework/graph.proto"; -import "tensorflow/core/framework/types.proto"; message DatasetDef { // We represent datasets as tensorflow GraphDefs which define the operations From bc9809f15acfd82cc598e1483e12cf6d4d79ac01 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Fri, 24 Jul 2020 00:26:02 +0700 Subject: [PATCH 1156/2522] Add GetChildren --- .../filesystem/plugins/s3/s3_filesystem.cc | 56 ++++++++++++++++++- 1 file changed, 55 insertions(+), 1 
deletion(-)

diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc
index 0c92bd3dd61..97e97a7bd25 100644
--- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc
+++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc
@@ -45,6 +45,7 @@ limitations under the License.
 constexpr char kS3FileSystemAllocationTag[] = "S3FileSystemAllocation";
 constexpr char kS3ClientAllocationTag[] = "S3ClientAllocation";
 constexpr int64_t kS3TimeoutMsec = 300000;  // 5 min
+constexpr int kS3GetChildrenMaxKeys = 100;
 
 constexpr char kExecutorTag[] = "TransferManagerExecutorAllocation";
 constexpr int kExecutorPoolSize = 25;
@@ -1055,7 +1056,60 @@ void DeleteDir(const TF_Filesystem* filesystem, const char* path,
   }
 }
 
-// TODO(vnvo2409): Implement later
+int GetChildren(const TF_Filesystem* filesystem, const char* path,
+                char*** entries, TF_Status* status) {
+  Aws::String bucket, prefix;
+  ParseS3Path(path, true, &bucket, &prefix, status);
+  if (TF_GetCode(status) != TF_OK) return -1;
+  if (!prefix.empty() && prefix.back() != '/') prefix.push_back('/');
+
+  auto s3_file = static_cast<S3File*>(filesystem->plugin_filesystem);
+  GetS3Client(s3_file);
+
+  Aws::S3::Model::ListObjectsRequest list_objects_request;
+  list_objects_request.WithBucket(bucket)
+      .WithPrefix(prefix)
+      .WithMaxKeys(kS3GetChildrenMaxKeys)
+      .WithDelimiter("/");
+  list_objects_request.SetResponseStreamFactory(
+      []() { return Aws::New<Aws::StringStream>(kS3FileSystemAllocationTag); });
+
+  Aws::S3::Model::ListObjectsResult list_objects_result;
+  std::vector<Aws::String> result;
+  do {
+    auto list_objects_outcome =
+        s3_file->s3_client->ListObjects(list_objects_request);
+    if (!list_objects_outcome.IsSuccess()) {
+      TF_SetStatusFromAWSError(list_objects_outcome.GetError(), status);
+      return -1;
+    }
+
+    list_objects_result = list_objects_outcome.GetResult();
+    for (const auto& object : list_objects_result.GetCommonPrefixes()) {
+      Aws::String s = object.GetPrefix();
+      s.erase(s.length() - 1);
+      Aws::String entry = s.substr(prefix.length());
+      if (entry.length() > 0) {
+        result.push_back(entry);
+      }
+    }
+    for (const auto& object : list_objects_result.GetContents()) {
+      Aws::String s = object.GetKey();
+      Aws::String entry = s.substr(prefix.length());
+      if (entry.length() > 0) {
+        result.push_back(entry);
+      }
+    }
+    list_objects_result.SetMarker(list_objects_result.GetNextMarker());
+  } while (list_objects_result.GetIsTruncated());
+
+  int num_entries = result.size();
+  *entries = static_cast<char**>(
+      plugin_memory_allocate(num_entries * sizeof((*entries)[0])));
+  for (int i = 0; i < num_entries; i++)
+    (*entries)[i] = strdup(result[i].c_str());
+  TF_SetStatus(status, TF_OK, "");
+  return num_entries;
+}
 
 }  // namespace tf_s3_filesystem
From d7bb18930969805856e04503e8310ffa068edff6 Mon Sep 17 00:00:00 2001
From: Vo Van Nghia
Date: Fri, 24 Jul 2020 00:29:25 +0700
Subject: [PATCH 1157/2522] Finish ProvideFilesystemSupportFor

---
 .../filesystem/plugins/s3/s3_filesystem.cc    | 41 +++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc
index 97e97a7bd25..50be57a263b 100644
--- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc
+++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc
@@ -1117,6 +1117,47 @@ static void ProvideFilesystemSupportFor(TF_FilesystemPluginOps* ops,
                                         const char* uri) {
   TF_SetFilesystemVersionMetadata(ops);
   ops->scheme =
strdup(uri); + + ops->random_access_file_ops = static_cast( + plugin_memory_allocate(TF_RANDOM_ACCESS_FILE_OPS_SIZE)); + ops->random_access_file_ops->cleanup = tf_random_access_file::Cleanup; + ops->random_access_file_ops->read = tf_random_access_file::Read; + + ops->writable_file_ops = static_cast( + plugin_memory_allocate(TF_WRITABLE_FILE_OPS_SIZE)); + ops->writable_file_ops->cleanup = tf_writable_file::Cleanup; + ops->writable_file_ops->append = tf_writable_file::Append; + ops->writable_file_ops->tell = tf_writable_file::Tell; + ops->writable_file_ops->flush = tf_writable_file::Flush; + ops->writable_file_ops->sync = tf_writable_file::Sync; + ops->writable_file_ops->close = tf_writable_file::Close; + + ops->read_only_memory_region_ops = static_cast( + plugin_memory_allocate(TF_READ_ONLY_MEMORY_REGION_OPS_SIZE)); + ops->read_only_memory_region_ops->cleanup = + tf_read_only_memory_region::Cleanup; + ops->read_only_memory_region_ops->data = tf_read_only_memory_region::Data; + ops->read_only_memory_region_ops->length = tf_read_only_memory_region::Length; + + ops->filesystem_ops = static_cast( + plugin_memory_allocate(TF_FILESYSTEM_OPS_SIZE)); + ops->filesystem_ops->init = tf_s3_filesystem::Init; + ops->filesystem_ops->cleanup = tf_s3_filesystem::Cleanup; + ops->filesystem_ops->new_random_access_file = + tf_s3_filesystem::NewRandomAccessFile; + ops->filesystem_ops->new_writable_file = tf_s3_filesystem::NewWritableFile; + ops->filesystem_ops->new_appendable_file = + tf_s3_filesystem::NewAppendableFile; + ops->filesystem_ops->new_read_only_memory_region_from_file = + tf_s3_filesystem::NewReadOnlyMemoryRegionFromFile; + ops->filesystem_ops->create_dir = tf_s3_filesystem::CreateDir; + ops->filesystem_ops->delete_file = tf_s3_filesystem::DeleteFile; + ops->filesystem_ops->delete_dir = tf_s3_filesystem::DeleteDir; + ops->filesystem_ops->copy_file = tf_s3_filesystem::CopyFile; + ops->filesystem_ops->path_exists = tf_s3_filesystem::PathExists; + ops->filesystem_ops->get_file_size = tf_s3_filesystem::GetFileSize; + ops->filesystem_ops->stat = tf_s3_filesystem::Stat; + ops->filesystem_ops->get_children = tf_s3_filesystem::GetChildren; } void TF_InitPlugin(TF_FilesystemPluginInfo* info) { From b6e5c0faa723f924dd80a92efddb584c328118af Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Thu, 23 Jul 2020 10:26:04 -0700 Subject: [PATCH 1158/2522] Type inference: clean up the implementation and improve coverage. This change also adds basic type propagation across local function definition boundaries. This is required for proper inference over AutoGraph-generated code, which creates a lot of local definitions. tfr_gen: fix order of passes. PiperOrigin-RevId: 322813481 Change-Id: Ib145c1fb34856a422b4202ebbd53d5a5d944be69 --- tensorflow/python/autograph/pyct/anno.py | 1 + .../pyct/static_analysis/type_inference.py | 395 ++++++++++++------ .../static_analysis/type_inference_test.py | 150 ++++++- 3 files changed, 414 insertions(+), 132 deletions(-) diff --git a/tensorflow/python/autograph/pyct/anno.py b/tensorflow/python/autograph/pyct/anno.py index bad937c8305..e6c40fc0cc2 100644 --- a/tensorflow/python/autograph/pyct/anno.py +++ b/tensorflow/python/autograph/pyct/anno.py @@ -101,6 +101,7 @@ class Static(NoValue): LIVE_VARS_OUT = ('Symbols live when exiting the node. See liveness.py.') LIVE_VARS_IN = ('Symbols live when entering the node. See liveness.py.') TYPES = 'Static type information. See type_inference.py.' + CLOSURE_TYPES = 'Types of closure symbols at each detected call site.' 
FAIL = object() diff --git a/tensorflow/python/autograph/pyct/static_analysis/type_inference.py b/tensorflow/python/autograph/pyct/static_analysis/type_inference.py index f8ddbe4e91c..3412a8a6aa0 100644 --- a/tensorflow/python/autograph/pyct/static_analysis/type_inference.py +++ b/tensorflow/python/autograph/pyct/static_analysis/type_inference.py @@ -20,17 +20,20 @@ extracted from static sources: * global and local symbols visible to the function at analysis time * literals -Requires activity analysis. +Requires reaching function definitions analysis. """ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from typing import Any, Tuple + import gast from tensorflow.python.autograph.pyct import anno from tensorflow.python.autograph.pyct import cfg +from tensorflow.python.autograph.pyct import qual_names from tensorflow.python.autograph.pyct import transformer from tensorflow.python.autograph.pyct.static_analysis import annos @@ -38,30 +41,44 @@ from tensorflow.python.autograph.pyct.static_analysis import annos class Resolver(object): """Resolver objects handle the process of looking up actual names and types. - All resolve_* methods take: - * a first namespace argument, mapping string to actual values - * one or more name arguments, as QN objects + All resolve_* methods: + * have a first namespace argument, mapping string to actual values + * specify names as QN objects + * specify types as a Set of inferred types All resolve_* methods must return either: * a set of `type` objects * None """ - def resolve_external_name(self, ns, name): + def res_name(self, ns, name): """Resolves the type an external (e.g. closure, global) variable.""" raise NotImplementedError('subclasses must implement') - def resolve_external_call(self, ns, name): - """Resolves the return type an external function call.""" - # TODO(mdan): This must accept argument value/types. + def res_value(self, ns, value): + """Resolves the type a literal value.""" raise NotImplementedError('subclasses must implement') - def resolve_external_arg(self, ns, f_name, arg_name, type_anno): + # TODO(mdan): Allow caller to model side effects. + def res_call(self, ns, name, target, args, keywords, starargs, kwargs): + """Resolves the return type an external function or method call. + + Args: + ns: namespace + name: str, the function name + target: if this is a method call, the types of the method target, None + otherwise + args: list or argument types + keywords: dict of name to argument types + starargs: list of types of the *args arguments (should be at most one) + kwargs: list of types of the **kwargs arguments (in order of appearance) + """ + raise NotImplementedError('subclasses must implement') + + def res_arg(self, ns, f_name, arg_name, type_anno): """Resolves the type of a (possibly annotated) function argument.""" raise NotImplementedError('subclasses must implement') - # TODO(mdan): More resolvers as needed. - class _SymbolTable(object): """Abstraction for the state of the CFG walk for type inference. @@ -70,7 +87,7 @@ class _SymbolTable(object): Attributes: value: Dict[qual_names.QN, Set[Type]], mapping symbols to the set of - possible types. + possible types. 
""" def __init__(self, init_from=None): @@ -107,10 +124,211 @@ class _SymbolTable(object): return 'SymbolTable {}'.format(self.value) -class Analyzer(cfg.GraphVisitor): - """CFG visitor that performs type inference at statement level.""" +_GETITEM = qual_names.QN('__getitem__') - def __init__(self, graph, resolver, namespace, scope): +_HANDLERS = { + gast.Eq: qual_names.QN('__eq__'), + gast.NotEq: qual_names.QN('__ne__'), + gast.Lt: qual_names.QN('__lt__'), + gast.LtE: qual_names.QN('__le__'), + gast.Gt: qual_names.QN('__gt__'), + gast.GtE: qual_names.QN('__ge__'), + gast.In: qual_names.QN('__contains__'), + # TODO(mdan): Is this actually correct? + # NotIn(*) = Not(In(*)) + gast.NotIn: qual_names.QN('__not__'), + + gast.Add: qual_names.QN('__add__'), + gast.Sub: qual_names.QN('__sub__'), + gast.Mult: qual_names.QN('__mul__'), + gast.Div: qual_names.QN('__div__'), + gast.FloorDiv: qual_names.QN('__floordiv__'), + gast.Mod: qual_names.QN('__mod__'), + gast.Pow: qual_names.QN('__pow__'), + gast.LShift: qual_names.QN('__lshift__'), + gast.RShift: qual_names.QN('__rshift__'), + gast.BitOr: qual_names.QN('__or__'), + gast.BitXor: qual_names.QN('__xor__'), + gast.BitAnd: qual_names.QN('__and__'), + gast.MatMult: qual_names.QN('__matmul__'), +} + +_FIXED_RETTYPES = { + gast.Is: bool, + gast.IsNot: bool, +} + + +class StmtInferrer(gast.NodeVisitor): + """Runs type inference on a single AST statement. + + This visitor annotates most nodes with type information. It also sets types + for the symbols modified by this statement in its types_out property. + """ + + def __init__(self, resolver, scope, namespace, closure_types, types_in): + self.resolver = resolver + self.scope = scope + self.namespace = namespace + self.closure_types = closure_types + self.types_in = types_in + self.new_symbols = {} + + def visit(self, node): + types = super().visit(node) + if types is not None: + # TODO(mdan): Normalize by removing subtypes. + anno.setanno(node, anno.Static.TYPES, tuple(types)) + return types + + def visit_FunctionDef(self, node): + # Skip local function definitions. They are analyzed separately. + return None + + def visit_Constant(self, node): + return self.resolver.res_value(self.namespace, node.value) + + def visit_Tuple(self, node): + if isinstance(node.ctx, gast.Load): + for elt in node.elts: + self.visit(elt) + # TODO(mdan): Parameterize it. + return {Tuple} + + assert isinstance(node.ctx, gast.Store) + # TODO(mdan): Implement tuple unpacking. 
+ return None + + def visit_List(self, node): + if isinstance(node.ctx, gast.Load): + el_types = [] + for elt in node.elts: + el_types.append(self.visit(elt)) + return {list} + + raise NotImplementedError('list unpacking') + + def visit_Set(self, node): + raise NotImplementedError() + + def visit_Name(self, node): + name = anno.getanno(node, anno.Basic.QN) + if isinstance(node.ctx, gast.Load): + types = self.types_in.value.get(name, None) + if (types is None) and (name not in self.scope.bound): + if name in self.closure_types: + types = self.closure_types[name] + else: + types = self.resolver.res_name(self.namespace, name) + return types + + elif isinstance(node.ctx, gast.Param): + type_name = anno.getanno(node.annotation, anno.Basic.QN, None) + types = self.resolver.res_arg(self.namespace, self.scope.function_name, + name, type_name) + if types is not None: + self.new_symbols[name] = types + return types + + elif isinstance(node.ctx, gast.Store): + if self.rvalue is not None: + self.new_symbols[name] = self.rvalue + else: + # No type information, assume Any. + self.new_symbols[name] = {Any} + return self.rvalue + + assert False, 'unknown ctx' + + def visit_Call(self, node): + f_name = anno.getanno(node.func, anno.Basic.QN) + + kwargs = [self.visit(kw.value) for kw in node.keywords if kw.arg is None] + keywords = { + kw.arg: self.visit(kw.value) + for kw in node.keywords + if kw.arg is not None + } + is_starred = [isinstance(a, gast.Starred) for a in node.args] + args = [ + self.visit(a) + for a, starred in zip(node.args, is_starred) + if not starred + ] + starargs = [ + self.visit(a.value) + for a, starred in zip(node.args, is_starred) + if starred + ] + + if f_name in self.scope.bound: + # Don't attempt external resolution of local functions. + # TODO(mdan): Use type annotations of the local definition. + return None + + return self.resolver.res_call( + self.namespace, f_name, None, args, keywords, starargs, kwargs) + + def visit_Index(self, node): + return self.visit(node.value) + + def visit_Assign(self, node): + self.rvalue = self.visit(node.value) + + for t in node.targets: + self.visit(t) + + self.rvalue = None + + def visit_Subscript(self, node): + val_type = self.visit(node.value) + slice_type = self.visit(node.slice) + + if val_type is None or slice_type is None: + return None + + return self.resolver.res_call(self.namespace, _GETITEM, val_type, + (slice_type,), {}, (), ()) + + def visit_Compare(self, node): + right_types = [self.visit(c) for c in node.comparators] + op_types = [type(o) for o in node.ops] + if len(op_types) > 1: + raise NotImplementedError('chained comparisons') + assert len(right_types) == 1 + + left_type = self.visit(node.left) + right_type, = right_types + op_type, = op_types + + if left_type is None or right_type is None: + return None + + f_name = _HANDLERS.get(op_type, None) + if f_name is None: + # Python doesn't allow overriding these operators. Their return types are + # fixed. + return {_FIXED_RETTYPES[op_type]} + return self.resolver.res_call(self.namespace, _HANDLERS[op_type], + left_type, (right_type,), {}, (), ()) + + def visit_BinOp(self, node): + left_type = self.visit(node.left) + right_type = self.visit(node.right) + + if left_type is None or right_type is None: + return None + + # TODO(mdan): This does not fully follow Python operator semantics. 
+ # For example, in `a + b` Python will try `a.__add__`, but also `b.__radd__` + return self.resolver.res_call(self.namespace, _HANDLERS[type(node.op)], + left_type, (right_type,), {}, (), ()) + + +class Analyzer(cfg.GraphVisitor): + """CFG visitor that propagates type information across statements.""" + + def __init__(self, graph, resolver, namespace, scope, closure_types): """Creates a new analyzer. Args: @@ -118,80 +336,29 @@ class Analyzer(cfg.GraphVisitor): resolver: Resolver namespace: Dict[str, Any] scope: activity.Scope + closure_types: Dict[QN, Set] """ super(Analyzer, self).__init__(graph) self.resolver = resolver self.namespace = namespace self.scope = scope + self.closure_types = closure_types def init_state(self, _): return _SymbolTable() - def _infer_type(self, node, types_in): - """Infers the return type of an expression.""" - if isinstance(node, gast.Name): - # Normal variables: carry over their existing type. - name = anno.getanno(node, anno.Basic.QN) - types = types_in.value.get(name, None) - if types is not None: - return types - # If type is unknown, resolve it. - if name not in self.scope.bound: - return self.resolver.resolve_external_name(self.namespace, name) - return None + def _update_closure_types(self, ast_node, types): + existing_types = anno.getanno(ast_node, anno.Static.CLOSURE_TYPES, None) - if isinstance(node, gast.Call): - # Function calls: resolve their return type. - f_name = anno.getanno(node.func, anno.Basic.QN) - return self.resolver.resolve_external_call(self.namespace, f_name) + if existing_types is None: + existing_types = {} + anno.setanno(ast_node, anno.Static.CLOSURE_TYPES, existing_types) - else: - raise NotImplementedError(node) - - def _assignment_types(self, node, types_in): - """Propagates types through an assignment operation.""" - targets = node.targets - if len(targets) != 1: - raise NotImplementedError('multiple assignment') - - target, = targets - qn = anno.getanno(target, anno.Basic.QN) - types = self._infer_type(node.value, types_in) - if types is None: - return () - - return (qn, types), - - def _arg_type(self, node): - """Looks up the type of an argument based on its annotation.""" - assert isinstance(node, gast.Name) - name = anno.getanno(node, anno.Basic.QN) - type_name = anno.getanno(node.annotation, anno.Basic.QN, None) - - type_ = self.resolver.resolve_external_arg(self.namespace, - self.scope.function_name, name, - type_name) - return (name, type_), - - def _args_types(self, node): - """Propagates types through argument annotations.""" - types = {} - - for n in node.posonlyargs: - types.update(self._arg_type(n)) - for n in node.args: - types.update(self._arg_type(n)) - for n in node.kwonlyargs: - types.update(self._arg_type(n)) - - if node.vararg: - raise NotImplementedError('vararg') - if node.kwarg: - raise NotImplementedError('kwarg') - - # TODO(mdan): Use kw_defaults, defaults if available. 
- - return types + for k, v in types.value.items(): + if k in existing_types: + existing_types[k].update(v) + else: + existing_types[k] = set(v) def visit_node(self, node): prev_types_out = self.out[node] @@ -202,10 +369,20 @@ class Analyzer(cfg.GraphVisitor): types_out = _SymbolTable(types_in) ast_node = node.ast_node - if isinstance(ast_node, gast.Assign): - types_out.value.update(self._assignment_types(ast_node, types_in)) - elif isinstance(ast_node, gast.arguments): - types_out.value.update(self._args_types(ast_node)) + + inferrer = StmtInferrer( + self.resolver, self.scope, self.namespace, self.closure_types, types_in) + inferrer.visit(ast_node) + types_out.value.update(inferrer.new_symbols) + + reaching_fndefs = anno.getanno(ast_node, anno.Static.DEFINED_FNS_IN) + node_scope = anno.getanno(ast_node, anno.Static.SCOPE, None) + if node_scope is not None: + # TODO(mdan): Check that it's actually safe to skip nodes without scope. + reads = {str(qn) for qn in node_scope.read} + for def_node in reaching_fndefs: + if def_node.name in reads: + self._update_closure_types(def_node, types_out) self.in_[node] = types_in self.out[node] = types_out @@ -213,65 +390,28 @@ class Analyzer(cfg.GraphVisitor): return prev_types_out != types_out -class TreeAnnotator(transformer.Base): - """AST visitor that annotates each symbol with its possible types.""" +class FunctionVisitor(transformer.Base): + """AST visitor that applies type inference to each function separately.""" def __init__(self, source_info, graphs, resolver): - super(TreeAnnotator, self).__init__(source_info) + super(FunctionVisitor, self).__init__(source_info) self.graphs = graphs self.resolver = resolver - self.current_analyzer = None - self.current_cfg_node = None def visit_FunctionDef(self, node): - parent_analyzer = self.current_analyzer subgraph = self.graphs[node] - scope = anno.getanno(node, annos.NodeAnno.BODY_SCOPE) + scope = anno.getanno(node, annos.NodeAnno.ARGS_AND_BODY_SCOPE) + closure_types = anno.getanno(node, anno.Static.CLOSURE_TYPES, {}) - analyzer = Analyzer(subgraph, self.resolver, self.ctx.info.namespace, scope) + analyzer = Analyzer( + subgraph, self.resolver, self.ctx.info.namespace, scope, closure_types) analyzer.visit_forward() # Recursively process any remaining subfunctions. - self.current_analyzer = analyzer - node.args = self.visit(node.args) node.body = self.visit_block(node.body) - self.current_analyzer = parent_analyzer return node - def visit_Name(self, node): - if self.current_analyzer is None: - # Names may appear outside function defs - for example in class - # definitions. - return node - - analyzer = self.current_analyzer - cfg_node = self.current_cfg_node - - assert cfg_node is not None, ('name node, %s, outside of any statement?' - % node.id) - - qn = anno.getanno(node, anno.Basic.QN) - if isinstance(node.ctx, gast.Load): - anno.setanno(node, anno.Static.TYPES, - tuple(analyzer.in_[cfg_node].value.get(qn, ()))) - else: - anno.setanno(node, anno.Static.TYPES, - tuple(analyzer.out[cfg_node].value.get(qn, ()))) - - return node - - def visit(self, node): - parent = self.current_cfg_node - - if (self.current_analyzer is not None and - node in self.current_analyzer.graph.index): - self.current_cfg_node = self.current_analyzer.graph.index[node] - node = super(TreeAnnotator, self).visit(node) - - self.current_cfg_node = parent - return node - def resolve(node, source_info, graphs, resolver): """Performs type inference. 
@@ -281,9 +421,10 @@ def resolve(node, source_info, graphs, resolver): source_info: transformer.SourceInfo graphs: Dict[ast.FunctionDef, cfg.Graph] resolver: Resolver + Returns: ast.AST """ - visitor = TreeAnnotator(source_info, graphs, resolver) + visitor = FunctionVisitor(source_info, graphs, resolver) node = visitor.visit(node) return node diff --git a/tensorflow/python/autograph/pyct/static_analysis/type_inference_test.py b/tensorflow/python/autograph/pyct/static_analysis/type_inference_test.py index 3291347396f..fb7324aedfa 100644 --- a/tensorflow/python/autograph/pyct/static_analysis/type_inference_test.py +++ b/tensorflow/python/autograph/pyct/static_analysis/type_inference_test.py @@ -23,19 +23,33 @@ from tensorflow.python.autograph.pyct import cfg from tensorflow.python.autograph.pyct import qual_names from tensorflow.python.autograph.pyct import transpiler from tensorflow.python.autograph.pyct.static_analysis import activity +from tensorflow.python.autograph.pyct.static_analysis import reaching_definitions +from tensorflow.python.autograph.pyct.static_analysis import reaching_fndefs from tensorflow.python.autograph.pyct.static_analysis import type_inference from tensorflow.python.platform import test class TestResolver(type_inference.Resolver): + """A very basic resolver for testing.""" - def resolve_external_name(self, ns, name): + def res_name(self, ns, name): return {type(ns[str(name)])} - def resolve_external_call(self, ns, f_name): - return {ns[str(f_name)].__annotations__['return']} + def res_value(self, ns, value): + del ns + return {type(value)} - def resolve_external_arg(self, ns, f_name, arg_name, type_anno): + def res_call(self, ns, name, target, args, keywords, starargs, kwargs): + name_str = str(name) + if name_str in ns: + return {ns[name_str].__annotations__['return']} + if target is None: + return {'unk_{}'.format(name_str)} + return {'{}_{}'.format(list(target)[0], name_str)} + + def res_arg(self, ns, f_name, arg_name, type_anno): + if f_name == 'magic_no_types': + return None if type_anno is not None: return {{'int': int, 'float': float}[str(type_anno)]} return {'{}_{}'.format(f_name, arg_name)} @@ -43,6 +57,10 @@ class TestResolver(type_inference.Resolver): class TestTranspiler(transpiler.GenericTranspiler): + def __init__(self): + super().__init__() + self.resolver = TestResolver() + def get_transformed_name(self, _): return 'test_item' @@ -50,7 +68,9 @@ class TestTranspiler(transpiler.GenericTranspiler): node = qual_names.resolve(node) node = activity.resolve(node, ctx) graphs = cfg.build(node) - node = type_inference.resolve(node, ctx, graphs, TestResolver()) + node = reaching_definitions.resolve(node, ctx, graphs) + node = reaching_fndefs.resolve(node, ctx, graphs) + node = type_inference.resolve(node, ctx, graphs, self.resolver) return node @@ -62,6 +82,11 @@ class TypeInferenceAnalyzerTest(test.TestCase): self.assertSetEqual( set(anno.getanno(node, anno.Static.TYPES)), set(expected)) + def assertClosureTypes(self, node, expected): + actual = anno.getanno(node, anno.Static.CLOSURE_TYPES) + actual = {str(k): v for k, v in actual.items()} + self.assertDictEqual(actual, expected) + def test_argument(self): def test_fn(a: int, b): @@ -73,6 +98,22 @@ class TypeInferenceAnalyzerTest(test.TestCase): self.assertTypes(fn_body[0].value.elts[0], int) self.assertTypes(fn_body[0].value.elts[1], 'test_fn_b') + def test_argument_of_local_function(self): + + def test_fn(a: int): + + def foo(x: float): + return x + + return foo(a) + + tr = TestTranspiler() + node, _ = 
tr.transform(test_fn, None) + fn_body = node.body + + self.assertTypes(fn_body[0].body[0].value, float) + self.assertClosureTypes(fn_body[0], {'a': {int}}) + def test_straightline_assignment(self): def test_fn(a: int, c): @@ -132,6 +173,105 @@ class TypeInferenceAnalyzerTest(test.TestCase): self.assertTypes(fn_body[0].targets[0], float) self.assertTypes(fn_body[1].value, float) + def test_local_function_closure(self): + + def test_fn(x: int): + + def foo(): + return x + + foo() + + node, _ = TestTranspiler().transform(test_fn, None) + fn_body = node.body + + self.assertTypes(fn_body[0].body[0].value, int) + self.assertClosureTypes(fn_body[0], {'x': {int}}) + + def test_local_function_closure_ignored_for_bound_symbols(self): + + def test_fn(x: int): # pylint:disable=unused-argument + + def foo(): + x = x + 1 # pylint:disable=used-before-assignment + + foo() + + node, _ = TestTranspiler().transform(test_fn, None) + fn_body = node.body + + self.assertFalse( + anno.hasanno(fn_body[0].body[0].value.left, anno.Static.TYPES)) + self.assertClosureTypes(fn_body[0], {'x': {int}}) + + def test_local_function_closure_uses_call_site_types(self): + + def test_fn(x: int): + + def foo(): + return x + + x = 1.0 + foo() + + node, _ = TestTranspiler().transform(test_fn, None) + fn_body = node.body + + self.assertTypes(fn_body[0].body[0].value, float) + self.assertTypes(fn_body[1].targets[0], float) + self.assertClosureTypes(fn_body[0], {'x': {float}}) + + def test_subscript(self): + + def test_fn(a): + return a[1] + + node, _ = TestTranspiler().transform(test_fn, None) + fn_body = node.body + + self.assertTypes(fn_body[0].value, 'test_fn_a___getitem__') + self.assertTypes(fn_body[0].value.value, 'test_fn_a') + self.assertTypes(fn_body[0].value.slice.value, int) + + def test_compare(self): + + def test_fn(a, b): + return a < b + + node, _ = TestTranspiler().transform(test_fn, None) + fn_body = node.body + + self.assertTypes(fn_body[0].value, 'test_fn_a___lt__') + self.assertTypes(fn_body[0].value.left, 'test_fn_a') + self.assertTypes(fn_body[0].value.comparators[0], 'test_fn_b') + + def test_binop(self): + + def test_fn(a, b): + return a @ b + + node, _ = TestTranspiler().transform(test_fn, None) + fn_body = node.body + + self.assertTypes(fn_body[0].value, 'test_fn_a___matmul__') + self.assertTypes(fn_body[0].value.left, 'test_fn_a') + self.assertTypes(fn_body[0].value.right, 'test_fn_b') + + def test_no_inference_on_unknown_operand_types(self): + + # No information on types of a and b, see TestResolver. + def magic_no_types(a, b): + return a < b, a - b + + node, _ = TestTranspiler().transform(magic_no_types, None) + fn_body = node.body + + # With no information on operand types, the operators will assert nothing. + self.assertFalse( + anno.hasanno(fn_body[0].value.elts[0], anno.Static.TYPES)) + self.assertFalse( + anno.hasanno(fn_body[0].value.elts[1], anno.Static.TYPES)) + if __name__ == '__main__': test.main() From 1bc83bae4f28f7576465f0d23a2ad9a700796198 Mon Sep 17 00:00:00 2001 From: Rachel Lim Date: Thu, 23 Jul 2020 10:37:00 -0700 Subject: [PATCH 1159/2522] [tf.data] Fix RebatchDatasetV2 serialization. 
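For context, the state being fixed here is what RebatchDatasetV2 saves when an iterator is checkpointed part-way through a rebatched batch. A minimal user-level sketch of the round trip (assuming TF 2.x eager APIs; `_RebatchDataset` is a private helper and the checkpoint path below is hypothetical):

    import tensorflow as tf
    from tensorflow.python.data.experimental.ops import distribute

    # Batch by 16, then rebatch into two per-replica batches of 8 each.
    dataset = tf.data.Dataset.range(64).batch(16, drop_remainder=True)
    dataset = distribute._RebatchDataset(dataset, batch_sizes=[8, 8])

    iterator = iter(dataset)
    next(iterator)  # advance so the kernel is holding partially emitted batch state
    ckpt = tf.train.Checkpoint(iterator=iterator)
    path = ckpt.save("/tmp/rebatch_ckpt")  # hypothetical location
    ckpt.restore(path)  # should reproduce the buffered tensors exactly
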
PiperOrigin-RevId: 322816035 Change-Id: I15d24b18f5bfeb7162d1c3362aa0dfec71efaa37 --- .../data/experimental/rebatch_dataset_op.cc | 4 ++-- .../rebatch_dataset_serialization_test.py | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/data/experimental/rebatch_dataset_op.cc b/tensorflow/core/kernels/data/experimental/rebatch_dataset_op.cc index 6bd7d0c69ef..8bb0c2388f6 100644 --- a/tensorflow/core/kernels/data/experimental/rebatch_dataset_op.cc +++ b/tensorflow/core/kernels/data/experimental/rebatch_dataset_op.cc @@ -510,7 +510,7 @@ class RebatchDatasetV2Op : public UnaryDatasetOpKernel { TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("batch_sizes_index"), batch_sizes_index_)); TF_RETURN_IF_ERROR(writer->WriteScalar(full_name("offset"), offset_)); - if (batch_sizes_index_ != 0) { + if (offset_ != -1) { for (int i = 0; i < tensors_.size(); ++i) { TF_RETURN_IF_ERROR(writer->WriteTensor( full_name(strings::StrCat("tensors[", i, "]")), tensors_[i])); @@ -532,7 +532,7 @@ class RebatchDatasetV2Op : public UnaryDatasetOpKernel { TF_RETURN_IF_ERROR(reader->ReadScalar(full_name("offset"), &offset_)); tensors_.clear(); - if (batch_sizes_index_ > 0) { + if (offset_ != -1) { tensors_.resize(dataset()->output_dtypes().size()); for (int i = 0; i < tensors_.size(); ++i) { TF_RETURN_IF_ERROR(reader->ReadTensor( diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization/rebatch_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/rebatch_dataset_serialization_test.py index e99e1f99a9c..fe4eac5b69d 100644 --- a/tensorflow/python/data/experimental/kernel_tests/serialization/rebatch_dataset_serialization_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/serialization/rebatch_dataset_serialization_test.py @@ -43,5 +43,21 @@ class LegacyRebatchDatasetSerializationTest( self.run_core_tests(lambda: build_dataset(64, 8), 8) +class RebatchDatasetSerializationTest( + dataset_serialization_test_base.DatasetSerializationTestBase, + parameterized.TestCase): + + @combinations.generate(test_base.default_test_combinations()) + def testCore(self): + + def build_dataset(num_elements, batch_size): + return distribute._RebatchDataset( + dataset_ops.Dataset.range(num_elements).batch( + 2 * batch_size, drop_remainder=True), + batch_sizes=[batch_size, batch_size]) + + self.run_core_tests(lambda: build_dataset(64, 8), 8) + + if __name__ == "__main__": test.main() From e808ec8e2348309681ef2764c7fc5980d771f8c3 Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Thu, 23 Jul 2020 10:39:03 -0700 Subject: [PATCH 1160/2522] Update private TF API usage wrt tensorflow.python.training.experimental.loss_scale. The affected private methods have been copied to Keras.
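For reference, the identifiers that the Keras-side `get()` now interprets locally are the same ones users pass through the public mixed precision API. A hedged usage sketch (TF 2.x experimental API as of this change):

    import tensorflow as tf

    # An int or float is wrapped in a FixedLossScale; the string 'dynamic'
    # selects a DynamicLossScale; a LossScale instance passes through as-is.
    fixed = tf.keras.mixed_precision.experimental.LossScaleOptimizer(
        tf.keras.optimizers.SGD(0.1), loss_scale=128)
    dynamic = tf.keras.mixed_precision.experimental.LossScaleOptimizer(
        tf.keras.optimizers.SGD(0.1), loss_scale="dynamic")
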
PiperOrigin-RevId: 322816540 Change-Id: I814cdbb4a4babcf6691e7f3574d5ff8f6b3dc58c --- .../keras/mixed_precision/experimental/BUILD | 1 - .../mixed_precision/experimental/loss_scale.py | 16 +++++++++++++++- .../experimental/loss_scale_optimizer.py | 3 +-- .../python/training/experimental/loss_scale.py | 6 +----- 4 files changed, 17 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/keras/mixed_precision/experimental/BUILD b/tensorflow/python/keras/mixed_precision/experimental/BUILD index 4060e455f84..b143e5946f5 100644 --- a/tensorflow/python/keras/mixed_precision/experimental/BUILD +++ b/tensorflow/python/keras/mixed_precision/experimental/BUILD @@ -169,7 +169,6 @@ py_library( srcs_version = "PY2AND3", deps = [ ":loss_scale", - "//tensorflow/python:loss_scale", "//tensorflow/python/distribute:collective_all_reduce_strategy", "//tensorflow/python/distribute:distribute_lib", "//tensorflow/python/distribute:mirrored_strategy", diff --git a/tensorflow/python/keras/mixed_precision/experimental/loss_scale.py b/tensorflow/python/keras/mixed_precision/experimental/loss_scale.py index 680b0a5b89f..307313d7e36 100644 --- a/tensorflow/python/keras/mixed_precision/experimental/loss_scale.py +++ b/tensorflow/python/keras/mixed_precision/experimental/loss_scale.py @@ -21,6 +21,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import six + from tensorflow.python.keras.utils import generic_utils from tensorflow.python.training.experimental import loss_scale as loss_scale_module @@ -44,6 +46,18 @@ def deserialize(config, custom_objects=None): def get(identifier): + """Get a loss scale object.""" if isinstance(identifier, dict): return deserialize(identifier) - return loss_scale_module.get(identifier) + + if isinstance(identifier, six.integer_types + (float,)): + return loss_scale_module.FixedLossScale(identifier) + if identifier == 'dynamic': + return loss_scale_module.DynamicLossScale() + if isinstance(identifier, loss_scale_module.LossScale): + return identifier + elif identifier is None: + return None + else: + raise ValueError('Could not interpret loss scale identifier: %s' % + identifier) diff --git a/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer.py b/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer.py index 3d37d10791d..c2ae3b375d4 100644 --- a/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer.py +++ b/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer.py @@ -31,7 +31,6 @@ from tensorflow.python.keras.mixed_precision.experimental import loss_scale as k from tensorflow.python.keras.optimizer_v2 import optimizer_v2 from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops -from tensorflow.python.training.experimental import loss_scale as loss_scale_module from tensorflow.python.training.experimental import mixed_precision from tensorflow.python.training.tracking import base as trackable from tensorflow.python.util.tf_export import keras_export @@ -271,7 +270,7 @@ class LossScaleOptimizer(_DelegatingTrackableMixin, optimizer_v2.OptimizerV2): # constructor. 
_DelegatingTrackableMixin.__init__(self, self._optimizer) - for weight in loss_scale_module.get_loss_scale_weights(self._loss_scale): + for weight in self._loss_scale._weights.values(): # pylint: disable=protected-access # We cannot call `track_variable` in the LossScale class itself, because a # file outside of Keras cannot depend on a Keras file. Calling it here # instead is OK, because a variable only needs to be tracked if used with diff --git a/tensorflow/python/training/experimental/loss_scale.py b/tensorflow/python/training/experimental/loss_scale.py index 86d8cee16dd..542311c75d8 100644 --- a/tensorflow/python/training/experimental/loss_scale.py +++ b/tensorflow/python/training/experimental/loss_scale.py @@ -28,9 +28,9 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.training.tracking import base as trackable -from tensorflow.python.ops import variable_scope from tensorflow.python.util import deprecation from tensorflow.python.util import nest from tensorflow.python.util.tf_export import tf_export @@ -198,10 +198,6 @@ class LossScale(trackable.Trackable): return cls(**config) -def get_loss_scale_weights(loss_scale): - return loss_scale._weights.values() # pylint: disable=protected-access - - @deprecation.deprecated_endpoints('train.experimental.FixedLossScale') @tf_export('mixed_precision.experimental.FixedLossScale', 'train.experimental.FixedLossScale') From 226644ff5109ea7a376a90fcc48eccfcd4830700 Mon Sep 17 00:00:00 2001 From: Meghna Natraj Date: Thu, 23 Jul 2020 10:47:38 -0700 Subject: [PATCH 1161/2522] Add flatbuffers python library to tensorflow pip_package PiperOrigin-RevId: 322818638 Change-Id: I2ff99c4c8f8409b3128a717709e64f03f142ee92 --- tensorflow/tools/pip_package/setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 67d0e138517..d21bcfbfc8b 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -54,6 +54,7 @@ _VERSION = '2.4.0' REQUIRED_PACKAGES = [ 'absl-py >= 0.7.0', 'astunparse == 1.6.3', + 'flatbuffers >= 1.12', 'gast == 0.3.3', 'google_pasta >= 0.1.8', 'h5py >= 2.10.0, < 2.11.0', From 24a399e470adae5b28f6aa5b6c664dc77a378309 Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Thu, 23 Jul 2020 10:50:26 -0700 Subject: [PATCH 1162/2522] [XLA] [Docs] Document another known issue: dynamic TensorArrays are not supported Moves "known_issues" into a separate page. 
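To make the new known-issues entry concrete, a hedged sketch of the failure mode and its workaround (assuming TF 2.x, where `experimental_compile=True` requests XLA compilation; the exact error text may differ):

    import tensorflow as tf

    @tf.function(experimental_compile=True)
    def not_compilable(n):
      # dynamic_size=True needs reallocation as the array grows, which XLA
      # cannot express, so compiling this function is expected to fail.
      ta = tf.TensorArray(tf.float32, size=0, dynamic_size=True)
      for i in tf.range(n):
        ta = ta.write(i, tf.cast(i, tf.float32))
      return ta.stack()

    @tf.function(experimental_compile=True)
    def compilable(n):
      # Workaround: give the array a static bound (n is assumed to be <= 16 here).
      ta = tf.TensorArray(tf.float32, size=16)
      for i in tf.range(n):
        ta = ta.write(i, tf.cast(i, tf.float32))
      return ta.stack()
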
PiperOrigin-RevId: 322819265 Change-Id: I42d79810267c3dc8cede4ca9b16fb875d2c80430 --- tensorflow/compiler/xla/g3doc/_book.yaml | 2 ++ tensorflow/compiler/xla/g3doc/index.md | 24 -------------- tensorflow/compiler/xla/g3doc/known_issues.md | 32 +++++++++++++++++++ 3 files changed, 34 insertions(+), 24 deletions(-) create mode 100644 tensorflow/compiler/xla/g3doc/known_issues.md diff --git a/tensorflow/compiler/xla/g3doc/_book.yaml b/tensorflow/compiler/xla/g3doc/_book.yaml index e05f69b1e8b..8d217b89ae3 100644 --- a/tensorflow/compiler/xla/g3doc/_book.yaml +++ b/tensorflow/compiler/xla/g3doc/_book.yaml @@ -17,6 +17,8 @@ upper_tabs: path: /xla - title: XLA architecture path: /xla/architecture + - title: Known issues + path: /xla/known_issues - title: Broadcasting semantics path: /xla/broadcasting - title: Develop a new backend for XLA diff --git a/tensorflow/compiler/xla/g3doc/index.md b/tensorflow/compiler/xla/g3doc/index.md index 60bde306266..51d666fba9a 100644 --- a/tensorflow/compiler/xla/g3doc/index.md +++ b/tensorflow/compiler/xla/g3doc/index.md @@ -177,30 +177,6 @@ a bug to a single XLA program by using the [`replay_computation`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/compiler/xla/tools/run_hlo_module_main.cc) and iteratively running it on generated programs. -## Known Issues - -Compilation with XLA can greatly improve the performance of your programs, but -the TensorFlow interop has a number of known sharp corners. - -### TensorArray TF/XLA Interconversion - -The problem manifests itself as an error message -`Support for TensorList crossing the XLA/TF boundary is not implemented`. - -XLA supports `tf.TensorArray`. However, the _interconversion_ between TF and -XLA representations is not implemented yet. -This error often arises when the `TensorArray` is used inside the compiled -block, but the derivative is taken outside. - -Workaround: compile the outermost scope which is taking the derivative. - -### Random Number Generation - -XLA currently ignores TF seeds to random operations. This affects stateful TF -random operations, such as `tf.random.normal`, or `tf.nn.dropout`. XLA will -behave as if the compilation was seeded with a new unique seed at each run. This -limitation does not apply to stateless random ops. - ## XLA Frontends Apart from TensorFlow, XLA programs can be generated by: diff --git a/tensorflow/compiler/xla/g3doc/known_issues.md b/tensorflow/compiler/xla/g3doc/known_issues.md new file mode 100644 index 00000000000..1c03c716a02 --- /dev/null +++ b/tensorflow/compiler/xla/g3doc/known_issues.md @@ -0,0 +1,32 @@ +# Known Issues + +Compilation with XLA can greatly improve the performance of your programs, but +the TensorFlow interop has a number of known sharp corners. + +## TensorArray TF/XLA interconversion + +The problem manifests itself as an error message +`Support for TensorList crossing the XLA/TF boundary is not implemented`. + +XLA supports `tf.TensorArray`. However, the _interconversion_ between TF and +XLA representations is not implemented yet. +This error often arises when the `TensorArray` is used inside the compiled +block, but the derivative is taken outside. + +Workaround: compile the outermost scope which is taking the derivative. + +## Dynamic `tf.TensorArray` is not supported + +Writes into `tf.TensorArray(..., dynamic_size=True)` are not compilable with +XLA, as such writes require an unknown number of reallocations when the array +exceeds the original bound. + +Workaround: provide a statically known bound to your arrays. 
+ +## Random number generation + +XLA currently ignores TF seeds to random operations. This affects stateful TF +random operations, such as `tf.random.normal`, or `tf.nn.dropout`. XLA will +behave as if the compilation was seeded with a new unique seed at each run. This +limitation does not apply to stateless random ops. + From 84dcaf3aefb2f913c0eeb018ade7112edb6020f4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 23 Jul 2020 10:51:04 -0700 Subject: [PATCH 1163/2522] Split the GRPC related code out of capture_profile as profiler_client. PiperOrigin-RevId: 322819419 Change-Id: I84407721c6af80b49d1a3e2df9ee0125a6973e64 --- tensorflow/core/profiler/rpc/client/BUILD | 17 +- .../profiler/rpc/client/capture_profile.cc | 173 +++++++----------- .../profiler/rpc/client/capture_profile.h | 19 +- .../profiler/rpc/client/profiler_client.cc | 78 ++++++++ .../profiler/rpc/client/profiler_client.h | 40 ++++ tensorflow/python/profiler/internal/BUILD | 1 + .../profiler/internal/profiler_wrapper.cc | 42 ++--- 7 files changed, 231 insertions(+), 139 deletions(-) create mode 100644 tensorflow/core/profiler/rpc/client/profiler_client.cc create mode 100644 tensorflow/core/profiler/rpc/client/profiler_client.h diff --git a/tensorflow/core/profiler/rpc/client/BUILD b/tensorflow/core/profiler/rpc/client/BUILD index 609f98aa6c1..9cf1e7a9f7b 100644 --- a/tensorflow/core/profiler/rpc/client/BUILD +++ b/tensorflow/core/profiler/rpc/client/BUILD @@ -10,14 +10,13 @@ cc_library( hdrs = ["capture_profile.h"], visibility = ["//tensorflow/python/profiler/internal:__pkg__"], deps = [ + ":profiler_client", ":save_profile", "//tensorflow/core:lib", - "//tensorflow/core:protos_all_cc", "//tensorflow/core/profiler:profiler_analysis_proto_cc", "//tensorflow/core/profiler:profiler_options_proto_cc", "//tensorflow/core/profiler:profiler_service_proto_cc", "@com_google_absl//absl/strings", - tf_grpc_cc_dependency(), ], ) @@ -35,3 +34,17 @@ cc_library( "@com_google_absl//absl/time", ], ) + +cc_library( + name = "profiler_client", + srcs = ["profiler_client.cc"], + hdrs = ["profiler_client.h"], + visibility = ["//tensorflow/python/profiler/internal:__pkg__"], + deps = [ + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core/profiler:profiler_analysis_proto_cc", + "//tensorflow/core/profiler:profiler_service_proto_cc", + tf_grpc_cc_dependency(), + ], +) diff --git a/tensorflow/core/profiler/rpc/client/capture_profile.cc b/tensorflow/core/profiler/rpc/client/capture_profile.cc index a8642aff54a..e0303d32d13 100644 --- a/tensorflow/core/profiler/rpc/client/capture_profile.cc +++ b/tensorflow/core/profiler/rpc/client/capture_profile.cc @@ -19,20 +19,16 @@ limitations under the License. 
#include #include -#include "grpcpp/grpcpp.h" -#include "absl/strings/numbers.h" #include "absl/strings/str_join.h" #include "absl/strings/str_split.h" #include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/status.h" #include "tensorflow/core/platform/types.h" -#include "tensorflow/core/profiler/profiler_analysis.grpc.pb.h" #include "tensorflow/core/profiler/profiler_analysis.pb.h" #include "tensorflow/core/profiler/profiler_options.pb.h" -#include "tensorflow/core/profiler/profiler_service.grpc.pb.h" #include "tensorflow/core/profiler/profiler_service.pb.h" +#include "tensorflow/core/profiler/rpc/client/profiler_client.h" #include "tensorflow/core/profiler/rpc/client/save_profile.h" -#include "tensorflow/core/protobuf/error_codes.pb.h" namespace tensorflow { namespace profiler { @@ -40,31 +36,29 @@ namespace { constexpr uint64 kMaxEvents = 1000000; -ProfileRequest PopulateProfileRequest(int duration_ms, - const string& repository_root, - const string& session_id, - const ProfileOptions& opts) { - ProfileRequest request; +MonitorRequest PopulateMonitorRequest(int duration_ms, int monitoring_level, + bool timestamp) { + MonitorRequest request; request.set_duration_ms(duration_ms); - request.set_max_events(kMaxEvents); - request.set_repository_root(repository_root); - request.set_session_id(session_id); - request.add_tools("trace_viewer"); - request.add_tools("op_profile"); - request.add_tools("input_pipeline"); - request.add_tools("kernel_stats"); - request.add_tools("memory_viewer"); - request.add_tools("overview_page"); - request.add_tools("pod_viewer"); - request.add_tools("tensorflow_stats"); - *request.mutable_opts() = opts; + request.set_monitoring_level(monitoring_level); + request.set_timestamp(timestamp); return request; } -inline Status FromGrpcStatus(const ::grpc::Status& s) { - return s.ok() ? Status::OK() - : Status(static_cast(s.error_code()), - s.error_message()); +NewProfileSessionRequest PopulateNewProfileSessionRequest( + const std::string& service_addr, const std::string& repository_root, + const std::vector& hostnames, int duration_ms, + const std::string& session_id, const ProfileOptions& opts) { + NewProfileSessionRequest request; + std::vector parts = absl::StrSplit(service_addr, ':'); + *request.mutable_request() = PopulateProfileRequest( + duration_ms, repository_root, session_id, parts[0], opts); + request.set_repository_root(repository_root); + request.set_session_id(session_id); + for (const auto& hostname : hostnames) { + request.add_hosts(hostname); + } + return request; } inline bool ShouldRetryTracing(Status status) { @@ -78,28 +72,14 @@ inline bool ShouldRetryTracing(Status status) { status.error_message() == "Stream removed"); } -// Returns whether the returned trace is empty. -// Failure are handled by CHECK, i.e. 
abort() -Status Profile(const string& service_addr, const string& logdir, - int duration_ms, const string& session_id, +Status Profile(const std::string& service_addr, const std::string& logdir, + int duration_ms, const std::string& session_id, const ProfileOptions& opts) { + std::vector parts = absl::StrSplit(service_addr, ':'); ProfileRequest request = - PopulateProfileRequest(duration_ms, logdir, session_id, opts); - std::vector parts = absl::StrSplit(service_addr, ':'); - request.set_host_name(parts[0]); - - ::grpc::ClientContext context; - ::grpc::ChannelArguments channel_args; - // TODO(qiuminxu): use `NewHostPortGrpcChannel` instead once their - channel_args.SetInt(GRPC_ARG_MAX_MESSAGE_LENGTH, - std::numeric_limits::max()); - std::unique_ptr stub = - grpc::ProfilerService::NewStub(::grpc::CreateCustomChannel( - "dns:///" + service_addr, ::grpc::InsecureChannelCredentials(), - channel_args)); + PopulateProfileRequest(duration_ms, logdir, session_id, parts[0], opts); ProfileResponse response; - TF_RETURN_IF_ERROR( - FromGrpcStatus(stub->Profile(&context, request, &response))); + TF_RETURN_IF_ERROR(ProfileGrpc(service_addr, request, &response)); if (!response.empty_trace()) { TF_RETURN_IF_ERROR(SaveTensorboardProfile( @@ -122,76 +102,58 @@ Status Profile(const string& service_addr, const string& logdir, // Start a new profiling session that include all the hosts included in // hostnames, for the time interval of duration_ms. Possibly save the profiling // result in the directory specified by repository_root and session_id. -Status NewSession(const string& service_addr, const string& repository_root, +Status NewSession(const std::string& service_addr, + const std::string& repository_root, const std::vector& hostnames, int duration_ms, - const string& session_id, const ProfileOptions& opts) { - NewProfileSessionRequest new_session_request; - *new_session_request.mutable_request() = - PopulateProfileRequest(duration_ms, repository_root, session_id, opts); - new_session_request.set_repository_root(repository_root); - new_session_request.set_session_id(session_id); - for (const auto& hostname : hostnames) { - new_session_request.add_hosts(hostname); - } - - ::grpc::ClientContext context; - ::grpc::ChannelArguments channel_args; - // TODO(qiuminxu): use `NewHostPortGrpcChannel` instead once their - channel_args.SetMaxReceiveMessageSize(std::numeric_limits::max()); - // TODO(jiesun): GRPC support following relevant naming scheme: - // 1. dns:///host:port - // 2. ipv4:host:port or ipv6:[host]:port - // We might need to change the prefix which depends on what cluster name - // resolver will give us. 
- std::unique_ptr stub = - grpc::ProfileAnalysis::NewStub(::grpc::CreateCustomChannel( - "dns:///" + service_addr, ::grpc::InsecureChannelCredentials(), - channel_args)); - NewProfileSessionResponse new_session_response; - TF_RETURN_IF_ERROR(FromGrpcStatus( - stub->NewSession(&context, new_session_request, &new_session_response))); + const std::string& session_id, const ProfileOptions& opts) { + NewProfileSessionRequest request = PopulateNewProfileSessionRequest( + service_addr, repository_root, hostnames, duration_ms, session_id, opts); + NewProfileSessionResponse response; + TF_RETURN_IF_ERROR(NewSessionGrpc(service_addr, request, &response)); std::cout << "Profile session succeed for host(s):" << absl::StrJoin(hostnames, ",") << std::endl; - if (new_session_response.empty_trace()) { + if (response.empty_trace()) { return Status(error::Code::UNAVAILABLE, "No trace event is collected"); } return Status::OK(); } -MonitorRequest PopulateMonitorRequest(int duration_ms, int monitoring_level, - bool timestamp) { - MonitorRequest request; - request.set_duration_ms(duration_ms); - request.set_monitoring_level(monitoring_level); - request.set_timestamp(timestamp); - return request; -} - } // namespace -Status ValidateHostPortPair(const string& host_port) { - uint32 port; - std::vector parts = absl::StrSplit(host_port, ':'); - // Must be host:port, port must be a number, host must not contain a '/', - // host also must not be empty. - if (parts.size() != 2 || !absl::SimpleAtoi(parts[1], &port) || - parts[0].find("/") != string::npos || parts[0].empty()) { - return errors::InvalidArgument("Could not interpret \"", host_port, - "\" as a host-port pair."); - } - return Status::OK(); +ProfileRequest PopulateProfileRequest(int duration_ms, + const std::string& repository_root, + const std::string& session_id, + const std::string& host_name, + const ProfileOptions& opts) { + ProfileRequest request; + request.set_duration_ms(duration_ms); + request.set_max_events(kMaxEvents); + request.set_repository_root(repository_root); + request.set_session_id(session_id); + request.set_host_name(host_name); + request.add_tools("trace_viewer"); + request.add_tools("op_profile"); + request.add_tools("input_pipeline"); + request.add_tools("kernel_stats"); + request.add_tools("memory_viewer"); + request.add_tools("memory_profile"); + request.add_tools("overview_page"); + request.add_tools("pod_viewer"); + request.add_tools("tensorflow_stats"); + *request.mutable_opts() = opts; + return request; } // Starts tracing on a single or multiple hosts and saves the result in the // given logdir. If no trace was collected, retries tracing for // num_tracing_attempts. -Status Trace(const string& service_addr, const string& logdir, - const string& workers_list, int duration_ms, +Status Trace(const std::string& service_addr, const std::string& logdir, + const std::string& workers_list, int duration_ms, int num_tracing_attempts, const ProfileOptions& opts) { // Use the current timestamp as the run name. 
- tensorflow::string session_id = GetCurrentTimeStampAsString(); - std::vector hostnames; + std::string session_id = GetCurrentTimeStampAsString(); + std::vector hostnames; if (!workers_list.empty()) { hostnames = absl::StrSplit(workers_list, ','); } @@ -223,22 +185,13 @@ Status Trace(const string& service_addr, const string& logdir, return status; } -Status Monitor(const string& service_addr, int duration_ms, - int monitoring_level, bool display_timestamp, string* result) { +Status Monitor(const std::string& service_addr, int duration_ms, + int monitoring_level, bool display_timestamp, + std::string* result) { MonitorRequest request = PopulateMonitorRequest(duration_ms, monitoring_level, display_timestamp); - - ::grpc::ClientContext context; - ::grpc::ChannelArguments channel_args; - channel_args.SetInt(GRPC_ARG_MAX_MESSAGE_LENGTH, - std::numeric_limits::max()); - std::unique_ptr stub = - grpc::ProfilerService::NewStub(::grpc::CreateCustomChannel( - "dns:///" + service_addr, ::grpc::InsecureChannelCredentials(), - channel_args)); MonitorResponse response; - TF_RETURN_IF_ERROR( - FromGrpcStatus(stub->Monitor(&context, request, &response))); + TF_RETURN_IF_ERROR(MonitorGrpc(service_addr, request, &response)); *result = response.data(); return Status::OK(); } diff --git a/tensorflow/core/profiler/rpc/client/capture_profile.h b/tensorflow/core/profiler/rpc/client/capture_profile.h index c809d2099ae..5745f24cbfa 100644 --- a/tensorflow/core/profiler/rpc/client/capture_profile.h +++ b/tensorflow/core/profiler/rpc/client/capture_profile.h @@ -17,25 +17,32 @@ limitations under the License. #ifndef TENSORFLOW_CORE_PROFILER_RPC_CLIENT_CAPTURE_PROFILE_H_ #define TENSORFLOW_CORE_PROFILER_RPC_CLIENT_CAPTURE_PROFILE_H_ +#include + #include "tensorflow/core/platform/status.h" -#include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/profiler_options.pb.h" +#include "tensorflow/core/profiler/profiler_service.pb.h" namespace tensorflow { namespace profiler { -Status ValidateHostPortPair(const string& host_port); +ProfileRequest PopulateProfileRequest(int duration_ms, + const std::string& repository_root, + const std::string& session_id, + const std::string& host_name, + const ProfileOptions& opts); // Collects one sample of monitoring profile and shows user-friendly metrics. // If timestamp flag is true, timestamp will be displayed in "%H:%M:%S" format. -Status Monitor(const string& service_addr, int duration_ms, - int monitoring_level, bool display_timestamp, string* result); +Status Monitor(const std::string& service_addr, int duration_ms, + int monitoring_level, bool display_timestamp, + std::string* result); // Starts tracing on a single or multiple hosts and saves the result in the // given logdir. If no trace was collected, retries tracing for // num_tracing_attempts. -Status Trace(const string& service_addr, const string& logdir, - const string& workers_list, int duration_ms, +Status Trace(const std::string& service_addr, const std::string& logdir, + const std::string& workers_list, int duration_ms, int num_tracing_attempts, const ProfileOptions& opts); } // namespace profiler diff --git a/tensorflow/core/profiler/rpc/client/profiler_client.cc b/tensorflow/core/profiler/rpc/client/profiler_client.cc new file mode 100644 index 00000000000..0d8fd8411a5 --- /dev/null +++ b/tensorflow/core/profiler/rpc/client/profiler_client.cc @@ -0,0 +1,78 @@ +/* Copyright 2020 The TensorFlow Authors All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/profiler/rpc/client/profiler_client.h" + +#include + +#include "grpcpp/grpcpp.h" +#include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/platform/status.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/protobuf/error_codes.pb.h" + +namespace tensorflow { +namespace profiler { +namespace { + +inline Status FromGrpcStatus(const ::grpc::Status& s) { + return s.ok() ? Status::OK() + : Status(static_cast(s.error_code()), + s.error_message()); +} + +template +std::unique_ptr CreateStub(const std::string& service_addr) { + ::grpc::ChannelArguments channel_args; + channel_args.SetMaxReceiveMessageSize(std::numeric_limits::max()); + return T::NewStub(::grpc::CreateCustomChannel( + "dns:///" + service_addr, ::grpc::InsecureChannelCredentials(), + channel_args)); +} + +} // namespace + +Status ProfileGrpc(const std::string& service_addr, + const ProfileRequest& request, ProfileResponse* response) { + ::grpc::ClientContext context; + std::unique_ptr stub = + CreateStub(service_addr); + TF_RETURN_IF_ERROR( + FromGrpcStatus(stub->Profile(&context, request, response))); + return Status::OK(); +} + +Status NewSessionGrpc(const std::string& service_addr, + const NewProfileSessionRequest& request, + NewProfileSessionResponse* response) { + ::grpc::ClientContext context; + std::unique_ptr stub = + CreateStub(service_addr); + TF_RETURN_IF_ERROR( + FromGrpcStatus(stub->NewSession(&context, request, response))); + return Status::OK(); +} + +Status MonitorGrpc(const std::string& service_addr, + const MonitorRequest& request, MonitorResponse* response) { + ::grpc::ClientContext context; + std::unique_ptr stub = + CreateStub(service_addr); + TF_RETURN_IF_ERROR( + FromGrpcStatus(stub->Monitor(&context, request, response))); + return Status::OK(); +} + +} // namespace profiler +} // namespace tensorflow diff --git a/tensorflow/core/profiler/rpc/client/profiler_client.h b/tensorflow/core/profiler/rpc/client/profiler_client.h new file mode 100644 index 00000000000..d946d607e55 --- /dev/null +++ b/tensorflow/core/profiler/rpc/client/profiler_client.h @@ -0,0 +1,40 @@ +/* Copyright 2020 The TensorFlow Authors All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +// GRPC client to perform on-demand profiling + +#ifndef TENSORFLOW_CORE_PROFILER_RPC_CLIENT_PROFILER_CLIENT_H_ +#define TENSORFLOW_CORE_PROFILER_RPC_CLIENT_PROFILER_CLIENT_H_ + +#include "tensorflow/core/platform/status.h" +#include "tensorflow/core/profiler/profiler_analysis.grpc.pb.h" +#include "tensorflow/core/profiler/profiler_service.grpc.pb.h" + +namespace tensorflow { +namespace profiler { + +Status ProfileGrpc(const std::string& service_addr, + const ProfileRequest& request, ProfileResponse* response); + +Status NewSessionGrpc(const std::string& service_addr, + const NewProfileSessionRequest& request, + NewProfileSessionResponse* response); + +Status MonitorGrpc(const std::string& service_addr, + const MonitorRequest& request, MonitorResponse* response); + +} // namespace profiler +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_PROFILER_RPC_CLIENT_PROFILER_CLIENT_H_ diff --git a/tensorflow/python/profiler/internal/BUILD b/tensorflow/python/profiler/internal/BUILD index 221246e3c18..142863b3c4f 100644 --- a/tensorflow/python/profiler/internal/BUILD +++ b/tensorflow/python/profiler/internal/BUILD @@ -129,6 +129,7 @@ tf_python_pybind_extension( "//tensorflow/core/profiler/rpc/client:save_profile", "//tensorflow/python:pybind11_status", "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", "@pybind11", ], ) diff --git a/tensorflow/python/profiler/internal/profiler_wrapper.cc b/tensorflow/python/profiler/internal/profiler_wrapper.cc index a8799a5f247..0f57204d1d0 100644 --- a/tensorflow/python/profiler/internal/profiler_wrapper.cc +++ b/tensorflow/python/profiler/internal/profiler_wrapper.cc @@ -16,9 +16,12 @@ limitations under the License. #include #include "absl/memory/memory.h" +#include "absl/strings/numbers.h" #include "pybind11/pybind11.h" #include "pybind11/pytypes.h" +#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/host_info.h" +#include "tensorflow/core/platform/status.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h" #include "tensorflow/core/profiler/convert/op_stats_to_overview_page.h" @@ -43,20 +46,17 @@ using ::tensorflow::profiler::KERNEL_STATS_DB; using ::tensorflow::profiler::OP_METRICS_DB; using ::tensorflow::profiler::STEP_DB; -tensorflow::ProfileRequest MakeProfileRequest( - const tensorflow::string& logdir, const tensorflow::string& session_id, - const tensorflow::string& host) { - tensorflow::ProfileRequest request; - request.add_tools("trace_viewer"); - request.add_tools("overview_page"); - request.add_tools("input_pipeline"); - request.add_tools("kernel_stats"); - request.add_tools("tensorflow_stats"); - request.add_tools("memory_profile"); - request.set_host_name(host); - request.set_repository_root(logdir); - request.set_session_id(session_id); - return request; +tensorflow::Status ValidateHostPortPair(const std::string& host_port) { + tensorflow::uint32 port; + std::vector parts = absl::StrSplit(host_port, ':'); + // Must be host:port, port must be a number, host must not contain a '/', + // host also must not be empty. 
+ if (parts.size() != 2 || !absl::SimpleAtoi(parts[1], &port) || + parts[0].find("/") != std::string::npos || parts[0].empty()) { + return tensorflow::errors::InvalidArgument( + "Could not interpret \"", host_port, "\" as a host-port pair."); + } + return tensorflow::Status::OK(); } tensorflow::ProfileOptions GetOptions(const py::dict& opts) { @@ -108,9 +108,11 @@ class ProfilerSessionWrapper { tensorflow::MaybeRaiseRegisteredFromStatus(status); tensorflow::ProfileResponse response; - tensorflow::ProfileRequest request = MakeProfileRequest( - logdir_, tensorflow::profiler::GetCurrentTimeStampAsString(), - tensorflow::port::Hostname()); + tensorflow::ProfileRequest request = + tensorflow::profiler::PopulateProfileRequest( + /*duration_ms=*/0, logdir_, + tensorflow::profiler::GetCurrentTimeStampAsString(), + tensorflow::port::Hostname(), /*opts=*/{}); status = tensorflow::profiler::ConvertXSpaceToProfileResponse( xspace, request, &response); tensorflow::MaybeRaiseRegisteredFromStatus(status); @@ -150,8 +152,7 @@ PYBIND11_MODULE(_pywrap_profiler, m) { const char* worker_list, bool include_dataset_ops, int duration_ms, int num_tracing_attempts, py::dict options) { - tensorflow::Status status = - tensorflow::profiler::ValidateHostPortPair(service_addr); + tensorflow::Status status = ValidateHostPortPair(service_addr); tensorflow::MaybeRaiseRegisteredFromStatus(status); tensorflow::ProfileOptions opts = GetOptions(options); opts.set_include_dataset_ops(include_dataset_ops); @@ -163,8 +164,7 @@ PYBIND11_MODULE(_pywrap_profiler, m) { m.def("monitor", [](const char* service_addr, int duration_ms, int monitoring_level, bool display_timestamp) { - tensorflow::Status status = - tensorflow::profiler::ValidateHostPortPair(service_addr); + tensorflow::Status status = ValidateHostPortPair(service_addr); tensorflow::MaybeRaiseRegisteredFromStatus(status); tensorflow::string content; status = tensorflow::profiler::Monitor(service_addr, duration_ms, From 0f0e85d768cd241ffd185b26ea3bed1ce9df5790 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Thu, 23 Jul 2020 10:52:15 -0700 Subject: [PATCH 1164/2522] Removed unused code. PiperOrigin-RevId: 322819694 Change-Id: I1884a4c3af356f34ecefa5249079a8cff1f65b80 --- .../lite/delegates/gpu/cl/kernels/util.cc | 39 ------------------- .../lite/delegates/gpu/cl/linear_storage.cc | 17 -------- .../lite/delegates/gpu/cl/linear_storage.h | 3 -- 3 files changed, 59 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/util.cc b/tensorflow/lite/delegates/gpu/cl/kernels/util.cc index 72426b62d39..26fbc33f17c 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/util.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/util.cc @@ -28,39 +28,6 @@ limitations under the License. 
namespace tflite { namespace gpu { namespace cl { -namespace { -std::string GetReadImageFromDataType(DataType data_type) { - if (data_type == DataType::FLOAT32) { - return "read_imagef"; - } else if (data_type == DataType::FLOAT16) { - return "read_imageh"; - } else { - return "error"; - } -} - -std::string GetWriteImageFromDataType(DataType data_type) { - if (data_type == DataType::FLOAT32) { - return "write_imagef"; - } else if (data_type == DataType::FLOAT16) { - return "write_imageh"; - } else { - return "error"; - } -} - -std::string GetImageModifier(AccessType access) { - switch (access) { - case AccessType::READ: - return "__read_only"; - case AccessType::WRITE: - return "__write_only"; - case AccessType::READ_WRITE: - return "__read_write"; - } -} - -} // namespace std::string GetCommonDefines(CalculationsPrecision precision) { std::string result; @@ -76,8 +43,6 @@ std::string GetCommonDefines(CalculationsPrecision precision) { result += "#define TO_FLT4 convert_float4\n"; result += "#define TO_ACCUM_TYPE convert_float4\n"; result += "#define TO_ACCUM_FLT convert_float\n"; - result += "#define READ_IMAGE read_imagef\n"; - result += "#define WRITE_IMAGE write_imagef\n"; break; case CalculationsPrecision::F16: result += "#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable\n"; @@ -90,8 +55,6 @@ std::string GetCommonDefines(CalculationsPrecision precision) { result += "#define TO_FLT4 convert_half4\n"; result += "#define TO_ACCUM_TYPE convert_half4\n"; result += "#define TO_ACCUM_FLT convert_half\n"; - result += "#define READ_IMAGE read_imageh\n"; - result += "#define WRITE_IMAGE write_imageh\n"; break; case CalculationsPrecision::F32_F16: result += "#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable\n"; @@ -104,8 +67,6 @@ std::string GetCommonDefines(CalculationsPrecision precision) { result += "#define TO_FLT4 convert_half4\n"; result += "#define TO_ACCUM_TYPE convert_float4\n"; result += "#define TO_ACCUM_FLT convert_float\n"; - result += "#define READ_IMAGE read_imageh\n"; - result += "#define WRITE_IMAGE write_imageh\n"; break; } diff --git a/tensorflow/lite/delegates/gpu/cl/linear_storage.cc b/tensorflow/lite/delegates/gpu/cl/linear_storage.cc index 09c56c13e4a..eb822b620f7 100644 --- a/tensorflow/lite/delegates/gpu/cl/linear_storage.cc +++ b/tensorflow/lite/delegates/gpu/cl/linear_storage.cc @@ -111,23 +111,6 @@ LinearStorage& LinearStorage::operator=(LinearStorage&& storage) { return *this; } -std::string LinearStorage::ReadLinearFLT4(const std::string& z_coord) const { - if (storage_type_ == LinearStorageType::BUFFER) { - return absl::StrCat(name_, "[", z_coord, "]"); - } else { - return absl::StrCat("READ_IMAGE(", name_, ", smp_none, (int2)(", z_coord, - ", 0))"); - } -} - -std::string LinearStorage::GetDeclaration() const { - if (storage_type_ == LinearStorageType::BUFFER) { - return absl::StrCat("__global FLT4* ", name_); - } else { - return absl::StrCat("__read_only image2d_t ", name_); - } -} - absl::Status LinearStorage::GetGPUResources( const GPUObjectDescriptor* obj_ptr, GPUResourcesWithValue* resources) const { diff --git a/tensorflow/lite/delegates/gpu/cl/linear_storage.h b/tensorflow/lite/delegates/gpu/cl/linear_storage.h index 29de71c6b5e..2c96c79f596 100644 --- a/tensorflow/lite/delegates/gpu/cl/linear_storage.h +++ b/tensorflow/lite/delegates/gpu/cl/linear_storage.h @@ -75,9 +75,6 @@ class LinearStorage : public GPUObject { LinearStorage& operator=(const LinearStorage&) = delete; void SetName(const std::string& name) { name_ = name; } - cl_mem 
GetMemoryPtr() const { return memory_; } - std::string ReadLinearFLT4(const std::string& z_coord) const; - std::string GetDeclaration() const; absl::Status GetGPUResources(const GPUObjectDescriptor* obj_ptr, GPUResourcesWithValue* resources) const override; From 3d585c30a3faa4ca299f5e3a72a5777169cfa7ad Mon Sep 17 00:00:00 2001 From: Vignesh Kothapalli Date: Thu, 23 Jul 2020 23:35:33 +0530 Subject: [PATCH 1165/2522] refactored int tests --- .../experimental/kernel_tests/unique_test.py | 46 ++++++------------- 1 file changed, 14 insertions(+), 32 deletions(-) diff --git a/tensorflow/python/data/experimental/kernel_tests/unique_test.py b/tensorflow/python/data/experimental/kernel_tests/unique_test.py index b3fd7936fc9..0567efee1fa 100644 --- a/tensorflow/python/data/experimental/kernel_tests/unique_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/unique_test.py @@ -83,9 +83,10 @@ class UniqueTest(test_base.DatasetTestBase, parameterized.TestCase): while generating the outputs. Args: - dtype: The actual `dtype` of the elements in each test case. - test_cases: A list of lists. The dataset will be created from the list items. - error: The expected error to be raised when a corrupted item in encountered. + dtype: The expected `dtype` of the elements in each test case. + test_cases: A list of lists. The dataset will be created from the + list items. + error: The expected error to be raised. """ current_test_case = [] @@ -95,7 +96,7 @@ class UniqueTest(test_base.DatasetTestBase, parameterized.TestCase): for test_case in test_cases: current_test_case = test_case with self.assertRaises(error): - _ = self.getDatasetOutput(dataset) + self.getDatasetOutput(dataset) @combinations.generate(test_base.graph_only_combinations()) def testStringTypeMismatch(self): @@ -115,10 +116,12 @@ class UniqueTest(test_base.DatasetTestBase, parameterized.TestCase): self._checkDatasetRaises(dtype=dtypes.string, test_cases=test_cases, error=errors.InternalError) - @combinations.generate(test_base.graph_only_combinations()) - def testInt32TypeMismatch(self): + @combinations.generate(combinations.times( + test_base.graph_only_combinations(), + combinations.combine(dtype=[dtypes.int32, dtypes.int64]))) + def testIntTypeMismatch(self, dtype): """Should raise InvalidArgumentError when element type doesn't - match with dtypes.int32""" + match with dtypes.int32, dtypes.int64""" test_cases = [ [1, "foo"], @@ -131,26 +134,7 @@ class UniqueTest(test_base.DatasetTestBase, parameterized.TestCase): ["foo", 1, 1], ["bar", "bar", 1, 1], ] - self._checkDatasetRaises(dtype=dtypes.int32, test_cases=test_cases, - error=errors.InvalidArgumentError) - - @combinations.generate(test_base.graph_only_combinations()) - def testInt64TypeMismatch(self): - """Should raise InvalidArgumentError when element type doesn't - match with dtypes.int64.""" - - test_cases = [ - [2, "hello"], - [3, 2, "hello"], - [5, 3, "hello", "world"], - [6, 7, "hello", "hello"], - ["hello", 6], - ["hello", "world", 8], - ["hello", "hello", "world", 8], - ["hello", 9, 9], - ["hello", "world", 10, 10], - ] - self._checkDatasetRaises(dtype=dtypes.int64, test_cases=test_cases, + self._checkDatasetRaises(dtype=dtype, test_cases=test_cases, error=errors.InvalidArgumentError) @combinations.generate(test_base.graph_only_combinations()) @@ -158,12 +142,10 @@ class UniqueTest(test_base.DatasetTestBase, parameterized.TestCase): """Should raise TypeError when element type doesn't match with the dtypes.int64, dtypes.int32 or dtypes.string (supported types).""" - 
sample_unsupported_types = [dtypes.bool, dtypes.double, dtypes.complex64, - dtypes.float32, dtypes.float64, dtypes.qint16, dtypes.qint32] - current_test_case = [] - for dtype in sample_unsupported_types: + for dtype in [dtypes.bool, dtypes.double, dtypes.complex64, + dtypes.float32, dtypes.float64, dtypes.qint16, dtypes.qint32]: with self.assertRaises(TypeError): - _ = dataset_ops.Dataset.from_generator(lambda: current_test_case, + _ = dataset_ops.Dataset.from_generator(lambda: [], dtype).apply(unique.unique()) From c3704aaacb7a699a45d5ef25584268dfc884cd26 Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Thu, 23 Jul 2020 18:07:44 +0000 Subject: [PATCH 1166/2522] made comments for string view and get_name more clear --- tensorflow/c/c_api.h | 2 +- tensorflow/c/kernels.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/c/c_api.h b/tensorflow/c/c_api.h index e49bf3601ec..348c84fcd65 100644 --- a/tensorflow/c/c_api.h +++ b/tensorflow/c/c_api.h @@ -126,7 +126,7 @@ TF_CAPI_EXPORT extern void TF_DeleteBuffer(TF_Buffer*); TF_CAPI_EXPORT extern TF_Buffer TF_GetBuffer(TF_Buffer* buffer); // -------------------------------------------------------------------------- -// Used to pass strings across the C API. The caller does not take ownership +// Used to return strings across the C API. The caller does not take ownership // of the underlying data pointer and is not responsible for freeing it. typedef struct TF_StringView { const char* data; diff --git a/tensorflow/c/kernels.h b/tensorflow/c/kernels.h index 764f9066faa..d7b19ec91e4 100644 --- a/tensorflow/c/kernels.h +++ b/tensorflow/c/kernels.h @@ -185,7 +185,7 @@ TF_CAPI_EXPORT extern void TF_OpKernelConstruction_GetAttrInt32( TF_OpKernelConstruction* ctx, const char* attr_name, int32_t* val, TF_Status* status); -// Returns the name of the user-defined NodeDef for this OpKernel. +// Returns the unique operation name for this OpKernel. 
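+// (That is, the name of the user-defined NodeDef this kernel was created
+// from, as opposed to the op's registered type name.)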
TF_CAPI_EXPORT extern TF_StringView TF_OpKernelConstruction_GetName( TF_OpKernelConstruction* ctx); From e656213afff2e9491ae1c53e2e76a427a3be20c9 Mon Sep 17 00:00:00 2001 From: Karmel Allison Date: Thu, 23 Jul 2020 10:59:17 -0700 Subject: [PATCH 1167/2522] Rename hlo_algorithm_blacklist to hlo_algorithm_denylist PiperOrigin-RevId: 322821382 Change-Id: Iea0de84b3c82562b5649fc0a6092cd2fb473c83a --- tensorflow/compiler/xla/service/gpu/BUILD | 14 +++++++------- .../xla/service/gpu/gpu_conv_algorithm_picker.cc | 2 +- ...ithm_blacklist.cc => hlo_algorithm_denylist.cc} | 2 +- ...orithm_blacklist.h => hlo_algorithm_denylist.h} | 0 ...list_test.cc => hlo_algorithm_denylist_test.cc} | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) rename tensorflow/compiler/xla/service/gpu/{hlo_algorithm_blacklist.cc => hlo_algorithm_denylist.cc} (97%) rename tensorflow/compiler/xla/service/gpu/{hlo_algorithm_blacklist.h => hlo_algorithm_denylist.h} (100%) rename tensorflow/compiler/xla/service/gpu/{hlo_algorithm_blacklist_test.cc => hlo_algorithm_denylist_test.cc} (97%) diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index 753f6867066..c075b39c08e 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -686,7 +686,7 @@ cc_library( ":gpu_autotuning_proto_cc", ":gpu_conv_runner", ":gpu_executable", - ":hlo_algorithm_blacklist", + ":hlo_algorithm_denylist", ":ir_emission_utils", ":stream_executor_util", "@com_google_absl//absl/algorithm:container", @@ -1660,9 +1660,9 @@ tf_proto_library_cc( ) cc_library( - name = "hlo_algorithm_blacklist", - srcs = ["hlo_algorithm_blacklist.cc"], - hdrs = ["hlo_algorithm_blacklist.h"], + name = "hlo_algorithm_denylist", + srcs = ["hlo_algorithm_denylist.cc"], + hdrs = ["hlo_algorithm_denylist.h"], deps = [ ":gpu_autotuning_proto_cc", "//tensorflow/compiler/xla:debug_options_flags", @@ -1673,12 +1673,12 @@ cc_library( ) tf_cc_test( - name = "hlo_algorithm_blacklist_test", - srcs = ["hlo_algorithm_blacklist_test.cc"], + name = "hlo_algorithm_denylist_test", + srcs = ["hlo_algorithm_denylist_test.cc"], data = ["data/hlo_algorithm_denylist.pbtxt"], tags = ["no_pip"], deps = [ - ":hlo_algorithm_blacklist", + ":hlo_algorithm_denylist", "//tensorflow/core:lib", "//tensorflow/core:test", "//tensorflow/core:test_main", diff --git a/tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc b/tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc index 4ca113f6c99..8fb741323f3 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc @@ -24,7 +24,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/gpu/backend_configs.pb.h" #include "tensorflow/compiler/xla/service/gpu/convolution_thunk.h" #include "tensorflow/compiler/xla/service/gpu/gpu_autotuning.pb.h" -#include "tensorflow/compiler/xla/service/gpu/hlo_algorithm_blacklist.h" +#include "tensorflow/compiler/xla/service/gpu/hlo_algorithm_denylist.h" #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h" #include "tensorflow/compiler/xla/service/gpu/stream_executor_util.h" #include "tensorflow/compiler/xla/service/hlo_casting_utils.h" diff --git a/tensorflow/compiler/xla/service/gpu/hlo_algorithm_blacklist.cc b/tensorflow/compiler/xla/service/gpu/hlo_algorithm_denylist.cc similarity index 97% rename from tensorflow/compiler/xla/service/gpu/hlo_algorithm_blacklist.cc rename to tensorflow/compiler/xla/service/gpu/hlo_algorithm_denylist.cc index a68d52cf832..4a0075f2870 100644 --- a/tensorflow/compiler/xla/service/gpu/hlo_algorithm_blacklist.cc +++ b/tensorflow/compiler/xla/service/gpu/hlo_algorithm_denylist.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/compiler/xla/service/gpu/hlo_algorithm_blacklist.h" +#include "tensorflow/compiler/xla/service/gpu/hlo_algorithm_denylist.h" #include diff --git a/tensorflow/compiler/xla/service/gpu/hlo_algorithm_blacklist.h b/tensorflow/compiler/xla/service/gpu/hlo_algorithm_denylist.h similarity index 100% rename from tensorflow/compiler/xla/service/gpu/hlo_algorithm_blacklist.h rename to tensorflow/compiler/xla/service/gpu/hlo_algorithm_denylist.h diff --git a/tensorflow/compiler/xla/service/gpu/hlo_algorithm_blacklist_test.cc b/tensorflow/compiler/xla/service/gpu/hlo_algorithm_denylist_test.cc similarity index 97% rename from tensorflow/compiler/xla/service/gpu/hlo_algorithm_blacklist_test.cc rename to tensorflow/compiler/xla/service/gpu/hlo_algorithm_denylist_test.cc index c4529f855c8..ab1cc1c79de 100644 --- a/tensorflow/compiler/xla/service/gpu/hlo_algorithm_blacklist_test.cc +++ b/tensorflow/compiler/xla/service/gpu/hlo_algorithm_denylist_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/compiler/xla/service/gpu/hlo_algorithm_blacklist.h" +#include "tensorflow/compiler/xla/service/gpu/hlo_algorithm_denylist.h" #include "tensorflow/core/lib/io/path.h" #include "tensorflow/core/platform/env.h" From d2ba64c7292d3c7503579e5da243e4f2bd36b8f7 Mon Sep 17 00:00:00 2001 From: Advait Jain Date: Thu, 23 Jul 2020 11:02:22 -0700 Subject: [PATCH 1168/2522] Disable Arduino from the Micro Kokoro build. 
PiperOrigin-RevId: 322822173 Change-Id: I3d91ddc4b9a5ca9f82a16a965f502a4d182dd47f --- tensorflow/lite/micro/tools/ci_build/test_all.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/micro/tools/ci_build/test_all.sh b/tensorflow/lite/micro/tools/ci_build/test_all.sh index 95366112f17..403acb28a5f 100755 --- a/tensorflow/lite/micro/tools/ci_build/test_all.sh +++ b/tensorflow/lite/micro/tools/ci_build/test_all.sh @@ -49,7 +49,10 @@ tensorflow/lite/micro/tools/ci_build/test_x86.sh echo "Running stm32f4 tests at `date`" tensorflow/lite/micro/tools/ci_build/test_stm32f4.sh -echo "Running Arduino tests at `date`" -tensorflow/lite/micro/tools/ci_build/test_arduino.sh +# TODO(b/158607483): Disabling Arduino because it is slow (~20mins) and has also +# become very flaky from the download of cifar-10-binary.tar.gz which is 160 MB +# and has started failing a lot. +# echo "Running Arduino tests at `date`" +# tensorflow/lite/micro/tools/ci_build/test_arduino.sh echo "Finished all micro tests at `date`" From ca8f4c723f630e300e5777a4a84fe32c179b2671 Mon Sep 17 00:00:00 2001 From: Robert David Date: Thu, 23 Jul 2020 11:05:37 -0700 Subject: [PATCH 1169/2522] Cleanup LSTM tests: Make weight types a TEST_P parameter, combining testcase sets of 3 to a single function. PiperOrigin-RevId: 322823012 Change-Id: I9a929b791dcb676b6bbedace84891bf2939b2162 --- .../delegates/nnapi/acceleration_test_list.cc | 11 +- tensorflow/lite/kernels/lstm_test.cc | 344 ++++++------------ 2 files changed, 120 insertions(+), 235 deletions(-) diff --git a/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc b/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc index 71ae50b0094..fd0e915d504 100644 --- a/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc +++ b/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc @@ -244,9 +244,14 @@ CifgPeepholeNoProjectionNoClippingUnidirectionalLstmTest/NonLayerNormLstmBlackBo # lstm_test -LstmOpTest/InvalidTypes -.+LstmOpTest/Float,29 --.+LstmOpTest/HybridInt8 -.+LstmOpTest/HybridUint8,29 +# Float +.+_LstmOpTest\.Test/0,29 +# HybridUint8 +.+_LstmOpTest\.Test/1,29 +.+_LstmOpTest\.Test/2,29 +# HybridInt8 +-.+_LstmOpTest\.Test/3 +-.+_LstmOpTest\.Test/4 # maximum_minimum_test MaxMinOpTest/.+nt8Test,29 diff --git a/tensorflow/lite/kernels/lstm_test.cc b/tensorflow/lite/kernels/lstm_test.cc index 015fbc7050e..d81bd53a575 100644 --- a/tensorflow/lite/kernels/lstm_test.cc +++ b/tensorflow/lite/kernels/lstm_test.cc @@ -294,7 +294,11 @@ class LSTMOpModel : public SingleOpModel { const TensorType weight_type_; }; -class BaseLstmOpTest : public ::testing::TestWithParam { +// Parameters: +// std::get<0>(GetParam()) => weight_type +// std::get<1>(GetParam()) => asymmetric_quantize_inputs +class BaseLstmOpTest + : public ::testing::TestWithParam> { protected: // Weights of the LSTM model. Some are optional. std::vector input_to_input_weights_; @@ -324,7 +328,7 @@ class BaseLstmOpTest : public ::testing::TestWithParam { std::vector> lstm_golden_output_; // Compares output up to tolerance to the result of the lstm given the input. 
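+  // The tolerance should reflect the weight type under test: float weights
+  // compare almost exactly, while hybrid-quantized weights need a looser
+  // bound (see the per-type tolerances in the tests below).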
- void VerifyGoldens(LSTMOpModel* lstm, float tolerance = 1e-5) { + void VerifyGoldens(LSTMOpModel* lstm, float tolerance) { // Weights are set twice: // - The delegate, if used, needs to know the scales and zero-points of // quantized tensors, which are computed dynamically when weights are set, @@ -446,84 +450,66 @@ class NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest } }; -TEST_F(NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest, Float) { +TEST_P(NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest, + TestWith20Inputs) { const int n_batch = 1; const int n_input = 2; // n_cell and n_output have the same size when there is no projection. const int n_cell = 4; const int n_output = 4; + TensorType weight_type; + bool asymmetric_quantize_inputs; + std::tie(weight_type, asymmetric_quantize_inputs) = GetParam(); + + // TODO(b/158205028): Fix this test if using NN-API. + if (SingleOpModel::GetForceUseNnapi() && weight_type == TensorType_UINT8) { + return; + } + LSTMOpModel lstm(n_batch, n_input, n_cell, n_output, /*use_cifg=*/false, /*use_peephole=*/false, /*use_projection_weights=*/false, - /*use_projection_bias=*/false, - /*weight_type=*/TensorType_FLOAT32, + /*use_projection_bias=*/false, weight_type, /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, - /*asymmetric_quantize_inputs=*/false); + asymmetric_quantize_inputs); - VerifyGoldens(&lstm); + static const auto* tolerance_per_type = + new std::map{{TensorType_FLOAT32, 0.00001f}, + {TensorType_UINT8, 0.0157651f}, + {TensorType_INT8, 0.0157651f}}; + VerifyGoldens(&lstm, tolerance_per_type->at(weight_type)); } -TEST_F(NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest, With24Inputs) { +TEST_P(NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest, + TestWith24Inputs) { const int n_batch = 1; const int n_input = 2; // n_cell and n_output have the same size when there is no projection. const int n_cell = 4; const int n_output = 4; + TensorType weight_type; + bool asymmetric_quantize_inputs; + std::tie(weight_type, asymmetric_quantize_inputs) = GetParam(); + + // TODO(b/158205028): Fix this test if using NN-API. + if (SingleOpModel::GetForceUseNnapi() && weight_type == TensorType_UINT8) { + return; + } + LSTMOpModel lstm(n_batch, n_input, n_cell, n_output, /*use_cifg=*/false, /*use_peephole=*/false, /*use_projection_weights=*/false, - /*use_projection_bias=*/false, - /*weight_type=*/TensorType_FLOAT32, + /*use_projection_bias=*/false, weight_type, /*model_has_legacy_20_inputs=*/false, - /*is_layer_norm=*/false, - /*asymmetric_quantize_inputs=*/false); + /*is_layer_norm=*/false, asymmetric_quantize_inputs); - VerifyGoldens(&lstm); -} - -TEST_P(NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest, HybridUint8) { - // TODO(b/158205028): Fix this test if GetForceUseNnapi() && !GetParam(). - if (SingleOpModel::GetForceUseNnapi()) { - return; - } - const int n_batch = 1; - const int n_input = 2; - // n_cell and n_output have the same size when there is no projection. 
- const int n_cell = 4; - const int n_output = 4; - - LSTMOpModel lstm(n_batch, n_input, n_cell, n_output, - /*use_cifg=*/false, /*use_peephole=*/false, - /*use_projection_weights=*/false, - /*use_projection_bias=*/false, - /*weight_type=*/TensorType_UINT8, - /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, - /*asymmetric_quantize_inputs=*/GetParam()); - - VerifyGoldens(&lstm, /*tolerance=*/0.0157651); -} - -TEST_P(NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest, HybridInt8) { - if (SingleOpModel::GetForceUseNnapi() && GetParam()) { - return; - } - const int n_batch = 1; - const int n_input = 2; - // n_cell and n_output have the same size when there is no projection. - const int n_cell = 4; - const int n_output = 4; - - LSTMOpModel lstm(n_batch, n_input, n_cell, n_output, - /*use_cifg=*/false, /*use_peephole=*/false, - /*use_projection_weights=*/false, - /*use_projection_bias=*/false, - /*weight_type=*/TensorType_INT8, - /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, - /*asymmetric_quantize_inputs=*/GetParam()); - - VerifyGoldens(&lstm, /*tolerance=*/0.0157651); + static const auto* tolerance_per_type = + new std::map{{TensorType_FLOAT32, 0.00001f}, + {TensorType_UINT8, 0.0157651f}, + {TensorType_INT8, 0.0157651f}}; + VerifyGoldens(&lstm, tolerance_per_type->at(weight_type)); } class Cifg_Peephole_NoProjection_NoLayerNorm_LstmOpTest @@ -574,65 +560,34 @@ class Cifg_Peephole_NoProjection_NoLayerNorm_LstmOpTest } }; -TEST_F(Cifg_Peephole_NoProjection_NoLayerNorm_LstmOpTest, Float) { +TEST_P(Cifg_Peephole_NoProjection_NoLayerNorm_LstmOpTest, Test) { const int n_batch = 1; const int n_input = 2; // n_cell and n_output have the same size when there is no projection. const int n_cell = 4; const int n_output = 4; - LSTMOpModel lstm(n_batch, n_input, n_cell, n_output, - /*use_cifg=*/true, /*use_peephole=*/true, - /*use_projection_weights=*/false, - /*use_projection_bias=*/false, - /*weight_type=*/TensorType_FLOAT32, - /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, - /*asymmetric_quantize_inputs=*/false); + TensorType weight_type; + bool asymmetric_quantize_inputs; + std::tie(weight_type, asymmetric_quantize_inputs) = GetParam(); - VerifyGoldens(&lstm); -} - -TEST_P(Cifg_Peephole_NoProjection_NoLayerNorm_LstmOpTest, HybridUint8) { - // TODO(b/158205028): Fix this test if GetForceUseNnapi() && !GetParam(). - if (SingleOpModel::GetForceUseNnapi()) { + // TODO(b/158205028): Fix this test if using NN-API. + if (SingleOpModel::GetForceUseNnapi() && weight_type == TensorType_UINT8) { return; } - const int n_batch = 1; - const int n_input = 2; - // n_cell and n_output have the same size when there is no projection. - const int n_cell = 4; - const int n_output = 4; LSTMOpModel lstm(n_batch, n_input, n_cell, n_output, /*use_cifg=*/true, /*use_peephole=*/true, /*use_projection_weights=*/false, - /*use_projection_bias=*/false, - /*weight_type=*/TensorType_UINT8, + /*use_projection_bias=*/false, weight_type, /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, - /*asymmetric_quantize_inputs=*/GetParam()); + asymmetric_quantize_inputs); - VerifyGoldens(&lstm, /*tolerance=*/0.03573); -} - -TEST_P(Cifg_Peephole_NoProjection_NoLayerNorm_LstmOpTest, HybridInt8) { - if (SingleOpModel::GetForceUseNnapi() && GetParam()) { - return; - } - const int n_batch = 1; - const int n_input = 2; - // n_cell and n_output have the same size when there is no projection. 
- const int n_cell = 4; - const int n_output = 4; - - LSTMOpModel lstm(n_batch, n_input, n_cell, n_output, - /*use_cifg=*/true, /*use_peephole=*/true, - /*use_projection_weights=*/false, - /*use_projection_bias=*/false, - /*weight_type=*/TensorType_INT8, - /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, - /*asymmetric_quantize_inputs=*/GetParam()); - - VerifyGoldens(&lstm, /*tolerance=*/0.03573); + static const auto* tolerance_per_type = + new std::map{{TensorType_FLOAT32, 0.00001f}, + {TensorType_UINT8, 0.03573f}, + {TensorType_INT8, 0.03573f}}; + VerifyGoldens(&lstm, tolerance_per_type->at(weight_type)); } class NoCifg_Peephole_Projection_NoLayerNorm_LstmOpTest @@ -1235,62 +1190,34 @@ class NoCifg_Peephole_Projection_NoLayerNorm_LstmOpTest } }; -TEST_F(NoCifg_Peephole_Projection_NoLayerNorm_LstmOpTest, Float) { +TEST_P(NoCifg_Peephole_Projection_NoLayerNorm_LstmOpTest, Test) { const int n_batch = 2; const int n_input = 5; const int n_cell = 20; const int n_output = 16; - LSTMOpModel lstm(n_batch, n_input, n_cell, n_output, - /*use_cifg=*/false, /*use_peephole=*/true, - /*use_projection_weights=*/true, - /*use_projection_bias=*/false, - /*weight_type=*/TensorType_FLOAT32, - /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, - /*asymmetric_quantize_inputs=*/false); + TensorType weight_type; + bool asymmetric_quantize_inputs; + std::tie(weight_type, asymmetric_quantize_inputs) = GetParam(); - VerifyGoldens(&lstm); -} - -TEST_P(NoCifg_Peephole_Projection_NoLayerNorm_LstmOpTest, HybridUint8) { - // TODO(b/158205028): Fix this test if GetForceUseNnapi() && !GetParam(). - if (SingleOpModel::GetForceUseNnapi()) { + // TODO(b/158205028): Fix this test if using NN-API. + if (SingleOpModel::GetForceUseNnapi() && weight_type == TensorType_UINT8) { return; } - const int n_batch = 2; - const int n_input = 5; - const int n_cell = 20; - const int n_output = 16; LSTMOpModel lstm(n_batch, n_input, n_cell, n_output, /*use_cifg=*/false, /*use_peephole=*/true, /*use_projection_weights=*/true, - /*use_projection_bias=*/false, - /*weight_type=*/TensorType_UINT8, + /*use_projection_bias=*/false, weight_type, /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, - /*asymmetric_quantize_inputs=*/GetParam()); + asymmetric_quantize_inputs); - VerifyGoldens(&lstm, /*tolerance=*/0.00467); -} - -TEST_P(NoCifg_Peephole_Projection_NoLayerNorm_LstmOpTest, HybridInt8) { - if (SingleOpModel::GetForceUseNnapi() && GetParam()) { - return; - } - const int n_batch = 2; - const int n_input = 5; - const int n_cell = 20; - const int n_output = 16; - - LSTMOpModel lstm(n_batch, n_input, n_cell, n_output, - /*use_cifg=*/false, /*use_peephole=*/true, - /*use_projection_weights=*/true, - /*use_projection_bias=*/false, - /*weight_type=*/TensorType_INT8, - /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, - /*asymmetric_quantize_inputs=*/GetParam()); - - VerifyGoldens(&lstm, /*tolerance=*/0.0015); + static const auto* tolerance_per_type = new std::map{ + {TensorType_FLOAT32, 0.00001f}, + {TensorType_UINT8, 0.00467f}, + {TensorType_INT8, 0.0015f}, + }; + VerifyGoldens(&lstm, tolerance_per_type->at(weight_type)); } class NoCifg_Peephole_Projection_LayerNorm_LstmOpTest : public BaseLstmOpTest { @@ -1372,62 +1299,33 @@ class NoCifg_Peephole_Projection_LayerNorm_LstmOpTest : public BaseLstmOpTest { } }; -TEST_F(NoCifg_Peephole_Projection_LayerNorm_LstmOpTest, Float) { +TEST_P(NoCifg_Peephole_Projection_LayerNorm_LstmOpTest, Test) { const int n_batch = 2; const int n_input = 5; const int n_cell = 
4; const int n_output = 3; - LSTMOpModel lstm( - n_batch, n_input, n_cell, n_output, - /*use_cifg=*/false, /*use_peephole=*/true, - /*use_projection_weights=*/true, - /*use_projection_bias=*/false, - /*weight_type=*/TensorType_FLOAT32, /*model_has_legacy_20_inputs=*/false, - /*is_layer_norm=*/true, /*asymmetric_quantize_inputs=*/false); + TensorType weight_type; + bool asymmetric_quantize_inputs; + std::tie(weight_type, asymmetric_quantize_inputs) = GetParam(); - VerifyGoldens(&lstm); -} - -TEST_P(NoCifg_Peephole_Projection_LayerNorm_LstmOpTest, HybridUint8) { - // TODO(b/158205028): Fix this test if GetForceUseNnapi() && !GetParam(). - if (SingleOpModel::GetForceUseNnapi()) { + // TODO(b/158205028): Fix this test if using NN-API. + if (SingleOpModel::GetForceUseNnapi() && weight_type == TensorType_UINT8) { return; } - const int n_batch = 2; - const int n_input = 5; - const int n_cell = 4; - const int n_output = 3; - LSTMOpModel lstm( - n_batch, n_input, n_cell, n_output, - /*use_cifg=*/false, /*use_peephole=*/true, - /*use_projection_weights=*/true, - /*use_projection_bias=*/false, - /*weight_type=*/TensorType_UINT8, /*model_has_legacy_20_inputs=*/false, - /*is_layer_norm=*/true, /*asymmetric_quantize_inputs=*/GetParam()); + LSTMOpModel lstm(n_batch, n_input, n_cell, n_output, + /*use_cifg=*/false, /*use_peephole=*/true, + /*use_projection_weights=*/true, + /*use_projection_bias=*/false, weight_type, + /*model_has_legacy_20_inputs=*/false, + /*is_layer_norm=*/true, asymmetric_quantize_inputs); - VerifyGoldens(&lstm, /*tolerance=*/0.0010907); -} - -TEST_P(NoCifg_Peephole_Projection_LayerNorm_LstmOpTest, HybridInt8) { - if (SingleOpModel::GetForceUseNnapi() && GetParam()) { - return; - } - const int n_batch = 2; - const int n_input = 5; - const int n_cell = 4; - const int n_output = 3; - - LSTMOpModel lstm( - n_batch, n_input, n_cell, n_output, - /*use_cifg=*/false, /*use_peephole=*/true, - /*use_projection_weights=*/true, - /*use_projection_bias=*/false, - /*weight_type=*/TensorType_INT8, /*model_has_legacy_20_inputs=*/false, - /*is_layer_norm=*/true, /*asymmetric_quantize_inputs=*/GetParam()); - - VerifyGoldens(&lstm, /*tolerance=*/1.06e-3); + static const auto* tolerance_per_type = + new std::map{{TensorType_FLOAT32, 0.00001f}, + {TensorType_UINT8, 0.0010907f}, + {TensorType_INT8, 0.00106f}}; + VerifyGoldens(&lstm, tolerance_per_type->at(weight_type)); } class Cifg_Peephole_Projection_LayerNorm_LstmOpTest : public BaseLstmOpTest { @@ -1489,58 +1387,33 @@ class Cifg_Peephole_Projection_LayerNorm_LstmOpTest : public BaseLstmOpTest { } }; -TEST_F(Cifg_Peephole_Projection_LayerNorm_LstmOpTest, Float) { +TEST_P(Cifg_Peephole_Projection_LayerNorm_LstmOpTest, Test) { const int n_batch = 2; const int n_input = 5; const int n_cell = 4; const int n_output = 3; - LSTMOpModel lstm( - n_batch, n_input, n_cell, n_output, - /*use_cifg=*/true, /*use_peephole=*/true, - /*use_projection_weights=*/true, - /*use_projection_bias=*/false, - /*weight_type=*/TensorType_FLOAT32, /*model_has_legacy_20_inputs=*/false, - /*is_layer_norm=*/true, /*asymmetric_quantize_inputs=*/false); + TensorType weight_type; + bool asymmetric_quantize_inputs; + std::tie(weight_type, asymmetric_quantize_inputs) = GetParam(); - VerifyGoldens(&lstm); -} - -TEST_P(Cifg_Peephole_Projection_LayerNorm_LstmOpTest, HybridUint8) { - if (SingleOpModel::GetForceUseNnapi()) { + // TODO(b/158205028): Fix this test if using NN-API. 
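+  // The hybrid UINT8 variant is skipped when the NNAPI delegate is forced,
+  // per the bug referenced above; the other weight types still run.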
+ if (SingleOpModel::GetForceUseNnapi() && weight_type == TensorType_UINT8) { return; } - const int n_batch = 2; - const int n_input = 5; - const int n_cell = 4; - const int n_output = 3; - LSTMOpModel lstm( - n_batch, n_input, n_cell, n_output, - /*use_cifg=*/true, /*use_peephole=*/true, - /*use_projection_weights=*/true, - /*use_projection_bias=*/false, - /*weight_type=*/TensorType_UINT8, /*model_has_legacy_20_inputs=*/false, - /*is_layer_norm=*/true, /*asymmetric_quantize_inputs=*/GetParam()); + LSTMOpModel lstm(n_batch, n_input, n_cell, n_output, + /*use_cifg=*/true, /*use_peephole=*/true, + /*use_projection_weights=*/true, + /*use_projection_bias=*/false, weight_type, + /*model_has_legacy_20_inputs=*/false, + /*is_layer_norm=*/true, asymmetric_quantize_inputs); - VerifyGoldens(&lstm, /*tolerance=*/0.000971057); -} - -TEST_P(Cifg_Peephole_Projection_LayerNorm_LstmOpTest, HybridInt8) { - const int n_batch = 2; - const int n_input = 5; - const int n_cell = 4; - const int n_output = 3; - - LSTMOpModel lstm( - n_batch, n_input, n_cell, n_output, - /*use_cifg=*/true, /*use_peephole=*/true, - /*use_projection_weights=*/true, - /*use_projection_bias=*/false, - /*weight_type=*/TensorType_INT8, /*model_has_legacy_20_inputs=*/false, - /*is_layer_norm=*/true, /*asymmetric_quantize_inputs=*/GetParam()); - - VerifyGoldens(&lstm, /*tolerance=*/1e-3); + static const auto* tolerance_per_type = + new std::map{{TensorType_FLOAT32, 0.00001f}, + {TensorType_UINT8, 0.000971057f}, + {TensorType_INT8, 0.001f}}; + VerifyGoldens(&lstm, tolerance_per_type->at(weight_type)); } class LSTMIntegerOpModel : public SingleOpModel { @@ -2349,8 +2222,15 @@ TEST(LstmOpTest, InvalidTypes) { #endif // Test parameter controls asymmetric_quantize_inputs in LSTMOpModel. -#define QUANTIZE_PARAMETER_TEST(test) \ - INSTANTIATE_TEST_SUITE_P(test, test, ::testing::Bool()) +#define QUANTIZE_PARAMETER_TEST(test) \ + INSTANTIATE_TEST_SUITE_P( \ + test, test, \ + ::testing::ValuesIn(std::vector>{ \ + {TensorType_FLOAT32, false}, \ + {TensorType_UINT8, false}, \ + {TensorType_UINT8, true}, \ + {TensorType_INT8, false}, \ + {TensorType_INT8, true}})) QUANTIZE_PARAMETER_TEST(NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest); QUANTIZE_PARAMETER_TEST(Cifg_Peephole_NoProjection_NoLayerNorm_LstmOpTest); From 8c4b2fcd7595189f2bbc1f784aa8004052011be2 Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Thu, 23 Jul 2020 11:07:21 -0700 Subject: [PATCH 1170/2522] Use bitwise ops for the corresponding Python operators. Fall back to logical ops for DT_BOOL, for backward compatibility. PiperOrigin-RevId: 322823443 Change-Id: Iaad2c274960fe6e2dcf7e509bedc4fafeadabdf0 --- RELEASE.md | 6 +- tensorflow/python/BUILD | 1 + tensorflow/python/framework/ops.py | 1 + tensorflow/python/framework/ops_test.py | 149 ++++++++++++++++++++++++ tensorflow/python/ops/math_ops.py | 37 ++++-- 5 files changed, 186 insertions(+), 8 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 509b6aa6c84..a4f3643a73d 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -31,7 +31,6 @@ * * * TF Core: - * * `tf.types.experimental.TensorLike` is a new `Union` type that can be used as type annotation for variables representing a Tensor or a value that can be converted to Tensor by `tf.convert_to_tensor`. @@ -39,6 +38,11 @@ tf.convert_to_tensor behavior. This avoids operations like tf.reshape truncating inputs such as from int64 to int32. * Added `tf.sparse.map_values` to apply a function to the `.value`s of `SparseTensror` arguments. 
+ * The Python bitwise operators for `Tensor` (`__and__`, `__or__`, `__xor__` + and `__invert__` now support non-`bool` arguments and apply the + corresponding bitwise ops. `bool` arguments continue to be supported and + dispatch to logical ops. This brings them more in line with Python and NumPy + benavior. * `tf.data`: * Added new `tf.data.experimental.service.register_dataset` and `tf.data.experimental.service.from_dataset_id` APIs to enable one process diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 6399f76741f..2c11ecfce7e 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -4119,6 +4119,7 @@ py_library( srcs_version = "PY2AND3", deps = [ ":array_ops", + ":bitwise_ops_gen", ":common_shapes", ":constant_op", ":control_flow_ops_gen", diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 8d6e1aa3e7b..2d47618a62b 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -853,6 +853,7 @@ class Tensor(internal.NativeObject, core_tf_types.Tensor): "Please call `x.shape` rather than `len(x)` for " "shape information.".format(self.name)) + # TODO(mdan): This convoluted machinery is hard to maintain. Clean up. @staticmethod def _override_operator(operator, func): _override_helper(Tensor, operator, func) diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py index c5f556dc6ba..4129b55e3fd 100644 --- a/tensorflow/python/framework/ops_test.py +++ b/tensorflow/python/framework/ops_test.py @@ -309,6 +309,155 @@ class TensorAndShapeTest(test_util.TensorFlowTestCase): del x self.assertIsNotNone(x_ref.deref()) + @test_util.run_in_graph_and_eager_modes + def testBitwiseAndNumeric(self): + x = constant_op.constant([0, 1, 3]) + y = constant_op.constant([1, 1, 1]) + + z = x & y + + self.assertAllEqual(z, [0, 1, 1]) + + @test_util.run_in_graph_and_eager_modes + def testBitwiseAndBool(self): + x = constant_op.constant([False, False, True, True]) + y = constant_op.constant([False, True, False, True]) + + z = x & y + + self.assertAllEqual(z, [False, False, False, True]) + + @test_util.run_in_graph_and_eager_modes + def testBitwiseAndErrors(self): + x_int = constant_op.constant(0) + x_bool = constant_op.constant(True) + + if context.executing_eagerly(): # :( + expected_errtype = errors.InvalidArgumentError + else: + expected_errtype = TypeError + + with self.assertRaises(expected_errtype): + _ = x_int & x_bool + with self.assertRaises(expected_errtype): + _ = x_int & constant_op.constant("a") + + with self.assertRaises(expected_errtype): + _ = x_bool & x_int + with self.assertRaises(expected_errtype): + _ = x_bool & constant_op.constant("a") + + with self.assertRaises(expected_errtype): + _ = constant_op.constant("a") & constant_op.constant("b") + + @test_util.run_in_graph_and_eager_modes + def testBitwiseOrNumeric(self): + x = constant_op.constant([0, 1, 2]) + y = constant_op.constant([1, 1, 1]) + + z = x | y + + self.assertAllEqual(z, [1, 1, 3]) + + @test_util.run_in_graph_and_eager_modes + def testBitwiseOrBool(self): + x = constant_op.constant([False, False, True, True]) + y = constant_op.constant([False, True, False, True]) + + z = x | y + + self.assertAllEqual(z, [False, True, True, True]) + + @test_util.run_in_graph_and_eager_modes + def testBitwiseOrErrors(self): + x_int = constant_op.constant(0) + x_bool = constant_op.constant(True) + + if context.executing_eagerly(): # :( + expected_errtype = errors.InvalidArgumentError + else: + expected_errtype = 
TypeError + + with self.assertRaises(expected_errtype): + _ = x_int | x_bool + with self.assertRaises(expected_errtype): + _ = x_int | constant_op.constant("a") + + with self.assertRaises(expected_errtype): + _ = x_bool | x_int + with self.assertRaises(expected_errtype): + _ = x_bool | constant_op.constant("a") + + with self.assertRaises(expected_errtype): + _ = constant_op.constant("a") | constant_op.constant("b") + + @test_util.run_in_graph_and_eager_modes + def testBitwiseXorNumeric(self): + x = constant_op.constant([0, 1, 3]) + y = constant_op.constant([1, 1, 1]) + + z = x ^ y + + self.assertAllEqual(z, [1, 0, 2]) + + @test_util.run_in_graph_and_eager_modes + def testBitwiseXorBool(self): + x = constant_op.constant([False, False, True, True]) + y = constant_op.constant([False, True, False, True]) + + z = x ^ y + + self.assertAllEqual(z, [False, True, True, False]) + + @test_util.run_in_graph_and_eager_modes + def testBitwiseXorErrors(self): + x_int = constant_op.constant(0) + x_bool = constant_op.constant(True) + + if context.executing_eagerly(): # :( + expected_errtype = errors.InvalidArgumentError + else: + expected_errtype = TypeError + + with self.assertRaises(expected_errtype): + _ = x_int ^ x_bool + with self.assertRaises(expected_errtype): + _ = x_int ^ constant_op.constant("a") + + with self.assertRaises(expected_errtype): + _ = x_bool ^ x_int + with self.assertRaises(expected_errtype): + _ = x_bool ^ constant_op.constant("a") + + with self.assertRaises(expected_errtype): + _ = constant_op.constant("a") ^ constant_op.constant("b") + + @test_util.run_in_graph_and_eager_modes + def testBitwiseNotNumeric(self): + x = constant_op.constant([0, dtypes.int32.min, 1]) + + y = ~x + + self.assertAllEqual(y, [-1, dtypes.int32.max, -2]) + + @test_util.run_in_graph_and_eager_modes + def testBitwiseNotBool(self): + x = constant_op.constant([False, True]) + + y = ~x + + self.assertAllEqual(y, [True, False]) + + @test_util.run_in_graph_and_eager_modes + def testBitwiseNotErrors(self): + if context.executing_eagerly(): # :( + expected_errtype = errors.InvalidArgumentError + else: + expected_errtype = TypeError + + with self.assertRaises(expected_errtype): + _ = ~constant_op.constant("a") + @test_util.disable_tfrt("Graph is not supported yet. b/156187905") @test_util.run_all_in_graph_and_eager_modes diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 6a3440cdae7..0a16c18f7b2 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -85,6 +85,7 @@ from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_array_ops +from tensorflow.python.ops import gen_bitwise_ops from tensorflow.python.ops import gen_data_flow_ops from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import gen_nn_ops @@ -1102,10 +1103,6 @@ def to_complex128(x, name="ToComplex128"): ops.Tensor._override_operator("__neg__", gen_math_ops.neg) ops.Tensor._override_operator("__abs__", abs) -# __invert__ corresponds to the ~ operator. Here we follow the numpy convention -# ~ marks an elementwise bit-wise inverse. 
This is only implemented for boolean -# tensors and will throw a TypeError if used on nonboolean arrays -ops.Tensor._override_operator("__invert__", gen_math_ops.logical_not) def _OverrideBinaryOperatorHelper(func, op_name, clazz_object=ops.Tensor): @@ -1569,9 +1566,35 @@ def logical_and(x, y, name=None): return gen_math_ops.logical_and(x, y, name) -_OverrideBinaryOperatorHelper(logical_and, "and") -_OverrideBinaryOperatorHelper(gen_math_ops.logical_or, "or") -_OverrideBinaryOperatorHelper(logical_xor, "xor") +def and_(x, y, name=None): + if x.dtype == dtypes.bool: + return gen_math_ops.logical_and(x, y, name) + return gen_bitwise_ops.bitwise_and(x, y) + + +def or_(x, y, name=None): + if x.dtype == dtypes.bool: + return gen_math_ops.logical_or(x, y, name) + return gen_bitwise_ops.bitwise_or(x, y) + + +def xor_(x, y, name=None): + if x.dtype == dtypes.bool: + return logical_xor(x, y, name) + return gen_bitwise_ops.bitwise_xor(x, y) + + +def invert_(x, name=None): + if x.dtype == dtypes.bool: + return gen_math_ops.logical_not(x, name=name) + return gen_bitwise_ops.invert(x, name=name) + + +_OverrideBinaryOperatorHelper(and_, "and") +_OverrideBinaryOperatorHelper(or_, "or") +_OverrideBinaryOperatorHelper(xor_, "xor") +ops.Tensor._override_operator("__invert__", invert_) + ops.Tensor._override_operator("__lt__", gen_math_ops.less) ops.Tensor._override_operator("__le__", gen_math_ops.less_equal) From 01b6bb69f93976a70a5f41ebe6bca91f0c585d42 Mon Sep 17 00:00:00 2001 From: Andy Ly Date: Thu, 23 Jul 2020 11:13:55 -0700 Subject: [PATCH 1171/2522] Add tf.TPUOrdinalSelector to TensorFlow MLIR ODS. This op is auto-generated from the TensorFlow op registry. PiperOrigin-RevId: 322825012 Change-Id: I304cc05df0885e8bf03a9b7d6d0e4f8c4a740ec2 --- .../mlir/tensorflow/ir/tf_generated_ops.td | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td index 836950526c2..35a6b0e2343 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td @@ -9734,6 +9734,22 @@ For internal use only. ); } +def TF_TPUOrdinalSelectorOp : TF_Op<"TPUOrdinalSelector", []> { + let summary = "A TPU core selector Op."; + + let description = [{ +This Op produces a set of TPU cores (for warm-up) or a single TPU core +(for regular inference) to execute the TPU program on. The output is +consumed by TPUPartitionedCall. + }]; + + let arguments = (ins); + + let results = (outs + I32Tensor:$device_ordinals + ); +} + def TF_TPUReplicatedInputOp : TF_Op<"TPUReplicatedInput", [NoSideEffect]> { let summary = "Connects N inputs to an N-way replicated TPU computation."; From 884bc1c3e8ea143dfe5fd4a5d618f8d39c05ad8a Mon Sep 17 00:00:00 2001 From: Andy Ly Date: Thu, 23 Jul 2020 11:14:11 -0700 Subject: [PATCH 1172/2522] Refactor graph pruning pass in preparation for allowing certain ops to be preserved even when they are not reachable from a fetch. 
(NFC) - Replaced walker with mlir::visitUsedValuesDefinedAbove - Moved visiting of ops and operands into separate functions - Treat graph fetch (terminator) as any op that should be preserved PiperOrigin-RevId: 322825076 Change-Id: Ibe473c44fe900fc19d60dfe8abc02cd8e0a7a4fc --- .../tensorflow/transforms/graph_pruning.cc | 97 +++++++++++-------- 1 file changed, 59 insertions(+), 38 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/graph_pruning.cc b/tensorflow/compiler/mlir/tensorflow/transforms/graph_pruning.cc index f4d3eda3e7e..859d3ffb23c 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/graph_pruning.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/graph_pruning.cc @@ -19,15 +19,62 @@ limitations under the License. #include "llvm/ADT/iterator_range.h" #include "llvm/Support/Casting.h" #include "mlir/IR/Operation.h" // from @llvm-project +#include "mlir/IR/UseDefLists.h" // from @llvm-project #include "mlir/IR/Value.h" // from @llvm-project #include "mlir/Pass/Pass.h" // from @llvm-project #include "mlir/Pass/PassRegistry.h" // from @llvm-project +#include "mlir/Transforms/RegionUtils.h" // from @llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h" #include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" namespace mlir { namespace tf_executor { +// Visits an op's operand if it is an output of an Operation in the same +// tf_executor.graph. +void VisitOpOperand(GraphOp graph, Value operand, + llvm::SmallPtrSetImpl* reachable_ops, + llvm::SmallVectorImpl* ops_to_visit) { + Operation* def = operand.getDefiningOp(); + if (def && def->getParentOp() == graph && reachable_ops->insert(def).second) { + // Op has not been visited, add to queue to visit later. + ops_to_visit->push_back(def); + } +} + +// Visits all operands of an op where each operand is an output of an Operation +// in the same tf_executor.graph. +void VisitOpOperands(GraphOp graph, Operation* op, + llvm::SmallPtrSetImpl* reachable_ops, + llvm::SmallVectorImpl* ops_to_visit) { + for (Value operand : op->getOperands()) + VisitOpOperand(graph, operand, reachable_ops, ops_to_visit); +} + +// Visits an op and it's associated operands. IslandOps are handled differently +// where it's regions op operands are also visited as values may be implicitly +// captured within. NextIterationSourceOp will also visit it's associated +// NextIterationSinkOp. +void VisitOp(GraphOp graph, Operation* op, + llvm::SmallPtrSetImpl* reachable_ops, + llvm::SmallVectorImpl* ops_to_visit) { + if (auto island = llvm::dyn_cast(op)) { + mlir::visitUsedValuesDefinedAbove( + island.body(), island.body(), [&](OpOperand* operand) { + VisitOpOperand(graph, operand->get(), reachable_ops, ops_to_visit); + }); + } + + VisitOpOperands(graph, op, reachable_ops, ops_to_visit); + + // If op is a `tf_executor.NextIteration.Source`, visit its associated + // `tf_executor.NextIteration.Sink` op. + if (auto source_op = llvm::dyn_cast(op)) { + Operation* sink_op = source_op.GetSink().getOperation(); + if (reachable_ops->insert(sink_op).second) ops_to_visit->push_back(sink_op); + } +} + // Prunes unreachable operations of a tf_executor.graph operation. void PruneGraph(GraphOp graph) { // A graph has a single block which forms a DAG: operations that aren't @@ -36,49 +83,23 @@ void PruneGraph(GraphOp graph) { llvm::SmallPtrSet reachable_ops; llvm::SmallVector ops_to_visit; - // Visit an op's operands if it is output of an Operation in same graph. 
- auto visit_op = [&](Operation* op) { - for (Value operand : op->getOperands()) { - Operation* def = operand.getDefiningOp(); - if (def && def->getParentOp() == graph && - reachable_ops.insert(def).second) { - // Op has not been visited, add to queue to visit later. - ops_to_visit.push_back(def); - } - } - }; - - // Visit `fetch` operands. - visit_op(graph.GetFetch()); + // Visit fetches first to create a starting point for ops that are reachable. + reachable_ops.insert(graph.GetFetch()); + VisitOpOperands(graph, graph.GetFetch(), &reachable_ops, &ops_to_visit); + // Visit transitive ops until no there are no reachable ops left that have not + // been visited. while (!ops_to_visit.empty()) { Operation* op = ops_to_visit.pop_back_val(); - if (llvm::isa(op)) { - // Visit island and island inner ops operands. - op->walk([&](Operation* inner_op) { visit_op(inner_op); }); - continue; - } else { - // Op is not an island, only visit its operands. - visit_op(op); - } - - // If op is a `tf_executor.NextIteration.Source`, visit its associated - // `tf_executor.NextIteration.Sink` op. - if (auto source_op = llvm::dyn_cast(op)) { - Operation* sink_op = source_op.GetSink().getOperation(); - if (reachable_ops.insert(sink_op).second) { - ops_to_visit.push_back(sink_op); - } - } + VisitOp(graph, op, &reachable_ops, &ops_to_visit); } - // Erase unreachable ops in reverse order. - for (Operation& op : llvm::make_early_inc_range( - llvm::drop_begin(llvm::reverse(graph.GetBody()), 1))) { - if (reachable_ops.find(&op) == reachable_ops.end()) { - op.erase(); - } - } + // Erase unreachable ops in reverse order so references don't need to be + // dropped before removing an op. Going in reverse order will guarantee that + // when an op to be erased is reached, there are no users left. + for (Operation& op : + llvm::make_early_inc_range(llvm::reverse(graph.GetBody()))) + if (!reachable_ops.contains(&op)) op.erase(); } namespace { From b1c32123aafd991d1c812ca87db8f78712901dfd Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 23 Jul 2020 11:17:14 -0700 Subject: [PATCH 1173/2522] Fix bug for node with attr output_shape empty. PiperOrigin-RevId: 322825760 Change-Id: I280fab9cc2670895df4325404734afc9b3f7dafb --- .../python/framework/convert_to_constants.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/framework/convert_to_constants.py b/tensorflow/python/framework/convert_to_constants.py index ea11b1c41dc..10541ed8e34 100644 --- a/tensorflow/python/framework/convert_to_constants.py +++ b/tensorflow/python/framework/convert_to_constants.py @@ -612,12 +612,14 @@ class _While(_FunctionCaller): def convert_variable_to_constant(self, incoming_edge, tensor_data): super(_While, self).convert_variable_to_constant(incoming_edge, tensor_data) node = self.converted_self() - node.node.attr["output_shapes"].list.shape[ - incoming_edge.destination.index].CopyFrom( - tensor_shape_pb2.TensorShapeProto(dim=[ - tensor_shape_pb2.TensorShapeProto.Dim(size=dim) - for dim in tensor_data.numpy.shape - ])) + if node.node.attr["output_shapes"].list.shape: + node.node.attr["output_shapes"].list.shape[ + incoming_edge.destination.index].CopyFrom( + tensor_shape_pb2.TensorShapeProto(dim=[ + tensor_shape_pb2.TensorShapeProto.Dim(size=dim) + for dim in tensor_data.numpy.shape + ])) + # The while's body inputs and outputs have the same type, so here we can go # ahead and change that function's output type. 
body_name = self._node.attr["body"].func.name From 52de0efaa3fece56e7ad0b97dfb5dd4b296c855c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 23 Jul 2020 11:19:50 -0700 Subject: [PATCH 1174/2522] Add a copy operation type to the GPU commands PiperOrigin-RevId: 322826307 Change-Id: If068eac6823d4165be24db8f9a2e7e7f36040399 --- .../delegates/gpu/cl/kernels/elementwise.cc | 4 ++++ .../gpu/cl/kernels/elementwise_test.cc | 23 +++++++++++++++++++ .../gpu/cl/selectors/operation_selector.cc | 1 + .../delegates/gpu/common/model_builder.cc | 1 + .../lite/delegates/gpu/common/operations.cc | 3 +++ .../lite/delegates/gpu/common/operations.h | 1 + .../delegates/gpu/gl/kernels/elementwise.cc | 4 ++++ .../gpu/gl/kernels/elementwise_test.cc | 12 ++++++++++ tensorflow/lite/delegates/gpu/metal/api.cc | 1 + .../gpu/metal/kernels/elementwise.cc | 1 + .../gpu/metal/kernels/elementwise_test.mm | 13 +++++++++++ 11 files changed, 64 insertions(+) diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc index babf564039b..4d2afc5bcd7 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc @@ -37,6 +37,10 @@ std::string GetOneInputCode(const OperationType& op_type, case OperationType::COS: result = "$0 = cos($0);\n"; break; + case OperationType::COPY: + // No op as inout_value will be copied to dest automatically. + result = "\n"; + break; case OperationType::ELU: result = "$0.x = $0.x < (FLT)(0.0f) ? exp($0.x) - (FLT)(1.0f) : $0.x;\n"; result += "$0.y = $0.y < (FLT)(0.0f) ? exp($0.y) - (FLT)(1.0f) : $0.y;\n"; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise_test.cc index ea7e1fa0541..ac825c0cdfc 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise_test.cc @@ -23,6 +23,7 @@ limitations under the License. 
#include "tensorflow/lite/delegates/gpu/common/operations.h" #include "tensorflow/lite/delegates/gpu/common/status.h" +using ::testing::FloatEq; using ::testing::FloatNear; using ::testing::Pointwise; @@ -81,6 +82,28 @@ TEST_F(OpenCLOperationTest, Cos) { } } +TEST_F(OpenCLOperationTest, Copy) { + TensorFloat32 src_tensor; + src_tensor.shape = BHWC(1, 2, 1, 2); + src_tensor.data = {half(0.0f), half(-1.0f), half(-0.05f), half(0.045f)}; + + for (auto storage : env_.GetSupportedStorages()) { + for (auto precision : env_.GetSupportedPrecisions()) { + OperationDef op_def; + op_def.precision = precision; + auto data_type = DeduceDataTypeFromPrecision(precision); + op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); + op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); + TensorFloat32 dst_tensor; + ElementwiseOneInput operation = + CreateElementwiseOneInput(op_def, OperationType::COPY); + ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, + BHWC(1, 2, 1, 2), &dst_tensor)); + EXPECT_THAT(dst_tensor.data, Pointwise(FloatEq(), src_tensor.data)); + } + } +} + TEST_F(OpenCLOperationTest, Elu) { TensorFloat32 src_tensor; src_tensor.shape = BHWC(1, 1, 1, 7); diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc index d16eb982b35..e3b8d5b0eec 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc @@ -382,6 +382,7 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context, return absl::OkStatus(); } case OperationType::ABS: + case OperationType::COPY: case OperationType::COS: case OperationType::ELU: case OperationType::EXP: diff --git a/tensorflow/lite/delegates/gpu/common/model_builder.cc b/tensorflow/lite/delegates/gpu/common/model_builder.cc index e789f1ec980..386743d8a3c 100644 --- a/tensorflow/lite/delegates/gpu/common/model_builder.cc +++ b/tensorflow/lite/delegates/gpu/common/model_builder.cc @@ -880,6 +880,7 @@ class ElementwiseOperationParser : public TFLiteOperationParser { bool IsOneArgumentOperation() const { switch (operation_type_) { case OperationType::ABS: + case OperationType::COPY: case OperationType::COS: case OperationType::EXP: case OperationType::LOG: diff --git a/tensorflow/lite/delegates/gpu/common/operations.cc b/tensorflow/lite/delegates/gpu/common/operations.cc index 33e7844bbe9..dd0a91b2705 100644 --- a/tensorflow/lite/delegates/gpu/common/operations.cc +++ b/tensorflow/lite/delegates/gpu/common/operations.cc @@ -84,6 +84,8 @@ std::string ToString(enum OperationType op) { return "convolution_2d"; case OperationType::CONVOLUTION_TRANSPOSED: return "convolution_transposed"; + case OperationType::COPY: + return "copy"; case OperationType::COS: return "cos"; case OperationType::DEPTHWISE_CONVOLUTION: @@ -170,6 +172,7 @@ OperationType OperationTypeFromString(const std::string& name) { {"const", OperationType::CONST}, {"convolution_2d", OperationType::CONVOLUTION_2D}, {"convolution_transposed", OperationType::CONVOLUTION_TRANSPOSED}, + {"copy", OperationType::COPY}, {"cos", OperationType::COS}, {"depthwise_convolution", OperationType::DEPTHWISE_CONVOLUTION}, {"div", OperationType::DIV}, diff --git a/tensorflow/lite/delegates/gpu/common/operations.h b/tensorflow/lite/delegates/gpu/common/operations.h index ec9a78ae747..6fdb53a3248 100644 --- a/tensorflow/lite/delegates/gpu/common/operations.h +++ 
b/tensorflow/lite/delegates/gpu/common/operations.h @@ -40,6 +40,7 @@ enum class OperationType { CONST, CONVOLUTION_2D, CONVOLUTION_TRANSPOSED, + COPY, COS, DEPTHWISE_CONVOLUTION, DIV, diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/elementwise.cc b/tensorflow/lite/delegates/gpu/gl/kernels/elementwise.cc index 876d67db351..5d50fcc0118 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/elementwise.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/elementwise.cc @@ -41,6 +41,9 @@ class ElementwiseOneArgument : public NodeShader { case OperationType::COS: source = "value_0 = cos(value_0);"; break; + case OperationType::COPY: + source = "value_0 = value_0;"; + break; case OperationType::ELU: source = R"( value_0.x = value_0.x < 0.0 ? exp(value_0.x) - 1.0 : value_0.x; @@ -219,6 +222,7 @@ std::unique_ptr NewElementwiseNodeShader( OperationType operation_type) { switch (operation_type) { case OperationType::ABS: + case OperationType::COPY: case OperationType::COS: case OperationType::ELU: case OperationType::EXP: diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/elementwise_test.cc b/tensorflow/lite/delegates/gpu/gl/kernels/elementwise_test.cc index 60fd9174f90..a32a4ea9f76 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/elementwise_test.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/elementwise_test.cc @@ -20,6 +20,7 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/common/operations.h" #include "tensorflow/lite/delegates/gpu/gl/kernels/test_util.h" +using ::testing::FloatEq; using ::testing::FloatNear; using ::testing::Pointwise; @@ -60,6 +61,17 @@ TEST(ElementwiseOneArgumentTest, Cos) { Pointwise(FloatNear(1e-6), {1.0, -1.0, -1.0, 0.540302})); } +TEST(ElementwiseOneArgumentTest, Copy) { + OperationType op_type = OperationType::COPY; + const BHWC shape(1, 2, 2, 1); + SingleOpModel model({/*type=*/ToString(op_type), /*attributes=*/{}}, + /*inputs=*/{GetTensorRef(0, shape)}, + /*outputs=*/{GetTensorRef(1, shape)}); + ASSERT_TRUE(model.PopulateTensor(0, {0.0, -6.2, 2.0, 4.0})); + ASSERT_OK(model.Invoke(*NewElementwiseNodeShader(op_type))); + EXPECT_THAT(model.GetOutput(0), Pointwise(FloatEq(), {0.0, -6.2, 2.0, 4.0})); +} + TEST(ElementwiseOneArgumentTest, Elu) { OperationType op_type = OperationType::ELU; const BHWC shape(1, 1, 1, 7); diff --git a/tensorflow/lite/delegates/gpu/metal/api.cc b/tensorflow/lite/delegates/gpu/metal/api.cc index cbaa00246af..648fa166bc0 100644 --- a/tensorflow/lite/delegates/gpu/metal/api.cc +++ b/tensorflow/lite/delegates/gpu/metal/api.cc @@ -364,6 +364,7 @@ absl::Status RegisterPrimaryOps(const GraphFloat32& graph, const Node* node, absl::any_cast(node->operation.attributes)); break; case OperationType::ABS: + case OperationType::COPY: case OperationType::COS: case OperationType::ELU: case OperationType::EXP: diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/elementwise.cc b/tensorflow/lite/delegates/gpu/metal/kernels/elementwise.cc index 3b07b42afb4..7bac1402fd2 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/elementwise.cc +++ b/tensorflow/lite/delegates/gpu/metal/kernels/elementwise.cc @@ -50,6 +50,7 @@ std::string OneInputFunctor(OperationType op_type, const std::string& value) { {OperationType::SQUARE, "$0 * $0"}, {OperationType::SIGMOID, "1.0 / (1.0 + exp(-1.0 * $0))"}, {OperationType::TANH, "tanh($0)"}, + {OperationType::COPY, "$0"}, }; if (functors.find(op_type) == functors.end()) { diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/elementwise_test.mm 
b/tensorflow/lite/delegates/gpu/metal/kernels/elementwise_test.mm index a95cbf38f02..4972fdeb1a9 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/elementwise_test.mm +++ b/tensorflow/lite/delegates/gpu/metal/kernels/elementwise_test.mm @@ -79,6 +79,19 @@ TensorRef GetTensorRef(int ref, const BHWC& shape) { XCTAssertTrue(status.ok(), @"%s", std::string(status.message()).c_str()); } +- (void)testCopy { + OperationType op_type = OperationType::COPY; + const BHWC shape(1, 2, 2, 1); + SingleOpModel model({/*type=*/ToString(op_type), /*attributes=*/{}}, + /*inputs=*/{GetTensorRef(0, shape)}, + /*outputs=*/{GetTensorRef(1, shape)}); + XCTAssertTrue(model.PopulateTensor(0, {0.0, -6.2, 2.0, 4.0})); + auto status = model.Invoke(); + XCTAssertTrue(status.ok(), @"%s", std::string(status.message()).c_str()); + status = CompareVectors({0.0, -6.2, 2.0, 4.0}, model.GetOutput(0), 1e-6f); + XCTAssertTrue(status.ok(), @"%s", std::string(status.message()).c_str()); +} + - (void)testDiv { OperationType op_type = OperationType::DIV; const BHWC shape(1, 2, 2, 1); From 0ff8f604fed9822ce90c1108b6f73f9a1b383379 Mon Sep 17 00:00:00 2001 From: Anna R Date: Thu, 23 Jul 2020 11:21:29 -0700 Subject: [PATCH 1175/2522] Disable //tensorflow/lite/testing:selective_build_test on mac. PiperOrigin-RevId: 322826690 Change-Id: Ie0512ca2125d18530353879843681ac3862c5568 --- tensorflow/lite/testing/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/lite/testing/BUILD b/tensorflow/lite/testing/BUILD index b2055a9904d..7df61ca5f35 100644 --- a/tensorflow/lite/testing/BUILD +++ b/tensorflow/lite/testing/BUILD @@ -533,6 +533,7 @@ cc_test( "//tensorflow/lite:testdata/add.bin", "//tensorflow/lite:testdata/lstm.bin", ], + tags = ["no_mac"], # b/161990368 deps = [ ":test_tflite_lib", "//tensorflow/core:tflite_portable_logging", From 6bfea7624ac127687164678300398fd134c46440 Mon Sep 17 00:00:00 2001 From: Robert David Date: Thu, 23 Jul 2020 11:33:13 -0700 Subject: [PATCH 1176/2522] Cleanup LSTM tests: Set the weights and biases, and apply the delegate only once. The previous comment was not correct. PiperOrigin-RevId: 322829351 Change-Id: I7f1683a3e01ffa812e9f9dbcec82d843ede3135b --- tensorflow/lite/kernels/lstm_test.cc | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/tensorflow/lite/kernels/lstm_test.cc b/tensorflow/lite/kernels/lstm_test.cc index d81bd53a575..fef01462c67 100644 --- a/tensorflow/lite/kernels/lstm_test.cc +++ b/tensorflow/lite/kernels/lstm_test.cc @@ -134,7 +134,10 @@ class LSTMOpModel : public SingleOpModel { LSTMKernelType_FULL, asymmetric_quantize_inputs) .Union()); - BuildInterpreter({}); // Input sizes are already set up. + // Input shapes are already set up, no need to pass them again. + BuildInterpreter(/*input_shapes=*/{}, /*num_threads=*/-1, + /*allow_fp32_relax_to_fp16=*/false, + /*apply_delegate=*/false); } void SetInputToInputWeights(const std::vector& f) { @@ -329,15 +332,11 @@ class BaseLstmOpTest // Compares output up to tolerance to the result of the lstm given the input. void VerifyGoldens(LSTMOpModel* lstm, float tolerance) { - // Weights are set twice: - // - The delegate, if used, needs to know the scales and zero-points of - // quantized tensors, which are computed dynamically when weights are set, - // so weights have to be set before applying the delegate. - // - Applying a delegate will invalidate the tensor data so weights have to - // be set a second time. 
+ // The delegate, if used, needs to know the scales and zero-points of + // quantized tensors, which are computed dynamically when weights are set, + // so weights have to be set before applying the delegate. SetAllWeightsAndBiases(lstm); lstm->ApplyDelegate(); - SetAllWeightsAndBiases(lstm); const int num_batches = lstm_input_.size(); EXPECT_GT(num_batches, 0); From 78688104bc118097a7968c864197a3c328f1c00b Mon Sep 17 00:00:00 2001 From: Nat Jeffries Date: Thu, 23 Jul 2020 11:37:40 -0700 Subject: [PATCH 1177/2522] Fix allocator build errors in xtensa softmax, conv + depthwise conv kernels. PiperOrigin-RevId: 322830325 Change-Id: I22eb3d1259db1390e6ad2c3caa588279b50fd674 --- tensorflow/lite/micro/kernels/xtensa_hifimini/conv.cc | 4 ++-- .../lite/micro/kernels/xtensa_hifimini/depthwise_conv.cc | 4 ++-- tensorflow/lite/micro/kernels/xtensa_hifimini/softmax.cc | 7 +++---- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/conv.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/conv.cc index dc39cc44e61..0e71bfbcb26 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/conv.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/conv.cc @@ -329,10 +329,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { const int num_channels = filter->dims->data[kConvQuantizedDimension]; // Dynimically allocate per-channel quantization parameters. op_data->per_channel_output_multiplier = - reinterpret_cast(context->AllocatePersistentBuffer( + reinterpret_cast(context->AllocatePersistentBuffer( context, num_channels * sizeof(int32_t))); op_data->per_channel_output_shift = - reinterpret_cast(context->AllocatePersistentBuffer( + reinterpret_cast(context->AllocatePersistentBuffer( context, num_channels * sizeof(int32_t))); // All per-channel quantized tensors need valid zero point and scale arrays. diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/depthwise_conv.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/depthwise_conv.cc index e7a37b6901d..656fb1b04cb 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/depthwise_conv.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/depthwise_conv.cc @@ -377,10 +377,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension]; // Dynimically allocate per-channel quantization parameters. op_data->per_channel_output_multiplier = - reinterpret_cast(context->AllocatePersistentBuffer( + reinterpret_cast(context->AllocatePersistentBuffer( context, num_channels * sizeof(int32_t))); op_data->per_channel_output_shift = - reinterpret_cast(context->AllocatePersistentBuffer( + reinterpret_cast(context->AllocatePersistentBuffer( context, num_channels * sizeof(int32_t))); // All per-channel quantized tensors need valid zero point and scale arrays. diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/softmax.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/softmax.cc index 0fb3646e3e8..f222387c831 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/softmax.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/softmax.cc @@ -167,10 +167,9 @@ TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) { // the scale and beta before calculating exp. It is mandatory to apply beta // and scale here, since each softmax op may have different beta and scale // values. Beta and scale will remain constant for a given softmax op. 
- void* allocated_ptr; - TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer( - context, kInt8Range * sizeof(int16_t), &allocated_ptr)); - op_data->exp_lut = static_cast(allocated_ptr); + op_data->exp_lut = static_cast(context->AllocatePersistentBuffer( + context, kInt8Range * sizeof(uint16_t))); + TF_LITE_ENSURE(context, op_data->exp_lut != nullptr); TF_LITE_ENSURE_STATUS( CalculateSoftmaxOpData(context, input, output, params, op_data)); From dc9685322deda182a8ef5c585fe65befbfb079aa Mon Sep 17 00:00:00 2001 From: Gaurav Jain Date: Thu, 23 Jul 2020 11:37:54 -0700 Subject: [PATCH 1178/2522] Handle int64 axis in ReduceTransposer PiperOrigin-RevId: 322830387 Change-Id: I4c5c7a536926fd032d5efc08cddf67e3844bca38 --- .../generic_layout_optimizer_transposer.cc | 7 +- ...eneric_layout_optimizer_transposer_test.cc | 256 +++++++++--------- 2 files changed, 140 insertions(+), 123 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer.cc b/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer.cc index a3449621405..0d836fda265 100644 --- a/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer.cc +++ b/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer.cc @@ -1236,7 +1236,12 @@ bool ReduceTransposer::IsAlongAxis(const Tensor& tensor, return false; } for (int i = 0; i < axis_size; ++i) { - int local_axis = tensor.flat()(i); + int local_axis = 0; + if (tensor.dtype() == DT_INT32) { + local_axis = tensor.flat()(i); + } else { + local_axis = tensor.flat()(i); + } if (local_axis < 0) { local_axis += rank; } diff --git a/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer_test.cc b/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer_test.cc index bf938b650bf..ab0ccf57a4b 100644 --- a/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer_test.cc +++ b/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer_test.cc @@ -370,6 +370,136 @@ class TransposerTest : public ::testing::Test { void TearDown() override { TF_ASSERT_OK(virtual_cluster_->Shutdown()); } + template + void ReduceTransposerKeepDims() { +#if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM) + GTEST_SKIP() << "Neither CUDA nor ROCm is enabled"; +#endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM) + GrapplerItem item; + Scope scope = Scope::NewRootScope(); + + auto input = + ops::RandomUniform(scope.WithOpName("input"), + {kBatchSize, kHeight, kWidth, kDepthIn}, DT_FLOAT); + auto filter = + ops::RandomUniform(scope.WithOpName("filter"), + {kHeight, kWidth, kDepthIn, kDepthOut}, DT_FLOAT); + Output conv2d = ops::Conv2D( + scope.WithOpName("conv2d").WithDevice("/device:GPU:0"), input, filter, + {1, 2, 4, 1}, "SAME", ops::Conv2D::DataFormat(kSrcFormat)); + + auto axis = ops::Const(scope.WithOpName("axis"), {0, 1, 2}, {3}); + auto attrs = ops::Sum::Attrs().KeepDims(true); + auto sum_op = ops::Sum(scope.WithOpName("sum").WithDevice("/device:GPU:0"), + conv2d, axis, attrs); + + auto z = ops::Identity(scope.WithOpName("z"), sum_op); + TF_ASSERT_OK(scope.ToGraphDef(&item.graph)); + + TransposeContext context; + TF_ASSERT_OK(TransposeContext::InitializeTransposeContext( + item, virtual_cluster_.get(), &context)); + context.AssignDeviceAndDataFormats(kGPU, kSrcFormat, kDstFormat); + + DefaultLayoutSensitiveOpTransposer conv2d_transposer; + auto* c2d = context.graph_view->GetNode("conv2d"); + ASSERT_NE(c2d, nullptr); + TF_ASSERT_OK(conv2d_transposer.TransposeNode(&context, c2d)); + + ReduceTransposer 
reducer_transposer; + auto* sum = context.graph_view->GetNode("sum"); + ASSERT_NE(sum, nullptr); + TF_ASSERT_OK(reducer_transposer.TransposeNode(&context, sum)); + + auto* input_transpose_node = context.graph_view->GetNode( + "sum-0-TransposeNHWCToNCHW-LayoutOptimizer"); + ASSERT_NE(input_transpose_node, nullptr); + + auto* updated_sum_node = context.graph_view->GetNode("sum"); + ASSERT_NE(updated_sum_node, nullptr); + ASSERT_EQ(updated_sum_node->NumRegularFanins(), 2); + VerifyRegularFaninMatch(updated_sum_node, 0, + input_transpose_node->GetName(), 0); + + auto* axis_node = context.graph_view->GetNode( + "sum-1-DataFormatDimMapNHWCToNCHW-LayoutOptimizer"); + ASSERT_NE(axis_node, nullptr); + ASSERT_EQ(axis_node->NumRegularFanins(), 1); + VerifyRegularFaninMatch(axis_node, 0, "axis", 0); + + auto* output_transpose_node = context.graph_view->GetNode( + "sum-0-0-TransposeNCHWToNHWC-LayoutOptimizer"); + ASSERT_NE(output_transpose_node, nullptr); + + auto* z_output_node = context.graph_view->GetNode("z"); + ASSERT_NE(z_output_node, nullptr); + ASSERT_EQ(z_output_node->NumRegularFanins(), 1); + VerifyRegularFaninMatch(z_output_node, 0, output_transpose_node->GetName(), + 0); + } + + template + void ReduceTransposerValidAxisNode() { +#if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM) + GTEST_SKIP() << "Neither CUDA nor ROCm is enabled"; +#endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM) + GrapplerItem item; + Scope scope = Scope::NewRootScope(); + + auto input = + ops::RandomUniform(scope.WithOpName("input"), + {kBatchSize, kHeight, kWidth, kDepthIn}, DT_FLOAT); + auto filter = + ops::RandomUniform(scope.WithOpName("filter"), + {kHeight, kWidth, kDepthIn, kDepthOut}, DT_FLOAT); + Output conv2d = ops::Conv2D( + scope.WithOpName("conv2d").WithDevice("/device:GPU:0"), input, filter, + {1, 2, 4, 1}, "SAME", ops::Conv2D::DataFormat(kSrcFormat)); + + auto axis = ops::Const(scope.WithOpName("axis"), {0, 1, 2}, {3}); + auto sum_op = ops::Max(scope.WithOpName("max").WithDevice("/device:GPU:0"), + conv2d, axis); + + auto z = ops::Identity(scope.WithOpName("z"), sum_op); + TF_ASSERT_OK(scope.ToGraphDef(&item.graph)); + + TransposeContext context; + TF_ASSERT_OK(TransposeContext::InitializeTransposeContext( + item, virtual_cluster_.get(), &context)); + context.AssignDeviceAndDataFormats(kGPU, kSrcFormat, kDstFormat); + + DefaultLayoutSensitiveOpTransposer conv2d_transposer; + auto* c2d = context.graph_view->GetNode("conv2d"); + ASSERT_NE(c2d, nullptr); + TF_ASSERT_OK(conv2d_transposer.TransposeNode(&context, c2d)); + + ReduceTransposer reducer_transposer; + auto* max = context.graph_view->GetNode("max"); + ASSERT_NE(max, nullptr); + TF_ASSERT_OK(reducer_transposer.TransposeNode(&context, max)); + + auto* input_transpose_node = context.graph_view->GetNode( + "max-0-TransposeNHWCToNCHW-LayoutOptimizer"); + ASSERT_NE(input_transpose_node, nullptr); + + auto* updated_max_node = context.graph_view->GetNode("max"); + ASSERT_NE(updated_max_node, nullptr); + ASSERT_EQ(updated_max_node->NumRegularFanins(), 2); + VerifyRegularFaninMatch(updated_max_node, 0, + input_transpose_node->GetName(), 0); + + auto* axis_node = context.graph_view->GetNode( + "max-1-DataFormatDimMapNHWCToNCHW-LayoutOptimizer"); + ASSERT_NE(axis_node, nullptr); + ASSERT_EQ(axis_node->NumRegularFanins(), 1); + VerifyRegularFaninMatch(axis_node, 0, "axis", 0); + + auto* z_output_node = context.graph_view->GetNode("z"); + ASSERT_NE(z_output_node, nullptr); + ASSERT_EQ(z_output_node->NumRegularFanins(), 1); + VerifyRegularFaninMatch(z_output_node, 0, 
updated_max_node->GetName(), 0); + } + std::unique_ptr virtual_cluster_; }; @@ -3637,131 +3767,13 @@ TEST_F(TransposerTest, StridedSliceTransposerConstFaninBadRank) { } TEST_F(TransposerTest, ReduceTransposerKeepDims) { -#if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM) - GTEST_SKIP() << "Neither CUDA nor ROCm is enabled"; -#endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM) - GrapplerItem item; - Scope scope = Scope::NewRootScope(); - - auto input = - ops::RandomUniform(scope.WithOpName("input"), - {kBatchSize, kHeight, kWidth, kDepthIn}, DT_FLOAT); - auto filter = - ops::RandomUniform(scope.WithOpName("filter"), - {kHeight, kWidth, kDepthIn, kDepthOut}, DT_FLOAT); - Output conv2d = ops::Conv2D( - scope.WithOpName("conv2d").WithDevice("/device:GPU:0"), input, filter, - {1, 2, 4, 1}, "SAME", ops::Conv2D::DataFormat(kSrcFormat)); - - auto axis = ops::Const(scope.WithOpName("axis"), {0, 1, 2}, {3}); - auto attrs = ops::Sum::Attrs().KeepDims(true); - auto sum_op = ops::Sum(scope.WithOpName("sum").WithDevice("/device:GPU:0"), - conv2d, axis, attrs); - - auto z = ops::Identity(scope.WithOpName("z"), sum_op); - TF_ASSERT_OK(scope.ToGraphDef(&item.graph)); - - TransposeContext context; - TF_ASSERT_OK(TransposeContext::InitializeTransposeContext( - item, virtual_cluster_.get(), &context)); - context.AssignDeviceAndDataFormats(kGPU, kSrcFormat, kDstFormat); - - DefaultLayoutSensitiveOpTransposer conv2d_transposer; - auto* c2d = context.graph_view->GetNode("conv2d"); - ASSERT_NE(c2d, nullptr); - TF_ASSERT_OK(conv2d_transposer.TransposeNode(&context, c2d)); - - ReduceTransposer reducer_transposer; - auto* sum = context.graph_view->GetNode("sum"); - ASSERT_NE(sum, nullptr); - TF_ASSERT_OK(reducer_transposer.TransposeNode(&context, sum)); - - auto* input_transpose_node = - context.graph_view->GetNode("sum-0-TransposeNHWCToNCHW-LayoutOptimizer"); - ASSERT_NE(input_transpose_node, nullptr); - - auto* updated_sum_node = context.graph_view->GetNode("sum"); - ASSERT_NE(updated_sum_node, nullptr); - ASSERT_EQ(updated_sum_node->NumRegularFanins(), 2); - VerifyRegularFaninMatch(updated_sum_node, 0, input_transpose_node->GetName(), - 0); - - auto* axis_node = context.graph_view->GetNode( - "sum-1-DataFormatDimMapNHWCToNCHW-LayoutOptimizer"); - ASSERT_NE(axis_node, nullptr); - ASSERT_EQ(axis_node->NumRegularFanins(), 1); - VerifyRegularFaninMatch(axis_node, 0, "axis", 0); - - auto* output_transpose_node = context.graph_view->GetNode( - "sum-0-0-TransposeNCHWToNHWC-LayoutOptimizer"); - ASSERT_NE(output_transpose_node, nullptr); - - auto* z_output_node = context.graph_view->GetNode("z"); - ASSERT_NE(z_output_node, nullptr); - ASSERT_EQ(z_output_node->NumRegularFanins(), 1); - VerifyRegularFaninMatch(z_output_node, 0, output_transpose_node->GetName(), - 0); + ReduceTransposerKeepDims(); + ReduceTransposerKeepDims(); } TEST_F(TransposerTest, ReduceTransposerValidAxisNode) { -#if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM) - GTEST_SKIP() << "Neither CUDA nor ROCm is enabled"; -#endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM) - GrapplerItem item; - Scope scope = Scope::NewRootScope(); - - auto input = - ops::RandomUniform(scope.WithOpName("input"), - {kBatchSize, kHeight, kWidth, kDepthIn}, DT_FLOAT); - auto filter = - ops::RandomUniform(scope.WithOpName("filter"), - {kHeight, kWidth, kDepthIn, kDepthOut}, DT_FLOAT); - Output conv2d = ops::Conv2D( - scope.WithOpName("conv2d").WithDevice("/device:GPU:0"), input, filter, - {1, 2, 4, 1}, "SAME", ops::Conv2D::DataFormat(kSrcFormat)); - - auto axis = ops::Const(scope.WithOpName("axis"), {0, 
1, 2}, {3}); - auto sum_op = ops::Max(scope.WithOpName("max").WithDevice("/device:GPU:0"), - conv2d, axis); - - auto z = ops::Identity(scope.WithOpName("z"), sum_op); - TF_ASSERT_OK(scope.ToGraphDef(&item.graph)); - - TransposeContext context; - TF_ASSERT_OK(TransposeContext::InitializeTransposeContext( - item, virtual_cluster_.get(), &context)); - context.AssignDeviceAndDataFormats(kGPU, kSrcFormat, kDstFormat); - - DefaultLayoutSensitiveOpTransposer conv2d_transposer; - auto* c2d = context.graph_view->GetNode("conv2d"); - ASSERT_NE(c2d, nullptr); - TF_ASSERT_OK(conv2d_transposer.TransposeNode(&context, c2d)); - - ReduceTransposer reducer_transposer; - auto* max = context.graph_view->GetNode("max"); - ASSERT_NE(max, nullptr); - TF_ASSERT_OK(reducer_transposer.TransposeNode(&context, max)); - - auto* input_transpose_node = - context.graph_view->GetNode("max-0-TransposeNHWCToNCHW-LayoutOptimizer"); - ASSERT_NE(input_transpose_node, nullptr); - - auto* updated_max_node = context.graph_view->GetNode("max"); - ASSERT_NE(updated_max_node, nullptr); - ASSERT_EQ(updated_max_node->NumRegularFanins(), 2); - VerifyRegularFaninMatch(updated_max_node, 0, input_transpose_node->GetName(), - 0); - - auto* axis_node = context.graph_view->GetNode( - "max-1-DataFormatDimMapNHWCToNCHW-LayoutOptimizer"); - ASSERT_NE(axis_node, nullptr); - ASSERT_EQ(axis_node->NumRegularFanins(), 1); - VerifyRegularFaninMatch(axis_node, 0, "axis", 0); - - auto* z_output_node = context.graph_view->GetNode("z"); - ASSERT_NE(z_output_node, nullptr); - ASSERT_EQ(z_output_node->NumRegularFanins(), 1); - VerifyRegularFaninMatch(z_output_node, 0, updated_max_node->GetName(), 0); + ReduceTransposerValidAxisNode(); + ReduceTransposerValidAxisNode(); } TEST(PermutationTest, PermutesVector) { From 40dc05fb5a5cf8a6db623975f9d2e5f160dad5e9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 23 Jul 2020 11:42:55 -0700 Subject: [PATCH 1179/2522] Quick fix in TFLite writer lib related to optional tensors. PiperOrigin-RevId: 322831460 Change-Id: I6ff70b7b65cabd759888b013a76e21bd51da7ab0 --- tensorflow/lite/experimental/writer/writer_lib.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tensorflow/lite/experimental/writer/writer_lib.cc b/tensorflow/lite/experimental/writer/writer_lib.cc index 85f57527c31..ed26c7f9038 100644 --- a/tensorflow/lite/experimental/writer/writer_lib.cc +++ b/tensorflow/lite/experimental/writer/writer_lib.cc @@ -320,6 +320,14 @@ TfLiteStatus SubgraphWriter::CheckInputOutput( subgraph_->node_and_registration(op_index); const TfLiteNode& node = node_and_registration->first; for (int tensor_index : TfLiteIntArrayView(node.inputs)) { + if (tensor_index < 0) { + // Skip if optional input not present. + if (tensor_index == kTfLiteOptionalTensor) { + continue; + } else { + return kTfLiteError; + } + } if (TfLiteTensor* tensor = subgraph_->tensor(tensor_index)) { // Skip constant tensors. 
if (tensor->allocation_type == kTfLiteMmapRo) { From 33b076a9cd6634115b1c9a49ca673a25bbb72aea Mon Sep 17 00:00:00 2001 From: Xingyu Long Date: Thu, 23 Jul 2020 15:25:55 -0400 Subject: [PATCH 1180/2522] Add distribution benchmark usages for anti_, bid_ and cnn_ model --- tensorflow/python/keras/benchmarks/BUILD | 6 +-- .../antirectifier_benchmark_test.py | 46 +++++++++++++++++-- .../bidirectional_lstm_benchmark_test.py | 40 ++++++++++++++++ .../cifar10_cnn_benchmark_test.py | 42 +++++++++++++++++ 4 files changed, 128 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/keras/benchmarks/BUILD b/tensorflow/python/keras/benchmarks/BUILD index d86e9479259..bfb2c2af395 100644 --- a/tensorflow/python/keras/benchmarks/BUILD +++ b/tensorflow/python/keras/benchmarks/BUILD @@ -81,7 +81,7 @@ py_library( ], ) -py_test( +cuda_py_test( name = "bidirectional_lstm_benchmark_test", size = "medium", srcs = ["keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py"], @@ -104,7 +104,7 @@ py_test( ], ) -py_test( +cuda_py_test( name = "antirectifier_benchmark_test", srcs = ["keras_examples_benchmarks/antirectifier_benchmark_test.py"], python_version = "PY3", @@ -161,7 +161,7 @@ py_test( ], ) -py_test( +cuda_py_test( name = "cifar10_cnn_benchmark_test", srcs = ["keras_examples_benchmarks/cifar10_cnn_benchmark_test.py"], python_version = "PY3", diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/antirectifier_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/antirectifier_benchmark_test.py index e636ee3476d..b3d24cf7e24 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/antirectifier_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/antirectifier_benchmark_test.py @@ -53,7 +53,7 @@ class AntirectifierBenchmark(tf.test.Benchmark): # optimizer: Optimizer for model. # Check more details in `measure_performance()` method of # benchmark_util. 
- def benchmark_pixel_cnn_bs_128(self): + def benchmark_anti_bs_128(self): """Measure performance with batch_size=128 and run_iters=2.""" batch_size = 128 run_iters = 2 @@ -70,7 +70,7 @@ class AntirectifierBenchmark(tf.test.Benchmark): self.report_benchmark( iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras) - def benchmark_pixel_cnn_bs_256(self): + def benchmark_anti_bs_256(self): """Measure performance with batch_size=256 and run_iters=3.""" batch_size = 256 run_iters = 3 @@ -87,7 +87,7 @@ class AntirectifierBenchmark(tf.test.Benchmark): self.report_benchmark( iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras) - def benchmark_pixel_cnn_bs_512(self): + def benchmark_anti_bs_512(self): """Measure performance with batch_size=512 and run_iters=4.""" batch_size = 512 run_iters = 4 @@ -104,6 +104,46 @@ class AntirectifierBenchmark(tf.test.Benchmark): self.report_benchmark( iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras) + def benchmark_anti_bs_512_gpu_1(self): + """Measure performance with batch_size=512, run_iters=4, gpu=1 and + distribution_strategy=`mirrored`.""" + batch_size = 512 + run_iters = 4 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + run_iters=run_iters, + num_gpus=1, + distribution_strategy='mirrored', + optimizer="rmsprop", + loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), + metrics=["sparse_categorical_accuracy"]) + + self.report_benchmark( + iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras) + + def benchmark_anti_bs_512_gpu_2(self): + """Measure performance with batch_size=512, run_iters=4, gpu=2 and + distribution_strategy=`mirrored`.""" + batch_size = 512 + run_iters = 4 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + run_iters=run_iters, + num_gpus=2, + distribution_strategy='mirrored', + optimizer="rmsprop", + loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), + metrics=["sparse_categorical_accuracy"]) + + self.report_benchmark( + iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras) + class Antirectifier(tf.keras.layers.Layer): """Build simple custome layer.""" diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py index 63e99e36285..75581ac6c7f 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py @@ -106,6 +106,46 @@ class BidirectionalLSTMBenchmark(tf.test.Benchmark): self.report_benchmark( iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras) + def benchmark_bidirect_lstm_imdb_bs_512_gpu_1(self): + """Measure performance with batch_size=512, run_iters=4, gpu=1 and + distribution_strategy=`mirrored`.""" + batch_size = 512 + run_iters = 4 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.imdb_x, + y=self.imdb_y, + batch_size=batch_size, + run_iters=run_iters, + num_gpus=1, + distribution_strategy="mirrored", + optimizer='adam', + loss='binary_crossentropy', + metrics=['accuracy']) + + self.report_benchmark( + iters=run_iters, wall_time=wall_time, 
metrics=metrics, extras=extras) + + def benchmark_bidirect_lstm_imdb_bs_512_gpu_2(self): + """Measure performance with batch_size=512, run_iters=4, gpu=2 and + distribution_strategy=`mirrored`.""" + batch_size = 512 + run_iters = 4 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.imdb_x, + y=self.imdb_y, + batch_size=batch_size, + run_iters=run_iters, + num_gpus=2, + distribution_strategy="mirrored", + optimizer='adam', + loss='binary_crossentropy', + metrics=['accuracy']) + + self.report_benchmark( + iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras) + if __name__ == '__main__': tf.test.main() diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/cifar10_cnn_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/cifar10_cnn_benchmark_test.py index dcc90b67b56..20f478adba1 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/cifar10_cnn_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/cifar10_cnn_benchmark_test.py @@ -123,6 +123,48 @@ class Cifar10CNNBenchmark(tf.test.Benchmark): self.report_benchmark( iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras) + def benchmark_cnn_cifar10_bs_1024_gpu_1(self): + """Measure performance with batch_size=1024, run_iters=2, gpu=1 and + distribution_strategy=`mirrored`.""" + batch_size = 1024 + run_iters = 2 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + run_iters=run_iters, + num_gpus=1, + distribution_strategy="mirrored", + epochs=self.epochs, + optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.0001, decay=1e-6), + loss='categorical_crossentropy', + metrics=['accuracy']) + + self.report_benchmark( + iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras) + + def benchmark_cnn_cifar10_bs_1024_gpu_2(self): + """Measure performance with batch_size=1024, run_iters=2, gpu=2 and + distribution_strategy=`mirrored`.""" + batch_size = 1024 + run_iters = 2 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + run_iters=run_iters, + num_gpus=2, + distribution_strategy="mirrored", + epochs=self.epochs, + optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.0001, decay=1e-6), + loss='categorical_crossentropy', + metrics=['accuracy']) + + self.report_benchmark( + iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras) + if __name__ == '__main__': tf.test.main() From 27bbfce766fbb4f81da9c3df319e1f8c10fac0a6 Mon Sep 17 00:00:00 2001 From: Zhenyu Tan Date: Thu, 23 Jul 2020 11:46:31 -0700 Subject: [PATCH 1181/2522] Add metric instrumentation for KPL. 
NO_RELEASE_UPDATES=n/a PiperOrigin-RevId: 322832242 Change-Id: I19de4ec919068ff290dfa1c420270899cdba57f2 --- .../keras/engine/base_preprocessing_layer.py | 10 ++++-- .../layers/preprocessing/category_crossing.py | 5 +-- .../layers/preprocessing/category_encoding.py | 1 + .../layers/preprocessing/discretization.py | 5 +-- .../keras/layers/preprocessing/hashing.py | 5 +-- .../preprocessing/image_preprocessing.py | 36 ++++++++++++------- .../layers/preprocessing/integer_lookup.py | 2 ++ .../layers/preprocessing/integer_lookup_v1.py | 15 +++++++- .../layers/preprocessing/normalization.py | 8 ++--- .../layers/preprocessing/normalization_v1.py | 6 +++- .../layers/preprocessing/string_lookup.py | 2 ++ .../layers/preprocessing/string_lookup_v1.py | 22 +++++++++++- .../preprocessing/text_vectorization.py | 5 +-- .../preprocessing/text_vectorization_v1.py | 15 ++++++++ ...tal.preprocessing.-category-crossing.pbtxt | 5 +++ ...ing.-category-encoding.__metaclass__.pbtxt | 14 -------- ...erimental.preprocessing.-center-crop.pbtxt | 5 +++ ...mental.preprocessing.-discretization.pbtxt | 5 +++ ....experimental.preprocessing.-hashing.pbtxt | 5 +++ ...essing.-integer-lookup.__metaclass__.pbtxt | 14 -------- ...cessing.-normalization.__metaclass__.pbtxt | 14 -------- ...g.-preprocessing-layer.__metaclass__.pbtxt | 14 -------- ...ental.preprocessing.-random-contrast.pbtxt | 5 +++ ...erimental.preprocessing.-random-crop.pbtxt | 5 +++ ...erimental.preprocessing.-random-flip.pbtxt | 5 +++ ...imental.preprocessing.-random-height.pbtxt | 5 +++ ...ental.preprocessing.-random-rotation.pbtxt | 5 +++ ...al.preprocessing.-random-translation.pbtxt | 5 +++ ...rimental.preprocessing.-random-width.pbtxt | 5 +++ ...erimental.preprocessing.-random-zoom.pbtxt | 5 +++ ...xperimental.preprocessing.-rescaling.pbtxt | 5 +++ ...experimental.preprocessing.-resizing.pbtxt | 5 +++ ...cessing.-string-lookup.__metaclass__.pbtxt | 14 -------- ...ng.-text-vectorization.__metaclass__.pbtxt | 14 -------- ...tal.preprocessing.-category-crossing.pbtxt | 5 +++ ...ing.-category-encoding.__metaclass__.pbtxt | 14 -------- ...erimental.preprocessing.-center-crop.pbtxt | 5 +++ ...mental.preprocessing.-discretization.pbtxt | 5 +++ ....experimental.preprocessing.-hashing.pbtxt | 5 +++ ...essing.-integer-lookup.__metaclass__.pbtxt | 14 -------- ...cessing.-normalization.__metaclass__.pbtxt | 14 -------- ...g.-preprocessing-layer.__metaclass__.pbtxt | 14 -------- ...ental.preprocessing.-random-contrast.pbtxt | 5 +++ ...erimental.preprocessing.-random-crop.pbtxt | 5 +++ ...erimental.preprocessing.-random-flip.pbtxt | 5 +++ ...imental.preprocessing.-random-height.pbtxt | 5 +++ ...ental.preprocessing.-random-rotation.pbtxt | 5 +++ ...al.preprocessing.-random-translation.pbtxt | 5 +++ ...rimental.preprocessing.-random-width.pbtxt | 5 +++ ...erimental.preprocessing.-random-zoom.pbtxt | 5 +++ ...xperimental.preprocessing.-rescaling.pbtxt | 5 +++ ...experimental.preprocessing.-resizing.pbtxt | 5 +++ ...cessing.-string-lookup.__metaclass__.pbtxt | 14 -------- ...ng.-text-vectorization.__metaclass__.pbtxt | 14 -------- 54 files changed, 248 insertions(+), 197 deletions(-) delete mode 100644 tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-category-encoding.__metaclass__.pbtxt delete mode 100644 tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-integer-lookup.__metaclass__.pbtxt delete mode 100644 
tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-normalization.__metaclass__.pbtxt delete mode 100644 tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.__metaclass__.pbtxt delete mode 100644 tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-string-lookup.__metaclass__.pbtxt delete mode 100644 tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-text-vectorization.__metaclass__.pbtxt delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-category-encoding.__metaclass__.pbtxt delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-integer-lookup.__metaclass__.pbtxt delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-normalization.__metaclass__.pbtxt delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.__metaclass__.pbtxt delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-string-lookup.__metaclass__.pbtxt delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-text-vectorization.__metaclass__.pbtxt diff --git a/tensorflow/python/keras/engine/base_preprocessing_layer.py b/tensorflow/python/keras/engine/base_preprocessing_layer.py index c8ba1229ff5..ac4a0669ed7 100644 --- a/tensorflow/python/keras/engine/base_preprocessing_layer.py +++ b/tensorflow/python/keras/engine/base_preprocessing_layer.py @@ -21,9 +21,11 @@ import abc import collections import numpy as np +import six from tensorflow.python.data.ops import dataset_ops from tensorflow.python.eager import context +from tensorflow.python.eager import monitoring from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops @@ -37,13 +39,17 @@ from tensorflow.python.ops.ragged import ragged_tensor from tensorflow.python.util.tf_export import keras_export +_kpl_gauge = monitoring.StringGauge( + '/tensorflow/api/keras/layers/preprocessing', + 'keras preprocessing layers usage', 'TFVersion') + + @keras_export('keras.layers.experimental.preprocessing.PreprocessingLayer') +@six.add_metaclass(abc.ABCMeta) class PreprocessingLayer(Layer): """Base class for PreprocessingLayers.""" - __metaclass__ = abc.ABCMeta _must_restore_from_config = True - @abc.abstractmethod def adapt(self, data, reset_state=True): # TODO(momernick): Add examples. """Fits the state of the preprocessing layer to the data being passed. 
diff --git a/tensorflow/python/keras/layers/preprocessing/category_crossing.py b/tensorflow/python/keras/layers/preprocessing/category_crossing.py index 7c80e0e140e..c147e3ab770 100644 --- a/tensorflow/python/keras/layers/preprocessing/category_crossing.py +++ b/tensorflow/python/keras/layers/preprocessing/category_crossing.py @@ -26,7 +26,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_spec -from tensorflow.python.keras.engine.base_layer import Layer +from tensorflow.python.keras.engine import base_preprocessing_layer from tensorflow.python.ops import array_ops from tensorflow.python.ops import sparse_ops from tensorflow.python.ops.ragged import ragged_array_ops @@ -35,7 +35,7 @@ from tensorflow.python.util.tf_export import keras_export @keras_export('keras.layers.experimental.preprocessing.CategoryCrossing') -class CategoryCrossing(Layer): +class CategoryCrossing(base_preprocessing_layer.PreprocessingLayer): """Category crossing layer. This layer concatenates multiple categorical inputs into a single categorical @@ -115,6 +115,7 @@ class CategoryCrossing(Layer): def __init__(self, depth=None, name=None, separator=None, **kwargs): super(CategoryCrossing, self).__init__(name=name, **kwargs) + base_preprocessing_layer._kpl_gauge.get_cell('V2').set('CategoryCrossing') self.depth = depth if separator is None: separator = '_X_' diff --git a/tensorflow/python/keras/layers/preprocessing/category_encoding.py b/tensorflow/python/keras/layers/preprocessing/category_encoding.py index 128188b09c2..95540176e04 100644 --- a/tensorflow/python/keras/layers/preprocessing/category_encoding.py +++ b/tensorflow/python/keras/layers/preprocessing/category_encoding.py @@ -131,6 +131,7 @@ class CategoryEncoding(base_preprocessing_layer.CombinerPreprocessingLayer): compute_max_element=max_tokens is None, compute_idf=output_mode == TFIDF) super(CategoryEncoding, self).__init__(combiner=combiner, **kwargs) + base_preprocessing_layer._kpl_gauge.get_cell("V2").set("CategoryEncoding") self._max_tokens = max_tokens self._output_mode = output_mode diff --git a/tensorflow/python/keras/layers/preprocessing/discretization.py b/tensorflow/python/keras/layers/preprocessing/discretization.py index 0b6cf89009b..7544ded5949 100644 --- a/tensorflow/python/keras/layers/preprocessing/discretization.py +++ b/tensorflow/python/keras/layers/preprocessing/discretization.py @@ -20,7 +20,7 @@ from __future__ import print_function from tensorflow.python.framework import dtypes from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_spec -from tensorflow.python.keras.engine.base_layer import Layer +from tensorflow.python.keras.engine import base_preprocessing_layer from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops.ragged import ragged_functional_ops @@ -29,7 +29,7 @@ from tensorflow.python.util.tf_export import keras_export @keras_export("keras.layers.experimental.preprocessing.Discretization") -class Discretization(Layer): +class Discretization(base_preprocessing_layer.PreprocessingLayer): """Buckets data into discrete ranges. 
This layer will place each element of its input data into one of several @@ -61,6 +61,7 @@ class Discretization(Layer): def __init__(self, bins, **kwargs): super(Discretization, self).__init__(**kwargs) + base_preprocessing_layer._kpl_gauge.get_cell("V2").set("Discretization") self.bins = bins def get_config(self): diff --git a/tensorflow/python/keras/layers/preprocessing/hashing.py b/tensorflow/python/keras/layers/preprocessing/hashing.py index 861301637fc..6e4d388d202 100644 --- a/tensorflow/python/keras/layers/preprocessing/hashing.py +++ b/tensorflow/python/keras/layers/preprocessing/hashing.py @@ -27,7 +27,7 @@ from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_spec from tensorflow.python.framework import tensor_util -from tensorflow.python.keras.engine.base_layer import Layer +from tensorflow.python.keras.engine import base_preprocessing_layer from tensorflow.python.ops import gen_sparse_ops from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import string_ops @@ -40,7 +40,7 @@ _DEFAULT_SALT_KEY = [0xDECAFCAFFE, 0xDECAFCAFFE] @keras_export('keras.layers.experimental.preprocessing.Hashing') -class Hashing(Layer): +class Hashing(base_preprocessing_layer.PreprocessingLayer): """Implements categorical feature hashing, also known as "hashing trick". This layer transforms single or multiple categorical inputs to hashed output. @@ -137,6 +137,7 @@ class Hashing(Layer): if num_bins is None or num_bins <= 0: raise ValueError('`num_bins` cannot be `None` or non-positive values.') super(Hashing, self).__init__(name=name, **kwargs) + base_preprocessing_layer._kpl_gauge.get_cell('V2').set('Hashing') self.num_bins = num_bins self.strong_hash = True if salt is not None else False if salt is not None: diff --git a/tensorflow/python/keras/layers/preprocessing/image_preprocessing.py b/tensorflow/python/keras/layers/preprocessing/image_preprocessing.py index d36e192d873..e9ec157de59 100644 --- a/tensorflow/python/keras/layers/preprocessing/image_preprocessing.py +++ b/tensorflow/python/keras/layers/preprocessing/image_preprocessing.py @@ -26,7 +26,8 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_util from tensorflow.python.keras import backend as K -from tensorflow.python.keras.engine.base_layer import Layer +from tensorflow.python.keras.engine import base_preprocessing_layer +from tensorflow.python.keras.engine.base_preprocessing_layer import PreprocessingLayer from tensorflow.python.keras.engine.input_spec import InputSpec from tensorflow.python.keras.utils import tf_utils from tensorflow.python.ops import array_ops @@ -67,7 +68,7 @@ def check_fill_mode_and_interpolation(fill_mode, interpolation): @keras_export('keras.layers.experimental.preprocessing.Resizing') -class Resizing(Layer): +class Resizing(PreprocessingLayer): """Image resizing layer. Resize the batched image input to target height and width. 
The input should @@ -94,6 +95,7 @@ class Resizing(Layer): self._interpolation_method = get_interpolation(interpolation) self.input_spec = InputSpec(ndim=4) super(Resizing, self).__init__(name=name, **kwargs) + base_preprocessing_layer._kpl_gauge.get_cell('V2').set('Resizing') def call(self, inputs): outputs = image_ops.resize_images_v2( @@ -118,7 +120,7 @@ class Resizing(Layer): @keras_export('keras.layers.experimental.preprocessing.CenterCrop') -class CenterCrop(Layer): +class CenterCrop(PreprocessingLayer): """Crop the central portion of the images to target height and width. Input shape: @@ -143,6 +145,7 @@ class CenterCrop(Layer): self.target_width = width self.input_spec = InputSpec(ndim=4) super(CenterCrop, self).__init__(name=name, **kwargs) + base_preprocessing_layer._kpl_gauge.get_cell('V2').set('CenterCrop') def call(self, inputs): inputs_shape = array_ops.shape(inputs) @@ -185,7 +188,7 @@ class CenterCrop(Layer): @keras_export('keras.layers.experimental.preprocessing.RandomCrop') -class RandomCrop(Layer): +class RandomCrop(PreprocessingLayer): """Randomly crop the images to target height and width. This layer will crop all the images in the same batch to the same cropping @@ -217,6 +220,7 @@ class RandomCrop(Layer): self._rng = make_generator(self.seed) self.input_spec = InputSpec(ndim=4) super(RandomCrop, self).__init__(name=name, **kwargs) + base_preprocessing_layer._kpl_gauge.get_cell('V2').set('RandomCrop') def call(self, inputs, training=True): if training is None: @@ -292,7 +296,7 @@ class RandomCrop(Layer): @keras_export('keras.layers.experimental.preprocessing.Rescaling') -class Rescaling(Layer): +class Rescaling(PreprocessingLayer): """Multiply inputs by `scale` and adds `offset`. For instance: @@ -321,6 +325,7 @@ class Rescaling(Layer): self.scale = scale self.offset = offset super(Rescaling, self).__init__(name=name, **kwargs) + base_preprocessing_layer._kpl_gauge.get_cell('V2').set('Rescaling') def call(self, inputs): dtype = self._compute_dtype @@ -346,7 +351,7 @@ HORIZONTAL_AND_VERTICAL = 'horizontal_and_vertical' @keras_export('keras.layers.experimental.preprocessing.RandomFlip') -class RandomFlip(Layer): +class RandomFlip(PreprocessingLayer): """Randomly flip each image horizontally and vertically. This layer will flip the images based on the `mode` attribute. @@ -376,6 +381,7 @@ class RandomFlip(Layer): name=None, **kwargs): super(RandomFlip, self).__init__(name=name, **kwargs) + base_preprocessing_layer._kpl_gauge.get_cell('V2').set('RandomFlip') self.mode = mode if mode == HORIZONTAL: self.horizontal = True @@ -426,7 +432,7 @@ class RandomFlip(Layer): # TODO(tanzheny): Add examples, here and everywhere. @keras_export('keras.layers.experimental.preprocessing.RandomTranslation') -class RandomTranslation(Layer): +class RandomTranslation(PreprocessingLayer): """Randomly translate each image during training. Arguments: @@ -520,6 +526,7 @@ class RandomTranslation(Layer): self._rng = make_generator(self.seed) self.input_spec = InputSpec(ndim=4) super(RandomTranslation, self).__init__(name=name, **kwargs) + base_preprocessing_layer._kpl_gauge.get_cell('V2').set('RandomTranslation') def call(self, inputs, training=True): if training is None: @@ -730,7 +737,7 @@ def get_rotation_matrix(angles, image_height, image_width, name=None): @keras_export('keras.layers.experimental.preprocessing.RandomRotation') -class RandomRotation(Layer): +class RandomRotation(PreprocessingLayer): """Randomly rotate each image. By default, random rotations are only applied during training. 
@@ -806,6 +813,7 @@ class RandomRotation(Layer): self._rng = make_generator(self.seed) self.input_spec = InputSpec(ndim=4) super(RandomRotation, self).__init__(name=name, **kwargs) + base_preprocessing_layer._kpl_gauge.get_cell('V2').set('RandomRotation') def call(self, inputs, training=True): if training is None: @@ -847,7 +855,7 @@ class RandomRotation(Layer): @keras_export('keras.layers.experimental.preprocessing.RandomZoom') -class RandomZoom(Layer): +class RandomZoom(PreprocessingLayer): """Randomly zoom each image during training. Arguments: @@ -947,6 +955,7 @@ class RandomZoom(Layer): self._rng = make_generator(self.seed) self.input_spec = InputSpec(ndim=4) super(RandomZoom, self).__init__(name=name, **kwargs) + base_preprocessing_layer._kpl_gauge.get_cell('V2').set('RandomZoom') def call(self, inputs, training=True): if training is None: @@ -1039,7 +1048,7 @@ def get_zoom_matrix(zooms, image_height, image_width, name=None): @keras_export('keras.layers.experimental.preprocessing.RandomContrast') -class RandomContrast(Layer): +class RandomContrast(PreprocessingLayer): """Adjust the contrast of an image or images by a random factor. Contrast is adjusted independently for each channel of each image during @@ -1083,6 +1092,7 @@ class RandomContrast(Layer): self.seed = seed self.input_spec = InputSpec(ndim=4) super(RandomContrast, self).__init__(name=name, **kwargs) + base_preprocessing_layer._kpl_gauge.get_cell('V2').set('RandomContrast') def call(self, inputs, training=True): if training is None: @@ -1110,7 +1120,7 @@ class RandomContrast(Layer): @keras_export('keras.layers.experimental.preprocessing.RandomHeight') -class RandomHeight(Layer): +class RandomHeight(PreprocessingLayer): """Randomly vary the height of a batch of images during training. Adjusts the height of a batch of images by a random factor. The input @@ -1166,6 +1176,7 @@ class RandomHeight(Layer): self.seed = seed self._rng = make_generator(self.seed) super(RandomHeight, self).__init__(name=name, **kwargs) + base_preprocessing_layer._kpl_gauge.get_cell('V2').set('RandomHeight') def call(self, inputs, training=True): if training is None: @@ -1207,7 +1218,7 @@ class RandomHeight(Layer): @keras_export('keras.layers.experimental.preprocessing.RandomWidth') -class RandomWidth(Layer): +class RandomWidth(PreprocessingLayer): """Randomly vary the width of a batch of images during training. Adjusts the width of a batch of images by a random factor. 
The input @@ -1264,6 +1275,7 @@ class RandomWidth(Layer): self.seed = seed self._rng = make_generator(self.seed) super(RandomWidth, self).__init__(name=name, **kwargs) + base_preprocessing_layer._kpl_gauge.get_cell('V2').set('RandomWidth') def call(self, inputs, training=True): if training is None: diff --git a/tensorflow/python/keras/layers/preprocessing/integer_lookup.py b/tensorflow/python/keras/layers/preprocessing/integer_lookup.py index 3512b9988c1..d0ffc987e01 100644 --- a/tensorflow/python/keras/layers/preprocessing/integer_lookup.py +++ b/tensorflow/python/keras/layers/preprocessing/integer_lookup.py @@ -18,6 +18,7 @@ from __future__ import division from __future__ import print_function from tensorflow.python.framework import dtypes +from tensorflow.python.keras.engine import base_preprocessing_layer from tensorflow.python.keras.layers.preprocessing import index_lookup from tensorflow.python.keras.layers.preprocessing import table_utils from tensorflow.python.util.tf_export import keras_export @@ -201,6 +202,7 @@ class IntegerLookup(index_lookup.IndexLookup): vocabulary=vocabulary, invert=invert, **kwargs) + base_preprocessing_layer._kpl_gauge.get_cell("V2").set("IntegerLookup") def get_config(self): base_config = super(IntegerLookup, self).get_config() diff --git a/tensorflow/python/keras/layers/preprocessing/integer_lookup_v1.py b/tensorflow/python/keras/layers/preprocessing/integer_lookup_v1.py index 2a86e9d56b0..da37b15abd2 100644 --- a/tensorflow/python/keras/layers/preprocessing/integer_lookup_v1.py +++ b/tensorflow/python/keras/layers/preprocessing/integer_lookup_v1.py @@ -17,6 +17,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.keras.engine import base_preprocessing_layer from tensorflow.python.keras.layers.preprocessing import index_lookup_v1 from tensorflow.python.keras.layers.preprocessing import integer_lookup from tensorflow.python.util.tf_export import keras_export @@ -24,4 +25,16 @@ from tensorflow.python.util.tf_export import keras_export @keras_export(v1=["keras.layers.experimental.preprocessing.IntegerLookup"]) class IntegerLookup(integer_lookup.IntegerLookup, index_lookup_v1.IndexLookup): - pass + """Maps integers from a vocabulary to integer indices.""" + + def __init__(self, + max_values=None, + num_oov_indices=1, + mask_value=0, + oov_value=-1, + vocabulary=None, + invert=False, + **kwargs): + super(IntegerLookup, self).__init__(max_values, num_oov_indices, mask_value, + oov_value, vocabulary, invert, **kwargs) + base_preprocessing_layer._kpl_gauge.get_cell("V1").set("IntegerLookup") diff --git a/tensorflow/python/keras/layers/preprocessing/normalization.py b/tensorflow/python/keras/layers/preprocessing/normalization.py index ba2f7eaae89..07f8d40ec24 100644 --- a/tensorflow/python/keras/layers/preprocessing/normalization.py +++ b/tensorflow/python/keras/layers/preprocessing/normalization.py @@ -25,8 +25,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.keras import backend as K -from tensorflow.python.keras.engine.base_preprocessing_layer import Combiner -from tensorflow.python.keras.engine.base_preprocessing_layer import CombinerPreprocessingLayer +from tensorflow.python.keras.engine import base_preprocessing_layer from tensorflow.python.ops import array_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops @@ 
-40,7 +39,7 @@ _VARIANCE_NAME = 'variance' # TODO(momernick): Find a good example of normalization? @keras_export('keras.layers.experimental.preprocessing.Normalization', v1=[]) -class Normalization(CombinerPreprocessingLayer): +class Normalization(base_preprocessing_layer.CombinerPreprocessingLayer): """Feature-wise normalization of the data. This layer will coerce its inputs into a distribution centered around @@ -91,6 +90,7 @@ class Normalization(CombinerPreprocessingLayer): super(Normalization, self).__init__( combiner=_NormalizingCombiner(axis), dtype=dtype, **kwargs) + base_preprocessing_layer._kpl_gauge.get_cell('V2').set('Normalization') if 0 in axis: raise ValueError('The argument \'axis\' may not be 0.') @@ -176,7 +176,7 @@ class Normalization(CombinerPreprocessingLayer): super(Normalization, self).set_weights(weights) -class _NormalizingCombiner(Combiner): +class _NormalizingCombiner(base_preprocessing_layer.Combiner): """Combiner for the Normalization preprocessing layer. This class encapsulates the computations for finding the mean and variance diff --git a/tensorflow/python/keras/layers/preprocessing/normalization_v1.py b/tensorflow/python/keras/layers/preprocessing/normalization_v1.py index 2cb4413cf7f..12b29e36f4a 100644 --- a/tensorflow/python/keras/layers/preprocessing/normalization_v1.py +++ b/tensorflow/python/keras/layers/preprocessing/normalization_v1.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.keras.engine import base_preprocessing_layer from tensorflow.python.keras.engine.base_preprocessing_layer_v1 import CombinerPreprocessingLayer from tensorflow.python.keras.layers.preprocessing import normalization from tensorflow.python.util.tf_export import keras_export @@ -25,4 +26,7 @@ from tensorflow.python.util.tf_export import keras_export @keras_export(v1=['keras.layers.experimental.preprocessing.Normalization']) class Normalization(normalization.Normalization, CombinerPreprocessingLayer): - pass + + def __init__(self, axis=-1, dtype=None, **kwargs): + super(Normalization, self).__init__(axis, dtype, **kwargs) + base_preprocessing_layer._kpl_gauge.get_cell('V1').set('Normalization') diff --git a/tensorflow/python/keras/layers/preprocessing/string_lookup.py b/tensorflow/python/keras/layers/preprocessing/string_lookup.py index d772f57aa4d..c70ac50dd07 100644 --- a/tensorflow/python/keras/layers/preprocessing/string_lookup.py +++ b/tensorflow/python/keras/layers/preprocessing/string_lookup.py @@ -18,6 +18,7 @@ from __future__ import division from __future__ import print_function from tensorflow.python.framework import dtypes +from tensorflow.python.keras.engine import base_preprocessing_layer from tensorflow.python.keras.layers.preprocessing import index_lookup from tensorflow.python.keras.layers.preprocessing import table_utils from tensorflow.python.util.tf_export import keras_export @@ -196,6 +197,7 @@ class StringLookup(index_lookup.IndexLookup): vocabulary=vocabulary, invert=invert, **kwargs) + base_preprocessing_layer._kpl_gauge.get_cell("V2").set("StringLookup") def get_config(self): config = {"encoding": self.encoding} diff --git a/tensorflow/python/keras/layers/preprocessing/string_lookup_v1.py b/tensorflow/python/keras/layers/preprocessing/string_lookup_v1.py index 3b5d0679372..59649be720b 100644 --- a/tensorflow/python/keras/layers/preprocessing/string_lookup_v1.py +++ b/tensorflow/python/keras/layers/preprocessing/string_lookup_v1.py @@ -17,6 +17,7 @@ 
from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.keras.engine import base_preprocessing_layer from tensorflow.python.keras.layers.preprocessing import index_lookup_v1 from tensorflow.python.keras.layers.preprocessing import string_lookup from tensorflow.python.util.tf_export import keras_export @@ -24,4 +25,23 @@ from tensorflow.python.util.tf_export import keras_export @keras_export(v1=["keras.layers.experimental.preprocessing.StringLookup"]) class StringLookup(string_lookup.StringLookup, index_lookup_v1.IndexLookup): - pass + """Maps strings from a vocabulary to integer indices.""" + + def __init__(self, + max_tokens=None, + num_oov_indices=1, + mask_token="", + oov_token="[UNK]", + vocabulary=None, + encoding=None, + invert=False, + **kwargs): + super(StringLookup, self).__init__( + max_tokens=max_tokens, + num_oov_indices=num_oov_indices, + mask_token=mask_token, + oov_token=oov_token, + vocabulary=vocabulary, + invert=invert, + **kwargs) + base_preprocessing_layer._kpl_gauge.get_cell("V1").set("StringLookup") diff --git a/tensorflow/python/keras/layers/preprocessing/text_vectorization.py b/tensorflow/python/keras/layers/preprocessing/text_vectorization.py index 97e3ac4a63c..400a27fb82d 100644 --- a/tensorflow/python/keras/layers/preprocessing/text_vectorization.py +++ b/tensorflow/python/keras/layers/preprocessing/text_vectorization.py @@ -25,7 +25,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_spec from tensorflow.python.keras import backend as K -from tensorflow.python.keras.engine.base_preprocessing_layer import CombinerPreprocessingLayer +from tensorflow.python.keras.engine import base_preprocessing_layer from tensorflow.python.keras.layers.preprocessing import category_encoding from tensorflow.python.keras.layers.preprocessing import string_lookup from tensorflow.python.keras.utils import layer_utils @@ -71,7 +71,7 @@ _ACCUMULATOR_NUM_DOCUMENTS = "num_documents" @keras_export( "keras.layers.experimental.preprocessing.TextVectorization", v1=[]) -class TextVectorization(CombinerPreprocessingLayer): +class TextVectorization(base_preprocessing_layer.CombinerPreprocessingLayer): """Text vectorization layer. This layer has basic options for managing text in a Keras model. 
It @@ -291,6 +291,7 @@ class TextVectorization(CombinerPreprocessingLayer): super(TextVectorization, self).__init__( combiner=None, **kwargs) + base_preprocessing_layer._kpl_gauge.get_cell("V2").set("TextVectorization") mask_token = "" if output_mode in [None, INT] else None self._index_lookup_layer = self._get_index_lookup_class()( diff --git a/tensorflow/python/keras/layers/preprocessing/text_vectorization_v1.py b/tensorflow/python/keras/layers/preprocessing/text_vectorization_v1.py index 505cdc39547..ecb49d1fbdd 100644 --- a/tensorflow/python/keras/layers/preprocessing/text_vectorization_v1.py +++ b/tensorflow/python/keras/layers/preprocessing/text_vectorization_v1.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.keras.engine import base_preprocessing_layer from tensorflow.python.keras.engine import base_preprocessing_layer_v1 from tensorflow.python.keras.layers.preprocessing import category_encoding_v1 from tensorflow.python.keras.layers.preprocessing import string_lookup_v1 @@ -76,6 +77,20 @@ class TextVectorization(text_vectorization.TextVectorization, vocabulary is less than max_tokens. """ + def __init__(self, + max_tokens=None, + standardize=text_vectorization.LOWER_AND_STRIP_PUNCTUATION, + split=text_vectorization.SPLIT_ON_WHITESPACE, + ngrams=None, + output_mode=text_vectorization.INT, + output_sequence_length=None, + pad_to_max_tokens=True, + **kwargs): + super(TextVectorization, + self).__init__(max_tokens, standardize, split, ngrams, output_mode, + output_sequence_length, pad_to_max_tokens, **kwargs) + base_preprocessing_layer._kpl_gauge.get_cell("V1").set("TextVectorization") + def _get_vectorization_class(self): return category_encoding_v1.CategoryEncoding diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-category-crossing.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-category-crossing.pbtxt index d48eb4ecc4a..ceb38316d11 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-category-crossing.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-category-crossing.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.keras.layers.experimental.preprocessing.CategoryCrossing" tf_class { is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -119,6 +120,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'depth\', \'name\', \'separator\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\'], " } + member_method { + name: "adapt" + argspec: "args=[\'self\', \'data\', \'reset_state\'], varargs=None, keywords=None, defaults=[\'True\'], " + } member_method { name: "add_loss" argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-category-encoding.__metaclass__.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-category-encoding.__metaclass__.pbtxt deleted file mode 100644 index e907d9a293b..00000000000 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-category-encoding.__metaclass__.pbtxt +++ /dev/null @@ -1,14 +0,0 @@ -path: "tensorflow.keras.layers.experimental.preprocessing.CategoryEncoding.__metaclass__" -tf_class { - is_instance: "" - member_method { - name: 
"__init__" - } - member_method { - name: "mro" - } - member_method { - name: "register" - argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None" - } -} diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-center-crop.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-center-crop.pbtxt index 819c6a693c9..14d43cb08e8 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-center-crop.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-center-crop.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.keras.layers.experimental.preprocessing.CenterCrop" tf_class { is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -119,6 +120,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'height\', \'width\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'None\'], " } + member_method { + name: "adapt" + argspec: "args=[\'self\', \'data\', \'reset_state\'], varargs=None, keywords=None, defaults=[\'True\'], " + } member_method { name: "add_loss" argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-discretization.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-discretization.pbtxt index 5f78e5637ea..cb7a793f94d 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-discretization.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-discretization.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.keras.layers.experimental.preprocessing.Discretization" tf_class { is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -119,6 +120,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'bins\'], varargs=None, keywords=kwargs, defaults=None" } + member_method { + name: "adapt" + argspec: "args=[\'self\', \'data\', \'reset_state\'], varargs=None, keywords=None, defaults=[\'True\'], " + } member_method { name: "add_loss" argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-hashing.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-hashing.pbtxt index 51f113d4e4a..75a1efc2f15 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-hashing.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-hashing.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.keras.layers.experimental.preprocessing.Hashing" tf_class { is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -119,6 +120,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'num_bins\', \'salt\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\'], " } + member_method { + name: "adapt" + argspec: "args=[\'self\', \'data\', \'reset_state\'], varargs=None, keywords=None, defaults=[\'True\'], " + } member_method { name: "add_loss" argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-integer-lookup.__metaclass__.pbtxt 
b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-integer-lookup.__metaclass__.pbtxt deleted file mode 100644 index 409509cd4d2..00000000000 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-integer-lookup.__metaclass__.pbtxt +++ /dev/null @@ -1,14 +0,0 @@ -path: "tensorflow.keras.layers.experimental.preprocessing.IntegerLookup.__metaclass__" -tf_class { - is_instance: "" - member_method { - name: "__init__" - } - member_method { - name: "mro" - } - member_method { - name: "register" - argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None" - } -} diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-normalization.__metaclass__.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-normalization.__metaclass__.pbtxt deleted file mode 100644 index 20bb9904d18..00000000000 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-normalization.__metaclass__.pbtxt +++ /dev/null @@ -1,14 +0,0 @@ -path: "tensorflow.keras.layers.experimental.preprocessing.Normalization.__metaclass__" -tf_class { - is_instance: "" - member_method { - name: "__init__" - } - member_method { - name: "mro" - } - member_method { - name: "register" - argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None" - } -} diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.__metaclass__.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.__metaclass__.pbtxt deleted file mode 100644 index ceebb69d16a..00000000000 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.__metaclass__.pbtxt +++ /dev/null @@ -1,14 +0,0 @@ -path: "tensorflow.keras.layers.experimental.preprocessing.PreprocessingLayer.__metaclass__" -tf_class { - is_instance: "" - member_method { - name: "__init__" - } - member_method { - name: "mro" - } - member_method { - name: "register" - argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None" - } -} diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-contrast.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-contrast.pbtxt index 0f575454a80..8eca3903616 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-contrast.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-contrast.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.keras.layers.experimental.preprocessing.RandomContrast" tf_class { is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -119,6 +120,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'factor\', \'seed\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\'], " } + member_method { + name: "adapt" + argspec: "args=[\'self\', \'data\', \'reset_state\'], varargs=None, keywords=None, defaults=[\'True\'], " + } member_method { name: "add_loss" argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-crop.pbtxt 
b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-crop.pbtxt index 6425f6c6ed6..ad813468f53 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-crop.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-crop.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.keras.layers.experimental.preprocessing.RandomCrop" tf_class { is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -119,6 +120,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'height\', \'width\', \'seed\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\'], " } + member_method { + name: "adapt" + argspec: "args=[\'self\', \'data\', \'reset_state\'], varargs=None, keywords=None, defaults=[\'True\'], " + } member_method { name: "add_loss" argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-flip.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-flip.pbtxt index 245723f1f56..15406e778f8 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-flip.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-flip.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.keras.layers.experimental.preprocessing.RandomFlip" tf_class { is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -119,6 +120,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'mode\', \'seed\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'horizontal_and_vertical\', \'None\', \'None\'], " } + member_method { + name: "adapt" + argspec: "args=[\'self\', \'data\', \'reset_state\'], varargs=None, keywords=None, defaults=[\'True\'], " + } member_method { name: "add_loss" argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-height.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-height.pbtxt index e56ec9e2dad..8119cb9687f 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-height.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-height.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.keras.layers.experimental.preprocessing.RandomHeight" tf_class { is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -119,6 +120,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'factor\', \'interpolation\', \'seed\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'bilinear\', \'None\', \'None\'], " } + member_method { + name: "adapt" + argspec: "args=[\'self\', \'data\', \'reset_state\'], varargs=None, keywords=None, defaults=[\'True\'], " + } member_method { name: "add_loss" argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-rotation.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-rotation.pbtxt index 167fa775273..ee9a0254382 100644 --- 
a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-rotation.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-rotation.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.keras.layers.experimental.preprocessing.RandomRotation" tf_class { is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -119,6 +120,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'factor\', \'fill_mode\', \'interpolation\', \'seed\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'reflect\', \'bilinear\', \'None\', \'None\'], " } + member_method { + name: "adapt" + argspec: "args=[\'self\', \'data\', \'reset_state\'], varargs=None, keywords=None, defaults=[\'True\'], " + } member_method { name: "add_loss" argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-translation.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-translation.pbtxt index 224417edfdd..7e1095e7503 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-translation.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-translation.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.keras.layers.experimental.preprocessing.RandomTranslation" tf_class { is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -119,6 +120,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'height_factor\', \'width_factor\', \'fill_mode\', \'interpolation\', \'seed\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'reflect\', \'bilinear\', \'None\', \'None\'], " } + member_method { + name: "adapt" + argspec: "args=[\'self\', \'data\', \'reset_state\'], varargs=None, keywords=None, defaults=[\'True\'], " + } member_method { name: "add_loss" argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-width.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-width.pbtxt index 48bcfbb94b9..0d113434d80 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-width.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-width.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.keras.layers.experimental.preprocessing.RandomWidth" tf_class { is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -119,6 +120,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'factor\', \'interpolation\', \'seed\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'bilinear\', \'None\', \'None\'], " } + member_method { + name: "adapt" + argspec: "args=[\'self\', \'data\', \'reset_state\'], varargs=None, keywords=None, defaults=[\'True\'], " + } member_method { name: "add_loss" argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-zoom.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-zoom.pbtxt index 570f3a3bc03..fd59a92a4af 100644 --- 
a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-zoom.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-random-zoom.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.keras.layers.experimental.preprocessing.RandomZoom" tf_class { is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -119,6 +120,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'height_factor\', \'width_factor\', \'fill_mode\', \'interpolation\', \'seed\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'reflect\', \'bilinear\', \'None\', \'None\'], " } + member_method { + name: "adapt" + argspec: "args=[\'self\', \'data\', \'reset_state\'], varargs=None, keywords=None, defaults=[\'True\'], " + } member_method { name: "add_loss" argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-rescaling.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-rescaling.pbtxt index fc759ce7ea6..c8fcedd3221 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-rescaling.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-rescaling.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.keras.layers.experimental.preprocessing.Rescaling" tf_class { is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -119,6 +120,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'scale\', \'offset\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.0\', \'None\'], " } + member_method { + name: "adapt" + argspec: "args=[\'self\', \'data\', \'reset_state\'], varargs=None, keywords=None, defaults=[\'True\'], " + } member_method { name: "add_loss" argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-resizing.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-resizing.pbtxt index abcb2d4876a..7efb8d72dcb 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-resizing.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-resizing.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.keras.layers.experimental.preprocessing.Resizing" tf_class { is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -119,6 +120,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'height\', \'width\', \'interpolation\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'bilinear\', \'None\'], " } + member_method { + name: "adapt" + argspec: "args=[\'self\', \'data\', \'reset_state\'], varargs=None, keywords=None, defaults=[\'True\'], " + } member_method { name: "add_loss" argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-string-lookup.__metaclass__.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-string-lookup.__metaclass__.pbtxt deleted file mode 100644 index 4cb57350380..00000000000 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-string-lookup.__metaclass__.pbtxt +++ /dev/null @@ -1,14 +0,0 @@ 
-path: "tensorflow.keras.layers.experimental.preprocessing.StringLookup.__metaclass__" -tf_class { - is_instance: "" - member_method { - name: "__init__" - } - member_method { - name: "mro" - } - member_method { - name: "register" - argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None" - } -} diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-text-vectorization.__metaclass__.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-text-vectorization.__metaclass__.pbtxt deleted file mode 100644 index fe45a5da03b..00000000000 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.experimental.preprocessing.-text-vectorization.__metaclass__.pbtxt +++ /dev/null @@ -1,14 +0,0 @@ -path: "tensorflow.keras.layers.experimental.preprocessing.TextVectorization.__metaclass__" -tf_class { - is_instance: "" - member_method { - name: "__init__" - } - member_method { - name: "mro" - } - member_method { - name: "register" - argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None" - } -} diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-category-crossing.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-category-crossing.pbtxt index d48eb4ecc4a..ceb38316d11 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-category-crossing.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-category-crossing.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.keras.layers.experimental.preprocessing.CategoryCrossing" tf_class { is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -119,6 +120,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'depth\', \'name\', \'separator\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\'], " } + member_method { + name: "adapt" + argspec: "args=[\'self\', \'data\', \'reset_state\'], varargs=None, keywords=None, defaults=[\'True\'], " + } member_method { name: "add_loss" argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-category-encoding.__metaclass__.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-category-encoding.__metaclass__.pbtxt deleted file mode 100644 index e907d9a293b..00000000000 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-category-encoding.__metaclass__.pbtxt +++ /dev/null @@ -1,14 +0,0 @@ -path: "tensorflow.keras.layers.experimental.preprocessing.CategoryEncoding.__metaclass__" -tf_class { - is_instance: "" - member_method { - name: "__init__" - } - member_method { - name: "mro" - } - member_method { - name: "register" - argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None" - } -} diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-center-crop.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-center-crop.pbtxt index 819c6a693c9..14d43cb08e8 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-center-crop.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-center-crop.pbtxt @@ -1,6 +1,7 @@ path: 
"tensorflow.keras.layers.experimental.preprocessing.CenterCrop" tf_class { is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -119,6 +120,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'height\', \'width\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'None\'], " } + member_method { + name: "adapt" + argspec: "args=[\'self\', \'data\', \'reset_state\'], varargs=None, keywords=None, defaults=[\'True\'], " + } member_method { name: "add_loss" argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-discretization.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-discretization.pbtxt index 5f78e5637ea..cb7a793f94d 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-discretization.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-discretization.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.keras.layers.experimental.preprocessing.Discretization" tf_class { is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -119,6 +120,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'bins\'], varargs=None, keywords=kwargs, defaults=None" } + member_method { + name: "adapt" + argspec: "args=[\'self\', \'data\', \'reset_state\'], varargs=None, keywords=None, defaults=[\'True\'], " + } member_method { name: "add_loss" argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-hashing.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-hashing.pbtxt index 51f113d4e4a..75a1efc2f15 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-hashing.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-hashing.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.keras.layers.experimental.preprocessing.Hashing" tf_class { is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -119,6 +120,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'num_bins\', \'salt\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\'], " } + member_method { + name: "adapt" + argspec: "args=[\'self\', \'data\', \'reset_state\'], varargs=None, keywords=None, defaults=[\'True\'], " + } member_method { name: "add_loss" argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-integer-lookup.__metaclass__.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-integer-lookup.__metaclass__.pbtxt deleted file mode 100644 index 409509cd4d2..00000000000 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-integer-lookup.__metaclass__.pbtxt +++ /dev/null @@ -1,14 +0,0 @@ -path: "tensorflow.keras.layers.experimental.preprocessing.IntegerLookup.__metaclass__" -tf_class { - is_instance: "" - member_method { - name: "__init__" - } - member_method { - name: "mro" - } - member_method { - name: "register" - argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None" - } -} diff --git 
a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-normalization.__metaclass__.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-normalization.__metaclass__.pbtxt deleted file mode 100644 index 20bb9904d18..00000000000 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-normalization.__metaclass__.pbtxt +++ /dev/null @@ -1,14 +0,0 @@ -path: "tensorflow.keras.layers.experimental.preprocessing.Normalization.__metaclass__" -tf_class { - is_instance: "" - member_method { - name: "__init__" - } - member_method { - name: "mro" - } - member_method { - name: "register" - argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None" - } -} diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.__metaclass__.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.__metaclass__.pbtxt deleted file mode 100644 index ceebb69d16a..00000000000 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-preprocessing-layer.__metaclass__.pbtxt +++ /dev/null @@ -1,14 +0,0 @@ -path: "tensorflow.keras.layers.experimental.preprocessing.PreprocessingLayer.__metaclass__" -tf_class { - is_instance: "" - member_method { - name: "__init__" - } - member_method { - name: "mro" - } - member_method { - name: "register" - argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None" - } -} diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-contrast.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-contrast.pbtxt index 0f575454a80..8eca3903616 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-contrast.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-contrast.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.keras.layers.experimental.preprocessing.RandomContrast" tf_class { is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -119,6 +120,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'factor\', \'seed\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\'], " } + member_method { + name: "adapt" + argspec: "args=[\'self\', \'data\', \'reset_state\'], varargs=None, keywords=None, defaults=[\'True\'], " + } member_method { name: "add_loss" argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-crop.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-crop.pbtxt index 6425f6c6ed6..ad813468f53 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-crop.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-crop.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.keras.layers.experimental.preprocessing.RandomCrop" tf_class { is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -119,6 +120,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'height\', \'width\', \'seed\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\'], " } + member_method { + name: "adapt" + 
argspec: "args=[\'self\', \'data\', \'reset_state\'], varargs=None, keywords=None, defaults=[\'True\'], " + } member_method { name: "add_loss" argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-flip.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-flip.pbtxt index 245723f1f56..15406e778f8 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-flip.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-flip.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.keras.layers.experimental.preprocessing.RandomFlip" tf_class { is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -119,6 +120,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'mode\', \'seed\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'horizontal_and_vertical\', \'None\', \'None\'], " } + member_method { + name: "adapt" + argspec: "args=[\'self\', \'data\', \'reset_state\'], varargs=None, keywords=None, defaults=[\'True\'], " + } member_method { name: "add_loss" argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-height.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-height.pbtxt index e56ec9e2dad..8119cb9687f 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-height.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-height.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.keras.layers.experimental.preprocessing.RandomHeight" tf_class { is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -119,6 +120,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'factor\', \'interpolation\', \'seed\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'bilinear\', \'None\', \'None\'], " } + member_method { + name: "adapt" + argspec: "args=[\'self\', \'data\', \'reset_state\'], varargs=None, keywords=None, defaults=[\'True\'], " + } member_method { name: "add_loss" argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-rotation.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-rotation.pbtxt index 167fa775273..ee9a0254382 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-rotation.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-rotation.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.keras.layers.experimental.preprocessing.RandomRotation" tf_class { is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -119,6 +120,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'factor\', \'fill_mode\', \'interpolation\', \'seed\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'reflect\', \'bilinear\', \'None\', \'None\'], " } + member_method { + name: "adapt" + argspec: "args=[\'self\', \'data\', \'reset_state\'], varargs=None, keywords=None, defaults=[\'True\'], " + } member_method { name: "add_loss" 
argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-translation.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-translation.pbtxt index 224417edfdd..7e1095e7503 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-translation.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-translation.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.keras.layers.experimental.preprocessing.RandomTranslation" tf_class { is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -119,6 +120,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'height_factor\', \'width_factor\', \'fill_mode\', \'interpolation\', \'seed\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'reflect\', \'bilinear\', \'None\', \'None\'], " } + member_method { + name: "adapt" + argspec: "args=[\'self\', \'data\', \'reset_state\'], varargs=None, keywords=None, defaults=[\'True\'], " + } member_method { name: "add_loss" argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-width.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-width.pbtxt index 48bcfbb94b9..0d113434d80 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-width.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-width.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.keras.layers.experimental.preprocessing.RandomWidth" tf_class { is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -119,6 +120,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'factor\', \'interpolation\', \'seed\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'bilinear\', \'None\', \'None\'], " } + member_method { + name: "adapt" + argspec: "args=[\'self\', \'data\', \'reset_state\'], varargs=None, keywords=None, defaults=[\'True\'], " + } member_method { name: "add_loss" argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-zoom.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-zoom.pbtxt index 570f3a3bc03..fd59a92a4af 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-zoom.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-random-zoom.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.keras.layers.experimental.preprocessing.RandomZoom" tf_class { is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -119,6 +120,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'height_factor\', \'width_factor\', \'fill_mode\', \'interpolation\', \'seed\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'reflect\', \'bilinear\', \'None\', \'None\'], " } + member_method { + name: "adapt" + argspec: "args=[\'self\', \'data\', \'reset_state\'], varargs=None, keywords=None, defaults=[\'True\'], " + } member_method { name: "add_loss" argspec: "args=[\'self\', \'losses\'], 
varargs=None, keywords=kwargs, defaults=None" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-rescaling.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-rescaling.pbtxt index fc759ce7ea6..c8fcedd3221 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-rescaling.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-rescaling.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.keras.layers.experimental.preprocessing.Rescaling" tf_class { is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -119,6 +120,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'scale\', \'offset\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.0\', \'None\'], " } + member_method { + name: "adapt" + argspec: "args=[\'self\', \'data\', \'reset_state\'], varargs=None, keywords=None, defaults=[\'True\'], " + } member_method { name: "add_loss" argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-resizing.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-resizing.pbtxt index abcb2d4876a..7efb8d72dcb 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-resizing.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-resizing.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.keras.layers.experimental.preprocessing.Resizing" tf_class { is_instance: "" + is_instance: "" is_instance: "" is_instance: "" is_instance: "" @@ -119,6 +120,10 @@ tf_class { name: "__init__" argspec: "args=[\'self\', \'height\', \'width\', \'interpolation\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'bilinear\', \'None\'], " } + member_method { + name: "adapt" + argspec: "args=[\'self\', \'data\', \'reset_state\'], varargs=None, keywords=None, defaults=[\'True\'], " + } member_method { name: "add_loss" argspec: "args=[\'self\', \'losses\'], varargs=None, keywords=kwargs, defaults=None" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-string-lookup.__metaclass__.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-string-lookup.__metaclass__.pbtxt deleted file mode 100644 index 4cb57350380..00000000000 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-string-lookup.__metaclass__.pbtxt +++ /dev/null @@ -1,14 +0,0 @@ -path: "tensorflow.keras.layers.experimental.preprocessing.StringLookup.__metaclass__" -tf_class { - is_instance: "" - member_method { - name: "__init__" - } - member_method { - name: "mro" - } - member_method { - name: "register" - argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None" - } -} diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-text-vectorization.__metaclass__.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-text-vectorization.__metaclass__.pbtxt deleted file mode 100644 index fe45a5da03b..00000000000 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.experimental.preprocessing.-text-vectorization.__metaclass__.pbtxt +++ /dev/null @@ -1,14 +0,0 @@ -path: 
"tensorflow.keras.layers.experimental.preprocessing.TextVectorization.__metaclass__" -tf_class { - is_instance: "" - member_method { - name: "__init__" - } - member_method { - name: "mro" - } - member_method { - name: "register" - argspec: "args=[\'cls\', \'subclass\'], varargs=None, keywords=None, defaults=None" - } -} From c5204573ed36ede733a73c75e9514944cf8884a6 Mon Sep 17 00:00:00 2001 From: Robert David Date: Thu, 23 Jul 2020 11:55:10 -0700 Subject: [PATCH 1182/2522] LSTM test cleanup: Use cartesian product for test parameter combinations instead of manually listing all interesting ones. This generates a test case that is redundant (float with quantization parameters), but this makes adding more parameters much simpler. PiperOrigin-RevId: 322834043 Change-Id: I74d402c4be3441df1aa5a80a15c5a40d19a57cf4 --- .../delegates/nnapi/acceleration_test_list.cc | 5 +++-- tensorflow/lite/kernels/lstm_test.cc | 16 +++++++--------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc b/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc index fd0e915d504..1167082f217 100644 --- a/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc +++ b/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc @@ -246,12 +246,13 @@ CifgPeepholeNoProjectionNoClippingUnidirectionalLstmTest/NonLayerNormLstmBlackBo -LstmOpTest/InvalidTypes # Float .+_LstmOpTest\.Test/0,29 -# HybridUint8 .+_LstmOpTest\.Test/1,29 +# HybridUint8 .+_LstmOpTest\.Test/2,29 +.+_LstmOpTest\.Test/3,29 # HybridInt8 --.+_LstmOpTest\.Test/3 -.+_LstmOpTest\.Test/4 +-.+_LstmOpTest\.Test/5 # maximum_minimum_test MaxMinOpTest/.+nt8Test,29 diff --git a/tensorflow/lite/kernels/lstm_test.cc b/tensorflow/lite/kernels/lstm_test.cc index fef01462c67..17fdeb0e473 100644 --- a/tensorflow/lite/kernels/lstm_test.cc +++ b/tensorflow/lite/kernels/lstm_test.cc @@ -2221,15 +2221,13 @@ TEST(LstmOpTest, InvalidTypes) { #endif // Test parameter controls asymmetric_quantize_inputs in LSTMOpModel. -#define QUANTIZE_PARAMETER_TEST(test) \ - INSTANTIATE_TEST_SUITE_P( \ - test, test, \ - ::testing::ValuesIn(std::vector>{ \ - {TensorType_FLOAT32, false}, \ - {TensorType_UINT8, false}, \ - {TensorType_UINT8, true}, \ - {TensorType_INT8, false}, \ - {TensorType_INT8, true}})) +#define QUANTIZE_PARAMETER_TEST(test) \ + INSTANTIATE_TEST_SUITE_P( \ + test, test, \ + ::testing::Combine( \ + ::testing::Values(TensorType_FLOAT32, TensorType_UINT8, \ + TensorType_UINT8), \ + ::testing::Bool())) QUANTIZE_PARAMETER_TEST(NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest); QUANTIZE_PARAMETER_TEST(Cifg_Peephole_NoProjection_NoLayerNorm_LstmOpTest); From a8a884db8b1c7413bb3723efd8ddf716fe30aac7 Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: Thu, 23 Jul 2020 11:55:42 -0700 Subject: [PATCH 1183/2522] Use proto to configure tf.data service dispatch server. This simplifies adding new configuration properties, so that we don't need to plumb new properties through. This also gives us a single place to document all configuration options (in the .proto file). A followup CL will do the same for worker configuration. 
PiperOrigin-RevId: 322834156 Change-Id: I72bd7fccabf124fadb6a5675b97556c6c5c3fceb --- tensorflow/core/data/service/BUILD | 3 +++ tensorflow/core/data/service/dispatcher_impl.cc | 14 ++++++++------ tensorflow/core/data/service/dispatcher_impl.h | 8 +++++--- .../core/data/service/grpc_dispatcher_impl.cc | 7 ++++--- .../core/data/service/grpc_dispatcher_impl.h | 3 ++- tensorflow/core/data/service/server_lib.cc | 12 ++++++------ tensorflow/core/data/service/server_lib.h | 6 ++++-- tensorflow/core/data/service/test_cluster.cc | 6 +++++- tensorflow/core/protobuf/BUILD | 2 ++ .../data/experimental/service_config.proto | 12 ++++++++++++ tensorflow/python/data/experimental/service/BUILD | 1 + .../python/data/experimental/service/server_lib.py | 5 ++++- .../experimental/service/server_lib_wrapper.cc | 11 +++++++++-- 13 files changed, 65 insertions(+), 25 deletions(-) create mode 100644 tensorflow/core/protobuf/data/experimental/service_config.proto diff --git a/tensorflow/core/data/service/BUILD b/tensorflow/core/data/service/BUILD index d2a887a82f8..d7cc7a3e528 100644 --- a/tensorflow/core/data/service/BUILD +++ b/tensorflow/core/data/service/BUILD @@ -171,6 +171,7 @@ cc_library( hdrs = ["test_cluster.h"], deps = [ ":server_lib", + "//tensorflow/core:protos_all_cc", "//tensorflow/core/platform:errors", "@com_google_absl//absl/strings", ], @@ -213,6 +214,7 @@ cc_library( deps = [ ":dispatcher_cc_grpc_proto", ":dispatcher_impl", + "//tensorflow/core:protos_all_cc", "//tensorflow/core/distributed_runtime/rpc:grpc_util", tf_grpc_cc_dependency(), ], @@ -254,6 +256,7 @@ cc_library( ":grpc_util", ":grpc_worker_impl", "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", "//tensorflow/core:tensorflow", tf_grpc_cc_dependency(), ], diff --git a/tensorflow/core/data/service/dispatcher_impl.cc b/tensorflow/core/data/service/dispatcher_impl.cc index 22a86570b46..4a1ed0b1f57 100644 --- a/tensorflow/core/data/service/dispatcher_impl.cc +++ b/tensorflow/core/data/service/dispatcher_impl.cc @@ -33,28 +33,29 @@ limitations under the License. 
#include "tensorflow/core/kernels/data/dataset_utils.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/protobuf/data/experimental/service_config.pb.h" #include "tensorflow/core/public/session_options.h" namespace tensorflow { namespace data { namespace { -Status CreateWorkerStub(const std::string& address, - const std::string& protocol_, +Status CreateWorkerStub(const std::string& address, const std::string& protocol, std::unique_ptr* stub) { ::grpc::ChannelArguments args; args.SetMaxReceiveMessageSize(-1); std::shared_ptr<::grpc::ChannelCredentials> credentials; TF_RETURN_IF_ERROR( - CredentialsFactory::CreateClientCredentials(protocol_, &credentials)); + CredentialsFactory::CreateClientCredentials(protocol, &credentials)); auto channel = ::grpc::CreateCustomChannel(address, credentials, args); *stub = WorkerService::NewStub(channel); return Status::OK(); } } // namespace -DataServiceDispatcherImpl::DataServiceDispatcherImpl(const std::string protocol) - : protocol_(protocol) {} +DataServiceDispatcherImpl::DataServiceDispatcherImpl( + const experimental::DispatcherConfig& config) + : config_(config) {} Status DataServiceDispatcherImpl::RegisterWorker( const RegisterWorkerRequest* request, RegisterWorkerResponse* response) { @@ -295,7 +296,8 @@ DataServiceDispatcherImpl::CreateTaskLocked(Job* job, Status DataServiceDispatcherImpl::EnsureWorkerStubInitialized(Worker* worker) { if (!worker->stub()) { std::unique_ptr stub; - TF_RETURN_IF_ERROR(CreateWorkerStub(worker->address(), protocol_, &stub)); + TF_RETURN_IF_ERROR( + CreateWorkerStub(worker->address(), config_.protocol(), &stub)); worker->set_stub(std::move(stub)); } return Status::OK(); diff --git a/tensorflow/core/data/service/dispatcher_impl.h b/tensorflow/core/data/service/dispatcher_impl.h index 84770f7056f..1c45083b5d3 100644 --- a/tensorflow/core/data/service/dispatcher_impl.h +++ b/tensorflow/core/data/service/dispatcher_impl.h @@ -23,6 +23,7 @@ limitations under the License. #include "tensorflow/core/data/service/worker.grpc.pb.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/protobuf/data/experimental/service_config.pb.h" #include "tensorflow/core/public/session.h" namespace tensorflow { @@ -42,7 +43,8 @@ namespace data { // iterating over all of or part of the dataset. Workers process tasks. class DataServiceDispatcherImpl { public: - explicit DataServiceDispatcherImpl(const std::string protocol); + explicit DataServiceDispatcherImpl( + const experimental::DispatcherConfig& config); // See dispatcher.proto for API documentation. @@ -198,8 +200,8 @@ class DataServiceDispatcherImpl { // dataset_id, returning an error status describing any difference. Status ValidateMatchingJob(const Job& job, ProcessingMode processing_mode, int64 dataset_id); - // Protocol to use for communicating with workers. - const std::string protocol_; + + const experimental::DispatcherConfig& config_; mutex mu_; diff --git a/tensorflow/core/data/service/grpc_dispatcher_impl.cc b/tensorflow/core/data/service/grpc_dispatcher_impl.cc index 38ecc7057be..a26164ed48f 100644 --- a/tensorflow/core/data/service/grpc_dispatcher_impl.cc +++ b/tensorflow/core/data/service/grpc_dispatcher_impl.cc @@ -17,6 +17,7 @@ limitations under the License. 
#include "grpcpp/server_context.h" #include "tensorflow/core/distributed_runtime/rpc/grpc_util.h" +#include "tensorflow/core/protobuf/data/experimental/service_config.pb.h" namespace tensorflow { namespace data { @@ -25,9 +26,9 @@ using ::grpc::ServerBuilder; using ::grpc::ServerContext; using ::grpc::Status; -GrpcDispatcherImpl::GrpcDispatcherImpl(ServerBuilder* server_builder, - const std::string& protocol) - : impl_(protocol) { +GrpcDispatcherImpl::GrpcDispatcherImpl( + ServerBuilder* server_builder, const experimental::DispatcherConfig& config) + : impl_(config) { server_builder->RegisterService(this); VLOG(1) << "Registered data service dispatcher"; } diff --git a/tensorflow/core/data/service/grpc_dispatcher_impl.h b/tensorflow/core/data/service/grpc_dispatcher_impl.h index f407bd64127..24bf2d79061 100644 --- a/tensorflow/core/data/service/grpc_dispatcher_impl.h +++ b/tensorflow/core/data/service/grpc_dispatcher_impl.h @@ -19,6 +19,7 @@ limitations under the License. #include "grpcpp/server_builder.h" #include "tensorflow/core/data/service/dispatcher.grpc.pb.h" #include "tensorflow/core/data/service/dispatcher_impl.h" +#include "tensorflow/core/protobuf/data/experimental/service_config.pb.h" namespace tensorflow { namespace data { @@ -35,7 +36,7 @@ namespace data { class GrpcDispatcherImpl : public DispatcherService::Service { public: explicit GrpcDispatcherImpl(grpc::ServerBuilder* server_builder, - const std::string& protocol); + const experimental::DispatcherConfig& config); ~GrpcDispatcherImpl() override {} #define HANDLER(method) \ diff --git a/tensorflow/core/data/service/server_lib.cc b/tensorflow/core/data/service/server_lib.cc index 4f34bf9d0c7..6d912b1c802 100644 --- a/tensorflow/core/data/service/server_lib.cc +++ b/tensorflow/core/data/service/server_lib.cc @@ -72,14 +72,14 @@ void GrpcDataServerBase::Join() { server_->Wait(); } int GrpcDataServerBase::BoundPort() { return bound_port(); } -DispatchGrpcDataServer::DispatchGrpcDataServer(int port, - const std::string& protocol) - : GrpcDataServerBase(port, protocol) {} +DispatchGrpcDataServer::DispatchGrpcDataServer( + const experimental::DispatcherConfig& config) + : GrpcDataServerBase(config.port(), config.protocol()), config_(config) {} DispatchGrpcDataServer::~DispatchGrpcDataServer() { delete service_; } void DispatchGrpcDataServer::AddServiceToBuilder(grpc::ServerBuilder* builder) { - auto service = absl::make_unique(builder, protocol_); + auto service = absl::make_unique(builder, config_); service_ = service.release(); } @@ -122,9 +122,9 @@ Status WorkerGrpcDataServer::StartServiceInternal() { return Status::OK(); } -Status NewDispatchServer(int port, const std::string& protocol, +Status NewDispatchServer(const experimental::DispatcherConfig& config, std::unique_ptr* out_server) { - *out_server = absl::make_unique(port, protocol); + *out_server = absl::make_unique(config); return Status::OK(); } diff --git a/tensorflow/core/data/service/server_lib.h b/tensorflow/core/data/service/server_lib.h index 2190c7a56fe..d147f47c5e4 100644 --- a/tensorflow/core/data/service/server_lib.h +++ b/tensorflow/core/data/service/server_lib.h @@ -19,6 +19,7 @@ limitations under the License. 
#include "grpcpp/server.h" #include "grpcpp/server_builder.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/protobuf/data/experimental/service_config.pb.h" namespace tensorflow { namespace data { @@ -72,7 +73,7 @@ class GrpcDataServerBase { class DispatchGrpcDataServer : public GrpcDataServerBase { public: - DispatchGrpcDataServer(int requested_port, const std::string& protocol); + explicit DispatchGrpcDataServer(const experimental::DispatcherConfig& config); ~DispatchGrpcDataServer() override; // Returns the number of workers registerd with the dispatcher. @@ -83,6 +84,7 @@ class DispatchGrpcDataServer : public GrpcDataServerBase { Status StartServiceInternal() override { return Status::OK(); } private: + const experimental::DispatcherConfig config_; // Owned. We use a raw pointer because GrpcDispatcherImpl is forward-declared. GrpcDispatcherImpl* service_; }; @@ -106,7 +108,7 @@ class WorkerGrpcDataServer : public GrpcDataServerBase { }; // Creates a dispatch tf.data server and stores it in `*out_server`. -Status NewDispatchServer(int port, const std::string& protocol, +Status NewDispatchServer(const experimental::DispatcherConfig& config, std::unique_ptr* out_server); // Creates a worker tf.data server and stores it in `*out_server`. diff --git a/tensorflow/core/data/service/test_cluster.cc b/tensorflow/core/data/service/test_cluster.cc index 4066a75a374..ad0d2be87d8 100644 --- a/tensorflow/core/data/service/test_cluster.cc +++ b/tensorflow/core/data/service/test_cluster.cc @@ -18,6 +18,7 @@ limitations under the License. #include "absl/strings/str_split.h" #include "tensorflow/core/data/service/server_lib.h" #include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/protobuf/data/experimental/service_config.pb.h" namespace tensorflow { namespace data { @@ -45,7 +46,10 @@ Status TestCluster::Initialize() { "Test cluster has already been initialized."); } initialized_ = true; - TF_RETURN_IF_ERROR(NewDispatchServer(/*port=*/0, kProtocol, &dispatcher_)); + experimental::DispatcherConfig config; + config.set_port(0); + config.set_protocol(kProtocol); + TF_RETURN_IF_ERROR(NewDispatchServer(config, &dispatcher_)); TF_RETURN_IF_ERROR(dispatcher_->Start()); dispatcher_address_ = absl::StrCat("localhost:", dispatcher_->BoundPort()); workers_.reserve(num_workers_); diff --git a/tensorflow/core/protobuf/BUILD b/tensorflow/core/protobuf/BUILD index 21b3ed572f1..69019cec9ce 100644 --- a/tensorflow/core/protobuf/BUILD +++ b/tensorflow/core/protobuf/BUILD @@ -140,6 +140,7 @@ exports_files( # TODO(ebrevdo): Re-enable once CriticalSection is in core. # "critical_section.proto", "data/experimental/snapshot.proto", + "data/experimental/service_config.proto", "debug_event.proto", "meta_graph.proto", "named_tensor.proto", @@ -165,6 +166,7 @@ tf_proto_library( # TODO(ebrevdo): Re-enable once CriticalSection is in core. # "critical_section.proto", "data/experimental/snapshot.proto", + "data/experimental/service_config.proto", "debug_event.proto", "meta_graph.proto", "named_tensor.proto", diff --git a/tensorflow/core/protobuf/data/experimental/service_config.proto b/tensorflow/core/protobuf/data/experimental/service_config.proto new file mode 100644 index 00000000000..5dcc3c69083 --- /dev/null +++ b/tensorflow/core/protobuf/data/experimental/service_config.proto @@ -0,0 +1,12 @@ +syntax = "proto3"; + +package tensorflow.data.experimental; + +// Configuration for a tf.data service DispatchServer. +message DispatcherConfig { + // The port for the dispatcher to bind to. 
A value of 0 indicates that the + // dispatcher may bind to any available port. + int64 port = 1; + // The protocol for the dispatcher to use when connecting to workers. + string protocol = 2; +} diff --git a/tensorflow/python/data/experimental/service/BUILD b/tensorflow/python/data/experimental/service/BUILD index f08fef2b669..f072c5f2208 100644 --- a/tensorflow/python/data/experimental/service/BUILD +++ b/tensorflow/python/data/experimental/service/BUILD @@ -39,6 +39,7 @@ tf_py_test( srcs = ["server_lib_test.py"], deps = [ ":server_lib", + "//tensorflow/core:protos_all_py", "//tensorflow/python:platform_test", ], ) diff --git a/tensorflow/python/data/experimental/service/server_lib.py b/tensorflow/python/data/experimental/service/server_lib.py index 5a7ce73b4c7..3e355565308 100644 --- a/tensorflow/python/data/experimental/service/server_lib.py +++ b/tensorflow/python/data/experimental/service/server_lib.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function # pylint: disable=invalid-import-order,g-bad-import-order, unused-import +from tensorflow.core.protobuf.data.experimental import service_config_pb2 from tensorflow.python import pywrap_tensorflow from tensorflow.python.data.experimental.service import _pywrap_server_lib from tensorflow.python.util.tf_export import tf_export @@ -68,7 +69,9 @@ class DispatchServer(object): if protocol is None: protocol = "grpc" self._protocol = protocol - self._server = _pywrap_server_lib.TF_DATA_NewDispatchServer(port, protocol) + config = service_config_pb2.DispatcherConfig(port=port, protocol=protocol) + self._server = _pywrap_server_lib.TF_DATA_NewDispatchServer( + config.SerializeToString()) if start: self._server.start() diff --git a/tensorflow/python/data/experimental/service/server_lib_wrapper.cc b/tensorflow/python/data/experimental/service/server_lib_wrapper.cc index e288179dd36..b8250aaeda6 100644 --- a/tensorflow/python/data/experimental/service/server_lib_wrapper.cc +++ b/tensorflow/python/data/experimental/service/server_lib_wrapper.cc @@ -22,6 +22,8 @@ limitations under the License. #include "pybind11/pytypes.h" #include "pybind11/stl.h" #include "tensorflow/core/data/service/server_lib.h" +#include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/protobuf/data/experimental/service_config.pb.h" #include "tensorflow/python/lib/core/pybind11_lib.h" #include "tensorflow/python/lib/core/pybind11_status.h" @@ -50,11 +52,16 @@ PYBIND11_MODULE(_pywrap_server_lib, m) { m.def( "TF_DATA_NewDispatchServer", - [](int port, std::string protocol) + [](std::string serialized_dispatcher_config) -> std::unique_ptr<tensorflow::data::DispatchGrpcDataServer> { + tensorflow::data::experimental::DispatcherConfig config; + if (!config.ParseFromString(serialized_dispatcher_config)) { + tensorflow::MaybeRaiseFromStatus(tensorflow::errors::InvalidArgument( + "Failed to deserialize dispatcher config.")); + } std::unique_ptr<tensorflow::data::DispatchGrpcDataServer> server; tensorflow::Status status = - tensorflow::data::NewDispatchServer(port, protocol, &server); + tensorflow::data::NewDispatchServer(config, &server); tensorflow::MaybeRaiseFromStatus(status); return server; }, From c4a2703957b78244a42e2b2571b8da3c46018afa Mon Sep 17 00:00:00 2001 From: Robert David Date: Thu, 23 Jul 2020 11:57:29 -0700 Subject: [PATCH 1184/2522] Explicitly set clipping values to zero.
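The generated flatbuffer helper already defaults both clip parameters to zero, so writing /*cell_clip=*/0.0f and /*proj_clip=*/0.0f at the call site is a readability change rather than a behavioral one. A minimal sketch of the call as the test now spells it out, assuming the usual generated CreateLSTMOptions signature (the surrounding names come from the diff below):

    // Sketch only: explicit clip values instead of relying on the schema defaults.
    auto lstm_options = CreateLSTMOptions(
        builder_, ActivationFunctionType_TANH,
        /*cell_clip=*/0.0f, /*proj_clip=*/0.0f,
        LSTMKernelType_FULL, asymmetric_quantize_inputs);
    SetBuiltinOp(BuiltinOperator_LSTM, BuiltinOptions_LSTMOptions,
                 lstm_options.Union());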
PiperOrigin-RevId: 322834509 Change-Id: Ia3eaf897efd1a58c67631f966de6ae42d56a43ad --- tensorflow/lite/kernels/lstm_test.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/lite/kernels/lstm_test.cc b/tensorflow/lite/kernels/lstm_test.cc index 17fdeb0e473..1687e543f57 100644 --- a/tensorflow/lite/kernels/lstm_test.cc +++ b/tensorflow/lite/kernels/lstm_test.cc @@ -131,6 +131,7 @@ class LSTMOpModel : public SingleOpModel { SetBuiltinOp( BuiltinOperator_LSTM, BuiltinOptions_LSTMOptions, CreateLSTMOptions(builder_, ActivationFunctionType_TANH, + /*cell_clip=*/0.0f, /*proj_clip=*/0.0f, LSTMKernelType_FULL, asymmetric_quantize_inputs) .Union()); From d8dcead44017aa0381ca16254a161099eeb7c2e4 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Thu, 23 Jul 2020 12:06:49 -0700 Subject: [PATCH 1185/2522] Fix stddev 0 bug in Normalization layer PiperOrigin-RevId: 322836694 Change-Id: I22a669e19f369cba271e56b63a08ca2763a6eab8 --- .../python/keras/layers/preprocessing/normalization.py | 3 ++- .../keras/layers/preprocessing/normalization_test.py | 10 ++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/keras/layers/preprocessing/normalization.py b/tensorflow/python/keras/layers/preprocessing/normalization.py index 07f8d40ec24..4b75def0247 100644 --- a/tensorflow/python/keras/layers/preprocessing/normalization.py +++ b/tensorflow/python/keras/layers/preprocessing/normalization.py @@ -156,7 +156,8 @@ class Normalization(base_preprocessing_layer.CombinerPreprocessingLayer): # broadcasts the data correctly. mean = array_ops.reshape(self.mean, self._broadcast_shape) variance = array_ops.reshape(self.variance, self._broadcast_shape) - return (inputs - mean) / math_ops.sqrt(variance) + return ((inputs - mean) / + math_ops.maximum(math_ops.sqrt(variance), K.epsilon())) def compute_output_shape(self, input_shape): return input_shape diff --git a/tensorflow/python/keras/layers/preprocessing/normalization_test.py b/tensorflow/python/keras/layers/preprocessing/normalization_test.py index f97b8db50ec..69eafc54adc 100644 --- a/tensorflow/python/keras/layers/preprocessing/normalization_test.py +++ b/tensorflow/python/keras/layers/preprocessing/normalization_test.py @@ -97,6 +97,16 @@ def _get_layer_computation_test_cases(): np.float32), "testcase_name": "3d_multiple_axis" + }, { + "adapt_data": + np.zeros((3, 4)), + "axis": -1, + "test_data": + np.zeros((3, 4)), + "expected": + np.zeros((3, 4)), + "testcase_name": + "zero_variance" }) crossed_test_cases = [] From 93f0f7f817413a630f92657f312d8945de64bdfb Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Thu, 23 Jul 2020 12:19:36 -0700 Subject: [PATCH 1186/2522] Port the comparisons kernels to the new TfLiteEvalTensor API. 
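The port applies the same mechanical substitution in every comparison kernel: the TfLiteTensor accessors from kernels/kernel_util.h are replaced by their TfLiteEvalTensor counterparts in micro/kernels/kernel_util.h. A minimal before/after sketch of one case, using the same names the kernels below use (illustrative only; the template argument follows the input type being compared):

    // Before: TfLiteTensor accessors resolved through the context.
    const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
    const bool* input1_data = GetTensorData<bool>(input1);
    bool requires_broadcast = !HaveSameShapes(input1, input2);

    // After: TfLiteEvalTensor accessors from the tflite::micro namespace.
    const TfLiteEvalTensor* input1 =
        tflite::micro::GetEvalInput(context, node, kInputTensor1);
    const bool* input1_data = tflite::micro::GetTensorData<bool>(input1);
    bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2);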
PiperOrigin-RevId: 322839325 Change-Id: Ib81c73566fe10ebaea51295f775ee463f77d548e --- tensorflow/lite/micro/kernels/BUILD | 1 + tensorflow/lite/micro/kernels/comparisons.cc | 435 +++++++++++------- .../lite/micro/kernels/comparisons_test.cc | 254 +++++----- tensorflow/lite/micro/kernels/kernel_util.cc | 7 + tensorflow/lite/micro/kernels/kernel_util.h | 4 + 5 files changed, 390 insertions(+), 311 deletions(-) diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index c98e23a54f3..78e66a35197 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD @@ -334,6 +334,7 @@ tflite_micro_cc_test( "comparisons_test.cc", ], deps = [ + ":kernel_runner", "//tensorflow/lite/c:common", "//tensorflow/lite/micro:op_resolvers", "//tensorflow/lite/micro/testing:micro_test", diff --git a/tensorflow/lite/micro/kernels/comparisons.cc b/tensorflow/lite/micro/kernels/comparisons.cc index 8f6a8305eb5..ed814527e94 100644 --- a/tensorflow/lite/micro/kernels/comparisons.cc +++ b/tensorflow/lite/micro/kernels/comparisons.cc @@ -18,6 +18,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/quantization_util.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { namespace ops { @@ -37,81 +38,96 @@ TfLiteStatus EqualEval(TfLiteContext* context, TfLiteNode* node) { TFLITE_DCHECK(node->user_data != nullptr); const OpData* data = static_cast(node->user_data); - const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); - const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kInputTensor1); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kInputTensor2); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); - RuntimeShape input1_shape = GetTensorShape(input1); - RuntimeShape input2_shape = GetTensorShape(input2); - RuntimeShape output_shape = GetTensorShape(output); - bool* output_data = GetTensorData(output); + RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1); + RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2); + RuntimeShape output_shape = tflite::micro::GetTensorShape(output); + bool* output_data = tflite::micro::GetTensorData(output); - bool requires_broadcast = !HaveSameShapes(input1, input2); + bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2); switch (input1->type) { case kTfLiteBool: requires_broadcast ? reference_ops::Broadcast4DSlowEqualNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data) : reference_ops::EqualNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data); break; case kTfLiteFloat32: requires_broadcast ? 
reference_ops::Broadcast4DSlowEqualNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data) : reference_ops::EqualNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data); break; case kTfLiteInt32: requires_broadcast ? reference_ops::Broadcast4DSlowEqualNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data) : reference_ops::EqualNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data); break; case kTfLiteInt64: requires_broadcast ? reference_ops::Broadcast4DSlowEqualNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data) : reference_ops::EqualNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data); break; case kTfLiteUInt8: requires_broadcast ? reference_ops::Broadcast4DSlowEqualWithScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data) : reference_ops::EqualWithScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data); break; case kTfLiteInt8: requires_broadcast ? 
reference_ops::Broadcast4DSlowEqualWithScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data) : reference_ops::EqualWithScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data); break; default: @@ -127,81 +143,96 @@ TfLiteStatus NotEqualEval(TfLiteContext* context, TfLiteNode* node) { TFLITE_DCHECK(node->user_data != nullptr); const OpData* data = static_cast(node->user_data); - const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); - const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kInputTensor1); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kInputTensor2); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); - RuntimeShape input1_shape = GetTensorShape(input1); - RuntimeShape input2_shape = GetTensorShape(input2); - RuntimeShape output_shape = GetTensorShape(output); - bool* output_data = GetTensorData(output); + RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1); + RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2); + RuntimeShape output_shape = tflite::micro::GetTensorShape(output); + bool* output_data = tflite::micro::GetTensorData(output); - bool requires_broadcast = !HaveSameShapes(input1, input2); + bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2); switch (input1->type) { case kTfLiteBool: requires_broadcast ? reference_ops::Broadcast4DSlowNotEqualNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data) : reference_ops::NotEqualNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data); break; case kTfLiteFloat32: requires_broadcast ? reference_ops::Broadcast4DSlowNotEqualNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data) : reference_ops::NotEqualNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data); break; case kTfLiteInt32: requires_broadcast ? 
reference_ops::Broadcast4DSlowNotEqualNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data) : reference_ops::NotEqualNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data); break; case kTfLiteInt64: requires_broadcast ? reference_ops::Broadcast4DSlowNotEqualNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data) : reference_ops::NotEqualNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data); break; case kTfLiteUInt8: requires_broadcast ? reference_ops::Broadcast4DSlowNotEqualWithScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data) : reference_ops::NotEqualWithScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data); break; case kTfLiteInt8: requires_broadcast ? 
reference_ops::Broadcast4DSlowNotEqualWithScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data) : reference_ops::NotEqualWithScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data); break; default: @@ -216,70 +247,83 @@ TfLiteStatus GreaterEval(TfLiteContext* context, TfLiteNode* node) { TFLITE_DCHECK(node->user_data != nullptr); const OpData* data = static_cast(node->user_data); - const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); - const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kInputTensor1); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kInputTensor2); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); - RuntimeShape input1_shape = GetTensorShape(input1); - RuntimeShape input2_shape = GetTensorShape(input2); - RuntimeShape output_shape = GetTensorShape(output); - bool* output_data = GetTensorData(output); + RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1); + RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2); + RuntimeShape output_shape = tflite::micro::GetTensorShape(output); + bool* output_data = tflite::micro::GetTensorData(output); - bool requires_broadcast = !HaveSameShapes(input1, input2); + bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2); switch (input1->type) { case kTfLiteFloat32: requires_broadcast ? reference_ops::Broadcast4DSlowGreaterNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data) : reference_ops::GreaterNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data); break; case kTfLiteInt32: requires_broadcast ? reference_ops::Broadcast4DSlowGreaterNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data) : reference_ops::GreaterNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data); break; case kTfLiteInt64: requires_broadcast ? 
reference_ops::Broadcast4DSlowGreaterNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data) : reference_ops::GreaterNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data); break; case kTfLiteUInt8: requires_broadcast ? reference_ops::Broadcast4DSlowGreaterWithScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data) : reference_ops::GreaterWithScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data); break; case kTfLiteInt8: requires_broadcast ? reference_ops::Broadcast4DSlowGreaterWithScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data) : reference_ops::GreaterWithScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data); break; default: @@ -294,70 +338,83 @@ TfLiteStatus GreaterEqualEval(TfLiteContext* context, TfLiteNode* node) { TFLITE_DCHECK(node->user_data != nullptr); const OpData* data = static_cast(node->user_data); - const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); - const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kInputTensor1); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kInputTensor2); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); - RuntimeShape input1_shape = GetTensorShape(input1); - RuntimeShape input2_shape = GetTensorShape(input2); - RuntimeShape output_shape = GetTensorShape(output); - bool* output_data = GetTensorData(output); + RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1); + RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2); + RuntimeShape output_shape = tflite::micro::GetTensorShape(output); + bool* output_data = tflite::micro::GetTensorData(output); - bool requires_broadcast = !HaveSameShapes(input1, input2); + bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2); switch (input1->type) { case kTfLiteFloat32: requires_broadcast ? 
reference_ops::Broadcast4DSlowGreaterEqualNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data) : reference_ops::GreaterEqualNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data); break; case kTfLiteInt32: requires_broadcast ? reference_ops::Broadcast4DSlowGreaterEqualNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data) : reference_ops::GreaterEqualNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data); break; case kTfLiteInt64: requires_broadcast ? reference_ops::Broadcast4DSlowGreaterEqualNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data) : reference_ops::GreaterEqualNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data); break; case kTfLiteUInt8: requires_broadcast ? reference_ops::Broadcast4DSlowGreaterEqualWithScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data) : reference_ops::GreaterEqualWithScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data); break; case kTfLiteInt8: requires_broadcast ? 
reference_ops::Broadcast4DSlowGreaterEqualWithScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data) : reference_ops::GreaterEqualWithScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data); break; default: @@ -372,70 +429,83 @@ TfLiteStatus LessEval(TfLiteContext* context, TfLiteNode* node) { TFLITE_DCHECK(node->user_data != nullptr); const OpData* data = static_cast(node->user_data); - const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); - const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kInputTensor1); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kInputTensor2); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); - RuntimeShape input1_shape = GetTensorShape(input1); - RuntimeShape input2_shape = GetTensorShape(input2); - RuntimeShape output_shape = GetTensorShape(output); - bool* output_data = GetTensorData(output); + RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1); + RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2); + RuntimeShape output_shape = tflite::micro::GetTensorShape(output); + bool* output_data = tflite::micro::GetTensorData(output); - bool requires_broadcast = !HaveSameShapes(input1, input2); + bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2); switch (input1->type) { case kTfLiteFloat32: requires_broadcast ? reference_ops::Broadcast4DSlowLessNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data) : reference_ops::LessNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data); break; case kTfLiteInt32: requires_broadcast ? reference_ops::Broadcast4DSlowLessNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data) : reference_ops::LessNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data); break; case kTfLiteInt64: requires_broadcast ? 
reference_ops::Broadcast4DSlowLessNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data) : reference_ops::LessNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data); break; case kTfLiteUInt8: requires_broadcast ? reference_ops::Broadcast4DSlowLessWithScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data) : reference_ops::LessWithScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data); break; case kTfLiteInt8: requires_broadcast ? reference_ops::Broadcast4DSlowLessWithScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data) : reference_ops::LessWithScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data); break; default: @@ -450,70 +520,83 @@ TfLiteStatus LessEqualEval(TfLiteContext* context, TfLiteNode* node) { TFLITE_DCHECK(node->user_data != nullptr); const OpData* data = static_cast(node->user_data); - const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); - const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kInputTensor1); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kInputTensor2); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); - RuntimeShape input1_shape = GetTensorShape(input1); - RuntimeShape input2_shape = GetTensorShape(input2); - RuntimeShape output_shape = GetTensorShape(output); - bool* output_data = GetTensorData(output); + RuntimeShape input1_shape = tflite::micro::GetTensorShape(input1); + RuntimeShape input2_shape = tflite::micro::GetTensorShape(input2); + RuntimeShape output_shape = tflite::micro::GetTensorShape(output); + bool* output_data = tflite::micro::GetTensorData(output); - bool requires_broadcast = !HaveSameShapes(input1, input2); + bool requires_broadcast = !tflite::micro::HaveSameShapes(input1, input2); switch (input1->type) { case kTfLiteFloat32: requires_broadcast ? 
reference_ops::Broadcast4DSlowLessEqualNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data) : reference_ops::LessEqualNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data); break; case kTfLiteInt32: requires_broadcast ? reference_ops::Broadcast4DSlowLessEqualNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data) : reference_ops::LessEqualNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data); break; case kTfLiteInt64: requires_broadcast ? reference_ops::Broadcast4DSlowLessEqualNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data) : reference_ops::LessEqualNoScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data); break; case kTfLiteUInt8: requires_broadcast ? reference_ops::Broadcast4DSlowLessEqualWithScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data) : reference_ops::LessEqualWithScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data); break; case kTfLiteInt8: requires_broadcast ? reference_ops::Broadcast4DSlowLessEqualWithScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data) : reference_ops::LessEqualWithScaling( - data->params, input1_shape, GetTensorData(input1), - input2_shape, GetTensorData(input2), output_shape, + data->params, input1_shape, + tflite::micro::GetTensorData(input1), input2_shape, + tflite::micro::GetTensorData(input2), output_shape, output_data); break; default: diff --git a/tensorflow/lite/micro/kernels/comparisons_test.cc b/tensorflow/lite/micro/kernels/comparisons_test.cc index c8a1e2646b3..393f0e22187 100644 --- a/tensorflow/lite/micro/kernels/comparisons_test.cc +++ b/tensorflow/lite/micro/kernels/comparisons_test.cc @@ -17,7 +17,7 @@ limitations under the License. 
#include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/micro/all_ops_resolver.h" +#include "tensorflow/lite/micro/kernels/kernel_runner.h" #include "tensorflow/lite/micro/testing/micro_test.h" #include "tensorflow/lite/micro/testing/test_utils.h" @@ -29,51 +29,33 @@ constexpr int inputs_size = 2; constexpr int outputs_size = 1; constexpr int tensors_size = inputs_size + outputs_size; -void TestComparison(tflite::BuiltinOperator op, TfLiteTensor* tensors, - bool* expected_output_data, bool* output_data) { +void TestComparison(const TfLiteRegistration& registration, + TfLiteTensor* tensors, bool* expected_output_data, + bool* output_data) { const int output_dims_count = ElementCount(*tensors[inputs_size].dims); - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = resolver.FindOp(op); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - const int inputs_array_data[] = {2, 0, 1}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); const int outputs_array_data[] = {1, 2}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, /*buffer=*/nullptr, /*length=*/0); - } + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, /*builtin_data=*/nullptr, + micro_test::reporter); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = nullptr; - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; - - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); for (int i = 0; i < output_dims_count; ++i) { TF_LITE_MICRO_EXPECT_EQ(expected_output_data[i], output_data[i]); } } -void TestComparisonFloat(tflite::BuiltinOperator op, int* input1_dims_data, - float* input1_data, int* input2_dims_data, - float* input2_data, bool* expected_output_data, - int* output_dims_data, bool* output_data) { +void TestComparisonFloat(const TfLiteRegistration& registration, + int* input1_dims_data, float* input1_data, + int* input2_dims_data, float* input2_data, + bool* expected_output_data, int* output_dims_data, + bool* output_data) { TfLiteIntArray* input1_dims = IntArrayFromInts(input1_dims_data); TfLiteIntArray* input2_dims = IntArrayFromInts(input2_dims_data); TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); @@ -84,13 +66,14 @@ void TestComparisonFloat(tflite::BuiltinOperator op, int* input1_dims_data, CreateBoolTensor(output_data, output_dims), }; - TestComparison(op, tensors, expected_output_data, output_data); + TestComparison(registration, tensors, expected_output_data, output_data); } -void TestComparisonBool(tflite::BuiltinOperator op, int* input1_dims_data, - bool* input1_data, int* input2_dims_data, - bool* input2_data, bool* expected_output_data, - int* output_dims_data, bool* output_data) { +void TestComparisonBool(const TfLiteRegistration& registration, + int* input1_dims_data, bool* input1_data, + int* input2_dims_data, bool* input2_data, + bool* 
expected_output_data, int* output_dims_data, + bool* output_data) { TfLiteIntArray* input1_dims = IntArrayFromInts(input1_dims_data); TfLiteIntArray* input2_dims = IntArrayFromInts(input2_dims_data); TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); @@ -101,13 +84,14 @@ void TestComparisonBool(tflite::BuiltinOperator op, int* input1_dims_data, CreateBoolTensor(output_data, output_dims), }; - TestComparison(op, tensors, expected_output_data, output_data); + TestComparison(registration, tensors, expected_output_data, output_data); } -void TestComparisonInt(tflite::BuiltinOperator op, int* input1_dims_data, - int32_t* input1_data, int* input2_dims_data, - int32_t* input2_data, bool* expected_output_data, - int* output_dims_data, bool* output_data) { +void TestComparisonInt(const TfLiteRegistration& registration, + int* input1_dims_data, int32_t* input1_data, + int* input2_dims_data, int32_t* input2_data, + bool* expected_output_data, int* output_dims_data, + bool* output_data) { TfLiteIntArray* input1_dims = IntArrayFromInts(input1_dims_data); TfLiteIntArray* input2_dims = IntArrayFromInts(input2_dims_data); TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); @@ -118,10 +102,10 @@ void TestComparisonInt(tflite::BuiltinOperator op, int* input1_dims_data, CreateBoolTensor(output_data, output_dims), }; - TestComparison(op, tensors, expected_output_data, output_data); + TestComparison(registration, tensors, expected_output_data, output_data); } -void TestComparisonQuantizedUInt8(tflite::BuiltinOperator op, +void TestComparisonQuantizedUInt8(const TfLiteRegistration& registration, int* input1_dims_data, float* input1_data, uint8_t* input1_quantized, float input1_scale, int input1_zero_point, int* input2_dims_data, @@ -141,10 +125,10 @@ void TestComparisonQuantizedUInt8(tflite::BuiltinOperator op, CreateBoolTensor(output_data, output_dims), }; - TestComparison(op, tensors, expected_output_data, output_data); + TestComparison(registration, tensors, expected_output_data, output_data); } -void TestComparisonQuantizedInt8(tflite::BuiltinOperator op, +void TestComparisonQuantizedInt8(const TfLiteRegistration& registration, int* input1_dims_data, float* input1_data, int8_t* input1_quantized, float input1_scale, int input1_zero_point, int* input2_dims_data, @@ -164,7 +148,7 @@ void TestComparisonQuantizedInt8(tflite::BuiltinOperator op, CreateBoolTensor(output_data, output_dims), }; - TestComparison(op, tensors, expected_output_data, output_data); + TestComparison(registration, tensors, expected_output_data, output_data); } } // namespace @@ -184,9 +168,9 @@ TF_LITE_MICRO_TEST(EqualBool) { int expected_dim[] = {4, 1, 1, 1, 4}; bool output_data[4]; - tflite::testing::TestComparisonBool(tflite::BuiltinOperator_EQUAL, input1_dim, - input1_data, input2_dim, input2_data, - expected_data, expected_dim, output_data); + tflite::testing::TestComparisonBool( + tflite::ops::micro::Register_EQUAL(), input1_dim, input1_data, input2_dim, + input2_data, expected_data, expected_dim, output_data); } TF_LITE_MICRO_TEST(EqualFloat) { @@ -201,7 +185,7 @@ TF_LITE_MICRO_TEST(EqualFloat) { bool output_data[4]; tflite::testing::TestComparisonFloat( - tflite::BuiltinOperator_EQUAL, input1_dim, input1_data, input2_dim, + tflite::ops::micro::Register_EQUAL(), input1_dim, input1_data, input2_dim, input2_data, expected_data, expected_dim, output_data); } @@ -215,9 +199,9 @@ TF_LITE_MICRO_TEST(EqualInt) { bool expected_data[] = {false, false, true, false}; int expected_dim[] = {4, 1, 1, 1, 4}; bool 
output_data[4]; - tflite::testing::TestComparisonInt(tflite::BuiltinOperator_EQUAL, input1_dim, - input1_data, input2_dim, input2_data, - expected_data, expected_dim, output_data); + tflite::testing::TestComparisonInt( + tflite::ops::micro::Register_EQUAL(), input1_dim, input1_data, input2_dim, + input2_data, expected_data, expected_dim, output_data); } TF_LITE_MICRO_TEST(EqualBroadcast) { @@ -231,9 +215,9 @@ TF_LITE_MICRO_TEST(EqualBroadcast) { int expected_dim[] = {4, 1, 1, 1, 4}; bool output_data[4]; - tflite::testing::TestComparisonInt(tflite::BuiltinOperator_EQUAL, input1_dim, - input1_data, input2_dim, input2_data, - expected_data, expected_dim, output_data); + tflite::testing::TestComparisonInt( + tflite::ops::micro::Register_EQUAL(), input1_dim, input1_data, input2_dim, + input2_data, expected_data, expected_dim, output_data); } TF_LITE_MICRO_TEST(EqualBroadcastTwoD) { @@ -248,9 +232,9 @@ TF_LITE_MICRO_TEST(EqualBroadcastTwoD) { int expected_dim[] = {4, 1, 1, 2, 4}; bool output_data[8]; - tflite::testing::TestComparisonInt(tflite::BuiltinOperator_EQUAL, input1_dim, - input1_data, input2_dim, input2_data, - expected_data, expected_dim, output_data); + tflite::testing::TestComparisonInt( + tflite::ops::micro::Register_EQUAL(), input1_dim, input1_data, input2_dim, + input2_data, expected_data, expected_dim, output_data); } TF_LITE_MICRO_TEST(NotEqualBool) { @@ -265,8 +249,8 @@ TF_LITE_MICRO_TEST(NotEqualBool) { bool output_data[4]; tflite::testing::TestComparisonBool( - tflite::BuiltinOperator_NOT_EQUAL, input1_dim, input1_data, input2_dim, - input2_data, expected_data, expected_dim, output_data); + tflite::ops::micro::Register_NOT_EQUAL(), input1_dim, input1_data, + input2_dim, input2_data, expected_data, expected_dim, output_data); } TF_LITE_MICRO_TEST(NotEqualFloat) { @@ -281,8 +265,8 @@ TF_LITE_MICRO_TEST(NotEqualFloat) { bool output_data[4]; tflite::testing::TestComparisonFloat( - tflite::BuiltinOperator_NOT_EQUAL, input1_dim, input1_data, input2_dim, - input2_data, expected_data, expected_dim, output_data); + tflite::ops::micro::Register_NOT_EQUAL(), input1_dim, input1_data, + input2_dim, input2_data, expected_data, expected_dim, output_data); } TF_LITE_MICRO_TEST(NotEqualInt) { @@ -297,8 +281,8 @@ TF_LITE_MICRO_TEST(NotEqualInt) { bool output_data[4]; tflite::testing::TestComparisonInt( - tflite::BuiltinOperator_NOT_EQUAL, input1_dim, input1_data, input2_dim, - input2_data, expected_data, expected_dim, output_data); + tflite::ops::micro::Register_NOT_EQUAL(), input1_dim, input1_data, + input2_dim, input2_data, expected_data, expected_dim, output_data); } TF_LITE_MICRO_TEST(NotEqualBroadcast) { @@ -313,8 +297,8 @@ TF_LITE_MICRO_TEST(NotEqualBroadcast) { bool output_data[4]; tflite::testing::TestComparisonInt( - tflite::BuiltinOperator_NOT_EQUAL, input1_dim, input1_data, input2_dim, - input2_data, expected_data, expected_dim, output_data); + tflite::ops::micro::Register_NOT_EQUAL(), input1_dim, input1_data, + input2_dim, input2_data, expected_data, expected_dim, output_data); } TF_LITE_MICRO_TEST(NotEqualBroadcastTwoD) { @@ -329,8 +313,8 @@ TF_LITE_MICRO_TEST(NotEqualBroadcastTwoD) { bool output_data[8]; tflite::testing::TestComparisonInt( - tflite::BuiltinOperator_NOT_EQUAL, input1_dim, input1_data, input2_dim, - input2_data, expected_data, expected_dim, output_data); + tflite::ops::micro::Register_NOT_EQUAL(), input1_dim, input1_data, + input2_dim, input2_data, expected_data, expected_dim, output_data); } TF_LITE_MICRO_TEST(GreaterFloat) { @@ -345,8 +329,8 @@ 
TF_LITE_MICRO_TEST(GreaterFloat) { bool output_data[4]; tflite::testing::TestComparisonFloat( - tflite::BuiltinOperator_GREATER, input1_dim, input1_data, input2_dim, - input2_data, expected_data, expected_dim, output_data); + tflite::ops::micro::Register_GREATER(), input1_dim, input1_data, + input2_dim, input2_data, expected_data, expected_dim, output_data); } TF_LITE_MICRO_TEST(GreaterInt) { @@ -361,8 +345,8 @@ TF_LITE_MICRO_TEST(GreaterInt) { bool output_data[4]; tflite::testing::TestComparisonInt( - tflite::BuiltinOperator_GREATER, input1_dim, input1_data, input2_dim, - input2_data, expected_data, expected_dim, output_data); + tflite::ops::micro::Register_GREATER(), input1_dim, input1_data, + input2_dim, input2_data, expected_data, expected_dim, output_data); } TF_LITE_MICRO_TEST(GreaterBroadcast) { @@ -377,8 +361,8 @@ TF_LITE_MICRO_TEST(GreaterBroadcast) { bool output_data[4]; tflite::testing::TestComparisonInt( - tflite::BuiltinOperator_GREATER, input1_dim, input1_data, input2_dim, - input2_data, expected_data, expected_dim, output_data); + tflite::ops::micro::Register_GREATER(), input1_dim, input1_data, + input2_dim, input2_data, expected_data, expected_dim, output_data); } TF_LITE_MICRO_TEST(GreaterBroadcastTwoD) { @@ -393,8 +377,8 @@ TF_LITE_MICRO_TEST(GreaterBroadcastTwoD) { bool output_data[8]; tflite::testing::TestComparisonInt( - tflite::BuiltinOperator_GREATER, input1_dim, input1_data, input2_dim, - input2_data, expected_data, expected_dim, output_data); + tflite::ops::micro::Register_GREATER(), input1_dim, input1_data, + input2_dim, input2_data, expected_data, expected_dim, output_data); } TF_LITE_MICRO_TEST(GreaterEqualFloat) { @@ -409,7 +393,7 @@ TF_LITE_MICRO_TEST(GreaterEqualFloat) { bool output_data[4]; tflite::testing::TestComparisonFloat( - tflite::BuiltinOperator_GREATER_EQUAL, input1_dim, input1_data, + tflite::ops::micro::Register_GREATER_EQUAL(), input1_dim, input1_data, input2_dim, input2_data, expected_data, expected_dim, output_data); } @@ -425,7 +409,7 @@ TF_LITE_MICRO_TEST(GreaterEqualInt) { bool output_data[4]; tflite::testing::TestComparisonInt( - tflite::BuiltinOperator_GREATER_EQUAL, input1_dim, input1_data, + tflite::ops::micro::Register_GREATER_EQUAL(), input1_dim, input1_data, input2_dim, input2_data, expected_data, expected_dim, output_data); } @@ -441,7 +425,7 @@ TF_LITE_MICRO_TEST(GreaterEqualBroadcast) { bool output_data[4]; tflite::testing::TestComparisonInt( - tflite::BuiltinOperator_GREATER_EQUAL, input1_dim, input1_data, + tflite::ops::micro::Register_GREATER_EQUAL(), input1_dim, input1_data, input2_dim, input2_data, expected_data, expected_dim, output_data); } @@ -457,7 +441,7 @@ TF_LITE_MICRO_TEST(GreaterEqualBroadcastTwoD) { bool output_data[8]; tflite::testing::TestComparisonInt( - tflite::BuiltinOperator_GREATER_EQUAL, input1_dim, input1_data, + tflite::ops::micro::Register_GREATER_EQUAL(), input1_dim, input1_data, input2_dim, input2_data, expected_data, expected_dim, output_data); } @@ -473,7 +457,7 @@ TF_LITE_MICRO_TEST(LessFloat) { bool output_data[4]; tflite::testing::TestComparisonFloat( - tflite::BuiltinOperator_LESS, input1_dim, input1_data, input2_dim, + tflite::ops::micro::Register_LESS(), input1_dim, input1_data, input2_dim, input2_data, expected_data, expected_dim, output_data); } @@ -488,9 +472,9 @@ TF_LITE_MICRO_TEST(LessInt) { int expected_dim[] = {4, 1, 1, 1, 4}; bool output_data[4]; - tflite::testing::TestComparisonInt(tflite::BuiltinOperator_LESS, input1_dim, - input1_data, input2_dim, input2_data, - expected_data, 
expected_dim, output_data); + tflite::testing::TestComparisonInt( + tflite::ops::micro::Register_LESS(), input1_dim, input1_data, input2_dim, + input2_data, expected_data, expected_dim, output_data); } TF_LITE_MICRO_TEST(LessBroadcast) { @@ -504,9 +488,9 @@ TF_LITE_MICRO_TEST(LessBroadcast) { int expected_dim[] = {4, 1, 1, 1, 4}; bool output_data[4]; - tflite::testing::TestComparisonInt(tflite::BuiltinOperator_LESS, input1_dim, - input1_data, input2_dim, input2_data, - expected_data, expected_dim, output_data); + tflite::testing::TestComparisonInt( + tflite::ops::micro::Register_LESS(), input1_dim, input1_data, input2_dim, + input2_data, expected_data, expected_dim, output_data); } TF_LITE_MICRO_TEST(LessBroadcastTwoD) { @@ -520,9 +504,9 @@ TF_LITE_MICRO_TEST(LessBroadcastTwoD) { int expected_dim[] = {4, 1, 1, 2, 4}; bool output_data[8]; - tflite::testing::TestComparisonInt(tflite::BuiltinOperator_LESS, input1_dim, - input1_data, input2_dim, input2_data, - expected_data, expected_dim, output_data); + tflite::testing::TestComparisonInt( + tflite::ops::micro::Register_LESS(), input1_dim, input1_data, input2_dim, + input2_data, expected_data, expected_dim, output_data); } TF_LITE_MICRO_TEST(LessEqualFloat) { @@ -537,8 +521,8 @@ TF_LITE_MICRO_TEST(LessEqualFloat) { bool output_data[4]; tflite::testing::TestComparisonFloat( - tflite::BuiltinOperator_LESS_EQUAL, input1_dim, input1_data, input2_dim, - input2_data, expected_data, expected_dim, output_data); + tflite::ops::micro::Register_LESS_EQUAL(), input1_dim, input1_data, + input2_dim, input2_data, expected_data, expected_dim, output_data); } TF_LITE_MICRO_TEST(LessEqualInt) { @@ -553,8 +537,8 @@ TF_LITE_MICRO_TEST(LessEqualInt) { bool output_data[4]; tflite::testing::TestComparisonInt( - tflite::BuiltinOperator_LESS_EQUAL, input1_dim, input1_data, input2_dim, - input2_data, expected_data, expected_dim, output_data); + tflite::ops::micro::Register_LESS_EQUAL(), input1_dim, input1_data, + input2_dim, input2_data, expected_data, expected_dim, output_data); } TF_LITE_MICRO_TEST(LessEqualBroadcast) { @@ -569,8 +553,8 @@ TF_LITE_MICRO_TEST(LessEqualBroadcast) { bool output_data[4]; tflite::testing::TestComparisonInt( - tflite::BuiltinOperator_LESS_EQUAL, input1_dim, input1_data, input2_dim, - input2_data, expected_data, expected_dim, output_data); + tflite::ops::micro::Register_LESS_EQUAL(), input1_dim, input1_data, + input2_dim, input2_data, expected_data, expected_dim, output_data); } TF_LITE_MICRO_TEST(LessEqualBroadcastTwoD) { @@ -585,8 +569,8 @@ TF_LITE_MICRO_TEST(LessEqualBroadcastTwoD) { bool output_data[8]; tflite::testing::TestComparisonInt( - tflite::BuiltinOperator_LESS_EQUAL, input1_dim, input1_data, input2_dim, - input2_data, expected_data, expected_dim, output_data); + tflite::ops::micro::Register_LESS_EQUAL(), input1_dim, input1_data, + input2_dim, input2_data, expected_data, expected_dim, output_data); } TF_LITE_MICRO_TEST(EqualQuantizedUInt8) { @@ -607,10 +591,10 @@ TF_LITE_MICRO_TEST(EqualQuantizedUInt8) { bool output_data[4]; tflite::testing::TestComparisonQuantizedUInt8( - tflite::BuiltinOperator_EQUAL, input1_dim, input1_data, input1_quantized, - input1_scale, input1_zero_point, input2_dim, input2_data, - input2_quantized, input2_scale, input2_zero_point, expected_data, - expected_dim, output_data); + tflite::ops::micro::Register_EQUAL(), input1_dim, input1_data, + input1_quantized, input1_scale, input1_zero_point, input2_dim, + input2_data, input2_quantized, input2_scale, input2_zero_point, + expected_data, expected_dim, 
output_data); } TF_LITE_MICRO_TEST(EqualQuantizedInt8) { @@ -632,10 +616,10 @@ TF_LITE_MICRO_TEST(EqualQuantizedInt8) { bool output_data[4]; tflite::testing::TestComparisonQuantizedInt8( - tflite::BuiltinOperator_EQUAL, input1_dim, input1_data, input1_quantized, - input1_scale, input1_zero_point, input2_dim, input2_data, - input2_quantized, input2_scale, input2_zero_point, expected_data, - expected_dim, output_data); + tflite::ops::micro::Register_EQUAL(), input1_dim, input1_data, + input1_quantized, input1_scale, input1_zero_point, input2_dim, + input2_data, input2_quantized, input2_scale, input2_zero_point, + expected_data, expected_dim, output_data); } TF_LITE_MICRO_TEST(NotEqualQuantizedUInt8) { @@ -656,7 +640,7 @@ TF_LITE_MICRO_TEST(NotEqualQuantizedUInt8) { bool output_data[4]; tflite::testing::TestComparisonQuantizedUInt8( - tflite::BuiltinOperator_NOT_EQUAL, input1_dim, input1_data, + tflite::ops::micro::Register_NOT_EQUAL(), input1_dim, input1_data, input1_quantized, input1_scale, input1_zero_point, input2_dim, input2_data, input2_quantized, input2_scale, input2_zero_point, expected_data, expected_dim, output_data); @@ -681,7 +665,7 @@ TF_LITE_MICRO_TEST(NotEqualQuantizedInt8) { bool output_data[4]; tflite::testing::TestComparisonQuantizedInt8( - tflite::BuiltinOperator_NOT_EQUAL, input1_dim, input1_data, + tflite::ops::micro::Register_NOT_EQUAL(), input1_dim, input1_data, input1_quantized, input1_scale, input1_zero_point, input2_dim, input2_data, input2_quantized, input2_scale, input2_zero_point, expected_data, expected_dim, output_data); @@ -705,7 +689,7 @@ TF_LITE_MICRO_TEST(GreaterQuantizedUInt8) { bool output_data[4]; tflite::testing::TestComparisonQuantizedUInt8( - tflite::BuiltinOperator_GREATER, input1_dim, input1_data, + tflite::ops::micro::Register_GREATER(), input1_dim, input1_data, input1_quantized, input1_scale, input1_zero_point, input2_dim, input2_data, input2_quantized, input2_scale, input2_zero_point, expected_data, expected_dim, output_data); @@ -729,7 +713,7 @@ TF_LITE_MICRO_TEST(GreaterQuantizedUInt8SmallRange) { bool output_data[4]; tflite::testing::TestComparisonQuantizedUInt8( - tflite::BuiltinOperator_GREATER, input1_dim, input1_data, + tflite::ops::micro::Register_GREATER(), input1_dim, input1_data, input1_quantized, input1_scale, input1_zero_point, input2_dim, input2_data, input2_quantized, input2_scale, input2_zero_point, expected_data, expected_dim, output_data); @@ -752,7 +736,7 @@ TF_LITE_MICRO_TEST(GreaterUInt8EqualQuantized) { bool output_data[4]; tflite::testing::TestComparisonQuantizedUInt8( - tflite::BuiltinOperator_GREATER_EQUAL, input1_dim, input1_data, + tflite::ops::micro::Register_GREATER_EQUAL(), input1_dim, input1_data, input1_quantized, input1_scale, input1_zero_point, input2_dim, input2_data, input2_quantized, input1_scale, input1_zero_point, expected_data, expected_dim, output_data); @@ -775,10 +759,10 @@ TF_LITE_MICRO_TEST(LessQuantizedUInt8) { bool output_data[4]; tflite::testing::TestComparisonQuantizedUInt8( - tflite::BuiltinOperator_LESS, input1_dim, input1_data, input1_quantized, - input1_scale, input1_zero_point, input2_dim, input2_data, - input2_quantized, input1_scale, input1_zero_point, expected_data, - expected_dim, output_data); + tflite::ops::micro::Register_LESS(), input1_dim, input1_data, + input1_quantized, input1_scale, input1_zero_point, input2_dim, + input2_data, input2_quantized, input1_scale, input1_zero_point, + expected_data, expected_dim, output_data); } TF_LITE_MICRO_TEST(LessEqualQuantizedUInt8) { @@ -798,7 
+782,7 @@ TF_LITE_MICRO_TEST(LessEqualQuantizedUInt8) { bool output_data[4]; tflite::testing::TestComparisonQuantizedUInt8( - tflite::BuiltinOperator_LESS_EQUAL, input1_dim, input1_data, + tflite::ops::micro::Register_LESS_EQUAL(), input1_dim, input1_data, input1_quantized, input1_scale, input1_zero_point, input2_dim, input2_data, input2_quantized, input1_scale, input1_zero_point, expected_data, expected_dim, output_data); @@ -826,7 +810,7 @@ TF_LITE_MICRO_TEST(EqualQuantizedUInt8WithBroadcast) { bool output_data[6]; tflite::testing::TestComparisonQuantizedUInt8( - tflite::BuiltinOperator_EQUAL, input1_dim, input1_data, + tflite::ops::micro::Register_EQUAL(), input1_dim, input1_data, input1_quantized, input1_scale, input1_zero_point, input2_dim, input2_data, input2_quantized, input1_scale, input1_zero_point, expected_data, expected_dim, output_data); @@ -855,7 +839,7 @@ TF_LITE_MICRO_TEST(NotEqualQuantizedUInt8WithBroadcast) { bool output_data[6]; tflite::testing::TestComparisonQuantizedUInt8( - tflite::BuiltinOperator_NOT_EQUAL, input1_dim, input1_data, + tflite::ops::micro::Register_NOT_EQUAL(), input1_dim, input1_data, input1_quantized, input1_scale, input1_zero_point, input2_dim, input2_data, input2_quantized, input1_scale, input1_zero_point, expected_data, expected_dim, output_data); @@ -884,7 +868,7 @@ TF_LITE_MICRO_TEST(NotEqualQuantizedInt8WithBroadcast) { bool output_data[6]; tflite::testing::TestComparisonQuantizedInt8( - tflite::BuiltinOperator_NOT_EQUAL, input1_dim, input1_data, + tflite::ops::micro::Register_NOT_EQUAL(), input1_dim, input1_data, input1_quantized, input1_scale, input1_zero_point, input2_dim, input2_data, input2_quantized, input1_scale, input1_zero_point, expected_data, expected_dim, output_data); @@ -913,7 +897,7 @@ TF_LITE_MICRO_TEST(GreaterQuantizedUInt8WithBroadcast) { bool output_data[6]; tflite::testing::TestComparisonQuantizedUInt8( - tflite::BuiltinOperator_GREATER, input1_dim, input1_data, + tflite::ops::micro::Register_GREATER(), input1_dim, input1_data, input1_quantized, input1_scale, input1_zero_point, input2_dim, input2_data, input2_quantized, input1_scale, input1_zero_point, expected_data, expected_dim, output_data); @@ -942,7 +926,7 @@ TF_LITE_MICRO_TEST(GreaterQuantizedInt8WithBroadcast) { bool output_data[6]; tflite::testing::TestComparisonQuantizedInt8( - tflite::BuiltinOperator_GREATER, input1_dim, input1_data, + tflite::ops::micro::Register_GREATER(), input1_dim, input1_data, input1_quantized, input1_scale, input1_zero_point, input2_dim, input2_data, input2_quantized, input1_scale, input1_zero_point, expected_data, expected_dim, output_data); @@ -971,7 +955,7 @@ TF_LITE_MICRO_TEST(GreaterEqualQuantizedUInt8WithBroadcast) { bool output_data[6]; tflite::testing::TestComparisonQuantizedUInt8( - tflite::BuiltinOperator_GREATER_EQUAL, input1_dim, input1_data, + tflite::ops::micro::Register_GREATER_EQUAL(), input1_dim, input1_data, input1_quantized, input1_scale, input1_zero_point, input2_dim, input2_data, input2_quantized, input1_scale, input1_zero_point, expected_data, expected_dim, output_data); @@ -1000,7 +984,7 @@ TF_LITE_MICRO_TEST(GreaterEqualQuantizedInt8WithBroadcast) { bool output_data[6]; tflite::testing::TestComparisonQuantizedInt8( - tflite::BuiltinOperator_GREATER_EQUAL, input1_dim, input1_data, + tflite::ops::micro::Register_GREATER_EQUAL(), input1_dim, input1_data, input1_quantized, input1_scale, input1_zero_point, input2_dim, input2_data, input2_quantized, input1_scale, input1_zero_point, expected_data, expected_dim, output_data); 
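Each test above now hands the kernel's registration (tflite::ops::micro::Register_*()) to the test helper instead of a BuiltinOperator enum, so the helper can drive the op directly rather than looking it up through a resolver. Below is a minimal sketch of what such a helper can look like when built on the KernelRunner utility that the concatenation port later in this series uses; the function name, tensor-index layout, and namespaces are illustrative assumptions, not code from this patch:

```cpp
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/kernels/kernel_runner.h"
#include "tensorflow/lite/micro/testing/micro_test.h"
#include "tensorflow/lite/micro/testing/test_utils.h"

// Hypothetical helper: runs a comparison kernel given its registration.
// Assumes two input tensors (indices 0 and 1) and one output (index 2),
// matching the layout the comparison tests build.
void RunComparisonOp(const TfLiteRegistration& registration,
                     TfLiteTensor* tensors, int tensors_size) {
  int inputs_array_data[] = {2, 0, 1};  // {count, input0 idx, input1 idx}
  int outputs_array_data[] = {1, 2};    // {count, output idx}
  TfLiteIntArray* inputs_array =
      tflite::testing::IntArrayFromInts(inputs_array_data);
  TfLiteIntArray* outputs_array =
      tflite::testing::IntArrayFromInts(outputs_array_data);

  // Comparison ops take no builtin params, so builtin_data is null here.
  tflite::micro::KernelRunner runner(registration, tensors, tensors_size,
                                     inputs_array, outputs_array,
                                     /*builtin_data=*/nullptr,
                                     micro_test::reporter);
  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare());
  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke());
}
```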
@@ -1029,10 +1013,10 @@ TF_LITE_MICRO_TEST(LessQuantizedUInt8WithBroadcast) { bool output_data[6]; tflite::testing::TestComparisonQuantizedUInt8( - tflite::BuiltinOperator_LESS, input1_dim, input1_data, input1_quantized, - input1_scale, input1_zero_point, input2_dim, input2_data, - input2_quantized, input1_scale, input1_zero_point, expected_data, - expected_dim, output_data); + tflite::ops::micro::Register_LESS(), input1_dim, input1_data, + input1_quantized, input1_scale, input1_zero_point, input2_dim, + input2_data, input2_quantized, input1_scale, input1_zero_point, + expected_data, expected_dim, output_data); } } @@ -1058,10 +1042,10 @@ TF_LITE_MICRO_TEST(LessQuantizedInt8WithBroadcast) { bool output_data[6]; tflite::testing::TestComparisonQuantizedInt8( - tflite::BuiltinOperator_LESS, input1_dim, input1_data, input1_quantized, - input1_scale, input1_zero_point, input2_dim, input2_data, - input2_quantized, input1_scale, input1_zero_point, expected_data, - expected_dim, output_data); + tflite::ops::micro::Register_LESS(), input1_dim, input1_data, + input1_quantized, input1_scale, input1_zero_point, input2_dim, + input2_data, input2_quantized, input1_scale, input1_zero_point, + expected_data, expected_dim, output_data); } } @@ -1087,7 +1071,7 @@ TF_LITE_MICRO_TEST(LessEqualQuantizedUInt8WithBroadcast) { bool output_data[6]; tflite::testing::TestComparisonQuantizedUInt8( - tflite::BuiltinOperator_LESS_EQUAL, input1_dim, input1_data, + tflite::ops::micro::Register_LESS_EQUAL(), input1_dim, input1_data, input1_quantized, input1_scale, input1_zero_point, input2_dim, input2_data, input2_quantized, input1_scale, input1_zero_point, expected_data, expected_dim, output_data); @@ -1116,7 +1100,7 @@ TF_LITE_MICRO_TEST(LessEqualQuantizedInt8WithBroadcast) { bool output_data[6]; tflite::testing::TestComparisonQuantizedInt8( - tflite::BuiltinOperator_LESS_EQUAL, input1_dim, input1_data, + tflite::ops::micro::Register_LESS_EQUAL(), input1_dim, input1_data, input1_quantized, input1_scale, input1_zero_point, input2_dim, input2_data, input2_quantized, input1_scale, input1_zero_point, expected_data, expected_dim, output_data); diff --git a/tensorflow/lite/micro/kernels/kernel_util.cc b/tensorflow/lite/micro/kernels/kernel_util.cc index 074de732be6..d70fc5864f5 100644 --- a/tensorflow/lite/micro/kernels/kernel_util.cc +++ b/tensorflow/lite/micro/kernels/kernel_util.cc @@ -42,5 +42,12 @@ const RuntimeShape GetTensorShape(const TfLiteEvalTensor* tensor) { return RuntimeShape(dims_size, dims_data); } +bool HaveSameShapes(const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2) { + TFLITE_DCHECK(input1 != nullptr); + TFLITE_DCHECK(input2 != nullptr); + return TfLiteIntArrayEqual(input1->dims, input2->dims); +} + } // namespace micro } // namespace tflite diff --git a/tensorflow/lite/micro/kernels/kernel_util.h b/tensorflow/lite/micro/kernels/kernel_util.h index baf3d2464bb..a3af8da91a1 100644 --- a/tensorflow/lite/micro/kernels/kernel_util.h +++ b/tensorflow/lite/micro/kernels/kernel_util.h @@ -46,6 +46,10 @@ const T* GetTensorData(const TfLiteEvalTensor* tensor) { // Returns the shape of a TfLiteEvalTensor struct. const RuntimeShape GetTensorShape(const TfLiteEvalTensor* tensor); +// Return true if the given tensors have the same shape. +bool HaveSameShapes(const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2); + } // namespace micro } // namespace tflite From a1942e365c306f2938fd2bab0e90f90d459c058b Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Thu, 23 Jul 2020 12:28:11 -0700 Subject: [PATCH 1187/2522] tracing support for nccl collectives. PiperOrigin-RevId: 322841149 Change-Id: Idd0958d129f7f4602c82c61747766e33cb157634 --- tensorflow/core/nccl/BUILD | 1 + tensorflow/core/nccl/nccl_manager.cc | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/tensorflow/core/nccl/BUILD b/tensorflow/core/nccl/BUILD index 3acf7579f62..a63e862c621 100644 --- a/tensorflow/core/nccl/BUILD +++ b/tensorflow/core/nccl/BUILD @@ -44,6 +44,7 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:stream_executor", "//tensorflow/core/profiler/lib:traceme", + "//tensorflow/core/profiler/lib:connected_traceme", ]), alwayslink = 1, ) diff --git a/tensorflow/core/nccl/nccl_manager.cc b/tensorflow/core/nccl/nccl_manager.cc index 68af9fbb2f0..619885e9f34 100644 --- a/tensorflow/core/nccl/nccl_manager.cc +++ b/tensorflow/core/nccl/nccl_manager.cc @@ -23,6 +23,7 @@ limitations under the License. #include "tensorflow/core/lib/core/refcount.h" #include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/platform/env.h" +#include "tensorflow/core/profiler/lib/connected_traceme.h" #include "tensorflow/core/profiler/lib/traceme.h" #if GOOGLE_CUDA #include "tensorflow/stream_executor/cuda/cuda_activation.h" @@ -213,6 +214,10 @@ struct NcclManager::Collective : public core::RefCounted { // Guarded by the mutex of the containing Communicator. int available_participants = 0; bool multi_node_ready = false; + // trace_context is used by tracing system to associate collective + // scheduling and execution (cooperative kernel launch), which happen + // on different threads. + uint64 trace_context = 0; Status status; }; @@ -591,6 +596,10 @@ bool NcclManager::CheckReady(const string& collective_key, } void NcclManager::RunCollective(Collective* collective) { + // For TraceMeConsumer in Connection::RPCDone(). + tensorflow::profiler::TraceMeProducer traceme("Schedule Collective"); + collective->trace_context = traceme.GetContextId(); + static mutex collective_mu(LINKER_INITIALIZED); Status status = collective->status; @@ -686,6 +695,9 @@ void NcclManager::LoopKernelLaunches(NcclStream* nccl_stream) { // Launch the nccl kernel. Collective* collective = next_launch.first; + tensorflow::profiler::TraceMeConsumer traceme("Run Collective", + collective->trace_context); + ncclDataType_t data_type = ToNcclType(collective->data_type); int p_idx = next_launch.second; Participant* p = collective->participants[p_idx].get(); @@ -701,6 +713,7 @@ void NcclManager::LoopKernelLaunches(NcclStream* nccl_stream) { << " sendbuff " << sendbuff << " recvbuff " << recvbuff << " nccl_comm " << nccl_comm << " comm_stream " << comm_stream << " cuda_stream " << cu_stream; + profiler::TraceMe trace_me("ncclAllReduce"); nccl_result = ncclAllReduce(sendbuff, recvbuff, p->input->NumElements(), data_type, collective->reduction_op, nccl_comm, *cu_stream); @@ -732,6 +745,7 @@ void NcclManager::LoopKernelLaunches(NcclStream* nccl_stream) { << " sendbuff " << sendbuff << " recvbuff " << recvbuff << " nccl_comm " << nccl_comm << " comm_stream " << comm_stream << " cuda_stream " << cu_stream; + profiler::TraceMe trace_me("ncclBroadcast"); nccl_result = ncclBroadcast(sendbuff, recvbuff, num_elements, data_type, collective->root_rank, nccl_comm, *cu_stream); @@ -742,6 +756,7 @@ void NcclManager::LoopKernelLaunches(NcclStream* nccl_stream) { void* recvbuff = p->output ? 
const_cast(p->output->tensor_data().data()) : nullptr; + profiler::TraceMe trace_me("ncclReduce"); nccl_result = ncclReduce(sendbuff, recvbuff, p->input->NumElements(), data_type, collective->reduction_op, collective->root_rank, nccl_comm, *cu_stream); @@ -758,6 +773,7 @@ void NcclManager::LoopKernelLaunches(NcclStream* nccl_stream) { << " recvcount " << p->output->NumElements() << " nccl_comm " << nccl_comm << " comm_stream " << comm_stream << " cuda_stream " << cu_stream; + profiler::TraceMe trace_me("ncclAllGather"); nccl_result = ncclAllGather(sendbuff, recvbuff, p->input->NumElements(), data_type, nccl_comm, *cu_stream); break; From 56aa1f7b5845e7ced80b37f8ccc383ebcbc372fa Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Thu, 23 Jul 2020 13:00:52 -0700 Subject: [PATCH 1188/2522] Add Transactional API python bindings --- tensorflow/python/lib/io/file_io_wrapper.cc | 346 ++++++++++++-------- 1 file changed, 206 insertions(+), 140 deletions(-) diff --git a/tensorflow/python/lib/io/file_io_wrapper.cc b/tensorflow/python/lib/io/file_io_wrapper.cc index 54b06d69559..52a4b5b9d53 100644 --- a/tensorflow/python/lib/io/file_io_wrapper.cc +++ b/tensorflow/python/lib/io/file_io_wrapper.cc @@ -32,119 +32,175 @@ limitations under the License. #include "tensorflow/python/lib/core/pybind11_absl.h" #include "tensorflow/python/lib/core/pybind11_status.h" +namespace tensorflow { +struct PyTransactionToken { + TransactionToken* token_; +}; + +inline TransactionToken* TokenFromPyToken(PyTransactionToken* t) { + return (t ? t->token_ : nullptr); +} +} // namespace tensorflow + namespace { namespace py = pybind11; PYBIND11_MODULE(_pywrap_file_io, m) { - m.def("FileExists", [](const std::string& filename) { - tensorflow::Status status; - { - py::gil_scoped_release release; - status = tensorflow::Env::Default()->FileExists(filename); - } - tensorflow::MaybeRaiseRegisteredFromStatusWithGIL(status); - }); - m.def("DeleteFile", [](const std::string& filename) { - py::gil_scoped_release release; - tensorflow::Status status = - tensorflow::Env::Default()->DeleteFile(filename); - tensorflow::MaybeRaiseRegisteredFromStatusWithGIL(status); - }); - m.def("ReadFileToString", [](const std::string& filename) { - std::string data; - py::gil_scoped_release release; - const auto status = - ReadFileToString(tensorflow::Env::Default(), filename, &data); - pybind11::gil_scoped_acquire acquire; - tensorflow::MaybeRaiseRegisteredFromStatus(status); - return py::bytes(data); - }); - m.def("WriteStringToFile", - [](const std::string& filename, tensorflow::StringPiece data) { - py::gil_scoped_release release; - const auto status = - WriteStringToFile(tensorflow::Env::Default(), filename, data); - tensorflow::MaybeRaiseRegisteredFromStatusWithGIL(status); - }); - m.def("GetChildren", [](const std::string& dirname) { - std::vector results; - py::gil_scoped_release release; - const auto status = - tensorflow::Env::Default()->GetChildren(dirname, &results); - pybind11::gil_scoped_acquire acquire; - tensorflow::MaybeRaiseRegisteredFromStatus(status); - return results; - }); - m.def("GetMatchingFiles", [](const std::string& pattern) { - std::vector results; - py::gil_scoped_release release; - const auto status = - tensorflow::Env::Default()->GetMatchingPaths(pattern, &results); - pybind11::gil_scoped_acquire acquire; - tensorflow::MaybeRaiseRegisteredFromStatus(status); - return results; - }); - m.def("CreateDir", [](const std::string& dirname) { - py::gil_scoped_release release; - const auto status = 
tensorflow::Env::Default()->CreateDir(dirname); - if (tensorflow::errors::IsAlreadyExists(status)) { - return; - } - tensorflow::MaybeRaiseRegisteredFromStatusWithGIL(status); - }); - m.def("RecursivelyCreateDir", [](const std::string& dirname) { - py::gil_scoped_release release; - const auto status = - tensorflow::Env::Default()->RecursivelyCreateDir(dirname); - tensorflow::MaybeRaiseRegisteredFromStatusWithGIL(status); - }); - m.def("CopyFile", - [](const std::string& src, const std::string& target, bool overwrite) { - py::gil_scoped_release release; - auto* env = tensorflow::Env::Default(); - tensorflow::Status status; - if (!overwrite && env->FileExists(target).ok()) { - status = tensorflow::errors::AlreadyExists("file already exists"); - } else { - status = env->CopyFile(src, target); - } - tensorflow::MaybeRaiseRegisteredFromStatusWithGIL(status); - }); - m.def("RenameFile", - [](const std::string& src, const std::string& target, bool overwrite) { - py::gil_scoped_release release; - auto* env = tensorflow::Env::Default(); - tensorflow::Status status; - if (!overwrite && env->FileExists(target).ok()) { - status = tensorflow::errors::AlreadyExists("file already exists"); - } else { - status = env->RenameFile(src, target); - } - tensorflow::MaybeRaiseRegisteredFromStatusWithGIL(status); - }); - m.def("DeleteRecursively", [](const std::string& dirname) { - py::gil_scoped_release release; - tensorflow::int64 undeleted_files; - tensorflow::int64 undeleted_dirs; - auto status = tensorflow::Env::Default()->DeleteRecursively( - dirname, &undeleted_files, &undeleted_dirs); - if (status.ok() && (undeleted_files > 0 || undeleted_dirs > 0)) { - status = - tensorflow::errors::PermissionDenied("could not fully delete dir"); - } - tensorflow::MaybeRaiseRegisteredFromStatusWithGIL(status); - }); - m.def("IsDirectory", [](const std::string& dirname) { - py::gil_scoped_release release; - const auto status = tensorflow::Env::Default()->IsDirectory(dirname); - // FAILED_PRECONDITION response means path exists but isn't a dir. 
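Every binding in this file gains a trailing PyTransactionToken* argument defaulting to nullptr, which keeps existing Python call sites working while letting transactional callers thread a token through. A minimal, self-contained pybind11 sketch of that pattern follows; the Token type and the Exists function are made up purely for illustration:

```cpp
#include <string>

#include <pybind11/pybind11.h>

namespace py = pybind11;

struct Token {};  // stand-in for a transaction token type

PYBIND11_MODULE(example, m) {
  py::class_<Token>(m, "Token");
  m.def(
      "Exists",
      [](const std::string& filename, Token* token) {
        // A real binding would route the call through `token` when it is
        // non-null; the nullptr default preserves the old one-argument API.
        (void)token;
        return !filename.empty();
      },
      py::arg("filename"), py::arg("token") = (Token*)nullptr);
}
```

From Python, an existing call like `Exists("path")` keeps working unchanged; transactional callers opt in with `Exists("path", token=t)`.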
- if (tensorflow::errors::IsFailedPrecondition(status)) { - return false; - } + using tensorflow::PyTransactionToken; + using tensorflow::TransactionToken; + py::class_(m, "TransactionToken") + .def("__repr__", [](const PyTransactionToken* t) { + if (t->token_) { + return std::string(t->token_->owner->DecodeTransaction(t->token_)); + } + return std::string("Invalid token!"); + }); - tensorflow::MaybeRaiseRegisteredFromStatusWithGIL(status); - return true; - }); + m.def( + "FileExists", + [](const std::string& filename, PyTransactionToken* token) { + tensorflow::Status status; + { + py::gil_scoped_release release; + status = tensorflow::Env::Default()->FileExists(filename); + } + tensorflow::MaybeRaiseRegisteredFromStatusWithGIL(status); + }, + py::arg("filename"), py::arg("token") = (PyTransactionToken*)nullptr); + m.def( + "DeleteFile", + [](const std::string& filename, PyTransactionToken* token) { + py::gil_scoped_release release; + tensorflow::Status status = + tensorflow::Env::Default()->DeleteFile(filename); + tensorflow::MaybeRaiseRegisteredFromStatusWithGIL(status); + }, + py::arg("filename"), py::arg("token") = (PyTransactionToken*)nullptr); + m.def( + "ReadFileToString", + [](const std::string& filename, PyTransactionToken* token) { + std::string data; + py::gil_scoped_release release; + const auto status = + ReadFileToString(tensorflow::Env::Default(), filename, &data); + pybind11::gil_scoped_acquire acquire; + tensorflow::MaybeRaiseRegisteredFromStatus(status); + return py::bytes(data); + }, + py::arg("filename"), py::arg("token") = (PyTransactionToken*)nullptr); + m.def( + "WriteStringToFile", + [](const std::string& filename, tensorflow::StringPiece data, PyTransactionToken* token) { + py::gil_scoped_release release; + const auto status = + WriteStringToFile(tensorflow::Env::Default(), filename, data); + tensorflow::MaybeRaiseRegisteredFromStatusWithGIL(status); + }, + py::arg("filename"), py::arg("data"), + py::arg("token") = (PyTransactionToken*)nullptr); + m.def( + "GetChildren", + [](const std::string& dirname, PyTransactionToken* token) { + std::vector results; + py::gil_scoped_release release; + const auto status = + tensorflow::Env::Default()->GetChildren(dirname, &results); + pybind11::gil_scoped_acquire acquire; + tensorflow::MaybeRaiseRegisteredFromStatus(status); + return results; + }, + py::arg("dirname"), py::arg("token") = (PyTransactionToken*)nullptr); + m.def( + "GetMatchingFiles", + [](const std::string& pattern, PyTransactionToken* token) { + std::vector results; + py::gil_scoped_release release; + const auto status = + tensorflow::Env::Default()->GetMatchingPaths(pattern, &results); + pybind11::gil_scoped_acquire acquire; + tensorflow::MaybeRaiseRegisteredFromStatus(status); + return results; + }, + py::arg("pattern"), py::arg("token") = (PyTransactionToken*)nullptr); + m.def( + "CreateDir", + [](const std::string& dirname, PyTransactionToken* token) { + py::gil_scoped_release release; + const auto status = tensorflow::Env::Default()->CreateDir(dirname); + if (tensorflow::errors::IsAlreadyExists(status)) { + return; + } + tensorflow::MaybeRaiseRegisteredFromStatusWithGIL(status); + }, + py::arg("dirname"), py::arg("token") = (PyTransactionToken*)nullptr); + m.def( + "RecursivelyCreateDir", + [](const std::string& dirname, PyTransactionToken* token) { + py::gil_scoped_release release; + const auto status = + tensorflow::Env::Default()->RecursivelyCreateDir(dirname); + tensorflow::MaybeRaiseRegisteredFromStatusWithGIL(status); + }, + py::arg("dirname"), 
py::arg("token") = (PyTransactionToken*)nullptr); + m.def( + "CopyFile", + [](const std::string& src, const std::string& target, bool overwrite, PyTransactionToken* token) { + py::gil_scoped_release release; + auto* env = tensorflow::Env::Default(); + tensorflow::Status status; + if (!overwrite && env->FileExists(target).ok()) { + status = tensorflow::errors::AlreadyExists("file already exists"); + } else { + status = env->CopyFile(src, target); + } + tensorflow::MaybeRaiseRegisteredFromStatusWithGIL(status); + }, + py::arg("src"), py::arg("target"), py::arg("overwrite"), + py::arg("token") = (PyTransactionToken*)nullptr); + m.def( + "RenameFile", + [](const std::string& src, const std::string& target, bool overwrite, PyTransactionToken* token) { + py::gil_scoped_release release; + auto* env = tensorflow::Env::Default(); + tensorflow::Status status; + if (!overwrite && env->FileExists(target).ok()) { + status = tensorflow::errors::AlreadyExists("file already exists"); + } else { + status = env->RenameFile(src, target); + } + tensorflow::MaybeRaiseRegisteredFromStatusWithGIL(status); + }, + py::arg("src"), py::arg("target"), py::arg("overwrite"), + py::arg("token") = (PyTransactionToken*)nullptr); + m.def( + "DeleteRecursively", + [](const std::string& dirname, PyTransactionToken* token) { + py::gil_scoped_release release; + tensorflow::int64 undeleted_files; + tensorflow::int64 undeleted_dirs; + auto status = tensorflow::Env::Default()->DeleteRecursively( + dirname, &undeleted_files, &undeleted_dirs); + if (status.ok() && (undeleted_files > 0 || undeleted_dirs > 0)) { + status = tensorflow::errors::PermissionDenied( + "could not fully delete dir"); + } + tensorflow::MaybeRaiseRegisteredFromStatusWithGIL(status); + }, + py::arg("dirname"), py::arg("token") = (PyTransactionToken*)nullptr); + m.def( + "IsDirectory", + [](const std::string& dirname, PyTransactionToken* token) { + py::gil_scoped_release release; + const auto status = tensorflow::Env::Default()->IsDirectory(dirname); + // FAILED_PRECONDITION response means path exists but isn't a dir. + if (tensorflow::errors::IsFailedPrecondition(status)) { + return false; + } + + tensorflow::MaybeRaiseRegisteredFromStatusWithGIL(status); + return true; + }, + py::arg("dirname"), py::arg("token") = (PyTransactionToken*)nullptr); m.def("HasAtomicMove", [](const std::string& path) { py::gil_scoped_release release; bool has_atomic_move; @@ -159,29 +215,35 @@ PYBIND11_MODULE(_pywrap_file_io, m) { .def_readonly("mtime_nsec", &tensorflow::FileStatistics::mtime_nsec) .def_readonly("is_directory", &tensorflow::FileStatistics::is_directory); - m.def("Stat", [](const std::string& filename) { - py::gil_scoped_release release; - std::unique_ptr self( - new tensorflow::FileStatistics); - const auto status = tensorflow::Env::Default()->Stat(filename, self.get()); - py::gil_scoped_acquire acquire; - tensorflow::MaybeRaiseRegisteredFromStatus(status); - return self.release(); - }); - - using tensorflow::WritableFile; - py::class_(m, "WritableFile") - .def(py::init([](const std::string& filename, const std::string& mode) { + m.def( + "Stat", + [](const std::string& filename, PyTransactionToken* token) { py::gil_scoped_release release; - auto* env = tensorflow::Env::Default(); - std::unique_ptr self; - const auto status = mode.find("a") == std::string::npos - ? 
env->NewWritableFile(filename, &self) - : env->NewAppendableFile(filename, &self); + std::unique_ptr self( + new tensorflow::FileStatistics); + const auto status = + tensorflow::Env::Default()->Stat(filename, self.get()); py::gil_scoped_acquire acquire; tensorflow::MaybeRaiseRegisteredFromStatus(status); return self.release(); - })) + }, + py::arg("filename"), py::arg("token") = (PyTransactionToken*)nullptr); + + using tensorflow::WritableFile; + py::class_(m, "WritableFile") + .def(py::init([](const std::string& filename, const std::string& mode, PyTransactionToken* token) { + py::gil_scoped_release release; + auto* env = tensorflow::Env::Default(); + std::unique_ptr self; + const auto status = mode.find("a") == std::string::npos + ? env->NewWritableFile(filename, &self) + : env->NewAppendableFile(filename, &self); + py::gil_scoped_acquire acquire; + tensorflow::MaybeRaiseRegisteredFromStatus(status); + return self.release(); + }), + py::arg("filename"), py::arg("mode"), + py::arg("token") = (PyTransactionToken*)nullptr) .def("append", [](WritableFile* self, tensorflow::StringPiece data) { const auto status = self->Append(data); @@ -209,19 +271,23 @@ PYBIND11_MODULE(_pywrap_file_io, m) { using tensorflow::io::BufferedInputStream; py::class_(m, "BufferedInputStream") - .def(py::init([](const std::string& filename, size_t buffer_size) { - py::gil_scoped_release release; - std::unique_ptr file; - const auto status = - tensorflow::Env::Default()->NewRandomAccessFile(filename, &file); - tensorflow::MaybeRaiseRegisteredFromStatusWithGIL(status); - std::unique_ptr input_stream( - new tensorflow::io::RandomAccessInputStream(file.release(), - /*owns_file=*/true)); - py::gil_scoped_acquire acquire; - return new BufferedInputStream(input_stream.release(), buffer_size, - /*owns_input_stream=*/true); - })) + .def(py::init([](const std::string& filename, size_t buffer_size, PyTransactionToken* token) { + py::gil_scoped_release release; + std::unique_ptr file; + const auto status = + tensorflow::Env::Default()->NewRandomAccessFile(filename, + &file); + tensorflow::MaybeRaiseRegisteredFromStatusWithGIL(status); + std::unique_ptr + input_stream(new tensorflow::io::RandomAccessInputStream( + file.release(), + /*owns_file=*/true)); + py::gil_scoped_acquire acquire; + return new BufferedInputStream(input_stream.release(), buffer_size, + /*owns_input_stream=*/true); + }), + py::arg("filename"), py::arg("buffer_size"), + py::arg("token") = (PyTransactionToken*)nullptr) .def("read", [](BufferedInputStream* self, tensorflow::int64 bytes_to_read) { py::gil_scoped_release release; From 86617e591a5eea38935a76c983c20458eabbdf2b Mon Sep 17 00:00:00 2001 From: Robert David Date: Thu, 23 Jul 2020 12:42:13 -0700 Subject: [PATCH 1189/2522] LSTM test cleanup: Add test parameter for 20 or 24 inputs. This change increases test coverage, previously only NoCifg_NoPeephole_NoProjection_NoLayerNorm was tested with 24 inputs, other NoLayerNorm tests were only tested with 20. 
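The extra bool in the parameter tuple is what extends the 24-input coverage: ::testing::Combine now enumerates both model layouts for every existing weight-type and quantization combination. A small self-contained googletest sketch of the mechanism (the parameter types and test body are illustrative, not taken from lstm_test.cc):

```cpp
#include <tuple>

#include <gtest/gtest.h>

// Parameters: <use_quantized_weights, model_has_legacy_20_inputs>
class ExampleLstmOpTest
    : public ::testing::TestWithParam<std::tuple<bool, bool>> {};

TEST_P(ExampleLstmOpTest, Runs) {
  bool use_quantized_weights;
  bool model_has_legacy_20_inputs;
  std::tie(use_quantized_weights, model_has_legacy_20_inputs) = GetParam();
  // A real test would build the 20- or 24-input LSTM model here and
  // verify its outputs against golden data.
  const int expected_input_count = model_has_legacy_20_inputs ? 20 : 24;
  EXPECT_GT(expected_input_count, 0);
  (void)use_quantized_weights;
}

// Combine() crosses the two axes, so every weight configuration now runs
// against both the legacy 20-input and the 24-input model layout.
INSTANTIATE_TEST_SUITE_P(AllLayouts, ExampleLstmOpTest,
                         ::testing::Combine(::testing::Bool(),
                                            ::testing::Bool()));
```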
PiperOrigin-RevId: 322844245 Change-Id: I194c7806ea8b81c2fdf38116287f04588c2ff157 --- .../delegates/nnapi/acceleration_test_list.cc | 12 +++- tensorflow/lite/kernels/lstm_test.cc | 65 ++++++------------- 2 files changed, 30 insertions(+), 47 deletions(-) diff --git a/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc b/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc index 1167082f217..5c54a4447b2 100644 --- a/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc +++ b/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc @@ -247,12 +247,18 @@ CifgPeepholeNoProjectionNoClippingUnidirectionalLstmTest/NonLayerNormLstmBlackBo # Float .+_LstmOpTest\.Test/0,29 .+_LstmOpTest\.Test/1,29 -# HybridUint8 .+_LstmOpTest\.Test/2,29 .+_LstmOpTest\.Test/3,29 +# HybridUint8 +.+_LstmOpTest\.Test/4,29 +.+_LstmOpTest\.Test/5,29 +.+_LstmOpTest\.Test/6,29 +.+_LstmOpTest\.Test/7,29 # HybridInt8 --.+_LstmOpTest\.Test/4 --.+_LstmOpTest\.Test/5 +-.+_LstmOpTest\.Test/8 +-.+_LstmOpTest\.Test/9 +-.+_LstmOpTest\.Test/10 +-.+_LstmOpTest\.Test/11 # maximum_minimum_test MaxMinOpTest/.+nt8Test,29 diff --git a/tensorflow/lite/kernels/lstm_test.cc b/tensorflow/lite/kernels/lstm_test.cc index 1687e543f57..48ca90496ba 100644 --- a/tensorflow/lite/kernels/lstm_test.cc +++ b/tensorflow/lite/kernels/lstm_test.cc @@ -300,9 +300,10 @@ class LSTMOpModel : public SingleOpModel { // Parameters: // std::get<0>(GetParam()) => weight_type -// std::get<1>(GetParam()) => asymmetric_quantize_inputs +// std::get<1>(GetParam()) => model_has_legacy_20_inputs +// std::get<2>(GetParam()) => asymmetric_quantize_inputs class BaseLstmOpTest - : public ::testing::TestWithParam> { + : public ::testing::TestWithParam> { protected: // Weights of the LSTM model. Some are optional. std::vector input_to_input_weights_; @@ -450,8 +451,7 @@ class NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest } }; -TEST_P(NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest, - TestWith20Inputs) { +TEST_P(NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest, Test) { const int n_batch = 1; const int n_input = 2; // n_cell and n_output have the same size when there is no projection. @@ -459,8 +459,10 @@ TEST_P(NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest, const int n_output = 4; TensorType weight_type; + bool model_has_legacy_20_inputs; bool asymmetric_quantize_inputs; - std::tie(weight_type, asymmetric_quantize_inputs) = GetParam(); + std::tie(weight_type, model_has_legacy_20_inputs, + asymmetric_quantize_inputs) = GetParam(); // TODO(b/158205028): Fix this test if using NN-API. if (SingleOpModel::GetForceUseNnapi() && weight_type == TensorType_UINT8) { @@ -471,38 +473,7 @@ TEST_P(NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest, /*use_cifg=*/false, /*use_peephole=*/false, /*use_projection_weights=*/false, /*use_projection_bias=*/false, weight_type, - /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, - asymmetric_quantize_inputs); - - static const auto* tolerance_per_type = - new std::map{{TensorType_FLOAT32, 0.00001f}, - {TensorType_UINT8, 0.0157651f}, - {TensorType_INT8, 0.0157651f}}; - VerifyGoldens(&lstm, tolerance_per_type->at(weight_type)); -} - -TEST_P(NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest, - TestWith24Inputs) { - const int n_batch = 1; - const int n_input = 2; - // n_cell and n_output have the same size when there is no projection. 
- const int n_cell = 4; - const int n_output = 4; - - TensorType weight_type; - bool asymmetric_quantize_inputs; - std::tie(weight_type, asymmetric_quantize_inputs) = GetParam(); - - // TODO(b/158205028): Fix this test if using NN-API. - if (SingleOpModel::GetForceUseNnapi() && weight_type == TensorType_UINT8) { - return; - } - - LSTMOpModel lstm(n_batch, n_input, n_cell, n_output, - /*use_cifg=*/false, /*use_peephole=*/false, - /*use_projection_weights=*/false, - /*use_projection_bias=*/false, weight_type, - /*model_has_legacy_20_inputs=*/false, + model_has_legacy_20_inputs, /*is_layer_norm=*/false, asymmetric_quantize_inputs); static const auto* tolerance_per_type = @@ -568,8 +539,10 @@ TEST_P(Cifg_Peephole_NoProjection_NoLayerNorm_LstmOpTest, Test) { const int n_output = 4; TensorType weight_type; + bool model_has_legacy_20_inputs; bool asymmetric_quantize_inputs; - std::tie(weight_type, asymmetric_quantize_inputs) = GetParam(); + std::tie(weight_type, model_has_legacy_20_inputs, + asymmetric_quantize_inputs) = GetParam(); // TODO(b/158205028): Fix this test if using NN-API. if (SingleOpModel::GetForceUseNnapi() && weight_type == TensorType_UINT8) { @@ -580,7 +553,7 @@ TEST_P(Cifg_Peephole_NoProjection_NoLayerNorm_LstmOpTest, Test) { /*use_cifg=*/true, /*use_peephole=*/true, /*use_projection_weights=*/false, /*use_projection_bias=*/false, weight_type, - /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, + model_has_legacy_20_inputs, /*is_layer_norm=*/false, asymmetric_quantize_inputs); static const auto* tolerance_per_type = @@ -1197,8 +1170,10 @@ TEST_P(NoCifg_Peephole_Projection_NoLayerNorm_LstmOpTest, Test) { const int n_output = 16; TensorType weight_type; + bool model_has_legacy_20_inputs; bool asymmetric_quantize_inputs; - std::tie(weight_type, asymmetric_quantize_inputs) = GetParam(); + std::tie(weight_type, model_has_legacy_20_inputs, + asymmetric_quantize_inputs) = GetParam(); // TODO(b/158205028): Fix this test if using NN-API. if (SingleOpModel::GetForceUseNnapi() && weight_type == TensorType_UINT8) { @@ -1209,7 +1184,7 @@ TEST_P(NoCifg_Peephole_Projection_NoLayerNorm_LstmOpTest, Test) { /*use_cifg=*/false, /*use_peephole=*/true, /*use_projection_weights=*/true, /*use_projection_bias=*/false, weight_type, - /*model_has_legacy_20_inputs=*/true, /*is_layer_norm=*/false, + model_has_legacy_20_inputs, /*is_layer_norm=*/false, asymmetric_quantize_inputs); static const auto* tolerance_per_type = new std::map{ @@ -1306,8 +1281,9 @@ TEST_P(NoCifg_Peephole_Projection_LayerNorm_LstmOpTest, Test) { const int n_output = 3; TensorType weight_type; + // Layer normalization needs 24 inputs. bool asymmetric_quantize_inputs; - std::tie(weight_type, asymmetric_quantize_inputs) = GetParam(); + std::tie(weight_type, std::ignore, asymmetric_quantize_inputs) = GetParam(); // TODO(b/158205028): Fix this test if using NN-API. if (SingleOpModel::GetForceUseNnapi() && weight_type == TensorType_UINT8) { @@ -1394,8 +1370,9 @@ TEST_P(Cifg_Peephole_Projection_LayerNorm_LstmOpTest, Test) { const int n_output = 3; TensorType weight_type; + // Layer normalization needs 24 inputs. bool asymmetric_quantize_inputs; - std::tie(weight_type, asymmetric_quantize_inputs) = GetParam(); + std::tie(weight_type, std::ignore, asymmetric_quantize_inputs) = GetParam(); // TODO(b/158205028): Fix this test if using NN-API. 
if (SingleOpModel::GetForceUseNnapi() && weight_type == TensorType_UINT8) { @@ -2228,7 +2205,7 @@ TEST(LstmOpTest, InvalidTypes) { ::testing::Combine( \ ::testing::Values(TensorType_FLOAT32, TensorType_UINT8, \ TensorType_UINT8), \ - ::testing::Bool())) + ::testing::Bool(), ::testing::Bool())) QUANTIZE_PARAMETER_TEST(NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest); QUANTIZE_PARAMETER_TEST(Cifg_Peephole_NoProjection_NoLayerNorm_LstmOpTest); From eb6f0a6bd6afc5268c2651e4661a95309fa956eb Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Thu, 23 Jul 2020 12:56:34 -0700 Subject: [PATCH 1190/2522] Port the concatenation test to the new TfLiteEvalTensor API. PiperOrigin-RevId: 322847314 Change-Id: Ia094012237fe2fffdb2b1637b40f8f4fe30e8155 --- tensorflow/lite/micro/kernels/BUILD | 1 + .../lite/micro/kernels/concatenation.cc | 54 +++++++------ .../lite/micro/kernels/concatenation_test.cc | 80 ++++++------------- 3 files changed, 55 insertions(+), 80 deletions(-) diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index 78e66a35197..82defba370f 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD @@ -548,6 +548,7 @@ tflite_micro_cc_test( "concatenation_test.cc", ], deps = [ + ":kernel_runner", "//tensorflow/lite/c:common", "//tensorflow/lite/micro:op_resolvers", "//tensorflow/lite/micro/testing:micro_test", diff --git a/tensorflow/lite/micro/kernels/concatenation.cc b/tensorflow/lite/micro/kernels/concatenation.cc index e6e56a5c32c..fb47349f283 100644 --- a/tensorflow/lite/micro/kernels/concatenation.cc +++ b/tensorflow/lite/micro/kernels/concatenation.cc @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/internal/types.h" #include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { namespace ops { @@ -49,12 +50,14 @@ inline int CalculatePositiveAxis(int axis, const TfLiteTensor* output_tensor) { // class VectorOfTensors and class VectorOfQuantizedTensors in TFLite. // Gets shapes from a list of tensors. -inline void GetAllTensorShapes(const TfLiteContext& context, - const TfLiteIntArray& tensor_list, - RuntimeShape all_shapes[kMaxInputNum]) { - for (int i = 0; i < tensor_list.size; ++i) { - const TfLiteTensor* t = &context.tensors[tensor_list.data[i]]; - RuntimeShape shape = GetTensorShape(t); +inline void GetAllInputTensorShapes(const TfLiteContext* context, + const TfLiteNode* node, + RuntimeShape all_shapes[kMaxInputNum]) { + TFLITE_DCHECK(context != nullptr); + TFLITE_DCHECK(node != nullptr); + for (int i = 0; i < node->inputs->size; ++i) { + const TfLiteEvalTensor* t = tflite::micro::GetEvalInput(context, node, i); + RuntimeShape shape = tflite::micro::GetTensorShape(t); all_shapes[i].ReplaceWith(shape.DimensionsCount(), shape.DimsData()); } } @@ -69,12 +72,14 @@ inline void GetShapesPointers(const RuntimeShape* shapes, size_t num, // Gets data pointers from a list of tensors. 
template -inline void GetAllTensorData(const TfLiteContext& context, - const TfLiteIntArray& tensor_list, - T* all_data[kMaxInputNum]) { - for (int i = 0; i < tensor_list.size; ++i) { - const TfLiteTensor* t = &context.tensors[tensor_list.data[i]]; - all_data[i] = GetTensorData(t); +inline void GetAllInputTensorData(const TfLiteContext* context, + const TfLiteNode* node, + T* all_data[kMaxInputNum]) { + TFLITE_DCHECK(context != nullptr); + TFLITE_DCHECK(node != nullptr); + for (int i = 0; i < node->inputs->size; ++i) { + const TfLiteEvalTensor* t = tflite::micro::GetEvalInput(context, node, i); + all_data[i] = tflite::micro::GetTensorData(t); } } @@ -84,18 +89,19 @@ void EvalUnquantized(TfLiteContext* context, TfLiteNode* node) { RuntimeShape inputs_shape[kMaxInputNum]; const RuntimeShape* inputs_shape_ptr[kMaxInputNum]; const data_type* inputs_data[kMaxInputNum]; - GetAllTensorShapes(*context, *node->inputs, inputs_shape); + GetAllInputTensorShapes(context, node, inputs_shape); GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr); - GetAllTensorData(*context, *node->inputs, inputs_data); + GetAllInputTensorData(context, node, inputs_data); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); TFLITE_DCHECK(node->user_data != nullptr); const OpData* data = static_cast(node->user_data); reference_ops::Concatenation(data->params, inputs_shape_ptr, inputs_data, - GetTensorShape(output), - GetTensorData(output)); + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } void EvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node) { @@ -103,18 +109,20 @@ void EvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node) { RuntimeShape inputs_shape[kMaxInputNum]; const RuntimeShape* inputs_shape_ptr[kMaxInputNum]; const uint8_t* inputs_data[kMaxInputNum]; - GetAllTensorShapes(*context, *node->inputs, inputs_shape); + GetAllInputTensorShapes(context, node, inputs_shape); GetShapesPointers(inputs_shape, node->inputs->size, inputs_shape_ptr); - GetAllTensorData(*context, *node->inputs, inputs_data); + GetAllInputTensorData(context, node, inputs_data); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); TFLITE_DCHECK(node->user_data != nullptr); const OpData* data = static_cast(node->user_data); - reference_ops::ConcatenationWithScaling(data->params, inputs_shape_ptr, - inputs_data, GetTensorShape(output), - GetTensorData(output)); + reference_ops::ConcatenationWithScaling( + data->params, inputs_shape_ptr, inputs_data, + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } void* Init(TfLiteContext* context, const char* buffer, size_t length) { diff --git a/tensorflow/lite/micro/kernels/concatenation_test.cc b/tensorflow/lite/micro/kernels/concatenation_test.cc index e9ca6c93e81..c3fa395600f 100644 --- a/tensorflow/lite/micro/kernels/concatenation_test.cc +++ b/tensorflow/lite/micro/kernels/concatenation_test.cc @@ -16,7 +16,7 @@ limitations under the License. 
#include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/micro/all_ops_resolver.h" +#include "tensorflow/lite/micro/kernels/kernel_runner.h" #include "tensorflow/lite/micro/testing/micro_test.h" #include "tensorflow/lite/micro/testing/test_utils.h" @@ -44,41 +44,24 @@ void TestConcatenateTwoInputs(std::initializer_list input1_dims_data, CreateFloatTensor(input2_data, input2_dims), CreateFloatTensor(output_data, output_dims)}; - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_CONCATENATION); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); + int inputs_array_data[] = {2, 0, 1}; + TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); + int outputs_array_data[] = {1, 2}; + TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); TfLiteConcatenationParams builtin_data = { .axis = axis, .activation = kTfLiteActNone // Only activation supported in this impl }; - int inputs_array_data[] = {2, 0, 1}; - TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); - int outputs_array_data[] = {1, 2}; - TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); + const TfLiteRegistration registration = + tflite::ops::micro::Register_CONCATENATION(); + micro::KernelRunner runner( + registration, tensors, tensors_size, inputs_array, outputs_array, + reinterpret_cast(&builtin_data), micro_test::reporter); - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, /*buffer=*/nullptr, /*length=*/0); - } - - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = reinterpret_cast(&builtin_data); - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; - - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); const int output_dims_count = ElementCount(*output_dims); for (int i = 0; i < output_dims_count; ++i) { @@ -107,41 +90,24 @@ void TestConcatenateQuantizedTwoInputs( CreateQuantizedTensor(input2_data, input2_dims, input_min, input_max), CreateQuantizedTensor(output_data, output_dims, output_min, output_max)}; - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_CONCATENATION); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); + int inputs_array_data[] = {2, 0, 1}; + TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); + int outputs_array_data[] = {1, 2}; + TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); TfLiteConcatenationParams builtin_data = { .axis = axis, .activation = kTfLiteActNone // Only activation supported in this impl }; - int inputs_array_data[] = {2, 0, 1}; - TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); - int outputs_array_data[] = {1, 2}; - TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); + const TfLiteRegistration registration = + 
tflite::ops::micro::Register_CONCATENATION(); + micro::KernelRunner runner( + registration, tensors, tensors_size, inputs_array, outputs_array, + reinterpret_cast(&builtin_data), micro_test::reporter); - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, /*buffer=*/nullptr, /*length=*/0); - } - - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = reinterpret_cast(&builtin_data); - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; - - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); const int output_dims_count = ElementCount(*output_dims); for (int i = 0; i < output_dims_count; ++i) { From a41ca213d70b0ee099a4ab893418a5e884a678df Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 23 Jul 2020 13:06:17 -0700 Subject: [PATCH 1191/2522] Update tensorboard nightly dependency to 2.4.x - 3 Now that TensorBoard 2.3.0 has been released, our nightlies have moved up to 2.4.x alphas. tb-nightly alphas starting from 2.4.0a0 are now synced to TensorFlow's major versions, not minor versions. PiperOrigin-RevId: 322849529 Change-Id: I227c754fb553fd7b2a986ff25dcbcd0ce36b86f3 --- tensorflow/tools/pip_package/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index d21bcfbfc8b..4b03ccd3ee8 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -89,7 +89,7 @@ if '--project_name' in sys.argv: if 'tf_nightly' in project_name: for i, pkg in enumerate(REQUIRED_PACKAGES): if 'tensorboard' in pkg: - REQUIRED_PACKAGES[i] = 'tb-nightly >= 2.3.0a0, < 2.4.0a0' + REQUIRED_PACKAGES[i] = 'tb-nightly >= 2.4.0a0, < 3.0.0a0' elif 'tensorflow_estimator' in pkg: REQUIRED_PACKAGES[i] = 'tf-estimator-nightly' From f5d98d0171f4043d40dc6de516d36b894044d49a Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Thu, 23 Jul 2020 20:17:23 +0000 Subject: [PATCH 1192/2522] Fix incorrect mapping in fast tensor <=> np mapping This PR fixes the incorrect mapping in fast tensor <=> np mapping See diff below: ```diff dtypes.qint16.as_numpy_dtype: - fast_tensor_util.AppendInt8ArrayToTensorProto, + fast_tensor_util.AppendInt16ArrayToTensorProto, dtypes.quint16.as_numpy_dtype: - fast_tensor_util.AppendUInt8ArrayToTensorProto, + fast_tensor_util.AppendUInt16ArrayToTensorProto, ``` Signed-off-by: Yong Tang --- tensorflow/python/framework/tensor_util.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py index 6289ee5b3e8..9b30d053c79 100644 --- a/tensorflow/python/framework/tensor_util.py +++ b/tensorflow/python/framework/tensor_util.py @@ -114,9 +114,9 @@ if _FAST_TENSOR_UTIL_AVAILABLE: dtypes.quint8.as_numpy_dtype: fast_tensor_util.AppendUInt8ArrayToTensorProto, dtypes.qint16.as_numpy_dtype: - fast_tensor_util.AppendInt8ArrayToTensorProto, + fast_tensor_util.AppendInt16ArrayToTensorProto, dtypes.quint16.as_numpy_dtype: - fast_tensor_util.AppendUInt8ArrayToTensorProto, + fast_tensor_util.AppendUInt16ArrayToTensorProto, dtypes.qint32.as_numpy_dtype: 
fast_tensor_util.AppendInt32ArrayToTensorProto, # NOTE(touts): Intentionally no way to feed a DT_BFLOAT16. From d8a49c8dacbea40420f4e1bd4ab1a533e2d14c47 Mon Sep 17 00:00:00 2001 From: Tomer Kaftan Date: Thu, 23 Jul 2020 13:07:20 -0700 Subject: [PATCH 1193/2522] Functional model get_config/from_config loses potentially important information about the data structure in the first positional argument to the model. E.g. it can't distinguish between layer(x) and layer([x]) This CL makes sure this information gets preserved at the very least for TFOpLambda layers, because it is important for some tf apis such as tf.stack. Fixing this longer-term for all layers will require care to not break existing configs. PiperOrigin-RevId: 322849759 Change-Id: I3db78e9d6d4543a41baeb27dfab6c5a64564abd3 --- tensorflow/python/keras/engine/base_layer.py | 6 ++ .../python/keras/engine/base_layer_v1.py | 6 ++ tensorflow/python/keras/engine/functional.py | 4 +- tensorflow/python/keras/engine/node.py | 3 +- tensorflow/python/keras/layers/core.py | 4 ++ .../keras/layers/tensorflow_op_layer_test.py | 64 ++++++++++++++++--- 6 files changed, 77 insertions(+), 10 deletions(-) diff --git a/tensorflow/python/keras/engine/base_layer.py b/tensorflow/python/keras/engine/base_layer.py index e7c394e614d..ac6c1a9a739 100644 --- a/tensorflow/python/keras/engine/base_layer.py +++ b/tensorflow/python/keras/engine/base_layer.py @@ -414,6 +414,12 @@ class Layer(module.Module, version_utils.LayerVersionSelector): # might want to turn it off, like Sequential model. self._auto_track_sub_layers = True + # For backwards compat reasons, most built-in layers do not guarantee + # That they will 100% preserve the structure of input args when saving + # / loading configs. E.g. they may un-nest an arg that is + # a list with one element. + self._preserve_input_structure_in_config = False + @trackable.no_automatic_dependency_tracking @generic_utils.default def build(self, input_shape): diff --git a/tensorflow/python/keras/engine/base_layer_v1.py b/tensorflow/python/keras/engine/base_layer_v1.py index e3cc738c434..9822094df26 100644 --- a/tensorflow/python/keras/engine/base_layer_v1.py +++ b/tensorflow/python/keras/engine/base_layer_v1.py @@ -255,6 +255,12 @@ class Layer(base_layer.Layer): # Mark this layer as having been originally built as a tf1 layer/model self._originally_built_as_v1 = True + # For backwards compat reasons, most built-in layers do not guarantee + # That they will 100% preserve the structure of input args when saving + # / loading configs. E.g. they may un-nest an arg that is + # a list with one element. + self._preserve_input_structure_in_config = False + @trackable.no_automatic_dependency_tracking @generic_utils.default def build(self, input_shape): diff --git a/tensorflow/python/keras/engine/functional.py b/tensorflow/python/keras/engine/functional.py index 2b991cc187c..707dedac028 100644 --- a/tensorflow/python/keras/engine/functional.py +++ b/tensorflow/python/keras/engine/functional.py @@ -1154,7 +1154,9 @@ def reconstruct_from_config(config, custom_objects=None, created_layers=None): # Call layer on its inputs, thus creating the node # and building the layer if needed. if input_tensors is not None: - input_tensors = base_layer_utils.unnest_if_single_tensor(input_tensors) + if not layer._preserve_input_structure_in_config: + input_tensors = ( + base_layer_utils.unnest_if_single_tensor(input_tensors)) output_tensors = layer(input_tensors, **kwargs) # Update node index map. 
diff --git a/tensorflow/python/keras/engine/node.py b/tensorflow/python/keras/engine/node.py index c61f9ff5fda..eb85bce7e75 100644 --- a/tensorflow/python/keras/engine/node.py +++ b/tensorflow/python/keras/engine/node.py @@ -198,7 +198,8 @@ class Node(object): return tf_utils.ListWrapper(data) data = nest.map_structure(serialize_first_arg_tensor, inputs) - if not nest.is_nested(data): + if (not nest.is_nested(data) and + not self.layer._preserve_input_structure_in_config): data = [data] data = tf_utils.convert_inner_node_data(data) return data diff --git a/tensorflow/python/keras/layers/core.py b/tensorflow/python/keras/layers/core.py index 2d69782a1cf..ca26ee6588f 100644 --- a/tensorflow/python/keras/layers/core.py +++ b/tensorflow/python/keras/layers/core.py @@ -1307,6 +1307,10 @@ class TFOpLambda(Layer): super(TFOpLambda, self).__init__(**kwargs) + # Preserve all argument data structures when saving/loading a config + # (e.g., don't unnest lists that contain one element) + self._preserve_input_structure_in_config = True + # Warning on every invocation will be quite irksome in Eager mode. self._already_warned = False diff --git a/tensorflow/python/keras/layers/tensorflow_op_layer_test.py b/tensorflow/python/keras/layers/tensorflow_op_layer_test.py index 817e746bc70..bbec7ef44c8 100644 --- a/tensorflow/python/keras/layers/tensorflow_op_layer_test.py +++ b/tensorflow/python/keras/layers/tensorflow_op_layer_test.py @@ -295,6 +295,57 @@ class AutoLambdaTest(keras_parameterized.TestCase): self.assertAllEqual([layer.name for layer in model.layers], [layer.name for layer in new_model.layers]) + def test_stack_preserves_correct_shape(self): + ## Test stack([x]) + inp = keras.Input(shape=(), dtype='float32') + + out = array_ops.stack([inp]) + model = keras.Model( + inputs=inp, + outputs=out) + model.compile( + adam.Adam(0.001), + 'mse', + run_eagerly=testing_utils.should_run_eagerly()) + + x = array_ops.ones(shape=(4, 4)) + expected = array_ops.stack([x]) + self.assertAllEqual(expected.shape, (1, 4, 4)) + + self.assertAllEqual(model(x).shape, (1, 4, 4)) + self.assertAllEqual(model(x), expected) + + config = model.get_config() + model = keras.Model.from_config(config) + + self.assertAllEqual(model(x).shape, (1, 4, 4)) + self.assertAllEqual(model(x), expected) + + ## Test stack(x) + inp = keras.Input(shape=(), dtype='float32') + + out = array_ops.stack(inp) + model = keras.Model( + inputs=inp, + outputs=out) + model.compile( + adam.Adam(0.001), + 'mse', + run_eagerly=testing_utils.should_run_eagerly()) + + x = array_ops.ones(shape=(4, 4)) + expected = array_ops.stack(x) + self.assertAllEqual(expected.shape, (4, 4)) + + self.assertAllEqual(model(x).shape, (4, 4)) + self.assertAllEqual(model(x), expected) + + config = model.get_config() + model = keras.Model.from_config(config) + + self.assertAllEqual(model(x).shape, (4, 4)) + self.assertAllEqual(model(x), expected) + def test_getitem_slice_with_step_only(self): if not context.executing_eagerly(): self.skipTest('Complex slicing like this fails in v1') @@ -360,10 +411,8 @@ class AutoLambdaTest(keras_parameterized.TestCase): self.assertAllEqual(model(args), expected) self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) - # TODO(b/161925288): Fix the bug then uncomment: - # # Make sure it can be successfully saved and loaded - # config = model.get_config() - # model = keras.Model.from_config(config) + config = model.get_config() + model = keras.Model.from_config(config) self.assertAllEqual(model(args), expected) 
self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) @@ -396,10 +445,9 @@ class AutoLambdaTest(keras_parameterized.TestCase): self.assertAllEqual(model(args), expected) self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) - # TODO(b/161925288): Fix the bug then uncomment: - # # Make sure it can be successfully saved and loaded - # config = model.get_config() - # model = keras.Model.from_config(config) + # Make sure it can be successfully saved and loaded + config = model.get_config() + model = keras.Model.from_config(config) self.assertAllEqual(model(args), expected) self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) From 6eaab98392866210ef50619b5e2603b7dddda51a Mon Sep 17 00:00:00 2001 From: Robert David Date: Thu, 23 Jul 2020 13:08:46 -0700 Subject: [PATCH 1194/2522] LSTM test cleanup: Move setting tensor data into the testcase, and remove redundant fixture classes. PiperOrigin-RevId: 322850069 Change-Id: I500bf4dbd78f0ccaf586f44ead04b7f27aa70991 --- .../delegates/nnapi/acceleration_test_list.cc | 24 +- tensorflow/lite/kernels/lstm_test.cc | 1638 ++++++++--------- 2 files changed, 791 insertions(+), 871 deletions(-) diff --git a/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc b/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc index 5c54a4447b2..b2a54cfdf44 100644 --- a/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc +++ b/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc @@ -245,20 +245,20 @@ CifgPeepholeNoProjectionNoClippingUnidirectionalLstmTest/NonLayerNormLstmBlackBo # lstm_test -LstmOpTest/InvalidTypes # Float -.+_LstmOpTest\.Test/0,29 -.+_LstmOpTest\.Test/1,29 -.+_LstmOpTest\.Test/2,29 -.+_LstmOpTest\.Test/3,29 +Parameterized/LstmOpTest.+/0,29 +Parameterized/LstmOpTest.+/1,29 +Parameterized/LstmOpTest.+/2,29 +Parameterized/LstmOpTest.+/3,29 # HybridUint8 -.+_LstmOpTest\.Test/4,29 -.+_LstmOpTest\.Test/5,29 -.+_LstmOpTest\.Test/6,29 -.+_LstmOpTest\.Test/7,29 +Parameterized/LstmOpTest.+/4,29 +Parameterized/LstmOpTest.+/5,29 +Parameterized/LstmOpTest.+/6,29 +Parameterized/LstmOpTest.+/7,29 # HybridInt8 --.+_LstmOpTest\.Test/8 --.+_LstmOpTest\.Test/9 --.+_LstmOpTest\.Test/10 --.+_LstmOpTest\.Test/11 +-Parameterized/LstmOpTest.+/8 +-Parameterized/LstmOpTest.+/9 +-Parameterized/LstmOpTest.+/10 +-Parameterized/LstmOpTest.+/11 # maximum_minimum_test MaxMinOpTest/.+nt8Test,29 diff --git a/tensorflow/lite/kernels/lstm_test.cc b/tensorflow/lite/kernels/lstm_test.cc index 48ca90496ba..023df0e67b8 100644 --- a/tensorflow/lite/kernels/lstm_test.cc +++ b/tensorflow/lite/kernels/lstm_test.cc @@ -302,7 +302,7 @@ class LSTMOpModel : public SingleOpModel { // std::get<0>(GetParam()) => weight_type // std::get<1>(GetParam()) => model_has_legacy_20_inputs // std::get<2>(GetParam()) => asymmetric_quantize_inputs -class BaseLstmOpTest +class LstmOpTest : public ::testing::TestWithParam> { protected: // Weights of the LSTM model. Some are optional. 
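Folding the per-configuration fixtures into a single parameterized LstmOpTest suite also changes the generated test names, which is why the acceleration_test_list entries above move from `.+_LstmOpTest\.Test/N` patterns to `Parameterized/LstmOpTest.+/N`: googletest names parameterized tests as <instantiation>/<suite>.<test>/<index>. A tiny standalone illustration, assuming an instantiation named Parameterized as the updated regexes suggest:

```cpp
#include <gtest/gtest.h>

class LstmOpTest : public ::testing::TestWithParam<int> {};

TEST_P(LstmOpTest, NoCifg_NoPeephole_NoProjection_NoLayerNorm) {
  EXPECT_GE(GetParam(), 0);
}

// Produces test names of the form
//   Parameterized/LstmOpTest.NoCifg_NoPeephole_NoProjection_NoLayerNorm/0
//   Parameterized/LstmOpTest.NoCifg_NoPeephole_NoProjection_NoLayerNorm/1
// which is the shape matched by the updated acceleration_test_list regexes.
INSTANTIATE_TEST_SUITE_P(Parameterized, LstmOpTest, ::testing::Values(0, 1));
```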
@@ -401,57 +401,7 @@ class BaseLstmOpTest } }; -class NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest - : public BaseLstmOpTest { - void SetUp() override { - input_to_input_weights_ = {-0.45018822, -0.02338299, -0.0870589, - -0.34550029, 0.04266912, -0.15680569, - -0.34856534, 0.43890524}; - input_to_cell_weights_ = {-0.50013041, 0.1370284, 0.11810488, 0.2013163, - -0.20583314, 0.44344562, 0.22077113, -0.29909778}; - input_to_forget_weights_ = {0.09701663, 0.20334584, -0.50592935, - -0.31343272, -0.40032279, 0.44781327, - 0.01387155, -0.35593212}; - input_to_output_weights_ = {-0.25065863, -0.28290087, 0.04613829, - 0.40525138, 0.44272184, 0.03897077, - -0.1556896, 0.19487578}; - input_gate_bias_ = {0., 0., 0., 0.}; - cell_gate_bias_ = {0., 0., 0., 0.}; - forget_gate_bias_ = {1., 1., 1., 1.}; - output_gate_bias_ = {0., 0., 0., 0.}; - - recurrent_to_input_weights_ = { - -0.0063535, -0.2042388, 0.31454784, -0.35746509, - 0.28902304, 0.08183324, -0.16555229, 0.02286911, - -0.13566875, 0.03034258, 0.48091322, -0.12528998, - 0.24077177, -0.51332325, -0.33502164, 0.10629296}; - - recurrent_to_cell_weights_ = { - -0.3407414, 0.24443203, -0.2078532, 0.26320225, - 0.05695659, -0.00123841, -0.4744786, -0.35869038, - -0.06418842, -0.13502428, -0.501764, 0.22830659, - -0.46367589, 0.26016325, -0.03894562, -0.16368064}; - - recurrent_to_forget_weights_ = { - -0.48684245, -0.06655136, 0.42224967, 0.2112639, - 0.27654213, 0.20864892, -0.07646349, 0.45877004, - 0.00141793, -0.14609534, 0.36447752, 0.09196436, - 0.28053468, 0.01560611, -0.20127171, -0.01140004}; - - recurrent_to_output_weights_ = { - 0.43385774, -0.17194885, 0.2718237, 0.09215671, - 0.24107647, -0.39835793, 0.18212086, 0.01301402, - 0.48572797, -0.50656658, 0.20047462, -0.20607421, - -0.51818722, -0.15390486, 0.0468148, 0.39922136}; - - lstm_input_ = {{2., 3., 3., 4., 1., 1.}}; - lstm_golden_output_ = {{-0.02973187, 0.1229473, 0.20885126, -0.15358765, - -0.03716109, 0.12507336, 0.41193449, -0.20860538, - -0.15053082, 0.09120187, 0.24278517, -0.12222792}}; - } -}; - -TEST_P(NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest, Test) { +TEST_P(LstmOpTest, NoCifg_NoPeephole_NoProjection_NoLayerNorm) { const int n_batch = 1; const int n_input = 2; // n_cell and n_output have the same size when there is no projection. 
@@ -469,6 +419,49 @@ TEST_P(NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest, Test) { return; } + input_to_input_weights_ = {-0.45018822, -0.02338299, -0.0870589, -0.34550029, + 0.04266912, -0.15680569, -0.34856534, 0.43890524}; + input_to_cell_weights_ = {-0.50013041, 0.1370284, 0.11810488, 0.2013163, + -0.20583314, 0.44344562, 0.22077113, -0.29909778}; + input_to_forget_weights_ = {0.09701663, 0.20334584, -0.50592935, + -0.31343272, -0.40032279, 0.44781327, + 0.01387155, -0.35593212}; + input_to_output_weights_ = {-0.25065863, -0.28290087, 0.04613829, 0.40525138, + 0.44272184, 0.03897077, -0.1556896, 0.19487578}; + input_gate_bias_ = {0., 0., 0., 0.}; + cell_gate_bias_ = {0., 0., 0., 0.}; + forget_gate_bias_ = {1., 1., 1., 1.}; + output_gate_bias_ = {0., 0., 0., 0.}; + + recurrent_to_input_weights_ = { + -0.0063535, -0.2042388, 0.31454784, -0.35746509, + 0.28902304, 0.08183324, -0.16555229, 0.02286911, + -0.13566875, 0.03034258, 0.48091322, -0.12528998, + 0.24077177, -0.51332325, -0.33502164, 0.10629296}; + + recurrent_to_cell_weights_ = { + -0.3407414, 0.24443203, -0.2078532, 0.26320225, + 0.05695659, -0.00123841, -0.4744786, -0.35869038, + -0.06418842, -0.13502428, -0.501764, 0.22830659, + -0.46367589, 0.26016325, -0.03894562, -0.16368064}; + + recurrent_to_forget_weights_ = { + -0.48684245, -0.06655136, 0.42224967, 0.2112639, + 0.27654213, 0.20864892, -0.07646349, 0.45877004, + 0.00141793, -0.14609534, 0.36447752, 0.09196436, + 0.28053468, 0.01560611, -0.20127171, -0.01140004}; + + recurrent_to_output_weights_ = { + 0.43385774, -0.17194885, 0.2718237, 0.09215671, + 0.24107647, -0.39835793, 0.18212086, 0.01301402, + 0.48572797, -0.50656658, 0.20047462, -0.20607421, + -0.51818722, -0.15390486, 0.0468148, 0.39922136}; + + lstm_input_ = {{2., 3., 3., 4., 1., 1.}}; + lstm_golden_output_ = {{-0.02973187, 0.1229473, 0.20885126, -0.15358765, + -0.03716109, 0.12507336, 0.41193449, -0.20860538, + -0.15053082, 0.09120187, 0.24278517, -0.12222792}}; + LSTMOpModel lstm(n_batch, n_input, n_cell, n_output, /*use_cifg=*/false, /*use_peephole=*/false, /*use_projection_weights=*/false, @@ -483,55 +476,7 @@ TEST_P(NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest, Test) { VerifyGoldens(&lstm, tolerance_per_type->at(weight_type)); } -class Cifg_Peephole_NoProjection_NoLayerNorm_LstmOpTest - : public BaseLstmOpTest { - void SetUp() override { - input_to_cell_weights_ = {-0.49770179, -0.27711356, -0.09624726, - 0.05100781, 0.04717243, 0.48944736, - -0.38535351, -0.17212132}; - - input_to_forget_weights_ = {-0.55291498, -0.42866567, 0.13056988, - -0.3633365, -0.22755712, 0.28253698, - 0.24407166, 0.33826375}; - - input_to_output_weights_ = {0.10725588, -0.02335852, -0.55932593, - -0.09426838, -0.44257352, 0.54939759, - 0.01533556, 0.42751634}; - cell_gate_bias_ = {0., 0., 0., 0.}; - forget_gate_bias_ = {1., 1., 1., 1.}; - output_gate_bias_ = {0., 0., 0., 0.}; - - recurrent_to_cell_weights_ = { - 0.54066205, -0.32668582, -0.43562764, -0.56094903, - 0.42957711, 0.01841056, -0.32764608, -0.33027974, - -0.10826075, 0.20675004, 0.19069612, -0.03026325, - -0.54532051, 0.33003211, 0.44901288, 0.21193194}; - - recurrent_to_forget_weights_ = { - -0.13832897, -0.0515101, -0.2359007, -0.16661474, - -0.14340827, 0.36986142, 0.23414481, 0.55899, - 0.10798943, -0.41174671, 0.17751795, -0.34484994, - -0.35874045, -0.11352962, 0.27268326, 0.54058349}; - - recurrent_to_output_weights_ = { - 0.41613156, 0.42610586, -0.16495961, -0.5663873, - 0.30579174, -0.05115908, -0.33941799, 0.23364776, - 0.11178309, 0.09481031, 
-0.26424935, 0.46261835, - 0.50248802, 0.26114327, -0.43736315, 0.33149987}; - - cell_to_forget_weights_ = {0.47485286, -0.51955009, -0.24458408, - 0.31544167}; - cell_to_output_weights_ = {-0.17135078, 0.82760304, 0.85573703, - -0.77109635}; - - lstm_input_ = {{2., 3., 3., 4., 1., 1.}}; - lstm_golden_output_ = {{-0.36444446, -0.00352185, 0.12886585, -0.05163646, - -0.42312205, -0.01218222, 0.24201041, -0.08124574, - -0.358325, -0.04621704, 0.21641694, -0.06471302}}; - } -}; - -TEST_P(Cifg_Peephole_NoProjection_NoLayerNorm_LstmOpTest, Test) { +TEST_P(LstmOpTest, Cifg_Peephole_NoProjection_NoLayerNorm) { const int n_batch = 1; const int n_input = 2; // n_cell and n_output have the same size when there is no projection. @@ -549,6 +494,45 @@ TEST_P(Cifg_Peephole_NoProjection_NoLayerNorm_LstmOpTest, Test) { return; } + input_to_cell_weights_ = {-0.49770179, -0.27711356, -0.09624726, 0.05100781, + 0.04717243, 0.48944736, -0.38535351, -0.17212132}; + + input_to_forget_weights_ = {-0.55291498, -0.42866567, 0.13056988, -0.3633365, + -0.22755712, 0.28253698, 0.24407166, 0.33826375}; + + input_to_output_weights_ = {0.10725588, -0.02335852, -0.55932593, + -0.09426838, -0.44257352, 0.54939759, + 0.01533556, 0.42751634}; + cell_gate_bias_ = {0., 0., 0., 0.}; + forget_gate_bias_ = {1., 1., 1., 1.}; + output_gate_bias_ = {0., 0., 0., 0.}; + + recurrent_to_cell_weights_ = { + 0.54066205, -0.32668582, -0.43562764, -0.56094903, + 0.42957711, 0.01841056, -0.32764608, -0.33027974, + -0.10826075, 0.20675004, 0.19069612, -0.03026325, + -0.54532051, 0.33003211, 0.44901288, 0.21193194}; + + recurrent_to_forget_weights_ = { + -0.13832897, -0.0515101, -0.2359007, -0.16661474, + -0.14340827, 0.36986142, 0.23414481, 0.55899, + 0.10798943, -0.41174671, 0.17751795, -0.34484994, + -0.35874045, -0.11352962, 0.27268326, 0.54058349}; + + recurrent_to_output_weights_ = { + 0.41613156, 0.42610586, -0.16495961, -0.5663873, + 0.30579174, -0.05115908, -0.33941799, 0.23364776, + 0.11178309, 0.09481031, -0.26424935, 0.46261835, + 0.50248802, 0.26114327, -0.43736315, 0.33149987}; + + cell_to_forget_weights_ = {0.47485286, -0.51955009, -0.24458408, 0.31544167}; + cell_to_output_weights_ = {-0.17135078, 0.82760304, 0.85573703, -0.77109635}; + + lstm_input_ = {{2., 3., 3., 4., 1., 1.}}; + lstm_golden_output_ = {{-0.36444446, -0.00352185, 0.12886585, -0.05163646, + -0.42312205, -0.01218222, 0.24201041, -0.08124574, + -0.358325, -0.04621704, 0.21641694, -0.06471302}}; + LSTMOpModel lstm(n_batch, n_input, n_cell, n_output, /*use_cifg=*/true, /*use_peephole=*/true, /*use_projection_weights=*/false, @@ -563,607 +547,7 @@ TEST_P(Cifg_Peephole_NoProjection_NoLayerNorm_LstmOpTest, Test) { VerifyGoldens(&lstm, tolerance_per_type->at(weight_type)); } -class NoCifg_Peephole_Projection_NoLayerNorm_LstmOpTest - : public BaseLstmOpTest { - void SetUp() override { - input_to_input_weights_ = { - 0.021393683, 0.06124551, 0.046905167, -0.014657677, -0.03149463, - 0.09171803, 0.14647801, 0.10797193, -0.0057968358, 0.0019193048, - -0.2726754, 0.10154029, -0.018539885, 0.080349885, -0.10262385, - -0.022599787, -0.09121155, -0.008675967, -0.045206103, -0.0821282, - -0.008045952, 0.015478081, 0.055217247, 0.038719587, 0.044153627, - -0.06453243, 0.05031825, -0.046935108, -0.008164439, 0.014574226, - -0.1671009, -0.15519552, -0.16819797, -0.13971269, -0.11953059, - 0.25005487, -0.22790983, 0.009855087, -0.028140958, -0.11200698, - 0.11295408, -0.0035217577, 0.054485075, 0.05184695, 0.064711206, - 0.10989193, 0.11674786, 0.03490607, 0.07727357, 0.11390585, 
- -0.1863375, -0.1034451, -0.13945189, -0.049401227, -0.18767063, - 0.042483903, 0.14233552, 0.13832581, 0.18350165, 0.14545603, - -0.028545704, 0.024939531, 0.050929718, 0.0076203286, -0.0029723682, - -0.042484224, -0.11827596, -0.09171104, -0.10808628, -0.16327988, - -0.2273378, -0.0993647, -0.017155107, 0.0023917493, 0.049272764, - 0.0038534778, 0.054764505, 0.089753784, 0.06947234, 0.08014476, - -0.04544234, -0.0497073, -0.07135631, -0.048929106, -0.004042012, - -0.009284026, 0.018042054, 0.0036860977, -0.07427302, -0.11434604, - -0.018995456, 0.031487543, 0.012834908, 0.019977754, 0.044256654, - -0.39292613, -0.18519334, -0.11651281, -0.06809892, 0.011373677}; - - input_to_forget_weights_ = { - -0.0018401089, -0.004852237, 0.03698424, 0.014181704, - 0.028273236, -0.016726194, -0.05249759, -0.10204261, - 0.00861066, -0.040979505, -0.009899187, 0.01923892, - -0.028177269, -0.08535103, -0.14585495, 0.10662567, - -0.01909731, -0.017883534, -0.0047269356, -0.045103323, - 0.0030784295, 0.076784775, 0.07463696, 0.094531395, - 0.0814421, -0.12257899, -0.033945758, -0.031303465, - 0.045630626, 0.06843887, -0.13492945, -0.012480007, - -0.0811829, -0.07224499, -0.09628791, 0.045100946, - 0.0012300825, 0.013964662, 0.099372394, 0.02543059, - 0.06958324, 0.034257296, 0.0482646, 0.06267997, - 0.052625068, 0.12784666, 0.07077897, 0.025725935, - 0.04165009, 0.07241905, 0.018668644, -0.037377294, - -0.06277783, -0.08833636, -0.040120605, -0.011405586, - -0.007808335, -0.010301386, -0.005102167, 0.027717464, - 0.05483423, 0.11449111, 0.11289652, 0.10939839, - 0.13396506, -0.08402166, -0.01901462, -0.044678304, - -0.07720565, 0.014350063, -0.11757958, -0.0652038, - -0.08185733, -0.076754324, -0.092614375, 0.10405491, - 0.052960336, 0.035755895, 0.035839386, -0.012540553, - 0.036881298, 0.02913376, 0.03420159, 0.05448447, - -0.054523353, 0.02582715, 0.02327355, -0.011857179, - -0.0011980024, -0.034641717, -0.026125094, -0.17582615, - -0.15923657, -0.27486774, -0.0006143371, 0.0001771948, - -8.470171e-05, 0.02651807, 0.045790765, 0.06956496}; - - input_to_cell_weights_ = { - -0.04580283, -0.09549462, -0.032418985, -0.06454633, - -0.043528453, 0.043018587, -0.049152344, -0.12418144, - -0.078985475, -0.07596889, 0.019484362, -0.11434962, - -0.0074034138, -0.06314844, -0.092981495, 0.0062155537, - -0.025034338, -0.0028890965, 0.048929527, 0.06235075, - 0.10665918, -0.032036792, -0.08505916, -0.10843358, - -0.13002433, -0.036816437, -0.02130134, -0.016518239, - 0.0047691227, -0.0025825808, 0.066017866, 0.029991534, - -0.10652836, -0.1037554, -0.13056071, -0.03266643, - -0.033702414, -0.006473424, -0.04611692, 0.014419339, - -0.025174323, 0.0396852, 0.081777506, 0.06157468, - 0.10210095, -0.009658194, 0.046511717, 0.03603906, - 0.0069369148, 0.015960095, -0.06507666, 0.09551598, - 0.053568836, 0.06408714, 0.12835667, -0.008714329, - -0.20211966, -0.12093674, 0.029450472, 0.2849013, - -0.029227901, 0.1164364, -0.08560263, 0.09941786, - -0.036999565, -0.028842626, -0.0033637602, -0.017012902, - -0.09720865, -0.11193351, -0.029155117, -0.017936034, - -0.009768936, -0.04223324, -0.036159635, 0.06505112, - -0.021742892, -0.023377212, -0.07221364, -0.06430552, - 0.05453865, 0.091149814, 0.06387331, 0.007518393, - 0.055960953, 0.069779344, 0.046411168, 0.10509911, - 0.07463894, 0.0075130584, 0.012850982, 0.04555431, - 0.056955688, 0.06555285, 0.050801456, -0.009862683, - 0.00826772, -0.026555609, -0.0073611983, -0.0014897042}; - - input_to_output_weights_ = { - -0.0998932, -0.07201956, -0.052803773, -0.15629593, 
-0.15001918, - -0.07650751, 0.02359855, -0.075155355, -0.08037709, -0.15093534, - 0.029517552, -0.04751393, 0.010350531, -0.02664851, -0.016839722, - -0.023121163, 0.0077019283, 0.012851257, -0.05040649, -0.0129761, - -0.021737747, -0.038305793, -0.06870586, -0.01481247, -0.001285394, - 0.10124236, 0.083122835, 0.053313006, -0.062235646, -0.075637154, - -0.027833903, 0.029774971, 0.1130802, 0.09218906, 0.09506135, - -0.086665764, -0.037162706, -0.038880914, -0.035832845, -0.014481564, - -0.09825003, -0.12048569, -0.097665586, -0.05287633, -0.0964047, - -0.11366429, 0.035777505, 0.13568819, 0.052451383, 0.050649304, - 0.05798951, -0.021852335, -0.099848844, 0.014740475, -0.078897946, - 0.04974699, 0.014160473, 0.06973932, 0.04964942, 0.033364646, - 0.08190124, 0.025535367, 0.050893165, 0.048514254, 0.06945813, - -0.078907564, -0.06707616, -0.11844508, -0.09986688, -0.07509403, - 0.06263226, 0.14925587, 0.20188436, 0.12098451, 0.14639415, - 0.0015017595, -0.014267382, -0.03417257, 0.012711468, 0.0028300495, - -0.024758482, -0.05098548, -0.0821182, 0.014225672, 0.021544158, - 0.08949725, 0.07505268, -0.0020780868, 0.04908258, 0.06476295, - -0.022907063, 0.027562456, 0.040185735, 0.019567577, -0.015598739, - -0.049097303, -0.017121866, -0.083368234, -0.02332002, -0.0840956}; - - input_gate_bias_ = {0.02234832, 0.14757581, 0.18176508, 0.10380666, - 0.053110216, -0.06928846, -0.13942584, -0.11816189, - 0.19483899, 0.03652339, -0.10250295, 0.036714908, - -0.18426876, 0.036065217, 0.21810818, 0.02383196, - -0.043370757, 0.08690144, -0.04444982, 0.00030581196}; - - forget_gate_bias_ = {0.035185695, -0.042891346, -0.03032477, 0.23027696, - 0.11098921, 0.15378423, 0.09263801, 0.09790885, - 0.09508917, 0.061199076, 0.07665568, -0.015443159, - -0.03499149, 0.046190713, 0.08895977, 0.10899629, - 0.40694186, 0.06030037, 0.012413437, -0.06108739}; - - cell_gate_bias_ = {-0.024379363, 0.0055531194, 0.23377132, 0.033463873, - -0.1483596, -0.10639995, -0.091433935, 0.058573797, - -0.06809782, -0.07889636, -0.043246906, -0.09829136, - -0.4279842, 0.034901652, 0.18797937, 0.0075234566, - 0.016178843, 0.1749513, 0.13975595, 0.92058027}; - - output_gate_bias_ = {0.046159424, -0.0012809046, 0.03563469, 0.12648113, - 0.027195795, 0.35373217, -0.018957434, 0.008907322, - -0.0762701, 0.12018895, 0.04216877, 0.0022856654, - 0.040952638, 0.3147856, 0.08225149, -0.057416286, - -0.14995944, -0.008040261, 0.13208859, 0.029760877}; - - recurrent_to_input_weights_ = { - -0.001374326, -0.078856036, 0.10672688, 0.029162422, - -0.11585556, 0.02557986, -0.13446963, -0.035785314, - -0.01244275, 0.025961924, -0.02337298, -0.044228926, - -0.055839065, -0.046598054, -0.010546039, -0.06900766, - 0.027239809, 0.022582639, -0.013296484, -0.05459212, - 0.08981, -0.045407712, 0.08682226, -0.06867011, - -0.14390695, -0.02916037, 0.000996957, 0.091420636, - 0.14283475, -0.07390571, -0.06402044, 0.062524505, - -0.093129106, 0.04860203, -0.08364217, -0.08119002, - 0.009352075, 0.22920375, 0.0016303885, 0.11583097, - -0.13732095, 0.012405723, -0.07551853, 0.06343048, - 0.12162708, -0.031923793, -0.014335606, 0.01790974, - -0.10650317, -0.0724401, 0.08554849, -0.05727212, - 0.06556731, -0.042729504, -0.043227166, 0.011683251, - -0.013082158, -0.029302018, -0.010899579, -0.062036745, - -0.022509435, -0.00964907, -0.01567329, 0.04260106, - -0.07787477, -0.11576462, 0.017356863, 0.048673786, - -0.017577527, -0.05527947, -0.082487635, -0.040137455, - -0.10820036, -0.04666372, 0.022746278, -0.07851417, - 0.01068115, 0.032956902, 0.022433773, 
0.0026891115, - 0.08944216, -0.0685835, 0.010513544, 0.07228705, - 0.02032331, -0.059686817, -0.0005566496, -0.086984694, - 0.040414046, -0.1380399, 0.094208956, -0.05722982, - 0.012092817, -0.04989123, -0.086576, -0.003399834, - -0.04696032, -0.045747425, 0.10091314, 0.048676282, - -0.029037097, 0.031399418, -0.0040285117, 0.047237843, - 0.09504992, 0.041799378, -0.049185462, -0.031518843, - -0.10516937, 0.026374253, 0.10058866, -0.0033195973, - -0.041975245, 0.0073591834, 0.0033782164, -0.004325073, - -0.10167381, 0.042500053, -0.01447153, 0.06464186, - -0.017142897, 0.03312627, 0.009205989, 0.024138335, - -0.011337001, 0.035530265, -0.010912711, 0.0706555, - -0.005894094, 0.051841937, -0.1401738, -0.02351249, - 0.0365468, 0.07590991, 0.08838724, 0.021681072, - -0.10086113, 0.019608743, -0.06195883, 0.077335775, - 0.023646897, -0.095322326, 0.02233014, 0.09756986, - -0.048691444, -0.009579111, 0.07595467, 0.11480546, - -0.09801813, 0.019894179, 0.08502348, 0.004032281, - 0.037211012, 0.068537936, -0.048005626, -0.091520436, - -0.028379958, -0.01556313, 0.06554592, -0.045599163, - -0.01672207, -0.020169014, -0.011877351, -0.20212261, - 0.010889619, 0.0047078193, 0.038385306, 0.08540671, - -0.017140968, -0.0035865551, 0.016678626, 0.005633034, - 0.015963363, 0.00871737, 0.060130805, 0.028611384, - 0.10109069, -0.015060172, -0.07894427, 0.06401885, - 0.011584063, -0.024466386, 0.0047652307, -0.09041358, - 0.030737216, -0.0046374933, 0.14215417, -0.11823516, - 0.019899689, 0.006106124, -0.027092824, 0.0786356, - 0.05052217, -0.058925, -0.011402121, -0.024987547, - -0.0013661642, -0.06832946, -0.015667673, -0.1083353, - -0.00096863037, -0.06988685, -0.053350925, -0.027275559, - -0.033664223, -0.07978348, -0.025200296, -0.017207067, - -0.058403496, -0.055697463, 0.005798788, 0.12965427, - -0.062582195, 0.0013350133, -0.10482091, 0.0379771, - 0.072521195, -0.0029455067, -0.13797039, -0.03628521, - 0.013806405, -0.017858358, -0.01008298, -0.07700066, - -0.017081132, 0.019358726, 0.0027079724, 0.004635139, - 0.062634714, -0.02338735, -0.039547626, -0.02050681, - 0.03385117, -0.083611414, 0.002862572, -0.09421313, - 0.058618143, -0.08598433, 0.00972939, 0.023867095, - -0.053934585, -0.023203006, 0.07452513, -0.048767887, - -0.07314807, -0.056307215, -0.10433547, -0.06440842, - 0.04328182, 0.04389765, -0.020006588, -0.09076438, - -0.11652589, -0.021705797, 0.03345259, -0.010329105, - -0.025767034, 0.013057034, -0.07316461, -0.10145612, - 0.06358255, 0.18531723, 0.07759293, 0.12006465, - 0.1305557, 0.058638252, -0.03393652, 0.09622831, - -0.16253184, -2.4580743e-06, 0.079869635, -0.070196845, - -0.005644518, 0.06857898, -0.12598175, -0.035084512, - 0.03156317, -0.12794146, -0.031963028, 0.04692781, - 0.030070418, 0.0071660685, -0.095516115, -0.004643372, - 0.040170413, -0.062104587, -0.0037324072, 0.0554317, - 0.08184801, -0.019164372, 0.06791302, 0.034257166, - -0.10307039, 0.021943003, 0.046745934, 0.0790918, - -0.0265588, -0.007824208, 0.042546265, -0.00977924, - -0.0002440307, -0.017384544, -0.017990116, 0.12252321, - -0.014512694, -0.08251313, 0.08861942, 0.13589665, - 0.026351685, 0.012641483, 0.07466548, 0.044301085, - -0.045414884, -0.051112458, 0.03444247, -0.08502782, - -0.04106223, -0.028126027, 0.028473156, 0.10467447}; - - recurrent_to_cell_weights_ = { - -0.037322544, 0.018592842, 0.0056175636, -0.06253426, - 0.055647098, -0.05713207, -0.05626563, 0.005559383, - 0.03375411, -0.025757805, -0.088049285, 0.06017052, - -0.06570978, 0.007384076, 0.035123326, -0.07920549, - 0.053676967, 
0.044480428, -0.07663568, 0.0071805613, - 0.08089997, 0.05143358, 0.038261272, 0.03339287, - -0.027673481, 0.044746667, 0.028349208, 0.020090483, - -0.019443132, -0.030755889, -0.0040000007, 0.04465846, - -0.021585021, 0.0031670958, 0.0053199246, -0.056117613, - -0.10893326, 0.076739706, -0.08509834, -0.027997585, - 0.037871376, 0.01449768, -0.09002357, -0.06111149, - -0.046195522, 0.0422062, -0.005683705, -0.1253618, - -0.012925729, -0.04890792, 0.06985068, 0.037654128, - 0.03398274, -0.004781977, 0.007032333, -0.031787455, - 0.010868644, -0.031489216, 0.09525667, 0.013939797, - 0.0058680447, 0.0167067, 0.02668468, -0.04797466, - -0.048885044, -0.12722108, 0.035304096, 0.06554885, - 0.00972396, -0.039238118, -0.05159735, -0.11329045, - 0.1613692, -0.03750952, 0.06529313, -0.071974665, - -0.11769596, 0.015524369, -0.0013754242, -0.12446318, - 0.02786344, -0.014179351, 0.005264273, 0.14376344, - 0.015983658, 0.03406988, -0.06939408, 0.040699873, - 0.02111075, 0.09669095, 0.041345075, -0.08316494, - -0.07684199, -0.045768797, 0.032298047, -0.041805092, - 0.0119405, 0.0061010392, 0.12652606, 0.0064572375, - -0.024950314, 0.11574242, 0.04508852, -0.04335324, - 0.06760663, -0.027437469, 0.07216407, 0.06977076, - -0.05438599, 0.034033038, -0.028602652, 0.05346137, - 0.043184172, -0.037189785, 0.10420091, 0.00882477, - -0.054019816, -0.074273005, -0.030617684, -0.0028467078, - 0.024302477, -0.0038869337, 0.005332455, 0.0013399826, - 0.04361412, -0.007001822, 0.09631092, -0.06702025, - -0.042049985, -0.035070654, -0.04103342, -0.10273396, - 0.0544271, 0.037184782, -0.13150354, -0.0058036847, - -0.008264958, 0.042035464, 0.05891794, 0.029673764, - 0.0063542654, 0.044788733, 0.054816857, 0.062257513, - -0.00093483756, 0.048938446, -0.004952862, -0.007730018, - -0.04043371, -0.017094059, 0.07229206, -0.023670016, - -0.052195564, -0.025616996, -0.01520939, 0.045104615, - -0.007376126, 0.003533447, 0.006570588, 0.056037236, - 0.12436656, 0.051817212, 0.028532185, -0.08686856, - 0.11868599, 0.07663395, -0.07323171, 0.03463402, - -0.050708205, -0.04458982, -0.11590894, 0.021273347, - 0.1251325, -0.15313013, -0.12224372, 0.17228661, - 0.023029093, 0.086124025, 0.006445803, -0.03496501, - 0.028332196, 0.04449512, -0.042436164, -0.026587414, - -0.006041347, -0.09292539, -0.05678812, 0.03897832, - 0.09465633, 0.008115513, -0.02171956, 0.08304309, - 0.071401566, 0.019622514, 0.032163795, -0.004167056, - 0.02295182, 0.030739572, 0.056506045, 0.004612461, - 0.06524936, 0.059999723, 0.046395954, -0.0045512207, - -0.1335546, -0.030136576, 0.11584653, -0.014678886, - 0.0020118146, -0.09688814, -0.0790206, 0.039770417, - -0.0329582, 0.07922767, 0.029322514, 0.026405897, - 0.04207835, -0.07073373, 0.063781224, 0.0859677, - -0.10925287, -0.07011058, 0.048005477, 0.03438226, - -0.09606514, -0.006669445, -0.043381985, 0.04240257, - -0.06955775, -0.06769346, 0.043903265, -0.026784198, - -0.017840602, 0.024307009, -0.040079936, -0.019946516, - 0.045318738, -0.12233574, 0.026170589, 0.0074471775, - 0.15978073, 0.10185836, 0.10298046, -0.015476589, - -0.039390966, -0.072174534, 0.0739445, -0.1211869, - -0.0347889, -0.07943156, 0.014809798, -0.12412325, - -0.0030663363, 0.039695457, 0.0647603, -0.08291318, - -0.018529687, -0.004423833, 0.0037507233, 0.084633216, - -0.01514876, -0.056505352, -0.012800942, -0.06994386, - 0.012962922, -0.031234352, 0.07029052, 0.016418684, - 0.03618972, 0.055686004, -0.08663945, -0.017404709, - -0.054761406, 0.029065743, 0.052404847, 0.020238016, - 0.0048197987, -0.0214882, 0.07078733, 
0.013016777, - 0.06262858, 0.009184685, 0.020785125, -0.043904778, - -0.0270329, -0.03299152, -0.060088247, -0.015162964, - -0.001828936, 0.12642565, -0.056757294, 0.013586685, - 0.09232601, -0.035886683, 0.06000002, 0.05229691, - -0.052580316, -0.082029596, -0.010794592, 0.012947712, - -0.036429964, -0.085508935, -0.13127148, -0.017744139, - 0.031502828, 0.036232427, -0.031581745, 0.023051167, - -0.05325106, -0.03421577, 0.028793324, -0.034633752, - -0.009881397, -0.043551125, -0.018609839, 0.0019097115, - -0.008799762, 0.056595087, 0.0022273948, 0.055752404}; - - recurrent_to_forget_weights_ = { - -0.057784554, -0.026057621, -0.068447545, -0.022581743, - 0.14811787, 0.10826372, 0.09471067, 0.03987225, - -0.0039523416, 0.00030638507, 0.053185795, 0.10572994, - 0.08414449, -0.022036452, -0.00066928595, -0.09203576, - 0.032950465, -0.10985798, -0.023809856, 0.0021431844, - -0.02196096, -0.00326074, 0.00058621005, -0.074678116, - -0.06193199, 0.055729095, 0.03736828, 0.020123724, - 0.061878487, -0.04729229, 0.034919553, -0.07585433, - -0.04421272, -0.044019096, 0.085488975, 0.04058006, - -0.06890133, -0.030951202, -0.024628663, -0.07672815, - 0.034293607, 0.08556707, -0.05293577, -0.033561368, - -0.04899627, 0.0241671, 0.015736353, -0.095442444, - -0.029564252, 0.016493602, -0.035026584, 0.022337519, - -0.026871363, 0.004780428, 0.0077918363, -0.03601621, - 0.016435321, -0.03263031, -0.09543275, -0.047392778, - 0.013454138, 0.028934088, 0.01685226, -0.086110644, - -0.046250615, -0.01847454, 0.047608484, 0.07339695, - 0.034546845, -0.04881143, 0.009128804, -0.08802852, - 0.03761666, 0.008096139, -0.014454086, 0.014361001, - -0.023502491, -0.0011840804, -0.07607001, 0.001856849, - -0.06509276, -0.006021153, -0.08570962, -0.1451793, - 0.060212336, 0.055259194, 0.06974018, 0.049454916, - -0.027794661, -0.08077226, -0.016179763, 0.1169753, - 0.17213494, -0.0056326236, -0.053934924, -0.0124349, - -0.11520337, 0.05409887, 0.088759385, 0.0019655675, - 0.0042065294, 0.03881498, 0.019844765, 0.041858196, - -0.05695512, 0.047233116, 0.038937137, -0.06542224, - 0.014429736, -0.09719407, 0.13908425, -0.05379757, - 0.012321099, 0.082840554, -0.029899208, 0.044217527, - 0.059855383, 0.07711018, -0.045319796, 0.0948846, - -0.011724666, -0.0033288454, -0.033542685, -0.04764985, - -0.13873616, 0.040668588, 0.034832682, -0.015319203, - -0.018715994, 0.046002675, 0.0599172, -0.043107376, - 0.0294216, -0.002314414, -0.022424703, 0.0030315618, - 0.0014641669, 0.0029166266, -0.11878115, 0.013738511, - 0.12375372, -0.0006038222, 0.029104086, 0.087442465, - 0.052958444, 0.07558703, 0.04817258, 0.044462286, - -0.015213451, -0.08783778, -0.0561384, -0.003008196, - 0.047060397, -0.002058388, 0.03429439, -0.018839769, - 0.024734668, 0.024614193, -0.042046934, 0.09597743, - -0.0043254104, 0.04320769, 0.0064070094, -0.0019131786, - -0.02558259, -0.022822596, -0.023273505, -0.02464396, - -0.10991725, -0.006240552, 0.0074488563, 0.024044557, - 0.04383914, -0.046476185, 0.028658995, 0.060410924, - 0.050786525, 0.009452605, -0.0073054377, -0.024810238, - 0.0052906186, 0.0066939713, -0.0020913032, 0.014515517, - 0.015898481, 0.021362653, -0.030262267, 0.016587038, - -0.011442813, 0.041154444, -0.007631438, -0.03423484, - -0.010977775, 0.036152758, 0.0066366293, 0.11915515, - 0.02318443, -0.041350313, 0.021485701, -0.10906167, - -0.028218046, -0.00954771, 0.020531068, -0.11995105, - -0.03672871, 0.024019798, 0.014255957, -0.05221243, - -0.00661567, -0.04630967, 0.033188973, 0.10107534, - -0.014027541, 0.030796422, -0.10270911, 
-0.035999842, - 0.15443139, 0.07684145, 0.036571592, -0.035900835, - -0.0034699554, 0.06209149, 0.015920248, -0.031122351, - -0.03858649, 0.01849943, 0.13872518, 0.01503974, - 0.069941424, -0.06948533, -0.0088794185, 0.061282158, - -0.047401894, 0.03100163, -0.041533746, -0.10430945, - 0.044574402, -0.01425562, -0.024290353, 0.034563623, - 0.05866852, 0.023947537, -0.09445152, 0.035450947, - 0.02247216, -0.0042998926, 0.061146557, -0.10250651, - 0.020881841, -0.06747029, 0.10062043, -0.0023941975, - 0.03532124, -0.016341697, 0.09685456, -0.016764693, - 0.051808182, 0.05875331, -0.04536488, 0.001626336, - -0.028892258, -0.01048663, -0.009793449, -0.017093895, - 0.010987891, 0.02357273, -0.00010856845, 0.0099760275, - -0.001845119, -0.03551521, 0.0018358806, 0.05763657, - -0.01769146, 0.040995963, 0.02235177, -0.060430344, - 0.11475477, -0.023854522, 0.10071741, 0.0686208, - -0.014250481, 0.034261297, 0.047418304, 0.08562733, - -0.030519066, 0.0060542435, 0.014653856, -0.038836084, - 0.04096551, 0.032249358, -0.08355519, -0.026823482, - 0.056386515, -0.010401743, -0.028396193, 0.08507674, - 0.014410365, 0.020995233, 0.17040324, 0.11511526, - 0.02459721, 0.0066619175, 0.025853224, -0.023133837, - -0.081302024, 0.017264642, -0.009585969, 0.09491168, - -0.051313367, 0.054532815, -0.014298593, 0.10657464, - 0.007076659, 0.10964551, 0.0409152, 0.008275321, - -0.07283536, 0.07937492, 0.04192024, -0.1075027}; - - recurrent_to_output_weights_ = { - 0.025825322, -0.05813119, 0.09495884, -0.045984812, - -0.01255415, -0.0026479573, -0.08196161, -0.054914974, - -0.0046604523, -0.029587349, -0.044576716, -0.07480124, - -0.082868785, 0.023254942, 0.027502948, -0.0039728214, - -0.08683098, -0.08116779, -0.014675607, -0.037924774, - -0.023314456, -0.007401714, -0.09255757, 0.029460307, - -0.08829125, -0.005139627, -0.08989442, -0.0555066, - 0.13596267, -0.025062224, -0.048351806, -0.03850004, - 0.07266485, -0.022414139, 0.05940088, 0.075114764, - 0.09597592, -0.010211725, -0.0049794707, -0.011523867, - -0.025980417, 0.072999895, 0.11091378, -0.081685916, - 0.014416728, 0.043229222, 0.034178585, -0.07530371, - 0.035837382, -0.085607, -0.007721233, -0.03287832, - -0.043848954, -0.06404588, -0.06632928, -0.073643476, - 0.008214239, -0.045984086, 0.039764922, 0.03474462, - 0.060612556, -0.080590084, 0.049127717, 0.04151091, - -0.030063879, 0.008801774, -0.023021035, -0.019558564, - 0.05158114, -0.010947698, -0.011825728, 0.0075720972, - 0.0699727, -0.0039981045, 0.069350146, 0.08799282, - 0.016156472, 0.035502106, 0.11695009, 0.006217345, - 0.13392477, -0.037875112, 0.025745004, 0.08940699, - -0.00924166, 0.0046702605, -0.036598757, -0.08811812, - 0.10522024, -0.032441203, 0.008176899, -0.04454919, - 0.07058152, 0.0067963637, 0.039206743, 0.03259838, - 0.03725492, -0.09515802, 0.013326398, -0.052055415, - -0.025676316, 0.03198509, -0.015951829, -0.058556724, - 0.036879618, 0.043357447, 0.028362012, -0.05908629, - 0.0059240665, -0.04995891, -0.019187413, 0.0276265, - -0.01628143, 0.0025863599, 0.08800015, 0.035250366, - -0.022165963, -0.07328642, -0.009415526, -0.07455109, - 0.11690406, 0.0363299, 0.07411125, 0.042103454, - -0.009660886, 0.019076364, 0.018299393, -0.046004917, - 0.08891175, 0.0431396, -0.026327137, -0.051502608, - 0.08979574, -0.051670972, 0.04940282, -0.07491107, - -0.021240504, 0.022596184, -0.034280192, 0.060163025, - -0.058211457, -0.051837247, -0.01349775, -0.04639988, - -0.035936575, -0.011681591, 0.064818054, 0.0073146066, - -0.021745546, -0.043124277, -0.06471268, -0.07053354, - 
-0.029321948, -0.05330136, 0.016933719, -0.053782392, - 0.13747959, -0.1361751, -0.11569455, 0.0033329215, - 0.05693899, -0.053219706, 0.063698, 0.07977434, - -0.07924483, 0.06936997, 0.0034815092, -0.007305279, - -0.037325785, -0.07251102, -0.033633437, -0.08677009, - 0.091591336, -0.14165086, 0.021752775, 0.019683983, - 0.0011612234, -0.058154266, 0.049996935, 0.0288841, - -0.0024567875, -0.14345716, 0.010955264, -0.10234828, - 0.1183656, -0.0010731248, -0.023590032, -0.072285876, - -0.0724771, -0.026382286, -0.0014920527, 0.042667855, - 0.0018776858, 0.02986552, 0.009814309, 0.0733756, - 0.12289186, 0.018043943, -0.0458958, 0.049412545, - 0.033632483, 0.05495232, 0.036686596, -0.013781798, - -0.010036754, 0.02576849, -0.08307328, 0.010112348, - 0.042521734, -0.05869831, -0.071689695, 0.03876447, - -0.13275425, -0.0352966, -0.023077697, 0.10285965, - 0.084736146, 0.15568255, -0.00040734606, 0.027835453, - -0.10292561, -0.032401145, 0.10053256, -0.026142767, - -0.08271222, -0.0030240538, -0.016368777, 0.1070414, - 0.042672627, 0.013456989, -0.0437609, -0.022309763, - 0.11576483, 0.04108048, 0.061026827, -0.0190714, - -0.0869359, 0.037901703, 0.0610107, 0.07202949, - 0.01675338, 0.086139716, -0.08795751, -0.014898893, - -0.023771819, -0.01965048, 0.007955471, -0.043740474, - 0.03346837, -0.10549954, 0.090567775, 0.042013682, - -0.03176985, 0.12569028, -0.02421228, -0.029526481, - 0.023851605, 0.031539805, 0.05292009, -0.02344001, - -0.07811758, -0.08834428, 0.10094801, 0.16594367, - -0.06861939, -0.021256343, -0.041093912, -0.06669611, - 0.035498552, 0.021757556, -0.09302526, -0.015403468, - -0.06614931, -0.051798206, -0.013874718, 0.03630673, - 0.010412845, -0.08077351, 0.046185967, 0.0035662893, - 0.03541868, -0.094149634, -0.034814864, 0.003128424, - -0.020674974, -0.03944324, -0.008110165, -0.11113267, - 0.08484226, 0.043586485, 0.040582247, 0.0968012, - -0.065249965, -0.028036479, 0.0050708856, 0.0017462453, - 0.0326779, 0.041296225, 0.09164146, -0.047743853, - -0.015952192, -0.034451712, 0.084197424, -0.05347844, - -0.11768019, 0.085926116, -0.08251791, -0.045081906, - 0.0948852, 0.068401024, 0.024856757, 0.06978981, - -0.057309967, -0.012775832, -0.0032452994, 0.01977615, - -0.041040014, -0.024264973, 0.063464895, 0.05431621, - }; - - cell_to_input_weights_ = { - 0.040369894, 0.030746894, 0.24704495, 0.018586371, -0.037586458, - -0.15312155, -0.11812848, -0.11465643, 0.20259799, 0.11418174, - -0.10116027, -0.011334949, 0.12411352, -0.076769054, -0.052169047, - 0.21198851, -0.38871562, -0.09061183, -0.09683246, -0.21929175}; - - cell_to_forget_weights_ = { - -0.01998659, -0.15568835, -0.24248174, -0.012770197, 0.041331276, - -0.072311886, -0.052123554, -0.0066330447, -0.043891653, 0.036225766, - -0.047248036, 0.021479502, 0.033189066, 0.11952997, -0.020432774, - 0.64658105, -0.06650122, -0.03467612, 0.095340036, 0.23647355}; - - cell_to_output_weights_ = { - 0.08286371, -0.08261836, -0.51210177, 0.002913762, 0.17764764, - -0.5495371, -0.08460716, -0.24552552, 0.030037103, 0.04123544, - -0.11940523, 0.007358328, 0.1890978, 0.4833202, -0.34441817, - 0.36312827, -0.26375428, 0.1457655, -0.19724406, 0.15548733}; - - projection_weights_ = { - -0.009802181, 0.09401916, 0.0717386, -0.13895074, - 0.09641832, 0.060420845, 0.08539281, 0.054285463, - 0.061395317, 0.034448683, -0.042991187, 0.019801661, - -0.16840284, -0.015726732, -0.23041931, -0.024478018, - -0.10959692, -0.013875541, 0.18600968, -0.061274476, - 0.0138165, -0.08160894, -0.07661644, 0.032372914, - 0.16169067, 0.22465782, 
-0.03993472, -0.004017731, - 0.08633481, -0.28869787, 0.08682067, 0.17240396, - 0.014975425, 0.056431185, 0.031037588, 0.16702051, - 0.0077946745, 0.15140012, 0.29405436, 0.120285, - -0.188994, -0.027265169, 0.043389652, -0.022061434, - 0.014777949, -0.20203483, 0.094781205, 0.19100232, - 0.13987629, -0.036132768, -0.06426278, -0.05108664, - 0.13221376, 0.009441198, -0.16715929, 0.15859416, - -0.040437475, 0.050779544, -0.022187516, 0.012166504, - 0.027685808, -0.07675938, -0.0055694645, -0.09444123, - 0.0046453946, 0.050794356, 0.10770313, -0.20790008, - -0.07149004, -0.11425117, 0.008225835, -0.035802525, - 0.14374903, 0.15262283, 0.048710253, 0.1847461, - -0.007487823, 0.11000021, -0.09542012, 0.22619456, - -0.029149994, 0.08527916, 0.009043713, 0.0042746216, - 0.016261552, 0.022461696, 0.12689082, -0.043589946, - -0.12035478, -0.08361797, -0.050666027, -0.1248618, - -0.1275799, -0.071875185, 0.07377272, 0.09944291, - -0.18897448, -0.1593054, -0.06526116, -0.040107165, - -0.004618631, -0.067624845, -0.007576253, 0.10727444, - 0.041546922, -0.20424393, 0.06907816, 0.050412357, - 0.00724631, 0.039827548, 0.12449835, 0.10747581, - 0.13708383, 0.09134148, -0.12617786, -0.06428341, - 0.09956831, 0.1208086, -0.14676677, -0.0727722, - 0.1126304, 0.010139365, 0.015571211, -0.038128063, - 0.022913318, -0.042050496, 0.16842307, -0.060597885, - 0.10531834, -0.06411776, -0.07451711, -0.03410368, - -0.13393489, 0.06534304, 0.003620307, 0.04490757, - 0.05970546, 0.05197996, 0.02839995, 0.10434969, - -0.013699693, -0.028353551, -0.07260381, 0.047201227, - -0.024575593, -0.036445823, 0.07155557, 0.009672501, - -0.02328883, 0.009533515, -0.03606021, -0.07421458, - -0.028082801, -0.2678904, -0.13221288, 0.18419984, - -0.13012612, -0.014588381, -0.035059117, -0.04824723, - 0.07830115, -0.056184657, 0.03277091, 0.025466874, - 0.14494097, -0.12522776, -0.098633975, -0.10766018, - -0.08317623, 0.08594209, 0.07749552, 0.039474737, - 0.1776665, -0.07409566, -0.0477268, 0.29323658, - 0.10801441, 0.1154011, 0.013952499, 0.10739139, - 0.10708251, -0.051456142, 0.0074137426, -0.10430189, - 0.10034707, 0.045594677, 0.0635285, -0.0715442, - -0.089667566, -0.10811871, 0.00026344223, 0.08298446, - -0.009525053, 0.006585689, -0.24567553, -0.09450807, - 0.09648481, 0.026996298, -0.06419476, -0.04752702, - -0.11063944, -0.23441927, -0.17608605, -0.052156363, - 0.067035615, 0.19271925, -0.0032889997, -0.043264326, - 0.09663576, -0.057112187, -0.10100678, 0.0628376, - 0.04447668, 0.017961001, -0.10094388, -0.10190601, - 0.18335468, 0.10494553, -0.052095775, -0.0026118709, - 0.10539724, -0.04383912, -0.042349473, 0.08438151, - -0.1947263, 0.02251204, 0.11216432, -0.10307853, - 0.17351969, -0.039091777, 0.08066188, -0.00561982, - 0.12633002, 0.11335965, -0.0088127935, -0.019777594, - 0.06864014, -0.059751723, 0.016233567, -0.06894641, - -0.28651384, -0.004228674, 0.019708522, -0.16305895, - -0.07468996, -0.0855457, 0.099339016, -0.07580735, - -0.13775392, 0.08434318, 0.08330512, -0.12131499, - 0.031935584, 0.09180414, -0.08876437, -0.08049874, - 0.008753825, 0.03498998, 0.030215185, 0.03907079, - 0.089751154, 0.029194152, -0.03337423, -0.019092513, - 0.04331237, 0.04299654, -0.036394123, -0.12915532, - 0.09793732, 0.07512415, -0.11319543, -0.032502122, - 0.15661901, 0.07671967, -0.005491124, -0.19379048, - -0.218606, 0.21448623, 0.017840758, 0.1416943, - -0.07051762, 0.19488361, 0.02664691, -0.18104725, - -0.09334311, 0.15026465, -0.15493552, -0.057762887, - -0.11604192, -0.262013, -0.01391798, 0.012185008, - 0.11156489, 
-0.07483202, 0.06693364, -0.26151478, - 0.046425626, 0.036540434, -0.16435726, 0.17338543, - -0.21401681, -0.11385144, -0.08283257, -0.069031075, - 0.030635102, 0.010969227, 0.11109743, 0.010919218, - 0.027526086, 0.13519906, 0.01891392, -0.046839405, - -0.040167913, 0.017953383, -0.09700955, 0.0061885654, - -0.07000971, 0.026893595, -0.038844477, 0.14543656}; - - lstm_input_ = { - {// Batch0: 4 (input_sequence_size) * 5 (n_input) - 0.787926, 0.151646, 0.071352, 0.118426, 0.458058, // step 0 - 0.596268, 0.998386, 0.568695, 0.864524, 0.571277, // step 1 - 0.073204, 0.296072, 0.743333, 0.069199, 0.045348, // step 2 - 0.867394, 0.291279, 0.013714, 0.482521, 0.626339}, // step 3 - - {// Batch1: 4 (input_sequence_size) * 5 (n_input) - 0.295743, 0.544053, 0.690064, 0.858138, 0.497181, // step 0 - 0.642421, 0.524260, 0.134799, 0.003639, 0.162482, // step 1 - 0.640394, 0.930399, 0.050782, 0.432485, 0.988078, // step 2 - 0.082922, 0.563329, 0.865614, 0.333232, 0.259916} // step 3 - }; - - lstm_golden_output_ = { - {// Batch0: 4 (input_sequence_size) * 16 (n_output) - -0.00396806, 0.029352, -0.00279226, 0.0159977, -0.00835576, - -0.0211779, 0.0283512, -0.0114597, 0.00907307, -0.0244004, - -0.0152191, -0.0259063, 0.00914318, 0.00415118, 0.017147, - 0.0134203, -0.0166936, 0.0381209, 0.000889694, 0.0143363, - -0.0328911, -0.0234288, 0.0333051, -0.012229, 0.0110322, - -0.0457725, -0.000832209, -0.0202817, 0.0327257, 0.0121308, - 0.0155969, 0.0312091, -0.0213783, 0.0350169, 0.000324794, - 0.0276012, -0.0263374, -0.0371449, 0.0446149, -0.0205474, - 0.0103729, -0.0576349, -0.0150052, -0.0292043, 0.0376827, - 0.0136115, 0.0243435, 0.0354492, -0.0189322, 0.0464512, - -0.00251373, 0.0225745, -0.0308346, -0.0317124, 0.0460407, - -0.0189395, 0.0149363, -0.0530162, -0.0150767, -0.0340193, - 0.0286833, 0.00824207, 0.0264887, 0.0305169}, - {// Batch1: 4 (input_sequence_size) * 16 (n_output) - -0.013869, 0.0287268, -0.00334693, 0.00733398, -0.0287926, - -0.0186926, 0.0193662, -0.0115437, 0.00422612, -0.0345232, - 0.00223253, -0.00957321, 0.0210624, 0.013331, 0.0150954, - 0.02168, -0.0141913, 0.0322082, 0.00227024, 0.0260507, - -0.0188721, -0.0296489, 0.0399134, -0.0160509, 0.0116039, - -0.0447318, -0.0150515, -0.0277406, 0.0316596, 0.0118233, - 0.0214762, 0.0293641, -0.0204549, 0.0450315, -0.00117378, - 0.0167673, -0.0375007, -0.0238314, 0.038784, -0.0174034, - 0.0131743, -0.0506589, -0.0048447, -0.0240239, 0.0325789, - 0.00790065, 0.0220157, 0.0333314, -0.0264787, 0.0387855, - -0.000764675, 0.0217599, -0.037537, -0.0335206, 0.0431679, - -0.0211424, 0.010203, -0.062785, -0.00832363, -0.025181, - 0.0412031, 0.0118723, 0.0239643, 0.0394009}}; - } -}; - -TEST_P(NoCifg_Peephole_Projection_NoLayerNorm_LstmOpTest, Test) { +TEST_P(LstmOpTest, NoCifg_Peephole_Projection_NoLayerNorm) { const int n_batch = 2; const int n_input = 5; const int n_cell = 20; @@ -1180,6 +564,559 @@ TEST_P(NoCifg_Peephole_Projection_NoLayerNorm_LstmOpTest, Test) { return; } + input_to_input_weights_ = { + 0.021393683, 0.06124551, 0.046905167, -0.014657677, -0.03149463, + 0.09171803, 0.14647801, 0.10797193, -0.0057968358, 0.0019193048, + -0.2726754, 0.10154029, -0.018539885, 0.080349885, -0.10262385, + -0.022599787, -0.09121155, -0.008675967, -0.045206103, -0.0821282, + -0.008045952, 0.015478081, 0.055217247, 0.038719587, 0.044153627, + -0.06453243, 0.05031825, -0.046935108, -0.008164439, 0.014574226, + -0.1671009, -0.15519552, -0.16819797, -0.13971269, -0.11953059, + 0.25005487, -0.22790983, 0.009855087, -0.028140958, -0.11200698, + 0.11295408, 
-0.0035217577, 0.054485075, 0.05184695, 0.064711206, + 0.10989193, 0.11674786, 0.03490607, 0.07727357, 0.11390585, + -0.1863375, -0.1034451, -0.13945189, -0.049401227, -0.18767063, + 0.042483903, 0.14233552, 0.13832581, 0.18350165, 0.14545603, + -0.028545704, 0.024939531, 0.050929718, 0.0076203286, -0.0029723682, + -0.042484224, -0.11827596, -0.09171104, -0.10808628, -0.16327988, + -0.2273378, -0.0993647, -0.017155107, 0.0023917493, 0.049272764, + 0.0038534778, 0.054764505, 0.089753784, 0.06947234, 0.08014476, + -0.04544234, -0.0497073, -0.07135631, -0.048929106, -0.004042012, + -0.009284026, 0.018042054, 0.0036860977, -0.07427302, -0.11434604, + -0.018995456, 0.031487543, 0.012834908, 0.019977754, 0.044256654, + -0.39292613, -0.18519334, -0.11651281, -0.06809892, 0.011373677}; + + input_to_forget_weights_ = { + -0.0018401089, -0.004852237, 0.03698424, 0.014181704, 0.028273236, + -0.016726194, -0.05249759, -0.10204261, 0.00861066, -0.040979505, + -0.009899187, 0.01923892, -0.028177269, -0.08535103, -0.14585495, + 0.10662567, -0.01909731, -0.017883534, -0.0047269356, -0.045103323, + 0.0030784295, 0.076784775, 0.07463696, 0.094531395, 0.0814421, + -0.12257899, -0.033945758, -0.031303465, 0.045630626, 0.06843887, + -0.13492945, -0.012480007, -0.0811829, -0.07224499, -0.09628791, + 0.045100946, 0.0012300825, 0.013964662, 0.099372394, 0.02543059, + 0.06958324, 0.034257296, 0.0482646, 0.06267997, 0.052625068, + 0.12784666, 0.07077897, 0.025725935, 0.04165009, 0.07241905, + 0.018668644, -0.037377294, -0.06277783, -0.08833636, -0.040120605, + -0.011405586, -0.007808335, -0.010301386, -0.005102167, 0.027717464, + 0.05483423, 0.11449111, 0.11289652, 0.10939839, 0.13396506, + -0.08402166, -0.01901462, -0.044678304, -0.07720565, 0.014350063, + -0.11757958, -0.0652038, -0.08185733, -0.076754324, -0.092614375, + 0.10405491, 0.052960336, 0.035755895, 0.035839386, -0.012540553, + 0.036881298, 0.02913376, 0.03420159, 0.05448447, -0.054523353, + 0.02582715, 0.02327355, -0.011857179, -0.0011980024, -0.034641717, + -0.026125094, -0.17582615, -0.15923657, -0.27486774, -0.0006143371, + 0.0001771948, -8.470171e-05, 0.02651807, 0.045790765, 0.06956496}; + + input_to_cell_weights_ = { + -0.04580283, -0.09549462, -0.032418985, -0.06454633, -0.043528453, + 0.043018587, -0.049152344, -0.12418144, -0.078985475, -0.07596889, + 0.019484362, -0.11434962, -0.0074034138, -0.06314844, -0.092981495, + 0.0062155537, -0.025034338, -0.0028890965, 0.048929527, 0.06235075, + 0.10665918, -0.032036792, -0.08505916, -0.10843358, -0.13002433, + -0.036816437, -0.02130134, -0.016518239, 0.0047691227, -0.0025825808, + 0.066017866, 0.029991534, -0.10652836, -0.1037554, -0.13056071, + -0.03266643, -0.033702414, -0.006473424, -0.04611692, 0.014419339, + -0.025174323, 0.0396852, 0.081777506, 0.06157468, 0.10210095, + -0.009658194, 0.046511717, 0.03603906, 0.0069369148, 0.015960095, + -0.06507666, 0.09551598, 0.053568836, 0.06408714, 0.12835667, + -0.008714329, -0.20211966, -0.12093674, 0.029450472, 0.2849013, + -0.029227901, 0.1164364, -0.08560263, 0.09941786, -0.036999565, + -0.028842626, -0.0033637602, -0.017012902, -0.09720865, -0.11193351, + -0.029155117, -0.017936034, -0.009768936, -0.04223324, -0.036159635, + 0.06505112, -0.021742892, -0.023377212, -0.07221364, -0.06430552, + 0.05453865, 0.091149814, 0.06387331, 0.007518393, 0.055960953, + 0.069779344, 0.046411168, 0.10509911, 0.07463894, 0.0075130584, + 0.012850982, 0.04555431, 0.056955688, 0.06555285, 0.050801456, + -0.009862683, 0.00826772, -0.026555609, -0.0073611983, 
-0.0014897042}; + + input_to_output_weights_ = { + -0.0998932, -0.07201956, -0.052803773, -0.15629593, -0.15001918, + -0.07650751, 0.02359855, -0.075155355, -0.08037709, -0.15093534, + 0.029517552, -0.04751393, 0.010350531, -0.02664851, -0.016839722, + -0.023121163, 0.0077019283, 0.012851257, -0.05040649, -0.0129761, + -0.021737747, -0.038305793, -0.06870586, -0.01481247, -0.001285394, + 0.10124236, 0.083122835, 0.053313006, -0.062235646, -0.075637154, + -0.027833903, 0.029774971, 0.1130802, 0.09218906, 0.09506135, + -0.086665764, -0.037162706, -0.038880914, -0.035832845, -0.014481564, + -0.09825003, -0.12048569, -0.097665586, -0.05287633, -0.0964047, + -0.11366429, 0.035777505, 0.13568819, 0.052451383, 0.050649304, + 0.05798951, -0.021852335, -0.099848844, 0.014740475, -0.078897946, + 0.04974699, 0.014160473, 0.06973932, 0.04964942, 0.033364646, + 0.08190124, 0.025535367, 0.050893165, 0.048514254, 0.06945813, + -0.078907564, -0.06707616, -0.11844508, -0.09986688, -0.07509403, + 0.06263226, 0.14925587, 0.20188436, 0.12098451, 0.14639415, + 0.0015017595, -0.014267382, -0.03417257, 0.012711468, 0.0028300495, + -0.024758482, -0.05098548, -0.0821182, 0.014225672, 0.021544158, + 0.08949725, 0.07505268, -0.0020780868, 0.04908258, 0.06476295, + -0.022907063, 0.027562456, 0.040185735, 0.019567577, -0.015598739, + -0.049097303, -0.017121866, -0.083368234, -0.02332002, -0.0840956}; + + input_gate_bias_ = {0.02234832, 0.14757581, 0.18176508, 0.10380666, + 0.053110216, -0.06928846, -0.13942584, -0.11816189, + 0.19483899, 0.03652339, -0.10250295, 0.036714908, + -0.18426876, 0.036065217, 0.21810818, 0.02383196, + -0.043370757, 0.08690144, -0.04444982, 0.00030581196}; + + forget_gate_bias_ = {0.035185695, -0.042891346, -0.03032477, 0.23027696, + 0.11098921, 0.15378423, 0.09263801, 0.09790885, + 0.09508917, 0.061199076, 0.07665568, -0.015443159, + -0.03499149, 0.046190713, 0.08895977, 0.10899629, + 0.40694186, 0.06030037, 0.012413437, -0.06108739}; + + cell_gate_bias_ = {-0.024379363, 0.0055531194, 0.23377132, 0.033463873, + -0.1483596, -0.10639995, -0.091433935, 0.058573797, + -0.06809782, -0.07889636, -0.043246906, -0.09829136, + -0.4279842, 0.034901652, 0.18797937, 0.0075234566, + 0.016178843, 0.1749513, 0.13975595, 0.92058027}; + + output_gate_bias_ = {0.046159424, -0.0012809046, 0.03563469, 0.12648113, + 0.027195795, 0.35373217, -0.018957434, 0.008907322, + -0.0762701, 0.12018895, 0.04216877, 0.0022856654, + 0.040952638, 0.3147856, 0.08225149, -0.057416286, + -0.14995944, -0.008040261, 0.13208859, 0.029760877}; + + recurrent_to_input_weights_ = { + -0.001374326, -0.078856036, 0.10672688, 0.029162422, + -0.11585556, 0.02557986, -0.13446963, -0.035785314, + -0.01244275, 0.025961924, -0.02337298, -0.044228926, + -0.055839065, -0.046598054, -0.010546039, -0.06900766, + 0.027239809, 0.022582639, -0.013296484, -0.05459212, + 0.08981, -0.045407712, 0.08682226, -0.06867011, + -0.14390695, -0.02916037, 0.000996957, 0.091420636, + 0.14283475, -0.07390571, -0.06402044, 0.062524505, + -0.093129106, 0.04860203, -0.08364217, -0.08119002, + 0.009352075, 0.22920375, 0.0016303885, 0.11583097, + -0.13732095, 0.012405723, -0.07551853, 0.06343048, + 0.12162708, -0.031923793, -0.014335606, 0.01790974, + -0.10650317, -0.0724401, 0.08554849, -0.05727212, + 0.06556731, -0.042729504, -0.043227166, 0.011683251, + -0.013082158, -0.029302018, -0.010899579, -0.062036745, + -0.022509435, -0.00964907, -0.01567329, 0.04260106, + -0.07787477, -0.11576462, 0.017356863, 0.048673786, + -0.017577527, -0.05527947, -0.082487635, 
-0.040137455, + -0.10820036, -0.04666372, 0.022746278, -0.07851417, + 0.01068115, 0.032956902, 0.022433773, 0.0026891115, + 0.08944216, -0.0685835, 0.010513544, 0.07228705, + 0.02032331, -0.059686817, -0.0005566496, -0.086984694, + 0.040414046, -0.1380399, 0.094208956, -0.05722982, + 0.012092817, -0.04989123, -0.086576, -0.003399834, + -0.04696032, -0.045747425, 0.10091314, 0.048676282, + -0.029037097, 0.031399418, -0.0040285117, 0.047237843, + 0.09504992, 0.041799378, -0.049185462, -0.031518843, + -0.10516937, 0.026374253, 0.10058866, -0.0033195973, + -0.041975245, 0.0073591834, 0.0033782164, -0.004325073, + -0.10167381, 0.042500053, -0.01447153, 0.06464186, + -0.017142897, 0.03312627, 0.009205989, 0.024138335, + -0.011337001, 0.035530265, -0.010912711, 0.0706555, + -0.005894094, 0.051841937, -0.1401738, -0.02351249, + 0.0365468, 0.07590991, 0.08838724, 0.021681072, + -0.10086113, 0.019608743, -0.06195883, 0.077335775, + 0.023646897, -0.095322326, 0.02233014, 0.09756986, + -0.048691444, -0.009579111, 0.07595467, 0.11480546, + -0.09801813, 0.019894179, 0.08502348, 0.004032281, + 0.037211012, 0.068537936, -0.048005626, -0.091520436, + -0.028379958, -0.01556313, 0.06554592, -0.045599163, + -0.01672207, -0.020169014, -0.011877351, -0.20212261, + 0.010889619, 0.0047078193, 0.038385306, 0.08540671, + -0.017140968, -0.0035865551, 0.016678626, 0.005633034, + 0.015963363, 0.00871737, 0.060130805, 0.028611384, + 0.10109069, -0.015060172, -0.07894427, 0.06401885, + 0.011584063, -0.024466386, 0.0047652307, -0.09041358, + 0.030737216, -0.0046374933, 0.14215417, -0.11823516, + 0.019899689, 0.006106124, -0.027092824, 0.0786356, + 0.05052217, -0.058925, -0.011402121, -0.024987547, + -0.0013661642, -0.06832946, -0.015667673, -0.1083353, + -0.00096863037, -0.06988685, -0.053350925, -0.027275559, + -0.033664223, -0.07978348, -0.025200296, -0.017207067, + -0.058403496, -0.055697463, 0.005798788, 0.12965427, + -0.062582195, 0.0013350133, -0.10482091, 0.0379771, + 0.072521195, -0.0029455067, -0.13797039, -0.03628521, + 0.013806405, -0.017858358, -0.01008298, -0.07700066, + -0.017081132, 0.019358726, 0.0027079724, 0.004635139, + 0.062634714, -0.02338735, -0.039547626, -0.02050681, + 0.03385117, -0.083611414, 0.002862572, -0.09421313, + 0.058618143, -0.08598433, 0.00972939, 0.023867095, + -0.053934585, -0.023203006, 0.07452513, -0.048767887, + -0.07314807, -0.056307215, -0.10433547, -0.06440842, + 0.04328182, 0.04389765, -0.020006588, -0.09076438, + -0.11652589, -0.021705797, 0.03345259, -0.010329105, + -0.025767034, 0.013057034, -0.07316461, -0.10145612, + 0.06358255, 0.18531723, 0.07759293, 0.12006465, + 0.1305557, 0.058638252, -0.03393652, 0.09622831, + -0.16253184, -2.4580743e-06, 0.079869635, -0.070196845, + -0.005644518, 0.06857898, -0.12598175, -0.035084512, + 0.03156317, -0.12794146, -0.031963028, 0.04692781, + 0.030070418, 0.0071660685, -0.095516115, -0.004643372, + 0.040170413, -0.062104587, -0.0037324072, 0.0554317, + 0.08184801, -0.019164372, 0.06791302, 0.034257166, + -0.10307039, 0.021943003, 0.046745934, 0.0790918, + -0.0265588, -0.007824208, 0.042546265, -0.00977924, + -0.0002440307, -0.017384544, -0.017990116, 0.12252321, + -0.014512694, -0.08251313, 0.08861942, 0.13589665, + 0.026351685, 0.012641483, 0.07466548, 0.044301085, + -0.045414884, -0.051112458, 0.03444247, -0.08502782, + -0.04106223, -0.028126027, 0.028473156, 0.10467447}; + + recurrent_to_cell_weights_ = { + -0.037322544, 0.018592842, 0.0056175636, -0.06253426, + 0.055647098, -0.05713207, -0.05626563, 0.005559383, + 0.03375411, 
-0.025757805, -0.088049285, 0.06017052, + -0.06570978, 0.007384076, 0.035123326, -0.07920549, + 0.053676967, 0.044480428, -0.07663568, 0.0071805613, + 0.08089997, 0.05143358, 0.038261272, 0.03339287, + -0.027673481, 0.044746667, 0.028349208, 0.020090483, + -0.019443132, -0.030755889, -0.0040000007, 0.04465846, + -0.021585021, 0.0031670958, 0.0053199246, -0.056117613, + -0.10893326, 0.076739706, -0.08509834, -0.027997585, + 0.037871376, 0.01449768, -0.09002357, -0.06111149, + -0.046195522, 0.0422062, -0.005683705, -0.1253618, + -0.012925729, -0.04890792, 0.06985068, 0.037654128, + 0.03398274, -0.004781977, 0.007032333, -0.031787455, + 0.010868644, -0.031489216, 0.09525667, 0.013939797, + 0.0058680447, 0.0167067, 0.02668468, -0.04797466, + -0.048885044, -0.12722108, 0.035304096, 0.06554885, + 0.00972396, -0.039238118, -0.05159735, -0.11329045, + 0.1613692, -0.03750952, 0.06529313, -0.071974665, + -0.11769596, 0.015524369, -0.0013754242, -0.12446318, + 0.02786344, -0.014179351, 0.005264273, 0.14376344, + 0.015983658, 0.03406988, -0.06939408, 0.040699873, + 0.02111075, 0.09669095, 0.041345075, -0.08316494, + -0.07684199, -0.045768797, 0.032298047, -0.041805092, + 0.0119405, 0.0061010392, 0.12652606, 0.0064572375, + -0.024950314, 0.11574242, 0.04508852, -0.04335324, + 0.06760663, -0.027437469, 0.07216407, 0.06977076, + -0.05438599, 0.034033038, -0.028602652, 0.05346137, + 0.043184172, -0.037189785, 0.10420091, 0.00882477, + -0.054019816, -0.074273005, -0.030617684, -0.0028467078, + 0.024302477, -0.0038869337, 0.005332455, 0.0013399826, + 0.04361412, -0.007001822, 0.09631092, -0.06702025, + -0.042049985, -0.035070654, -0.04103342, -0.10273396, + 0.0544271, 0.037184782, -0.13150354, -0.0058036847, + -0.008264958, 0.042035464, 0.05891794, 0.029673764, + 0.0063542654, 0.044788733, 0.054816857, 0.062257513, + -0.00093483756, 0.048938446, -0.004952862, -0.007730018, + -0.04043371, -0.017094059, 0.07229206, -0.023670016, + -0.052195564, -0.025616996, -0.01520939, 0.045104615, + -0.007376126, 0.003533447, 0.006570588, 0.056037236, + 0.12436656, 0.051817212, 0.028532185, -0.08686856, + 0.11868599, 0.07663395, -0.07323171, 0.03463402, + -0.050708205, -0.04458982, -0.11590894, 0.021273347, + 0.1251325, -0.15313013, -0.12224372, 0.17228661, + 0.023029093, 0.086124025, 0.006445803, -0.03496501, + 0.028332196, 0.04449512, -0.042436164, -0.026587414, + -0.006041347, -0.09292539, -0.05678812, 0.03897832, + 0.09465633, 0.008115513, -0.02171956, 0.08304309, + 0.071401566, 0.019622514, 0.032163795, -0.004167056, + 0.02295182, 0.030739572, 0.056506045, 0.004612461, + 0.06524936, 0.059999723, 0.046395954, -0.0045512207, + -0.1335546, -0.030136576, 0.11584653, -0.014678886, + 0.0020118146, -0.09688814, -0.0790206, 0.039770417, + -0.0329582, 0.07922767, 0.029322514, 0.026405897, + 0.04207835, -0.07073373, 0.063781224, 0.0859677, + -0.10925287, -0.07011058, 0.048005477, 0.03438226, + -0.09606514, -0.006669445, -0.043381985, 0.04240257, + -0.06955775, -0.06769346, 0.043903265, -0.026784198, + -0.017840602, 0.024307009, -0.040079936, -0.019946516, + 0.045318738, -0.12233574, 0.026170589, 0.0074471775, + 0.15978073, 0.10185836, 0.10298046, -0.015476589, + -0.039390966, -0.072174534, 0.0739445, -0.1211869, + -0.0347889, -0.07943156, 0.014809798, -0.12412325, + -0.0030663363, 0.039695457, 0.0647603, -0.08291318, + -0.018529687, -0.004423833, 0.0037507233, 0.084633216, + -0.01514876, -0.056505352, -0.012800942, -0.06994386, + 0.012962922, -0.031234352, 0.07029052, 0.016418684, + 0.03618972, 0.055686004, -0.08663945, 
-0.017404709, + -0.054761406, 0.029065743, 0.052404847, 0.020238016, + 0.0048197987, -0.0214882, 0.07078733, 0.013016777, + 0.06262858, 0.009184685, 0.020785125, -0.043904778, + -0.0270329, -0.03299152, -0.060088247, -0.015162964, + -0.001828936, 0.12642565, -0.056757294, 0.013586685, + 0.09232601, -0.035886683, 0.06000002, 0.05229691, + -0.052580316, -0.082029596, -0.010794592, 0.012947712, + -0.036429964, -0.085508935, -0.13127148, -0.017744139, + 0.031502828, 0.036232427, -0.031581745, 0.023051167, + -0.05325106, -0.03421577, 0.028793324, -0.034633752, + -0.009881397, -0.043551125, -0.018609839, 0.0019097115, + -0.008799762, 0.056595087, 0.0022273948, 0.055752404}; + + recurrent_to_forget_weights_ = { + -0.057784554, -0.026057621, -0.068447545, -0.022581743, + 0.14811787, 0.10826372, 0.09471067, 0.03987225, + -0.0039523416, 0.00030638507, 0.053185795, 0.10572994, + 0.08414449, -0.022036452, -0.00066928595, -0.09203576, + 0.032950465, -0.10985798, -0.023809856, 0.0021431844, + -0.02196096, -0.00326074, 0.00058621005, -0.074678116, + -0.06193199, 0.055729095, 0.03736828, 0.020123724, + 0.061878487, -0.04729229, 0.034919553, -0.07585433, + -0.04421272, -0.044019096, 0.085488975, 0.04058006, + -0.06890133, -0.030951202, -0.024628663, -0.07672815, + 0.034293607, 0.08556707, -0.05293577, -0.033561368, + -0.04899627, 0.0241671, 0.015736353, -0.095442444, + -0.029564252, 0.016493602, -0.035026584, 0.022337519, + -0.026871363, 0.004780428, 0.0077918363, -0.03601621, + 0.016435321, -0.03263031, -0.09543275, -0.047392778, + 0.013454138, 0.028934088, 0.01685226, -0.086110644, + -0.046250615, -0.01847454, 0.047608484, 0.07339695, + 0.034546845, -0.04881143, 0.009128804, -0.08802852, + 0.03761666, 0.008096139, -0.014454086, 0.014361001, + -0.023502491, -0.0011840804, -0.07607001, 0.001856849, + -0.06509276, -0.006021153, -0.08570962, -0.1451793, + 0.060212336, 0.055259194, 0.06974018, 0.049454916, + -0.027794661, -0.08077226, -0.016179763, 0.1169753, + 0.17213494, -0.0056326236, -0.053934924, -0.0124349, + -0.11520337, 0.05409887, 0.088759385, 0.0019655675, + 0.0042065294, 0.03881498, 0.019844765, 0.041858196, + -0.05695512, 0.047233116, 0.038937137, -0.06542224, + 0.014429736, -0.09719407, 0.13908425, -0.05379757, + 0.012321099, 0.082840554, -0.029899208, 0.044217527, + 0.059855383, 0.07711018, -0.045319796, 0.0948846, + -0.011724666, -0.0033288454, -0.033542685, -0.04764985, + -0.13873616, 0.040668588, 0.034832682, -0.015319203, + -0.018715994, 0.046002675, 0.0599172, -0.043107376, + 0.0294216, -0.002314414, -0.022424703, 0.0030315618, + 0.0014641669, 0.0029166266, -0.11878115, 0.013738511, + 0.12375372, -0.0006038222, 0.029104086, 0.087442465, + 0.052958444, 0.07558703, 0.04817258, 0.044462286, + -0.015213451, -0.08783778, -0.0561384, -0.003008196, + 0.047060397, -0.002058388, 0.03429439, -0.018839769, + 0.024734668, 0.024614193, -0.042046934, 0.09597743, + -0.0043254104, 0.04320769, 0.0064070094, -0.0019131786, + -0.02558259, -0.022822596, -0.023273505, -0.02464396, + -0.10991725, -0.006240552, 0.0074488563, 0.024044557, + 0.04383914, -0.046476185, 0.028658995, 0.060410924, + 0.050786525, 0.009452605, -0.0073054377, -0.024810238, + 0.0052906186, 0.0066939713, -0.0020913032, 0.014515517, + 0.015898481, 0.021362653, -0.030262267, 0.016587038, + -0.011442813, 0.041154444, -0.007631438, -0.03423484, + -0.010977775, 0.036152758, 0.0066366293, 0.11915515, + 0.02318443, -0.041350313, 0.021485701, -0.10906167, + -0.028218046, -0.00954771, 0.020531068, -0.11995105, + -0.03672871, 0.024019798, 0.014255957, 
-0.05221243, + -0.00661567, -0.04630967, 0.033188973, 0.10107534, + -0.014027541, 0.030796422, -0.10270911, -0.035999842, + 0.15443139, 0.07684145, 0.036571592, -0.035900835, + -0.0034699554, 0.06209149, 0.015920248, -0.031122351, + -0.03858649, 0.01849943, 0.13872518, 0.01503974, + 0.069941424, -0.06948533, -0.0088794185, 0.061282158, + -0.047401894, 0.03100163, -0.041533746, -0.10430945, + 0.044574402, -0.01425562, -0.024290353, 0.034563623, + 0.05866852, 0.023947537, -0.09445152, 0.035450947, + 0.02247216, -0.0042998926, 0.061146557, -0.10250651, + 0.020881841, -0.06747029, 0.10062043, -0.0023941975, + 0.03532124, -0.016341697, 0.09685456, -0.016764693, + 0.051808182, 0.05875331, -0.04536488, 0.001626336, + -0.028892258, -0.01048663, -0.009793449, -0.017093895, + 0.010987891, 0.02357273, -0.00010856845, 0.0099760275, + -0.001845119, -0.03551521, 0.0018358806, 0.05763657, + -0.01769146, 0.040995963, 0.02235177, -0.060430344, + 0.11475477, -0.023854522, 0.10071741, 0.0686208, + -0.014250481, 0.034261297, 0.047418304, 0.08562733, + -0.030519066, 0.0060542435, 0.014653856, -0.038836084, + 0.04096551, 0.032249358, -0.08355519, -0.026823482, + 0.056386515, -0.010401743, -0.028396193, 0.08507674, + 0.014410365, 0.020995233, 0.17040324, 0.11511526, + 0.02459721, 0.0066619175, 0.025853224, -0.023133837, + -0.081302024, 0.017264642, -0.009585969, 0.09491168, + -0.051313367, 0.054532815, -0.014298593, 0.10657464, + 0.007076659, 0.10964551, 0.0409152, 0.008275321, + -0.07283536, 0.07937492, 0.04192024, -0.1075027}; + + recurrent_to_output_weights_ = { + 0.025825322, -0.05813119, 0.09495884, -0.045984812, -0.01255415, + -0.0026479573, -0.08196161, -0.054914974, -0.0046604523, -0.029587349, + -0.044576716, -0.07480124, -0.082868785, 0.023254942, 0.027502948, + -0.0039728214, -0.08683098, -0.08116779, -0.014675607, -0.037924774, + -0.023314456, -0.007401714, -0.09255757, 0.029460307, -0.08829125, + -0.005139627, -0.08989442, -0.0555066, 0.13596267, -0.025062224, + -0.048351806, -0.03850004, 0.07266485, -0.022414139, 0.05940088, + 0.075114764, 0.09597592, -0.010211725, -0.0049794707, -0.011523867, + -0.025980417, 0.072999895, 0.11091378, -0.081685916, 0.014416728, + 0.043229222, 0.034178585, -0.07530371, 0.035837382, -0.085607, + -0.007721233, -0.03287832, -0.043848954, -0.06404588, -0.06632928, + -0.073643476, 0.008214239, -0.045984086, 0.039764922, 0.03474462, + 0.060612556, -0.080590084, 0.049127717, 0.04151091, -0.030063879, + 0.008801774, -0.023021035, -0.019558564, 0.05158114, -0.010947698, + -0.011825728, 0.0075720972, 0.0699727, -0.0039981045, 0.069350146, + 0.08799282, 0.016156472, 0.035502106, 0.11695009, 0.006217345, + 0.13392477, -0.037875112, 0.025745004, 0.08940699, -0.00924166, + 0.0046702605, -0.036598757, -0.08811812, 0.10522024, -0.032441203, + 0.008176899, -0.04454919, 0.07058152, 0.0067963637, 0.039206743, + 0.03259838, 0.03725492, -0.09515802, 0.013326398, -0.052055415, + -0.025676316, 0.03198509, -0.015951829, -0.058556724, 0.036879618, + 0.043357447, 0.028362012, -0.05908629, 0.0059240665, -0.04995891, + -0.019187413, 0.0276265, -0.01628143, 0.0025863599, 0.08800015, + 0.035250366, -0.022165963, -0.07328642, -0.009415526, -0.07455109, + 0.11690406, 0.0363299, 0.07411125, 0.042103454, -0.009660886, + 0.019076364, 0.018299393, -0.046004917, 0.08891175, 0.0431396, + -0.026327137, -0.051502608, 0.08979574, -0.051670972, 0.04940282, + -0.07491107, -0.021240504, 0.022596184, -0.034280192, 0.060163025, + -0.058211457, -0.051837247, -0.01349775, -0.04639988, -0.035936575, + -0.011681591, 
0.064818054, 0.0073146066, -0.021745546, -0.043124277, + -0.06471268, -0.07053354, -0.029321948, -0.05330136, 0.016933719, + -0.053782392, 0.13747959, -0.1361751, -0.11569455, 0.0033329215, + 0.05693899, -0.053219706, 0.063698, 0.07977434, -0.07924483, + 0.06936997, 0.0034815092, -0.007305279, -0.037325785, -0.07251102, + -0.033633437, -0.08677009, 0.091591336, -0.14165086, 0.021752775, + 0.019683983, 0.0011612234, -0.058154266, 0.049996935, 0.0288841, + -0.0024567875, -0.14345716, 0.010955264, -0.10234828, 0.1183656, + -0.0010731248, -0.023590032, -0.072285876, -0.0724771, -0.026382286, + -0.0014920527, 0.042667855, 0.0018776858, 0.02986552, 0.009814309, + 0.0733756, 0.12289186, 0.018043943, -0.0458958, 0.049412545, + 0.033632483, 0.05495232, 0.036686596, -0.013781798, -0.010036754, + 0.02576849, -0.08307328, 0.010112348, 0.042521734, -0.05869831, + -0.071689695, 0.03876447, -0.13275425, -0.0352966, -0.023077697, + 0.10285965, 0.084736146, 0.15568255, -0.00040734606, 0.027835453, + -0.10292561, -0.032401145, 0.10053256, -0.026142767, -0.08271222, + -0.0030240538, -0.016368777, 0.1070414, 0.042672627, 0.013456989, + -0.0437609, -0.022309763, 0.11576483, 0.04108048, 0.061026827, + -0.0190714, -0.0869359, 0.037901703, 0.0610107, 0.07202949, + 0.01675338, 0.086139716, -0.08795751, -0.014898893, -0.023771819, + -0.01965048, 0.007955471, -0.043740474, 0.03346837, -0.10549954, + 0.090567775, 0.042013682, -0.03176985, 0.12569028, -0.02421228, + -0.029526481, 0.023851605, 0.031539805, 0.05292009, -0.02344001, + -0.07811758, -0.08834428, 0.10094801, 0.16594367, -0.06861939, + -0.021256343, -0.041093912, -0.06669611, 0.035498552, 0.021757556, + -0.09302526, -0.015403468, -0.06614931, -0.051798206, -0.013874718, + 0.03630673, 0.010412845, -0.08077351, 0.046185967, 0.0035662893, + 0.03541868, -0.094149634, -0.034814864, 0.003128424, -0.020674974, + -0.03944324, -0.008110165, -0.11113267, 0.08484226, 0.043586485, + 0.040582247, 0.0968012, -0.065249965, -0.028036479, 0.0050708856, + 0.0017462453, 0.0326779, 0.041296225, 0.09164146, -0.047743853, + -0.015952192, -0.034451712, 0.084197424, -0.05347844, -0.11768019, + 0.085926116, -0.08251791, -0.045081906, 0.0948852, 0.068401024, + 0.024856757, 0.06978981, -0.057309967, -0.012775832, -0.0032452994, + 0.01977615, -0.041040014, -0.024264973, 0.063464895, 0.05431621, + }; + + cell_to_input_weights_ = { + 0.040369894, 0.030746894, 0.24704495, 0.018586371, -0.037586458, + -0.15312155, -0.11812848, -0.11465643, 0.20259799, 0.11418174, + -0.10116027, -0.011334949, 0.12411352, -0.076769054, -0.052169047, + 0.21198851, -0.38871562, -0.09061183, -0.09683246, -0.21929175}; + + cell_to_forget_weights_ = { + -0.01998659, -0.15568835, -0.24248174, -0.012770197, 0.041331276, + -0.072311886, -0.052123554, -0.0066330447, -0.043891653, 0.036225766, + -0.047248036, 0.021479502, 0.033189066, 0.11952997, -0.020432774, + 0.64658105, -0.06650122, -0.03467612, 0.095340036, 0.23647355}; + + cell_to_output_weights_ = {0.08286371, -0.08261836, -0.51210177, 0.002913762, + 0.17764764, -0.5495371, -0.08460716, -0.24552552, + 0.030037103, 0.04123544, -0.11940523, 0.007358328, + 0.1890978, 0.4833202, -0.34441817, 0.36312827, + -0.26375428, 0.1457655, -0.19724406, 0.15548733}; + + projection_weights_ = { + -0.009802181, 0.09401916, 0.0717386, -0.13895074, 0.09641832, + 0.060420845, 0.08539281, 0.054285463, 0.061395317, 0.034448683, + -0.042991187, 0.019801661, -0.16840284, -0.015726732, -0.23041931, + -0.024478018, -0.10959692, -0.013875541, 0.18600968, -0.061274476, + 0.0138165, 
-0.08160894, -0.07661644, 0.032372914, 0.16169067, + 0.22465782, -0.03993472, -0.004017731, 0.08633481, -0.28869787, + 0.08682067, 0.17240396, 0.014975425, 0.056431185, 0.031037588, + 0.16702051, 0.0077946745, 0.15140012, 0.29405436, 0.120285, + -0.188994, -0.027265169, 0.043389652, -0.022061434, 0.014777949, + -0.20203483, 0.094781205, 0.19100232, 0.13987629, -0.036132768, + -0.06426278, -0.05108664, 0.13221376, 0.009441198, -0.16715929, + 0.15859416, -0.040437475, 0.050779544, -0.022187516, 0.012166504, + 0.027685808, -0.07675938, -0.0055694645, -0.09444123, 0.0046453946, + 0.050794356, 0.10770313, -0.20790008, -0.07149004, -0.11425117, + 0.008225835, -0.035802525, 0.14374903, 0.15262283, 0.048710253, + 0.1847461, -0.007487823, 0.11000021, -0.09542012, 0.22619456, + -0.029149994, 0.08527916, 0.009043713, 0.0042746216, 0.016261552, + 0.022461696, 0.12689082, -0.043589946, -0.12035478, -0.08361797, + -0.050666027, -0.1248618, -0.1275799, -0.071875185, 0.07377272, + 0.09944291, -0.18897448, -0.1593054, -0.06526116, -0.040107165, + -0.004618631, -0.067624845, -0.007576253, 0.10727444, 0.041546922, + -0.20424393, 0.06907816, 0.050412357, 0.00724631, 0.039827548, + 0.12449835, 0.10747581, 0.13708383, 0.09134148, -0.12617786, + -0.06428341, 0.09956831, 0.1208086, -0.14676677, -0.0727722, + 0.1126304, 0.010139365, 0.015571211, -0.038128063, 0.022913318, + -0.042050496, 0.16842307, -0.060597885, 0.10531834, -0.06411776, + -0.07451711, -0.03410368, -0.13393489, 0.06534304, 0.003620307, + 0.04490757, 0.05970546, 0.05197996, 0.02839995, 0.10434969, + -0.013699693, -0.028353551, -0.07260381, 0.047201227, -0.024575593, + -0.036445823, 0.07155557, 0.009672501, -0.02328883, 0.009533515, + -0.03606021, -0.07421458, -0.028082801, -0.2678904, -0.13221288, + 0.18419984, -0.13012612, -0.014588381, -0.035059117, -0.04824723, + 0.07830115, -0.056184657, 0.03277091, 0.025466874, 0.14494097, + -0.12522776, -0.098633975, -0.10766018, -0.08317623, 0.08594209, + 0.07749552, 0.039474737, 0.1776665, -0.07409566, -0.0477268, + 0.29323658, 0.10801441, 0.1154011, 0.013952499, 0.10739139, + 0.10708251, -0.051456142, 0.0074137426, -0.10430189, 0.10034707, + 0.045594677, 0.0635285, -0.0715442, -0.089667566, -0.10811871, + 0.00026344223, 0.08298446, -0.009525053, 0.006585689, -0.24567553, + -0.09450807, 0.09648481, 0.026996298, -0.06419476, -0.04752702, + -0.11063944, -0.23441927, -0.17608605, -0.052156363, 0.067035615, + 0.19271925, -0.0032889997, -0.043264326, 0.09663576, -0.057112187, + -0.10100678, 0.0628376, 0.04447668, 0.017961001, -0.10094388, + -0.10190601, 0.18335468, 0.10494553, -0.052095775, -0.0026118709, + 0.10539724, -0.04383912, -0.042349473, 0.08438151, -0.1947263, + 0.02251204, 0.11216432, -0.10307853, 0.17351969, -0.039091777, + 0.08066188, -0.00561982, 0.12633002, 0.11335965, -0.0088127935, + -0.019777594, 0.06864014, -0.059751723, 0.016233567, -0.06894641, + -0.28651384, -0.004228674, 0.019708522, -0.16305895, -0.07468996, + -0.0855457, 0.099339016, -0.07580735, -0.13775392, 0.08434318, + 0.08330512, -0.12131499, 0.031935584, 0.09180414, -0.08876437, + -0.08049874, 0.008753825, 0.03498998, 0.030215185, 0.03907079, + 0.089751154, 0.029194152, -0.03337423, -0.019092513, 0.04331237, + 0.04299654, -0.036394123, -0.12915532, 0.09793732, 0.07512415, + -0.11319543, -0.032502122, 0.15661901, 0.07671967, -0.005491124, + -0.19379048, -0.218606, 0.21448623, 0.017840758, 0.1416943, + -0.07051762, 0.19488361, 0.02664691, -0.18104725, -0.09334311, + 0.15026465, -0.15493552, -0.057762887, -0.11604192, -0.262013, + 
-0.01391798, 0.012185008, 0.11156489, -0.07483202, 0.06693364, + -0.26151478, 0.046425626, 0.036540434, -0.16435726, 0.17338543, + -0.21401681, -0.11385144, -0.08283257, -0.069031075, 0.030635102, + 0.010969227, 0.11109743, 0.010919218, 0.027526086, 0.13519906, + 0.01891392, -0.046839405, -0.040167913, 0.017953383, -0.09700955, + 0.0061885654, -0.07000971, 0.026893595, -0.038844477, 0.14543656}; + + lstm_input_ = { + {// Batch0: 4 (input_sequence_size) * 5 (n_input) + 0.787926, 0.151646, 0.071352, 0.118426, 0.458058, // step 0 + 0.596268, 0.998386, 0.568695, 0.864524, 0.571277, // step 1 + 0.073204, 0.296072, 0.743333, 0.069199, 0.045348, // step 2 + 0.867394, 0.291279, 0.013714, 0.482521, 0.626339}, // step 3 + + {// Batch1: 4 (input_sequence_size) * 5 (n_input) + 0.295743, 0.544053, 0.690064, 0.858138, 0.497181, // step 0 + 0.642421, 0.524260, 0.134799, 0.003639, 0.162482, // step 1 + 0.640394, 0.930399, 0.050782, 0.432485, 0.988078, // step 2 + 0.082922, 0.563329, 0.865614, 0.333232, 0.259916} // step 3 + }; + + lstm_golden_output_ = { + {// Batch0: 4 (input_sequence_size) * 16 (n_output) + -0.00396806, 0.029352, -0.00279226, 0.0159977, -0.00835576, + -0.0211779, 0.0283512, -0.0114597, 0.00907307, -0.0244004, + -0.0152191, -0.0259063, 0.00914318, 0.00415118, 0.017147, + 0.0134203, -0.0166936, 0.0381209, 0.000889694, 0.0143363, + -0.0328911, -0.0234288, 0.0333051, -0.012229, 0.0110322, + -0.0457725, -0.000832209, -0.0202817, 0.0327257, 0.0121308, + 0.0155969, 0.0312091, -0.0213783, 0.0350169, 0.000324794, + 0.0276012, -0.0263374, -0.0371449, 0.0446149, -0.0205474, + 0.0103729, -0.0576349, -0.0150052, -0.0292043, 0.0376827, + 0.0136115, 0.0243435, 0.0354492, -0.0189322, 0.0464512, + -0.00251373, 0.0225745, -0.0308346, -0.0317124, 0.0460407, + -0.0189395, 0.0149363, -0.0530162, -0.0150767, -0.0340193, + 0.0286833, 0.00824207, 0.0264887, 0.0305169}, + {// Batch1: 4 (input_sequence_size) * 16 (n_output) + -0.013869, 0.0287268, -0.00334693, 0.00733398, -0.0287926, + -0.0186926, 0.0193662, -0.0115437, 0.00422612, -0.0345232, + 0.00223253, -0.00957321, 0.0210624, 0.013331, 0.0150954, + 0.02168, -0.0141913, 0.0322082, 0.00227024, 0.0260507, + -0.0188721, -0.0296489, 0.0399134, -0.0160509, 0.0116039, + -0.0447318, -0.0150515, -0.0277406, 0.0316596, 0.0118233, + 0.0214762, 0.0293641, -0.0204549, 0.0450315, -0.00117378, + 0.0167673, -0.0375007, -0.0238314, 0.038784, -0.0174034, + 0.0131743, -0.0506589, -0.0048447, -0.0240239, 0.0325789, + 0.00790065, 0.0220157, 0.0333314, -0.0264787, 0.0387855, + -0.000764675, 0.0217599, -0.037537, -0.0335206, 0.0431679, + -0.0211424, 0.010203, -0.062785, -0.00832363, -0.025181, + 0.0412031, 0.0118723, 0.0239643, 0.0394009}}; + LSTMOpModel lstm(n_batch, n_input, n_cell, n_output, /*use_cifg=*/false, /*use_peephole=*/true, /*use_projection_weights=*/true, @@ -1195,86 +1132,7 @@ TEST_P(NoCifg_Peephole_Projection_NoLayerNorm_LstmOpTest, Test) { VerifyGoldens(&lstm, tolerance_per_type->at(weight_type)); } -class NoCifg_Peephole_Projection_LayerNorm_LstmOpTest : public BaseLstmOpTest { - void SetUp() override { - input_to_input_weights_ = {0.5, 0.6, 0.7, -0.8, -0.9, 0.1, 0.2, - 0.3, -0.4, 0.5, -0.8, 0.7, -0.6, 0.5, - -0.4, -0.5, -0.4, -0.3, -0.2, -0.1}; - - input_to_forget_weights_ = {-0.6, -0.1, 0.3, 0.2, 0.9, -0.5, -0.2, - -0.4, 0.3, -0.8, -0.4, 0.3, -0.5, -0.4, - -0.6, 0.3, -0.4, -0.6, -0.5, -0.5}; - - input_to_cell_weights_ = {-0.4, -0.3, -0.2, -0.1, -0.5, 0.5, -0.2, - -0.3, -0.2, -0.6, 0.6, -0.1, -0.4, -0.3, - -0.7, 0.7, -0.9, -0.5, 0.8, 0.6}; - - 
input_to_output_weights_ = {-0.8, -0.4, -0.2, -0.9, -0.1, -0.7, 0.3, - -0.3, -0.8, -0.2, 0.6, -0.2, 0.4, -0.7, - -0.3, -0.5, 0.1, 0.5, -0.6, -0.4}; - - input_gate_bias_ = {0.03, 0.15, 0.22, 0.38}; - - forget_gate_bias_ = {0.1, -0.3, -0.2, 0.1}; - - cell_gate_bias_ = {-0.05, 0.72, 0.25, 0.08}; - - output_gate_bias_ = {0.05, -0.01, 0.2, 0.1}; - - recurrent_to_input_weights_ = {-0.2, -0.3, 0.4, 0.1, -0.5, 0.9, - -0.2, -0.3, -0.7, 0.05, -0.2, -0.6}; - - recurrent_to_cell_weights_ = {-0.3, 0.2, 0.1, -0.3, 0.8, -0.08, - -0.2, 0.3, 0.8, -0.6, -0.1, 0.2}; - - recurrent_to_forget_weights_ = {-0.5, -0.3, -0.5, -0.2, 0.6, 0.4, - 0.9, 0.3, -0.1, 0.2, 0.5, 0.2}; - - recurrent_to_output_weights_ = {0.3, -0.1, 0.1, -0.2, -0.5, -0.7, - -0.2, -0.6, -0.1, -0.4, -0.7, -0.2}; - - cell_to_input_weights_ = {0.05, 0.1, 0.25, 0.15}; - - cell_to_forget_weights_ = {-0.02, -0.15, -0.25, -0.03}; - - cell_to_output_weights_ = {0.1, -0.1, -0.5, 0.05}; - - input_layer_norm_coefficients_ = {0.1, 0.2, 0.3, 0.5}; - forget_layer_norm_coefficients_ = {0.2, 0.2, 0.4, 0.3}; - cell_layer_norm_coefficients_ = {0.7, 0.2, 0.3, 0.8}; - output_layer_norm_coefficients_ = {0.6, 0.2, 0.2, 0.5}; - - projection_weights_ = {-0.1, 0.2, 0.01, -0.2, 0.1, 0.5, - 0.3, 0.08, 0.07, 0.2, -0.4, 0.2}; - - lstm_input_ = { - {// Batch0: 3 (input_sequence_size) * 5 (n_input) - 0.7, 0.8, 0.1, 0.2, 0.3, // seq 0 - 0.8, 0.1, 0.2, 0.4, 0.5, // seq 1 - 0.2, 0.7, 0.7, 0.1, 0.7}, // seq 2 - - {// Batch1: 3 (input_sequence_size) * 5 (n_input) - 0.3, 0.2, 0.9, 0.8, 0.1, // seq 0 - 0.1, 0.5, 0.2, 0.4, 0.2, // seq 1 - 0.6, 0.9, 0.2, 0.5, 0.7}, // seq 2 - }; - - lstm_golden_output_ = {{ - // Batch0: 3 (input_sequence_size) * 3 (n_output) - 0.0244077, 0.128027, -0.00170918, // seq 0 - 0.0137642, 0.140751, 0.0395835, // seq 1 - -0.00459231, 0.155278, 0.0837377, // seq 2 - }, - { - // Batch1: 3 (input_sequence_size) * 3 (n_output) - -0.00692428, 0.0848741, 0.063445, // seq 0 - -0.00403912, 0.139963, 0.072681, // seq 1 - 0.00752706, 0.161903, 0.0561371, // seq 2 - }}; - } -}; - -TEST_P(NoCifg_Peephole_Projection_LayerNorm_LstmOpTest, Test) { +TEST_P(LstmOpTest, NoCifg_Peephole_Projection_LayerNorm) { const int n_batch = 2; const int n_input = 5; const int n_cell = 4; @@ -1290,6 +1148,81 @@ TEST_P(NoCifg_Peephole_Projection_LayerNorm_LstmOpTest, Test) { return; } + input_to_input_weights_ = {0.5, 0.6, 0.7, -0.8, -0.9, 0.1, 0.2, + 0.3, -0.4, 0.5, -0.8, 0.7, -0.6, 0.5, + -0.4, -0.5, -0.4, -0.3, -0.2, -0.1}; + + input_to_forget_weights_ = {-0.6, -0.1, 0.3, 0.2, 0.9, -0.5, -0.2, + -0.4, 0.3, -0.8, -0.4, 0.3, -0.5, -0.4, + -0.6, 0.3, -0.4, -0.6, -0.5, -0.5}; + + input_to_cell_weights_ = {-0.4, -0.3, -0.2, -0.1, -0.5, 0.5, -0.2, + -0.3, -0.2, -0.6, 0.6, -0.1, -0.4, -0.3, + -0.7, 0.7, -0.9, -0.5, 0.8, 0.6}; + + input_to_output_weights_ = {-0.8, -0.4, -0.2, -0.9, -0.1, -0.7, 0.3, + -0.3, -0.8, -0.2, 0.6, -0.2, 0.4, -0.7, + -0.3, -0.5, 0.1, 0.5, -0.6, -0.4}; + + input_gate_bias_ = {0.03, 0.15, 0.22, 0.38}; + + forget_gate_bias_ = {0.1, -0.3, -0.2, 0.1}; + + cell_gate_bias_ = {-0.05, 0.72, 0.25, 0.08}; + + output_gate_bias_ = {0.05, -0.01, 0.2, 0.1}; + + recurrent_to_input_weights_ = {-0.2, -0.3, 0.4, 0.1, -0.5, 0.9, + -0.2, -0.3, -0.7, 0.05, -0.2, -0.6}; + + recurrent_to_cell_weights_ = {-0.3, 0.2, 0.1, -0.3, 0.8, -0.08, + -0.2, 0.3, 0.8, -0.6, -0.1, 0.2}; + + recurrent_to_forget_weights_ = {-0.5, -0.3, -0.5, -0.2, 0.6, 0.4, + 0.9, 0.3, -0.1, 0.2, 0.5, 0.2}; + + recurrent_to_output_weights_ = {0.3, -0.1, 0.1, -0.2, -0.5, -0.7, + -0.2, -0.6, -0.1, -0.4, -0.7, -0.2}; + + 
cell_to_input_weights_ = {0.05, 0.1, 0.25, 0.15}; + + cell_to_forget_weights_ = {-0.02, -0.15, -0.25, -0.03}; + + cell_to_output_weights_ = {0.1, -0.1, -0.5, 0.05}; + + input_layer_norm_coefficients_ = {0.1, 0.2, 0.3, 0.5}; + forget_layer_norm_coefficients_ = {0.2, 0.2, 0.4, 0.3}; + cell_layer_norm_coefficients_ = {0.7, 0.2, 0.3, 0.8}; + output_layer_norm_coefficients_ = {0.6, 0.2, 0.2, 0.5}; + + projection_weights_ = {-0.1, 0.2, 0.01, -0.2, 0.1, 0.5, + 0.3, 0.08, 0.07, 0.2, -0.4, 0.2}; + + lstm_input_ = { + {// Batch0: 3 (input_sequence_size) * 5 (n_input) + 0.7, 0.8, 0.1, 0.2, 0.3, // seq 0 + 0.8, 0.1, 0.2, 0.4, 0.5, // seq 1 + 0.2, 0.7, 0.7, 0.1, 0.7}, // seq 2 + + {// Batch1: 3 (input_sequence_size) * 5 (n_input) + 0.3, 0.2, 0.9, 0.8, 0.1, // seq 0 + 0.1, 0.5, 0.2, 0.4, 0.2, // seq 1 + 0.6, 0.9, 0.2, 0.5, 0.7}, // seq 2 + }; + + lstm_golden_output_ = {{ + // Batch0: 3 (input_sequence_size) * 3 (n_output) + 0.0244077, 0.128027, -0.00170918, // seq 0 + 0.0137642, 0.140751, 0.0395835, // seq 1 + -0.00459231, 0.155278, 0.0837377, // seq 2 + }, + { + // Batch1: 3 (input_sequence_size) * 3 (n_output) + -0.00692428, 0.0848741, 0.063445, // seq 0 + -0.00403912, 0.139963, 0.072681, // seq 1 + 0.00752706, 0.161903, 0.0561371, // seq 2 + }}; + LSTMOpModel lstm(n_batch, n_input, n_cell, n_output, /*use_cifg=*/false, /*use_peephole=*/true, /*use_projection_weights=*/true, @@ -1304,66 +1237,7 @@ TEST_P(NoCifg_Peephole_Projection_LayerNorm_LstmOpTest, Test) { VerifyGoldens(&lstm, tolerance_per_type->at(weight_type)); } -class Cifg_Peephole_Projection_LayerNorm_LstmOpTest : public BaseLstmOpTest { - void SetUp() override { - input_to_forget_weights_ = {-0.6, -0.1, 0.3, 0.2, 0.9, -0.5, -0.2, - -0.4, 0.3, -0.8, -0.4, 0.3, -0.5, -0.4, - -0.6, 0.3, -0.4, -0.6, -0.5, -0.5}; - input_to_cell_weights_ = {-0.4, -0.3, -0.2, -0.1, -0.5, 0.5, -0.2, - -0.3, -0.2, -0.6, 0.6, -0.1, -0.4, -0.3, - -0.7, 0.7, -0.9, -0.5, 0.8, 0.6}; - input_to_output_weights_ = {-0.8, -0.4, -0.2, -0.9, -0.1, -0.7, 0.3, - -0.3, -0.8, -0.2, 0.6, -0.2, 0.4, -0.7, - -0.3, -0.5, 0.1, 0.5, -0.6, -0.4}; - - forget_gate_bias_ = {0.1, -0.3, -0.2, 0.1}; - cell_gate_bias_ = {-0.05, 0.72, 0.25, 0.08}; - output_gate_bias_ = {0.05, -0.01, 0.2, 0.1}; - - recurrent_to_cell_weights_ = {-0.3, 0.2, 0.1, -0.3, 0.8, -0.08, - -0.2, 0.3, 0.8, -0.6, -0.1, 0.2}; - recurrent_to_forget_weights_ = {-0.5, -0.3, -0.5, -0.2, 0.6, 0.4, - 0.9, 0.3, -0.1, 0.2, 0.5, 0.2}; - recurrent_to_output_weights_ = {0.3, -0.1, 0.1, -0.2, -0.5, -0.7, - -0.2, -0.6, -0.1, -0.4, -0.7, -0.2}; - - cell_to_forget_weights_ = {-0.02, -0.15, -0.25, -0.03}; - cell_to_output_weights_ = {0.1, -0.1, -0.5, 0.05}; - - forget_layer_norm_coefficients_ = {0.2, 0.2, 0.4, 0.3}; - cell_layer_norm_coefficients_ = {0.7, 0.2, 0.3, 0.8}; - output_layer_norm_coefficients_ = {0.6, 0.2, 0.2, 0.5}; - projection_weights_ = {-0.1, 0.2, 0.01, -0.2, 0.1, 0.5, - 0.3, 0.08, 0.07, 0.2, -0.4, 0.2}; - - lstm_input_ = { - {// Batch0: 3 (input_sequence_size) * 5 (n_input) - 0.7, 0.8, 0.1, 0.2, 0.3, // seq 0 - 0.8, 0.1, 0.2, 0.4, 0.5, // seq 1 - 0.2, 0.7, 0.7, 0.1, 0.7}, // seq 2 - - {// Batch1: 3 (input_sequence_size) * 5 (n_input) - 0.3, 0.2, 0.9, 0.8, 0.1, // seq 0 - 0.1, 0.5, 0.2, 0.4, 0.2, // seq 1 - 0.6, 0.9, 0.2, 0.5, 0.7}, // seq 2 - }; - lstm_golden_output_ = { - { - // Batch0: 3 (input_sequence_size) * 3 (n_output) - 0.02129706, 0.140816242, 0.0112733059, // seq 0 - 0.0132302344, 0.152308047, 0.0346313119, // seq 1 - -0.0123688057, 0.165790111, 0.0893077999, // seq 2 - }, - { - // Batch1: 3 (input_sequence_size) 
* 3 (n_output) - -0.0226350538, 0.0916948169, 0.0769175813, // seq 0 - -0.0269966982, 0.149707705, 0.094149217, // seq 1 - -0.0103429332, 0.173016444, 0.0720508844, // seq 2 - }}; - } -}; - -TEST_P(Cifg_Peephole_Projection_LayerNorm_LstmOpTest, Test) { +TEST_P(LstmOpTest, Cifg_Peephole_Projection_LayerNorm) { const int n_batch = 2; const int n_input = 5; const int n_cell = 4; @@ -1379,6 +1253,61 @@ TEST_P(Cifg_Peephole_Projection_LayerNorm_LstmOpTest, Test) { return; } + input_to_forget_weights_ = {-0.6, -0.1, 0.3, 0.2, 0.9, -0.5, -0.2, + -0.4, 0.3, -0.8, -0.4, 0.3, -0.5, -0.4, + -0.6, 0.3, -0.4, -0.6, -0.5, -0.5}; + input_to_cell_weights_ = {-0.4, -0.3, -0.2, -0.1, -0.5, 0.5, -0.2, + -0.3, -0.2, -0.6, 0.6, -0.1, -0.4, -0.3, + -0.7, 0.7, -0.9, -0.5, 0.8, 0.6}; + input_to_output_weights_ = {-0.8, -0.4, -0.2, -0.9, -0.1, -0.7, 0.3, + -0.3, -0.8, -0.2, 0.6, -0.2, 0.4, -0.7, + -0.3, -0.5, 0.1, 0.5, -0.6, -0.4}; + + forget_gate_bias_ = {0.1, -0.3, -0.2, 0.1}; + cell_gate_bias_ = {-0.05, 0.72, 0.25, 0.08}; + output_gate_bias_ = {0.05, -0.01, 0.2, 0.1}; + + recurrent_to_cell_weights_ = {-0.3, 0.2, 0.1, -0.3, 0.8, -0.08, + -0.2, 0.3, 0.8, -0.6, -0.1, 0.2}; + recurrent_to_forget_weights_ = {-0.5, -0.3, -0.5, -0.2, 0.6, 0.4, + 0.9, 0.3, -0.1, 0.2, 0.5, 0.2}; + recurrent_to_output_weights_ = {0.3, -0.1, 0.1, -0.2, -0.5, -0.7, + -0.2, -0.6, -0.1, -0.4, -0.7, -0.2}; + + cell_to_forget_weights_ = {-0.02, -0.15, -0.25, -0.03}; + cell_to_output_weights_ = {0.1, -0.1, -0.5, 0.05}; + + forget_layer_norm_coefficients_ = {0.2, 0.2, 0.4, 0.3}; + cell_layer_norm_coefficients_ = {0.7, 0.2, 0.3, 0.8}; + output_layer_norm_coefficients_ = {0.6, 0.2, 0.2, 0.5}; + projection_weights_ = {-0.1, 0.2, 0.01, -0.2, 0.1, 0.5, + 0.3, 0.08, 0.07, 0.2, -0.4, 0.2}; + + lstm_input_ = { + {// Batch0: 3 (input_sequence_size) * 5 (n_input) + 0.7, 0.8, 0.1, 0.2, 0.3, // seq 0 + 0.8, 0.1, 0.2, 0.4, 0.5, // seq 1 + 0.2, 0.7, 0.7, 0.1, 0.7}, // seq 2 + + {// Batch1: 3 (input_sequence_size) * 5 (n_input) + 0.3, 0.2, 0.9, 0.8, 0.1, // seq 0 + 0.1, 0.5, 0.2, 0.4, 0.2, // seq 1 + 0.6, 0.9, 0.2, 0.5, 0.7}, // seq 2 + }; + lstm_golden_output_ = { + { + // Batch0: 3 (input_sequence_size) * 3 (n_output) + 0.02129706, 0.140816242, 0.0112733059, // seq 0 + 0.0132302344, 0.152308047, 0.0346313119, // seq 1 + -0.0123688057, 0.165790111, 0.0893077999, // seq 2 + }, + { + // Batch1: 3 (input_sequence_size) * 3 (n_output) + -0.0226350538, 0.0916948169, 0.0769175813, // seq 0 + -0.0269966982, 0.149707705, 0.094149217, // seq 1 + -0.0103429332, 0.173016444, 0.0720508844, // seq 2 + }}; + LSTMOpModel lstm(n_batch, n_input, n_cell, n_output, /*use_cifg=*/true, /*use_peephole=*/true, /*use_projection_weights=*/true, @@ -1675,7 +1604,7 @@ class LSTMIntegerOpModel : public SingleOpModel { int n_output_; }; -TEST(IntegerLstm, NoCifg_NoPeephole_Projection_LayerNorm) { +TEST(IntegerLstmOpTest, NoCifg_NoPeephole_Projection_LayerNorm) { // Hyper parameters. const int n_batch = 2; const int n_input = 5; @@ -1835,7 +1764,7 @@ TEST(IntegerLstm, NoCifg_NoPeephole_Projection_LayerNorm) { } } -TEST(IntegerLstm, NoCifg_Peephole_Projection_LayerNorm) { +TEST(IntegerLstmOpTest, NoCifg_Peephole_Projection_LayerNorm) { // Hyper parameters. const int n_batch = 2; const int n_input = 5; @@ -2005,7 +1934,7 @@ TEST(IntegerLstm, NoCifg_Peephole_Projection_LayerNorm) { } } -TEST(IntegerLstm, Cifg_NoPeephole_Projection_LayerNorm_8x8_8) { +TEST(IntegerLstmOpTest, Cifg_NoPeephole_Projection_LayerNorm_8x8_8) { // Hyper parameters. 
const int n_batch = 2; const int n_input = 5; @@ -2199,20 +2128,11 @@ TEST(LstmOpTest, InvalidTypes) { #endif // Test parameter controls asymmetric_quantize_inputs in LSTMOpModel. -#define QUANTIZE_PARAMETER_TEST(test) \ - INSTANTIATE_TEST_SUITE_P( \ - test, test, \ - ::testing::Combine( \ - ::testing::Values(TensorType_FLOAT32, TensorType_UINT8, \ - TensorType_UINT8), \ - ::testing::Bool(), ::testing::Bool())) - -QUANTIZE_PARAMETER_TEST(NoCifg_NoPeephole_NoProjection_NoLayerNorm_LstmOpTest); -QUANTIZE_PARAMETER_TEST(Cifg_Peephole_NoProjection_NoLayerNorm_LstmOpTest); -QUANTIZE_PARAMETER_TEST(NoCifg_Peephole_Projection_NoLayerNorm_LstmOpTest); -QUANTIZE_PARAMETER_TEST(NoCifg_Peephole_Projection_LayerNorm_LstmOpTest); -QUANTIZE_PARAMETER_TEST(Cifg_Peephole_Projection_LayerNorm_LstmOpTest); -#undef QUANTIZE_PARAMETER_TEST +INSTANTIATE_TEST_SUITE_P( + Parameterized, LstmOpTest, + ::testing::Combine(::testing::Values(TensorType_FLOAT32, TensorType_UINT8, + TensorType_UINT8), + ::testing::Bool(), ::testing::Bool())); } // namespace } // namespace tflite From e7ff7483698e9391902d68cd7cee185e36d1a6d5 Mon Sep 17 00:00:00 2001 From: Rick Chao Date: Thu, 23 Jul 2020 13:28:57 -0700 Subject: [PATCH 1195/2522] Typo fix in tpu_strategy. PiperOrigin-RevId: 322854163 Change-Id: I9120ef8c106232947169720e1c45e7c34c123fd9 --- tensorflow/python/distribute/tpu_strategy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/distribute/tpu_strategy.py b/tensorflow/python/distribute/tpu_strategy.py index 9684bf2dd6a..8e5ef061dcf 100644 --- a/tensorflow/python/distribute/tpu_strategy.py +++ b/tensorflow/python/distribute/tpu_strategy.py @@ -537,7 +537,7 @@ class TPUExtended(distribute_lib.StrategyExtendedV1): self._logical_device_stack = [0] if context.executing_eagerly(): - # In async remote eager, we want to sync the exectors before exiting the + # In async remote eager, we want to sync the executors before exiting the # program. def async_wait(): if context.context()._context_handle is not None: # pylint: disable=protected-access From cb01b5126bbd6cb244993c3bf37c6f18cda71f62 Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Thu, 23 Jul 2020 13:39:08 -0700 Subject: [PATCH 1196/2522] Port the conv kernel to the new TfLiteEvalTensor API. PiperOrigin-RevId: 322856153 Change-Id: Ib53ed3f8e084b45dfb7511189a3a51852e69e0d9 --- tensorflow/lite/micro/kernels/BUILD | 1 + tensorflow/lite/micro/kernels/conv.cc | 80 ++++++++++++------- tensorflow/lite/micro/kernels/conv_test.cc | 45 +++-------- .../lite/micro/kernels/kernel_runner.cc | 5 +- tensorflow/lite/micro/kernels/kernel_runner.h | 2 +- tensorflow/lite/micro/kernels/kernel_util.cc | 6 +- 6 files changed, 67 insertions(+), 72 deletions(-) diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index 82defba370f..19e46fdf409 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD @@ -224,6 +224,7 @@ tflite_micro_cc_test( "conv_test.cc", ], deps = [ + ":kernel_runner", "//tensorflow/lite/c:common", "//tensorflow/lite/micro:micro_utils", "//tensorflow/lite/micro:op_resolvers", diff --git a/tensorflow/lite/micro/kernels/conv.cc b/tensorflow/lite/micro/kernels/conv.cc index ef4cb9fbff7..ff20cf684d6 100644 --- a/tensorflow/lite/micro/kernels/conv.cc +++ b/tensorflow/lite/micro/kernels/conv.cc @@ -23,6 +23,7 @@ limitations under the License. 
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/padding.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { namespace ops { @@ -177,9 +178,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { void EvalQuantized(TfLiteContext* context, TfLiteNode* node, TfLiteConvParams* params, const OpData& data, - const TfLiteTensor* input, const TfLiteTensor* filter, - const TfLiteTensor* bias, TfLiteTensor* im2col, - TfLiteTensor* hwcn_weights, TfLiteTensor* output) { + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias, + TfLiteEvalTensor* im2col, TfLiteEvalTensor* hwcn_weights, + TfLiteEvalTensor* output) { const int32_t input_offset = -data.input_zero_point; const int32_t filter_offset = -data.filter_zero_point; const int32_t output_offset = data.output_zero_point; @@ -200,20 +202,25 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node, op_params.output_shift = -data.output_shift; op_params.quantized_activation_min = data.output_activation_min; op_params.quantized_activation_max = data.output_activation_max; - reference_ops::Conv(op_params, GetTensorShape(input), - GetTensorData(input), GetTensorShape(filter), - GetTensorData(filter), GetTensorShape(bias), - GetTensorData(bias), GetTensorShape(output), - GetTensorData(output), GetTensorShape(im2col), - GetTensorData(im2col), nullptr); + reference_ops::Conv(op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output), + tflite::micro::GetTensorShape(im2col), + tflite::micro::GetTensorData(im2col), nullptr); } void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, TfLiteConvParams* params, const OpData& data, - const TfLiteTensor* input, - const TfLiteTensor* filter, - const TfLiteTensor* bias, TfLiteTensor* output, - TfLiteTensor* im2col) { + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output, + TfLiteEvalTensor* im2col) { // TODO(b/154032858): Investigate removing extra copies. 
ConvParams op_params; op_params.input_offset = -data.input_zero_point; @@ -229,18 +236,21 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, reference_integer_ops::ConvPerChannel( op_params, data.per_channel_output_multiplier, - data.per_channel_output_shift, GetTensorShape(input), - GetTensorData(input), GetTensorShape(filter), - GetTensorData(filter), GetTensorShape(bias), - GetTensorData(bias), GetTensorShape(output), - GetTensorData(output)); + data.per_channel_output_shift, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } void EvalFloat(TfLiteContext* context, TfLiteNode* node, TfLiteConvParams* params, const OpData& data, - const TfLiteTensor* input, const TfLiteTensor* filter, - const TfLiteTensor* bias, TfLiteTensor* im2col, - TfLiteTensor* hwcn_weights, TfLiteTensor* output) { + const TfLiteEvalTensor* input, const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, TfLiteEvalTensor* im2col, + TfLiteEvalTensor* hwcn_weights, TfLiteEvalTensor* output) { float output_activation_min, output_activation_max; CalculateActivationRange(params->activation, &output_activation_min, &output_activation_max); @@ -256,21 +266,29 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node, op_params.float_activation_min = output_activation_min; op_params.float_activation_max = output_activation_max; - reference_ops::Conv(op_params, GetTensorShape(input), - GetTensorData(input), GetTensorShape(filter), - GetTensorData(filter), GetTensorShape(bias), - GetTensorData(bias), GetTensorShape(output), - GetTensorData(output), GetTensorShape(im2col), - GetTensorData(im2col)); + reference_ops::Conv(op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output), + tflite::micro::GetTensorShape(im2col), + tflite::micro::GetTensorData(im2col)); } TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { auto* params = reinterpret_cast(node->builtin_data); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); - const TfLiteTensor* input = GetInput(context, node, kInputTensor); - const TfLiteTensor* filter = GetInput(context, node, kFilterTensor); - const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor); + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + const TfLiteEvalTensor* filter = + tflite::micro::GetEvalInput(context, node, kFilterTensor); + const TfLiteEvalTensor* bias = + tflite::micro::GetEvalInput(context, node, kBiasTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); TFLITE_DCHECK(node->user_data != nullptr); const OpData& data = *(static_cast(node->user_data)); diff --git a/tensorflow/lite/micro/kernels/conv_test.cc b/tensorflow/lite/micro/kernels/conv_test.cc index 6343496dd5a..d73f03e34a1 100644 --- a/tensorflow/lite/micro/kernels/conv_test.cc +++ b/tensorflow/lite/micro/kernels/conv_test.cc @@ -15,7 +15,7 @@ limitations under the License. 
#include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/micro/all_ops_resolver.h" +#include "tensorflow/lite/micro/kernels/kernel_runner.h" #include "tensorflow/lite/micro/micro_utils.h" #include "tensorflow/lite/micro/testing/micro_test.h" #include "tensorflow/lite/micro/testing/test_utils.h" @@ -54,48 +54,25 @@ TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size, int output_length, TfLiteConvParams* conv_params, float tolerance = 1e-5) { - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - - ::tflite::AllOpsResolver resolver; - - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_CONV_2D); - - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - - const char* init_data = reinterpret_cast(conv_params); - size_t init_data_size = 0; - void* user_data = nullptr; - - if (registration->init) { - user_data = registration->init(&context, init_data, init_data_size); - } - int inputs_array_data[] = {3, 0, 1, 2}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 3}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = reinterpret_cast(conv_params); - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; + const TfLiteRegistration registration = + tflite::ops::micro::Register_CONV_2D(); + micro::KernelRunner runner( + registration, tensors, tensors_size, inputs_array, outputs_array, + reinterpret_cast(conv_params), micro_test::reporter); - if (registration->prepare) { - TF_LITE_ENSURE_OK(context, registration->prepare(&context, &node)); - } + const char* init_data = reinterpret_cast(conv_params); // TODO(b/154240825): Use a test macro here which fails and returns. - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_ENSURE_OK(context, registration->invoke(&context, &node)); - - if (registration->free) { - registration->free(&context, user_data); + TfLiteStatus status = runner.InitAndPrepare(init_data); + if (status != kTfLiteOk) { + return status; } + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); for (int i = 0; i < output_length; ++i) { TF_LITE_MICRO_EXPECT_NEAR(expected_output_data[i], output_data[i], diff --git a/tensorflow/lite/micro/kernels/kernel_runner.cc b/tensorflow/lite/micro/kernels/kernel_runner.cc index d754fc31377..cef6c01cf45 100644 --- a/tensorflow/lite/micro/kernels/kernel_runner.cc +++ b/tensorflow/lite/micro/kernels/kernel_runner.cc @@ -52,10 +52,9 @@ KernelRunner::KernelRunner(const TfLiteRegistration& registration, node_.builtin_data = builtin_data; } -TfLiteStatus KernelRunner::InitAndPrepare() { +TfLiteStatus KernelRunner::InitAndPrepare(const char* init_data) { if (registration_.init) { - node_.user_data = - registration_.init(&context_, /*buffer=*/nullptr, /*length=*/0); + node_.user_data = registration_.init(&context_, init_data, /*length=*/0); } if (registration_.prepare) { TF_LITE_ENSURE_STATUS(registration_.prepare(&context_, &node_)); diff --git a/tensorflow/lite/micro/kernels/kernel_runner.h b/tensorflow/lite/micro/kernels/kernel_runner.h index 2ae7d09d530..45d107e7a37 100644 --- a/tensorflow/lite/micro/kernels/kernel_runner.h +++ b/tensorflow/lite/micro/kernels/kernel_runner.h @@ -39,7 +39,7 @@ class KernelRunner { // Calls init and prepare on the kernel (i.e. 
TfLiteRegistration) struct. Any // exceptions will be reported through the error_reporter and returned as a // status code here. - TfLiteStatus InitAndPrepare(); + TfLiteStatus InitAndPrepare(const char* init_data = nullptr); // Calls init, prepare, and invoke on a given TfLiteRegistration pointer. // After successful invoke, results will be available in the output tensor as diff --git a/tensorflow/lite/micro/kernels/kernel_util.cc b/tensorflow/lite/micro/kernels/kernel_util.cc index d70fc5864f5..5c389a62ff7 100644 --- a/tensorflow/lite/micro/kernels/kernel_util.cc +++ b/tensorflow/lite/micro/kernels/kernel_util.cc @@ -18,7 +18,6 @@ limitations under the License. namespace tflite { namespace micro { -// Returns the TfLiteEvalTensor struct for a given input index in a node. const TfLiteEvalTensor* GetEvalInput(const TfLiteContext* context, const TfLiteNode* node, int index) { TFLITE_DCHECK(context != nullptr); @@ -26,7 +25,6 @@ const TfLiteEvalTensor* GetEvalInput(const TfLiteContext* context, return context->GetEvalTensor(context, node->inputs->data[index]); } -// Returns the TfLiteEvalTensor struct for a given output index in a node. TfLiteEvalTensor* GetEvalOutput(const TfLiteContext* context, const TfLiteNode* node, int index) { TFLITE_DCHECK(context != nullptr); @@ -35,7 +33,9 @@ TfLiteEvalTensor* GetEvalOutput(const TfLiteContext* context, } const RuntimeShape GetTensorShape(const TfLiteEvalTensor* tensor) { - TFLITE_DCHECK(tensor != nullptr); + if (tensor == nullptr) { + return RuntimeShape(); + } TfLiteIntArray* dims = tensor->dims; const int dims_size = dims->size; const int32_t* dims_data = reinterpret_cast(dims->data); From fd4b213bad37e62003bf15e79853ab9504c6ddf4 Mon Sep 17 00:00:00 2001 From: Tomer Kaftan Date: Thu, 23 Jul 2020 13:43:35 -0700 Subject: [PATCH 1197/2522] Fix KerasTensor Numpy priority so that binary operators w/ kerastensors on the right-hand side and numpy arrays on the left-hand side work correctly. PiperOrigin-RevId: 322857123 Change-Id: I4b4f4efbeb43f5c3e3bc1c733eca22bad10d28fc --- tensorflow/python/keras/engine/keras_tensor.py | 9 +++++++++ .../python/keras/layers/tensorflow_op_layer_test.py | 8 ++++++++ 2 files changed, 17 insertions(+) diff --git a/tensorflow/python/keras/engine/keras_tensor.py b/tensorflow/python/keras/engine/keras_tensor.py index cd141809cfc..4266c6dbee6 100644 --- a/tensorflow/python/keras/engine/keras_tensor.py +++ b/tensorflow/python/keras/engine/keras_tensor.py @@ -180,6 +180,15 @@ class KerasTensor(object): raise TypeError('Tensors are unhashable. (%s)' 'Instead, use tensor.ref() as the key.' % self) + # Note: This enables the KerasTensor's overloaded "right" binary + # operators to run when the left operand is an ndarray, because it + # accords the Tensor class higher priority than an ndarray, or a + # numpy matrix. + # In the future explore chaning this to using numpy's __numpy_ufunc__ + # mechanism, which allows more control over how Tensors interact + # with ndarrays. + __array_priority__ = 100 + def __array__(self): raise TypeError( 'Cannot convert a symbolic Keras input/output to a numpy array. 
' diff --git a/tensorflow/python/keras/layers/tensorflow_op_layer_test.py b/tensorflow/python/keras/layers/tensorflow_op_layer_test.py index bbec7ef44c8..f43b758c33c 100644 --- a/tensorflow/python/keras/layers/tensorflow_op_layer_test.py +++ b/tensorflow/python/keras/layers/tensorflow_op_layer_test.py @@ -575,6 +575,14 @@ class AutoLambdaTest(keras_parameterized.TestCase): self.assertAllEqual(model(args), expected) self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) + def test_left_hand_numpy_multiplication(self): + x = np.asarray([3.0]) + inputs = keras.Input(shape=(4,)) + outputs = x * inputs + model = keras.Model(inputs, outputs) + ones = array_ops.ones((5, 4), dtype='float32') + self.assertAllEqual(model(ones), 3.0 * ones) + def test_numerical_correctness_simple(self): x = ops.convert_to_tensor_v2([[-1., 0., -2., 1.]]) inputs = keras.Input(shape=(4,)) From 138bc6e91344d4e751d750572f757f1a97176d90 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 23 Jul 2020 13:46:22 -0700 Subject: [PATCH 1198/2522] Move trace processing from server to the client side for sampling mode. PiperOrigin-RevId: 322857696 Change-Id: I161df8abf61d11e41a25653b8678719e01d76848 --- .../convert/xplane_to_profile_response.cc | 2 -- .../xplane_to_profile_response_test.cc | 21 ++++++++----------- tensorflow/core/profiler/rpc/BUILD | 1 - tensorflow/core/profiler/rpc/client/BUILD | 1 + .../profiler/rpc/client/capture_profile.cc | 17 +++++++++++++++ .../profiler/rpc/profiler_service_impl.cc | 8 ++++--- 6 files changed, 32 insertions(+), 18 deletions(-) diff --git a/tensorflow/core/profiler/convert/xplane_to_profile_response.cc b/tensorflow/core/profiler/convert/xplane_to_profile_response.cc index d9992cb31bd..54e9a8b2a10 100644 --- a/tensorflow/core/profiler/convert/xplane_to_profile_response.cc +++ b/tensorflow/core/profiler/convert/xplane_to_profile_response.cc @@ -50,7 +50,6 @@ const absl::string_view kInputPipeline = "input_pipeline"; const absl::string_view kOverviewPage = "overview_page"; const absl::string_view kKernelStats = "kernel_stats"; const absl::string_view kMemoryProfile = "memory_profile"; -const absl::string_view kXPlane = "xplane"; template void AddToolData(absl::string_view tool_name, const Proto& tool_output, @@ -74,7 +73,6 @@ Status ConvertXSpaceToProfileResponse(const XSpace& xspace, ProfileResponse* response) { absl::flat_hash_set tools(req.tools().begin(), req.tools().end()); - AddToolData(ToolName(kXPlane), xspace, response); if (tools.empty()) return Status::OK(); if (tools.contains(kTraceViewer)) { Trace trace; diff --git a/tensorflow/core/profiler/convert/xplane_to_profile_response_test.cc b/tensorflow/core/profiler/convert/xplane_to_profile_response_test.cc index ad9ca1028f6..d50cd9a98ff 100644 --- a/tensorflow/core/profiler/convert/xplane_to_profile_response_test.cc +++ b/tensorflow/core/profiler/convert/xplane_to_profile_response_test.cc @@ -77,11 +77,10 @@ TEST(ConvertXPlaneToProfileResponse, OverviewPage) { request.add_tools("overview_page"); ProfileResponse response; TF_CHECK_OK(ConvertXSpaceToProfileResponse(xspace, request, &response)); - EXPECT_EQ(2, response.tool_data_size()); - EXPECT_EQ("overview_page.pb", response.tool_data(/*index=*/1).name()); + EXPECT_EQ(1, response.tool_data_size()); + EXPECT_EQ("overview_page.pb", response.tool_data(0).name()); OverviewPage overview_page; - ASSERT_TRUE( - overview_page.ParseFromString(response.tool_data(/*index=*/1).data())); + ASSERT_TRUE(overview_page.ParseFromString(response.tool_data(0).data())); } 
TEST(ConvertXPlaneToProfileResponse, InputPipeline) { @@ -91,11 +90,10 @@ TEST(ConvertXPlaneToProfileResponse, InputPipeline) { request.add_tools("input_pipeline"); ProfileResponse response; TF_CHECK_OK(ConvertXSpaceToProfileResponse(xspace, request, &response)); - EXPECT_EQ(2, response.tool_data_size()); - EXPECT_EQ("input_pipeline.pb", response.tool_data(/*index=*/1).name()); + EXPECT_EQ(1, response.tool_data_size()); + EXPECT_EQ("input_pipeline.pb", response.tool_data(0).name()); InputPipelineAnalysisResult input_pipeline; - ASSERT_TRUE( - input_pipeline.ParseFromString(response.tool_data(/*index=*/1).data())); + ASSERT_TRUE(input_pipeline.ParseFromString(response.tool_data(0).data())); } TEST(ConvertXPlaneToProfileResponse, TensorflowStats) { @@ -105,11 +103,10 @@ TEST(ConvertXPlaneToProfileResponse, TensorflowStats) { request.add_tools("tensorflow_stats"); ProfileResponse response; TF_CHECK_OK(ConvertXSpaceToProfileResponse(xspace, request, &response)); - EXPECT_EQ(2, response.tool_data_size()); - EXPECT_EQ("tensorflow_stats.pb", response.tool_data(/*index=*/1).name()); + EXPECT_EQ(1, response.tool_data_size()); + EXPECT_EQ("tensorflow_stats.pb", response.tool_data(0).name()); TfStatsDatabase tf_stats_db; - ASSERT_TRUE( - tf_stats_db.ParseFromString(response.tool_data(/*index=*/1).data())); + ASSERT_TRUE(tf_stats_db.ParseFromString(response.tool_data(0).data())); } } // namespace diff --git a/tensorflow/core/profiler/rpc/BUILD b/tensorflow/core/profiler/rpc/BUILD index 1e572dfd9bd..cc77a7272c2 100644 --- a/tensorflow/core/profiler/rpc/BUILD +++ b/tensorflow/core/profiler/rpc/BUILD @@ -16,7 +16,6 @@ cc_library( deps = [ "//tensorflow/core:lib", "//tensorflow/core/profiler:profiler_service_proto_cc", - "//tensorflow/core/profiler/convert:xplane_to_profile_response", "//tensorflow/core/profiler/lib:profiler_session_headers", "//tensorflow/core/profiler/protobuf:xplane_proto_cc", "@com_google_absl//absl/memory", diff --git a/tensorflow/core/profiler/rpc/client/BUILD b/tensorflow/core/profiler/rpc/client/BUILD index 9cf1e7a9f7b..9d9014dca2f 100644 --- a/tensorflow/core/profiler/rpc/client/BUILD +++ b/tensorflow/core/profiler/rpc/client/BUILD @@ -16,6 +16,7 @@ cc_library( "//tensorflow/core/profiler:profiler_analysis_proto_cc", "//tensorflow/core/profiler:profiler_options_proto_cc", "//tensorflow/core/profiler:profiler_service_proto_cc", + "//tensorflow/core/profiler/convert:xplane_to_profile_response", "@com_google_absl//absl/strings", ], ) diff --git a/tensorflow/core/profiler/rpc/client/capture_profile.cc b/tensorflow/core/profiler/rpc/client/capture_profile.cc index e0303d32d13..bd82ba64db2 100644 --- a/tensorflow/core/profiler/rpc/client/capture_profile.cc +++ b/tensorflow/core/profiler/rpc/client/capture_profile.cc @@ -24,6 +24,7 @@ limitations under the License. 
#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/status.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/profiler/convert/xplane_to_profile_response.h" #include "tensorflow/core/profiler/profiler_analysis.pb.h" #include "tensorflow/core/profiler/profiler_options.pb.h" #include "tensorflow/core/profiler/profiler_service.pb.h" @@ -35,6 +36,7 @@ namespace profiler { namespace { constexpr uint64 kMaxEvents = 1000000; +const absl::string_view kXPlanePb = "xplane.pb"; MonitorRequest PopulateMonitorRequest(int duration_ms, int monitoring_level, bool timestamp) { @@ -72,6 +74,19 @@ inline bool ShouldRetryTracing(Status status) { status.error_message() == "Stream removed"); } +// If the ProfileResponse has single 'xplane.pb' tool, convert the xplane to +// other tools and add in ProfileResponse. Otherwise, the ProfileResponse is +// already converted, simply return. +Status ConvertXSpaceToToolsInProfileResponse(const ProfileRequest& request, + ProfileResponse* response) { + if (response->tool_data_size() != 1) return Status::OK(); + if (response->tool_data(0).name() != kXPlanePb) return Status::OK(); + XSpace xspace; + xspace.ParseFromString(response->tool_data(0).data()); + TF_RETURN_IF_ERROR(ConvertXSpaceToProfileResponse(xspace, request, response)); + return Status::OK(); +} + Status Profile(const std::string& service_addr, const std::string& logdir, int duration_ms, const std::string& session_id, const ProfileOptions& opts) { @@ -82,6 +97,8 @@ Status Profile(const std::string& service_addr, const std::string& logdir, TF_RETURN_IF_ERROR(ProfileGrpc(service_addr, request, &response)); if (!response.empty_trace()) { + TF_RETURN_IF_ERROR( + ConvertXSpaceToToolsInProfileResponse(request, &response)); TF_RETURN_IF_ERROR(SaveTensorboardProfile( logdir, session_id, request.host_name(), response, &std::cout)); // Print this at the end so that it's not buried in irrelevant LOG messages. diff --git a/tensorflow/core/profiler/rpc/profiler_service_impl.cc b/tensorflow/core/profiler/rpc/profiler_service_impl.cc index 0a234d7e4da..ba463813fc0 100644 --- a/tensorflow/core/profiler/rpc/profiler_service_impl.cc +++ b/tensorflow/core/profiler/rpc/profiler_service_impl.cc @@ -27,7 +27,6 @@ limitations under the License. #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/status.h" -#include "tensorflow/core/profiler/convert/xplane_to_profile_response.h" #include "tensorflow/core/profiler/internal/profiler_interface.h" #include "tensorflow/core/profiler/lib/profiler_session.h" #include "tensorflow/core/profiler/profiler_service.grpc.pb.h" @@ -37,13 +36,16 @@ limitations under the License. 
namespace tensorflow { namespace { +const absl::string_view kXPlanePb = "xplane.pb"; + Status CollectDataToResponse(const ProfileRequest& req, ProfilerSession* profiler, ProfileResponse* response) { profiler::XSpace xspace; TF_RETURN_IF_ERROR(profiler->CollectData(&xspace)); - TF_RETURN_IF_ERROR( - profiler::ConvertXSpaceToProfileResponse(xspace, req, response)); + auto* tool_data = response->add_tool_data(); + tool_data->set_name(kXPlanePb.data(), kXPlanePb.size()); + xspace.SerializeToString(tool_data->mutable_data()); return Status::OK(); } From b2a0698502dad8bfc9219b26e49566881e64b7e1 Mon Sep 17 00:00:00 2001 From: Berkin Ilbeyi Date: Thu, 23 Jul 2020 13:48:33 -0700 Subject: [PATCH 1199/2522] [XLA] Fix an MSAN failure PiperOrigin-RevId: 322858160 Change-Id: Ia05338abe9801d5ffd93ecfbac0838bf2bb19376 --- tensorflow/compiler/xla/service/memory_space_assignment.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/memory_space_assignment.cc b/tensorflow/compiler/xla/service/memory_space_assignment.cc index aa978a922e6..b003045e66c 100644 --- a/tensorflow/compiler/xla/service/memory_space_assignment.cc +++ b/tensorflow/compiler/xla/service/memory_space_assignment.cc @@ -237,7 +237,7 @@ int64 InstructionCountPrefetchIntervalPicker::PreferredEvictionEndTime( int64 InstructionCountPrefetchIntervalPicker::LatestPrefetchStartTime( const HloUse& use, int64 start_time, int64 end_time) const { - return end_time_ - min_overlap_count_; + return end_time - min_overlap_count_; } void InstructionCountPrefetchIntervalPicker::Begin(const HloUse& use, From 7dbe3fb4ca410264a6db6c0a0345859fd7a2eb11 Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Thu, 23 Jul 2020 21:05:41 +0000 Subject: [PATCH 1200/2522] switched to use scalar shape function --- tensorflow/c/kernels/ops/summary.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/c/kernels/ops/summary.cc b/tensorflow/c/kernels/ops/summary.cc index 98b8b743fa1..20a935aeb0a 100644 --- a/tensorflow/c/kernels/ops/summary.cc +++ b/tensorflow/c/kernels/ops/summary.cc @@ -20,8 +20,7 @@ limitations under the License. static void scalar_summary_shape_inference_fn(TF_ShapeInferenceContext* ctx, TF_Status* status) { TF_SetStatus(status, TF_OK, ""); - TF_ShapeHandle* result = TF_NewShapeHandle(); - // Make shape handle a scalar value (empty shape) + TF_ShapeHandle* result = TF_ShapeInferenceContextScalar(ctx); TF_ShapeInferenceContextSetOutput(ctx, 0, result, status); if (TF_GetCode(status) != TF_OK) { TF_SetStatus(status, TF_INVALID_ARGUMENT, From 21aff5af1f81fa360c11e334f7e382bbe8d01247 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Thu, 23 Jul 2020 13:48:52 -0700 Subject: [PATCH 1201/2522] Add __array__ method to `tf.Variable`. `np.array` converts a `tf.Tensor`. It should work on a `tf.Variable` too. 
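For illustration, a minimal sketch of the resulting behavior (assumes eager
execution; mirrors the doctest and unit test added below):

    import numpy as np
    import tensorflow as tf

    v = tf.Variable([1.0, 2.0])
    # The added __array__ method lets numpy convert the variable directly,
    # the same way it already converts a tf.Tensor.
    print(np.array(v))  # -> array([1., 2.], dtype=float32)
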
PiperOrigin-RevId: 322858218 Change-Id: Ic9389c6544bff82361efa99ab8ddbbf41f7311ff --- .../resource_variable_ops_test.py | 12 ++++++++++++ .../python/ops/resource_variable_ops.py | 19 +++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py index edcd8d7a05e..befffe8509f 100644 --- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py +++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py @@ -1219,6 +1219,18 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase, # Test operations self.assertAllEqual((v * 2).numpy(), (v + v).numpy()) + def testNumpyDotArray(self): + with context.eager_mode(): + # Scalars use a separate code path. + v1 = resource_variable_ops.ResourceVariable(initial_value=lambda: 1, + name="v1") + self.assertEqual(1, np.array(v1)) + + v2 = resource_variable_ops.ResourceVariable(initial_value=lambda: [1, 2], + name="v2") + self.assertAllEqual(v2.read_value().numpy(), np.array(v2)) + self.assertAllEqual([1, 2], np.array(v2)) + def testContainerEager(self): with context.eager_mode(): v1 = resource_variable_ops.ResourceVariable(initial_value=lambda: 1, diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index 8f5d056807f..7b319e4270e 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -23,6 +23,8 @@ import contextlib import functools import weakref +import numpy as np + from tensorflow.core.framework import attr_value_pb2 from tensorflow.core.framework import variable_pb2 from tensorflow.python import _pywrap_utils @@ -474,6 +476,23 @@ class BaseResourceVariable(variables.VariableV1, core.Tensor): else: yield + def __array__(self): + """Allows direct conversion to a numpy array. + + >>> np.array(tf.Variable([1.0])) + array([1.], dtype=float32) + + Returns: + The variable value as a numpy array. + """ + # You can't return `self.numpy()` here because for scalars + # that raises: + # ValueError: object __array__ method not producing an array + # Even `self.read_value().__array__()` and `self.read_value()._numpy()` give + # the same error. The `EagerTensor` class must be doing something behind the + # scenes to make `np.array(tf.constant(1))` work. + return np.asarray(self.numpy()) + def __nonzero__(self): return self.__bool__() From a6e66d50a4b419a4b21c2fecb87252f93e531767 Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Thu, 23 Jul 2020 13:51:05 -0700 Subject: [PATCH 1202/2522] Add gradient annotation for XlaSharding op. PiperOrigin-RevId: 322858703 Change-Id: Ie7e6cfa4bf43b466449425a98a7682b0213614e3 --- tensorflow/compiler/tf2xla/kernels/sharding_op.cc | 11 +++++++++-- tensorflow/compiler/tf2xla/python/xla.py | 8 ++++++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/tf2xla/kernels/sharding_op.cc b/tensorflow/compiler/tf2xla/kernels/sharding_op.cc index 1047580264b..da268fe283c 100644 --- a/tensorflow/compiler/tf2xla/kernels/sharding_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/sharding_op.cc @@ -30,8 +30,15 @@ class ShardingOp : public XlaOpKernel { ~ShardingOp() override = default; void Compile(XlaOpKernelContext* ctx) override { - xla::XlaOp input = ctx->Input(0); - auto shape_or = ctx->InputXlaShape(0); + xla::XlaOp input; + { + // The builder might create a broadcast from a constant, so we clear + // sharding for the input. 
+ xla::XlaScopedShardingAssignment no_sharding(ctx->builder(), + absl::nullopt); + input = ctx->Input(0); + } + auto shape_or = ctx->builder()->GetShape(input); OP_REQUIRES_OK(ctx, shape_or.status()); ctx->SetOutput( diff --git a/tensorflow/compiler/tf2xla/python/xla.py b/tensorflow/compiler/tf2xla/python/xla.py index 0ebca2d546f..846dafa2570 100644 --- a/tensorflow/compiler/tf2xla/python/xla.py +++ b/tensorflow/compiler/tf2xla/python/xla.py @@ -28,6 +28,7 @@ from __future__ import division from __future__ import print_function from tensorflow.compiler.tf2xla.ops import gen_xla_ops +from tensorflow.core.framework import attr_value_pb2 from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -415,8 +416,11 @@ sharding = gen_xla_ops.xla_sharding @ops.RegisterGradient("XlaSharding") def _sharding_grad(op, grad): - del op # Unused - return [grad] + grad_sharding = gen_xla_ops.xla_sharding(grad) + # pylint: disable=protected-access + grad_sharding.op._set_attr( + "_XlaSharding", attr_value_pb2.AttrValue(s=op.get_attr("_XlaSharding"))) + return [grad_sharding] spmd_full_to_shard_shape = gen_xla_ops.xla_spmd_full_to_shard_shape From 2587c2a1f2fccd7726e5912f6de7e37c63d07d00 Mon Sep 17 00:00:00 2001 From: Yi Situ Date: Thu, 23 Jul 2020 13:58:49 -0700 Subject: [PATCH 1203/2522] [xprof:gpu] Optimize XPlane to KernelStatsDb converter by switching intermediate data structure from a vector to flat_hash_map. xplane_to_kernel_stats_db.h - Do not provide a direct conversion from XPlane to KernelStatsDb, which is a many to one conversion can be parallelized while being aggregated into a faster data structure (a hash map in this case). PiperOrigin-RevId: 322860348 Change-Id: Ibde4fc7ae4c5222d059f0eb1f77f57aa2878a58a --- tensorflow/core/profiler/convert/BUILD | 6 +- .../convert/xplane_to_kernel_stats_db.cc | 16 ++- .../convert/xplane_to_kernel_stats_db.h | 7 +- .../convert/xplane_to_kernel_stats_db_test.cc | 109 ++++++++++-------- .../profiler/convert/xplane_to_op_stats.cc | 16 ++- tensorflow/core/profiler/utils/BUILD | 1 + .../core/profiler/utils/kernel_stats_utils.cc | 60 +++++----- .../core/profiler/utils/kernel_stats_utils.h | 60 +++++++++- 8 files changed, 177 insertions(+), 98 deletions(-) diff --git a/tensorflow/core/profiler/convert/BUILD b/tensorflow/core/profiler/convert/BUILD index 5e0682fc031..e08eec0fced 100644 --- a/tensorflow/core/profiler/convert/BUILD +++ b/tensorflow/core/profiler/convert/BUILD @@ -421,6 +421,7 @@ cc_library( "//tensorflow/core/profiler/utils:trace_utils", "//tensorflow/core/profiler/utils:xplane_schema", "//tensorflow/core/profiler/utils:xplane_visitor", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:optional", ], @@ -437,15 +438,12 @@ tf_cc_test( "//tensorflow/core:protos_all_cc", "//tensorflow/core:test", "//tensorflow/core:test_main", - "//tensorflow/core:testlib", "//tensorflow/core/profiler/protobuf:kernel_stats_proto_cc", "//tensorflow/core/profiler/protobuf:xplane_proto_cc", - "//tensorflow/core/profiler/utils:tf_xplane_visitor", + "//tensorflow/core/profiler/utils:kernel_stats_utils", "//tensorflow/core/profiler/utils:xplane_builder", "//tensorflow/core/profiler/utils:xplane_schema", "//tensorflow/core/profiler/utils:xplane_test_utils", - "//tensorflow/core/profiler/utils:xplane_utils", - "//tensorflow/core/profiler/utils:xplane_visitor", "@com_google_absl//absl/strings", ], ) diff --git 
a/tensorflow/core/profiler/convert/xplane_to_kernel_stats_db.cc b/tensorflow/core/profiler/convert/xplane_to_kernel_stats_db.cc index 4d42d51cf6c..e404e096b70 100644 --- a/tensorflow/core/profiler/convert/xplane_to_kernel_stats_db.cc +++ b/tensorflow/core/profiler/convert/xplane_to_kernel_stats_db.cc @@ -17,6 +17,7 @@ limitations under the License. #include +#include "absl/container/flat_hash_map.h" #include "absl/strings/string_view.h" #include "absl/types/optional.h" #include "tensorflow/core/platform/logging.h" @@ -33,11 +34,11 @@ limitations under the License. namespace tensorflow { namespace profiler { -KernelStatsDb ConvertDeviceTraceXPlaneToKernelStatsDb( +void ConvertDeviceTraceXPlaneToKernelReports( const XPlane& device_trace, const std::function& - on_kernel_fn) { - KernelStatsDb result; + on_kernel_fn, + KernelReportMap* reports) { XPlaneVisitor plane = CreateTfXPlaneVisitor(&device_trace); plane.ForEachLine([&](const XLineVisitor& line) { if (IsDerivedThreadId(line.Id())) { @@ -92,12 +93,15 @@ KernelStatsDb ConvertDeviceTraceXPlaneToKernelStatsDb( } if (kernel.total_duration_ns()) { - *result.add_reports() = kernel; + KernelReportValue value; + value.total_duration_ns = event.DurationNs(); + value.min_duration_ns = event.DurationNs(); + value.max_duration_ns = event.DurationNs(); + value.occurrences = 1; + InsertOrUpdateKernelReport(kernel, value, reports); } }); }); - - return result; } } // namespace profiler diff --git a/tensorflow/core/profiler/convert/xplane_to_kernel_stats_db.h b/tensorflow/core/profiler/convert/xplane_to_kernel_stats_db.h index 9c7fca22887..56393c18e2b 100644 --- a/tensorflow/core/profiler/convert/xplane_to_kernel_stats_db.h +++ b/tensorflow/core/profiler/convert/xplane_to_kernel_stats_db.h @@ -18,17 +18,20 @@ limitations under the License. #include +#include "absl/container/flat_hash_map.h" #include "tensorflow/core/profiler/protobuf/kernel_stats.pb.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" +#include "tensorflow/core/profiler/utils/kernel_stats_utils.h" #include "tensorflow/core/profiler/utils/xplane_visitor.h" namespace tensorflow { namespace profiler { -KernelStatsDb ConvertDeviceTraceXPlaneToKernelStatsDb( +void ConvertDeviceTraceXPlaneToKernelReports( const XPlane& device_trace, const std::function& - on_kernel_fn); + on_kernel_fn, + KernelReportMap* reports); } // namespace profiler } // namespace tensorflow diff --git a/tensorflow/core/profiler/convert/xplane_to_kernel_stats_db_test.cc b/tensorflow/core/profiler/convert/xplane_to_kernel_stats_db_test.cc index 3c4ac648f92..e402b3b6672 100644 --- a/tensorflow/core/profiler/convert/xplane_to_kernel_stats_db_test.cc +++ b/tensorflow/core/profiler/convert/xplane_to_kernel_stats_db_test.cc @@ -20,6 +20,7 @@ limitations under the License. 
#include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/protobuf/kernel_stats.pb.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" +#include "tensorflow/core/profiler/utils/kernel_stats_utils.h" #include "tensorflow/core/profiler/utils/xplane_builder.h" #include "tensorflow/core/profiler/utils/xplane_schema.h" #include "tensorflow/core/profiler/utils/xplane_test_utils.h" @@ -37,7 +38,7 @@ TEST(ConvertXplaneToKernelStats, MultiKernels) { device_trace_builder.GetOrCreateLine(0); XLineBuilder line_builder = device_trace_builder.GetOrCreateLine(0); - CreateXEvent(&device_trace_builder, &line_builder, "kernel_name_0", + CreateXEvent(&device_trace_builder, &line_builder, "kernel_name_shortest", /*offset_ps=*/10000, /*duration_ps=*/1000, {{StatType::kLevel0, "mul_786"}, {StatType::kKernelDetails, R"MULTI(registers_per_thread:16 @@ -51,7 +52,7 @@ block_y:1 block_z:1)MULTI"}, {StatType::kEquation, ""}}); - CreateXEvent(&device_trace_builder, &line_builder, "kernel_name_1", + CreateXEvent(&device_trace_builder, &line_builder, "kernel_name_middle", /*offset_ps=*/20000, /*duration_ps=*/2000, {{StatType::kLevel0, "Conv2D"}, {StatType::kKernelDetails, R"MULTI(registers_per_thread:32 @@ -79,58 +80,68 @@ block_x:64 block_y:1 block_z:1)MULTI"}, {StatType::kEquation, ""}}); - KernelStatsDb kernel_stats = - ConvertDeviceTraceXPlaneToKernelStatsDb(*device_trace, {}); + + KernelReportMap reports; + ConvertDeviceTraceXPlaneToKernelReports(*device_trace, {}, &reports); + KernelStatsDb kernel_stats; + CopyKernelReportsToDb(reports, &kernel_stats); + SortKernelsByTotalDurationDesc(&kernel_stats); EXPECT_EQ(kernel_stats.reports_size(), 3); - const auto& kernel0 = kernel_stats.reports().at(0); - EXPECT_EQ(kernel0.name(), "kernel_name_0"); - EXPECT_EQ(kernel0.registers_per_thread(), 16); - EXPECT_EQ(kernel0.static_shmem_bytes(), 0); - EXPECT_EQ(kernel0.dynamic_shmem_bytes(), 0); - EXPECT_EQ(kernel0.grid_dim().at(0), 1); - EXPECT_EQ(kernel0.grid_dim().at(1), 1); - EXPECT_EQ(kernel0.grid_dim().at(2), 1); - EXPECT_EQ(kernel0.block_dim().at(0), 1); - EXPECT_EQ(kernel0.block_dim().at(1), 1); - EXPECT_EQ(kernel0.block_dim().at(2), 1); - EXPECT_EQ(kernel0.total_duration_ns(), 1); - EXPECT_FALSE(kernel0.is_kernel_using_tensor_core()); - EXPECT_FALSE(kernel0.is_op_tensor_core_eligible()); - EXPECT_EQ(kernel0.op_name(), "mul_786"); + { + const auto& kernel = kernel_stats.reports().at(2); + EXPECT_EQ(kernel.name(), "kernel_name_shortest"); + EXPECT_EQ(kernel.registers_per_thread(), 16); + EXPECT_EQ(kernel.static_shmem_bytes(), 0); + EXPECT_EQ(kernel.dynamic_shmem_bytes(), 0); + EXPECT_EQ(kernel.grid_dim().at(0), 1); + EXPECT_EQ(kernel.grid_dim().at(1), 1); + EXPECT_EQ(kernel.grid_dim().at(2), 1); + EXPECT_EQ(kernel.block_dim().at(0), 1); + EXPECT_EQ(kernel.block_dim().at(1), 1); + EXPECT_EQ(kernel.block_dim().at(2), 1); + EXPECT_EQ(kernel.total_duration_ns(), 1); + EXPECT_FALSE(kernel.is_kernel_using_tensor_core()); + EXPECT_FALSE(kernel.is_op_tensor_core_eligible()); + EXPECT_EQ(kernel.op_name(), "mul_786"); + } - const auto& kernel1 = kernel_stats.reports().at(1); - EXPECT_EQ(kernel1.name(), "kernel_name_1"); - EXPECT_EQ(kernel1.registers_per_thread(), 32); - EXPECT_EQ(kernel1.static_shmem_bytes(), 0); - EXPECT_EQ(kernel1.dynamic_shmem_bytes(), 16384); - EXPECT_EQ(kernel1.grid_dim().at(0), 2); - EXPECT_EQ(kernel1.grid_dim().at(1), 1); - EXPECT_EQ(kernel1.grid_dim().at(2), 1); - EXPECT_EQ(kernel1.block_dim().at(0), 32); - EXPECT_EQ(kernel1.block_dim().at(1), 1); - 
EXPECT_EQ(kernel1.block_dim().at(2), 1); - EXPECT_EQ(kernel1.total_duration_ns(), 2); - EXPECT_FALSE(kernel1.is_kernel_using_tensor_core()); - EXPECT_TRUE(kernel1.is_op_tensor_core_eligible()); - EXPECT_EQ(kernel1.op_name(), "Conv2D"); + { + const auto& kernel = kernel_stats.reports().at(1); + EXPECT_EQ(kernel.name(), "kernel_name_middle"); + EXPECT_EQ(kernel.registers_per_thread(), 32); + EXPECT_EQ(kernel.static_shmem_bytes(), 0); + EXPECT_EQ(kernel.dynamic_shmem_bytes(), 16384); + EXPECT_EQ(kernel.grid_dim().at(0), 2); + EXPECT_EQ(kernel.grid_dim().at(1), 1); + EXPECT_EQ(kernel.grid_dim().at(2), 1); + EXPECT_EQ(kernel.block_dim().at(0), 32); + EXPECT_EQ(kernel.block_dim().at(1), 1); + EXPECT_EQ(kernel.block_dim().at(2), 1); + EXPECT_EQ(kernel.total_duration_ns(), 2); + EXPECT_FALSE(kernel.is_kernel_using_tensor_core()); + EXPECT_TRUE(kernel.is_op_tensor_core_eligible()); + EXPECT_EQ(kernel.op_name(), "Conv2D"); + } - const auto& kernel2 = kernel_stats.reports().at(2); - EXPECT_EQ(kernel2.name(), "volta_fp16_s884gemm_fp16_128x128_ldg8_f2f_tn"); - EXPECT_EQ(kernel2.registers_per_thread(), 32); - EXPECT_EQ(kernel2.static_shmem_bytes(), 0); - EXPECT_EQ(kernel2.dynamic_shmem_bytes(), 16384); - EXPECT_EQ(kernel2.grid_dim().at(0), 3); - EXPECT_EQ(kernel2.grid_dim().at(1), 1); - EXPECT_EQ(kernel2.grid_dim().at(2), 1); - EXPECT_EQ(kernel2.block_dim().at(0), 64); - EXPECT_EQ(kernel2.block_dim().at(1), 1); - EXPECT_EQ(kernel2.block_dim().at(2), 1); - EXPECT_EQ(kernel2.total_duration_ns(), 3); - EXPECT_TRUE(kernel2.is_kernel_using_tensor_core()); - EXPECT_TRUE(kernel2.is_op_tensor_core_eligible()); - EXPECT_EQ(kernel2.op_name(), "Einsum_80"); + { + const auto& kernel = kernel_stats.reports().at(0); + EXPECT_EQ(kernel.name(), "volta_fp16_s884gemm_fp16_128x128_ldg8_f2f_tn"); + EXPECT_EQ(kernel.registers_per_thread(), 32); + EXPECT_EQ(kernel.static_shmem_bytes(), 0); + EXPECT_EQ(kernel.dynamic_shmem_bytes(), 16384); + EXPECT_EQ(kernel.grid_dim().at(0), 3); + EXPECT_EQ(kernel.grid_dim().at(1), 1); + EXPECT_EQ(kernel.grid_dim().at(2), 1); + EXPECT_EQ(kernel.block_dim().at(0), 64); + EXPECT_EQ(kernel.block_dim().at(1), 1); + EXPECT_EQ(kernel.block_dim().at(2), 1); + EXPECT_EQ(kernel.total_duration_ns(), 3); + EXPECT_TRUE(kernel.is_kernel_using_tensor_core()); + EXPECT_TRUE(kernel.is_op_tensor_core_eligible()); + EXPECT_EQ(kernel.op_name(), "Einsum_80"); + } } } // namespace diff --git a/tensorflow/core/profiler/convert/xplane_to_op_stats.cc b/tensorflow/core/profiler/convert/xplane_to_op_stats.cc index 82a13c71d47..2f4bf2689b0 100644 --- a/tensorflow/core/profiler/convert/xplane_to_op_stats.cc +++ b/tensorflow/core/profiler/convert/xplane_to_op_stats.cc @@ -19,6 +19,7 @@ limitations under the License. #include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_set.h" +#include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/convert/op_metrics_db_combiner.h" #include "tensorflow/core/profiler/convert/step_events_to_steps_db.h" @@ -154,7 +155,8 @@ OpStats ConvertXSpaceToOpStats(const XSpace& space, op_stats.mutable_device_op_metrics_db()); SetRunEnvironment(device_planes.size(), op_stats.mutable_run_environment()); - std::vector reports; + KernelReportMap reports; + // TODO(b/161942993) parallelize XPlane processing per thread. 
for (const XPlane* device_trace : device_planes) { if (config.contains(OP_METRICS_DB)) { if (!op_stats.has_perf_env()) { @@ -171,16 +173,18 @@ OpStats ConvertXSpaceToOpStats(const XSpace& space, &step_events); } if (config.contains(KERNEL_STATS_DB)) { - KernelStatsDb kernel_stats_db = ConvertDeviceTraceXPlaneToKernelStatsDb( - *device_trace, /*on_kernel_fn=*/{}); - reports.insert(reports.begin(), kernel_stats_db.reports().begin(), - kernel_stats_db.reports().end()); + ConvertDeviceTraceXPlaneToKernelReports(*device_trace, + /*on_kernel_fn=*/{}, &reports); } } + + // Combine into reports. if (config.contains(KERNEL_STATS_DB)) { - GroupKernelReports(&reports, op_stats.mutable_kernel_stats_db()); + CopyKernelReportsToDb(reports, op_stats.mutable_kernel_stats_db()); + // TODO(b/161943499) Replace sort with a TopK algorithm. SortKernelsByTotalDurationDesc(op_stats.mutable_kernel_stats_db()); } + bool has_device = !device_planes.empty(); // Convert a host plane. if (host_plane && config.contains(OP_METRICS_DB)) { diff --git a/tensorflow/core/profiler/utils/BUILD b/tensorflow/core/profiler/utils/BUILD index 5a60bf3f2f3..92a87e2228c 100644 --- a/tensorflow/core/profiler/utils/BUILD +++ b/tensorflow/core/profiler/utils/BUILD @@ -401,6 +401,7 @@ cc_library( deps = [ "//tensorflow/core:lib", "//tensorflow/core/profiler/protobuf:kernel_stats_proto_cc", + "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/strings", ], diff --git a/tensorflow/core/profiler/utils/kernel_stats_utils.cc b/tensorflow/core/profiler/utils/kernel_stats_utils.cc index 2f53b51d7a8..3d8c1a99c33 100644 --- a/tensorflow/core/profiler/utils/kernel_stats_utils.cc +++ b/tensorflow/core/profiler/utils/kernel_stats_utils.cc @@ -20,6 +20,7 @@ limitations under the License. #include #include +#include "absl/algorithm/container.h" #include "absl/strings/match.h" #include "absl/strings/numbers.h" #include "absl/strings/str_split.h" @@ -142,7 +143,7 @@ bool IsEinsumTensorCoreEligible(absl::string_view equation) { } bool KernelReportLessThanComparator::operator()(const KernelReport& lhs, - const KernelReport& rhs) { + const KernelReport& rhs) const { // Disable formatting to keep vertical alignment for better readability, // and make it easier to reorder columns. // clang-format off @@ -180,7 +181,7 @@ bool KernelReportLessThanComparator::operator()(const KernelReport& lhs, } bool KernelReportEqualToComparator::operator()(const KernelReport& lhs, - const KernelReport& rhs) { + const KernelReport& rhs) const { // Disable formatting to keep vertical alignment for better readability, // and make it easier to reorder columns. // clang-format off @@ -213,32 +214,37 @@ void SortKernelsByTotalDurationDesc(KernelStatsDb* kernel_stats_db) { }); } -void GroupKernelReports(std::vector* reports, - KernelStatsDb* dst) { - // Sort reports by grouping criteria. - std::sort(reports->begin(), reports->end(), KernelReportLessThanComparator()); +void CopyKernelReportsToDb(const KernelReportMap& reports, KernelStatsDb* dst) { + for (const auto& report_value : reports) { + KernelReport* report = dst->add_reports(); + *report = report_value.first; + // Set value using KernelReportValue. + report->set_occurrences(report_value.second.occurrences); + report->set_min_duration_ns(report_value.second.min_duration_ns); + report->set_max_duration_ns(report_value.second.max_duration_ns); + report->set_total_duration_ns(report_value.second.total_duration_ns); + } +} - // Group reports together. 
- KernelReport* prev = nullptr; - for (const KernelReport& report : *reports) { - DCHECK_EQ(3, report.grid_dim_size()); - DCHECK_EQ(3, report.block_dim_size()); - if (prev != nullptr && KernelReportEqualToComparator()(*prev, report)) { - // Previous element is identical to the one that we are adding, so - // aggregate them. - prev->set_occurrences(prev->occurrences() + 1); - prev->set_max_duration_ns( - std::max(prev->max_duration_ns(), report.max_duration_ns())); - prev->set_min_duration_ns( - std::min(prev->min_duration_ns(), report.min_duration_ns())); - prev->set_total_duration_ns(prev->total_duration_ns() + - report.total_duration_ns()); - } else { - // Current element does not exist yet. - prev = dst->add_reports(); - *prev = report; - prev->set_occurrences(1); - } +void InsertOrUpdateKernelReport(const KernelReport& kernel, + const KernelReportValue& value, + KernelReportMap* dst) { + KernelReportValue& element = (*dst)[kernel]; + if (element.occurrences == 0) { + element = value; + } else { + element.total_duration_ns += value.total_duration_ns; + element.min_duration_ns = + std::min(element.min_duration_ns, value.min_duration_ns); + element.max_duration_ns = + std::max(element.max_duration_ns, value.max_duration_ns); + element.occurrences += 1; + } +} + +void MergeKernelReports(const KernelReportMap& reports, KernelReportMap* dst) { + for (auto& kernel_value : reports) { + InsertOrUpdateKernelReport(kernel_value.first, kernel_value.second, dst); } } diff --git a/tensorflow/core/profiler/utils/kernel_stats_utils.h b/tensorflow/core/profiler/utils/kernel_stats_utils.h index ce9208fba1f..b5ee6806bd9 100644 --- a/tensorflow/core/profiler/utils/kernel_stats_utils.h +++ b/tensorflow/core/profiler/utils/kernel_stats_utils.h @@ -20,6 +20,7 @@ limitations under the License. #include "absl/container/flat_hash_map.h" #include "absl/strings/string_view.h" +#include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/protobuf/kernel_stats.pb.h" namespace tensorflow { @@ -40,19 +41,70 @@ bool IsEinsumTensorCoreEligible(absl::string_view equation); // Less than comparator for Kernel Reports. struct KernelReportLessThanComparator { - bool operator()(const KernelReport& lhs, const KernelReport& rhs); + bool operator()(const KernelReport& lhs, const KernelReport& rhs) const; }; // Equal to comparator for Kernel Reports. struct KernelReportEqualToComparator { - bool operator()(const KernelReport& lhs, const KernelReport& rhs); + bool operator()(const KernelReport& lhs, const KernelReport& rhs) const; }; // Sorts kernel reorts by total duration descendingly. void SortKernelsByTotalDurationDesc(KernelStatsDb* kernel_stats_db); -// Groups and aggregate common reports into destination KernelStatsDb. -void GroupKernelReports(std::vector* reports, KernelStatsDb* dst); +struct KernelReportValue { + uint64 total_duration_ns = 0; + uint64 min_duration_ns = 0; + uint64 max_duration_ns = 0; + uint64 occurrences = 0; +}; + +struct KernelKeyWrap { + const KernelReport* key; + template + friend H AbslHashValue(H h, KernelKeyWrap wrap) { + // Kernel reports are grouped by these fields, hence they are used as + // hashing criteria. 
+ // clang-format off + return H::combine( + std::move(h), + wrap.key->is_kernel_using_tensor_core(), + wrap.key->is_op_tensor_core_eligible(), + wrap.key->block_dim(0), + wrap.key->block_dim(1), + wrap.key->block_dim(2), + wrap.key->grid_dim(0), + wrap.key->grid_dim(1), + wrap.key->grid_dim(2), + wrap.key->registers_per_thread(), + wrap.key->static_shmem_bytes(), + wrap.key->dynamic_shmem_bytes(), + wrap.key->name(), + wrap.key->op_name()); + // clang-format on + } +}; + +struct KernelHash { + size_t operator()(const KernelReport& key) const { + return absl::Hash()(KernelKeyWrap{&key}); + } +}; + +using KernelReportMap = + absl::flat_hash_map; + +// Copies reports into the given KernelStatsDb. +void CopyKernelReportsToDb(const KernelReportMap& reports, KernelStatsDb* dst); + +// Inserts or aggregates KernelReports into the given KernelReportMap. +void InsertOrUpdateKernelReport(const KernelReport& kernel, + const KernelReportValue& value, + KernelReportMap* dst); + +// Aggregates values from one KernelReportMap into another. +void MergeKernelReports(const KernelReportMap& reports, KernelReportMap* dst); // Groups KernelReport in by tensorflow operation name. absl::flat_hash_map> From f300cac5242b750b2189391ce9268580b172d664 Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Thu, 23 Jul 2020 14:00:26 -0700 Subject: [PATCH 1204/2522] Port the depthwise conv kernel to the new TfLiteEvalTensor API. PiperOrigin-RevId: 322860697 Change-Id: I7858a70fd0b756ae269c3df63312f09aa3643d5d --- tensorflow/lite/micro/kernels/BUILD | 1 + .../lite/micro/kernels/depthwise_conv.cc | 69 ++++++++++++------- .../lite/micro/kernels/depthwise_conv_test.cc | 46 ++++--------- 3 files changed, 60 insertions(+), 56 deletions(-) diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index 19e46fdf409..ddf9dd7633d 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD @@ -158,6 +158,7 @@ tflite_micro_cc_test( "depthwise_conv_test.cc", ], deps = [ + ":kernel_runner", "//tensorflow/lite/c:common", "//tensorflow/lite/kernels/internal:tensor", "//tensorflow/lite/micro:op_resolvers", diff --git a/tensorflow/lite/micro/kernels/depthwise_conv.cc b/tensorflow/lite/micro/kernels/depthwise_conv.cc index a296c172309..687537e2c59 100644 --- a/tensorflow/lite/micro/kernels/depthwise_conv.cc +++ b/tensorflow/lite/micro/kernels/depthwise_conv.cc @@ -24,6 +24,7 @@ limitations under the License. 
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/padding.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { namespace ops { @@ -165,8 +166,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { void EvalFloat(TfLiteContext* context, TfLiteNode* node, TfLiteDepthwiseConvParams* params, const OpData& data, - const TfLiteTensor* input, const TfLiteTensor* filter, - const TfLiteTensor* bias, TfLiteTensor* output) { + const TfLiteEvalTensor* input, const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, TfLiteEvalTensor* output) { float output_activation_min, output_activation_max; CalculateActivationRange(params->activation, &output_activation_min, &output_activation_max); @@ -185,17 +186,22 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node, op_params.float_activation_max = output_activation_max; tflite::reference_ops::DepthwiseConv( - op_params, GetTensorShape(input), GetTensorData(input), - GetTensorShape(filter), GetTensorData(filter), - GetTensorShape(bias), GetTensorData(bias), GetTensorShape(output), - GetTensorData(output)); + op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, TfLiteDepthwiseConvParams* params, - const OpData& data, const TfLiteTensor* input, - const TfLiteTensor* filter, - const TfLiteTensor* bias, TfLiteTensor* output) { + const OpData& data, const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output) { DepthwiseParams op_params; op_params.padding_type = PaddingType::kSame; op_params.padding_values.width = data.padding.width; @@ -214,17 +220,21 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, reference_integer_ops::DepthwiseConvPerChannel( op_params, data.per_channel_output_multiplier, - data.per_channel_output_shift, GetTensorShape(input), - GetTensorData(input), GetTensorShape(filter), - GetTensorData(filter), GetTensorShape(bias), - GetTensorData(bias), GetTensorShape(output), - GetTensorData(output)); + data.per_channel_output_shift, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } void EvalQuantized(TfLiteContext* context, TfLiteNode* node, TfLiteDepthwiseConvParams* params, const OpData& data, - const TfLiteTensor* input, const TfLiteTensor* filter, - const TfLiteTensor* bias, TfLiteTensor* output) { + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output) { const int32_t input_offset = -data.input_zero_point; const int32_t filter_offset = -data.filter_zero_point; const int32_t output_offset = data.output_zero_point; @@ -249,10 +259,14 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node, op_params.output_shift = -data.output_shift; tflite::reference_ops::DepthwiseConv( - op_params, GetTensorShape(input), GetTensorData(input), - 
GetTensorShape(filter), GetTensorData(filter), - GetTensorShape(bias), GetTensorData(bias), - GetTensorShape(output), GetTensorData(output)); + op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { @@ -263,11 +277,16 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { reinterpret_cast(node->builtin_data); const OpData& data = *(static_cast(node->user_data)); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); - const TfLiteTensor* input = GetInput(context, node, kInputTensor); - const TfLiteTensor* filter = GetInput(context, node, kFilterTensor); - const TfLiteTensor* bias = - (NumInputs(node) == 3) ? GetInput(context, node, kBiasTensor) : nullptr; + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + const TfLiteEvalTensor* filter = + tflite::micro::GetEvalInput(context, node, kFilterTensor); + const TfLiteEvalTensor* bias = + (NumInputs(node) == 3) + ? tflite::micro::GetEvalInput(context, node, kBiasTensor) + : nullptr; // TODO(aselle): Consider whether float conv and quantized conv should be // separate ops to avoid dispatch overhead here. diff --git a/tensorflow/lite/micro/kernels/depthwise_conv_test.cc b/tensorflow/lite/micro/kernels/depthwise_conv_test.cc index 4b9ac7ee775..5e35d54dcb1 100644 --- a/tensorflow/lite/micro/kernels/depthwise_conv_test.cc +++ b/tensorflow/lite/micro/kernels/depthwise_conv_test.cc @@ -16,7 +16,7 @@ limitations under the License. 
#include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/micro/all_ops_resolver.h" +#include "tensorflow/lite/micro/kernels/kernel_runner.h" #include "tensorflow/lite/micro/testing/micro_test.h" #include "tensorflow/lite/micro/testing/test_utils.h" @@ -42,13 +42,16 @@ TfLiteStatus ValidateDepthwiseConvGoldens( const T* expected_output_data, int output_length, TfLiteDepthwiseConvParams* conv_params, float tolerance, int tensors_size, TfLiteTensor* tensors) { - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); + int inputs_array_data[] = {3, 0, 1, 2}; + TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); + int outputs_array_data[] = {1, 3}; + TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_DEPTHWISE_CONV_2D); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); + const TfLiteRegistration registration = + tflite::ops::micro::Register_DEPTHWISE_CONV_2D(); + micro::KernelRunner runner( + registration, tensors, tensors_size, inputs_array, outputs_array, + reinterpret_cast(conv_params), micro_test::reporter); int input_depth = tensors[0].dims->data[3]; int output_depth = tensors[1].dims->data[3]; @@ -60,32 +63,13 @@ TfLiteStatus ValidateDepthwiseConvGoldens( conv_params->depth_multiplier = depth_mul; const char* init_data = reinterpret_cast(conv_params); - size_t init_data_size = 0; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, init_data, init_data_size); - } - int inputs_array_data[] = {3, 0, 1, 2}; - TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); - int outputs_array_data[] = {1, 3}; - TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = reinterpret_cast(conv_params); - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; - if (registration->prepare) { - TF_LITE_ENSURE_OK(context, registration->prepare(&context, &node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_ENSURE_OK(context, registration->invoke(&context, &node)); - - if (registration->free) { - registration->free(&context, user_data); + // TODO(b/154240825): Use a test macro here which fails and returns. 
+ TfLiteStatus status = runner.InitAndPrepare(init_data); + if (status != kTfLiteOk) { + return status; } + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); const T* output_data = tflite::GetTensorData(&tensors[kOutputTensorIndex]); for (int i = 0; i < output_length; ++i) { From a2866288eb212b27c34c03195366499e33d84073 Mon Sep 17 00:00:00 2001 From: Gabriel Rasskin Date: Thu, 23 Jul 2020 14:26:40 -0700 Subject: [PATCH 1205/2522] Remove tap from arg_def_case --- tensorflow/security/fuzzing/BUILD | 3 --- 1 file changed, 3 deletions(-) diff --git a/tensorflow/security/fuzzing/BUILD b/tensorflow/security/fuzzing/BUILD index 87333f8a185..aa3d509af37 100644 --- a/tensorflow/security/fuzzing/BUILD +++ b/tensorflow/security/fuzzing/BUILD @@ -48,9 +48,6 @@ tf_fuzz_target( tf_fuzz_target( name = "arg_def_case_fuzz", srcs = ["arg_def_case_fuzz.cc"], - tags = [ - "notap", # TODO(b/160990158): ArgDefCase invariant is broken - ], deps = [ "//tensorflow/core/platform:str_util", "//tensorflow/core/platform:stringpiece", From 7b232a3ee4a1e0d07f29189cc9013c9f83daeadc Mon Sep 17 00:00:00 2001 From: Gabriel Rasskin Date: Thu, 23 Jul 2020 14:27:15 -0700 Subject: [PATCH 1206/2522] Update arg_def_case_fuzz.cc --- tensorflow/security/fuzzing/arg_def_case_fuzz.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/security/fuzzing/arg_def_case_fuzz.cc b/tensorflow/security/fuzzing/arg_def_case_fuzz.cc index 975a1efa164..676326c0237 100644 --- a/tensorflow/security/fuzzing/arg_def_case_fuzz.cc +++ b/tensorflow/security/fuzzing/arg_def_case_fuzz.cc @@ -28,12 +28,11 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { tensorflow::StringPiece sp(char_data, size); - tensorflow::str_util::ArgDefCase(sp); - for (const auto &c : sp) { + std::string ns = tensorflow::str_util::ArgDefCase(sp); + for (const auto &c : ns) { const bool is_letter = 'a' <= c && c <= 'z'; const bool is_digit = '0' <= c && c <= '9'; if (!is_letter && !is_digit) { - printf("Got '%c'\n", c); assert(c == '_'); } } From 18b810a970d8afdec665ae93716b5946ecca4ab5 Mon Sep 17 00:00:00 2001 From: Berkin Ilbeyi Date: Thu, 23 Jul 2020 14:23:06 -0700 Subject: [PATCH 1207/2522] [XLA] Fix a spurious verification failure with nested conditionals. When allocating alternate memory for conditionals, we assume the conditional will always evict the value back to default memory before the end of the called computations. In other words, the output of conditional can never get alternate memory allocations due to difficulties with aliasing, but the inputs can. When verifying the correctness of memory space assignment, we have to split the conditional uses by the called computations and find the time bounds in each computation that the buffer in alternate memory is used. We previously didn't do this splitting for nested conditionals, causing b/161935244. Now we do this splitting recursively. 
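To make the splitting concrete, here is a minimal standalone sketch of the recursion using toy stand-in types (the struct names, the SplitConditionalBuffer helper, and the times in main are invented for illustration; the real code walks HloInstruction/HloLiveRange data):

#include <algorithm>
#include <cstdio>
#include <utility>
#include <vector>

struct Computation;

struct Use {
  int time;
  // Non-empty when this use is itself a (nested) conditional.
  std::vector<const Computation*> called_computations;
};

struct Computation {
  int parameter_time;
  std::vector<Use> uses;
};

// Emits one (start, end) interval per called computation that uses the
// buffer; when the last use inside a computation is another conditional,
// recurse into it instead of recording a single interval.
void SplitConditionalBuffer(const std::vector<const Computation*>& called,
                            int start_time, int end_time,
                            std::vector<std::pair<int, int>>* intervals) {
  int earliest_computation_start = end_time;
  for (const Computation* computation : called) {
    earliest_computation_start =
        std::min(earliest_computation_start, computation->parameter_time);
    const Use* last_use = nullptr;
    for (const Use& use : computation->uses) {
      if (last_use == nullptr || use.time > last_use->time) last_use = &use;
    }
    if (last_use == nullptr) continue;
    if (!last_use->called_computations.empty()) {
      SplitConditionalBuffer(last_use->called_computations,
                             computation->parameter_time, last_use->time,
                             intervals);
    } else {
      intervals->emplace_back(computation->parameter_time, last_use->time);
    }
  }
  // The slice from the conditional's start until the first computation begins.
  intervals->emplace_back(start_time, earliest_computation_start - 1);
}

int main() {
  Computation inner_true{/*parameter_time=*/6, {{7, {}}, {8, {}}}};
  Computation inner_false{/*parameter_time=*/6, {{9, {}}}};
  Computation outer_true{/*parameter_time=*/3,
                         {{10, {&inner_true, &inner_false}}}};
  Computation outer_false{/*parameter_time=*/3, {{4, {}}}};
  std::vector<std::pair<int, int>> intervals;
  SplitConditionalBuffer({&outer_true, &outer_false}, /*start_time=*/1,
                         /*end_time=*/12, &intervals);
  for (const auto& interval : intervals) {
    std::printf("(%d, %d)\n", interval.first, interval.second);
  }
  return 0;
}

The point of the recursion is that a nested conditional never contributes one contiguous interval for the outer computation; each called computation is verified over its own (parameter time, last use) bounds.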
PiperOrigin-RevId: 322865495 Change-Id: I522ccd9e54e73ee9d3b53997fb28198651338909 --- .../xla/service/memory_space_assignment.cc | 64 +++++++++++++------ .../service/memory_space_assignment_test.cc | 52 +++++++++++++++ 2 files changed, 96 insertions(+), 20 deletions(-) diff --git a/tensorflow/compiler/xla/service/memory_space_assignment.cc b/tensorflow/compiler/xla/service/memory_space_assignment.cc index b003045e66c..803140b804e 100644 --- a/tensorflow/compiler/xla/service/memory_space_assignment.cc +++ b/tensorflow/compiler/xla/service/memory_space_assignment.cc @@ -3001,18 +3001,23 @@ Status MemorySpaceAssignment::VerifyAndExportHeapSimulatorTrace() { } } - if (last_use_instruction && - last_use_instruction->opcode() == HloOpcode::kConditional) { + std::function + split_conditional_buffer; + split_conditional_buffer = [&](const HloInstruction* use_instruction, + int64 start_time, int64 end_time, + absl::string_view indent_string) { // Special case when verifying conditional: we internally split the use // of alternate memory in conditionals, so fish them out from the // conditionals. - VLOG(3) << " Splitting conditional buffer: " << buffer.ToString() - << " value: " << value->ToShortString() << ": (" - << time_bound.start << ", " << time_bound.end - << ") off: " << chunk.offset << ", size: " << chunk.size; - int64 earliest_computation_start_time = time_bound.end; + VLOG(3) << indent_string + << "Splitting conditional buffer: " << buffer.ToString() + << " value: " << value->ToShortString() << ": (" << start_time + << ", " << end_time << ") off: " << chunk.offset + << ", size: " << chunk.size; + int64 earliest_computation_start_time = end_time; for (const HloComputation* called_computation : - last_use_instruction->called_computations()) { + use_instruction->called_computations()) { earliest_computation_start_time = std::min(earliest_computation_start_time, hlo_live_range->computation_span_times() @@ -3020,6 +3025,7 @@ Status MemorySpaceAssignment::VerifyAndExportHeapSimulatorTrace() { .start); int64 parameter_time = -1; int64 last_use_time = -1; + const HloInstruction* last_use_instruction = nullptr; for (const HloPosition& position : value->positions()) { if (position.instruction->opcode() == HloOpcode::kParameter && position.instruction->parent() == called_computation) { @@ -3029,26 +3035,44 @@ Status MemorySpaceAssignment::VerifyAndExportHeapSimulatorTrace() { } } for (const HloUse& use : value->uses()) { - if (use.instruction->parent() == called_computation) { - last_use_time = std::max( - last_use_time, - hlo_live_range->instruction_schedule().at(use.instruction)); + int64 use_time = + hlo_live_range->instruction_schedule().at(use.instruction); + if (use.instruction->parent() == called_computation && + use_time > last_use_time) { + last_use_time = use_time; + last_use_instruction = use.instruction; } } if (last_use_time != -1) { CHECK_NE(parameter_time, -1); - VLOG(3) << " computation: " << called_computation->name() << ": (" + VLOG(3) << indent_string + << " computation: " << called_computation->name() << ": (" << parameter_time << ", " << last_use_time << ")"; - TF_RETURN_IF_ERROR(add_allocation_and_verify( - parameter_time, last_use_time, chunk, value)); + CHECK(last_use_instruction); + if (last_use_instruction->opcode() == HloOpcode::kConditional) { + // The last use is another (nested) conditional. Call this + // function recursively. 
+ TF_RETURN_IF_ERROR(split_conditional_buffer( + last_use_instruction, parameter_time, last_use_time, + absl::StrCat(indent_string, " "))); + } else { + TF_RETURN_IF_ERROR(add_allocation_and_verify( + parameter_time, last_use_time, chunk, value)); + } } } - VLOG(3) << " from beginning until first computation: (" - << time_bound.start << ", " - << (earliest_computation_start_time - 1) << ")"; + VLOG(3) << indent_string << " from beginning until first computation: (" + << start_time << ", " << (earliest_computation_start_time - 1) + << ")"; TF_RETURN_IF_ERROR(add_allocation_and_verify( - time_bound.start, earliest_computation_start_time - 1, chunk, - value)); + start_time, earliest_computation_start_time - 1, chunk, value)); + return Status::OK(); + }; + + if (last_use_instruction && + last_use_instruction->opcode() == HloOpcode::kConditional) { + TF_RETURN_IF_ERROR(split_conditional_buffer( + last_use_instruction, time_bound.start, time_bound.end, " ")); } else { VLOG(3) << " buffer: " << buffer.ToString() << " value: " << value->ToShortString() << ": (" diff --git a/tensorflow/compiler/xla/service/memory_space_assignment_test.cc b/tensorflow/compiler/xla/service/memory_space_assignment_test.cc index d609f7edd1d..c0fdc5fc00d 100644 --- a/tensorflow/compiler/xla/service/memory_space_assignment_test.cc +++ b/tensorflow/compiler/xla/service/memory_space_assignment_test.cc @@ -2153,6 +2153,58 @@ TEST_P(MemorySpaceAssignmentTest, NestedConditional) { } } +TEST_P(MemorySpaceAssignmentTest, NestedConditionalBufferReuseVerificationBug) { + // Tests a spurious verification failure when there are nested conditionals + // and the innermost conditional computation reuses the buffer. Here, both the + // parameter of true_computation2 and neg2 will get the same buffer. Make sure + // that verification doesn't claim a failure in this case. 
+ absl::string_view hlo_string = R"( + HloModule CondAllocation, is_scheduled=true + + true_computation2 { + p0 = (f32[3]{0}) parameter(0) + gte = f32[3]{0} get-tuple-element(p0), index=0 + neg1 = f32[3]{0} negate(gte) + neg2 = f32[3]{0} negate(neg1) + ROOT neg3 = f32[3]{0} negate(neg2) + } + + false_computation2 { + p0 = (f32[3]{0}) parameter(0) + gte = f32[3]{0} get-tuple-element(p0), index=0 + ROOT neg4 = f32[3]{0} negate(gte) + } + + true_computation1 { + p0 = (f32[3]{0}) parameter(0) + gte = f32[3]{0} get-tuple-element(p0), index=0 + slice = f32[1]{0} slice(gte), slice={[0:1]} + bitcast = f32[] bitcast(slice) + constant = f32[] constant(0.0) + compare = pred[] compare(bitcast, constant), direction=GT + tuple = (f32[3]{0}) tuple(gte) + ROOT conditional = f32[3]{0} conditional(compare, tuple, tuple), true_computation=true_computation2, false_computation=false_computation2 + } + + false_computation1 { + p0 = (f32[3]{0}) parameter(0) + gte = f32[3]{0} get-tuple-element(p0), index=0 + ROOT neg5 = f32[3]{0} negate(gte) + } + + ENTRY entry { + p0 = f32[3]{0} parameter(0) + p1 = pred[] parameter(1) + copy = f32[3]{0} copy(p0) + tuple = (f32[3]{0}) tuple(copy) + ROOT conditional = f32[3]{0} conditional(p1, tuple, tuple), true_computation=true_computation1, false_computation=false_computation1 + } + )"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + AssignMemorySpace(module.get()); +} + TEST_P(MemorySpaceAssignmentTest, RequestIdentifierShouldNotBeAllocatedInAlternateMem) { // Ensure that request identifier returned by Send/Recv HLOs are not allocated From 5a58103a7d1b67edf548e6123f91d9d10cfd4e5d Mon Sep 17 00:00:00 2001 From: Jay Shi Date: Thu, 23 Jul 2020 14:23:28 -0700 Subject: [PATCH 1208/2522] [tf.data] Preparation of using GMock matchers in tensorflow tests. PiperOrigin-RevId: 322865558 Change-Id: Ie8d7af71fec5933f371a8bc4a16133e2d7728566 --- tensorflow/core/platform/test.h | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/platform/test.h b/tensorflow/core/platform/test.h index a2cda11c608..17e15c5f052 100644 --- a/tensorflow/core/platform/test.h +++ b/tensorflow/core/platform/test.h @@ -23,13 +23,30 @@ limitations under the License. #include "tensorflow/core/platform/platform.h" #include "tensorflow/core/platform/types.h" -// As of September 2016, we continue to attempt to avoid the use of gmock aka -// googlemock included in the test framework -// (https://github.com/google/googletest) to discourage over-eager use of mocks -// that lead to cumbersome class hierarchies and tests that might end up not -// testing real code in important ways. #include // IWYU pragma: export +// Includes gmock.h and enables the use of gmock matchers in tensorflow tests. +// +// Test including this header can use the macros EXPECT_THAT(...) and +// ASSERT_THAT(...) in combination with gmock matchers. +// Example: +// std::vector vec = Foo(); +// EXPECT_THAT(vec, ::testing::ElementsAre(1,2,3)); +// EXPECT_THAT(vec, ::testing::UnorderedElementsAre(2,3,1)); +// +// For more details on gmock matchers see: +// https://github.com/google/googletest/blob/master/googlemock/docs/CheatSheet.md#matchers +// +// The advantages of using gmock matchers instead of self defined matchers are +// better error messages, more maintainable tests and more test coverage. 
+#if defined(PLATFORM_GOOGLE) || defined(PLATFORM_GOOGLE_ANDROID) +#include "testing/base/public/gmock.h" +#else +#include +#include +#include +#endif + namespace tensorflow { namespace testing { From 5882f49288b9db15f68cdad81882b5d9f43a4085 Mon Sep 17 00:00:00 2001 From: Jaesung Chung Date: Thu, 23 Jul 2020 14:30:11 -0700 Subject: [PATCH 1209/2522] Rollback of BroadcastTo op additions (part 1) Rolling back until discussion about builtin ops schema issue is discussed. PiperOrigin-RevId: 322867083 Change-Id: I85bc33675a00ea5ff7253d9d1eb53b047f9f4658 --- tensorflow/compiler/mlir/lite/ir/tfl_ops.cc | 29 ++- .../compiler/mlir/lite/tests/legalize-tf.mlir | 183 +---------------- .../mlir/lite/transforms/legalize_patterns.td | 2 +- .../mlir/lite/transforms/legalize_tf.cc | 186 +----------------- tensorflow/lite/testing/op_tests/binary_op.py | 13 -- tensorflow/lite/testing/op_tests/where.py | 10 - 6 files changed, 43 insertions(+), 380 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc b/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc index 99894ede5f0..427b9c692a7 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc +++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc @@ -147,10 +147,18 @@ bool IsI64Type(Type element_type) { bool VerifyAddOpShapeConstraints(AddOp op) { auto element_type = getElementTypeOrSelf(op.output().getType()); - // Allows F32, QI8, QUI8 and I32 outputs when the operands have valid shapes, + // Allows F32, QI8, and QUI8 outputs when the operands have valid shapes, // which are broadcastable shapes up to five dimension or have same shapes. if (element_type.isF32() || IsQI8Type(element_type) || - IsQUI8Type(element_type) || IsI32Type(element_type)) { + IsQUI8Type(element_type)) { + return VerifyOperandsHaveSameShapesOrBroadcastableShape( + /*op=*/op.getOperation(), /*indices=*/ArrayRef{0, 1}, + /*max_bcast_rank=*/5); + } + + // Allows I32 output when the operands have valid shapes, which are + // broadcastable shapes up to four dimension or have same shapes. + if (IsI32Type(element_type)) { return VerifyOperandsHaveSameShapesOrBroadcastableShape( /*op=*/op.getOperation(), /*indices=*/ArrayRef{0, 1}, /*max_bcast_rank=*/4); @@ -202,13 +210,20 @@ bool VerifyMulOpShapeConstraints(MulOp op) { } return VerifyOperandsHaveSameShapesOrBroadcastableShape( /*op=*/op.getOperation(), /*indices=*/ArrayRef{0, 1}, - /*max_bcast_rank=*/4); + /*max_bcast_rank=*/5); } - // Allows I32, QI16 and F32 outputs when the operands have valid shapes, which - // are broadcastable shapes up to four dimension or have same shapes. - if (IsI32Type(element_type) || IsQI16Type(element_type) || - element_type.isF32()) { + // Allows F32 output when the operands have valid shapes, which are + // broadcastable shapes up to five dimension or have same shapes. + if (element_type.isF32()) { + return VerifyOperandsHaveSameShapesOrBroadcastableShape( + /*op=*/op.getOperation(), /*indices=*/ArrayRef{0, 1}, + /*max_bcast_rank=*/5); + } + + // Allows I32 and QI16 outputs when the operands have valid shapes, which are + // broadcastable shapes up to four dimension or have same shapes. 
+ if (IsI32Type(element_type) || IsQI16Type(element_type)) { return VerifyOperandsHaveSameShapesOrBroadcastableShape( /*op=*/op.getOperation(), /*indices=*/ArrayRef{0, 1}, /*max_bcast_rank=*/4); diff --git a/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir b/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir index a5174e7c438..74a33817d32 100644 --- a/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir +++ b/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir @@ -25,6 +25,13 @@ func @testAddHighDimsHaveSameShape(%arg0: tensor<1x2x3x4x5x6x7x8xi32>, %arg1: te return %0 : tensor<1x2x3x4x5x6x7x8xi32> } +// CHECK-LABEL: testAddTooHighBroadcastableDims +func @testAddTooHighBroadcastableDims(%arg0: tensor<1x2x3x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { + // expected-error @+1 {{'tfl.add' op failed to verify that operand #0 and operand #1 have the same shape or broadcastable shapes within the rank 4}} + %0 = "tf.Add"(%arg0, %arg1) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> + return %0 : tensor<1x2x3x4x5x6xi32> +} + func @LeakyRelu(%arg0: tensor<1xf32>) -> tensor<1xf32> { %2 = "tf.LeakyRelu"(%arg0) {alpha = 0.1 : f32} : (tensor<1xf32>) -> tensor<1xf32> return %2: tensor<1xf32> @@ -1523,11 +1530,7 @@ func @select_v2_with_6d_broadcasting(%arg0: tensor<1x1x1x1x3x1xi1>, %arg1 : tens %0 = "tf.SelectV2"(%arg0, %arg1, %arg2): (tensor<1x1x1x1x3x1xi1>, tensor<1x1x1x1x1x4xf32>, tensor<1x1x1x2x1x1xf32>) -> tensor<1x1x1x2x3x4xf32> return %0 : tensor<1x1x1x2x3x4xf32> // CHECK-LABEL: select_v2_with_6d_broadcasting -// CHECK: [[CST:%.*]] = constant dense<[1, 1, 1, 2, 3, 4]> : tensor<6xi64> -// CHECK: [[BCT:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) -// CHECK: [[BCT_0:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) -// CHECK: [[BCT_1:%.*]] = "tfl.broadcast_to"(%arg2, [[CST]]) -// CHECK: "tfl.select"([[BCT]], [[BCT_0]], [[BCT_1]]) +// CHECK: "tf.SelectV2"(%arg0, %arg1, %arg2) } // ----- @@ -1537,9 +1540,7 @@ func @maximum_with_6d_broadcasting(%arg0: tensor<1x1x1x1x8x16xf32>, %arg1: tenso return %0 : tensor<1x1x1x1x8x16xf32> // CHECK-LABEL: maximum_with_6d_broadcasting -// CHECK: [[CST:%.*]] = constant dense<[1, 1, 1, 1, 8, 16]> : tensor<6xi64> -// CHECK: [[BCT:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) -// CHECK: "tfl.maximum"(%arg0, [[BCT]]) +// CHECK: "tf.Maximum"(%arg0, %arg1) } // ----- @@ -1548,169 +1549,5 @@ func @add_with_int32_5d_inputs(%arg0: tensor<1x1x1x3x1xi32>, %arg1 : tensor<1x1x %0 = "tf.Add"(%arg0, %arg1): (tensor<1x1x1x3x1xi32>, tensor<1x1x1x1x4xi32>) -> tensor<1x1x1x3x4xi32> return %0 : tensor<1x1x1x3x4xi32> // CHECK-LABEL: add_with_int32_5d_inputs -// CHECK: [[CST:%.*]] = constant dense<[1, 1, 1, 3, 4]> : tensor<5xi64> -// CHECK: [[BCT:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) -// CHECK: [[BCT_0:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) -// CHECK: tfl.add [[BCT]], [[BCT_0]] -} - -// CHECK-LABEL: testAddWithBroadcastToOps -func @testAddWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { - // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> - // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) - // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) - // CHECK: tfl.add [[BCAST]], [[BCAST_1]] {fused_activation_function = "NONE"} : tensor<1x2x3x4x5x6xi32> - %0 = "tf.Add"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> - return %0 : tensor<1x2x3x4x5x6xi32> -} - -// CHECK-LABEL: 
testSubWithBroadcastToOps -func @testSubWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { - // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> - // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) - // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) - // CHECK: tfl.sub [[BCAST]], [[BCAST_1]] {fused_activation_function = "NONE"} : tensor<1x2x3x4x5x6xi32> - %0 = "tf.Sub"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> - return %0 : tensor<1x2x3x4x5x6xi32> -} - -// CHECK-LABEL: testMulWithBroadcastToOps -func @testMulWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { - // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> - // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) - // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) - // CHECK: tfl.mul [[BCAST]], [[BCAST_1]] {fused_activation_function = "NONE"} : tensor<1x2x3x4x5x6xi32> - %0 = "tf.Mul"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> - return %0 : tensor<1x2x3x4x5x6xi32> -} - -// CHECK-LABEL: testDivWithBroadcastToOps -func @testDivWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { - // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> - // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) - // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) - // CHECK: tfl.div [[BCAST]], [[BCAST_1]] {fused_activation_function = "NONE"} : tensor<1x2x3x4x5x6xi32> - %0 = "tf.Div"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> - return %0 : tensor<1x2x3x4x5x6xi32> -} - -// CHECK-LABEL: testFloorDivWithBroadcastToOps -func @testFloorDivWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { - // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> - // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) - // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) - // CHECK: tfl.floor_div [[BCAST]], [[BCAST_1]] : tensor<1x2x3x4x5x6xi32> - %0 = "tf.FloorDiv"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> - return %0 : tensor<1x2x3x4x5x6xi32> -} - -// CHECK-LABEL: testFloorModWithBroadcastToOps -func @testFloorModWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { - // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> - // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) - // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) - // CHECK: "tfl.floor_mod"([[BCAST]], [[BCAST_1]]) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x6xi32>) -> tensor<1x2x3x4x5x6xi32> - %0 = "tf.FloorMod"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> - return %0 : tensor<1x2x3x4x5x6xi32> -} - -// CHECK-LABEL: testPowWithBroadcastToOps -func @testPowWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { - // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> - // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) - // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) - // 
CHECK: tfl.pow [[BCAST]], [[BCAST_1]] : tensor<1x2x3x4x5x6xi32> - %0 = "tf.Pow"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> - return %0 : tensor<1x2x3x4x5x6xi32> -} - -// CHECK-LABEL: testMaximumWithBroadcastToOps -func @testMaximumWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { - // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> - // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) - // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) - // CHECK: "tfl.maximum"([[BCAST]], [[BCAST_1]]) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x6xi32>) -> tensor<1x2x3x4x5x6xi32> - %0 = "tf.Maximum"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> - return %0 : tensor<1x2x3x4x5x6xi32> -} - -// CHECK-LABEL: testMinimumWithBroadcastToOps -func @testMinimumWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { - // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> - // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) - // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) - // CHECK: "tfl.minimum"([[BCAST]], [[BCAST_1]]) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x6xi32>) -> tensor<1x2x3x4x5x6xi32> - %0 = "tf.Minimum"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> - return %0 : tensor<1x2x3x4x5x6xi32> -} - -// CHECK-LABEL: testSelectV2WithBroadcastToOps -func @testSelectV2WithBroadcastToOps(%arg0: tensor<1x2x1x4x1x6xi1>, %arg1: tensor<1x2x3x4x1x1xi32>, %arg2: tensor<1x2x1x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> { - // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> - // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) - // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) - // CHECK: [[BCAST_2:%.*]] = "tfl.broadcast_to"(%arg2, [[CST]]) - // CHECK: "tfl.select"([[BCAST]], [[BCAST_1]], [[BCAST_2]]) - %0 = "tf.SelectV2"(%arg0, %arg1, %arg2) : (tensor<1x2x1x4x1x6xi1>, tensor<1x2x3x4x1x1xi32>, tensor<1x2x1x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi32> - return %0 : tensor<1x2x3x4x5x6xi32> -} - -// CHECK-LABEL: testLessEqualWithBroadcastToOps -func @testLessEqualWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> { - // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> - // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) - // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) - // CHECK: "tfl.less_equal"([[BCAST]], [[BCAST_1]]) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x6xi32>) -> tensor<1x2x3x4x5x6xi1> - %0 = "tf.LessEqual"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> - return %0 : tensor<1x2x3x4x5x6xi1> -} - -// CHECK-LABEL: testGreaterEqualWithBroadcastToOps -func @testGreaterEqualWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> { - // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> - // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) - // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) - // CHECK: "tfl.greater_equal"([[BCAST]], [[BCAST_1]]) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x6xi32>) -> tensor<1x2x3x4x5x6xi1> - %0 = "tf.GreaterEqual"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, 
tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> - return %0 : tensor<1x2x3x4x5x6xi1> -} - -// CHECK-LABEL: testEqualWithBroadcastToOps -func @testEqualWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> { - // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> - // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) - // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) - // CHECK: "tfl.equal"([[BCAST]], [[BCAST_1]]) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x6xi32>) -> tensor<1x2x3x4x5x6xi1> - %0 = "tf.Equal"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> - return %0 : tensor<1x2x3x4x5x6xi1> -} - -// CHECK-LABEL: testNotEqualWithBroadcastToOps -func @testNotEqualWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> { - // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> - // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) - // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) - // CHECK: "tfl.not_equal"([[BCAST]], [[BCAST_1]]) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x6xi32>) -> tensor<1x2x3x4x5x6xi1> - %0 = "tf.NotEqual"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> - return %0 : tensor<1x2x3x4x5x6xi1> -} - -// CHECK-LABEL: testLessWithBroadcastToOps -func @testLessWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> { - // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> - // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) - // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) - // CHECK: "tfl.less"([[BCAST]], [[BCAST_1]]) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x6xi32>) -> tensor<1x2x3x4x5x6xi1> - %0 = "tf.Less"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> - return %0 : tensor<1x2x3x4x5x6xi1> -} - -// CHECK-LABEL: testGreaterWithBroadcastToOps -func @testGreaterWithBroadcastToOps(%arg0: tensor<1x2x1x4x5x6xi32>, %arg1: tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> { - // CHECK: [[CST:%.*]] = constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi64> - // CHECK: [[BCAST:%.*]] = "tfl.broadcast_to"(%arg0, [[CST]]) - // CHECK: [[BCAST_1:%.*]] = "tfl.broadcast_to"(%arg1, [[CST]]) - // CHECK: "tfl.greater"([[BCAST]], [[BCAST_1]]) : (tensor<1x2x3x4x5x6xi32>, tensor<1x2x3x4x5x6xi32>) -> tensor<1x2x3x4x5x6xi1> - %0 = "tf.Greater"(%arg0, %arg1) : (tensor<1x2x1x4x5x6xi32>, tensor<1x2x3x4x5x1xi32>) -> tensor<1x2x3x4x5x6xi1> - return %0 : tensor<1x2x3x4x5x6xi1> +// CHECK: "tf.Add"(%arg0, %arg1) } diff --git a/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td b/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td index 235a5c65e96..a6adb8f4a61 100644 --- a/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td +++ b/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td @@ -256,7 +256,7 @@ def LegalizeAddv2 : Pat<(TF_AddV2Op $lhs, $rhs), (TFL_AddOp $lhs, $rhs, TFL_AF_None)>; def LegalizeBiasAdd : Pat< (TF_BiasAddOp F32Tensor:$l, F32Tensor:$r, IsDataFormatNHWC:$data_format), - (TF_AddV2Op $l, $r)>; + (TFL_AddOp $l, $r, TFL_AF_None)>; def LegalizeSub : Pat<(TF_SubOp $lhs, $rhs), (TFL_SubOp $lhs, $rhs, TFL_AF_None)>; def LegalizeMul : Pat<(TF_MulOp $lhs, $rhs), diff --git a/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc 
b/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc index 2f8370e2b96..7d6866dc570 100644 --- a/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc +++ b/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc @@ -631,156 +631,6 @@ struct LegalizeUnidirectionalSequenceRnn : public RewritePattern { } }; -// Put two TFL BroadcastTo ops in front of the given TF binary broadcast op to -// to make binary broadcast-able op conversion always successful and does not -// require flex delegate. -template -class ApplyExplicitBroadcasting : public OpRewritePattern { - public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(SourceOp src_op, - PatternRewriter& rewriter) const override { - Operation* op = static_cast(src_op); - auto lhs = op->getOperand(0); - auto rhs = op->getOperand(1); - - // Should have static shapes to calculate the broadcasted shape. - if (!lhs.getType().cast().hasStaticShape() || - !rhs.getType().cast().hasStaticShape()) { - return failure(); - } - - // Calculate the broadcasted shape. - SmallVector result_shape; - if (!OpTrait::util::getBroadcastedShape( - lhs.getType().cast().getShape(), - rhs.getType().cast().getShape(), result_shape)) { - return failure(); - } - - RankedTensorType result_type = RankedTensorType::get( - result_shape, getElementTypeOrSelf(op->getResult(0).getType())); - - // Create a const op, that stores the above broadcasted shape. - auto new_shape_attr = mlir::DenseIntElementsAttr::get( - RankedTensorType::get(result_shape.size(), rewriter.getIntegerType(64)), - result_shape); - auto new_shape = rewriter.create(op->getLoc(), new_shape_attr); - - // Apply BroadcastTo ops to each input. - auto broadcast_type = RankedTensorType::get( - result_shape, getElementTypeOrSelf(lhs.getType())); - - if (result_type.getShape() != lhs.getType().cast().getShape()) { - lhs = rewriter - .create(op->getLoc(), broadcast_type, lhs, - new_shape) - .output(); - } - if (result_type.getShape() != rhs.getType().cast().getShape()) { - rhs = rewriter - .create(op->getLoc(), broadcast_type, rhs, - new_shape) - .output(); - } - - // Recreate an op with the above Broadcast op results. - rewriter.replaceOpWithNewOp(op, result_type, lhs, rhs); - return success(); - } -}; - -// This specialization is for TF SelectV2 op. SelectV2 op have three inputs and -// they should have broadcastable shapes. -template <> -class ApplyExplicitBroadcasting - : public OpRewritePattern { - public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(TF::SelectV2Op src_op, - PatternRewriter& rewriter) const override { - Operation* op = static_cast(src_op); - auto cond = op->getOperand(0); - auto lhs = op->getOperand(1); - auto rhs = op->getOperand(2); - - // Should have static shapes to calculate the broadcasted shape. - if (!lhs.getType().cast().hasStaticShape() || - !rhs.getType().cast().hasStaticShape() || - !cond.getType().cast().hasStaticShape()) { - return failure(); - } - - // Calculate the broadcasted shape. - SmallVector broadcasted_shape; - if (!OpTrait::util::getBroadcastedShape( - lhs.getType().cast().getShape(), - rhs.getType().cast().getShape(), broadcasted_shape)) { - return failure(); - } - - SmallVector result_shape; - if (!OpTrait::util::getBroadcastedShape( - broadcasted_shape, cond.getType().cast().getShape(), - result_shape)) { - return failure(); - } - - // Create a const op, that stores the above broadcasted shape. 
- auto shape_type = - RankedTensorType::get(result_shape.size(), rewriter.getIntegerType(64)); - auto new_shape_attr = - mlir::DenseIntElementsAttr::get(shape_type, result_shape); - auto new_shape = rewriter.create(op->getLoc(), new_shape_attr); - - // Apply BroadcastTo ops to each input. - auto cond_result_type = - RankedTensorType::get(result_shape, rewriter.getIntegerType(1)); - auto result_type = RankedTensorType::get( - result_shape, getElementTypeOrSelf(lhs.getType())); - - if (result_shape != cond.getType().cast().getShape()) { - cond = rewriter - .create(op->getLoc(), cond_result_type, - cond, new_shape) - .output(); - } - if (result_shape != lhs.getType().cast().getShape()) { - lhs = rewriter - .create(op->getLoc(), result_type, lhs, - new_shape) - .output(); - } - if (result_shape != rhs.getType().cast().getShape()) { - rhs = rewriter - .create(op->getLoc(), result_type, rhs, - new_shape) - .output(); - } - - // Recreate an op with the above Broadcast op results. - rewriter.replaceOpWithNewOp(op, result_type, cond, lhs, - rhs); - return success(); - } -}; - -void applyPatterns(FuncOp func, ConversionTarget& target, - const OwningRewritePatternList& patterns) { - // Keep trying to convert. - // TODO(karimnosseir): This is similar to what apply greedy patterns does. - // Look if there is a function that tries until it converge. - // Currently unit-test doesn't do multiple tries, so we need this. - const int max_iterations = 15; - for (int i = 0; i < max_iterations; ++i) { - if (failed(applyPartialConversion(func, target, patterns))) { - return; - } - } -} - void LegalizeTF::runOnFunction() { OwningRewritePatternList patterns; auto* context = &getContext(); @@ -831,32 +681,16 @@ void LegalizeTF::runOnFunction() { return success(current_thread_id == llvm::get_threadid()); }); - applyPatterns(func, target, patterns); - - // Explict BroadcastTo addition for left-over broadcast-able ops. - // The following pattern matchings should be done after the other legalization - // rules in order not to add unnecessary BroadcastTo ops. - patterns.insert, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting>(context); - - applyPatterns(func, target, patterns); + // Keep trying to convert. + // TODO(karimnosseir): This is similar to what apply greedy patterns does. + // Look if there is a function that tries until it converge. + // Currently unit-test doesn't do multiple tries, so we need this. + const int max_iterations = 15; + for (int i = 0; i < max_iterations; ++i) { + if (failed(applyPartialConversion(func, target, patterns))) { + return; + } + } } } // namespace diff --git a/tensorflow/lite/testing/op_tests/binary_op.py b/tensorflow/lite/testing/op_tests/binary_op.py index 936563cc63d..17ed2f3522d 100644 --- a/tensorflow/lite/testing/op_tests/binary_op.py +++ b/tensorflow/lite/testing/op_tests/binary_op.py @@ -178,19 +178,6 @@ def make_binary_op_tests(options, }, ] - # High dimension broadcasting support in MLIR converter. 
- if options.use_experimental_converter: - test_parameters = test_parameters + [ - { - "dtype": [tf.float32], - "input_shape_1": [[8, 7, 6, 5, 4, 3, 2, 1]], - "input_shape_2": [[4, 3, 2, 1]], - "activation": [False], - "fully_quantize": [False], - "dynamic_range_quantize": [False], - }, - ] - # test_parameters include fully_quantize option only when # allow_fully_quantize is True. if not allow_fully_quantize: diff --git a/tensorflow/lite/testing/op_tests/where.py b/tensorflow/lite/testing/op_tests/where.py index df91e195820..49802422e3f 100644 --- a/tensorflow/lite/testing/op_tests/where.py +++ b/tensorflow/lite/testing/op_tests/where.py @@ -35,16 +35,6 @@ def make_where_tests(options): }, ] - # High dimension broadcasting support in MLIR converter. - if options.use_experimental_converter: - test_parameters = test_parameters + [ - { - "input_dtype": [tf.float32, tf.int32], - "input_shape_set": [([8, 7, 6, 5, 4, 3, 2, 1], [4, 3, 2, 1]),], - "use_where_v2": [True], - }, - ] - def build_graph(parameters): """Build the where op testing graph.""" input_value1 = tf.compat.v1.placeholder( From 32819cedcb1d5948aea56710ccd38fb7a9da1d41 Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Thu, 23 Jul 2020 14:46:11 -0700 Subject: [PATCH 1210/2522] Simplify reshape handling in dynamic dimension inference. When the code was first written last year I was still in the mindset that XLA has to infer dynamic dimensions on its own, thus creating some fairly complicated logic. Over the last few months I've been adding more hints in the bridge (by setting the dynamic output dimension in reshape's shape directly), so now instead of letting XLA handle this itself, we can rely on the info passed from the client. This way we can reduce the complexity of the logic inside XLA, which apparently has a CHECK failure.
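With the client-provided hints, the disambiguation for a reshape that splits one input dimension into several output dimensions reduces to: honor the user-set inferred dimension if present, otherwise pick an output dimension that the client already marked dynamic in the result shape, otherwise report an error. As an illustrative sketch only (simplified stand-in types, not the actual XLA classes touched by this patch):

#include <cstdint>
#include <optional>
#include <vector>

// Stand-in for a reshape's result shape: per-dimension sizes plus the
// per-dimension dynamic flags that the client/bridge now supplies.
struct ResultShape {
  std::vector<int64_t> dims;
  std::vector<bool> is_dynamic;
};

// Choose which output dimension in [dim_start, dim_end) carries the dynamic
// size. Mirrors the simplified rules: (1) explicit inferred_dimension hint,
// (2) a dimension already marked dynamic in the result shape, (3) give up.
std::optional<int64_t> ChooseOutputDynamicDimension(const ResultShape& shape,
                                                    int64_t inferred_dimension,
                                                    int64_t dim_start,
                                                    int64_t dim_end) {
  if (inferred_dimension >= 0) return inferred_dimension;
  for (int64_t i = dim_start; i < dim_end; ++i) {
    if (shape.is_dynamic[i]) return i;  // e.g. [<=4] -> [1, <=2, 2] picks dim 1.
  }
  return std::nullopt;  // Ambiguous, e.g. [<=1] -> [1, 1, 1]; caller errors out.
}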
PiperOrigin-RevId: 322870657 Change-Id: I7751b03e89af86342c12538261e43efdd97dfbe6 --- .../service/dynamic_dimension_inference.cc | 366 ++++++------------ .../xla/service/dynamic_dimension_inference.h | 117 +----- 2 files changed, 117 insertions(+), 366 deletions(-) diff --git a/tensorflow/compiler/xla/service/dynamic_dimension_inference.cc b/tensorflow/compiler/xla/service/dynamic_dimension_inference.cc index 6ebbf622614..2f2456863e9 100644 --- a/tensorflow/compiler/xla/service/dynamic_dimension_inference.cc +++ b/tensorflow/compiler/xla/service/dynamic_dimension_inference.cc @@ -148,15 +148,12 @@ class DynamicDimensionInferenceVisitor : public DfsHloVisitorWithDefault { Status HandleDomain(HloInstruction* hlo) override; private: - using DimensionConstraint = DynamicDimensionInference::DimensionConstraint; using OperandDynamicDimensionFn = std::function; + int64 operand_index, HloInstruction* dynamic_size)>; using DynamicDimensionFn = std::function; + ShapeIndex index, int64 dimension, HloInstruction* dynamic_size)>; Status ForEachOperandDynamicDimension(HloInstruction* inst, const OperandDynamicDimensionFn&); @@ -184,8 +181,7 @@ class DynamicDimensionInferenceVisitor : public DfsHloVisitorWithDefault { Status DynamicDimensionInferenceVisitor::DefaultAction(HloInstruction* hlo) { return ForEachOperandDynamicDimension( hlo, [&](HloInstruction* operand, ShapeIndex index, int64 dimension, - int64 operand_index, HloInstruction* dynamic_size, - DimensionConstraint constraint) { + int64 operand_index, HloInstruction* dynamic_size) { return UnimplementedStrCat( "Asked to propagate a dynamic dimension from hlo ", operand->name(), "@", index.ToString(), "@", dimension, " to hlo ", hlo->ToString(), @@ -197,13 +193,11 @@ Status DynamicDimensionInferenceVisitor::HandleGetTupleElement( HloInstruction* hlo) { return ForEachOperandDynamicDimension( hlo, [&](HloInstruction* operand, ShapeIndex index, int64 dimension, - int64 operand_index, HloInstruction* dynamic_size, - DimensionConstraint constraint) { + int64 operand_index, HloInstruction* dynamic_size) { if (hlo->tuple_index() == index[0]) { ShapeIndex new_index = ShapeIndexView(index).ConsumeFront().ToShapeIndex(); - parent_->SetDynamicSize(hlo, new_index, dimension, dynamic_size, - constraint); + parent_->SetDynamicSize(hlo, new_index, dimension, dynamic_size); } return Status::OK(); }); @@ -212,11 +206,9 @@ Status DynamicDimensionInferenceVisitor::HandleGetTupleElement( Status DynamicDimensionInferenceVisitor::HandleTuple(HloInstruction* hlo) { return ForEachOperandDynamicDimension( hlo, [&](HloInstruction*, ShapeIndex index, int64 dimension, - int64 operand_index, HloInstruction* dynamic_size, - DimensionConstraint constraint) { + int64 operand_index, HloInstruction* dynamic_size) { index.push_front(operand_index); - parent_->SetDynamicSize(hlo, index, dimension, dynamic_size, - constraint); + parent_->SetDynamicSize(hlo, index, dimension, dynamic_size); return Status::OK(); }); } @@ -224,11 +216,9 @@ Status DynamicDimensionInferenceVisitor::HandleTuple(HloInstruction* hlo) { Status DynamicDimensionInferenceVisitor::HandleBroadcast(HloInstruction* hlo) { return ForEachOperandDynamicDimension( hlo, [&](HloInstruction* operand, ShapeIndex index, int64 dimension, - int64 operand_index, HloInstruction* dynamic_size, - DimensionConstraint constraint) { + int64 operand_index, HloInstruction* dynamic_size) { int64 broadcast_dim = hlo->dimensions(dimension); - parent_->SetDynamicSize(hlo, {}, broadcast_dim, dynamic_size, - constraint); + 
parent_->SetDynamicSize(hlo, {}, broadcast_dim, dynamic_size); return Status::OK(); }); } @@ -244,8 +234,7 @@ Status DynamicDimensionInferenceVisitor::HandleCustomCall(HloInstruction* hlo) { // returns the padded data output and the dynamic sizes of input // dimensions. ShapeIndex data_output = {0}; - parent_->SetDynamicSize(hlo, data_output, i, dynamic_size, - DimensionConstraint(1, 1)); + parent_->SetDynamicSize(hlo, data_output, i, dynamic_size); } } return Status::OK(); @@ -255,15 +244,14 @@ Status DynamicDimensionInferenceVisitor::HandleCustomCall(HloInstruction* hlo) { } return ForEachOperandDynamicDimension( hlo, [&](HloInstruction* operand, ShapeIndex index, int64 dimension, - int64 operand_index, HloInstruction* dynamic_size, - DimensionConstraint constraint) { + int64 operand_index, HloInstruction* dynamic_size) { // Resize custom call should propagate dynamic batch (0) and channel (3) // dimensions. if (hlo->custom_call_target() == "SliceToDynamic" || hlo->custom_call_target() == "Sharding" || (absl::StartsWith(hlo->custom_call_target(), "Resize") && (dimension == 0 || dimension == 3))) { - parent_->SetDynamicSize(hlo, {}, dimension, dynamic_size, constraint); + parent_->SetDynamicSize(hlo, {}, dimension, dynamic_size); return Status::OK(); } return Unimplemented( @@ -274,16 +262,15 @@ Status DynamicDimensionInferenceVisitor::HandleCustomCall(HloInstruction* hlo) { Status DynamicDimensionInferenceVisitor::HandleSort(HloInstruction* hlo) { return ForEachOperandDynamicDimension( - hlo, [&](HloInstruction* operand, ShapeIndex index, - int64 dynamic_dimension, int64 operand_index, - HloInstruction* dynamic_size, DimensionConstraint constraint) { + hlo, + [&](HloInstruction* operand, ShapeIndex index, int64 dynamic_dimension, + int64 operand_index, HloInstruction* dynamic_size) { HloSortInstruction* sort = Cast(hlo); if (sort->values_count() == 0) { - parent_->SetDynamicSize(hlo, {}, dynamic_dimension, dynamic_size, - constraint); + parent_->SetDynamicSize(hlo, {}, dynamic_dimension, dynamic_size); } else { parent_->SetDynamicSize(hlo, {operand_index}, dynamic_dimension, - dynamic_size, constraint); + dynamic_size); } return Status::OK(); @@ -293,8 +280,7 @@ Status DynamicDimensionInferenceVisitor::HandleSort(HloInstruction* hlo) { Status DynamicDimensionInferenceVisitor::HandlePad(HloInstruction* hlo) { return ForEachOperandDynamicDimension( hlo, [&](HloInstruction* operand, ShapeIndex index, int64 dimension, - int64 operand_index, HloInstruction* dynamic_size, - DimensionConstraint constraint) { + int64 operand_index, HloInstruction* dynamic_size) { if (operand_index != 0) { return Unimplemented( "Dynamic dimension on padding value is not supported"); @@ -311,8 +297,7 @@ Status DynamicDimensionInferenceVisitor::HandlePad(HloInstruction* hlo) { hlo->parent()->AddInstruction(HloInstruction::CreateBinary( dynamic_size_adjusted->shape(), HloOpcode::kAdd, dynamic_size_adjusted, adjustment)); - parent_->SetDynamicSize(hlo, {}, dimension, dynamic_size_adjusted, - constraint); + parent_->SetDynamicSize(hlo, {}, dimension, dynamic_size_adjusted); return Status::OK(); } else { return Unimplemented( @@ -327,8 +312,7 @@ Status DynamicDimensionInferenceVisitor::HandlePad(HloInstruction* hlo) { Status DynamicDimensionInferenceVisitor::HandleReduce(HloInstruction* hlo) { return ForEachOperandDynamicDimension( hlo, [&](HloInstruction* operand, ShapeIndex index, int64 dimension, - int64 operand_index, HloInstruction* dynamic_size, - DimensionConstraint constraint) { + int64 operand_index, 
HloInstruction* dynamic_size) { HloInstruction* reduce = hlo; int64 operand_count = reduce->operand_count(); bool is_variadic_reduce = operand_count > 2; @@ -354,13 +338,12 @@ Status DynamicDimensionInferenceVisitor::HandleReduce(HloInstruction* hlo) { // reduce has a dynamic dimension, we set all outputs to use the // same dynamic size in corresponding dimensions. for (int64 i = 0; i < operand_count / 2; ++i) { - parent_->SetDynamicSize(reduce, {i}, - dimensions_not_reduced_count, - dynamic_size, constraint); + parent_->SetDynamicSize( + reduce, {i}, dimensions_not_reduced_count, dynamic_size); } } else { parent_->SetDynamicSize(reduce, {}, dimensions_not_reduced_count, - dynamic_size, constraint); + dynamic_size); } return Status::OK(); @@ -378,7 +361,7 @@ Status DynamicDimensionInferenceVisitor::HandleDot(HloInstruction* hlo) { return ForEachOperandDynamicDimension( hlo, [&](HloInstruction* operand, ShapeIndex operand_shape_index, int64 operand_dimension, int64 operand_index, - HloInstruction* dynamic_size, DimensionConstraint constraint) { + HloInstruction* dynamic_size) { // There are three types of dimensions in a dot: // A. batch dims // B. contracting dims @@ -451,8 +434,7 @@ Status DynamicDimensionInferenceVisitor::HandleDot(HloInstruction* hlo) { // work item to trace that dimension. auto iter = result_dim_mapping.find(operand_dimension); if (iter != result_dim_mapping.end()) { - parent_->SetDynamicSize(dot, {}, iter->second, dynamic_size, - constraint); + parent_->SetDynamicSize(dot, {}, iter->second, dynamic_size); } return Status::OK(); @@ -463,8 +445,7 @@ Status DynamicDimensionInferenceVisitor::HandleTranspose(HloInstruction* hlo) { return ForEachOperandDynamicDimension( hlo, [&](HloInstruction* operand, ShapeIndex index, int64 dimension, - int64 operand_index, HloInstruction* dynamic_size, - DimensionConstraint constraint) -> Status { + int64 operand_index, HloInstruction* dynamic_size) -> Status { int64 permuted_dim = -1; for (int64 i = 0; i < hlo->dimensions().size(); ++i) { if (hlo->dimensions()[i] == dimension) { @@ -472,8 +453,7 @@ Status DynamicDimensionInferenceVisitor::HandleTranspose(HloInstruction* hlo) { permuted_dim = i; } } - parent_->SetDynamicSize(hlo, {}, permuted_dim, dynamic_size, - constraint); + parent_->SetDynamicSize(hlo, {}, permuted_dim, dynamic_size); return Status::OK(); }); } @@ -482,8 +462,7 @@ Status DynamicDimensionInferenceVisitor::HandleConvolution( HloInstruction* hlo) { return ForEachOperandDynamicDimension( hlo, [&](HloInstruction* operand, ShapeIndex index, int64 dimension, - int64 operand_index, HloInstruction* dynamic_size, - DimensionConstraint constraint) { + int64 operand_index, HloInstruction* dynamic_size) { HloInstruction* conv = hlo; const ConvolutionDimensionNumbers& dimension_numbers = conv->convolution_dimension_numbers(); @@ -492,7 +471,7 @@ Status DynamicDimensionInferenceVisitor::HandleConvolution( if (dimension == dimension_numbers.input_batch_dimension()) { parent_->SetDynamicSize(conv, {}, dimension_numbers.output_batch_dimension(), - dynamic_size, constraint); + dynamic_size); return Status::OK(); } @@ -542,20 +521,18 @@ Status DynamicDimensionInferenceVisitor::HandleConcatenate( dim_size_total, dynamic_dim)); } parent_->SetDynamicSize(hlo, {}, hlo->concatenate_dimension(), - dim_size_total, DimensionConstraint(1, 1)); + dim_size_total); } // Simply pass through non-concat dynamic dimensions. 
return ForEachOperandDynamicDimension( hlo, [&](HloInstruction* operand, ShapeIndex index, int64 dimension, - int64 operand_index, HloInstruction* dynamic_size, - DimensionConstraint constraint) { + int64 operand_index, HloInstruction* dynamic_size) { int64 concatenate_dimension = hlo->concatenate_dimension(); if (concatenate_dimension == dimension) { return Status::OK(); } - parent_->SetDynamicSize(hlo, index, dimension, dynamic_size, - constraint); + parent_->SetDynamicSize(hlo, index, dimension, dynamic_size); return Status::OK(); }); } @@ -596,18 +573,15 @@ Status DynamicDimensionInferenceVisitor::HandleSetDimensionSize( if (!dimension_is_static) { // Propagate dynamic dimension indicated by this set dimension size // instruction. - parent_->SetDynamicSize(hlo, {}, hlo->dimension(), hlo->mutable_operand(1), - DimensionConstraint(1, 1)); + parent_->SetDynamicSize(hlo, {}, hlo->dimension(), hlo->mutable_operand(1)); } // Also Propagate dynamic dimension already set by operands. TF_RETURN_IF_ERROR(ForEachOperandDynamicDimension( hlo, [&](HloInstruction* operand, ShapeIndex index, int64 dimension, - int64 operand_index, HloInstruction* dynamic_size, - DimensionConstraint constraint) { + int64 operand_index, HloInstruction* dynamic_size) { if (dimension != hlo->dimension()) { - parent_->SetDynamicSize(hlo, index, dimension, dynamic_size, - constraint); + parent_->SetDynamicSize(hlo, index, dimension, dynamic_size); } return Status::OK(); })); @@ -619,10 +593,8 @@ Status DynamicDimensionInferenceVisitor::PassThroughDynamicDimension( HloInstruction* hlo) { return ForEachOperandDynamicDimension( hlo, [&](HloInstruction* operand, ShapeIndex index, int64 dimension, - int64 operand_index, HloInstruction* dynamic_size, - DimensionConstraint constraint) { - parent_->SetDynamicSize(hlo, index, dimension, dynamic_size, - constraint); + int64 operand_index, HloInstruction* dynamic_size) { + parent_->SetDynamicSize(hlo, index, dimension, dynamic_size); return Status::OK(); }); } @@ -654,8 +626,7 @@ Status DynamicDimensionInferenceVisitor::HandleReshape(HloInstruction* hlo) { hlo, [&](HloInstruction* operand, ShapeIndex index, int64 input_dynamic_dimension, int64 operand_index, - HloInstruction* operand_dynamic_size, - DimensionConstraint constraint) -> Status { + HloInstruction* operand_dynamic_size) -> Status { HloInstruction* reshape = hlo; if (reshape->shape().rank() == 0) { VLOG(0) << "Reshaping a dynamic dimension into a scalar, which has " @@ -751,9 +722,6 @@ Status DynamicDimensionInferenceVisitor::HandleReshape(HloInstruction* hlo) { if (output_dynamic_dimension == -1 && output_dim_end - output_dim_start > 1) { - // TODO(yunxing): We now have a better way to decide output dimension - // in the bridge. No need for this constraint propagation logic. - // // One input dimension is splitted into multiple output dimensions. // Output dimension is decomposed from input most major dimension. // In this case, we don't know which one is dynamic, e.g., when we @@ -770,61 +738,17 @@ Status DynamicDimensionInferenceVisitor::HandleReshape(HloInstruction* hlo) { // We use the following logics to disambiguate: // 1. If the user sets "inferred_dimension", then use that as // dynamic dimension. + // 2. If the one dimension in the reshape is dynamic, use that as + // dynamic dimension. + // E.g.: + // [<=4] + // | + // reshape + // | + // [1, <=2, 2] + // We use second dim as dynamic dimension. // - // 2. 
Use the "multiple_of" constraint, e.g, : - // [<=2, 4] - // | Reshape - // [<=8] - // | Reshape - // [2, 4] // Which is dynamic? - // - // If the dynamic value has to be multiple of 4 (constraint - // created by the first reshape), then 2 must be the dynamic - // dimension. - // - // But this logic doesn't help with the case where two - // dimensions are the same: - // - // [<=3, 3] - // | Reshape - // [<=9] - // | Reshape - // [3, 3] // Which is dynamic? - // - // Both dynamic dimension can be multiple of 3. - // - // We then need the next constraint to disambiguate this case: - // - // 3. Use the "stride" constraint (also see the comment at the - // definition): - // - // [<=3, 3] - // | Reshape - // [<=9] // constraint.stride = 1 - // | Reshape - // [3, 3] - // ^ ^ - // | | - // stride= 1 3 - // - // Each dimension will have different strides, only one will - // satisfy the stride constraint. - // - // Note that the stride constrint itself is not enough: - // - // - // [<=128] - // | Reshape - // [1, 128] - // ^ ^ - // | | - // stride= 1 1 - // - // In this case, both dimensions have the same stride, which is - // ambiguous. That's why we need the "multiple_of" constraint - // as used above. - // - // 4. If all logics above cannot disambiguate, e.g.,: + // 3. If all logics above cannot disambiguate, e.g.,: // // [<=1] // | @@ -833,68 +757,15 @@ Status DynamicDimensionInferenceVisitor::HandleReshape(HloInstruction* hlo) { // [1, 1, 1] // // We bail out and return an error. + // TODO(yunxing): Further simplify this, remove 1. and fully rely + // on 2. output_dynamic_dimension = reshape->inferred_dimension(); if (output_dynamic_dimension == -1) { - // The user of XLA didn't specify a dynamic dimension, try infer - // it from the current constraint. - // - // Find all output dimensions that are decomposed from the first - // dimension. Among those dimensions, find all dimensions that - // satisfy the constraint of the dynamic dimension. In the - // previous example, if `a` is 9 and constraint is a multiple of - // `3', then in the output shape both a/c and c can be dynamic. - int64 current_product = 1; - int64 dimension_iter = output_dim_start; - - // compatible_dimensions are dimensions that satisfies - // "multiple_of" constraints. - std::vector compatible_dimensions; - while (current_product < - operand->shape().dimensions(input_dynamic_dimension)) { - current_product *= reshape->shape().dimensions(dimension_iter); - if (operand->shape().dimensions(input_dynamic_dimension) / - reshape->shape().dimensions(dimension_iter) == - constraint.multiple_of) { - compatible_dimensions.push_back(dimension_iter); + // Try find dynamic dimension from the result shape. + for (int64 i = 0; i < reshape->shape().rank(); ++i) { + if (reshape->shape().is_dynamic_dimension(i)) { + output_dynamic_dimension = i; } - dimension_iter++; - } - CHECK_EQ(current_product, - operand->shape().dimensions(input_dynamic_dimension)) - << "Not a valid reshape: " << hlo->ToString(); - // If there is only one compatible dimension, it must be the - // dynamic one in the output. - if (compatible_dimensions.size() == 1) { - output_dynamic_dimension = compatible_dimensions[0]; - } - - // When there are multiple compatible dimensions, e.g: - // [<=9] - // | Reshape - // [3, 3] - // Use stride constraint to figure out which one is the true - // dynamic one. 
- // - // [<=9] - // | Reshape - // [3, 3] - // ^ ^ - // | | - // stride= 1 3 - // - std::vector compatible_dimensions_with_stride; - absl::c_copy_if( - compatible_dimensions, - std::back_inserter(compatible_dimensions_with_stride), - [&](int64 dimension) { - int64 stride_total = 1; - for (int64 i = 0; i < dimension + 1; ++i) { - stride_total *= reshape->shape().dimensions(dimension); - } - return stride_total == constraint.stride; - }); - if (compatible_dimensions_with_stride.size() == 1) { - output_dynamic_dimension = compatible_dimensions_with_stride[0]; } } @@ -914,9 +785,8 @@ Status DynamicDimensionInferenceVisitor::HandleReshape(HloInstruction* hlo) { return InvalidArgument( "Reshape's input dynamic dimension is decomposed into " "multiple output dynamic dimensions, but the constraint is " - "ambiguous and XLA can't infer the output dimension %s. " - "Constraint: multiple_of: %lld, stride: %lld", - hlo->ToString(), constraint.multiple_of, constraint.stride); + "ambiguous and XLA can't infer the output dimension %s. ", + hlo->ToString()); } } @@ -931,7 +801,7 @@ Status DynamicDimensionInferenceVisitor::HandleReshape(HloInstruction* hlo) { if (input_dim_size == output_dim_size) { // Simply forward dynamic dimension. parent_->SetDynamicSize(reshape, {}, output_dynamic_dimension, - operand_dynamic_size, constraint); + operand_dynamic_size); } if (input_dim_size > output_dim_size) { @@ -946,9 +816,8 @@ Status DynamicDimensionInferenceVisitor::HandleReshape(HloInstruction* hlo) { operand_dynamic_size->shape(), HloOpcode::kDivide, operand_dynamic_size, divisor_hlo)); - parent_->SetDynamicSize( - reshape, {}, output_dynamic_dimension, new_dynamic_size, - DimensionConstraint(1, constraint.multiple_of / divisor)); + parent_->SetDynamicSize(reshape, {}, output_dynamic_dimension, + new_dynamic_size); } if (input_dim_size < output_dim_size) { @@ -985,12 +854,8 @@ Status DynamicDimensionInferenceVisitor::HandleReshape(HloInstruction* hlo) { hlo->parent()->AddInstruction(HloInstruction::CreateBinary( output_dynamic_size->shape(), HloOpcode::kMultiply, new_dynamic_size, operand_dynamic_size)); - int64 new_multiple_of_constraint = - constraint.multiple_of * output_dim_size / - operand->shape().dimensions(input_dynamic_dimension); - parent_->SetDynamicSize( - reshape, {}, output_dynamic_dimension, new_dynamic_size, - DimensionConstraint(1, new_multiple_of_constraint)); + parent_->SetDynamicSize(reshape, {}, output_dynamic_dimension, + new_dynamic_size); } return Status::OK(); @@ -1001,8 +866,7 @@ Status DynamicDimensionInferenceVisitor::HandleReduceWindow( HloInstruction* hlo) { return ForEachOperandDynamicDimension( hlo, [&](HloInstruction* operand, ShapeIndex index, int64 dimension, - int64 operand_index, HloInstruction* dynamic_size, - DimensionConstraint constraint) { + int64 operand_index, HloInstruction* dynamic_size) { HloInstruction* reduce_window = hlo; const WindowDimension& window_dimension = reduce_window->window().dimensions(dimension); @@ -1013,8 +877,7 @@ Status DynamicDimensionInferenceVisitor::HandleReduceWindow( reduce_window->ToString()); } - parent_->SetDynamicSize(reduce_window, {}, dimension, dynamic_size, - constraint); + parent_->SetDynamicSize(reduce_window, {}, dimension, dynamic_size); return Status::OK(); }); @@ -1024,8 +887,7 @@ Status DynamicDimensionInferenceVisitor::HandleSelectAndScatter( HloInstruction* hlo) { return ForEachOperandDynamicDimension( hlo, [&](HloInstruction* operand, ShapeIndex index, int64 dimension, - int64 operand_index, HloInstruction* dynamic_size, 
- DimensionConstraint constraint) { + int64 operand_index, HloInstruction* dynamic_size) { HloInstruction* select_and_scatter = hlo; const WindowDimension& window_dimension = select_and_scatter->window().dimensions(dimension); @@ -1036,8 +898,8 @@ Status DynamicDimensionInferenceVisitor::HandleSelectAndScatter( select_and_scatter->ToString()); } - parent_->SetDynamicSize(select_and_scatter, {}, dimension, dynamic_size, - constraint); + parent_->SetDynamicSize(select_and_scatter, {}, dimension, + dynamic_size); return Status::OK(); }); @@ -1046,8 +908,7 @@ Status DynamicDimensionInferenceVisitor::HandleSelectAndScatter( Status DynamicDimensionInferenceVisitor::HandleSlice(HloInstruction* hlo) { return ForEachOperandDynamicDimension( hlo, [&](HloInstruction* operand, ShapeIndex /*index*/, int64 dimension, - int64 /*operand_index*/, HloInstruction* dynamic_size, - DimensionConstraint constraint) { + int64 /*operand_index*/, HloInstruction* dynamic_size) { if (hlo->slice_starts(dimension) != 0 || hlo->slice_strides(dimension) != 1 || hlo->slice_limits(dimension) != @@ -1056,7 +917,7 @@ Status DynamicDimensionInferenceVisitor::HandleSlice(HloInstruction* hlo) { return Status::OK(); } - parent_->SetDynamicSize(hlo, {}, dimension, dynamic_size, constraint); + parent_->SetDynamicSize(hlo, {}, dimension, dynamic_size); return Status::OK(); }); @@ -1066,8 +927,7 @@ Status DynamicDimensionInferenceVisitor::HandleDynamicSlice( HloInstruction* hlo) { return ForEachOperandDynamicDimension( hlo, [&](HloInstruction*, ShapeIndex /*index*/, int64 dimension, - int64 /*operand_index*/, HloInstruction* dynamic_size, - DimensionConstraint constraint) { + int64 /*operand_index*/, HloInstruction* dynamic_size) { if (hlo->shape().dimensions(dimension) != hlo->operand(0)->shape().dimensions(dimension)) { // Slicing a single element out kills the dynamic dimension. 
@@ -1080,7 +940,7 @@ Status DynamicDimensionInferenceVisitor::HandleDynamicSlice( hlo->ToString()); } - parent_->SetDynamicSize(hlo, {}, dimension, dynamic_size, constraint); + parent_->SetDynamicSize(hlo, {}, dimension, dynamic_size); return Status::OK(); }); @@ -1089,9 +949,9 @@ Status DynamicDimensionInferenceVisitor::HandleDynamicSlice( Status DynamicDimensionInferenceVisitor::HandleDynamicUpdateSlice( HloInstruction* hlo) { return ForEachOperandDynamicDimension( - hlo, [&](HloInstruction* /*operand*/, ShapeIndex /*index*/, - int64 dimension, int64 /*operand_index*/, - HloInstruction* dynamic_size, DimensionConstraint constraint) { + hlo, + [&](HloInstruction* /*operand*/, ShapeIndex /*index*/, int64 dimension, + int64 /*operand_index*/, HloInstruction* dynamic_size) { if (hlo->shape().dimensions(dimension) != hlo->operand(0)->shape().dimensions(dimension)) { return Unimplemented( @@ -1100,7 +960,7 @@ Status DynamicDimensionInferenceVisitor::HandleDynamicUpdateSlice( hlo->ToString()); } - parent_->SetDynamicSize(hlo, {}, dimension, dynamic_size, constraint); + parent_->SetDynamicSize(hlo, {}, dimension, dynamic_size); return Status::OK(); }); @@ -1108,16 +968,16 @@ Status DynamicDimensionInferenceVisitor::HandleDynamicUpdateSlice( Status DynamicDimensionInferenceVisitor::HandleReverse(HloInstruction* hlo) { return ForEachOperandDynamicDimension( - hlo, [&](HloInstruction* /*operand*/, ShapeIndex /*index*/, - int64 dimension, int64 /*operand_index*/, - HloInstruction* dynamic_size, DimensionConstraint constraint) { + hlo, + [&](HloInstruction* /*operand*/, ShapeIndex /*index*/, int64 dimension, + int64 /*operand_index*/, HloInstruction* dynamic_size) { if (absl::c_linear_search(hlo->dimensions(), dimension)) { return Unimplemented( "Dynamic dimension propagation on reversed dimension is not " "supported %s", hlo->ToString()); } - parent_->SetDynamicSize(hlo, {}, dimension, dynamic_size, constraint); + parent_->SetDynamicSize(hlo, {}, dimension, dynamic_size); return Status::OK(); }); @@ -1127,7 +987,7 @@ Status DynamicDimensionInferenceVisitor::HandleGather(HloInstruction* hlo) { return ForEachOperandDynamicDimension( hlo, [&](HloInstruction* operand, ShapeIndex /*index*/, int64 input_dynamic_dimension, int64 operand_index, - HloInstruction* dynamic_size, DimensionConstraint constraint) { + HloInstruction* dynamic_size) { const GatherDimensionNumbers& gather_dims = hlo->gather_dimension_numbers(); if (operand_index != 1) { @@ -1147,8 +1007,7 @@ Status DynamicDimensionInferenceVisitor::HandleGather(HloInstruction* hlo) { output_dimension--; } } - parent_->SetDynamicSize(hlo, {}, output_dimension, dynamic_size, - constraint); + parent_->SetDynamicSize(hlo, {}, output_dimension, dynamic_size); return Status::OK(); } return Unimplemented( @@ -1171,8 +1030,7 @@ Status DynamicDimensionInferenceVisitor::HandleGather(HloInstruction* hlo) { indices_dim++; } if (indices_dim++ == input_dynamic_dimension) { - parent_->SetDynamicSize(hlo, {}, output_dim, dynamic_size, - constraint); + parent_->SetDynamicSize(hlo, {}, output_dim, dynamic_size); return Status::OK(); } } @@ -1220,8 +1078,7 @@ Status DynamicDimensionInferenceVisitor::HandleConditional( TF_RETURN_IF_ERROR(ForEachDynamicDimensionInOperand( hlo, operand_index, [&](HloInstruction*, ShapeIndex, int64, int64, - HloInstruction* dynamic_size, - DimensionConstraint constraint) -> Status { + HloInstruction* dynamic_size) -> Status { TF_RET_CHECK(hlo->operand(operand_index)->shape().IsTuple()) << "Only tuple typed inputs can have dynamic 
dimension. Please " "file a bug against XLA team."; @@ -1263,8 +1120,7 @@ Status DynamicDimensionInferenceVisitor::HandleConditional( TF_RETURN_IF_ERROR(ForEachDynamicDimensionInOperand( hlo, operand_index, [&](HloInstruction*, ShapeIndex index, int64 dimension, - int64 operand_index, HloInstruction* dynamic_size, - DimensionConstraint constraint) { + int64 operand_index, HloInstruction* dynamic_size) { DynamicParameterBinding::DynamicParameter dynamic_parameter{ 0, {dynamic_size_to_operand_id_index_map[dynamic_size]}}; DynamicParameterBinding::DynamicDimension dynamic_dimension{ @@ -1284,8 +1140,8 @@ Status DynamicDimensionInferenceVisitor::HandleConditional( // that into the root instruction as additional tuple elements. TF_RETURN_IF_ERROR(ForEachDynamicDimension( new_computation->root_instruction(), - [&](ShapeIndex index, int64 dim, HloInstruction* dynamic_size, - DimensionConstraint) -> Status { + [&](ShapeIndex index, int64 dim, + HloInstruction* dynamic_size) -> Status { TF_RET_CHECK(hlo->shape().IsTuple()) << "Only tuple typed conditionals can have dynamic dimension. " "Please file a bug against XLA team."; @@ -1347,11 +1203,9 @@ Status DynamicDimensionInferenceVisitor::HandleScatter(HloInstruction* hlo) { return ForEachOperandDynamicDimension( hlo, [&](HloInstruction* /*operand*/, ShapeIndex /*index*/, int64 dimension, - int64 operand_index, HloInstruction* operand_dynamic_size, - DimensionConstraint constraint) { + int64 operand_index, HloInstruction* operand_dynamic_size) { if (operand_index == 0) { - parent_->SetDynamicSize(hlo, {}, dimension, operand_dynamic_size, - constraint); + parent_->SetDynamicSize(hlo, {}, dimension, operand_dynamic_size); return Status::OK(); } @@ -1385,7 +1239,7 @@ Status DynamicDimensionInferenceVisitor::HandleWhile(HloInstruction* hlo) { int64 operand_count = original_tuple_count; TF_RETURN_IF_ERROR(ForEachOperandDynamicDimension( hlo, [&](HloInstruction*, ShapeIndex index, int64 dim, int64, - HloInstruction* dynamic_size, DimensionConstraint constraint) { + HloInstruction* dynamic_size) { operands_to_add.push_back(dynamic_size); dynamic_output_mapping.mutable_element(index)->emplace(dim, operand_count++); @@ -1413,8 +1267,7 @@ Status DynamicDimensionInferenceVisitor::HandleWhile(HloInstruction* hlo) { TF_RETURN_IF_ERROR(ForEachOperandDynamicDimension( hlo, [&](HloInstruction*, ShapeIndex index, int64 dimension, - int64 operand_index, HloInstruction* dynamic_size, - DimensionConstraint constraint) -> Status { + int64 operand_index, HloInstruction* dynamic_size) -> Status { TF_RET_CHECK(!operands_to_add.empty()); const int64 output_dynamic_size_index = dynamic_output_mapping.element(index).at(dimension); @@ -1431,7 +1284,7 @@ Status DynamicDimensionInferenceVisitor::HandleWhile(HloInstruction* hlo) { ShapeUtil::MakeScalarShape(S32), hlo, output_dynamic_size_index)); parent_->SetDynamicSize(result.replacement_instr, index, dimension, - output_dynamic_size, constraint); + output_dynamic_size); return Status::OK(); })); // Set the replacement instruction as visited to avoid visiting it again. @@ -1465,8 +1318,7 @@ Status DynamicDimensionInferenceVisitor::HandleWhile(HloInstruction* hlo) { // Add dynamic dimension size as new parameters. 
TF_RETURN_IF_ERROR(ForEachDynamicDimension( hlo->while_body()->root_instruction(), - [&](ShapeIndex index, int64 dim, HloInstruction* dynamic_size, - DimensionConstraint) -> Status { + [&](ShapeIndex index, int64 dim, HloInstruction* dynamic_size) -> Status { const int64 output_index = dynamic_output_mapping.element(index).at(dim); new_root_operands[output_index] = dynamic_size; @@ -1503,8 +1355,7 @@ Status DynamicDimensionInferenceVisitor::HandleParameter(HloInstruction* hlo) { parent_->SetDynamicSize(target_parameter, dynamic_dimension.parameter_index, - dynamic_dimension.dimension, dynamic_size, - DimensionConstraint(1, 1)); + dynamic_dimension.dimension, dynamic_size); return Status::OK(); }); } @@ -1517,10 +1368,8 @@ Status DynamicDimensionInferenceVisitor::ForEachDynamicDimension( HloInstruction* dynamic_size = parent_->GetDynamicSize( dynamic_dimension.inst, dynamic_dimension.index, dynamic_dimension.dim); - CHECK_NE(parent_->constraint_mapping_.count(dynamic_dimension), 0); - TF_RETURN_IF_ERROR(fn(dynamic_dimension.index, dynamic_dimension.dim, - dynamic_size, - parent_->constraint_mapping_[dynamic_dimension])); + TF_RETURN_IF_ERROR( + fn(dynamic_dimension.index, dynamic_dimension.dim, dynamic_size)); } } return Status::OK(); @@ -1536,10 +1385,9 @@ Status DynamicDimensionInferenceVisitor::ForEachDynamicDimensionInOperand( HloInstruction* dynamic_size = parent_->GetDynamicSize( dynamic_dimension.inst, dynamic_dimension.index, dynamic_dimension.dim); - CHECK_NE(parent_->constraint_mapping_.count(dynamic_dimension), 0); TF_RETURN_IF_ERROR(fn(dynamic_dimension.inst, dynamic_dimension.index, - dynamic_dimension.dim, operand_index, dynamic_size, - parent_->constraint_mapping_[dynamic_dimension])); + dynamic_dimension.dim, operand_index, + dynamic_size)); } } return Status::OK(); @@ -1555,6 +1403,24 @@ Status DynamicDimensionInferenceVisitor::ForEachOperandDynamicDimension( return Status::OK(); } +void DynamicDimensionInference::SetDynamicSize(HloInstruction* inst, + const ShapeIndex& index, + int64 dim, + HloInstruction* size) { + VLOG(1) << "Set dimension inst " << inst->ToString() << " index " + << index.ToString() << "@" << dim << " to " << size->ToShortString(); + Shape subshape = ShapeUtil::GetSubshape(inst->shape(), index); + CHECK(!subshape.IsTuple()) << "Can't set a tuple shape to dynamic dimension"; + CHECK(dim < subshape.rank() && dim >= 0) + << "Asked to set invalid dynamic dimension. Shape: " + << subshape.ToString() << ", Dimension: " << dim; + DynamicDimension dynamic_dimension{inst, index, dim}; + // Updating a dynamic dimension twice overwrites the previous one. 
+ dynamic_mapping_[dynamic_dimension] = size; + auto iter = per_hlo_dynamic_dimensions_.try_emplace(inst); + iter.first->second.emplace(dynamic_dimension); +} + void DynamicDimensionInference::CopyMapping(HloInstruction* from, HloInstruction* to) { auto iter = per_hlo_dynamic_dimensions_.find(from); @@ -1564,7 +1430,7 @@ void DynamicDimensionInference::CopyMapping(HloInstruction* from, GetDynamicSize(dynamic_dimension.inst, dynamic_dimension.index, dynamic_dimension.dim); SetDynamicSize(to, dynamic_dimension.index, dynamic_dimension.dim, - dynamic_size, constraint_mapping_[dynamic_dimension]); + dynamic_size); } } } @@ -1624,8 +1490,6 @@ Status DynamicDimensionInference::ForwardDynamicSize(HloInstruction* inst, auto iter = dynamic_mapping_.find(dynamic_dimension); if (iter != dynamic_mapping_.end()) { dynamic_mapping_.insert({dynamic_dimension_new, iter->second}); - constraint_mapping_.insert( - {dynamic_dimension_new, constraint_mapping_[dynamic_dimension]}); auto iter = per_hlo_dynamic_dimensions_.try_emplace(new_inst); iter.first->second.emplace(dynamic_dimension_new); } diff --git a/tensorflow/compiler/xla/service/dynamic_dimension_inference.h b/tensorflow/compiler/xla/service/dynamic_dimension_inference.h index 607d68bd9c3..1597538e9ac 100644 --- a/tensorflow/compiler/xla/service/dynamic_dimension_inference.h +++ b/tensorflow/compiler/xla/service/dynamic_dimension_inference.h @@ -55,8 +55,7 @@ class DynamicDimensionInference { // go into tuples. bool HasDynamicDimension(HloInstruction* inst) const; - // Forward dynamic dimension size at `dim` and its constraint from `inst` to - // `new_inst`. + // Forward dynamic dimension size at `dim` from `inst` to `new_inst`. Status ForwardDynamicSize(HloInstruction* inst, HloInstruction* new_inst, const ShapeIndex& index); @@ -64,9 +63,7 @@ class DynamicDimensionInference { // `inst` at `index` has a dynamic size, and its runtime size is represented // by a scalar instruction `size`. void SetDynamicSize(HloInstruction* inst, const ShapeIndex& index, int64 dim, - HloInstruction* size) { - SetDynamicSize(inst, index, dim, size, DimensionConstraint(1, 1)); - } + HloInstruction* size); // For all tensors whose dynamic dimension is `replace`, replace them with // `with`. @@ -106,116 +103,6 @@ class DynamicDimensionInference { } }; - // DimensionConstraint is attached to each dynamic dimension and describe the - // constraint of each dimension. This is used to disambiguate the index of - // dynamic dimension for reshapes that "splits" a dimension into two. - // - // As an example, consider the following reshapes: - // [<=3, 3] <- Assume first dimension is dynamic. - // | - // Reshape.1 - // | - // [<=9] <- Dimension 9 is dynamic - // | - // Reshape.2 - // | - // [3, 3] <- Ambiguous dimension after splitting 9 into [3, 3] - // - // There is no way to know which dimension is dynamic by looking at the second - // reshape locally. 
- // - // However, if we look at the dynamic dimension 9, since it comes from - // collapsing a major dynamic dimension of 3 (the dynamic size can be 0, 1, 2, - // 3, denoted as i in the diagram below) and a minor static dimension of 3, we - // know it has certain constraints that the reshape can only be one of the 4 - // forms: - // - // o: Padded Data - // x: Effective Data - // - // [<=3, 3] to [9] - // - // +---+ +---+ +---+ +---+ - // |ooo| |ooo| |ooo| |xxx| - // |ooo| |ooo| |xxx| |xxx| - // |ooo| |xxx| |xxx| |xxx| - // +---+ +---+ +---+ +---+ - // - // Reshape Reshape Reshape Reshape - // - // +-----------+ +-----------+ +-----------+ +-----------+ - // |ooo|ooo|ooo| or |xxx|ooo|ooo| or |xxx|xxx|ooo| or |xxx|xxx|xxx| stride=1 - // +-----------+ +-----------+ +-----------+ +-----------+ - // i = 0 i = 1 i = 2 i = 3 - // - // On the other hand, if the minor dimension 3 is dynamic and major dimension - // is static, we will have the following form: - // - // [3, <=3] to [9] - // - // +---+ +---+ +---+ +---+ - // |ooo| |xoo| |xxo| |xxx| - // |ooo| |xoo| |xxo| |xxx| - // |ooo| |xoo| |xxo| |xxx| - // +---+ +---+ +---+ +---+ - // - // Reshape Reshape Reshape Reshape - // - // +-----------+ +-----------+ +-----------+ +-----------+ - // |ooo|ooo|ooo| or |xoo|xoo|xoo| or |xxo|xxo|xxo| or |xxo|xxo|xxo| stride=3 - // +-----------+ +-----------+ +-----------+ +-----------+ - // i = 0 i = 1 i = 2 i = 3 - // - // By encoding constraint as a stride of elements we can recover this - // information later when we reshape from [9] to [3, 3]. We know which form - // ([3, i] or [i,3]) we should reshape the [9] into. - // - // - struct DimensionConstraint { - explicit DimensionConstraint(int64 s, int64 m) - : stride(s), multiple_of(m) {} - DimensionConstraint() : stride(1), multiple_of(1) {} - // Stride represents the distance of a newly placed element and the previous - // placed element on this dynamic dimension. - int64 stride; - - // multiple_of represents the constraints that - // - // `dynamic_size` % `multiple_of` == 0 - int64 multiple_of; - }; - - using ConstraintMapping = - absl::flat_hash_map; - - ConstraintMapping constraint_mapping_; - - // Update the dynamic mapping so that we know dimension `dim` of instruction - // `inst` at `index` has a dynamic size, and its runtime size is represented - // by a scalar instruction `size`. - void SetDynamicSize(HloInstruction* inst, const ShapeIndex& index, int64 dim, - HloInstruction* size, DimensionConstraint constraint) { - VLOG(1) << "Set dimension inst " << inst->ToString() << " index " - << index.ToString() << "@" << dim << " to " << size->ToShortString() - << " constraint: " << constraint.multiple_of; - Shape subshape = ShapeUtil::GetSubshape(inst->shape(), index); - CHECK(!subshape.IsTuple()) - << "Can't set a tuple shape to dynamic dimension"; - CHECK(dim < subshape.rank() && dim >= 0) - << "Asked to set invalid dynamic dimension. Shape: " - << subshape.ToString() << ", Dimension: " << dim; - DynamicDimension dynamic_dimension{inst, index, dim}; - // Updating a dynamic dimension twice overwrites the previous one. - dynamic_mapping_[dynamic_dimension] = size; - if (constraint_mapping_.count(dynamic_dimension) != 0) { - CHECK_EQ(constraint_mapping_[dynamic_dimension].stride, - constraint.stride); - } - constraint_mapping_[dynamic_dimension] = constraint; - auto iter = per_hlo_dynamic_dimensions_.try_emplace(inst); - iter.first->second.emplace(dynamic_dimension); - } - // Copies the internal mapping from instruction `from` to instruction `to`. 
// This is useful when an instruction is replaced by the other during the // inferencing process. From 5ed81c1a8095d1583c26b7113e23755b89239090 Mon Sep 17 00:00:00 2001 From: Jian Li Date: Thu, 23 Jul 2020 14:50:16 -0700 Subject: [PATCH 1211/2522] Enable error report for all unsupported ops in post training quantization. PiperOrigin-RevId: 322871464 Change-Id: Ica684531b8be4a214ea07f1862bbd782125fa8fb --- tensorflow/lite/tools/optimize/quantize_model.cc | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/tools/optimize/quantize_model.cc b/tensorflow/lite/tools/optimize/quantize_model.cc index 5cca49ede28..ca9a51abefe 100644 --- a/tensorflow/lite/tools/optimize/quantize_model.cc +++ b/tensorflow/lite/tools/optimize/quantize_model.cc @@ -931,6 +931,10 @@ TfLiteStatus QuantizeWeightsInputOutput( const std::unordered_set& operator_names, const std::unordered_set& real_value_op_set, const TensorType& activations_type, ErrorReporter* error_reporter) { + // Flag to track unsupported ops. + bool quantization_not_supported = false; + + // Loop over the graph and quantize ops. for (size_t subgraph_idx = 0; subgraph_idx < model->subgraphs.size(); subgraph_idx++) { SubGraphT* subgraph = model->subgraphs.at(subgraph_idx).get(); @@ -952,12 +956,12 @@ TfLiteStatus QuantizeWeightsInputOutput( error_reporter, "Quantization to 16x8-bit not yet supported for op: '%s'.\n", EnumNameBuiltinOperator(op_code)); - return kTfLiteError; + quantization_not_supported = true; } else if (!property.quantizable && !allow_float) { TF_LITE_REPORT_ERROR(error_reporter, "Quantization not yet supported for op: '%s'.\n", EnumNameBuiltinOperator(op_code)); - return kTfLiteError; + quantization_not_supported = true; } // Quantize operator inputs/weights. @@ -977,6 +981,11 @@ TfLiteStatus QuantizeWeightsInputOutput( } } } + + // Return; emit errors if there are any. + if (quantization_not_supported) { + return kTfLiteError; + } return kTfLiteOk; } From e8c1ea7130a028b7e91bb5a3a2fb92c1ad8a848f Mon Sep 17 00:00:00 2001 From: Pablo Samuel Castro Date: Thu, 23 Jul 2020 14:57:37 -0700 Subject: [PATCH 1212/2522] Visibility change for internal package. 
PiperOrigin-RevId: 322872965 Change-Id: I31f389ca4d038d2f90dcaa36e23fa35990005694 --- tensorflow/python/BUILD | 1 + tensorflow/python/data/BUILD | 1 + 2 files changed, 2 insertions(+) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 2c11ecfce7e..a10913b6c4b 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -3135,6 +3135,7 @@ py_library( "ops/inplace_ops.py", ], srcs_version = "PY2AND3", + visibility = visibility, deps = [ ":array_ops_gen", ":common_shapes", diff --git a/tensorflow/python/data/BUILD b/tensorflow/python/data/BUILD index d4f4f8055d7..882cc66b673 100644 --- a/tensorflow/python/data/BUILD +++ b/tensorflow/python/data/BUILD @@ -9,6 +9,7 @@ py_library( name = "data", srcs = ["__init__.py"], srcs_version = "PY2AND3", + visibility = ["//tensorflow:internal"], deps = [ "//tensorflow/python:util", "//tensorflow/python/data/experimental", From ba6d661ba81671aba5c7ce30e000b40d71f02b58 Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Thu, 23 Jul 2020 22:08:33 +0000 Subject: [PATCH 1213/2522] clean up --- tensorflow/c/kernels/ops/summary.cc | 4 ---- tensorflow/c/kernels/summary_op.cc | 8 +++----- tensorflow/c/kernels/tensor_shape_utils_test.cc | 2 +- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/tensorflow/c/kernels/ops/summary.cc b/tensorflow/c/kernels/ops/summary.cc index 20a935aeb0a..a9c3b697f14 100644 --- a/tensorflow/c/kernels/ops/summary.cc +++ b/tensorflow/c/kernels/ops/summary.cc @@ -22,10 +22,6 @@ static void scalar_summary_shape_inference_fn(TF_ShapeInferenceContext* ctx, TF_SetStatus(status, TF_OK, ""); TF_ShapeHandle* result = TF_ShapeInferenceContextScalar(ctx); TF_ShapeInferenceContextSetOutput(ctx, 0, result, status); - if (TF_GetCode(status) != TF_OK) { - TF_SetStatus(status, TF_INVALID_ARGUMENT, - "Error in setting output shape inference"); - } TF_DeleteShapeHandle(result); } diff --git a/tensorflow/c/kernels/summary_op.cc b/tensorflow/c/kernels/summary_op.cc index 6b611be7e4f..5d98d0c0477 100644 --- a/tensorflow/c/kernels/summary_op.cc +++ b/tensorflow/c/kernels/summary_op.cc @@ -47,7 +47,6 @@ struct Params { } }; - // dummy functions used for kernel registration void* ScalarSummaryOp_Create(TF_OpKernelConstruction* ctx) { return nullptr; @@ -66,7 +65,7 @@ tensorflow::string SingleTag(TF_Tensor* tags); template void ScalarSummaryOp_Compute(void* kernel, TF_OpKernelContext* ctx) { Params params(ctx); - if (TF_GetCode(params.status) != TF_OK){ + if (TF_GetCode(params.status) != TF_OK) { TF_OpKernelContext_Failure(ctx, params.status); return; } @@ -167,6 +166,5 @@ TF_ATTRIBUTE_UNUSED bool IsScalarSummaryOpKernelRegistered = []() { RegisterScalarSummaryOpKernel(); } return true; -}(); - -} // namespace +}(); +} // namespace diff --git a/tensorflow/c/kernels/tensor_shape_utils_test.cc b/tensorflow/c/kernels/tensor_shape_utils_test.cc index a08e4a67e3e..23e5940dc7b 100644 --- a/tensorflow/c/kernels/tensor_shape_utils_test.cc +++ b/tensorflow/c/kernels/tensor_shape_utils_test.cc @@ -29,7 +29,7 @@ namespace { // once out of scope. 
struct TF_TensorWrapper { TF_Tensor* tf_tensor; - TF_TensorWrapper(TF_Tensor* tensor){ + TF_TensorWrapper(TF_Tensor* tensor) { tf_tensor = tensor; } ~TF_TensorWrapper() { From 284f16a59bf03dac279967a1cd2c0d2b89edc9a0 Mon Sep 17 00:00:00 2001 From: Vignesh Kothapalli Date: Fri, 24 Jul 2020 04:02:34 +0530 Subject: [PATCH 1214/2522] added missing docstrings in dataset_utils module --- .../keras/preprocessing/dataset_utils.py | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/keras/preprocessing/dataset_utils.py b/tensorflow/python/keras/preprocessing/dataset_utils.py index 1c9d283c2f1..055f37e5ca2 100644 --- a/tensorflow/python/keras/preprocessing/dataset_utils.py +++ b/tensorflow/python/keras/preprocessing/dataset_utils.py @@ -189,6 +189,19 @@ def get_training_or_validation_split(samples, labels, validation_split, subset): def labels_to_dataset(labels, label_mode, num_classes): + """Create a tf.data.Dataset from the list/tuple of labels. + + Args: + labels: list/tuple of labels to be converted into a tf.data.Dataset. + label_mode: + - 'binary' indicates that the labels (there can be only 2) + are encoded as `float32` scalars with values 0 or 1 + (e.g. for `binary_crossentropy`). + - 'categorical' means that the labels are + mapped into a categorical vector. + (e.g. for `categorical_crossentropy` loss). + num_classes: number of classes of labels. + """ label_ds = dataset_ops.Dataset.from_tensor_slices(labels) if label_mode == 'binary': label_ds = label_ds.map( @@ -199,7 +212,17 @@ def labels_to_dataset(labels, label_mode, num_classes): def check_validation_split_arg(validation_split, subset, shuffle, seed): - """Raise errors in case of invalid argument values.""" + """Raise errors in case of invalid argument values. + + Args: + shuffle: Whether to shuffle the data. Default: True. + If set to False, sorts the data in alphanumeric order. + seed: Optional random seed for shuffling and transformations. + validation_split: Optional float between 0 and 1, + fraction of data to reserve for validation. + subset: One of "training" or "validation". + Only used if `validation_split` is set. + """ if validation_split and not 0 < validation_split < 1: raise ValueError( '`validation_split` must be between 0 and 1, received: %s' % From f3dd9c4db29f7ef2f154b781d765080477531996 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Thu, 23 Jul 2020 15:29:56 -0700 Subject: [PATCH 1215/2522] Migrate Linux GPU official release and nightly testing builds to use the new bazel configs. 
PiperOrigin-RevId: 322879678 Change-Id: I45e432b3c6db7b6d198e3e08720c854ef191e573 --- .bazelrc | 4 +++- .../release/ubuntu_16/gpu_py35_full/pip.sh | 20 +++--------------- .../release/ubuntu_16/gpu_py35_full/pip_v1.sh | 19 +++-------------- .../release/ubuntu_16/gpu_py36_full/pip.sh | 20 +++--------------- .../release/ubuntu_16/gpu_py37_full/pip.sh | 20 +++--------------- .../release/ubuntu_16/gpu_py37_full/pip_v1.sh | 21 ++++--------------- .../release/ubuntu_16/gpu_py38_full/pip.sh | 20 +++--------------- 7 files changed, 22 insertions(+), 102 deletions(-) diff --git a/.bazelrc b/.bazelrc index 6fe60261538..6a448b267e0 100644 --- a/.bazelrc +++ b/.bazelrc @@ -550,6 +550,7 @@ try-import %workspace%/.bazelrc.user # Here are bazelrc configs for release builds build:release_common --config=opt build:release_common --config=v2 +build:release_common --distinct_host_configuration=false build:release_common --action_env TF_CONFIGURE_IOS="0" build:release_cpu_linux --config=release_common @@ -567,9 +568,10 @@ build:release_gpu_common --config=tensorrt build:release_gpu_common --action_env CUDA_TOOLKIT_PATH="/usr/local/cuda-10.1" build:release_gpu_common --action_env=TF_CUDA_VERSION="10" build:release_gpu_common --action_env=TF_CUDNN_VERSION="7" +build:release_gpu_common --action_env=TF_NEED_TENSORRT="1" build:release_gpu_common --action_env=TF_CUDA_COMPUTE_CAPABILITIES="sm_35,sm_37,sm_52,sm_60,sm_61,compute_70" build:release_gpu_common --action_env=TENSORRT_INSTALL_PATH="/usr/local/tensorrt" -build:release_gpu_common --action_env=LD_LIBRARY_PATH="/usr/local/tensorrt/lib" +build:release_gpu_common --action_env=LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/tensorrt/lib" build:release_gpu_common --action_env=GCC_HOST_COMPILER_PATH="/usr/bin/gcc-5" diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/pip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/pip.sh index abf5c1db4b4..b726a85e564 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/pip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/pip.sh @@ -28,20 +28,7 @@ export CONTAINER_TYPE="GPU" export TF_PYTHON_VERSION='python3.5' # Run configure. -export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 -export TF_CUDNN_VERSION=7 -export TF_NEED_TENSORRT=1 -export TENSORRT_INSTALL_PATH=/usr/local/tensorrt -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) -export PROJECT_NAME="tensorflow_gpu" -export LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$TENSORRT_INSTALL_PATH/lib" -export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 - yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. 
@@ -49,18 +36,17 @@ source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh # Export optional variables for running pip.sh export TF_TEST_FILTER_TAGS='gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py35' -export TF_BUILD_FLAGS="--config=opt --config=v2 --config=cuda --distinct_host_configuration=false \ ---action_env=TF_CUDA_VERSION --action_env=TF_CUDNN_VERSION --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain " +export TF_BUILD_FLAGS="--config=release_gpu_linux " export TF_TEST_FLAGS="--test_tag_filters=${TF_TEST_FILTER_TAGS} --build_tag_filters=${TF_TEST_FILTER_TAGS} \ --distinct_host_configuration=false \ ---action_env=TF_CUDA_VERSION --action_env=TF_CUDNN_VERSION --test_env=TF2_BEHAVIOR=1 \ +--action_env=TF_CUDA_VERSION=10 --action_env=TF_CUDNN_VERSION=7 --test_env=TF2_BEHAVIOR=1 \ --config=cuda --test_output=errors --local_test_jobs=4 --test_lang_filters=py \ --verbose_failures=true --keep_going --define=no_tensorflow_py_deps=true \ --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute " export TF_TEST_TARGETS="${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... " export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" #export IS_NIGHTLY=0 # Not nightly; uncomment if building from tf repo. -export TF_PROJECT_NAME=${PROJECT_NAME} +export TF_PROJECT_NAME="tensorflow_gpu" export TF_PIP_TEST_ROOT="pip_test" # To build both tensorflow and tensorflow-gpu pip packages diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/pip_v1.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/pip_v1.sh index a860decbe51..8f00a7a4ad9 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/pip_v1.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/pip_v1.sh @@ -28,26 +28,13 @@ export CONTAINER_TYPE="GPU" export TF_PYTHON_VERSION='python3.5' # Run configure. -export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 -export TF_CUDNN_VERSION=7 -export TF_NEED_TENSORRT=1 -export TENSORRT_INSTALL_PATH=/usr/local/tensorrt -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) -export PROJECT_NAME="tensorflow_gpu" -export LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$TENSORRT_INSTALL_PATH/lib" -export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 - yes "" | "$PYTHON_BIN_PATH" configure.py + # Export optional variables for running pip.sh export TF_TEST_FILTER_TAGS='gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py35' -export TF_BUILD_FLAGS="--config=opt --config=cuda --distinct_host_configuration=false \ ---action_env=TF_CUDA_VERSION --action_env=TF_CUDNN_VERSION --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain " +export TF_BUILD_FLAGS="--config=release_gpu_linux " export TF_TEST_FLAGS="--test_tag_filters=${TF_TEST_FILTER_TAGS} --build_tag_filters=${TF_TEST_FILTER_TAGS} \ --distinct_host_configuration=false \ --action_env=TF_CUDA_VERSION --action_env=TF_CUDNN_VERSION \ @@ -57,7 +44,7 @@ export TF_TEST_FLAGS="--test_tag_filters=${TF_TEST_FILTER_TAGS} --build_tag_filt export TF_TEST_TARGETS="//tensorflow/python/... " export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" #export IS_NIGHTLY=0 # Not nightly; uncomment if building from tf repo. 
-export TF_PROJECT_NAME=${PROJECT_NAME} +export TF_PROJECT_NAME="tensorflow_gpu" export TF_PIP_TEST_ROOT="pip_test" # To build both tensorflow and tensorflow-gpu pip packages diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/pip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/pip.sh index 17b52d9ce6b..6fa72b86011 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/pip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/pip.sh @@ -28,20 +28,7 @@ export CONTAINER_TYPE="GPU" export TF_PYTHON_VERSION='python3.6' # Run configure. -export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 -export TF_CUDNN_VERSION=7 -export TF_NEED_TENSORRT=1 -export TENSORRT_INSTALL_PATH=/usr/local/tensorrt -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) -export PROJECT_NAME="tensorflow_gpu" -export LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$TENSORRT_INSTALL_PATH/lib" -export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 - yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. @@ -49,18 +36,17 @@ source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh # Export optional variables for running pip.sh export TF_TEST_FILTER_TAGS='gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py36' -export TF_BUILD_FLAGS="--config=opt --config=v2 --config=cuda --distinct_host_configuration=false \ ---action_env=TF_CUDA_VERSION --action_env=TF_CUDNN_VERSION --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain " +export TF_BUILD_FLAGS="--config=release_gpu_linux " export TF_TEST_FLAGS="--test_tag_filters=${TF_TEST_FILTER_TAGS} --build_tag_filters=${TF_TEST_FILTER_TAGS} \ --distinct_host_configuration=false \ ---action_env=TF_CUDA_VERSION --action_env=TF_CUDNN_VERSION --test_env=TF2_BEHAVIOR=1 \ +--action_env=TF_CUDA_VERSION=10 --action_env=TF_CUDNN_VERSION=7 --test_env=TF2_BEHAVIOR=1 \ --config=cuda --test_output=errors --local_test_jobs=4 --test_lang_filters=py \ --verbose_failures=true --keep_going --define=no_tensorflow_py_deps=true \ --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute " export TF_TEST_TARGETS="${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... " export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" #export IS_NIGHTLY=0 # Not nightly; uncomment if building from tf repo. -export TF_PROJECT_NAME=${PROJECT_NAME} +export TF_PROJECT_NAME=="tensorflow_gpu" export TF_PIP_TEST_ROOT="pip_test" # To build both tensorflow and tensorflow-gpu pip packages diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/pip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/pip.sh index 2b17849b737..53f9cb11d75 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/pip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/pip.sh @@ -28,20 +28,7 @@ export CONTAINER_TYPE="GPU" export TF_PYTHON_VERSION='python3.7' # Run configure. 
-export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 -export TF_CUDNN_VERSION=7 -export TF_NEED_TENSORRT=1 -export TENSORRT_INSTALL_PATH=/usr/local/tensorrt -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) -export PROJECT_NAME="tensorflow_gpu" -export LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$TENSORRT_INSTALL_PATH/lib" -export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 - yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. @@ -49,18 +36,17 @@ source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh # Export optional variables for running pip.sh export TF_TEST_FILTER_TAGS='gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py37' -export TF_BUILD_FLAGS="--config=opt --config=v2 --config=cuda --distinct_host_configuration=false \ ---action_env=TF_CUDA_VERSION --action_env=TF_CUDNN_VERSION --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain " +export TF_BUILD_FLAGS="--config=release_gpu_linux " export TF_TEST_FLAGS="--test_tag_filters=${TF_TEST_FILTER_TAGS} --build_tag_filters=${TF_TEST_FILTER_TAGS} \ --distinct_host_configuration=false \ ---action_env=TF_CUDA_VERSION --action_env=TF_CUDNN_VERSION --test_env=TF2_BEHAVIOR=1 \ +--action_env=TF_CUDA_VERSION=10 --action_env=TF_CUDNN_VERSION=7 --test_env=TF2_BEHAVIOR=1 \ --config=cuda --test_output=errors --local_test_jobs=4 --test_lang_filters=py \ --verbose_failures=true --keep_going --define=no_tensorflow_py_deps=true \ --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute " export TF_TEST_TARGETS="${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... " export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" #export IS_NIGHTLY=0 # Not nightly; uncomment if building from tf repo. -export TF_PROJECT_NAME=${PROJECT_NAME} +export TF_PROJECT_NAME="tensorflow_gpu" export TF_PIP_TEST_ROOT="pip_test" # To build both tensorflow and tensorflow-gpu pip packages diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/pip_v1.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/pip_v1.sh index f6128448b99..f3bebb56243 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/pip_v1.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/pip_v1.sh @@ -28,36 +28,23 @@ export CONTAINER_TYPE="GPU" export TF_PYTHON_VERSION='python3.7' # Run configure. 
-export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 -export TF_CUDNN_VERSION=7 -export TF_NEED_TENSORRT=1 -export TENSORRT_INSTALL_PATH=/usr/local/tensorrt -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) -export PROJECT_NAME="tensorflow_gpu" -export LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$TENSORRT_INSTALL_PATH/lib" -export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 - yes "" | "$PYTHON_BIN_PATH" configure.py + # Export optional variables for running pip.sh export TF_TEST_FILTER_TAGS='gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py37' -export TF_BUILD_FLAGS="--config=opt --config=cuda --distinct_host_configuration=false \ ---action_env=TF_CUDA_VERSION --action_env=TF_CUDNN_VERSION --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain " +export TF_BUILD_FLAGS="--config=release_gpu_linux " export TF_TEST_FLAGS="--test_tag_filters=${TF_TEST_FILTER_TAGS} --build_tag_filters=${TF_TEST_FILTER_TAGS} \ --distinct_host_configuration=false \ ---action_env=TF_CUDA_VERSION --action_env=TF_CUDNN_VERSION \ +--action_env=TF_CUDA_VERSION=10 --action_env=TF_CUDNN_VERSION=7 \ --config=cuda --test_output=errors --local_test_jobs=4 --test_lang_filters=py \ --verbose_failures=true --keep_going --define=no_tensorflow_py_deps=true \ --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute " export TF_TEST_TARGETS="//tensorflow/python/... " export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" #export IS_NIGHTLY=0 # Not nightly; uncomment if building from tf repo. -export TF_PROJECT_NAME=${PROJECT_NAME} +export TF_PROJECT_NAME="tensorflow_gpu" export TF_PIP_TEST_ROOT="pip_test" # To build both tensorflow and tensorflow-gpu pip packages diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/pip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/pip.sh index 1ba8c078021..ecb4e6ae523 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/pip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/pip.sh @@ -28,20 +28,7 @@ export CONTAINER_TYPE="GPU" export TF_PYTHON_VERSION='python3.8' # Run configure. -export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 -export TF_CUDNN_VERSION=7 -export TF_NEED_TENSORRT=1 -export TENSORRT_INSTALL_PATH=/usr/local/tensorrt -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) -export PROJECT_NAME="tensorflow_gpu" -export LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$TENSORRT_INSTALL_PATH/lib" -export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 - yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. 
@@ -49,18 +36,17 @@ source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh # Export optional variables for running pip.sh export TF_TEST_FILTER_TAGS='gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py38' -export TF_BUILD_FLAGS="--config=opt --config=v2 --config=cuda --distinct_host_configuration=false \ ---action_env=TF_CUDA_VERSION --action_env=TF_CUDNN_VERSION --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain " +export TF_BUILD_FLAGS="--config=release_gpu_linux " export TF_TEST_FLAGS="--test_tag_filters=${TF_TEST_FILTER_TAGS} --build_tag_filters=${TF_TEST_FILTER_TAGS} \ --distinct_host_configuration=false \ ---action_env=TF_CUDA_VERSION --action_env=TF_CUDNN_VERSION --test_env=TF2_BEHAVIOR=1 \ +--action_env=TF_CUDA_VERSION=10 --action_env=TF_CUDNN_VERSION=7 --test_env=TF2_BEHAVIOR=1 \ --config=cuda --test_output=errors --local_test_jobs=4 --test_lang_filters=py \ --verbose_failures=true --keep_going --define=no_tensorflow_py_deps=true \ --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute " export TF_TEST_TARGETS="${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... " export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" #export IS_NIGHTLY=0 # Not nightly; uncomment if building from tf repo. -export TF_PROJECT_NAME=${PROJECT_NAME} +export TF_PROJECT_NAME="tensorflow_gpu" export TF_PIP_TEST_ROOT="pip_test" # To build both tensorflow and tensorflow-gpu pip packages From edb88b4be341f74920a8438e2055c29877141463 Mon Sep 17 00:00:00 2001 From: Vignesh Kothapalli Date: Fri, 24 Jul 2020 04:07:19 +0530 Subject: [PATCH 1216/2522] fixed docstring for check_validation_split_arg --- tensorflow/python/keras/preprocessing/dataset_utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/python/keras/preprocessing/dataset_utils.py b/tensorflow/python/keras/preprocessing/dataset_utils.py index 055f37e5ca2..09e6485a492 100644 --- a/tensorflow/python/keras/preprocessing/dataset_utils.py +++ b/tensorflow/python/keras/preprocessing/dataset_utils.py @@ -215,8 +215,7 @@ def check_validation_split_arg(validation_split, subset, shuffle, seed): """Raise errors in case of invalid argument values. Args: - shuffle: Whether to shuffle the data. Default: True. - If set to False, sorts the data in alphanumeric order. + shuffle: Whether to shuffle the data. Either True or False. seed: Optional random seed for shuffling and transformations. validation_split: Optional float between 0 and 1, fraction of data to reserve for validation. From 6711d96f6cf461e4b6f48d250a0ffd46b559e69f Mon Sep 17 00:00:00 2001 From: Jaesung Chung Date: Thu, 23 Jul 2020 15:30:14 -0700 Subject: [PATCH 1217/2522] Rollback of BroadcastTo op additions (part 2) Rolling back until the discussion about the builtin ops schema issue is resolved. 
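The fallback lowering that remains after this rollback (visible in the updated legalize-tf.mlir and prepare-tf.mlir expectations below) rewrites a BroadcastTo into a Fill of ones in the target shape followed by an element-wise Mul, letting the multiply's implicit broadcasting materialize the result. A minimal NumPy sketch of the equivalence this relies on, assuming a broadcast-compatible input and target shape; the helper name here is illustrative only:

import numpy as np

def broadcast_to_via_fill_mul(x, target_shape):
    # Fill step: materialize an all-ones tensor in the target shape.
    ones = np.full(target_shape, 1, dtype=x.dtype)
    # Mul step: element-wise multiplication broadcasts `x` against the ones tensor.
    return x * ones

x = np.array([1, 2, 3], dtype=np.int32)
result = broadcast_to_via_fill_mul(x, (3, 3))
assert (result == np.broadcast_to(x, (3, 3))).all()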
PiperOrigin-RevId: 322879756 Change-Id: Ide63ef15dc8a67d3bc1eaa5f7f147d565344ee41 --- tensorflow/compiler/mlir/lite/BUILD | 23 ---- tensorflow/compiler/mlir/lite/ir/tfl_ops.td | 51 -------- .../legalize-tf-no-runtime-verification.mlir | 6 +- .../compiler/mlir/lite/tests/legalize-tf.mlir | 12 +- tensorflow/compiler/mlir/lite/tests/ops.mlir | 18 --- .../compiler/mlir/lite/tests/prepare-tf.mlir | 68 ----------- .../mlir/lite/transforms/legalize_patterns.td | 3 - .../mlir/lite/transforms/legalize_tf.cc | 112 +++++++++++++++++- .../mlir/lite/transforms/prepare_tf.cc | 43 +------ .../mlir/lite/utils/constant_utils.cc | 112 ------------------ .../compiler/mlir/lite/utils/constant_utils.h | 35 ------ 11 files changed, 123 insertions(+), 360 deletions(-) delete mode 100644 tensorflow/compiler/mlir/lite/utils/constant_utils.cc delete mode 100644 tensorflow/compiler/mlir/lite/utils/constant_utils.h diff --git a/tensorflow/compiler/mlir/lite/BUILD b/tensorflow/compiler/mlir/lite/BUILD index 46c1d6f533d..103009cddb3 100644 --- a/tensorflow/compiler/mlir/lite/BUILD +++ b/tensorflow/compiler/mlir/lite/BUILD @@ -237,28 +237,6 @@ cc_library( alwayslink = 1, ) -cc_library( - name = "constant_utils", - srcs = [ - "utils/constant_utils.cc", - ], - hdrs = [ - "utils/constant_utils.h", - ], - copts = ["-std=c++14"], - deps = [ - "//tensorflow/compiler/mlir/tensorflow", - "//tensorflow/compiler/mlir/tensorflow:mangling_util", - "//tensorflow/compiler/xla:statusor", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core/platform:status", - "@llvm-project//llvm:Support", - "@llvm-project//mlir:IR", - "@llvm-project//mlir:StandardOps", - "@llvm-project//mlir:Support", - ], -) - cc_library( name = "lstm_utils", srcs = [ @@ -368,7 +346,6 @@ cc_library( "transforms/passes.h", ], deps = [ - ":constant_utils", ":lstm_utils", ":stateful_ops_utils", ":tensorflow_lite", diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td index 3dbfdfc5e04..715d047f0bf 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td +++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td @@ -4345,55 +4345,4 @@ def TFL_CustomTfOp : Op:$body); } -def TFL_BroadcastToOp : TFL_Op<"broadcast_to", [ - PredOpTrait<"input and output must have same element type", - TFL_TCresVTEtIsSameAsOp<0, 0>>, - TFL_OperandHasRankAtMost<0, 8>, - TFL_OperandHasRank<1, 1>, - PredOpTrait<"output dimension count must be at most 8", - Or<[TFL_OperandIsUnrankedPred<1>, - TFL_OperandDimIsAtMost<1, 0, 8>]>>, - NoSideEffect]> { - let summary = "Broadcast an array for a compatible shape."; - - let description = [{ -Broadcasting is the process of making arrays to have compatible shapes -for arithmetic operations. Two shapes are compatible if for each -dimension pair they are either equal or one of them is one. When trying -to broadcast a Tensor to a shape, it starts with the trailing dimensions, -and works its way forward. - -For example, - ->>> x = tf.constant([1, 2, 3]) ->>> y = tf.broadcast_to(x, [3, 3]) ->>> print(y) -tf.Tensor( - [[1 2 3] - [1 2 3] - [1 2 3]], shape=(3, 3), dtype=int32) - -In the above example, the input Tensor with the shape of `[1, 3]` -is broadcasted to output Tensor with shape of `[3, 3]`. - -When doing broadcasted operations such as multiplying a tensor -by a scalar, broadcasting (usually) confers some time or space -benefit, as the broadcasted tensor is never materialized. - -However, `broadcast_to` does not carry with it any such benefits. 
-The newly-created tensor takes the full memory of the broadcasted -shape. (In a graph context, `broadcast_to` might be fused to -subsequent operation and then be optimized away, however.) - }]; - - let arguments = (ins - TFL_TensorOf<[F32, I32, I1, I8, QI8, UI8, QUI8, I16, QI16, I64, Complex>]>:$input, - TFL_I32OrI64Tensor:$shape - ); - - let results = (outs - TFL_TensorOf<[F32, I32, I1, I8, QI8, UI8, QUI8, I16, QI16, I64, Complex>]>:$output - ); -} - #endif // TFL_OPS diff --git a/tensorflow/compiler/mlir/lite/tests/legalize-tf-no-runtime-verification.mlir b/tensorflow/compiler/mlir/lite/tests/legalize-tf-no-runtime-verification.mlir index 1e1e9b365de..90266b4e78e 100644 --- a/tensorflow/compiler/mlir/lite/tests/legalize-tf-no-runtime-verification.mlir +++ b/tensorflow/compiler/mlir/lite/tests/legalize-tf-no-runtime-verification.mlir @@ -5,6 +5,8 @@ func @broadcast_to_bf16(%arg0: tensor<3xbf16>, %arg1: tensor<2xi64>) -> tensor<3 return %0: tensor<3x3xbf16> // CHECK-LABEL: broadcast_to_bf16 -// CHECK: [[BCT:%.*]] = "tfl.broadcast_to"(%arg0, %arg1) : (tensor<3xbf16>, tensor<2xi64>) -> tensor<3x3xbf16> -// CHECK: return [[BCT]] : tensor<3x3xbf16> +// CHECK: [[CST:%.*]] = constant dense<1.000000e+00> : tensor +// CHECK: [[FILL:%.*]] = "tfl.fill"(%arg1, [[CST]]) : (tensor<2xi64>, tensor) -> tensor<3x3xbf16> +// CHECK: [[MUL:%.*]] = "tfl.mul"(%arg0, [[FILL]]) {fused_activation_function = "NONE"} : (tensor<3xbf16>, tensor<3x3xbf16>) -> tensor<3x3xbf16> +// CHECK: return [[MUL]] : tensor<3x3xbf16> } diff --git a/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir b/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir index 74a33817d32..7cb9c4dd22c 100644 --- a/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir +++ b/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir @@ -1487,8 +1487,10 @@ func @broadcast_to_f32(%arg0: tensor<3xf32>, %arg1: tensor<2xi32>) -> tensor<3x3 return %0: tensor<3x3xf32> // CHECK-LABEL: broadcast_to_f32 -// CHECK: [[BCT:%.*]] = "tfl.broadcast_to"(%arg0, %arg1) : (tensor<3xf32>, tensor<2xi32>) -> tensor<3x3xf32> -// CHECK: return [[BCT]] : tensor<3x3xf32> +// CHECK: [[CST:%.*]] = constant dense<1.000000e+00> : tensor +// CHECK: [[FILL:%.*]] = "tfl.fill"(%arg1, [[CST]]) : (tensor<2xi32>, tensor) -> tensor<3x3xf32> +// CHECK: [[MUL:%.*]] = "tfl.mul"(%arg0, [[FILL]]) {fused_activation_function = "NONE"} : (tensor<3xf32>, tensor<3x3xf32>) -> tensor<3x3xf32> +// CHECK: return [[MUL]] : tensor<3x3xf32> } func @broadcast_to_i32(%input: tensor<3xi32>, %shape: tensor<2xi32>) -> tensor<3x3xi32> { @@ -1496,8 +1498,10 @@ func @broadcast_to_i32(%input: tensor<3xi32>, %shape: tensor<2xi32>) -> tensor<3 return %0: tensor<3x3xi32> // CHECK-LABEL: broadcast_to_i32 -// CHECK: [[BCT:%.*]] = "tfl.broadcast_to"(%arg0, %arg1) : (tensor<3xi32>, tensor<2xi32>) -> tensor<3x3xi32> -// CHECK: return [[BCT]] : tensor<3x3xi32> +// CHECK: [[CST:%.*]] = constant dense<1> : tensor +// CHECK: [[FILL:%.*]] = "tfl.fill"(%arg1, [[CST]]) : (tensor<2xi32>, tensor) -> tensor<3x3xi32> +// CHECK: [[MUL:%.*]] = "tfl.mul"(%arg0, [[FILL]]) {fused_activation_function = "NONE"} : (tensor<3xi32>, tensor<3x3xi32>) -> tensor<3x3xi32> +// CHECK: return [[MUL]] : tensor<3x3xi32> } func @matmul_batch(%arg0: tensor<10x15xf32>, %arg1: tensor<15x17xf32>) -> tensor<10x17xf32> { diff --git a/tensorflow/compiler/mlir/lite/tests/ops.mlir b/tensorflow/compiler/mlir/lite/tests/ops.mlir index c10bd26e50a..06e05987ee6 100644 --- a/tensorflow/compiler/mlir/lite/tests/ops.mlir +++ b/tensorflow/compiler/mlir/lite/tests/ops.mlir @@ 
-2310,21 +2310,3 @@ func @main(%arg0: tensor, %arg1: tensor<1xf32>) -> tensor { }) : (tensor, tensor<1xf32>) -> (tensor) return %0#0 : tensor } - -// ----- - -// CHECK-LABEL: testBroadcastToWithI32ShapeTensor -func @testBroadcastToWithI32ShapeTensor(tensor, tensor<8xi32>) -> tensor { -^bb0(%arg0: tensor, %arg1: tensor<8xi32>): - // CHECK: "tfl.broadcast_to"(%arg0, %arg1) - %0 = "tfl.broadcast_to"(%arg0, %arg1): (tensor, tensor<8xi32>) -> tensor - return %0 : tensor -} - -// CHECK-LABEL: testBroadcastToWithI64ShapeTensor -func @testBroadcastToWithI64ShapeTensor(tensor, tensor<8xi64>) -> tensor { -^bb0(%arg0: tensor, %arg1: tensor<8xi64>): - // CHECK: "tfl.broadcast_to"(%arg0, %arg1) - %0 = "tfl.broadcast_to"(%arg0, %arg1): (tensor, tensor<8xi64>) -> tensor - return %0 : tensor -} diff --git a/tensorflow/compiler/mlir/lite/tests/prepare-tf.mlir b/tensorflow/compiler/mlir/lite/tests/prepare-tf.mlir index c82c557d22d..066139e179b 100644 --- a/tensorflow/compiler/mlir/lite/tests/prepare-tf.mlir +++ b/tensorflow/compiler/mlir/lite/tests/prepare-tf.mlir @@ -581,73 +581,6 @@ func @MatrixSetDiagV3Conversion(%arg0: tensor<3x3xi32>, %arg1: tensor<3xi32>) -> // CHECK: return %[[RES]] } -func @broadcast_to_f32_low_dim(%arg0: tensor<3xf32>, %arg1: tensor<2xi32>) -> tensor<3x3xf32> { - %0 = "tf.BroadcastTo"(%arg0, %arg1) : (tensor<3xf32>, tensor<2xi32>) -> tensor<3x3xf32> - return %0: tensor<3x3xf32> - -// CHECK-LABEL: broadcast_to_f32_low_dim -// CHECK: [[CST:%.*]] = constant dense<1.000000e+00> : tensor<3x3xf32> -// CHECK: [[MUL:%.*]] = "tf.Mul"(%arg0, [[CST]]) : (tensor<3xf32>, tensor<3x3xf32>) -> tensor<3x3xf32> -// CHECK: return [[MUL]] : tensor<3x3xf32> -} - -func @broadcast_to_i32_low_dim(%input: tensor<3xi32>, %shape: tensor<2xi32>) -> tensor<3x3xi32> { - %0 = "tf.BroadcastTo"(%input, %shape) : (tensor<3xi32>, tensor<2xi32>) -> tensor<3x3xi32> - return %0: tensor<3x3xi32> - -// CHECK-LABEL: broadcast_to_i32_low_dim -// CHECK: [[CST:%.*]] = constant dense<1> : tensor<3x3xi32> -// CHECK: [[MUL:%.*]] = "tf.Mul"(%arg0, [[CST]]) : (tensor<3xi32>, tensor<3x3xi32>) -> tensor<3x3xi32> -// CHECK: return [[MUL]] : tensor<3x3xi32> -} - -func @broadcast_to_low_dim_with_unknown_shape(%arg0: tensor<3xf32>, %arg1: tensor<*xi32>) -> tensor<3x3xf32> { - %0 = "tf.BroadcastTo"(%arg0, %arg1) : (tensor<3xf32>, tensor<*xi32>) -> tensor<3x3xf32> - return %0: tensor<3x3xf32> - -// CHECK-LABEL: broadcast_to_low_dim_with_unknown_shape -// CHECK: [[CST:%.*]] = constant dense<1.000000e+00> : tensor<3x3xf32> -// CHECK: [[MUL:%.*]] = "tf.Mul"(%arg0, [[CST]]) : (tensor<3xf32>, tensor<3x3xf32>) -> tensor<3x3xf32> -// CHECK: return [[MUL]] : tensor<3x3xf32> -} - -func @broadcast_to_i32_low_dim_with_unknown_output(%input: tensor<3xi32>, %shape: tensor<2xi32>) -> tensor<*xi32> { - %0 = "tf.BroadcastTo"(%input, %shape) : (tensor<3xi32>, tensor<2xi32>) -> tensor<*xi32> - return %0: tensor<*xi32> - -// CHECK-LABEL: broadcast_to_i32_low_dim_with_unknown_output -// CHECK: [[CST:%.*]] = constant dense<1> : tensor -// CHECK: [[FILL:%.*]] = "tf.Fill"(%arg1, [[CST]]) : (tensor<2xi32>, tensor) -> tensor<*xi32> -// CHECK: [[MUL:%.*]] = "tf.Mul"(%arg0, [[FILL]]) : (tensor<3xi32>, tensor<*xi32>) -> tensor<*xi32> -// CHECK: return [[MUL]] : tensor<*xi32> -} - -func @broadcast_to_high_dim_with_unknown_shape(%arg0: tensor<1x2x3x4x5x6xf32>, %arg1: tensor<*xi32>) -> tensor<7x8x1x2x3x4x5x6xf32> { - %0 = "tf.BroadcastTo"(%arg0, %arg1) : (tensor<1x2x3x4x5x6xf32>, tensor<*xi32>) -> tensor<7x8x1x2x3x4x5x6xf32> - return %0: tensor<7x8x1x2x3x4x5x6xf32> - 
-// CHECK-LABEL: broadcast_to_high_dim_with_unknown_shape -// CHECK: [[BCT:%.*]] = "tf.BroadcastTo"(%arg0, %arg1) : (tensor<1x2x3x4x5x6xf32>, tensor<*xi32>) -> tensor<7x8x1x2x3x4x5x6xf32> -// CHECK: return [[BCT]] : tensor<7x8x1x2x3x4x5x6xf32> -} - -func @broadcast_to_high_dim_with_unknown_output(%arg0: tensor<1x2x3x4x5x6xf32>, %arg1: tensor<8xi32>) -> tensor<*xf32> { - %0 = "tf.BroadcastTo"(%arg0, %arg1) : (tensor<1x2x3x4x5x6xf32>, tensor<8xi32>) -> tensor<*xf32> - return %0: tensor<*xf32> - -// CHECK-LABEL: broadcast_to_high_dim_with_unknown_output -// CHECK: [[BCT:%.*]] = "tf.BroadcastTo"(%arg0, %arg1) : (tensor<1x2x3x4x5x6xf32>, tensor<8xi32>) -> tensor<*xf32> -// CHECK: return [[BCT]] : tensor<*xf32> -} - -func @broadcast_to_with_unknown_shape_and_output(%arg0: tensor<1x2x3x4x5x6xf32>, %arg1: tensor<*xi32>) -> tensor<*xf32> { - %0 = "tf.BroadcastTo"(%arg0, %arg1) : (tensor<1x2x3x4x5x6xf32>, tensor<*xi32>) -> tensor<*xf32> - return %0: tensor<*xf32> - -// CHECK-LABEL: broadcast_to_with_unknown_shape_and_output -// CHECK: "tf.BroadcastTo"(%arg0, %arg1) -} - // CHECK-LABEL: xla_conv func @xla_conv(%arg0: tensor<4x8x8x16xf32>) -> tensor<4x8x8x16xf32> { %0 = "tf.Const"() {value = dense<1.000000e+00> : tensor<3x3x16x16xf32>} : () -> tensor<3x3x16x16xf32> loc("Const_1") @@ -663,4 +596,3 @@ func @xla_conv(%arg0: tensor<4x8x8x16xf32>) -> tensor<4x8x8x16xf32> { } } - diff --git a/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td b/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td index a6adb8f4a61..47cfaecd3fb 100644 --- a/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td +++ b/tensorflow/compiler/mlir/lite/transforms/legalize_patterns.td @@ -109,9 +109,6 @@ def LegalizeArgMax : Pat<(TF_ArgMaxOp $input, $dim), def LegalizeArgMin : Pat<(TF_ArgMinOp $input, $dim), (TFL_ArgMinOp $input, $dim)>; -def LegalizeBroadcastTo : Pat<(TF_BroadcastToOp $input, $dim), - (TFL_BroadcastToOp $input, $dim)>; - def LegalizeCeil : Pat<(TF_CeilOp $arg), (TFL_CeilOp $arg)>; def LegalizeCos : Pat<(TF_CosOp $arg), (TFL_CosOp $arg)>; diff --git a/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc index 7d6866dc570..1328a2baf5d 100644 --- a/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc +++ b/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc @@ -45,7 +45,6 @@ limitations under the License. 
#include "tensorflow/compiler/mlir/lite/quantization/quantization_utils.h" #include "tensorflow/compiler/mlir/lite/transforms/passes.h" #include "tensorflow/compiler/mlir/lite/utils/attribute_utils.h" -#include "tensorflow/compiler/mlir/lite/utils/constant_utils.h" #include "tensorflow/compiler/mlir/lite/utils/validators.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #include "tensorflow/compiler/mlir/tensorflow/utils/mangling_util.h" @@ -138,6 +137,7 @@ DECL_CONVERT_OP(StridedSlice); DECL_CONVERT_OP(Unpack); DECL_CONVERT_OP(Reciprocal); DECL_CONVERT_OP(RandomUniform); +DECL_CONVERT_OP(BroadcastTo); #undef DECL_CONVERT_OP @@ -483,6 +483,89 @@ LogicalResult ConvertTFAssertOp::matchAndRewrite( return success(); } +StatusOr CreateConstOpWithSingleValue(PatternRewriter* rewriter, + Location loc, + ShapedType shaped_type, + int value) { + Type element_type = shaped_type.getElementType(); + ShapedType scalar_type = RankedTensorType::get({}, element_type); + Attribute attr; + switch (element_type.getKind()) { + case mlir::StandardTypes::F16: { + auto floatType = mlir::FloatType::getF16(element_type.getContext()); + auto floatAttr = + mlir::FloatAttr::get(floatType, static_cast(value)); + std::vector floatValues({floatAttr}); + attr = DenseElementsAttr::get(scalar_type, floatValues); + break; + } + case mlir::StandardTypes::BF16: { + auto floatType = mlir::FloatType::getBF16(element_type.getContext()); + auto floatAttr = + mlir::FloatAttr::get(floatType, static_cast(value)); + std::vector floatValues({floatAttr}); + attr = DenseElementsAttr::get(scalar_type, floatValues); + break; + } + case mlir::StandardTypes::F32: { + attr = + DenseElementsAttr::get(scalar_type, static_cast(value)); + break; + } + case mlir::StandardTypes::Complex: { + auto etype = element_type.cast().getElementType(); + if (etype.isF32()) { + auto dialect = etype.getContext()->getRegisteredDialect("tf"); + tensorflow::TensorProto repr; + repr.set_dtype(tensorflow::DT_COMPLEX64); + + tensorflow::TensorShapeProto* shape = repr.mutable_tensor_shape(); + shape->set_unknown_rank(false); + shape->add_dim()->set_size(int64_t{1}); + std::string content; + auto complex_value = + std::complex(static_cast(value), 0.0f); + content.assign(reinterpret_cast(&complex_value), + sizeof(complex_value)); + repr.set_tensor_content(content); + std::string mangled = tensorflow::mangling_util::MangleTensor(repr); + + attr = mlir::OpaqueElementsAttr::get(dialect, scalar_type, mangled); + break; + } + return Status(tensorflow::error::INVALID_ARGUMENT, "Unsupported type"); + } + case mlir::StandardTypes::Integer: { + const auto& itype = element_type.cast(); + switch (itype.getWidth()) { + case 8: + attr = DenseElementsAttr::get(scalar_type, + static_cast(value)); + break; + case 16: + attr = DenseElementsAttr::get(scalar_type, + static_cast(value)); + break; + case 32: + attr = DenseElementsAttr::get(scalar_type, + static_cast(value)); + break; + case 64: + attr = DenseElementsAttr::get(scalar_type, + static_cast(value)); + break; + default: + return Status(tensorflow::error::INVALID_ARGUMENT, + "Unsupported type"); + } + break; + } + default: + return Status(tensorflow::error::INVALID_ARGUMENT, "Unsupported type"); + } + return rewriter->create(loc, scalar_type, attr); +} + LogicalResult ConvertTFReciprocalOp::matchAndRewrite( Operation* op, PatternRewriter& rewriter) const { auto tf_reciprocal_op = cast(op); @@ -503,6 +586,31 @@ LogicalResult ConvertTFReciprocalOp::matchAndRewrite( return success(); } +LogicalResult 
ConvertTFBroadcastToOp::matchAndRewrite( + Operation* op, PatternRewriter& rewriter) const { + auto tf_broadcast_to_op = cast(op); + auto element_type = tf_broadcast_to_op.input().getType().cast(); + auto output_type = tf_broadcast_to_op.output().getType(); + + auto status_or_const_op = + CreateConstOpWithSingleValue(&rewriter, op->getLoc(), element_type, 1); + if (!status_or_const_op.ok()) { + return failure(); + } + + auto tfl_fill_op = rewriter.create( + op->getLoc(), output_type, tf_broadcast_to_op.shape(), + status_or_const_op.ValueOrDie()); + + StringAttr fused_activation_function = + StringAttr::get("NONE", rewriter.getContext()); + + rewriter.replaceOpWithNewOp( + op, output_type, tf_broadcast_to_op.input(), tfl_fill_op, + fused_activation_function); + return success(); +} + // Legalize unidirectional sequence lstm. struct LegalizeUnidirectionalSequenceLstm : public RewritePattern { explicit LegalizeUnidirectionalSequenceLstm(MLIRContext* context) @@ -643,7 +751,7 @@ void LegalizeTF::runOnFunction() { ConvertTFMatrixDiagV3Op, ConvertTFPackOp, ConvertTFReshapeOp, ConvertTFSplitOp, ConvertTFSplitVOp, ConvertTFStridedSliceOp, ConvertTFUnpackOp, ConvertTFAssertOp, ConvertTFReciprocalOp, - ConvertTFRandomUniformOp>(context); + ConvertTFRandomUniformOp, ConvertTFBroadcastToOp>(context); // Ophint python converter converted tf node pattern. patterns.insert(op); - auto input_type = tf_broadcast_to_op.input().getType().cast(); - auto output_type = tf_broadcast_to_op.output().getType().cast(); - auto shape_type = tf_broadcast_to_op.shape().getType().cast(); - Type element_type = input_type.getElementType(); - - // Allow lowering when low dimension inputs are given and its type is F32 or - // I32. - if (!((output_type.hasRank() && output_type.getRank() <= 4) || - (shape_type.hasStaticShape() && shape_type.getRank() == 1 && - shape_type.getDimSize(0) <= 4))) - return failure(); - if (!((element_type.getKind() == mlir::StandardTypes::F32) || - (element_type.getKind() == mlir::StandardTypes::Integer && - element_type.cast().getWidth() == 32))) - return failure(); - - auto status_or_const_op = - CreateConstOpWithSingleValue(&rewriter, op->getLoc(), input_type, 1); - if (!status_or_const_op.ok()) { - return failure(); - } - - auto tf_fill_op = rewriter.create( - op->getLoc(), output_type, tf_broadcast_to_op.shape(), - status_or_const_op.ValueOrDie()); - - auto mul_op = rewriter.create( - op->getLoc(), output_type, tf_broadcast_to_op.input(), tf_fill_op); - rewriter.replaceOp(op, mul_op.getResult()); - return success(); - } -}; - #include "tensorflow/compiler/mlir/lite/transforms/generated_prepare_tf.inc" // Returns success if all the operations in the `op`'s regions including `op` @@ -807,7 +766,7 @@ void PrepareTFPass::runOnFunction() { patterns.insert, TF::ConvertTFBatchMatMulOp>(ctx); } - patterns.insert(ctx); applyPatternsAndFoldGreedily(func, patterns); } diff --git a/tensorflow/compiler/mlir/lite/utils/constant_utils.cc b/tensorflow/compiler/mlir/lite/utils/constant_utils.cc deleted file mode 100644 index d244fa9d6e4..00000000000 --- a/tensorflow/compiler/mlir/lite/utils/constant_utils.cc +++ /dev/null @@ -1,112 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/compiler/mlir/lite/utils/constant_utils.h" - -#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" -#include "tensorflow/compiler/mlir/tensorflow/utils/mangling_util.h" -#include "tensorflow/core/framework/tensor.pb.h" -#include "tensorflow/core/framework/tensor_shape.pb.h" -#include "tensorflow/core/platform/status.h" - -namespace mlir { -namespace TFL { - -xla::StatusOr CreateConstOpWithSingleValue( - PatternRewriter* rewriter, Location loc, ShapedType shaped_type, - int value) { - Type element_type = shaped_type.getElementType(); - ShapedType scalar_type = RankedTensorType::get({}, element_type); - Attribute attr; - switch (element_type.getKind()) { - case mlir::StandardTypes::F16: { - auto floatType = mlir::FloatType::getF16(element_type.getContext()); - auto floatAttr = - mlir::FloatAttr::get(floatType, static_cast(value)); - std::vector floatValues({floatAttr}); - attr = DenseElementsAttr::get(scalar_type, floatValues); - break; - } - case mlir::StandardTypes::BF16: { - auto floatType = mlir::FloatType::getBF16(element_type.getContext()); - auto floatAttr = - mlir::FloatAttr::get(floatType, static_cast(value)); - std::vector floatValues({floatAttr}); - attr = DenseElementsAttr::get(scalar_type, floatValues); - break; - } - case mlir::StandardTypes::F32: { - attr = - DenseElementsAttr::get(scalar_type, static_cast(value)); - break; - } - case mlir::StandardTypes::Complex: { - auto etype = element_type.cast().getElementType(); - if (etype.isF32()) { - auto dialect = etype.getContext()->getRegisteredDialect("tf"); - tensorflow::TensorProto repr; - repr.set_dtype(tensorflow::DT_COMPLEX64); - - tensorflow::TensorShapeProto* shape = repr.mutable_tensor_shape(); - shape->set_unknown_rank(false); - shape->add_dim()->set_size(int64_t{1}); - std::string content; - auto complex_value = - std::complex(static_cast(value), 0.0f); - content.assign(reinterpret_cast(&complex_value), - sizeof(complex_value)); - repr.set_tensor_content(content); - std::string mangled = tensorflow::mangling_util::MangleTensor(repr); - - attr = mlir::OpaqueElementsAttr::get(dialect, scalar_type, mangled); - break; - } - return tensorflow::Status(tensorflow::error::INVALID_ARGUMENT, - "Unsupported type"); - } - case mlir::StandardTypes::Integer: { - const auto& itype = element_type.cast(); - switch (itype.getWidth()) { - case 8: - attr = DenseElementsAttr::get(scalar_type, - static_cast(value)); - break; - case 16: - attr = DenseElementsAttr::get(scalar_type, - static_cast(value)); - break; - case 32: - attr = DenseElementsAttr::get(scalar_type, - static_cast(value)); - break; - case 64: - attr = DenseElementsAttr::get(scalar_type, - static_cast(value)); - break; - default: - return tensorflow::Status(tensorflow::error::INVALID_ARGUMENT, - "Unsupported type"); - } - break; - } - default: - return tensorflow::Status(tensorflow::error::INVALID_ARGUMENT, - "Unsupported type"); - } - return rewriter->create(loc, scalar_type, attr); -} - -} // namespace TFL -} // namespace mlir diff --git 
a/tensorflow/compiler/mlir/lite/utils/constant_utils.h b/tensorflow/compiler/mlir/lite/utils/constant_utils.h deleted file mode 100644 index 308fbbc3ee5..00000000000 --- a/tensorflow/compiler/mlir/lite/utils/constant_utils.h +++ /dev/null @@ -1,35 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_COMPILER_MLIR_LITE_UTILS_CONSTANT_UTILS_H_ -#define TENSORFLOW_COMPILER_MLIR_LITE_UTILS_CONSTANT_UTILS_H_ - -#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project -#include "mlir/IR/Location.h" // from @llvm-project -#include "mlir/IR/Operation.h" // from @llvm-project -#include "mlir/IR/PatternMatch.h" // from @llvm-project -#include "mlir/IR/StandardTypes.h" // from @llvm-project -#include "tensorflow/compiler/xla/statusor.h" - -namespace mlir { -namespace TFL { - -// Returns a Constant op with a single value. -xla::StatusOr CreateConstOpWithSingleValue( - PatternRewriter* rewriter, Location loc, ShapedType shaped_type, int value); - -} // namespace TFL -} // namespace mlir -#endif // TENSORFLOW_COMPILER_MLIR_LITE_UTILS_CONSTANT_UTILS_H_ From 1a3a8deba522de7afa4767edb464efc08b4dcc8a Mon Sep 17 00:00:00 2001 From: Vignesh Kothapalli Date: Fri, 24 Jul 2020 04:10:08 +0530 Subject: [PATCH 1218/2522] removed 'optional' string from docstrings --- tensorflow/python/keras/preprocessing/dataset_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/keras/preprocessing/dataset_utils.py b/tensorflow/python/keras/preprocessing/dataset_utils.py index 09e6485a492..688115862c1 100644 --- a/tensorflow/python/keras/preprocessing/dataset_utils.py +++ b/tensorflow/python/keras/preprocessing/dataset_utils.py @@ -216,9 +216,9 @@ def check_validation_split_arg(validation_split, subset, shuffle, seed): Args: shuffle: Whether to shuffle the data. Either True or False. - seed: Optional random seed for shuffling and transformations. - validation_split: Optional float between 0 and 1, - fraction of data to reserve for validation. + seed: random seed for shuffling and transformations. + validation_split: float between 0 and 1, + fraction of data to reserve for validation. subset: One of "training" or "validation". Only used if `validation_split` is set. """ From 4270608dcc47dfe04fabb3ec47b71b64073c5037 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Thu, 23 Jul 2020 15:36:54 -0700 Subject: [PATCH 1219/2522] Rename the PRESUBMITS_BUILD_TARGETS file to something more appropriate (DEFAULT_TEST_TARGETS). 
PiperOrigin-RevId: 322881084 Change-Id: I0e33d25a35563e212a35bf360dc1a44b8e03f238 --- .../{PRESUBMIT_BUILD_TARGETS.sh => DEFAULT_TEST_TARGETS.sh} | 0 tensorflow/tools/ci_build/presubmit/macos/py2_cc/build.sh | 2 +- tensorflow/tools/ci_build/presubmit/macos/py37_cc/build.sh | 2 +- .../tools/ci_build/presubmit/ubuntu_16/cpu_py36_full/build.sh | 2 +- .../tools/ci_build/presubmit/ubuntu_16/gpu_py36_full/build.sh | 2 +- tensorflow/tools/ci_build/rel/macos/cpu_py35_nonpip.sh | 2 +- tensorflow/tools/ci_build/rel/macos/cpu_py36_nonpip.sh | 2 +- tensorflow/tools/ci_build/rel/macos/cpu_py37_nonpip.sh | 2 +- tensorflow/tools/ci_build/rel/macos/cpu_py38_nonpip.sh | 2 +- tensorflow/tools/ci_build/rel/ubuntu/cpu_py35_nonpip.sh | 2 +- tensorflow/tools/ci_build/rel/ubuntu/cpu_py35_pip.sh | 2 +- tensorflow/tools/ci_build/rel/ubuntu/cpu_py36_nonpip.sh | 2 +- tensorflow/tools/ci_build/rel/ubuntu/cpu_py36_pip.sh | 2 +- tensorflow/tools/ci_build/rel/ubuntu/cpu_py37_nonpip.sh | 2 +- tensorflow/tools/ci_build/rel/ubuntu/cpu_py37_pip.sh | 2 +- tensorflow/tools/ci_build/rel/ubuntu/cpu_py38_nonpip.sh | 2 +- tensorflow/tools/ci_build/rel/ubuntu/cpu_py38_pip.sh | 2 +- tensorflow/tools/ci_build/rel/ubuntu/gpu_py35_nonpip.sh | 2 +- tensorflow/tools/ci_build/rel/ubuntu/gpu_py35_pip.sh | 2 +- tensorflow/tools/ci_build/rel/ubuntu/gpu_py36_nonpip.sh | 2 +- tensorflow/tools/ci_build/rel/ubuntu/gpu_py36_pip.sh | 2 +- tensorflow/tools/ci_build/rel/ubuntu/gpu_py37_nonpip.sh | 2 +- tensorflow/tools/ci_build/rel/ubuntu/gpu_py37_pip.sh | 2 +- tensorflow/tools/ci_build/rel/ubuntu/gpu_py38_nonpip.sh | 2 +- tensorflow/tools/ci_build/rel/ubuntu/gpu_py38_pip.sh | 2 +- tensorflow/tools/ci_build/release/macos/cpu_py2_full/nonpip.sh | 2 +- .../tools/ci_build/release/macos/cpu_py2_full/nonpip_v1.sh | 2 +- tensorflow/tools/ci_build/release/macos/cpu_py35_full/nonpip.sh | 2 +- tensorflow/tools/ci_build/release/macos/cpu_py36_full/nonpip.sh | 2 +- .../tools/ci_build/release/macos/cpu_py36_full/nonpip_v1.sh | 2 +- tensorflow/tools/ci_build/release/macos/cpu_py37_full/nonpip.sh | 2 +- .../tools/ci_build/release/macos/cpu_py37_full/nonpip_v1.sh | 2 +- tensorflow/tools/ci_build/release/macos/cpu_py38_full/nonpip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/cpu_py2_full/nonpip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/cpu_py2_full/nonpip_v1.sh | 2 +- tensorflow/tools/ci_build/release/ubuntu_16/cpu_py2_full/pip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/cpu_py35_full/nonpip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/cpu_py35_full/nonpip_v1.sh | 2 +- .../tools/ci_build/release/ubuntu_16/cpu_py35_full/pip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/cpu_py36_full/nonpip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/cpu_py36_full/nonpip_v1.sh | 2 +- .../tools/ci_build/release/ubuntu_16/cpu_py36_full/pip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/cpu_py37_full/nonpip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/cpu_py37_full/nonpip_v1.sh | 2 +- .../tools/ci_build/release/ubuntu_16/cpu_py37_full/pip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/cpu_py38_full/nonpip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/cpu_py38_full/pip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py2_full/nonpip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py2_full/nonpip_v1.sh | 2 +- tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/pip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py35_full/nonpip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py35_full/nonpip_v1.sh | 2 +- 
.../tools/ci_build/release/ubuntu_16/gpu_py35_full/pip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py36_full/nonpip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py36_full/nonpip_v1.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py36_full/pip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py37_full/nonpip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py37_full/nonpip_v1.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py37_full/pip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py38_full/nonpip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py38_full/pip.sh | 2 +- 61 files changed, 60 insertions(+), 60 deletions(-) rename tensorflow/tools/ci_build/build_scripts/{PRESUBMIT_BUILD_TARGETS.sh => DEFAULT_TEST_TARGETS.sh} (100%) diff --git a/tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh b/tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh similarity index 100% rename from tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh rename to tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh diff --git a/tensorflow/tools/ci_build/presubmit/macos/py2_cc/build.sh b/tensorflow/tools/ci_build/presubmit/macos/py2_cc/build.sh index 0885d208f1a..9bce4d1020c 100644 --- a/tensorflow/tools/ci_build/presubmit/macos/py2_cc/build.sh +++ b/tensorflow/tools/ci_build/presubmit/macos/py2_cc/build.sh @@ -40,7 +40,7 @@ function run_build () { tag_filters="-no_oss,-no_oss_py2,-gpu,-tpu,-benchmark-test,-nomac,-no_mac,-v1only" # Get the default test targets for bazel. - source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh + source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh "${BAZEL_WRAPPER_PATH}" \ test \ diff --git a/tensorflow/tools/ci_build/presubmit/macos/py37_cc/build.sh b/tensorflow/tools/ci_build/presubmit/macos/py37_cc/build.sh index 658432af36d..e648c488a00 100644 --- a/tensorflow/tools/ci_build/presubmit/macos/py37_cc/build.sh +++ b/tensorflow/tools/ci_build/presubmit/macos/py37_cc/build.sh @@ -38,7 +38,7 @@ function run_build () { tag_filters="-no_oss,-no_oss_py2,-gpu,-tpu,-benchmark-test,-nomac,-no_mac,-v1only" # Get the default test targets for bazel. - source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh + source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh "${BAZEL_WRAPPER_PATH}" \ test \ diff --git a/tensorflow/tools/ci_build/presubmit/ubuntu_16/cpu_py36_full/build.sh b/tensorflow/tools/ci_build/presubmit/ubuntu_16/cpu_py36_full/build.sh index 7a1fdfdb069..bde3c3d55e3 100644 --- a/tensorflow/tools/ci_build/presubmit/ubuntu_16/cpu_py36_full/build.sh +++ b/tensorflow/tools/ci_build/presubmit/ubuntu_16/cpu_py36_full/build.sh @@ -46,7 +46,7 @@ function run_build () { tag_filters="-no_oss,-oss_serial,-gpu,-tpu,-benchmark-test""$(maybe_skip_v1)" # Get the default test targets for bazel. - source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh + source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Run bazel test command. Double test timeouts to avoid flakes. # //tensorflow/core/platform:setround_test is not supported. 
See b/64264700 diff --git a/tensorflow/tools/ci_build/presubmit/ubuntu_16/gpu_py36_full/build.sh b/tensorflow/tools/ci_build/presubmit/ubuntu_16/gpu_py36_full/build.sh index 6a7e4c74576..a27cc881f41 100644 --- a/tensorflow/tools/ci_build/presubmit/ubuntu_16/gpu_py36_full/build.sh +++ b/tensorflow/tools/ci_build/presubmit/ubuntu_16/gpu_py36_full/build.sh @@ -50,7 +50,7 @@ function run_build () { tag_filters="gpu,-no_gpu,-nogpu,-benchmark-test,-no_oss,-oss_serial,-no_gpu_presubmit,-gpu_cupti""$(maybe_skip_v1)" # Get the default test targets for bazel. - source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh + source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh RBE_CONFIG="@ubuntu16.04-py3-gcc7_manylinux2010-cuda10.1-cudnn7-tensorrt6.0" TF_CUDA_CONFIG_REPO="${RBE_CONFIG}_config_cuda" diff --git a/tensorflow/tools/ci_build/rel/macos/cpu_py35_nonpip.sh b/tensorflow/tools/ci_build/rel/macos/cpu_py35_nonpip.sh index 06fabd7b1c7..7e85779a207 100644 --- a/tensorflow/tools/ci_build/rel/macos/cpu_py35_nonpip.sh +++ b/tensorflow/tools/ci_build/rel/macos/cpu_py35_nonpip.sh @@ -38,7 +38,7 @@ yes "" | "$PYTHON_BIN_PATH" configure.py tag_filters="-no_oss,-oss_serial,-nomac,-no_mac,-no_oss_py35,-v1only,-gpu,-tpu,-benchmark-test" # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Run tests set +e diff --git a/tensorflow/tools/ci_build/rel/macos/cpu_py36_nonpip.sh b/tensorflow/tools/ci_build/rel/macos/cpu_py36_nonpip.sh index 51cc3da62d6..07d4f7957af 100644 --- a/tensorflow/tools/ci_build/rel/macos/cpu_py36_nonpip.sh +++ b/tensorflow/tools/ci_build/rel/macos/cpu_py36_nonpip.sh @@ -38,7 +38,7 @@ yes "" | "$PYTHON_BIN_PATH" configure.py tag_filters="-no_oss,-oss_serial,-nomac,-no_mac,-no_oss_py36,-v1only,-gpu,-tpu,-benchmark-test" # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Run tests set +e diff --git a/tensorflow/tools/ci_build/rel/macos/cpu_py37_nonpip.sh b/tensorflow/tools/ci_build/rel/macos/cpu_py37_nonpip.sh index e0f2968b45a..a23ca47a038 100644 --- a/tensorflow/tools/ci_build/rel/macos/cpu_py37_nonpip.sh +++ b/tensorflow/tools/ci_build/rel/macos/cpu_py37_nonpip.sh @@ -38,7 +38,7 @@ yes "" | "$PYTHON_BIN_PATH" configure.py tag_filters="-no_oss,-oss_serial,-nomac,-no_mac$(maybe_skip_v1),-gpu,-tpu,-benchmark-test" # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Run tests set +e diff --git a/tensorflow/tools/ci_build/rel/macos/cpu_py38_nonpip.sh b/tensorflow/tools/ci_build/rel/macos/cpu_py38_nonpip.sh index 22475f35491..179ecdf97ca 100755 --- a/tensorflow/tools/ci_build/rel/macos/cpu_py38_nonpip.sh +++ b/tensorflow/tools/ci_build/rel/macos/cpu_py38_nonpip.sh @@ -38,7 +38,7 @@ yes "" | "$PYTHON_BIN_PATH" configure.py tag_filters="-no_oss,-oss_serial,-nomac,-no_mac$(maybe_skip_v1),-gpu,-tpu,-benchmark-test" # Get the default test targets for bazel. 
-source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Run tests set +e diff --git a/tensorflow/tools/ci_build/rel/ubuntu/cpu_py35_nonpip.sh b/tensorflow/tools/ci_build/rel/ubuntu/cpu_py35_nonpip.sh index 5339671cce3..fee64f0beb1 100755 --- a/tensorflow/tools/ci_build/rel/ubuntu/cpu_py35_nonpip.sh +++ b/tensorflow/tools/ci_build/rel/ubuntu/cpu_py35_nonpip.sh @@ -34,7 +34,7 @@ yes "" | "$PYTHON_BIN_PATH" configure.py tag_filters="-no_oss,-oss_serial,-gpu,-tpu,-benchmark-test,-no_oss_py35,-v1only" # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Run tests set +e diff --git a/tensorflow/tools/ci_build/rel/ubuntu/cpu_py35_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu/cpu_py35_pip.sh index 5d0cbacb0b7..b938ed2fde1 100755 --- a/tensorflow/tools/ci_build/rel/ubuntu/cpu_py35_pip.sh +++ b/tensorflow/tools/ci_build/rel/ubuntu/cpu_py35_pip.sh @@ -37,7 +37,7 @@ export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Export optional variables for running pip.sh export TF_BUILD_FLAGS="--config=opt --config=v2 --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain" diff --git a/tensorflow/tools/ci_build/rel/ubuntu/cpu_py36_nonpip.sh b/tensorflow/tools/ci_build/rel/ubuntu/cpu_py36_nonpip.sh index c2790420afc..6b05141f00f 100755 --- a/tensorflow/tools/ci_build/rel/ubuntu/cpu_py36_nonpip.sh +++ b/tensorflow/tools/ci_build/rel/ubuntu/cpu_py36_nonpip.sh @@ -34,7 +34,7 @@ yes "" | "$PYTHON_BIN_PATH" configure.py tag_filters="-no_oss,-oss_serial,-gpu,-tpu,-benchmark-test,-no_oss_py36,-v1only" # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Run tests set +e diff --git a/tensorflow/tools/ci_build/rel/ubuntu/cpu_py36_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu/cpu_py36_pip.sh index 25c4de88cdd..44ae820c507 100755 --- a/tensorflow/tools/ci_build/rel/ubuntu/cpu_py36_pip.sh +++ b/tensorflow/tools/ci_build/rel/ubuntu/cpu_py36_pip.sh @@ -37,7 +37,7 @@ export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Export optional variables for running pip.sh export TF_BUILD_FLAGS="--config=opt --config=v2 --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain" diff --git a/tensorflow/tools/ci_build/rel/ubuntu/cpu_py37_nonpip.sh b/tensorflow/tools/ci_build/rel/ubuntu/cpu_py37_nonpip.sh index f6415a7c9ad..db0c6056b6c 100755 --- a/tensorflow/tools/ci_build/rel/ubuntu/cpu_py37_nonpip.sh +++ b/tensorflow/tools/ci_build/rel/ubuntu/cpu_py37_nonpip.sh @@ -34,7 +34,7 @@ yes "" | "$PYTHON_BIN_PATH" configure.py tag_filters="-no_oss,-oss_serial,-gpu,-tpu,-benchmark-test,-no_oss_py37,-v1only" # Get the default test targets for bazel. 
-source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Run tests set +e diff --git a/tensorflow/tools/ci_build/rel/ubuntu/cpu_py37_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu/cpu_py37_pip.sh index 940cef32ef8..28784f9f499 100755 --- a/tensorflow/tools/ci_build/rel/ubuntu/cpu_py37_pip.sh +++ b/tensorflow/tools/ci_build/rel/ubuntu/cpu_py37_pip.sh @@ -37,7 +37,7 @@ export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Export optional variables for running pip.sh export TF_BUILD_FLAGS="--config=opt --config=v2 --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain" diff --git a/tensorflow/tools/ci_build/rel/ubuntu/cpu_py38_nonpip.sh b/tensorflow/tools/ci_build/rel/ubuntu/cpu_py38_nonpip.sh index ff7a9f3baef..36da30167d0 100755 --- a/tensorflow/tools/ci_build/rel/ubuntu/cpu_py38_nonpip.sh +++ b/tensorflow/tools/ci_build/rel/ubuntu/cpu_py38_nonpip.sh @@ -34,7 +34,7 @@ yes "" | "$PYTHON_BIN_PATH" configure.py tag_filters="-no_oss,-oss_serial,-gpu,-tpu,-benchmark-test,-no_oss_py38,-v1only" # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Run tests set +e diff --git a/tensorflow/tools/ci_build/rel/ubuntu/cpu_py38_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu/cpu_py38_pip.sh index a27d1f863d6..ace3257479a 100755 --- a/tensorflow/tools/ci_build/rel/ubuntu/cpu_py38_pip.sh +++ b/tensorflow/tools/ci_build/rel/ubuntu/cpu_py38_pip.sh @@ -37,7 +37,7 @@ export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Export optional variables for running pip.sh export TF_BUILD_FLAGS="--config=opt --config=v2 --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain" diff --git a/tensorflow/tools/ci_build/rel/ubuntu/gpu_py35_nonpip.sh b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py35_nonpip.sh index d9a10c9551d..47ed3c4fd2a 100755 --- a/tensorflow/tools/ci_build/rel/ubuntu/gpu_py35_nonpip.sh +++ b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py35_nonpip.sh @@ -41,7 +41,7 @@ export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh tag_filters="gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py35" diff --git a/tensorflow/tools/ci_build/rel/ubuntu/gpu_py35_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py35_pip.sh index abf5c1db4b4..5b0ee602cfa 100755 --- a/tensorflow/tools/ci_build/rel/ubuntu/gpu_py35_pip.sh +++ b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py35_pip.sh @@ -45,7 +45,7 @@ export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. 
-source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Export optional variables for running pip.sh export TF_TEST_FILTER_TAGS='gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py35' diff --git a/tensorflow/tools/ci_build/rel/ubuntu/gpu_py36_nonpip.sh b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py36_nonpip.sh index 547bb0a1fba..70038a8d875 100755 --- a/tensorflow/tools/ci_build/rel/ubuntu/gpu_py36_nonpip.sh +++ b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py36_nonpip.sh @@ -41,7 +41,7 @@ export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh tag_filters="gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py36" diff --git a/tensorflow/tools/ci_build/rel/ubuntu/gpu_py36_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py36_pip.sh index 17b52d9ce6b..3223149f5a4 100755 --- a/tensorflow/tools/ci_build/rel/ubuntu/gpu_py36_pip.sh +++ b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py36_pip.sh @@ -45,7 +45,7 @@ export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Export optional variables for running pip.sh export TF_TEST_FILTER_TAGS='gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py36' diff --git a/tensorflow/tools/ci_build/rel/ubuntu/gpu_py37_nonpip.sh b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py37_nonpip.sh index 54a72459fa1..225b2cf4b7b 100755 --- a/tensorflow/tools/ci_build/rel/ubuntu/gpu_py37_nonpip.sh +++ b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py37_nonpip.sh @@ -41,7 +41,7 @@ export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh tag_filters="gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py37" diff --git a/tensorflow/tools/ci_build/rel/ubuntu/gpu_py37_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py37_pip.sh index 2b17849b737..5dfffbe3fe1 100755 --- a/tensorflow/tools/ci_build/rel/ubuntu/gpu_py37_pip.sh +++ b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py37_pip.sh @@ -45,7 +45,7 @@ export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. 
-source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Export optional variables for running pip.sh export TF_TEST_FILTER_TAGS='gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py37' diff --git a/tensorflow/tools/ci_build/rel/ubuntu/gpu_py38_nonpip.sh b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py38_nonpip.sh index ab88f4712f0..f7678b7436f 100755 --- a/tensorflow/tools/ci_build/rel/ubuntu/gpu_py38_nonpip.sh +++ b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py38_nonpip.sh @@ -41,7 +41,7 @@ export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh tag_filters="gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py38" diff --git a/tensorflow/tools/ci_build/rel/ubuntu/gpu_py38_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py38_pip.sh index 1ba8c078021..cc0a5254607 100755 --- a/tensorflow/tools/ci_build/rel/ubuntu/gpu_py38_pip.sh +++ b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py38_pip.sh @@ -45,7 +45,7 @@ export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Export optional variables for running pip.sh export TF_TEST_FILTER_TAGS='gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py38' diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py2_full/nonpip.sh b/tensorflow/tools/ci_build/release/macos/cpu_py2_full/nonpip.sh index 02e9e2eb9f8..9031cd9be63 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py2_full/nonpip.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py2_full/nonpip.sh @@ -36,7 +36,7 @@ export PYTHON_BIN_PATH=$(which python2) yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh tag_filters="-no_oss,-oss_serial,-nomac,-no_mac,-no_oss_py2,-v1only,-gpu,-tpu,-benchmark-test" diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py2_full/nonpip_v1.sh b/tensorflow/tools/ci_build/release/macos/cpu_py2_full/nonpip_v1.sh index 7a4fb54e250..8817b19fa7b 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py2_full/nonpip_v1.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py2_full/nonpip_v1.sh @@ -37,7 +37,7 @@ yes "" | "$PYTHON_BIN_PATH" configure.py tag_filters="-no_oss,-oss_serial,-nomac,-no_mac,-no_oss_py2" # Get the default test targets for bazel. 
-source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Run tests bazel test --test_output=errors --config=opt \ diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py35_full/nonpip.sh b/tensorflow/tools/ci_build/release/macos/cpu_py35_full/nonpip.sh index 06fabd7b1c7..7e85779a207 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py35_full/nonpip.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py35_full/nonpip.sh @@ -38,7 +38,7 @@ yes "" | "$PYTHON_BIN_PATH" configure.py tag_filters="-no_oss,-oss_serial,-nomac,-no_mac,-no_oss_py35,-v1only,-gpu,-tpu,-benchmark-test" # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Run tests set +e diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py36_full/nonpip.sh b/tensorflow/tools/ci_build/release/macos/cpu_py36_full/nonpip.sh index 51cc3da62d6..07d4f7957af 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py36_full/nonpip.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py36_full/nonpip.sh @@ -38,7 +38,7 @@ yes "" | "$PYTHON_BIN_PATH" configure.py tag_filters="-no_oss,-oss_serial,-nomac,-no_mac,-no_oss_py36,-v1only,-gpu,-tpu,-benchmark-test" # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Run tests set +e diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py36_full/nonpip_v1.sh b/tensorflow/tools/ci_build/release/macos/cpu_py36_full/nonpip_v1.sh index 01e95c37bae..2f639d7fc6b 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py36_full/nonpip_v1.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py36_full/nonpip_v1.sh @@ -36,7 +36,7 @@ yes "" | "$PYTHON_BIN_PATH" configure.py tag_filters="-no_oss,-oss_serial,-nomac,-no_mac" # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Run tests bazel test --test_output=errors --config=opt \ diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py37_full/nonpip.sh b/tensorflow/tools/ci_build/release/macos/cpu_py37_full/nonpip.sh index e0f2968b45a..a23ca47a038 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py37_full/nonpip.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py37_full/nonpip.sh @@ -38,7 +38,7 @@ yes "" | "$PYTHON_BIN_PATH" configure.py tag_filters="-no_oss,-oss_serial,-nomac,-no_mac$(maybe_skip_v1),-gpu,-tpu,-benchmark-test" # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Run tests set +e diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py37_full/nonpip_v1.sh b/tensorflow/tools/ci_build/release/macos/cpu_py37_full/nonpip_v1.sh index 45d61222726..a05cd81d74f 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py37_full/nonpip_v1.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py37_full/nonpip_v1.sh @@ -32,7 +32,7 @@ export PYTHON_BIN_PATH=$(which python3.7) yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. 
-source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh tag_filters="-no_oss,-oss_serial,-nomac,-no_mac" diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py38_full/nonpip.sh b/tensorflow/tools/ci_build/release/macos/cpu_py38_full/nonpip.sh index 22475f35491..179ecdf97ca 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py38_full/nonpip.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py38_full/nonpip.sh @@ -38,7 +38,7 @@ yes "" | "$PYTHON_BIN_PATH" configure.py tag_filters="-no_oss,-oss_serial,-nomac,-no_mac$(maybe_skip_v1),-gpu,-tpu,-benchmark-test" # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Run tests set +e diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py2_full/nonpip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py2_full/nonpip.sh index 5bdb5794e95..8323625662f 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py2_full/nonpip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py2_full/nonpip.sh @@ -35,7 +35,7 @@ yes "" | "$PYTHON_BIN_PATH" configure.py tag_filters="-no_oss,-oss_serial,-gpu,-tpu,-benchmark-test,-no_oss_py2,-v1only" # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Run tests set +e diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py2_full/nonpip_v1.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py2_full/nonpip_v1.sh index 892c8d07efd..f9241673fd1 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py2_full/nonpip_v1.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py2_full/nonpip_v1.sh @@ -34,7 +34,7 @@ yes "" | "$PYTHON_BIN_PATH" configure.py tag_filters="-no_oss,-oss_serial,-gpu,-tpu,-benchmark-test,-no_oss_py2" # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Run tests bazel test --test_output=errors --config=opt --test_lang_filters=py \ diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py2_full/pip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py2_full/pip.sh index 8524bbbad03..aa1e4b52483 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py2_full/pip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py2_full/pip.sh @@ -38,7 +38,7 @@ export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. 
-source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Export optional variables for running pip.sh export TF_BUILD_FLAGS="--config=opt --config=v2 --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain" diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/nonpip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/nonpip.sh index 5339671cce3..fee64f0beb1 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/nonpip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/nonpip.sh @@ -34,7 +34,7 @@ yes "" | "$PYTHON_BIN_PATH" configure.py tag_filters="-no_oss,-oss_serial,-gpu,-tpu,-benchmark-test,-no_oss_py35,-v1only" # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Run tests set +e diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/nonpip_v1.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/nonpip_v1.sh index b91c542011b..4231891fbdb 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/nonpip_v1.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/nonpip_v1.sh @@ -33,7 +33,7 @@ yes "" | "$PYTHON_BIN_PATH" configure.py tag_filters="-no_oss,-oss_serial,-gpu,-tpu,-benchmark-test,-no_oss_py35" # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Run tests bazel test --test_output=errors --config=opt --test_lang_filters=py \ diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/pip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/pip.sh index 0dac1c72898..bdbb7f15e34 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/pip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/pip.sh @@ -32,7 +32,7 @@ export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Export optional variables for running pip.sh export TF_BUILD_FLAGS="--config=release_cpu_linux" diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/nonpip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/nonpip.sh index c2790420afc..6b05141f00f 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/nonpip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/nonpip.sh @@ -34,7 +34,7 @@ yes "" | "$PYTHON_BIN_PATH" configure.py tag_filters="-no_oss,-oss_serial,-gpu,-tpu,-benchmark-test,-no_oss_py36,-v1only" # Get the default test targets for bazel. 
-source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Run tests set +e diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/nonpip_v1.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/nonpip_v1.sh index 5481ae4e00e..38d03c8868c 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/nonpip_v1.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/nonpip_v1.sh @@ -33,7 +33,7 @@ yes "" | "$PYTHON_BIN_PATH" configure.py tag_filters="-no_oss,-oss_serial,-gpu,-tpu,-benchmark-test,-no_oss_py36" # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Run tests bazel test --test_output=errors --config=opt --test_lang_filters=py \ diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/pip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/pip.sh index 52f299ee6db..6277291043c 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/pip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/pip.sh @@ -32,7 +32,7 @@ export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Export optional variables for running pip.sh export TF_BUILD_FLAGS="--config=release_cpu_linux" diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py37_full/nonpip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py37_full/nonpip.sh index f6415a7c9ad..db0c6056b6c 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py37_full/nonpip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py37_full/nonpip.sh @@ -34,7 +34,7 @@ yes "" | "$PYTHON_BIN_PATH" configure.py tag_filters="-no_oss,-oss_serial,-gpu,-tpu,-benchmark-test,-no_oss_py37,-v1only" # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Run tests set +e diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py37_full/nonpip_v1.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py37_full/nonpip_v1.sh index c0191560555..098155aa026 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py37_full/nonpip_v1.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py37_full/nonpip_v1.sh @@ -33,7 +33,7 @@ yes "" | "$PYTHON_BIN_PATH" configure.py tag_filters="-no_oss,-oss_serial,-gpu,-tpu,-benchmark-test,-no_oss_py37" # Get the default test targets for bazel. 
-source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Run tests bazel test --test_output=errors --config=opt --test_lang_filters=py \ diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py37_full/pip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py37_full/pip.sh index 4d21f5a28a9..ff88ae46f39 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py37_full/pip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py37_full/pip.sh @@ -32,7 +32,7 @@ export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Export optional variables for running pip.sh export TF_BUILD_FLAGS="--config=release_cpu_linux" diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py38_full/nonpip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py38_full/nonpip.sh index ff7a9f3baef..36da30167d0 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py38_full/nonpip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py38_full/nonpip.sh @@ -34,7 +34,7 @@ yes "" | "$PYTHON_BIN_PATH" configure.py tag_filters="-no_oss,-oss_serial,-gpu,-tpu,-benchmark-test,-no_oss_py38,-v1only" # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Run tests set +e diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py38_full/pip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py38_full/pip.sh index 3a4116faa13..52872cfd0a6 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py38_full/pip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py38_full/pip.sh @@ -32,7 +32,7 @@ export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Export optional variables for running pip.sh export TF_BUILD_FLAGS="--config=release_cpu_linux" diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/nonpip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/nonpip.sh index 1dce4b101e3..e8c8b763d4b 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/nonpip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/nonpip.sh @@ -41,7 +41,7 @@ export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. 
-source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh tag_filters="gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py2" diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/nonpip_v1.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/nonpip_v1.sh index c8695bdfbdd..20e7977945f 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/nonpip_v1.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/nonpip_v1.sh @@ -40,7 +40,7 @@ export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh tag_filters="gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py2" diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/pip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/pip.sh index 1bf074dde4e..b3f7f158648 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/pip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/pip.sh @@ -45,7 +45,7 @@ export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Export optional variables for running pip.sh export TF_TEST_FILTER_TAGS='gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial' diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nonpip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nonpip.sh index d9a10c9551d..47ed3c4fd2a 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nonpip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nonpip.sh @@ -41,7 +41,7 @@ export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh tag_filters="gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py35" diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nonpip_v1.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nonpip_v1.sh index dea186ea62e..e4a5a69c10f 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nonpip_v1.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nonpip_v1.sh @@ -40,7 +40,7 @@ export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. 
-source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh tag_filters="gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py35" diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/pip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/pip.sh index b726a85e564..2a5c550890b 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/pip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/pip.sh @@ -32,7 +32,7 @@ export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Export optional variables for running pip.sh export TF_TEST_FILTER_TAGS='gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py35' diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nonpip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nonpip.sh index 547bb0a1fba..70038a8d875 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nonpip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nonpip.sh @@ -41,7 +41,7 @@ export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh tag_filters="gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py36" diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nonpip_v1.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nonpip_v1.sh index 42ea884f790..aaa4d017546 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nonpip_v1.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nonpip_v1.sh @@ -40,7 +40,7 @@ export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh tag_filters="gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py36" diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/pip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/pip.sh index 6fa72b86011..9aa724c27b9 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/pip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/pip.sh @@ -32,7 +32,7 @@ export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. 
-source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Export optional variables for running pip.sh export TF_TEST_FILTER_TAGS='gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py36' diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nonpip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nonpip.sh index 54a72459fa1..225b2cf4b7b 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nonpip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nonpip.sh @@ -41,7 +41,7 @@ export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh tag_filters="gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py37" diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nonpip_v1.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nonpip_v1.sh index 7c2a93f042e..112f232a8e3 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nonpip_v1.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nonpip_v1.sh @@ -40,7 +40,7 @@ export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh tag_filters="gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py37" diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/pip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/pip.sh index 53f9cb11d75..9bfc6608a0b 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/pip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/pip.sh @@ -32,7 +32,7 @@ export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Export optional variables for running pip.sh export TF_TEST_FILTER_TAGS='gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py37' diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/nonpip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/nonpip.sh index ab88f4712f0..f7678b7436f 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/nonpip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/nonpip.sh @@ -41,7 +41,7 @@ export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. 
-source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh tag_filters="gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py38" diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/pip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/pip.sh index ecb4e6ae523..d8838e7704a 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/pip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/pip.sh @@ -32,7 +32,7 @@ export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. -source tensorflow/tools/ci_build/build_scripts/PRESUBMIT_BUILD_TARGETS.sh +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Export optional variables for running pip.sh export TF_TEST_FILTER_TAGS='gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py38' From bd68226ff6702101f07c988276f6e4f2a06af238 Mon Sep 17 00:00:00 2001 From: Nat Jeffries Date: Thu, 23 Jul 2020 15:40:43 -0700 Subject: [PATCH 1220/2522] Port the tanh and unpack kernels to the new TfLiteEvalTensor API along with other minor clean-ups. PiperOrigin-RevId: 322881825 Change-Id: Ie1615d69e374feab404013d9b891d8d9031244aa --- tensorflow/lite/micro/kernels/BUILD | 2 + tensorflow/lite/micro/kernels/tanh.cc | 63 ++-- tensorflow/lite/micro/kernels/tanh_test.cc | 76 +---- tensorflow/lite/micro/kernels/unpack.cc | 18 +- tensorflow/lite/micro/kernels/unpack_test.cc | 320 +++++++------------ 5 files changed, 188 insertions(+), 291 deletions(-) diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index ddf9dd7633d..b9862005346 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD @@ -399,6 +399,7 @@ tflite_micro_cc_test( "unpack_test.cc", ], deps = [ + ":kernel_runner", "//tensorflow/lite/c:common", "//tensorflow/lite/micro:debug_log", "//tensorflow/lite/micro:op_resolvers", @@ -625,6 +626,7 @@ tflite_micro_cc_test( name = "tanh_test", srcs = ["tanh_test.cc"], deps = [ + ":kernel_runner", "//tensorflow/lite/c:common", "//tensorflow/lite/micro:micro_framework", "//tensorflow/lite/micro:op_resolvers", diff --git a/tensorflow/lite/micro/kernels/tanh.cc b/tensorflow/lite/micro/kernels/tanh.cc index d5f39d4796c..5fa32f8f7ce 100644 --- a/tensorflow/lite/micro/kernels/tanh.cc +++ b/tensorflow/lite/micro/kernels/tanh.cc @@ -23,6 +23,7 @@ limitations under the License. 
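The pattern is the same in both kernels this commit ports: Init allocates a persistent OpData, Prepare caches the quantization parameters that Eval previously recomputed on every call, and Eval reads tensors exclusively through the TfLiteEvalTensor helpers in micro/kernels/kernel_util.h. A minimal sketch of that shape follows; the OpData layout and the identity math are placeholders for illustration, not the upstream tanh implementation.

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace {

struct OpData {
  int32_t input_zero_point;  // cached once in Prepare, reused on every Eval
};

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  // The persistent allocation survives across invocations, so Eval never
  // has to recompute per-op parameters.
  return context->AllocatePersistentBuffer(context, sizeof(OpData));
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  OpData* data = static_cast<OpData*>(node->user_data);
  const TfLiteTensor* input = tflite::GetInput(context, node, /*index=*/0);
  data->input_zero_point = input->params.zero_point;
  return kTfLiteOk;
}

TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  // Eval only touches TfLiteEvalTensor; TfLiteTensor stays a Prepare-time type.
  const TfLiteEvalTensor* input =
      tflite::micro::GetEvalInput(context, node, /*index=*/0);
  TfLiteEvalTensor* output =
      tflite::micro::GetEvalOutput(context, node, /*index=*/0);
  const OpData* data = static_cast<const OpData*>(node->user_data);
  (void)data;  // quantized paths would read data->input_zero_point here
  const float* in = tflite::micro::GetTensorData<float>(input);
  float* out = tflite::micro::GetTensorData<float>(output);
  const int flat_size = tflite::micro::GetTensorShape(input).FlatSize();
  for (int i = 0; i < flat_size; ++i) {
    out[i] = in[i];  // stand-in for the op-specific math (reference_ops::Tanh, ...)
  }
  return kTfLiteOk;
}

}  // namespace

On the test side the diff makes the matching swap: the hand-written registration->init/prepare/invoke sequence and the TfLiteNode bookkeeping are replaced by micro::KernelRunner, so each test only supplies the tensors, the two TfLiteIntArray index lists, and optional builtin data, then checks runner.InitAndPrepare() and runner.Invoke().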
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/op_macros.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" #include "tensorflow/lite/micro/micro_utils.h" namespace tflite { @@ -40,6 +41,11 @@ struct OpData { int input_left_shift; }; +void* TanhInit(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpData)); +} + TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node, OpData* data) { TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); @@ -63,45 +69,64 @@ TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node, } return kTfLiteOk; } + +TfLiteStatus TanhPrepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + + OpData* data = static_cast(node->user_data); + + const TfLiteTensor* input = GetInput(context, node, kInputTensor); + data->input_zero_point = input->params.zero_point; + return CalculateArithmeticOpData(context, node, data); +} + } // namespace TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) { - const TfLiteTensor* input = GetInput(context, node, kInputTensor); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); - OpData data; - CalculateArithmeticOpData(context, node, &data); + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + + TFLITE_DCHECK(node->user_data != nullptr); + const OpData& data = *(static_cast(node->user_data)); + switch (input->type) { case kTfLiteFloat32: { - reference_ops::Tanh(GetTensorShape(input), GetTensorData(input), - GetTensorShape(output), GetTensorData(output)); + reference_ops::Tanh(tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); return kTfLiteOk; } break; case kTfLiteInt16: { TanhParams params; params.input_left_shift = data.input_left_shift; - reference_ops::Tanh(params, GetTensorShape(input), - GetTensorData(input), GetTensorShape(output), - GetTensorData(output)); + reference_ops::Tanh(params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); return kTfLiteOk; } break; case kTfLiteUInt8: { TanhParams params; - params.input_zero_point = input->params.zero_point; + params.input_zero_point = data.input_zero_point; params.input_range_radius = data.input_range_radius; params.input_multiplier = data.input_multiplier; params.input_left_shift = data.input_left_shift; - reference_ops::Tanh(params, GetTensorShape(input), - GetTensorData(input), GetTensorShape(output), - GetTensorData(output)); + reference_ops::Tanh(params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); return kTfLiteOk; } break; case kTfLiteInt8: { reference_integer_ops::Tanh( - input->params.zero_point, data.input_range_radius, - data.input_multiplier, data.input_left_shift, - NumElements(input->dims), GetTensorData(input), - GetTensorData(output)); + data.input_zero_point, data.input_range_radius, data.input_multiplier, + data.input_left_shift, NumElements(input->dims), + 
tflite::micro::GetTensorData(input), + tflite::micro::GetTensorData(output)); return kTfLiteOk; } break; default: @@ -115,9 +140,9 @@ TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) { } // namespace activations TfLiteRegistration Register_TANH() { - return {/*init=*/nullptr, + return {/*init=*/activations::TanhInit, /*free=*/nullptr, - /*prepare=*/nullptr, + /*prepare=*/activations::TanhPrepare, /*invoke=*/activations::TanhEval, /*profiling_string=*/nullptr, /*builtin_code=*/0, diff --git a/tensorflow/lite/micro/kernels/tanh_test.cc b/tensorflow/lite/micro/kernels/tanh_test.cc index 4ad51a189ec..ef1564f4675 100644 --- a/tensorflow/lite/micro/kernels/tanh_test.cc +++ b/tensorflow/lite/micro/kernels/tanh_test.cc @@ -15,7 +15,7 @@ limitations under the License. #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/micro/all_ops_resolver.h" +#include "tensorflow/lite/micro/kernels/kernel_runner.h" #include "tensorflow/lite/micro/testing/micro_test.h" #include "tensorflow/lite/micro/testing/test_utils.h" @@ -81,40 +81,19 @@ void TestTanhFloat(const int input_dims_data[], const float* input_data, CreateFloatTensor(output_data, output_dims), }; - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_TANH); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - - const char* init_data = nullptr; - size_t init_data_size = 0; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, init_data, init_data_size); - } int inputs_array_data[] = {1, 0}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 1}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = nullptr; - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - if (registration->free) { - registration->free(&context, user_data); - } + const TfLiteRegistration registration = tflite::ops::micro::Register_TANH(); + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, /*builtin_data=*/nullptr, + micro_test::reporter); + + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); + for (int i = 0; i < output_elements_count; ++i) { TF_LITE_MICRO_EXPECT_NEAR(expected_output_data[i], output_data[i], tolerance); @@ -147,40 +126,19 @@ void TestTanhQuantized(const int input_dims_data[], const float* input_data, CreateQuantizedTensor(output_quantized, output_dims, output_scale, output_zero_point)}; - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_TANH); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - - const char* init_data = nullptr; - size_t init_data_size = 0; - void* user_data = nullptr; - if (registration->init) { - user_data = 
registration->init(&context, init_data, init_data_size); - } int inputs_array_data[] = {1, 0}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 1}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = nullptr; - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - if (registration->free) { - registration->free(&context, user_data); - } + const TfLiteRegistration registration = tflite::ops::micro::Register_TANH(); + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, /*builtin_data=*/nullptr, + micro_test::reporter); + + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); + for (int i = 0; i < output_elements_count; ++i) { TF_LITE_MICRO_EXPECT_NEAR(expected_output_quantized[i], output_quantized[i], tolerance); diff --git a/tensorflow/lite/micro/kernels/unpack.cc b/tensorflow/lite/micro/kernels/unpack.cc index 2c36fc56634..557cc57ac7e 100644 --- a/tensorflow/lite/micro/kernels/unpack.cc +++ b/tensorflow/lite/micro/kernels/unpack.cc @@ -17,6 +17,7 @@ limitations under the License. #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { namespace ops { @@ -28,14 +29,16 @@ constexpr int kInputTensor = 0; template TfLiteStatus UnpackImpl(TfLiteContext* context, TfLiteNode* node, - const TfLiteTensor* input, int output_count, int axis) { - const TfLiteTensor* output0 = GetOutput(context, node, 0); + const TfLiteEvalTensor* input, int output_count, + int axis) { + const TfLiteEvalTensor* output0 = + tflite::micro::GetEvalOutput(context, node, 0); const TfLiteIntArray* input_dims = input->dims; const TfLiteIntArray* output_dims = output0->dims; const int dimensions = input_dims->size; if (axis < 0) { - axis += NumDimensions(input); + axis += input->dims->size; } TFLITE_DCHECK_LT(axis, dimensions); @@ -54,11 +57,11 @@ TfLiteStatus UnpackImpl(TfLiteContext* context, TfLiteNode* node, } TFLITE_DCHECK_EQ(output_size, copy_size * outer_size); - const T* input_data = GetTensorData(input); + const T* input_data = tflite::micro::GetTensorData(input); for (int i = 0; i < output_count; ++i) { - TfLiteTensor* t = GetOutput(context, node, i); - T* output_data = GetTensorData(t); + TfLiteEvalTensor* t = tflite::micro::GetEvalOutput(context, node, i); + T* output_data = tflite::micro::GetTensorData(t); for (int k = 0; k < outer_size; ++k) { T* output_ptr = output_data + copy_size * k; int loc = k * output_count * copy_size + i * copy_size; @@ -74,7 +77,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { TfLiteUnpackParams* data = reinterpret_cast(node->builtin_data); - const TfLiteTensor* input = GetInput(context, node, kInputTensor); + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); switch (input->type) { case kTfLiteFloat32: { diff --git a/tensorflow/lite/micro/kernels/unpack_test.cc 
b/tensorflow/lite/micro/kernels/unpack_test.cc index 1b801c2901d..5b2c36cdf3f 100644 --- a/tensorflow/lite/micro/kernels/unpack_test.cc +++ b/tensorflow/lite/micro/kernels/unpack_test.cc @@ -15,8 +15,8 @@ limitations under the License. #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/micro/all_ops_resolver.h" #include "tensorflow/lite/micro/debug_log.h" +#include "tensorflow/lite/micro/kernels/kernel_runner.h" #include "tensorflow/lite/micro/testing/micro_test.h" #include "tensorflow/lite/micro/testing/test_utils.h" @@ -24,19 +24,15 @@ namespace tflite { namespace testing { void TestUnpackThreeOutputsFloat( - std::initializer_list input_dims_data, - std::initializer_list input_data, int axis, - std::initializer_list output1_dims_data, - std::initializer_list expected_output1_data, - std::initializer_list output2_dims_data, - std::initializer_list expected_output2_data, - std::initializer_list output3_dims_data, - std::initializer_list expected_output3_data, float* output1_data, - float* output2_data, float* output3_data) { - TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data); - TfLiteIntArray* output1_dims = IntArrayFromInitializer(output1_dims_data); - TfLiteIntArray* output2_dims = IntArrayFromInitializer(output2_dims_data); - TfLiteIntArray* output3_dims = IntArrayFromInitializer(output3_dims_data); + const int* input_dims_data, const float* input_data, int axis, + const int* output1_dims_data, const float* expected_output1_data, + const int* output2_dims_data, const float* expected_output2_data, + const int* output3_dims_data, const float* expected_output3_data, + float* output1_data, float* output2_data, float* output3_data) { + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); + TfLiteIntArray* output1_dims = IntArrayFromInts(output1_dims_data); + TfLiteIntArray* output2_dims = IntArrayFromInts(output2_dims_data); + TfLiteIntArray* output3_dims = IntArrayFromInts(output3_dims_data); const int output1_dims_count = ElementCount(*output1_dims); const int output2_dims_count = ElementCount(*output2_dims); const int output3_dims_count = ElementCount(*output3_dims); @@ -63,68 +59,44 @@ void TestUnpackThreeOutputsFloat( output3_data[i] = 23; } - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_UNPACK); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - TfLiteUnpackParams builtin_data = { .num = 3, .axis = axis, }; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, nullptr, 0); - } int inputs_array_data[] = {1, 0}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {3, 1, 2, 3}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = reinterpret_cast(&builtin_data); - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; + const TfLiteRegistration registration = tflite::ops::micro::Register_UNPACK(); + micro::KernelRunner runner( + registration, tensors, tensors_size, inputs_array, outputs_array, + reinterpret_cast(&builtin_data), micro_test::reporter); - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - 
TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - if (registration->free) { - registration->free(&context, user_data); - } + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); for (int i = 0; i < output1_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_NEAR(expected_output1_data.begin()[i], output1_data[i], - 1e-5f); + TF_LITE_MICRO_EXPECT_NEAR(expected_output1_data[i], output1_data[i], 1e-5f); } for (int i = 0; i < output2_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_NEAR(expected_output2_data.begin()[i], output2_data[i], - 1e-5f); + TF_LITE_MICRO_EXPECT_NEAR(expected_output2_data[i], output2_data[i], 1e-5f); } for (int i = 0; i < output3_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_NEAR(expected_output3_data.begin()[i], output3_data[i], - 1e-5f); + TF_LITE_MICRO_EXPECT_NEAR(expected_output3_data[i], output3_data[i], 1e-5f); } } -void TestUnpackOneOutputFloat(std::initializer_list input_dims_data, - std::initializer_list input_data, int axis, - std::initializer_list output_dims_data, - std::initializer_list expected_output_data, +void TestUnpackOneOutputFloat(const int* input_dims_data, + const float* input_data, int axis, + const int* output_dims_data, + const float* expected_output_data, float* output_data) { - TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data); - TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); const int output_dims_count = ElementCount(*output_dims); constexpr int input_size = 1; @@ -139,65 +111,39 @@ void TestUnpackOneOutputFloat(std::initializer_list input_dims_data, output_data[i] = 23; } - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_UNPACK); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - TfLiteUnpackParams builtin_data = { .num = 1, .axis = axis, }; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, nullptr, 0); - } int inputs_array_data[] = {1, 0}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 1}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = reinterpret_cast(&builtin_data); - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; + const TfLiteRegistration registration = tflite::ops::micro::Register_UNPACK(); + micro::KernelRunner runner( + registration, tensors, tensors_size, inputs_array, outputs_array, + reinterpret_cast(&builtin_data), micro_test::reporter); - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - if (registration->free) { - registration->free(&context, user_data); - } + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); for (int i = 0; i < output_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], 
output_data[i], - 1e-5f); + TF_LITE_MICRO_EXPECT_NEAR(expected_output_data[i], output_data[i], 1e-5f); } } void TestUnpackThreeOutputsQuantized( - std::initializer_list input_dims_data, - std::initializer_list input_data, int axis, - std::initializer_list output1_dims_data, - std::initializer_list expected_output1_data, - std::initializer_list output2_dims_data, - std::initializer_list expected_output2_data, - std::initializer_list output3_dims_data, - std::initializer_list expected_output3_data, uint8_t* output1_data, - uint8_t* output2_data, uint8_t* output3_data) { - TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data); - TfLiteIntArray* output1_dims = IntArrayFromInitializer(output1_dims_data); - TfLiteIntArray* output2_dims = IntArrayFromInitializer(output2_dims_data); - TfLiteIntArray* output3_dims = IntArrayFromInitializer(output3_dims_data); + const int* input_dims_data, const uint8_t* input_data, int axis, + const int* output1_dims_data, const uint8_t* expected_output1_data, + const int* output2_dims_data, const uint8_t* expected_output2_data, + const int* output3_dims_data, const uint8_t* expected_output3_data, + uint8_t* output1_data, uint8_t* output2_data, uint8_t* output3_data) { + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); + TfLiteIntArray* output1_dims = IntArrayFromInts(output1_dims_data); + TfLiteIntArray* output2_dims = IntArrayFromInts(output2_dims_data); + TfLiteIntArray* output3_dims = IntArrayFromInts(output3_dims_data); const int output1_dims_count = ElementCount(*output1_dims); const int output2_dims_count = ElementCount(*output2_dims); const int output3_dims_count = ElementCount(*output3_dims); @@ -227,72 +173,47 @@ void TestUnpackThreeOutputsQuantized( output3_data[i] = 23; } - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_UNPACK); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - TfLiteUnpackParams builtin_data = { .num = 3, .axis = axis, }; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, nullptr, 0); - } int inputs_array_data[] = {1, 0}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {3, 1, 2, 3}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = reinterpret_cast(&builtin_data); - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; + const TfLiteRegistration registration = tflite::ops::micro::Register_UNPACK(); + micro::KernelRunner runner( + registration, tensors, tensors_size, inputs_array, outputs_array, + reinterpret_cast(&builtin_data), micro_test::reporter); - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - if (registration->free) { - registration->free(&context, user_data); - } + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); for (int i = 0; i < output1_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_EQ(expected_output1_data.begin()[i], output1_data[i]); + TF_LITE_MICRO_EXPECT_EQ(expected_output1_data[i], 
output1_data[i]); } for (int i = 0; i < output2_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_EQ(expected_output2_data.begin()[i], output2_data[i]); + TF_LITE_MICRO_EXPECT_EQ(expected_output2_data[i], output2_data[i]); } for (int i = 0; i < output3_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_EQ(expected_output3_data.begin()[i], output3_data[i]); + TF_LITE_MICRO_EXPECT_EQ(expected_output3_data[i], output3_data[i]); } } void TestUnpackThreeOutputsQuantized32( - std::initializer_list input_dims_data, - std::initializer_list input_data, int axis, - std::initializer_list output1_dims_data, - std::initializer_list expected_output1_data, - std::initializer_list output2_dims_data, - std::initializer_list expected_output2_data, - std::initializer_list output3_dims_data, - std::initializer_list expected_output3_data, int32_t* output1_data, - int32_t* output2_data, int32_t* output3_data) { - TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data); - TfLiteIntArray* output1_dims = IntArrayFromInitializer(output1_dims_data); - TfLiteIntArray* output2_dims = IntArrayFromInitializer(output2_dims_data); - TfLiteIntArray* output3_dims = IntArrayFromInitializer(output3_dims_data); + const int* input_dims_data, const int32_t* input_data, int axis, + const int* output1_dims_data, const int32_t* expected_output1_data, + const int* output2_dims_data, const int32_t* expected_output2_data, + const int* output3_dims_data, const int32_t* expected_output3_data, + int32_t* output1_data, int32_t* output2_data, int32_t* output3_data) { + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); + TfLiteIntArray* output1_dims = IntArrayFromInts(output1_dims_data); + TfLiteIntArray* output2_dims = IntArrayFromInts(output2_dims_data); + TfLiteIntArray* output3_dims = IntArrayFromInts(output3_dims_data); const int output1_dims_count = ElementCount(*output1_dims); const int output2_dims_count = ElementCount(*output2_dims); const int output3_dims_count = ElementCount(*output3_dims); @@ -319,55 +240,34 @@ void TestUnpackThreeOutputsQuantized32( output3_data[i] = 23; } - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_UNPACK); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - TfLiteUnpackParams builtin_data = { .num = 3, .axis = axis, }; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, nullptr, 0); - } int inputs_array_data[] = {1, 0}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {3, 1, 2, 3}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = reinterpret_cast(&builtin_data); - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; + const TfLiteRegistration registration = tflite::ops::micro::Register_UNPACK(); + micro::KernelRunner runner( + registration, tensors, tensors_size, inputs_array, outputs_array, + reinterpret_cast(&builtin_data), micro_test::reporter); - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - if (registration->free) { - registration->free(&context, user_data); 
- } + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); for (int i = 0; i < output1_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_EQ(expected_output1_data.begin()[i], output1_data[i]); + TF_LITE_MICRO_EXPECT_EQ(expected_output1_data[i], output1_data[i]); } for (int i = 0; i < output2_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_EQ(expected_output2_data.begin()[i], output2_data[i]); + TF_LITE_MICRO_EXPECT_EQ(expected_output2_data[i], output2_data[i]); } for (int i = 0; i < output3_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_EQ(expected_output3_data.begin()[i], output3_data[i]); + TF_LITE_MICRO_EXPECT_EQ(expected_output3_data[i], output3_data[i]); } } @@ -377,6 +277,14 @@ void TestUnpackThreeOutputsQuantized32( TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(UnpackFloatThreeOutputs) { + const int input_shape[] = {2, 3, 2}; + const float input_values[] = {1, 2, 3, 4, 5, 6}; + const int output1_shape[] = {1, 2}; + const float output1_golden[] = {1, 2}; + const int output2_shape[] = {1, 2}; + const float output2_golden[] = {3, 4}; + const int output3_shape[] = {1, 2}; + const float output3_golden[] = {5, 6}; constexpr int output1_dims_count = 2; constexpr int output2_dims_count = 2; constexpr int output3_dims_count = 2; @@ -384,18 +292,20 @@ TF_LITE_MICRO_TEST(UnpackFloatThreeOutputs) { float output2_data[output2_dims_count]; float output3_data[output3_dims_count]; tflite::testing::TestUnpackThreeOutputsFloat( - {2, 3, 2}, // Input shape - {1, 2, 3, 4, 5, 6}, // Input values - 0, {1, 2}, // Output1 shape - {1, 2}, // Output1 values - {1, 2}, // Output2 shape - {3, 4}, // Output2 values - {1, 2}, // Output3 shape - {5, 6}, // Output3 values + input_shape, input_values, 0, output1_shape, output1_golden, + output2_shape, output2_golden, output3_shape, output3_golden, output1_data, output2_data, output3_data); } TF_LITE_MICRO_TEST(UnpackFloatThreeOutputsNegativeAxisTwo) { + const int input_shape[] = {2, 3, 2}; + const float input_values[] = {1, 2, 3, 4, 5, 6}; + const int output1_shape[] = {1, 2}; + const float output1_golden[] = {1, 2}; + const int output2_shape[] = {1, 2}; + const float output2_golden[] = {3, 4}; + const int output3_shape[] = {1, 2}; + const float output3_golden[] = {5, 6}; constexpr int output1_dims_count = 2; constexpr int output2_dims_count = 2; constexpr int output3_dims_count = 2; @@ -403,29 +313,31 @@ TF_LITE_MICRO_TEST(UnpackFloatThreeOutputsNegativeAxisTwo) { float output2_data[output2_dims_count]; float output3_data[output3_dims_count]; tflite::testing::TestUnpackThreeOutputsFloat( - {2, 3, 2}, // Input shape - {1, 2, 3, 4, 5, 6}, // Input values - -2, {1, 2}, // Output1 shape - {1, 2}, // Output1 values - {1, 2}, // Output2 shape - {3, 4}, // Output2 values - {1, 2}, // Output3 shape - {5, 6}, // Output3 values + input_shape, input_values, -2, output1_shape, output1_golden, + output2_shape, output2_golden, output3_shape, output3_golden, output1_data, output2_data, output3_data); } TF_LITE_MICRO_TEST(UnpackFloatOneOutput) { + const int input_shape[] = {2, 1, 6}; + const float input_values[] = {1, 2, 3, 4, 5, 6}; + const int output_shape[] = {1, 6}; + const float golden[] = {1, 2, 3, 4, 5, 6}; constexpr int output_dims_count = 6; float output_data[output_dims_count]; - tflite::testing::TestUnpackOneOutputFloat( - {2, 1, 6}, // Input shape - {1, 2, 3, 4, 5, 6}, // Input values - 0, {1, 6}, // Output shape - {1, 2, 3, 4, 5, 6}, // Output values - output_data); + tflite::testing::TestUnpackOneOutputFloat(input_shape, 
input_values, 0, + output_shape, golden, output_data); } TF_LITE_MICRO_TEST(UnpackQuantizedThreeOutputs) { + const int input_shape[] = {2, 3, 2}; + const uint8_t input_values[] = {1, 2, 3, 4, 5, 6}; + const int output1_shape[] = {1, 2}; + const uint8_t output1_golden[] = {1, 2}; + const int output2_shape[] = {1, 2}; + const uint8_t output2_golden[] = {3, 4}; + const int output3_shape[] = {1, 2}; + const uint8_t output3_golden[] = {5, 6}; constexpr int output1_dims_count = 2; constexpr int output2_dims_count = 2; constexpr int output3_dims_count = 2; @@ -433,18 +345,20 @@ TF_LITE_MICRO_TEST(UnpackQuantizedThreeOutputs) { uint8_t output2_data[output2_dims_count]; uint8_t output3_data[output3_dims_count]; tflite::testing::TestUnpackThreeOutputsQuantized( - {2, 3, 2}, // Input shape - {1, 2, 3, 4, 5, 6}, // Input values - 0, {1, 2}, // Output1 shape - {1, 2}, // Output1 values - {1, 2}, // Output2 shape - {3, 4}, // Output2 values - {1, 2}, // Output3 shape - {5, 6}, // Output3 values + input_shape, input_values, 0, output1_shape, output1_golden, + output2_shape, output2_golden, output3_shape, output3_golden, output1_data, output2_data, output3_data); } TF_LITE_MICRO_TEST(UnpackQuantized32ThreeOutputs) { + const int input_shape[] = {2, 3, 2}; + const int32_t input_values[] = {1, 2, 3, 4, 5, 6}; + const int output1_shape[] = {1, 2}; + const int32_t output1_golden[] = {1, 2}; + const int output2_shape[] = {1, 2}; + const int32_t output2_golden[] = {3, 4}; + const int output3_shape[] = {1, 2}; + const int32_t output3_golden[] = {5, 6}; constexpr int output1_dims_count = 2; constexpr int output2_dims_count = 2; constexpr int output3_dims_count = 2; @@ -452,14 +366,8 @@ TF_LITE_MICRO_TEST(UnpackQuantized32ThreeOutputs) { int32_t output2_data[output2_dims_count]; int32_t output3_data[output3_dims_count]; tflite::testing::TestUnpackThreeOutputsQuantized32( - {2, 3, 2}, // Input shape - {1, 2, 3, 4, 5, 6}, // Input values - 0, {1, 2}, // Output1 shape - {1, 2}, // Output1 values - {1, 2}, // Output2 shape - {3, 4}, // Output2 values - {1, 2}, // Output3 shape - {5, 6}, // Output3 values + input_shape, input_values, 0, output1_shape, output1_golden, + output2_shape, output2_golden, output3_shape, output3_golden, output1_data, output2_data, output3_data); } From 55018a48503cc74584787de741588f3b9aa22bbd Mon Sep 17 00:00:00 2001 From: David Rim Date: Thu, 23 Jul 2020 16:03:39 -0700 Subject: [PATCH 1221/2522] Add option to revert to previous hybrid quantization scheme PiperOrigin-RevId: 322886366 Change-Id: I7665938cfecd27e1be09ccfdd145ec02eb03d675 --- .../lite/tools/optimize/quantize_weights.cc | 91 ++++++++++++------- .../lite/tools/optimize/quantize_weights.h | 15 +++ .../tools/optimize/quantize_weights_test.cc | 58 +++++++++++- 3 files changed, 129 insertions(+), 35 deletions(-) diff --git a/tensorflow/lite/tools/optimize/quantize_weights.cc b/tensorflow/lite/tools/optimize/quantize_weights.cc index 8bef019a83e..e4840aed5d1 100644 --- a/tensorflow/lite/tools/optimize/quantize_weights.cc +++ b/tensorflow/lite/tools/optimize/quantize_weights.cc @@ -130,7 +130,8 @@ bool IsQuantizedInput(const OperatorCodeT* op_code, // Returns true if the operator supports hybrid evaluation. bool IsHybridEvaluationOp(const OperatorT* op, const OperatorCodeT* op_code, - const CustomOpMap& custom_op_map) { + const CustomOpMap& custom_op_map, + bool use_updated_hybrid_scheme) { const BuiltinOperator builtin_op_code = op_code->builtin_code; // Operations that support hybrid evaluation. 
bool eval_hybrid = false; @@ -144,7 +145,6 @@ bool IsHybridEvaluationOp(const OperatorT* op, const OperatorCodeT* op_code, } } else if (builtin_op_code == BuiltinOperator_FULLY_CONNECTED || builtin_op_code == BuiltinOperator_CONV_2D || - builtin_op_code == BuiltinOperator_DEPTHWISE_CONV_2D || builtin_op_code == BuiltinOperator_SVDF || builtin_op_code == BuiltinOperator_RNN || builtin_op_code == BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM || @@ -158,6 +158,8 @@ bool IsHybridEvaluationOp(const OperatorT* op, const OperatorCodeT* op_code, if (options->kernel_type == LSTMKernelType_FULL) { eval_hybrid = true; } + } else if (builtin_op_code == BuiltinOperator_DEPTHWISE_CONV_2D) { + eval_hybrid = use_updated_hybrid_scheme; } return eval_hybrid; } @@ -191,7 +193,7 @@ TfLiteStatus InsertQuantizableInputTensorsFromOperator( const ModelT* model, OperatorT* op, uint64_t weights_min_num_elements, const CustomOpMap& custom_op_map, absl::flat_hash_map* tensor_map, - int subgraph_index) { + int subgraph_index, bool use_updated_hybrid_scheme) { SubGraphT* subgraph = model->subgraphs.at(subgraph_index).get(); const OperatorCodeT* op_code = model->operator_codes[op->opcode_index].get(); @@ -231,43 +233,46 @@ TfLiteStatus InsertQuantizableInputTensorsFromOperator( } if (op_code->builtin_code == BuiltinOperator_DEPTHWISE_CONV_2D) { - tensor_map->insert( - {tensor_idx, {tensor, /*is_per_channel=*/true, /*dim=*/3}}); + tensor_map->insert({tensor_idx, + {tensor, /*is_per_channel=*/use_updated_hybrid_scheme, + /*dim=*/3}}); } else if (op_code->builtin_code == BuiltinOperator_CONV_2D) { - tensor_map->insert( - {tensor_idx, {tensor, /*is_per_channel=*/true, /*dim=*/0}}); + tensor_map->insert({tensor_idx, + {tensor, /*is_per_channel=*/use_updated_hybrid_scheme, + /*dim=*/0}}); } else { switch (op_code->builtin_code) { case BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM: op->builtin_options.AsBidirectionalSequenceLSTMOptions() - ->asymmetric_quantize_inputs = true; + ->asymmetric_quantize_inputs = use_updated_hybrid_scheme; break; case BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN: op->builtin_options.AsBidirectionalSequenceRNNOptions() - ->asymmetric_quantize_inputs = true; + ->asymmetric_quantize_inputs = use_updated_hybrid_scheme; break; case BuiltinOperator_FULLY_CONNECTED: op->builtin_options.AsFullyConnectedOptions() - ->asymmetric_quantize_inputs = true; + ->asymmetric_quantize_inputs = use_updated_hybrid_scheme; break; case BuiltinOperator_LSTM: op->builtin_options.AsLSTMOptions()->asymmetric_quantize_inputs = - true; + use_updated_hybrid_scheme; break; case BuiltinOperator_RNN: - op->builtin_options.AsRNNOptions()->asymmetric_quantize_inputs = true; + op->builtin_options.AsRNNOptions()->asymmetric_quantize_inputs = + use_updated_hybrid_scheme; break; case BuiltinOperator_SVDF: op->builtin_options.AsSVDFOptions()->asymmetric_quantize_inputs = - true; + use_updated_hybrid_scheme; break; case BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM: op->builtin_options.AsUnidirectionalSequenceLSTMOptions() - ->asymmetric_quantize_inputs = true; + ->asymmetric_quantize_inputs = use_updated_hybrid_scheme; break; case BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN: op->builtin_options.AsSequenceRNNOptions() - ->asymmetric_quantize_inputs = true; + ->asymmetric_quantize_inputs = use_updated_hybrid_scheme; break; default: break; @@ -323,25 +328,27 @@ void MakeTensor(const string& name, const std::vector& shape, } // Updates operator code versions for the operators with INT8 inputs. 
-void UpdateInt8OperatorVersions(ModelT* model) { +void UpdateInt8OperatorVersions(ModelT* model, bool use_updated_hybrid_scheme) { for (int i = 0; i < model->operator_codes.size(); ++i) { const BuiltinOperator& op_code = model->operator_codes[i]->builtin_code; - if (op_code == BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM || + if (op_code == BuiltinOperator_RNN || op_code == BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN || - op_code == BuiltinOperator_EMBEDDING_LOOKUP || - op_code == BuiltinOperator_RNN || op_code == BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM || op_code == BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN) { + model->operator_codes[i]->version = use_updated_hybrid_scheme ? 3 : 2; + } else if (op_code == BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM || + op_code == BuiltinOperator_EMBEDDING_LOOKUP) { model->operator_codes[i]->version = 3; - } else if (op_code == BuiltinOperator_LSTM || - op_code == BuiltinOperator_SVDF) { - model->operator_codes[i]->version = 4; + } else if (op_code == BuiltinOperator_LSTM) { + model->operator_codes[i]->version = use_updated_hybrid_scheme ? 4 : 3; } else if (op_code == BuiltinOperator_CONV_2D) { - model->operator_codes[i]->version = 5; + model->operator_codes[i]->version = use_updated_hybrid_scheme ? 5 : 2; + } else if (op_code == BuiltinOperator_FULLY_CONNECTED) { + model->operator_codes[i]->version = use_updated_hybrid_scheme ? 9 : 3; + } else if (op_code == BuiltinOperator_SVDF) { + model->operator_codes[i]->version = use_updated_hybrid_scheme ? 4 : 2; } else if (op_code == BuiltinOperator_DEPTHWISE_CONV_2D) { model->operator_codes[i]->version = 6; - } else if (op_code == BuiltinOperator_FULLY_CONNECTED) { - model->operator_codes[i]->version = 9; } } } @@ -402,7 +409,8 @@ TfLiteStatus QuantizeWeightsInt8(flatbuffers::FlatBufferBuilder* builder, const Model* input_model, bool use_hybrid_evaluation, uint64_t weights_min_num_elements, - const CustomOpMap& custom_op_map) { + const CustomOpMap& custom_op_map, + bool use_updated_hybrid_scheme) { std::unique_ptr model; model.reset(input_model->UnPack()); @@ -415,7 +423,7 @@ TfLiteStatus QuantizeWeightsInt8(flatbuffers::FlatBufferBuilder* builder, OperatorT* op = subgraph->operators[i].get(); TF_LITE_ENSURE_STATUS(InsertQuantizableInputTensorsFromOperator( model.get(), op, weights_min_num_elements, custom_op_map, &tensor_map, - subgraph_index)); + subgraph_index, use_updated_hybrid_scheme)); } for (std::pair tensor_pair : tensor_map) { @@ -456,8 +464,8 @@ TfLiteStatus QuantizeWeightsInt8(flatbuffers::FlatBufferBuilder* builder, // dequantization we need to add a Dequantize op. bool eval_hybrid = use_hybrid_evaluation && - IsHybridEvaluationOp(consumer_op, consumer_op_code, - custom_op_map) && + IsHybridEvaluationOp(consumer_op, consumer_op_code, custom_op_map, + use_updated_hybrid_scheme) && CheckAllOpInputsQuantized(subgraph, consumer_op, consumer_op_code, custom_op_map) && IsQuantizedInput(consumer_op_code, custom_op_map, @@ -516,7 +524,7 @@ TfLiteStatus QuantizeWeightsInt8(flatbuffers::FlatBufferBuilder* builder, } // Update the modified operator code versions. - UpdateInt8OperatorVersions(model.get()); + UpdateInt8OperatorVersions(model.get(), use_updated_hybrid_scheme); flatbuffers::Offset output_model_location = Model::Pack(*builder, model.get()); @@ -611,7 +619,8 @@ TfLiteStatus QuantizeWeights(flatbuffers::FlatBufferBuilder* builder, // kWeightsMinSizeDefault elements are quantized. 
CustomOpMap custom_op_map; return QuantizeWeightsInt8(builder, input_model, use_hybrid_evaluation, - weights_min_num_elements, custom_op_map); + weights_min_num_elements, custom_op_map, + kUseUpdatedHybridSchemeDefault); } } // namespace internal @@ -620,7 +629,8 @@ TfLiteStatus QuantizeWeights(flatbuffers::FlatBufferBuilder* builder, uint64_t weights_min_num_elements) { CustomOpMap custom_op_map; return QuantizeWeightsInt8(builder, input_model, true, - weights_min_num_elements, custom_op_map); + weights_min_num_elements, custom_op_map, + kUseUpdatedHybridSchemeDefault); } TfLiteStatus QuantizeWeights(flatbuffers::FlatBufferBuilder* builder, @@ -631,7 +641,8 @@ TfLiteStatus QuantizeWeights(flatbuffers::FlatBufferBuilder* builder, // kWeightsMinSizeDefault elements are quantized. CustomOpMap custom_op_map; return QuantizeWeightsInt8(builder, input_model, true, - kWeightsMinNumElementsDefault, custom_op_map); + kWeightsMinNumElementsDefault, custom_op_map, + kUseUpdatedHybridSchemeDefault); } case BufferType::QUANTIZED_FLOAT16: return QuantizeWeightsFloat16(builder, input_model); @@ -643,7 +654,19 @@ TfLiteStatus QuantizeWeights(flatbuffers::FlatBufferBuilder* builder, uint64_t weights_min_num_elements, const CustomOpMap& custom_op_map) { return QuantizeWeightsInt8(builder, input_model, true, - weights_min_num_elements, custom_op_map); + weights_min_num_elements, custom_op_map, + kUseUpdatedHybridSchemeDefault); +} + +TfLiteStatus QuantizeWeights(flatbuffers::FlatBufferBuilder* builder, + const Model* input_model, + uint64_t weights_min_num_elements, + const CustomOpMap& custom_op_map, + bool use_updated_hybrid_scheme) { + return QuantizeWeightsInt8(builder, input_model, + /*use_hybrid_evaluation=*/true, + weights_min_num_elements, custom_op_map, + use_updated_hybrid_scheme); } } // namespace optimize diff --git a/tensorflow/lite/tools/optimize/quantize_weights.h b/tensorflow/lite/tools/optimize/quantize_weights.h index 528614f0b7b..9212c9a117d 100644 --- a/tensorflow/lite/tools/optimize/quantize_weights.h +++ b/tensorflow/lite/tools/optimize/quantize_weights.h @@ -29,6 +29,13 @@ namespace optimize { // Supported resulting types from quantization process. enum class BufferType { QUANTIZED_INT8, QUANTIZED_FLOAT16 }; +// This macro is for internal use for conversions requiring previous behavior. +#ifdef TFLITE_USE_PREVIOUS_HYBRID_SCHEME +constexpr bool kUseUpdatedHybridSchemeDefault = false; +#else +constexpr bool kUseUpdatedHybridSchemeDefault = true; +#endif + // Quantizes input_model and populates the provided builder with the new model. // By default only weights tensors weight more than 1024 elements will be // quantized. @@ -61,6 +68,14 @@ TfLiteStatus QuantizeWeights(flatbuffers::FlatBufferBuilder* builder, uint64_t weights_min_num_elements, const CustomOpMap& custom_op_map); +// Same as above, but if use updated_hybrid_scheme is false, +// use previous quantization scheme. +TfLiteStatus QuantizeWeights(flatbuffers::FlatBufferBuilder* builder, + const Model* input_model, + uint64_t weights_min_num_elements, + const CustomOpMap& custom_op_map, + bool use_updated_hybrid_scheme); + namespace internal { // If use_hybrid_evaluation is false, will disable using hybrid eval for // operations that support it. 
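For reference, a minimal sketch of calling the overload declared above to revert to the previous hybrid scheme (per-tensor conv weight scales, no asymmetric input quantization for the hybrid RNN/LSTM/FC kernels). Only the QuantizeWeights signature, CustomOpMap, and the scheme flag come from this patch; the helper name, the 1024-element default threshold, and the exact include set are illustrative assumptions.

    #include "flatbuffers/flatbuffers.h"
    #include "tensorflow/lite/schema/schema_generated.h"
    #include "tensorflow/lite/tools/optimize/quantize_weights.h"

    // Quantizes the weights of `input_model` with the pre-existing hybrid
    // scheme by passing use_updated_hybrid_scheme=false (hypothetical helper).
    TfLiteStatus QuantizeWithPreviousHybridScheme(
        const tflite::Model* input_model,
        flatbuffers::FlatBufferBuilder* builder) {
      tflite::optimize::CustomOpMap custom_op_map;  // no custom ops in this sketch
      return tflite::optimize::QuantizeWeights(
          builder, input_model, /*weights_min_num_elements=*/1024, custom_op_map,
          /*use_updated_hybrid_scheme=*/false);
    }
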
diff --git a/tensorflow/lite/tools/optimize/quantize_weights_test.cc b/tensorflow/lite/tools/optimize/quantize_weights_test.cc index 2f92a9ad71c..94bff2d5eb8 100644 --- a/tensorflow/lite/tools/optimize/quantize_weights_test.cc +++ b/tensorflow/lite/tools/optimize/quantize_weights_test.cc @@ -216,7 +216,11 @@ TEST_F(QuantizeWeightsTest, HybridConv) { EXPECT_EQ(quant_tensor->type(), TensorType_INT8) << quant_tensor->name()->str(); auto shape = GetAsVector(quant_tensor->shape()); - EXPECT_EQ(quant_tensor->quantization()->scale()->size(), shape[0]); + if (kUseUpdatedHybridSchemeDefault) { + EXPECT_EQ(quant_tensor->quantization()->scale()->size(), shape[0]); + } else { + EXPECT_EQ(quant_tensor->quantization()->scale()->size(), 1); + } } else { EXPECT_EQ(quant_tensor->type(), TensorType_FLOAT32); } @@ -533,6 +537,58 @@ TEST_F(QuantizeWeightsTest, VerifyCustomOpQuantizationHybrid) { EXPECT_EQ(num_custom_ops_found, 1); } +TEST_F(QuantizeWeightsTest, VerifyUpdatedHybridSchemeFalseQuantizationHybrid) { + LoadBasicModel(); + flatbuffers::FlatBufferBuilder builder; + const CustomOpMap custom_op_map; + auto status = QuantizeWeights(&builder, model_, 0, custom_op_map, false); + EXPECT_EQ(status, kTfLiteOk); + + const uint8_t* buffer = builder.GetBufferPointer(); + const Model* output_model = GetModel(buffer); + ASSERT_TRUE(output_model); + + // Nothing should change. + ASSERT_EQ(output_model->subgraphs()->size(), model_->subgraphs()->size()); + for (size_t subgraph_idx = 0; subgraph_idx < model_->subgraphs()->size(); + subgraph_idx++) { + const auto quantized_graph = output_model->subgraphs()->Get(subgraph_idx); + const auto float_graph = model_->subgraphs()->Get(subgraph_idx); + ASSERT_EQ(quantized_graph->tensors()->size(), + float_graph->tensors()->size()); + // Make sure the graph only has one Conv operation. + ASSERT_EQ(quantized_graph->operators()->size(), 1); + const auto op = quantized_graph->operators()->Get(0); + const uint32_t op_code_idx = op->opcode_index(); + ASSERT_EQ(output_model->operator_codes()->Get(op_code_idx)->builtin_code(), + BuiltinOperator_CONV_2D); + for (size_t i = 0; i < quantized_graph->tensors()->size(); i++) { + const auto quant_tensor = quantized_graph->tensors()->Get(i); + const auto float_tensor = float_graph->tensors()->Get(i); + EXPECT_EQ(quant_tensor->buffer(), float_tensor->buffer()); + EXPECT_EQ(quant_tensor->is_variable(), float_tensor->is_variable()); + EXPECT_EQ(GetAsVector(quant_tensor->shape()), + GetAsVector(float_tensor->shape())); + EXPECT_EQ(quant_tensor->name()->str(), float_tensor->name()->str()); + // If the tensor is a weight, it should have type INT8, otherwise it + // should stay with type FLOAT32. + // If the tensor is a bias, it should have type FLOAT32. 
+ if (quant_tensor->name()->str() == "conv_bias") { + EXPECT_EQ(quant_tensor->type(), TensorType_FLOAT32); + } else if (IsModelInputOrOutput(output_model, i)) { + EXPECT_EQ(quant_tensor->type(), TensorType_FLOAT32); + } else if (quant_tensor->buffer() != 0) { + EXPECT_EQ(quant_tensor->type(), TensorType_INT8) + << quant_tensor->name()->str(); + auto shape = GetAsVector(quant_tensor->shape()); + EXPECT_EQ(quant_tensor->quantization()->scale()->size(), 1); + } else { + EXPECT_EQ(quant_tensor->type(), TensorType_FLOAT32); + } + } + } +} + } // namespace } // namespace optimize } // namespace tflite From 3850477977002e1ac134c551b8a3d0f3cf2bdcd3 Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Thu, 23 Jul 2020 16:07:10 -0700 Subject: [PATCH 1222/2522] Port the dequantize kernel to the new TfLiteEvalTensor API. PiperOrigin-RevId: 322887114 Change-Id: I6c02b9a583d4dfda74d6e4931734881815eddc14 --- tensorflow/lite/micro/kernels/BUILD | 1 + tensorflow/lite/micro/kernels/dequantize.cc | 52 +++++++++++-------- .../lite/micro/kernels/dequantize_test.cc | 41 +++------------ 3 files changed, 39 insertions(+), 55 deletions(-) diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index b9862005346..5ff1121fedb 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD @@ -475,6 +475,7 @@ tflite_micro_cc_test( "dequantize_test.cc", ], deps = [ + ":kernel_runner", "//tensorflow/lite/c:common", "//tensorflow/lite/micro:micro_framework", "//tensorflow/lite/micro:op_resolvers", diff --git a/tensorflow/lite/micro/kernels/dequantize.cc b/tensorflow/lite/micro/kernels/dequantize.cc index 607ada6a605..df501887866 100644 --- a/tensorflow/lite/micro/kernels/dequantize.cc +++ b/tensorflow/lite/micro/kernels/dequantize.cc @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/reference/requantize.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { namespace ops { @@ -34,6 +35,7 @@ struct OpData { // be represented as a fixed point multiplier plus a left shift. 
int32_t output_multiplier; int output_shift; + int32_t output_zero_point; }; void* Init(TfLiteContext* context, const char* buffer, size_t length) { @@ -68,6 +70,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { data->quantization_params.zero_point = input->params.zero_point; data->quantization_params.scale = static_cast(input->params.scale); + data->output_zero_point = output->params.zero_point; return kTfLiteOk; } @@ -75,28 +78,31 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { TFLITE_DCHECK(node->user_data != nullptr); OpData* data = static_cast(node->user_data); - const TfLiteTensor* input = GetInput(context, node, 0); - TfLiteTensor* output = GetOutput(context, node, 0); + const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0); + TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); if (output->type == kTfLiteFloat32) { switch (input->type) { case kTfLiteUInt8: - reference_ops::Dequantize( - data->quantization_params, GetTensorShape(input), - GetTensorData(input), GetTensorShape(output), - GetTensorData(output)); + reference_ops::Dequantize(data->quantization_params, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); break; case kTfLiteInt8: - reference_ops::Dequantize( - data->quantization_params, GetTensorShape(input), - GetTensorData(input), GetTensorShape(output), - GetTensorData(output)); + reference_ops::Dequantize(data->quantization_params, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); break; case kTfLiteInt16: - reference_ops::Dequantize( - data->quantization_params, GetTensorShape(input), - GetTensorData(input), GetTensorShape(output), - GetTensorData(output)); + reference_ops::Dequantize(data->quantization_params, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); break; default: TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.", @@ -105,21 +111,23 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteError; } } else if (output->type == kTfLiteInt32) { - int flat_size = - MatchingFlatSize(GetTensorShape(input), GetTensorShape(output)); + int flat_size = MatchingFlatSize(tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorShape(output)); switch (input->type) { case kTfLiteInt16: { reference_ops::Requantize( - GetTensorData(input), flat_size, data->output_multiplier, - data->output_shift, input->params.zero_point, - output->params.zero_point, GetTensorData(output)); + tflite::micro::GetTensorData(input), flat_size, + data->output_multiplier, data->output_shift, + data->quantization_params.zero_point, data->output_zero_point, + tflite::micro::GetTensorData(output)); break; } case kTfLiteInt8: { reference_ops::Requantize( - GetTensorData(input), flat_size, data->output_multiplier, - data->output_shift, input->params.zero_point, - output->params.zero_point, GetTensorData(output)); + tflite::micro::GetTensorData(input), flat_size, + data->output_multiplier, data->output_shift, + data->quantization_params.zero_point, data->output_zero_point, + tflite::micro::GetTensorData(output)); break; } default: diff --git a/tensorflow/lite/micro/kernels/dequantize_test.cc b/tensorflow/lite/micro/kernels/dequantize_test.cc index 
5eb3d80e41e..6b499204b98 100644 --- a/tensorflow/lite/micro/kernels/dequantize_test.cc +++ b/tensorflow/lite/micro/kernels/dequantize_test.cc @@ -15,7 +15,7 @@ limitations under the License. #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/micro/all_ops_resolver.h" +#include "tensorflow/lite/micro/kernels/kernel_runner.h" #include "tensorflow/lite/micro/test_helpers.h" #include "tensorflow/lite/micro/testing/micro_test.h" #include "tensorflow/lite/micro/testing/test_utils.h" @@ -28,44 +28,19 @@ template void ValidateDequantizeGoldens(TfLiteTensor* tensors, int tensors_size, const T* expected_output_data, T* output_data, int output_length, float tolerance = 1e-5) { - TfLiteContext context; - ::tflite::AllOpsResolver resolver; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_DEQUANTIZE); - - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - - const char* init_data = nullptr; - size_t init_data_size = 0; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, init_data, init_data_size); - } - int inputs_array_data[] = {1, 0}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 1}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = nullptr; - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; + const TfLiteRegistration registration = + tflite::ops::micro::Register_DEQUANTIZE(); + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, + /*builtin_data=*/nullptr, micro_test::reporter); - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - - if (registration->free) { - registration->free(&context, user_data); - } + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); for (int i = 0; i < output_length; ++i) { TF_LITE_MICRO_EXPECT_NEAR(expected_output_data[i], output_data[i], 0.001f); From 8aac615a87988ba920f13fb9352d67d4a3558276 Mon Sep 17 00:00:00 2001 From: Jaesung Chung Date: Thu, 23 Jul 2020 16:20:28 -0700 Subject: [PATCH 1223/2522] Add a new pass that converts tf.InitializeTableFromTextFileV2 op to tf.LookupTableImportV2 This is for TFLite use cases. TFLite does not support asset file concept in runtime. This pass will lower the tf.InitializeTableFromTextFileV2 ops in order to remove asset file dependency. 
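In effect, the pass rewrites a text-file-backed table initializer into constant key/value tensors plus tf.LookupTableImportV2, so the converted model carries the vocabulary inline instead of reading an asset at runtime. A rough sketch of running it standalone, mirroring the test pass and tf_tfl_passes.cc hookup in this patch (the helper name and include set are assumptions):

    #include "mlir/IR/MLIRContext.h"
    #include "mlir/IR/Module.h"
    #include "mlir/Pass/PassManager.h"
    #include "mlir/Support/LogicalResult.h"
    #include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h"

    // Lowers tf.InitializeTableFromTextFileV2 ops in `module` into
    // tf.LookupTableImportV2 so no vocabulary file is needed at runtime.
    mlir::LogicalResult RemoveAssetFileDependency(mlir::MLIRContext* context,
                                                  mlir::ModuleOp module) {
      mlir::PassManager pm(context);
      pm.addPass(mlir::TF::CreateInitTextFileToImportPass());
      return pm.run(module);
    }
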
PiperOrigin-RevId: 322889562 Change-Id: I04ca2ef1c36ccbcc3daefa8117f676ba52a3fccd --- .../compiler/mlir/lite/tf_tfl_passes.cc | 3 + tensorflow/compiler/mlir/tensorflow/BUILD | 4 + .../tests/init_text_file_to_import.mlir | 14 ++ .../init_text_file_to_import_invalid.mlir | 53 +++++++ .../transforms/init_text_file_to_import.cc | 134 ++++++++++++++++++ .../init_text_file_to_import_test_pass.cc | 99 +++++++++++++ .../mlir/tensorflow/transforms/passes.h | 4 + 7 files changed, 311 insertions(+) create mode 100644 tensorflow/compiler/mlir/tensorflow/tests/init_text_file_to_import.mlir create mode 100644 tensorflow/compiler/mlir/tensorflow/tests/init_text_file_to_import_invalid.mlir create mode 100644 tensorflow/compiler/mlir/tensorflow/transforms/init_text_file_to_import.cc create mode 100644 tensorflow/compiler/mlir/tensorflow/transforms/init_text_file_to_import_test_pass.cc diff --git a/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc b/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc index 239d52530ec..c49d9a10716 100644 --- a/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc +++ b/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc @@ -180,6 +180,9 @@ void AddTFToTFLConversionPasses(const mlir::TFL::PassConfig& pass_config, // control flow ops (IfOp, CaseOp). pass_manager->addPass(mlir::createInlinerPass()); + // This pass removes the asset file dependencies in hash table use cases. + pass_manager->addPass(mlir::TF::CreateInitTextFileToImportPass()); + pass_manager->addPass( mlir::TFL::CreateLegalizeTFPass(pass_config.runtime_verification)); pass_manager->addPass(mlir::TFL::CreateOptimizePass()); diff --git a/tensorflow/compiler/mlir/tensorflow/BUILD b/tensorflow/compiler/mlir/tensorflow/BUILD index 6cb0922fa75..26c47e580e8 100644 --- a/tensorflow/compiler/mlir/tensorflow/BUILD +++ b/tensorflow/compiler/mlir/tensorflow/BUILD @@ -727,6 +727,7 @@ cc_library( "transforms/generated_optimize.inc", "transforms/gpu_fusion.cc", "transforms/graph_pruning.cc", + "transforms/init_text_file_to_import.cc", "transforms/launch_to_device_attribute.cc", "transforms/layout_optimization.cc", "transforms/mark_ops_for_outside_compilation.cc", @@ -826,6 +827,7 @@ cc_library( cc_library( name = "tensorflow_test_passes", srcs = [ + "transforms/init_text_file_to_import_test_pass.cc", "transforms/lift_variables_test_pass.cc", "transforms/lower_tf_pass.cc", ], @@ -841,8 +843,10 @@ cc_library( "//tensorflow/core/platform:errors", "//tensorflow/core/platform:status", "//tensorflow/core/platform:threadpool_options", + "@llvm-project//llvm:Support", "@llvm-project//mlir:IR", "@llvm-project//mlir:Pass", + "@llvm-project//mlir:StandardOps", "@llvm-project//mlir:Support", ], alwayslink = 1, diff --git a/tensorflow/compiler/mlir/tensorflow/tests/init_text_file_to_import.mlir b/tensorflow/compiler/mlir/tensorflow/tests/init_text_file_to_import.mlir new file mode 100644 index 00000000000..6a9581b0e44 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/tests/init_text_file_to_import.mlir @@ -0,0 +1,14 @@ +// RUN: tf-opt -tf-init-text-file-to-import-test %s | FileCheck %s + +// Tests that the tf.InitializeTableFromTextFileV2 op are inlined. 
+ +func @init_all_tables() { + %cst = constant dense<"%FILE_PLACEHOLDER"> : tensor + %0 = "tf.HashTableV2"() {container = "", device = "", key_dtype = !tf.string, shared_name = "hash_table_/tmp/vocab.txt_-2_-1", use_node_name_sharing = false, value_dtype = i64} : () -> tensor + "tf.InitializeTableFromTextFileV2"(%0, %cst) {delimiter = " ", device = "", key_index = -2 : i64, value_index = -1 : i64, vocab_size = -1 : i64} : (tensor, tensor) -> () + return + // CHECK: [[CST:%.*]] = constant dense<["apple", "banana", "grape"]> : tensor<3x!tf.string> + // CHECK: [[CST_0:%.*]] = constant dense<[0, 1, 2]> : tensor<3xi64> + // CHECK: [[VAL:%.*]] = "tf.HashTableV2"() + // CHECK: "tf.LookupTableImportV2"([[VAL]], [[CST]], [[CST_0]]) +} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/init_text_file_to_import_invalid.mlir b/tensorflow/compiler/mlir/tensorflow/tests/init_text_file_to_import_invalid.mlir new file mode 100644 index 00000000000..05afe1cc27f --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/tests/init_text_file_to_import_invalid.mlir @@ -0,0 +1,53 @@ +// RUN: tf-opt -split-input-file -verify-diagnostics -tf-init-text-file-to-import %s | FileCheck %s + +// Tests that the given vocabulary file does not exist. + +func @init_all_tables() { + %cst = constant dense<"vocab_file_does_not_exist.txt"> : tensor + %0 = "tf.HashTableV2"() {container = "", device = "", key_dtype = !tf.string, shared_name = "hash_table_/tmp/vocab.txt_-2_-1", use_node_name_sharing = false, value_dtype = i64} : () -> tensor + // expected-error @+1 {{'tf.InitializeTableFromTextFileV2' op failed to open vocabulary file (vocab_file_does_not_exist.txt): cannot open input file 'vocab_file_does_not_exist.txt': No such file or directory}} + "tf.InitializeTableFromTextFileV2"(%0, %cst) {delimiter = " ", device = "", key_index = -2 : i64, value_index = -1 : i64, vocab_size = -1 : i64} : (tensor, tensor) -> () + return +} + +// ----- + +// Tests that the tf.InitializeTableFromTextFileV2 op is not converted since +// unsupported key_index, -1. + +func @init_all_tables() { + %cst = constant dense<"vocab_file_does_not_exist.txt"> : tensor + %0 = "tf.HashTableV2"() {container = "", device = "", key_dtype = !tf.string, shared_name = "hash_table_/tmp/vocab.txt_-2_-1", use_node_name_sharing = false, value_dtype = i64} : () -> tensor + "tf.InitializeTableFromTextFileV2"(%0, %cst) {delimiter = " ", device = "", key_index = -1 : i64, value_index = -1 : i64, vocab_size = -1 : i64} : (tensor, tensor) -> () + return + // CHECK: [[VAL:%.*]] = "tf.HashTableV2"() + // CHECK: tf.InitializeTableFromTextFileV2" +} + +// ----- + +// Tests that the tf.InitializeTableFromTextFileV2 op is not converted since +// unsupported value_index, 0. + +func @init_all_tables() { + %cst = constant dense<"vocab_file_does_not_exist.txt"> : tensor + %0 = "tf.HashTableV2"() {container = "", device = "", key_dtype = !tf.string, shared_name = "hash_table_/tmp/vocab.txt_-2_-1", use_node_name_sharing = false, value_dtype = i64} : () -> tensor + "tf.InitializeTableFromTextFileV2"(%0, %cst) {delimiter = " ", device = "", key_index = -2 : i64, value_index = 0 : i64, vocab_size = -1 : i64} : (tensor, tensor) -> () + return + // CHECK: [[VAL:%.*]] = "tf.HashTableV2"() + // CHECK: tf.InitializeTableFromTextFileV2" +} + +// ----- + +// Tests that the tf.InitializeTableFromTextFileV2 op is not converted since +// unsupported vocab_size, 1. 
+ +func @init_all_tables() { + %cst = constant dense<"vocab_file_does_not_exist.txt"> : tensor + %0 = "tf.HashTableV2"() {container = "", device = "", key_dtype = !tf.string, shared_name = "hash_table_/tmp/vocab.txt_-2_-1", use_node_name_sharing = false, value_dtype = i64} : () -> tensor + "tf.InitializeTableFromTextFileV2"(%0, %cst) {delimiter = " ", device = "", key_index = -2 : i64, value_index = -1 : i64, vocab_size = 1 : i64} : (tensor, tensor) -> () + return + // CHECK: [[VAL:%.*]] = "tf.HashTableV2"() + // CHECK: tf.InitializeTableFromTextFileV2" +} diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/init_text_file_to_import.cc b/tensorflow/compiler/mlir/tensorflow/transforms/init_text_file_to_import.cc new file mode 100644 index 00000000000..615ca26012e --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/transforms/init_text_file_to_import.cc @@ -0,0 +1,134 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include + +#include "llvm/Support/Casting.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project +#include "mlir/IR/Attributes.h" // from @llvm-project +#include "mlir/IR/OperationSupport.h" // from @llvm-project +#include "mlir/IR/PatternMatch.h" // from @llvm-project +#include "mlir/Pass/Pass.h" // from @llvm-project +#include "mlir/Support/FileUtilities.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" + +namespace mlir { +namespace TF { +namespace { + +static constexpr int kTextFileIndex_WholeLine = -2; +static constexpr int kTextFileIndex_LineNumber = -1; + +// InitTextFileToImportPass converts InitializeTableFromTextFileV2Op to the +// corresponding LookupTableImportV2Op if possible. +class InitTextFileToImportPass + : public mlir::PassWrapper { + public: + explicit InitTextFileToImportPass() {} + + private: + void runOnFunction() override; +}; + +class ConvertInitializeTableFromTextFileV2 + : public OpRewritePattern { + public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(InitializeTableFromTextFileV2Op op, + PatternRewriter& rewriter) const override { + // Now, this pattern matching only supports the following case, which is + // commonly used among inference use cases: + // + // tf.lookup.TextFileInitializer( + // "test.txt", tf.string, tf.lookup.TextFileIndex.WHOLE_LINE, + // tf.int64, tf.lookup.TextFileIndex.LINE_NUMBER, delimiter=" ") + // + // In the above case, the delimiter will be not used since the key is just a + // whole line and value is a line number. + if (op.key_index() != kTextFileIndex_WholeLine || + op.value_index() != kTextFileIndex_LineNumber || + op.vocab_size() != -1) { + return failure(); + } + + // Try to find filename from constant op. 
+ DenseStringElementsAttr filename_attr; + if (!matchPattern(op.filename().getDefiningOp(), + m_Constant(&filename_attr))) { + return failure(); + } + StringRef filename = filename_attr.getRawStringData()[0]; + + // Read the content of the file. + std::string error_message; + auto file = openInputFile(filename, &error_message); + if (!file) { + return op.emitOpError("failed to open vocabulary file") + << " (" << filename.str() << "): " << error_message; + } + + // Splits into lines. + SmallVector lines; + file->getBuffer().split(lines, "\n", -1, false); + + // Map each line to line number, starting from zero. + SmallVector line_nums; + line_nums.resize(lines.size()); + std::iota(line_nums.begin(), line_nums.end(), 0); + + // Create constant ops for keys an values. + Value key_constant_tensor = rewriter.create( + op.getLoc(), + DenseStringElementsAttr::get( + RankedTensorType::get(static_cast(lines.size()), + StringType::get(rewriter.getContext())), + lines)); + + Value value_constant_tensor = rewriter.create( + op.getLoc(), rewriter.getI64TensorAttr(line_nums)); + + // Replace the given op with LookupTableImportV2Op. + rewriter.create(op.getLoc(), op.table_handle(), + key_constant_tensor, + value_constant_tensor); + rewriter.eraseOp(op); + return success(); + } +}; + +void InitTextFileToImportPass::runOnFunction() { + OwningRewritePatternList patterns; + MLIRContext* context = &getContext(); + FuncOp func = getFunction(); + + patterns.insert(context); + applyPatternsAndFoldGreedily(func, patterns); +} + +} // namespace + +// Replace InitializeTableFromTextFileV2Ops with LookupTableImportV2Ops. +std::unique_ptr> CreateInitTextFileToImportPass() { + return std::make_unique(); +} + +static PassRegistration pass( + "tf-init-text-file-to-import", + "convert InitializeTableFromTextFileV2 ops to LookupTableImportV2Op to " + "remove the dependency on asset files"); + +} // namespace TF +} // namespace mlir diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/init_text_file_to_import_test_pass.cc b/tensorflow/compiler/mlir/tensorflow/transforms/init_text_file_to_import_test_pass.cc new file mode 100644 index 00000000000..96a04fa6eeb --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/transforms/init_text_file_to_import_test_pass.cc @@ -0,0 +1,99 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "llvm/Support/Casting.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/ToolOutputFile.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project +#include "mlir/IR/Attributes.h" // from @llvm-project +#include "mlir/IR/OperationSupport.h" // from @llvm-project +#include "mlir/IR/PatternMatch.h" // from @llvm-project +#include "mlir/Pass/Pass.h" // from @llvm-project +#include "mlir/Pass/PassManager.h" // from @llvm-project +#include "mlir/Support/FileUtilities.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" +#include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" + +namespace mlir { +namespace TF { +namespace { + +// InitTextFileToImportTestPass generates a temporary file and run the +// InitTextFileToImportPass for testing purpose. +class InitTextFileToImportTestPass + : public mlir::PassWrapper> { + public: + explicit InitTextFileToImportTestPass() {} + + private: + void runOnOperation() override; +}; + +void InitTextFileToImportTestPass::runOnOperation() { + ModuleOp module = getOperation(); + + // Create a temporary vocab file. + int fd; + SmallString<256> filename; + std::error_code error_code = + llvm::sys::fs::createTemporaryFile("text", "vocab", fd, filename); + if (error_code) return signalPassFailure(); + + llvm::ToolOutputFile temp_file(filename, fd); + const char* dictionary_in_lines = + "apple\n" + "banana\n" + "grape"; + temp_file.os() << dictionary_in_lines; + temp_file.os().flush(); + + // Replace filename constant ops to use the temporary file. + MLIRContext* context = &getContext(); + + for (FuncOp func : module.getOps()) { + llvm::SmallVector constant_ops(func.getOps()); + for (auto op : constant_ops) { + ShapedType shaped_type = + RankedTensorType::get({1}, StringType::get(context)); + + DenseStringElementsAttr attr; + if (!matchPattern(op.getOperation(), m_Constant(&attr))) { + continue; + } + + ArrayRef values = attr.getRawStringData(); + if (values.size() != 1 || values[0] != "%FILE_PLACEHOLDER") { + continue; + } + + op.valueAttr(DenseStringElementsAttr::get(shaped_type, {filename})); + } + } + + // Run the lowering pass. + PassManager pm(context); + pm.addPass(CreateInitTextFileToImportPass()); + if (failed(pm.run(module))) return signalPassFailure(); +} + +} // namespace + +static PassRegistration pass( + "tf-init-text-file-to-import-test", + "generate a temporary file and invoke InitTextFileToImportPass"); + +} // namespace TF +} // namespace mlir diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/passes.h b/tensorflow/compiler/mlir/tensorflow/transforms/passes.h index f5ca47d7455..a1f527f8846 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/passes.h +++ b/tensorflow/compiler/mlir/tensorflow/transforms/passes.h @@ -161,6 +161,10 @@ std::unique_ptr> CreateFusedKernelMatcherPass(); // Creates function pass to select device index/fold tf.DeviceIndex. std::unique_ptr> CreateDeviceIndexSelectorPass(); + +// Creates function pass to replace InitializeTableFromTextFileV2Ops with +// LookupTableImportV2Op ops. +std::unique_ptr> CreateInitTextFileToImportPass(); } // namespace TF namespace tf_executor { From 55f7984d76aea4c577a49572c1ae510c4614b22f Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Thu, 23 Jul 2020 16:28:18 -0700 Subject: [PATCH 1224/2522] Migrate Linux GPU official release and nightly testing builds to use the new bazel configs. 
PiperOrigin-RevId: 322891131 Change-Id: Ifd8629cbae6c7a7c0e4401836f2610344bb3cc00 --- .../release/ubuntu_16/gpu_py35_full/pip_v1.sh | 19 ++++++++++++++--- .../release/ubuntu_16/gpu_py37_full/pip_v1.sh | 21 +++++++++++++++---- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/pip_v1.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/pip_v1.sh index 8f00a7a4ad9..a860decbe51 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/pip_v1.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/pip_v1.sh @@ -28,13 +28,26 @@ export CONTAINER_TYPE="GPU" export TF_PYTHON_VERSION='python3.5' # Run configure. +export TF_NEED_GCP=1 +export TF_NEED_HDFS=1 +export TF_NEED_S3=1 +export TF_NEED_CUDA=1 +export TF_CUDA_VERSION=10 +export TF_CUDNN_VERSION=7 +export TF_NEED_TENSORRT=1 +export TENSORRT_INSTALL_PATH=/usr/local/tensorrt +export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) -yes "" | "$PYTHON_BIN_PATH" configure.py +export PROJECT_NAME="tensorflow_gpu" +export LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$TENSORRT_INSTALL_PATH/lib" +export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 +yes "" | "$PYTHON_BIN_PATH" configure.py # Export optional variables for running pip.sh export TF_TEST_FILTER_TAGS='gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py35' -export TF_BUILD_FLAGS="--config=release_gpu_linux " +export TF_BUILD_FLAGS="--config=opt --config=cuda --distinct_host_configuration=false \ +--action_env=TF_CUDA_VERSION --action_env=TF_CUDNN_VERSION --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain " export TF_TEST_FLAGS="--test_tag_filters=${TF_TEST_FILTER_TAGS} --build_tag_filters=${TF_TEST_FILTER_TAGS} \ --distinct_host_configuration=false \ --action_env=TF_CUDA_VERSION --action_env=TF_CUDNN_VERSION \ @@ -44,7 +57,7 @@ export TF_TEST_FLAGS="--test_tag_filters=${TF_TEST_FILTER_TAGS} --build_tag_filt export TF_TEST_TARGETS="//tensorflow/python/... " export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" #export IS_NIGHTLY=0 # Not nightly; uncomment if building from tf repo. -export TF_PROJECT_NAME="tensorflow_gpu" +export TF_PROJECT_NAME=${PROJECT_NAME} export TF_PIP_TEST_ROOT="pip_test" # To build both tensorflow and tensorflow-gpu pip packages diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/pip_v1.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/pip_v1.sh index f3bebb56243..f6128448b99 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/pip_v1.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/pip_v1.sh @@ -28,23 +28,36 @@ export CONTAINER_TYPE="GPU" export TF_PYTHON_VERSION='python3.7' # Run configure. 
+export TF_NEED_GCP=1 +export TF_NEED_HDFS=1 +export TF_NEED_S3=1 +export TF_NEED_CUDA=1 +export TF_CUDA_VERSION=10 +export TF_CUDNN_VERSION=7 +export TF_NEED_TENSORRT=1 +export TENSORRT_INSTALL_PATH=/usr/local/tensorrt +export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) -yes "" | "$PYTHON_BIN_PATH" configure.py +export PROJECT_NAME="tensorflow_gpu" +export LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$TENSORRT_INSTALL_PATH/lib" +export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 +yes "" | "$PYTHON_BIN_PATH" configure.py # Export optional variables for running pip.sh export TF_TEST_FILTER_TAGS='gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py37' -export TF_BUILD_FLAGS="--config=release_gpu_linux " +export TF_BUILD_FLAGS="--config=opt --config=cuda --distinct_host_configuration=false \ +--action_env=TF_CUDA_VERSION --action_env=TF_CUDNN_VERSION --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain " export TF_TEST_FLAGS="--test_tag_filters=${TF_TEST_FILTER_TAGS} --build_tag_filters=${TF_TEST_FILTER_TAGS} \ --distinct_host_configuration=false \ ---action_env=TF_CUDA_VERSION=10 --action_env=TF_CUDNN_VERSION=7 \ +--action_env=TF_CUDA_VERSION --action_env=TF_CUDNN_VERSION \ --config=cuda --test_output=errors --local_test_jobs=4 --test_lang_filters=py \ --verbose_failures=true --keep_going --define=no_tensorflow_py_deps=true \ --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute " export TF_TEST_TARGETS="//tensorflow/python/... " export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" #export IS_NIGHTLY=0 # Not nightly; uncomment if building from tf repo. -export TF_PROJECT_NAME=="tensorflow_gpu" +export TF_PROJECT_NAME=${PROJECT_NAME} export TF_PIP_TEST_ROOT="pip_test" # To build both tensorflow and tensorflow-gpu pip packages From 2b83dd25372bcf981e7bcef13726be3ffa672883 Mon Sep 17 00:00:00 2001 From: Jaesung Chung Date: Thu, 23 Jul 2020 16:29:56 -0700 Subject: [PATCH 1225/2522] Rollback of BroadcastTo op additions (part 3) Rolling back until discussion about builtin ops schema issue is discussed. 
PiperOrigin-RevId: 322891511 Change-Id: Idf64195f89855cf6201cef9d5fd0ec24db403ec1 --- RELEASE.md | 3 +- tensorflow/lite/builtin_ops.h | 1 - tensorflow/lite/c/common.c | 23 -- tensorflow/lite/c/common.h | 3 - .../lite/core/api/flatbuffer_conversions.cc | 1 - tensorflow/lite/kernels/BUILD | 14 - tensorflow/lite/kernels/broadcast_to.cc | 136 ---------- tensorflow/lite/kernels/broadcast_to_test.cc | 255 ------------------ tensorflow/lite/kernels/builtin_op_kernels.h | 1 - tensorflow/lite/kernels/internal/BUILD | 1 - tensorflow/lite/kernels/internal/common.h | 7 - .../kernels/internal/reference/broadcast_to.h | 90 ------- tensorflow/lite/kernels/register.cc | 1 - tensorflow/lite/kernels/register_ref.cc | 2 - tensorflow/lite/schema/schema.fbs | 9 +- tensorflow/lite/schema/schema_generated.h | 134 +-------- tensorflow/lite/toco/model.h | 1 - tensorflow/lite/toco/tflite/op_version.cc | 1 - .../benchmark/experimental/c/c_api_types.h | 3 - .../lite/tools/versioning/runtime_version.cc | 1 - .../tools/versioning/runtime_version_test.cc | 2 +- 21 files changed, 15 insertions(+), 674 deletions(-) delete mode 100644 tensorflow/lite/kernels/broadcast_to.cc delete mode 100644 tensorflow/lite/kernels/broadcast_to_test.cc delete mode 100644 tensorflow/lite/kernels/internal/reference/broadcast_to.h diff --git a/RELEASE.md b/RELEASE.md index a4f3643a73d..00c056b1f67 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -64,8 +64,7 @@ True, the function may use type annotations to optimize the tracing performance. * `tf.lite`: - * Better support for ops with high-dimensional broadcasting inputs by adding - `BroadcastTo` ops when necessary. + * * `tf.random`: * * Math and Linear Algebra: diff --git a/tensorflow/lite/builtin_ops.h b/tensorflow/lite/builtin_ops.h index c6440729738..85140289ac1 100644 --- a/tensorflow/lite/builtin_ops.h +++ b/tensorflow/lite/builtin_ops.h @@ -153,7 +153,6 @@ typedef enum { kTfLiteBuiltinDensify = 124, kTfLiteBuiltinSegmentSum = 125, kTfLiteBuiltinBatchMatmul = 126, - kTfLiteBuiltinBroadcastTo = 127, } TfLiteBuiltinOperator; #ifdef __cplusplus diff --git a/tensorflow/lite/c/common.c b/tensorflow/lite/c/common.c index 4bbbcbbf03c..0264f420b12 100644 --- a/tensorflow/lite/c/common.c +++ b/tensorflow/lite/c/common.c @@ -219,29 +219,6 @@ const char* TfLiteTypeGetName(TfLiteType type) { return "Unknown type"; } -// Size of string is not constant, return 0 in such case. -int TfLiteTypeGetSize(TfLiteType type) { - switch (type) { - case kTfLiteUInt8: - case kTfLiteInt8: - return 1; - case kTfLiteBool: - return sizeof(bool); - case kTfLiteInt16: - case kTfLiteFloat16: - return 2; - case kTfLiteFloat32: - case kTfLiteInt32: - return 4; - case kTfLiteInt64: - case kTfLiteComplex64: - case kTfLiteFloat64: - return 8; - default: - return 0; - } -} - TfLiteDelegate TfLiteDelegateCreate() { TfLiteDelegate d = { .data_ = NULL, diff --git a/tensorflow/lite/c/common.h b/tensorflow/lite/c/common.h index 615b5fbaa45..3398d178561 100644 --- a/tensorflow/lite/c/common.h +++ b/tensorflow/lite/c/common.h @@ -284,9 +284,6 @@ typedef enum { // Return the name of a given type, for error reporting purposes. const char* TfLiteTypeGetName(TfLiteType type); -// Return the size of given type in bytes. Return 0 in in case of string. -int TfLiteTypeGetSize(TfLiteType type); - // SupportedQuantizationTypes. typedef enum TfLiteQuantizationType { // No quantization. 
diff --git a/tensorflow/lite/core/api/flatbuffer_conversions.cc b/tensorflow/lite/core/api/flatbuffer_conversions.cc index 059ad97f551..0652c64f6c2 100644 --- a/tensorflow/lite/core/api/flatbuffer_conversions.cc +++ b/tensorflow/lite/core/api/flatbuffer_conversions.cc @@ -820,7 +820,6 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type, case BuiltinOperator_SCATTER_ND: case BuiltinOperator_DENSIFY: case BuiltinOperator_SEGMENT_SUM: - case BuiltinOperator_BROADCAST_TO: return kTfLiteOk; } return kTfLiteError; diff --git a/tensorflow/lite/kernels/BUILD b/tensorflow/lite/kernels/BUILD index 3157081dd21..4351a2c93a2 100644 --- a/tensorflow/lite/kernels/BUILD +++ b/tensorflow/lite/kernels/BUILD @@ -491,7 +491,6 @@ BUILTIN_KERNEL_SRCS = [ "batch_to_space_nd.cc", "bidirectional_sequence_lstm.cc", "bidirectional_sequence_rnn.cc", - "broadcast_to.cc", "cast.cc", "ceil.cc", "comparisons.cc", @@ -985,19 +984,6 @@ cc_test( ], ) -cc_test( - name = "broadcast_to_test", - size = "small", - srcs = ["broadcast_to_test.cc"], - deps = [ - ":builtin_ops", - ":test_main", - ":test_util", - "//tensorflow/lite:framework", - "@com_google_googletest//:gtest", - ], -) - cc_test( name = "cast_test", size = "small", diff --git a/tensorflow/lite/kernels/broadcast_to.cc b/tensorflow/lite/kernels/broadcast_to.cc deleted file mode 100644 index 0e7baca2277..00000000000 --- a/tensorflow/lite/kernels/broadcast_to.cc +++ /dev/null @@ -1,136 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include "tensorflow/lite/kernels/internal/reference/broadcast_to.h" - -#include - -#include -#include - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/tensor.h" -#include "tensorflow/lite/kernels/kernel_util.h" - -namespace tflite { -namespace ops { -namespace builtin { -namespace broadcastto { - -constexpr int kInputTensor = 0; -constexpr int kShapeTensor = 1; -constexpr int kOutputTensor = 0; -constexpr int kMaxDims = 8; - -struct BroadcastToContext { - BroadcastToContext(TfLiteContext* context, TfLiteNode* node) { - input = GetInput(context, node, kInputTensor); - shape = GetInput(context, node, kShapeTensor); - output = GetOutput(context, node, kOutputTensor); - } - const TfLiteTensor* input; - const TfLiteTensor* shape; - TfLiteTensor* output; -}; - -TfLiteStatus ResizeOutputTensor(TfLiteContext* context, - BroadcastToContext* op_context) { - // Ensures the shape is 1D tensor. - TF_LITE_ENSURE_EQ(context, NumDimensions(op_context->shape), 1); - - // Ensure output dims is not less than input dims. 
- int input_num_dims = NumDimensions(op_context->input); - int output_num_dims = SizeOfDimension(op_context->shape, 0); - TF_LITE_ENSURE_MSG(context, input_num_dims <= output_num_dims, - "Output shape must be broadcastable from input shape."); - TF_LITE_ENSURE_MSG(context, output_num_dims <= kMaxDims, - "BroadcastTo only supports 1-8D tensor."); - - // Check if output shape is broadcastable from input shape. - auto get_shape_data = [op_context](int i) -> int32_t { - if (op_context->shape->type == kTfLiteInt32) { - return GetTensorData(op_context->shape)[i]; - } else { - return GetTensorData(op_context->shape)[i]; - } - }; - - int extending_dims = output_num_dims - input_num_dims; - for (int idx = 0; idx < input_num_dims; ++idx) { - TF_LITE_ENSURE_MSG(context, - (SizeOfDimension(op_context->input, idx) == 1 || - SizeOfDimension(op_context->input, idx) == - get_shape_data(extending_dims + idx)), - "Output shape must be broadcastable from input shape."); - } - // Resizing the shape of the output tensor. - TfLiteIntArray* output_shape = TfLiteIntArrayCreate(output_num_dims); - std::unique_ptr - scoped_output_shape(output_shape, TfLiteIntArrayFree); - for (int idx = 0; idx < output_num_dims; ++idx) { - output_shape->data[idx] = get_shape_data(idx); - } - - return context->ResizeTensor(context, op_context->output, - scoped_output_shape.release()); -} - -TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { - TF_LITE_ENSURE(context, NumInputs(node) == 2); - TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); - TF_LITE_ENSURE_MSG(context, - (NumDimensions(GetInput(context, node, 0)) <= kMaxDims), - "BroadcastTo only supports 1-8D tensor."); - - BroadcastToContext op_context(context, node); - TF_LITE_ENSURE(context, op_context.shape->type == kTfLiteInt32 || - op_context.shape->type == kTfLiteInt64); - TF_LITE_ENSURE_EQ(context, op_context.input->type, op_context.output->type); - - // Not yet support string type due to the use of memcopy with fixed size. - TF_LITE_ENSURE(context, op_context.input->type != kTfLiteString); - - if (IsConstantTensor(op_context.shape)) { - return ResizeOutputTensor(context, &op_context); - } - - SetTensorToDynamic(op_context.output); - return kTfLiteOk; -} - -TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - BroadcastToContext op_context(context, node); - if (IsDynamicTensor(op_context.output)) { - TF_LITE_ENSURE_OK(context, ResizeOutputTensor(context, &op_context)); - } - - // BroadcastTo op support upto 8 dims, matching the support of Tensorflow. - reference_ops::BroadcastTo( - GetTensorShape(op_context.input), op_context.input->data.raw, - GetTensorShape(op_context.output), op_context.output->data.raw, - op_context.input->type); - return kTfLiteOk; -} - -} // namespace broadcastto - -TfLiteRegistration* Register_BROADCAST_TO() { - static TfLiteRegistration r = {nullptr, nullptr, broadcastto::Prepare, - broadcastto::Eval}; - return &r; -} - -} // namespace builtin -} // namespace ops -} // namespace tflite diff --git a/tensorflow/lite/kernels/broadcast_to_test.cc b/tensorflow/lite/kernels/broadcast_to_test.cc deleted file mode 100644 index a36ed352055..00000000000 --- a/tensorflow/lite/kernels/broadcast_to_test.cc +++ /dev/null @@ -1,255 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include -#include - -#include -#include "tensorflow/lite/interpreter.h" -#include "tensorflow/lite/kernels/register.h" -#include "tensorflow/lite/kernels/test_util.h" -#include "tensorflow/lite/model.h" - -namespace tflite { -namespace { -using ::testing::ElementsAreArray; - -template -class BroadcastToOpModel : public SingleOpModel { - public: - // BroadcastTo with dynamic shape. - BroadcastToOpModel(std::initializer_list input_shape, - std::initializer_list shape_shape) { - input_ = AddInput({GetTensorType(), input_shape}); - shape_ = AddInput({GetTensorType(), shape_shape}); - output_ = AddOutput(GetTensorType()); - SetBuiltinOp(BuiltinOperator_BROADCAST_TO, - BuiltinOptions_BroadcastToOptions, - CreateBroadcastToOptions(builder_).Union()); - BuildInterpreter({input_shape, shape_shape}); - } - - // BroadcastTo with const shape. - BroadcastToOpModel(std::initializer_list input_shape, - std::initializer_list shape_shape, - std::initializer_list shape_values) { - input_ = AddInput({GetTensorType(), input_shape}); - shape_ = - AddConstInput(GetTensorType(), shape_values, shape_shape); - output_ = AddOutput(GetTensorType()); - SetBuiltinOp(BuiltinOperator_BROADCAST_TO, - BuiltinOptions_BroadcastToOptions, - CreateBroadcastToOptions(builder_).Union()); - BuildInterpreter({input_shape, shape_shape}); - } - - void SetInput(std::initializer_list data) { - PopulateTensor(input_, data); - } - - void SetShape(std::initializer_list data) { - PopulateTensor(shape_, data); - } - - std::vector GetOutput() { - return ExtractVector(output_); - } - std::vector GetOutputShape() { return GetTensorShape(output_); } - - protected: - int input_; - int shape_; - int output_; -}; - -template -class BroadcastToOpTest : public ::testing::Test {}; - -using DataTypes = ::testing::Types; -TYPED_TEST_SUITE(BroadcastToOpTest, DataTypes); - -#ifdef GTEST_HAS_DEATH_TEST -TYPED_TEST(BroadcastToOpTest, ShapeMustBe1D) { - EXPECT_DEATH( - BroadcastToOpModel({2, 3, 4, 4}, {2, 2}, {2, 3, 4, 4}), ""); - // Non-constant Shape tensor. - BroadcastToOpModel m({2, 3, 4, 4}, {2, 2}); - m.SetShape({2, 3, 4, 4}); - EXPECT_THAT(m.InvokeUnchecked(), kTfLiteError); -} - -TYPED_TEST(BroadcastToOpTest, TooManyDimensions) { - EXPECT_DEATH(BroadcastToOpModel({1, 2, 3, 4, 5, 6, 7, 8, 9}, {9}, - {2, 2, 3, 4, 5, 6, 7, 8, 9}), - "BroadcastTo only supports 1-8D tensor."); - EXPECT_DEATH(BroadcastToOpModel({1, 2, 3, 4, 5, 6, 7, 8, 9}, {9}), - "BroadcastTo only supports 1-8D tensor."); -} - -TYPED_TEST(BroadcastToOpTest, MismatchDimension) { - EXPECT_DEATH(BroadcastToOpModel({2, 4, 1, 2}, {4}, {2, 4, 1, 3}), - "Output shape must be broadcastable from input shape."); - EXPECT_DEATH( - BroadcastToOpModel({2, 4, 1, 2, 3}, {4}, {2, 4, 1, 2}), - "Output shape must be broadcastable from input shape."); - - // Non-constant Shape tensor. 
- BroadcastToOpModel m1({2, 4, 1, 2}, {4}); - m1.SetShape({2, 3, 4, 4}); - EXPECT_THAT(m1.InvokeUnchecked(), kTfLiteError); - BroadcastToOpModel m2({2, 4, 1, 2}, {5}); - m2.SetShape({1, 2, 3, 4, 4}); - EXPECT_THAT(m2.InvokeUnchecked(), kTfLiteError); -} -#endif - -TYPED_TEST(BroadcastToOpTest, BroadcastTo1DConstTest) { - BroadcastToOpModel m({1}, {1}, {4}); - m.SetInput({3}); - m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({4})); - EXPECT_THAT(m.GetOutput(), ElementsAreArray({3, 3, 3, 3})); -} - -TYPED_TEST(BroadcastToOpTest, BroadcastTo4DConstTest) { - BroadcastToOpModel m({1, 1, 1, 2}, {4}, {1, 1, 2, 2}); - m.SetInput({3, 4}); - m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 1, 2, 2})); - EXPECT_THAT(m.GetOutput(), ElementsAreArray({3, 4, 3, 4})); -} - -TYPED_TEST(BroadcastToOpTest, BroadcastTo8DConstTest) { - BroadcastToOpModel m({1, 1, 1, 1, 1, 1, 2, 1}, {8}, - {1, 1, 1, 1, 1, 1, 2, 2}); - m.SetInput({3, 4}); - m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 1, 1, 1, 1, 1, 2, 2})); - EXPECT_THAT(m.GetOutput(), ElementsAreArray({3, 3, 4, 4})); -} - -TYPED_TEST(BroadcastToOpTest, BroadcastTo1DDynamicTest) { - BroadcastToOpModel m({1}, {1}); - m.SetInput({3}); - m.SetShape({4}); - m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({4})); - EXPECT_THAT(m.GetOutput(), ElementsAreArray({3, 3, 3, 3})); -} - -TYPED_TEST(BroadcastToOpTest, BroadcastTo4DDynamicTest) { - BroadcastToOpModel m({1, 1, 1, 2}, {4}); - m.SetInput({3, 4}); - m.SetShape({1, 1, 2, 2}); - m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 1, 2, 2})); - EXPECT_THAT(m.GetOutput(), ElementsAreArray({3, 4, 3, 4})); -} - -TYPED_TEST(BroadcastToOpTest, BroadcastTo8DDynamicTest) { - BroadcastToOpModel m({1, 1, 1, 1, 1, 1, 2, 1}, {8}); - m.SetInput({3, 4}); - m.SetShape({1, 1, 1, 1, 1, 1, 2, 2}); - m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 1, 1, 1, 1, 1, 2, 2})); - EXPECT_THAT(m.GetOutput(), ElementsAreArray({3, 3, 4, 4})); -} - -TYPED_TEST(BroadcastToOpTest, ComplexBroadcast4DConstTest) { - BroadcastToOpModel m({1, 3, 1, 2}, {4}, {3, 3, 2, 2}); - m.SetInput({1, 2, 3, 4, 5, 6}); - m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 3, 2, 2})); - EXPECT_THAT( - m.GetOutput(), - ElementsAreArray({1, 2, 1, 2, 3, 4, 3, 4, 5, 6, 5, 6, 1, 2, 1, 2, 3, 4, - 3, 4, 5, 6, 5, 6, 1, 2, 1, 2, 3, 4, 3, 4, 5, 6, 5, 6})); -} - -TYPED_TEST(BroadcastToOpTest, ComplexBroadcast4DDynamicTest) { - BroadcastToOpModel m({1, 3, 1, 2}, {4}); - m.SetInput({1, 2, 3, 4, 5, 6}); - m.SetShape({3, 3, 2, 2}); - m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 3, 2, 2})); - EXPECT_THAT( - m.GetOutput(), - ElementsAreArray({1, 2, 1, 2, 3, 4, 3, 4, 5, 6, 5, 6, 1, 2, 1, 2, 3, 4, - 3, 4, 5, 6, 5, 6, 1, 2, 1, 2, 3, 4, 3, 4, 5, 6, 5, 6})); -} - -TYPED_TEST(BroadcastToOpTest, ComplexBroadcast6DConstTest) { - BroadcastToOpModel m({1, 2, 1, 3, 1, 2}, {6}, {2, 2, 1, 3, 2, 2}); - m.SetInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); - m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2, 2, 1, 3, 2, 2})); - EXPECT_THAT(m.GetOutput(), - ElementsAreArray({1, 2, 1, 2, 3, 4, 3, 4, 5, 6, 5, 6, - 7, 8, 7, 8, 9, 10, 9, 10, 11, 12, 11, 12, - 1, 2, 1, 2, 3, 4, 3, 4, 5, 6, 5, 6, - 7, 8, 7, 8, 9, 10, 9, 10, 11, 12, 11, 12})); -} - -TYPED_TEST(BroadcastToOpTest, ComplexBroadcast6DDynamicTest) { - BroadcastToOpModel m({1, 2, 1, 3, 1, 2}, {6}); - m.SetInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); - m.SetShape({2, 2, 1, 3, 2, 2}); - 
m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2, 2, 1, 3, 2, 2})); - EXPECT_THAT(m.GetOutput(), - ElementsAreArray({1, 2, 1, 2, 3, 4, 3, 4, 5, 6, 5, 6, - 7, 8, 7, 8, 9, 10, 9, 10, 11, 12, 11, 12, - 1, 2, 1, 2, 3, 4, 3, 4, 5, 6, 5, 6, - 7, 8, 7, 8, 9, 10, 9, 10, 11, 12, 11, 12})); -} - -TYPED_TEST(BroadcastToOpTest, ExtendingShape4DConstTest) { - BroadcastToOpModel m({3, 1, 2}, {4}, {3, 3, 2, 2}); - m.SetInput({1, 2, 3, 4, 5, 6}); - m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 3, 2, 2})); - EXPECT_THAT( - m.GetOutput(), - ElementsAreArray({1, 2, 1, 2, 3, 4, 3, 4, 5, 6, 5, 6, 1, 2, 1, 2, 3, 4, - 3, 4, 5, 6, 5, 6, 1, 2, 1, 2, 3, 4, 3, 4, 5, 6, 5, 6})); -} - -TYPED_TEST(BroadcastToOpTest, NoBroadcastingConstTest) { - BroadcastToOpModel m({3, 1, 2}, {3}, {3, 1, 2}); - m.SetInput({1, 2, 3, 4, 5, 6}); - m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 1, 2})); - EXPECT_THAT(m.GetOutput(), ElementsAreArray({1, 2, 3, 4, 5, 6})); -} - -TYPED_TEST(BroadcastToOpTest, Int64ShapeConstTest) { - BroadcastToOpModel m({1, 1, 1, 1, 1, 1, 2, 1}, {8}, - {1, 1, 1, 1, 1, 1, 2, 2}); - m.SetInput({3, 4}); - m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 1, 1, 1, 1, 1, 2, 2})); - EXPECT_THAT(m.GetOutput(), ElementsAreArray({3, 3, 4, 4})); -} - -TYPED_TEST(BroadcastToOpTest, Int64ShapeDDynamicTest) { - BroadcastToOpModel m({1, 1, 1, 1, 1, 1, 2, 1}, {8}); - m.SetInput({3, 4}); - m.SetShape({1, 1, 1, 1, 1, 1, 2, 2}); - m.Invoke(); - EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 1, 1, 1, 1, 1, 2, 2})); - EXPECT_THAT(m.GetOutput(), ElementsAreArray({3, 3, 4, 4})); -} - -} // namespace -} // namespace tflite diff --git a/tensorflow/lite/kernels/builtin_op_kernels.h b/tensorflow/lite/kernels/builtin_op_kernels.h index fea25f8605c..1c73f06487b 100644 --- a/tensorflow/lite/kernels/builtin_op_kernels.h +++ b/tensorflow/lite/kernels/builtin_op_kernels.h @@ -39,7 +39,6 @@ TfLiteRegistration* Register_BATCH_TO_SPACE_ND(); TfLiteRegistration* Register_BATCH_MATMUL(); TfLiteRegistration* Register_BIDIRECTIONAL_SEQUENCE_LSTM(); TfLiteRegistration* Register_BIDIRECTIONAL_SEQUENCE_RNN(); -TfLiteRegistration* Register_BROADCAST_TO(); TfLiteRegistration* Register_CAST(); TfLiteRegistration* Register_CEIL(); TfLiteRegistration* Register_CONCATENATION(); diff --git a/tensorflow/lite/kernels/internal/BUILD b/tensorflow/lite/kernels/internal/BUILD index 075c1da9865..5acabeb45cd 100644 --- a/tensorflow/lite/kernels/internal/BUILD +++ b/tensorflow/lite/kernels/internal/BUILD @@ -441,7 +441,6 @@ cc_library( "reference/arg_min_max.h", "reference/batch_matmul.h", "reference/binary_function.h", - "reference/broadcast_to.h", "reference/ceil.h", "reference/comparisons.h", "reference/concatenation.h", diff --git a/tensorflow/lite/kernels/internal/common.h b/tensorflow/lite/kernels/internal/common.h index 10cb164e696..c45aff9e47b 100644 --- a/tensorflow/lite/kernels/internal/common.h +++ b/tensorflow/lite/kernels/internal/common.h @@ -665,13 +665,6 @@ inline int SubscriptToIndex(const NdArrayDesc<5>& desc, int indexes[5]) { indexes[4] * desc.strides[4]; } -inline int SubscriptToIndex(const NdArrayDesc<8>& desc, int indexes[8]) { - return indexes[0] * desc.strides[0] + indexes[1] * desc.strides[1] + - indexes[2] * desc.strides[2] + indexes[3] * desc.strides[3] + - indexes[4] * desc.strides[4] + indexes[5] * desc.strides[5] + - indexes[6] * desc.strides[6] + indexes[7] * desc.strides[7]; -} - // Given the dimensions of the operands for an element-wise binary 
broadcast, // adjusts them so that they can be directly iterated over with simple loops. // Returns the adjusted dims as instances of NdArrayDesc in 'desc0_out' and diff --git a/tensorflow/lite/kernels/internal/reference/broadcast_to.h b/tensorflow/lite/kernels/internal/reference/broadcast_to.h deleted file mode 100644 index 69f4531ba14..00000000000 --- a/tensorflow/lite/kernels/internal/reference/broadcast_to.h +++ /dev/null @@ -1,90 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_TO_H_ -#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_TO_H_ - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/common.h" - -namespace tflite { -namespace reference_ops { -template -void BroadcastImpl(const NdArrayDesc& input_desc, const char* input_data, - const NdArrayDesc& output_desc, char* output_data, - int indexes[N], int dim, const int last_broadcasting_dim, - const int type_size) { - // Copy data from input to output. - if (dim == last_broadcasting_dim) { - int copy_size = output_desc.strides[dim] * type_size; - const char* data_src = - input_data + SubscriptToIndex(input_desc, indexes) * type_size; - char* data_dst = - output_data + SubscriptToIndex(output_desc, indexes) * type_size; - for (int i = 0; i < output_desc.extents[dim]; ++i, data_dst += copy_size) { - memcpy(data_dst, data_src, copy_size); - } - return; - } - - // Recursive call to find the next broadcasting. - for (indexes[dim] = 0; indexes[dim] < input_desc.extents[dim]; - ++indexes[dim]) { - BroadcastImpl(input_desc, input_data, output_desc, output_data, indexes, - dim + 1, last_broadcasting_dim, type_size); - } - - // Duplicate data in output tensor. - indexes[dim] = 0; - if (input_desc.extents[dim] != output_desc.extents[dim]) { - int copy_size = output_desc.strides[dim] * type_size; - char* data_src = - output_data + SubscriptToIndex(output_desc, indexes) * type_size; - char* data_dst = data_src + copy_size; - for (int i = 1; i < output_desc.extents[dim]; ++i, data_dst += copy_size) { - memcpy(data_dst, data_src, copy_size); - } - } -} - -template -inline void BroadcastTo(const RuntimeShape& unextended_input_shape, - const char* input_data, - const RuntimeShape& unextended_output_shape, - char* output_data, TfLiteType data_type) { - NdArrayDesc input_desc; - NdArrayDesc output_desc; - CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_input_shape), - &input_desc); - CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape), - &output_desc); - - // Get the last dimension has broadcasting. At this dimension, the data is - // copied from input tensor to output tensor. - int last_broadcast_dim = 0; - for (int i = N - 1; i > 0; --i) { - if (input_desc.extents[i] != output_desc.extents[i]) { - last_broadcast_dim = i; - break; - } - } - - // Broadcasting using memcpy. 
- int indexes[N] = {0}; - BroadcastImpl(input_desc, input_data, output_desc, output_data, indexes, 0, - last_broadcast_dim, TfLiteTypeGetSize(data_type)); -} -} // namespace reference_ops -} // namespace tflite -#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_TO_H_ diff --git a/tensorflow/lite/kernels/register.cc b/tensorflow/lite/kernels/register.cc index da7480d448d..275340ec225 100644 --- a/tensorflow/lite/kernels/register.cc +++ b/tensorflow/lite/kernels/register.cc @@ -292,7 +292,6 @@ BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_BATCH_MATMUL, Register_BATCH_MATMUL(), /* min_version = */ 1, /* max_version = */ 2); - AddBuiltin(BuiltinOperator_BROADCAST_TO, Register_BROADCAST_TO()); AddCustom("NumericVerify", tflite::ops::custom::Register_NUMERIC_VERIFY()); // TODO(andrewharp, ahentz): Move these somewhere more appropriate so that // custom ops aren't always included by default. diff --git a/tensorflow/lite/kernels/register_ref.cc b/tensorflow/lite/kernels/register_ref.cc index b1dd31ab168..233520e2165 100644 --- a/tensorflow/lite/kernels/register_ref.cc +++ b/tensorflow/lite/kernels/register_ref.cc @@ -139,7 +139,6 @@ TfLiteRegistration* Register_DEPTH_TO_SPACE_REF(); TfLiteRegistration* Register_SELECT_V2(); TfLiteRegistration* Register_SEGMENT_SUM(); TfLiteRegistration* Register_BATCH_MATMUL_REF(); -TfLiteRegistration* Register_BROADCAST_TO(); namespace { @@ -208,7 +207,6 @@ BuiltinRefOpResolver::BuiltinRefOpResolver() { Register_SPACE_TO_BATCH_ND_REF()); AddBuiltin(BuiltinOperator_BATCH_TO_SPACE_ND, Register_BATCH_TO_SPACE_ND_REF()); - AddBuiltin(BuiltinOperator_BROADCAST_TO, Register_BROADCAST_TO()); AddBuiltin(BuiltinOperator_MUL, Register_MUL_REF()); AddBuiltin(BuiltinOperator_L2_NORMALIZATION, Register_L2NORM_REF()); AddBuiltin(BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION, diff --git a/tensorflow/lite/schema/schema.fbs b/tensorflow/lite/schema/schema.fbs index 949d769b457..878acde1e16 100644 --- a/tensorflow/lite/schema/schema.fbs +++ b/tensorflow/lite/schema/schema.fbs @@ -349,8 +349,7 @@ enum BuiltinOperator : byte { SELECT_V2 = 123, DENSIFY = 124, SEGMENT_SUM = 125, - BATCH_MATMUL = 126, - BROADCAST_TO = 127 + BATCH_MATMUL = 126 } @@ -456,8 +455,7 @@ union BuiltinOptions { SelectV2Options, DensifyOptions, SegmentSumOptions, - BatchMatMulOptions, - BroadcastToOptions + BatchMatMulOptions } enum Padding : byte { SAME, VALID } @@ -977,9 +975,6 @@ table BatchMatMulOptions { adj_y:bool; } -table BroadcastToOptions { -} - // An OperatorCode can be an enum value (BuiltinOperator) if the operator is a // builtin, or a string if the operator is custom. 
table OperatorCode { diff --git a/tensorflow/lite/schema/schema_generated.h b/tensorflow/lite/schema/schema_generated.h index 7bf79e52e27..a6117dc72ab 100755 --- a/tensorflow/lite/schema/schema_generated.h +++ b/tensorflow/lite/schema/schema_generated.h @@ -349,9 +349,6 @@ struct SegmentSumOptionsT; struct BatchMatMulOptions; struct BatchMatMulOptionsT; -struct BroadcastToOptions; -struct BroadcastToOptionsT; - struct OperatorCode; struct OperatorCodeT; @@ -784,12 +781,11 @@ enum BuiltinOperator { BuiltinOperator_DENSIFY = 124, BuiltinOperator_SEGMENT_SUM = 125, BuiltinOperator_BATCH_MATMUL = 126, - BuiltinOperator_BROADCAST_TO = 127, BuiltinOperator_MIN = BuiltinOperator_ADD, - BuiltinOperator_MAX = BuiltinOperator_BROADCAST_TO + BuiltinOperator_MAX = BuiltinOperator_BATCH_MATMUL }; -inline const BuiltinOperator (&EnumValuesBuiltinOperator())[128] { +inline const BuiltinOperator (&EnumValuesBuiltinOperator())[127] { static const BuiltinOperator values[] = { BuiltinOperator_ADD, BuiltinOperator_AVERAGE_POOL_2D, @@ -917,14 +913,13 @@ inline const BuiltinOperator (&EnumValuesBuiltinOperator())[128] { BuiltinOperator_SELECT_V2, BuiltinOperator_DENSIFY, BuiltinOperator_SEGMENT_SUM, - BuiltinOperator_BATCH_MATMUL, - BuiltinOperator_BROADCAST_TO + BuiltinOperator_BATCH_MATMUL }; return values; } inline const char * const *EnumNamesBuiltinOperator() { - static const char * const names[129] = { + static const char * const names[128] = { "ADD", "AVERAGE_POOL_2D", "CONCATENATION", @@ -1052,14 +1047,13 @@ inline const char * const *EnumNamesBuiltinOperator() { "DENSIFY", "SEGMENT_SUM", "BATCH_MATMUL", - "BROADCAST_TO", nullptr }; return names; } inline const char *EnumNameBuiltinOperator(BuiltinOperator e) { - if (flatbuffers::IsOutRange(e, BuiltinOperator_ADD, BuiltinOperator_BROADCAST_TO)) return ""; + if (flatbuffers::IsOutRange(e, BuiltinOperator_ADD, BuiltinOperator_BATCH_MATMUL)) return ""; const size_t index = static_cast(e); return EnumNamesBuiltinOperator()[index]; } @@ -1167,12 +1161,11 @@ enum BuiltinOptions { BuiltinOptions_DensifyOptions = 99, BuiltinOptions_SegmentSumOptions = 100, BuiltinOptions_BatchMatMulOptions = 101, - BuiltinOptions_BroadcastToOptions = 102, BuiltinOptions_MIN = BuiltinOptions_NONE, - BuiltinOptions_MAX = BuiltinOptions_BroadcastToOptions + BuiltinOptions_MAX = BuiltinOptions_BatchMatMulOptions }; -inline const BuiltinOptions (&EnumValuesBuiltinOptions())[103] { +inline const BuiltinOptions (&EnumValuesBuiltinOptions())[102] { static const BuiltinOptions values[] = { BuiltinOptions_NONE, BuiltinOptions_Conv2DOptions, @@ -1275,14 +1268,13 @@ inline const BuiltinOptions (&EnumValuesBuiltinOptions())[103] { BuiltinOptions_SelectV2Options, BuiltinOptions_DensifyOptions, BuiltinOptions_SegmentSumOptions, - BuiltinOptions_BatchMatMulOptions, - BuiltinOptions_BroadcastToOptions + BuiltinOptions_BatchMatMulOptions }; return values; } inline const char * const *EnumNamesBuiltinOptions() { - static const char * const names[104] = { + static const char * const names[103] = { "NONE", "Conv2DOptions", "DepthwiseConv2DOptions", @@ -1385,14 +1377,13 @@ inline const char * const *EnumNamesBuiltinOptions() { "DensifyOptions", "SegmentSumOptions", "BatchMatMulOptions", - "BroadcastToOptions", nullptr }; return names; } inline const char *EnumNameBuiltinOptions(BuiltinOptions e) { - if (flatbuffers::IsOutRange(e, BuiltinOptions_NONE, BuiltinOptions_BroadcastToOptions)) return ""; + if (flatbuffers::IsOutRange(e, BuiltinOptions_NONE, BuiltinOptions_BatchMatMulOptions)) return ""; 
const size_t index = static_cast(e); return EnumNamesBuiltinOptions()[index]; } @@ -1805,10 +1796,6 @@ template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_BatchMatMulOptions; }; -template<> struct BuiltinOptionsTraits { - static const BuiltinOptions enum_value = BuiltinOptions_BroadcastToOptions; -}; - struct BuiltinOptionsUnion { BuiltinOptions type; void *value; @@ -2649,14 +2636,6 @@ struct BuiltinOptionsUnion { return type == BuiltinOptions_BatchMatMulOptions ? reinterpret_cast(value) : nullptr; } - tflite::BroadcastToOptionsT *AsBroadcastToOptions() { - return type == BuiltinOptions_BroadcastToOptions ? - reinterpret_cast(value) : nullptr; - } - const tflite::BroadcastToOptionsT *AsBroadcastToOptions() const { - return type == BuiltinOptions_BroadcastToOptions ? - reinterpret_cast(value) : nullptr; - } }; bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type); @@ -9331,46 +9310,6 @@ inline flatbuffers::Offset CreateBatchMatMulOptions( flatbuffers::Offset CreateBatchMatMulOptions(flatbuffers::FlatBufferBuilder &_fbb, const BatchMatMulOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); -struct BroadcastToOptionsT : public flatbuffers::NativeTable { - typedef BroadcastToOptions TableType; - BroadcastToOptionsT() { - } -}; - -struct BroadcastToOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef BroadcastToOptionsT NativeTableType; - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - verifier.EndTable(); - } - BroadcastToOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo(BroadcastToOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const BroadcastToOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); -}; - -struct BroadcastToOptionsBuilder { - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - explicit BroadcastToOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - BroadcastToOptionsBuilder &operator=(const BroadcastToOptionsBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - return o; - } -}; - -inline flatbuffers::Offset CreateBroadcastToOptions( - flatbuffers::FlatBufferBuilder &_fbb) { - BroadcastToOptionsBuilder builder_(_fbb); - return builder_.Finish(); -} - -flatbuffers::Offset CreateBroadcastToOptions(flatbuffers::FlatBufferBuilder &_fbb, const BroadcastToOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); - struct OperatorCodeT : public flatbuffers::NativeTable { typedef OperatorCode TableType; tflite::BuiltinOperator builtin_code; @@ -9810,9 +9749,6 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { const tflite::BatchMatMulOptions *builtin_options_as_BatchMatMulOptions() const { return builtin_options_type() == tflite::BuiltinOptions_BatchMatMulOptions ? static_cast(builtin_options()) : nullptr; } - const tflite::BroadcastToOptions *builtin_options_as_BroadcastToOptions() const { - return builtin_options_type() == tflite::BuiltinOptions_BroadcastToOptions ? 
static_cast(builtin_options()) : nullptr; - } const flatbuffers::Vector *custom_options() const { return GetPointer *>(VT_CUSTOM_OPTIONS); } @@ -10253,10 +10189,6 @@ template<> inline const tflite::BatchMatMulOptions *Operator::builtin_options_as return builtin_options_as_BatchMatMulOptions(); } -template<> inline const tflite::BroadcastToOptions *Operator::builtin_options_as() const { - return builtin_options_as_BroadcastToOptions(); -} - struct OperatorBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; @@ -13724,29 +13656,6 @@ inline flatbuffers::Offset CreateBatchMatMulOptions(flatbuff _adj_y); } -inline BroadcastToOptionsT *BroadcastToOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { - auto _o = new BroadcastToOptionsT(); - UnPackTo(_o, _resolver); - return _o; -} - -inline void BroadcastToOptions::UnPackTo(BroadcastToOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { - (void)_o; - (void)_resolver; -} - -inline flatbuffers::Offset BroadcastToOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BroadcastToOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { - return CreateBroadcastToOptions(_fbb, _o, _rehasher); -} - -inline flatbuffers::Offset CreateBroadcastToOptions(flatbuffers::FlatBufferBuilder &_fbb, const BroadcastToOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { - (void)_rehasher; - (void)_o; - struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const BroadcastToOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; - return tflite::CreateBroadcastToOptions( - _fbb); -} - inline OperatorCodeT *OperatorCode::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new OperatorCodeT(); UnPackTo(_o, _resolver); @@ -14556,10 +14465,6 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob auto ptr = reinterpret_cast(obj); return verifier.VerifyTable(ptr); } - case BuiltinOptions_BroadcastToOptions: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } default: return true; } } @@ -14982,10 +14887,6 @@ inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, c auto ptr = reinterpret_cast(obj); return ptr->UnPack(resolver); } - case BuiltinOptions_BroadcastToOptions: { - auto ptr = reinterpret_cast(obj); - return ptr->UnPack(resolver); - } default: return nullptr; } } @@ -15396,10 +15297,6 @@ inline flatbuffers::Offset BuiltinOptionsUnion::Pack(flatbuffers::FlatBuff auto ptr = reinterpret_cast(value); return CreateBatchMatMulOptions(_fbb, ptr, _rehasher).Union(); } - case BuiltinOptions_BroadcastToOptions: { - auto ptr = reinterpret_cast(value); - return CreateBroadcastToOptions(_fbb, ptr, _rehasher).Union(); - } default: return 0; } } @@ -15810,10 +15707,6 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FL value = new tflite::BatchMatMulOptionsT(*reinterpret_cast(u.value)); break; } - case BuiltinOptions_BroadcastToOptions: { - value = new tflite::BroadcastToOptionsT(*reinterpret_cast(u.value)); - break; - } default: break; } @@ -16326,11 +16219,6 @@ inline void BuiltinOptionsUnion::Reset() { delete ptr; break; } - case BuiltinOptions_BroadcastToOptions: { - auto ptr = reinterpret_cast(value); - delete ptr; - break; - } default: break; } value = nullptr; @@ -16394,4 +16282,4 @@ inline std::unique_ptr UnPackSizePrefixedModel( } // namespace tflite -#endif // 
FLATBUFFERS_GENERATED_SCHEMA_TFLITE_H_ \ No newline at end of file +#endif // FLATBUFFERS_GENERATED_SCHEMA_TFLITE_H_ diff --git a/tensorflow/lite/toco/model.h b/tensorflow/lite/toco/model.h index 2478ca6f4a3..b42fed6fbc1 100644 --- a/tensorflow/lite/toco/model.h +++ b/tensorflow/lite/toco/model.h @@ -43,7 +43,6 @@ enum class OperatorType : uint8 { kAveragePool, kBatchMatMul, kBatchNormalization, - kBroadcastTo, kCeil, kConv, kConcatenation, diff --git a/tensorflow/lite/toco/tflite/op_version.cc b/tensorflow/lite/toco/tflite/op_version.cc index 3793bb50c9f..b16f282bedd 100644 --- a/tensorflow/lite/toco/tflite/op_version.cc +++ b/tensorflow/lite/toco/tflite/op_version.cc @@ -63,7 +63,6 @@ std::string GetMinimumRuntimeVersionForModel(const Model& model) { {{OperatorType::kBatchToSpaceND, 1}, "1.6.0"}, {{OperatorType::kBatchToSpaceND, 2}, "1.14.0"}, {{OperatorType::kBatchMatMul, 1}, kPendingReleaseOpVersion}, - {{OperatorType::kBroadcastTo, 1}, kPendingReleaseOpVersion}, {{OperatorType::kCast, 1}, "1.5.0"}, {{OperatorType::kConcatenation, 1}, "1.5.0"}, {{OperatorType::kConcatenation, 2}, "1.14.0"}, diff --git a/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h b/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h index 615b5fbaa45..3398d178561 100644 --- a/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h +++ b/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h @@ -284,9 +284,6 @@ typedef enum { // Return the name of a given type, for error reporting purposes. const char* TfLiteTypeGetName(TfLiteType type); -// Return the size of given type in bytes. Return 0 in in case of string. -int TfLiteTypeGetSize(TfLiteType type); - // SupportedQuantizationTypes. typedef enum TfLiteQuantizationType { // No quantization. diff --git a/tensorflow/lite/tools/versioning/runtime_version.cc b/tensorflow/lite/tools/versioning/runtime_version.cc index 8938c0b5d4b..c2e3f279a90 100644 --- a/tensorflow/lite/tools/versioning/runtime_version.cc +++ b/tensorflow/lite/tools/versioning/runtime_version.cc @@ -59,7 +59,6 @@ std::string FindMinimumRuntimeVersionForOp(tflite::BuiltinOperator op_code, {{BuiltinOperator_AVERAGE_POOL_2D, 3}, "2.3.0"}, {{BuiltinOperator_BATCH_MATMUL, 1}, "2.3.0"}, {{BuiltinOperator_BATCH_MATMUL, 2}, "2.3.0"}, - {{BuiltinOperator_BROADCAST_TO, 1}, kPendingReleaseVersion}, {{BuiltinOperator_CONV_2D, 1}, "1.5.0"}, {{BuiltinOperator_CONV_2D, 2}, "1.14.0"}, {{BuiltinOperator_CONV_2D, 3}, "1.14.0"}, diff --git a/tensorflow/lite/tools/versioning/runtime_version_test.cc b/tensorflow/lite/tools/versioning/runtime_version_test.cc index df1ca46410c..c32de228cc3 100644 --- a/tensorflow/lite/tools/versioning/runtime_version_test.cc +++ b/tensorflow/lite/tools/versioning/runtime_version_test.cc @@ -47,7 +47,7 @@ TEST(OpVersionTest, OpversionMissing) { EXPECT_NE(runtime_version, "") << "Please add the version " << version << " of " << tflite::EnumNamesBuiltinOperator()[op_code] - << " to runtime_version.cc"; + << " runtime_version.cc"; } } } From 5c8e4c1b4797c92666e70e3c042d7696d1055111 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Thu, 23 Jul 2020 16:32:21 -0700 Subject: [PATCH 1226/2522] Migrate official release and nightly testing builds to use the new bazel configs. 
PiperOrigin-RevId: 322892038 Change-Id: Ied7a5487f7bb5d303531cdda691c59007b8048d3 --- .../ci_build/release/ubuntu_16/cpu_py35_full/pip_v1.sh | 7 ++++++- .../ci_build/release/ubuntu_16/cpu_py36_full/pip_v1.sh | 7 ++++++- .../ci_build/release/ubuntu_16/cpu_py37_full/pip_v1.sh | 7 ++++++- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/pip_v1.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/pip_v1.sh index 0cd81c50940..1e2665f4120 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/pip_v1.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/pip_v1.sh @@ -28,11 +28,16 @@ export CONTAINER_TYPE="CPU" export TF_PYTHON_VERSION='python3.5' # Run configure. +export TF_NEED_GCP=1 +export TF_NEED_HDFS=1 +export TF_NEED_S3=1 +export TF_NEED_CUDA=0 +export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) yes "" | "$PYTHON_BIN_PATH" configure.py # Export optional variables for running pip.sh -export TF_BUILD_FLAGS="--config=release_cpu_linux" +export TF_BUILD_FLAGS="--config=opt --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain" export TF_TEST_FLAGS="--define=no_tensorflow_py_deps=true --test_lang_filters=py --test_output=errors --verbose_failures=true --keep_going" export TF_TEST_TARGETS="//tensorflow/python/... " export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/pip_v1.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/pip_v1.sh index 96f62c9b228..c4d78dc3fe5 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/pip_v1.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/pip_v1.sh @@ -28,11 +28,16 @@ export CONTAINER_TYPE="CPU" export TF_PYTHON_VERSION='python3.6' # Run configure. +export TF_NEED_GCP=1 +export TF_NEED_HDFS=1 +export TF_NEED_S3=1 +export TF_NEED_CUDA=0 +export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) yes "" | "$PYTHON_BIN_PATH" configure.py # Export optional variables for running pip.sh -export TF_BUILD_FLAGS="--config=release_cpu_linux" +export TF_BUILD_FLAGS="--config=opt --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain" export TF_TEST_FLAGS="--define=no_tensorflow_py_deps=true --test_lang_filters=py --test_output=errors --verbose_failures=true --keep_going" export TF_TEST_TARGETS="//tensorflow/python/... " export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py37_full/pip_v1.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py37_full/pip_v1.sh index 19aa6175302..2208327388f 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py37_full/pip_v1.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py37_full/pip_v1.sh @@ -28,11 +28,16 @@ export CONTAINER_TYPE="CPU" export TF_PYTHON_VERSION='python3.7' # Run configure. 
+export TF_NEED_GCP=1 +export TF_NEED_HDFS=1 +export TF_NEED_S3=1 +export TF_NEED_CUDA=0 +export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) yes "" | "$PYTHON_BIN_PATH" configure.py # Export optional variables for running pip.sh -export TF_BUILD_FLAGS="--config=release_cpu_linux" +export TF_BUILD_FLAGS="--config=opt --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain" export TF_TEST_FLAGS="--define=no_tensorflow_py_deps=true --test_lang_filters=py --test_output=errors --verbose_failures=true --keep_going" export TF_TEST_TARGETS="//tensorflow/python/... " export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" From 013883e8218df68884e74f34e340fc1c94457327 Mon Sep 17 00:00:00 2001 From: Peng Wang Date: Thu, 23 Jul 2020 16:34:10 -0700 Subject: [PATCH 1227/2522] Some internal change PiperOrigin-RevId: 322892386 Change-Id: Id589e292fad8903ff677cb6b7173afb06b1afea5 --- tensorflow/compiler/tests/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index 16eb3afd6c5..c4dd75de1dd 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -1333,6 +1333,7 @@ tf_xla_py_test( python_version = "PY3", tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip + "notap", # b/162025277 ], deps = [ ":xla_test", From 04756161b85ff0974de91e72b8186ab42e8ae3bb Mon Sep 17 00:00:00 2001 From: Terry Heo Date: Thu, 23 Jul 2020 16:42:09 -0700 Subject: [PATCH 1228/2522] Update TFLite Python3.7 wheel build commands Also fix lite/tools/pip_package/build_pip_package_with_bazel.sh to use proper Python interpreter. PiperOrigin-RevId: 322894050 Change-Id: Iae302cf32f4f9b2f80a3df8d3151495e2939f86b --- tensorflow/lite/tools/pip_package/README.md | 4 ++-- .../lite/tools/pip_package/build_pip_package_with_bazel.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/lite/tools/pip_package/README.md b/tensorflow/lite/tools/pip_package/README.md index 6212104be19..e9a18392a55 100644 --- a/tensorflow/lite/tools/pip_package/README.md +++ b/tensorflow/lite/tools/pip_package/README.md @@ -73,7 +73,7 @@ CI_DOCKER_EXTRA_PARAMS="-e CI_BUILD_PYTHON=python3 -e CROSSTOOL_PYTHON_INCLUDE_P ### Cross build for armhf Python 3.7 ```sh -CI_DOCKER_EXTRA_PARAMS="-e CI_BUILD_PYTHON=python3 -e CROSSTOOL_PYTHON_INCLUDE_PATH=/usr/include/python3.7" \ +CI_DOCKER_EXTRA_PARAMS="-e CI_BUILD_PYTHON=python3.7 -e CROSSTOOL_PYTHON_INCLUDE_PATH=/usr/include/python3.7" \ tensorflow/tools/ci_build/ci_build.sh PI-PYTHON37 \ tensorflow/lite/tools/pip_package/build_pip_package_with_bazel.sh armhf ``` @@ -89,7 +89,7 @@ CI_DOCKER_EXTRA_PARAMS="-e CI_BUILD_PYTHON=python3 -e CROSSTOOL_PYTHON_INCLUDE_P ### Cross build for aarch64 Python 3.7 ```sh -CI_DOCKER_EXTRA_PARAMS="-e CI_BUILD_PYTHON=python3 -e CROSSTOOL_PYTHON_INCLUDE_PATH=/usr/include/python3.7" \ +CI_DOCKER_EXTRA_PARAMS="-e CI_BUILD_PYTHON=python3.7 -e CROSSTOOL_PYTHON_INCLUDE_PATH=/usr/include/python3.7" \ tensorflow/tools/ci_build/ci_build.sh PI-PYTHON37 \ tensorflow/lite/tools/pip_package/build_pip_package_with_bazel.sh aarch64 ``` diff --git a/tensorflow/lite/tools/pip_package/build_pip_package_with_bazel.sh b/tensorflow/lite/tools/pip_package/build_pip_package_with_bazel.sh index 4976624e340..c60ceec5e2b 100755 --- a/tensorflow/lite/tools/pip_package/build_pip_package_with_bazel.sh +++ 
b/tensorflow/lite/tools/pip_package/build_pip_package_with_bazel.sh @@ -16,7 +16,7 @@ set -ex SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PYTHON="${PYTHON:-python3}" +PYTHON="${CI_BUILD_PYTHON:-python3}" VERSION_SUFFIX=${VERSION_SUFFIX:-} export TENSORFLOW_DIR="${SCRIPT_DIR}/../../../.." TENSORFLOW_LITE_DIR="${TENSORFLOW_DIR}/tensorflow/lite" From e1483a9b08e492d93d9c59d50a6066d6e03beef3 Mon Sep 17 00:00:00 2001 From: Russell Power Date: Thu, 23 Jul 2020 16:44:02 -0700 Subject: [PATCH 1229/2522] Add TPU kernel definitions. PiperOrigin-RevId: 322894407 Change-Id: I0cca315be0e3c8344ee9c6686018a26309951708 --- tensorflow/core/tpu/BUILD | 2 + tensorflow/core/tpu/kernels/BUILD | 26 ++ .../core/tpu/kernels/host_compute_ops.cc | 217 +++++++++++ tensorflow/core/tpu/kernels/topk_ops.cc | 360 ++++++++++++++++++ tensorflow/core/tpu/ops/BUILD | 26 ++ tensorflow/core/tpu/ops/host_compute_ops.cc | 140 +++++++ tensorflow/core/tpu/ops/topk_ops.cc | 130 +++++++ 7 files changed, 901 insertions(+) create mode 100644 tensorflow/core/tpu/kernels/host_compute_ops.cc create mode 100644 tensorflow/core/tpu/kernels/topk_ops.cc create mode 100644 tensorflow/core/tpu/ops/host_compute_ops.cc create mode 100644 tensorflow/core/tpu/ops/topk_ops.cc diff --git a/tensorflow/core/tpu/BUILD b/tensorflow/core/tpu/BUILD index 8be9fe12d69..320d07a422f 100644 --- a/tensorflow/core/tpu/BUILD +++ b/tensorflow/core/tpu/BUILD @@ -159,6 +159,8 @@ cc_library( "//tensorflow:oss": [ ":tpu_node_device", ":tpu_system_device", + "//tensorflow/core/tpu/ops:host_compute_ops", + "//tensorflow/core/tpu/ops:topk_ops", "//tensorflow/core/tpu/ops:tpu_compile_op", "//tensorflow/core/tpu/ops:tpu_execute_op", "//tensorflow/core/tpu/ops:tpu_partitioned_ops", diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index 9cb6e8cbb2b..0976add8b3c 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -660,3 +660,29 @@ cc_library( ], alwayslink = 1, ) + +cc_library( + name = "topk_ops", + srcs = ["topk_ops.cc"], + deps = [ + "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/xla/client:xla_builder", + "//tensorflow/compiler/xla/client/lib:arithmetic", + "//tensorflow/core/tpu:tpu_defs", + ], + alwayslink = 1, +) + +cc_library( + name = "host_compute_ops", + srcs = ["host_compute_ops.cc"], + deps = [ + "//tensorflow/compiler/tf2xla:common", + "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "//tensorflow/core/tpu:tpu_defs", + ], + alwayslink = 1, +) diff --git a/tensorflow/core/tpu/kernels/host_compute_ops.cc b/tensorflow/core/tpu/kernels/host_compute_ops.cc new file mode 100644 index 00000000000..77a7d6f3bf8 --- /dev/null +++ b/tensorflow/core/tpu/kernels/host_compute_ops.cc @@ -0,0 +1,217 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/tf2xla/literal_util.h" +#include "tensorflow/compiler/tf2xla/shape_util.h" +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/rendezvous.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/tpu/tpu_defs.h" + +namespace tensorflow { +namespace { + +// The RecvAtHost op is used to deliver data from the device at the start of a +// host compute block. +class RecvAtHostOp : public AsyncOpKernel { + public: + explicit RecvAtHostOp(OpKernelConstruction* ctx) : AsyncOpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("key", &key_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("device_ordinal", &device_ordinal_)); + OP_REQUIRES(ctx, ctx->num_inputs() == 1, + errors::Internal("RecvAtHost must have exactly one input")); + OP_REQUIRES(ctx, ctx->input_type(0) == DT_STRING, + errors::Internal("RecvAtHost input must have string type")); + DeviceNameUtils::ParsedName parsed_name; + OP_REQUIRES( + ctx, + DeviceNameUtils::ParseFullName(ctx->device()->name(), &parsed_name), + errors::Internal("Could not parse device name.")); + parsed_name.type = "TPU"; + parsed_name.id = device_ordinal_; + tpu_device_ = DeviceNameUtils::ParsedNameToString(parsed_name); + parsed_name.type = "CPU"; + parsed_name.id = 0; + cpu_device_ = DeviceNameUtils::ParsedNameToString(parsed_name); + VLOG(2) << " tpu_device_ = " << tpu_device_; + VLOG(2) << " cpu_device_ = " << cpu_device_; + } + + void ComputeAsync(OpKernelContext* ctx, DoneCallback done) override { + const Tensor& input = ctx->input(0); + VLOG(2) << input.DebugString(); + OP_REQUIRES_ASYNC( + ctx, + TensorShapeUtils::IsVector(input.shape()) && + input.shape().dim_size(0) == 2, + errors::InvalidArgument("Input shape ", input.shape().DebugString(), + " is not a vector of length 2."), + done); + const string rendezvous_key_base = input.vec()(1); + OP_REQUIRES_ASYNC( + ctx, ctx->rendezvous() != nullptr, + errors::Internal("Op kernel context needs to provide a rendezvous."), + done); + + // Early return if there is no output to be received. Call `done()` to + // unblock following execution. + if (ctx->num_outputs() == 0) { + done(); + return; + } + + // Make all the parsed keys before starting any rendezvous->Recv calls to + // avoid having to deal with an error case after some Recv have been + // started. 
+ std::vector rendezvous_key(ctx->num_outputs()); + std::vector parsed_key(ctx->num_outputs()); + for (int i = 0; i < ctx->num_outputs(); ++i) { + rendezvous_key[i] = Rendezvous::CreateKey( + tpu_device_, /*src_incarnation=*/1, cpu_device_, + strings::StrCat(rendezvous_key_base, key_, "_dtoh_", i), + FrameAndIter(0, 0)); + + OP_REQUIRES_OK_ASYNC( + ctx, Rendezvous::ParseKey(rendezvous_key[i], &parsed_key[i]), done); + } + + std::atomic_int_fast32_t* counter = + new std::atomic_int_fast32_t(ctx->num_outputs()); + + int num_outputs = ctx->num_outputs(); + for (int i = 0; i < num_outputs; ++i) { + Rendezvous::Args args; + args.device_context = ctx->op_device_context(); + args.alloc_attrs = ctx->output_alloc_attr(i); + + const string& key = rendezvous_key[i]; + VLOG(2) << "Recv " << key; + ctx->rendezvous()->RecvAsync( + parsed_key[i], args, + [ctx, i, counter, key, done](const Status& s, + const Rendezvous::Args& send_args, + const Rendezvous::Args& recv_args, + const Tensor& val, bool is_dead) { + ctx->SetStatus(s); + if (s.ok()) { + ctx->set_output(i, val); + } + int previously_finished = counter->fetch_sub(1); + VLOG(2) << "Processing Recv " << key << " " << s + << " previously finished " << previously_finished; + if (previously_finished == 1) { + delete counter; + done(); + } + }); + } + } + + private: + string key_; + int device_ordinal_; + string tpu_device_; + string cpu_device_; + + // RecvAtHostOp is neither copyable nor movable. + RecvAtHostOp(const RecvAtHostOp&) = delete; + RecvAtHostOp& operator=(const RecvAtHostOp&) = delete; +}; + +// The SendFromHost op is used to deliver data to the device at the end of a +// host compute block. +class SendFromHostOp : public OpKernel { + public: + explicit SendFromHostOp(OpKernelConstruction* ctx) : OpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("key", &key_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("device_ordinal", &device_ordinal_)); + OP_REQUIRES(ctx, ctx->num_inputs() > 0, + errors::Internal("SendFromHost must have at least one input")); + OP_REQUIRES( + ctx, ctx->input_type(ctx->num_inputs() - 1) == DT_STRING, + errors::Internal("SendFromHost last input must have string type")); + DeviceNameUtils::ParsedName parsed_name; + OP_REQUIRES( + ctx, + DeviceNameUtils::ParseFullName(ctx->device()->name(), &parsed_name), + errors::Internal("Could not parse device name.")); + parsed_name.type = "TPU"; + parsed_name.id = device_ordinal_; + tpu_device_ = DeviceNameUtils::ParsedNameToString(parsed_name); + parsed_name.type = "CPU"; + parsed_name.id = 0; + cpu_device_ = DeviceNameUtils::ParsedNameToString(parsed_name); + VLOG(2) << " tpu_device_ = " << tpu_device_; + VLOG(2) << " cpu_device_ = " << cpu_device_; + } + + void Compute(OpKernelContext* ctx) override { + const Tensor& key_input = ctx->input(ctx->num_inputs() - 1); + OP_REQUIRES(ctx, + TensorShapeUtils::IsVector(key_input.shape()) && + key_input.shape().dim_size(0) == 2, + errors::InvalidArgument("Key input shape ", + key_input.shape().DebugString(), + " is not a vector of length 2.")); + const string rendezvous_key_base = key_input.vec()(1); + OP_REQUIRES( + ctx, ctx->rendezvous() != nullptr, + errors::Internal("Op kernel context needs to provide a rendezvous.")); + + for (int i = 0; i < ctx->num_inputs() - 1; ++i) { + Rendezvous::Args args; + args.device_context = ctx->op_device_context(); + args.alloc_attrs = ctx->input_alloc_attr(i); + + // TODO(misard) Fix this once we have replication. 
+ string tpu_device = strings::StrCat("/device:TPU:", device_ordinal_); + const string& rendezvous_key = Rendezvous::CreateKey( + cpu_device_, /*src_incarnation=*/1, tpu_device_, + strings::StrCat(rendezvous_key_base, key_, "_htod_", i), + FrameAndIter(0, 0)); + + Rendezvous::ParsedKey parsed_key; + OP_REQUIRES_OK(ctx, Rendezvous::ParseKey(rendezvous_key, &parsed_key)); + VLOG(2) << "Send " << rendezvous_key; + OP_REQUIRES_OK( + ctx, ctx->rendezvous()->Send(parsed_key, args, ctx->input(i), false)); + } + } + + private: + string key_; + int device_ordinal_; + string cpu_device_; + string tpu_device_; + + // SendFromHostOp is neither copyable nor movable. + SendFromHostOp(const SendFromHostOp&) = delete; + SendFromHostOp& operator=(const SendFromHostOp&) = delete; +}; + +} // anonymous namespace + +// These ops execute on the CPU device and must specify a non-negative value for +// device_ordinal to indicate which TPU to send infeed to. +REGISTER_KERNEL_BUILDER(Name("_XlaRecvAtHost").Device(DEVICE_CPU), + RecvAtHostOp); + +REGISTER_KERNEL_BUILDER(Name("_XlaSendFromHost").Device(DEVICE_CPU), + SendFromHostOp); + +} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/topk_ops.cc b/tensorflow/core/tpu/kernels/topk_ops.cc new file mode 100644 index 00000000000..11ca4e2d74b --- /dev/null +++ b/tensorflow/core/tpu/kernels/topk_ops.cc @@ -0,0 +1,360 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/tpu/tpu_defs.h" +#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" +#include "tensorflow/compiler/tf2xla/xla_op_registry.h" +#include "tensorflow/compiler/xla/client/lib/arithmetic.h" +#include "tensorflow/compiler/xla/client/xla_builder.h" + +namespace tensorflow { +namespace { + +using ::tensorflow::errors::InvalidArgument; + +// Computes the Kth order statistic of a data set. The current +// implementation uses a binary search requiring exactly 32 passes +// over the input data. The running time is linear with respect to +// input size. The median-of-medians algorithm is probably faster, but +// is difficult to implement efficiently in XLA. The implementation +// imposes a total ordering on floats. The ordering is consistent with +// the usual partial order. Positive NaNs are greater than positive +// infinity. Negative NaNs are less than negative infinity. NaNs with +// distinct payloads are treated as distinct. Subnormal numbers are +// preserved (not flushed to zero). Positive infinity is greater than +// all numbers. Negative infinity is less than all numbers. Positive +// is greater than negative zero. There are less than k values greater +// than the kth order statistic. There are at least k values greater +// than or equal to the Kth order statistic. The semantics are not the +// same as TopKUnique. 
+xla::XlaOp CreateKthOrderStatisticComputation(xla::XlaBuilder* builder, + const TensorShape& input_shape, + const xla::XlaOp input, + const xla::XlaOp k) { + const int64 height = input_shape.dim_size(0); + const int64 width = input_shape.dim_size(1); + + xla::XlaOp input_sm32 = xla::BitcastConvertType(input, xla::S32); + xla::XlaOp zero_r0 = xla::ConstantR0(builder, 0); + xla::XlaOp zero_r1 = xla::Broadcast(zero_r0, {height}); + xla::XlaOp zero_r2 = xla::Broadcast(zero_r0, {height, width}); + + xla::XlaOp max_r0 = xla::ConstantR0(builder, 0x7FFFFFFF); + xla::XlaOp max_r1 = xla::Broadcast(max_r0, {height}); + + // Start at positive zero, so that pivot is always less than top. + xla::XlaOp negative_zero_r0 = xla::ConstantR0(builder, 0x80000000); + xla::XlaOp negative_zero_r1 = xla::Broadcast(negative_zero_r0, {height}); + xla::XlaOp top_r1 = zero_r1; + + for (uint32 mask = 1U << 31; mask; mask >>= 1) { + xla::XlaOp broadcast_mask_r1 = + xla::Broadcast(xla::ConstantR0(builder, mask), {height}); + + // The first iteration of the loop determines if the kth element + // is positive or negative. If the kth element is negative, we + // start the search from +QNAN (0x7FFFFFF). If k is negative, we + // start from -0 (0x8000000). The pivot is less than the top and + // is always half way between the top and the implicit bottom in + // IEEE754 space. + xla::XlaOp pivot_r1 = xla::Xor(top_r1, broadcast_mask_r1); + xla::XlaOp pivot_r2 = xla::Add(pivot_r1, zero_r2, {0}); + xla::XlaOp both_negative_r2 = + xla::Lt(xla::And(input_sm32, pivot_r2), zero_r0); + xla::XlaOp left_r2 = xla::Select(both_negative_r2, pivot_r2, input_sm32); + xla::XlaOp right_r2 = xla::Select(both_negative_r2, input_sm32, pivot_r2); + xla::XlaOp pred_r2 = xla::Gt(left_r2, right_r2); + xla::XlaOp conv_r2 = xla::ConvertElementType(pred_r2, xla::S32); + + xla::XlaComputation add = CreateScalarAddComputation(xla::S32, builder); + xla::XlaOp sum_r1 = xla::Reduce(conv_r2, zero_r0, add, {1}); + + xla::XlaOp pivot_too_low_r1 = xla::Le(k, sum_r1, {}); + + if (mask == (1U << 31)) { + top_r1 = xla::Select(pivot_too_low_r1, max_r1, negative_zero_r1); + } else { + top_r1 = xla::Select(pivot_too_low_r1, top_r1, pivot_r1); + } + } + return xla::BitcastConvertType(top_r1, xla::F32); +} + +class KthOrderStatistic : public XlaOpKernel { + public: + explicit KthOrderStatistic(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("k", &k_)); + OP_REQUIRES(ctx, k_ >= 0, errors::InvalidArgument("Need k >= 0, got ", k_)); + } + + void Compile(XlaOpKernelContext* ctx) override { + xla::XlaBuilder* builder = ctx->builder(); + xla::XlaOp input = ctx->Input(0); + const TensorShape& input_shape = ctx->InputShape(0); + OP_REQUIRES( + ctx, input_shape.dims() == 2, + InvalidArgument("input must be rank-2: ", input_shape.DebugString())); + + xla::XlaOp k = xla::ConstantR0(builder, k_); + xla::XlaOp kth_order_statistics = + CreateKthOrderStatisticComputation(builder, input_shape, input, k); + ctx->SetOutput(0, kth_order_statistics); + } + + private: + int32 k_; +}; + +REGISTER_XLA_OP(Name("KthOrderStatistic"), KthOrderStatistic); + +// Returns the TopK unique values in the array in sorted order and the +// indices of those elements. The running time is proportional to the +// product of K and the input size. Sorting the whole array is more +// efficient for sufficiently large values of K. The median-of-medians +// algorithm is probably faster, but difficult to implement +// efficiently in XLA. 
If there are fewer than K unique values, the +// results are padded with negative infinity. NaNs are never +// returned. Subnormal numbers are flushed to zero. +// +// If an element appears at multiple indices, the highest index is +// returned. If a TopK element never appears in the input due to +// padding values, the indices are padded with negative one. If a +// padding value appears in the input and padding is needed, the +// highest index of the padding value will be returned. +// +// The semantics are not the same as KthOrderStatistic. +// +// If masked_with_iota is true, the index is already encoded in the lower bits +// of the mantissa, which will be extracted as the index in the output. +// Otherwise, every iteration will use the following algorithm to get the index: +// index = max([i if data[i] == max else -1 for i in size]) +// +// TODO(b/74994968): Replace TopKUnique with an LLO implementation of +// TopK with reasonable semantics. +std::pair CreateTopKUnique( + xla::XlaBuilder* builder, const xla::XlaOp input, + const TensorShape& input_shape, int64 k, bool masked_with_iota) { + const int64 height = input_shape.dim_size(0); + const int64 width = input_shape.dim_size(1); + + xla::XlaOp iota_r1 = xla::Iota(builder, xla::S32, width); + xla::XlaOp iota_r2 = xla::Broadcast(iota_r1, {height}); + + xla::XlaOp negative_one_r0 = xla::ConstantR0(builder, -1); + xla::XlaOp negative_one_r2 = xla::Broadcast(negative_one_r0, {height, width}); + + xla::XlaOp negative_infinity_r0 = xla::ConstantR0(builder, -INFINITY); + xla::XlaOp negative_infinity_r2 = + xla::Broadcast(negative_infinity_r0, {height, width}); + + xla::XlaOp scratch_pad_r2 = input; + std::vector topk_r1s; + std::vector topk_indices; + for (int i = 0; i < k; ++i) { + xla::XlaOp kth_order_statistic_r1 = + xla::Reduce(scratch_pad_r2, negative_infinity_r0, + CreateScalarMaxComputation(xla::F32, builder), {1}); + topk_r1s.push_back(kth_order_statistic_r1); + + xla::XlaOp ge_r2 = xla::Ge(input, kth_order_statistic_r1, {0}); + scratch_pad_r2 = xla::Select(ge_r2, negative_infinity_r2, input); + + if (!masked_with_iota) { + xla::XlaOp eq_r2 = xla::Eq(input, kth_order_statistic_r1, {0}); + xla::XlaOp indices_r2 = xla::Select(eq_r2, iota_r2, negative_one_r2); + xla::XlaOp topk_index_r1 = + xla::Reduce(indices_r2, negative_one_r0, + CreateScalarMaxComputation(xla::S32, builder), {1}); + topk_indices.push_back(topk_index_r1); + } + } + xla::XlaOp topk_r1_concat = xla::ConcatInDim(builder, topk_r1s, 0); + xla::XlaOp topk_r2 = + xla::Transpose(xla::Reshape(topk_r1_concat, {k, height}), {1, 0}); + + xla::XlaOp topk_indices_r2; + if (masked_with_iota) { + int32 log2_ceiling = tensorflow::Log2Ceiling(width); + int32 next_power_of_two = 1U << log2_ceiling; + int32 count_mask = next_power_of_two - 1; + xla::XlaOp mask_r0 = xla::ConstantR0(builder, count_mask); + xla::XlaOp mask_r2 = xla::Broadcast(mask_r0, {height, k}); + xla::XlaOp topk_r2_s32 = xla::BitcastConvertType(topk_r2, xla::S32); + topk_indices_r2 = xla::And(topk_r2_s32, mask_r2); + } else { + xla::XlaOp topk_indices_concat = xla::ConcatInDim(builder, topk_indices, 0); + topk_indices_r2 = + xla::Transpose(xla::Reshape(topk_indices_concat, {k, height}), {1, 0}); + } + return std::make_pair(topk_r2, topk_indices_r2); +} + +class TopKUnique : public XlaOpKernel { + public: + explicit TopKUnique(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("k", &k_)); + OP_REQUIRES(ctx, k_ >= 0, errors::InvalidArgument("Need k >= 0, got ", k_)); + } + + void 
Compile(XlaOpKernelContext* ctx) override { + xla::XlaBuilder* builder = ctx->builder(); + xla::XlaOp input = ctx->Input(0); + const TensorShape& input_shape = ctx->InputShape(0); + OP_REQUIRES( + ctx, input_shape.dims() == 2, + InvalidArgument("input must be rank-2: ", input_shape.DebugString())); + + auto topk = CreateTopKUnique(builder, input, input_shape, k_, false); + ctx->SetOutput(0, topk.first); + ctx->SetOutput(1, topk.second); + } + + private: + int k_; +}; +REGISTER_XLA_OP(Name("TopKUnique"), TopKUnique); + +// Make all elements in the non-Batch dimension unique and close to +// their initial value on a relative scale, but potential far from +// their initial value in an absolute scale. +// +// This operation is meant to be combined with TopKUnique to avoid +// suppressing identical elements. For most TopK users, the indices of +// the TopK elements are important but the relative order of the TopK +// elements and their exact values is not so important. Ideally, the +// the indices of the TopK elements of the output of MakeUnique are +// the same as the indices of the TopK elements of the inputs. +// +// Its an open question whether it is better to accept the risk of two +// elements in the input to TopK have exactly the same value or the +// risk that MakeUnique will alter the indices of the TopK +// elements. Model owners are encouraged to experiment! +// +// Never returns a sub-normal number. Never returns zero. The sign of +// each input element is always identical to the sign of the +// corresponding output element. Behavior for infinite elements is +// undefined. Behavior for subnormal elements is undefined. +// +// Algorithm: +// 1. Replace zeros with the smallest representable normal floating +// point number with the same sign. +// 2. Mask away enough low order bits that every value can be distinct. +// 3. Replace the low order bits with iota. +// +// TODO(b/74994968): Replace MakeUnique with an LLO implementation of +// TopK with reasonable semantics. +xla::XlaOp CreateMakeUnique(xla::XlaBuilder* builder, const xla::XlaOp input, + const TensorShape& input_shape) { + const int64 height = input_shape.dim_size(0); + const int64 width = input_shape.dim_size(1); + + xla::XlaOp zero_r0 = xla::ConstantR0(builder, 0U); + xla::XlaOp zero_r2 = xla::Broadcast(zero_r0, {height, width}); + + // count_mask is used to mask away the low order bits to ensure + // that every element is distinct. + uint32 log2_ceiling = static_cast(std::ceil(std::log2(width))); + uint32 next_power_of_two = 1U << log2_ceiling; + uint32 count_mask = ~(next_power_of_two - 1); + xla::XlaOp count_mask_r0 = xla::ConstantR0(builder, count_mask); + xla::XlaOp count_mask_r2 = xla::Broadcast(count_mask_r0, {height, width}); + + // smallest_normal is the bit representation of the smallest + // positive normal floating point number. The sign is zero, + // exponent is one, and the fraction is zero. + uint32 smallest_normal = 1U << 23; + xla::XlaOp smallest_normal_r0 = xla::ConstantR0(builder, smallest_normal); + xla::XlaOp smallest_normal_r2 = + xla::Broadcast(smallest_normal_r0, {height, width}); + + // Used to mask away the sign bit when computing the absolute + // value. 
+ uint32 low_bit_mask = ~(1U << 31); + xla::XlaOp low_bit_mask_r0 = xla::ConstantR0(builder, low_bit_mask); + xla::XlaOp low_bit_mask_r2 = xla::Broadcast(low_bit_mask_r0, {height, width}); + + xla::XlaOp iota_r1 = xla::Iota(builder, xla::U32, width); + xla::XlaOp iota_r2 = xla::Broadcast(iota_r1, {height}); + + // Compare the absolute value with positive zero to handle + // negative zero. + // + // Pseudocode: input_no_zeros = abs(input) == 0 ? FLT_MIN : input + xla::XlaOp input_u32_r2 = xla::BitcastConvertType(input, xla::U32); + xla::XlaOp abs_r2 = xla::And(input_u32_r2, low_bit_mask_r2); + xla::XlaOp if_zero_r2 = xla::Eq(abs_r2, zero_r2); + xla::XlaOp smallest_normal_preserving_sign_r2 = + xla::Or(input_u32_r2, smallest_normal_r2); + xla::XlaOp input_no_zeros_r2 = + xla::Select(if_zero_r2, smallest_normal_preserving_sign_r2, input_u32_r2); + + // Discard the low-order bits and replace with iota. + xla::XlaOp and_r2 = xla::And(input_no_zeros_r2, count_mask_r2); + xla::XlaOp or_r2 = xla::Or(and_r2, iota_r2); + return xla::BitcastConvertType(or_r2, xla::F32); +} + +class MakeUnique : public XlaOpKernel { + public: + explicit MakeUnique(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} + + void Compile(XlaOpKernelContext* ctx) override { + xla::XlaBuilder* builder = ctx->builder(); + xla::XlaOp input = ctx->Input(0); + const TensorShape& input_shape = ctx->InputShape(0); + OP_REQUIRES( + ctx, input_shape.dims() == 2, + InvalidArgument("input must be rank-2: ", input_shape.DebugString())); + + ctx->SetOutput(0, CreateMakeUnique(builder, input, input_shape)); + } +}; +REGISTER_XLA_OP(Name("MakeUnique"), MakeUnique); + +// Returns the TopK approximate values in the array in sorted order and the +// indices of those elements. The running time is proportional to the +// product of K and the input size. +// +// The algorithm first updates the lower bits of each element with iota, +// which is used to derive the index. The iota also serves the purpose to +// make each element unique so that each iteration, we are guaranteed to +// get one and only one unique top-1 element. 
+class TopKWithUnique : public XlaOpKernel { + public: + explicit TopKWithUnique(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("k", &k_)); + OP_REQUIRES(ctx, k_ >= 0, errors::InvalidArgument("Need k >= 0, got ", k_)); + } + + void Compile(XlaOpKernelContext* ctx) override { + xla::XlaBuilder* builder = ctx->builder(); + xla::XlaOp input = ctx->Input(0); + const TensorShape& input_shape = ctx->InputShape(0); + OP_REQUIRES( + ctx, input_shape.dims() == 2, + InvalidArgument("input must be rank-2: ", input_shape.DebugString())); + + xla::XlaOp unique = CreateMakeUnique(builder, input, input_shape); + auto topk = CreateTopKUnique(builder, unique, input_shape, k_, true); + ctx->SetOutput(0, topk.first); + ctx->SetOutput(1, topk.second); + } + + private: + int k_; +}; +REGISTER_XLA_OP(Name("TopKWithUnique"), TopKWithUnique); +} // namespace +} // namespace tensorflow diff --git a/tensorflow/core/tpu/ops/BUILD b/tensorflow/core/tpu/ops/BUILD index 63268d6aab1..5bb2b644dd2 100644 --- a/tensorflow/core/tpu/ops/BUILD +++ b/tensorflow/core/tpu/ops/BUILD @@ -42,3 +42,29 @@ cc_library( ], alwayslink = 1, ) + +cc_library( + name = "host_compute_ops", + srcs = [ + "host_compute_ops.cc", + ], + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + ], + alwayslink = 1, +) + +cc_library( + name = "topk_ops", + srcs = [ + "topk_ops.cc", + ], + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + ], + alwayslink = 1, +) diff --git a/tensorflow/core/tpu/ops/host_compute_ops.cc b/tensorflow/core/tpu/ops/host_compute_ops.cc new file mode 100644 index 00000000000..3c7994ccf2e --- /dev/null +++ b/tensorflow/core/tpu/ops/host_compute_ops.cc @@ -0,0 +1,140 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/attr_value.pb.h" +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" + +namespace tensorflow { + +using shape_inference::InferenceContext; +using shape_inference::ShapeHandle; + +REGISTER_OP("_HostComputeMlir") + .Input("inputs: Tinputs") + .Output("outputs: Toutputs") + .Attr("Tinputs: list(type) >= 0") + .Attr("Toutputs: list(type) >= 0") + .Attr("key: string") + .Attr("tpu_core: int = 0") + .SetIsStateful() + .Doc(R"doc( +A host-side computation called from a TPU device. + +inputs: A list of tensors that will be sent to the host. +outputs: A list of tensors that will be returned to the device. +Tinputs: The element types of each element in `inputs`. +Toutputs: The element types of each element in `outputs`. +key: A unique identifier for this region used to match up host transfers. +tpu_core: Default core to use for host to device transfers. 
+)doc"); + +REGISTER_OP("XlaHostCompute") + .Input("inputs: Tinputs") + .Output("outputs: Toutputs") + .Attr("Tinputs: list(type) >= 0") + .Attr("Toutputs: list(type) >= 0") + .Attr("ancestors: list(string) >= 0") + .Attr("shapes: list(shape) >= 0") + .Attr("shape_inference_graph: func") + .Attr("key: string") + .Attr("cost_estimate_ns: int=1000000") + .Attr("tpu_core: int = 0") + .SetIsStateful() + .SetShapeFn([](shape_inference::InferenceContext* c) { + const AttrValue* graph; + TF_RETURN_IF_ERROR(c->attrs().Find("shape_inference_graph", &graph)); + if (graph->func().name().empty()) { + const AttrValue* shapes; + TF_RETURN_IF_ERROR(c->attrs().Find("shapes", &shapes)); + if (shapes->list().shape_size() != c->num_outputs()) { + return errors::InvalidArgument( + "_XlaHostCompute has ", c->num_outputs(), + " outputs but 'shapes' attr has ", shapes->list().shape_size(), + " elements"); + } + for (int i = 0; i < c->num_outputs(); ++i) { + shape_inference::ShapeHandle handle; + TF_RETURN_IF_ERROR( + c->MakeShapeFromShapeProto(shapes->list().shape(i), &handle)); + c->set_output(i, handle); + } + return Status::OK(); + } else { + // There is a shape inference graph so the output shapes are not + // statically known. + return ::tensorflow::shape_inference::UnknownShape(c); + } + }) + .Doc(R"doc( +A pseudo-op to represent host-side computation in an XLA program. + +inputs: A list of tensors that will be sent to the host. +outputs: A list of tensors that will be returned to the device. +Tinputs: The element types of each element in `inputs`. +Toutputs: The element types of each element in `outputs`. +ancestors: A list of names of HostCompute computations that must be +sequenced before this computation. +shape_inference_graph: If non-empty, a serialized GraphDef representing a graph +that must be analyzed at compile time to determine the shapes of the outputs. +shapes: If shape_inference_graph is empty, a list of the shapes of `outputs`. +key: A unique identifier for this region used to match up host transfers. +cost_estimate_ns: Estimated duration of the host computation in nanoseconds. +tpu_core: Default core to use for host to device transfers. +)doc"); + +REGISTER_OP("XlaSendToHost") + .Input("input: Tinput") + .Attr("Tinput: type") + .Attr("key: string") + .SetIsStateful() + .Doc(R"doc( +An op to send a tensor to the host. + +input: the tensor that will be sent to the host. +Tinput: element type for input. +key: A unique identifier for this region used to match up host transfers. +)doc"); + +REGISTER_OP("XlaRecvFromHost") + .Output("output: Toutput") + .Attr("Toutput: type") + .Attr("shape: shape") + .Attr("key: string") + .SetIsStateful() + .SetShapeFn([](shape_inference::InferenceContext* c) { + const AttrValue* shape_attr; + TF_RETURN_IF_ERROR(c->attrs().Find("shape", &shape_attr)); + if (!shape_attr->has_shape()) { + return errors::InvalidArgument( + "XlaRecvFromHost op does not have valid \"Toutput\" attr."); + } + shape_inference::ShapeHandle handle; + TF_RETURN_IF_ERROR( + c->MakeShapeFromShapeProto(shape_attr->shape(), &handle)); + c->set_output(0, handle); + return Status::OK(); + }) + .Doc(R"doc( +An op to receive a tensor from the host. + +output: the tensor that will be received from the host. +Toutput: element type for output. +shape: shape for output. +key: A unique identifier for this region used to match up host transfers. 
+)doc"); + +} // namespace tensorflow diff --git a/tensorflow/core/tpu/ops/topk_ops.cc b/tensorflow/core/tpu/ops/topk_ops.cc new file mode 100644 index 00000000000..1656351690d --- /dev/null +++ b/tensorflow/core/tpu/ops/topk_ops.cc @@ -0,0 +1,130 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { + +using shape_inference::ShapeHandle; + +REGISTER_OP("KthOrderStatistic") + .Input("input: float32") + .Output("output: float32") + .Attr("k: int") + .SetShapeFn([](shape_inference::InferenceContext* c) { + ShapeHandle input; + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &input)); + + ShapeHandle s; + TF_RETURN_IF_ERROR(c->Subshape(input, 0, -1, &s)); + c->set_output(0, s); + return Status::OK(); + }) + .Doc(R"doc( +Computes the Kth order statistic of a data set. The current +implementation uses a binary search requiring exactly 32 passes over +the input data. The running time is linear with respect to input +size. The median-of-medians algorithm is probably faster, but is +difficult to implement efficiently in XLA. The implementation imposes +a total ordering on floats. The ordering is consistent with the usual +partial order. Positive NaNs are greater than positive +infinity. Negative NaNs are less than negative infinity. NaNs with +distinct payloads are treated as distinct. Subnormal numbers are +preserved (not flushed to zero). Positive infinity is greater than all +numbers. Negative infinity is less than all numbers. Positive is +greater than negative zero. There are less than k values greater than +the kth order statistic. There are at least k values greater than or +equal to the Kth order statistic. The semantics are not the same as +top_k_unique. +)doc"); + +REGISTER_OP("TopKUnique") + .Input("input: float32") + .Output("topk: float32") + .Output("topk_indices: int32") + .Attr("k: int") + .SetShapeFn([](shape_inference::InferenceContext* c) { + ShapeHandle input; + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &input)); + + int32 k; + TF_RETURN_IF_ERROR(c->GetAttr("k", &k)); + + ShapeHandle s; + TF_RETURN_IF_ERROR(c->ReplaceDim(input, 1, c->MakeDim(k), &s)); + c->set_output(0, s); + c->set_output(1, s); + return Status::OK(); + }) + .Doc(R"doc( +Returns the TopK unique values in the array in sorted order. The +running time is proportional to the product of K and the input +size. Sorting the whole array is more efficient for sufficiently large +values of K. The median-of-medians algorithm is probably faster, but +difficult to implement efficiently in XLA. If there are fewer than K +unique numbers (not NANs), the results are padded with negative +infinity. NaNs are never returned. Subnormal numbers are flushed to +zero. If an element appears at multiple indices, the highest index is +returned. 
If a TopK element never appears in the input due to padding +values, the indices are padded with negative one. If a padding value +appears in the input and padding is needed, the highest index of the +padding value will be returned. The semantics are not the same as +kth_order_statistic. +)doc"); + +REGISTER_OP("MakeUnique") + .Input("input: float32") + .Output("output: float32") + .SetShapeFn([](shape_inference::InferenceContext* c) { + ShapeHandle input; + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &input)); + c->set_output(0, input); + return Status::OK(); + }) + .Doc(R"doc( +Make all elements in the non-Batch dimension unique, but \"close\" to +their initial value. Never returns a sub-normal number. Never returns +zero. The sign of each input element is always identical to the sign +of the corresponding output element. Behavior for infinite elements is +undefined. Behavior for subnormal elements is undefined. +)doc"); + +REGISTER_OP("TopKWithUnique") + .Input("input: float32") + .Output("topk: float32") + .Output("topk_indices: int32") + .Attr("k: int") + .SetShapeFn([](shape_inference::InferenceContext* c) { + ShapeHandle input; + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &input)); + + int32 k; + TF_RETURN_IF_ERROR(c->GetAttr("k", &k)); + + ShapeHandle s; + TF_RETURN_IF_ERROR(c->ReplaceDim(input, 1, c->MakeDim(k), &s)); + c->set_output(0, s); + c->set_output(1, s); + return Status::OK(); + }) + .Doc(R"doc( +Returns the TopK values in the array in sorted order. This is a combination +of MakeUnique and TopKUnique. The returned top-K will have its lower bits +replaced by iota, thus it will be close to the original value but not exactly +the same. The running time is proportional to the product of K and the input +size. NaNs are never returned. Subnormal numbers are flushed to zero.)doc"); +} // namespace tensorflow From 9d17a0b425db338ae86465f5f3204335986fbae6 Mon Sep 17 00:00:00 2001 From: Ken Franko Date: Thu, 23 Jul 2020 16:48:34 -0700 Subject: [PATCH 1230/2522] Set namespace to TFDevice for MarkOpsForOutsideCompilation pass. This pass is a generic TF device pass so this is a better namespace. PiperOrigin-RevId: 322895228 Change-Id: Id848bd88af6a7d60f428a0b6531e3fb4a507976d --- .../transforms/mark_ops_for_outside_compilation.cc | 4 ++-- .../compiler/mlir/tensorflow/transforms/passes.h | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/mark_ops_for_outside_compilation.cc b/tensorflow/compiler/mlir/tensorflow/transforms/mark_ops_for_outside_compilation.cc index 7d6ede2e613..765a73d5096 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/mark_ops_for_outside_compilation.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/mark_ops_for_outside_compilation.cc @@ -23,7 +23,7 @@ limitations under the License. 
#include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" namespace mlir { -namespace TF { +namespace TFDevice { namespace { @@ -54,5 +54,5 @@ static PassRegistration pass( "tf-mark-ops-for-outside-compilation", "Marks unsupported ops a device cluster for outside compilation."); -} // namespace TF +} // namespace TFDevice } // namespace mlir diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/passes.h b/tensorflow/compiler/mlir/tensorflow/transforms/passes.h index a1f527f8846..68bc9d09e91 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/passes.h +++ b/tensorflow/compiler/mlir/tensorflow/transforms/passes.h @@ -247,6 +247,11 @@ std::unique_ptr> CreateParallelExecuteToIslandsPass(); std::unique_ptr> CreateAnnotateParameterReplicationPass(); +// Creates a pass that marks unsupported ops in device cluster for outside +// compilation. +std::unique_ptr> +CreateMarkOpsForOutsideCompilationPass(); + // Creates a pass that hoists a `tf_device.launch` body and assigns a `device` // attribute to each TensorFlow dialect op in the body based on the `device` // attribute on the `tf_device.launch`. @@ -302,11 +307,6 @@ std::unique_ptr> CreateTPUHostComputationExpansionPass(); std::unique_ptr> CreateTPUUpdateEmbeddingEnqueueOpInputsPass(); -// Creates a pass that marks unsupported ops in device cluster for outside -// compilation. -std::unique_ptr> -CreateMarkOpsForOutsideCompilationPass(); - // Creates a pass that extract outside compilation (CPU ops inside TPU cluster) // ops to a separate parallel_execute region to run on CPU. std::unique_ptr> From 96fcf55461f5590c37b84bc494b2deb9df6b85fb Mon Sep 17 00:00:00 2001 From: Peng Wang Date: Thu, 23 Jul 2020 16:51:17 -0700 Subject: [PATCH 1231/2522] Some internal change PiperOrigin-RevId: 322895707 Change-Id: I6300e2e55c8543478a3fca974aef3365cc0b2323 --- tensorflow/lite/testing/BUILD | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tensorflow/lite/testing/BUILD b/tensorflow/lite/testing/BUILD index 7df61ca5f35..d17fbdb2398 100644 --- a/tensorflow/lite/testing/BUILD +++ b/tensorflow/lite/testing/BUILD @@ -533,7 +533,12 @@ cc_test( "//tensorflow/lite:testdata/add.bin", "//tensorflow/lite:testdata/lstm.bin", ], - tags = ["no_mac"], # b/161990368 + tags = [ + "no_mac", # b/161990368 + "noasan", # b/162027436 + "nomsan", # b/162027436 + "notsan", # b/162027436 + ], deps = [ ":test_tflite_lib", "//tensorflow/core:tflite_portable_logging", From 3443f359f9ccfc0cd168045e5841007929f8f30b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 23 Jul 2020 17:04:38 -0700 Subject: [PATCH 1232/2522] Integrate LLVM at llvm/llvm-project@9d2da6759b4d Updates LLVM usage to match [9d2da6759b4d](https://github.com/llvm/llvm-project/commit/9d2da6759b4d) PiperOrigin-RevId: 322898084 Change-Id: Iff52b49039a3ec75d95fda888a231bf4f6f8d634 --- tensorflow/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 0d4f64cf0c2..2d708bd1b5c 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -711,8 +711,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. 
- LLVM_COMMIT = "99ad956fdaee5398fdcf46fa49cb433cf52dc461" - LLVM_SHA256 = "e48f529063cc31e5f5844f7395fbecb0a3e9cba0bcbeafa40f5001273bad3c51" + LLVM_COMMIT = "9d2da6759b4d05d834371bcaaa8fc3d9b3385b18" + LLVM_SHA256 = "e432ea63141c6c274ac71565664b267089fd58b41d052eaca6e4d7be7613e947" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From 0be68a5fe4ff596e5b1d7baa2d61cd49b68445c4 Mon Sep 17 00:00:00 2001 From: ShengYang1 Date: Fri, 24 Jul 2020 08:37:06 +0800 Subject: [PATCH 1233/2522] fix concat --- tensorflow/core/kernels/mkl_concat_op.cc | 16 ++++++++-------- tensorflow/python/kernel_tests/concat_op_test.py | 16 ++++++++++++++++ 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/tensorflow/core/kernels/mkl_concat_op.cc b/tensorflow/core/kernels/mkl_concat_op.cc index 4a5cb0a0d4f..9f08441b73a 100644 --- a/tensorflow/core/kernels/mkl_concat_op.cc +++ b/tensorflow/core/kernels/mkl_concat_op.cc @@ -18,7 +18,6 @@ limitations under the License. #include #include "mkldnn.hpp" -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/bounds_check.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" @@ -32,6 +31,7 @@ limitations under the License. #include "tensorflow/core/platform/types.h" #include "tensorflow/core/util/mkl_types.h" #include "tensorflow/core/util/mkl_util.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" using mkldnn::concat; using mkldnn::stream; @@ -376,19 +376,20 @@ class MklConcatFwdPrimitive : public MklPrimitive { context_.data_mem_shdptr.push_back(src_mem); context_.data_mem.push_back(*context_.data_mem_shdptr[i]); } + // Store the expected memory format + context_.dst_md.reset(new memory::desc({concat_fwd_dims.dst_dims}, + MklDnnType(), + concat_fwd_dims.mkl_common_format)); // Create a concat primitive descriptor #ifdef ENABLE_MKLDNN_V1 context_.fwd_pd.reset(new concat::primitive_desc( - concat_fwd_dims.concat_dims, context_.src_md, cpu_engine_)); + *context_.dst_md, concat_fwd_dims.concat_dims, context_.src_md, + cpu_engine_)); #else context_.fwd_pd.reset(new concat::primitive_desc( concat_fwd_dims.concat_dims, context_.src_pd)); #endif // ENABLE_MKLDNN_V1 - // Store the expected memory format - context_.dst_md.reset(new memory::desc({concat_fwd_dims.dst_dims}, - MklDnnType(), - concat_fwd_dims.mkl_common_format)); #ifdef ENABLE_MKLDNN_V1 // Create memory primitive based on dummy data context_.dst_mem.reset( @@ -404,8 +405,7 @@ class MklConcatFwdPrimitive : public MklPrimitive { #ifdef ENABLE_MKLDNN_V1 context_.concat_fwd.reset(new concat(*context_.fwd_pd)); std::unordered_map net_args = { - { MKLDNN_ARG_DST, - *context_.dst_mem }}; + {MKLDNN_ARG_DST, *context_.dst_mem}}; for (int i = 0; i < concat_fwd_dims.num_inputs; ++i) { net_args.insert({MKLDNN_ARG_MULTIPLE_SRC + i, context_.data_mem[i]}); } diff --git a/tensorflow/python/kernel_tests/concat_op_test.py b/tensorflow/python/kernel_tests/concat_op_test.py index ba2d1abbd10..8d05b278aa6 100644 --- a/tensorflow/python/kernel_tests/concat_op_test.py +++ b/tensorflow/python/kernel_tests/concat_op_test.py @@ -68,6 +68,22 @@ class ConcatOpTest(test.TestCase): self.assertAllEqual(result[:, :4], params[p1]) self.assertAllEqual(result[:, 4:], params[p2]) + @test_util.run_deprecated_v1 + def test4DStack(self): + with 
self.session(use_gpu=True): + p1 = array_ops.placeholder(dtypes.float32, shape=[2, 3, 1, 1]) + p2 = array_ops.placeholder(dtypes.float32, shape=[2, 3, 4, 1]) + c = array_ops.concat([p1, p2], 2) + params = { + p1: np.random.rand(2, 3, 1, 1).astype("f"), + p2: np.random.rand(2, 3, 4, 1).astype("f") + } + result = c.eval(feed_dict=params) + + self.assertEqual(result.shape, c.get_shape()) + self.assertAllEqual(result[:, :, :1, :], params[p1]) + self.assertAllEqual(result[:, :, 1:, :], params[p2]) + def testInt32GPU(self): with test_util.use_gpu(): p1 = np.random.rand(2, 3).astype("i") From 59f3bde4521b271deb9df64cec1c09b237510fba Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Thu, 23 Jul 2020 17:08:06 -0700 Subject: [PATCH 1234/2522] Minor fixes to tutorial colab and docstring. PiperOrigin-RevId: 322898659 Change-Id: I04836fe0984ce215806b4a16461a70ba86ab7f8b --- tensorflow/python/autograph/g3doc/pyct_tutorial.ipynb | 4 ---- tensorflow/python/autograph/pyct/cfg.py | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/tensorflow/python/autograph/g3doc/pyct_tutorial.ipynb b/tensorflow/python/autograph/g3doc/pyct_tutorial.ipynb index 05e7a46c718..8b7b3e9b350 100644 --- a/tensorflow/python/autograph/g3doc/pyct_tutorial.ipynb +++ b/tensorflow/python/autograph/g3doc/pyct_tutorial.ipynb @@ -122,10 +122,6 @@ "\n", "class PyToBasicCpp(transpiler.GenericTranspiler):\n", "\n", - " #TODO(mdan): Document this.\n", - " def get_transformed_name(self, node):\n", - " return 'new_f'\n", - "\n", " def transform_ast(self, node, ctx):\n", " codegen = BasicCppCodegen(ctx)\n", " codegen.visit(node)\n", diff --git a/tensorflow/python/autograph/pyct/cfg.py b/tensorflow/python/autograph/pyct/cfg.py index 9a8ece2bc3a..fa9f99b5a69 100644 --- a/tensorflow/python/autograph/pyct/cfg.py +++ b/tensorflow/python/autograph/pyct/cfg.py @@ -219,7 +219,7 @@ class GraphVisitor(object): (gast.Break, gast.Continue, gast.Raise, gast.Pass)) def _visit_internal(self, mode): - """Visits the CFG, depth-first.""" + """Visits the CFG, breadth-first.""" assert mode in (_WalkMode.FORWARD, _WalkMode.REVERSE) if mode == _WalkMode.FORWARD: open_ = [self.graph.entry] From f3a015274fadab00ec8cad92af2a968e0ecd434f Mon Sep 17 00:00:00 2001 From: Geeta Chavan Date: Thu, 23 Jul 2020 17:17:15 -0700 Subject: [PATCH 1235/2522] Updating estimator version after the final release PiperOrigin-RevId: 322900262 Change-Id: Iad8ead84703059b3414429f9ebfc09ceebc02ab2 --- tensorflow/tools/pip_package/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 4b03ccd3ee8..54021af9975 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -65,7 +65,7 @@ REQUIRED_PACKAGES = [ 'opt_einsum >= 2.3.2', 'protobuf >= 3.9.2', 'tensorboard >= 2.3.0, < 3', - 'tensorflow_estimator >= 2.2.0, < 2.3.0', + 'tensorflow_estimator >= 2.3.0, < 2.4.0', 'termcolor >= 1.1.0', 'wrapt >= 1.11.1', 'wheel >= 0.26', From 25a09d00043fe97905d2c7a3ccb62e3525285894 Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Thu, 23 Jul 2020 18:30:45 -0700 Subject: [PATCH 1236/2522] [XLA:SPMD] Recursively handling more Dot cases 1. Allow creating subgrouped collectives 2. Add a mechanism to group devices in a tiled sharding 3. For previously unhandled dot cases, detect whether we can have partial matches of dimensions, then group the matched dimensions and recursively partition it. 
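As a rough illustration (hypothetical shardings, not taken from the change
itself): if lhs, rhs and the output of a batch dot are all tiled 4 ways on
the batch dimension across 8 devices, with the remaining dimensions tiled
2 ways, the common 4-way batch partition forms 4 groups of 2 devices.
Within each group the dot is partitioned again with the per-group shardings
and num_partitions reduced to 2, using collectives restricted to that group
(the subgrouped collectives from item 1).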
PiperOrigin-RevId: 322911493 Change-Id: I598041dfe28a7b2b861942e9b706b54b7008d850 --- tensorflow/compiler/xla/service/spmd/BUILD | 1 + .../xla/service/spmd/convolution_handler.cc | 4 +- .../compiler/xla/service/spmd/dot_handler.cc | 717 +++++++++++++----- .../xla/service/spmd/spmd_partitioner.cc | 170 +++-- .../xla/service/spmd/spmd_partitioner.h | 55 +- .../xla/service/spmd/spmd_partitioner_test.cc | 152 +++- .../xla/service/spmd/spmd_partitioner_util.cc | 262 ++++++- .../xla/service/spmd/spmd_partitioner_util.h | 52 +- 8 files changed, 1115 insertions(+), 298 deletions(-) diff --git a/tensorflow/compiler/xla/service/spmd/BUILD b/tensorflow/compiler/xla/service/spmd/BUILD index e41b89f6dff..a67e4cf55c5 100644 --- a/tensorflow/compiler/xla/service/spmd/BUILD +++ b/tensorflow/compiler/xla/service/spmd/BUILD @@ -50,6 +50,7 @@ cc_library( "//tensorflow/compiler/xla/service:tuple_simplifier", "//tensorflow/core/platform:numbers", "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:optional", diff --git a/tensorflow/compiler/xla/service/spmd/convolution_handler.cc b/tensorflow/compiler/xla/service/spmd/convolution_handler.cc index 1204df59080..4caa2bbbf35 100644 --- a/tensorflow/compiler/xla/service/spmd/convolution_handler.cc +++ b/tensorflow/compiler/xla/service/spmd/convolution_handler.cc @@ -226,7 +226,7 @@ Status SpmdPartitioningVisitor::HandleConvolutionTiledLhsAndRhs( hlo->batch_group_count(), new_window, hlo->convolution_dimension_numbers(), hlo->precision_config())); auto ar = collective_ops_creator_.create_cross_partition_all_reduce( - &b_, conv, MakeBinaryAdd(hlo->shape().element_type(), module_), + &b_, conv, MakeBinaryAdd(hlo->shape().element_type(), module_), {}, NewChannel()); ar->set_sharding(HloSharding::Replicate()); return PartitionedHlo(ar, hlo->shape(), MakePartitioningState()) @@ -605,7 +605,7 @@ Status SpmdPartitioningVisitor::HandleConvolution(HloInstruction* hlo) { hlo->batch_group_count(), new_window, dnums, hlo->precision_config())); auto ar = collective_ops_creator_.create_cross_partition_all_reduce( - &b_, conv, MakeBinaryAdd(hlo->shape().element_type(), module_), + &b_, conv, MakeBinaryAdd(hlo->shape().element_type(), module_), {}, NewChannel()); ar->set_sharding(HloSharding::Replicate()); return PartitionedHlo(ar, hlo->shape(), MakePartitioningState()) diff --git a/tensorflow/compiler/xla/service/spmd/dot_handler.cc b/tensorflow/compiler/xla/service/spmd/dot_handler.cc index 9ecf21f5841..8fea788b1b7 100644 --- a/tensorflow/compiler/xla/service/spmd/dot_handler.cc +++ b/tensorflow/compiler/xla/service/spmd/dot_handler.cc @@ -80,12 +80,25 @@ Status SpmdPartitioningVisitor::HandleDot(HloInstruction* hlo) { return HandleDotHelper(hlo, mapping, create_sharded_dot); } -Status SpmdPartitioningVisitor::HandleDotHelper( - HloInstruction* hlo, const DotGeneralDimsMapping& dims_mapping, +namespace { + +StatusOr PartitionBaseCase( + PartitionedHlo lhs, PartitionedHlo rhs, const Shape& output_base_shape, + const HloSharding& output_sharding, + const DotGeneralDimsMapping& dims_mapping, int64 num_partitions, const std::function( - HloInstruction*, HloInstruction*, SpmdBuilder*)>& create_sharded_dot) { - const HloSharding& lhs_sharding = hlo->operand(0)->sharding(); - const HloSharding& rhs_sharding = hlo->operand(1)->sharding(); + HloInstruction*, HloInstruction*, SpmdBuilder*)>& create_sharded_dot, + HloModule* module, HloInstruction* 
original_hlo, int64 lhs_batch_partitions, + int64 rhs_batch_partitions, int64 output_batch_partitions, + int64 lhs_contracting_partitions, int64 rhs_contracting_partitions, + int64 lhs_non_contracting_partitions, int64 rhs_non_contracting_partitions, + int64 output_lhs_non_contracting_partitions, + int64 output_rhs_non_contracting_partitions, + int64 threshold_for_windowed_einsum_mib, SpmdBuilder* b, + std::vector* + windowed_dot_general_loops) { + const HloSharding& lhs_sharding = lhs.sharding(); + const HloSharding& rhs_sharding = rhs.sharding(); // Similar to hlo_sharding_util::TransposeSharding(), but allows // removing/adding non-partitioned dimensions. @@ -132,12 +145,12 @@ Status SpmdPartitioningVisitor::HandleDotHelper( return HloSharding::Tile(reshape_tiles); }; - std::vector lhs_to_rhs_indices(hlo->operand(0)->shape().rank(), -1); - std::vector lhs_to_output_indices(hlo->operand(0)->shape().rank(), -1); - std::vector rhs_to_lhs_indices(hlo->operand(1)->shape().rank(), -1); - std::vector rhs_to_output_indices(hlo->operand(1)->shape().rank(), -1); - std::vector output_to_lhs_indices(hlo->shape().rank(), -1); - std::vector output_to_rhs_indices(hlo->shape().rank(), -1); + std::vector lhs_to_rhs_indices(lhs.base_shape().rank(), -1); + std::vector lhs_to_output_indices(lhs.base_shape().rank(), -1); + std::vector rhs_to_lhs_indices(rhs.base_shape().rank(), -1); + std::vector rhs_to_output_indices(rhs.base_shape().rank(), -1); + std::vector output_to_lhs_indices(output_base_shape.rank(), -1); + std::vector output_to_rhs_indices(output_base_shape.rank(), -1); auto populate_indices_mapping = [&](const DotGeneralDimsMapping::DimsMapping& mapping) { if (mapping.lhs >= 0) { @@ -174,127 +187,84 @@ Status SpmdPartitioningVisitor::HandleDotHelper( auto rhs_sharding_transposed_to_match_output = transpose_sharding( rhs_sharding, rhs_to_output_indices, output_to_rhs_indices); auto output_sharding_transposed_to_match_lhs = transpose_sharding( - hlo->sharding(), output_to_lhs_indices, lhs_to_output_indices); + output_sharding, output_to_lhs_indices, lhs_to_output_indices); auto output_sharding_transposed_to_match_rhs = transpose_sharding( - hlo->sharding(), output_to_rhs_indices, rhs_to_output_indices); + output_sharding, output_to_rhs_indices, rhs_to_output_indices); - // lhs_rhs_or_output: 0 lhs, 1 rhs, 2 output. 
- auto get_partitions_for_dims = - [&](const HloSharding& sharding, - absl::Span dims, - int lhs_rhs_or_output) { - int64 partitions = 1; - if (sharding.IsTileMaximal()) { - return partitions; - } - for (const auto& dim : dims) { - if (lhs_rhs_or_output == 0) { - partitions *= sharding.tile_assignment().dim(dim.lhs); - } else if (lhs_rhs_or_output == 1) { - partitions *= sharding.tile_assignment().dim(dim.rhs); - } else { - CHECK_EQ(lhs_rhs_or_output, 2); - partitions *= sharding.tile_assignment().dim(dim.output); - } - } - return partitions; - }; - const int64 lhs_batch_partitions = - get_partitions_for_dims(lhs_sharding, dims_mapping.batch_dims, 0); - const int64 rhs_batch_partitions = - get_partitions_for_dims(rhs_sharding, dims_mapping.batch_dims, 1); - const int64 output_batch_partitions = - get_partitions_for_dims(hlo->sharding(), dims_mapping.batch_dims, 2); - const int64 lhs_contracting_partitions = - get_partitions_for_dims(lhs_sharding, dims_mapping.contracting_dims, 0); - const int64 rhs_contracting_partitions = - get_partitions_for_dims(rhs_sharding, dims_mapping.contracting_dims, 1); - const int64 lhs_non_contracting_partitions = get_partitions_for_dims( - lhs_sharding, dims_mapping.lhs_non_contracting_dims, 0); - const int64 rhs_non_contracting_partitions = get_partitions_for_dims( - rhs_sharding, dims_mapping.rhs_non_contracting_dims, 1); - const int64 output_lhs_non_contracting_partitions = get_partitions_for_dims( - hlo->sharding(), dims_mapping.lhs_non_contracting_dims, 2); - const int64 output_rhs_non_contracting_partitions = get_partitions_for_dims( - hlo->sharding(), dims_mapping.rhs_non_contracting_dims, 2); - - auto& lhs = GetPartitionedHlo(hlo->operand(0)); - auto& rhs = GetPartitionedHlo(hlo->operand(1)); // LHS and RHS are partitioned the same way and only partitioned in batch // dimensions. if (lhs_batch_partitions == rhs_batch_partitions && - rhs_batch_partitions == num_partitions_ && + rhs_batch_partitions == num_partitions && lhs_sharding_transposed_to_match_rhs == rhs_sharding) { - TF_ASSIGN_OR_RETURN(auto dot, - create_sharded_dot(lhs.hlo(), rhs.hlo(), &b_)); - SetPartitionedHlo(hlo, [&] { - dot->set_sharding(*lhs_sharding_transposed_to_match_output); - return PartitionedHlo(dot, hlo->shape(), MakePartitioningState()) - .Reshard(hlo->sharding()) - .hlo(); - }); - return Status::OK(); + TF_ASSIGN_OR_RETURN(auto dot, create_sharded_dot(lhs.hlo(), rhs.hlo(), b)); + dot->set_sharding(*lhs_sharding_transposed_to_match_output); + return PartitionedHlo(dot, output_base_shape, lhs.state()) + .Reshard(output_sharding) + .hlo(); } // Try emit batch-partitioned einsum with one operand resharded. Returns - // whether the attempt succeeds. If may_reshard_with_allreduce is false, - // reshard must be done using all-to-all; otherwise this attempt fails. + // partitioned HLO or nullptr if the attempt fails. If + // may_reshard_with_allreduce is false, reshard must be done using + // all-to-all/collective-permute; otherwise this attempt fails. auto try_emit_output_batch_partitioned_einsum_with_reshard = - [&](bool may_reshard_with_allreduce) -> StatusOr { + [&](bool may_reshard_with_allreduce) -> StatusOr { // LHS and output are batch partitioned in the same way. 
- if (lhs_batch_partitions == num_partitions_ && - output_batch_partitions == num_partitions_ && - lhs_sharding_transposed_to_match_output == hlo->sharding()) { + if (lhs_batch_partitions == num_partitions && + output_batch_partitions == num_partitions && + lhs_sharding_transposed_to_match_output == output_sharding) { if (!may_reshard_with_allreduce && + !CanReshardWithCollectivePermute( + rhs.sharding(), *lhs_sharding_transposed_to_match_rhs) && !GetReshardAllToAllSourceTargetDims( rhs.sharding(), *lhs_sharding_transposed_to_match_rhs)) { - return false; + return nullptr; } auto resharded_rhs = rhs.Reshard(*lhs_sharding_transposed_to_match_rhs); TF_ASSIGN_OR_RETURN( - auto dot, create_sharded_dot(lhs.hlo(), resharded_rhs.hlo(), &b_)); - SetPartitionedHlo(hlo, [&] { return dot; }); - return true; + auto dot, create_sharded_dot(lhs.hlo(), resharded_rhs.hlo(), b)); + return dot; } // RHS and output are batch partitioned in the same way. - if (rhs_batch_partitions == num_partitions_ && - output_batch_partitions == num_partitions_ && - rhs_sharding_transposed_to_match_output == hlo->sharding()) { + if (rhs_batch_partitions == num_partitions && + output_batch_partitions == num_partitions && + rhs_sharding_transposed_to_match_output == output_sharding) { if (!may_reshard_with_allreduce && + !CanReshardWithCollectivePermute( + lhs.sharding(), *rhs_sharding_transposed_to_match_lhs) && !GetReshardAllToAllSourceTargetDims( lhs.sharding(), *rhs_sharding_transposed_to_match_lhs)) { - return false; + return nullptr; } auto resharded_lhs = lhs.Reshard(*rhs_sharding_transposed_to_match_lhs); TF_ASSIGN_OR_RETURN( - auto dot, create_sharded_dot(resharded_lhs.hlo(), rhs.hlo(), &b_)); - SetPartitionedHlo(hlo, [&] { return dot; }); - return true; + auto dot, create_sharded_dot(resharded_lhs.hlo(), rhs.hlo(), b)); + return dot; } - return false; + return nullptr; }; { // Try batch-parallel by resharding one operand, and not using all-reduce. TF_ASSIGN_OR_RETURN( - bool emitted, + HloInstruction * partitioned_dot, try_emit_output_batch_partitioned_einsum_with_reshard(false)); - if (emitted) { - return Status::OK(); + if (partitioned_dot) { + return partitioned_dot; } } // Try to emit windowed DotGeneral when one operand is partitioned in the same // way as the output along non-contracting dimensions, but the other operand // is tiled in other dimensions. - auto emit_windowed_dot_general = [&](int64 matching_operand, - int64 windowing_operand, - bool windowed_at_contracting_dims, - bool windowed_at_batch_dims) { + auto emit_windowed_dot_general = + [&](int64 matching_operand, int64 windowing_operand, + bool windowed_at_contracting_dims, + bool windowed_at_batch_dims) -> StatusOr { CHECK_EQ(matching_operand + windowing_operand, 1); CHECK(!windowed_at_batch_dims || !windowed_at_contracting_dims); auto unpadded_result_buffer_shape = - MakePartitionedShape(hlo->shape(), hlo->sharding()); + MakePartitionedShape(output_base_shape, output_sharding); auto padded_result_buffer_shape = unpadded_result_buffer_shape; // For windowing at batch/non-contracting dims, we produce the result one // partition at a time, so we need to pad the shape in case of uneven @@ -310,17 +280,17 @@ Status SpmdPartitioningVisitor::HandleDotHelper( if (windowed_at_contracting_dims) { auto& to_mask = windowing_operand == 0 ? 
lhs : rhs; to_mask = - to_mask.PadWithValue(b_.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::Zero(hlo->shape().element_type())))); + to_mask.PadWithValue(b->AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::Zero(output_base_shape.element_type())))); } - auto result_buffer = CreateZero(padded_result_buffer_shape, &b_); - auto iteration = b_.AddInstruction( + auto result_buffer = CreateZero(padded_result_buffer_shape, b); + auto iteration = b->AddInstruction( HloInstruction::CreateConstant(LiteralUtil::CreateR0(0))); // Create a while loop that computes one window per iteration. During each // iteration, each partition sends its input window to its neighbor using // collective-permute for the next iteration. - SpmdBuilder body_b("windowed_dot_general_body", visiting_hlo_); + SpmdBuilder body_b("windowed_dot_general_body", original_hlo); auto param = body_b.AddInstruction(HloInstruction::CreateParameter( /*parameter_number=*/0, ShapeUtil::MakeTupleShape({lhs.hlo()->shape(), rhs.hlo()->shape(), @@ -335,11 +305,12 @@ Status SpmdPartitioningVisitor::HandleDotHelper( auto i = body_b.AddInstruction( HloInstruction::CreateGetTupleElement(iteration->shape(), param, 3)); - auto partition_id = collective_ops_creator_.create_partition_id(&body_b); + auto partition_id = + lhs.state().collective_ops_creator.create_partition_id(&body_b); auto data_partition_id = body_b.AddInstruction(HloInstruction::CreateBinary( i->shape(), HloOpcode::kAdd, i, partition_id)); auto partition_count = body_b.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::CreateR0(num_partitions_))); + LiteralUtil::CreateR0(num_partitions))); data_partition_id = body_b.AddInstruction(HloInstruction::CreateBinary( i->shape(), HloOpcode::kRemainder, data_partition_id, partition_count)); auto dot_lhs = l; @@ -350,7 +321,7 @@ Status SpmdPartitioningVisitor::HandleDotHelper( // operand as replicated, and resharding it to match the windowed operand. auto slice_operand = matching_operand == 0 ? l : r; slice_operand->set_sharding(HloSharding::Replicate()); - auto state = MakePartitioningState(); + auto state = lhs.state(); state.b = &body_b; state.partition_id = data_partition_id; auto slice = PartitionedHlo(slice_operand, slice_operand->shape(), state) @@ -392,26 +363,27 @@ Status SpmdPartitioningVisitor::HandleDotHelper( auto has_more = body_b.AddInstruction(HloInstruction::CreateCompare( ShapeUtil::MakeShape(PRED, {}), i, body_b.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::CreateR0(num_partitions_))), + LiteralUtil::CreateR0(num_partitions))), ComparisonDirection::kLt)); // Collective-permute for the next window. We don't need it for the last // iteration, so we use a conditional around the collective-permute. HloInstruction* conditional; { - SpmdBuilder cp_b("window_collective_permute", visiting_hlo_); + SpmdBuilder cp_b("window_collective_permute", original_hlo); { auto p = cp_b.AddInstruction(HloInstruction::CreateParameter( 0, windowing_operand == 0 ? l->shape() : r->shape(), "window")); - std::vector> sd_pairs(num_partitions_); - for (int64 source = 0; source < num_partitions_; ++source) { + std::vector> sd_pairs(num_partitions); + for (int64 source = 0; source < num_partitions; ++source) { // 0 -> n-1, 1 -> 0, 2 -> 1, ... 
sd_pairs[source] = {source, - (source - 1 + num_partitions_) % num_partitions_}; + (source - 1 + num_partitions) % num_partitions}; } - collective_ops_creator_.create_cross_partition_collective_permute( - &cp_b, p, sd_pairs, (*next_channel_id_)++); + lhs.state() + .collective_ops_creator.create_cross_partition_collective_permute( + &cp_b, p, sd_pairs, (*lhs.state().next_channel_id)++); } - SpmdBuilder ncp_b("last_iteration_noop", visiting_hlo_); + SpmdBuilder ncp_b("last_iteration_noop", original_hlo); { ncp_b.AddInstruction(HloInstruction::CreateParameter( 0, windowing_operand == 0 ? l->shape() : r->shape(), "window")); @@ -419,9 +391,9 @@ Status SpmdPartitioningVisitor::HandleDotHelper( conditional = body_b.AddInstruction(HloInstruction::CreateConditional( windowing_operand == 0 ? l->shape() : r->shape(), has_more, windowing_operand == 0 ? l : r, - module_->AddEmbeddedComputation(cp_b.Build()), + module->AddEmbeddedComputation(cp_b.Build()), windowing_operand == 0 ? l : r, - module_->AddEmbeddedComputation(ncp_b.Build()))); + module->AddEmbeddedComputation(ncp_b.Build()))); } if (windowing_operand == 0) { l = conditional; @@ -430,7 +402,7 @@ Status SpmdPartitioningVisitor::HandleDotHelper( } body_b.AddInstruction(HloInstruction::CreateTuple({l, r, o, i})); - SpmdBuilder cond_b("windowed_dot_general_cond", visiting_hlo_); + SpmdBuilder cond_b("windowed_dot_general_cond", original_hlo); auto cond_param = cond_b.AddInstruction(HloInstruction::CreateParameter( /*parameter_number=*/0, ShapeUtil::MakeTupleShape({lhs.hlo()->shape(), rhs.hlo()->shape(), @@ -441,56 +413,53 @@ Status SpmdPartitioningVisitor::HandleDotHelper( cond_b.AddInstruction(HloInstruction::CreateCompare( ShapeUtil::MakeShape(PRED, {}), cond_i, cond_b.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::CreateR0(num_partitions_))), + LiteralUtil::CreateR0(num_partitions))), ComparisonDirection::kLt)); - auto while_loop = b_.AddInstruction(HloInstruction::CreateWhile( - cond_param->shape(), module_->AddEmbeddedComputation(cond_b.Build()), - module_->AddEmbeddedComputation(body_b.Build()), - b_.AddInstruction(HloInstruction::CreateTuple( + auto while_loop = b->AddInstruction(HloInstruction::CreateWhile( + cond_param->shape(), module->AddEmbeddedComputation(cond_b.Build()), + module->AddEmbeddedComputation(body_b.Build()), + b->AddInstruction(HloInstruction::CreateTuple( {lhs.hlo(), rhs.hlo(), result_buffer, iteration})))); - windowed_dot_general_loops_.push_back({while_loop, windowing_operand, + windowed_dot_general_loops->push_back({while_loop, windowing_operand, windowed_at_contracting_dims, windowed_at_batch_dims}); - SetPartitionedHlo(hlo, [&] { - auto result = b_.AddInstruction(HloInstruction::CreateGetTupleElement( - result_buffer->shape(), while_loop, 2)); - if (!ShapeUtil::Compatible(padded_result_buffer_shape, - unpadded_result_buffer_shape)) { - result = b_.AddInstruction(HloInstruction::CreateSlice( - unpadded_result_buffer_shape, result, - std::vector(padded_result_buffer_shape.rank(), 0), - unpadded_result_buffer_shape.dimensions(), - std::vector(padded_result_buffer_shape.rank(), 1))); - } - return result; - }); - return Status::OK(); + auto result = b->AddInstruction(HloInstruction::CreateGetTupleElement( + result_buffer->shape(), while_loop, 2)); + if (!ShapeUtil::Compatible(padded_result_buffer_shape, + unpadded_result_buffer_shape)) { + result = b->AddInstruction(HloInstruction::CreateSlice( + unpadded_result_buffer_shape, result, + std::vector(padded_result_buffer_shape.rank(), 0), + 
unpadded_result_buffer_shape.dimensions(), + std::vector(padded_result_buffer_shape.rank(), 1))); + } + return result; }; - if (output_lhs_non_contracting_partitions == num_partitions_ && + if (output_lhs_non_contracting_partitions == num_partitions && output_sharding_transposed_to_match_lhs == lhs_sharding && - ShapeSizeInBytes(hlo->operand(1)->shape()) >= - options_.threshold_for_windowed_einsum_mib * 1024 * 1024) { - if (rhs_contracting_partitions == num_partitions_) { + ShapeSizeInBytes(rhs.base_shape()) >= + threshold_for_windowed_einsum_mib * 1024 * 1024) { + if (rhs_contracting_partitions == num_partitions) { return emit_windowed_dot_general(0, 1, true, false); } - if (rhs_non_contracting_partitions == num_partitions_) { + if (rhs_non_contracting_partitions == num_partitions) { return emit_windowed_dot_general(0, 1, false, false); } - if (rhs_batch_partitions == num_partitions_) { + if (rhs_batch_partitions == num_partitions) { return emit_windowed_dot_general(0, 1, false, true); } } - if (output_rhs_non_contracting_partitions == num_partitions_ && + if (output_rhs_non_contracting_partitions == num_partitions && output_sharding_transposed_to_match_rhs == rhs_sharding && - ShapeSizeInBytes(hlo->operand(0)->shape()) >= - options_.threshold_for_windowed_einsum_mib * 1024 * 1024) { - if (lhs_contracting_partitions == num_partitions_) { + ShapeSizeInBytes(lhs.base_shape()) >= + threshold_for_windowed_einsum_mib * 1024 * 1024) { + if (lhs_contracting_partitions == num_partitions) { return emit_windowed_dot_general(1, 0, true, false); } - if (lhs_non_contracting_partitions == num_partitions_) { + if (lhs_non_contracting_partitions == num_partitions) { return emit_windowed_dot_general(1, 0, false, false); } - if (lhs_batch_partitions == num_partitions_) { + if (lhs_batch_partitions == num_partitions) { return emit_windowed_dot_general(1, 0, false, true); } } @@ -498,18 +467,18 @@ Status SpmdPartitioningVisitor::HandleDotHelper( { // Try batch-parallel by resharding one operand, and allowing all-reduce. TF_ASSIGN_OR_RETURN( - bool emitted, + HloInstruction * partitioned_dot, try_emit_output_batch_partitioned_einsum_with_reshard(true)); - if (emitted) { - return Status::OK(); + if (partitioned_dot) { + return partitioned_dot; } } // LHS and RHS have the same partitioned contracting dimensions. if (lhs_contracting_partitions == rhs_contracting_partitions && - lhs_contracting_partitions == num_partitions_) { - auto zero = b_.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::Zero(hlo->shape().element_type()))); + lhs_contracting_partitions == num_partitions) { + auto zero = b->AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::Zero(output_base_shape.element_type()))); // Pad both sides with zero, since NaN at one side cannot be masked by zero // on the other side. 
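    // (For example, if only one operand were zero-padded and the other
    // operand's padded region were left with arbitrary values, a NaN there
    // would contribute NaN * 0 = NaN and poison the accumulation; padding
    // both operands with zero makes the padded region contribute exactly
    // zero.)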
if (ShapeSizeInBytes(lhs.base_shape()) < @@ -522,100 +491,91 @@ Status SpmdPartitioningVisitor::HandleDotHelper( rhs = rhs.Reshard(*lhs_sharding_transposed_to_match_rhs).PadWithValue(zero); } - TF_ASSIGN_OR_RETURN(auto dot, - create_sharded_dot(lhs.hlo(), rhs.hlo(), &b_)); - SetPartitionedHlo(hlo, [&] { - auto ar = collective_ops_creator_.create_cross_partition_all_reduce( - &b_, dot, MakeBinaryAdd(hlo->shape().element_type(), module_), - NewChannel()); - ar->set_sharding(HloSharding::Replicate()); - return PartitionedHlo(ar, hlo->shape(), MakePartitioningState()) - .Reshard(hlo->sharding()) - .hlo(); - }); - return Status::OK(); + TF_ASSIGN_OR_RETURN(auto dot, create_sharded_dot(lhs.hlo(), rhs.hlo(), b)); + auto ar = + lhs.state().collective_ops_creator.create_cross_partition_all_reduce( + b, dot, MakeBinaryAdd(output_base_shape.element_type(), module), {}, + (*lhs.state().next_channel_id)++); + ar->set_sharding(HloSharding::Replicate()); + return PartitionedHlo(ar, output_base_shape, lhs.state()) + .Reshard(output_sharding) + .hlo(); } // LHS and output have the same partitioned non-contracting dimensions. - if (lhs_non_contracting_partitions == num_partitions_ && - output_lhs_non_contracting_partitions == num_partitions_ && - lhs_sharding_transposed_to_match_output == hlo->sharding()) { + if (lhs_non_contracting_partitions == num_partitions && + output_lhs_non_contracting_partitions == num_partitions && + lhs_sharding_transposed_to_match_output == output_sharding) { auto rhs_replicated = rhs.Reshard(HloSharding::Replicate()).hlo(); TF_ASSIGN_OR_RETURN(auto dot, - create_sharded_dot(lhs.hlo(), rhs_replicated, &b_)); - SetPartitionedHlo(hlo, [&] { return dot; }); - return Status::OK(); + create_sharded_dot(lhs.hlo(), rhs_replicated, b)); + return dot; } // RHS and output have the same partitioned non-contracting dimensions. - if (rhs_non_contracting_partitions == num_partitions_ && - output_rhs_non_contracting_partitions == num_partitions_ && - rhs_sharding_transposed_to_match_output == hlo->sharding()) { + if (rhs_non_contracting_partitions == num_partitions && + output_rhs_non_contracting_partitions == num_partitions && + rhs_sharding_transposed_to_match_output == output_sharding) { auto lhs_replicated = lhs.Reshard(HloSharding::Replicate()).hlo(); TF_ASSIGN_OR_RETURN(auto dot, - create_sharded_dot(lhs_replicated, rhs.hlo(), &b_)); - SetPartitionedHlo(hlo, [&] { return dot; }); - return Status::OK(); + create_sharded_dot(lhs_replicated, rhs.hlo(), b)); + return dot; } // Output is batch partitioned. - if (output_batch_partitions == num_partitions_) { + if (output_batch_partitions == num_partitions) { auto resharded_lhs = lhs.Reshard(*output_sharding_transposed_to_match_lhs); auto resharded_rhs = rhs.Reshard(*output_sharding_transposed_to_match_rhs); TF_ASSIGN_OR_RETURN(auto dot, create_sharded_dot(resharded_lhs.hlo(), - resharded_rhs.hlo(), &b_)); - SetPartitionedHlo(hlo, [&] { return dot; }); - return Status::OK(); + resharded_rhs.hlo(), b)); + return dot; } // Output is partitioned along LHS non-contracting dimensions. 
- if (output_lhs_non_contracting_partitions == num_partitions_) { + if (output_lhs_non_contracting_partitions == num_partitions) { auto resharded_lhs = lhs.Reshard(*output_sharding_transposed_to_match_lhs); auto replicated_rhs = rhs.Reshard(HloSharding::Replicate()); - TF_ASSIGN_OR_RETURN( - auto dot, - create_sharded_dot(resharded_lhs.hlo(), replicated_rhs.hlo(), &b_)); - SetPartitionedHlo(hlo, [&] { return dot; }); - return Status::OK(); + TF_ASSIGN_OR_RETURN(auto dot, create_sharded_dot(resharded_lhs.hlo(), + replicated_rhs.hlo(), b)); + return dot; } // Output is partitioned along RHS non-contracting dimensions. - if (output_rhs_non_contracting_partitions == num_partitions_) { + if (output_rhs_non_contracting_partitions == num_partitions) { auto replicated_lhs = lhs.Reshard(HloSharding::Replicate()); auto resharded_rhs = rhs.Reshard(*output_sharding_transposed_to_match_rhs); TF_ASSIGN_OR_RETURN(auto dot, create_sharded_dot(replicated_lhs.hlo(), - resharded_rhs.hlo(), &b_)); - SetPartitionedHlo(hlo, [&] { return dot; }); - return Status::OK(); + resharded_rhs.hlo(), b)); + return dot; } // Returns true if it is beneficial to reshard the operand at `operand_idx` // across the contracting dimension. const auto should_partition_contracting_dim = [&](int64 operand_idx) { - if (!hlo->sharding().IsReplicated()) { + if (!output_sharding.IsReplicated()) { return false; } if (operand_idx == 0) { // If LHS and output are replicated, we compare the cost of all-gather // on RHS vs all-reduce on the output. - return (rhs_contracting_partitions == num_partitions_) && + return (rhs_contracting_partitions == num_partitions) && lhs.sharding().IsReplicated() && - ShapeUtil::ElementsIn(hlo->operand(1)->shape()) > - ShapeUtil::ElementsIn(hlo->shape()); + ShapeUtil::ElementsIn(rhs.base_shape()) > + ShapeUtil::ElementsIn(output_base_shape); } else { - return (lhs_contracting_partitions == num_partitions_) && + return (lhs_contracting_partitions == num_partitions) && rhs.sharding().IsReplicated() && - ShapeUtil::ElementsIn(hlo->operand(0)->shape()) > - ShapeUtil::ElementsIn(hlo->shape()); + ShapeUtil::ElementsIn(lhs.base_shape()) > + ShapeUtil::ElementsIn(output_base_shape); } }; // When the output is replicated and one of the operands is partitioned along // contracting dimension, align the other operand to be partitioned along // the contracting dimensions. 
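   // (Illustrative numbers only: with lhs f32[256,1024] replicated, rhs
   // f32[1024,8] split 8 ways along the contracting dimension, and a
   // replicated f32[256,8] output, the check above prefers splitting lhs
   // along the contracting dimension and all-reducing the small output
   // rather than all-gathering the much larger rhs.)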
- if (hlo->sharding().IsReplicated() && (should_partition_contracting_dim(0) || + if (output_sharding.IsReplicated() && (should_partition_contracting_dim(0) || should_partition_contracting_dim(1))) { - auto zero = b_.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::Zero(hlo->shape().element_type()))); + auto zero = b->AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::Zero(output_base_shape.element_type()))); if (should_partition_contracting_dim(0)) { lhs = lhs.Reshard(*rhs_sharding_transposed_to_match_lhs).PadWithValue(zero); @@ -625,19 +585,361 @@ Status SpmdPartitioningVisitor::HandleDotHelper( rhs = rhs.Reshard(*lhs_sharding_transposed_to_match_rhs).PadWithValue(zero); } - TF_ASSIGN_OR_RETURN(auto dot, - create_sharded_dot(lhs.hlo(), rhs.hlo(), &b_)); - SetPartitionedHlo(hlo, [&] { - auto ar = collective_ops_creator_.create_cross_partition_all_reduce( - &b_, dot, MakeBinaryAdd(hlo->shape().element_type(), module_), - NewChannel()); - ar->set_sharding(HloSharding::Replicate()); - return PartitionedHlo(ar, hlo->shape(), MakePartitioningState()).hlo(); - }); - return Status::OK(); + TF_ASSIGN_OR_RETURN(auto dot, create_sharded_dot(lhs.hlo(), rhs.hlo(), b)); + return lhs.state().collective_ops_creator.create_cross_partition_all_reduce( + b, dot, MakeBinaryAdd(output_base_shape.element_type(), module), {}, + (*lhs.state().next_channel_id)++); + } + return nullptr; +} + +StatusOr PartitionDot( + PartitionedHlo lhs, PartitionedHlo rhs, const Shape& output_base_shape, + const HloSharding& output_sharding, + const DotGeneralDimsMapping& dims_mapping, int64 num_partitions, + const std::function( + HloInstruction*, HloInstruction*, SpmdBuilder*)>& create_sharded_dot, + HloModule* module, HloInstruction* original_hlo, + int64 threshold_for_windowed_einsum_mib, SpmdBuilder* b, + std::vector* + windowed_dot_general_loops); + +StatusOr PartitionDotGroupOnBatch( + PartitionedHlo lhs, PartitionedHlo rhs, const Shape& output_base_shape, + const HloSharding& output_sharding, + const DotGeneralDimsMapping& dims_mapping, int64 num_partitions, + const std::function( + HloInstruction*, HloInstruction*, SpmdBuilder*)>& create_sharded_dot, + HloModule* module, HloInstruction* original_hlo, + int64 threshold_for_windowed_einsum_mib, SpmdBuilder* b, + std::vector* + windowed_dot_general_loops) { + std::vector lhs_dims; + std::vector rhs_dims; + std::vector output_dims; + auto lhs_sharding_dims_adjusted_to_output = + lhs.sharding().tile_assignment().dimensions(); + auto rhs_sharding_dims_adjusted_to_output = + lhs.sharding().tile_assignment().dimensions(); + auto output_sharding_dims_adjusted_to_lhs = + output_sharding.tile_assignment().dimensions(); + bool lhs_rhs_dims_matching = true; + for (const auto& dim : dims_mapping.batch_dims) { + lhs_dims.push_back(dim.lhs); + rhs_dims.push_back(dim.rhs); + output_dims.push_back(dim.output); + if (lhs_sharding_dims_adjusted_to_output[dim.lhs] != + rhs_sharding_dims_adjusted_to_output[dim.rhs]) { + lhs_rhs_dims_matching = false; + } + lhs_sharding_dims_adjusted_to_output[dim.lhs] = + output_sharding.tile_assignment().dim(dim.output); + rhs_sharding_dims_adjusted_to_output[dim.rhs] = + output_sharding.tile_assignment().dim(dim.output); + output_sharding_dims_adjusted_to_lhs[dim.output] = + lhs.sharding().tile_assignment().dim(dim.lhs); + } + auto lhs_grouped = GroupShardingOnDims(lhs.sharding(), lhs_dims); + auto rhs_grouped = GroupShardingOnDims(rhs.sharding(), rhs_dims); + auto output_grouped = GroupShardingOnDims(output_sharding, output_dims); + 
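+  // (Rough illustration, hypothetical tiling: with 8 devices and a [4,2]
+  // tile assignment, grouping on the 4-way batch dimension forms 4 device
+  // groups of 2 devices each; the recursive call below then partitions the
+  // per-group dot across the remaining 2-way tiling.)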
if (lhs_rhs_dims_matching) { + if (ShapeUtil::ByteSizeOf(lhs.base_shape()) > + ShapeUtil::ByteSizeOf(rhs.base_shape())) { + rhs_grouped = AlignGroupsWith(std::move(rhs_grouped), lhs_grouped); + rhs = rhs.Reshard(UngroupSharding(rhs_grouped)); + } else { + lhs_grouped = AlignGroupsWith(std::move(lhs_grouped), rhs_grouped); + lhs = lhs.Reshard(UngroupSharding(lhs_grouped)); + } + auto reshaped_output_tiling = output_sharding.tile_assignment(); + reshaped_output_tiling.Reshape(output_sharding_dims_adjusted_to_lhs); + output_grouped = AlignGroupsWith( + GroupShardingOnDims(HloSharding::Tile(reshaped_output_tiling), + output_dims), + lhs_grouped); + } else { + auto reshaped_lhs_tiling = lhs.sharding().tile_assignment(); + reshaped_lhs_tiling.Reshape(lhs_sharding_dims_adjusted_to_output); + lhs_grouped = AlignGroupsWith( + GroupShardingOnDims(HloSharding::Tile(reshaped_lhs_tiling), lhs_dims), + output_grouped); + lhs = lhs.Reshard(UngroupSharding(lhs_grouped)); + auto reshaped_rhs_tiling = rhs.sharding().tile_assignment(); + reshaped_rhs_tiling.Reshape(rhs_sharding_dims_adjusted_to_output); + rhs_grouped = AlignGroupsWith( + GroupShardingOnDims(HloSharding::Tile(reshaped_rhs_tiling), rhs_dims), + output_grouped); + rhs = rhs.Reshard(UngroupSharding(rhs_grouped)); + } + auto per_group_partitioner_state = CreatePerGroupPartitioningState( + lhs.state(), lhs_grouped.device_groups, b); + lhs.hlo()->set_sharding(lhs_grouped.sharding); + rhs.hlo()->set_sharding(rhs_grouped.sharding); + CHECK(lhs.hlo() != rhs.hlo() || lhs_grouped.sharding == rhs_grouped.sharding); + TF_ASSIGN_OR_RETURN( + auto dot, + PartitionDot( + PartitionedHlo(lhs.hlo(), + GetPerGroupBaseShape(lhs_grouped, lhs.base_shape()), + per_group_partitioner_state), + PartitionedHlo(rhs.hlo(), + GetPerGroupBaseShape(rhs_grouped, rhs.base_shape()), + per_group_partitioner_state), + GetPerGroupBaseShape(output_grouped, output_base_shape), + output_grouped.sharding, dims_mapping, + num_partitions / lhs_grouped.device_groups.size(), create_sharded_dot, + module, original_hlo, threshold_for_windowed_einsum_mib, b, + windowed_dot_general_loops)); + // Reset the LHS sharding to the ungrouped one. 
+ lhs.hlo()->set_sharding(UngroupSharding(lhs_grouped)); + rhs.hlo()->set_sharding(UngroupSharding(rhs_grouped)); + dot->set_sharding(UngroupSharding(output_grouped)); + return PartitionedHlo(dot, output_base_shape, lhs.state()) + .Reshard(output_sharding) + .hlo(); +} + +StatusOr PartitionDotGroupOnNonContracting( + bool lhs_matching, PartitionedHlo matching, PartitionedHlo other, + int64 matching_contracting_partitions, int64 other_contracting_partitions, + int64 matching_non_contracting_partitions, + int64 other_non_contracting_partitions, + int64 output_other_non_contracting_partitions, + const Shape& output_base_shape, const HloSharding& output_sharding, + const DotGeneralDimsMapping& dims_mapping, int64 num_partitions, + const std::function( + HloInstruction*, HloInstruction*, SpmdBuilder*)>& create_sharded_dot, + HloModule* module, HloInstruction* original_hlo, + int64 threshold_for_windowed_einsum_mib, SpmdBuilder* b, + std::vector* + windowed_dot_general_loops) { + const bool may_replicate_other_contracting_dims = + (other_contracting_partitions == matching_non_contracting_partitions && + other_non_contracting_partitions == + output_other_non_contracting_partitions); + const bool may_replicate_other_non_contracting_dims = + matching_non_contracting_partitions == other_non_contracting_partitions && + matching_contracting_partitions == other_contracting_partitions; + std::vector other_group_dims; + if (may_replicate_other_contracting_dims && + (!may_replicate_other_non_contracting_dims || + ShapeUtil::ByteSizeOf(other.base_shape()) <= + ShapeUtil::ByteSizeOf(output_base_shape))) { + for (const auto& dim : dims_mapping.contracting_dims) { + other_group_dims.push_back(lhs_matching ? dim.rhs : dim.lhs); + } + } else if (may_replicate_other_non_contracting_dims) { + for (const auto& dim : lhs_matching + ? dims_mapping.rhs_non_contracting_dims + : dims_mapping.lhs_non_contracting_dims) { + other_group_dims.push_back(lhs_matching ? dim.rhs : dim.lhs); + } + } else { + return nullptr; + } + auto matching_sharding_dims = + matching.sharding().tile_assignment().dimensions(); + std::vector matching_dims; + std::vector output_dims; + // Make sure the partitioning on matching's non-contracting dimensions + // defines the same device groups for both matching and output. + for (const auto& dim : lhs_matching ? dims_mapping.lhs_non_contracting_dims + : dims_mapping.rhs_non_contracting_dims) { + int64 md = lhs_matching ? 
dim.lhs : dim.rhs; + matching_sharding_dims[md] = + output_sharding.tile_assignment().dim(dim.output); + matching_dims.push_back(md); + output_dims.push_back(dim.output); + } + auto output_grouped = GroupShardingOnDims(output_sharding, output_dims); + auto reshaped_matching_tiling = matching.sharding().tile_assignment(); + reshaped_matching_tiling.Reshape(matching_sharding_dims); + auto matching_grouped = AlignGroupsWith( + GroupShardingOnDims(HloSharding::Tile(reshaped_matching_tiling), + matching_dims), + output_grouped); + matching = matching.Reshard(UngroupSharding(matching_grouped)); + + auto other_grouped = + AlignGroupsWith(GroupShardingOnDims(other.sharding(), other_group_dims), + output_grouped, /*ignore_group_order=*/true); + other = other.Reshard(UngroupSharding(other_grouped)); + auto partially_replicated_other = + other.ReplicatePartial(other_grouped.group_dims); + auto per_group_partitioner_state = CreatePerGroupPartitioningState( + matching.state(), matching_grouped.device_groups, b); + matching.hlo()->set_sharding(matching_grouped.sharding); + partially_replicated_other->set_sharding(other_grouped.sharding); + auto matching_p = PartitionedHlo( + matching.hlo(), + GetPerGroupBaseShape(matching_grouped, matching.base_shape()), + per_group_partitioner_state); + auto other_p = PartitionedHlo(partially_replicated_other, other.base_shape(), + per_group_partitioner_state); + TF_ASSIGN_OR_RETURN( + auto dot, + PartitionDot(lhs_matching ? matching_p : other_p, + lhs_matching ? other_p : matching_p, + GetPerGroupBaseShape(output_grouped, output_base_shape), + output_grouped.sharding, dims_mapping, + num_partitions / matching_grouped.device_groups.size(), + create_sharded_dot, module, original_hlo, + threshold_for_windowed_einsum_mib, b, + windowed_dot_general_loops)); + // Reset matching's sharding to the ungrouped one. + matching.hlo()->set_sharding(UngroupSharding(matching_grouped)); + return dot; +} + +// Recursive partitioning function. If there are partial dimensions matching in +// the operands and output, group the devices and recursively partition the +// in-group dot. +StatusOr PartitionDot( + PartitionedHlo lhs, PartitionedHlo rhs, const Shape& output_base_shape, + const HloSharding& output_sharding, + const DotGeneralDimsMapping& dims_mapping, int64 num_partitions, + const std::function( + HloInstruction*, HloInstruction*, SpmdBuilder*)>& create_sharded_dot, + HloModule* module, HloInstruction* original_hlo, + int64 threshold_for_windowed_einsum_mib, SpmdBuilder* b, + std::vector* + windowed_dot_general_loops) { + // lhs_rhs_or_output: 0 lhs, 1 rhs, 2 output. 
+ auto get_partitions_for_dims = + [&](const HloSharding& sharding, + absl::Span dims, + int lhs_rhs_or_output) { + int64 partitions = 1; + if (sharding.IsTileMaximal()) { + return partitions; + } + for (const auto& dim : dims) { + if (lhs_rhs_or_output == 0) { + partitions *= sharding.tile_assignment().dim(dim.lhs); + } else if (lhs_rhs_or_output == 1) { + partitions *= sharding.tile_assignment().dim(dim.rhs); + } else { + CHECK_EQ(lhs_rhs_or_output, 2); + partitions *= sharding.tile_assignment().dim(dim.output); + } + } + return partitions; + }; + const int64 lhs_batch_partitions = + get_partitions_for_dims(lhs.sharding(), dims_mapping.batch_dims, 0); + const int64 rhs_batch_partitions = + get_partitions_for_dims(rhs.sharding(), dims_mapping.batch_dims, 1); + const int64 output_batch_partitions = + get_partitions_for_dims(output_sharding, dims_mapping.batch_dims, 2); + const int64 lhs_contracting_partitions = + get_partitions_for_dims(lhs.sharding(), dims_mapping.contracting_dims, 0); + const int64 rhs_contracting_partitions = + get_partitions_for_dims(rhs.sharding(), dims_mapping.contracting_dims, 1); + const int64 lhs_non_contracting_partitions = get_partitions_for_dims( + lhs.sharding(), dims_mapping.lhs_non_contracting_dims, 0); + const int64 rhs_non_contracting_partitions = get_partitions_for_dims( + rhs.sharding(), dims_mapping.rhs_non_contracting_dims, 1); + const int64 output_lhs_non_contracting_partitions = get_partitions_for_dims( + output_sharding, dims_mapping.lhs_non_contracting_dims, 2); + const int64 output_rhs_non_contracting_partitions = get_partitions_for_dims( + output_sharding, dims_mapping.rhs_non_contracting_dims, 2); + TF_ASSIGN_OR_RETURN( + auto try_partitioned_dot, + PartitionBaseCase( + lhs, rhs, output_base_shape, output_sharding, dims_mapping, + num_partitions, create_sharded_dot, module, original_hlo, + lhs_batch_partitions, rhs_batch_partitions, output_batch_partitions, + lhs_contracting_partitions, rhs_contracting_partitions, + lhs_non_contracting_partitions, rhs_non_contracting_partitions, + output_lhs_non_contracting_partitions, + output_rhs_non_contracting_partitions, + threshold_for_windowed_einsum_mib, b, windowed_dot_general_loops)); + if (try_partitioned_dot) { + return try_partitioned_dot; } - return DefaultAction(hlo); + // Recursively partition on different types of dimensions. + // + // Case 1: Group partitions by batch. + if (lhs_batch_partitions == rhs_batch_partitions && + lhs_batch_partitions == output_batch_partitions && + lhs_batch_partitions > 1) { + TF_ASSIGN_OR_RETURN( + auto dot, + PartitionDotGroupOnBatch( + lhs, rhs, output_base_shape, output_sharding, dims_mapping, + num_partitions, create_sharded_dot, module, original_hlo, + threshold_for_windowed_einsum_mib, b, windowed_dot_general_loops)); + if (dot) { + return dot; + } + } + + // Case 2: Group partitions by non-contracting dimensions. + const bool may_group_on_lhs_non_contracting = + lhs_non_contracting_partitions == output_lhs_non_contracting_partitions && + lhs_non_contracting_partitions > 1; + const bool may_group_on_rhs_non_contracting = + rhs_non_contracting_partitions == output_rhs_non_contracting_partitions && + rhs_non_contracting_partitions > 1; + if (may_group_on_lhs_non_contracting || may_group_on_rhs_non_contracting) { + // If both match output non-contracting dimensions, choose the one which + // will result in smaller replication of the other operand. 
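// Grouping on the LHS non-contracting dims replicates the RHS inside each
// group (and vice versa), so the comparison below weighs
// lhs_non_contracting_partitions * size(rhs) against
// rhs_non_contracting_partitions * size(lhs) and picks the side that
// replicates less data.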
+ const bool lhs_matching = + may_group_on_lhs_non_contracting && + (!may_group_on_rhs_non_contracting || + lhs_non_contracting_partitions * + ShapeUtil::ByteSizeOf(rhs.hlo()->shape()) <= + rhs_non_contracting_partitions * + ShapeUtil::ByteSizeOf(lhs.hlo()->shape())); + + TF_ASSIGN_OR_RETURN( + auto dot, + PartitionDotGroupOnNonContracting( + lhs_matching, lhs_matching ? lhs : rhs, lhs_matching ? rhs : lhs, + lhs_matching ? lhs_contracting_partitions + : rhs_contracting_partitions, + lhs_matching ? rhs_contracting_partitions + : lhs_contracting_partitions, + lhs_matching ? lhs_non_contracting_partitions + : rhs_non_contracting_partitions, + lhs_matching ? rhs_non_contracting_partitions + : lhs_non_contracting_partitions, + lhs_matching ? output_rhs_non_contracting_partitions + : output_lhs_non_contracting_partitions, + output_base_shape, output_sharding, dims_mapping, num_partitions, + create_sharded_dot, module, original_hlo, + threshold_for_windowed_einsum_mib, b, windowed_dot_general_loops)); + if (dot) { + return dot; + } + } + + // Default action. + TF_ASSIGN_OR_RETURN(auto dot, create_sharded_dot(lhs.Replicate().hlo(), + rhs.Replicate().hlo(), b)); + dot->set_sharding(HloSharding::Replicate()); + return PartitionedHlo(dot, output_base_shape, lhs.state()) + .Reshard(output_sharding) + .hlo(); +} + +} // namespace + +Status SpmdPartitioningVisitor::HandleDotHelper( + HloInstruction* hlo, const DotGeneralDimsMapping& dims_mapping, + const std::function( + HloInstruction*, HloInstruction*, SpmdBuilder*)>& create_sharded_dot) { + auto& lhs = GetPartitionedHlo(hlo->operand(0)); + auto& rhs = GetPartitionedHlo(hlo->operand(1)); + TF_ASSIGN_OR_RETURN( + auto partitioned_dot, + PartitionDot(lhs, rhs, hlo->shape(), hlo->sharding(), dims_mapping, + num_partitions_, create_sharded_dot, module_, hlo, + options_.threshold_for_windowed_einsum_mib, &b_, + &windowed_dot_general_loops_)); + SetPartitionedHlo(hlo, [&] { return partitioned_dot; }); + return Status::OK(); } namespace { @@ -780,6 +1082,7 @@ Status SinkInputNodesIntoWindowedDotGeneralLoopOnContractingDimensions( [](const HloInstruction* a, const HloInstruction* b) { return a->unique_id() < b->unique_id(); }); + worklist.reserve(nullaries_to_sink.size()); for (auto inst : nullaries_to_sink) { worklist.push_back(inst); } diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc index bac5c812814..7aaa3e32b2a 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc @@ -165,16 +165,6 @@ template namespace { -// Returns the replica group configuration where each replica belongs to its own -// group. -std::vector CreateReplicaGroups(int64 num_replicas) { - std::vector groups(num_replicas); - for (int64 i = 0; i < num_replicas; ++i) { - groups[i].add_replica_ids(i); - } - return groups; -} - // Clears all sharding attributes from instructions in the module. This must be // called only after all SPMD transformation is complete. 
Status ClearShardingAttributes(HloModule* module) { @@ -195,6 +185,28 @@ Status ClearShardingAttributes(HloModule* module) { return Status::OK(); } +std::vector> GetPartitionGroupsForReplication( + const HloSharding& sharding, absl::Span replication_dims) { + int64 group_size = 1; + for (int64 i : replication_dims) { + group_size *= sharding.tile_assignment().dim(i); + } + std::vector> partition_groups( + sharding.tile_assignment().num_elements() / group_size); + sharding.tile_assignment().Each( + [&](absl::Span indices, int64 partition) { + int64 group_id = 0; + for (int64 i = 0; i < indices.size(); ++i) { + if (!absl::c_linear_search(replication_dims, i)) { + group_id *= sharding.tile_assignment().dim(i); + group_id += indices[i]; + } + } + partition_groups[group_id].push_back(partition); + }); + return partition_groups; +} + } // namespace HloInstruction* SpmdBuilder::AddInstruction( @@ -664,42 +676,57 @@ PartitionedHlo PartitionedHlo::Replicate() { } // 'Tiled' to 'Replicated'. + std::vector all_dims(shape.rank()); + std::iota(all_dims.begin(), all_dims.end(), 0); + HloInstruction* result = ReplicatePartial(all_dims); + result->set_sharding(HloSharding::Replicate()); + return update_cache(PartitionedHlo(result, base_shape_, state_)); +} + +HloInstruction* PartitionedHlo::ReplicatePartial(absl::Span dims) { + CHECK(!sharding().IsTileMaximal()); + const Shape& shard_shape = hlo()->shape(); + Shape target_shape = shard_shape; + Shape padded_target_shape = shard_shape; + for (int64 i : dims) { + padded_target_shape.set_dimensions( + i, shard_shape.dimensions(i) * sharding().tile_assignment().dim(i)); + target_shape.set_dimensions(i, base_shape().dimensions(i)); + } + HloInstruction* result = nullptr; if (state_.collective_ops_creator.create_cross_partition_all_gather) { - result = state_.partitioner->AllGatherShards(state_.b, hlo_, sharding, - NewChannel()); - } - Shape padded_base_shape = shape; - for (int64 i = 0; i < padded_base_shape.rank(); ++i) { - padded_base_shape.set_dimensions( - i, shape.dimensions(i) * sharding.tile_assignment().dim(i)); + result = state_.partitioner->AllGatherShards(state_.b, hlo_, sharding(), + NewChannel(), dims, + state_.collective_ops_creator); } if (result == nullptr) { auto zero = state_.b->AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::Zero(shape.element_type()))); + LiteralUtil::Zero(shard_shape.element_type()))); auto zero_bcast = state_.b->AddInstruction( - HloInstruction::CreateBroadcast(padded_base_shape, zero, {})); + HloInstruction::CreateBroadcast(padded_target_shape, zero, {})); + auto offsets = MakePartitionOffsets(padded_target_shape, sharding(), + state_.partition_id, state_.b, dims); auto dus = state_.b->AddInstruction(HloInstruction::CreateDynamicUpdateSlice( - padded_base_shape, zero_bcast, hlo_, - MakePartitionOffsets(padded_base_shape, sharding, - state_.partition_id, state_.b))); + padded_target_shape, zero_bcast, hlo_, offsets)); HloComputation* reduction = - MakeBinaryAdd(shape.element_type(), state_.module); + MakeBinaryAdd(shard_shape.element_type(), state_.module); auto all_reduce = state_.collective_ops_creator.create_cross_partition_all_reduce( - state_.b, dus, reduction, NewChannel()); + state_.b, dus, reduction, + GetPartitionGroupsForReplication(sharding(), dims), NewChannel()); result = all_reduce; } - if (!ShapeUtil::Compatible(base_shape_, padded_base_shape)) { - std::vector start_indices(shape.rank(), 0); - std::vector strides(shape.rank(), 1); - result = 
state_.b->AddInstruction(HloInstruction::CreateSlice( - base_shape_, result, start_indices, base_shape_.dimensions(), strides)); + if (!ShapeUtil::Compatible(target_shape, padded_target_shape)) { + std::vector start_indices(target_shape.rank(), 0); + std::vector strides(target_shape.rank(), 1); + result = state_.b->AddInstruction( + HloInstruction::CreateSlice(target_shape, result, start_indices, + base_shape_.dimensions(), strides)); } - result->set_sharding(HloSharding::Replicate()); - return update_cache(PartitionedHlo(result, base_shape_, state_)); + return result; } PartitionedHlo PartitionedHlo::Broadcast() const { @@ -728,7 +755,7 @@ PartitionedHlo PartitionedHlo::Broadcast() const { MakeBinaryAdd(shape.element_type(), state_.module); auto result = state_.collective_ops_creator.create_cross_partition_all_reduce( - state_.b, operand, reduction, NewChannel()); + state_.b, operand, reduction, {}, NewChannel()); result->set_sharding(HloSharding::Replicate()); return PartitionedHlo(result, base_shape_, state_); } @@ -796,7 +823,7 @@ PartitionedHlo PartitionedHlo::ReshardWithAllToAll( auto padded_hlo = PadToShape(hlo_, padded_shape, state_.b); // The order of ids in the group must follow the temp_target sharding. - std::vector groups( + std::vector> groups( temp_target.tile_assignment().num_elements() / group_size); temp_target.tile_assignment().Each( [&](absl::Span indices, int64 device) { @@ -810,7 +837,7 @@ PartitionedHlo PartitionedHlo::ReshardWithAllToAll( group_id += indices[dim]; } } - groups[group_id].add_replica_ids(device); + groups[group_id].push_back(device); }); HloInstruction* result = nullptr; @@ -1027,7 +1054,7 @@ Status SpmdPartitioningVisitor::HandleConcatenate(HloInstruction* hlo) { offset += operand->shape().dimensions(dimension); } auto all_reduce = collective_ops_creator_.create_cross_partition_all_reduce( - &b_, temp_output, MakeBinaryAdd(hlo->shape().element_type(), module_), + &b_, temp_output, MakeBinaryAdd(hlo->shape().element_type(), module_), {}, NewChannel()); SetPartitionedHlo(hlo, [&] { auto start_indices = @@ -2153,7 +2180,7 @@ Status SpmdPartitioningVisitor::HandleGather(HloInstruction* hlo) { // Combine from different partitions. auto ar = collective_ops_creator_.create_cross_partition_all_reduce( &b_, filtered, - MakeBinaryAdd(filtered->shape().element_type(), module_), + MakeBinaryAdd(filtered->shape().element_type(), module_), {}, NewChannel()); ar->set_sharding(HloSharding::Replicate()); SetPartitionedHlo(hlo, [&]() { @@ -2449,7 +2476,7 @@ Status SpmdPartitioningVisitor::HandleReduce(HloInstruction* hlo) { if (reduce_sharded_dimension) { CHECK(local_reduce->shape().IsArray()); reduce = collective_ops_creator_.create_cross_partition_all_reduce( - &b_, local_reduce, hlo->to_apply(), NewChannel()); + &b_, local_reduce, hlo->to_apply(), {}, NewChannel()); reduce->set_sharding(HloSharding::Replicate()); } else { reduce = local_reduce; @@ -2917,13 +2944,36 @@ SPMDCollectiveOpsCreator GetDefaultCollectiveOpsCreator(int64 num_partitions, [](SpmdBuilder* b) { return b->AddInstruction(HloInstruction::CreatePartitionId()); }, - [num_replicas](SpmdBuilder* b, HloInstruction* operand, - HloComputation* reduction, int64 channel_id) { + [num_replicas, num_partitions]( + SpmdBuilder* b, HloInstruction* operand, HloComputation* reduction, + const std::vector>& partition_subgroups, + int64 channel_id) { + if (partition_subgroups.size() <= 1) { + std::vector groups(num_replicas); + // TODO(yuanzx): Unify subgroup definition with AllToAll. 
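// Each replica forms its own group here, so with use_global_device_ids=false
// the all-reduce combines data across all partitions within a replica, e.g.
// num_replicas=2 produces groups {{0},{1}}.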
+ for (int64 i = 0; i < num_replicas; ++i) { + groups[i].add_replica_ids(i); + } + return b->AddInstruction(HloInstruction::CreateAllReduce( + operand->shape(), {operand}, reduction, groups, + /*constrain_layout=*/false, channel_id, + /*use_global_device_ids=*/false)); + } + + std::vector device_groups; + device_groups.reserve(partition_subgroups.size() * num_replicas); + for (int64 i = 0; i < num_replicas; ++i) { + for (const auto& pgroup : partition_subgroups) { + device_groups.emplace_back(); + for (int64 pid : pgroup) { + device_groups.back().add_replica_ids(i * num_partitions + pid); + } + } + } return b->AddInstruction(HloInstruction::CreateAllReduce( - operand->shape(), {operand}, reduction, - CreateReplicaGroups(num_replicas), + operand->shape(), {operand}, reduction, device_groups, /*constrain_layout=*/false, channel_id, - /*use_global_device_ids=*/false)); + /*use_global_device_ids=*/true)); }, [](SpmdBuilder* b, HloInstruction* operand, std::vector>& src_dst_pairs, @@ -2932,14 +2982,20 @@ SPMDCollectiveOpsCreator GetDefaultCollectiveOpsCreator(int64 num_partitions, operand->shape(), operand, src_dst_pairs, channel_id)); }, [](SpmdBuilder* b, absl::Span operands, - const std::vector& replica_groups, int64 channel_id, - absl::optional split_dimension) { + const std::vector>& partition_subgroups, + int64 channel_id, absl::optional split_dimension) { std::vector shapes(operands.size(), operands[0]->shape()); const Shape output_shape = (shapes.size() == 1) ? shapes[0] : ShapeUtil::MakeTupleShape(shapes); + std::vector groups(partition_subgroups.size()); + for (int64 i = 0; i < groups.size(); ++i) { + for (int64 id : partition_subgroups[i]) { + groups[i].add_replica_ids(id); + } + } return b->AddInstruction(HloInstruction::CreateAllToAll( - output_shape, operands, replica_groups, + output_shape, operands, groups, /*constrain_layout=*/false, channel_id, split_dimension)); }, [num_replicas, num_partitions]( @@ -2970,10 +3026,10 @@ SpmdPartitioner::SpmdPartitioner(int64 num_partitions, int64 num_replicas, num_partitions, num_replicas, std::move(options), GetDefaultCollectiveOpsCreator(num_partitions, num_replicas)) {} -HloInstruction* SpmdPartitioner::AllGatherShards(SpmdBuilder* b, - HloInstruction* operand, - const HloSharding& sharding, - int64 channel_id) { +HloInstruction* SpmdPartitioner::AllGatherShards( + SpmdBuilder* b, HloInstruction* operand, const HloSharding& sharding, + int64 channel_id, absl::Span selected_dims, + const SPMDCollectiveOpsCreator& collectives_creator) { CHECK(!sharding.IsTileMaximal()); // Add one leading dimension to gather all partitions. std::vector shape; @@ -2983,18 +3039,17 @@ HloInstruction* SpmdPartitioner::AllGatherShards(SpmdBuilder* b, } auto reshape = b->AddInstruction(HloInstruction::CreateReshape( ShapeUtil::MakeShape(operand->shape().element_type(), shape), operand)); - std::vector> partition_subgroups(1); - for (int64 pid : sharding.tile_assignment()) { - partition_subgroups[0].push_back(pid); - } - shape[0] = sharding.tile_assignment().num_elements(); - auto result = collective_ops_creator_.create_cross_partition_all_gather( + auto partition_subgroups = + GetPartitionGroupsForReplication(sharding, selected_dims); + shape[0] = partition_subgroups[0].size(); + auto result = collectives_creator.create_cross_partition_all_gather( b, reshape, ShapeUtil::MakeShape(operand->shape().element_type(), shape), partition_subgroups, channel_id, /*all_gather_dimension=*/0); // If n > 1 dimensions are partitioned, split the leading dimension to n. 
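// For example, all-gathering along two selected dims of sizes 2 and 4 yields a
// leading dimension of 8, which is then reshaped into [2,4,...] and transposed
// so each gathered piece lines up with the dimension it came from.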
std::vector tiled_dims; for (int64 i = 0; i < sharding.tile_assignment().num_dimensions(); ++i) { - if (sharding.tile_assignment().dim(i) > 1) { + if (sharding.tile_assignment().dim(i) > 1 && + absl::c_linear_search(selected_dims, i)) { tiled_dims.push_back(i); } } @@ -3016,7 +3071,8 @@ HloInstruction* SpmdPartitioner::AllGatherShards(SpmdBuilder* b, std::vector xpose_permutation(result->shape().rank()); int64 split_dims_added = 0; for (int64 i = 0; i < xpose_permutation.size(); ++i) { - if (sharding.tile_assignment().dim(i - split_dims_added) == 1) { + if (sharding.tile_assignment().dim(i - split_dims_added) == 1 || + !absl::c_linear_search(selected_dims, i - split_dims_added)) { xpose_permutation[i] = i + tiled_dims.size() - split_dims_added; } else { xpose_permutation[i] = split_dims_added; diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h index 606a7ae5f14..1cb2d551146 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h @@ -82,8 +82,10 @@ struct SPMDCollectiveOpsCreator { std::function create_partition_id; // Function used to create a cross-partition all-reduce HLO. - std::function + std::function>& partition_subgroups, + int64 channel_id)> create_cross_partition_all_reduce; // Function used to create a cross-partition collective-permute HLO. @@ -96,8 +98,8 @@ struct SPMDCollectiveOpsCreator { // Function used to create a cross-partition all-to-all HLO. std::function operands, - const std::vector& replica_groups, int64 channel_id, - absl::optional split_dimension)> + const std::vector>& partition_subgroups, + int64 channel_id, absl::optional split_dimension)> create_cross_partition_all_to_all; // Function used to create a cross-partition all-gather HLO. This is optional: @@ -169,10 +171,13 @@ class SpmdPartitioner : public HloModulePass { // The default uses a single all-gather even if there are multiple sharded // dimensions, and adds potential reshapes and transposes to achieve that. // If it returns false, the partitioner will fall back to all-reduce. - virtual HloInstruction* AllGatherShards(SpmdBuilder* b, - HloInstruction* operand, - const HloSharding& sharding, - int64 channel_id); + // `selected_dims` specifies the dimensions along which the all-gather happens + // in the tiled sharding, which allows potentially creating a subgroup + // all-gather. + virtual HloInstruction* AllGatherShards( + SpmdBuilder* b, HloInstruction* operand, const HloSharding& sharding, + int64 channel_id, absl::Span selected_dims, + const SPMDCollectiveOpsCreator& collectives_creator); protected: virtual std::unique_ptr CreateVisitor( @@ -215,7 +220,11 @@ class PartitionedHlo { std::tuple> window_reshard_cache; }; + // Use std::unordered_map for pointer stability. std::unordered_map per_hlo_cache; + // Caches for nested partitioning of grouped sharding. Each string key + // represents a unique way of grouping devices. + std::unordered_map groupd_caches; }; struct PartitioningState { SpmdBuilder* b; @@ -270,15 +279,18 @@ class PartitionedHlo { const PartitioningState& state() const { return state_; } + // Helper function to replicate the data on all devices. Could only modify + // the reshard cache. + PartitionedHlo Replicate(); + + // Helper function to replicate the data for partitions along the given dims. 
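// Requires a tiled (non-tile-maximal) sharding; dimensions not listed in
// `dims` keep their existing partitioning, and the returned instruction has
// no sharding assigned.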
+ HloInstruction* ReplicatePartial(absl::Span dims); + private: // Same as Reshard except that it does not explicitly modify the reshard // cache, although it would indirectly modify by calling Replicate(). PartitionedHlo ReshardNoCache(const HloSharding& target); - // Helper function to replicate the data on all devices. Could only modify - // the reshard cache. - PartitionedHlo Replicate(); - // Helper function to broadcast data from a single device to all devices. PartitionedHlo Broadcast() const; @@ -417,6 +429,16 @@ class SpmdPartitioningVisitor : public DfsHloVisitorWithDefault { StatusOr DoPartition(HloComputation* computation, const HloSharding& root_sharding); + // Information about a loop created for windowed dot-general. Used when + // DoCodeMotionForWindowedDotGeneralLoops() executes after the visitor + // finishes traversing the graph. + struct WindowedDotGeneralLoop { + HloInstruction* while_loop; + int64 windowed_operand; + bool windowed_in_contracting_dims; + bool windowed_in_batch_dims; + }; + private: Status Preprocess(HloInstruction* hlo) override; Status Postprocess(HloInstruction* hlo) override; @@ -445,15 +467,6 @@ class SpmdPartitioningVisitor : public DfsHloVisitorWithDefault { // partitioned instruction. ConstHloInstructionMap partitioned_instructions_; - // Information about a loop created for windowed dot-general. Used when - // DoCodeMotionForWindowedDotGeneralLoops() executes after the visitor - // finishes traversing the graph. - struct WindowedDotGeneralLoop { - HloInstruction* while_loop; - int64 windowed_operand; - bool windowed_in_contracting_dims; - bool windowed_in_batch_dims; - }; std::vector windowed_dot_general_loops_; HloInstruction* visiting_hlo_; diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc index 1045d1187b8..5f3fd8d53e7 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc @@ -2218,7 +2218,7 @@ ENTRY entry { TF_ASSERT_OK_AND_ASSIGN(auto module, PartitionComputation(hlo_string, /*num_devices=*/2)); - std::cout << module->ToString(); + VLOG(1) << module->ToString(); auto sort = FindInstruction(module.get(), "sort"); EXPECT_EQ(sort->operand(0)->shape().dimensions(1), 209664); EXPECT_EQ(sort->operand(1)->shape().dimensions(1), 209664); @@ -2294,7 +2294,7 @@ ENTRY entry TF_ASSERT_OK_AND_ASSIGN(auto module, PartitionComputation(hlo_string, /*num_devices=*/2)); - std::cout << module->ToString(); + VLOG(1) << module->ToString(); auto sort = FindInstruction(module.get(), "sort"); EXPECT_EQ(sort->operand(0)->shape().dimensions(1), 209664); EXPECT_EQ(sort->operand(1)->shape().dimensions(1), 209664); @@ -3842,6 +3842,154 @@ ENTRY entry { EXPECT_THAT(root, op::Copy(op::CollectivePermute(reshape2))); } +TEST_F(SpmdPartitioningTest, Dot2DPartitionedNonContractingAndContracting0) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[48,12] parameter(0), sharding={devices=[2,2]0,1,2,3} + %rhs = f32[32,12] parameter(1), sharding={devices=[2,2]0,1,2,3} + ROOT %dot = f32[48,32] dot(%lhs, %rhs), + lhs_batch_dims={}, rhs_batch_dims={}, + lhs_contracting_dims={1}, rhs_contracting_dims={1}, + sharding={devices=[2,2]0,1,2,3} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/4)); + VLOG(1) << module->ToString(); + + auto lhs = AllOf(op::Shape("f32[24,6]"), op::Parameter(0)); + auto partial_replicated_lhs = + 
AllOf(op::Shape("f32[24,12]"), + op::AllReduce(op::DynamicUpdateSlice(_, lhs, _, _))); + auto rhs = AllOf(op::Shape("f32[16,6]"), op::Parameter(1)); + auto partial_replicated_rhs = + AllOf(op::Shape("f32[16,12]"), op::AllReduce(op::DynamicUpdateSlice( + _, op::CollectivePermute(rhs), _, _))); + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, + AllOf(op::Dot(partial_replicated_lhs, partial_replicated_rhs), + op::Shape("f32[24,16]"))); +} + +TEST_F(SpmdPartitioningTest, Dot2DPartitionedNonContractingAndContracting1) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[48,100] parameter(0), sharding={devices=[2,2]0,1,2,3} + %rhs = f32[32,100] parameter(1), sharding={devices=[2,2]0,1,2,3} + ROOT %dot = f32[48,32] dot(%lhs, %rhs), + lhs_batch_dims={}, rhs_batch_dims={}, + lhs_contracting_dims={1}, rhs_contracting_dims={1}, + sharding={devices=[2,2]0,1,2,3} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/4)); + VLOG(1) << module->ToString(); + + auto lhs = AllOf(op::Shape("f32[24,50]"), op::Parameter(0)); + auto rhs = AllOf(op::Shape("f32[16,50]"), op::Parameter(1)); + auto partial_replicated_rhs = + AllOf(op::Shape("f32[32,50]"), + op::AllReduce(op::DynamicUpdateSlice(_, rhs, _, _))); + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT( + root, AllOf(op::Shape("f32[24,16]"), + op::DynamicSlice( + op::AllReduce(AllOf(op::Dot(lhs, partial_replicated_rhs), + op::Shape("f32[24,32]"))), + _, _))); +} + +TEST_F(SpmdPartitioningTest, Dot2DPartitionedBatchAndNonContracting) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[4,24,100] parameter(0), sharding={devices=[2,2,1]0,1,2,3} + %rhs = f32[4,32,100] parameter(1), sharding={devices=[2,2,1]0,1,2,3} + ROOT %dot = f32[4,24,32] dot(%lhs, %rhs), + lhs_batch_dims={0}, rhs_batch_dims={0}, + lhs_contracting_dims={2}, rhs_contracting_dims={2}, + sharding={devices=[2,2,1]0,1,2,3} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/4)); + VLOG(1) << module->ToString(); + + auto lhs = AllOf(op::Shape("f32[2,12,100]"), op::Parameter(0)); + auto rhs = AllOf(op::Shape("f32[2,16,100]"), op::Parameter(1)); + auto partial_replicated_rhs = + AllOf(op::Shape("f32[2,32,100]"), + op::AllReduce(op::DynamicUpdateSlice(_, rhs, _, _, _))); + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, AllOf(op::Shape("f32[2,12,32]"), + op::Dot(lhs, partial_replicated_rhs))); +} + +TEST_F(SpmdPartitioningTest, + Dot2DPartitionedBatchNonContractingAndContracting) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[4,24,100] parameter(0), sharding={devices=[2,1,2]0,1,2,3} + %rhs = f32[4,32,100] parameter(1), sharding={devices=[2,2,1]0,1,2,3} + ROOT %dot = f32[4,24,32] dot(%lhs, %rhs), + lhs_batch_dims={0}, rhs_batch_dims={0}, + lhs_contracting_dims={2}, rhs_contracting_dims={2}, + sharding={devices=[2,1,2]0,1,2,3} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/4)); + VLOG(1) << module->ToString(); + + auto lhs = AllOf(op::Shape("f32[2,24,50]"), op::Parameter(0)); + auto rhs = AllOf(op::Shape("f32[2,16,100]"), op::Parameter(1)); + auto partial_replicated_lhs = + AllOf(op::Shape("f32[2,24,100]"), + op::AllReduce(op::DynamicUpdateSlice(_, lhs, _, _, _))); + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, 
AllOf(op::Shape("f32[2,24,16]"), + op::Dot(partial_replicated_lhs, rhs))); +} + +TEST_F(SpmdPartitioningTest, Dot2DPartitionedBatchAndReshard) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[4,8,24,100] parameter(0), sharding={devices=[2,1,2,1]0,1,2,3} + %rhs = f32[4,8,32,100] parameter(1), sharding={devices=[2,1,2,1]0,1,2,3} + ROOT %dot = f32[4,8,24,32] dot(%lhs, %rhs), + lhs_batch_dims={0,1}, rhs_batch_dims={0,1}, + lhs_contracting_dims={3}, rhs_contracting_dims={3}, + sharding={devices=[1,2,2,1]0,1,2,3} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/4)); + VLOG(1) << module->ToString(); + + auto lhs = AllOf(op::Shape("f32[2,8,12,100]"), op::Parameter(0)); + auto rhs = AllOf(op::Shape("f32[2,8,16,100]"), op::Parameter(1)); + auto partial_replicated_rhs = + AllOf(op::Shape("f32[2,8,32,100]"), + op::AllReduce(op::DynamicUpdateSlice(_, rhs, _, _, _, _))); + auto dot = + AllOf(op::Shape("f32[2,8,12,32]"), op::Dot(lhs, partial_replicated_rhs)); + auto reshape = AllOf(op::Shape("f32[2,2,4,12,32]"), op::Reshape(dot)); + auto all_to_all = AllOf(op::Shape("f32[2,2,4,12,32]"), op::AllToAll(reshape)); + auto xpose = AllOf(op::Shape("f32[2,2,4,12,32]"), op::Transpose(all_to_all)); + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, AllOf(op::Shape("f32[4,4,12,32]"), op::Reshape(xpose))); +} + } // namespace } // namespace spmd } // namespace xla diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc index 6beed5a15e5..4fc8b1585b6 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc @@ -16,7 +16,11 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h" #include +#include +#include "absl/algorithm/container.h" +#include "absl/container/flat_hash_map.h" +#include "absl/strings/str_join.h" #include "absl/types/optional.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/hlo_casting_utils.h" @@ -143,10 +147,10 @@ Shape MakeNonPaddedShapeForGivenPartition(const Shape& shape, return partition_shape; } -std::vector MakePartitionOffsets(const Shape& shape, - const HloSharding& sharding, - HloInstruction* partition_id, - SpmdBuilder* b) { +std::vector MakePartitionOffsets( + const Shape& shape, const HloSharding& sharding, + HloInstruction* partition_id, SpmdBuilder* b, + absl::Span dims) { CHECK(!shape.IsTuple()); Array2D offset_array( @@ -158,7 +162,8 @@ std::vector MakePartitionOffsets(const Shape& shape, LiteralUtil::CreateR2FromArray2D(offset_array))); std::vector offsets; for (int64 i = 0; i < shape.rank(); ++i) { - if (sharding.tile_assignment().dim(i) == 1) { + if (sharding.tile_assignment().dim(i) == 1 || + (!dims.empty() && !absl::c_linear_search(dims, i))) { offsets.push_back(b->AddInstruction( HloInstruction::CreateConstant(LiteralUtil::Zero(S32)))); } else { @@ -978,5 +983,252 @@ bool CanReshardWithCollectivePermute(const HloSharding& source, source.tile_assignment() != target.tile_assignment(); } +GroupedSharding GroupShardingOnDims(const HloSharding& sharding, + absl::Span group_dims) { + CHECK(!sharding.IsTileMaximal()); + std::vector grouped_tiling_dims = + sharding.tile_assignment().dimensions(); + std::vector group_dim_sizes(group_dims.size()); + for (int64 i = 0; i < group_dims.size(); ++i) { + group_dim_sizes[i] = grouped_tiling_dims[group_dims[i]]; + grouped_tiling_dims[group_dims[i]] = 1; + } + std::vector> device_groups(Product(group_dim_sizes)); + sharding.tile_assignment().Each( + [&](absl::Span indices, int64 device) { + int64 group_id = 0; + for (int64 dim : group_dims) { + group_id *= sharding.tile_assignment().dim(dim); + group_id += indices[dim]; + } + device_groups[group_id].push_back(device); + }); + Array grouped_tiling(grouped_tiling_dims); + grouped_tiling.FillIota(0); + return GroupedSharding( + std::move(device_groups), + std::vector(group_dims.begin(), group_dims.end()), + std::move(group_dim_sizes), sharding.tile_assignment().num_dimensions(), + HloSharding::Tile(grouped_tiling)); +} + +HloSharding UngroupSharding(const GroupedSharding& grouped_sharding) { + CHECK(!grouped_sharding.sharding.IsTileMaximal()); + std::vector tiling_dims = + grouped_sharding.sharding.tile_assignment().dimensions(); + for (int64 i = 0; i < grouped_sharding.group_dims.size(); ++i) { + tiling_dims[grouped_sharding.group_dims[i]] = + grouped_sharding.group_dim_sizes[i]; + } + Array tiling(tiling_dims); + grouped_sharding.sharding.tile_assignment().Each( + [&](absl::Span indices, int64 device) { + std::vector ungrouped_inds(indices.begin(), indices.end()); + for (int64 g = 0; g < grouped_sharding.device_groups.size(); ++g) { + int64 remaining_group_index = g; + for (int64 i = grouped_sharding.group_dims.size() - 1; i >= 0; --i) { + ungrouped_inds[grouped_sharding.group_dims[i]] = + remaining_group_index % grouped_sharding.group_dim_sizes[i]; + remaining_group_index /= grouped_sharding.group_dim_sizes[i]; + } + tiling(ungrouped_inds) = grouped_sharding.device_groups[g][device]; + } + }); + return HloSharding::Tile(tiling); +} + +GroupedSharding AlignGroupsWith(GroupedSharding grouped_sharding, + const GroupedSharding& 
reference, + bool ignore_group_order) { + // Returns src -> dst index mapping. + auto get_permutation = [](absl::Span src, + absl::Span dst) { + CHECK_EQ(src.size(), dst.size()); + absl::flat_hash_map dst_reverse_map; + for (int64 i = 0; i < dst.size(); ++i) { + dst_reverse_map[dst[i]] = i; + } + std::vector permutation(src.size()); + for (int64 i = 0; i < src.size(); ++i) { + auto it = dst_reverse_map.find(src[i]); + CHECK(it != dst_reverse_map.end()); + permutation[i] = it->second; + } + return permutation; + }; + CHECK_EQ(grouped_sharding.device_groups.size(), + reference.device_groups.size()); + absl::flat_hash_map device_to_ref_group; + for (int64 g = 0; g < reference.device_groups.size(); ++g) { + for (int64 device : reference.device_groups[g]) { + device_to_ref_group[device] = g; + } + } + auto unique_ref_dev_group = [&](absl::Span devices) -> int64 { + int64 ref_g = -1; + for (int64 device : devices) { + if (ref_g == -1) { + ref_g = device_to_ref_group[device]; + } else if (ref_g != device_to_ref_group[device]) { + return -1; + } + } + return ref_g; + }; + bool matching_groups = true; + std::vector original_src_to_ref_permutation; + for (int64 g = 0; g < grouped_sharding.device_groups.size(); ++g) { + int64 ref_g = unique_ref_dev_group(grouped_sharding.device_groups[g]); + if (ref_g < 0 || (!ignore_group_order && g != ref_g)) { + matching_groups = false; + break; + } + if (g == 0) { + original_src_to_ref_permutation = get_permutation( + grouped_sharding.device_groups[g], reference.device_groups[ref_g]); + } + } + if (matching_groups) { + auto tiles = grouped_sharding.sharding.tile_assignment(); + tiles.Each([&](absl::Span indices, int64* device) { + *device = original_src_to_ref_permutation[*device]; + }); + grouped_sharding.sharding = HloSharding::Tile(tiles); + } + grouped_sharding.device_groups = std::move(reference.device_groups); + return grouped_sharding; +} + +Shape GetPerGroupBaseShape(const GroupedSharding& grouped_sharding, + const Shape& original_base_shape) { + auto result = original_base_shape; + for (int64 i = 0; i < grouped_sharding.group_dims.size(); ++i) { + int64 dim = grouped_sharding.group_dims[i]; + int64 groups = grouped_sharding.group_dim_sizes[i]; + result.set_dimensions(dim, result.dimensions(dim) / groups); + } + return result; +} + +namespace { + +HloInstruction* GetInGroupPartitionId( + HloInstruction* partition_id, + const std::vector>& device_groups, SpmdBuilder* b) { + int64 total_devices = device_groups.size() * device_groups[0].size(); + std::vector in_group_ids(total_devices); + for (uint32 i = 0; i < device_groups.size(); ++i) { + for (uint32 j = 0; j < device_groups[i].size(); ++j) { + in_group_ids[device_groups[i][j]] = j; + } + } + auto id_table = b->AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR1(in_group_ids))); + return b->AddInstruction(HloInstruction::CreateReshape( + ShapeUtil::MakeScalarShape(U32), + b->AddInstruction(HloInstruction::CreateDynamicSlice( + ShapeUtil::MakeShape(U32, {1}), id_table, {partition_id}, {1})))); +} + +SPMDCollectiveOpsCreator GetPerGroupCollectiveOpsCreator( + const SPMDCollectiveOpsCreator& creator, + const std::vector>& device_groups) { + SPMDCollectiveOpsCreator result; + result.create_partition_id = [creator, device_groups](SpmdBuilder* b) { + return GetInGroupPartitionId(creator.create_partition_id(b), device_groups, + b); + }; + auto expand_partition_groups = + [device_groups]( + const std::vector>& partition_subgroups) { + if (partition_subgroups.empty()) { + return device_groups; + 
} + std::vector> result(partition_subgroups.size() * + device_groups.size()); + for (int64 g = 0; g < device_groups.size(); ++g) { + for (int64 i = 0; i < partition_subgroups.size(); ++i) { + result[g * partition_subgroups.size() + i].resize( + partition_subgroups[i].size()); + for (int64 j = 0; j < partition_subgroups[i].size(); ++j) { + result[g * partition_subgroups.size() + i][j] = + device_groups[g][partition_subgroups[i][j]]; + } + } + } + return result; + }; + result.create_cross_partition_all_reduce = + [creator, expand_partition_groups]( + SpmdBuilder* b, HloInstruction* operand, HloComputation* reduction, + const std::vector>& partition_subgroups, + int64 channel_id) { + return creator.create_cross_partition_all_reduce( + b, operand, reduction, expand_partition_groups(partition_subgroups), + channel_id); + }; + result.create_cross_partition_collective_permute = + [creator, device_groups]( + SpmdBuilder* b, HloInstruction* operand, + std::vector>& src_dst_pairs, + int64 next_channel_id) { + std::vector> expanded_pairs( + src_dst_pairs.size() * device_groups.size()); + for (int64 g = 0; g < device_groups.size(); ++g) { + for (int64 i = 0; i < src_dst_pairs.size(); ++i) { + expanded_pairs[g * src_dst_pairs.size() + i] = + std::pair{ + device_groups[g][src_dst_pairs[i].first], + device_groups[g][src_dst_pairs[i].second]}; + } + } + return creator.create_cross_partition_collective_permute( + b, operand, expanded_pairs, next_channel_id); + }; + result.create_cross_partition_all_to_all = + [creator, expand_partition_groups]( + SpmdBuilder* b, absl::Span operands, + const std::vector>& partition_subgroups, + int64 channel_id, absl::optional split_dimension) { + return creator.create_cross_partition_all_to_all( + b, operands, expand_partition_groups(partition_subgroups), + channel_id, split_dimension); + }; + if (creator.create_cross_partition_all_gather) { + result.create_cross_partition_all_gather = + [creator, expand_partition_groups]( + SpmdBuilder* b, HloInstruction* operand, const Shape& ag_shape, + const std::vector>& partition_subgroups, + int64 channel_id, int64 all_gather_dimension) { + return creator.create_cross_partition_all_gather( + b, operand, ag_shape, + expand_partition_groups(partition_subgroups), channel_id, + all_gather_dimension); + }; + } + return result; +} + +} // namespace + +PartitionedHlo::PartitioningState CreatePerGroupPartitioningState( + const PartitionedHlo::PartitioningState& state, + const std::vector>& device_groups, SpmdBuilder* b) { + auto result = state; + result.collective_ops_creator = GetPerGroupCollectiveOpsCreator( + state.collective_ops_creator, device_groups); + result.partition_id = + GetInGroupPartitionId(state.partition_id, device_groups, b); + // Create a string key for the groups. 
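// For example, device_groups {{0,1},{2,3}} produces the key "0,1;2,3".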
+ std::vector per_group_strings(device_groups.size()); + for (int64 i = 0; i < per_group_strings.size(); ++i) { + per_group_strings[i] = absl::StrJoin(device_groups[i], ","); + } + result.reshard_cache = + &state.reshard_cache + ->groupd_caches[absl::StrJoin(per_group_strings, ";")]; + return result; +} + } // namespace spmd } // namespace xla diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h index 7b737daf78c..6e68375f9b9 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h @@ -87,10 +87,12 @@ Shape MakeNonPaddedShapeForGivenPartition(const Shape& shape, // Generates the HLO instructions that represent the dimension offsets on any // device. The size of the returned vector is the rank of the given shape. -std::vector MakePartitionOffsets(const Shape& shape, - const HloSharding& sharding, - HloInstruction* partition_id, - SpmdBuilder* b); +// If `dims` is non-empty, the generated offsets will only be non-zero for those +// dimensions. +std::vector MakePartitionOffsets( + const Shape& shape, const HloSharding& sharding, + HloInstruction* partition_id, SpmdBuilder* b, + absl::Span dims = {}); // Returns the offsets of the partition in the tile assignment. std::vector MakeTiledPartitionOrdinals( @@ -276,6 +278,48 @@ GetReshardAllToAllSourceTargetDims(const HloSharding& source, bool CanReshardWithCollectivePermute(const HloSharding& source, const HloSharding& target); +// Represents grouping devices in a tiled sharding along certain dimensions. +// Elements in group dimensions define different device groups, and the sharding +// represents the in-group sharding. +struct GroupedSharding { + GroupedSharding(std::vector> device_groups, + std::vector group_dims, + std::vector group_dim_sizes, int64 rank, + HloSharding grouped_sharding) + : device_groups(std::move(device_groups)), + group_dims(std::move(group_dims)), + group_dim_sizes(std::move(group_dim_sizes)), + sharding(std::move(grouped_sharding)) {} + std::vector> device_groups; + std::vector group_dims; + std::vector group_dim_sizes; + int64 rank; + HloSharding sharding; +}; + +// Creates a GroupedSharding for a tiled sharding. +GroupedSharding GroupShardingOnDims(const HloSharding& sharding, + absl::Span group_dims); + +// Reconstructs the ungrouped sharding from a GroupedSharding. +HloSharding UngroupSharding(const GroupedSharding& grouped_sharding); + +// Returns a new GroupedSharding that has the same group definition of +// `reference`. +GroupedSharding AlignGroupsWith(GroupedSharding grouped_sharding, + const GroupedSharding& reference, + bool ignore_group_order = false); + +// Returns the per-group base shape, i.e., before applying the in-group +// sharding. +Shape GetPerGroupBaseShape(const GroupedSharding& grouped_sharding, + const Shape& original_base_shape); + +// Creates the nested partitioner state for in-group patitioning. +PartitionedHlo::PartitioningState CreatePerGroupPartitioningState( + const PartitionedHlo::PartitioningState& state, + const std::vector>& device_groups, SpmdBuilder* b); + } // namespace spmd } // namespace xla From 33c2506397c5bd8b0195dece71f02569fabddea1 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Thu, 23 Jul 2020 18:41:28 -0700 Subject: [PATCH 1237/2522] Algebraic simplification of multiplication chain with constants PiperOrigin-RevId: 322913019 Change-Id: I6d814a107466f7abb78e30d0ae9e0e5c2bdf15ab --- .../xla/service/algebraic_simplifier.cc | 27 +++++++++++++++++++ .../xla/service/algebraic_simplifier_test.cc | 23 ++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index db1c86d9fe3..55b94673f72 100755 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -2475,6 +2475,33 @@ Status AlgebraicSimplifierVisitor::HandleMultiply(HloInstruction* multiply) { } } + { + HloInstruction *a, *b, *c1, *c2; + // Mul(Mul(x, constant1), Mul(y, constant2)) => Mul(Mul(x, y), + // constant1*constant2) + if (Match(multiply, + m::Multiply( + m::MultiplyAnyOrder(m::NonConstant(&a), m::Constant(&c1)), + m::MultiplyAnyOrder(m::NonConstant(&b), m::Constant(&c2))))) { + TF_ASSIGN_OR_RETURN(auto* product_of_constants, + MakeBinaryHlo(HloOpcode::kMultiply, c1, c2)); + if (ShapeUtil::IsScalar(product_of_constants->shape()) && + !ShapeUtil::IsScalar(multiply->shape())) { + product_of_constants = + computation_->AddInstruction(HloInstruction::CreateBroadcast( + multiply->shape(), product_of_constants, {})); + } + + return ReplaceWithNewInstruction( + multiply, + HloInstruction::CreateBinary( + multiply->shape(), HloOpcode::kMultiply, + computation_->AddInstruction(HloInstruction::CreateBinary( + multiply->shape(), HloOpcode::kMultiply, a, b)), + product_of_constants)); + } + } + VLOG(10) << "trying transform [(A * C1) * C2 => A * (C1 * C2)]"; HloInstruction *a, *c1, *c2; if (Match(multiply, diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index ea21c7c1d21..63df97e1cae 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -117,6 +117,29 @@ TEST_F(AlgebraicSimplifierTest, FactorFpAddition) { m::ConstantScalar(0.125)))); } +// (A*C1) * (B*C2) => (A*B)*(C1*C2) +TEST_F(AlgebraicSimplifierTest, MultiplyChain) { + const char* kModuleStr = R"( + HloModule m + test { + p0 = f32[] parameter(0) + p1 = f32[] parameter(1) + c = f32[] constant(2) + d = f32[] constant(4) + x = f32[] multiply(p0, c) + y = f32[] multiply(p1, d) + ROOT z = f32[] multiply(x, y) + } + )"; + TF_ASSERT_OK_AND_ASSIGN(auto m, ParseAndReturnVerifiedModule(kModuleStr)); + ASSERT_TRUE(AlgebraicSimplifier(default_options_).Run(m.get()).ValueOrDie()); + EXPECT_THAT( + m->entry_computation()->root_instruction(), + GmockMatch(m::MultiplyAnyOrder( + m::MultiplyAnyOrder(m::Parameter(0), m::Parameter(1)), + m::MultiplyAnyOrder(m::ConstantScalar(2), m::ConstantScalar(4))))); +} + // A*C + B*C => (A+B)*C if C is a broadcast of a floating-point power of 2. TEST_F(AlgebraicSimplifierTest, FactorFpAdditionWithBroadcast) { const char* kModuleStr = R"( From 5f22efd21509a2404edcb03bd97b152c553b9592 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 23 Jul 2020 19:23:17 -0700 Subject: [PATCH 1238/2522] Update Eigen to: https://gitlab.com/libeigen/eigen/-/commit/2ce2f5198929caab4b41a6ad1b9c93f67d8b9a69 This adds AVX acceleration for bfloat16 (AVX512 already supports it). 
PiperOrigin-RevId: 322917791 Change-Id: I5a4f2e082333841a41c6fad78ac6bba48cbb4a9e --- tensorflow/core/framework/bfloat16_test.cc | 6 ++---- tensorflow/workspace.bzl | 8 ++++---- third_party/eigen3/gpu_packet_math.patch | 21 --------------------- 3 files changed, 6 insertions(+), 29 deletions(-) diff --git a/tensorflow/core/framework/bfloat16_test.cc b/tensorflow/core/framework/bfloat16_test.cc index 11af55f6e26..fe1296f19fe 100644 --- a/tensorflow/core/framework/bfloat16_test.cc +++ b/tensorflow/core/framework/bfloat16_test.cc @@ -43,8 +43,7 @@ TEST(Bfloat16Test, FlushDenormalsToZero) { } else { ASSERT_EQ(bf_trunc.value, 0x0000) << denorm; } - bfloat16 bf_round = - bfloat16(Eigen::bfloat16_impl::float_to_bfloat16_rtne(denorm)); + bfloat16 bf_round(denorm); ASSERT_EQ(static_cast(bf_round), 0.0f); if (std::signbit(denorm)) { ASSERT_EQ(bf_round.value, 0x8000) << denorm; @@ -100,8 +99,7 @@ TEST_P(Bfloat16Test, TruncateTest) { EXPECT_EQ(GetParam().expected_truncation, float(truncated)); - bfloat16 rounded = bfloat16( - Eigen::bfloat16_impl::float_to_bfloat16_rtne((GetParam().input))); + bfloat16 rounded(GetParam().input); if (std::isnan(GetParam().input)) { EXPECT_TRUE(std::isnan(float(rounded)) || std::isinf(float(rounded))); return; diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 2d708bd1b5c..e971df0330d 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -237,11 +237,11 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): name = "eigen_archive", build_file = clean_dep("//third_party:eigen.BUILD"), patch_file = clean_dep("//third_party/eigen3:gpu_packet_math.patch"), - sha256 = "2ec918767935cf6ec92b1d52c53a304cd13148f0b3dbdf3c3632de4a581d5a5a", # SHARED_EIGEN_SHA - strip_prefix = "eigen-8889a2c1c648f5dd1413dc2d94c2407c7ce1bd32", + sha256 = "9d8cbf2bd665cbb7b684bf4c6c5482b98dc6965847108f260c077049da04bee8", # SHARED_EIGEN_SHA + strip_prefix = "eigen-2ce2f5198929caab4b41a6ad1b9c93f67d8b9a69", urls = [ - "https://storage.googleapis.com/mirror.tensorflow.org/gitlab.com/libeigen/eigen/-/archive/8889a2c1c648f5dd1413dc2d94c2407c7ce1bd32/eigen-8889a2c1c648f5dd1413dc2d94c2407c7ce1bd32.tar.gz", - "https://gitlab.com/libeigen/eigen/-/archive/8889a2c1c648f5dd1413dc2d94c2407c7ce1bd32/eigen-8889a2c1c648f5dd1413dc2d94c2407c7ce1bd32.tar.gz", + "https://storage.googleapis.com/mirror.tensorflow.org/gitlab.com/libeigen/eigen/-/archive/2ce2f5198929caab4b41a6ad1b9c93f67d8b9a69/eigen-2ce2f5198929caab4b41a6ad1b9c93f67d8b9a69.tar.gz", + "https://gitlab.com/libeigen/eigen/-/archive/2ce2f5198929caab4b41a6ad1b9c93f67d8b9a69/eigen-2ce2f5198929caab4b41a6ad1b9c93f67d8b9a69.tar.gz", ], ) diff --git a/third_party/eigen3/gpu_packet_math.patch b/third_party/eigen3/gpu_packet_math.patch index 44e2f9a9e06..fdc8961b93d 100644 --- a/third_party/eigen3/gpu_packet_math.patch +++ b/third_party/eigen3/gpu_packet_math.patch @@ -23,24 +23,3 @@ diff -ru a/Eigen/src/Geometry/arch/Geometry_SSE.h b/Eigen/src/Geometry/arch/Geom return res; } }; -diff -ru a/Eigen/src/Core/arch/Default/BFloat16.h a/Eigen/src/Core/arch/Default/BFloat16.h ---- a/Eigen/src/Core/arch/Default/BFloat16.h -+++ a/Eigen/src/Core/arch/Default/BFloat16.h -@@ -291,7 +291,7 @@ - return output; - } - const uint16_t* p = reinterpret_cast(&v); --#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -+#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) - output.value = p[0]; - #else - output.value = p[1]; -@@ -493,7 +493,7 @@ - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float bfloat16_to_float(__bfloat16_raw h) { - float 
result = 0; - unsigned short* q = reinterpret_cast(&result); --#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -+#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) - q[0] = h.value; - #else - q[1] = h.value; From a9798f4432436234871a9f0fb12b20fc3731ff8e Mon Sep 17 00:00:00 2001 From: Deven Desai Date: Tue, 30 Jun 2020 19:52:19 +0000 Subject: [PATCH 1239/2522] [ROCm] Fix for ROCm CSB Breakage - 200630 The following commit (which switched G's internal CI to use ROCm 3.5) breaks the ROCm CSB build (which still uses ROCm 3.3) https://github.com/tensorflow/tensorflow/commit/22def20bae7be6d5b790b360abed5919385b16c2 This PR/commit simply puts back a couple of codes that were removed the the previous commit, and makes them condition on ROCm 3.5. Note that the ROCm CSB build will be switching to ROCm 3.5 or higher in the near future, at which point all codes the `true` block for `#if TENSORFLOW_COMPILER_IS_HIP_CLANG` will become default, and those in eht `false / #else` block will be removed. --- tensorflow/core/util/gpu_launch_config.h | 28 ++++++++++++++++++++++++ third_party/gpus/rocm_configure.bzl | 9 +++++++- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/util/gpu_launch_config.h b/tensorflow/core/util/gpu_launch_config.h index 0b943e917da..4c2df39e1a2 100644 --- a/tensorflow/core/util/gpu_launch_config.h +++ b/tensorflow/core/util/gpu_launch_config.h @@ -168,10 +168,25 @@ GpuLaunchConfig GetGpuLaunchConfig(int work_element_count, block_size_limit); CHECK_EQ(err, cudaSuccess); #elif TENSORFLOW_USE_ROCM +#if TENSORFLOW_COMPILER_IS_HIP_CLANG hipError_t err = hipOccupancyMaxPotentialBlockSize( &block_count, &thread_per_block, func, dynamic_shared_memory_size, block_size_limit); CHECK_EQ(err, hipSuccess); +#else + // Earlier versions of this HIP routine incorrectly returned void. + // TODO re-enable hipError_t error checking when HIP is fixed. + // ROCm interface uses unsigned int, convert after checking + uint32_t block_count_uint = 0; + uint32_t thread_per_block_uint = 0; + CHECK_GE(block_size_limit, 0); + uint32_t block_size_limit_uint = static_cast(block_size_limit); + hipOccupancyMaxPotentialBlockSize(&block_count_uint, &thread_per_block_uint, + func, dynamic_shared_memory_size, + block_size_limit_uint); + block_count = static_cast(block_count_uint); + thread_per_block = static_cast(thread_per_block_uint); +#endif #endif block_count = @@ -201,9 +216,22 @@ GpuLaunchConfig GetGpuLaunchConfigFixedBlockSize( &block_count, func, fixed_block_size, dynamic_shared_memory_size); CHECK_EQ(err, cudaSuccess); #elif TENSORFLOW_USE_ROCM +#if TENSORFLOW_COMPILER_IS_HIP_CLANG hipError_t err = hipOccupancyMaxActiveBlocksPerMultiprocessor( &block_count, func, fixed_block_size, dynamic_shared_memory_size); CHECK_EQ(err, hipSuccess); +#else + // Apply the heuristic in GetGpuLaunchConfig(int, const Eigen::GpuDevice&) + // that the kernel is quite simple and will largely be memory-limited. + const int physical_thread_count = std::min( + d.getNumGpuMultiProcessors() * d.maxGpuThreadsPerMultiProcessor(), + work_element_count); + // Assume the kernel be simple enough that it is okay to use 1024 threads + // per workgroup. 
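// Worked example with assumed numbers (60 multiprocessors, 2560 threads per
// multiprocessor, 1024 max threads per block, work_element_count = 1000000):
// physical_thread_count = min(60 * 2560, 1000000) = 153600,
// thread_per_block = min(1024, 1024) = 1024,
// block_count = min(DivUp(153600, 1024), 60) = min(150, 60) = 60.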
+ int thread_per_block = std::min(1024, d.maxGpuThreadsPerBlock()); + block_count = std::min(DivUp(physical_thread_count, thread_per_block), + d.getNumGpuMultiProcessors()); +#endif #endif block_count = std::min(block_count * d.getNumGpuMultiProcessors(), DivUp(work_element_count, fixed_block_size)); diff --git a/third_party/gpus/rocm_configure.bzl b/third_party/gpus/rocm_configure.bzl index 6a1204b87db..d28337de836 100644 --- a/third_party/gpus/rocm_configure.bzl +++ b/third_party/gpus/rocm_configure.bzl @@ -35,7 +35,7 @@ load( _GCC_HOST_COMPILER_PATH = "GCC_HOST_COMPILER_PATH" _GCC_HOST_COMPILER_PREFIX = "GCC_HOST_COMPILER_PREFIX" -_ROCM_TOOLKIT_PATH = "ROCM_TOOLKIT_PATH" +_ROCM_TOOLKIT_PATH = "ROCM_PATH" _TF_ROCM_VERSION = "TF_ROCM_VERSION" _TF_MIOPEN_VERSION = "TF_MIOPEN_VERSION" _TF_ROCM_AMDGPU_TARGETS = "TF_ROCM_AMDGPU_TARGETS" @@ -196,6 +196,13 @@ def _rocm_include_path(repository_ctx, rocm_config, bash_bin): inc_dirs.append(rocm_toolkit_path + "/llvm/lib/clang/10.0.0/include") inc_dirs.append(rocm_toolkit_path + "/llvm/lib/clang/11.0.0/include") + # Support hcc based off clang 10.0.0 (for ROCm 3.3) + inc_dirs.append(rocm_toolkit_path + "/hcc/compiler/lib/clang/10.0.0/include/") + inc_dirs.append(rocm_toolkit_path + "/hcc/lib/clang/10.0.0/include") + + # Add hcc headers + inc_dirs.append(rocm_toolkit_path + "/hcc/include") + return inc_dirs def _enable_rocm(repository_ctx): From ddafc33a31db310352b656e0831ef2da8975cf87 Mon Sep 17 00:00:00 2001 From: Deven Desai Date: Fri, 24 Jul 2020 03:05:19 +0000 Subject: [PATCH 1240/2522] Updating Dockerfile.rocm to use Ubuntu 18.04 --- tensorflow/tools/ci_build/Dockerfile.rocm | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tensorflow/tools/ci_build/Dockerfile.rocm b/tensorflow/tools/ci_build/Dockerfile.rocm index 6d124204ed8..4f5d3ae7291 100644 --- a/tensorflow/tools/ci_build/Dockerfile.rocm +++ b/tensorflow/tools/ci_build/Dockerfile.rocm @@ -1,6 +1,6 @@ # This Dockerfile provides a starting point for a ROCm installation of # MIOpen and tensorflow. -FROM ubuntu:xenial +FROM ubuntu:bionic MAINTAINER Jeff Poznanovic ARG DEB_ROCM_REPO=http://repo.radeon.com/rocm/apt/3.3/ @@ -19,9 +19,9 @@ RUN sh -c "echo deb [arch=amd64] $DEB_ROCM_REPO xenial main > /etc/apt/sources. # Install misc pkgs RUN apt-get update --allow-insecure-repositories && DEBIAN_FRONTEND=noninteractive apt-get install -y \ build-essential \ - clang-3.8 \ - clang-format-3.8 \ - clang-tidy-3.8 \ + clang-6.0 \ + clang-format-6.0 \ + clang-tidy-6.0 \ cmake \ cmake-qt-gui \ ssh \ @@ -91,8 +91,6 @@ RUN touch ${ROCM_PATH}/.info/version COPY install/*.sh /install/ ARG DEBIAN_FRONTEND=noninteractive RUN /install/install_bootstrap_deb_packages.sh -RUN add-apt-repository -y ppa:openjdk-r/ppa && \ - add-apt-repository -y ppa:george-edison55/cmake-3.x RUN /install/install_deb_packages.sh RUN /install/install_pip_packages.sh RUN /install/install_bazel.sh From 39bcd85ef4a2b18b57d7c5c7cb440772d7883106 Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: Thu, 23 Jul 2020 20:29:02 -0700 Subject: [PATCH 1241/2522] Use proto to configure tf.data service worker server. This simplifies adding new configuration properties, so that we don't need to plumb new properties through. This also gives us a single place to document all configuration options (in the .proto file). 
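For example, a worker can now be brought up from a single WorkerConfig proto.
A rough sketch (field names taken from the diff below; the dispatcher address,
protocol, and port values are placeholders, and error handling is minimal):

    #include <memory>

    #include "tensorflow/core/data/service/server_lib.h"
    #include "tensorflow/core/protobuf/data/experimental/service_config.pb.h"

    tensorflow::Status StartLocalWorker(
        std::unique_ptr<tensorflow::data::WorkerGrpcDataServer>* server) {
      tensorflow::data::experimental::WorkerConfig config;
      config.set_port(0);                               // 0 = pick a free port
      config.set_protocol("grpc");
      config.set_dispatcher_address("localhost:5000");  // assumed dispatcher
      config.set_worker_address("localhost:%port%");    // %port% -> BoundPort()
      tensorflow::Status s =
          tensorflow::data::NewWorkerServer(config, server);
      if (!s.ok()) return s;
      return (*server)->Start();
    }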
PiperOrigin-RevId: 322924012 Change-Id: Id53b44626ee25259b469e9e4f7d150b227b30261 --- tensorflow/core/data/service/BUILD | 1 + .../core/data/service/grpc_worker_impl.cc | 5 ++-- .../core/data/service/grpc_worker_impl.h | 4 +-- tensorflow/core/data/service/server_lib.cc | 30 +++++-------------- tensorflow/core/data/service/server_lib.h | 25 ++-------------- tensorflow/core/data/service/test_cluster.cc | 8 +++-- tensorflow/core/data/service/worker_impl.cc | 16 +++++----- tensorflow/core/data/service/worker_impl.h | 13 ++++---- .../data/experimental/service_config.proto | 15 ++++++++++ .../data/experimental/service/server_lib.py | 7 ++++- .../service/server_lib_wrapper.cc | 12 +++++--- 11 files changed, 65 insertions(+), 71 deletions(-) diff --git a/tensorflow/core/data/service/BUILD b/tensorflow/core/data/service/BUILD index d7cc7a3e528..913cbf26cf0 100644 --- a/tensorflow/core/data/service/BUILD +++ b/tensorflow/core/data/service/BUILD @@ -227,6 +227,7 @@ cc_library( deps = [ ":worker_cc_grpc_proto", ":worker_impl", + "//tensorflow/core:protos_all_cc", "//tensorflow/core/distributed_runtime/rpc:grpc_util", tf_grpc_cc_dependency(), ], diff --git a/tensorflow/core/data/service/grpc_worker_impl.cc b/tensorflow/core/data/service/grpc_worker_impl.cc index 0cddfce4e0b..c76e1062753 100644 --- a/tensorflow/core/data/service/grpc_worker_impl.cc +++ b/tensorflow/core/data/service/grpc_worker_impl.cc @@ -26,9 +26,8 @@ using ::grpc::ServerContext; using ::grpc::Status; GrpcWorkerImpl::GrpcWorkerImpl(ServerBuilder* server_builder, - const std::string& dispatcher_address, - const std::string& protocol) - : impl_(dispatcher_address, protocol) { + const experimental::WorkerConfig& config) + : impl_(config) { server_builder->RegisterService(this); VLOG(1) << "Registered data service worker"; } diff --git a/tensorflow/core/data/service/grpc_worker_impl.h b/tensorflow/core/data/service/grpc_worker_impl.h index 169ae29ea37..b0881143a57 100644 --- a/tensorflow/core/data/service/grpc_worker_impl.h +++ b/tensorflow/core/data/service/grpc_worker_impl.h @@ -19,6 +19,7 @@ limitations under the License. 
#include "grpcpp/server_builder.h" #include "tensorflow/core/data/service/worker.grpc.pb.h" #include "tensorflow/core/data/service/worker_impl.h" +#include "tensorflow/core/protobuf/data/experimental/service_config.pb.h" namespace tensorflow { namespace data { @@ -35,8 +36,7 @@ namespace data { class GrpcWorkerImpl : public WorkerService::Service { public: explicit GrpcWorkerImpl(grpc::ServerBuilder* server_builder, - const std::string& dispatcher_address, - const std::string& protocol); + const experimental::WorkerConfig& config); ~GrpcWorkerImpl() override {} void Start(const std::string& worker_address); diff --git a/tensorflow/core/data/service/server_lib.cc b/tensorflow/core/data/service/server_lib.cc index 6d912b1c802..648a189717e 100644 --- a/tensorflow/core/data/service/server_lib.cc +++ b/tensorflow/core/data/service/server_lib.cc @@ -79,8 +79,7 @@ DispatchGrpcDataServer::DispatchGrpcDataServer( DispatchGrpcDataServer::~DispatchGrpcDataServer() { delete service_; } void DispatchGrpcDataServer::AddServiceToBuilder(grpc::ServerBuilder* builder) { - auto service = absl::make_unique(builder, config_); - service_ = service.release(); + service_ = absl::make_unique(builder, config_).release(); } Status DispatchGrpcDataServer::NumWorkers(int* num_workers) { @@ -96,22 +95,17 @@ Status DispatchGrpcDataServer::NumWorkers(int* num_workers) { } WorkerGrpcDataServer::WorkerGrpcDataServer( - int port, const std::string& protocol, - const std::string& dispatcher_address, const std::string& worker_address) - : GrpcDataServerBase(port, protocol), - dispatcher_address_(dispatcher_address), - worker_address_(worker_address) {} + const experimental::WorkerConfig& config) + : GrpcDataServerBase(config.port(), config.protocol()), config_(config) {} WorkerGrpcDataServer::~WorkerGrpcDataServer() { delete service_; } void WorkerGrpcDataServer::AddServiceToBuilder(grpc::ServerBuilder* builder) { - auto service = absl::make_unique(builder, dispatcher_address_, - protocol_); - service_ = service.release(); + service_ = absl::make_unique(builder, config_).release(); } Status WorkerGrpcDataServer::StartServiceInternal() { - std::string worker_address = worker_address_; + std::string worker_address = config_.worker_address(); if (worker_address.empty()) { worker_address = absl::StrCat("localhost:", kPortPlaceholder); } @@ -128,19 +122,9 @@ Status NewDispatchServer(const experimental::DispatcherConfig& config, return Status::OK(); } -Status NewWorkerServer(int port, const std::string& protocol, - const std::string& dispatcher_address, +Status NewWorkerServer(const experimental::WorkerConfig& config, std::unique_ptr* out_server) { - return NewWorkerServer(port, protocol, dispatcher_address, - /*worker_address=*/"", out_server); -} - -Status NewWorkerServer(int port, const std::string& protocol, - const std::string& dispatcher_address, - const std::string& worker_address, - std::unique_ptr* out_server) { - *out_server = absl::make_unique( - port, protocol, dispatcher_address, worker_address); + *out_server = absl::make_unique(config); return Status::OK(); } diff --git a/tensorflow/core/data/service/server_lib.h b/tensorflow/core/data/service/server_lib.h index d147f47c5e4..365241753fb 100644 --- a/tensorflow/core/data/service/server_lib.h +++ b/tensorflow/core/data/service/server_lib.h @@ -91,9 +91,7 @@ class DispatchGrpcDataServer : public GrpcDataServerBase { class WorkerGrpcDataServer : public GrpcDataServerBase { public: - WorkerGrpcDataServer(int requested_port, const std::string& protocol, - const 
std::string& dispatcher_address, - const std::string& worker_address); + explicit WorkerGrpcDataServer(const experimental::WorkerConfig& config); ~WorkerGrpcDataServer() override; protected: @@ -101,8 +99,7 @@ class WorkerGrpcDataServer : public GrpcDataServerBase { Status StartServiceInternal() override; private: - const std::string dispatcher_address_; - const std::string worker_address_; + const experimental::WorkerConfig config_; // Owned. We use a raw pointer because GrpcWorkerImpl is forward-declared. GrpcWorkerImpl* service_; }; @@ -112,23 +109,7 @@ Status NewDispatchServer(const experimental::DispatcherConfig& config, std::unique_ptr* out_server); // Creates a worker tf.data server and stores it in `*out_server`. -// -// The port can be a specific port or 0. If the port is 0, an available port -// will be chosen in Start(). This value can be queried with BoundPort(). -// -// The worker_address argument is optional. If left empty, it will default to -// "localhost:%port%". When the worker registers with the dispatcher, the worker -// will report the worker address, so that the dispatcher can tell clients where -// to read from. The address may contain the placeholder "%port%", which will be -// replaced with the value of BoundPort(). -Status NewWorkerServer(int port, const std::string& protocol, - const std::string& dispatcher_address, - const std::string& worker_address, - std::unique_ptr* out_server); - -// Creates a worker using the default worker_address. -Status NewWorkerServer(int port, const std::string& protocol, - const std::string& dispatcher_address, +Status NewWorkerServer(const experimental::WorkerConfig& config, std::unique_ptr* out_server); } // namespace data diff --git a/tensorflow/core/data/service/test_cluster.cc b/tensorflow/core/data/service/test_cluster.cc index ad0d2be87d8..8ae3f191407 100644 --- a/tensorflow/core/data/service/test_cluster.cc +++ b/tensorflow/core/data/service/test_cluster.cc @@ -62,8 +62,12 @@ Status TestCluster::Initialize() { Status TestCluster::AddWorker() { std::unique_ptr worker; - TF_RETURN_IF_ERROR( - NewWorkerServer(/*port=*/0, kProtocol, dispatcher_address_, &worker)); + experimental::WorkerConfig config; + config.set_port(0); + config.set_protocol(kProtocol); + config.set_dispatcher_address(dispatcher_address_); + config.set_worker_address("localhost:%port%"); + TF_RETURN_IF_ERROR(NewWorkerServer(config, &worker)); TF_RETURN_IF_ERROR(worker->Start()); worker_addresses_.push_back(absl::StrCat("localhost:", worker->BoundPort())); workers_.push_back(std::move(worker)); diff --git a/tensorflow/core/data/service/worker_impl.cc b/tensorflow/core/data/service/worker_impl.cc index 00659e1d048..39508b1eab0 100644 --- a/tensorflow/core/data/service/worker_impl.cc +++ b/tensorflow/core/data/service/worker_impl.cc @@ -46,8 +46,8 @@ auto* tf_data_service_created = } // namespace DataServiceWorkerImpl::DataServiceWorkerImpl( - const std::string& dispatcher_address, const std::string& protocol) - : dispatcher_address_(dispatcher_address), protocol_(protocol) { + const experimental::WorkerConfig& config) + : config_(config) { tf_data_service_created->GetCell()->Set(true); } @@ -68,7 +68,7 @@ void DataServiceWorkerImpl::Start(const std::string& worker_address) { Status s = Register(); while (!s.ok()) { LOG(WARNING) << "Failed to register with dispatcher at " - << dispatcher_address_ << ": " << s; + << config_.dispatcher_address() << ": " << s; Env::Default()->SleepForMicroseconds(kHeartbeatIntervalMicros); s = Register(); } @@ -173,17 +173,17 @@ 
Status DataServiceWorkerImpl::EnsureDispatcherStubInitialized() if (!dispatcher_stub_) { ::grpc::ChannelArguments args; std::shared_ptr<::grpc::ChannelCredentials> credentials; - TF_RETURN_IF_ERROR( - CredentialsFactory::CreateClientCredentials(protocol_, &credentials)); - auto channel = - ::grpc::CreateCustomChannel(dispatcher_address_, credentials, args); + TF_RETURN_IF_ERROR(CredentialsFactory::CreateClientCredentials( + config_.protocol(), &credentials)); + auto channel = ::grpc::CreateCustomChannel(config_.dispatcher_address(), + credentials, args); dispatcher_stub_ = DispatcherService::NewStub(channel); } return Status::OK(); } Status DataServiceWorkerImpl::Register() EXCLUSIVE_LOCKS_REQUIRED(mu_) { - VLOG(3) << "Registering with dispatcher at " << dispatcher_address_; + VLOG(3) << "Registering with dispatcher at " << config_.dispatcher_address(); TF_RETURN_IF_ERROR(EnsureDispatcherStubInitialized()); RegisterWorkerRequest req; req.set_worker_address(worker_address_); diff --git a/tensorflow/core/data/service/worker_impl.h b/tensorflow/core/data/service/worker_impl.h index adb3e97bbea..6961312ee34 100644 --- a/tensorflow/core/data/service/worker_impl.h +++ b/tensorflow/core/data/service/worker_impl.h @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/core/data/service/worker.pb.h" #include "tensorflow/core/data/standalone.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/protobuf/data/experimental/service_config.pb.h" #include "tensorflow/core/public/session.h" namespace tensorflow { @@ -29,12 +30,14 @@ namespace data { // A TensorFlow DataService serves dataset elements over RPC. class DataServiceWorkerImpl { public: - explicit DataServiceWorkerImpl(const std::string& dispatcher_address, - const std::string& protocol); + explicit DataServiceWorkerImpl(const experimental::WorkerConfig& config); ~DataServiceWorkerImpl(); // Starts the worker. The worker needs to know its own address so that it can - // register with the dispatcher. + // register with the dispatcher. This is set in `Start` instead of in the + // constructor because the worker may be binding to port `0`, in which case + // the address isn't known until the worker has started and decided which port + // to bind to. void Start(const std::string& worker_address); // See worker.proto for API documentation. @@ -67,9 +70,7 @@ class DataServiceWorkerImpl { std::unique_ptr iterator; } Task; - const std::string dispatcher_address_; - // Protocol for communicating with the dispatcher. - const std::string protocol_; + const experimental::WorkerConfig config_; // The worker's own address. std::string worker_address_; diff --git a/tensorflow/core/protobuf/data/experimental/service_config.proto b/tensorflow/core/protobuf/data/experimental/service_config.proto index 5dcc3c69083..8708b923720 100644 --- a/tensorflow/core/protobuf/data/experimental/service_config.proto +++ b/tensorflow/core/protobuf/data/experimental/service_config.proto @@ -10,3 +10,18 @@ message DispatcherConfig { // The protocol for the dispatcher to use when connecting to workers. string protocol = 2; } + +// Configuration for a tf.data service WorkerServer. +message WorkerConfig { + // The port for the worker to bind to. A value of 0 indicates that the + // worker may bind to any available port. + int64 port = 1; + // The protocol for the worker to use when connecting to the dispatcher. + string protocol = 2; + // The address of the dispatcher to register with. 
+ string dispatcher_address = 3; + // The address of the worker server. The substring "%port%", if specified, + // will be replaced with the worker's bound port. This is useful when the port + // is set to `0`. + string worker_address = 4; +} diff --git a/tensorflow/python/data/experimental/service/server_lib.py b/tensorflow/python/data/experimental/service/server_lib.py index 3e355565308..99dc9297901 100644 --- a/tensorflow/python/data/experimental/service/server_lib.py +++ b/tensorflow/python/data/experimental/service/server_lib.py @@ -205,8 +205,13 @@ class WorkerServer(object): protocol = "grpc" self._protocol = protocol + config = service_config_pb2.WorkerConfig( + port=port, + protocol=protocol, + dispatcher_address=dispatcher_address, + worker_address=worker_address) self._server = _pywrap_server_lib.TF_DATA_NewWorkerServer( - port, protocol, dispatcher_address, worker_address) + config.SerializeToString()) if start: self._server.start() diff --git a/tensorflow/python/data/experimental/service/server_lib_wrapper.cc b/tensorflow/python/data/experimental/service/server_lib_wrapper.cc index b8250aaeda6..f59c1fb90bf 100644 --- a/tensorflow/python/data/experimental/service/server_lib_wrapper.cc +++ b/tensorflow/python/data/experimental/service/server_lib_wrapper.cc @@ -69,12 +69,16 @@ PYBIND11_MODULE(_pywrap_server_lib, m) { m.def( "TF_DATA_NewWorkerServer", - [](int port, std::string protocol, std::string dispatcher_address, - std::string worker_address) + [](std::string serialized_worker_config) -> std::unique_ptr { + tensorflow::data::experimental::WorkerConfig config; + if (!config.ParseFromString(serialized_worker_config)) { + tensorflow::MaybeRaiseFromStatus(tensorflow::errors::InvalidArgument( + "Failed to deserialize worker config.")); + } std::unique_ptr server; - tensorflow::Status status = tensorflow::data::NewWorkerServer( - port, protocol, dispatcher_address, worker_address, &server); + tensorflow::Status status = + tensorflow::data::NewWorkerServer(config, &server); tensorflow::MaybeRaiseFromStatus(status); return server; }, From 6a775d666e6a75ad409ba8ed658ff4a3bde6f684 Mon Sep 17 00:00:00 2001 From: Brian Zhao Date: Thu, 23 Jul 2020 21:17:08 -0700 Subject: [PATCH 1242/2522] Disabling failing gpu test. PiperOrigin-RevId: 322929322 Change-Id: I037d297f944b17a61488e06678eacba3132a37cd --- tensorflow/python/kernel_tests/BUILD | 3 --- 1 file changed, 3 deletions(-) diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index e7aebb044fa..06ebf0034ae 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -3521,9 +3521,6 @@ cuda_py_test( size = "medium", srcs = ["tensordot_op_test.py"], shard_count = 20, - tags = [ - "no_gpu", # TODO(b/161856380): Re-enable when fix lands. - ], deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", From da698a62692b323b150718d4281fba8d9365820a Mon Sep 17 00:00:00 2001 From: Blake Hechtman Date: Thu, 23 Jul 2020 21:44:49 -0700 Subject: [PATCH 1243/2522] [XLA] Allow for Mixed precision dot and convolution. 
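This change drops the shape-inference checks that required dot and convolution operands to share an element type, and extends the element-type promotion helper in shape_util.h to rank a mixed floating/integer pair. Below is a self-contained sketch of that promotion rule using a simplified stand-in enum rather than XLA's PrimitiveType and primitive_util helpers; the bit widths and type names are assumptions for illustration only.

    #include <cassert>

    enum class ElemType { S16, U16, S32, BF16, F16, F32 };

    constexpr bool IsFloating(ElemType t) {
      return t == ElemType::BF16 || t == ElemType::F16 || t == ElemType::F32;
    }

    constexpr int BitWidth(ElemType t) {
      return (t == ElemType::S32 || t == ElemType::F32) ? 32 : 16;
    }

    // Mirrors the rule added in the shape_util.h hunk below: equal types win
    // outright, a lone floating-point side wins over an integer side, and
    // otherwise the wider type wins (ties keep the first operand).
    constexpr ElemType HigherPrecisionElementType(ElemType a, ElemType b) {
      if (a == b) return a;
      if (IsFloating(a) && !IsFloating(b)) return a;
      if (IsFloating(b) && !IsFloating(a)) return b;
      return BitWidth(a) < BitWidth(b) ? b : a;
    }

    int main() {
      assert(HigherPrecisionElementType(ElemType::S32, ElemType::BF16) ==
             ElemType::BF16);  // floating side beats the wider integer
      assert(HigherPrecisionElementType(ElemType::BF16, ElemType::F32) ==
             ElemType::F32);   // both floating: wider wins
      assert(HigherPrecisionElementType(ElemType::S16, ElemType::U16) ==
             ElemType::S16);   // same width: first operand wins
      return 0;
    }

Preferring the floating-point side means, for example, that an s32/bf16 pair promotes to bf16 rather than to the wider integer type, avoiding a float-to-integer truncation. The new u16/s16 iota dot tests further down exercise the integer-only path, and tpu_defs.h adds DT_INT16 and DT_UINT16 so TPUs accept those operand types.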
PiperOrigin-RevId: 322932204 Change-Id: I37106b130cf16d1149feed712ecb942ad9827ac0 --- .../compiler/xla/service/shape_inference.cc | 10 --- tensorflow/compiler/xla/shape_util.h | 8 +++ .../compiler/xla/tests/dot_operation_test.cc | 69 +++++++++++++++++++ tensorflow/core/tpu/tpu_defs.h | 6 +- 4 files changed, 80 insertions(+), 13 deletions(-) diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index ec8e4d23d21..29a728c068e 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -643,11 +643,6 @@ Status ValidateDotDimensionNumbers( return InvalidArgument("%s", message); }; - // Check if both element types are the same. - if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(lhs, rhs)) { - return fail("Element types do not match."); - } - // Validate basic properties of dot dimension numbers. TF_RETURN_IF_ERROR(ValidateDotDimensionNumbers(lhs, rhs, dimension_numbers)); @@ -1621,11 +1616,6 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation, batch_group_count, feature_group_count); } - if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(lhs, rhs)) { - return InvalidArgument( - "Convolution with different element types: %s and %s.", - ShapeUtil::HumanString(lhs), ShapeUtil::HumanString(rhs)); - } if (dnums.input_spatial_dimensions_size() != dnums.kernel_spatial_dimensions_size()) { return InvalidArgument( diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index 3789d828528..3f69a8b0aca 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -269,6 +269,14 @@ class ShapeUtil { if (SameElementType(a, b)) { return a.element_type(); } + // If only one of A and B are floating use the floating point type. + if (ElementIsFloating(a) && !ElementIsFloating(b)) { + return a.element_type(); + } + if (ElementIsFloating(b) && !ElementIsFloating(a)) { + return b.element_type(); + } + // Use the higher precision type. return primitive_util::BitWidth(a.element_type()) < primitive_util::BitWidth(b.element_type()) ? 
b.element_type() diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc index 71cfd95f77f..60ba27b2050 100644 --- a/tensorflow/compiler/xla/tests/dot_operation_test.cc +++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc @@ -1463,6 +1463,75 @@ ENTRY SmallIntegerDot { EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0, 0})); } +XLA_TEST_F(DotOperationTextTest, DISABLED_ON_CPU(U16IotaDot)) { + absl::string_view hlo_string = + R"( +HloModule SmallIntegerDot + +ENTRY SmallIntegerDot { + arg0 = u16[5,55,8] parameter(0) + arg1 = u16[5,8,200] parameter(1) + dot = u16[5,55,200] dot(arg0, arg1), lhs_batch_dims={0}, lhs_contracting_dims={2}, rhs_batch_dims={0}, rhs_contracting_dims={1} + ROOT c = s32[5,55,200] convert(dot) +} +)"; + + EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0, 0})); +} + +XLA_TEST_F(DotOperationTextTest, DISABLED_ON_CPU(U16IotaSquaredDot)) { + absl::string_view hlo_string = + R"( +HloModule SmallIntegerDot + +ENTRY SmallIntegerDot { + arg0 = u16[16,2] iota(), iota_dimension=0 + a = u16[16,2] multiply(arg0, arg0) + r = u16[16,2] multiply(a, a) + arg1 = u16[2,98] iota(), iota_dimension=1 + b = u16[2,98] multiply(arg1, arg1) + s = u16[2,98] multiply(b, b) + ROOT dot = u16[16,98] dot(r, s), lhs_contracting_dims={1}, rhs_contracting_dims={0} +} +)"; + + EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0, 0})); +} + +XLA_TEST_F(DotOperationTextTest, DISABLED_ON_CPU(S16IotaDot)) { + absl::string_view hlo_string = + R"( +HloModule SmallIntegerDot + +ENTRY SmallIntegerDot { + arg0 = s16[5,55,8] iota(), iota_dimension=1 + arg1 = s16[5,8,200] iota(), iota_dimension=2 + ROOT dot = s16[5,55,200] dot(arg0, arg1), lhs_batch_dims={0}, lhs_contracting_dims={2}, rhs_batch_dims={0}, rhs_contracting_dims={1} +} +)"; + + EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0, 0})); +} + +XLA_TEST_F(DotOperationTextTest, DISABLED_ON_CPU(S16IotaSquaredDot)) { + absl::string_view hlo_string = + R"( +HloModule SmallIntegerDot + +ENTRY SmallIntegerDot { + arg0 = s16[16,2] iota(), iota_dimension=0 + a = s16[16,2] multiply(arg0, arg0) + r = s16[16,2] multiply(a, a) + arg1 = s16[2,98] iota(), iota_dimension=1 + b = s16[2,98] multiply(arg1, arg1) + s = s16[2,98] multiply(b, b) + ROOT dot = s16[16,98] dot(r, s), lhs_contracting_dims={1}, rhs_contracting_dims={0} +} +)"; + + EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0, 0})); +} + XLA_TEST_F(DotOperationTextTest, DISABLED_ON_CPU(S8Dot)) { absl::string_view hlo_string = R"( diff --git a/tensorflow/core/tpu/tpu_defs.h b/tensorflow/core/tpu/tpu_defs.h index 1c4b4c4e38e..696fa8dbe3e 100644 --- a/tensorflow/core/tpu/tpu_defs.h +++ b/tensorflow/core/tpu/tpu_defs.h @@ -51,10 +51,10 @@ extern const char* const kTPUReplicateAttr; extern const char* const kOutsideCompilationAttr; // Supported types for TPUs. -static constexpr std::array kTpuAllTypes = { +static constexpr std::array kTpuAllTypes = { {DT_INT32, DT_UINT32, DT_BFLOAT16, DT_FLOAT, DT_DOUBLE, DT_BOOL, - DT_COMPLEX64, DT_INT64, DT_UINT64, DT_QINT8, DT_QUINT8, DT_INT8, - DT_UINT8}}; + DT_COMPLEX64, DT_INT64, DT_UINT64, DT_QINT8, DT_QUINT8, DT_INT8, DT_UINT8, + DT_INT16, DT_UINT16}}; } // namespace tensorflow From 918466d131554b3135850fc4bedfbd595b772a84 Mon Sep 17 00:00:00 2001 From: Anna R Date: Thu, 23 Jul 2020 21:52:54 -0700 Subject: [PATCH 1244/2522] [XLA:SPMD] Recursively handling more Dot cases 1. Allow creating subgrouped collectives 2. Add a mechanism to group devices in a tiled sharding 3. 
For previously unhandled dot cases, detect whether we can have partial matches of dimensions, then group the matched dimensions and recursively partition it. PiperOrigin-RevId: 322933217 Change-Id: I01f3510ffc71baa26af66d2c15dcaa97c1c6517b --- tensorflow/compiler/xla/service/spmd/BUILD | 1 - .../xla/service/spmd/convolution_handler.cc | 4 +- .../compiler/xla/service/spmd/dot_handler.cc | 717 +++++------------- .../xla/service/spmd/spmd_partitioner.cc | 170 ++--- .../xla/service/spmd/spmd_partitioner.h | 55 +- .../xla/service/spmd/spmd_partitioner_test.cc | 152 +--- .../xla/service/spmd/spmd_partitioner_util.cc | 262 +------ .../xla/service/spmd/spmd_partitioner_util.h | 52 +- 8 files changed, 298 insertions(+), 1115 deletions(-) diff --git a/tensorflow/compiler/xla/service/spmd/BUILD b/tensorflow/compiler/xla/service/spmd/BUILD index a67e4cf55c5..e41b89f6dff 100644 --- a/tensorflow/compiler/xla/service/spmd/BUILD +++ b/tensorflow/compiler/xla/service/spmd/BUILD @@ -50,7 +50,6 @@ cc_library( "//tensorflow/compiler/xla/service:tuple_simplifier", "//tensorflow/core/platform:numbers", "@com_google_absl//absl/algorithm:container", - "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:optional", diff --git a/tensorflow/compiler/xla/service/spmd/convolution_handler.cc b/tensorflow/compiler/xla/service/spmd/convolution_handler.cc index 4caa2bbbf35..1204df59080 100644 --- a/tensorflow/compiler/xla/service/spmd/convolution_handler.cc +++ b/tensorflow/compiler/xla/service/spmd/convolution_handler.cc @@ -226,7 +226,7 @@ Status SpmdPartitioningVisitor::HandleConvolutionTiledLhsAndRhs( hlo->batch_group_count(), new_window, hlo->convolution_dimension_numbers(), hlo->precision_config())); auto ar = collective_ops_creator_.create_cross_partition_all_reduce( - &b_, conv, MakeBinaryAdd(hlo->shape().element_type(), module_), {}, + &b_, conv, MakeBinaryAdd(hlo->shape().element_type(), module_), NewChannel()); ar->set_sharding(HloSharding::Replicate()); return PartitionedHlo(ar, hlo->shape(), MakePartitioningState()) @@ -605,7 +605,7 @@ Status SpmdPartitioningVisitor::HandleConvolution(HloInstruction* hlo) { hlo->batch_group_count(), new_window, dnums, hlo->precision_config())); auto ar = collective_ops_creator_.create_cross_partition_all_reduce( - &b_, conv, MakeBinaryAdd(hlo->shape().element_type(), module_), {}, + &b_, conv, MakeBinaryAdd(hlo->shape().element_type(), module_), NewChannel()); ar->set_sharding(HloSharding::Replicate()); return PartitionedHlo(ar, hlo->shape(), MakePartitioningState()) diff --git a/tensorflow/compiler/xla/service/spmd/dot_handler.cc b/tensorflow/compiler/xla/service/spmd/dot_handler.cc index 8fea788b1b7..9ecf21f5841 100644 --- a/tensorflow/compiler/xla/service/spmd/dot_handler.cc +++ b/tensorflow/compiler/xla/service/spmd/dot_handler.cc @@ -80,25 +80,12 @@ Status SpmdPartitioningVisitor::HandleDot(HloInstruction* hlo) { return HandleDotHelper(hlo, mapping, create_sharded_dot); } -namespace { - -StatusOr PartitionBaseCase( - PartitionedHlo lhs, PartitionedHlo rhs, const Shape& output_base_shape, - const HloSharding& output_sharding, - const DotGeneralDimsMapping& dims_mapping, int64 num_partitions, +Status SpmdPartitioningVisitor::HandleDotHelper( + HloInstruction* hlo, const DotGeneralDimsMapping& dims_mapping, const std::function( - HloInstruction*, HloInstruction*, SpmdBuilder*)>& create_sharded_dot, - HloModule* module, HloInstruction* original_hlo, int64 lhs_batch_partitions, - 
int64 rhs_batch_partitions, int64 output_batch_partitions, - int64 lhs_contracting_partitions, int64 rhs_contracting_partitions, - int64 lhs_non_contracting_partitions, int64 rhs_non_contracting_partitions, - int64 output_lhs_non_contracting_partitions, - int64 output_rhs_non_contracting_partitions, - int64 threshold_for_windowed_einsum_mib, SpmdBuilder* b, - std::vector* - windowed_dot_general_loops) { - const HloSharding& lhs_sharding = lhs.sharding(); - const HloSharding& rhs_sharding = rhs.sharding(); + HloInstruction*, HloInstruction*, SpmdBuilder*)>& create_sharded_dot) { + const HloSharding& lhs_sharding = hlo->operand(0)->sharding(); + const HloSharding& rhs_sharding = hlo->operand(1)->sharding(); // Similar to hlo_sharding_util::TransposeSharding(), but allows // removing/adding non-partitioned dimensions. @@ -145,12 +132,12 @@ StatusOr PartitionBaseCase( return HloSharding::Tile(reshape_tiles); }; - std::vector lhs_to_rhs_indices(lhs.base_shape().rank(), -1); - std::vector lhs_to_output_indices(lhs.base_shape().rank(), -1); - std::vector rhs_to_lhs_indices(rhs.base_shape().rank(), -1); - std::vector rhs_to_output_indices(rhs.base_shape().rank(), -1); - std::vector output_to_lhs_indices(output_base_shape.rank(), -1); - std::vector output_to_rhs_indices(output_base_shape.rank(), -1); + std::vector lhs_to_rhs_indices(hlo->operand(0)->shape().rank(), -1); + std::vector lhs_to_output_indices(hlo->operand(0)->shape().rank(), -1); + std::vector rhs_to_lhs_indices(hlo->operand(1)->shape().rank(), -1); + std::vector rhs_to_output_indices(hlo->operand(1)->shape().rank(), -1); + std::vector output_to_lhs_indices(hlo->shape().rank(), -1); + std::vector output_to_rhs_indices(hlo->shape().rank(), -1); auto populate_indices_mapping = [&](const DotGeneralDimsMapping::DimsMapping& mapping) { if (mapping.lhs >= 0) { @@ -187,84 +174,127 @@ StatusOr PartitionBaseCase( auto rhs_sharding_transposed_to_match_output = transpose_sharding( rhs_sharding, rhs_to_output_indices, output_to_rhs_indices); auto output_sharding_transposed_to_match_lhs = transpose_sharding( - output_sharding, output_to_lhs_indices, lhs_to_output_indices); + hlo->sharding(), output_to_lhs_indices, lhs_to_output_indices); auto output_sharding_transposed_to_match_rhs = transpose_sharding( - output_sharding, output_to_rhs_indices, rhs_to_output_indices); + hlo->sharding(), output_to_rhs_indices, rhs_to_output_indices); + // lhs_rhs_or_output: 0 lhs, 1 rhs, 2 output. 
+ auto get_partitions_for_dims = + [&](const HloSharding& sharding, + absl::Span dims, + int lhs_rhs_or_output) { + int64 partitions = 1; + if (sharding.IsTileMaximal()) { + return partitions; + } + for (const auto& dim : dims) { + if (lhs_rhs_or_output == 0) { + partitions *= sharding.tile_assignment().dim(dim.lhs); + } else if (lhs_rhs_or_output == 1) { + partitions *= sharding.tile_assignment().dim(dim.rhs); + } else { + CHECK_EQ(lhs_rhs_or_output, 2); + partitions *= sharding.tile_assignment().dim(dim.output); + } + } + return partitions; + }; + const int64 lhs_batch_partitions = + get_partitions_for_dims(lhs_sharding, dims_mapping.batch_dims, 0); + const int64 rhs_batch_partitions = + get_partitions_for_dims(rhs_sharding, dims_mapping.batch_dims, 1); + const int64 output_batch_partitions = + get_partitions_for_dims(hlo->sharding(), dims_mapping.batch_dims, 2); + const int64 lhs_contracting_partitions = + get_partitions_for_dims(lhs_sharding, dims_mapping.contracting_dims, 0); + const int64 rhs_contracting_partitions = + get_partitions_for_dims(rhs_sharding, dims_mapping.contracting_dims, 1); + const int64 lhs_non_contracting_partitions = get_partitions_for_dims( + lhs_sharding, dims_mapping.lhs_non_contracting_dims, 0); + const int64 rhs_non_contracting_partitions = get_partitions_for_dims( + rhs_sharding, dims_mapping.rhs_non_contracting_dims, 1); + const int64 output_lhs_non_contracting_partitions = get_partitions_for_dims( + hlo->sharding(), dims_mapping.lhs_non_contracting_dims, 2); + const int64 output_rhs_non_contracting_partitions = get_partitions_for_dims( + hlo->sharding(), dims_mapping.rhs_non_contracting_dims, 2); + + auto& lhs = GetPartitionedHlo(hlo->operand(0)); + auto& rhs = GetPartitionedHlo(hlo->operand(1)); // LHS and RHS are partitioned the same way and only partitioned in batch // dimensions. if (lhs_batch_partitions == rhs_batch_partitions && - rhs_batch_partitions == num_partitions && + rhs_batch_partitions == num_partitions_ && lhs_sharding_transposed_to_match_rhs == rhs_sharding) { - TF_ASSIGN_OR_RETURN(auto dot, create_sharded_dot(lhs.hlo(), rhs.hlo(), b)); - dot->set_sharding(*lhs_sharding_transposed_to_match_output); - return PartitionedHlo(dot, output_base_shape, lhs.state()) - .Reshard(output_sharding) - .hlo(); + TF_ASSIGN_OR_RETURN(auto dot, + create_sharded_dot(lhs.hlo(), rhs.hlo(), &b_)); + SetPartitionedHlo(hlo, [&] { + dot->set_sharding(*lhs_sharding_transposed_to_match_output); + return PartitionedHlo(dot, hlo->shape(), MakePartitioningState()) + .Reshard(hlo->sharding()) + .hlo(); + }); + return Status::OK(); } // Try emit batch-partitioned einsum with one operand resharded. Returns - // partitioned HLO or nullptr if the attempt fails. If - // may_reshard_with_allreduce is false, reshard must be done using - // all-to-all/collective-permute; otherwise this attempt fails. + // whether the attempt succeeds. If may_reshard_with_allreduce is false, + // reshard must be done using all-to-all; otherwise this attempt fails. auto try_emit_output_batch_partitioned_einsum_with_reshard = - [&](bool may_reshard_with_allreduce) -> StatusOr { + [&](bool may_reshard_with_allreduce) -> StatusOr { // LHS and output are batch partitioned in the same way. 
- if (lhs_batch_partitions == num_partitions && - output_batch_partitions == num_partitions && - lhs_sharding_transposed_to_match_output == output_sharding) { + if (lhs_batch_partitions == num_partitions_ && + output_batch_partitions == num_partitions_ && + lhs_sharding_transposed_to_match_output == hlo->sharding()) { if (!may_reshard_with_allreduce && - !CanReshardWithCollectivePermute( - rhs.sharding(), *lhs_sharding_transposed_to_match_rhs) && !GetReshardAllToAllSourceTargetDims( rhs.sharding(), *lhs_sharding_transposed_to_match_rhs)) { - return nullptr; + return false; } auto resharded_rhs = rhs.Reshard(*lhs_sharding_transposed_to_match_rhs); TF_ASSIGN_OR_RETURN( - auto dot, create_sharded_dot(lhs.hlo(), resharded_rhs.hlo(), b)); - return dot; + auto dot, create_sharded_dot(lhs.hlo(), resharded_rhs.hlo(), &b_)); + SetPartitionedHlo(hlo, [&] { return dot; }); + return true; } // RHS and output are batch partitioned in the same way. - if (rhs_batch_partitions == num_partitions && - output_batch_partitions == num_partitions && - rhs_sharding_transposed_to_match_output == output_sharding) { + if (rhs_batch_partitions == num_partitions_ && + output_batch_partitions == num_partitions_ && + rhs_sharding_transposed_to_match_output == hlo->sharding()) { if (!may_reshard_with_allreduce && - !CanReshardWithCollectivePermute( - lhs.sharding(), *rhs_sharding_transposed_to_match_lhs) && !GetReshardAllToAllSourceTargetDims( lhs.sharding(), *rhs_sharding_transposed_to_match_lhs)) { - return nullptr; + return false; } auto resharded_lhs = lhs.Reshard(*rhs_sharding_transposed_to_match_lhs); TF_ASSIGN_OR_RETURN( - auto dot, create_sharded_dot(resharded_lhs.hlo(), rhs.hlo(), b)); - return dot; + auto dot, create_sharded_dot(resharded_lhs.hlo(), rhs.hlo(), &b_)); + SetPartitionedHlo(hlo, [&] { return dot; }); + return true; } - return nullptr; + return false; }; { // Try batch-parallel by resharding one operand, and not using all-reduce. TF_ASSIGN_OR_RETURN( - HloInstruction * partitioned_dot, + bool emitted, try_emit_output_batch_partitioned_einsum_with_reshard(false)); - if (partitioned_dot) { - return partitioned_dot; + if (emitted) { + return Status::OK(); } } // Try to emit windowed DotGeneral when one operand is partitioned in the same // way as the output along non-contracting dimensions, but the other operand // is tiled in other dimensions. - auto emit_windowed_dot_general = - [&](int64 matching_operand, int64 windowing_operand, - bool windowed_at_contracting_dims, - bool windowed_at_batch_dims) -> StatusOr { + auto emit_windowed_dot_general = [&](int64 matching_operand, + int64 windowing_operand, + bool windowed_at_contracting_dims, + bool windowed_at_batch_dims) { CHECK_EQ(matching_operand + windowing_operand, 1); CHECK(!windowed_at_batch_dims || !windowed_at_contracting_dims); auto unpadded_result_buffer_shape = - MakePartitionedShape(output_base_shape, output_sharding); + MakePartitionedShape(hlo->shape(), hlo->sharding()); auto padded_result_buffer_shape = unpadded_result_buffer_shape; // For windowing at batch/non-contracting dims, we produce the result one // partition at a time, so we need to pad the shape in case of uneven @@ -280,17 +310,17 @@ StatusOr PartitionBaseCase( if (windowed_at_contracting_dims) { auto& to_mask = windowing_operand == 0 ? 
lhs : rhs; to_mask = - to_mask.PadWithValue(b->AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::Zero(output_base_shape.element_type())))); + to_mask.PadWithValue(b_.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::Zero(hlo->shape().element_type())))); } - auto result_buffer = CreateZero(padded_result_buffer_shape, b); - auto iteration = b->AddInstruction( + auto result_buffer = CreateZero(padded_result_buffer_shape, &b_); + auto iteration = b_.AddInstruction( HloInstruction::CreateConstant(LiteralUtil::CreateR0(0))); // Create a while loop that computes one window per iteration. During each // iteration, each partition sends its input window to its neighbor using // collective-permute for the next iteration. - SpmdBuilder body_b("windowed_dot_general_body", original_hlo); + SpmdBuilder body_b("windowed_dot_general_body", visiting_hlo_); auto param = body_b.AddInstruction(HloInstruction::CreateParameter( /*parameter_number=*/0, ShapeUtil::MakeTupleShape({lhs.hlo()->shape(), rhs.hlo()->shape(), @@ -305,12 +335,11 @@ StatusOr PartitionBaseCase( auto i = body_b.AddInstruction( HloInstruction::CreateGetTupleElement(iteration->shape(), param, 3)); - auto partition_id = - lhs.state().collective_ops_creator.create_partition_id(&body_b); + auto partition_id = collective_ops_creator_.create_partition_id(&body_b); auto data_partition_id = body_b.AddInstruction(HloInstruction::CreateBinary( i->shape(), HloOpcode::kAdd, i, partition_id)); auto partition_count = body_b.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::CreateR0(num_partitions))); + LiteralUtil::CreateR0(num_partitions_))); data_partition_id = body_b.AddInstruction(HloInstruction::CreateBinary( i->shape(), HloOpcode::kRemainder, data_partition_id, partition_count)); auto dot_lhs = l; @@ -321,7 +350,7 @@ StatusOr PartitionBaseCase( // operand as replicated, and resharding it to match the windowed operand. auto slice_operand = matching_operand == 0 ? l : r; slice_operand->set_sharding(HloSharding::Replicate()); - auto state = lhs.state(); + auto state = MakePartitioningState(); state.b = &body_b; state.partition_id = data_partition_id; auto slice = PartitionedHlo(slice_operand, slice_operand->shape(), state) @@ -363,27 +392,26 @@ StatusOr PartitionBaseCase( auto has_more = body_b.AddInstruction(HloInstruction::CreateCompare( ShapeUtil::MakeShape(PRED, {}), i, body_b.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::CreateR0(num_partitions))), + LiteralUtil::CreateR0(num_partitions_))), ComparisonDirection::kLt)); // Collective-permute for the next window. We don't need it for the last // iteration, so we use a conditional around the collective-permute. HloInstruction* conditional; { - SpmdBuilder cp_b("window_collective_permute", original_hlo); + SpmdBuilder cp_b("window_collective_permute", visiting_hlo_); { auto p = cp_b.AddInstruction(HloInstruction::CreateParameter( 0, windowing_operand == 0 ? l->shape() : r->shape(), "window")); - std::vector> sd_pairs(num_partitions); - for (int64 source = 0; source < num_partitions; ++source) { + std::vector> sd_pairs(num_partitions_); + for (int64 source = 0; source < num_partitions_; ++source) { // 0 -> n-1, 1 -> 0, 2 -> 1, ... 
sd_pairs[source] = {source, - (source - 1 + num_partitions) % num_partitions}; + (source - 1 + num_partitions_) % num_partitions_}; } - lhs.state() - .collective_ops_creator.create_cross_partition_collective_permute( - &cp_b, p, sd_pairs, (*lhs.state().next_channel_id)++); + collective_ops_creator_.create_cross_partition_collective_permute( + &cp_b, p, sd_pairs, (*next_channel_id_)++); } - SpmdBuilder ncp_b("last_iteration_noop", original_hlo); + SpmdBuilder ncp_b("last_iteration_noop", visiting_hlo_); { ncp_b.AddInstruction(HloInstruction::CreateParameter( 0, windowing_operand == 0 ? l->shape() : r->shape(), "window")); @@ -391,9 +419,9 @@ StatusOr PartitionBaseCase( conditional = body_b.AddInstruction(HloInstruction::CreateConditional( windowing_operand == 0 ? l->shape() : r->shape(), has_more, windowing_operand == 0 ? l : r, - module->AddEmbeddedComputation(cp_b.Build()), + module_->AddEmbeddedComputation(cp_b.Build()), windowing_operand == 0 ? l : r, - module->AddEmbeddedComputation(ncp_b.Build()))); + module_->AddEmbeddedComputation(ncp_b.Build()))); } if (windowing_operand == 0) { l = conditional; @@ -402,7 +430,7 @@ StatusOr PartitionBaseCase( } body_b.AddInstruction(HloInstruction::CreateTuple({l, r, o, i})); - SpmdBuilder cond_b("windowed_dot_general_cond", original_hlo); + SpmdBuilder cond_b("windowed_dot_general_cond", visiting_hlo_); auto cond_param = cond_b.AddInstruction(HloInstruction::CreateParameter( /*parameter_number=*/0, ShapeUtil::MakeTupleShape({lhs.hlo()->shape(), rhs.hlo()->shape(), @@ -413,53 +441,56 @@ StatusOr PartitionBaseCase( cond_b.AddInstruction(HloInstruction::CreateCompare( ShapeUtil::MakeShape(PRED, {}), cond_i, cond_b.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::CreateR0(num_partitions))), + LiteralUtil::CreateR0(num_partitions_))), ComparisonDirection::kLt)); - auto while_loop = b->AddInstruction(HloInstruction::CreateWhile( - cond_param->shape(), module->AddEmbeddedComputation(cond_b.Build()), - module->AddEmbeddedComputation(body_b.Build()), - b->AddInstruction(HloInstruction::CreateTuple( + auto while_loop = b_.AddInstruction(HloInstruction::CreateWhile( + cond_param->shape(), module_->AddEmbeddedComputation(cond_b.Build()), + module_->AddEmbeddedComputation(body_b.Build()), + b_.AddInstruction(HloInstruction::CreateTuple( {lhs.hlo(), rhs.hlo(), result_buffer, iteration})))); - windowed_dot_general_loops->push_back({while_loop, windowing_operand, + windowed_dot_general_loops_.push_back({while_loop, windowing_operand, windowed_at_contracting_dims, windowed_at_batch_dims}); - auto result = b->AddInstruction(HloInstruction::CreateGetTupleElement( - result_buffer->shape(), while_loop, 2)); - if (!ShapeUtil::Compatible(padded_result_buffer_shape, - unpadded_result_buffer_shape)) { - result = b->AddInstruction(HloInstruction::CreateSlice( - unpadded_result_buffer_shape, result, - std::vector(padded_result_buffer_shape.rank(), 0), - unpadded_result_buffer_shape.dimensions(), - std::vector(padded_result_buffer_shape.rank(), 1))); - } - return result; + SetPartitionedHlo(hlo, [&] { + auto result = b_.AddInstruction(HloInstruction::CreateGetTupleElement( + result_buffer->shape(), while_loop, 2)); + if (!ShapeUtil::Compatible(padded_result_buffer_shape, + unpadded_result_buffer_shape)) { + result = b_.AddInstruction(HloInstruction::CreateSlice( + unpadded_result_buffer_shape, result, + std::vector(padded_result_buffer_shape.rank(), 0), + unpadded_result_buffer_shape.dimensions(), + std::vector(padded_result_buffer_shape.rank(), 1))); + } + 
return result; + }); + return Status::OK(); }; - if (output_lhs_non_contracting_partitions == num_partitions && + if (output_lhs_non_contracting_partitions == num_partitions_ && output_sharding_transposed_to_match_lhs == lhs_sharding && - ShapeSizeInBytes(rhs.base_shape()) >= - threshold_for_windowed_einsum_mib * 1024 * 1024) { - if (rhs_contracting_partitions == num_partitions) { + ShapeSizeInBytes(hlo->operand(1)->shape()) >= + options_.threshold_for_windowed_einsum_mib * 1024 * 1024) { + if (rhs_contracting_partitions == num_partitions_) { return emit_windowed_dot_general(0, 1, true, false); } - if (rhs_non_contracting_partitions == num_partitions) { + if (rhs_non_contracting_partitions == num_partitions_) { return emit_windowed_dot_general(0, 1, false, false); } - if (rhs_batch_partitions == num_partitions) { + if (rhs_batch_partitions == num_partitions_) { return emit_windowed_dot_general(0, 1, false, true); } } - if (output_rhs_non_contracting_partitions == num_partitions && + if (output_rhs_non_contracting_partitions == num_partitions_ && output_sharding_transposed_to_match_rhs == rhs_sharding && - ShapeSizeInBytes(lhs.base_shape()) >= - threshold_for_windowed_einsum_mib * 1024 * 1024) { - if (lhs_contracting_partitions == num_partitions) { + ShapeSizeInBytes(hlo->operand(0)->shape()) >= + options_.threshold_for_windowed_einsum_mib * 1024 * 1024) { + if (lhs_contracting_partitions == num_partitions_) { return emit_windowed_dot_general(1, 0, true, false); } - if (lhs_non_contracting_partitions == num_partitions) { + if (lhs_non_contracting_partitions == num_partitions_) { return emit_windowed_dot_general(1, 0, false, false); } - if (lhs_batch_partitions == num_partitions) { + if (lhs_batch_partitions == num_partitions_) { return emit_windowed_dot_general(1, 0, false, true); } } @@ -467,18 +498,18 @@ StatusOr PartitionBaseCase( { // Try batch-parallel by resharding one operand, and allowing all-reduce. TF_ASSIGN_OR_RETURN( - HloInstruction * partitioned_dot, + bool emitted, try_emit_output_batch_partitioned_einsum_with_reshard(true)); - if (partitioned_dot) { - return partitioned_dot; + if (emitted) { + return Status::OK(); } } // LHS and RHS have the same partitioned contracting dimensions. if (lhs_contracting_partitions == rhs_contracting_partitions && - lhs_contracting_partitions == num_partitions) { - auto zero = b->AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::Zero(output_base_shape.element_type()))); + lhs_contracting_partitions == num_partitions_) { + auto zero = b_.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::Zero(hlo->shape().element_type()))); // Pad both sides with zero, since NaN at one side cannot be masked by zero // on the other side. 
if (ShapeSizeInBytes(lhs.base_shape()) < @@ -491,91 +522,100 @@ StatusOr PartitionBaseCase( rhs = rhs.Reshard(*lhs_sharding_transposed_to_match_rhs).PadWithValue(zero); } - TF_ASSIGN_OR_RETURN(auto dot, create_sharded_dot(lhs.hlo(), rhs.hlo(), b)); - auto ar = - lhs.state().collective_ops_creator.create_cross_partition_all_reduce( - b, dot, MakeBinaryAdd(output_base_shape.element_type(), module), {}, - (*lhs.state().next_channel_id)++); - ar->set_sharding(HloSharding::Replicate()); - return PartitionedHlo(ar, output_base_shape, lhs.state()) - .Reshard(output_sharding) - .hlo(); + TF_ASSIGN_OR_RETURN(auto dot, + create_sharded_dot(lhs.hlo(), rhs.hlo(), &b_)); + SetPartitionedHlo(hlo, [&] { + auto ar = collective_ops_creator_.create_cross_partition_all_reduce( + &b_, dot, MakeBinaryAdd(hlo->shape().element_type(), module_), + NewChannel()); + ar->set_sharding(HloSharding::Replicate()); + return PartitionedHlo(ar, hlo->shape(), MakePartitioningState()) + .Reshard(hlo->sharding()) + .hlo(); + }); + return Status::OK(); } // LHS and output have the same partitioned non-contracting dimensions. - if (lhs_non_contracting_partitions == num_partitions && - output_lhs_non_contracting_partitions == num_partitions && - lhs_sharding_transposed_to_match_output == output_sharding) { + if (lhs_non_contracting_partitions == num_partitions_ && + output_lhs_non_contracting_partitions == num_partitions_ && + lhs_sharding_transposed_to_match_output == hlo->sharding()) { auto rhs_replicated = rhs.Reshard(HloSharding::Replicate()).hlo(); TF_ASSIGN_OR_RETURN(auto dot, - create_sharded_dot(lhs.hlo(), rhs_replicated, b)); - return dot; + create_sharded_dot(lhs.hlo(), rhs_replicated, &b_)); + SetPartitionedHlo(hlo, [&] { return dot; }); + return Status::OK(); } // RHS and output have the same partitioned non-contracting dimensions. - if (rhs_non_contracting_partitions == num_partitions && - output_rhs_non_contracting_partitions == num_partitions && - rhs_sharding_transposed_to_match_output == output_sharding) { + if (rhs_non_contracting_partitions == num_partitions_ && + output_rhs_non_contracting_partitions == num_partitions_ && + rhs_sharding_transposed_to_match_output == hlo->sharding()) { auto lhs_replicated = lhs.Reshard(HloSharding::Replicate()).hlo(); TF_ASSIGN_OR_RETURN(auto dot, - create_sharded_dot(lhs_replicated, rhs.hlo(), b)); - return dot; + create_sharded_dot(lhs_replicated, rhs.hlo(), &b_)); + SetPartitionedHlo(hlo, [&] { return dot; }); + return Status::OK(); } // Output is batch partitioned. - if (output_batch_partitions == num_partitions) { + if (output_batch_partitions == num_partitions_) { auto resharded_lhs = lhs.Reshard(*output_sharding_transposed_to_match_lhs); auto resharded_rhs = rhs.Reshard(*output_sharding_transposed_to_match_rhs); TF_ASSIGN_OR_RETURN(auto dot, create_sharded_dot(resharded_lhs.hlo(), - resharded_rhs.hlo(), b)); - return dot; + resharded_rhs.hlo(), &b_)); + SetPartitionedHlo(hlo, [&] { return dot; }); + return Status::OK(); } // Output is partitioned along LHS non-contracting dimensions. 
- if (output_lhs_non_contracting_partitions == num_partitions) { + if (output_lhs_non_contracting_partitions == num_partitions_) { auto resharded_lhs = lhs.Reshard(*output_sharding_transposed_to_match_lhs); auto replicated_rhs = rhs.Reshard(HloSharding::Replicate()); - TF_ASSIGN_OR_RETURN(auto dot, create_sharded_dot(resharded_lhs.hlo(), - replicated_rhs.hlo(), b)); - return dot; + TF_ASSIGN_OR_RETURN( + auto dot, + create_sharded_dot(resharded_lhs.hlo(), replicated_rhs.hlo(), &b_)); + SetPartitionedHlo(hlo, [&] { return dot; }); + return Status::OK(); } // Output is partitioned along RHS non-contracting dimensions. - if (output_rhs_non_contracting_partitions == num_partitions) { + if (output_rhs_non_contracting_partitions == num_partitions_) { auto replicated_lhs = lhs.Reshard(HloSharding::Replicate()); auto resharded_rhs = rhs.Reshard(*output_sharding_transposed_to_match_rhs); TF_ASSIGN_OR_RETURN(auto dot, create_sharded_dot(replicated_lhs.hlo(), - resharded_rhs.hlo(), b)); - return dot; + resharded_rhs.hlo(), &b_)); + SetPartitionedHlo(hlo, [&] { return dot; }); + return Status::OK(); } // Returns true if it is beneficial to reshard the operand at `operand_idx` // across the contracting dimension. const auto should_partition_contracting_dim = [&](int64 operand_idx) { - if (!output_sharding.IsReplicated()) { + if (!hlo->sharding().IsReplicated()) { return false; } if (operand_idx == 0) { // If LHS and output are replicated, we compare the cost of all-gather // on RHS vs all-reduce on the output. - return (rhs_contracting_partitions == num_partitions) && + return (rhs_contracting_partitions == num_partitions_) && lhs.sharding().IsReplicated() && - ShapeUtil::ElementsIn(rhs.base_shape()) > - ShapeUtil::ElementsIn(output_base_shape); + ShapeUtil::ElementsIn(hlo->operand(1)->shape()) > + ShapeUtil::ElementsIn(hlo->shape()); } else { - return (lhs_contracting_partitions == num_partitions) && + return (lhs_contracting_partitions == num_partitions_) && rhs.sharding().IsReplicated() && - ShapeUtil::ElementsIn(lhs.base_shape()) > - ShapeUtil::ElementsIn(output_base_shape); + ShapeUtil::ElementsIn(hlo->operand(0)->shape()) > + ShapeUtil::ElementsIn(hlo->shape()); } }; // When the output is replicated and one of the operands is partitioned along // contracting dimension, align the other operand to be partitioned along // the contracting dimensions. 
- if (output_sharding.IsReplicated() && (should_partition_contracting_dim(0) || + if (hlo->sharding().IsReplicated() && (should_partition_contracting_dim(0) || should_partition_contracting_dim(1))) { - auto zero = b->AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::Zero(output_base_shape.element_type()))); + auto zero = b_.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::Zero(hlo->shape().element_type()))); if (should_partition_contracting_dim(0)) { lhs = lhs.Reshard(*rhs_sharding_transposed_to_match_lhs).PadWithValue(zero); @@ -585,361 +625,19 @@ StatusOr PartitionBaseCase( rhs = rhs.Reshard(*lhs_sharding_transposed_to_match_rhs).PadWithValue(zero); } - TF_ASSIGN_OR_RETURN(auto dot, create_sharded_dot(lhs.hlo(), rhs.hlo(), b)); - return lhs.state().collective_ops_creator.create_cross_partition_all_reduce( - b, dot, MakeBinaryAdd(output_base_shape.element_type(), module), {}, - (*lhs.state().next_channel_id)++); - } - return nullptr; -} - -StatusOr PartitionDot( - PartitionedHlo lhs, PartitionedHlo rhs, const Shape& output_base_shape, - const HloSharding& output_sharding, - const DotGeneralDimsMapping& dims_mapping, int64 num_partitions, - const std::function( - HloInstruction*, HloInstruction*, SpmdBuilder*)>& create_sharded_dot, - HloModule* module, HloInstruction* original_hlo, - int64 threshold_for_windowed_einsum_mib, SpmdBuilder* b, - std::vector* - windowed_dot_general_loops); - -StatusOr PartitionDotGroupOnBatch( - PartitionedHlo lhs, PartitionedHlo rhs, const Shape& output_base_shape, - const HloSharding& output_sharding, - const DotGeneralDimsMapping& dims_mapping, int64 num_partitions, - const std::function( - HloInstruction*, HloInstruction*, SpmdBuilder*)>& create_sharded_dot, - HloModule* module, HloInstruction* original_hlo, - int64 threshold_for_windowed_einsum_mib, SpmdBuilder* b, - std::vector* - windowed_dot_general_loops) { - std::vector lhs_dims; - std::vector rhs_dims; - std::vector output_dims; - auto lhs_sharding_dims_adjusted_to_output = - lhs.sharding().tile_assignment().dimensions(); - auto rhs_sharding_dims_adjusted_to_output = - lhs.sharding().tile_assignment().dimensions(); - auto output_sharding_dims_adjusted_to_lhs = - output_sharding.tile_assignment().dimensions(); - bool lhs_rhs_dims_matching = true; - for (const auto& dim : dims_mapping.batch_dims) { - lhs_dims.push_back(dim.lhs); - rhs_dims.push_back(dim.rhs); - output_dims.push_back(dim.output); - if (lhs_sharding_dims_adjusted_to_output[dim.lhs] != - rhs_sharding_dims_adjusted_to_output[dim.rhs]) { - lhs_rhs_dims_matching = false; - } - lhs_sharding_dims_adjusted_to_output[dim.lhs] = - output_sharding.tile_assignment().dim(dim.output); - rhs_sharding_dims_adjusted_to_output[dim.rhs] = - output_sharding.tile_assignment().dim(dim.output); - output_sharding_dims_adjusted_to_lhs[dim.output] = - lhs.sharding().tile_assignment().dim(dim.lhs); - } - auto lhs_grouped = GroupShardingOnDims(lhs.sharding(), lhs_dims); - auto rhs_grouped = GroupShardingOnDims(rhs.sharding(), rhs_dims); - auto output_grouped = GroupShardingOnDims(output_sharding, output_dims); - if (lhs_rhs_dims_matching) { - if (ShapeUtil::ByteSizeOf(lhs.base_shape()) > - ShapeUtil::ByteSizeOf(rhs.base_shape())) { - rhs_grouped = AlignGroupsWith(std::move(rhs_grouped), lhs_grouped); - rhs = rhs.Reshard(UngroupSharding(rhs_grouped)); - } else { - lhs_grouped = AlignGroupsWith(std::move(lhs_grouped), rhs_grouped); - lhs = lhs.Reshard(UngroupSharding(lhs_grouped)); - } - auto reshaped_output_tiling = 
output_sharding.tile_assignment(); - reshaped_output_tiling.Reshape(output_sharding_dims_adjusted_to_lhs); - output_grouped = AlignGroupsWith( - GroupShardingOnDims(HloSharding::Tile(reshaped_output_tiling), - output_dims), - lhs_grouped); - } else { - auto reshaped_lhs_tiling = lhs.sharding().tile_assignment(); - reshaped_lhs_tiling.Reshape(lhs_sharding_dims_adjusted_to_output); - lhs_grouped = AlignGroupsWith( - GroupShardingOnDims(HloSharding::Tile(reshaped_lhs_tiling), lhs_dims), - output_grouped); - lhs = lhs.Reshard(UngroupSharding(lhs_grouped)); - auto reshaped_rhs_tiling = rhs.sharding().tile_assignment(); - reshaped_rhs_tiling.Reshape(rhs_sharding_dims_adjusted_to_output); - rhs_grouped = AlignGroupsWith( - GroupShardingOnDims(HloSharding::Tile(reshaped_rhs_tiling), rhs_dims), - output_grouped); - rhs = rhs.Reshard(UngroupSharding(rhs_grouped)); - } - auto per_group_partitioner_state = CreatePerGroupPartitioningState( - lhs.state(), lhs_grouped.device_groups, b); - lhs.hlo()->set_sharding(lhs_grouped.sharding); - rhs.hlo()->set_sharding(rhs_grouped.sharding); - CHECK(lhs.hlo() != rhs.hlo() || lhs_grouped.sharding == rhs_grouped.sharding); - TF_ASSIGN_OR_RETURN( - auto dot, - PartitionDot( - PartitionedHlo(lhs.hlo(), - GetPerGroupBaseShape(lhs_grouped, lhs.base_shape()), - per_group_partitioner_state), - PartitionedHlo(rhs.hlo(), - GetPerGroupBaseShape(rhs_grouped, rhs.base_shape()), - per_group_partitioner_state), - GetPerGroupBaseShape(output_grouped, output_base_shape), - output_grouped.sharding, dims_mapping, - num_partitions / lhs_grouped.device_groups.size(), create_sharded_dot, - module, original_hlo, threshold_for_windowed_einsum_mib, b, - windowed_dot_general_loops)); - // Reset the LHS sharding to the ungrouped one. - lhs.hlo()->set_sharding(UngroupSharding(lhs_grouped)); - rhs.hlo()->set_sharding(UngroupSharding(rhs_grouped)); - dot->set_sharding(UngroupSharding(output_grouped)); - return PartitionedHlo(dot, output_base_shape, lhs.state()) - .Reshard(output_sharding) - .hlo(); -} - -StatusOr PartitionDotGroupOnNonContracting( - bool lhs_matching, PartitionedHlo matching, PartitionedHlo other, - int64 matching_contracting_partitions, int64 other_contracting_partitions, - int64 matching_non_contracting_partitions, - int64 other_non_contracting_partitions, - int64 output_other_non_contracting_partitions, - const Shape& output_base_shape, const HloSharding& output_sharding, - const DotGeneralDimsMapping& dims_mapping, int64 num_partitions, - const std::function( - HloInstruction*, HloInstruction*, SpmdBuilder*)>& create_sharded_dot, - HloModule* module, HloInstruction* original_hlo, - int64 threshold_for_windowed_einsum_mib, SpmdBuilder* b, - std::vector* - windowed_dot_general_loops) { - const bool may_replicate_other_contracting_dims = - (other_contracting_partitions == matching_non_contracting_partitions && - other_non_contracting_partitions == - output_other_non_contracting_partitions); - const bool may_replicate_other_non_contracting_dims = - matching_non_contracting_partitions == other_non_contracting_partitions && - matching_contracting_partitions == other_contracting_partitions; - std::vector other_group_dims; - if (may_replicate_other_contracting_dims && - (!may_replicate_other_non_contracting_dims || - ShapeUtil::ByteSizeOf(other.base_shape()) <= - ShapeUtil::ByteSizeOf(output_base_shape))) { - for (const auto& dim : dims_mapping.contracting_dims) { - other_group_dims.push_back(lhs_matching ? 
dim.rhs : dim.lhs); - } - } else if (may_replicate_other_non_contracting_dims) { - for (const auto& dim : lhs_matching - ? dims_mapping.rhs_non_contracting_dims - : dims_mapping.lhs_non_contracting_dims) { - other_group_dims.push_back(lhs_matching ? dim.rhs : dim.lhs); - } - } else { - return nullptr; - } - auto matching_sharding_dims = - matching.sharding().tile_assignment().dimensions(); - std::vector matching_dims; - std::vector output_dims; - // Make sure the partitioning on matching's non-contracting dimensions - // defines the same device groups for both matching and output. - for (const auto& dim : lhs_matching ? dims_mapping.lhs_non_contracting_dims - : dims_mapping.rhs_non_contracting_dims) { - int64 md = lhs_matching ? dim.lhs : dim.rhs; - matching_sharding_dims[md] = - output_sharding.tile_assignment().dim(dim.output); - matching_dims.push_back(md); - output_dims.push_back(dim.output); - } - auto output_grouped = GroupShardingOnDims(output_sharding, output_dims); - auto reshaped_matching_tiling = matching.sharding().tile_assignment(); - reshaped_matching_tiling.Reshape(matching_sharding_dims); - auto matching_grouped = AlignGroupsWith( - GroupShardingOnDims(HloSharding::Tile(reshaped_matching_tiling), - matching_dims), - output_grouped); - matching = matching.Reshard(UngroupSharding(matching_grouped)); - - auto other_grouped = - AlignGroupsWith(GroupShardingOnDims(other.sharding(), other_group_dims), - output_grouped, /*ignore_group_order=*/true); - other = other.Reshard(UngroupSharding(other_grouped)); - auto partially_replicated_other = - other.ReplicatePartial(other_grouped.group_dims); - auto per_group_partitioner_state = CreatePerGroupPartitioningState( - matching.state(), matching_grouped.device_groups, b); - matching.hlo()->set_sharding(matching_grouped.sharding); - partially_replicated_other->set_sharding(other_grouped.sharding); - auto matching_p = PartitionedHlo( - matching.hlo(), - GetPerGroupBaseShape(matching_grouped, matching.base_shape()), - per_group_partitioner_state); - auto other_p = PartitionedHlo(partially_replicated_other, other.base_shape(), - per_group_partitioner_state); - TF_ASSIGN_OR_RETURN( - auto dot, - PartitionDot(lhs_matching ? matching_p : other_p, - lhs_matching ? other_p : matching_p, - GetPerGroupBaseShape(output_grouped, output_base_shape), - output_grouped.sharding, dims_mapping, - num_partitions / matching_grouped.device_groups.size(), - create_sharded_dot, module, original_hlo, - threshold_for_windowed_einsum_mib, b, - windowed_dot_general_loops)); - // Reset matching's sharding to the ungrouped one. - matching.hlo()->set_sharding(UngroupSharding(matching_grouped)); - return dot; -} - -// Recursive partitioning function. If there are partial dimensions matching in -// the operands and output, group the devices and recursively partition the -// in-group dot. -StatusOr PartitionDot( - PartitionedHlo lhs, PartitionedHlo rhs, const Shape& output_base_shape, - const HloSharding& output_sharding, - const DotGeneralDimsMapping& dims_mapping, int64 num_partitions, - const std::function( - HloInstruction*, HloInstruction*, SpmdBuilder*)>& create_sharded_dot, - HloModule* module, HloInstruction* original_hlo, - int64 threshold_for_windowed_einsum_mib, SpmdBuilder* b, - std::vector* - windowed_dot_general_loops) { - // lhs_rhs_or_output: 0 lhs, 1 rhs, 2 output. 
- auto get_partitions_for_dims = - [&](const HloSharding& sharding, - absl::Span dims, - int lhs_rhs_or_output) { - int64 partitions = 1; - if (sharding.IsTileMaximal()) { - return partitions; - } - for (const auto& dim : dims) { - if (lhs_rhs_or_output == 0) { - partitions *= sharding.tile_assignment().dim(dim.lhs); - } else if (lhs_rhs_or_output == 1) { - partitions *= sharding.tile_assignment().dim(dim.rhs); - } else { - CHECK_EQ(lhs_rhs_or_output, 2); - partitions *= sharding.tile_assignment().dim(dim.output); - } - } - return partitions; - }; - const int64 lhs_batch_partitions = - get_partitions_for_dims(lhs.sharding(), dims_mapping.batch_dims, 0); - const int64 rhs_batch_partitions = - get_partitions_for_dims(rhs.sharding(), dims_mapping.batch_dims, 1); - const int64 output_batch_partitions = - get_partitions_for_dims(output_sharding, dims_mapping.batch_dims, 2); - const int64 lhs_contracting_partitions = - get_partitions_for_dims(lhs.sharding(), dims_mapping.contracting_dims, 0); - const int64 rhs_contracting_partitions = - get_partitions_for_dims(rhs.sharding(), dims_mapping.contracting_dims, 1); - const int64 lhs_non_contracting_partitions = get_partitions_for_dims( - lhs.sharding(), dims_mapping.lhs_non_contracting_dims, 0); - const int64 rhs_non_contracting_partitions = get_partitions_for_dims( - rhs.sharding(), dims_mapping.rhs_non_contracting_dims, 1); - const int64 output_lhs_non_contracting_partitions = get_partitions_for_dims( - output_sharding, dims_mapping.lhs_non_contracting_dims, 2); - const int64 output_rhs_non_contracting_partitions = get_partitions_for_dims( - output_sharding, dims_mapping.rhs_non_contracting_dims, 2); - TF_ASSIGN_OR_RETURN( - auto try_partitioned_dot, - PartitionBaseCase( - lhs, rhs, output_base_shape, output_sharding, dims_mapping, - num_partitions, create_sharded_dot, module, original_hlo, - lhs_batch_partitions, rhs_batch_partitions, output_batch_partitions, - lhs_contracting_partitions, rhs_contracting_partitions, - lhs_non_contracting_partitions, rhs_non_contracting_partitions, - output_lhs_non_contracting_partitions, - output_rhs_non_contracting_partitions, - threshold_for_windowed_einsum_mib, b, windowed_dot_general_loops)); - if (try_partitioned_dot) { - return try_partitioned_dot; + TF_ASSIGN_OR_RETURN(auto dot, + create_sharded_dot(lhs.hlo(), rhs.hlo(), &b_)); + SetPartitionedHlo(hlo, [&] { + auto ar = collective_ops_creator_.create_cross_partition_all_reduce( + &b_, dot, MakeBinaryAdd(hlo->shape().element_type(), module_), + NewChannel()); + ar->set_sharding(HloSharding::Replicate()); + return PartitionedHlo(ar, hlo->shape(), MakePartitioningState()).hlo(); + }); + return Status::OK(); } - // Recursively partition on different types of dimensions. - // - // Case 1: Group partitions by batch. - if (lhs_batch_partitions == rhs_batch_partitions && - lhs_batch_partitions == output_batch_partitions && - lhs_batch_partitions > 1) { - TF_ASSIGN_OR_RETURN( - auto dot, - PartitionDotGroupOnBatch( - lhs, rhs, output_base_shape, output_sharding, dims_mapping, - num_partitions, create_sharded_dot, module, original_hlo, - threshold_for_windowed_einsum_mib, b, windowed_dot_general_loops)); - if (dot) { - return dot; - } - } - - // Case 2: Group partitions by non-contracting dimensions. 
- const bool may_group_on_lhs_non_contracting = - lhs_non_contracting_partitions == output_lhs_non_contracting_partitions && - lhs_non_contracting_partitions > 1; - const bool may_group_on_rhs_non_contracting = - rhs_non_contracting_partitions == output_rhs_non_contracting_partitions && - rhs_non_contracting_partitions > 1; - if (may_group_on_lhs_non_contracting || may_group_on_rhs_non_contracting) { - // If both match output non-contracting dimensions, choose the one which - // will result in smaller replication of the other operand. - const bool lhs_matching = - may_group_on_lhs_non_contracting && - (!may_group_on_rhs_non_contracting || - lhs_non_contracting_partitions * - ShapeUtil::ByteSizeOf(rhs.hlo()->shape()) <= - rhs_non_contracting_partitions * - ShapeUtil::ByteSizeOf(lhs.hlo()->shape())); - - TF_ASSIGN_OR_RETURN( - auto dot, - PartitionDotGroupOnNonContracting( - lhs_matching, lhs_matching ? lhs : rhs, lhs_matching ? rhs : lhs, - lhs_matching ? lhs_contracting_partitions - : rhs_contracting_partitions, - lhs_matching ? rhs_contracting_partitions - : lhs_contracting_partitions, - lhs_matching ? lhs_non_contracting_partitions - : rhs_non_contracting_partitions, - lhs_matching ? rhs_non_contracting_partitions - : lhs_non_contracting_partitions, - lhs_matching ? output_rhs_non_contracting_partitions - : output_lhs_non_contracting_partitions, - output_base_shape, output_sharding, dims_mapping, num_partitions, - create_sharded_dot, module, original_hlo, - threshold_for_windowed_einsum_mib, b, windowed_dot_general_loops)); - if (dot) { - return dot; - } - } - - // Default action. - TF_ASSIGN_OR_RETURN(auto dot, create_sharded_dot(lhs.Replicate().hlo(), - rhs.Replicate().hlo(), b)); - dot->set_sharding(HloSharding::Replicate()); - return PartitionedHlo(dot, output_base_shape, lhs.state()) - .Reshard(output_sharding) - .hlo(); -} - -} // namespace - -Status SpmdPartitioningVisitor::HandleDotHelper( - HloInstruction* hlo, const DotGeneralDimsMapping& dims_mapping, - const std::function( - HloInstruction*, HloInstruction*, SpmdBuilder*)>& create_sharded_dot) { - auto& lhs = GetPartitionedHlo(hlo->operand(0)); - auto& rhs = GetPartitionedHlo(hlo->operand(1)); - TF_ASSIGN_OR_RETURN( - auto partitioned_dot, - PartitionDot(lhs, rhs, hlo->shape(), hlo->sharding(), dims_mapping, - num_partitions_, create_sharded_dot, module_, hlo, - options_.threshold_for_windowed_einsum_mib, &b_, - &windowed_dot_general_loops_)); - SetPartitionedHlo(hlo, [&] { return partitioned_dot; }); - return Status::OK(); + return DefaultAction(hlo); } namespace { @@ -1082,7 +780,6 @@ Status SinkInputNodesIntoWindowedDotGeneralLoopOnContractingDimensions( [](const HloInstruction* a, const HloInstruction* b) { return a->unique_id() < b->unique_id(); }); - worklist.reserve(nullaries_to_sink.size()); for (auto inst : nullaries_to_sink) { worklist.push_back(inst); } diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc index 7aaa3e32b2a..bac5c812814 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc @@ -165,6 +165,16 @@ template namespace { +// Returns the replica group configuration where each replica belongs to its own +// group. 
+std::vector CreateReplicaGroups(int64 num_replicas) { + std::vector groups(num_replicas); + for (int64 i = 0; i < num_replicas; ++i) { + groups[i].add_replica_ids(i); + } + return groups; +} + // Clears all sharding attributes from instructions in the module. This must be // called only after all SPMD transformation is complete. Status ClearShardingAttributes(HloModule* module) { @@ -185,28 +195,6 @@ Status ClearShardingAttributes(HloModule* module) { return Status::OK(); } -std::vector> GetPartitionGroupsForReplication( - const HloSharding& sharding, absl::Span replication_dims) { - int64 group_size = 1; - for (int64 i : replication_dims) { - group_size *= sharding.tile_assignment().dim(i); - } - std::vector> partition_groups( - sharding.tile_assignment().num_elements() / group_size); - sharding.tile_assignment().Each( - [&](absl::Span indices, int64 partition) { - int64 group_id = 0; - for (int64 i = 0; i < indices.size(); ++i) { - if (!absl::c_linear_search(replication_dims, i)) { - group_id *= sharding.tile_assignment().dim(i); - group_id += indices[i]; - } - } - partition_groups[group_id].push_back(partition); - }); - return partition_groups; -} - } // namespace HloInstruction* SpmdBuilder::AddInstruction( @@ -676,57 +664,42 @@ PartitionedHlo PartitionedHlo::Replicate() { } // 'Tiled' to 'Replicated'. - std::vector all_dims(shape.rank()); - std::iota(all_dims.begin(), all_dims.end(), 0); - HloInstruction* result = ReplicatePartial(all_dims); - result->set_sharding(HloSharding::Replicate()); - return update_cache(PartitionedHlo(result, base_shape_, state_)); -} - -HloInstruction* PartitionedHlo::ReplicatePartial(absl::Span dims) { - CHECK(!sharding().IsTileMaximal()); - const Shape& shard_shape = hlo()->shape(); - Shape target_shape = shard_shape; - Shape padded_target_shape = shard_shape; - for (int64 i : dims) { - padded_target_shape.set_dimensions( - i, shard_shape.dimensions(i) * sharding().tile_assignment().dim(i)); - target_shape.set_dimensions(i, base_shape().dimensions(i)); - } - HloInstruction* result = nullptr; if (state_.collective_ops_creator.create_cross_partition_all_gather) { - result = state_.partitioner->AllGatherShards(state_.b, hlo_, sharding(), - NewChannel(), dims, - state_.collective_ops_creator); + result = state_.partitioner->AllGatherShards(state_.b, hlo_, sharding, + NewChannel()); + } + Shape padded_base_shape = shape; + for (int64 i = 0; i < padded_base_shape.rank(); ++i) { + padded_base_shape.set_dimensions( + i, shape.dimensions(i) * sharding.tile_assignment().dim(i)); } if (result == nullptr) { auto zero = state_.b->AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::Zero(shard_shape.element_type()))); + LiteralUtil::Zero(shape.element_type()))); auto zero_bcast = state_.b->AddInstruction( - HloInstruction::CreateBroadcast(padded_target_shape, zero, {})); - auto offsets = MakePartitionOffsets(padded_target_shape, sharding(), - state_.partition_id, state_.b, dims); + HloInstruction::CreateBroadcast(padded_base_shape, zero, {})); auto dus = state_.b->AddInstruction(HloInstruction::CreateDynamicUpdateSlice( - padded_target_shape, zero_bcast, hlo_, offsets)); + padded_base_shape, zero_bcast, hlo_, + MakePartitionOffsets(padded_base_shape, sharding, + state_.partition_id, state_.b))); HloComputation* reduction = - MakeBinaryAdd(shard_shape.element_type(), state_.module); + MakeBinaryAdd(shape.element_type(), state_.module); auto all_reduce = state_.collective_ops_creator.create_cross_partition_all_reduce( - state_.b, dus, reduction, - 
GetPartitionGroupsForReplication(sharding(), dims), NewChannel()); + state_.b, dus, reduction, NewChannel()); result = all_reduce; } - if (!ShapeUtil::Compatible(target_shape, padded_target_shape)) { - std::vector start_indices(target_shape.rank(), 0); - std::vector strides(target_shape.rank(), 1); - result = state_.b->AddInstruction( - HloInstruction::CreateSlice(target_shape, result, start_indices, - base_shape_.dimensions(), strides)); + if (!ShapeUtil::Compatible(base_shape_, padded_base_shape)) { + std::vector start_indices(shape.rank(), 0); + std::vector strides(shape.rank(), 1); + result = state_.b->AddInstruction(HloInstruction::CreateSlice( + base_shape_, result, start_indices, base_shape_.dimensions(), strides)); } - return result; + result->set_sharding(HloSharding::Replicate()); + return update_cache(PartitionedHlo(result, base_shape_, state_)); } PartitionedHlo PartitionedHlo::Broadcast() const { @@ -755,7 +728,7 @@ PartitionedHlo PartitionedHlo::Broadcast() const { MakeBinaryAdd(shape.element_type(), state_.module); auto result = state_.collective_ops_creator.create_cross_partition_all_reduce( - state_.b, operand, reduction, {}, NewChannel()); + state_.b, operand, reduction, NewChannel()); result->set_sharding(HloSharding::Replicate()); return PartitionedHlo(result, base_shape_, state_); } @@ -823,7 +796,7 @@ PartitionedHlo PartitionedHlo::ReshardWithAllToAll( auto padded_hlo = PadToShape(hlo_, padded_shape, state_.b); // The order of ids in the group must follow the temp_target sharding. - std::vector> groups( + std::vector groups( temp_target.tile_assignment().num_elements() / group_size); temp_target.tile_assignment().Each( [&](absl::Span indices, int64 device) { @@ -837,7 +810,7 @@ PartitionedHlo PartitionedHlo::ReshardWithAllToAll( group_id += indices[dim]; } } - groups[group_id].push_back(device); + groups[group_id].add_replica_ids(device); }); HloInstruction* result = nullptr; @@ -1054,7 +1027,7 @@ Status SpmdPartitioningVisitor::HandleConcatenate(HloInstruction* hlo) { offset += operand->shape().dimensions(dimension); } auto all_reduce = collective_ops_creator_.create_cross_partition_all_reduce( - &b_, temp_output, MakeBinaryAdd(hlo->shape().element_type(), module_), {}, + &b_, temp_output, MakeBinaryAdd(hlo->shape().element_type(), module_), NewChannel()); SetPartitionedHlo(hlo, [&] { auto start_indices = @@ -2180,7 +2153,7 @@ Status SpmdPartitioningVisitor::HandleGather(HloInstruction* hlo) { // Combine from different partitions. 
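Stepping back to the PartitionedHlo::Replicate() hunk above: the restored 'Tiled' to 'Replicated' path has each partition write its local shard into a zero-initialized buffer of the padded full shape at its own offset (the dynamic-update-slice), after which a cross-partition all-reduce sum yields the replicated tensor everywhere. The following is a minimal standalone sketch of that idea on plain arrays; the two-partition layout, shard size, and variable names are illustrative assumptions, not part of the patch.

    #include <cstdint>
    #include <iostream>
    #include <vector>

    int main() {
      // Illustrative setup: a rank-1 tensor of base length 7, tiled across two
      // partitions with shard size 4, so the padded full length is 8.
      const int64_t kShard = 4, kPartitions = 2, kPadded = kShard * kPartitions;
      const int64_t kBase = 7;  // the last element of partition 1 is padding
      std::vector<std::vector<int64_t>> shards = {{1, 2, 3, 4}, {5, 6, 7, 0}};

      // Each partition writes its shard into a zero buffer at its own offset
      // (the dynamic-update-slice step), ...
      std::vector<std::vector<int64_t>> padded(
          kPartitions, std::vector<int64_t>(kPadded, 0));
      for (int64_t p = 0; p < kPartitions; ++p)
        for (int64_t i = 0; i < kShard; ++i)
          padded[p][p * kShard + i] = shards[p][i];

      // ... and a cross-partition all-reduce (sum) makes the full tensor
      // visible on every partition; the zeros elsewhere make the sum a merge.
      std::vector<int64_t> replicated(kPadded, 0);
      for (int64_t p = 0; p < kPartitions; ++p)
        for (int64_t i = 0; i < kPadded; ++i) replicated[i] += padded[p][i];

      // Finally, slice back to the unpadded base shape.
      replicated.resize(kBase);
      for (int64_t v : replicated) std::cout << v << " ";  // 1 2 3 4 5 6 7
      std::cout << "\n";
    }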
auto ar = collective_ops_creator_.create_cross_partition_all_reduce( &b_, filtered, - MakeBinaryAdd(filtered->shape().element_type(), module_), {}, + MakeBinaryAdd(filtered->shape().element_type(), module_), NewChannel()); ar->set_sharding(HloSharding::Replicate()); SetPartitionedHlo(hlo, [&]() { @@ -2476,7 +2449,7 @@ Status SpmdPartitioningVisitor::HandleReduce(HloInstruction* hlo) { if (reduce_sharded_dimension) { CHECK(local_reduce->shape().IsArray()); reduce = collective_ops_creator_.create_cross_partition_all_reduce( - &b_, local_reduce, hlo->to_apply(), {}, NewChannel()); + &b_, local_reduce, hlo->to_apply(), NewChannel()); reduce->set_sharding(HloSharding::Replicate()); } else { reduce = local_reduce; @@ -2944,36 +2917,13 @@ SPMDCollectiveOpsCreator GetDefaultCollectiveOpsCreator(int64 num_partitions, [](SpmdBuilder* b) { return b->AddInstruction(HloInstruction::CreatePartitionId()); }, - [num_replicas, num_partitions]( - SpmdBuilder* b, HloInstruction* operand, HloComputation* reduction, - const std::vector>& partition_subgroups, - int64 channel_id) { - if (partition_subgroups.size() <= 1) { - std::vector groups(num_replicas); - // TODO(yuanzx): Unify subgroup definition with AllToAll. - for (int64 i = 0; i < num_replicas; ++i) { - groups[i].add_replica_ids(i); - } - return b->AddInstruction(HloInstruction::CreateAllReduce( - operand->shape(), {operand}, reduction, groups, - /*constrain_layout=*/false, channel_id, - /*use_global_device_ids=*/false)); - } - - std::vector device_groups; - device_groups.reserve(partition_subgroups.size() * num_replicas); - for (int64 i = 0; i < num_replicas; ++i) { - for (const auto& pgroup : partition_subgroups) { - device_groups.emplace_back(); - for (int64 pid : pgroup) { - device_groups.back().add_replica_ids(i * num_partitions + pid); - } - } - } + [num_replicas](SpmdBuilder* b, HloInstruction* operand, + HloComputation* reduction, int64 channel_id) { return b->AddInstruction(HloInstruction::CreateAllReduce( - operand->shape(), {operand}, reduction, device_groups, + operand->shape(), {operand}, reduction, + CreateReplicaGroups(num_replicas), /*constrain_layout=*/false, channel_id, - /*use_global_device_ids=*/true)); + /*use_global_device_ids=*/false)); }, [](SpmdBuilder* b, HloInstruction* operand, std::vector>& src_dst_pairs, @@ -2982,20 +2932,14 @@ SPMDCollectiveOpsCreator GetDefaultCollectiveOpsCreator(int64 num_partitions, operand->shape(), operand, src_dst_pairs, channel_id)); }, [](SpmdBuilder* b, absl::Span operands, - const std::vector>& partition_subgroups, - int64 channel_id, absl::optional split_dimension) { + const std::vector& replica_groups, int64 channel_id, + absl::optional split_dimension) { std::vector shapes(operands.size(), operands[0]->shape()); const Shape output_shape = (shapes.size() == 1) ? 
shapes[0] : ShapeUtil::MakeTupleShape(shapes); - std::vector groups(partition_subgroups.size()); - for (int64 i = 0; i < groups.size(); ++i) { - for (int64 id : partition_subgroups[i]) { - groups[i].add_replica_ids(id); - } - } return b->AddInstruction(HloInstruction::CreateAllToAll( - output_shape, operands, groups, + output_shape, operands, replica_groups, /*constrain_layout=*/false, channel_id, split_dimension)); }, [num_replicas, num_partitions]( @@ -3026,10 +2970,10 @@ SpmdPartitioner::SpmdPartitioner(int64 num_partitions, int64 num_replicas, num_partitions, num_replicas, std::move(options), GetDefaultCollectiveOpsCreator(num_partitions, num_replicas)) {} -HloInstruction* SpmdPartitioner::AllGatherShards( - SpmdBuilder* b, HloInstruction* operand, const HloSharding& sharding, - int64 channel_id, absl::Span selected_dims, - const SPMDCollectiveOpsCreator& collectives_creator) { +HloInstruction* SpmdPartitioner::AllGatherShards(SpmdBuilder* b, + HloInstruction* operand, + const HloSharding& sharding, + int64 channel_id) { CHECK(!sharding.IsTileMaximal()); // Add one leading dimension to gather all partitions. std::vector shape; @@ -3039,17 +2983,18 @@ HloInstruction* SpmdPartitioner::AllGatherShards( } auto reshape = b->AddInstruction(HloInstruction::CreateReshape( ShapeUtil::MakeShape(operand->shape().element_type(), shape), operand)); - auto partition_subgroups = - GetPartitionGroupsForReplication(sharding, selected_dims); - shape[0] = partition_subgroups[0].size(); - auto result = collectives_creator.create_cross_partition_all_gather( + std::vector> partition_subgroups(1); + for (int64 pid : sharding.tile_assignment()) { + partition_subgroups[0].push_back(pid); + } + shape[0] = sharding.tile_assignment().num_elements(); + auto result = collective_ops_creator_.create_cross_partition_all_gather( b, reshape, ShapeUtil::MakeShape(operand->shape().element_type(), shape), partition_subgroups, channel_id, /*all_gather_dimension=*/0); // If n > 1 dimensions are partitioned, split the leading dimension to n. std::vector tiled_dims; for (int64 i = 0; i < sharding.tile_assignment().num_dimensions(); ++i) { - if (sharding.tile_assignment().dim(i) > 1 && - absl::c_linear_search(selected_dims, i)) { + if (sharding.tile_assignment().dim(i) > 1) { tiled_dims.push_back(i); } } @@ -3071,8 +3016,7 @@ HloInstruction* SpmdPartitioner::AllGatherShards( std::vector xpose_permutation(result->shape().rank()); int64 split_dims_added = 0; for (int64 i = 0; i < xpose_permutation.size(); ++i) { - if (sharding.tile_assignment().dim(i - split_dims_added) == 1 || - !absl::c_linear_search(selected_dims, i - split_dims_added)) { + if (sharding.tile_assignment().dim(i - split_dims_added) == 1) { xpose_permutation[i] = i + tiled_dims.size() - split_dims_added; } else { xpose_permutation[i] = split_dims_added; diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h index 1cb2d551146..606a7ae5f14 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h @@ -82,10 +82,8 @@ struct SPMDCollectiveOpsCreator { std::function create_partition_id; // Function used to create a cross-partition all-reduce HLO. - std::function>& partition_subgroups, - int64 channel_id)> + std::function create_cross_partition_all_reduce; // Function used to create a cross-partition collective-permute HLO. 
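As a side note on the restored GetDefaultCollectiveOpsCreator above: CreateReplicaGroups builds one singleton group per replica, which, together with the channel id, appears to make the resulting all-reduce combine values across partitions within each replica rather than across replicas. A rough standalone sketch of that grouping follows; ReplicaGroupSketch is a stand-in for the real ReplicaGroup proto and exists only for this illustration.

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Stand-in for the ReplicaGroup proto used by the real code (illustrative).
    struct ReplicaGroupSketch {
      std::vector<int64_t> replica_ids;
      void add_replica_ids(int64_t id) { replica_ids.push_back(id); }
    };

    // Mirrors the shape of CreateReplicaGroups in the patch: one singleton
    // group per replica.
    std::vector<ReplicaGroupSketch> CreateReplicaGroupsSketch(int64_t n) {
      std::vector<ReplicaGroupSketch> groups(n);
      for (int64_t i = 0; i < n; ++i) groups[i].add_replica_ids(i);
      return groups;
    }

    int main() {
      // With 4 replicas the groups are {0} {1} {2} {3}.
      for (const auto& g : CreateReplicaGroupsSketch(4)) {
        std::cout << "{";
        for (int64_t id : g.replica_ids) std::cout << " " << id;
        std::cout << " } ";
      }
      std::cout << "\n";
    }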
@@ -98,8 +96,8 @@ struct SPMDCollectiveOpsCreator { // Function used to create a cross-partition all-to-all HLO. std::function operands, - const std::vector>& partition_subgroups, - int64 channel_id, absl::optional split_dimension)> + const std::vector& replica_groups, int64 channel_id, + absl::optional split_dimension)> create_cross_partition_all_to_all; // Function used to create a cross-partition all-gather HLO. This is optional: @@ -171,13 +169,10 @@ class SpmdPartitioner : public HloModulePass { // The default uses a single all-gather even if there are multiple sharded // dimensions, and adds potential reshapes and transposes to achieve that. // If it returns false, the partitioner will fall back to all-reduce. - // `selected_dims` specifies the dimensions along which the all-gather happens - // in the tiled sharding, which allows potentially creating a subgroup - // all-gather. - virtual HloInstruction* AllGatherShards( - SpmdBuilder* b, HloInstruction* operand, const HloSharding& sharding, - int64 channel_id, absl::Span selected_dims, - const SPMDCollectiveOpsCreator& collectives_creator); + virtual HloInstruction* AllGatherShards(SpmdBuilder* b, + HloInstruction* operand, + const HloSharding& sharding, + int64 channel_id); protected: virtual std::unique_ptr CreateVisitor( @@ -220,11 +215,7 @@ class PartitionedHlo { std::tuple> window_reshard_cache; }; - // Use std::unordered_map for pointer stability. std::unordered_map per_hlo_cache; - // Caches for nested partitioning of grouped sharding. Each string key - // represents a unique way of grouping devices. - std::unordered_map groupd_caches; }; struct PartitioningState { SpmdBuilder* b; @@ -279,18 +270,15 @@ class PartitionedHlo { const PartitioningState& state() const { return state_; } - // Helper function to replicate the data on all devices. Could only modify - // the reshard cache. - PartitionedHlo Replicate(); - - // Helper function to replicate the data for partitions along the given dims. - HloInstruction* ReplicatePartial(absl::Span dims); - private: // Same as Reshard except that it does not explicitly modify the reshard // cache, although it would indirectly modify by calling Replicate(). PartitionedHlo ReshardNoCache(const HloSharding& target); + // Helper function to replicate the data on all devices. Could only modify + // the reshard cache. + PartitionedHlo Replicate(); + // Helper function to broadcast data from a single device to all devices. PartitionedHlo Broadcast() const; @@ -429,16 +417,6 @@ class SpmdPartitioningVisitor : public DfsHloVisitorWithDefault { StatusOr DoPartition(HloComputation* computation, const HloSharding& root_sharding); - // Information about a loop created for windowed dot-general. Used when - // DoCodeMotionForWindowedDotGeneralLoops() executes after the visitor - // finishes traversing the graph. - struct WindowedDotGeneralLoop { - HloInstruction* while_loop; - int64 windowed_operand; - bool windowed_in_contracting_dims; - bool windowed_in_batch_dims; - }; - private: Status Preprocess(HloInstruction* hlo) override; Status Postprocess(HloInstruction* hlo) override; @@ -467,6 +445,15 @@ class SpmdPartitioningVisitor : public DfsHloVisitorWithDefault { // partitioned instruction. ConstHloInstructionMap partitioned_instructions_; + // Information about a loop created for windowed dot-general. Used when + // DoCodeMotionForWindowedDotGeneralLoops() executes after the visitor + // finishes traversing the graph. 
+ struct WindowedDotGeneralLoop { + HloInstruction* while_loop; + int64 windowed_operand; + bool windowed_in_contracting_dims; + bool windowed_in_batch_dims; + }; std::vector windowed_dot_general_loops_; HloInstruction* visiting_hlo_; diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc index 5f3fd8d53e7..1045d1187b8 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc @@ -2218,7 +2218,7 @@ ENTRY entry { TF_ASSERT_OK_AND_ASSIGN(auto module, PartitionComputation(hlo_string, /*num_devices=*/2)); - VLOG(1) << module->ToString(); + std::cout << module->ToString(); auto sort = FindInstruction(module.get(), "sort"); EXPECT_EQ(sort->operand(0)->shape().dimensions(1), 209664); EXPECT_EQ(sort->operand(1)->shape().dimensions(1), 209664); @@ -2294,7 +2294,7 @@ ENTRY entry TF_ASSERT_OK_AND_ASSIGN(auto module, PartitionComputation(hlo_string, /*num_devices=*/2)); - VLOG(1) << module->ToString(); + std::cout << module->ToString(); auto sort = FindInstruction(module.get(), "sort"); EXPECT_EQ(sort->operand(0)->shape().dimensions(1), 209664); EXPECT_EQ(sort->operand(1)->shape().dimensions(1), 209664); @@ -3842,154 +3842,6 @@ ENTRY entry { EXPECT_THAT(root, op::Copy(op::CollectivePermute(reshape2))); } -TEST_F(SpmdPartitioningTest, Dot2DPartitionedNonContractingAndContracting0) { - const char* const hlo_string = R"( -HloModule module - -ENTRY entry { - %lhs = f32[48,12] parameter(0), sharding={devices=[2,2]0,1,2,3} - %rhs = f32[32,12] parameter(1), sharding={devices=[2,2]0,1,2,3} - ROOT %dot = f32[48,32] dot(%lhs, %rhs), - lhs_batch_dims={}, rhs_batch_dims={}, - lhs_contracting_dims={1}, rhs_contracting_dims={1}, - sharding={devices=[2,2]0,1,2,3} -})"; - - TF_ASSERT_OK_AND_ASSIGN(auto module, - PartitionComputation(hlo_string, /*num_devices=*/4)); - VLOG(1) << module->ToString(); - - auto lhs = AllOf(op::Shape("f32[24,6]"), op::Parameter(0)); - auto partial_replicated_lhs = - AllOf(op::Shape("f32[24,12]"), - op::AllReduce(op::DynamicUpdateSlice(_, lhs, _, _))); - auto rhs = AllOf(op::Shape("f32[16,6]"), op::Parameter(1)); - auto partial_replicated_rhs = - AllOf(op::Shape("f32[16,12]"), op::AllReduce(op::DynamicUpdateSlice( - _, op::CollectivePermute(rhs), _, _))); - auto root = module->entry_computation()->root_instruction(); - EXPECT_THAT(root, - AllOf(op::Dot(partial_replicated_lhs, partial_replicated_rhs), - op::Shape("f32[24,16]"))); -} - -TEST_F(SpmdPartitioningTest, Dot2DPartitionedNonContractingAndContracting1) { - const char* const hlo_string = R"( -HloModule module - -ENTRY entry { - %lhs = f32[48,100] parameter(0), sharding={devices=[2,2]0,1,2,3} - %rhs = f32[32,100] parameter(1), sharding={devices=[2,2]0,1,2,3} - ROOT %dot = f32[48,32] dot(%lhs, %rhs), - lhs_batch_dims={}, rhs_batch_dims={}, - lhs_contracting_dims={1}, rhs_contracting_dims={1}, - sharding={devices=[2,2]0,1,2,3} -})"; - - TF_ASSERT_OK_AND_ASSIGN(auto module, - PartitionComputation(hlo_string, /*num_devices=*/4)); - VLOG(1) << module->ToString(); - - auto lhs = AllOf(op::Shape("f32[24,50]"), op::Parameter(0)); - auto rhs = AllOf(op::Shape("f32[16,50]"), op::Parameter(1)); - auto partial_replicated_rhs = - AllOf(op::Shape("f32[32,50]"), - op::AllReduce(op::DynamicUpdateSlice(_, rhs, _, _))); - auto root = module->entry_computation()->root_instruction(); - EXPECT_THAT( - root, AllOf(op::Shape("f32[24,16]"), - op::DynamicSlice( - op::AllReduce(AllOf(op::Dot(lhs, 
partial_replicated_rhs), - op::Shape("f32[24,32]"))), - _, _))); -} - -TEST_F(SpmdPartitioningTest, Dot2DPartitionedBatchAndNonContracting) { - const char* const hlo_string = R"( -HloModule module - -ENTRY entry { - %lhs = f32[4,24,100] parameter(0), sharding={devices=[2,2,1]0,1,2,3} - %rhs = f32[4,32,100] parameter(1), sharding={devices=[2,2,1]0,1,2,3} - ROOT %dot = f32[4,24,32] dot(%lhs, %rhs), - lhs_batch_dims={0}, rhs_batch_dims={0}, - lhs_contracting_dims={2}, rhs_contracting_dims={2}, - sharding={devices=[2,2,1]0,1,2,3} -})"; - - TF_ASSERT_OK_AND_ASSIGN(auto module, - PartitionComputation(hlo_string, /*num_devices=*/4)); - VLOG(1) << module->ToString(); - - auto lhs = AllOf(op::Shape("f32[2,12,100]"), op::Parameter(0)); - auto rhs = AllOf(op::Shape("f32[2,16,100]"), op::Parameter(1)); - auto partial_replicated_rhs = - AllOf(op::Shape("f32[2,32,100]"), - op::AllReduce(op::DynamicUpdateSlice(_, rhs, _, _, _))); - auto root = module->entry_computation()->root_instruction(); - EXPECT_THAT(root, AllOf(op::Shape("f32[2,12,32]"), - op::Dot(lhs, partial_replicated_rhs))); -} - -TEST_F(SpmdPartitioningTest, - Dot2DPartitionedBatchNonContractingAndContracting) { - const char* const hlo_string = R"( -HloModule module - -ENTRY entry { - %lhs = f32[4,24,100] parameter(0), sharding={devices=[2,1,2]0,1,2,3} - %rhs = f32[4,32,100] parameter(1), sharding={devices=[2,2,1]0,1,2,3} - ROOT %dot = f32[4,24,32] dot(%lhs, %rhs), - lhs_batch_dims={0}, rhs_batch_dims={0}, - lhs_contracting_dims={2}, rhs_contracting_dims={2}, - sharding={devices=[2,1,2]0,1,2,3} -})"; - - TF_ASSERT_OK_AND_ASSIGN(auto module, - PartitionComputation(hlo_string, /*num_devices=*/4)); - VLOG(1) << module->ToString(); - - auto lhs = AllOf(op::Shape("f32[2,24,50]"), op::Parameter(0)); - auto rhs = AllOf(op::Shape("f32[2,16,100]"), op::Parameter(1)); - auto partial_replicated_lhs = - AllOf(op::Shape("f32[2,24,100]"), - op::AllReduce(op::DynamicUpdateSlice(_, lhs, _, _, _))); - auto root = module->entry_computation()->root_instruction(); - EXPECT_THAT(root, AllOf(op::Shape("f32[2,24,16]"), - op::Dot(partial_replicated_lhs, rhs))); -} - -TEST_F(SpmdPartitioningTest, Dot2DPartitionedBatchAndReshard) { - const char* const hlo_string = R"( -HloModule module - -ENTRY entry { - %lhs = f32[4,8,24,100] parameter(0), sharding={devices=[2,1,2,1]0,1,2,3} - %rhs = f32[4,8,32,100] parameter(1), sharding={devices=[2,1,2,1]0,1,2,3} - ROOT %dot = f32[4,8,24,32] dot(%lhs, %rhs), - lhs_batch_dims={0,1}, rhs_batch_dims={0,1}, - lhs_contracting_dims={3}, rhs_contracting_dims={3}, - sharding={devices=[1,2,2,1]0,1,2,3} -})"; - - TF_ASSERT_OK_AND_ASSIGN(auto module, - PartitionComputation(hlo_string, /*num_devices=*/4)); - VLOG(1) << module->ToString(); - - auto lhs = AllOf(op::Shape("f32[2,8,12,100]"), op::Parameter(0)); - auto rhs = AllOf(op::Shape("f32[2,8,16,100]"), op::Parameter(1)); - auto partial_replicated_rhs = - AllOf(op::Shape("f32[2,8,32,100]"), - op::AllReduce(op::DynamicUpdateSlice(_, rhs, _, _, _, _))); - auto dot = - AllOf(op::Shape("f32[2,8,12,32]"), op::Dot(lhs, partial_replicated_rhs)); - auto reshape = AllOf(op::Shape("f32[2,2,4,12,32]"), op::Reshape(dot)); - auto all_to_all = AllOf(op::Shape("f32[2,2,4,12,32]"), op::AllToAll(reshape)); - auto xpose = AllOf(op::Shape("f32[2,2,4,12,32]"), op::Transpose(all_to_all)); - auto root = module->entry_computation()->root_instruction(); - EXPECT_THAT(root, AllOf(op::Shape("f32[4,4,12,32]"), op::Reshape(xpose))); -} - } // namespace } // namespace spmd } // namespace xla diff --git 
a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc index 4fc8b1585b6..6beed5a15e5 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc @@ -16,11 +16,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h" #include -#include -#include "absl/algorithm/container.h" -#include "absl/container/flat_hash_map.h" -#include "absl/strings/str_join.h" #include "absl/types/optional.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/hlo_casting_utils.h" @@ -147,10 +143,10 @@ Shape MakeNonPaddedShapeForGivenPartition(const Shape& shape, return partition_shape; } -std::vector MakePartitionOffsets( - const Shape& shape, const HloSharding& sharding, - HloInstruction* partition_id, SpmdBuilder* b, - absl::Span dims) { +std::vector MakePartitionOffsets(const Shape& shape, + const HloSharding& sharding, + HloInstruction* partition_id, + SpmdBuilder* b) { CHECK(!shape.IsTuple()); Array2D offset_array( @@ -162,8 +158,7 @@ std::vector MakePartitionOffsets( LiteralUtil::CreateR2FromArray2D(offset_array))); std::vector offsets; for (int64 i = 0; i < shape.rank(); ++i) { - if (sharding.tile_assignment().dim(i) == 1 || - (!dims.empty() && !absl::c_linear_search(dims, i))) { + if (sharding.tile_assignment().dim(i) == 1) { offsets.push_back(b->AddInstruction( HloInstruction::CreateConstant(LiteralUtil::Zero(S32)))); } else { @@ -983,252 +978,5 @@ bool CanReshardWithCollectivePermute(const HloSharding& source, source.tile_assignment() != target.tile_assignment(); } -GroupedSharding GroupShardingOnDims(const HloSharding& sharding, - absl::Span group_dims) { - CHECK(!sharding.IsTileMaximal()); - std::vector grouped_tiling_dims = - sharding.tile_assignment().dimensions(); - std::vector group_dim_sizes(group_dims.size()); - for (int64 i = 0; i < group_dims.size(); ++i) { - group_dim_sizes[i] = grouped_tiling_dims[group_dims[i]]; - grouped_tiling_dims[group_dims[i]] = 1; - } - std::vector> device_groups(Product(group_dim_sizes)); - sharding.tile_assignment().Each( - [&](absl::Span indices, int64 device) { - int64 group_id = 0; - for (int64 dim : group_dims) { - group_id *= sharding.tile_assignment().dim(dim); - group_id += indices[dim]; - } - device_groups[group_id].push_back(device); - }); - Array grouped_tiling(grouped_tiling_dims); - grouped_tiling.FillIota(0); - return GroupedSharding( - std::move(device_groups), - std::vector(group_dims.begin(), group_dims.end()), - std::move(group_dim_sizes), sharding.tile_assignment().num_dimensions(), - HloSharding::Tile(grouped_tiling)); -} - -HloSharding UngroupSharding(const GroupedSharding& grouped_sharding) { - CHECK(!grouped_sharding.sharding.IsTileMaximal()); - std::vector tiling_dims = - grouped_sharding.sharding.tile_assignment().dimensions(); - for (int64 i = 0; i < grouped_sharding.group_dims.size(); ++i) { - tiling_dims[grouped_sharding.group_dims[i]] = - grouped_sharding.group_dim_sizes[i]; - } - Array tiling(tiling_dims); - grouped_sharding.sharding.tile_assignment().Each( - [&](absl::Span indices, int64 device) { - std::vector ungrouped_inds(indices.begin(), indices.end()); - for (int64 g = 0; g < grouped_sharding.device_groups.size(); ++g) { - int64 remaining_group_index = g; - for (int64 i = grouped_sharding.group_dims.size() - 1; i >= 0; --i) { - ungrouped_inds[grouped_sharding.group_dims[i]] = - 
remaining_group_index % grouped_sharding.group_dim_sizes[i]; - remaining_group_index /= grouped_sharding.group_dim_sizes[i]; - } - tiling(ungrouped_inds) = grouped_sharding.device_groups[g][device]; - } - }); - return HloSharding::Tile(tiling); -} - -GroupedSharding AlignGroupsWith(GroupedSharding grouped_sharding, - const GroupedSharding& reference, - bool ignore_group_order) { - // Returns src -> dst index mapping. - auto get_permutation = [](absl::Span src, - absl::Span dst) { - CHECK_EQ(src.size(), dst.size()); - absl::flat_hash_map dst_reverse_map; - for (int64 i = 0; i < dst.size(); ++i) { - dst_reverse_map[dst[i]] = i; - } - std::vector permutation(src.size()); - for (int64 i = 0; i < src.size(); ++i) { - auto it = dst_reverse_map.find(src[i]); - CHECK(it != dst_reverse_map.end()); - permutation[i] = it->second; - } - return permutation; - }; - CHECK_EQ(grouped_sharding.device_groups.size(), - reference.device_groups.size()); - absl::flat_hash_map device_to_ref_group; - for (int64 g = 0; g < reference.device_groups.size(); ++g) { - for (int64 device : reference.device_groups[g]) { - device_to_ref_group[device] = g; - } - } - auto unique_ref_dev_group = [&](absl::Span devices) -> int64 { - int64 ref_g = -1; - for (int64 device : devices) { - if (ref_g == -1) { - ref_g = device_to_ref_group[device]; - } else if (ref_g != device_to_ref_group[device]) { - return -1; - } - } - return ref_g; - }; - bool matching_groups = true; - std::vector original_src_to_ref_permutation; - for (int64 g = 0; g < grouped_sharding.device_groups.size(); ++g) { - int64 ref_g = unique_ref_dev_group(grouped_sharding.device_groups[g]); - if (ref_g < 0 || (!ignore_group_order && g != ref_g)) { - matching_groups = false; - break; - } - if (g == 0) { - original_src_to_ref_permutation = get_permutation( - grouped_sharding.device_groups[g], reference.device_groups[ref_g]); - } - } - if (matching_groups) { - auto tiles = grouped_sharding.sharding.tile_assignment(); - tiles.Each([&](absl::Span indices, int64* device) { - *device = original_src_to_ref_permutation[*device]; - }); - grouped_sharding.sharding = HloSharding::Tile(tiles); - } - grouped_sharding.device_groups = std::move(reference.device_groups); - return grouped_sharding; -} - -Shape GetPerGroupBaseShape(const GroupedSharding& grouped_sharding, - const Shape& original_base_shape) { - auto result = original_base_shape; - for (int64 i = 0; i < grouped_sharding.group_dims.size(); ++i) { - int64 dim = grouped_sharding.group_dims[i]; - int64 groups = grouped_sharding.group_dim_sizes[i]; - result.set_dimensions(dim, result.dimensions(dim) / groups); - } - return result; -} - -namespace { - -HloInstruction* GetInGroupPartitionId( - HloInstruction* partition_id, - const std::vector>& device_groups, SpmdBuilder* b) { - int64 total_devices = device_groups.size() * device_groups[0].size(); - std::vector in_group_ids(total_devices); - for (uint32 i = 0; i < device_groups.size(); ++i) { - for (uint32 j = 0; j < device_groups[i].size(); ++j) { - in_group_ids[device_groups[i][j]] = j; - } - } - auto id_table = b->AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::CreateR1(in_group_ids))); - return b->AddInstruction(HloInstruction::CreateReshape( - ShapeUtil::MakeScalarShape(U32), - b->AddInstruction(HloInstruction::CreateDynamicSlice( - ShapeUtil::MakeShape(U32, {1}), id_table, {partition_id}, {1})))); -} - -SPMDCollectiveOpsCreator GetPerGroupCollectiveOpsCreator( - const SPMDCollectiveOpsCreator& creator, - const std::vector>& device_groups) { - 
SPMDCollectiveOpsCreator result; - result.create_partition_id = [creator, device_groups](SpmdBuilder* b) { - return GetInGroupPartitionId(creator.create_partition_id(b), device_groups, - b); - }; - auto expand_partition_groups = - [device_groups]( - const std::vector>& partition_subgroups) { - if (partition_subgroups.empty()) { - return device_groups; - } - std::vector> result(partition_subgroups.size() * - device_groups.size()); - for (int64 g = 0; g < device_groups.size(); ++g) { - for (int64 i = 0; i < partition_subgroups.size(); ++i) { - result[g * partition_subgroups.size() + i].resize( - partition_subgroups[i].size()); - for (int64 j = 0; j < partition_subgroups[i].size(); ++j) { - result[g * partition_subgroups.size() + i][j] = - device_groups[g][partition_subgroups[i][j]]; - } - } - } - return result; - }; - result.create_cross_partition_all_reduce = - [creator, expand_partition_groups]( - SpmdBuilder* b, HloInstruction* operand, HloComputation* reduction, - const std::vector>& partition_subgroups, - int64 channel_id) { - return creator.create_cross_partition_all_reduce( - b, operand, reduction, expand_partition_groups(partition_subgroups), - channel_id); - }; - result.create_cross_partition_collective_permute = - [creator, device_groups]( - SpmdBuilder* b, HloInstruction* operand, - std::vector>& src_dst_pairs, - int64 next_channel_id) { - std::vector> expanded_pairs( - src_dst_pairs.size() * device_groups.size()); - for (int64 g = 0; g < device_groups.size(); ++g) { - for (int64 i = 0; i < src_dst_pairs.size(); ++i) { - expanded_pairs[g * src_dst_pairs.size() + i] = - std::pair{ - device_groups[g][src_dst_pairs[i].first], - device_groups[g][src_dst_pairs[i].second]}; - } - } - return creator.create_cross_partition_collective_permute( - b, operand, expanded_pairs, next_channel_id); - }; - result.create_cross_partition_all_to_all = - [creator, expand_partition_groups]( - SpmdBuilder* b, absl::Span operands, - const std::vector>& partition_subgroups, - int64 channel_id, absl::optional split_dimension) { - return creator.create_cross_partition_all_to_all( - b, operands, expand_partition_groups(partition_subgroups), - channel_id, split_dimension); - }; - if (creator.create_cross_partition_all_gather) { - result.create_cross_partition_all_gather = - [creator, expand_partition_groups]( - SpmdBuilder* b, HloInstruction* operand, const Shape& ag_shape, - const std::vector>& partition_subgroups, - int64 channel_id, int64 all_gather_dimension) { - return creator.create_cross_partition_all_gather( - b, operand, ag_shape, - expand_partition_groups(partition_subgroups), channel_id, - all_gather_dimension); - }; - } - return result; -} - -} // namespace - -PartitionedHlo::PartitioningState CreatePerGroupPartitioningState( - const PartitionedHlo::PartitioningState& state, - const std::vector>& device_groups, SpmdBuilder* b) { - auto result = state; - result.collective_ops_creator = GetPerGroupCollectiveOpsCreator( - state.collective_ops_creator, device_groups); - result.partition_id = - GetInGroupPartitionId(state.partition_id, device_groups, b); - // Create a string key for the groups. 
- std::vector per_group_strings(device_groups.size()); - for (int64 i = 0; i < per_group_strings.size(); ++i) { - per_group_strings[i] = absl::StrJoin(device_groups[i], ","); - } - result.reshard_cache = - &state.reshard_cache - ->groupd_caches[absl::StrJoin(per_group_strings, ";")]; - return result; -} - } // namespace spmd } // namespace xla diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h index 6e68375f9b9..7b737daf78c 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h @@ -87,12 +87,10 @@ Shape MakeNonPaddedShapeForGivenPartition(const Shape& shape, // Generates the HLO instructions that represent the dimension offsets on any // device. The size of the returned vector is the rank of the given shape. -// If `dims` is non-empty, the generated offsets will only be non-zero for those -// dimensions. -std::vector MakePartitionOffsets( - const Shape& shape, const HloSharding& sharding, - HloInstruction* partition_id, SpmdBuilder* b, - absl::Span dims = {}); +std::vector MakePartitionOffsets(const Shape& shape, + const HloSharding& sharding, + HloInstruction* partition_id, + SpmdBuilder* b); // Returns the offsets of the partition in the tile assignment. std::vector MakeTiledPartitionOrdinals( @@ -278,48 +276,6 @@ GetReshardAllToAllSourceTargetDims(const HloSharding& source, bool CanReshardWithCollectivePermute(const HloSharding& source, const HloSharding& target); -// Represents grouping devices in a tiled sharding along certain dimensions. -// Elements in group dimensions define different device groups, and the sharding -// represents the in-group sharding. -struct GroupedSharding { - GroupedSharding(std::vector> device_groups, - std::vector group_dims, - std::vector group_dim_sizes, int64 rank, - HloSharding grouped_sharding) - : device_groups(std::move(device_groups)), - group_dims(std::move(group_dims)), - group_dim_sizes(std::move(group_dim_sizes)), - sharding(std::move(grouped_sharding)) {} - std::vector> device_groups; - std::vector group_dims; - std::vector group_dim_sizes; - int64 rank; - HloSharding sharding; -}; - -// Creates a GroupedSharding for a tiled sharding. -GroupedSharding GroupShardingOnDims(const HloSharding& sharding, - absl::Span group_dims); - -// Reconstructs the ungrouped sharding from a GroupedSharding. -HloSharding UngroupSharding(const GroupedSharding& grouped_sharding); - -// Returns a new GroupedSharding that has the same group definition of -// `reference`. -GroupedSharding AlignGroupsWith(GroupedSharding grouped_sharding, - const GroupedSharding& reference, - bool ignore_group_order = false); - -// Returns the per-group base shape, i.e., before applying the in-group -// sharding. -Shape GetPerGroupBaseShape(const GroupedSharding& grouped_sharding, - const Shape& original_base_shape); - -// Creates the nested partitioner state for in-group patitioning. -PartitionedHlo::PartitioningState CreatePerGroupPartitioningState( - const PartitionedHlo::PartitioningState& state, - const std::vector>& device_groups, SpmdBuilder* b); - } // namespace spmd } // namespace xla From fd1481780bcc57426bf3158a8f94eab95e529384 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 23 Jul 2020 22:02:59 -0700 Subject: [PATCH 1245/2522] Use proto to configure tf.data service worker server. This simplifies adding new configuration properties, so that we don't need to plumb new properties through. 
This also gives us a single place to document all configuration options (in the .proto file). PiperOrigin-RevId: 322934622 Change-Id: I547740e3c9224c7b74ecf2853672ffeb226d61d1 --- tensorflow/core/data/service/BUILD | 1 - .../core/data/service/grpc_worker_impl.cc | 5 ++-- .../core/data/service/grpc_worker_impl.h | 4 +-- tensorflow/core/data/service/server_lib.cc | 30 ++++++++++++++----- tensorflow/core/data/service/server_lib.h | 25 ++++++++++++++-- tensorflow/core/data/service/test_cluster.cc | 8 ++--- tensorflow/core/data/service/worker_impl.cc | 16 +++++----- tensorflow/core/data/service/worker_impl.h | 13 ++++---- .../data/experimental/service_config.proto | 15 ---------- .../data/experimental/service/server_lib.py | 7 +---- .../service/server_lib_wrapper.cc | 12 +++----- 11 files changed, 71 insertions(+), 65 deletions(-) diff --git a/tensorflow/core/data/service/BUILD b/tensorflow/core/data/service/BUILD index 913cbf26cf0..d7cc7a3e528 100644 --- a/tensorflow/core/data/service/BUILD +++ b/tensorflow/core/data/service/BUILD @@ -227,7 +227,6 @@ cc_library( deps = [ ":worker_cc_grpc_proto", ":worker_impl", - "//tensorflow/core:protos_all_cc", "//tensorflow/core/distributed_runtime/rpc:grpc_util", tf_grpc_cc_dependency(), ], diff --git a/tensorflow/core/data/service/grpc_worker_impl.cc b/tensorflow/core/data/service/grpc_worker_impl.cc index c76e1062753..0cddfce4e0b 100644 --- a/tensorflow/core/data/service/grpc_worker_impl.cc +++ b/tensorflow/core/data/service/grpc_worker_impl.cc @@ -26,8 +26,9 @@ using ::grpc::ServerContext; using ::grpc::Status; GrpcWorkerImpl::GrpcWorkerImpl(ServerBuilder* server_builder, - const experimental::WorkerConfig& config) - : impl_(config) { + const std::string& dispatcher_address, + const std::string& protocol) + : impl_(dispatcher_address, protocol) { server_builder->RegisterService(this); VLOG(1) << "Registered data service worker"; } diff --git a/tensorflow/core/data/service/grpc_worker_impl.h b/tensorflow/core/data/service/grpc_worker_impl.h index b0881143a57..169ae29ea37 100644 --- a/tensorflow/core/data/service/grpc_worker_impl.h +++ b/tensorflow/core/data/service/grpc_worker_impl.h @@ -19,7 +19,6 @@ limitations under the License. 
#include "grpcpp/server_builder.h" #include "tensorflow/core/data/service/worker.grpc.pb.h" #include "tensorflow/core/data/service/worker_impl.h" -#include "tensorflow/core/protobuf/data/experimental/service_config.pb.h" namespace tensorflow { namespace data { @@ -36,7 +35,8 @@ namespace data { class GrpcWorkerImpl : public WorkerService::Service { public: explicit GrpcWorkerImpl(grpc::ServerBuilder* server_builder, - const experimental::WorkerConfig& config); + const std::string& dispatcher_address, + const std::string& protocol); ~GrpcWorkerImpl() override {} void Start(const std::string& worker_address); diff --git a/tensorflow/core/data/service/server_lib.cc b/tensorflow/core/data/service/server_lib.cc index 648a189717e..6d912b1c802 100644 --- a/tensorflow/core/data/service/server_lib.cc +++ b/tensorflow/core/data/service/server_lib.cc @@ -79,7 +79,8 @@ DispatchGrpcDataServer::DispatchGrpcDataServer( DispatchGrpcDataServer::~DispatchGrpcDataServer() { delete service_; } void DispatchGrpcDataServer::AddServiceToBuilder(grpc::ServerBuilder* builder) { - service_ = absl::make_unique(builder, config_).release(); + auto service = absl::make_unique(builder, config_); + service_ = service.release(); } Status DispatchGrpcDataServer::NumWorkers(int* num_workers) { @@ -95,17 +96,22 @@ Status DispatchGrpcDataServer::NumWorkers(int* num_workers) { } WorkerGrpcDataServer::WorkerGrpcDataServer( - const experimental::WorkerConfig& config) - : GrpcDataServerBase(config.port(), config.protocol()), config_(config) {} + int port, const std::string& protocol, + const std::string& dispatcher_address, const std::string& worker_address) + : GrpcDataServerBase(port, protocol), + dispatcher_address_(dispatcher_address), + worker_address_(worker_address) {} WorkerGrpcDataServer::~WorkerGrpcDataServer() { delete service_; } void WorkerGrpcDataServer::AddServiceToBuilder(grpc::ServerBuilder* builder) { - service_ = absl::make_unique(builder, config_).release(); + auto service = absl::make_unique(builder, dispatcher_address_, + protocol_); + service_ = service.release(); } Status WorkerGrpcDataServer::StartServiceInternal() { - std::string worker_address = config_.worker_address(); + std::string worker_address = worker_address_; if (worker_address.empty()) { worker_address = absl::StrCat("localhost:", kPortPlaceholder); } @@ -122,9 +128,19 @@ Status NewDispatchServer(const experimental::DispatcherConfig& config, return Status::OK(); } -Status NewWorkerServer(const experimental::WorkerConfig& config, +Status NewWorkerServer(int port, const std::string& protocol, + const std::string& dispatcher_address, std::unique_ptr* out_server) { - *out_server = absl::make_unique(config); + return NewWorkerServer(port, protocol, dispatcher_address, + /*worker_address=*/"", out_server); +} + +Status NewWorkerServer(int port, const std::string& protocol, + const std::string& dispatcher_address, + const std::string& worker_address, + std::unique_ptr* out_server) { + *out_server = absl::make_unique( + port, protocol, dispatcher_address, worker_address); return Status::OK(); } diff --git a/tensorflow/core/data/service/server_lib.h b/tensorflow/core/data/service/server_lib.h index 365241753fb..d147f47c5e4 100644 --- a/tensorflow/core/data/service/server_lib.h +++ b/tensorflow/core/data/service/server_lib.h @@ -91,7 +91,9 @@ class DispatchGrpcDataServer : public GrpcDataServerBase { class WorkerGrpcDataServer : public GrpcDataServerBase { public: - explicit WorkerGrpcDataServer(const experimental::WorkerConfig& config); + 
WorkerGrpcDataServer(int requested_port, const std::string& protocol, + const std::string& dispatcher_address, + const std::string& worker_address); ~WorkerGrpcDataServer() override; protected: @@ -99,7 +101,8 @@ class WorkerGrpcDataServer : public GrpcDataServerBase { Status StartServiceInternal() override; private: - const experimental::WorkerConfig config_; + const std::string dispatcher_address_; + const std::string worker_address_; // Owned. We use a raw pointer because GrpcWorkerImpl is forward-declared. GrpcWorkerImpl* service_; }; @@ -109,7 +112,23 @@ Status NewDispatchServer(const experimental::DispatcherConfig& config, std::unique_ptr* out_server); // Creates a worker tf.data server and stores it in `*out_server`. -Status NewWorkerServer(const experimental::WorkerConfig& config, +// +// The port can be a specific port or 0. If the port is 0, an available port +// will be chosen in Start(). This value can be queried with BoundPort(). +// +// The worker_address argument is optional. If left empty, it will default to +// "localhost:%port%". When the worker registers with the dispatcher, the worker +// will report the worker address, so that the dispatcher can tell clients where +// to read from. The address may contain the placeholder "%port%", which will be +// replaced with the value of BoundPort(). +Status NewWorkerServer(int port, const std::string& protocol, + const std::string& dispatcher_address, + const std::string& worker_address, + std::unique_ptr* out_server); + +// Creates a worker using the default worker_address. +Status NewWorkerServer(int port, const std::string& protocol, + const std::string& dispatcher_address, std::unique_ptr* out_server); } // namespace data diff --git a/tensorflow/core/data/service/test_cluster.cc b/tensorflow/core/data/service/test_cluster.cc index 8ae3f191407..ad0d2be87d8 100644 --- a/tensorflow/core/data/service/test_cluster.cc +++ b/tensorflow/core/data/service/test_cluster.cc @@ -62,12 +62,8 @@ Status TestCluster::Initialize() { Status TestCluster::AddWorker() { std::unique_ptr worker; - experimental::WorkerConfig config; - config.set_port(0); - config.set_protocol(kProtocol); - config.set_dispatcher_address(dispatcher_address_); - config.set_worker_address("localhost:%port%"); - TF_RETURN_IF_ERROR(NewWorkerServer(config, &worker)); + TF_RETURN_IF_ERROR( + NewWorkerServer(/*port=*/0, kProtocol, dispatcher_address_, &worker)); TF_RETURN_IF_ERROR(worker->Start()); worker_addresses_.push_back(absl::StrCat("localhost:", worker->BoundPort())); workers_.push_back(std::move(worker)); diff --git a/tensorflow/core/data/service/worker_impl.cc b/tensorflow/core/data/service/worker_impl.cc index 39508b1eab0..00659e1d048 100644 --- a/tensorflow/core/data/service/worker_impl.cc +++ b/tensorflow/core/data/service/worker_impl.cc @@ -46,8 +46,8 @@ auto* tf_data_service_created = } // namespace DataServiceWorkerImpl::DataServiceWorkerImpl( - const experimental::WorkerConfig& config) - : config_(config) { + const std::string& dispatcher_address, const std::string& protocol) + : dispatcher_address_(dispatcher_address), protocol_(protocol) { tf_data_service_created->GetCell()->Set(true); } @@ -68,7 +68,7 @@ void DataServiceWorkerImpl::Start(const std::string& worker_address) { Status s = Register(); while (!s.ok()) { LOG(WARNING) << "Failed to register with dispatcher at " - << config_.dispatcher_address() << ": " << s; + << dispatcher_address_ << ": " << s; Env::Default()->SleepForMicroseconds(kHeartbeatIntervalMicros); s = Register(); } @@ -173,17 +173,17 @@ 
Status DataServiceWorkerImpl::EnsureDispatcherStubInitialized() if (!dispatcher_stub_) { ::grpc::ChannelArguments args; std::shared_ptr<::grpc::ChannelCredentials> credentials; - TF_RETURN_IF_ERROR(CredentialsFactory::CreateClientCredentials( - config_.protocol(), &credentials)); - auto channel = ::grpc::CreateCustomChannel(config_.dispatcher_address(), - credentials, args); + TF_RETURN_IF_ERROR( + CredentialsFactory::CreateClientCredentials(protocol_, &credentials)); + auto channel = + ::grpc::CreateCustomChannel(dispatcher_address_, credentials, args); dispatcher_stub_ = DispatcherService::NewStub(channel); } return Status::OK(); } Status DataServiceWorkerImpl::Register() EXCLUSIVE_LOCKS_REQUIRED(mu_) { - VLOG(3) << "Registering with dispatcher at " << config_.dispatcher_address(); + VLOG(3) << "Registering with dispatcher at " << dispatcher_address_; TF_RETURN_IF_ERROR(EnsureDispatcherStubInitialized()); RegisterWorkerRequest req; req.set_worker_address(worker_address_); diff --git a/tensorflow/core/data/service/worker_impl.h b/tensorflow/core/data/service/worker_impl.h index 6961312ee34..adb3e97bbea 100644 --- a/tensorflow/core/data/service/worker_impl.h +++ b/tensorflow/core/data/service/worker_impl.h @@ -21,7 +21,6 @@ limitations under the License. #include "tensorflow/core/data/service/worker.pb.h" #include "tensorflow/core/data/standalone.h" #include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/protobuf/data/experimental/service_config.pb.h" #include "tensorflow/core/public/session.h" namespace tensorflow { @@ -30,14 +29,12 @@ namespace data { // A TensorFlow DataService serves dataset elements over RPC. class DataServiceWorkerImpl { public: - explicit DataServiceWorkerImpl(const experimental::WorkerConfig& config); + explicit DataServiceWorkerImpl(const std::string& dispatcher_address, + const std::string& protocol); ~DataServiceWorkerImpl(); // Starts the worker. The worker needs to know its own address so that it can - // register with the dispatcher. This is set in `Start` instead of in the - // constructor because the worker may be binding to port `0`, in which case - // the address isn't known until the worker has started and decided which port - // to bind to. + // register with the dispatcher. void Start(const std::string& worker_address); // See worker.proto for API documentation. @@ -70,7 +67,9 @@ class DataServiceWorkerImpl { std::unique_ptr iterator; } Task; - const experimental::WorkerConfig config_; + const std::string dispatcher_address_; + // Protocol for communicating with the dispatcher. + const std::string protocol_; // The worker's own address. std::string worker_address_; diff --git a/tensorflow/core/protobuf/data/experimental/service_config.proto b/tensorflow/core/protobuf/data/experimental/service_config.proto index 8708b923720..5dcc3c69083 100644 --- a/tensorflow/core/protobuf/data/experimental/service_config.proto +++ b/tensorflow/core/protobuf/data/experimental/service_config.proto @@ -10,18 +10,3 @@ message DispatcherConfig { // The protocol for the dispatcher to use when connecting to workers. string protocol = 2; } - -// Configuration for a tf.data service WorkerServer. -message WorkerConfig { - // The port for the worker to bind to. A value of 0 indicates that the - // worker may bind to any available port. - int64 port = 1; - // The protocol for the worker to use when connecting to the dispatcher. - string protocol = 2; - // The address of the dispatcher to register with. 
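To make the NewWorkerServer documentation in server_lib.h above concrete, here is a rough caller-side sketch of the restored API; it is not part of the patch, and the port value, protocol string, and addresses are placeholder assumptions, with error handling reduced to status propagation.

    #include <memory>
    #include <string>

    #include "tensorflow/core/data/service/server_lib.h"
    #include "tensorflow/core/platform/errors.h"

    tensorflow::Status StartLocalWorker(
        std::unique_ptr<tensorflow::data::WorkerGrpcDataServer>* worker) {
      // Port 0 lets the server pick a free port; "%port%" in worker_address is
      // replaced with the bound port once Start() has run.
      TF_RETURN_IF_ERROR(tensorflow::data::NewWorkerServer(
          /*port=*/0, /*protocol=*/"grpc",
          /*dispatcher_address=*/"localhost:5050",
          /*worker_address=*/"localhost:%port%", worker));
      return (*worker)->Start();
    }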
- string dispatcher_address = 3; - // The address of the worker server. The substring "%port%", if specified, - // will be replaced with the worker's bound port. This is useful when the port - // is set to `0`. - string worker_address = 4; -} diff --git a/tensorflow/python/data/experimental/service/server_lib.py b/tensorflow/python/data/experimental/service/server_lib.py index 99dc9297901..3e355565308 100644 --- a/tensorflow/python/data/experimental/service/server_lib.py +++ b/tensorflow/python/data/experimental/service/server_lib.py @@ -205,13 +205,8 @@ class WorkerServer(object): protocol = "grpc" self._protocol = protocol - config = service_config_pb2.WorkerConfig( - port=port, - protocol=protocol, - dispatcher_address=dispatcher_address, - worker_address=worker_address) self._server = _pywrap_server_lib.TF_DATA_NewWorkerServer( - config.SerializeToString()) + port, protocol, dispatcher_address, worker_address) if start: self._server.start() diff --git a/tensorflow/python/data/experimental/service/server_lib_wrapper.cc b/tensorflow/python/data/experimental/service/server_lib_wrapper.cc index f59c1fb90bf..b8250aaeda6 100644 --- a/tensorflow/python/data/experimental/service/server_lib_wrapper.cc +++ b/tensorflow/python/data/experimental/service/server_lib_wrapper.cc @@ -69,16 +69,12 @@ PYBIND11_MODULE(_pywrap_server_lib, m) { m.def( "TF_DATA_NewWorkerServer", - [](std::string serialized_worker_config) + [](int port, std::string protocol, std::string dispatcher_address, + std::string worker_address) -> std::unique_ptr { - tensorflow::data::experimental::WorkerConfig config; - if (!config.ParseFromString(serialized_worker_config)) { - tensorflow::MaybeRaiseFromStatus(tensorflow::errors::InvalidArgument( - "Failed to deserialize worker config.")); - } std::unique_ptr server; - tensorflow::Status status = - tensorflow::data::NewWorkerServer(config, &server); + tensorflow::Status status = tensorflow::data::NewWorkerServer( + port, protocol, dispatcher_address, worker_address, &server); tensorflow::MaybeRaiseFromStatus(status); return server; }, From 37deabbb750b9150c63174e6bf911091463416b0 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Thu, 23 Jul 2020 22:09:37 -0700 Subject: [PATCH 1246/2522] [TF:MLIR] Fold PackOp if it computes tensor shape Move shape related PackOp folding outside of ReshapeOp folding. PiperOrigin-RevId: 322935716 Change-Id: I051fb8d7ed0c4507586d869b9c813f8b60634917 --- .../mlir/tensorflow/ir/tf_generated_ops.td | 2 + .../compiler/mlir/tensorflow/ir/tf_ops_n_z.cc | 179 +++++++++--------- .../mlir/tensorflow/tests/canonicalize.mlir | 52 ++--- .../tensorflow/transforms/canonicalize.td | 5 + 4 files changed, 126 insertions(+), 112 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td index 35a6b0e2343..0851975e8e1 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td @@ -6334,6 +6334,8 @@ This is the opposite of `unpack`. 
let verifier = [{ return Verify(*this); }]; + + let hasFolder = 1; } def TF_PadOp : TF_Op<"Pad", [NoSideEffect, TF_FoldOperandsTransposeInterface]> { diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc index 0d9b2610492..785b0bac820 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc @@ -217,6 +217,97 @@ static LogicalResult Verify(PackOp op) { return success(); } +OpFoldResult PackOp::fold(ArrayRef operands) { + // Fold pack operation if it computes the input tensor shape: + // + // %shape = tf.Shape(%arg) // [? x ...] + // %dim0 = tf.StridedSlice(%shape, 0, 1, 1) // get unknown dim0 value + // %pack = tf.Pack(dim0, ...) { axis = 0 } // [? x ...] + // + // Where `...` are some statically known dimensions. In this case %pack can be + // replaced with a %shape. This is a common pattern in models with a dynamic + // batch size. + + // Pack operation should pack at least two values. + if (values().size() < 2) return {}; + + // Dimensions packed along axis = 0 (pack scalars into vector). + if (axis().getSExtValue() != 0) return {}; + + // First packed value is defined by a strided slice operation. + auto slice_op = dyn_cast_or_null(values()[0].getDefiningOp()); + if (!slice_op) return {}; + + // Input to the slice op is defined by shape operation. + auto shape_op = dyn_cast_or_null(slice_op.input().getDefiningOp()); + if (!shape_op) return {}; + + // Input tensor, which shape is reconstructed by the pack operation. + Value tensor = shape_op.input(); + + // All masks are `0` except `shrink_axis_mask` which is equal to `1` (slicing + // scalar value from input vector). + if (slice_op.begin_mask().getSExtValue() != 0 || + slice_op.ellipsis_mask().getSExtValue() != 0 || + slice_op.end_mask().getSExtValue() != 0 || + slice_op.new_axis_mask().getSExtValue() != 0 || + slice_op.shrink_axis_mask().getSExtValue() != 1) + return {}; + + // Returns a value if the `value` is defined by a ConstOp with a single + // integer element in it and has an expected rank. + auto get_const_int = [](Value value, int expected_rank) -> Optional { + auto const_op = dyn_cast_or_null(value.getDefiningOp()); + if (!const_op) return None; + + auto value_attr = const_op.value().dyn_cast(); + if (!value_attr || value_attr.getNumElements() != 1) return None; + + auto value_ty = value_attr.getType(); + if (!value_ty.hasRank() || value_ty.getRank() != expected_rank) return None; + + auto splat = value_attr.getSplatValue(); + return splat.getValue().getSExtValue(); + }; + + // All other packed values are scalar constants. + SmallVector packed_dims; + packed_dims.reserve(values().size() - 1); + for (Value operand : llvm::drop_begin(values(), 1)) { + if (auto dim = get_const_int(operand, /*expected_rank=*/0)) { + packed_dims.push_back(*dim); + } else { + return {}; + } + } + + // Slice exactly the first shape dimension: + // begin = [0] end = [1], strides = [1] + auto begin = get_const_int(slice_op.begin(), /*expected_rank=*/1); + auto end = get_const_int(slice_op.end(), /*expected_rank=*/1); + auto strides = get_const_int(slice_op.strides(), /*expected_rank=*/1); + if (!begin.hasValue() || !end.hasValue() || !strides.hasValue() || + *begin != 0 || *end != 1 || *strides != 1) + return {}; + + // First tensor dimension is dynamic. 
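// As a concrete illustration of the pattern described above (types and
// constants are assumed; any shape with a single dynamic leading dimension
// and statically known trailing dimensions behaves the same way):
//
//   %shape = "tf.Shape"(%arg)                          // %arg : tensor<?x1xf32>
//   %dim0  = "tf.StridedSlice"(%shape, %cst0, %cst1, %cst1)
//            {shrink_axis_mask = 1 : i64}              // slice out dynamic dim 0
//   %pack  = "tf.Pack"(%dim0, %one) {axis = 0 : i64}   // rebuilds [?, 1]
//
// Once all the checks below succeed, every use of %pack is replaced by %shape,
// i.e. by slice_op.input().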
+ auto arg_ty = tensor.getType().dyn_cast(); + if (!arg_ty || !arg_ty.hasRank() || arg_ty.getNumDynamicDims() != 1 || + !arg_ty.isDynamicDim(0)) + return {}; + + // Argument tensor rank is equal to the number of packed dimensions. + if (arg_ty.getRank() != values().size()) return {}; + + // All other dimensions are statically known and equal to packed dims. + auto arg_dims = llvm::drop_begin(arg_ty.getShape(), 1); + if (!std::equal(arg_dims.begin(), arg_dims.end(), packed_dims.begin())) + return {}; + + // Replace %pack with %shape. + return slice_op.input(); +} + //===----------------------------------------------------------------------===// // PadOp //===----------------------------------------------------------------------===// @@ -608,12 +699,11 @@ void ReshapeOp::build(OpBuilder &builder, OperationState &result, Value tensor, void ReshapeOp::getCanonicalizationPatterns(OwningRewritePatternList &results, MLIRContext *context) { - results.insert(context); + results.insert(context); } OpFoldResult ReshapeOp::fold(ArrayRef operands) { Value tensor = this->tensor(); - Value shape = this->shape(); // Fold reshape if operand and result types are the same and all dimensions // are statically known (no-op reshape). @@ -624,90 +714,7 @@ OpFoldResult ReshapeOp::fold(ArrayRef operands) { return tensor; } - // Fold reshape if the shape is computed from the input tensor: - // - // %shape = tf.Shape(%arg) // [? x ...] - // %dim0 = tf.StridedSlice(%shape, 0, 1, 1) // get unknown dim value - // %new_shape = tf.Pack(dim0, ...) { axis = 0 } // [? x ...] - // %reshape = tf.Reshape(%arg, %new_shape) // this is no-op - // - // Where `...` are some statically known dimensions. In this case reshape is - // a no-op and can be replaced by %arg (assuming `...` are equal). - auto pack_op = dyn_cast_or_null(shape.getDefiningOp()); - if (!pack_op || pack_op.values().size() < 2) return {}; - - // Dimensions packed along axis = 0 (pack scalars into vector). - if (pack_op.axis().getSExtValue() != 0) return {}; - - // First packed value is defined by a strided slice operation. - auto slice_op = - dyn_cast_or_null(pack_op.values()[0].getDefiningOp()); - if (!slice_op) return {}; - - // Input to the slice op is defined by shape operation. - auto shape_op = dyn_cast_or_null(slice_op.input().getDefiningOp()); - if (!shape_op || shape_op.input() != tensor) return {}; - - // All masks are `0` except `shrink_axis_mask` which is equal to `1` (slicing - // scalar value from input vector). - if (slice_op.begin_mask().getSExtValue() != 0 || - slice_op.ellipsis_mask().getSExtValue() != 0 || - slice_op.end_mask().getSExtValue() != 0 || - slice_op.new_axis_mask().getSExtValue() != 0 || - slice_op.shrink_axis_mask().getSExtValue() != 1) - return {}; - - // Returns a value if the `value` is defined by a ConstOp with a single - // integer element in it and has an expected rank. - auto get_value = [](Value value, int expected_rank) -> Optional { - auto const_op = dyn_cast_or_null(value.getDefiningOp()); - if (!const_op) return None; - - auto value_attr = const_op.value().dyn_cast(); - if (!value_attr || value_attr.getNumElements() != 1) return None; - - auto value_ty = value_attr.getType(); - if (!value_ty.hasRank() || value_ty.getRank() != expected_rank) return None; - - auto splat = value_attr.getSplatValue(); - return splat.getValue().getSExtValue(); - }; - - // All other packed values are scalar constants. 
- SmallVector packed_dims; - packed_dims.reserve(pack_op.values().size() - 1); - for (Value operand : llvm::drop_begin(pack_op.values(), 1)) { - if (auto dim = get_value(operand, /*expected_rank=*/0)) { - packed_dims.push_back(*dim); - } else { - return {}; - } - } - - // Slice exactly the first shape dimension: - // begin = [0] end = [1], strides = [1] - auto begin = get_value(slice_op.begin(), /*expected_rank=*/1); - auto end = get_value(slice_op.end(), /*expected_rank=*/1); - auto strides = get_value(slice_op.strides(), /*expected_rank=*/1); - if (!begin.hasValue() || !end.hasValue() || !strides.hasValue() || - *begin != 0 || *end != 1 || *strides != 1) - return {}; - - // First tensor dimension is dynamic. - auto arg_ty = tensor.getType().dyn_cast(); - if (!arg_ty || !arg_ty.hasRank() || arg_ty.getNumDynamicDims() != 1 || - !arg_ty.isDynamicDim(0)) - return {}; - - // Argument tensor rank is equal to the number of packed dimensions. - if (arg_ty.getRank() != pack_op.values().size()) return {}; - - // All other dimensions are statically known and equal to packed dims. - auto arg_dims = llvm::drop_begin(arg_ty.getShape(), 1); - if (!std::equal(arg_dims.begin(), arg_dims.end(), packed_dims.begin())) - return {}; - - return tensor; + return {}; } //===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir b/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir index 17a19c50998..42659f41c21 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir @@ -377,6 +377,15 @@ func @testRedundantReshape(%arg0: tensor<4x4xi32>) -> tensor<2x8xi32> { // CHECK: return %1 : tensor<2x8xi32> } +// CHECK-LABEL: testReshapeToSelfShape +func @testReshapeToSelfShape(%arg0: tensor) -> tensor { + %0 = "tf.Shape"(%arg0) : (tensor) -> tensor<2xi32> + %1 = "tf.Reshape"(%arg0, %0) : (tensor, tensor<2xi32>) -> tensor + + // CHECK: return %arg0 : tensor + return %1: tensor +} + // CHECK-LABEL: func @testReshapeNoOp func @testReshapeNoOp(%arg0: tensor<2x4xf32>, %arg1: tensor<2xi32>) -> tensor<2x4xf32> { %0 = "tf.Reshape"(%arg0, %arg1) : (tensor<2x4xf32>, tensor<2xi32>) -> tensor<2x4xf32> @@ -385,8 +394,8 @@ func @testReshapeNoOp(%arg0: tensor<2x4xf32>, %arg1: tensor<2xi32>) -> tensor<2x return %0 : tensor<2x4xf32> } -// CHECK-LABEL: func @testReshapeNoOpShapeComputation -func @testReshapeNoOpShapeComputation(%arg0: tensor, %arg1: tensor, %arg2: tensor<*xf32>) -> (tensor, tensor, tensor, tensor, tensor, tensor, tensor<*xf32>) { +// CHECK-LABEL: func @testPackShapeComputation +func @testPackShapeComputation(%arg0: tensor, %arg1: tensor, %arg2: tensor<*xf32>) -> (tensor<2xi32>, tensor<3xi32>, tensor<3xi32>, tensor<3xi32>, tensor<3xi32>, tensor<*xi32>) { // Test dimensions sizes. %d1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor %d2 = "tf.Const"() {value = dense<2> : tensor} : () -> tensor @@ -396,65 +405,56 @@ func @testReshapeNoOpShapeComputation(%arg0: tensor, %arg1: tensor : tensor<1xi32>} : () -> tensor<1xi32> %2 = "tf.Const"() {value = dense<2> : tensor<1xi32>} : () -> tensor<1xi32> - // Fold reshape if the shape is computed from the input tensor: + // Fold pack operation if it computes the input tensor shape: // - // %shape = tf.Shape(%arg) // [? x ...] - // %dim0 = tf.StridedSlice(%shape, 0, 1, 1) // get unknown dim value - // %new_shape = tf.Pack(dim0, ...) { axis = 0 } // [? x ...] 
- // %reshape = tf.Reshape(%arg, %new_shape) // this is no-op + // %shape = tf.Shape(%arg) // [? x ...] + // %dim0 = tf.StridedSlice(%shape, 0, 1, 1) // get unknown dim0 value + // %pack = tf.Pack(dim0, ...) { axis = 0 } // [? x ...] // - // Where `...` are some statically known dimensions. In this case reshape is - // a no-op and can be replaced by %arg (assuming `...` are equal). + // Where `...` are some statically known dimensions. In this case %pack can be + // replace with a %shape. This is a common pattern in models with a dynamic + // batch size. // Test Rank 2 + // CHECK: %[[SHAPE0:.*]] = "tf.Shape" %3 = "tf.Shape"(%arg0) : (tensor) -> tensor<2xi32> %4 = "tf.StridedSlice"(%3, %0, %1, %1) {shrink_axis_mask = 1 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor %5 = "tf.Pack"(%4, %d1) {axis = 0 : i64} : (tensor, tensor) -> tensor<2xi32> %6 = "tf.Reshape"(%arg0, %5) : (tensor, tensor<2xi32>) -> tensor // Test Rank 3. - + // CHECK: %[[SHAPE1:.*]] = "tf.Shape" %7 = "tf.Shape"(%arg1) : (tensor) -> tensor<3xi32> %8 = "tf.StridedSlice"(%7, %0, %1, %1) {shrink_axis_mask = 1 : i64} : (tensor<3xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor %9 = "tf.Pack"(%8, %d1, %d2) {axis = 0 : i64} : (tensor, tensor, tensor) -> tensor<3xi32> %10 = "tf.Reshape"(%arg1, %9) : (tensor, tensor<3xi32>) -> tensor - // Shape was taken from the op that is not reshaped in the end: - // Reshape(%arg1) vs Shape(%arg0) - %11 = "tf.StridedSlice"(%3, %0, %1, %1) {shrink_axis_mask = 1 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %12 = "tf.Pack"(%11, %d1, %d2) {axis = 0 : i64} : (tensor, tensor, tensor) -> tensor<3xi32> - // CHECK: %[[RESHAPE0:.*]] = "tf.Reshape" - %13 = "tf.Reshape"(%arg1, %12) : (tensor, tensor<3xi32>) -> tensor - // Packed dimensions have different order from the reshape operand: // [?, 1, 2] vs [?, 2, 1] %14 = "tf.StridedSlice"(%7, %0, %1, %1) {shrink_axis_mask = 1 : i64} : (tensor<3xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor %15 = "tf.Pack"(%14, %d2, %d1) {axis = 0 : i64} : (tensor, tensor, tensor) -> tensor<3xi32> - // CHECK: %[[RESHAPE1:.*]] = "tf.Reshape" - %16 = "tf.Reshape"(%arg1, %15) : (tensor, tensor<3xi32>) -> tensor + // CHECK: %[[PACK0:.*]] = "tf.Pack" // StridedSlice takes second dimension from the shape: // begin = [1], end = [2], stride = [1] %17 = "tf.StridedSlice"(%7, %1, %2, %1) {shrink_axis_mask = 1 : i64} : (tensor<3xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor %18 = "tf.Pack"(%17, %d1, %d2) {axis = 0 : i64} : (tensor, tensor, tensor) -> tensor<3xi32> - // CHECK: %[[RESHAPE2:.*]] = "tf.Reshape" - %19 = "tf.Reshape"(%arg1, %18) : (tensor, tensor<3xi32>) -> tensor + // CHECK: %[[PACK1:.*]] = "tf.Pack" // Packed dimensions have higher rank than the reshape operand: // [?, 1] vs [?, 1, 1] %20 = "tf.StridedSlice"(%3, %0, %1, %1) {shrink_axis_mask = 1 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor %21 = "tf.Pack"(%20, %d1, %d1) {axis = 0 : i64} : (tensor, tensor, tensor) -> tensor<3xi32> - // CHECK: %[[RESHAPE3:.*]] = "tf.Reshape" - %22 = "tf.Reshape"(%arg0, %21) : (tensor, tensor<3xi32>) -> tensor + // CHECK: %[[PACK2:.*]] = "tf.Pack" // Make sure a dynamic ranked shape doesn't crash the "canonicalize" pass %23 = "tf.Shape"(%arg2) : (tensor<*xf32>) -> tensor<*xi32> %24 = "tf.StridedSlice"(%23, %0, %1, %1) {shrink_axis_mask = 1 : i64} : (tensor<*xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<*xi32> %25 = "tf.Pack"(%24, %d1) 
{axis = 0 : i64} : (tensor<*xi32>, tensor) -> tensor<*xi32> - %26 = "tf.Reshape"(%arg2, %25) : (tensor<*xf32>, tensor<*xi32>) -> tensor<*xf32> + // CHECK: %[[PACK3:.*]] = "tf.Pack" - // CHECK: return %arg0, %arg1, %[[RESHAPE0]], %[[RESHAPE1]], %[[RESHAPE2]], %[[RESHAPE3]] - return %6, %10, %13, %16, %19, %22, %26 : tensor, tensor, tensor, tensor, tensor, tensor, tensor<*xf32> + // CHECK: return %[[SHAPE0]], %[[SHAPE1]], %[[PACK0]], %[[PACK1]], %[[PACK2]], %[[PACK3]] + return %5, %9, %15, %18, %21, %25 : tensor<2xi32>, tensor<3xi32>, tensor<3xi32>, tensor<3xi32>, tensor<3xi32>, tensor<*xi32> } // CHECK-LABEL: testSelectScalarPred diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/canonicalize.td b/tensorflow/compiler/mlir/tensorflow/transforms/canonicalize.td index 3f0b5b48af9..d5b7eb7a739 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/canonicalize.td +++ b/tensorflow/compiler/mlir/tensorflow/transforms/canonicalize.td @@ -209,6 +209,11 @@ def ReciprocalNested : Pat<(TF_ReciprocalOp (TF_ReciprocalOp $arg)), def RedundantReshape : Pat<(TF_ReshapeOp (TF_ReshapeOp $arg, $unused), $shape), (TF_ReshapeOp $arg, $shape)>; +def IsSame : Constraint>; +def ReshapeToSelfShape : Pat<(TF_ReshapeOp $arg0, (TF_ShapeOp $arg1)), + (replaceWithValue $arg0), + [(IsSame $arg0, $arg1)]>; + //===----------------------------------------------------------------------===// // Select op patterns. //===----------------------------------------------------------------------===// From 29d635bccc81222d51328afeddb562570f9984a9 Mon Sep 17 00:00:00 2001 From: Advait Jain Date: Thu, 23 Jul 2020 22:24:35 -0700 Subject: [PATCH 1247/2522] Prefer the standard integral types over custom type-aliases. PiperOrigin-RevId: 322937618 Change-Id: I0c0560a13856ee1df1ff187d30244a99cce04f86 --- .../lite/micro/benchmarks/conv_benchmark.cc | 8 +- .../benchmarks/depthwise_conv_benchmark.cc | 8 +- .../micro_speech/recognize_commands.cc | 2 +- .../main_functions.cc | 2 +- tensorflow/lite/micro/kernels/activations.cc | 18 +-- tensorflow/lite/micro/kernels/add.cc | 16 +-- tensorflow/lite/micro/kernels/arc_mli/conv.cc | 14 +- .../micro/kernels/arc_mli/depthwise_conv.cc | 16 +-- .../micro/kernels/arc_mli/fully_connected.cc | 2 +- .../lite/micro/kernels/arc_mli/mli_tf_utils.h | 2 +- .../lite/micro/kernels/arc_mli/pooling.cc | 2 +- .../kernels/arc_mli/pooling_slicing_test.cc | 4 +- tensorflow/lite/micro/kernels/arg_min_max.cc | 7 +- .../lite/micro/kernels/circular_buffer.cc | 2 +- tensorflow/lite/micro/kernels/cmsis-nn/add.cc | 16 +-- .../lite/micro/kernels/cmsis-nn/conv.cc | 10 +- .../micro/kernels/cmsis-nn/depthwise_conv.cc | 10 +- .../micro/kernels/cmsis-nn/fully_connected.cc | 6 +- .../lite/micro/kernels/cmsis-nn/softmax.cc | 3 +- tensorflow/lite/micro/kernels/comparisons.cc | 4 +- .../lite/micro/kernels/concatenation.cc | 2 +- tensorflow/lite/micro/kernels/conv.cc | 8 +- tensorflow/lite/micro/kernels/conv_test.cc | 16 +-- .../lite/micro/kernels/depthwise_conv.cc | 10 +- .../lite/micro/kernels/depthwise_conv_test.cc | 16 +-- tensorflow/lite/micro/kernels/hard_swish.cc | 3 +- tensorflow/lite/micro/kernels/l2norm.cc | 16 +-- tensorflow/lite/micro/kernels/l2norm_test.cc | 6 +- tensorflow/lite/micro/kernels/pad.cc | 6 +- tensorflow/lite/micro/kernels/pooling_test.cc | 4 +- tensorflow/lite/micro/kernels/prelu.cc | 2 +- .../micro/kernels/quantization_util_test.cc | 12 +- tensorflow/lite/micro/kernels/quantize.cc | 2 +- .../lite/micro/kernels/quantize_test.cc | 2 +- tensorflow/lite/micro/kernels/reduce.cc | 6 +- 
.../micro/kernels/resize_nearest_neighbor.cc | 12 +- .../kernels/resize_nearest_neighbor_test.cc | 120 +++++++++--------- tensorflow/lite/micro/kernels/softmax.cc | 3 +- tensorflow/lite/micro/kernels/sub.cc | 16 +-- tensorflow/lite/micro/kernels/svdf.cc | 6 +- .../lite/micro/kernels/xtensa_hifi/add.cc | 16 +-- .../lite/micro/kernels/xtensa_hifi/conv.cc | 14 +- .../kernels/xtensa_hifi/depthwise_conv.cc | 16 +-- .../lite/micro/kernels/xtensa_hifi/pooling.cc | 16 +-- .../lite/micro/kernels/xtensa_hifi/softmax.cc | 3 +- .../lite/micro/kernels/xtensa_hifi/svdf.cc | 6 +- .../micro/kernels/xtensa_hifimini/conv.cc | 48 +++---- .../kernels/xtensa_hifimini/depthwise_conv.cc | 70 +++++----- .../xtensa_hifimini/fully_connected.cc | 14 +- .../micro/kernels/xtensa_hifimini/quantize.cc | 2 +- .../micro/kernels/xtensa_hifimini/softmax.cc | 21 +-- .../micro/kernels/xtensa_hifimini/svdf.cc | 10 +- .../xtensa_hifimini_staging/quantize.cc | 4 +- .../xtensa_hifimini_staging/softmax.cc | 7 +- .../kernels/xtensa_hifimini_staging/svdf.cc | 6 +- tensorflow/lite/micro/micro_utils.h | 8 +- tensorflow/lite/micro/test_helpers.cc | 8 +- tensorflow/lite/micro/test_helpers.h | 2 +- tensorflow/lite/micro/testing/test_utils.cc | 4 +- 59 files changed, 351 insertions(+), 344 deletions(-) diff --git a/tensorflow/lite/micro/benchmarks/conv_benchmark.cc b/tensorflow/lite/micro/benchmarks/conv_benchmark.cc index cbddbd23f26..d64b31dd39a 100644 --- a/tensorflow/lite/micro/benchmarks/conv_benchmark.cc +++ b/tensorflow/lite/micro/benchmarks/conv_benchmark.cc @@ -151,7 +151,7 @@ int main() { // Output scale of 50 is needed to accomodate a float range of [-6400, 6350] float output_scale = 50.0f; - // Create per-tensor quantized int8 input tensor. + // Create per-tensor quantized int8_t input tensor. int8_t input_quantized[32]; TfLiteTensor input_tensor = tflite::testing::CreateQuantizedTensor( input_values, input_quantized, input_dims, input_scale, input_zero_point); @@ -163,7 +163,7 @@ int main() { tflite::testing::IntArrayFromInts(input_zero_points)}; input_tensor.quantization = {kTfLiteAffineQuantization, &input_quant}; - // Create per-tensor quantized int8 filter tensor. + // Create per-tensor quantized int8_t filter tensor. int8_t filter_quantized[32 * 32]; TfLiteTensor filter_tensor = tflite::testing::CreateQuantizedTensor( filter_values, filter_quantized, filter_dims, filter_scale, @@ -176,7 +176,7 @@ int main() { tflite::testing::IntArrayFromInts(filter_zero_points)}; filter_tensor.quantization = {kTfLiteAffineQuantization, &filter_quant}; - // Create per-tensor quantized int32 bias tensor. + // Create per-tensor quantized int32_t bias tensor. int32_t bias_quantized[32]; tflite::SymmetricQuantize(bias_values, bias_quantized, 32, input_scale * output_scale); @@ -192,7 +192,7 @@ int main() { tflite::testing::IntArrayFromInts(bias_zero_points)}; bias_tensor.quantization = {kTfLiteAffineQuantization, &bias_quant}; - // Create per-tensor quantized int8 output tensor. + // Create per-tensor quantized int8_t output tensor. 
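// The renames in this patch replace the int8/int32-style type aliases with the
// standard fixed-width names from <cstdint>; a minimal sketch of the mapping
// (values are illustrative only):
//
//   int8_t  quantized_input  = -128;  // was spelled int8
//   uint8_t legacy_quantized = 255;   // was spelled uint8
//   int32_t accumulator      = 0;     // was spelled int32
//
// The widths are unchanged, so only the spelling differs and behavior is
// unaffected.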
int8_t output_quantized[32]; TfLiteTensor output_tensor = tflite::testing::CreateQuantizedTensor( output_quantized, output_dims, output_scale, output_zero_point); diff --git a/tensorflow/lite/micro/benchmarks/depthwise_conv_benchmark.cc b/tensorflow/lite/micro/benchmarks/depthwise_conv_benchmark.cc index ddaea133221..a4133680b9f 100644 --- a/tensorflow/lite/micro/benchmarks/depthwise_conv_benchmark.cc +++ b/tensorflow/lite/micro/benchmarks/depthwise_conv_benchmark.cc @@ -157,7 +157,7 @@ int main() { TfLiteIntArray* bias_dims = tflite::testing::IntArrayFromInts(bias_shape); TfLiteIntArray* output_dims = tflite::testing::IntArrayFromInts(output_shape); - // Create per-tensor quantized int8 input tensor. + // Create per-tensor quantized int8_t input tensor. int8_t input_quantized[input_elements]; TfLiteTensor input_tensor = tflite::testing::CreateQuantizedTensor( input_values, input_quantized, input_dims, input_scale, input_zero_point); @@ -170,7 +170,7 @@ int main() { tflite::testing::IntArrayFromInts(input_zero_points)}; input_tensor.quantization = {kTfLiteAffineQuantization, &input_quant}; - // Create per-tensor quantized int8 filter tensor. + // Create per-tensor quantized int8_t filter tensor. int8_t filter_quantized[filter_elements]; TfLiteTensor filter_tensor = tflite::testing::CreateQuantizedTensor( filter_values, filter_quantized, filter_dims, filter_scale, 0); @@ -183,7 +183,7 @@ int main() { tflite::testing::IntArrayFromInts(filter_zero_points)}; filter_tensor.quantization = {kTfLiteAffineQuantization, &filter_quant}; - // Create per-tensor quantized int32 bias tensor. + // Create per-tensor quantized int32_t bias tensor. int32_t bias_quantized[bias_elements]; // See https://www.tensorflow.org/lite/performance/quantization_spec for a // detailed explanation of why bias scale is input_scale * filter_scale. @@ -200,7 +200,7 @@ int main() { tflite::testing::IntArrayFromInts(bias_zero_points)}; bias_tensor.quantization = {kTfLiteAffineQuantization, &bias_quant}; - // Create per-tensor quantized int8 output tensor. + // Create per-tensor quantized int8_t output tensor. 
int8_t output_quantized[output_elements]; TfLiteTensor output_tensor = tflite::testing::CreateQuantizedTensor( output_quantized, output_dims, output_scale, output_zero_point); diff --git a/tensorflow/lite/micro/examples/micro_speech/recognize_commands.cc b/tensorflow/lite/micro/examples/micro_speech/recognize_commands.cc index 47bd10074d3..265c494670d 100644 --- a/tensorflow/lite/micro/examples/micro_speech/recognize_commands.cc +++ b/tensorflow/lite/micro/examples/micro_speech/recognize_commands.cc @@ -50,7 +50,7 @@ TfLiteStatus RecognizeCommands::ProcessLatestResults( if (latest_results->type != kTfLiteInt8) { TF_LITE_REPORT_ERROR( error_reporter_, - "The results for recognition should be int8 elements, but are %d", + "The results for recognition should be int8_t elements, but are %d", latest_results->type); return kTfLiteError; } diff --git a/tensorflow/lite/micro/examples/person_detection_experimental/main_functions.cc b/tensorflow/lite/micro/examples/person_detection_experimental/main_functions.cc index 53b87bffb41..f1ded80d1b9 100644 --- a/tensorflow/lite/micro/examples/person_detection_experimental/main_functions.cc +++ b/tensorflow/lite/micro/examples/person_detection_experimental/main_functions.cc @@ -32,7 +32,7 @@ const tflite::Model* model = nullptr; tflite::MicroInterpreter* interpreter = nullptr; TfLiteTensor* input = nullptr; -// In order to use optimized tensorflow lite kernels, a signed int8 quantized +// In order to use optimized tensorflow lite kernels, a signed int8_t quantized // model is preferred over the legacy unsigned model format. This means that // throughout this project, input images must be converted from unisgned to // signed format. The easiest and quickest way to convert from unsigned to diff --git a/tensorflow/lite/micro/kernels/activations.cc b/tensorflow/lite/micro/kernels/activations.cc index 128ab8ecbd8..2bdc0b5169a 100644 --- a/tensorflow/lite/micro/kernels/activations.cc +++ b/tensorflow/lite/micro/kernels/activations.cc @@ -53,7 +53,7 @@ inline void ReluQuantized(const ReluOpData& data, T* output_data) { const int flat_size = MatchingFlatSize(input_shape, output_shape); for (int i = 0; i < flat_size; ++i) { - const int32 val = static_cast(input_data[i]); + const int32_t val = static_cast(input_data[i]); int32_t clamped = data.params.output_offset + MultiplyByQuantizedMultiplier(val - data.params.input_offset, @@ -79,17 +79,17 @@ inline void CalculateReluOpData(const TfLiteTensor* input, TfLiteTensor* output, QuantizeMultiplier(real_multiplier, &data->params.output_multiplier, &data->params.output_shift); - data->params.quantized_activation_min = - std::max(static_cast(std::numeric_limits::min()), - output->params.zero_point + - static_cast(roundf(act_min / output->params.scale))); + data->params.quantized_activation_min = std::max( + static_cast(std::numeric_limits::min()), + output->params.zero_point + + static_cast(roundf(act_min / output->params.scale))); data->params.quantized_activation_max = act_max == std::numeric_limits::infinity() ? 
static_cast(std::numeric_limits::max()) - : std::min( - static_cast(std::numeric_limits::max()), - output->params.zero_point + - static_cast(roundf(act_max / output->params.scale))); + : std::min(static_cast(std::numeric_limits::max()), + output->params.zero_point + + static_cast( + roundf(act_max / output->params.scale))); data->params.input_offset = input->params.zero_point; data->params.output_offset = output->params.zero_point; } diff --git a/tensorflow/lite/micro/kernels/add.cc b/tensorflow/lite/micro/kernels/add.cc index 7190f2af548..79a04875def 100644 --- a/tensorflow/lite/micro/kernels/add.cc +++ b/tensorflow/lite/micro/kernels/add.cc @@ -42,18 +42,18 @@ struct OpData { // and the special 16-bit -> 16bit quantized path int input1_shift; int input2_shift; - int32 output_activation_min; - int32 output_activation_max; + int32_t output_activation_min; + int32_t output_activation_max; // These fields are used only in the general 8-bit -> 8bit quantized path - int32 input1_multiplier; - int32 input2_multiplier; - int32 output_multiplier; + int32_t input1_multiplier; + int32_t input2_multiplier; + int32_t output_multiplier; int output_shift; int left_shift; - int32 input1_offset; - int32 input2_offset; - int32 output_offset; + int32_t input1_offset; + int32_t input2_offset; + int32_t output_offset; // Used only for float evals: float output_activation_min_f32; diff --git a/tensorflow/lite/micro/kernels/arc_mli/conv.cc b/tensorflow/lite/micro/kernels/arc_mli/conv.cc index 6f137590b91..905feb1a529 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/conv.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/conv.cc @@ -78,8 +78,8 @@ bool IsMliApplicable(TfLiteContext* context, const TfLiteTensor* input, const TfLiteConvParams* params) { const auto* affine_quantization = reinterpret_cast(filter->quantization.params); - // MLI optimized version only supports int8 dataype, dilation factor of 1 and - // per-axis quantization of weights (no broadcasting/per-tensor) + // MLI optimized version only supports int8_t dataype, dilation factor of 1 + // and per-axis quantization of weights (no broadcasting/per-tensor) bool ret_val = (filter->type == kTfLiteInt8) && (input->type == kTfLiteInt8) && (bias->type == kTfLiteInt32) && (params->dilation_width_factor == 1) && @@ -176,7 +176,7 @@ TfLiteStatus EvalMliQuantizedPerChannel( OpData* data, const TfLiteTensor* input, const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output) { // Run Conv MLI kernel - // MLI optimized version only supports int8 dataype and dilation factor of 1 + // MLI optimized version only supports int8_t dataype and dilation factor of 1 if ((input->type == kTfLiteInt8) && (params->dilation_width_factor == 1) && (params->dilation_height_factor == 1)) { mli_tensor mli_in = {0}; @@ -353,10 +353,10 @@ TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, reference_integer_ops::ConvPerChannel( op_params, data->per_channel_output_multiplier, data->per_channel_output_shift, GetTensorShape(input), - GetTensorData(input), GetTensorShape(filter), - GetTensorData(filter), GetTensorShape(bias), - GetTensorData(bias), GetTensorShape(output), - GetTensorData(output)); + GetTensorData(input), GetTensorShape(filter), + GetTensorData(filter), GetTensorShape(bias), + GetTensorData(bias), GetTensorShape(output), + GetTensorData(output)); return kTfLiteOk; #else TF_LITE_KERNEL_LOG(context, diff --git a/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc b/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc index 
9a7edcb847c..9f8a6b4004c 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/depthwise_conv.cc @@ -71,10 +71,10 @@ bool IsMliApplicable(TfLiteContext* context, const TfLiteTensor* input, const int in_ch = SizeOfDimension(input, 3); const int filters_num = SizeOfDimension(filter, 3); - // MLI optimized version only supports int8 dataype, dilation factor of 1 and - // per-axis quantization of weights (no broadcasting/per-tensor) - // (in_ch == filters_num) || (in_ch == 1)) is a forbidding of - // channel multiplier logic for multichannel input. + // MLI optimized version only supports int8_t dataype, dilation factor of 1 + // and per-axis quantization of weights (no broadcasting/per-tensor) (in_ch == + // filters_num) || (in_ch == 1)) is a forbidding of channel multiplier logic + // for multichannel input. bool ret_val = (filter->type == kTfLiteInt8) && (input->type == kTfLiteInt8) && (bias->type == kTfLiteInt32) && (params->dilation_width_factor == 1) && @@ -373,10 +373,10 @@ TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, reference_integer_ops::DepthwiseConvPerChannel( op_params, data->per_channel_output_multiplier, data->per_channel_output_shift, GetTensorShape(input), - GetTensorData(input), GetTensorShape(filter), - GetTensorData(filter), GetTensorShape(bias), - GetTensorData(bias), GetTensorShape(output), - GetTensorData(output)); + GetTensorData(input), GetTensorShape(filter), + GetTensorData(filter), GetTensorShape(bias), + GetTensorData(bias), GetTensorShape(output), + GetTensorData(output)); return kTfLiteOk; #else TF_LITE_KERNEL_LOG(context, diff --git a/tensorflow/lite/micro/kernels/arc_mli/fully_connected.cc b/tensorflow/lite/micro/kernels/arc_mli/fully_connected.cc index 41c65faafb2..24b3fed0998 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/fully_connected.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/fully_connected.cc @@ -55,7 +55,7 @@ constexpr int kOutputTensor = 0; bool IsMliApplicable(TfLiteContext* context, const TfLiteTensor* input, const TfLiteTensor* filter, const TfLiteTensor* bias, const TfLiteFullyConnectedParams* params) { - // MLI optimized version only supports int8 dataype and no fused Relu and + // MLI optimized version only supports int8_t dataype and no fused Relu and // symmetric per-tensor quantization of weights (not per-axis) bool ret_val = (filter->type == kTfLiteInt8) && (input->type == kTfLiteInt8) && (bias->type == kTfLiteInt32) && diff --git a/tensorflow/lite/micro/kernels/arc_mli/mli_tf_utils.h b/tensorflow/lite/micro/kernels/arc_mli/mli_tf_utils.h index 3f9933ada47..1764f1fdf45 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/mli_tf_utils.h +++ b/tensorflow/lite/micro/kernels/arc_mli/mli_tf_utils.h @@ -34,7 +34,7 @@ static void ConvertToMliTensorData(const TfLiteTensor* tfT, mli_tensor* mliT) { } else if (tfT->type == kTfLiteInt32) { mliT->el_type = MLI_EL_ASYM_I32; } else { - TF_LITE_FATAL("Wrong data type. Expected int8 or int32."); + TF_LITE_FATAL("Wrong data type. 
Expected int8_t or int32_t."); } mliT->capacity = tfT->bytes; diff --git a/tensorflow/lite/micro/kernels/arc_mli/pooling.cc b/tensorflow/lite/micro/kernels/arc_mli/pooling.cc index d59a673d925..44bc966a8e2 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/pooling.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/pooling.cc @@ -43,7 +43,7 @@ enum MliPoolingType { AveragePooling = 0, MaxPooling = 1 }; bool IsMliApplicable(TfLiteContext* context, const TfLiteTensor* input, const TfLitePoolParams* params) { - // MLI optimized version only supports int8 dataype and no fused Relu + // MLI optimized version only supports int8_t dataype and no fused Relu return (input->type == kTfLiteInt8 && params->activation == kTfLiteActNone); } diff --git a/tensorflow/lite/micro/kernels/arc_mli/pooling_slicing_test.cc b/tensorflow/lite/micro/kernels/arc_mli/pooling_slicing_test.cc index 7f21a67d9f7..516b1bf63d6 100644 --- a/tensorflow/lite/micro/kernels/arc_mli/pooling_slicing_test.cc +++ b/tensorflow/lite/micro/kernels/arc_mli/pooling_slicing_test.cc @@ -41,7 +41,7 @@ void TestAveragePoolingQuantized( const T* expected_output_data, const int* output_dims_data, float output_min, float output_max, TfLitePadding padding, TfLiteFusedActivation activation, T* output_data) { - static_assert(sizeof(T) == 1, "Only int8/uint8 data types allowed."); + static_assert(sizeof(T) == 1, "Only int8_t/uint8_t data types allowed."); TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); @@ -112,7 +112,7 @@ void TestMaxPoolQuantized(const int* input_dims_data, const T* input_data, float output_min, float output_max, const int* output_dims_data, TfLitePadding padding, TfLiteFusedActivation activation, T* output_data) { - static_assert(sizeof(T) == 1, "Only int8/uint8 data types allowed."); + static_assert(sizeof(T) == 1, "Only int8_t/uint8_t data types allowed."); TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); diff --git a/tensorflow/lite/micro/kernels/arg_min_max.cc b/tensorflow/lite/micro/kernels/arg_min_max.cc index 3baf9f04e61..12ac0019c05 100644 --- a/tensorflow/lite/micro/kernels/arg_min_max.cc +++ b/tensorflow/lite/micro/kernels/arg_min_max.cc @@ -74,18 +74,19 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node, bool is_arg_max) { break; default: TF_LITE_KERNEL_LOG(context, - "Only float32, uint8 and int8 are " + "Only float32, uint8_t and int8_t are " "supported currently, got %s.", TfLiteTypeGetName(input->type)); return kTfLiteError; } } else { - TF_LITE_KERNEL_LOG(context, "Only int32 are supported currently, got %s.", + TF_LITE_KERNEL_LOG(context, + "Only int32_t are supported currently, got %s.", TfLiteTypeGetName(output->type)); return kTfLiteError; } } else { - TF_LITE_KERNEL_LOG(context, "Only int32 are supported currently, got %s.", + TF_LITE_KERNEL_LOG(context, "Only int32_t are supported currently, got %s.", TfLiteTypeGetName(axis->type)); return kTfLiteError; } diff --git a/tensorflow/lite/micro/kernels/circular_buffer.cc b/tensorflow/lite/micro/kernels/circular_buffer.cc index 876ea569196..b5a8ae1be3b 100644 --- a/tensorflow/lite/micro/kernels/circular_buffer.cc +++ b/tensorflow/lite/micro/kernels/circular_buffer.cc @@ -92,7 +92,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); - // The circular buffer custom operator currently only supports int8. 
+ // The circular buffer custom operator currently only supports int8_t. TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8); // TODO(b/132070898): Use statically slotted OpData structures until a diff --git a/tensorflow/lite/micro/kernels/cmsis-nn/add.cc b/tensorflow/lite/micro/kernels/cmsis-nn/add.cc index 4190e041d9e..c98e7a2c329 100644 --- a/tensorflow/lite/micro/kernels/cmsis-nn/add.cc +++ b/tensorflow/lite/micro/kernels/cmsis-nn/add.cc @@ -41,18 +41,18 @@ struct OpData { // and the special 16-bit -> 16bit quantized path int input1_shift; int input2_shift; - int32 output_activation_min; - int32 output_activation_max; + int32_t output_activation_min; + int32_t output_activation_max; // These fields are used only in the general 8-bit -> 8bit quantized path - int32 input1_multiplier; - int32 input2_multiplier; - int32 output_multiplier; + int32_t input1_multiplier; + int32_t input2_multiplier; + int32_t output_multiplier; int output_shift; int left_shift; - int32 input1_offset; - int32 input2_offset; - int32 output_offset; + int32_t input1_offset; + int32_t input2_offset; + int32_t output_offset; }; TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params, diff --git a/tensorflow/lite/micro/kernels/cmsis-nn/conv.cc b/tensorflow/lite/micro/kernels/cmsis-nn/conv.cc index 64e0b22a5f5..834f107dad0 100644 --- a/tensorflow/lite/micro/kernels/cmsis-nn/conv.cc +++ b/tensorflow/lite/micro/kernels/cmsis-nn/conv.cc @@ -304,7 +304,7 @@ TfLiteStatus EvalQuantizedPerChannel( arm_status status = arm_convolve_wrapper_s8( &ctx, &conv_params, &quant_params, &input_dims, GetTensorData(input), &filter_dims, GetTensorData(filter), - &bias_dims, GetTensorData(bias), &output_dims, + &bias_dims, GetTensorData(bias), &output_dims, GetTensorData(output)); if (status == ARM_MATH_SUCCESS) { @@ -332,10 +332,10 @@ TfLiteStatus EvalQuantizedPerChannel( reference_integer_ops::ConvPerChannel( op_params, data->per_channel_output_multiplier, data->per_channel_output_shift, GetTensorShape(input), - GetTensorData(input), GetTensorShape(filter), - GetTensorData(filter), GetTensorShape(bias), - GetTensorData(bias), GetTensorShape(output), - GetTensorData(output)); + GetTensorData(input), GetTensorShape(filter), + GetTensorData(filter), GetTensorShape(bias), + GetTensorData(bias), GetTensorShape(output), + GetTensorData(output)); #endif return kTfLiteOk; diff --git a/tensorflow/lite/micro/kernels/cmsis-nn/depthwise_conv.cc b/tensorflow/lite/micro/kernels/cmsis-nn/depthwise_conv.cc index 53d2d5692ec..457b3f854de 100644 --- a/tensorflow/lite/micro/kernels/cmsis-nn/depthwise_conv.cc +++ b/tensorflow/lite/micro/kernels/cmsis-nn/depthwise_conv.cc @@ -304,7 +304,7 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, &ctx, &dw_conv_params, &quant_params, &input_dims, GetTensorData(input), &filter_dims, GetTensorData(filter), &bias_dims, - GetTensorData(bias), &output_dims, + GetTensorData(bias), &output_dims, GetTensorData(output)), ARM_MATH_SUCCESS); } else { @@ -327,10 +327,10 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, reference_integer_ops::DepthwiseConvPerChannel( op_params, data->per_channel_output_multiplier, data->per_channel_output_shift, GetTensorShape(input), - GetTensorData(input), GetTensorShape(filter), - GetTensorData(filter), GetTensorShape(bias), - GetTensorData(bias), GetTensorShape(output), - GetTensorData(output)); + GetTensorData(input), GetTensorShape(filter), + GetTensorData(filter), GetTensorShape(bias), + GetTensorData(bias), 
GetTensorShape(output), + GetTensorData(output)); } } diff --git a/tensorflow/lite/micro/kernels/cmsis-nn/fully_connected.cc b/tensorflow/lite/micro/kernels/cmsis-nn/fully_connected.cc index 1ea7f98ea1b..074f4a9f251 100644 --- a/tensorflow/lite/micro/kernels/cmsis-nn/fully_connected.cc +++ b/tensorflow/lite/micro/kernels/cmsis-nn/fully_connected.cc @@ -99,7 +99,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { input->type, input, filter, bias, output, data)); - if (input->type == kTfLiteInt8 && nullptr != GetTensorData(bias)) { + if (input->type == kTfLiteInt8 && nullptr != GetTensorData(bias)) { RuntimeShape filter_shape = GetTensorShape(filter); RuntimeShape output_shape = GetTensorShape(output); @@ -130,7 +130,7 @@ TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node, const TfLiteTensor* bias, TfLiteTensor* output) { // The 'if' condition can be removed when null handling of bias is added to // arm_fully_connected_s8 - if (nullptr != GetTensorData(bias)) { + if (nullptr != GetTensorData(bias)) { RuntimeShape output_shape = GetTensorShape(output); TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2); const int batches = output_shape.Dims(0); @@ -189,7 +189,7 @@ TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node, arm_fully_connected_s8(&ctx, &fc_params, &quant_params, &input_dims, GetTensorData(input), &filter_dims, GetTensorData(filter), &bias_dims, - GetTensorData(bias), &output_dims, + GetTensorData(bias), &output_dims, GetTensorData(output)), ARM_MATH_SUCCESS); } else { diff --git a/tensorflow/lite/micro/kernels/cmsis-nn/softmax.cc b/tensorflow/lite/micro/kernels/cmsis-nn/softmax.cc index b18d1c9b1e8..790af35f217 100644 --- a/tensorflow/lite/micro/kernels/cmsis-nn/softmax.cc +++ b/tensorflow/lite/micro/kernels/cmsis-nn/softmax.cc @@ -38,7 +38,8 @@ TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context, TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8); if (output->type == kTfLiteInt16) { TF_LITE_ENSURE_EQ(context, output->params.zero_point, -32768); - // NOTE: Current int16 softmax output does not require symmetric scaling + // NOTE: Current int16_t softmax output does not require symmetric + // scaling // - so no need to verify scale here. 
} else { TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8); diff --git a/tensorflow/lite/micro/kernels/comparisons.cc b/tensorflow/lite/micro/kernels/comparisons.cc index ed814527e94..ed7a20086f8 100644 --- a/tensorflow/lite/micro/kernels/comparisons.cc +++ b/tensorflow/lite/micro/kernels/comparisons.cc @@ -626,12 +626,12 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { auto input2_offset = -input2->params.zero_point; const int kLeftShift = 8; - int32 input1_multiplier; + int32_t input1_multiplier; int input1_shift; QuantizeMultiplierSmallerThanOneExp( static_cast(input1->params.scale), &input1_multiplier, &input1_shift); - int32 input2_multiplier; + int32_t input2_multiplier; int input2_shift; QuantizeMultiplierSmallerThanOneExp( static_cast(input2->params.scale), &input2_multiplier, diff --git a/tensorflow/lite/micro/kernels/concatenation.cc b/tensorflow/lite/micro/kernels/concatenation.cc index fb47349f283..f64362745be 100644 --- a/tensorflow/lite/micro/kernels/concatenation.cc +++ b/tensorflow/lite/micro/kernels/concatenation.cc @@ -122,7 +122,7 @@ void EvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node) { reference_ops::ConcatenationWithScaling( data->params, inputs_shape_ptr, inputs_data, tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); + tflite::micro::GetTensorData(output)); } void* Init(TfLiteContext* context, const char* buffer, size_t length) { diff --git a/tensorflow/lite/micro/kernels/conv.cc b/tensorflow/lite/micro/kernels/conv.cc index ff20cf684d6..b04906a147b 100644 --- a/tensorflow/lite/micro/kernels/conv.cc +++ b/tensorflow/lite/micro/kernels/conv.cc @@ -237,13 +237,13 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, reference_integer_ops::ConvPerChannel( op_params, data.per_channel_output_multiplier, data.per_channel_output_shift, tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData(input), + tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), - tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorData(filter), tflite::micro::GetTensorShape(bias), - tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorData(bias), tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); + tflite::micro::GetTensorData(output)); } void EvalFloat(TfLiteContext* context, TfLiteNode* node, diff --git a/tensorflow/lite/micro/kernels/conv_test.cc b/tensorflow/lite/micro/kernels/conv_test.cc index d73f03e34a1..be646d63659 100644 --- a/tensorflow/lite/micro/kernels/conv_test.cc +++ b/tensorflow/lite/micro/kernels/conv_test.cc @@ -601,7 +601,7 @@ TF_LITE_MICRO_TEST(BroadcastPerLayerQuantizationToPerChannelShouldMatchGolden) { TfLiteIntArray* output_dims = tflite::testing::IntArrayFromInts(tflite::testing::kOutputShape); - // Create per-layer quantized int8 input tensor. + // Create per-layer quantized int8_t input tensor. TfLiteTensor input_tensor = tflite::testing::CreateQuantizedTensor( tflite::testing::kInputData, input_quantized, input_dims, input_scale, 0); int input_zero_points[2] = {1, 0}; @@ -611,7 +611,7 @@ TF_LITE_MICRO_TEST(BroadcastPerLayerQuantizationToPerChannelShouldMatchGolden) { tflite::testing::IntArrayFromInts(input_zero_points), 0}; input_tensor.quantization = {kTfLiteAffineQuantization, &input_quant}; - // Create per-layer quantized int8 filter tensor. + // Create per-layer quantized int8_t filter tensor. 
TfLiteTensor filter_tensor = tflite::testing::CreateQuantizedTensor( tflite::testing::kFilterData, filter_quantized, filter_dims, filter_scale, 0); @@ -622,7 +622,7 @@ TF_LITE_MICRO_TEST(BroadcastPerLayerQuantizationToPerChannelShouldMatchGolden) { tflite::testing::IntArrayFromInts(filter_zero_points), 0}; filter_tensor.quantization = {kTfLiteAffineQuantization, &filter_quant}; - // Create per-layer quantized int32 bias tensor. + // Create per-layer quantized int32_t bias tensor. tflite::SymmetricQuantize(tflite::testing::kBiasData, bias_quantized, tflite::testing::kBiasElements, input_scale * output_scale); @@ -636,7 +636,7 @@ TF_LITE_MICRO_TEST(BroadcastPerLayerQuantizationToPerChannelShouldMatchGolden) { tflite::testing::IntArrayFromInts(bias_zero_points), 0}; bias_tensor.quantization = {kTfLiteAffineQuantization, &bias_quant}; - // Create per-layer quantized int8 output tensor. + // Create per-layer quantized int8_t output tensor. TfLiteTensor output_tensor = tflite::testing::CreateQuantizedTensor( output_data, output_dims, output_scale, 0 /* quantized dimension */); int output_zero_points[2] = {1, 0}; @@ -723,7 +723,7 @@ TF_LITE_MICRO_TEST(Int8Input32x1Filter32x32ShouldMatchGolden) { // Output scale of 50 is needed to accomodate a float range of [-6400, 6350] float output_scale = 50.0f; - // Create per-tensor quantized int8 input tensor. + // Create per-tensor quantized int8_t input tensor. int8_t input_quantized[kSampleSize]; TfLiteTensor input_tensor = tflite::testing::CreateQuantizedTensor( input_values, input_quantized, input_dims, input_scale, input_zero_point); @@ -735,7 +735,7 @@ TF_LITE_MICRO_TEST(Int8Input32x1Filter32x32ShouldMatchGolden) { tflite::testing::IntArrayFromInts(input_zero_points), 0}; input_tensor.quantization = {kTfLiteAffineQuantization, &input_quant}; - // Create per-tensor quantized int8 filter tensor. + // Create per-tensor quantized int8_t filter tensor. int8_t filter_quantized[kNumFilters * kSampleSize]; TfLiteTensor filter_tensor = tflite::testing::CreateQuantizedTensor( filter_values, filter_quantized, filter_dims, filter_scale, @@ -748,7 +748,7 @@ TF_LITE_MICRO_TEST(Int8Input32x1Filter32x32ShouldMatchGolden) { tflite::testing::IntArrayFromInts(filter_zero_points), 0}; filter_tensor.quantization = {kTfLiteAffineQuantization, &filter_quant}; - // Create per-tensor quantized int32 bias tensor. + // Create per-tensor quantized int32_t bias tensor. int32_t bias_quantized[kSampleSize]; tflite::SymmetricQuantize(bias_values, bias_quantized, kSampleSize, input_scale * output_scale); @@ -764,7 +764,7 @@ TF_LITE_MICRO_TEST(Int8Input32x1Filter32x32ShouldMatchGolden) { tflite::testing::IntArrayFromInts(bias_zero_points), 0}; bias_tensor.quantization = {kTfLiteAffineQuantization, &bias_quant}; - // Create per-tensor quantized int8 output tensor. + // Create per-tensor quantized int8_t output tensor. int8_t output_quantized[kSampleSize]; TfLiteTensor output_tensor = tflite::testing::CreateQuantizedTensor( output_quantized, output_dims, output_scale, output_zero_point); diff --git a/tensorflow/lite/micro/kernels/depthwise_conv.cc b/tensorflow/lite/micro/kernels/depthwise_conv.cc index 687537e2c59..2f6083d56c1 100644 --- a/tensorflow/lite/micro/kernels/depthwise_conv.cc +++ b/tensorflow/lite/micro/kernels/depthwise_conv.cc @@ -123,7 +123,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { int filter_width = SizeOfDimension(filter, 2); int filter_height = SizeOfDimension(filter, 1); - // Per channel quantization is only needed for int8 inference. 
For other + // Per channel quantization is only needed for int8_t inference. For other // quantized types, only a single scale and zero point is needed. const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension]; // Dynimically allocate per-channel quantization parameters. @@ -221,13 +221,13 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, reference_integer_ops::DepthwiseConvPerChannel( op_params, data.per_channel_output_multiplier, data.per_channel_output_shift, tflite::micro::GetTensorShape(input), - tflite::micro::GetTensorData(input), + tflite::micro::GetTensorData(input), tflite::micro::GetTensorShape(filter), - tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorData(filter), tflite::micro::GetTensorShape(bias), - tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorData(bias), tflite::micro::GetTensorShape(output), - tflite::micro::GetTensorData(output)); + tflite::micro::GetTensorData(output)); } void EvalQuantized(TfLiteContext* context, TfLiteNode* node, diff --git a/tensorflow/lite/micro/kernels/depthwise_conv_test.cc b/tensorflow/lite/micro/kernels/depthwise_conv_test.cc index 5e35d54dcb1..e16e9f893cb 100644 --- a/tensorflow/lite/micro/kernels/depthwise_conv_test.cc +++ b/tensorflow/lite/micro/kernels/depthwise_conv_test.cc @@ -787,7 +787,7 @@ TF_LITE_MICRO_TEST(PerChannelBroadcastQuantizationParams) { TfLiteIntArray* bias_dims = tflite::testing::IntArrayFromInts(bias_shape); TfLiteIntArray* output_dims = tflite::testing::IntArrayFromInts(output_shape); - // Create per-layer quantized int8 input tensor. + // Create per-layer quantized int8_t input tensor. TfLiteTensor input_tensor = tflite::testing::CreateQuantizedTensor( input_values, input_quantized, input_dims, input_scale, 0); int input_zero_points[2] = {1, 0}; @@ -797,7 +797,7 @@ TF_LITE_MICRO_TEST(PerChannelBroadcastQuantizationParams) { tflite::testing::IntArrayFromInts(input_zero_points), 0}; input_tensor.quantization = {kTfLiteAffineQuantization, &input_quant}; - // Create per-layer quantized int8 filter tensor. + // Create per-layer quantized int8_t filter tensor. TfLiteTensor filter_tensor = tflite::testing::CreateQuantizedTensor( filter_values, filter_quantized, filter_dims, filter_scale, 0); int filter_zero_points[2] = {1, 0}; @@ -807,7 +807,7 @@ TF_LITE_MICRO_TEST(PerChannelBroadcastQuantizationParams) { tflite::testing::IntArrayFromInts(filter_zero_points), 0}; filter_tensor.quantization = {kTfLiteAffineQuantization, &filter_quant}; - // Create per-layer quantized int32 bias tensor. + // Create per-layer quantized int32_t bias tensor. tflite::SymmetricQuantize(bias_values, bias_quantized, bias_elements, input_scale * output_scale); TfLiteTensor bias_tensor = @@ -820,7 +820,7 @@ TF_LITE_MICRO_TEST(PerChannelBroadcastQuantizationParams) { tflite::testing::IntArrayFromInts(bias_zero_points), 0}; bias_tensor.quantization = {kTfLiteAffineQuantization, &bias_quant}; - // Create per-layer quantized int8 output tensor. + // Create per-layer quantized int8_t output tensor. TfLiteTensor output_tensor = tflite::testing::CreateQuantizedTensor( output_data, output_dims, output_scale, 0); int output_zero_points[2] = {1, 0}; @@ -922,7 +922,7 @@ TF_LITE_MICRO_TEST(Int8Input32x4Filter32x4ShouldMatchGolden) { TfLiteIntArray* bias_dims = tflite::testing::IntArrayFromInts(bias_shape); TfLiteIntArray* output_dims = tflite::testing::IntArrayFromInts(output_shape); - // Create per-tensor quantized int8 input tensor. + // Create per-tensor quantized int8_t input tensor. 
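// Worked example of the bias quantization used further down in this test,
// with assumed values (input_scale = 1.0f, filter_scale = 0.5f): per the
// quantization-spec comment below, the int32_t bias scale is
// input_scale * filter_scale = 0.5f, and bias values are quantized
// symmetrically (zero point 0), so a float bias of 8.0f becomes
// round(8.0f / 0.5f) = 16.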
int8_t input_quantized[input_elements]; TfLiteTensor input_tensor = tflite::testing::CreateQuantizedTensor( input_values, input_quantized, input_dims, input_scale, input_zero_point); @@ -935,7 +935,7 @@ TF_LITE_MICRO_TEST(Int8Input32x4Filter32x4ShouldMatchGolden) { tflite::testing::IntArrayFromInts(input_zero_points), 0}; input_tensor.quantization = {kTfLiteAffineQuantization, &input_quant}; - // Create per-tensor quantized int8 filter tensor. + // Create per-tensor quantized int8_t filter tensor. int8_t filter_quantized[filter_elements]; TfLiteTensor filter_tensor = tflite::testing::CreateQuantizedTensor( filter_values, filter_quantized, filter_dims, filter_scale, 0); @@ -948,7 +948,7 @@ TF_LITE_MICRO_TEST(Int8Input32x4Filter32x4ShouldMatchGolden) { tflite::testing::IntArrayFromInts(filter_zero_points), 0}; filter_tensor.quantization = {kTfLiteAffineQuantization, &filter_quant}; - // Create per-tensor quantized int32 bias tensor. + // Create per-tensor quantized int32_t bias tensor. int32_t bias_quantized[bias_elements]; // See https://www.tensorflow.org/lite/performance/quantization_spec for a // detailed explanation of why bias scale is input_scale * filter_scale. @@ -965,7 +965,7 @@ TF_LITE_MICRO_TEST(Int8Input32x4Filter32x4ShouldMatchGolden) { tflite::testing::IntArrayFromInts(bias_zero_points), 0}; bias_tensor.quantization = {kTfLiteAffineQuantization, &bias_quant}; - // Create per-tensor quantized int8 output tensor. + // Create per-tensor quantized int8_t output tensor. int8_t output_quantized[output_elements]; TfLiteTensor output_tensor = tflite::testing::CreateQuantizedTensor( output_quantized, output_dims, output_scale, output_zero_point); diff --git a/tensorflow/lite/micro/kernels/hard_swish.cc b/tensorflow/lite/micro/kernels/hard_swish.cc index fecb8bda409..3e8ecca7cc3 100644 --- a/tensorflow/lite/micro/kernels/hard_swish.cc +++ b/tensorflow/lite/micro/kernels/hard_swish.cc @@ -104,7 +104,8 @@ TfLiteStatus HardSwishEval(TfLiteContext* context, TfLiteNode* node) { } break; default: { TF_LITE_KERNEL_LOG( - context, "Only float32/int8/uint8 are supported currently, got %s", + context, + "Only float32/int8_t/uint8_t are supported currently, got %s", TfLiteTypeGetName(input->type)); return kTfLiteError; } diff --git a/tensorflow/lite/micro/kernels/l2norm.cc b/tensorflow/lite/micro/kernels/l2norm.cc index 16a982344e1..ab4067058a4 100644 --- a/tensorflow/lite/micro/kernels/l2norm.cc +++ b/tensorflow/lite/micro/kernels/l2norm.cc @@ -97,12 +97,12 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { TF_LITE_L2NORM(reference_ops); #undef TF_LITE_L2NORM } else if (output->type == kTfLiteUInt8) { -#define TF_LITE_L2NORM(type) \ - tflite::L2NormalizationParams op_params; \ - op_params.input_zero_point = input->params.zero_point; \ - type::L2Normalization(op_params, GetTensorShape(input), \ - GetTensorData(input), GetTensorShape(output), \ - GetTensorData(output)) +#define TF_LITE_L2NORM(type) \ + tflite::L2NormalizationParams op_params; \ + op_params.input_zero_point = input->params.zero_point; \ + type::L2Normalization(op_params, GetTensorShape(input), \ + GetTensorData(input), GetTensorShape(output), \ + GetTensorData(output)) TF_LITE_L2NORM(reference_ops); #undef TF_LITE_L2NORM @@ -115,8 +115,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); reference_integer_ops::L2Normalization(input->params.zero_point, outer_size, - depth, GetTensorData(input), - 
GetTensorData(output)); + depth, GetTensorData(input), + GetTensorData(output)); } else { TF_LITE_KERNEL_LOG(context, "Output type is %s, requires float.", TfLiteTypeGetName(output->type)); diff --git a/tensorflow/lite/micro/kernels/l2norm_test.cc b/tensorflow/lite/micro/kernels/l2norm_test.cc index 39eb92a8849..89029bb260a 100644 --- a/tensorflow/lite/micro/kernels/l2norm_test.cc +++ b/tensorflow/lite/micro/kernels/l2norm_test.cc @@ -23,7 +23,7 @@ namespace tflite { namespace testing { namespace { -// used to set the quantization parameters for the int8 and uint8 tests +// used to set the quantization parameters for the int8_t and uint8_t tests constexpr float kInputMin = -2.0; constexpr float kInputMax = 2.0; constexpr float kOutputMin = -1.0; @@ -50,7 +50,7 @@ TfLiteTensor CreateL2NormTensor(const float* data, TfLiteIntArray* dims, return CreateFloatTensor(data, dims); } -TfLiteTensor CreateL2NormTensor(const uint8* data, TfLiteIntArray* dims, +TfLiteTensor CreateL2NormTensor(const uint8_t* data, TfLiteIntArray* dims, bool is_input) { TfLiteTensor tensor; @@ -64,7 +64,7 @@ TfLiteTensor CreateL2NormTensor(const uint8* data, TfLiteIntArray* dims, return tensor; } -TfLiteTensor CreateL2NormTensor(const int8* data, TfLiteIntArray* dims, +TfLiteTensor CreateL2NormTensor(const int8_t* data, TfLiteIntArray* dims, bool is_input) { TfLiteTensor tensor; diff --git a/tensorflow/lite/micro/kernels/pad.cc b/tensorflow/lite/micro/kernels/pad.cc index 7ac39943c5c..b0ddcfda0de 100644 --- a/tensorflow/lite/micro/kernels/pad.cc +++ b/tensorflow/lite/micro/kernels/pad.cc @@ -50,7 +50,7 @@ struct PadContext { resizing_category = ResizingCategory::kGenericResize; const int paddings_total = GetTensorShape(paddings).FlatSize(); - const int32* paddings_data = GetTensorData(paddings); + const int32_t* paddings_data = GetTensorData(paddings); // Paddings will be a n,2 array, and we need to detect 4D arrays with the // pattern { {0,0}, {a, b}, {c, d}, {0,0} }. if (IsConstantTensor(paddings) && paddings_total == 8 && @@ -83,7 +83,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { op_context.output->dims->size * 2); // On Micro, outputs must be properly sized by the converter. - const int32* paddings_data = GetTensorData(op_context.paddings); + const int32_t* paddings_data = GetTensorData(op_context.paddings); for (int i = 0; i < op_context.output->dims->size; i++) { int output_dim = op_context.output->dims->data[i]; int expected_dim = op_context.input->dims->data[i] + paddings_data[i * 2] + @@ -107,7 +107,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // Create before and after padding arrays that are accepted by the kernel. 
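// For illustration, the paddings layout that the PadContext logic above
// detects (values assumed): a 4-D input padded only on the two middle
// dimensions.
//
//   int32_t paddings[4][2] = {{0, 0},   // dim 0 (batch): no padding
//                             {1, 1},   // dim 1: one element before and after
//                             {2, 2},   // dim 2: two elements before and after
//                             {0, 0}};  // dim 3 (channels): no padding
//
// Row i holds {before_i, after_i} for dimension i, matching the n x 2 shape
// described above.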
- const int32* paddings_data = GetTensorData(op_context.paddings); + const int32_t* paddings_data = GetTensorData(op_context.paddings); tflite::PadParams op_params; memset(&op_params, 0, sizeof(PadParams)); diff --git a/tensorflow/lite/micro/kernels/pooling_test.cc b/tensorflow/lite/micro/kernels/pooling_test.cc index 23d4b506d8e..73d5d80de7c 100644 --- a/tensorflow/lite/micro/kernels/pooling_test.cc +++ b/tensorflow/lite/micro/kernels/pooling_test.cc @@ -105,7 +105,7 @@ void TestAveragePoolingQuantized( std::initializer_list output_dims_data, float output_min, float output_max, TfLitePadding padding, TfLiteFusedActivation activation, T* output_data) { - static_assert(sizeof(T) == 1, "Only int8/uint8 data types allowed."); + static_assert(sizeof(T) == 1, "Only int8_t/uint8_t data types allowed."); TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data); TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); @@ -246,7 +246,7 @@ void TestMaxPoolQuantized(std::initializer_list input_dims_data, std::initializer_list output_dims_data, TfLitePadding padding, TfLiteFusedActivation activation, T* output_data) { - static_assert(sizeof(T) == 1, "Only int8/uint8 data types allowed."); + static_assert(sizeof(T) == 1, "Only int8_t/uint8_t data types allowed."); TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data); TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); diff --git a/tensorflow/lite/micro/kernels/prelu.cc b/tensorflow/lite/micro/kernels/prelu.cc index d1d8f977850..3adb63312af 100644 --- a/tensorflow/lite/micro/kernels/prelu.cc +++ b/tensorflow/lite/micro/kernels/prelu.cc @@ -120,7 +120,7 @@ TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) { } break; default: TF_LITE_KERNEL_LOG( - context, "Only float32 and uint8 are supported currently, got %d.", + context, "Only float32 and uint8_t are supported currently, got %d.", TfLiteTypeGetName(input->type)); return kTfLiteError; } diff --git a/tensorflow/lite/micro/kernels/quantization_util_test.cc b/tensorflow/lite/micro/kernels/quantization_util_test.cc index 5929f5fd7b5..76ee9eefb7e 100644 --- a/tensorflow/lite/micro/kernels/quantization_util_test.cc +++ b/tensorflow/lite/micro/kernels/quantization_util_test.cc @@ -203,7 +203,7 @@ TF_LITE_MICRO_TEST(QuantizationUtilTest_SafeCast) { // 128 | 10.0 TF_LITE_MICRO_TEST(QuantizationUtilTest_ChooseQuantizationParams) { tflite::QuantizationParams qp = - tflite::ChooseQuantizationParams(-10.0, 30.0); + tflite::ChooseQuantizationParams(-10.0, 30.0); TF_LITE_MICRO_EXPECT_NEAR(qp.scale, 0.156863, 1e-5); TF_LITE_MICRO_EXPECT_EQ(qp.zero_point, 64); } @@ -211,7 +211,7 @@ TF_LITE_MICRO_TEST(QuantizationUtilTest_ChooseQuantizationParams) { TF_LITE_MICRO_TEST( QuantizationUtilTest_ChooseQuantizationParamsZeroPointOnMinBoundary) { tflite::QuantizationParams qp = - tflite::ChooseQuantizationParams(0.0, 30.0); + tflite::ChooseQuantizationParams(0.0, 30.0); TF_LITE_MICRO_EXPECT_NEAR(qp.scale, 0.117647, 1e-5); TF_LITE_MICRO_EXPECT_EQ(qp.zero_point, 0); } @@ -219,7 +219,7 @@ TF_LITE_MICRO_TEST( TF_LITE_MICRO_TEST( QuantizationUtilTest_ChooseQuantizationParamsEmptyRangeZero) { tflite::QuantizationParams qp = - tflite::ChooseQuantizationParams(0.0, 0.0); + tflite::ChooseQuantizationParams(0.0, 0.0); TF_LITE_MICRO_EXPECT_NEAR(qp.scale, 0.0, 1e-5); TF_LITE_MICRO_EXPECT_EQ(qp.zero_point, 0); } @@ -227,7 +227,7 @@ TF_LITE_MICRO_TEST( TF_LITE_MICRO_TEST( QuantizationUtilTest_ChooseQuantizationParamsZeroPointOnMaxBoundary) { 
   tflite::QuantizationParams qp =
-      tflite::ChooseQuantizationParams(-10.0, 0.0);
+      tflite::ChooseQuantizationParams(-10.0, 0.0);
   TF_LITE_MICRO_EXPECT_NEAR(qp.scale, 0.039216, 1e-5);
   TF_LITE_MICRO_EXPECT_EQ(qp.zero_point, 255);
 }
@@ -418,11 +418,11 @@ TF_LITE_MICRO_TEST(QuantizationUtilTest_QuantizeMultiplierArray) {
                            0.125, 0.25, 0.5, 1, 2, 4};
   const int size = 13;
-  int32 effective_scale_significand[size];
+  int32_t effective_scale_significand[size];
   int effective_scale_shift[size];
   tflite::QuantizeMultiplierArray(weights, size, effective_scale_significand,
                                   effective_scale_shift);
-  const int32 expected_effective_scale_significand[] = {
+  const int32_t expected_effective_scale_significand[] = {
       -1073741824,  // float scale = -4
       -1073741824,  // float scale = -2
       -1073741824,  // float scale = -1
diff --git a/tensorflow/lite/micro/kernels/quantize.cc b/tensorflow/lite/micro/kernels/quantize.cc
index 2817697919f..309d2b59b7d 100644
--- a/tensorflow/lite/micro/kernels/quantize.cc
+++ b/tensorflow/lite/micro/kernels/quantize.cc
@@ -152,7 +152,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 // This Op (QUANTIZE) quantizes the input and produces quantized output.
 // AffineQuantize takes scale and zero point and quantizes the float value to
-// quantized output, in int8 or uint8 format.
+// quantized output, in int8_t or uint8_t format.
 TfLiteRegistration Register_QUANTIZE() {
   return {/*init=*/quantize::Init,
           /*free=*/nullptr,
diff --git a/tensorflow/lite/micro/kernels/quantize_test.cc b/tensorflow/lite/micro/kernels/quantize_test.cc
index b6f885d09e7..2e76fc566af 100644
--- a/tensorflow/lite/micro/kernels/quantize_test.cc
+++ b/tensorflow/lite/micro/kernels/quantize_test.cc
@@ -32,7 +32,7 @@ void ValidateQuantizeGoldens(TfLiteTensor* tensors, int tensors_size,
   TfLiteContext context;
   PopulateContext(tensors, tensors_size, micro_test::reporter, &context);
-  // Version 1 of quantize supports int8 and uint8 quantization.
+  // Version 1 of quantize supports int8_t and uint8_t quantization.
::tflite::AllOpsResolver resolver; const TfLiteRegistration* registration = resolver.FindOp(tflite::BuiltinOperator_QUANTIZE); diff --git a/tensorflow/lite/micro/kernels/reduce.cc b/tensorflow/lite/micro/kernels/reduce.cc index 464b7faafad..8d0dbe1ad34 100644 --- a/tensorflow/lite/micro/kernels/reduce.cc +++ b/tensorflow/lite/micro/kernels/reduce.cc @@ -50,7 +50,7 @@ TfLiteStatus PrepareSimple(TfLiteContext* context, TfLiteNode* node) { TfLiteStatus PrepareMeanOrSum(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_OK(context, PrepareSimple(context, node)); - // TODO(b/144955155): Support uint8(b/144955155) and int8(b/144955018) + // TODO(b/144955155): Support uint8_t(b/144955155) and int8_t(b/144955018) return kTfLiteOk; } @@ -58,7 +58,7 @@ void ResolveAxis(const int* axis_data, int axis_count, tflite::MeanParams* op_params) { int i = 0; for (; i < axis_count; ++i) { - op_params->axis[i] = static_cast(axis_data[i]); + op_params->axis[i] = static_cast(axis_data[i]); } for (; i < 4; ++i) { op_params->axis[i] = 1; @@ -110,7 +110,7 @@ TfLiteStatus EvalMean(TfLiteContext* context, TfLiteNode* node) { } } break; default: - // TODO(b/144955155): Support uint8(b/144955155) and int8(b/144955018) + // TODO(b/144955155): Support uint8_t(b/144955155) and int8_t(b/144955018) TF_LITE_ENSURE_MSG(context, false, "Currently, only float32 input type " "is supported."); diff --git a/tensorflow/lite/micro/kernels/resize_nearest_neighbor.cc b/tensorflow/lite/micro/kernels/resize_nearest_neighbor.cc index dc39bfeebf0..38df726cada 100644 --- a/tensorflow/lite/micro/kernels/resize_nearest_neighbor.cc +++ b/tensorflow/lite/micro/kernels/resize_nearest_neighbor.cc @@ -71,22 +71,22 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { if (output->type == kTfLiteFloat32) { reference_ops::ResizeNearestNeighbor( - op_params, GetTensorShape(input), GetTensorData(input), - GetTensorShape(size), GetTensorData(size), - GetTensorShape(output), GetTensorData(output)); + op_params, GetTensorShape(input), GetTensorData(input), + GetTensorShape(size), GetTensorData(size), + GetTensorShape(output), GetTensorData(output)); } else if (output->type == kTfLiteUInt8) { reference_ops::ResizeNearestNeighbor( op_params, GetTensorShape(input), GetTensorData(input), - GetTensorShape(size), GetTensorData(size), + GetTensorShape(size), GetTensorData(size), GetTensorShape(output), GetTensorData(output)); } else if (output->type == kTfLiteInt8) { reference_ops::ResizeNearestNeighbor( op_params, GetTensorShape(input), GetTensorData(input), - GetTensorShape(size), GetTensorData(size), + GetTensorShape(size), GetTensorData(size), GetTensorShape(output), GetTensorData(output)); } else { TF_LITE_KERNEL_LOG(context, - "Output type is %d, requires float, uint8 or int8.", + "Output type is %d, requires float, uint8_t or int8_t.", output->type); return kTfLiteError; } diff --git a/tensorflow/lite/micro/kernels/resize_nearest_neighbor_test.cc b/tensorflow/lite/micro/kernels/resize_nearest_neighbor_test.cc index 1c2c22645e6..cbc68bbc9e1 100644 --- a/tensorflow/lite/micro/kernels/resize_nearest_neighbor_test.cc +++ b/tensorflow/lite/micro/kernels/resize_nearest_neighbor_test.cc @@ -22,18 +22,18 @@ namespace tflite { namespace testing { namespace { -using uint8 = std::uint8_t; -using int32 = std::int32_t; +using uint8_t = std::uint8_t; +using int32_t = std::int32_t; TfLiteTensor TestCreateTensor(const float* data, TfLiteIntArray* dims) { return CreateFloatTensor(data, dims); } -TfLiteTensor TestCreateTensor(const uint8* data, 
TfLiteIntArray* dims) { +TfLiteTensor TestCreateTensor(const uint8_t* data, TfLiteIntArray* dims) { return CreateQuantizedTensor(data, dims, 0, 255); } -TfLiteTensor TestCreateTensor(const int8* data, TfLiteIntArray* dims) { +TfLiteTensor TestCreateTensor(const int8_t* data, TfLiteIntArray* dims) { return CreateQuantizedTensor(data, dims, -128, 127); } @@ -42,7 +42,7 @@ TfLiteTensor TestCreateTensor(const int8* data, TfLiteIntArray* dims) { // Expected sizes should be a 1-D tensor with 2 elements: new_height & new_width template void TestResizeNearestNeighbor(const int* input_dims_data, const T* input_data, - const int32* expected_size_data, + const int32_t* expected_size_data, const T* expected_output_data, const int* output_dims_data, T* output_data) { TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); @@ -101,7 +101,7 @@ TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(HorizontalResize) { const int input_dims[] = {4, 1, 1, 2, 1}; const float input_data[] = {3, 6}; - const int32 expected_size_data[] = {1, 3}; + const int32_t expected_size_data[] = {1, 3}; const float expected_output_data[] = {3, 3, 6}; const int output_dims[] = {4, 1, 1, 3, 1}; float output_data[3]; @@ -112,32 +112,32 @@ TF_LITE_MICRO_TEST(HorizontalResize) { } TF_LITE_MICRO_TEST(HorizontalResizeUInt8) { const int input_dims[] = {4, 1, 1, 2, 1}; - const uint8 input_data[] = {3, 6}; - const int32 expected_size_data[] = {1, 3}; - const uint8 expected_output_data[] = {3, 3, 6}; + const uint8_t input_data[] = {3, 6}; + const int32_t expected_size_data[] = {1, 3}; + const uint8_t expected_output_data[] = {3, 3, 6}; const int output_dims[] = {4, 1, 1, 3, 1}; - uint8 output_data[3]; + uint8_t output_data[3]; - tflite::testing::TestResizeNearestNeighbor( + tflite::testing::TestResizeNearestNeighbor( input_dims, input_data, expected_size_data, expected_output_data, output_dims, output_data); } TF_LITE_MICRO_TEST(HorizontalResizeInt8) { const int input_dims[] = {4, 1, 1, 2, 1}; - const int8 input_data[] = {-3, 6}; - const int32 expected_size_data[] = {1, 3}; - const int8 expected_output_data[] = {-3, -3, 6}; + const int8_t input_data[] = {-3, 6}; + const int32_t expected_size_data[] = {1, 3}; + const int8_t expected_output_data[] = {-3, -3, 6}; const int output_dims[] = {4, 1, 1, 3, 1}; - int8 output_data[3]; + int8_t output_data[3]; - tflite::testing::TestResizeNearestNeighbor( + tflite::testing::TestResizeNearestNeighbor( input_dims, input_data, expected_size_data, expected_output_data, output_dims, output_data); } TF_LITE_MICRO_TEST(VerticalResize) { const int input_dims[] = {4, 1, 2, 1, 1}; const float input_data[] = {3, 9}; - const int32 expected_size_data[] = {3, 1}; + const int32_t expected_size_data[] = {3, 1}; const float expected_output_data[] = {3, 3, 9}; const int output_dims[] = {4, 1, 3, 1, 1}; float output_data[3]; @@ -148,25 +148,25 @@ TF_LITE_MICRO_TEST(VerticalResize) { } TF_LITE_MICRO_TEST(VerticalResizeUInt8) { const int input_dims[] = {4, 1, 2, 1, 1}; - const uint8 input_data[] = {3, 9}; - const int32 expected_size_data[] = {3, 1}; - const uint8 expected_output_data[] = {3, 3, 9}; + const uint8_t input_data[] = {3, 9}; + const int32_t expected_size_data[] = {3, 1}; + const uint8_t expected_output_data[] = {3, 3, 9}; const int output_dims[] = {4, 1, 3, 1, 1}; - uint8 output_data[3]; + uint8_t output_data[3]; - tflite::testing::TestResizeNearestNeighbor( + tflite::testing::TestResizeNearestNeighbor( input_dims, input_data, expected_size_data, expected_output_data, output_dims, output_data); } 
TF_LITE_MICRO_TEST(VerticalResizeInt8) { const int input_dims[] = {4, 1, 2, 1, 1}; - const int8 input_data[] = {3, -9}; - const int32 expected_size_data[] = {3, 1}; - const int8 expected_output_data[] = {3, 3, -9}; + const int8_t input_data[] = {3, -9}; + const int32_t expected_size_data[] = {3, 1}; + const int8_t expected_output_data[] = {3, 3, -9}; const int output_dims[] = {4, 1, 3, 1, 1}; - int8 output_data[3]; + int8_t output_data[3]; - tflite::testing::TestResizeNearestNeighbor( + tflite::testing::TestResizeNearestNeighbor( input_dims, input_data, expected_size_data, expected_output_data, output_dims, output_data); } @@ -176,7 +176,7 @@ TF_LITE_MICRO_TEST(TwoDimensionalResize) { 3, 6, // 9, 12, // }; - const int32 expected_size_data[] = {3, 3}; + const int32_t expected_size_data[] = {3, 3}; const float expected_output_data[] = { 3, 3, 6, // 3, 3, 6, // @@ -192,39 +192,39 @@ TF_LITE_MICRO_TEST(TwoDimensionalResize) { } TF_LITE_MICRO_TEST(TwoDimensionalResizeUInt8) { const int input_dims[] = {4, 1, 2, 2, 1}; - const uint8 input_data[] = { + const uint8_t input_data[] = { 3, 6, // 9, 12 // }; - const int32 expected_size_data[] = {3, 3}; - const uint8 expected_output_data[] = { + const int32_t expected_size_data[] = {3, 3}; + const uint8_t expected_output_data[] = { 3, 3, 6, // 3, 3, 6, // 9, 9, 12 // }; const int output_dims[] = {4, 1, 3, 3, 1}; - uint8 output_data[9]; + uint8_t output_data[9]; - tflite::testing::TestResizeNearestNeighbor( + tflite::testing::TestResizeNearestNeighbor( input_dims, input_data, expected_size_data, expected_output_data, output_dims, output_data); } TF_LITE_MICRO_TEST(TwoDimensionalResizeInt8) { const int input_dims[] = {4, 1, 2, 2, 1}; - const int8 input_data[] = { + const int8_t input_data[] = { 3, -6, // 9, 12, // }; - const int32 expected_size_data[] = {3, 3}; - const int8 expected_output_data[] = { + const int32_t expected_size_data[] = {3, 3}; + const int8_t expected_output_data[] = { 3, 3, -6, // 3, 3, -6, // 9, 9, 12, // }; const int output_dims[] = {4, 1, 3, 3, 1}; - int8 output_data[9]; + int8_t output_data[9]; - tflite::testing::TestResizeNearestNeighbor( + tflite::testing::TestResizeNearestNeighbor( input_dims, input_data, expected_size_data, expected_output_data, output_dims, output_data); } @@ -236,7 +236,7 @@ TF_LITE_MICRO_TEST(TwoDimensionalResizeWithTwoBatches) { 4, 10, // 10, 16 // }; - const int32 expected_size_data[] = {3, 3}; + const int32_t expected_size_data[] = {3, 3}; const float expected_output_data[] = { 3, 3, 6, // 3, 3, 6, // @@ -254,14 +254,14 @@ TF_LITE_MICRO_TEST(TwoDimensionalResizeWithTwoBatches) { } TF_LITE_MICRO_TEST(TwoDimensionalResizeWithTwoBatchesUInt8) { const int input_dims[] = {4, 2, 2, 2, 1}; - const uint8 input_data[] = { + const uint8_t input_data[] = { 3, 6, // 9, 12, // 4, 10, // 10, 16 // }; - const int32 expected_size_data[] = {3, 3}; - const uint8 expected_output_data[] = { + const int32_t expected_size_data[] = {3, 3}; + const uint8_t expected_output_data[] = { 3, 3, 6, // 3, 3, 6, // 9, 9, 12, // @@ -270,22 +270,22 @@ TF_LITE_MICRO_TEST(TwoDimensionalResizeWithTwoBatchesUInt8) { 10, 10, 16, // }; const int output_dims[] = {4, 2, 3, 3, 1}; - uint8 output_data[18]; + uint8_t output_data[18]; - tflite::testing::TestResizeNearestNeighbor( + tflite::testing::TestResizeNearestNeighbor( input_dims, input_data, expected_size_data, expected_output_data, output_dims, output_data); } TF_LITE_MICRO_TEST(TwoDimensionalResizeWithTwoBatchesInt8) { const int input_dims[] = {4, 2, 2, 2, 1}; - const int8 input_data[] = { + 
const int8_t input_data[] = { 3, 6, // 9, -12, // -4, 10, // 10, 16 // }; - const int32 expected_size_data[] = {3, 3}; - const int8 expected_output_data[] = { + const int32_t expected_size_data[] = {3, 3}; + const int8_t expected_output_data[] = { 3, 3, 6, // 3, 3, 6, // 9, 9, -12, // @@ -294,9 +294,9 @@ TF_LITE_MICRO_TEST(TwoDimensionalResizeWithTwoBatchesInt8) { 10, 10, 16, // }; const int output_dims[] = {4, 2, 3, 3, 1}; - int8 output_data[18]; + int8_t output_data[18]; - tflite::testing::TestResizeNearestNeighbor( + tflite::testing::TestResizeNearestNeighbor( input_dims, input_data, expected_size_data, expected_output_data, output_dims, output_data); } @@ -306,7 +306,7 @@ TF_LITE_MICRO_TEST(ThreeDimensionalResize) { 3, 4, 6, 10, // 9, 10, 12, 16, // }; - const int32 expected_size_data[] = {3, 3}; + const int32_t expected_size_data[] = {3, 3}; const float expected_output_data[] = { 3, 4, 3, 4, 6, 10, // 3, 4, 3, 4, 6, 10, // @@ -321,39 +321,39 @@ TF_LITE_MICRO_TEST(ThreeDimensionalResize) { } TF_LITE_MICRO_TEST(ThreeDimensionalResizeUInt8) { const int input_dims[] = {4, 1, 2, 2, 2}; - const uint8 input_data[] = { + const uint8_t input_data[] = { 3, 4, 6, 10, // 10, 12, 14, 16, // }; - const int32 expected_size_data[] = {3, 3}; - const uint8 expected_output_data[] = { + const int32_t expected_size_data[] = {3, 3}; + const uint8_t expected_output_data[] = { 3, 4, 3, 4, 6, 10, // 3, 4, 3, 4, 6, 10, // 10, 12, 10, 12, 14, 16, // }; const int output_dims[] = {4, 1, 3, 3, 2}; - uint8 output_data[18]; + uint8_t output_data[18]; - tflite::testing::TestResizeNearestNeighbor( + tflite::testing::TestResizeNearestNeighbor( input_dims, input_data, expected_size_data, expected_output_data, output_dims, output_data); } TF_LITE_MICRO_TEST(ThreeDimensionalResizeInt8) { const int input_dims[] = {4, 1, 2, 2, 2}; - const int8 input_data[] = { + const int8_t input_data[] = { 3, 4, -6, 10, // 10, 12, -14, 16, // }; - const int32 expected_size_data[] = {3, 3}; - const int8 expected_output_data[] = { + const int32_t expected_size_data[] = {3, 3}; + const int8_t expected_output_data[] = { 3, 4, 3, 4, -6, 10, // 3, 4, 3, 4, -6, 10, // 10, 12, 10, 12, -14, 16, // }; const int output_dims[] = {4, 1, 3, 3, 2}; - int8 output_data[18]; + int8_t output_data[18]; - tflite::testing::TestResizeNearestNeighbor( + tflite::testing::TestResizeNearestNeighbor( input_dims, input_data, expected_size_data, expected_output_data, output_dims, output_data); } diff --git a/tensorflow/lite/micro/kernels/softmax.cc b/tensorflow/lite/micro/kernels/softmax.cc index 881efdae3e1..e806fe9ae29 100644 --- a/tensorflow/lite/micro/kernels/softmax.cc +++ b/tensorflow/lite/micro/kernels/softmax.cc @@ -42,7 +42,8 @@ TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context, TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8); if (output->type == kTfLiteInt16) { TF_LITE_ENSURE_EQ(context, output->params.zero_point, -32768); - // NOTE: Current int16 softmax output does not require symmetric scaling + // NOTE: Current int16_t softmax output does not require symmetric + // scaling // - so no need to verify scale here. 
} else { TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8); diff --git a/tensorflow/lite/micro/kernels/sub.cc b/tensorflow/lite/micro/kernels/sub.cc index ddc03d81856..6c3dc5f917b 100644 --- a/tensorflow/lite/micro/kernels/sub.cc +++ b/tensorflow/lite/micro/kernels/sub.cc @@ -40,18 +40,18 @@ struct OpData { // and the special 16-bit -> 16bit quantized path int input1_shift; int input2_shift; - int32 output_activation_min; - int32 output_activation_max; + int32_t output_activation_min; + int32_t output_activation_max; // These fields are used only in the general 8-bit -> 8bit quantized path - int32 input1_multiplier; - int32 input2_multiplier; - int32 output_multiplier; + int32_t input1_multiplier; + int32_t input2_multiplier; + int32_t output_multiplier; int output_shift; int left_shift; - int32 input1_offset; - int32 input2_offset; - int32 output_offset; + int32_t input1_offset; + int32_t input2_offset; + int32_t output_offset; }; TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteSubParams* params, diff --git a/tensorflow/lite/micro/kernels/svdf.cc b/tensorflow/lite/micro/kernels/svdf.cc index c0bae4acc48..c3adb4d3782 100644 --- a/tensorflow/lite/micro/kernels/svdf.cc +++ b/tensorflow/lite/micro/kernels/svdf.cc @@ -32,8 +32,8 @@ namespace svdf { namespace { struct OpData { - int32 effective_scale_1_a; - int32 effective_scale_2_a; + int32_t effective_scale_1_a; + int32_t effective_scale_2_a; // b versions of each scale are kept at int since the numbers are just the // shift value - typically between [-32, 32]. int effective_scale_1_b; @@ -377,7 +377,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_EQ(context, NumDimensions(input), 2); // Validate Tensor Output: - // [0] = float/int8, {2, batch_size, num_units} + // [0] = float/int8_t, {2, batch_size, num_units} TF_LITE_ENSURE_EQ(context, node->outputs->size, 1); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); TF_LITE_ENSURE_EQ(context, NumDimensions(output), 2); diff --git a/tensorflow/lite/micro/kernels/xtensa_hifi/add.cc b/tensorflow/lite/micro/kernels/xtensa_hifi/add.cc index 0e911762981..90590ab0632 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifi/add.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifi/add.cc @@ -42,18 +42,18 @@ struct OpData { // and the special 16-bit -> 16bit quantized path int input1_shift; int input2_shift; - int32 output_activation_min; - int32 output_activation_max; + int32_t output_activation_min; + int32_t output_activation_max; // These fields are used only in the general 8-bit -> 8bit quantized path - int32 input1_multiplier; - int32 input2_multiplier; - int32 output_multiplier; + int32_t input1_multiplier; + int32_t input2_multiplier; + int32_t output_multiplier; int output_shift; int left_shift; - int32 input1_offset; - int32 input2_offset; - int32 output_offset; + int32_t input1_offset; + int32_t input2_offset; + int32_t output_offset; }; TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params, diff --git a/tensorflow/lite/micro/kernels/xtensa_hifi/conv.cc b/tensorflow/lite/micro/kernels/xtensa_hifi/conv.cc index ca968f8ab1c..2de3345bcbf 100755 --- a/tensorflow/lite/micro/kernels/xtensa_hifi/conv.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifi/conv.cc @@ -219,9 +219,9 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, const int stride_height = params->stride_height; const int pad_width = data.padding.width; const int pad_height = data.padding.height; - const int32 output_activation_min 
= data.output_activation_min; - const int32 output_activation_max = data.output_activation_max; - const int32 output_multiplier = data.output_multiplier; + const int32_t output_activation_min = data.output_activation_min; + const int32_t output_activation_max = data.output_activation_max; + const int32_t output_multiplier = data.output_multiplier; const int output_shift = -data.output_shift; TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); @@ -362,10 +362,10 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, reference_integer_ops::ConvPerChannel( op_params, data.per_channel_output_multiplier, data.per_channel_output_shift, GetTensorShape(input), - GetTensorData(input), GetTensorShape(filter), - GetTensorData(filter), GetTensorShape(bias), - GetTensorData(bias), GetTensorShape(output), - GetTensorData(output)); + GetTensorData(input), GetTensorShape(filter), + GetTensorData(filter), GetTensorShape(bias), + GetTensorData(bias), GetTensorShape(output), + GetTensorData(output)); } TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, diff --git a/tensorflow/lite/micro/kernels/xtensa_hifi/depthwise_conv.cc b/tensorflow/lite/micro/kernels/xtensa_hifi/depthwise_conv.cc index 0c5b484229b..2dd11ed060f 100755 --- a/tensorflow/lite/micro/kernels/xtensa_hifi/depthwise_conv.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifi/depthwise_conv.cc @@ -142,7 +142,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { int filter_width = SizeOfDimension(filter, 2); int filter_height = SizeOfDimension(filter, 1); - // Per channel quantization is only needed for int8 inference. For other + // Per channel quantization is only needed for int8_t inference. For other // quantized types, only a single scale and zero point is needed. const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension]; // Dynimically allocate per-channel quantization parameters. @@ -335,10 +335,10 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, reference_integer_ops::DepthwiseConvPerChannel( op_params, data->per_channel_output_multiplier, data->per_channel_output_shift, GetTensorShape(input), - GetTensorData(input), GetTensorShape(filter), - GetTensorData(filter), GetTensorShape(bias), - GetTensorData(bias), GetTensorShape(output), - GetTensorData(output)); + GetTensorData(input), GetTensorShape(filter), + GetTensorData(filter), GetTensorShape(bias), + GetTensorData(bias), GetTensorShape(output), + GetTensorData(output)); } TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, @@ -370,9 +370,9 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, const int pad_width = data->padding.width; const int pad_height = data->padding.height; const int depth_multiplier = params->depth_multiplier; - const int32 output_activation_min = data->output_activation_min; - const int32 output_activation_max = data->output_activation_max; - const int32 output_multiplier = data->output_multiplier; + const int32_t output_activation_min = data->output_activation_min; + const int32_t output_activation_max = data->output_activation_max; + const int32_t output_multiplier = data->output_multiplier; // Legacy ops used mixed left and right shifts. Now all are +ve-means-left. 
const int output_shift = -data->output_shift; TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); diff --git a/tensorflow/lite/micro/kernels/xtensa_hifi/pooling.cc b/tensorflow/lite/micro/kernels/xtensa_hifi/pooling.cc index 0e6f0d0ab30..ccb3c11844f 100755 --- a/tensorflow/lite/micro/kernels/xtensa_hifi/pooling.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifi/pooling.cc @@ -148,7 +148,7 @@ TfLiteStatus AverageEvalFloat(TfLiteContext* context, const TfLiteNode* node, } out_length = batches * output_height * output_width * depth; - uint32 p_unalign_val = (uint32)out_data_ptr, p_align_val; + uint32_t p_unalign_val = (uint32_t)out_data_ptr, p_align_val; p_align_val = (p_unalign_val + 7) & (~7); // pre loop for activation_min_max @@ -215,8 +215,8 @@ TfLiteStatus AverageEvalQuantized(TfLiteContext* context, const int output_height = output_shape.Dims(1); const int output_width = output_shape.Dims(2); - const uint8* inp_data_ptr; - uint8* out_data_ptr; + const uint8_t* inp_data_ptr; + uint8_t* out_data_ptr; int inp_data_format = 0, out_data_format = 0, out_length; int inp_precision = PREC_ASYM8, out_precision = PREC_ASYM8; void* p_scratch; @@ -262,7 +262,7 @@ TfLiteStatus AverageEvalQuantized(TfLiteContext* context, } out_length = batches * output_height * output_width * depth; - uint32 p_unalign_val = (uint32)out_data_ptr, p_align_val; + uint32_t p_unalign_val = (uint32_t)out_data_ptr, p_align_val; p_align_val = (p_unalign_val + 7) & (~7); // pre loop for activation_min_max @@ -372,7 +372,7 @@ TfLiteStatus MaxEvalFloat(TfLiteContext* context, TfLiteNode* node, } out_length = batches * output_height * output_width * depth; - uint32 p_unalign_val = (uint32)out_data_ptr, p_align_val; + uint32_t p_unalign_val = (uint32_t)out_data_ptr, p_align_val; p_align_val = (p_unalign_val + 7) & (~7); // pre loop for activation_min_max @@ -438,8 +438,8 @@ TfLiteStatus MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node, const int output_height = output_shape.Dims(1); const int output_width = output_shape.Dims(2); - const uint8* inp_data_ptr; - uint8* out_data_ptr; + const uint8_t* inp_data_ptr; + uint8_t* out_data_ptr; int inp_data_format = 0, out_data_format = 0, out_length; int inp_precision = PREC_ASYM8, out_precision = PREC_ASYM8; void* p_scratch; @@ -482,7 +482,7 @@ TfLiteStatus MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node, } out_length = batches * output_height * output_width * depth; - uint32 p_unalign_val = (uint32)out_data_ptr, p_align_val; + uint32_t p_unalign_val = (uint32_t)out_data_ptr, p_align_val; p_align_val = (p_unalign_val + 7) & (~7); // pre loop for activation_min_max diff --git a/tensorflow/lite/micro/kernels/xtensa_hifi/softmax.cc b/tensorflow/lite/micro/kernels/xtensa_hifi/softmax.cc index e4fa19671c2..9d256b3aecc 100755 --- a/tensorflow/lite/micro/kernels/xtensa_hifi/softmax.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifi/softmax.cc @@ -63,7 +63,8 @@ TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context, TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8); if (output->type == kTfLiteInt16) { TF_LITE_ENSURE_EQ(context, output->params.zero_point, -32768); - // NOTE: Current int16 softmax output does not require symmetric scaling + // NOTE: Current int16_t softmax output does not require symmetric + // scaling // - so no need to verify scale here. 
} else { TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8); diff --git a/tensorflow/lite/micro/kernels/xtensa_hifi/svdf.cc b/tensorflow/lite/micro/kernels/xtensa_hifi/svdf.cc index f9d846bf8b3..a208713fb9d 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifi/svdf.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifi/svdf.cc @@ -53,8 +53,8 @@ namespace svdf { namespace { struct OpData { - int32 effective_scale_1_a; - int32 effective_scale_2_a; + int32_t effective_scale_1_a; + int32_t effective_scale_2_a; // b versions of each scale are kept at int since the numbers are just the // shift value - typically between [-32, 32]. int effective_scale_1_b; @@ -461,7 +461,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_EQ(context, NumDimensions(input), 2); // Validate Tensor Output: - // [0] = float/int8, {2, batch_size, num_units} + // [0] = float/int8_t, {2, batch_size, num_units} TF_LITE_ENSURE_EQ(context, node->outputs->size, 1); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); TF_LITE_ENSURE_EQ(context, NumDimensions(output), 2); diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/conv.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/conv.cc index 0e71bfbcb26..011cfc426a1 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/conv.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/conv.cc @@ -33,22 +33,22 @@ namespace conv { namespace xtensa { namespace hifimini { -void ConvPerChannel(const ConvParams& params, const int32* output_multiplier, - const int32* output_shift, const RuntimeShape& input_shape, - const int8* input_data, const RuntimeShape& filter_shape, - const int8* filter_data, const RuntimeShape& bias_shape, - const int32* bias_data, const RuntimeShape& output_shape, - int8* output_data) { +void ConvPerChannel(const ConvParams& params, const int32_t* output_multiplier, + const int32_t* output_shift, + const RuntimeShape& input_shape, const int8_t* input_data, + const RuntimeShape& filter_shape, const int8_t* filter_data, + const RuntimeShape& bias_shape, const int32_t* bias_data, + const RuntimeShape& output_shape, int8_t* output_data) { const int stride_width = params.stride_width; const int stride_height = params.stride_height; const int dilation_width_factor = params.dilation_width_factor; const int dilation_height_factor = params.dilation_height_factor; const int pad_width = params.padding_values.width; const int pad_height = params.padding_values.height; - const int32 input_offset = params.input_offset; - const int32 output_offset = params.output_offset; - const int32 output_activation_min = params.quantized_activation_min; - const int32 output_activation_max = params.quantized_activation_max; + const int32_t input_offset = params.input_offset; + const int32_t output_offset = params.output_offset; + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; const int batches = input_shape.Dims(0); @@ -169,11 +169,11 @@ void ConvPerChannel(const ConvParams& params, const int32* output_multiplier, inline void Conv1x32Input32x32Filter( const int input_offset, const int output_offset, const int quantized_activation_min, const int quantized_activation_max, - const int32* output_multiplier, const int32* output_shift, - const RuntimeShape& input_shape, const int8* input_data, - const RuntimeShape& filter_shape, const int8* filter_data, - const RuntimeShape& bias_shape, const int32* bias_data, - const RuntimeShape& 
output_shape, int8* output_data) { + const int32_t* output_multiplier, const int32_t* output_shift, + const RuntimeShape& input_shape, const int8_t* input_data, + const RuntimeShape& filter_shape, const int8_t* filter_data, + const RuntimeShape& bias_shape, const int32_t* bias_data, + const RuntimeShape& output_shape, int8_t* output_data) { ae_p24x2s input_offset_24x2 = AE_MOVPA24(input_offset); ae_q56s output_offset_56 = AE_CVTQ48A32S(output_offset); ae_q56s output_activation_max_56 = AE_CVTQ48A32S(quantized_activation_max); @@ -324,7 +324,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { int output_width = output->dims->data[2]; int output_height = output->dims->data[1]; - // Per channel quantization is only needed for int8 inference. For other + // Per channel quantization is only needed for int8_t inference. For other // quantized types, only a single scale and zero point is needed. const int num_channels = filter->dims->data[kConvQuantizedDimension]; // Dynimically allocate per-channel quantization parameters. @@ -382,10 +382,10 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, xtensa::hifimini::ConvPerChannel( op_params, data->per_channel_output_multiplier, data->per_channel_output_shift, GetTensorShape(input), - GetTensorData(input), GetTensorShape(filter), - GetTensorData(filter), GetTensorShape(bias), - GetTensorData(bias), GetTensorShape(output), - GetTensorData(output)); + GetTensorData(input), GetTensorShape(filter), + GetTensorData(filter), GetTensorShape(bias), + GetTensorData(bias), GetTensorShape(output), + GetTensorData(output)); } TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { @@ -409,10 +409,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { op_data->output_activation_min, op_data->output_activation_max, op_data->per_channel_output_multiplier, op_data->per_channel_output_shift, GetTensorShape(input), - GetTensorData(input), GetTensorShape(filter), - GetTensorData(filter), GetTensorShape(bias), - GetTensorData(bias), GetTensorShape(output), - GetTensorData(output)); + GetTensorData(input), GetTensorShape(filter), + GetTensorData(filter), GetTensorShape(bias), + GetTensorData(bias), GetTensorShape(output), + GetTensorData(output)); return kTfLiteOk; } diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/depthwise_conv.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/depthwise_conv.cc index 656fb1b04cb..1f08b2c4ff4 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/depthwise_conv.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/depthwise_conv.cc @@ -34,12 +34,12 @@ namespace xtensa { namespace hifimini { inline void DepthwiseConvPerChannel( - const DepthwiseParams& params, const int32* output_multiplier, - const int32* output_shift, const RuntimeShape& input_shape, - const int8* input_data, const RuntimeShape& filter_shape, - const int8* filter_data, const RuntimeShape& bias_shape, - const int32* bias_data, const RuntimeShape& output_shape, - int8* output_data) { + const DepthwiseParams& params, const int32_t* output_multiplier, + const int32_t* output_shift, const RuntimeShape& input_shape, + const int8_t* input_data, const RuntimeShape& filter_shape, + const int8_t* filter_data, const RuntimeShape& bias_shape, + const int32_t* bias_data, const RuntimeShape& output_shape, + int8_t* output_data) { // TODO(b/154032858): Investigate removing extra copies. 
const int stride_width = params.stride_width; const int stride_height = params.stride_height; @@ -48,10 +48,10 @@ inline void DepthwiseConvPerChannel( const int pad_width = params.padding_values.width; const int pad_height = params.padding_values.height; const int depth_multiplier = params.depth_multiplier; - const int32 input_offset = params.input_offset; - const int32 output_offset = params.output_offset; - const int32 output_activation_min = params.quantized_activation_min; - const int32 output_activation_max = params.quantized_activation_max; + const int32_t input_offset = params.input_offset; + const int32_t output_offset = params.output_offset; + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; const int batches = input_shape.Dims(0); @@ -99,16 +99,16 @@ inline void DepthwiseConvPerChannel( ((batch * input_height + in_y) * input_width + in_x) * input_depth + (in_channel); - int32 input_val = input_data[input_idx]; + int32_t input_val = input_data[input_idx]; // Find current filter index, minus 2 for Xtensa load // alignments: int filter_idx = ((filter_y)*filter_width + filter_x) * filter_depth + (output_channel); - int32 filter_val = filter_data[filter_idx]; + int32_t filter_val = filter_data[filter_idx]; - // Load 8bit value as int32 into a 24x24 register and right + // Load 8bit value as int32_t into a 24x24 register and right // shift into 24bit space. Note: value is duplicated in the HH // and LL register - but all calculations are done on the HH // side. @@ -171,11 +171,11 @@ constexpr int kConvolutionalKernelDepth = 32; inline void DepthwiseConv4x32MatchingInputAndFilter( const int input_offset, const int output_offset, const int quantized_activation_min, const int quantized_activation_max, - const int32* output_multiplier, const int32* output_shift, - const RuntimeShape& input_shape, const int8* input_data, - const RuntimeShape& filter_shape, const int8* filter_data, - const RuntimeShape& bias_shape, const int32* bias_data, - const RuntimeShape& output_shape, int8* output_data) { + const int32_t* output_multiplier, const int32_t* output_shift, + const RuntimeShape& input_shape, const int8_t* input_data, + const RuntimeShape& filter_shape, const int8_t* filter_data, + const RuntimeShape& bias_shape, const int32_t* bias_data, + const RuntimeShape& output_shape, int8_t* output_data) { // Convert the (unsigned) 32-bit multiplier down to a 24-bit multiplier. const int32_t mult = output_multiplier[0] >> 8; const int32_t shift = output_shift[0]; @@ -189,16 +189,16 @@ inline void DepthwiseConv4x32MatchingInputAndFilter( const int stride_elements = (kConvolutionalKernelDepth / kConvolutionalKernelWidth); - const int8* input_0_ptr = (const int8*)(input_data - 2); - const int8* weight_0_ptr = (const int8*)(filter_data - 2); + const int8_t* input_0_ptr = (const int8_t*)(input_data - 2); + const int8_t* weight_0_ptr = (const int8_t*)(filter_data - 2); // Apply the kernels in blocks of 4 for all the channels. 
- const int8* input_1_ptr = input_0_ptr + stride_elements * 4; - const int8* input_2_ptr = input_1_ptr + stride_elements * 4; - const int8* input_3_ptr = input_2_ptr + stride_elements * 4; + const int8_t* input_1_ptr = input_0_ptr + stride_elements * 4; + const int8_t* input_2_ptr = input_1_ptr + stride_elements * 4; + const int8_t* input_3_ptr = input_2_ptr + stride_elements * 4; - const int8* weight_1_ptr = weight_0_ptr + stride_elements * 4; - const int8* weight_2_ptr = weight_1_ptr + stride_elements * 4; - const int8* weight_3_ptr = weight_2_ptr + stride_elements * 4; + const int8_t* weight_1_ptr = weight_0_ptr + stride_elements * 4; + const int8_t* weight_2_ptr = weight_1_ptr + stride_elements * 4; + const int8_t* weight_3_ptr = weight_2_ptr + stride_elements * 4; for (int i = 0; i < num_blocks; ++i) { ae_q56s block_0_acc = AE_ZEROQ56(); @@ -372,7 +372,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { int filter_width = SizeOfDimension(filter, 2); int filter_height = SizeOfDimension(filter, 1); - // Per channel quantization is only needed for int8 inference. For other + // Per channel quantization is only needed for int8_t inference. For other // quantized types, only a single scale and zero point is needed. const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension]; // Dynimically allocate per-channel quantization parameters. @@ -430,10 +430,10 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, xtensa::hifimini::DepthwiseConvPerChannel( op_params, data->per_channel_output_multiplier, data->per_channel_output_shift, GetTensorShape(input), - GetTensorData(input), GetTensorShape(filter), - GetTensorData(filter), GetTensorShape(bias), - GetTensorData(bias), GetTensorShape(output), - GetTensorData(output)); + GetTensorData(input), GetTensorShape(filter), + GetTensorData(filter), GetTensorShape(bias), + GetTensorData(bias), GetTensorShape(output), + GetTensorData(output)); } TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { @@ -460,10 +460,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { std::numeric_limits::min(), std::numeric_limits::max(), op_data->per_channel_output_multiplier, op_data->per_channel_output_shift, GetTensorShape(input), - GetTensorData(input), GetTensorShape(filter), - GetTensorData(filter), GetTensorShape(bias), - GetTensorData(bias), GetTensorShape(output), - GetTensorData(output)); + GetTensorData(input), GetTensorShape(filter), + GetTensorData(filter), GetTensorShape(bias), + GetTensorData(bias), GetTensorShape(output), + GetTensorData(output)); return kTfLiteOk; } switch (input->type) { // Already know in/out types are same. diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/fully_connected.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/fully_connected.cc index 6ebfbe75067..8383e02e598 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/fully_connected.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/fully_connected.cc @@ -36,16 +36,16 @@ namespace hifimini { void FullyConnected(const FullyConnectedParams& params, const RuntimeShape& input_shape, const int8_t* input_data, const RuntimeShape& filter_shape, const int8_t* filter_data, - const RuntimeShape& bias_shape, const int32* bias_data, + const RuntimeShape& bias_shape, const int32_t* bias_data, const RuntimeShape& output_shape, int8_t* output_data) { // TODO(b/154032858): Investigate removing extra copies. 
- const int32 input_offset = params.input_offset; - const int32 filter_offset = params.weights_offset; - const int32 output_offset = params.output_offset; - const int32 output_multiplier = params.output_multiplier; + const int32_t input_offset = params.input_offset; + const int32_t filter_offset = params.weights_offset; + const int32_t output_offset = params.output_offset; + const int32_t output_multiplier = params.output_multiplier; const int output_shift = params.output_shift; - const int32 output_activation_min = params.quantized_activation_min; - const int32 output_activation_max = params.quantized_activation_max; + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; const int filter_dim_count = filter_shape.DimensionsCount(); const int batches = output_shape.Dims(0); diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/quantize.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/quantize.cc index e735214dd38..d46cc723114 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/quantize.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/quantize.cc @@ -156,7 +156,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { // This Op (QUANTIZE) quantizes the input and produces quantized output. // AffineQuantize takes scale and zero point and quantizes the float value to -// quantized output, in int8 or uint8 format. +// quantized output, in int8_t or uint8_t format. TfLiteRegistration Register_QUANTIZE() { return {/*init=*/quantize::Init, /*free=*/nullptr, diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/softmax.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/softmax.cc index f222387c831..83cddd49889 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/softmax.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/softmax.cc @@ -33,12 +33,12 @@ struct OpData { uint16_t* exp_lut; }; -// Number of unique int8 and int16 values. Used in exponent lookup table +// Number of unique int8_t and int16_t values. Used in exponent lookup table // conputation. constexpr int kInt8Range = - std::numeric_limits::max() - std::numeric_limits::min() + 1; -constexpr int kInt16Range = - std::numeric_limits::max() - std::numeric_limits::min() + 1; + std::numeric_limits::max() - std::numeric_limits::min() + 1; +constexpr int kInt16Range = std::numeric_limits::max() - + std::numeric_limits::min() + 1; // Each 16-bit precalculated exponent is expressed as a Q0.16 fixedpoint // value. We special-case e^0 since 1.0 requires 1 integer bit to // express. @@ -47,7 +47,7 @@ constexpr int kExpFractionalBits = 16; // specially. constexpr int kMaxExponentValue = (1 << kExpFractionalBits); -// Quantized softmax with int8 input and int16 output. +// Quantized softmax with int8_t input and int16_t output. // Passing OpData by value does not have much savings in this op, but following // that as a best practice, at least for the xtensa kernels. See b/155656675 for // more details. 
@@ -97,7 +97,7 @@ TfLiteStatus Softmax(OpData op_data, const RuntimeShape& input_shape, } output_data[i * depth + c] = static_cast(std::max( std::min(full_range_output, - static_cast(std::numeric_limits::max())), + static_cast(std::numeric_limits::max())), static_cast(std::numeric_limits::min()))); } } @@ -118,7 +118,8 @@ TfLiteStatus CalculateSoftmaxOpData(TfLiteContext* context, if (output->type == kTfLiteInt16) { TF_LITE_ENSURE_EQ(context, output->params.zero_point, std::numeric_limits::min()); - // NOTE: Current int16 softmax output does not require symmetric scaling + // NOTE: Current int16_t softmax output does not require symmetric + // scaling // - so no need to verify scale here. } else { TF_LITE_ENSURE_EQ(context, output->params.zero_point, @@ -127,10 +128,10 @@ TfLiteStatus CalculateSoftmaxOpData(TfLiteContext* context, } } - // Precompute e^(-x * input_scale * beta) for every possible int8 input. + // Precompute e^(-x * input_scale * beta) for every possible int8_t input. // This computation is used for every iteration of Softmax. We must compute // using pre-scaled inputs to avoid introducing additional error, while - // restricting our input range to the int8 range. This is valid since beta + // restricting our input range to the int8_t range. This is valid since beta // and input scale are constant for a given op in the graph. Skip index 0 // since that is a special case which requires 1 integer bit instead of 0. for (int i = 1; i <= kInt8Range; i++) { @@ -163,7 +164,7 @@ TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) { TFLITE_DCHECK(node->user_data != nullptr); OpData* op_data = static_cast(node->user_data); - // Allocate an array to precompute exponents over all int8 inputs, applying + // Allocate an array to precompute exponents over all int8_t inputs, applying // the scale and beta before calculating exp. It is mandatory to apply beta // and scale here, since each softmax op may have different beta and scale // values. Beta and scale will remain constant for a given softmax op. diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/svdf.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/svdf.cc index 8520dc2db72..3d6ad33cfcb 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/svdf.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/svdf.cc @@ -33,8 +33,8 @@ namespace svdf { namespace { struct OpData { - int32 effective_scale_1_a; - int32 effective_scale_2_a; + int32_t effective_scale_1_a; + int32_t effective_scale_2_a; // b versions of each scale are kept at int since the numbers are just the // shift value - typically between [-32, 32]. 
int effective_scale_1_b; @@ -153,7 +153,7 @@ void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node, dot_prod_24x2, data.effective_scale_1_a, data.effective_scale_1_b); - // Cap min/max and convert to int32: + // Cap min/max and convert to int32_t: dot_prod_56 = AE_MAXQ56S(dot_prod_56, output_int16_min_56); dot_prod_56 = AE_MINQ56S(dot_prod_56, output_int16_max_56); // Truncate immediately since the QR register is already 32 bit aligned: @@ -246,7 +246,7 @@ void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node, data.effective_scale_2_b); // Add output adjustment: x_56 = AE_ADDQ56(x_56, output_zp_56); - // Cap min/max and convert to int32 (already aligned to 32bit): + // Cap min/max and convert to int32_t (already aligned to 32bit): x_56 = AE_MAXQ56S(x_56, output_int8_min_56); x_56 = AE_MINQ56S(x_56, output_int8_max_56); GetTensorData(output_tensor)[i] = @@ -308,7 +308,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_EQ(context, NumDimensions(input), 2); // Validate Tensor Output: - // [0] = float/int8, {2, batch_size, num_units} + // [0] = float/int8_t, {2, batch_size, num_units} TF_LITE_ENSURE_EQ(context, node->outputs->size, 1); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); TF_LITE_ENSURE_EQ(context, NumDimensions(output), 2); diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini_staging/quantize.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini_staging/quantize.cc index 513f926fae9..13c19cc6f34 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini_staging/quantize.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini_staging/quantize.cc @@ -34,7 +34,7 @@ void AffineQuantize(int scale_multiplier, const tflite::QuantizationParams& op_params, const RuntimeShape& input_shape, const int16_t* input_data, const RuntimeShape& output_shape, int8_t* output_data) { - const int32 zero_point = op_params.zero_point; + const int32_t zero_point = op_params.zero_point; const int flat_size = MatchingFlatSize(input_shape, output_shape); ae_q56s min_val_56 = AE_CVTQ48A32S(INT16_MIN); ae_q56s max_val_56 = AE_CVTQ48A32S(INT16_MAX); @@ -155,7 +155,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { // This Op (QUANTIZE) quantizes the input and produces quantized output. // AffineQuantize takes scale and zero point and quantizes the float value to -// quantized output, in int8 or uint8 format. +// quantized output, in int8_t or uint8_t format. TfLiteRegistration Register_QUANTIZE() { return {/*init=*/quantize::Init, /*free=*/nullptr, diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini_staging/softmax.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini_staging/softmax.cc index 90fc2cd9903..3e5ef198928 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini_staging/softmax.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini_staging/softmax.cc @@ -72,7 +72,8 @@ TfLiteStatus CalculateSoftmaxOpData(TfLiteContext* context, if (output->type == kTfLiteInt16) { TF_LITE_ENSURE_EQ(context, output->params.zero_point, std::numeric_limits::min()); - // NOTE: Current int16 softmax output does not require symmetric scaling + // NOTE: Current int16_t softmax output does not require symmetric + // scaling // - so no need to verify scale here. 
} else { TF_LITE_ENSURE_EQ(context, output->params.zero_point, @@ -124,7 +125,7 @@ TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) { const TfLiteStatus scratch_status = context->RequestScratchBufferInArena( context, scratch_size, &(op_data->scratch_tensor_index)); TF_LITE_ENSURE_OK(context, scratch_status); - // Allocate an array to precompute exponents over all int8 inputs, applying + // Allocate an array to precompute exponents over all int8_t inputs, applying // the scale and beta before calculating exp. It is mandatory to apply beta // and scale here, since each softmax op may have different beta and scale // values. Beta and scale will remain constant for a given softmax op. @@ -145,7 +146,7 @@ TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) { const RuntimeShape& input_shape = GetTensorShape(input); const int8_t* input_data = GetTensorData(input); const RuntimeShape& output_shape = GetTensorShape(output); - int16* output_data = GetTensorData(output); + int16_t* output_data = GetTensorData(output); const int trailing_dim = input_shape.DimensionsCount() - 1; const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini_staging/svdf.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini_staging/svdf.cc index 537b48db8eb..05256f33306 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini_staging/svdf.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini_staging/svdf.cc @@ -55,8 +55,8 @@ namespace svdf { namespace { struct OpData { - int32 effective_scale_1_a; - int32 effective_scale_2_a; + int32_t effective_scale_1_a; + int32_t effective_scale_2_a; // b versions of each scale are kept at int since the numbers are just the // shift value - typically between [-32, 32]. int effective_scale_1_b; @@ -239,7 +239,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_EQ(context, NumDimensions(input), 2); // Validate Tensor Output: - // [0] = float/int8, {2, batch_size, num_units} + // [0] = float/int8_t, {2, batch_size, num_units} TF_LITE_ENSURE_EQ(context, node->outputs->size, 1); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); TF_LITE_ENSURE_EQ(context, NumDimensions(output), 2); diff --git a/tensorflow/lite/micro/micro_utils.h b/tensorflow/lite/micro/micro_utils.h index 1fc63d130e4..24aebad8a78 100644 --- a/tensorflow/lite/micro/micro_utils.h +++ b/tensorflow/lite/micro/micro_utils.h @@ -48,10 +48,10 @@ int32_t FloatToSymmetricQuantizedInt32(const float value, const float scale); // // There are several key flavors of quantization in TfLite: // asymmetric symmetric per channel -// int8 | X | X | X | -// uint8 | X | X | | -// int16 | X | | | -// int32 | | X | X | +// int8_t | X | X | X | +// uint8_t | X | X | | +// int16_t | X | | | +// int32_t | | X | X | // // The per-op quantization spec can be found here: // https://www.tensorflow.org/lite/performance/quantization_spec diff --git a/tensorflow/lite/micro/test_helpers.cc b/tensorflow/lite/micro/test_helpers.cc index 7278fea48b3..2888a846e94 100644 --- a/tensorflow/lite/micro/test_helpers.cc +++ b/tensorflow/lite/micro/test_helpers.cc @@ -584,7 +584,7 @@ TfLiteStatus SimpleStatefulOp::Prepare(TfLiteContext* context, TfLiteNode* node) { OpData* data = reinterpret_cast(node->user_data); - // Make sure that the input is in uint8 with at least 1 data entry. + // Make sure that the input is in uint8_t with at least 1 data entry. 
const TfLiteTensor* input = tflite::GetInput(context, node, kInputTensor); if (input->type != kTfLiteUInt8) return kTfLiteError; if (NumElements(input->dims) == 0) return kTfLiteError; @@ -925,8 +925,8 @@ TfLiteTensor CreateQuantizedBiasTensor(const float* data, int32_t* quantized, TfLiteTensor result = CreateTensor(dims, is_variable); result.type = kTfLiteInt32; result.data.i32 = const_cast(quantized); - // Quantized int32 tensors always have a zero point of 0, since the range of - // int32 values is large, and because zero point costs extra cycles during + // Quantized int32_t tensors always have a zero point of 0, since the range of + // int32_t values is large, and because zero point costs extra cycles during // processing. result.params = {bias_scale, 0}; result.quantization = {kTfLiteAffineQuantization, nullptr}; @@ -934,7 +934,7 @@ TfLiteTensor CreateQuantizedBiasTensor(const float* data, int32_t* quantized, return result; } -// Quantizes int32 bias tensor with per-channel weights determined by input +// Quantizes int32_t bias tensor with per-channel weights determined by input // scale multiplied by weight scale for each channel. TfLiteTensor CreatePerChannelQuantizedBiasTensor( const float* input, int32_t* quantized, TfLiteIntArray* dims, diff --git a/tensorflow/lite/micro/test_helpers.h b/tensorflow/lite/micro/test_helpers.h index 8941e394587..a7897145d26 100644 --- a/tensorflow/lite/micro/test_helpers.h +++ b/tensorflow/lite/micro/test_helpers.h @@ -164,7 +164,7 @@ TfLiteTensor CreateQuantizedBiasTensor(const float* data, int32_t* quantized, float weights_scale, bool is_variable = false); -// Quantizes int32 bias tensor with per-channel weights determined by input +// Quantizes int32_t bias tensor with per-channel weights determined by input // scale multiplied by weight scale for each channel. TfLiteTensor CreatePerChannelQuantizedBiasTensor( const float* input, int32_t* quantized, TfLiteIntArray* dims, diff --git a/tensorflow/lite/micro/testing/test_utils.cc b/tensorflow/lite/micro/testing/test_utils.cc index ec5396e15e8..fe89e904769 100644 --- a/tensorflow/lite/micro/testing/test_utils.cc +++ b/tensorflow/lite/micro/testing/test_utils.cc @@ -248,8 +248,8 @@ TfLiteTensor CreateQuantized32Tensor(const int32_t* data, TfLiteIntArray* dims, result.type = kTfLiteInt32; result.data.i32 = const_cast(data); result.dims = dims; - // Quantized int32 tensors always have a zero point of 0, since the range of - // int32 values is large, and because zero point costs extra cycles during + // Quantized int32_t tensors always have a zero point of 0, since the range of + // int32_t values is large, and because zero point costs extra cycles during // processing. 
result.params = {scale, 0}; result.allocation_type = kTfLiteMemNone; From 56853a42e456833b7682acc66a80bcd59ffa81f3 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Thu, 23 Jul 2020 23:16:23 -0700 Subject: [PATCH 1248/2522] Fix some API linking issues within TPU 1VM (some API calls could only be statically linked) PiperOrigin-RevId: 322942590 Change-Id: I763f8649fc38a555d0d2a6408a634314ab041b7a --- tensorflow/core/tpu/tpu_library_init_fns.inc | 4 ++++ tensorflow/core/tpu/tpu_on_demand_compiler.cc | 4 ++-- tensorflow/stream_executor/tpu/BUILD | 2 +- tensorflow/stream_executor/tpu/c_api_conversions.cc | 11 +++++++---- tensorflow/stream_executor/tpu/tpu_executor_c_api.h | 4 ++++ tensorflow/stream_executor/tpu/tpu_node_context.cc | 3 ++- .../stream_executor/tpu/tpu_transfer_manager.cc | 5 +++-- 7 files changed, 23 insertions(+), 10 deletions(-) diff --git a/tensorflow/core/tpu/tpu_library_init_fns.inc b/tensorflow/core/tpu/tpu_library_init_fns.inc index 89ba0fa82b3..a2c0894d70d 100644 --- a/tensorflow/core/tpu/tpu_library_init_fns.inc +++ b/tensorflow/core/tpu/tpu_library_init_fns.inc @@ -132,6 +132,7 @@ tensorflow::Status SetExecutorStructFn(void* library_handle) { TFTPU_SET_FN(executor_fn, TpuStatus_New); TFTPU_SET_FN(executor_fn, TpuStatus_Create); + TFTPU_SET_FN(executor_fn, TpuStatus_Set); TFTPU_SET_FN(executor_fn, TpuStatus_Free); TFTPU_SET_FN(executor_fn, TpuStatus_Message); TFTPU_SET_FN(executor_fn, TpuStatus_Code); @@ -174,6 +175,9 @@ tensorflow::Status SetExecutorStructFn(void* library_handle) { TFTPU_SET_FN(executor_fn, TpuCoreLocation_Index); TFTPU_SET_FN(executor_fn, TpuCoreLocation_Id); + TFTPU_SET_FN(executor_fn, TpuCompiler_New); + TFTPU_SET_FN(executor_fn, TpuCompiler_Free); + TFTPU_SET_FN(executor_fn, TpuCompiler_RunHloPasses); TFTPU_SET_FN(executor_fn, TpuCompiler_RunBackend); TFTPU_SET_FN(executor_fn, TpuCompiler_Compile); diff --git a/tensorflow/core/tpu/tpu_on_demand_compiler.cc b/tensorflow/core/tpu/tpu_on_demand_compiler.cc index 61637f5fd0a..b5e4db5cd49 100644 --- a/tensorflow/core/tpu/tpu_on_demand_compiler.cc +++ b/tensorflow/core/tpu/tpu_on_demand_compiler.cc @@ -167,8 +167,8 @@ XLA_HloModuleConfig HloModuleConfigToC(const xla::HloModuleConfig& config) { class TpuCompiler : public Compiler { public: - TpuCompiler() { compiler_ = TpuCompiler_New(); } - ~TpuCompiler() override {} + TpuCompiler() { compiler_ = ExecutorApiFn()->TpuCompiler_NewFn(); } + ~TpuCompiler() override { ExecutorApiFn()->TpuCompiler_FreeFn(compiler_); } stream_executor::Platform::Id PlatformId() const override { return tensorflow::TpuPlatform::kId; diff --git a/tensorflow/stream_executor/tpu/BUILD b/tensorflow/stream_executor/tpu/BUILD index 17d4490a8f8..adee7546c7c 100644 --- a/tensorflow/stream_executor/tpu/BUILD +++ b/tensorflow/stream_executor/tpu/BUILD @@ -42,10 +42,10 @@ cc_library( "//tensorflow/compiler/xla:executable_run_options", "//tensorflow/compiler/xla:literal", "//tensorflow/compiler/xla:shape_util", - "//tensorflow/compiler/xla:xla_data_proto_cc", "//tensorflow/compiler/xla/service:executable", "//tensorflow/compiler/xla/service:maybe_owning_device_memory", "//tensorflow/compiler/xla/service:shaped_buffer", + "//tensorflow/core/tpu:tpu_api", "//tensorflow/stream_executor:device_memory", "//tensorflow/stream_executor:device_memory_allocator", "@com_google_absl//absl/container:inlined_vector", diff --git a/tensorflow/stream_executor/tpu/c_api_conversions.cc b/tensorflow/stream_executor/tpu/c_api_conversions.cc index 599233a04b2..3c2180e2819 100644 --- 
a/tensorflow/stream_executor/tpu/c_api_conversions.cc +++ b/tensorflow/stream_executor/tpu/c_api_conversions.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/stream_executor/tpu/c_api_conversions.h" +#include "tensorflow/core/tpu/tpu_api.h" #include "tensorflow/stream_executor/tpu/c_api_defn.h" #include "tensorflow/stream_executor/tpu/tpu_executor_c_api.h" #include "tensorflow/stream_executor/tpu/tpu_platform_interface.h" @@ -91,8 +92,9 @@ SE_DeviceMemoryAllocator ToC( ->Allocate(device_ordinal, size, retry_on_failure, memory_space); if (!allocation.ok()) { auto status = allocation.status(); - TpuStatus_Set(se_status, status.code(), status.error_message().data(), - status.error_message().size()); + tensorflow::tpu::ExecutorApiFn()->TpuStatus_SetFn( + se_status, status.code(), status.error_message().data(), + status.error_message().size()); } else { auto& scoped_memory = allocation.ValueOrDie(); memory->wrapped = ApiConverter::ToC(scoped_memory.Release()); @@ -105,8 +107,9 @@ SE_DeviceMemoryAllocator ToC( auto status = reinterpret_cast(ctx) ->Deallocate(device_ordinal, ApiConverter::FromC(*base)); if (!status.ok()) { - TpuStatus_Set(se_status, status.code(), status.error_message().data(), - status.error_message().size()); + tensorflow::tpu::ExecutorApiFn()->TpuStatus_SetFn( + se_status, status.code(), status.error_message().data(), + status.error_message().size()); } }; return se_allocator; diff --git a/tensorflow/stream_executor/tpu/tpu_executor_c_api.h b/tensorflow/stream_executor/tpu/tpu_executor_c_api.h index 0714b73a85b..8bee19f16ed 100644 --- a/tensorflow/stream_executor/tpu/tpu_executor_c_api.h +++ b/tensorflow/stream_executor/tpu/tpu_executor_c_api.h @@ -304,6 +304,7 @@ struct TfTpu_ExecutorApiFn { TFTPU_ADD_FN_IN_STRUCT(TpuStatus_New); TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Create); + TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Set); TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Free); TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Message); TFTPU_ADD_FN_IN_STRUCT(TpuStatus_Code); @@ -346,6 +347,9 @@ struct TfTpu_ExecutorApiFn { TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_Index); TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_Id); + TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_New); + TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_Free); + TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_RunHloPasses); TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_RunBackend); TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_Compile); diff --git a/tensorflow/stream_executor/tpu/tpu_node_context.cc b/tensorflow/stream_executor/tpu/tpu_node_context.cc index 2d7b73ca526..b5597e2f88f 100644 --- a/tensorflow/stream_executor/tpu/tpu_node_context.cc +++ b/tensorflow/stream_executor/tpu/tpu_node_context.cc @@ -62,7 +62,8 @@ Status TpuNodeContext::CloseTpuHost() { /* static */ Status TpuNodeContext::Initialize(int device_ordinal) { StatusHelper status; - TpuNodeContext_Initialize(device_ordinal, status.c_status); + tpu::NodeContextApiFn()->TpuNodeContext_InitializeFn(device_ordinal, + status.c_status); return status.status(); } diff --git a/tensorflow/stream_executor/tpu/tpu_transfer_manager.cc b/tensorflow/stream_executor/tpu/tpu_transfer_manager.cc index 29781c81cac..d698f9552f3 100644 --- a/tensorflow/stream_executor/tpu/tpu_transfer_manager.cc +++ b/tensorflow/stream_executor/tpu/tpu_transfer_manager.cc @@ -84,10 +84,11 @@ struct TransferFromDeviceState { std::function done; void TransferFinished(SE_Status* status) { - if (!TpuStatus_Ok(status) && TpuStatus_Ok(status_helper.c_status)) { + if (!tpu::ExecutorApiFn()->TpuStatus_OkFn(status) && + 
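The pattern behind these changes is the one TFTPU_SET_FN and TFTPU_ADD_FN_IN_STRUCT set up: every TPU C-API entry point is resolved from the dynamically loaded library into a struct of function pointers, and call sites go through ExecutorApiFn() instead of referencing the symbol directly, which is what removes the static-link requirement. A stripped-down sketch of the idea with illustrative names, not the actual TensorFlow structs:

    #include <dlfcn.h>

    // A table of function pointers filled in once from the TPU shared library.
    struct TpuApiTable {
      void* (*TpuStatus_NewFn)();
      void (*TpuStatus_FreeFn)(void*);
    };

    TpuApiTable* ExecutorApi() {
      static TpuApiTable table;
      return &table;
    }

    // Resolve the symbols by name; returns false if any lookup fails.
    bool InitTpuApi(void* library_handle) {
      TpuApiTable* api = ExecutorApi();
      api->TpuStatus_NewFn = reinterpret_cast<void* (*)()>(
          dlsym(library_handle, "TpuStatus_New"));
      api->TpuStatus_FreeFn = reinterpret_cast<void (*)(void*)>(
          dlsym(library_handle, "TpuStatus_Free"));
      return api->TpuStatus_NewFn != nullptr && api->TpuStatus_FreeFn != nullptr;
    }

    // Call sites then write ExecutorApi()->TpuStatus_NewFn() rather than
    // calling TpuStatus_New() directly, so no static reference is emitted.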
tpu::ExecutorApiFn()->TpuStatus_OkFn(status_helper.c_status)) { status_helper.c_status = status; } else { - TpuStatus_Free(status); + tpu::ExecutorApiFn()->TpuStatus_FreeFn(status); } if (--remaining_transfers == 0) { From 6f1f37fe76154fb788fd78dd18a6be3cde945a24 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 23 Jul 2020 23:38:47 -0700 Subject: [PATCH 1249/2522] [XLA] Allow for Mixed precision dot and convolution. PiperOrigin-RevId: 322944409 Change-Id: I4bac9dce63f68f58293e8406bab69bff83a5e9f3 --- .../compiler/xla/service/shape_inference.cc | 10 +++ tensorflow/compiler/xla/shape_util.h | 8 --- .../compiler/xla/tests/dot_operation_test.cc | 69 ------------------- tensorflow/core/tpu/tpu_defs.h | 6 +- 4 files changed, 13 insertions(+), 80 deletions(-) diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index 29a728c068e..ec8e4d23d21 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -643,6 +643,11 @@ Status ValidateDotDimensionNumbers( return InvalidArgument("%s", message); }; + // Check if both element types are the same. + if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(lhs, rhs)) { + return fail("Element types do not match."); + } + // Validate basic properties of dot dimension numbers. TF_RETURN_IF_ERROR(ValidateDotDimensionNumbers(lhs, rhs, dimension_numbers)); @@ -1616,6 +1621,11 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation, batch_group_count, feature_group_count); } + if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(lhs, rhs)) { + return InvalidArgument( + "Convolution with different element types: %s and %s.", + ShapeUtil::HumanString(lhs), ShapeUtil::HumanString(rhs)); + } if (dnums.input_spatial_dimensions_size() != dnums.kernel_spatial_dimensions_size()) { return InvalidArgument( diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index 3f69a8b0aca..3789d828528 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -269,14 +269,6 @@ class ShapeUtil { if (SameElementType(a, b)) { return a.element_type(); } - // If only one of A and B are floating use the floating point type. - if (ElementIsFloating(a) && !ElementIsFloating(b)) { - return a.element_type(); - } - if (ElementIsFloating(b) && !ElementIsFloating(a)) { - return b.element_type(); - } - // Use the higher precision type. return primitive_util::BitWidth(a.element_type()) < primitive_util::BitWidth(b.element_type()) ? 
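The two checks added above enforce the relaxed rule for mixed precision: dot and convolution operands may now differ only in floating-point precision, and with the float-preference branch removed from shape_util the result element type is simply the wider of the two. A stand-in sketch of those rules using a toy enum instead of xla::PrimitiveType:

    // Toy stand-in for the element-type rules; not the XLA implementation.
    enum class Ty { BF16, F16, F32, S32 };

    int BitWidth(Ty t) {
      switch (t) {
        case Ty::BF16:
        case Ty::F16:
          return 16;
        case Ty::F32:
        case Ty::S32:
          return 32;
      }
      return 0;
    }

    bool IsFloating(Ty t) { return t == Ty::BF16 || t == Ty::F16 || t == Ty::F32; }

    // Operands are compatible if they match exactly or are both floating point.
    bool SameElementTypeIgnoringFpPrecision(Ty a, Ty b) {
      return (IsFloating(a) && IsFloating(b)) || a == b;
    }

    // The result type is the higher-precision (wider) of the two.
    Ty HigherPrecisionElementType(Ty a, Ty b) {
      return BitWidth(a) < BitWidth(b) ? b : a;
    }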
b.element_type() diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc index 60ba27b2050..71cfd95f77f 100644 --- a/tensorflow/compiler/xla/tests/dot_operation_test.cc +++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc @@ -1463,75 +1463,6 @@ ENTRY SmallIntegerDot { EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0, 0})); } -XLA_TEST_F(DotOperationTextTest, DISABLED_ON_CPU(U16IotaDot)) { - absl::string_view hlo_string = - R"( -HloModule SmallIntegerDot - -ENTRY SmallIntegerDot { - arg0 = u16[5,55,8] parameter(0) - arg1 = u16[5,8,200] parameter(1) - dot = u16[5,55,200] dot(arg0, arg1), lhs_batch_dims={0}, lhs_contracting_dims={2}, rhs_batch_dims={0}, rhs_contracting_dims={1} - ROOT c = s32[5,55,200] convert(dot) -} -)"; - - EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0, 0})); -} - -XLA_TEST_F(DotOperationTextTest, DISABLED_ON_CPU(U16IotaSquaredDot)) { - absl::string_view hlo_string = - R"( -HloModule SmallIntegerDot - -ENTRY SmallIntegerDot { - arg0 = u16[16,2] iota(), iota_dimension=0 - a = u16[16,2] multiply(arg0, arg0) - r = u16[16,2] multiply(a, a) - arg1 = u16[2,98] iota(), iota_dimension=1 - b = u16[2,98] multiply(arg1, arg1) - s = u16[2,98] multiply(b, b) - ROOT dot = u16[16,98] dot(r, s), lhs_contracting_dims={1}, rhs_contracting_dims={0} -} -)"; - - EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0, 0})); -} - -XLA_TEST_F(DotOperationTextTest, DISABLED_ON_CPU(S16IotaDot)) { - absl::string_view hlo_string = - R"( -HloModule SmallIntegerDot - -ENTRY SmallIntegerDot { - arg0 = s16[5,55,8] iota(), iota_dimension=1 - arg1 = s16[5,8,200] iota(), iota_dimension=2 - ROOT dot = s16[5,55,200] dot(arg0, arg1), lhs_batch_dims={0}, lhs_contracting_dims={2}, rhs_batch_dims={0}, rhs_contracting_dims={1} -} -)"; - - EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0, 0})); -} - -XLA_TEST_F(DotOperationTextTest, DISABLED_ON_CPU(S16IotaSquaredDot)) { - absl::string_view hlo_string = - R"( -HloModule SmallIntegerDot - -ENTRY SmallIntegerDot { - arg0 = s16[16,2] iota(), iota_dimension=0 - a = s16[16,2] multiply(arg0, arg0) - r = s16[16,2] multiply(a, a) - arg1 = s16[2,98] iota(), iota_dimension=1 - b = s16[2,98] multiply(arg1, arg1) - s = s16[2,98] multiply(b, b) - ROOT dot = s16[16,98] dot(r, s), lhs_contracting_dims={1}, rhs_contracting_dims={0} -} -)"; - - EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0, 0})); -} - XLA_TEST_F(DotOperationTextTest, DISABLED_ON_CPU(S8Dot)) { absl::string_view hlo_string = R"( diff --git a/tensorflow/core/tpu/tpu_defs.h b/tensorflow/core/tpu/tpu_defs.h index 696fa8dbe3e..1c4b4c4e38e 100644 --- a/tensorflow/core/tpu/tpu_defs.h +++ b/tensorflow/core/tpu/tpu_defs.h @@ -51,10 +51,10 @@ extern const char* const kTPUReplicateAttr; extern const char* const kOutsideCompilationAttr; // Supported types for TPUs. -static constexpr std::array kTpuAllTypes = { +static constexpr std::array kTpuAllTypes = { {DT_INT32, DT_UINT32, DT_BFLOAT16, DT_FLOAT, DT_DOUBLE, DT_BOOL, - DT_COMPLEX64, DT_INT64, DT_UINT64, DT_QINT8, DT_QUINT8, DT_INT8, DT_UINT8, - DT_INT16, DT_UINT16}}; + DT_COMPLEX64, DT_INT64, DT_UINT64, DT_QINT8, DT_QUINT8, DT_INT8, + DT_UINT8}}; } // namespace tensorflow From 8d4711c52d6d5788f93f68b539e316b1ff83a2aa Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 23 Jul 2020 23:55:45 -0700 Subject: [PATCH 1250/2522] Ensure each call to ASharedMemory_create produces a unique memory space. 
The non-android ASharedMemory_create has a bug, where if two calls to create memory regions use the same 'name', they will be mapped to the to same /dev/shm file and trip over each other (particularly if they're different sizes). PiperOrigin-RevId: 322945710 Change-Id: I103e385a2a82addf46f19188dd63baa6818db96d --- tensorflow/lite/nnapi/nnapi_implementation.cc | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/nnapi/nnapi_implementation.cc b/tensorflow/lite/nnapi/nnapi_implementation.cc index 52def4de8fd..25b0d8920dd 100644 --- a/tensorflow/lite/nnapi/nnapi_implementation.cc +++ b/tensorflow/lite/nnapi/nnapi_implementation.cc @@ -20,6 +20,7 @@ limitations under the License. #include #include +#include #include #ifdef __ANDROID__ @@ -64,8 +65,21 @@ void* LoadFunction(void* handle, const char* name, bool optional) { #ifndef __ANDROID__ // Add /dev/shm implementation of shared memory for non-Android platforms -int ASharedMemory_create(const char* name, size_t size) { - int fd = shm_open(name, O_RDWR | O_CREAT, 0644); +int ASharedMemory_create(const char* /* name */, size_t size) { + // Each call to ASharedMemory_create produces a unique memory space, hence + // name should not be used to create the shared memory file, otherwise + // two calls to create memory regions using the same 'name', will collide. + char shm_name_buffer[L_tmpnam]; + if (tmpnam(shm_name_buffer) == nullptr) { + return -1; + } + + // tmpnam will produce a string containing with slashes, but shm_open + // won't like that. + std::string shm_region_name = std::string(shm_name_buffer); + std::replace(shm_region_name.begin(), shm_region_name.end(), '/', '-'); + + int fd = shm_open(shm_region_name.c_str(), O_RDWR | O_CREAT, 0644); if (fd < 0) { return fd; } From 0b5cc6f1b98fa6f5a3bc413cf30a87e4b3f1af8c Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Fri, 24 Jul 2020 00:38:06 -0700 Subject: [PATCH 1251/2522] Fix build and resubmit: [XLA:SPMD] Recursively handling more Dot cases PiperOrigin-RevId: 322949994 Change-Id: I44a8a8e958a7ba4995a667d139f793dfa3a4fe7f --- tensorflow/compiler/xla/service/spmd/BUILD | 1 + .../xla/service/spmd/convolution_handler.cc | 4 +- .../compiler/xla/service/spmd/dot_handler.cc | 717 +++++++++++++----- .../xla/service/spmd/spmd_partitioner.cc | 170 +++-- .../xla/service/spmd/spmd_partitioner.h | 57 +- .../xla/service/spmd/spmd_partitioner_test.cc | 152 +++- .../xla/service/spmd/spmd_partitioner_util.cc | 266 ++++++- .../xla/service/spmd/spmd_partitioner_util.h | 52 +- 8 files changed, 1121 insertions(+), 298 deletions(-) diff --git a/tensorflow/compiler/xla/service/spmd/BUILD b/tensorflow/compiler/xla/service/spmd/BUILD index e41b89f6dff..a67e4cf55c5 100644 --- a/tensorflow/compiler/xla/service/spmd/BUILD +++ b/tensorflow/compiler/xla/service/spmd/BUILD @@ -50,6 +50,7 @@ cc_library( "//tensorflow/compiler/xla/service:tuple_simplifier", "//tensorflow/core/platform:numbers", "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:optional", diff --git a/tensorflow/compiler/xla/service/spmd/convolution_handler.cc b/tensorflow/compiler/xla/service/spmd/convolution_handler.cc index 1204df59080..4caa2bbbf35 100644 --- a/tensorflow/compiler/xla/service/spmd/convolution_handler.cc +++ b/tensorflow/compiler/xla/service/spmd/convolution_handler.cc @@ -226,7 +226,7 @@ Status 
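The collision described in the commit message above is easy to reproduce outside TensorFlow: shm_open with an identical name returns descriptors for one and the same /dev/shm object, so two supposedly independent regions of different sizes end up resizing each other. A small standalone demonstration (not part of the patch), assuming a POSIX system:

    #include <fcntl.h>
    #include <sys/mman.h>
    #include <unistd.h>

    #include <cstdio>

    int main() {
      const char* kName = "/ashared_memory_demo";
      int fd1 = shm_open(kName, O_RDWR | O_CREAT, 0644);
      int fd2 = shm_open(kName, O_RDWR | O_CREAT, 0644);
      if (fd1 < 0 || fd2 < 0) return 1;
      // Both descriptors refer to the same object, so sizing one "region"
      // also resizes the other.
      if (ftruncate(fd1, 4096) != 0) return 1;
      if (ftruncate(fd2, 128) != 0) return 1;  // shrinks fd1's object too
      std::printf("fd1=%d and fd2=%d share one shared-memory object\n", fd1, fd2);
      close(fd1);
      close(fd2);
      shm_unlink(kName);
      return 0;
    }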
SpmdPartitioningVisitor::HandleConvolutionTiledLhsAndRhs( hlo->batch_group_count(), new_window, hlo->convolution_dimension_numbers(), hlo->precision_config())); auto ar = collective_ops_creator_.create_cross_partition_all_reduce( - &b_, conv, MakeBinaryAdd(hlo->shape().element_type(), module_), + &b_, conv, MakeBinaryAdd(hlo->shape().element_type(), module_), {}, NewChannel()); ar->set_sharding(HloSharding::Replicate()); return PartitionedHlo(ar, hlo->shape(), MakePartitioningState()) @@ -605,7 +605,7 @@ Status SpmdPartitioningVisitor::HandleConvolution(HloInstruction* hlo) { hlo->batch_group_count(), new_window, dnums, hlo->precision_config())); auto ar = collective_ops_creator_.create_cross_partition_all_reduce( - &b_, conv, MakeBinaryAdd(hlo->shape().element_type(), module_), + &b_, conv, MakeBinaryAdd(hlo->shape().element_type(), module_), {}, NewChannel()); ar->set_sharding(HloSharding::Replicate()); return PartitionedHlo(ar, hlo->shape(), MakePartitioningState()) diff --git a/tensorflow/compiler/xla/service/spmd/dot_handler.cc b/tensorflow/compiler/xla/service/spmd/dot_handler.cc index 9ecf21f5841..8fea788b1b7 100644 --- a/tensorflow/compiler/xla/service/spmd/dot_handler.cc +++ b/tensorflow/compiler/xla/service/spmd/dot_handler.cc @@ -80,12 +80,25 @@ Status SpmdPartitioningVisitor::HandleDot(HloInstruction* hlo) { return HandleDotHelper(hlo, mapping, create_sharded_dot); } -Status SpmdPartitioningVisitor::HandleDotHelper( - HloInstruction* hlo, const DotGeneralDimsMapping& dims_mapping, +namespace { + +StatusOr PartitionBaseCase( + PartitionedHlo lhs, PartitionedHlo rhs, const Shape& output_base_shape, + const HloSharding& output_sharding, + const DotGeneralDimsMapping& dims_mapping, int64 num_partitions, const std::function( - HloInstruction*, HloInstruction*, SpmdBuilder*)>& create_sharded_dot) { - const HloSharding& lhs_sharding = hlo->operand(0)->sharding(); - const HloSharding& rhs_sharding = hlo->operand(1)->sharding(); + HloInstruction*, HloInstruction*, SpmdBuilder*)>& create_sharded_dot, + HloModule* module, HloInstruction* original_hlo, int64 lhs_batch_partitions, + int64 rhs_batch_partitions, int64 output_batch_partitions, + int64 lhs_contracting_partitions, int64 rhs_contracting_partitions, + int64 lhs_non_contracting_partitions, int64 rhs_non_contracting_partitions, + int64 output_lhs_non_contracting_partitions, + int64 output_rhs_non_contracting_partitions, + int64 threshold_for_windowed_einsum_mib, SpmdBuilder* b, + std::vector* + windowed_dot_general_loops) { + const HloSharding& lhs_sharding = lhs.sharding(); + const HloSharding& rhs_sharding = rhs.sharding(); // Similar to hlo_sharding_util::TransposeSharding(), but allows // removing/adding non-partitioned dimensions. 
@@ -132,12 +145,12 @@ Status SpmdPartitioningVisitor::HandleDotHelper( return HloSharding::Tile(reshape_tiles); }; - std::vector lhs_to_rhs_indices(hlo->operand(0)->shape().rank(), -1); - std::vector lhs_to_output_indices(hlo->operand(0)->shape().rank(), -1); - std::vector rhs_to_lhs_indices(hlo->operand(1)->shape().rank(), -1); - std::vector rhs_to_output_indices(hlo->operand(1)->shape().rank(), -1); - std::vector output_to_lhs_indices(hlo->shape().rank(), -1); - std::vector output_to_rhs_indices(hlo->shape().rank(), -1); + std::vector lhs_to_rhs_indices(lhs.base_shape().rank(), -1); + std::vector lhs_to_output_indices(lhs.base_shape().rank(), -1); + std::vector rhs_to_lhs_indices(rhs.base_shape().rank(), -1); + std::vector rhs_to_output_indices(rhs.base_shape().rank(), -1); + std::vector output_to_lhs_indices(output_base_shape.rank(), -1); + std::vector output_to_rhs_indices(output_base_shape.rank(), -1); auto populate_indices_mapping = [&](const DotGeneralDimsMapping::DimsMapping& mapping) { if (mapping.lhs >= 0) { @@ -174,127 +187,84 @@ Status SpmdPartitioningVisitor::HandleDotHelper( auto rhs_sharding_transposed_to_match_output = transpose_sharding( rhs_sharding, rhs_to_output_indices, output_to_rhs_indices); auto output_sharding_transposed_to_match_lhs = transpose_sharding( - hlo->sharding(), output_to_lhs_indices, lhs_to_output_indices); + output_sharding, output_to_lhs_indices, lhs_to_output_indices); auto output_sharding_transposed_to_match_rhs = transpose_sharding( - hlo->sharding(), output_to_rhs_indices, rhs_to_output_indices); + output_sharding, output_to_rhs_indices, rhs_to_output_indices); - // lhs_rhs_or_output: 0 lhs, 1 rhs, 2 output. - auto get_partitions_for_dims = - [&](const HloSharding& sharding, - absl::Span dims, - int lhs_rhs_or_output) { - int64 partitions = 1; - if (sharding.IsTileMaximal()) { - return partitions; - } - for (const auto& dim : dims) { - if (lhs_rhs_or_output == 0) { - partitions *= sharding.tile_assignment().dim(dim.lhs); - } else if (lhs_rhs_or_output == 1) { - partitions *= sharding.tile_assignment().dim(dim.rhs); - } else { - CHECK_EQ(lhs_rhs_or_output, 2); - partitions *= sharding.tile_assignment().dim(dim.output); - } - } - return partitions; - }; - const int64 lhs_batch_partitions = - get_partitions_for_dims(lhs_sharding, dims_mapping.batch_dims, 0); - const int64 rhs_batch_partitions = - get_partitions_for_dims(rhs_sharding, dims_mapping.batch_dims, 1); - const int64 output_batch_partitions = - get_partitions_for_dims(hlo->sharding(), dims_mapping.batch_dims, 2); - const int64 lhs_contracting_partitions = - get_partitions_for_dims(lhs_sharding, dims_mapping.contracting_dims, 0); - const int64 rhs_contracting_partitions = - get_partitions_for_dims(rhs_sharding, dims_mapping.contracting_dims, 1); - const int64 lhs_non_contracting_partitions = get_partitions_for_dims( - lhs_sharding, dims_mapping.lhs_non_contracting_dims, 0); - const int64 rhs_non_contracting_partitions = get_partitions_for_dims( - rhs_sharding, dims_mapping.rhs_non_contracting_dims, 1); - const int64 output_lhs_non_contracting_partitions = get_partitions_for_dims( - hlo->sharding(), dims_mapping.lhs_non_contracting_dims, 2); - const int64 output_rhs_non_contracting_partitions = get_partitions_for_dims( - hlo->sharding(), dims_mapping.rhs_non_contracting_dims, 2); - - auto& lhs = GetPartitionedHlo(hlo->operand(0)); - auto& rhs = GetPartitionedHlo(hlo->operand(1)); // LHS and RHS are partitioned the same way and only partitioned in batch // dimensions. 
if (lhs_batch_partitions == rhs_batch_partitions && - rhs_batch_partitions == num_partitions_ && + rhs_batch_partitions == num_partitions && lhs_sharding_transposed_to_match_rhs == rhs_sharding) { - TF_ASSIGN_OR_RETURN(auto dot, - create_sharded_dot(lhs.hlo(), rhs.hlo(), &b_)); - SetPartitionedHlo(hlo, [&] { - dot->set_sharding(*lhs_sharding_transposed_to_match_output); - return PartitionedHlo(dot, hlo->shape(), MakePartitioningState()) - .Reshard(hlo->sharding()) - .hlo(); - }); - return Status::OK(); + TF_ASSIGN_OR_RETURN(auto dot, create_sharded_dot(lhs.hlo(), rhs.hlo(), b)); + dot->set_sharding(*lhs_sharding_transposed_to_match_output); + return PartitionedHlo(dot, output_base_shape, lhs.state()) + .Reshard(output_sharding) + .hlo(); } // Try emit batch-partitioned einsum with one operand resharded. Returns - // whether the attempt succeeds. If may_reshard_with_allreduce is false, - // reshard must be done using all-to-all; otherwise this attempt fails. + // partitioned HLO or nullptr if the attempt fails. If + // may_reshard_with_allreduce is false, reshard must be done using + // all-to-all/collective-permute; otherwise this attempt fails. auto try_emit_output_batch_partitioned_einsum_with_reshard = - [&](bool may_reshard_with_allreduce) -> StatusOr { + [&](bool may_reshard_with_allreduce) -> StatusOr { // LHS and output are batch partitioned in the same way. - if (lhs_batch_partitions == num_partitions_ && - output_batch_partitions == num_partitions_ && - lhs_sharding_transposed_to_match_output == hlo->sharding()) { + if (lhs_batch_partitions == num_partitions && + output_batch_partitions == num_partitions && + lhs_sharding_transposed_to_match_output == output_sharding) { if (!may_reshard_with_allreduce && + !CanReshardWithCollectivePermute( + rhs.sharding(), *lhs_sharding_transposed_to_match_rhs) && !GetReshardAllToAllSourceTargetDims( rhs.sharding(), *lhs_sharding_transposed_to_match_rhs)) { - return false; + return nullptr; } auto resharded_rhs = rhs.Reshard(*lhs_sharding_transposed_to_match_rhs); TF_ASSIGN_OR_RETURN( - auto dot, create_sharded_dot(lhs.hlo(), resharded_rhs.hlo(), &b_)); - SetPartitionedHlo(hlo, [&] { return dot; }); - return true; + auto dot, create_sharded_dot(lhs.hlo(), resharded_rhs.hlo(), b)); + return dot; } // RHS and output are batch partitioned in the same way. - if (rhs_batch_partitions == num_partitions_ && - output_batch_partitions == num_partitions_ && - rhs_sharding_transposed_to_match_output == hlo->sharding()) { + if (rhs_batch_partitions == num_partitions && + output_batch_partitions == num_partitions && + rhs_sharding_transposed_to_match_output == output_sharding) { if (!may_reshard_with_allreduce && + !CanReshardWithCollectivePermute( + lhs.sharding(), *rhs_sharding_transposed_to_match_lhs) && !GetReshardAllToAllSourceTargetDims( lhs.sharding(), *rhs_sharding_transposed_to_match_lhs)) { - return false; + return nullptr; } auto resharded_lhs = lhs.Reshard(*rhs_sharding_transposed_to_match_lhs); TF_ASSIGN_OR_RETURN( - auto dot, create_sharded_dot(resharded_lhs.hlo(), rhs.hlo(), &b_)); - SetPartitionedHlo(hlo, [&] { return dot; }); - return true; + auto dot, create_sharded_dot(resharded_lhs.hlo(), rhs.hlo(), b)); + return dot; } - return false; + return nullptr; }; { // Try batch-parallel by resharding one operand, and not using all-reduce. 
TF_ASSIGN_OR_RETURN( - bool emitted, + HloInstruction * partitioned_dot, try_emit_output_batch_partitioned_einsum_with_reshard(false)); - if (emitted) { - return Status::OK(); + if (partitioned_dot) { + return partitioned_dot; } } // Try to emit windowed DotGeneral when one operand is partitioned in the same // way as the output along non-contracting dimensions, but the other operand // is tiled in other dimensions. - auto emit_windowed_dot_general = [&](int64 matching_operand, - int64 windowing_operand, - bool windowed_at_contracting_dims, - bool windowed_at_batch_dims) { + auto emit_windowed_dot_general = + [&](int64 matching_operand, int64 windowing_operand, + bool windowed_at_contracting_dims, + bool windowed_at_batch_dims) -> StatusOr { CHECK_EQ(matching_operand + windowing_operand, 1); CHECK(!windowed_at_batch_dims || !windowed_at_contracting_dims); auto unpadded_result_buffer_shape = - MakePartitionedShape(hlo->shape(), hlo->sharding()); + MakePartitionedShape(output_base_shape, output_sharding); auto padded_result_buffer_shape = unpadded_result_buffer_shape; // For windowing at batch/non-contracting dims, we produce the result one // partition at a time, so we need to pad the shape in case of uneven @@ -310,17 +280,17 @@ Status SpmdPartitioningVisitor::HandleDotHelper( if (windowed_at_contracting_dims) { auto& to_mask = windowing_operand == 0 ? lhs : rhs; to_mask = - to_mask.PadWithValue(b_.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::Zero(hlo->shape().element_type())))); + to_mask.PadWithValue(b->AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::Zero(output_base_shape.element_type())))); } - auto result_buffer = CreateZero(padded_result_buffer_shape, &b_); - auto iteration = b_.AddInstruction( + auto result_buffer = CreateZero(padded_result_buffer_shape, b); + auto iteration = b->AddInstruction( HloInstruction::CreateConstant(LiteralUtil::CreateR0(0))); // Create a while loop that computes one window per iteration. During each // iteration, each partition sends its input window to its neighbor using // collective-permute for the next iteration. - SpmdBuilder body_b("windowed_dot_general_body", visiting_hlo_); + SpmdBuilder body_b("windowed_dot_general_body", original_hlo); auto param = body_b.AddInstruction(HloInstruction::CreateParameter( /*parameter_number=*/0, ShapeUtil::MakeTupleShape({lhs.hlo()->shape(), rhs.hlo()->shape(), @@ -335,11 +305,12 @@ Status SpmdPartitioningVisitor::HandleDotHelper( auto i = body_b.AddInstruction( HloInstruction::CreateGetTupleElement(iteration->shape(), param, 3)); - auto partition_id = collective_ops_creator_.create_partition_id(&body_b); + auto partition_id = + lhs.state().collective_ops_creator.create_partition_id(&body_b); auto data_partition_id = body_b.AddInstruction(HloInstruction::CreateBinary( i->shape(), HloOpcode::kAdd, i, partition_id)); auto partition_count = body_b.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::CreateR0(num_partitions_))); + LiteralUtil::CreateR0(num_partitions))); data_partition_id = body_b.AddInstruction(HloInstruction::CreateBinary( i->shape(), HloOpcode::kRemainder, data_partition_id, partition_count)); auto dot_lhs = l; @@ -350,7 +321,7 @@ Status SpmdPartitioningVisitor::HandleDotHelper( // operand as replicated, and resharding it to match the windowed operand. auto slice_operand = matching_operand == 0 ? 
l : r; slice_operand->set_sharding(HloSharding::Replicate()); - auto state = MakePartitioningState(); + auto state = lhs.state(); state.b = &body_b; state.partition_id = data_partition_id; auto slice = PartitionedHlo(slice_operand, slice_operand->shape(), state) @@ -392,26 +363,27 @@ Status SpmdPartitioningVisitor::HandleDotHelper( auto has_more = body_b.AddInstruction(HloInstruction::CreateCompare( ShapeUtil::MakeShape(PRED, {}), i, body_b.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::CreateR0(num_partitions_))), + LiteralUtil::CreateR0(num_partitions))), ComparisonDirection::kLt)); // Collective-permute for the next window. We don't need it for the last // iteration, so we use a conditional around the collective-permute. HloInstruction* conditional; { - SpmdBuilder cp_b("window_collective_permute", visiting_hlo_); + SpmdBuilder cp_b("window_collective_permute", original_hlo); { auto p = cp_b.AddInstruction(HloInstruction::CreateParameter( 0, windowing_operand == 0 ? l->shape() : r->shape(), "window")); - std::vector> sd_pairs(num_partitions_); - for (int64 source = 0; source < num_partitions_; ++source) { + std::vector> sd_pairs(num_partitions); + for (int64 source = 0; source < num_partitions; ++source) { // 0 -> n-1, 1 -> 0, 2 -> 1, ... sd_pairs[source] = {source, - (source - 1 + num_partitions_) % num_partitions_}; + (source - 1 + num_partitions) % num_partitions}; } - collective_ops_creator_.create_cross_partition_collective_permute( - &cp_b, p, sd_pairs, (*next_channel_id_)++); + lhs.state() + .collective_ops_creator.create_cross_partition_collective_permute( + &cp_b, p, sd_pairs, (*lhs.state().next_channel_id)++); } - SpmdBuilder ncp_b("last_iteration_noop", visiting_hlo_); + SpmdBuilder ncp_b("last_iteration_noop", original_hlo); { ncp_b.AddInstruction(HloInstruction::CreateParameter( 0, windowing_operand == 0 ? l->shape() : r->shape(), "window")); @@ -419,9 +391,9 @@ Status SpmdPartitioningVisitor::HandleDotHelper( conditional = body_b.AddInstruction(HloInstruction::CreateConditional( windowing_operand == 0 ? l->shape() : r->shape(), has_more, windowing_operand == 0 ? l : r, - module_->AddEmbeddedComputation(cp_b.Build()), + module->AddEmbeddedComputation(cp_b.Build()), windowing_operand == 0 ? 
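The source-destination pairs built for the collective-permute above implement a simple rotation: in each loop iteration every partition hands its current window to the previous partition, so after num_partitions iterations each partition has seen every shard exactly once. The same pairing logic in isolation:

    #include <cstdint>
    #include <utility>
    #include <vector>

    // Build the rotate-by-one pairing used by the windowed dot-general loop:
    // partition p sends its window to partition (p - 1 + n) % n.
    std::vector<std::pair<int64_t, int64_t>> RotateLeftPairs(int64_t num_partitions) {
      std::vector<std::pair<int64_t, int64_t>> sd_pairs(num_partitions);
      for (int64_t source = 0; source < num_partitions; ++source) {
        // 0 -> n-1, 1 -> 0, 2 -> 1, ...
        sd_pairs[source] = {source, (source - 1 + num_partitions) % num_partitions};
      }
      return sd_pairs;
    }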
l : r, - module_->AddEmbeddedComputation(ncp_b.Build()))); + module->AddEmbeddedComputation(ncp_b.Build()))); } if (windowing_operand == 0) { l = conditional; @@ -430,7 +402,7 @@ Status SpmdPartitioningVisitor::HandleDotHelper( } body_b.AddInstruction(HloInstruction::CreateTuple({l, r, o, i})); - SpmdBuilder cond_b("windowed_dot_general_cond", visiting_hlo_); + SpmdBuilder cond_b("windowed_dot_general_cond", original_hlo); auto cond_param = cond_b.AddInstruction(HloInstruction::CreateParameter( /*parameter_number=*/0, ShapeUtil::MakeTupleShape({lhs.hlo()->shape(), rhs.hlo()->shape(), @@ -441,56 +413,53 @@ Status SpmdPartitioningVisitor::HandleDotHelper( cond_b.AddInstruction(HloInstruction::CreateCompare( ShapeUtil::MakeShape(PRED, {}), cond_i, cond_b.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::CreateR0(num_partitions_))), + LiteralUtil::CreateR0(num_partitions))), ComparisonDirection::kLt)); - auto while_loop = b_.AddInstruction(HloInstruction::CreateWhile( - cond_param->shape(), module_->AddEmbeddedComputation(cond_b.Build()), - module_->AddEmbeddedComputation(body_b.Build()), - b_.AddInstruction(HloInstruction::CreateTuple( + auto while_loop = b->AddInstruction(HloInstruction::CreateWhile( + cond_param->shape(), module->AddEmbeddedComputation(cond_b.Build()), + module->AddEmbeddedComputation(body_b.Build()), + b->AddInstruction(HloInstruction::CreateTuple( {lhs.hlo(), rhs.hlo(), result_buffer, iteration})))); - windowed_dot_general_loops_.push_back({while_loop, windowing_operand, + windowed_dot_general_loops->push_back({while_loop, windowing_operand, windowed_at_contracting_dims, windowed_at_batch_dims}); - SetPartitionedHlo(hlo, [&] { - auto result = b_.AddInstruction(HloInstruction::CreateGetTupleElement( - result_buffer->shape(), while_loop, 2)); - if (!ShapeUtil::Compatible(padded_result_buffer_shape, - unpadded_result_buffer_shape)) { - result = b_.AddInstruction(HloInstruction::CreateSlice( - unpadded_result_buffer_shape, result, - std::vector(padded_result_buffer_shape.rank(), 0), - unpadded_result_buffer_shape.dimensions(), - std::vector(padded_result_buffer_shape.rank(), 1))); - } - return result; - }); - return Status::OK(); + auto result = b->AddInstruction(HloInstruction::CreateGetTupleElement( + result_buffer->shape(), while_loop, 2)); + if (!ShapeUtil::Compatible(padded_result_buffer_shape, + unpadded_result_buffer_shape)) { + result = b->AddInstruction(HloInstruction::CreateSlice( + unpadded_result_buffer_shape, result, + std::vector(padded_result_buffer_shape.rank(), 0), + unpadded_result_buffer_shape.dimensions(), + std::vector(padded_result_buffer_shape.rank(), 1))); + } + return result; }; - if (output_lhs_non_contracting_partitions == num_partitions_ && + if (output_lhs_non_contracting_partitions == num_partitions && output_sharding_transposed_to_match_lhs == lhs_sharding && - ShapeSizeInBytes(hlo->operand(1)->shape()) >= - options_.threshold_for_windowed_einsum_mib * 1024 * 1024) { - if (rhs_contracting_partitions == num_partitions_) { + ShapeSizeInBytes(rhs.base_shape()) >= + threshold_for_windowed_einsum_mib * 1024 * 1024) { + if (rhs_contracting_partitions == num_partitions) { return emit_windowed_dot_general(0, 1, true, false); } - if (rhs_non_contracting_partitions == num_partitions_) { + if (rhs_non_contracting_partitions == num_partitions) { return emit_windowed_dot_general(0, 1, false, false); } - if (rhs_batch_partitions == num_partitions_) { + if (rhs_batch_partitions == num_partitions) { return emit_windowed_dot_general(0, 1, false, 
true); } } - if (output_rhs_non_contracting_partitions == num_partitions_ && + if (output_rhs_non_contracting_partitions == num_partitions && output_sharding_transposed_to_match_rhs == rhs_sharding && - ShapeSizeInBytes(hlo->operand(0)->shape()) >= - options_.threshold_for_windowed_einsum_mib * 1024 * 1024) { - if (lhs_contracting_partitions == num_partitions_) { + ShapeSizeInBytes(lhs.base_shape()) >= + threshold_for_windowed_einsum_mib * 1024 * 1024) { + if (lhs_contracting_partitions == num_partitions) { return emit_windowed_dot_general(1, 0, true, false); } - if (lhs_non_contracting_partitions == num_partitions_) { + if (lhs_non_contracting_partitions == num_partitions) { return emit_windowed_dot_general(1, 0, false, false); } - if (lhs_batch_partitions == num_partitions_) { + if (lhs_batch_partitions == num_partitions) { return emit_windowed_dot_general(1, 0, false, true); } } @@ -498,18 +467,18 @@ Status SpmdPartitioningVisitor::HandleDotHelper( { // Try batch-parallel by resharding one operand, and allowing all-reduce. TF_ASSIGN_OR_RETURN( - bool emitted, + HloInstruction * partitioned_dot, try_emit_output_batch_partitioned_einsum_with_reshard(true)); - if (emitted) { - return Status::OK(); + if (partitioned_dot) { + return partitioned_dot; } } // LHS and RHS have the same partitioned contracting dimensions. if (lhs_contracting_partitions == rhs_contracting_partitions && - lhs_contracting_partitions == num_partitions_) { - auto zero = b_.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::Zero(hlo->shape().element_type()))); + lhs_contracting_partitions == num_partitions) { + auto zero = b->AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::Zero(output_base_shape.element_type()))); // Pad both sides with zero, since NaN at one side cannot be masked by zero // on the other side. if (ShapeSizeInBytes(lhs.base_shape()) < @@ -522,100 +491,91 @@ Status SpmdPartitioningVisitor::HandleDotHelper( rhs = rhs.Reshard(*lhs_sharding_transposed_to_match_rhs).PadWithValue(zero); } - TF_ASSIGN_OR_RETURN(auto dot, - create_sharded_dot(lhs.hlo(), rhs.hlo(), &b_)); - SetPartitionedHlo(hlo, [&] { - auto ar = collective_ops_creator_.create_cross_partition_all_reduce( - &b_, dot, MakeBinaryAdd(hlo->shape().element_type(), module_), - NewChannel()); - ar->set_sharding(HloSharding::Replicate()); - return PartitionedHlo(ar, hlo->shape(), MakePartitioningState()) - .Reshard(hlo->sharding()) - .hlo(); - }); - return Status::OK(); + TF_ASSIGN_OR_RETURN(auto dot, create_sharded_dot(lhs.hlo(), rhs.hlo(), b)); + auto ar = + lhs.state().collective_ops_creator.create_cross_partition_all_reduce( + b, dot, MakeBinaryAdd(output_base_shape.element_type(), module), {}, + (*lhs.state().next_channel_id)++); + ar->set_sharding(HloSharding::Replicate()); + return PartitionedHlo(ar, output_base_shape, lhs.state()) + .Reshard(output_sharding) + .hlo(); } // LHS and output have the same partitioned non-contracting dimensions. 
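The contracting-dimension case above relies on the fact that a dot over the full contracting dimension equals the sum of dots over disjoint slices of it, which is exactly what the cross-partition all-reduce computes. A toy single-process illustration with two simulated partitions:

    #include <cstdio>
    #include <vector>

    int main() {
      // Contracting dimension of size 4, split across two "partitions".
      const std::vector<float> lhs = {1, 2, 3, 4};
      const std::vector<float> rhs = {5, 6, 7, 8};
      float partial[2] = {0.f, 0.f};
      for (int p = 0; p < 2; ++p) {
        for (int k = 0; k < 2; ++k) {
          partial[p] += lhs[2 * p + k] * rhs[2 * p + k];  // local shard dot
        }
      }
      const float all_reduced = partial[0] + partial[1];  // the all-reduce
      std::printf("partials %.0f + %.0f = %.0f (full dot is 70)\n", partial[0],
                  partial[1], all_reduced);
      return 0;
    }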
- if (lhs_non_contracting_partitions == num_partitions_ && - output_lhs_non_contracting_partitions == num_partitions_ && - lhs_sharding_transposed_to_match_output == hlo->sharding()) { + if (lhs_non_contracting_partitions == num_partitions && + output_lhs_non_contracting_partitions == num_partitions && + lhs_sharding_transposed_to_match_output == output_sharding) { auto rhs_replicated = rhs.Reshard(HloSharding::Replicate()).hlo(); TF_ASSIGN_OR_RETURN(auto dot, - create_sharded_dot(lhs.hlo(), rhs_replicated, &b_)); - SetPartitionedHlo(hlo, [&] { return dot; }); - return Status::OK(); + create_sharded_dot(lhs.hlo(), rhs_replicated, b)); + return dot; } // RHS and output have the same partitioned non-contracting dimensions. - if (rhs_non_contracting_partitions == num_partitions_ && - output_rhs_non_contracting_partitions == num_partitions_ && - rhs_sharding_transposed_to_match_output == hlo->sharding()) { + if (rhs_non_contracting_partitions == num_partitions && + output_rhs_non_contracting_partitions == num_partitions && + rhs_sharding_transposed_to_match_output == output_sharding) { auto lhs_replicated = lhs.Reshard(HloSharding::Replicate()).hlo(); TF_ASSIGN_OR_RETURN(auto dot, - create_sharded_dot(lhs_replicated, rhs.hlo(), &b_)); - SetPartitionedHlo(hlo, [&] { return dot; }); - return Status::OK(); + create_sharded_dot(lhs_replicated, rhs.hlo(), b)); + return dot; } // Output is batch partitioned. - if (output_batch_partitions == num_partitions_) { + if (output_batch_partitions == num_partitions) { auto resharded_lhs = lhs.Reshard(*output_sharding_transposed_to_match_lhs); auto resharded_rhs = rhs.Reshard(*output_sharding_transposed_to_match_rhs); TF_ASSIGN_OR_RETURN(auto dot, create_sharded_dot(resharded_lhs.hlo(), - resharded_rhs.hlo(), &b_)); - SetPartitionedHlo(hlo, [&] { return dot; }); - return Status::OK(); + resharded_rhs.hlo(), b)); + return dot; } // Output is partitioned along LHS non-contracting dimensions. - if (output_lhs_non_contracting_partitions == num_partitions_) { + if (output_lhs_non_contracting_partitions == num_partitions) { auto resharded_lhs = lhs.Reshard(*output_sharding_transposed_to_match_lhs); auto replicated_rhs = rhs.Reshard(HloSharding::Replicate()); - TF_ASSIGN_OR_RETURN( - auto dot, - create_sharded_dot(resharded_lhs.hlo(), replicated_rhs.hlo(), &b_)); - SetPartitionedHlo(hlo, [&] { return dot; }); - return Status::OK(); + TF_ASSIGN_OR_RETURN(auto dot, create_sharded_dot(resharded_lhs.hlo(), + replicated_rhs.hlo(), b)); + return dot; } // Output is partitioned along RHS non-contracting dimensions. - if (output_rhs_non_contracting_partitions == num_partitions_) { + if (output_rhs_non_contracting_partitions == num_partitions) { auto replicated_lhs = lhs.Reshard(HloSharding::Replicate()); auto resharded_rhs = rhs.Reshard(*output_sharding_transposed_to_match_rhs); TF_ASSIGN_OR_RETURN(auto dot, create_sharded_dot(replicated_lhs.hlo(), - resharded_rhs.hlo(), &b_)); - SetPartitionedHlo(hlo, [&] { return dot; }); - return Status::OK(); + resharded_rhs.hlo(), b)); + return dot; } // Returns true if it is beneficial to reshard the operand at `operand_idx` // across the contracting dimension. const auto should_partition_contracting_dim = [&](int64 operand_idx) { - if (!hlo->sharding().IsReplicated()) { + if (!output_sharding.IsReplicated()) { return false; } if (operand_idx == 0) { // If LHS and output are replicated, we compare the cost of all-gather // on RHS vs all-reduce on the output. 
- return (rhs_contracting_partitions == num_partitions_) && + return (rhs_contracting_partitions == num_partitions) && lhs.sharding().IsReplicated() && - ShapeUtil::ElementsIn(hlo->operand(1)->shape()) > - ShapeUtil::ElementsIn(hlo->shape()); + ShapeUtil::ElementsIn(rhs.base_shape()) > + ShapeUtil::ElementsIn(output_base_shape); } else { - return (lhs_contracting_partitions == num_partitions_) && + return (lhs_contracting_partitions == num_partitions) && rhs.sharding().IsReplicated() && - ShapeUtil::ElementsIn(hlo->operand(0)->shape()) > - ShapeUtil::ElementsIn(hlo->shape()); + ShapeUtil::ElementsIn(lhs.base_shape()) > + ShapeUtil::ElementsIn(output_base_shape); } }; // When the output is replicated and one of the operands is partitioned along // contracting dimension, align the other operand to be partitioned along // the contracting dimensions. - if (hlo->sharding().IsReplicated() && (should_partition_contracting_dim(0) || + if (output_sharding.IsReplicated() && (should_partition_contracting_dim(0) || should_partition_contracting_dim(1))) { - auto zero = b_.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::Zero(hlo->shape().element_type()))); + auto zero = b->AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::Zero(output_base_shape.element_type()))); if (should_partition_contracting_dim(0)) { lhs = lhs.Reshard(*rhs_sharding_transposed_to_match_lhs).PadWithValue(zero); @@ -625,19 +585,361 @@ Status SpmdPartitioningVisitor::HandleDotHelper( rhs = rhs.Reshard(*lhs_sharding_transposed_to_match_rhs).PadWithValue(zero); } - TF_ASSIGN_OR_RETURN(auto dot, - create_sharded_dot(lhs.hlo(), rhs.hlo(), &b_)); - SetPartitionedHlo(hlo, [&] { - auto ar = collective_ops_creator_.create_cross_partition_all_reduce( - &b_, dot, MakeBinaryAdd(hlo->shape().element_type(), module_), - NewChannel()); - ar->set_sharding(HloSharding::Replicate()); - return PartitionedHlo(ar, hlo->shape(), MakePartitioningState()).hlo(); - }); - return Status::OK(); + TF_ASSIGN_OR_RETURN(auto dot, create_sharded_dot(lhs.hlo(), rhs.hlo(), b)); + return lhs.state().collective_ops_creator.create_cross_partition_all_reduce( + b, dot, MakeBinaryAdd(output_base_shape.element_type(), module), {}, + (*lhs.state().next_channel_id)++); + } + return nullptr; +} + +StatusOr PartitionDot( + PartitionedHlo lhs, PartitionedHlo rhs, const Shape& output_base_shape, + const HloSharding& output_sharding, + const DotGeneralDimsMapping& dims_mapping, int64 num_partitions, + const std::function( + HloInstruction*, HloInstruction*, SpmdBuilder*)>& create_sharded_dot, + HloModule* module, HloInstruction* original_hlo, + int64 threshold_for_windowed_einsum_mib, SpmdBuilder* b, + std::vector* + windowed_dot_general_loops); + +StatusOr PartitionDotGroupOnBatch( + PartitionedHlo lhs, PartitionedHlo rhs, const Shape& output_base_shape, + const HloSharding& output_sharding, + const DotGeneralDimsMapping& dims_mapping, int64 num_partitions, + const std::function( + HloInstruction*, HloInstruction*, SpmdBuilder*)>& create_sharded_dot, + HloModule* module, HloInstruction* original_hlo, + int64 threshold_for_windowed_einsum_mib, SpmdBuilder* b, + std::vector* + windowed_dot_general_loops) { + std::vector lhs_dims; + std::vector rhs_dims; + std::vector output_dims; + auto lhs_sharding_dims_adjusted_to_output = + lhs.sharding().tile_assignment().dimensions(); + auto rhs_sharding_dims_adjusted_to_output = + lhs.sharding().tile_assignment().dimensions(); + auto output_sharding_dims_adjusted_to_lhs = + 
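The should_partition_contracting_dim comparison above is a data-movement heuristic: with a replicated output, the partitioner either all-gathers the contracting-sharded operand or keeps it sharded and all-reduces the output, and comparing element counts approximates comparing the bytes each collective would move. A hypothetical restatement of that check, not the partitioner's API:

    #include <cstdint>

    // Prefer keeping the operand sharded along the contracting dimension (and
    // paying for an all-reduce of the output) only when the operand is larger
    // than the output, i.e. when the all-gather would move more data.
    bool PreferShardedContractingDot(int64_t sharded_operand_elements,
                                     int64_t output_elements) {
      return sharded_operand_elements > output_elements;
    }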
output_sharding.tile_assignment().dimensions(); + bool lhs_rhs_dims_matching = true; + for (const auto& dim : dims_mapping.batch_dims) { + lhs_dims.push_back(dim.lhs); + rhs_dims.push_back(dim.rhs); + output_dims.push_back(dim.output); + if (lhs_sharding_dims_adjusted_to_output[dim.lhs] != + rhs_sharding_dims_adjusted_to_output[dim.rhs]) { + lhs_rhs_dims_matching = false; + } + lhs_sharding_dims_adjusted_to_output[dim.lhs] = + output_sharding.tile_assignment().dim(dim.output); + rhs_sharding_dims_adjusted_to_output[dim.rhs] = + output_sharding.tile_assignment().dim(dim.output); + output_sharding_dims_adjusted_to_lhs[dim.output] = + lhs.sharding().tile_assignment().dim(dim.lhs); + } + auto lhs_grouped = GroupShardingOnDims(lhs.sharding(), lhs_dims); + auto rhs_grouped = GroupShardingOnDims(rhs.sharding(), rhs_dims); + auto output_grouped = GroupShardingOnDims(output_sharding, output_dims); + if (lhs_rhs_dims_matching) { + if (ShapeUtil::ByteSizeOf(lhs.base_shape()) > + ShapeUtil::ByteSizeOf(rhs.base_shape())) { + rhs_grouped = AlignGroupsWith(std::move(rhs_grouped), lhs_grouped); + rhs = rhs.Reshard(UngroupSharding(rhs_grouped)); + } else { + lhs_grouped = AlignGroupsWith(std::move(lhs_grouped), rhs_grouped); + lhs = lhs.Reshard(UngroupSharding(lhs_grouped)); + } + auto reshaped_output_tiling = output_sharding.tile_assignment(); + reshaped_output_tiling.Reshape(output_sharding_dims_adjusted_to_lhs); + output_grouped = AlignGroupsWith( + GroupShardingOnDims(HloSharding::Tile(reshaped_output_tiling), + output_dims), + lhs_grouped); + } else { + auto reshaped_lhs_tiling = lhs.sharding().tile_assignment(); + reshaped_lhs_tiling.Reshape(lhs_sharding_dims_adjusted_to_output); + lhs_grouped = AlignGroupsWith( + GroupShardingOnDims(HloSharding::Tile(reshaped_lhs_tiling), lhs_dims), + output_grouped); + lhs = lhs.Reshard(UngroupSharding(lhs_grouped)); + auto reshaped_rhs_tiling = rhs.sharding().tile_assignment(); + reshaped_rhs_tiling.Reshape(rhs_sharding_dims_adjusted_to_output); + rhs_grouped = AlignGroupsWith( + GroupShardingOnDims(HloSharding::Tile(reshaped_rhs_tiling), rhs_dims), + output_grouped); + rhs = rhs.Reshard(UngroupSharding(rhs_grouped)); + } + auto per_group_partitioner_state = CreatePerGroupPartitioningState( + lhs.state(), lhs_grouped.device_groups, b); + lhs.hlo()->set_sharding(lhs_grouped.sharding); + rhs.hlo()->set_sharding(rhs_grouped.sharding); + CHECK(lhs.hlo() != rhs.hlo() || lhs_grouped.sharding == rhs_grouped.sharding); + TF_ASSIGN_OR_RETURN( + auto dot, + PartitionDot( + PartitionedHlo(lhs.hlo(), + GetPerGroupBaseShape(lhs_grouped, lhs.base_shape()), + per_group_partitioner_state), + PartitionedHlo(rhs.hlo(), + GetPerGroupBaseShape(rhs_grouped, rhs.base_shape()), + per_group_partitioner_state), + GetPerGroupBaseShape(output_grouped, output_base_shape), + output_grouped.sharding, dims_mapping, + num_partitions / lhs_grouped.device_groups.size(), create_sharded_dot, + module, original_hlo, threshold_for_windowed_einsum_mib, b, + windowed_dot_general_loops)); + // Reset the LHS sharding to the ungrouped one. 
+ lhs.hlo()->set_sharding(UngroupSharding(lhs_grouped)); + rhs.hlo()->set_sharding(UngroupSharding(rhs_grouped)); + dot->set_sharding(UngroupSharding(output_grouped)); + return PartitionedHlo(dot, output_base_shape, lhs.state()) + .Reshard(output_sharding) + .hlo(); +} + +StatusOr PartitionDotGroupOnNonContracting( + bool lhs_matching, PartitionedHlo matching, PartitionedHlo other, + int64 matching_contracting_partitions, int64 other_contracting_partitions, + int64 matching_non_contracting_partitions, + int64 other_non_contracting_partitions, + int64 output_other_non_contracting_partitions, + const Shape& output_base_shape, const HloSharding& output_sharding, + const DotGeneralDimsMapping& dims_mapping, int64 num_partitions, + const std::function( + HloInstruction*, HloInstruction*, SpmdBuilder*)>& create_sharded_dot, + HloModule* module, HloInstruction* original_hlo, + int64 threshold_for_windowed_einsum_mib, SpmdBuilder* b, + std::vector* + windowed_dot_general_loops) { + const bool may_replicate_other_contracting_dims = + (other_contracting_partitions == matching_non_contracting_partitions && + other_non_contracting_partitions == + output_other_non_contracting_partitions); + const bool may_replicate_other_non_contracting_dims = + matching_non_contracting_partitions == other_non_contracting_partitions && + matching_contracting_partitions == other_contracting_partitions; + std::vector other_group_dims; + if (may_replicate_other_contracting_dims && + (!may_replicate_other_non_contracting_dims || + ShapeUtil::ByteSizeOf(other.base_shape()) <= + ShapeUtil::ByteSizeOf(output_base_shape))) { + for (const auto& dim : dims_mapping.contracting_dims) { + other_group_dims.push_back(lhs_matching ? dim.rhs : dim.lhs); + } + } else if (may_replicate_other_non_contracting_dims) { + for (const auto& dim : lhs_matching + ? dims_mapping.rhs_non_contracting_dims + : dims_mapping.lhs_non_contracting_dims) { + other_group_dims.push_back(lhs_matching ? dim.rhs : dim.lhs); + } + } else { + return nullptr; + } + auto matching_sharding_dims = + matching.sharding().tile_assignment().dimensions(); + std::vector matching_dims; + std::vector output_dims; + // Make sure the partitioning on matching's non-contracting dimensions + // defines the same device groups for both matching and output. + for (const auto& dim : lhs_matching ? dims_mapping.lhs_non_contracting_dims + : dims_mapping.rhs_non_contracting_dims) { + int64 md = lhs_matching ? 
dim.lhs : dim.rhs; + matching_sharding_dims[md] = + output_sharding.tile_assignment().dim(dim.output); + matching_dims.push_back(md); + output_dims.push_back(dim.output); + } + auto output_grouped = GroupShardingOnDims(output_sharding, output_dims); + auto reshaped_matching_tiling = matching.sharding().tile_assignment(); + reshaped_matching_tiling.Reshape(matching_sharding_dims); + auto matching_grouped = AlignGroupsWith( + GroupShardingOnDims(HloSharding::Tile(reshaped_matching_tiling), + matching_dims), + output_grouped); + matching = matching.Reshard(UngroupSharding(matching_grouped)); + + auto other_grouped = + AlignGroupsWith(GroupShardingOnDims(other.sharding(), other_group_dims), + output_grouped, /*ignore_group_order=*/true); + other = other.Reshard(UngroupSharding(other_grouped)); + auto partially_replicated_other = + other.ReplicatePartial(other_grouped.group_dims); + auto per_group_partitioner_state = CreatePerGroupPartitioningState( + matching.state(), matching_grouped.device_groups, b); + matching.hlo()->set_sharding(matching_grouped.sharding); + partially_replicated_other->set_sharding(other_grouped.sharding); + auto matching_p = PartitionedHlo( + matching.hlo(), + GetPerGroupBaseShape(matching_grouped, matching.base_shape()), + per_group_partitioner_state); + auto other_p = PartitionedHlo(partially_replicated_other, other.base_shape(), + per_group_partitioner_state); + TF_ASSIGN_OR_RETURN( + auto dot, + PartitionDot(lhs_matching ? matching_p : other_p, + lhs_matching ? other_p : matching_p, + GetPerGroupBaseShape(output_grouped, output_base_shape), + output_grouped.sharding, dims_mapping, + num_partitions / matching_grouped.device_groups.size(), + create_sharded_dot, module, original_hlo, + threshold_for_windowed_einsum_mib, b, + windowed_dot_general_loops)); + // Reset matching's sharding to the ungrouped one. + matching.hlo()->set_sharding(UngroupSharding(matching_grouped)); + return dot; +} + +// Recursive partitioning function. If there are partial dimensions matching in +// the operands and output, group the devices and recursively partition the +// in-group dot. +StatusOr PartitionDot( + PartitionedHlo lhs, PartitionedHlo rhs, const Shape& output_base_shape, + const HloSharding& output_sharding, + const DotGeneralDimsMapping& dims_mapping, int64 num_partitions, + const std::function( + HloInstruction*, HloInstruction*, SpmdBuilder*)>& create_sharded_dot, + HloModule* module, HloInstruction* original_hlo, + int64 threshold_for_windowed_einsum_mib, SpmdBuilder* b, + std::vector* + windowed_dot_general_loops) { + // lhs_rhs_or_output: 0 lhs, 1 rhs, 2 output. 
+ auto get_partitions_for_dims = + [&](const HloSharding& sharding, + absl::Span dims, + int lhs_rhs_or_output) { + int64 partitions = 1; + if (sharding.IsTileMaximal()) { + return partitions; + } + for (const auto& dim : dims) { + if (lhs_rhs_or_output == 0) { + partitions *= sharding.tile_assignment().dim(dim.lhs); + } else if (lhs_rhs_or_output == 1) { + partitions *= sharding.tile_assignment().dim(dim.rhs); + } else { + CHECK_EQ(lhs_rhs_or_output, 2); + partitions *= sharding.tile_assignment().dim(dim.output); + } + } + return partitions; + }; + const int64 lhs_batch_partitions = + get_partitions_for_dims(lhs.sharding(), dims_mapping.batch_dims, 0); + const int64 rhs_batch_partitions = + get_partitions_for_dims(rhs.sharding(), dims_mapping.batch_dims, 1); + const int64 output_batch_partitions = + get_partitions_for_dims(output_sharding, dims_mapping.batch_dims, 2); + const int64 lhs_contracting_partitions = + get_partitions_for_dims(lhs.sharding(), dims_mapping.contracting_dims, 0); + const int64 rhs_contracting_partitions = + get_partitions_for_dims(rhs.sharding(), dims_mapping.contracting_dims, 1); + const int64 lhs_non_contracting_partitions = get_partitions_for_dims( + lhs.sharding(), dims_mapping.lhs_non_contracting_dims, 0); + const int64 rhs_non_contracting_partitions = get_partitions_for_dims( + rhs.sharding(), dims_mapping.rhs_non_contracting_dims, 1); + const int64 output_lhs_non_contracting_partitions = get_partitions_for_dims( + output_sharding, dims_mapping.lhs_non_contracting_dims, 2); + const int64 output_rhs_non_contracting_partitions = get_partitions_for_dims( + output_sharding, dims_mapping.rhs_non_contracting_dims, 2); + TF_ASSIGN_OR_RETURN( + auto try_partitioned_dot, + PartitionBaseCase( + lhs, rhs, output_base_shape, output_sharding, dims_mapping, + num_partitions, create_sharded_dot, module, original_hlo, + lhs_batch_partitions, rhs_batch_partitions, output_batch_partitions, + lhs_contracting_partitions, rhs_contracting_partitions, + lhs_non_contracting_partitions, rhs_non_contracting_partitions, + output_lhs_non_contracting_partitions, + output_rhs_non_contracting_partitions, + threshold_for_windowed_einsum_mib, b, windowed_dot_general_loops)); + if (try_partitioned_dot) { + return try_partitioned_dot; } - return DefaultAction(hlo); + // Recursively partition on different types of dimensions. + // + // Case 1: Group partitions by batch. + if (lhs_batch_partitions == rhs_batch_partitions && + lhs_batch_partitions == output_batch_partitions && + lhs_batch_partitions > 1) { + TF_ASSIGN_OR_RETURN( + auto dot, + PartitionDotGroupOnBatch( + lhs, rhs, output_base_shape, output_sharding, dims_mapping, + num_partitions, create_sharded_dot, module, original_hlo, + threshold_for_windowed_einsum_mib, b, windowed_dot_general_loops)); + if (dot) { + return dot; + } + } + + // Case 2: Group partitions by non-contracting dimensions. + const bool may_group_on_lhs_non_contracting = + lhs_non_contracting_partitions == output_lhs_non_contracting_partitions && + lhs_non_contracting_partitions > 1; + const bool may_group_on_rhs_non_contracting = + rhs_non_contracting_partitions == output_rhs_non_contracting_partitions && + rhs_non_contracting_partitions > 1; + if (may_group_on_lhs_non_contracting || may_group_on_rhs_non_contracting) { + // If both match output non-contracting dimensions, choose the one which + // will result in smaller replication of the other operand. 
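The get_partitions_for_dims helper above reduces to a product over the tile assignment: the number of partitions covering a set of mapped dimensions is the product of the tile counts along those dimensions, and 1 for a tile-maximal sharding. The same computation written standalone, with a plain vector standing in for the tile assignment dimensions:

    #include <cstdint>
    #include <vector>

    int64_t PartitionsForDims(const std::vector<int64_t>& tile_assignment_dims,
                              const std::vector<int>& mapped_dims) {
      int64_t partitions = 1;
      for (int dim : mapped_dims) {
        partitions *= tile_assignment_dims[dim];
      }
      return partitions;
    }

For example, with a [2, 4] tile assignment this returns 8 for dims {0, 1} and 2 for dim {0} alone.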
+ const bool lhs_matching = + may_group_on_lhs_non_contracting && + (!may_group_on_rhs_non_contracting || + lhs_non_contracting_partitions * + ShapeUtil::ByteSizeOf(rhs.hlo()->shape()) <= + rhs_non_contracting_partitions * + ShapeUtil::ByteSizeOf(lhs.hlo()->shape())); + + TF_ASSIGN_OR_RETURN( + auto dot, + PartitionDotGroupOnNonContracting( + lhs_matching, lhs_matching ? lhs : rhs, lhs_matching ? rhs : lhs, + lhs_matching ? lhs_contracting_partitions + : rhs_contracting_partitions, + lhs_matching ? rhs_contracting_partitions + : lhs_contracting_partitions, + lhs_matching ? lhs_non_contracting_partitions + : rhs_non_contracting_partitions, + lhs_matching ? rhs_non_contracting_partitions + : lhs_non_contracting_partitions, + lhs_matching ? output_rhs_non_contracting_partitions + : output_lhs_non_contracting_partitions, + output_base_shape, output_sharding, dims_mapping, num_partitions, + create_sharded_dot, module, original_hlo, + threshold_for_windowed_einsum_mib, b, windowed_dot_general_loops)); + if (dot) { + return dot; + } + } + + // Default action. + TF_ASSIGN_OR_RETURN(auto dot, create_sharded_dot(lhs.Replicate().hlo(), + rhs.Replicate().hlo(), b)); + dot->set_sharding(HloSharding::Replicate()); + return PartitionedHlo(dot, output_base_shape, lhs.state()) + .Reshard(output_sharding) + .hlo(); +} + +} // namespace + +Status SpmdPartitioningVisitor::HandleDotHelper( + HloInstruction* hlo, const DotGeneralDimsMapping& dims_mapping, + const std::function( + HloInstruction*, HloInstruction*, SpmdBuilder*)>& create_sharded_dot) { + auto& lhs = GetPartitionedHlo(hlo->operand(0)); + auto& rhs = GetPartitionedHlo(hlo->operand(1)); + TF_ASSIGN_OR_RETURN( + auto partitioned_dot, + PartitionDot(lhs, rhs, hlo->shape(), hlo->sharding(), dims_mapping, + num_partitions_, create_sharded_dot, module_, hlo, + options_.threshold_for_windowed_einsum_mib, &b_, + &windowed_dot_general_loops_)); + SetPartitionedHlo(hlo, [&] { return partitioned_dot; }); + return Status::OK(); } namespace { @@ -780,6 +1082,7 @@ Status SinkInputNodesIntoWindowedDotGeneralLoopOnContractingDimensions( [](const HloInstruction* a, const HloInstruction* b) { return a->unique_id() < b->unique_id(); }); + worklist.reserve(nullaries_to_sink.size()); for (auto inst : nullaries_to_sink) { worklist.push_back(inst); } diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc index bac5c812814..7aaa3e32b2a 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc @@ -165,16 +165,6 @@ template namespace { -// Returns the replica group configuration where each replica belongs to its own -// group. -std::vector CreateReplicaGroups(int64 num_replicas) { - std::vector groups(num_replicas); - for (int64 i = 0; i < num_replicas; ++i) { - groups[i].add_replica_ids(i); - } - return groups; -} - // Clears all sharding attributes from instructions in the module. This must be // called only after all SPMD transformation is complete. 
Status ClearShardingAttributes(HloModule* module) { @@ -195,6 +185,28 @@ Status ClearShardingAttributes(HloModule* module) { return Status::OK(); } +std::vector> GetPartitionGroupsForReplication( + const HloSharding& sharding, absl::Span replication_dims) { + int64 group_size = 1; + for (int64 i : replication_dims) { + group_size *= sharding.tile_assignment().dim(i); + } + std::vector> partition_groups( + sharding.tile_assignment().num_elements() / group_size); + sharding.tile_assignment().Each( + [&](absl::Span indices, int64 partition) { + int64 group_id = 0; + for (int64 i = 0; i < indices.size(); ++i) { + if (!absl::c_linear_search(replication_dims, i)) { + group_id *= sharding.tile_assignment().dim(i); + group_id += indices[i]; + } + } + partition_groups[group_id].push_back(partition); + }); + return partition_groups; +} + } // namespace HloInstruction* SpmdBuilder::AddInstruction( @@ -664,42 +676,57 @@ PartitionedHlo PartitionedHlo::Replicate() { } // 'Tiled' to 'Replicated'. + std::vector all_dims(shape.rank()); + std::iota(all_dims.begin(), all_dims.end(), 0); + HloInstruction* result = ReplicatePartial(all_dims); + result->set_sharding(HloSharding::Replicate()); + return update_cache(PartitionedHlo(result, base_shape_, state_)); +} + +HloInstruction* PartitionedHlo::ReplicatePartial(absl::Span dims) { + CHECK(!sharding().IsTileMaximal()); + const Shape& shard_shape = hlo()->shape(); + Shape target_shape = shard_shape; + Shape padded_target_shape = shard_shape; + for (int64 i : dims) { + padded_target_shape.set_dimensions( + i, shard_shape.dimensions(i) * sharding().tile_assignment().dim(i)); + target_shape.set_dimensions(i, base_shape().dimensions(i)); + } + HloInstruction* result = nullptr; if (state_.collective_ops_creator.create_cross_partition_all_gather) { - result = state_.partitioner->AllGatherShards(state_.b, hlo_, sharding, - NewChannel()); - } - Shape padded_base_shape = shape; - for (int64 i = 0; i < padded_base_shape.rank(); ++i) { - padded_base_shape.set_dimensions( - i, shape.dimensions(i) * sharding.tile_assignment().dim(i)); + result = state_.partitioner->AllGatherShards(state_.b, hlo_, sharding(), + NewChannel(), dims, + state_.collective_ops_creator); } if (result == nullptr) { auto zero = state_.b->AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::Zero(shape.element_type()))); + LiteralUtil::Zero(shard_shape.element_type()))); auto zero_bcast = state_.b->AddInstruction( - HloInstruction::CreateBroadcast(padded_base_shape, zero, {})); + HloInstruction::CreateBroadcast(padded_target_shape, zero, {})); + auto offsets = MakePartitionOffsets(padded_target_shape, sharding(), + state_.partition_id, state_.b, dims); auto dus = state_.b->AddInstruction(HloInstruction::CreateDynamicUpdateSlice( - padded_base_shape, zero_bcast, hlo_, - MakePartitionOffsets(padded_base_shape, sharding, - state_.partition_id, state_.b))); + padded_target_shape, zero_bcast, hlo_, offsets)); HloComputation* reduction = - MakeBinaryAdd(shape.element_type(), state_.module); + MakeBinaryAdd(shard_shape.element_type(), state_.module); auto all_reduce = state_.collective_ops_creator.create_cross_partition_all_reduce( - state_.b, dus, reduction, NewChannel()); + state_.b, dus, reduction, + GetPartitionGroupsForReplication(sharding(), dims), NewChannel()); result = all_reduce; } - if (!ShapeUtil::Compatible(base_shape_, padded_base_shape)) { - std::vector start_indices(shape.rank(), 0); - std::vector strides(shape.rank(), 1); - result = 
state_.b->AddInstruction(HloInstruction::CreateSlice( - base_shape_, result, start_indices, base_shape_.dimensions(), strides)); + if (!ShapeUtil::Compatible(target_shape, padded_target_shape)) { + std::vector start_indices(target_shape.rank(), 0); + std::vector strides(target_shape.rank(), 1); + result = state_.b->AddInstruction( + HloInstruction::CreateSlice(target_shape, result, start_indices, + base_shape_.dimensions(), strides)); } - result->set_sharding(HloSharding::Replicate()); - return update_cache(PartitionedHlo(result, base_shape_, state_)); + return result; } PartitionedHlo PartitionedHlo::Broadcast() const { @@ -728,7 +755,7 @@ PartitionedHlo PartitionedHlo::Broadcast() const { MakeBinaryAdd(shape.element_type(), state_.module); auto result = state_.collective_ops_creator.create_cross_partition_all_reduce( - state_.b, operand, reduction, NewChannel()); + state_.b, operand, reduction, {}, NewChannel()); result->set_sharding(HloSharding::Replicate()); return PartitionedHlo(result, base_shape_, state_); } @@ -796,7 +823,7 @@ PartitionedHlo PartitionedHlo::ReshardWithAllToAll( auto padded_hlo = PadToShape(hlo_, padded_shape, state_.b); // The order of ids in the group must follow the temp_target sharding. - std::vector groups( + std::vector> groups( temp_target.tile_assignment().num_elements() / group_size); temp_target.tile_assignment().Each( [&](absl::Span indices, int64 device) { @@ -810,7 +837,7 @@ PartitionedHlo PartitionedHlo::ReshardWithAllToAll( group_id += indices[dim]; } } - groups[group_id].add_replica_ids(device); + groups[group_id].push_back(device); }); HloInstruction* result = nullptr; @@ -1027,7 +1054,7 @@ Status SpmdPartitioningVisitor::HandleConcatenate(HloInstruction* hlo) { offset += operand->shape().dimensions(dimension); } auto all_reduce = collective_ops_creator_.create_cross_partition_all_reduce( - &b_, temp_output, MakeBinaryAdd(hlo->shape().element_type(), module_), + &b_, temp_output, MakeBinaryAdd(hlo->shape().element_type(), module_), {}, NewChannel()); SetPartitionedHlo(hlo, [&] { auto start_indices = @@ -2153,7 +2180,7 @@ Status SpmdPartitioningVisitor::HandleGather(HloInstruction* hlo) { // Combine from different partitions. auto ar = collective_ops_creator_.create_cross_partition_all_reduce( &b_, filtered, - MakeBinaryAdd(filtered->shape().element_type(), module_), + MakeBinaryAdd(filtered->shape().element_type(), module_), {}, NewChannel()); ar->set_sharding(HloSharding::Replicate()); SetPartitionedHlo(hlo, [&]() { @@ -2449,7 +2476,7 @@ Status SpmdPartitioningVisitor::HandleReduce(HloInstruction* hlo) { if (reduce_sharded_dimension) { CHECK(local_reduce->shape().IsArray()); reduce = collective_ops_creator_.create_cross_partition_all_reduce( - &b_, local_reduce, hlo->to_apply(), NewChannel()); + &b_, local_reduce, hlo->to_apply(), {}, NewChannel()); reduce->set_sharding(HloSharding::Replicate()); } else { reduce = local_reduce; @@ -2917,13 +2944,36 @@ SPMDCollectiveOpsCreator GetDefaultCollectiveOpsCreator(int64 num_partitions, [](SpmdBuilder* b) { return b->AddInstruction(HloInstruction::CreatePartitionId()); }, - [num_replicas](SpmdBuilder* b, HloInstruction* operand, - HloComputation* reduction, int64 channel_id) { + [num_replicas, num_partitions]( + SpmdBuilder* b, HloInstruction* operand, HloComputation* reduction, + const std::vector>& partition_subgroups, + int64 channel_id) { + if (partition_subgroups.size() <= 1) { + std::vector groups(num_replicas); + // TODO(yuanzx): Unify subgroup definition with AllToAll. 
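+            // With one group per replica and a channel id, this all-reduce
+            // combines data across all partitions within each replica.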
+ for (int64 i = 0; i < num_replicas; ++i) { + groups[i].add_replica_ids(i); + } + return b->AddInstruction(HloInstruction::CreateAllReduce( + operand->shape(), {operand}, reduction, groups, + /*constrain_layout=*/false, channel_id, + /*use_global_device_ids=*/false)); + } + + std::vector device_groups; + device_groups.reserve(partition_subgroups.size() * num_replicas); + for (int64 i = 0; i < num_replicas; ++i) { + for (const auto& pgroup : partition_subgroups) { + device_groups.emplace_back(); + for (int64 pid : pgroup) { + device_groups.back().add_replica_ids(i * num_partitions + pid); + } + } + } return b->AddInstruction(HloInstruction::CreateAllReduce( - operand->shape(), {operand}, reduction, - CreateReplicaGroups(num_replicas), + operand->shape(), {operand}, reduction, device_groups, /*constrain_layout=*/false, channel_id, - /*use_global_device_ids=*/false)); + /*use_global_device_ids=*/true)); }, [](SpmdBuilder* b, HloInstruction* operand, std::vector>& src_dst_pairs, @@ -2932,14 +2982,20 @@ SPMDCollectiveOpsCreator GetDefaultCollectiveOpsCreator(int64 num_partitions, operand->shape(), operand, src_dst_pairs, channel_id)); }, [](SpmdBuilder* b, absl::Span operands, - const std::vector& replica_groups, int64 channel_id, - absl::optional split_dimension) { + const std::vector>& partition_subgroups, + int64 channel_id, absl::optional split_dimension) { std::vector shapes(operands.size(), operands[0]->shape()); const Shape output_shape = (shapes.size() == 1) ? shapes[0] : ShapeUtil::MakeTupleShape(shapes); + std::vector groups(partition_subgroups.size()); + for (int64 i = 0; i < groups.size(); ++i) { + for (int64 id : partition_subgroups[i]) { + groups[i].add_replica_ids(id); + } + } return b->AddInstruction(HloInstruction::CreateAllToAll( - output_shape, operands, replica_groups, + output_shape, operands, groups, /*constrain_layout=*/false, channel_id, split_dimension)); }, [num_replicas, num_partitions]( @@ -2970,10 +3026,10 @@ SpmdPartitioner::SpmdPartitioner(int64 num_partitions, int64 num_replicas, num_partitions, num_replicas, std::move(options), GetDefaultCollectiveOpsCreator(num_partitions, num_replicas)) {} -HloInstruction* SpmdPartitioner::AllGatherShards(SpmdBuilder* b, - HloInstruction* operand, - const HloSharding& sharding, - int64 channel_id) { +HloInstruction* SpmdPartitioner::AllGatherShards( + SpmdBuilder* b, HloInstruction* operand, const HloSharding& sharding, + int64 channel_id, absl::Span selected_dims, + const SPMDCollectiveOpsCreator& collectives_creator) { CHECK(!sharding.IsTileMaximal()); // Add one leading dimension to gather all partitions. std::vector shape; @@ -2983,18 +3039,17 @@ HloInstruction* SpmdPartitioner::AllGatherShards(SpmdBuilder* b, } auto reshape = b->AddInstruction(HloInstruction::CreateReshape( ShapeUtil::MakeShape(operand->shape().element_type(), shape), operand)); - std::vector> partition_subgroups(1); - for (int64 pid : sharding.tile_assignment()) { - partition_subgroups[0].push_back(pid); - } - shape[0] = sharding.tile_assignment().num_elements(); - auto result = collective_ops_creator_.create_cross_partition_all_gather( + auto partition_subgroups = + GetPartitionGroupsForReplication(sharding, selected_dims); + shape[0] = partition_subgroups[0].size(); + auto result = collectives_creator.create_cross_partition_all_gather( b, reshape, ShapeUtil::MakeShape(operand->shape().element_type(), shape), partition_subgroups, channel_id, /*all_gather_dimension=*/0); // If n > 1 dimensions are partitioned, split the leading dimension to n. 
std::vector tiled_dims; for (int64 i = 0; i < sharding.tile_assignment().num_dimensions(); ++i) { - if (sharding.tile_assignment().dim(i) > 1) { + if (sharding.tile_assignment().dim(i) > 1 && + absl::c_linear_search(selected_dims, i)) { tiled_dims.push_back(i); } } @@ -3016,7 +3071,8 @@ HloInstruction* SpmdPartitioner::AllGatherShards(SpmdBuilder* b, std::vector xpose_permutation(result->shape().rank()); int64 split_dims_added = 0; for (int64 i = 0; i < xpose_permutation.size(); ++i) { - if (sharding.tile_assignment().dim(i - split_dims_added) == 1) { + if (sharding.tile_assignment().dim(i - split_dims_added) == 1 || + !absl::c_linear_search(selected_dims, i - split_dims_added)) { xpose_permutation[i] = i + tiled_dims.size() - split_dims_added; } else { xpose_permutation[i] = split_dims_added; diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h index 606a7ae5f14..d844ac3af1f 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h @@ -20,6 +20,7 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_map.h" #include "absl/types/optional.h" #include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" @@ -82,8 +83,10 @@ struct SPMDCollectiveOpsCreator { std::function create_partition_id; // Function used to create a cross-partition all-reduce HLO. - std::function + std::function>& partition_subgroups, + int64 channel_id)> create_cross_partition_all_reduce; // Function used to create a cross-partition collective-permute HLO. @@ -96,8 +99,8 @@ struct SPMDCollectiveOpsCreator { // Function used to create a cross-partition all-to-all HLO. std::function operands, - const std::vector& replica_groups, int64 channel_id, - absl::optional split_dimension)> + const std::vector>& partition_subgroups, + int64 channel_id, absl::optional split_dimension)> create_cross_partition_all_to_all; // Function used to create a cross-partition all-gather HLO. This is optional: @@ -169,10 +172,13 @@ class SpmdPartitioner : public HloModulePass { // The default uses a single all-gather even if there are multiple sharded // dimensions, and adds potential reshapes and transposes to achieve that. // If it returns false, the partitioner will fall back to all-reduce. - virtual HloInstruction* AllGatherShards(SpmdBuilder* b, - HloInstruction* operand, - const HloSharding& sharding, - int64 channel_id); + // `selected_dims` specifies the dimensions along which the all-gather happens + // in the tiled sharding, which allows potentially creating a subgroup + // all-gather. + virtual HloInstruction* AllGatherShards( + SpmdBuilder* b, HloInstruction* operand, const HloSharding& sharding, + int64 channel_id, absl::Span selected_dims, + const SPMDCollectiveOpsCreator& collectives_creator); protected: virtual std::unique_ptr CreateVisitor( @@ -215,7 +221,12 @@ class PartitionedHlo { std::tuple> window_reshard_cache; }; + // Use std::unordered_map for pointer stability. std::unordered_map per_hlo_cache; + // Caches for nested partitioning of grouped sharding. Each string key + // represents a unique way of grouping devices. + absl::flat_hash_map> + groupd_caches; }; struct PartitioningState { SpmdBuilder* b; @@ -270,15 +281,18 @@ class PartitionedHlo { const PartitioningState& state() const { return state_; } + // Helper function to replicate the data on all devices. 
Could only modify + // the reshard cache. + PartitionedHlo Replicate(); + + // Helper function to replicate the data for partitions along the given dims. + HloInstruction* ReplicatePartial(absl::Span dims); + private: // Same as Reshard except that it does not explicitly modify the reshard // cache, although it would indirectly modify by calling Replicate(). PartitionedHlo ReshardNoCache(const HloSharding& target); - // Helper function to replicate the data on all devices. Could only modify - // the reshard cache. - PartitionedHlo Replicate(); - // Helper function to broadcast data from a single device to all devices. PartitionedHlo Broadcast() const; @@ -417,6 +431,16 @@ class SpmdPartitioningVisitor : public DfsHloVisitorWithDefault { StatusOr DoPartition(HloComputation* computation, const HloSharding& root_sharding); + // Information about a loop created for windowed dot-general. Used when + // DoCodeMotionForWindowedDotGeneralLoops() executes after the visitor + // finishes traversing the graph. + struct WindowedDotGeneralLoop { + HloInstruction* while_loop; + int64 windowed_operand; + bool windowed_in_contracting_dims; + bool windowed_in_batch_dims; + }; + private: Status Preprocess(HloInstruction* hlo) override; Status Postprocess(HloInstruction* hlo) override; @@ -445,15 +469,6 @@ class SpmdPartitioningVisitor : public DfsHloVisitorWithDefault { // partitioned instruction. ConstHloInstructionMap partitioned_instructions_; - // Information about a loop created for windowed dot-general. Used when - // DoCodeMotionForWindowedDotGeneralLoops() executes after the visitor - // finishes traversing the graph. - struct WindowedDotGeneralLoop { - HloInstruction* while_loop; - int64 windowed_operand; - bool windowed_in_contracting_dims; - bool windowed_in_batch_dims; - }; std::vector windowed_dot_general_loops_; HloInstruction* visiting_hlo_; diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc index 1045d1187b8..5f3fd8d53e7 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc @@ -2218,7 +2218,7 @@ ENTRY entry { TF_ASSERT_OK_AND_ASSIGN(auto module, PartitionComputation(hlo_string, /*num_devices=*/2)); - std::cout << module->ToString(); + VLOG(1) << module->ToString(); auto sort = FindInstruction(module.get(), "sort"); EXPECT_EQ(sort->operand(0)->shape().dimensions(1), 209664); EXPECT_EQ(sort->operand(1)->shape().dimensions(1), 209664); @@ -2294,7 +2294,7 @@ ENTRY entry TF_ASSERT_OK_AND_ASSIGN(auto module, PartitionComputation(hlo_string, /*num_devices=*/2)); - std::cout << module->ToString(); + VLOG(1) << module->ToString(); auto sort = FindInstruction(module.get(), "sort"); EXPECT_EQ(sort->operand(0)->shape().dimensions(1), 209664); EXPECT_EQ(sort->operand(1)->shape().dimensions(1), 209664); @@ -3842,6 +3842,154 @@ ENTRY entry { EXPECT_THAT(root, op::Copy(op::CollectivePermute(reshape2))); } +TEST_F(SpmdPartitioningTest, Dot2DPartitionedNonContractingAndContracting0) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[48,12] parameter(0), sharding={devices=[2,2]0,1,2,3} + %rhs = f32[32,12] parameter(1), sharding={devices=[2,2]0,1,2,3} + ROOT %dot = f32[48,32] dot(%lhs, %rhs), + lhs_batch_dims={}, rhs_batch_dims={}, + lhs_contracting_dims={1}, rhs_contracting_dims={1}, + sharding={devices=[2,2]0,1,2,3} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, 
/*num_devices=*/4)); + VLOG(1) << module->ToString(); + + auto lhs = AllOf(op::Shape("f32[24,6]"), op::Parameter(0)); + auto partial_replicated_lhs = + AllOf(op::Shape("f32[24,12]"), + op::AllReduce(op::DynamicUpdateSlice(_, lhs, _, _))); + auto rhs = AllOf(op::Shape("f32[16,6]"), op::Parameter(1)); + auto partial_replicated_rhs = + AllOf(op::Shape("f32[16,12]"), op::AllReduce(op::DynamicUpdateSlice( + _, op::CollectivePermute(rhs), _, _))); + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, + AllOf(op::Dot(partial_replicated_lhs, partial_replicated_rhs), + op::Shape("f32[24,16]"))); +} + +TEST_F(SpmdPartitioningTest, Dot2DPartitionedNonContractingAndContracting1) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[48,100] parameter(0), sharding={devices=[2,2]0,1,2,3} + %rhs = f32[32,100] parameter(1), sharding={devices=[2,2]0,1,2,3} + ROOT %dot = f32[48,32] dot(%lhs, %rhs), + lhs_batch_dims={}, rhs_batch_dims={}, + lhs_contracting_dims={1}, rhs_contracting_dims={1}, + sharding={devices=[2,2]0,1,2,3} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/4)); + VLOG(1) << module->ToString(); + + auto lhs = AllOf(op::Shape("f32[24,50]"), op::Parameter(0)); + auto rhs = AllOf(op::Shape("f32[16,50]"), op::Parameter(1)); + auto partial_replicated_rhs = + AllOf(op::Shape("f32[32,50]"), + op::AllReduce(op::DynamicUpdateSlice(_, rhs, _, _))); + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT( + root, AllOf(op::Shape("f32[24,16]"), + op::DynamicSlice( + op::AllReduce(AllOf(op::Dot(lhs, partial_replicated_rhs), + op::Shape("f32[24,32]"))), + _, _))); +} + +TEST_F(SpmdPartitioningTest, Dot2DPartitionedBatchAndNonContracting) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[4,24,100] parameter(0), sharding={devices=[2,2,1]0,1,2,3} + %rhs = f32[4,32,100] parameter(1), sharding={devices=[2,2,1]0,1,2,3} + ROOT %dot = f32[4,24,32] dot(%lhs, %rhs), + lhs_batch_dims={0}, rhs_batch_dims={0}, + lhs_contracting_dims={2}, rhs_contracting_dims={2}, + sharding={devices=[2,2,1]0,1,2,3} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/4)); + VLOG(1) << module->ToString(); + + auto lhs = AllOf(op::Shape("f32[2,12,100]"), op::Parameter(0)); + auto rhs = AllOf(op::Shape("f32[2,16,100]"), op::Parameter(1)); + auto partial_replicated_rhs = + AllOf(op::Shape("f32[2,32,100]"), + op::AllReduce(op::DynamicUpdateSlice(_, rhs, _, _, _))); + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, AllOf(op::Shape("f32[2,12,32]"), + op::Dot(lhs, partial_replicated_rhs))); +} + +TEST_F(SpmdPartitioningTest, + Dot2DPartitionedBatchNonContractingAndContracting) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[4,24,100] parameter(0), sharding={devices=[2,1,2]0,1,2,3} + %rhs = f32[4,32,100] parameter(1), sharding={devices=[2,2,1]0,1,2,3} + ROOT %dot = f32[4,24,32] dot(%lhs, %rhs), + lhs_batch_dims={0}, rhs_batch_dims={0}, + lhs_contracting_dims={2}, rhs_contracting_dims={2}, + sharding={devices=[2,1,2]0,1,2,3} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/4)); + VLOG(1) << module->ToString(); + + auto lhs = AllOf(op::Shape("f32[2,24,50]"), op::Parameter(0)); + auto rhs = AllOf(op::Shape("f32[2,16,100]"), op::Parameter(1)); + auto partial_replicated_lhs = + AllOf(op::Shape("f32[2,24,100]"), + 
op::AllReduce(op::DynamicUpdateSlice(_, lhs, _, _, _))); + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, AllOf(op::Shape("f32[2,24,16]"), + op::Dot(partial_replicated_lhs, rhs))); +} + +TEST_F(SpmdPartitioningTest, Dot2DPartitionedBatchAndReshard) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[4,8,24,100] parameter(0), sharding={devices=[2,1,2,1]0,1,2,3} + %rhs = f32[4,8,32,100] parameter(1), sharding={devices=[2,1,2,1]0,1,2,3} + ROOT %dot = f32[4,8,24,32] dot(%lhs, %rhs), + lhs_batch_dims={0,1}, rhs_batch_dims={0,1}, + lhs_contracting_dims={3}, rhs_contracting_dims={3}, + sharding={devices=[1,2,2,1]0,1,2,3} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/4)); + VLOG(1) << module->ToString(); + + auto lhs = AllOf(op::Shape("f32[2,8,12,100]"), op::Parameter(0)); + auto rhs = AllOf(op::Shape("f32[2,8,16,100]"), op::Parameter(1)); + auto partial_replicated_rhs = + AllOf(op::Shape("f32[2,8,32,100]"), + op::AllReduce(op::DynamicUpdateSlice(_, rhs, _, _, _, _))); + auto dot = + AllOf(op::Shape("f32[2,8,12,32]"), op::Dot(lhs, partial_replicated_rhs)); + auto reshape = AllOf(op::Shape("f32[2,2,4,12,32]"), op::Reshape(dot)); + auto all_to_all = AllOf(op::Shape("f32[2,2,4,12,32]"), op::AllToAll(reshape)); + auto xpose = AllOf(op::Shape("f32[2,2,4,12,32]"), op::Transpose(all_to_all)); + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, AllOf(op::Shape("f32[4,4,12,32]"), op::Reshape(xpose))); +} + } // namespace } // namespace spmd } // namespace xla diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc index 6beed5a15e5..454a1da4646 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc @@ -16,7 +16,12 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h" #include +#include +#include "absl/algorithm/container.h" +#include "absl/container/flat_hash_map.h" +#include "absl/memory/memory.h" +#include "absl/strings/str_join.h" #include "absl/types/optional.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/hlo_casting_utils.h" @@ -143,10 +148,10 @@ Shape MakeNonPaddedShapeForGivenPartition(const Shape& shape, return partition_shape; } -std::vector MakePartitionOffsets(const Shape& shape, - const HloSharding& sharding, - HloInstruction* partition_id, - SpmdBuilder* b) { +std::vector MakePartitionOffsets( + const Shape& shape, const HloSharding& sharding, + HloInstruction* partition_id, SpmdBuilder* b, + absl::Span dims) { CHECK(!shape.IsTuple()); Array2D offset_array( @@ -158,7 +163,8 @@ std::vector MakePartitionOffsets(const Shape& shape, LiteralUtil::CreateR2FromArray2D(offset_array))); std::vector offsets; for (int64 i = 0; i < shape.rank(); ++i) { - if (sharding.tile_assignment().dim(i) == 1) { + if (sharding.tile_assignment().dim(i) == 1 || + (!dims.empty() && !absl::c_linear_search(dims, i))) { offsets.push_back(b->AddInstruction( HloInstruction::CreateConstant(LiteralUtil::Zero(S32)))); } else { @@ -978,5 +984,255 @@ bool CanReshardWithCollectivePermute(const HloSharding& source, source.tile_assignment() != target.tile_assignment(); } +GroupedSharding GroupShardingOnDims(const HloSharding& sharding, + absl::Span group_dims) { + CHECK(!sharding.IsTileMaximal()); + std::vector grouped_tiling_dims = + sharding.tile_assignment().dimensions(); + std::vector group_dim_sizes(group_dims.size()); + for (int64 i = 0; i < group_dims.size(); ++i) { + group_dim_sizes[i] = grouped_tiling_dims[group_dims[i]]; + grouped_tiling_dims[group_dims[i]] = 1; + } + std::vector> device_groups(Product(group_dim_sizes)); + sharding.tile_assignment().Each( + [&](absl::Span indices, int64 device) { + int64 group_id = 0; + for (int64 dim : group_dims) { + group_id *= sharding.tile_assignment().dim(dim); + group_id += indices[dim]; + } + device_groups[group_id].push_back(device); + }); + Array grouped_tiling(grouped_tiling_dims); + grouped_tiling.FillIota(0); + return GroupedSharding( + std::move(device_groups), + std::vector(group_dims.begin(), group_dims.end()), + std::move(group_dim_sizes), sharding.tile_assignment().num_dimensions(), + HloSharding::Tile(grouped_tiling)); +} + +HloSharding UngroupSharding(const GroupedSharding& grouped_sharding) { + CHECK(!grouped_sharding.sharding.IsTileMaximal()); + std::vector tiling_dims = + grouped_sharding.sharding.tile_assignment().dimensions(); + for (int64 i = 0; i < grouped_sharding.group_dims.size(); ++i) { + tiling_dims[grouped_sharding.group_dims[i]] = + grouped_sharding.group_dim_sizes[i]; + } + Array tiling(tiling_dims); + grouped_sharding.sharding.tile_assignment().Each( + [&](absl::Span indices, int64 device) { + std::vector ungrouped_inds(indices.begin(), indices.end()); + for (int64 g = 0; g < grouped_sharding.device_groups.size(); ++g) { + int64 remaining_group_index = g; + for (int64 i = grouped_sharding.group_dims.size() - 1; i >= 0; --i) { + ungrouped_inds[grouped_sharding.group_dims[i]] = + remaining_group_index % grouped_sharding.group_dim_sizes[i]; + remaining_group_index /= grouped_sharding.group_dim_sizes[i]; + } + tiling(ungrouped_inds) = grouped_sharding.device_groups[g][device]; + } + }); + return HloSharding::Tile(tiling); +} + +GroupedSharding AlignGroupsWith(GroupedSharding 
grouped_sharding, + const GroupedSharding& reference, + bool ignore_group_order) { + // Returns src -> dst index mapping. + auto get_permutation = [](absl::Span src, + absl::Span dst) { + CHECK_EQ(src.size(), dst.size()); + absl::flat_hash_map dst_reverse_map; + for (int64 i = 0; i < dst.size(); ++i) { + dst_reverse_map[dst[i]] = i; + } + std::vector permutation(src.size()); + for (int64 i = 0; i < src.size(); ++i) { + auto it = dst_reverse_map.find(src[i]); + CHECK(it != dst_reverse_map.end()); + permutation[i] = it->second; + } + return permutation; + }; + CHECK_EQ(grouped_sharding.device_groups.size(), + reference.device_groups.size()); + absl::flat_hash_map device_to_ref_group; + for (int64 g = 0; g < reference.device_groups.size(); ++g) { + for (int64 device : reference.device_groups[g]) { + device_to_ref_group[device] = g; + } + } + auto unique_ref_dev_group = [&](absl::Span devices) -> int64 { + int64 ref_g = -1; + for (int64 device : devices) { + if (ref_g == -1) { + ref_g = device_to_ref_group[device]; + } else if (ref_g != device_to_ref_group[device]) { + return -1; + } + } + return ref_g; + }; + bool matching_groups = true; + std::vector original_src_to_ref_permutation; + for (int64 g = 0; g < grouped_sharding.device_groups.size(); ++g) { + int64 ref_g = unique_ref_dev_group(grouped_sharding.device_groups[g]); + if (ref_g < 0 || (!ignore_group_order && g != ref_g)) { + matching_groups = false; + break; + } + if (g == 0) { + original_src_to_ref_permutation = get_permutation( + grouped_sharding.device_groups[g], reference.device_groups[ref_g]); + } + } + if (matching_groups) { + auto tiles = grouped_sharding.sharding.tile_assignment(); + tiles.Each([&](absl::Span indices, int64* device) { + *device = original_src_to_ref_permutation[*device]; + }); + grouped_sharding.sharding = HloSharding::Tile(tiles); + } + grouped_sharding.device_groups = std::move(reference.device_groups); + return grouped_sharding; +} + +Shape GetPerGroupBaseShape(const GroupedSharding& grouped_sharding, + const Shape& original_base_shape) { + auto result = original_base_shape; + for (int64 i = 0; i < grouped_sharding.group_dims.size(); ++i) { + int64 dim = grouped_sharding.group_dims[i]; + int64 groups = grouped_sharding.group_dim_sizes[i]; + result.set_dimensions(dim, result.dimensions(dim) / groups); + } + return result; +} + +namespace { + +HloInstruction* GetInGroupPartitionId( + HloInstruction* partition_id, + const std::vector>& device_groups, SpmdBuilder* b) { + int64 total_devices = device_groups.size() * device_groups[0].size(); + std::vector in_group_ids(total_devices); + for (uint32 i = 0; i < device_groups.size(); ++i) { + for (uint32 j = 0; j < device_groups[i].size(); ++j) { + in_group_ids[device_groups[i][j]] = j; + } + } + auto id_table = b->AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR1(in_group_ids))); + return b->AddInstruction(HloInstruction::CreateReshape( + ShapeUtil::MakeScalarShape(U32), + b->AddInstruction(HloInstruction::CreateDynamicSlice( + ShapeUtil::MakeShape(U32, {1}), id_table, {partition_id}, {1})))); +} + +SPMDCollectiveOpsCreator GetPerGroupCollectiveOpsCreator( + const SPMDCollectiveOpsCreator& creator, + const std::vector>& device_groups) { + SPMDCollectiveOpsCreator result; + result.create_partition_id = [creator, device_groups](SpmdBuilder* b) { + return GetInGroupPartitionId(creator.create_partition_id(b), device_groups, + b); + }; + auto expand_partition_groups = + [device_groups]( + const std::vector>& partition_subgroups) { + if 
(partition_subgroups.empty()) { + return device_groups; + } + std::vector> result(partition_subgroups.size() * + device_groups.size()); + for (int64 g = 0; g < device_groups.size(); ++g) { + for (int64 i = 0; i < partition_subgroups.size(); ++i) { + result[g * partition_subgroups.size() + i].resize( + partition_subgroups[i].size()); + for (int64 j = 0; j < partition_subgroups[i].size(); ++j) { + result[g * partition_subgroups.size() + i][j] = + device_groups[g][partition_subgroups[i][j]]; + } + } + } + return result; + }; + result.create_cross_partition_all_reduce = + [creator, expand_partition_groups]( + SpmdBuilder* b, HloInstruction* operand, HloComputation* reduction, + const std::vector>& partition_subgroups, + int64 channel_id) { + return creator.create_cross_partition_all_reduce( + b, operand, reduction, expand_partition_groups(partition_subgroups), + channel_id); + }; + result.create_cross_partition_collective_permute = + [creator, device_groups]( + SpmdBuilder* b, HloInstruction* operand, + std::vector>& src_dst_pairs, + int64 next_channel_id) { + std::vector> expanded_pairs( + src_dst_pairs.size() * device_groups.size()); + for (int64 g = 0; g < device_groups.size(); ++g) { + for (int64 i = 0; i < src_dst_pairs.size(); ++i) { + expanded_pairs[g * src_dst_pairs.size() + i] = + std::pair{ + device_groups[g][src_dst_pairs[i].first], + device_groups[g][src_dst_pairs[i].second]}; + } + } + return creator.create_cross_partition_collective_permute( + b, operand, expanded_pairs, next_channel_id); + }; + result.create_cross_partition_all_to_all = + [creator, expand_partition_groups]( + SpmdBuilder* b, absl::Span operands, + const std::vector>& partition_subgroups, + int64 channel_id, absl::optional split_dimension) { + return creator.create_cross_partition_all_to_all( + b, operands, expand_partition_groups(partition_subgroups), + channel_id, split_dimension); + }; + if (creator.create_cross_partition_all_gather) { + result.create_cross_partition_all_gather = + [creator, expand_partition_groups]( + SpmdBuilder* b, HloInstruction* operand, const Shape& ag_shape, + const std::vector>& partition_subgroups, + int64 channel_id, int64 all_gather_dimension) { + return creator.create_cross_partition_all_gather( + b, operand, ag_shape, + expand_partition_groups(partition_subgroups), channel_id, + all_gather_dimension); + }; + } + return result; +} + +} // namespace + +PartitionedHlo::PartitioningState CreatePerGroupPartitioningState( + const PartitionedHlo::PartitioningState& state, + const std::vector>& device_groups, SpmdBuilder* b) { + auto result = state; + result.collective_ops_creator = GetPerGroupCollectiveOpsCreator( + state.collective_ops_creator, device_groups); + result.partition_id = + GetInGroupPartitionId(state.partition_id, device_groups, b); + // Create a string key for the groups. 
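+  // For example, device_groups {{0,1},{2,3}} produce the cache key "0,1;2,3".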
+ std::vector per_group_strings(device_groups.size()); + for (int64 i = 0; i < per_group_strings.size(); ++i) { + per_group_strings[i] = absl::StrJoin(device_groups[i], ","); + } + auto& grouped_cache = + state.reshard_cache->groupd_caches[absl::StrJoin(per_group_strings, ";")]; + if (!grouped_cache) { + grouped_cache = absl::make_unique(); + } + result.reshard_cache = grouped_cache.get(); + return result; +} + } // namespace spmd } // namespace xla diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h index 7b737daf78c..6e68375f9b9 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h @@ -87,10 +87,12 @@ Shape MakeNonPaddedShapeForGivenPartition(const Shape& shape, // Generates the HLO instructions that represent the dimension offsets on any // device. The size of the returned vector is the rank of the given shape. -std::vector MakePartitionOffsets(const Shape& shape, - const HloSharding& sharding, - HloInstruction* partition_id, - SpmdBuilder* b); +// If `dims` is non-empty, the generated offsets will only be non-zero for those +// dimensions. +std::vector MakePartitionOffsets( + const Shape& shape, const HloSharding& sharding, + HloInstruction* partition_id, SpmdBuilder* b, + absl::Span dims = {}); // Returns the offsets of the partition in the tile assignment. std::vector MakeTiledPartitionOrdinals( @@ -276,6 +278,48 @@ GetReshardAllToAllSourceTargetDims(const HloSharding& source, bool CanReshardWithCollectivePermute(const HloSharding& source, const HloSharding& target); +// Represents grouping devices in a tiled sharding along certain dimensions. +// Elements in group dimensions define different device groups, and the sharding +// represents the in-group sharding. +struct GroupedSharding { + GroupedSharding(std::vector> device_groups, + std::vector group_dims, + std::vector group_dim_sizes, int64 rank, + HloSharding grouped_sharding) + : device_groups(std::move(device_groups)), + group_dims(std::move(group_dims)), + group_dim_sizes(std::move(group_dim_sizes)), + sharding(std::move(grouped_sharding)) {} + std::vector> device_groups; + std::vector group_dims; + std::vector group_dim_sizes; + int64 rank; + HloSharding sharding; +}; + +// Creates a GroupedSharding for a tiled sharding. +GroupedSharding GroupShardingOnDims(const HloSharding& sharding, + absl::Span group_dims); + +// Reconstructs the ungrouped sharding from a GroupedSharding. +HloSharding UngroupSharding(const GroupedSharding& grouped_sharding); + +// Returns a new GroupedSharding that has the same group definition of +// `reference`. +GroupedSharding AlignGroupsWith(GroupedSharding grouped_sharding, + const GroupedSharding& reference, + bool ignore_group_order = false); + +// Returns the per-group base shape, i.e., before applying the in-group +// sharding. +Shape GetPerGroupBaseShape(const GroupedSharding& grouped_sharding, + const Shape& original_base_shape); + +// Creates the nested partitioner state for in-group patitioning. +PartitionedHlo::PartitioningState CreatePerGroupPartitioningState( + const PartitionedHlo::PartitioningState& state, + const std::vector>& device_groups, SpmdBuilder* b); + } // namespace spmd } // namespace xla From b0cec6bd77ac39bc0f23b20f8a157eccd1b53615 Mon Sep 17 00:00:00 2001 From: Tiezhen WANG Date: Fri, 24 Jul 2020 00:59:02 -0700 Subject: [PATCH 1252/2522] TFLite selective registration: Add android target. 
This includes build rule for - libtensorflowlite_jni.so - Android target - AAR target PiperOrigin-RevId: 322952184 Change-Id: If0fd97d9f50867dbce45a304758e3b9e9ac3eb0a --- tensorflow/lite/build_def.bzl | 68 ++++++++++++++++++- tensorflow/lite/java/BUILD | 28 ++++++++ ...InterpreterCustomizedAndroidBuildTest.java | 63 +++++++++++++++++ tensorflow/lite/testing/BUILD | 7 ++ 4 files changed, 165 insertions(+), 1 deletion(-) create mode 100644 tensorflow/lite/java/src/test/java/org/tensorflow/lite/InterpreterCustomizedAndroidBuildTest.java diff --git a/tensorflow/lite/build_def.bzl b/tensorflow/lite/build_def.bzl index a3eecb4e35f..001b2fc791e 100644 --- a/tensorflow/lite/build_def.bzl +++ b/tensorflow/lite/build_def.bzl @@ -7,6 +7,8 @@ load( "tf_cc_shared_object", "tf_cc_test", ) +load("//tensorflow/lite/java:aar_with_jni.bzl", "aar_with_jni") +load("@build_bazel_rules_android//android:rules.bzl", "android_library") def tflite_copts(): """Defines compile time flags.""" @@ -732,7 +734,12 @@ def tflite_experimental_runtime_linkopts(if_eager = [], if_non_eager = [], if_no if_none = [] + if_none, ) -def tflite_custom_cc_library(name, models = [], srcs = [], deps = [], visibility = ["//visibility:private"]): +def tflite_custom_cc_library( + name, + models = [], + srcs = [], + deps = [], + visibility = ["//visibility:private"]): """Generates a tflite cc library, stripping off unused operators. This library includes the TfLite runtime as well as all operators needed for the given models. @@ -781,3 +788,62 @@ def tflite_custom_cc_library(name, models = [], srcs = [], deps = [], visibility ] + real_deps), visibility = visibility, ) + +def tflite_custom_android_library( + name, + models = [], + srcs = [], + deps = [], + custom_package = "org.tensorflow.lite", + visibility = ["//visibility:private"]): + """Generates a tflite Android library, stripping off unused operators. + + Note that due to a limitation in the JNI Java wrapper, the compiled TfLite shared binary + has to be named as tensorflowlite_jni.so so please make sure that there is no naming conflict. + i.e. you can't call this rule multiple times in the same build file. + + Args: + name: Name of the target. + models: List of models to be supported. This TFLite build will only include + operators used in these models. If the list is empty, all builtin + operators are included. + srcs: List of files implementing custom operators if any. + deps: Additional dependencies to build all the custom operators. + custom_package: Name of the Java package. It is required by android_library in case + the Java source file can't be inferred from the directory where this rule is used. + visibility: Visibility setting for the generated target. Default to private. + """ + tflite_custom_cc_library(name = "%s_cc" % name, models = models, srcs = srcs, deps = deps, visibility = visibility) + + # JNI wrapper expects a binary file called `libtensorflowlite_jni.so` in java path. 
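+    # This shared library contains only the kernels needed by `models`, plus
+    # the framework-only native code.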
+ tflite_jni_binary( + name = "libtensorflowlite_jni.so", + linkscript = "//tensorflow/lite/java:tflite_version_script.lds", + deps = [ + ":%s_cc" % name, + "//tensorflow/lite/java/src/main/native:native_framework_only", + ], + ) + + native.cc_library( + name = "%s_jni" % name, + srcs = ["libtensorflowlite_jni.so"], + visibility = visibility, + ) + + android_library( + name = name, + manifest = "//tensorflow/lite/java:AndroidManifest.xml", + deps = [ + ":%s_jni" % name, + "//tensorflow/lite/java:tensorflowlite_java", + "@org_checkerframework_qual", + ], + custom_package = custom_package, + visibility = visibility, + ) + + aar_with_jni( + name = "%s_aar" % name, + android_library = name, + ) diff --git a/tensorflow/lite/java/BUILD b/tensorflow/lite/java/BUILD index f7a0c892bcf..e8f9145065a 100644 --- a/tensorflow/lite/java/BUILD +++ b/tensorflow/lite/java/BUILD @@ -18,6 +18,7 @@ exports_files([ "src/testdata/grace_hopper_224.jpg", "AndroidManifest.xml", "proguard.flags", + "tflite_version_script.lds", ]) JAVA_SRCS = glob([ @@ -340,6 +341,33 @@ java_test( ], ) +java_test( + name = "InterpreterCustomizedAndroidBuildTest", + size = "small", + srcs = [ + "src/test/java/org/tensorflow/lite/InterpreterCustomizedAndroidBuildTest.java", + "src/test/java/org/tensorflow/lite/TestUtils.java", + ], + data = [ + "//tensorflow/lite:testdata/add.bin", + "//tensorflow/lite:testdata/test_model.bin", + ], + javacopts = JAVACOPTS, + # Add customized libtensorflowlite_jni.so to java_path + jvm_flags = ["-Djava.library.path=third_party/tensorflow/lite/testing"], + tags = [ + "no_mac", # TODO(b/122888913): libtensorflowlite_test_jni broke on mac. + "v1only", + ], + test_class = "org.tensorflow.lite.InterpreterCustomizedAndroidBuildTest", + visibility = ["//visibility:private"], + deps = [ + "//tensorflow/lite/testing:customtized_tflite_for_add_ops", + "@com_google_truth", + "@junit", + ], +) + # portable_tests includes files for running TFLite interpreter tests. filegroup( name = "portable_tests", diff --git a/tensorflow/lite/java/src/test/java/org/tensorflow/lite/InterpreterCustomizedAndroidBuildTest.java b/tensorflow/lite/java/src/test/java/org/tensorflow/lite/InterpreterCustomizedAndroidBuildTest.java new file mode 100644 index 00000000000..64722e5c3c5 --- /dev/null +++ b/tensorflow/lite/java/src/test/java/org/tensorflow/lite/InterpreterCustomizedAndroidBuildTest.java @@ -0,0 +1,63 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +package org.tensorflow.lite; + +import static com.google.common.truth.Truth.assertThat; +import static org.junit.Assert.fail; + +import java.nio.ByteBuffer; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Unit tests for {@link org.tensorflow.lite.Interpreter} with selective registration. */ +@RunWith(JUnit4.class) +public final class InterpreterCustomizedAndroidBuildTest { + // Supported model. 
+ private static final String SUPPORTED_MODEL_PATH = "tensorflow/lite/testdata/add.bin"; + private static final ByteBuffer SUPPORTED_MODEL_BUFFER = + TestUtils.getTestFileAsBuffer(SUPPORTED_MODEL_PATH); + + // Model with unregistered operator. + private static final String UNSUPPORTED_MODEL_PATH = + "tensorflow/lite/testdata/test_model.bin"; + private static final ByteBuffer UNSUPPORTED_MODEL_BUFFER = + TestUtils.getTestFileAsBuffer(UNSUPPORTED_MODEL_PATH); + + @Test + public void testSupportedModel() throws Exception { + try (Interpreter interpreter = new Interpreter(SUPPORTED_MODEL_BUFFER)) { + assertThat(interpreter).isNotNull(); + float[] oneD = {1.23f, 6.54f, 7.81f}; + float[][] twoD = {oneD, oneD, oneD, oneD, oneD, oneD, oneD, oneD}; + float[][][] threeD = {twoD, twoD, twoD, twoD, twoD, twoD, twoD, twoD}; + float[][][][] fourD = {threeD, threeD}; + float[][][][] parsedOutputs = new float[2][8][8][3]; + interpreter.run(fourD, parsedOutputs); + } + } + + @Test + public void testUnsupportedModel() throws Exception { + try (Interpreter interpreter = new Interpreter(UNSUPPORTED_MODEL_BUFFER)) { + fail(); + } catch (IllegalArgumentException e) { + assertThat(e) + .hasMessageThat() + .contains("Cannot create interpreter: Didn't find op for builtin opcode 'CONV_2D'"); + } + } +} diff --git a/tensorflow/lite/testing/BUILD b/tensorflow/lite/testing/BUILD index d17fbdb2398..7fe2cfde439 100644 --- a/tensorflow/lite/testing/BUILD +++ b/tensorflow/lite/testing/BUILD @@ -4,6 +4,7 @@ load( "gen_zipped_test_file", "generated_test_models_all", "merged_test_models", + "tflite_custom_android_library", "tflite_custom_cc_library", ) load("//tensorflow/lite:special_rules.bzl", "tflite_portable_test_suite") @@ -566,6 +567,12 @@ pybind_extension( tflite_portable_test_suite() +tflite_custom_android_library( + name = "customtized_tflite_for_add_ops", + models = ["//tensorflow/lite:testdata/add.bin"], + visibility = ["//visibility:public"], +) + edgetpu_ops = [ "add", "avg_pool", From 629e5219158c97dedbbc6db945b827b337272032 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 24 Jul 2020 01:24:40 -0700 Subject: [PATCH 1253/2522] PR #41640: Raise error when calling .fit() w/ batch_size and a tf dataset Imported from GitHub PR https://github.com/tensorflow/tensorflow/pull/41640 Copybara import of the project: -- 8f278b5c183b485f038b0504308929b4e3bde5a7 by Jonah Kohn <51345541+jonah-kohn@users.noreply.github.com>: Raise error when calling .fit() w/ batch_size and a tf dataset PiperOrigin-RevId: 322954821 Change-Id: Idd4a3b9b9b0e52974ae0dcf60ad73c515c976b45 --- tensorflow/python/keras/engine/data_adapter.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/keras/engine/data_adapter.py b/tensorflow/python/keras/engine/data_adapter.py index 33c868d02be..3672ef64da3 100644 --- a/tensorflow/python/keras/engine/data_adapter.py +++ b/tensorflow/python/keras/engine/data_adapter.py @@ -681,7 +681,6 @@ class DatasetAdapter(DataAdapter): y=None, sample_weights=None, steps=None, - batch_size=None, **kwargs): super(DatasetAdapter, self).__init__(x, y, **kwargs) # Note that the dataset instance is immutable, its fine to reuse the user @@ -691,7 +690,7 @@ class DatasetAdapter(DataAdapter): # The user-provided steps. 
self._user_steps = steps - self._validate_args(y, sample_weights, steps, batch_size) + self._validate_args(y, sample_weights, steps) def get_dataset(self): return self._dataset @@ -720,7 +719,7 @@ class DatasetAdapter(DataAdapter): return (self._user_steps is None or cardinality.cardinality(self._dataset).numpy() == self._user_steps) - def _validate_args(self, y, sample_weights, steps, batch_size): + def _validate_args(self, y, sample_weights, steps): """Validates `__init__` arguments.""" # Arguments that shouldn't be passed. if not is_none_or_empty(y): @@ -730,10 +729,6 @@ class DatasetAdapter(DataAdapter): raise ValueError("`sample_weight` argument is not supported when using " "dataset as input.") - if batch_size is not None: - raise ValueError("`batch_size` argument must not be specified when " - "using dataset as input.") - if steps is None: if _is_distributed_dataset(self._dataset): raise ValueError("When providing a distributed dataset, you must " From a6633d30c35b9a6943a3c60d0318a0670168cf86 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 24 Jul 2020 02:01:43 -0700 Subject: [PATCH 1254/2522] Update GraphDef version to 472. PiperOrigin-RevId: 322958130 Change-Id: Ife86eaa89cf118dfc14131085a143046eef7f376 --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index a5f5593c7aa..6a58c09a9fd 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 471 // Updated: 2020/7/23 +#define TF_GRAPH_DEF_VERSION 472 // Updated: 2020/7/24 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From fdbb2851c22c6b96320c0e2cbe61689bc7e4ad5e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 24 Jul 2020 02:01:45 -0700 Subject: [PATCH 1255/2522] compat: Update forward compatibility horizon to 2020-07-24 PiperOrigin-RevId: 322958133 Change-Id: I722ef7754fa7fba81f7a9fea694211a31509c7b0 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index ed32f4cb277..463cabd53bd 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 23) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 24) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From 3702f63515d46b2a5631500a0560a32cf9ed6a99 Mon Sep 17 00:00:00 2001 From: Elena Zhelezina Date: Fri, 24 Jul 2020 11:01:12 +0100 Subject: [PATCH 1256/2522] Addressed reviewer's comments. 
Change-Id: I99bdedc40dfdae8e6e6c57003bf1128051b40c1a --- tensorflow/lite/python/lite.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tensorflow/lite/python/lite.py b/tensorflow/lite/python/lite.py index f154267ec94..3d2e9c5a536 100644 --- a/tensorflow/lite/python/lite.py +++ b/tensorflow/lite/python/lite.py @@ -569,10 +569,12 @@ class TFLiteConverterBaseV2(TFLiteConverterBase): raise ValueError("The inference_input_type and inference_output_type " "must be in {}.".format(all_types_names)) elif quant_mode.is_post_training_integer_quantize_16x8(): - if self.inference_input_type != constants.INT16 or \ - self.inference_output_type != constants.INT16: + all_types = default_types + [constants.INT16] + if self.inference_input_type not in all_types or \ + self.inference_output_type not in all_types: + all_types_names = ["tf." + t.name for t in all_types] raise ValueError("The inference_input_type and inference_output_type " - "must be constants.INT16.") + "must be in {}.".format(all_types_names)) elif self.inference_input_type not in default_types or \ self.inference_output_type not in default_types: raise ValueError("The inference_input_type and inference_output_type " From de482ff7038ed825af2a6da18adc07354a9f0590 Mon Sep 17 00:00:00 2001 From: Elena Zhelezina Date: Fri, 24 Jul 2020 11:03:21 +0100 Subject: [PATCH 1257/2522] Fix after quantization of the network. Change-Id: I109ab0e87e8c4b9c16be55e902142469545ea815 --- tensorflow/lite/tools/optimize/operator_property.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/lite/tools/optimize/operator_property.cc b/tensorflow/lite/tools/optimize/operator_property.cc index 4c63929d588..06015dacf12 100644 --- a/tensorflow/lite/tools/optimize/operator_property.cc +++ b/tensorflow/lite/tools/optimize/operator_property.cc @@ -190,6 +190,7 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index, property.inputs = {{0, {}}}; property.outputs = {{0, {}}}; property.restrict_same_input_output_scale = true; + property.quantize_input_as_activations = true; property.version = 2; break; case BuiltinOperator_HARD_SWISH: { From 63b8670dae816422d73dde511566a4cbd329ca18 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 24 Jul 2020 04:18:10 -0700 Subject: [PATCH 1258/2522] [XLA:CPU] Directly emit linalg.generic instead of going through matmul This lets us also do matvec in the future. 
PiperOrigin-RevId: 322972296 Change-Id: I1462ba0d76e621b05a4aa07d91ca525c00017717 --- .../xla/service/cpu/dot_op_emitter.cc | 47 +++++++++++++++++-- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc index 1dc5bfa95ec..2b3865b4dba 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc @@ -270,11 +270,48 @@ Status DotOpEmitter::EmitLinalgMatmul() { return EmitMlirFuncAndCall( mlir_context_, b_, dot_info_.result_shape, operand_shapes, target_ptr, operand_ptrs, name, [&](mlir::OpBuilder* builder, mlir::FuncOp function) { + CHECK_EQ(dot_info_.dim_nums.lhs_contracting_dimensions_size(), 1); + CHECK_EQ(dot_info_.dim_nums.rhs_contracting_dimensions_size(), 1); + mlir::MLIRContext* context = builder->getContext(); mlir::edsc::ScopedContext scope(*builder, function.getLoc()); mlir::Value a = function.getArgument(0), b = function.getArgument(1), c = function.getArgument(2); - mlir::edsc::intrinsics::linalg_matmul(mlir::TypeRange{}, - mlir::ValueRange{b, c, a}); + + llvm::SmallVector b_exprs( + dot_info_.lhs_shape.rank()); + llvm::SmallVector c_exprs( + dot_info_.rhs_shape.rank()); + + llvm::SmallVector parallel_exprs; + mlir::AffineExpr reduce_expr; + for (int i = 0; i != dot_info_.result_shape.rank(); ++i) { + parallel_exprs.push_back(mlir::getAffineDimExpr(i, context)); + } + reduce_expr = + mlir::getAffineDimExpr(dot_info_.result_shape.rank(), context); + + // The reduction expr is shared for both inputs. + b_exprs[dot_info_.dim_nums.lhs_contracting_dimensions(0)] = reduce_expr; + c_exprs[dot_info_.dim_nums.rhs_contracting_dimensions(0)] = reduce_expr; + + // Fill in the remaining parallel exprs. 
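+          // After the loop below, e.g. a [m,k] x [k,n] -> [m,n] dot ends up
+          // with maps lhs: (d0, d2), rhs: (d2, d1), out: (d0, d1) and
+          // iterator types [parallel, parallel, reduction].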
+ int par_expr_num = 0; + for (auto* v : {&b_exprs, &c_exprs}) { + for (auto& e : *v) { + if (!e) { + e = parallel_exprs[par_expr_num++]; + } + } + } + + llvm::SmallVector types( + parallel_exprs.size(), mlir::IteratorType::Parallel); + types.push_back(mlir::IteratorType::Reduction); + + mlir::edsc::StructuredIndexed s_a(a), s_b(b), s_c(c); + mlir::edsc::makeGenericLinalgOp(types, {s_b(b_exprs), s_c(c_exprs)}, + {s_a(parallel_exprs)}, + mlir::edsc::ops::macRegionBuilder); mlir::edsc::intrinsics::std_ret(); mlir::linalg::LinalgTilingOptions tilingOptions; @@ -283,13 +320,13 @@ Status DotOpEmitter::EmitLinalgMatmul() { target_machine_features_.minimum_alignment_for_allocation( ShapeUtil::ByteSizeOf(dot_info_.result_shape)); mlir_strategy::MatmulCodegenStrategy strategy; - strategy.tile(tilingOptions) - .promote( + strategy.tile(tilingOptions) + .promote( mlir::linalg::LinalgPromotionOptions() .setAlignment(alignment) .setUseFullTileBuffersByDefault(true) .setUseAlloca(true)) - .vectorize() + .vectorize() .setVectorTransformsOptions( mlir::vector::VectorTransformsOptions() .setVectorTransformsOptions( From 6f7084becc90e777476caeacc7a41f9aa6fee034 Mon Sep 17 00:00:00 2001 From: Vignesh Kothapalli Date: Fri, 24 Jul 2020 17:44:56 +0530 Subject: [PATCH 1259/2522] removed type mismatch tests --- .../experimental/kernel_tests/unique_test.py | 59 ------------------- 1 file changed, 59 deletions(-) diff --git a/tensorflow/python/data/experimental/kernel_tests/unique_test.py b/tensorflow/python/data/experimental/kernel_tests/unique_test.py index 0567efee1fa..af17eec7502 100644 --- a/tensorflow/python/data/experimental/kernel_tests/unique_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/unique_test.py @@ -78,65 +78,6 @@ class UniqueTest(test_base.DatasetTestBase, parameterized.TestCase): (["foo", "bar", "baz", "baz", "bar", "foo"], ["foo", "bar", "baz"]), ]) - def _checkDatasetRaises(self, dtype, test_cases, error): - """Test whether the dataset raises the appropriate errors - while generating the outputs. - - Args: - dtype: The expected `dtype` of the elements in each test case. - test_cases: A list of lists. The dataset will be created from the - list items. - error: The expected error to be raised. 
- """ - - current_test_case = [] - dataset = dataset_ops.Dataset.from_generator(lambda: current_test_case, - dtype).apply(unique.unique()) - - for test_case in test_cases: - current_test_case = test_case - with self.assertRaises(error): - self.getDatasetOutput(dataset) - - @combinations.generate(test_base.graph_only_combinations()) - def testStringTypeMismatch(self): - """Should raise InternalError when element type doesn't match - with dtypes.string.""" - - test_cases = [ - ["hello", 1], - ["hello", "hello", "world", 3], - ["hello", 1, 1], - ["hello", "world", 1, 2], - [1, "hello"], - [1, 2, "hello"], - [1, 3, "hello", "world"], - [1, 1, "hello", "hello"] - ] - self._checkDatasetRaises(dtype=dtypes.string, test_cases=test_cases, - error=errors.InternalError) - - @combinations.generate(combinations.times( - test_base.graph_only_combinations(), - combinations.combine(dtype=[dtypes.int32, dtypes.int64]))) - def testIntTypeMismatch(self, dtype): - """Should raise InvalidArgumentError when element type doesn't - match with dtypes.int32, dtypes.int64""" - - test_cases = [ - [1, "foo"], - [1, 2, "bar"], - [1, 3, "foo", "bar"], - [1, 4, "foo", "foo"], - ["bar", 1], - ["bar", "foo", 2], - ["bar", "bar", "foo", 3], - ["foo", 1, 1], - ["bar", "bar", 1, 1], - ] - self._checkDatasetRaises(dtype=dtype, test_cases=test_cases, - error=errors.InvalidArgumentError) - @combinations.generate(test_base.graph_only_combinations()) def testUnsupportedTypes(self): """Should raise TypeError when element type doesn't match with the From 6ade70c1f328b997e3be9cfa6d24f73c8c68444d Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Fri, 24 Jul 2020 20:02:16 +0700 Subject: [PATCH 1260/2522] HDFS registration --- .../filesystem/plugins/hadoop/BUILD | 31 ++++++++ .../plugins/hadoop/hadoop_filesystem.cc | 76 +++++++++++++++++++ .../plugins/hadoop/hadoop_filesystem.h | 21 +++++ 3 files changed, 128 insertions(+) create mode 100644 tensorflow/c/experimental/filesystem/plugins/hadoop/BUILD create mode 100644 tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc create mode 100644 tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.h diff --git a/tensorflow/c/experimental/filesystem/plugins/hadoop/BUILD b/tensorflow/c/experimental/filesystem/plugins/hadoop/BUILD new file mode 100644 index 00000000000..3683bd03317 --- /dev/null +++ b/tensorflow/c/experimental/filesystem/plugins/hadoop/BUILD @@ -0,0 +1,31 @@ +# Experimental hadoop filesystem plugin. +load("//tensorflow:tensorflow.bzl", "get_win_copts", "tf_cc_shared_object", "tf_cc_test") + +package( + licenses = ["notice"], # Apache 2.0 +) + +# Filesystem implementation for HADOOP environments +tf_cc_shared_object( + name = "hadoop_filesystem", + framework_so = [], + linkstatic = False, + per_os_targets = 1, + visibility = ["//visibility:public"], + deps = [":hadoop_filesystem_impl"], +) + +# The real implementation of the filesystem. 
+cc_library( + name = "hadoop_filesystem_impl", + srcs = ["hadoop_filesystem.cc"], + hdrs = ["hadoop_filesystem.h"], + copts = select({ + "//conditions:default": [], + "//tensorflow:windows": get_win_copts(), + }), + deps = [ + "//tensorflow/c:tf_status", + "//tensorflow/c/experimental/filesystem:filesystem_interface", + ], +) diff --git a/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc new file mode 100644 index 00000000000..200d1b23adf --- /dev/null +++ b/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc @@ -0,0 +1,76 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.h" + +#include +#include + +#include "tensorflow/c/experimental/filesystem/filesystem_interface.h" +#include "tensorflow/c/tf_status.h" + +// Implementation of a filesystem for HADOOP environments. +// This filesystem will support `hdfs://`, `viewfs://` and `har://` URI schemes. + +static void* plugin_memory_allocate(size_t size) { return calloc(1, size); } +static void plugin_memory_free(void* ptr) { free(ptr); } + +// SECTION 1. Implementation for `TF_RandomAccessFile` +// ---------------------------------------------------------------------------- +namespace tf_random_access_file { + +// TODO(vnvo2409): Implement later + +} // namespace tf_random_access_file + +// SECTION 2. Implementation for `TF_WritableFile` +// ---------------------------------------------------------------------------- +namespace tf_writable_file { + +// TODO(vnvo2409): Implement later + +} // namespace tf_writable_file + +// SECTION 3. Implementation for `TF_ReadOnlyMemoryRegion` +// ---------------------------------------------------------------------------- +namespace tf_read_only_memory_region { + +// TODO(vnvo2409): Implement later + +} // namespace tf_read_only_memory_region + +// SECTION 4. 
Implementation for `TF_Filesystem`, the actual filesystem +// ---------------------------------------------------------------------------- +namespace tf_hadoop_filesystem { + +// TODO(vnvo2409): Implement later + +} // namespace tf_hadoop_filesystem + +static void ProvideFilesystemSupportFor(TF_FilesystemPluginOps* ops, + const char* uri) { + TF_SetFilesystemVersionMetadata(ops); + ops->scheme = strdup(uri); +} + +void TF_InitPlugin(TF_FilesystemPluginInfo* info) { + info->plugin_memory_allocate = plugin_memory_allocate; + info->plugin_memory_free = plugin_memory_free; + info->num_schemes = 3; + info->ops = static_cast( + plugin_memory_allocate(info->num_schemes * sizeof(info->ops[0]))); + ProvideFilesystemSupportFor(&info->ops[0], "hdfs"); + ProvideFilesystemSupportFor(&info->ops[1], "viewfs"); + ProvideFilesystemSupportFor(&info->ops[2], "har"); +} diff --git a/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.h b/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.h new file mode 100644 index 00000000000..850cefe0231 --- /dev/null +++ b/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.h @@ -0,0 +1,21 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_C_EXPERIMENTAL_FILESYSTEM_PLUGINS_HADOOP_HADOOP_FILESYSTEM_H_ +#define TENSORFLOW_C_EXPERIMENTAL_FILESYSTEM_PLUGINS_HADOOP_HADOOP_FILESYSTEM_H_ + +#include "tensorflow/c/experimental/filesystem/filesystem_interface.h" +#include "tensorflow/c/tf_status.h" + +#endif // TENSORFLOW_C_EXPERIMENTAL_FILESYSTEM_PLUGINS_HADOOP_HADOOP_FILESYSTEM_H_ From 281c377f21fc6e00bdc2edef60f664b3c3c79ffd Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Fri, 24 Jul 2020 20:09:06 +0700 Subject: [PATCH 1261/2522] Add translate name --- .../c/experimental/filesystem/plugins/s3/s3_filesystem.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc index 50be57a263b..ff47210ed1b 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc @@ -1111,6 +1111,10 @@ int GetChildren(const TF_Filesystem* filesystem, const char* path, TF_SetStatus(status, TF_OK, ""); } +static char* TranslateName(const TF_Filesystem* filesystem, const char* uri) { + return strdup(uri); +} + } // namespace tf_s3_filesystem static void ProvideFilesystemSupportFor(TF_FilesystemPluginOps* ops, @@ -1158,6 +1162,7 @@ static void ProvideFilesystemSupportFor(TF_FilesystemPluginOps* ops, ops->filesystem_ops->get_file_size = tf_s3_filesystem::GetFileSize; ops->filesystem_ops->stat = tf_s3_filesystem::Stat; ops->filesystem_ops->get_children = tf_s3_filesystem::GetChildren; + ops->filesystem_ops->translate_name = tf_s3_filesystem::TranslateName; } void TF_InitPlugin(TF_FilesystemPluginInfo* info) { From f3375f0e267cb24146df9a59e320cc527f3ab285 Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Fri, 24 Jul 2020 13:58:05 +0000 Subject: [PATCH 1262/2522] change to CPU-only test for now --- tensorflow/core/kernels/map_kernels.cc | 1 - tensorflow/python/kernel_tests/BUILD | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/map_kernels.cc b/tensorflow/core/kernels/map_kernels.cc index db91a660809..6a691538956 100644 --- a/tensorflow/core/kernels/map_kernels.cc +++ b/tensorflow/core/kernels/map_kernels.cc @@ -19,7 +19,6 @@ limitations under the License. namespace tensorflow { - REGISTER_KERNEL_BUILDER(Name("EmptyTensorMap").Device(DEVICE_CPU), EmptyTensorMap); diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 55a8feeb053..7c3a6a43995 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -142,7 +142,8 @@ cuda_py_test( ], ) -cuda_py_test( +# TODO(kattian): add GPU capability and change to cuda_py_test +tf_py_test( name = "map_ops_test", size = "small", srcs = ["map_ops_test.py"], From 2fcfe25451feae5002f45cbe15e94d51d19c7925 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 24 Jul 2020 07:46:23 -0700 Subject: [PATCH 1263/2522] Go: Update generated wrapper functions for TensorFlow ops. 
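Stepping back to the modular-filesystem scaffolding introduced a few patches above (the HDFS plugin registering the hdfs, viewfs and har schemes): a minimal sketch of loading such a plugin at runtime, assuming the experimental register_filesystem_plugin API; the shared-object name is a placeholder that depends on how the hadoop_filesystem target is built.

```python
import tensorflow as tf

# Placeholder path to the built plugin shared object.
tf.experimental.register_filesystem_plugin("./libhadoop_filesystem.so")

# After registration, tf.io.gfile can resolve the plugin's schemes.
print(tf.io.gfile.exists("hdfs://namenode:8020/tmp/example"))
```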
PiperOrigin-RevId: 322994492 Change-Id: I631545cf77efbf9b0de767534268c27683cb4a8f --- tensorflow/go/op/wrappers.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 64f6298d645..fdc188e6aa3 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -14988,9 +14988,8 @@ func MatrixInverseAdjoint(value bool) MatrixInverseAttr { } } -// Computes the inverse of one or more square invertible matrices or their +// Computes the inverse of one or more square invertible matrices or their adjoints (conjugate transposes). // -// adjoints (conjugate transposes). // // The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions // form square matrices. The output is a tensor of the same shape as the input From 53575a9cd98dc02940f2ef6507ba561076ef3242 Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Fri, 24 Jul 2020 08:03:26 -0700 Subject: [PATCH 1264/2522] [MLIR] IfOp/WhileOp: Convert output_shapes to a derived attribute - Also add canonicalization for IfOp and WhileOp to remove the output_shapes attribute from these ops if its present. - Fix shape inference for IfOp to not rely on tensorflow shape inference function and use a custom function instead to infer IfOp output types/shapes and update unit test to exercise the new path. - Update MLIR->Graphdef export test to verify that output_shapes is emitted in the generated graphdef. PiperOrigin-RevId: 322996755 Change-Id: I6d686058b70ef27d443f3868acd71b95525b1fd7 --- .../compiler/mlir/tensorflow/ir/tf_ops.h | 1 + .../compiler/mlir/tensorflow/ir/tf_ops.td | 28 ++++++++- .../compiler/mlir/tensorflow/ir/tf_ops_a_m.cc | 6 +- .../compiler/mlir/tensorflow/ir/tf_ops_a_m.h | 1 + .../mlir/tensorflow/ir/tf_ops_helpers.inc | 20 ++++++ .../compiler/mlir/tensorflow/ir/tf_ops_n_z.cc | 8 +++ .../compiler/mlir/tensorflow/ir/tf_ops_n_z.h | 1 + .../mlir/tensorflow/tests/canonicalize.mlir | 33 ++++++++++ .../mlir2graphdef/functional-if-ops.mlir | 34 ++++++++-- .../mlir2graphdef/functional-while-ops.mlir | 36 +++++++++-- .../tests/resource-device-inference.mlir | 8 +-- .../tensorflow/tests/resource_op_lifting.mlir | 24 +++---- .../tensorflow/tests/shape_inference.mlir | 9 +-- .../tpu-variable-runtime-reformatting.mlir | 9 +-- .../tests/tpu_space_to_depth_pass.mlir | 2 +- .../region_control_flow_to_functional.cc | 3 +- .../transforms/resource_op_lifting.cc | 10 +-- .../tensorflow/transforms/shape_inference.cc | 63 ++++++++++++------- .../transforms/stack_ops_decomposition.cc | 8 --- .../tensor_array_ops_decomposition.cc | 4 -- .../tensor_list_ops_decomposition.cc | 8 --- .../tpu_variable_runtime_reformatting.cc | 10 --- 22 files changed, 220 insertions(+), 106 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h index d06dce81e09..039ed1bc3a8 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h @@ -23,6 +23,7 @@ limitations under the License. 
#include "mlir/IR/Attributes.h" // from @llvm-project #include "mlir/IR/Builders.h" // from @llvm-project #include "mlir/IR/Dialect.h" // from @llvm-project +#include "mlir/IR/Function.h" // from @llvm-project #include "mlir/IR/Matchers.h" // from @llvm-project #include "mlir/IR/Module.h" // from @llvm-project #include "mlir/IR/OpImplementation.h" // from @llvm-project diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td index 2b64b4595cf..6e81cf57d9a 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td @@ -225,12 +225,25 @@ else_branch: A function that takes 'inputs' and returns a list of TF_DerivedOperandTypeAttr Tcond = TF_DerivedOperandTypeAttr<0>; TF_DerivedOperandTypeListAttr Tin = TF_DerivedOperandTypeListAttr<1>; TF_DerivedResultTypeListAttr Tout = TF_DerivedResultTypeListAttr<0>; + TF_DerivedResultShapeListAttr output_shapes = TF_DerivedResultShapeListAttr<0>; let verifier = [{ return Verify(*this); }]; let hasCanonicalizer = 1; + + let extraClassDeclaration = [{ + // Get the then branch function. + FuncOp then_func() { + return getParentOfType().lookupSymbol(then_branch()); + } + + // Get the else branch function. + FuncOp else_func() { + return getParentOfType().lookupSymbol(else_branch()); + } + }]; } def TF_YieldOp : TF_Op<"Yield", @@ -612,7 +625,6 @@ body: A function that takes a list of tensors and returns another FlatSymbolRefAttr:$cond, FlatSymbolRefAttr:$body, - DefaultValuedAttr:$output_shapes, DefaultValuedAttr:$parallel_iterations, // Used to map StatelessWhile and While op defined in TensorFlow to a common @@ -625,10 +637,24 @@ body: A function that takes a list of tensors and returns another ); TF_DerivedOperandTypeListAttr T = TF_DerivedOperandTypeListAttr<0>; + TF_DerivedResultShapeListAttr output_shapes = TF_DerivedResultShapeListAttr<0>; let verifier = [{ return Verify(*this); }]; + let hasCanonicalizer = 1; + + let extraClassDeclaration = [{ + // Get the condition function. + FuncOp cond_func() { + return getParentOfType().lookupSymbol(cond()); + } + + // Get the body function. + FuncOp body_func() { + return getParentOfType().lookupSymbol(body()); + } + }]; } def TL_WhileRegionOp : TF_Op<"WhileRegion", diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc index f4f9ec42864..6183dde8581 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc @@ -1615,6 +1615,10 @@ static LogicalResult Verify(IfOp op) { return success(); } +//===----------------------------------------------------------------------===// +// IfOp canonicalization. 
+//===----------------------------------------------------------------------===// + class FoldConstantIfOp : public OpRewritePattern { public: explicit FoldConstantIfOp(MLIRContext *context) @@ -1662,7 +1666,7 @@ LogicalResult FoldConstantIfOp::matchAndRewrite( void IfOp::getCanonicalizationPatterns(OwningRewritePatternList &results, MLIRContext *context) { - results.insert(context); + results.insert>(context); } //===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.h index b2b78da8993..19a927a23d7 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.h +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.h @@ -20,6 +20,7 @@ limitations under the License. #include "mlir/IR/Attributes.h" // from @llvm-project #include "mlir/IR/Builders.h" // from @llvm-project #include "mlir/IR/Dialect.h" // from @llvm-project +#include "mlir/IR/Function.h" // from @llvm-project #include "mlir/IR/Matchers.h" // from @llvm-project #include "mlir/IR/Module.h" // from @llvm-project #include "mlir/IR/OpImplementation.h" // from @llvm-project diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_helpers.inc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_helpers.inc index 33d51301208..71f1560aa6c 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_helpers.inc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_helpers.inc @@ -578,3 +578,23 @@ LogicalResult VerifyRegionResults(Operation *op, Region ®ion, } return success(); } + +//===----------------------------------------------------------------------===// +// Function control flow canonicalization. +//===----------------------------------------------------------------------===// + +// Eliminate attributes that are not needed, but can get attached to Ops +// during import. +template +struct DropAttributes : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + // Drop the "output_shapes" attribute. + LogicalResult matchAndRewrite(Op op, + PatternRewriter &rewriter) const override { + bool found = op.removeAttr("output_shapes") == + MutableDictionaryAttr::RemoveResult::Removed; + return success(found); + } +}; + diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc index 785b0bac820..df9476b7fe5 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc @@ -2072,6 +2072,14 @@ static LogicalResult Verify(WhileOp op) { return success(); } +//===----------------------------------------------------------------------===// +// WhileOp canonicalization. +//===----------------------------------------------------------------------===// +void WhileOp::getCanonicalizationPatterns(OwningRewritePatternList &results, + MLIRContext *context) { + results.insert>(context); +} + //===----------------------------------------------------------------------===// // WhileRegionOp //===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.h index b6e9222a370..761c06a475c 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.h +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.h @@ -20,6 +20,7 @@ limitations under the License. 
#include "mlir/IR/Attributes.h" // from @llvm-project #include "mlir/IR/Builders.h" // from @llvm-project #include "mlir/IR/Dialect.h" // from @llvm-project +#include "mlir/IR/Function.h" // from @llvm-project #include "mlir/IR/Matchers.h" // from @llvm-project #include "mlir/IR/Module.h" // from @llvm-project #include "mlir/IR/OpImplementation.h" // from @llvm-project diff --git a/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir b/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir index 42659f41c21..007c123a034 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir @@ -985,3 +985,36 @@ func @testWhileRegionUnusedValue(%arg0 : tensor<*xf32>, %arg1 : tensor, %ar // CHECK: return %[[WHILE_OUT]]#0 : tensor<*xf32> return %0#0 : tensor<*xf32> } + +// Check that output_shapes attribute is removed for tf.If +func @testIfThen(tensor<*xf32>) -> tensor<*xf32> +func @testIfElse(tensor<*xf32>) -> tensor<*xf32> +// CHECK-LABEL: func @testIfDropOutputShapes +func @testIfDropOutputShapes(tensor, tensor<2xf32>) -> tensor<2xf32> { +^bb0(%arg0: tensor, %arg1: tensor<2xf32>): + // CHECK: "tf.If" + // CHECK-NOT: output_shapes + %1 = "tf.If"(%arg0, %arg1) { + then_branch = @testIfThen, else_branch = @testIfElse, is_stateless = false, output_shapes = [#tf.shape<>] + } : (tensor, tensor<2xf32>) -> tensor<2xf32> + + return %1 : tensor<2xf32> +} + +// Check that output_shapes attribute is removed for tf.Whileß +func @testWhileCond(tensor<*xf32>) -> (tensor) +func @testWhileBody(tensor<*xf32>) -> (tensor<*xf32>) +// CHECK-LABEL: func @testWhileDropOutputShapes +func @testWhileDropOutputShapes(tensor<*xf32>) -> (tensor<*xf32>) { +^bb0(%arg0: tensor<*xf32>): + // CHECK: "tf.While" + // CHECK-NOT: output_shapes + %1 = "tf.While"(%arg0) { + cond = @testWhileCond, + body = @testWhileBody, + is_stateless = false, + output_shapes = [#tf.shape<>] + } : (tensor<*xf32>) -> (tensor<*xf32>) + + return %1 : tensor<*xf32> +} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/functional-if-ops.mlir b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/functional-if-ops.mlir index d9ad36f2ce6..b6933459382 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/functional-if-ops.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/functional-if-ops.mlir @@ -1,13 +1,13 @@ // RUN: tf-mlir-translate -mlir-to-graphdef %s -o - | FileCheck %s -func @main(%arg0: tensor, %arg1: tensor) -> (tensor, tensor) { +func @main(%arg0: tensor, %arg1: tensor, %arg2: tensor<4xf32>, %arg3: tensor<4xf32>) -> (tensor<4xf32>, tensor<4xf32>) { %0:2 = tf_executor.graph { %outputs_2, %control_3 = tf_executor.island wraps "tf.Less"(%arg0, %arg1) : (tensor, tensor) -> tensor - %outputs_4, %control_5 = tf_executor.island wraps "tf.If"(%outputs_2, %arg0, %arg1) {else_branch = @cond_false, is_stateless = false, then_branch = @cond_true} : (tensor, tensor, tensor) -> tensor loc("StatefulIf") - %outputs_6, %control_7 = tf_executor.island wraps "tf.If"(%outputs_2, %arg0, %arg1) {else_branch = @cond_false, is_stateless = true, then_branch = @cond_true} : (tensor, tensor, tensor) -> tensor loc("StatelessIf") - tf_executor.fetch %outputs_4, %outputs_6 : tensor, tensor + %outputs_4, %control_5 = tf_executor.island wraps "tf.If"(%outputs_2, %arg2, %arg3) {else_branch = @cond_false, is_stateless = false, then_branch = @cond_true} : (tensor, tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> loc("StatefulIf") + %outputs_6, 
%control_7 = tf_executor.island wraps "tf.If"(%outputs_2, %arg2, %arg3) {else_branch = @cond_false, is_stateless = true, then_branch = @cond_true} : (tensor, tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32> loc("StatelessIf") + tf_executor.fetch %outputs_4, %outputs_6 : tensor<4xf32>, tensor<4xf32> } - return %0#0, %0#1 : tensor, tensor + return %0#0, %0#1 : tensor<4xf32>, tensor<4xf32> } func @cond_true(%arg0: tensor<*xf32>, %arg1: tensor<*xf32>) -> tensor<*xf32> { @@ -34,8 +34,32 @@ func @cond_false(%arg0: tensor<*xf32>, %arg1: tensor<*xf32>) -> tensor<*xf32> { // CHECK-NOT: name: // CHECK: op: "If" // CHECK-NOT: is_stateless +// CHECK: attr { +// CHECK: key: "output_shapes" +// CHECK: value { +// CHECK: list { +// CHECK: shape { +// CHECK: dim { +// CHECK: size: 4 +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } // CHECK: name: "StatelessIf" // CHECK-NOT: name: // CHECK: op: "StatelessIf" // CHECK-NOT: is_stateless +// CHECK: attr { +// CHECK: key: "output_shapes" +// CHECK: value { +// CHECK: list { +// CHECK: shape { +// CHECK: dim { +// CHECK: size: 4 +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } diff --git a/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/functional-while-ops.mlir b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/functional-while-ops.mlir index 9f14a144d9d..c7a4630d985 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/functional-while-ops.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/functional-while-ops.mlir @@ -1,12 +1,12 @@ // RUN: tf-mlir-translate -mlir-to-graphdef %s -o - | FileCheck %s -func @main(%arg0: tensor, %arg1: tensor) -> (tensor, tensor) { +func @main(%arg0: tensor, %arg1: tensor<5xf32>) -> (tensor<5xf32>, tensor<5xf32>) { %0:2 = tf_executor.graph { - %outputs_2:2, %control_3 = tf_executor.island wraps "tf.While"(%arg0, %arg1) {body = @body, cond = @cond, is_stateless = false} : (tensor, tensor) -> (tensor, tensor) loc("StatefulWhile") - %outputs_4:2, %control_5 = tf_executor.island wraps "tf.While"(%arg0, %arg1) {body = @body, cond = @cond, is_stateless = true} : (tensor, tensor) -> (tensor, tensor) loc("StatelessWhile") - tf_executor.fetch %outputs_2#1, %outputs_4#1 : tensor, tensor + %outputs_2:2, %control_3 = tf_executor.island wraps "tf.While"(%arg0, %arg1) {body = @body, cond = @cond, is_stateless = false} : (tensor, tensor<5xf32>) -> (tensor, tensor<5xf32>) loc("StatefulWhile") + %outputs_4:2, %control_5 = tf_executor.island wraps "tf.While"(%arg0, %arg1) {body = @body, cond = @cond, is_stateless = true} : (tensor, tensor<5xf32>) -> (tensor, tensor<5xf32>) loc("StatelessWhile") + tf_executor.fetch %outputs_2#1, %outputs_4#1 : tensor<5xf32>, tensor<5xf32> } - return %0#0, %0#1 : tensor, tensor + return %0#0, %0#1 : tensor<5xf32>, tensor<5xf32> } func @cond(%arg0: tensor<*xi32>, %arg1: tensor<*xf32>) -> tensor { @@ -36,8 +36,34 @@ func @body(%arg0: tensor<*xi32>, %arg1: tensor<*xf32>) -> (tensor<*xi32>, tensor // CHECK-NOT: name: // CHECK: op: "While" // CHECK-NOT: is_stateless +// CHECK: attr { +// CHECK: key: "output_shapes" +// CHECK: value { +// CHECK: list { +// CHECK: shape { +// CHECK: dim { +// CHECK: size: 5 +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } + // CHECK: name: "StatelessWhile" // CHECK-NOT: name: // CHECK: op: "StatelessWhile" // CHECK-NOT: is_stateless +// CHECK: attr { +// CHECK: key: "output_shapes" +// CHECK: value { +// CHECK: list { +// CHECK: shape { +// CHECK: dim { +// CHECK: size: 5 +// CHECK: } +// CHECK: } 
+// CHECK: } +// CHECK: } +// CHECK: } + diff --git a/tensorflow/compiler/mlir/tensorflow/tests/resource-device-inference.mlir b/tensorflow/compiler/mlir/tensorflow/tests/resource-device-inference.mlir index a9e814c647e..a4a7c1dad2e 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/resource-device-inference.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/resource-device-inference.mlir @@ -56,7 +56,7 @@ func @propagate_if_op( "tf.If"(%arg1, %id0, %var_handle) { then_branch = @if_then, else_branch = @if_else, - output_shapes = [], is_stateless = false} + is_stateless = false} : (tensor, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) -> () tf_executor.yield @@ -128,8 +128,7 @@ func @propagate_while_op( // CHECK-NEXT: "tf.While" "tf.While"(%arg1, %id0, %var_handle) { body = @while_body, - cond = @while_cond, - output_shapes = [], is_stateless = false} + cond = @while_cond, is_stateless = false} : (tensor, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) -> (tensor, tensor<*x!tf.resource>>, @@ -209,8 +208,7 @@ func @error_on_conflict_multiple_callers( : () -> tensor<*x!tf.resource>> "tf.If"(%arg1, %id0, %var_handle) { then_branch = @if_then_and_else, - else_branch = @if_then_and_else, - output_shapes = [], is_stateless = false} + else_branch = @if_then_and_else, is_stateless = false} : (tensor, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) -> () "tf.If"(%arg1, %var_handle, %id0) { diff --git a/tensorflow/compiler/mlir/tensorflow/tests/resource_op_lifting.mlir b/tensorflow/compiler/mlir/tensorflow/tests/resource_op_lifting.mlir index cd93e1423ea..d8a87b9bdb4 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/resource_op_lifting.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/resource_op_lifting.mlir @@ -147,8 +147,7 @@ func @cluster_with_loop() -> () { "tf_device.cluster"() ( { // CHECK: %[[WHILE:.*]]:2 = "tf.While"(%[[COUNT]], %[[READ]]) %2:3 = "tf.While"(%0, %1, %unused) - {body = @while_body, cond = @while_cond, device = "", is_stateless = false, - output_shapes = [#tf.shape<>, #tf.shape<>]} + {body = @while_body, cond = @while_cond, device = "", is_stateless = false} : (tensor, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) -> (tensor, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) // CHECK: tf_device.return %[[WHILE]]#1 : tensor @@ -197,8 +196,7 @@ func @cluster_with_loop() -> () { "tf_device.cluster"() ( { // CHECK: %[[WHILE:.*]] = "tf.While"(%[[READ]]) %1 = "tf.While"(%0) { - body = @while_body, cond = @while_cond, device = "", is_stateless = false, - output_shapes = [#tf.shape<>]} + body = @while_body, cond = @while_cond, device = "", is_stateless = false} : (tensor<*x!tf.resource>>) -> (tensor<*x!tf.resource>>) // CHECK: tf_device.return %[[WHILE]] : tensor @@ -239,8 +237,7 @@ func @cluster_with_loop() -> () { "tf_device.cluster"() ( { // CHECK: %[[WHILE:.*]] = "tf.While"(%[[READ]]) %1 = "tf.While"(%0) { - body = @while_body, cond = @while_cond, device = "", is_stateless = false, - output_shapes = [#tf.shape<>]} + body = @while_body, cond = @while_cond, device = "", is_stateless = false} : (tensor<*x!tf.resource>>) -> (tensor<*x!tf.resource>>) // CHECK: tf_device.return @@ -278,8 +275,7 @@ func @cluster_with_nested_loop() -> () { "tf_device.cluster"() ( { // CHECK: %[[WHILE:.*]] = "tf.While"(%[[READ]]) %2:2 = "tf.While"(%0, %1) { - body = @while_body, cond = @while_cond, device = "", is_stateless = false, - output_shapes = [#tf.shape<>, #tf.shape<>]} + body = @while_body, cond = @while_cond, device = "", is_stateless = false} : (tensor<*x!tf.resource>>, 
tensor<*x!tf.resource>>) -> (tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) // CHECK: tf_device.return %[[WHILE]] : tensor @@ -295,8 +291,7 @@ func @while_body(%arg0: tensor<*x!tf.resource>>, %arg1: tensor<*x!tf -> (tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) { // CHECK: %[[WHILE:.*]] = "tf.While"(%[[BARG0]]) %0:2 = "tf.While"(%arg0, %arg1) { - body = @while_body1, cond = @while_cond1, device = "", is_stateless = false, - output_shapes = [#tf.shape<>, #tf.shape<>]} + body = @while_body1, cond = @while_cond1, device = "", is_stateless = false} : (tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) -> (tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) // CHECK-NEXT: return %[[WHILE]] @@ -334,8 +329,7 @@ func @cluster_with_loop() -> () { %0 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource>> "tf_device.cluster"() ( { %1 = "tf.While"(%0) { - body = @while_body, cond = @while_cond, device = "", is_stateless = false, - output_shapes = [#tf.shape<>]} + body = @while_body, cond = @while_cond, device = "", is_stateless = false} : (tensor<*x!tf.resource>>) -> (tensor<*x!tf.resource>>) tf_device.return }) {cluster_attr = "cluster_attr"} : () -> () @@ -359,8 +353,7 @@ func @cluster_with_loop() -> () { %0 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource>> "tf_device.cluster"() ( { %1 = "tf.While"(%0) { - body = @while_body, cond = @while_cond, device = "", is_stateless = false, - output_shapes = [#tf.shape<>]} + body = @while_body, cond = @while_cond, device = "", is_stateless = false} : (tensor<*x!tf.resource>>) -> (tensor<*x!tf.resource>>) tf_device.return }) {cluster_attr = "cluster_attr"} : () -> () @@ -384,8 +377,7 @@ func @cluster_with_loop() -> () { %0 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource>> "tf_device.cluster"() ( { %1 = "tf.While"(%0) { - body = @while_body, cond = @while_cond, device = "", is_stateless = false, - output_shapes = [#tf.shape<>]} + body = @while_body, cond = @while_cond, device = "", is_stateless = false} : (tensor<*x!tf.resource>>) -> (tensor<*x!tf.resource>>) tf_device.return }) {cluster_attr = "cluster_attr"} : () -> () diff --git a/tensorflow/compiler/mlir/tensorflow/tests/shape_inference.mlir b/tensorflow/compiler/mlir/tensorflow/tests/shape_inference.mlir index 5a8f63ec63d..4a5e3c8deaa 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/shape_inference.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/shape_inference.mlir @@ -100,10 +100,11 @@ func @multiple_blocks_one_return(%arg0: tensor) -> tensor<*xf32> { return %1 : tensor } - // CHECK-LABEL: func @shape_from_if_to_branch_functions - func @shape_from_if_to_branch_functions(%arg0: tensor, %arg1: tensor<1x2x3xf32>) -> tensor<1x2x3xf32> { - %0 = "tf.If"(%arg0, %arg1) {Tcond = i1, Tin = ["tfdtype$DT_FLOAT"], Tout = ["tfdtype$DT_FLOAT"], _xla_propagate_compile_time_consts = true, device = "", else_branch = @if_else_branch, is_stateless = true, name = "if", then_branch = @if_then_branch} : (tensor, tensor<1x2x3xf32>) -> tensor<1x2x3xf32> - return %0 : tensor<1x2x3xf32> + // CHECK-LABEL: func @shape_from_if_to_branch_functions_to_results + // CHECK-SAME: (%arg0: tensor, %arg1: tensor<1x2x3xf32>) -> tensor<1x2x3xf32> + func @shape_from_if_to_branch_functions_to_results(%arg0: tensor, %arg1: tensor<1x2x3xf32>) -> tensor<*xf32> { + %0 = "tf.If"(%arg0, %arg1) {Tcond = i1, Tin = ["tfdtype$DT_FLOAT"], Tout = ["tfdtype$DT_FLOAT"], else_branch = @if_else_branch, is_stateless = true, name = "if", 
then_branch = @if_then_branch} : (tensor, tensor<1x2x3xf32>) -> tensor<*xf32> + return %0 : tensor<*xf32> } // CHECK-LABEL: func @if_then_branch diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tpu-variable-runtime-reformatting.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tpu-variable-runtime-reformatting.mlir index 43be8743e51..1e308b42bfc 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tpu-variable-runtime-reformatting.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tpu-variable-runtime-reformatting.mlir @@ -20,8 +20,7 @@ module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, pr {T = ["tfdtype$DT_INT32", "tfdtype$DT_RESOURCE", "tfdtype$DT_RESOURCE", "tfdtype$DT_RESOURCE", "tfdtype$DT_RESOURCE"], body = @while_body_7560, - cond = @while_cond_7550, device = "", is_stateless = false, - output_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>]} + cond = @while_cond_7550, device = "", is_stateless = false} : (tensor, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) -> (tensor, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>, @@ -217,8 +216,7 @@ module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, pr {T = ["tfdtype$DT_INT32", "tfdtype$DT_RESOURCE", "tfdtype$DT_RESOURCE", "tfdtype$DT_RESOURCE", "tfdtype$DT_RESOURCE"], body = @while_body_7560, - cond = @while_cond_7550, device = "", is_stateless = false, - output_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>]} + cond = @while_cond_7550, device = "", is_stateless = false} : (tensor, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) -> (tensor, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>, @@ -305,8 +303,7 @@ module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, pr {T = ["tfdtype$DT_INT32", "tfdtype$DT_RESOURCE", "tfdtype$DT_RESOURCE", "tfdtype$DT_RESOURCE"], body = @while_body_7560, - cond = @while_cond_7550, device = "", is_stateless = false, - output_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>]} + cond = @while_cond_7550, device = "", is_stateless = false} : (tensor, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>) -> (tensor, tensor<*x!tf.resource>>, tensor<*x!tf.resource>>, diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tpu_space_to_depth_pass.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tpu_space_to_depth_pass.mlir index 199426b1aa9..280986a7ee1 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tpu_space_to_depth_pass.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tpu_space_to_depth_pass.mlir @@ -7,7 +7,7 @@ module attributes {tf.devices = {"/job:localhost/replica:0/task:0/device:CPU:0" %0 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor %1 = "tf.Const"() {value = dense<2> : tensor} : () -> tensor %2 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor - %3:10 = "tf.While"(%2, %1, %2, %0, %1, %arg2, %arg4, %arg5, %arg6, %arg7) {_lower_using_switch_merge = true, _num_original_outputs = 10 : i64, _read_only_resource_inputs = [], body = @while_body_2710, cond = @while_cond_2700, device = "", is_stateless = false, output_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>, #tf.shape<>], parallel_iterations = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor>>, tensor>>, tensor>>, tensor>>) -> (tensor, tensor, tensor, 
tensor, tensor, tensor, tensor>>, tensor>>, tensor>>, tensor>>) + %3:10 = "tf.While"(%2, %1, %2, %0, %1, %arg2, %arg4, %arg5, %arg6, %arg7) {_lower_using_switch_merge = true, _num_original_outputs = 10 : i64, _read_only_resource_inputs = [], body = @while_body_2710, cond = @while_cond_2700, device = "", is_stateless = false, parallel_iterations = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor>>, tensor>>, tensor>>, tensor>>) -> (tensor, tensor, tensor, tensor, tensor, tensor, tensor>>, tensor>>, tensor>>, tensor>>) return } // CHECK-LABEL: func @while_body_2710 diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/region_control_flow_to_functional.cc b/tensorflow/compiler/mlir/tensorflow/transforms/region_control_flow_to_functional.cc index f1004fa049e..2ae2a976767 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/region_control_flow_to_functional.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/region_control_flow_to_functional.cc @@ -373,8 +373,7 @@ LogicalResult RegionControlFlowToFunctional::ConvertWhileOp( OpBuilder builder(while_region); auto while_op = builder.create( while_region.getLoc(), new_result_types, new_inputs, cond_name, body_name, - builder.getArrayAttr({}), while_region.parallel_iterations(), - while_region.is_stateless()); + while_region.parallel_iterations(), while_region.is_stateless()); // Redirect old results to new results. for (auto it : llvm::zip( diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc b/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc index 74679f19941..a9caeaac50d 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc @@ -627,8 +627,6 @@ LogicalResult HandleWhileLoop(TF::WhileOp while_op, FuncOp body, FuncOp cond) { }); // Recreate the while op. OpBuilder builder(while_op); - auto new_output_shapes = FilterRange>( - while_op.output_shapes().getValue(), resource_arg_uses); // Now use the filtered original operands, which will be replaced by // AddLoadsStoresOutsideControlFlowOp(). auto new_while = builder.create( @@ -636,8 +634,7 @@ LogicalResult HandleWhileLoop(TF::WhileOp while_op, FuncOp body, FuncOp cond) { FilterRange(while_op.getOperands(), resource_arg_uses), while_op.getAttrs()); - // Prepare for AddLoadsStoresOutsideControlFlowOp() and update - // new_output_shapes. + // Prepare for AddLoadsStoresOutsideControlFlowOp(). llvm::SmallDenseMap> arg_data_type_and_updated_output_index; for (const auto& entry : remaining_resource_data_types) { @@ -647,14 +644,9 @@ LogicalResult HandleWhileLoop(TF::WhileOp while_op, FuncOp body, FuncOp cond) { : entry.getFirst(); arg_data_type_and_updated_output_index[entry.getFirst()] = { entry.getSecond(), update_index}; - if (!new_output_shapes.empty()) { - new_output_shapes[entry.getFirst()] = - tensorflow::ConvertTypeToTensorShapeAttr(entry.getSecond()); - } } AddLoadsStoresOutsideControlFlowOp(new_while, arg_data_type_and_updated_output_index); - new_while.setAttr("output_shapes", builder.getArrayAttr(new_output_shapes)); // Replace uses. 
for (int64_t i = 0; i < old_to_new_indices.size(); ++i) { if (old_to_new_indices[i] >= 0) { diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc index 2551e68dd74..9732dac082a 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc @@ -262,22 +262,6 @@ bool InferShapeForCall(Operation* op) { return changed; } -// Infer the shape IfRegion outputs based on the shapes of the then and else -// yields. -bool InferShapeForIfRegion(IfRegionOp op) { - bool changed = false; - - Operation* then_yield = op.then_branch().front().getTerminator(); - Operation* else_yield = op.else_branch().front().getTerminator(); - for (auto result : zip(op.getResults(), then_yield->getOperandTypes(), - else_yield->getOperandTypes())) { - // If then and else types do not match, skip refinement for that result. - if (std::get<1>(result) != std::get<2>(result)) continue; - changed = RefineResultType(op, std::get<0>(result), std::get<1>(result)) || - changed; - } - return changed; -} bool InferShapeForCast(CastOp op, Dialect* tf_dialect) { Value result = op.getResult(); if (!CanBeRefined(result.getType())) return false; @@ -306,6 +290,37 @@ bool InferShapeForCast(CastOp op, Dialect* tf_dialect) { return true; } +// Infer the shape IfOp outputs based on the shapes of the then and else +// function result types. +bool InferShapeForIf(IfOp op) { + bool changed = false; + auto then_results = op.then_func().getType().getResults(); + auto else_results = op.else_func().getType().getResults(); + for (auto it : llvm::zip(op.getResults(), then_results, else_results)) { + // If then and else types do not match, skip refinement for that result. + if (std::get<1>(it) != std::get<2>(it)) continue; + changed = RefineResultType(op, std::get<0>(it), std::get<1>(it)) || changed; + } + return changed; +} + +// Infer the shape IfRegion outputs based on the shapes of the then and else +// yields. +bool InferShapeForIfRegion(IfRegionOp op) { + bool changed = false; + + Operation* then_yield = op.then_branch().front().getTerminator(); + Operation* else_yield = op.else_branch().front().getTerminator(); + for (auto result : zip(op.getResults(), then_yield->getOperandTypes(), + else_yield->getOperandTypes())) { + // If then and else types do not match, skip refinement for that result. + if (std::get<1>(result) != std::get<2>(result)) continue; + changed = RefineResultType(op, std::get<0>(result), std::get<1>(result)) || + changed; + } + return changed; +} + bool RefineWithInferTypeOpInterface(InferTypeOpInterface infer_ti, Dialect* tf_dialect) { Operation* op = infer_ti.getOperation(); @@ -768,17 +783,23 @@ bool ShapeInference::InferShapeForSingleOperation(Operation* op) { op)) return InferShapeForCall(op); - // Handle IfRegion operations by infering return shape from the then and else - // branches. - if (auto if_region = dyn_cast(op)) - return InferShapeForIfRegion(if_region); - // tf.Cast are only inferred if they have at least one user in the TF dialect // or feeding into the function return. This is necessary to avoid inserting // casts which cannot be refined. if (auto cast_op = dyn_cast(op)) return InferShapeForCast(cast_op, tf_dialect_); + // Handle IfOp here by inferring the shape from the else/then function + // results. 
Since `output_shapes` is a derived attribute, avoid going down the + // TF InferenceContext path as IfOp shape inference is implemented as just + // a lookup of the output_shapes attribute. + if (auto if_op = dyn_cast(op)) return InferShapeForIf(if_op); + + // Handle IfRegion operations by infering return shape from the then and else + // branches. + if (auto if_region = dyn_cast(op)) + return InferShapeForIfRegion(if_region); + StringRef op_name = op->getName().getStringRef(); // Drop the `tf.` prefix to query TF registry. auto node_name = diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/stack_ops_decomposition.cc b/tensorflow/compiler/mlir/tensorflow/transforms/stack_ops_decomposition.cc index 8d6e1c2ce30..2dc45ee9816 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/stack_ops_decomposition.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/stack_ops_decomposition.cc @@ -197,24 +197,16 @@ LogicalResult HandleWhileOp( if (!signature_change) return success(); // Create the new while op. auto new_while_operands = llvm::to_vector<8>(while_op.getOperands()); - auto new_output_shapes = - llvm::to_vector<8>(while_op.output_shapes().getValue()); OpBuilder builder(while_op); assert(while_op.getNumOperands() == while_op.getNumResults()); for (int64_t i = 0; i < while_op.getNumResults(); ++i) { auto it = data_var_to_size_var.find(while_op.getOperand(i)); if (it == data_var_to_size_var.end()) continue; new_while_operands.push_back(it->getSecond()); - if (!new_output_shapes.empty()) { - // Size is a scalar shape. - new_output_shapes.push_back( - mlir::TF::ShapeAttr::get(builder.getContext(), ArrayRef())); - } } auto new_while = builder.create(while_op.getLoc(), body.getType().getInputs(), new_while_operands, while_op.getAttrs()); - new_while.setAttr("output_shapes", builder.getArrayAttr(new_output_shapes)); for (int64_t i = 0; i < while_op.getNumResults(); ++i) { if (!getElementTypeOrSelf(while_op.getOperand(i).getType()) .isa()) { diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tensor_array_ops_decomposition.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tensor_array_ops_decomposition.cc index cb30bc35a7a..2c3422e3e00 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tensor_array_ops_decomposition.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tensor_array_ops_decomposition.cc @@ -595,8 +595,6 @@ LogicalResult HandleWhileOp(TF::WhileOp while_op, ModuleOp module, auto new_while = builder.create(while_op.getLoc(), body.getType().getInputs(), operands, while_op.getAttrs()); - // Clear the output shapes as it is not needed for XLA lowering. - new_while.setAttr("output_shapes", builder.getArrayAttr({})); for (int64_t i = 0; i < while_op.getNumOperands(); ++i) { if (ta_arg_buffer_type(i)) { while_op.getResult(i).replaceAllUsesWith(while_op.getOperand(i)); @@ -663,8 +661,6 @@ LogicalResult HandleIfOp(TF::IfOp if_op, ModuleOp module, auto new_if = builder.create(if_op.getLoc(), then_branch.getType().getResults(), operands, if_op.getAttrs()); - // Clear the output shapes as it is not needed for XLA lowering. 
- new_if.setAttr("output_shapes", builder.getArrayAttr({})); auto ret_forwards_input = [](FuncOp f, int64_t ret_ind) -> int64_t { auto retval = f.front().getTerminator()->getOperand(ret_ind); auto arg = retval.dyn_cast(); diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tensor_list_ops_decomposition.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tensor_list_ops_decomposition.cc index 5cbc42a862c..cd055a8dc4a 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tensor_list_ops_decomposition.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tensor_list_ops_decomposition.cc @@ -190,22 +190,14 @@ LogicalResult HandleWhileOp( } // Create the new while op. auto new_while_operands = llvm::to_vector<8>(while_op.getOperands()); - auto new_output_shapes = - llvm::to_vector<8>(while_op.output_shapes().getValue()); for (int64_t i = 0; i < while_op.getNumResults(); ++i) { auto it = buffer_to_size->find(while_op.getOperand(i)); if (it == buffer_to_size->end()) continue; new_while_operands.push_back(it->getSecond().size); - if (!new_output_shapes.empty()) { - // Size is a scalar shape. - new_output_shapes.push_back( - mlir::TF::ShapeAttr::get(builder.getContext(), ArrayRef())); - } } auto new_while = builder.create(while_op.getLoc(), body.getType().getInputs(), new_while_operands, while_op.getAttrs()); - new_while.setAttr("output_shapes", builder.getArrayAttr(new_output_shapes)); for (const auto& entry : output_buffer_to_size) { (*buffer_to_size)[new_while.getResult(std::get<0>(entry))] = { new_while.getResult(std::get<1>(entry)), std::get<2>(entry)}; diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_variable_runtime_reformatting.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_variable_runtime_reformatting.cc index 12ce8c57f73..2b2a33b8bc2 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_variable_runtime_reformatting.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_variable_runtime_reformatting.cc @@ -365,16 +365,6 @@ TF::WhileOp AddStateVarsToWhileOp(TF::WhileOp while_op, FuncOp body, while_op.getLoc(), append_types(llvm::to_vector<4>(while_op.getResultTypes())), new_while_operands, while_op.getAttrs()); - if (new_while_op.output_shapes().size() != 0) { - auto new_output_shapes = llvm::to_vector<4>(new_while_op.output_shapes()); - // VarHandleOp is a scalar shape resource. 
- for (int64_t i = 0; i < state_vars.size(); ++i) { - new_output_shapes.push_back( - mlir::TF::ShapeAttr::get(builder.getContext(), ArrayRef())); - } - new_while_op.setAttr("output_shapes", - builder.getArrayAttr(new_output_shapes)); - } while_op.replaceAllUsesWith( new_while_op.getResults().take_front(while_op.getNumResults())); while_op.erase(); From 81c6f051ee1d8ee6b3b7bfe23439ea046d776435 Mon Sep 17 00:00:00 2001 From: Xingyu Long Date: Fri, 24 Jul 2020 12:47:06 -0400 Subject: [PATCH 1265/2522] Fix benchmarkname and update BUILD file --- tensorflow/python/keras/benchmarks/BUILD | 1 - .../antirectifier_benchmark_test.py | 10 +++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/keras/benchmarks/BUILD b/tensorflow/python/keras/benchmarks/BUILD index bfb2c2af395..5ab421f9072 100644 --- a/tensorflow/python/keras/benchmarks/BUILD +++ b/tensorflow/python/keras/benchmarks/BUILD @@ -83,7 +83,6 @@ py_library( cuda_py_test( name = "bidirectional_lstm_benchmark_test", - size = "medium", srcs = ["keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py"], python_version = "PY3", tags = COMMON_TAGS, diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/antirectifier_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/antirectifier_benchmark_test.py index b3d24cf7e24..a7c171a6f68 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/antirectifier_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/antirectifier_benchmark_test.py @@ -53,7 +53,7 @@ class AntirectifierBenchmark(tf.test.Benchmark): # optimizer: Optimizer for model. # Check more details in `measure_performance()` method of # benchmark_util. - def benchmark_anti_bs_128(self): + def benchmark_antirectifier_bs_128(self): """Measure performance with batch_size=128 and run_iters=2.""" batch_size = 128 run_iters = 2 @@ -70,7 +70,7 @@ class AntirectifierBenchmark(tf.test.Benchmark): self.report_benchmark( iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras) - def benchmark_anti_bs_256(self): + def benchmark_antirectifier_bs_256(self): """Measure performance with batch_size=256 and run_iters=3.""" batch_size = 256 run_iters = 3 @@ -87,7 +87,7 @@ class AntirectifierBenchmark(tf.test.Benchmark): self.report_benchmark( iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras) - def benchmark_anti_bs_512(self): + def benchmark_antirectifier_bs_512(self): """Measure performance with batch_size=512 and run_iters=4.""" batch_size = 512 run_iters = 4 @@ -104,7 +104,7 @@ class AntirectifierBenchmark(tf.test.Benchmark): self.report_benchmark( iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras) - def benchmark_anti_bs_512_gpu_1(self): + def benchmark_antirectifier_bs_512_gpu_1(self): """Measure performance with batch_size=512, run_iters=4, gpu=1 and distribution_strategy=`mirrored`.""" batch_size = 512 @@ -124,7 +124,7 @@ class AntirectifierBenchmark(tf.test.Benchmark): self.report_benchmark( iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras) - def benchmark_anti_bs_512_gpu_2(self): + def benchmark_antirectifier_bs_512_gpu_2(self): """Measure performance with batch_size=512, run_iters=4, gpu=2 and distribution_strategy=`mirrored`.""" batch_size = 512 From d8891649b204c1b2df0bf76fdf9c8b3e56c25578 Mon Sep 17 00:00:00 2001 From: Vignesh Kothapalli Date: Fri, 24 Jul 2020 22:31:26 +0530 Subject: [PATCH 1266/2522] removed unnecessary import --- 
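As background for the unique_test edits in this patch and the earlier one that removed the type-mismatch tests, a minimal sketch of the API under test, assuming tf.data.experimental.unique(); the input values are illustrative only.

```python
import tensorflow as tf

# unique() keeps the first occurrence of each element, preserving order.
ds = tf.data.Dataset.from_tensor_slices([1, 2, 1, 3, 3, 2])
ds = ds.apply(tf.data.experimental.unique())
print(list(ds.as_numpy_iterator()))  # [1, 2, 3]
```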
tensorflow/python/data/experimental/kernel_tests/unique_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/python/data/experimental/kernel_tests/unique_test.py b/tensorflow/python/data/experimental/kernel_tests/unique_test.py index af17eec7502..292338b9869 100644 --- a/tensorflow/python/data/experimental/kernel_tests/unique_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/unique_test.py @@ -25,7 +25,6 @@ from tensorflow.python.data.kernel_tests import test_base from tensorflow.python.data.ops import dataset_ops from tensorflow.python.framework import combinations from tensorflow.python.framework import dtypes -from tensorflow.python.framework import errors from tensorflow.python.platform import test from tensorflow.python.util import compat From 7d7b1363075d3d7016eec162329e5634648dd087 Mon Sep 17 00:00:00 2001 From: Blake Hechtman Date: Fri, 24 Jul 2020 09:59:38 -0700 Subject: [PATCH 1267/2522] [XLA] Allow for Mixed precision dot and convolution. PiperOrigin-RevId: 323015917 Change-Id: I279b96b220d1b4671f603228b676af4bb32789ee --- .../compiler/xla/service/shape_inference.cc | 10 --- tensorflow/compiler/xla/shape_util.h | 8 +++ .../compiler/xla/tests/dot_operation_test.cc | 69 +++++++++++++++++++ tensorflow/core/tpu/tpu_defs.h | 6 +- 4 files changed, 80 insertions(+), 13 deletions(-) diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index ec8e4d23d21..29a728c068e 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -643,11 +643,6 @@ Status ValidateDotDimensionNumbers( return InvalidArgument("%s", message); }; - // Check if both element types are the same. - if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(lhs, rhs)) { - return fail("Element types do not match."); - } - // Validate basic properties of dot dimension numbers. TF_RETURN_IF_ERROR(ValidateDotDimensionNumbers(lhs, rhs, dimension_numbers)); @@ -1621,11 +1616,6 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation, batch_group_count, feature_group_count); } - if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(lhs, rhs)) { - return InvalidArgument( - "Convolution with different element types: %s and %s.", - ShapeUtil::HumanString(lhs), ShapeUtil::HumanString(rhs)); - } if (dnums.input_spatial_dimensions_size() != dnums.kernel_spatial_dimensions_size()) { return InvalidArgument( diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index 3789d828528..3f69a8b0aca 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -269,6 +269,14 @@ class ShapeUtil { if (SameElementType(a, b)) { return a.element_type(); } + // If only one of A and B are floating use the floating point type. + if (ElementIsFloating(a) && !ElementIsFloating(b)) { + return a.element_type(); + } + if (ElementIsFloating(b) && !ElementIsFloating(a)) { + return b.element_type(); + } + // Use the higher precision type. return primitive_util::BitWidth(a.element_type()) < primitive_util::BitWidth(b.element_type()) ? 
b.element_type() diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc index 71cfd95f77f..60ba27b2050 100644 --- a/tensorflow/compiler/xla/tests/dot_operation_test.cc +++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc @@ -1463,6 +1463,75 @@ ENTRY SmallIntegerDot { EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0, 0})); } +XLA_TEST_F(DotOperationTextTest, DISABLED_ON_CPU(U16IotaDot)) { + absl::string_view hlo_string = + R"( +HloModule SmallIntegerDot + +ENTRY SmallIntegerDot { + arg0 = u16[5,55,8] parameter(0) + arg1 = u16[5,8,200] parameter(1) + dot = u16[5,55,200] dot(arg0, arg1), lhs_batch_dims={0}, lhs_contracting_dims={2}, rhs_batch_dims={0}, rhs_contracting_dims={1} + ROOT c = s32[5,55,200] convert(dot) +} +)"; + + EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0, 0})); +} + +XLA_TEST_F(DotOperationTextTest, DISABLED_ON_CPU(U16IotaSquaredDot)) { + absl::string_view hlo_string = + R"( +HloModule SmallIntegerDot + +ENTRY SmallIntegerDot { + arg0 = u16[16,2] iota(), iota_dimension=0 + a = u16[16,2] multiply(arg0, arg0) + r = u16[16,2] multiply(a, a) + arg1 = u16[2,98] iota(), iota_dimension=1 + b = u16[2,98] multiply(arg1, arg1) + s = u16[2,98] multiply(b, b) + ROOT dot = u16[16,98] dot(r, s), lhs_contracting_dims={1}, rhs_contracting_dims={0} +} +)"; + + EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0, 0})); +} + +XLA_TEST_F(DotOperationTextTest, DISABLED_ON_CPU(S16IotaDot)) { + absl::string_view hlo_string = + R"( +HloModule SmallIntegerDot + +ENTRY SmallIntegerDot { + arg0 = s16[5,55,8] iota(), iota_dimension=1 + arg1 = s16[5,8,200] iota(), iota_dimension=2 + ROOT dot = s16[5,55,200] dot(arg0, arg1), lhs_batch_dims={0}, lhs_contracting_dims={2}, rhs_batch_dims={0}, rhs_contracting_dims={1} +} +)"; + + EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0, 0})); +} + +XLA_TEST_F(DotOperationTextTest, DISABLED_ON_CPU(S16IotaSquaredDot)) { + absl::string_view hlo_string = + R"( +HloModule SmallIntegerDot + +ENTRY SmallIntegerDot { + arg0 = s16[16,2] iota(), iota_dimension=0 + a = s16[16,2] multiply(arg0, arg0) + r = s16[16,2] multiply(a, a) + arg1 = s16[2,98] iota(), iota_dimension=1 + b = s16[2,98] multiply(arg1, arg1) + s = s16[2,98] multiply(b, b) + ROOT dot = s16[16,98] dot(r, s), lhs_contracting_dims={1}, rhs_contracting_dims={0} +} +)"; + + EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{0, 0})); +} + XLA_TEST_F(DotOperationTextTest, DISABLED_ON_CPU(S8Dot)) { absl::string_view hlo_string = R"( diff --git a/tensorflow/core/tpu/tpu_defs.h b/tensorflow/core/tpu/tpu_defs.h index 1c4b4c4e38e..696fa8dbe3e 100644 --- a/tensorflow/core/tpu/tpu_defs.h +++ b/tensorflow/core/tpu/tpu_defs.h @@ -51,10 +51,10 @@ extern const char* const kTPUReplicateAttr; extern const char* const kOutsideCompilationAttr; // Supported types for TPUs. -static constexpr std::array kTpuAllTypes = { +static constexpr std::array kTpuAllTypes = { {DT_INT32, DT_UINT32, DT_BFLOAT16, DT_FLOAT, DT_DOUBLE, DT_BOOL, - DT_COMPLEX64, DT_INT64, DT_UINT64, DT_QINT8, DT_QUINT8, DT_INT8, - DT_UINT8}}; + DT_COMPLEX64, DT_INT64, DT_UINT64, DT_QINT8, DT_QUINT8, DT_INT8, DT_UINT8, + DT_INT16, DT_UINT16}}; } // namespace tensorflow From 6b1b0e26bb350a33f94ea59b2dd49eb9a3de9eca Mon Sep 17 00:00:00 2001 From: Ce Zheng Date: Fri, 24 Jul 2020 10:01:26 -0700 Subject: [PATCH 1268/2522] Add tf.BatchFunction to Tensorflow MLIR ODS. 
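For reference, this is the Python-level usage the op models, lightly completed from the example quoted in the op description below. The outer placeholder `a`, the explicit `function.Defun` import, and the graph-mode setup are illustrative assumptions added here; only the keyword arguments shown in the description are used.

```python
# Sketch adapted from the op description; `a` and the session/graph setup are
# assumptions for illustration, not part of this change. Requires TF1-style
# graph mode.
import tensorflow.compat.v1 as tf
from tensorflow.python.framework import function
from tensorflow.python.ops import gen_batch_ops

tf.disable_eager_execution()

# This input will be captured (not batched).
y = tf.placeholder_with_default(1.0, shape=[])

@function.Defun(tf.float32)  # written as `tf.Defun` in the description below
def computation(t):
  return tf.matmul(t, t) + y

# Assumed batched input: one square float32 matrix per request.
a = tf.placeholder(tf.float32, shape=[2, 2])

b = gen_batch_ops.batch_function(
    f=computation,
    in_tensors=[a],
    captured_tensors=computation.captured_inputs,
    Tout=[o.type for o in computation.definition.signature.output_arg],
    num_batch_threads=1,
    max_batch_size=10,
    batch_timeout_micros=100000,  # 100ms
    allowed_batch_sizes=[3, 10],
    batching_queue="")
```

Concurrent session.run calls computing `b` have their inputs concatenated along the first axis and executed as one batch, as described in the op documentation below.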
PiperOrigin-RevId: 323016261 Change-Id: I55c405fa19f48fc8dbbe9201a7c5a15bbcaa60ce --- .../compiler/mlir/tensorflow/ir/tf_ops.td | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td index 6e81cf57d9a..ac2d4ad44e6 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td @@ -1256,4 +1256,74 @@ def TF_FusedBatchNormV3Op : TF_FusedBatchNormOpBase<"FusedBatchNormV3"> { ); } +def TF_BatchFunctionOp : TF_Op<"BatchFunction", [AttrSizedOperandSegments]> { + let summary = [{ +Batches all the inputs tensors to the computation done by the function. + }]; + + let description = [{ +So, for example, in the following code + + ```python + + # This input will be captured. + y = tf.placeholder_with_default(1.0, shape=[]) + + @tf.Defun(tf.float32) + def computation(a): + return tf.matmul(a, a) + y + + b = gen_batch_ops.batch_function( + f=computation + in_tensors=[a], + captured_tensors=computation.captured_inputs, + Tout=[o.type for o in computation.definition.signature.output_arg], + num_batch_threads=1, + max_batch_size=10, + batch_timeout_micros=100000, # 100ms + allowed_batch_sizes=[3, 10], + batching_queue="") + +If more than one session.run call is simultaneously trying to compute `b` +the values of `a` will be gathered, non-deterministically concatenated +along the first axis, and only one thread will run the computation. + +Assumes that all arguments of the function are Tensors which will be batched +along their first dimension. + +Arguments that are captured, are not batched. The session.run call which does +the concatenation, will use the values of the captured tensors available to it. +Therefore, typical uses of captured tensors should involve values which remain +unchanged across session.run calls. Inference is a good example of this. + +SparseTensor is not supported. The return value of the decorated function +must be a Tensor or a list/tuple of Tensors. + }]; + + let arguments = (ins + Variadic:$in_tensors, + Variadic:$captured_tensors, + + SymbolRefAttr:$f, + I64Attr:$num_batch_threads, + I64Attr:$max_batch_size, + I64Attr:$batch_timeout_micros, + DefaultValuedAttr:$max_enqueued_batches, + DefaultValuedAttr:$allowed_batch_sizes, + StrAttr:$container, + StrAttr:$shared_name, + StrAttr:$batching_queue, + DefaultValuedAttr:$enable_large_batch_splitting, + I32ElementsAttr:$operand_segment_sizes + ); + + let results = (outs + Variadic:$out_tensors + ); + + TF_DerivedOperandTypeListAttr Tin = TF_DerivedOperandTypeListAttr<0>; + TF_DerivedOperandTypeListAttr Tcaptured = TF_DerivedOperandTypeListAttr<1>; + TF_DerivedResultTypeListAttr Tout = TF_DerivedResultTypeListAttr<0>; +} + #endif // TF_OPS From 6f57dbfffabdd3b9ddf11795bca369d7d6ddeb41 Mon Sep 17 00:00:00 2001 From: Steve Chien Date: Fri, 24 Jul 2020 10:03:00 -0700 Subject: [PATCH 1269/2522] Fix typo in CentralStorageStrategy. 
PiperOrigin-RevId: 323016613 Change-Id: I682e7692cb03fa715d3271e62170043537dcfcca --- tensorflow/python/keras/engine/training.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index 0a4e8551232..18dfc4c1642 100644 --- a/tensorflow/python/keras/engine/training.py +++ b/tensorflow/python/keras/engine/training.py @@ -2733,7 +2733,7 @@ def _minimize(strategy, tape, optimizer, loss, trainable_variables): # Whether to aggregate gradients outside of optimizer. This requires support # of the optimizer and doesn't work with ParameterServerStrategy and - # CentralStroageStrategy. + # CentralStorageStrategy. aggregate_grads_outside_optimizer = ( optimizer._HAS_AGGREGATE_GRAD and # pylint: disable=protected-access not isinstance(strategy.extended, From 06eb028030a0a6f9c3fb0ec46a5ded4e3c0ed03e Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Fri, 24 Jul 2020 10:07:48 -0700 Subject: [PATCH 1270/2522] Port the elementwise kernel to the new TfLiteEvalTensor API. PiperOrigin-RevId: 323017688 Change-Id: I4362d0e3d70dd4449a9e45f0bf8289b0f8824235 --- tensorflow/lite/micro/kernels/BUILD | 1 + tensorflow/lite/micro/kernels/elementwise.cc | 15 +-- .../lite/micro/kernels/elementwise_test.cc | 110 +++++++----------- 3 files changed, 48 insertions(+), 78 deletions(-) diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index 5ff1121fedb..5f79f7c0c62 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD @@ -133,6 +133,7 @@ tflite_micro_cc_test( name = "elementwise_test", srcs = ["elementwise_test.cc"], deps = [ + ":kernel_runner", "//tensorflow/lite/c:common", "//tensorflow/lite/micro:debug_log", "//tensorflow/lite/micro:op_resolvers", diff --git a/tensorflow/lite/micro/kernels/elementwise.cc b/tensorflow/lite/micro/kernels/elementwise.cc index cb1fd852812..64880344664 100644 --- a/tensorflow/lite/micro/kernels/elementwise.cc +++ b/tensorflow/lite/micro/kernels/elementwise.cc @@ -18,6 +18,8 @@ limitations under the License. 
#include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" +#include "tensorflow/lite/micro/micro_utils.h" namespace tflite { namespace ops { @@ -52,13 +54,13 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) { template inline TfLiteStatus EvalImpl(TfLiteContext* context, TfLiteNode* node, T func(T), TfLiteType expected_type) { - const TfLiteTensor* input = GetInput(context, node, 0); - TfLiteTensor* output = GetOutput(context, node, 0); + const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0); + TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); TF_LITE_ENSURE_TYPES_EQ(context, input->type, expected_type); - const int64_t num_elements = NumElements(input); - const T* in_data = GetTensorData(input); - T* out_data = GetTensorData(output); - for (int64_t i = 0; i < num_elements; ++i) { + const size_t num_elements = ElementCount(*input->dims); + const T* in_data = tflite::micro::GetTensorData(input); + T* out_data = tflite::micro::GetTensorData(output); + for (size_t i = 0; i < num_elements; ++i) { out_data[i] = func(in_data[i]); } return kTfLiteOk; @@ -106,7 +108,6 @@ TfLiteStatus LogicalNotEval(TfLiteContext* context, TfLiteNode* node) { return EvalLogical(context, node, [](bool v) { return !v; }); } - } // namespace } // namespace elementwise diff --git a/tensorflow/lite/micro/kernels/elementwise_test.cc b/tensorflow/lite/micro/kernels/elementwise_test.cc index 8f028b1f451..b7094cbd445 100644 --- a/tensorflow/lite/micro/kernels/elementwise_test.cc +++ b/tensorflow/lite/micro/kernels/elementwise_test.cc @@ -16,13 +16,14 @@ limitations under the License. 
#include "tensorflow/lite/c/common.h" #include "tensorflow/lite/micro/all_ops_resolver.h" #include "tensorflow/lite/micro/debug_log.h" +#include "tensorflow/lite/micro/kernels/kernel_runner.h" #include "tensorflow/lite/micro/testing/micro_test.h" #include "tensorflow/lite/micro/testing/test_utils.h" namespace tflite { namespace testing { -void TestElementwiseFloat(tflite::BuiltinOperator op, +void TestElementwiseFloat(const TfLiteRegistration& registration, const int* input_dims_data, const float* input_data, const int* output_dims_data, const float* expected_output_data, @@ -43,45 +44,26 @@ void TestElementwiseFloat(tflite::BuiltinOperator op, output_data[i] = 23; } - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = resolver.FindOp(op); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, nullptr, 0); - } static int inputs_array_data[] = {1, 0}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); static int outputs_array_data[] = {1, 1}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = nullptr; - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, + /*builtin_data=*/nullptr, micro_test::reporter); - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - if (registration->free) { - registration->free(&context, user_data); - } for (int i = 0; i < output_dims_count; ++i) { TF_LITE_MICRO_EXPECT_NEAR(expected_output_data[i], output_data[i], 1e-5f); } } -void TestElementwiseBool(tflite::BuiltinOperator op, const int* input_dims_data, - const bool* input_data, const int* output_dims_data, +void TestElementwiseBool(const TfLiteRegistration& registration, + const int* input_dims_data, const bool* input_data, + const int* output_dims_data, const bool* expected_output_data, bool* output_data) { TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); @@ -99,39 +81,18 @@ void TestElementwiseBool(tflite::BuiltinOperator op, const int* input_dims_data, output_data[i] = false; } - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = resolver.FindOp(op); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, nullptr, 0); - } - const int inputs_array_data[] = {1, 0}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); const int outputs_array_data[] = {1, 1}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = nullptr; - node.custom_initial_data = 
nullptr; - node.custom_initial_data_size = 0; + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, + /*builtin_data=*/nullptr, micro_test::reporter); - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - if (registration->free) { - registration->free(&context, user_data); - } for (int i = 0; i < output_dims_count; ++i) { TF_LITE_MICRO_EXPECT_EQ(expected_output_data[i], output_data[i]); } @@ -148,8 +109,9 @@ TF_LITE_MICRO_TEST(Abs) { const float input[] = {0.01, -0.01, 10, -10}; const float golden[] = {0.01, 0.01, 10, 10}; float output_data[output_dims_count]; - tflite::testing::TestElementwiseFloat(tflite::BuiltinOperator_ABS, shape, - input, shape, golden, output_data); + tflite::testing::TestElementwiseFloat(tflite::ops::micro::Register_ABS(), + shape, input, shape, golden, + output_data); } TF_LITE_MICRO_TEST(Sin) { @@ -158,8 +120,9 @@ TF_LITE_MICRO_TEST(Sin) { const float input[] = {0, 3.1415926, -3.1415926, 1}; const float golden[] = {0, 0, 0, 0.84147}; float output_data[output_dims_count]; - tflite::testing::TestElementwiseFloat(tflite::BuiltinOperator_SIN, shape, - input, shape, golden, output_data); + tflite::testing::TestElementwiseFloat(tflite::ops::micro::Register_SIN(), + shape, input, shape, golden, + output_data); } TF_LITE_MICRO_TEST(Cos) { @@ -168,8 +131,9 @@ TF_LITE_MICRO_TEST(Cos) { const float input[] = {0, 3.1415926, -3.1415926, 1}; const float golden[] = {1, -1, -1, 0.54030}; float output_data[output_dims_count]; - tflite::testing::TestElementwiseFloat(tflite::BuiltinOperator_COS, shape, - input, shape, golden, output_data); + tflite::testing::TestElementwiseFloat(tflite::ops::micro::Register_COS(), + shape, input, shape, golden, + output_data); } TF_LITE_MICRO_TEST(Log) { @@ -178,8 +142,9 @@ TF_LITE_MICRO_TEST(Log) { const float input[] = {1, 2.7182818, 0.5, 2}; const float golden[] = {0, 1, -0.6931472, 0.6931472}; float output_data[output_dims_count]; - tflite::testing::TestElementwiseFloat(tflite::BuiltinOperator_LOG, shape, - input, shape, golden, output_data); + tflite::testing::TestElementwiseFloat(tflite::ops::micro::Register_LOG(), + shape, input, shape, golden, + output_data); } TF_LITE_MICRO_TEST(Sqrt) { @@ -188,8 +153,9 @@ TF_LITE_MICRO_TEST(Sqrt) { const float input[] = {0, 1, 2, 4}; const float golden[] = {0, 1, 1.41421, 2}; float output_data[output_dims_count]; - tflite::testing::TestElementwiseFloat(tflite::BuiltinOperator_SQRT, shape, - input, shape, golden, output_data); + tflite::testing::TestElementwiseFloat(tflite::ops::micro::Register_SQRT(), + shape, input, shape, golden, + output_data); } TF_LITE_MICRO_TEST(Rsqrt) { @@ -198,8 +164,9 @@ TF_LITE_MICRO_TEST(Rsqrt) { const float input[] = {1, 2, 4, 9}; const float golden[] = {1, 0.7071, 0.5, 0.33333}; float output_data[output_dims_count]; - tflite::testing::TestElementwiseFloat(tflite::BuiltinOperator_RSQRT, shape, - input, shape, golden, output_data); + tflite::testing::TestElementwiseFloat(tflite::ops::micro::Register_RSQRT(), + shape, input, shape, golden, + output_data); } TF_LITE_MICRO_TEST(Square) { @@ -208,8 +175,9 @@ TF_LITE_MICRO_TEST(Square) { const float input[] = {1, 2, 0.5, -3.0}; const float golden[] = {1, 4.0, 0.25, 9.0}; float 
output_data[output_dims_count]; - tflite::testing::TestElementwiseFloat(tflite::BuiltinOperator_SQUARE, shape, - input, shape, golden, output_data); + tflite::testing::TestElementwiseFloat(tflite::ops::micro::Register_SQUARE(), + shape, input, shape, golden, + output_data); } TF_LITE_MICRO_TEST(LogicalNot) { @@ -218,9 +186,9 @@ TF_LITE_MICRO_TEST(LogicalNot) { const bool input[] = {true, false, false, true}; const bool golden[] = {false, true, true, false}; bool output_data[output_dims_count]; - tflite::testing::TestElementwiseBool(tflite::BuiltinOperator_LOGICAL_NOT, - shape, input, shape, golden, - output_data); + tflite::testing::TestElementwiseBool( + tflite::ops::micro::Register_LOGICAL_NOT(), shape, input, shape, golden, + output_data); } TF_LITE_MICRO_TESTS_END From 40d01ae6fd713c4ba79d8efb6b85063a9f3b5924 Mon Sep 17 00:00:00 2001 From: Michael Gester Date: Fri, 24 Jul 2020 10:08:36 -0700 Subject: [PATCH 1271/2522] Moved tf.SparseMatMul lowering code The code should be in lower_tf.cc since it lowers from TF to TF and not to HLO. PiperOrigin-RevId: 323017854 Change-Id: Id93e99b65bc38266f5612fb1cacff1e250699c81 --- .../mlir/tensorflow/transforms/lower_tf.cc | 46 +++++++++++++++- .../mlir/xla/transforms/legalize_tf.cc | 53 ++----------------- 2 files changed, 49 insertions(+), 50 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc b/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc index c0de6f557ab..d67739a739b 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc @@ -344,12 +344,56 @@ class LowerPackOp : public OpRewritePattern { } }; +// Lowers `TF::SparseMatMulOp` to `TF::MatMulOp`, ignoring the sparseness hints, +// since we currently don't have an implementation that can use this +// information. Adds appropriate casts where necessary to align element types +// of operands and result for `TF::MatMulOp`. +class LowerSparseMatMulOp : public OpRewritePattern { + public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(TF::SparseMatMulOp op, + PatternRewriter &rewriter) const override { + // Result type must be f32 for applying the pattern (currently this is + // required by the op anyway but this might change). + if (!op.product().getType().cast().getElementType().isF32()) { + return failure(); + } + MLIRContext *context = rewriter.getContext(); + llvm::SmallVector operands{op.a(), op.b()}; + for (Value &operand : operands) { + TensorType tensor_type = operand.getType().cast(); + Type element_type = tensor_type.getElementType(); + if (element_type.isF32()) continue; + // Element type can either be f32 or bf16 for `SparseMatMulOp` so it + // must be bf16 here. + assert(element_type.isBF16()); + Type tensor_type_f32; + if (tensor_type.hasRank()) { + tensor_type_f32 = RankedTensorType::get(tensor_type.getShape(), + FloatType::getF32(context)); + } else { + tensor_type_f32 = UnrankedTensorType::get(FloatType::getF32(context)); + } + // Add cast to f32 to conform with element type of result. 
+ operand = + rewriter.create(op.getLoc(), tensor_type_f32, operand); + } + Value result = rewriter.create( + op.getLoc(), op.product().getType(), operands[0], operands[1], + op.transpose_a(), op.transpose_b()); + + rewriter.replaceOp(op, {result}); + return success(); + } +}; + } // namespace void PopulateLoweringTFPatterns(MLIRContext *context, OwningRewritePatternList *patterns) { patterns->insert(context); + LowerPackOp, LowerSparseMatMulOp>(context); populateWithGenerated(context, patterns); } diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc index b724c1b08e0..b1e74e354fe 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc @@ -5400,50 +5400,6 @@ class ConvertQrOp : public OpRewritePattern { } }; -// Converts `TF::SparseMatMulOp` to `TF::MatMulOp`, ignoring the sparseness -// hints, since we currently don't have an implementation that can use this -// information. Adds appropriate casts where necessary to align element types -// of operands and result for `TF::MatMulOp`. -class ConvertSparseMatMulOp : public OpRewritePattern { - public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(TF::SparseMatMulOp op, - PatternRewriter &rewriter) const override { - // Result type must be f32 for applying the pattern (currently this is - // required by the op anyway but this might change). - if (!op.product().getType().cast().getElementType().isF32()) { - return failure(); - } - MLIRContext *context = rewriter.getContext(); - llvm::SmallVector operands{op.a(), op.b()}; - for (Value &operand : operands) { - TensorType tensor_type = operand.getType().cast(); - Type element_type = tensor_type.getElementType(); - if (element_type.isF32()) continue; - // Element type can either be f32 or bf16 for `SparseMatMulOp` so it - // must be bf16 here. - assert(element_type.isBF16()); - Type tensor_type_f32; - if (tensor_type.hasRank()) { - tensor_type_f32 = RankedTensorType::get(tensor_type.getShape(), - FloatType::getF32(context)); - } else { - tensor_type_f32 = UnrankedTensorType::get(FloatType::getF32(context)); - } - // Add cast to f32 to conform with element type of result. - operand = - rewriter.create(op.getLoc(), tensor_type_f32, operand); - } - Value result = rewriter.create( - op.getLoc(), op.product().getType(), operands[0], operands[1], - op.transpose_a(), op.transpose_b()); - - rewriter.replaceOp(op, {result}); - return success(); - } -}; - // Emits debug information which includes the number of ops of each type which // failed to legalize. 
void EmitLegalizationErrors(Operation *op, @@ -5533,11 +5489,10 @@ LogicalResult legalizeTF(Operation *op, bool allow_partial_conversion, ConvertDynamicRangeOp, ConvertRangeOp, ConvertSelectV2Op, ConvertSigmoidOp, ConvertShapeOp, ConvertSizeOp, ConvertSoftmaxOp, - ConvertSoftmaxOp, ConvertSparseMatMulOp, - ConvertSplitOp, ConvertSplitVOp, ConvertStridedSliceOp, - ConvertStridedSliceGradOp, ConvertSumOp, ConvertTensorScatterUpdateOp, - ConvertTileOp, ConvertTopKV2Op, ConvertUnpackOp, - ConvertUnsortedSegmentMaxOp, ConvertUnsortedSegmentMinOp, + ConvertSoftmaxOp, ConvertSplitOp, ConvertSplitVOp, + ConvertStridedSliceOp, ConvertStridedSliceGradOp, ConvertSumOp, + ConvertTensorScatterUpdateOp, ConvertTileOp, ConvertTopKV2Op, + ConvertUnpackOp, ConvertUnsortedSegmentMaxOp, ConvertUnsortedSegmentMinOp, ConvertUnsortedSegmentProdOp, ConvertUnsortedSegmentSumOp, ConvertRandomShuffleOp, ConvertXlaShardingOp, ConvertXlaDynamicUpdateSliceOp>(op->getContext()); From 37f8b4fd1b555a2e0b903935eb5fc9906aabca13 Mon Sep 17 00:00:00 2001 From: Feng Liu Date: Fri, 24 Jul 2020 10:19:02 -0700 Subject: [PATCH 1272/2522] Emit scf.for statement for For op PiperOrigin-RevId: 323020012 Change-Id: I1486214b59c616e8deb9f8e8065c61a1335af1d6 --- .../python/autograph/pyct/static_analysis/type_inference.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/autograph/pyct/static_analysis/type_inference.py b/tensorflow/python/autograph/pyct/static_analysis/type_inference.py index 3412a8a6aa0..4e8a9a90020 100644 --- a/tensorflow/python/autograph/pyct/static_analysis/type_inference.py +++ b/tensorflow/python/autograph/pyct/static_analysis/type_inference.py @@ -173,6 +173,7 @@ class StmtInferrer(gast.NodeVisitor): self.closure_types = closure_types self.types_in = types_in self.new_symbols = {} + self.rvalue = None def visit(self, node): types = super().visit(node) From fe10ef671df1ac9d240b3d587dc25ed4605ff887 Mon Sep 17 00:00:00 2001 From: Advait Jain Date: Fri, 24 Jul 2020 10:24:26 -0700 Subject: [PATCH 1273/2522] Prefer the standard integral types over custom type-aliases. 
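The substitution is mechanical: the TFLite-internal aliases (int32, uint8, int16, ...) are replaced with the fixed-width standard types from <cstdint> (int32_t, uint8_t, int16_t, ...), with no change in behavior. A minimal sketch of the before/after pattern follows; the helper is hypothetical and only illustrates the style the reference kernels now use.

```c++
// Hypothetical helper, shown only to illustrate the pattern of this change.
// Before: custom aliases (int32, uint8) from TFLite compatibility headers.
// After: standard fixed-width types from <cstdint>.
#include <algorithm>
#include <cstdint>

inline uint8_t QuantizeAndClamp(int32_t acc, int32_t output_offset,
                                int32_t activation_min,
                                int32_t activation_max) {
  // Same arithmetic as before; only the spelling of the types changes.
  acc += output_offset;
  acc = std::max(acc, activation_min);
  acc = std::min(acc, activation_max);
  return static_cast<uint8_t>(acc);
}
```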
PiperOrigin-RevId: 323021115 Change-Id: Ib934f346bcc86de959027b180e50c8eb0e6f8b7e --- .../lite/kernels/internal/reference/add.h | 125 ++++++++-------- .../kernels/internal/reference/batch_matmul.h | 16 +-- .../kernels/internal/reference/comparisons.h | 44 +++--- .../internal/reference/concatenation.h | 12 +- .../lite/kernels/internal/reference/conv.h | 40 +++--- .../internal/reference/depthwiseconv_uint8.h | 88 ++++++------ .../kernels/internal/reference/dequantize.h | 14 +- .../internal/reference/fully_connected.h | 133 +++++++++--------- .../kernels/internal/reference/hard_swish.h | 2 +- .../internal/reference/integer_ops/add.h | 18 +-- .../internal/reference/integer_ops/conv.h | 58 ++++---- .../reference/integer_ops/depthwise_conv.h | 70 ++++----- .../reference/integer_ops/fully_connected.h | 32 ++--- .../reference/integer_ops/l2normalization.h | 8 +- .../internal/reference/integer_ops/mul.h | 36 ++--- .../internal/reference/integer_ops/pooling.h | 26 ++-- .../internal/reference/l2normalization.h | 27 ++-- .../kernels/internal/reference/logistic.h | 14 +- .../lite/kernels/internal/reference/mul.h | 36 ++--- .../lite/kernels/internal/reference/pad.h | 14 +- .../lite/kernels/internal/reference/pooling.h | 21 +-- .../reference/portable_tensor_utils.cc | 76 +++++----- .../reference/portable_tensor_utils.h | 2 +- .../lite/kernels/internal/reference/prelu.h | 26 ++-- .../kernels/internal/reference/quantize.h | 12 +- .../lite/kernels/internal/reference/reduce.h | 14 +- .../reference/resize_nearest_neighbor.h | 43 +++--- .../lite/kernels/internal/reference/softmax.h | 38 ++--- .../lite/kernels/internal/reference/sub.h | 97 ++++++------- .../lite/kernels/internal/reference/svdf.h | 2 +- .../lite/kernels/internal/reference/tanh.h | 36 ++--- 31 files changed, 594 insertions(+), 586 deletions(-) diff --git a/tensorflow/lite/kernels/internal/reference/add.h b/tensorflow/lite/kernels/internal/reference/add.h index d0c40912091..94c58097154 100644 --- a/tensorflow/lite/kernels/internal/reference/add.h +++ b/tensorflow/lite/kernels/internal/reference/add.h @@ -52,33 +52,33 @@ inline void Add(const ArithmeticParams& params, // Element-wise add that can often be used for inner loop of broadcast add as // well as the non-broadcast add. 
inline void AddElementwise(int size, const ArithmeticParams& params, - const uint8* input1_data, const uint8* input2_data, - uint8* output_data) { + const uint8_t* input1_data, + const uint8_t* input2_data, uint8_t* output_data) { TFLITE_DCHECK_GT(params.input1_offset, -256); TFLITE_DCHECK_GT(params.input2_offset, -256); TFLITE_DCHECK_LT(params.input1_offset, 256); TFLITE_DCHECK_LT(params.input2_offset, 256); for (int i = 0; i < size; ++i) { - const int32 input1_val = params.input1_offset + input1_data[i]; - const int32 input2_val = params.input2_offset + input2_data[i]; - const int32 shifted_input1_val = input1_val * (1 << params.left_shift); - const int32 shifted_input2_val = input2_val * (1 << params.left_shift); - const int32 scaled_input1_val = + const int32_t input1_val = params.input1_offset + input1_data[i]; + const int32_t input2_val = params.input2_offset + input2_data[i]; + const int32_t shifted_input1_val = input1_val * (1 << params.left_shift); + const int32_t shifted_input2_val = input2_val * (1 << params.left_shift); + const int32_t scaled_input1_val = MultiplyByQuantizedMultiplierSmallerThanOneExp( shifted_input1_val, params.input1_multiplier, params.input1_shift); - const int32 scaled_input2_val = + const int32_t scaled_input2_val = MultiplyByQuantizedMultiplierSmallerThanOneExp( shifted_input2_val, params.input2_multiplier, params.input2_shift); - const int32 raw_sum = scaled_input1_val + scaled_input2_val; - const int32 raw_output = + const int32_t raw_sum = scaled_input1_val + scaled_input2_val; + const int32_t raw_output = MultiplyByQuantizedMultiplierSmallerThanOneExp( raw_sum, params.output_multiplier, params.output_shift) + params.output_offset; - const int32 clamped_output = + const int32_t clamped_output = std::min(params.quantized_activation_max, std::max(params.quantized_activation_min, raw_output)); - output_data[i] = static_cast(clamped_output); + output_data[i] = static_cast(clamped_output); } } @@ -86,40 +86,40 @@ inline void AddElementwise(int size, const ArithmeticParams& params, // broadcast add, so that, for example, scalar-broadcast with batch will still // be fast. 
inline void AddScalarBroadcast(int size, const ArithmeticParams& params, - uint8 input1_data, const uint8* input2_data, - uint8* output_data) { + uint8_t input1_data, const uint8_t* input2_data, + uint8_t* output_data) { TFLITE_DCHECK_GT(params.input1_offset, -256); TFLITE_DCHECK_GT(params.input2_offset, -256); TFLITE_DCHECK_LT(params.input1_offset, 256); TFLITE_DCHECK_LT(params.input2_offset, 256); - const int32 input1_val = params.input1_offset + input1_data; - const int32 shifted_input1_val = input1_val * (1 << params.left_shift); - const int32 scaled_input1_val = + const int32_t input1_val = params.input1_offset + input1_data; + const int32_t shifted_input1_val = input1_val * (1 << params.left_shift); + const int32_t scaled_input1_val = MultiplyByQuantizedMultiplierSmallerThanOneExp( shifted_input1_val, params.input1_multiplier, params.input1_shift); for (int i = 0; i < size; ++i) { - const int32 input2_val = params.input2_offset + input2_data[i]; - const int32 shifted_input2_val = input2_val * (1 << params.left_shift); - const int32 scaled_input2_val = + const int32_t input2_val = params.input2_offset + input2_data[i]; + const int32_t shifted_input2_val = input2_val * (1 << params.left_shift); + const int32_t scaled_input2_val = MultiplyByQuantizedMultiplierSmallerThanOneExp( shifted_input2_val, params.input2_multiplier, params.input2_shift); - const int32 raw_sum = scaled_input1_val + scaled_input2_val; - const int32 raw_output = + const int32_t raw_sum = scaled_input1_val + scaled_input2_val; + const int32_t raw_output = MultiplyByQuantizedMultiplierSmallerThanOneExp( raw_sum, params.output_multiplier, params.output_shift) + params.output_offset; - const int32 clamped_output = + const int32_t clamped_output = std::min(params.quantized_activation_max, std::max(params.quantized_activation_min, raw_output)); - output_data[i] = static_cast(clamped_output); + output_data[i] = static_cast(clamped_output); } } inline void Add(const ArithmeticParams& params, - const RuntimeShape& input1_shape, const uint8* input1_data, - const RuntimeShape& input2_shape, const uint8* input2_data, - const RuntimeShape& output_shape, uint8* output_data) { + const RuntimeShape& input1_shape, const uint8_t* input1_data, + const RuntimeShape& input2_shape, const uint8_t* input2_data, + const RuntimeShape& output_shape, uint8_t* output_data) { TFLITE_DCHECK_LE(params.quantized_activation_min, params.quantized_activation_max); const int flat_size = @@ -133,23 +133,24 @@ inline void Add(const ArithmeticParams& params, } inline void Add(const ArithmeticParams& params, - const RuntimeShape& input1_shape, const int16* input1_data, - const RuntimeShape& input2_shape, const int16* input2_data, - const RuntimeShape& output_shape, int16* output_data) { + const RuntimeShape& input1_shape, const int16_t* input1_data, + const RuntimeShape& input2_shape, const int16_t* input2_data, + const RuntimeShape& output_shape, int16_t* output_data) { TFLITE_DCHECK_LE(params.quantized_activation_min, params.quantized_activation_max); const int input1_shift = params.input1_shift; const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape); - const int16 output_activation_min = params.quantized_activation_min; - const int16 output_activation_max = params.quantized_activation_max; + const int16_t output_activation_min = params.quantized_activation_min; + const int16_t output_activation_max = params.quantized_activation_max; TFLITE_DCHECK(input1_shift == 0 || params.input2_shift == 0); TFLITE_DCHECK_LE(input1_shift, 
0); TFLITE_DCHECK_LE(params.input2_shift, 0); - const int16* not_shift_input = input1_shift == 0 ? input1_data : input2_data; - const int16* shift_input = input1_shift == 0 ? input2_data : input1_data; + const int16_t* not_shift_input = + input1_shift == 0 ? input1_data : input2_data; + const int16_t* shift_input = input1_shift == 0 ? input2_data : input1_data; const int input_right_shift = input1_shift == 0 ? -params.input2_shift : -input1_shift; @@ -161,8 +162,8 @@ inline void Add(const ArithmeticParams& params, F0 scaled_input = F0::FromRaw( gemmlowp::RoundingDivideByPOT(shift_input[i], input_right_shift)); F0 result = gemmlowp::SaturatingAdd(scaled_input, input_ready_scaled); - const int16 raw_output = result.raw(); - const int16 clamped_output = std::min( + const int16_t raw_output = result.raw(); + const int16_t clamped_output = std::min( output_activation_max, std::max(output_activation_min, raw_output)); output_data[i] = clamped_output; } @@ -218,11 +219,11 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params, inline void BroadcastAdd4DSlow(const ArithmeticParams& params, const RuntimeShape& input1_shape, - const int32* input1_data, + const int32_t* input1_data, const RuntimeShape& input2_shape, - const int32* input2_data, + const int32_t* input2_data, const RuntimeShape& output_shape, - int32* output_data) { + int32_t* output_data) { NdArrayDesc<4> desc1; NdArrayDesc<4> desc2; NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, @@ -259,11 +260,11 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params, inline void BroadcastAdd4DSlow(const ArithmeticParams& params, const RuntimeShape& input1_shape, - const uint8* input1_data, + const uint8_t* input1_data, const RuntimeShape& input2_shape, - const uint8* input2_data, + const uint8_t* input2_data, const RuntimeShape& output_shape, - uint8* output_data) { + uint8_t* output_data) { NdArrayDesc<4> desc1; NdArrayDesc<4> desc2; NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, @@ -286,34 +287,34 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params, for (int y = 0; y < extended_output_shape.Dims(1); ++y) { for (int x = 0; x < extended_output_shape.Dims(2); ++x) { for (int c = 0; c < extended_output_shape.Dims(3); ++c) { - const int32 input1_val = + const int32_t input1_val = params.input1_offset + input1_data[SubscriptToIndex(desc1, b, y, x, c)]; - const int32 input2_val = + const int32_t input2_val = params.input2_offset + input2_data[SubscriptToIndex(desc2, b, y, x, c)]; - const int32 shifted_input1_val = + const int32_t shifted_input1_val = input1_val * (1 << params.left_shift); - const int32 shifted_input2_val = + const int32_t shifted_input2_val = input2_val * (1 << params.left_shift); - const int32 scaled_input1_val = + const int32_t scaled_input1_val = MultiplyByQuantizedMultiplierSmallerThanOneExp( shifted_input1_val, params.input1_multiplier, params.input1_shift); - const int32 scaled_input2_val = + const int32_t scaled_input2_val = MultiplyByQuantizedMultiplierSmallerThanOneExp( shifted_input2_val, params.input2_multiplier, params.input2_shift); - const int32 raw_sum = scaled_input1_val + scaled_input2_val; - const int32 raw_output = + const int32_t raw_sum = scaled_input1_val + scaled_input2_val; + const int32_t raw_output = MultiplyByQuantizedMultiplierSmallerThanOneExp( raw_sum, params.output_multiplier, params.output_shift) + params.output_offset; - const int32 clamped_output = + const int32_t clamped_output = std::min(params.quantized_activation_max, 
std::max(params.quantized_activation_min, raw_output)); output_data[Offset(extended_output_shape, b, y, x, c)] = - static_cast(clamped_output); + static_cast(clamped_output); } } } @@ -322,11 +323,11 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params, inline void BroadcastAddFivefold(const ArithmeticParams& unswitched_params, const RuntimeShape& unswitched_input1_shape, - const uint8* unswitched_input1_data, + const uint8_t* unswitched_input1_data, const RuntimeShape& unswitched_input2_shape, - const uint8* unswitched_input2_data, + const uint8_t* unswitched_input2_data, const RuntimeShape& output_shape, - uint8* output_data) { + uint8_t* output_data) { ArithmeticParams switched_params = unswitched_params; switched_params.input1_offset = unswitched_params.input2_offset; switched_params.input1_multiplier = unswitched_params.input2_multiplier; @@ -341,18 +342,18 @@ inline void BroadcastAddFivefold(const ArithmeticParams& unswitched_params, const ArithmeticParams& params = use_unswitched ? unswitched_params : switched_params; - const uint8* input1_data = + const uint8_t* input1_data = use_unswitched ? unswitched_input1_data : unswitched_input2_data; - const uint8* input2_data = + const uint8_t* input2_data = use_unswitched ? unswitched_input2_data : unswitched_input1_data; // Fivefold nested loops. The second input resets its position for each // iteration of the second loop. The first input resets its position at the // beginning of the fourth loop. The innermost loop is an elementwise add of // sections of the arrays. - uint8* output_data_ptr = output_data; - const uint8* input1_data_ptr = input1_data; - const uint8* input2_data_reset = input2_data; + uint8_t* output_data_ptr = output_data; + const uint8_t* input1_data_ptr = input1_data; + const uint8_t* input2_data_reset = input2_data; // In the fivefold pattern, y0, y2 and y4 are not broadcast, and so shared // between input shapes. y3 for input 1 is always broadcast, and so the // dimension there is 1, whereas optionally y1 might be broadcast for input 2. @@ -368,7 +369,7 @@ inline void BroadcastAddFivefold(const ArithmeticParams& unswitched_params, // General fivefold pattern, with y4 > 1 so there is a non-broadcast inner // dimension. for (int i0 = 0; i0 < y0; ++i0) { - const uint8* input2_data_ptr; + const uint8_t* input2_data_ptr; for (int i1 = 0; i1 < y1; ++i1) { input2_data_ptr = input2_data_reset; for (int i2 = 0; i2 < y2; ++i2) { @@ -397,7 +398,7 @@ inline void BroadcastAddFivefold(const ArithmeticParams& unswitched_params, // for y4 == 1 and the loop over y3 is contained within the // AddScalarBroadcast function. 
for (int i0 = 0; i0 < y0; ++i0) { - const uint8* input2_data_ptr; + const uint8_t* input2_data_ptr; for (int i1 = 0; i1 < y1; ++i1) { input2_data_ptr = input2_data_reset; for (int i2 = 0; i2 < y2; ++i2) { diff --git a/tensorflow/lite/kernels/internal/reference/batch_matmul.h b/tensorflow/lite/kernels/internal/reference/batch_matmul.h index 05caefaca5d..24c3ffe3d7e 100644 --- a/tensorflow/lite/kernels/internal/reference/batch_matmul.h +++ b/tensorflow/lite/kernels/internal/reference/batch_matmul.h @@ -266,13 +266,13 @@ inline void BatchMatMul(const FullyConnectedParams& params, const int rhs_cols = extended_rhs_shape.Dims(4); const int accum_depth = extended_lhs_shape.Dims(4); - const int32 input_offset = params.input_offset; - const int32 filter_offset = params.weights_offset; - const int32 output_offset = params.output_offset; - const int32 output_multiplier = params.output_multiplier; + const int32_t input_offset = params.input_offset; + const int32_t filter_offset = params.weights_offset; + const int32_t output_offset = params.output_offset; + const int32_t output_multiplier = params.output_multiplier; const int output_shift = params.output_shift; - const int32 output_activation_min = params.quantized_activation_min; - const int32 output_activation_max = params.quantized_activation_max; + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; TFLITE_DCHECK_LE(output_activation_min, output_activation_max); for (int b0 = 0; b0 < batch_dim0; ++b0) { @@ -292,8 +292,8 @@ inline void BatchMatMul(const FullyConnectedParams& params, for (int i = 0; i < lhs_rows; ++i) { int32_t total = 0; for (int k = 0; k < accum_depth; ++k) { - int32 lhs_val = lhs_ptr2[accum_depth * i + k]; - int32 rhs_val = rhs_ptr2[accum_depth * j + k]; + int32_t lhs_val = lhs_ptr2[accum_depth * i + k]; + int32_t rhs_val = rhs_ptr2[accum_depth * j + k]; total += (lhs_val + filter_offset) * (rhs_val + input_offset); } total = MultiplyByQuantizedMultiplier(total, output_multiplier, diff --git a/tensorflow/lite/kernels/internal/reference/comparisons.h b/tensorflow/lite/kernels/internal/reference/comparisons.h index d9bc10a9390..49844ab1539 100644 --- a/tensorflow/lite/kernels/internal/reference/comparisons.h +++ b/tensorflow/lite/kernels/internal/reference/comparisons.h @@ -105,30 +105,30 @@ inline void Comparison(const ComparisonParams& op_params, input2_data, output_shape, output_data); } -template F> +template F> inline void ComparisonWithScaling( const ComparisonParams& op_params, const RuntimeShape& input1_shape, const T* input1_data, const RuntimeShape& input2_shape, const T* input2_data, const RuntimeShape& output_shape, bool* output_data) { int left_shift = op_params.left_shift; - int32 input1_offset = op_params.input1_offset; - int32 input1_multiplier = op_params.input1_multiplier; + int32_t input1_offset = op_params.input1_offset; + int32_t input1_multiplier = op_params.input1_multiplier; int input1_shift = op_params.input1_shift; - int32 input2_offset = op_params.input2_offset; - int32 input2_multiplier = op_params.input2_multiplier; + int32_t input2_offset = op_params.input2_offset; + int32_t input2_multiplier = op_params.input2_multiplier; int input2_shift = op_params.input2_shift; const int64_t flatsize = MatchingFlatSize(input1_shape, input2_shape, output_shape); for (int64_t i = 0; i < flatsize; ++i) { - const int32 input1_val = input1_offset + input1_data[i]; - const int32 input2_val = input2_offset + input2_data[i]; - 
const int32 shifted_input1_val = input1_val * (1 << left_shift); - const int32 shifted_input2_val = input2_val * (1 << left_shift); - const int32 scaled_input1_val = + const int32_t input1_val = input1_offset + input1_data[i]; + const int32_t input2_val = input2_offset + input2_data[i]; + const int32_t shifted_input1_val = input1_val * (1 << left_shift); + const int32_t shifted_input2_val = input2_val * (1 << left_shift); + const int32_t scaled_input1_val = MultiplyByQuantizedMultiplierSmallerThanOneExp( shifted_input1_val, input1_multiplier, input1_shift); - const int32 scaled_input2_val = + const int32_t scaled_input2_val = MultiplyByQuantizedMultiplierSmallerThanOneExp( shifted_input2_val, input2_multiplier, input2_shift); output_data[i] = F(scaled_input1_val, scaled_input2_val); @@ -218,7 +218,7 @@ inline void BroadcastComparison4DSlow(const ComparisonParams& op_params, output_shape, output_data); } -template F> +template F> inline void BroadcastComparison4DSlowWithScaling( const ComparisonParams& op_params, const RuntimeShape& unextended_input1_shape, const T* input1_data, @@ -230,29 +230,29 @@ inline void BroadcastComparison4DSlowWithScaling( unextended_output_shape); int left_shift = op_params.left_shift; - int32 input1_offset = op_params.input1_offset; - int32 input1_multiplier = op_params.input1_multiplier; + int32_t input1_offset = op_params.input1_offset; + int32_t input1_multiplier = op_params.input1_multiplier; int input1_shift = op_params.input1_shift; - int32 input2_offset = op_params.input2_offset; - int32 input2_multiplier = op_params.input2_multiplier; + int32_t input2_offset = op_params.input2_offset; + int32_t input2_multiplier = op_params.input2_multiplier; int input2_shift = op_params.input2_shift; for (int b = 0; b < dims.output_shape.Dims(0); ++b) { for (int y = 0; y < dims.output_shape.Dims(1); ++y) { for (int x = 0; x < dims.output_shape.Dims(2); ++x) { for (int c = 0; c < dims.output_shape.Dims(3); ++c) { - const int32 input1_val = + const int32_t input1_val = input1_offset + input1_data[SubscriptToIndex(dims.desc1, b, y, x, c)]; - const int32 input2_val = + const int32_t input2_val = input2_offset + input2_data[SubscriptToIndex(dims.desc2, b, y, x, c)]; - const int32 shifted_input1_val = input1_val * (1 << left_shift); - const int32 shifted_input2_val = input2_val * (1 << left_shift); - const int32 scaled_input1_val = + const int32_t shifted_input1_val = input1_val * (1 << left_shift); + const int32_t shifted_input2_val = input2_val * (1 << left_shift); + const int32_t scaled_input1_val = MultiplyByQuantizedMultiplierSmallerThanOneExp( shifted_input1_val, input1_multiplier, input1_shift); - const int32 scaled_input2_val = + const int32_t scaled_input2_val = MultiplyByQuantizedMultiplierSmallerThanOneExp( shifted_input2_val, input2_multiplier, input2_shift); output_data[Offset(dims.output_shape, b, y, x, c)] = diff --git a/tensorflow/lite/kernels/internal/reference/concatenation.h b/tensorflow/lite/kernels/internal/reference/concatenation.h index 958fe3ea249..25959793e9d 100644 --- a/tensorflow/lite/kernels/internal/reference/concatenation.h +++ b/tensorflow/lite/kernels/internal/reference/concatenation.h @@ -74,14 +74,14 @@ inline void Concatenation(const ConcatenationParams& params, // when optimizng this routine further. 
inline void ConcatenationWithScaling(const ConcatenationParams& params, const RuntimeShape* const* input_shapes, - const uint8* const* input_data, + const uint8_t* const* input_data, const RuntimeShape& output_shape, - uint8* output_data) { + uint8_t* output_data) { int axis = params.axis; - const int32* input_zeropoint = params.input_zeropoint; + const int32_t* input_zeropoint = params.input_zeropoint; const float* input_scale = params.input_scale; int inputs_count = params.inputs_count; - const int32 output_zeropoint = params.output_zeropoint; + const int32_t output_zeropoint = params.output_zeropoint; const float output_scale = params.output_scale; const int concat_dimensions = output_shape.DimensionsCount(); @@ -110,11 +110,11 @@ inline void ConcatenationWithScaling(const ConcatenationParams& params, } const float inverse_output_scale = 1.f / output_scale; - uint8* output_ptr = output_data; + uint8_t* output_ptr = output_data; for (int k = 0; k < outer_size; k++) { for (int i = 0; i < inputs_count; ++i) { const int copy_size = input_shapes[i]->Dims(axis) * base_inner_size; - const uint8* input_ptr = input_data[i] + k * copy_size; + const uint8_t* input_ptr = input_data[i] + k * copy_size; if (input_zeropoint[i] == output_zeropoint && input_scale[i] == output_scale) { memcpy(output_ptr, input_ptr, copy_size); diff --git a/tensorflow/lite/kernels/internal/reference/conv.h b/tensorflow/lite/kernels/internal/reference/conv.h index 55dd869a4b1..d4bf46a86b8 100644 --- a/tensorflow/lite/kernels/internal/reference/conv.h +++ b/tensorflow/lite/kernels/internal/reference/conv.h @@ -99,11 +99,11 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape, } inline void Conv(const ConvParams& params, const RuntimeShape& input_shape, - const uint8* input_data, const RuntimeShape& filter_shape, - const uint8* filter_data, const RuntimeShape& bias_shape, - const int32* bias_data, const RuntimeShape& output_shape, - uint8* output_data, const RuntimeShape& im2col_shape, - uint8* im2col_data, void* cpu_backend_context) { + const uint8_t* input_data, const RuntimeShape& filter_shape, + const uint8_t* filter_data, const RuntimeShape& bias_shape, + const int32_t* bias_data, const RuntimeShape& output_shape, + uint8_t* output_data, const RuntimeShape& im2col_shape, + uint8_t* im2col_data, void* cpu_backend_context) { (void)cpu_backend_context; // only used in optimized code. (void)im2col_data; // only used in optimized code. (void)im2col_shape; // only used in optimized code. 
@@ -113,13 +113,13 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape, const int dilation_height_factor = params.dilation_height_factor; const int pad_width = params.padding_values.width; const int pad_height = params.padding_values.height; - const int32 input_offset = params.input_offset; - const int32 filter_offset = params.weights_offset; - const int32 output_offset = params.output_offset; - const int32 output_multiplier = params.output_multiplier; + const int32_t input_offset = params.input_offset; + const int32_t filter_offset = params.weights_offset; + const int32_t output_offset = params.output_offset; + const int32_t output_multiplier = params.output_multiplier; const int output_shift = params.output_shift; - const int32 output_activation_min = params.quantized_activation_min; - const int32 output_activation_max = params.quantized_activation_max; + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; TFLITE_DCHECK_LE(output_activation_min, output_activation_max); TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); @@ -143,7 +143,7 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape, for (int out_channel = 0; out_channel < output_depth; ++out_channel) { const int in_x_origin = (out_x * stride_width) - pad_width; const int in_y_origin = (out_y * stride_height) - pad_height; - int32 acc = 0; + int32_t acc = 0; for (int filter_y = 0; filter_y < filter_height; ++filter_y) { for (int filter_x = 0; filter_x < filter_width; ++filter_x) { for (int in_channel = 0; in_channel < input_depth; ++in_channel) { @@ -154,9 +154,9 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape, // use zero as a default value. if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height)) { - int32 input_val = input_data[Offset(input_shape, batch, in_y, - in_x, in_channel)]; - int32 filter_val = + int32_t input_val = input_data[Offset( + input_shape, batch, in_y, in_x, in_channel)]; + int32_t filter_val = filter_data[Offset(filter_shape, out_channel, filter_y, filter_x, in_channel)]; acc += @@ -174,7 +174,7 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape, acc = std::max(acc, output_activation_min); acc = std::min(acc, output_activation_max); output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] = - static_cast(acc); + static_cast(acc); } } } @@ -220,7 +220,7 @@ inline void HybridConvPerChannel( for (int out_channel = 0; out_channel < output_depth; ++out_channel) { const int in_x_origin = (out_x * stride_width) - pad_width; const int in_y_origin = (out_y * stride_height) - pad_height; - int32 acc = 0; + int32_t acc = 0; for (int filter_y = 0; filter_y < filter_height; ++filter_y) { for (int filter_x = 0; filter_x < filter_width; ++filter_x) { for (int in_channel = 0; in_channel < input_depth; ++in_channel) { @@ -231,9 +231,9 @@ inline void HybridConvPerChannel( // use zero as a default value. 
if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height)) { - int32 input_val = input_data[Offset(input_shape, batch, in_y, - in_x, in_channel)]; - int32 filter_val = + int32_t input_val = input_data[Offset( + input_shape, batch, in_y, in_x, in_channel)]; + int32_t filter_val = filter_data[Offset(filter_shape, out_channel, filter_y, filter_x, in_channel)]; acc += filter_val * (input_val - input_offset[batch]); diff --git a/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h b/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h index 70e5dd4012f..20bf83df3d8 100644 --- a/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h +++ b/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h @@ -62,21 +62,21 @@ namespace reference_ops { namespace depthwise_conv { template -inline int32 DepthwiseConvRound(int32 x, int32 quantized_multiplier, - int shift) { +inline int32_t DepthwiseConvRound(int32_t x, int32_t quantized_multiplier, + int shift) { TFLITE_DCHECK_NE(output_rounding, DepthwiseConvOutputRounding::kNone); return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift); } template <> -inline int32 DepthwiseConvRound( - int32 x, int32 quantized_multiplier, int shift) { +inline int32_t DepthwiseConvRound( + int32_t x, int32_t quantized_multiplier, int shift) { return MultiplyByQuantizedMultiplier(x, quantized_multiplier, shift); } template <> -inline int32 DepthwiseConvRound( - int32 x, int32 quantized_multiplier, int shift) { +inline int32_t DepthwiseConvRound( + int32_t x, int32_t quantized_multiplier, int shift) { using gemmlowp::SaturatingRoundingDoublingHighMul; const int left_shift = shift > 0 ? shift : 0; const int right_shift = shift > 0 ? 0 : -shift; @@ -89,13 +89,12 @@ inline int32 DepthwiseConvRound( template struct DepthwiseConvBasicKernel { - static inline void Run(const DepthwiseParams& params, - const RuntimeShape& input_shape, - const uint8* input_data, - const RuntimeShape& filter_shape, - const uint8* filter_data, - const RuntimeShape& bias_shape, const int32* bias_data, - const RuntimeShape& output_shape, uint8* output_data) { + static inline void Run( + const DepthwiseParams& params, const RuntimeShape& input_shape, + const uint8_t* input_data, const RuntimeShape& filter_shape, + const uint8_t* filter_data, const RuntimeShape& bias_shape, + const int32_t* bias_data, const RuntimeShape& output_shape, + uint8_t* output_data) { const int stride_width = params.stride_width; const int stride_height = params.stride_height; const int dilation_width_factor = params.dilation_width_factor; @@ -103,12 +102,12 @@ struct DepthwiseConvBasicKernel { const int pad_width = params.padding_values.width; const int pad_height = params.padding_values.height; const int depth_multiplier = params.depth_multiplier; - const int32 output_activation_min = params.quantized_activation_min; - const int32 output_activation_max = params.quantized_activation_max; - const int32 input_offset = params.input_offset; - const int32 filter_offset = params.weights_offset; - const int32 output_offset = params.output_offset; - const int32 output_multiplier = params.output_multiplier; + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; + const int32_t input_offset = params.input_offset; + const int32_t filter_offset = params.weights_offset; + const int32_t output_offset = params.output_offset; + const int32_t output_multiplier = 
params.output_multiplier; const int output_shift = params.output_shift; TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); @@ -135,7 +134,7 @@ struct DepthwiseConvBasicKernel { const int oc = m + ic * depth_multiplier; const int in_x_origin = (out_x * stride_width) - pad_width; const int in_y_origin = (out_y * stride_height) - pad_height; - int32 acc = 0; + int32_t acc = 0; for (int filter_y = 0; filter_y < filter_height; ++filter_y) { for (int filter_x = 0; filter_x < filter_width; ++filter_x) { const int in_x = @@ -146,9 +145,9 @@ struct DepthwiseConvBasicKernel { // use zero as a default value. if ((in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height)) { - int32 input_val = + int32_t input_val = input_data[Offset(input_shape, b, in_y, in_x, ic)]; - int32 filter_val = filter_data[Offset( + int32_t filter_val = filter_data[Offset( filter_shape, 0, filter_y, filter_x, oc)]; acc += (filter_val + filter_offset) * (input_val + input_offset); @@ -164,7 +163,7 @@ struct DepthwiseConvBasicKernel { acc = std::max(acc, output_activation_min); acc = std::min(acc, output_activation_max); output_data[Offset(output_shape, b, out_y, out_x, oc)] = - static_cast(acc); + static_cast(acc); } } } @@ -176,10 +175,10 @@ struct DepthwiseConvBasicKernel { // MultiplyByQuantizedMultiplier or DepthwiseConvRound function. static inline void RunPerChannel( const DepthwiseParams& params, const RuntimeShape& input_shape, - const int8* input_data, const RuntimeShape& filter_shape, - const int8* filter_data, const RuntimeShape& bias_shape, - const int32* bias_data, const RuntimeShape& output_shape, - int8* output_data) { + const int8_t* input_data, const RuntimeShape& filter_shape, + const int8_t* filter_data, const RuntimeShape& bias_shape, + const int32_t* bias_data, const RuntimeShape& output_shape, + int8_t* output_data) { // Get parameters. // TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro. const int stride_width = params.stride_width; @@ -189,12 +188,12 @@ struct DepthwiseConvBasicKernel { const int pad_width = params.padding_values.width; const int pad_height = params.padding_values.height; const int depth_multiplier = params.depth_multiplier; - const int32 input_offset = params.input_offset; - const int32 output_offset = params.output_offset; - const int32 output_activation_min = params.quantized_activation_min; - const int32 output_activation_max = params.quantized_activation_max; - const int32* output_multiplier = params.output_multiplier_per_channel; - const int32* output_shift = params.output_shift_per_channel; + const int32_t input_offset = params.input_offset; + const int32_t output_offset = params.output_offset; + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; + const int32_t* output_multiplier = params.output_multiplier_per_channel; + const int32_t* output_shift = params.output_shift_per_channel; // Check dimensions of the tensors. 
TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); @@ -222,7 +221,7 @@ struct DepthwiseConvBasicKernel { const int output_channel = m + in_channel * depth_multiplier; const int in_x_origin = (out_x * stride_width) - pad_width; const int in_y_origin = (out_y * stride_height) - pad_height; - int32 acc = 0; + int32_t acc = 0; for (int filter_y = 0; filter_y < filter_height; ++filter_y) { for (int filter_x = 0; filter_x < filter_width; ++filter_x) { const int in_x = @@ -234,17 +233,18 @@ struct DepthwiseConvBasicKernel { (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height); if (is_point_inside_image) { - int32 input_val = input_data[Offset( + int32_t input_val = input_data[Offset( input_shape, batch, in_y, in_x, in_channel)]; - int32 filter_val = filter_data[Offset( + int32_t filter_val = filter_data[Offset( filter_shape, 0, filter_y, filter_x, output_channel)]; // Accumulate with 32 bits accumulator. // In the nudging process during model quantization, we // force real value of 0.0 be represented by a quantized - // value. This guarantees that the input_offset is a int8, - // even though it is represented using int32. int32 += int8 - // * (int8 - int8) so the highest value we can get from each - // accumulation is [-127, 127] * ([-128, 127] - + // value. This guarantees that the input_offset is a int8_t, + // even though it is represented using int32_t. int32_t += + // int8_t + // * (int8_t - int8_t) so the highest value we can get from + // each accumulation is [-127, 127] * ([-128, 127] - // [-128, 127]), which is [-32512, 32512]. log2(32512) // = 14.98, which means we can accumulate at least 2^16 // multiplications without overflow. The accumulator is @@ -279,10 +279,10 @@ struct DepthwiseConvBasicKernel { inline void DepthwiseConv( const DepthwiseParams& params, const RuntimeShape& input_shape, - const uint8* input_data, const RuntimeShape& filter_shape, - const uint8* filter_data, const RuntimeShape& bias_shape, - const int32* bias_data, const RuntimeShape& output_shape, - uint8* output_data) { + const uint8_t* input_data, const RuntimeShape& filter_shape, + const uint8_t* filter_data, const RuntimeShape& bias_shape, + const int32_t* bias_data, const RuntimeShape& output_shape, + uint8_t* output_data) { return depthwise_conv::DepthwiseConvBasicKernel< DepthwiseConvOutputRounding::kAwayFromZero>::Run(params, input_shape, input_data, filter_shape, diff --git a/tensorflow/lite/kernels/internal/reference/dequantize.h b/tensorflow/lite/kernels/internal/reference/dequantize.h index 286c9310799..b90951f96e8 100644 --- a/tensorflow/lite/kernels/internal/reference/dequantize.h +++ b/tensorflow/lite/kernels/internal/reference/dequantize.h @@ -32,12 +32,12 @@ inline void Dequantize(const tflite::DequantizationParams& op_params, const RuntimeShape& input_shape, const InputT* input_data, const RuntimeShape& output_shape, OutputT* output_data) { - int32 zero_point = op_params.zero_point; + int32_t zero_point = op_params.zero_point; const double scale = op_params.scale; const int flat_size = MatchingFlatSize(input_shape, output_shape); for (int i = 0; i < flat_size; i++) { - const int32 val = input_data[i]; + const int32_t val = input_data[i]; const OutputT result = static_cast(scale * (val - zero_point)); output_data[i] = result; } @@ -52,11 +52,11 @@ inline void PerChannelDequantize( // Ensure flat size is same. 
MatchingFlatSize(input_shape, output_shape); - const int32* zero_point = op_params.zero_point; + const int32_t* zero_point = op_params.zero_point; const float* scale = op_params.scale; - const int32 quantized_dimension = op_params.quantized_dimension; - const int32 num_dims = input_shape.DimensionsCount(); - const int32* dims_data = input_shape.DimsData(); + const int32_t quantized_dimension = op_params.quantized_dimension; + const int32_t num_dims = input_shape.DimensionsCount(); + const int32_t* dims_data = input_shape.DimsData(); std::vector current_dim(num_dims, 0); do { @@ -64,7 +64,7 @@ inline void PerChannelDequantize( ReducedOutputOffset(num_dims, reinterpret_cast(dims_data), current_dim.data(), 0, nullptr); const int channel = current_dim[quantized_dimension]; - const int32 val = input_data[offset]; + const int32_t val = input_data[offset]; const float result = static_cast(scale[channel] * (val - zero_point[channel])); output_data[offset] = result; diff --git a/tensorflow/lite/kernels/internal/reference/fully_connected.h b/tensorflow/lite/kernels/internal/reference/fully_connected.h index 204a0fa0afa..39a9cd023d8 100644 --- a/tensorflow/lite/kernels/internal/reference/fully_connected.h +++ b/tensorflow/lite/kernels/internal/reference/fully_connected.h @@ -61,17 +61,17 @@ inline void FullyConnected( inline void FullyConnected( const FullyConnectedParams& params, const RuntimeShape& input_shape, - const uint8* input_data, const RuntimeShape& filter_shape, - const uint8* filter_data, const RuntimeShape& bias_shape, - const int32* bias_data, const RuntimeShape& output_shape, - uint8* output_data) { - const int32 input_offset = params.input_offset; - const int32 filter_offset = params.weights_offset; - const int32 output_offset = params.output_offset; - const int32 output_multiplier = params.output_multiplier; + const uint8_t* input_data, const RuntimeShape& filter_shape, + const uint8_t* filter_data, const RuntimeShape& bias_shape, + const int32_t* bias_data, const RuntimeShape& output_shape, + uint8_t* output_data) { + const int32_t input_offset = params.input_offset; + const int32_t filter_offset = params.weights_offset; + const int32_t output_offset = params.output_offset; + const int32_t output_multiplier = params.output_multiplier; const int output_shift = params.output_shift; - const int32 output_activation_min = params.quantized_activation_min; - const int32 output_activation_max = params.quantized_activation_max; + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2); TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1); @@ -89,10 +89,10 @@ inline void FullyConnected( const int accum_depth = filter_shape.Dims(filter_dim_count - 1); for (int b = 0; b < batches; ++b) { for (int out_c = 0; out_c < output_depth; ++out_c) { - int32 acc = 0; + int32_t acc = 0; for (int d = 0; d < accum_depth; ++d) { - int32 input_val = input_data[b * accum_depth + d]; - int32 filter_val = filter_data[out_c * accum_depth + d]; + int32_t input_val = input_data[b * accum_depth + d]; + int32_t filter_val = filter_data[out_c * accum_depth + d]; acc += (filter_val + filter_offset) * (input_val + input_offset); } if (bias_data) { @@ -102,24 +102,24 @@ inline void FullyConnected( acc += output_offset; acc = std::max(acc, output_activation_min); acc = std::min(acc, output_activation_max); - output_data[out_c + output_depth * b] = static_cast(acc); + output_data[out_c + 
output_depth * b] = static_cast(acc); } } } inline void FullyConnected( const FullyConnectedParams& params, const RuntimeShape& input_shape, - const uint8* input_data, const RuntimeShape& filter_shape, - const uint8* filter_data, const RuntimeShape& bias_shape, - const int32* bias_data, const RuntimeShape& output_shape, - int16* output_data) { - const int32 input_offset = params.input_offset; - const int32 filter_offset = params.weights_offset; - const int32 output_offset = params.output_offset; - const int32 output_multiplier = params.output_multiplier; + const uint8_t* input_data, const RuntimeShape& filter_shape, + const uint8_t* filter_data, const RuntimeShape& bias_shape, + const int32_t* bias_data, const RuntimeShape& output_shape, + int16_t* output_data) { + const int32_t input_offset = params.input_offset; + const int32_t filter_offset = params.weights_offset; + const int32_t output_offset = params.output_offset; + const int32_t output_multiplier = params.output_multiplier; const int output_shift = params.output_shift; - const int32 output_activation_min = params.quantized_activation_min; - const int32 output_activation_max = params.quantized_activation_max; + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; TFLITE_DCHECK_LE(output_activation_min, output_activation_max); TFLITE_DCHECK_EQ(output_offset, 0); @@ -138,20 +138,21 @@ inline void FullyConnected( for (int out_c = 0; out_c < output_depth; ++out_c) { // Internal accumulation. // Initialize accumulator with the bias-value. - int32 accum = bias_data[out_c]; + int32_t accum = bias_data[out_c]; // Accumulation loop. for (int d = 0; d < accum_depth; ++d) { - int16 input_val = input_data[b * accum_depth + d] + input_offset; - int16 filter_val = filter_data[out_c * accum_depth + d] + filter_offset; + int16_t input_val = input_data[b * accum_depth + d] + input_offset; + int16_t filter_val = + filter_data[out_c * accum_depth + d] + filter_offset; accum += filter_val * input_val; } - // Down-scale the final int32 accumulator to the scale used by our + // Down-scale the final int32_t accumulator to the scale used by our // (16-bit, typically 3 integer bits) fixed-point format. The quantized // multiplier and shift here have been pre-computed offline // (e.g. by toco). accum = MultiplyByQuantizedMultiplier(accum, output_multiplier, output_shift); - // Saturate, cast to int16, and store to output array. + // Saturate, cast to int16_t, and store to output array. 
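For readers unfamiliar with the down-scaling step referenced above, the following sketch approximates what MultiplyByQuantizedMultiplier does with the precomputed multiplier and shift. It is a floating-point illustration of the intent, not the bit-exact fixed-point routine, and the helper name is invented here:

#include <cmath>
#include <cstdint>
// Approximate view of the requantization used throughout these kernels:
//   result ~= round(acc * (quantized_multiplier / 2^31) * 2^shift)
// where quantized_multiplier is a Q31 fixed-point value computed offline.
inline int32_t RequantizeApprox(int32_t acc, int32_t quantized_multiplier,
                                int shift) {
  const double real_scale =
      static_cast<double>(quantized_multiplier) / 2147483648.0 *  // 2^31
      std::ldexp(1.0, shift);
  return static_cast<int32_t>(std::lround(acc * real_scale));
}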
accum = std::max(accum, output_activation_min - output_offset); accum = std::min(accum, output_activation_max - output_offset); accum += output_offset; @@ -162,14 +163,14 @@ inline void FullyConnected( inline void ShuffledFullyConnected( const FullyConnectedParams& params, const RuntimeShape& input_shape, - const uint8* input_data, const RuntimeShape& weights_shape, - const uint8* shuffled_weights_data, const RuntimeShape& bias_shape, - const int32* bias_data, const RuntimeShape& output_shape, - int16* output_data, uint8* shuffled_input_workspace_data) { - const int32 output_multiplier = params.output_multiplier; + const uint8_t* input_data, const RuntimeShape& weights_shape, + const uint8_t* shuffled_weights_data, const RuntimeShape& bias_shape, + const int32_t* bias_data, const RuntimeShape& output_shape, + int16_t* output_data, uint8_t* shuffled_input_workspace_data) { + const int32_t output_multiplier = params.output_multiplier; const int output_shift = params.output_shift; - const int32 output_activation_min = params.quantized_activation_min; - const int32 output_activation_max = params.quantized_activation_max; + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; TFLITE_DCHECK_LE(output_activation_min, output_activation_max); TFLITE_DCHECK_GE(input_shape.DimensionsCount(), 1); @@ -190,7 +191,7 @@ inline void ShuffledFullyConnected( TFLITE_DCHECK((output_depth % 4) == 0); // Shuffling and xoring of input activations into the workspace buffer - uint8* shuffled_input_workspace_ptr = shuffled_input_workspace_data; + uint8_t* shuffled_input_workspace_ptr = shuffled_input_workspace_data; if (batches == 1) { for (int i = 0; i < accum_depth; i++) { shuffled_input_workspace_data[i] = input_data[i] ^ 0x80; @@ -198,13 +199,13 @@ inline void ShuffledFullyConnected( } else if (batches == 4) { for (int c = 0; c < accum_depth; c += 16) { for (int b = 0; b < 4; b++) { - const uint8* src_data_ptr = input_data + b * accum_depth + c; + const uint8_t* src_data_ptr = input_data + b * accum_depth + c; for (int j = 0; j < 16; j++) { - uint8 src_val = *src_data_ptr++; + uint8_t src_val = *src_data_ptr++; // Flip the sign bit, so that the kernel will only need to - // reinterpret these uint8 values as int8, getting for free the + // reinterpret these uint8_t values as int8_t, getting for free the // subtraction of the zero_point value 128. - uint8 dst_val = src_val ^ 0x80; + uint8_t dst_val = src_val ^ 0x80; *shuffled_input_workspace_ptr++ = dst_val; } } @@ -216,62 +217,62 @@ inline void ShuffledFullyConnected( // Actual computation if (batches == 1) { - int16* output_ptr = output_data; + int16_t* output_ptr = output_data; // Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd) - // so that just reinterpreting them as int8 values is equivalent to + // so that just reinterpreting them as int8_t values is equivalent to // subtracting 128 from them, thus implementing for free the subtraction of // the zero_point value 128. - const int8* shuffled_weights_ptr = - reinterpret_cast<const int8*>(shuffled_weights_data); + const int8_t* shuffled_weights_ptr = + reinterpret_cast<const int8_t*>(shuffled_weights_data); // Likewise, we preshuffled and pre-xored the input data above. - const int8* shuffled_input_data = - reinterpret_cast<const int8*>(shuffled_input_workspace_data); + const int8_t* shuffled_input_data = + reinterpret_cast<const int8_t*>(shuffled_input_workspace_data); for (int c = 0; c < output_depth; c += 4) { // Internal accumulation.
// Initialize accumulator with the bias-value. - int32 accum[4] = {0}; + int32_t accum[4] = {0}; // Accumulation loop. for (int d = 0; d < accum_depth; d += 16) { for (int i = 0; i < 4; i++) { for (int j = 0; j < 16; j++) { - int8 input_val = shuffled_input_data[d + j]; - int8 weights_val = *shuffled_weights_ptr++; + int8_t input_val = shuffled_input_data[d + j]; + int8_t weights_val = *shuffled_weights_ptr++; accum[i] += weights_val * input_val; } } } for (int i = 0; i < 4; i++) { // Add bias value - int32 acc = accum[i] + bias_data[c + i]; - // Down-scale the final int32 accumulator to the scale used by our + int32_t acc = accum[i] + bias_data[c + i]; + // Down-scale the final int32_t accumulator to the scale used by our // (16-bit, typically 3 integer bits) fixed-point format. The quantized // multiplier and shift here have been pre-computed offline // (e.g. by toco). acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); - // Saturate, cast to int16, and store to output array. + // Saturate, cast to int16_t, and store to output array. acc = std::max(acc, output_activation_min); acc = std::min(acc, output_activation_max); output_ptr[c + i] = acc; } } } else if (batches == 4) { - int16* output_ptr = output_data; + int16_t* output_ptr = output_data; // Shuffled weights have had their sign bit (0x80) pre-flipped (xor'd) - // so that just reinterpreting them as int8 values is equivalent to + // so that just reinterpreting them as int8_t values is equivalent to // subtracting 128 from them, thus implementing for free the subtraction of // the zero_point value 128. - const int8* shuffled_weights_ptr = - reinterpret_cast(shuffled_weights_data); + const int8_t* shuffled_weights_ptr = + reinterpret_cast(shuffled_weights_data); // Likewise, we preshuffled and pre-xored the input data above. - const int8* shuffled_input_data = - reinterpret_cast(shuffled_input_workspace_data); + const int8_t* shuffled_input_data = + reinterpret_cast(shuffled_input_workspace_data); for (int c = 0; c < output_depth; c += 4) { - const int8* shuffled_input_ptr = shuffled_input_data; + const int8_t* shuffled_input_ptr = shuffled_input_data; // Accumulation loop. // Internal accumulation. // Initialize accumulator with the bias-value. - int32 accum[4][4]; + int32_t accum[4][4]; for (int i = 0; i < 4; i++) { for (int b = 0; b < 4; b++) { accum[i][b] = 0; @@ -281,8 +282,8 @@ inline void ShuffledFullyConnected( for (int i = 0; i < 4; i++) { for (int b = 0; b < 4; b++) { for (int j = 0; j < 16; j++) { - int8 input_val = shuffled_input_ptr[16 * b + j]; - int8 weights_val = shuffled_weights_ptr[16 * i + j]; + int8_t input_val = shuffled_input_ptr[16 * b + j]; + int8_t weights_val = shuffled_weights_ptr[16 * i + j]; accum[i][b] += weights_val * input_val; } } @@ -293,14 +294,14 @@ inline void ShuffledFullyConnected( for (int i = 0; i < 4; i++) { for (int b = 0; b < 4; b++) { // Add bias value - int32 acc = accum[i][b] + bias_data[c + i]; - // Down-scale the final int32 accumulator to the scale used by our + int32_t acc = accum[i][b] + bias_data[c + i]; + // Down-scale the final int32_t accumulator to the scale used by our // (16-bit, typically 3 integer bits) fixed-point format. The // quantized multiplier and shift here have been pre-computed offline // (e.g. by toco). acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); - // Saturate, cast to int16, and store to output array. + // Saturate, cast to int16_t, and store to output array. 
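The sign-bit trick described in the comments above can be verified on its own. A small self-contained check, illustrative rather than part of the kernels, assuming a two's complement target:

#include <cstdint>
// XOR-ing a uint8_t value with 0x80 and reading the result back as int8_t
// is the same as subtracting the zero point 128 from it.
inline bool SignBitTrickHolds() {
  for (int v = 0; v < 256; ++v) {
    const int8_t flipped = static_cast<int8_t>(static_cast<uint8_t>(v) ^ 0x80);
    if (flipped != v - 128) return false;
  }
  return true;
}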
acc = std::max(acc, output_activation_min); acc = std::min(acc, output_activation_max); output_ptr[b * output_depth + c + i] = acc; diff --git a/tensorflow/lite/kernels/internal/reference/hard_swish.h b/tensorflow/lite/kernels/internal/reference/hard_swish.h index dd07b09c3b3..cda1b5cf0ad 100644 --- a/tensorflow/lite/kernels/internal/reference/hard_swish.h +++ b/tensorflow/lite/kernels/internal/reference/hard_swish.h @@ -86,7 +86,7 @@ inline void HardSwish(const HardSwishParams& params, // (reluish_multiplier_fixedpoint) and bit-shift such that we represent // that input value on the scale where the real value 3.0f is represented // by the quantized value 32768. (+32768 is actually not representable as - // int16, so this saturates at +32767, and that is seen empirically to be + // int16_t, so this saturates at +32767, and that is seen empirically to be // a negligible contribution to numerical error/bias). // // This code is careful to correctly implement any magnitude of multiplier, diff --git a/tensorflow/lite/kernels/internal/reference/integer_ops/add.h b/tensorflow/lite/kernels/internal/reference/integer_ops/add.h index 69b42e08a6d..88ca246eaf4 100644 --- a/tensorflow/lite/kernels/internal/reference/integer_ops/add.h +++ b/tensorflow/lite/kernels/internal/reference/integer_ops/add.h @@ -35,22 +35,22 @@ inline void AddElementwise(int size, const ArithmeticParams& params, TFLITE_DCHECK_LE(params.input2_offset, int8_max_value); for (int i = 0; i < size; ++i) { - const int32 input1_val = params.input1_offset + input1_data[i]; - const int32 input2_val = params.input2_offset + input2_data[i]; - const int32 shifted_input1_val = input1_val * (1 << params.left_shift); - const int32 shifted_input2_val = input2_val * (1 << params.left_shift); - const int32 scaled_input1_val = + const int32_t input1_val = params.input1_offset + input1_data[i]; + const int32_t input2_val = params.input2_offset + input2_data[i]; + const int32_t shifted_input1_val = input1_val * (1 << params.left_shift); + const int32_t shifted_input2_val = input2_val * (1 << params.left_shift); + const int32_t scaled_input1_val = MultiplyByQuantizedMultiplierSmallerThanOneExp( shifted_input1_val, params.input1_multiplier, params.input1_shift); - const int32 scaled_input2_val = + const int32_t scaled_input2_val = MultiplyByQuantizedMultiplierSmallerThanOneExp( shifted_input2_val, params.input2_multiplier, params.input2_shift); - const int32 raw_sum = scaled_input1_val + scaled_input2_val; - const int32 raw_output = + const int32_t raw_sum = scaled_input1_val + scaled_input2_val; + const int32_t raw_output = MultiplyByQuantizedMultiplierSmallerThanOneExp( raw_sum, params.output_multiplier, params.output_shift) + params.output_offset; - const int32 clamped_output = + const int32_t clamped_output = std::min(params.quantized_activation_max, std::max(params.quantized_activation_min, raw_output)); output_data[i] = static_cast(clamped_output); diff --git a/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h b/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h index df6b787338d..f4bcb2bd06e 100644 --- a/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h +++ b/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h @@ -22,25 +22,25 @@ namespace reference_integer_ops { // Fixed-point per-channel-quantization convolution reference kernel. 
inline void ConvPerChannel( - const ConvParams& params, const int32* output_multiplier, - const int32* output_shift, const RuntimeShape& input_shape, - const int8* input_data, const RuntimeShape& filter_shape, - const int8* filter_data, const RuntimeShape& bias_shape, - const int32* bias_data, const RuntimeShape& output_shape, - int8* output_data) { + const ConvParams& params, const int32_t* output_multiplier, + const int32_t* output_shift, const RuntimeShape& input_shape, + const int8_t* input_data, const RuntimeShape& filter_shape, + const int8_t* filter_data, const RuntimeShape& bias_shape, + const int32_t* bias_data, const RuntimeShape& output_shape, + int8_t* output_data) { // Get parameters. - const int32 input_offset = params.input_offset; // r = s(q - Z) + const int32_t input_offset = params.input_offset; // r = s(q - Z) const int stride_width = params.stride_width; const int stride_height = params.stride_height; const int dilation_width_factor = params.dilation_width_factor; const int dilation_height_factor = params.dilation_height_factor; const int pad_width = params.padding_values.width; const int pad_height = params.padding_values.height; - const int32 output_offset = params.output_offset; + const int32_t output_offset = params.output_offset; // Set min and max value of the output. - const int32 output_activation_min = params.quantized_activation_min; - const int32 output_activation_max = params.quantized_activation_max; + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; // Consistency check. TFLITE_DCHECK_LE(output_activation_min, output_activation_max); @@ -67,7 +67,7 @@ inline void ConvPerChannel( for (int out_channel = 0; out_channel < output_depth; ++out_channel) { const int in_x_origin = (out_x * stride_width) - pad_width; const int in_y_origin = (out_y * stride_height) - pad_height; - int32 acc = 0; + int32_t acc = 0; for (int filter_y = 0; filter_y < filter_height; ++filter_y) { for (int filter_x = 0; filter_x < filter_width; ++filter_x) { for (int in_channel = 0; in_channel < input_depth; ++in_channel) { @@ -79,18 +79,18 @@ inline void ConvPerChannel( (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height); if (is_point_inside_image) { - int32 input_val = input_data[Offset(input_shape, batch, in_y, - in_x, in_channel)]; - int32 filter_val = + int32_t input_val = input_data[Offset( + input_shape, batch, in_y, in_x, in_channel)]; + int32_t filter_val = filter_data[Offset(filter_shape, out_channel, filter_y, filter_x, in_channel)]; // Accumulate with 32 bits accumulator. // In the nudging process during model quantization, we force // real value of 0.0 be represented by a quantized value. This - // guarantees that the input_offset is a int8, even though it - // is represented using int32. - // int32 += int8 * (int8 - int8) so the highest value we can - // get from each accumulation is [-127, 127] * ([-128, 127] - + // guarantees that the input_offset is a int8_t, even though + // it is represented using int32_t. int32_t += int8_t * + // (int8_t - int8_t) so the highest value we can get from each + // accumulation is [-127, 127] * ([-128, 127] - // [-128, 127]), which is [-32512, 32512]. log2(32512) // = 14.98, which means we can accumulate at least 2^16 // multiplications without overflow. The accumulator is @@ -125,12 +125,12 @@ inline void ConvPerChannel( // Fixed-point per-channel-quantization convolution reference kernel. 
// 16-bit data and 8-bit filter inline void ConvPerChannel( - const ConvParams& params, const int32* output_multiplier, - const int32* output_shift, const RuntimeShape& input_shape, - const int16* input_data, const RuntimeShape& filter_shape, - const int8* filter_data, const RuntimeShape& bias_shape, + const ConvParams& params, const int32_t* output_multiplier, + const int32_t* output_shift, const RuntimeShape& input_shape, + const int16_t* input_data, const RuntimeShape& filter_shape, + const int8_t* filter_data, const RuntimeShape& bias_shape, const std::int64_t* bias_data, const RuntimeShape& output_shape, - int16* output_data) { + int16_t* output_data) { // Get parameters. const int stride_width = params.stride_width; const int stride_height = params.stride_height; @@ -140,8 +140,8 @@ inline void ConvPerChannel( const int pad_height = params.padding_values.height; // Set min and max value of the output. - const int32 output_activation_min = params.quantized_activation_min; - const int32 output_activation_max = params.quantized_activation_max; + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; // Consistency check. TFLITE_DCHECK_LE(output_activation_min, output_activation_max); @@ -180,13 +180,13 @@ inline void ConvPerChannel( (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height); if (is_point_inside_image) { - int32 input_val = input_data[Offset(input_shape, batch, in_y, - in_x, in_channel)]; - int32 filter_val = + int32_t input_val = input_data[Offset( + input_shape, batch, in_y, in_x, in_channel)]; + int32_t filter_val = filter_data[Offset(filter_shape, out_channel, filter_y, filter_x, in_channel)]; // Accumulate with 64 bits accumulator. - // int64 += int8 * int16 so the highest value we can + // int64_t += int8_t * int16_t so the highest value we can // get from each accumulation is [-127, 127] * ([-32768, // 32767] - // [-32768, 32767]), which is [-8322945, 8322945]. diff --git a/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h b/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h index a4e00981367..6f54e47f344 100644 --- a/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h +++ b/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h @@ -20,12 +20,12 @@ limitations under the License. namespace tflite { namespace reference_integer_ops { inline void DepthwiseConvPerChannel( - const DepthwiseParams& params, const int32* output_multiplier, - const int32* output_shift, const RuntimeShape& input_shape, - const int8* input_data, const RuntimeShape& filter_shape, - const int8* filter_data, const RuntimeShape& bias_shape, - const int32* bias_data, const RuntimeShape& output_shape, - int8* output_data) { + const DepthwiseParams& params, const int32_t* output_multiplier, + const int32_t* output_shift, const RuntimeShape& input_shape, + const int8_t* input_data, const RuntimeShape& filter_shape, + const int8_t* filter_data, const RuntimeShape& bias_shape, + const int32_t* bias_data, const RuntimeShape& output_shape, + int8_t* output_data) { // Get parameters. // TODO(b/141565753): Re-introduce ScopedProfilingLabel on Micro. 
const int stride_width = params.stride_width; @@ -35,10 +35,10 @@ inline void DepthwiseConvPerChannel( const int pad_width = params.padding_values.width; const int pad_height = params.padding_values.height; const int depth_multiplier = params.depth_multiplier; - const int32 input_offset = params.input_offset; - const int32 output_offset = params.output_offset; - const int32 output_activation_min = params.quantized_activation_min; - const int32 output_activation_max = params.quantized_activation_max; + const int32_t input_offset = params.input_offset; + const int32_t output_offset = params.output_offset; + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; // Check dimensions of the tensors. TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); @@ -66,7 +66,7 @@ inline void DepthwiseConvPerChannel( const int output_channel = m + in_channel * depth_multiplier; const int in_x_origin = (out_x * stride_width) - pad_width; const int in_y_origin = (out_y * stride_height) - pad_height; - int32 acc = 0; + int32_t acc = 0; for (int filter_y = 0; filter_y < filter_height; ++filter_y) { for (int filter_x = 0; filter_x < filter_width; ++filter_x) { const int in_x = in_x_origin + dilation_width_factor * filter_x; @@ -77,17 +77,17 @@ inline void DepthwiseConvPerChannel( (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height); if (is_point_inside_image) { - int32 input_val = input_data[Offset(input_shape, batch, in_y, - in_x, in_channel)]; - int32 filter_val = filter_data[Offset( + int32_t input_val = input_data[Offset( + input_shape, batch, in_y, in_x, in_channel)]; + int32_t filter_val = filter_data[Offset( filter_shape, 0, filter_y, filter_x, output_channel)]; // Accumulate with 32 bits accumulator. // In the nudging process during model quantization, we force // real value of 0.0 be represented by a quantized value. This - // guarantees that the input_offset is a int8, even though it - // is represented using int32. - // int32 += int8 * (int8 - int8) so the highest value we can - // get from each accumulation is [-127, 127] * ([-128, 127] - + // guarantees that the input_offset is a int8_t, even though + // it is represented using int32_t. int32_t += int8_t * + // (int8_t - int8_t) so the highest value we can get from each + // accumulation is [-127, 127] * ([-128, 127] - // [-128, 127]), which is [-32512, 32512]. log2(32512) // = 14.98, which means we can accumulate at least 2^16 // multiplications without overflow. The accumulator is @@ -120,12 +120,12 @@ inline void DepthwiseConvPerChannel( } inline void DepthwiseConvPerChannel( - const DepthwiseParams& params, const int32* output_multiplier, - const int32* output_shift, const RuntimeShape& input_shape, - const int16* input_data, const RuntimeShape& filter_shape, - const int8* filter_data, const RuntimeShape& bias_shape, + const DepthwiseParams& params, const int32_t* output_multiplier, + const int32_t* output_shift, const RuntimeShape& input_shape, + const int16_t* input_data, const RuntimeShape& filter_shape, + const int8_t* filter_data, const RuntimeShape& bias_shape, const std::int64_t* bias_data, const RuntimeShape& output_shape, - int16* output_data) { + int16_t* output_data) { // Get parameters. 
const int stride_width = params.stride_width; const int stride_height = params.stride_height; @@ -134,8 +134,8 @@ inline void DepthwiseConvPerChannel( const int pad_width = params.padding_values.width; const int pad_height = params.padding_values.height; const int depth_multiplier = params.depth_multiplier; - const int32 output_activation_min = params.quantized_activation_min; - const int32 output_activation_max = params.quantized_activation_max; + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; // Check dimensions of the tensors. TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); @@ -174,9 +174,9 @@ inline void DepthwiseConvPerChannel( (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height); if (is_point_inside_image) { - int32 input_val = input_data[Offset(input_shape, batch, in_y, - in_x, in_channel)]; - int32 filter_val = filter_data[Offset( + int32_t input_val = input_data[Offset( + input_shape, batch, in_y, in_x, in_channel)]; + int32_t filter_val = filter_data[Offset( filter_shape, 0, filter_y, filter_x, output_channel)]; // Accumulate with 64 bits accumulator. // We assume maximum of 2^16 accumulations as with the 8-bit @@ -190,7 +190,7 @@ inline void DepthwiseConvPerChannel( if (bias_data) { acc += bias_data[output_channel]; } - int32 scaled_acc = MultiplyByQuantizedMultiplier( + int32_t scaled_acc = MultiplyByQuantizedMultiplier( acc, output_multiplier[output_channel], output_shift[output_channel]); scaled_acc = std::max(scaled_acc, output_activation_min); @@ -207,8 +207,8 @@ inline void DepthwiseConvPerChannel( inline void DepthwiseConvHybridPerChannel( const DepthwiseParams& params, float* scaling_factors_ptr, - const RuntimeShape& input_shape, const int8* input_data, - const RuntimeShape& filter_shape, const int8* filter_data, + const RuntimeShape& input_shape, const int8_t* input_data, + const RuntimeShape& filter_shape, const int8_t* filter_data, const RuntimeShape& bias_shape, const float* bias_data, const RuntimeShape& output_shape, float* output_data, const float* per_channel_scale, int32_t* input_offset) { @@ -247,7 +247,7 @@ inline void DepthwiseConvHybridPerChannel( const int output_channel = m + in_channel * depth_multiplier; const int in_x_origin = (out_x * stride_width) - pad_width; const int in_y_origin = (out_y * stride_height) - pad_height; - int32 acc = 0; + int32_t acc = 0; for (int filter_y = 0; filter_y < filter_height; ++filter_y) { for (int filter_x = 0; filter_x < filter_width; ++filter_x) { const int in_x = in_x_origin + dilation_width_factor * filter_x; @@ -258,9 +258,9 @@ inline void DepthwiseConvHybridPerChannel( (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height); if (is_point_inside_image) { - int32 input_val = input_data[Offset(input_shape, batch, in_y, - in_x, in_channel)]; - int32 filter_val = filter_data[Offset( + int32_t input_val = input_data[Offset( + input_shape, batch, in_y, in_x, in_channel)]; + int32_t filter_val = filter_data[Offset( filter_shape, 0, filter_y, filter_x, output_channel)]; acc += filter_val * (input_val - input_offset[batch]); } diff --git a/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h b/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h index fd9cb0180e1..2bc3e794855 100644 --- a/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h +++ b/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h @@ 
-24,15 +24,15 @@ inline void FullyConnected( const FullyConnectedParams& params, const RuntimeShape& input_shape, const int8_t* input_data, const RuntimeShape& filter_shape, const int8_t* filter_data, const RuntimeShape& bias_shape, - const int32* bias_data, const RuntimeShape& output_shape, + const int32_t* bias_data, const RuntimeShape& output_shape, int8_t* output_data) { - const int32 input_offset = params.input_offset; - const int32 filter_offset = params.weights_offset; - const int32 output_offset = params.output_offset; - const int32 output_multiplier = params.output_multiplier; + const int32_t input_offset = params.input_offset; + const int32_t filter_offset = params.weights_offset; + const int32_t output_offset = params.output_offset; + const int32_t output_multiplier = params.output_multiplier; const int output_shift = params.output_shift; - const int32 output_activation_min = params.quantized_activation_min; - const int32 output_activation_max = params.quantized_activation_max; + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2); TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2); @@ -44,10 +44,10 @@ inline void FullyConnected( const int accum_depth = filter_shape.Dims(filter_dim_count - 1); for (int b = 0; b < batches; ++b) { for (int out_c = 0; out_c < output_depth; ++out_c) { - int32 acc = 0; + int32_t acc = 0; for (int d = 0; d < accum_depth; ++d) { - int32 input_val = input_data[b * accum_depth + d]; - int32 filter_val = filter_data[out_c * accum_depth + d]; + int32_t input_val = input_data[b * accum_depth + d]; + int32_t filter_val = filter_data[out_c * accum_depth + d]; acc += (filter_val + filter_offset) * (input_val + input_offset); } if (bias_data) { @@ -68,11 +68,11 @@ inline void FullyConnected( const int8_t* filter_data, const RuntimeShape& bias_shape, const int64_t* bias_data, const RuntimeShape& output_shape, int16_t* output_data) { - const int32 filter_offset = params.weights_offset; - const int32 output_multiplier = params.output_multiplier; + const int32_t filter_offset = params.weights_offset; + const int32_t output_multiplier = params.output_multiplier; const int output_shift = params.output_shift; - const int32 output_activation_min = params.quantized_activation_min; - const int32 output_activation_max = params.quantized_activation_max; + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2); TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2); @@ -86,8 +86,8 @@ inline void FullyConnected( for (int out_c = 0; out_c < output_depth; ++out_c) { int64_t acc = 0; for (int d = 0; d < accum_depth; ++d) { - int32 input_val = input_data[b * accum_depth + d]; - int32 filter_val = filter_data[out_c * accum_depth + d]; + int32_t input_val = input_data[b * accum_depth + d]; + int32_t filter_val = filter_data[out_c * accum_depth + d]; acc += (filter_val + filter_offset) * input_val; } if (bias_data) { diff --git a/tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h b/tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h index 7488a2147c4..31f2de986c8 100644 --- a/tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h +++ b/tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h @@ -21,8 +21,8 @@ namespace tflite { 
namespace reference_integer_ops { inline void L2Normalization(int32_t input_zero_point, int32_t outer_size, - int32_t depth, const int8* input_data, - int8* output_data) { + int32_t depth, const int8_t* input_data, + int8_t* output_data) { static constexpr int8_t kMinInt8 = std::numeric_limits<int8_t>::min(); static constexpr int8_t kMaxInt8 = std::numeric_limits<int8_t>::max(); // The output scale must be in sync with Prepare(). @@ -30,7 +30,7 @@ inline void L2Normalization(int32_t input_zero_point, int32_t outer_size, // to [-1, 127/128]. static constexpr int32_t kOutputScale = 7; for (int outer_index = 0; outer_index < outer_size; ++outer_index) { - // int32 = (int8 - int8) ^ 2. + // int32_t = (int8_t - int8_t) ^ 2. // ([-128, 127] - [-128, 127]) ^ 2 = [0, (2^8 - 1)^2] so the accumulator is // safe from overflowing in at least 2^16 steps. int32_t acc = 0; @@ -55,7 +55,7 @@ inline void L2Normalization(int32_t input_zero_point, int32_t outer_size, std::min(static_cast<int32_t>(kMaxInt8), std::max(static_cast<int32_t>(kMinInt8), output_in_q24)); output_data[depth * outer_index + inner_index] = - static_cast<int8>(output_in_q24); + static_cast<int8_t>(output_in_q24); } } } diff --git a/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h b/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h index a815c3f5252..b80838aa089 100644 --- a/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h +++ b/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h @@ -27,14 +27,14 @@ inline void MulElementwise(int size, const ArithmeticParams& params, const T* input1_data, const T* input2_data, T* output_data) { for (int i = 0; i < size; ++i) { - const int32 input1_val = params.input1_offset + input1_data[i]; - const int32 input2_val = params.input2_offset + input2_data[i]; - const int32 unclamped_result = + const int32_t input1_val = params.input1_offset + input1_data[i]; + const int32_t input2_val = params.input2_offset + input2_data[i]; + const int32_t unclamped_result = params.output_offset + MultiplyByQuantizedMultiplier(input1_val * input2_val, params.output_multiplier, params.output_shift); - const int32 clamped_output = + const int32_t clamped_output = std::min(params.quantized_activation_max, std::max(params.quantized_activation_min, unclamped_result)); output_data[i] = static_cast<T>(clamped_output); @@ -57,13 +57,13 @@ inline void Mul(const ArithmeticParams& params, // Mul with 16 bit inputs and int8_t outputs.
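The kOutputScale = 7 convention noted above means each int8_t output q stands for the real value q / 2^7, which gives the quoted range [-1, 127/128]. A one-line illustrative helper, with a name invented here and not part of the kernel:

#include <cstdint>
// Maps a quantized L2-normalization output back to its real value.
inline float DequantizeL2NormOutput(int8_t q) {
  return static_cast<float>(q) / 128.0f;
}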
inline void Mul(const ArithmeticParams& params, - const RuntimeShape& input1_shape, const int16* input1_data, - const RuntimeShape& input2_shape, const int16* input2_data, + const RuntimeShape& input1_shape, const int16_t* input1_data, + const RuntimeShape& input2_shape, const int16_t* input2_data, const RuntimeShape& output_shape, int8_t* output_data) { ruy::profiler::ScopeLabel label("Mul/Int16Int8"); - int32 output_offset = params.output_offset; - int32 output_activation_min = params.quantized_activation_min; - int32 output_activation_max = params.quantized_activation_max; + int32_t output_offset = params.output_offset; + int32_t output_activation_min = params.quantized_activation_min; + int32_t output_activation_max = params.quantized_activation_max; TFLITE_DCHECK_LE(output_activation_min, output_activation_max); const int flat_size = @@ -75,12 +75,12 @@ inline void Mul(const ArithmeticParams& params, F0 unclamped_result = F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]); - int16 rescaled_result = + int16_t rescaled_result = gemmlowp::RoundingDivideByPOT(unclamped_result.raw(), 8); - int16 clamped_result = - std::min(output_activation_max - output_offset, rescaled_result); - clamped_result = - std::max(output_activation_min - output_offset, clamped_result); + int16_t clamped_result = std::min( + output_activation_max - output_offset, rescaled_result); + clamped_result = std::max(output_activation_min - output_offset, + clamped_result); output_data[i] = output_offset + clamped_result; } } @@ -104,18 +104,18 @@ inline void BroadcastMul4DSlow( for (int y = 0; y < extended_output_shape.Dims(1); ++y) { for (int x = 0; x < extended_output_shape.Dims(2); ++x) { for (int c = 0; c < extended_output_shape.Dims(3); ++c) { - const int32 input1_val = + const int32_t input1_val = params.input1_offset + input1_data[SubscriptToIndex(desc1, b, y, x, c)]; - const int32 input2_val = + const int32_t input2_val = params.input2_offset + input2_data[SubscriptToIndex(desc2, b, y, x, c)]; - const int32 unclamped_result = + const int32_t unclamped_result = params.output_offset + MultiplyByQuantizedMultiplier(input1_val * input2_val, params.output_multiplier, params.output_shift); - const int32 clamped_output = std::min( + const int32_t clamped_output = std::min( params.quantized_activation_max, std::max(params.quantized_activation_min, unclamped_result)); output_data[Offset(extended_output_shape, b, y, x, c)] = diff --git a/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h b/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h index 6b49d2b150b..17944bc47dd 100644 --- a/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h +++ b/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h @@ -22,8 +22,9 @@ namespace tflite { namespace reference_integer_ops { inline void AveragePool(const PoolParams& params, - const RuntimeShape& input_shape, const int8* input_data, - const RuntimeShape& output_shape, int8* output_data) { + const RuntimeShape& input_shape, + const int8_t* input_data, + const RuntimeShape& output_shape, int8_t* output_data) { TFLITE_DCHECK_LE(params.quantized_activation_min, params.quantized_activation_max); TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); @@ -52,7 +53,7 @@ inline void AveragePool(const PoolParams& params, const int filter_y_start = std::max(0, -in_y_origin); const int filter_y_end = std::min(params.filter_height, input_height - in_y_origin); - int32 acc = 0; + int32_t acc = 0; int filter_count = 0; for (int filter_y = 
filter_y_start; filter_y < filter_y_end; ++filter_y) { @@ -71,7 +72,7 @@ inline void AveragePool(const PoolParams& params, acc = std::max(acc, params.quantized_activation_min); acc = std::min(acc, params.quantized_activation_max); output_data[Offset(output_shape, batch, out_y, out_x, channel)] = - static_cast(acc); + static_cast(acc); } } } @@ -79,8 +80,8 @@ inline void AveragePool(const PoolParams& params, } inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape, - const int8* input_data, const RuntimeShape& output_shape, - int8* output_data) { + const int8_t* input_data, const RuntimeShape& output_shape, + int8_t* output_data) { TFLITE_DCHECK_LE(params.quantized_activation_min, params.quantized_activation_max); TFLITE_DCHECK_GE(params.quantized_activation_min, @@ -137,8 +138,9 @@ inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape, inline void AveragePool(const PoolParams& params, const RuntimeShape& input_shape, - const int16* input_data, - const RuntimeShape& output_shape, int16* output_data) { + const int16_t* input_data, + const RuntimeShape& output_shape, + int16_t* output_data) { TFLITE_DCHECK_LE(params.quantized_activation_min, params.quantized_activation_max); TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); @@ -167,7 +169,7 @@ inline void AveragePool(const PoolParams& params, const int filter_y_start = std::max(0, -in_y_origin); const int filter_y_end = std::min(params.filter_height, input_height - in_y_origin); - int32 acc = 0; + int32_t acc = 0; int filter_count = 0; for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y) { @@ -186,7 +188,7 @@ inline void AveragePool(const PoolParams& params, acc = std::max(acc, params.quantized_activation_min); acc = std::min(acc, params.quantized_activation_max); output_data[Offset(output_shape, batch, out_y, out_x, channel)] = - static_cast(acc); + static_cast(acc); } } } @@ -194,8 +196,8 @@ inline void AveragePool(const PoolParams& params, } inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape, - const int16* input_data, const RuntimeShape& output_shape, - int16* output_data) { + const int16_t* input_data, const RuntimeShape& output_shape, + int16_t* output_data) { TFLITE_DCHECK_LE(params.quantized_activation_min, params.quantized_activation_max); TFLITE_DCHECK_GE(params.quantized_activation_min, diff --git a/tensorflow/lite/kernels/internal/reference/l2normalization.h b/tensorflow/lite/kernels/internal/reference/l2normalization.h index 00697c2e548..7587d2b5c2e 100644 --- a/tensorflow/lite/kernels/internal/reference/l2normalization.h +++ b/tensorflow/lite/kernels/internal/reference/l2normalization.h @@ -52,40 +52,39 @@ inline void L2Normalization(const tflite::L2NormalizationParams& op_params, inline void L2Normalization(const tflite::L2NormalizationParams& op_params, const RuntimeShape& input_shape, - const uint8* input_data, + const uint8_t* input_data, const RuntimeShape& output_shape, - uint8* output_data) { + uint8_t* output_data) { const int trailing_dim = input_shape.DimensionsCount() - 1; const int depth = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); - const int32 input_zero_point = op_params.input_zero_point; + const int32_t input_zero_point = op_params.input_zero_point; for (int i = 0; i < outer_size; ++i) { - int32 square_l2_norm = 0; + int32_t square_l2_norm = 0; for (int c = 0; c < depth; c++) { - int32 diff = 
input_data[depth * i + c] - input_zero_point; + int32_t diff = input_data[depth * i + c] - input_zero_point; square_l2_norm += diff * diff; } - int32 inv_l2norm_multiplier; + int32_t inv_l2norm_multiplier; int inv_l2norm_shift; GetInvSqrtQuantizedMultiplierExp(square_l2_norm, kReverseShift, &inv_l2norm_multiplier, &inv_l2norm_shift); for (int c = 0; c < depth; c++) { - int32 diff = input_data[depth * i + c] - input_zero_point; - int32 rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOneExp( + int32_t diff = input_data[depth * i + c] - input_zero_point; + int32_t rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOneExp( 128 * diff, inv_l2norm_multiplier, inv_l2norm_shift); - int32 unclamped_output_val = 128 + rescaled_diff; - int32 output_val = - std::min(static_cast(255), - std::max(static_cast(0), unclamped_output_val)); - output_data[depth * i + c] = static_cast(output_val); + int32_t unclamped_output_val = 128 + rescaled_diff; + int32_t output_val = + std::min(static_cast(255), + std::max(static_cast(0), unclamped_output_val)); + output_data[depth * i + c] = static_cast(output_val); } } } - } // namespace reference_ops } // namespace tflite #endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_ diff --git a/tensorflow/lite/kernels/internal/reference/logistic.h b/tensorflow/lite/kernels/internal/reference/logistic.h index 8aba51896df..64b7133bec6 100644 --- a/tensorflow/lite/kernels/internal/reference/logistic.h +++ b/tensorflow/lite/kernels/internal/reference/logistic.h @@ -66,8 +66,8 @@ inline void Logistic(const LogisticParams&, const RuntimeShape& input_shape, } inline void Logistic(const LogisticParams& params, - const RuntimeShape& input_shape, const int16* input_data, - const RuntimeShape& output_shape, int16* output_data) { + const RuntimeShape& input_shape, const int16_t* input_data, + const RuntimeShape& output_shape, int16_t* output_data) { const int flat_size = MatchingFlatSize(input_shape, output_shape); for (int i = 0; i < flat_size; i++) { @@ -84,12 +84,12 @@ inline void Logistic(const LogisticParams& params, } } -// Quantized int8 logistic activation. Cheats by dequantizing and requantizing -// around the floating point logistic method. This implementation is slow on -// platforms without a floating point unit. +// Quantized int8_t logistic activation. Cheats by dequantizing and +// requantizing around the floating point logistic method. This implementation +// is slow on platforms without a floating point unit. -// TODO(b/141211002): Delete this int8 implementation once we can reuse the -// approach used in TFLite for int8 Logistic. +// TODO(b/141211002): Delete this int8_t implementation once we can reuse the +// approach used in TFLite for int8_t Logistic. inline void Logistic(const RuntimeShape& input_shape, const int8_t* input_data, float input_scale, int input_zero_point, const RuntimeShape& output_shape, int8_t* output_data, diff --git a/tensorflow/lite/kernels/internal/reference/mul.h b/tensorflow/lite/kernels/internal/reference/mul.h index 54e947db9ca..0578b81bfbc 100644 --- a/tensorflow/lite/kernels/internal/reference/mul.h +++ b/tensorflow/lite/kernels/internal/reference/mul.h @@ -24,20 +24,20 @@ namespace reference_ops { // Element-wise mul that can often be used for inner loop of broadcast Mul as // well as the non-broadcast Mul. 
inline void MulElementwise(int size, const ArithmeticParams& params, - const uint8* input1_data, const uint8* input2_data, - uint8* output_data) { + const uint8_t* input1_data, + const uint8_t* input2_data, uint8_t* output_data) { for (int i = 0; i < size; ++i) { - const int32 input1_val = params.input1_offset + input1_data[i]; - const int32 input2_val = params.input2_offset + input2_data[i]; - const int32 unclamped_result = + const int32_t input1_val = params.input1_offset + input1_data[i]; + const int32_t input2_val = params.input2_offset + input2_data[i]; + const int32_t unclamped_result = params.output_offset + MultiplyByQuantizedMultiplier(input1_val * input2_val, params.output_multiplier, params.output_shift); - const int32 clamped_output = + const int32_t clamped_output = std::min(params.quantized_activation_max, std::max(params.quantized_activation_min, unclamped_result)); - output_data[i] = static_cast(clamped_output); + output_data[i] = static_cast(clamped_output); } } @@ -60,9 +60,9 @@ inline void Mul(const ArithmeticParams& params, } inline void Mul(const ArithmeticParams& params, - const RuntimeShape& input1_shape, const uint8* input1_data, - const RuntimeShape& input2_shape, const uint8* input2_data, - const RuntimeShape& output_shape, uint8* output_data) { + const RuntimeShape& input1_shape, const uint8_t* input1_data, + const RuntimeShape& input2_shape, const uint8_t* input2_data, + const RuntimeShape& output_shape, uint8_t* output_data) { TFLITE_DCHECK_LE(params.quantized_activation_min, params.quantized_activation_max); const int flat_size = @@ -73,11 +73,11 @@ inline void Mul(const ArithmeticParams& params, inline void BroadcastMul4DSlow(const ArithmeticParams& params, const RuntimeShape& input1_shape, - const uint8* input1_data, + const uint8_t* input1_data, const RuntimeShape& input2_shape, - const uint8* input2_data, + const uint8_t* input2_data, const RuntimeShape& output_shape, - uint8* output_data) { + uint8_t* output_data) { NdArrayDesc<4> desc1; NdArrayDesc<4> desc2; NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, @@ -89,22 +89,22 @@ inline void BroadcastMul4DSlow(const ArithmeticParams& params, for (int y = 0; y < extended_output_shape.Dims(1); ++y) { for (int x = 0; x < extended_output_shape.Dims(2); ++x) { for (int c = 0; c < extended_output_shape.Dims(3); ++c) { - const int32 input1_val = + const int32_t input1_val = params.input1_offset + input1_data[SubscriptToIndex(desc1, b, y, x, c)]; - const int32 input2_val = + const int32_t input2_val = params.input2_offset + input2_data[SubscriptToIndex(desc2, b, y, x, c)]; - const int32 unclamped_result = + const int32_t unclamped_result = params.output_offset + MultiplyByQuantizedMultiplier(input1_val * input2_val, params.output_multiplier, params.output_shift); - const int32 clamped_output = std::min( + const int32_t clamped_output = std::min( params.quantized_activation_max, std::max(params.quantized_activation_min, unclamped_result)); output_data[Offset(extended_output_shape, b, y, x, c)] = - static_cast(clamped_output); + static_cast(clamped_output); } } } diff --git a/tensorflow/lite/kernels/internal/reference/pad.h b/tensorflow/lite/kernels/internal/reference/pad.h index 20fe3434ae5..2a040cefc91 100644 --- a/tensorflow/lite/kernels/internal/reference/pad.h +++ b/tensorflow/lite/kernels/internal/reference/pad.h @@ -32,8 +32,8 @@ constexpr int PadKernelMaxDimensionCount() { return 4; } // equivalent to a simple input1_data. For Pad, it should point to a zero // value. 
// -// Note that two typenames are required, so that T=P=int32 is considered a -// specialization distinct from P=int32. +// Note that two typenames are required, so that T=P=int32_t is considered a +// specialization distinct from P=int32_t. template inline void PadImpl(const tflite::PadParams& op_params, const RuntimeShape& input_shape, const T* input_data, @@ -116,11 +116,11 @@ inline void Pad(const tflite::PadParams& op_params, output_data); } -// The second (pad-value) input can be int32 when, say, the first is uint8. +// The second (pad-value) input can be int32_t when, say, the first is uint8_t. template inline void Pad(const tflite::PadParams& op_params, const RuntimeShape& input_shape, const T* input_data, - const int32* pad_value_ptr, const RuntimeShape& output_shape, + const int32_t* pad_value_ptr, const RuntimeShape& output_shape, T* output_data) { const T converted_pad_value = static_cast(*pad_value_ptr); PadImpl(op_params, input_shape, input_data, &converted_pad_value, @@ -130,9 +130,9 @@ inline void Pad(const tflite::PadParams& op_params, // This version avoids conflicting template matching. template <> inline void Pad(const tflite::PadParams& op_params, - const RuntimeShape& input_shape, const int32* input_data, - const int32* pad_value_ptr, const RuntimeShape& output_shape, - int32* output_data) { + const RuntimeShape& input_shape, const int32_t* input_data, + const int32_t* pad_value_ptr, const RuntimeShape& output_shape, + int32_t* output_data) { PadImpl(op_params, input_shape, input_data, pad_value_ptr, output_shape, output_data); } diff --git a/tensorflow/lite/kernels/internal/reference/pooling.h b/tensorflow/lite/kernels/internal/reference/pooling.h index a03359cda82..0872f5210c8 100644 --- a/tensorflow/lite/kernels/internal/reference/pooling.h +++ b/tensorflow/lite/kernels/internal/reference/pooling.h @@ -78,8 +78,9 @@ inline void AveragePool(const PoolParams& params, inline void AveragePool(const PoolParams& params, const RuntimeShape& input_shape, - const uint8* input_data, - const RuntimeShape& output_shape, uint8* output_data) { + const uint8_t* input_data, + const RuntimeShape& output_shape, + uint8_t* output_data) { TFLITE_DCHECK_LE(params.quantized_activation_min, params.quantized_activation_max); TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); @@ -108,7 +109,7 @@ inline void AveragePool(const PoolParams& params, const int filter_y_start = std::max(0, -in_y_origin); const int filter_y_end = std::min(params.filter_height, input_height - in_y_origin); - int32 acc = 0; + int32_t acc = 0; int filter_count = 0; for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y) { @@ -125,7 +126,7 @@ inline void AveragePool(const PoolParams& params, acc = std::max(acc, params.quantized_activation_min); acc = std::min(acc, params.quantized_activation_max); output_data[Offset(output_shape, batch, out_y, out_x, channel)] = - static_cast(acc); + static_cast(acc); } } } @@ -237,8 +238,8 @@ inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape, } inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape, - const uint8* input_data, const RuntimeShape& output_shape, - uint8* output_data) { + const uint8_t* input_data, const RuntimeShape& output_shape, + uint8_t* output_data) { TFLITE_DCHECK_LE(params.quantized_activation_min, params.quantized_activation_max); TFLITE_DCHECK_GE(params.quantized_activation_min, 0); @@ -269,7 +270,7 @@ inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape, const int 
filter_y_start = std::max(0, -in_y_origin); const int filter_y_end = std::min(params.filter_height, input_height - in_y_origin); - uint8 max = 0; + uint8_t max = 0; for (int filter_y = filter_y_start; filter_y < filter_y_end; ++filter_y) { for (int filter_x = filter_x_start; filter_x < filter_x_end; @@ -281,10 +282,10 @@ inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape, input_data[Offset(input_shape, batch, in_y, in_x, channel)]); } } - max = std::max(max, params.quantized_activation_min); - max = std::min(max, params.quantized_activation_max); + max = std::max(max, params.quantized_activation_min); + max = std::min(max, params.quantized_activation_max); output_data[Offset(output_shape, batch, out_y, out_x, channel)] = - static_cast(max); + static_cast(max); } } } diff --git a/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc b/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc index 4a8d4b0fb6a..d257a170091 100644 --- a/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc +++ b/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc @@ -97,13 +97,13 @@ void PortableAsymmetricQuantizeFloats(const float* values, const int size, zero_point_from_min_error < zero_point_from_max_error ? zero_point_from_min : zero_point_from_max; - int8 nudged_zero_point = 0; + int8_t nudged_zero_point = 0; if (zero_point_double <= qmin_double) { nudged_zero_point = kMinScale; } else if (zero_point_double >= qmax_double) { nudged_zero_point = kMaxScale; } else { - nudged_zero_point = static_cast(round(zero_point_double)); + nudged_zero_point = static_cast(round(zero_point_double)); } *scaling_factor = scale; *offset = nudged_zero_point; @@ -303,8 +303,8 @@ void PortableMatrixBatchVectorMultiplyAccumulateImpl( for (int row = 0; row < n_output; ++row) { int32_t acc = bias[row]; for (int col = 0; col < n_input; ++col) { - int8 input_val = input[batch * n_input + col]; - int8 weights_val = input_to_gate_weights[row * n_input + col]; + int8_t input_val = input[batch * n_input + col]; + int8_t weights_val = input_to_gate_weights[row * n_input + col]; acc += input_val * weights_val; } acc = MultiplyByQuantizedMultiplier(acc, multiplier, shift); @@ -349,8 +349,8 @@ void PortableMatrixBatchVectorMultiply(const int8_t* input, int32_t n_batch, int32_t n_input, int32_t n_cell, int8_t* gate_output, int8_t gate_output_zp) { - const int32_t int8_max = std::numeric_limits::max(); - const int32_t int8_min = std::numeric_limits::min(); + const int32_t int8_max = std::numeric_limits::max(); + const int32_t int8_min = std::numeric_limits::min(); for (int batch = 0; batch < n_batch; ++batch) { for (int row = 0; row < n_cell; ++row) { int32_t acc = 0; @@ -378,8 +378,8 @@ void PortableMatrixBatchVectorMultiply( int32_t proj_effective_scale_a, int32_t proj_effective_scale_b, const int32_t* gate_bias, int32_t n_batch, int32_t n_hidden, int32_t n_output, int32_t output_zp, int8_t* proj_output) { - const int16_t int8_max = std::numeric_limits::max(); - const int16_t int8_min = std::numeric_limits::min(); + const int16_t int8_max = std::numeric_limits::max(); + const int16_t int8_min = std::numeric_limits::min(); for (int batch = 0; batch < n_batch; ++batch) { for (int row = 0; row < n_output; ++row) { int64_t acc = gate_bias[row]; @@ -389,10 +389,10 @@ void PortableMatrixBatchVectorMultiply( int64_t curr = acc; acc += input_val * weights_val; if (input_val * weights_val > 0 && acc < curr) { - acc = std::numeric_limits::max(); + acc = 
std::numeric_limits::max(); } if (input_val * weights_val < 0 && acc > curr) { - acc = std::numeric_limits::min(); + acc = std::numeric_limits::min(); } } acc = MultiplyByQuantizedMultiplier(acc, proj_effective_scale_a, @@ -429,10 +429,10 @@ void PortableApplyLayerNorm(const int16_t* input, int32_t mean = static_cast(static_cast(sum) * 1024 / n_input); // TODO(jianlijianli): Avoids overflow but only works for POT n_input. - int32 temp = kTwoToPower20 / n_input; + int32_t temp = kTwoToPower20 / n_input; int64_t variance = sum_sq * temp - static_cast(mean) * static_cast(mean); - int32_t variance2 = static_cast(variance / kTwoToPower20); + int32_t variance2 = static_cast(variance / kTwoToPower20); if (variance2 < 1) { variance2 = variance_limit; } @@ -442,17 +442,17 @@ void PortableApplyLayerNorm(const int16_t* input, &stddev_inverse_a, &stddev_inverse_b); for (int j = 0; j < n_input; ++j) { - const int32 index = i * n_input + j; - int32 val = static_cast(input[index]); - int32 shifted = 1024 * val - mean; - int32 rescaled = MultiplyByQuantizedMultiplier(shifted, stddev_inverse_a, - stddev_inverse_b); + const int32_t index = i * n_input + j; + int32_t val = static_cast(input[index]); + int32_t shifted = 1024 * val - mean; + int32_t rescaled = MultiplyByQuantizedMultiplier( + shifted, stddev_inverse_a, stddev_inverse_b); // TODO(jianlijianli): Saturate this. int64_t val3 = rescaled * layer_norm_weights[j] + bias[j]; - int32 val4 = - static_cast((val3 > 0 ? val3 + 512 : val3 - 512) / 1024); - int32 val5 = MultiplyByQuantizedMultiplier(val4, layer_norm_scale_a, - layer_norm_scale_b + 12); + int32_t val4 = + static_cast((val3 > 0 ? val3 + 512 : val3 - 512) / 1024); + int32_t val5 = MultiplyByQuantizedMultiplier(val4, layer_norm_scale_a, + layer_norm_scale_b + 12); val5 = std::min(std::max(kInt16Min, val5), kInt16Max); output[index] = static_cast(val5); } @@ -465,8 +465,8 @@ void PortableApplyLayerNormFloat(const int16_t* input, int32_t layer_norm_scale_b, const int32_t* bias, int n_batch, int n_input, int16_t* output) { - const int32_t int16_max = std::numeric_limits::max(); - const int32_t int16_min = std::numeric_limits::min(); + const int32_t int16_max = std::numeric_limits::max(); + const int32_t int16_min = std::numeric_limits::min(); // This is to surpress a lint warning. 
const double two = 2.0; const float layer_norm_scale = @@ -498,7 +498,7 @@ void PortableApplyLayerNormFloat(const int16_t* input, const float weighted_normalized_value = normalized_value * layer_norm_weights[i] * layer_norm_scale + bias[i] * bias_scale; - const int32_t quant_output = static_cast( + const int32_t quant_output = static_cast( std::round(weighted_normalized_value * std::pow(2, 12))); output[index] = std::min(int16_max, std::max(int16_min, quant_output)); } @@ -533,18 +533,18 @@ void PortableApplySigmoid(const int16_t* input, int32_t n_batch, void PortableApplySigmoidFloat(const int16_t* input, int32_t n_batch, int32_t n_input, int16_t* output) { - const int32_t int16_max = std::numeric_limits::max(); - const int32_t int16_min = std::numeric_limits::min(); + const int32_t int16_max = std::numeric_limits::max(); + const int32_t int16_min = std::numeric_limits::min(); for (int batch = 0; batch < n_batch; ++batch) { for (int i = 0; i < n_input; ++i) { const int index = batch * n_input + i; const float float_input = input[index] * std::pow(2, -12); const float float_output = 1.0f / (1.0f + std::exp(-float_input)); const int32_t quant_output = - static_cast(float_output * std::pow(2, 15)); + static_cast(float_output * std::pow(2, 15)); const int32_t quant_output_clamped = std::min(int16_max, std::max(int16_min, quant_output)); - output[index] = static_cast(quant_output_clamped); + output[index] = static_cast(quant_output_clamped); } } } @@ -588,8 +588,8 @@ void PortableApplyTanh(int32_t integer_bits, const int16_t* input, void PortableApplyTanhFloat(const int16_t* input, int32_t n_batch, int32_t n_input, int32_t integer_bits, int16_t* output) { - const int32_t int16_max = std::numeric_limits::max(); - const int32_t int16_min = std::numeric_limits::min(); + const int32_t int16_max = std::numeric_limits::max(); + const int32_t int16_min = std::numeric_limits::min(); const double two = 2.0; for (int batch = 0; batch < n_batch; ++batch) { for (int i = 0; i < n_input; ++i) { @@ -598,10 +598,10 @@ void PortableApplyTanhFloat(const int16_t* input, int32_t n_batch, input[index] * std::pow(two, static_cast(integer_bits)); const float float_output = std::tanh(float_input); const int32_t quant_output = - static_cast(float_output * std::pow(2, 15)); + static_cast(float_output * std::pow(2, 15)); const int32_t quant_output_clamped = std::min(int16_max, std::max(int16_min, quant_output)); - output[index] = static_cast(quant_output_clamped); + output[index] = static_cast(quant_output_clamped); } } } @@ -634,7 +634,7 @@ void PortableCwiseMul(const int16_t* input_1, const int16_t* input_2, value = std::min(std::max(static_cast(-128), value), static_cast(127)); - output[index] = static_cast(value); + output[index] = static_cast(value); } } } @@ -645,7 +645,7 @@ void PortableCwiseAdd(const int16_t* input_1, const int16_t* input_2, for (int i = 0; i < n_input; ++i) { const int index = batch * n_input + i; int32_t sum = input_1[index] + input_2[index]; - const int32 sum_clamped = std::min(kInt16Max, std::max(kInt16Min, sum)); + const int32_t sum_clamped = std::min(kInt16Max, std::max(kInt16Min, sum)); output[index] = static_cast(sum_clamped); } } @@ -793,12 +793,12 @@ void PortableTwoGateSaturatingAdd(const int8_t* input, int8_t input_zp, int32_t recurrent_effective_scale_b, int32_t n_batch, int32_t n_cell, int16_t* output) { - const int32_t int16_max = std::numeric_limits::max(); - const int32_t int16_min = std::numeric_limits::min(); + const int32_t int16_max = std::numeric_limits::max(); + const 
int32_t int16_min = std::numeric_limits::min(); for (int i = 0; i < n_batch * n_cell; ++i) { - int32_t x = static_cast(input[i]) - static_cast(input_zp); + int32_t x = static_cast(input[i]) - static_cast(input_zp); int32_t h = - static_cast(recurrent[i]) - static_cast(recurrent_zp); + static_cast(recurrent[i]) - static_cast(recurrent_zp); int32_t x_scaled = MultiplyByQuantizedMultiplier(x, input_effective_scale_a, input_effective_scale_b); int32_t h_scaled = MultiplyByQuantizedMultiplier( diff --git a/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.h b/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.h index 602576ca3db..054fa43243d 100644 --- a/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.h +++ b/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.h @@ -32,7 +32,7 @@ bool IsZeroVector(const float* vector, int v_size) { return PortableIsZeroVector(vector, v_size); } -// Check if all entries of a vector are zero for int8. +// Check if all entries of a vector are zero for int8_t. bool IsZeroVector(const int8_t* vector, int v_size) { return PortableIsZeroVector(vector, v_size); } diff --git a/tensorflow/lite/kernels/internal/reference/prelu.h b/tensorflow/lite/kernels/internal/reference/prelu.h index 4633cb9599a..02db5174ed6 100644 --- a/tensorflow/lite/kernels/internal/reference/prelu.h +++ b/tensorflow/lite/kernels/internal/reference/prelu.h @@ -23,7 +23,7 @@ namespace tflite { namespace reference_ops { -// Broadcast prelu to output_shape for quantized uint8/int8 data. +// Broadcast prelu to output_shape for quantized uint8_t/int8_t data. template inline void BroadcastPrelu4DSlow( const PreluParams& params, const RuntimeShape& input_shape, @@ -44,15 +44,15 @@ inline void BroadcastPrelu4DSlow( for (int c = 0; c < extended_output_shape.Dims(3); ++c) { int output_index = Offset(extended_output_shape, b, y, x, c); int input_index = SubscriptToIndex(desc1, b, y, x, c); - const int32 input_value = + const int32_t input_value = params.input_offset + input_data[input_index]; - int32 output_value; + int32_t output_value; if (input_value >= 0) { output_value = MultiplyByQuantizedMultiplier( input_value, params.output_multiplier_1, params.output_shift_1); } else { auto alpha_index = SubscriptToIndex(desc2, b, y, x, c); - const int32 alpha_value = + const int32_t alpha_value = params.alpha_offset + alpha_data[alpha_index]; output_value = MultiplyByQuantizedMultiplier( @@ -61,9 +61,9 @@ inline void BroadcastPrelu4DSlow( } output_value += params.output_offset; - const int32 quantized_min = std::numeric_limits::min(); - const int32 quantized_max = std::numeric_limits::max(); - const int32 clamped_output = + const int32_t quantized_min = std::numeric_limits::min(); + const int32_t quantized_max = std::numeric_limits::max(); + const int32_t clamped_output = std::min(quantized_max, std::max(quantized_min, output_value)); output_data[output_index] = static_cast(clamped_output); } @@ -77,19 +77,19 @@ inline void Prelu(const PreluParams& params, const RuntimeShape& input_shape, const T* input_data, const RuntimeShape& alpha_shape, const T* alpha_data, const RuntimeShape& output_shape, T* output_data) { - const int32 quantized_min = std::numeric_limits::min(); - const int32 quantized_max = std::numeric_limits::max(); + const int32_t quantized_min = std::numeric_limits::min(); + const int32_t quantized_max = std::numeric_limits::max(); const int flat_size = MatchingElementsSize(input_shape, alpha_shape, output_shape); for (int i = 0; i < flat_size; 
++i) { - const int32 input_value = params.input_offset + input_data[i]; - int32 output_value; + const int32_t input_value = params.input_offset + input_data[i]; + int32_t output_value; if (input_value >= 0) { output_value = MultiplyByQuantizedMultiplier( input_value, params.output_multiplier_1, params.output_shift_1); } else { - const int32 alpha_value = params.alpha_offset + alpha_data[i]; + const int32_t alpha_value = params.alpha_offset + alpha_data[i]; output_value = MultiplyByQuantizedMultiplier(input_value * alpha_value, params.output_multiplier_2, @@ -97,7 +97,7 @@ inline void Prelu(const PreluParams& params, const RuntimeShape& input_shape, } output_value += params.output_offset; - const int32 clamped_output = + const int32_t clamped_output = std::min(quantized_max, std::max(quantized_min, output_value)); output_data[i] = static_cast(clamped_output); } diff --git a/tensorflow/lite/kernels/internal/reference/quantize.h b/tensorflow/lite/kernels/internal/reference/quantize.h index d36db06f2e0..6f3f9aeb419 100644 --- a/tensorflow/lite/kernels/internal/reference/quantize.h +++ b/tensorflow/lite/kernels/internal/reference/quantize.h @@ -33,18 +33,18 @@ inline void AffineQuantize(const tflite::QuantizationParams& op_params, const InputT* input_data, const RuntimeShape& output_shape, OutputT* output_data) { - const int32 zero_point = op_params.zero_point; + const int32_t zero_point = op_params.zero_point; const double scale = op_params.scale; const int flat_size = MatchingFlatSize(input_shape, output_shape); - static constexpr int32 min_val = std::numeric_limits::min(); - static constexpr int32 max_val = std::numeric_limits::max(); + static constexpr int32_t min_val = std::numeric_limits::min(); + static constexpr int32_t max_val = std::numeric_limits::max(); for (int i = 0; i < flat_size; i++) { const InputT val = input_data[i]; - int32 unclamped = - static_cast(TfLiteRound(val / static_cast(scale))) + + int32_t unclamped = + static_cast(TfLiteRound(val / static_cast(scale))) + zero_point; - int32 clamped = std::min(std::max(unclamped, min_val), max_val); + int32_t clamped = std::min(std::max(unclamped, min_val), max_val); output_data[i] = clamped; } } diff --git a/tensorflow/lite/kernels/internal/reference/reduce.h b/tensorflow/lite/kernels/internal/reference/reduce.h index 2e54928682a..597d015d0b1 100644 --- a/tensorflow/lite/kernels/internal/reference/reduce.h +++ b/tensorflow/lite/kernels/internal/reference/reduce.h @@ -251,9 +251,9 @@ inline void Mean(const tflite::MeanParams& op_params, inline void Mean(const tflite::MeanParams& op_params, const RuntimeShape& unextended_input_shape, - const uint8_t* input_data, int32 input_zero_point, + const uint8_t* input_data, int32_t input_zero_point, float input_scale, const RuntimeShape& unextended_output_shape, - uint8_t* output_data, int32 output_zero_point, + uint8_t* output_data, int32_t output_zero_point, float output_scale) { ruy::profiler::ScopeLabel label("Mean4D/Uint8"); @@ -282,9 +282,9 @@ inline void Mean(const tflite::MeanParams& op_params, constexpr int32_t kMinValue = std::numeric_limits::min(); constexpr int32_t kMaxValue = std::numeric_limits::max(); - int32 bias = + int32_t bias = output_zero_point - - static_cast(input_zero_point * input_scale / output_scale); + static_cast(input_zero_point * input_scale / output_scale); double real_scale = static_cast(input_scale / (num_elements_in_axis * output_scale)); @@ -293,7 +293,7 @@ inline void Mean(const tflite::MeanParams& op_params, QuantizeMultiplier(real_scale, &multiplier, 
&shift); for (int out_b = 0; out_b < output_batch; ++out_b) { for (int out_d = 0; out_d < output_depth; ++out_d) { - int32 acc = 0; + int32_t acc = 0; for (int in_h = 0; in_h < input_height; ++in_h) { for (int in_w = 0; in_w < input_width; ++in_w) { acc += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)]; @@ -312,10 +312,10 @@ inline void Mean(const tflite::MeanParams& op_params, // It does so in two stages, first calculates the sum of elements along the axis // then divides it by the number of element in axis for quantized values. template -inline bool QuantizedMeanOrSum(const T* input_data, int32 input_zero_point, +inline bool QuantizedMeanOrSum(const T* input_data, int32_t input_zero_point, float input_scale, const int* input_dims, const int input_num_dims, T* output_data, - int32 output_zero_point, float output_scale, + int32_t output_zero_point, float output_scale, const int* output_dims, const int output_num_dims, const int* axis, const int num_axis_dimensions, bool keep_dims, diff --git a/tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h b/tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h index e76fc8b6931..95550abc145 100644 --- a/tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h +++ b/tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h @@ -24,22 +24,23 @@ namespace tflite { namespace reference_ops { -inline int32 GetNearestNeighbor(const int input_value, const int32 input_size, - const int32 output_size, - const bool align_corners, - const bool half_pixel_centers) { +inline int32_t GetNearestNeighbor(const int input_value, + const int32_t input_size, + const int32_t output_size, + const bool align_corners, + const bool half_pixel_centers) { const float scale = (align_corners && output_size > 1) ? (input_size - 1) / static_cast(output_size - 1) : input_size / static_cast(output_size); const float offset = half_pixel_centers ? 0.5f : 0.0f; - int32 output_value = std::min( + int32_t output_value = std::min( align_corners - ? static_cast(TfLiteRound((input_value + offset) * scale)) - : static_cast(std::floor((input_value + offset) * scale)), + ? 
static_cast(TfLiteRound((input_value + offset) * scale)) + : static_cast(std::floor((input_value + offset) * scale)), input_size - 1); if (half_pixel_centers) { - output_value = std::max(static_cast(0), output_value); + output_value = std::max(static_cast(0), output_value); } return output_value; } @@ -48,7 +49,7 @@ template inline void ResizeNearestNeighbor( const tflite::ResizeNearestNeighborParams& op_params, const RuntimeShape& unextended_input_shape, const T* input_data, - const RuntimeShape& output_size_shape, const int32* output_size_data, + const RuntimeShape& output_size_shape, const int32_t* output_size_data, const RuntimeShape& unextended_output_shape, T* output_data) { TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4); TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); @@ -58,16 +59,16 @@ inline void ResizeNearestNeighbor( const RuntimeShape output_shape = RuntimeShape::ExtendedShape(4, unextended_output_shape); - int32 batches = MatchingDim(input_shape, 0, output_shape, 0); - int32 input_height = input_shape.Dims(1); - int32 input_width = input_shape.Dims(2); - int32 depth = MatchingDim(input_shape, 3, output_shape, 3); + int32_t batches = MatchingDim(input_shape, 0, output_shape, 0); + int32_t input_height = input_shape.Dims(1); + int32_t input_width = input_shape.Dims(2); + int32_t depth = MatchingDim(input_shape, 3, output_shape, 3); // The Tensorflow version of this op allows resize on the width and height // axis only. TFLITE_DCHECK_EQ(output_size_shape.FlatSize(), 2); - int32 output_height = output_size_data[0]; - int32 output_width = output_size_data[1]; + int32_t output_height = output_size_data[0]; + int32_t output_width = output_size_data[1]; const int col_offset = input_shape.Dims(3); const int row_offset = input_shape.Dims(2) * col_offset; @@ -77,14 +78,14 @@ inline void ResizeNearestNeighbor( T* output_ptr = output_data; for (int b = 0; b < batches; ++b) { for (int y = 0; y < output_height; ++y) { - int32 in_y = GetNearestNeighbor(y, input_height, output_height, - op_params.align_corners, - op_params.half_pixel_centers); - const T* y_input_ptr = input_ptr + in_y * row_offset; - for (int x = 0; x < output_width; ++x) { - int32 in_x = GetNearestNeighbor(x, input_width, output_width, + int32_t in_y = GetNearestNeighbor(y, input_height, output_height, op_params.align_corners, op_params.half_pixel_centers); + const T* y_input_ptr = input_ptr + in_y * row_offset; + for (int x = 0; x < output_width; ++x) { + int32_t in_x = GetNearestNeighbor(x, input_width, output_width, + op_params.align_corners, + op_params.half_pixel_centers); const T* x_input_ptr = y_input_ptr + in_x * col_offset; memcpy(output_ptr, x_input_ptr, depth * sizeof(T)); output_ptr += depth; diff --git a/tensorflow/lite/kernels/internal/reference/softmax.h b/tensorflow/lite/kernels/internal/reference/softmax.h index dd44b3c7863..b035b433a0b 100644 --- a/tensorflow/lite/kernels/internal/reference/softmax.h +++ b/tensorflow/lite/kernels/internal/reference/softmax.h @@ -62,13 +62,14 @@ inline void Softmax(const SoftmaxParams& params, } } -// Quantized softmax with int8/uint8 input and int8/uint8/int16 output. +// Quantized softmax with int8_t/uint8_t input and int8_t/uint8_t/int16_t +// output. 
template inline void Softmax(const SoftmaxParams& params, const RuntimeShape& input_shape, const InputT* input_data, const RuntimeShape& output_shape, OutputT* output_data) { - const int32 input_beta_multiplier = params.input_multiplier; - const int32 input_beta_left_shift = params.input_left_shift; + const int32_t input_beta_multiplier = params.input_multiplier; + const int32_t input_beta_left_shift = params.input_left_shift; const int diff_min = params.diff_min; // The representation chosen for the input to the exp() function is Q5.26. // We need to leave extra space since values that we skip might be as large as @@ -78,9 +79,10 @@ inline void Softmax(const SoftmaxParams& params, static const int kScaledDiffIntegerBits = 5; static const int kAccumulationIntegerBits = 12; using FixedPointScaledDiff = - gemmlowp::FixedPoint; - using FixedPointAccum = gemmlowp::FixedPoint; - using FixedPoint0 = gemmlowp::FixedPoint; + gemmlowp::FixedPoint; + using FixedPointAccum = + gemmlowp::FixedPoint; + using FixedPoint0 = gemmlowp::FixedPoint; const int trailing_dim = input_shape.DimensionsCount() - 1; const int outer_size = @@ -96,10 +98,10 @@ inline void Softmax(const SoftmaxParams& params, FixedPointAccum sum_of_exps = FixedPointAccum::Zero(); for (int c = 0; c < depth; ++c) { - int32 input_diff = - static_cast(input_data[i * depth + c]) - max_in_row; + int32_t input_diff = + static_cast(input_data[i * depth + c]) - max_in_row; if (input_diff >= diff_min) { - const int32 input_diff_rescaled = + const int32_t input_diff_rescaled = MultiplyByQuantizedMultiplierGreaterThanOne( input_diff, input_beta_multiplier, input_beta_left_shift); const FixedPointScaledDiff scaled_diff_f8 = @@ -114,28 +116,28 @@ inline void Softmax(const SoftmaxParams& params, sum_of_exps.raw(), kAccumulationIntegerBits, &num_bits_over_unit)); for (int c = 0; c < depth; ++c) { - int32 input_diff = - static_cast(input_data[i * depth + c]) - max_in_row; + int32_t input_diff = + static_cast(input_data[i * depth + c]) - max_in_row; if (input_diff >= diff_min) { - const int32 input_diff_rescaled = + const int32_t input_diff_rescaled = MultiplyByQuantizedMultiplierGreaterThanOne( input_diff, input_beta_multiplier, input_beta_left_shift); const FixedPointScaledDiff scaled_diff_f8 = FixedPointScaledDiff::FromRaw(input_diff_rescaled); FixedPoint0 exp_in_0 = exp_on_negative_values(scaled_diff_f8); - int32 unsat_output = gemmlowp::RoundingDivideByPOT( + int32_t unsat_output = gemmlowp::RoundingDivideByPOT( (shifted_scale * exp_in_0).raw(), num_bits_over_unit + 31 - (sizeof(OutputT) * 8)); - const int32 shifted_output = + const int32_t shifted_output = unsat_output + - static_cast(std::numeric_limits::min()); + static_cast(std::numeric_limits::min()); output_data[i * depth + c] = static_cast(std::max( std::min(shifted_output, - static_cast(std::numeric_limits::max())), - static_cast(std::numeric_limits::min()))); + static_cast(std::numeric_limits::max())), + static_cast(std::numeric_limits::min()))); } else { output_data[i * depth + c] = std::numeric_limits::min(); } @@ -143,7 +145,7 @@ inline void Softmax(const SoftmaxParams& params, } } -// Quantized softmax with int16 input and int16 output. +// Quantized softmax with int16_t input and int16_t output. 
inline void SoftmaxInt16(const SoftmaxParams& params, const RuntimeShape& input_shape, const int16_t* input_data, diff --git a/tensorflow/lite/kernels/internal/reference/sub.h b/tensorflow/lite/kernels/internal/reference/sub.h index 91ef7f2c2fd..b27f251de6c 100644 --- a/tensorflow/lite/kernels/internal/reference/sub.h +++ b/tensorflow/lite/kernels/internal/reference/sub.h @@ -47,11 +47,11 @@ inline void SubNonBroadcast(const ArithmeticParams& params, inline void SubNonBroadcast(const ArithmeticParams& params, const RuntimeShape& input1_shape, - const int32* input1_data, + const int32_t* input1_data, const RuntimeShape& input2_shape, - const int32* input2_data, + const int32_t* input2_data, const RuntimeShape& output_shape, - int32* output_data) { + int32_t* output_data) { const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape); for (int i = 0; i < flat_size; ++i) { @@ -112,12 +112,12 @@ inline void BroadcastSubSlow(const ArithmeticParams& params, template inline void BroadcastSubSlow(const ArithmeticParams& params, const RuntimeShape& input1_shape, - const uint8* input1_data, + const uint8_t* input1_data, const RuntimeShape& input2_shape, - const uint8* input2_data, + const uint8_t* input2_data, const RuntimeShape& output_shape, - uint8* output_data) { - ruy::profiler::ScopeLabel label("BroadcastSubSlow/uint8"); + uint8_t* output_data) { + ruy::profiler::ScopeLabel label("BroadcastSubSlow/uint8_t"); TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N); TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N); TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N); @@ -140,28 +140,28 @@ inline void BroadcastSubSlow(const ArithmeticParams& params, // nesting loops such that the innermost loop has the smallest stride for the // best cache behavior. 
auto sub_func = [&](int indexes[N]) { - const int32 input1_val = + const int32_t input1_val = params.input1_offset + input1_data[SubscriptToIndex(desc1, indexes)]; - const int32 input2_val = + const int32_t input2_val = params.input2_offset + input2_data[SubscriptToIndex(desc2, indexes)]; - const int32 shifted_input1_val = input1_val * (1 << params.left_shift); - const int32 shifted_input2_val = input2_val * (1 << params.left_shift); - const int32 scaled_input1_val = + const int32_t shifted_input1_val = input1_val * (1 << params.left_shift); + const int32_t shifted_input2_val = input2_val * (1 << params.left_shift); + const int32_t scaled_input1_val = MultiplyByQuantizedMultiplierSmallerThanOneExp( shifted_input1_val, params.input1_multiplier, params.input1_shift); - const int32 scaled_input2_val = + const int32_t scaled_input2_val = MultiplyByQuantizedMultiplierSmallerThanOneExp( shifted_input2_val, params.input2_multiplier, params.input2_shift); - const int32 raw_sub = scaled_input1_val - scaled_input2_val; - const int32 raw_output = + const int32_t raw_sub = scaled_input1_val - scaled_input2_val; + const int32_t raw_output = MultiplyByQuantizedMultiplierSmallerThanOneExp( raw_sub, params.output_multiplier, params.output_shift) + params.output_offset; - const int32 clamped_output = + const int32_t clamped_output = std::min(params.quantized_activation_max, std::max(params.quantized_activation_min, raw_output)); output_data[SubscriptToIndex(output_desc, indexes)] = - static_cast(clamped_output); + static_cast(clamped_output); }; NDOpsHelper(output_desc, sub_func); } @@ -169,12 +169,12 @@ inline void BroadcastSubSlow(const ArithmeticParams& params, template inline void BroadcastSubSlow(const ArithmeticParams& params, const RuntimeShape& input1_shape, - const int32* input1_data, + const int32_t* input1_data, const RuntimeShape& input2_shape, - const int32* input2_data, + const int32_t* input2_data, const RuntimeShape& output_shape, - int32* output_data) { - ruy::profiler::ScopeLabel label("BroadcastSubSlow/int32"); + int32_t* output_data) { + ruy::profiler::ScopeLabel label("BroadcastSubSlow/int32_t"); TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N); TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N); TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N); @@ -214,7 +214,7 @@ inline void BroadcastSubSlow(const ArithmeticParams& params, const int8_t* input2_data, const RuntimeShape& output_shape, int8_t* output_data) { - ruy::profiler::ScopeLabel label("BroadcastSubSlow/int8"); + ruy::profiler::ScopeLabel label("BroadcastSubSlow/int8_t"); NdArrayDesc desc1; NdArrayDesc desc2; NdArrayDesc output_desc; @@ -267,7 +267,7 @@ void BroadcastSubSlow(const ArithmeticParams& params, const RuntimeShape& input2_shape, const int64_t* input2_data, const RuntimeShape& output_shape, int64_t* output_data) { - ruy::profiler::ScopeLabel label("BroadcastSubSlow/int64"); + ruy::profiler::ScopeLabel label("BroadcastSubSlow/int64_t"); TFLITE_DCHECK_LE(input1_shape.DimensionsCount(), N); TFLITE_DCHECK_LE(input2_shape.DimensionsCount(), N); TFLITE_DCHECK_LE(output_shape.DimensionsCount(), N); @@ -339,33 +339,33 @@ void BroadcastSubSlow(const ArithmeticParams& params, // Element-wise Sub that can often be used for inner loop of broadcast sub as // well as the non-broadcast sub. 
inline void SubElementwise(int size, const ArithmeticParams& params, - const uint8* input1_data, const uint8* input2_data, - uint8* output_data) { + const uint8_t* input1_data, + const uint8_t* input2_data, uint8_t* output_data) { TFLITE_DCHECK_GT(params.input1_offset, -256); TFLITE_DCHECK_GT(params.input2_offset, -256); TFLITE_DCHECK_LT(params.input1_offset, 256); TFLITE_DCHECK_LT(params.input2_offset, 256); for (int i = 0; i < size; ++i) { - const int32 input1_val = params.input1_offset + input1_data[i]; - const int32 input2_val = params.input2_offset + input2_data[i]; - const int32 shifted_input1_val = input1_val * (1 << params.left_shift); - const int32 shifted_input2_val = input2_val * (1 << params.left_shift); - const int32 scaled_input1_val = + const int32_t input1_val = params.input1_offset + input1_data[i]; + const int32_t input2_val = params.input2_offset + input2_data[i]; + const int32_t shifted_input1_val = input1_val * (1 << params.left_shift); + const int32_t shifted_input2_val = input2_val * (1 << params.left_shift); + const int32_t scaled_input1_val = MultiplyByQuantizedMultiplierSmallerThanOneExp( shifted_input1_val, params.input1_multiplier, params.input1_shift); - const int32 scaled_input2_val = + const int32_t scaled_input2_val = MultiplyByQuantizedMultiplierSmallerThanOneExp( shifted_input2_val, params.input2_multiplier, params.input2_shift); - const int32 raw_sub = scaled_input1_val - scaled_input2_val; - const int32 raw_output = + const int32_t raw_sub = scaled_input1_val - scaled_input2_val; + const int32_t raw_output = MultiplyByQuantizedMultiplierSmallerThanOneExp( raw_sub, params.output_multiplier, params.output_shift) + params.output_offset; - const int32 clamped_output = + const int32_t clamped_output = std::min(params.quantized_activation_max, std::max(params.quantized_activation_min, raw_output)); - output_data[i] = static_cast(clamped_output); + output_data[i] = static_cast(clamped_output); } } @@ -381,22 +381,22 @@ inline void SubElementwise(int size, const ArithmeticParams& params, TFLITE_DCHECK_LE(params.input2_offset, int8_max_value); for (int i = 0; i < size; ++i) { - const int32 input1_val = params.input1_offset + input1_data[i]; - const int32 input2_val = params.input2_offset + input2_data[i]; - const int32 shifted_input1_val = input1_val * (1 << params.left_shift); - const int32 shifted_input2_val = input2_val * (1 << params.left_shift); - const int32 scaled_input1_val = + const int32_t input1_val = params.input1_offset + input1_data[i]; + const int32_t input2_val = params.input2_offset + input2_data[i]; + const int32_t shifted_input1_val = input1_val * (1 << params.left_shift); + const int32_t shifted_input2_val = input2_val * (1 << params.left_shift); + const int32_t scaled_input1_val = MultiplyByQuantizedMultiplierSmallerThanOneExp( shifted_input1_val, params.input1_multiplier, params.input1_shift); - const int32 scaled_input2_val = + const int32_t scaled_input2_val = MultiplyByQuantizedMultiplierSmallerThanOneExp( shifted_input2_val, params.input2_multiplier, params.input2_shift); - const int32 raw_sub = scaled_input1_val - scaled_input2_val; - const int32 raw_output = + const int32_t raw_sub = scaled_input1_val - scaled_input2_val; + const int32_t raw_output = MultiplyByQuantizedMultiplierSmallerThanOneExp( raw_sub, params.output_multiplier, params.output_shift) + params.output_offset; - const int32 clamped_output = + const int32_t clamped_output = std::min(params.quantized_activation_max, std::max(params.quantized_activation_min, raw_output)); 
output_data[i] = static_cast(clamped_output); @@ -404,9 +404,9 @@ inline void SubElementwise(int size, const ArithmeticParams& params, } inline void Sub(const ArithmeticParams& params, - const RuntimeShape& input1_shape, const uint8* input1_data, - const RuntimeShape& input2_shape, const uint8* input2_data, - const RuntimeShape& output_shape, uint8* output_data) { + const RuntimeShape& input1_shape, const uint8_t* input1_data, + const RuntimeShape& input2_shape, const uint8_t* input2_data, + const RuntimeShape& output_shape, uint8_t* output_data) { TFLITE_DCHECK_LE(params.quantized_activation_min, params.quantized_activation_max); const int flat_size = @@ -474,7 +474,8 @@ void Sub(const ArithmeticParams& params, const RuntimeShape& input1_shape, } inline void SetActivationMinMax(const ArithmeticParams& params, - int32* activation_min, int32* activation_max) { + int32_t* activation_min, + int32_t* activation_max) { *activation_min = params.quantized_activation_min; *activation_max = params.quantized_activation_max; } diff --git a/tensorflow/lite/kernels/internal/reference/svdf.h b/tensorflow/lite/kernels/internal/reference/svdf.h index ffa46b8f422..bb986e4de0a 100644 --- a/tensorflow/lite/kernels/internal/reference/svdf.h +++ b/tensorflow/lite/kernels/internal/reference/svdf.h @@ -268,7 +268,7 @@ inline void EvalHybridSVDF( std::fill_n(scratch_ptr, batch_size * num_filters, 0.0f); if (!tensor_utils::IsZeroVector(input_ptr, batch_size * input_size)) { - // Quantize input from float to int8. + // Quantize input from float to int8_t. tensor_utils::BatchQuantizeFloats(input_ptr, batch_size, input_size, quantized_input_ptr, scaling_factors_ptr, zero_points_ptr, diff --git a/tensorflow/lite/kernels/internal/reference/tanh.h b/tensorflow/lite/kernels/internal/reference/tanh.h index 04c66989b48..3a05c474dd3 100644 --- a/tensorflow/lite/kernels/internal/reference/tanh.h +++ b/tensorflow/lite/kernels/internal/reference/tanh.h @@ -47,8 +47,8 @@ inline void Tanh(const TanhParams&, const RuntimeShape& input_shape, } inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape, - const int16* input_data, const RuntimeShape& output_shape, - int16* output_data) { + const int16_t* input_data, const RuntimeShape& output_shape, + int16_t* output_data) { const int input_left_shift = params.input_left_shift; // Support for shifts is limited until we have a parameterized version of // SaturatingRoundingMultiplyByPOT(). 
@@ -81,43 +81,43 @@ inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape,
 }
 
 inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape,
-                 const uint8* input_data, const RuntimeShape& output_shape,
-                 uint8* output_data) {
-  const int32 input_zero_point = params.input_zero_point;
-  const int32 input_range_radius = params.input_range_radius;
-  const int32 input_multiplier = params.input_multiplier;
+                 const uint8_t* input_data, const RuntimeShape& output_shape,
+                 uint8_t* output_data) {
+  const int32_t input_zero_point = params.input_zero_point;
+  const int32_t input_range_radius = params.input_range_radius;
+  const int32_t input_multiplier = params.input_multiplier;
   const int input_left_shift = params.input_left_shift;
-  const int32 output_zero_point = 128;
+  const int32_t output_zero_point = 128;
   const int flat_size = MatchingFlatSize(input_shape, output_shape);
 
   for (int i = 0; i < flat_size; i++) {
-    const uint8 input_val_u8 = input_data[i];
-    const int32 input_val_centered =
-        static_cast<int32>(input_val_u8) - input_zero_point;
-    uint8 output_val;
+    const uint8_t input_val_u8 = input_data[i];
+    const int32_t input_val_centered =
+        static_cast<int32_t>(input_val_u8) - input_zero_point;
+    uint8_t output_val;
     if (input_val_centered <= -input_range_radius) {
       output_val = 0;
     } else if (input_val_centered >= input_range_radius) {
       output_val = 255;
     } else {
-      const int32 input_val_rescaled =
+      const int32_t input_val_rescaled =
           MultiplyByQuantizedMultiplierGreaterThanOne(
               input_val_centered, input_multiplier, input_left_shift);
-      using FixedPoint4 = gemmlowp::FixedPoint<int32, 4>;
-      using FixedPoint0 = gemmlowp::FixedPoint<int32, 0>;
+      using FixedPoint4 = gemmlowp::FixedPoint<int32_t, 4>;
+      using FixedPoint0 = gemmlowp::FixedPoint<int32_t, 0>;
      const FixedPoint4 input_val_f4 = FixedPoint4::FromRaw(input_val_rescaled);
      const FixedPoint0 output_val_f0 = gemmlowp::tanh(input_val_f4);
 
      // Convert from Q0.31 to Q24.7.
      using gemmlowp::RoundingDivideByPOT;
-      int32 output_val_s32 = RoundingDivideByPOT(output_val_f0.raw(), 24);
+      int32_t output_val_s32 = RoundingDivideByPOT(output_val_f0.raw(), 24);
      output_val_s32 += output_zero_point;
      if (output_val_s32 == 256) {
        output_val_s32 = 255;
      }
-      // Reinterpret as Q0.7, encoded in uint8.
+      // Reinterpret as Q0.7, encoded in uint8_t.
      TFLITE_DCHECK_GE(output_val_s32, 0);
      TFLITE_DCHECK_LE(output_val_s32, 255);
-      output_val = static_cast<uint8>(output_val_s32);
+      output_val = static_cast<uint8_t>(output_val_s32);
    }
    output_data[i] = output_val;
  }

From 781b6d3fda87832300070eaa320f0420850ef797 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Fri, 24 Jul 2020 10:25:30 -0700
Subject: [PATCH 1274/2522] Fixing a latent compilation error. XStat has no
 set_uint32_value method.

This was not triggered because it was only used in the method taking a long
(yet unused) and in a class template so it wouldn't be compiled if it wasn't
used.
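For illustration, a minimal self-contained sketch of why the error stayed
latent. FakeStat and FakeStatsBuilder below are hypothetical stand-ins for
this note only, not the real XStat/XStatsBuilder API:

    #include <cstdint>
    #include <iostream>

    // Stand-in for XStat: only a 64-bit setter exists, no set_uint32_value().
    struct FakeStat {
      uint64_t uint64_value = 0;
      void set_uint64_value(uint64_t v) { uint64_value = v; }
    };

    template <typename T>
    class FakeStatsBuilder {
     public:
      void AddStatValue(unsigned long value) {  // NOLINT
        // The pre-fix XStatsBuilder branched on sizeof(unsigned long) and, in
        // the 4-byte branch, called a set_uint32_value() that XStat never had.
        // Member functions of a class template are only instantiated when
        // used, so the ill-formed call was never compiled. Always widening to
        // 64 bits removes the dead branch.
        stat_.set_uint64_value(value);
      }
      uint64_t value() const { return stat_.uint64_value; }

     private:
      FakeStat stat_;
    };

    int main() {
      FakeStatsBuilder<int> builder;
      builder.AddStatValue(42ul);             // fine on both LP64 and ILP32
      std::cout << builder.value() << "\n";   // prints 42
      return 0;
    }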
PiperOrigin-RevId: 323021336 Change-Id: I78380ebbaf7fcbb2e79adc61fa89749f74634b04 --- tensorflow/core/profiler/utils/xplane_builder.h | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/tensorflow/core/profiler/utils/xplane_builder.h b/tensorflow/core/profiler/utils/xplane_builder.h index 10b3727876f..ded2b353c2c 100644 --- a/tensorflow/core/profiler/utils/xplane_builder.h +++ b/tensorflow/core/profiler/utils/xplane_builder.h @@ -46,11 +46,7 @@ class XStatsBuilder { } void AddStatValue(const XStatMetadata& metadata, unsigned long value) { // NOLINT - if (sizeof(unsigned long) == 8) { // NOLINT - AddStat(metadata)->set_uint64_value(value); - } else { - AddStat(metadata)->set_uint32_value(value); - } + AddStat(metadata)->set_uint64_value(value); } void AddStatValue(const XStatMetadata& metadata, unsigned long long value) { // NOLINT @@ -60,11 +56,7 @@ class XStatsBuilder { AddStat(metadata)->set_int64_value(value); } void AddStatValue(const XStatMetadata& metadata, long value) { // NOLINT - if (sizeof(long) == 8) { // NOLINT - AddStat(metadata)->set_int64_value(value); - } else { - AddStat(metadata)->set_int32_value(value); - } + AddStat(metadata)->set_int64_value(value); } void AddStatValue(const XStatMetadata& metadata, long long value) { // NOLINT AddStat(metadata)->set_int64_value(value); From 0cc0e54bc4c33b10970f1e6079d3b866d7c81774 Mon Sep 17 00:00:00 2001 From: Yanhui Liang Date: Fri, 24 Jul 2020 10:31:03 -0700 Subject: [PATCH 1275/2522] Update `run_deprecated_v1` test with graph scope. PiperOrigin-RevId: 323022464 Change-Id: I189339ea422a4804686a2135f67f73ad6ac7d89e --- .../feature_column/dense_features_v2_test.py | 155 +++++++++--------- 1 file changed, 77 insertions(+), 78 deletions(-) diff --git a/tensorflow/python/keras/feature_column/dense_features_v2_test.py b/tensorflow/python/keras/feature_column/dense_features_v2_test.py index fd915cc722e..bb2ce657c46 100644 --- a/tensorflow/python/keras/feature_column/dense_features_v2_test.py +++ b/tensorflow/python/keras/feature_column/dense_features_v2_test.py @@ -29,7 +29,6 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor -from tensorflow.python.framework import test_util from tensorflow.python.keras import combinations from tensorflow.python.keras import keras_parameterized from tensorflow.python.keras.feature_column import dense_features_v2 as df @@ -436,18 +435,18 @@ class DenseFeaturesTest(keras_parameterized.TestCase): expected_var_names, [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)]) - @test_util.run_deprecated_v1 def test_multiple_layers_with_same_shared_embedding_column(self): categorical_column_a = fc.categorical_column_with_identity( key='aaa', num_buckets=3) categorical_column_b = fc.categorical_column_with_identity( key='bbb', num_buckets=3) embedding_dimension = 2 - embedding_column_b, embedding_column_a = fc.shared_embedding_columns_v2( - [categorical_column_b, categorical_column_a], - dimension=embedding_dimension) + # feature_column.shared_embeddings is not supported in eager. 
with ops.Graph().as_default(): + embedding_column_b, embedding_column_a = fc.shared_embedding_columns_v2( + [categorical_column_b, categorical_column_a], + dimension=embedding_dimension) features = { 'aaa': sparse_tensor.SparseTensor( @@ -470,19 +469,19 @@ class DenseFeaturesTest(keras_parameterized.TestCase): ['aaa_bbb_shared_embedding:0'], [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)]) - @test_util.run_deprecated_v1 def test_multiple_layers_with_same_shared_embedding_column_diff_graphs(self): categorical_column_a = fc.categorical_column_with_identity( key='aaa', num_buckets=3) categorical_column_b = fc.categorical_column_with_identity( key='bbb', num_buckets=3) embedding_dimension = 2 - embedding_column_b, embedding_column_a = fc.shared_embedding_columns_v2( - [categorical_column_b, categorical_column_a], - dimension=embedding_dimension) - all_cols = [embedding_column_a, embedding_column_b] + # feature_column.shared_embeddings is not supported in eager. with ops.Graph().as_default(): + embedding_column_b, embedding_column_a = fc.shared_embedding_columns_v2( + [categorical_column_b, categorical_column_a], + dimension=embedding_dimension) + all_cols = [embedding_column_a, embedding_column_b] features = { 'aaa': sparse_tensor.SparseTensor( @@ -522,7 +521,6 @@ class DenseFeaturesTest(keras_parameterized.TestCase): ['aaa_bbb_shared_embedding:0'], [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)]) - @test_util.run_deprecated_v1 def test_with_1d_sparse_tensor(self): embedding_values = ( (1., 2., 3., 4., 5.), # id 0 @@ -548,38 +546,38 @@ class DenseFeaturesTest(keras_parameterized.TestCase): embedded_country = fc.embedding_column( country, dimension=5, initializer=_initializer) - # Provides 1-dim tensor and dense tensor. - features = { - 'price': - constant_op.constant([ - 11., - 12., - ]), - 'body-style': - sparse_tensor.SparseTensor( - indices=((0,), (1,)), - values=('sedan', 'hardtop'), - dense_shape=(2,)), - # This is dense tensor for the categorical_column. - 'country': - constant_op.constant(['CA', 'US']), - } - self.assertEqual(1, features['price'].shape.ndims) - self.assertEqual(1, features['body-style'].dense_shape.get_shape()[0]) - self.assertEqual(1, features['country'].shape.ndims) + with ops.Graph().as_default(): + # Provides 1-dim tensor and dense tensor. + features = { + 'price': + constant_op.constant([ + 11., + 12., + ]), + 'body-style': + sparse_tensor.SparseTensor( + indices=((0,), (1,)), + values=('sedan', 'hardtop'), + dense_shape=(2,)), + # This is dense tensor for the categorical_column. + 'country': + constant_op.constant(['CA', 'US']), + } + self.assertEqual(1, features['price'].shape.ndims) + self.assertEqual(1, features['body-style'].dense_shape.get_shape()[0]) + self.assertEqual(1, features['country'].shape.ndims) - net = df.DenseFeatures([price, one_hot_body_style, embedded_country])( - features) - self.assertEqual(1 + 3 + 5, net.shape[1]) - with _initialized_session() as sess: + net = df.DenseFeatures([price, one_hot_body_style, embedded_country])( + features) + self.assertEqual(1 + 3 + 5, net.shape[1]) + with _initialized_session() as sess: - # Each row is formed by concatenating `embedded_body_style`, - # `one_hot_body_style`, and `price` in order. - self.assertAllEqual([[0., 0., 1., 11., 12., 13., 14., 15., 11.], - [1., 0., 0., 1., 2., 3., 4., 5., 12.]], - sess.run(net)) + # Each row is formed by concatenating `embedded_body_style`, + # `one_hot_body_style`, and `price` in order. 
+ self.assertAllEqual([[0., 0., 1., 11., 12., 13., 14., 15., 11.], + [1., 0., 0., 1., 2., 3., 4., 5., 12.]], + sess.run(net)) - @test_util.run_deprecated_v1 def test_with_1d_unknown_shape_sparse_tensor(self): embedding_values = ( (1., 2.), # id 0 @@ -606,39 +604,39 @@ class DenseFeaturesTest(keras_parameterized.TestCase): country, dimension=2, initializer=_initializer) # Provides 1-dim tensor and dense tensor. - features = { - 'price': array_ops.placeholder(dtypes.float32), - 'body-style': array_ops.sparse_placeholder(dtypes.string), - # This is dense tensor for the categorical_column. - 'country': array_ops.placeholder(dtypes.string), - } - self.assertIsNone(features['price'].shape.ndims) - self.assertIsNone(features['body-style'].get_shape().ndims) - self.assertIsNone(features['country'].shape.ndims) + with ops.Graph().as_default(): + features = { + 'price': array_ops.placeholder(dtypes.float32), + 'body-style': array_ops.sparse_placeholder(dtypes.string), + # This is dense tensor for the categorical_column. + 'country': array_ops.placeholder(dtypes.string), + } + self.assertIsNone(features['price'].shape.ndims) + self.assertIsNone(features['body-style'].get_shape().ndims) + self.assertIsNone(features['country'].shape.ndims) - price_data = np.array([11., 12.]) - body_style_data = sparse_tensor.SparseTensorValue( - indices=((0,), (1,)), values=('sedan', 'hardtop'), dense_shape=(2,)) - country_data = np.array([['US'], ['CA']]) + price_data = np.array([11., 12.]) + body_style_data = sparse_tensor.SparseTensorValue( + indices=((0,), (1,)), values=('sedan', 'hardtop'), dense_shape=(2,)) + country_data = np.array([['US'], ['CA']]) - net = df.DenseFeatures([price, one_hot_body_style, embedded_country])( - features) - self.assertEqual(1 + 3 + 2, net.shape[1]) - with _initialized_session() as sess: + net = df.DenseFeatures([price, one_hot_body_style, embedded_country])( + features) + self.assertEqual(1 + 3 + 2, net.shape[1]) + with _initialized_session() as sess: - # Each row is formed by concatenating `embedded_body_style`, - # `one_hot_body_style`, and `price` in order. - self.assertAllEqual( - [[0., 0., 1., 1., 2., 11.], [1., 0., 0., 11., 12., 12.]], - sess.run( - net, - feed_dict={ - features['price']: price_data, - features['body-style']: body_style_data, - features['country']: country_data - })) + # Each row is formed by concatenating `embedded_body_style`, + # `one_hot_body_style`, and `price` in order. 
+ self.assertAllEqual( + [[0., 0., 1., 1., 2., 11.], [1., 0., 0., 11., 12., 12.]], + sess.run( + net, + feed_dict={ + features['price']: price_data, + features['body-style']: body_style_data, + features['country']: country_data + })) - @test_util.run_deprecated_v1 def test_with_rank_0_feature(self): # price has 1 dimension in dense_features price = fc.numeric_column('price') @@ -651,15 +649,16 @@ class DenseFeaturesTest(keras_parameterized.TestCase): with self.assertRaisesRegex(ValueError, 'Feature .* cannot have rank 0'): df.DenseFeatures([price])(features) - # Dynamic rank 0 should fail - features = { - 'price': array_ops.placeholder(dtypes.float32), - } - net = df.DenseFeatures([price])(features) - self.assertEqual(1, net.shape[1]) - with _initialized_session() as sess: - with self.assertRaisesOpError('Feature .* cannot have rank 0'): - sess.run(net, feed_dict={features['price']: np.array(1)}) + with ops.Graph().as_default(): + # Dynamic rank 0 should fail + features = { + 'price': array_ops.placeholder(dtypes.float32), + } + net = df.DenseFeatures([price])(features) + self.assertEqual(1, net.shape[1]) + with _initialized_session() as sess: + with self.assertRaisesOpError('Feature .* cannot have rank 0'): + sess.run(net, feed_dict={features['price']: np.array(1)}) if __name__ == '__main__': From 0720e701a36521e2e5299465934edf6f51d52d7d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 24 Jul 2020 10:34:03 -0700 Subject: [PATCH 1276/2522] Integrate LLVM at llvm/llvm-project@f7ffb122d08e Updates LLVM usage to match f7ffb122d08e PiperOrigin-RevId: 323023124 Change-Id: Ib67dbb26d704d9d012c71444676accf48ccdfaa9 --- tensorflow/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index e971df0330d..580bcc73265 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -711,8 +711,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "9d2da6759b4d05d834371bcaaa8fc3d9b3385b18" - LLVM_SHA256 = "e432ea63141c6c274ac71565664b267089fd58b41d052eaca6e4d7be7613e947" + LLVM_COMMIT = "f7ffb122d08e7a8203557898c67eaac3a857b152" + LLVM_SHA256 = "386d0f7c69f7ac341157a85f3cb1bd45f0c04f82b6bc8f65ca055cf382fd0424" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From 3edbef66473c73433203b6162deea07aec7b5070 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 24 Jul 2020 10:37:52 -0700 Subject: [PATCH 1277/2522] GPU tracking all-reduce like collectives. saved it to step_db.proto. 
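In more detail: GPU kernel events whose names carry the NCCL prefix are now
classified as DEVICE_COLLECTIVES, and each such event is recorded as an
AllReduceInfo (name, start/end time) in the per-core collectives map, which
ConvertStepEventsToStepDb copies into PerCoreStepInfo.all_reduce_db_per_core.
As a rough, hypothetical sketch of just the name-based rule (the real logic
lives in ClassifyGpuEvent in event_span.cc and uses StartsWithIgnoreCase; the
kernel names below are made up):

    #include <iostream>
    #include <string>

    // Values mirror the renumbered EventType enum in event_span.h:
    // DEVICE_COLLECTIVES = 90, DEVICE_COMPUTE_32 = 100.
    enum SketchEventType { kDeviceCollectives = 90, kDeviceCompute32 = 100 };

    SketchEventType ClassifyGpuKernelName(const std::string& name) {
      // Case-sensitive simplification of the "nccl" prefix check.
      if (name.rfind("nccl", 0) == 0) return kDeviceCollectives;
      // The real code further splits compute into 16- vs 32-bit precision.
      return kDeviceCompute32;
    }

    int main() {
      std::cout << ClassifyGpuKernelName("ncclAllReduceKernel") << "\n";  // 90
      std::cout << ClassifyGpuKernelName("sgemm_128x64_nn") << "\n";      // 100
      return 0;
    }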
PiperOrigin-RevId: 323023936 Change-Id: I84102c0ab3b82875a31cf9cb4236f9624a3b4682 --- tensorflow/core/profiler/convert/BUILD | 1 + .../op_stats_to_input_pipeline_analysis.cc | 3 +- .../convert/step_events_to_steps_db.cc | 11 ++- .../profiler/convert/xplane_to_step_events.cc | 20 +++-- tensorflow/core/profiler/utils/BUILD | 1 + tensorflow/core/profiler/utils/event_span.cc | 17 ++++ tensorflow/core/profiler/utils/event_span.h | 84 +++++++++++-------- 7 files changed, 94 insertions(+), 43 deletions(-) diff --git a/tensorflow/core/profiler/convert/BUILD b/tensorflow/core/profiler/convert/BUILD index e08eec0fced..0c0690ac1c7 100644 --- a/tensorflow/core/profiler/convert/BUILD +++ b/tensorflow/core/profiler/convert/BUILD @@ -339,6 +339,7 @@ cc_library( hdrs = ["xplane_to_step_events.h"], deps = [ "//tensorflow/core:lib", + "//tensorflow/core/profiler/protobuf:steps_db_proto_cc", "//tensorflow/core/profiler/protobuf:xplane_proto_cc", "//tensorflow/core/profiler/utils:event_span", "//tensorflow/core/profiler/utils:tf_xplane_visitor", diff --git a/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc b/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc index 7dafbc69af1..37749d320e5 100644 --- a/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc +++ b/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc @@ -208,7 +208,8 @@ InputPipelineAnalysisResult ComputeGenericInputPipelineAnalysisResult( GetTimeInMs(type_ps, DEVICE_WAIT_HOST)); details.set_output_ms(GetTimeInMs(type_ps, DEVICE_TO_HOST)); details.set_device_compute_ms(GetTimeInMs(type_ps, DEVICE_COMPUTE_16) + - GetTimeInMs(type_ps, DEVICE_COMPUTE_32)); + GetTimeInMs(type_ps, DEVICE_COMPUTE_32) + + GetTimeInMs(type_ps, DEVICE_COLLECTIVES)); details.set_device_to_device_ms(GetTimeInMs(type_ps, DEVICE_TO_DEVICE) + GetTimeInMs(type_ps, DEVICE_WAIT_DEVICE)); details.set_host_compute_ms(GetTimeInMs(type_ps, HOST_COMPUTE)); diff --git a/tensorflow/core/profiler/convert/step_events_to_steps_db.cc b/tensorflow/core/profiler/convert/step_events_to_steps_db.cc index e4713cd73fb..6841929dea7 100644 --- a/tensorflow/core/profiler/convert/step_events_to_steps_db.cc +++ b/tensorflow/core/profiler/convert/step_events_to_steps_db.cc @@ -119,10 +119,10 @@ StepDatabaseResult ConvertStepEventsToStepDb( } absl::c_sort(step_numbers); for (const auto& step : step_numbers) { - const auto* events = gtl::FindOrNull(nonoverlapped_step_events, step); - if (events == nullptr) continue; + const auto* step_details = gtl::FindOrNull(nonoverlapped_step_events, step); + if (step_details == nullptr) continue; StepInfoResult step_info = - ConvertStepDetailsToStepInfo(has_device, step, *events); + ConvertStepDetailsToStepInfo(has_device, step, *step_details); if (step_info.duration_ps() == 0) continue; // Do not include non-well-formed steps. PerCoreStepInfo per_core_step_info; @@ -137,6 +137,11 @@ StepDatabaseResult ConvertStepEventsToStepDb( << DebugStepInfo(( *per_core_step_info .mutable_step_info_per_core())[kDefaultGpuLocalCoreId]); + // Populates the collective ops information. + auto& collectives = *per_core_step_info.mutable_all_reduce_db_per_core(); + for (const auto& it : step_details->Collectives()) { + collectives[it.first] = it.second; + } // The remaining fields in PerCoreStepInfo are not filled. 
*step_db.add_step_sequence() = per_core_step_info; } diff --git a/tensorflow/core/profiler/convert/xplane_to_step_events.cc b/tensorflow/core/profiler/convert/xplane_to_step_events.cc index 00da02c8116..1d80d308193 100644 --- a/tensorflow/core/profiler/convert/xplane_to_step_events.cc +++ b/tensorflow/core/profiler/convert/xplane_to_step_events.cc @@ -20,6 +20,7 @@ limitations under the License. #include "absl/strings/string_view.h" #include "absl/types/optional.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/profiler/protobuf/steps_db.pb.h" #include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/profiler/utils/event_span.h" #include "tensorflow/core/profiler/utils/tf_xplane_visitor.h" @@ -126,7 +127,8 @@ StepEvents ConvertDeviceStepInfoToStepMarkers(const XLineVisitor& line) { return result; } -StepEvents ConvertDeviceTraceXLineToStepEvents(const XLineVisitor& line) { +StepEvents ConvertDeviceTraceXLineToStepEvents(const uint64 device_id, + const XLineVisitor& line) { StepEvents result; line.ForEachEvent([&](const XEventVisitor& event) { int64 correlation_id = -1; @@ -148,10 +150,17 @@ StepEvents ConvertDeviceTraceXLineToStepEvents(const XLineVisitor& line) { }); if (correlation_id >= 0 && group_id >= 0) { - EventTypeSpan event_type_span( - ClassifyGpuEvent(event.Name(), tensor_shapes), - Timespan(event.TimestampPs(), event.DurationPs())); + EventType event_type = ClassifyGpuEvent(event.Name(), tensor_shapes); + EventTypeSpan event_type_span(event_type, event.GetTimespan()); result[group_id].AddEvent(event_type_span); + if (event_type == DEVICE_COLLECTIVES) { + AllReduceInfo collective_ops; + collective_ops.set_name(string(event.Name())); + collective_ops.set_start_time_ps(event.TimestampPs()); + collective_ops.set_end_time_ps(event.EndOffsetPs()); + // TODO(jiesun): figure out how to get size info etc. 
+ result[group_id].AddCollectiveOpEvent(device_id, collective_ops); + } } }); return result; @@ -167,7 +176,8 @@ StepEvents ConvertDeviceTraceXPlaneToStepEvents(const XPlane& device_trace) { } else if (IsDerivedThreadId(line_id)) { return; } else { - CombineStepEvents(ConvertDeviceTraceXLineToStepEvents(line), &result); + CombineStepEvents(ConvertDeviceTraceXLineToStepEvents(plane.Id(), line), + &result); } }); return result; diff --git a/tensorflow/core/profiler/utils/BUILD b/tensorflow/core/profiler/utils/BUILD index 92a87e2228c..89c416108a5 100644 --- a/tensorflow/core/profiler/utils/BUILD +++ b/tensorflow/core/profiler/utils/BUILD @@ -34,6 +34,7 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core/profiler/protobuf:op_metrics_proto_cc", + "//tensorflow/core/profiler/protobuf:steps_db_proto_cc", "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/strings", diff --git a/tensorflow/core/profiler/utils/event_span.cc b/tensorflow/core/profiler/utils/event_span.cc index 9a0f65941b2..f25c0809a06 100644 --- a/tensorflow/core/profiler/utils/event_span.cc +++ b/tensorflow/core/profiler/utils/event_span.cc @@ -141,6 +141,7 @@ std::vector ToNonOverlappedEvents( void CombineStepDetails(const StepDetails& src, StepDetails* dst) { dst->AppendMarkers(src.Markers()); dst->AppendEvents(src.Events()); + dst->AppendCollectives(src.Collectives()); } EventType ClassifyDeviceCompute(absl::string_view event_name, @@ -171,6 +172,9 @@ EventType ClassifyGpuEvent(absl::string_view event_name, return DEVICE_TO_HOST; if (absl::StartsWithIgnoreCase(event_name, "MEMCPYDtoD")) return DEVICE_TO_DEVICE; + if (absl::StartsWithIgnoreCase(event_name, "nccl")) { + return DEVICE_COLLECTIVES; + } return ClassifyDeviceCompute(event_name, tensor_shapes); } @@ -283,6 +287,8 @@ StepEvents ToNonOverlappedStepEvents(const StepEvents& overlapped_step_events) { step_details.Markers(); *non_overlapped_step_events[step_id].MutableEvents() = ToNonOverlappedEvents(step_details.Events()); + *non_overlapped_step_events[step_id].MutableCollectives() = + step_details.Collectives(); } return non_overlapped_step_events; } @@ -299,6 +305,17 @@ void StepDetails::AppendEvents(const std::vector& other_events) { events_.insert(events_.end(), other_events.begin(), other_events.end()); } +void StepDetails::AppendCollectives( + const absl::flat_hash_map& collectives) { + for (const auto& it : collectives) { + collectives_[it.first] = it.second; + } +} + +void StepDetails::AddCollectiveOpEvent(uint64 core_id, const AllReduceInfo& e) { + *collectives_[core_id].add_all_reduce_info() = e; +} + Timespan StepDetails::StepTime() const { Timespan max_host_step_time; Timespan max_device_step_time; diff --git a/tensorflow/core/profiler/utils/event_span.h b/tensorflow/core/profiler/utils/event_span.h index 1adc6a75d82..b1f325b08e2 100644 --- a/tensorflow/core/profiler/utils/event_span.h +++ b/tensorflow/core/profiler/utils/event_span.h @@ -23,6 +23,7 @@ limitations under the License. #include "absl/strings/string_view.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/protobuf/op_metrics.pb.h" +#include "tensorflow/core/profiler/protobuf/steps_db.pb.h" #include "tensorflow/core/profiler/utils/timespan.h" namespace tensorflow { @@ -36,29 +37,31 @@ enum EventType { // executing some events which were not traced. UNKNOWN_TIME = 0, // Host is computing. - HOST_COMPUTE = 1, + HOST_COMPUTE = 10, // Host is compiling. 
- HOST_COMPILE = 2, + HOST_COMPILE = 20, // Host-to-host communication. - HOST_TO_HOST = 3, + HOST_TO_HOST = 30, // Host-to-device communication. - HOST_TO_DEVICE = 4, + HOST_TO_DEVICE = 40, // Host is preparing to launch a computation on device. - HOST_PREPARE = 5, + HOST_PREPARE = 50, // Host is waiting for input. - HOST_WAIT_INPUT = 6, + HOST_WAIT_INPUT = 60, // Device-to-device communication. - DEVICE_TO_DEVICE = 7, + DEVICE_TO_DEVICE = 70, // Device-to-host communication. - DEVICE_TO_HOST = 8, + DEVICE_TO_HOST = 80, + // Collective Ops such as All-Reduce. + DEVICE_COLLECTIVES = 90, // Device is computing with 32-bit precision. - DEVICE_COMPUTE_32 = 9, + DEVICE_COMPUTE_32 = 100, // Device is computing with 16-bit precision. - DEVICE_COMPUTE_16 = 10, + DEVICE_COMPUTE_16 = 110, // Device is waiting for another device. - DEVICE_WAIT_DEVICE = 11, + DEVICE_WAIT_DEVICE = 120, // Device is waiting for host. - DEVICE_WAIT_HOST = 12, + DEVICE_WAIT_HOST = 130, LAST_EVENT_TYPE = DEVICE_WAIT_HOST }; @@ -108,6 +111,39 @@ struct StepMarker { // Details of a step. Note that this could be the result of combining the // StepDetails of the same step executed on different cores. class StepDetails { + public: + const std::vector& Markers() const { return markers_; } + const std::vector& Events() const { return events_; } + const absl::flat_hash_map& Collectives() const { + return collectives_; + } + // Returns the step time. + Timespan StepTime() const; + std::vector* MutableMarkers() { return &markers_; } + std::vector* MutableEvents() { return &events_; } + absl::flat_hash_map* MutableCollectives() { + return &collectives_; + } + // Adds a step-marker to this step. + void AddMarker(const StepMarker& m); + // Adds an EventTypeSpan to this step. + void AddEvent(const EventTypeSpan& e); + // Adds a collective op to this step. + void AddCollectiveOpEvent(uint64 core_id, const AllReduceInfo& e); + // Appends the step-markers from another step to this step. + void AppendMarkers(const std::vector& other_markers); + // Appends the events from another step to this step. + void AppendEvents(const std::vector& other_events); + // Appends the collectives from another step to this step. + void AppendCollectives( + const absl::flat_hash_map& collectives); + // Equality test. + bool operator==(const StepDetails& other) const; + // Inequality test. + bool operator!=(const StepDetails& other) const { return !(*this == other); } + // Returns a string that prints the content of this object. + std::string DebugString() const; + private: // All step-markers found for marking this step in the traces. There could be // multiple step-markers for a single step for different reasons. One such @@ -117,28 +153,8 @@ class StepDetails { std::vector markers_; // All events belonging to this step. std::vector events_; - - public: - const std::vector& Markers() const { return markers_; } - const std::vector& Events() const { return events_; } - // Returns the step time. - Timespan StepTime() const; - std::vector* MutableMarkers() { return &markers_; } - std::vector* MutableEvents() { return &events_; } - // Adds a step-marker to this step. - void AddMarker(const StepMarker& m); - // Adds an EventTypeSpan to this step. - void AddEvent(const EventTypeSpan& e); - // Appends the step-markers from another step to this step. - void AppendMarkers(const std::vector& other_markers); - // Appends the events from another step to this step. - void AppendEvents(const std::vector& other_events); - // Equality test. 
- bool operator==(const StepDetails& other) const; - // Inequality test. - bool operator!=(const StepDetails& other) const { return !(*this == other); } - // Returns a string that prints the content of this object. - std::string DebugString() const; + // Collective operation related events such as all-reduce etc. + absl::flat_hash_map collectives_; }; // Map from step_id to the events happened in that step. From a63c4d60cf5fb6a32b7674c216bbfef92ea0b43c Mon Sep 17 00:00:00 2001 From: Jose Baiocchi Date: Fri, 24 Jul 2020 10:47:12 -0700 Subject: [PATCH 1278/2522] Use TraceMeOp and TraceMeEncode to hide TraceString encoding details PiperOrigin-RevId: 323025848 Change-Id: I6f491180763d5d87a2e53296b460d0e41e4b0ac7 --- .../base_collective_executor.cc | 9 +++--- .../common_runtime/eager/kernel_and_device.cc | 2 +- tensorflow/core/common_runtime/executor.cc | 8 ++--- tensorflow/core/framework/dataset.cc | 5 +-- tensorflow/core/framework/dataset.h | 2 +- tensorflow/core/framework/op_kernel.cc | 31 +++++++++++-------- tensorflow/core/framework/op_kernel.h | 8 ++--- tensorflow/core/framework/op_kernel_test.cc | 2 +- tensorflow/core/kernels/BUILD | 5 ++- tensorflow/core/kernels/einsum_op_impl.h | 19 +++++++----- tensorflow/core/kernels/function_ops.cc | 19 +++++++----- tensorflow/core/kernels/function_ops.h | 2 +- tensorflow/core/kernels/sendrecv_ops.cc | 13 ++++---- tensorflow/core/kernels/sendrecv_ops.h | 4 +-- tensorflow/core/profiler/lib/traceme_encode.h | 11 +++++++ 15 files changed, 84 insertions(+), 56 deletions(-) diff --git a/tensorflow/core/common_runtime/base_collective_executor.cc b/tensorflow/core/common_runtime/base_collective_executor.cc index ff9c67f0eb0..85d911da24f 100644 --- a/tensorflow/core/common_runtime/base_collective_executor.cc +++ b/tensorflow/core/common_runtime/base_collective_executor.cc @@ -285,10 +285,11 @@ void BaseCollectiveExecutor::ExecuteAsync(OpKernelContext* ctx, // starve executor threads. remote_access_->RunClosure([col_impl, col_ctx, done_safe, ctx]() { profiler::TraceMe activity( - [&] { - return strings::StrCat(ctx->op_kernel().name_view(), ":", - ctx->op_kernel().type_string_view(), - "#id=", ctx->step_id(), "#"); + [ctx] { + string op = profiler::TraceMeOp(ctx->op_kernel().name_view(), + ctx->op_kernel().type_string_view()); + return profiler::TraceMeEncode(std::move(op), + {{"id", ctx->step_id()}}); }, profiler::TraceMeLevel::kInfo); col_impl->Run([col_impl, col_ctx, done_safe](const Status& s) { diff --git a/tensorflow/core/common_runtime/eager/kernel_and_device.cc b/tensorflow/core/common_runtime/eager/kernel_and_device.cc index f47db8eea55..1f506c318bc 100644 --- a/tensorflow/core/common_runtime/eager/kernel_and_device.cc +++ b/tensorflow/core/common_runtime/eager/kernel_and_device.cc @@ -296,7 +296,7 @@ Status KernelAndDeviceOp::Run( // 'AnnotatedTraceMe' will trace both scheduling time on host and execution // time on device of the OpKernel. 
profiler::AnnotatedTraceMe activity( - [&] { return kernel_->TraceString(&context, /*verbose=*/false); }, + [&] { return kernel_->TraceString(context, /*verbose=*/false); }, profiler::TraceMeLevel::kInfo); device_->Compute(kernel_.get(), &context); } diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc index d441235ff71..648189ca1ee 100644 --- a/tensorflow/core/common_runtime/executor.cc +++ b/tensorflow/core/common_runtime/executor.cc @@ -530,9 +530,9 @@ Status ExecutorState::ProcessSync( tracing::ScopedRegion region(tracing::EventCategory::kCompute, op_kernel->name_view()); profiler::AnnotatedTraceMe activity( - [&] { + [op_kernel, &ctx] { return op_kernel->TraceString( - &ctx, /*verbose=*/profiler::TfOpDetailsEnabled()); + ctx, /*verbose=*/profiler::TfOpDetailsEnabled()); }, profiler::GetTFTraceMeLevel(is_expensive)); device->Compute(op_kernel, &ctx); @@ -597,9 +597,9 @@ void ExecutorState::ProcessAsync( nodestats::SetOpStart(stats); { profiler::AnnotatedTraceMe activity( - [&] { + [async_kernel, state] { return async_kernel->TraceString( - &state->ctx, /*verbose=*/profiler::TfOpDetailsEnabled()); + state->ctx, /*verbose=*/profiler::TfOpDetailsEnabled()); }, profiler::GetTFTraceMeLevel(kernel_stats_->IsExpensive(item))); immutable_state_.params().device->ComputeAsync(async_kernel, &state->ctx, diff --git a/tensorflow/core/framework/dataset.cc b/tensorflow/core/framework/dataset.cc index 65b94ad5ae1..a4c96fe5b1f 100644 --- a/tensorflow/core/framework/dataset.cc +++ b/tensorflow/core/framework/dataset.cc @@ -524,8 +524,9 @@ void DatasetOpKernel::Compute(OpKernelContext* ctx) { } } -string DatasetOpKernel::TraceString(OpKernelContext* ctx, bool verbose) { - return strings::StrCat(name_view(), ":", type_string_view()); +string DatasetOpKernel::TraceString(const OpKernelContext& ctx, + bool verbose) const { + return profiler::TraceMeOp(name_view(), type_string_view()); } // static diff --git a/tensorflow/core/framework/dataset.h b/tensorflow/core/framework/dataset.h index 860eb7419e7..d7f8cd752ca 100644 --- a/tensorflow/core/framework/dataset.h +++ b/tensorflow/core/framework/dataset.h @@ -1063,7 +1063,7 @@ class DatasetOpKernel : public OpKernel { // the `DatasetOpKernel` class. static bool IsDatasetOp(const OpDef* op_def); - string TraceString(OpKernelContext* ctx, bool verbose) override; + string TraceString(const OpKernelContext& ctx, bool verbose) const override; protected: // Subclasses should implement this method. It will be called during Compute diff --git a/tensorflow/core/framework/op_kernel.cc b/tensorflow/core/framework/op_kernel.cc index e6d2f2c8ea4..1930cc98da1 100644 --- a/tensorflow/core/framework/op_kernel.cc +++ b/tensorflow/core/framework/op_kernel.cc @@ -53,6 +53,7 @@ limitations under the License. 
#include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/platform_strings.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/profiler/lib/traceme.h" #include "tensorflow/core/util/ptr_util.h" namespace tensorflow { @@ -172,34 +173,38 @@ Status OpKernel::OutputRange(StringPiece output_name, int* start, } } -string OpKernel::GetTraceArgument(OpKernelContext* ctx) { - int num_inputs = ctx->num_inputs(); +string OpKernel::ShapeTraceString(const OpKernelContext& ctx) const { + int num_inputs = ctx.num_inputs(); if (num_inputs == 0) return ""; std::vector tensor_shapes; tensor_shapes.reserve(num_inputs); for (int i = 0; i < num_inputs; i++) { - if (!ctx->has_input(i)) { + if (!ctx.has_input(i)) { tensor_shapes.emplace_back(); // Placeholder continue; } - DataType input_dtype = ctx->input_dtype(i); + DataType input_dtype = ctx.input_dtype(i); if (input_dtype == DataType::DT_RESOURCE || input_dtype == DataType::DT_VARIANT || IsRefType(input_dtype)) { tensor_shapes.emplace_back(); // Placeholder continue; } tensor_shapes.emplace_back(strings::StrCat( - DataTypeString(input_dtype), ctx->input(i).shape().DebugString())); + DataTypeString(input_dtype), ctx.input(i).shape().DebugString())); } - return strings::StrCat("shape=(", absl::StrJoin(tensor_shapes, ";"), ")"); + return strings::StrCat("(", absl::StrJoin(tensor_shapes, ";"), ")"); } -string OpKernel::TraceString(OpKernelContext* ctx, bool verbose) { - string trace_string = strings::StrCat(name_view(), ":", type_string_view()); - if (!verbose) return trace_string; - string trace_args = GetTraceArgument(ctx); - if (trace_args.empty()) return trace_string; - return strings::StrCat(trace_string, "#", trace_args, "#"); +string OpKernel::TraceString(const OpKernelContext& ctx, bool verbose) const { + string trace_string = profiler::TraceMeOp(name_view(), type_string_view()); + if (verbose) { + string shape = ShapeTraceString(ctx); + if (!shape.empty()) { + trace_string = + profiler::TraceMeEncode(std::move(trace_string), {{"shape", shape}}); + } + } + return trace_string; } void AsyncOpKernel::Compute(OpKernelContext* context) { @@ -413,7 +418,7 @@ Status OpKernelContext::input_ref_mutex(StringPiece name, mutex** out_mutex) { return Status::OK(); } -const Tensor& OpKernelContext::input(int index) { +const Tensor& OpKernelContext::input(int index) const { CHECK_GE(index, 0); CHECK_LT(index, num_inputs()) << " name: " << op_kernel().name(); CHECK(!input_is_ref(index)); diff --git a/tensorflow/core/framework/op_kernel.h b/tensorflow/core/framework/op_kernel.h index a4ada3303d3..4638382fe75 100644 --- a/tensorflow/core/framework/op_kernel.h +++ b/tensorflow/core/framework/op_kernel.h @@ -177,12 +177,10 @@ class OpKernel { // Returns a trace string for current computation, op name/type and input // tensor shape/dtype are encoded for profiler cost analysis. Most OpKernel // should use the default implementation. - // Override this function to add OpKernel specific attributes that are - // necessary for cost analysis. - virtual string TraceString(OpKernelContext* ctx, bool verbose); + virtual string TraceString(const OpKernelContext& ctx, bool verbose) const; protected: - string GetTraceArgument(OpKernelContext* ctx); + string ShapeTraceString(const OpKernelContext& ctx) const; private: const std::shared_ptr props_; @@ -734,7 +732,7 @@ class OpKernelContext { // inputs. For Ref inputs use mutable_input below. // REQUIRES: !IsRefType(input_dtype(index)) // TODO(mrry): Convert this to return Status. 
- const Tensor& input(int index); + const Tensor& input(int index) const; // Returns the named immutable input tensor in "tensor", as defined // in the OpDef. May only be used for non-Ref inputs. For Ref inputs diff --git a/tensorflow/core/framework/op_kernel_test.cc b/tensorflow/core/framework/op_kernel_test.cc index 3c915d13fdc..186f36ccae6 100644 --- a/tensorflow/core/framework/op_kernel_test.cc +++ b/tensorflow/core/framework/op_kernel_test.cc @@ -1105,7 +1105,7 @@ void BM_TraceString(const int iters, const int verbose) { testing::StartTiming(); for (int i = 0; i < iters; ++i) { - auto trace = op->TraceString(ctx.get(), verbose); + auto trace = op->TraceString(*ctx, verbose); } testing::StopTiming(); } diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index a58f598d322..f98053c7d4f 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -3824,6 +3824,7 @@ tf_kernel_library( ":transpose_functor", "//tensorflow/core:framework", "//tensorflow/core:lib", + "//tensorflow/core/profiler/lib:traceme", "//third_party/eigen3", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/strings", @@ -5341,7 +5342,9 @@ tf_kernel_library( tf_kernel_library( name = "sendrecv_ops", prefix = "sendrecv_ops", - deps = REQUIRED_DEPS, + deps = REQUIRED_DEPS + [ + "//tensorflow/core/profiler/lib:traceme", + ], ) tf_cc_test( diff --git a/tensorflow/core/kernels/einsum_op_impl.h b/tensorflow/core/kernels/einsum_op_impl.h index 620a144e886..312738442b8 100644 --- a/tensorflow/core/kernels/einsum_op_impl.h +++ b/tensorflow/core/kernels/einsum_op_impl.h @@ -40,6 +40,7 @@ limitations under the License. #include "tensorflow/core/lib/gtl/inlined_vector.h" #include "tensorflow/core/lib/math/math_util.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/profiler/lib/traceme.h" #include "tensorflow/core/util/einsum_op_util.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM @@ -715,15 +716,17 @@ class EinsumOp : public OpKernel { ctx->set_output(0, output); } - string TraceString(OpKernelContext* ctx, bool verbose) override { - if (!verbose) { - return strings::StrCat(name_view(), ":", type_string_view(), - "#equation=(", equation_, ")#"); - } else { - string trace_args = GetTraceArgument(ctx); - return strings::StrCat(name_view(), ":", type_string_view(), - "#equation=(", equation_, "),", trace_args, "#"); + string TraceString(const OpKernelContext& ctx, bool verbose) const override { + string op = profiler::TraceMeOp(name_view(), type_string_view()); + string equation = strings::StrCat("(", equation_, ")"); + if (verbose) { + string shape = ShapeTraceString(ctx); + if (!shape.empty()) { + return profiler::TraceMeEncode( + std::move(op), {{"equation", equation}, {"shape", shape}}); + } } + return profiler::TraceMeEncode(std::move(op), {{"equation", equation}}); } private: diff --git a/tensorflow/core/kernels/function_ops.cc b/tensorflow/core/kernels/function_ops.cc index d69292082bc..52a11e0870d 100644 --- a/tensorflow/core/kernels/function_ops.cc +++ b/tensorflow/core/kernels/function_ops.cc @@ -441,13 +441,18 @@ void RemoteCallOp::ComputeAsync(OpKernelContext* ctx, DoneCallback done) { }); } -string RemoteCallOp::TraceString(OpKernelContext* ctx, bool verbose) { - string trace_string = - strings::StrCat(name_view(), "__", func_.name(), ":", type_string_view()); - if (!verbose) return trace_string; - string trace_args = GetTraceArgument(ctx); - if (trace_args.empty()) return trace_string; - return strings::StrCat(trace_string, "#", 
trace_args, "#"); +string RemoteCallOp::TraceString(const OpKernelContext& ctx, + bool verbose) const { + string trace_string = profiler::TraceMeOp( + strings::StrCat(name_view(), "__", func_.name()), type_string_view()); + if (verbose) { + string shape = ShapeTraceString(ctx); + if (!shape.empty()) { + trace_string = + profiler::TraceMeEncode(std::move(trace_string), {{"shape", shape}}); + } + } + return trace_string; } REGISTER_KERNEL_BUILDER( diff --git a/tensorflow/core/kernels/function_ops.h b/tensorflow/core/kernels/function_ops.h index 69e34c5962d..4fad8c4a3f0 100644 --- a/tensorflow/core/kernels/function_ops.h +++ b/tensorflow/core/kernels/function_ops.h @@ -64,7 +64,7 @@ class RemoteCallOp : public AsyncOpKernel { void ComputeAsync(OpKernelContext* ctx, DoneCallback done) override; - string TraceString(OpKernelContext* ctx, bool verbose) override; + string TraceString(const OpKernelContext& ctx, bool verbose) const override; private: NameAttrList func_; diff --git a/tensorflow/core/kernels/sendrecv_ops.cc b/tensorflow/core/kernels/sendrecv_ops.cc index f4c4fae2910..91d15901b12 100644 --- a/tensorflow/core/kernels/sendrecv_ops.cc +++ b/tensorflow/core/kernels/sendrecv_ops.cc @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/profiler/lib/traceme.h" namespace tensorflow { @@ -111,14 +112,14 @@ void SendOp::Compute(OpKernelContext* ctx) { } } -string SendOp::TraceString(OpKernelContext* ctx, bool verbose) { +string SendOp::TraceString(const OpKernelContext& ctx, bool verbose) const { const auto& attr = def().attr(); auto src_it = attr.find("_src"); auto dst_it = attr.find("_dst"); const string& src = src_it != attr.end() ? src_it->second.s() : ""; const string& dst = dst_it != attr.end() ? dst_it->second.s() : ""; - return strings::StrCat(name_view(), ":", type_string_view(), "#from=", src, - ",to=", dst, "#"); + string op = profiler::TraceMeOp(name_view(), type_string_view()); + return profiler::TraceMeEncode(std::move(op), {{"from", src}, {"to", dst}}); } REGISTER_KERNEL_BUILDER(Name("_Send").Device(DEVICE_CPU), SendOp); @@ -155,14 +156,14 @@ RecvOp::RecvOp(OpKernelConstruction* ctx) : AsyncOpKernel(ctx) { } } -string RecvOp::TraceString(OpKernelContext* ctx, bool verbose) { +string RecvOp::TraceString(const OpKernelContext& ctx, bool verbose) const { const auto& attr = def().attr(); auto src_it = attr.find("_src"); auto dst_it = attr.find("_dst"); const string& src = src_it != attr.end() ? src_it->second.s() : ""; const string& dst = dst_it != attr.end() ? 
dst_it->second.s() : ""; - return strings::StrCat(name_view(), ":", type_string_view(), "#from=", src, - ",to=", dst, "#"); + string op = profiler::TraceMeOp(name_view(), type_string_view()); + return profiler::TraceMeEncode(std::move(op), {{"from", src}, {"to", dst}}); } namespace { diff --git a/tensorflow/core/kernels/sendrecv_ops.h b/tensorflow/core/kernels/sendrecv_ops.h index 06c5663bc04..36bc22db1e7 100644 --- a/tensorflow/core/kernels/sendrecv_ops.h +++ b/tensorflow/core/kernels/sendrecv_ops.h @@ -26,7 +26,7 @@ class SendOp : public OpKernel { explicit SendOp(OpKernelConstruction* ctx); void Compute(OpKernelContext* ctx) override; - string TraceString(OpKernelContext* ctx, bool verbose) override; + string TraceString(const OpKernelContext& ctx, bool verbose) const override; private: string key_prefix_; @@ -41,7 +41,7 @@ class RecvOp : public AsyncOpKernel { explicit RecvOp(OpKernelConstruction* ctx); void ComputeAsync(OpKernelContext* ctx, DoneCallback done) override; - string TraceString(OpKernelContext* ctx, bool verbose) override; + string TraceString(const OpKernelContext& ctx, bool verbose) const override; private: string key_prefix_; diff --git a/tensorflow/core/profiler/lib/traceme_encode.h b/tensorflow/core/profiler/lib/traceme_encode.h index 91b23740fc3..4dcd6ea469b 100644 --- a/tensorflow/core/profiler/lib/traceme_encode.h +++ b/tensorflow/core/profiler/lib/traceme_encode.h @@ -128,6 +128,17 @@ TF_ATTRIBUTE_ALWAYS_INLINE inline std::string TraceMeEncode( return traceme_internal::AppendArgs(std::string(), args); } +// Concatenates op_name and op_type. +TF_ATTRIBUTE_ALWAYS_INLINE inline std::string TraceMeOp( + absl::string_view op_name, absl::string_view op_type) { + return absl::StrCat(op_name, ":", op_type); +} +TF_ATTRIBUTE_ALWAYS_INLINE inline std::string TraceMeOp( + std::string&& op_name, absl::string_view op_type) { + absl::StrAppend(&op_name, ":", op_type); + return op_name; +} + } // namespace profiler } // namespace tensorflow From e392050297b62772fb9e6aaf10cf1214cb5261e7 Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Fri, 24 Jul 2020 10:59:57 -0700 Subject: [PATCH 1279/2522] Port the floor kernel to the new TfLiteEvalTensor API. PiperOrigin-RevId: 323028754 Change-Id: I9a9c7fab2af3fef91ebe697bee44a7e5f7c90aaf --- tensorflow/lite/micro/kernels/BUILD | 1 + tensorflow/lite/micro/kernels/floor.cc | 14 +++++++----- tensorflow/lite/micro/kernels/floor_test.cc | 25 +++++++++------------ 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index 5f79f7c0c62..64610aa4581 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD @@ -252,6 +252,7 @@ tflite_micro_cc_test( "floor_test.cc", ], deps = [ + ":kernel_runner", "//tensorflow/lite/c:common", "//tensorflow/lite/micro:op_resolvers", "//tensorflow/lite/micro/testing:micro_test", diff --git a/tensorflow/lite/micro/kernels/floor.cc b/tensorflow/lite/micro/kernels/floor.cc index 4ef8fc599f7..b8be1cf0e73 100644 --- a/tensorflow/lite/micro/kernels/floor.cc +++ b/tensorflow/lite/micro/kernels/floor.cc @@ -17,7 +17,7 @@ limitations under the License. 
#include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { namespace ops { @@ -28,11 +28,15 @@ constexpr int kInputTensor = 0; constexpr int kOutputTensor = 0; TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - const TfLiteTensor* input = GetInput(context, node, kInputTensor); + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); - reference_ops::Floor(GetTensorShape(input), GetTensorData(input), - GetTensorShape(output), GetTensorData(output)); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + reference_ops::Floor(tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); return kTfLiteOk; } } // namespace floor diff --git a/tensorflow/lite/micro/kernels/floor_test.cc b/tensorflow/lite/micro/kernels/floor_test.cc index 2684bf3d8c9..3a27a937b17 100644 --- a/tensorflow/lite/micro/kernels/floor_test.cc +++ b/tensorflow/lite/micro/kernels/floor_test.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/micro/all_ops_resolver.h" +#include "tensorflow/lite/micro/kernels/kernel_runner.h" #include "tensorflow/lite/micro/testing/micro_test.h" #include "tensorflow/lite/micro/testing/test_utils.h" @@ -36,26 +37,20 @@ void TestFloor(const int* input_dims_data, const float* input_data, CreateFloatTensor(input_data, input_dims), CreateFloatTensor(output_data, output_dims), }; - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_FLOOR); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); int inputs_array_data[] = {1, 0}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 1}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = nullptr; - node.builtin_data = nullptr; - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); + + const TfLiteRegistration registration = tflite::ops::micro::Register_FLOOR(); + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, /*builtin_data=*/nullptr, + micro_test::reporter); + + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); + for (int i = 0; i < output_dims_count; ++i) { TF_LITE_MICRO_EXPECT_NEAR(expected_output_data[i], output_data[i], 1e-5f); } From 7ac63ecb7b9be3256a5998f1462db4037c87c2b5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 24 Jul 2020 11:22:45 -0700 Subject: [PATCH 1280/2522] Fixed a bug where element-wise operations miscalculated broadcast output tensor sizes, if unavailable in the graph. 
PiperOrigin-RevId: 323034133 Change-Id: I424e32a5aed3713f16c9c05cc5ae9b7782077fb1 --- .../grappler/costs/op_level_cost_estimator.cc | 69 ++++++++++--------- .../costs/op_level_cost_estimator_test.cc | 21 +++++- 2 files changed, 54 insertions(+), 36 deletions(-) diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc index fb0d6ecf1d0..9d9cb578b98 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc @@ -173,25 +173,33 @@ int64 GetOutputSize(const int64 input, const int64 filter, const int64 stride, } } -// Return the output element count of a binary element-wise op considering +// Return the output element count of a multi-input element-wise op considering // broadcasting. -int64 CwiseOutputElementCount(const TensorShapeProto& input_shape_1, - const TensorShapeProto& input_shape_2) { - bool found_unknown_shapes; - int rank = std::max(1, input_shape_1.dim_size()); - TensorShapeProto output_shape = - MaybeGetMinimumShape(input_shape_1, rank, &found_unknown_shapes); +int64 CwiseOutputElementCount(const OpInfo& op_info) { + int max_rank = 1; + for (const OpInfo::TensorProperties& input_properties : op_info.inputs()) { + max_rank = std::max(max_rank, input_properties.shape().dim_size()); + } - if (input_shape_1.dim_size() == input_shape_2.dim_size()) { - auto shape_1 = - MaybeGetMinimumShape(input_shape_1, rank, &found_unknown_shapes); - auto shape_2 = - MaybeGetMinimumShape(input_shape_2, rank, &found_unknown_shapes); - if (shape_1.dim_size() == shape_2.dim_size()) { - for (int i = 0; i < shape_1.dim_size(); i++) { - output_shape.mutable_dim(i)->set_size( - std::max(shape_1.dim(i).size(), shape_2.dim(i).size())); - } + TensorShapeProto output_shape; + output_shape.mutable_dim()->Reserve(max_rank); + for (int i = 0; i < max_rank; ++i) { + output_shape.add_dim(); + } + + // Expand the shape of the output to follow the numpy-style broadcast rule + // which matches each input starting with the trailing dimensions and working + // its way forward. To do this, iterate through each input shape's dimensions + // in reverse order, and potentially increase the corresponding output + // dimension. + for (const OpInfo::TensorProperties& input_properties : op_info.inputs()) { + const TensorShapeProto& input_shape = input_properties.shape(); + for (int i = input_shape.dim_size() - 1; i >= 0; --i) { + int output_shape_dim_index = + i + output_shape.dim_size() - input_shape.dim_size(); + output_shape.mutable_dim(output_shape_dim_index) + ->set_size(std::max(output_shape.dim(output_shape_dim_index).size(), + input_shape.dim(i).size())); } } @@ -635,9 +643,9 @@ DeviceInfo OpLevelCostEstimator::GetDeviceInfo( Costs OpLevelCostEstimator::PredictCwiseOp(const OpContext& op_context) const { const auto& op_info = op_context.op_info; bool found_unknown_shapes = false; - // For unary or binary element-wise operations, op count is the element count - // of any input. We use the count for the largest input here to be more robust - // in case that the shape is unknown or partially known for other input. + // For element-wise operations, op count is the element count of any input. We + // use the count for the largest input here to be more robust in case that the + // shape is unknown or partially known for other input. 
int64 op_count = CalculateLargestInputCount(op_info, &found_unknown_shapes); // If output shape is available, try use the element count calculated from // that. @@ -646,12 +654,9 @@ Costs OpLevelCostEstimator::PredictCwiseOp(const OpContext& op_context) const { op_count, CalculateTensorElementCount(op_info.outputs(0), &found_unknown_shapes)); } - // For binary ops, calculate the output shape possibly resulting from - // broadcasting. + // Calculate the output shape possibly resulting from broadcasting. if (op_info.inputs_size() >= 2) { - op_count = - std::max(op_count, CwiseOutputElementCount(op_info.inputs(0).shape(), - op_info.inputs(1).shape())); + op_count = std::max(op_count, CwiseOutputElementCount(op_info)); } int op_cost = 1; @@ -1541,7 +1546,6 @@ Costs OpLevelCostEstimator::PredictFusedConv2DBiasActivation( auto& conv_input = op_context.op_info.inputs(0); auto& filter = op_context.op_info.inputs(1); - auto& bias = op_context.op_info.inputs(2); auto& side_input = op_context.op_info.inputs(3); auto& conv_input_scale = op_context.op_info.inputs(4); auto& side_input_scale = op_context.op_info.inputs(5); @@ -1551,10 +1555,6 @@ Costs OpLevelCostEstimator::PredictFusedConv2DBiasActivation( auto dims = ConvolutionDimensionsFromInputs( conv_input.shape(), filter.shape(), op_context.op_info, &found_unknown_shapes); - - // Construct the shape of our output tensor from our convolution dimensions - // and format, as it may not be available yet. - // TODO(varomodt): should we centralize the Conv2D input/output shapes? OpInfo::TensorProperties output; if (data_format == "NCHW" || data_format == "NCHW_VECT_C") { output = DescribeTensor(DT_FLOAT, {dims.batch, dims.oz, dims.oy, dims.ox}); @@ -1566,15 +1566,18 @@ Costs OpLevelCostEstimator::PredictFusedConv2DBiasActivation( std::vector component_ops = { FusedChildContext(op_context, "Conv2D", output, {conv_input, filter}), FusedChildContext(op_context, "Mul", output, {output, conv_input_scale}), - FusedChildContext(op_context, "BiasAdd", output, {output, bias}), + FusedChildContext( + op_context, "BiasAdd", output, + {output, output}), // Note we're no longer using bias at all FusedChildContext(op_context, "Relu", output, {output})}; // Add our side_input iff it's non-empty. if (side_input.shape().dim_size() > 0) { component_ops.push_back(FusedChildContext(op_context, "Mul", side_input, {side_input, side_input_scale})); - component_ops.push_back( - FusedChildContext(op_context, "Add", output, {side_input, output})); + component_ops.push_back(FusedChildContext( + op_context, "Add", output, + {output, output})); // Note that we're not using side_input here } // Construct an op_context which definitely has our output shape. 
diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc index a086fe02191..aaaf29ecb40 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc @@ -929,6 +929,23 @@ TEST_F(OpLevelCostEstimatorTest, CastExecutionTime) { EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); } +TEST_F(OpLevelCostEstimatorTest, BroadcastAddExecutionTime) { + OpContext op_context; + SetCpuDevice(&op_context.op_info); + op_context.op_info.set_op("Add"); + + DescribeTensor1D(100, op_context.op_info.add_inputs()); + DescribeTensor4D(1, 10, 1, 1, op_context.op_info.add_inputs()); + + auto cost = PredictCosts(op_context); + EXPECT_EQ(Costs::Duration(44), cost.memory_time); + EXPECT_EQ(Costs::Duration(100), cost.compute_time); + EXPECT_EQ(Costs::Duration(144), cost.execution_time); + EXPECT_EQ(1, cost.num_ops_total); + EXPECT_FALSE(cost.inaccurate); + EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); +} + TEST_F(OpLevelCostEstimatorTest, UnknownOrPartialShape) { { auto cost = PredictCosts(DescribeMatMul(2, 4, 7, 7)); @@ -1100,9 +1117,7 @@ void ExpectTensorShape(const std::vector& expected, TensorShape tensor_shape_expected(expected); TensorShape tensor_shape(tensor_shape_proto); - LOG(INFO) << "Expected: " << tensor_shape_expected.DebugString(); - LOG(INFO) << "TensorShape: " << tensor_shape.DebugString(); - EXPECT_TRUE(tensor_shape_expected == tensor_shape); + EXPECT_EQ(tensor_shape_expected, tensor_shape); } TEST_F(OpLevelCostEstimatorTest, GetTensorShapeProtoFromTensorProto) { From c11cf5465a1661231d0e0f6585831149f82b83f6 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Fri, 24 Jul 2020 11:24:14 -0700 Subject: [PATCH 1281/2522] Migrate MacOS CPU official release and nightly testing builds to use the new bazel configs. PiperOrigin-RevId: 323034489 Change-Id: I733b8e571fde050bfa8ceab176285b088844809b --- tensorflow/tools/ci_build/release/macos/cpu_py2_full/pip.sh | 4 +--- tensorflow/tools/ci_build/release/macos/cpu_py35_full/pip.sh | 4 +--- tensorflow/tools/ci_build/release/macos/cpu_py36_full/pip.sh | 4 +--- tensorflow/tools/ci_build/release/macos/cpu_py37_full/pip.sh | 4 +--- tensorflow/tools/ci_build/release/macos/cpu_py38_full/pip.sh | 4 +--- 5 files changed, 5 insertions(+), 15 deletions(-) diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py2_full/pip.sh b/tensorflow/tools/ci_build/release/macos/cpu_py2_full/pip.sh index 0630c117036..bcc7b4500d6 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py2_full/pip.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py2_full/pip.sh @@ -33,13 +33,11 @@ export TF_PYTHON_VERSION='python2' export TF_BUILD_BOTH_CPU_PACKAGES=1 # Run configure. -export TF_NEED_CUDA=0 -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) yes "" | "$PYTHON_BIN_PATH" configure.py # Export optional variables for running pip.sh -export TF_BUILD_FLAGS="--config=opt --config=v2" +export TF_BUILD_FLAGS="--config=release_cpu_macos" export TF_TEST_FLAGS="--define=no_tensorflow_py_deps=true --test_lang_filters=py --test_output=errors --verbose_failures=true --keep_going --test_env=TF2_BEHAVIOR=1" export TF_TEST_TARGETS="//tensorflow/python/..." 
export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py35_full/pip.sh b/tensorflow/tools/ci_build/release/macos/cpu_py35_full/pip.sh index 3f31033b2ac..99c2a149394 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py35_full/pip.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py35_full/pip.sh @@ -33,13 +33,11 @@ export TF_PYTHON_VERSION='python3.5' export TF_BUILD_BOTH_CPU_PACKAGES=1 # Run configure. -export TF_NEED_CUDA=0 -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) yes "" | "$PYTHON_BIN_PATH" configure.py # Export optional variables for running pip.sh -export TF_BUILD_FLAGS="--config=opt --config=v2" +export TF_BUILD_FLAGS="--config=release_cpu_macos" export TF_TEST_FLAGS="--define=no_tensorflow_py_deps=true --test_lang_filters=py --test_output=errors --verbose_failures=true --keep_going --test_env=TF2_BEHAVIOR=1" export TF_TEST_TARGETS="//tensorflow/python/..." export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py36_full/pip.sh b/tensorflow/tools/ci_build/release/macos/cpu_py36_full/pip.sh index 26ee4ea8edb..375a8c705fa 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py36_full/pip.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py36_full/pip.sh @@ -33,13 +33,11 @@ export TF_PYTHON_VERSION='python3.6' export TF_BUILD_BOTH_CPU_PACKAGES=1 # Run configure. -export TF_NEED_CUDA=0 -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) yes "" | "$PYTHON_BIN_PATH" configure.py # Export optional variables for running pip.sh -export TF_BUILD_FLAGS="--config=opt --config=v2" +export TF_BUILD_FLAGS="--config=release_cpu_macos" export TF_TEST_FLAGS="--define=no_tensorflow_py_deps=true --test_lang_filters=py --test_output=errors --verbose_failures=true --keep_going --test_env=TF2_BEHAVIOR=1" export TF_TEST_TARGETS="//tensorflow/python/..." export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py37_full/pip.sh b/tensorflow/tools/ci_build/release/macos/cpu_py37_full/pip.sh index ed577db961a..ea6779be698 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py37_full/pip.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py37_full/pip.sh @@ -33,13 +33,11 @@ export TF_PYTHON_VERSION='python3.7' export TF_BUILD_BOTH_CPU_PACKAGES=1 # Run configure. -export TF_NEED_CUDA=0 -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) yes "" | "$PYTHON_BIN_PATH" configure.py # Export optional variables for running pip.sh -export TF_BUILD_FLAGS="--config=opt --config=v2" +export TF_BUILD_FLAGS="--config=release_cpu_macos" export TF_TEST_FLAGS="--define=no_tensorflow_py_deps=true --test_lang_filters=py --test_output=errors --verbose_failures=true --keep_going --test_env=TF2_BEHAVIOR=1" export TF_TEST_TARGETS="//tensorflow/python/..." export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py38_full/pip.sh b/tensorflow/tools/ci_build/release/macos/cpu_py38_full/pip.sh index f8eda5a7520..f0ef8e89766 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py38_full/pip.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py38_full/pip.sh @@ -33,13 +33,11 @@ export TF_PYTHON_VERSION='python3.8' export TF_BUILD_BOTH_CPU_PACKAGES=1 # Run configure. 
-export TF_NEED_CUDA=0 -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) yes "" | "$PYTHON_BIN_PATH" configure.py # Export optional variables for running pip.sh -export TF_BUILD_FLAGS="--config=opt --config=v2" +export TF_BUILD_FLAGS="--config=release_cpu_macos" export TF_TEST_FLAGS="--define=no_tensorflow_py_deps=true --test_lang_filters=py --test_output=errors --verbose_failures=true --keep_going --test_env=TF2_BEHAVIOR=1" export TF_TEST_TARGETS="//tensorflow/python/..." export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" From bbcbddf6b1bf2aaf0cad1008715b36ccb2d0ed4c Mon Sep 17 00:00:00 2001 From: Tomer Kaftan Date: Fri, 24 Jul 2020 11:24:47 -0700 Subject: [PATCH 1282/2522] This CL changes the KerasTensor Slicing Op dispatcher to map `slice`s to `dict`s instead of to `namedtuple`s, because the Keras json serialization ends up serializing namedtuples as normal tuples, w/o any name or field information. PiperOrigin-RevId: 323034612 Change-Id: I355293d676e3e6ac50dbf441cdd5de39f8cd0088 --- tensorflow/python/keras/layers/core.py | 31 +++++++++---------- .../python/keras/saving/hdf5_format_test.py | 29 +++++++++++++++++ 2 files changed, 43 insertions(+), 17 deletions(-) diff --git a/tensorflow/python/keras/layers/core.py b/tensorflow/python/keras/layers/core.py index ca26ee6588f..37907b0ecea 100644 --- a/tensorflow/python/keras/layers/core.py +++ b/tensorflow/python/keras/layers/core.py @@ -18,7 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import collections import copy import functools import operator @@ -1428,18 +1427,16 @@ class KerasOpDispatcher(dispatch.GlobalOpDispatcher): KerasOpDispatcher().register() -SliceTuple = collections.namedtuple('SliceTuple', ['start', 'stop', 'step']) - -def _slice_to_named_tuple(x): +def _slice_to_dict(x): if isinstance(x, slice): - return SliceTuple(x.start, x.stop, x.step) + return {'start': x.start, 'stop': x.stop, 'step': x.step} return x -def _named_tuple_to_slice(x): - if type(x).__name__ == 'SliceTuple': - return slice(x[0], x[1], x[2]) +def _dict_to_slice(x): + if isinstance(x, dict): + return slice(x['start'], x['stop'], x['step']) return x @@ -1466,32 +1463,32 @@ class SlicingOpLambda(TFOpLambda): original_call = self.call # Decorate the function to produce this layer's call method def _call_wrapper(*args, **kwargs): - # Turn any slice nametuples in the args back into `slice` objects. + # Turn any slice dicts in the args back into `slice` objects. # This conversion cannot use nest.flatten/map_structure, - # because namedtuples are flattened by nest while slices aren't. + # because dicts are flattened by nest while slices aren't. # So, map_structure would only see the individual elements in the - # namedtuple. + # dict. # This can't use map_structure_up_to either because the 'shallowness' of # the shallow tree would have to vary depending on if only one dim or # multiple are being sliced. new_args = [] for arg in args: - arg = _named_tuple_to_slice(arg) + arg = _dict_to_slice(arg) if isinstance(arg, (list, tuple)): new_arg = [] for sub_arg in arg: - new_arg.append(_named_tuple_to_slice(sub_arg)) + new_arg.append(_dict_to_slice(sub_arg)) arg = new_arg new_args.append(arg) # Handle the kwargs too. 
new_kwargs = {} for key, value in kwargs.items(): - value = _named_tuple_to_slice(value) + value = _dict_to_slice(value) if isinstance(value, (list, tuple)): new_value = [] for v in value: - new_value.append(_named_tuple_to_slice(v)) + new_value.append(_dict_to_slice(v)) value = new_value new_kwargs[key] = value @@ -1507,8 +1504,8 @@ class TFSlicingOpDispatcher(dispatch.OpDispatcher): def handle(self, args, kwargs): """Handle the specified operation with the specified arguments.""" - args = nest.map_structure(_slice_to_named_tuple, args) - kwargs = nest.map_structure(_slice_to_named_tuple, kwargs) + args = nest.map_structure(_slice_to_dict, args) + kwargs = nest.map_structure(_slice_to_dict, kwargs) if any( isinstance(x, keras_tensor.KerasTensor) for x in nest.flatten([args, kwargs])): diff --git a/tensorflow/python/keras/saving/hdf5_format_test.py b/tensorflow/python/keras/saving/hdf5_format_test.py index 8d88bf8bb09..dea492db4dc 100644 --- a/tensorflow/python/keras/saving/hdf5_format_test.py +++ b/tensorflow/python/keras/saving/hdf5_format_test.py @@ -40,6 +40,7 @@ from tensorflow.python.keras.engine import training from tensorflow.python.keras.saving import hdf5_format from tensorflow.python.lib.io import file_io from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops from tensorflow.python.platform import test from tensorflow.python.platform import tf_logging as logging @@ -832,6 +833,34 @@ class TestWholeModelSaving(keras_parameterized.TestCase): self.assertIsInstance(loaded.optimizer, keras.optimizer_v2.optimizer_v2.OptimizerV2) + @combinations.generate(combinations.combine(mode=['eager'])) + def test_functional_model_with_getitem_op_layer(self): + inp = keras.Input(shape=(8)) + + out = inp[:] + model = keras.Model( + inputs=[inp], + outputs=out) + batch_size = 7 + x = array_ops.stack([ + math_ops.range(8) for _ in range(batch_size)]) + args = [x] + expected = x[:] + + self.assertAllEqual(model(args), expected) + self.assertAllEqual(model.predict(args, batch_size=batch_size), expected) + + # Make sure it can be successfully saved and loaded + save_format = testing_utils.get_save_format() + saved_model_dir = self._save_model_dir() + keras.models.save_model(model, saved_model_dir, save_format=save_format) + + loaded_model = keras.models.load_model(saved_model_dir) + + self.assertAllEqual(loaded_model(args), expected) + self.assertAllEqual(loaded_model.predict(args, batch_size=batch_size), + expected) + # Factory functions to create models that will be serialized inside a Network. def _make_graph_network(input_size, output_size): From d38b9e79199491e313bc0bc9fe08765f201c87fc Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Fri, 24 Jul 2020 11:28:20 -0700 Subject: [PATCH 1283/2522] Port the hard swish kernel to the new TfLiteEvalTensor API. 
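For reference, hard swish computes y = x * relu6(x + 3) / 6 elementwise; this port only changes how the micro kernel and its test obtain tensors (TfLiteEvalTensor via tflite::micro::GetEvalInput/GetEvalOutput, and micro::KernelRunner in the test) rather than the math. A standalone sketch of the float computation, assuming nothing beyond the standard library (illustrative only, not the TFLite reference implementation):

#include <algorithm>

// Hard swish applied elementwise: y = x * min(max(x + 3, 0), 6) / 6.
void HardSwishFloat(const float* input, float* output, int size) {
  for (int i = 0; i < size; ++i) {
    const float x = input[i];
    const float relu6 = std::min(std::max(x + 3.0f, 0.0f), 6.0f);
    output[i] = x * relu6 * (1.0f / 6.0f);
  }
}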
PiperOrigin-RevId: 323035509 Change-Id: I44bf27a574bbe24df5a54fd1f2f6ac52771307ad --- tensorflow/lite/micro/kernels/BUILD | 1 + tensorflow/lite/micro/kernels/hard_swish.cc | 25 +++-- .../lite/micro/kernels/hard_swish_test.cc | 104 ++++-------------- 3 files changed, 41 insertions(+), 89 deletions(-) diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index 64610aa4581..8d7e5ff7354 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD @@ -641,6 +641,7 @@ tflite_micro_cc_test( name = "hard_swish_test", srcs = ["hard_swish_test.cc"], deps = [ + ":kernel_runner", "//tensorflow/lite/c:common", "//tensorflow/lite/micro:micro_framework", "//tensorflow/lite/micro:op_resolvers", diff --git a/tensorflow/lite/micro/kernels/hard_swish.cc b/tensorflow/lite/micro/kernels/hard_swish.cc index 3e8ecca7cc3..11e1d1a769f 100644 --- a/tensorflow/lite/micro/kernels/hard_swish.cc +++ b/tensorflow/lite/micro/kernels/hard_swish.cc @@ -23,6 +23,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/types.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/op_macros.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" #include "tensorflow/lite/micro/micro_utils.h" namespace tflite { @@ -82,25 +83,33 @@ TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node) { } TfLiteStatus HardSwishEval(TfLiteContext* context, TfLiteNode* node) { - const TfLiteTensor* input = GetInput(context, node, kInputTensor); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); HardSwishParams* params = static_cast(node->user_data); switch (input->type) { case kTfLiteFloat32: { tflite::reference_ops::HardSwish( - GetTensorShape(input), GetTensorData(input), - GetTensorShape(output), GetTensorData(output)); + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } break; case kTfLiteUInt8: { tflite::reference_ops::HardSwish( - *params, GetTensorShape(input), GetTensorData(input), - GetTensorShape(output), GetTensorData(output)); + *params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } break; case kTfLiteInt8: { tflite::reference_ops::HardSwish( - *params, GetTensorShape(input), GetTensorData(input), - GetTensorShape(output), GetTensorData(output)); + *params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } break; default: { TF_LITE_KERNEL_LOG( diff --git a/tensorflow/lite/micro/kernels/hard_swish_test.cc b/tensorflow/lite/micro/kernels/hard_swish_test.cc index 50cafc9b5e5..83cdacc96bc 100644 --- a/tensorflow/lite/micro/kernels/hard_swish_test.cc +++ b/tensorflow/lite/micro/kernels/hard_swish_test.cc @@ -18,6 +18,7 @@ limitations under the License. 
#include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/micro/all_ops_resolver.h" +#include "tensorflow/lite/micro/kernels/kernel_runner.h" #include "tensorflow/lite/micro/testing/micro_test.h" #include "tensorflow/lite/micro/testing/test_utils.h" @@ -99,40 +100,19 @@ void TestHardSwishQuantized(int size, const T* output_data, output_zero_point), }; - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_HARD_SWISH); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - - const char* init_data = nullptr; - size_t init_data_size = 0; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, init_data, init_data_size); - } int inputs_array_data[] = {1, 0}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 1}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = nullptr; - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } + const TfLiteRegistration registration = + tflite::ops::micro::Register_HARD_SWISH(); + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, /*builtin_data=*/nullptr, + micro_test::reporter); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - - if (registration->free) { - registration->free(&context, user_data); - } + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); AsymmetricDequantize(output_data, output_elements_count, output_scale, output_zero_point, dequantized_output); @@ -200,40 +180,19 @@ void TestHardSwishQuantizedBias(const int size, const T* output_data, output_zero_point), }; - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_HARD_SWISH); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - - const char* init_data = nullptr; - size_t init_data_size = 0; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, init_data, init_data_size); - } int inputs_array_data[] = {1, 0}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 1}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = nullptr; - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } + const TfLiteRegistration registration = + tflite::ops::micro::Register_HARD_SWISH(); + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, /*builtin_data=*/nullptr, + micro_test::reporter); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - - if (registration->free) { - registration->free(&context, user_data); - } + 
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); AsymmetricDequantize(output_data, output_elements_count, output_scale, output_zero_point, dequantized_output); @@ -273,37 +232,20 @@ void TestHardSwishFloat(const int size, float* output_data, CreateFloatTensor(float_input_values, input_dims), CreateFloatTensor(output_data, output_dims), }; - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_HARD_SWISH); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - - const char* init_data = nullptr; - size_t init_data_size = 0; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, init_data, init_data_size); - } int inputs_array_data[] = {1, 0}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 1}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - if (registration->free) { - registration->free(&context, user_data); - } + const TfLiteRegistration registration = + tflite::ops::micro::Register_HARD_SWISH(); + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, /*builtin_data=*/nullptr, + micro_test::reporter); + + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); for (int i = 0; i < output_elements_count; ++i) { TF_LITE_MICRO_EXPECT_NEAR(float_ref_output_values[i], output_data[i], From 7a1ae0a1611ed77b52c35a6a2469600330620586 Mon Sep 17 00:00:00 2001 From: Anna R Date: Fri, 24 Jul 2020 11:29:35 -0700 Subject: [PATCH 1284/2522] Disable tensordot_op_test_xla_gpu since it is flaky. PiperOrigin-RevId: 323035752 Change-Id: I30ebe6e15bba96ce35dc38912f2656ec433afe77 --- tensorflow/python/kernel_tests/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 06ebf0034ae..224c86878b4 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -3521,6 +3521,7 @@ cuda_py_test( size = "medium", srcs = ["tensordot_op_test.py"], shard_count = 20, + xla_enable_strict_auto_jit = False, # b/161856380 deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", From 3c85e9e6d2baf6e3f1d1a3de0faad985d7ce4a77 Mon Sep 17 00:00:00 2001 From: Rachel Lim Date: Fri, 24 Jul 2020 11:30:13 -0700 Subject: [PATCH 1285/2522] [tf.data] Rename RebatchDatasetV1 name label from Rebatch to LegacyRebatch. 
PiperOrigin-RevId: 323035864 Change-Id: Ifc0d5c69e5d8b01b01592752ac4ef0ccb90009de --- .../kernels/data/experimental/rebatch_dataset_op.cc | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/kernels/data/experimental/rebatch_dataset_op.cc b/tensorflow/core/kernels/data/experimental/rebatch_dataset_op.cc index 8bb0c2388f6..e2cbe7d9dcc 100644 --- a/tensorflow/core/kernels/data/experimental/rebatch_dataset_op.cc +++ b/tensorflow/core/kernels/data/experimental/rebatch_dataset_op.cc @@ -27,7 +27,8 @@ inline int64 CeilDiv(int64 dividend, int64 divisor) { return (dividend - 1 + divisor) / divisor; } -constexpr const char* const kDatasetType = "Rebatch"; +constexpr const char* const kDatasetTypeV1 = "Rebatch"; +constexpr const char* const kDatasetTypeV2 = "RebatchV2"; class RebatchDatasetOp : public UnaryDatasetOpKernel { public: @@ -73,7 +74,7 @@ class RebatchDatasetOp : public UnaryDatasetOpKernel { const string& prefix) const override { name_utils::IteratorPrefixParams params; return absl::make_unique(Iterator::Params{ - this, name_utils::IteratorPrefix(kDatasetType, prefix, params)}); + this, name_utils::IteratorPrefix(kDatasetTypeV1, prefix, params)}); } const DataTypeVector& output_dtypes() const override { @@ -87,7 +88,7 @@ class RebatchDatasetOp : public UnaryDatasetOpKernel { string DebugString() const override { name_utils::DatasetDebugStringParams params; params.set_args(num_replicas_); - return name_utils::DatasetDebugString(kDatasetType, params); + return name_utils::DatasetDebugString(kDatasetTypeV1, params); } Status CheckExternalState() const override { @@ -330,7 +331,7 @@ class RebatchDatasetV2Op : public UnaryDatasetOpKernel { const string& prefix) const override { name_utils::IteratorPrefixParams params; return absl::make_unique(Iterator::Params{ - this, name_utils::IteratorPrefix(kDatasetType, prefix, params)}); + this, name_utils::IteratorPrefix(kDatasetTypeV2, prefix, params)}); } const DataTypeVector& output_dtypes() const override { @@ -342,7 +343,7 @@ class RebatchDatasetV2Op : public UnaryDatasetOpKernel { } string DebugString() const override { - return name_utils::DatasetDebugString(kDatasetType); + return name_utils::DatasetDebugString(kDatasetTypeV2); } Status CheckExternalState() const override { From 435d9b7858f0075d5b9f668c47764cce6113429d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 24 Jul 2020 11:31:25 -0700 Subject: [PATCH 1286/2522] Fixed bugs in spelling and cost calculation of SquaredDifference. 
PiperOrigin-RevId: 323036110 Change-Id: Ibb99d2e3ea4ad1573ee2f120e571faa5bdeb88a0 --- .../core/grappler/costs/op_level_cost_estimator.cc | 4 +++- .../grappler/costs/op_level_cost_estimator_test.cc | 10 ++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc index 9d9cb578b98..ae487953692 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc @@ -553,7 +553,9 @@ OpLevelCostEstimator::OpLevelCostEstimator() { EIGEN_COST(scalar_product_op)); elementwise_ops_.emplace("RealDiv", EIGEN_COST(scalar_quotient_op)); elementwise_ops_.emplace("ReluGrad", EIGEN_COST(scalar_max_op)); - elementwise_ops_.emplace("SquareDifference", 1); + elementwise_ops_.emplace("SquaredDifference", + EIGEN_COST(scalar_square_op) + + EIGEN_COST(scalar_difference_op)); elementwise_ops_.emplace("Sub", EIGEN_COST(scalar_difference_op)); elementwise_ops_.emplace("TruncateDiv", EIGEN_COST(scalar_quotient_op)); diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc index aaaf29ecb40..bab5833538c 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc @@ -909,6 +909,16 @@ TEST_F(OpLevelCostEstimatorTest, ModExecutionTime) { EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); } +TEST_F(OpLevelCostEstimatorTest, SquaredDifferenceExecutionTime) { + auto cost = PredictCosts(DescribeBinaryOp("SquaredDifference", 1000, 2)); + EXPECT_EQ(cost.memory_time, Costs::Duration(3600)); + EXPECT_EQ(cost.compute_time, Costs::Duration(800)); + EXPECT_EQ(cost.execution_time, Costs::Duration(4400)); + EXPECT_EQ(cost.num_ops_total, 1); + EXPECT_FALSE(cost.inaccurate); + EXPECT_EQ(cost.num_ops_with_unknown_shapes, 0); +} + TEST_F(OpLevelCostEstimatorTest, ReluExecutionTime) { auto cost = PredictCosts(DescribeUnaryOp("Relu", 1000)); EXPECT_EQ(Costs::Duration(800), cost.memory_time); From 1d4a43b0bfc8737e433697958df5b85a6752505b Mon Sep 17 00:00:00 2001 From: Advait Jain Date: Fri, 24 Jul 2020 11:31:32 -0700 Subject: [PATCH 1287/2522] Prefer the standard integral types over custom type-aliases. 
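The aliases being phased out (and compiled out under TF_LITE_STATIC_MEMORY in compatibility.h below) are plain typedefs of the <cstdint> fixed-width types, so the substitution is purely mechanical: no type, layout, or overload-resolution changes. A small self-contained check of that equivalence (illustrative only):

    #include <cstdint>
    #include <type_traits>

    // The legacy aliases are simple typedefs, e.g. `using int32 = std::int32_t;`,
    // so replacing int32/uint8/... with int32_t/uint8_t/... is a pure rename.
    using int32 = std::int32_t;
    using uint8 = std::uint8_t;

    static_assert(std::is_same<int32, std::int32_t>::value, "identical types");
    static_assert(std::is_same<uint8, std::uint8_t>::value, "identical types");

    int main() { return 0; }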
PiperOrigin-RevId: 323036134 Change-Id: Id035d66f9e37485eb059cfa5756440c4f72871d1 --- tensorflow/lite/kernels/internal/BUILD | 33 +- tensorflow/lite/kernels/internal/common.h | 117 +++--- .../lite/kernels/internal/compatibility.h | 4 +- .../kernels/internal/quantization_util.cc | 8 +- tensorflow/lite/kernels/internal/tensor.h | 8 +- .../lite/kernels/internal/tensor_utils.cc | 4 +- tensorflow/lite/kernels/internal/types.h | 358 +++++++++--------- 7 files changed, 278 insertions(+), 254 deletions(-) diff --git a/tensorflow/lite/kernels/internal/BUILD b/tensorflow/lite/kernels/internal/BUILD index 5acabeb45cd..f07918f05dc 100644 --- a/tensorflow/lite/kernels/internal/BUILD +++ b/tensorflow/lite/kernels/internal/BUILD @@ -209,6 +209,14 @@ config_setting( }, ) +config_setting( + name = "tf_lite_static_memory", + values = { + "copt": "-DTF_LITE_STATIC_MEMORY", + "cpu": "k8", + }, +) + cc_library( name = "common", srcs = [], @@ -455,16 +463,12 @@ cc_library( "reference/integer_ops/add.h", "reference/integer_ops/conv.h", "reference/integer_ops/depthwise_conv.h", - "reference/integer_ops/dequantize.h", "reference/integer_ops/fully_connected.h", "reference/integer_ops/l2normalization.h", - "reference/integer_ops/log_softmax.h", "reference/integer_ops/logistic.h", - "reference/integer_ops/mean.h", "reference/integer_ops/mul.h", "reference/integer_ops/pooling.h", "reference/integer_ops/tanh.h", - "reference/integer_ops/transpose_conv.h", "reference/l2normalization.h", "reference/logistic.h", "reference/maximum_minimum.h", @@ -477,17 +481,25 @@ cc_library( "reference/process_broadcast_shapes.h", "reference/quantize.h", "reference/reduce.h", - "reference/reference_ops.h", "reference/requantize.h", "reference/resize_nearest_neighbor.h", "reference/round.h", "reference/softmax.h", - "reference/sparse_ops/fully_connected.h", "reference/strided_slice.h", "reference/sub.h", "reference/svdf.h", "reference/tanh.h", - ], + ] + select({ + ":tf_lite_static_memory": [], + "//conditions:default": [ + "reference/integer_ops/dequantize.h", + "reference/integer_ops/log_softmax.h", + "reference/integer_ops/mean.h", + "reference/integer_ops/transpose_conv.h", + "reference/reference_ops.h", + "reference/sparse_ops/fully_connected.h", + ], + }), build_for_embedded = True, copts = tflite_copts(), select_deps = { @@ -787,7 +799,12 @@ cc_library( ":freebsd": [ ":sse_tensor_utils", ], - ":windows": [":sse_tensor_utils"], + ":windows": [ + ":sse_tensor_utils", + ], + ":tf_lite_static_memory": [ + ":portable_tensor_utils", + ], "//conditions:default": [ ":portable_tensor_utils", ], diff --git a/tensorflow/lite/kernels/internal/common.h b/tensorflow/lite/kernels/internal/common.h index c45aff9e47b..66a2d977f39 100644 --- a/tensorflow/lite/kernels/internal/common.h +++ b/tensorflow/lite/kernels/internal/common.h @@ -138,23 +138,24 @@ inline void BiasAndClamp(float clamp_min, float clamp_max, int bias_size, #endif } -inline int32 MultiplyByQuantizedMultiplierSmallerThanOneExp( - int32 x, int32 quantized_multiplier, int left_shift) { +inline int32_t MultiplyByQuantizedMultiplierSmallerThanOneExp( + int32_t x, int32_t quantized_multiplier, int left_shift) { using gemmlowp::RoundingDivideByPOT; using gemmlowp::SaturatingRoundingDoublingHighMul; return RoundingDivideByPOT( SaturatingRoundingDoublingHighMul(x, quantized_multiplier), -left_shift); } -inline int32 MultiplyByQuantizedMultiplierGreaterThanOne( - int32 x, int32 quantized_multiplier, int left_shift) { +inline int32_t MultiplyByQuantizedMultiplierGreaterThanOne( + int32_t x, 
int32_t quantized_multiplier, int left_shift) { using gemmlowp::SaturatingRoundingDoublingHighMul; return SaturatingRoundingDoublingHighMul(x * (1 << left_shift), quantized_multiplier); } -inline int32 MultiplyByQuantizedMultiplier(int32 x, int32 quantized_multiplier, - int shift) { +inline int32_t MultiplyByQuantizedMultiplier(int32_t x, + int32_t quantized_multiplier, + int shift) { using gemmlowp::RoundingDivideByPOT; using gemmlowp::SaturatingRoundingDoublingHighMul; int left_shift = shift > 0 ? shift : 0; @@ -164,16 +165,16 @@ inline int32 MultiplyByQuantizedMultiplier(int32 x, int32 quantized_multiplier, right_shift); } -inline int32 MultiplyByQuantizedMultiplier(int64_t x, - int32 quantized_multiplier, - int shift) { +inline int32_t MultiplyByQuantizedMultiplier(int64_t x, + int32_t quantized_multiplier, + int shift) { // Inputs: // - quantized_multiplier has fixed point at bit 31 // - shift is -31 to +7 (negative for right shift) // // Assumptions: The following input ranges are assumed // - quantize_scale>=0 (the usual range is (1<<30) to (1>>31)-1) - // - scaling is chosen so final scaled result fits in int32 + // - scaling is chosen so final scaled result fits in int32_t // - input x is in the range -(1<<47) <= x < (1<<47) assert(quantized_multiplier >= 0); assert(shift >= -31 && shift < 8); @@ -218,9 +219,9 @@ inline int CountLeadingSignBits(T integer_input) { using U = typename std::make_unsigned::type; return integer_input >= 0 ? CountLeadingZeros(static_cast(integer_input)) - 1 - : integer_input != std::numeric_limits::min() - ? CountLeadingZeros(2 * static_cast(-integer_input) - 1) - : 0; + : integer_input != std::numeric_limits::min() + ? CountLeadingZeros(2 * static_cast(-integer_input) - 1) + : 0; #endif } @@ -262,7 +263,7 @@ inline void gen_lut(const std::function& func, double min, std::min(std::max(TfLiteRound(func(max) * 32768.0), -32768.0), 32767.0); } -// int16 func table lookup, e.g., lookup exp() and 1/(1+x) used in softmax +// int16_t func table lookup, e.g., lookup exp() and 1/(1+x) used in softmax inline int16_t generic_int16_table_lookup(int16_t value, const int16_t* lut) { // 512 base value, lut[513] only for calculate slope uint16_t index = static_cast(256 + (value >> 7)); @@ -413,21 +414,21 @@ SaturatingRoundingMultiplyByPOTParam( SaturatingRoundingMultiplyByPOTParam(a.raw(), exponent)); } -// Convert int32 multiplier to int16 with rounding. -inline void DownScaleInt32ToInt16Multiplier(int32_t multiplier_int32, - int16_t* multiplier_int16) { - TFLITE_DCHECK_GE(multiplier_int32, 0); +// Convert int32_t multiplier to int16_t with rounding. 
+inline void DownScaleInt32ToInt16Multiplier(int32_t multiplier_int32_t, + int16_t* multiplier_int16_t) { + TFLITE_DCHECK_GE(multiplier_int32_t, 0); static constexpr int32_t kRoundingOffset = 1 << 15; - if (multiplier_int32 >= + if (multiplier_int32_t >= std::numeric_limits::max() - kRoundingOffset) { - *multiplier_int16 = std::numeric_limits::max(); + *multiplier_int16_t = std::numeric_limits::max(); return; } - const int32_t result = (multiplier_int32 + kRoundingOffset) >> 16; - TFLITE_DCHECK_LE(result << 16, multiplier_int32 + kRoundingOffset); - TFLITE_DCHECK_GT(result << 16, multiplier_int32 - kRoundingOffset); - *multiplier_int16 = result; - TFLITE_DCHECK_EQ(*multiplier_int16, result); + const int32_t result = (multiplier_int32_t + kRoundingOffset) >> 16; + TFLITE_DCHECK_LE(result << 16, multiplier_int32_t + kRoundingOffset); + TFLITE_DCHECK_GT(result << 16, multiplier_int32_t - kRoundingOffset); + *multiplier_int16_t = result; + TFLITE_DCHECK_EQ(*multiplier_int16_t, result); } // Minimum output bits to accommodate log of maximum input range. It actually @@ -438,15 +439,13 @@ inline void DownScaleInt32ToInt16Multiplier(int32_t multiplier_int32, // ceil(log(abs( log(2.^(0:127))+1 ))/log(2)); ... // ceil(log(abs( log(2.^(0:127))+1 ))/log(2))] constexpr int min_log_x_output_bits(int input_bits) { - return input_bits > 90 - ? 7 - : input_bits > 44 - ? 6 - : input_bits > 21 - ? 5 - : input_bits > 10 - ? 4 - : input_bits > 4 ? 3 : input_bits > 1 ? 2 : 1; + return input_bits > 90 ? 7 + : input_bits > 44 ? 6 + : input_bits > 21 ? 5 + : input_bits > 10 ? 4 + : input_bits > 4 ? 3 + : input_bits > 1 ? 2 + : 1; } // Although currently the name of this function says that it cannot handle @@ -454,17 +453,17 @@ constexpr int min_log_x_output_bits(int input_bits) { // x_max is the largest representable input. In other words, the output range // is symmetric. template -inline gemmlowp::FixedPoint +inline gemmlowp::FixedPoint log_x_for_x_greater_than_or_equal_to_1_impl( - gemmlowp::FixedPoint input_val) { - // assert(__builtin_clz(0u) >= std::numeric_limits::digits - 1); - // assert(__builtin_clz(0u) <= std::numeric_limits::digits); - using FixedPoint0 = gemmlowp::FixedPoint; + gemmlowp::FixedPoint input_val) { + // assert(__builtin_clz(0u) >= std::numeric_limits::digits - 1); + // assert(__builtin_clz(0u) <= std::numeric_limits::digits); + using FixedPoint0 = gemmlowp::FixedPoint; // The reason for accumulating the result with an extra bit of headroom is // that z_pow_2_adj * log_2 might be saturated, and adding num_scaled * // recip_denom will otherwise introduce an error. static constexpr int kAccumIntegerBits = OutputIntegerBits + 1; - using FixedPointAccum = gemmlowp::FixedPoint; + using FixedPointAccum = gemmlowp::FixedPoint; const FixedPoint0 log_2 = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT( FixedPoint0, 1488522236, std::log(2.0)); @@ -492,10 +491,10 @@ log_x_for_x_greater_than_or_equal_to_1_impl( // required shift "ourselves" instead of using, say, Rescale. 
FixedPoint0 z_a = FixedPoint0::FromRaw(input_val.raw()); // z_a_pow_2 = input_integer_bits - z_a_headroom; - int z_a_headroom_plus_1 = CountLeadingZeros(static_cast(z_a.raw())); + int z_a_headroom_plus_1 = CountLeadingZeros(static_cast(z_a.raw())); FixedPoint0 r_a_tmp = SaturatingRoundingMultiplyByPOTParam(z_a, (z_a_headroom_plus_1 - 1)); - const int32 r_a_raw = + const int32_t r_a_raw = SaturatingRoundingMultiplyByPOTParam((r_a_tmp * sqrt_half).raw(), 1); // z_pow_2_adj = max(z_pow_2_a - 0.75, z_pow_2_b - 0.25); // z_pow_2_adj = max(InputIntegerBits - z_a_headroom_plus_1 + 0.25, @@ -507,8 +506,8 @@ log_x_for_x_greater_than_or_equal_to_1_impl( // z_b is treated like z_a, but premultiplying by sqrt(0.5). FixedPoint0 z_b = z_a * sqrt_half; - int z_b_headroom = CountLeadingZeros(static_cast(z_b.raw())) - 1; - const int32 r_b_raw = + int z_b_headroom = CountLeadingZeros(static_cast(z_b.raw())) - 1; + const int32_t r_b_raw = SaturatingRoundingMultiplyByPOTParam(z_a.raw(), z_b_headroom); const FixedPointAccum z_b_pow_2_adj = SaturatingSub( FixedPointAccum::FromRaw(SaturatingRoundingMultiplyByPOTParam( @@ -536,9 +535,9 @@ log_x_for_x_greater_than_or_equal_to_1_impl( } template -inline gemmlowp::FixedPoint +inline gemmlowp::FixedPoint log_x_for_x_greater_than_or_equal_to_1( - gemmlowp::FixedPoint input_val) { + gemmlowp::FixedPoint input_val) { static_assert( OutputIntegerBits >= min_log_x_output_bits(InputIntegerBits), "Output integer bits must be sufficient to accommodate logs of inputs."); @@ -547,25 +546,25 @@ log_x_for_x_greater_than_or_equal_to_1( input_val); } -inline int32 GetReciprocal(int32 x, int x_integer_digits, - int* num_bits_over_unit) { - int headroom_plus_one = CountLeadingZeros(static_cast(x)); +inline int32_t GetReciprocal(int32_t x, int x_integer_digits, + int* num_bits_over_unit) { + int headroom_plus_one = CountLeadingZeros(static_cast(x)); // This is the number of bits to the left of the binary point above 1.0. // Consider x=1.25. In that case shifted_scale=0.8 and // no later adjustment will be needed. 
*num_bits_over_unit = x_integer_digits - headroom_plus_one; - const int32 shifted_sum_minus_one = - static_cast((static_cast(x) << headroom_plus_one) - - (static_cast(1) << 31)); + const int32_t shifted_sum_minus_one = + static_cast((static_cast(x) << headroom_plus_one) - + (static_cast(1) << 31)); - gemmlowp::FixedPoint shifted_scale = + gemmlowp::FixedPoint shifted_scale = gemmlowp::one_over_one_plus_x_for_x_in_0_1( - gemmlowp::FixedPoint::FromRaw(shifted_sum_minus_one)); + gemmlowp::FixedPoint::FromRaw(shifted_sum_minus_one)); return shifted_scale.raw(); } -inline void GetInvSqrtQuantizedMultiplierExp(int32 input, int reverse_shift, - int32* output_inv_sqrt, +inline void GetInvSqrtQuantizedMultiplierExp(int32_t input, int reverse_shift, + int32_t* output_inv_sqrt, int* output_shift) { TFLITE_DCHECK_GE(input, 0); if (input <= 1) { @@ -585,7 +584,7 @@ inline void GetInvSqrtQuantizedMultiplierExp(int32 input, int reverse_shift, ++*output_shift; } const unsigned max_left_shift_bits = - CountLeadingZeros(static_cast(input)) - 1; + CountLeadingZeros(static_cast(input)) - 1; const unsigned max_left_shift_bit_pairs = max_left_shift_bits / 2; const unsigned left_shift_bit_pairs = max_left_shift_bit_pairs - 1; *output_shift -= left_shift_bit_pairs; @@ -597,8 +596,8 @@ inline void GetInvSqrtQuantizedMultiplierExp(int32 input, int reverse_shift, using gemmlowp::SaturatingRoundingMultiplyByPOT; // Using 3 integer bits gives us enough room for the internal arithmetic in // this Newton-Raphson iteration. - using F3 = FixedPoint; - using F0 = FixedPoint; + using F3 = FixedPoint; + using F0 = FixedPoint; const F3 fixedpoint_input = F3::FromRaw(input >> 1); const F3 fixedpoint_half_input = SaturatingRoundingMultiplyByPOT<-1>(fixedpoint_input); diff --git a/tensorflow/lite/kernels/internal/compatibility.h b/tensorflow/lite/kernels/internal/compatibility.h index bfd021ac48d..61becad30c5 100644 --- a/tensorflow/lite/kernels/internal/compatibility.h +++ b/tensorflow/lite/kernels/internal/compatibility.h @@ -76,13 +76,15 @@ limitations under the License. #define TFLITE_CHECK_LT(x, y) ((x) < (y)) ? (void)0 : TFLITE_ABORT #endif -// TODO(ahentz): Clean up. +#ifndef TF_LITE_STATIC_MEMORY +// TODO(b/162019032): Consider removing these type-aliases. 
using int8 = std::int8_t; using uint8 = std::uint8_t; using int16 = std::int16_t; using uint16 = std::uint16_t; using int32 = std::int32_t; using uint32 = std::uint32_t; +#endif // !defined(TF_LITE_STATIC_MEMORY) // TFLITE_DEPRECATED() // diff --git a/tensorflow/lite/kernels/internal/quantization_util.cc b/tensorflow/lite/kernels/internal/quantization_util.cc index 60e3054056d..cf431cffdf7 100644 --- a/tensorflow/lite/kernels/internal/quantization_util.cc +++ b/tensorflow/lite/kernels/internal/quantization_util.cc @@ -342,13 +342,13 @@ void NudgeQuantizationRange(const float min, const float max, const float quant_max_float = static_cast(quant_max); *nudged_scale = (max - min) / (quant_max_float - quant_min_float); const float zero_point_from_min = quant_min_float - min / *nudged_scale; - uint16 nudged_zero_point; + uint16_t nudged_zero_point; if (zero_point_from_min < quant_min_float) { - nudged_zero_point = static_cast(quant_min); + nudged_zero_point = static_cast(quant_min); } else if (zero_point_from_min > quant_max_float) { - nudged_zero_point = static_cast(quant_max); + nudged_zero_point = static_cast(quant_max); } else { - nudged_zero_point = static_cast(TfLiteRound(zero_point_from_min)); + nudged_zero_point = static_cast(TfLiteRound(zero_point_from_min)); } *nudged_min = (quant_min_float - nudged_zero_point) * (*nudged_scale); *nudged_max = (quant_max_float - nudged_zero_point) * (*nudged_scale); diff --git a/tensorflow/lite/kernels/internal/tensor.h b/tensorflow/lite/kernels/internal/tensor.h index 543117df0e5..905552fc640 100644 --- a/tensorflow/lite/kernels/internal/tensor.h +++ b/tensorflow/lite/kernels/internal/tensor.h @@ -76,12 +76,12 @@ class VectorOfTensors { // A list of quantized tensors in a format that can be used by kernels like // split and concatenation. -class VectorOfQuantizedTensors : public VectorOfTensors { +class VectorOfQuantizedTensors : public VectorOfTensors { public: // Build with the tensors in 'tensor_list'. VectorOfQuantizedTensors(const TfLiteContext& context, const TfLiteIntArray& tensor_list) - : VectorOfTensors(context, tensor_list) { + : VectorOfTensors(context, tensor_list) { for (int i = 0; i < tensor_list.size; ++i) { TfLiteTensor* t = &context.tensors[tensor_list.data[i]]; zero_point_.push_back(t->params.zero_point); @@ -90,10 +90,10 @@ class VectorOfQuantizedTensors : public VectorOfTensors { } const float* scale() const { return scale_.data(); } - const int32* zero_point() const { return zero_point_.data(); } + const int32_t* zero_point() const { return zero_point_.data(); } private: - std::vector zero_point_; + std::vector zero_point_; std::vector scale_; }; diff --git a/tensorflow/lite/kernels/internal/tensor_utils.cc b/tensorflow/lite/kernels/internal/tensor_utils.cc index bf32445d153..5e0999121af 100644 --- a/tensorflow/lite/kernels/internal/tensor_utils.cc +++ b/tensorflow/lite/kernels/internal/tensor_utils.cc @@ -16,9 +16,9 @@ limitations under the License. 
#include "tensorflow/lite/kernels/internal/optimized/neon_check.h" -#if defined(__SSSE3__) +#if defined(__SSSE3__) && !defined(TF_LITE_STATIC_MEMORY) #include "tensorflow/lite/kernels/internal/optimized/sse_tensor_utils.h" -#elif defined(USE_NEON) +#elif defined(USE_NEON) && !defined(TF_LITE_STATIC_MEMORY) #include "tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.h" #else #include "tensorflow/lite/kernels/internal/reference/portable_tensor_utils.h" diff --git a/tensorflow/lite/kernels/internal/types.h b/tensorflow/lite/kernels/internal/types.h index d1e6cfab084..aabbc34bf5e 100644 --- a/tensorflow/lite/kernels/internal/types.h +++ b/tensorflow/lite/kernels/internal/types.h @@ -24,24 +24,29 @@ limitations under the License. namespace tflite { -enum class FusedActivationFunctionType : uint8 { kNone, kRelu6, kRelu1, kRelu }; -enum class PaddingType : uint8 { kNone, kSame, kValid }; +enum class FusedActivationFunctionType : uint8_t { + kNone, + kRelu6, + kRelu1, + kRelu +}; +enum class PaddingType : uint8_t { kNone, kSame, kValid }; struct PaddingValues { - int16 width; - int16 height; + int16_t width; + int16_t height; // offset is used for calculating "remaining" padding, for example, `width` // is 1 and `width_offset` is 1, so padding_left is 1 while padding_right is // 1 + 1 = 2. - int16 width_offset; + int16_t width_offset; // Same as width_offset except it's over the height dimension. - int16 height_offset; + int16_t height_offset; }; // This enumeration allows for non-default formats for the weights array // of a fully-connected operator, allowing the use of special optimized // runtime paths. -enum class FullyConnectedWeightsFormat : uint8 { +enum class FullyConnectedWeightsFormat : uint8_t { // Default format (flat 2D layout, the inner contiguous dimension // is input_depth, the outer non-contiguous dimension is output_depth) kDefault, @@ -88,11 +93,11 @@ enum class FullyConnectedWeightsFormat : uint8 { // maximize arithmetic throughput. // // Finally, the 'Int8' part in the name refers to the fact that this - // weights format has each weights value encoded as a signed int8 value, - // even if the data type of the weights buffer is uint8. This is intended + // weights format has each weights value encoded as a signed int8_t value, + // even if the data type of the weights buffer is uint8_t. This is intended // to save runtime kernels the effort to have to XOR the top bit of these // bytes before using them in signed arithmetic, see this file for more - // explanations on the 'signed int8 trick' in matrix multiplication kernels: + // explanations on the 'signed int8_t trick' in matrix multiplication kernels: // // tensorflow/lite/toco/graph_transformations/ensure_uint8_weights_safe_for_fast_int8_kernels.cc // @@ -111,7 +116,7 @@ enum class FullyConnectedWeightsFormat : uint8 { // the real 0 value, and scale designates the difference between the real values // corresponding to consecutive quantized values differing by 1. 
struct QuantizationParams { - int32 zero_point = 0; + int32_t zero_point = 0; double scale = 0.0; }; @@ -141,19 +146,19 @@ class RuntimeShape { #ifdef TF_LITE_STATIC_MEMORY TFLITE_CHECK(false && "No shape resizing supported on this platform"); #else // TF_LITE_STATIC_MEMORY - dims_pointer_ = new int32[dimensions_count]; + dims_pointer_ = new int32_t[dimensions_count]; #endif // TF_LITE_STATIC_MEMORY } } - RuntimeShape(int shape_size, int32 value) : size_(0) { + RuntimeShape(int shape_size, int32_t value) : size_(0) { Resize(shape_size); for (int i = 0; i < shape_size; ++i) { SetDim(i, value); } } - RuntimeShape(int dimensions_count, const int32* dims_data) : size_(0) { + RuntimeShape(int dimensions_count, const int32_t* dims_data) : size_(0) { ReplaceWith(dimensions_count, dims_data); } @@ -165,14 +170,15 @@ class RuntimeShape { // rolls out. RuntimeShape(RuntimeShape const& other) : size_(other.DimensionsCount()) { if (size_ > kMaxSmallSize) { - dims_pointer_ = new int32[size_]; + dims_pointer_ = new int32_t[size_]; } - std::memcpy(DimsData(), other.DimsData(), sizeof(int32) * size_); + std::memcpy(DimsData(), other.DimsData(), sizeof(int32_t) * size_); } bool operator==(const RuntimeShape& comp) const { return this->size_ == comp.size_ && - std::memcmp(DimsData(), comp.DimsData(), size_ * sizeof(int32)) == 0; + std::memcmp(DimsData(), comp.DimsData(), size_ * sizeof(int32_t)) == + 0; } ~RuntimeShape() { @@ -185,13 +191,13 @@ class RuntimeShape { } } - inline int32 DimensionsCount() const { return size_; } - inline int32 Dims(int i) const { + inline int32_t DimensionsCount() const { return size_; } + inline int32_t Dims(int i) const { TFLITE_DCHECK_GE(i, 0); TFLITE_DCHECK_LT(i, size_); return size_ > kMaxSmallSize ? dims_pointer_[i] : dims_[i]; } - inline void SetDim(int i, int32 val) { + inline void SetDim(int i, int32_t val) { TFLITE_DCHECK_GE(i, 0); TFLITE_DCHECK_LT(i, size_); if (size_ > kMaxSmallSize) { @@ -201,14 +207,14 @@ class RuntimeShape { } } - inline int32* DimsData() { + inline int32_t* DimsData() { return size_ > kMaxSmallSize ? dims_pointer_ : dims_; } - inline const int32* DimsData() const { + inline const int32_t* DimsData() const { return size_ > kMaxSmallSize ? dims_pointer_ : dims_; } // The caller must ensure that the shape is no bigger than 5-D. 
- inline const int32* DimsDataUpTo5D() const { return dims_; } + inline const int32_t* DimsDataUpTo5D() const { return dims_; } inline void Resize(int dimensions_count) { if (size_ > kMaxSmallSize) { @@ -223,15 +229,15 @@ class RuntimeShape { #ifdef TF_LITE_STATIC_MEMORY TFLITE_CHECK(false && "No shape resizing supported on this platform"); #else // TF_LITE_STATIC_MEMORY - dims_pointer_ = new int32[dimensions_count]; + dims_pointer_ = new int32_t[dimensions_count]; #endif // TF_LITE_STATIC_MEMORY } } - inline void ReplaceWith(int dimensions_count, const int32* dims_data) { + inline void ReplaceWith(int dimensions_count, const int32_t* dims_data) { Resize(dimensions_count); - int32* dst_dims = DimsData(); - std::memcpy(dst_dims, dims_data, dimensions_count * sizeof(int32)); + int32_t* dst_dims = DimsData(); + std::memcpy(dst_dims, dims_data, dimensions_count * sizeof(int32_t)); } template @@ -239,7 +245,7 @@ class RuntimeShape { const int dimensions_count = std::distance(src_iterable.begin(), src_iterable.end()); Resize(dimensions_count); - int32* data = DimsData(); + int32_t* data = DimsData(); for (auto it : src_iterable) { *data = it; ++data; @@ -288,13 +294,13 @@ class RuntimeShape { SetDim(i, pad_value); } std::memcpy(DimsData() + size_increase, shape.DimsData(), - sizeof(int32) * shape.DimensionsCount()); + sizeof(int32_t) * shape.DimensionsCount()); } - int32 size_; + int32_t size_; union { - int32 dims_[kMaxSmallSize]; - int32* dims_pointer_; + int32_t dims_[kMaxSmallSize]; + int32_t* dims_pointer_; }; }; @@ -713,7 +719,7 @@ void ComputeStrides(Dims* dims) { } } -enum class BroadcastableOpCategory : uint8 { +enum class BroadcastableOpCategory : uint8_t { kNone, kNonBroadcast, // Matching input shapes. kFirstInputBroadcastsFast, // Fivefold nested loops. @@ -729,21 +735,21 @@ static_assert(sizeof(MinMax) == 8, ""); struct ActivationParams { FusedActivationFunctionType activation_type; - // uint8, etc, activation params. - int32 quantized_activation_min; - int32 quantized_activation_max; + // uint8_t, etc, activation params. + int32_t quantized_activation_min; + int32_t quantized_activation_max; }; struct ReluParams : public ActivationParams { - int32 input_offset; - int32 output_offset; - int32 output_multiplier; + int32_t input_offset; + int32_t output_offset; + int32_t output_multiplier; int output_shift; }; // Styles of resizing op usages. For example, kImageStyle can be used with a Pad // op for pattern-specific optimization. -enum class ResizingCategory : uint8 { +enum class ResizingCategory : uint8_t { kNone, kImageStyle, // 4D, operating on inner dimensions, say {0, a, b, 0}. kGenericResize, @@ -753,27 +759,27 @@ enum class ResizingCategory : uint8 { struct ArithmeticParams { // Shape dependent / common to data / op types. BroadcastableOpCategory broadcast_category; - // uint8 inference params. - int32 input1_offset; - int32 input2_offset; - int32 output_offset; - int32 output_multiplier; + // uint8_t inference params. + int32_t input1_offset; + int32_t input2_offset; + int32_t output_offset; + int32_t output_multiplier; int output_shift; - // Add / Sub, not Mul, uint8 inference params. + // Add / Sub, not Mul, uint8_t inference params. int left_shift; - int32 input1_multiplier; + int32_t input1_multiplier; int input1_shift; - int32 input2_multiplier; + int32_t input2_multiplier; int input2_shift; // TODO(b/158622529): Union the following activation params. - // uint8, etc, activation params. 
- int32 quantized_activation_min; - int32 quantized_activation_max; + // uint8_t, etc, activation params. + int32_t quantized_activation_min; + int32_t quantized_activation_max; // float activation params. float float_activation_min; float float_activation_max; - // int64 activation params. + // int64_t activation params. int64_t int64_activation_min; int64_t int64_activation_max; @@ -790,22 +796,22 @@ struct ArithmeticParams { }; struct ConcatenationParams { - int8 axis; - const int32* input_zeropoint; + int8_t axis; + const int32_t* input_zeropoint; const float* input_scale; - uint16 inputs_count; - int32 output_zeropoint; + uint16_t inputs_count; + int32_t output_zeropoint; float output_scale; }; struct ComparisonParams { - // uint8 inference params. + // uint8_t inference params. int left_shift; - int32 input1_offset; - int32 input1_multiplier; + int32_t input1_offset; + int32_t input1_multiplier; int input1_shift; - int32 input2_offset; - int32 input2_multiplier; + int32_t input2_offset; + int32_t input2_multiplier; int input2_shift; // Shape dependent / common to inference types. bool is_broadcast; @@ -815,81 +821,81 @@ struct ConvParams { PaddingType padding_type; PaddingValues padding_values; // TODO(starka): This was just "stride", so check that width+height is OK. - int16 stride_width; - int16 stride_height; - int16 dilation_width_factor; - int16 dilation_height_factor; - // uint8 inference params. + int16_t stride_width; + int16_t stride_height; + int16_t dilation_width_factor; + int16_t dilation_height_factor; + // uint8_t inference params. // TODO(b/65838351): Use smaller types if appropriate. - int32 input_offset; - int32 weights_offset; - int32 output_offset; - int32 output_multiplier; + int32_t input_offset; + int32_t weights_offset; + int32_t output_offset; + int32_t output_multiplier; int output_shift; - // uint8, etc, activation params. - int32 quantized_activation_min; - int32 quantized_activation_max; + // uint8_t, etc, activation params. + int32_t quantized_activation_min; + int32_t quantized_activation_max; // float activation params. float float_activation_min; float float_activation_max; }; struct DepthToSpaceParams { - int32 block_size; + int32_t block_size; }; struct DepthwiseParams { PaddingType padding_type; PaddingValues padding_values; - int16 stride_width; - int16 stride_height; - int16 dilation_width_factor; - int16 dilation_height_factor; - int16 depth_multiplier; - // uint8 inference params. + int16_t stride_width; + int16_t stride_height; + int16_t dilation_width_factor; + int16_t dilation_height_factor; + int16_t depth_multiplier; + // uint8_t inference params. // TODO(b/65838351): Use smaller types if appropriate. - int32 input_offset; - int32 weights_offset; - int32 output_offset; - int32 output_multiplier; + int32_t input_offset; + int32_t weights_offset; + int32_t output_offset; + int32_t output_multiplier; int output_shift; - // uint8, etc, activation params. - int32 quantized_activation_min; - int32 quantized_activation_max; + // uint8_t, etc, activation params. + int32_t quantized_activation_min; + int32_t quantized_activation_max; // float activation params. 
float float_activation_min; float float_activation_max; - const int32* output_multiplier_per_channel; - const int32* output_shift_per_channel; + const int32_t* output_multiplier_per_channel; + const int32_t* output_shift_per_channel; }; struct DequantizationParams { double scale; - int32 zero_point; + int32_t zero_point; }; struct PerChannelDequantizationParams { const float* scale; - const int32* zero_point; - int32 quantized_dimension; + const int32_t* zero_point; + int32_t quantized_dimension; }; struct FakeQuantParams { MinMax minmax; - int32 num_bits; + int32_t num_bits; }; struct FullyConnectedParams { - // uint8 inference params. + // uint8_t inference params. // TODO(b/65838351): Use smaller types if appropriate. - int32 input_offset; - int32 weights_offset; - int32 output_offset; - int32 output_multiplier; + int32_t input_offset; + int32_t weights_offset; + int32_t output_offset; + int32_t output_multiplier; int output_shift; - // uint8, etc, activation params. - int32 quantized_activation_min; - int32 quantized_activation_max; + // uint8_t, etc, activation params. + int32_t quantized_activation_min; + int32_t quantized_activation_max; // float activation params. float float_activation_min; float float_activation_max; @@ -900,16 +906,16 @@ struct FullyConnectedParams { }; struct GatherParams { - int16 axis; + int16_t axis; }; struct L2NormalizationParams { - // uint8 inference params. - int32 input_zero_point; + // uint8_t inference params. + int32_t input_zero_point; }; struct LocalResponseNormalizationParams { - int32 range; + int32_t range; double bias; double alpha; double beta; @@ -937,50 +943,50 @@ struct HardSwishParams { }; struct LogisticParams { - // uint8 inference params. - int32 input_zero_point; - int32 input_range_radius; - int32 input_multiplier; + // uint8_t inference params. + int32_t input_zero_point; + int32_t input_range_radius; + int32_t input_multiplier; int input_left_shift; }; struct LstmCellParams { - int32 weights_zero_point; - int32 accum_multiplier; + int32_t weights_zero_point; + int32_t accum_multiplier; int accum_shift; int state_integer_bits; }; struct MeanParams { - int8 axis_count; - int16 axis[4]; + int8_t axis_count; + int16_t axis[4]; }; struct PackParams { - int8 axis; - const int32* input_zeropoint; + int8_t axis; + const int32_t* input_zeropoint; const float* input_scale; - uint16 inputs_count; - int32 output_zeropoint; + uint16_t inputs_count; + int32_t output_zeropoint; float output_scale; }; struct PadParams { - int8 left_padding_count; - int32 left_padding[4]; - int8 right_padding_count; - int32 right_padding[4]; + int8_t left_padding_count; + int32_t left_padding[4]; + int8_t right_padding_count; + int32_t right_padding[4]; ResizingCategory resizing_category; }; struct PreluParams { - int32 input_offset; - int32 alpha_offset; - int32 output_offset; - int32 output_multiplier_1; - int32 output_shift_1; - int32 output_multiplier_2; - int32 output_shift_2; + int32_t input_offset; + int32_t alpha_offset; + int32_t output_offset; + int32_t output_multiplier_1; + int32_t output_shift_1; + int32_t output_multiplier_2; + int32_t output_shift_2; }; struct PoolParams { @@ -991,17 +997,17 @@ struct PoolParams { int stride_width; int filter_height; int filter_width; - // uint8, etc, activation params. - int32 quantized_activation_min; - int32 quantized_activation_max; + // uint8_t, etc, activation params. + int32_t quantized_activation_min; + int32_t quantized_activation_max; // float activation params. 
float float_activation_min; float float_activation_max; }; struct ReshapeParams { - int8 shape_count; - int32 shape[4]; + int8_t shape_count; + int32_t shape[4]; }; struct ResizeBilinearParams { @@ -1018,22 +1024,22 @@ struct ResizeNearestNeighborParams { }; struct SliceParams { - int8 begin_count; - int32 begin[4]; - int8 size_count; - int32 size[4]; + int8_t begin_count; + int32_t begin[4]; + int8_t size_count; + int32_t size[4]; }; struct SoftmaxParams { // beta is not really used (not a Tensorflow parameter) and not implemented // for LogSoftmax. double beta; - // uint8 inference params. Used even when beta defaults to 1.0. - int32 input_multiplier; - int32 input_left_shift; + // uint8_t inference params. Used even when beta defaults to 1.0. + int32_t input_multiplier; + int32_t input_left_shift; // Reverse scaling is only used by LogSoftmax. - int32 reverse_scaling_divisor; - int32 reverse_scaling_right_shift; + int32_t reverse_scaling_divisor; + int32_t reverse_scaling_right_shift; int diff_min; int32_t zero_point; float scale; @@ -1045,66 +1051,66 @@ struct SoftmaxParams { }; struct SpaceToBatchParams { - // "Zero" padding for uint8 means padding with the output offset. - int32 output_offset; + // "Zero" padding for uint8_t means padding with the output offset. + int32_t output_offset; }; struct SpaceToDepthParams { - int32 block_size; + int32_t block_size; }; struct SplitParams { // Graphs that split into, say, 2000 nodes are encountered. The indices in - // OperatorEdges are of type uint16. - uint16 num_split; - int16 axis; + // OperatorEdges are of type uint16_t. + uint16_t num_split; + int16_t axis; }; struct SqueezeParams { - int8 squeeze_dims_count; - int32 squeeze_dims[4]; + int8_t squeeze_dims_count; + int32_t squeeze_dims[4]; }; struct StridedSliceParams { - int8 start_indices_count; - int32 start_indices[5]; - int8 stop_indices_count; - int32 stop_indices[5]; - int8 strides_count; - int32 strides[5]; + int8_t start_indices_count; + int32_t start_indices[5]; + int8_t stop_indices_count; + int32_t stop_indices[5]; + int8_t strides_count; + int32_t strides[5]; - int16 begin_mask; - int16 ellipsis_mask; - int16 end_mask; - int16 new_axis_mask; - int16 shrink_axis_mask; + int16_t begin_mask; + int16_t ellipsis_mask; + int16_t end_mask; + int16_t new_axis_mask; + int16_t shrink_axis_mask; }; struct TanhParams { - int32 input_zero_point; - int32 input_range_radius; - int32 input_multiplier; + int32_t input_zero_point; + int32_t input_range_radius; + int32_t input_multiplier; int input_left_shift; }; struct TransposeParams { - int8 perm_count; - int32 perm[5]; + int8_t perm_count; + int32_t perm[5]; }; struct UnpackParams { - uint16 num_split; - int16 axis; + uint16_t num_split; + int16_t axis; }; struct LeakyReluParams { float alpha; - int32 input_offset; - int32 output_offset; - int32 output_multiplier_alpha; - int32 output_shift_alpha; - int32 output_multiplier_identity; - int32 output_shift_identity; + int32_t input_offset; + int32_t output_offset; + int32_t output_multiplier_alpha; + int32_t output_shift_alpha; + int32_t output_multiplier_identity; + int32_t output_shift_identity; }; template @@ -1114,7 +1120,7 @@ inline void SetActivationParams(float min, float max, P* params) { } template -inline void SetActivationParams(int32 min, int32 max, P* params) { +inline void SetActivationParams(int32_t min, int32_t max, P* params) { params->quantized_activation_min = min; params->quantized_activation_max = max; } @@ -1126,7 +1132,7 @@ inline void SetActivationParams(int64_t min, 
int64_t max, P* params) { } template -inline void GetActivationParams(const P& params, int32* min, int32* max) { +inline void GetActivationParams(const P& params, int32_t* min, int32_t* max) { *min = params.quantized_activation_min; *max = params.quantized_activation_max; } From 0c1b3ee3a0c56670ecca729e28afb90417f08dc3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 24 Jul 2020 11:42:07 -0700 Subject: [PATCH 1288/2522] Fixed alphabetic ordering of elementwise ops. PiperOrigin-RevId: 323038412 Change-Id: I87fe4c5c9ad4c32733c5b5775a1f5d2debc97092 --- .../core/grappler/costs/op_level_cost_estimator.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc index ae487953692..e416d97949e 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc @@ -510,21 +510,21 @@ OpLevelCostEstimator::OpLevelCostEstimator() { elementwise_ops_.emplace("Neg", EIGEN_COST(scalar_opposite_op)); elementwise_ops_.emplace("QuantizeAndDequantizeV2", quantize_and_dequantize_v2_cost); + elementwise_ops_.emplace("QuantizedSigmoid", + EIGEN_COST(scalar_logistic_op)); elementwise_ops_.emplace("QuantizeV2", quantize_v2_cost); elementwise_ops_.emplace("Reciprocal", EIGEN_COST(scalar_inverse_op)); + elementwise_ops_.emplace("Relu", EIGEN_COST(scalar_max_op)); elementwise_ops_.emplace("Rint", 1); elementwise_ops_.emplace("Round", EIGEN_COST(scalar_round_op)); elementwise_ops_.emplace("Rsqrt", EIGEN_COST(scalar_rsqrt_op)); - elementwise_ops_.emplace("Sqrt", EIGEN_COST(scalar_sqrt_op)); - elementwise_ops_.emplace("Square", EIGEN_COST(scalar_square_op)); - elementwise_ops_.emplace("Tanh", EIGEN_COST(scalar_tanh_op)); - elementwise_ops_.emplace("Relu", EIGEN_COST(scalar_max_op)); elementwise_ops_.emplace("Sigmoid", EIGEN_COST(scalar_logistic_op)); - elementwise_ops_.emplace("QuantizedSigmoid", - EIGEN_COST(scalar_logistic_op)); elementwise_ops_.emplace("Sign", EIGEN_COST(scalar_sign_op)); elementwise_ops_.emplace("Sin", EIGEN_COST(scalar_sin_op)); + elementwise_ops_.emplace("Sqrt", EIGEN_COST(scalar_sqrt_op)); + elementwise_ops_.emplace("Square", EIGEN_COST(scalar_square_op)); elementwise_ops_.emplace("Tan", EIGEN_COST(scalar_tan_op)); + elementwise_ops_.emplace("Tanh", EIGEN_COST(scalar_tanh_op)); // Binary ops alphabetically sorted elementwise_ops_.emplace("Add", EIGEN_COST(scalar_sum_op)); elementwise_ops_.emplace("AddV2", EIGEN_COST(scalar_sum_op)); From 3098c7a84d657fd1fe68af703f9ba18f3049ee9b Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Fri, 24 Jul 2020 11:48:02 -0700 Subject: [PATCH 1289/2522] Port the l2norm kernel to the new TfLiteEvalTensor API. 
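In this port and the two that follow (logical, maximum/minimum), Eval() switches from TfLiteTensor to TfLiteEvalTensor through the tflite::micro helpers, and each test drives the op with micro::KernelRunner plus InitAndPrepare()/Invoke() instead of hand-building a TfLiteContext and TfLiteNode. A condensed sketch of the Eval-side pattern, assembled from the diffs below (float path only, placeholder computation; it only builds inside the TFLM source tree):

    #include "tensorflow/lite/c/common.h"
    #include "tensorflow/lite/micro/kernels/kernel_util.h"

    // Fetch TfLiteEvalTensor handles, then read shapes and typed buffers
    // through the micro:: helpers rather than GetTensorShape()/GetTensorData()
    // on TfLiteTensor.
    TfLiteStatus EvalSketch(TfLiteContext* context, TfLiteNode* node) {
      const TfLiteEvalTensor* input =
          tflite::micro::GetEvalInput(context, node, /*index=*/0);
      TfLiteEvalTensor* output =
          tflite::micro::GetEvalOutput(context, node, /*index=*/0);

      const tflite::RuntimeShape shape = tflite::micro::GetTensorShape(input);
      const float* in = tflite::micro::GetTensorData<float>(input);
      float* out = tflite::micro::GetTensorData<float>(output);
      for (int i = 0; i < shape.FlatSize(); ++i) {
        out[i] = in[i];  // stand-in for the kernel's real per-element math
      }
      return kTfLiteOk;
    }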
PiperOrigin-RevId: 323039627 Change-Id: I1c64000d5e83b5aff8875f1b354439f352c13edd --- tensorflow/lite/micro/kernels/BUILD | 1 + tensorflow/lite/micro/kernels/l2norm.cc | 79 +++++++++++--------- tensorflow/lite/micro/kernels/l2norm_test.cc | 33 ++++---- 3 files changed, 58 insertions(+), 55 deletions(-) diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index 8d7e5ff7354..7b9ec5dd8bb 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD @@ -619,6 +619,7 @@ tflite_micro_cc_test( "l2norm_test.cc", ], deps = [ + ":kernel_runner", "//tensorflow/lite/c:common", "//tensorflow/lite/micro:op_resolvers", "//tensorflow/lite/micro/testing:micro_test", diff --git a/tensorflow/lite/micro/kernels/l2norm.cc b/tensorflow/lite/micro/kernels/l2norm.cc index ab4067058a4..f864efa271c 100644 --- a/tensorflow/lite/micro/kernels/l2norm.cc +++ b/tensorflow/lite/micro/kernels/l2norm.cc @@ -18,12 +18,15 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/reference/l2normalization.h" #include "tensorflow/lite/kernels/internal/tensor.h" #include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { namespace ops { namespace micro { namespace l2norm { +namespace { + // This file has two implementation of L2Norm. enum KernelType { kReference, @@ -33,9 +36,15 @@ enum KernelType { constexpr int kInputTensor = 0; constexpr int kOutputTensor = 0; +} // namespace + TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { -#if defined(DEBUG) + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + auto* params = reinterpret_cast(node->builtin_data); + L2NormalizationParams* data = + static_cast(node->user_data); TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); @@ -51,26 +60,33 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) { - TF_LITE_ENSURE_EQ(context, output->params.scale, (1. / 128.)); - if (output->type == kTfLiteUInt8) { - TF_LITE_ENSURE_EQ(context, output->params.zero_point, 128); - } - if (output->type == kTfLiteInt8) { - TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0); - } + data->input_zero_point = input->params.zero_point; + } else if (output->type == kTfLiteFloat32) { + data->input_zero_point = 0; } // TODO(ahentz): For some reason our implementations don't support // activations. TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone); -#endif return kTfLiteOk; } +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, + sizeof(L2NormalizationParams)); +} + TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - const TfLiteTensor* input = GetInput(context, node, kInputTensor); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TFLITE_DCHECK(node->user_data != nullptr); + const L2NormalizationParams& data = + *(static_cast(node->user_data)); + + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); // TODO(b/143912164): instead of hardcode the epsilon here, we should read it // from tensorflow, i.e., adding a params. 
@@ -87,36 +103,29 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { // So we don't even need to do handle the epsilon for quantized kernel case. const float epsilon = 1e-6f; if (output->type == kTfLiteFloat32) { -#define TF_LITE_L2NORM(type) \ - tflite::L2NormalizationParams op_params; \ - op_params.input_zero_point = 0; \ - type::L2Normalization(op_params, GetTensorShape(input), \ - GetTensorData(input), GetTensorShape(output), \ - GetTensorData(output), epsilon) - - TF_LITE_L2NORM(reference_ops); -#undef TF_LITE_L2NORM + reference_ops::L2Normalization(data, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output), + epsilon); } else if (output->type == kTfLiteUInt8) { -#define TF_LITE_L2NORM(type) \ - tflite::L2NormalizationParams op_params; \ - op_params.input_zero_point = input->params.zero_point; \ - type::L2Normalization(op_params, GetTensorShape(input), \ - GetTensorData(input), GetTensorShape(output), \ - GetTensorData(output)) - - TF_LITE_L2NORM(reference_ops); -#undef TF_LITE_L2NORM + reference_ops::L2Normalization( + data, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } else if (output->type == kTfLiteInt8) { - const auto input_shape = GetTensorShape(input); - const auto output_shape = GetTensorShape(output); + const auto input_shape = tflite::micro::GetTensorShape(input); + const auto output_shape = tflite::micro::GetTensorShape(output); const int trailing_dim = input_shape.DimensionsCount() - 1; const int depth = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); const int outer_size = MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); - reference_integer_ops::L2Normalization(input->params.zero_point, outer_size, - depth, GetTensorData(input), - GetTensorData(output)); + reference_integer_ops::L2Normalization( + data.input_zero_point, outer_size, depth, + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorData(output)); } else { TF_LITE_KERNEL_LOG(context, "Output type is %s, requires float.", TfLiteTypeGetName(output->type)); @@ -129,7 +138,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace l2norm TfLiteRegistration Register_L2NORM_REF() { - return {/*init=*/nullptr, + return {/*init=*/l2norm::Init, /*free=*/nullptr, /*prepare=*/l2norm::Prepare, /*invoke=*/l2norm::Eval, diff --git a/tensorflow/lite/micro/kernels/l2norm_test.cc b/tensorflow/lite/micro/kernels/l2norm_test.cc index 89029bb260a..791f9036c56 100644 --- a/tensorflow/lite/micro/kernels/l2norm_test.cc +++ b/tensorflow/lite/micro/kernels/l2norm_test.cc @@ -16,6 +16,7 @@ limitations under the License. 
#include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/micro/all_ops_resolver.h" +#include "tensorflow/lite/micro/kernels/kernel_runner.h" #include "tensorflow/lite/micro/testing/micro_test.h" #include "tensorflow/lite/micro/testing/test_utils.h" @@ -97,31 +98,23 @@ void TestL2Normalization(const int* input_dims_data, const T* input_data, CreateL2NormTensor(output_data, dims, false), }; - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_L2_NORMALIZATION); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - - TfLiteL2NormParams builtin_data = { - .activation = kTfLiteActNone, - }; - int inputs_array_data[] = {1, 0}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 1}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = nullptr; - node.builtin_data = reinterpret_cast(&builtin_data); - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); + TfLiteL2NormParams builtin_data = { + .activation = kTfLiteActNone, + }; + + const TfLiteRegistration registration = + ops::micro::Register_L2_NORMALIZATION(); + micro::KernelRunner runner( + registration, tensors, tensors_size, inputs_array, outputs_array, + reinterpret_cast(&builtin_data), micro_test::reporter); + + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); // Compare the results from dequantization and expected outputs, and make // sure the difference is within a threshold. From 23cb51dfb64f863d4e5d470160d6b44f1ffdb992 Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Fri, 24 Jul 2020 12:01:24 -0700 Subject: [PATCH 1290/2522] Port the logical kernel to the TfLiteEvalTensor API. PiperOrigin-RevId: 323042504 Change-Id: I0b7814b588c9ca8b26dbc2eba28035a4226475e3 --- tensorflow/lite/micro/kernels/BUILD | 1 + tensorflow/lite/micro/kernels/logical.cc | 31 ++++++++----- tensorflow/lite/micro/kernels/logical_test.cc | 46 +++++++------------ 3 files changed, 38 insertions(+), 40 deletions(-) diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index 7b9ec5dd8bb..35813aad620 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD @@ -265,6 +265,7 @@ tflite_micro_cc_test( "logical_test.cc", ], deps = [ + ":kernel_runner", "//tensorflow/lite/c:common", "//tensorflow/lite/micro:op_resolvers", "//tensorflow/lite/micro/testing:micro_test", diff --git a/tensorflow/lite/micro/kernels/logical.cc b/tensorflow/lite/micro/kernels/logical.cc index cbb818193ac..f4033ba8856 100644 --- a/tensorflow/lite/micro/kernels/logical.cc +++ b/tensorflow/lite/micro/kernels/logical.cc @@ -15,8 +15,8 @@ limitations under the License. 
#include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/internal/reference/binary_function.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/op_macros.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { namespace ops { @@ -31,20 +31,29 @@ constexpr int kOutputTensor = 0; TfLiteStatus LogicalImpl(TfLiteContext* context, TfLiteNode* node, bool (*func)(bool, bool)) { - const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); - const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kInputTensor1); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kInputTensor2); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); - if (HaveSameShapes(input1, input2)) { + if (tflite::micro::HaveSameShapes(input1, input2)) { reference_ops::BinaryFunction( - GetTensorShape(input1), GetTensorData(input1), - GetTensorShape(input2), GetTensorData(input2), - GetTensorShape(output), GetTensorData(output), func); + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output), func); } else { reference_ops::BroadcastBinaryFunction4DSlow( - GetTensorShape(input1), GetTensorData(input1), - GetTensorShape(input2), GetTensorData(input2), - GetTensorShape(output), GetTensorData(output), func); + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output), func); } return kTfLiteOk; diff --git a/tensorflow/lite/micro/kernels/logical_test.cc b/tensorflow/lite/micro/kernels/logical_test.cc index 89a7a0ae74a..d5355c830b6 100644 --- a/tensorflow/lite/micro/kernels/logical_test.cc +++ b/tensorflow/lite/micro/kernels/logical_test.cc @@ -12,9 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ + #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/micro/all_ops_resolver.h" +#include "tensorflow/lite/micro/kernels/kernel_runner.h" #include "tensorflow/lite/micro/testing/micro_test.h" #include "tensorflow/lite/micro/testing/test_utils.h" @@ -22,9 +24,10 @@ namespace tflite { namespace testing { namespace { -void TestLogicalOp(tflite::BuiltinOperator op, const int* input1_dims_data, - const bool* input1_data, const int* input2_dims_data, - const bool* input2_data, const int* output_dims_data, +void TestLogicalOp(const TfLiteRegistration& registration, + const int* input1_dims_data, const bool* input1_data, + const int* input2_dims_data, const bool* input2_data, + const int* output_dims_data, const bool* expected_output_data, bool* output_data) { TfLiteIntArray* input1_dims = IntArrayFromInts(input1_dims_data); TfLiteIntArray* input2_dims = IntArrayFromInts(input2_dims_data); @@ -40,32 +43,17 @@ void TestLogicalOp(tflite::BuiltinOperator op, const int* input1_dims_data, CreateBoolTensor(output_data, output_dims), }; - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = resolver.FindOp(op); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - int inputs_array_data[] = {2, 0, 1}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 2}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = nullptr; - node.builtin_data = nullptr; - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, + /*builtin_data=*/nullptr, micro_test::reporter); - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); TF_LITE_MICRO_EXPECT_EQ(output_dims_count, 4); for (int i = 0; i < output_dims_count; ++i) { @@ -85,8 +73,8 @@ TF_LITE_MICRO_TEST(LogicalOr) { const bool input2[] = {true, false, true, false}; const bool golden[] = {true, false, true, true}; bool output_data[4]; - tflite::testing::TestLogicalOp(tflite::BuiltinOperator_LOGICAL_OR, shape, - input1, shape, input2, shape, golden, + tflite::testing::TestLogicalOp(tflite::ops::micro::Register_LOGICAL_OR(), + shape, input1, shape, input2, shape, golden, output_data); } @@ -97,7 +85,7 @@ TF_LITE_MICRO_TEST(BroadcastLogicalOr) { const bool input2[] = {false}; const bool golden[] = {true, false, false, true}; bool output_data[4]; - tflite::testing::TestLogicalOp(tflite::BuiltinOperator_LOGICAL_OR, + tflite::testing::TestLogicalOp(tflite::ops::micro::Register_LOGICAL_OR(), input1_shape, input1, input2_shape, input2, input1_shape, golden, output_data); } @@ -108,8 +96,8 @@ TF_LITE_MICRO_TEST(LogicalAnd) { const bool input2[] = {true, false, true, false}; const bool golden[] = {true, false, false, false}; bool output_data[4]; - tflite::testing::TestLogicalOp(tflite::BuiltinOperator_LOGICAL_AND, shape, - input1, shape, input2, 
shape, golden, + tflite::testing::TestLogicalOp(tflite::ops::micro::Register_LOGICAL_AND(), + shape, input1, shape, input2, shape, golden, output_data); } @@ -120,7 +108,7 @@ TF_LITE_MICRO_TEST(BroadcastLogicalAnd) { const bool input2[] = {true}; const bool golden[] = {true, false, false, true}; bool output_data[4]; - tflite::testing::TestLogicalOp(tflite::BuiltinOperator_LOGICAL_AND, + tflite::testing::TestLogicalOp(tflite::ops::micro::Register_LOGICAL_AND(), input1_shape, input1, input2_shape, input2, input1_shape, golden, output_data); } From fafcee95b3bd2113af7aadda971877d3a3f1ed3a Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Fri, 24 Jul 2020 12:21:53 -0700 Subject: [PATCH 1291/2522] Port the maximum and minimum kernel to the new TfLiteEvalTensor API. PiperOrigin-RevId: 323047306 Change-Id: Ibe327980b0b9868ef80d22a057b741fa3915f14f --- tensorflow/lite/micro/kernels/BUILD | 1 + .../lite/micro/kernels/maximum_minimum.cc | 25 ++-- .../micro/kernels/maximum_minimum_test.cc | 116 ++++++------------ 3 files changed, 52 insertions(+), 90 deletions(-) diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index 35813aad620..641e0433103 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD @@ -290,6 +290,7 @@ tflite_micro_cc_test( "maximum_minimum_test.cc", ], deps = [ + ":kernel_runner", "//tensorflow/lite/c:common", "//tensorflow/lite/micro:op_resolvers", "//tensorflow/lite/micro/testing:micro_test", diff --git a/tensorflow/lite/micro/kernels/maximum_minimum.cc b/tensorflow/lite/micro/kernels/maximum_minimum.cc index 3f336cebe5b..a7c343bf58c 100644 --- a/tensorflow/lite/micro/kernels/maximum_minimum.cc +++ b/tensorflow/lite/micro/kernels/maximum_minimum.cc @@ -22,6 +22,7 @@ limitations under the License. 
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/op_macros.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { namespace ops { @@ -40,13 +41,13 @@ constexpr int kOutputTensor = 0; struct OpContext { OpContext(TfLiteContext* context, TfLiteNode* node) { - input1 = GetInput(context, node, kInputTensor1); - input2 = GetInput(context, node, kInputTensor2); - output = GetOutput(context, node, kOutputTensor); + input1 = tflite::micro::GetEvalInput(context, node, kInputTensor1); + input2 = tflite::micro::GetEvalInput(context, node, kInputTensor2); + output = tflite::micro::GetEvalOutput(context, node, kOutputTensor); } - const TfLiteTensor* input1; - const TfLiteTensor* input2; - TfLiteTensor* output; + const TfLiteEvalTensor* input1; + const TfLiteEvalTensor* input2; + TfLiteEvalTensor* output; }; struct MaximumOp { @@ -69,12 +70,12 @@ template void TFLiteOperation(TfLiteContext* context, TfLiteNode* node, const OpContext& op_context) { reference_ops::MaximumMinimumBroadcastSlow( - GetTensorShape(op_context.input1), - GetTensorData(op_context.input1), - GetTensorShape(op_context.input2), - GetTensorData(op_context.input2), - GetTensorShape(op_context.output), - GetTensorData(op_context.output), + tflite::micro::GetTensorShape(op_context.input1), + tflite::micro::GetTensorData(op_context.input1), + tflite::micro::GetTensorShape(op_context.input2), + tflite::micro::GetTensorData(op_context.input2), + tflite::micro::GetTensorShape(op_context.output), + tflite::micro::GetTensorData(op_context.output), op_type::template op); } diff --git a/tensorflow/lite/micro/kernels/maximum_minimum_test.cc b/tensorflow/lite/micro/kernels/maximum_minimum_test.cc index 39b892a8212..ee84fcba497 100644 --- a/tensorflow/lite/micro/kernels/maximum_minimum_test.cc +++ b/tensorflow/lite/micro/kernels/maximum_minimum_test.cc @@ -16,6 +16,7 @@ limitations under the License. 
#include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/micro/all_ops_resolver.h" +#include "tensorflow/lite/micro/kernels/kernel_runner.h" #include "tensorflow/lite/micro/testing/micro_test.h" #include "tensorflow/lite/micro/testing/test_utils.h" @@ -23,7 +24,7 @@ namespace tflite { namespace testing { namespace { -void TestMaxMinFloat(tflite::BuiltinOperator op, +void TestMaxMinFloat(const TfLiteRegistration& registration, std::initializer_list input1_dims_data, std::initializer_list input1_data, std::initializer_list input2_dims_data, @@ -45,32 +46,17 @@ void TestMaxMinFloat(tflite::BuiltinOperator op, CreateFloatTensor(output_data, output_dims), }; - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = resolver.FindOp(op); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - int inputs_array_data[] = {2, 0, 1}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 2}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = nullptr; - node.builtin_data = nullptr; - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, + /*builtin_data=*/nullptr, micro_test::reporter); - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); for (int i = 0; i < output_dims_count; ++i) { TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i], @@ -78,14 +64,17 @@ void TestMaxMinFloat(tflite::BuiltinOperator op, } } -void TestMaxMinQuantized( - tflite::BuiltinOperator op, std::initializer_list input1_dims_data, - std::initializer_list input1_data, float input1_min, - float input1_max, std::initializer_list input2_dims_data, - std::initializer_list input2_data, float input2_min, - float input2_max, std::initializer_list expected_output_data, - float output_min, float output_max, - std::initializer_list output_dims_data, uint8_t* output_data) { +void TestMaxMinQuantized(const TfLiteRegistration& registration, + std::initializer_list input1_dims_data, + std::initializer_list input1_data, + float input1_min, float input1_max, + std::initializer_list input2_dims_data, + std::initializer_list input2_data, + float input2_min, float input2_max, + std::initializer_list expected_output_data, + float output_min, float output_max, + std::initializer_list output_dims_data, + uint8_t* output_data) { TfLiteIntArray* input1_dims = IntArrayFromInitializer(input1_dims_data); TfLiteIntArray* input2_dims = IntArrayFromInitializer(input2_dims_data); TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); @@ -100,32 +89,17 @@ void TestMaxMinQuantized( CreateQuantizedTensor(output_data, output_dims, output_min, output_max), }; - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = resolver.FindOp(op); - 
TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - int inputs_array_data[] = {2, 0, 1}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 2}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = nullptr; - node.builtin_data = nullptr; - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, + /*builtin_data=*/nullptr, micro_test::reporter); - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); for (int i = 0; i < output_dims_count; ++i) { TF_LITE_MICRO_EXPECT_EQ(expected_output_data.begin()[i], output_data[i]); @@ -133,7 +107,8 @@ void TestMaxMinQuantized( } void TestMaxMinQuantizedInt32( - tflite::BuiltinOperator op, std::initializer_list input1_dims_data, + const TfLiteRegistration& registration, + std::initializer_list input1_dims_data, std::initializer_list input1_data, float input1_scale, std::initializer_list input2_dims_data, std::initializer_list input2_data, float input2_scale, @@ -153,32 +128,17 @@ void TestMaxMinQuantizedInt32( CreateQuantized32Tensor(output_data, output_dims, output_scale), }; - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = resolver.FindOp(op); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - int inputs_array_data[] = {2, 0, 1}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 2}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = nullptr; - node.builtin_data = nullptr; - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, + /*builtin_data=*/nullptr, micro_test::reporter); - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); for (int i = 0; i < output_dims_count; ++i) { TF_LITE_MICRO_EXPECT_EQ(expected_output_data.begin()[i], output_data[i]); @@ -197,14 +157,14 @@ TF_LITE_MICRO_TEST(FloatTest) { float output_data[6]; tflite::testing::TestMaxMinFloat( - tflite::BuiltinOperator_MAXIMUM, {3, 3, 1, 2}, + tflite::ops::micro::Register_MAXIMUM(), {3, 3, 1, 2}, data1, // input1 shape and data {3, 3, 1, 2}, data2, // input2 shape and data {1.0, 0.0, 1.0, 12.0, -2.0, -1.43}, // expected output {3, 3, 1, 2}, output_data); // output shape and data buffer tflite::testing::TestMaxMinFloat( - tflite::BuiltinOperator_MINIMUM, {3, 3, 1, 2}, + tflite::ops::micro::Register_MINIMUM(), {3, 3, 1, 2}, data1, // input1 shape and data {3, 3, 1, 2}, data2, // input2 shape and data 
{-1.0, 0.0, -1.0, 11.0, -3.0, -1.44}, // expected output @@ -224,7 +184,7 @@ TF_LITE_MICRO_TEST(Uint8Test) { uint8_t output_data[6]; tflite::testing::TestMaxMinQuantized( - tflite::BuiltinOperator_MAXIMUM, + tflite::ops::micro::Register_MAXIMUM(), // input1 shape, data and bounds {3, 3, 1, 2}, data1, input1_min, input1_max, // input2 shape, data and bounds @@ -235,7 +195,7 @@ TF_LITE_MICRO_TEST(Uint8Test) { output_min, output_max, {3, 3, 1, 2}, output_data); tflite::testing::TestMaxMinQuantized( - tflite::BuiltinOperator_MINIMUM, + tflite::ops::micro::Register_MINIMUM(), // input1 shape, data and bounds {3, 3, 1, 2}, data1, input1_min, input1_max, // input2 shape, data and bounds @@ -252,14 +212,14 @@ TF_LITE_MICRO_TEST(FloatWithBroadcastTest) { float output_data[6]; tflite::testing::TestMaxMinFloat( - tflite::BuiltinOperator_MAXIMUM, {3, 3, 1, 2}, + tflite::ops::micro::Register_MAXIMUM(), {3, 3, 1, 2}, data1, // input1 shape and data {1, 2}, data2, // input2 shape and data {1.0, 2.0, 0.5, 2.0, 0.5, 11.0}, // expected output {3, 3, 1, 2}, output_data); // output shape and data buffer tflite::testing::TestMaxMinFloat( - tflite::BuiltinOperator_MINIMUM, {3, 3, 1, 2}, + tflite::ops::micro::Register_MINIMUM(), {3, 3, 1, 2}, data1, // input1 shape and data {1, 2}, data2, // input2 shape and data {0.5, 0.0, -1.0, -2.0, -1.44, 2.0}, // expected output @@ -275,7 +235,7 @@ TF_LITE_MICRO_TEST(Int32WithBroadcastTest) { int32_t output_data[6]; tflite::testing::TestMaxMinQuantizedInt32( - tflite::BuiltinOperator_MAXIMUM, + tflite::ops::micro::Register_MAXIMUM(), // input1 shape, data and scale {3, 3, 1, 2}, data1, input1_scale, // input2 shape, data and scale @@ -286,7 +246,7 @@ TF_LITE_MICRO_TEST(Int32WithBroadcastTest) { output_scale, {3, 3, 1, 2}, output_data); tflite::testing::TestMaxMinQuantizedInt32( - tflite::BuiltinOperator_MINIMUM, + tflite::ops::micro::Register_MINIMUM(), // input1 shape, data and scale {3, 3, 1, 2}, data1, input1_scale, // input2 shape, data and scale From 82510e7b75807147348f0f324abca015bc65e30c Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Fri, 24 Jul 2020 12:37:37 -0700 Subject: [PATCH 1292/2522] Port the neg kernel to the new TfLiteEvalTensor API. PiperOrigin-RevId: 323050624 Change-Id: I4f003cda620e9e994365946316fe6650e02d0e0e --- tensorflow/lite/micro/kernels/BUILD | 1 + tensorflow/lite/micro/kernels/neg.cc | 15 +++++++++------ tensorflow/lite/micro/kernels/neg_test.cc | 23 +++++++---------------- 3 files changed, 17 insertions(+), 22 deletions(-) diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index 641e0433103..188bc5297bb 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD @@ -278,6 +278,7 @@ tflite_micro_cc_test( "neg_test.cc", ], deps = [ + ":kernel_runner", "//tensorflow/lite/c:common", "//tensorflow/lite/micro:op_resolvers", "//tensorflow/lite/micro/testing:micro_test", diff --git a/tensorflow/lite/micro/kernels/neg.cc b/tensorflow/lite/micro/kernels/neg.cc index 0786218d522..74a95ca32eb 100644 --- a/tensorflow/lite/micro/kernels/neg.cc +++ b/tensorflow/lite/micro/kernels/neg.cc @@ -17,7 +17,7 @@ limitations under the License. 
#include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { namespace ops { @@ -28,14 +28,17 @@ constexpr int kInputTensor = 0; constexpr int kOutputTensor = 0; TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - const TfLiteTensor* input = GetInput(context, node, kInputTensor); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); switch (input->type) { // TODO(wangtz): handle for kTfLiteInt8 case kTfLiteFloat32: - reference_ops::Negate(GetTensorShape(input), GetTensorData(input), - GetTensorShape(output), - GetTensorData(output)); + reference_ops::Negate(tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); break; default: TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", diff --git a/tensorflow/lite/micro/kernels/neg_test.cc b/tensorflow/lite/micro/kernels/neg_test.cc index 8c8e6b8b282..2d7c449fcef 100644 --- a/tensorflow/lite/micro/kernels/neg_test.cc +++ b/tensorflow/lite/micro/kernels/neg_test.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/micro/all_ops_resolver.h" +#include "tensorflow/lite/micro/kernels/kernel_runner.h" #include "tensorflow/lite/micro/testing/micro_test.h" #include "tensorflow/lite/micro/testing/test_utils.h" @@ -39,28 +40,18 @@ void TestNegFloat(std::initializer_list input_dims_data, CreateFloatTensor(output_data, output_dims), }; - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_NEG); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - int inputs_array_data[] = {1, 0}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 1}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = nullptr; - node.builtin_data = nullptr; - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; + const TfLiteRegistration registration = tflite::ops::micro::Register_NEG(); + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, + /*builtin_data=*/nullptr, micro_test::reporter); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); TF_LITE_MICRO_EXPECT_EQ(expected_output_data.begin()[0], output_data[0]); for (int i = 0; i < output_dims_count; ++i) { From 42b7494ff230cfc981f3f027d42a9051f2900ea6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 24 Jul 2020 12:42:13 -0700 Subject: [PATCH 1293/2522] Added BatchMatMulV2 to the estimator. 
PiperOrigin-RevId: 323051652 Change-Id: Ibb2b2841e190fc529ae339942758ece058524b3f --- .../grappler/costs/op_level_cost_estimator.cc | 5 +- .../costs/op_level_cost_estimator_test.cc | 82 ++++++++++++------- 2 files changed, 56 insertions(+), 31 deletions(-) diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc index e416d97949e..d15aa698f39 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc @@ -56,6 +56,7 @@ constexpr char kSqueeze[] = "Squeeze"; constexpr char kRecv[] = "_Recv"; constexpr char kSend[] = "_Send"; constexpr char kBatchMatMul[] = "BatchMatMul"; +constexpr char kBatchMatMulV2[] = "BatchMatMulV2"; constexpr char kRank[] = "Rank"; constexpr char kShape[] = "Shape"; constexpr char kShapeN[] = "ShapeN"; @@ -372,6 +373,8 @@ OpLevelCostEstimator::OpLevelCostEstimator() { wrap(&OpLevelCostEstimator::PredictSparseTensorDenseMatMul)); device_cost_impl_.emplace(kBatchMatMul, wrap(&OpLevelCostEstimator::PredictBatchMatMul)); + device_cost_impl_.emplace(kBatchMatMulV2, + wrap(&OpLevelCostEstimator::PredictBatchMatMul)); device_cost_impl_.emplace(kQuantizedMatMul, wrap(&OpLevelCostEstimator::PredictMatMul)); device_cost_impl_.emplace(kQuantizedMatMulV2, @@ -1116,7 +1119,7 @@ int64 OpLevelCostEstimator::CountBatchMatMulOperations( int64 OpLevelCostEstimator::CountBatchMatMulOperations( const OpInfo& op_info, BatchMatMulDimensions* batch_mat_mul, bool* found_unknown_shapes) { - if (op_info.op() != kBatchMatMul) { + if (op_info.op() != kBatchMatMul && op_info.op() != kBatchMatMulV2) { LOG(ERROR) << "Invalid Operation: " << op_info.op(); // TODO(pcma): Try to separate invalid inputs from unknown shapes *found_unknown_shapes = true; diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc index bab5833538c..39140e03139 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc @@ -110,18 +110,6 @@ void DescribeArbitraryRankOutput(const std::vector& dims, DataType dtype, } } -// Returns an OpInfo for a BatchMatMul -OpContext DescribeBatchMatMul(const std::vector& dims_a, - const std::vector& dims_b) { - OpContext op_context; - SetCpuDevice(&op_context.op_info); - op_context.op_info.set_op("BatchMatMul"); - - DescribeArbitraryRankInput(dims_a, DT_FLOAT, &op_context.op_info); - DescribeArbitraryRankInput(dims_b, DT_FLOAT, &op_context.op_info); - return op_context; -} - // Returns an OpInfo for a SparseTensorDenseMatMul OpContext DescribeSparseTensorDenseMatMul(const int nnz_a, const std::vector& dims_b, @@ -515,6 +503,8 @@ OpContext DescribeFusedBatchNorm(const bool is_training, const bool is_grad, class OpLevelCostEstimatorTest : public ::testing::Test { protected: + using BatchMatMulDimensions = OpLevelCostEstimator::BatchMatMulDimensions; + Costs PredictCosts(const OpContext& op_context) const { return estimator_.PredictCosts(op_context); } @@ -529,24 +519,11 @@ class OpLevelCostEstimatorTest : public ::testing::Test { return estimator_.CountBatchMatMulOperations(op_info, found_unknown_shapes); } - int64 CountBatchMatMulDimProduct(const OpInfo& op_info, + int64 CountBatchMatMulOperations(const OpInfo& op_info, + BatchMatMulDimensions* batch_mat_mul, bool* found_unknown_shapes) const { - OpLevelCostEstimator::BatchMatMulDimensions batch_mat_mul; - - 
batch_mat_mul.matmul_dims.n = 0; - batch_mat_mul.matmul_dims.m = 0; - batch_mat_mul.matmul_dims.k = 0; - - estimator_.CountBatchMatMulOperations(op_info, &batch_mat_mul, - found_unknown_shapes); - int dimension_product = 1; - for (auto dim : batch_mat_mul.batch_dims) dimension_product *= dim; - - dimension_product *= batch_mat_mul.matmul_dims.n; - dimension_product *= batch_mat_mul.matmul_dims.m; - dimension_product *= batch_mat_mul.matmul_dims.k; - - return dimension_product; + return estimator_.CountBatchMatMulOperations(op_info, batch_mat_mul, + found_unknown_shapes); } void SetComputeMemoryOverlap(bool value) { @@ -600,6 +577,49 @@ class OpLevelCostEstimatorTest : public ::testing::Test { OpLevelCostEstimator estimator_; }; +class OpLevelBatchMatMulCostEstimatorTest + : public OpLevelCostEstimatorTest, + public ::testing::WithParamInterface { + protected: + // Returns an OpInfo for a BatchMatMul + OpContext DescribeBatchMatMul(const std::vector& dims_a, + const std::vector& dims_b) { + OpContext op_context; + SetCpuDevice(&op_context.op_info); + op_context.op_info.set_op(GetParam()); + + DescribeArbitraryRankInput(dims_a, DT_FLOAT, &op_context.op_info); + DescribeArbitraryRankInput(dims_b, DT_FLOAT, &op_context.op_info); + return op_context; + } + + int64 CountBatchMatMulOperations(const OpInfo& op_info, + bool* found_unknown_shapes) const { + return OpLevelCostEstimatorTest::CountBatchMatMulOperations( + op_info, found_unknown_shapes); + } + + int64 CountBatchMatMulDimProduct(const OpInfo& op_info, + bool* found_unknown_shapes) const { + BatchMatMulDimensions batch_mat_mul; + + batch_mat_mul.matmul_dims.n = 0; + batch_mat_mul.matmul_dims.m = 0; + batch_mat_mul.matmul_dims.k = 0; + + OpLevelCostEstimatorTest::CountBatchMatMulOperations( + op_info, &batch_mat_mul, found_unknown_shapes); + int dimension_product = 1; + for (auto dim : batch_mat_mul.batch_dims) dimension_product *= dim; + + dimension_product *= batch_mat_mul.matmul_dims.n; + dimension_product *= batch_mat_mul.matmul_dims.m; + dimension_product *= batch_mat_mul.matmul_dims.k; + + return dimension_product; + } +}; + TEST_F(OpLevelCostEstimatorTest, TestPersistentOpCosts) { OpContext op_context; SetCpuDevice(&op_context.op_info); @@ -991,7 +1011,7 @@ TEST_F(OpLevelCostEstimatorTest, UnknownOrPartialShape) { } } -TEST_F(OpLevelCostEstimatorTest, BatchMatMul) { +TEST_P(OpLevelBatchMatMulCostEstimatorTest, TestBatchMatMul) { { auto cost = PredictCosts(DescribeBatchMatMul({}, {})); EXPECT_EQ(1, cost.num_ops_total); @@ -1069,6 +1089,8 @@ TEST_F(OpLevelCostEstimatorTest, BatchMatMul) { &batch_matmul_inaccurate); EXPECT_EQ(prod, 12); } +INSTANTIATE_TEST_SUITE_P(TestBatchMatMul, OpLevelBatchMatMulCostEstimatorTest, + ::testing::Values("BatchMatMul", "BatchMatMulV2")); TEST_F(OpLevelCostEstimatorTest, SparseTensorDenseMatMul) { // Unknown shape cases From c8be53e47f8b13245164ccb25bc45155dac0e072 Mon Sep 17 00:00:00 2001 From: Berkin Ilbeyi Date: Fri, 24 Jul 2020 12:54:03 -0700 Subject: [PATCH 1294/2522] [XLA] Fix a spurious verification failure when while has the same value passed in twice. Each of the two HloValues think they have the same live range in the while body but one of them has no uses. So don't check for overlaps for HloValues that don't have any uses. This should be a safe change. 
PiperOrigin-RevId: 323054198 Change-Id: Idbbfffcc2fd407a2f8fe50e3451305497b6dc406 --- .../xla/service/memory_space_assignment.cc | 2 +- .../service/memory_space_assignment_test.cc | 53 +++++++++++++++++++ 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/memory_space_assignment.cc b/tensorflow/compiler/xla/service/memory_space_assignment.cc index 803140b804e..7d3101c907f 100644 --- a/tensorflow/compiler/xla/service/memory_space_assignment.cc +++ b/tensorflow/compiler/xla/service/memory_space_assignment.cc @@ -3073,7 +3073,7 @@ Status MemorySpaceAssignment::VerifyAndExportHeapSimulatorTrace() { last_use_instruction->opcode() == HloOpcode::kConditional) { TF_RETURN_IF_ERROR(split_conditional_buffer( last_use_instruction, time_bound.start, time_bound.end, " ")); - } else { + } else if (!value->uses().empty()) { VLOG(3) << " buffer: " << buffer.ToString() << " value: " << value->ToShortString() << ": (" << time_bound.start << ", " << time_bound.end diff --git a/tensorflow/compiler/xla/service/memory_space_assignment_test.cc b/tensorflow/compiler/xla/service/memory_space_assignment_test.cc index c0fdc5fc00d..0a44eb21359 100644 --- a/tensorflow/compiler/xla/service/memory_space_assignment_test.cc +++ b/tensorflow/compiler/xla/service/memory_space_assignment_test.cc @@ -1805,6 +1805,59 @@ TEST_P(MemorySpaceAssignmentTest, WhileInPlaceBuffer) { } } +TEST_P(MemorySpaceAssignmentTest, WhileSharedBufferVerificationBug) { + // Tests a spurious verification failure when a while has the same value + // passed in twice (copy0) and that value is evicted within the while loop. + absl::string_view hlo_string = R"( + HloModule module, is_scheduled=true + + while_cond { + p0 = (f32[3]{0}, f32[3]{0}, f32[3]{0}, pred[]) parameter(0) + ROOT gte = pred[] get-tuple-element(p0), index=3 + } + + while_body { + p0 = (f32[3]{0}, f32[3]{0}, f32[3]{0}, pred[]) parameter(0) + gte0 = f32[3]{0} get-tuple-element(p0), index=0 + gte1 = f32[3]{0} get-tuple-element(p0), index=1 + gte2 = f32[3]{0} get-tuple-element(p0), index=2 + gte3 = pred[] get-tuple-element(p0), index=3 + add = f32[3]{0} add(gte0, gte0) + negate0 = f32[3]{0} negate(add) + negate1 = f32[3]{0} negate(negate0) + negate2 = f32[3]{0} negate(negate1) + negate3 = f32[3]{0} negate(negate2) + negate4 = f32[3]{0} negate(negate3) + negate5 = f32[3]{0} negate(negate4) + negate6 = f32[3]{0} negate(negate5) + negate7 = f32[3]{0} negate(negate6) + negate8 = f32[3]{0} negate(negate7) + negate9 = f32[3]{0} negate(negate8) + negate10 = f32[3]{0} negate(negate9) + negate11 = f32[3]{0} negate(negate10) + negate12 = f32[3]{0} negate(negate11) + negate13 = f32[3]{0} negate(negate12) + negate14 = f32[3]{0} negate(negate13) + negate15 = f32[3]{0} negate(negate14) + negate16 = f32[3]{0} negate(negate15) + ROOT tuple = (f32[3]{0}, f32[3]{0}, f32[3]{0}, pred[]) tuple(gte0, gte0, negate16, gte3) + } + + ENTRY entry { + p0 = f32[3]{0} parameter(0) + p1 = pred[] parameter(1) + copy0 = f32[3]{0} copy(p0) + copy1 = f32[3]{0} copy(p0) + tuple = (f32[3]{0}, f32[3]{0}, f32[3]{0}, pred[]) tuple(copy0, copy0, copy1, p1) + while = (f32[3]{0}, f32[3]{0}, f32[3]{0}, pred[]) while(tuple), condition=while_cond, body=while_body + ROOT gte = f32[3]{0} get-tuple-element(while), index=2 + } + )"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + AssignMemorySpace(module.get()); +} + TEST_P(MemorySpaceAssignmentTest, ControlPredecessorsBug) { // Having control_predecessors on an HLO was preventing us from DCEing an op // 
that doesn't have any users (tuple.1). The scheduler assumes the graph is From beb4e6eae636088038eb4393b4c0eecd27745558 Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Fri, 24 Jul 2020 13:02:09 -0700 Subject: [PATCH 1295/2522] Port the pack kernel to the new TfLiteEvalTensor API. PiperOrigin-RevId: 323055979 Change-Id: I7e6bae0ca294639c527f722d7c3adf7343d95976 --- tensorflow/lite/micro/kernels/BUILD | 1 + tensorflow/lite/micro/kernels/pack.cc | 17 +-- tensorflow/lite/micro/kernels/pack_test.cc | 138 ++++----------------- 3 files changed, 36 insertions(+), 120 deletions(-) diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index 188bc5297bb..cc719feb4bf 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD @@ -391,6 +391,7 @@ tflite_micro_cc_test( "pack_test.cc", ], deps = [ + ":kernel_runner", "//tensorflow/lite/c:common", "//tensorflow/lite/micro:debug_log", "//tensorflow/lite/micro:op_resolvers", diff --git a/tensorflow/lite/micro/kernels/pack.cc b/tensorflow/lite/micro/kernels/pack.cc index 7c2a8a2f768..d332fc63653 100644 --- a/tensorflow/lite/micro/kernels/pack.cc +++ b/tensorflow/lite/micro/kernels/pack.cc @@ -16,7 +16,7 @@ limitations under the License. #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { namespace ops { @@ -28,9 +28,11 @@ constexpr int kOutputTensor = 0; template TfLiteStatus PackImpl(TfLiteContext* context, TfLiteNode* node, - TfLiteTensor* output, int values_count, int axis) { + TfLiteEvalTensor* output, int values_count, int axis) { + const TfLiteEvalTensor* input0 = + tflite::micro::GetEvalInput(context, node, 0); + const int dimensions = output->dims->size; - const TfLiteTensor* input0 = GetInput(context, node, 0); const TfLiteIntArray* input_dims = input0->dims; const TfLiteIntArray* output_dims = output->dims; @@ -52,11 +54,11 @@ TfLiteStatus PackImpl(TfLiteContext* context, TfLiteNode* node, } TFLITE_DCHECK_EQ(input_size, copy_size * outer_size); - T* output_data = GetTensorData(output); + T* output_data = tflite::micro::GetTensorData(output); for (int i = 0; i < values_count; ++i) { - const TfLiteTensor* t = GetInput(context, node, i); - const T* input_data = GetTensorData(t); + const TfLiteEvalTensor* t = tflite::micro::GetEvalInput(context, node, i); + const T* input_data = tflite::micro::GetTensorData(t); for (int k = 0; k < outer_size; ++k) { const T* input_ptr = input_data + copy_size * k; int loc = k * values_count * copy_size + i * copy_size; @@ -72,7 +74,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const TfLitePackParams* data = reinterpret_cast(node->builtin_data); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); switch (output->type) { case kTfLiteFloat32: { diff --git a/tensorflow/lite/micro/kernels/pack_test.cc b/tensorflow/lite/micro/kernels/pack_test.cc index ddd1a39d775..45d5e32ef48 100644 --- a/tensorflow/lite/micro/kernels/pack_test.cc +++ b/tensorflow/lite/micro/kernels/pack_test.cc @@ -15,8 +15,8 @@ limitations under the License. 
#include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/micro/all_ops_resolver.h" #include "tensorflow/lite/micro/debug_log.h" +#include "tensorflow/lite/micro/kernels/kernel_runner.h" #include "tensorflow/lite/micro/testing/micro_test.h" #include "tensorflow/lite/micro/testing/test_utils.h" @@ -48,45 +48,22 @@ void TestPackTwoInputsFloat(std::initializer_list input1_dims_data, output_data[i] = 23; } - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_PACK); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - TfLitePackParams builtin_data = { .values_count = 2, .axis = axis, }; - - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, nullptr, 0); - } - int inputs_array_data[] = {2, 0, 1}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 2}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = reinterpret_cast(&builtin_data); - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; + const TfLiteRegistration registration = tflite::ops::micro::Register_PACK(); + micro::KernelRunner runner( + registration, tensors, tensors_size, inputs_array, outputs_array, + reinterpret_cast(&builtin_data), micro_test::reporter); - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - if (registration->free) { - registration->free(&context, user_data); - } + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); for (int i = 0; i < output_dims_count; ++i) { TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i], @@ -124,46 +101,23 @@ void TestPackThreeInputsFloat(std::initializer_list input1_dims_data, output_data[i] = 23; } - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - - tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_PACK); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - TfLitePackParams builtin_data = { .values_count = 3, .axis = axis, }; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, nullptr, 0); - } - int inputs_array_data[] = {3, 0, 1, 2}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 3}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = reinterpret_cast(&builtin_data); - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; + const TfLiteRegistration registration = tflite::ops::micro::Register_PACK(); + micro::KernelRunner runner( + registration, tensors, tensors_size, inputs_array, outputs_array, + reinterpret_cast(&builtin_data), micro_test::reporter); - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, 
&node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - if (registration->free) { - registration->free(&context, user_data); - } + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); for (int i = 0; i < output_dims_count; ++i) { TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i], @@ -198,44 +152,23 @@ void TestPackTwoInputsQuantized( output_data[i] = 23; } - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_PACK); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - TfLitePackParams builtin_data = { .values_count = 2, .axis = axis, }; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, nullptr, 0); - } int inputs_array_data[] = {2, 0, 1}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 2}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = reinterpret_cast(&builtin_data); - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; + const TfLiteRegistration registration = tflite::ops::micro::Register_PACK(); + micro::KernelRunner runner( + registration, tensors, tensors_size, inputs_array, outputs_array, + reinterpret_cast(&builtin_data), micro_test::reporter); - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - if (registration->free) { - registration->free(&context, user_data); - } + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); for (int i = 0; i < output_dims_count; ++i) { TF_LITE_MICRO_EXPECT_EQ(expected_output_data.begin()[i], output_data[i]); @@ -267,44 +200,23 @@ void TestPackTwoInputsQuantized32( output_data[i] = 23; } - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_PACK); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - TfLitePackParams builtin_data = { .values_count = 2, .axis = axis, }; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, nullptr, 0); - } int inputs_array_data[] = {2, 0, 1}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 2}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = reinterpret_cast(&builtin_data); - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; + const TfLiteRegistration registration = tflite::ops::micro::Register_PACK(); + micro::KernelRunner runner( + registration, tensors, tensors_size, inputs_array, outputs_array, + reinterpret_cast(&builtin_data), micro_test::reporter); - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, 
registration->prepare(&context, &node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - if (registration->free) { - registration->free(&context, user_data); - } + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); for (int i = 0; i < output_dims_count; ++i) { TF_LITE_MICRO_EXPECT_EQ(expected_output_data.begin()[i], output_data[i]); From 4e5311d545b5440ab93a16e4dbca77fddbc57feb Mon Sep 17 00:00:00 2001 From: Tomer Kaftan Date: Fri, 24 Jul 2020 13:23:27 -0700 Subject: [PATCH 1296/2522] Change Tensorboard callback to directly write out the train_function graphs at the end of the first training batch, rather than trying to write the potentially-incorrect Keras backend graph when the callback is first initialized. Also add tests to verify that a graph containing the model actually gets written to tensorboard. It turns out it wasn't getting written for some of the sequential models in the test before this CL, because the model had yet to be called when the callback graph writing triggered, so the Keras backend graph was empty. ----------- Note: I was looking to write the test graph too, but I started seeing errors about default callbacks not being allowed to have test batch hooks + problems w/ the validation writer being closed. PiperOrigin-RevId: 323060220 Change-Id: I29b1e04c863039ebeff1f2a53b787a62ee5e2572 --- tensorflow/python/keras/callbacks.py | 21 ++++++++++++++++----- tensorflow/python/keras/callbacks_test.py | 21 +++++++++++++++++---- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/keras/callbacks.py b/tensorflow/python/keras/callbacks.py index f918a754eb9..88dc1d84129 100644 --- a/tensorflow/python/keras/callbacks.py +++ b/tensorflow/python/keras/callbacks.py @@ -2014,8 +2014,10 @@ class TensorBoard(Callback, version_utils.TensorBoardVersionSelector): self._writers = {} # Resets writers. 
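A minimal usage sketch of the behavior described above (assuming the stock tf.keras APIs; the model and log path are made up for illustration): with write_graph=True, the train_function graph is now emitted at the end of the first training batch, once that function has actually been traced, instead of when the callback is first initialized.

    import tensorflow as tf

    model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
    model.compile('sgd', 'mse')
    tb = tf.keras.callbacks.TensorBoard(log_dir='/tmp/tb_logs', write_graph=True)
    # The graph summary is written from on_train_batch_end of the first batch.
    model.fit(tf.zeros([8, 4]), tf.zeros([8, 1]), epochs=1, callbacks=[tb])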
+ self._should_write_train_graph = False if self.write_graph: - self._write_keras_model_graph() + self._write_keras_model_summary() + self._should_write_train_graph = True if self.embeddings_freq: self._configure_embeddings() @@ -2042,13 +2044,19 @@ class TensorBoard(Callback, version_utils.TensorBoardVersionSelector): distributed_file_utils.remove_temp_dirpath(self.log_dir, self.model.distribute_strategy) - def _write_keras_model_graph(self): - """Writes Keras graph networks to TensorBoard.""" + def _write_keras_model_train_graph(self): + """Writes Keras model train_function graph to TensorBoard.""" with self._train_writer.as_default(): with summary_ops_v2.always_record_summaries(): - if not self.model.run_eagerly: - summary_ops_v2.graph(K.get_graph(), step=0) + train_fn = self.model.train_function + # If the train_function is a `tf.function`, we can write out a graph + if hasattr(train_fn, 'function_spec'): + summary_ops_v2.graph(train_fn._concrete_stateful_fn.graph, step=0) # pylint: disable=protected-access + def _write_keras_model_summary(self): + """Writes Keras graph network summary to TensorBoard.""" + with self._train_writer.as_default(): + with summary_ops_v2.always_record_summaries(): summary_writable = ( self.model._is_graph_network or # pylint: disable=protected-access self.model.__class__.__name__ == 'Sequential') # pylint: disable=protected-access @@ -2207,6 +2215,9 @@ class TensorBoard(Callback, version_utils.TensorBoardVersionSelector): self._start_trace() def on_train_batch_end(self, batch, logs=None): + if self._should_write_train_graph: + self._write_keras_model_train_graph() + self._should_write_train_graph = False if not self._should_trace: return diff --git a/tensorflow/python/keras/callbacks_test.py b/tensorflow/python/keras/callbacks_test.py index f103d7506b9..0992deae7b6 100644 --- a/tensorflow/python/keras/callbacks_test.py +++ b/tensorflow/python/keras/callbacks_test.py @@ -1767,6 +1767,7 @@ class _SummaryFile(object): self.images = set() self.histograms = set() self.tensors = set() + self.graph_defs = [] def list_summaries(logdir): @@ -1793,6 +1794,8 @@ def list_summaries(logdir): continue path = os.path.join(dirpath, filename) for event in summary_iterator.summary_iterator(path): + if event.graph_def: + result.graph_defs.append(event.graph_def) if not event.summary: # (e.g., it's a `graph_def` event) continue for value in event.summary.value: @@ -2217,7 +2220,7 @@ class TestTensorBoardV2NonParameterizedTest(keras_parameterized.TestCase): x, y, batch_size=2, - epochs=2, + epochs=3, validation_data=(x, y), callbacks=[tb_cbk]) summary_file = list_summaries(self.logdir) @@ -2227,6 +2230,16 @@ class TestTensorBoardV2NonParameterizedTest(keras_parameterized.TestCase): _ObservedSummary(logdir=self.train_dir, tag='keras'), }, ) + if not model.run_eagerly: + # There should be one train graph + self.assertLen(summary_file.graph_defs, 1) + for graph_def in summary_file.graph_defs: + graph_def_str = str(graph_def) + + # All the model layers should appear in the graphs + for layer in model.layers: + if 'input' not in layer.name: + self.assertIn(layer.name, graph_def_str) def test_TensorBoard_writeSequentialModel_noInputShape(self): model = keras.models.Sequential([ @@ -2234,7 +2247,7 @@ class TestTensorBoardV2NonParameterizedTest(keras_parameterized.TestCase): keras.layers.Flatten(), keras.layers.Dense(1), ]) - model.compile('sgd', 'mse', run_eagerly=False) + model.compile('sgd', 'mse', run_eagerly=testing_utils.should_run_eagerly()) 
self.fitModelAndAssertKerasModelWritten(model) def test_TensorBoard_writeSequentialModel_withInputShape(self): @@ -2243,7 +2256,7 @@ class TestTensorBoardV2NonParameterizedTest(keras_parameterized.TestCase): keras.layers.Flatten(), keras.layers.Dense(1), ]) - model.compile('sgd', 'mse', run_eagerly=False) + model.compile('sgd', 'mse', run_eagerly=testing_utils.should_run_eagerly()) self.fitModelAndAssertKerasModelWritten(model) def test_TensorBoard_writeModel(self): @@ -2252,7 +2265,7 @@ class TestTensorBoardV2NonParameterizedTest(keras_parameterized.TestCase): x = keras.layers.Flatten()(x) x = keras.layers.Dense(1)(x) model = keras.models.Model(inputs=inputs, outputs=[x]) - model.compile('sgd', 'mse', run_eagerly=False) + model.compile('sgd', 'mse', run_eagerly=testing_utils.should_run_eagerly()) self.fitModelAndAssertKerasModelWritten(model) def test_TensorBoard_autoTrace(self): From 87ab16969f94e6856781add71dcfdafc14ac7082 Mon Sep 17 00:00:00 2001 From: Ran Chen Date: Fri, 24 Jul 2020 13:47:35 -0700 Subject: [PATCH 1297/2522] Update MultiWorkerMirroredStrategy API doc PiperOrigin-RevId: 323064599 Change-Id: Ie75ae964a8edbc9060ef5f1731c8c9ab34404fe6 --- .../collective_all_reduce_strategy.py | 88 +++++++++---------- 1 file changed, 43 insertions(+), 45 deletions(-) diff --git a/tensorflow/python/distribute/collective_all_reduce_strategy.py b/tensorflow/python/distribute/collective_all_reduce_strategy.py index 2281f1ac984..eeef87f5765 100644 --- a/tensorflow/python/distribute/collective_all_reduce_strategy.py +++ b/tensorflow/python/distribute/collective_all_reduce_strategy.py @@ -44,37 +44,53 @@ from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util.tf_export import tf_export -# TODO(yuefengz): support in-graph replication. @tf_export("distribute.experimental.MultiWorkerMirroredStrategy", v1=[]) class CollectiveAllReduceStrategy(distribute_lib.Strategy): """A distribution strategy for synchronous training on multiple workers. This strategy implements synchronous distributed training across multiple workers, each with potentially multiple GPUs. Similar to - `tf.distribute.MirroredStrategy`, it creates copies of all variables in the - model on each device across all workers. + `tf.distribute.MirroredStrategy`, it replicates all variables and computations + to each local device. The difference is that it uses a distributed collective + implementation (e.g. all-reduce), so that multiple workers can work together. - It uses CollectiveOps's implementation of multi-worker all-reduce to - to keep variables in sync. A collective op is a single op in the - TensorFlow graph which can automatically choose an all-reduce algorithm in - the TensorFlow runtime according to hardware, network topology and tensor - sizes. + You need to launch your program on each worker and configure + `cluster_resolver` correctly. For example, if you are using + `tf.distribute.cluster_resolver.TFConfigClusterResolver`, each worker needs to + have its corresponding `task_type` and `task_id` set in the `TF_CONFIG` + environment variable. - By default it uses all local GPUs or CPU for single-worker training. + Your program runs on each worker as-is. Note that collectives require each + worker to participate. All `tf.distribute` and non `tf.distribute` API may use + collectives internally, e.g. checkpointing and saving since reading a + `tf.Variable` with `tf.VariableSynchronization.ON_READ` all-reduces the value. + Therefore it's recommended to run exactly the same program on each worker. 
+ Dispatching based on `task_type` or `task_id` of the worker is error-prone. - When 'TF_CONFIG' environment variable is set, it parses cluster_spec, - task_type and task_id from 'TF_CONFIG' and turns into a multi-worker strategy - which mirrored models on GPUs of all machines in a cluster. In the current - implementation, it uses all GPUs in a cluster and it assumes all workers have - the same number of GPUs. + `cluster_resolver.num_accelerators()` determines the number of GPUs the + strategy uses. If it's zero, the strategy uses the CPU. All workers need to + use the same number of devices, otherwise the behavior is undefined. - You can also pass a `distribute.cluster_resolver.ClusterResolver` instance - when instantiating the strategy. The task_type, task_id etc. will be parsed - from the resolver instance instead of from the `TF_CONFIG` env var. + This strategy is not intended for TPU. Use + `tf.distribute.experimental.TPUStrategy` instead. - It supports both eager mode and graph mode. However, for eager mode, it has to - set up the eager context in its constructor and therefore all ops in eager - mode have to run after the strategy object is created. + __Saving__ + + You need to save and checkpoint on all workers instead of just one. This is + because variables whose synchronization=ON_READ triggers aggregation during + saving. It's recommended to save to a different path on each worker to avoid + race conditions. Each worker saves the same thing. See + [Multi-worker training with Keras](https://www.tensorflow.org/tutorials/distribute/multi_worker_with_keras#model_saving_and_loading) + tutorial for examples. + + __Known Issues__ + + * `tf.distribute.cluster_resolver.TFConfigClusterResolver` does not return the + correct number of accelerators. The strategy uses all available GPUs if + `cluster_resolver` is `tf.distribute.cluster_resolver.TFConfigClusterResolver` + or `None`. + * In eager mode, the strategy needs to be created before calling any other + Tensorflow API. """ # TODO(anjalisridhar): Update our guides with examples showing how we can use @@ -87,14 +103,13 @@ class CollectiveAllReduceStrategy(distribute_lib.Strategy): """Creates the strategy. Args: - communication: optional Enum of type - `distribute.experimental.CollectiveCommunication`. This provides a way - for the user to override the choice of collective op communication. - Possible values include `AUTO`, `RING`, and `NCCL`. - cluster_resolver: optional `distribute.cluster_resolver.ClusterResolver` - object. The default ClusterResolver that is used is the - TFConfigClusterResolver which is instantiated from the TF_CONFIG env - var. + communication: optional + `tf.distribute.experimental.CollectiveCommunication`. This is a hint on + the preferred collective communication implementation. Possible values + include `AUTO`, `RING`, and `NCCL`. + cluster_resolver: optional + `tf.distribute.cluster_resolver.ClusterResolver`. If `None`, + `tf.distribute.cluster_resolver.TFConfigClusterResolver` is used. """ # TODO(b/150151677): consider move communication to CollectiveHints. super(CollectiveAllReduceStrategy, self).__init__( @@ -121,23 +136,6 @@ class CollectiveAllReduceStrategy(distribute_lib.Strategy): obj.extended._initialize_local(TFConfigClusterResolver(), devices=devices) # pylint: disable=protected-access return obj - def scope(self): # pylint: disable=useless-super-delegation - """Returns a context manager selecting this Strategy as current. 
- - Inside a `with strategy.scope():` code block, this thread - will use a variable creator set by `strategy`, and will - enter its "cross-replica context". - - In `MultiWorkerMirroredStrategy`, all variables created inside - `strategy.scope() will be mirrored on all replicas of each worker. - Moreover, it also sets a default device scope so that ops without - specified devices will end up on the correct worker. - - Returns: - A context manager to use for creating variables with this strategy. - """ - return super(CollectiveAllReduceStrategy, self).scope() - @property def cluster_resolver(self): """Returns the cluster resolver associated with this strategy. From 926c08624849abda617b5e0330b33d94365c08dc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 24 Jul 2020 13:51:47 -0700 Subject: [PATCH 1298/2522] Allow negative axes in tf.nn.sufficient_statistics even when shape of x is unknown. PiperOrigin-RevId: 323065341 Change-Id: I38b4750077030b2243ef18c34a4ee76eeb2883c8 --- tensorflow/python/ops/nn_impl.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py index 702da66b1c5..89174b29336 100644 --- a/tensorflow/python/ops/nn_impl.py +++ b/tensorflow/python/ops/nn_impl.py @@ -1158,9 +1158,23 @@ def sufficient_statistics(x, axes, shift=None, keep_dims=None, name=None, an input that's optionally shifted. See: https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Computing_shifted_data + For example: + >>> t = [[1, 2, 3], [4, 5, 6]] + >>> sufficient_statistics(t, [1]) + (, , , None) + >>> sufficient_statistics(t, [-1]) + (, , , None) + Args: x: A `Tensor`. - axes: Array of ints. Axes along which to compute mean and variance. + axes: Array of ints. Axes along which to compute mean and variance. As in + Python, the axes can also be negative numbers. A negative axis is + interpreted as counting from the end of the rank, i.e., axis + + rank(values)-th dimension. shift: A `Tensor` containing the value by which to shift the data for numerical stability, or `None` if no shift is to be performed. A shift close to the true mean provides the most numerically stable results. @@ -1191,8 +1205,11 @@ def sufficient_statistics(x, axes, shift=None, keep_dims=None, name=None, counts *= x_shape.dims[d].value counts = constant_op.constant(counts, dtype=x.dtype) else: # shape needs to be inferred at runtime. + # Normalize axes to be positive. Required for gather. 
+ rank = array_ops.rank(x) + positive_axes = [axis + rank if axis < 0 else axis for axis in axes] x_dims = array_ops.gather( - math_ops.cast(array_ops.shape(x), x.dtype), axes) + math_ops.cast(array_ops.shape(x), x.dtype), positive_axes) counts = math_ops.reduce_prod(x_dims, name="count") if shift is not None: shift = ops.convert_to_tensor(shift, name="shift") From 975b4a5f0d05d6bd2ab9f5f0581bd075591cdd8c Mon Sep 17 00:00:00 2001 From: Jonathan Chu Date: Fri, 24 Jul 2020 21:18:09 +0000 Subject: [PATCH 1299/2522] Add _benchmark_defun_matmul_with_signature and corresponding CPU/GPU tests --- tensorflow/python/eager/benchmarks_test.py | 24 ++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tensorflow/python/eager/benchmarks_test.py b/tensorflow/python/eager/benchmarks_test.py index 24e86c77a14..629e5fe0721 100644 --- a/tensorflow/python/eager/benchmarks_test.py +++ b/tensorflow/python/eager/benchmarks_test.py @@ -476,6 +476,16 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): func = lambda: f(m, m, transpose_b=transpose_b) self._run(func, num_iters, execution_mode=execution_mode) + def _benchmark_defun_matmul_with_signature(self, m, num_iters, execution_mode=None): + + def defun_matmul(m): + return math_ops.matmul(m, m) + f = function.defun( + defun_matmul, input_signature=[tensor_spec.TensorSpec([2, 2], dtypes.float32)]) + + func = lambda: defun_matmul(m) + self._run(func, num_iters, execution_mode=execution_mode) + def _benchmark_defun_args_matmul(self, m, num_iters, execution_mode=None): @def_function.function @@ -577,6 +587,12 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): self._benchmark_defun_matmul( m, transpose_b=False, num_iters=self._num_iters_2_by_2) + @test_util.disable_tfrt("With signature") + def benchmark_defun_matmul_2_by_2_CPU_with_signature(self): + with context.device(CPU): + m = self._m_2_by_2.cpu() + self._benchmark_defun_matmul_with_signature(m, num_iters=self._num_iters_2_by_2) + @test_util.disable_tfrt("async not supported") def benchmark_defun_matmul_2_by_2_CPU_async(self): with context.device(CPU): @@ -652,6 +668,14 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): self._benchmark_defun_matmul( m, transpose_b=False, num_iters=self._num_iters_2_by_2) + @test_util.disable_tfrt("With signature") + def benchmark_defun_matmul_2_by_2_GPU_with_signature(self): + if not context.num_gpus(): + return + with context.device(GPU): + m = self._m_2_by_2.gpu() + self._benchmark_defun_matmul_with_signature(m, num_iters=self._num_iters_2_by_2) + @test_util.disable_tfrt("Graph is not supported yet. b/156187905") def benchmark_defun_args_matmul_2_by_2_GPU(self): if not context.num_gpus(): From dd9e169370b3e8b65df4e1029e4ec4df173693d6 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 24 Jul 2020 14:24:51 -0700 Subject: [PATCH 1300/2522] Fix 'unused variable' compiler error when building for ARM Cortex M4 in release mode PiperOrigin-RevId: 323071869 Change-Id: I894b62b4c333838d45e556856f4bfef4f5d98a9c --- tensorflow/lite/micro/memory_helpers.cc | 2 +- tensorflow/lite/micro/micro_allocator.cc | 2 ++ tensorflow/lite/micro/micro_interpreter.cc | 2 ++ tensorflow/lite/micro/micro_optional_debug_tools.cc | 2 ++ tensorflow/lite/micro/micro_profiler.cc | 3 ++- tensorflow/lite/micro/recording_micro_allocator.cc | 2 ++ tensorflow/lite/micro/simple_memory_allocator.cc | 2 ++ tensorflow/lite/micro/test_helpers.cc | 2 ++ 8 files changed, 15 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/micro/memory_helpers.cc b/tensorflow/lite/micro/memory_helpers.cc index d1e0392a3bc..c6180cb4951 100644 --- a/tensorflow/lite/micro/memory_helpers.cc +++ b/tensorflow/lite/micro/memory_helpers.cc @@ -131,7 +131,7 @@ TfLiteStatus AllocateOutputDimensionsFromInput(TfLiteContext* context, input = input1->dims->size > input2->dims->size ? input1 : input2; TF_LITE_ENSURE(context, output->type == input->type); - size_t size; + size_t size = 0; TfLiteTypeSizeOf(input->type, &size); const int dimensions_count = tflite::GetTensorShape(input).DimensionsCount(); for (int i = 0; i < dimensions_count; i++) { diff --git a/tensorflow/lite/micro/micro_allocator.cc b/tensorflow/lite/micro/micro_allocator.cc index 76cd617fe99..39358b33b16 100644 --- a/tensorflow/lite/micro/micro_allocator.cc +++ b/tensorflow/lite/micro/micro_allocator.cc @@ -97,7 +97,9 @@ TfLiteStatus CheckOfflinePlannedOffsets(const Model* model, int version = metadata_buffer[0]; int subgraph_idx = metadata_buffer[1]; const int nbr_offline_offsets = metadata_buffer[2]; +#ifndef TF_LITE_STRIP_ERROR_STRINGS int* offline_planner_offsets = (int*)&metadata_buffer[3]; +#endif TF_LITE_REPORT_ERROR(error_reporter, "==== Model metadata info: ====="); TF_LITE_REPORT_ERROR(error_reporter, diff --git a/tensorflow/lite/micro/micro_interpreter.cc b/tensorflow/lite/micro/micro_interpreter.cc index 41efe9ecf51..f9eb263f552 100644 --- a/tensorflow/lite/micro/micro_interpreter.cc +++ b/tensorflow/lite/micro/micro_interpreter.cc @@ -68,11 +68,13 @@ void* ContextHelper::GetScratchBuffer(TfLiteContext* ctx, int buffer_idx) { void ContextHelper::ReportOpError(struct TfLiteContext* context, const char* format, ...) { +#ifndef TF_LITE_STRIP_ERROR_STRINGS ContextHelper* helper = static_cast(context->impl_); va_list args; va_start(args, format); TF_LITE_REPORT_ERROR(helper->error_reporter_, format, args); va_end(args); +#endif } TfLiteTensor* ContextHelper::GetTensor(const struct TfLiteContext* context, diff --git a/tensorflow/lite/micro/micro_optional_debug_tools.cc b/tensorflow/lite/micro/micro_optional_debug_tools.cc index 516def3ebe4..4617b3d9825 100644 --- a/tensorflow/lite/micro/micro_optional_debug_tools.cc +++ b/tensorflow/lite/micro/micro_optional_debug_tools.cc @@ -117,6 +117,7 @@ const char* AllocTypeName(TfLiteAllocationType type) { // Helper function to print model flatbuffer data. This function is not called // by default. Hence it's not linked in to the final binary code. 
void PrintModelData(const Model* model, ErrorReporter* error_reporter) { +#ifndef TF_LITE_STRIP_ERROR_STRINGS auto* subgraphs = model->subgraphs(); const SubGraph* subgraph = (*subgraphs)[0]; const flatbuffers::Vector>* tensors = @@ -139,6 +140,7 @@ void PrintModelData(const Model* model, ErrorReporter* error_reporter) { error_reporter, "Tensor index: %d arena tensor %d size %d ", i, !array_size && !flatbuffer_tensor.is_variable(), tensor_size); } +#endif } // Prints a dump of what tensors and what nodes are in the interpreter. diff --git a/tensorflow/lite/micro/micro_profiler.cc b/tensorflow/lite/micro/micro_profiler.cc index a765b918108..83fb9f64713 100644 --- a/tensorflow/lite/micro/micro_profiler.cc +++ b/tensorflow/lite/micro/micro_profiler.cc @@ -33,9 +33,10 @@ uint32_t MicroProfiler::BeginEvent(const char* tag, EventType event_type, } void MicroProfiler::EndEvent(uint32_t event_handle) { +#ifndef TF_LITE_STRIP_ERROR_STRINGS int32_t end_time = GetCurrentTimeTicks(); TF_LITE_REPORT_ERROR(reporter_, "%s took %d cycles\n", event_tag_, end_time - start_time_); +#endif } - } // namespace tflite diff --git a/tensorflow/lite/micro/recording_micro_allocator.cc b/tensorflow/lite/micro/recording_micro_allocator.cc index 5e338a339ca..7e11523fea0 100644 --- a/tensorflow/lite/micro/recording_micro_allocator.cc +++ b/tensorflow/lite/micro/recording_micro_allocator.cc @@ -104,6 +104,7 @@ void RecordingMicroAllocator::PrintAllocations() const { void RecordingMicroAllocator::PrintRecordedAllocation( RecordedAllocationType allocation_type, const char* allocation_name, const char* allocation_description) const { +#ifndef TF_LITE_STRIP_ERROR_STRINGS RecordedAllocation allocation = GetRecordedAllocation(allocation_type); TF_LITE_REPORT_ERROR( error_reporter(), @@ -111,6 +112,7 @@ void RecordingMicroAllocator::PrintRecordedAllocation( "(requested %d bytes for %d %s)", allocation_name, allocation.used_bytes, allocation.requested_bytes, allocation.count, allocation_description); +#endif } TfLiteStatus RecordingMicroAllocator::AllocateNodeAndRegistrations( diff --git a/tensorflow/lite/micro/simple_memory_allocator.cc b/tensorflow/lite/micro/simple_memory_allocator.cc index 3abec015fe3..48cfdc02a34 100644 --- a/tensorflow/lite/micro/simple_memory_allocator.cc +++ b/tensorflow/lite/micro/simple_memory_allocator.cc @@ -78,11 +78,13 @@ uint8_t* SimpleMemoryAllocator::AllocateFromTail(size_t size, size_t alignment) { uint8_t* const aligned_result = AlignPointerDown(tail_ - size, alignment); if (aligned_result < head_) { +#ifndef TF_LITE_STRIP_ERROR_STRINGS const size_t missing_memory = head_ - aligned_result; TF_LITE_REPORT_ERROR( error_reporter_, "Failed to allocate memory. Requested: %u, available %u, missing: %u", size, size - missing_memory, missing_memory); +#endif return nullptr; } tail_ = aligned_result; diff --git a/tensorflow/lite/micro/test_helpers.cc b/tensorflow/lite/micro/test_helpers.cc index 2888a846e94..23c7ca96408 100644 --- a/tensorflow/lite/micro/test_helpers.cc +++ b/tensorflow/lite/micro/test_helpers.cc @@ -812,11 +812,13 @@ int TestStrcmp(const char* a, const char* b) { // Wrapper to forward kernel errors to the interpreter's error reporter. void ReportOpError(struct TfLiteContext* context, const char* format, ...) { +#ifndef TF_LITE_STRIP_ERROR_STRINGS ErrorReporter* error_reporter = static_cast(context->impl_); va_list args; va_start(args, format); TF_LITE_REPORT_ERROR(error_reporter, format, args); va_end(args); +#endif } // Create a TfLiteIntArray from an array of ints. 
The first element in the From 145882959af96308a7eab10b02b3d378ea10b6c1 Mon Sep 17 00:00:00 2001 From: Jonathan Chu Date: Fri, 24 Jul 2020 21:32:16 +0000 Subject: [PATCH 1301/2522] Add CPU test for _benchmark_defun_args_matmul, analogous to existing GPU test --- tensorflow/python/eager/benchmarks_test.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tensorflow/python/eager/benchmarks_test.py b/tensorflow/python/eager/benchmarks_test.py index 629e5fe0721..c6212275917 100644 --- a/tensorflow/python/eager/benchmarks_test.py +++ b/tensorflow/python/eager/benchmarks_test.py @@ -593,6 +593,12 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): m = self._m_2_by_2.cpu() self._benchmark_defun_matmul_with_signature(m, num_iters=self._num_iters_2_by_2) + @test_util.disable_tfrt("Graph is not supported yet. b/156187905") + def benchmark_defun_args_matmul_2_by_2_CPU(self): + with context.device(CPU): + m = self._m_2_by_2.cpu() + self._benchmark_defun_args_matmul(m, num_iters=self._num_iters_2_by_2) + @test_util.disable_tfrt("async not supported") def benchmark_defun_matmul_2_by_2_CPU_async(self): with context.device(CPU): From 2a5e737ada6193de7058cc1c2f78dacbeb75d7fa Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Fri, 24 Jul 2020 14:26:08 -0700 Subject: [PATCH 1302/2522] Only run RegisterTpuPlatform() statically when running on Google platforms PiperOrigin-RevId: 323072106 Change-Id: I9fc2b4921deb84985d8b274ca0393d68e6667d4a --- tensorflow/stream_executor/tpu/tpu_platform_registration.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/stream_executor/tpu/tpu_platform_registration.cc b/tensorflow/stream_executor/tpu/tpu_platform_registration.cc index 6f054f57aa9..f0447cf527c 100644 --- a/tensorflow/stream_executor/tpu/tpu_platform_registration.cc +++ b/tensorflow/stream_executor/tpu/tpu_platform_registration.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/stream_executor/platform/initialize.h" #include "tensorflow/stream_executor/tpu/tpu_platform.h" +#if defined(PLATFORM_GOOGLE) REGISTER_MODULE_INITIALIZER(tpu_platform, tensorflow::RegisterTpuPlatform()); DECLARE_MODULE_INITIALIZER(multi_platform_manager); @@ -26,3 +27,4 @@ DECLARE_MODULE_INITIALIZER(multi_platform_manager_listener); REGISTER_MODULE_INITIALIZER_SEQUENCE(tpu_platform, multi_platform_manager); REGISTER_MODULE_INITIALIZER_SEQUENCE(multi_platform_manager_listener, tpu_platform); +#endif From 0943136a4a0c62e673d7d315c79dd96a7f1e306c Mon Sep 17 00:00:00 2001 From: Peng Wang Date: Fri, 24 Jul 2020 14:40:06 -0700 Subject: [PATCH 1303/2522] Adds back a `DoCopy` in StridedSliceAssignOp that was accidentally deleted. PiperOrigin-RevId: 323074864 Change-Id: Ie46f51353a85aa423e562da7e2f3009238cca07e --- tensorflow/core/framework/op_kernel.h | 16 +++++++++++++--- tensorflow/core/kernels/strided_slice_op.cc | 11 +++++++++-- tensorflow/python/kernel_tests/array_ops_test.py | 14 +++++++++++++- 3 files changed, 35 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/framework/op_kernel.h b/tensorflow/core/framework/op_kernel.h index 4638382fe75..b4302999511 100644 --- a/tensorflow/core/framework/op_kernel.h +++ b/tensorflow/core/framework/op_kernel.h @@ -885,10 +885,14 @@ class OpKernelContext { // Tries to forward one of the inputs given in input_indices to // output[output_index]. If none of the given inputs can be forwarded, calls - // allocate_output() to allocate a new output buffer. + // allocate_output() to allocate a new output buffer. 
The index of the + // forwarded input will be assign to output argument forwarded_input (if it's + // not nullptr). If no inputs are forwarded, forwarded_input will be assigned + // -1. Status forward_input_or_allocate_output( gtl::ArraySlice candidate_input_indices, int output_index, - const TensorShape& output_shape, Tensor** output) TF_MUST_USE_RESULT; + const TensorShape& output_shape, Tensor** output, + int* forwarded_input = nullptr) TF_MUST_USE_RESULT; Status forward_input_or_allocate_output( gtl::ArraySlice candidate_input_names, StringPiece output_name, const TensorShape& output_shape, @@ -1636,13 +1640,19 @@ inline TensorValue OpKernelContext::release_output(int index) { inline Status OpKernelContext::forward_input_or_allocate_output( gtl::ArraySlice candidate_input_indices, int output_index, - const TensorShape& output_shape, Tensor** output) { + const TensorShape& output_shape, Tensor** output, int* forwarded_input) { for (int input_index : candidate_input_indices) { if (forward_input_to_output_with_shape(input_index, output_index, output_shape, output)) { + if (forwarded_input != nullptr) { + *forwarded_input = input_index; + } return Status::OK(); } } + if (forwarded_input != nullptr) { + *forwarded_input = -1; + } return allocate_output(output_index, output_shape, output); } diff --git a/tensorflow/core/kernels/strided_slice_op.cc b/tensorflow/core/kernels/strided_slice_op.cc index fc08fa8ff7d..7d9dfa44129 100644 --- a/tensorflow/core/kernels/strided_slice_op.cc +++ b/tensorflow/core/kernels/strided_slice_op.cc @@ -306,8 +306,15 @@ class StridedSliceAssignOp : public OpKernel { if (isTensor) { const Tensor& input = context->input(0); - OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( - {0}, 0, input.shape(), &old_lhs)); + int forwarded_input; + OP_REQUIRES_OK(context, + context->forward_input_or_allocate_output( + {0}, 0, input.shape(), &old_lhs, &forwarded_input)); + if (forwarded_input < 0) { + OP_REQUIRES_OK(context, + tensorflow::functor::DoCopy( + context->eigen_device(), input, old_lhs)); + } } else { if (context->input_dtype(0) == DT_RESOURCE) { core::RefCountPtr v; diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py index 829be7ffe45..0a7e4e5af77 100644 --- a/tensorflow/python/kernel_tests/array_ops_test.py +++ b/tensorflow/python/kernel_tests/array_ops_test.py @@ -1228,13 +1228,25 @@ class SliceAssignTest(test_util.TensorFlowTestCase): sess.run(v[:].assign(too_small_val)) @test_util.run_in_graph_and_eager_modes - def testTensorStridedSliceAssign(self): + def testTensorStridedSliceAssignWithInputForward(self): + """Tests tensor_strided_slice_update with input-forwarding taking effect.""" @def_function.function def assign(x): y = x + 1 return gen_array_ops.tensor_strided_slice_update(y, [0], [1], [1], [0]) self.assertAllEqual([0, 1], self.evaluate(assign(array_ops.zeros([2])))) + @test_util.run_in_graph_and_eager_modes + def testTensorStridedSliceAssignNoInputForward(self): + """Tests tensor_strided_slice_update with no input-forwarding.""" + x = constant_op.constant([0.2, 0.3]) + y = x + 1 + # y's buffer won't be forwarded to z because y and z will be alive at the + # same time later. + z = gen_array_ops.tensor_strided_slice_update(y, [0], [1], [1], [0.4]) + ans = y + z + self.assertAllClose([1.6, 2.6], self.evaluate(ans)) + class ShapeSizeRankTest(test_util.TensorFlowTestCase): From cf95331c57da7ba86a75b9706096c62a7ef08646 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 24 Jul 2020 14:51:50 -0700 Subject: [PATCH 1304/2522] Initial check-in of GPU XLA thunks dialect PiperOrigin-RevId: 323077049 Change-Id: I4a9b32e6772aa342b74954d27edfdf43e4b40f62 --- tensorflow/compiler/mlir/runlit.cfg.py | 3 +- tensorflow/compiler/xla/service/gpu/BUILD | 47 +++++++++++++++ .../service/gpu/ir/dialect_registration.cc | 20 +++++++ .../xla/service/gpu/ir/xla_thunks_ops.cc | 42 ++++++++++++++ .../xla/service/gpu/ir/xla_thunks_ops.h | 42 ++++++++++++++ .../xla/service/gpu/ir/xla_thunks_ops.td | 57 +++++++++++++++++++ .../compiler/xla/service/gpu/tests/BUILD | 17 +++++- .../gpu/tests/execute_memzero_thunk.mlir | 15 +++++ 8 files changed, 240 insertions(+), 3 deletions(-) create mode 100644 tensorflow/compiler/xla/service/gpu/ir/dialect_registration.cc create mode 100644 tensorflow/compiler/xla/service/gpu/ir/xla_thunks_ops.cc create mode 100644 tensorflow/compiler/xla/service/gpu/ir/xla_thunks_ops.h create mode 100644 tensorflow/compiler/xla/service/gpu/ir/xla_thunks_ops.td create mode 100644 tensorflow/compiler/xla/service/gpu/tests/execute_memzero_thunk.mlir diff --git a/tensorflow/compiler/mlir/runlit.cfg.py b/tensorflow/compiler/mlir/runlit.cfg.py index e3158f21cb2..718b6c3045a 100644 --- a/tensorflow/compiler/mlir/runlit.cfg.py +++ b/tensorflow/compiler/mlir/runlit.cfg.py @@ -73,7 +73,8 @@ tool_names = [ 'mlir-opt', 'mlir-hlo-opt', 'mlir-translate', 'tf-opt', 'tf_tfl_translate', 'tf_tfjs_translate', 'flatbuffer_to_string', 'flatbuffer_translate', 'tf-mlir-translate', 'mlir-tflite-runner', 'tfcompile', - 'json_to_flatbuffer', 'xla-gpu-opt', 'xla-opt', 'hlo_to_llvm_ir' + 'json_to_flatbuffer', 'xla-gpu-opt', 'xla-opt', 'hlo_to_llvm_ir', + 'xla-thunks-opt' ] tools = [ToolSubst(s, unresolved='ignore') for s in tool_names] llvm_config.add_tool_substitutions(tools, tool_dirs) diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index c075b39c08e..8dfd73e9a6a 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -27,6 +27,7 @@ load( "if_cuda_is_configured", ) load("//tensorflow:tensorflow.bzl", "if_nccl") +load("//third_party/mlir:tblgen.bzl", "gentbl") package( default_visibility = [":friends"], @@ -1875,3 +1876,49 @@ cc_library( "@com_google_absl//absl/types:span", ], ) + +gentbl( + name = "xla_thunks_ops_inc_gen", + tbl_outs = [ + ("-gen-op-decls", "ir/xla_thunks_ops.h.inc"), + ("-gen-op-defs", "ir/xla_thunks_ops.cc.inc"), + ("-gen-struct-attr-decls", "ir/xla_thunks_structs.h.inc"), + ("-gen-struct-attr-defs", "ir/xla_thunks_structs.cc.inc"), + ], + tblgen = "@llvm-project//mlir:mlir-tblgen", + td_file = "ir/xla_thunks_ops.td", + td_srcs = [ + "@llvm-project//mlir:LLVMOpsTdFiles", + ], +) + +cc_library( + name = "xla_thunks_ops", + srcs = [ + "ir/xla_thunks_ops.cc", + "ir/xla_thunks_ops.cc.inc", + "ir/xla_thunks_ops.h.inc", + ], + hdrs = [ + "ir/xla_thunks_ops.h", + ], + deps = [ + ":xla_thunks_ops_inc_gen", + "//tensorflow/compiler/mlir/hlo", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:LLVMDialect", + ], +) + +# Library with XLA thunks dialect static initialization. 
+cc_library( + name = "xla_thunks_dialect_registration", + srcs = [ + "ir/dialect_registration.cc", + ], + deps = [ + ":xla_thunks_ops", + "@llvm-project//mlir:IR", + ], + alwayslink = 1, +) diff --git a/tensorflow/compiler/xla/service/gpu/ir/dialect_registration.cc b/tensorflow/compiler/xla/service/gpu/ir/dialect_registration.cc new file mode 100644 index 00000000000..2e3461951d8 --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/ir/dialect_registration.cc @@ -0,0 +1,20 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/gpu/ir/xla_thunks_ops.h" + +// Static initialization for GPU thunks op registration. +static mlir::DialectRegistration + xla_thunks_ops; diff --git a/tensorflow/compiler/xla/service/gpu/ir/xla_thunks_ops.cc b/tensorflow/compiler/xla/service/gpu/ir/xla_thunks_ops.cc new file mode 100644 index 00000000000..4dbd3196ae6 --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/ir/xla_thunks_ops.cc @@ -0,0 +1,42 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// This file defines the operations used in the Thunk dialect. 
+ +#include "tensorflow/compiler/xla/service/gpu/ir/xla_thunks_ops.h" + +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" // from @llvm-project +#include "mlir/IR/Builders.h" // from @llvm-project +#include "mlir/IR/DialectImplementation.h" // from @llvm-project +#include "mlir/IR/StandardTypes.h" // from @llvm-project +#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" + +namespace mlir { +#include "tensorflow/compiler/xla/service/gpu/ir/xla_thunks_structs.cc.inc" +namespace xla_thunks { + +XLAThunksDialect::XLAThunksDialect(MLIRContext *context) + : Dialect(getDialectNamespace(), context) { + addOperations< +#define GET_OP_LIST +#include "tensorflow/compiler/xla/service/gpu/ir/xla_thunks_ops.cc.inc" + >(); +} + +#define GET_OP_CLASSES +#include "tensorflow/compiler/xla/service/gpu/ir/xla_thunks_ops.cc.inc" + +} // namespace xla_thunks +} // namespace mlir diff --git a/tensorflow/compiler/xla/service/gpu/ir/xla_thunks_ops.h b/tensorflow/compiler/xla/service/gpu/ir/xla_thunks_ops.h new file mode 100644 index 00000000000..ede9adb9ab1 --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/ir/xla_thunks_ops.h @@ -0,0 +1,42 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_IR_XLA_THUNKS_OPS_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_IR_XLA_THUNKS_OPS_H_ + +#include "mlir/IR/Dialect.h" // from @llvm-project +#include "mlir/IR/OpDefinition.h" // from @llvm-project +#include "mlir/IR/OpImplementation.h" // from @llvm-project + +namespace mlir { +class OpBuilder; + +#include "tensorflow/compiler/xla/service/gpu/ir/xla_thunks_structs.h.inc" + +namespace xla_thunks { + +class XLAThunksDialect : public Dialect { + public: + explicit XLAThunksDialect(MLIRContext *context); + static StringRef getDialectNamespace() { return "xla_thunks"; } +}; + +#define GET_OP_CLASSES +#include "tensorflow/compiler/xla/service/gpu/ir/xla_thunks_ops.h.inc" + +} // namespace xla_thunks +} // namespace mlir + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_IR_XLA_THUNKS_OPS_H_ diff --git a/tensorflow/compiler/xla/service/gpu/ir/xla_thunks_ops.td b/tensorflow/compiler/xla/service/gpu/ir/xla_thunks_ops.td new file mode 100644 index 00000000000..38602550864 --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/ir/xla_thunks_ops.td @@ -0,0 +1,57 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Operation definition file for GPU thunks. + +#ifndef XLA_THUNKS_OPS +#define XLA_THUNKS_OPS + +include "mlir/Dialect/LLVMIR/LLVMOpBase.td" +include "mlir/IR/OpBase.td" + +class LLVMPointerTo + : ContainerType().isPointerTy()">, + "$_self.cast<::mlir::LLVM::LLVMType>().getPointerElementTy()", + "LLVM pointer">; + +def XLAThunks_Dialect : Dialect { + let name = "xla_thunks"; + let cppNamespace = "xla_thunks"; +} + +class ThunkOp traits = []> : + Op; + +def AllocationSlice : StructAttr<"AllocationSlice", XLAThunks_Dialect, [ + StructFieldAttr<"allocation_index", I64Attr>, + StructFieldAttr<"offset", I64Attr>, + StructFieldAttr<"size", I64Attr>, + ]> { + let description = "Defines a slice of an allocation for XLA thunk ops"; +} + +def MemzeroThunkOp : ThunkOp<"execute_memzero_thunk"> { + let arguments = (ins + LLVMPointerTo>:$execute_params, + AllocationSlice:$allocation_slice + ); + let results = (outs + I<1>:$ok, + LLVMPointerTo>:$error_message + ); +} + +#endif // XLA_THUNKS_OPS diff --git a/tensorflow/compiler/xla/service/gpu/tests/BUILD b/tensorflow/compiler/xla/service/gpu/tests/BUILD index a23c14017a4..a2bddd2d0d7 100644 --- a/tensorflow/compiler/xla/service/gpu/tests/BUILD +++ b/tensorflow/compiler/xla/service/gpu/tests/BUILD @@ -479,7 +479,10 @@ glob_lit_tests( "no_pip", ], driver = "@llvm-project//mlir:run_lit.sh", - test_file_exts = ["hlo"], + test_file_exts = [ + "hlo", + "mlir", + ], ) # Bundle together all of the test utilities that are used by tests. @@ -487,7 +490,17 @@ filegroup( name = "test_utilities", testonly = True, data = [ - "//tensorflow/compiler/xla/service/gpu/tests:hlo_to_llvm_ir", + ":hlo_to_llvm_ir", + ":xla-thunks-opt", "@llvm-project//llvm:FileCheck", ], ) + +# Binary with only the thunks dialect registered, for testing purposes. +tf_cc_binary( + name = "xla-thunks-opt", + deps = [ + "//tensorflow/compiler/mlir:tf_mlir_opt_main", + "//tensorflow/compiler/xla/service/gpu:xla_thunks_dialect_registration", + ], +) diff --git a/tensorflow/compiler/xla/service/gpu/tests/execute_memzero_thunk.mlir b/tensorflow/compiler/xla/service/gpu/tests/execute_memzero_thunk.mlir new file mode 100644 index 00000000000..0a891833cd3 --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/tests/execute_memzero_thunk.mlir @@ -0,0 +1,15 @@ +// RUN: xla-thunks-opt %s | FileCheck --color --dump-input=fail %s + +func @main( %execute_params: !llvm<"i8*"> ) { + // CHECK: "xla_thunks.execute_memzero_thunk" + // CHECK-SAME: {allocation_index = 0 : i64, offset = 128 : i64, size = 1024 : i64} + // CHECK-SAME: (!llvm<"i8*">) -> (i1, !llvm<"i8*">) + %ok, %error_message = + "xla_thunks.execute_memzero_thunk"( %execute_params ) + { allocation_slice = { allocation_index = 0 + , offset = 128 + , size = 1024 } } + : (!llvm<"i8*">) -> (i1, !llvm<"i8*">) + return +} + From 35286817caa2777de130d23c8ea57da0e8475eb5 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Fri, 24 Jul 2020 15:05:16 -0700 Subject: [PATCH 1305/2522] Migrate Windows CPU official release and nightly testing builds to use the new bazel configs. 
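This drops the per-script `--extra_build_flags "--config=v2"` (and, for the release scripts, `--define=no_tensorflow_py_deps=true`) arguments from every invocation of tensorflow\tools\ci_build\windows\cpu\pip\run.bat. The assumption behind the cleanup is that the new shared bazel configs referred to in the subject already carry those settings, so repeating them in each nightly/release .bat file is redundant. After the change a nightly invocation reduces to, for example:

    call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --extra_test_flags "--test_env=TF2_BEHAVIOR=1"

and a release invocation keeps only the flags that remain script-specific:

    call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --release_build --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow_cpu"
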
PiperOrigin-RevId: 323079826 Change-Id: I7f16f0e6a543a150fd2d991606ba3cfb5cf4bf1c --- .../tools/ci_build/release/windows/cpu_py35_full/nightly.bat | 2 +- .../tools/ci_build/release/windows/cpu_py35_full/release.bat | 2 +- .../tools/ci_build/release/windows/cpu_py36_full/nightly.bat | 2 +- .../tools/ci_build/release/windows/cpu_py36_full/release.bat | 2 +- .../tools/ci_build/release/windows/cpu_py37_full/nightly.bat | 2 +- .../tools/ci_build/release/windows/cpu_py37_full/release.bat | 2 +- .../tools/ci_build/release/windows/cpu_py38_full/nightly.bat | 2 +- .../tools/ci_build/release/windows/cpu_py38_full/release.bat | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tensorflow/tools/ci_build/release/windows/cpu_py35_full/nightly.bat b/tensorflow/tools/ci_build/release/windows/cpu_py35_full/nightly.bat index 131eca130bd..979a30e046c 100644 --- a/tensorflow/tools/ci_build/release/windows/cpu_py35_full/nightly.bat +++ b/tensorflow/tools/ci_build/release/windows/cpu_py35_full/nightly.bat @@ -19,4 +19,4 @@ SET PYTHON_DIRECTORY=Python35 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --extra_build_flags "--config=v2" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" +call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --extra_test_flags "--test_env=TF2_BEHAVIOR=1" diff --git a/tensorflow/tools/ci_build/release/windows/cpu_py35_full/release.bat b/tensorflow/tools/ci_build/release/windows/cpu_py35_full/release.bat index 02b12c7650a..175917d7cad 100644 --- a/tensorflow/tools/ci_build/release/windows/cpu_py35_full/release.bat +++ b/tensorflow/tools/ci_build/release/windows/cpu_py35_full/release.bat @@ -17,4 +17,4 @@ SET PYTHON_DIRECTORY=Python35 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --release_build --extra_build_flags "--config=v2 --define=no_tensorflow_py_deps=true" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow_cpu" +call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --release_build --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow_cpu" diff --git a/tensorflow/tools/ci_build/release/windows/cpu_py36_full/nightly.bat b/tensorflow/tools/ci_build/release/windows/cpu_py36_full/nightly.bat index ee77daa52b4..fd1854603f5 100644 --- a/tensorflow/tools/ci_build/release/windows/cpu_py36_full/nightly.bat +++ b/tensorflow/tools/ci_build/release/windows/cpu_py36_full/nightly.bat @@ -17,4 +17,4 @@ SET PYTHON_DIRECTORY=Python36 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --extra_build_flags "--config=v2" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" +call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --extra_test_flags "--test_env=TF2_BEHAVIOR=1" diff --git a/tensorflow/tools/ci_build/release/windows/cpu_py36_full/release.bat b/tensorflow/tools/ci_build/release/windows/cpu_py36_full/release.bat index e44e6ca6e18..85b75053eff 100644 --- a/tensorflow/tools/ci_build/release/windows/cpu_py36_full/release.bat +++ b/tensorflow/tools/ci_build/release/windows/cpu_py36_full/release.bat @@ -17,4 +17,4 @@ SET PYTHON_DIRECTORY=Python36 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --release_build --extra_build_flags "--config=v2 --define=no_tensorflow_py_deps=true" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow_cpu" +call 
tensorflow\tools\ci_build\windows\cpu\pip\run.bat --release_build --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow_cpu" diff --git a/tensorflow/tools/ci_build/release/windows/cpu_py37_full/nightly.bat b/tensorflow/tools/ci_build/release/windows/cpu_py37_full/nightly.bat index 68d36f68112..69b9449b0c3 100644 --- a/tensorflow/tools/ci_build/release/windows/cpu_py37_full/nightly.bat +++ b/tensorflow/tools/ci_build/release/windows/cpu_py37_full/nightly.bat @@ -17,4 +17,4 @@ SET PYTHON_DIRECTORY=Python37 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --extra_build_flags "--config=v2" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" +call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --extra_test_flags "--test_env=TF2_BEHAVIOR=1" diff --git a/tensorflow/tools/ci_build/release/windows/cpu_py37_full/release.bat b/tensorflow/tools/ci_build/release/windows/cpu_py37_full/release.bat index c65167a5dc6..d8a6673ba4c 100644 --- a/tensorflow/tools/ci_build/release/windows/cpu_py37_full/release.bat +++ b/tensorflow/tools/ci_build/release/windows/cpu_py37_full/release.bat @@ -17,4 +17,4 @@ SET PYTHON_DIRECTORY=Python37 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --release_build --extra_build_flags "--config=v2 --define=no_tensorflow_py_deps=true" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow_cpu" +call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --release_build --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow_cpu" diff --git a/tensorflow/tools/ci_build/release/windows/cpu_py38_full/nightly.bat b/tensorflow/tools/ci_build/release/windows/cpu_py38_full/nightly.bat index 65692431469..0d5b3a7fff8 100644 --- a/tensorflow/tools/ci_build/release/windows/cpu_py38_full/nightly.bat +++ b/tensorflow/tools/ci_build/release/windows/cpu_py38_full/nightly.bat @@ -17,4 +17,4 @@ SET PYTHON_DIRECTORY=Python38 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --extra_build_flags "--config=v2" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" +call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --extra_test_flags "--test_env=TF2_BEHAVIOR=1" diff --git a/tensorflow/tools/ci_build/release/windows/cpu_py38_full/release.bat b/tensorflow/tools/ci_build/release/windows/cpu_py38_full/release.bat index 06599fc0d8c..86adcda0bb9 100644 --- a/tensorflow/tools/ci_build/release/windows/cpu_py38_full/release.bat +++ b/tensorflow/tools/ci_build/release/windows/cpu_py38_full/release.bat @@ -17,5 +17,5 @@ SET PYTHON_DIRECTORY=Python38 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --release_build --extra_build_flags "--config=v2 --define=no_tensorflow_py_deps=true" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow_cpu" +call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --release_build --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow_cpu" From 8b624486c4c7a236adb6ccccfeeef7ed4c8d728f Mon Sep 17 00:00:00 2001 From: Robert Suderman Date: Fri, 24 Jul 2020 15:17:48 -0700 Subject: [PATCH 1306/2522] Modified HLOAbsOp lowering for differing types. 
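With this change, `mhlo.abs` of a complex-valued tensor lowers directly to a tensor of the underlying real element type: the rewrite pattern produces sqrt(real(val) * real(val) + imag(val) * imag(val)) as the result, instead of wrapping that magnitude in an `mhlo.complex` with a zero imaginary part and forcing callers to apply `mhlo.real` afterwards. As a sketch of the op shape this exercises (element types are f32, as in the updated lower-complex.mlir test):

    %1 = "mhlo.abs"(%0) : (tensor<2xcomplex<f32>>) -> (tensor<2xf32>)

which now lowers to the multiply/add/sqrt chain on the real and imaginary components checked in the test, with no trailing complex/real round trip.
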
PiperOrigin-RevId: 323082107 Change-Id: I0934678aa1423103d7029d0dfd2c8585e62d0f45 --- .../lib/Dialect/mhlo/transforms/lower_complex_patterns.td | 4 +--- tensorflow/compiler/mlir/hlo/tests/lower-complex.mlir | 5 ++--- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lower_complex_patterns.td b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lower_complex_patterns.td index 0b72ccaa823..eadfebf2fac 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lower_complex_patterns.td +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lower_complex_patterns.td @@ -89,12 +89,10 @@ def : Pat<(HLO_DivOp HLO_ComplexTensor:$lhs, HLO_IntOrFpTensor:$rhs), // Absolute value is evaluated as: // result = sqrt(val.real * val.real + val.imag * val.imag) def : Pat<(HLO_AbsOp HLO_ComplexTensor:$val), - (HLO_ComplexOp (HLO_SqrtOp (HLO_AddOp (HLO_MulOp (HLO_RealOp:$real $val), $real), - (HLO_MulOp (HLO_ImagOp:$imag $val), $imag))), - (HLO_ConstOp (ConstantSplat<"0"> $real)))>; + (HLO_MulOp (HLO_ImagOp:$imag $val), $imag)))>; // Exponential can be lowered to an exponential on the real component and a // sum of sinusoids of the imaginary component, which equates to a normal diff --git a/tensorflow/compiler/mlir/hlo/tests/lower-complex.mlir b/tensorflow/compiler/mlir/hlo/tests/lower-complex.mlir index 8d84e7140f3..a7bd21257a6 100644 --- a/tensorflow/compiler/mlir/hlo/tests/lower-complex.mlir +++ b/tensorflow/compiler/mlir/hlo/tests/lower-complex.mlir @@ -182,11 +182,10 @@ func @abs(%arg0 : tensor<2xf32>, %arg1 : tensor<2xf32>) -> (tensor<2xf32>) { // CHECK-DAG: [[VAL1:%.+]] = mhlo.multiply %arg1, %arg1 // CHECK-DAG: [[VAL2:%.+]] = mhlo.add [[VAL0]], [[VAL1]] // CHECK-DAG: [[VAL3:%.+]] = "mhlo.sqrt"([[VAL2]]) - %1 = "mhlo.abs"(%0) : (tensor<2xcomplex>) -> (tensor<2xcomplex>) - %2 = "mhlo.real"(%1) : (tensor<2xcomplex>) -> (tensor<2xf32>) + %1 = "mhlo.abs"(%0) : (tensor<2xcomplex>) -> (tensor<2xf32>) // CHECK: return [[VAL3]] - return %2 : tensor<2xf32> + return %1 : tensor<2xf32> } // CHECK-LABEL: @exp From 6a68d88188bdb4894d64db95047d4c4905df134e Mon Sep 17 00:00:00 2001 From: Advait Jain Date: Fri, 24 Jul 2020 15:43:27 -0700 Subject: [PATCH 1307/2522] Add release build to the kokoro continuous integration. PiperOrigin-RevId: 323086853 Change-Id: I95d8533605998afce3bcb0be7429e274c3c7a57b --- tensorflow/lite/micro/micro_error_reporter_test.cc | 2 ++ tensorflow/lite/micro/micro_interpreter.cc | 2 ++ tensorflow/lite/micro/tools/ci_build/test_all.sh | 6 +++--- tensorflow/lite/micro/tools/ci_build/test_stm32f4.sh | 7 ++++++- tensorflow/lite/micro/tools/ci_build/test_x86.sh | 7 +++++++ 5 files changed, 20 insertions(+), 4 deletions(-) diff --git a/tensorflow/lite/micro/micro_error_reporter_test.cc b/tensorflow/lite/micro/micro_error_reporter_test.cc index a23fc286fc5..b67a71628ed 100644 --- a/tensorflow/lite/micro/micro_error_reporter_test.cc +++ b/tensorflow/lite/micro/micro_error_reporter_test.cc @@ -16,6 +16,7 @@ limitations under the License. 
#include "tensorflow/lite/micro/micro_error_reporter.h" int main(int argc, char** argv) { +#ifndef TF_LITE_STRIP_ERROR_STRINGS tflite::MicroErrorReporter micro_error_reporter; tflite::ErrorReporter* error_reporter = µ_error_reporter; TF_LITE_REPORT_ERROR(error_reporter, "Number: %d", 42); @@ -23,4 +24,5 @@ int main(int argc, char** argv) { TF_LITE_REPORT_ERROR(error_reporter, "Another % badly-formed %% format string"); TF_LITE_REPORT_ERROR(error_reporter, "~~~%s~~~", "ALL TESTS PASSED"); +#endif // !defined(TF_LITE_STRIP_ERROR_STRINGS) } diff --git a/tensorflow/lite/micro/micro_interpreter.cc b/tensorflow/lite/micro/micro_interpreter.cc index f9eb263f552..8c2f8e031d8 100644 --- a/tensorflow/lite/micro/micro_interpreter.cc +++ b/tensorflow/lite/micro/micro_interpreter.cc @@ -31,6 +31,7 @@ limitations under the License. namespace tflite { namespace { +#ifndef TF_LITE_STRIP_ERROR_STRINGS const char* OpNameFromRegistration(const TfLiteRegistration* registration) { if (registration->builtin_code == BuiltinOperator_CUSTOM) { return registration->custom_name; @@ -38,6 +39,7 @@ const char* OpNameFromRegistration(const TfLiteRegistration* registration) { return EnumNameBuiltinOperator(BuiltinOperator(registration->builtin_code)); } } +#endif // !defined(TF_LITE_STRIP_ERROR_STRINGS) } // namespace diff --git a/tensorflow/lite/micro/tools/ci_build/test_all.sh b/tensorflow/lite/micro/tools/ci_build/test_all.sh index 403acb28a5f..345bf33e216 100755 --- a/tensorflow/lite/micro/tools/ci_build/test_all.sh +++ b/tensorflow/lite/micro/tools/ci_build/test_all.sh @@ -34,6 +34,9 @@ make -f tensorflow/lite/micro/tools/make/Makefile \ echo "Starting to run micro tests at `date`" +echo "Running x86 tests at `date`" +tensorflow/lite/micro/tools/ci_build/test_x86.sh + echo "Running bluepill tests at `date`" tensorflow/lite/micro/tools/ci_build/test_bluepill.sh @@ -43,9 +46,6 @@ tensorflow/lite/micro/tools/ci_build/test_mbed.sh PRESUBMIT echo "Running Sparkfun tests at `date`" tensorflow/lite/micro/tools/ci_build/test_sparkfun.sh -echo "Running x86 tests at `date`" -tensorflow/lite/micro/tools/ci_build/test_x86.sh - echo "Running stm32f4 tests at `date`" tensorflow/lite/micro/tools/ci_build/test_stm32f4.sh diff --git a/tensorflow/lite/micro/tools/ci_build/test_stm32f4.sh b/tensorflow/lite/micro/tools/ci_build/test_stm32f4.sh index be706a3a6bd..2ef1bb1f97f 100755 --- a/tensorflow/lite/micro/tools/ci_build/test_stm32f4.sh +++ b/tensorflow/lite/micro/tools/ci_build/test_stm32f4.sh @@ -32,7 +32,12 @@ readable_run make -f tensorflow/lite/micro/tools/make/Makefile clean # TODO(b/143715361): downloading first to allow for parallel builds. readable_run make -f tensorflow/lite/micro/tools/make/Makefile TAGS=${TAGS} TARGET=${TARGET} third_party_downloads -# Build test binaries first +# First make sure that the release build succeeds. +readable_run make -j8 -f tensorflow/lite/micro/tools/make/Makefile BUILD_TYPE=release TAGS=${TAGS} TARGET=${TARGET} build + +# Next, build w/o release so that we can run the tests and get additional +# debugging info on failures. +readable_run make -f tensorflow/lite/micro/tools/make/Makefile clean readable_run make -j8 -f tensorflow/lite/micro/tools/make/Makefile TAGS=${TAGS} TARGET=${TARGET} build # TODO(b/149597202): Disabled until we can get Docker running inside Docker. 
diff --git a/tensorflow/lite/micro/tools/ci_build/test_x86.sh b/tensorflow/lite/micro/tools/ci_build/test_x86.sh index c150d828164..49e20b4f84d 100755 --- a/tensorflow/lite/micro/tools/ci_build/test_x86.sh +++ b/tensorflow/lite/micro/tools/ci_build/test_x86.sh @@ -28,4 +28,11 @@ readable_run make -f tensorflow/lite/micro/tools/make/Makefile clean # TODO(b/143715361): downloading first to allow for parallel builds. readable_run make -f tensorflow/lite/micro/tools/make/Makefile third_party_downloads + +# First make sure that the release build succeeds. +readable_run make -j8 -f tensorflow/lite/micro/tools/make/Makefile BUILD_TYPE=release build + +# Next, build w/o release so that we can run the tests and get additional +# debugging info on failures. +readable_run make -f tensorflow/lite/micro/tools/make/Makefile clean readable_run make -s -j8 -f tensorflow/lite/micro/tools/make/Makefile test From b3a7a814f1567776c501a76191b456423d6f835c Mon Sep 17 00:00:00 2001 From: Jonathan Chu Date: Fri, 24 Jul 2020 22:49:35 +0000 Subject: [PATCH 1308/2522] Small fixes --- tensorflow/python/eager/benchmarks_test.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/eager/benchmarks_test.py b/tensorflow/python/eager/benchmarks_test.py index c6212275917..9513644f0a4 100644 --- a/tensorflow/python/eager/benchmarks_test.py +++ b/tensorflow/python/eager/benchmarks_test.py @@ -478,12 +478,12 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): def _benchmark_defun_matmul_with_signature(self, m, num_iters, execution_mode=None): - def defun_matmul(m): + def func_matmul(m): return math_ops.matmul(m, m) f = function.defun( - defun_matmul, input_signature=[tensor_spec.TensorSpec([2, 2], dtypes.float32)]) + func_matmul, input_signature=[tensor_spec.TensorSpec([2, 2], dtypes.float32)]) - func = lambda: defun_matmul(m) + func = lambda: f(m) self._run(func, num_iters, execution_mode=execution_mode) def _benchmark_defun_args_matmul(self, m, num_iters, execution_mode=None): @@ -587,7 +587,6 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): self._benchmark_defun_matmul( m, transpose_b=False, num_iters=self._num_iters_2_by_2) - @test_util.disable_tfrt("With signature") def benchmark_defun_matmul_2_by_2_CPU_with_signature(self): with context.device(CPU): m = self._m_2_by_2.cpu() @@ -674,7 +673,7 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): self._benchmark_defun_matmul( m, transpose_b=False, num_iters=self._num_iters_2_by_2) - @test_util.disable_tfrt("With signature") + @test_util.disable_tfrt("copy to GPU not supported") def benchmark_defun_matmul_2_by_2_GPU_with_signature(self): if not context.num_gpus(): return From cae793828ba4c122c595c0a2ce95000591650771 Mon Sep 17 00:00:00 2001 From: Yanhui Liang Date: Fri, 24 Jul 2020 15:45:37 -0700 Subject: [PATCH 1309/2522] Update `run_deprecated_v1` tests with graph scope. 
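The queue-based input pipelines exercised by input_test.py are graph-only (the file now notes that tf.data should be used instead in TF2), so rather than relying on the `@test_util.run_deprecated_v1` decorator, each test builds its ops under an explicit graph scope. A minimal sketch of the pattern applied throughout the file:

    # Before: the decorator forced v1 graph-mode behavior.
    @test_util.run_deprecated_v1
    def testNoShuffle(self):
      with self.cached_session():
        ...

    # After: the graph scope is explicit, no decorator needed.
    def testNoShuffle(self):
      with ops.Graph().as_default(), self.cached_session():
        ...

The only new import this requires is `from tensorflow.python.framework import ops`, which the diff adds at the top of the file.
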
PiperOrigin-RevId: 323087204 Change-Id: I18114e1bd4ea30f79e858dc209aac7ead1316693 --- tensorflow/python/training/input_test.py | 924 ++++++++++++----------- 1 file changed, 473 insertions(+), 451 deletions(-) diff --git a/tensorflow/python/training/input_test.py b/tensorflow/python/training/input_test.py index 5d01d1c73de..421f0f3534e 100644 --- a/tensorflow/python/training/input_test.py +++ b/tensorflow/python/training/input_test.py @@ -27,6 +27,7 @@ from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors_impl +from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops @@ -40,9 +41,10 @@ from tensorflow.python.training import queue_runner_impl from tensorflow.python.util import compat +# Queue-based input pipelines are not supported when eager execution is enabled. +# Please use tf.data instead in TF2. class MatchFilenamesOnceTest(test_lib.TestCase): - @test_util.run_deprecated_v1 def test(self): temp_dir = self.get_temp_dir() filenames = [os.path.join(temp_dir, n) for n in os.listdir(temp_dir)] @@ -53,7 +55,7 @@ class MatchFilenamesOnceTest(test_lib.TestCase): for name in additional: open(name, "w").write("Some contents") filenames = list(set(filenames + additional)) - with self.cached_session(): + with ops.Graph().as_default(), self.cached_session(): star = inp.match_filenames_once(os.path.join(self.get_temp_dir(), "*")) question = inp.match_filenames_once( os.path.join(self.get_temp_dir(), "match_filenames.?")) @@ -70,18 +72,16 @@ class MatchFilenamesOnceTest(test_lib.TestCase): class LimitEpochsTest(test_lib.TestCase): - @test_util.run_deprecated_v1 def testNoLimit(self): - with self.cached_session(): + with ops.Graph().as_default(), self.cached_session(): seven = constant_op.constant(7) seven_forever = inp.limit_epochs(seven) variables.local_variables_initializer().run() for _ in range(100): self.assertEqual(7, self.evaluate(seven_forever)) - @test_util.run_deprecated_v1 def testLimit(self): - with self.cached_session(): + with ops.Graph().as_default(), self.cached_session(): love_me = constant_op.constant("Love Me") love_me_two_times = inp.limit_epochs(love_me, num_epochs=2) self.evaluate(variables.global_variables_initializer()) @@ -94,9 +94,8 @@ class LimitEpochsTest(test_lib.TestCase): class InputProducerTest(test_lib.TestCase): - @test_util.run_deprecated_v1 def testNoShuffle(self): - with self.cached_session(): + with ops.Graph().as_default(), self.cached_session(): input_tensor = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]] @@ -119,9 +118,8 @@ class InputProducerTest(test_lib.TestCase): for thread in threads: thread.join() - @test_util.run_deprecated_v1 def testNoShapeInference(self): - with self.cached_session(): + with ops.Graph().as_default(), self.cached_session(): # Disable shape inference for the input. 
input_value = [[1, 2, 3, 4], [5, 6, 7, 8], @@ -145,18 +143,17 @@ class InputProducerTest(test_lib.TestCase): for thread in threads: thread.join() - @test_util.run_deprecated_v1 def testShapeError(self): - input_tensor = array_ops.placeholder(dtypes.float32, None) - with self.assertRaisesRegex(ValueError, "fully defined shape"): - _ = inp.input_producer(input_tensor) + with ops.Graph().as_default(): + input_tensor = array_ops.placeholder(dtypes.float32, None) + with self.assertRaisesRegex(ValueError, "fully defined shape"): + _ = inp.input_producer(input_tensor) class StringInputProducerTest(test_lib.TestCase): - @test_util.run_deprecated_v1 def testNoShuffle(self): - with self.cached_session(): + with ops.Graph().as_default(), self.cached_session(): strings = [b"to", b"be", b"or", b"not", b"to", b"be"] num_epochs = 3 queue = inp.string_input_producer( @@ -177,9 +174,8 @@ class StringInputProducerTest(test_lib.TestCase): for thread in threads: thread.join() - @test_util.run_deprecated_v1 def testShuffle(self): - with self.cached_session(): + with ops.Graph().as_default(), self.cached_session(): strings = [b"a", b"b", b"c"] num_epochs = 600 queue = inp.string_input_producer( @@ -223,12 +219,11 @@ class StringInputProducerTest(test_lib.TestCase): with self.assertRaises(ValueError): _ = inp.string_input_producer([]) - @test_util.run_deprecated_v1 def testNullString(self): # Runtime check for empty string list. This is slightly oblique: # The queue runner should die with an assertion error on the null # input tensor, causing the dequeue to fail with an OutOfRangeError. - with self.cached_session(): + with ops.Graph().as_default(), self.cached_session(): coord = coordinator.Coordinator() queue = inp.string_input_producer( constant_op.constant( @@ -243,18 +238,16 @@ class StringInputProducerTest(test_lib.TestCase): for thread in threads: thread.join() - @test_util.run_deprecated_v1 def testSharedName(self): - with self.cached_session(): + with ops.Graph().as_default(), self.cached_session(): strings = [b"to", b"be", b"or", b"not", b"to", b"be"] queue = inp.string_input_producer( strings, shared_name="SHARED_NAME_XYZ", name="Q") self.assertProtoEquals("s: 'SHARED_NAME_XYZ'", queue.queue_ref.op.node_def.attr["shared_name"]) - @test_util.run_deprecated_v1 def testConstructionRace(self): - with self.cached_session() as sess: + with ops.Graph().as_default(), self.cached_session() as sess: strings = [b"to", b"be", b"or", b"not", b"to", b"be"] queue = inp.string_input_producer(strings, shuffle=False) coord = coordinator.Coordinator() @@ -275,9 +268,8 @@ class StringInputProducerTest(test_lib.TestCase): class RangeInputProducerTest(test_lib.TestCase): - @test_util.run_deprecated_v1 def testNoShuffle(self): - with self.cached_session(): + with ops.Graph().as_default(), self.cached_session(): num_epochs = 3 range_size = 5 queue = inp.range_input_producer( @@ -298,9 +290,8 @@ class RangeInputProducerTest(test_lib.TestCase): for thread in threads: thread.join() - @test_util.run_deprecated_v1 def testShuffle(self): - with self.cached_session(): + with ops.Graph().as_default(), self.cached_session(): num_epochs = 200 range_size = 2 queue = inp.range_input_producer( @@ -338,9 +329,8 @@ class RangeInputProducerTest(test_lib.TestCase): for thread in threads: thread.join() - @test_util.run_deprecated_v1 def testSharedName(self): - with self.cached_session(): + with ops.Graph().as_default(), self.cached_session(): range_size = 5 queue = inp.range_input_producer( range_size, shared_name="SHARED_NAME_XYZ", name="Q") 
@@ -350,9 +340,8 @@ class RangeInputProducerTest(test_lib.TestCase): class SliceInputProducerTest(test_lib.TestCase): - @test_util.run_deprecated_v1 def testNoShuffle(self): - with self.cached_session() as sess: + with ops.Graph().as_default(), self.cached_session(): num_epochs = 3 source_strings = [b"Alpha", b"Beta", b"Delta", b"Gamma"] source_ints = [2, 3, 5, 7] @@ -375,9 +364,8 @@ class SliceInputProducerTest(test_lib.TestCase): for thread in threads: thread.join() - @test_util.run_deprecated_v1 def testShuffle(self): - with self.cached_session() as sess: + with ops.Graph().as_default(), self.cached_session(): num_epochs = 1200 source_strings = ["A", "B", "D", "G"] source_ints = [7, 3, 5, 2] @@ -420,9 +408,8 @@ class SliceInputProducerTest(test_lib.TestCase): for thread in threads: thread.join() - @test_util.run_deprecated_v1 def testSharedName(self): - with self.cached_session(): + with ops.Graph().as_default(), self.cached_session(): source_strings = ["A", "B", "D", "G"] source_ints = [7, 3, 5, 2] slices = inp.slice_input_producer( @@ -462,7 +449,7 @@ class DictHelperTest(test_lib.TestCase): class BatchTest(test_lib.TestCase): def _testOneThreadHelper(self, use_dict): - with self.cached_session() as sess: + with ops.Graph().as_default(), self.cached_session(): batch_size = 10 num_batches = 3 zero64 = constant_op.constant(0, dtype=dtypes.int64) @@ -513,41 +500,38 @@ class BatchTest(test_lib.TestCase): for thread in threads: thread.join() - @test_util.run_deprecated_v1 def testOneThread(self): self._testOneThreadHelper(use_dict=False) - @test_util.run_deprecated_v1 def testOneThreadDict(self): self._testOneThreadHelper(use_dict=True) - @test_util.run_deprecated_v1 def testUint32DataTypes(self): - values = constant_op.constant([0, 1, 2, 3, 4, 5], dtype=dtypes.uint32) - batched = inp.batch([values], batch_size=2) - with self.cached_session() as sess: - coord = coordinator.Coordinator() - threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord) - self.evaluate(batched) - coord.request_stop() - for thread in threads: - thread.join() + with ops.Graph().as_default(): + values = constant_op.constant([0, 1, 2, 3, 4, 5], dtype=dtypes.uint32) + batched = inp.batch([values], batch_size=2) + with self.cached_session() as sess: + coord = coordinator.Coordinator() + threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord) + self.evaluate(batched) + coord.request_stop() + for thread in threads: + thread.join() - @test_util.run_deprecated_v1 def testUint64DataTypes(self): - values = constant_op.constant([0, 1, 2, 3, 4, 5], dtype=dtypes.uint64) - batched = inp.batch([values], batch_size=2) - with self.cached_session() as sess: - coord = coordinator.Coordinator() - threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord) - self.evaluate(batched) - coord.request_stop() - for thread in threads: - thread.join() + with ops.Graph().as_default(): + values = constant_op.constant([0, 1, 2, 3, 4, 5], dtype=dtypes.uint64) + batched = inp.batch([values], batch_size=2) + with self.cached_session() as sess: + coord = coordinator.Coordinator() + threads = queue_runner_impl.start_queue_runners(sess=sess, coord=coord) + self.evaluate(batched) + coord.request_stop() + for thread in threads: + thread.join() - @test_util.run_deprecated_v1 def testOneThreadDynamicPad(self): - with self.cached_session() as sess: + with ops.Graph().as_default(), self.cached_session(): batch_size = 10 num_batches = 3 zero64 = constant_op.constant(0, dtype=dtypes.int64) @@ -577,9 +561,8 @@ class 
BatchTest(test_lib.TestCase): for thread in threads: thread.join() - @test_util.run_deprecated_v1 def testOneThreadEnqueueMany(self): - with self.cached_session() as sess: + with ops.Graph().as_default(), self.cached_session(): batch_size = 10 num_batches = 3 zero64 = constant_op.constant(0, dtype=dtypes.int64) @@ -613,9 +596,8 @@ class BatchTest(test_lib.TestCase): for thread in threads: thread.join() - @test_util.run_deprecated_v1 def testManyThreads(self): - with self.cached_session() as sess: + with ops.Graph().as_default(), self.cached_session(): batch_size = 10 num_batches = 3 zero64 = constant_op.constant(0, dtype=dtypes.int64) @@ -654,9 +636,8 @@ class BatchTest(test_lib.TestCase): for thread in threads: thread.join() - @test_util.run_deprecated_v1 def testOneThreadSmallerBatch(self): - with self.cached_session() as sess: + with ops.Graph().as_default(), self.cached_session(): batch_size = 10 num_batches = 3 extra_elements = 5 @@ -712,9 +693,8 @@ class BatchTest(test_lib.TestCase): for thread in threads: thread.join() - @test_util.run_deprecated_v1 def testManyThreadsSmallerBatch(self): - with self.cached_session() as sess: + with ops.Graph().as_default(), self.cached_session(): batch_size = 10 num_batches = 3 extra_elements = 5 @@ -768,9 +748,8 @@ class BatchTest(test_lib.TestCase): for thread in threads: thread.join() - @test_util.run_deprecated_v1 def testSharedName(self): - with self.cached_session(): + with ops.Graph().as_default(), self.cached_session(): batch_size = 10 num_batches = 3 zero64 = constant_op.constant(0, dtype=dtypes.int64) @@ -786,57 +765,56 @@ class BatchTest(test_lib.TestCase): "s: 'SHARED_NAME_XYZ'", batched[0].op.inputs[0].op.node_def.attr["shared_name"]) - @test_util.run_deprecated_v1 def testCannotInferRankError(self): - with self.cached_session(): + with ops.Graph().as_default(), self.cached_session(): x = array_ops.placeholder(dtype=dtypes.int64) with self.assertRaisesRegex(ValueError, "Cannot infer Tensor's rank"): inp.batch([x], batch_size=2) - @test_util.run_deprecated_v1 def testBatchedSparseTensorInferredShape(self): - sparse = sparse_tensor.SparseTensor( - indices=[[0]], values=[1.0], dense_shape=[1]) - self.assertAllEqual((1,), sparse.dense_shape.get_shape().as_list()) - batched = inp.batch([sparse], batch_size=2) - self.assertAllEqual((2,), batched.dense_shape.get_shape().as_list()) + with ops.Graph().as_default(): + sparse = sparse_tensor.SparseTensor( + indices=[[0]], values=[1.0], dense_shape=[1]) + self.assertAllEqual((1,), sparse.dense_shape.get_shape().as_list()) + batched = inp.batch([sparse], batch_size=2) + self.assertAllEqual((2,), batched.dense_shape.get_shape().as_list()) - @test_util.run_deprecated_v1 def testBatchedSparseTensorInferredShapeEnqueueMany(self): - sparse = sparse_tensor.SparseTensor( - indices=[[0]], values=[1.0], dense_shape=[1]) - self.assertAllEqual((1,), sparse.dense_shape.get_shape().as_list()) - batched = inp.batch([sparse], batch_size=2, enqueue_many=True) - self.assertAllEqual((1,), batched.dense_shape.get_shape().as_list()) + with ops.Graph().as_default(): + sparse = sparse_tensor.SparseTensor( + indices=[[0]], values=[1.0], dense_shape=[1]) + self.assertAllEqual((1,), sparse.dense_shape.get_shape().as_list()) + batched = inp.batch([sparse], batch_size=2, enqueue_many=True) + self.assertAllEqual((1,), batched.dense_shape.get_shape().as_list()) - @test_util.run_deprecated_v1 def testBatchedSparseTensorInferredShapeUnknownRank(self): - sparse = sparse_tensor.SparseTensor( - 
indices=array_ops.placeholder(dtypes.int64), - values=array_ops.placeholder(dtypes.float32), - dense_shape=array_ops.placeholder(dtypes.int64)) - self.assertIs(None, sparse.dense_shape.get_shape().num_elements()) - batched = inp.batch([sparse], batch_size=2) - self.assertIs(None, batched.dense_shape.get_shape().num_elements()) + with ops.Graph().as_default(): + sparse = sparse_tensor.SparseTensor( + indices=array_ops.placeholder(dtypes.int64), + values=array_ops.placeholder(dtypes.float32), + dense_shape=array_ops.placeholder(dtypes.int64)) + self.assertIs(None, sparse.dense_shape.get_shape().num_elements()) + batched = inp.batch([sparse], batch_size=2) + self.assertIs(None, batched.dense_shape.get_shape().num_elements()) - @test_util.run_deprecated_v1 def testBatchedSparseTensorInferredShapeUnknownRankEnqueueMany(self): - sparse = sparse_tensor.SparseTensor( - indices=array_ops.placeholder(dtypes.int64), - values=array_ops.placeholder(dtypes.float32), - dense_shape=array_ops.placeholder(dtypes.int64)) - self.assertIs(None, sparse.dense_shape.get_shape().num_elements()) - batched = inp.batch([sparse], batch_size=2, enqueue_many=True) - self.assertIs(None, batched.dense_shape.get_shape().num_elements()) + with ops.Graph().as_default(): + sparse = sparse_tensor.SparseTensor( + indices=array_ops.placeholder(dtypes.int64), + values=array_ops.placeholder(dtypes.float32), + dense_shape=array_ops.placeholder(dtypes.int64)) + self.assertIs(None, sparse.dense_shape.get_shape().num_elements()) + batched = inp.batch([sparse], batch_size=2, enqueue_many=True) + self.assertIs(None, batched.dense_shape.get_shape().num_elements()) - @test_util.run_deprecated_v1 def testSingleElementDict(self): - x = inp.batch({"c": [12, 12]}, batch_size=8) - self.assertAllEqual((8, 2), x["c"].get_shape().as_list()) + with ops.Graph().as_default(): + x = inp.batch({"c": [12, 12]}, batch_size=8) + self.assertAllEqual((8, 2), x["c"].get_shape().as_list()) def _testKeepInputHelper(self, num_threads, enqueue_many, keep_input_vector=False): - with self.cached_session() as sess: + with ops.Graph().as_default(), self.cached_session(): batch_size = 5 num_batches = 4 examples = variables.Variable(0) @@ -897,123 +875,132 @@ class BatchTest(test_lib.TestCase): def testMultipleThreadKeepInputEnqueueMany(self): self._testKeepInputHelper(5, True) - @test_util.run_deprecated_v1 def testMaybeEnqueuePerExample(self): self._testKeepInputHelper(1, True, keep_input_vector=True) - @test_util.run_deprecated_v1 def testMultipleThreadMaybeEnqueuePerExample(self): self._testKeepInputHelper(5, True, keep_input_vector=True) - @test_util.run_deprecated_v1 def testInvalidKeepInputVector(self): - # Can't have vector `keep_input` with `enqueue_many=False`. - with self.assertRaisesRegex(ValueError, "`keep_input` cannot be a vector"): - inp.maybe_batch([array_ops.zeros(5)], - keep_input=constant_op.constant([True, False]), - batch_size=1, - enqueue_many=False) - # Can't have `keep_input` with more than one dimension. - with self.assertRaisesRegex(ValueError, "must be 0 or 1 dimensions"): - inp.maybe_batch([array_ops.zeros(5)], - keep_input=constant_op.constant([[True], [False]]), - batch_size=1, - enqueue_many=True) - # `keep_input` must have dimensions determined at graph construction. 
- with self.assertRaisesRegex(ValueError, - "must be known at graph construction"): - inp.maybe_batch([array_ops.zeros(5)], - keep_input=array_ops.placeholder(dtypes.bool), - batch_size=1, - enqueue_many=True) + with ops.Graph().as_default(): + # Can't have vector `keep_input` with `enqueue_many=False`. + with self.assertRaisesRegex(ValueError, + "`keep_input` cannot be a vector"): + inp.maybe_batch([array_ops.zeros(5)], + keep_input=constant_op.constant([True, False]), + batch_size=1, + enqueue_many=False) + # Can't have `keep_input` with more than one dimension. + with self.assertRaisesRegex(ValueError, "must be 0 or 1 dimensions"): + inp.maybe_batch([array_ops.zeros(5)], + keep_input=constant_op.constant([[True], [False]]), + batch_size=1, + enqueue_many=True) + # `keep_input` must have dimensions determined at graph construction. + with self.assertRaisesRegex(ValueError, + "must be known at graph construction"): + inp.maybe_batch([array_ops.zeros(5)], + keep_input=array_ops.placeholder(dtypes.bool), + batch_size=1, + enqueue_many=True) - @test_util.run_deprecated_v1 def testMaybeBatchedSparseTensorInferredShape(self): - sparse = sparse_tensor.SparseTensor( - indices=[[0]], values=[1.0], dense_shape=[1]) - self.assertAllEqual((1,), sparse.dense_shape.get_shape().as_list()) - batched = inp.maybe_batch([sparse], keep_input=True, batch_size=2) - self.assertAllEqual((2,), batched.dense_shape.get_shape().as_list()) + with ops.Graph().as_default(): + sparse = sparse_tensor.SparseTensor( + indices=[[0]], values=[1.0], dense_shape=[1]) + self.assertAllEqual((1,), sparse.dense_shape.get_shape().as_list()) + batched = inp.maybe_batch([sparse], keep_input=True, batch_size=2) + self.assertAllEqual((2,), batched.dense_shape.get_shape().as_list()) - @test_util.run_deprecated_v1 def testMaybeBatchedSparseTensorInferredShapeEnqueueMany(self): - sparse = sparse_tensor.SparseTensor( - indices=[[0]], values=[1.0], dense_shape=[1]) - self.assertAllEqual((1,), sparse.dense_shape.get_shape().as_list()) - batched = inp.maybe_batch( - [sparse], keep_input=True, batch_size=2, enqueue_many=True) - self.assertAllEqual((1,), batched.dense_shape.get_shape().as_list()) + with ops.Graph().as_default(): + sparse = sparse_tensor.SparseTensor( + indices=[[0]], values=[1.0], dense_shape=[1]) + self.assertAllEqual((1,), sparse.dense_shape.get_shape().as_list()) + batched = inp.maybe_batch([sparse], + keep_input=True, + batch_size=2, + enqueue_many=True) + self.assertAllEqual((1,), batched.dense_shape.get_shape().as_list()) - @test_util.run_deprecated_v1 def testMaybeBatchedSparseTensorInferredShapeEnqueueManyPerExample(self): - sparse = sparse_tensor.SparseTensor( - indices=[[0], [0]], values=[1.0, 2.0], dense_shape=[2]) - self.assertAllEqual((1,), sparse.dense_shape.get_shape().as_list()) - batched = inp.maybe_batch( - [sparse], keep_input=[True, False], batch_size=2, enqueue_many=True) - self.assertAllEqual((1,), batched.dense_shape.get_shape().as_list()) + with ops.Graph().as_default(): + sparse = sparse_tensor.SparseTensor( + indices=[[0], [0]], values=[1.0, 2.0], dense_shape=[2]) + self.assertAllEqual((1,), sparse.dense_shape.get_shape().as_list()) + batched = inp.maybe_batch([sparse], + keep_input=[True, False], + batch_size=2, + enqueue_many=True) + self.assertAllEqual((1,), batched.dense_shape.get_shape().as_list()) - @test_util.run_deprecated_v1 def testMaybeBatchedSparseTensorInferredShapeUnknownRank(self): - sparse = sparse_tensor.SparseTensor( - indices=array_ops.placeholder(dtypes.int64), - 
values=array_ops.placeholder(dtypes.float32), - dense_shape=array_ops.placeholder(dtypes.int64)) - self.assertIs(None, sparse.dense_shape.get_shape().num_elements()) - batched = inp.maybe_batch([sparse], keep_input=True, batch_size=2) - self.assertIs(None, batched.dense_shape.get_shape().num_elements()) + with ops.Graph().as_default(): + sparse = sparse_tensor.SparseTensor( + indices=array_ops.placeholder(dtypes.int64), + values=array_ops.placeholder(dtypes.float32), + dense_shape=array_ops.placeholder(dtypes.int64)) + self.assertIs(None, sparse.dense_shape.get_shape().num_elements()) + batched = inp.maybe_batch([sparse], keep_input=True, batch_size=2) + self.assertIs(None, batched.dense_shape.get_shape().num_elements()) - @test_util.run_deprecated_v1 def testMaybeBatchedSparseTensorInferredShapeUnknownRankEnqueueMany(self): - sparse = sparse_tensor.SparseTensor( - indices=array_ops.placeholder(dtypes.int64), - values=array_ops.placeholder(dtypes.float32), - dense_shape=array_ops.placeholder(dtypes.int64)) - self.assertIs(None, sparse.dense_shape.get_shape().num_elements()) - batched = inp.maybe_batch( - [sparse], keep_input=True, batch_size=2, enqueue_many=True) - self.assertIs(None, batched.dense_shape.get_shape().num_elements()) + with ops.Graph().as_default(): + sparse = sparse_tensor.SparseTensor( + indices=array_ops.placeholder(dtypes.int64), + values=array_ops.placeholder(dtypes.float32), + dense_shape=array_ops.placeholder(dtypes.int64)) + self.assertIs(None, sparse.dense_shape.get_shape().num_elements()) + batched = inp.maybe_batch([sparse], + keep_input=True, + batch_size=2, + enqueue_many=True) + self.assertIs(None, batched.dense_shape.get_shape().num_elements()) - @test_util.run_deprecated_v1 def testMaybeBatchedSparseTensorInferredShapeUnknownRankPerExample(self): - sparse = sparse_tensor.SparseTensor( - indices=array_ops.placeholder(dtypes.int64), - values=array_ops.placeholder(dtypes.float32), - dense_shape=array_ops.placeholder(dtypes.int64)) - self.assertIs(None, sparse.dense_shape.get_shape().num_elements()) - batched = inp.maybe_batch( - [sparse], keep_input=[True, False], batch_size=2, enqueue_many=True) - self.assertIs(None, batched.dense_shape.get_shape().num_elements()) + with ops.Graph().as_default(): + sparse = sparse_tensor.SparseTensor( + indices=array_ops.placeholder(dtypes.int64), + values=array_ops.placeholder(dtypes.float32), + dense_shape=array_ops.placeholder(dtypes.int64)) + self.assertIs(None, sparse.dense_shape.get_shape().num_elements()) + batched = inp.maybe_batch([sparse], + keep_input=[True, False], + batch_size=2, + enqueue_many=True) + self.assertIs(None, batched.dense_shape.get_shape().num_elements()) - @test_util.run_deprecated_v1 def testMaybeBatchCorrectValues(self): - sparse_t = sparse_tensor.SparseTensor( - indices=[[0, 1], [0, 2], [1, 0], [1, 3]], - dense_shape=[2, 4], - values=[5, 4, 7, 2]) - keep = constant_op.constant([True, False]) - batched = inp.maybe_batch( - [sparse_t], keep_input=keep, batch_size=1, enqueue_many=True) + with ops.Graph().as_default(): + sparse_t = sparse_tensor.SparseTensor( + indices=[[0, 1], [0, 2], [1, 0], [1, 3]], + dense_shape=[2, 4], + values=[5, 4, 7, 2]) + keep = constant_op.constant([True, False]) + batched = inp.maybe_batch([sparse_t], + keep_input=keep, + batch_size=1, + enqueue_many=True) - with self.cached_session(): - coord = coordinator.Coordinator() - threads = queue_runner_impl.start_queue_runners(coord=coord) + with self.cached_session(): + coord = coordinator.Coordinator() + threads = 
queue_runner_impl.start_queue_runners(coord=coord) - batched_np = self.evaluate(batched) + batched_np = self.evaluate(batched) - coord.request_stop() - for thread in threads: - thread.join() + coord.request_stop() + for thread in threads: + thread.join() - self.assertAllEqual([[0, 1], [0, 2]], batched_np.indices) - self.assertAllEqual([5, 4], batched_np.values) - self.assertAllEqual([1, 4], batched_np.dense_shape) + self.assertAllEqual([[0, 1], [0, 2]], batched_np.indices) + self.assertAllEqual([5, 4], batched_np.values) + self.assertAllEqual([1, 4], batched_np.dense_shape) class BatchJoinTest(test_lib.TestCase): def _testTwoThreadsHelper(self, use_dict): - with self.cached_session() as sess: + with ops.Graph().as_default(), self.cached_session(): # Two threads, the first generates (0..69, "a"). num_a = 70 zero64 = constant_op.constant(0, dtype=dtypes.int64) @@ -1112,17 +1099,15 @@ class BatchJoinTest(test_lib.TestCase): for thread in threads: thread.join() - @test_util.run_deprecated_v1 def testTwoThreads(self): self._testTwoThreadsHelper(use_dict=False) - @test_util.run_deprecated_v1 def testTwoThreadsDict(self): self._testTwoThreadsHelper(use_dict=True) - @test_util.run_deprecated_v1 def testMismatchedDictKeys(self): - with self.assertRaisesRegex(ValueError, "must have the same keys"): + with ops.Graph().as_default(), self.assertRaisesRegex( + ValueError, "must have the same keys"): inp.batch_join( [{ "c": 12, @@ -1135,9 +1120,8 @@ class BatchJoinTest(test_lib.TestCase): }], batch_size=8) - @test_util.run_deprecated_v1 def testTwoThreadsDynamicPad(self): - with self.cached_session() as sess: + with ops.Graph().as_default(), self.cached_session(): # Two threads, the first generates (0..69, ["a"] * 1..70). num_a = 70 zero64 = constant_op.constant(0, dtype=dtypes.int64) @@ -1215,9 +1199,8 @@ class BatchJoinTest(test_lib.TestCase): for thread in threads: thread.join() - @test_util.run_deprecated_v1 def testTwoThreadsSmallerBatch(self): - with self.cached_session() as sess: + with ops.Graph().as_default(), self.cached_session(): extra_elements = 2 # Two threads, the first generates (0..69, "a"). num_a = 70 + extra_elements @@ -1317,9 +1300,8 @@ class BatchJoinTest(test_lib.TestCase): for thread in threads: thread.join() - @test_util.run_deprecated_v1 def testTwoThreadsDynamicPadSmallerBatch(self): - with self.cached_session() as sess: + with ops.Graph().as_default(), self.cached_session(): extra_elements = 2 # Two threads, the first generates (0..69, ["a"] * 1..70). 
num_a = 70 + extra_elements @@ -1418,9 +1400,8 @@ class BatchJoinTest(test_lib.TestCase): for thread in threads: thread.join() - @test_util.run_deprecated_v1 def testSharedName(self): - with self.cached_session(): + with ops.Graph().as_default(), self.cached_session(): batch_size = 10 num_batches = 3 zero64 = constant_op.constant(0, dtype=dtypes.int64) @@ -1441,21 +1422,20 @@ class BatchJoinTest(test_lib.TestCase): "s: 'SHARED_NAME_XYZ'", batched[0].op.inputs[0].op.node_def.attr["shared_name"]) - @test_util.run_deprecated_v1 def testCannotInferRankError(self): - with self.cached_session(): + with ops.Graph().as_default(), self.cached_session(): x = array_ops.placeholder(dtype=dtypes.int64) with self.assertRaisesRegex(ValueError, "Cannot infer Tensor's rank"): inp.batch_join([[x]], batch_size=2) - @test_util.run_deprecated_v1 def testSingleElementDict(self): - x = inp.batch_join([{"c": [12, 12]}], batch_size=8) - self.assertAllEqual((8, 2), x["c"].get_shape().as_list()) + with ops.Graph().as_default(): + x = inp.batch_join([{"c": [12, 12]}], batch_size=8) + self.assertAllEqual((8, 2), x["c"].get_shape().as_list()) def _testKeepInputHelper(self, num_threads, enqueue_many, keep_input_vector=False): - with self.cached_session() as sess: + with ops.Graph().as_default(), self.cached_session(): batch_size = 5 num_batches = 4 examples = variables.Variable(0) @@ -1519,123 +1499,132 @@ class BatchJoinTest(test_lib.TestCase): def testMultipleThreadKeepInputEnqueueMany(self): self._testKeepInputHelper(5, True) - @test_util.run_deprecated_v1 def testSingleThreadKeepInputPerExample(self): self._testKeepInputHelper(1, True, keep_input_vector=True) - @test_util.run_deprecated_v1 def testMultipleThreadKeepInputPerExample(self): self._testKeepInputHelper(5, True, keep_input_vector=True) - @test_util.run_deprecated_v1 def testInvalidKeepInputVector(self): - # Can't have vector `keep_input` with `enqueue_many=False`. - with self.assertRaisesRegex(ValueError, "`keep_input` cannot be a vector"): - inp.maybe_batch_join([[array_ops.zeros(5)]], - keep_input=constant_op.constant([True, False]), - batch_size=1, - enqueue_many=False) - # Can't have `keep_input` with more than one dimension. - with self.assertRaisesRegex(ValueError, "must be 0 or 1 dimensions"): - inp.maybe_batch_join([[array_ops.zeros(5)]], - keep_input=constant_op.constant([[True], [False]]), - batch_size=1, - enqueue_many=True) - # `keep_input` must have dimensions determined at graph construction. - with self.assertRaisesRegex(ValueError, - "must be known at graph construction"): - inp.maybe_batch_join([[array_ops.zeros(5)]], - keep_input=array_ops.placeholder(dtypes.bool), - batch_size=1, - enqueue_many=True) + with ops.Graph().as_default(): + # Can't have vector `keep_input` with `enqueue_many=False`. + with self.assertRaisesRegex(ValueError, + "`keep_input` cannot be a vector"): + inp.maybe_batch_join([[array_ops.zeros(5)]], + keep_input=constant_op.constant([True, False]), + batch_size=1, + enqueue_many=False) + # Can't have `keep_input` with more than one dimension. + with self.assertRaisesRegex(ValueError, "must be 0 or 1 dimensions"): + inp.maybe_batch_join([[array_ops.zeros(5)]], + keep_input=constant_op.constant([[True], [False]]), + batch_size=1, + enqueue_many=True) + # `keep_input` must have dimensions determined at graph construction. 
+ with self.assertRaisesRegex(ValueError, + "must be known at graph construction"): + inp.maybe_batch_join([[array_ops.zeros(5)]], + keep_input=array_ops.placeholder(dtypes.bool), + batch_size=1, + enqueue_many=True) - @test_util.run_deprecated_v1 def testMaybeBatchedSparseTensorInferredShape(self): - sparse = sparse_tensor.SparseTensor( - indices=[[0]], values=[1.0], dense_shape=[1]) - self.assertAllEqual((1,), sparse.dense_shape.get_shape().as_list()) - batched = inp.maybe_batch_join([[sparse]], keep_input=True, batch_size=2) - self.assertAllEqual((2,), batched.dense_shape.get_shape().as_list()) + with ops.Graph().as_default(): + sparse = sparse_tensor.SparseTensor( + indices=[[0]], values=[1.0], dense_shape=[1]) + self.assertAllEqual((1,), sparse.dense_shape.get_shape().as_list()) + batched = inp.maybe_batch_join([[sparse]], keep_input=True, batch_size=2) + self.assertAllEqual((2,), batched.dense_shape.get_shape().as_list()) - @test_util.run_deprecated_v1 def testMaybeBatchedSparseTensorInferredShapeEnqueueMany(self): - sparse = sparse_tensor.SparseTensor( - indices=[[0]], values=[1.0], dense_shape=[1]) - self.assertAllEqual((1,), sparse.dense_shape.get_shape().as_list()) - batched = inp.maybe_batch_join( - [[sparse]], keep_input=True, batch_size=2, enqueue_many=True) - self.assertAllEqual((1,), batched.dense_shape.get_shape().as_list()) + with ops.Graph().as_default(): + sparse = sparse_tensor.SparseTensor( + indices=[[0]], values=[1.0], dense_shape=[1]) + self.assertAllEqual((1,), sparse.dense_shape.get_shape().as_list()) + batched = inp.maybe_batch_join([[sparse]], + keep_input=True, + batch_size=2, + enqueue_many=True) + self.assertAllEqual((1,), batched.dense_shape.get_shape().as_list()) - @test_util.run_deprecated_v1 def testMaybeBatchedSparseTensorInferredShapeEnqueueManyPerExample(self): - sparse = sparse_tensor.SparseTensor( - indices=[[0], [0]], values=[1.0, 2.0], dense_shape=[2]) - self.assertAllEqual((1,), sparse.dense_shape.get_shape().as_list()) - batched = inp.maybe_batch_join( - [[sparse]], keep_input=[True, False], batch_size=2, enqueue_many=True) - self.assertAllEqual((1,), batched.dense_shape.get_shape().as_list()) + with ops.Graph().as_default(): + sparse = sparse_tensor.SparseTensor( + indices=[[0], [0]], values=[1.0, 2.0], dense_shape=[2]) + self.assertAllEqual((1,), sparse.dense_shape.get_shape().as_list()) + batched = inp.maybe_batch_join([[sparse]], + keep_input=[True, False], + batch_size=2, + enqueue_many=True) + self.assertAllEqual((1,), batched.dense_shape.get_shape().as_list()) - @test_util.run_deprecated_v1 def testMaybeBatchedSparseTensorInferredShapeUnknownRank(self): - sparse = sparse_tensor.SparseTensor( - indices=array_ops.placeholder(dtypes.int64), - values=array_ops.placeholder(dtypes.float32), - dense_shape=array_ops.placeholder(dtypes.int64)) - self.assertIs(None, sparse.dense_shape.get_shape().num_elements()) - batched = inp.maybe_batch_join([[sparse]], keep_input=True, batch_size=2) - self.assertIs(None, batched.dense_shape.get_shape().num_elements()) + with ops.Graph().as_default(): + sparse = sparse_tensor.SparseTensor( + indices=array_ops.placeholder(dtypes.int64), + values=array_ops.placeholder(dtypes.float32), + dense_shape=array_ops.placeholder(dtypes.int64)) + self.assertIs(None, sparse.dense_shape.get_shape().num_elements()) + batched = inp.maybe_batch_join([[sparse]], keep_input=True, batch_size=2) + self.assertIs(None, batched.dense_shape.get_shape().num_elements()) - @test_util.run_deprecated_v1 def 
testMaybeBatchedSparseTensorInferredShapeUnknownRankEnqueueMany(self): - sparse = sparse_tensor.SparseTensor( - indices=array_ops.placeholder(dtypes.int64), - values=array_ops.placeholder(dtypes.float32), - dense_shape=array_ops.placeholder(dtypes.int64)) - self.assertIs(None, sparse.dense_shape.get_shape().num_elements()) - batched = inp.maybe_batch_join( - [[sparse]], keep_input=True, batch_size=2, enqueue_many=True) - self.assertIs(None, batched.dense_shape.get_shape().num_elements()) + with ops.Graph().as_default(): + sparse = sparse_tensor.SparseTensor( + indices=array_ops.placeholder(dtypes.int64), + values=array_ops.placeholder(dtypes.float32), + dense_shape=array_ops.placeholder(dtypes.int64)) + self.assertIs(None, sparse.dense_shape.get_shape().num_elements()) + batched = inp.maybe_batch_join([[sparse]], + keep_input=True, + batch_size=2, + enqueue_many=True) + self.assertIs(None, batched.dense_shape.get_shape().num_elements()) - @test_util.run_deprecated_v1 def testMaybeBatchedSparseTensorInferredShapeUnknownRankPerExample(self): - sparse = sparse_tensor.SparseTensor( - indices=array_ops.placeholder(dtypes.int64), - values=array_ops.placeholder(dtypes.float32), - dense_shape=array_ops.placeholder(dtypes.int64)) - self.assertIs(None, sparse.dense_shape.get_shape().num_elements()) - batched = inp.maybe_batch_join( - [[sparse]], keep_input=[True, False], batch_size=2, enqueue_many=True) - self.assertIs(None, batched.dense_shape.get_shape().num_elements()) + with ops.Graph().as_default(): + sparse = sparse_tensor.SparseTensor( + indices=array_ops.placeholder(dtypes.int64), + values=array_ops.placeholder(dtypes.float32), + dense_shape=array_ops.placeholder(dtypes.int64)) + self.assertIs(None, sparse.dense_shape.get_shape().num_elements()) + batched = inp.maybe_batch_join([[sparse]], + keep_input=[True, False], + batch_size=2, + enqueue_many=True) + self.assertIs(None, batched.dense_shape.get_shape().num_elements()) - @test_util.run_deprecated_v1 def testMaybeBatchCorrectValues(self): - sparse = sparse_tensor.SparseTensor( - indices=[[0, 1], [0, 2], [1, 0], [1, 3]], - dense_shape=[2, 4], - values=[5, 4, 7, 2]) - keep = constant_op.constant([True, False]) - batched = inp.maybe_batch_join( - [[sparse]], keep_input=keep, batch_size=1, enqueue_many=True) + with ops.Graph().as_default(): + sparse = sparse_tensor.SparseTensor( + indices=[[0, 1], [0, 2], [1, 0], [1, 3]], + dense_shape=[2, 4], + values=[5, 4, 7, 2]) + keep = constant_op.constant([True, False]) + batched = inp.maybe_batch_join([[sparse]], + keep_input=keep, + batch_size=1, + enqueue_many=True) - with self.cached_session(): - coord = coordinator.Coordinator() - threads = queue_runner_impl.start_queue_runners(coord=coord) + with self.cached_session(): + coord = coordinator.Coordinator() + threads = queue_runner_impl.start_queue_runners(coord=coord) - batched_np = self.evaluate(batched) + batched_np = self.evaluate(batched) - coord.request_stop() - for thread in threads: - thread.join() + coord.request_stop() + for thread in threads: + thread.join() - self.assertAllEqual([[0, 1], [0, 2]], batched_np.indices) - self.assertAllEqual([5, 4], batched_np.values) - self.assertAllEqual([1, 4], batched_np.dense_shape) + self.assertAllEqual([[0, 1], [0, 2]], batched_np.indices) + self.assertAllEqual([5, 4], batched_np.values) + self.assertAllEqual([1, 4], batched_np.dense_shape) class ShuffleBatchTest(test_lib.TestCase): def _testOneThreadHelper(self, use_dict): - with self.cached_session() as sess: + with ops.Graph().as_default(), 
self.cached_session(): batch_size = 10 num_batches = 3 zero64 = constant_op.constant(0, dtype=dtypes.int64) @@ -1693,17 +1682,14 @@ class ShuffleBatchTest(test_lib.TestCase): for thread in threads: thread.join() - @test_util.run_deprecated_v1 def testOneThread(self): self._testOneThreadHelper(use_dict=False) - @test_util.run_deprecated_v1 def testOneThreadDict(self): self._testOneThreadHelper(use_dict=True) - @test_util.run_deprecated_v1 def testOneThreadSmallerBatch(self): - with self.cached_session() as sess: + with ops.Graph().as_default(), self.cached_session(): batch_size = 10 num_batches = 3 extra_elements = 5 @@ -1758,9 +1744,8 @@ class ShuffleBatchTest(test_lib.TestCase): for thread in threads: thread.join() - @test_util.run_deprecated_v1 def testManyThreads(self): - with self.cached_session() as sess: + with ops.Graph().as_default(), self.cached_session(): batch_size = 10 num_batches = 3 zero64 = constant_op.constant(0, dtype=dtypes.int64) @@ -1806,9 +1791,8 @@ class ShuffleBatchTest(test_lib.TestCase): for thread in threads: thread.join() - @test_util.run_deprecated_v1 def testManyThreadsSmallerBatch(self): - with self.cached_session() as sess: + with ops.Graph().as_default(), self.cached_session(): batch_size = 10 num_batches = 3 extra_elements = 5 @@ -1865,9 +1849,8 @@ class ShuffleBatchTest(test_lib.TestCase): for thread in threads: thread.join() - @test_util.run_deprecated_v1 def testSharedName(self): - with self.cached_session(): + with ops.Graph().as_default(), self.cached_session(): batch_size = 10 num_batches = 3 zero64 = constant_op.constant(0, dtype=dtypes.int64) @@ -1887,7 +1870,7 @@ class ShuffleBatchTest(test_lib.TestCase): def _testKeepInputHelper(self, num_threads, enqueue_many, keep_input_vector=False): - with self.cached_session() as sess: + with ops.Graph().as_default(), self.cached_session(): batch_size = 5 num_batches = 4 examples = variables.Variable(0) @@ -1950,96 +1933,118 @@ class ShuffleBatchTest(test_lib.TestCase): def testMultipleThreadKeepInputEnqueueMany(self): self._testKeepInputHelper(5, True) - @test_util.run_deprecated_v1 def testSingleThreadKeepInputPerExample(self): self._testKeepInputHelper(1, True, keep_input_vector=True) - @test_util.run_deprecated_v1 def testMultipleThreadKeepInputPerExample(self): self._testKeepInputHelper(5, True, keep_input_vector=True) - @test_util.run_deprecated_v1 def testInvalidKeepInputVector(self): - # Can't have vector `keep_input` with `enqueue_many=False`. - with self.assertRaisesRegex(ValueError, "`keep_input` cannot be a vector"): - inp.maybe_shuffle_batch([array_ops.zeros(5)], 1, 10, 1, - keep_input=constant_op.constant([True, False]), - enqueue_many=False) - # Can't have `keep_input` with more than one dimension. - with self.assertRaisesRegex(ValueError, "must be 0 or 1 dimensions"): - inp.maybe_shuffle_batch([array_ops.zeros(5)], 1, 10, 1, - keep_input=constant_op.constant([[True]]), - enqueue_many=True) - # `keep_input` must have dimensions determined at graph construction. - with self.assertRaisesRegex(ValueError, - "must be known at graph construction"): - inp.maybe_shuffle_batch([array_ops.zeros(5)], 1, 10, 1, - keep_input=array_ops.placeholder(dtypes.bool), - enqueue_many=True) + with ops.Graph().as_default(): + # Can't have vector `keep_input` with `enqueue_many=False`. 
+ with self.assertRaisesRegex(ValueError, + "`keep_input` cannot be a vector"): + inp.maybe_shuffle_batch([array_ops.zeros(5)], + 1, + 10, + 1, + keep_input=constant_op.constant([True, False]), + enqueue_many=False) + # Can't have `keep_input` with more than one dimension. + with self.assertRaisesRegex(ValueError, "must be 0 or 1 dimensions"): + inp.maybe_shuffle_batch([array_ops.zeros(5)], + 1, + 10, + 1, + keep_input=constant_op.constant([[True]]), + enqueue_many=True) + # `keep_input` must have dimensions determined at graph construction. + with self.assertRaisesRegex(ValueError, + "must be known at graph construction"): + inp.maybe_shuffle_batch([array_ops.zeros(5)], + 1, + 10, + 1, + keep_input=array_ops.placeholder(dtypes.bool), + enqueue_many=True) - @test_util.run_deprecated_v1 def testMaybeBatchedSparseTensorInferredShape(self): - sparse = sparse_tensor.SparseTensor( - indices=[[0]], values=[1.0], dense_shape=[1]) - self.assertAllEqual((1,), sparse.dense_shape.get_shape().as_list()) - batched = inp.maybe_shuffle_batch([sparse], 2, 10, 1, True) - self.assertAllEqual((2,), batched.dense_shape.get_shape().as_list()) + with ops.Graph().as_default(): + sparse = sparse_tensor.SparseTensor( + indices=[[0]], values=[1.0], dense_shape=[1]) + self.assertAllEqual((1,), sparse.dense_shape.get_shape().as_list()) + batched = inp.maybe_shuffle_batch([sparse], 2, 10, 1, True) + self.assertAllEqual((2,), batched.dense_shape.get_shape().as_list()) - @test_util.run_deprecated_v1 def testMaybeBatchedSparseTensorInferredShapeEnqueueMany(self): - sparse = sparse_tensor.SparseTensor( - indices=[[0]], values=[1.0], dense_shape=[1]) - self.assertAllEqual((1,), sparse.dense_shape.get_shape().as_list()) - batched = inp.maybe_shuffle_batch( - [sparse], 2, 10, 1, True, enqueue_many=True) - self.assertAllEqual((1,), batched.dense_shape.get_shape().as_list()) + with ops.Graph().as_default(): + sparse = sparse_tensor.SparseTensor( + indices=[[0]], values=[1.0], dense_shape=[1]) + self.assertAllEqual((1,), sparse.dense_shape.get_shape().as_list()) + batched = inp.maybe_shuffle_batch([sparse], + 2, + 10, + 1, + True, + enqueue_many=True) + self.assertAllEqual((1,), batched.dense_shape.get_shape().as_list()) - @test_util.run_deprecated_v1 def testMaybeBatchedSparseTensorInferredShapeEnqueueManyPerExample(self): - sparse = sparse_tensor.SparseTensor( - indices=[[0], [0]], values=[1.0, 2.0], dense_shape=[2]) - self.assertAllEqual((1,), sparse.dense_shape.get_shape().as_list()) - batched = inp.maybe_shuffle_batch( - [sparse], 2, 10, 1, [True, False], enqueue_many=True) - self.assertAllEqual((1,), batched.dense_shape.get_shape().as_list()) + with ops.Graph().as_default(): + sparse = sparse_tensor.SparseTensor( + indices=[[0], [0]], values=[1.0, 2.0], dense_shape=[2]) + self.assertAllEqual((1,), sparse.dense_shape.get_shape().as_list()) + batched = inp.maybe_shuffle_batch([sparse], + 2, + 10, + 1, [True, False], + enqueue_many=True) + self.assertAllEqual((1,), batched.dense_shape.get_shape().as_list()) - @test_util.run_deprecated_v1 def testMaybeBatchedSparseTensorInferredShapeUnknownRank(self): - sparse = sparse_tensor.SparseTensor( - indices=array_ops.placeholder(dtypes.int64), - values=array_ops.placeholder(dtypes.float32), - dense_shape=array_ops.placeholder(dtypes.int64)) - self.assertIs(None, sparse.dense_shape.get_shape().num_elements()) - batched = inp.maybe_shuffle_batch([sparse], 2, 10, 1, True) - self.assertIs(None, batched.dense_shape.get_shape().num_elements()) + with ops.Graph().as_default(): + sparse = 
sparse_tensor.SparseTensor( + indices=array_ops.placeholder(dtypes.int64), + values=array_ops.placeholder(dtypes.float32), + dense_shape=array_ops.placeholder(dtypes.int64)) + self.assertIs(None, sparse.dense_shape.get_shape().num_elements()) + batched = inp.maybe_shuffle_batch([sparse], 2, 10, 1, True) + self.assertIs(None, batched.dense_shape.get_shape().num_elements()) - @test_util.run_deprecated_v1 def testMaybeBatchedSparseTensorInferredShapeUnknownRankEnqueueMany(self): - sparse = sparse_tensor.SparseTensor( - indices=array_ops.placeholder(dtypes.int64), - values=array_ops.placeholder(dtypes.float32), - dense_shape=array_ops.placeholder(dtypes.int64)) - self.assertIs(None, sparse.dense_shape.get_shape().num_elements()) - batched = inp.maybe_shuffle_batch( - [sparse], 2, 10, 1, True, enqueue_many=True) - self.assertIs(None, batched.dense_shape.get_shape().num_elements()) + with ops.Graph().as_default(): + sparse = sparse_tensor.SparseTensor( + indices=array_ops.placeholder(dtypes.int64), + values=array_ops.placeholder(dtypes.float32), + dense_shape=array_ops.placeholder(dtypes.int64)) + self.assertIs(None, sparse.dense_shape.get_shape().num_elements()) + batched = inp.maybe_shuffle_batch([sparse], + 2, + 10, + 1, + True, + enqueue_many=True) + self.assertIs(None, batched.dense_shape.get_shape().num_elements()) - @test_util.run_deprecated_v1 def testMaybeBatchedSparseTensorInferredShapeUnknownRankPerExample(self): - sparse = sparse_tensor.SparseTensor( - indices=array_ops.placeholder(dtypes.int64), - values=array_ops.placeholder(dtypes.float32), - dense_shape=array_ops.placeholder(dtypes.int64)) - self.assertIs(None, sparse.dense_shape.get_shape().num_elements()) - batched = inp.maybe_shuffle_batch( - [sparse], 2, 10, 1, [True, False], enqueue_many=True) - self.assertIs(None, batched.dense_shape.get_shape().num_elements()) + with ops.Graph().as_default(): + sparse = sparse_tensor.SparseTensor( + indices=array_ops.placeholder(dtypes.int64), + values=array_ops.placeholder(dtypes.float32), + dense_shape=array_ops.placeholder(dtypes.int64)) + self.assertIs(None, sparse.dense_shape.get_shape().num_elements()) + batched = inp.maybe_shuffle_batch([sparse], + 2, + 10, + 1, [True, False], + enqueue_many=True) + self.assertIs(None, batched.dense_shape.get_shape().num_elements()) class ShuffleBatchJoinTest(test_lib.TestCase): def _testTwoThreadsHelper(self, use_dict): - with self.cached_session() as sess: + with ops.Graph().as_default(), self.cached_session(): # Two threads, the first generates (0..24, "a"). num_a = 25 zero64 = constant_op.constant(0, dtype=dtypes.int64) @@ -2143,17 +2148,14 @@ class ShuffleBatchJoinTest(test_lib.TestCase): for thread in threads: thread.join() - @test_util.run_deprecated_v1 def testTwoThreads(self): self._testTwoThreadsHelper(use_dict=False) - @test_util.run_deprecated_v1 def testTwoThreadsDict(self): self._testTwoThreadsHelper(use_dict=True) - @test_util.run_deprecated_v1 def testTwoThreadsSmallerBatch(self): - with self.cached_session() as sess: + with ops.Graph().as_default(), self.cached_session(): # Two threads, the first generates (0..26, "a"). 
extra_elements = 2 num_a = 25 + extra_elements @@ -2255,9 +2257,9 @@ class ShuffleBatchJoinTest(test_lib.TestCase): for thread in threads: thread.join() - @test_util.run_deprecated_v1 def testMismatchedDictKeys(self): - with self.assertRaisesRegex(ValueError, "must have the same keys"): + with ops.Graph().as_default(), self.assertRaisesRegex( + ValueError, "must have the same keys"): inp.shuffle_batch_join( [{ "c": 12, @@ -2273,9 +2275,8 @@ class ShuffleBatchJoinTest(test_lib.TestCase): min_after_dequeue=16, seed=223607) - @test_util.run_deprecated_v1 def testSharedName(self): - with self.cached_session(): + with ops.Graph().as_default(), self.cached_session(): batch_size = 10 num_batches = 3 zero64 = constant_op.constant(0, dtype=dtypes.int64) @@ -2300,7 +2301,7 @@ class ShuffleBatchJoinTest(test_lib.TestCase): def _testKeepInputHelper(self, num_threads, enqueue_many, keep_input_vector=False): - with self.cached_session() as sess: + with ops.Graph().as_default(), self.cached_session(): batch_size = 5 num_batches = 4 examples = variables.Variable(0) @@ -2362,93 +2363,114 @@ class ShuffleBatchJoinTest(test_lib.TestCase): def testMultipleThreadKeepInputEnqueueMany(self): self._testKeepInputHelper(5, True) - @test_util.run_deprecated_v1 def testSingleThreadKeepInputPerExample(self): self._testKeepInputHelper(1, True, keep_input_vector=True) - @test_util.run_deprecated_v1 def testMultipleThreadKeepInputPerExample(self): self._testKeepInputHelper(5, True, keep_input_vector=True) - @test_util.run_deprecated_v1 def testInvalidKeepInputVector(self): - # Can't have vector `keep_input` with `enqueue_many=False`. - with self.assertRaisesRegex(ValueError, "`keep_input` cannot be a vector"): - inp.maybe_shuffle_batch_join( - [[array_ops.zeros(5)]], 1, 10, 1, - keep_input=constant_op.constant([True, False]), - enqueue_many=False) - # Can't have `keep_input` with more than one dimension. - with self.assertRaisesRegex(ValueError, "must be 0 or 1 dimensions"): - inp.maybe_shuffle_batch_join( - [[array_ops.zeros(5)]], 1, 10, 1, - keep_input=constant_op.constant([[True]]), - enqueue_many=True) - # `keep_input` must have dimensions determined at graph construction. - with self.assertRaisesRegex(ValueError, - "must be known at graph construction"): - inp.maybe_shuffle_batch_join( - [[array_ops.zeros(5)]], 1, 10, 1, - keep_input=array_ops.placeholder(dtypes.bool), - enqueue_many=True) + with ops.Graph().as_default(): + # Can't have vector `keep_input` with `enqueue_many=False`. + with self.assertRaisesRegex(ValueError, + "`keep_input` cannot be a vector"): + inp.maybe_shuffle_batch_join([[array_ops.zeros(5)]], + 1, + 10, + 1, + keep_input=constant_op.constant( + [True, False]), + enqueue_many=False) + # Can't have `keep_input` with more than one dimension. + with self.assertRaisesRegex(ValueError, "must be 0 or 1 dimensions"): + inp.maybe_shuffle_batch_join([[array_ops.zeros(5)]], + 1, + 10, + 1, + keep_input=constant_op.constant([[True]]), + enqueue_many=True) + # `keep_input` must have dimensions determined at graph construction. 
+ with self.assertRaisesRegex(ValueError, + "must be known at graph construction"): + inp.maybe_shuffle_batch_join([[array_ops.zeros(5)]], + 1, + 10, + 1, + keep_input=array_ops.placeholder( + dtypes.bool), + enqueue_many=True) - @test_util.run_deprecated_v1 def testMaybeBatchedSparseTensorInferredShape(self): - sparse = sparse_tensor.SparseTensor( - indices=[[0]], values=[1.0], dense_shape=[1]) - self.assertAllEqual((1,), sparse.dense_shape.get_shape().as_list()) - batched = inp.maybe_shuffle_batch_join([[sparse]], 2, 10, 1, True) - self.assertAllEqual((2,), batched.dense_shape.get_shape().as_list()) + with ops.Graph().as_default(): + sparse = sparse_tensor.SparseTensor( + indices=[[0]], values=[1.0], dense_shape=[1]) + self.assertAllEqual((1,), sparse.dense_shape.get_shape().as_list()) + batched = inp.maybe_shuffle_batch_join([[sparse]], 2, 10, 1, True) + self.assertAllEqual((2,), batched.dense_shape.get_shape().as_list()) - @test_util.run_deprecated_v1 def testMaybeBatchedSparseTensorInferredShapeEnqueueMany(self): - sparse = sparse_tensor.SparseTensor( - indices=[[0]], values=[1.0], dense_shape=[1]) - self.assertAllEqual((1,), sparse.dense_shape.get_shape().as_list()) - batched = inp.maybe_shuffle_batch_join( - [[sparse]], 2, 10, 1, True, enqueue_many=True) - self.assertAllEqual((1,), batched.dense_shape.get_shape().as_list()) + with ops.Graph().as_default(): + sparse = sparse_tensor.SparseTensor( + indices=[[0]], values=[1.0], dense_shape=[1]) + self.assertAllEqual((1,), sparse.dense_shape.get_shape().as_list()) + batched = inp.maybe_shuffle_batch_join([[sparse]], + 2, + 10, + 1, + True, + enqueue_many=True) + self.assertAllEqual((1,), batched.dense_shape.get_shape().as_list()) - @test_util.run_deprecated_v1 def testMaybeBatchedSparseTensorInferredShapeEnqueueManyPerExample(self): - sparse = sparse_tensor.SparseTensor( - indices=[[0], [0]], values=[1.0, 2.0], dense_shape=[2]) - self.assertAllEqual((1,), sparse.dense_shape.get_shape().as_list()) - batched = inp.maybe_shuffle_batch_join( - [[sparse]], 2, 10, 1, [True, False], enqueue_many=True) - self.assertAllEqual((1,), batched.dense_shape.get_shape().as_list()) + with ops.Graph().as_default(): + sparse = sparse_tensor.SparseTensor( + indices=[[0], [0]], values=[1.0, 2.0], dense_shape=[2]) + self.assertAllEqual((1,), sparse.dense_shape.get_shape().as_list()) + batched = inp.maybe_shuffle_batch_join([[sparse]], + 2, + 10, + 1, [True, False], + enqueue_many=True) + self.assertAllEqual((1,), batched.dense_shape.get_shape().as_list()) - @test_util.run_deprecated_v1 def testMaybeBatchedSparseTensorInferredShapeUnknownRank(self): - sparse = sparse_tensor.SparseTensor( - indices=array_ops.placeholder(dtypes.int64), - values=array_ops.placeholder(dtypes.float32), - dense_shape=array_ops.placeholder(dtypes.int64)) - self.assertIs(None, sparse.dense_shape.get_shape().num_elements()) - batched = inp.maybe_shuffle_batch_join([[sparse]], 2, 10, 1, True) - self.assertIs(None, batched.dense_shape.get_shape().num_elements()) + with ops.Graph().as_default(): + sparse = sparse_tensor.SparseTensor( + indices=array_ops.placeholder(dtypes.int64), + values=array_ops.placeholder(dtypes.float32), + dense_shape=array_ops.placeholder(dtypes.int64)) + self.assertIs(None, sparse.dense_shape.get_shape().num_elements()) + batched = inp.maybe_shuffle_batch_join([[sparse]], 2, 10, 1, True) + self.assertIs(None, batched.dense_shape.get_shape().num_elements()) - @test_util.run_deprecated_v1 def testMaybeBatchedSparseTensorInferredShapeUnknownRankEnqueueMany(self): - 
sparse = sparse_tensor.SparseTensor( - indices=array_ops.placeholder(dtypes.int64), - values=array_ops.placeholder(dtypes.float32), - dense_shape=array_ops.placeholder(dtypes.int64)) - self.assertIs(None, sparse.dense_shape.get_shape().num_elements()) - batched = inp.maybe_shuffle_batch_join( - [[sparse]], 2, 10, 1, True, enqueue_many=True) - self.assertIs(None, batched.dense_shape.get_shape().num_elements()) + with ops.Graph().as_default(): + sparse = sparse_tensor.SparseTensor( + indices=array_ops.placeholder(dtypes.int64), + values=array_ops.placeholder(dtypes.float32), + dense_shape=array_ops.placeholder(dtypes.int64)) + self.assertIs(None, sparse.dense_shape.get_shape().num_elements()) + batched = inp.maybe_shuffle_batch_join([[sparse]], + 2, + 10, + 1, + True, + enqueue_many=True) + self.assertIs(None, batched.dense_shape.get_shape().num_elements()) - @test_util.run_deprecated_v1 def testMaybeBatchedSparseTensorInferredShapeUnknownRankPerExample(self): - sparse = sparse_tensor.SparseTensor( - indices=array_ops.placeholder(dtypes.int64), - values=array_ops.placeholder(dtypes.float32), - dense_shape=array_ops.placeholder(dtypes.int64)) - self.assertIs(None, sparse.dense_shape.get_shape().num_elements()) - batched = inp.maybe_shuffle_batch_join( - [[sparse]], 2, 10, 1, [True, False], enqueue_many=True) - self.assertIs(None, batched.dense_shape.get_shape().num_elements()) + with ops.Graph().as_default(): + sparse = sparse_tensor.SparseTensor( + indices=array_ops.placeholder(dtypes.int64), + values=array_ops.placeholder(dtypes.float32), + dense_shape=array_ops.placeholder(dtypes.int64)) + self.assertIs(None, sparse.dense_shape.get_shape().num_elements()) + batched = inp.maybe_shuffle_batch_join([[sparse]], + 2, + 10, + 1, [True, False], + enqueue_many=True) + self.assertIs(None, batched.dense_shape.get_shape().num_elements()) if __name__ == "__main__": From 3707838a0d44f05202f9bc599e60edb2bc967fee Mon Sep 17 00:00:00 2001 From: Andy Ly Date: Fri, 24 Jul 2020 16:37:35 -0700 Subject: [PATCH 1310/2522] Pass non empty MLIR module serialized string when constructing TpuCompilationCacheKey. Added a test for MLIR bridge using TPUStrategy compiling two programs with the same signature but different bodies. PiperOrigin-RevId: 323096104 Change-Id: I2d2cd7033f762a0756b7de2ed44aa411234d8ca9 --- .../core/tpu/kernels/tpu_compile_op_common.cc | 5 +- tensorflow/python/distribute/BUILD | 15 ++++ .../tpu_strategy_compilation_test.py | 87 +++++++++++++++++++ 3 files changed, 104 insertions(+), 3 deletions(-) create mode 100644 tensorflow/python/distribute/tpu_strategy_compilation_test.py diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc b/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc index 4f10b4761e3..168d2507e34 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc @@ -662,9 +662,8 @@ Status TpuCompileOpKernelCommon::ComputeInternal(OpKernelContext* ctx) { } const TpuCompilationCacheKey key = CreateCompilationCacheKey( - function_.name(), metadata_.function_library_fingerprint(), - /*mlir_module=*/"", guaranteed_constants, dynamic_shapes, metadata_, - *mesh_state); + function_.name(), metadata_.function_library_fingerprint(), mlir_module_, + guaranteed_constants, dynamic_shapes, metadata_, *mesh_state); // Process-wide cache of TPU executables. 
TpuCompilationCacheInterface* cache; diff --git a/tensorflow/python/distribute/BUILD b/tensorflow/python/distribute/BUILD index f3f98fe50de..356fb3a7a9f 100644 --- a/tensorflow/python/distribute/BUILD +++ b/tensorflow/python/distribute/BUILD @@ -657,6 +657,21 @@ tpu_py_test( ], ) +tpu_py_test( + name = "tpu_strategy_compilation_test", + srcs = ["tpu_strategy_compilation_test.py"], + disable_experimental = True, + disable_mlir_bridge = False, + python_version = "PY3", + tags = ["no_oss"], + deps = [ + ":tpu_strategy", + "//tensorflow/python/distribute/cluster_resolver:cluster_resolver_lib", + "//tensorflow/python/eager:remote", + "//tensorflow/python/eager:test", + ], +) + # Used only by estimator. py_library( name = "estimator_training", diff --git a/tensorflow/python/distribute/tpu_strategy_compilation_test.py b/tensorflow/python/distribute/tpu_strategy_compilation_test.py new file mode 100644 index 00000000000..ed61c063a4f --- /dev/null +++ b/tensorflow/python/distribute/tpu_strategy_compilation_test.py @@ -0,0 +1,87 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for TPUStrategy in regards to compiling programs.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.distribute import tpu_strategy as tpu_lib +from tensorflow.python.distribute.cluster_resolver import tpu_cluster_resolver +from tensorflow.python.eager import def_function +from tensorflow.python.eager import remote +from tensorflow.python.eager import test +from tensorflow.python.framework import constant_op +from tensorflow.python.platform import flags +from tensorflow.python.tpu import tpu_strategy_util + +FLAGS = flags.FLAGS +flags.DEFINE_string("tpu", "", "Name of TPU to connect to.") +flags.DEFINE_string("project", None, "Name of GCP project with TPU.") +flags.DEFINE_string("zone", None, "Name of GCP zone with TPU.") + + +def get_tpu_cluster_resolver(): + resolver = tpu_cluster_resolver.TPUClusterResolver( + tpu=FLAGS.tpu, + zone=FLAGS.zone, + project=FLAGS.project, + ) + return resolver + + +def get_tpu_strategy(): + resolver = get_tpu_cluster_resolver() + remote.connect_to_cluster(resolver) + tpu_strategy_util.initialize_tpu_system(resolver) + strategy = tpu_lib.TPUStrategyV2(resolver) + return strategy + + +# TODO(b/158494076): Merge this test back into TPUStrategy tests +# (tpu_strategy_test) once MLIR bridge is enabled by default. 
+class TPUStrategyCompilationTest(test.TestCase): + + def test_functions_compile_same_signature(self): + """Tests compiling different functions with the same signature.""" + strategy = get_tpu_strategy() + + @def_function.function + def return_one(): + + def computation(): + return constant_op.constant(1) + + return strategy.run(computation) + + @def_function.function + def return_two(): + + def computation(): + return constant_op.constant(2) + + return strategy.run(computation) + + expected_result_ones = [1 for _ in range(0, strategy.num_replicas_in_sync)] + self.assertAllEqual(expected_result_ones, + strategy.experimental_local_results(return_one())) + + expected_result_twos = [2 for _ in range(0, strategy.num_replicas_in_sync)] + self.assertAllEqual(expected_result_twos, + strategy.experimental_local_results(return_two())) + + +if __name__ == "__main__": + test.main() From 8ff5a68e5dccc0129a0ae443d265ee56aa475d99 Mon Sep 17 00:00:00 2001 From: Xingyu Long Date: Fri, 24 Jul 2020 19:57:39 -0400 Subject: [PATCH 1311/2522] Keep # of gpus as 2 --- .../antirectifier_benchmark_test.py | 20 ------------------ .../bidirectional_lstm_benchmark_test.py | 20 ------------------ .../cifar10_cnn_benchmark_test.py | 21 ------------------- 3 files changed, 61 deletions(-) diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/antirectifier_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/antirectifier_benchmark_test.py index a7c171a6f68..39a4d7ee698 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/antirectifier_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/antirectifier_benchmark_test.py @@ -104,26 +104,6 @@ class AntirectifierBenchmark(tf.test.Benchmark): self.report_benchmark( iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras) - def benchmark_antirectifier_bs_512_gpu_1(self): - """Measure performance with batch_size=512, run_iters=4, gpu=1 and - distribution_strategy=`mirrored`.""" - batch_size = 512 - run_iters = 4 - metrics, wall_time, extras = benchmark_util.measure_performance( - self._build_model, - x=self.x_train, - y=self.y_train, - batch_size=batch_size, - run_iters=run_iters, - num_gpus=1, - distribution_strategy='mirrored', - optimizer="rmsprop", - loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), - metrics=["sparse_categorical_accuracy"]) - - self.report_benchmark( - iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras) - def benchmark_antirectifier_bs_512_gpu_2(self): """Measure performance with batch_size=512, run_iters=4, gpu=2 and distribution_strategy=`mirrored`.""" diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py index 75581ac6c7f..397a71ed31e 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py @@ -106,26 +106,6 @@ class BidirectionalLSTMBenchmark(tf.test.Benchmark): self.report_benchmark( iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras) - def benchmark_bidirect_lstm_imdb_bs_512_gpu_1(self): - """Measure performance with batch_size=512, run_iters=4, gpu=1 and - distribution_strategy=`mirrored`.""" - batch_size = 512 - run_iters = 4 - metrics, wall_time, extras = 
benchmark_util.measure_performance( - self._build_model, - x=self.imdb_x, - y=self.imdb_y, - batch_size=batch_size, - run_iters=run_iters, - num_gpus=1, - distribution_strategy="mirrored", - optimizer='adam', - loss='binary_crossentropy', - metrics=['accuracy']) - - self.report_benchmark( - iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras) - def benchmark_bidirect_lstm_imdb_bs_512_gpu_2(self): """Measure performance with batch_size=512, run_iters=4, gpu=2 and distribution_strategy=`mirrored`.""" diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/cifar10_cnn_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/cifar10_cnn_benchmark_test.py index 20f478adba1..0c426072a53 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/cifar10_cnn_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/cifar10_cnn_benchmark_test.py @@ -123,27 +123,6 @@ class Cifar10CNNBenchmark(tf.test.Benchmark): self.report_benchmark( iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras) - def benchmark_cnn_cifar10_bs_1024_gpu_1(self): - """Measure performance with batch_size=1024, run_iters=2, gpu=1 and - distribution_strategy=`mirrored`.""" - batch_size = 1024 - run_iters = 2 - metrics, wall_time, extras = benchmark_util.measure_performance( - self._build_model, - x=self.x_train, - y=self.y_train, - batch_size=batch_size, - run_iters=run_iters, - num_gpus=1, - distribution_strategy="mirrored", - epochs=self.epochs, - optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.0001, decay=1e-6), - loss='categorical_crossentropy', - metrics=['accuracy']) - - self.report_benchmark( - iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras) - def benchmark_cnn_cifar10_bs_1024_gpu_2(self): """Measure performance with batch_size=1024, run_iters=2, gpu=2 and distribution_strategy=`mirrored`.""" From dd918be82cb9702cc9ca022179629fbd8c6d3ed9 Mon Sep 17 00:00:00 2001 From: Nat Jeffries Date: Fri, 24 Jul 2020 17:07:04 -0700 Subject: [PATCH 1312/2522] Fix zero point of -128 causing check error in tflite int8 add kernel. Fixes https://github.com/tensorflow/tensorflow/issues/41674 PiperOrigin-RevId: 323100933 Change-Id: Id8bcbb47dcae13192fd85d55407882a0f736f656 --- .../internal/reference/integer_ops/add.h | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/tensorflow/lite/kernels/internal/reference/integer_ops/add.h b/tensorflow/lite/kernels/internal/reference/integer_ops/add.h index 88ca246eaf4..2af6f373ca5 100644 --- a/tensorflow/lite/kernels/internal/reference/integer_ops/add.h +++ b/tensorflow/lite/kernels/internal/reference/integer_ops/add.h @@ -23,16 +23,23 @@ limitations under the License. namespace tflite { namespace reference_integer_ops { +inline void CheckArithmeticParams(const ArithmeticParams& params) { + TFLITE_DCHECK_LE(params.quantized_activation_min, + params.quantized_activation_max); + // Input offset is negative input zero point. Activation tensors are + // asymmetric quantized so they span the full int8 range. + TFLITE_DCHECK_GE(-params.input1_offset, std::numeric_limits::min()); + TFLITE_DCHECK_GE(-params.input2_offset, std::numeric_limits::min()); + TFLITE_DCHECK_LE(-params.input1_offset, std::numeric_limits::max()); + TFLITE_DCHECK_LE(-params.input2_offset, std::numeric_limits::max()); +} + // Element-wise add that can often be used for inner loop of broadcast add as // well as the non-broadcast add. 
inline void AddElementwise(int size, const ArithmeticParams& params, const int8_t* input1_data, const int8_t* input2_data, int8_t* output_data) { - const int32_t int8_max_value = std::numeric_limits::max(); - TFLITE_DCHECK_GE(params.input1_offset, -1 * int8_max_value); - TFLITE_DCHECK_GE(params.input2_offset, -1 * int8_max_value); - TFLITE_DCHECK_LE(params.input1_offset, int8_max_value); - TFLITE_DCHECK_LE(params.input2_offset, int8_max_value); + CheckArithmeticParams(params); for (int i = 0; i < size; ++i) { const int32_t input1_val = params.input1_offset + input1_data[i]; @@ -61,16 +68,11 @@ inline void Add(const ArithmeticParams& params, const RuntimeShape& input1_shape, const int8_t* input1_data, const RuntimeShape& input2_shape, const int8_t* input2_data, const RuntimeShape& output_shape, int8_t* output_data) { - TFLITE_DCHECK_LE(params.quantized_activation_min, - params.quantized_activation_max); + CheckArithmeticParams(params); + const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape); - const int32_t int8_max_value = std::numeric_limits::max(); - TFLITE_DCHECK_GE(params.input1_offset, -1 * int8_max_value); - TFLITE_DCHECK_GE(params.input2_offset, -1 * int8_max_value); - TFLITE_DCHECK_LE(params.input1_offset, int8_max_value); - TFLITE_DCHECK_LE(params.input2_offset, int8_max_value); AddElementwise(flat_size, params, input1_data, input2_data, output_data); } From d86293c9fdeb829d184e6e3d7aead9116f31741f Mon Sep 17 00:00:00 2001 From: Jiho Choi Date: Fri, 24 Jul 2020 17:11:48 -0700 Subject: [PATCH 1313/2522] Hide tf.data Produce/Consume events in the trace viewer. PiperOrigin-RevId: 323101625 Change-Id: I5898ee322f387a97f000c60f31b2b8677760eabf --- .../convert/xplane_to_trace_events.cc | 5 +- tensorflow/core/profiler/utils/BUILD | 1 - .../core/profiler/utils/xplane_schema.cc | 48 +++++++++++++++---- .../core/profiler/utils/xplane_schema.h | 4 ++ 4 files changed, 45 insertions(+), 13 deletions(-) diff --git a/tensorflow/core/profiler/convert/xplane_to_trace_events.cc b/tensorflow/core/profiler/convert/xplane_to_trace_events.cc index ceb3e003564..cf8e5c7c54a 100644 --- a/tensorflow/core/profiler/convert/xplane_to_trace_events.cc +++ b/tensorflow/core/profiler/convert/xplane_to_trace_events.cc @@ -71,10 +71,7 @@ void ConvertXPlaneToTraceEvents(uint32 device_id, const XPlaneVisitor& xplane, [device_id, resource_id, trace](const XEventVisitor& xevent) { int64 event_type = xevent.Type().value_or(HostEventType::kUnknownHostEventType); - if (event_type == HostEventType::kMemoryAllocation || - event_type == HostEventType::kMemoryDeallocation) { - return; - } + if (IsInternalEvent(event_type)) return; auto* event = trace->add_trace_events(); auto& args = *event->mutable_args(); event->set_device_id(device_id); diff --git a/tensorflow/core/profiler/utils/BUILD b/tensorflow/core/profiler/utils/BUILD index 89c416108a5..03c8d630b1e 100644 --- a/tensorflow/core/profiler/utils/BUILD +++ b/tensorflow/core/profiler/utils/BUILD @@ -200,7 +200,6 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "@com_google_absl//absl/container:flat_hash_map", - "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:optional", ], diff --git a/tensorflow/core/profiler/utils/xplane_schema.cc b/tensorflow/core/profiler/utils/xplane_schema.cc index d3b007dbf6c..46ee5db2f71 100644 --- a/tensorflow/core/profiler/utils/xplane_schema.cc +++ b/tensorflow/core/profiler/utils/xplane_schema.cc @@ -16,7 +16,6 @@ limitations 
under the License. #include "tensorflow/core/profiler/utils/xplane_schema.h" #include "absl/container/flat_hash_map.h" -#include "absl/container/flat_hash_set.h" #include "absl/strings/string_view.h" #include "absl/types/optional.h" #include "tensorflow/core/lib/gtl/map_util.h" @@ -99,6 +98,8 @@ const HostEventTypeMap& GetHostEventTypeMap() { {"PrefetchConsume", kPrefetchConsume}, {"ParallelInterleaveProduce", kParallelInterleaveProduce}, {"ParallelInterleaveConsume", kParallelInterleaveConsume}, + {"ParallelInterleaveInitializeInput", + kParallelInterleaveInitializedInput}, {"ParallelMapProduce", kParallelMapProduce}, {"ParallelMapConsume", kParallelMapConsume}, {"MapAndBatchProduce", kMapAndBatchProduce}, @@ -233,14 +234,45 @@ absl::optional FindStatType(absl::string_view stat_name) { return absl::nullopt; } +bool IsInternalEvent(absl::optional event_type) { + // TODO(b/162102421): Introduce a prefix for internal event names. + if (!event_type.has_value()) return false; + switch (*event_type) { + case HostEventType::kMemoryAllocation: + case HostEventType::kMemoryDeallocation: + case HostEventType::kPrefetchProduce: + case HostEventType::kPrefetchConsume: + case HostEventType::kParallelInterleaveProduce: + case HostEventType::kParallelInterleaveConsume: + case HostEventType::kParallelInterleaveInitializedInput: + case HostEventType::kParallelMapProduce: + case HostEventType::kParallelMapConsume: + case HostEventType::kMapAndBatchProduce: + case HostEventType::kMapAndBatchConsume: + return true; + default: + return false; + } +} + bool IsInternalStat(absl::optional stat_type) { - static const auto* const kInternalStats = new absl::flat_hash_set{ - StatType::kKernelDetails, StatType::kLevel0, - StatType::kProducerType, StatType::kProducerId, - StatType::kConsumerType, StatType::kConsumerId, - StatType::kIsRoot, StatType::kIsAsync, - StatType::kFlops, StatType::kBytesAccessed}; - return stat_type.has_value() && kInternalStats->contains(*stat_type); + // TODO(b/162102421): Introduce a prefix for internal stat names. + if (!stat_type.has_value()) return false; + switch (*stat_type) { + case StatType::kKernelDetails: + case StatType::kLevel0: + case StatType::kProducerType: + case StatType::kProducerId: + case StatType::kConsumerType: + case StatType::kConsumerId: + case StatType::kIsRoot: + case StatType::kIsAsync: + case StatType::kFlops: + case StatType::kBytesAccessed: + return true; + default: + return false; + } } } // namespace profiler diff --git a/tensorflow/core/profiler/utils/xplane_schema.h b/tensorflow/core/profiler/utils/xplane_schema.h index ac66a4ac8bc..c435b46f6d1 100644 --- a/tensorflow/core/profiler/utils/xplane_schema.h +++ b/tensorflow/core/profiler/utils/xplane_schema.h @@ -89,6 +89,7 @@ enum HostEventType { kPrefetchConsume, kParallelInterleaveProduce, kParallelInterleaveConsume, + kParallelInterleaveInitializedInput, kParallelMapProduce, kParallelMapConsume, kMapAndBatchProduce, @@ -212,6 +213,9 @@ inline bool IsStatType(StatType stat_type, absl::string_view stat_name) { absl::optional FindStatType(absl::string_view stat_name); +// Returns true if the given event shouldn't be shown in the trace viewer. +bool IsInternalEvent(absl::optional event_type); + // Returns true if the given stat shouldn't be shown in the trace viewer. 
bool IsInternalStat(absl::optional stat_type); From 56de3748d7f9a3c701654fe3f093d9b04790fedd Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Sat, 25 Jul 2020 02:28:59 +0200 Subject: [PATCH 1314/2522] Remove deprecated calls to iterator_ops.get_next_as_optional --- tensorflow/python/autograph/operators/control_flow.py | 2 +- .../python/autograph/operators/control_flow_deprecated_py2.py | 2 +- tensorflow/python/autograph/operators/py_builtins.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/autograph/operators/control_flow.py b/tensorflow/python/autograph/operators/control_flow.py index 400bcb81fd5..97a639955b9 100644 --- a/tensorflow/python/autograph/operators/control_flow.py +++ b/tensorflow/python/autograph/operators/control_flow.py @@ -571,7 +571,7 @@ def _tf_iterator_for_stmt( def aug_body(): """Main body passed to _tf_while_stmt.""" - opt_iterate = iterator_ops.get_next_as_optional(iter_) + opt_iterate = iter_.get_next_as_optional() has_next.value = opt_iterate.has_value() loop_vars = aug_get_state() # updated by set_state() in _tf_while_loop. diff --git a/tensorflow/python/autograph/operators/control_flow_deprecated_py2.py b/tensorflow/python/autograph/operators/control_flow_deprecated_py2.py index 5a900fb19ed..a18603c9964 100644 --- a/tensorflow/python/autograph/operators/control_flow_deprecated_py2.py +++ b/tensorflow/python/autograph/operators/control_flow_deprecated_py2.py @@ -590,7 +590,7 @@ def _tf_iterator_for_stmt(itr, extra_test, body, get_state, set_state, def while_body(has_next, *loop_vars): """Main loop body.""" - opt_iterate = iterator_ops.get_next_as_optional(itr) + opt_iterate = itr.get_next_as_optional() has_next = opt_iterate.has_value() if not init_vars: diff --git a/tensorflow/python/autograph/operators/py_builtins.py b/tensorflow/python/autograph/operators/py_builtins.py index f86668c12f0..bf5ea035b54 100644 --- a/tensorflow/python/autograph/operators/py_builtins.py +++ b/tensorflow/python/autograph/operators/py_builtins.py @@ -511,7 +511,7 @@ def next_tf_iterator(iterator, default=UNSPECIFIED): # Without a default, fall back to the "normal" behavior which raises # a runtime exception. return next(iterator) - opt_iterate = iterator_ops.get_next_as_optional(iterator) + opt_iterate = iterator.get_next_as_optional() _verify_structure_compatible( 'the default argument', 'the iterate', default, iterator.element_spec) return control_flow_ops.cond( From 8658b5b3f72a34fd186424055b41f7d28fe6f709 Mon Sep 17 00:00:00 2001 From: Xingyu Long Date: Fri, 24 Jul 2020 20:31:40 -0400 Subject: [PATCH 1315/2522] Add other gpu benchmarks. 
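The affected BUILD targets move from py_test to cuda_py_test so the new benchmarks can request GPUs. Each added method follows the existing single-GPU pattern and differs only in the num_gpus and distribution_strategy arguments passed to benchmark_util.measure_performance. A condensed sketch of that pattern, mirroring the conv MNIST benchmark added below (the inline comments are illustrative and not part of the committed code):

    def benchmark_conv_mnist_bs_256_gpu_2(self):
      batch_size = 256
      run_iters = 3
      metrics, wall_time, extras = benchmark_util.measure_performance(
          self._build_model,
          x=self.x_train,
          y=self.y_train,
          batch_size=batch_size,
          run_iters=run_iters,
          num_gpus=2,                        # run on two GPUs ...
          distribution_strategy="mirrored",  # ... under MirroredStrategy
          epochs=self.epochs,
          optimizer='adam',
          loss='categorical_crossentropy',
          metrics=['accuracy'])
      self.report_benchmark(
          iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras)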
--- tensorflow/python/keras/benchmarks/BUILD | 10 +++---- .../mnist_conv_benchmark_test.py | 21 ++++++++++++++ .../mnist_hierarchical_rnn_benchmark_test.py | 20 +++++++++++++ .../mnist_irnn_benchmark_test.py | 20 +++++++++++++ .../reuters_mlp_benchmark_test.py | 21 ++++++++++++++ ...assification_transformer_benchmark_test.py | 28 ++++++++++++++++--- 6 files changed, 111 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/keras/benchmarks/BUILD b/tensorflow/python/keras/benchmarks/BUILD index d86e9479259..e3849f6b689 100644 --- a/tensorflow/python/keras/benchmarks/BUILD +++ b/tensorflow/python/keras/benchmarks/BUILD @@ -93,7 +93,7 @@ py_test( ], ) -py_test( +cuda_py_test( name = "text_classification_transformer_benchmark_test", srcs = ["keras_examples_benchmarks/text_classification_transformer_benchmark_test.py"], python_version = "PY3", @@ -115,7 +115,7 @@ py_test( ], ) -py_test( +cuda_py_test( name = "mnist_conv_benchmark_test", srcs = ["keras_examples_benchmarks/mnist_conv_benchmark_test.py"], python_version = "PY3", @@ -127,7 +127,7 @@ py_test( ], ) -py_test( +cuda_py_test( name = "mnist_hierarchical_rnn_benchmark_test", srcs = ["keras_examples_benchmarks/mnist_hierarchical_rnn_benchmark_test.py"], python_version = "PY3", @@ -138,7 +138,7 @@ py_test( ], ) -py_test( +cuda_py_test( name = "mnist_irnn_benchmark_test", srcs = ["keras_examples_benchmarks/mnist_irnn_benchmark_test.py"], python_version = "PY3", @@ -149,7 +149,7 @@ py_test( ], ) -py_test( +cuda_py_test( name = "reuters_mlp_benchmark_test", srcs = ["keras_examples_benchmarks/reuters_mlp_benchmark_test.py"], python_version = "PY3", diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_conv_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_conv_benchmark_test.py index 462aa9c4c8b..6bc8b6ea0f6 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_conv_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_conv_benchmark_test.py @@ -96,6 +96,27 @@ class ConvMnistBenchmark(tf.test.Benchmark): self.report_benchmark( iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras) + def benchmark_conv_mnist_bs_256_gpu_2(self): + """Measure performance with batch_size=256, run_iters=3, gpu=2 and + distribution_strategy='mirrored'""" + batch_size = 256 + run_iters = 3 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + run_iters=run_iters, + num_gpus=2, + distribution_strategy="mirrored", + epochs=self.epochs, + optimizer='adam', + loss='categorical_crossentropy', + metrics=['accuracy']) + + self.report_benchmark( + iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras) + def benchmark_conv_mnist_bs_512(self): """Measure performance with batch_size=512 and run_iters=3.""" batch_size = 512 diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_hierarchical_rnn_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_hierarchical_rnn_benchmark_test.py index 8fdafcbae93..faea126f2a5 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_hierarchical_rnn_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_hierarchical_rnn_benchmark_test.py @@ -78,6 +78,26 @@ class HierarchicalRNNBenchmark(tf.test.Benchmark): self.report_benchmark( iters=run_iters, wall_time=wall_time, 
metrics=metrics, extras=extras) + def benchmark_hrnn_mnist_bs_256_gpu_2(self): + """Measure performance with batch_size=256, run_iters=4, gpu=2 and + distribution_strategy='mirrored'""" + batch_size = 256 + run_iters = 4 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + run_iters=run_iters, + num_gpus=2, + distribution_strategy="mirrored", + optimizer='rmsprop', + loss='categorical_crossentropy', + metrics=['accuracy']) + + self.report_benchmark( + iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras) + def benchmark_hrnn_mnist_bs_512(self): """Measure performance with batch_size=512 and run_iters=5.""" batch_size = 512 diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_irnn_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_irnn_benchmark_test.py index 0acc5f0cfe0..ffed31190e0 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_irnn_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_irnn_benchmark_test.py @@ -112,6 +112,26 @@ class IRNNMnistBenchmark(tf.test.Benchmark): self.report_benchmark( iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras) + def benchmark_irnn_mnist_bs_1024_gpu_1(self): + """Measure performance with batch_size=1024, run_iters=3, gpu=1 and + distribution_strategy='mirrored'""" + batch_size = 1024 + run_iters = 3 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + run_iters=run_iters, + num_gpus=1, + distribution_strategy="mirrored", + optimizer=tf.keras.optimizers.RMSprop(learning_rate=self.learning_rate), + loss='categorical_crossentropy', + metrics=['accuracy']) + + self.report_benchmark( + iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras) + if __name__ == '__main__': tf.test.main() diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/reuters_mlp_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/reuters_mlp_benchmark_test.py index 81980fe03a4..25b947c4d8a 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/reuters_mlp_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/reuters_mlp_benchmark_test.py @@ -78,6 +78,27 @@ class MLPReutersBenchmark(tf.test.Benchmark): self.report_benchmark( iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras) + def benchmark_mlp_reuters_bs_128_gpu_1(self): + """Measure performance with batch_size=128, run_iters=2, gpu=1 and + distribution_strategy='mirrored'""" + batch_size = 128 + run_iters = 2 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + run_iters=run_iters, + num_gpus=1, + distribution_strategy="mirrored", + epochs=self.epochs, + optimizer='adam', + loss='categorical_crossentropy', + metrics=['accuracy']) + + self.report_benchmark( + iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras) + def benchmark_mlp_reuters_bs_256(self): """Measure performance with batch_size=256 and run_iters=3.""" batch_size = 256 diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py 
b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py index 26bd92cf271..18090a2848e 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py @@ -82,6 +82,23 @@ class TextWithTransformerBenchmark(tf.test.Benchmark): self.report_benchmark( iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras) + def benchmark_text_classification_bs_256(self): + """Measure performance with batch_size=256 and run_iters=3.""" + batch_size = 256 + run_iters = 3 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.imdb_x, + y=self.imdb_y, + batch_size=batch_size, + run_iters=run_iters, + optimizer='adam', + loss='sparse_categorical_crossentropy', + metrics=['accuracy']) + + self.report_benchmark( + iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras) + def benchmark_text_classification_bs_512(self): """Measure performance with batch_size=512 and run_iters=4.""" batch_size = 512 @@ -99,16 +116,19 @@ class TextWithTransformerBenchmark(tf.test.Benchmark): self.report_benchmark( iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras) - def benchmark_text_classification_bs_256(self): - """Measure performance with batch_size=256 and run_iters=3.""" - batch_size = 256 - run_iters = 3 + def benchmark_text_classification_bs_512_gpu_2(self): + """Measure performance with batch_size=512, run_iters=4, gpu=1 and + distribution_strategy='mirrored'""" + batch_size = 512 + run_iters = 4 metrics, wall_time, extras = benchmark_util.measure_performance( self._build_model, x=self.imdb_x, y=self.imdb_y, batch_size=batch_size, run_iters=run_iters, + num_gpus=2, + distribution_strategy="mirrored", optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy']) From f8c8a778032db1685af4c746cef63fdcd39abf97 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Wed, 15 Jul 2020 16:11:17 +0000 Subject: [PATCH 1316/2522] Add qint8 and qint16 support for FillOp This PR tries to address the issue raised in 26069 where qint8 and qint16 were not supported for FillOp. This PR add qint8 and qint16 support for FillOp. Signed-off-by: Yong Tang --- tensorflow/core/kernels/constant_op.cc | 2 ++ tensorflow/core/kernels/fill_functor.cc | 2 ++ tensorflow/python/kernel_tests/constant_op_test.py | 12 ++++++++++++ 3 files changed, 16 insertions(+) diff --git a/tensorflow/core/kernels/constant_op.cc b/tensorflow/core/kernels/constant_op.cc index 376effc6535..682da43a9b0 100644 --- a/tensorflow/core/kernels/constant_op.cc +++ b/tensorflow/core/kernels/constant_op.cc @@ -212,6 +212,8 @@ TF_CALL_ALL_TYPES(REGISTER_CPU_KERNEL); // the conversion from uint8 to quint8. 
REGISTER_KERNEL(CPU, quint8); REGISTER_KERNEL(CPU, quint16); +REGISTER_KERNEL(CPU, qint8); +REGISTER_KERNEL(CPU, qint16); #undef REGISTER_CPU_KERNEL #ifdef TENSORFLOW_USE_SYCL diff --git a/tensorflow/core/kernels/fill_functor.cc b/tensorflow/core/kernels/fill_functor.cc index 174a4e45a79..0619facbd65 100644 --- a/tensorflow/core/kernels/fill_functor.cc +++ b/tensorflow/core/kernels/fill_functor.cc @@ -141,6 +141,8 @@ struct FillFunctor { TF_CALL_ALL_TYPES(DEFINE_FILL_CPU); DEFINE_FILL_CPU(quint8); DEFINE_FILL_CPU(quint16); +DEFINE_FILL_CPU(qint8); +DEFINE_FILL_CPU(qint16); #undef DEFINE_FILL_CPU #ifdef TENSORFLOW_USE_SYCL diff --git a/tensorflow/python/kernel_tests/constant_op_test.py b/tensorflow/python/kernel_tests/constant_op_test.py index c3960081b8d..b77ef804847 100644 --- a/tensorflow/python/kernel_tests/constant_op_test.py +++ b/tensorflow/python/kernel_tests/constant_op_test.py @@ -456,6 +456,18 @@ class ZerosTest(test.TestCase): self.assertFalse(np.any(z_value)) self.assertEqual((2, 3), z_value.shape) + def testQintDtype(self): + for dtype in [ + dtypes_lib.qint8, dtypes_lib.quint8, + dtypes_lib.qint16, dtypes_lib.quint16]: + z = array_ops.zeros([2, 3], dtype=dtype) + self.assertEqual(z.dtype, dtype) + self.assertEqual([2, 3], z.get_shape()) + # cast to int32 so that it can be compred with numpy + # where [qint|quint][8|16] are not available. + z_value = self.evaluate(math_ops.cast(z, dtypes_lib.int32)) + self.assertFalse(np.any(z_value)) + class ZerosLikeTest(test.TestCase): From 11c1cd9ccd53ee2e2cc1fdbd7398988379f7d5af Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Tue, 21 Jul 2020 16:45:59 +0000 Subject: [PATCH 1317/2522] Use np.zeros([]).astype(dtype.as_numpy_dtype) to specify qint8 `zero` ``` + elif dtype.is_quantized: + zero = np.zeros([]).astype(dtype.as_numpy_dtype) ``` Signed-off-by: Yong Tang --- tensorflow/python/ops/array_ops.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index e9f32dec6b8..5d68deb7ac1 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -2806,6 +2806,8 @@ def zeros(shape, dtype=dtypes.float32, name=None): zero = False elif dtype == dtypes.string: zero = "" + elif dtype.is_quantized: + zero = np.zeros([]).astype(dtype.as_numpy_dtype) else: zero = 0 From 52e516c3d1b07baa1251c2c8ae09ad6fddab5b78 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Wed, 22 Jul 2020 01:08:51 +0000 Subject: [PATCH 1318/2522] Split test into two, as otherwise the same dtype will be reused in internal tests and cause testing to fail. Signed-off-by: Yong Tang --- .../python/kernel_tests/constant_op_test.py | 30 ++++++++++++------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/tensorflow/python/kernel_tests/constant_op_test.py b/tensorflow/python/kernel_tests/constant_op_test.py index b77ef804847..e35b62a4556 100644 --- a/tensorflow/python/kernel_tests/constant_op_test.py +++ b/tensorflow/python/kernel_tests/constant_op_test.py @@ -456,17 +456,25 @@ class ZerosTest(test.TestCase): self.assertFalse(np.any(z_value)) self.assertEqual((2, 3), z_value.shape) - def testQintDtype(self): - for dtype in [ - dtypes_lib.qint8, dtypes_lib.quint8, - dtypes_lib.qint16, dtypes_lib.quint16]: - z = array_ops.zeros([2, 3], dtype=dtype) - self.assertEqual(z.dtype, dtype) - self.assertEqual([2, 3], z.get_shape()) - # cast to int32 so that it can be compred with numpy - # where [qint|quint][8|16] are not available. 
- z_value = self.evaluate(math_ops.cast(z, dtypes_lib.int32)) - self.assertFalse(np.any(z_value)) + def testQint8Dtype(self): + dtype = dtypes_lib.qint8 + z = array_ops.zeros([2, 3], dtype=dtype) + self.assertEqual(z.dtype, dtype) + self.assertEqual([2, 3], z.get_shape()) + # cast to int32 so that it can be compred with numpy + # where [qint|quint][8|16] are not available. + z_value = self.evaluate(math_ops.cast(z, dtypes_lib.int32)) + self.assertFalse(np.any(z_value)) + + def testQint16Dtype(self): + dtype = dtypes_lib.qint16 + z = array_ops.zeros([2, 3], dtype=dtype) + self.assertEqual(z.dtype, dtype) + self.assertEqual([2, 3], z.get_shape()) + # cast to int32 so that it can be compred with numpy + # where [qint|quint][8|16] are not available. + z_value = self.evaluate(math_ops.cast(z, dtypes_lib.int32)) + self.assertFalse(np.any(z_value)) class ZerosLikeTest(test.TestCase): From 6b6471f3ffb7f1fefe42d814aa5fb9ab7a535b58 Mon Sep 17 00:00:00 2001 From: Rachel Lim Date: Fri, 24 Jul 2020 17:32:26 -0700 Subject: [PATCH 1319/2522] [tf.data] Adds a utility function that figures out how to rebatch a batch for any given worker. PiperOrigin-RevId: 323104379 Change-Id: I1e6fc7cc513473689b1ffed7da2c1509ac322ec9 --- .../kernel_tests/rebatch_dataset_test.py | 78 ++++++++++++ .../data/experimental/ops/distribute.py | 115 +++++++++++++++++- 2 files changed, 189 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/data/experimental/kernel_tests/rebatch_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/rebatch_dataset_test.py index ce91ae2ea46..0b614ef0b84 100644 --- a/tensorflow/python/data/experimental/kernel_tests/rebatch_dataset_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/rebatch_dataset_test.py @@ -32,6 +32,84 @@ from tensorflow.python.ops.ragged import ragged_tensor from tensorflow.python.platform import test +class BatchSizesForWorkerTest(test_base.DatasetTestBase, + parameterized.TestCase): + + def _test(self, global_batch_size, num_workers, num_replicas_per_worker): + """Test that all constraints are met for given parameters.""" + batch_sizes_list = [] + for i in range(num_workers): + batch_sizes_list.append( + self.evaluate( + distribute.batch_sizes_for_worker(global_batch_size, num_workers, + num_replicas_per_worker, i))) + for batch_sizes in batch_sizes_list: + # Constraint (A): for any worker, len(batch_sizes) == W * R + self.assertLen(batch_sizes, num_workers * num_replicas_per_worker) + # Constraint (B): for any worker, sum(batch_sizes) == G + self.assertAllEqual(np.sum(batch_sizes), global_batch_size) + + # Each per-worker batch is split into num_workers global steps + for step_index in range(num_workers): + actual_global_batch = 0 + offset = step_index * num_replicas_per_worker + for batch_sizes in batch_sizes_list: + actual_global_batch += np.sum(batch_sizes[offset:offset + + num_replicas_per_worker]) + # Constraint (C): for any step, batch size across all workers add up to G. + self.assertAllEqual( + global_batch_size, + actual_global_batch, + ) + + # Constraint (D): Batch size of any two replicas differs by at most one + self.assertLessEqual(np.max(batch_sizes_list) - np.min(batch_sizes_list), 1) + + @combinations.generate(test_base.default_test_combinations()) + def testBasic(self): + # Manually verify basic test case. 
+ global_batch_size = 8 + num_workers = 2 + num_replicas_per_worker = 2 + for worker_index in range(4): + batch_sizes = distribute.batch_sizes_for_worker(global_batch_size, + num_workers, + num_replicas_per_worker, + worker_index) + self.assertAllEqual([2, 2, 2, 2], self.evaluate(batch_sizes)) + self._test(global_batch_size, num_workers, num_replicas_per_worker) + + @combinations.generate(test_base.default_test_combinations()) + def testBatchSizeIndivisibleByNumWorkers(self): + global_batch_size = 4 + num_workers = 3 + num_replicas_per_worker = 1 + + def get_batch_sizes_for_worker(worker_index): + return self.evaluate( + distribute.batch_sizes_for_worker(global_batch_size, num_workers, + num_replicas_per_worker, + worker_index)) + + # Manually verify this test case. + self.assertAllEqual([2, 1, 1], get_batch_sizes_for_worker(0)) + self.assertAllEqual([1, 1, 2], get_batch_sizes_for_worker(1)) + self.assertAllEqual([1, 2, 1], get_batch_sizes_for_worker(2)) + self._test(global_batch_size, num_workers, num_replicas_per_worker) + + @combinations.generate(test_base.default_test_combinations()) + def testBatchSizeIndivisibleByNumReplicas(self): + self._test(global_batch_size=4, num_workers=1, num_replicas_per_worker=5) + + @combinations.generate(test_base.default_test_combinations()) + def testBatchSizeSmallerThanNumReplicas(self): + self._test(global_batch_size=4, num_workers=2, num_replicas_per_worker=5) + + @combinations.generate(test_base.default_test_combinations()) + def testBatchSizeSmallerThanNumWorkers(self): + self._test(global_batch_size=4, num_workers=5, num_replicas_per_worker=1) + + def _flat_shapes(dataset): return [ ts.as_list() diff --git a/tensorflow/python/data/experimental/ops/distribute.py b/tensorflow/python/data/experimental/ops/distribute.py index e9e9c052d63..7b05b34c110 100644 --- a/tensorflow/python/data/experimental/ops/distribute.py +++ b/tensorflow/python/data/experimental/ops/distribute.py @@ -27,6 +27,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_util +from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_experimental_dataset_ops as ged_ops @@ -196,8 +197,7 @@ class _RebatchDataset(dataset_ops.UnaryDataset): known_input_batch_dims = np.asarray(known_input_batch_dims) if not np.all(known_input_batch_dims == known_input_batch_dims[0]): - raise ValueError( - "Batch dimensions of input dataset are not compatible.") + raise ValueError("Batch dimensions of input dataset are not compatible.") return known_input_batch_dims[0] % desired_batch_size != 0 @@ -318,14 +318,121 @@ def replicate(dataset, devices): if policy is None: policy = ExternalStatePolicy.WARN graph_def = dataset._as_serialized_graph( - strip_device_assignment=True, - external_state_policy=policy) + strip_device_assignment=True, external_state_policy=policy) for device in devices: ds = _RemoteDataset(graph_def, device, dataset.element_spec) datasets[device] = ds return datasets +def batch_sizes_for_worker(global_batch_size, num_workers, + num_replicas_per_worker, worker_index): + """Determines how to rebatch a dataset for the given worker. 
+ + Given the global batch size, number of workers, number of replicas per worker, + and worker index, returns the correct batch sizes for rebatching a dataset + on worker `worker_index` of `num_workers`, such that each global step (across + all workers and replicas) will consume global_batch_size elements. The + returned value should be passed as the `batch_sizes` input parameter to + `tf.data.experimental.rebatch()`. The returned batch sizes meet the following + constraints: + + Let G = global_batch_size, W = num_workers, R = num_replicas_per_worker + (A) for any worker, len(batch_sizes) = W * R + (B) for any worker, sum(batch_sizes) == G + (C) for any global step (i.e. R iterations on each worker), the sum of batches + consumed by replicas across all workers is G. + (D) any two batch sizes of any two replicas differs by at most one. + + For example, suppose we have G = 7, W = 2, R = 2, and suppose we have two + files which each contain 7 elements: + + ```python + # WORKER 0 + batch_sizes_0 = batch_sizes_for_worker(global_batch_size=global_batch_size, + num_workers=2, + num_replicas_per_worker=2, + worker_index=0) + print(batch_sizes_0) + >> [2, 2, 2, 1] + + dataset_0 = tf.data.Dataset.from_tensor_slices(["file_a", "file_b"]) + dataset_0 = dataset_0.shard(num_shards, index=0) + dataset_0 = dataset_0.batch(7) + dataset_0 = dataset_0.apply(tf.data.experimental.rebatch(batch_sizes_0)) + for elem in dataset_0: + print(elem) + >> [[A0, A1], [A2, A3], [A4, A5], [A6]] + + # WORKER 1 + batch_sizes_1 = batch_sizes_for_worker(global_batch_size=global_batch_size, + num_workers=2, + num_replicas_per_worker=2, + worker_index=1) + print(batch_sizes_1) + >> [2, 1, 2, 2] + + dataset_1 = tf.data.Dataset.from_tensor_slices(["file_a", "file_b"]) + dataset_1 = dataset_1.shard(num_shards, index=1) + dataset_1 = dataset_1.batch(7) + dataset_1 = dataset_1.apply(tf.data.experimental.rebatch(batch_sizes_1)) + for elem in dataset_1: + print(elem) + >> [[B0, B1], [B2], [B3, B4], [B5, B6]] + ``` + + The above example will produce the following elements: + + Step 1: + Worker 0 Replica 0: [A0, A1] + Worker 0 Replica 1: [A2, A3] + Worker 1 Replica 0: [B0, B1] + Worker 1 Replica 1: [B2] + Total batch size = 7 + + Step 2: + Worker 0 Replica 0: [A4, A5] + Worker 0 Replica 1: [A6] + Worker 1 Replica 0: [B3, B4] + Worker 1 Replica 1: [B5, B6] + Total batch size = 7 + + Args: + global_batch_size: A `tf.int64` scalar, representing the global batch size. + num_workers: An integer representing the number of workers the dataset will + be distributed across. + num_replicas_per_worker: An integer representing the number of replicas per + worker. All workers are assumed to have the same number of replicas. + worker_index: An integer index of the worker to be rebatched. + + Returns: + A `tf.int64` vector, representing the batch sizes to rebatch the dataset + into. + """ + # Constraint (A) + num_subbatches = num_workers * num_replicas_per_worker + + # Let N = W * R. Constraint (B) and (D) jointly mean that the iterations + # should have batch size either floor(B/N) or ceil(B/N). Namely, of the N + # subbatches a batch is split into, B - N * floor(B/N) of them will have size + # ceil(B/N), and the rest will have size floor(B/N). + floor = global_batch_size // num_subbatches + num_ceil = global_batch_size - (num_subbatches * floor) + + # For worker 0, we assign the first num_ceil subbatches to have size + # ceil(B/N), and the remainder to have size floor(B/N). 
The other workers will + # each be offset by R * worker_index in order to meet constraint (C). + worker_0 = array_ops.ones(num_subbatches, dtype=dtypes.int64) + worker_0 = floor * worker_0 + array_ops.concat([ + array_ops.ones(num_ceil, dtype=dtypes.int64), + array_ops.zeros(num_subbatches - num_ceil, dtype=dtypes.int64) + ], + axis=0) + + offset = worker_index * num_replicas_per_worker + return array_ops.concat([worker_0[offset:], worker_0[:offset]], axis=0) + + def compute_batch_size(dataset): """An operation that returns the batch size of the dataset. From 43eabe312e01fb70c780eebfdf53519575cc7fe9 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Fri, 24 Jul 2020 17:39:52 -0700 Subject: [PATCH 1320/2522] Add relevant build configurations for TPU compilation device PiperOrigin-RevId: 323105286 Change-Id: Ica7feeea95e275614025c55e1084e7d23f6e092c --- tensorflow/core/tpu/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/core/tpu/BUILD b/tensorflow/core/tpu/BUILD index 320d07a422f..11aa95530f3 100644 --- a/tensorflow/core/tpu/BUILD +++ b/tensorflow/core/tpu/BUILD @@ -157,6 +157,7 @@ cc_library( "//tensorflow/stream_executor/tpu:tpu_node_context_c_api_hdrs", ] + select({ "//tensorflow:oss": [ + ":tpu_compilation_device", ":tpu_node_device", ":tpu_system_device", "//tensorflow/core/tpu/ops:host_compute_ops", From a1be5430d45acad5279bf619079f696483c84e12 Mon Sep 17 00:00:00 2001 From: Dong Lin Date: Fri, 24 Jul 2020 18:06:53 -0700 Subject: [PATCH 1321/2522] Disable eager_microbenchmarks_test for py38 oss build PiperOrigin-RevId: 323108393 Change-Id: Id2d24fdd3661bc0a58fdc0cf072e892f803ac068 --- tensorflow/python/keras/benchmarks/BUILD | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/keras/benchmarks/BUILD b/tensorflow/python/keras/benchmarks/BUILD index d86e9479259..487b67ff9ce 100644 --- a/tensorflow/python/keras/benchmarks/BUILD +++ b/tensorflow/python/keras/benchmarks/BUILD @@ -56,7 +56,9 @@ cuda_py_test( size = "medium", srcs = ["eager_microbenchmarks_test.py"], python_version = "PY3", - tags = COMMON_TAGS, + tags = COMMON_TAGS + [ + "no_oss_py38", # TODO(b/162044699) + ], deps = [ "//tensorflow:tensorflow_py", ], From 98b4246afa4d162cc54c03b7c4553992f5ffa48d Mon Sep 17 00:00:00 2001 From: Terry Heo Date: Fri, 24 Jul 2020 18:36:02 -0700 Subject: [PATCH 1322/2522] Refactor Python PIP build scripts for ARM/ARM64 - Simplify build commands with only container name without providing environment variables. - Add Python 3.8 support. - Update build_raspberry_pi.sh to build generate aarch64 binary. [Python 3.8 PIP package for ARM64] The following command is used to build Python 3.8 PIP package for aarch64. $ tensorflow/tools/ci_build/ci_build.sh PI-PYTHON38 \ tensorflow/tools/ci_build/pi/build_raspberry_pi.sh AARCH64 [Test on Ubuntu 20.04 ARM64] I've tested the PIP package on Ubuntu 20.04 64bit with RPI4. There was an issue on installing h5py on Ubuntu 20.04. The following command was needed to install h5py package. $ HDF5_DIR=/usr/lib/aarch64-linux-gnu/hdf5/serial CC=h5cc pip3 install h5py This PR handles #30716 issue. 
PiperOrigin-RevId: 323111228 Change-Id: I408149305a082afe105215fc380f602961b2fdf0 --- tensorflow/lite/tools/pip_package/README.md | 21 ++++++-------- .../tools/ci_build/Dockerfile.pi-python3 | 5 +++- .../tools/ci_build/Dockerfile.pi-python37 | 10 +++---- .../tools/ci_build/Dockerfile.pi-python38 | 28 +++++++++++++++++++ ...in.sh => install_pi_python3x_toolchain.sh} | 18 ++++++++---- .../tools/ci_build/pi/build_raspberry_pi.sh | 19 +++++++++---- 6 files changed, 72 insertions(+), 29 deletions(-) create mode 100644 tensorflow/tools/ci_build/Dockerfile.pi-python38 rename tensorflow/tools/ci_build/install/{install_pi_python37_toolchain.sh => install_pi_python3x_toolchain.sh} (70%) diff --git a/tensorflow/lite/tools/pip_package/README.md b/tensorflow/lite/tools/pip_package/README.md index e9a18392a55..57e161c5b83 100644 --- a/tensorflow/lite/tools/pip_package/README.md +++ b/tensorflow/lite/tools/pip_package/README.md @@ -65,32 +65,28 @@ tensorflow/lite/tools/pip_package/build_pip_package_with_bazel.sh ### Cross build for armhf Python 3.5 ```sh -CI_DOCKER_EXTRA_PARAMS="-e CI_BUILD_PYTHON=python3 -e CROSSTOOL_PYTHON_INCLUDE_PATH=/usr/include/python3.5" \ - tensorflow/tools/ci_build/ci_build.sh PI-PYTHON3 \ +tensorflow/tools/ci_build/ci_build.sh PI-PYTHON3 \ tensorflow/lite/tools/pip_package/build_pip_package_with_bazel.sh armhf ``` ### Cross build for armhf Python 3.7 ```sh -CI_DOCKER_EXTRA_PARAMS="-e CI_BUILD_PYTHON=python3.7 -e CROSSTOOL_PYTHON_INCLUDE_PATH=/usr/include/python3.7" \ - tensorflow/tools/ci_build/ci_build.sh PI-PYTHON37 \ +tensorflow/tools/ci_build/ci_build.sh PI-PYTHON37 \ tensorflow/lite/tools/pip_package/build_pip_package_with_bazel.sh armhf ``` ### Cross build for aarch64 Python 3.5 ```sh - CI_DOCKER_EXTRA_PARAMS="-e CI_BUILD_PYTHON=python3 -e CROSSTOOL_PYTHON_INCLUDE_PATH=/usr/include/python3.5" \ - tensorflow/tools/ci_build/ci_build.sh PI-PYTHON3 \ +tensorflow/tools/ci_build/ci_build.sh PI-PYTHON3 \ tensorflow/lite/tools/pip_package/build_pip_package_with_bazel.sh aarch64 ``` -### Cross build for aarch64 Python 3.7 +### Cross build for aarch64 Python 3.8 ```sh -CI_DOCKER_EXTRA_PARAMS="-e CI_BUILD_PYTHON=python3.7 -e CROSSTOOL_PYTHON_INCLUDE_PATH=/usr/include/python3.7" \ - tensorflow/tools/ci_build/ci_build.sh PI-PYTHON37 \ +tensorflow/tools/ci_build/ci_build.sh PI-PYTHON38 \ tensorflow/lite/tools/pip_package/build_pip_package_with_bazel.sh aarch64 ``` @@ -109,12 +105,11 @@ CUSTOM_BAZEL_FLAGS=--define=tflite_pip_with_flex=true \ tensorflow/lite/tools/pip_package/build_pip_package_with_bazel.sh ``` -### Cross build with Flex for armhf Python 3.5 +### Cross build with Flex for armhf Python 3.7 ```sh -CI_DOCKER_EXTRA_PARAMS="-e CUSTOM_BAZEL_FLAGS=--define=tflite_pip_with_flex=true \ - -e CI_BUILD_PYTHON=python3 -e CROSSTOOL_PYTHON_INCLUDE_PATH=/usr/include/python3.5" \ - tensorflow/tools/ci_build/ci_build.sh PI-PYTHON3 \ +CI_DOCKER_EXTRA_PARAMS="-e CUSTOM_BAZEL_FLAGS=--define=tflite_pip_with_flex=true" \ + tensorflow/tools/ci_build/ci_build.sh PI-PYTHON37 \ tensorflow/lite/tools/pip_package/build_pip_package_with_bazel.sh armhf ``` diff --git a/tensorflow/tools/ci_build/Dockerfile.pi-python3 b/tensorflow/tools/ci_build/Dockerfile.pi-python3 index bcc5d13f9d5..3dca7e254be 100644 --- a/tensorflow/tools/ci_build/Dockerfile.pi-python3 +++ b/tensorflow/tools/ci_build/Dockerfile.pi-python3 @@ -1,6 +1,9 @@ FROM ubuntu:16.04 -LABEL maintainer="Jan Prach " +LABEL maintainer="Terry Heo " + +ENV CI_BUILD_PYTHON=python3 +ENV CROSSTOOL_PYTHON_INCLUDE_PATH=/usr/include/python3.5 # Copy and 
run the install scripts. COPY install/*.sh /install/ diff --git a/tensorflow/tools/ci_build/Dockerfile.pi-python37 b/tensorflow/tools/ci_build/Dockerfile.pi-python37 index 2432b727bc7..4e301929147 100644 --- a/tensorflow/tools/ci_build/Dockerfile.pi-python37 +++ b/tensorflow/tools/ci_build/Dockerfile.pi-python37 @@ -1,6 +1,9 @@ FROM ubuntu:16.04 -LABEL maintainer="Jan Prach " +LABEL maintainer="Terry Heo " + +ENV CI_BUILD_PYTHON=python3.7 +ENV CROSSTOOL_PYTHON_INCLUDE_PATH=/usr/include/python3.7 # Copy and run the install scripts. COPY install/*.sh /install/ @@ -10,10 +13,7 @@ RUN add-apt-repository -y ppa:openjdk-r/ppa && \ RUN /install/install_deb_packages.sh # The following line installs the Python 3.7 cross-compilation toolchain. -RUN /install/install_pi_python37_toolchain.sh - -RUN /install/install_pip_packages_by_version.sh "/usr/local/bin/pip3.7" -RUN ln -sf /usr/local/lib/python3.7/dist-packages/numpy/core/include/numpy /usr/include/python3.7/numpy +RUN /install/install_pi_python3x_toolchain.sh "3.7" RUN /install/install_bazel.sh RUN /install/install_proto3.sh diff --git a/tensorflow/tools/ci_build/Dockerfile.pi-python38 b/tensorflow/tools/ci_build/Dockerfile.pi-python38 new file mode 100644 index 00000000000..b7e6ffce073 --- /dev/null +++ b/tensorflow/tools/ci_build/Dockerfile.pi-python38 @@ -0,0 +1,28 @@ +FROM ubuntu:16.04 + +LABEL maintainer="Terry Heo " + +ENV CI_BUILD_PYTHON=python3.8 +ENV CROSSTOOL_PYTHON_INCLUDE_PATH=/usr/include/python3.8 + +# Copy and run the install scripts. +COPY install/*.sh /install/ +RUN /install/install_bootstrap_deb_packages.sh +RUN add-apt-repository -y ppa:openjdk-r/ppa && \ + add-apt-repository -y ppa:george-edison55/cmake-3.x +RUN /install/install_deb_packages.sh + +# The following line installs the Python 3.8 cross-compilation toolchain. +RUN /install/install_pi_python3x_toolchain.sh "3.8" + +RUN /install/install_bazel.sh +RUN /install/install_proto3.sh +RUN /install/install_buildifier.sh +RUN /install/install_auditwheel.sh +RUN /install/install_golang.sh + +# Set up the master bazelrc configuration file. +COPY install/.bazelrc /etc/bazel.bazelrc + +# XLA is not needed for PI +ENV TF_ENABLE_XLA=0 diff --git a/tensorflow/tools/ci_build/install/install_pi_python37_toolchain.sh b/tensorflow/tools/ci_build/install/install_pi_python3x_toolchain.sh similarity index 70% rename from tensorflow/tools/ci_build/install/install_pi_python37_toolchain.sh rename to tensorflow/tools/ci_build/install/install_pi_python3x_toolchain.sh index 446a7ffab2a..e425f7e30b6 100755 --- a/tensorflow/tools/ci_build/install/install_pi_python37_toolchain.sh +++ b/tensorflow/tools/ci_build/install/install_pi_python3x_toolchain.sh @@ -14,6 +14,7 @@ # limitations under the License. 
# ============================================================================== +PYTHON_VERSION=$1 dpkg --add-architecture armhf dpkg --add-architecture arm64 echo 'deb [arch=arm64,armhf] http://ports.ubuntu.com/ xenial main restricted universe multiverse' >> /etc/apt/sources.list.d/armhf.list @@ -23,8 +24,15 @@ echo 'deb [arch=arm64,armhf] http://ports.ubuntu.com/ xenial-backports main rest sed -i 's#deb http://archive.ubuntu.com/ubuntu/#deb [arch=amd64] http://archive.ubuntu.com/ubuntu/#g' /etc/apt/sources.list yes | add-apt-repository ppa:deadsnakes/ppa apt-get update -apt-get install -y python3.7 python3.7-dev -#/usr/local/bin/python3.7 is needed to use /install/install_pip_packages_by_version.sh -ln -sf /usr/bin/python3.7 /usr/local/bin/python3.7 -apt-get install -y libpython3.7-dev:armhf -apt-get install -y libpython3.7-dev:arm64 +apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev +#/usr/local/bin/python3.x is needed to use /install/install_pip_packages_by_version.sh +ln -sf /usr/bin/python${PYTHON_VERSION} /usr/local/bin/python${PYTHON_VERSION} +apt-get install -y libpython${PYTHON_VERSION}-dev:armhf +apt-get install -y libpython${PYTHON_VERSION}-dev:arm64 + +if [[ "${PYTHON_VERSION}" == "3.8" ]]; then + apt-get install -y python${PYTHON_VERSION}-distutils +fi + +/install/install_pip_packages_by_version.sh "/usr/local/bin/pip${PYTHON_VERSION}" +ln -sf /usr/local/lib/python${PYTHON_VERSION}/dist-packages/numpy/core/include/numpy /usr/include/python${PYTHON_VERSION}/numpy diff --git a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh index b3bb368173f..e02a2528747 100755 --- a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh +++ b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh @@ -38,9 +38,11 @@ export TF_ENABLE_XLA=0 yes '' | ./configure -# Fix for curl build problem in 32-bit, see https://stackoverflow.com/questions/35181744/size-of-array-curl-rule-01-is-negative -sudo sed -i 's/define CURL_SIZEOF_LONG 8/define CURL_SIZEOF_LONG 4/g' /usr/include/curl/curlbuild.h -sudo sed -i 's/define CURL_SIZEOF_CURL_OFF_T 8/define CURL_SIZEOF_CURL_OFF_T 4/g' /usr/include/curl/curlbuild.h +if [[ $1 != "AARCH64" ]]; then + # Fix for curl build problem in 32-bit, see https://stackoverflow.com/questions/35181744/size-of-array-curl-rule-01-is-negative + sudo sed -i 's/define CURL_SIZEOF_LONG 8/define CURL_SIZEOF_LONG 4/g' /usr/include/curl/curlbuild.h + sudo sed -i 's/define CURL_SIZEOF_CURL_OFF_T 8/define CURL_SIZEOF_CURL_OFF_T 4/g' /usr/include/curl/curlbuild.h +fi # The system-installed OpenSSL headers get pulled in by the latest BoringSSL # release on this configuration, so move them before we build: @@ -79,6 +81,7 @@ if [[ $1 == "PI_ONE" ]]; then make PREFIX=${OPENBLAS_INSTALL_PATH} install PI_COPTS="--copt=-march=armv6 --copt=-mfpu=vfp + --cpu=armeabi --crosstool_top=@local_config_arm_compiler//:toolchain --copt=-DUSE_GEMM_FOR_CONV --copt=-DUSE_OPENBLAS --copt=-isystem --copt=${OPENBLAS_INSTALL_PATH}/include/ --copt=-std=gnu11 --copt=-DS_IREAD=S_IRUSR --copt=-DS_IWRITE=S_IWUSR @@ -87,8 +90,15 @@ if [[ $1 == "PI_ONE" ]]; then --linkopt=-l:libopenblas.a" echo "Building for the Pi One/Zero, with no NEON support" WHEEL_ARCH=linux_armv6l +elif [[ $1 == "AARCH64" ]]; then + PI_COPTS="--config=elinux_aarch64 + --copt=-std=gnu11 + --copt=-O3" + WHEEL_ARCH=linux_aarch64 + echo "Building for the aarch64" else PI_COPTS="--copt=-march=armv7-a --copt=-mfpu=neon-vfpv4 + --cpu=armeabi 
--crosstool_top=@local_config_arm_compiler//:toolchain --copt=-std=gnu11 --copt=-DS_IREAD=S_IRUSR --copt=-DS_IWRITE=S_IWUSR --copt=-O3 --copt=-fno-tree-pre --copt=-fpermissive --copt=-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 @@ -107,8 +117,7 @@ cd ${WORKSPACE_PATH} bazel build -c opt ${PI_COPTS} \ --config=monolithic \ --copt=-funsafe-math-optimizations --copt=-ftree-vectorize \ - --copt=-fomit-frame-pointer --cpu=armeabi \ - --crosstool_top=@local_config_arm_compiler//:toolchain \ + --copt=-fomit-frame-pointer \ --define tensorflow_mkldnn_contraction_kernel=0 \ --verbose_failures \ //tensorflow:libtensorflow.so \ From 6af48136c0580c4829f016a3c6058b9e38ce3473 Mon Sep 17 00:00:00 2001 From: Karim Nosir Date: Fri, 24 Jul 2020 18:50:03 -0700 Subject: [PATCH 1323/2522] Update reorder reshape with binary ops to handle activation functions too PiperOrigin-RevId: 323112543 Change-Id: I53a0f7c6b64625a5851aa898c40a7c98df0c2d83 --- .../compiler/mlir/lite/tests/optimize.mlir | 26 +++++++++++++++++++ .../mlir/lite/transforms/optimize_patterns.td | 4 +-- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/tests/optimize.mlir b/tensorflow/compiler/mlir/lite/tests/optimize.mlir index 67d1b314fd2..cafa654f0c0 100644 --- a/tensorflow/compiler/mlir/lite/tests/optimize.mlir +++ b/tensorflow/compiler/mlir/lite/tests/optimize.mlir @@ -400,6 +400,32 @@ func @FuseFullyConnectedReshapeAddConst(%arg0: tensor<40x37xf32>, %arg1: tensor< // FOLD: return %[[fc]] } +// CHECK-LABEL: @FuseFullyConnectedReshapeAddConstWithActivation +// FOLD-LABEL: @FuseFullyConnectedReshapeAddConstWithActivation +func @FuseFullyConnectedReshapeAddConstWithActivation(%arg0: tensor<40x37xf32>, %arg1: tensor<40x37xf32>) -> tensor<40x40xf32> { + %cst = constant dense<3.0> : tensor<40x40xf32> + %cst2 = constant dense<2.0> : tensor<40xf32> + %shape1 = constant dense<[1, 40, 40]> : tensor<3xi32> + %shape2 = constant dense<[40, 40]> : tensor<2xi32> + + %0 = "tfl.fully_connected"(%arg0, %arg1, %cst) {fused_activation_function = "NONE", keep_num_dims = false, weights_format = "DEFAULT"} : (tensor<40x37xf32>, tensor<40x37xf32>, tensor<40x40xf32>) -> (tensor<40x40xf32>) + %1 = "tfl.reshape"(%0, %shape1) : (tensor<40x40xf32>, tensor<3xi32>) -> tensor<1x40x40xf32> + %2 = "tfl.add"(%1, %cst2) {fused_activation_function = "RELU6"} : (tensor<1x40x40xf32>, tensor<40xf32>) -> tensor<1x40x40xf32> + %3 = "tfl.reshape"(%2, %shape2) : (tensor<1x40x40xf32>, tensor<2xi32>) -> tensor<40x40xf32> + + return %3 : tensor<40x40xf32> + + // CHECK: %[[cst:.*]] = constant dense<5.000000e+00> : tensor<40x40xf32> + // CHECK: %[[fc:.*]] = "tfl.fully_connected"(%arg0, %arg1, %[[cst]]) {fused_activation_function = "RELU6", keep_num_dims = false, weights_format = "DEFAULT"} + // CHECK: %[[rs1:.*]] = "tfl.reshape"(%[[fc]] + // CHECK: %[[rs2:.*]] = "tfl.reshape"(%[[rs1]] + // CHECK: return %[[rs2]] + + // FOLD: %[[cst:.*]] = constant dense<5.000000e+00> : tensor<40x40xf32> + // FOLD: %[[fc:.*]] = "tfl.fully_connected"(%arg0, %arg1, %[[cst]]) {fused_activation_function = "RELU6", keep_num_dims = false, weights_format = "DEFAULT"} + // FOLD: return %[[fc]] +} + // CHECK-LABEL: @NotReorderReshapeAddIfNotBroadcastableAfter func @NotReorderReshapeAddIfNotBroadcastableAfter(%arg0: tensor<40x10x4xf32>) -> tensor<40x40xf32> { %cst = constant dense<2.0> : tensor<40xf32> diff --git a/tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td b/tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td index 0533e810377..bffcf975ccc 100644 --- 
a/tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td +++ b/tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td @@ -341,8 +341,8 @@ foreach BinaryOp = [TFL_AddOp, TFL_SubOp, TFL_DivOp, TFL_MulOp] in { // make sure $rhs is the tail shape of $lhs. def MoveBinaryOpBeforeReshape#BinaryOp : Pat< (BinaryOp (TFL_ReshapeOp:$lhs $input, (ConstantOp:$shape $s)), - (ConstantOp:$rhs $a), TFL_AF_None), - (TFL_ReshapeOp (BinaryOp $input, $rhs, TFL_AF_None), $shape), + (ConstantOp:$rhs $a), $act_fn), + (TFL_ReshapeOp (BinaryOp $input, $rhs, $act_fn), $shape), // The broadcasting of "BinaryOp" only happens in the lower // dimensions, and the higher dimensions are same, so we know the // result and input of the "BinaryOp" in the source pattern have From e8d0e18c730779c0e11854f367f97d99d1936703 Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Fri, 24 Jul 2020 18:53:24 -0700 Subject: [PATCH 1324/2522] [tf.data] Adding support for serialization of input pipelines that use `from_tensors` or `from_tensor_slices` with dataset input. The new functionality is tested both directly through `graphRoundTrip` test utilitiy as well as through the private `replicate` transformation, which is used by `tf.distribute` to distribute input pipeline across processes. In addition, this CL also makes minor improvements to the `replicate` tests -- consolidating related tests to one file and increasing test coverage. PiperOrigin-RevId: 323112823 Change-Id: I8b296c9ff5f4a0be03d6c29194044acfd4e54618 --- tensorflow/core/framework/dataset.cc | 40 +++++ tensorflow/core/framework/dataset.h | 9 + .../core/kernels/data/tensor_dataset_op.cc | 2 +- .../kernels/data/tensor_slice_dataset_op.cc | 2 +- .../data/experimental/kernel_tests/BUILD | 14 -- .../kernel_tests/replicate_cluster_test.py | 114 ------------ .../kernel_tests/replicate_test.py | 163 +++++++++++++++--- .../kernel_tests/from_tensor_slices_test.py | 17 ++ .../data/kernel_tests/from_tensors_test.py | 7 + 9 files changed, 214 insertions(+), 154 deletions(-) delete mode 100644 tensorflow/python/data/experimental/kernel_tests/replicate_cluster_test.py diff --git a/tensorflow/core/framework/dataset.cc b/tensorflow/core/framework/dataset.cc index a4c96fe5b1f..dcae9ab3ef3 100644 --- a/tensorflow/core/framework/dataset.cc +++ b/tensorflow/core/framework/dataset.cc @@ -449,6 +449,46 @@ Status DatasetBase::DatasetGraphDefBuilder::AddInputDataset( return status; } +Status DatasetBase::DatasetGraphDefBuilder::AddDatasetOrTensor( + SerializationContext* ctx, const Tensor& t, Node** output) { + if (t.dtype() == DT_VARIANT) { + // If the input tensor is a variant, it may represent a multi-dimensional + // array of datasets. We attempt to decode each dataset so that we can use + // their custom serialization logic and combine the result of their + // individual serializations using the `Pack` operation. + // + // If this fails, we fallback to using its Variant::Encode() based + // serialization. 
+ Status s = AddDatasetOrTensorHelper(ctx, t, output); + if (s.ok()) { + return s; + } + } + return AddTensor(t, output); +} + +Status DatasetBase::DatasetGraphDefBuilder::AddDatasetOrTensorHelper( + SerializationContext* ctx, const Tensor& t, Node** output) { + if (t.dims() == 0) { + DatasetBase* dataset; + TF_RETURN_IF_ERROR(GetDatasetFromVariantTensor(t, &dataset)); + return AddInputDataset(ctx, dataset, output); + } + std::vector nodes; + for (int i = 0; i < t.dim_size(0); ++i) { + Node* node; + TF_RETURN_IF_ERROR(AddDatasetOrTensorHelper(ctx, t.SubSlice(i), &node)); + nodes.emplace_back(node); + } + auto op_name = "Pack"; + auto opts = builder()->opts(); + NodeBuilder node_builder(opts.GetNameForOp(op_name), op_name, + opts.op_registry()); + node_builder.Input(std::move(nodes)); + *output = opts.FinalizeBuilder(&node_builder); + return Status::OK(); +} + DatasetBaseIterator::DatasetBaseIterator(const BaseParams& params) : params_(params) { params_.dataset->Ref(); diff --git a/tensorflow/core/framework/dataset.h b/tensorflow/core/framework/dataset.h index d7f8cd752ca..358edf159aa 100644 --- a/tensorflow/core/framework/dataset.h +++ b/tensorflow/core/framework/dataset.h @@ -246,6 +246,9 @@ class GraphDefBuilderWrapper { SetAttrValue(value, attr); } + protected: + GraphDefBuilder* builder() { return b_; } + private: void AddPlaceholderInternal(const Tensor& val, Node** output); void AddTensorInternal(const Tensor& val, Node** output); @@ -830,6 +833,12 @@ class DatasetBase : public core::RefCounted { : GraphDefBuilderWrapper(b) {} Status AddInputDataset(SerializationContext* ctx, const DatasetBase* dataset, Node** output); + Status AddDatasetOrTensor(SerializationContext* ctx, const Tensor& val, + Node** output); + + private: + Status AddDatasetOrTensorHelper(SerializationContext* ctx, + const Tensor& val, Node** output); }; // Serializes the dataset into a `GraphDef`, which has two uses: diff --git a/tensorflow/core/kernels/data/tensor_dataset_op.cc b/tensorflow/core/kernels/data/tensor_dataset_op.cc index 20540cf9a57..78cc06a54c5 100644 --- a/tensorflow/core/kernels/data/tensor_dataset_op.cc +++ b/tensorflow/core/kernels/data/tensor_dataset_op.cc @@ -75,7 +75,7 @@ class TensorDatasetOp::Dataset : public DatasetBase { for (const Tensor& t : tensors_) { Node* node; if (ctx->serialize_data_tensors()) { - TF_RETURN_IF_ERROR(b->AddTensor(t, &node)); + TF_RETURN_IF_ERROR(b->AddDatasetOrTensor(ctx, t, &node)); } else { TF_RETURN_IF_ERROR(b->AddPlaceholder(t, &node)); DCHECK_NE(ctx->input_list(), nullptr); diff --git a/tensorflow/core/kernels/data/tensor_slice_dataset_op.cc b/tensorflow/core/kernels/data/tensor_slice_dataset_op.cc index 8831f8d548d..e4f27f55327 100644 --- a/tensorflow/core/kernels/data/tensor_slice_dataset_op.cc +++ b/tensorflow/core/kernels/data/tensor_slice_dataset_op.cc @@ -80,7 +80,7 @@ class TensorSliceDatasetOp::Dataset : public DatasetBase { for (const Tensor& t : tensors_) { Node* node; if (ctx->serialize_data_tensors()) { - TF_RETURN_IF_ERROR(b->AddTensor(t, &node)); + TF_RETURN_IF_ERROR(b->AddDatasetOrTensor(ctx, t, &node)); } else { TF_RETURN_IF_ERROR(b->AddPlaceholder(t, &node)); DCHECK_NE(ctx->input_list(), nullptr); diff --git a/tensorflow/python/data/experimental/kernel_tests/BUILD b/tensorflow/python/data/experimental/kernel_tests/BUILD index 63cbdcbd676..18b748904e6 100644 --- a/tensorflow/python/data/experimental/kernel_tests/BUILD +++ b/tensorflow/python/data/experimental/kernel_tests/BUILD @@ -642,20 +642,6 @@ tf_py_test( ], ) -tf_py_test( - name = 
"replicate_cluster_test", - srcs = ["replicate_cluster_test.py"], - grpc_enabled = True, - tags = ["no_oss"], - deps = [ - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework_combinations", - "//tensorflow/python/data/experimental/ops:distribute", - "//tensorflow/python/data/kernel_tests:test_base", - "//tensorflow/python/data/ops:dataset_ops", - ], -) - cuda_py_test( name = "scan_test", size = "small", diff --git a/tensorflow/python/data/experimental/kernel_tests/replicate_cluster_test.py b/tensorflow/python/data/experimental/kernel_tests/replicate_cluster_test.py deleted file mode 100644 index a0461ff7e7e..00000000000 --- a/tensorflow/python/data/experimental/kernel_tests/replicate_cluster_test.py +++ /dev/null @@ -1,114 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Graph mode cluster tests for the experimental `replicate` transformation.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl.testing import parameterized - -from tensorflow.core.protobuf import config_pb2 -from tensorflow.python.client import session -from tensorflow.python.data.experimental.ops import distribute -from tensorflow.python.data.kernel_tests import test_base -from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.framework import combinations -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import errors -from tensorflow.python.framework import ops -from tensorflow.python.framework import test_util -from tensorflow.python.ops import variable_scope -from tensorflow.python.platform import test - - -class ReplicateClusterTest(test_base.DatasetTestBase, parameterized.TestCase): - - def setUp(self): - super(ReplicateClusterTest, self).setUp() - # Start the local server. 
- worker_config = config_pb2.ConfigProto() - worker_config.device_count["CPU"] = 2 - worker, _ = test_util.create_local_cluster( - 3, 0, worker_config=worker_config) - self._device0 = "/job:worker/replica:0/task:0/device:CPU:0" - self._device1 = "/job:worker/replica:0/task:1/device:CPU:0" - self._device2 = "/job:worker/replica:0/task:2/device:CPU:0" - self._target = worker[0].target - - @combinations.generate( - combinations.combine(tf_api_version=[1], mode=["graph"])) - def testBasic(self): - with ops.device(self._device0): - dataset0 = dataset_ops.Dataset.range(100) - replicated_ds = distribute.replicate(dataset0, - [self._device1, self._device2]) - dataset1 = replicated_ds[self._device1] - dataset2 = replicated_ds[self._device2] - with ops.device(self._device0): - get_next = self.getNext(dataset0) - with ops.device(self._device1): - get_next1 = self.getNext(dataset1) - with ops.device(self._device2): - get_next2 = self.getNext(dataset2) - - with session.Session(self._target) as sess: - for i in range(100): - self.assertEqual(i, sess.run(get_next())) - self.assertEqual(i, sess.run(get_next1())) - self.assertEqual(i, sess.run(get_next2())) - - @combinations.generate( - combinations.combine(tf_api_version=[1], mode=["graph"])) - def testMap(self): - with ops.device(self._device0): - dataset0 = dataset_ops.Dataset.range(100).map(lambda x: x * 2) - replicated_ds = distribute.replicate(dataset0, - [self._device1, self._device2]) - dataset1 = replicated_ds[self._device1] - dataset2 = replicated_ds[self._device2] - with ops.device(self._device0): - get_next = self.getNext(dataset0) - with ops.device(self._device1): - get_next1 = self.getNext(dataset1) - with ops.device(self._device2): - get_next2 = self.getNext(dataset2) - - with session.Session(self._target) as sess: - for i in range(100): - self.assertEqual(i * 2, sess.run(get_next())) - self.assertEqual(i * 2, sess.run(get_next1())) - self.assertEqual(i * 2, sess.run(get_next2())) - - @combinations.generate( - combinations.combine(tf_api_version=[1], mode=["graph"])) - def testVariableInput(self): - with ops.device(self._device0): - counter_var = variable_scope.get_variable( - "counter", (), dtypes.int32, use_resource=True) - dataset0 = dataset_ops.Dataset.range(100).map( - lambda _: counter_var.assign_add(1)) - replicated_ds = distribute.replicate(dataset0, - [self._device1, self._device2]) - dataset1 = replicated_ds[self._device1] - with ops.device(self._device1): - it1 = dataset_ops.make_initializable_iterator(dataset1) - # We don't support stateful ops across processes in functions as of now. - with session.Session(self._target) as sess: - with self.assertRaises(errors.OpError): - sess.run(it1.initializer) - - -if __name__ == "__main__": - test.main() diff --git a/tensorflow/python/data/experimental/kernel_tests/replicate_test.py b/tensorflow/python/data/experimental/kernel_tests/replicate_test.py index 521b38bf5d3..4995b054011 100644 --- a/tensorflow/python/data/experimental/kernel_tests/replicate_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/replicate_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Eager mode tests for the experimental `replicate` transformation.""" +"""Tests for the private `replicate()` transformation.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -23,15 +23,18 @@ from tensorflow.core.protobuf import cluster_pb2 from tensorflow.core.protobuf import config_pb2 from tensorflow.core.protobuf import tensorflow_server_pb2 from tensorflow.python import pywrap_tfe +from tensorflow.python.client import session from tensorflow.python.data.experimental.ops import distribute from tensorflow.python.data.experimental.ops import distribute_options from tensorflow.python.data.kernel_tests import test_base from tensorflow.python.data.ops import dataset_ops from tensorflow.python.eager import context from tensorflow.python.framework import combinations +from tensorflow.python.framework import config from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util from tensorflow.python.ops import random_ops from tensorflow.python.ops import variable_scope from tensorflow.python.platform import test @@ -42,12 +45,18 @@ class LocalReplicateTest(test_base.DatasetTestBase, parameterized.TestCase): def __init__(self, methodName="runTest"): # pylint: disable=invalid-name super(LocalReplicateTest, self).__init__(methodName) + cpus = config.list_physical_devices("CPU") + # Set 3 virtual CPUs + config.set_logical_device_configuration(cpus[0], [ + context.LogicalDeviceConfiguration(), + context.LogicalDeviceConfiguration(), + context.LogicalDeviceConfiguration() + ]) self._device0 = "/device:CPU:0" self._device1 = "/device:CPU:1" self._device2 = "/device:CPU:2" - @combinations.generate( - combinations.combine(tf_api_version=[1], mode=["graph", "eager"])) + @combinations.generate(test_base.default_test_combinations()) def testBasic(self): with ops.device(self._device0): dataset0 = dataset_ops.Dataset.range(100) @@ -63,8 +72,43 @@ class LocalReplicateTest(test_base.DatasetTestBase, parameterized.TestCase): with ops.device(self._device2): self.assertDatasetProduces(dataset2, range(100)) - @combinations.generate( - combinations.combine(tf_api_version=[1], mode=["graph", "eager"])) + @combinations.generate(test_base.default_test_combinations()) + def testFromTensorsWithDataset(self): + with ops.device(self._device0): + dataset0 = dataset_ops.Dataset.range(100) + dataset0 = dataset_ops.Dataset.from_tensors(dataset0) + dataset0 = dataset0.flat_map(lambda x: x) + replicated_ds = distribute.replicate(dataset0, + [self._device1, self._device2]) + dataset1 = replicated_ds[self._device1] + dataset2 = replicated_ds[self._device2] + + with ops.device(self._device0): + self.assertDatasetProduces(dataset0, range(100)) + with ops.device(self._device1): + self.assertDatasetProduces(dataset1, range(100)) + with ops.device(self._device2): + self.assertDatasetProduces(dataset2, range(100)) + + @combinations.generate(test_base.default_test_combinations()) + def testFromTensorSlicesWithDataset(self): + with ops.device(self._device0): + dataset0 = dataset_ops.Dataset.range(100) + dataset0 = dataset_ops.Dataset.from_tensor_slices([dataset0]) + dataset0 = dataset0.flat_map(lambda x: x) + replicated_ds = distribute.replicate(dataset0, + [self._device1, self._device2]) + dataset1 = replicated_ds[self._device1] + dataset2 = 
replicated_ds[self._device2] + + with ops.device(self._device0): + self.assertDatasetProduces(dataset0, range(100)) + with ops.device(self._device1): + self.assertDatasetProduces(dataset1, range(100)) + with ops.device(self._device2): + self.assertDatasetProduces(dataset2, range(100)) + + @combinations.generate(test_base.default_test_combinations()) def testVariableInput(self): with ops.device(self._device0): counter_var = variable_scope.get_variable( @@ -86,8 +130,7 @@ class LocalReplicateTest(test_base.DatasetTestBase, parameterized.TestCase): self.assertDatasetProduces( dataset2, range(201, 301), requires_initialization=True) - @combinations.generate( - combinations.combine(tf_api_version=[1], mode=["graph", "eager"])) + @combinations.generate(test_base.default_test_combinations()) def testExternalStatePolicyIgnore(self): with ops.device(self._device0): dataset0 = dataset_ops.Dataset.range(100).map( @@ -117,8 +160,7 @@ class LocalReplicateTest(test_base.DatasetTestBase, parameterized.TestCase): self.evaluate(get_next1()) self.evaluate(get_next2()) - @combinations.generate( - combinations.combine(tf_api_version=[1], mode=["graph", "eager"])) + @combinations.generate(test_base.default_test_combinations()) def testExternalStatePolicyWarn(self): with ops.device(self._device0): dataset0 = dataset_ops.Dataset.range(100).map( @@ -148,8 +190,7 @@ class LocalReplicateTest(test_base.DatasetTestBase, parameterized.TestCase): self.evaluate(get_next1()) self.evaluate(get_next2()) - @combinations.generate( - combinations.combine(tf_api_version=[1], mode=["graph", "eager"])) + @combinations.generate(test_base.default_test_combinations()) def testExternalStatePolicyFail(self): with ops.device(self._device0): dataset0 = dataset_ops.Dataset.range(100).map( @@ -181,9 +222,6 @@ class LocalReplicateTest(test_base.DatasetTestBase, parameterized.TestCase): self.evaluate(get_next2()) -JOB_NAME = "remote_device" - - def _get_server_def(job_name, local_server_port, remote_server_addresses, task_index): """Returns a server def with a single job + multiple tasks.""" @@ -204,26 +242,27 @@ def _get_server_def(job_name, local_server_port, remote_server_addresses, return server_def -# Pure eager mode test that sets up a cluster of processes. -class RemoteReplicateTest(test_base.DatasetTestBase, parameterized.TestCase): +class EagerClusterReplicateTest(test_base.DatasetTestBase, + parameterized.TestCase): def __init__(self, methodName="runTest"): # pylint: disable=invalid-name - super(RemoteReplicateTest, self).__init__(methodName) + super(EagerClusterReplicateTest, self).__init__(methodName) + self._job_name = "remove_device" self._cached_server1 = server_lib.Server.create_local_server() self._cached_server2 = server_lib.Server.create_local_server() self._cached_server1_target = self._cached_server1.target[len("grpc://"):] self._cached_server2_target = self._cached_server2.target[len("grpc://"):] - self._device0 = "/job:%s/replica:0/task:0/device:CPU:0" % JOB_NAME - self._device1 = "/job:%s/replica:0/task:1/device:CPU:0" % JOB_NAME - self._device2 = "/job:%s/replica:0/task:2/device:CPU:0" % JOB_NAME + self._device0 = "/job:%s/replica:0/task:0/device:CPU:0" % self._job_name + self._device1 = "/job:%s/replica:0/task:1/device:CPU:0" % self._job_name + self._device2 = "/job:%s/replica:0/task:2/device:CPU:0" % self._job_name def setUp(self): - super(RemoteReplicateTest, self).setUp() + super(EagerClusterReplicateTest, self).setUp() # Start the local server. 
local_port = pywrap_tfe.TF_PickUnusedPortOrDie() context.set_server_def( server_def=_get_server_def( - JOB_NAME, + self._job_name, local_server_port=local_port, remote_server_addresses=[ self._cached_server1_target, self._cached_server2_target @@ -285,7 +324,83 @@ class RemoteReplicateTest(test_base.DatasetTestBase, parameterized.TestCase): dataset2, range(201, 301), requires_initialization=True) +class GraphClusterReplicateTest(test_base.DatasetTestBase, + parameterized.TestCase): + + def setUp(self): + super(GraphClusterReplicateTest, self).setUp() + # Start the local server. + worker_config = config_pb2.ConfigProto() + worker_config.device_count["CPU"] = 2 + worker, _ = test_util.create_local_cluster( + 3, 0, worker_config=worker_config) + self._device0 = "/job:worker/replica:0/task:0/device:CPU:0" + self._device1 = "/job:worker/replica:0/task:1/device:CPU:0" + self._device2 = "/job:worker/replica:0/task:2/device:CPU:0" + self._target = worker[0].target + + @combinations.generate( + combinations.combine(tf_api_version=[1], mode=["graph"])) + def testBasic(self): + with ops.device(self._device0): + dataset0 = dataset_ops.Dataset.range(100) + replicated_ds = distribute.replicate(dataset0, + [self._device1, self._device2]) + dataset1 = replicated_ds[self._device1] + dataset2 = replicated_ds[self._device2] + with ops.device(self._device0): + get_next = self.getNext(dataset0) + with ops.device(self._device1): + get_next1 = self.getNext(dataset1) + with ops.device(self._device2): + get_next2 = self.getNext(dataset2) + + with session.Session(self._target) as sess: + for i in range(100): + self.assertEqual(i, sess.run(get_next())) + self.assertEqual(i, sess.run(get_next1())) + self.assertEqual(i, sess.run(get_next2())) + + @combinations.generate( + combinations.combine(tf_api_version=[1], mode=["graph"])) + def testMap(self): + with ops.device(self._device0): + dataset0 = dataset_ops.Dataset.range(100).map(lambda x: x * 2) + replicated_ds = distribute.replicate(dataset0, + [self._device1, self._device2]) + dataset1 = replicated_ds[self._device1] + dataset2 = replicated_ds[self._device2] + with ops.device(self._device0): + get_next = self.getNext(dataset0) + with ops.device(self._device1): + get_next1 = self.getNext(dataset1) + with ops.device(self._device2): + get_next2 = self.getNext(dataset2) + + with session.Session(self._target) as sess: + for i in range(100): + self.assertEqual(i * 2, sess.run(get_next())) + self.assertEqual(i * 2, sess.run(get_next1())) + self.assertEqual(i * 2, sess.run(get_next2())) + + @combinations.generate( + combinations.combine(tf_api_version=[1], mode=["graph"])) + def testVariableInput(self): + with ops.device(self._device0): + counter_var = variable_scope.get_variable( + "counter", (), dtypes.int32, use_resource=True) + dataset0 = dataset_ops.Dataset.range(100).map( + lambda _: counter_var.assign_add(1)) + replicated_ds = distribute.replicate(dataset0, + [self._device1, self._device2]) + dataset1 = replicated_ds[self._device1] + with ops.device(self._device1): + it1 = dataset_ops.make_initializable_iterator(dataset1) + # We don't support stateful ops across processes in functions as of now. 
+ with session.Session(self._target) as sess: + with self.assertRaises(errors.OpError): + sess.run(it1.initializer) + + if __name__ == "__main__": - ops.enable_eager_execution( - config=config_pb2.ConfigProto(device_count={"CPU": 3})) test.main() diff --git a/tensorflow/python/data/kernel_tests/from_tensor_slices_test.py b/tensorflow/python/data/kernel_tests/from_tensor_slices_test.py index a16518d1111..e7193567457 100644 --- a/tensorflow/python/data/kernel_tests/from_tensor_slices_test.py +++ b/tensorflow/python/data/kernel_tests/from_tensor_slices_test.py @@ -292,6 +292,23 @@ class FromTensorSlicesTest(test_base.DatasetTestBase, parameterized.TestCase): dataset_ops.get_legacy_output_types(dataset)) self.assertDatasetProduces(dataset, expected_output) + @combinations.generate( + combinations.times(test_base.default_test_combinations(), + combinations.combine(depth=[1, 2, 3]))) + def testDatasetInputSerialization(self, depth): + dataset = dataset_ops.Dataset.range(100) + for _ in range(depth): + dataset = [dataset, dataset] + dataset = dataset_ops.Dataset.from_tensor_slices(dataset) + for _ in range(depth - 1): + dataset = dataset.unbatch() + dataset = dataset.flat_map(lambda x: x) + dataset = self.graphRoundTrip(dataset) + expected = list(range(100)) + list(range(100)) + for _ in range(depth - 1): + expected = expected + expected + self.assertDatasetProduces(dataset, expected) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/data/kernel_tests/from_tensors_test.py b/tensorflow/python/data/kernel_tests/from_tensors_test.py index c899c156739..e526745e0e5 100644 --- a/tensorflow/python/data/kernel_tests/from_tensors_test.py +++ b/tensorflow/python/data/kernel_tests/from_tensors_test.py @@ -265,6 +265,13 @@ class FromTensorsTest(test_base.DatasetTestBase, parameterized.TestCase): self.assertEqual(sess.run(iterator.get_next()), 2) + @combinations.generate(test_base.default_test_combinations()) + def testDatasetInputSerialization(self): + dataset = dataset_ops.Dataset.range(100) + dataset = dataset_ops.Dataset.from_tensors(dataset).flat_map(lambda x: x) + dataset = self.graphRoundTrip(dataset) + self.assertDatasetProduces(dataset, range(100)) + if __name__ == "__main__": test.main() From 63dc8f8e14078fa079f25f6c052090b1a2925df5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 24 Jul 2020 18:55:36 -0700 Subject: [PATCH 1325/2522] [Profiler] Add more metrics. 
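The new fields split the step time into four percentages. input_percent, output_percent and compute_percent are each 100 * total_ms / total_step_time_ms for the corresponding per-step totals, and idle_percent is the clamped remainder:

    idle_percent = max(0, 100 - input_percent - output_percent - compute_percent)

so kernel-launch, device-to-device, host-compute, host-compile and unknown time all land under idle. For example, a step that is 20% input, 10% output and 50% device compute is reported as 20% idle.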
PiperOrigin-RevId: 323112988 Change-Id: Ib1147c8ada8d30be11d045b3229130665c272138 --- .../op_stats_to_input_pipeline_analysis.cc | 18 +++++++++++++++++- .../profiler/protobuf/input_pipeline.proto | 18 +++++++++++++++--- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc b/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc index 37749d320e5..6828950e6a5 100644 --- a/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc +++ b/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc @@ -567,7 +567,11 @@ InputPipelineAnalysisResult ConvertOpStatsToInputPipelineAnalysis( InputPipelineAnalysisRecommendation recommendation = GenerateRecommendation(); BottleneckAnalysis bottleneck_analysis = ComputeBottleneckAnalysis( result.input_time_breakdown(), result.step_details()); - result.set_overall_input_percent(bottleneck_analysis.input_percent()); + result.set_input_percent(bottleneck_analysis.input_percent()); + result.set_output_percent(bottleneck_analysis.output_percent()); + result.set_idle_percent(bottleneck_analysis.idle_percent()); + result.set_compute_percent(bottleneck_analysis.compute_percent()); + recommendation.mutable_bottleneck_analysis()->PackFrom(bottleneck_analysis); *recommendation.mutable_summary_next_step() = GetSummaryNextStep(bottleneck_analysis.input_classification(), @@ -654,6 +658,7 @@ BottleneckAnalysis ComputeBottleneckAnalysis( double total_host_compute_ms = 0; double total_host_prepare_ms = 0; double total_host_compile_ms = 0; + double total_device_compute_ms = 0; double total_device_to_device_ms = 0; double total_unknown_ms = 0; @@ -670,6 +675,7 @@ BottleneckAnalysis ComputeBottleneckAnalysis( details.host_wait_input_ms() + details.host_to_device_ms(); total_output_ms += details.output_ms(); total_host_prepare_ms += details.host_prepare_ms(); + total_device_compute_ms += details.device_compute_ms(); total_device_to_device_ms += details.device_to_device_ms(); total_host_compute_ms += details.host_compute_ms(); total_host_compile_ms += details.host_compile_ms(); @@ -689,6 +695,12 @@ BottleneckAnalysis ComputeBottleneckAnalysis( return analysis; } double input_percent = 100.0 * total_input_ms / total_step_time_ms; + double output_percent = 100.0 * total_output_ms / total_step_time_ms; + double compute_percent = 100.0 * total_device_compute_ms / total_step_time_ms; + // idle_percent includes host_prepare (i.e. kernel launch, device-to-device, + // host compute, host compile, and unknown. 
+ double idle_percent = + std::max(0.0, 100.0 - input_percent - output_percent - compute_percent); double kernel_launch_percent = 100.0 * total_host_prepare_ms / total_step_time_ms; double all_other_percent = 100.0 * total_unknown_ms / total_step_time_ms; @@ -710,6 +722,10 @@ BottleneckAnalysis ComputeBottleneckAnalysis( BottleneckAnalysis analysis; analysis.set_input_percent(input_percent); + analysis.set_output_percent(output_percent); + analysis.set_idle_percent(idle_percent); + analysis.set_compute_percent(compute_percent); + analysis.set_input_classification(input_classification); analysis.set_input_statement(input_statement); analysis.set_kernel_launch_classification(kernel_launch_classification); diff --git a/tensorflow/core/profiler/protobuf/input_pipeline.proto b/tensorflow/core/profiler/protobuf/input_pipeline.proto index abd3ff78323..b20942d3d36 100644 --- a/tensorflow/core/profiler/protobuf/input_pipeline.proto +++ b/tensorflow/core/profiler/protobuf/input_pipeline.proto @@ -9,6 +9,12 @@ import "tensorflow/core/profiler/protobuf/diagnostics.proto"; message BottleneckAnalysis { // Percentage of step time that is spent on input. double input_percent = 7; + // Percentage of step time that is spent on output. + double output_percent = 8; + // Percentage of step time that is idle for non-I/O-related reason. + double idle_percent = 9; + // Percentage of step time that is spent on compute. + double compute_percent = 10; // Indicates if input is a bottleneck. Possible values: "host", "device", // "both", or "unknown" string input_classification = 1; @@ -104,7 +110,7 @@ message InputPipelineAnalysisRecommendation { // An analysis of different types of bottlenecks. Can be unpacked into a // BottleneckAnalysis. google.protobuf.Any bottleneck_analysis = 2; - // A suggested of step to take next. + // A suggested step to take next. string summary_next_step = 3; } @@ -139,8 +145,14 @@ message InputPipelineAnalysisResult { StepSummary step_time_summary = 2; // Summary of all input-related stall as percentage of step duration. StepSummary input_percent_summary = 3; - // Overall percentage of step time that is waiting for input. - double overall_input_percent = 11; + // Percentage of step time that is waiting for input. + double input_percent = 11; + // Percentage of step time that is doing output. + double output_percent = 13; + // Percentage of step time that is idle for non-I/O-related reason. + double idle_percent = 14; + // Percentage of step time that is doing compute. + double compute_percent = 15; // Details of each step. Can be unpacked into a PerGenericStepDetails. repeated google.protobuf.Any step_details = 4; // The breakdown of the input processing time. From b2b9ca5ef1266107b784f9ec95d9894f194db103 Mon Sep 17 00:00:00 2001 From: Haifeng Jin Date: Fri, 24 Jul 2020 19:32:15 -0700 Subject: [PATCH 1326/2522] Remove private API usage of ragged_tensor.is_ragged. 
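Keras now has its own tf_utils.is_ragged helper (an isinstance check against RaggedTensor and RaggedTensorValue), so layers no longer need the private ragged_tensor.is_ragged. A minimal usage sketch, mirroring the unit test added below:

    from tensorflow.python.keras.utils import tf_utils
    from tensorflow.python.ops.ragged import ragged_tensor

    rt = ragged_tensor.RaggedTensor.from_row_splits(
        values=[3, 1, 4, 1, 5, 9, 2, 6], row_splits=[0, 4, 4, 7, 8, 8])
    assert tf_utils.is_ragged(rt)                # RaggedTensor -> True
    assert not tf_utils.is_ragged([1., 2., 3.])  # plain list -> False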
PiperOrigin-RevId: 323115668 Change-Id: Id4412a09a00bd4b24109d85af8d85aa0ea896b4c --- tensorflow/python/keras/engine/base_layer_utils.py | 3 ++- .../python/keras/engine/base_preprocessing_layer.py | 2 +- .../keras/layers/preprocessing/category_crossing.py | 3 ++- .../keras/layers/preprocessing/discretization.py | 4 ++-- .../python/keras/layers/preprocessing/hashing.py | 3 ++- .../python/keras/layers/preprocessing/table_utils.py | 3 ++- .../keras/layers/preprocessing/text_vectorization.py | 6 +++--- tensorflow/python/keras/utils/tf_utils.py | 9 +++++++++ tensorflow/python/keras/utils/tf_utils_test.py | 12 ++++++++++++ 9 files changed, 35 insertions(+), 10 deletions(-) diff --git a/tensorflow/python/keras/engine/base_layer_utils.py b/tensorflow/python/keras/engine/base_layer_utils.py index f393b73cf1d..6fa955399d9 100644 --- a/tensorflow/python/keras/engine/base_layer_utils.py +++ b/tensorflow/python/keras/engine/base_layer_utils.py @@ -30,6 +30,7 @@ from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_util from tensorflow.python.keras import backend from tensorflow.python.keras.utils import control_flow_util +from tensorflow.python.keras.utils import tf_utils from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_util_v2 from tensorflow.python.ops import control_flow_v2_func_graphs @@ -216,7 +217,7 @@ def _create_keras_history_helper(tensors, processed_ops, created_layers): if sparse_tensor.is_sparse(tensor): sparse_ops.append(tensor.op) continue - if ragged_tensor.is_ragged(tensor): + if tf_utils.is_ragged(tensor): # Ragged tensors don't have an op property ragged_tensors.append(tensor) continue diff --git a/tensorflow/python/keras/engine/base_preprocessing_layer.py b/tensorflow/python/keras/engine/base_preprocessing_layer.py index ac4a0669ed7..f5577bf058e 100644 --- a/tensorflow/python/keras/engine/base_preprocessing_layer.py +++ b/tensorflow/python/keras/engine/base_preprocessing_layer.py @@ -245,7 +245,7 @@ class CombinerPreprocessingLayer(PreprocessingLayer): def convert_to_list(values, sparse_default_value=None): """Convert a TensorLike, CompositeTensor, or ndarray into a Python list.""" - if ragged_tensor.is_ragged(values): + if tf_utils.is_ragged(values): # There is a corner case when dealing with ragged tensors: if you get an # actual RaggedTensor (not a RaggedTensorValue) passed in non-eager mode, # you can't call to_list() on it without evaluating it first. 
However, diff --git a/tensorflow/python/keras/layers/preprocessing/category_crossing.py b/tensorflow/python/keras/layers/preprocessing/category_crossing.py index c147e3ab770..bdb29d21c4e 100644 --- a/tensorflow/python/keras/layers/preprocessing/category_crossing.py +++ b/tensorflow/python/keras/layers/preprocessing/category_crossing.py @@ -27,6 +27,7 @@ from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_spec from tensorflow.python.keras.engine import base_preprocessing_layer +from tensorflow.python.keras.utils import tf_utils from tensorflow.python.ops import array_ops from tensorflow.python.ops import sparse_ops from tensorflow.python.ops.ragged import ragged_array_ops @@ -151,7 +152,7 @@ class CategoryCrossing(base_preprocessing_layer.PreprocessingLayer): inputs = [self._preprocess_input(inp) for inp in inputs] depth_tuple = self._depth_tuple if self.depth else (len(inputs),) ragged_out = sparse_out = False - if any(ragged_tensor.is_ragged(inp) for inp in inputs): + if any(tf_utils.is_ragged(inp) for inp in inputs): ragged_out = True elif any(isinstance(inp, sparse_tensor.SparseTensor) for inp in inputs): sparse_out = True diff --git a/tensorflow/python/keras/layers/preprocessing/discretization.py b/tensorflow/python/keras/layers/preprocessing/discretization.py index 7544ded5949..6f5414d1a9f 100644 --- a/tensorflow/python/keras/layers/preprocessing/discretization.py +++ b/tensorflow/python/keras/layers/preprocessing/discretization.py @@ -21,10 +21,10 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_spec from tensorflow.python.keras.engine import base_preprocessing_layer +from tensorflow.python.keras.utils import tf_utils from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops.ragged import ragged_functional_ops -from tensorflow.python.ops.ragged import ragged_tensor from tensorflow.python.util.tf_export import keras_export @@ -83,7 +83,7 @@ class Discretization(base_preprocessing_layer.PreprocessingLayer): return tensor_spec.TensorSpec(shape=output_shape, dtype=output_dtype) def call(self, inputs): - if ragged_tensor.is_ragged(inputs): + if tf_utils.is_ragged(inputs): integer_buckets = ragged_functional_ops.map_flat_values( gen_math_ops.Bucketize, input=inputs, boundaries=self.bins) # Ragged map_flat_values doesn't touch the non-values tensors in the diff --git a/tensorflow/python/keras/layers/preprocessing/hashing.py b/tensorflow/python/keras/layers/preprocessing/hashing.py index 6e4d388d202..a6de075535c 100644 --- a/tensorflow/python/keras/layers/preprocessing/hashing.py +++ b/tensorflow/python/keras/layers/preprocessing/hashing.py @@ -28,6 +28,7 @@ from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_spec from tensorflow.python.framework import tensor_util from tensorflow.python.keras.engine import base_preprocessing_layer +from tensorflow.python.keras.utils import tf_utils from tensorflow.python.ops import gen_sparse_ops from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import string_ops @@ -183,7 +184,7 @@ class Hashing(base_preprocessing_layer.PreprocessingLayer): else: inputs = string_ops.as_string(inputs) str_to_hash_bucket = self._get_string_to_hash_bucket_fn() - if ragged_tensor.is_ragged(inputs): + if tf_utils.is_ragged(inputs): return 
ragged_functional_ops.map_flat_values( str_to_hash_bucket, inputs, num_buckets=self.num_bins, name='hash') elif isinstance(inputs, sparse_tensor.SparseTensor): diff --git a/tensorflow/python/keras/layers/preprocessing/table_utils.py b/tensorflow/python/keras/layers/preprocessing/table_utils.py index cf1bfd741c9..3329f32b4fe 100644 --- a/tensorflow/python/keras/layers/preprocessing/table_utils.py +++ b/tensorflow/python/keras/layers/preprocessing/table_utils.py @@ -24,6 +24,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.keras import backend as K +from tensorflow.python.keras.utils import tf_utils from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import string_ops @@ -131,7 +132,7 @@ class TableHandler(object): inputs = ragged_tensor.convert_to_tensor_or_ragged_tensor(inputs) # Run the lookup operation on the converted tensor. - if ragged_tensor.is_ragged(inputs): + if tf_utils.is_ragged(inputs): return self._ragged_lookup(inputs) else: return self._tensor_lookup(inputs) diff --git a/tensorflow/python/keras/layers/preprocessing/text_vectorization.py b/tensorflow/python/keras/layers/preprocessing/text_vectorization.py index 400a27fb82d..2cc8bc2b340 100644 --- a/tensorflow/python/keras/layers/preprocessing/text_vectorization.py +++ b/tensorflow/python/keras/layers/preprocessing/text_vectorization.py @@ -29,13 +29,13 @@ from tensorflow.python.keras.engine import base_preprocessing_layer from tensorflow.python.keras.layers.preprocessing import category_encoding from tensorflow.python.keras.layers.preprocessing import string_lookup from tensorflow.python.keras.utils import layer_utils +from tensorflow.python.keras.utils import tf_utils from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gen_string_ops from tensorflow.python.ops import string_ops from tensorflow.python.ops.ragged import ragged_functional_ops from tensorflow.python.ops.ragged import ragged_string_ops -from tensorflow.python.ops.ragged import ragged_tensor from tensorflow.python.util.tf_export import keras_export LOWER_AND_STRIP_PUNCTUATION = "lower_and_strip_punctuation" @@ -516,7 +516,7 @@ class TextVectorization(base_preprocessing_layer.CombinerPreprocessingLayer): def _preprocess(self, inputs): if self._standardize == LOWER_AND_STRIP_PUNCTUATION: - if ragged_tensor.is_ragged(inputs): + if tf_utils.is_ragged(inputs): lowercase_inputs = ragged_functional_ops.map_flat_values( gen_string_ops.string_lower, inputs) # Depending on configuration, we may never touch the non-data tensor @@ -581,7 +581,7 @@ class TextVectorization(base_preprocessing_layer.CombinerPreprocessingLayer): # choose whether to pad or trim it based on each tensor. # We need to convert to dense if we have a ragged tensor. 
- if ragged_tensor.is_ragged(indexed_data): + if tf_utils.is_ragged(indexed_data): dense_data = indexed_data.to_tensor(default_value=0) else: dense_data = indexed_data diff --git a/tensorflow/python/keras/utils/tf_utils.py b/tensorflow/python/keras/utils/tf_utils.py index c9ad96cd37b..3bc38d44398 100644 --- a/tensorflow/python/keras/utils/tf_utils.py +++ b/tensorflow/python/keras/utils/tf_utils.py @@ -34,6 +34,8 @@ from tensorflow.python.keras import backend as K from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import variables +from tensorflow.python.ops.ragged import ragged_tensor +from tensorflow.python.ops.ragged import ragged_tensor_value from tensorflow.python.util import nest from tensorflow.python.util import object_identity from tensorflow.python.util import tf_contextlib @@ -413,6 +415,13 @@ def type_spec_from_value(value): return type_spec.type_spec_from_value(value) +def is_ragged(tensor): + """Returns true if `tensor` is a ragged tensor or ragged tensor value.""" + return isinstance( + tensor, + (ragged_tensor.RaggedTensor, ragged_tensor_value.RaggedTensorValue)) + + def is_tensor_or_variable(x): return tensor_util.is_tensor(x) or isinstance(x, variables.Variable) diff --git a/tensorflow/python/keras/utils/tf_utils_test.py b/tensorflow/python/keras/utils/tf_utils_test.py index 04aef00f5a3..9a3939e0c39 100644 --- a/tensorflow/python/keras/utils/tf_utils_test.py +++ b/tensorflow/python/keras/utils/tf_utils_test.py @@ -27,6 +27,7 @@ from tensorflow.python.framework import sparse_tensor from tensorflow.python.keras import combinations from tensorflow.python.keras.utils import tf_utils from tensorflow.python.ops import variables +from tensorflow.python.ops.ragged import ragged_tensor from tensorflow.python.platform import test try: @@ -182,5 +183,16 @@ class AttrsTest(test.TestCase): nested=Foo(1))) +class TestIsRagged(test.TestCase): + + def test_is_ragged_return_true_for_ragged_tensor(self): + tensor = ragged_tensor.RaggedTensor.from_row_splits( + values=[3, 1, 4, 1, 5, 9, 2, 6], row_splits=[0, 4, 4, 7, 8, 8]) + self.assertTrue(tf_utils.is_ragged(tensor)) + + def test_is_ragged_return_false_for_list(self): + tensor = [1., 2., 3.] + self.assertFalse(tf_utils.is_ragged(tensor)) + if __name__ == '__main__': test.main() From cd13583aee80e2d9ac4eba66dbc44391bd8d545a Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Fri, 24 Jul 2020 20:34:36 -0700 Subject: [PATCH 1327/2522] [TF2XLA] [NFC] Function are wonderful, let's use them (Simplifying IfOp::Compile) PiperOrigin-RevId: 323119743 Change-Id: I06c5f33bbf543bdaa7509f584f7876511de18ff5 --- tensorflow/compiler/tf2xla/kernels/if_op.cc | 229 +++++++++++--------- 1 file changed, 127 insertions(+), 102 deletions(-) diff --git a/tensorflow/compiler/tf2xla/kernels/if_op.cc b/tensorflow/compiler/tf2xla/kernels/if_op.cc index 2a059f78526..3a88fcf4879 100644 --- a/tensorflow/compiler/tf2xla/kernels/if_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/if_op.cc @@ -47,6 +47,122 @@ XlaIfOp::XlaIfOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { } } +// Populates tensor array gradients for compiled branches, returns whether the +// set of found tensor array gradients is non-empty. 
+static xla::StatusOr PopulateTensorArrayGradients( + XlaOpKernelContext* ctx, xla::XlaBuilder* b, + absl::Span arguments, + XlaCompiler::CompilationResult* then_result, + XlaCompiler::CompilationResult* else_result) { + bool has_tensor_array_gradients = false; + for (XlaCompiler::CompilationResult* result : {then_result, else_result}) { + for (const XlaCompiler::ResourceUpdate& update : result->resource_updates) { + XlaResource* resource; + TF_RETURN_IF_ERROR( + ctx->GetResourceInput(update.input_index + 1, &resource)); + XlaCompiler::Argument& arg = arguments[update.input_index]; + + // Add any TensorArray gradients touched by the then/else computation to + // the enclosing graph. + for (const string& grad_source : update.tensor_array_gradients_accessed) { + VLOG(5) << "TensorArray " << resource->name() << " accessed gradient " + << grad_source; + XlaResource* gradient; + TF_RETURN_IF_ERROR(resource->GetOrCreateTensorArrayGradient( + grad_source, b, &gradient)); + } + // Add all of the TensorArray gradients to the argument. For simplicity, + // we always pass all known gradients. + for (const auto& gradient : resource->tensor_array_gradients()) { + arg.tensor_array_gradients.insert(gradient.first); + } + if (!resource->tensor_array_gradients().empty()) + has_tensor_array_gradients = true; + } + } + return has_tensor_array_gradients; +} + +// Checks that shapes matches on both sides of the conditional. +static Status ValidateShapes( + XlaOpKernelContext* ctx, const XlaCompiler::CompilationResult& then_result, + const XlaCompiler::CompilationResult& else_result) { + // Check that both branches have identical input shapes. + if (then_result.xla_input_shapes.size() != 1) { + return errors::FailedPrecondition("Expected one input shape"); + } + + xla::Shape then_input_shape = then_result.xla_input_shapes[0]; + if (!then_input_shape.IsTuple()) { + return errors::FailedPrecondition("Expected tuple shape"); + } + + if (else_result.xla_input_shapes.size() != 1) { + return errors::FailedPrecondition("Expected one input shape"); + } + xla::Shape else_input_shape = else_result.xla_input_shapes[0]; + if (!else_input_shape.IsTuple()) { + return errors::FailedPrecondition("Expected tuple shape"); + } + if (!xla::ShapeUtil::Compatible(then_input_shape, else_input_shape)) { + return errors::InvalidArgument( + "Input shapes of then and else branches do not match: ", + xla::ShapeUtil::HumanString(then_input_shape), " vs. ", + xla::ShapeUtil::HumanString(else_input_shape)); + } + + // Check that both branches have identical output shapes. + if (!xla::ShapeUtil::Compatible(then_result.xla_output_shape, + else_result.xla_output_shape)) { + return errors::InvalidArgument( + "Output shapes of then and else branches do not match: ", + xla::ShapeUtil::HumanString(then_result.xla_output_shape), " vs. ", + xla::ShapeUtil::HumanString(else_result.xla_output_shape)); + } + + // Check that both branches have same TensorList output indices. + for (int output_index = 0; output_index < then_result.outputs.size(); + output_index++) { + bool is_tensor_list_in_then_branch = + then_result.outputs[output_index].is_tensor_list; + bool is_tensor_list_in_else_branch = + else_result.outputs[output_index].is_tensor_list; + if (is_tensor_list_in_then_branch != is_tensor_list_in_else_branch) { + return errors::FailedPrecondition( + "Output #", output_index, " is ", + (is_tensor_list_in_then_branch ? "" : "not"), + " a TensorList in then branch, but is ", + (is_tensor_list_in_else_branch ? 
"" : "not"), + " a TensorList in else branch"); + } + } + + VLOG(2) << "Input shape: " << xla::ShapeUtil::HumanString(then_input_shape); + VLOG(2) << "Output shape: " + << xla::ShapeUtil::HumanString(then_result.xla_output_shape); + + // We set return_updated_values_for_all_resources=true and we pass the same + // arguments to both computations, so the resource update count must match. + if (then_result.resource_updates.size() != + else_result.resource_updates.size()) { + return errors::FailedPrecondition( + "Different number of resources in then and else branch"); + } + + for (int i = 0; i < then_result.resource_updates.size(); ++i) { + const auto& lhs = then_result.resource_updates[i]; + const auto& rhs = else_result.resource_updates[i]; + bool equal = lhs.input_index == rhs.input_index && lhs.shape == rhs.shape && + lhs.tensor_array_gradients_accessed == + rhs.tensor_array_gradients_accessed; + if (!equal) { + return errors::FailedPrecondition( + "Mismatch in resource of then and else branch for resource ", i); + } + } + return Status::OK(); +} + // TODO(b/35949885): There is duplication here with the handling of the // while_op. Refactor the common code out/rework. void XlaIfOp::Compile(XlaOpKernelContext* ctx) { @@ -137,35 +253,12 @@ void XlaIfOp::Compile(XlaOpKernelContext* ctx) { OP_REQUIRES_OK(ctx, compiler->CompileFunction(options, else_branch_, arguments, &else_result)); - bool has_tensor_array_gradients = false; - for (XlaCompiler::CompilationResult* result : {&then_result, &else_result}) { - for (const XlaCompiler::ResourceUpdate& update : result->resource_updates) { - XlaResource* resource; - OP_REQUIRES_OK(ctx, - ctx->GetResourceInput(update.input_index + 1, &resource)); - XlaCompiler::Argument& arg = arguments[update.input_index]; - - // Add any TensorArray gradients touched by the then/else computation to - // the enclosing graph. - for (const string& grad_source : update.tensor_array_gradients_accessed) { - VLOG(5) << "TensorArray " << resource->name() << " accessed gradient " - << grad_source; - XlaResource* gradient; - OP_REQUIRES_OK(ctx, resource->GetOrCreateTensorArrayGradient( - grad_source, b, &gradient)); - } - // Add all of the TensorArray gradients to the argument. For simplicity, - // we always pass all known gradients. - for (const auto& gradient : resource->tensor_array_gradients()) { - arg.tensor_array_gradients.insert(gradient.first); - } - if (!resource->tensor_array_gradients().empty()) - has_tensor_array_gradients = true; - } - } + xla::StatusOr has_tensor_array_gradients = PopulateTensorArrayGradients( + ctx, b, absl::MakeSpan(arguments), &then_result, &else_result); + OP_REQUIRES_OK(ctx, has_tensor_array_gradients.status()); // Recompile the functions to update the argument shapes for tensor arrays. - if (has_tensor_array_gradients) { + if (*has_tensor_array_gradients) { then_result = {}; OP_REQUIRES_OK(ctx, compiler->CompileFunction(options, then_branch_, arguments, &then_result)); @@ -174,72 +267,7 @@ void XlaIfOp::Compile(XlaOpKernelContext* ctx) { arguments, &else_result)); } - // Check that both branches have identical input shapes. 
- OP_REQUIRES(ctx, then_result.xla_input_shapes.size() == 1, - errors::FailedPrecondition("Expected one input shape")); - xla::Shape then_input_shape = then_result.xla_input_shapes[0]; - OP_REQUIRES(ctx, then_input_shape.IsTuple(), - errors::FailedPrecondition("Expected tuple shape")); - OP_REQUIRES(ctx, else_result.xla_input_shapes.size() == 1, - errors::FailedPrecondition("Expected one input shape")); - xla::Shape else_input_shape = else_result.xla_input_shapes[0]; - OP_REQUIRES(ctx, else_input_shape.IsTuple(), - errors::FailedPrecondition("Expected tuple shape")); - OP_REQUIRES(ctx, - xla::ShapeUtil::Compatible(then_input_shape, else_input_shape), - errors::InvalidArgument( - "Input shapes of then and else branches do not match: ", - xla::ShapeUtil::HumanString(then_input_shape), " vs. ", - xla::ShapeUtil::HumanString(else_input_shape))); - - // Check that both branches have identical output shapes. - OP_REQUIRES( - ctx, - xla::ShapeUtil::Compatible(then_result.xla_output_shape, - else_result.xla_output_shape), - errors::InvalidArgument( - "Output shapes of then and else branches do not match: ", - xla::ShapeUtil::HumanString(then_result.xla_output_shape), " vs. ", - xla::ShapeUtil::HumanString(else_result.xla_output_shape))); - - // Check that both branches have same TensorList output indices. - for (int output_index = 0; output_index < then_result.outputs.size(); - output_index++) { - bool is_tensor_list_in_then_branch = - then_result.outputs[output_index].is_tensor_list; - bool is_tensor_list_in_else_branch = - else_result.outputs[output_index].is_tensor_list; - OP_REQUIRES( - ctx, is_tensor_list_in_then_branch == is_tensor_list_in_else_branch, - errors::FailedPrecondition("Output #", output_index, " is ", - (is_tensor_list_in_then_branch ? "" : "not"), - " a TensorList in then branch, but is ", - (is_tensor_list_in_else_branch ? "" : "not"), - " a TensorList in else branch")); - } - - VLOG(2) << "Input shape: " << xla::ShapeUtil::HumanString(then_input_shape); - VLOG(2) << "Output shape: " - << xla::ShapeUtil::HumanString(then_result.xla_output_shape); - - // We set return_updated_values_for_all_resources=true and we pass the same - // arguments to both computations, so the resource update count must match. - OP_REQUIRES(ctx, - then_result.resource_updates.size() == - else_result.resource_updates.size(), - errors::FailedPrecondition( - "Different number of resources in then and else branch")); - for (int i = 0; i < then_result.resource_updates.size(); ++i) { - const auto& lhs = then_result.resource_updates[i]; - const auto& rhs = else_result.resource_updates[i]; - bool equal = lhs.input_index == rhs.input_index && lhs.shape == rhs.shape && - lhs.tensor_array_gradients_accessed == - rhs.tensor_array_gradients_accessed; - OP_REQUIRES( - ctx, equal, - errors::FailedPrecondition( - "Mismatch in resource of then and else branch for resource ", i)); - } + OP_REQUIRES_OK(ctx, ValidateShapes(ctx, then_result, else_result)); int num_inputs = then_result.input_mapping.size(); std::vector inputs(num_inputs); @@ -263,22 +291,18 @@ void XlaIfOp::Compile(XlaOpKernelContext* ctx) { } } - auto input_tuple = xla::Tuple(b, inputs); + xla::XlaOp input_tuple = xla::Tuple(b, inputs); xla::XlaOp outputs = xla::Conditional(ctx->Input(0), input_tuple, *then_result.computation, input_tuple, *else_result.computation); + // Sets non-variable outputs. 
for (int i = 0; i < output_types_.size(); ++i) { xla::XlaOp output_handle = xla::GetTupleElement(outputs, i); if (VLOG_IS_ON(2)) { - LOG(INFO) << "Setting output " << i; - auto shape_or = b->GetShape(output_handle); - if (shape_or.ok()) { - LOG(INFO) << "Shape for output " << i << ": " - << xla::ShapeUtil::HumanString(shape_or.ValueOrDie()); - } else { - LOG(INFO) << "Shape unknown for output " << i; - } + xla::StatusOr shape = b->GetShape(output_handle); + VLOG(2) << "Setting output " << i << " with shape " + << (shape.ok() ? shape->ToString() : ""); } // We have checked that both branches have same TensorList output indices. if (then_result.outputs[i].is_tensor_list) { @@ -287,6 +311,7 @@ void XlaIfOp::Compile(XlaOpKernelContext* ctx) { ctx->SetOutput(i, output_handle); } } + if (has_token_input_output_) { // Set token output for this "If" op. Token output is the last output of // XLA computation, which comes after all "normal" TF outputs and resource From 4874f237b8c7d0ce7eaa96d45819419ea37ebde5 Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Fri, 24 Jul 2020 20:40:53 -0700 Subject: [PATCH 1328/2522] [TF2XLA] [NFC] Simplify CaseOp::Compile, no need for an extra vector of pointers PiperOrigin-RevId: 323120242 Change-Id: I8242377b2ca5f933e048065af6b6f28b448e5969 --- tensorflow/compiler/tf2xla/kernels/case_op.cc | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/tf2xla/kernels/case_op.cc b/tensorflow/compiler/tf2xla/kernels/case_op.cc index fbd54f1ef39..fd3834e79bf 100644 --- a/tensorflow/compiler/tf2xla/kernels/case_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/case_op.cc @@ -160,17 +160,15 @@ void XlaCaseOp::Compile(XlaOpKernelContext* ctx) { XlaCompiler* compiler = ctx->compiler(); std::vector branch_results(num_branches); - std::vector branch_results_p(num_branches); for (int j = 0; j < num_branches; ++j) { OP_REQUIRES_OK(ctx, compiler->CompileFunction(options, branches[j], arguments, &branch_results[j])); - branch_results_p[j] = &branch_results[j]; } bool has_tensor_array_gradients = false; - for (XlaCompiler::CompilationResult* result : branch_results_p) { - for (const XlaCompiler::ResourceUpdate& update : result->resource_updates) { + for (XlaCompiler::CompilationResult& result : branch_results) { + for (const XlaCompiler::ResourceUpdate& update : result.resource_updates) { XlaResource* resource; OP_REQUIRES_OK(ctx, ctx->GetResourceInput(update.input_index + 1, &resource)); From 0de929fdc1291d4863d50ede15b2a1b23167a883 Mon Sep 17 00:00:00 2001 From: Wenhao Jia Date: Fri, 24 Jul 2020 21:40:18 -0700 Subject: [PATCH 1329/2522] Make StatusHelper safer to use and fix a related memory leak. 
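In the old TransferFromDeviceState::TransferFinished, the first failing SE_Status was assigned over status_helper.c_status, so the status object the helper had allocated was never freed. The state now owns a raw overall_status that is swapped with the incoming status and freed explicitly, c_status is declared const so it cannot be reassigned, and the new static StatusHelper::FromC converts a raw SE_Status without needing a helper instance.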
PiperOrigin-RevId: 323124482 Change-Id: I8d3528dd59a1ba1a9f85bf07c93a46eb32f7e645 --- .../stream_executor/tpu/status_helper.h | 20 ++++++++++--------- .../tpu/tpu_transfer_manager.cc | 15 ++++++++------ 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/tensorflow/stream_executor/tpu/status_helper.h b/tensorflow/stream_executor/tpu/status_helper.h index 85c7bf526a9..0e522ce8241 100644 --- a/tensorflow/stream_executor/tpu/status_helper.h +++ b/tensorflow/stream_executor/tpu/status_helper.h @@ -29,22 +29,24 @@ class StatusHelper { tensorflow::tpu::ExecutorApiFn()->TpuStatus_FreeFn(c_status); } - bool ok() const { - return tensorflow::tpu::ExecutorApiFn()->TpuStatus_CodeFn(c_status) == 0; - } - - tensorflow::Status status() const { - if (!ok()) { + static tensorflow::Status FromC(SE_Status* const c_status) { + if (tensorflow::tpu::ExecutorApiFn()->TpuStatus_OkFn(c_status)) { + return tensorflow::Status::OK(); + } else { return tensorflow::Status( tensorflow::error::Code( tensorflow::tpu::ExecutorApiFn()->TpuStatus_CodeFn(c_status)), tensorflow::tpu::ExecutorApiFn()->TpuStatus_MessageFn(c_status)); - } else { - return tensorflow::Status::OK(); } } - SE_Status* c_status; // NOLINT + bool ok() const { + return tensorflow::tpu::ExecutorApiFn()->TpuStatus_OkFn(c_status); + } + + tensorflow::Status status() const { return FromC(c_status); } + + SE_Status* const c_status; // NOLINT }; #endif // TENSORFLOW_STREAM_EXECUTOR_TPU_STATUS_HELPER_H_ diff --git a/tensorflow/stream_executor/tpu/tpu_transfer_manager.cc b/tensorflow/stream_executor/tpu/tpu_transfer_manager.cc index d698f9552f3..a7288003f8d 100644 --- a/tensorflow/stream_executor/tpu/tpu_transfer_manager.cc +++ b/tensorflow/stream_executor/tpu/tpu_transfer_manager.cc @@ -15,6 +15,8 @@ limitations under the License. #include "tensorflow/stream_executor/tpu/tpu_transfer_manager.h" +#include + #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/tpu/tpu_api.h" @@ -80,19 +82,20 @@ Status TpuTransferManager::TransferLiteralToDeviceAsync( struct TransferFromDeviceState { std::atomic remaining_transfers; - StatusHelper status_helper; + SE_Status* overall_status = + tpu::ExecutorApiFn()->TpuStatus_NewFn(); // OK or the first error std::function done; void TransferFinished(SE_Status* status) { if (!tpu::ExecutorApiFn()->TpuStatus_OkFn(status) && - tpu::ExecutorApiFn()->TpuStatus_OkFn(status_helper.c_status)) { - status_helper.c_status = status; - } else { - tpu::ExecutorApiFn()->TpuStatus_FreeFn(status); + tpu::ExecutorApiFn()->TpuStatus_OkFn(overall_status)) { + std::swap(overall_status, status); } + tpu::ExecutorApiFn()->TpuStatus_FreeFn(status); if (--remaining_transfers == 0) { - done(status_helper.status()); + done(StatusHelper::FromC(overall_status)); + tpu::ExecutorApiFn()->TpuStatus_FreeFn(overall_status); delete this; } } From 10559ef615aa7eb252aeaaf373d633e8d1dffe0f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 24 Jul 2020 23:38:07 -0700 Subject: [PATCH 1330/2522] Disable invalid rewrite Square(Sub(x, y)) => Identity(SquaredDiff(x,y)) for complex types. SquaredDiff actually implements conj(x-y)*(x-y). Update API documentation to reflect reality. 
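For real inputs the two expressions agree, but for complex inputs they do not: with x - y = i, (x - y)*(x - y) = -1 while conj(x - y)*(x - y) = 1. A quick check in Python:

    d = 1j                    # x - y
    print(d * d)              # (-1+0j): what Square(Sub(x, y)) computes
    print(d.conjugate() * d)  # (1+0j):  what SquaredDifference computes

so the Square(Sub) -> SquaredDifference rewrite is now skipped when T is DT_COMPLEX64 or DT_COMPLEX128.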
PiperOrigin-RevId: 323131891 Change-Id: I4898adf0a4e43f6a9cbb8446d8f6b6db11d399e3 --- .../base_api/api_def_SquaredDifference.pbtxt | 2 +- .../optimizers/arithmetic_optimizer.cc | 13 +++- .../optimizers/arithmetic_optimizer_test.cc | 73 +++++++++++-------- tensorflow/core/ops/math_ops.cc | 1 + 4 files changed, 55 insertions(+), 34 deletions(-) diff --git a/tensorflow/core/api_def/base_api/api_def_SquaredDifference.pbtxt b/tensorflow/core/api_def/base_api/api_def_SquaredDifference.pbtxt index 51277692d8c..2edc3bfc198 100644 --- a/tensorflow/core/api_def/base_api/api_def_SquaredDifference.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_SquaredDifference.pbtxt @@ -1,6 +1,6 @@ op { graph_op_name: "SquaredDifference" - summary: "Returns (x - y)(x - y) element-wise." + summary: "Returns conj(x - y)(x - y) element-wise." description: < Identity(SquaredDifference(x, y)) +// Performs the following conversion for real types: +// Square(Sub(x, y)) => Identity(SquaredDifference(x, y) ) class FuseSquaredDiffStage : public ArithmeticOptimizerStage { public: explicit FuseSquaredDiffStage(const GraphOptimizerContext& ctx, @@ -1883,6 +1883,11 @@ class FuseSquaredDiffStage : public ArithmeticOptimizerStage { // elsewhere. if (IsSub(*b) && !IsInPreserveSet(*b) && (NumNonControlOutputs(*b, *ctx().node_map) == 1)) { + // For complex, SquaredDiff computes conj(x-y)*(x-y), so this rewrite is + // invalid. + const DataType type = GetDataTypeFromAttr(*b, "T"); + if ((type == DT_COMPLEX64) || (type == DT_COMPLEX128)) + return Status::OK(); node->set_op("Identity"); b->set_op("SquaredDifference"); AddToOptimizationQueue(node); @@ -3723,12 +3728,12 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps(bool can_use_shapes) { pipeline.AddStage(ctx, ctx_ext); if (options_.remove_stack_slice_same_axis) pipeline.AddStage(ctx, ctx_ext); - if (options_.fuse_squared_diff) - pipeline.AddStage(ctx, ctx_ext); if (options_.simplify_embedding_lookup) pipeline.AddStage(ctx, ctx_ext); if (options_.remove_cast_into_segment_reduction) pipeline.AddStage(ctx, ctx_ext); + if (options_.fuse_squared_diff) + pipeline.AddStage(ctx, ctx_ext); VLOG(1) << "Run " << pipeline.NumStages() << " arithmetic optimizer stages: " << absl::StrJoin(pipeline.StageNames(), ", "); diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc index 388fe12805b..d8c60ec897b 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc @@ -15,6 +15,8 @@ limitations under the License. 
#include "tensorflow/core/grappler/optimizers/arithmetic_optimizer.h" +#include + #include "absl/strings/match.h" #include "absl/strings/str_cat.h" #include "tensorflow/cc/ops/array_ops.h" @@ -2525,38 +2527,51 @@ TEST_F(ArithmeticOptimizerTest, ConvertSqrtDivToRsqrtMulExcludeFloorDiv) { } TEST_F(ArithmeticOptimizerTest, FuseSquaredDiff) { - tensorflow::Scope s = tensorflow::Scope::NewRootScope(); - auto x = ops::Const(s.WithOpName("x"), {1.0f, 2.0f}, {1, 2}); - auto y = ops::Const(s.WithOpName("y"), {3.0f, 4.0f}, {1, 2}); - Output sub_x_y = ops::Sub(s.WithOpName("sub_x_y"), x, y); - Output square_sub_x_y = ops::Square(s.WithOpName("output"), sub_x_y); + for (bool is_complex : {false, true}) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output x = ops::Const(s.WithOpName("x"), {1.0f, 2.0f}, {1, 2}); + Output y = ops::Const(s.WithOpName("y"), {3.0f, 4.0f}, {1, 2}); + Output complex_x = ops::Complex(s.WithOpName("complex_x"), x, x); + Output complex_y = ops::Complex(s.WithOpName("complex_y"), y, y); + Output sub_x_y = + is_complex ? ops::Sub(s.WithOpName("sub_x_y"), complex_x, complex_y) + : ops::Sub(s.WithOpName("sub_x_y"), x, y); + Output square_sub_x_y = ops::Square(s.WithOpName("output"), sub_x_y); - GrapplerItem item; - item.fetch = {"output"}; - TF_CHECK_OK(s.ToGraphDef(&item.graph)); - const auto tensors_expected = EvaluateNodes(item.graph, item.fetch); - ASSERT_EQ(tensors_expected.size(), 1); + GrapplerItem item; + item.fetch = {"output"}; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + const auto tensors_expected = EvaluateNodes(item.graph, item.fetch); + ASSERT_EQ(tensors_expected.size(), 1); - GraphDef output; - ArithmeticOptimizer optimizer; - EnableOnlyFuseSquaredDiff(&optimizer); - OptimizeAndPrune(&optimizer, &item, &output); - const auto tensors = EvaluateNodes(output, item.fetch); - ASSERT_EQ(tensors.size(), 1); + GraphDef output; + ArithmeticOptimizer optimizer; + EnableOnlyFuseSquaredDiff(&optimizer); + OptimizeAndPrune(&optimizer, &item, &output); + const auto tensors = EvaluateNodes(output, item.fetch); + ASSERT_EQ(tensors.size(), 1); - test::ExpectTensorNear(tensors[0], tensors_expected[0], 1e-6); - EXPECT_EQ(output.node_size(), item.graph.node_size()); - for (int i = 0; i < output.node_size(); ++i) { - const NodeDef& node = output.node(i); - if (node.name() == "output") { - EXPECT_EQ(node.op(), "Identity"); - ASSERT_EQ(node.input_size(), 1); - EXPECT_EQ(node.input(0), "sub_x_y"); - } else if (node.name() == "sub_x_y") { - EXPECT_EQ(node.op(), "SquaredDifference"); - ASSERT_EQ(node.input_size(), 2); - EXPECT_EQ(node.input(0), "x"); - EXPECT_EQ(node.input(1), "y"); + if (is_complex) { + test::ExpectTensorNear>(tensors[0], + tensors_expected[0], 1e-6); + EXPECT_EQ(output.node_size(), item.graph.node_size()); + } else { + test::ExpectTensorNear(tensors[0], tensors_expected[0], 1e-6); + // The two unused Complex nodes should get pruned. + EXPECT_EQ(output.node_size(), item.graph.node_size() - 2); + } + for (int i = 0; i < output.node_size(); ++i) { + const NodeDef& node = output.node(i); + if (node.name() == "output") { + EXPECT_EQ(node.op(), is_complex ? "Square" : "Identity"); + ASSERT_EQ(node.input_size(), 1); + EXPECT_EQ(node.input(0), "sub_x_y"); + } else if (node.name() == "sub_x_y") { + EXPECT_EQ(node.op(), is_complex ? "Sub" : "SquaredDifference"); + ASSERT_EQ(node.input_size(), 2); + EXPECT_EQ(node.input(0), is_complex ? "complex_x" : "x"); + EXPECT_EQ(node.input(1), is_complex ? 
"complex_y" : "y"); + } } } } diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index 373f3fdfa96..99be4e2fcd8 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -494,6 +494,7 @@ REGISTER_OP("TruncateDiv") REGISTER_OP("RealDiv").BINARY_MORE().SetShapeFn( shape_inference::BroadcastBinaryOpShapeFn); +// Note SquaredDifference implements conj(x - y)*(x - y). REGISTER_OP("SquaredDifference") .BINARY_FEWER() .SetIsCommutative() From 973cfa0847adcd95a4ca936c605fd3a41a17a772 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 24 Jul 2020 23:45:36 -0700 Subject: [PATCH 1331/2522] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 323132216 Change-Id: I8aec4ea30f5b6d33d6085c9bc1462427c4d0171b --- tensorflow/go/op/wrappers.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index fdc188e6aa3..fdec7e2580f 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -32006,7 +32006,7 @@ func VarHandleOp(scope *Scope, dtype tf.DataType, shape tf.Shape, optional ...Va return op.Output(0) } -// Returns (x - y)(x - y) element-wise. +// Returns conj(x - y)(x - y) element-wise. // // *NOTE*: `SquaredDifference` supports broadcasting. More about broadcasting // [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) From 9d1863dd7c450cdef9fd2d312cf3c18d6382291f Mon Sep 17 00:00:00 2001 From: Nat Jeffries Date: Sat, 25 Jul 2020 00:36:08 -0700 Subject: [PATCH 1332/2522] Update SVDF tests to use more representative shapes and data. Restructure test to allow re-using float inputs outputs weights and biases in quantized kernels. PiperOrigin-RevId: 323135297 Change-Id: Id0aaf54f134508be6610e3d6f38632e7e817560b --- tensorflow/lite/micro/kernels/svdf.cc | 31 +- tensorflow/lite/micro/kernels/svdf_test.cc | 1178 ++++++++++------- .../micro/kernels/xtensa_hifimini/svdf.cc | 28 +- 3 files changed, 739 insertions(+), 498 deletions(-) diff --git a/tensorflow/lite/micro/kernels/svdf.cc b/tensorflow/lite/micro/kernels/svdf.cc index c3adb4d3782..f0efc8623b1 100644 --- a/tensorflow/lite/micro/kernels/svdf.cc +++ b/tensorflow/lite/micro/kernels/svdf.cc @@ -416,24 +416,19 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8); - const auto* input_params = - reinterpret_cast(input->quantization.params); - const auto* weights_feature_params = - static_cast( - weights_feature->quantization.params); - const auto* state_params = static_cast( - activation_state->quantization.params); - const auto* weight_time_params = - static_cast( - weights_time->quantization.params); - const auto* output_params = static_cast( - output->quantization.params); const double effective_scale_1 = static_cast( - input_params->scale->data[0] * weights_feature_params->scale->data[0] / - state_params->scale->data[0]); - const double effective_scale_2 = static_cast( - state_params->scale->data[0] * weight_time_params->scale->data[0] / - output_params->scale->data[0]); + input->params.scale * weights_feature->params.scale / + activation_state->params.scale); + const double effective_scale_2 = + static_cast(activation_state->params.scale * + weights_time->params.scale / output->params.scale); + + // TODO(b/162018098): Use TF_LITE_ENSURE_NEAR when it is ready. 
+ TF_LITE_ENSURE( + context, + std::abs(static_cast(bias->params.scale) - + static_cast(activation_state->params.scale * + weights_time->params.scale)) < 1e-5); TFLITE_DCHECK(node->user_data != nullptr); OpData* data = static_cast(node->user_data); @@ -503,8 +498,6 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } case kTfLiteInt8: { - TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActRelu); - EvalIntegerSVDF(context, node, input, weights_feature, weights_time, bias, params, activation_state, output, data, input->params.zero_point, output->params.zero_point); diff --git a/tensorflow/lite/micro/kernels/svdf_test.cc b/tensorflow/lite/micro/kernels/svdf_test.cc index fc0a91481fb..3b2c8eb7af6 100644 --- a/tensorflow/lite/micro/kernels/svdf_test.cc +++ b/tensorflow/lite/micro/kernels/svdf_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include +#include #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" @@ -25,40 +25,10 @@ namespace tflite { namespace testing { namespace { -// naming as follows: svdf__xx -static float svdf_input_3x2x10[] = { - 0.12609188, -0.46347019, -0.89598465, - 0.35867718, 0.36897406, 0.73463392, +// naming as follows: _xx - 0.14278367, -1.64410412, -0.75222826, - -0.57290924, 0.12729003, 0.7567004, - - 0.49837467, 0.19278903, 0.26584083, - 0.17660543, 0.52949083, -0.77931279, - - -0.11186574, 0.13164264, -0.05349274, - -0.72674477, -0.5683046, 0.55900657, - - -0.68892461, 0.37783599, 0.18263303, - -0.63690937, 0.44483393, -0.71817774, - - -0.81299269, -0.86831826, 1.43940818, - -0.95760226, 1.82078898, 0.71135032, - - -1.45006323, -0.82251364, -1.69082689, - -1.65087092, -1.89238167, 1.54172635, - - 0.03966608, -0.24936394, -0.77526885, - 2.06740379, -1.51439476, 1.43768692, - - 0.11771342, -0.23761693, -0.65898693, - 0.31088525, -1.55601168, -0.87661445, - - -0.89477462, 1.67204106, -0.53235275, - -0.6230064, 0.29819036, 1.06939757, -}; - -static float svdf_input_2x2x10[] = { +// 10 inputs each with shape {2, 2}. 
+const float input_data_2x2x10[] = { 0.12609188, -0.46347019, 0.35867718, 0.36897406, 0.14278367, -1.64410412, -0.57290924, 0.12729003, @@ -80,96 +50,446 @@ static float svdf_input_2x2x10[] = { -0.89477462, 1.67204106, -0.6230064, 0.29819036, }; -static float svdf_golden_output_2x2x30_rank_1[] = { - -0.044205, -0.013757, 0.050369, -0.018447, 0.073010, 0.025142, -0.021154, - 0.013551, -0.209613, -0.062421, 0.150209, -0.108334, 0.028256, -0.006950, - -0.030885, 0.009603, -0.076800, -0.037075, -0.087198, -0.155183, 0.091069, - 0.098446, -0.016083, 0.106475, -0.082123, -0.162238, -0.084434, -0.141074, - -0.029340, -0.090685, 0.053302, -0.030604, -0.201440, 0.088424, 0.139877, - 0.012416, -0.113212, 0.103893, -0.100842, 0.122780, -0.166632, -0.116705, - 0.175298, -0.047163, 0.313077, -0.166485, -0.285860, 0.129069, -0.625911, - 0.046134, 0.138081, -0.129581, -0.521455, -0.061579, 0.230289, 0.114963, - -0.216693, -0.161643, -0.179177, -0.052599, -0.213239, 0.029502, 0.260858, - 0.275045, -0.213689, -0.323608, -0.285635, -0.317687, -0.324092, -0.317972, - -0.208450, -0.462504, -0.255126, -0.218576, -0.041528, 0.179421, -0.440583, - 0.072127, -0.284136, 0.241570, -0.582490, 0.253004, 0.156972, 0.132266, - -0.175340, -0.269495, -0.005782, -0.125683, -0.461215, 0.257511, 0.340125, - 0.140569, -0.866940, -0.075565, 0.484422, 0.018665, 0.059312, -0.006378, - -0.465532, 0.291374, -0.182749, 0.232608, 0.479811, 0.541274, 0.286369, - -0.188810, -0.011561, 0.022947, 0.451862, 0.214710, -0.367849, -0.722380, - -0.072298, -0.270524, -0.083401, -0.038342, -0.035884, -0.565247, -0.427794, - 0.015071}; +// Feature filter of shape {8, 2}. +const float feature_weights_data_2x2x10[] = { + -0.31930989, 0.0079667, 0.39296314, 0.37613347, 0.12416199, 0.15785322, + 0.27901134, 0.3905206, 0.21931258, -0.36137494, -0.10640851, 0.31053296, + -0.36118156, -0.0976817, -0.36916667, 0.22197971}; -static float svdf_golden_output_3x2x10_rank_1[] = { - 0.014899, -0.0517661, -0.143725, -0.00271883, - -0.03004015, 0.09565311, 0.1587342, 0.00784263, +// Time filter of shape {8, 10}. 
+const float time_weights_data_2x2x10[] = { + -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156, + 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199, - 0.068281, -0.162217, -0.152268, 0.00323521, - 0.01582633, 0.03858774, -0.03001583, -0.02671271, + 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518, + -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296, - -0.0317821, -0.0333089, 0.0609602, 0.0333759, - -0.01432795, 0.05524484, 0.1101355, -0.02382665, + -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236, + 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846, - -0.00623099, -0.077701, -0.391193, -0.0136691, - -0.02333033, 0.02293761, 0.12338032, 0.04326871, + -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166, + -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657, - 0.201551, -0.164607, -0.179462, -0.0592739, - 0.01064911, -0.17503069, 0.07821996, -0.00224009, + -0.14884081, 0.19931212, -0.36002168, 0.34663299, -0.11405486, + 0.12672701, 0.39463779, -0.07886535, -0.06384811, 0.08249187, - 0.0886511, -0.0875401, -0.269283, 0.0281379, - -0.02282338, 0.09741908, 0.32973239, 0.12281385, + -0.26816407, -0.19905911, 0.29211238, 0.31264046, -0.28664589, + 0.05698794, 0.11613581, 0.14078894, 0.02187902, -0.21781836, - -0.201174, -0.586145, -0.628624, -0.0330412, - 0.24780814, -0.39304617, -0.22473189, 0.02589256, + -0.15567942, 0.08693647, -0.38256618, 0.36580828, -0.22922277, + -0.0226903, 0.12878349, -0.28122205, -0.10850525, -0.11955214, - -0.0839096, -0.299329, 0.108746, 0.109808, - 0.10084175, -0.06416984, 0.28936723, 0.0026358, + 0.27179423, -0.04710215, 0.31069002, 0.22672787, 0.09580326, + 0.08682203, 0.1258215, 0.1851041, 0.29228821, 0.12366763}; - 0.419114, -0.237824, -0.422627, 0.175115, - -0.2314795, -0.18584411, -0.4228974, -0.12928449, +// Activation state with shape {2, 80}. These initial values must be copied into +// a mutable activation state tensor. 
- 0.36726, -0.522303, -0.456502, -0.175475, - 0.17012937, -0.34447709, 0.38505614, -0.28158101, +const float initial_activation_state_data_2x2x10[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + +// Bias with shape {8} +const float bias_data_2x2x10[] = {0, 0, 0, 0, 0, 0, 0, 0}; + +// 10 outputs each of shape {2, 4} +const float golden_output_2x2x10[] = { + -0.044205, -0.013757, 0.050369, -0.018447, + 0.073010, 0.025142, -0.021154, 0.013551, + + -0.209613, -0.062421, 0.150209, -0.108334, + 0.028256, -0.006950, -0.030885, 0.009603, + + -0.076800, -0.037075, -0.087198, -0.155183, + 0.091069, 0.098446, -0.016083, 0.106475, + + -0.082123, -0.162238, -0.084434, -0.141074, + -0.029340, -0.090685, 0.053302, -0.030604, + + -0.201440, 0.088424, 0.139877, 0.012416, + -0.113212, 0.103893, -0.100842, 0.122780, + + -0.166632, -0.116705, 0.175298, -0.047163, + 0.313077, -0.166485, -0.285860, 0.129069, + + -0.625911, 0.046134, 0.138081, -0.129581, + -0.521455, -0.061579, 0.230289, 0.114963, + + -0.216693, -0.161643, -0.179177, -0.052599, + -0.213239, 0.029502, 0.260858, 0.275045, + + -0.213689, -0.323608, -0.285635, -0.317687, + -0.324092, -0.317972, -0.208450, -0.462504, + + -0.255126, -0.218576, -0.041528, 0.179421, + -0.440583, 0.072127, -0.284136, 0.241570}; + +// Simulated real-world inputs, weights and expected outputs. + +// Input of shape {1x16} +const float input_data_16x1x1[] = { + -0.488494, 2.023762, -2.233117, -0.488494, 3.559030, 9.490748, + -3.210106, -1.953977, -0.279140, 0.907204, 1.674838, 0.000000, + -0.279140, -0.628064, -0.069785, -0.628064, }; -static float svdf_golden_output_3x2x10_rank_2[] = { - -0.09623547, -0.10193135, 0.11083051, -0.0347917, - 0.1141196, 0.12965347, -0.12652366, 0.01007236, +// Feature filter of shape {64, 16}. 
+const float feature_weights_data_16x1x1[] = { + 0.173588, 0.173588, -0.024798, 0.193426, -0.099193, 0.044637, 0.183507, + 0.183507, 0.044637, 0.198386, -0.069435, 0.084314, 0.312458, 0.024798, + 0.173588, -0.049596, -0.352135, -0.550521, -0.009919, -0.099193, -0.074395, + -0.128951, 0.193426, 0.357095, -0.317418, -0.119032, -0.218225, -0.004960, + -0.386853, -0.133911, 0.252942, -0.019839, -0.024798, -0.054556, -0.069435, + -0.128951, 0.029758, -0.099193, -0.312458, -0.029758, 0.064475, 0.183507, + 0.114072, -0.178547, -0.247982, -0.119032, 0.243023, -0.119032, -0.034718, + -0.178547, 0.019839, 0.128951, -0.223184, -0.009919, -0.213265, 0.168628, + -0.143830, -0.322377, -0.218225, -0.193426, -0.252942, -0.049596, 0.064475, + -0.267821, -0.580279, -0.099193, 0.213265, 0.119032, -0.119032, -0.178547, + 0.610037, 0.109112, 0.049596, -0.014879, -0.049596, -0.193426, 0.039677, + -0.148789, -0.114072, -0.158709, -0.158709, 0.094233, 0.099193, -0.114072, + 0.104153, -0.123991, 0.198386, -0.173588, 0.089274, -0.247982, -0.054556, + 0.123991, 0.183507, 0.114072, 0.188467, 0.302539, 0.044637, 0.039677, + -0.099193, 0.168628, -0.024798, -0.054556, -0.109112, 0.014879, -0.009919, + 0.069435, -0.396772, -0.287660, -0.079354, -0.104153, 0.054556, 0.089274, + -0.099193, 0.114072, 0.034718, 0.119032, 0.282700, -0.119032, -0.505884, + -0.233104, -0.114072, -0.257902, -0.233104, -0.178547, 0.153749, 0.128951, + 0.143830, -0.188467, -0.183507, 0.104153, -0.024798, 0.193426, -0.287660, + 0.168628, -0.009919, 0.119032, -0.024798, -0.099193, -0.203346, 0.099193, + 0.084314, -0.168628, 0.123991, -0.148789, 0.114072, -0.029758, 0.228144, + -0.238063, 0.089274, -0.064475, 0.307498, -0.188467, -0.004960, -0.252942, + -0.173588, -0.158709, -0.044637, -0.009919, 0.312458, -0.262861, 0.059516, + 0.158709, 0.069435, -0.282700, 0.074395, -0.322377, -0.183507, -0.123991, + -0.233104, 0.009919, 0.252942, -0.243023, 0.555481, -0.099193, -0.119032, + -0.441409, 0.148789, 0.084314, -0.168628, -0.183507, 0.188467, 0.024798, + -0.302539, 0.223184, 0.143830, -0.193426, -0.054556, -0.218225, -0.297579, + 0.104153, 0.272781, -0.034718, 0.114072, -0.059516, 0.044637, 0.342216, + 0.421570, 0.138870, -0.024798, -0.039677, -0.163668, -0.034718, 0.396772, + -0.128951, -0.044637, -0.173588, 0.302539, 0.079354, 0.049596, 0.133911, + -0.029758, -0.312458, -0.029758, 0.079354, 0.128951, 0.252942, 0.213265, + 0.014879, 0.287660, 0.178547, 0.297579, 0.352135, 0.401732, 0.024798, + -0.277740, -0.411651, -0.069435, 0.342216, -0.158709, -0.104153, -0.009919, + 0.223184, 0.228144, -0.019839, 0.059516, -0.104153, -0.510844, 0.029758, + -0.406691, 0.089274, 0.421570, 0.163668, -0.143830, -0.019839, -0.039677, + 0.104153, -0.044637, -0.128951, 0.203346, 0.079354, -0.069435, 0.094233, + -0.138870, 0.466207, -0.163668, 0.049596, 0.029758, 0.267821, 0.029758, + -0.049596, 0.009919, 0.004960, -0.099193, 0.094233, -0.262861, 0.089274, + -0.302539, 0.332297, -0.307498, -0.014879, 0.168628, -0.094233, -0.272781, + 0.034718, -0.133911, -0.228144, 0.094233, 0.257902, -0.228144, 0.153749, + -0.054556, -0.252942, 0.054556, 0.218225, -0.054556, 0.302539, 0.282700, + 0.054556, -0.044637, -0.133911, 0.233104, -0.049596, 0.411651, 0.044637, + -0.297579, -0.029758, -0.114072, 0.114072, -0.580279, 0.079354, -0.024798, + -0.347175, -0.128951, -0.099193, 0.238063, -0.104153, -0.009919, 0.158709, + -0.034718, 0.123991, -0.163668, 0.059516, 0.342216, 0.009919, 0.064475, + -0.307498, -0.520763, -0.238063, 0.163668, 0.362054, 0.034718, -0.178547, + -0.104153, 
-0.257902, 0.322377, 0.054556, 0.148789, -0.178547, 0.084314, + 0.004960, 0.257902, 0.029758, 0.079354, -0.223184, -0.193426, 0.282700, + 0.000000, -0.019839, -0.114072, 0.491005, -0.193426, -0.029758, -0.243023, + 0.009919, 0.089274, -0.277740, -0.089274, 0.104153, 0.337256, 0.138870, + -0.307498, -0.054556, 0.352135, 0.133911, -0.044637, 0.133911, -0.089274, + -0.357095, -0.272781, 0.069435, 0.059516, -0.109112, 0.148789, -0.044637, + -0.019839, -0.153749, 0.123991, -0.223184, 0.322377, 0.074395, -0.312458, + 0.024798, -0.223184, 0.109112, -0.138870, 0.218225, -0.074395, -0.406691, + 0.009919, -0.198386, -0.009919, 0.416611, 0.178547, 0.148789, 0.133911, + -0.004960, 0.069435, -0.054556, -0.044637, 0.297579, 0.059516, -0.456288, + -0.148789, -0.004960, 0.054556, 0.094233, -0.104153, 0.198386, -0.302539, + 0.133911, 0.411651, 0.054556, 0.525723, -0.089274, 0.079354, 0.238063, + 0.079354, -0.039677, 0.039677, 0.029758, 0.332297, -0.014879, -0.367014, + -0.143830, -0.123991, -0.064475, 0.014879, 0.173588, -0.168628, 0.386853, + 0.009919, 0.173588, 0.163668, 0.123991, 0.163668, 0.198386, 0.203346, + -0.401732, -0.009919, 0.272781, -0.173588, 0.044637, 0.238063, 0.133911, + 0.049596, 0.208305, -0.024798, 0.049596, -0.049596, 0.034718, -0.446368, + 0.466207, -0.089274, -0.099193, -0.128951, -0.228144, 0.014879, -0.252942, + 0.074395, -0.223184, -0.168628, -0.292619, 0.178547, 0.153749, -0.014879, + 0.054556, 0.000000, 0.193426, 0.158709, 0.178547, -0.327337, -0.138870, + -0.114072, 0.168628, 0.297579, -0.109112, -0.029758, -0.029758, -0.416611, + 0.059516, 0.000000, -0.168628, -0.322377, 0.238063, -0.128951, -0.029758, + 0.500925, 0.292619, 0.123991, -0.099193, 0.074395, 0.317418, -0.148789, + 0.064475, -0.104153, -0.044637, -0.094233, 0.188467, -0.044637, 0.213265, + -0.233104, -0.049596, 0.004960, -0.198386, 0.287660, -0.148789, -0.257902, + 0.004960, -0.218225, -0.044637, -0.386853, -0.243023, -0.163668, 0.094233, + 0.029758, -0.019839, -0.009919, -0.143830, -0.158709, 0.158709, -0.243023, + -0.039677, -0.297579, 0.069435, 0.049596, 0.302539, 0.059516, 0.074395, + -0.019839, 0.352135, -0.019839, -0.138870, -0.178547, -0.243023, 0.233104, + 0.252942, -0.228144, -0.049596, 0.173588, 0.173588, -0.074395, -0.034718, + -0.292619, 0.362054, 0.183507, 0.243023, -0.203346, -0.044637, 0.054556, + 0.059516, -0.158709, -0.158709, 0.000000, 0.327337, 0.119032, 0.034718, + -0.044637, -0.089274, 0.089274, -0.233104, 0.000000, -0.317418, 0.371974, + 0.213265, 0.307498, -0.178547, -0.367014, 0.039677, -0.059516, 0.168628, + -0.014879, 0.143830, 0.123991, -0.084314, -0.332297, -0.416611, 0.183507, + 0.109112, -0.039677, 0.014879, 0.292619, -0.213265, -0.054556, 0.004960, + 0.123991, 0.119032, 0.000000, -0.332297, -0.312458, -0.198386, -0.213265, + 0.119032, 0.322377, 0.168628, 0.104153, -0.262861, 0.327337, -0.049596, + -0.228144, -0.074395, 0.168628, 0.123991, 0.396772, 0.044637, 0.322377, + 0.193426, 0.267821, -0.178547, 0.297579, 0.148789, -0.218225, -0.138870, + 0.044637, 0.049596, 0.133911, 0.064475, 0.069435, 0.064475, -0.158709, + -0.044637, -0.173588, 0.267821, 0.327337, 0.079354, -0.228144, 0.029758, + 0.014879, 0.198386, -0.109112, -0.133911, 0.431490, 0.099193, 0.421570, + 0.233104, -0.054556, 0.054556, -0.317418, -0.133911, -0.123991, -0.287660, + 0.342216, -0.049596, -0.153749, 0.228144, -0.213265, 0.262861, 0.406691, + -0.084314, -0.004960, 0.193426, 0.188467, -0.099193, -0.223184, 0.163668, + -0.257902, -0.153749, 0.441409, 0.099193, 0.128951, -0.089274, -0.208305, + -0.009919, -0.004960, 
-0.109112, 0.024798, -0.119032, 0.019839, 0.391812, + -0.024798, 0.198386, 0.327337, -0.505884, -0.099193, 0.510844, -0.148789, + 0.094233, -0.153749, -0.039677, 0.352135, 0.272781, -0.228144, -0.287660, + -0.272781, 0.148789, 0.277740, 0.074395, 0.109112, -0.064475, 0.044637, + 0.074395, -0.292619, 0.153749, -0.064475, -0.114072, 0.198386, -0.039677, + -0.128951, -0.004960, 0.257902, -0.228144, -0.094233, 0.064475, 0.014879, + 0.188467, -0.416611, 0.099193, 0.362054, -0.208305, 0.198386, -0.079354, + 0.009919, 0.119032, 0.332297, 0.243023, -0.168628, 0.158709, 0.039677, + 0.143830, 0.277740, -0.168628, 0.009919, 0.099193, -0.004960, -0.257902, + -0.297579, 0.208305, -0.104153, 0.119032, 0.247982, 0.381893, -0.223184, + -0.367014, -0.327337, -0.168628, -0.094233, 0.208305, -0.019839, 0.183507, + 0.084314, 0.133911, 0.109112, -0.148789, -0.183507, -0.411651, -0.024798, + -0.114072, -0.029758, -0.009919, 0.173588, -0.059516, -0.049596, 0.039677, + 0.317418, 0.138870, -0.247982, -0.084314, 0.158709, 0.054556, -0.084314, + -0.049596, 0.074395, 0.019839, -0.282700, -0.119032, -0.262861, 0.163668, + -0.069435, -0.064475, -0.059516, 0.094233, 0.123991, -0.079354, -0.272781, + -0.267821, 0.233104, 0.114072, -0.218225, 0.540602, 0.089274, 0.262861, + 0.079354, 0.267821, -0.119032, -0.109112, -0.128951, 0.128951, -0.044637, + -0.272781, 0.277740, 0.297579, -0.054556, -0.084314, -0.049596, 0.123991, + 0.059516, 0.238063, -0.168628, -0.009919, 0.163668, -0.307498, 0.109112, + -0.064475, 0.218225, -0.168628, -0.004960, -0.168628, 0.119032, 0.094233, + -0.183507, -0.089274, -0.292619, -0.094233, 0.064475, -0.183507, -0.168628, + 0.089274, 0.074395, -0.367014, -0.024798, -0.069435, 0.119032, -0.302539, + -0.376933, -0.123991, -0.009919, -0.069435, -0.208305, -0.119032, 0.014879, + -0.183507, -0.238063, 0.163668, -0.332297, -0.148789, -0.391812, -0.024798, + -0.133911, -0.059516, -0.123991, 0.123991, -0.292619, -0.044637, 0.059516, + -0.069435, 0.049596, -0.069435, 0.034718, 0.158709, -0.347175, -0.044637, + 0.352135, -0.347175, -0.282700, -0.054556, 0.307498, 0.029758, 0.357095, + -0.148789, 0.208305, -0.317418, 0.009919, 0.004960, -0.243023, 0.049596, + -0.099193, 0.213265, -0.342216, 0.158709, 0.123991, -0.332297, 0.386853, + -0.262861, -0.208305, 0.123991, -0.044637, 0.148789, 0.084314, -0.297579, + -0.307498, -0.163668, 0.337256, -0.014879, 0.074395, 0.178547, -0.004960, + -0.257902, -0.019839, -0.228144, -0.034718, -0.277740, -0.158709, -0.119032, + -0.153749, 0.629876, 0.277740, 0.178547, -0.267821, -0.004960, 0.247982, + 0.084314, -0.094233, 0.000000, -0.039677, 0.332297, 0.178547, 0.009919, + -0.213265, -0.208305, -0.044637, 0.019839, 0.218225, -0.297579, 0.014879, + -0.247982, -0.004960, -0.128951, 0.421570, -0.059516, 0.362054, -0.203346, + -0.143830, -0.099193, -0.024798, 0.094233, -0.123991, 0.163668, 0.109112, + -0.104153, -0.233104, 0.009919, -0.218225, 0.376933, 0.104153, -0.059516, + 0.049596, -0.054556, 0.019839, -0.044637, -0.019839, 0.371974, -0.019839, + 0.104153, 0.168628, -0.024798, -0.272781, -0.158709, 0.223184, 0.044637, + 0.039677, -0.168628, -0.287660, -0.109112, 0.094233, -0.089274, -0.148789, + 0.178547, -0.039677, -0.089274, -0.049596, -0.024798, 0.064475, -0.158709, + 0.089274, 0.029758, -0.247982, 0.362054, 0.024798, -0.004960, -0.099193, + 0.173588, -0.059516, 0.188467, -0.629876, 0.094233, 0.371974, 0.069435, + 0.252942, -0.357095, -0.272781, -0.367014, 0.014879, -0.049596, -0.262861, + 0.009919, -0.094233, -0.094233, 0.059516, 0.223184, 0.133911, 0.411651, + -0.044637, 
-0.044637, 0.109112, 0.228144, 0.386853, -0.233104, 0.069435, + 0.228144, -0.302539, 0.029758, 0.089274, 0.044637, -0.238063, -0.138870, + -0.158709, -0.019839, 0.049596, 0.039677, 0.000000, -0.069435, 0.109112, + -0.213265, -0.188467, -0.262861, -0.267821, -0.094233, 0.133911, 0.391812, + 0.123991, -0.317418, 0.233104, -0.029758, -0.099193, -0.193426, 0.074395, + -0.009919, 0.252942, 0.322377, -0.530683, 0.208305, 0.252942, 0.203346, + -0.069435, -0.262861}; - -0.16396809, -0.21247184, 0.11259045, -0.04156673, - 0.10132131, -0.06143532, -0.00924693, 0.10084561, +// Time filter of shape {64, 8}. +const float time_weights_data_16x1x1[] = { + -0.052026, 0.043107, 0.053512, 0.013378, 0.011892, -0.182834, -0.108511, + 0.153105, 0.050539, -0.173915, 0.145672, 0.208103, -0.221481, 0.108511, + -0.496475, 0.181347, -0.016351, -0.132294, -0.234859, -0.243778, 0.028243, + -0.228914, -0.130808, -0.167969, -0.041621, -0.306209, -0.193239, -0.028243, + -0.057972, -0.057972, -0.497962, 0.054999, 0.181347, 0.047566, -0.099592, + -0.111484, -0.130808, -0.071350, 0.380532, 0.010405, 0.041621, 0.052026, + 0.022297, 0.081755, 0.098106, 0.099592, -0.584176, -0.023783, 0.062431, + -0.090674, -0.279453, -0.486070, -0.273507, 0.004459, -0.062431, 0.095133, + 0.056485, 0.022297, -0.105538, -0.184320, 0.358235, 0.254183, 0.049053, + 0.084728, 0.218508, 0.078782, -0.136754, -0.017837, -0.124862, -0.118916, + -0.001486, 0.043107, 0.254183, 0.087701, 0.261616, 0.309182, -0.404315, + -0.040134, -0.046080, -0.052026, -0.034188, -0.475665, -0.025270, -0.049053, + -0.046080, -0.062431, 0.020810, 0.040134, -0.135267, -0.169456, -0.050539, + -0.576743, 0.034188, 0.075809, 0.101079, 0.136754, 0.083241, 0.077296, + -0.050539, 0.761064, -0.335938, -0.080268, 0.025270, 0.257156, 0.227427, + 0.252697, 0.065404, 0.115943, 0.222968, -0.026756, -0.054999, 0.107025, + -0.093646, 0.041621, -0.092160, -0.474178, -0.016351, 0.004459, 0.049053, + 0.019324, 0.019324, 0.074323, 0.038648, -0.613905, 0.182834, 0.075809, + 0.028243, 0.019324, 0.010405, -0.011892, 0.001486, -0.492016, -0.224454, + -0.474178, -0.147159, 0.002973, 0.102565, 0.136754, -0.267561, -0.001486, + -0.095133, -0.040134, 0.066890, 0.074323, 0.104052, 0.532150, 0.090674, + 0.072836, -0.053512, -0.004459, 0.020810, 0.046080, 0.062431, 0.477151, + 0.133781, -0.029729, -0.026756, 0.031215, 0.156077, 0.096619, 0.251210, + 0.352289, 0.657012, 0.047566, -0.014865, -0.072836, -0.016351, 0.008919, + -0.053512, 0.016351, 0.300263, 0.047566, 0.020810, 0.169456, 0.001486, + 0.007432, 0.111484, 0.044594, -0.188779, -0.096619, 0.074323, -0.040134, + 0.160537, 0.138240, 0.184320, 0.377559, -0.092160, -0.049053, 0.056485, + -0.032702, 0.001486, -0.083241, -0.472692, -0.114457, -0.117430, -0.075809, + 0.026756, 0.163510, 0.172428, 0.127835, -0.199185, -0.218508, -0.057972, + -0.132294, -0.162023, -0.019324, -0.245265, -0.395396, -0.254183, 0.084728, + 0.248238, 0.191752, 0.221481, 0.173915, 0.173915, -0.208103, -0.077296, + 0.384991, -0.313641, -0.313641, -0.147159, -0.090674, 0.035675, 0.059458, + -0.010405, 0.019324, 0.087701, 0.016351, 0.037161, 0.469719, -0.074323, + 0.092160, 0.026756, 0.090674, 0.098106, 0.004459, -0.034188, 0.492016, + -0.367154, -0.093646, -0.063917, 0.041621, 0.017837, 0.026756, -0.062431, + -0.350803, 0.425125, 0.002973, 0.083241, 0.075809, 0.016351, 0.047566, + -0.185807, -0.107025, -0.098106, -0.144186, 0.255670, 0.020810, 0.105538, + 0.029729, 0.129321, 0.156077, 0.141213, 0.334452, 0.147159, -0.066890, + 0.035675, 0.115943, 0.240805, 0.328506, 0.162023, 
-0.237832, 0.218508, + 0.233373, 0.214049, 0.099592, 0.026756, -0.322560, -0.236346, -0.166483, + 0.225941, 0.109997, -0.147159, 0.147159, -0.266075, 0.111484, 0.078782, + -0.120403, 0.022297, -0.075809, -0.148645, -0.251210, -0.176888, -0.044594, + -0.023783, 0.016351, 0.026756, -0.013378, -0.069863, -0.112970, 0.013378, + 0.086214, 0.014865, 0.352289, -0.240805, -0.135267, -0.114457, -0.472692, + 0.334452, 0.095133, 0.047566, 0.130808, -0.068377, -0.007432, -0.130808, + -0.121889, -0.053512, -0.245265, -0.371613, -0.083241, 0.000000, -0.028243, + 0.029729, -0.093646, -0.004459, -0.038648, -0.108511, -0.475665, -0.169456, + -0.047566, -0.010405, -0.114457, -0.353776, -0.034188, -0.044594, 0.041621, + -0.047566, -0.107025, 0.004459, 0.053512, 0.047566, -0.358235, -0.193239, + 0.040134, -0.096619, -0.054999, 0.099592, 0.032702, 0.205130, -0.170942, + -0.237832, -0.405801, -0.126348, -0.072836, -0.203644, -0.169456, -0.093646, + -0.074323, 0.078782, 0.607959, -0.437017, -0.164996, -0.166483, 0.043107, + -0.016351, 0.258643, 0.065404, -0.057972, 0.017837, 0.080268, 0.050539, + -0.013378, -0.215536, -0.524718, 0.260129, 0.040134, -0.002973, -0.046080, + 0.020810, 0.025270, 0.145672, 0.515799, 0.233373, 0.011892, 0.139727, + 0.126348, 0.065404, -0.007432, -0.008919, 0.035675, 0.083241, 0.040134, + -0.005946, 0.503907, -0.490529, -0.181347, -0.092160, -0.038648, 0.019324, + 0.133781, -0.011892, 0.041621, 0.062431, -0.062431, -0.040134, -0.092160, + -0.111484, -0.133781, -0.130808, -0.484583, -0.248238, 0.037161, -0.092160, + -0.056485, -0.041621, 0.112970, 0.248238, 0.438503, 0.258643, -0.013378, + 0.004459, 0.043107, 0.040134, 0.017837, 0.101079, 0.264589, 0.212563, + 0.014865, 0.285399, 0.153105, 0.170942, 0.358235, 0.334452, 0.086214, + 0.132294, 0.098106, -0.001486, 0.107025, 0.200671, -0.026756, 0.344857, + 0.227427, -0.041621, 0.098106, 0.063917, -0.093646, 0.130808, 0.285399, + -0.319587, 0.035675, -0.017837, -0.319587, 0.016351, -0.098106, -0.017837, + 0.083241, 0.074323, -0.054999, 0.276480, 0.316614, -0.099592, -0.059458, + 0.156077, -0.043107, 0.035675, 0.056485, -0.022297, 0.017837, -0.001486, + 0.340398, 0.492016, 0.004459, 0.057972, -0.150132, -0.206617, -0.257156, + -0.248238, -0.080268, -0.164996, 0.352289, -0.054999, -0.056485, 0.010405, + -0.049053, -0.041621, -0.099592, 0.013378, -0.089187, 0.057972, -0.413234, + 0.217022, 0.013378, -0.080268, -0.035675, 0.035675, 0.007432, 0.002973, + -0.469719, 0.141213, 0.136754, 0.153105, 0.130808, -0.104052, -0.508367, + -0.291345, -0.072836, -0.019324, -0.252697, -0.214049, -0.214049, 0.130808, + 0.484583}; - 0.01257364, 0.0506071, -0.19287863, -0.07162561, - -0.02033747, 0.22673416, 0.15487903, 0.02525555, +// Bias of shape {64} +const float bias_data_16x1x1[] = { + -0.245395, -0.083545, -0.262522, -0.407912, -0.560898, -0.364789, -0.037964, + -0.378594, 0.178152, 0.400380, -0.301349, -0.240913, -0.159454, -0.158757, + -0.073665, 0.455906, -0.061232, 0.318907, -0.226993, -0.344644, 0.140316, + 0.559608, 0.109774, 0.437391, 0.113849, -0.162068, 0.039572, 0.569472, + 0.460205, 0.113459, 0.370469, 0.176811, 0.203063, -0.296975, -0.271655, + 0.059862, -0.159912, -0.077310, -0.338314, -0.195477, -0.256762, 0.233834, + 0.083172, 0.029040, -0.236288, -0.267054, -0.166627, 0.188319, -0.271391, + -0.222920, 0.106463, 0.263614, 0.384986, -0.125957, -0.095890, 0.363686, + -0.036990, -0.358884, -0.178254, 0.305596, 0.390088, -0.189437, 0.613409, + 0.399639}; - -0.1411963, -0.37054959, 0.01774767, 0.05867489, - 0.09607603, -0.0141301, -0.08995658, 
0.12867066, +// Activation state with shape {64, 8}. These initial values must be copied into +// a mutable activation state tensor. +const float initial_activation_state_data_16x1x1[] = { + -0.582275, -0.586623, -1.262373, -1.277279, -1.542175, -1.271999, -1.429757, + -1.184425, -0.462094, -1.443421, 0.230736, -0.494701, -0.354955, -2.534061, + -4.277471, -4.218467, 0.403711, -0.248748, -0.330111, -0.467683, 0.549047, + 0.733511, -0.230115, 0.793136, -1.126353, -0.984123, -0.081984, -0.222351, + 0.692830, 0.517060, 1.367958, 2.118860, -0.116766, -0.826365, -2.402700, + -2.313884, -2.898954, -2.076005, -2.405185, -2.755481, 0.329490, 0.085400, + -1.485966, -2.034702, -2.161405, -1.269515, -1.151818, -1.823841, 0.561469, + 1.109273, 1.693411, -0.082605, -0.069252, -1.225107, -1.330693, -1.411435, + 0.253406, -0.357439, -1.593415, -0.879779, -1.111136, 1.821357, 2.471952, + 1.236908, -4.014127, -2.810448, -2.944604, -1.930980, -1.566398, -0.838166, + -0.319242, 0.749349, 1.156476, 0.658670, 1.997437, 2.080663, 2.912618, + 2.677224, 2.642442, 2.796163, -0.272349, -0.473273, 3.120063, 2.747097, + 3.595510, 1.874150, 2.049919, 2.093396, -1.049959, 0.277939, -1.255541, + -1.052443, -1.810177, -0.883505, -0.538178, 0.524203, -1.017662, -0.269244, + 0.039129, -0.227941, -0.114592, -2.018243, -2.548968, -0.706804, 0.890959, + 0.102480, 0.349986, 0.405885, 1.287216, 0.756181, 0.319242, -0.641590, + -3.841774, -2.716042, -4.342065, -3.826557, -2.924729, -1.643724, -1.237839, + -0.597492, -1.954892, -1.215169, -1.528201, -1.018904, -0.863941, -0.293467, + 0.039439, 0.672023, 1.408019, 1.362679, 1.467644, 1.006171, 0.310236, + -0.249990, -1.048406, -0.752144, -1.831605, -1.058033, -1.096541, -0.293467, + 0.051551, 0.232600, 0.088816, 2.570395, 0.704009, 2.465120, 3.010751, + 2.139357, 0.630410, 1.006171, 1.545281, 1.486898, -1.162998, -2.344317, + -4.593918, -3.522842, -2.872247, -1.416714, -0.642521, -0.230115, 0.315205, + -0.368930, -0.162726, 0.396879, 0.505570, 0.534451, 0.554947, 1.270447, + 0.388805, 0.531967, -1.243119, -0.671713, -1.214859, -0.238189, 0.016459, + -1.164550, 0.609603, 3.293348, 2.600208, 1.454290, -1.034121, -1.760179, + -1.192500, -0.613951, 3.449553, 2.912618, 1.917937, 1.435968, 0.879158, + 1.118279, 0.102791, -0.502465, -0.239121, -0.092853, 1.786265, 1.943091, + 2.547104, 2.630641, 2.585302, 2.965411, -0.945615, -2.538720, -2.474126, + -1.088156, 0.056209, 0.864873, 0.170490, 0.457435, 0.545941, 0.752765, + 1.569503, 1.129459, 0.662086, -0.527929, -0.810838, -1.662978, 1.285042, + 1.653040, 4.130893, 2.961995, 4.147041, 3.256393, 3.881524, 2.522571, + -0.875431, -1.112378, 2.105817, 2.180970, 3.121926, 1.577577, 1.639376, + 2.906407, -0.142230, 0.421101, 2.212335, 2.311399, 3.993321, 3.651719, + 4.206666, 4.678387, -1.304917, -1.130701, -2.543067, -2.500212, -2.197118, + -1.197158, -0.949652, -0.282908, 0.320795, -1.543728, 1.290322, 1.788128, + 3.957297, 3.205774, 2.892432, 2.297114, 0.138814, -0.139435, 0.936920, + 0.344707, 0.723263, -1.772290, -3.138385, -2.287177, -2.405806, -1.859864, + -4.572801, -3.410424, -3.855748, -2.239663, -2.269786, -1.582857, 4.238342, + 3.858543, 2.499901, 1.087535, 0.290051, -0.026086, -0.880400, -2.602692, + -1.404292, 0.253096, -0.665502, -1.443421, -0.925119, -0.096580, 1.115484, + 1.846200, -1.604284, -1.244671, -0.464888, 0.326385, 0.168006, -0.262723, + -0.744691, 0.953379, -0.407127, -0.349986, -1.154302, 0.831023, 1.590931, + 2.538720, 2.063583, 3.697680, -0.752455, -1.293117, -1.330693, -1.869802, + -0.592523, 0.631652, 1.198089, 
-0.481347, 3.738983, 4.153252, 2.782499, + 2.244321, 0.709289, 1.650245, 1.700865, 0.385078, 2.192460, 2.610456, + 4.009780, 3.492719, 2.574743, 2.116687, 1.856138, 1.205853, 2.722563, + 4.075305, 5.415935, 3.009198, 2.715421, 1.571056, 0.897170, -2.430339, + 0.749970, 0.425760, -0.302783, 0.817359, 1.031636, 1.913589, 2.686229, + 1.631923, -1.459259, -1.793097, -1.187531, -1.553355, -0.844998, -1.296843, + -1.805519, -0.486627, 0.909591, 2.082837, -1.473855, -2.456735, -3.851401, + -2.760139, -3.060438, -2.605487, -2.138735, -2.441519, -1.333177, -1.353984, + -0.245642, -0.588486, 0.033850, 2.084700, 0.076084, 0.690035, 0.747797, + 0.594697, -1.016109, -1.348083, -1.201195, -1.088466, 2.045571, 2.460772, + 0.717984, 0.041613, -0.721711, 1.134738, 2.322269, 1.112378, -0.307441, + -0.581033, -0.868599, -0.018633, 0.856488, 0.919839, 0.303094, -0.433213, + 0.811148, -0.508986, -1.060828, -1.227591, -1.566087, -1.117968, -1.385038, + -2.011101, -0.490353, -1.849616, -0.594697, -1.055859, 1.110205, 0.622646, + 0.145957, 0.359303, 1.012072, 0.774814, -0.400295, -1.484103, -2.007374, + -1.441247, -0.997787, -0.581033, -0.545941, -0.306510, 0.693451, 0.087264, + -0.227320, -1.211753, -1.532859, -1.688753, 0.065215, 0.134777, 0.608051, + -0.393152, -0.214588, -0.635689, -1.499320, 0.069562, -1.555839, -2.633126, + -2.966032, -1.550870, -0.101549, 0.874189, 0.436318, 0.299367, 2.289972, + 2.339659, 2.602071, 1.564535, 0.019254, -0.583207, -1.295912, -2.424749, + -1.221070, -1.175109, -0.577306, -0.102791, 1.877876, 2.568222, 2.173827, + 3.131243, 2.637784, 2.088737, 3.679047, 3.218506, 2.483442, 1.650556, + 1.363611, -0.027328, 1.486898, -0.721711, -3.684327, -3.006093, -3.777491, + -2.327548, -2.737470, -4.549510, -0.060867, 0.127635, 0.680408, 0.581344, + 0.320174, -0.403090, -0.838166, 0.293777, -0.995613, -0.165521, -0.419859, + 1.110515, 1.203679, 1.749931, 2.467294, 4.276539, 0.031055, -0.967664, + 1.167035, 1.865144, 3.221923, 3.248630, 4.121266, 4.187723, 0.749039, + -1.571056, 0.785994, 1.568572, 3.759479, 3.588678, 4.116608, 3.864444, + -0.290051, -0.271107, 0.375140, 0.537556, 0.536314, 0.095959, 0.054656, + 0.088816}; - -0.27142537, -0.16955489, 0.18521598, -0.12528358, - 0.00331409, 0.11167502, 0.02218599, -0.07309391, +// One output with shape {1, 64} +const float golden_output_16x1x1[] = { + -0.349840, -0.786585, -0.411465, -0.481465, -0.609403, -0.325105, -0.100590, + -0.252233, 0.194563, 0.492934, -0.341396, -0.005863, 0.679318, -0.127419, + -0.122997, 0.481488, -0.059769, 0.577355, -0.152513, -0.232819, 0.157115, + 0.553756, -0.231177, 0.417166, 0.514220, -0.936228, -0.321929, 0.637917, + 0.698829, 0.212120, 0.370644, 0.573798, 0.110923, -0.731355, -0.181996, + 0.713996, -0.169233, -1.140784, -0.015105, 0.100089, 1.733966, 0.076599, + 0.057502, -0.930806, -1.225261, -0.394023, -0.213133, 0.170810, -0.307229, + -0.486707, -0.026519, 0.472446, 0.414638, -0.419811, -0.016620, 1.236398, + -0.132612, -0.408581, -0.166886, 0.213419, 0.074304, -0.201534, 0.497987, + -1.690133}; - 0.09593632, -0.28361851, -0.0773851, 0.17199151, - -0.00075242, 0.33691186, -0.1536046, 0.16572715, - - -0.27916506, -0.27626723, 0.42615682, 0.3225764, - -0.37472126, -0.55655634, -0.05013514, 0.289112, - - -0.24418658, 0.07540751, -0.1940318, -0.08911639, - 0.00732617, 0.46737891, 0.26449674, 0.24888524, - - -0.17225097, -0.54660404, -0.38795233, 0.08389944, - 0.07736043, -0.28260678, 0.15666828, 1.14949894, - - -0.57454878, -0.64704704, 0.73235172, -0.34616736, - 0.21120001, -0.22927976, 0.02455296, 
-0.35906726, -}; +// One output with shape {1, 64} +const float golden_output_relu_16x1x1[] = { + 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, + 0.000000, 0.194563, 0.492934, 0.000000, 0.000000, 0.679318, 0.000000, + 0.000000, 0.481488, 0.000000, 0.577355, 0.000000, 0.000000, 0.157115, + 0.553756, 0.000000, 0.417166, 0.514220, 0.000000, 0.000000, 0.637917, + 0.698829, 0.212120, 0.370644, 0.573798, 0.110923, 0.000000, 0.000000, + 0.713996, 0.000000, 0.000000, 0.000000, 0.100089, 1.733966, 0.076599, + 0.057502, 0.000000, 0.000000, 0.000000, 0.000000, 0.170810, 0.000000, + 0.000000, 0.000000, 0.472446, 0.414638, 0.000000, 0.000000, 1.236398, + 0.000000, 0.000000, 0.000000, 0.213419, 0.074304, 0.000000, 0.497987, + 0.000000}; +template void ValidateSVDFGoldens(const int batch_size, const int num_units, const int input_size, const int rank, TfLiteTensor* tensors, const int tensor_count, - float* golden_input_data, - const int golden_input_data_size, float* output_data, - float* expected_output, float tolerance = 1e-5f) { + TfLiteFusedActivation activaiton, + const T* input_sequences_data, + const int input_sequences_len, T* output_data, + const T* expected_output, float tolerance = 1e-5f) { TfLiteContext context; PopulateContext(tensors, tensor_count, micro_test::reporter, &context); @@ -180,18 +500,17 @@ void ValidateSVDFGoldens(const int batch_size, const int num_units, TfLiteSVDFParams params; params.rank = rank; - params.activation = kTfLiteActNone; + params.activation = activaiton; void* user_data = nullptr; if (registration->init) { user_data = registration->init(&context, nullptr, 0); } - // Bias is an optional tensor: - int inputs_array_data[] = {5, 0, 1, 2, kTfLiteOptionalTensor, 3}; + int inputs_array_data[] = {5, 0, 1, 2, 3, 4}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); - int outputs_array_data[] = {1, 4}; + int outputs_array_data[] = {1, 5}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); TfLiteNode node; @@ -211,13 +530,13 @@ void ValidateSVDFGoldens(const int batch_size, const int num_units, } TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - int input_sequence_size = - golden_input_data_size / sizeof(float) / (input_size * batch_size); - for (int i = 0; i < input_sequence_size; ++i) { - float* input_batch_start = golden_input_data + i * input_size * batch_size; - float* input_batch_end = input_batch_start + input_size * batch_size; + int num_inputs = input_sequences_len / (input_size * batch_size); - PopulateFloatTensor(&tensors[0], input_batch_start, input_batch_end); + for (int i = 0; i < num_inputs; ++i) { + const T* input_batch_start = + input_sequences_data + i * input_size * batch_size; + + memcpy(tensors[0].data.raw, input_batch_start, tensors[0].bytes); TfLiteStatus status = registration->invoke(&context, &node); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, status); @@ -238,140 +557,84 @@ void ValidateSVDFGoldens(const int batch_size, const int num_units, } } -void ValidateIntegerSVDFGoldens(const int batch_size, const int num_units, - const int input_size, const int rank, - TfLiteTensor* tensors, const int tensor_count, - int8_t* golden_input_data, - const int golden_input_data_size, - int8_t* output_data, int8_t* expected_output) { - TfLiteContext context; - PopulateContext(tensors, tensor_count, micro_test::reporter, &context); - - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_SVDF); - TF_LITE_MICRO_EXPECT_NE(nullptr, 
registration); - - TfLiteSVDFParams params; - params.rank = rank; - params.activation = kTfLiteActRelu; - - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, nullptr, 0); - } - - int inputs_array_data[] = {5, 0, 1, 2, 3, 4}; - TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); - - int outputs_array_data[] = {1, 5}; - TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = reinterpret_cast(¶ms); - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; - - if (registration->prepare) { - TfLiteStatus prepare_status = registration->prepare(&context, &node); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, prepare_status); - // Abort early to make it clear prepare failed. - if (prepare_status != kTfLiteOk) { - return; - } - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - - int input_sequence_size = - golden_input_data_size / sizeof(int8_t) / (input_size * batch_size); - for (int i = 0; i < input_sequence_size; ++i) { - int8_t* input_batch_start = golden_input_data + i * input_size * batch_size; - int8_t* input_batch_end = input_batch_start + input_size * batch_size; - int8_t* tensor_data = tensors[0].data.int8; - while (input_batch_start != input_batch_end) { - *tensor_data++ = *input_batch_start++; - } - - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - - int output_idx = 0; - int golden_idx = i * batch_size * num_units; - for (int j = golden_idx; j < golden_idx + batch_size * num_units; ++j) { - TF_LITE_MICRO_EXPECT_NEAR(expected_output[j], output_data[output_idx], 1); - output_idx++; - } - } - - if (registration->free) { - registration->free(&context, user_data); - } -} - void TestSVDF(const int batch_size, const int num_units, const int input_size, - const int memory_size, const int rank, float* input_data, - float* weights_feature_data, float* weights_time_data, - float* activation_state_data, float* scratch_data, - float* output_data, float* golden_input_data, - int golden_input_data_size, float* expected_output, - float tolerance = 1e-5f) { + const int memory_size, const int rank, + TfLiteFusedActivation activation, float* input_data, + const float* feature_weights_data, const float* time_weights_data, + float* activation_state_data, const float* bias_data, + float* scratch_data, float* output_data, + const float* input_sequences_data, int input_sequences_len, + const float* expected_output, float tolerance = 1e-5f) { const int num_filters = num_units * rank; const int input_dims_arg[] = {2, batch_size, input_size}; TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_arg); - const int weights_feature_dims_args[] = {2, num_filters, input_size}; - TfLiteIntArray* weights_feature_dims = - IntArrayFromInts(weights_feature_dims_args); + const int feature_weights_dims_args[] = {2, num_filters, input_size}; + TfLiteIntArray* feature_weights_dims = + IntArrayFromInts(feature_weights_dims_args); - const int weights_time_dims_args[] = {2, num_filters, memory_size}; - TfLiteIntArray* weights_time_dims = IntArrayFromInts(weights_time_dims_args); + const int time_weights_dims_args[] = {2, num_filters, memory_size}; + TfLiteIntArray* time_weights_dims = IntArrayFromInts(time_weights_dims_args); const int activation_state_dims_args[] = {2, batch_size, memory_size * num_filters}; TfLiteIntArray* activation_state_dims = 
IntArrayFromInts(activation_state_dims_args); + const int bias_dims_args[] = {1, num_units}; + TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_args); + const int output_dims_args[] = {2, batch_size, num_units}; TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_args); - const int tensor_count = 5; // 4 inputs, 1 output + const int tensor_count = 6; // 5 inputs, 1 output TfLiteTensor tensors[] = { CreateFloatTensor(input_data, input_dims), - CreateFloatTensor(weights_feature_data, weights_feature_dims), - CreateFloatTensor(weights_time_data, weights_time_dims), + CreateFloatTensor(feature_weights_data, feature_weights_dims), + CreateFloatTensor(time_weights_data, time_weights_dims), + CreateFloatTensor(bias_data, bias_dims), CreateFloatTensor(activation_state_data, activation_state_dims, /*is_variable=*/true), CreateFloatTensor(output_data, output_dims), }; ValidateSVDFGoldens(batch_size, num_units, input_size, rank, tensors, - tensor_count, golden_input_data, golden_input_data_size, - output_data, expected_output, tolerance); + tensor_count, activation, input_sequences_data, + input_sequences_len, output_data, expected_output, + tolerance); } +// The pattern to this method's arguemnts is: +// +// for each tensor in +// {input, feature weights, time weights, bias, activation state, output}: +// inline void TestIntegerSVDF( const int batch_size, const int num_units, const int input_size, - const int memory_size, const int rank, int8_t* input_data, - float input_scale, int8_t* weights_feature_data, - float weights_feature_scale, int16_t* weights_time_data, - float weights_time_scale, int32_t* bias_data, float bias_scale, - int16_t* activation_state_data, float activation_scale, int8_t* output_data, - float output_scale, int8_t* golden_input_data, int golden_input_data_size, - int8_t* expected_output) { + const int memory_size, const int rank, TfLiteFusedActivation activation, + int8_t* input_quantized, float input_scale, int input_zero_point, + const float* feature_weights_data, int8_t* feature_weights_quantized, + const float feature_weights_scale, const float* time_weights_data, + int16_t* time_weights_quantized, float time_weights_scale, + const float* bias_data, int32_t* bias_quantized, + const float* initial_activation_state_data, + int16_t* activation_state_quantized, float activation_state_scale, + int8_t* output_data, float output_scale, int output_zero_point, + const float* input_sequences_data, int8_t* input_sequences_quantized, + const int input_sequences_len, const float* golden_output, + int8_t* golden_output_quantized, int golden_output_len) { const int num_filters = num_units * rank; const int input_dims_arg[] = {2, batch_size, input_size}; TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_arg); - const int weights_feature_dims_args[] = {2, num_filters, input_size}; - TfLiteIntArray* weights_feature_dims = - IntArrayFromInts(weights_feature_dims_args); + const int feature_weights_dims_args[] = {2, num_filters, input_size}; + TfLiteIntArray* feature_weights_dims = + IntArrayFromInts(feature_weights_dims_args); - const int weights_time_dims_args[] = {2, num_filters, memory_size}; - TfLiteIntArray* weights_time_dims = IntArrayFromInts(weights_time_dims_args); + const int time_weights_dims_args[] = {2, num_filters, memory_size}; + TfLiteIntArray* time_weights_dims = IntArrayFromInts(time_weights_dims_args); const int bias_dims_data[] = {1, num_units}; TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data); @@ -384,66 +647,36 @@ inline void 
TestIntegerSVDF( const int output_dims_args[] = {2, batch_size, num_units}; TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_args); - // Tensor size is higher due to workarounds in micro buffer usage - // (b/132070898) and re-working scale calculations (b/146029510). const int tensor_count = 6; // 5 inputs, 1 output TfLiteTensor tensors[] = { - CreateQuantizedTensor(input_data, input_dims, input_scale, - /*zero_point=*/0), - CreateQuantizedTensor(weights_feature_data, weights_feature_dims, - weights_feature_scale, /*zero_point=*/0), - CreateQuantizedTensor(weights_time_data, weights_time_dims, - weights_time_scale, /*zero_point=*/0), - CreateQuantized32Tensor(bias_data, bias_dims, bias_scale), - CreateQuantizedTensor(activation_state_data, activation_state_dims, - activation_scale, /*zero_point=*/0, + CreateQuantizedTensor(input_quantized, input_dims, input_scale, + input_zero_point), + CreateQuantizedTensor(feature_weights_data, feature_weights_quantized, + feature_weights_dims, feature_weights_scale, 0), + CreateQuantizedTensor(time_weights_data, time_weights_quantized, + time_weights_dims, time_weights_scale, 0), + CreateQuantizedBiasTensor(bias_data, bias_quantized, bias_dims, + time_weights_scale, activation_state_scale), + CreateQuantizedTensor(initial_activation_state_data, + activation_state_quantized, activation_state_dims, + activation_state_scale, 0, /*is_variable=*/true), CreateQuantizedTensor(output_data, output_dims, output_scale, - /*zero_point=*/0)}; + output_zero_point)}; - // TODO(b/147839421): Affine Quantization Params should be set on tensor - // creation. - int zero_points[] = {1, 0}; + tflite::AsymmetricQuantize(golden_output, golden_output_quantized, + golden_output_len, output_scale, + output_zero_point); + tflite::AsymmetricQuantize(input_sequences_data, input_sequences_quantized, + input_sequences_len, input_scale, + input_zero_point); - // Input quant params: - float input_scales[] = {1, input_scale}; - TfLiteAffineQuantization input_quant = {FloatArrayFromFloats(input_scales), - IntArrayFromInts(zero_points), 0}; - tensors[0].quantization = {kTfLiteAffineQuantization, &input_quant}; - - // Weights features quant params: - float weights_features_scales[] = {1, weights_feature_scale}; - TfLiteAffineQuantization weights_feature_quant = { - FloatArrayFromFloats(weights_features_scales), - IntArrayFromInts(zero_points), 0}; - tensors[1].quantization = {kTfLiteAffineQuantization, &weights_feature_quant}; - - // Weights time quant params: - float weights_time_scales[] = {1, weights_time_scale}; - TfLiteAffineQuantization weights_time_quant = { - FloatArrayFromFloats(weights_time_scales), IntArrayFromInts(zero_points), - 0}; - tensors[2].quantization = {kTfLiteAffineQuantization, &weights_time_quant}; - - // Activation state quant params: - float activation_state_scales[] = {1, activation_scale}; - TfLiteAffineQuantization activation_state_quant = { - FloatArrayFromFloats(activation_state_scales), - IntArrayFromInts(zero_points), 0}; - tensors[4].quantization = {kTfLiteAffineQuantization, - &activation_state_quant}; - - // Output quant params: - float output_scales[] = {1, output_scale}; - TfLiteAffineQuantization output_quant = {FloatArrayFromFloats(output_scales), - IntArrayFromInts(zero_points), 0}; - tensors[5].quantization = {kTfLiteAffineQuantization, &output_quant}; - - ValidateIntegerSVDFGoldens( - batch_size, num_units, input_size, rank, tensors, tensor_count, - golden_input_data, golden_input_data_size, output_data, expected_output); -} // namespace 
+ ValidateSVDFGoldens(batch_size, num_units, input_size, rank, tensors, + tensor_count, activation, input_sequences_quantized, + input_sequences_len, output_data, golden_output_quantized, + /*tolerance*/ 1); +} } // namespace } // namespace testing @@ -451,114 +684,7 @@ inline void TestIntegerSVDF( TF_LITE_MICRO_TESTS_BEGIN -TF_LITE_MICRO_TEST(SvdfFloatInputSize3Rank1ShouldMatchGolden) { - constexpr int batch_size = 2; - constexpr int num_units = 4; - constexpr int input_size = 3; - constexpr int memory_size = 10; - constexpr int rank = 1; - constexpr int num_filters = num_units * rank; - - float weights_feature_data[] = {-0.31930989, -0.36118156, 0.0079667, - 0.37613347, 0.22197971, 0.12416199, - 0.27901134, 0.27557442, 0.3905206, - -0.36137494, -0.06634006, -0.10640851}; - - float weights_time_data[] = { - -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156, - 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199, - - 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518, - -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296, - - -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236, - 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846, - - -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166, - -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657}; - - const int input_size_dims_count = batch_size * input_size; - float input_data[input_size_dims_count]; - - const int activation_state_dims_count = - batch_size * memory_size * num_filters; - float activation_state_data[activation_state_dims_count]; - - const int scratch_dims_count = batch_size * num_filters; - float scratch_data[scratch_dims_count]; - - const int output_dims_count = batch_size * num_units; - float output_data[output_dims_count]; - - tflite::testing::TestSVDF( - batch_size, num_units, input_size, memory_size, rank, input_data, - weights_feature_data, weights_time_data, activation_state_data, - scratch_data, output_data, tflite::testing::svdf_input_3x2x10, - sizeof(tflite::testing::svdf_input_3x2x10), - tflite::testing::svdf_golden_output_3x2x10_rank_1); -} - -TF_LITE_MICRO_TEST(SvdfFloatInputSize3Rank2ShouldMatchGolden) { - constexpr int batch_size = 2; - constexpr int num_units = 4; - constexpr int input_size = 3; - constexpr int memory_size = 10; - constexpr int rank = 2; - constexpr int num_filters = num_units * rank; - - float weights_feature_data[] = { - -0.31930989, 0.0079667, 0.39296314, 0.37613347, 0.12416199, - 0.15785322, 0.27901134, 0.3905206, 0.21931258, -0.36137494, - -0.10640851, 0.31053296, -0.36118156, -0.0976817, -0.36916667, - 0.22197971, 0.15294972, 0.38031587, 0.27557442, 0.39635518, - -0.21580373, -0.06634006, -0.02702999, 0.27072677}; - - float weights_time_data[] = { - -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156, - 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199, - - 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518, - -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296, - - -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236, - 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846, - - -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166, - -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657, - - -0.14884081, 0.19931212, -0.36002168, 0.34663299, -0.11405486, - 0.12672701, 0.39463779, -0.07886535, -0.06384811, 0.08249187, - - -0.26816407, -0.19905911, 0.29211238, 0.31264046, -0.28664589, - 0.05698794, 0.11613581, 0.14078894, 0.02187902, -0.21781836, - - -0.15567942, 
0.08693647, -0.38256618, 0.36580828, -0.22922277, - -0.0226903, 0.12878349, -0.28122205, -0.10850525, -0.11955214, - - 0.27179423, -0.04710215, 0.31069002, 0.22672787, 0.09580326, - 0.08682203, 0.1258215, 0.1851041, 0.29228821, 0.12366763}; - - const int input_size_dims_count = batch_size * input_size; - float input_data[input_size_dims_count]; - - const int activation_state_dims_count = - batch_size * memory_size * num_filters; - float activation_state_data[activation_state_dims_count]; - const int scratch_dims_count = batch_size * num_filters; - float scratch_data[scratch_dims_count]; - - const int output_dims_count = batch_size * num_units; - float output_data[output_dims_count]; - - tflite::testing::TestSVDF( - batch_size, num_units, input_size, memory_size, rank, input_data, - weights_feature_data, weights_time_data, activation_state_data, - scratch_data, output_data, tflite::testing::svdf_input_3x2x10, - sizeof(tflite::testing::svdf_input_3x2x10), - tflite::testing::svdf_golden_output_3x2x10_rank_2); -} - -TF_LITE_MICRO_TEST(SvdfFloatInputSize2Rank1ShouldMatchGolden) { +TF_LITE_MICRO_TEST(SvdfFloat2x2Input2x4OutputShouldMatchGolden) { constexpr int batch_size = 2; constexpr int num_units = 4; constexpr int input_size = 2; @@ -566,44 +692,17 @@ TF_LITE_MICRO_TEST(SvdfFloatInputSize2Rank1ShouldMatchGolden) { constexpr int rank = 2; constexpr int num_filters = num_units * rank; - float weights_feature_data[] = { - -0.31930989, 0.0079667, 0.39296314, 0.37613347, 0.12416199, - 0.15785322, 0.27901134, 0.3905206, 0.21931258, -0.36137494, - -0.10640851, 0.31053296, -0.36118156, -0.0976817, -0.36916667, - 0.22197971, 0.15294972, 0.38031587, 0.27557442, 0.39635518, - -0.21580373, -0.06634006, -0.02702999, 0.27072677}; - - float weights_time_data[] = { - -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156, - 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199, - - 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518, - -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296, - - -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236, - 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846, - - -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166, - -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657, - - -0.14884081, 0.19931212, -0.36002168, 0.34663299, -0.11405486, - 0.12672701, 0.39463779, -0.07886535, -0.06384811, 0.08249187, - - -0.26816407, -0.19905911, 0.29211238, 0.31264046, -0.28664589, - 0.05698794, 0.11613581, 0.14078894, 0.02187902, -0.21781836, - - -0.15567942, 0.08693647, -0.38256618, 0.36580828, -0.22922277, - -0.0226903, 0.12878349, -0.28122205, -0.10850525, -0.11955214, - - 0.27179423, -0.04710215, 0.31069002, 0.22672787, 0.09580326, - 0.08682203, 0.1258215, 0.1851041, 0.29228821, 0.12366763}; - const int input_size_dims_count = batch_size * input_size; float input_data[input_size_dims_count]; const int activation_state_dims_count = batch_size * memory_size * num_filters; float activation_state_data[activation_state_dims_count]; + + memcpy(activation_state_data, + tflite::testing::initial_activation_state_data_2x2x10, + sizeof(tflite::testing::initial_activation_state_data_2x2x10)); + const int scratch_dims_count = batch_size * num_filters; float scratch_data[scratch_dims_count]; @@ -611,79 +710,236 @@ TF_LITE_MICRO_TEST(SvdfFloatInputSize2Rank1ShouldMatchGolden) { float output_data[output_dims_count]; tflite::testing::TestSVDF( - batch_size, num_units, input_size, memory_size, rank, input_data, - weights_feature_data, 
weights_time_data, activation_state_data, - scratch_data, output_data, tflite::testing::svdf_input_2x2x10, - sizeof(tflite::testing::svdf_input_2x2x10), - tflite::testing::svdf_golden_output_2x2x30_rank_1); + batch_size, num_units, input_size, memory_size, rank, kTfLiteActNone, + input_data, tflite::testing::feature_weights_data_2x2x10, + tflite::testing::time_weights_data_2x2x10, activation_state_data, + tflite::testing::bias_data_2x2x10, scratch_data, output_data, + tflite::testing::input_data_2x2x10, + sizeof(tflite::testing::input_data_2x2x10) / sizeof(float), + tflite::testing::golden_output_2x2x10); } -TF_LITE_MICRO_TEST(SvdfIntegerInputSize2Rank1ShouldMatchGolden) { +TF_LITE_MICRO_TEST(SvdfQuantized2x2Input2x4OutputShouldMatchGolden) { constexpr int batch_size = 2; constexpr int num_units = 4; constexpr int input_size = 2; constexpr int memory_size = 10; - constexpr int rank = 1; + constexpr int rank = 2; constexpr int num_filters = num_units * rank; - int8_t weights_feature_data[] = {-81, -92, 2, 96, 57, 32, - 71, 70, 100, -92, -17, -27}; - - int16_t weights_time_data[] = { - -10464, 12324, 9142, -11842, -11836, 7273, 9029, -2175, 260, 4067, - 12795, -3488, -3202, 5011, 12987, -887, 12875, 5171, 7185, 10174, - -12098, 12461, -7072, 8870, 7739, 11447, 5954, 11765, -5733, 10643, - -3534, 8912, 4693, -7761, -8886, -519, -4898, 5067, 3205, -1107, - }; - - int32_t bias_data[] = {-409707, 641518, 1662434, -113372}; - - int8_t input_sequences_data[] = { - 64, 25, 34, 23, 68, -99, 16, -59, -114, 46, 47, 94, - 18, -128, -96, -73, 16, 96, 64, 25, 34, 23, 68, -99, - 16, -59, -114, 46, 47, 94, 18, -128, -96, -73, 16, 96, - 64, 25, 34, 23, 68, -99, 16, -59, -114, 46, 47, 94, - 18, -128, -96, -73, 16, 96, 64, 25, 34, 23, 68, -99, - 16, -59, -114, 46, 47, 94, 18, -128, -96, -73, 16, 96, - }; - - int8_t expected_output[] = { - -9, 9, 18, -2, -6, 8, 13, -2, 2, -16, 2, 5, 2, -7, - 0, 3, 7, 0, 5, 7, -11, 18, 30, 0, -9, -24, 14, -12, - -1, 1, -20, 2, -19, -20, 20, -13, -1, -10, 50, 4, 26, 32, - 2, -12, -12, 11, -10, -29, 50, -61, 4, 15, 19, -39, 13, 19, - -56, 49, 12, 13, 29, -3, -4, -22, -76, -29, -14, 38, -30, -30, - 27, 0, 39, 16, 49, -14, -18, 28, -35, 11, 45, 0, -13, -61, - 34, -80, 37, 26, 15, -23, 12, 15, 18, 83, -28, -21, -27, -48, - 17, 2, -113, -52, 9, 48, -4, -1, 15, -7, 39, 16, 49, -14, - -18, 28, -35, 11, 45, 0, -13, -61, 34, -80, 37, 26, 15, -23, - 12, 15, 18, 83, -28, -21, -27, -48, 17, 2, -113, -52, 9, 48, - -4, -1, 15, -7, - }; - const int input_size_dims_count = batch_size * input_size; - int8_t input_data[input_size_dims_count]; const int activation_state_dims_count = batch_size * memory_size * num_filters; - int16_t activation_state_data[activation_state_dims_count]; const int output_dims_count = batch_size * num_units; int8_t output_data[output_dims_count]; - float input_scale = 1.f / INT8_MAX; // Range is [-1, 1] - float weights_feature_scale = 0.5f / INT8_MAX; // Range is [-0.5, 0.5] - float weights_time_scale = 1.f / INT16_MAX; // Range is [-1, 1] - float activation_scale = 16.f / INT16_MAX; // Range is [-16, 16] - float bias_scale = 512.f / INT32_MAX; // Range is [-512, 512] - float output_scale = 0.5f / INT8_MAX; // Range is [-0.5, 0.5] + float input_scale = 2.5f / INT8_MAX; // Range is [-2.5, 2.5] + float feature_weights_scale = 1.f / INT8_MAX; // Range is [-1, 1] + float time_weights_scale = 1.f / INT16_MAX; // Range is [-1, 1] + float activation_state_scale = 16.f / INT16_MAX; // Range is [-16, 16] + float output_scale = 1.f / INT8_MAX; // Range is [-1, 1] + + int 
input_zero_point = 0; + int output_zero_point = 0; + + int8_t input_quantized[input_size_dims_count]; + int8_t input_sequences_quantized[sizeof(tflite::testing::input_data_2x2x10) / + sizeof(float)]; + int8_t feature_weights_quantized + [sizeof(tflite::testing::feature_weights_data_2x2x10) / sizeof(float)]; + int16_t + time_weights_quantized[sizeof(tflite::testing::time_weights_data_2x2x10) / + sizeof(float)]; + int16_t activation_state_quantized[activation_state_dims_count]; + int32_t + bias_quantized[sizeof(tflite::testing::bias_data_2x2x10) / sizeof(float)]; + int8_t golden_quantized[sizeof(tflite::testing::golden_output_2x2x10) / + sizeof(float)]; tflite::testing::TestIntegerSVDF( - batch_size, num_units, input_size, memory_size, rank, input_data, - input_scale, weights_feature_data, weights_feature_scale, - weights_time_data, weights_time_scale, bias_data, bias_scale, - activation_state_data, activation_scale, output_data, output_scale, - input_sequences_data, sizeof(input_sequences_data), expected_output); + batch_size, num_units, input_size, memory_size, rank, kTfLiteActRelu, + input_quantized, input_scale, input_zero_point, + tflite::testing::feature_weights_data_2x2x10, feature_weights_quantized, + feature_weights_scale, tflite::testing::time_weights_data_2x2x10, + time_weights_quantized, time_weights_scale, + tflite::testing::bias_data_2x2x10, bias_quantized, + tflite::testing::initial_activation_state_data_2x2x10, + activation_state_quantized, activation_state_scale, output_data, + output_scale, output_zero_point, tflite::testing::input_data_2x2x10, + input_sequences_quantized, + sizeof(tflite::testing::input_data_2x2x10) / sizeof(float), + tflite::testing::golden_output_2x2x10, golden_quantized, + sizeof(tflite::testing::golden_output_2x2x10) / sizeof(float)); +} + +TF_LITE_MICRO_TEST(SvdfFloat1x16Input64x1OutputShouldMatchGolden) { + constexpr int batch_size = 1; + constexpr int num_units = 64; + constexpr int input_size = 16; + constexpr int memory_size = 8; + constexpr int rank = 1; + constexpr int num_filters = num_units * rank; + constexpr int activation_state_dims_count = + batch_size * memory_size * num_filters; + constexpr int output_dims_count = batch_size * num_units; + constexpr int input_dims_count = batch_size * input_size; + + float input_data[input_dims_count]; + float output_data[output_dims_count]; + float scratch_buffer[batch_size * num_filters]; + float activation_state_data_mutable[activation_state_dims_count]; + + // Initialize activation state to starting values. 
+ memcpy(activation_state_data_mutable, + tflite::testing::initial_activation_state_data_16x1x1, + sizeof(tflite::testing::initial_activation_state_data_16x1x1)); + + tflite::testing::TestSVDF( + batch_size, num_units, input_size, memory_size, rank, kTfLiteActNone, + input_data, tflite::testing::feature_weights_data_16x1x1, + tflite::testing::time_weights_data_16x1x1, activation_state_data_mutable, + tflite::testing::bias_data_16x1x1, scratch_buffer, output_data, + tflite::testing::input_data_16x1x1, input_size, + tflite::testing::golden_output_16x1x1); +} + +TF_LITE_MICRO_TEST(SvdfFloat1x16Input64x1OutputReluShouldMatchGolden) { + constexpr int batch_size = 1; + constexpr int num_units = 64; + constexpr int input_size = 16; + constexpr int memory_size = 8; + constexpr int rank = 1; + constexpr int num_filters = num_units * rank; + constexpr int activation_state_dims_count = + batch_size * memory_size * num_filters; + constexpr int output_dims_count = batch_size * num_units; + constexpr int input_dims_count = batch_size * input_size; + + float input_data[input_dims_count]; + float output_data[output_dims_count]; + float scratch_buffer[batch_size * num_filters]; + float activation_state_data_mutable[activation_state_dims_count]; + + // Initialize activation state to starting values. + memcpy(activation_state_data_mutable, + tflite::testing::initial_activation_state_data_16x1x1, + sizeof(tflite::testing::initial_activation_state_data_16x1x1)); + + tflite::testing::TestSVDF( + batch_size, num_units, input_size, memory_size, rank, kTfLiteActRelu, + input_data, tflite::testing::feature_weights_data_16x1x1, + tflite::testing::time_weights_data_16x1x1, activation_state_data_mutable, + tflite::testing::bias_data_16x1x1, scratch_buffer, output_data, + tflite::testing::input_data_16x1x1, input_size, + tflite::testing::golden_output_relu_16x1x1); +} + +TF_LITE_MICRO_TEST(SvdfQuantized1x16Input64x1OutputShouldMatchGolden) { + constexpr int batch_size = 1; + constexpr int num_units = 64; + constexpr int input_size = 16; + constexpr int memory_size = 8; + constexpr int rank = 1; + constexpr int num_filters = num_units * rank; + constexpr int activation_state_dims_count = + batch_size * memory_size * num_filters; + constexpr int output_dims_count = batch_size * num_units; + constexpr int input_dims_count = batch_size * input_size; + + int8_t output_data[output_dims_count]; + + float input_scale = 0.10075444; + float feature_weights_scale = 0.00649388; + float time_weights_scale = 0.001571355; + float activation_state_scale = 0.00045896982; + float output_scale = 0.051445257; + + int input_zero_point = 2; + int output_zero_point = 0; + + int8_t input_quantized[input_dims_count]; + int8_t input_sequences_quantized[sizeof(tflite::testing::input_data_16x1x1) / + sizeof(float)]; + int8_t feature_weights_quantized + [sizeof(tflite::testing::feature_weights_data_16x1x1) / sizeof(float)]; + int16_t + time_weights_quantized[sizeof(tflite::testing::time_weights_data_16x1x1) / + sizeof(float)]; + int16_t activation_state_quantized[activation_state_dims_count]; + int32_t + bias_quantized[sizeof(tflite::testing::bias_data_16x1x1) / sizeof(float)]; + int8_t golden_quantized[sizeof(tflite::testing::golden_output_16x1x1) / + sizeof(float)]; + + tflite::testing::TestIntegerSVDF( + batch_size, num_units, input_size, memory_size, rank, kTfLiteActNone, + input_quantized, input_scale, input_zero_point, + tflite::testing::feature_weights_data_16x1x1, feature_weights_quantized, + feature_weights_scale, 
tflite::testing::time_weights_data_16x1x1, + time_weights_quantized, time_weights_scale, + tflite::testing::bias_data_16x1x1, bias_quantized, + tflite::testing::initial_activation_state_data_16x1x1, + activation_state_quantized, activation_state_scale, output_data, + output_scale, output_zero_point, tflite::testing::input_data_16x1x1, + input_sequences_quantized, + sizeof(tflite::testing::input_data_16x1x1) / sizeof(float), + tflite::testing::golden_output_16x1x1, golden_quantized, + sizeof(tflite::testing::golden_output_16x1x1) / sizeof(float)); +} + +TF_LITE_MICRO_TEST(SvdfQuantized1x16Input64x1OutputReluShouldMatchGolden) { + constexpr int batch_size = 1; + constexpr int num_units = 64; + constexpr int input_size = 16; + constexpr int memory_size = 8; + constexpr int rank = 1; + constexpr int num_filters = num_units * rank; + constexpr int activation_state_dims_count = + batch_size * memory_size * num_filters; + constexpr int output_dims_count = batch_size * num_units; + constexpr int input_dims_count = batch_size * input_size; + + int8_t output_data[output_dims_count]; + + float input_scale = 0.10075444; + float feature_weights_scale = 0.00649388; + float time_weights_scale = 0.001571355; + float activation_state_scale = 0.00045896982; + float output_scale = 0.051445257; + + int input_zero_point = 2; + int output_zero_point = -128; + + int8_t input_quantized[input_dims_count]; + int8_t input_sequences_quantized[sizeof(tflite::testing::input_data_16x1x1) / + sizeof(float)]; + int8_t feature_weights_quantized + [sizeof(tflite::testing::feature_weights_data_16x1x1) / sizeof(float)]; + int16_t + time_weights_quantized[sizeof(tflite::testing::time_weights_data_16x1x1) / + sizeof(float)]; + int16_t activation_state_quantized[activation_state_dims_count]; + int32_t + bias_quantized[sizeof(tflite::testing::bias_data_16x1x1) / sizeof(float)]; + int8_t golden_quantized[sizeof(tflite::testing::golden_output_relu_16x1x1) / + sizeof(float)]; + + tflite::testing::TestIntegerSVDF( + batch_size, num_units, input_size, memory_size, rank, kTfLiteActRelu, + input_quantized, input_scale, input_zero_point, + tflite::testing::feature_weights_data_16x1x1, feature_weights_quantized, + feature_weights_scale, tflite::testing::time_weights_data_16x1x1, + time_weights_quantized, time_weights_scale, + tflite::testing::bias_data_16x1x1, bias_quantized, + tflite::testing::initial_activation_state_data_16x1x1, + activation_state_quantized, activation_state_scale, output_data, + output_scale, output_zero_point, tflite::testing::input_data_16x1x1, + input_sequences_quantized, + sizeof(tflite::testing::input_data_16x1x1) / sizeof(float), + tflite::testing::golden_output_16x1x1, golden_quantized, + sizeof(tflite::testing::golden_output_relu_16x1x1) / sizeof(float)); } TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/svdf.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/svdf.cc index 3d6ad33cfcb..125f3f7b2bf 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/svdf.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/svdf.cc @@ -344,23 +344,16 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { // Validate output tensor: TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8); - // Calculate effective scales. 
- auto* input_params = - static_cast(input->quantization.params); - auto* weights_feature_params = static_cast( - weights_feature->quantization.params); - auto* state_params = static_cast( - activation_state->quantization.params); - auto* weight_time_params = - static_cast(weights_time->quantization.params); - auto* output_params = - static_cast(output->quantization.params); - const float effective_scale_1 = input_params->scale->data[0] * - weights_feature_params->scale->data[0] / - state_params->scale->data[0]; - const float effective_scale_2 = state_params->scale->data[0] * - weight_time_params->scale->data[0] / - output_params->scale->data[0]; + const double effective_scale_1 = + static_cast(input->params.scale * weights_feature->params.scale / + activation_state->params.scale); + const double effective_scale_2 = + static_cast(activation_state->params.scale * + weights_time->params.scale / output->params.scale); + + TF_LITE_ENSURE_EQ(context, static_cast(bias->params.scale), + static_cast(activation_state->params.scale * + weights_time->params.scale)); TFLITE_DCHECK(node->user_data != nullptr); OpData* data = static_cast(node->user_data); @@ -397,7 +390,6 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { TfLiteTensor* activation_state = GetVariableInput(context, node, kInputActivationStateTensor); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); - TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActRelu); TFLITE_DCHECK(node->user_data != nullptr); const OpData& data = *(static_cast(node->user_data)); From 400335e0589e021d0ef6b5e9a37455b61ca5515b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sat, 25 Jul 2020 02:02:17 -0700 Subject: [PATCH 1333/2522] Update GraphDef version to 473. PiperOrigin-RevId: 323140447 Change-Id: Iab135ffd138510a7494176e4ec0085dcb8a92fc7 --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 6a58c09a9fd..4b742fe1968 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 472 // Updated: 2020/7/24 +#define TF_GRAPH_DEF_VERSION 473 // Updated: 2020/7/25 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From 055c5e107db5a3b7b0899d8974e14ab6ce772fc0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sat, 25 Jul 2020 02:02:19 -0700 Subject: [PATCH 1334/2522] compat: Update forward compatibility horizon to 2020-07-25 PiperOrigin-RevId: 323140451 Change-Id: I0099b15b49a75d5b6b0f71a056418d01268908fd --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 463cabd53bd..9aa6c0de764 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 24) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 25) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From 3687158fe74b6f5bf29401cad7c90e59c247464e Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Sat, 25 Jul 2020 15:33:20 +0200 Subject: [PATCH 1335/2522] Directly call sample_distorted_bounding_box_v2 to prevent deprecation warning --- tensorflow/python/ops/image_ops_impl.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index 9cc6a6d9c26..c7340d82166 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -2931,10 +2931,18 @@ def sample_distorted_bounding_box_v2(image_size, Provide as input to `tf.image.draw_bounding_boxes`. """ seed1, seed2 = random_seed.get_seed(seed) if seed else (0, 0) - return sample_distorted_bounding_box(image_size, bounding_boxes, seed1, seed2, - min_object_covered, aspect_ratio_range, - area_range, max_attempts, - use_image_if_no_bounding_boxes, name) + with ops.name_scope(name, 'sample_distorted_bounding_box'): + return gen_image_ops.sample_distorted_bounding_box_v2( + image_size, + bounding_boxes, + seed=seed1, + seed2=seed2, + min_object_covered=min_object_covered, + aspect_ratio_range=aspect_ratio_range, + area_range=area_range, + max_attempts=max_attempts, + use_image_if_no_bounding_boxes=use_image_if_no_bounding_boxes, + name=name) @tf_export(v1=['image.sample_distorted_bounding_box']) From 92ea455c60b6591654400149403cc379a6cb8031 Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Sat, 25 Jul 2020 18:13:26 +0200 Subject: [PATCH 1336/2522] Prefer TensorShape(...) over as_shape(...) when passing a list --- tensorflow/python/framework/tensor_util.py | 2 +- tensorflow/python/tpu/tpu_sharding.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py index 9b30d053c79..6c4c9855cf8 100644 --- a/tensorflow/python/framework/tensor_util.py +++ b/tensorflow/python/framework/tensor_util.py @@ -865,7 +865,7 @@ def constant_value_as_shape(tensor): # pylint: disable=invalid-name ValueError: If the shape is rank-0 and is not statically known to be -1. """ if isinstance(tensor, ops.EagerTensor): - return tensor_shape.as_shape( + return tensor_shape.TensorShape( [dim if dim != -1 else None for dim in tensor.numpy()]) if tensor.get_shape().ndims == 0: diff --git a/tensorflow/python/tpu/tpu_sharding.py b/tensorflow/python/tpu/tpu_sharding.py index 05e6ce2c506..c6f5017efbd 100644 --- a/tensorflow/python/tpu/tpu_sharding.py +++ b/tensorflow/python/tpu/tpu_sharding.py @@ -185,7 +185,7 @@ class ShardingPolicy(object): (shape.as_list(), self._number_of_shards, self._shard_dimension)) dims[self._shard_dimension] //= self._number_of_shards - return tensor_shape.as_shape(dims) + return tensor_shape.TensorShape(dims) def _unshard_shape(self, shape): """Return the unsharded shape that would generate a given sharded shape. @@ -213,7 +213,7 @@ class ShardingPolicy(object): (shape.as_list(), self._shard_dimension)) dims = shape.as_list() dims[self._shard_dimension] *= self._number_of_shards - return tensor_shape.as_shape(dims) + return tensor_shape.TensorShape(dims) def get_unsharded_shape(self, shapes): """Returns the shape of an unsharded Tensor given a list of shards. 
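The change above is a mechanical substitution: as_shape() returns its argument unchanged when it is already a TensorShape and otherwise just forwards to the TensorShape constructor, so calling the constructor directly on a fresh list skips one hop. A minimal sketch with the public API, including the shard/unshard arithmetic the surrounding methods wrap (illustrative only, not part of the patch):

import tensorflow as tf

number_of_shards, shard_dimension = 4, 0
dims = tf.TensorShape([8, 16]).as_list()

dims[shard_dimension] //= number_of_shards
sharded = tf.TensorShape(dims)        # [2, 16]

dims[shard_dimension] *= number_of_shards
unsharded = tf.TensorShape(dims)      # back to [8, 16]
print(sharded.as_list(), unsharded.as_list())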
From 1c29b137644edaec7169fbcf7a2d4154d32b231f Mon Sep 17 00:00:00 2001 From: Ran Chen Date: Sat, 25 Jul 2020 15:33:28 -0700 Subject: [PATCH 1337/2522] Handle UpdateContext correctly in SyncOnReadVariable Inside strategy.extended.update, in_cross_replica_context() returns True, but we should check for UpdateContext as well. Inside UpdateContext, reads and writes should behave as reading/writing the replica local variable. Before this fix, updating synchronization=ON_READ variables from keras optimizers likely yields incorrect results. All replicas are updated with the value from the last replica. PiperOrigin-RevId: 323188475 Change-Id: I2278332868857b0bc97563b311e0c059d7644720 --- tensorflow/python/distribute/values.py | 12 ++++++--- tensorflow/python/distribute/values_test.py | 29 +++++++++++++++++++++ 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/distribute/values.py b/tensorflow/python/distribute/values.py index 9af4bd0bd91..50cd8d73e73 100644 --- a/tensorflow/python/distribute/values.py +++ b/tensorflow/python/distribute/values.py @@ -1023,7 +1023,7 @@ class SyncOnReadVariable(DistributedVariable): # with MirroredVariable. def assign_sub(self, value, use_locking=False, name=None, read_value=True): with ds_context.enter_or_assert_strategy(self._distribute_strategy): - if ds_context.in_cross_replica_context(): + if ds_context.in_cross_replica_context() and not _in_update_replica(): return values_util.on_read_assign_sub_cross_replica( self, value, read_value=read_value) else: @@ -1032,7 +1032,7 @@ class SyncOnReadVariable(DistributedVariable): def assign_add(self, value, use_locking=False, name=None, read_value=True): with ds_context.enter_or_assert_strategy(self._distribute_strategy): - if ds_context.in_cross_replica_context(): + if ds_context.in_cross_replica_context() and not _in_update_replica(): return values_util.on_read_assign_add_cross_replica( self, value, read_value=read_value) else: @@ -1041,7 +1041,7 @@ class SyncOnReadVariable(DistributedVariable): def assign(self, value, use_locking=False, name=None, read_value=True): with ds_context.enter_or_assert_strategy(self._distribute_strategy): - if ds_context.in_cross_replica_context(): + if ds_context.in_cross_replica_context() and not _in_update_replica(): return values_util.on_read_assign_cross_replica( self, value, read_value=read_value) else: @@ -1076,7 +1076,7 @@ class SyncOnReadVariable(DistributedVariable): def value(self): with ds_context.enter_or_assert_strategy(self._distribute_strategy): - if ds_context.in_cross_replica_context(): + if ds_context.in_cross_replica_context() and not _in_update_replica(): if self._aggregation == vs.VariableAggregation.ONLY_FIRST_REPLICA: return self._get_replica(0).value() return self._get_cross_replica() @@ -1462,3 +1462,7 @@ def _is_sync_on_read(val): if val._policy: # pylint: disable=protected-access return not val._policy._is_mirrored() # pylint: disable=protected-access return not isinstance(val, Mirrored) + + +def _in_update_replica(): + return distribute_lib.get_update_replica_id() is not None diff --git a/tensorflow/python/distribute/values_test.py b/tensorflow/python/distribute/values_test.py index 83f5d73a6fa..1c090737d8f 100644 --- a/tensorflow/python/distribute/values_test.py +++ b/tensorflow/python/distribute/values_test.py @@ -554,6 +554,35 @@ class DistributedVariableTest(test.TestCase, parameterized.TestCase): self.evaluate( distribution.experimental_local_results(distribution.run(assign))) + def testStrategyExtendedUpdate(self, 
distribution, synchronization, + aggregation): + if len(distribution.extended.parameter_devices) != 2: + self.skipTest("n/a: needs exactly two parameter devices") + with distribution.scope(): + v = variables_lib.Variable( + 0., synchronization=synchronization, aggregation=aggregation) + # Note that this is actually real usage. We're doing this in optimizer to + # workaround the current restriction in strategy.extended.update(). + value = values_lib.Mirrored([1., 2.]) + + assign_fn = lambda var, value: var.assign(value) + self.evaluate(distribution.extended.update(v, assign_fn, args=(value,))) + self.assertAllEqual(self.evaluate(v.values), [1., 2.]) + + assign_add_fn = lambda var, value: var.assign_add(value) + self.evaluate(distribution.extended.update(v, assign_add_fn, args=(value,))) + self.assertAllEqual(self.evaluate(v.values), [2., 4.]) + + assign_sub_fn = lambda var, value: var.assign_sub(value) + self.evaluate(distribution.extended.update(v, assign_sub_fn, args=(value,))) + self.assertAllEqual(self.evaluate(v.values), [1., 2.]) + + read_assign_fn = lambda var, value: var.assign_add(var.value() + var. + read_value()) + self.evaluate( + distribution.extended.update(v, read_assign_fn, args=(value,))) + self.assertAllEqual(self.evaluate(v.values), [3., 6.]) + @combinations.generate( combinations.combine( From f391524226affe6c48eb1296d44a51d92f8619b0 Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Sat, 25 Jul 2020 16:14:48 -0700 Subject: [PATCH 1338/2522] Add support for variable policy to be used by MirroredStrategy and TPUStrategy. Refactor existing values test and add an option to test variable policy. PiperOrigin-RevId: 323190899 Change-Id: I8d97a625b33b0e97c0e0a3d76d96253a074d3359 --- tensorflow/python/distribute/BUILD | 41 + tensorflow/python/distribute/combinations.py | 10 +- .../python/distribute/distribute_utils.py | 124 +- .../python/distribute/mirrored_strategy.py | 20 +- .../distribute/mirrored_variable_test.py | 6 +- tensorflow/python/distribute/tpu_strategy.py | 7 +- tensorflow/python/distribute/tpu_values.py | 316 +++- tensorflow/python/distribute/values.py | 14 +- tensorflow/python/distribute/values_test.py | 1092 +------------- tensorflow/python/distribute/vars_test.py | 1270 +++++++++++++++++ 10 files changed, 1796 insertions(+), 1104 deletions(-) create mode 100644 tensorflow/python/distribute/vars_test.py diff --git a/tensorflow/python/distribute/BUILD b/tensorflow/python/distribute/BUILD index 356fb3a7a9f..185b4568868 100644 --- a/tensorflow/python/distribute/BUILD +++ b/tensorflow/python/distribute/BUILD @@ -302,6 +302,7 @@ py_library( ":distribute_lib", ":reduce_util", ":shared_variable_creator", + ":tpu_values", ":values", "//tensorflow/python:array_ops", "//tensorflow/python:config", @@ -1224,6 +1225,46 @@ distribute_py_test( ], ) +distribute_py_test( + name = "vars_test", + size = "medium", + srcs = ["vars_test.py"], + main = "vars_test.py", + shard_count = 5, + tags = [ + "multi_and_single_gpu", + "no_rocm", + ], + tpu_tags = [ + "no_oss", # b/150954621 Target too big to run serially reliably. 
+ ], + deps = [ + ":combinations", + ":distribute_lib", + ":strategy_combinations", + ":tpu_strategy", + ":tpu_values", + ":values", + "//tensorflow/python:array_ops", + "//tensorflow/python:checkpoint_management", + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python:indexed_slices", + "//tensorflow/python:math_ops", + "//tensorflow/python:random_ops", + "//tensorflow/python:training", + "//tensorflow/python:variable_scope", + "//tensorflow/python:variables", + "//tensorflow/python/distribute/cluster_resolver:tpu_cluster_resolver_py", + "//tensorflow/python/eager:context", + "//tensorflow/python/eager:def_function", + "//tensorflow/python/eager:test", + "//tensorflow/python/tpu:tpu_lib", + "@absl_py//absl/testing:parameterized", + ], +) + distribute_py_test( name = "ps_values_test", size = "medium", diff --git a/tensorflow/python/distribute/combinations.py b/tensorflow/python/distribute/combinations.py index ad8bb879b93..a86c751ec79 100644 --- a/tensorflow/python/distribute/combinations.py +++ b/tensorflow/python/distribute/combinations.py @@ -58,11 +58,17 @@ class DistributionParameter(combinations_lib.ParameterModifier): """ def modified_arguments(self, kwargs, requested_parameters): - del requested_parameters + # Get the parameter that indicates if we need to set the `_use_policy` flag + # on the strategy object. This is a temporary flag for testing the variable + # policy rollout. + use_var_policy = kwargs.get("use_var_policy", None) distribution_arguments = {} for k, v in kwargs.items(): if isinstance(v, NamedDistribution): - distribution_arguments[k] = v.strategy + strategy = v.strategy + if use_var_policy: + strategy.extended._use_var_policy = use_var_policy + distribution_arguments[k] = strategy return distribution_arguments diff --git a/tensorflow/python/distribute/distribute_utils.py b/tensorflow/python/distribute/distribute_utils.py index 89848b91318..916ebafd8ac 100644 --- a/tensorflow/python/distribute/distribute_utils.py +++ b/tensorflow/python/distribute/distribute_utils.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.distribute import tpu_values as tpu_values_lib from tensorflow.python.distribute import values as values_lib from tensorflow.python.eager import context from tensorflow.python.eager import tape @@ -145,7 +146,7 @@ def select_replica_mirrored(replica_id, structured): def _get_mirrored(x): if isinstance(x, values_lib.DistributedValues): - if not isinstance(x, values_lib.Mirrored): + if not is_mirrored(x): raise TypeError( "Expected value to be mirrored across replicas: %s in %s." % (x, structured)) @@ -245,34 +246,25 @@ def validate_colocate(v, extended): # Variable creation function for sync strategies. -def create_mirrored_variable( # pylint: disable=missing-docstring - strategy, real_mirrored_creator, mirrored_cls, sync_on_read_cls, **kwargs): - # Figure out what collections this variable should be added to. - # We'll add the MirroredVariable to those collections instead. 
- var_collections = kwargs.pop("collections", None) - if var_collections is None: - var_collections = [ops.GraphKeys.GLOBAL_VARIABLES] - kwargs["collections"] = [] - +def _get_and_validate_synchronization(kwargs): + """Validate that given synchronization value is valid.""" synchronization = kwargs.get("synchronization", - vs.VariableSynchronization.ON_WRITE) - + vs.VariableSynchronization.AUTO) if synchronization == vs.VariableSynchronization.NONE: raise ValueError( - "`NONE` variable synchronization mode is not supported with `Mirrored` " - "distribution strategy. Please change the `synchronization` for " + "`NONE` variable synchronization mode is not supported with " + "tf.distribute strategy. Please change the `synchronization` for " "variable: " + str(kwargs["name"])) - elif synchronization == vs.VariableSynchronization.ON_READ: - is_sync_on_read = True - elif synchronization in (vs.VariableSynchronization.ON_WRITE, - vs.VariableSynchronization.AUTO): - # `AUTO` synchronization defaults to `ON_WRITE`. - is_sync_on_read = False - else: + if synchronization not in (vs.VariableSynchronization.ON_READ, + vs.VariableSynchronization.ON_WRITE, + vs.VariableSynchronization.AUTO): raise ValueError( "Invalid variable synchronization mode: %s for variable: %s" % (synchronization, kwargs["name"])) + return synchronization + +def _validate_aggregation(kwargs): aggregation = kwargs.pop("aggregation", vs.VariableAggregation.NONE) if aggregation not in (vs.VariableAggregation.NONE, @@ -281,6 +273,33 @@ def create_mirrored_variable( # pylint: disable=missing-docstring vs.VariableAggregation.ONLY_FIRST_REPLICA): raise ValueError("Invalid variable aggregation mode: %s for variable: %s" % (aggregation, kwargs["name"])) + return aggregation + + +def _get_variable_policy_class(synchronization, aggregation, policy_mapping): + if synchronization == vs.VariableSynchronization.AUTO: + if aggregation == vs.VariableAggregation.NONE: + # Use AutoPolicy. + return policy_mapping.get(synchronization) + else: + # Revert to OnWritePolicy + return policy_mapping.get(vs.VariableSynchronization.ON_WRITE) + return policy_mapping.get(synchronization) + + +def create_mirrored_variable(strategy, real_mirrored_creator, class_mapping, + policy_mapping, **kwargs): + """Create distributed variables with given synchronization and aggregation.""" + # Figure out what collections this variable should be added to. + # We'll add the MirroredVariable to those collections instead. + var_collections = kwargs.pop("collections", None) + if var_collections is None: + var_collections = [ops.GraphKeys.GLOBAL_VARIABLES] + kwargs["collections"] = [] + + synchronization = _get_and_validate_synchronization(kwargs) + aggregation = _validate_aggregation(kwargs) + use_var_policy = getattr(strategy.extended, "_use_var_policy", False) # Ignore user-specified caching device, not needed for mirrored variables. kwargs.pop("caching_device", None) @@ -290,8 +309,15 @@ def create_mirrored_variable( # pylint: disable=missing-docstring # here. 
with tape.stop_recording(): value_list = real_mirrored_creator(**kwargs) - var_cls = sync_on_read_cls if is_sync_on_read else mirrored_cls - result = var_cls(strategy, value_list, aggregation) + if use_var_policy: + var_policy_cls = _get_variable_policy_class(synchronization, aggregation, + policy_mapping) + var_policy = var_policy_cls(aggregation=aggregation) + var_cls = class_mapping.get("VariableClass") + result = var_cls(strategy, value_list, aggregation, var_policy=var_policy) + else: + var_cls = class_mapping.get(synchronization) + result = var_cls(strategy, value_list, aggregation) # Install the created DistributedVariable as _distributed_container property # of the underlying variables, to make it easy to map back to the container. for v in result.values: @@ -324,3 +350,55 @@ def create_mirrored_variable( # pylint: disable=missing-docstring ops.add_to_collections(ops.GraphKeys.GLOBAL_STEP, result) return result + + +# Utility functions +# Return True if the Value is Mirrored or the Variable is replicated and kept in +# sync. +def is_mirrored(val): + if isinstance(val, values_lib.DistributedVariable): + if val._policy: # pylint: disable=protected-access + return val._policy._is_mirrored() # pylint: disable=protected-access + return isinstance(val, values_lib.Mirrored) + + +def is_sync_on_read(val): + if isinstance(val, values_lib.DistributedVariable): + if val._policy: # pylint: disable=protected-access + return not val._policy._is_mirrored() # pylint: disable=protected-access + return not isinstance(val, values_lib.Mirrored) + +# The following mapping indicates the policy that you must use for a given +# variable `synchronization` and `aggregation` pair. +# AutoPolicy is used for: +# (synchronization=Auto, aggregation=None) +# OnWritePolicy is used for: +# (synchronization=Auto, aggregation=SUM,MEAN,ONLY_FIRST_REPLICA) +# (synchronization=ON_WRITE, aggregation=NONE,SUM,MEAN,ONLY_FIRST_REPLICA) +# OnReadPolicy is used for: +# (synchronization=ON_READ, aggregation=NONE,SUM,MEAN,ONLY_FIRST_REPLICA) +VARIABLE_POLICY_MAPPING = { + vs.VariableSynchronization.AUTO: values_lib.AutoPolicy, + vs.VariableSynchronization.ON_WRITE: values_lib.OnWritePolicy, + vs.VariableSynchronization.ON_READ: values_lib.OnReadPolicy, +} + +VARIABLE_CLASS_MAPPING = { + "VariableClass": values_lib.DistributedVariable, + vs.VariableSynchronization.AUTO: values_lib.MirroredVariable, + vs.VariableSynchronization.ON_WRITE: values_lib.MirroredVariable, + vs.VariableSynchronization.ON_READ: values_lib.SyncOnReadVariable, +} + +TPU_VARIABLE_POLICY_MAPPING = { + vs.VariableSynchronization.AUTO: tpu_values_lib.TPUAutoPolicy, + vs.VariableSynchronization.ON_WRITE: tpu_values_lib.TPUOnWritePolicy, + vs.VariableSynchronization.ON_READ: tpu_values_lib.TPUOnReadPolicy, +} + +TPU_VARIABLE_CLASS_MAPPING = { + "VariableClass": tpu_values_lib.TPUDistributedVariable, + vs.VariableSynchronization.AUTO: tpu_values_lib.TPUMirroredVariable, + vs.VariableSynchronization.ON_WRITE: tpu_values_lib.TPUMirroredVariable, + vs.VariableSynchronization.ON_READ: tpu_values_lib.TPUSyncOnReadVariable, +} diff --git a/tensorflow/python/distribute/mirrored_strategy.py b/tensorflow/python/distribute/mirrored_strategy.py index b424f798476..5323f6131ee 100644 --- a/tensorflow/python/distribute/mirrored_strategy.py +++ b/tensorflow/python/distribute/mirrored_strategy.py @@ -319,6 +319,9 @@ class MirroredExtended(distribute_lib.StrategyExtendedV1): if ops.executing_eagerly_outside_functions(): self.experimental_enable_get_next_as_optional = True + # 
Flag to turn on VariablePolicy. + self._use_var_policy = False + def _initialize_strategy(self, devices): # The _initialize_strategy method is intended to be used by distribute # coordinator as well. @@ -462,7 +465,8 @@ class MirroredExtended(distribute_lib.StrategyExtendedV1): return distribute_utils.create_mirrored_variable( self._container_strategy(), _real_mirrored_creator, - values.MirroredVariable, values.SyncOnReadVariable, **kwargs) + distribute_utils.VARIABLE_CLASS_MAPPING, + distribute_utils.VARIABLE_POLICY_MAPPING, **kwargs) def _validate_colocate_with_variable(self, colocate_with_variable): distribute_utils.validate_colocate_distributed_variable( @@ -628,10 +632,10 @@ class MirroredExtended(distribute_lib.StrategyExtendedV1): return self._cross_device_ops or self._inferred_cross_device_ops def _reduce_to(self, reduce_op, value, destinations, experimental_hints): - if (isinstance(value, values.Mirrored) and + if (distribute_utils.is_mirrored(value) and reduce_op == reduce_util.ReduceOp.MEAN): return value - assert not isinstance(value, values.Mirrored) + assert not distribute_utils.is_mirrored(value) if not isinstance(value, values.DistributedValues): # This function handles reducing values that are not PerReplica or # Mirrored values. For example, the same value could be present on all @@ -686,10 +690,12 @@ class MirroredExtended(distribute_lib.StrategyExtendedV1): def read_var(self, replica_local_var): """Read the aggregate value of a replica-local variable.""" - if isinstance(replica_local_var, values.SyncOnReadVariable): - return replica_local_var._get_cross_replica() # pylint: disable=protected-access - assert isinstance(replica_local_var, values.Mirrored) - return array_ops.identity(replica_local_var._get()) # pylint: disable=protected-access + # pylint: disable=protected-access + if values._is_sync_on_read(replica_local_var): + return replica_local_var._get_cross_replica() + assert values._is_mirrored(replica_local_var) + return array_ops.identity(replica_local_var._get()) + # pylint: enable=protected-access def _local_results(self, val): if isinstance(val, values.DistributedValues): diff --git a/tensorflow/python/distribute/mirrored_variable_test.py b/tensorflow/python/distribute/mirrored_variable_test.py index 8e7d674947e..03d697fe1eb 100644 --- a/tensorflow/python/distribute/mirrored_variable_test.py +++ b/tensorflow/python/distribute/mirrored_variable_test.py @@ -379,8 +379,7 @@ class MirroredVariableCreationTest(test.TestCase): with distribution.scope(): with self.assertRaisesRegex( ValueError, "`NONE` variable synchronization mode is not " - "supported with `Mirrored` distribution strategy. Please change " - "the `synchronization` for variable: v"): + "supported with "): variable_scope.get_variable( "v", [1], synchronization=variable_scope.VariableSynchronization.NONE) @@ -389,8 +388,7 @@ class MirroredVariableCreationTest(test.TestCase): with distribution.scope(): with self.assertRaisesRegex( ValueError, "`NONE` variable synchronization mode is not " - "supported with `Mirrored` distribution strategy. 
Please change " - "the `synchronization` for variable: v"): + "supported with "): variable_scope.variable( 1.0, name="v", diff --git a/tensorflow/python/distribute/tpu_strategy.py b/tensorflow/python/distribute/tpu_strategy.py index 8e5ef061dcf..bad6e6aa39f 100644 --- a/tensorflow/python/distribute/tpu_strategy.py +++ b/tensorflow/python/distribute/tpu_strategy.py @@ -544,6 +544,9 @@ class TPUExtended(distribute_lib.StrategyExtendedV1): context.async_wait() atexit.register(async_wait) + # Flag to turn on VariablePolicy + self._use_var_policy = False + def _validate_colocate_with_variable(self, colocate_with_variable): distribute_utils. validate_colocate(colocate_with_variable, self) @@ -870,8 +873,8 @@ class TPUExtended(distribute_lib.StrategyExtendedV1): return distribute_utils.create_mirrored_variable( self._container_strategy(), _real_mirrored_creator, - tpu_values.TPUMirroredVariable, tpu_values.TPUSyncOnReadVariable, - **kwargs) + distribute_utils.TPU_VARIABLE_CLASS_MAPPING, + distribute_utils.TPU_VARIABLE_POLICY_MAPPING, **kwargs) def _reduce_to(self, reduce_op, value, destinations, experimental_hints): if (isinstance(value, values.DistributedValues) or diff --git a/tensorflow/python/distribute/tpu_values.py b/tensorflow/python/distribute/tpu_values.py index 33885531966..ce6d2e7029b 100644 --- a/tensorflow/python/distribute/tpu_values.py +++ b/tensorflow/python/distribute/tpu_values.py @@ -197,10 +197,58 @@ def enclosing_tpu_context(): return None +class TPUDistributedVariable(TPUVariableMixin, values.DistributedVariable): + """DistributedVariable subclass for TPUStrategy.""" + + def _is_mirrored(self): + self._policy._is_mirrored() # pylint: disable=protected-access + + def assign_sub(self, value, use_locking=False, name=None, read_value=True): + return self._policy.assign_sub( + self, value, use_locking=use_locking, name=name, read_value=read_value) + + def assign_add(self, value, use_locking=False, name=None, read_value=True): + return self._policy.assign_add( + self, value, use_locking=use_locking, name=name, read_value=read_value) + + def assign(self, value, use_locking=False, name=None, read_value=True): + return self._policy.assign( + self, value, use_locking=use_locking, name=name, read_value=read_value) + + def scatter_sub(self, sparse_delta, use_locking=False, name=None): + return self._policy.scatter_sub( + self, sparse_delta, use_locking=use_locking, name=name) + + def scatter_add(self, sparse_delta, use_locking=False, name=None): + return self._policy.scatter_add( + self, sparse_delta, use_locking=use_locking, name=name) + + def scatter_mul(self, sparse_delta, use_locking=False, name=None): + return self._policy.scatter_mul( + self, sparse_delta, use_locking=use_locking, name=name) + + def scatter_div(self, sparse_delta, use_locking=False, name=None): + return self._policy.scatter_div( + self, sparse_delta, use_locking=use_locking, name=name) + + def scatter_min(self, sparse_delta, use_locking=False, name=None): + return self._policy.scatter_min( + self, sparse_delta, use_locking=use_locking, name=name) + + def scatter_max(self, sparse_delta, use_locking=False, name=None): + return self._policy.scatter_max( + self, sparse_delta, use_locking=use_locking, name=name) + + def scatter_update(self, sparse_delta, use_locking=False, name=None): + return self._policy.scatter_update( + self, sparse_delta, use_locking=use_locking, name=name) + + class TPUMirroredVariable(TPUVariableMixin, values.MirroredVariable): """Holds a map from replica to TPU variables whose values are kept 
in sync.""" - def assign_sub(self, value, use_locking=False, name=None, read_value=True): + def assign_sub(self, value, use_locking=False, name=None, + read_value=True): if (enclosing_tpu_context() and self.aggregation == variable_scope.VariableAggregation.NONE): return _make_raw_assign_fn( @@ -210,17 +258,11 @@ class TPUMirroredVariable(TPUVariableMixin, values.MirroredVariable): use_locking=use_locking, name=name, read_value=read_value) + return assign_sub(self, value, use_locking=use_locking, name=name, + read_value=read_value) - assign_sub_fn = _make_raw_assign_fn( - gen_resource_variable_ops.assign_sub_variable_op) - return self._update( - update_fn=assign_sub_fn, - value=value, - use_locking=use_locking, - name=name, - read_value=read_value) - - def assign_add(self, value, use_locking=False, name=None, read_value=True): + def assign_add(self, value, use_locking=False, name=None, + read_value=True): if (enclosing_tpu_context() and self.aggregation == variable_scope.VariableAggregation.NONE): return _make_raw_assign_fn( @@ -230,34 +272,21 @@ class TPUMirroredVariable(TPUVariableMixin, values.MirroredVariable): use_locking=use_locking, name=name, read_value=read_value) - - assign_add_fn = _make_raw_assign_fn( - gen_resource_variable_ops.assign_add_variable_op) - return self._update( - update_fn=assign_add_fn, - value=value, - use_locking=use_locking, - name=name, - read_value=read_value) + return assign_add(self, value, use_locking=use_locking, name=name, + read_value=read_value) def assign(self, value, use_locking=False, name=None, read_value=True): if (enclosing_tpu_context() and self.aggregation == variable_scope.VariableAggregation.NONE): - return _make_raw_assign_fn(gen_resource_variable_ops.assign_variable_op)( - self, - value=value, - use_locking=use_locking, - name=name, - read_value=read_value) - - assign_fn = _make_raw_assign_fn( - gen_resource_variable_ops.assign_variable_op) - return self._update( - update_fn=assign_fn, - value=value, - use_locking=use_locking, - name=name, - read_value=read_value) + return _make_raw_assign_fn( + gen_resource_variable_ops.assign_variable_op)( + self, + value=value, + use_locking=use_locking, + name=name, + read_value=read_value) + return assign(self, value, use_locking=use_locking, name=name, + read_value=read_value) def scatter_sub(self, *args, **kwargs): raise NotImplementedError @@ -312,3 +341,220 @@ class TPUSyncOnReadVariable(TPUVariableMixin, values.SyncOnReadVariable): def _is_mirrored(self): return False + + +# Common method between AutoPolicy, OnWrite and Mirrored variables. 
+def assign_sub(var, value, use_locking=False, name=None, read_value=True): + assign_sub_fn = _make_raw_assign_fn( + gen_resource_variable_ops.assign_sub_variable_op) + return var._update( # pylint: disable=protected-access + update_fn=assign_sub_fn, + value=value, + use_locking=use_locking, + name=name, + read_value=read_value) + + +def assign_add(var, value, use_locking=False, name=None, read_value=True): + assign_add_fn = _make_raw_assign_fn( + gen_resource_variable_ops.assign_add_variable_op) + return var._update( # pylint: disable=protected-access + update_fn=assign_add_fn, + value=value, + use_locking=use_locking, + name=name, + read_value=read_value) + + +def assign(var, value, use_locking=False, name=None, read_value=True): + assign_fn = _make_raw_assign_fn( + gen_resource_variable_ops.assign_variable_op) + return var._update( # pylint: disable=protected-access + update_fn=assign_fn, + value=value, + use_locking=use_locking, + name=name, + read_value=read_value) + + +class TPUAutoPolicy(values.AutoPolicy): + """Policy defined for `tf.VariableSynchronization.AUTO` synchronization. + + This policy is created when `synchronization` is set to + `tf.VariableSynchronization.AUTO` and `aggregation` is set to + `tf.VariableAggregation.NONE` when creating a `tf.Variable` in `tf.distribute` + scope. + """ + + def assign_sub(self, var, value, use_locking=False, name=None, + read_value=True): + if enclosing_tpu_context(): + return _make_raw_assign_fn( + gen_resource_variable_ops.assign_sub_variable_op)( + var, + value=value, + use_locking=use_locking, + name=name, + read_value=read_value) + return assign_sub(var, value, use_locking=use_locking, name=name, + read_value=read_value) + + def assign_add(self, var, value, use_locking=False, name=None, + read_value=True): + if enclosing_tpu_context(): + return _make_raw_assign_fn( + gen_resource_variable_ops.assign_add_variable_op)( + var, + value=value, + use_locking=use_locking, + name=name, + read_value=read_value) + return assign_add(var, value, use_locking=use_locking, name=name, + read_value=read_value) + + def assign(self, var, value, use_locking=False, name=None, read_value=True): + if enclosing_tpu_context(): + return _make_raw_assign_fn( + gen_resource_variable_ops.assign_variable_op)( + var, + value=value, + use_locking=use_locking, + name=name, + read_value=read_value) + return assign(var, value, use_locking=use_locking, name=name, + read_value=read_value) + + def scatter_sub(self, *args, **kwargs): + raise NotImplementedError + + def scatter_add(self, *args, **kwargs): + raise NotImplementedError + + def scatter_max(self, *args, **kwargs): + raise NotImplementedError + + def scatter_min(self, *args, **kwargs): + raise NotImplementedError + + def scatter_mul(self, *args, **kwargs): + raise NotImplementedError + + def scatter_div(self, *args, **kwargs): + raise NotImplementedError + + def scatter_update(self, *args, **kwargs): + raise NotImplementedError + + def _is_mirrored(self): + return True + + +class TPUOnWritePolicy(values.OnWritePolicy): + """Policy defined for `tf.VariableSynchronization.ON_WRITE` synchronization. + + This policy is created when the following `synchronization` and + `aggregation` parameters are specified when creating a `tf.Variable` in + `tf.distribute` scope: + * `synchronization` is equal to `tf.VariableSynchronization.AUTO` and + aggregation can be any of the following `tf.VariableAggregation` enum + values such as `SUM`, `MEAN` or `ONLY_FIRST_REPLICA`. 
+ * `synchronization` is equal to `tf.VariableSynchronization.ON_WRITE` and + aggregation can be any of the following `tf.VariableAggregation` enum + values such as `NONE`, `SUM`, `MEAN` or `ONLY_FIRST_REPLICA`. + """ + + def assign_sub(self, var, value, use_locking=False, name=None, + read_value=True): + return assign_sub(var, value, use_locking=use_locking, name=name, + read_value=read_value) + + def assign_add(self, var, value, use_locking=False, name=None, + read_value=True): + return assign_add(var, value, use_locking=use_locking, name=name, + read_value=read_value) + + def assign(self, var, value, use_locking=False, name=None, read_value=True): + return assign(var, value, use_locking=use_locking, name=name, + read_value=read_value) + + def scatter_sub(self, *args, **kwargs): + raise NotImplementedError + + def scatter_add(self, *args, **kwargs): + raise NotImplementedError + + def scatter_max(self, *args, **kwargs): + raise NotImplementedError + + def scatter_min(self, *args, **kwargs): + raise NotImplementedError + + def scatter_mul(self, *args, **kwargs): + raise NotImplementedError + + def scatter_div(self, *args, **kwargs): + raise NotImplementedError + + def scatter_update(self, *args, **kwargs): + raise NotImplementedError + + def _is_mirrored(self): + return True + + +class TPUOnReadPolicy(values.OnReadPolicy): + """Policy defined for `tf.VariableSynchronization.ON_READ` synchronization. + + This policy is created when `synchronization` is set to + `tf.VariableSynchronization.ON_READ` and `aggregation` is set to any of the + values allowed by the `tf.VariableAggregation` enum such as `NONE`, `SUM`, + `MEAN` or `ONLY_FIRST_REPLICA`when creating a `tf.Variable` in `tf.distribute` + scope. + """ + + def assign_sub(self, var, *args, **kwargs): + if enclosing_tpu_context() is None: + return super(TPUOnReadPolicy, self).assign_sub(var, *args, **kwargs) + else: + return _make_raw_assign_fn( + gen_resource_variable_ops.assign_sub_variable_op)(var, *args, + **kwargs) + + def assign_add(self, var, *args, **kwargs): + if enclosing_tpu_context() is None: + return super(TPUOnReadPolicy, self).assign_add(var, *args, **kwargs) + else: + return _make_raw_assign_fn( + gen_resource_variable_ops.assign_add_variable_op)(var, *args, + **kwargs) + + def assign(self, var, *args, **kwargs): + if enclosing_tpu_context() is None: + return super(TPUOnReadPolicy, self).assign(var, *args, **kwargs) + else: + return _make_raw_assign_fn(gen_resource_variable_ops.assign_variable_op)( + var, *args, **kwargs) + + def _is_mirrored(self): + return False + + def scatter_sub(self, *args, **kwargs): + raise NotImplementedError + + def scatter_add(self, *args, **kwargs): + raise NotImplementedError + + def scatter_max(self, *args, **kwargs): + raise NotImplementedError + + def scatter_min(self, *args, **kwargs): + raise NotImplementedError + + def scatter_mul(self, *args, **kwargs): + raise NotImplementedError + + def scatter_div(self, *args, **kwargs): + raise NotImplementedError + + def scatter_update(self, *args, **kwargs): + raise NotImplementedError diff --git a/tensorflow/python/distribute/values.py b/tensorflow/python/distribute/values.py index 50cd8d73e73..7dedbee2041 100644 --- a/tensorflow/python/distribute/values.py +++ b/tensorflow/python/distribute/values.py @@ -700,49 +700,49 @@ class DistributedVariable(DistributedDelegate, variables_lib.Variable, def scatter_sub(self, sparse_delta, use_locking=False, name=None): if self._policy: - self._policy.scatter_sub( + return self._policy.scatter_sub( self, 
sparse_delta, use_locking=use_locking, name=name) return values_util.scatter_sub( self, sparse_delta, use_locking=use_locking, name=name) def scatter_add(self, sparse_delta, use_locking=False, name=None): if self._policy: - self._policy.scatter_add( + return self._policy.scatter_add( self, sparse_delta, use_locking=use_locking, name=name) return values_util.scatter_add( self, sparse_delta, use_locking=use_locking, name=name) def scatter_mul(self, sparse_delta, use_locking=False, name=None): if self._policy: - self._policy.scatter_mul( + return self._policy.scatter_mul( self, sparse_delta, use_locking=use_locking, name=name) return values_util.scatter_mul( self, sparse_delta, use_locking=use_locking, name=name) def scatter_div(self, sparse_delta, use_locking=False, name=None): if self._policy: - self._policy.scatter_div( + return self._policy.scatter_div( self, sparse_delta, use_locking=use_locking, name=name) return values_util.scatter_div( self, sparse_delta, use_locking=use_locking, name=name) def scatter_min(self, sparse_delta, use_locking=False, name=None): if self._policy: - self._policy.scatter_min( + return self._policy.scatter_min( self, sparse_delta, use_locking=use_locking, name=name) return values_util.scatter_min( self, sparse_delta, use_locking=use_locking, name=name) def scatter_max(self, sparse_delta, use_locking=False, name=None): if self._policy: - self._policy.scatter_max( + return self._policy.scatter_max( self, sparse_delta, use_locking=use_locking, name=name) return values_util.scatter_max( self, sparse_delta, use_locking=use_locking, name=name) def scatter_update(self, sparse_delta, use_locking=False, name=None): if self._policy: - self._policy.scatter_update( + return self._policy.scatter_update( self, sparse_delta, use_locking=use_locking, name=name) return values_util.scatter_update( self, sparse_delta, use_locking=use_locking, name=name) diff --git a/tensorflow/python/distribute/values_test.py b/tensorflow/python/distribute/values_test.py index 1c090737d8f..e445c1195be 100644 --- a/tensorflow/python/distribute/values_test.py +++ b/tensorflow/python/distribute/values_test.py @@ -19,7 +19,6 @@ from __future__ import division from __future__ import print_function import copy -import itertools import os from absl.testing import parameterized @@ -30,14 +29,12 @@ from tensorflow.python import tf2 from tensorflow.python.distribute import combinations from tensorflow.python.distribute import distribute_lib from tensorflow.python.distribute import distribute_utils -from tensorflow.python.distribute import distribution_strategy_context from tensorflow.python.distribute import packed_distributed_variable as packed from tensorflow.python.distribute import strategy_combinations from tensorflow.python.distribute import test_util as ds_test_util from tensorflow.python.distribute import tpu_strategy from tensorflow.python.distribute import tpu_values from tensorflow.python.distribute import values as values_lib -from tensorflow.python.distribute.cluster_resolver import tpu_cluster_resolver from tensorflow.python.eager import context from tensorflow.python.eager import def_function from tensorflow.python.eager import test @@ -51,19 +48,56 @@ from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops -from tensorflow.python.ops import random_ops from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import variable_scope from 
tensorflow.python.ops import variables as variables_lib from tensorflow.python.saved_model import save_context from tensorflow.python.saved_model import save_options -from tensorflow.python.tpu import tpu_strategy_util from tensorflow.python.training import saver as saver_lib from tensorflow.python.training.tracking import util as trackable_utils from tensorflow.python.types import core from tensorflow.python.util import nest +def _device_str(d): + return "/device:GPU:" + str(d) + + +def _nested_value(d): + return ("a" + d, ["b" + d, {"c": "d" + d, "e": "f" + d}, "g" + d], "h" + d) + + +def _make_mirrored_val(init_val=5.0): + v = [] + devices = ["/device:GPU:0", "/device:CPU:0"] + for d, _ in zip(devices, ["v", "v/replica"]): + with ops.device(d): + v.append(constant_op.constant(init_val)) + return values_lib.Mirrored(v) + + +def _make_mirrored(): + v = [] + devices = ["/device:GPU:0", "/device:CPU:0"] + for d, n, init in zip(devices, ["v", "v/replica"], [1., 2.]): + with ops.device(d): + v.append(variable_scope.get_variable( + name=n, initializer=init, use_resource=True)) + mirrored = values_lib.MirroredVariable( + None, v, variable_scope.VariableAggregation.SUM) + return mirrored + + +def mirrored_and_tpu_strategy_combinations(): + return combinations.combine( + distribution=[ + strategy_combinations.mirrored_strategy_with_gpu_and_cpu, + strategy_combinations.tpu_strategy, + strategy_combinations.tpu_strategy_packed_var, + ], + mode=["graph", "eager"]) + + class DistributedValuesTest(test.TestCase, parameterized.TestCase): def testGetEager(self): @@ -363,45 +397,6 @@ class DistributedDelegateTest(test.TestCase): self.assertEqual(v.x, v_deep_copy.x) -def _device_str(d): - return "/device:GPU:" + str(d) - - -def _nested_value(d): - return ("a" + d, ["b" + d, {"c": "d" + d, "e": "f" + d}, "g" + d], "h" + d) - - -def _make_mirrored_val(init_val=5.0): - v = [] - devices = ["/device:GPU:0", "/device:CPU:0"] - for d, _ in zip(devices, ["v", "v/replica"]): - with ops.device(d): - v.append(constant_op.constant(init_val)) - return values_lib.Mirrored(v) - - -def _make_mirrored(): - v = [] - devices = ["/device:GPU:0", "/device:CPU:0"] - for d, n, init in zip(devices, ["v", "v/replica"], [1., 2.]): - with ops.device(d): - v.append(variable_scope.get_variable( - name=n, initializer=init, use_resource=True)) - mirrored = values_lib.MirroredVariable( - None, v, variable_scope.VariableAggregation.SUM) - return mirrored - - -def mirrored_and_tpu_strategy_combinations(): - return combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - strategy_combinations.tpu_strategy, - strategy_combinations.tpu_strategy_packed_var, - ], - mode=["graph", "eager"]) - - @combinations.generate( combinations.combine( distribution=[ @@ -796,507 +791,6 @@ class MirroredVariableTest(test.TestCase, parameterized.TestCase): save_path = self._save_normal() self._restore_mirrored(save_path) - @combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_one_gpu, - ], - mode=["graph"])) - def testFetchAMirroredVariable(self, distribution): - with self.session(graph=ops.Graph()) as sess, distribution.scope(): - with ops.device("/device:GPU:0"): - v = variable_scope.get_variable( - name="v", initializer=1., use_resource=True) - mirrored = values_lib.MirroredVariable( - distribution, (v,), variable_scope.VariableAggregation.MEAN) - sess.run(variables_lib.global_variables_initializer()) - sess.run({"complicated": mirrored}) - - 
@combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - strategy_combinations.tpu_strategy, - strategy_combinations.tpu_strategy_packed_var, - ], - mode=["eager"])) - def testAssignValueInReplicaContextWithoutAggregation(self, distribution): - with distribution.scope(): - v = variables_lib.Variable(1.0, name="foo") - - @def_function.function - def mytest(): - def model_fn(): - v.assign(5.0) - return v.read_value() - - return distribution.run(model_fn) - - mytest() - self.assertAllEqual([5.0, 5.0], self.evaluate(v.values)) - - @combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_one_cpu, - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - strategy_combinations.tpu_strategy, - strategy_combinations.tpu_strategy_packed_var, - ], - mode=["graph", "eager"])) - def testValueInReplicaContext(self, distribution): - with distribution.scope(): - v = variables_lib.Variable( - 1., aggregation=variables_lib.VariableAggregation.MEAN) - self.evaluate(variables_lib.global_variables_initializer()) - - @def_function.function - def f(): - with ops.control_dependencies([v.assign_add(1.)]): - return v.value() - - results = self.evaluate( - distribution.experimental_local_results( - distribution.run(f))) - for value in results: - self.assertEqual(2., value) - - @combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_one_cpu, - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - strategy_combinations.tpu_strategy, - strategy_combinations.tpu_strategy_packed_var, - ], - mode=["graph", "eager"])) - def testAssignOutOfScope(self, distribution): - with distribution.scope(): - mirrored = variables_lib.Variable(1.) - self.evaluate(mirrored.assign(3.)) - self.assertEqual(self.evaluate(mirrored.read_value()), 3.) - for component in mirrored.values: - self.assertEqual(self.evaluate(component.read_value()), 3.) - - @combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - ], - mode=["graph", "eager"])) - def testAssignAggregationMeanDTypeNonFloat(self, distribution): - with distribution.scope(): - v = variables_lib.Variable( - 1, - aggregation=variable_scope.VariableAggregation.MEAN, - dtype=dtypes.int32) - self.evaluate(v.initializer) - - @def_function.function - def assign(): - ctx = distribution_strategy_context.get_replica_context() - return v.assign(ctx.replica_id_in_sync_group) - - # disallow assign() with distributed value in replica context. - with self.assertRaisesRegex(ValueError, - "Cannot update non-float variables"): - self.evaluate( - distribution.experimental_local_results( - distribution.run(assign))) - - # allow assign() with same value in replica context. - @def_function.function - def assign_same(): - return v.assign(2) - - self.evaluate( - distribution.experimental_local_results( - distribution.run(assign_same))) - self.assertEqual(self.evaluate(v.read_value()), 2) - - # allow assign() with mirrored variable in replica context. 
- with distribution.scope(): - v2 = variables_lib.Variable( - 3, - aggregation=variable_scope.VariableAggregation.SUM, - dtype=dtypes.int32) - self.evaluate(v2.initializer) - - @def_function.function - def assign_mirrored(): - return v.assign(v2) - - self.evaluate( - distribution.experimental_local_results( - distribution.run(assign_mirrored))) - self.assertEqual(self.evaluate(v.read_value()), 3) - - # allow assign() in cross replica context. - with distribution.scope(): - self.evaluate(v.assign(4)) - self.assertEqual(self.evaluate(v.read_value()), 4) - - @combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - strategy_combinations.tpu_strategy, - strategy_combinations.tpu_strategy_packed_var, - ], - mode=["eager"])) - def testInitializedToSameValueInsideEagerRun(self, distribution): - v = [None] - - @def_function.function - def step(): - - def f(): - if v[0] is None: - v[0] = variables_lib.Variable(random_ops.random_normal([])) - - distribution.run(f) - - context.set_global_seed(None) - step() - vals = self.evaluate(v[0].values) - self.assertAllEqual(vals[0], vals[1]) - - @combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_one_cpu, - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - strategy_combinations.tpu_strategy, - strategy_combinations.tpu_strategy_packed_var, - ], - mode=["graph", "eager"])) - def testAggregationOnlyFirstReplica(self, distribution): - with distribution.scope(): - v = variable_scope.variable( - 15., - synchronization=variables_lib.VariableSynchronization.ON_WRITE, - aggregation=variables_lib.VariableAggregation.ONLY_FIRST_REPLICA) - self.evaluate(variables_lib.global_variables_initializer()) - - @def_function.function - def assign(): - ctx = distribution_strategy_context.get_replica_context() - replica_id = ctx.replica_id_in_sync_group - return v.assign(math_ops.cast(replica_id, dtypes.float32)) - per_replica_results = self.evaluate(distribution.experimental_local_results( - distribution.run(assign))) - # The per-replica values should always match the first replicas value. 
- self.assertAllEqual( - array_ops.zeros(distribution.num_replicas_in_sync, dtypes.float32), - per_replica_results) - - @combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - strategy_combinations.tpu_strategy, - strategy_combinations.tpu_strategy_packed_var, - ], - mode=["eager"])) - def testInitScope(self, distribution): - - class C(object): - pass - - obj = C() - obj.w = None - obj.v = None - - @def_function.function - def assign(): - with ops.init_scope(): - if obj.w is None: - obj.w = variables_lib.Variable( - 0, aggregation=variables_lib.VariableAggregation.MEAN) - obj.v = variables_lib.Variable( - obj.w.read_value(), - aggregation=variables_lib.VariableAggregation.MEAN) - - return obj.v.assign_add(2) - - per_replica_results = self.evaluate( - distribution.experimental_local_results(distribution.run(assign))) - self.assertAllEqual([2, 2], per_replica_results) - - @combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - strategy_combinations.tpu_strategy, - ], - mode=["eager"])) - def testOperatorOverride(self, distribution): - - with distribution.scope(): - v = variable_scope.variable( - 1, aggregation=variables_lib.VariableAggregation.MEAN) - - self.assertEqual(2, self.evaluate(v + 1)) - - @def_function.function - def add(): - return v + 1 - - per_replica_results = self.evaluate( - distribution.experimental_local_results(distribution.run(add))) - self.assertAllEqual([2, 2], per_replica_results) - - @combinations.generate(mirrored_and_tpu_strategy_combinations()) - def testAssignAdd(self, distribution): - with distribution.scope(): - v = variable_scope.variable( - 1, aggregation=variables_lib.VariableAggregation.MEAN) - self.evaluate(variables_lib.global_variables_initializer()) - - @def_function.function - def assign(): - return v.assign_add(2) - - per_replica_results = self.evaluate( - distribution.experimental_local_results(distribution.run(assign))) - # The per-replica values should always match the first replicas value. 
- self.assertAllEqual([3, 3], per_replica_results) - - @combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - ], - mode=["graph", "eager"])) - def testScatterSub(self, distribution): - with distribution.scope(): - v = variables_lib.Variable( - [0., 0., 0.], aggregation=variables_lib.VariableAggregation.MEAN) - self.evaluate(v.initializer) - - @def_function.function - def scatter_sub(): - ctx = distribution_strategy_context.get_replica_context() - replica_id = ctx.replica_id_in_sync_group - value = indexed_slices.IndexedSlices( - values=array_ops.stack([ - math_ops.cast(replica_id, dtypes.float32), - math_ops.cast(replica_id + 1, dtypes.float32) - ]), - indices=array_ops.stack([replica_id, replica_id + 1]), - dense_shape=(3,)) - return v.scatter_sub(value) - - per_replica_results = self.evaluate( - distribution.experimental_local_results( - distribution.run(scatter_sub))) - self.assertAllEqual([[0., -1., -1.], [0., -1., -1.]], per_replica_results) - - @combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - ], - mode=["graph", "eager"])) - def testScatterAdd(self, distribution): - with distribution.scope(): - v = variables_lib.Variable( - [0, 0, 0], aggregation=variables_lib.VariableAggregation.SUM) - self.evaluate(v.initializer) - - @def_function.function - def scatter_add(): - ctx = distribution_strategy_context.get_replica_context() - replica_id = ctx.replica_id_in_sync_group - value = indexed_slices.IndexedSlices( - values=array_ops.stack([replica_id, replica_id + 1]), - indices=array_ops.stack([replica_id, replica_id + 1]), - dense_shape=(3,)) - return v.scatter_add(value) - - per_replica_results = self.evaluate( - distribution.experimental_local_results( - distribution.run(scatter_add))) - self.assertAllEqual([[0, 2, 2], [0, 2, 2]], per_replica_results) - - @combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - ], - mode=["graph", "eager"])) - def testScatterDiv(self, distribution): - with distribution.scope(): - v = variables_lib.Variable( - [1, 6, 1], aggregation=variables_lib.VariableAggregation.SUM) - self.evaluate(v.initializer) - - @def_function.function - def scatter_div(): - ctx = distribution_strategy_context.get_replica_context() - replica_id = ctx.replica_id_in_sync_group - value = indexed_slices.IndexedSlices( - values=array_ops.reshape(replica_id + 2, [1]), - indices=array_ops.reshape(replica_id, [1]), - dense_shape=(3,)) - return v.scatter_div(value) - - per_replica_results = self.evaluate( - distribution.experimental_local_results( - distribution.run(scatter_div))) - self.assertAllEqual([[0, 2, 1], [0, 2, 1]], per_replica_results) - - @combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - ], - mode=["graph", "eager"])) - def testScatterMul(self, distribution): - with distribution.scope(): - v = variables_lib.Variable( - [2., 1., 1.], aggregation=variables_lib.VariableAggregation.MEAN) - self.evaluate(v.initializer) - - @def_function.function - def scatter_mul(): - ctx = distribution_strategy_context.get_replica_context() - replica_id = ctx.replica_id_in_sync_group - value = indexed_slices.IndexedSlices( - values=array_ops.reshape( - math_ops.cast(replica_id + 2, dtypes.float32), [1]), - indices=array_ops.reshape(replica_id, [1]), - dense_shape=(3,)) - return v.scatter_mul(value) - - 
per_replica_results = self.evaluate( - distribution.experimental_local_results( - distribution.run(scatter_mul))) - self.assertAllClose([[2., 1.5, 1.], [2., 1.5, 1.]], per_replica_results) - - @combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - ], - mode=["graph", "eager"])) - def testScatterMin(self, distribution): - with distribution.scope(): - v1 = variables_lib.Variable( - [0, 2, 0], aggregation=variables_lib.VariableAggregation.SUM) - v2 = variables_lib.Variable( - [0, 2, 0], - aggregation=variables_lib.VariableAggregation.ONLY_FIRST_REPLICA) - self.evaluate(variables_lib.global_variables_initializer()) - - @def_function.function - def scatter_min(v): - value = indexed_slices.IndexedSlices( - values=array_ops.identity([1]), - indices=array_ops.identity([1]), - dense_shape=(3,)) - return v.scatter_min(value) - - with self.assertRaisesRegex(NotImplementedError, "scatter_min.*"): - self.evaluate( - distribution.experimental_local_results( - distribution.run(scatter_min, args=(v1,)))) - - per_replica_results = self.evaluate( - distribution.experimental_local_results( - distribution.run(scatter_min, args=(v2,)))) - self.assertAllClose([[0, 1, 0], [0, 1, 0]], per_replica_results) - - @combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - ], - mode=["graph", "eager"])) - def testScatterMax(self, distribution): - with distribution.scope(): - v1 = variables_lib.Variable( - [0, 0, 0], aggregation=variables_lib.VariableAggregation.SUM) - v2 = variables_lib.Variable( - [0, 0, 0], - aggregation=variables_lib.VariableAggregation.ONLY_FIRST_REPLICA) - self.evaluate(variables_lib.global_variables_initializer()) - - @def_function.function - def scatter_max(v): - value = indexed_slices.IndexedSlices( - values=array_ops.identity([1]), - indices=array_ops.identity([0]), - dense_shape=(3,)) - return v.scatter_max(value) - - with self.assertRaisesRegex(NotImplementedError, "scatter_max.*"): - self.evaluate( - distribution.experimental_local_results( - distribution.run(scatter_max, args=(v1,)))) - - per_replica_results = self.evaluate( - distribution.experimental_local_results( - distribution.run(scatter_max, args=(v2,)))) - self.assertAllClose([[1, 0, 0], [1, 0, 0]], per_replica_results) - - @combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - ], - mode=["graph", "eager"])) - def testScatterUpdate(self, distribution): - with distribution.scope(): - v1 = variables_lib.Variable( - [0, 0, 0], aggregation=variables_lib.VariableAggregation.SUM) - v2 = variables_lib.Variable( - [0, 0, 0], - aggregation=variables_lib.VariableAggregation.ONLY_FIRST_REPLICA) - self.evaluate(variables_lib.global_variables_initializer()) - - @def_function.function - def scatter_update(v): - value = indexed_slices.IndexedSlices( - values=array_ops.identity([3]), - indices=array_ops.identity([1]), - dense_shape=(3,)) - return v.scatter_update(value) - - with self.assertRaisesRegex(NotImplementedError, "scatter_update.*"): - self.evaluate( - distribution.experimental_local_results( - distribution.run(scatter_update, args=(v1,)))) - - per_replica_results = self.evaluate( - distribution.experimental_local_results( - distribution.run(scatter_update, args=(v2,)))) - self.assertAllClose([[0, 3, 0], [0, 3, 0]], per_replica_results) - - @combinations.generate( - combinations.combine( - distribution=[ - 
strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - ], - mode=["graph", "eager"])) - def testScatterOpsInCrossReplicaContext(self, distribution): - with distribution.scope(): - v1 = variables_lib.Variable( - [1, 1, 1], aggregation=variables_lib.VariableAggregation.SUM) - v2 = variables_lib.Variable([1, 1, 1]) - self.evaluate(variables_lib.global_variables_initializer()) - - value = indexed_slices.IndexedSlices( - values=array_ops.identity([2]), - indices=array_ops.identity([0]), - dense_shape=(3,)) - with distribution.scope(): - self.evaluate(v1.scatter_add(value)) - self.assertAllEqual([3, 1, 1], self.evaluate(v1.read_value())) - - self.evaluate(v2.scatter_min(value)) - self.assertAllEqual([1, 1, 1], self.evaluate(v2.read_value())) - _TPU_STRATEGIES = (tpu_strategy.TPUStrategy, tpu_strategy.TPUStrategyV1) @@ -1321,38 +815,6 @@ def _make_replica_local(method, strategy=None): return v, replica_local -class SyncOnReadVariablePropertiesTest(test.TestCase): - - config = config_pb2.ConfigProto() - config.allow_soft_placement = True - - @test_util.run_in_graph_and_eager_modes(config=config) - def testProperties(self): - if context.num_gpus() < 1 and context.executing_eagerly(): - self.skipTest("A GPU is not available for this test in eager mode.") - v, replica_local = _make_replica_local( - variable_scope.VariableAggregation.SUM) - - self.assertEqual(v[0].constraint, replica_local.constraint) - self.assertEqual(v[0].name, replica_local.name) - self.assertEqual(v[0].dtype, replica_local.dtype) - self.assertEqual(v[0].shape, replica_local.shape) - self.assertEqual(variable_scope.VariableAggregation.SUM, - replica_local.aggregation) - - @test_util.run_v2_only - def testCanPassToDefFun(self): - @def_function.function - def add1(x): - return x + 1 - - v = variable_scope.get_variable( - name="v", initializer=[1.], use_resource=True) - replica_local = values_lib.SyncOnReadVariable( - None, (v,), variable_scope.VariableAggregation.MEAN) - self.assertEqual(2., self.evaluate(add1(replica_local))) - - # TODO(b/144432582): Add variable aggregation type to combinations to simplify # tests. 
def strategy_and_run_tf_function_combinations(): @@ -1389,6 +851,35 @@ class SyncOnReadVariableTest(test.TestCase, parameterized.TestCase): save_path, _ = self._save_return_saver(sess, var) return save_path + config = config_pb2.ConfigProto() + config.allow_soft_placement = True + + @test_util.run_in_graph_and_eager_modes(config=config) + def testProperties(self): + if context.num_gpus() < 1 and context.executing_eagerly(): + self.skipTest("A GPU is not available for this test in eager mode.") + v, replica_local = _make_replica_local( + variable_scope.VariableAggregation.SUM) + + self.assertEqual(v[0].constraint, replica_local.constraint) + self.assertEqual(v[0].name, replica_local.name) + self.assertEqual(v[0].dtype, replica_local.dtype) + self.assertEqual(v[0].shape, replica_local.shape) + self.assertEqual(variable_scope.VariableAggregation.SUM, + replica_local.aggregation) + + @test_util.run_v2_only + def testCanPassToDefFun(self): + @def_function.function + def add1(x): + return x + 1 + + v = variable_scope.get_variable( + name="v", initializer=[1.], use_resource=True) + replica_local = values_lib.SyncOnReadVariable( + None, (v,), variable_scope.VariableAggregation.MEAN) + self.assertEqual(2., self.evaluate(add1(replica_local))) + @combinations.generate(mirrored_and_tpu_strategy_combinations()) def testTensorConversion(self, distribution): with context.graph_mode(): @@ -1585,453 +1076,6 @@ class SyncOnReadVariableTest(test.TestCase, parameterized.TestCase): save_path = self._save_normal() self._restore_replica_local_sum(save_path, distribution) - @combinations.generate(strategy_and_run_tf_function_combinations()) - def testAssign(self, distribution, experimental_run_tf_function): - - def assign(fn, v, update_value, cross_replica): - update_fn = lambda: getattr(v, fn)(update_value) - if cross_replica: - return update_fn() - else: - if experimental_run_tf_function: - update_fn = def_function.function(update_fn) - return distribution.experimental_local_results( - distribution.run(update_fn)) - - updates = [("assign", 1.), ("assign_add", 1.), ("assign_sub", -1.)] - aggregations = [ - variables_lib.VariableAggregation.NONE, - variables_lib.VariableAggregation.SUM, - variables_lib.VariableAggregation.MEAN, - variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, - ] - options = list( - x for x in itertools.product(updates, aggregations, [True, False])) - for update, aggregation, cross_replica in options: - # VariableAggregation.SUM in cross-replica mode is tested below, - # VariableAggregation.NONE in cross-replica mode is not supported. 
- if cross_replica and aggregation in [ - variables_lib.VariableAggregation.SUM, - variables_lib.VariableAggregation.NONE, - ]: - continue - with distribution.scope(): - v = variable_scope.variable( - 0., - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(variables_lib.global_variables_initializer()) - fn, update_value = update - self.evaluate(assign(fn, v, update_value, cross_replica)) - for component in v._values: - self.assertAllEqual(self.evaluate(component.read_value()), - self.evaluate(array_ops.ones_like(component))) - - @combinations.generate(strategy_and_run_tf_function_combinations()) - def testAssignDtypeConversion(self, distribution, - experimental_run_tf_function): - - def assign(fn, v, update_value, cross_replica): - update_fn = lambda: getattr(v, fn)(update_value) - if cross_replica: - return update_fn() - else: - if experimental_run_tf_function: - update_fn = def_function.function(update_fn) - return distribution.experimental_local_results( - distribution.run(update_fn)) - - updates = [("assign", 1), ("assign_add", 1), ("assign_sub", -1)] - aggregations = [ - variables_lib.VariableAggregation.NONE, - variables_lib.VariableAggregation.SUM, - variables_lib.VariableAggregation.MEAN, - variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, - ] - options = list( - x for x in itertools.product(updates, aggregations, [True, False])) - for update, aggregation, cross_replica in options: - # VariableAggregation.SUM in cross-replica mode is tested below, - # VariableAggregation.NONE in cross-replica mode is not supported. - if cross_replica and aggregation in [ - variables_lib.VariableAggregation.SUM, - variables_lib.VariableAggregation.NONE, - ]: - continue - with distribution.scope(): - v = variable_scope.variable( - 0., - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(variables_lib.global_variables_initializer()) - fn, update_value = update - self.evaluate(assign(fn, v, update_value, cross_replica)) - for component in v._values: - self.assertAllEqual(self.evaluate(component.read_value()), - self.evaluate(array_ops.ones_like(component))) - - @combinations.generate(mirrored_and_tpu_strategy_combinations()) - def testAssignWithAggregationSum(self, distribution): - with distribution.scope(): - v = variable_scope.variable( - 0., - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=variables_lib.VariableAggregation.SUM) - self.evaluate(variables_lib.global_variables_initializer()) - self.evaluate(v.assign(1. 
* distribution.num_replicas_in_sync)) - for component in v._values: - self.assertAllEqual(self.evaluate(component.read_value()), - self.evaluate(array_ops.ones_like(component))) - - @combinations.generate(mirrored_and_tpu_strategy_combinations()) - def testAssignAddSubWithAggregationSum(self, distribution): - with distribution.scope(): - v = variable_scope.variable( - 0., - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=variables_lib.VariableAggregation.SUM) - self.evaluate(variables_lib.global_variables_initializer()) - with self.assertRaisesRegex( - ValueError, "SyncOnReadVariable does not support "): - self.evaluate(v.assign_add(1.)) - with self.assertRaisesRegex( - ValueError, "SyncOnReadVariable does not support "): - self.evaluate(v.assign_sub(1.)) - - @combinations.generate(strategy_and_run_tf_function_combinations()) - def testReadValueInReplicaContext(self, distribution, - experimental_run_tf_function): - aggregations = [ - variables_lib.VariableAggregation.NONE, - variables_lib.VariableAggregation.SUM, - variables_lib.VariableAggregation.MEAN, - variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, - ] - for aggregation in aggregations: - with distribution.scope(): - v = variable_scope.variable( - 0., - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(variables_lib.global_variables_initializer()) - if experimental_run_tf_function: - read_var_fn = def_function.function(v.read_value) - else: - read_var_fn = v.read_value - results = self.evaluate( - distribution.experimental_local_results( - distribution.run(read_var_fn))) - for component, value in zip(v._values, results): - self.assertAllEqual(self.evaluate(component.read_value()), value) - - @combinations.generate(strategy_and_run_tf_function_combinations()) - def testReadValueInCrossReplicaContext(self, distribution, - experimental_run_tf_function): - aggregations = [ - variables_lib.VariableAggregation.SUM, - variables_lib.VariableAggregation.MEAN, - variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, - ] - for aggregation in aggregations: - if isinstance(distribution, _TPU_STRATEGIES): - resolver = tpu_cluster_resolver.TPUClusterResolver("") - tpu_strategy_util.initialize_tpu_system(resolver) - with distribution.scope(): - v = variable_scope.variable( - 0., - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(variables_lib.global_variables_initializer()) - - def assign(v=v): - ctx = distribution_strategy_context.get_replica_context() - replica_id = ctx.replica_id_in_sync_group - return v.assign(math_ops.cast(replica_id, dtypes.float32)) - - if experimental_run_tf_function: - assign = def_function.function(assign) - - self.evaluate( - distribution.experimental_local_results(distribution.run(assign))) - num_replicas = distribution.num_replicas_in_sync - sum_of_replica_values = num_replicas * (num_replicas - 1) / 2. 
- if aggregation == variables_lib.VariableAggregation.SUM: - expected = sum_of_replica_values - elif aggregation == variables_lib.VariableAggregation.MEAN: - expected = sum_of_replica_values / num_replicas - else: - expected = 0 - self.assertEqual(expected, self.evaluate(v.read_value()), aggregation) - self.assertEqual(expected, self.evaluate(v.value()), aggregation) - self.assertEqual(expected, self.evaluate(v), aggregation) - self.assertEqual(expected, self.evaluate(array_ops.identity(v)), - aggregation) - - # TODO(b/145574622): Re-enable this test once ReduceOp argument is - # respected on GPUs. - @combinations.generate(strategy_and_run_tf_function_combinations()) - def disable_testAllReduce(self, distribution, - experimental_run_tf_function): - with distribution.scope(): - v = variable_scope.variable( - 2., - synchronization=variables_lib.VariableSynchronization.ON_WRITE, - aggregation=variables_lib.VariableAggregation.MEAN) - self.evaluate(variables_lib.global_variables_initializer()) - - def all_reduce(): - ctx = distribution_strategy_context.get_replica_context() - replica_id = ctx.replica_id_in_sync_group - return ctx.all_reduce("SUM", v) + math_ops.cast(replica_id, - dtypes.float32) - - if experimental_run_tf_function: - all_reduce = def_function.function(all_reduce) - - per_replica_results = self.evaluate( - distribution.experimental_local_results(distribution.run(all_reduce))) - expected_result = [] - for i in range(distribution.num_replicas_in_sync): - expected_result.append(2.0 * distribution.num_replicas_in_sync + - 1.0 * i) - self.assertEqual(per_replica_results, tuple(expected_result)) - - @combinations.generate(strategy_and_run_tf_function_combinations()) - def testAssignPerReplicaBeforeRead(self, distribution, - experimental_run_tf_function): - aggregations = [ - variables_lib.VariableAggregation.SUM, - variables_lib.VariableAggregation.MEAN, - variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, - ] - for aggregation in aggregations: - with distribution.scope(): - v = variable_scope.variable( - 0., - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(variables_lib.global_variables_initializer()) - - def assign(var=v): - ctx = distribution_strategy_context.get_replica_context() - replica_id = ctx.replica_id_in_sync_group - return var.assign(math_ops.cast(replica_id, dtypes.float32)) - - if experimental_run_tf_function: - assign = def_function.function(assign) - - per_replica_results = self.evaluate( - distribution.experimental_local_results(distribution.run(assign))) - expected_result = [] - for i in range(distribution.num_replicas_in_sync): - expected_result.append(1.0 * i) - self.assertEqual(per_replica_results, tuple(expected_result)) - - @combinations.generate(mirrored_and_tpu_strategy_combinations()) - def testReadValueWithAggregationNoneInCrossReplicaContext(self, distribution): - with distribution.scope(): - v = variable_scope.variable( - 0., - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=variables_lib.VariableAggregation.NONE) - self.evaluate(variables_lib.global_variables_initializer()) - with self.assertRaisesRegex( - ValueError, "Could not convert from .* VariableAggregation\\.NONE"): - self.evaluate(v.read_value()) - - @combinations.generate(mirrored_and_tpu_strategy_combinations()) - def testInitializedToSameValueInsideEagerRun(self, distribution): - if not context.executing_eagerly(): self.skipTest("eager only") - - v = [None] - @def_function.function - def step(): - def 
f(): - if v[0] is None: - v[0] = variables_lib.Variable( - random_ops.random_normal([]), - synchronization=variables_lib.VariableSynchronization.ON_READ) - - distribution.run(f) - - context.set_global_seed(None) - step() - vals = self.evaluate(v[0].values) - self.assertAllEqual(vals[0], vals[1]) - - @combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.tpu_strategy, - ], - mode=["eager"])) - def testOperatorOverride(self, distribution): - - with distribution.scope(): - v = variable_scope.variable( - 0.0, - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=variables_lib.VariableAggregation.MEAN) - - @def_function.function - def assign(): - ctx = distribution_strategy_context.get_replica_context() - replica_id = ctx.replica_id_in_sync_group - return v.assign(math_ops.cast(replica_id, dtypes.float32)) - - # Assign different replicas with different values. - distribution.run(assign) - - self.assertEqual(1.5, self.evaluate(v + 1)) - - @def_function.function - def add(): - return v + 1 - - per_replica_results = self.evaluate( - distribution.experimental_local_results(distribution.run(add))) - self.assertAllEqual([1, 2], per_replica_results) - - -@combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - ], - aggregation=[ - variables_lib.VariableAggregation.MEAN, - variables_lib.VariableAggregation.SUM, - variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, - ], - mode=["graph", "eager"])) -class SyncOnReadScatterReplicaTest(test.TestCase, parameterized.TestCase): - - def testScatterSub(self, distribution, aggregation): - with distribution.scope(): - v = variables_lib.Variable( - [1., 1., 1.], - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(v.initializer) - - delta = values_lib.PerReplica([ - indexed_slices.IndexedSlices( - values=[[0.], [1.]], indices=[0, 1], dense_shape=(3,)), - indexed_slices.IndexedSlices( - values=[[1.], [2.]], indices=[1, 2], dense_shape=(3,)), - ]) - - with self.assertRaises(NotImplementedError): - self.evaluate(distribution.run(v.scatter_sub, args=(delta,))) - - def testScatterAdd(self, distribution, aggregation): - with distribution.scope(): - v = variables_lib.Variable( - [1., 1., 1.], - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(v.initializer) - - delta = values_lib.PerReplica([ - indexed_slices.IndexedSlices( - values=[[0.], [1.]], indices=[0, 1], dense_shape=(3,)), - indexed_slices.IndexedSlices( - values=[[1.], [2.]], indices=[1, 2], dense_shape=(3,)), - ]) - - with self.assertRaises(NotImplementedError): - self.evaluate(distribution.run(v.scatter_add, args=(delta,))) - - def testScatterDiv(self, distribution, aggregation): - with distribution.scope(): - v = variables_lib.Variable( - [2., 6., 1.], - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(v.initializer) - - delta = values_lib.PerReplica([ - indexed_slices.IndexedSlices( - values=[[2.], [2.]], indices=[0, 1], dense_shape=(3,)), - indexed_slices.IndexedSlices( - values=[[3.], [3.]], indices=[1, 2], dense_shape=(3,)), - ]) - - with self.assertRaises(NotImplementedError): - self.evaluate(distribution.run(v.scatter_div, args=(delta,))) - - def testScatterMul(self, distribution, aggregation): - with distribution.scope(): - v = variables_lib.Variable( - [2., 1., 1.], - 
synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(v.initializer) - - delta = values_lib.PerReplica([ - indexed_slices.IndexedSlices( - values=[[2.], [3.]], indices=[0, 1], dense_shape=(3,)), - indexed_slices.IndexedSlices( - values=[[4.], [5.]], indices=[1, 2], dense_shape=(3,)), - ]) - - with self.assertRaises(NotImplementedError): - self.evaluate(distribution.run(v.scatter_mul, args=(delta,))) - - def testScatterMin(self, distribution, aggregation): - with distribution.scope(): - v = variables_lib.Variable( - [3., 4., 5.], - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(v.initializer) - - delta = values_lib.PerReplica([ - indexed_slices.IndexedSlices( - values=[[1.], [8.]], indices=[0, 1], dense_shape=(3,)), - indexed_slices.IndexedSlices( - values=[[9.], [2.]], indices=[1, 2], dense_shape=(3,)), - ]) - - with self.assertRaises(NotImplementedError): - self.evaluate(distribution.run(v.scatter_min, args=(delta,))) - - def testScatterMax(self, distribution, aggregation): - with distribution.scope(): - v = variables_lib.Variable( - [3., 4., 5.], - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(v.initializer) - - delta = values_lib.PerReplica([ - indexed_slices.IndexedSlices( - values=[[1.], [8.]], indices=[0, 1], dense_shape=(3,)), - indexed_slices.IndexedSlices( - values=[[9.], [2.]], indices=[1, 2], dense_shape=(3,)), - ]) - - with self.assertRaises(NotImplementedError): - self.evaluate(distribution.run(v.scatter_max, args=(delta,))) - - def testScatterUpdate(self, distribution, aggregation): - with distribution.scope(): - v = variables_lib.Variable( - [0., 0., 0.], - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(v.initializer) - - delta = values_lib.PerReplica([ - indexed_slices.IndexedSlices( - values=[[1.], [2.]], indices=[0, 1], dense_shape=(3,)), - indexed_slices.IndexedSlices( - values=[[3.], [4.]], indices=[1, 2], dense_shape=(3,)), - ]) - - with self.assertRaises(NotImplementedError): - self.evaluate(distribution.run(v.scatter_min, args=(delta,))) - class MirroredTest(test.TestCase): diff --git a/tensorflow/python/distribute/vars_test.py b/tensorflow/python/distribute/vars_test.py new file mode 100644 index 00000000000..5866c0c8498 --- /dev/null +++ b/tensorflow/python/distribute/vars_test.py @@ -0,0 +1,1270 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for the distributed values library.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import itertools + +from absl.testing import parameterized + +from tensorflow.python.distribute import combinations +from tensorflow.python.distribute import distribution_strategy_context +from tensorflow.python.distribute import strategy_combinations +from tensorflow.python.distribute import tpu_strategy +from tensorflow.python.distribute import tpu_values +from tensorflow.python.distribute import values +from tensorflow.python.distribute.cluster_resolver import tpu_cluster_resolver +from tensorflow.python.eager import context +from tensorflow.python.eager import def_function +from tensorflow.python.eager import test +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import indexed_slices +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import random_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.ops import variables as variables_lib +from tensorflow.python.tpu import tpu_strategy_util + + +_TPU_STRATEGIES = (tpu_strategy.TPUStrategy, tpu_strategy.TPUStrategyV1) + + +def strategy_and_run_tf_function_combinations(): + # Test the combination of different strategies and whether a tf.function + # is passed into strategy.run.""" + return combinations.combine( + distribution=[ + strategy_combinations.mirrored_strategy_with_gpu_and_cpu, + ], + mode=["graph", "eager"], + experimental_run_tf_function=[True, False], + use_var_policy=[True, False]) + combinations.combine( + distribution=[ + strategy_combinations.tpu_strategy, + strategy_combinations.tpu_strategy_packed_var, + ], + mode=["graph", "eager"], + experimental_run_tf_function=[True], + use_var_policy=[True, False]) + + +def strategy_with_var_policy(): + return combinations.combine( + distribution=[ + strategy_combinations.mirrored_strategy_with_gpu_and_cpu, + strategy_combinations.tpu_strategy, + strategy_combinations.tpu_strategy_packed_var, + strategy_combinations.central_storage_strategy_with_two_gpus, + ], + mode=["graph", "eager"], + use_var_policy=[True, False]) + + +class OnWriteVariableSync(test.TestCase, parameterized.TestCase): + + @combinations.generate( + combinations.combine( + distribution=[ + strategy_combinations.mirrored_strategy_with_one_gpu, + ], + mode=["graph"])) + def testFetchAMirroredVariable(self, distribution): + with self.session(graph=ops.Graph()) as sess, distribution.scope(): + with ops.device("/device:GPU:0"): + v = variable_scope.get_variable( + name="v", initializer=1., use_resource=True) + mirrored = values.MirroredVariable( + distribution, (v,), variable_scope.VariableAggregation.MEAN) + sess.run(variables_lib.global_variables_initializer()) + sess.run({"complicated": mirrored}) + + @combinations.generate(strategy_and_run_tf_function_combinations()) + def testAssign(self, distribution, experimental_run_tf_function, + use_var_policy): + + def assign(fn, v, update_value, cross_replica): + update_fn = lambda: getattr(v, fn)(update_value) + if cross_replica: + return update_fn() + else: + if experimental_run_tf_function: + update_fn = def_function.function(update_fn) + return distribution.experimental_local_results( + 
distribution.run(update_fn)) + + updates = [("assign", 1.), ("assign_add", 1.), ("assign_sub", -1.)] + aggregations = [ + variables_lib.VariableAggregation.NONE, + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.MEAN, + variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, + ] + options = list( + x for x in itertools.product(updates, aggregations, [True, False])) + for update, aggregation, cross_replica in options: + # assign in replica context with SUM does not make sense because you can + # just do value * num replicas instead; the error raised is "1. is not a + # distributed value and is unsupported for aggregation SUM". + if (not cross_replica and aggregation == + variables_lib.VariableAggregation.SUM): + continue + with distribution.scope(): + v = variable_scope.variable( + 0., + aggregation=aggregation) + self.evaluate(variables_lib.global_variables_initializer()) + fn, update_value = update + self.evaluate(assign(fn, v, update_value, cross_replica)) + for component in v._values: + self.assertAllEqual(self.evaluate(component.read_value()), + self.evaluate(array_ops.ones_like(component))) + + @combinations.generate(strategy_and_run_tf_function_combinations()) + def testAssignOnWriteVar(self, distribution, experimental_run_tf_function, + use_var_policy): + + with distribution.scope(): + v_to_assign = variable_scope.variable( + 2., aggregation=variables_lib.VariableAggregation.MEAN) + v_to_assign_sub = variable_scope.variable( + -2., aggregation=variables_lib.VariableAggregation.MEAN) + + def assign(fn, v, update_value, cross_replica): + update_fn = lambda: getattr(v, fn)(update_value) + if cross_replica: + return update_fn() + else: + if experimental_run_tf_function: + update_fn = def_function.function(update_fn) + return distribution.experimental_local_results( + distribution.run(update_fn)) + + updates = [("assign", v_to_assign), ("assign_add", v_to_assign), + ("assign_sub", v_to_assign_sub)] + aggregations = [ + variables_lib.VariableAggregation.NONE, + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.MEAN, + variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, + ] + options = list( + x for x in itertools.product(updates, aggregations, [True, False])) + for update, aggregation, cross_replica in options: + # assign in replica context with SUM does not make sense because you can + # just do value * num replicas instead; the error raised is "1. is not a + # distributed value and is unsupported for aggregation SUM". + if aggregation == variables_lib.VariableAggregation.SUM: + continue + with distribution.scope(): + v = variable_scope.variable( + 0., + aggregation=aggregation) + self.evaluate(variables_lib.global_variables_initializer()) + fn, update_value = update + self.evaluate(assign(fn, v, update_value, cross_replica)) + for component in v._values: + self.assertAllEqual(2.0, self.evaluate(component.read_value())) + + @combinations.generate(strategy_and_run_tf_function_combinations()) + def testAssignPerReplicaVal(self, distribution, experimental_run_tf_function, + use_var_policy): + + if isinstance(distribution, _TPU_STRATEGIES): + self.skipTest("Assigning PerReplica values is not supported.
See" + " sponge/80ba41f8-4220-4516-98ce-bbad48f9f11a.") + + with distribution.scope(): + per_replica_value = values.PerReplica( + [constant_op.constant(2.0), + constant_op.constant(2.0)]) + per_replica_sub_value = values.PerReplica( + [constant_op.constant(-2.0), + constant_op.constant(-2.0)]) + + def assign(fn, v, update_value, cross_replica): + update_fn = lambda: getattr(v, fn)(update_value) + if cross_replica: + return update_fn() + else: + if experimental_run_tf_function: + update_fn = def_function.function(update_fn) + return distribution.experimental_local_results( + distribution.run(update_fn)) + + updates = [("assign", per_replica_value), ("assign_add", per_replica_value), + ("assign_sub", per_replica_sub_value)] + # We don't support assigning PerReplica valus to vars in replica context + # with aggregation=NONE. + aggregations = [ + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.MEAN, + variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, + ] + options = list( + x for x in itertools.product(updates, aggregations, [True, False])) + for update, aggregation, cross_replica in options: + # assign in replica context with SUM does not make sense cause you can + # just do value * num replicas error is 1. is not a distributed value and + # is unsupported for aggregation SUM + if cross_replica: + # We don't support assigning PerReplica values to MirroredVariables in + # cross replica context + continue + with distribution.scope(): + v = variable_scope.variable( + 0., + aggregation=aggregation) + self.evaluate(variables_lib.global_variables_initializer()) + fn, update_value = update + self.evaluate(assign(fn, v, update_value, cross_replica)) + if aggregation == variables_lib.VariableAggregation.SUM: + expected = 4.0 + else: + expected = 2.0 + for component in v._values: + self.assertAllEqual(expected, self.evaluate(component.read_value())) + + @combinations.generate(strategy_with_var_policy()) + def testValueInReplicaContext(self, distribution, use_var_policy): + with distribution.scope(): + v = variables_lib.Variable( + 1., aggregation=variables_lib.VariableAggregation.MEAN) + self.evaluate(variables_lib.global_variables_initializer()) + + @def_function.function + def f(): + with ops.control_dependencies([v.assign_add(1.)]): + return v.value() + + results = self.evaluate( + distribution.experimental_local_results( + distribution.run(f))) + for value in results: + self.assertEqual(2., value) + + @combinations.generate(strategy_and_run_tf_function_combinations()) + def testReadValueInReplicaContext(self, distribution, + experimental_run_tf_function, + use_var_policy): + aggregations = [ + variables_lib.VariableAggregation.NONE, + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.MEAN, + variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, + ] + for aggregation in aggregations: + with distribution.scope(): + v = variable_scope.variable( + 0., + aggregation=aggregation) + self.evaluate(variables_lib.global_variables_initializer()) + if experimental_run_tf_function: + read_var_fn = def_function.function(v.read_value) + else: + read_var_fn = v.read_value + results = self.evaluate( + distribution.experimental_local_results( + distribution.run(read_var_fn))) + for component, value in zip(v._values, results): + self.assertAllEqual(self.evaluate(component.read_value()), value) + + @combinations.generate(strategy_and_run_tf_function_combinations()) + def testReadValueInCrossReplicaContext(self, distribution, + experimental_run_tf_function, + 
use_var_policy): + aggregations = [ + variables_lib.VariableAggregation.NONE, + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.MEAN, + variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, + ] + for aggregation in aggregations: + with distribution.scope(): + v = variable_scope.variable( + 2., + aggregation=aggregation) + self.evaluate(variables_lib.global_variables_initializer()) + + if experimental_run_tf_function: + read_var_fn = def_function.function(v.read_value) + else: + read_var_fn = v.read_value + + results = read_var_fn() + for component in v._values: + self.assertEqual(self.evaluate(component.read_value()), + self.evaluate(results)) + + @combinations.generate(strategy_with_var_policy()) + def testAssignOutOfScope(self, distribution, use_var_policy): + with distribution.scope(): + mirrored = variables_lib.Variable(1.) + self.evaluate(mirrored.assign(3.)) + self.assertEqual(self.evaluate(mirrored.read_value()), 3.) + for component in mirrored.values: + self.assertEqual(self.evaluate(component.read_value()), 3.) + + @combinations.generate(strategy_with_var_policy()) + def testAssignAggregationMeanDTypeNonFloat(self, distribution, + use_var_policy): + if isinstance(distribution, _TPU_STRATEGIES): + self.skipTest("Fix sponge/6e8ab540-4c0f-4da5-aedf-86505ff810c9 before " + "reenabling test.") + + with distribution.scope(): + v = variables_lib.Variable( + 1, + aggregation=variable_scope.VariableAggregation.MEAN, + dtype=dtypes.int32) + self.evaluate(v.initializer) + + @def_function.function + def assign(): + ctx = distribution_strategy_context.get_replica_context() + return v.assign(ctx.replica_id_in_sync_group) + + # disallow assign() with distributed value in replica context. + with self.assertRaisesRegex(ValueError, + "Cannot update non-float variables"): + self.evaluate( + distribution.experimental_local_results( + distribution.run(assign))) + + # allow assign() with same value in replica context. + @def_function.function + def assign_same(): + return v.assign(2) + + self.evaluate( + distribution.experimental_local_results( + distribution.run(assign_same))) + self.assertEqual(self.evaluate(v.read_value()), 2) + + # allow assign() with mirrored variable in replica context. + with distribution.scope(): + v2 = variables_lib.Variable( + 3, + aggregation=variable_scope.VariableAggregation.SUM, + dtype=dtypes.int32) + self.evaluate(v2.initializer) + + @def_function.function + def assign_mirrored(): + return v.assign(v2) + + self.evaluate( + distribution.experimental_local_results( + distribution.run(assign_mirrored))) + self.assertEqual(self.evaluate(v.read_value()), 3) + + # allow assign() in cross replica context. 
+ with distribution.scope(): + self.evaluate(v.assign(4)) + self.assertEqual(self.evaluate(v.read_value()), 4) + + @combinations.generate(strategy_with_var_policy()) + def testInitializedToSameValueInsideEagerRun(self, distribution, + use_var_policy): + if not context.executing_eagerly(): self.skipTest("eager only test") + v = [None] + + @def_function.function + def step(): + + def f(): + if v[0] is None: + v[0] = variables_lib.Variable(random_ops.random_normal([])) + + distribution.run(f) + + context.set_global_seed(None) + step() + vals = self.evaluate(v[0].values) + self.assertAllEqual(vals[0], vals[1]) + + @combinations.generate(strategy_with_var_policy()) + def testAggregationOnlyFirstReplica(self, distribution, use_var_policy): + with distribution.scope(): + v = variable_scope.variable( + 15., + synchronization=variables_lib.VariableSynchronization.ON_WRITE, + aggregation=variables_lib.VariableAggregation.ONLY_FIRST_REPLICA) + self.evaluate(variables_lib.global_variables_initializer()) + + @def_function.function + def assign(): + ctx = distribution_strategy_context.get_replica_context() + replica_id = ctx.replica_id_in_sync_group + return v.assign(math_ops.cast(replica_id, dtypes.float32)) + per_replica_results = self.evaluate(distribution.experimental_local_results( + distribution.run(assign))) + # The per-replica values should always match the first replicas value. + self.assertAllEqual( + array_ops.zeros(distribution.num_replicas_in_sync, dtypes.float32), + per_replica_results) + + @combinations.generate(strategy_with_var_policy()) + def testInitScope(self, distribution, use_var_policy): + if not context.executing_eagerly(): self.skipTest("eager only") + + class C(object): + pass + + obj = C() + obj.w = None + obj.v = None + + @def_function.function + def assign(): + with ops.init_scope(): + if obj.w is None: + obj.w = variables_lib.Variable( + 0, aggregation=variables_lib.VariableAggregation.MEAN) + obj.v = variables_lib.Variable( + obj.w.read_value(), + aggregation=variables_lib.VariableAggregation.MEAN) + self.evaluate(variables_lib.global_variables_initializer()) + + return obj.v.assign_add(2) + + per_replica_results = self.evaluate( + distribution.experimental_local_results(distribution.run(assign))) + self.assertAllEqual([2, 2], per_replica_results) + + @combinations.generate(strategy_with_var_policy()) + def testOperatorOverride(self, distribution, use_var_policy): + + with distribution.scope(): + v = variable_scope.variable( + 1, aggregation=variables_lib.VariableAggregation.MEAN) + self.evaluate(variables_lib.global_variables_initializer()) + + self.assertEqual(2, self.evaluate(v + 1)) + + @def_function.function + def add(): + return v + 1 + + per_replica_results = self.evaluate( + distribution.experimental_local_results(distribution.run(add))) + self.assertAllEqual([2, 2], per_replica_results) + + +@combinations.generate( + combinations.combine( + distribution=[ + strategy_combinations.mirrored_strategy_with_gpu_and_cpu, + ], + mode=["graph", "eager"], + use_var_policy=[True, False])) +class OnWriteVariableSyncScatterTests(test.TestCase, parameterized.TestCase): + + def testScatterSub(self, distribution, use_var_policy): + with distribution.scope(): + v = variables_lib.Variable( + [0., 0., 0.], aggregation=variables_lib.VariableAggregation.MEAN) + self.evaluate(v.initializer) + + @def_function.function + def scatter_sub(): + ctx = distribution_strategy_context.get_replica_context() + replica_id = ctx.replica_id_in_sync_group + value = indexed_slices.IndexedSlices( + 
values=array_ops.stack([ + math_ops.cast(replica_id, dtypes.float32), + math_ops.cast(replica_id + 1, dtypes.float32) + ]), + indices=array_ops.stack([replica_id, replica_id + 1]), + dense_shape=(3,)) + return v.scatter_sub(value) + + per_replica_results = self.evaluate( + distribution.experimental_local_results( + distribution.run(scatter_sub))) + self.assertAllEqual([[0., -1., -1.], [0., -1., -1.]], per_replica_results) + + def testScatterAdd(self, distribution, use_var_policy): + with distribution.scope(): + v = variables_lib.Variable( + [0, 0, 0], aggregation=variables_lib.VariableAggregation.SUM) + self.evaluate(v.initializer) + + @def_function.function + def scatter_add(): + ctx = distribution_strategy_context.get_replica_context() + replica_id = ctx.replica_id_in_sync_group + value = indexed_slices.IndexedSlices( + values=array_ops.stack([replica_id, replica_id + 1]), + indices=array_ops.stack([replica_id, replica_id + 1]), + dense_shape=(3,)) + return v.scatter_add(value) + + per_replica_results = self.evaluate( + distribution.experimental_local_results( + distribution.run(scatter_add))) + self.assertAllEqual([[0, 2, 2], [0, 2, 2]], per_replica_results) + + def testScatterDiv(self, distribution, use_var_policy): + with distribution.scope(): + v = variables_lib.Variable( + [1, 6, 1], aggregation=variables_lib.VariableAggregation.SUM) + self.evaluate(v.initializer) + + @def_function.function + def scatter_div(): + ctx = distribution_strategy_context.get_replica_context() + replica_id = ctx.replica_id_in_sync_group + value = indexed_slices.IndexedSlices( + values=array_ops.reshape(replica_id + 2, [1]), + indices=array_ops.reshape(replica_id, [1]), + dense_shape=(3,)) + return v.scatter_div(value) + + per_replica_results = self.evaluate( + distribution.experimental_local_results( + distribution.run(scatter_div))) + self.assertAllEqual([[0, 2, 1], [0, 2, 1]], per_replica_results) + + def testScatterMul(self, distribution, use_var_policy): + with distribution.scope(): + v = variables_lib.Variable( + [2., 1., 1.], aggregation=variables_lib.VariableAggregation.MEAN) + self.evaluate(v.initializer) + + @def_function.function + def scatter_mul(): + ctx = distribution_strategy_context.get_replica_context() + replica_id = ctx.replica_id_in_sync_group + value = indexed_slices.IndexedSlices( + values=array_ops.reshape( + math_ops.cast(replica_id + 2, dtypes.float32), [1]), + indices=array_ops.reshape(replica_id, [1]), + dense_shape=(3,)) + return v.scatter_mul(value) + + per_replica_results = self.evaluate( + distribution.experimental_local_results( + distribution.run(scatter_mul))) + self.assertAllClose([[2., 1.5, 1.], [2., 1.5, 1.]], per_replica_results) + + def testScatterMin(self, distribution, use_var_policy): + with distribution.scope(): + v1 = variables_lib.Variable( + [0, 2, 0], aggregation=variables_lib.VariableAggregation.SUM) + v2 = variables_lib.Variable( + [0, 2, 0], + aggregation=variables_lib.VariableAggregation.ONLY_FIRST_REPLICA) + self.evaluate(variables_lib.global_variables_initializer()) + + @def_function.function + def scatter_min(v): + value = indexed_slices.IndexedSlices( + values=array_ops.identity([1]), + indices=array_ops.identity([1]), + dense_shape=(3,)) + return v.scatter_min(value) + + with self.assertRaisesRegex(NotImplementedError, "scatter_min.*"): + self.evaluate( + distribution.experimental_local_results( + distribution.run(scatter_min, args=(v1,)))) + + per_replica_results = self.evaluate( + distribution.experimental_local_results( + 
distribution.run(scatter_min, args=(v2,)))) + self.assertAllClose([[0, 1, 0], [0, 1, 0]], per_replica_results) + + def testScatterMax(self, distribution, use_var_policy): + with distribution.scope(): + v1 = variables_lib.Variable( + [0, 0, 0], aggregation=variables_lib.VariableAggregation.SUM) + v2 = variables_lib.Variable( + [0, 0, 0], + aggregation=variables_lib.VariableAggregation.ONLY_FIRST_REPLICA) + self.evaluate(variables_lib.global_variables_initializer()) + + @def_function.function + def scatter_max(v): + value = indexed_slices.IndexedSlices( + values=array_ops.identity([1]), + indices=array_ops.identity([0]), + dense_shape=(3,)) + return v.scatter_max(value) + + with self.assertRaisesRegex(NotImplementedError, "scatter_max.*"): + self.evaluate( + distribution.experimental_local_results( + distribution.run(scatter_max, args=(v1,)))) + + per_replica_results = self.evaluate( + distribution.experimental_local_results( + distribution.run(scatter_max, args=(v2,)))) + self.assertAllClose([[1, 0, 0], [1, 0, 0]], per_replica_results) + + def testScatterUpdate(self, distribution, use_var_policy): + with distribution.scope(): + v1 = variables_lib.Variable( + [0, 0, 0], aggregation=variables_lib.VariableAggregation.SUM) + v2 = variables_lib.Variable( + [0, 0, 0], + aggregation=variables_lib.VariableAggregation.ONLY_FIRST_REPLICA) + self.evaluate(variables_lib.global_variables_initializer()) + + @def_function.function + def scatter_update(v): + value = indexed_slices.IndexedSlices( + values=array_ops.identity([3]), + indices=array_ops.identity([1]), + dense_shape=(3,)) + return v.scatter_update(value) + + with self.assertRaisesRegex(NotImplementedError, "scatter_update.*"): + self.evaluate( + distribution.experimental_local_results( + distribution.run(scatter_update, args=(v1,)))) + + per_replica_results = self.evaluate( + distribution.experimental_local_results( + distribution.run(scatter_update, args=(v2,)))) + self.assertAllClose([[0, 3, 0], [0, 3, 0]], per_replica_results) + + def testScatterOpsInCrossReplicaContext(self, distribution, use_var_policy): + with distribution.scope(): + v1 = variables_lib.Variable( + [1, 1, 1], aggregation=variables_lib.VariableAggregation.SUM) + v2 = variables_lib.Variable([1, 1, 1]) + self.evaluate(variables_lib.global_variables_initializer()) + + value = indexed_slices.IndexedSlices( + values=array_ops.identity([2]), + indices=array_ops.identity([0]), + dense_shape=(3,)) + with distribution.scope(): + self.evaluate(v1.scatter_add(value)) + self.assertAllEqual([3, 1, 1], self.evaluate(v1.read_value())) + + self.evaluate(v2.scatter_min(value)) + self.assertAllEqual([1, 1, 1], self.evaluate(v2.read_value())) + + +def _make_replica_local(method, strategy=None): + if strategy is None: + devices = ("/device:GPU:0", "/device:CPU:0") + else: + devices = strategy.extended.worker_devices + + v = [] + for d, n, init in zip(devices, ["v", "v/replica"], [1., 2.]): + with ops.device(d): + v.append(variable_scope.get_variable( + name=n, initializer=init, use_resource=True)) + + if (strategy is not None) and isinstance(strategy, _TPU_STRATEGIES): + var_cls = tpu_values.TPUSyncOnReadVariable + else: + var_cls = values.SyncOnReadVariable + replica_local = var_cls(strategy, v, method) + return v, replica_local + + +class OnReadVariableSyncTest(test.TestCase, parameterized.TestCase): + + @combinations.generate(strategy_and_run_tf_function_combinations()) + def testAssign(self, distribution, experimental_run_tf_function, + use_var_policy): + + def assign(fn, v, update_value, 
cross_replica): + update_fn = lambda: getattr(v, fn)(update_value) + if cross_replica: + return update_fn() + else: + if experimental_run_tf_function: + update_fn = def_function.function(update_fn) + return distribution.experimental_local_results( + distribution.run(update_fn)) + + updates = [("assign", 1.), ("assign_add", 1.), ("assign_sub", -1.)] + aggregations = [ + variables_lib.VariableAggregation.NONE, + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.MEAN, + variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, + ] + options = list( + x for x in itertools.product(updates, aggregations, [True, False])) + for update, aggregation, cross_replica in options: + # VariableAggregation.SUM in cross-replica mode is tested below, + # VariableAggregation.NONE in cross-replica mode is not supported. + if cross_replica and aggregation in [ + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.NONE, + ]: + continue + with distribution.scope(): + v = variable_scope.variable( + 0., + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=aggregation) + self.evaluate(variables_lib.global_variables_initializer()) + fn, update_value = update + self.evaluate(assign(fn, v, update_value, cross_replica)) + for component in v._values: + self.assertAllEqual(self.evaluate(component.read_value()), + self.evaluate(array_ops.ones_like(component))) + + @combinations.generate(strategy_and_run_tf_function_combinations()) + def testAssignOnReadVar(self, distribution, experimental_run_tf_function, + use_var_policy): + + with distribution.scope(): + v_to_assign = variable_scope.variable( + 2., aggregation=variables_lib.VariableAggregation.MEAN) + v_to_assign_sub = variable_scope.variable( + -2., aggregation=variables_lib.VariableAggregation.MEAN) + + def assign(fn, v, update_value, cross_replica): + update_fn = lambda: getattr(v, fn)(update_value) + if cross_replica: + return update_fn() + else: + if experimental_run_tf_function: + update_fn = def_function.function(update_fn) + return distribution.experimental_local_results( + distribution.run(update_fn)) + + updates = [("assign", v_to_assign), ("assign_add", v_to_assign), + ("assign_sub", v_to_assign_sub)] + expected_cross_replica = { + variables_lib.VariableAggregation.SUM: 1.0, + variables_lib.VariableAggregation.MEAN: 2.0, + variables_lib.VariableAggregation.ONLY_FIRST_REPLICA: 2.0 + } + expected_replica = { + variables_lib.VariableAggregation.SUM: 2.0, + variables_lib.VariableAggregation.MEAN: 2.0, + variables_lib.VariableAggregation.ONLY_FIRST_REPLICA: 2.0 + } + # aggregation=NONE is not supported for OnReadVariables. + aggregations = [ + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.MEAN, + variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, + ] + options = list( + x for x in itertools.product(updates, aggregations, [True, False])) + for update, aggregation, cross_replica in options: + # assign in replica context with SUM does not make sense cause you can + # just do value * num replicas error is 1. 
is not a distributed value and + # is unsupported for aggregation SUM + if aggregation == variables_lib.VariableAggregation.SUM: + continue + with distribution.scope(): + v = variable_scope.variable( + 0., + aggregation=aggregation) + self.evaluate(variables_lib.global_variables_initializer()) + fn, update_value = update + self.evaluate(assign(fn, v, update_value, cross_replica)) + if cross_replica: + for component in v._values: + self.assertAllEqual(expected_cross_replica.get(aggregation), + self.evaluate(component.read_value())) + else: + for component in v._values: + self.assertAllEqual(expected_replica.get(aggregation), + self.evaluate(component.read_value())) + + @combinations.generate(strategy_and_run_tf_function_combinations()) + def testAssignPerReplicaVal(self, distribution, experimental_run_tf_function, + use_var_policy): + + if isinstance(distribution, _TPU_STRATEGIES): + self.skipTest("Assigning PerReplica values is not supported. See" + " sponge/80ba41f8-4220-4516-98ce-bbad48f9f11a.") + + self.skipTest("We don't support assigning PerReplica values in cross " + "replica context or replica context. See error in " + "sponge/2b2e54c1-eda6-4534-82e1-c73b1dcd517f.") + + with distribution.scope(): + per_replica_value = values.PerReplica( + [constant_op.constant(2.0), + constant_op.constant(2.0)]) + + def assign(fn, v, update_value, cross_replica): + update_fn = lambda: getattr(v, fn)(update_value) + if cross_replica: + return update_fn() + else: + if experimental_run_tf_function: + update_fn = def_function.function(update_fn) + return distribution.experimental_local_results( + distribution.run(update_fn)) + + updates = [("assign", per_replica_value)] + # We don't support assigning PerReplica values to vars in replica context + # with aggregation=NONE. + aggregations = [ + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.MEAN, + variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, + ] + options = list( + x for x in itertools.product(updates, aggregations, [True, False])) + for update, aggregation, cross_replica in options: + # assign in replica context with SUM does not make sense cause you can + # just do value * num replicas error is 1.
is not a distributed value and + # is unsupported for aggregation SUM + with distribution.scope(): + v = variable_scope.variable( + 0., + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=aggregation) + self.evaluate(variables_lib.global_variables_initializer()) + fn, update_value = update + # with self.assertRaisesRegex(ValueError, "Attempt to convert a value "): + self.evaluate(assign(fn, v, update_value, cross_replica)) + if aggregation == variables_lib.VariableAggregation.SUM: + expected = 4.0 + else: + expected = 2.0 + for component in v._values: + self.assertAllEqual(expected, self.evaluate(component.read_value())) + + @combinations.generate(strategy_and_run_tf_function_combinations()) + def testAssignDtypeConversion(self, distribution, + experimental_run_tf_function, + use_var_policy): + + def assign(fn, v, update_value, cross_replica): + update_fn = lambda: getattr(v, fn)(update_value) + if cross_replica: + return update_fn() + else: + if experimental_run_tf_function: + update_fn = def_function.function(update_fn) + return distribution.experimental_local_results( + distribution.run(update_fn)) + + updates = [("assign", 1), ("assign_add", 1), ("assign_sub", -1)] + aggregations = [ + variables_lib.VariableAggregation.NONE, + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.MEAN, + variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, + ] + options = list( + x for x in itertools.product(updates, aggregations, [True, False])) + for update, aggregation, cross_replica in options: + # VariableAggregation.SUM in cross-replica mode is tested below, + # VariableAggregation.NONE in cross-replica mode is not supported. + if cross_replica and aggregation in [ + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.NONE, + ]: + continue + with distribution.scope(): + v = variable_scope.variable( + 0., + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=aggregation) + self.evaluate(variables_lib.global_variables_initializer()) + fn, update_value = update + self.evaluate(assign(fn, v, update_value, cross_replica)) + for component in v._values: + self.assertAllEqual(self.evaluate(component.read_value()), + self.evaluate(array_ops.ones_like(component))) + + @combinations.generate(strategy_with_var_policy()) + def testAssignWithAggregationSum(self, distribution, use_var_policy): + with distribution.scope(): + v = variable_scope.variable( + 0., + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=variables_lib.VariableAggregation.SUM) + self.evaluate(variables_lib.global_variables_initializer()) + self.evaluate(v.assign(1. 
* distribution.num_replicas_in_sync)) + for component in v._values: + self.assertAllEqual(self.evaluate(component.read_value()), + self.evaluate(array_ops.ones_like(component))) + + @combinations.generate(strategy_with_var_policy()) + def testAssignAddSubWithAggregationSum(self, distribution, use_var_policy): + with distribution.scope(): + v = variable_scope.variable( + 0., + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=variables_lib.VariableAggregation.SUM) + self.evaluate(variables_lib.global_variables_initializer()) + with self.assertRaisesRegex( + ValueError, "SyncOnReadVariable does not support "): + self.evaluate(v.assign_add(1.)) + with self.assertRaisesRegex( + ValueError, "SyncOnReadVariable does not support "): + self.evaluate(v.assign_sub(1.)) + + @combinations.generate(strategy_and_run_tf_function_combinations()) + def testReadValueInReplicaContext(self, distribution, + experimental_run_tf_function, + use_var_policy): + aggregations = [ + variables_lib.VariableAggregation.NONE, + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.MEAN, + variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, + ] + for aggregation in aggregations: + with distribution.scope(): + v = variable_scope.variable( + 0., + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=aggregation) + self.evaluate(variables_lib.global_variables_initializer()) + if experimental_run_tf_function: + read_var_fn = def_function.function(v.read_value) + else: + read_var_fn = v.read_value + results = self.evaluate( + distribution.experimental_local_results( + distribution.run(read_var_fn))) + for component, value in zip(v._values, results): + self.assertAllEqual(self.evaluate(component.read_value()), value) + + @combinations.generate(strategy_and_run_tf_function_combinations()) + def testReadValueInCrossReplicaContext(self, distribution, + experimental_run_tf_function, + use_var_policy): + aggregations = [ + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.MEAN, + variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, + ] + for aggregation in aggregations: + if isinstance(distribution, _TPU_STRATEGIES): + resolver = tpu_cluster_resolver.TPUClusterResolver("") + tpu_strategy_util.initialize_tpu_system(resolver) + with distribution.scope(): + v = variable_scope.variable( + 0., + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=aggregation) + self.evaluate(variables_lib.global_variables_initializer()) + + def assign(v=v): + ctx = distribution_strategy_context.get_replica_context() + replica_id = ctx.replica_id_in_sync_group + return v.assign(math_ops.cast(replica_id, dtypes.float32)) + + if experimental_run_tf_function: + assign = def_function.function(assign) + + self.evaluate( + distribution.experimental_local_results(distribution.run(assign))) + num_replicas = distribution.num_replicas_in_sync + sum_of_replica_values = num_replicas * (num_replicas - 1) / 2. 
+ if aggregation == variables_lib.VariableAggregation.SUM: + expected = sum_of_replica_values + elif aggregation == variables_lib.VariableAggregation.MEAN: + expected = sum_of_replica_values / num_replicas + else: + expected = 0 + self.assertEqual(expected, self.evaluate(v.read_value()), aggregation) + self.assertEqual(expected, self.evaluate(v.value()), aggregation) + self.assertEqual(expected, self.evaluate(v), aggregation) + self.assertEqual(expected, self.evaluate(array_ops.identity(v)), + aggregation) + + # TODO(b/145574622): Re-enable this test once ReduceOp argument is + # respected on GPUs. + @combinations.generate(strategy_and_run_tf_function_combinations()) + def disable_testAllReduce(self, distribution, + experimental_run_tf_function, + use_var_policy): + with distribution.scope(): + v = variable_scope.variable( + 2., + synchronization=variables_lib.VariableSynchronization.ON_WRITE, + aggregation=variables_lib.VariableAggregation.MEAN) + self.evaluate(variables_lib.global_variables_initializer()) + + def all_reduce(): + ctx = distribution_strategy_context.get_replica_context() + replica_id = ctx.replica_id_in_sync_group + return ctx.all_reduce("SUM", v) + math_ops.cast(replica_id, + dtypes.float32) + + if experimental_run_tf_function: + all_reduce = def_function.function(all_reduce) + + per_replica_results = self.evaluate( + distribution.experimental_local_results(distribution.run(all_reduce))) + expected_result = [] + for i in range(distribution.num_replicas_in_sync): + expected_result.append(2.0 * distribution.num_replicas_in_sync + + 1.0 * i) + self.assertEqual(per_replica_results, tuple(expected_result)) + + @combinations.generate(strategy_and_run_tf_function_combinations()) + def testAssignPerReplicaBeforeRead(self, distribution, + experimental_run_tf_function, + use_var_policy): + aggregations = [ + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.MEAN, + variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, + ] + for aggregation in aggregations: + with distribution.scope(): + v = variable_scope.variable( + 0., + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=aggregation) + self.evaluate(variables_lib.global_variables_initializer()) + + def assign(var=v): + ctx = distribution_strategy_context.get_replica_context() + replica_id = ctx.replica_id_in_sync_group + return var.assign(math_ops.cast(replica_id, dtypes.float32)) + + if experimental_run_tf_function: + assign = def_function.function(assign) + + per_replica_results = self.evaluate( + distribution.experimental_local_results(distribution.run(assign))) + expected_result = [] + for i in range(distribution.num_replicas_in_sync): + expected_result.append(1.0 * i) + self.assertEqual(per_replica_results, tuple(expected_result)) + + @combinations.generate(strategy_with_var_policy()) + def testReadValueWithAggregationNoneInCrossReplicaContext(self, distribution, + use_var_policy): + with distribution.scope(): + v = variable_scope.variable( + 0., + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=variables_lib.VariableAggregation.NONE) + self.evaluate(variables_lib.global_variables_initializer()) + with self.assertRaisesRegex( + ValueError, "Could not convert from .* VariableAggregation\\.NONE"): + self.evaluate(v.read_value()) + + @combinations.generate(strategy_with_var_policy()) + def testInitializedToSameValueInsideEagerRun(self, distribution, + use_var_policy): + if not context.executing_eagerly(): self.skipTest("eager only") + + v = [None] + 
@def_function.function + def step(): + def f(): + if v[0] is None: + v[0] = variables_lib.Variable( + random_ops.random_normal([]), + synchronization=variables_lib.VariableSynchronization.ON_READ) + + distribution.run(f) + + context.set_global_seed(None) + step() + vals = self.evaluate(v[0].values) + self.assertAllEqual(vals[0], vals[1]) + + @combinations.generate(strategy_with_var_policy()) + def testOperatorOverride(self, distribution, use_var_policy): + + with distribution.scope(): + v = variable_scope.variable( + 0.0, + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=variables_lib.VariableAggregation.MEAN) + self.evaluate(variables_lib.global_variables_initializer()) + + @def_function.function + def assign(): + ctx = distribution_strategy_context.get_replica_context() + replica_id = ctx.replica_id_in_sync_group + return v.assign(math_ops.cast(replica_id, dtypes.float32)) + + # Assign different replicas with different values. + self.evaluate(distribution.experimental_local_results( + distribution.run(assign))) + self.assertEqual(1.5, self.evaluate(v + 1)) + + @def_function.function + def add(): + return v + 1 + + per_replica_results = self.evaluate( + distribution.experimental_local_results(distribution.run(add))) + self.assertAllEqual([1, 2], per_replica_results) + + +@combinations.generate( + combinations.combine( + distribution=[ + strategy_combinations.mirrored_strategy_with_gpu_and_cpu, + ], + aggregation=[ + variables_lib.VariableAggregation.MEAN, + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, + ], + mode=["graph", "eager"], + use_var_policy=[True, False])) +class SyncOnReadScatterReplicaTest(test.TestCase, parameterized.TestCase): + + def testScatterSub(self, distribution, aggregation, use_var_policy): + with distribution.scope(): + v = variables_lib.Variable( + [1., 1., 1.], + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=aggregation) + self.evaluate(v.initializer) + + delta = values.PerReplica([ + indexed_slices.IndexedSlices( + values=[[0.], [1.]], indices=[0, 1], dense_shape=(3,)), + indexed_slices.IndexedSlices( + values=[[1.], [2.]], indices=[1, 2], dense_shape=(3,)), + ]) + + with self.assertRaises(NotImplementedError): + self.evaluate(distribution.run(v.scatter_sub, args=(delta,))) + + def testScatterAdd(self, distribution, aggregation, use_var_policy): + with distribution.scope(): + v = variables_lib.Variable( + [1., 1., 1.], + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=aggregation) + self.evaluate(v.initializer) + + delta = values.PerReplica([ + indexed_slices.IndexedSlices( + values=[[0.], [1.]], indices=[0, 1], dense_shape=(3,)), + indexed_slices.IndexedSlices( + values=[[1.], [2.]], indices=[1, 2], dense_shape=(3,)), + ]) + + with self.assertRaises(NotImplementedError): + self.evaluate(distribution.run(v.scatter_add, args=(delta,))) + + def testScatterDiv(self, distribution, aggregation, use_var_policy): + with distribution.scope(): + v = variables_lib.Variable( + [2., 6., 1.], + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=aggregation) + self.evaluate(v.initializer) + + delta = values.PerReplica([ + indexed_slices.IndexedSlices( + values=[[2.], [2.]], indices=[0, 1], dense_shape=(3,)), + indexed_slices.IndexedSlices( + values=[[3.], [3.]], indices=[1, 2], dense_shape=(3,)), + ]) + + with self.assertRaises(NotImplementedError): + self.evaluate(distribution.run(v.scatter_div, args=(delta,))) + + 
def testScatterMul(self, distribution, aggregation, use_var_policy):
+ with distribution.scope():
+ v = variables_lib.Variable(
+ [2., 1., 1.],
+ synchronization=variables_lib.VariableSynchronization.ON_READ,
+ aggregation=aggregation)
+ self.evaluate(v.initializer)
+
+ delta = values.PerReplica([
+ indexed_slices.IndexedSlices(
+ values=[[2.], [3.]], indices=[0, 1], dense_shape=(3,)),
+ indexed_slices.IndexedSlices(
+ values=[[4.], [5.]], indices=[1, 2], dense_shape=(3,)),
+ ])
+
+ with self.assertRaises(NotImplementedError):
+ self.evaluate(distribution.run(v.scatter_mul, args=(delta,)))
+
+ def testScatterMin(self, distribution, aggregation, use_var_policy):
+ with distribution.scope():
+ v = variables_lib.Variable(
+ [3., 4., 5.],
+ synchronization=variables_lib.VariableSynchronization.ON_READ,
+ aggregation=aggregation)
+ self.evaluate(v.initializer)
+
+ delta = values.PerReplica([
+ indexed_slices.IndexedSlices(
+ values=[[1.], [8.]], indices=[0, 1], dense_shape=(3,)),
+ indexed_slices.IndexedSlices(
+ values=[[9.], [2.]], indices=[1, 2], dense_shape=(3,)),
+ ])
+
+ with self.assertRaises(NotImplementedError):
+ self.evaluate(distribution.run(v.scatter_min, args=(delta,)))
+
+ def testScatterMax(self, distribution, aggregation, use_var_policy):
+ with distribution.scope():
+ v = variables_lib.Variable(
+ [3., 4., 5.],
+ synchronization=variables_lib.VariableSynchronization.ON_READ,
+ aggregation=aggregation)
+ self.evaluate(v.initializer)
+
+ delta = values.PerReplica([
+ indexed_slices.IndexedSlices(
+ values=[[1.], [8.]], indices=[0, 1], dense_shape=(3,)),
+ indexed_slices.IndexedSlices(
+ values=[[9.], [2.]], indices=[1, 2], dense_shape=(3,)),
+ ])
+
+ with self.assertRaises(NotImplementedError):
+ self.evaluate(distribution.run(v.scatter_max, args=(delta,)))
+
+ def testScatterUpdate(self, distribution, aggregation, use_var_policy):
+ with distribution.scope():
+ v = variables_lib.Variable(
+ [0., 0., 0.],
+ synchronization=variables_lib.VariableSynchronization.ON_READ,
+ aggregation=aggregation)
+ self.evaluate(v.initializer)
+
+ delta = values.PerReplica([
+ indexed_slices.IndexedSlices(
+ values=[[1.], [2.]], indices=[0, 1], dense_shape=(3,)),
+ indexed_slices.IndexedSlices(
+ values=[[3.], [4.]], indices=[1, 2], dense_shape=(3,)),
+ ])
+
+ with self.assertRaises(NotImplementedError):
+ self.evaluate(distribution.run(v.scatter_update, args=(delta,)))
+
+
+def _make_index_slices(vals, indices, dense_shape=None):
+ if dense_shape:
+ dense_shape = array_ops.identity(dense_shape)
+ return indexed_slices.IndexedSlices(
+ array_ops.identity(vals), array_ops.identity(indices), dense_shape)
+
+
+if __name__ == "__main__":
+ test.main()
From a9765493ece68c460865e53a9340946c92006c4f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Sat, 25 Jul 2020 19:13:05 -0700
Subject: [PATCH 1339/2522] [XLA:SPMD] Refactor convolution_handler.cc.

PiperOrigin-RevId: 323200864
Change-Id: I8eb0cd3bff5bde4912234d2686ab175893a89b15
---
 .../xla/service/spmd/convolution_handler.cc | 1002 +++++++++--------
 .../xla/service/spmd/spmd_partitioner.h | 3 -
 2 files changed, 555 insertions(+), 450 deletions(-)

diff --git a/tensorflow/compiler/xla/service/spmd/convolution_handler.cc b/tensorflow/compiler/xla/service/spmd/convolution_handler.cc
index 4caa2bbbf35..06aae9347ee 100644
--- a/tensorflow/compiler/xla/service/spmd/convolution_handler.cc
+++ b/tensorflow/compiler/xla/service/spmd/convolution_handler.cc
@@ -32,21 +32,17 @@ limitations under the License.
 namespace xla {
 namespace spmd {
+namespace {
 
-Status SpmdPartitioningVisitor::HandleConvolutionTiledLhsAndRhs(
- HloInstruction* hlo) {
- TF_RET_CHECK(hlo->opcode() == HloOpcode::kConvolution);
+// Partition convolution when only parallel dims are tiled.
+StatusOr<HloInstruction*> PartitionConvolutionWithParallelDimension(
+ PartitionedHlo lhs, PartitionedHlo rhs, HloInstruction* original_hlo,
+ int64 num_partitions, const SpmdPartitionerOptions& options,
+ HloInstruction* partition_id, HloModule* module, SpmdBuilder* b) {
+ TF_RET_CHECK(original_hlo->opcode() == HloOpcode::kConvolution);
 
- auto lhs = GetPartitionedHlo(hlo->operand(0));
- auto rhs = GetPartitionedHlo(hlo->operand(1));
- TF_RET_CHECK(!lhs.sharding().IsTileMaximal() &&
- !rhs.sharding().IsTileMaximal());
-
- const auto& dnums = hlo->convolution_dimension_numbers();
-
- // Check if the operand shardings are aligned. Also we currently don't
- // support partitioning non-spatial dimensions.
- std::vector rhs_to_lhs_indices(hlo->shape().rank());
+ const auto& dnums = original_hlo->convolution_dimension_numbers();
+ std::vector<int64> rhs_to_lhs_indices(original_hlo->shape().rank());
 rhs_to_lhs_indices[dnums.kernel_output_feature_dimension()] =
 dnums.input_batch_dimension();
 rhs_to_lhs_indices[dnums.kernel_input_feature_dimension()] =
@@ -55,12 +51,380 @@ Status SpmdPartitioningVisitor::HandleConvolutionTiledLhsAndRhs(
 rhs_to_lhs_indices[dnums.kernel_spatial_dimensions(i)] =
 dnums.input_spatial_dimensions(i);
 }
- std::vector lhs_to_rhs_indices(hlo->shape().rank());
+ std::vector<int64> lhs_to_rhs_indices(original_hlo->shape().rank());
+ for (int64 i = 0; i < rhs_to_lhs_indices.size(); ++i) {
+ lhs_to_rhs_indices[rhs_to_lhs_indices[i]] = i;
+ }
+ auto aligned_rhs_sharding =
+ hlo_sharding_util::TransposeSharding(lhs.sharding(), rhs_to_lhs_indices);
+ auto aligned_lhs_sharding =
+ hlo_sharding_util::TransposeSharding(rhs.sharding(), lhs_to_rhs_indices);
+
+ // Handling cases where all the partitioned dimensions are parallel
+ // dimensions.
+ int64 lhs_parallel_dim_partitions = 1;
+ int64 rhs_parallel_dim_partitions = 1;
+ std::vector<int64> parallel_spatial_dims;
+ for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) {
+ int64 lhs_dim = dnums.input_spatial_dimensions(i);
+ int64 lhs_size = lhs.base_shape().dimensions(lhs_dim);
+ const auto& wd = original_hlo->window().dimensions(i);
+ int64 rhs_dim = dnums.kernel_spatial_dimensions(i);
+ // Only non-reversal windows are supported right now.
+ if (!wd.window_reversal() &&
+ dot_as_convolution_util::ConvSpatialDimensionIsParallel(wd, lhs_size)) {
+ parallel_spatial_dims.emplace_back(i);
+ lhs_parallel_dim_partitions *= ShardCountAtDim(lhs.sharding(), lhs_dim);
+ rhs_parallel_dim_partitions *= ShardCountAtDim(rhs.sharding(), rhs_dim);
+ }
+ }
+ bool lhs_partition_dims_are_parallel =
+ (lhs_parallel_dim_partitions == num_partitions);
+ bool rhs_partition_dims_are_parallel =
+ (rhs_parallel_dim_partitions == num_partitions);
+
+ // Bail out unless there is a parallel dim and all the partitioned
+ // dimensions are parallel dimensions in either LHS or RHS.
+ if (parallel_spatial_dims.empty() || ((!lhs_partition_dims_are_parallel) &&
+ (!rhs_partition_dims_are_parallel))) {
+ return nullptr;
+ }
+ // Reshard LHS or RHS to partition at parallel dimensions as the other
+ // operand.
+ if (lhs_partition_dims_are_parallel) {
+ rhs = rhs.Reshard(aligned_rhs_sharding);
+ } else {
+ lhs = lhs.Reshard(aligned_lhs_sharding);
+ }
+
+ // Get LHS and RHS sharded shape.
+ auto lhs_shard_shape = MakePartitionedShape(lhs.base_shape(), lhs.sharding()); + auto rhs_shard_shape = MakePartitionedShape(rhs.base_shape(), rhs.sharding()); + + // Update convolution window. + auto new_window = original_hlo->window(); + for (const auto& spatial_dim : parallel_spatial_dims) { + auto wd = new_window.mutable_dimensions(spatial_dim); + wd->set_size(lhs_shard_shape.dimensions( + dnums.input_spatial_dimensions(spatial_dim))); + wd->set_stride(std::max(1, wd->size() - 1)); + wd->set_base_dilation(wd->size()); + } + TF_ASSIGN_OR_RETURN( + Shape sharded_conv_shape, + ShapeInference::InferConvolveShape( + lhs_shard_shape, rhs_shard_shape, original_hlo->feature_group_count(), + original_hlo->batch_group_count(), new_window, dnums)); + *sharded_conv_shape.mutable_layout() = original_hlo->shape().layout(); + auto sharded_conv = b->AddInstruction(HloInstruction::CreateConvolve( + sharded_conv_shape, lhs.hlo(), rhs.hlo(), + original_hlo->feature_group_count(), original_hlo->batch_group_count(), + new_window, dnums, original_hlo->precision_config())); + sharded_conv->set_sharding(original_hlo->sharding()); + return PartitionedHlo(sharded_conv, original_hlo->shape(), lhs.state()) + .Reshard(original_hlo->sharding()) + .hlo(); +} + +// Partition convolution when both LHS and RHS are partitioned at spatial +// dimensions. Halo exchange will happen on RHS only. +StatusOr +PartitionConvolutionWithSpatialDimensionHaloExchangeOnRHS( + PartitionedHlo lhs, PartitionedHlo rhs, HloInstruction* original_hlo, + int64 num_partitions, const SpmdPartitionerOptions& options, + HloInstruction* partition_id, HloModule* module, SpmdBuilder* b) { + TF_RET_CHECK(original_hlo->opcode() == HloOpcode::kConvolution); + TF_RET_CHECK(!lhs.sharding().IsTileMaximal() && + !rhs.sharding().IsTileMaximal()); + + const auto& dnums = original_hlo->convolution_dimension_numbers(); + std::vector rhs_to_lhs_indices(original_hlo->shape().rank()); + rhs_to_lhs_indices[dnums.kernel_output_feature_dimension()] = + dnums.input_batch_dimension(); + rhs_to_lhs_indices[dnums.kernel_input_feature_dimension()] = + dnums.input_feature_dimension(); + for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { + rhs_to_lhs_indices[dnums.kernel_spatial_dimensions(i)] = + dnums.input_spatial_dimensions(i); + } + std::vector lhs_to_rhs_indices(original_hlo->shape().rank()); + for (int64 i = 0; i < rhs_to_lhs_indices.size(); ++i) { + lhs_to_rhs_indices[rhs_to_lhs_indices[i]] = i; + } + auto aligned_rhs_sharding = + hlo_sharding_util::TransposeSharding(lhs.sharding(), rhs_to_lhs_indices); + auto aligned_lhs_sharding = + hlo_sharding_util::TransposeSharding(rhs.sharding(), lhs_to_rhs_indices); + + auto unsupported_sharding = [&](const HloSharding& lhs_sharding, + const HloSharding& rhs_sharding) { + // We currently don't support partitioning input batch or output feature + // dimensions. 
+ return lhs_sharding.tile_assignment().dim(dnums.input_batch_dimension()) !=
+ 1 ||
+ rhs_sharding.tile_assignment().dim(
+ dnums.kernel_output_feature_dimension()) != 1;
+ };
+
+ auto zero = b->AddInstruction(HloInstruction::CreateConstant(
+ LiteralUtil::Zero(original_hlo->shape().element_type())));
+ if (ShapeSizeInBytes(lhs.base_shape()) < ShapeSizeInBytes(rhs.base_shape())) {
+ if (unsupported_sharding(aligned_lhs_sharding, rhs.sharding())) {
+ return nullptr;
+ }
+ lhs = lhs.Reshard(aligned_lhs_sharding).PadWithValue(zero);
+ rhs = rhs.PadWithValue(zero);
+ } else {
+ if (unsupported_sharding(lhs.sharding(), aligned_rhs_sharding)) {
+ return nullptr;
+ }
+ lhs = lhs.PadWithValue(zero);
+ rhs = rhs.Reshard(aligned_rhs_sharding).PadWithValue(zero);
+ }
+
+ // Reshard RHS so that each shard computes the partial sum of the full
+ // shape result, and add AllReduce. See
+ // PartitionConvolutionWithSpatialDimensionHaloExchangeOnLHS(), which
+ // reshards LHS instead.
+ //
+ // The size of halo on each dimension can be calculated from the
+ // projection onto the RHS that shard i needs to read. RHS and LHS below
+ // refer to the shard size of RHS and LHS, WC is the number of windows,
+ // and D is the window dilation.
+ //
+ // * offset(i): LHS * i + low_padding - (WC - 1) * stride
+ // * limit(i): LHS * (i + 1) + low_padding
+ //
+ // Since shard i has RHS of range [i * RHS * D, (i + 1) * RHS * D)
+ // * left-halo: i * RHS - offset(i)
+ // = i * (RHS * D - LHS) + (WC - 1) * stride - low_padding
+ // * right-halo: limit(i) - (i + 1) * RHS
+ // = (i + 1) * (LHS - RHS * D) + low_padding
+ Window window = original_hlo->window();
+ const auto& collective_ops_creator = lhs.state().collective_ops_creator;
+ std::vector<int64> shard_counts(dnums.input_spatial_dimensions_size());
+ std::vector<int64> lhs_shard_sizes(dnums.input_spatial_dimensions_size());
+ std::vector<int64> rhs_shard_sizes(dnums.input_spatial_dimensions_size());
+
+ for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) {
+ int64 lhs_dimension = dnums.input_spatial_dimensions(i);
+ int64 rhs_dimension = dnums.kernel_spatial_dimensions(i);
+ int64 shard_count = rhs.sharding().tile_assignment().dim(rhs_dimension);
+ auto wd = window.dimensions(i);
+ if (wd.base_dilation() != 1 || wd.window_reversal()) {
+ return nullptr;
+ }
+
+ int64 lhs_shard_size =
+ CeilOfRatio(lhs.base_shape().dimensions(lhs_dimension), shard_count);
+ int64 rhs_shard_size =
+ CeilOfRatio(rhs.base_shape().dimensions(rhs_dimension), shard_count);
+ shard_counts[i] = shard_count;
+ lhs_shard_sizes[i] = lhs_shard_size;
+ rhs_shard_sizes[i] = rhs_shard_size;
+ }
+
+ std::vector<OffsetCalculation> left_halo_size_functions(
+ original_hlo->shape().rank());
+ std::vector<OffsetCalculation> right_halo_size_functions(
+ original_hlo->shape().rank());
+ Window new_window = window;
+
+ // Data structures needed for Pad and DynamicSlice on LHS if needed.
+ bool need_dynamic_slice_lhs = false;
+ auto partition_ordinals =
+ MakeTiledPartitionOrdinals(lhs.sharding(), partition_id, b);
+ std::vector<int64> zero_padding(original_hlo->shape().rank());
+ PaddingConfig pad_config = window_util::MakeSymmetricPadding(zero_padding);
+ auto zero_s32 =
+ b->AddInstruction(HloInstruction::CreateConstant(LiteralUtil::Zero(S32)));
+ std::vector<HloInstruction*> dynamic_slice_start_indices(
+ original_hlo->shape().rank(), zero_s32);
+ Shape dynamic_slice_shape = lhs.hlo()->shape();
+ Shape pad_shape = lhs.hlo()->shape();
+
+ for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) {
+ int64 lhs_dimension = dnums.input_spatial_dimensions(i);
+ int64 rhs_dimension = dnums.kernel_spatial_dimensions(i);
+ int64 lhs_shard_size = lhs_shard_sizes[i];
+ int64 rhs_shard_size = rhs_shard_sizes[i];
+
+ if (shard_counts[i] == 1) {
+ continue;
+ }
+
+ // Calculate the left and right halo sizes as described in the comments
+ // above. It calculates the halo sizes with dilation, so we apply
+ // CeilOfRatio({left,right}_halo_size, window_dilation).
+ auto wd = window.dimensions(i);
+ int64 padding_low = wd.padding_low();
+ int64 padding_high = wd.padding_high();
+ int64 base = lhs.base_shape().dimensions(lhs_dimension);
+ int64 window_count = 1 + (padding_low + padding_high + base -
+ (1 + (wd.size() - 1) * wd.window_dilation())) /
+ wd.stride();
+ left_halo_size_functions[rhs_dimension] =
+ OffsetCalculation(MultiplyAddDivideOffsetCalculation(
+ rhs_shard_size * wd.window_dilation() - lhs_shard_size,
+ (window_count - 1) * wd.stride() - padding_low +
+ wd.window_dilation() - 1,
+ wd.window_dilation()));
+ right_halo_size_functions[rhs_dimension] =
+ OffsetCalculation(MultiplyAddDivideOffsetCalculation(
+ lhs_shard_size - rhs_shard_size * wd.window_dilation(),
+ lhs_shard_size - rhs_shard_size * wd.window_dilation() +
+ padding_low + wd.window_dilation() - 1,
+ wd.window_dilation()));
+
+ // New RHS window size includes the maximum of both left and right
+ // halos.
+ int64 halo_size =
+ left_halo_size_functions[rhs_dimension].MaxInRange(1, shard_counts[i]) +
+ right_halo_size_functions[rhs_dimension].MaxInRange(
+ 0, shard_counts[i] - 1);
+ int64 new_window_size =
+ rhs.hlo()->shape().dimensions(rhs_dimension) + halo_size;
+
+ // The amount of new low padding could be dynamic (e.g., window_dilation
+ // != 1), which requires pad (to the maximum) and dynamic slice on LHS.
+ //
+ // If we consider the first window, the offset of the dilated RHS that
+ // aligns with the first valid LHS element for shard i is 'padding_low +
+ // LHS * i'. When the left halo is added to RHS, the offset of the first
+ // RHS element is (RHS * i - left_halo) * window_dilation. The
+ // difference between the two values is the amount of padding_low we
+ // need on LHS.
+ auto new_padding_low_function =
+ OffsetCalculation(HloOpcode::kMultiply,
+ left_halo_size_functions[rhs_dimension],
+ OffsetCalculation(MultiplyAddDivideOffsetCalculation(
+ 0, wd.window_dilation(), 1))) -
+ OffsetCalculation(MultiplyAddDivideOffsetCalculation(
+ rhs_shard_size * wd.window_dilation() - lhs_shard_size,
+ -padding_low, 1));
+
+ int64 new_padding_low_max =
+ new_padding_low_function.MaxInRange(0, shard_counts[i]);
+ int64 new_padding_low = new_padding_low_max;
+ int64 new_padding_high = window_count * wd.stride() +
+ (new_window_size - 1) * wd.window_dilation() -
+ new_padding_low - lhs_shard_size;
+
+ // We do pad/dynamic-slice only when the padding is dynamic.
+ if (!new_padding_low_function.IsConstant()) { + need_dynamic_slice_lhs = true; + new_padding_low = 0; + pad_config.mutable_dimensions(lhs_dimension) + ->set_edge_padding_low(new_padding_low_max); + pad_config.mutable_dimensions(lhs_dimension) + ->set_edge_padding_high(new_padding_low_max); + pad_shape.set_dimensions(lhs_dimension, + lhs_shard_size + 2 * new_padding_low_max); + dynamic_slice_start_indices[lhs_dimension] = + (OffsetCalculation( + MultiplyAddDivideOffsetCalculation(0, new_padding_low_max, 1)) - + new_padding_low_function) + .Calculate(partition_ordinals[lhs_dimension], b); + dynamic_slice_shape.set_dimensions(lhs_dimension, + lhs_shard_size + new_padding_low_max); + } + + // Since the convolution RHS operand size increased with halos, adjust + // the window config accordingly. + new_window.mutable_dimensions(i)->set_padding_low(new_padding_low); + new_window.mutable_dimensions(i)->set_padding_high(new_padding_high); + new_window.mutable_dimensions(i)->set_size( + rhs.hlo()->shape().dimensions(rhs_dimension) + halo_size); + } + + HloInstruction* conv_lhs = lhs.hlo(); + if (need_dynamic_slice_lhs) { + auto pad = b->AddInstruction( + HloInstruction::CreatePad(pad_shape, lhs.hlo(), zero, pad_config)); + conv_lhs = b->AddInstruction(HloInstruction::CreateDynamicSlice( + dynamic_slice_shape, pad, dynamic_slice_start_indices, + dynamic_slice_shape.dimensions())); + } + + // Exchange halo and concatenate. + HloInstruction* rhs_with_halo = rhs.hlo(); + for (int i = 0; i < dnums.kernel_spatial_dimensions_size(); ++i) { + int64 dim = dnums.kernel_spatial_dimensions(i); + int64 explicit_left_padding_on_full_shape = + left_halo_size_functions[dim].Calculate(0); + int64 shard_size_with_halo = new_window.dimensions(i).size(); + + // offset_on_padded_shape and padded_full_shape_size are needed only if + // we want to mask out-of-range values in ExchangeHaloAndGetValidData(). + // Since the default value for both the collective-permute is zero and + // also we call PadWithValue() on both operands at the beginning, we + // don't need to mask here. + // + // TODO(hyoulkee): Consider removing one of the two PadWithValue() calls + // if it's always safe. 
+ auto offset_on_padded_shape = + OffsetCalculation(MultiplyAddDivideOffsetCalculation( + rhs_shard_sizes[i], explicit_left_padding_on_full_shape, 1)) - + left_halo_size_functions[dim]; + int64 padded_full_shape_size = + offset_on_padded_shape.Calculate(shard_counts[i] - 1) + + new_window.dimensions(i).size(); + auto concat = ExchangeHaloAndGetValidData( + rhs_with_halo, rhs.base_shape(), left_halo_size_functions[dim], + right_halo_size_functions[dim], explicit_left_padding_on_full_shape, + padded_full_shape_size, shard_size_with_halo, dim, rhs.sharding(), + offset_on_padded_shape.Calculate(partition_ordinals[dim], b), zero, + partition_ordinals[dim], collective_ops_creator, + lhs.state().next_channel_id, b, + /*mask_invalid_region=*/false); + if (!concat) { + return nullptr; + } + rhs_with_halo = *concat; + } + + auto conv = b->AddInstruction(HloInstruction::CreateConvolve( + original_hlo->shape(), conv_lhs, rhs_with_halo, + original_hlo->feature_group_count(), original_hlo->batch_group_count(), + new_window, dnums, original_hlo->precision_config())); + auto ar = collective_ops_creator.create_cross_partition_all_reduce( + b, conv, MakeBinaryAdd(original_hlo->shape().element_type(), module), {}, + (*lhs.state().next_channel_id)++); + ar->set_sharding(HloSharding::Replicate()); + return PartitionedHlo(ar, original_hlo->shape(), lhs.state()) + .Reshard(original_hlo->sharding()) + .hlo(); +} + +// Partition convolution when both LHS and RHS are partitioned at spatial +// dimensions. Halo exchange will happen on LHS only. +StatusOr +PartitionConvolutionWithSpatialDimensionHaloExchangeOnLHS( + PartitionedHlo lhs, PartitionedHlo rhs, HloInstruction* original_hlo, + int64 num_partitions, const SpmdPartitionerOptions& options, + HloInstruction* partition_id, HloModule* module, SpmdBuilder* b) { + TF_RET_CHECK(original_hlo->opcode() == HloOpcode::kConvolution); + TF_RET_CHECK(!lhs.sharding().IsTileMaximal() && + !rhs.sharding().IsTileMaximal()); + + const auto& dnums = original_hlo->convolution_dimension_numbers(); + + // Check if the operand shardings are aligned. Also we currently don't + // support partitioning non-spatial dimensions. + std::vector rhs_to_lhs_indices(original_hlo->shape().rank()); + rhs_to_lhs_indices[dnums.kernel_output_feature_dimension()] = + dnums.input_batch_dimension(); + rhs_to_lhs_indices[dnums.kernel_input_feature_dimension()] = + dnums.input_feature_dimension(); + for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { + rhs_to_lhs_indices[dnums.kernel_spatial_dimensions(i)] = + dnums.input_spatial_dimensions(i); + } + std::vector lhs_to_rhs_indices(original_hlo->shape().rank()); for (int64 i = 0; i < rhs_to_lhs_indices.size(); ++i) { lhs_to_rhs_indices[rhs_to_lhs_indices[i]] = i; } - Window window = hlo->window(); + Window window = original_hlo->window(); std::vector reversed_rhs_dims; for (int64 i = 0; i < window.dimensions_size(); ++i) { if (window.dimensions(i).window_reversal()) { @@ -71,7 +435,7 @@ Status SpmdPartitioningVisitor::HandleConvolutionTiledLhsAndRhs( // Make the reversed dims left-padded to prepare for window reversal. 
auto left_padded_rhs = HaloExchangeToPadOnLeft(rhs, reversed_rhs_dims); if (left_padded_rhs == nullptr) { - return DefaultAction(hlo); + return nullptr; } left_padded_rhs->set_sharding(rhs.sharding()); rhs = PartitionedHlo(left_padded_rhs, rhs.base_shape(), rhs.state()); @@ -93,17 +457,17 @@ Status SpmdPartitioningVisitor::HandleConvolutionTiledLhsAndRhs( dnums.kernel_output_feature_dimension()) != 1; }; - auto zero = b_.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::Zero(hlo->shape().element_type()))); + auto zero = b->AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::Zero(original_hlo->shape().element_type()))); if (ShapeSizeInBytes(lhs.base_shape()) < ShapeSizeInBytes(rhs.base_shape())) { if (unsupported_sharding(aligned_lhs_sharding, rhs.sharding())) { - return DefaultAction(hlo); + return nullptr; } lhs = lhs.Reshard(aligned_lhs_sharding).PadWithValue(zero); rhs = rhs.PadWithValue(zero, reversed_rhs_dims); } else { if (unsupported_sharding(lhs.sharding(), aligned_rhs_sharding)) { - return DefaultAction(hlo); + return nullptr; } lhs = lhs.PadWithValue(zero); rhs = @@ -135,7 +499,7 @@ Status SpmdPartitioningVisitor::HandleConvolutionTiledLhsAndRhs( int64 shard_count = lhs.sharding().tile_assignment().dim(lhs_dimension); auto wd = window.dimensions(i); if (wd.base_dilation() != 1) { - return DefaultAction(hlo); + return nullptr; } int64 lhs_shard_size = @@ -147,12 +511,14 @@ Status SpmdPartitioningVisitor::HandleConvolutionTiledLhsAndRhs( rhs_shard_sizes[i] = rhs_shard_size; } - std::vector left_halo_size_functions(hlo->shape().rank()); - std::vector right_halo_size_functions(hlo->shape().rank()); + std::vector left_halo_size_functions( + original_hlo->shape().rank()); + std::vector right_halo_size_functions( + original_hlo->shape().rank()); Window new_window = window; auto partition_ordinals = - MakeTiledPartitionOrdinals(lhs.sharding(), partition_id_, &b_); + MakeTiledPartitionOrdinals(lhs.sharding(), partition_id, b); HloInstruction* lhs_with_halo = lhs.hlo(); for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { int64 lhs_dimension = dnums.input_spatial_dimensions(i); @@ -211,31 +577,173 @@ Status SpmdPartitioningVisitor::HandleConvolutionTiledLhsAndRhs( lhs_with_halo, lhs.base_shape(), left_halo_size_functions[dim], right_halo_size_functions[dim], explicit_left_padding_on_full_shape, padded_full_shape_size, shard_size_with_halo, dim, lhs.sharding(), - offset_on_padded_shape.Calculate(partition_ordinals[dim], &b_), zero, - partition_ordinals[dim], collective_ops_creator_, next_channel_id_, &b_, + offset_on_padded_shape.Calculate(partition_ordinals[dim], b), zero, + partition_ordinals[dim], lhs.state().collective_ops_creator, + lhs.state().next_channel_id, b, /*mask_invalid_region=*/false); if (!concat) { - return DefaultAction(hlo); + return nullptr; } lhs_with_halo = *concat; } - SetPartitionedHlo(hlo, [&]() { - auto conv = b_.AddInstruction(HloInstruction::CreateConvolve( - hlo->shape(), lhs_with_halo, rhs.hlo(), hlo->feature_group_count(), - hlo->batch_group_count(), new_window, - hlo->convolution_dimension_numbers(), hlo->precision_config())); - auto ar = collective_ops_creator_.create_cross_partition_all_reduce( - &b_, conv, MakeBinaryAdd(hlo->shape().element_type(), module_), {}, - NewChannel()); - ar->set_sharding(HloSharding::Replicate()); - return PartitionedHlo(ar, hlo->shape(), MakePartitioningState()) - .Reshard(hlo->sharding()) - .hlo(); - }); - return Status::OK(); + auto conv = b->AddInstruction(HloInstruction::CreateConvolve( + 
original_hlo->shape(), lhs_with_halo, rhs.hlo(), + original_hlo->feature_group_count(), original_hlo->batch_group_count(), + new_window, original_hlo->convolution_dimension_numbers(), + original_hlo->precision_config())); + auto ar = + lhs.state().collective_ops_creator.create_cross_partition_all_reduce( + b, conv, MakeBinaryAdd(original_hlo->shape().element_type(), module), + {}, (*lhs.state().next_channel_id)++); + ar->set_sharding(HloSharding::Replicate()); + return PartitionedHlo(ar, original_hlo->shape(), lhs.state()) + .Reshard(original_hlo->sharding()) + .hlo(); } +// Partition convolution when output is sharded. Will shard LHS with replicated +// RHS. +StatusOr PartitionConvolutionBaseCase( + PartitionedHlo lhs, PartitionedHlo rhs, HloInstruction* original_hlo, + int64 num_partitions, const SpmdPartitionerOptions& options, + HloInstruction* partition_id, HloModule* module, SpmdBuilder* b) { + TF_RET_CHECK(original_hlo->opcode() == HloOpcode::kConvolution); + const auto& dnums = original_hlo->convolution_dimension_numbers(); + const auto& sharding = original_hlo->sharding(); + TF_RET_CHECK(!sharding.IsTileMaximal()); + // We don't currently support sharding on output feature dimension. + if (sharding.tile_assignment().dim(dnums.output_feature_dimension()) > 1) { + return nullptr; + } + + // Check if the operand and the output sharding are aligned. + std::vector input_to_output_indices(original_hlo->shape().rank()); + input_to_output_indices[dnums.input_batch_dimension()] = + dnums.output_batch_dimension(); + input_to_output_indices[dnums.input_feature_dimension()] = + dnums.output_feature_dimension(); + for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { + input_to_output_indices[dnums.input_spatial_dimensions(i)] = + dnums.output_spatial_dimensions(i); + } + auto target_operand_sharding = + hlo_sharding_util::TransposeSharding(sharding, input_to_output_indices); + lhs = lhs.Reshard(target_operand_sharding); + + // Replicate the RHS. + rhs = rhs.Reshard(HloSharding::Replicate()); + + // Convolution window config does not include batch and feature dimensions, + // whereas ReshardAsWindowedInput() expects the same number of window + // dimensions as the rank of the operand. So add two more trivial + // dimensions. 
+ std::vector ones(original_hlo->shape().rank(), 1); + auto operand_window = window_util::MakeWindow(ones); + for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { + *operand_window.mutable_dimensions(dnums.input_spatial_dimensions(i)) = + original_hlo->window().dimensions(i); + } + + auto zero = b->AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::Zero(original_hlo->shape().element_type()))); + auto resharded_operand_and_window = + lhs.ReshardAsWindowedInput(operand_window, target_operand_sharding, zero); + if (!resharded_operand_and_window.has_value()) { + return nullptr; + } + Window new_window; + for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { + *new_window.add_dimensions() = + resharded_operand_and_window->shard_window.dimensions( + dnums.input_spatial_dimensions(i)); + } + TF_ASSIGN_OR_RETURN( + Shape sharded_conv_shape, + ShapeInference::InferConvolveShape( + resharded_operand_and_window->sharded_input->shape(), + rhs.hlo()->shape(), original_hlo->feature_group_count(), + original_hlo->batch_group_count(), new_window, dnums)); + auto shard_shape = + MakePartitionedShape(original_hlo->shape(), original_hlo->sharding()); + *sharded_conv_shape.mutable_layout() = shard_shape.layout(); + auto sharded_conv = b->AddInstruction(HloInstruction::CreateConvolve( + sharded_conv_shape, resharded_operand_and_window->sharded_input, + rhs.hlo(), original_hlo->feature_group_count(), + original_hlo->batch_group_count(), new_window, dnums, + original_hlo->precision_config())); + if (!resharded_operand_and_window->dynamic_slice_index_on_output + .has_value()) { + CHECK(ShapeUtil::Compatible(shard_shape, sharded_conv->shape())); + return sharded_conv; + } + return b->AddInstruction(HloInstruction::CreateDynamicSlice( + shard_shape, sharded_conv, + *resharded_operand_and_window->dynamic_slice_index_on_output, + shard_shape.dimensions())); +} + +// Partition convolution. +StatusOr PartitionConvolution( + PartitionedHlo lhs, PartitionedHlo rhs, HloInstruction* original_hlo, + int64 num_partitions, const SpmdPartitionerOptions& options, + HloInstruction* partition_id, HloModule* module, SpmdBuilder* b) { + TF_RET_CHECK(original_hlo->opcode() == HloOpcode::kConvolution); + + // Case 1: Either RHS or LHS is only partitioned at parallel dimensions. + TF_ASSIGN_OR_RETURN(auto parallel_partitioned_conv, + PartitionConvolutionWithParallelDimension( + lhs, rhs, original_hlo, num_partitions, options, + partition_id, module, b)); + if (parallel_partitioned_conv) { + return parallel_partitioned_conv; + } + + // Case 2: both RHS and LHS are tiled. + // Handling cases where both operands' shardings are aligned. We check that + // the LHS batch dimension is not partitioned because it is mapped to the + // output feature dimension in aligned_rhs_sharding, which are not the same + // dimension. + if (!lhs.sharding().IsTileMaximal() && !rhs.sharding().IsTileMaximal()) { + if (options.conv_halo_exchange_always_on_lhs) { + TF_ASSIGN_OR_RETURN( + auto partitioned_conv, + PartitionConvolutionWithSpatialDimensionHaloExchangeOnLHS( + lhs, rhs, original_hlo, num_partitions, options, partition_id, + module, b)); + if (partitioned_conv) { + return partitioned_conv; + } + } else { + TF_ASSIGN_OR_RETURN( + auto partitioned_conv, + PartitionConvolutionWithSpatialDimensionHaloExchangeOnRHS( + lhs, rhs, original_hlo, num_partitions, options, partition_id, + module, b)); + + if (partitioned_conv) { + return partitioned_conv; + } + } + } + + // Case 3: output is tiled. 
+ const HloSharding& sharding = original_hlo->sharding(); + if (!sharding.IsTileMaximal()) { + TF_ASSIGN_OR_RETURN( + auto partitioned_conv, + PartitionConvolutionBaseCase(lhs, rhs, original_hlo, num_partitions, + options, partition_id, module, b)); + + if (partitioned_conv) { + return partitioned_conv; + } + } + return nullptr; +} + +} // namespace + Status SpmdPartitioningVisitor::HandleConvolution(HloInstruction* hlo) { auto dot_dnums = dot_as_convolution_util::ParseDotGeneralFromConvolution(hlo); if (dot_dnums) { @@ -279,413 +787,13 @@ Status SpmdPartitioningVisitor::HandleConvolution(HloInstruction* hlo) { auto lhs = GetPartitionedHlo(hlo->operand(0)); auto rhs = GetPartitionedHlo(hlo->operand(1)); - const HloSharding& sharding = hlo->sharding(); - const auto& dnums = hlo->convolution_dimension_numbers(); - std::vector rhs_to_lhs_indices(hlo->shape().rank()); - rhs_to_lhs_indices[dnums.kernel_output_feature_dimension()] = - dnums.input_batch_dimension(); - rhs_to_lhs_indices[dnums.kernel_input_feature_dimension()] = - dnums.input_feature_dimension(); - for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { - rhs_to_lhs_indices[dnums.kernel_spatial_dimensions(i)] = - dnums.input_spatial_dimensions(i); - } - std::vector lhs_to_rhs_indices(hlo->shape().rank()); - for (int64 i = 0; i < rhs_to_lhs_indices.size(); ++i) { - lhs_to_rhs_indices[rhs_to_lhs_indices[i]] = i; - } - auto aligned_rhs_sharding = - hlo_sharding_util::TransposeSharding(lhs.sharding(), rhs_to_lhs_indices); - auto aligned_lhs_sharding = - hlo_sharding_util::TransposeSharding(rhs.sharding(), lhs_to_rhs_indices); + TF_ASSIGN_OR_RETURN( + auto partitioned_conv, + PartitionConvolution(lhs, rhs, hlo, num_partitions_, options_, + partition_id_, module_, &b_)); - // Handling cases where all the partitioned dimensions are parallel - // dimensions. - int64 lhs_parallel_dim_partitions = 1; - int64 rhs_parallel_dim_partitions = 1; - std::vector parallel_spatial_dims; - for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { - int64 lhs_dim = dnums.input_spatial_dimensions(i); - int64 lhs_size = lhs.base_shape().dimensions(lhs_dim); - const auto& wd = hlo->window().dimensions(i); - int64 rhs_dim = dnums.kernel_spatial_dimensions(i); - // Only non reversal window is supported right now. - if (!wd.window_reversal() && - dot_as_convolution_util::ConvSpatialDimensionIsParallel(wd, lhs_size)) { - parallel_spatial_dims.emplace_back(i); - lhs_parallel_dim_partitions *= ShardCountAtDim(lhs.sharding(), lhs_dim); - rhs_parallel_dim_partitions *= ShardCountAtDim(rhs.sharding(), rhs_dim); - } - } - bool lhs_partition_dims_are_parallel = - (lhs_parallel_dim_partitions == num_partitions_); - bool rhs_partition_dims_are_parallel = - (rhs_parallel_dim_partitions == num_partitions_); - - // If there is a parallel dim and all the partitioned dimensions are parallel - // dimensions in either LHS or RHS, simply create partitioned convolutions. - if (!parallel_spatial_dims.empty() && - (lhs_partition_dims_are_parallel || rhs_partition_dims_are_parallel)) { - // Reshard LHS or RHS to partition at parallel dimensions as the other - // operand. - if (lhs_partition_dims_are_parallel) { - rhs = rhs.Reshard(aligned_rhs_sharding); - } else { - lhs = lhs.Reshard(aligned_lhs_sharding); - } - auto lhs_shard_shape = - MakePartitionedShape(lhs.base_shape(), lhs.sharding()); - auto rhs_shard_shape = - MakePartitionedShape(rhs.base_shape(), rhs.sharding()); - // Update convolution window. 
- auto new_window = hlo->window(); - for (const auto& spatial_dim : parallel_spatial_dims) { - auto wd = new_window.mutable_dimensions(spatial_dim); - wd->set_size(lhs_shard_shape.dimensions( - dnums.input_spatial_dimensions(spatial_dim))); - wd->set_stride(std::max(1, wd->size() - 1)); - wd->set_base_dilation(wd->size()); - } - TF_ASSIGN_OR_RETURN( - Shape sharded_conv_shape, - ShapeInference::InferConvolveShape( - lhs_shard_shape, rhs_shard_shape, hlo->feature_group_count(), - hlo->batch_group_count(), new_window, dnums)); - *sharded_conv_shape.mutable_layout() = hlo->shape().layout(); - SetPartitionedHlo(hlo, [&]() { - auto sharded_conv = b_.AddInstruction(HloInstruction::CreateConvolve( - sharded_conv_shape, lhs.hlo(), rhs.hlo(), hlo->feature_group_count(), - hlo->batch_group_count(), new_window, dnums, - hlo->precision_config())); - sharded_conv->set_sharding(hlo->sharding()); - return PartitionedHlo(sharded_conv, hlo->shape(), MakePartitioningState()) - .Reshard(hlo->sharding()) - .hlo(); - }); - return Status::OK(); - } - - // Handling cases where both operands' shardings are aligned. We check that - // the LHS batch dimension is not partitioned because it is mapped to the - // output feature dimension in aligned_rhs_sharding, which are not the same - // dimension. - if (!lhs.sharding().IsTileMaximal() && !rhs.sharding().IsTileMaximal()) { - if (options_.conv_halo_exchange_always_on_lhs) { - return HandleConvolutionTiledLhsAndRhs(hlo); - } else { - // Reshard RHS so that each shard computes the partial sum of the full - // shape result, and add AllReduce. See HandleConvolutionTiledLhsAndRhs() - // that reshards LHS. - // - // The size of halo on each dimension can be calculated from the - // projection onto the RHS that shard i needs to read. RHS and LHS below - // refers to the shard size of RHS and LHS, WC is the number of windows, - // and D is the window dilation. - // - // * offset(i): LHS * i + low_padding - (WC - 1) * stride - // * limit(i): LHS * (i + 1) + low_padding - // - // Since shard i has RHS of range [i * RHS * D, (i + 1) * RHS * D) - // * left-halo: i * RHS - offset(i) - // = i * (RHS * D - LHS) + (WC - 1) * stride - low_padding - // * right-halo: limit(i) - (i + 1) * RHS - // = (i + 1) * (LHS - RHS * D) + low_pading - - auto unsupported_sharding = [&](const HloSharding& lhs_sharding, - const HloSharding& rhs_sharding) { - // We currently don't support partitioning input batch or output feature - // dimensions. 
- return lhs_sharding.tile_assignment().dim( - dnums.input_batch_dimension()) != 1 || - rhs_sharding.tile_assignment().dim( - dnums.kernel_output_feature_dimension()) != 1; - }; - auto zero = b_.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::Zero(hlo->shape().element_type()))); - if (ShapeSizeInBytes(lhs.base_shape()) < - ShapeSizeInBytes(rhs.base_shape())) { - if (unsupported_sharding(aligned_lhs_sharding, rhs.sharding())) { - return DefaultAction(hlo); - } - lhs = lhs.Reshard(aligned_lhs_sharding).PadWithValue(zero); - rhs = rhs.PadWithValue(zero); - } else { - if (unsupported_sharding(lhs.sharding(), aligned_rhs_sharding)) { - return DefaultAction(hlo); - } - lhs = lhs.PadWithValue(zero); - rhs = rhs.Reshard(aligned_rhs_sharding).PadWithValue(zero); - } - - Window window = hlo->window(); - std::vector shard_counts(dnums.input_spatial_dimensions_size()); - std::vector lhs_shard_sizes(dnums.input_spatial_dimensions_size()); - std::vector rhs_shard_sizes(dnums.input_spatial_dimensions_size()); - for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { - int64 lhs_dimension = dnums.input_spatial_dimensions(i); - int64 rhs_dimension = dnums.kernel_spatial_dimensions(i); - int64 shard_count = rhs.sharding().tile_assignment().dim(rhs_dimension); - auto wd = window.dimensions(i); - if (wd.base_dilation() != 1 || wd.window_reversal()) { - return DefaultAction(hlo); - } - - int64 lhs_shard_size = CeilOfRatio( - lhs.base_shape().dimensions(lhs_dimension), shard_count); - int64 rhs_shard_size = CeilOfRatio( - rhs.base_shape().dimensions(rhs_dimension), shard_count); - shard_counts[i] = shard_count; - lhs_shard_sizes[i] = lhs_shard_size; - rhs_shard_sizes[i] = rhs_shard_size; - } - - std::vector left_halo_size_functions( - hlo->shape().rank()); - std::vector right_halo_size_functions( - hlo->shape().rank()); - Window new_window = window; - - // Data structures needed for Pad and DynamicSlice on LHS if needed. - bool need_dynamic_slice_lhs = false; - auto partition_ordinals = - MakeTiledPartitionOrdinals(lhs.sharding(), partition_id_, &b_); - std::vector zero_padding(hlo->shape().rank()); - PaddingConfig pad_config = - window_util::MakeSymmetricPadding(zero_padding); - auto zero_s32 = b_.AddInstruction( - HloInstruction::CreateConstant(LiteralUtil::Zero(S32))); - std::vector dynamic_slice_start_indices( - hlo->shape().rank(), zero_s32); - Shape dynamic_slice_shape = lhs.hlo()->shape(); - Shape pad_shape = lhs.hlo()->shape(); - - for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { - int64 lhs_dimension = dnums.input_spatial_dimensions(i); - int64 rhs_dimension = dnums.kernel_spatial_dimensions(i); - int64 lhs_shard_size = lhs_shard_sizes[i]; - int64 rhs_shard_size = rhs_shard_sizes[i]; - - if (shard_counts[i] == 1) { - continue; - } - - // Calculate the left and right halo sizes as described in the comments - // above. It calculcates the halo sizes with dilation, so we apply - // CeilOfRatio({left,right}_halo_size, window_dilation). 
- auto wd = window.dimensions(i); - int64 padding_low = wd.padding_low(); - int64 padding_high = wd.padding_high(); - int64 base = lhs.base_shape().dimensions(lhs_dimension); - int64 window_count = - 1 + (padding_low + padding_high + base - - (1 + (wd.size() - 1) * wd.window_dilation())) / - wd.stride(); - left_halo_size_functions[rhs_dimension] = - OffsetCalculation(MultiplyAddDivideOffsetCalculation( - rhs_shard_size * wd.window_dilation() - lhs_shard_size, - (window_count - 1) * wd.stride() - padding_low + - wd.window_dilation() - 1, - wd.window_dilation())); - right_halo_size_functions[rhs_dimension] = - OffsetCalculation(MultiplyAddDivideOffsetCalculation( - lhs_shard_size - rhs_shard_size * wd.window_dilation(), - lhs_shard_size - rhs_shard_size * wd.window_dilation() + - padding_low + wd.window_dilation() - 1, - wd.window_dilation())); - - // New RHS window size includes the maximum of both left and right - // halos. - int64 halo_size = left_halo_size_functions[rhs_dimension].MaxInRange( - 1, shard_counts[i]) + - right_halo_size_functions[rhs_dimension].MaxInRange( - 0, shard_counts[i] - 1); - int64 new_window_size = - rhs.hlo()->shape().dimensions(rhs_dimension) + halo_size; - - // The amount of new low padding could be dynamic (e.g., window_dilation - // != 1), which requires pad (to the maximum) and dynamic slice on LHS. - // - // If we consider the first window, the offset of the dilated RHS that - // aligns with the first valid LHS element for shard i is 'padding_low + - // LHS * i'. When the left halo is added to RHS, the offset of the first - // RHS element is (RHS * i - left_halo) * window_dilation. The - // difference between the two values is the amount of padding_low we - // need on LHS. - auto new_padding_low_function = - OffsetCalculation( - HloOpcode::kMultiply, left_halo_size_functions[rhs_dimension], - OffsetCalculation(MultiplyAddDivideOffsetCalculation( - 0, wd.window_dilation(), 1))) - - OffsetCalculation(MultiplyAddDivideOffsetCalculation( - rhs_shard_size * wd.window_dilation() - lhs_shard_size, - -padding_low, 1)); - - int64 new_padding_low_max = - new_padding_low_function.MaxInRange(0, shard_counts[i]); - int64 new_padding_low = new_padding_low_max; - int64 new_padding_high = window_count * wd.stride() + - (new_window_size - 1) * wd.window_dilation() - - new_padding_low - lhs_shard_size; - - // We do pad/dynamic-slice only when the padding is dynamic. - if (!new_padding_low_function.IsConstant()) { - need_dynamic_slice_lhs = true; - new_padding_low = 0; - pad_config.mutable_dimensions(lhs_dimension) - ->set_edge_padding_low(new_padding_low_max); - pad_config.mutable_dimensions(lhs_dimension) - ->set_edge_padding_high(new_padding_low_max); - pad_shape.set_dimensions(lhs_dimension, - lhs_shard_size + 2 * new_padding_low_max); - dynamic_slice_start_indices[lhs_dimension] = - (OffsetCalculation(MultiplyAddDivideOffsetCalculation( - 0, new_padding_low_max, 1)) - - new_padding_low_function) - .Calculate(partition_ordinals[lhs_dimension], &b_); - dynamic_slice_shape.set_dimensions( - lhs_dimension, lhs_shard_size + new_padding_low_max); - } - - // Since the convolution RHS operand size increased with halos, adjust - // the window config accordingly. 
- new_window.mutable_dimensions(i)->set_padding_low(new_padding_low); - new_window.mutable_dimensions(i)->set_padding_high(new_padding_high); - new_window.mutable_dimensions(i)->set_size( - rhs.hlo()->shape().dimensions(rhs_dimension) + halo_size); - } - - HloInstruction* conv_lhs = lhs.hlo(); - if (need_dynamic_slice_lhs) { - auto pad = b_.AddInstruction( - HloInstruction::CreatePad(pad_shape, lhs.hlo(), zero, pad_config)); - conv_lhs = b_.AddInstruction(HloInstruction::CreateDynamicSlice( - dynamic_slice_shape, pad, dynamic_slice_start_indices, - dynamic_slice_shape.dimensions())); - } - - // Exchange halo and concatenate. - HloInstruction* rhs_with_halo = rhs.hlo(); - for (int i = 0; i < dnums.kernel_spatial_dimensions_size(); ++i) { - int64 dim = dnums.kernel_spatial_dimensions(i); - int64 explicit_left_padding_on_full_shape = - left_halo_size_functions[dim].Calculate(0); - int64 shard_size_with_halo = new_window.dimensions(i).size(); - - // offset_on_padded_shape and padded_full_shape_size are needed only if - // we want to mask out-of-range values in ExchangeHaloAndGetValidData(). - // Since the default value for both the collective-permute is zero and - // also we call PadWithValue() on both operands at the beginning, we - // don't need to mask here. - // - // TODO(hyoulkee): Consider removing one of the two PadWithValue() calls - // if it's always safe. - auto offset_on_padded_shape = - OffsetCalculation(MultiplyAddDivideOffsetCalculation( - rhs_shard_sizes[i], explicit_left_padding_on_full_shape, 1)) - - left_halo_size_functions[dim]; - int64 padded_full_shape_size = - offset_on_padded_shape.Calculate(shard_counts[i] - 1) + - new_window.dimensions(i).size(); - auto concat = ExchangeHaloAndGetValidData( - rhs_with_halo, rhs.base_shape(), left_halo_size_functions[dim], - right_halo_size_functions[dim], explicit_left_padding_on_full_shape, - padded_full_shape_size, shard_size_with_halo, dim, rhs.sharding(), - offset_on_padded_shape.Calculate(partition_ordinals[dim], &b_), - zero, partition_ordinals[dim], collective_ops_creator_, - next_channel_id_, &b_, /*mask_invalid_region=*/false); - if (!concat) { - return DefaultAction(hlo); - } - rhs_with_halo = *concat; - } - - SetPartitionedHlo(hlo, [&]() { - auto conv = b_.AddInstruction(HloInstruction::CreateConvolve( - hlo->shape(), conv_lhs, rhs_with_halo, hlo->feature_group_count(), - hlo->batch_group_count(), new_window, dnums, - hlo->precision_config())); - auto ar = collective_ops_creator_.create_cross_partition_all_reduce( - &b_, conv, MakeBinaryAdd(hlo->shape().element_type(), module_), {}, - NewChannel()); - ar->set_sharding(HloSharding::Replicate()); - return PartitionedHlo(ar, hlo->shape(), MakePartitioningState()) - .Reshard(hlo->sharding()) - .hlo(); - }); - return Status::OK(); - } - } - - if (!sharding.IsTileMaximal()) { - // We don't currently support sharding on output feature dimension. - if (sharding.tile_assignment().dim(dnums.output_feature_dimension()) > 1) { - return DefaultAction(hlo); - } - - // Check if the operand and the output sharding are aligned. 
- std::vector input_to_output_indices(hlo->shape().rank()); - input_to_output_indices[dnums.input_batch_dimension()] = - dnums.output_batch_dimension(); - input_to_output_indices[dnums.input_feature_dimension()] = - dnums.output_feature_dimension(); - for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { - input_to_output_indices[dnums.input_spatial_dimensions(i)] = - dnums.output_spatial_dimensions(i); - } - auto target_operand_sharding = - hlo_sharding_util::TransposeSharding(sharding, input_to_output_indices); - lhs = lhs.Reshard(target_operand_sharding); - - // Replicate the RHS. - rhs = rhs.Reshard(HloSharding::Replicate()); - - // Convolution window config does not include batch and feature dimensions, - // whereas ReshardAsWindowedInput() expects the same number of window - // dimensions as the rank of the operand. So add two more trivial - // dimensions. - std::vector ones(hlo->shape().rank(), 1); - auto operand_window = window_util::MakeWindow(ones); - for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { - *operand_window.mutable_dimensions(dnums.input_spatial_dimensions(i)) = - hlo->window().dimensions(i); - } - - auto zero = b_.AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::Zero(hlo->shape().element_type()))); - auto resharded_operand_and_window = lhs.ReshardAsWindowedInput( - operand_window, target_operand_sharding, zero); - if (!resharded_operand_and_window.has_value()) { - return DefaultAction(hlo); - } - Window new_window; - for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { - *new_window.add_dimensions() = - resharded_operand_and_window->shard_window.dimensions( - dnums.input_spatial_dimensions(i)); - } - TF_ASSIGN_OR_RETURN( - Shape sharded_conv_shape, - ShapeInference::InferConvolveShape( - resharded_operand_and_window->sharded_input->shape(), - rhs.hlo()->shape(), hlo->feature_group_count(), - hlo->batch_group_count(), new_window, dnums)); - auto shard_shape = MakePartitionedShape(hlo->shape(), hlo->sharding()); - *sharded_conv_shape.mutable_layout() = shard_shape.layout(); - SetPartitionedHlo(hlo, [&]() { - auto sharded_conv = b_.AddInstruction(HloInstruction::CreateConvolve( - sharded_conv_shape, resharded_operand_and_window->sharded_input, - rhs.hlo(), hlo->feature_group_count(), hlo->batch_group_count(), - new_window, dnums, hlo->precision_config())); - if (!resharded_operand_and_window->dynamic_slice_index_on_output - .has_value()) { - CHECK(ShapeUtil::Compatible(shard_shape, sharded_conv->shape())); - return sharded_conv; - } - return b_.AddInstruction(HloInstruction::CreateDynamicSlice( - shard_shape, sharded_conv, - *resharded_operand_and_window->dynamic_slice_index_on_output, - shard_shape.dimensions())); - }); + if (partitioned_conv) { + SetPartitionedHlo(hlo, [&] { return partitioned_conv; }); return Status::OK(); } return DefaultAction(hlo); diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h index d844ac3af1f..d6e6818608b 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h @@ -370,9 +370,6 @@ class SpmdPartitioningVisitor : public DfsHloVisitorWithDefault { Status HandleIota(HloInstruction* hlo) override; Status HandlePartitionId(HloInstruction* hlo) override; - // Handles convolution where both LHS and RHS operands are tiled. 
- Status HandleConvolutionTiledLhsAndRhs(HloInstruction* hlo); - // Implementation of dot partitioning given DotGeneralDimsMapping. Status HandleDotHelper( HloInstruction* hlo, const DotGeneralDimsMapping& dims_mapping, From 5c083e38528838843702e0581485461126587ac1 Mon Sep 17 00:00:00 2001 From: Peng Wang Date: Sat, 25 Jul 2020 20:14:58 -0700 Subject: [PATCH 1340/2522] Some internal change PiperOrigin-RevId: 323203954 Change-Id: I79368d3d25d355b7957f3996a48887b53f53b37c --- tensorflow/python/kernel_tests/random/BUILD | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/python/kernel_tests/random/BUILD b/tensorflow/python/kernel_tests/random/BUILD index 06360fc2095..31e0417102d 100644 --- a/tensorflow/python/kernel_tests/random/BUILD +++ b/tensorflow/python/kernel_tests/random/BUILD @@ -120,6 +120,9 @@ cuda_py_test( size = "medium", srcs = ["stateless_random_ops_test.py"], shard_count = 10, + tags = [ + "notap", # b/162112278 + ], tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", From 82e0f12b98c566a57b0c43332be2e6a35b99550e Mon Sep 17 00:00:00 2001 From: Mingming Liu Date: Sat, 25 Jul 2020 21:01:48 -0700 Subject: [PATCH 1341/2522] [BatchScheduler] Avoid mutex contention when deciding if a batch is empty. This is an optimization on read-path only. PiperOrigin-RevId: 323206654 Change-Id: I566f6be038c77b7c5a66aad7a036c9d696f28a17 --- .../kernels/batching_util/batch_scheduler.h | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/tensorflow/core/kernels/batching_util/batch_scheduler.h b/tensorflow/core/kernels/batching_util/batch_scheduler.h index bfafb5ed062..e993e592e12 100644 --- a/tensorflow/core/kernels/batching_util/batch_scheduler.h +++ b/tensorflow/core/kernels/batching_util/batch_scheduler.h @@ -128,6 +128,8 @@ class Batch { // The sum of the sizes of the tasks in 'tasks_'. size_t size_ TF_GUARDED_BY(mu_) = 0; + std::atomic empty_ TF_GUARDED_BY(mu_){true}; + // Whether the batch has been closed. Notification closed_; @@ -215,6 +217,7 @@ void Batch::AddTask(std::unique_ptr task) { mutex_lock l(mu_); size_ += task->size(); tasks_.push_back(std::move(task)); + empty_.store(false); } } @@ -228,6 +231,9 @@ std::unique_ptr Batch::RemoveTask() { std::unique_ptr task = std::move(tasks_.back()); size_ -= task->size(); tasks_.pop_back(); + if (tasks_.empty()) { + empty_.store(true); + } return task; } } @@ -241,16 +247,13 @@ int Batch::num_tasks() const { } template -bool Batch::empty() const { - { - // tracer is added to zoom in about this method. - // TODO(b/160249203): Remove tracer after evaluating a change to reduce - // lock contention and cpu usage (which is observed in profiler and - // very data-driven). - tensorflow::profiler::TraceMe tracer("BatchTask::empty"); - mutex_lock l(mu_); - return tasks_.empty(); - } +bool Batch::empty() const TF_NO_THREAD_SAFETY_ANALYSIS { + // tracer is added to zoom in about this method. + // TODO(b/160249203): Remove tracer after evaluating a change to reduce + // lock contention and cpu usage (which is observed in profiler and + // very data-driven). + tensorflow::profiler::TraceMe tracer("BatchTask::empty"); + return empty_.load(); } template From 53ae4101be4334c1596c2aed5f5040cdbb8c7386 Mon Sep 17 00:00:00 2001 From: Chao Mei Date: Sat, 25 Jul 2020 21:29:11 -0700 Subject: [PATCH 1342/2522] Add more details about how a new delegate could be plugged into TFLite kernels tests and tools using the dummy delegate as an example. 
PiperOrigin-RevId: 323208342
Change-Id: I55a6cbee80a4eb6ac873306fce30e8fce4d4aa93
---
 .../delegates/utils/dummy_delegate/README.md | 118 ++++++++++++++++--
 1 file changed, 107 insertions(+), 11 deletions(-)

diff --git a/tensorflow/lite/delegates/utils/dummy_delegate/README.md b/tensorflow/lite/delegates/utils/dummy_delegate/README.md
index 48b70af4b76..e77d92b9d8a 100644
--- a/tensorflow/lite/delegates/utils/dummy_delegate/README.md
+++ b/tensorflow/lite/delegates/utils/dummy_delegate/README.md
@@ -1,14 +1,110 @@
-#Description
-A dummy delegate implementation to illustrate
+When speaking of a TFLite delegate, how to create it and how to reuse existing
+TFLite testing and tooling with the new delegate are two major challenging
+issues. Here, we show a dummy delegate implementation to illustrate our
+recommended approaches to address these issues.
 
-* How to use
- [SimpleDelegateInterface and SimpleDelegateKernelInterface](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/utils/simple_delegate.h)
- to faciliate a TFLite delegate creation. A more sophisticated example could
- be referred to the
- [Flex delegate](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/delegates/flex)
+## Delegate Creation
 
-* How to leverage the
- [delegate registrar](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/tools/delegates)
- to plug in a delegate in TFLite benchmark and task evaluation tools.
+We recommend using
+[SimpleDelegateInterface and SimpleDelegateKernelInterface](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/utils/simple_delegate.h).
+We believe such APIs will make it easier to create a TFLite delegate. At a high
+level, developers only need to focus on
 
-More detailed guide is coming soon.
+* Whether a TFLite node in the graph is supported by the delegate or not.
+* Given the set of supported nodes (i.e. a subgraph of the original model
+graph), implement a delegate kernel that executes this set of nodes.
+
+The dummy delegate implementation here is a good starting point to understand
+the ideas above. For more sophisticated examples, refer to [Flex delegate](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/delegates/flex),
+ [Hexagon delegate](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/delegates/hexagon).
+
+## Testing & Tooling
+
+We recommend leveraging the
+[delegate registrar](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/tools/delegates)
+to plug in the newly created TFLite delegate to reuse existing TFLite kernel
+tests and utility tools including the model benchmark tool and the task
+evaluation tools. In short, create a delegate provider like the
+[`dummy_delegate_provider`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/utils/dummy_delegate/dummy_delegate_provider.cc)
+here, and then add it as an extra dependency when building the binary. Refer
+[here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/tools/delegates)
+for more delegate provider examples. The following details the above in the
+context of this dummy delegate.
+
+### Kernel Tests
+The tests referred to here are defined in [tensorflow/lite/kernels](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/kernels).
+They are based on the + [test_util library](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/kernels/test_util.h) + and the [testing main function stub](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/kernels/test_main.cc). + +To plug in the newly created delegate and reuse these tests, simply add the +created delegate provider as an extra dependency to +[`test_util_delegate_providers`](https://github.com/tensorflow/tensorflow/blob/f09dc5cf6e7fde978f9891638f529cd52a3c878f/tensorflow/lite/kernels/BUILD#L203) +and remove others that are not relevant, like the following: + +``` +cc_library( + name = "tflite_driver_delegate_providers", + deps = [ + # Existing delegate providers that might be still relevant. + ":dummy_delegate_provider", + ], + alwayslink = 1, +) +``` + +Then build a kernel test, and specify the commandline flags defined in the +delegate provider when executing the test. Take this case as an example, + +``` +bazel build -c opt tensorflow/lite/kernels:add_test + +# Setting --use_dummy_delegate=true will apply the dummy delegate to the +# TFLite model graph +bazel-bin/tensorflow/lite/kernels/add_test --use_dummy_delegate=true +``` + +### Benchmark and Task Evaluation Tools + +In TFLite, we have developed +[model benchmark tool](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/tools/benchmark) +and +[task evaluation tools](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/tools/evaluation/tasks) +that already have integrated existing various TFLite delegates. To reuse these +tools for the new delegate, similar to the kernel testing above, we simply add +the created delegate provider as an additional dependency when building the +binary. See rules in the +[BUILD](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/utils/BUILD) +file for details. + +Take reusing the TFLite model benchmark tool as an example, after the delegate +provider is created, define the BUILD rule like the following: + +``` +cc_binary( + name = "benchmark_model_plus_dummy_delegate", + copts = tflite_copts(), + linkopts = task_linkopts(), + deps = [ + # Simply add the delegate provider as an extra dep. + ":dummy_delegate_provider", + "//tensorflow/lite/tools/benchmark:benchmark_model_main", + ], +) +``` + +Now build the binary, and specify the commandline flags defined in this new +delegate provider and others detailed in the benchmark model tool +[doc](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/tools/benchmark/README.md) +when running the benchmark tool like the following: + +``` +bazel build -c opt tensorflow/lite/delegates/utils/dummy_delegate:benchmark_model_plus_dummy_delegate + +# Setting --use_dummy_delegate=true will apply the dummy delegate to the +# TFLite model graph. +bazel-bin/tensorflow/lite/delegates/utils/dummy_delegate/benchmark_model_plus_dummy_delegate --graph=/tmp/mobilenet-v2.tflite --use_dummy_delegate=true + +``` + +More detailed guide on TFLite delegate is coming soon. From 03300ba696f102ab4dd11703a558f3ac65363e21 Mon Sep 17 00:00:00 2001 From: Chao Mei Date: Sat, 25 Jul 2020 21:37:14 -0700 Subject: [PATCH 1343/2522] Extract the KernelTestDelegateProviders into a separate library from test_util, and apply it in lite/testing/tflite_driver. 
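In case it helps reviewers, the intended consumption pattern of the extracted
library looks roughly like the following sketch. Only the
`KernelTestDelegateProviders` calls mirror this change; the model path and the
interpreter boilerplate are illustrative placeholders.

```c++
// Rough usage sketch of the extracted KernelTestDelegateProviders library,
// assuming a standard interpreter setup; "/tmp/model.tflite" is a placeholder.
#include <memory>
#include <utility>

#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/kernels/test_delegate_providers.h"
#include "tensorflow/lite/model.h"

int main(int argc, char** argv) {
  // Consume delegate-related flags (e.g. --use_nnapi, --use_xnnpack) from argv
  // before the rest of the test harness parses it.
  if (!tflite::KernelTestDelegateProviders::Get()->InitFromCmdlineArgs(
          &argc, const_cast<const char**>(argv))) {
    return 1;
  }

  auto model = tflite::FlatBufferModel::BuildFromFile("/tmp/model.tflite");
  tflite::ops::builtin::BuiltinOpResolver resolver;
  std::unique_ptr<tflite::Interpreter> interpreter;
  tflite::InterpreterBuilder(*model, resolver)(&interpreter);

  // Apply whatever delegates the registered providers created from the flags.
  for (auto& delegate :
       tflite::KernelTestDelegateProviders::Get()->CreateAllDelegates()) {
    if (interpreter->ModifyGraphWithDelegate(std::move(delegate)) !=
        kTfLiteOk) {
      return 1;
    }
  }
  // ... run the actual test logic against `interpreter` ...
  return 0;
}
```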
PiperOrigin-RevId: 323208799 Change-Id: I047f9aa54e32263c0b21aa673bea8cc7de751ba7 --- tensorflow/lite/kernels/BUILD | 40 +++++++++-- .../lite/kernels/test_delegate_providers.cc | 57 +++++++++++++++ .../lite/kernels/test_delegate_providers.h | 71 +++++++++++++++++++ .../kernels/test_delegate_providers_test.cc | 50 +++++++++++++ tensorflow/lite/kernels/test_main.cc | 1 + tensorflow/lite/kernels/test_util.cc | 47 ++---------- tensorflow/lite/kernels/test_util.h | 48 ------------- tensorflow/lite/kernels/test_util_test.cc | 27 ------- tensorflow/lite/testing/BUILD | 21 +++++- .../testing/generated_examples_zip_test.cc | 22 +++--- tensorflow/lite/testing/tflite_driver.cc | 17 +++++ tensorflow/lite/testing/tflite_driver.h | 7 +- 12 files changed, 276 insertions(+), 132 deletions(-) create mode 100644 tensorflow/lite/kernels/test_delegate_providers.cc create mode 100644 tensorflow/lite/kernels/test_delegate_providers.h create mode 100644 tensorflow/lite/kernels/test_delegate_providers_test.cc diff --git a/tensorflow/lite/kernels/BUILD b/tensorflow/lite/kernels/BUILD index 4351a2c93a2..e9ac9110869 100644 --- a/tensorflow/lite/kernels/BUILD +++ b/tensorflow/lite/kernels/BUILD @@ -171,7 +171,7 @@ cc_library( deps = [ ":acceleration_test_util", ":builtin_ops", - ":test_util_delegate_providers", + ":test_delegate_providers_lib", "//tensorflow/core/platform:logging", "//tensorflow/lite:framework", "//tensorflow/lite:schema_fbs_version", @@ -189,7 +189,6 @@ cc_library( "//tensorflow/lite/tools:command_line_flags", "//tensorflow/lite/tools:logging", "//tensorflow/lite/tools:tool_params", - "//tensorflow/lite/tools/delegates:delegate_provider_hdr", "//tensorflow/lite/tools/optimize:quantization_utils", "//tensorflow/lite/tools/optimize/sparsity:format_converter", "//tensorflow/lite/tools/versioning", @@ -198,7 +197,8 @@ cc_library( ], ) -# A convenient library for tflite delegate execution providers. +# A convenient library of tflite delegate execution providers for kernel tests +# based on SingleOpModel or its derivatives defined in test_util.h/cc. cc_library( name = "test_util_delegate_providers", copts = tflite_copts(), @@ -220,13 +220,28 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "test_delegate_providers_lib", + srcs = ["test_delegate_providers.cc"], + hdrs = ["test_delegate_providers.h"], + copts = tflite_copts(), + deps = [ + "//tensorflow/lite/tools:command_line_flags", + "//tensorflow/lite/tools:logging", + "//tensorflow/lite/tools:tool_params", + "//tensorflow/lite/tools/delegates:delegate_provider_hdr", + ], +) + # TODO(b/132204084): Create tflite_cc_test rule to automate test_main inclusion. cc_library( name = "test_main", testonly = 1, srcs = ["test_main.cc"], deps = [ + ":test_delegate_providers_lib", ":test_util", + ":test_util_delegate_providers", "//tensorflow/lite/testing:util", "//tensorflow/lite/tools:command_line_flags", "@com_google_googletest//:gtest", @@ -456,6 +471,17 @@ cc_test( name = "test_util_test", size = "small", srcs = ["test_util_test.cc"], + deps = [ + ":test_util", + "//tensorflow/lite/testing:util", + "@com_google_googletest//:gtest", + ], +) + +cc_test( + name = "test_delegate_providers_lib_test", + size = "small", + srcs = ["test_delegate_providers_test.cc"], # See details in https://github.com/bazelbuild/bazel/issues/11552 to avoid # lazy symbol binding failure on macOS. 
linkstatic = select({ @@ -463,9 +489,11 @@ cc_test( "//conditions:default": False, }), deps = [ - ":test_util", - "//tensorflow/lite/testing:util", - "@com_google_googletest//:gtest", + ":test_delegate_providers_lib", + "//tensorflow/lite/tools/delegates:default_execution_provider", + "//tensorflow/lite/tools/delegates:nnapi_delegate_provider", + "//tensorflow/lite/tools/delegates:xnnpack_delegate_provider", + "@com_google_googletest//:gtest_main", ], ) diff --git a/tensorflow/lite/kernels/test_delegate_providers.cc b/tensorflow/lite/kernels/test_delegate_providers.cc new file mode 100644 index 00000000000..d2cb2d1021d --- /dev/null +++ b/tensorflow/lite/kernels/test_delegate_providers.cc @@ -0,0 +1,57 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/lite/kernels/test_delegate_providers.h" + +#include "tensorflow/lite/tools/command_line_flags.h" +#include "tensorflow/lite/tools/logging.h" + +namespace tflite { +/*static*/ KernelTestDelegateProviders* KernelTestDelegateProviders::Get() { + static KernelTestDelegateProviders* const providers = + new KernelTestDelegateProviders(); + return providers; +} + +KernelTestDelegateProviders::KernelTestDelegateProviders() { + for (const auto& one : tools::GetRegisteredDelegateProviders()) { + params_.Merge(one->DefaultParams()); + } +} + +bool KernelTestDelegateProviders::InitFromCmdlineArgs(int* argc, + const char** argv) { + std::vector flags; + for (const auto& one : tools::GetRegisteredDelegateProviders()) { + auto one_flags = one->CreateFlags(¶ms_); + flags.insert(flags.end(), one_flags.begin(), one_flags.end()); + } + return tflite::Flags::Parse(argc, argv, flags); +} + +std::vector +KernelTestDelegateProviders::CreateAllDelegates( + const tools::ToolParams& params) const { + std::vector delegates; + for (const auto& one : tools::GetRegisteredDelegateProviders()) { + auto ptr = one->CreateTfLiteDelegate(params); + // It's possible that a delegate of certain type won't be created as + // user-specified benchmark params tells not to. + if (ptr == nullptr) continue; + delegates.emplace_back(std::move(ptr)); + TFLITE_LOG(INFO) << one->GetName() << " delegate is created."; + } + return delegates; +} +} // namespace tflite diff --git a/tensorflow/lite/kernels/test_delegate_providers.h b/tensorflow/lite/kernels/test_delegate_providers.h new file mode 100644 index 00000000000..668441c6b77 --- /dev/null +++ b/tensorflow/lite/kernels/test_delegate_providers.h @@ -0,0 +1,71 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_TEST_DELEGATE_PROVIDERS_H_ +#define TENSORFLOW_LITE_KERNELS_TEST_DELEGATE_PROVIDERS_H_ + +#include + +#include "tensorflow/lite/tools/delegates/delegate_provider.h" +#include "tensorflow/lite/tools/tool_params.h" + +namespace tflite { +// A utility class to provide TfLite delegate creations for kernel tests. The +// options of a particular delegate could be specified from commandline flags by +// using the delegate provider registrar as implemented in lite/tools/delegates +// directory. +class KernelTestDelegateProviders { + public: + // Returns a global KernelTestDelegateProviders instance. + static KernelTestDelegateProviders* Get(); + + KernelTestDelegateProviders(); + + // Initialize delegate-related parameters from commandline arguments and + // returns true if successful. + bool InitFromCmdlineArgs(int* argc, const char** argv); + + // This provides a way to overwrite parameter values programmatically before + // creating TfLite delegates. Note, changes to the returned ToolParams will + // have a global impact on creating TfLite delegates. + // If a local-only change is preferred, recommend using the following workflow + // create TfLite delegates via delegate providers: + // tools::ToolParams local_params; + // local_params.Merge(KernelTestDelegateProviders::Get()->ConstParams()); + // Overwrite params in local_params by calling local_params.Set<...>(...); + // Get TfLite delegates via + // KernelTestDelegateProviders::Get()->CreateAllDelegates(local_params); + tools::ToolParams* MutableParams() { return ¶ms_; } + const tools::ToolParams& ConstParams() const { return params_; } + + // Create a list of TfLite delegates based on the provided parameters + // `params`. + std::vector CreateAllDelegates( + const tools::ToolParams& params) const; + + // Similar to the above, but creating a list of TfLite delegates based on what + // have been initialized (i.e. 'params_'). + std::vector CreateAllDelegates() const { + return CreateAllDelegates(params_); + } + + private: + // Contain delegate-related parameters that are initialized from command-line + // flags. + tools::ToolParams params_; +}; + +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_TEST_DELEGATE_PROVIDERS_H_ diff --git a/tensorflow/lite/kernels/test_delegate_providers_test.cc b/tensorflow/lite/kernels/test_delegate_providers_test.cc new file mode 100644 index 00000000000..8ec09f5bf25 --- /dev/null +++ b/tensorflow/lite/kernels/test_delegate_providers_test.cc @@ -0,0 +1,50 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/lite/kernels/test_delegate_providers.h" + +#include +#include + +namespace tflite { +namespace { +TEST(KernelTestDelegateProvidersTest, DelegateProvidersParams) { + KernelTestDelegateProviders providers; + const auto& params = providers.ConstParams(); + EXPECT_TRUE(params.HasParam("use_xnnpack")); + EXPECT_TRUE(params.HasParam("use_nnapi")); + + int argc = 3; + const char* argv[] = {"program_name", "--use_nnapi=true", + "--other_undefined_flag=1"}; + EXPECT_TRUE(providers.InitFromCmdlineArgs(&argc, argv)); + EXPECT_TRUE(params.Get("use_nnapi")); + EXPECT_EQ(2, argc); + EXPECT_EQ("--other_undefined_flag=1", argv[1]); +} + +TEST(KernelTestDelegateProvidersTest, CreateTfLiteDelegates) { +#if !defined(__Fuchsia__) && !defined(TFLITE_WITHOUT_XNNPACK) + KernelTestDelegateProviders providers; + providers.MutableParams()->Set("use_xnnpack", true); + EXPECT_GE(providers.CreateAllDelegates().size(), 1); + + tools::ToolParams local_params; + local_params.Merge(providers.ConstParams()); + local_params.Set("use_xnnpack", false); + EXPECT_TRUE(providers.CreateAllDelegates(local_params).empty()); +#endif +} +} // namespace +} // namespace tflite diff --git a/tensorflow/lite/kernels/test_main.cc b/tensorflow/lite/kernels/test_main.cc index dd8fb0405ab..a1b1a913281 100644 --- a/tensorflow/lite/kernels/test_main.cc +++ b/tensorflow/lite/kernels/test_main.cc @@ -15,6 +15,7 @@ limitations under the License. #include #include +#include "tensorflow/lite/kernels/test_delegate_providers.h" #include "tensorflow/lite/kernels/test_util.h" #include "tensorflow/lite/testing/util.h" #include "tensorflow/lite/tools/command_line_flags.h" diff --git a/tensorflow/lite/kernels/test_util.cc b/tensorflow/lite/kernels/test_util.cc index 80ed66a4340..c3a40252cfb 100644 --- a/tensorflow/lite/kernels/test_util.cc +++ b/tensorflow/lite/kernels/test_util.cc @@ -40,12 +40,12 @@ limitations under the License. #include "tensorflow/lite/interpreter.h" #include "tensorflow/lite/kernels/acceleration_test_util.h" #include "tensorflow/lite/kernels/register.h" +#include "tensorflow/lite/kernels/test_delegate_providers.h" #include "tensorflow/lite/model.h" #include "tensorflow/lite/nnapi/nnapi_implementation.h" #include "tensorflow/lite/schema/schema_generated.h" #include "tensorflow/lite/string_type.h" #include "tensorflow/lite/string_util.h" -#include "tensorflow/lite/tools/command_line_flags.h" #include "tensorflow/lite/tools/logging.h" #include "tensorflow/lite/tools/versioning/op_version.h" #include "tensorflow/lite/version.h" @@ -234,8 +234,12 @@ void SingleOpModel::BuildInterpreter( // static bool SingleOpModel::GetForceUseNnapi() { - return tflite::KernelTestDelegateProviders::Get()->ConstParams().Get( - "use_nnapi"); + const auto& delegate_params = + tflite::KernelTestDelegateProviders::Get()->ConstParams(); + // It's possible this library isn't linked with the nnapi delegate provider + // lib. 
+ return delegate_params.HasParam("use_nnapi") && + delegate_params.Get("use_nnapi"); } int32_t SingleOpModel::GetTensorSize(int index) const { @@ -374,41 +378,4 @@ void MultiOpModel::AddCustomOp( builder_.CreateVector(custom_option), CustomOptionsFormat_FLEXBUFFERS)); } - -/*static*/ KernelTestDelegateProviders* KernelTestDelegateProviders::Get() { - static KernelTestDelegateProviders* const providers = - new KernelTestDelegateProviders(); - return providers; -} - -KernelTestDelegateProviders::KernelTestDelegateProviders() { - for (const auto& one : tools::GetRegisteredDelegateProviders()) { - params_.Merge(one->DefaultParams()); - } -} - -bool KernelTestDelegateProviders::InitFromCmdlineArgs(int* argc, - const char** argv) { - std::vector flags; - for (const auto& one : tools::GetRegisteredDelegateProviders()) { - auto one_flags = one->CreateFlags(¶ms_); - flags.insert(flags.end(), one_flags.begin(), one_flags.end()); - } - return tflite::Flags::Parse(argc, argv, flags); -} - -std::vector -KernelTestDelegateProviders::CreateAllDelegates( - const tools::ToolParams& params) const { - std::vector delegates; - for (const auto& one : tools::GetRegisteredDelegateProviders()) { - auto ptr = one->CreateTfLiteDelegate(params); - // It's possible that a delegate of certain type won't be created as - // user-specified benchmark params tells not to. - if (ptr == nullptr) continue; - delegates.emplace_back(std::move(ptr)); - TFLITE_LOG(INFO) << one->GetName() << " delegate is created."; - } - return delegates; -} } // namespace tflite diff --git a/tensorflow/lite/kernels/test_util.h b/tensorflow/lite/kernels/test_util.h index 27b59cf3c4c..c08a40f06a8 100644 --- a/tensorflow/lite/kernels/test_util.h +++ b/tensorflow/lite/kernels/test_util.h @@ -45,10 +45,8 @@ limitations under the License. #include "tensorflow/lite/string_type.h" #include "tensorflow/lite/string_util.h" #include "tensorflow/lite/testing/util.h" // IWYU pragma: keep -#include "tensorflow/lite/tools/delegates/delegate_provider.h" #include "tensorflow/lite/tools/optimize/quantization_utils.h" #include "tensorflow/lite/tools/optimize/sparsity/format_converter.h" -#include "tensorflow/lite/tools/tool_params.h" #include "tensorflow/lite/type_to_tflitetype.h" namespace tflite { @@ -899,52 +897,6 @@ class MultiOpModel : public SingleOpModel { return AddTensor(t, {}, false); } }; - -// A utility class to provide TfLite delegate creations for kernel tests. The -// options of a particular delegate could be specified from commandline flags by -// using the delegate provider registrar as implemented in lite/tools/delegates -// directory. -class KernelTestDelegateProviders { - public: - // Returns a global KernelTestDelegateProviders instance. - static KernelTestDelegateProviders* Get(); - - KernelTestDelegateProviders(); - - // Initialize delegate-related parameters from commandline arguments and - // returns true if successful. - bool InitFromCmdlineArgs(int* argc, const char** argv); - - // This provides a way to overwrite parameter values programmatically before - // creating TfLite delegates. Note, changes to the returned ToolParams will - // have a global impact on creating TfLite delegates. 
- // If a local-only change is preferred, recommend using the following workflow - // create TfLite delegates via delegate providers: - // tools::ToolParams local_params; - // local_params.Merge(KernelTestDelegateProviders::Get()->ConstParams()); - // Overwrite params in local_params by calling local_params.Set<...>(...); - // Get TfLite delegates via - // KernelTestDelegateProviders::Get()->CreateAllDelegates(local_params); - tools::ToolParams* MutableParams() { return ¶ms_; } - const tools::ToolParams& ConstParams() const { return params_; } - - // Create a list of TfLite delegates based on the provided parameters - // `params`. - std::vector CreateAllDelegates( - const tools::ToolParams& params) const; - - // Similar to the above, but creating a list of TfLite delegates based on what - // have been initialized (i.e. 'params_'). - std::vector CreateAllDelegates() const { - return CreateAllDelegates(params_); - } - - private: - // Contain delegate-related parameters that are initialized from command-line - // flags. - tools::ToolParams params_; -}; - } // namespace tflite #endif // TENSORFLOW_LITE_KERNELS_TEST_UTIL_H_ diff --git a/tensorflow/lite/kernels/test_util_test.cc b/tensorflow/lite/kernels/test_util_test.cc index 88dca69b614..e6f865f6cd6 100644 --- a/tensorflow/lite/kernels/test_util_test.cc +++ b/tensorflow/lite/kernels/test_util_test.cc @@ -47,33 +47,6 @@ TEST(TestUtilTest, QuantizeVectorScalingUp) { EXPECT_THAT(q_data, ElementsAreArray(expected)); } -TEST(KernelTestDelegateProvidersTest, DelegateProvidersParams) { - KernelTestDelegateProviders providers; - const auto& params = providers.ConstParams(); - EXPECT_TRUE(params.HasParam("use_xnnpack")); - EXPECT_TRUE(params.HasParam("use_nnapi")); - - int argc = 3; - const char* argv[] = {"program_name", "--use_nnapi=true", - "--other_undefined_flag=1"}; - EXPECT_TRUE(providers.InitFromCmdlineArgs(&argc, argv)); - EXPECT_TRUE(params.Get("use_nnapi")); - EXPECT_EQ(2, argc); - EXPECT_EQ("--other_undefined_flag=1", argv[1]); -} - -TEST(KernelTestDelegateProvidersTest, CreateTfLiteDelegates) { -#if !defined(__Fuchsia__) && !defined(TFLITE_WITHOUT_XNNPACK) - KernelTestDelegateProviders providers; - providers.MutableParams()->Set("use_xnnpack", true); - EXPECT_GE(providers.CreateAllDelegates().size(), 1); - - tools::ToolParams local_params; - local_params.Merge(providers.ConstParams()); - local_params.Set("use_xnnpack", false); - EXPECT_TRUE(providers.CreateAllDelegates(local_params).empty()); -#endif -} } // namespace } // namespace tflite diff --git a/tensorflow/lite/testing/BUILD b/tensorflow/lite/testing/BUILD index 7fe2cfde439..d0744c49445 100644 --- a/tensorflow/lite/testing/BUILD +++ b/tensorflow/lite/testing/BUILD @@ -59,12 +59,14 @@ exports_files([ deps = [ ":parse_testdata_lib", ":tflite_driver", + ":tflite_driver_delegate_providers", ":util", "@com_google_googletest//:gtest", "@com_googlesource_code_re2//:re2", "//tensorflow/lite:builtin_op_data", "//tensorflow/lite:framework", "//tensorflow/lite/kernels:builtin_ops", + "//tensorflow/lite/kernels:test_delegate_providers_lib", ] + select({ "//conditions:default": [ "//tensorflow/core:framework_internal", @@ -229,8 +231,9 @@ cc_library( "//tensorflow/lite:string_util", "//tensorflow/lite/kernels:builtin_ops", "//tensorflow/lite/kernels:custom_ops", - "//tensorflow/lite/kernels/hashtable:hashtable_op_kernels", "//tensorflow/lite/kernels:reference_ops", + "//tensorflow/lite/kernels:test_delegate_providers_lib", + "//tensorflow/lite/kernels/hashtable:hashtable_op_kernels", 
"//tensorflow/lite/tools/evaluation:utils", ] + select({ "//tensorflow:ios": [], @@ -238,6 +241,22 @@ cc_library( }), ) +# A convenient library of tflite delegate execution providers for tests based +# on the `tflite_driver` library. +cc_library( + name = "tflite_driver_delegate_providers", + deps = [ + "//tensorflow/lite/tools/delegates:coreml_delegate_provider", + "//tensorflow/lite/tools/delegates:default_execution_provider", + "//tensorflow/lite/tools/delegates:external_delegate_provider", + "//tensorflow/lite/tools/delegates:gpu_delegate_provider", + "//tensorflow/lite/tools/delegates:hexagon_delegate_provider", + "//tensorflow/lite/tools/delegates:nnapi_delegate_provider", + "//tensorflow/lite/tools/delegates:xnnpack_delegate_provider", + ], + alwayslink = 1, +) + tf_cc_test( name = "tflite_driver_test", size = "small", diff --git a/tensorflow/lite/testing/generated_examples_zip_test.cc b/tensorflow/lite/testing/generated_examples_zip_test.cc index 92f696d0e65..8d03911eb87 100644 --- a/tensorflow/lite/testing/generated_examples_zip_test.cc +++ b/tensorflow/lite/testing/generated_examples_zip_test.cc @@ -25,6 +25,7 @@ limitations under the License. #include "tensorflow/core/platform/status.h" #include "tensorflow/core/platform/subprocess.h" #include "tensorflow/core/util/command_line_flags.h" +#include "tensorflow/lite/kernels/test_delegate_providers.h" #include "tensorflow/lite/testing/parse_testdata.h" #include "tensorflow/lite/testing/tflite_driver.h" #include "tensorflow/lite/testing/util.h" @@ -47,7 +48,6 @@ string* FLAGS_tar_binary_path = new string("/bin/tar"); string* FLAGS_unzip_binary_path = new string("/system/bin/unzip"); string* FLAGS_tar_binary_path = new string("/system/bin/tar"); #endif -bool FLAGS_use_nnapi = false; bool FLAGS_ignore_unsupported_nnapi = false; } // namespace @@ -298,9 +298,10 @@ TEST_P(OpsTest, RunZipTests) { std::ifstream tflite_stream(tflite_test_case); ASSERT_TRUE(tflite_stream.is_open()) << tflite_test_case; - tflite::testing::TfLiteDriver test_driver( - FLAGS_use_nnapi ? 
TfLiteDriver::DelegateType::kNnapi - : TfLiteDriver::DelegateType::kNone); + tflite::testing::TfLiteDriver test_driver; + const bool use_nnapi = + tflite::KernelTestDelegateProviders::Get()->ConstParams().Get( + "use_nnapi"); auto quantized_tests_error = GetQuantizeTestsError(); bool fully_quantize = false; @@ -317,7 +318,7 @@ TEST_P(OpsTest, RunZipTests) { test_driver.SetModelBaseDir(tflite_dir); auto broken_tests = GetKnownBrokenTests(); - if (FLAGS_use_nnapi) { + if (use_nnapi) { auto kBrokenNnapiTests = GetKnownBrokenNnapiTests(); broken_tests.insert(kBrokenNnapiTests.begin(), kBrokenNnapiTests.end()); } @@ -334,7 +335,7 @@ TEST_P(OpsTest, RunZipTests) { } } if (bug_number.empty()) { - if (FLAGS_use_nnapi && FLAGS_ignore_unsupported_nnapi && !result) { + if (use_nnapi && FLAGS_ignore_unsupported_nnapi && !result) { EXPECT_EQ(message, string("Failed to invoke interpreter")) << message; } else { EXPECT_TRUE(result) << message; @@ -408,8 +409,6 @@ int main(int argc, char** argv) { tensorflow::Flag("tar_binary_path", tflite::testing::FLAGS_tar_binary_path, "Location of a suitable tar binary."), - tensorflow::Flag("use_nnapi", &tflite::testing::FLAGS_use_nnapi, - "Whether to enable the NNAPI delegate"), tensorflow::Flag("ignore_unsupported_nnapi", &tflite::testing::FLAGS_ignore_unsupported_nnapi, "Don't fail tests just because delegation to NNAPI " @@ -417,7 +416,12 @@ int main(int argc, char** argv) { bool success = tensorflow::Flags::Parse(&argc, argv, flags); if (!success || (argc == 2 && !strcmp(argv[1], "--helpfull"))) { fprintf(stderr, "%s", tensorflow::Flags::Usage(argv[0], flags).c_str()); - return 1; + return EXIT_FAILURE; + } + + if (!tflite::testing::TfLiteDriver::InitTestDelegateProviders( + &argc, const_cast(argv))) { + return EXIT_FAILURE; } ::tflite::LogToStderr(); diff --git a/tensorflow/lite/testing/tflite_driver.cc b/tensorflow/lite/testing/tflite_driver.cc index ae352ce04c4..526b3968b21 100644 --- a/tensorflow/lite/testing/tflite_driver.cc +++ b/tensorflow/lite/testing/tflite_driver.cc @@ -28,6 +28,7 @@ limitations under the License. 
#include "tensorflow/lite/kernels/hashtable/hashtable_ops.h" #include "tensorflow/lite/kernels/register.h" #include "tensorflow/lite/kernels/register_ref.h" +#include "tensorflow/lite/kernels/test_delegate_providers.h" #include "tensorflow/lite/string_util.h" #include "tensorflow/lite/testing/join.h" #include "tensorflow/lite/testing/split.h" @@ -346,6 +347,12 @@ bool TfLiteDriver::DataExpectation::Check(bool verbose, } } +/* static */ +bool TfLiteDriver::InitTestDelegateProviders(int* argc, const char** argv) { + return tflite::KernelTestDelegateProviders::Get()->InitFromCmdlineArgs(argc, + argv); +} + TfLiteDriver::TfLiteDriver(DelegateType delegate_type, bool reference_kernel) : delegate_(nullptr, nullptr), relative_threshold_(kRelativeThreshold), @@ -414,6 +421,16 @@ void TfLiteDriver::LoadModel(const string& bin_file_path) { Invalidate("Unable to the build graph using the delegate"); return; } + } else { + auto* delegate_providers = tflite::KernelTestDelegateProviders::Get(); + for (auto& one : delegate_providers->CreateAllDelegates()) { + if (interpreter_->ModifyGraphWithDelegate(std::move(one)) != kTfLiteOk) { + Invalidate( + "Unable to the build graph using the delegate initialized from " + "tflite::KernelTestDelegateProviders"); + return; + } + } } must_allocate_tensors_ = true; diff --git a/tensorflow/lite/testing/tflite_driver.h b/tensorflow/lite/testing/tflite_driver.h index bce3e9c4c01..1d7095efdb4 100644 --- a/tensorflow/lite/testing/tflite_driver.h +++ b/tensorflow/lite/testing/tflite_driver.h @@ -40,10 +40,15 @@ class TfLiteDriver : public TestRunner { kFlex, }; + // Initialize the global test delegate providers from commandline arguments + // and returns true if successful. + static bool InitTestDelegateProviders(int* argc, const char** argv); + /** * Creates a new TfLiteDriver * @param delegate The (optional) delegate to use. - * @param reference_kernel Whether to use the builtin reference kernel ops. + * @param reference_kernel Whether to use the builtin reference kernel + * ops. */ explicit TfLiteDriver(DelegateType delegate_type = DelegateType::kNone, bool reference_kernel = false); From 0126eabd3aae4fecdee62b523993f45babc681e6 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Sat, 25 Jul 2020 22:15:01 -0700 Subject: [PATCH 1344/2522] Internal change PiperOrigin-RevId: 323210716 Change-Id: I46118381520dfdfd9360d4f9744dda8fae44dfaf --- third_party/mlir/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/third_party/mlir/BUILD b/third_party/mlir/BUILD index ae413a160d9..9245404a8c9 100644 --- a/third_party/mlir/BUILD +++ b/third_party/mlir/BUILD @@ -740,6 +740,7 @@ cc_library( ":MLIRShapeCanonicalizationIncGen", ":ShapeOpsIncGen", ":SideEffectInterfaces", + ":StandardOps", ":Support", "@llvm-project//llvm:Support", ], From 6a102105aa745660e6611193007803b999289468 Mon Sep 17 00:00:00 2001 From: Jing Pu Date: Sat, 25 Jul 2020 23:35:16 -0700 Subject: [PATCH 1345/2522] Make the trait of TFL_BatchMatMulOp quantization aware. 
PiperOrigin-RevId: 323214973 Change-Id: I8a85ed8e1c4d0c5a5fcabaeb94b0f1c241ceb266 --- tensorflow/compiler/mlir/lite/ir/tfl_ops.td | 5 ++++- tensorflow/compiler/mlir/lite/tests/ops.mlir | 7 +++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td index 715d047f0bf..6dc9fda656f 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td +++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td @@ -943,7 +943,10 @@ def TFL_BatchMatMulOp : TFL_Op<"batch_matmul", [ NoSideEffect, TFL_OperandHasAtleastRank<0, 2>, TFL_OperandHasAtleastRank<1, 2>, - SameOperandsAndResultElementType]> { + PredOpTrait<"x and output must have same element type", + TFL_TCresVTEtIsSameAsOp<0, 0>>, + PredOpTrait<"y and output must have same element type", + TFL_TCresVTEtIsSameAsOp<0, 1>>]> { let summary = "Batch Matrix Multiply Operator"; diff --git a/tensorflow/compiler/mlir/lite/tests/ops.mlir b/tensorflow/compiler/mlir/lite/tests/ops.mlir index 06e05987ee6..7ef6997f938 100644 --- a/tensorflow/compiler/mlir/lite/tests/ops.mlir +++ b/tensorflow/compiler/mlir/lite/tests/ops.mlir @@ -1248,6 +1248,13 @@ func @testSpaceToBatchND(%arg0 : tensor<1x4x4x3xf32>, %arg1 : tensor<2xi32>, %ar // ----- +func @testBatchMatmulQuant(%arg0 : tensor<1x4x384x32x!quant.uniform>, %arg1 : tensor<1x4x384x32x!quant.uniform>) -> tensor<1x4x384x384x!quant.uniform> { + // CHECK: "tfl.batch_matmul"(%arg0, %arg1) + %0 = "tfl.batch_matmul"(%arg0, %arg1) {adj_x = false, adj_y = true} : (tensor<1x4x384x32x!quant.uniform>, tensor<1x4x384x32x!quant.uniform>) -> tensor<1x4x384x384x!quant.uniform> + return %0 : tensor<1x4x384x384x!quant.uniform> +} +// ----- + func @testConcat(%arg0: tensor<1x2xi32>, %arg1: tensor<1x2xi32>) -> tensor<2x2xi32> { // CHECK: "tfl.concatenation"(%arg0, %arg1) {axis = 0 : i32, fused_activation_function = "NONE"} %0 = "tfl.concatenation"(%arg0, %arg1) {axis = 0 : i32, fused_activation_function = "NONE"} : (tensor<1x2xi32>, tensor<1x2xi32>) -> tensor<2x2xi32> From a0a9cf2574257c368a2d57505b51470536364edb Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Sun, 26 Jul 2020 01:16:02 -0700 Subject: [PATCH 1346/2522] [XLA:SPMD] Handle replicated operands in recursive Dot partitioning Also includes utilities to partially shard data into groups (group-level dynamic slice). 
PiperOrigin-RevId: 323221306 Change-Id: If6e727270613210bee83b521d5254fde94d8cf0b --- tensorflow/compiler/xla/service/spmd/BUILD | 1 + .../compiler/xla/service/spmd/dot_handler.cc | 227 +++++++++++++----- .../xla/service/spmd/spmd_partitioner_test.cc | 87 +++++++ .../xla/service/spmd/spmd_partitioner_util.cc | 42 ++++ .../xla/service/spmd/spmd_partitioner_util.h | 8 + 5 files changed, 308 insertions(+), 57 deletions(-) diff --git a/tensorflow/compiler/xla/service/spmd/BUILD b/tensorflow/compiler/xla/service/spmd/BUILD index a67e4cf55c5..ce19934bb88 100644 --- a/tensorflow/compiler/xla/service/spmd/BUILD +++ b/tensorflow/compiler/xla/service/spmd/BUILD @@ -51,6 +51,7 @@ cc_library( "//tensorflow/core/platform:numbers", "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:optional", diff --git a/tensorflow/compiler/xla/service/spmd/dot_handler.cc b/tensorflow/compiler/xla/service/spmd/dot_handler.cc index 8fea788b1b7..16c892c74f7 100644 --- a/tensorflow/compiler/xla/service/spmd/dot_handler.cc +++ b/tensorflow/compiler/xla/service/spmd/dot_handler.cc @@ -14,6 +14,9 @@ limitations under the License. ==============================================================================*/ #include "absl/algorithm/container.h" +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" +#include "absl/types/optional.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" @@ -608,6 +611,8 @@ StatusOr PartitionDotGroupOnBatch( PartitionedHlo lhs, PartitionedHlo rhs, const Shape& output_base_shape, const HloSharding& output_sharding, const DotGeneralDimsMapping& dims_mapping, int64 num_partitions, + int64 lhs_contracting_partitions, int64 rhs_contracting_partitions, + int64 lhs_non_contracting_partitions, int64 rhs_non_contracting_partitions, const std::function( HloInstruction*, HloInstruction*, SpmdBuilder*)>& create_sharded_dot, HloModule* module, HloInstruction* original_hlo, @@ -618,9 +623,13 @@ StatusOr PartitionDotGroupOnBatch( std::vector rhs_dims; std::vector output_dims; auto lhs_sharding_dims_adjusted_to_output = - lhs.sharding().tile_assignment().dimensions(); + lhs.sharding().IsReplicated() + ? std::vector(lhs.base_shape().rank(), 1) + : lhs.sharding().tile_assignment().dimensions(); auto rhs_sharding_dims_adjusted_to_output = - lhs.sharding().tile_assignment().dimensions(); + rhs.sharding().IsReplicated() + ? 
std::vector(rhs.base_shape().rank(), 1) + : rhs.sharding().tile_assignment().dimensions(); auto output_sharding_dims_adjusted_to_lhs = output_sharding.tile_assignment().dimensions(); bool lhs_rhs_dims_matching = true; @@ -639,10 +648,14 @@ StatusOr PartitionDotGroupOnBatch( output_sharding_dims_adjusted_to_lhs[dim.output] = lhs.sharding().tile_assignment().dim(dim.lhs); } - auto lhs_grouped = GroupShardingOnDims(lhs.sharding(), lhs_dims); - auto rhs_grouped = GroupShardingOnDims(rhs.sharding(), rhs_dims); auto output_grouped = GroupShardingOnDims(output_sharding, output_dims); + PartitionedHlo per_group_lhs = lhs; + PartitionedHlo per_group_rhs = rhs; + auto lhs_sharding = lhs.sharding(); + auto rhs_sharding = rhs.sharding(); if (lhs_rhs_dims_matching) { + auto lhs_grouped = GroupShardingOnDims(lhs.sharding(), lhs_dims); + auto rhs_grouped = GroupShardingOnDims(rhs.sharding(), rhs_dims); if (ShapeUtil::ByteSizeOf(lhs.base_shape()) > ShapeUtil::ByteSizeOf(rhs.base_shape())) { rhs_grouped = AlignGroupsWith(std::move(rhs_grouped), lhs_grouped); @@ -657,42 +670,136 @@ StatusOr PartitionDotGroupOnBatch( GroupShardingOnDims(HloSharding::Tile(reshaped_output_tiling), output_dims), lhs_grouped); + auto per_group_partitioner_state = CreatePerGroupPartitioningState( + lhs.state(), lhs_grouped.device_groups, b); + lhs.hlo()->set_sharding(lhs_grouped.sharding); + rhs.hlo()->set_sharding(rhs_grouped.sharding); + CHECK(lhs.hlo() != rhs.hlo() || + lhs_grouped.sharding == rhs_grouped.sharding); + per_group_lhs = PartitionedHlo( + lhs.hlo(), GetPerGroupBaseShape(lhs_grouped, lhs.base_shape()), + per_group_partitioner_state); + per_group_rhs = PartitionedHlo( + rhs.hlo(), GetPerGroupBaseShape(rhs_grouped, rhs.base_shape()), + per_group_partitioner_state); } else { - auto reshaped_lhs_tiling = lhs.sharding().tile_assignment(); - reshaped_lhs_tiling.Reshape(lhs_sharding_dims_adjusted_to_output); - lhs_grouped = AlignGroupsWith( - GroupShardingOnDims(HloSharding::Tile(reshaped_lhs_tiling), lhs_dims), - output_grouped); - lhs = lhs.Reshard(UngroupSharding(lhs_grouped)); - auto reshaped_rhs_tiling = rhs.sharding().tile_assignment(); - reshaped_rhs_tiling.Reshape(rhs_sharding_dims_adjusted_to_output); - rhs_grouped = AlignGroupsWith( - GroupShardingOnDims(HloSharding::Tile(reshaped_rhs_tiling), rhs_dims), - output_grouped); - rhs = rhs.Reshard(UngroupSharding(rhs_grouped)); + auto per_group_partitioner_state = CreatePerGroupPartitioningState( + lhs.state(), output_grouped.device_groups, b); + auto reshard_to_output_batch = + [&](PartitionedHlo operand, absl::Span batch_dims, + absl::Span contracting_dims, + absl::Span non_contracting_dims, + int64 contracting_dim_partitions, + int64 non_contracting_dim_partitions, + int64 other_contracting_dim_partitions, + std::vector* sharding_dims_adjusted_to_output) + -> absl::optional { + if (operand.sharding().IsReplicated()) { + auto partially_sharded = PerGroupSliceFromReplicated( + operand.hlo(), operand.state().partition_id, + output_grouped.device_groups, batch_dims, + output_grouped.group_dim_sizes, b); + partially_sharded->set_sharding(HloSharding::Replicate()); + return PartitionedHlo(partially_sharded, partially_sharded->shape(), + per_group_partitioner_state); + } + auto reshaped_tiling = operand.sharding().tile_assignment(); + // It's possible that the operand is not initially sharded on batch + // dimensions in the same way as the output, although being tiled. 
In that + // case, the current sharding_dims_adjusted_to_output may contain more + // partitions than available devices. We remove partitioning on other + // dimensions. + if (Product(*sharding_dims_adjusted_to_output) > + reshaped_tiling.num_elements()) { + if (Product(*sharding_dims_adjusted_to_output) % + reshaped_tiling.num_elements() != + 0) { + return absl::nullopt; + } + int64 ratio = Product(*sharding_dims_adjusted_to_output) / + reshaped_tiling.num_elements(); + if (ratio == non_contracting_dim_partitions && + (ratio != contracting_dim_partitions || + contracting_dim_partitions == other_contracting_dim_partitions)) { + for (int64 dim : non_contracting_dims) { + (*sharding_dims_adjusted_to_output)[dim] = 1; + } + } else if (ratio == contracting_dim_partitions) { + for (int64 dim : contracting_dims) { + (*sharding_dims_adjusted_to_output)[dim] = 1; + } + } + } + // If the operand is initially sharded more ways than the output in the + // batch dimensions, sharding_dims_adjusted_to_output currently contains + // fewer partitions than available devices. We do not handle this case. + if (Product(*sharding_dims_adjusted_to_output) < + reshaped_tiling.num_elements()) { + return absl::nullopt; + } + reshaped_tiling.Reshape(*sharding_dims_adjusted_to_output); + auto grouped = AlignGroupsWith( + GroupShardingOnDims(HloSharding::Tile(reshaped_tiling), batch_dims), + output_grouped); + auto resharded = operand.Reshard(UngroupSharding(grouped)); + resharded.hlo()->set_sharding(grouped.sharding); + return PartitionedHlo(resharded.hlo(), + GetPerGroupBaseShape(grouped, operand.base_shape()), + per_group_partitioner_state); + }; + std::vector lhs_contracting_dims; + std::vector rhs_contracting_dims; + lhs_contracting_dims.reserve(dims_mapping.contracting_dims.size()); + rhs_contracting_dims.reserve(dims_mapping.contracting_dims.size()); + for (const auto& dim : dims_mapping.contracting_dims) { + lhs_contracting_dims.push_back(dim.lhs); + rhs_contracting_dims.push_back(dim.rhs); + } + std::vector lhs_non_contracting_dims; + std::vector rhs_non_contracting_dims; + lhs_non_contracting_dims.reserve( + dims_mapping.lhs_non_contracting_dims.size()); + rhs_non_contracting_dims.reserve( + dims_mapping.rhs_non_contracting_dims.size()); + for (const auto& dim : dims_mapping.lhs_non_contracting_dims) { + lhs_non_contracting_dims.push_back(dim.lhs); + } + for (const auto& dim : dims_mapping.rhs_non_contracting_dims) { + rhs_non_contracting_dims.push_back(dim.rhs); + } + if (auto resharded = reshard_to_output_batch( + lhs, lhs_dims, lhs_contracting_dims, lhs_non_contracting_dims, + lhs_contracting_partitions, lhs_non_contracting_partitions, + rhs_contracting_partitions, + &lhs_sharding_dims_adjusted_to_output)) { + per_group_lhs = *resharded; + } else { + return nullptr; + } + if (auto resharded = reshard_to_output_batch( + rhs, rhs_dims, rhs_contracting_dims, rhs_non_contracting_dims, + rhs_contracting_partitions, rhs_non_contracting_partitions, + lhs_contracting_partitions, + &rhs_sharding_dims_adjusted_to_output)) { + per_group_rhs = *resharded; + } else { + return nullptr; + } + CHECK(lhs.hlo() != rhs.hlo() || + per_group_lhs.sharding() == per_group_rhs.sharding()); } - auto per_group_partitioner_state = CreatePerGroupPartitioningState( - lhs.state(), lhs_grouped.device_groups, b); - lhs.hlo()->set_sharding(lhs_grouped.sharding); - rhs.hlo()->set_sharding(rhs_grouped.sharding); - CHECK(lhs.hlo() != rhs.hlo() || lhs_grouped.sharding == rhs_grouped.sharding); TF_ASSIGN_OR_RETURN( auto dot, - PartitionDot( - 
PartitionedHlo(lhs.hlo(), - GetPerGroupBaseShape(lhs_grouped, lhs.base_shape()), - per_group_partitioner_state), - PartitionedHlo(rhs.hlo(), - GetPerGroupBaseShape(rhs_grouped, rhs.base_shape()), - per_group_partitioner_state), - GetPerGroupBaseShape(output_grouped, output_base_shape), - output_grouped.sharding, dims_mapping, - num_partitions / lhs_grouped.device_groups.size(), create_sharded_dot, - module, original_hlo, threshold_for_windowed_einsum_mib, b, - windowed_dot_general_loops)); - // Reset the LHS sharding to the ungrouped one. - lhs.hlo()->set_sharding(UngroupSharding(lhs_grouped)); - rhs.hlo()->set_sharding(UngroupSharding(rhs_grouped)); + PartitionDot(per_group_lhs, per_group_rhs, + GetPerGroupBaseShape(output_grouped, output_base_shape), + output_grouped.sharding, dims_mapping, + num_partitions / output_grouped.device_groups.size(), + create_sharded_dot, module, original_hlo, + threshold_for_windowed_einsum_mib, b, + windowed_dot_general_loops)); + // Make sure the operands' sharding are set to the ungrouped ones. + lhs.hlo()->set_sharding(lhs_sharding); + rhs.hlo()->set_sharding(rhs_sharding); dot->set_sharding(UngroupSharding(output_grouped)); return PartitionedHlo(dot, output_base_shape, lhs.state()) .Reshard(output_sharding) @@ -734,7 +841,7 @@ StatusOr PartitionDotGroupOnNonContracting( : dims_mapping.lhs_non_contracting_dims) { other_group_dims.push_back(lhs_matching ? dim.rhs : dim.lhs); } - } else { + } else if (!other.sharding().IsReplicated()) { return nullptr; } auto matching_sharding_dims = @@ -759,21 +866,24 @@ StatusOr PartitionDotGroupOnNonContracting( matching_dims), output_grouped); matching = matching.Reshard(UngroupSharding(matching_grouped)); - - auto other_grouped = - AlignGroupsWith(GroupShardingOnDims(other.sharding(), other_group_dims), - output_grouped, /*ignore_group_order=*/true); - other = other.Reshard(UngroupSharding(other_grouped)); - auto partially_replicated_other = - other.ReplicatePartial(other_grouped.group_dims); auto per_group_partitioner_state = CreatePerGroupPartitioningState( matching.state(), matching_grouped.device_groups, b); matching.hlo()->set_sharding(matching_grouped.sharding); - partially_replicated_other->set_sharding(other_grouped.sharding); auto matching_p = PartitionedHlo( matching.hlo(), GetPerGroupBaseShape(matching_grouped, matching.base_shape()), per_group_partitioner_state); + + auto partially_replicated_other = other.hlo(); + if (!other.sharding().IsReplicated()) { + auto other_grouped = + AlignGroupsWith(GroupShardingOnDims(other.sharding(), other_group_dims), + output_grouped, /*ignore_group_order=*/true); + other = other.Reshard(UngroupSharding(other_grouped)); + partially_replicated_other = + other.ReplicatePartial(other_grouped.group_dims); + partially_replicated_other->set_sharding(other_grouped.sharding); + } auto other_p = PartitionedHlo(partially_replicated_other, other.base_shape(), per_group_partitioner_state); TF_ASSIGN_OR_RETURN( @@ -861,15 +971,18 @@ StatusOr PartitionDot( // Recursively partition on different types of dimensions. // // Case 1: Group partitions by batch. 
- if (lhs_batch_partitions == rhs_batch_partitions && - lhs_batch_partitions == output_batch_partitions && - lhs_batch_partitions > 1) { + if ((lhs_batch_partitions == output_batch_partitions || + rhs_batch_partitions == output_batch_partitions) && + output_batch_partitions > 1) { TF_ASSIGN_OR_RETURN( auto dot, PartitionDotGroupOnBatch( lhs, rhs, output_base_shape, output_sharding, dims_mapping, - num_partitions, create_sharded_dot, module, original_hlo, - threshold_for_windowed_einsum_mib, b, windowed_dot_general_loops)); + num_partitions, lhs_contracting_partitions, + rhs_contracting_partitions, lhs_non_contracting_partitions, + rhs_non_contracting_partitions, create_sharded_dot, module, + original_hlo, threshold_for_windowed_einsum_mib, b, + windowed_dot_general_loops)); if (dot) { return dot; } @@ -956,11 +1069,11 @@ namespace { // // FindInputNodesIfOnlyDependOnSmallOperands(multiply) will return // <{broadcast, iota, constant, add, multiply}, [a]>. -std::pair, std::vector> +std::pair, std::vector> FindInputNodesIfOnlyDependOnSmallOperands(HloInstruction* hlo) { - std::unordered_set nodes_found; + absl::flat_hash_set nodes_found; std::vector new_operands; - std::unordered_set new_operands_set; + absl::flat_hash_set new_operands_set; std::vector worklist; worklist.push_back(hlo); while (!worklist.empty()) { @@ -1053,7 +1166,7 @@ Status SinkInputNodesIntoWindowedDotGeneralLoopOnContractingDimensions( // Create nodes inside the loop body. std::vector worklist; - std::unordered_map outside_to_inside; + absl::flat_hash_map outside_to_inside; auto add_users_if_available = [&](HloInstruction* inst) { for (auto u : inst->users()) { if (outside_to_inside.count(u) == 0 && to_sink.count(u) > 0 && @@ -1143,9 +1256,9 @@ Status MoveUsersIntoWindowedDotGeneralLoopOnNonContractingDimensions( // Find the reduce outputs and the input nodes they depend on, if input nodes // only have small operands. - std::unordered_set to_move; + absl::flat_hash_set to_move; std::vector new_operands; - std::unordered_set new_operands_set; + absl::flat_hash_set new_operands_set; std::vector reduce_outputs; std::vector worklist; Shape padded_shape = user_gte->shape(); @@ -1280,7 +1393,7 @@ Status MoveUsersIntoWindowedDotGeneralLoopOnNonContractingDimensions( new_input_subtuple->shape(), body_param, 2)); // Now create the moved nodes inside the loop body. 
- std::unordered_map outside_to_inside; + absl::flat_hash_map outside_to_inside; worklist.clear(); auto add_users_if_available = [&](HloInstruction* inst) { for (auto u : inst->users()) { diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc index 5f3fd8d53e7..278a0685088 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc @@ -3904,6 +3904,34 @@ ENTRY entry { _, _))); } +TEST_F(SpmdPartitioningTest, Dot2DPartitionedNonContractingAndContracting2) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[48,100] parameter(0), sharding={replicated} + %rhs = f32[32,100] parameter(1), sharding={devices=[2,2]0,1,2,3} + ROOT %dot = f32[48,32] dot(%lhs, %rhs), + lhs_batch_dims={}, rhs_batch_dims={}, + lhs_contracting_dims={1}, rhs_contracting_dims={1}, + sharding={devices=[2,2]0,1,2,3} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/4)); + VLOG(1) << module->ToString(); + + auto lhs = AllOf(op::Shape("f32[48,100]"), op::Parameter(0)); + auto lhs_slice = AllOf(op::Shape("f32[24,100]"), op::DynamicSlice(lhs, _, _)); + auto rhs = AllOf(op::Shape("f32[16,50]"), op::Parameter(1)); + auto partial_replicated_rhs = AllOf( + op::Shape("f32[16,100]"), op::AllReduce(op::DynamicUpdateSlice( + _, op::CollectivePermute(rhs), _, _))); + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, AllOf(op::Shape("f32[24,16]"), + op::Dot(lhs_slice, partial_replicated_rhs))); +} + TEST_F(SpmdPartitioningTest, Dot2DPartitionedBatchAndNonContracting) { const char* const hlo_string = R"( HloModule module @@ -3931,6 +3959,65 @@ ENTRY entry { op::Dot(lhs, partial_replicated_rhs))); } +TEST_F(SpmdPartitioningTest, Dot2DPartitionedBatchAndContracting) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[4,24,100] parameter(0), sharding={devices=[2,1,2]0,1,2,3} + %rhs = f32[4,32,100] parameter(1), sharding={devices=[1,2,2]0,1,2,3} + ROOT %dot = f32[4,24,32] dot(%lhs, %rhs), + lhs_batch_dims={0}, rhs_batch_dims={0}, + lhs_contracting_dims={2}, rhs_contracting_dims={2}, + sharding={devices=[2,2,1]0,1,2,3} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/4)); + VLOG(1) << module->ToString(); + + auto lhs = AllOf(op::Shape("f32[2,24,50]"), op::Parameter(0)); + auto rhs = AllOf(op::Shape("f32[4,16,50]"), op::Parameter(1)); + auto resharded_rhs = + AllOf(op::Shape("f32[2,32,50]"), + op::Reshape(op::Transpose(op::AllToAll(op::Reshape(rhs))))); + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, AllOf(op::Shape("f32[2,12,32]"), + op::DynamicSlice( + AllOf(op::Shape("f32[2,24,32]"), + op::AllReduce(op::Dot(lhs, resharded_rhs))), + _, _, _))); +} + +TEST_F(SpmdPartitioningTest, Dot2DPartitionedBatchAndContracting2) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[4,24,100] parameter(0), sharding={devices=[2,1,2]0,1,2,3} + %rhs = f32[4,32,100] parameter(1), sharding={replicated} + ROOT %dot = f32[4,24,32] dot(%lhs, %rhs), + lhs_batch_dims={0}, rhs_batch_dims={0}, + lhs_contracting_dims={2}, rhs_contracting_dims={2}, + sharding={devices=[2,2,1]0,1,2,3} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/4)); + VLOG(1) << module->ToString(); + + auto lhs = 
AllOf(op::Shape("f32[2,24,50]"), op::Parameter(0)); + auto resharded_lhs = + AllOf(op::Shape("f32[2,12,100]"), + op::Reshape(op::Transpose(op::AllToAll(op::Reshape(lhs))))); + auto rhs = AllOf(op::Shape("f32[4,32,100]"), op::Parameter(1)); + auto rhs_slice = + AllOf(op::Shape("f32[2,32,100]"), op::DynamicSlice(rhs, _, _, _)); + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, AllOf(op::Shape("f32[2,12,32]"), + op::Dot(resharded_lhs, rhs_slice))); +} + TEST_F(SpmdPartitioningTest, Dot2DPartitionedBatchNonContractingAndContracting) { const char* const hlo_string = R"( diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc index 454a1da4646..6029e490eb4 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc @@ -1234,5 +1234,47 @@ PartitionedHlo::PartitioningState CreatePerGroupPartitioningState( return result; } +HloInstruction* PerGroupSliceFromReplicated( + HloInstruction* replicated, HloInstruction* partition_id, + const std::vector>& device_groups, + absl::Span group_dims, absl::Span group_dim_sizes, + SpmdBuilder* b) { + std::vector group_ids(device_groups.size() * device_groups[0].size()); + for (int64 g = 0; g < device_groups.size(); ++g) { + for (int64 device : device_groups[g]) { + group_ids[device] = g; + } + } + auto group_id_table = b->AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR1(group_ids))); + auto group_id = b->AddInstruction(HloInstruction::CreateReshape( + ShapeUtil::MakeScalarShape(U32), + b->AddInstruction(HloInstruction::CreateDynamicSlice( + ShapeUtil::MakeShape(U32, {1}), group_id_table, {partition_id}, + {1})))); + std::vector group_level_tile_dims(replicated->shape().rank(), 1); + for (int64 i = 0; i < group_dims.size(); ++i) { + group_level_tile_dims[group_dims[i]] = group_dim_sizes[i]; + } + Array group_level_tile(group_level_tile_dims); + group_level_tile.Each([&](absl::Span indices, int64* group) { + *group = 0; + for (int64 dim : group_dims) { + *group *= group_level_tile.dim(dim); + *group += indices[dim]; + } + }); + auto group_level_sharding = HloSharding::Tile(group_level_tile); + auto padded_hlo = PadBaseShapeBeforeUnevenTiledSharding( + replicated, group_level_sharding, b); + auto shard_shape = + MakePartitionedShape(replicated->shape(), group_level_sharding); + return b->AddInstruction(HloInstruction::CreateDynamicSlice( + shard_shape, padded_hlo, + MakePartitionOffsets(replicated->shape(), group_level_sharding, group_id, + b), + shard_shape.dimensions())); +} + } // namespace spmd } // namespace xla diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h index 6e68375f9b9..c2e1f9357ce 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h @@ -320,6 +320,14 @@ PartitionedHlo::PartitioningState CreatePerGroupPartitioningState( const PartitionedHlo::PartitioningState& state, const std::vector>& device_groups, SpmdBuilder* b); +// Partially shards a replicated HLO into groups along the group dimensions, and +// within each group data is still replicated. 
+HloInstruction* PerGroupSliceFromReplicated( + HloInstruction* replicated, HloInstruction* partition_id, + const std::vector>& device_groups, + absl::Span group_dims, absl::Span group_dim_sizes, + SpmdBuilder* b); + } // namespace spmd } // namespace xla From 11223b10bc9c66d625aef46c1244ca6134b939eb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 26 Jul 2020 02:01:55 -0700 Subject: [PATCH 1347/2522] Update GraphDef version to 474. PiperOrigin-RevId: 323223572 Change-Id: Ia7596afa32ed7c08fabd7f37a2df2241992bd49b --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 4b742fe1968..9e7abdbf45a 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 473 // Updated: 2020/7/25 +#define TF_GRAPH_DEF_VERSION 474 // Updated: 2020/7/26 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From 3a3d3591923501aa6fb03ef4da1d64400ad209d8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 26 Jul 2020 02:01:55 -0700 Subject: [PATCH 1348/2522] compat: Update forward compatibility horizon to 2020-07-26 PiperOrigin-RevId: 323223574 Change-Id: Id574f505a1ebf0b74e489e892ded8920b9592bb9 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 9aa6c0de764..9ca436a4c18 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 25) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 26) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From 68f9891f2f58dde14f6350ed824b28f92ceb3997 Mon Sep 17 00:00:00 2001 From: Cheng CHEN Date: Sun, 26 Jul 2020 23:25:28 +0800 Subject: [PATCH 1349/2522] Fix the usage of uninitialized variable. 
--- .../kernels/batching_util/adaptive_shared_batch_scheduler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h index fedea93849c..f4dc47757d3 100644 --- a/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h +++ b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h @@ -425,7 +425,7 @@ void AdaptiveSharedBatchScheduler::MaybeScheduleNextBatch() { return; } auto best_it = batches_.end(); - double best_score; + double best_score = std::numeric_limits::max; int64 now_micros = GetEnv()->NowMicros(); for (auto it = batches_.begin(); it != batches_.end(); it++) { if ((*it)->schedulable_time_micros() > now_micros) continue; From 3f4e4cb19d763136b0f400d708a4e87443c4da95 Mon Sep 17 00:00:00 2001 From: Vividha <50369708+V2dha@users.noreply.github.com> Date: Sun, 26 Jul 2020 23:39:47 +0530 Subject: [PATCH 1350/2522] Replaced the batch sizes with 'None' instead '?' I have replaced the default text to be shown if batch sizes are not present as 'None' instead of '?' because it is more readable and understandable during the plotting of the model. Furthermore, it avoids the confusion of any error in the model and signifies that the fact that the programmer voluntarily haven't given any batch sizes --- tensorflow/python/keras/utils/vis_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/keras/utils/vis_utils.py b/tensorflow/python/keras/utils/vis_utils.py index 32e32b587fb..8e587e0c80d 100644 --- a/tensorflow/python/keras/utils/vis_utils.py +++ b/tensorflow/python/keras/utils/vis_utils.py @@ -206,7 +206,7 @@ def model_to_dot(model, if show_shapes: def format_shape(shape): - return str(shape).replace(str(None), '?') + return str(shape).replace(str(None), 'None') try: outputlabels = format_shape(layer.output_shape) From cb3cf00aade8e747783b57debe324d6bc00b77b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tar=C3=A9=20Gaskin?= Date: Sun, 26 Jul 2020 20:42:00 +0000 Subject: [PATCH 1351/2522] c, compiler, jit resolutions --- tensorflow/c/eager/tape.h | 10 +++--- tensorflow/cc/framework/gradients.cc | 7 ++-- tensorflow/cc/framework/while_gradients.cc | 2 +- tensorflow/compiler/aot/codegen.cc | 16 +++++---- tensorflow/compiler/jit/build_xla_ops_pass.cc | 2 +- .../compiler/jit/compilability_check_util.h | 2 +- tensorflow/compiler/jit/device_util.cc | 6 ++-- tensorflow/compiler/jit/device_util.h | 2 +- .../jit/encapsulate_subgraphs_pass.cc | 6 ++-- tensorflow/compiler/jit/encapsulate_util.cc | 8 ++--- .../jit/encapsulate_xla_computations_pass.cc | 4 +-- .../jit/extract_outside_compilation_pass.cc | 33 ++++++++++--------- .../compiler/jit/graphcycles/graphcycles.cc | 2 +- .../increase_dynamism_for_auto_jit_pass.cc | 2 +- tensorflow/compiler/jit/shape_inference.cc | 2 +- tensorflow/compiler/jit/xla_cluster_util.cc | 2 +- .../compiler/jit/xla_compilation_cache.cc | 12 +++---- tensorflow/compiler/jit/xla_launch_util.cc | 8 ++--- 18 files changed, 67 insertions(+), 59 deletions(-) diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h index 40cfa87dd66..f52a5e32c1a 100644 --- a/tensorflow/c/eager/tape.h +++ b/tensorflow/c/eager/tape.h @@ -573,7 +573,7 @@ Status InitialGradients( gtl::ArraySlice output_gradients, const TensorTape& tensor_tape, const OpTape& op_tape, std::unordered_map>* result) { - for (int i = 0; i < target_tensor_ids.size(); ++i) { + for (int i = 0, end = 
target_tensor_ids.size(); i < end; ++i) { const int64 id = target_tensor_ids[i]; if (output_gradients.empty() || output_gradients[i] == nullptr) { auto tensor_it = tensor_tape.find(id); @@ -699,7 +699,7 @@ Status GradientTape::ComputeGradient( std::vector out_gradients; out_gradients.reserve(trace.output_tensor_info.size()); std::vector unneeded_gradients; - for (int i = 0; i < trace.input_tensor_id.size(); i++) { + for (int i = 0, end = trace.input_tensor_id.size(); i < end; i++) { const auto& in_tensor_id = trace.input_tensor_id[i]; if (tensor_tape_.find(in_tensor_id) == tensor_tape_.end() && sources_set.find(in_tensor_id) == sources_set.end()) { @@ -709,7 +709,7 @@ Status GradientTape::ComputeGradient( bool any_gradient_nonzero = false; std::vector zero_indices; - for (int i = 0; i < trace.output_tensor_info.size(); ++i) { + for (int i = 0, end = trace.output_tensor_info.size(); i < end; ++i) { const int64 id = trace.output_tensor_info[i].GetID(); auto grad_it = gradients.find(id); if (grad_it == gradients.end()) { @@ -775,7 +775,7 @@ Status GradientTape::ComputeGradient( } VLOG(1) << "Got " << in_gradients.size() << " in_gradients for " << trace.input_tensor_id.size() << " sources"; - for (int i = 0; i < in_gradients.size(); ++i) { + for (int i = 0, end = in_gradients.size(); i < end; ++i) { const int64 id = trace.input_tensor_id[i]; if (in_gradients[i] != nullptr) { auto& unaggregated_grads = gradients[id]; @@ -968,7 +968,7 @@ ForwardAccumulator::ForwardpropFromTape( targets.reserve(grad.size()); used_in_grads.reserve(grad.size()); std::unordered_map sources_that_are_targets; - for (int grad_index = 0; grad_index < grad.size(); ++grad_index) { + for (int grad_index = 0, end = grad.size(); grad_index < end; ++grad_index) { Gradient* grad_tensor = grad[grad_index]; if (grad_tensor != nullptr) { int64 tensor_id = vspace_.TensorId(grad_tensor); diff --git a/tensorflow/cc/framework/gradients.cc b/tensorflow/cc/framework/gradients.cc index 88cd3fe79d6..4229c356eff 100644 --- a/tensorflow/cc/framework/gradients.cc +++ b/tensorflow/cc/framework/gradients.cc @@ -425,7 +425,7 @@ Status SymbolicGradientBuilder::ProcessWhileLoop(Node* exit_node, // Backprop along the in edges to the while loop (i.e. the inputs to the enter // nodes) DCHECK_EQ(dx.size(), while_ctx->enter_nodes().size()); - for (int i = 0; i < dx.size(); ++i) { + for (int i = 0, end = dx.size(); i < end; ++i) { Node* enter_node = while_ctx->enter_nodes()[i]; for (const Edge* e : enter_node->in_edges()) { if (e->IsControlEdge()) continue; @@ -489,7 +489,7 @@ Status SymbolicGradientBuilder::AddGradients() { // All loop-specific control flow ops should have been handled above DCHECK(!n->IsEnter() && !n->IsNextIteration()) << n->DebugString(); - const size_t num_no_grad = no_grad_dy_indices.size(); + const int num_no_grad = no_grad_dy_indices.size(); if (IsPrimitiveOpWithNoGrad(n->type_string()) || num_no_grad == num_y) { // No grad defined for this op, or all outputs returned 'NoGradient': // Backprop 'NoGradient' along the in edges. 
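Nearly every hunk in this patch applies the same mechanical change: the unsigned value returned by `size()` is captured once in a signed loop-bound local, so the induction variable and its bound share a signedness, the `-Wsign-compare` warning disappears, and `size()` is no longer re-evaluated on every iteration. A stand-alone sketch of the before/after shape of these loops (the function name is illustrative):

#include <vector>

// Before: for (int i = 0; i < vec.size(); ++i)   // int vs. size_t comparison
// After:  the size is read once into a signed `end` matching the index type.
int SumAll(const std::vector<int>& vec) {
  int total = 0;
  for (int i = 0, end = vec.size(); i < end; ++i) {
    total += vec[i];
  }
  return total;
}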
@@ -525,7 +525,8 @@ Status SymbolicGradientBuilder::AddGradients() { for (const Edge* e : n->in_edges()) { if (e->IsControlEdge()) continue; int dx_index = e->dst_input(); - if (dx_index >= dx.size()) { + const int dx_size = dx.size(); + if (dx_index >= dx_size) { return errors::Internal( "Invalid gradient output index: ", dx_index, " size: ", dx.size()); } diff --git a/tensorflow/cc/framework/while_gradients.cc b/tensorflow/cc/framework/while_gradients.cc index 81870a0efa3..e241cfaebe9 100644 --- a/tensorflow/cc/framework/while_gradients.cc +++ b/tensorflow/cc/framework/while_gradients.cc @@ -34,7 +34,7 @@ Output ToOutput(OutputTensor output_tensor) { std::vector ToOutputVector( const std::vector& output_tensors) { - size_t n = output_tensors.size(); + const int n = output_tensors.size(); std::vector result; result.reserve(n); for (int i = 0; i < n; ++i) result.push_back(ToOutput(output_tensors[i])); diff --git a/tensorflow/compiler/aot/codegen.cc b/tensorflow/compiler/aot/codegen.cc index e4df3090046..625e7c3532a 100644 --- a/tensorflow/compiler/aot/codegen.cc +++ b/tensorflow/compiler/aot/codegen.cc @@ -172,7 +172,7 @@ string RewriteWithName(const string& name, string code, Status GenArgMethods(const tf2xla::Config& config, const xla::ProgramShapeProto& ps, const CompileResult& compile_result, string* methods) { - size_t num_args = ps.parameters_size(); + const int num_args = ps.parameters_size(); // feed_size() + variable_size() is the maximum number of args as an // implementation may not create an argument for an unused variable. if (config.feed_size() + config.variable_size() < num_args) { @@ -229,8 +229,9 @@ Status GenResultMethods(const tf2xla::Config& config, int readonly_variables = absl::c_count_if( config.variable(), [](const tf2xla::Variable& var) { return var.readonly(); }); - if (config.fetch_size() + config.variable_size() - readonly_variables != - num_results) { + const int actual_num_results = config.fetch_size() + + config.variable_size() - readonly_variables; + if (actual_num_results != num_results) { return errors::InvalidArgument("mismatch between fetch_size(", config.fetch_size(), ")+variable_size(", config.variable_size(), ") and tuple_size(", @@ -273,7 +274,7 @@ Status GenResultMethods(const tf2xla::Config& config, // Generate methods for variables. Status GenVariableMethods(const tf2xla::Config& config, const xla::ProgramShapeProto& ps, string* methods) { - size_t num_args = ps.parameters_size(); + const int num_args = ps.parameters_size(); for (int i = config.feed_size(); i < num_args; ++i) { std::vector> rewrites; TF_RETURN_IF_ERROR( @@ -401,7 +402,8 @@ Status GenerateHeader(const CodegenOpts& opts, const tf2xla::Config& config, ::xla::cpu::CreateArgIndexTableFromBufferInfos(buffer_infos); std::vector buffer_infos_as_strings = BufferInfosToCppExpression(buffer_infos); - if (result_index < 0 || result_index >= buffer_infos.size()) { + const int64 buffer_infos_size = buffer_infos.size(); + if (result_index < 0 || result_index >= buffer_infos_size) { return errors::InvalidArgument("result index: ", result_index, " is outside the range of temp sizes: [0,", buffer_infos.size(), ")"); @@ -797,8 +799,8 @@ Status ParseCppClass(const string& cpp_class, string* class_name, // Allow a fully qualified name that starts with "::". 
parts.erase(parts.begin()); } - for (int i = 0; i < parts.size(); ++i) { - if (i < parts.size() - 1) { + for (int i = 0, end = parts.size(); i < end; ++i) { + if (i < end - 1) { TF_RETURN_IF_ERROR(ValidateCppIdent( parts[i], "in namespace component of cpp_class: " + cpp_class)); namespaces->push_back(parts[i]); diff --git a/tensorflow/compiler/jit/build_xla_ops_pass.cc b/tensorflow/compiler/jit/build_xla_ops_pass.cc index 5a57008cf61..d6f50532f62 100644 --- a/tensorflow/compiler/jit/build_xla_ops_pass.cc +++ b/tensorflow/compiler/jit/build_xla_ops_pass.cc @@ -452,7 +452,7 @@ Status PredicateInt32Inputs(const Scope& root, Node* n, root.graph()->AddControlEdge(predicate_as_control.node(), identity_n.operation.node()); - for (int i = 0; i < int32_inputs.size(); i++) { + for (int32 i = 0, end = int32_inputs.size(); i < end; i++) { TF_RETURN_IF_ERROR(root.graph()->UpdateEdge(identity_n[i].node(), i, n, int32_inputs_input_idxs[i])); } diff --git a/tensorflow/compiler/jit/compilability_check_util.h b/tensorflow/compiler/jit/compilability_check_util.h index a21cb6b98dd..3b20784cc29 100644 --- a/tensorflow/compiler/jit/compilability_check_util.h +++ b/tensorflow/compiler/jit/compilability_check_util.h @@ -257,7 +257,7 @@ class RecursiveCompilabilityChecker { UncompilableNodesMap* uncompilable_nodes_map); // Make sure we don't recurse infinitely on recursive functions. - const int kMaxRecursionDepth = 10; + const size_t kMaxRecursionDepth = 10; const OperationFilter& op_filter_; const DeviceType& jit_device_type_; diff --git a/tensorflow/compiler/jit/device_util.cc b/tensorflow/compiler/jit/device_util.cc index 375d30c4cf3..d8749baf872 100644 --- a/tensorflow/compiler/jit/device_util.cc +++ b/tensorflow/compiler/jit/device_util.cc @@ -26,8 +26,8 @@ using xla::StatusOr; void DeviceSet::Insert(DeviceId device_id) { int word_index = device_id.id() / kWordSize; int bit_index = device_id.id() % kWordSize; - - if (word_index >= storage_.size()) { + const int storage_size = storage_.size(); + if (word_index >= storage_size) { storage_.resize(word_index + 1, 0); } @@ -39,7 +39,7 @@ void DeviceSet::UnionWith(const DeviceSet& other) { storage_.resize(other.storage_.size(), 0); } - for (int i = 0; i < other.storage_.size(); i++) { + for (int i = 0, end = other.storage_.size(); i < end; i++) { storage_[i] |= other.storage_[i]; } } diff --git a/tensorflow/compiler/jit/device_util.h b/tensorflow/compiler/jit/device_util.h index 35f3321b47b..33fb587c8ad 100644 --- a/tensorflow/compiler/jit/device_util.h +++ b/tensorflow/compiler/jit/device_util.h @@ -72,7 +72,7 @@ class DeviceSet { void ForEach(FnTy func) const { // This is really a poor man's iterator, we should consider writing a proper // iterator if this ends up being used widely. 
- for (int word_index = 0; word_index < storage_.size(); word_index++) { + for (int word_index = 0, end = storage_.size(); word_index < end; word_index++) { uint64 word = storage_[word_index]; while (word != 0) { uint64 only_lowest_bit_set = word & -word; diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc index 435c2ec5f7f..8230cde8660 100644 --- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc +++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc @@ -1132,7 +1132,8 @@ static Status GetArgTypes(const Graph& graph, DataTypeVector* types) { if (n->type_string() == kArgOp) { int index; TF_RETURN_IF_ERROR(GetNodeAttr(n->attrs(), "index", &index)); - if (index < 0 || index >= types->size()) { + const int types_size = types->size(); + if (index < 0 || index >= types_size) { return errors::InvalidArgument("Invalid argument number"); } (*types)[index] = n->output_type(0); @@ -1149,7 +1150,8 @@ static Status RenumberArguments(Graph* graph, if (n->type_string() == kArgOp) { int index; TF_RETURN_IF_ERROR(GetNodeAttr(n->attrs(), "index", &index)); - if (index < 0 || index >= permutation.size()) { + const int permutation_size = permutation.size(); + if (index < 0 || index >= permutation_size) { return errors::InvalidArgument("Invalid argument number"); } n->AddAttr("index", permutation[index]); diff --git a/tensorflow/compiler/jit/encapsulate_util.cc b/tensorflow/compiler/jit/encapsulate_util.cc index 5325f6faa31..12afee70716 100644 --- a/tensorflow/compiler/jit/encapsulate_util.cc +++ b/tensorflow/compiler/jit/encapsulate_util.cc @@ -139,7 +139,7 @@ Status PreprocessDataEdgesBetweenOutsideCompilations( // Remove the edge from host to outside compilation. Add a placeholder as // outside compilation node input. std::map, Node*> placeholders; - for (int i = 0; i < edges.size(); i++) { + for (int i = 0, end = edges.size(); i < end; i++) { Node* dst = g->FindNodeId(edges[i].dst_node_id); const Edge* e; TF_RETURN_IF_ERROR(dst->input_edge(edges[i].dst_input, &e)); @@ -185,7 +185,7 @@ Status PreprocessDataEdgesBetweenOutsideCompilations( // Other edge in `edges` might have `e->dst()` as src or dst // node. Before removing `e->dst()`, replace those edges with // corresponding edges for `dst_replace_node`. - for (int j = i + 1; j < edges.size(); j++) { + for (int j = i + 1, end = edges.size(); j < end; j++) { if (edges[j].dst_node_id == edges[i].dst_node_id) { edges[j].dst_node_id = dst_replace_node->id(); } @@ -238,7 +238,7 @@ Status PostprocessDataEdgesBetweenOutsideCompilations( g->AddControlEdge(original_node, e->dst()); g->RemoveEdge(e); } - for (int i = 0; i < data_edges.size(); i++) { + for (int i = 0, end = data_edges.size(); i < end; i++) { Node* dst = data_edges[i].dst; NodeDef new_def = dst->def(); int dst_input = data_edges[i].dst_input; @@ -253,7 +253,7 @@ Status PostprocessDataEdgesBetweenOutsideCompilations( // Other edges might have `dst` as dst node. Update those edges with // `replace_node`. 
- for (int j = i + 1; j < data_edges.size(); j++) { + for (int j = i + 1, end = data_edges.size(); j < end; j++) { if (data_edges[j].dst == dst) { data_edges[j].dst = replace_node; } diff --git a/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc b/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc index 2b7a6c83b8b..ed25baa62ff 100644 --- a/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc +++ b/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc @@ -351,14 +351,14 @@ Status RewriteSubgraph(const std::vector& arg_source_tensors, if (!status.ok()) { return status; } - for (int i = 0; i < data_inputs.size(); ++i) { + for (int i = 0, end = data_inputs.size(); i < end; ++i) { graph->AddEdge(data_inputs[i].first, data_inputs[i].second, xla_launch, i); } for (Node* n : control_inputs) { graph->AddControlEdge(n, xla_launch); } - for (int i = 0; i < data_outputs.size(); ++i) { + for (int i = 0, end = data_outputs.size(); i < end; ++i) { for (const auto& successor : data_outputs[i]) { graph->AddEdge(xla_launch, i, successor.first, successor.second); } diff --git a/tensorflow/compiler/jit/extract_outside_compilation_pass.cc b/tensorflow/compiler/jit/extract_outside_compilation_pass.cc index 5f1c3d536a8..4a2b6136d53 100644 --- a/tensorflow/compiler/jit/extract_outside_compilation_pass.cc +++ b/tensorflow/compiler/jit/extract_outside_compilation_pass.cc @@ -95,7 +95,7 @@ Status GetArgDataTypes(const std::vector& arg_nodes, TF_RETURN_IF_ERROR(GetNodeAttr(n->attrs(), "T", &dtype)); (*recv_at_host_dtypes)[index] = dtype; } - for (int i = 0; i < recv_at_host_dtypes->size(); i++) { + for (int i = 0, end = recv_at_host_dtypes->size(); i < end; i++) { if ((*recv_at_host_dtypes)[i] == DT_INVALID) { return errors::Internal("Cannot get datatype for input ", i); } @@ -160,7 +160,7 @@ xla::StatusOr ReplaceArgNodesWithRecvAtHostNode( } // Rewrite dst nodes because their input changed. - for (int i = 0; i < out_edge_info.size(); i++) { + for (int i = 0, end = out_edge_info.size(); i < end; i++) { const OutEdgeInfo edge = out_edge_info[i]; if (edge.dst_input == Graph::kControlSlot) { continue; @@ -174,7 +174,7 @@ xla::StatusOr ReplaceArgNodesWithRecvAtHostNode( // Other edges might have `dst` as dst node as well. Update those edges // with `dst_replace`. 
- for (int j = i + 1; j < out_edge_info.size(); j++) { + for (int j = i + 1, end = out_edge_info.size(); j < end; j++) { if (out_edge_info[j].dst == dst) { out_edge_info[j].dst = dst_replace; } @@ -196,7 +196,7 @@ Status GetRetDataTypes(const std::vector& ret_nodes, TF_RETURN_IF_ERROR(GetNodeAttr(n->attrs(), "T", &dtype)); (*send_from_host_dtypes)[index] = dtype; } - for (int i = 0; i < send_from_host_dtypes->size(); i++) { + for (int i = 0, end = send_from_host_dtypes->size(); i < end; i++) { if ((*send_from_host_dtypes)[i] == DT_INVALID) { return errors::Internal("Cannot get datatype for output ", i); } @@ -226,7 +226,8 @@ xla::StatusOr BuildSendFromHostNode( for (auto* n : ret_nodes) { int index; TF_RETURN_IF_ERROR(GetNodeAttr(n->attrs(), "index", &index)); - if (index < 0 || index >= send_from_host_dtypes.size()) { + const int send_from_host_dtypes_size = send_from_host_dtypes.size(); + if (index < 0 || index >= send_from_host_dtypes_size) { return errors::Internal("Invalid _Retval index: ", index); } for (auto edge : n->in_edges()) { @@ -360,8 +361,9 @@ xla::StatusOr BuildXlaHostComputeNodeDef( if (e->IsControlEdge()) { continue; } - - if (e->dst_input() < 0 || e->dst_input() >= input_dtypes.size()) { + + const int input_dtypes_size = input_dtypes.size(); + if (e->dst_input() < 0 || e->dst_input() >= input_dtypes_size) { return errors::Internal("Invalid dst_input: ", e->dst_input()); } inputs[e->dst_input()] = NodeDefBuilder::NodeOut{ @@ -500,7 +502,7 @@ void AddEdgesFromOutsideCompilationNodes( const std::vector& data_types, const std::vector& outside_compilation_nodes, Graph* g, Node* n) { // Add edges from outside compilation nodes to While node. - for (int i = original_arg_count; i < data_types.size(); i++) { + for (int i = original_arg_count, end = data_types.size(); i < end; i++) { Node* outside_compilation_node = outside_compilation_nodes[i - original_arg_count]; g->AddEdge(outside_compilation_node, 0, n, i + arg_to_input_edge_offset); @@ -619,7 +621,7 @@ Status PostprocessLiftedArgsForWhile( lifted_arg_nodes_and_outside_compilation_nodes.end(), std::back_inserter(lifted_arg_nodes), [](const std::pair& pair) { return pair.first; }); - for (int i = original_arg_count; i < data_types.size(); i++) { + for (int i = original_arg_count, end = data_types.size(); i < end; i++) { TF_ASSIGN_OR_RETURN(Node * arg_node, AddOutsideCompilationInputArgToFunctionBody( *body_function_body, i, data_types[i])); @@ -648,7 +650,7 @@ Status PostprocessLiftedArgsForWhile( AttrSlice(&cond_func.attr()), fld, &cond_function_body)); - for (int i = original_arg_count; i < data_types.size(); i++) { + for (int i = original_arg_count, end = data_types.size(); i < end; i++) { xla::StatusOr arg_node_or = AddOutsideCompilationInputArgToFunctionBody(*cond_function_body, i, data_types[i]); @@ -759,7 +761,7 @@ Status PostprocessLiftedArgsForIf( data_types, outside_compilation_nodes, g, n); - for (int i = original_arg_count; i < data_types.size(); ++i) { + for (int i = original_arg_count, end = data_types.size(); i < end; ++i) { TF_ASSIGN_OR_RETURN(Node * then_branch_arg_node, AddOutsideCompilationInputArgToFunctionBody( *then_branch_function_body, i, data_types[i])); @@ -837,7 +839,7 @@ Status PostprocessLiftedArgsForCall( lifted_arg_nodes_and_outside_compilation_nodes.end(), std::back_inserter(lifted_arg_nodes), [](const std::pair& pair) { return pair.first; }); - for (int i = original_arg_count; i < data_types.size(); ++i) { + for (int i = original_arg_count, end = data_types.size(); i < end; ++i) { 
TF_ASSIGN_OR_RETURN( Node * arg_node, AddOutsideCompilationInputArgToFunctionBody(*fbody, i, data_types[i])); @@ -855,7 +857,7 @@ Status PostprocessLiftedArgsForCall( // We need to recreate the node. Otherwise TF will not know n->num_inputs() // has increased. NodeDef node_def = n->def(); - for (int i = original_arg_count; i < data_types.size(); i++) { + for (int i = original_arg_count, end = data_types.size(); i < end; i++) { Node* outside_compilation_node = lifted_arg_nodes_and_outside_compilation_nodes[i - original_arg_count] .second; @@ -1803,8 +1805,9 @@ TF_ATTRIBUTE_NOINLINE Status ExtractOutsideCompilationForFuncCallNode( if (e->IsControlEdge()) { continue; } - - TF_RET_CHECK(e->dst_input() >= 0 && e->dst_input() < inputs.size()); + + const int input_size_check = e->dst_input() < inputs.size(); + TF_RET_CHECK(e->dst_input() >= 0 && input_size_check); inputs[e->dst_input()] = NodeDefBuilder::NodeOut{e->src()->name(), e->src_output(), e->src()->output_type(e->src_output())}; diff --git a/tensorflow/compiler/jit/graphcycles/graphcycles.cc b/tensorflow/compiler/jit/graphcycles/graphcycles.cc index 6c5e3a745e2..416e101a025 100644 --- a/tensorflow/compiler/jit/graphcycles/graphcycles.cc +++ b/tensorflow/compiler/jit/graphcycles/graphcycles.cc @@ -461,7 +461,7 @@ string GraphCycles::DebugString() const { } string result = "digraph {\n"; - for (int i = 0; i < rep_->nodes_.size(); i++) { + for (int i = 0, end = rep_->nodes_.size(); i < end; i++) { if (free_nodes_set.contains(i)) { continue; } diff --git a/tensorflow/compiler/jit/increase_dynamism_for_auto_jit_pass.cc b/tensorflow/compiler/jit/increase_dynamism_for_auto_jit_pass.cc index 23931a0d7cd..bf9d88b73fa 100644 --- a/tensorflow/compiler/jit/increase_dynamism_for_auto_jit_pass.cc +++ b/tensorflow/compiler/jit/increase_dynamism_for_auto_jit_pass.cc @@ -194,7 +194,7 @@ Status ComputeSliceSize(const Scope& host_scope, ConstantCache constant_pool(host_scope, control_deps); std::vector slice_size; - for (int i = 0; i < slice_inputs.size_as_vector.size(); i++) { + for (int i = 0, end = slice_inputs.size_as_vector.size(); i < end; i++) { if (slice_inputs.size_as_vector[i] >= 0) { slice_size.push_back( constant_pool.Get1DHostConstant(slice_inputs.size_as_vector[i])); diff --git a/tensorflow/compiler/jit/shape_inference.cc b/tensorflow/compiler/jit/shape_inference.cc index 72804ff57e4..7f585e70ec4 100644 --- a/tensorflow/compiler/jit/shape_inference.cc +++ b/tensorflow/compiler/jit/shape_inference.cc @@ -36,7 +36,7 @@ Status ShapeHandleToTensorShape(shape_inference::InferenceContext* context, if (!context->RankKnown(handle)) return Status::OK(); std::vector dims(context->Rank(handle)); - for (int32 i = 0; i < dims.size(); ++i) { + for (int32 i = 0, end = dims.size(); i < end; ++i) { dims[i] = context->Value(context->Dim(handle, i)); } return PartialTensorShape::MakePartialShape(dims.data(), dims.size(), shape); diff --git a/tensorflow/compiler/jit/xla_cluster_util.cc b/tensorflow/compiler/jit/xla_cluster_util.cc index b8b11d2c7cd..38c23b7fa25 100644 --- a/tensorflow/compiler/jit/xla_cluster_util.cc +++ b/tensorflow/compiler/jit/xla_cluster_util.cc @@ -489,7 +489,7 @@ Status GetNodesRelatedToRefVariablesInDirection( /*stable_comparator=*/NodeComparatorName()); } - int old_result_size; + size_t old_result_size; int iterations = 0; const int kMaxIterations = 10 * 1000; diff --git a/tensorflow/compiler/jit/xla_compilation_cache.cc b/tensorflow/compiler/jit/xla_compilation_cache.cc index 62b0c0ab4cf..b1525337dbc 100644 --- 
a/tensorflow/compiler/jit/xla_compilation_cache.cc +++ b/tensorflow/compiler/jit/xla_compilation_cache.cc @@ -97,7 +97,7 @@ bool XlaCompilationCache::Signature::operator==(const Signature& other) const { if (arg_shapes != other.arg_shapes) return false; if (arg_values.size() != other.arg_values.size()) return false; - for (int i = 0; i < arg_values.size(); ++i) { + for (int i = 0, end = arg_values.size(); i < end; ++i) { if (arg_values[i].dtype() != other.arg_values[i].dtype() || arg_values[i].shape() != other.arg_values[i].shape() || arg_values[i].tensor_data() != other.arg_values[i].tensor_data()) { @@ -158,7 +158,7 @@ Status XlaCompilationCache::BuildExecutable( std::vector argument_layouts( result.xla_input_shapes.size()); - for (int i = 0; i < result.xla_input_shapes.size(); ++i) { + for (int i = 0, end = result.xla_input_shapes.size(); i < end; ++i) { argument_layouts[i] = &result.xla_input_shapes[i]; } xla::ExecutableBuildOptions build_options; @@ -224,7 +224,7 @@ static xla::StatusOr> CreateGraph( // Create dummy _Arg nodes. Link these to `node` and also via a control // dependency edge to the _SOURCE node. - for (int64 i = 0; i < args.size(); ++i) { + for (int64 i = 0, end = args.size(); i < end; ++i) { Node* node; string arg_name = absl::StrCat("_arg", i); Status status = @@ -240,7 +240,7 @@ static xla::StatusOr> CreateGraph( } // Similarly with return values, create dummy _Retval nodes fed by `node`. - for (int64 i = 0; i < result_types.size(); ++i) { + for (int64 i = 0, end = result_types.size(); i < end; ++i) { Node* node; string retval_name = absl::StrCat("_retval", i); Status status = NodeBuilder(retval_name, FunctionLibraryDefinition::kRetOp) @@ -271,7 +271,7 @@ Status XlaCompilationCache::CompileSingleOp( auto compile_op = [&](XlaCompiler* compiler, XlaCompiler::CompilationResult* result) { std::vector result_dtypes(ctx->num_outputs()); - for (int i = 0; i < result_dtypes.size(); ++i) { + for (int i = 0, end = result_dtypes.size(); i < end; ++i) { result_dtypes[i] = ctx->expected_output_dtype(i); } @@ -330,7 +330,7 @@ Status XlaCompilationCache::CompileImpl( if (VLOG_IS_ON(2)) { VLOG(2) << "num_inputs=" << args.size(); - for (int i = 0; i < args.size(); i++) { + for (int i = 0, end = args.size(); i < end; i++) { VLOG(3) << i << ": " << args[i].HumanString(); } } diff --git a/tensorflow/compiler/jit/xla_launch_util.cc b/tensorflow/compiler/jit/xla_launch_util.cc index 41abe86df6e..dfa7aca2a9e 100644 --- a/tensorflow/compiler/jit/xla_launch_util.cc +++ b/tensorflow/compiler/jit/xla_launch_util.cc @@ -156,7 +156,7 @@ Status SnapshotResourceVariables(OpKernelContext* ctx, absl::Span variable_indices, absl::Span variable_infos, ResourceVarsSnapshot* result) { - for (int i = 0; i < variable_indices.size(); i++) { + for (int i = 0, end = variable_indices.size(); i < end; i++) { Var* var = variable_infos[i].var(); (*result)[variable_indices[i]] = var ? absl::make_optional(*var->tensor()) : absl::nullopt; @@ -206,7 +206,7 @@ XlaComputationLaunchContext::PopulateInputs( xla::TransferManager* transfer_manager = client_->backend().transfer_manager(); - for (int i = 0; i < compilation_result->xla_input_shapes.size(); ++i) { + for (int i = 0, end = compilation_result->xla_input_shapes.size(); i < end; ++i) { int arg_num = compilation_result->input_mapping[i]; CHECK_GE(arg_num, missing_ctx_input_prefix); const xla::Shape& shape = compilation_result->xla_input_shapes[i]; @@ -466,7 +466,7 @@ Status XlaComputationLaunchContext::PopulateOutputs( // Copy XLA results to the OpOutputList. 
int output_num = 0; - for (int i = 0; i < ctx->num_outputs(); ++i) { + for (int i = 0, end = ctx->num_outputs(); i < end; ++i) { const TensorShape& shape = output_tensor_shapes[i]; const DataType& type = compilation_result->outputs[i].type; VLOG(2) << "Populating output for retval " << i << " shape " @@ -514,7 +514,7 @@ Status XlaComputationLaunchContext::PopulateOutputs( } // Apply variable updates, if any. - for (int i = 0; i < compilation_result->resource_updates.size(); ++i) { + for (int i = 0, end = compilation_result->resource_updates.size(); i < end; ++i) { const XlaCompiler::ResourceUpdate& write = compilation_result->resource_updates[i]; int actual_input_index = write.input_index - missing_ctx_input_prefix; From 755dff7753d88c48f23c3ca0a95d1a2ce49e5233 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tar=C3=A9=20Gaskin?= Date: Sun, 26 Jul 2020 21:52:18 +0000 Subject: [PATCH 1352/2522] mlir lite resolutions --- .../compiler/mlir/lite/flatbuffer_export.cc | 4 +- .../compiler/mlir/lite/flatbuffer_import.cc | 13 +- tensorflow/compiler/mlir/lite/ir/tfl_ops.cc | 39 ++-- .../python/saved_model_to_tfl_flatbuffer.cc | 6 +- .../mlir/lite/transforms/dilated_conv.h | 2 +- .../mlir/lite/transforms/legalize_tf.cc | 188 +++++++++++++++++- .../compiler/mlir/lite/transforms/optimize.cc | 4 +- .../prepare_composite_functions_tf.cc | 3 +- .../mlir/lite/transforms/prepare_tf.cc | 30 ++- .../compiler/mlir/lite/utils/lstm_utils.cc | 2 +- 10 files changed, 237 insertions(+), 54 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/flatbuffer_export.cc b/tensorflow/compiler/mlir/lite/flatbuffer_export.cc index 2e69a1740db..89fae87cb25 100644 --- a/tensorflow/compiler/mlir/lite/flatbuffer_export.cc +++ b/tensorflow/compiler/mlir/lite/flatbuffer_export.cc @@ -1418,7 +1418,7 @@ BufferOffset Translator::BuildSparsityParameters( } else { auto segments = dim_metadata.segments(); std::vector vector_segments(segments.size(), 0); - for (int j = 0; j < segments.size(); j++) { + for (int j = 0, end = segments.size(); j < end; j++) { vector_segments[j] = segments[j].dyn_cast().getInt(); } tflite::SparseIndexVector segments_type; @@ -1450,7 +1450,7 @@ BufferOffset Translator::BuildSparsityParameters( auto indices = dim_metadata.indices(); std::vector vector_indices(indices.size(), 0); int max_of_indices = 0; - for (int j = 0; j < indices.size(); j++) { + for (int j = 0, end = indices.size(); j < end; j++) { vector_indices[j] = indices[j].dyn_cast().getInt(); if (vector_indices[j] > max_of_indices) { max_of_indices = vector_indices[j]; diff --git a/tensorflow/compiler/mlir/lite/flatbuffer_import.cc b/tensorflow/compiler/mlir/lite/flatbuffer_import.cc index fa85b4e50fd..29484fabbea 100644 --- a/tensorflow/compiler/mlir/lite/flatbuffer_import.cc +++ b/tensorflow/compiler/mlir/lite/flatbuffer_import.cc @@ -229,7 +229,7 @@ mlir::Operation* ConvertMinMaxToStatsOp(const TensorT& tensor, OpBuilder b, llvm::SmallVector min_maxs; min_maxs.reserve(mins.size() * 2); - for (int i = 0; i < mins.size(); ++i) { + for (int i = 0, end = mins.size(); i < end; ++i) { llvm::APFloat min(mins[i]); llvm::APFloat max(maxs[i]); min_maxs.push_back(min); @@ -281,7 +281,7 @@ std::vector ReadAsLittleEndian(ArrayRef bytes) { int bytes_len = bytes.size(); assert(bytes_len % read_size == 0); - size_t elem_count = bytes_len / read_size; + int elem_count = bytes_len / read_size; ret.reserve(elem_count); const char* data_ptr = reinterpret_cast(bytes.data()); @@ -318,7 +318,7 @@ StatusOr ConvertFloatBuffer( switch (elem_type.getWidth()) { case 16: { 
assert(bytes_len % 2 == 0); - size_t elem_count = bytes_len / 2; + int elem_count = bytes_len / 2; std::vector values; values.reserve(elem_count); @@ -337,12 +337,11 @@ StatusOr ConvertFloatBuffer( } case 32: { assert(bytes_len % 4 == 0); - size_t elem_count = bytes_len / 4; + int elem_count = bytes_len / 4; std::vector values; values.reserve(elem_count); const char* data = reinterpret_cast(buffer.data()); - for (int i = 0; i < elem_count; i++) { uint32_t bit_repr = llvm::support::endian::readNext ConvertFloatBuffer( } case 64: { assert(bytes_len % 8 == 0); - size_t elem_count = bytes_len / 8; + int elem_count = bytes_len / 8; std::vector values; values.reserve(elem_count); @@ -829,7 +828,7 @@ StatusOr ConvertSubgraph( // Add state variables to inputs. absl::flat_hash_set input_index_set(func_inputs.begin(), func_inputs.end()); - for (int i = 0; i < subgraph.tensors.size(); i++) { + for (int i = 0, end = subgraph.tensors.size(); i < end; i++) { auto& tensor = *subgraph.tensors.at(i); if (tensor.is_variable && !input_index_set.contains(i)) { func_inputs.emplace_back(i); diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc b/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc index 427b9c692a7..c7c3f5713f1 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc +++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc @@ -147,18 +147,10 @@ bool IsI64Type(Type element_type) { bool VerifyAddOpShapeConstraints(AddOp op) { auto element_type = getElementTypeOrSelf(op.output().getType()); - // Allows F32, QI8, and QUI8 outputs when the operands have valid shapes, + // Allows F32, QI8, QUI8 and I32 outputs when the operands have valid shapes, // which are broadcastable shapes up to five dimension or have same shapes. if (element_type.isF32() || IsQI8Type(element_type) || - IsQUI8Type(element_type)) { - return VerifyOperandsHaveSameShapesOrBroadcastableShape( - /*op=*/op.getOperation(), /*indices=*/ArrayRef{0, 1}, - /*max_bcast_rank=*/5); - } - - // Allows I32 output when the operands have valid shapes, which are - // broadcastable shapes up to four dimension or have same shapes. - if (IsI32Type(element_type)) { + IsQUI8Type(element_type) || IsI32Type(element_type)) { return VerifyOperandsHaveSameShapesOrBroadcastableShape( /*op=*/op.getOperation(), /*indices=*/ArrayRef{0, 1}, /*max_bcast_rank=*/4); @@ -210,20 +202,13 @@ bool VerifyMulOpShapeConstraints(MulOp op) { } return VerifyOperandsHaveSameShapesOrBroadcastableShape( /*op=*/op.getOperation(), /*indices=*/ArrayRef{0, 1}, - /*max_bcast_rank=*/5); + /*max_bcast_rank=*/4); } - // Allows F32 output when the operands have valid shapes, which are - // broadcastable shapes up to five dimension or have same shapes. - if (element_type.isF32()) { - return VerifyOperandsHaveSameShapesOrBroadcastableShape( - /*op=*/op.getOperation(), /*indices=*/ArrayRef{0, 1}, - /*max_bcast_rank=*/5); - } - - // Allows I32 and QI16 outputs when the operands have valid shapes, which are - // broadcastable shapes up to four dimension or have same shapes. - if (IsI32Type(element_type) || IsQI16Type(element_type)) { + // Allows I32, QI16 and F32 outputs when the operands have valid shapes, which + // are broadcastable shapes up to four dimension or have same shapes. 
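+  // Worked example (assumed from the max_bcast_rank argument below): operand
+  // shapes [2, 1, 3] and [4, 3] are broadcastable and produce [2, 4, 3], so
+  // they verify; operands that actually require broadcasting at rank five are
+  // rejected for these element types because the broadcast rank limit here is
+  // four, while identically shaped operands of any rank still pass.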
+ if (IsI32Type(element_type) || IsQI16Type(element_type) || + element_type.isF32()) { return VerifyOperandsHaveSameShapesOrBroadcastableShape( /*op=*/op.getOperation(), /*indices=*/ArrayRef{0, 1}, /*max_bcast_rank=*/4); @@ -773,7 +758,8 @@ static LogicalResult Verify(CustomOp op) { op.custom_option().cast(); if (!opaque_attr.getType().hasStaticShape()) return op.emitOpError("custom_option should have a static shape."); - if (opaque_attr.getValue().size() != + const int opaque_attr_getValue_size = opaque_attr.getValue().size(); + if (opaque_attr_getValue_size != opaque_attr.getType().cast().getDimSize(0)) return op.emitOpError( "custom_option should have the same length of content with shape."); @@ -955,7 +941,7 @@ static LogicalResult Verify(ScatterNdOp op) { // Checks whether the last `(shape_type.getDimSize(0) - outermost_dim)` // dimensions of `updates` and `shape` are equal. for (auto shape_it : llvm::enumerate(shape_value)) { - auto i = shape_it.index(); + long int i = shape_it.index(); auto value = shape_it.value().getSExtValue(); if (i >= outermost_dim) { auto corresponding_dim = i - outermost_dim + outer_dims; @@ -1192,7 +1178,8 @@ struct RemoveRedundantUnpackPack : public RewritePattern { return failure(); const int total_pack_inputs = pack_op.getNumOperands(); - if (total_pack_inputs != input_unpack_op.getNumResults()) return failure(); + const int input_unpack_op_getNumResults = input_unpack_op.getNumResults(); + if (total_pack_inputs != input_unpack_op_getNumResults) return failure(); for (auto input_output : llvm::zip(pack_op.getOperands(), input_unpack_op.getResults())) { Value pack_input = std::get<0>(input_output); @@ -1261,7 +1248,7 @@ static LogicalResult Verify(SliceOp op) { } if (begin && size && input_type.hasStaticShape()) { - const int input_rank = begin.getNumElements(); + const uint64_t input_rank = begin.getNumElements(); for (uint64_t i = 0; i < input_rank; i++) { int begin_i = begin.getValue({i}).cast().getValue().getSExtValue(); diff --git a/tensorflow/compiler/mlir/lite/python/saved_model_to_tfl_flatbuffer.cc b/tensorflow/compiler/mlir/lite/python/saved_model_to_tfl_flatbuffer.cc index dafcfd11147..529c9ee9238 100644 --- a/tensorflow/compiler/mlir/lite/python/saved_model_to_tfl_flatbuffer.cc +++ b/tensorflow/compiler/mlir/lite/python/saved_model_to_tfl_flatbuffer.cc @@ -75,7 +75,8 @@ Status HandleInputOutputArraysWithModule(const toco::ModelFlags& model_flags, } auto input_names = input_attr.cast().getValue(); input_names.split(function_input_names, ","); - if (function_input_names.size() != model_flags.input_arrays().size()) { + const int function_input_names_size = function_input_names.size(); + if (function_input_names_size != model_flags.input_arrays().size()) { return errors::InvalidArgument( "input array size mismatch: got ", function_input_names.size(), ", expected: ", model_flags.input_arrays().size()); @@ -99,7 +100,8 @@ Status HandleInputOutputArraysWithModule(const toco::ModelFlags& model_flags, } auto output_names = output_attr.cast().getValue(); output_names.split(function_output_names, ","); - if (function_output_names.size() != model_flags.output_arrays().size()) { + const int function_output_names_size = function_output_names.size(); + if (function_output_names_size != model_flags.output_arrays().size()) { return errors::InvalidArgument( "output array size mismatch: got ", function_output_names.size(), ", expected: ", model_flags.output_arrays().size()); diff --git a/tensorflow/compiler/mlir/lite/transforms/dilated_conv.h 
b/tensorflow/compiler/mlir/lite/transforms/dilated_conv.h index b745be7753a..2054bab4185 100644 --- a/tensorflow/compiler/mlir/lite/transforms/dilated_conv.h +++ b/tensorflow/compiler/mlir/lite/transforms/dilated_conv.h @@ -276,7 +276,7 @@ ConvertTFDilatedConvOp::ExtractDilationsAttrFromBlockShape( } // Check that the block_shape of `stb_op` and `bts_op` are equal. if (stb_bs_attr.getNumElements() != bts_bs_attr.getNumElements()) return {}; - for (uint64_t i = 0; i < stb_bs_attr.getNumElements(); ++i) { + for (uint64_t i = 0, end = stb_bs_attr.getNumElements(); i < end; ++i) { if (stb_bs_attr.getValue({i}) != bts_bs_attr.getValue({i})) return {}; } diff --git a/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc index 7d6866dc570..c85e8259711 100644 --- a/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc +++ b/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc @@ -170,7 +170,7 @@ LogicalResult ConvertTFRandomUniformOp::matchAndRewrite( size_t num_samples = Distribution::kResultElementCount; llvm::SmallVector data; data.resize(num_elements); - while (offset < num_elements) { + while (static_cast(offset) < num_elements) { const typename Distribution::ResultType samples = dist(&generator); std::copy(&samples[0], &samples[0] + std::min(num_samples, data.size() - offset), @@ -631,6 +631,156 @@ struct LegalizeUnidirectionalSequenceRnn : public RewritePattern { } }; +// Put two TFL BroadcastTo ops in front of the given TF binary broadcast op to +// to make binary broadcast-able op conversion always successful and does not +// require flex delegate. +template +class ApplyExplicitBroadcasting : public OpRewritePattern { + public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(SourceOp src_op, + PatternRewriter& rewriter) const override { + Operation* op = static_cast(src_op); + auto lhs = op->getOperand(0); + auto rhs = op->getOperand(1); + + // Should have static shapes to calculate the broadcasted shape. + if (!lhs.getType().cast().hasStaticShape() || + !rhs.getType().cast().hasStaticShape()) { + return failure(); + } + + // Calculate the broadcasted shape. + SmallVector result_shape; + if (!OpTrait::util::getBroadcastedShape( + lhs.getType().cast().getShape(), + rhs.getType().cast().getShape(), result_shape)) { + return failure(); + } + + RankedTensorType result_type = RankedTensorType::get( + result_shape, getElementTypeOrSelf(op->getResult(0).getType())); + + // Create a const op, that stores the above broadcasted shape. + auto new_shape_attr = mlir::DenseIntElementsAttr::get( + RankedTensorType::get(result_shape.size(), rewriter.getIntegerType(64)), + result_shape); + auto new_shape = rewriter.create(op->getLoc(), new_shape_attr); + + // Apply BroadcastTo ops to each input. + auto broadcast_type = RankedTensorType::get( + result_shape, getElementTypeOrSelf(lhs.getType())); + + if (result_type.getShape() != lhs.getType().cast().getShape()) { + lhs = rewriter + .create(op->getLoc(), broadcast_type, lhs, + new_shape) + .output(); + } + if (result_type.getShape() != rhs.getType().cast().getShape()) { + rhs = rewriter + .create(op->getLoc(), broadcast_type, rhs, + new_shape) + .output(); + } + + // Recreate an op with the above Broadcast op results. + rewriter.replaceOpWithNewOp(op, result_type, lhs, rhs); + return success(); + } +}; + +// This specialization is for TF SelectV2 op. SelectV2 op have three inputs and +// they should have broadcastable shapes. 
+template <> +class ApplyExplicitBroadcasting + : public OpRewritePattern { + public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(TF::SelectV2Op src_op, + PatternRewriter& rewriter) const override { + Operation* op = static_cast(src_op); + auto cond = op->getOperand(0); + auto lhs = op->getOperand(1); + auto rhs = op->getOperand(2); + + // Should have static shapes to calculate the broadcasted shape. + if (!lhs.getType().cast().hasStaticShape() || + !rhs.getType().cast().hasStaticShape() || + !cond.getType().cast().hasStaticShape()) { + return failure(); + } + + // Calculate the broadcasted shape. + SmallVector broadcasted_shape; + if (!OpTrait::util::getBroadcastedShape( + lhs.getType().cast().getShape(), + rhs.getType().cast().getShape(), broadcasted_shape)) { + return failure(); + } + + SmallVector result_shape; + if (!OpTrait::util::getBroadcastedShape( + broadcasted_shape, cond.getType().cast().getShape(), + result_shape)) { + return failure(); + } + + // Create a const op, that stores the above broadcasted shape. + auto shape_type = + RankedTensorType::get(result_shape.size(), rewriter.getIntegerType(64)); + auto new_shape_attr = + mlir::DenseIntElementsAttr::get(shape_type, result_shape); + auto new_shape = rewriter.create(op->getLoc(), new_shape_attr); + + // Apply BroadcastTo ops to each input. + auto cond_result_type = + RankedTensorType::get(result_shape, rewriter.getIntegerType(1)); + auto result_type = RankedTensorType::get( + result_shape, getElementTypeOrSelf(lhs.getType())); + + if (result_shape != cond.getType().cast().getShape()) { + cond = rewriter + .create(op->getLoc(), cond_result_type, + cond, new_shape) + .output(); + } + if (result_shape != lhs.getType().cast().getShape()) { + lhs = rewriter + .create(op->getLoc(), result_type, lhs, + new_shape) + .output(); + } + if (result_shape != rhs.getType().cast().getShape()) { + rhs = rewriter + .create(op->getLoc(), result_type, rhs, + new_shape) + .output(); + } + + // Recreate an op with the above Broadcast op results. + rewriter.replaceOpWithNewOp(op, result_type, cond, lhs, + rhs); + return success(); + } +}; + +void applyPatterns(FuncOp func, ConversionTarget& target, + const OwningRewritePatternList& patterns) { + // Keep trying to convert. + // TODO(karimnosseir): This is similar to what apply greedy patterns does. + // Look if there is a function that tries until it converge. + // Currently unit-test doesn't do multiple tries, so we need this. + const int max_iterations = 15; + for (int i = 0; i < max_iterations; ++i) { + if (failed(applyPartialConversion(func, target, patterns))) { + return; + } + } +} + void LegalizeTF::runOnFunction() { OwningRewritePatternList patterns; auto* context = &getContext(); @@ -681,16 +831,32 @@ void LegalizeTF::runOnFunction() { return success(current_thread_id == llvm::get_threadid()); }); - // Keep trying to convert. - // TODO(karimnosseir): This is similar to what apply greedy patterns does. - // Look if there is a function that tries until it converge. - // Currently unit-test doesn't do multiple tries, so we need this. - const int max_iterations = 15; - for (int i = 0; i < max_iterations; ++i) { - if (failed(applyPartialConversion(func, target, patterns))) { - return; - } - } + applyPatterns(func, target, patterns); + + // Explict BroadcastTo addition for left-over broadcast-able ops. + // The following pattern matchings should be done after the other legalization + // rules in order not to add unnecessary BroadcastTo ops. 
+ patterns.insert, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting, + ApplyExplicitBroadcasting>(context); + + applyPatterns(func, target, patterns); } } // namespace diff --git a/tensorflow/compiler/mlir/lite/transforms/optimize.cc b/tensorflow/compiler/mlir/lite/transforms/optimize.cc index d26a4906420..751c5266f65 100644 --- a/tensorflow/compiler/mlir/lite/transforms/optimize.cc +++ b/tensorflow/compiler/mlir/lite/transforms/optimize.cc @@ -198,7 +198,7 @@ DenseElementsAttr GetShape(Value output_val) { auto output_type = output_val.getType().cast(); auto shape_vector = output_type.getShape(); std::vector shape(shape_vector.size()); - for (int i = 0; i < shape_vector.size(); ++i) { + for (int i = 0, end = shape_vector.size(); i < end; ++i) { shape[i] = shape_vector[i]; } return mlir::DenseElementsAttr::get( @@ -684,7 +684,7 @@ struct ConvertTrivialTransposeOpToReshapeOp SmallVector old_major_index_ordering; SmallVector new_major_index_ordering; - for (int i = 0; i < input_shape.size(); i++) { + for (int i = 0, end = input_shape.size(); i < end; i++) { if (input_shape[i] != 1) { old_major_index_ordering.push_back(i); } diff --git a/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc b/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc index f7923847835..9261deab18b 100644 --- a/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc +++ b/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc @@ -225,7 +225,8 @@ void PrepareCompositeFunctionsPass::ConvertTFImplementsWithAttributes( LogicalResult CheckOutputConsumer( Operation* call_op, int expected_num_outputs, llvm::DenseSet expected_consumer_indices) { - if (call_op->getNumResults() != expected_num_outputs) return failure(); + const int call_op_getNumResults = call_op->getNumResults(); + if (call_op_getNumResults != expected_num_outputs) return failure(); for (int i = 0; i < expected_num_outputs; ++i) { auto it = expected_consumer_indices.find(i); diff --git a/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc b/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc index 9a883a3790e..0a7802c3229 100644 --- a/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc +++ b/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc @@ -41,7 +41,9 @@ limitations under the License. #include "mlir/Analysis/LoopAnalysis.h" // from @llvm-project #include "mlir/Dialect/Quant/FakeQuantSupport.h" // from @llvm-project #include "mlir/Dialect/Quant/UniformSupport.h" // from @llvm-project +#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project #include "mlir/IR/Attributes.h" // from @llvm-project +#include "mlir/IR/Function.h" // from @llvm-project #include "mlir/IR/MLIRContext.h" // from @llvm-project #include "mlir/IR/PatternMatch.h" // from @llvm-project #include "mlir/IR/StandardTypes.h" // from @llvm-project @@ -49,6 +51,7 @@ limitations under the License. 
#include "mlir/Support/LLVM.h" // from @llvm-project #include "mlir/Support/LogicalResult.h" // from @llvm-project #include "mlir/Transforms/DialectConversion.h" // from @llvm-project +#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h" #include "tensorflow/compiler/mlir/lite/quantization/quantization_utils.h" #include "tensorflow/compiler/mlir/lite/transforms/dilated_conv.h" @@ -58,7 +61,9 @@ limitations under the License. #include "tensorflow/compiler/mlir/lite/utils/validators.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #include "tensorflow/compiler/mlir/tensorflow/transforms/einsum.h" +#include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" #include "tensorflow/compiler/mlir/tensorflow/transforms/unroll_batch_matmul.h" +#include "tensorflow/compiler/mlir/xla/transforms/passes.h" #define DEBUG_TYPE "tf-tfl-legalization" @@ -495,7 +500,8 @@ struct ConvertTFStridedSlice : public RewritePattern { original_input_type.getShape(); SmallVector new_shape; int index = 0; - while (index < original_input_shape.size() || new_axis_mask) { + const int original_input_shape_size = original_input_shape.size(); + while (index < original_input_shape_size || new_axis_mask) { if (new_axis_mask & 1) { new_shape.emplace_back(1); } else { @@ -737,6 +743,23 @@ LogicalResult ValidateOp(Operation *op) { return failure(has_illegal_ops); } +// Converts a set of TF2XLA ops into pure TF ops for future legalizations as +// TF2XLA ops aren't supported by later stages. +LogicalResult ConvertTf2XlaOps(FuncOp func, MLIRContext *context) { + ConversionTarget target(*context); + target.addLegalDialect(); + target.addLegalDialect(); + target.addLegalOp(); + target.addLegalOp(); + target.addIllegalOp(); + + OwningRewritePatternList patterns; + mhlo::PopulateLegalizeTfWithTf2XlaPatterns("XLA_CPU_JIT", patterns); + TF::PopulateLegalizeHloToTfPatterns(&patterns, context); + + return applyPartialConversion(func, target, patterns); +} + void PrepareTFPass::runOnFunction() { OwningRewritePatternList patterns; auto func = getFunction(); @@ -752,6 +775,11 @@ void PrepareTFPass::runOnFunction() { return; } + if (failed(ConvertTf2XlaOps(func, ctx))) { + signalPassFailure(); + return; + } + // This pattern was intented to uses TFL QDQs to preserve the quantization // parameters from the TF Quant ops, thus this pattern should run with the // first `applyPatternsGreedily` method, which would otherwise removes the diff --git a/tensorflow/compiler/mlir/lite/utils/lstm_utils.cc b/tensorflow/compiler/mlir/lite/utils/lstm_utils.cc index 2f876c68fb8..3a469dd7341 100644 --- a/tensorflow/compiler/mlir/lite/utils/lstm_utils.cc +++ b/tensorflow/compiler/mlir/lite/utils/lstm_utils.cc @@ -134,7 +134,7 @@ Value SliceRankedTensor(OpBuilder* builder, Value input, // the input tensor's dimensions, return 0-valued tensor of the requested // shape. 
ArrayRef input_shape = GetRankedTensorShape(input); - for (int i = 0; i < input_shape.size(); i++) { + for (int i = 0, end = input_shape.size(); i < end; i++) { if (begin_values[i] < 0 || (begin_values[i] + size_values[i] > input_shape[i])) { return CreateF32SplatConst(builder, size_shape, 0, location); From ae2b8a6dab670a3cf67c4b3ab770722ca84de0cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tar=C3=A9=20Gaskin?= Date: Sun, 26 Jul 2020 22:00:14 +0000 Subject: [PATCH 1353/2522] mlir directory resolutions --- .../mlir/tensorflow/ir/tf_executor.cc | 8 ++++--- .../mlir/tensorflow/transforms/einsum.cc | 6 ++--- .../transforms/promote_resources_to_args.cc | 5 ++-- .../transforms/resource_op_lifting.cc | 7 +++--- .../transforms/tpu_cluster_formation.cc | 6 +++-- .../tpu_merge_variables_with_execute.cc | 6 ++--- .../tpu_variable_runtime_reformatting.cc | 4 ++-- .../tensorflow/translate/export_graphdef.cc | 6 +++-- .../mlir/tensorflow/translate/import_model.cc | 6 +++-- .../translate/mlir_roundtrip_flags.cc | 2 +- .../mlir/tensorflow/utils/export_utils.cc | 4 ++-- .../utils/tpu_rewrite_device_util.cc | 6 +++-- .../tensorflow/utils/xla_sharding_util.cc | 5 ++-- .../compiler/mlir/xla/mlir_hlo_to_hlo.cc | 10 ++++---- .../mlir/xla/transforms/legalize_tf.cc | 23 +++++++++++-------- 15 files changed, 60 insertions(+), 44 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc index 8db06e83527..c18723b0982 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc @@ -190,14 +190,15 @@ LogicalResult Verify(GraphOp graph) { for (int i : llvm::seq(0, fetch.getNumOperands())) { Value operand = fetch.getOperand(i); // Break out of the loop at the first control operand encountered. + const int64_t num_results = graph.getNumResults(); if (operand.getType().isa()) { - if (i != graph.getNumResults()) + if (i != num_results) return fetch.emitOpError() << "operand #" << i << " is a control type, can't be bound to a graph result"; break; } - if (i >= graph.getNumResults()) + if (i >= num_results) return fetch.emitOpError() << "operand #" << i << " does not have a graph results to bind"; if (graph.getResult(i).getType() != operand.getType()) @@ -311,7 +312,8 @@ LogicalResult Verify(IslandOp island) { // Ensure that the yield terminator operands matches the island results type. int result_count = island.getNumResults() - 1; // -1 for the control token - if (yield.getNumOperands() != result_count) + const int num_operands = yield.getNumOperands(); + if (num_operands != result_count) return yield.emitOpError() << "has " << yield.getNumOperands() << " operand, but island returns " << result_count; diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/einsum.cc b/tensorflow/compiler/mlir/tensorflow/transforms/einsum.cc index c05a0ad1b62..69dab58c3f5 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/einsum.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/einsum.cc @@ -74,7 +74,7 @@ constexpr int kNumSupportedEquationVariables = 5; // A - E for now. 
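// Illustrative example (not from the original source): for an equation such
// as "abc,cd->abd" the tokenizer maps a -> 0, b -> 1, c -> 2, d -> 3, staying
// within the five-variable (A - E) limit noted above; `index` is made size_t
// below so the `index < equation.size()` comparison is between operands of
// the same signedness.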
bool tokenizeEquation(const llvm::StringRef& equation, std::vector* tokens) { std::map label_axis_mapping; - int index = 0; + size_t index = 0; int variable_count = 0; llvm::Regex r("[[:alpha:]]"); while (index < equation.size()) { @@ -177,7 +177,7 @@ TF::TransposeOp createTransposeOp(Value value, Location loc, auto perm_attr = DenseElementsAttr::get(perm_type, permutation); auto perm_op = rewriter->create(loc, perm_type, perm_attr); std::vector transposed_shape(shape.begin(), shape.end()); - for (int i = 0; i < shape.size(); ++i) { + for (int i = 0, end = shape.size(); i < end; ++i) { transposed_shape[i] = shape[permutation[i]]; } auto transposed_type = @@ -197,7 +197,7 @@ TF::SumOp createSumOp(Value value, Location loc, auto redux_op = rewriter->create(loc, redux_type, redux_attr); std::vector sum_shape(shape.size() - redux_axes.size()); int count = 0; - for (int i = 0; i < shape.size(); ++i) { + for (int i = 0, end = shape.size(); i < end; ++i) { if (std::find(redux_axes.begin(), redux_axes.end(), i) == redux_axes.end()) { sum_shape[count] = shape[i]; diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/promote_resources_to_args.cc b/tensorflow/compiler/mlir/tensorflow/transforms/promote_resources_to_args.cc index 961287b0b1f..4926dbaf4fb 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/promote_resources_to_args.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/promote_resources_to_args.cc @@ -304,7 +304,7 @@ LogicalResult PromoteResourcesToArguments( continue; } - const auto index = resource_and_index.index(); + const long int index = resource_and_index.index(); const bool is_var_handle = index >= var_handles_start_idx; if (resource.write) { if (!is_var_handle || resource.read) { @@ -342,7 +342,8 @@ LogicalResult PromoteResourcesToArguments( } // Rewrite return if there are variable writes. - if (return_operands.size() > num_results_before) { + const int return_operands_size = return_operands.size(); + if (return_operands_size > num_results_before) { builder.create(return_op.getLoc(), return_operands); return_op.erase(); } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc b/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc index 74679f19941..9c4963ea1c8 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc @@ -656,7 +656,7 @@ LogicalResult HandleWhileLoop(TF::WhileOp while_op, FuncOp body, FuncOp cond) { arg_data_type_and_updated_output_index); new_while.setAttr("output_shapes", builder.getArrayAttr(new_output_shapes)); // Replace uses. - for (int64_t i = 0; i < old_to_new_indices.size(); ++i) { + for (int64_t i = 0, end = old_to_new_indices.size(); i < end; ++i) { if (old_to_new_indices[i] >= 0) { while_op.getResult(i).replaceAllUsesWith( new_while.getResult(old_to_new_indices[i])); @@ -802,7 +802,7 @@ LogicalResult HandleCaseOrIfOp(CaseOrIfOp op, ArrayRef branches) { AddLoadsStoresOutsideControlFlowOp(new_op, arg_data_type_and_updated_output_index); // Replace uses. 
- for (int64_t i = 0; i < old_to_new_output_indices.size(); ++i) { + for (int64_t i = 0, end = old_to_new_output_indices.size(); i < end; ++i) { if (old_to_new_output_indices[i] >= 0) { op.getResult(i).replaceAllUsesWith( new_op.getResult(old_to_new_output_indices[i])); @@ -946,7 +946,8 @@ void UpdatePartitionedCallOpWithNewCallee( AddLoadsStoresOutsideControlFlowOp( new_call, lifting_info.arg_data_type_and_updated_output_index); // Replace uses. - for (int64_t i = 0; i < lifting_info.old_to_new_output_indices.size(); ++i) { + for (int64_t i = 0, end = lifting_info.old_to_new_output_indices.size(); + i < end; ++i) { if (lifting_info.old_to_new_output_indices[i] >= 0) { call_op.getResult(i).replaceAllUsesWith( new_call.getResult(lifting_info.old_to_new_output_indices[i])); diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_cluster_formation.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_cluster_formation.cc index 9abf67b62a9..162ecd77d4f 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_cluster_formation.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_cluster_formation.cc @@ -344,8 +344,9 @@ LogicalResult ReplicateCluster(tf_device::ClusterOp cluster, int num_replicas) { for (auto& pos_and_input : llvm::enumerate(replicated_input_ops)) { auto input = pos_and_input.value(); bool is_packed = llvm::cast(input).is_packed(); + const int num_operands = input->getNumOperands(); int num_inputs = is_packed ? 1 : num_replicas; - if (input->getNumOperands() != num_inputs) + if (num_operands != num_inputs) return input->emitOpError() << "requires " << num_inputs << " operands"; auto tpu_replicated_input = llvm::cast(input); @@ -393,7 +394,8 @@ LogicalResult ReplicateCluster(tf_device::ClusterOp cluster, int num_replicas) { << "requires output of " << cluster.getOperationName() << " to lead to a 'tf.TPUReplicatedOutput' op"; - if (def->getNumResults() != num_replicas) + const int def_NumResults = def->getNumResults(); + if (def_NumResults != num_replicas) return def->emitOpError() << "requires " << num_replicas << " results"; auto replicate_outputs = llvm::make_range( diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_merge_variables_with_execute.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_merge_variables_with_execute.cc index 3fd0dcd5a67..52c9287b619 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_merge_variables_with_execute.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_merge_variables_with_execute.cc @@ -298,7 +298,7 @@ VariableAccessesForTPUExecute BuildVariableAccessInfo( // Populate infos.old_to_new_output_mapping. int new_output_index = 0; infos.old_to_new_output_mapping.resize(execute_launch.getNumResults()); - for (int i = 0; i < execute_launch.getNumResults(); ++i) { + for (int i = 0, end = execute_launch.getNumResults(); i < end; ++i) { if (output_fused[i]) { infos.old_to_new_output_mapping[i] = -1; } else { @@ -375,7 +375,7 @@ void ReplaceParallelExecute(tf_device::ParallelExecuteOp parallel_execute, // Replace the uses of the original parallel_execute for the region containing // the merged execute. 
auto old_region_results = parallel_execute.GetRegionOutputs(region_index); - for (int i = 0; i < infos.old_to_new_output_mapping.size(); ++i) { + for (int i = 0, end = infos.old_to_new_output_mapping.size(); i < end; ++i) { if (infos.old_to_new_output_mapping[i] < 0) continue; old_region_results[i].replaceAllUsesWith(new_parallel_execute_op->getResult( infos.old_to_new_output_mapping[i] + num_results_before_region)); @@ -407,7 +407,7 @@ void ReplaceExecute(tf_device::LaunchOp execute_launch, tf_device::LaunchOp merged_execute_launch, const VariableAccessesForTPUExecute& infos) { // Replace the uses. - for (int i = 0; i < infos.old_to_new_output_mapping.size(); ++i) { + for (int i = 0, end = infos.old_to_new_output_mapping.size(); i < end; ++i) { if (infos.old_to_new_output_mapping[i] < 0) continue; execute_launch.getResult(i).replaceAllUsesWith( merged_execute_launch.getResult(infos.old_to_new_output_mapping[i])); diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_variable_runtime_reformatting.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_variable_runtime_reformatting.cc index 12ce8c57f73..b33d37116cb 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_variable_runtime_reformatting.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_variable_runtime_reformatting.cc @@ -351,7 +351,7 @@ TF::WhileOp AddStateVarsToWhileOp(TF::WhileOp while_op, FuncOp body, cond.setType(FunctionType::get(append_types(cond.getType().getInputs()), cond.getType().getResults(), cond.getContext())); - for (int64_t i = 0; i < state_vars.size(); ++i) { + for (int64_t i = 0, end = state_vars.size(); i < end; ++i) { int64_t arg_index = body.getNumArguments() - state_vars.size() + i; TF::VarHandleOp state_var = state_vars[i]; auto device_attr = state_var.getAttr(kDeviceAttr); @@ -368,7 +368,7 @@ TF::WhileOp AddStateVarsToWhileOp(TF::WhileOp while_op, FuncOp body, if (new_while_op.output_shapes().size() != 0) { auto new_output_shapes = llvm::to_vector<4>(new_while_op.output_shapes()); // VarHandleOp is a scalar shape resource. - for (int64_t i = 0; i < state_vars.size(); ++i) { + for (int64_t i = 0, end = state_vars.size(); i < end; ++i) { new_output_shapes.push_back( mlir::TF::ShapeAttr::get(builder.getContext(), ArrayRef())); } diff --git a/tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.cc b/tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.cc index 7983dfe0065..e508f8fbd6b 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.cc @@ -511,14 +511,16 @@ StatusOr> Exporter::Convert( // generate unique names. if (!output_names.empty()) { const int num_data_results = graph_op.getNumResults(); - TF_RET_CHECK(output_names.size() == num_data_results) + const int64 output_names_size = output_names.size(); + TF_RET_CHECK(output_names_size == num_data_results) << "output names (" << output_names.size() << ") != terminator operands (" << num_data_results << ")"; llvm::DenseMap output_op_to_name; llvm::StringMap name_to_op; for (const auto& it : llvm::enumerate(graph_op.GetFetch().getOperands())) { // Skip control rets. - if (it.index() >= num_data_results) break; + const int64 it_index = it.index(); + if (it_index >= num_data_results) break; // TODO(jpienaar): If there is a result index specified, ensure only one // and that it matches the result index of the op. 
std::string orig_name(output_names[it.index()]); diff --git a/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc b/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc index a12378b66ba..2bf2c900cd2 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc @@ -2387,7 +2387,8 @@ GraphDefImporter::GetArgsRetsAndTypesFromFunctionGraph( "' is missing attribute 'index'"); auto index = attr->i(); - if (nodes->size() < index + 1) nodes->resize(index + 1); + const int nodes_size = nodes->size(); + if (nodes_size < index + 1) nodes->resize(index + 1); if ((*nodes)[index].node != nullptr) return errors::InvalidArgument(node->type_string(), " node '", @@ -3108,7 +3109,8 @@ Status CreateSavedModelIR( TF_ASSIGN_OR_RETURN(auto input_index_paths, input_linearizer.GetLeafIndexPaths( error_context + "in input signature: ")); - if (bound_input_base != input_index_paths.size()) { + const int input_index_paths_size = input_index_paths.size(); + if (bound_input_base != input_index_paths_size) { return errors::InvalidArgument( error_context, "Argument mismatch between concrete function input signature " diff --git a/tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_flags.cc b/tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_flags.cc index 4640cb6ce64..f6d370ca604 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_flags.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/mlir_roundtrip_flags.cc @@ -94,7 +94,7 @@ Status ParseInputArrayInfo(const std::vector& node_names, } // StringMap doesn't support reserve else reserve input map size here. - for (int i = 0; i < node_names.size(); i++) { + for (int i = 0, end = node_names.size(); i < end; i++) { auto& name = node_names[i]; if (name.empty()) continue; diff --git a/tensorflow/compiler/mlir/tensorflow/utils/export_utils.cc b/tensorflow/compiler/mlir/tensorflow/utils/export_utils.cc index 7e018966396..0364b935b92 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/export_utils.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/export_utils.cc @@ -82,7 +82,7 @@ Status ConvertLocation(mlir::Location inst_loc, if (locations.size() <= 1) return errors::InvalidArgument("expected experimental debuf info."); // skip the first one, which is the name of the node_def. - for (int i = 0; i < locations.size() - 1; ++i) { + for (int i = 0, end = locations.size() - 1; i < end; ++i) { TF_RETURN_IF_ERROR(ConvertLocation(locations[i], debug_info)); } } @@ -518,7 +518,7 @@ Status SetSizeAttribute(absl::string_view name, size_t size, // This should be extremely rare as it means we are adding the same // attribute multiple times/have some redundancy in representing this // attribute. - int64 actual_size = result.first->second.i(); + size_t actual_size = result.first->second.i(); // Just check via string output as we shouldn't get here and if we do they // should be trivially the same, else fail. 
if (actual_size != size) diff --git a/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.cc b/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.cc index f884b75bce1..843d491c330 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.cc @@ -149,7 +149,8 @@ Status GetTPUDevices( std::next(system_devices.begin()), system_devices.end())) { auto host_tpu_devices = lookup(device_spec); // Check number of TPU devices per host all match. - if (num_tpus_per_host != host_tpu_devices.size()) + const int64 host_tpu_devices_size = host_tpu_devices.size(); + if (num_tpus_per_host != host_tpu_devices_size) return errors::InvalidArgument( "expected the number of TPU devices per host to be ", num_tpus_per_host, ", got ", host_tpu_devices.size()); @@ -354,7 +355,8 @@ GetGeneralTPUExecutionDeviceAssignment( const int expected_device_assignment_size = num_replicas * num_cores_per_replica * kTPUTopologyRank; - if (device_assignment_attr.size() != expected_device_assignment_size) + const int device_assignment_attr_size = device_assignment_attr.size(); + if (device_assignment_attr_size != expected_device_assignment_size) return errors::InvalidArgument( "length of '", kDeviceAssignmentAttr, "' must be 'num_replicas' * 'num_cores_per_replica' * ", diff --git a/tensorflow/compiler/mlir/tensorflow/utils/xla_sharding_util.cc b/tensorflow/compiler/mlir/tensorflow/utils/xla_sharding_util.cc index 083a5abf840..f662005f8a3 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/xla_sharding_util.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/xla_sharding_util.cc @@ -242,7 +242,8 @@ mlir::LogicalResult ExtractInputsForLogicalDevices( cluster_func.getLoc(), sharding, input_value, builder, &tiled_inputs); if (mlir::failed(result)) return mlir::failure(); - if (tiled_inputs.size() != num_cores_per_replica) + const int64 tiled_inputs_size = tiled_inputs.size(); + if (tiled_inputs_size != num_cores_per_replica) cluster_func.emitError(llvm::formatv( "incorrect {0}-th tiled input sharding received. 
" "Product of tile sharding splits({1}) must be equal to " @@ -376,7 +377,7 @@ mlir::LogicalResult HandleTileShardedOutputs( llvm::SmallVector new_outputs; new_outputs.reserve(num_splits); - for (int i = 0; i < outputs_to_merge.size(); i = i + num_splits) { + for (int i = 0, end = outputs_to_merge.size(); i < end; i = i + num_splits) { mlir::TF::ConcatOp concat_op; auto result = CreateConcatOp(concat_dimension, location, diff --git a/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc b/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc index a4c3c43cfbf..7faac83a8de 100644 --- a/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc +++ b/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc @@ -170,8 +170,8 @@ static std::vector> Convert_source_target_pairs( static std::vector Convert_replica_groups( mlir::DenseIntElementsAttr groups) { - int64_t num_groups = groups.getType().getDimSize(0); - int64_t group_size = groups.getType().getDimSize(1); + uint64_t num_groups = groups.getType().getDimSize(0); + uint64_t group_size = groups.getType().getDimSize(1); std::vector result; result.reserve(num_groups); @@ -435,14 +435,14 @@ static void ExtractShardingsFromFunction( llvm::SmallVectorImpl>* ret_shardings) { arg_shardings->resize(function.getNumArguments(), absl::optional()); - for (int i = 0; i < function.getNumArguments(); ++i) + for (int i = 0, end = function.getNumArguments(); i < end; ++i) if (auto sharding = function.getArgAttrOfType(i, kShardingAttr)) (*arg_shardings)[i] = CreateOpShardingFromStringRef(sharding.getValue()); ret_shardings->resize(function.getNumResults(), absl::optional()); - for (int i = 0; i < function.getNumResults(); ++i) + for (int i = 0, end = function.getNumResults(); i < end; ++i) if (auto sharding = function.getResultAttrOfType(i, kShardingAttr)) (*ret_shardings)[i] = CreateOpShardingFromStringRef(sharding.getValue()); @@ -758,7 +758,7 @@ LogicalResult ExportXlaOp(PadOp op, OpLoweringContext ctx) { auto edge_padding_low = ConvertDenseIntAttr(op.edge_padding_low()); auto edge_padding_high = ConvertDenseIntAttr(op.edge_padding_high()); auto interior_padding = ConvertDenseIntAttr(op.interior_padding()); - for (xla::int64 i = 0; i < edge_padding_low.size(); ++i) { + for (xla::int64 i = 0, end = edge_padding_low.size(); i < end; ++i) { auto* dims = padding_config.add_dimensions(); dims->set_edge_padding_low(edge_padding_low[i]); dims->set_edge_padding_high(edge_padding_high[i]); diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc index 4549386ce16..2a5f553240b 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc @@ -365,7 +365,7 @@ static Value UpdateSliceInMinorDims(Location loc, Value v, Value update, ArrayRef minor_starts, OpBuilder *builder) { llvm::SmallVector dus_starts(minor_starts.size()); - for (int64_t i = 0; i < minor_starts.size(); ++i) { + for (uint64_t i = 0; i < minor_starts.size(); ++i) { dus_starts[i] = GetScalarConstOfType(builder->getIntegerType(32), loc, minor_starts[i], builder); } @@ -808,7 +808,7 @@ static DenseIntElementsAttr SliceDenseIntElementsAttrColumn2D( values.reserve(shaped_type.getNumElements() / shape[1]); for (auto it : llvm::enumerate(int_attr.getIntValues())) { - if (it.index() % shape[1] == column) { + if ( static_cast(it.index() % shape[1]) == column) { values.push_back(it.value().getSExtValue()); } } @@ -1836,6 +1836,9 @@ Operation *AvgPoolDivideByCount( return result; } +Value 
GetAvgPoolInput(TF::AvgPoolOp op) { return op.value(); } +Value GetAvgPoolInput(TF::AvgPool3DOp op) { return op.input(); } + // Converts AvgPool op to HLO ReduceWindow op by setting appropriate window // dimensions with add as the reduction function. The reduction result is // then divided by the number of elements in the window. @@ -1846,8 +1849,9 @@ class ConvertAvgPoolOp : public OpRewritePattern { LogicalResult matchAndRewrite(OpTy op, PatternRewriter &rewriter) const override { + Value input_value = GetAvgPoolInput(op); auto input_type = - op.value().getType().template dyn_cast(); + input_value.getType().template dyn_cast(); if (!input_type) return failure(); // We will do accumulation first; use a larger bitwidth if suitable. @@ -1862,8 +1866,6 @@ class ConvertAvgPoolOp : public OpRewritePattern { else result_type = UnrankedTensorType::get(sum_element_type); - Value input_value = op.value(); - // Convert if we need enlarge the element type's bitwidth. if (input_element_type != sum_element_type) input_value = rewriter.create(op.getLoc(), input_value, @@ -2680,7 +2682,7 @@ class ConvertSplitVOp : public OpRewritePattern { SmallVector slices; slices.reserve(op.getNumResults()); - for (int i = 0; i < op.getNumResults(); ++i) { + for (int i = 0, end = op.getNumResults(); i < end; ++i) { end_indices[dim_index] = begin_indices[dim_index] + split_sizes[i]; slices.push_back(rewriter.create( op.getLoc(), op.value(), GetI64ElementsAttr(begin_indices, &rewriter), @@ -2855,7 +2857,7 @@ class ConvertStridedSliceOp : public OpRewritePattern { // verifier. int64_t slicing_dim_size = op.begin().getType().cast().getShape()[0]; - auto input_rank = input_shape.size(); + const int input_rank = input_shape.size(); for (int d = slicing_dim_size; d < input_rank; ++d) { // We only support slicing major dimensions, so minor dimensions after // slicing dimensions are all sliced with their full sizes. @@ -2896,7 +2898,7 @@ class ConvertStridedSliceOp : public OpRewritePattern { } // For non-slice dims, get the full slice of that dimension. 
- for (int d = slicing_dim_size; d < input_shape.size(); ++d) { + for (int d = slicing_dim_size, end = input_shape.size(); d < end; ++d) { slice_sizes.push_back(input_shape[d]); slice_begin_indices.push_back(zero); } @@ -3592,7 +3594,8 @@ class ConvertTileOp : public OpRewritePattern { multiples.getType().getRank() != 1) return failure(); - if (multiples.getNumElements() != input_shape.size()) return failure(); + const int64_t input_shape_size = input_shape.size(); + if (multiples.getNumElements() != input_shape_size) return failure(); SmallVector broadcasted_shape; SmallVector broadcast_dimensions; @@ -4379,7 +4382,7 @@ class ConvertUnpackOp : public OpRewritePattern { SmallVector results; results.reserve(op.getNumResults()); - for (int i = 0; i < op.getNumResults(); ++i) { + for (int i = 0, end = op.getNumResults(); i < end; ++i) { begin_indices[axis] = i; end_indices[axis] = i + 1; From ed1feb0c26d11d27a31e841d6b5bda2b6a9622f2 Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Sun, 26 Jul 2020 18:02:56 -0400 Subject: [PATCH 1354/2522] Update tpu_variable_runtime_reformatting.cc --- .../tensorflow/transforms/tpu_variable_runtime_reformatting.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_variable_runtime_reformatting.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_variable_runtime_reformatting.cc index 1e150744e4a..d1e6160e90c 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_variable_runtime_reformatting.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_variable_runtime_reformatting.cc @@ -365,7 +365,6 @@ TF::WhileOp AddStateVarsToWhileOp(TF::WhileOp while_op, FuncOp body, while_op.getLoc(), append_types(llvm::to_vector<4>(while_op.getResultTypes())), new_while_operands, while_op.getAttrs()); - while_op.replaceAllUsesWith( new_while_op.getResults().take_front(while_op.getNumResults())); while_op.erase(); From ad58928e656eae6c02937239885ef2e82f8d7c15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tar=C3=A9=20Gaskin?= Date: Sun, 26 Jul 2020 22:06:02 +0000 Subject: [PATCH 1355/2522] tf2xla directory resolutions --- tensorflow/compiler/tf2xla/const_analysis.cc | 3 +-- .../compiler/tf2xla/functionalize_cond.cc | 12 ++++++---- .../compiler/tf2xla/functionalize_while.cc | 10 ++++---- tensorflow/compiler/tf2xla/graph_compiler.cc | 7 +++--- tensorflow/compiler/tf2xla/lib/data_format.cc | 2 +- tensorflow/compiler/tf2xla/literal_util.cc | 2 +- .../tf2xla/rearrange_function_argument.cc | 20 ++++++++-------- tensorflow/compiler/tf2xla/tf2xla.cc | 2 +- tensorflow/compiler/tf2xla/tf2xla_util.cc | 4 ++-- tensorflow/compiler/tf2xla/xla_compiler.cc | 23 +++++++++++-------- 10 files changed, 46 insertions(+), 39 deletions(-) diff --git a/tensorflow/compiler/tf2xla/const_analysis.cc b/tensorflow/compiler/tf2xla/const_analysis.cc index 1da34266460..3b7c986fe00 100644 --- a/tensorflow/compiler/tf2xla/const_analysis.cc +++ b/tensorflow/compiler/tf2xla/const_analysis.cc @@ -74,8 +74,7 @@ Status CondConstInputIndices( *(fbody->graph), &compile_time_const_arg_indices, /*compile_time_const_nodes=*/nullptr, flib_runtime)); } - for (int i = 0, iter_limit = compile_time_const_arg_indices.size(); - i < iter_limit; i++) { + for (int i = 0, end = compile_time_const_arg_indices.size(); i < end; i++) { if (compile_time_const_arg_indices[i]) { // The 0th input is the pred or branch index, which is not passed to the // branches. 
So the i'th input of a branch function corresponds to the diff --git a/tensorflow/compiler/tf2xla/functionalize_cond.cc b/tensorflow/compiler/tf2xla/functionalize_cond.cc index 459b2814c0d..ef8222853d2 100644 --- a/tensorflow/compiler/tf2xla/functionalize_cond.cc +++ b/tensorflow/compiler/tf2xla/functionalize_cond.cc @@ -224,7 +224,8 @@ string DebugString(const CondArgNodes& nodes) { } StateMap::CondId StateMap::LookupCondId(const Node* node) const { - if (node->id() < node_to_condid_map_.size()) + const int64 node_to_condid_map_size = node_to_condid_map_.size(); + if (node->id() < node_to_condid_map_size) return node_to_condid_map_[node->id()]; return added_node_condid_mapping_.at(node->id()); } @@ -235,14 +236,16 @@ StateMap::CondId StateMap::GetCondId(const StateMap::CondState& state) { } void StateMap::ResetCondId(const Node* node, StateMap::CondId id) { - if (node->id() < node_to_condid_map_.size()) + const int64 node_to_condid_map_size = node_to_condid_map_.size(); + if (node->id() < node_to_condid_map_size) node_to_condid_map_[node->id()] = id; else added_node_condid_mapping_[node->id()] = id; } StateMap::AncestorId StateMap::LookupAncestorId(const Node* node) const { - if (node->id() < node_to_ancestorid_map_.size()) + const int64 node_to_ancestorid_map_size = node_to_ancestorid_map_.size(); + if (node->id() < node_to_ancestorid_map_size) return node_to_ancestorid_map_[node->id()]; return added_node_ancestorid_mapping_.at(node->id()); } @@ -254,7 +257,8 @@ StateMap::AncestorId StateMap::GetAncestorId( } void StateMap::ResetAncestorId(const Node* node, StateMap::AncestorId id) { - if (node->id() < node_to_ancestorid_map_.size()) + const int64 node_to_ancestorid_map_size = node_to_ancestorid_map_.size(); + if (node->id() < node_to_ancestorid_map_size) node_to_ancestorid_map_[node->id()] = id; else added_node_ancestorid_mapping_[node->id()] = id; diff --git a/tensorflow/compiler/tf2xla/functionalize_while.cc b/tensorflow/compiler/tf2xla/functionalize_while.cc index cea4973f42b..dce5efe5557 100644 --- a/tensorflow/compiler/tf2xla/functionalize_while.cc +++ b/tensorflow/compiler/tf2xla/functionalize_while.cc @@ -130,7 +130,7 @@ Status BuildLoopCondition(const Graph& graph, WhileLoopFrame* frame, std::vector squash_src_outputs(graph.num_node_ids(), false); // Build one _Arg node for each Enter node. 
- for (int i = 0; i < frame->args.size(); ++i) { + for (int i = 0, end = frame->args.size(); i < end; ++i) { const WhileLoopArg& arg = frame->args[i]; TF_ASSIGN_OR_RETURN(Node * arg_node, @@ -170,7 +170,7 @@ Status BuildLoopBody(const Graph& graph, WhileLoopFrame* frame, std::vector next_iterations; next_iterations.reserve(frame->args.size()); arg_types->reserve(frame->args.size()); - for (int i = 0; i < frame->args.size(); ++i) { + for (int i = 0, end = frame->args.size(); i < end; ++i) { const WhileLoopArg& arg = frame->args[i]; DataType dtype = arg.enter->input_type(0); @@ -235,7 +235,7 @@ Status FunctionalizeLoop(Graph* graph, WhileLoopFrame* frame, } else { std::vector edges(arg.enter->out_edges().begin(), arg.enter->out_edges().end()); - for (int i = 0; i < edges.size(); ++i) { + for (int i = 0, end = edges.size(); i < end; ++i) { if (edges[i]->IsControlEdge() && edges[i]->dst()->IsSink()) { continue; } @@ -447,7 +447,7 @@ Status FunctionalizeLoop(Graph* graph, WhileLoopFrame* frame, } } std::vector inputs; - for (int i = 0; i < frame->args.size(); ++i) { + for (int i = 0, end = frame->args.size(); i < end; ++i) { const WhileLoopArg& arg = frame->args[i]; const Edge* in_edge; TF_RETURN_IF_ERROR(arg.enter->input_edge(0, &in_edge)); @@ -463,7 +463,7 @@ Status FunctionalizeLoop(Graph* graph, WhileLoopFrame* frame, TF_ASSIGN_OR_RETURN(Node * while_node, AddNodeDefToGraph(while_def, graph)); // Copies edges to the Enter nodes and from the Exit nodes onto the While. - for (int i = 0; i < frame->args.size(); ++i) { + for (int i = 0, end = frame->args.size(); i < end; ++i) { const WhileLoopArg& arg = frame->args[i]; const Edge* in_edge; TF_RETURN_IF_ERROR(arg.enter->input_edge(0, &in_edge)); diff --git a/tensorflow/compiler/tf2xla/graph_compiler.cc b/tensorflow/compiler/tf2xla/graph_compiler.cc index 5f6dcad5538..30a7e94775b 100644 --- a/tensorflow/compiler/tf2xla/graph_compiler.cc +++ b/tensorflow/compiler/tf2xla/graph_compiler.cc @@ -65,7 +65,7 @@ Status PrepareArguments(XlaOpKernelContext* ctx, Graph* graph, /*compile_time_const_nodes=*/nullptr, ctx->function_library())); args->resize(expressions.size()); - for (int i = 0, iter_limit = args->size(); i < iter_limit; ++i) { + for (int i = 0, end = args->size(); i < end; ++i) { XlaCompiler::Argument& arg = (*args)[i]; arg.type = ctx->input_type(i); arg.shape = ctx->InputShape(i); @@ -269,7 +269,7 @@ Status GraphCompiler::CompileFunctionalNode(Node* n, TF_RET_CHECK(arguments.size() == expressions.size()); std::vector handles; - for (int64 i = 0, iter_limit = expressions.size(); i < iter_limit; ++i) { + for (int64 i = 0, end = expressions.size(); i < end; ++i) { if (arguments[i].kind == XlaCompiler::Argument::kConstant) { continue; } @@ -313,8 +313,7 @@ Status GraphCompiler::CompileFunctionalNode(Node* n, } } - for (int64 i = 0, iter_limit = result.resource_updates.size(); i < iter_limit; - i++) { + for (int64 i = 0, end = result.resource_updates.size(); i < end; i++) { if (result.resource_updates[i].modified) { XlaResource* resource = expressions[result.resource_updates[i].input_index]->resource(); diff --git a/tensorflow/compiler/tf2xla/lib/data_format.cc b/tensorflow/compiler/tf2xla/lib/data_format.cc index 2ab86c78e44..e5913a8bbf3 100644 --- a/tensorflow/compiler/tf2xla/lib/data_format.cc +++ b/tensorflow/compiler/tf2xla/lib/data_format.cc @@ -66,7 +66,7 @@ xla::StatusOr Expand(xla::XlaOp input, int64 dim) { // Move the newly created dimension to the end with a transpose. 
std::vector permutation; - for (int64 i = 0, iter_limit = expanded_shape.size(); i != iter_limit; ++i) { + for (int64 i = 0, end = expanded_shape.size(); i != end; ++i) { permutation.push_back(i); if (i == dim) { ++i; diff --git a/tensorflow/compiler/tf2xla/literal_util.cc b/tensorflow/compiler/tf2xla/literal_util.cc index 42a95bbb9f8..74ca16bbaeb 100644 --- a/tensorflow/compiler/tf2xla/literal_util.cc +++ b/tensorflow/compiler/tf2xla/literal_util.cc @@ -72,7 +72,7 @@ Status HostTensorsToBorrowingLiteralTuple(absl::Span host_tensors, buf_ptrs.reserve(host_tensors.size()); std::vector tensor_shapes(host_tensors.size()); - for (int i = 0, iter_limit = host_tensors.size(); i < iter_limit; i++) { + for (int i = 0, end = host_tensors.size(); i < end; i++) { // Validate runtime shapes and fail if it doesn't match the contract. const Tensor* tensor = &host_tensors[i]; buf_ptrs.emplace_back(static_cast(DMAHelper::base(tensor))); diff --git a/tensorflow/compiler/tf2xla/rearrange_function_argument.cc b/tensorflow/compiler/tf2xla/rearrange_function_argument.cc index b6f8928f31e..635585639a8 100644 --- a/tensorflow/compiler/tf2xla/rearrange_function_argument.cc +++ b/tensorflow/compiler/tf2xla/rearrange_function_argument.cc @@ -41,7 +41,7 @@ std::vector ShuffleInputDataTypeAttribute( const std::vector& in_types, const std::vector& index_mapping) { std::vector result(index_mapping.size()); - for (int i = 0; i < in_types.size(); i++) { + for (int i = 0, end = in_types.size(); i < end; i++) { result[index_mapping.at(i)] = in_types[i]; } return result; @@ -56,7 +56,7 @@ Status InputTypesNeedsRearrange(const std::vector& in_types, bool* need_rewrite, int* resource_input_count, std::vector* index_mapping) { int first_resource_index = -1; - for (int i = 0; i < in_types.size(); i++) { + for (int i = 0, end = in_types.size(); i < end; i++) { DataType type = in_types[i]; if (type == DT_RESOURCE) { first_resource_index = i; @@ -70,7 +70,7 @@ Status InputTypesNeedsRearrange(const std::vector& in_types, } *need_rewrite = false; - for (int i = first_resource_index + 1; i < in_types.size(); i++) { + for (int i = first_resource_index + 1, end = in_types.size(); i < end; i++) { if (in_types[i] != DT_RESOURCE) { *need_rewrite = true; break; @@ -81,7 +81,7 @@ Status InputTypesNeedsRearrange(const std::vector& in_types, } *resource_input_count = 0; - for (int i = 0; i < in_types.size(); i++) { + for (int i = 0, end = in_types.size(); i < end; i++) { DataType type = in_types[i]; if (type == DT_RESOURCE) { ++(*resource_input_count); @@ -90,7 +90,7 @@ Status InputTypesNeedsRearrange(const std::vector& in_types, int non_resource_index = 0, resource_index = in_types.size() - *resource_input_count; index_mapping->resize(in_types.size()); - for (int i = 0; i < in_types.size(); i++) { + for (int i = 0, end = in_types.size(); i < end; i++) { if (in_types[i] != DT_RESOURCE) { (*index_mapping)[i] = non_resource_index; non_resource_index++; @@ -146,7 +146,7 @@ Status ReorderOutputEdges(Graph* g, Node* n, int input_count, int dst_input = e->dst_input(); g->RemoveEdge(e); - if (new_src_output < input_count - resource_input_count) { + if (new_src_output < static_cast(input_count - resource_input_count)) { g->AddEdge(n, new_src_output, dst, dst_input); } else { const Edge* input_edge; @@ -180,7 +180,7 @@ Status CalculateRetvalRearrange( const gtl::InlinedVector& ret_nodes, // non-absl ok std::map* retval_index_mapping, std::map* resource_retval_to_arg) { - for (int i = 0; i < ret_nodes.size(); i++) { + for (int i = 0, end = 
ret_nodes.size(); i < end; i++) { Node* n = ret_nodes[i]; DataType t; TF_RETURN_IF_ERROR(GetNodeAttr(n->def(), "T", &t)); @@ -261,7 +261,7 @@ Status RearrangeOutputEdges(Node* n, Graph* g, void RearrangeRetvalNodes( const gtl::InlinedVector& ret_nodes, // non-absl ok Graph* g, const std::map& retval_index_mapping) { - for (int i = 0; i < ret_nodes.size(); i++) { + for (int i = 0, end = ret_nodes.size(); i < end; i++) { Node* n = ret_nodes[i]; auto iter = retval_index_mapping.find(i); if (iter == retval_index_mapping.end()) { @@ -317,7 +317,7 @@ Status MaybeRewriteWhileNode( // lambda resource_var1, resource_var2: [resource_var2, resource_var1], // [resource_var1, resource_var2]) if (attr_name == "body") { - for (int i = 0; i < fbody->ret_nodes.size(); i++) { + for (int i = 0, end = fbody->ret_nodes.size(); i < end; i++) { Node* n = fbody->ret_nodes[i]; DataType dtype; TF_RETURN_IF_ERROR(GetNodeAttr(n->def(), "T", &dtype)); @@ -349,7 +349,7 @@ Status MaybeRewriteWhileNode( RearrangeArgNodes(&fbody->arg_nodes, index_mapping); if (attr_name == "body") { - for (int i = 0; i < fbody->ret_nodes.size(); i++) { + for (int i = 0, end = fbody->ret_nodes.size(); i < end; i++) { Node* n = fbody->ret_nodes[i]; int new_index = index_mapping.at(i); if (new_index < types.size() - resource_input_count) { diff --git a/tensorflow/compiler/tf2xla/tf2xla.cc b/tensorflow/compiler/tf2xla/tf2xla.cc index 0454bbb771a..242a2b04ab9 100644 --- a/tensorflow/compiler/tf2xla/tf2xla.cc +++ b/tensorflow/compiler/tf2xla/tf2xla.cc @@ -87,7 +87,7 @@ Status ConvertGraphToXla(std::unique_ptr graph, *computation = std::move(*result.computation); int num_const_results = 0; - for (int i = 0, iter_limit = result.outputs.size(); i < iter_limit; ++i) { + for (int i = 0, end = result.outputs.size(); i < end; ++i) { // Ending up with const results (i.e. output args) is an error, since it // means that one or more fetches that the user specified will be dropped // from the generated function. It's most likely a configuration error, diff --git a/tensorflow/compiler/tf2xla/tf2xla_util.cc b/tensorflow/compiler/tf2xla/tf2xla_util.cc index 5229104e674..8863b08b77b 100644 --- a/tensorflow/compiler/tf2xla/tf2xla_util.cc +++ b/tensorflow/compiler/tf2xla/tf2xla_util.cc @@ -143,7 +143,7 @@ Status ReplaceArgUsageWithConstNode( usages.push_back({e->dst()->id(), e->dst_input()}); } - for (int i = 0; i < usages.size(); i++) { + for (int i = 0, end = usages.size(); i < end; i++) { // Make a copy of `usage_node`, and change its input to const node. Node* usage_node = g->FindNodeId(usages[i].dst_node_id); NodeDef replace_def = usage_node->def(); @@ -158,7 +158,7 @@ Status ReplaceArgUsageWithConstNode( // Later entries in `usages` might have `usage_node` as dst node, but // `usage_node` is removed. Replace such entries with `replace_node`. 
- for (int j = i + 1; j < usages.size(); j++) { + for (int j = i + 1, end = usages.size(); j < end; j++) { if (usages[j].dst_node_id == usages[i].dst_node_id) { usages[j].dst_node_id = replace_node->id(); } diff --git a/tensorflow/compiler/tf2xla/xla_compiler.cc b/tensorflow/compiler/tf2xla/xla_compiler.cc index 333fa53790d..97254c17b09 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.cc +++ b/tensorflow/compiler/tf2xla/xla_compiler.cc @@ -64,7 +64,7 @@ Status CheckSignature(const DataTypeVector& types, return errors::Internal("Compilation arguments have ", args.size(), " elements while function has ", types.size()); } - for (int i = 0, iter_limit = types.size(); i < iter_limit; ++i) { + for (int i = 0, end = types.size(); i < end; ++i) { // Don't perform type checks on resource variables and tensor // lists (DT_VARIANT) as we have to trick the type system in order to // plumb them through. DT_VARIANTS are wrapped in a DT_UINT8 tensor. @@ -192,7 +192,7 @@ Status BuildComputation( // replicate sharding is used. The first element is the output index, second // element is the sharding. std::unordered_map retval_index_and_sharding; - for (int i = 0, iter_limit = retvals.size(); i < iter_limit; ++i) { + for (int i = 0, end = retvals.size(); i < end; ++i) { XlaCompiler::OutputDescription& output = (*outputs)[i]; const XlaExpression& retval = retvals[i]; output.type = retval.dtype(); @@ -362,7 +362,7 @@ Status BuildComputation( xla::Shape shape = xla::ShapeUtil::MakeTupleShape(elem_shapes); // Copy specified sharding from retval_index_and_sharding. std::vector sharding_elems; - for (int i = 0, iter_limit = elems.size(); i < iter_limit; i++) { + for (int i = 0, end = elems.size(); i < end; i++) { const auto& iter = retval_index_and_sharding.find(i); TF_RET_CHECK(iter != retval_index_and_sharding.end()); const xla::OpSharding& sub_op_sharding = iter->second; @@ -707,7 +707,7 @@ Status XlaCompiler::CompileFunction( // Set shapes for _Arg nodes. They are useful for constant folding (e.g. an // Xla op requires a compile-time constant input, and that input is shape of // an _Arg node. - for (int i = 0, iter_limit = args.size(); i < iter_limit; i++) { + for (int i = 0, end = args.size(); i < end; i++) { // Skip resource variables and tensor lists. DataType dtype; TF_RETURN_IF_ERROR(GetNodeAttr(fbody->arg_nodes[i]->def(), "T", &dtype)); @@ -949,7 +949,7 @@ Status XlaCompiler::BuildArguments( // to the d'th XLA input. Note that the value -1 corresponds to constants, or // other args that don't correspond to an input. std::vector arg_to_inputs(args.size(), -1); - for (int i = 0, iter_limit = input_to_args->size(); i < iter_limit; i++) { + for (int i = 0, end = input_to_args->size(); i < end; i++) { arg_to_inputs[input_to_args->at(i)] = i; } @@ -995,7 +995,7 @@ Status XlaCompiler::BuildArguments( : it->second; } std::vector is_same_across_replicas; - for (int i = 0, iter_limit = input_to_args->size(); i < iter_limit; ++i) { + for (int i = 0, end = input_to_args->size(); i < end; ++i) { // Add an entry to is_same_across_replicas for every leaf buffer. 
is_same_across_replicas.insert( is_same_across_replicas.end(), @@ -1011,7 +1011,7 @@ Status XlaCompiler::BuildArguments( tuple = xla::Parameter(builder, 0, (*input_shapes)[0], "arg_tuple"); } - for (int i = 0, iter_limit = input_to_args->size(); i < iter_limit; ++i) { + for (int i = 0, end = input_to_args->size(); i < end; ++i) { const XlaCompiler::Argument& arg = args[input_to_args->at(i)]; for (const auto& dim_and_arg_num : arg.dynamic_dim_to_arg_num_map) { int dynamic_size_param_index = arg_to_inputs.at(dim_and_arg_num.second); @@ -1030,6 +1030,11 @@ Status XlaCompiler::BuildArguments( xla::XlaScopedShardingAssignment assign_sharding( builder, it == arg_shardings.end() ? absl::optional() : it->second); + auto& arg = args[input_to_args->at(i)]; + + xla::OpMetadata arg_metadata; + arg_metadata.set_op_name(arg.node_name); + builder->SetOneShotOpMetadata(arg_metadata); arg_handles[i] = xla::GetTupleElement(tuple, i); } } else { @@ -1052,7 +1057,7 @@ Status XlaCompiler::BuildArguments( } } - for (int i = 0, iter_limit = input_to_args->size(); i < iter_limit; ++i) { + for (int i = 0, end = input_to_args->size(); i < end; ++i) { const XlaCompiler::Argument& arg = args[input_to_args->at(i)]; for (const auto& dim_and_arg_num : arg.dynamic_dim_to_arg_num_map) { int dynamic_size_param_index = arg_to_inputs.at(dim_and_arg_num.second); @@ -1373,7 +1378,7 @@ void SetTransfer(const string& key, absl::Span types, tf2xla::HostTransferMetadata* transfer) { transfer->set_key(key); CHECK(types.size() == shapes.size()); - for (int i = 0, iter_limit = types.size(); i < iter_limit; ++i) { + for (int i = 0, end = types.size(); i < end; ++i) { tf2xla::TensorMetadata* metadata = transfer->add_metadata(); metadata->set_type(types[i]); shapes[i].AsProto(metadata->mutable_shape()); From 939db02ff575c90fecdba9022dda5fb13012e16f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tar=C3=A9=20Gaskin?= Date: Sun, 26 Jul 2020 22:14:33 +0000 Subject: [PATCH 1356/2522] xla directory resolutions --- tensorflow/compiler/xla/array.h | 5 +-- tensorflow/compiler/xla/client/client.cc | 4 +-- tensorflow/compiler/xla/client/lib/math.cc | 4 +-- tensorflow/compiler/xla/client/lib/pooling.cc | 7 ++-- tensorflow/compiler/xla/client/lib/slicing.cc | 5 +-- .../compiler/xla/client/local_client.cc | 5 +-- tensorflow/compiler/xla/client/xla_builder.cc | 26 +++++++++----- tensorflow/compiler/xla/index_util.cc | 6 ++-- tensorflow/compiler/xla/layout_util.cc | 3 +- tensorflow/compiler/xla/literal.cc | 34 +++++++++++-------- tensorflow/compiler/xla/literal_util.cc | 12 +++---- .../compiler/xla/metric_table_report.cc | 14 +++++--- 12 files changed, 74 insertions(+), 51 deletions(-) diff --git a/tensorflow/compiler/xla/array.h b/tensorflow/compiler/xla/array.h index 67bad0f8af7..4654ec2d53d 100644 --- a/tensorflow/compiler/xla/array.h +++ b/tensorflow/compiler/xla/array.h @@ -403,7 +403,8 @@ class Array { // Returns the size of the dimension at the given index. 
int64 dim(int64 n) const { - CHECK(n < sizes_.size()); + const int64 sizes_size = sizes_.size(); + CHECK(n < sizes_size); return sizes_[n]; } @@ -427,7 +428,7 @@ class Array { if (sizes_.size() != other.sizes_.size()) { return false; } - for (int64 i = 0; i < sizes_.size(); ++i) { + for (int64 i = 0, end = sizes_.size(); i < end; ++i) { if (sizes_[i] != other.sizes_[i]) { return false; } diff --git a/tensorflow/compiler/xla/client/client.cc b/tensorflow/compiler/xla/client/client.cc index 4f020bcec27..09449aeb8b8 100644 --- a/tensorflow/compiler/xla/client/client.cc +++ b/tensorflow/compiler/xla/client/client.cc @@ -312,7 +312,7 @@ StatusOr> Client::Execute( // device 0. // // TODO(b/118493728): Allow Execute to return one result per computation. - for (int64 i = 0; i < results.size(); i++) { + for (int64 i = 0, end = results.size(); i < end; i++) { TF_ASSIGN_OR_RETURN(const Shape& shape, GetShape(*results[i])); if (!ShapeUtil::IsEmptyTuple(shape)) { VLOG(3) << "Fetching result from device " << i << ": " @@ -350,7 +350,7 @@ StatusOr>> Client::ExecuteParallel( } std::vector> outputs; - for (size_t i = 0; i < response.responses_size(); ++i) { + for (size_t i = 0, end = response.responses_size(); i < end; ++i) { outputs.push_back( absl::make_unique(stub_, response.responses(i).output())); if (i < computations.size() && diff --git a/tensorflow/compiler/xla/client/lib/math.cc b/tensorflow/compiler/xla/client/lib/math.cc index baafd7d705b..6fdaab58686 100644 --- a/tensorflow/compiler/xla/client/lib/math.cc +++ b/tensorflow/compiler/xla/client/lib/math.cc @@ -511,7 +511,7 @@ XlaOp Lgamma(XlaOp input) { XlaOp z = Select(need_to_reflect, -input, input - one); XlaOp x = base_lanczos_coeff; - for (int i = 0; i < kLanczosCoefficients.size(); ++i) { + for (int i = 0, end = kLanczosCoefficients.size(); i < end; ++i) { XlaOp lanczos_coefficient = ScalarLike(input, kLanczosCoefficients[i]); XlaOp index = ScalarLike(input, i); x = x + lanczos_coefficient / (z + index + one); @@ -647,7 +647,7 @@ XlaOp Digamma(XlaOp input) { XlaOp num = zero; XlaOp denom = base_lanczos_coeff; - for (int i = 0; i < kLanczosCoefficients.size(); ++i) { + for (int i = 0, end = kLanczosCoefficients.size(); i < end; ++i) { XlaOp lanczos_coefficient = ScalarLike(input, kLanczosCoefficients[i]); XlaOp index = ScalarLike(input, i); num = num - lanczos_coefficient / ((z + index + one) * (z + index + one)); diff --git a/tensorflow/compiler/xla/client/lib/pooling.cc b/tensorflow/compiler/xla/client/lib/pooling.cc index 45033ec07e7..6a0db64b834 100644 --- a/tensorflow/compiler/xla/client/lib/pooling.cc +++ b/tensorflow/compiler/xla/client/lib/pooling.cc @@ -198,15 +198,16 @@ XlaOp AvgPoolGrad(XlaOp out_backprop, absl::Span gradients_size, XlaBuilder* b = out_backprop.builder(); return b->ReportErrorOrReturn([&]() -> StatusOr { const int num_dims = kernel_size.size(); - - if (gradients_size.size() != num_dims) { + const int gradients_size_size = gradients_size.size(); + if (gradients_size_size != num_dims) { return tensorflow::errors::InvalidArgument("gradients must be ", num_dims, "-dimensional"); } TF_ASSIGN_OR_RETURN(Shape out_backprop_xla_shape, b->GetShape(out_backprop)); - if (out_backprop_xla_shape.dimensions().size() != num_dims) { + const int obxsd_size = out_backprop_xla_shape.dimensions().size(); + if (obxsd_size != num_dims) { return tensorflow::errors::InvalidArgument("out_backprop must be ", num_dims, "-dimensional"); } diff --git a/tensorflow/compiler/xla/client/lib/slicing.cc b/tensorflow/compiler/xla/client/lib/slicing.cc 
index 1ea713467f8..ebb35c5df82 100644 --- a/tensorflow/compiler/xla/client/lib/slicing.cc +++ b/tensorflow/compiler/xla/client/lib/slicing.cc @@ -74,12 +74,13 @@ XlaOp UpdateSlice(XlaOp x, XlaOp update, absl::Span start) { return builder->ReportErrorOrReturn([&]() -> StatusOr { TF_ASSIGN_OR_RETURN(Shape shape, builder->GetShape(x)); const int64 n_dims = shape.rank(); - TF_RET_CHECK(start.size() == n_dims); + const int64 start_size = start.size(); + TF_RET_CHECK(start_size == n_dims); // TODO(phawkins): make int64 work on all backends, remove the int32 cast. std::vector start_as_int32(start.begin(), start.end()); std::vector start_ops(start.size()); - for (int i = 0; i < start.size(); ++i) { + for (int i = 0, end = start.size(); i < end; ++i) { start_ops[i] = ConstantR0(builder, start_as_int32[i]); } return DynamicUpdateSlice(x, update, start_ops); diff --git a/tensorflow/compiler/xla/client/local_client.cc b/tensorflow/compiler/xla/client/local_client.cc index fee92957096..1389f548c5d 100644 --- a/tensorflow/compiler/xla/client/local_client.cc +++ b/tensorflow/compiler/xla/client/local_client.cc @@ -122,12 +122,13 @@ LocalExecutable::RunHelper(const absl::Span argument_shapes, executable_->module_config().entry_computation_layout(); // Check argument number, shapes, and layouts. - if (argument_shapes.size() != computation_layout.parameter_count()) { + const int argument_shapes_size = argument_shapes.size(); + if (argument_shapes_size != computation_layout.parameter_count()) { return InvalidArgument( "invalid number of arguments for computation: expected %d, got %u", computation_layout.parameter_count(), argument_shapes.size()); } - for (int i = 0; i < argument_shapes.size(); ++i) { + for (int i = 0, end = argument_shapes.size(); i < end; ++i) { if (!computation_layout.parameter_layout(i).MatchesLayoutInShape( *argument_shapes[i])) { return InvalidParameterArgument( diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc index cc6a680c4e9..c84d2b519dc 100644 --- a/tensorflow/compiler/xla/client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_builder.cc @@ -766,15 +766,17 @@ XlaOp XlaBuilder::BroadcastInDim( TF_ASSIGN_OR_RETURN(auto output_shape, ShapeUtil::MakeValidatedShape( operand_shape->element_type(), out_dim_size)); - if (operand_shape->rank() != broadcast_dimensions.size()) { + tensorflow::int64 broadcast_dimensions_size = broadcast_dimensions.size(); + if (operand_shape->rank() != broadcast_dimensions_size) { return InvalidArgument( "Size of broadcast_dimensions has to match operand's rank; operand " "rank: %lld, size of broadcast_dimensions %u.", operand_shape->rank(), broadcast_dimensions.size()); } - for (int i = 0; i < broadcast_dimensions.size(); i++) { + for (int i = 0, end = broadcast_dimensions.size(); i < end; i++) { + const tensorflow::int64 out_dim_size_size = out_dim_size.size(); if (broadcast_dimensions[i] < 0 || - broadcast_dimensions[i] > out_dim_size.size()) { + broadcast_dimensions[i] > out_dim_size_size) { return InvalidArgument("Broadcast dimension %lld is out of bound", broadcast_dimensions[i]); } @@ -786,7 +788,7 @@ XlaOp XlaBuilder::BroadcastInDim( *operand_shape, output_shape, broadcast_dimensions) .status()); std::vector in_dim_size(out_dim_size.begin(), out_dim_size.end()); - for (int i = 0; i < broadcast_dimensions.size(); i++) { + for (int i = 0, end = broadcast_dimensions.size(); i < end; i++) { in_dim_size[broadcast_dimensions[i]] = operand_shape->dimensions(i); } const auto& in_dim_shape = @@ 
-835,7 +837,7 @@ StatusOr XlaBuilder::SliceInternal(const Shape& shape, XlaOp operand, absl::Span strides) { HloInstructionProto instr; *instr.mutable_shape() = shape.ToProto(); - for (int i = 0; i < start_indices.size(); i++) { + for (int i = 0, end = start_indices.size(); i < end; i++) { auto* slice_config = instr.add_slice_dimensions(); slice_config->set_start(start_indices[i]); slice_config->set_limit(limit_indices[i]); @@ -1543,7 +1545,7 @@ XlaOp XlaBuilder::AfterAll(absl::Span tokens) { if (tokens.empty()) { return InvalidArgument("AfterAll requires at least one operand"); } - for (int i = 0; i < tokens.size(); ++i) { + for (int i = 0, end = tokens.size(); i < end; ++i) { XlaOp operand = tokens[i]; TF_ASSIGN_OR_RETURN(const Shape* operand_shape, GetShapePtr(operand)); if (!operand_shape->IsToken()) { @@ -2007,7 +2009,7 @@ XlaOp XlaBuilder::ConditionalImpl( std::vector branch_operand_shapes(branch_operands.size()); std::vector branch_computation_shapes( branch_computations.size()); - for (int j = 0; j < branch_operands.size(); ++j) { + for (int j = 0, end = branch_operands.size(); j < end; ++j) { TF_ASSIGN_OR_RETURN(branch_operand_shapes[j], GetShape(branch_operands[j])); TF_ASSIGN_OR_RETURN(branch_computation_shapes[j], @@ -2416,7 +2418,8 @@ XlaOp XlaBuilder::AllToAll(XlaOp operand, int64 split_dimension, if (layout) { TF_RET_CHECK(shape.IsTuple() && !ShapeUtil::IsNestedTuple(shape)); for (int64 i = 0; i < ShapeUtil::TupleElementCount(shape); ++i) { - if (layout->minor_to_major().size() != shape.tuple_shapes(i).rank()) { + const int64 layout_minor_to_major_size = layout->minor_to_major().size(); + if (layout_minor_to_major_size != shape.tuple_shapes(i).rank()) { return InvalidArgument( "Provided layout must be compatible with the operand shape: %s " "vs %s", @@ -3021,7 +3024,12 @@ StatusOr XlaBuilder::AddInstruction(HloInstructionProto&& instr, instr.add_operand_ids(operand.handle()); } - *instr.mutable_metadata() = metadata_; + if (one_shot_metadata_.has_value()) { + *instr.mutable_metadata() = one_shot_metadata_.value(); + one_shot_metadata_.reset(); + } else { + *instr.mutable_metadata() = metadata_; + } if (sharding_) { *instr.mutable_sharding() = *sharding_; } diff --git a/tensorflow/compiler/xla/index_util.cc b/tensorflow/compiler/xla/index_util.cc index 463a8d95fc5..4bec454e520 100644 --- a/tensorflow/compiler/xla/index_util.cc +++ b/tensorflow/compiler/xla/index_util.cc @@ -143,7 +143,8 @@ namespace xla { /* static */ bool IndexUtil::IndexInBounds(const Shape& shape, absl::Span index) { int64 rank = shape.rank(); - if (rank != index.size()) { + const int64 index_size = index.size(); + if (rank != index_size) { return false; } for (int64 d = 0; d < rank; ++d) { @@ -157,7 +158,8 @@ namespace xla { /* static */ int IndexUtil::CompareIndices(absl::Span lhs, absl::Span rhs) { int64 rank = lhs.size(); - CHECK_EQ(rhs.size(), rank); + const int64 rhs_rank = rhs.size(); + CHECK_EQ(rhs_rank, rank); for (int64 dim = 0; dim < rank; ++dim) { if (lhs[dim] < rhs[dim]) { return -1; diff --git a/tensorflow/compiler/xla/layout_util.cc b/tensorflow/compiler/xla/layout_util.cc index faa33e292c2..299a402bcf6 100644 --- a/tensorflow/compiler/xla/layout_util.cc +++ b/tensorflow/compiler/xla/layout_util.cc @@ -342,7 +342,8 @@ Layout CreateDefaultLayoutForRank(int64 rank) { /* static */ std::vector LayoutUtil::MakeLogicalToPhysical( const Layout& layout) { std::vector logical_to_physical(layout.minor_to_major_size()); - for (int64 physical = 0; physical < logical_to_physical.size(); ++physical) { + 
for (int64 physical = 0, end = logical_to_physical.size(); + physical < end; ++physical) { const int64 logical = Major(layout, physical); logical_to_physical[logical] = physical; } diff --git a/tensorflow/compiler/xla/literal.cc b/tensorflow/compiler/xla/literal.cc index d2b300f0b2d..d03f3f8140f 100644 --- a/tensorflow/compiler/xla/literal.cc +++ b/tensorflow/compiler/xla/literal.cc @@ -58,7 +58,7 @@ constexpr int kMinimumAlignment = 64; // Precondition: size % 2 == 0 (elements in the array are 16 bits long) void ConvertEndianShort(string* bytes) { CHECK_EQ(bytes->size() / 2, 0); - for (int64 i = 0; i < bytes->size(); i += 2) { + for (int64 i = 0, end = bytes->size(); i < end; i += 2) { std::swap((*bytes)[i], (*bytes)[i + 1]); } } @@ -249,8 +249,10 @@ template Status MutableLiteralBase::CopySliceFromInternal( const LiteralBase& src_literal, absl::Span src_base, absl::Span dest_base, absl::Span copy_size) { - TF_RET_CHECK(src_literal.shape().rank() == src_base.size()); - TF_RET_CHECK(shape().rank() == dest_base.size()); + const int64 src_base_size = src_base.size(); + const int64 dest_base_size = dest_base.size(); + TF_RET_CHECK(src_literal.shape().rank() == src_base_size); + TF_RET_CHECK(shape().rank() == dest_base_size); auto linear_index = [](const Shape& shape, absl::Span multi_index) { @@ -564,7 +566,7 @@ Status MutableLiteralBase::CopyFrom(const LiteralSlice& src_literal, } // Construct the index of the corresponding piece in the source literal. ShapeIndex src_piece_index = src_shape_index; - for (int64 i = dest_shape_index.size(); i < index.size(); ++i) { + for (int64 i = dest_shape_index.size(), end = index.size(); i < end; ++i) { src_piece_index.push_back(index[i]); } TF_RETURN_IF_ERROR( @@ -755,7 +757,7 @@ StatusOr LiteralBase::Broadcast( return InvalidArgument("Broadcast only supports arrays."); } - for (int64 i = 0; i < dimensions.size(); i++) { + for (int64 i = 0, end = dimensions.size(); i < end; i++) { TF_RET_CHECK(shape().dimensions(i) == result_shape.dimensions(dimensions[i])); } @@ -779,7 +781,7 @@ StatusOr LiteralBase::Broadcast( ShapeUtil::ForEachIndex( result_shape, [&](absl::Span output_index) { - for (int64 i = 0; i < dimensions.size(); ++i) { + for (int64 i = 0, end = dimensions.size(); i < end; ++i) { scratch_source_index[i] = output_index[dimensions[i]]; } int64 dest_index = IndexUtil::MultidimensionalIndexToLinearIndex( @@ -1185,8 +1187,9 @@ void DenseArrayToStringHelper(const LiteralBase& literal, } // Handle the non-innermost tensors of a 2D+ tensor. 
if (brace == "{") { + const int64 accum_indices_size = accum_indices->size(); if (rank > 3 && !accum_indices->empty() && - accum_indices->size() < rank) { + accum_indices_size < rank) { int index = accum_indices->size() - 1; int value = accum_indices->back(); return StrCat(brace, " /*i", index, "=", value, "*/\n"); @@ -1520,7 +1523,7 @@ StatusOr LiteralBase::ConvertToShape(const Shape& dest_shape) const { } Literal literal(ShapeUtil::MakeTupleShape(element_shapes), /*allocate_arrays=*/false); - for (int i = 0; i < elements.size(); ++i) { + for (int i = 0, end = elements.size(); i < end; ++i) { TF_CHECK_OK( literal.MoveFrom(std::move(elements[i]), /*dest_shape_index=*/{i})); } @@ -1891,13 +1894,13 @@ bool LiteralBase::IsR1Iota() const { auto is_iota_at_idx = [&](const int64 idx) { switch (shape().element_type()) { case U8: - return Get({idx}) == idx; + return Get({idx}) == static_cast(idx); case U16: - return Get({idx}) == idx; + return Get({idx}) == static_cast(idx); case U32: - return Get({idx}) == idx; + return Get({idx}) == static_cast(idx); case U64: - return Get({idx}) == idx; + return Get({idx}) == static_cast(idx); case S8: return Get({idx}) == idx; case S16: @@ -2174,8 +2177,9 @@ Status LiteralBase::Piece::CopyFromProto(const LiteralProto& proto) { } case C128: { auto complex_data = data(); - TF_RET_CHECK(proto.c128s_size() == complex_data.size() * 2); - for (int64 i = 0; i < complex_data.size(); ++i) { + const int64 complex_data_size_doubled = complex_data.size() * 2; + TF_RET_CHECK(proto.c128s_size() == complex_data_size_doubled); + for (int64 i = 0, end = complex_data.size(); i < end; ++i) { complex_data[i] = complex128{proto.c128s(i * 2), proto.c128s(i * 2 + 1)}; } @@ -2394,7 +2398,7 @@ BorrowingLiteral::BorrowingLiteral(absl::Span src_buf_ptrs, root_piece_.set_subshape(shape_.get()); BuildPieceSubtree(*shape_, &root_piece_); - for (int i = 0; i < src_buf_ptrs.size(); ++i) { + for (int i = 0, end = src_buf_ptrs.size(); i < end; ++i) { const auto& src_shape = shape_->tuple_shapes(i); CHECK(src_shape.IsArray()); root_piece_.child(i).set_buffer(const_cast(src_buf_ptrs[i])); diff --git a/tensorflow/compiler/xla/literal_util.cc b/tensorflow/compiler/xla/literal_util.cc index 4304c207cad..0286aa20b3b 100644 --- a/tensorflow/compiler/xla/literal_util.cc +++ b/tensorflow/compiler/xla/literal_util.cc @@ -67,7 +67,7 @@ Literal ConvertType(LiteralSlice literal) { primitive_util::NativeToPrimitiveType()) { auto src = literal.data(shape_index); auto dest = result.data(shape_index); - for (int64 i = 0; i < src.size(); ++i) { + for (int64 i = 0, end = src.size(); i < end; ++i) { dest[i] = static_cast(src[i]); } } else { @@ -329,7 +329,7 @@ Literal ConvertType(LiteralSlice literal) { /* static */ Literal LiteralUtil::CreateR1U8(absl::string_view value) { Literal literal(ShapeUtil::MakeShape(U8, {static_cast(value.size())})); - for (int i = 0; i < value.size(); ++i) { + for (int i = 0, end = value.size(); i < end; ++i) { literal.Set({i}, value[i]); } return literal; @@ -345,7 +345,7 @@ Literal ConvertType(LiteralSlice literal) { absl::Span new_dimensions, absl::Span minor_to_major, const LiteralSlice& literal) { int64 new_num_elements = 1; - for (int64 i = 0; i < new_dimensions.size(); ++i) { + for (int64 i = 0, end = new_dimensions.size(); i < end; ++i) { new_num_elements *= new_dimensions[i]; } CHECK_EQ(ShapeUtil::ElementsIn(literal.shape()), new_num_elements); @@ -472,7 +472,7 @@ Literal ConvertType(LiteralSlice literal) { element_shapes.push_back(element->shape()); } Literal 
literal(ShapeUtil::MakeTupleShape(element_shapes)); - for (int i = 0; i < elements.size(); ++i) { + for (int i = 0, end = elements.size(); i < end; ++i) { TF_CHECK_OK(literal.CopyFrom(*elements[i], /*dest_shape_index=*/{i})); } return literal; @@ -485,7 +485,7 @@ Literal ConvertType(LiteralSlice literal) { element_shapes.push_back(element.shape()); } Literal literal(ShapeUtil::MakeTupleShape(element_shapes)); - for (int i = 0; i < elements.size(); ++i) { + for (int i = 0, end = elements.size(); i < end; ++i) { TF_CHECK_OK(literal.CopyFrom(elements[i], /*dest_shape_index=*/{i})); } return literal; @@ -499,7 +499,7 @@ Literal ConvertType(LiteralSlice literal) { element_shapes.push_back(element.shape()); } Literal literal(ShapeUtil::MakeTupleShape(element_shapes)); - for (int64 i = 0; i < elements.size(); ++i) { + for (int64 i = 0, end = elements.size(); i < end; ++i) { TF_CHECK_OK( literal.MoveFrom(std::move(elements[i]), /*dest_shape_index=*/{i})); } diff --git a/tensorflow/compiler/xla/metric_table_report.cc b/tensorflow/compiler/xla/metric_table_report.cc index bad65ac3201..be235482718 100644 --- a/tensorflow/compiler/xla/metric_table_report.cc +++ b/tensorflow/compiler/xla/metric_table_report.cc @@ -80,9 +80,11 @@ void MetricTableReport::WriteReportToInfoLog(double expected_metric_sum) { int64 pos = 0; const string report = MakeReport(expected_metric_sum); - while (pos < report.size()) { + const int report_size = report.size(); + while (pos < report_size) { int64 end_of_line = report.find('\n', pos); - if (end_of_line == string::npos) { + const int64 _npos = string::npos; + if (end_of_line == _npos) { end_of_line = report.size(); } absl::string_view line(report.data() + pos, end_of_line - pos); @@ -161,7 +163,8 @@ void MetricTableReport::AppendCategoryTable() { const char* const kIndentPrefix = " * "; int64 entries_to_show = std::min(max_entries_per_category_to_show_, category.entries.size()); - if (category.entries.size() == entries_to_show + 1) { + const int64 category_entries_size = category.entries.size(); + if (category_entries_size == entries_to_show + 1) { // May as well show the last entry on the line that would otherwise say // that there is a single entry not shown. ++entries_to_show; @@ -224,7 +227,8 @@ void MetricTableReport::AppendTableRow(const string& text, const double metric, // Don't try to make a gigantic string and crash if expected_metric_sum_ is // wrong somehow. int64 padding_len = 1; - if (max_metric_string_size >= metric_string.size()) { + const int64 metric_string_size = metric_string.size(); + if (max_metric_string_size >= metric_string_size) { padding_len += max_metric_string_size - metric_string.size(); } string padding(padding_len, ' '); @@ -254,7 +258,7 @@ string MetricTableReport::MetricString(double metric) { sp1.remove_prefix(1); } // Copy rest of input characters. 
- for (int64 i = 0; i < sp1.size(); ++i) { + for (int64 i = 0, end = sp1.size(); i < end; ++i) { if (i > 0 && (sp1.size() - i) % 3 == 0) { output.push_back(','); } From d469a3afe15c8ed3daea39953a5aa3c99f91fdfb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tar=C3=A9=20Gaskin?= Date: Sun, 26 Jul 2020 23:49:59 +0000 Subject: [PATCH 1357/2522] tensorflow/core resolutions set 1 --- .../core/common_runtime/bfc_allocator.cc | 2 +- .../core/common_runtime/bfc_allocator.h | 2 +- .../core/common_runtime/eager/execute.cc | 22 +++++++++---------- tensorflow/core/common_runtime/gradients.cc | 6 ++--- .../collective_param_resolver_distributed.cc | 5 +++-- .../collective_rma_distributed.cc | 3 ++- .../eager/eager_service_impl.h | 5 ++--- .../distributed_runtime/eager/remote_mgr.cc | 2 +- .../core/distributed_runtime/graph_mgr.cc | 2 +- .../distributed_runtime/master_session.cc | 4 ++-- .../rpc/eager/grpc_eager_client.cc | 2 +- .../rpc/grpc_remote_master.cc | 4 ++-- .../distributed_runtime/rpc/grpc_session.cc | 4 ++-- .../distributed_runtime/rpc/grpc_state.cc | 2 +- tensorflow/core/framework/common_shape_fns.cc | 20 +++++++++-------- tensorflow/core/framework/op_def_util.cc | 4 ++-- tensorflow/core/framework/shape_inference.cc | 1 + 17 files changed, 47 insertions(+), 43 deletions(-) diff --git a/tensorflow/core/common_runtime/bfc_allocator.cc b/tensorflow/core/common_runtime/bfc_allocator.cc index 92e847407c8..6f75d944a12 100644 --- a/tensorflow/core/common_runtime/bfc_allocator.cc +++ b/tensorflow/core/common_runtime/bfc_allocator.cc @@ -832,7 +832,7 @@ bool BFCAllocator::MergeTimestampedChunks(size_t required_bytes) { // to to_merge. If this is a standard merge (required_bytes == 0) then // merge them all, otherwise merge just until a Chunk of the required size // is produced. - for (int ci = 0; ci < to_merge.size(); ++ci) { + for (int ci = 0, end = to_merge.size(); ci < end; ++ci) { void* ptr = to_merge[ci]; // It's possible that the Chunk associated with this memory location got // merged and deallocated in a prior iteration so refetch the handle and diff --git a/tensorflow/core/common_runtime/bfc_allocator.h b/tensorflow/core/common_runtime/bfc_allocator.h index 509fa9e8eed..cfe54c23abe 100644 --- a/tensorflow/core/common_runtime/bfc_allocator.h +++ b/tensorflow/core/common_runtime/bfc_allocator.h @@ -133,7 +133,7 @@ class BFCAllocator : public Allocator { // A ChunkHandle is an index into the chunks_ vector in BFCAllocator // kInvalidChunkHandle means an invalid chunk typedef size_t ChunkHandle; - static constexpr int kInvalidChunkHandle = -1; + static constexpr ChunkHandle kInvalidChunkHandle = -1; typedef int BinNum; static constexpr int kInvalidBinNum = -1; diff --git a/tensorflow/core/common_runtime/eager/execute.cc b/tensorflow/core/common_runtime/eager/execute.cc index fec31da703e..07667ec79a7 100644 --- a/tensorflow/core/common_runtime/eager/execute.cc +++ b/tensorflow/core/common_runtime/eager/execute.cc @@ -411,7 +411,7 @@ Status GetOrCreateKernelAndDevice( // When LazyCopyFunctionRemoteInputs is disabled, all inputs need to be on // local devices, since we execute a remote function through worker service, // which doesn't accept remote inputs. 
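The `kInvalidChunkHandle` change in `bfc_allocator.h` above is another flavor of the same cleanup: declaring the sentinel with the handle's own unsigned type makes `-1` convert to the type's maximum value, so later comparisons against real handles never mix signedness. A self-contained sketch of that pattern, using a hypothetical `Handle` alias rather than the allocator's types:

    // sketch_sentinel.cc -- illustrative only.
    #include <cstddef>
    #include <iostream>
    #include <vector>

    // The sentinel shares the handle's unsigned type; static_cast<Handle>(-1)
    // is the maximum representable value, a conventional "invalid" marker.
    using Handle = std::size_t;
    constexpr Handle kInvalidHandle = static_cast<Handle>(-1);

    Handle FindFirstNegative(const std::vector<int>& v) {
      for (Handle i = 0, end = v.size(); i < end; ++i) {
        if (v[i] < 0) return i;
      }
      return kInvalidHandle;
    }

    int main() {
      const std::vector<int> v = {3, 1, -7, 2};
      const Handle h = FindFirstNegative(v);
      if (h != kInvalidHandle) std::cout << "found at index " << h << "\n";
      return 0;
    }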
- for (int i = 0; i < op->Inputs().size(); i++) { + for (int i = 0, end = op->Inputs().size(); i < end; i++) { TensorHandle* input = op->Inputs()[i]; if (!ctx.LazyCopyFunctionRemoteInputs() && input->Type() == TensorHandle::REMOTE) { @@ -624,7 +624,7 @@ Status EagerLocalExecute(EagerOperation* op, TensorHandle** retvals, Status s; if (executor.Async()) { const DataTypeVector& output_dtypes = kernel->output_dtypes(); - for (int i = 0; i < num_outputs; ++i) { + for (int i = 0, end = num_outputs; i < end; ++i) { retvals[i] = TensorHandle::CreateEmptyLocalHandle( /* d= */ ctx.CanonicalDevice(kernel->OutputDevice(i)), /* op_device= */ kernel->device(), @@ -645,7 +645,7 @@ Status EagerLocalExecute(EagerOperation* op, TensorHandle** retvals, // performance. s = executor.AddOrExecute(std::move(node)); } else { - for (int i = 0; i < num_outputs; ++i) { + for (int i = 0, end = num_outputs; i < end; ++i) { retvals[i] = nullptr; } ExecuteNode node(&ctx, op->Inputs(), op->remote_func_params(), kernel, @@ -660,7 +660,7 @@ Status EagerLocalExecute(EagerOperation* op, TensorHandle** retvals, // Since the operation failed, we need to Unref any outputs if they were // allocated. if (!s.ok()) { - for (int i = 0; i < num_outputs; ++i) { + for (int i = 0, end = num_outputs; i < end; ++i) { if (retvals[i] != nullptr) { retvals[i]->Unref(); } @@ -770,7 +770,7 @@ Status EagerRemoteExecute(EagerOperation* op, TensorHandle** retvals, profiler::TraceMeLevel::kInfo); const bool eagerly_copy_function_remote_inputs = !ctx.LazyCopyFunctionRemoteInputs() || !op->is_function(); - for (int i = 0; i < op->Inputs().size(); i++) { + for (int i = 0, end = op->Inputs().size(); i < end; i++) { tensorflow::TensorHandle* input = op->Inputs()[i]; tensorflow::Device* input_device = absl::get(input->device()); tensorflow::Device* input_device_or_cpu = @@ -837,7 +837,7 @@ Status EagerRemoteExecute(EagerOperation* op, TensorHandle** retvals, DataTypeVector output_dtypes; TF_RETURN_IF_ERROR(GetOutputDTypes(op, &output_dtypes)); - const size_t num_outputs = static_cast(output_dtypes.size()); + const int64 num_outputs = output_dtypes.size(); if (num_outputs != *num_retvals) { return errors::InvalidArgument( "num_retvals does not match expected output dtypes"); @@ -845,7 +845,7 @@ Status EagerRemoteExecute(EagerOperation* op, TensorHandle** retvals, *num_retvals = num_outputs; const tensorflow::uint64 id = remote_op->id(); - for (int i = 0; i < num_outputs; ++i) { + for (int i = 0, end = num_outputs; i < end; ++i) { // TODO(nareshmodi): Change the callback to instead add the decref to a // list of pending decrefs that we can send as a batch with the next // execute. @@ -898,7 +898,7 @@ Status EagerRemoteExecute(EagerOperation* op, TensorHandle** retvals, // Since the operation failed, we need to Unref any outputs that were // allocated. 
if (!s.ok()) { - for (int i = 0; i < num_outputs; ++i) { + for (int i = 0, end = num_outputs; i < end; ++i) { retvals[i]->Unref(); } } @@ -910,7 +910,7 @@ Status EagerRemoteExecute(EagerOperation* op, TensorHandle** retvals, Status GetKernelOutputs(std::vector* outputs, int num_outputs, TensorHandle** retvals, EagerContext* ctx, KernelAndDevice* kernel) { - for (int i = 0; i < num_outputs; ++i) { + for (int i = 0, end = num_outputs; i < end; ++i) { if (retvals[i] == nullptr) { retvals[i] = TensorHandle::CreateLocalHandle( std::move((*outputs)[i]), @@ -1305,7 +1305,7 @@ void EagerLocalExecuteAsync(EagerOperation* op, TensorHandle** retvals, graph_collector = ctx.GetGraphCollector(); } - for (int i = 0; i < num_outputs; ++i) { + for (int i = 0, end = num_outputs; i < end; ++i) { retvals[i] = nullptr; } @@ -1317,7 +1317,7 @@ void EagerLocalExecuteAsync(EagerOperation* op, TensorHandle** retvals, // Since the operation failed, we need to Unref any outputs if they were // allocated. if (!s.ok()) { - for (int i = 0; i < num_outputs; ++i) { + for (int i = 0, end = num_outputs; i < end; ++i) { if (retvals[i] != nullptr) { retvals[i]->Unref(); } diff --git a/tensorflow/core/common_runtime/gradients.cc b/tensorflow/core/common_runtime/gradients.cc index 5230f354df9..449dace2ddb 100644 --- a/tensorflow/core/common_runtime/gradients.cc +++ b/tensorflow/core/common_runtime/gradients.cc @@ -130,7 +130,7 @@ static Node* AddSymGrad(Graph* g, Node* n, gtl::ArraySlice grads) { // The gradient node's outputs have the same types as the node 'n's // inputs, except for resources. DataTypeVector out_types = n->input_types(); - for (int i = 0; i < out_types.size(); ++i) { + for (int i = 0, end = out_types.size(); i < end; ++i) { if (out_types[i] == DT_RESOURCE) { // TODO(apassos): figure out how to get the right dtype out_types[i] = DT_FLOAT; @@ -221,7 +221,7 @@ SymbolicGradientBuilder::SymbolicGradientBuilder( x_grad_node_outputs_->clear(); x_grad_node_outputs_->resize(x_node_outputs_.size()); stop_nodes_.reserve(x_node_outputs_.size()); - for (int i = 0; i < x_node_outputs_.size(); ++i) { + for (int i = 0, end = x_node_outputs_.size(); i < end; ++i) { stop_nodes_.insert(x_node_outputs_[i].node->id()); } } @@ -397,7 +397,7 @@ Status SymbolicGradientBuilder::Compute() { } } - for (int i = 0; i < x_node_outputs_.size(); ++i) { + for (int i = 0, end = x_node_outputs_.size(); i < end; ++i) { (*x_grad_node_outputs_)[i] = SumGradients(x_node_outputs_[i]); } diff --git a/tensorflow/core/distributed_runtime/collective_param_resolver_distributed.cc b/tensorflow/core/distributed_runtime/collective_param_resolver_distributed.cc index 91878593fc9..bfcd5b85ea4 100644 --- a/tensorflow/core/distributed_runtime/collective_param_resolver_distributed.cc +++ b/tensorflow/core/distributed_runtime/collective_param_resolver_distributed.cc @@ -341,7 +341,8 @@ void CollectiveParamResolverDistributed::UpdateInstanceCache( } if (ir->known_count < cp->group.group_size) { ir->known_count = cp->group.group_size; - if (ir->known.size() != cp->group.group_size) { + const int ir_known_size = ir->known.size(); + if (ir_known_size != cp->group.group_size) { ir->status = errors::Internal( "UpdateInstanceCache:: CompleteInstanceResponse for instance ", cp->instance.instance_key, " has known.size()=", ir->known.size(), @@ -349,7 +350,7 @@ void CollectiveParamResolverDistributed::UpdateInstanceCache( status = ir->status; break; } - for (int i = 0; i < ir->known.size(); ++i) { + for (int i = 0; i < ir_known_size; ++i) { ir->known[i] = true; } } diff 
--git a/tensorflow/core/distributed_runtime/collective_rma_distributed.cc b/tensorflow/core/distributed_runtime/collective_rma_distributed.cc index c645c74f903..d740a165797 100644 --- a/tensorflow/core/distributed_runtime/collective_rma_distributed.cc +++ b/tensorflow/core/distributed_runtime/collective_rma_distributed.cc @@ -109,7 +109,8 @@ void CollectiveRemoteAccessDistributed::RecvFromPeer( for (const auto& chunk : extra.tensor_content()) { num_bytes += chunk.size(); } - if (num_bytes != to_tensor->TotalBytes()) { + const int64 to_tensor_TotalBytes = to_tensor->TotalBytes(); + if (num_bytes != to_tensor_TotalBytes) { done(errors::Internal("RecvBufResponse returned ", num_bytes, " bytes where to_tensor expected ", to_tensor->TotalBytes())); diff --git a/tensorflow/core/distributed_runtime/eager/eager_service_impl.h b/tensorflow/core/distributed_runtime/eager/eager_service_impl.h index 09db3883a15..e8b4e1e5090 100644 --- a/tensorflow/core/distributed_runtime/eager/eager_service_impl.h +++ b/tensorflow/core/distributed_runtime/eager/eager_service_impl.h @@ -148,9 +148,8 @@ class EagerServiceImpl { bool IsStale() { mutex_lock l(last_accessed_mu_); - return (destroy_after_micros_ > 0 && - (env_->env->NowMicros() - last_accessed_micros_) > - destroy_after_micros_); + const int64 time_passed = env_->env->NowMicros() - last_accessed_micros_; + return (destroy_after_micros_ > 0 && time_passed > destroy_after_micros_); } private: diff --git a/tensorflow/core/distributed_runtime/eager/remote_mgr.cc b/tensorflow/core/distributed_runtime/eager/remote_mgr.cc index 9003f2b3f17..e755cd247a6 100644 --- a/tensorflow/core/distributed_runtime/eager/remote_mgr.cc +++ b/tensorflow/core/distributed_runtime/eager/remote_mgr.cc @@ -28,7 +28,7 @@ void RemoteMgr::AddOperationOutputs( const gtl::ArraySlice handles, int64 operation_id) { mutex_lock l(remote_tensor_handle_mu_); - for (int i = 0; i < handles.size(); i++) { + for (int i = 0, end = handles.size(); i < end; i++) { // TODO(nareshmodi): Correctly handle operation_id not being unique. remote_tensor_handle_map_.emplace( RemoteTensorHandleInternal(operation_id, i), handles[i]); diff --git a/tensorflow/core/distributed_runtime/graph_mgr.cc b/tensorflow/core/distributed_runtime/graph_mgr.cc index dd9633676f6..cce4a3f7960 100644 --- a/tensorflow/core/distributed_runtime/graph_mgr.cc +++ b/tensorflow/core/distributed_runtime/graph_mgr.cc @@ -403,7 +403,7 @@ void GraphMgr::RecvOutputsAsync(const int64 step_id, NamedTensors* out, [done, rendezvous, received_keys, out, keys](const Status s) { rendezvous->Unref(); size_t output_size = 0; - for (int i = 0; i < keys.size(); ++i) { + for (int i = 0, end = keys.size(); i < end; ++i) { (*out)[keys[i]] = (*received_keys)[i]; output_size += (*out)[keys[i]].AllocatedBytes(); } diff --git a/tensorflow/core/distributed_runtime/master_session.cc b/tensorflow/core/distributed_runtime/master_session.cc index 9d0a03805d0..fb3a6659848 100644 --- a/tensorflow/core/distributed_runtime/master_session.cc +++ b/tensorflow/core/distributed_runtime/master_session.cc @@ -836,7 +836,7 @@ Status MasterSession::ReffedClientGraph::RunPartitions( << execution_count; // Maps the names of fed tensors to their index in `req`. std::unordered_map feeds(3); - for (size_t i = 0; i < callable_opts_.feed_size(); ++i) { + for (size_t i = 0, end = callable_opts_.feed_size(); i < end; ++i) { if (!feeds.insert({callable_opts_.feed(i), i}).second) { // MakeCallable will fail if there are two feeds with the same name. 
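The `IsStale` rewrite in `eager_service_impl.h` above also shows why naming an intermediate value helps: the elapsed time gets its own type and an obvious unit at a single spot instead of being buried inside a compound condition. A minimal sketch of the same shape with hypothetical names, not the TensorFlow implementation:

    // sketch_staleness.cc -- illustrative only.
    #include <cstdint>
    #include <iostream>

    // A context is stale when a positive expiry is configured and more than
    // that many microseconds have passed since the last access.
    bool IsStale(uint64_t now_micros, uint64_t last_accessed_micros,
                 int64_t destroy_after_micros) {
      const int64_t time_passed =
          static_cast<int64_t>(now_micros - last_accessed_micros);
      return destroy_after_micros > 0 && time_passed > destroy_after_micros;
    }

    int main() {
      // Last access 4 seconds ago, expiry after 3 seconds: stale.
      std::cout << std::boolalpha << IsStale(5000000, 1000000, 3000000) << "\n";
      return 0;
    }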
return errors::Internal("Duplicated feeds in callable: ", @@ -1564,7 +1564,7 @@ uint64 MasterSession::NewStepId(int64 graph_key) { } else { uint64 step_id = env_->collective_executor_mgr->NextStepId(graph_key); int32 retry_count = 0; - while (step_id == CollectiveExecutor::kInvalidId) { + while (static_cast(step_id) == CollectiveExecutor::kInvalidId) { Notification note; Status status; env_->collective_executor_mgr->RefreshStepIdSequenceAsync( diff --git a/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_client.cc b/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_client.cc index 425b25e2386..0faf8c1437a 100644 --- a/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_client.cc +++ b/tensorflow/core/distributed_runtime/rpc/eager/grpc_eager_client.cc @@ -231,7 +231,7 @@ class GrpcEagerClientCache : public EagerClientCache { explicit GrpcEagerClientCache( std::shared_ptr cache) : next_round_robin_assignment_(0), cache_(cache), threads_(4) { - for (int i = 0; i < threads_.size(); i++) { + for (int i = 0, end = threads_.size(); i < end; i++) { threads_[i].reset(new GrpcEagerClientThread()); } } diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_remote_master.cc b/tensorflow/core/distributed_runtime/rpc/grpc_remote_master.cc index 8c8eef0e1a4..1781c643e1d 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_remote_master.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_remote_master.cc @@ -176,8 +176,8 @@ class GrpcRemoteMaster : public MasterInterface { ? deadline_with_backoff_micros : expired_time_micros; Env::Default()->SleepForMicroseconds(backoff_until - now_micros); - if (Env::Default()->NowMicros() > expired_time_micros && - timeout_in_ms > 0) { + const int64 default_now_micros = Env::Default()->NowMicros(); + if (default_now_micros > expired_time_micros && timeout_in_ms > 0) { // If timeout_in_ms is set, exit the retry loop on timeout. return errors::DeadlineExceeded(ctx.debug_error_string()); } diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_session.cc b/tensorflow/core/distributed_runtime/rpc/grpc_session.cc index 4dcddc2d133..f777ec468dc 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_session.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_session.cc @@ -229,7 +229,7 @@ Status GrpcSession::RunHelper( // Build an index from fetch tensor name to first index in // output_tensor_names. std::unordered_map output_name_to_offset; - for (int i = 0; i < output_tensor_names.size(); ++i) { + for (int i = 0, end = output_tensor_names.size(); i < end; ++i) { const string& name = output_tensor_names[i]; if (output_name_to_offset.insert(std::make_pair(name, i)).second) { req->add_fetch(name); @@ -267,7 +267,7 @@ Status GrpcSession::RunHelper( // In the unlikely event that output_tensor_names contains duplicates, fill in // the duplicate values. 
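The retry loop in `grpc_remote_master.cc` above re-reads the clock after sleeping and only then checks the deadline, so a call that wakes up past its deadline is not retried again. A standalone sketch of that retry-with-deadline shape using std::chrono and a hypothetical flaky operation (not the gRPC master client):

    // sketch_retry_deadline.cc -- illustrative only.
    #include <chrono>
    #include <functional>
    #include <iostream>
    #include <thread>

    // Retry `op` with a fixed backoff until it succeeds or an absolute deadline
    // passes. The clock is sampled after the sleep, mirroring the change above.
    bool RetryWithDeadline(const std::function<bool()>& op,
                           std::chrono::milliseconds backoff,
                           std::chrono::steady_clock::time_point deadline) {
      while (true) {
        if (op()) return true;
        std::this_thread::sleep_for(backoff);
        if (std::chrono::steady_clock::now() > deadline) {
          return false;  // Deadline exceeded; give up.
        }
      }
    }

    int main() {
      int attempts = 0;
      auto flaky = [&attempts] { return ++attempts >= 3; };  // succeeds on try 3
      const auto deadline =
          std::chrono::steady_clock::now() + std::chrono::seconds(1);
      std::cout << std::boolalpha
                << RetryWithDeadline(flaky, std::chrono::milliseconds(10),
                                     deadline)
                << "\n";  // true
      return 0;
    }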
if (output_name_to_offset.size() != output_tensor_names.size()) { - for (int i = 0; i < output_tensor_names.size(); ++i) { + for (int i = 0, end = output_tensor_names.size(); i < end; ++i) { const string& name = output_tensor_names[i]; int offset = output_name_to_offset[name]; if (offset != i) { diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_state.cc b/tensorflow/core/distributed_runtime/rpc/grpc_state.cc index fc3695e5461..9edca51e7a3 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_state.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_state.cc @@ -203,7 +203,7 @@ void ExchangeQueue::CheckInvariants() { return; } - for (int i = 1; i < exchanges_.size(); ++i) { + for (int i = 1, end = exchanges_.size(); i < end; ++i) { const Exchange& e0 = exchanges_[i - 1]; const Exchange& e1 = exchanges_[i]; // The first exchange in the pair is the one that arrived later and is diff --git a/tensorflow/core/framework/common_shape_fns.cc b/tensorflow/core/framework/common_shape_fns.cc index b9efddf4cdb..6ce5d7ed9cf 100644 --- a/tensorflow/core/framework/common_shape_fns.cc +++ b/tensorflow/core/framework/common_shape_fns.cc @@ -167,7 +167,8 @@ Status EinsumShape(shape_inference::InferenceContext* c) { return errors::InvalidArgument("Expected either 1 or 2 inputs but got: ", c->num_inputs()); } - if (c->num_inputs() != input_labels.size()) { + const int input_labels_size = input_labels.size(); + if (c->num_inputs() != input_labels_size) { return errors::InvalidArgument("Expected ", input_labels.size(), " inputs for equation ", equation, " but got: ", c->num_inputs()); @@ -177,7 +178,7 @@ Status EinsumShape(shape_inference::InferenceContext* c) { // the broadcast shapes that map to ellipsis. absl::flat_hash_map label_to_dimension; gtl::InlinedVector input_bcast_shapes(c->num_inputs()); - for (int i = 0; i < c->num_inputs(); ++i) { + for (int i = 0, end = c->num_inputs(); i < end; ++i) { bool has_ellipsis = false; TF_RETURN_IF_ERROR(ValidateEinsumEllipsis(input_labels[i], &has_ellipsis)); ShapeHandle input_shape = c->input(i); @@ -202,7 +203,7 @@ Status EinsumShape(shape_inference::InferenceContext* c) { input_bcast_shapes[i] = c->Scalar(); // Run through the input labels; populate label_to_dimension mapping and // compute the broadcast shapes corresponding to the ellipsis (if present). - for (int label_idx = 0; label_idx < input_labels[i].size(); ++label_idx) { + for (int label_idx = 0, end = input_labels[i].size(); label_idx < end; ++label_idx) { const char label = input_labels[i][label_idx]; // Calculate the input axis that the current label is referring to. After // the ellipsis, the axis may be found by using negative indices; i.e the @@ -281,7 +282,7 @@ Status EinsumShape(shape_inference::InferenceContext* c) { // Create the output shape from output labels and label_to_dimension mapping. std::vector output_dims; - for (int label_idx = 0; label_idx < output_labels.size(); ++label_idx) { + for (int label_idx = 0, end = output_labels.size(); label_idx < end; ++label_idx) { const char label = output_labels[label_idx]; // Append the output_bcast_shape when the ellipsis is encountered. 
if (label == '.') { @@ -473,7 +474,8 @@ Status DatasetIteratorShape(shape_inference::InferenceContext* c) { TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused)); std::vector output_shapes; TF_RETURN_IF_ERROR(c->GetAttr("output_shapes", &output_shapes)); - if (output_shapes.size() != c->num_outputs()) { + const int output_shapes_size = output_shapes.size(); + if (output_shapes_size != c->num_outputs()) { return errors::InvalidArgument( "`output_shapes` must be the same length as `output_types` (", output_shapes.size(), " vs. ", c->num_outputs()); @@ -503,7 +505,7 @@ Status MakeShapeFromFormat(TensorFormat format, DimensionOrConstant N, dims_actual[GetTensorInnerWidthDimIndex(num_dims, format)] = context->MakeDim(4); } - for (int spatial_dim = 0; spatial_dim < spatial.size(); spatial_dim++) { + for (int spatial_dim = 0, end = spatial.size(); spatial_dim < end; spatial_dim++) { dims_actual[GetTensorSpatialDimIndex(num_dims, format, spatial_dim)] = context->MakeDim(spatial[spatial_dim]); } @@ -520,7 +522,7 @@ Status DimensionsFromShape(ShapeHandle shape, TensorFormat format, // Batch. *batch_dim = context->Dim(shape, GetTensorBatchDimIndex(rank, format)); // Spatial. - for (int spatial_dim_index = 0; spatial_dim_index < spatial_dims.size(); + for (int spatial_dim_index = 0, end = spatial_dims.size(); spatial_dim_index < end; ++spatial_dim_index) { spatial_dims[spatial_dim_index] = context->Dim( shape, GetTensorSpatialDimIndex(rank, format, spatial_dim_index)); @@ -546,7 +548,7 @@ Status ShapeFromDimensions(DimensionHandle batch_dim, // Batch. out_dims[tensorflow::GetTensorBatchDimIndex(rank, format)] = batch_dim; // Spatial. - for (int spatial_dim_index = 0; spatial_dim_index < spatial_dims.size(); + for (int spatial_dim_index = 0, end = spatial_dims.size(); spatial_dim_index < end; ++spatial_dim_index) { out_dims[tensorflow::GetTensorSpatialDimIndex( rank, format, spatial_dim_index)] = spatial_dims[spatial_dim_index]; @@ -2338,7 +2340,7 @@ Status ExplicitShapes(InferenceContext* c) { if (shapes.empty()) { return errors::Internal("shapes attribute is empty"); } - for (int i = 0; i < shapes.size(); ++i) { + for (int i = 0, end = shapes.size(); i < end; ++i) { ShapeHandle output_shape; TF_RETURN_IF_ERROR( c->MakeShapeFromPartialTensorShape(shapes[i], &output_shape)); diff --git a/tensorflow/core/framework/op_def_util.cc b/tensorflow/core/framework/op_def_util.cc index f40d867bb6f..486f92b3b20 100644 --- a/tensorflow/core/framework/op_def_util.cc +++ b/tensorflow/core/framework/op_def_util.cc @@ -661,7 +661,7 @@ Status OpDefCompatible(const OpDef& old_op, const OpDef& new_op) { "' vs. '", new_in_sig, "'"); VALIDATE(old_in_ref.size() == new_in_ref.size(), // Should not happen "Unexpected change in input ref lists."); - for (int i = 0, iter_limit = old_in_ref.size(); i < iter_limit; ++i) { + for (int i = 0, end = old_in_ref.size(); i < end; ++i) { // Allowed to remove "ref" from an input (or leave it unchanged). VALIDATE(old_in_ref[i] || !new_in_ref[i], "Input ", i, " changed from non-ref to ref"); @@ -677,7 +677,7 @@ Status OpDefCompatible(const OpDef& old_op, const OpDef& new_op) { old_out_sig, "' vs. '", new_out_sig, "'"); VALIDATE(old_out_ref.size() == new_out_ref.size(), // Should not happen "Unexpected change in output ref lists"); - for (int i = 0, iter_limit = old_out_ref.size(); i < iter_limit; ++i) { + for (int i = 0, end = old_out_ref.size(); i < end; ++i) { // Allowed to add "ref" to an output (or leave it unchanged). 
VALIDATE(!old_out_ref[i] || new_out_ref[i], "Output ", i, " changed from ref to non-ref"); diff --git a/tensorflow/core/framework/shape_inference.cc b/tensorflow/core/framework/shape_inference.cc index 456c1826572..72fff010d08 100644 --- a/tensorflow/core/framework/shape_inference.cc +++ b/tensorflow/core/framework/shape_inference.cc @@ -211,6 +211,7 @@ void InferenceContext::PostInputInit( } input_handle_shapes_and_types_ = std::move(input_handle_data); } + const int inputs_size = inputs_.size(); if (inputs_size != num_inputs_from_node_def) { construction_status_ = errors::InvalidArgument( From 8cf05857150a7af76a25d8f0b27128df1754ccd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tar=C3=A9=20Gaskin?= Date: Sun, 26 Jul 2020 23:56:14 +0000 Subject: [PATCH 1358/2522] tensorflow/core resolutions, set 2 of 3 --- .../core/grappler/costs/graph_properties.cc | 8 +++----- .../core/grappler/costs/virtual_scheduler.cc | 8 ++++---- .../grappler/graph_analyzer/graph_analyzer.cc | 13 ++++++++----- .../core/grappler/graph_analyzer/sig_node.cc | 8 +++++--- .../core/grappler/graph_analyzer/subgraph.cc | 6 ++++-- .../grappler/optimizers/arithmetic_optimizer.cc | 13 ++++++------- .../core/grappler/optimizers/constant_folding.cc | 6 +++--- .../optimizers/data/map_vectorization.cc | 4 ++-- .../parse_single_example_vectorizer.cc | 2 +- .../optimizers/data/vectorization_utils.cc | 10 +++++----- .../generic_layout_optimizer_transposer.cc | 5 ++--- .../optimizers/scoped_allocator_optimizer.cc | 7 +++---- tensorflow/core/grappler/utils/functions.cc | 10 ++++++---- .../core/kernels/boosted_trees/resources.cc | 8 +++++--- .../core/kernels/data/captured_function.cc | 5 ++--- .../kernels/data/experimental/snapshot_util.cc | 16 +++++++++------- tensorflow/core/kernels/range_sampler.cc | 2 +- .../kernels/remote_fused_graph_execute_utils.cc | 5 ++--- 18 files changed, 71 insertions(+), 65 deletions(-) diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc index c3df2c1f15b..23b4f0c8e5a 100644 --- a/tensorflow/core/grappler/costs/graph_properties.cc +++ b/tensorflow/core/grappler/costs/graph_properties.cc @@ -785,8 +785,7 @@ class SymbolicShapeRefiner { MutableGraphView gv(&grappler_function_item.graph); // Forward shapes from function input nodes to argument nodes. - for (int i = 0, iter_limit = grappler_function_item.inputs().size(); - i < iter_limit; ++i) { + for (int i = 0, end = grappler_function_item.inputs().size(); i < end; ++i) { auto& fun_input = grappler_function_item.input(i); NodeDef* fun_node = gv.GetNode(fun_input.node_name); const TensorId input_tensor = ParseTensorName(function_node->input(i)); @@ -1284,8 +1283,7 @@ class SymbolicShapeRefiner { } for (int i = grappler_function_item.inputs().size(), - iter_limit = function_node->input_size(); - i < iter_limit; ++i) { + end = function_node->input_size(); i < end; ++i) { const string& input = function_node->input(i); if (!IsControlInput(input)) { return errors::FailedPrecondition( @@ -2310,7 +2308,7 @@ Status GraphProperties::UpdateEnqueue( // TODO(bsteiner): handle EnqueueMany as well. 
std::vector shapes_and_types; - for (int i = 1, iter_limit = ctx->input_types.size(); i < iter_limit; ++i) { + for (int i = 1, end = ctx->input_types.size(); i < end; ++i) { GraphView::InputPort inp(enqueue_node, i); GraphView::OutputPort fanin = shape_refiner->graph().GetRegularFanin(inp); InferenceContext* in = shape_refiner->GetContext(fanin.node); diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.cc b/tensorflow/core/grappler/costs/virtual_scheduler.cc index 67af304b081..2a33806719d 100644 --- a/tensorflow/core/grappler/costs/virtual_scheduler.cc +++ b/tensorflow/core/grappler/costs/virtual_scheduler.cc @@ -523,8 +523,8 @@ Status SchedulerState::Init(const GrapplerItem* item, if (IsPersistent(*curr_node)) { auto& device_state = device_[curr_node_device]; for (int port_num = 0, - port_num_iter_limit = curr_node_state.output_properties.size(); - port_num < port_num_iter_limit; ++port_num) { + port_num_end = curr_node_state.output_properties.size(); + port_num < port_num_end; ++port_num) { device_state.persistent_nodes.insert( std::make_pair(curr_node, port_num)); } @@ -1121,8 +1121,8 @@ void SchedulerState::GenerateRunMetadata(RunMetadata* metadata) { const NodeState& nodestate = node_map_.at(node_def); NodeExecStats* node_stats = device_stepstats->add_node_stats(); uint64 total_output_size = 0; - for (int slot = 0, slot_iter_limit = nodestate.output_properties.size(); - slot < slot_iter_limit; slot++) { + for (int slot = 0, slot_end = nodestate.output_properties.size(); + slot < slot_end; slot++) { const auto& properties = nodestate.output_properties[slot]; NodeOutput* no = node_stats->add_output(); no->set_slot(slot); diff --git a/tensorflow/core/grappler/graph_analyzer/graph_analyzer.cc b/tensorflow/core/grappler/graph_analyzer/graph_analyzer.cc index f3796fcf861..e0de3257b0d 100644 --- a/tensorflow/core/grappler/graph_analyzer/graph_analyzer.cc +++ b/tensorflow/core/grappler/graph_analyzer/graph_analyzer.cc @@ -92,7 +92,8 @@ void GraphAnalyzer::FindSubgraphs() { } void GraphAnalyzer::ExtendSubgraph(Subgraph* parent) { - bool will_complete = (parent->id().size() + 1 == subgraph_size_); + const int parent_id_size_plus_one = parent->id().size() + 1; + bool will_complete = (parent_id_size_plus_one == subgraph_size_); SubgraphPtrSet& sg_set = will_complete ? result_ : partial_; const GenNode* last_all_or_none_node = nullptr; @@ -151,7 +152,8 @@ void GraphAnalyzer::ExtendSubgraphAllOrNone(Subgraph* parent, // point in growing it more, can just skip over the rest of the links. for (const auto& link : nbit->second) { id.insert(link.node); - if (id.size() > subgraph_size_) { + const int id_size = id.size(); + if (id_size > subgraph_size_) { return; // Too big. } } @@ -177,7 +179,8 @@ void GraphAnalyzer::ExtendSubgraphPortAllOrNone(Subgraph* parent, // point in growing it more, can just skip over the rest of the links. for (const auto& link : nbit->second) { id.insert(link.node); - if (id.size() > subgraph_size_) { + const int id_size = id.size(); + if (id_size > subgraph_size_) { return; // Too big. } } @@ -198,8 +201,8 @@ void GraphAnalyzer::AddExtendedSubgraph(Subgraph* parent, // This subgraph was already found by extending from a different path. 
return; } - - if (id.size() != subgraph_size_) { + const int id_size = id.size(); + if (id_size != subgraph_size_) { todo_.push_back(sg.get()); } spec_sg_set.insert(std::move(sg)); diff --git a/tensorflow/core/grappler/graph_analyzer/sig_node.cc b/tensorflow/core/grappler/graph_analyzer/sig_node.cc index c71ad3100f4..86d93966a87 100644 --- a/tensorflow/core/grappler/graph_analyzer/sig_node.cc +++ b/tensorflow/core/grappler/graph_analyzer/sig_node.cc @@ -113,7 +113,8 @@ void SigNode::ComputeTopoHash(int distance) { return; } - CHECK(topo_hash_.size() == distance); + const int64 topo_hash_size = topo_hash_.size(); + CHECK(topo_hash_size == distance); int prev = distance - 1; @@ -154,7 +155,8 @@ void SigNode::ComputeTopoHash(int distance) { size_t SigNode::GetTopoHash(int distance) const { CHECK(!topo_hash_.empty()); - if (distance >= topo_hash_.size()) { + const int64 topo_hash_size = topo_hash_.size(); + if (distance >= topo_hash_size) { CHECK(hash_is_final_); return topo_hash_.back(); } else { @@ -393,7 +395,7 @@ void Signature::OrderLinks() { int first_idx = -1; int idx; - for (idx = 0; idx < node->hashed_peers_.size(); ++idx) { + for (idx = 0; idx < static_cast(node->hashed_peers_.size()); ++idx) { auto& entry = node->hashed_peers_[idx]; if (entry.link_hash == cur_link_hash) { continue; diff --git a/tensorflow/core/grappler/graph_analyzer/subgraph.cc b/tensorflow/core/grappler/graph_analyzer/subgraph.cc index 28a91e0f843..e196181467e 100644 --- a/tensorflow/core/grappler/graph_analyzer/subgraph.cc +++ b/tensorflow/core/grappler/graph_analyzer/subgraph.cc @@ -147,7 +147,8 @@ bool SubgraphIterator::NextIfSamePort() { if (AtEnd()) { return false; } - if (link_idx_ + 1 < link_map_it_->second.size()) { + const int64 link_map_it_second_size = link_map_it_->second.size(); + if (link_idx_ + 1 < link_map_it_second_size) { ++link_idx_; return true; } else { @@ -174,7 +175,8 @@ void SubgraphIterator::SkipNode() { bool SubgraphIterator::PropagateNext() { // Loops are used to skip over the empty entries. - while (link_idx_ >= link_map_it_->second.size()) { + const int64 link_map_it_second_size = link_map_it_->second.size(); + while (link_idx_ >= link_map_it_second_size) { ++link_map_it_; while (link_map_it_ == (*id_it_)->links().end()) { if (++id_it_ == id_->end()) { diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index 0679b92d914..8fa0ef17f11 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -598,7 +598,7 @@ class AddOpsRewriteStage : public ArithmeticNodesGroupOptimizerStage { std::deque add_ops; // Prepare leaf AddN nodes for inputs of equal shape - for (int i = 0, iter_limit = shapes.size(); i < iter_limit; ++i) { + for (int i = 0, end = shapes.size(); i < end; ++i) { const auto node_name = leaf_node_name(i); const auto& inputs = shape_sig_to_inputs[ShapeSignature(shapes[i])]; add_ops.push_back(AddInputsOfSymbolicallyEqualShape(*group.root_node, @@ -750,8 +750,7 @@ class HoistCommonFactorOutOfAggregation : public ArithmeticOptimizerStage { ctx().node_map->AddOutput(new_add_node->name(), new_outer_node->name()); // Hoist non-shared factors up into the new AddN node. 
- for (int i = 0, iter_limit = unique_factors.size(); i < iter_limit; - ++i) { + for (int i = 0, end = unique_factors.size(); i < end; ++i) { const string& unique_factor_i = unique_factors[i]; new_add_node->set_input(i, unique_factor_i); ctx().node_map->AddOutput(unique_factor_i, new_add_node->name()); @@ -1203,7 +1202,7 @@ class RemoveIdentityTranspose : public ArithmeticOptimizerStage { if (a.size() != b.size()) { return false; } - for (int i = 0, iter_limit = a.size(); i < iter_limit; ++i) { + for (int i = 0, end = a.size(); i < end; ++i) { if (a[b[i]] != i) { return false; } @@ -1212,7 +1211,7 @@ class RemoveIdentityTranspose : public ArithmeticOptimizerStage { } bool IsIdentityPermutation(const std::vector& perm) { - for (int64 i = 0, iter_limit = perm.size(); i < iter_limit; ++i) { + for (int64 i = 0, end = perm.size(); i < end; ++i) { if (i != perm[i]) { return false; } @@ -3375,7 +3374,7 @@ class RemoveStackSliceSameAxis : public ArithmeticOptimizerStage { int begin_index = -1; int64 begin_value = 0; - for (int i = 0, iter_limit = slice_begin_vec.size(); i < iter_limit; ++i) { + for (int i = 0, end = slice_begin_vec.size(); i < end; ++i) { const int64 v = slice_begin_vec[i]; if (v != 0) { if (begin_index != -1) { @@ -3389,7 +3388,7 @@ class RemoveStackSliceSameAxis : public ArithmeticOptimizerStage { int end_index = -1; int64 end_value = 0; - for (int i = 0, iter_limit = slice_begin_vec.size(); i < iter_limit; ++i) { + for (int i = 0, end = slice_begin_vec.size(); i < end; ++i) { const int64 v = slice_end_vec[i]; if (v != pack_output_shape.dim_size(i)) { if (end_index != -1) { diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index ce4e101e419..2d0ad35044e 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -642,12 +642,12 @@ Status ConstantFolding::MaterializeBroadcastGradientArgs( // These extra dims could be equal to 1, in which case there is no // broadcasting. It could also be greater than 1, in which case there would // be broadcasting. Since we don't know, we'll just punt. 
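`RemoveIdentityTranspose` above rests on two small permutation predicates: back-to-back transposes cancel when their permutations are inverses of each other (`a[b[i]] == i` for every `i`), and a single transpose is a no-op when its permutation is the identity. Both checks in a compact standalone sketch:

    // sketch_permutation.cc -- illustrative only.
    #include <iostream>
    #include <vector>

    // True when permutation b undoes permutation a, i.e. a[b[i]] == i for all i.
    bool ArePermutationsInverse(const std::vector<int>& a,
                                const std::vector<int>& b) {
      if (a.size() != b.size()) return false;
      for (int i = 0, end = a.size(); i < end; ++i) {
        if (a[b[i]] != i) return false;
      }
      return true;
    }

    // True when the permutation leaves every axis where it already is.
    bool IsIdentityPermutation(const std::vector<int>& perm) {
      for (int i = 0, end = perm.size(); i < end; ++i) {
        if (perm[i] != i) return false;
      }
      return true;
    }

    int main() {
      // NHWC->NCHW is {0, 3, 1, 2}; its inverse, NCHW->NHWC, is {0, 2, 3, 1}.
      std::cout << std::boolalpha
                << ArePermutationsInverse({0, 3, 1, 2}, {0, 2, 3, 1}) << "\n"
                << IsIdentityPermutation({0, 1, 2, 3}) << "\n";
      return 0;
    }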
- for (int i = common_dims, iter_limit = shape1.size(); i < iter_limit; ++i) { + for (int i = common_dims, end = shape1.size(); i < end; ++i) { if (shape1[i] < 0) { return Status::OK(); } } - for (int i = common_dims, iter_limit = shape2.size(); i < iter_limit; ++i) { + for (int i = common_dims, end = shape2.size(); i < end; ++i) { if (shape2[i] < 0) { return Status::OK(); } @@ -1463,7 +1463,7 @@ Status ConstantFolding::FoldNode(NodeDef* node, GraphDef* output_graph, VLOG(2) << "Folded node: " << SummarizeNodeDef(*node); NodeDef* constant_output = nullptr; - for (int i = 0, iter_limit = const_nodes.size(); i < iter_limit; i++) { + for (int i = 0, end = const_nodes.size(); i < end; i++) { NodeDef* const_node = &const_nodes[i]; VLOG(3) << "Generated constant node: " << SummarizeNodeDef(*const_node); if (const_node->name().empty()) { diff --git a/tensorflow/core/grappler/optimizers/data/map_vectorization.cc b/tensorflow/core/grappler/optimizers/data/map_vectorization.cc index 50eac2e23df..ce8830309e9 100644 --- a/tensorflow/core/grappler/optimizers/data/map_vectorization.cc +++ b/tensorflow/core/grappler/optimizers/data/map_vectorization.cc @@ -103,7 +103,7 @@ FunctionDef* CreateMapDefunWrapper(const NodeDef& map_node, // Set return values to match output names string output_prefix = strings::StrCat(map_defun_node->name(), ":output:"); - for (size_t i = 0; i < vectorized_func->signature().output_arg_size(); ++i) { + for (size_t i = 0; i < static_cast(vectorized_func->signature().output_arg_size()); ++i) { const auto& output_arg = vectorized_func->signature().output_arg(i); (*vectorized_func->mutable_ret())[output_arg.name()] = strings::StrCat(output_prefix, i); @@ -238,7 +238,7 @@ Status AddNewBatchNode(const NodeDef& old_batch_node, const NodeDef& input_node, } } - for (size_t i = 0; i < input_shapes.size(); ++i) { + for (size_t i = 0, end = input_shapes.size(); i < end; ++i) { // Note: We already checked earlier that input shapes are all fully defined. TensorShapeProto* shape = output_shapes_attr.mutable_list()->add_shape(); TensorShapeProto_Dim* dim = shape->add_dim(); diff --git a/tensorflow/core/grappler/optimizers/data/vectorization/parse_single_example_vectorizer.cc b/tensorflow/core/grappler/optimizers/data/vectorization/parse_single_example_vectorizer.cc index f81b2d01d99..bf3a80428b6 100644 --- a/tensorflow/core/grappler/optimizers/data/vectorization/parse_single_example_vectorizer.cc +++ b/tensorflow/core/grappler/optimizers/data/vectorization/parse_single_example_vectorizer.cc @@ -87,7 +87,7 @@ class ParseSingleExampleVectorizer : public Vectorizer { TF_RETURN_IF_ERROR(node_builder.Finalize(outer_scope, &new_node)); // Add output mappings - for (size_t i = 0; i < node.num_outputs(); ++i) { + for (int i = 0; i < node.num_outputs(); ++i) { outputs->emplace_back(new_node, i, true); } return Status::OK(); diff --git a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc index 185cf5642e9..240d6edd7a9 100644 --- a/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc +++ b/tensorflow/core/grappler/optimizers/data/vectorization_utils.cc @@ -251,7 +251,7 @@ Status Vectorization::AddConversionMapping(Node* op_node) { // The inputs for the node to be converted may already have been converted // themselves. For those that are not, we promote them to MapDefun outputs. 
- for (size_t i = 0; i < op_node->num_inputs(); ++i) { + for (int i = 0; i < op_node->num_inputs(); ++i) { auto edge = input_edges[i]; if (auto found = gtl::FindOrNull(conversion_map_, {edge->src(), edge->src_output()})) { @@ -279,15 +279,15 @@ Status Vectorization::AddConversionMapping(Node* op_node) { << "\" failed with error: " << s; return s; } - - if (op_node->num_outputs() != outputs.size()) { + const int64 op_node_num_outputs = op_node->num_outputs(); + if (op_node_num_outputs != outputs.size()) { return errors::Internal( "Number of vectorizer outputs does not match. Expected: ", op_node->num_outputs(), " Actual: ", outputs.size()); } // Add output mappings. - for (size_t i = 0; i < op_node->num_outputs(); ++i) { + for (int i = 0; i < op_node->num_outputs(); ++i) { conversion_map_.insert({{op_node, i}, outputs[i]}); } @@ -521,7 +521,7 @@ Status Vectorization::AddArgTensorMappings() { // Captured inputs. These are applied (without slicing) to every iteration of // the map function, hence are mapped to unstacked nodes. - for (int i = num_args; i < map_defun_fn_->arg_nodes.size(); ++i) { + for (int i = num_args, end = map_defun_fn_->arg_nodes.size(); i < end; ++i) { TF_RETURN_IF_ERROR(add_conversion(map_defun_fn_->arg_nodes[i], false)); } diff --git a/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer.cc b/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer.cc index bedd16a18ce..07fc30b0a81 100644 --- a/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer.cc +++ b/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer.cc @@ -242,7 +242,7 @@ Status Transposer::CreateConstPermNode(TransposeContext* context, AttrValue attr_tensor; Tensor tensor(DT_INT32, TensorShape({4})); - for (int i = 0, iter_limit = permutation.size(); i < iter_limit; i++) { + for (int i = 0, end = permutation.size(); i < end; i++) { tensor.flat()(i) = permutation[i]; } tensor.AsProtoTensorContent(attr_tensor.mutable_tensor()); @@ -1567,8 +1567,7 @@ Status StridedSliceTransposer::PermuteMask(TransposeContext* context, return errors::InvalidArgument("invalid mask value: ", mask_i); } int result = 0; - for (int i = 0, iter_limit = context->src_to_dst.size(); i < iter_limit; - i++) { + for (int i = 0, end = context->src_to_dst.size(); i < end; i++) { const int final_pos = context->src_to_dst[i]; const int position_mask = 1 << final_pos; const int bit_i = (mask_i & position_mask) >> final_pos; diff --git a/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.cc b/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.cc index 3b3a747fd18..d5d3bba3eda 100644 --- a/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.cc @@ -521,7 +521,7 @@ class UnaryElementwiseRewriter : public ScopedAllocatorOptimizer::Rewriter { // Add control edges from the ScopedAllocatorOp to all of the // input nodes and mark them for allocation from backing tensor. 
- for (int i = 0, iter_limit = inputs.size(); i < iter_limit; ++i) { + for (int i = 0, end = inputs.size(); i < end; ++i) { auto& nd = inputs[i]; if (IsArg(*nd.from_node_def)) { return errors::Internal( @@ -548,8 +548,7 @@ class UnaryElementwiseRewriter : public ScopedAllocatorOptimizer::Rewriter { std::vector inputs_to_first; LOG_WARNING_AND_RETURN_IF_ERROR(GetDataInputs( graph, sa_opti->node_map(), nd.from_node_def, &inputs_to_first)); - for (int i = 0, iter_limit = inputs_to_first.size(); i < iter_limit; - ++i) { + for (int i = 0, end = inputs_to_first.size(); i < end; ++i) { if (fanout.find(inputs_to_first[i].from_node_def) != fanout.end()) { VLOG(2) << "Found node " << inputs_to_first[i].from_node_def->name() << " in the fanout of " << sa_name; @@ -589,7 +588,7 @@ class UnaryElementwiseRewriter : public ScopedAllocatorOptimizer::Rewriter { VLOG(2) << "BuildSAConcatNode " << sac_name; // control input: edge name -> source node name absl::flat_hash_map sac_ctl_inputs; - for (int i = 0, iter_limit = ops.size(); i < iter_limit; ++i) { + for (int i = 0, end = ops.size(); i < end; ++i) { NodeDef* old_op = ops[i]; for (const string& old_op_input : old_op->input()) { int position = 0; diff --git a/tensorflow/core/grappler/utils/functions.cc b/tensorflow/core/grappler/utils/functions.cc index a83fb824cc3..91ad261f969 100644 --- a/tensorflow/core/grappler/utils/functions.cc +++ b/tensorflow/core/grappler/utils/functions.cc @@ -313,7 +313,8 @@ Status ReplaceInputWithConst(const NodeDef& input_const, int input_index, return errors::InvalidArgument("Input node is not a constant: ", SummarizeNodeDef(input_const)); } - if (input_index < 0 || input_index >= item->input_size()) { + const int item_input_size = item->input_size(); + if (input_index < 0 || input_index >= item_input_size) { return errors::InvalidArgument( "Function input index is out of bound: index=", input_index, " input_size=", item->input_size()); @@ -354,7 +355,8 @@ Status RemoveFunctionOutputs(const absl::flat_hash_set& remove_outputs, // Do some sanity checking of the removed outputs positions. for (int remove_output : remove_outputs) { - if (remove_output < 0 || remove_output >= item->output_size()) { + const int item_output_size = item->output_size(); + if (remove_output < 0 || remove_output >= item_output_size) { return errors::InvalidArgument( "Function output index is out of bound: index=", remove_output, " output_size=", item->output_size()); @@ -366,7 +368,7 @@ Status RemoveFunctionOutputs(const absl::flat_hash_set& remove_outputs, return remove_output_args.find(&output) != remove_output_args.end(); }; - for (int i = 0; i < item->output_size(); ++i) { + for (int i = 0, end = item->output_size(); i < end; ++i) { const OutputArgInstantiation& output = item->output(i); if (remove_outputs.contains(i)) { VLOG(3) << "Remove functions output: name=" << output.node_name @@ -580,7 +582,7 @@ Status MakeFunctionDef(const GrapplerFunctionItem& item, } // Copy function arg attributes. 
- for (int i = 0; i < item.arg_attr().size(); ++i) { + for (int i = 0, end = item.arg_attr().size(); i < end; ++i) { const auto* attr = item.arg_attr().at(i); if (attr != nullptr) { (*func->mutable_arg_attr())[i] = *attr; diff --git a/tensorflow/core/kernels/boosted_trees/resources.cc b/tensorflow/core/kernels/boosted_trees/resources.cc index 0484314f205..7cffb1fb180 100644 --- a/tensorflow/core/kernels/boosted_trees/resources.cc +++ b/tensorflow/core/kernels/boosted_trees/resources.cc @@ -424,7 +424,8 @@ void BoostedTreesEnsembleResource::PostPruneTree(const int32 current_tree, ->mutable_post_pruned_nodes_meta(); for (int32 i = 0; i < num_nodes; ++i) { - if (index_for_deleted < nodes_to_delete.size() && + const int64 nodes_to_delete_size = nodes_to_delete.size(); + if (index_for_deleted < nodes_to_delete_size && i == nodes_to_delete[index_for_deleted]) { // Node i will get removed, ++index_for_deleted; @@ -455,7 +456,8 @@ void BoostedTreesEnsembleResource::PostPruneTree(const int32 current_tree, protobuf::RepeatedPtrField new_nodes; new_nodes.Reserve(old_to_new_ids.size()); for (auto node : *(tree->mutable_nodes())) { - if (index_for_deleted < nodes_to_delete.size() && + const int64 nodes_to_delete_size = nodes_to_delete.size(); + if (index_for_deleted < nodes_to_delete_size && i == nodes_to_delete[index_for_deleted]) { ++index_for_deleted; ++i; @@ -570,7 +572,7 @@ void BoostedTreesEnsembleResource::RecursivelyDoPostPrunePreparation( if (node_metadata.has_original_leaf()) { parent_values = node_value(tree_id, node_id); } - for (int32 i = 0; i < parent_values.size(); ++i) { + for (int32 i = 0, end = parent_values.size(); i < end; ++i) { nodes_meta->at(left_id).second.emplace_back(parent_values[i] - left_child_values[i]); nodes_meta->at(right_id).second.emplace_back(parent_values[i] - diff --git a/tensorflow/core/kernels/data/captured_function.cc b/tensorflow/core/kernels/data/captured_function.cc index dcc04a7a299..0066764baa0 100644 --- a/tensorflow/core/kernels/data/captured_function.cc +++ b/tensorflow/core/kernels/data/captured_function.cc @@ -216,7 +216,7 @@ Status CreateShortCircuitInfo(OpKernelConstruction* ctx, last_use[indices[i]] = i; } can_move.resize(indices.size()); - for (int i = 0, iter_limit = indices.size(); i < iter_limit; ++i) { + for (int i = 0, end = indices.size(); i < end; ++i) { can_move[i] = last_use[indices[i]] == i; } } @@ -663,8 +663,7 @@ Status CapturedFunction::Instantiate( inst_opts.composite_devices[it.first] = &it.second; } - for (int i = 0, iter_limit = fdef->signature().output_arg_size(); - i < iter_limit; ++i) { + for (int i = 0, end = fdef->signature().output_arg_size(); i < end; ++i) { inst_opts.output_devices.push_back(inst_opts.target); } diff --git a/tensorflow/core/kernels/data/experimental/snapshot_util.cc b/tensorflow/core/kernels/data/experimental/snapshot_util.cc index 3b051d7d572..7b4b68a6aec 100644 --- a/tensorflow/core/kernels/data/experimental/snapshot_util.cc +++ b/tensorflow/core/kernels/data/experimental/snapshot_util.cc @@ -215,7 +215,7 @@ Status CustomWriter::WriteTensors(const std::vector& tensors) { tensor_protos.reserve(num_complex_); experimental::SnapshotTensorMetadata metadata; int64 total_size = 0; - for (int i = 0; i < tensors.size(); ++i) { + for (int i = 0, end = tensors.size(); i < end; ++i) { const Tensor& tensor = tensors[i]; experimental::TensorMetadata* tensor_metadata = metadata.add_tensor_metadata(); @@ -239,7 +239,7 @@ Status CustomWriter::WriteTensors(const std::vector& tensors) { char* position = 
uncompressed.data(); int buffer_index = 0; int proto_index = 0; - for (int i = 0; i < tensors.size(); ++i) { + for (int i = 0, end = tensors.size(); i < end; ++i) { const auto& tensor_metadata = metadata.tensor_metadata(i); if (simple_tensor_mask_[i]) { memcpy(position, tensor_buffers[buffer_index]->data(), @@ -514,7 +514,8 @@ class Reader::NestedDataset : public DatasetBase { Status GetNextInternal(IteratorContext* ctx, std::vector* out_tensors, bool* end_of_sequence) override { - *end_of_sequence = dataset()->datasets_.size() == index_; + const int64 dataset_datasets_size = dataset()->datasets_.size(); + *end_of_sequence = dataset_datasets_size == index_; if (!*end_of_sequence) { Tensor tensor(DT_VARIANT, TensorShape({})); @@ -704,7 +705,7 @@ Status CustomReader::ReadTensors(std::vector* read_tensors) { int simple_index = 0; int complex_index = 0; - for (int i = 0; i < simple_tensor_mask_.size(); ++i) { + for (int i = 0, end = simple_tensor_mask_.size(); i < end; ++i) { if (simple_tensor_mask_[i]) { read_tensors->push_back(std::move(simple_tensors[simple_index])); simple_index++; @@ -774,7 +775,7 @@ Status CustomReader::SnappyUncompress( std::vector iov(num_tensors); int index = 0; int64 total_size = 0; - for (int i = 0; i < simple_tensor_mask_.size(); ++i) { + for (int i = 0, end = simple_tensor_mask_.size(); i < end; ++i) { const auto& tensor_metadata = metadata->tensor_metadata(i); if (simple_tensor_mask_[i]) { TensorShape shape(tensor_metadata.tensor_shape()); @@ -794,7 +795,8 @@ Status CustomReader::SnappyUncompress( total_size += iov[index].iov_len; index++; } - if (size != total_size) { + const int64 size_int = size; + if (size_int != total_size) { return errors::Internal("Uncompressed size mismatch. Snappy expects ", size, " whereas the tensor metadata suggests ", total_size); @@ -905,7 +907,7 @@ Status DetermineOpState(const std::string& mode_string, bool file_exists, } if (metadata->creation_timestamp() >= - (static_cast(EnvTime::NowMicros()) - + static_cast(static_cast(EnvTime::NowMicros()) - pending_snapshot_expiry_seconds * 1000000)) { // Someone else is already writing and time has not expired. 
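The `DetermineOpState` hunk just above compares a snapshot's creation timestamp against `now - expiry_window`, with the window converted from seconds to microseconds and the arithmetic kept in a signed 64-bit type. A minimal sketch of that cutoff computation with hypothetical constants:

    // sketch_expiry.cc -- illustrative only.
    #include <cstdint>
    #include <iostream>

    constexpr int64_t kMicrosPerSecond = 1000000;

    // A pending write is still considered live when it was created inside the
    // expiry window; doing the math in int64_t avoids unsigned wraparound if
    // the window ever exceeds the current clock value in a test environment.
    bool StillPending(int64_t creation_timestamp_micros, int64_t now_micros,
                      int64_t expiry_seconds) {
      const int64_t cutoff = now_micros - expiry_seconds * kMicrosPerSecond;
      return creation_timestamp_micros >= cutoff;
    }

    int main() {
      const int64_t now = 1000000000;  // some clock reading, in microseconds
      // Created 30 seconds ago with a 60 second window: still pending.
      std::cout << std::boolalpha
                << StillPending(now - 30 * kMicrosPerSecond, now, 60) << "\n";
      return 0;
    }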
*mode = PASSTHROUGH; diff --git a/tensorflow/core/kernels/range_sampler.cc b/tensorflow/core/kernels/range_sampler.cc index d759f315cb0..8c8da47880d 100644 --- a/tensorflow/core/kernels/range_sampler.cc +++ b/tensorflow/core/kernels/range_sampler.cc @@ -84,7 +84,7 @@ void RangeSampler::SampleBatchGetExpectedCountAvoid( int num_tries; if (unique) { - CHECK_LE(batch_size + avoided_values.size(), range_); + CHECK_LE( static_cast(batch_size + avoided_values.size()), range_); std::unordered_set used(batch_size); used.insert(avoided_values.begin(), avoided_values.end()); int num_picked = 0; diff --git a/tensorflow/core/kernels/remote_fused_graph_execute_utils.cc b/tensorflow/core/kernels/remote_fused_graph_execute_utils.cc index a7c80fdcb77..d5a4cb5f944 100644 --- a/tensorflow/core/kernels/remote_fused_graph_execute_utils.cc +++ b/tensorflow/core/kernels/remote_fused_graph_execute_utils.cc @@ -970,11 +970,10 @@ RemoteFusedGraphExecuteUtils::BuildRemoteFusedGraphExecuteOpNode( border_inputs, border_outputs, require_shape_type, &graph, &fused_node)); for (const Node* node : graph.nodes()) { - for (int i = 0, iter_limit = node->num_inputs(); i < iter_limit; ++i) { + for (int i = 0, end = node->num_inputs(); i < end; ++i) { const Edge* edge = nullptr; TF_RETURN_IF_ERROR(node->input_edge(i, &edge)); - for (int j = 0, second_iter_limit = border_outputs.size(); - j < second_iter_limit; ++j) { + for (int j = 0, second_end = border_outputs.size(); j < second_end; ++j) { const string& output = border_outputs.at(j); const TensorId tid = ParseTensorName(output); const string output_name(tid.first); From de79fed9cf55d184bbdb72c6c8b731c219fddde8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tar=C3=A9=20Gaskin?= Date: Mon, 27 Jul 2020 00:02:22 +0000 Subject: [PATCH 1359/2522] tensorflow/core resolutions 3/3 --- .../convert/op_stats_to_overview_page.cc | 17 ++++++++++++++--- .../convert/xplane_to_memory_profile.cc | 5 +++-- .../profiler/convert/xplane_to_tf_functions.cc | 2 +- .../core/profiler/internal/tfprof_code.cc | 10 ++++++---- tensorflow/core/profiler/internal/tfprof_op.cc | 5 +++-- tensorflow/core/profiler/internal/tfprof_op.h | 3 ++- .../core/profiler/internal/tfprof_timeline.cc | 2 +- tensorflow/core/profiler/utils/event_span.cc | 7 +++---- ...u_embedding_optimization_parameters_utils.cc | 6 ++---- tensorflow/core/util/padding.cc | 3 ++- 10 files changed, 37 insertions(+), 23 deletions(-) diff --git a/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc b/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc index 9f71175bcea..154d2897613 100644 --- a/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc +++ b/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc @@ -25,6 +25,7 @@ limitations under the License. #include "tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h" #include "tensorflow/core/profiler/protobuf/hardware_types.pb.h" #include "tensorflow/core/profiler/protobuf/input_pipeline.pb.h" +#include "tensorflow/core/profiler/protobuf/kernel_stats.pb.h" #include "tensorflow/core/profiler/protobuf/op_metrics.pb.h" #include "tensorflow/core/profiler/protobuf/op_stats.pb.h" #include "tensorflow/core/profiler/protobuf/overview_page.pb.h" @@ -33,6 +34,7 @@ limitations under the License. 
#include "tensorflow/core/profiler/utils/diagnostics.h" #include "tensorflow/core/profiler/utils/hardware_type_utils.h" #include "tensorflow/core/profiler/utils/html_utils.h" +#include "tensorflow/core/profiler/utils/kernel_stats_utils.h" #include "tensorflow/core/profiler/utils/math_utils.h" #include "tensorflow/core/profiler/utils/op_metrics_db_utils.h" #include "tensorflow/core/profiler/utils/time_utils.h" @@ -163,6 +165,9 @@ OverviewPageAnalysis ComputeAnalysisResult(const OpStats& op_stats) { OverviewPageAnalysis analysis; OpMetricsDb device_tf_op_metrics_db = CreateTfMetricsDbFromDeviceOpMetricsDb( op_stats.device_op_metrics_db(), /*with_idle=*/false); + absl::flat_hash_map> + grouped_kernel_reports = + GroupKernelReportsByOpName(op_stats.kernel_stats_db()); uint64 total_device_time_ps = device_tf_op_metrics_db.total_time_ps(); constexpr int kNumTopOpsShown = 10; double device_cumulative_fraction = 0.0; @@ -177,6 +182,11 @@ OverviewPageAnalysis ComputeAnalysisResult(const OpStats& op_stats) { op->set_cumulative_time_fraction(device_cumulative_fraction); op->set_flop_rate( SafeDivide(metrics->flops(), PicosToNanos(metrics->time_ps()))); + auto iter = grouped_kernel_reports.find(op->name()); + if (iter != grouped_kernel_reports.end()) { + op->set_is_op_tensorcore_eligible( + iter->second.front()->is_op_tensor_core_eligible()); + } } uint64 total_device_compute_ps = op_stats.device_op_metrics_db().precision_stats().compute_16bit_ps() + @@ -287,15 +297,16 @@ std::string TfFunctionRecommendationHtml(const TfFunctionDb& tf_function_db) { // Sorts candidates in descending order of expensive_call_percent. absl::c_sort(candidates, cmp); std::string expensive_functions = ""; - auto num_functions_shown = std::min( + const int64 num_functions_shown = std::min( static_cast(3), candidates.size()); - for (auto i = 0; i < num_functions_shown; i++) { + for (int64 i = 0; i < num_functions_shown; i++) { if (i > 0) absl::StrAppend(&expensive_functions, ", "); absl::StrAppend(&expensive_functions, "\"", candidates[i].function_name, "\""); } - if (candidates.size() > num_functions_shown) + const int64 candidates_size = candidates.size(); + if (candidates_size > num_functions_shown) absl::StrAppend(&expensive_functions, " and more"); return absl::StrCat("Expensive tf-functions detected (", expensive_functions, ") due to either retracing or eager execution."); diff --git a/tensorflow/core/profiler/convert/xplane_to_memory_profile.cc b/tensorflow/core/profiler/convert/xplane_to_memory_profile.cc index ee4e597c5b9..e8783a71709 100644 --- a/tensorflow/core/profiler/convert/xplane_to_memory_profile.cc +++ b/tensorflow/core/profiler/convert/xplane_to_memory_profile.cc @@ -404,7 +404,7 @@ void ProcessActiveAllocations(int64 peak_bytes_profile_step_id, // Fill the sorted active_allocations proto messages at peak memory usage. // Merge identical allocations and show occurrences. 
- for (int i = 0; i < active_allocs.size(); i++) { + for (int i = 0, end = active_allocs.size(); i < end; i++) { ActiveAllocation* allocation = memory_profile->add_active_allocations(); allocation->set_snapshot_index(active_allocs[i].first); if (active_allocs[i].first < 0) { @@ -413,7 +413,8 @@ void ProcessActiveAllocations(int64 peak_bytes_profile_step_id, allocation->set_special_index(-1); } allocation->set_num_occurrences(1); - while (i < active_allocs.size() - 1 && + const int active_allocs_size = active_allocs.size() - 1; + while (i < active_allocs_size && active_allocs[i] == active_allocs[i + 1]) { allocation->set_num_occurrences(allocation->num_occurrences() + 1); i++; diff --git a/tensorflow/core/profiler/convert/xplane_to_tf_functions.cc b/tensorflow/core/profiler/convert/xplane_to_tf_functions.cc index e0b517d797a..eecfb9fba7b 100644 --- a/tensorflow/core/profiler/convert/xplane_to_tf_functions.cc +++ b/tensorflow/core/profiler/convert/xplane_to_tf_functions.cc @@ -206,7 +206,7 @@ class TfFunctionExecutions { std::string DebugString() const { std::string result = "\nActivations:\n"; - for (int i = 0, iter_limit = activations_.size(); i < iter_limit; i++) { + for (int i = 0, end = activations_.size(); i < end; i++) { absl::StrAppend(&result, "[", i, "] ", activations_[i].DebugString(), "\n"); } diff --git a/tensorflow/core/profiler/internal/tfprof_code.cc b/tensorflow/core/profiler/internal/tfprof_code.cc index f104cffafcc..c37efcf222c 100644 --- a/tensorflow/core/profiler/internal/tfprof_code.cc +++ b/tensorflow/core/profiler/internal/tfprof_code.cc @@ -421,14 +421,15 @@ void TFCode::AddNode(TFGraphNode* node) { // TODO(xpan): Consider to release CodeDef after TFCode is built. It // takes a lot of memory. std::set traces; - for (int i = 0; i < node->call_stack()->traces().size(); ++i) { + for (int i = 0, end = node->call_stack()->traces().size(); i < end; ++i) { // Unlike op name, which is globally unique, trace name is only unique // w.r.t. it's parent. 
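The active-allocation loop above walks a sorted list and merges identical adjacent entries, recording how many duplicates each merged record absorbs. The same run-length idea as a standalone sketch:

    // sketch_merge_adjacent.cc -- illustrative only.
    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    // For a sorted input, emit one (value, occurrences) pair per run of equal
    // adjacent items.
    std::vector<std::pair<std::string, int>> MergeAdjacent(
        const std::vector<std::string>& sorted_items) {
      std::vector<std::pair<std::string, int>> merged;
      for (int i = 0, end = sorted_items.size(); i < end; ++i) {
        int occurrences = 1;
        while (i + 1 < end && sorted_items[i] == sorted_items[i + 1]) {
          ++occurrences;
          ++i;
        }
        merged.emplace_back(sorted_items[i], occurrences);
      }
      return merged;
    }

    int main() {
      for (const auto& entry : MergeAdjacent({"conv", "conv", "conv", "matmul"})) {
        std::cout << entry.first << " x" << entry.second << "\n";
      }
      // Output: conv x3, then matmul x1.
      return 0;
    }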
const string& trace = GetTraceString(node->call_stack()->traces().at(i)); traces.insert(trace); pre_code_node = pre_code_node->AddChildren( trace, &node->call_stack()->traces().at(i), ""); - if (i == node->call_stack()->traces().size() - 1) { + const int64 last_index = node->call_stack()->traces().size() - 1; + if (i == last_index) { pre_code_node->node->AddGraphNode(node); } } @@ -446,12 +447,13 @@ void TFCode::Build() { TFGraphNode* fn = forward_it->second; CodeNode* leaf = nullptr; CodeNode* pre_code_node = root_.get(); - for (int i = 0; i < fn->call_stack()->traces().size(); ++i) { + for (int i = 0, end = fn->call_stack()->traces().size(); i < end; ++i) { const string& trace = GetTraceString(fn->call_stack()->traces().at(i)) + kGradientSuffix; pre_code_node = pre_code_node->AddChildren( trace, &fn->call_stack()->traces().at(i), kGradientSuffix); - if (i == fn->call_stack()->traces().size() - 1) { + const int64 max_stack_trace_allowed_size = fn->call_stack()->traces().size() - 1; + if (i == max_stack_trace_allowed_size) { leaf = pre_code_node; } } diff --git a/tensorflow/core/profiler/internal/tfprof_op.cc b/tensorflow/core/profiler/internal/tfprof_op.cc index adc12882f9e..8daac471a14 100644 --- a/tensorflow/core/profiler/internal/tfprof_op.cc +++ b/tensorflow/core/profiler/internal/tfprof_op.cc @@ -146,7 +146,7 @@ const ShowMultiNode* TFOp::ShowInternal(const Options& opts, int64 depth = 0; std::vector show_nodes; int64 start = SearchRoot(account_nodes, opts.start_name_regexes); - for (int64 i = start; i < account_nodes.size(); ++i, ++depth) { + for (int64 i = start, end = account_nodes.size(); i < end; ++i, ++depth) { OpNode* n = account_nodes[i]; if (ShouldTrim(n, opts.trim_name_regexes) || depth > opts.max_depth) { break; @@ -195,7 +195,8 @@ int64 TFOp::SearchRoot(const std::vector nodes, return 0; } int64 i = 0; - for (; i < nodes.size(); ++i) { + const int64 nodes_size = nodes.size(); + for (; i < nodes_size; ++i) { for (const string& regex : regexes) { if (RE2::FullMatch(nodes[i]->name(), regex)) { return i; diff --git a/tensorflow/core/profiler/internal/tfprof_op.h b/tensorflow/core/profiler/internal/tfprof_op.h index aa22182d36c..df222a1eb20 100644 --- a/tensorflow/core/profiler/internal/tfprof_op.h +++ b/tensorflow/core/profiler/internal/tfprof_op.h @@ -57,7 +57,8 @@ class TFOp : public TFMultiShow { bool ShouldShowIfExtra(const ShowMultiNode* node, const Options& opts, int depth) const override { - if (opts.min_occurrence > node->node->graph_nodes().size()) { + const int max_num_graph_nodes = node->node->graph_nodes().size(); + if (opts.min_occurrence > max_num_graph_nodes) { return false; } return true; diff --git a/tensorflow/core/profiler/internal/tfprof_timeline.cc b/tensorflow/core/profiler/internal/tfprof_timeline.cc index 96e880dc999..e52ef3595f2 100644 --- a/tensorflow/core/profiler/internal/tfprof_timeline.cc +++ b/tensorflow/core/profiler/internal/tfprof_timeline.cc @@ -348,7 +348,7 @@ void Timeline::AllocateLanes() { int64 start_time = tnode.second->start_micros; int64 end_time = tnode.second->start_micros + tnode.second->exec_micros; int64 l = -1; - for (int64 i = 0; i < p->lanes.size(); ++i) { + for (int64 i = 0, end = p->lanes.size(); i < end; ++i) { const auto& lane = p->lanes[i]; l = i; for (auto cur_it = lane.rbegin(); cur_it != lane.rend(); ++cur_it) { diff --git a/tensorflow/core/profiler/utils/event_span.cc b/tensorflow/core/profiler/utils/event_span.cc index 9a0f65941b2..86de2f83eb7 100644 --- a/tensorflow/core/profiler/utils/event_span.cc +++ 
b/tensorflow/core/profiler/utils/event_span.cc @@ -128,8 +128,7 @@ std::vector ToNonOverlappedEvents( if (event_boundaries.empty()) return result; result.reserve(event_boundaries.size()); PriorityTracker priority_tracker; - for (int64 i = 0, iter_limit = (event_boundaries.size() - 1); i < iter_limit; - i++) { + for (int64 i = 0, end = (event_boundaries.size() - 1); i < end; i++) { EventType highest_priority = priority_tracker.Update(event_boundaries[i]); result.push_back({highest_priority, Timespan::FromEndPoints( event_boundaries[i].time_ps, @@ -326,12 +325,12 @@ Timespan StepDetails::StepTime() const { std::string StepDetails::DebugString() const { std::string result = "(["; - for (int i = 0, iter_limit = markers_.size(); i < iter_limit; i++) { + for (int i = 0, end = markers_.size(); i < end; i++) { if (i > 0) absl::StrAppend(&result, ", "); absl::StrAppend(&result, PrintStepMarker(markers_[i])); } absl::StrAppend(&result, "], ["); - for (int i = 0, iter_limit = events_.size(); i < iter_limit; i++) { + for (int i = 0, end = events_.size(); i < end; i++) { if (i > 0) absl::StrAppend(&result, ", "); absl::StrAppend(&result, PrintEventTypeSpan(events_[i])); } diff --git a/tensorflow/core/tpu/tpu_embedding_optimization_parameters_utils.cc b/tensorflow/core/tpu/tpu_embedding_optimization_parameters_utils.cc index 22790db5a3e..961858665a4 100644 --- a/tensorflow/core/tpu/tpu_embedding_optimization_parameters_utils.cc +++ b/tensorflow/core/tpu/tpu_embedding_optimization_parameters_utils.cc @@ -336,8 +336,7 @@ Status LoadOpShapeFunction::operator()( }); std::vector inputs(user_param_count); int input_index = 0; - for (int i = 0, iter_limit = state_variable_specs.size(); i < iter_limit; - ++i) { + for (int i = 0, end = state_variable_specs.size(); i < end; ++i) { if (state_variable_specs[i].has_user_defined() || is_debug_op_) { std::vector input_temp; TF_RETURN_IF_ERROR(c->input(state_variable_specs[i].name(), &input_temp)); @@ -389,8 +388,7 @@ Status RetrieveOpShapeFunction::operator()( TF_RETURN_IF_ERROR(c->GetAttr("num_shards", &num_shards)); int shard_id; TF_RETURN_IF_ERROR(c->GetAttr("shard_id", &shard_id)); - for (int j = 0, iter_limit = state_variable_specs.size(); j < iter_limit; - ++j) { + for (int j = 0, end = state_variable_specs.size(); j < end; ++j) { if (state_variable_specs[j].has_user_defined() || is_debug_op_) { auto shape = c->MakeShape( std::vector(2, c->UnknownDim())); diff --git a/tensorflow/core/util/padding.cc b/tensorflow/core/util/padding.cc index c26de3b748c..6ffbc8040ed 100644 --- a/tensorflow/core/util/padding.cc +++ b/tensorflow/core/util/padding.cc @@ -37,7 +37,8 @@ Status CheckValidPadding(Padding padding_type, const std::vector& explicit_paddings, int num_dims, TensorFormat data_format) { if (padding_type == Padding::EXPLICIT) { - if (static_cast(explicit_paddings.size()) != 2 * num_dims) { + const int explicit_paddings_size = explicit_paddings.size(); + if (explicit_paddings_size != 2 * num_dims) { return errors::InvalidArgument( "explicit_paddings attribute must contain ", 2 * num_dims, " values, but got: ", explicit_paddings.size()); From 99686bb275cf2ab083425d3e962604e6618b53f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tar=C3=A9=20Gaskin?= Date: Mon, 27 Jul 2020 00:08:05 +0000 Subject: [PATCH 1360/2522] resolutions --- .../delegates/nnapi/nnapi_delegate_kernel.h | 12 ++++++++---- .../lite/delegates/nnapi/quant_lstm_sup.cc | 2 +- .../convert_trivial_transpose_to_reshape.cc | 2 +- .../toco/graph_transformations/dequantize.cc | 2 +- 
.../graph_transformations/drop_fake_quant.cc | 3 +-- ..._uint8_weights_safe_for_fast_int8_kernels.cc | 2 +- tensorflow/lite/toco/tflite/import.cc | 2 +- tensorflow/lite/tools/optimize/model_utils.cc | 2 +- .../lite/tools/optimize/quantization_utils.cc | 10 +++++----- .../lite/tools/optimize/quantize_weights.cc | 9 +++++---- tensorflow/lite/tools/verifier.cc | 17 +++++++++-------- tensorflow/python/client/session_ref.cc | 3 +-- tensorflow/python/lib/core/ndarray_tensor.cc | 2 +- .../stream_executor/device_description.cc | 6 +++--- tensorflow/stream_executor/stream.cc | 4 ++-- .../stream_executor/stream_executor_pimpl.cc | 2 +- 16 files changed, 42 insertions(+), 38 deletions(-) diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h b/tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h index 9aa0f303cc2..dbe3f76bc52 100644 --- a/tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h +++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h @@ -39,7 +39,8 @@ class OperandMapping { // Given a TFLite index return the ANN index. If it doesn't exist // return -1. int lite_index_to_ann(int index) const { - if (index >= 0 && index < lite_tensor_to_ann_tensor_.size()) + const int64_t max_size = lite_tensor_to_ann_tensor_.size(); + if (index >= 0 && index < max_size) return lite_tensor_to_ann_tensor_[index]; else return -1; @@ -60,7 +61,8 @@ class OperandMapping { // Add a new mapping from `tflite_index` and return the NN API tensor index. int add_new_ann_tensor_index(int tflite_index) { - if (tflite_index >= lite_tensor_to_ann_tensor_.size()) { + const int64_t current_size = lite_tensor_to_ann_tensor_.size(); + if (tflite_index >= current_size) { lite_tensor_to_ann_tensor_.resize(tflite_index + 1, -1); } const int new_tensor_index = next_ann_tensor_index_++; @@ -72,7 +74,8 @@ class OperandMapping { // converted during copying the data to the memory allocated for NN API. // kTfLiteNoType means no conversion is needed. TfLiteType lite_index_to_ann_type_conversion(int index) const { - if (index >= 0 && index < index_to_type_conversion_.size()) + const int64_t max_size = index_to_type_conversion_.size(); + if (index >= 0 && index < max_size) return index_to_type_conversion_[index]; else return kTfLiteNoType; @@ -80,7 +83,8 @@ class OperandMapping { // Add a new mapping from TFLite index to a type conversion. 
void add_type_conversion(int tflite_index, TfLiteType tflite_type) { - if (tflite_index >= index_to_type_conversion_.size()) { + const int64_t current_size = index_to_type_conversion_.size(); + if (tflite_index >= current_size) { index_to_type_conversion_.resize(tflite_index + 1, kTfLiteNoType); } index_to_type_conversion_[tflite_index] = tflite_type; diff --git a/tensorflow/lite/delegates/nnapi/quant_lstm_sup.cc b/tensorflow/lite/delegates/nnapi/quant_lstm_sup.cc index bcf2ff61825..2337296444d 100644 --- a/tensorflow/lite/delegates/nnapi/quant_lstm_sup.cc +++ b/tensorflow/lite/delegates/nnapi/quant_lstm_sup.cc @@ -36,7 +36,7 @@ void ExtractQuantLstmWeightsSubmatrix(const TfLiteIntArray* submatrix_dims, submatrix->resize(NumElements(submatrix_dims)); - for (uint32_t i = 0; i < submatrix_rows * submatrix_cols; ++i) { + for (uint32_t i = 0, end = submatrix_rows * submatrix_cols; i < end; ++i) { const uint32_t row = i / submatrix_cols; const uint32_t column = i % submatrix_cols; (*submatrix)[i] = diff --git a/tensorflow/lite/toco/graph_transformations/convert_trivial_transpose_to_reshape.cc b/tensorflow/lite/toco/graph_transformations/convert_trivial_transpose_to_reshape.cc index aa8e2556d1a..869d3486fba 100644 --- a/tensorflow/lite/toco/graph_transformations/convert_trivial_transpose_to_reshape.cc +++ b/tensorflow/lite/toco/graph_transformations/convert_trivial_transpose_to_reshape.cc @@ -31,7 +31,7 @@ bool TransposeAffectsMemoryOrder(std::vector perm, // just the shape) then the flat buffer representation shouldn't change. std::vector old_major_index_ordering; std::vector new_major_index_ordering; - for (int i = 0, iter_limit = in_shape.size(); i < iter_limit; i++) { + for (int i = 0, end = in_shape.size(); i < end; i++) { if (in_shape[i] != 1) { old_major_index_ordering.push_back(i); } diff --git a/tensorflow/lite/toco/graph_transformations/dequantize.cc b/tensorflow/lite/toco/graph_transformations/dequantize.cc index bd2fdff3497..e6f796ed361 100644 --- a/tensorflow/lite/toco/graph_transformations/dequantize.cc +++ b/tensorflow/lite/toco/graph_transformations/dequantize.cc @@ -35,7 +35,7 @@ void DequantizeBuffer(Array* array) { auto& new_data = array->GetMutableBuffer().data; new_data.resize(old_data.size()); const auto& qparams = array->GetQuantizationParams(); - for (int i = 0, iter_limit = old_data.size(); i < iter_limit; i++) { + for (int i = 0, end = old_data.size(); i < end; i++) { new_data[i] = qparams.scale * (old_data[i] - qparams.zero_point); } } diff --git a/tensorflow/lite/toco/graph_transformations/drop_fake_quant.cc b/tensorflow/lite/toco/graph_transformations/drop_fake_quant.cc index d684cc6971c..996d714ae8c 100644 --- a/tensorflow/lite/toco/graph_transformations/drop_fake_quant.cc +++ b/tensorflow/lite/toco/graph_transformations/drop_fake_quant.cc @@ -45,8 +45,7 @@ namespace toco { } // Drop min/max inputs - for (int i = 1, iter_limit = fakequant_op->inputs.size(); i < iter_limit; - i++) { + for (int i = 1, end = fakequant_op->inputs.size(); i < end; i++) { if (CountOpsWithInput(*model, fakequant_op->inputs[i]) == 1) { model->EraseArray(fakequant_op->inputs[i]); } diff --git a/tensorflow/lite/toco/graph_transformations/ensure_uint8_weights_safe_for_fast_int8_kernels.cc b/tensorflow/lite/toco/graph_transformations/ensure_uint8_weights_safe_for_fast_int8_kernels.cc index 2cb930b8a5c..6f4d4a783c0 100644 --- a/tensorflow/lite/toco/graph_transformations/ensure_uint8_weights_safe_for_fast_int8_kernels.cc +++ 
b/tensorflow/lite/toco/graph_transformations/ensure_uint8_weights_safe_for_fast_int8_kernels.cc @@ -166,7 +166,7 @@ namespace toco { int index_of_previous_bad_value = 0; bool changed = false; - for (int i = 0, iter_limit = buffer_data.size(); i < iter_limit; i++) { + for (int i = 0, end = buffer_data.size(); i < end; i++) { if (buffer_data[i] == 0) { count_bad++; if (count_bad > 1) { diff --git a/tensorflow/lite/toco/tflite/import.cc b/tensorflow/lite/toco/tflite/import.cc index 136aa4ffaa8..3f082afa509 100644 --- a/tensorflow/lite/toco/tflite/import.cc +++ b/tensorflow/lite/toco/tflite/import.cc @@ -157,7 +157,7 @@ void ImportOperators( } } auto outputs = input_op->outputs(); - for (int i = 0; i < outputs->Length(); i++) { + for (int i = 0, end = outputs->Length(); i < end; i++) { auto output_index = outputs->Get(i); const std::string& output_name = tensors_table.at(output_index); op->outputs.push_back(output_name); diff --git a/tensorflow/lite/tools/optimize/model_utils.cc b/tensorflow/lite/tools/optimize/model_utils.cc index ae868cf21b8..f30fa8b7bdd 100644 --- a/tensorflow/lite/tools/optimize/model_utils.cc +++ b/tensorflow/lite/tools/optimize/model_utils.cc @@ -125,7 +125,7 @@ bool HasMinMax(const TensorT* tensor) { } void SetOperatorCodeVersion(ModelT* model) { - for (int subgraph_idx = 0; subgraph_idx < model->subgraphs.size(); + for (int subgraph_idx = 0, end = model->subgraphs.size(); subgraph_idx < end; subgraph_idx++) { SubGraphT* subgraph = model->subgraphs.at(subgraph_idx).get(); // Iterate backward to avoid messing with index. diff --git a/tensorflow/lite/tools/optimize/quantization_utils.cc b/tensorflow/lite/tools/optimize/quantization_utils.cc index b3aa23b2579..81110071dc9 100644 --- a/tensorflow/lite/tools/optimize/quantization_utils.cc +++ b/tensorflow/lite/tools/optimize/quantization_utils.cc @@ -259,7 +259,7 @@ TfLiteStatus AdjustWeightsForBiasScale(QuantizationParametersT* quant_params, // Per channel quantization if (channel_dim_size > 1) { - for (size_t i = 0; i < channel_dim_size; ++i) { + for (int i = 0; i < channel_dim_size; ++i) { // Current scale is not compatible with bias. Adjust max/min values. if (std::abs(bias_data[i]) >= 0.5 * input_scale * weight_scales[i] * kScale) { @@ -636,7 +636,7 @@ TfLiteStatus SymmetricPerChannelBiasQuantize(ModelT* model, TensorT* tensor, ErrorReporter* error_reporter) { // Compute scales. 
std::vector scales(number_of_dimension); - for (size_t i = 0; i < number_of_dimension; i++) { + for (int i = 0; i < number_of_dimension; i++) { scales[i] = input_scale * weight_scales[i]; } @@ -703,19 +703,19 @@ float GetEffectiveScale(ModelT* model, SubGraphT* subgraph, int op_idx, std::vector factors) { float scale = 1.0f; OperatorT* op = subgraph->operators[op_idx].get(); - for (int i = 0; i < input_index.size(); ++i) { + for (int i = 0, end = input_index.size(); i < end; ++i) { const int index_local = input_index[i]; const int index_global = op->inputs[index_local]; const TensorT* tensor = subgraph->tensors[index_global].get(); scale *= tensor->quantization->scale[0]; } - for (int i = 0; i < intermediate_index.size(); ++i) { + for (int i = 0, end = intermediate_index.size(); i < end; ++i) { const int index_local = intermediate_index[i]; const int index_global = op->intermediates[index_local]; const TensorT* tensor = subgraph->tensors[index_global].get(); scale *= tensor->quantization->scale[0]; } - for (int i = 0; i < factors.size(); ++i) { + for (int i = 0, end = factors.size(); i < end; ++i) { scale *= factors[i]; } return scale; diff --git a/tensorflow/lite/tools/optimize/quantize_weights.cc b/tensorflow/lite/tools/optimize/quantize_weights.cc index 8bef019a83e..b6f15962bfc 100644 --- a/tensorflow/lite/tools/optimize/quantize_weights.cc +++ b/tensorflow/lite/tools/optimize/quantize_weights.cc @@ -324,7 +324,7 @@ void MakeTensor(const string& name, const std::vector& shape, // Updates operator code versions for the operators with INT8 inputs. void UpdateInt8OperatorVersions(ModelT* model) { - for (int i = 0; i < model->operator_codes.size(); ++i) { + for (int i = 0, end = model->operator_codes.size(); i < end; ++i) { const BuiltinOperator& op_code = model->operator_codes[i]->builtin_code; if (op_code == BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM || op_code == BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN || @@ -406,7 +406,7 @@ TfLiteStatus QuantizeWeightsInt8(flatbuffers::FlatBufferBuilder* builder, std::unique_ptr model; model.reset(input_model->UnPack()); - for (int subgraph_index = 0; subgraph_index < model->subgraphs.size(); + for (int subgraph_index = 0, end = model->subgraphs.size(); subgraph_index < end; ++subgraph_index) { SubGraphT* subgraph = model->subgraphs.at(subgraph_index).get(); @@ -530,12 +530,13 @@ TfLiteStatus QuantizeWeightsFloat16(flatbuffers::FlatBufferBuilder* builder, std::unique_ptr model; model.reset(input_model->UnPack()); - for (int subgraph_index = 0; subgraph_index < model->subgraphs.size(); + for (int subgraph_index = 0, end = model->subgraphs.size(); subgraph_index < end + ; ++subgraph_index) { SubGraphT* subgraph = model->subgraphs.at(subgraph_index).get(); absl::flat_hash_map tensor_map; - for (int i = 0; i < subgraph->operators.size(); ++i) { + for (int i = 0, sub_end = subgraph->operators.size(); i < sub_end; ++i) { OperatorT* op = subgraph->operators[i].get(); for (auto tensor_idx : op->inputs) { // Skip optional tensors. 
diff --git a/tensorflow/lite/tools/verifier.cc b/tensorflow/lite/tools/verifier.cc index 12b24e6f2d8..9f15b9f0800 100644 --- a/tensorflow/lite/tools/verifier.cc +++ b/tensorflow/lite/tools/verifier.cc @@ -106,9 +106,9 @@ bool VerifyStringTensorBuffer(const Tensor& tensor, const Buffer& buffer, return false; } offset += sizeof(int32_t); - for (int i = 1; i <= num_strings; i++, offset += sizeof(int32_t)) { + for (int i = 1, end = num_strings; i <= end; i++, offset += sizeof(int32_t)) { int string_offset = *GetIntPtr(buffer_ptr + offset); - if (string_offset < prev_ptr || string_offset > buffer_size) { + if (string_offset < static_cast(prev_ptr) || string_offset > static_cast(buffer_size)) { ReportError(error_reporter, "String tensor %s buffer is invalid: index %d", NameOrEmptyString(tensor.name()), i); @@ -221,7 +221,7 @@ absl::optional VerifyAndCountElements( } } - if (num_elements != array_segments_size - 1) { + if (static_cast(num_elements) != array_segments_size - 1) { return absl::nullopt; } @@ -254,15 +254,16 @@ absl::optional VerifyAndCountSparseElements(const Tensor& tensor) { const int total_dims = sparsity->traversal_order()->size(); const int original_rank = tensor.shape()->size(); - + const int sparsity_dim_metadata_size = sparsity->dim_metadata()->size(); if (total_dims < original_rank || - sparsity->dim_metadata()->size() != total_dims) { + sparsity_dim_metadata_size != total_dims) { return absl::nullopt; } const int block_rank = total_dims - original_rank; + const int sparsity_block_map_size = sparsity->block_map()->size(); if (block_rank > 0 && (sparsity->block_map() == nullptr || - sparsity->block_map()->size() != block_rank)) { + sparsity_block_map_size != block_rank)) { return absl::nullopt; } @@ -446,7 +447,7 @@ bool VerifySubGraphConsistency(const Model& model, const SubGraph& subgraph, absl::flat_hash_set subgraph_input_tensors, constant_tensors, variable_tensors, output_tensors; if (subgraph.tensors()) { - for (int i = 0; i < subgraph.tensors()->size(); ++i) { + for (int i = 0, end = subgraph.tensors()->size(); i < end; ++i) { const auto* tensor = subgraph.tensors()->Get(i); if (IsConstantTensor(*tensor, model)) { constant_tensors.insert(i); @@ -462,7 +463,7 @@ bool VerifySubGraphConsistency(const Model& model, const SubGraph& subgraph, } if (subgraph.operators()) { - for (int op_idx = 0; op_idx < subgraph.operators()->size(); ++op_idx) { + for (int op_idx = 0, end = subgraph.operators()->size(); op_idx < end; ++op_idx) { const auto* op = subgraph.operators()->Get(op_idx); if (!model.operator_codes() || (op->opcode_index() >= model.operator_codes()->size())) { diff --git a/tensorflow/python/client/session_ref.cc b/tensorflow/python/client/session_ref.cc index 3e9ef302c76..dfbeb3a4b29 100644 --- a/tensorflow/python/client/session_ref.cc +++ b/tensorflow/python/client/session_ref.cc @@ -146,8 +146,7 @@ class SessionLogger { // Build an index from fetch tensor name to first index in // output_tensor_names. 
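The verifier.cc hunk above walks the offset table of a TFLite string tensor buffer and checks that each offset stays inside the buffer. For orientation, here is a minimal reader for that layout as I understand it -- a leading int32 string count, then count+1 int32 offsets measured from the start of the buffer, then the concatenated string bytes; treat the layout details as an assumption rather than a quote from the format spec:

#include <cstdint>
#include <cstring>
#include <string>

// Assumed layout: int32 num_strings | int32 offsets[num_strings + 1] | bytes.
// offsets[i] and offsets[i + 1] delimit string i, counted from buffer start.
std::string ReadString(const uint8_t* buffer, int32_t index) {
  int32_t num_strings;
  std::memcpy(&num_strings, buffer, sizeof(int32_t));
  if (index < 0 || index >= num_strings) return "";
  const uint8_t* offsets = buffer + sizeof(int32_t);
  int32_t begin, end;
  std::memcpy(&begin, offsets + index * sizeof(int32_t), sizeof(int32_t));
  std::memcpy(&end, offsets + (index + 1) * sizeof(int32_t), sizeof(int32_t));
  return std::string(reinterpret_cast<const char*>(buffer) + begin,
                     end - begin);
}

The checks in the hunk reject offsets that go backwards or past the end of the buffer, which are exactly the cases where such a read would leave the buffer.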
std::unordered_map output_name_to_offset; - for (int i = 0, iter_limit = output_tensor_names.size(); i < iter_limit; - ++i) { + for (int i = 0, end = output_tensor_names.size(); i < end; ++i) { const string& name = output_tensor_names[i]; if (output_name_to_offset.insert(std::make_pair(name, i)).second) { req->add_fetch(name); diff --git a/tensorflow/python/lib/core/ndarray_tensor.cc b/tensorflow/python/lib/core/ndarray_tensor.cc index e2fb3ec8dc9..7314fec41ed 100644 --- a/tensorflow/python/lib/core/ndarray_tensor.cc +++ b/tensorflow/python/lib/core/ndarray_tensor.cc @@ -271,7 +271,7 @@ Status CopyTF_TensorStringsToPyArray(const TF_Tensor* src, uint64 nelems, std::unique_ptr status( TF_NewStatus(), TF_DeleteStatus); auto iter = make_safe(PyArray_IterNew(reinterpret_cast(dst))); - for (int64 i = 0; i < nelems; ++i) { + for (int64 i = 0; i < static_cast(nelems); ++i) { const tstring& tstr_i = tstr[i]; auto py_string = make_safe(PyBytes_FromStringAndSize(tstr_i.data(), tstr_i.size())); diff --git a/tensorflow/stream_executor/device_description.cc b/tensorflow/stream_executor/device_description.cc index 635e2624ec5..2be05d777b9 100644 --- a/tensorflow/stream_executor/device_description.cc +++ b/tensorflow/stream_executor/device_description.cc @@ -125,9 +125,9 @@ bool DeviceDescription::rocm_amdgpu_isa_version(int *version) const { bool ThreadDimOk(const DeviceDescription &device_description, const ThreadDim &thread_dim) { - auto total_threads = thread_dim.x * thread_dim.y * thread_dim.z; - auto threads_per_block_limit = device_description.threads_per_block_limit(); - if (total_threads > static_cast(threads_per_block_limit)) { + const int64 total_threads = thread_dim.x * thread_dim.y * thread_dim.z; + const int64 threads_per_block_limit = device_description.threads_per_block_limit(); + if (total_threads > threads_per_block_limit) { VLOG(2) << "exceeded total-thread-per-block limit: " << total_threads << " vs limit " << threads_per_block_limit; return false; diff --git a/tensorflow/stream_executor/stream.cc b/tensorflow/stream_executor/stream.cc index da418122375..505d54cf5bf 100644 --- a/tensorflow/stream_executor/stream.cc +++ b/tensorflow/stream_executor/stream.cc @@ -1886,7 +1886,7 @@ Stream *Stream::GetOrCreateSubStream() { // Look for the first reusable sub_stream that is ok, dropping !ok sub_streams // we encounter along the way. - for (int64 index = 0; index < sub_streams_.size();) { + for (size_t index = 0; index < sub_streams_.size();) { std::pair, bool> &pair = sub_streams_[index]; if (pair.second) { // The sub_stream is reusable. @@ -1937,7 +1937,7 @@ void Stream::ReturnSubStream(Stream *sub_stream) { absl::MutexLock lock(&mu_); // Look for the sub-stream. 
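One loop in the stream.cc change above is handled differently from the rest: GetOrCreateSubStream drops !ok entries from sub_streams_ while it is iterating, so the bound cannot be hoisted; the patch instead switches the index itself to size_t so it matches the type of the freshly re-evaluated size(). A small generic sketch of that situation (assumed container and predicate, not the actual Stream code):

#include <cstddef>
#include <vector>

// When the container can shrink inside the loop body, size() must be
// re-checked every iteration, so the index type is matched to size_t
// rather than caching the bound up front.
void DropNegatives(std::vector<int>& v) {
  for (size_t i = 0; i < v.size();) {
    if (v[i] < 0) {
      v[i] = v.back();  // replace the dropped element with the last one
      v.pop_back();     // size() changes here; do not advance i
    } else {
      ++i;
    }
  }
}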
- for (int64 index = 0; index < sub_streams_.size(); ++index) { + for (int64 index = 0, end = sub_streams_.size(); index < end; ++index) { std::pair, bool> &pair = sub_streams_[index]; if (pair.first.get() != sub_stream) { continue; diff --git a/tensorflow/stream_executor/stream_executor_pimpl.cc b/tensorflow/stream_executor/stream_executor_pimpl.cc index f2ec8f3dc18..d23f1472e33 100644 --- a/tensorflow/stream_executor/stream_executor_pimpl.cc +++ b/tensorflow/stream_executor/stream_executor_pimpl.cc @@ -478,7 +478,7 @@ port::Status StreamExecutor::GetStatus(Stream *stream) { DeviceMemoryBase StreamExecutor::Allocate(uint64 size, int64 memory_space) { if (memory_limit_bytes_ > 0 && - mem_alloc_bytes_ + size > memory_limit_bytes_) { + static_cast(mem_alloc_bytes_ + size) > memory_limit_bytes_) { LOG(WARNING) << "Not enough memory to allocate " << size << " on device " << device_ordinal_ << " within provided limit. [used=" << mem_alloc_bytes_ From 33d8ede1bcf285d81b45b3a5021dc68f24fef674 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 26 Jul 2020 18:32:30 -0700 Subject: [PATCH 1361/2522] Reassociate multiply and broadcasts PiperOrigin-RevId: 323282504 Change-Id: I0bbc1a9414f10ef1148ec48abf0563b39a9763b4 --- .../xla/service/algebraic_simplifier.cc | 37 +++++++++++++++++++ .../xla/service/algebraic_simplifier_test.cc | 23 ++++++++++++ 2 files changed, 60 insertions(+) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 86beb3de694..3ea516674b6 100755 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -2502,6 +2502,43 @@ Status AlgebraicSimplifierVisitor::HandleMultiply(HloInstruction* multiply) { } } + { + HloInstruction *a, *b, *constant, *op; + // Mul(Mul(a, constant1), Broadcast(b)) => + // Mul(Broadcast(Mul(b, constant1), a)) + if (Match(multiply, + m::MultiplyAnyOrder(m::MultiplyAnyOrder(m::NonConstant(&a), + m::Constant(&constant)), + m::Op(&op))) || + Match(multiply, + m::MultiplyAnyOrder( + m::MultiplyAnyOrder(m::NonConstant(&a), + m::Broadcast(m::Constant(&constant))), + m::Op(&op)))) { + // Check that the other side was a broadcast, and not of a constant. 
+ if (ShapeUtil::IsScalar(constant->shape()) && + Match(op, m::Broadcast(m::NonConstant()))) { + auto dims = op->dimensions(); + b = op->mutable_operand(0); + if (!ShapeUtil::IsScalar(b->shape())) { + constant = computation_->AddInstruction( + HloInstruction::CreateBroadcast(b->shape(), constant, {})); + } + + auto new_mul = + computation_->AddInstruction(HloInstruction::CreateBinary( + b->shape(), HloOpcode::kMultiply, b, constant)); + + return ReplaceWithNewInstruction( + multiply, + HloInstruction::CreateBinary( + multiply->shape(), HloOpcode::kMultiply, a, + computation_->AddInstruction(HloInstruction::CreateBroadcast( + multiply->shape(), new_mul, dims)))); + } + } + } + VLOG(10) << "trying transform [(A * C1) * C2 => A * (C1 * C2)]"; HloInstruction *a, *c1, *c2; if (Match(multiply, diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index 137fbcc01a6..034d8ec4361 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -140,6 +140,29 @@ TEST_F(AlgebraicSimplifierTest, MultiplyChain) { m::MultiplyAnyOrder(m::ConstantScalar(2), m::ConstantScalar(4))))); } +// MUL(MUL(X, BROADCAST(constant)), BROADCAST(Y)) ==> +// MUL(X, BROADCAST(MUL(Y, BROADCAST(constant)))) +TEST_F(AlgebraicSimplifierTest, MultiplyBroadcastReassoc) { + const char* kModuleStr = R"( + HloModule m + test { + p0 = f32[2,2] parameter(0) + p1 = f32[] parameter(1) + b = f32[] constant(2) + c = f32[2, 2] broadcast(b), dimensions={} + x = f32[2,2] multiply(p0, c) + y = f32[2,2] broadcast(p1), dimensions={} + ROOT z = f32[2,2] multiply(y, x) + } + )"; + TF_ASSERT_OK_AND_ASSIGN(auto m, ParseAndReturnVerifiedModule(kModuleStr)); + ASSERT_TRUE(AlgebraicSimplifier(default_options_).Run(m.get()).ValueOrDie()); + EXPECT_THAT(m->entry_computation()->root_instruction(), + GmockMatch(m::MultiplyAnyOrder( + m::Parameter(0), m::Broadcast(m::MultiplyAnyOrder( + m::Parameter(1), m::Constant()))))); +} + // A*C + B*C => (A+B)*C if C is a broadcast of a floating-point power of 2. TEST_F(AlgebraicSimplifierTest, FactorFpAdditionWithBroadcast) { const char* kModuleStr = R"( From 7145fc0e49be01ef6943f4df386ce38567e37797 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 26 Jul 2020 19:22:47 -0700 Subject: [PATCH 1362/2522] Reassociate multiply and broadcasts PiperOrigin-RevId: 323286009 Change-Id: I8bc2e4b9cb02988679daa8827f0f220691a4ff0b --- .../xla/service/algebraic_simplifier.cc | 37 ------------------- .../xla/service/algebraic_simplifier_test.cc | 23 ------------ 2 files changed, 60 deletions(-) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 3ea516674b6..86beb3de694 100755 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -2502,43 +2502,6 @@ Status AlgebraicSimplifierVisitor::HandleMultiply(HloInstruction* multiply) { } } - { - HloInstruction *a, *b, *constant, *op; - // Mul(Mul(a, constant1), Broadcast(b)) => - // Mul(Broadcast(Mul(b, constant1), a)) - if (Match(multiply, - m::MultiplyAnyOrder(m::MultiplyAnyOrder(m::NonConstant(&a), - m::Constant(&constant)), - m::Op(&op))) || - Match(multiply, - m::MultiplyAnyOrder( - m::MultiplyAnyOrder(m::NonConstant(&a), - m::Broadcast(m::Constant(&constant))), - m::Op(&op)))) { - // Check that the other side was a broadcast, and not of a constant. 
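Patches 1361 and 1362 are an add-then-rollback pair for a single algebraic_simplifier rewrite. Written out (the comment in the added code misplaces a parenthesis), the reassociation is

  Mul(Mul(a, Broadcast(c1)), Broadcast(b))  ==>  Mul(a, Broadcast(Mul(b, c1)))

where c1 is a scalar constant and b is a non-constant operand of smaller shape (c1 is first broadcast to b's shape when b is not a scalar). Performing the constant multiply at b's shape leaves only one multiply at the full output shape instead of two. The MultiplyBroadcastReassoc test above exercises exactly this case with a = f32[2,2], b = f32[] and c1 = 2.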
- if (ShapeUtil::IsScalar(constant->shape()) && - Match(op, m::Broadcast(m::NonConstant()))) { - auto dims = op->dimensions(); - b = op->mutable_operand(0); - if (!ShapeUtil::IsScalar(b->shape())) { - constant = computation_->AddInstruction( - HloInstruction::CreateBroadcast(b->shape(), constant, {})); - } - - auto new_mul = - computation_->AddInstruction(HloInstruction::CreateBinary( - b->shape(), HloOpcode::kMultiply, b, constant)); - - return ReplaceWithNewInstruction( - multiply, - HloInstruction::CreateBinary( - multiply->shape(), HloOpcode::kMultiply, a, - computation_->AddInstruction(HloInstruction::CreateBroadcast( - multiply->shape(), new_mul, dims)))); - } - } - } - VLOG(10) << "trying transform [(A * C1) * C2 => A * (C1 * C2)]"; HloInstruction *a, *c1, *c2; if (Match(multiply, diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index 034d8ec4361..137fbcc01a6 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -140,29 +140,6 @@ TEST_F(AlgebraicSimplifierTest, MultiplyChain) { m::MultiplyAnyOrder(m::ConstantScalar(2), m::ConstantScalar(4))))); } -// MUL(MUL(X, BROADCAST(constant)), BROADCAST(Y)) ==> -// MUL(X, BROADCAST(MUL(Y, BROADCAST(constant)))) -TEST_F(AlgebraicSimplifierTest, MultiplyBroadcastReassoc) { - const char* kModuleStr = R"( - HloModule m - test { - p0 = f32[2,2] parameter(0) - p1 = f32[] parameter(1) - b = f32[] constant(2) - c = f32[2, 2] broadcast(b), dimensions={} - x = f32[2,2] multiply(p0, c) - y = f32[2,2] broadcast(p1), dimensions={} - ROOT z = f32[2,2] multiply(y, x) - } - )"; - TF_ASSERT_OK_AND_ASSIGN(auto m, ParseAndReturnVerifiedModule(kModuleStr)); - ASSERT_TRUE(AlgebraicSimplifier(default_options_).Run(m.get()).ValueOrDie()); - EXPECT_THAT(m->entry_computation()->root_instruction(), - GmockMatch(m::MultiplyAnyOrder( - m::Parameter(0), m::Broadcast(m::MultiplyAnyOrder( - m::Parameter(1), m::Constant()))))); -} - // A*C + B*C => (A+B)*C if C is a broadcast of a floating-point power of 2. TEST_F(AlgebraicSimplifierTest, FactorFpAdditionWithBroadcast) { const char* kModuleStr = R"( From 03d890dfae5cd71da88dcd003be95d60976978db Mon Sep 17 00:00:00 2001 From: Anna R Date: Sun, 26 Jul 2020 23:04:09 -0700 Subject: [PATCH 1363/2522] Add support for variable policy to be used by MirroredStrategy and TPUStrategy. Refactor existing values test and add an option to test variable policy. 
PiperOrigin-RevId: 323304491 Change-Id: I5c00791bc62a930274c254b33f4a47d671d0b7bf --- tensorflow/python/distribute/BUILD | 41 - tensorflow/python/distribute/combinations.py | 10 +- .../python/distribute/distribute_utils.py | 124 +- .../python/distribute/mirrored_strategy.py | 20 +- .../distribute/mirrored_variable_test.py | 6 +- tensorflow/python/distribute/tpu_strategy.py | 7 +- tensorflow/python/distribute/tpu_values.py | 316 +--- tensorflow/python/distribute/values.py | 14 +- tensorflow/python/distribute/values_test.py | 1092 +++++++++++++- tensorflow/python/distribute/vars_test.py | 1270 ----------------- 10 files changed, 1104 insertions(+), 1796 deletions(-) delete mode 100644 tensorflow/python/distribute/vars_test.py diff --git a/tensorflow/python/distribute/BUILD b/tensorflow/python/distribute/BUILD index 185b4568868..356fb3a7a9f 100644 --- a/tensorflow/python/distribute/BUILD +++ b/tensorflow/python/distribute/BUILD @@ -302,7 +302,6 @@ py_library( ":distribute_lib", ":reduce_util", ":shared_variable_creator", - ":tpu_values", ":values", "//tensorflow/python:array_ops", "//tensorflow/python:config", @@ -1225,46 +1224,6 @@ distribute_py_test( ], ) -distribute_py_test( - name = "vars_test", - size = "medium", - srcs = ["vars_test.py"], - main = "vars_test.py", - shard_count = 5, - tags = [ - "multi_and_single_gpu", - "no_rocm", - ], - tpu_tags = [ - "no_oss", # b/150954621 Target too big to run serially reliably. - ], - deps = [ - ":combinations", - ":distribute_lib", - ":strategy_combinations", - ":tpu_strategy", - ":tpu_values", - ":values", - "//tensorflow/python:array_ops", - "//tensorflow/python:checkpoint_management", - "//tensorflow/python:constant_op", - "//tensorflow/python:dtypes", - "//tensorflow/python:framework_ops", - "//tensorflow/python:indexed_slices", - "//tensorflow/python:math_ops", - "//tensorflow/python:random_ops", - "//tensorflow/python:training", - "//tensorflow/python:variable_scope", - "//tensorflow/python:variables", - "//tensorflow/python/distribute/cluster_resolver:tpu_cluster_resolver_py", - "//tensorflow/python/eager:context", - "//tensorflow/python/eager:def_function", - "//tensorflow/python/eager:test", - "//tensorflow/python/tpu:tpu_lib", - "@absl_py//absl/testing:parameterized", - ], -) - distribute_py_test( name = "ps_values_test", size = "medium", diff --git a/tensorflow/python/distribute/combinations.py b/tensorflow/python/distribute/combinations.py index a86c751ec79..ad8bb879b93 100644 --- a/tensorflow/python/distribute/combinations.py +++ b/tensorflow/python/distribute/combinations.py @@ -58,17 +58,11 @@ class DistributionParameter(combinations_lib.ParameterModifier): """ def modified_arguments(self, kwargs, requested_parameters): - # Get the parameter that indicates if we need to set the `_use_policy` flag - # on the strategy object. This is a temporary flag for testing the variable - # policy rollout. 
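The variable-policy machinery that this flag gated maps each (synchronization, aggregation) pair to a policy class; the VARIABLE_POLICY_MAPPING comments in the distribute_utils.py hunk below spell the mapping out. A hedged sketch of the selection logic, written against the public tf.VariableSynchronization / tf.VariableAggregation enums (the policy names are plain strings here, not the real classes, and NONE synchronization is rejected earlier with a ValueError):

import tensorflow as tf

def pick_policy(synchronization, aggregation):
  """Sketch of the (synchronization, aggregation) -> policy mapping below."""
  if synchronization == tf.VariableSynchronization.ON_READ:
    return "OnReadPolicy"
  if (synchronization == tf.VariableSynchronization.AUTO and
      aggregation == tf.VariableAggregation.NONE):
    return "AutoPolicy"
  # AUTO with a real aggregation, or explicit ON_WRITE, both use the
  # on-write policy.
  return "OnWritePolicy"

print(pick_policy(tf.VariableSynchronization.AUTO,
                  tf.VariableAggregation.NONE))   # AutoPolicy
print(pick_policy(tf.VariableSynchronization.ON_READ,
                  tf.VariableAggregation.SUM))    # OnReadPolicy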
- use_var_policy = kwargs.get("use_var_policy", None) + del requested_parameters distribution_arguments = {} for k, v in kwargs.items(): if isinstance(v, NamedDistribution): - strategy = v.strategy - if use_var_policy: - strategy.extended._use_var_policy = use_var_policy - distribution_arguments[k] = strategy + distribution_arguments[k] = v.strategy return distribution_arguments diff --git a/tensorflow/python/distribute/distribute_utils.py b/tensorflow/python/distribute/distribute_utils.py index 916ebafd8ac..89848b91318 100644 --- a/tensorflow/python/distribute/distribute_utils.py +++ b/tensorflow/python/distribute/distribute_utils.py @@ -18,7 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.distribute import tpu_values as tpu_values_lib from tensorflow.python.distribute import values as values_lib from tensorflow.python.eager import context from tensorflow.python.eager import tape @@ -146,7 +145,7 @@ def select_replica_mirrored(replica_id, structured): def _get_mirrored(x): if isinstance(x, values_lib.DistributedValues): - if not is_mirrored(x): + if not isinstance(x, values_lib.Mirrored): raise TypeError( "Expected value to be mirrored across replicas: %s in %s." % (x, structured)) @@ -246,25 +245,34 @@ def validate_colocate(v, extended): # Variable creation function for sync strategies. -def _get_and_validate_synchronization(kwargs): - """Validate that given synchronization value is valid.""" +def create_mirrored_variable( # pylint: disable=missing-docstring + strategy, real_mirrored_creator, mirrored_cls, sync_on_read_cls, **kwargs): + # Figure out what collections this variable should be added to. + # We'll add the MirroredVariable to those collections instead. + var_collections = kwargs.pop("collections", None) + if var_collections is None: + var_collections = [ops.GraphKeys.GLOBAL_VARIABLES] + kwargs["collections"] = [] + synchronization = kwargs.get("synchronization", - vs.VariableSynchronization.AUTO) + vs.VariableSynchronization.ON_WRITE) + if synchronization == vs.VariableSynchronization.NONE: raise ValueError( - "`NONE` variable synchronization mode is not supported with " - "tf.distribute strategy. Please change the `synchronization` for " + "`NONE` variable synchronization mode is not supported with `Mirrored` " + "distribution strategy. Please change the `synchronization` for " "variable: " + str(kwargs["name"])) - if synchronization not in (vs.VariableSynchronization.ON_READ, - vs.VariableSynchronization.ON_WRITE, - vs.VariableSynchronization.AUTO): + elif synchronization == vs.VariableSynchronization.ON_READ: + is_sync_on_read = True + elif synchronization in (vs.VariableSynchronization.ON_WRITE, + vs.VariableSynchronization.AUTO): + # `AUTO` synchronization defaults to `ON_WRITE`. 
+ is_sync_on_read = False + else: raise ValueError( "Invalid variable synchronization mode: %s for variable: %s" % (synchronization, kwargs["name"])) - return synchronization - -def _validate_aggregation(kwargs): aggregation = kwargs.pop("aggregation", vs.VariableAggregation.NONE) if aggregation not in (vs.VariableAggregation.NONE, @@ -273,33 +281,6 @@ def _validate_aggregation(kwargs): vs.VariableAggregation.ONLY_FIRST_REPLICA): raise ValueError("Invalid variable aggregation mode: %s for variable: %s" % (aggregation, kwargs["name"])) - return aggregation - - -def _get_variable_policy_class(synchronization, aggregation, policy_mapping): - if synchronization == vs.VariableSynchronization.AUTO: - if aggregation == vs.VariableAggregation.NONE: - # Use AutoPolicy. - return policy_mapping.get(synchronization) - else: - # Revert to OnWritePolicy - return policy_mapping.get(vs.VariableSynchronization.ON_WRITE) - return policy_mapping.get(synchronization) - - -def create_mirrored_variable(strategy, real_mirrored_creator, class_mapping, - policy_mapping, **kwargs): - """Create distributed variables with given synchronization and aggregation.""" - # Figure out what collections this variable should be added to. - # We'll add the MirroredVariable to those collections instead. - var_collections = kwargs.pop("collections", None) - if var_collections is None: - var_collections = [ops.GraphKeys.GLOBAL_VARIABLES] - kwargs["collections"] = [] - - synchronization = _get_and_validate_synchronization(kwargs) - aggregation = _validate_aggregation(kwargs) - use_var_policy = getattr(strategy.extended, "_use_var_policy", False) # Ignore user-specified caching device, not needed for mirrored variables. kwargs.pop("caching_device", None) @@ -309,15 +290,8 @@ def create_mirrored_variable(strategy, real_mirrored_creator, class_mapping, # here. with tape.stop_recording(): value_list = real_mirrored_creator(**kwargs) - if use_var_policy: - var_policy_cls = _get_variable_policy_class(synchronization, aggregation, - policy_mapping) - var_policy = var_policy_cls(aggregation=aggregation) - var_cls = class_mapping.get("VariableClass") - result = var_cls(strategy, value_list, aggregation, var_policy=var_policy) - else: - var_cls = class_mapping.get(synchronization) - result = var_cls(strategy, value_list, aggregation) + var_cls = sync_on_read_cls if is_sync_on_read else mirrored_cls + result = var_cls(strategy, value_list, aggregation) # Install the created DistributedVariable as _distributed_container property # of the underlying variables, to make it easy to map back to the container. for v in result.values: @@ -350,55 +324,3 @@ def create_mirrored_variable(strategy, real_mirrored_creator, class_mapping, ops.add_to_collections(ops.GraphKeys.GLOBAL_STEP, result) return result - - -# Utility functions -# Return True if the Value is Mirrored or the Variable is replicated and kept in -# sync. -def is_mirrored(val): - if isinstance(val, values_lib.DistributedVariable): - if val._policy: # pylint: disable=protected-access - return val._policy._is_mirrored() # pylint: disable=protected-access - return isinstance(val, values_lib.Mirrored) - - -def is_sync_on_read(val): - if isinstance(val, values_lib.DistributedVariable): - if val._policy: # pylint: disable=protected-access - return not val._policy._is_mirrored() # pylint: disable=protected-access - return not isinstance(val, values_lib.Mirrored) - -# The following mapping indicates the policy that you must use for a given -# variable `synchronization` and `aggregation` pair. 
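In the restored create_mirrored_variable above, is_sync_on_read decides between the two concrete distributed-variable classes. From user code the distinction looks roughly like this (a sketch only; the concrete classes are internal, and ON_READ variables are normally created by layers and metrics rather than by hand):

import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
  # AUTO / ON_WRITE synchronization -> a mirrored variable kept in sync on
  # every write across replicas.
  w = tf.Variable(1.0)
  # ON_READ synchronization -> a sync-on-read variable, aggregated only when
  # read across replicas (e.g. SUM of per-replica counters).
  counter = tf.Variable(
      0.0,
      synchronization=tf.VariableSynchronization.ON_READ,
      aggregation=tf.VariableAggregation.SUM)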
-# AutoPolicy is used for: -# (synchronization=Auto, aggregation=None) -# OnWritePolicy is used for: -# (synchronization=Auto, aggregation=SUM,MEAN,ONLY_FIRST_REPLICA) -# (synchronization=ON_WRITE, aggregation=NONE,SUM,MEAN,ONLY_FIRST_REPLICA) -# OnReadPolicy is used for: -# (synchronization=ON_READ, aggregation=NONE,SUM,MEAN,ONLY_FIRST_REPLICA) -VARIABLE_POLICY_MAPPING = { - vs.VariableSynchronization.AUTO: values_lib.AutoPolicy, - vs.VariableSynchronization.ON_WRITE: values_lib.OnWritePolicy, - vs.VariableSynchronization.ON_READ: values_lib.OnReadPolicy, -} - -VARIABLE_CLASS_MAPPING = { - "VariableClass": values_lib.DistributedVariable, - vs.VariableSynchronization.AUTO: values_lib.MirroredVariable, - vs.VariableSynchronization.ON_WRITE: values_lib.MirroredVariable, - vs.VariableSynchronization.ON_READ: values_lib.SyncOnReadVariable, -} - -TPU_VARIABLE_POLICY_MAPPING = { - vs.VariableSynchronization.AUTO: tpu_values_lib.TPUAutoPolicy, - vs.VariableSynchronization.ON_WRITE: tpu_values_lib.TPUOnWritePolicy, - vs.VariableSynchronization.ON_READ: tpu_values_lib.TPUOnReadPolicy, -} - -TPU_VARIABLE_CLASS_MAPPING = { - "VariableClass": tpu_values_lib.TPUDistributedVariable, - vs.VariableSynchronization.AUTO: tpu_values_lib.TPUMirroredVariable, - vs.VariableSynchronization.ON_WRITE: tpu_values_lib.TPUMirroredVariable, - vs.VariableSynchronization.ON_READ: tpu_values_lib.TPUSyncOnReadVariable, -} diff --git a/tensorflow/python/distribute/mirrored_strategy.py b/tensorflow/python/distribute/mirrored_strategy.py index 5323f6131ee..b424f798476 100644 --- a/tensorflow/python/distribute/mirrored_strategy.py +++ b/tensorflow/python/distribute/mirrored_strategy.py @@ -319,9 +319,6 @@ class MirroredExtended(distribute_lib.StrategyExtendedV1): if ops.executing_eagerly_outside_functions(): self.experimental_enable_get_next_as_optional = True - # Flag to turn on VariablePolicy. - self._use_var_policy = False - def _initialize_strategy(self, devices): # The _initialize_strategy method is intended to be used by distribute # coordinator as well. @@ -465,8 +462,7 @@ class MirroredExtended(distribute_lib.StrategyExtendedV1): return distribute_utils.create_mirrored_variable( self._container_strategy(), _real_mirrored_creator, - distribute_utils.VARIABLE_CLASS_MAPPING, - distribute_utils.VARIABLE_POLICY_MAPPING, **kwargs) + values.MirroredVariable, values.SyncOnReadVariable, **kwargs) def _validate_colocate_with_variable(self, colocate_with_variable): distribute_utils.validate_colocate_distributed_variable( @@ -632,10 +628,10 @@ class MirroredExtended(distribute_lib.StrategyExtendedV1): return self._cross_device_ops or self._inferred_cross_device_ops def _reduce_to(self, reduce_op, value, destinations, experimental_hints): - if (distribute_utils.is_mirrored(value) and + if (isinstance(value, values.Mirrored) and reduce_op == reduce_util.ReduceOp.MEAN): return value - assert not distribute_utils.is_mirrored(value) + assert not isinstance(value, values.Mirrored) if not isinstance(value, values.DistributedValues): # This function handles reducing values that are not PerReplica or # Mirrored values. 
For example, the same value could be present on all @@ -690,12 +686,10 @@ class MirroredExtended(distribute_lib.StrategyExtendedV1): def read_var(self, replica_local_var): """Read the aggregate value of a replica-local variable.""" - # pylint: disable=protected-access - if values._is_sync_on_read(replica_local_var): - return replica_local_var._get_cross_replica() - assert values._is_mirrored(replica_local_var) - return array_ops.identity(replica_local_var._get()) - # pylint: enable=protected-access + if isinstance(replica_local_var, values.SyncOnReadVariable): + return replica_local_var._get_cross_replica() # pylint: disable=protected-access + assert isinstance(replica_local_var, values.Mirrored) + return array_ops.identity(replica_local_var._get()) # pylint: disable=protected-access def _local_results(self, val): if isinstance(val, values.DistributedValues): diff --git a/tensorflow/python/distribute/mirrored_variable_test.py b/tensorflow/python/distribute/mirrored_variable_test.py index 03d697fe1eb..8e7d674947e 100644 --- a/tensorflow/python/distribute/mirrored_variable_test.py +++ b/tensorflow/python/distribute/mirrored_variable_test.py @@ -379,7 +379,8 @@ class MirroredVariableCreationTest(test.TestCase): with distribution.scope(): with self.assertRaisesRegex( ValueError, "`NONE` variable synchronization mode is not " - "supported with "): + "supported with `Mirrored` distribution strategy. Please change " + "the `synchronization` for variable: v"): variable_scope.get_variable( "v", [1], synchronization=variable_scope.VariableSynchronization.NONE) @@ -388,7 +389,8 @@ class MirroredVariableCreationTest(test.TestCase): with distribution.scope(): with self.assertRaisesRegex( ValueError, "`NONE` variable synchronization mode is not " - "supported with "): + "supported with `Mirrored` distribution strategy. Please change " + "the `synchronization` for variable: v"): variable_scope.variable( 1.0, name="v", diff --git a/tensorflow/python/distribute/tpu_strategy.py b/tensorflow/python/distribute/tpu_strategy.py index bad6e6aa39f..8e5ef061dcf 100644 --- a/tensorflow/python/distribute/tpu_strategy.py +++ b/tensorflow/python/distribute/tpu_strategy.py @@ -544,9 +544,6 @@ class TPUExtended(distribute_lib.StrategyExtendedV1): context.async_wait() atexit.register(async_wait) - # Flag to turn on VariablePolicy - self._use_var_policy = False - def _validate_colocate_with_variable(self, colocate_with_variable): distribute_utils. 
validate_colocate(colocate_with_variable, self) @@ -873,8 +870,8 @@ class TPUExtended(distribute_lib.StrategyExtendedV1): return distribute_utils.create_mirrored_variable( self._container_strategy(), _real_mirrored_creator, - distribute_utils.TPU_VARIABLE_CLASS_MAPPING, - distribute_utils.TPU_VARIABLE_POLICY_MAPPING, **kwargs) + tpu_values.TPUMirroredVariable, tpu_values.TPUSyncOnReadVariable, + **kwargs) def _reduce_to(self, reduce_op, value, destinations, experimental_hints): if (isinstance(value, values.DistributedValues) or diff --git a/tensorflow/python/distribute/tpu_values.py b/tensorflow/python/distribute/tpu_values.py index ce6d2e7029b..33885531966 100644 --- a/tensorflow/python/distribute/tpu_values.py +++ b/tensorflow/python/distribute/tpu_values.py @@ -197,58 +197,10 @@ def enclosing_tpu_context(): return None -class TPUDistributedVariable(TPUVariableMixin, values.DistributedVariable): - """DistributedVariable subclass for TPUStrategy.""" - - def _is_mirrored(self): - self._policy._is_mirrored() # pylint: disable=protected-access - - def assign_sub(self, value, use_locking=False, name=None, read_value=True): - return self._policy.assign_sub( - self, value, use_locking=use_locking, name=name, read_value=read_value) - - def assign_add(self, value, use_locking=False, name=None, read_value=True): - return self._policy.assign_add( - self, value, use_locking=use_locking, name=name, read_value=read_value) - - def assign(self, value, use_locking=False, name=None, read_value=True): - return self._policy.assign( - self, value, use_locking=use_locking, name=name, read_value=read_value) - - def scatter_sub(self, sparse_delta, use_locking=False, name=None): - return self._policy.scatter_sub( - self, sparse_delta, use_locking=use_locking, name=name) - - def scatter_add(self, sparse_delta, use_locking=False, name=None): - return self._policy.scatter_add( - self, sparse_delta, use_locking=use_locking, name=name) - - def scatter_mul(self, sparse_delta, use_locking=False, name=None): - return self._policy.scatter_mul( - self, sparse_delta, use_locking=use_locking, name=name) - - def scatter_div(self, sparse_delta, use_locking=False, name=None): - return self._policy.scatter_div( - self, sparse_delta, use_locking=use_locking, name=name) - - def scatter_min(self, sparse_delta, use_locking=False, name=None): - return self._policy.scatter_min( - self, sparse_delta, use_locking=use_locking, name=name) - - def scatter_max(self, sparse_delta, use_locking=False, name=None): - return self._policy.scatter_max( - self, sparse_delta, use_locking=use_locking, name=name) - - def scatter_update(self, sparse_delta, use_locking=False, name=None): - return self._policy.scatter_update( - self, sparse_delta, use_locking=use_locking, name=name) - - class TPUMirroredVariable(TPUVariableMixin, values.MirroredVariable): """Holds a map from replica to TPU variables whose values are kept in sync.""" - def assign_sub(self, value, use_locking=False, name=None, - read_value=True): + def assign_sub(self, value, use_locking=False, name=None, read_value=True): if (enclosing_tpu_context() and self.aggregation == variable_scope.VariableAggregation.NONE): return _make_raw_assign_fn( @@ -258,11 +210,17 @@ class TPUMirroredVariable(TPUVariableMixin, values.MirroredVariable): use_locking=use_locking, name=name, read_value=read_value) - return assign_sub(self, value, use_locking=use_locking, name=name, - read_value=read_value) - def assign_add(self, value, use_locking=False, name=None, - read_value=True): + assign_sub_fn = 
_make_raw_assign_fn( + gen_resource_variable_ops.assign_sub_variable_op) + return self._update( + update_fn=assign_sub_fn, + value=value, + use_locking=use_locking, + name=name, + read_value=read_value) + + def assign_add(self, value, use_locking=False, name=None, read_value=True): if (enclosing_tpu_context() and self.aggregation == variable_scope.VariableAggregation.NONE): return _make_raw_assign_fn( @@ -272,21 +230,34 @@ class TPUMirroredVariable(TPUVariableMixin, values.MirroredVariable): use_locking=use_locking, name=name, read_value=read_value) - return assign_add(self, value, use_locking=use_locking, name=name, - read_value=read_value) + + assign_add_fn = _make_raw_assign_fn( + gen_resource_variable_ops.assign_add_variable_op) + return self._update( + update_fn=assign_add_fn, + value=value, + use_locking=use_locking, + name=name, + read_value=read_value) def assign(self, value, use_locking=False, name=None, read_value=True): if (enclosing_tpu_context() and self.aggregation == variable_scope.VariableAggregation.NONE): - return _make_raw_assign_fn( - gen_resource_variable_ops.assign_variable_op)( - self, - value=value, - use_locking=use_locking, - name=name, - read_value=read_value) - return assign(self, value, use_locking=use_locking, name=name, - read_value=read_value) + return _make_raw_assign_fn(gen_resource_variable_ops.assign_variable_op)( + self, + value=value, + use_locking=use_locking, + name=name, + read_value=read_value) + + assign_fn = _make_raw_assign_fn( + gen_resource_variable_ops.assign_variable_op) + return self._update( + update_fn=assign_fn, + value=value, + use_locking=use_locking, + name=name, + read_value=read_value) def scatter_sub(self, *args, **kwargs): raise NotImplementedError @@ -341,220 +312,3 @@ class TPUSyncOnReadVariable(TPUVariableMixin, values.SyncOnReadVariable): def _is_mirrored(self): return False - - -# Common method between AutoPolicy, OnWrite and Mirrored variables. -def assign_sub(var, value, use_locking=False, name=None, read_value=True): - assign_sub_fn = _make_raw_assign_fn( - gen_resource_variable_ops.assign_sub_variable_op) - return var._update( # pylint: disable=protected-access - update_fn=assign_sub_fn, - value=value, - use_locking=use_locking, - name=name, - read_value=read_value) - - -def assign_add(var, value, use_locking=False, name=None, read_value=True): - assign_add_fn = _make_raw_assign_fn( - gen_resource_variable_ops.assign_add_variable_op) - return var._update( # pylint: disable=protected-access - update_fn=assign_add_fn, - value=value, - use_locking=use_locking, - name=name, - read_value=read_value) - - -def assign(var, value, use_locking=False, name=None, read_value=True): - assign_fn = _make_raw_assign_fn( - gen_resource_variable_ops.assign_variable_op) - return var._update( # pylint: disable=protected-access - update_fn=assign_fn, - value=value, - use_locking=use_locking, - name=name, - read_value=read_value) - - -class TPUAutoPolicy(values.AutoPolicy): - """Policy defined for `tf.VariableSynchronization.AUTO` synchronization. - - This policy is created when `synchronization` is set to - `tf.VariableSynchronization.AUTO` and `aggregation` is set to - `tf.VariableAggregation.NONE` when creating a `tf.Variable` in `tf.distribute` - scope. 
- """ - - def assign_sub(self, var, value, use_locking=False, name=None, - read_value=True): - if enclosing_tpu_context(): - return _make_raw_assign_fn( - gen_resource_variable_ops.assign_sub_variable_op)( - var, - value=value, - use_locking=use_locking, - name=name, - read_value=read_value) - return assign_sub(var, value, use_locking=use_locking, name=name, - read_value=read_value) - - def assign_add(self, var, value, use_locking=False, name=None, - read_value=True): - if enclosing_tpu_context(): - return _make_raw_assign_fn( - gen_resource_variable_ops.assign_add_variable_op)( - var, - value=value, - use_locking=use_locking, - name=name, - read_value=read_value) - return assign_add(var, value, use_locking=use_locking, name=name, - read_value=read_value) - - def assign(self, var, value, use_locking=False, name=None, read_value=True): - if enclosing_tpu_context(): - return _make_raw_assign_fn( - gen_resource_variable_ops.assign_variable_op)( - var, - value=value, - use_locking=use_locking, - name=name, - read_value=read_value) - return assign(var, value, use_locking=use_locking, name=name, - read_value=read_value) - - def scatter_sub(self, *args, **kwargs): - raise NotImplementedError - - def scatter_add(self, *args, **kwargs): - raise NotImplementedError - - def scatter_max(self, *args, **kwargs): - raise NotImplementedError - - def scatter_min(self, *args, **kwargs): - raise NotImplementedError - - def scatter_mul(self, *args, **kwargs): - raise NotImplementedError - - def scatter_div(self, *args, **kwargs): - raise NotImplementedError - - def scatter_update(self, *args, **kwargs): - raise NotImplementedError - - def _is_mirrored(self): - return True - - -class TPUOnWritePolicy(values.OnWritePolicy): - """Policy defined for `tf.VariableSynchronization.ON_WRITE` synchronization. - - This policy is created when the following `synchronization` and - `aggregation` parameters are specified when creating a `tf.Variable` in - `tf.distribute` scope: - * `synchronization` is equal to `tf.VariableSynchronization.AUTO` and - aggregation can be any of the following `tf.VariableAggregation` enum - values such as `SUM`, `MEAN` or `ONLY_FIRST_REPLICA`. - * `synchronization` is equal to `tf.VariableSynchronization.ON_WRITE` and - aggregation can be any of the following `tf.VariableAggregation` enum - values such as `NONE`, `SUM`, `MEAN` or `ONLY_FIRST_REPLICA`. 
- """ - - def assign_sub(self, var, value, use_locking=False, name=None, - read_value=True): - return assign_sub(var, value, use_locking=use_locking, name=name, - read_value=read_value) - - def assign_add(self, var, value, use_locking=False, name=None, - read_value=True): - return assign_add(var, value, use_locking=use_locking, name=name, - read_value=read_value) - - def assign(self, var, value, use_locking=False, name=None, read_value=True): - return assign(var, value, use_locking=use_locking, name=name, - read_value=read_value) - - def scatter_sub(self, *args, **kwargs): - raise NotImplementedError - - def scatter_add(self, *args, **kwargs): - raise NotImplementedError - - def scatter_max(self, *args, **kwargs): - raise NotImplementedError - - def scatter_min(self, *args, **kwargs): - raise NotImplementedError - - def scatter_mul(self, *args, **kwargs): - raise NotImplementedError - - def scatter_div(self, *args, **kwargs): - raise NotImplementedError - - def scatter_update(self, *args, **kwargs): - raise NotImplementedError - - def _is_mirrored(self): - return True - - -class TPUOnReadPolicy(values.OnReadPolicy): - """Policy defined for `tf.VariableSynchronization.ON_READ` synchronization. - - This policy is created when `synchronization` is set to - `tf.VariableSynchronization.ON_READ` and `aggregation` is set to any of the - values allowed by the `tf.VariableAggregation` enum such as `NONE`, `SUM`, - `MEAN` or `ONLY_FIRST_REPLICA`when creating a `tf.Variable` in `tf.distribute` - scope. - """ - - def assign_sub(self, var, *args, **kwargs): - if enclosing_tpu_context() is None: - return super(TPUOnReadPolicy, self).assign_sub(var, *args, **kwargs) - else: - return _make_raw_assign_fn( - gen_resource_variable_ops.assign_sub_variable_op)(var, *args, - **kwargs) - - def assign_add(self, var, *args, **kwargs): - if enclosing_tpu_context() is None: - return super(TPUOnReadPolicy, self).assign_add(var, *args, **kwargs) - else: - return _make_raw_assign_fn( - gen_resource_variable_ops.assign_add_variable_op)(var, *args, - **kwargs) - - def assign(self, var, *args, **kwargs): - if enclosing_tpu_context() is None: - return super(TPUOnReadPolicy, self).assign(var, *args, **kwargs) - else: - return _make_raw_assign_fn(gen_resource_variable_ops.assign_variable_op)( - var, *args, **kwargs) - - def _is_mirrored(self): - return False - - def scatter_sub(self, *args, **kwargs): - raise NotImplementedError - - def scatter_add(self, *args, **kwargs): - raise NotImplementedError - - def scatter_max(self, *args, **kwargs): - raise NotImplementedError - - def scatter_min(self, *args, **kwargs): - raise NotImplementedError - - def scatter_mul(self, *args, **kwargs): - raise NotImplementedError - - def scatter_div(self, *args, **kwargs): - raise NotImplementedError - - def scatter_update(self, *args, **kwargs): - raise NotImplementedError diff --git a/tensorflow/python/distribute/values.py b/tensorflow/python/distribute/values.py index 7dedbee2041..50cd8d73e73 100644 --- a/tensorflow/python/distribute/values.py +++ b/tensorflow/python/distribute/values.py @@ -700,49 +700,49 @@ class DistributedVariable(DistributedDelegate, variables_lib.Variable, def scatter_sub(self, sparse_delta, use_locking=False, name=None): if self._policy: - return self._policy.scatter_sub( + self._policy.scatter_sub( self, sparse_delta, use_locking=use_locking, name=name) return values_util.scatter_sub( self, sparse_delta, use_locking=use_locking, name=name) def scatter_add(self, sparse_delta, use_locking=False, name=None): if 
self._policy: - return self._policy.scatter_add( + self._policy.scatter_add( self, sparse_delta, use_locking=use_locking, name=name) return values_util.scatter_add( self, sparse_delta, use_locking=use_locking, name=name) def scatter_mul(self, sparse_delta, use_locking=False, name=None): if self._policy: - return self._policy.scatter_mul( + self._policy.scatter_mul( self, sparse_delta, use_locking=use_locking, name=name) return values_util.scatter_mul( self, sparse_delta, use_locking=use_locking, name=name) def scatter_div(self, sparse_delta, use_locking=False, name=None): if self._policy: - return self._policy.scatter_div( + self._policy.scatter_div( self, sparse_delta, use_locking=use_locking, name=name) return values_util.scatter_div( self, sparse_delta, use_locking=use_locking, name=name) def scatter_min(self, sparse_delta, use_locking=False, name=None): if self._policy: - return self._policy.scatter_min( + self._policy.scatter_min( self, sparse_delta, use_locking=use_locking, name=name) return values_util.scatter_min( self, sparse_delta, use_locking=use_locking, name=name) def scatter_max(self, sparse_delta, use_locking=False, name=None): if self._policy: - return self._policy.scatter_max( + self._policy.scatter_max( self, sparse_delta, use_locking=use_locking, name=name) return values_util.scatter_max( self, sparse_delta, use_locking=use_locking, name=name) def scatter_update(self, sparse_delta, use_locking=False, name=None): if self._policy: - return self._policy.scatter_update( + self._policy.scatter_update( self, sparse_delta, use_locking=use_locking, name=name) return values_util.scatter_update( self, sparse_delta, use_locking=use_locking, name=name) diff --git a/tensorflow/python/distribute/values_test.py b/tensorflow/python/distribute/values_test.py index e445c1195be..1c090737d8f 100644 --- a/tensorflow/python/distribute/values_test.py +++ b/tensorflow/python/distribute/values_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import copy +import itertools import os from absl.testing import parameterized @@ -29,12 +30,14 @@ from tensorflow.python import tf2 from tensorflow.python.distribute import combinations from tensorflow.python.distribute import distribute_lib from tensorflow.python.distribute import distribute_utils +from tensorflow.python.distribute import distribution_strategy_context from tensorflow.python.distribute import packed_distributed_variable as packed from tensorflow.python.distribute import strategy_combinations from tensorflow.python.distribute import test_util as ds_test_util from tensorflow.python.distribute import tpu_strategy from tensorflow.python.distribute import tpu_values from tensorflow.python.distribute import values as values_lib +from tensorflow.python.distribute.cluster_resolver import tpu_cluster_resolver from tensorflow.python.eager import context from tensorflow.python.eager import def_function from tensorflow.python.eager import test @@ -48,56 +51,19 @@ from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import random_ops from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables as variables_lib from tensorflow.python.saved_model import save_context from tensorflow.python.saved_model import save_options +from tensorflow.python.tpu import tpu_strategy_util 
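The values.py hunk above reworks the scatter_* methods so that an attached variable policy is consulted first and the shared values_util helper then performs the actual update; the TPU policies removed above simply raise NotImplementedError for scatter ops, so the policy call behaves like a guard. The snippet below is only an illustrative sketch of that control flow with made-up stand-in classes, not the real TensorFlow implementation.

def _fallback_scatter_add(var, sparse_delta):
  # Stand-in for values_util.scatter_add: apply the update directly.
  var.value = [a + b for a, b in zip(var.value, sparse_delta)]
  return var.value


class SketchPolicy(object):
  """Stand-in for a variable policy whose scatter ops are unsupported."""

  def scatter_add(self, var, sparse_delta):
    raise NotImplementedError("scatter_add is not supported by this policy")


class SketchDistributedVariable(object):
  """Simplified stand-in for DistributedVariable, for illustration only."""

  def __init__(self, value, policy=None):
    self.value = value
    self._policy = policy

  def scatter_add(self, sparse_delta):
    if self._policy:
      # Mirrors the control flow after the change above: the policy call can
      # raise for unsupported cases, and the shared helper then does the work.
      self._policy.scatter_add(self, sparse_delta)
    return _fallback_scatter_add(self, sparse_delta)


v = SketchDistributedVariable([0., 0., 0.])
assert v.scatter_add([1., 2., 3.]) == [1., 2., 3.]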
from tensorflow.python.training import saver as saver_lib from tensorflow.python.training.tracking import util as trackable_utils from tensorflow.python.types import core from tensorflow.python.util import nest -def _device_str(d): - return "/device:GPU:" + str(d) - - -def _nested_value(d): - return ("a" + d, ["b" + d, {"c": "d" + d, "e": "f" + d}, "g" + d], "h" + d) - - -def _make_mirrored_val(init_val=5.0): - v = [] - devices = ["/device:GPU:0", "/device:CPU:0"] - for d, _ in zip(devices, ["v", "v/replica"]): - with ops.device(d): - v.append(constant_op.constant(init_val)) - return values_lib.Mirrored(v) - - -def _make_mirrored(): - v = [] - devices = ["/device:GPU:0", "/device:CPU:0"] - for d, n, init in zip(devices, ["v", "v/replica"], [1., 2.]): - with ops.device(d): - v.append(variable_scope.get_variable( - name=n, initializer=init, use_resource=True)) - mirrored = values_lib.MirroredVariable( - None, v, variable_scope.VariableAggregation.SUM) - return mirrored - - -def mirrored_and_tpu_strategy_combinations(): - return combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - strategy_combinations.tpu_strategy, - strategy_combinations.tpu_strategy_packed_var, - ], - mode=["graph", "eager"]) - - class DistributedValuesTest(test.TestCase, parameterized.TestCase): def testGetEager(self): @@ -397,6 +363,45 @@ class DistributedDelegateTest(test.TestCase): self.assertEqual(v.x, v_deep_copy.x) +def _device_str(d): + return "/device:GPU:" + str(d) + + +def _nested_value(d): + return ("a" + d, ["b" + d, {"c": "d" + d, "e": "f" + d}, "g" + d], "h" + d) + + +def _make_mirrored_val(init_val=5.0): + v = [] + devices = ["/device:GPU:0", "/device:CPU:0"] + for d, _ in zip(devices, ["v", "v/replica"]): + with ops.device(d): + v.append(constant_op.constant(init_val)) + return values_lib.Mirrored(v) + + +def _make_mirrored(): + v = [] + devices = ["/device:GPU:0", "/device:CPU:0"] + for d, n, init in zip(devices, ["v", "v/replica"], [1., 2.]): + with ops.device(d): + v.append(variable_scope.get_variable( + name=n, initializer=init, use_resource=True)) + mirrored = values_lib.MirroredVariable( + None, v, variable_scope.VariableAggregation.SUM) + return mirrored + + +def mirrored_and_tpu_strategy_combinations(): + return combinations.combine( + distribution=[ + strategy_combinations.mirrored_strategy_with_gpu_and_cpu, + strategy_combinations.tpu_strategy, + strategy_combinations.tpu_strategy_packed_var, + ], + mode=["graph", "eager"]) + + @combinations.generate( combinations.combine( distribution=[ @@ -791,6 +796,507 @@ class MirroredVariableTest(test.TestCase, parameterized.TestCase): save_path = self._save_normal() self._restore_mirrored(save_path) + @combinations.generate( + combinations.combine( + distribution=[ + strategy_combinations.mirrored_strategy_with_one_gpu, + ], + mode=["graph"])) + def testFetchAMirroredVariable(self, distribution): + with self.session(graph=ops.Graph()) as sess, distribution.scope(): + with ops.device("/device:GPU:0"): + v = variable_scope.get_variable( + name="v", initializer=1., use_resource=True) + mirrored = values_lib.MirroredVariable( + distribution, (v,), variable_scope.VariableAggregation.MEAN) + sess.run(variables_lib.global_variables_initializer()) + sess.run({"complicated": mirrored}) + + @combinations.generate( + combinations.combine( + distribution=[ + strategy_combinations.mirrored_strategy_with_gpu_and_cpu, + strategy_combinations.tpu_strategy, + strategy_combinations.tpu_strategy_packed_var, + ], + 
mode=["eager"])) + def testAssignValueInReplicaContextWithoutAggregation(self, distribution): + with distribution.scope(): + v = variables_lib.Variable(1.0, name="foo") + + @def_function.function + def mytest(): + def model_fn(): + v.assign(5.0) + return v.read_value() + + return distribution.run(model_fn) + + mytest() + self.assertAllEqual([5.0, 5.0], self.evaluate(v.values)) + + @combinations.generate( + combinations.combine( + distribution=[ + strategy_combinations.mirrored_strategy_with_one_cpu, + strategy_combinations.mirrored_strategy_with_gpu_and_cpu, + strategy_combinations.tpu_strategy, + strategy_combinations.tpu_strategy_packed_var, + ], + mode=["graph", "eager"])) + def testValueInReplicaContext(self, distribution): + with distribution.scope(): + v = variables_lib.Variable( + 1., aggregation=variables_lib.VariableAggregation.MEAN) + self.evaluate(variables_lib.global_variables_initializer()) + + @def_function.function + def f(): + with ops.control_dependencies([v.assign_add(1.)]): + return v.value() + + results = self.evaluate( + distribution.experimental_local_results( + distribution.run(f))) + for value in results: + self.assertEqual(2., value) + + @combinations.generate( + combinations.combine( + distribution=[ + strategy_combinations.mirrored_strategy_with_one_cpu, + strategy_combinations.mirrored_strategy_with_gpu_and_cpu, + strategy_combinations.tpu_strategy, + strategy_combinations.tpu_strategy_packed_var, + ], + mode=["graph", "eager"])) + def testAssignOutOfScope(self, distribution): + with distribution.scope(): + mirrored = variables_lib.Variable(1.) + self.evaluate(mirrored.assign(3.)) + self.assertEqual(self.evaluate(mirrored.read_value()), 3.) + for component in mirrored.values: + self.assertEqual(self.evaluate(component.read_value()), 3.) + + @combinations.generate( + combinations.combine( + distribution=[ + strategy_combinations.mirrored_strategy_with_gpu_and_cpu, + ], + mode=["graph", "eager"])) + def testAssignAggregationMeanDTypeNonFloat(self, distribution): + with distribution.scope(): + v = variables_lib.Variable( + 1, + aggregation=variable_scope.VariableAggregation.MEAN, + dtype=dtypes.int32) + self.evaluate(v.initializer) + + @def_function.function + def assign(): + ctx = distribution_strategy_context.get_replica_context() + return v.assign(ctx.replica_id_in_sync_group) + + # disallow assign() with distributed value in replica context. + with self.assertRaisesRegex(ValueError, + "Cannot update non-float variables"): + self.evaluate( + distribution.experimental_local_results( + distribution.run(assign))) + + # allow assign() with same value in replica context. + @def_function.function + def assign_same(): + return v.assign(2) + + self.evaluate( + distribution.experimental_local_results( + distribution.run(assign_same))) + self.assertEqual(self.evaluate(v.read_value()), 2) + + # allow assign() with mirrored variable in replica context. + with distribution.scope(): + v2 = variables_lib.Variable( + 3, + aggregation=variable_scope.VariableAggregation.SUM, + dtype=dtypes.int32) + self.evaluate(v2.initializer) + + @def_function.function + def assign_mirrored(): + return v.assign(v2) + + self.evaluate( + distribution.experimental_local_results( + distribution.run(assign_mirrored))) + self.assertEqual(self.evaluate(v.read_value()), 3) + + # allow assign() in cross replica context. 
+ with distribution.scope(): + self.evaluate(v.assign(4)) + self.assertEqual(self.evaluate(v.read_value()), 4) + + @combinations.generate( + combinations.combine( + distribution=[ + strategy_combinations.mirrored_strategy_with_gpu_and_cpu, + strategy_combinations.tpu_strategy, + strategy_combinations.tpu_strategy_packed_var, + ], + mode=["eager"])) + def testInitializedToSameValueInsideEagerRun(self, distribution): + v = [None] + + @def_function.function + def step(): + + def f(): + if v[0] is None: + v[0] = variables_lib.Variable(random_ops.random_normal([])) + + distribution.run(f) + + context.set_global_seed(None) + step() + vals = self.evaluate(v[0].values) + self.assertAllEqual(vals[0], vals[1]) + + @combinations.generate( + combinations.combine( + distribution=[ + strategy_combinations.mirrored_strategy_with_one_cpu, + strategy_combinations.mirrored_strategy_with_gpu_and_cpu, + strategy_combinations.tpu_strategy, + strategy_combinations.tpu_strategy_packed_var, + ], + mode=["graph", "eager"])) + def testAggregationOnlyFirstReplica(self, distribution): + with distribution.scope(): + v = variable_scope.variable( + 15., + synchronization=variables_lib.VariableSynchronization.ON_WRITE, + aggregation=variables_lib.VariableAggregation.ONLY_FIRST_REPLICA) + self.evaluate(variables_lib.global_variables_initializer()) + + @def_function.function + def assign(): + ctx = distribution_strategy_context.get_replica_context() + replica_id = ctx.replica_id_in_sync_group + return v.assign(math_ops.cast(replica_id, dtypes.float32)) + per_replica_results = self.evaluate(distribution.experimental_local_results( + distribution.run(assign))) + # The per-replica values should always match the first replicas value. + self.assertAllEqual( + array_ops.zeros(distribution.num_replicas_in_sync, dtypes.float32), + per_replica_results) + + @combinations.generate( + combinations.combine( + distribution=[ + strategy_combinations.mirrored_strategy_with_gpu_and_cpu, + strategy_combinations.tpu_strategy, + strategy_combinations.tpu_strategy_packed_var, + ], + mode=["eager"])) + def testInitScope(self, distribution): + + class C(object): + pass + + obj = C() + obj.w = None + obj.v = None + + @def_function.function + def assign(): + with ops.init_scope(): + if obj.w is None: + obj.w = variables_lib.Variable( + 0, aggregation=variables_lib.VariableAggregation.MEAN) + obj.v = variables_lib.Variable( + obj.w.read_value(), + aggregation=variables_lib.VariableAggregation.MEAN) + + return obj.v.assign_add(2) + + per_replica_results = self.evaluate( + distribution.experimental_local_results(distribution.run(assign))) + self.assertAllEqual([2, 2], per_replica_results) + + @combinations.generate( + combinations.combine( + distribution=[ + strategy_combinations.mirrored_strategy_with_gpu_and_cpu, + strategy_combinations.tpu_strategy, + ], + mode=["eager"])) + def testOperatorOverride(self, distribution): + + with distribution.scope(): + v = variable_scope.variable( + 1, aggregation=variables_lib.VariableAggregation.MEAN) + + self.assertEqual(2, self.evaluate(v + 1)) + + @def_function.function + def add(): + return v + 1 + + per_replica_results = self.evaluate( + distribution.experimental_local_results(distribution.run(add))) + self.assertAllEqual([2, 2], per_replica_results) + + @combinations.generate(mirrored_and_tpu_strategy_combinations()) + def testAssignAdd(self, distribution): + with distribution.scope(): + v = variable_scope.variable( + 1, aggregation=variables_lib.VariableAggregation.MEAN) + 
self.evaluate(variables_lib.global_variables_initializer()) + + @def_function.function + def assign(): + return v.assign_add(2) + + per_replica_results = self.evaluate( + distribution.experimental_local_results(distribution.run(assign))) + # The per-replica values should always match the first replicas value. + self.assertAllEqual([3, 3], per_replica_results) + + @combinations.generate( + combinations.combine( + distribution=[ + strategy_combinations.mirrored_strategy_with_gpu_and_cpu, + ], + mode=["graph", "eager"])) + def testScatterSub(self, distribution): + with distribution.scope(): + v = variables_lib.Variable( + [0., 0., 0.], aggregation=variables_lib.VariableAggregation.MEAN) + self.evaluate(v.initializer) + + @def_function.function + def scatter_sub(): + ctx = distribution_strategy_context.get_replica_context() + replica_id = ctx.replica_id_in_sync_group + value = indexed_slices.IndexedSlices( + values=array_ops.stack([ + math_ops.cast(replica_id, dtypes.float32), + math_ops.cast(replica_id + 1, dtypes.float32) + ]), + indices=array_ops.stack([replica_id, replica_id + 1]), + dense_shape=(3,)) + return v.scatter_sub(value) + + per_replica_results = self.evaluate( + distribution.experimental_local_results( + distribution.run(scatter_sub))) + self.assertAllEqual([[0., -1., -1.], [0., -1., -1.]], per_replica_results) + + @combinations.generate( + combinations.combine( + distribution=[ + strategy_combinations.mirrored_strategy_with_gpu_and_cpu, + ], + mode=["graph", "eager"])) + def testScatterAdd(self, distribution): + with distribution.scope(): + v = variables_lib.Variable( + [0, 0, 0], aggregation=variables_lib.VariableAggregation.SUM) + self.evaluate(v.initializer) + + @def_function.function + def scatter_add(): + ctx = distribution_strategy_context.get_replica_context() + replica_id = ctx.replica_id_in_sync_group + value = indexed_slices.IndexedSlices( + values=array_ops.stack([replica_id, replica_id + 1]), + indices=array_ops.stack([replica_id, replica_id + 1]), + dense_shape=(3,)) + return v.scatter_add(value) + + per_replica_results = self.evaluate( + distribution.experimental_local_results( + distribution.run(scatter_add))) + self.assertAllEqual([[0, 2, 2], [0, 2, 2]], per_replica_results) + + @combinations.generate( + combinations.combine( + distribution=[ + strategy_combinations.mirrored_strategy_with_gpu_and_cpu, + ], + mode=["graph", "eager"])) + def testScatterDiv(self, distribution): + with distribution.scope(): + v = variables_lib.Variable( + [1, 6, 1], aggregation=variables_lib.VariableAggregation.SUM) + self.evaluate(v.initializer) + + @def_function.function + def scatter_div(): + ctx = distribution_strategy_context.get_replica_context() + replica_id = ctx.replica_id_in_sync_group + value = indexed_slices.IndexedSlices( + values=array_ops.reshape(replica_id + 2, [1]), + indices=array_ops.reshape(replica_id, [1]), + dense_shape=(3,)) + return v.scatter_div(value) + + per_replica_results = self.evaluate( + distribution.experimental_local_results( + distribution.run(scatter_div))) + self.assertAllEqual([[0, 2, 1], [0, 2, 1]], per_replica_results) + + @combinations.generate( + combinations.combine( + distribution=[ + strategy_combinations.mirrored_strategy_with_gpu_and_cpu, + ], + mode=["graph", "eager"])) + def testScatterMul(self, distribution): + with distribution.scope(): + v = variables_lib.Variable( + [2., 1., 1.], aggregation=variables_lib.VariableAggregation.MEAN) + self.evaluate(v.initializer) + + @def_function.function + def scatter_mul(): + ctx = 
distribution_strategy_context.get_replica_context() + replica_id = ctx.replica_id_in_sync_group + value = indexed_slices.IndexedSlices( + values=array_ops.reshape( + math_ops.cast(replica_id + 2, dtypes.float32), [1]), + indices=array_ops.reshape(replica_id, [1]), + dense_shape=(3,)) + return v.scatter_mul(value) + + per_replica_results = self.evaluate( + distribution.experimental_local_results( + distribution.run(scatter_mul))) + self.assertAllClose([[2., 1.5, 1.], [2., 1.5, 1.]], per_replica_results) + + @combinations.generate( + combinations.combine( + distribution=[ + strategy_combinations.mirrored_strategy_with_gpu_and_cpu, + ], + mode=["graph", "eager"])) + def testScatterMin(self, distribution): + with distribution.scope(): + v1 = variables_lib.Variable( + [0, 2, 0], aggregation=variables_lib.VariableAggregation.SUM) + v2 = variables_lib.Variable( + [0, 2, 0], + aggregation=variables_lib.VariableAggregation.ONLY_FIRST_REPLICA) + self.evaluate(variables_lib.global_variables_initializer()) + + @def_function.function + def scatter_min(v): + value = indexed_slices.IndexedSlices( + values=array_ops.identity([1]), + indices=array_ops.identity([1]), + dense_shape=(3,)) + return v.scatter_min(value) + + with self.assertRaisesRegex(NotImplementedError, "scatter_min.*"): + self.evaluate( + distribution.experimental_local_results( + distribution.run(scatter_min, args=(v1,)))) + + per_replica_results = self.evaluate( + distribution.experimental_local_results( + distribution.run(scatter_min, args=(v2,)))) + self.assertAllClose([[0, 1, 0], [0, 1, 0]], per_replica_results) + + @combinations.generate( + combinations.combine( + distribution=[ + strategy_combinations.mirrored_strategy_with_gpu_and_cpu, + ], + mode=["graph", "eager"])) + def testScatterMax(self, distribution): + with distribution.scope(): + v1 = variables_lib.Variable( + [0, 0, 0], aggregation=variables_lib.VariableAggregation.SUM) + v2 = variables_lib.Variable( + [0, 0, 0], + aggregation=variables_lib.VariableAggregation.ONLY_FIRST_REPLICA) + self.evaluate(variables_lib.global_variables_initializer()) + + @def_function.function + def scatter_max(v): + value = indexed_slices.IndexedSlices( + values=array_ops.identity([1]), + indices=array_ops.identity([0]), + dense_shape=(3,)) + return v.scatter_max(value) + + with self.assertRaisesRegex(NotImplementedError, "scatter_max.*"): + self.evaluate( + distribution.experimental_local_results( + distribution.run(scatter_max, args=(v1,)))) + + per_replica_results = self.evaluate( + distribution.experimental_local_results( + distribution.run(scatter_max, args=(v2,)))) + self.assertAllClose([[1, 0, 0], [1, 0, 0]], per_replica_results) + + @combinations.generate( + combinations.combine( + distribution=[ + strategy_combinations.mirrored_strategy_with_gpu_and_cpu, + ], + mode=["graph", "eager"])) + def testScatterUpdate(self, distribution): + with distribution.scope(): + v1 = variables_lib.Variable( + [0, 0, 0], aggregation=variables_lib.VariableAggregation.SUM) + v2 = variables_lib.Variable( + [0, 0, 0], + aggregation=variables_lib.VariableAggregation.ONLY_FIRST_REPLICA) + self.evaluate(variables_lib.global_variables_initializer()) + + @def_function.function + def scatter_update(v): + value = indexed_slices.IndexedSlices( + values=array_ops.identity([3]), + indices=array_ops.identity([1]), + dense_shape=(3,)) + return v.scatter_update(value) + + with self.assertRaisesRegex(NotImplementedError, "scatter_update.*"): + self.evaluate( + distribution.experimental_local_results( + 
distribution.run(scatter_update, args=(v1,)))) + + per_replica_results = self.evaluate( + distribution.experimental_local_results( + distribution.run(scatter_update, args=(v2,)))) + self.assertAllClose([[0, 3, 0], [0, 3, 0]], per_replica_results) + + @combinations.generate( + combinations.combine( + distribution=[ + strategy_combinations.mirrored_strategy_with_gpu_and_cpu, + ], + mode=["graph", "eager"])) + def testScatterOpsInCrossReplicaContext(self, distribution): + with distribution.scope(): + v1 = variables_lib.Variable( + [1, 1, 1], aggregation=variables_lib.VariableAggregation.SUM) + v2 = variables_lib.Variable([1, 1, 1]) + self.evaluate(variables_lib.global_variables_initializer()) + + value = indexed_slices.IndexedSlices( + values=array_ops.identity([2]), + indices=array_ops.identity([0]), + dense_shape=(3,)) + with distribution.scope(): + self.evaluate(v1.scatter_add(value)) + self.assertAllEqual([3, 1, 1], self.evaluate(v1.read_value())) + + self.evaluate(v2.scatter_min(value)) + self.assertAllEqual([1, 1, 1], self.evaluate(v2.read_value())) + _TPU_STRATEGIES = (tpu_strategy.TPUStrategy, tpu_strategy.TPUStrategyV1) @@ -815,6 +1321,38 @@ def _make_replica_local(method, strategy=None): return v, replica_local +class SyncOnReadVariablePropertiesTest(test.TestCase): + + config = config_pb2.ConfigProto() + config.allow_soft_placement = True + + @test_util.run_in_graph_and_eager_modes(config=config) + def testProperties(self): + if context.num_gpus() < 1 and context.executing_eagerly(): + self.skipTest("A GPU is not available for this test in eager mode.") + v, replica_local = _make_replica_local( + variable_scope.VariableAggregation.SUM) + + self.assertEqual(v[0].constraint, replica_local.constraint) + self.assertEqual(v[0].name, replica_local.name) + self.assertEqual(v[0].dtype, replica_local.dtype) + self.assertEqual(v[0].shape, replica_local.shape) + self.assertEqual(variable_scope.VariableAggregation.SUM, + replica_local.aggregation) + + @test_util.run_v2_only + def testCanPassToDefFun(self): + @def_function.function + def add1(x): + return x + 1 + + v = variable_scope.get_variable( + name="v", initializer=[1.], use_resource=True) + replica_local = values_lib.SyncOnReadVariable( + None, (v,), variable_scope.VariableAggregation.MEAN) + self.assertEqual(2., self.evaluate(add1(replica_local))) + + # TODO(b/144432582): Add variable aggregation type to combinations to simplify # tests. 
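The tests added above all follow the same structure: build a variable under distribution.scope(), run a tf.function through distribution.run, and compare the per-replica results. The following is a simplified public-API sketch of that pattern; it mirrors testAggregationOnlyFirstReplica rather than reproducing the internal test harness, and assumes a default MirroredStrategy over whatever local devices are available.

import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()

with strategy.scope():
  # Every replica writes, but ONLY_FIRST_REPLICA keeps replica 0's value.
  v = tf.Variable(
      15., aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA)

@tf.function
def assign_replica_id():
  ctx = tf.distribute.get_replica_context()
  return v.assign(tf.cast(ctx.replica_id_in_sync_group, tf.float32))

per_replica = strategy.run(assign_replica_id)
# Each local component should now hold the value written by replica 0 (0.0).
print(strategy.experimental_local_results(per_replica))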
def strategy_and_run_tf_function_combinations(): @@ -851,35 +1389,6 @@ class SyncOnReadVariableTest(test.TestCase, parameterized.TestCase): save_path, _ = self._save_return_saver(sess, var) return save_path - config = config_pb2.ConfigProto() - config.allow_soft_placement = True - - @test_util.run_in_graph_and_eager_modes(config=config) - def testProperties(self): - if context.num_gpus() < 1 and context.executing_eagerly(): - self.skipTest("A GPU is not available for this test in eager mode.") - v, replica_local = _make_replica_local( - variable_scope.VariableAggregation.SUM) - - self.assertEqual(v[0].constraint, replica_local.constraint) - self.assertEqual(v[0].name, replica_local.name) - self.assertEqual(v[0].dtype, replica_local.dtype) - self.assertEqual(v[0].shape, replica_local.shape) - self.assertEqual(variable_scope.VariableAggregation.SUM, - replica_local.aggregation) - - @test_util.run_v2_only - def testCanPassToDefFun(self): - @def_function.function - def add1(x): - return x + 1 - - v = variable_scope.get_variable( - name="v", initializer=[1.], use_resource=True) - replica_local = values_lib.SyncOnReadVariable( - None, (v,), variable_scope.VariableAggregation.MEAN) - self.assertEqual(2., self.evaluate(add1(replica_local))) - @combinations.generate(mirrored_and_tpu_strategy_combinations()) def testTensorConversion(self, distribution): with context.graph_mode(): @@ -1076,6 +1585,453 @@ class SyncOnReadVariableTest(test.TestCase, parameterized.TestCase): save_path = self._save_normal() self._restore_replica_local_sum(save_path, distribution) + @combinations.generate(strategy_and_run_tf_function_combinations()) + def testAssign(self, distribution, experimental_run_tf_function): + + def assign(fn, v, update_value, cross_replica): + update_fn = lambda: getattr(v, fn)(update_value) + if cross_replica: + return update_fn() + else: + if experimental_run_tf_function: + update_fn = def_function.function(update_fn) + return distribution.experimental_local_results( + distribution.run(update_fn)) + + updates = [("assign", 1.), ("assign_add", 1.), ("assign_sub", -1.)] + aggregations = [ + variables_lib.VariableAggregation.NONE, + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.MEAN, + variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, + ] + options = list( + x for x in itertools.product(updates, aggregations, [True, False])) + for update, aggregation, cross_replica in options: + # VariableAggregation.SUM in cross-replica mode is tested below, + # VariableAggregation.NONE in cross-replica mode is not supported. 
+ if cross_replica and aggregation in [ + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.NONE, + ]: + continue + with distribution.scope(): + v = variable_scope.variable( + 0., + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=aggregation) + self.evaluate(variables_lib.global_variables_initializer()) + fn, update_value = update + self.evaluate(assign(fn, v, update_value, cross_replica)) + for component in v._values: + self.assertAllEqual(self.evaluate(component.read_value()), + self.evaluate(array_ops.ones_like(component))) + + @combinations.generate(strategy_and_run_tf_function_combinations()) + def testAssignDtypeConversion(self, distribution, + experimental_run_tf_function): + + def assign(fn, v, update_value, cross_replica): + update_fn = lambda: getattr(v, fn)(update_value) + if cross_replica: + return update_fn() + else: + if experimental_run_tf_function: + update_fn = def_function.function(update_fn) + return distribution.experimental_local_results( + distribution.run(update_fn)) + + updates = [("assign", 1), ("assign_add", 1), ("assign_sub", -1)] + aggregations = [ + variables_lib.VariableAggregation.NONE, + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.MEAN, + variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, + ] + options = list( + x for x in itertools.product(updates, aggregations, [True, False])) + for update, aggregation, cross_replica in options: + # VariableAggregation.SUM in cross-replica mode is tested below, + # VariableAggregation.NONE in cross-replica mode is not supported. + if cross_replica and aggregation in [ + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.NONE, + ]: + continue + with distribution.scope(): + v = variable_scope.variable( + 0., + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=aggregation) + self.evaluate(variables_lib.global_variables_initializer()) + fn, update_value = update + self.evaluate(assign(fn, v, update_value, cross_replica)) + for component in v._values: + self.assertAllEqual(self.evaluate(component.read_value()), + self.evaluate(array_ops.ones_like(component))) + + @combinations.generate(mirrored_and_tpu_strategy_combinations()) + def testAssignWithAggregationSum(self, distribution): + with distribution.scope(): + v = variable_scope.variable( + 0., + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=variables_lib.VariableAggregation.SUM) + self.evaluate(variables_lib.global_variables_initializer()) + self.evaluate(v.assign(1. 
* distribution.num_replicas_in_sync)) + for component in v._values: + self.assertAllEqual(self.evaluate(component.read_value()), + self.evaluate(array_ops.ones_like(component))) + + @combinations.generate(mirrored_and_tpu_strategy_combinations()) + def testAssignAddSubWithAggregationSum(self, distribution): + with distribution.scope(): + v = variable_scope.variable( + 0., + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=variables_lib.VariableAggregation.SUM) + self.evaluate(variables_lib.global_variables_initializer()) + with self.assertRaisesRegex( + ValueError, "SyncOnReadVariable does not support "): + self.evaluate(v.assign_add(1.)) + with self.assertRaisesRegex( + ValueError, "SyncOnReadVariable does not support "): + self.evaluate(v.assign_sub(1.)) + + @combinations.generate(strategy_and_run_tf_function_combinations()) + def testReadValueInReplicaContext(self, distribution, + experimental_run_tf_function): + aggregations = [ + variables_lib.VariableAggregation.NONE, + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.MEAN, + variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, + ] + for aggregation in aggregations: + with distribution.scope(): + v = variable_scope.variable( + 0., + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=aggregation) + self.evaluate(variables_lib.global_variables_initializer()) + if experimental_run_tf_function: + read_var_fn = def_function.function(v.read_value) + else: + read_var_fn = v.read_value + results = self.evaluate( + distribution.experimental_local_results( + distribution.run(read_var_fn))) + for component, value in zip(v._values, results): + self.assertAllEqual(self.evaluate(component.read_value()), value) + + @combinations.generate(strategy_and_run_tf_function_combinations()) + def testReadValueInCrossReplicaContext(self, distribution, + experimental_run_tf_function): + aggregations = [ + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.MEAN, + variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, + ] + for aggregation in aggregations: + if isinstance(distribution, _TPU_STRATEGIES): + resolver = tpu_cluster_resolver.TPUClusterResolver("") + tpu_strategy_util.initialize_tpu_system(resolver) + with distribution.scope(): + v = variable_scope.variable( + 0., + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=aggregation) + self.evaluate(variables_lib.global_variables_initializer()) + + def assign(v=v): + ctx = distribution_strategy_context.get_replica_context() + replica_id = ctx.replica_id_in_sync_group + return v.assign(math_ops.cast(replica_id, dtypes.float32)) + + if experimental_run_tf_function: + assign = def_function.function(assign) + + self.evaluate( + distribution.experimental_local_results(distribution.run(assign))) + num_replicas = distribution.num_replicas_in_sync + sum_of_replica_values = num_replicas * (num_replicas - 1) / 2. 
+ if aggregation == variables_lib.VariableAggregation.SUM: + expected = sum_of_replica_values + elif aggregation == variables_lib.VariableAggregation.MEAN: + expected = sum_of_replica_values / num_replicas + else: + expected = 0 + self.assertEqual(expected, self.evaluate(v.read_value()), aggregation) + self.assertEqual(expected, self.evaluate(v.value()), aggregation) + self.assertEqual(expected, self.evaluate(v), aggregation) + self.assertEqual(expected, self.evaluate(array_ops.identity(v)), + aggregation) + + # TODO(b/145574622): Re-enable this test once ReduceOp argument is + # respected on GPUs. + @combinations.generate(strategy_and_run_tf_function_combinations()) + def disable_testAllReduce(self, distribution, + experimental_run_tf_function): + with distribution.scope(): + v = variable_scope.variable( + 2., + synchronization=variables_lib.VariableSynchronization.ON_WRITE, + aggregation=variables_lib.VariableAggregation.MEAN) + self.evaluate(variables_lib.global_variables_initializer()) + + def all_reduce(): + ctx = distribution_strategy_context.get_replica_context() + replica_id = ctx.replica_id_in_sync_group + return ctx.all_reduce("SUM", v) + math_ops.cast(replica_id, + dtypes.float32) + + if experimental_run_tf_function: + all_reduce = def_function.function(all_reduce) + + per_replica_results = self.evaluate( + distribution.experimental_local_results(distribution.run(all_reduce))) + expected_result = [] + for i in range(distribution.num_replicas_in_sync): + expected_result.append(2.0 * distribution.num_replicas_in_sync + + 1.0 * i) + self.assertEqual(per_replica_results, tuple(expected_result)) + + @combinations.generate(strategy_and_run_tf_function_combinations()) + def testAssignPerReplicaBeforeRead(self, distribution, + experimental_run_tf_function): + aggregations = [ + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.MEAN, + variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, + ] + for aggregation in aggregations: + with distribution.scope(): + v = variable_scope.variable( + 0., + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=aggregation) + self.evaluate(variables_lib.global_variables_initializer()) + + def assign(var=v): + ctx = distribution_strategy_context.get_replica_context() + replica_id = ctx.replica_id_in_sync_group + return var.assign(math_ops.cast(replica_id, dtypes.float32)) + + if experimental_run_tf_function: + assign = def_function.function(assign) + + per_replica_results = self.evaluate( + distribution.experimental_local_results(distribution.run(assign))) + expected_result = [] + for i in range(distribution.num_replicas_in_sync): + expected_result.append(1.0 * i) + self.assertEqual(per_replica_results, tuple(expected_result)) + + @combinations.generate(mirrored_and_tpu_strategy_combinations()) + def testReadValueWithAggregationNoneInCrossReplicaContext(self, distribution): + with distribution.scope(): + v = variable_scope.variable( + 0., + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=variables_lib.VariableAggregation.NONE) + self.evaluate(variables_lib.global_variables_initializer()) + with self.assertRaisesRegex( + ValueError, "Could not convert from .* VariableAggregation\\.NONE"): + self.evaluate(v.read_value()) + + @combinations.generate(mirrored_and_tpu_strategy_combinations()) + def testInitializedToSameValueInsideEagerRun(self, distribution): + if not context.executing_eagerly(): self.skipTest("eager only") + + v = [None] + @def_function.function + def step(): + def 
f(): + if v[0] is None: + v[0] = variables_lib.Variable( + random_ops.random_normal([]), + synchronization=variables_lib.VariableSynchronization.ON_READ) + + distribution.run(f) + + context.set_global_seed(None) + step() + vals = self.evaluate(v[0].values) + self.assertAllEqual(vals[0], vals[1]) + + @combinations.generate( + combinations.combine( + distribution=[ + strategy_combinations.tpu_strategy, + ], + mode=["eager"])) + def testOperatorOverride(self, distribution): + + with distribution.scope(): + v = variable_scope.variable( + 0.0, + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=variables_lib.VariableAggregation.MEAN) + + @def_function.function + def assign(): + ctx = distribution_strategy_context.get_replica_context() + replica_id = ctx.replica_id_in_sync_group + return v.assign(math_ops.cast(replica_id, dtypes.float32)) + + # Assign different replicas with different values. + distribution.run(assign) + + self.assertEqual(1.5, self.evaluate(v + 1)) + + @def_function.function + def add(): + return v + 1 + + per_replica_results = self.evaluate( + distribution.experimental_local_results(distribution.run(add))) + self.assertAllEqual([1, 2], per_replica_results) + + +@combinations.generate( + combinations.combine( + distribution=[ + strategy_combinations.mirrored_strategy_with_gpu_and_cpu, + ], + aggregation=[ + variables_lib.VariableAggregation.MEAN, + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, + ], + mode=["graph", "eager"])) +class SyncOnReadScatterReplicaTest(test.TestCase, parameterized.TestCase): + + def testScatterSub(self, distribution, aggregation): + with distribution.scope(): + v = variables_lib.Variable( + [1., 1., 1.], + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=aggregation) + self.evaluate(v.initializer) + + delta = values_lib.PerReplica([ + indexed_slices.IndexedSlices( + values=[[0.], [1.]], indices=[0, 1], dense_shape=(3,)), + indexed_slices.IndexedSlices( + values=[[1.], [2.]], indices=[1, 2], dense_shape=(3,)), + ]) + + with self.assertRaises(NotImplementedError): + self.evaluate(distribution.run(v.scatter_sub, args=(delta,))) + + def testScatterAdd(self, distribution, aggregation): + with distribution.scope(): + v = variables_lib.Variable( + [1., 1., 1.], + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=aggregation) + self.evaluate(v.initializer) + + delta = values_lib.PerReplica([ + indexed_slices.IndexedSlices( + values=[[0.], [1.]], indices=[0, 1], dense_shape=(3,)), + indexed_slices.IndexedSlices( + values=[[1.], [2.]], indices=[1, 2], dense_shape=(3,)), + ]) + + with self.assertRaises(NotImplementedError): + self.evaluate(distribution.run(v.scatter_add, args=(delta,))) + + def testScatterDiv(self, distribution, aggregation): + with distribution.scope(): + v = variables_lib.Variable( + [2., 6., 1.], + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=aggregation) + self.evaluate(v.initializer) + + delta = values_lib.PerReplica([ + indexed_slices.IndexedSlices( + values=[[2.], [2.]], indices=[0, 1], dense_shape=(3,)), + indexed_slices.IndexedSlices( + values=[[3.], [3.]], indices=[1, 2], dense_shape=(3,)), + ]) + + with self.assertRaises(NotImplementedError): + self.evaluate(distribution.run(v.scatter_div, args=(delta,))) + + def testScatterMul(self, distribution, aggregation): + with distribution.scope(): + v = variables_lib.Variable( + [2., 1., 1.], + 
synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=aggregation) + self.evaluate(v.initializer) + + delta = values_lib.PerReplica([ + indexed_slices.IndexedSlices( + values=[[2.], [3.]], indices=[0, 1], dense_shape=(3,)), + indexed_slices.IndexedSlices( + values=[[4.], [5.]], indices=[1, 2], dense_shape=(3,)), + ]) + + with self.assertRaises(NotImplementedError): + self.evaluate(distribution.run(v.scatter_mul, args=(delta,))) + + def testScatterMin(self, distribution, aggregation): + with distribution.scope(): + v = variables_lib.Variable( + [3., 4., 5.], + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=aggregation) + self.evaluate(v.initializer) + + delta = values_lib.PerReplica([ + indexed_slices.IndexedSlices( + values=[[1.], [8.]], indices=[0, 1], dense_shape=(3,)), + indexed_slices.IndexedSlices( + values=[[9.], [2.]], indices=[1, 2], dense_shape=(3,)), + ]) + + with self.assertRaises(NotImplementedError): + self.evaluate(distribution.run(v.scatter_min, args=(delta,))) + + def testScatterMax(self, distribution, aggregation): + with distribution.scope(): + v = variables_lib.Variable( + [3., 4., 5.], + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=aggregation) + self.evaluate(v.initializer) + + delta = values_lib.PerReplica([ + indexed_slices.IndexedSlices( + values=[[1.], [8.]], indices=[0, 1], dense_shape=(3,)), + indexed_slices.IndexedSlices( + values=[[9.], [2.]], indices=[1, 2], dense_shape=(3,)), + ]) + + with self.assertRaises(NotImplementedError): + self.evaluate(distribution.run(v.scatter_max, args=(delta,))) + + def testScatterUpdate(self, distribution, aggregation): + with distribution.scope(): + v = variables_lib.Variable( + [0., 0., 0.], + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=aggregation) + self.evaluate(v.initializer) + + delta = values_lib.PerReplica([ + indexed_slices.IndexedSlices( + values=[[1.], [2.]], indices=[0, 1], dense_shape=(3,)), + indexed_slices.IndexedSlices( + values=[[3.], [4.]], indices=[1, 2], dense_shape=(3,)), + ]) + + with self.assertRaises(NotImplementedError): + self.evaluate(distribution.run(v.scatter_min, args=(delta,))) + class MirroredTest(test.TestCase): diff --git a/tensorflow/python/distribute/vars_test.py b/tensorflow/python/distribute/vars_test.py deleted file mode 100644 index 5866c0c8498..00000000000 --- a/tensorflow/python/distribute/vars_test.py +++ /dev/null @@ -1,1270 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
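The SyncOnReadVariable tests added above exercise ON_READ synchronization, where each replica keeps its own value and the aggregation is applied only when the variable is read in cross-replica context. A simplified public-API sketch of that behaviour, again assuming a local MirroredStrategy, might look like this:

import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()

with strategy.scope():
  v = tf.Variable(
      0.,
      synchronization=tf.VariableSynchronization.ON_READ,
      aggregation=tf.VariableAggregation.SUM)

@tf.function
def write_replica_id():
  ctx = tf.distribute.get_replica_context()
  return v.assign(tf.cast(ctx.replica_id_in_sync_group, tf.float32))

strategy.run(write_replica_id)
# Reading outside a replica context sums the per-replica components,
# i.e. 0 + 1 + ... + (num_replicas - 1).
print(v.read_value())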
-# ============================================================================== -"""Tests for the distributed values library.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import itertools - -from absl.testing import parameterized - -from tensorflow.python.distribute import combinations -from tensorflow.python.distribute import distribution_strategy_context -from tensorflow.python.distribute import strategy_combinations -from tensorflow.python.distribute import tpu_strategy -from tensorflow.python.distribute import tpu_values -from tensorflow.python.distribute import values -from tensorflow.python.distribute.cluster_resolver import tpu_cluster_resolver -from tensorflow.python.eager import context -from tensorflow.python.eager import def_function -from tensorflow.python.eager import test -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import indexed_slices -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import random_ops -from tensorflow.python.ops import variable_scope -from tensorflow.python.ops import variables as variables_lib -from tensorflow.python.tpu import tpu_strategy_util - - -_TPU_STRATEGIES = (tpu_strategy.TPUStrategy, tpu_strategy.TPUStrategyV1) - - -def strategy_and_run_tf_function_combinations(): - # Test the combination of different strategies and whether a tf.function - # is passed into strategy.run.""" - return combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - ], - mode=["graph", "eager"], - experimental_run_tf_function=[True, False], - use_var_policy=[True, False]) + combinations.combine( - distribution=[ - strategy_combinations.tpu_strategy, - strategy_combinations.tpu_strategy_packed_var, - ], - mode=["graph", "eager"], - experimental_run_tf_function=[True], - use_var_policy=[True, False]) - - -def strategy_with_var_policy(): - return combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - strategy_combinations.tpu_strategy, - strategy_combinations.tpu_strategy_packed_var, - strategy_combinations.central_storage_strategy_with_two_gpus, - ], - mode=["graph", "eager"], - use_var_policy=[True, False]) - - -class OnWriteVariableSync(test.TestCase, parameterized.TestCase): - - @combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_one_gpu, - ], - mode=["graph"])) - def testFetchAMirroredVariable(self, distribution): - with self.session(graph=ops.Graph()) as sess, distribution.scope(): - with ops.device("/device:GPU:0"): - v = variable_scope.get_variable( - name="v", initializer=1., use_resource=True) - mirrored = values.MirroredVariable( - distribution, (v,), variable_scope.VariableAggregation.MEAN) - sess.run(variables_lib.global_variables_initializer()) - sess.run({"complicated": mirrored}) - - @combinations.generate(strategy_and_run_tf_function_combinations()) - def testAssign(self, distribution, experimental_run_tf_function, - use_var_policy): - - def assign(fn, v, update_value, cross_replica): - update_fn = lambda: getattr(v, fn)(update_value) - if cross_replica: - return update_fn() - else: - if experimental_run_tf_function: - update_fn = def_function.function(update_fn) - return distribution.experimental_local_results( - 
distribution.run(update_fn)) - - updates = [("assign", 1.), ("assign_add", 1.), ("assign_sub", -1.)] - aggregations = [ - variables_lib.VariableAggregation.NONE, - variables_lib.VariableAggregation.SUM, - variables_lib.VariableAggregation.MEAN, - variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, - ] - options = list( - x for x in itertools.product(updates, aggregations, [True, False])) - for update, aggregation, cross_replica in options: - # assign in replica context with SUM does not make sense cause you can - # just do value * num replicas error is 1. is not a distributed value and - # is unsupported for aggregation SUM - if (not cross_replica and aggregation == - variables_lib.VariableAggregation.SUM): - continue - with distribution.scope(): - v = variable_scope.variable( - 0., - aggregation=aggregation) - self.evaluate(variables_lib.global_variables_initializer()) - fn, update_value = update - self.evaluate(assign(fn, v, update_value, cross_replica)) - for component in v._values: - self.assertAllEqual(self.evaluate(component.read_value()), - self.evaluate(array_ops.ones_like(component))) - - @combinations.generate(strategy_and_run_tf_function_combinations()) - def testAssignOnWriteVar(self, distribution, experimental_run_tf_function, - use_var_policy): - - with distribution.scope(): - v_to_assign = variable_scope.variable( - 2., aggregation=variables_lib.VariableAggregation.MEAN) - v_to_assign_sub = variable_scope.variable( - -2., aggregation=variables_lib.VariableAggregation.MEAN) - - def assign(fn, v, update_value, cross_replica): - update_fn = lambda: getattr(v, fn)(update_value) - if cross_replica: - return update_fn() - else: - if experimental_run_tf_function: - update_fn = def_function.function(update_fn) - return distribution.experimental_local_results( - distribution.run(update_fn)) - - updates = [("assign", v_to_assign), ("assign_add", v_to_assign), - ("assign_sub", v_to_assign_sub)] - aggregations = [ - variables_lib.VariableAggregation.NONE, - variables_lib.VariableAggregation.SUM, - variables_lib.VariableAggregation.MEAN, - variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, - ] - options = list( - x for x in itertools.product(updates, aggregations, [True, False])) - for update, aggregation, cross_replica in options: - # assign in replica context with SUM does not make sense cause you can - # just do value * num replicas error is 1. is not a distributed value and - # is unsupported for aggregation SUM - if aggregation == variables_lib.VariableAggregation.SUM: - continue - with distribution.scope(): - v = variable_scope.variable( - 0., - aggregation=aggregation) - self.evaluate(variables_lib.global_variables_initializer()) - fn, update_value = update - self.evaluate(assign(fn, v, update_value, cross_replica)) - for component in v._values: - self.assertAllEqual(2.0, self.evaluate(component.read_value())) - - @combinations.generate(strategy_and_run_tf_function_combinations()) - def testAssignPerReplicaVal(self, distribution, experimental_run_tf_function, - use_var_policy): - - if isinstance(distribution, _TPU_STRATEGIES): - self.skipTest("Assigning PerReplica values is not supported. 
See" - " sponge/80ba41f8-4220-4516-98ce-bbad48f9f11a.") - - with distribution.scope(): - per_replica_value = values.PerReplica( - [constant_op.constant(2.0), - constant_op.constant(2.0)]) - per_replica_sub_value = values.PerReplica( - [constant_op.constant(-2.0), - constant_op.constant(-2.0)]) - - def assign(fn, v, update_value, cross_replica): - update_fn = lambda: getattr(v, fn)(update_value) - if cross_replica: - return update_fn() - else: - if experimental_run_tf_function: - update_fn = def_function.function(update_fn) - return distribution.experimental_local_results( - distribution.run(update_fn)) - - updates = [("assign", per_replica_value), ("assign_add", per_replica_value), - ("assign_sub", per_replica_sub_value)] - # We don't support assigning PerReplica valus to vars in replica context - # with aggregation=NONE. - aggregations = [ - variables_lib.VariableAggregation.SUM, - variables_lib.VariableAggregation.MEAN, - variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, - ] - options = list( - x for x in itertools.product(updates, aggregations, [True, False])) - for update, aggregation, cross_replica in options: - # assign in replica context with SUM does not make sense cause you can - # just do value * num replicas error is 1. is not a distributed value and - # is unsupported for aggregation SUM - if cross_replica: - # We don't support assigning PerReplica values to MirroredVariables in - # cross replica context - continue - with distribution.scope(): - v = variable_scope.variable( - 0., - aggregation=aggregation) - self.evaluate(variables_lib.global_variables_initializer()) - fn, update_value = update - self.evaluate(assign(fn, v, update_value, cross_replica)) - if aggregation == variables_lib.VariableAggregation.SUM: - expected = 4.0 - else: - expected = 2.0 - for component in v._values: - self.assertAllEqual(expected, self.evaluate(component.read_value())) - - @combinations.generate(strategy_with_var_policy()) - def testValueInReplicaContext(self, distribution, use_var_policy): - with distribution.scope(): - v = variables_lib.Variable( - 1., aggregation=variables_lib.VariableAggregation.MEAN) - self.evaluate(variables_lib.global_variables_initializer()) - - @def_function.function - def f(): - with ops.control_dependencies([v.assign_add(1.)]): - return v.value() - - results = self.evaluate( - distribution.experimental_local_results( - distribution.run(f))) - for value in results: - self.assertEqual(2., value) - - @combinations.generate(strategy_and_run_tf_function_combinations()) - def testReadValueInReplicaContext(self, distribution, - experimental_run_tf_function, - use_var_policy): - aggregations = [ - variables_lib.VariableAggregation.NONE, - variables_lib.VariableAggregation.SUM, - variables_lib.VariableAggregation.MEAN, - variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, - ] - for aggregation in aggregations: - with distribution.scope(): - v = variable_scope.variable( - 0., - aggregation=aggregation) - self.evaluate(variables_lib.global_variables_initializer()) - if experimental_run_tf_function: - read_var_fn = def_function.function(v.read_value) - else: - read_var_fn = v.read_value - results = self.evaluate( - distribution.experimental_local_results( - distribution.run(read_var_fn))) - for component, value in zip(v._values, results): - self.assertAllEqual(self.evaluate(component.read_value()), value) - - @combinations.generate(strategy_and_run_tf_function_combinations()) - def testReadValueInCrossReplicaContext(self, distribution, - experimental_run_tf_function, - 
use_var_policy): - aggregations = [ - variables_lib.VariableAggregation.NONE, - variables_lib.VariableAggregation.SUM, - variables_lib.VariableAggregation.MEAN, - variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, - ] - for aggregation in aggregations: - with distribution.scope(): - v = variable_scope.variable( - 2., - aggregation=aggregation) - self.evaluate(variables_lib.global_variables_initializer()) - - if experimental_run_tf_function: - read_var_fn = def_function.function(v.read_value) - else: - read_var_fn = v.read_value - - results = read_var_fn() - for component in v._values: - self.assertEqual(self.evaluate(component.read_value()), - self.evaluate(results)) - - @combinations.generate(strategy_with_var_policy()) - def testAssignOutOfScope(self, distribution, use_var_policy): - with distribution.scope(): - mirrored = variables_lib.Variable(1.) - self.evaluate(mirrored.assign(3.)) - self.assertEqual(self.evaluate(mirrored.read_value()), 3.) - for component in mirrored.values: - self.assertEqual(self.evaluate(component.read_value()), 3.) - - @combinations.generate(strategy_with_var_policy()) - def testAssignAggregationMeanDTypeNonFloat(self, distribution, - use_var_policy): - if isinstance(distribution, _TPU_STRATEGIES): - self.skipTest("Fix sponge/6e8ab540-4c0f-4da5-aedf-86505ff810c9 before " - "reenabling test.") - - with distribution.scope(): - v = variables_lib.Variable( - 1, - aggregation=variable_scope.VariableAggregation.MEAN, - dtype=dtypes.int32) - self.evaluate(v.initializer) - - @def_function.function - def assign(): - ctx = distribution_strategy_context.get_replica_context() - return v.assign(ctx.replica_id_in_sync_group) - - # disallow assign() with distributed value in replica context. - with self.assertRaisesRegex(ValueError, - "Cannot update non-float variables"): - self.evaluate( - distribution.experimental_local_results( - distribution.run(assign))) - - # allow assign() with same value in replica context. - @def_function.function - def assign_same(): - return v.assign(2) - - self.evaluate( - distribution.experimental_local_results( - distribution.run(assign_same))) - self.assertEqual(self.evaluate(v.read_value()), 2) - - # allow assign() with mirrored variable in replica context. - with distribution.scope(): - v2 = variables_lib.Variable( - 3, - aggregation=variable_scope.VariableAggregation.SUM, - dtype=dtypes.int32) - self.evaluate(v2.initializer) - - @def_function.function - def assign_mirrored(): - return v.assign(v2) - - self.evaluate( - distribution.experimental_local_results( - distribution.run(assign_mirrored))) - self.assertEqual(self.evaluate(v.read_value()), 3) - - # allow assign() in cross replica context. 
- with distribution.scope(): - self.evaluate(v.assign(4)) - self.assertEqual(self.evaluate(v.read_value()), 4) - - @combinations.generate(strategy_with_var_policy()) - def testInitializedToSameValueInsideEagerRun(self, distribution, - use_var_policy): - if not context.executing_eagerly(): self.skipTest("eager only test") - v = [None] - - @def_function.function - def step(): - - def f(): - if v[0] is None: - v[0] = variables_lib.Variable(random_ops.random_normal([])) - - distribution.run(f) - - context.set_global_seed(None) - step() - vals = self.evaluate(v[0].values) - self.assertAllEqual(vals[0], vals[1]) - - @combinations.generate(strategy_with_var_policy()) - def testAggregationOnlyFirstReplica(self, distribution, use_var_policy): - with distribution.scope(): - v = variable_scope.variable( - 15., - synchronization=variables_lib.VariableSynchronization.ON_WRITE, - aggregation=variables_lib.VariableAggregation.ONLY_FIRST_REPLICA) - self.evaluate(variables_lib.global_variables_initializer()) - - @def_function.function - def assign(): - ctx = distribution_strategy_context.get_replica_context() - replica_id = ctx.replica_id_in_sync_group - return v.assign(math_ops.cast(replica_id, dtypes.float32)) - per_replica_results = self.evaluate(distribution.experimental_local_results( - distribution.run(assign))) - # The per-replica values should always match the first replicas value. - self.assertAllEqual( - array_ops.zeros(distribution.num_replicas_in_sync, dtypes.float32), - per_replica_results) - - @combinations.generate(strategy_with_var_policy()) - def testInitScope(self, distribution, use_var_policy): - if not context.executing_eagerly(): self.skipTest("eager only") - - class C(object): - pass - - obj = C() - obj.w = None - obj.v = None - - @def_function.function - def assign(): - with ops.init_scope(): - if obj.w is None: - obj.w = variables_lib.Variable( - 0, aggregation=variables_lib.VariableAggregation.MEAN) - obj.v = variables_lib.Variable( - obj.w.read_value(), - aggregation=variables_lib.VariableAggregation.MEAN) - self.evaluate(variables_lib.global_variables_initializer()) - - return obj.v.assign_add(2) - - per_replica_results = self.evaluate( - distribution.experimental_local_results(distribution.run(assign))) - self.assertAllEqual([2, 2], per_replica_results) - - @combinations.generate(strategy_with_var_policy()) - def testOperatorOverride(self, distribution, use_var_policy): - - with distribution.scope(): - v = variable_scope.variable( - 1, aggregation=variables_lib.VariableAggregation.MEAN) - self.evaluate(variables_lib.global_variables_initializer()) - - self.assertEqual(2, self.evaluate(v + 1)) - - @def_function.function - def add(): - return v + 1 - - per_replica_results = self.evaluate( - distribution.experimental_local_results(distribution.run(add))) - self.assertAllEqual([2, 2], per_replica_results) - - -@combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - ], - mode=["graph", "eager"], - use_var_policy=[True, False])) -class OnWriteVariableSyncScatterTests(test.TestCase, parameterized.TestCase): - - def testScatterSub(self, distribution, use_var_policy): - with distribution.scope(): - v = variables_lib.Variable( - [0., 0., 0.], aggregation=variables_lib.VariableAggregation.MEAN) - self.evaluate(v.initializer) - - @def_function.function - def scatter_sub(): - ctx = distribution_strategy_context.get_replica_context() - replica_id = ctx.replica_id_in_sync_group - value = indexed_slices.IndexedSlices( - 
values=array_ops.stack([ - math_ops.cast(replica_id, dtypes.float32), - math_ops.cast(replica_id + 1, dtypes.float32) - ]), - indices=array_ops.stack([replica_id, replica_id + 1]), - dense_shape=(3,)) - return v.scatter_sub(value) - - per_replica_results = self.evaluate( - distribution.experimental_local_results( - distribution.run(scatter_sub))) - self.assertAllEqual([[0., -1., -1.], [0., -1., -1.]], per_replica_results) - - def testScatterAdd(self, distribution, use_var_policy): - with distribution.scope(): - v = variables_lib.Variable( - [0, 0, 0], aggregation=variables_lib.VariableAggregation.SUM) - self.evaluate(v.initializer) - - @def_function.function - def scatter_add(): - ctx = distribution_strategy_context.get_replica_context() - replica_id = ctx.replica_id_in_sync_group - value = indexed_slices.IndexedSlices( - values=array_ops.stack([replica_id, replica_id + 1]), - indices=array_ops.stack([replica_id, replica_id + 1]), - dense_shape=(3,)) - return v.scatter_add(value) - - per_replica_results = self.evaluate( - distribution.experimental_local_results( - distribution.run(scatter_add))) - self.assertAllEqual([[0, 2, 2], [0, 2, 2]], per_replica_results) - - def testScatterDiv(self, distribution, use_var_policy): - with distribution.scope(): - v = variables_lib.Variable( - [1, 6, 1], aggregation=variables_lib.VariableAggregation.SUM) - self.evaluate(v.initializer) - - @def_function.function - def scatter_div(): - ctx = distribution_strategy_context.get_replica_context() - replica_id = ctx.replica_id_in_sync_group - value = indexed_slices.IndexedSlices( - values=array_ops.reshape(replica_id + 2, [1]), - indices=array_ops.reshape(replica_id, [1]), - dense_shape=(3,)) - return v.scatter_div(value) - - per_replica_results = self.evaluate( - distribution.experimental_local_results( - distribution.run(scatter_div))) - self.assertAllEqual([[0, 2, 1], [0, 2, 1]], per_replica_results) - - def testScatterMul(self, distribution, use_var_policy): - with distribution.scope(): - v = variables_lib.Variable( - [2., 1., 1.], aggregation=variables_lib.VariableAggregation.MEAN) - self.evaluate(v.initializer) - - @def_function.function - def scatter_mul(): - ctx = distribution_strategy_context.get_replica_context() - replica_id = ctx.replica_id_in_sync_group - value = indexed_slices.IndexedSlices( - values=array_ops.reshape( - math_ops.cast(replica_id + 2, dtypes.float32), [1]), - indices=array_ops.reshape(replica_id, [1]), - dense_shape=(3,)) - return v.scatter_mul(value) - - per_replica_results = self.evaluate( - distribution.experimental_local_results( - distribution.run(scatter_mul))) - self.assertAllClose([[2., 1.5, 1.], [2., 1.5, 1.]], per_replica_results) - - def testScatterMin(self, distribution, use_var_policy): - with distribution.scope(): - v1 = variables_lib.Variable( - [0, 2, 0], aggregation=variables_lib.VariableAggregation.SUM) - v2 = variables_lib.Variable( - [0, 2, 0], - aggregation=variables_lib.VariableAggregation.ONLY_FIRST_REPLICA) - self.evaluate(variables_lib.global_variables_initializer()) - - @def_function.function - def scatter_min(v): - value = indexed_slices.IndexedSlices( - values=array_ops.identity([1]), - indices=array_ops.identity([1]), - dense_shape=(3,)) - return v.scatter_min(value) - - with self.assertRaisesRegex(NotImplementedError, "scatter_min.*"): - self.evaluate( - distribution.experimental_local_results( - distribution.run(scatter_min, args=(v1,)))) - - per_replica_results = self.evaluate( - distribution.experimental_local_results( - 
distribution.run(scatter_min, args=(v2,)))) - self.assertAllClose([[0, 1, 0], [0, 1, 0]], per_replica_results) - - def testScatterMax(self, distribution, use_var_policy): - with distribution.scope(): - v1 = variables_lib.Variable( - [0, 0, 0], aggregation=variables_lib.VariableAggregation.SUM) - v2 = variables_lib.Variable( - [0, 0, 0], - aggregation=variables_lib.VariableAggregation.ONLY_FIRST_REPLICA) - self.evaluate(variables_lib.global_variables_initializer()) - - @def_function.function - def scatter_max(v): - value = indexed_slices.IndexedSlices( - values=array_ops.identity([1]), - indices=array_ops.identity([0]), - dense_shape=(3,)) - return v.scatter_max(value) - - with self.assertRaisesRegex(NotImplementedError, "scatter_max.*"): - self.evaluate( - distribution.experimental_local_results( - distribution.run(scatter_max, args=(v1,)))) - - per_replica_results = self.evaluate( - distribution.experimental_local_results( - distribution.run(scatter_max, args=(v2,)))) - self.assertAllClose([[1, 0, 0], [1, 0, 0]], per_replica_results) - - def testScatterUpdate(self, distribution, use_var_policy): - with distribution.scope(): - v1 = variables_lib.Variable( - [0, 0, 0], aggregation=variables_lib.VariableAggregation.SUM) - v2 = variables_lib.Variable( - [0, 0, 0], - aggregation=variables_lib.VariableAggregation.ONLY_FIRST_REPLICA) - self.evaluate(variables_lib.global_variables_initializer()) - - @def_function.function - def scatter_update(v): - value = indexed_slices.IndexedSlices( - values=array_ops.identity([3]), - indices=array_ops.identity([1]), - dense_shape=(3,)) - return v.scatter_update(value) - - with self.assertRaisesRegex(NotImplementedError, "scatter_update.*"): - self.evaluate( - distribution.experimental_local_results( - distribution.run(scatter_update, args=(v1,)))) - - per_replica_results = self.evaluate( - distribution.experimental_local_results( - distribution.run(scatter_update, args=(v2,)))) - self.assertAllClose([[0, 3, 0], [0, 3, 0]], per_replica_results) - - def testScatterOpsInCrossReplicaContext(self, distribution, use_var_policy): - with distribution.scope(): - v1 = variables_lib.Variable( - [1, 1, 1], aggregation=variables_lib.VariableAggregation.SUM) - v2 = variables_lib.Variable([1, 1, 1]) - self.evaluate(variables_lib.global_variables_initializer()) - - value = indexed_slices.IndexedSlices( - values=array_ops.identity([2]), - indices=array_ops.identity([0]), - dense_shape=(3,)) - with distribution.scope(): - self.evaluate(v1.scatter_add(value)) - self.assertAllEqual([3, 1, 1], self.evaluate(v1.read_value())) - - self.evaluate(v2.scatter_min(value)) - self.assertAllEqual([1, 1, 1], self.evaluate(v2.read_value())) - - -def _make_replica_local(method, strategy=None): - if strategy is None: - devices = ("/device:GPU:0", "/device:CPU:0") - else: - devices = strategy.extended.worker_devices - - v = [] - for d, n, init in zip(devices, ["v", "v/replica"], [1., 2.]): - with ops.device(d): - v.append(variable_scope.get_variable( - name=n, initializer=init, use_resource=True)) - - if (strategy is not None) and isinstance(strategy, _TPU_STRATEGIES): - var_cls = tpu_values.TPUSyncOnReadVariable - else: - var_cls = values.SyncOnReadVariable - replica_local = var_cls(strategy, v, method) - return v, replica_local - - -class OnReadVariableSyncTest(test.TestCase, parameterized.TestCase): - - @combinations.generate(strategy_and_run_tf_function_combinations()) - def testAssign(self, distribution, experimental_run_tf_function, - use_var_policy): - - def assign(fn, v, update_value, 
cross_replica): - update_fn = lambda: getattr(v, fn)(update_value) - if cross_replica: - return update_fn() - else: - if experimental_run_tf_function: - update_fn = def_function.function(update_fn) - return distribution.experimental_local_results( - distribution.run(update_fn)) - - updates = [("assign", 1.), ("assign_add", 1.), ("assign_sub", -1.)] - aggregations = [ - variables_lib.VariableAggregation.NONE, - variables_lib.VariableAggregation.SUM, - variables_lib.VariableAggregation.MEAN, - variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, - ] - options = list( - x for x in itertools.product(updates, aggregations, [True, False])) - for update, aggregation, cross_replica in options: - # VariableAggregation.SUM in cross-replica mode is tested below, - # VariableAggregation.NONE in cross-replica mode is not supported. - if cross_replica and aggregation in [ - variables_lib.VariableAggregation.SUM, - variables_lib.VariableAggregation.NONE, - ]: - continue - with distribution.scope(): - v = variable_scope.variable( - 0., - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(variables_lib.global_variables_initializer()) - fn, update_value = update - self.evaluate(assign(fn, v, update_value, cross_replica)) - for component in v._values: - self.assertAllEqual(self.evaluate(component.read_value()), - self.evaluate(array_ops.ones_like(component))) - - @combinations.generate(strategy_and_run_tf_function_combinations()) - def testAssignOnReadVar(self, distribution, experimental_run_tf_function, - use_var_policy): - - with distribution.scope(): - v_to_assign = variable_scope.variable( - 2., aggregation=variables_lib.VariableAggregation.MEAN) - v_to_assign_sub = variable_scope.variable( - -2., aggregation=variables_lib.VariableAggregation.MEAN) - - def assign(fn, v, update_value, cross_replica): - update_fn = lambda: getattr(v, fn)(update_value) - if cross_replica: - return update_fn() - else: - if experimental_run_tf_function: - update_fn = def_function.function(update_fn) - return distribution.experimental_local_results( - distribution.run(update_fn)) - - updates = [("assign", v_to_assign), ("assign_add", v_to_assign), - ("assign_sub", v_to_assign_sub)] - expected_cross_replica = { - variables_lib.VariableAggregation.SUM: 1.0, - variables_lib.VariableAggregation.MEAN: 2.0, - variables_lib.VariableAggregation.ONLY_FIRST_REPLICA: 2.0 - } - expected_replica = { - variables_lib.VariableAggregation.SUM: 2.0, - variables_lib.VariableAggregation.MEAN: 2.0, - variables_lib.VariableAggregation.ONLY_FIRST_REPLICA: 2.0 - } - # aggregation=NONE is not supported for OnReadVariables. - aggregations = [ - variables_lib.VariableAggregation.SUM, - variables_lib.VariableAggregation.MEAN, - variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, - ] - options = list( - x for x in itertools.product(updates, aggregations, [True, False])) - for update, aggregation, cross_replica in options: - # assign in replica context with SUM does not make sense cause you can - # just do value * num replicas error is 1. 
is not a distributed value and - # is unsupported for aggregation SUM - if aggregation == variables_lib.VariableAggregation.SUM: - continue - with distribution.scope(): - v = variable_scope.variable( - 0., - aggregation=aggregation) - self.evaluate(variables_lib.global_variables_initializer()) - fn, update_value = update - self.evaluate(assign(fn, v, update_value, cross_replica)) - if cross_replica: - for component in v._values: - self.assertAllEqual(expected_cross_replica.get(aggregation), - self.evaluate(component.read_value())) - else: - for component in v._values: - self.assertAllEqual(expected_replica.get(aggregation), - self.evaluate(component.read_value())) - - @combinations.generate(strategy_and_run_tf_function_combinations()) - def testAssignPerReplicaVal(self, distribution, experimental_run_tf_function, - use_var_policy): - - if isinstance(distribution, _TPU_STRATEGIES): - self.skipTest("Assigning PerReplica values is not supported. See" - " sponge/80ba41f8-4220-4516-98ce-bbad48f9f11a.") - - self.skipTest("We don't support assigning PerReplica values in cross " - "replica context or replica context. see error in " - "sponge/2b2e54c1-eda6-4534-82e1-c73b1dcd517f.") - - with distribution.scope(): - per_replica_value = values.PerReplica( - [constant_op.constant(2.0), - constant_op.constant(2.0)]) - - def assign(fn, v, update_value, cross_replica): - update_fn = lambda: getattr(v, fn)(update_value) - if cross_replica: - return update_fn() - else: - if experimental_run_tf_function: - update_fn = def_function.function(update_fn) - return distribution.experimental_local_results( - distribution.run(update_fn)) - - updates = [("assign", per_replica_value)] - # We don't support assigning PerReplica values to vars in replica context - # with aggregation=NONE. - aggregations = [ - variables_lib.VariableAggregation.SUM, - variables_lib.VariableAggregation.MEAN, - variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, - ] - options = list( - x for x in itertools.product(updates, aggregations, [True, False])) - for update, aggregation, cross_replica in options: - # assign in replica context with SUM does not make sense cause you can - # just do value * num replicas error is 1. 
is not a distributed value and - # is unsupported for aggregation SUM - with distribution.scope(): - v = variable_scope.variable( - 0., - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(variables_lib.global_variables_initializer()) - fn, update_value = update - # with self.assertRaisesRegex(ValueError, "Attempt to convert a value "): - self.evaluate(assign(fn, v, update_value, cross_replica)) - if aggregation == variables_lib.VariableAggregation.SUM: - expected = 4.0 - else: - expected = 2.0 - for component in v._values: - self.assertAllEqual(expected, self.evaluate(component.read_value())) - - @combinations.generate(strategy_and_run_tf_function_combinations()) - def testAssignDtypeConversion(self, distribution, - experimental_run_tf_function, - use_var_policy): - - def assign(fn, v, update_value, cross_replica): - update_fn = lambda: getattr(v, fn)(update_value) - if cross_replica: - return update_fn() - else: - if experimental_run_tf_function: - update_fn = def_function.function(update_fn) - return distribution.experimental_local_results( - distribution.run(update_fn)) - - updates = [("assign", 1), ("assign_add", 1), ("assign_sub", -1)] - aggregations = [ - variables_lib.VariableAggregation.NONE, - variables_lib.VariableAggregation.SUM, - variables_lib.VariableAggregation.MEAN, - variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, - ] - options = list( - x for x in itertools.product(updates, aggregations, [True, False])) - for update, aggregation, cross_replica in options: - # VariableAggregation.SUM in cross-replica mode is tested below, - # VariableAggregation.NONE in cross-replica mode is not supported. - if cross_replica and aggregation in [ - variables_lib.VariableAggregation.SUM, - variables_lib.VariableAggregation.NONE, - ]: - continue - with distribution.scope(): - v = variable_scope.variable( - 0., - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(variables_lib.global_variables_initializer()) - fn, update_value = update - self.evaluate(assign(fn, v, update_value, cross_replica)) - for component in v._values: - self.assertAllEqual(self.evaluate(component.read_value()), - self.evaluate(array_ops.ones_like(component))) - - @combinations.generate(strategy_with_var_policy()) - def testAssignWithAggregationSum(self, distribution, use_var_policy): - with distribution.scope(): - v = variable_scope.variable( - 0., - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=variables_lib.VariableAggregation.SUM) - self.evaluate(variables_lib.global_variables_initializer()) - self.evaluate(v.assign(1. 
* distribution.num_replicas_in_sync)) - for component in v._values: - self.assertAllEqual(self.evaluate(component.read_value()), - self.evaluate(array_ops.ones_like(component))) - - @combinations.generate(strategy_with_var_policy()) - def testAssignAddSubWithAggregationSum(self, distribution, use_var_policy): - with distribution.scope(): - v = variable_scope.variable( - 0., - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=variables_lib.VariableAggregation.SUM) - self.evaluate(variables_lib.global_variables_initializer()) - with self.assertRaisesRegex( - ValueError, "SyncOnReadVariable does not support "): - self.evaluate(v.assign_add(1.)) - with self.assertRaisesRegex( - ValueError, "SyncOnReadVariable does not support "): - self.evaluate(v.assign_sub(1.)) - - @combinations.generate(strategy_and_run_tf_function_combinations()) - def testReadValueInReplicaContext(self, distribution, - experimental_run_tf_function, - use_var_policy): - aggregations = [ - variables_lib.VariableAggregation.NONE, - variables_lib.VariableAggregation.SUM, - variables_lib.VariableAggregation.MEAN, - variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, - ] - for aggregation in aggregations: - with distribution.scope(): - v = variable_scope.variable( - 0., - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(variables_lib.global_variables_initializer()) - if experimental_run_tf_function: - read_var_fn = def_function.function(v.read_value) - else: - read_var_fn = v.read_value - results = self.evaluate( - distribution.experimental_local_results( - distribution.run(read_var_fn))) - for component, value in zip(v._values, results): - self.assertAllEqual(self.evaluate(component.read_value()), value) - - @combinations.generate(strategy_and_run_tf_function_combinations()) - def testReadValueInCrossReplicaContext(self, distribution, - experimental_run_tf_function, - use_var_policy): - aggregations = [ - variables_lib.VariableAggregation.SUM, - variables_lib.VariableAggregation.MEAN, - variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, - ] - for aggregation in aggregations: - if isinstance(distribution, _TPU_STRATEGIES): - resolver = tpu_cluster_resolver.TPUClusterResolver("") - tpu_strategy_util.initialize_tpu_system(resolver) - with distribution.scope(): - v = variable_scope.variable( - 0., - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(variables_lib.global_variables_initializer()) - - def assign(v=v): - ctx = distribution_strategy_context.get_replica_context() - replica_id = ctx.replica_id_in_sync_group - return v.assign(math_ops.cast(replica_id, dtypes.float32)) - - if experimental_run_tf_function: - assign = def_function.function(assign) - - self.evaluate( - distribution.experimental_local_results(distribution.run(assign))) - num_replicas = distribution.num_replicas_in_sync - sum_of_replica_values = num_replicas * (num_replicas - 1) / 2. 
- if aggregation == variables_lib.VariableAggregation.SUM: - expected = sum_of_replica_values - elif aggregation == variables_lib.VariableAggregation.MEAN: - expected = sum_of_replica_values / num_replicas - else: - expected = 0 - self.assertEqual(expected, self.evaluate(v.read_value()), aggregation) - self.assertEqual(expected, self.evaluate(v.value()), aggregation) - self.assertEqual(expected, self.evaluate(v), aggregation) - self.assertEqual(expected, self.evaluate(array_ops.identity(v)), - aggregation) - - # TODO(b/145574622): Re-enable this test once ReduceOp argument is - # respected on GPUs. - @combinations.generate(strategy_and_run_tf_function_combinations()) - def disable_testAllReduce(self, distribution, - experimental_run_tf_function, - use_var_policy): - with distribution.scope(): - v = variable_scope.variable( - 2., - synchronization=variables_lib.VariableSynchronization.ON_WRITE, - aggregation=variables_lib.VariableAggregation.MEAN) - self.evaluate(variables_lib.global_variables_initializer()) - - def all_reduce(): - ctx = distribution_strategy_context.get_replica_context() - replica_id = ctx.replica_id_in_sync_group - return ctx.all_reduce("SUM", v) + math_ops.cast(replica_id, - dtypes.float32) - - if experimental_run_tf_function: - all_reduce = def_function.function(all_reduce) - - per_replica_results = self.evaluate( - distribution.experimental_local_results(distribution.run(all_reduce))) - expected_result = [] - for i in range(distribution.num_replicas_in_sync): - expected_result.append(2.0 * distribution.num_replicas_in_sync + - 1.0 * i) - self.assertEqual(per_replica_results, tuple(expected_result)) - - @combinations.generate(strategy_and_run_tf_function_combinations()) - def testAssignPerReplicaBeforeRead(self, distribution, - experimental_run_tf_function, - use_var_policy): - aggregations = [ - variables_lib.VariableAggregation.SUM, - variables_lib.VariableAggregation.MEAN, - variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, - ] - for aggregation in aggregations: - with distribution.scope(): - v = variable_scope.variable( - 0., - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(variables_lib.global_variables_initializer()) - - def assign(var=v): - ctx = distribution_strategy_context.get_replica_context() - replica_id = ctx.replica_id_in_sync_group - return var.assign(math_ops.cast(replica_id, dtypes.float32)) - - if experimental_run_tf_function: - assign = def_function.function(assign) - - per_replica_results = self.evaluate( - distribution.experimental_local_results(distribution.run(assign))) - expected_result = [] - for i in range(distribution.num_replicas_in_sync): - expected_result.append(1.0 * i) - self.assertEqual(per_replica_results, tuple(expected_result)) - - @combinations.generate(strategy_with_var_policy()) - def testReadValueWithAggregationNoneInCrossReplicaContext(self, distribution, - use_var_policy): - with distribution.scope(): - v = variable_scope.variable( - 0., - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=variables_lib.VariableAggregation.NONE) - self.evaluate(variables_lib.global_variables_initializer()) - with self.assertRaisesRegex( - ValueError, "Could not convert from .* VariableAggregation\\.NONE"): - self.evaluate(v.read_value()) - - @combinations.generate(strategy_with_var_policy()) - def testInitializedToSameValueInsideEagerRun(self, distribution, - use_var_policy): - if not context.executing_eagerly(): self.skipTest("eager only") - - v = [None] - 
@def_function.function - def step(): - def f(): - if v[0] is None: - v[0] = variables_lib.Variable( - random_ops.random_normal([]), - synchronization=variables_lib.VariableSynchronization.ON_READ) - - distribution.run(f) - - context.set_global_seed(None) - step() - vals = self.evaluate(v[0].values) - self.assertAllEqual(vals[0], vals[1]) - - @combinations.generate(strategy_with_var_policy()) - def testOperatorOverride(self, distribution, use_var_policy): - - with distribution.scope(): - v = variable_scope.variable( - 0.0, - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=variables_lib.VariableAggregation.MEAN) - self.evaluate(variables_lib.global_variables_initializer()) - - @def_function.function - def assign(): - ctx = distribution_strategy_context.get_replica_context() - replica_id = ctx.replica_id_in_sync_group - return v.assign(math_ops.cast(replica_id, dtypes.float32)) - - # Assign different replicas with different values. - self.evaluate(distribution.experimental_local_results( - distribution.run(assign))) - self.assertEqual(1.5, self.evaluate(v + 1)) - - @def_function.function - def add(): - return v + 1 - - per_replica_results = self.evaluate( - distribution.experimental_local_results(distribution.run(add))) - self.assertAllEqual([1, 2], per_replica_results) - - -@combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - ], - aggregation=[ - variables_lib.VariableAggregation.MEAN, - variables_lib.VariableAggregation.SUM, - variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, - ], - mode=["graph", "eager"], - use_var_policy=[True, False])) -class SyncOnReadScatterReplicaTest(test.TestCase, parameterized.TestCase): - - def testScatterSub(self, distribution, aggregation, use_var_policy): - with distribution.scope(): - v = variables_lib.Variable( - [1., 1., 1.], - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(v.initializer) - - delta = values.PerReplica([ - indexed_slices.IndexedSlices( - values=[[0.], [1.]], indices=[0, 1], dense_shape=(3,)), - indexed_slices.IndexedSlices( - values=[[1.], [2.]], indices=[1, 2], dense_shape=(3,)), - ]) - - with self.assertRaises(NotImplementedError): - self.evaluate(distribution.run(v.scatter_sub, args=(delta,))) - - def testScatterAdd(self, distribution, aggregation, use_var_policy): - with distribution.scope(): - v = variables_lib.Variable( - [1., 1., 1.], - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(v.initializer) - - delta = values.PerReplica([ - indexed_slices.IndexedSlices( - values=[[0.], [1.]], indices=[0, 1], dense_shape=(3,)), - indexed_slices.IndexedSlices( - values=[[1.], [2.]], indices=[1, 2], dense_shape=(3,)), - ]) - - with self.assertRaises(NotImplementedError): - self.evaluate(distribution.run(v.scatter_add, args=(delta,))) - - def testScatterDiv(self, distribution, aggregation, use_var_policy): - with distribution.scope(): - v = variables_lib.Variable( - [2., 6., 1.], - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(v.initializer) - - delta = values.PerReplica([ - indexed_slices.IndexedSlices( - values=[[2.], [2.]], indices=[0, 1], dense_shape=(3,)), - indexed_slices.IndexedSlices( - values=[[3.], [3.]], indices=[1, 2], dense_shape=(3,)), - ]) - - with self.assertRaises(NotImplementedError): - self.evaluate(distribution.run(v.scatter_div, args=(delta,))) - - 
def testScatterMul(self, distribution, aggregation, use_var_policy): - with distribution.scope(): - v = variables_lib.Variable( - [2., 1., 1.], - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(v.initializer) - - delta = values.PerReplica([ - indexed_slices.IndexedSlices( - values=[[2.], [3.]], indices=[0, 1], dense_shape=(3,)), - indexed_slices.IndexedSlices( - values=[[4.], [5.]], indices=[1, 2], dense_shape=(3,)), - ]) - - with self.assertRaises(NotImplementedError): - self.evaluate(distribution.run(v.scatter_mul, args=(delta,))) - - def testScatterMin(self, distribution, aggregation, use_var_policy): - with distribution.scope(): - v = variables_lib.Variable( - [3., 4., 5.], - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(v.initializer) - - delta = values.PerReplica([ - indexed_slices.IndexedSlices( - values=[[1.], [8.]], indices=[0, 1], dense_shape=(3,)), - indexed_slices.IndexedSlices( - values=[[9.], [2.]], indices=[1, 2], dense_shape=(3,)), - ]) - - with self.assertRaises(NotImplementedError): - self.evaluate(distribution.run(v.scatter_min, args=(delta,))) - - def testScatterMax(self, distribution, aggregation, use_var_policy): - with distribution.scope(): - v = variables_lib.Variable( - [3., 4., 5.], - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(v.initializer) - - delta = values.PerReplica([ - indexed_slices.IndexedSlices( - values=[[1.], [8.]], indices=[0, 1], dense_shape=(3,)), - indexed_slices.IndexedSlices( - values=[[9.], [2.]], indices=[1, 2], dense_shape=(3,)), - ]) - - with self.assertRaises(NotImplementedError): - self.evaluate(distribution.run(v.scatter_max, args=(delta,))) - - def testScatterUpdate(self, distribution, aggregation, use_var_policy): - with distribution.scope(): - v = variables_lib.Variable( - [0., 0., 0.], - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(v.initializer) - - delta = values.PerReplica([ - indexed_slices.IndexedSlices( - values=[[1.], [2.]], indices=[0, 1], dense_shape=(3,)), - indexed_slices.IndexedSlices( - values=[[3.], [4.]], indices=[1, 2], dense_shape=(3,)), - ]) - - with self.assertRaises(NotImplementedError): - self.evaluate(distribution.run(v.scatter_min, args=(delta,))) - - -def _make_index_slices(vals, indices, dense_shape=None): - if dense_shape: - dense_shape = array_ops.identity(dense_shape) - return indexed_slices.IndexedSlices( - array_ops.identity(vals), array_ops.identity(indices), dense_shape) - - -if __name__ == "__main__": - test.main() From 664a1515be130ec1d331a6a099850828c7c7ad0b Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 27 Jul 2020 00:24:33 -0700 Subject: [PATCH 1364/2522] Remove dead code; NFC PiperOrigin-RevId: 323311277 Change-Id: Iafb1100947d82391e2a377c2e7967224819c925c --- tensorflow/compiler/xla/service/elemental_ir_emitter.cc | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc index 4b6c30cadc4..98d523487b4 100644 --- a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc @@ -2462,10 +2462,6 @@ llvm_ir::ElementGenerator ElementalIrEmitter::MakeElementGenerator( operand_to_generator.at(hlo->operand(i))(index)); operands.push_back(operand_value); } - std::vector input_generators; - for (const 
HloInstruction* instr : hlo->operands()) { - input_generators.push_back(operand_to_generator.at(instr)); - } return EmitElementalMap(Cast(hlo), operands); }; case HloOpcode::kReduceWindow: From 9f2e1a72467c2e85097d29bd2300e95f58a1b4ec Mon Sep 17 00:00:00 2001 From: Renjie Liu Date: Mon, 27 Jul 2020 00:50:47 -0700 Subject: [PATCH 1365/2522] Not fuse keras lstm if batch size is unknown. PiperOrigin-RevId: 323314051 Change-Id: I84890db11e1a244f0fc676d1b233f60fd47a4185 --- .../tests/prepare-composite-functions-tf.mlir | 156 ++++++++++-------- .../prepare_composite_functions_tf.cc | 9 + 2 files changed, 100 insertions(+), 65 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/tests/prepare-composite-functions-tf.mlir b/tensorflow/compiler/mlir/lite/tests/prepare-composite-functions-tf.mlir index 684c58ce003..a596595b2eb 100644 --- a/tensorflow/compiler/mlir/lite/tests/prepare-composite-functions-tf.mlir +++ b/tensorflow/compiler/mlir/lite/tests/prepare-composite-functions-tf.mlir @@ -154,18 +154,18 @@ func @layernormalizedlstmcellsimple(%arg0: tensor<1x?xf32>, %arg1: tensor<3x4xf3 // ----- module { -func @inference_standard_lstm_time_major(%arg0: tensor, %arg1: tensor, %arg2: tensor, %arg3: tensor<8x40xf32>, %arg4: tensor<10x40xf32>, %arg5: tensor<40xf32>) -> (tensor, tensor, tensor, tensor, tensor) attributes {tf._input_shapes = ["tfshape$dim { size: -1 } dim { size: 8 } dim { size: 8 }", "tfshape$dim { size: -1 } dim { size: 10 }", "tfshape$dim { size: -1 } dim { size: 10 }", "tfshape$unknown_rank: true", "tfshape$unknown_rank: true", "tfshape$unknown_rank: true"], tf.api_implements = "lstm_b4e9f0e7-ac55-42bc-8ef2-8496419a608c", tf.api_preferred_device = "CPU", tf.go_backwards = false, tf.time_major = true} { +func @inference_standard_lstm_time_major(%arg0: tensor, %arg1: tensor<8x10xf32>, %arg2: tensor<8x10xf32>, %arg3: tensor<8x40xf32>, %arg4: tensor<10x40xf32>, %arg5: tensor<40xf32>) -> (tensor<8x10xf32>, tensor, tensor<8x10xf32>, tensor<8x10xf32>, tensor) attributes {tf._input_shapes = ["tfshape$dim { size: -1 } dim { size: 8 } dim { size: 8 }", "tfshape$dim { size: 8 } dim { size: 10 }", "tfshape$dim { size: 8 } dim { size: 10 }", "tfshape$unknown_rank: true", "tfshape$unknown_rank: false", "tfshape$unknown_rank: false"], tf.api_implements = "lstm_b4e9f0e7-ac55-42bc-8ef2-8496419a608c", tf.api_preferred_device = "CPU", tf.go_backwards = false, tf.time_major = true} { %0 = "tf.BatchMatMulV2"(%arg0, %arg3) {adj_x = false, adj_y = false} : (tensor, tensor<8x40xf32>) -> tensor %1 = "tf.Add"(%0, %arg5) : (tensor, tensor<40xf32>) -> tensor %2 = "tf.BatchMatMulV2"(%1, %arg4) {adj_x = false, adj_y = true} : (tensor, tensor<10x40xf32>) -> tensor - %3 = "tf.Add"(%2, %arg1) : (tensor, tensor) -> tensor - %4 = "tf.Add"(%2, %arg2) : (tensor, tensor) -> tensor - %5 = "tf.Add"(%arg1, %arg2) : (tensor, tensor) -> tensor + %3 = "tf.Add"(%2, %arg1) : (tensor, tensor<8x10xf32>) -> tensor + %4 = "tf.Add"(%2, %arg2) : (tensor, tensor<8x10xf32>) -> tensor + %5 = "tf.Add"(%arg1, %arg2) : (tensor<8x10xf32>, tensor<8x10xf32>) -> tensor<8x10xf32> %6 = "tf.Const"() {_output_shapes = ["tfshape$"], device = "/device:CPU:0", dtype = f32, value = dense<1.000000e+00> : tensor} : () -> tensor - return %5, %4, %5, %5, %6 : tensor, tensor, tensor, tensor, tensor + return %5, %4, %5, %5, %6 : tensor<8x10xf32>, tensor, tensor<8x10xf32>, tensor<8x10xf32>, tensor } -// CHECK: func @inference_standard_lstm_time_major([[VAL_0:%.*]]: tensor, [[VAL_1:%.*]]: tensor, [[VAL_2:%.*]]: tensor, [[VAL_3:%.*]]: tensor<8x40xf32>, 
[[VAL_4:%.*]]: tensor<10x40xf32>, [[VAL_5:%.*]]: tensor<40xf32>) -> (tensor<8x10xf32>, tensor, tensor, tensor, tensor) attributes {tf._input_shapes = ["tfshape$dim { size: -1 } dim { size: 8 } dim { size: 8 }", "tfshape$dim { size: -1 } dim { size: 10 }", "tfshape$dim { size: -1 } dim { size: 10 }", "tfshape$unknown_rank: true", "tfshape$unknown_rank: true", "tfshape$unknown_rank: true"], tf.api_implements = "lstm_b4e9f0e7-ac55-42bc-8ef2-8496419a608c", tf.api_preferred_device = "CPU", tf.go_backwards = false, tf.time_major = true} { +// CHECK: func @inference_standard_lstm_time_major([[VAL_0:%.*]]: tensor, [[VAL_1:%.*]]: tensor<8x10xf32>, [[VAL_2:%.*]]: tensor<8x10xf32>, [[VAL_3:%.*]]: tensor<8x40xf32>, [[VAL_4:%.*]]: tensor<10x40xf32>, [[VAL_5:%.*]]: tensor<40xf32>) -> (tensor<8x10xf32>, tensor, tensor<8x10xf32>, tensor<8x10xf32>, tensor) attributes {tf._input_shapes = ["tfshape$dim { size: -1 } dim { size: 8 } dim { size: 8 }", "tfshape$dim { size: 8 } dim { size: 10 }", "tfshape$dim { size: 8 } dim { size: 10 }", "tfshape$unknown_rank: true", "tfshape$unknown_rank: false", "tfshape$unknown_rank: false"], tf.api_implements = "lstm_b4e9f0e7-ac55-42bc-8ef2-8496419a608c", tf.api_preferred_device = "CPU", tf.go_backwards = false, tf.time_major = true} { // CHECK: [[VAL_6:%.*]] = constant dense<[1, 0]> : tensor<2xi32> // CHECK: [[VAL_7:%.*]] = "tf.Transpose"([[VAL_3]], [[VAL_6]]) : (tensor<8x40xf32>, tensor<2xi32>) -> tensor<40x8xf32> // CHECK: [[VAL_8:%.*]] = constant dense<[1, 0]> : tensor<2xi32> @@ -180,33 +180,33 @@ func @inference_standard_lstm_time_major(%arg0: tensor, %arg1: tensor // CHECK: [[VAL_17:%.*]] = "tf.Const"() {value = dense<0> : tensor} : () -> tensor // CHECK: [[VAL_18:%.*]]:4 = "tf.SplitV"([[VAL_5]], [[VAL_16]], [[VAL_17]]) : (tensor<40xf32>, tensor<4xi32>, tensor) -> (tensor<10xf32>, tensor<10xf32>, tensor<10xf32>, tensor<10xf32>) // CHECK: [[VAL_19:%.*]] = constant unit -// CHECK: [[VAL_20:%.*]] = "tfl.unidirectional_sequence_lstm"([[VAL_0]], [[VAL_12]]#0, [[VAL_12]]#1, [[VAL_12]]#2, [[VAL_12]]#3, [[VAL_15]]#0, [[VAL_15]]#1, [[VAL_15]]#2, [[VAL_15]]#3, [[VAL_19]], [[VAL_19]], [[VAL_19]], [[VAL_18]]#0, [[VAL_18]]#1, [[VAL_18]]#2, [[VAL_18]]#3, [[VAL_19]], [[VAL_19]], [[VAL_1]], [[VAL_2]], [[VAL_19]], [[VAL_19]], [[VAL_19]], [[VAL_19]]) {cell_clip = 1.000000e+01 : f32, fused_activation_function = "TANH", proj_clip = 0.000000e+00 : f32, time_major = true} : (tensor, tensor<10x8xf32>, tensor<10x8xf32>, tensor<10x8xf32>, tensor<10x8xf32>, tensor<10x10xf32>, tensor<10x10xf32>, tensor<10x10xf32>, tensor<10x10xf32>, none, none, none, tensor<10xf32>, tensor<10xf32>, tensor<10xf32>, tensor<10xf32>, none, none, tensor, tensor, none, none, none, none) -> tensor +// CHECK: [[VAL_20:%.*]] = "tfl.unidirectional_sequence_lstm"([[VAL_0]], [[VAL_12]]#0, [[VAL_12]]#1, [[VAL_12]]#2, [[VAL_12]]#3, [[VAL_15]]#0, [[VAL_15]]#1, [[VAL_15]]#2, [[VAL_15]]#3, [[VAL_19]], [[VAL_19]], [[VAL_19]], [[VAL_18]]#0, [[VAL_18]]#1, [[VAL_18]]#2, [[VAL_18]]#3, [[VAL_19]], [[VAL_19]], [[VAL_1]], [[VAL_2]], [[VAL_19]], [[VAL_19]], [[VAL_19]], [[VAL_19]]) {cell_clip = 1.000000e+01 : f32, fused_activation_function = "TANH", proj_clip = 0.000000e+00 : f32, time_major = true} : (tensor, tensor<10x8xf32>, tensor<10x8xf32>, tensor<10x8xf32>, tensor<10x8xf32>, tensor<10x10xf32>, tensor<10x10xf32>, tensor<10x10xf32>, tensor<10x10xf32>, none, none, none, tensor<10xf32>, tensor<10xf32>, tensor<10xf32>, tensor<10xf32>, none, none, tensor<8x10xf32>, tensor<8x10xf32>, none, none, none, none) -> tensor // CHECK: 
[[VAL_21:%.*]] = constant dense<[-1, 0, 0]> : tensor<3xi32> // CHECK: [[VAL_22:%.*]] = constant dense<0> : tensor<3xi32> // CHECK: [[VAL_23:%.*]] = constant dense<1> : tensor<3xi32> // CHECK: [[VAL_24:%.*]] = "tf.StridedSlice"([[VAL_20]], [[VAL_21]], [[VAL_22]], [[VAL_23]]) {begin_mask = 6 : i64, ellipsis_mask = 0 : i64, end_mask = 6 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<3xi32>, tensor<3xi32>, tensor<3xi32>) -> tensor<8x10xf32> -// CHECK: [[VAL_25:%.*]] = "tf.Const"() {value = dense<0.000000e+00> : tensor<1xf32>} : () -> tensor -// CHECK: [[VAL_26:%.*]] = "tf.Const"() {value = dense<0.000000e+00> : tensor<1xf32>} : () -> tensor +// CHECK: [[VAL_25:%.*]] = "tf.Const"() {value = dense<0.000000e+00> : tensor<1xf32>} : () -> tensor<8x10xf32> +// CHECK: [[VAL_26:%.*]] = "tf.Const"() {value = dense<0.000000e+00> : tensor<1xf32>} : () -> tensor<8x10xf32> // CHECK: [[VAL_27:%.*]] = "tf.Const"() {value = dense<0.000000e+00> : tensor<1xf32>} : () -> tensor -// CHECK: return [[VAL_24]], [[VAL_20]], [[VAL_25]], [[VAL_26]], [[VAL_27]] : tensor<8x10xf32>, tensor, tensor, tensor, tensor +// CHECK: return [[VAL_24]], [[VAL_20]], [[VAL_25]], [[VAL_26]], [[VAL_27]] : tensor<8x10xf32>, tensor, tensor<8x10xf32>, tensor<8x10xf32>, tensor // CHECK: } } // ----- module { -func @inference_standard_lstm_non_time_major(%arg0: tensor<8x8x8xf32>, %arg1: tensor, %arg2: tensor, %arg3: tensor<8x40xf32>, %arg4: tensor<10x40xf32>, %arg5: tensor<40xf32>) -> (tensor, tensor<8x8x10xf32>, tensor, tensor, tensor) attributes {tf._input_shapes = ["tfshape$dim { size: -1 } dim { size: 8 } dim { size: 8 }", "tfshape$dim { size: -1 } dim { size: 10 }", "tfshape$dim { size: -1 } dim { size: 10 }", "tfshape$unknown_rank: true", "tfshape$unknown_rank: true", "tfshape$unknown_rank: true"], tf.api_implements = "lstm_b4e9f0e7-ac55-42bc-8ef2-8496419a608c", tf.api_preferred_device = "CPU", tf.go_backwards = false, tf.time_major = false} { +func @inference_standard_lstm_non_time_major(%arg0: tensor<8x8x8xf32>, %arg1: tensor<8x10xf32>, %arg2: tensor<8x10xf32>, %arg3: tensor<8x40xf32>, %arg4: tensor<10x40xf32>, %arg5: tensor<40xf32>) -> (tensor<8x10xf32>, tensor<8x8x10xf32>, tensor<8x10xf32>, tensor<8x10xf32>, tensor) attributes {tf._input_shapes = ["tfshape$dim { size: -1 } dim { size: 8 } dim { size: 8 }", "tfshape$dim { size: 8 } dim { size: 10 }", "tfshape$dim { size: 8 } dim { size: 10 }", "tfshape$unknown_rank: true", "tfshape$unknown_rank: false", "tfshape$unknown_rank: false"], tf.api_implements = "lstm_b4e9f0e7-ac55-42bc-8ef2-8496419a608c", tf.api_preferred_device = "CPU", tf.go_backwards = false, tf.time_major = false} { %0 = "tf.BatchMatMulV2"(%arg0, %arg3) {adj_x = false, adj_y = false} : (tensor<8x8x8xf32>, tensor<8x40xf32>) -> tensor<8x8x40xf32> %1 = "tf.Add"(%0, %arg5) : (tensor<8x8x40xf32>, tensor<40xf32>) -> tensor<8x8x40xf32> %2 = "tf.BatchMatMulV2"(%1, %arg4) {adj_x = false, adj_y = true} : (tensor<8x8x40xf32>, tensor<10x40xf32>) -> tensor<8x8x10xf32> - %3 = "tf.Add"(%2, %arg1) : (tensor<8x8x10xf32>, tensor) -> tensor<8x8x10xf32> - %4 = "tf.Add"(%2, %arg2) : (tensor<8x8x10xf32>, tensor) -> tensor<8x8x10xf32> - %5 = "tf.Add"(%arg1, %arg2) : (tensor, tensor) -> tensor + %3 = "tf.Add"(%2, %arg1) : (tensor<8x8x10xf32>, tensor<8x10xf32>) -> tensor<8x8x10xf32> + %4 = "tf.Add"(%2, %arg2) : (tensor<8x8x10xf32>, tensor<8x10xf32>) -> tensor<8x8x10xf32> + %5 = "tf.Add"(%arg1, %arg2) : (tensor<8x10xf32>, tensor<8x10xf32>) -> tensor<8x10xf32> %6 = "tf.Const"() {_output_shapes = ["tfshape$"], 
device = "/device:CPU:0", dtype = f32, value = dense<1.000000e+00> : tensor} : () -> tensor - return %5, %4, %5, %5, %6 : tensor, tensor<8x8x10xf32>, tensor, tensor, tensor + return %5, %4, %5, %5, %6 : tensor<8x10xf32>, tensor<8x8x10xf32>, tensor<8x10xf32>, tensor<8x10xf32>, tensor } -// CHECK: func @inference_standard_lstm_non_time_major([[VAL_0:%.*]]: tensor<8x8x8xf32>, [[VAL_1:%.*]]: tensor, [[VAL_2:%.*]]: tensor, [[VAL_3:%.*]]: tensor<8x40xf32>, [[VAL_4:%.*]]: tensor<10x40xf32>, [[VAL_5:%.*]]: tensor<40xf32>) -> (tensor<8x10xf32>, tensor<8x8x10xf32>, tensor, tensor, tensor) attributes {tf._input_shapes = ["tfshape$dim { size: -1 } dim { size: 8 } dim { size: 8 }", "tfshape$dim { size: -1 } dim { size: 10 }", "tfshape$dim { size: -1 } dim { size: 10 }", "tfshape$unknown_rank: true", "tfshape$unknown_rank: true", "tfshape$unknown_rank: true"], tf.api_implements = "lstm_b4e9f0e7-ac55-42bc-8ef2-8496419a608c", tf.api_preferred_device = "CPU", tf.go_backwards = false, tf.time_major = false} { +// CHECK: func @inference_standard_lstm_non_time_major([[VAL_0:%.*]]: tensor<8x8x8xf32>, [[VAL_1:%.*]]: tensor<8x10xf32>, [[VAL_2:%.*]]: tensor<8x10xf32>, [[VAL_3:%.*]]: tensor<8x40xf32>, [[VAL_4:%.*]]: tensor<10x40xf32>, [[VAL_5:%.*]]: tensor<40xf32>) -> (tensor<8x10xf32>, tensor<8x8x10xf32>, tensor<8x10xf32>, tensor<8x10xf32>, tensor) attributes {tf._input_shapes = ["tfshape$dim { size: -1 } dim { size: 8 } dim { size: 8 }", "tfshape$dim { size: 8 } dim { size: 10 }", "tfshape$dim { size: 8 } dim { size: 10 }", "tfshape$unknown_rank: true", "tfshape$unknown_rank: false", "tfshape$unknown_rank: false"], tf.api_implements = "lstm_b4e9f0e7-ac55-42bc-8ef2-8496419a608c", tf.api_preferred_device = "CPU", tf.go_backwards = false, tf.time_major = false} { // CHECK: [[VAL_6:%.*]] = constant dense<[1, 0]> : tensor<2xi32> // CHECK: [[VAL_7:%.*]] = "tf.Transpose"([[VAL_3]], [[VAL_6]]) : (tensor<8x40xf32>, tensor<2xi32>) -> tensor<40x8xf32> // CHECK: [[VAL_8:%.*]] = constant dense<[1, 0]> : tensor<2xi32> @@ -221,15 +221,15 @@ func @inference_standard_lstm_non_time_major(%arg0: tensor<8x8x8xf32>, %arg1: te // CHECK: [[VAL_17:%.*]] = "tf.Const"() {value = dense<0> : tensor} : () -> tensor // CHECK: [[VAL_18:%.*]]:4 = "tf.SplitV"([[VAL_5]], [[VAL_16]], [[VAL_17]]) : (tensor<40xf32>, tensor<4xi32>, tensor) -> (tensor<10xf32>, tensor<10xf32>, tensor<10xf32>, tensor<10xf32>) // CHECK: [[VAL_19:%.*]] = constant unit -// CHECK: [[VAL_20:%.*]] = "tfl.unidirectional_sequence_lstm"([[VAL_0]], [[VAL_12]]#0, [[VAL_12]]#1, [[VAL_12]]#2, [[VAL_12]]#3, [[VAL_15]]#0, [[VAL_15]]#1, [[VAL_15]]#2, [[VAL_15]]#3, [[VAL_19]], [[VAL_19]], [[VAL_19]], [[VAL_18]]#0, [[VAL_18]]#1, [[VAL_18]]#2, [[VAL_18]]#3, [[VAL_19]], [[VAL_19]], [[VAL_1]], [[VAL_2]], [[VAL_19]], [[VAL_19]], [[VAL_19]], [[VAL_19]]) {cell_clip = 1.000000e+01 : f32, fused_activation_function = "TANH", proj_clip = 0.000000e+00 : f32, time_major = false} : (tensor<8x8x8xf32>, tensor<10x8xf32>, tensor<10x8xf32>, tensor<10x8xf32>, tensor<10x8xf32>, tensor<10x10xf32>, tensor<10x10xf32>, tensor<10x10xf32>, tensor<10x10xf32>, none, none, none, tensor<10xf32>, tensor<10xf32>, tensor<10xf32>, tensor<10xf32>, none, none, tensor, tensor, none, none, none, none) -> tensor<8x8x10xf32> +// CHECK: [[VAL_20:%.*]] = "tfl.unidirectional_sequence_lstm"([[VAL_0]], [[VAL_12]]#0, [[VAL_12]]#1, [[VAL_12]]#2, [[VAL_12]]#3, [[VAL_15]]#0, [[VAL_15]]#1, [[VAL_15]]#2, [[VAL_15]]#3, [[VAL_19]], [[VAL_19]], [[VAL_19]], [[VAL_18]]#0, [[VAL_18]]#1, [[VAL_18]]#2, [[VAL_18]]#3, [[VAL_19]], [[VAL_19]], 
[[VAL_1]], [[VAL_2]], [[VAL_19]], [[VAL_19]], [[VAL_19]], [[VAL_19]]) {cell_clip = 1.000000e+01 : f32, fused_activation_function = "TANH", proj_clip = 0.000000e+00 : f32, time_major = false} : (tensor<8x8x8xf32>, tensor<10x8xf32>, tensor<10x8xf32>, tensor<10x8xf32>, tensor<10x8xf32>, tensor<10x10xf32>, tensor<10x10xf32>, tensor<10x10xf32>, tensor<10x10xf32>, none, none, none, tensor<10xf32>, tensor<10xf32>, tensor<10xf32>, tensor<10xf32>, none, none, tensor<8x10xf32>, tensor<8x10xf32>, none, none, none, none) -> tensor<8x8x10xf32> // CHECK: [[VAL_21:%.*]] = constant dense<[0, -1, 0]> : tensor<3xi32> // CHECK: [[VAL_22:%.*]] = constant dense<0> : tensor<3xi32> // CHECK: [[VAL_23:%.*]] = constant dense<1> : tensor<3xi32> // CHECK: [[VAL_24:%.*]] = "tf.StridedSlice"([[VAL_20]], [[VAL_21]], [[VAL_22]], [[VAL_23]]) {begin_mask = 5 : i64, ellipsis_mask = 0 : i64, end_mask = 5 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 2 : i64} : (tensor<8x8x10xf32>, tensor<3xi32>, tensor<3xi32>, tensor<3xi32>) -> tensor<8x10xf32> -// CHECK: [[VAL_25:%.*]] = "tf.Const"() {value = dense<0.000000e+00> : tensor<1xf32>} : () -> tensor -// CHECK: [[VAL_26:%.*]] = "tf.Const"() {value = dense<0.000000e+00> : tensor<1xf32>} : () -> tensor +// CHECK: [[VAL_25:%.*]] = "tf.Const"() {value = dense<0.000000e+00> : tensor<1xf32>} : () -> tensor<8x10xf32> +// CHECK: [[VAL_26:%.*]] = "tf.Const"() {value = dense<0.000000e+00> : tensor<1xf32>} : () -> tensor<8x10xf32> // CHECK: [[VAL_27:%.*]] = "tf.Const"() {value = dense<0.000000e+00> : tensor<1xf32>} : () -> tensor -// CHECK: return [[VAL_24]], [[VAL_20]], [[VAL_25]], [[VAL_26]], [[VAL_27]] : tensor<8x10xf32>, tensor<8x8x10xf32>, tensor, tensor, tensor +// CHECK: return [[VAL_24]], [[VAL_20]], [[VAL_25]], [[VAL_26]], [[VAL_27]] : tensor<8x10xf32>, tensor<8x8x10xf32>, tensor<8x10xf32>, tensor<8x10xf32>, tensor // CHECK: } } @@ -237,18 +237,18 @@ func @inference_standard_lstm_non_time_major(%arg0: tensor<8x8x8xf32>, %arg1: te // ----- module { -func @inference_standard_lstm_time_major_go_backwards(%arg0: tensor, %arg1: tensor, %arg2: tensor, %arg3: tensor<8x40xf32>, %arg4: tensor<10x40xf32>, %arg5: tensor<40xf32>) -> (tensor, tensor, tensor, tensor, tensor) attributes {tf._input_shapes = ["tfshape$dim { size: -1 } dim { size: 8 } dim { size: 8 }", "tfshape$dim { size: -1 } dim { size: 10 }", "tfshape$dim { size: -1 } dim { size: 10 }", "tfshape$unknown_rank: true", "tfshape$unknown_rank: true", "tfshape$unknown_rank: true"], tf.api_implements = "lstm_b4e9f0e7-ac55-42bc-8ef2-8496419a608c", tf.api_preferred_device = "CPU", tf.go_backwards = true, tf.time_major = true} { +func @inference_standard_lstm_time_major_go_backwards(%arg0: tensor, %arg1: tensor<8x10xf32>, %arg2: tensor<8x10xf32>, %arg3: tensor<8x40xf32>, %arg4: tensor<10x40xf32>, %arg5: tensor<40xf32>) -> (tensor<8x10xf32>, tensor, tensor<8x10xf32>, tensor<8x10xf32>, tensor) attributes {tf._input_shapes = ["tfshape$dim { size: -1 } dim { size: 8 } dim { size: 8 }", "tfshape$dim { size: 8 } dim { size: 10 }", "tfshape$dim { size: 8 } dim { size: 10 }", "tfshape$unknown_rank: true", "tfshape$unknown_rank: false", "tfshape$unknown_rank: false"], tf.api_implements = "lstm_b4e9f0e7-ac55-42bc-8ef2-8496419a608c", tf.api_preferred_device = "CPU", tf.go_backwards = true, tf.time_major = true} { %0 = "tf.BatchMatMulV2"(%arg0, %arg3) {adj_x = false, adj_y = false} : (tensor, tensor<8x40xf32>) -> tensor %1 = "tf.Add"(%0, %arg5) : (tensor, tensor<40xf32>) -> tensor %2 = "tf.BatchMatMulV2"(%1, %arg4) {adj_x = false, adj_y = true} 
: (tensor, tensor<10x40xf32>) -> tensor - %3 = "tf.Add"(%2, %arg1) : (tensor, tensor) -> tensor - %4 = "tf.Add"(%2, %arg2) : (tensor, tensor) -> tensor - %5 = "tf.Add"(%arg1, %arg2) : (tensor, tensor) -> tensor + %3 = "tf.Add"(%2, %arg1) : (tensor, tensor<8x10xf32>) -> tensor + %4 = "tf.Add"(%2, %arg2) : (tensor, tensor<8x10xf32>) -> tensor + %5 = "tf.Add"(%arg1, %arg2) : (tensor<8x10xf32>, tensor<8x10xf32>) -> tensor<8x10xf32> %6 = "tf.Const"() {_output_shapes = ["tfshape$"], device = "/device:CPU:0", dtype = f32, value = dense<1.000000e+00> : tensor} : () -> tensor - return %5, %4, %5, %5, %6 : tensor, tensor, tensor, tensor, tensor + return %5, %4, %5, %5, %6 : tensor<8x10xf32>, tensor, tensor<8x10xf32>, tensor<8x10xf32>, tensor } -// CHECK: func @inference_standard_lstm_time_major_go_backwards([[VAL_0:%.*]]: tensor, [[VAL_1:%.*]]: tensor, [[VAL_2:%.*]]: tensor, [[VAL_3:%.*]]: tensor<8x40xf32>, [[VAL_4:%.*]]: tensor<10x40xf32>, [[VAL_5:%.*]]: tensor<40xf32>) -> (tensor<8x10xf32>, tensor, tensor, tensor, tensor) attributes {tf._input_shapes = ["tfshape$dim { size: -1 } dim { size: 8 } dim { size: 8 }", "tfshape$dim { size: -1 } dim { size: 10 }", "tfshape$dim { size: -1 } dim { size: 10 }", "tfshape$unknown_rank: true", "tfshape$unknown_rank: true", "tfshape$unknown_rank: true"], tf.api_implements = "lstm_b4e9f0e7-ac55-42bc-8ef2-8496419a608c", tf.api_preferred_device = "CPU", tf.go_backwards = true, tf.time_major = true} { +// CHECK: func @inference_standard_lstm_time_major_go_backwards([[VAL_0:%.*]]: tensor, [[VAL_1:%.*]]: tensor<8x10xf32>, [[VAL_2:%.*]]: tensor<8x10xf32>, [[VAL_3:%.*]]: tensor<8x40xf32>, [[VAL_4:%.*]]: tensor<10x40xf32>, [[VAL_5:%.*]]: tensor<40xf32>) -> (tensor<8x10xf32>, tensor, tensor<8x10xf32>, tensor<8x10xf32>, tensor) attributes {tf._input_shapes = ["tfshape$dim { size: -1 } dim { size: 8 } dim { size: 8 }", "tfshape$dim { size: 8 } dim { size: 10 }", "tfshape$dim { size: 8 } dim { size: 10 }", "tfshape$unknown_rank: true", "tfshape$unknown_rank: false", "tfshape$unknown_rank: false"], tf.api_implements = "lstm_b4e9f0e7-ac55-42bc-8ef2-8496419a608c", tf.api_preferred_device = "CPU", tf.go_backwards = true, tf.time_major = true} { // CHECK: [[VAL_6:%.*]] = constant dense<0> : tensor<1xi32> // CHECK: [[VAL_7:%.*]] = "tf.ReverseV2"([[VAL_0]], [[VAL_6]]) : (tensor, tensor<1xi32>) -> tensor // CHECK: [[VAL_8:%.*]] = constant dense<[1, 0]> : tensor<2xi32> @@ -265,15 +265,15 @@ func @inference_standard_lstm_time_major_go_backwards(%arg0: tensor, // CHECK: [[VAL_19:%.*]] = "tf.Const"() {value = dense<0> : tensor} : () -> tensor // CHECK: [[VAL_20:%.*]]:4 = "tf.SplitV"([[VAL_5]], [[VAL_18]], [[VAL_19]]) : (tensor<40xf32>, tensor<4xi32>, tensor) -> (tensor<10xf32>, tensor<10xf32>, tensor<10xf32>, tensor<10xf32>) // CHECK: [[VAL_21:%.*]] = constant unit -// CHECK: [[VAL_22:%.*]] = "tfl.unidirectional_sequence_lstm"([[VAL_7]], [[VAL_14]]#0, [[VAL_14]]#1, [[VAL_14]]#2, [[VAL_14]]#3, [[VAL_17]]#0, [[VAL_17]]#1, [[VAL_17]]#2, [[VAL_17]]#3, [[VAL_21]], [[VAL_21]], [[VAL_21]], [[VAL_20]]#0, [[VAL_20]]#1, [[VAL_20]]#2, [[VAL_20]]#3, [[VAL_21]], [[VAL_21]], [[VAL_1]], [[VAL_2]], [[VAL_21]], [[VAL_21]], [[VAL_21]], [[VAL_21]]) {cell_clip = 1.000000e+01 : f32, fused_activation_function = "TANH", proj_clip = 0.000000e+00 : f32, time_major = true} : (tensor, tensor<10x8xf32>, tensor<10x8xf32>, tensor<10x8xf32>, tensor<10x8xf32>, tensor<10x10xf32>, tensor<10x10xf32>, tensor<10x10xf32>, tensor<10x10xf32>, none, none, none, tensor<10xf32>, tensor<10xf32>, tensor<10xf32>, tensor<10xf32>, 
none, none, tensor, tensor, none, none, none, none) -> tensor +// CHECK: [[VAL_22:%.*]] = "tfl.unidirectional_sequence_lstm"([[VAL_7]], [[VAL_14]]#0, [[VAL_14]]#1, [[VAL_14]]#2, [[VAL_14]]#3, [[VAL_17]]#0, [[VAL_17]]#1, [[VAL_17]]#2, [[VAL_17]]#3, [[VAL_21]], [[VAL_21]], [[VAL_21]], [[VAL_20]]#0, [[VAL_20]]#1, [[VAL_20]]#2, [[VAL_20]]#3, [[VAL_21]], [[VAL_21]], [[VAL_1]], [[VAL_2]], [[VAL_21]], [[VAL_21]], [[VAL_21]], [[VAL_21]]) {cell_clip = 1.000000e+01 : f32, fused_activation_function = "TANH", proj_clip = 0.000000e+00 : f32, time_major = true} : (tensor, tensor<10x8xf32>, tensor<10x8xf32>, tensor<10x8xf32>, tensor<10x8xf32>, tensor<10x10xf32>, tensor<10x10xf32>, tensor<10x10xf32>, tensor<10x10xf32>, none, none, none, tensor<10xf32>, tensor<10xf32>, tensor<10xf32>, tensor<10xf32>, none, none, tensor<8x10xf32>, tensor<8x10xf32>, none, none, none, none) -> tensor // CHECK: [[VAL_23:%.*]] = constant dense<[-1, 0, 0]> : tensor<3xi32> // CHECK: [[VAL_24:%.*]] = constant dense<0> : tensor<3xi32> // CHECK: [[VAL_25:%.*]] = constant dense<1> : tensor<3xi32> // CHECK: [[VAL_26:%.*]] = "tf.StridedSlice"([[VAL_22]], [[VAL_23]], [[VAL_24]], [[VAL_25]]) {begin_mask = 6 : i64, ellipsis_mask = 0 : i64, end_mask = 6 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<3xi32>, tensor<3xi32>, tensor<3xi32>) -> tensor<8x10xf32> -// CHECK: [[VAL_27:%.*]] = "tf.Const"() {value = dense<0.000000e+00> : tensor<1xf32>} : () -> tensor -// CHECK: [[VAL_28:%.*]] = "tf.Const"() {value = dense<0.000000e+00> : tensor<1xf32>} : () -> tensor +// CHECK: [[VAL_27:%.*]] = "tf.Const"() {value = dense<0.000000e+00> : tensor<1xf32>} : () -> tensor<8x10xf32> +// CHECK: [[VAL_28:%.*]] = "tf.Const"() {value = dense<0.000000e+00> : tensor<1xf32>} : () -> tensor<8x10xf32> // CHECK: [[VAL_29:%.*]] = "tf.Const"() {value = dense<0.000000e+00> : tensor<1xf32>} : () -> tensor -// CHECK: return [[VAL_26]], [[VAL_22]], [[VAL_27]], [[VAL_28]], [[VAL_29]] : tensor<8x10xf32>, tensor, tensor, tensor, tensor +// CHECK: return [[VAL_26]], [[VAL_22]], [[VAL_27]], [[VAL_28]], [[VAL_29]] : tensor<8x10xf32>, tensor, tensor<8x10xf32>, tensor<8x10xf32>, tensor // CHECK: } } @@ -281,18 +281,18 @@ func @inference_standard_lstm_time_major_go_backwards(%arg0: tensor, // ----- module { -func @inference_standard_lstm_non_time_major_go_backwards(%arg0: tensor<8x8x8xf32>, %arg1: tensor, %arg2: tensor, %arg3: tensor<8x40xf32>, %arg4: tensor<10x40xf32>, %arg5: tensor<40xf32>) -> (tensor, tensor<8x8x10xf32>, tensor, tensor, tensor) attributes {tf._input_shapes = ["tfshape$dim { size: -1 } dim { size: 8 } dim { size: 8 }", "tfshape$dim { size: -1 } dim { size: 10 }", "tfshape$dim { size: -1 } dim { size: 10 }", "tfshape$unknown_rank: true", "tfshape$unknown_rank: true", "tfshape$unknown_rank: true"], tf.api_implements = "lstm_b4e9f0e7-ac55-42bc-8ef2-8496419a608c", tf.api_preferred_device = "CPU", tf.go_backwards = true, tf.time_major = false} { +func @inference_standard_lstm_non_time_major_go_backwards(%arg0: tensor<8x8x8xf32>, %arg1: tensor<8x10xf32>, %arg2: tensor<8x10xf32>, %arg3: tensor<8x40xf32>, %arg4: tensor<10x40xf32>, %arg5: tensor<40xf32>) -> (tensor<8x10xf32>, tensor<8x8x10xf32>, tensor<8x10xf32>, tensor<8x10xf32>, tensor) attributes {tf._input_shapes = ["tfshape$dim { size: -1 } dim { size: 8 } dim { size: 8 }", "tfshape$dim { size: 8 } dim { size: 10 }", "tfshape$dim { size: 8 } dim { size: 10 }", "tfshape$unknown_rank: true", "tfshape$unknown_rank: false", "tfshape$unknown_rank: false"], tf.api_implements = 
"lstm_b4e9f0e7-ac55-42bc-8ef2-8496419a608c", tf.api_preferred_device = "CPU", tf.go_backwards = true, tf.time_major = false} { %0 = "tf.BatchMatMulV2"(%arg0, %arg3) {adj_x = false, adj_y = false} : (tensor<8x8x8xf32>, tensor<8x40xf32>) -> tensor<8x8x40xf32> %1 = "tf.Add"(%0, %arg5) : (tensor<8x8x40xf32>, tensor<40xf32>) -> tensor<8x8x40xf32> %2 = "tf.BatchMatMulV2"(%1, %arg4) {adj_x = false, adj_y = true} : (tensor<8x8x40xf32>, tensor<10x40xf32>) -> tensor<8x8x10xf32> - %3 = "tf.Add"(%2, %arg1) : (tensor<8x8x10xf32>, tensor) -> tensor<8x8x10xf32> - %4 = "tf.Add"(%2, %arg2) : (tensor<8x8x10xf32>, tensor) -> tensor<8x8x10xf32> - %5 = "tf.Add"(%arg1, %arg2) : (tensor, tensor) -> tensor + %3 = "tf.Add"(%2, %arg1) : (tensor<8x8x10xf32>, tensor<8x10xf32>) -> tensor<8x8x10xf32> + %4 = "tf.Add"(%2, %arg2) : (tensor<8x8x10xf32>, tensor<8x10xf32>) -> tensor<8x8x10xf32> + %5 = "tf.Add"(%arg1, %arg2) : (tensor<8x10xf32>, tensor<8x10xf32>) -> tensor<8x10xf32> %6 = "tf.Const"() {_output_shapes = ["tfshape$"], device = "/device:CPU:0", dtype = f32, value = dense<1.000000e+00> : tensor} : () -> tensor - return %5, %4, %5, %5, %6 : tensor, tensor<8x8x10xf32>, tensor, tensor, tensor + return %5, %4, %5, %5, %6 : tensor<8x10xf32>, tensor<8x8x10xf32>, tensor<8x10xf32>, tensor<8x10xf32>, tensor } -// CHECK: func @inference_standard_lstm_non_time_major_go_backwards([[VAL_0:%.*]]: tensor<8x8x8xf32>, [[VAL_1:%.*]]: tensor, [[VAL_2:%.*]]: tensor, [[VAL_3:%.*]]: tensor<8x40xf32>, [[VAL_4:%.*]]: tensor<10x40xf32>, [[VAL_5:%.*]]: tensor<40xf32>) -> (tensor<8x10xf32>, tensor<8x8x10xf32>, tensor, tensor, tensor) attributes {tf._input_shapes = ["tfshape$dim { size: -1 } dim { size: 8 } dim { size: 8 }", "tfshape$dim { size: -1 } dim { size: 10 }", "tfshape$dim { size: -1 } dim { size: 10 }", "tfshape$unknown_rank: true", "tfshape$unknown_rank: true", "tfshape$unknown_rank: true"], tf.api_implements = "lstm_b4e9f0e7-ac55-42bc-8ef2-8496419a608c", tf.api_preferred_device = "CPU", tf.go_backwards = true, tf.time_major = false} { +// CHECK: func @inference_standard_lstm_non_time_major_go_backwards([[VAL_0:%.*]]: tensor<8x8x8xf32>, [[VAL_1:%.*]]: tensor<8x10xf32>, [[VAL_2:%.*]]: tensor<8x10xf32>, [[VAL_3:%.*]]: tensor<8x40xf32>, [[VAL_4:%.*]]: tensor<10x40xf32>, [[VAL_5:%.*]]: tensor<40xf32>) -> (tensor<8x10xf32>, tensor<8x8x10xf32>, tensor<8x10xf32>, tensor<8x10xf32>, tensor) attributes {tf._input_shapes = ["tfshape$dim { size: -1 } dim { size: 8 } dim { size: 8 }", "tfshape$dim { size: 8 } dim { size: 10 }", "tfshape$dim { size: 8 } dim { size: 10 }", "tfshape$unknown_rank: true", "tfshape$unknown_rank: false", "tfshape$unknown_rank: false"], tf.api_implements = "lstm_b4e9f0e7-ac55-42bc-8ef2-8496419a608c", tf.api_preferred_device = "CPU", tf.go_backwards = true, tf.time_major = false} { // CHECK: [[VAL_6:%.*]] = constant dense<1> : tensor<1xi32> // CHECK: [[VAL_7:%.*]] = "tf.ReverseV2"([[VAL_0]], [[VAL_6]]) : (tensor<8x8x8xf32>, tensor<1xi32>) -> tensor<8x8x8xf32> // CHECK: [[VAL_8:%.*]] = constant dense<[1, 0]> : tensor<2xi32> @@ -309,15 +309,15 @@ func @inference_standard_lstm_non_time_major_go_backwards(%arg0: tensor<8x8x8xf3 // CHECK: [[VAL_19:%.*]] = "tf.Const"() {value = dense<0> : tensor} : () -> tensor // CHECK: [[VAL_20:%.*]]:4 = "tf.SplitV"([[VAL_5]], [[VAL_18]], [[VAL_19]]) : (tensor<40xf32>, tensor<4xi32>, tensor) -> (tensor<10xf32>, tensor<10xf32>, tensor<10xf32>, tensor<10xf32>) // CHECK: [[VAL_21:%.*]] = constant unit -// CHECK: [[VAL_22:%.*]] = "tfl.unidirectional_sequence_lstm"([[VAL_7]], [[VAL_14]]#0, 
[[VAL_14]]#1, [[VAL_14]]#2, [[VAL_14]]#3, [[VAL_17]]#0, [[VAL_17]]#1, [[VAL_17]]#2, [[VAL_17]]#3, [[VAL_21]], [[VAL_21]], [[VAL_21]], [[VAL_20]]#0, [[VAL_20]]#1, [[VAL_20]]#2, [[VAL_20]]#3, [[VAL_21]], [[VAL_21]], [[VAL_1]], [[VAL_2]], [[VAL_21]], [[VAL_21]], [[VAL_21]], [[VAL_21]]) {cell_clip = 1.000000e+01 : f32, fused_activation_function = "TANH", proj_clip = 0.000000e+00 : f32, time_major = false} : (tensor<8x8x8xf32>, tensor<10x8xf32>, tensor<10x8xf32>, tensor<10x8xf32>, tensor<10x8xf32>, tensor<10x10xf32>, tensor<10x10xf32>, tensor<10x10xf32>, tensor<10x10xf32>, none, none, none, tensor<10xf32>, tensor<10xf32>, tensor<10xf32>, tensor<10xf32>, none, none, tensor, tensor, none, none, none, none) -> tensor<8x8x10xf32> +// CHECK: [[VAL_22:%.*]] = "tfl.unidirectional_sequence_lstm"([[VAL_7]], [[VAL_14]]#0, [[VAL_14]]#1, [[VAL_14]]#2, [[VAL_14]]#3, [[VAL_17]]#0, [[VAL_17]]#1, [[VAL_17]]#2, [[VAL_17]]#3, [[VAL_21]], [[VAL_21]], [[VAL_21]], [[VAL_20]]#0, [[VAL_20]]#1, [[VAL_20]]#2, [[VAL_20]]#3, [[VAL_21]], [[VAL_21]], [[VAL_1]], [[VAL_2]], [[VAL_21]], [[VAL_21]], [[VAL_21]], [[VAL_21]]) {cell_clip = 1.000000e+01 : f32, fused_activation_function = "TANH", proj_clip = 0.000000e+00 : f32, time_major = false} : (tensor<8x8x8xf32>, tensor<10x8xf32>, tensor<10x8xf32>, tensor<10x8xf32>, tensor<10x8xf32>, tensor<10x10xf32>, tensor<10x10xf32>, tensor<10x10xf32>, tensor<10x10xf32>, none, none, none, tensor<10xf32>, tensor<10xf32>, tensor<10xf32>, tensor<10xf32>, none, none, tensor<8x10xf32>, tensor<8x10xf32>, none, none, none, none) -> tensor<8x8x10xf32> // CHECK: [[VAL_23:%.*]] = constant dense<[0, -1, 0]> : tensor<3xi32> // CHECK: [[VAL_24:%.*]] = constant dense<0> : tensor<3xi32> // CHECK: [[VAL_25:%.*]] = constant dense<1> : tensor<3xi32> // CHECK: [[VAL_26:%.*]] = "tf.StridedSlice"([[VAL_22]], [[VAL_23]], [[VAL_24]], [[VAL_25]]) {begin_mask = 5 : i64, ellipsis_mask = 0 : i64, end_mask = 5 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 2 : i64} : (tensor<8x8x10xf32>, tensor<3xi32>, tensor<3xi32>, tensor<3xi32>) -> tensor<8x10xf32> -// CHECK: [[VAL_27:%.*]] = "tf.Const"() {value = dense<0.000000e+00> : tensor<1xf32>} : () -> tensor -// CHECK: [[VAL_28:%.*]] = "tf.Const"() {value = dense<0.000000e+00> : tensor<1xf32>} : () -> tensor +// CHECK: [[VAL_27:%.*]] = "tf.Const"() {value = dense<0.000000e+00> : tensor<1xf32>} : () -> tensor<8x10xf32> +// CHECK: [[VAL_28:%.*]] = "tf.Const"() {value = dense<0.000000e+00> : tensor<1xf32>} : () -> tensor<8x10xf32> // CHECK: [[VAL_29:%.*]] = "tf.Const"() {value = dense<0.000000e+00> : tensor<1xf32>} : () -> tensor -// CHECK: return [[VAL_26]], [[VAL_22]], [[VAL_27]], [[VAL_28]], [[VAL_29]] : tensor<8x10xf32>, tensor<8x8x10xf32>, tensor, tensor, tensor +// CHECK: return [[VAL_26]], [[VAL_22]], [[VAL_27]], [[VAL_28]], [[VAL_29]] : tensor<8x10xf32>, tensor<8x8x10xf32>, tensor<8x10xf32>, tensor<8x10xf32>, tensor // CHECK: } } @@ -325,25 +325,25 @@ func @inference_standard_lstm_non_time_major_go_backwards(%arg0: tensor<8x8x8xf3 // ----- module { -func @inference_can_fuse(%arg0: tensor, %arg1: tensor, %arg2: tensor, %arg3: tensor<8x40xf32>, %arg4: tensor<10x40xf32>, %arg5: tensor<40xf32>) { +func @inference_can_fuse(%arg0: tensor, %arg1: tensor<8x10xf32>, %arg2: tensor<8x10xf32>, %arg3: tensor<8x40xf32>, %arg4: tensor<10x40xf32>, %arg5: tensor<40xf32>) { %0 = "tf.Const"() {_output_shapes = ["tfshape$"], device = "", dtype = f32, value = dense<0.000000e+00> : tensor} : () -> tensor - %1:5 = "tf.PartitionedCall"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5) {Tin = 
["tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT"], Tout = ["tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT"], _output_shapes = ["tfshape$dim { size: 9 } dim { size: 10 }", "tfshape$dim { size: -1 } dim { size: 9 } dim { size: 10 }", "tfshape$dim { size: -1 } dim { size: 10 }", "tfshape$dim { size: -1 } dim { size: 10 }", "tfshape$"], _read_only_resource_inputs = [], config = "", config_proto = "\0A\07\0A\03CPU\10\01\0A\07\0A\03GPU\10\002\02J\008\01", device = "", executor_type = "", f = @inference_standard_lstm_time_major_can_fuse} : (tensor, tensor, tensor, tensor<8x40xf32>, tensor<10x40xf32>, tensor<40xf32>) -> (tensor, tensor, tensor, tensor, tensor) + %1:5 = "tf.PartitionedCall"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5) {Tin = ["tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT"], Tout = ["tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT"], _output_shapes = ["tfshape$dim { size: 9 } dim { size: 10 }", "tfshape$dim { size: -1 } dim { size: 9 } dim { size: 10 }", "tfshape$dim { size: 8 } dim { size: 10 }", "tfshape$dim { size: 8 } dim { size: 10 }", "tfshape$"], _read_only_resource_inputs = [], config = "", config_proto = "\0A\07\0A\03CPU\10\01\0A\07\0A\03GPU\10\002\02J\008\01", device = "", executor_type = "", f = @inference_standard_lstm_time_major_can_fuse} : (tensor, tensor<8x10xf32>, tensor<8x10xf32>, tensor<8x40xf32>, tensor<10x40xf32>, tensor<40xf32>) -> (tensor<8x10xf32>, tensor, tensor<8x10xf32>, tensor<8x10xf32>, tensor) %2 = "tf.Add"(%0, %1#1) : (tensor, tensor) -> tensor return } -func @inference_standard_lstm_time_major_can_fuse(%arg0: tensor, %arg1: tensor, %arg2: tensor, %arg3: tensor<8x40xf32>, %arg4: tensor<10x40xf32>, %arg5: tensor<40xf32>) -> (tensor, tensor, tensor, tensor, tensor) attributes {tf._input_shapes = ["tfshape$dim { size: -1 } dim { size: 8 } dim { size: 8 }", "tfshape$dim { size: -1 } dim { size: 10 }", "tfshape$dim { size: -1 } dim { size: 10 }", "tfshape$unknown_rank: true", "tfshape$unknown_rank: true", "tfshape$unknown_rank: true"], tf.api_implements = "lstm_b4e9f0e7-ac55-42bc-8ef2-8496419a608c", tf.api_preferred_device = "CPU", tf.go_backwards = false, tf.time_major = true} { +func @inference_standard_lstm_time_major_can_fuse(%arg0: tensor, %arg1: tensor<8x10xf32>, %arg2: tensor<8x10xf32>, %arg3: tensor<8x40xf32>, %arg4: tensor<10x40xf32>, %arg5: tensor<40xf32>) -> (tensor<8x10xf32>, tensor, tensor<8x10xf32>, tensor<8x10xf32>, tensor) attributes {tf._input_shapes = ["tfshape$dim { size: -1 } dim { size: 8 } dim { size: 8 }", "tfshape$dim { size: 8 } dim { size: 10 }", "tfshape$dim { size: 8 } dim { size: 10 }", "tfshape$unknown_rank: true", "tfshape$unknown_rank: false", "tfshape$unknown_rank: false"], tf.api_implements = "lstm_b4e9f0e7-ac55-42bc-8ef2-8496419a608c", tf.api_preferred_device = "CPU", tf.go_backwards = false, tf.time_major = true} { %0 = "tf.BatchMatMulV2"(%arg0, %arg3) {adj_x = false, adj_y = false} : (tensor, tensor<8x40xf32>) -> tensor %1 = "tf.Add"(%0, %arg5) : (tensor, tensor<40xf32>) -> tensor %2 = "tf.BatchMatMulV2"(%1, %arg4) {adj_x = false, adj_y = true} : (tensor, tensor<10x40xf32>) -> tensor - %3 = "tf.Add"(%2, %arg1) : (tensor, tensor) -> tensor - %4 = "tf.Add"(%2, %arg2) : (tensor, tensor) -> tensor - %5 = "tf.Add"(%arg1, %arg2) : (tensor, tensor) -> tensor + %3 = "tf.Add"(%2, %arg1) : 
(tensor, tensor<8x10xf32>) -> tensor + %4 = "tf.Add"(%2, %arg2) : (tensor, tensor<8x10xf32>) -> tensor + %5 = "tf.Add"(%arg1, %arg2) : (tensor<8x10xf32>, tensor<8x10xf32>) -> tensor<8x10xf32> %6 = "tf.Const"() {_output_shapes = ["tfshape$"], device = "/device:CPU:0", dtype = f32, value = dense<1.000000e+00> : tensor} : () -> tensor - return %5, %4, %5, %5, %6 : tensor, tensor, tensor, tensor, tensor + return %5, %4, %5, %5, %6 : tensor<8x10xf32>, tensor, tensor<8x10xf32>, tensor<8x10xf32>, tensor } -// CHECK: func @inference_standard_lstm_time_major_can_fuse([[VAL_0:%.*]]: tensor, [[VAL_1:%.*]]: tensor, [[VAL_2:%.*]]: tensor, [[VAL_3:%.*]]: tensor<8x40xf32>, [[VAL_4:%.*]]: tensor<10x40xf32>, [[VAL_5:%.*]]: tensor<40xf32>) -> (tensor<8x10xf32>, tensor, tensor, tensor, tensor) attributes {tf._input_shapes = ["tfshape$dim { size: -1 } dim { size: 8 } dim { size: 8 }", "tfshape$dim { size: -1 } dim { size: 10 }", "tfshape$dim { size: -1 } dim { size: 10 }", "tfshape$unknown_rank: true", "tfshape$unknown_rank: true", "tfshape$unknown_rank: true"], tf.api_implements = "lstm_b4e9f0e7-ac55-42bc-8ef2-8496419a608c", tf.api_preferred_device = "CPU", tf.go_backwards = false, tf.time_major = true} { +// CHECK: func @inference_standard_lstm_time_major_can_fuse([[VAL_0:%.*]]: tensor, [[VAL_1:%.*]]: tensor<8x10xf32>, [[VAL_2:%.*]]: tensor<8x10xf32>, [[VAL_3:%.*]]: tensor<8x40xf32>, [[VAL_4:%.*]]: tensor<10x40xf32>, [[VAL_5:%.*]]: tensor<40xf32>) -> (tensor<8x10xf32>, tensor, tensor<8x10xf32>, tensor<8x10xf32>, tensor) attributes {tf._input_shapes = ["tfshape$dim { size: -1 } dim { size: 8 } dim { size: 8 }", "tfshape$dim { size: 8 } dim { size: 10 }", "tfshape$dim { size: 8 } dim { size: 10 }", "tfshape$unknown_rank: true", "tfshape$unknown_rank: false", "tfshape$unknown_rank: false"], tf.api_implements = "lstm_b4e9f0e7-ac55-42bc-8ef2-8496419a608c", tf.api_preferred_device = "CPU", tf.go_backwards = false, tf.time_major = true} { // CHECK: [[VAL_6:%.*]] = constant dense<[1, 0]> : tensor<2xi32> // CHECK: [[VAL_7:%.*]] = "tf.Transpose"([[VAL_3]], [[VAL_6]]) : (tensor<8x40xf32>, tensor<2xi32>) -> tensor<40x8xf32> // CHECK: [[VAL_8:%.*]] = constant dense<[1, 0]> : tensor<2xi32> @@ -358,15 +358,15 @@ func @inference_standard_lstm_time_major_can_fuse(%arg0: tensor, %arg // CHECK: [[VAL_17:%.*]] = "tf.Const"() {value = dense<0> : tensor} : () -> tensor // CHECK: [[VAL_18:%.*]]:4 = "tf.SplitV"([[VAL_5]], [[VAL_16]], [[VAL_17]]) : (tensor<40xf32>, tensor<4xi32>, tensor) -> (tensor<10xf32>, tensor<10xf32>, tensor<10xf32>, tensor<10xf32>) // CHECK: [[VAL_19:%.*]] = constant unit -// CHECK: [[VAL_20:%.*]] = "tfl.unidirectional_sequence_lstm"([[VAL_0]], [[VAL_12]]#0, [[VAL_12]]#1, [[VAL_12]]#2, [[VAL_12]]#3, [[VAL_15]]#0, [[VAL_15]]#1, [[VAL_15]]#2, [[VAL_15]]#3, [[VAL_19]], [[VAL_19]], [[VAL_19]], [[VAL_18]]#0, [[VAL_18]]#1, [[VAL_18]]#2, [[VAL_18]]#3, [[VAL_19]], [[VAL_19]], [[VAL_1]], [[VAL_2]], [[VAL_19]], [[VAL_19]], [[VAL_19]], [[VAL_19]]) {cell_clip = 1.000000e+01 : f32, fused_activation_function = "TANH", proj_clip = 0.000000e+00 : f32, time_major = true} : (tensor, tensor<10x8xf32>, tensor<10x8xf32>, tensor<10x8xf32>, tensor<10x8xf32>, tensor<10x10xf32>, tensor<10x10xf32>, tensor<10x10xf32>, tensor<10x10xf32>, none, none, none, tensor<10xf32>, tensor<10xf32>, tensor<10xf32>, tensor<10xf32>, none, none, tensor, tensor, none, none, none, none) -> tensor +// CHECK: [[VAL_20:%.*]] = "tfl.unidirectional_sequence_lstm"([[VAL_0]], [[VAL_12]]#0, [[VAL_12]]#1, [[VAL_12]]#2, [[VAL_12]]#3, [[VAL_15]]#0, [[VAL_15]]#1, 
[[VAL_15]]#2, [[VAL_15]]#3, [[VAL_19]], [[VAL_19]], [[VAL_19]], [[VAL_18]]#0, [[VAL_18]]#1, [[VAL_18]]#2, [[VAL_18]]#3, [[VAL_19]], [[VAL_19]], [[VAL_1]], [[VAL_2]], [[VAL_19]], [[VAL_19]], [[VAL_19]], [[VAL_19]]) {cell_clip = 1.000000e+01 : f32, fused_activation_function = "TANH", proj_clip = 0.000000e+00 : f32, time_major = true} : (tensor, tensor<10x8xf32>, tensor<10x8xf32>, tensor<10x8xf32>, tensor<10x8xf32>, tensor<10x10xf32>, tensor<10x10xf32>, tensor<10x10xf32>, tensor<10x10xf32>, none, none, none, tensor<10xf32>, tensor<10xf32>, tensor<10xf32>, tensor<10xf32>, none, none, tensor<8x10xf32>, tensor<8x10xf32>, none, none, none, none) -> tensor // CHECK: [[VAL_21:%.*]] = constant dense<[-1, 0, 0]> : tensor<3xi32> // CHECK: [[VAL_22:%.*]] = constant dense<0> : tensor<3xi32> // CHECK: [[VAL_23:%.*]] = constant dense<1> : tensor<3xi32> // CHECK: [[VAL_24:%.*]] = "tf.StridedSlice"([[VAL_20]], [[VAL_21]], [[VAL_22]], [[VAL_23]]) {begin_mask = 6 : i64, ellipsis_mask = 0 : i64, end_mask = 6 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<3xi32>, tensor<3xi32>, tensor<3xi32>) -> tensor<8x10xf32> -// CHECK: [[VAL_25:%.*]] = "tf.Const"() {value = dense<0.000000e+00> : tensor<1xf32>} : () -> tensor -// CHECK: [[VAL_26:%.*]] = "tf.Const"() {value = dense<0.000000e+00> : tensor<1xf32>} : () -> tensor +// CHECK: [[VAL_25:%.*]] = "tf.Const"() {value = dense<0.000000e+00> : tensor<1xf32>} : () -> tensor<8x10xf32> +// CHECK: [[VAL_26:%.*]] = "tf.Const"() {value = dense<0.000000e+00> : tensor<1xf32>} : () -> tensor<8x10xf32> // CHECK: [[VAL_27:%.*]] = "tf.Const"() {value = dense<0.000000e+00> : tensor<1xf32>} : () -> tensor -// CHECK: return [[VAL_24]], [[VAL_20]], [[VAL_25]], [[VAL_26]], [[VAL_27]] : tensor<8x10xf32>, tensor, tensor, tensor, tensor +// CHECK: return [[VAL_24]], [[VAL_20]], [[VAL_25]], [[VAL_26]], [[VAL_27]] : tensor<8x10xf32>, tensor, tensor<8x10xf32>, tensor<8x10xf32>, tensor // CHECK: } } @@ -374,26 +374,26 @@ func @inference_standard_lstm_time_major_can_fuse(%arg0: tensor, %arg // ----- module { -func @inference_can_fuse_last_output(%arg0: tensor, %arg1: tensor, %arg2: tensor, %arg3: tensor<8x40xf32>, %arg4: tensor<10x40xf32>, %arg5: tensor<40xf32>) { +func @inference_can_fuse_last_output(%arg0: tensor, %arg1: tensor<8x10xf32>, %arg2: tensor<8x10xf32>, %arg3: tensor<8x40xf32>, %arg4: tensor<10x40xf32>, %arg5: tensor<40xf32>) { %0 = "tf.Const"() {_output_shapes = ["tfshape$"], device = "", dtype = f32, value = dense<0.000000e+00> : tensor} : () -> tensor - %1:5 = "tf.PartitionedCall"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5) {Tin = ["tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT"], Tout = ["tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT"], _output_shapes = ["tfshape$dim { size: 9 } dim { size: 10 }", "tfshape$dim { size: -1 } dim { size: 9 } dim { size: 10 }", "tfshape$dim { size: -1 } dim { size: 10 }", "tfshape$dim { size: -1 } dim { size: 10 }", "tfshape$"], _read_only_resource_inputs = [], config = "", config_proto = "\0A\07\0A\03CPU\10\01\0A\07\0A\03GPU\10\002\02J\008\01", device = "", executor_type = "", f = @inference_standard_lstm_time_major_can_fuse_last_output} : (tensor, tensor, tensor, tensor<8x40xf32>, tensor<10x40xf32>, tensor<40xf32>) -> (tensor<8x10xf32>, tensor, tensor, tensor, tensor) + %1:5 = "tf.PartitionedCall"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5) {Tin = ["tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT", 
"tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT"], Tout = ["tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT", "tfdtype$DT_FLOAT"], _output_shapes = ["tfshape$dim { size: 9 } dim { size: 10 }", "tfshape$dim { size: -1 } dim { size: 9 } dim { size: 10 }", "tfshape$dim { size: 8 } dim { size: 10 }", "tfshape$dim { size: 8 } dim { size: 10 }", "tfshape$"], _read_only_resource_inputs = [], config = "", config_proto = "\0A\07\0A\03CPU\10\01\0A\07\0A\03GPU\10\002\02J\008\01", device = "", executor_type = "", f = @inference_standard_lstm_time_major_can_fuse_last_output} : (tensor, tensor<8x10xf32>, tensor<8x10xf32>, tensor<8x40xf32>, tensor<10x40xf32>, tensor<40xf32>) -> (tensor<8x10xf32>, tensor, tensor<8x10xf32>, tensor<8x10xf32>, tensor) %2 = "tf.Add"(%0, %1#0) : (tensor, tensor<8x10xf32>) -> tensor<8x10xf32> return } -func @inference_standard_lstm_time_major_can_fuse_last_output(%arg0: tensor, %arg1: tensor, %arg2: tensor, %arg3: tensor<8x40xf32>, %arg4: tensor<10x40xf32>, %arg5: tensor<40xf32>) -> (tensor<8x10xf32>, tensor, tensor, tensor, tensor) attributes {tf._input_shapes = ["tfshape$dim { size: -1 } dim { size: 8 } dim { size: 8 }", "tfshape$dim { size: -1 } dim { size: 10 }", "tfshape$dim { size: -1 } dim { size: 10 }", "tfshape$unknown_rank: true", "tfshape$unknown_rank: true", "tfshape$unknown_rank: true"], tf.api_implements = "lstm_b4e9f0e7-ac55-42bc-8ef2-8496419a608c", tf.api_preferred_device = "CPU", tf.go_backwards = false, tf.time_major = true} { +func @inference_standard_lstm_time_major_can_fuse_last_output(%arg0: tensor, %arg1: tensor<8x10xf32>, %arg2: tensor<8x10xf32>, %arg3: tensor<8x40xf32>, %arg4: tensor<10x40xf32>, %arg5: tensor<40xf32>) -> (tensor<8x10xf32>, tensor, tensor<8x10xf32>, tensor<8x10xf32>, tensor) attributes {tf._input_shapes = ["tfshape$dim { size: -1 } dim { size: 8 } dim { size: 8 }", "tfshape$dim { size: 8 } dim { size: 10 }", "tfshape$dim { size: 8 } dim { size: 10 }", "tfshape$unknown_rank: true", "tfshape$unknown_rank: false", "tfshape$unknown_rank: false"], tf.api_implements = "lstm_b4e9f0e7-ac55-42bc-8ef2-8496419a608c", tf.api_preferred_device = "CPU", tf.go_backwards = false, tf.time_major = true} { %0 = "tf.BatchMatMulV2"(%arg0, %arg3) {adj_x = false, adj_y = false} : (tensor, tensor<8x40xf32>) -> tensor %1 = "tf.Add"(%0, %arg5) : (tensor, tensor<40xf32>) -> tensor %2 = "tf.BatchMatMulV2"(%1, %arg4) {adj_x = false, adj_y = true} : (tensor, tensor<10x40xf32>) -> tensor - %3 = "tf.Add"(%2, %arg1) : (tensor, tensor) -> tensor - %4 = "tf.Add"(%2, %arg2) : (tensor, tensor) -> tensor - %5 = "tf.Add"(%arg1, %arg2) : (tensor, tensor) -> tensor + %3 = "tf.Add"(%2, %arg1) : (tensor, tensor<8x10xf32>) -> tensor + %4 = "tf.Add"(%2, %arg2) : (tensor, tensor<8x10xf32>) -> tensor + %5 = "tf.Add"(%arg1, %arg2) : (tensor<8x10xf32>, tensor<8x10xf32>) -> tensor<8x10xf32> %6 = "tf.Const"() {_output_shapes = ["tfshape$"], device = "/device:CPU:0", dtype = f32, value = dense<1.000000e+00> : tensor} : () -> tensor - %7 = "tf.Add"(%arg1, %arg2) : (tensor, tensor) -> tensor<8x10xf32> - return %7, %4, %5, %5, %6 : tensor<8x10xf32>, tensor, tensor, tensor, tensor + %7 = "tf.Add"(%arg1, %arg2) : (tensor<8x10xf32>, tensor<8x10xf32>) -> tensor<8x10xf32> + return %7, %4, %5, %5, %6 : tensor<8x10xf32>, tensor, tensor<8x10xf32>, tensor<8x10xf32>, tensor } -// CHECK: func @inference_standard_lstm_time_major_can_fuse_last_output([[VAL_0:%.*]]: tensor, [[VAL_1:%.*]]: tensor, [[VAL_2:%.*]]: tensor, [[VAL_3:%.*]]: 
tensor<8x40xf32>, [[VAL_4:%.*]]: tensor<10x40xf32>, [[VAL_5:%.*]]: tensor<40xf32>) -> (tensor<8x10xf32>, tensor, tensor, tensor, tensor) attributes {tf._input_shapes = ["tfshape$dim { size: -1 } dim { size: 8 } dim { size: 8 }", "tfshape$dim { size: -1 } dim { size: 10 }", "tfshape$dim { size: -1 } dim { size: 10 }", "tfshape$unknown_rank: true", "tfshape$unknown_rank: true", "tfshape$unknown_rank: true"], tf.api_implements = "lstm_b4e9f0e7-ac55-42bc-8ef2-8496419a608c", tf.api_preferred_device = "CPU", tf.go_backwards = false, tf.time_major = true} { +// CHECK: func @inference_standard_lstm_time_major_can_fuse_last_output([[VAL_0:%.*]]: tensor, [[VAL_1:%.*]]: tensor<8x10xf32>, [[VAL_2:%.*]]: tensor<8x10xf32>, [[VAL_3:%.*]]: tensor<8x40xf32>, [[VAL_4:%.*]]: tensor<10x40xf32>, [[VAL_5:%.*]]: tensor<40xf32>) -> (tensor<8x10xf32>, tensor, tensor<8x10xf32>, tensor<8x10xf32>, tensor) attributes {tf._input_shapes = ["tfshape$dim { size: -1 } dim { size: 8 } dim { size: 8 }", "tfshape$dim { size: 8 } dim { size: 10 }", "tfshape$dim { size: 8 } dim { size: 10 }", "tfshape$unknown_rank: true", "tfshape$unknown_rank: false", "tfshape$unknown_rank: false"], tf.api_implements = "lstm_b4e9f0e7-ac55-42bc-8ef2-8496419a608c", tf.api_preferred_device = "CPU", tf.go_backwards = false, tf.time_major = true} { // CHECK: [[VAL_6:%.*]] = constant dense<[1, 0]> : tensor<2xi32> // CHECK: [[VAL_7:%.*]] = "tf.Transpose"([[VAL_3]], [[VAL_6]]) : (tensor<8x40xf32>, tensor<2xi32>) -> tensor<40x8xf32> // CHECK: [[VAL_8:%.*]] = constant dense<[1, 0]> : tensor<2xi32> @@ -408,15 +408,15 @@ func @inference_standard_lstm_time_major_can_fuse_last_output(%arg0: tensor : tensor} : () -> tensor // CHECK: [[VAL_18:%.*]]:4 = "tf.SplitV"([[VAL_5]], [[VAL_16]], [[VAL_17]]) : (tensor<40xf32>, tensor<4xi32>, tensor) -> (tensor<10xf32>, tensor<10xf32>, tensor<10xf32>, tensor<10xf32>) // CHECK: [[VAL_19:%.*]] = constant unit -// CHECK: [[VAL_20:%.*]] = "tfl.unidirectional_sequence_lstm"([[VAL_0]], [[VAL_12]]#0, [[VAL_12]]#1, [[VAL_12]]#2, [[VAL_12]]#3, [[VAL_15]]#0, [[VAL_15]]#1, [[VAL_15]]#2, [[VAL_15]]#3, [[VAL_19]], [[VAL_19]], [[VAL_19]], [[VAL_18]]#0, [[VAL_18]]#1, [[VAL_18]]#2, [[VAL_18]]#3, [[VAL_19]], [[VAL_19]], [[VAL_1]], [[VAL_2]], [[VAL_19]], [[VAL_19]], [[VAL_19]], [[VAL_19]]) {cell_clip = 1.000000e+01 : f32, fused_activation_function = "TANH", proj_clip = 0.000000e+00 : f32, time_major = true} : (tensor, tensor<10x8xf32>, tensor<10x8xf32>, tensor<10x8xf32>, tensor<10x8xf32>, tensor<10x10xf32>, tensor<10x10xf32>, tensor<10x10xf32>, tensor<10x10xf32>, none, none, none, tensor<10xf32>, tensor<10xf32>, tensor<10xf32>, tensor<10xf32>, none, none, tensor, tensor, none, none, none, none) -> tensor +// CHECK: [[VAL_20:%.*]] = "tfl.unidirectional_sequence_lstm"([[VAL_0]], [[VAL_12]]#0, [[VAL_12]]#1, [[VAL_12]]#2, [[VAL_12]]#3, [[VAL_15]]#0, [[VAL_15]]#1, [[VAL_15]]#2, [[VAL_15]]#3, [[VAL_19]], [[VAL_19]], [[VAL_19]], [[VAL_18]]#0, [[VAL_18]]#1, [[VAL_18]]#2, [[VAL_18]]#3, [[VAL_19]], [[VAL_19]], [[VAL_1]], [[VAL_2]], [[VAL_19]], [[VAL_19]], [[VAL_19]], [[VAL_19]]) {cell_clip = 1.000000e+01 : f32, fused_activation_function = "TANH", proj_clip = 0.000000e+00 : f32, time_major = true} : (tensor, tensor<10x8xf32>, tensor<10x8xf32>, tensor<10x8xf32>, tensor<10x8xf32>, tensor<10x10xf32>, tensor<10x10xf32>, tensor<10x10xf32>, tensor<10x10xf32>, none, none, none, tensor<10xf32>, tensor<10xf32>, tensor<10xf32>, tensor<10xf32>, none, none, tensor<8x10xf32>, tensor<8x10xf32>, none, none, none, none) -> tensor // CHECK: [[VAL_21:%.*]] = 
constant dense<[-1, 0, 0]> : tensor<3xi32> // CHECK: [[VAL_22:%.*]] = constant dense<0> : tensor<3xi32> // CHECK: [[VAL_23:%.*]] = constant dense<1> : tensor<3xi32> // CHECK: [[VAL_24:%.*]] = "tf.StridedSlice"([[VAL_20]], [[VAL_21]], [[VAL_22]], [[VAL_23]]) {begin_mask = 6 : i64, ellipsis_mask = 0 : i64, end_mask = 6 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<3xi32>, tensor<3xi32>, tensor<3xi32>) -> tensor<8x10xf32> -// CHECK: [[VAL_25:%.*]] = "tf.Const"() {value = dense<0.000000e+00> : tensor<1xf32>} : () -> tensor -// CHECK: [[VAL_26:%.*]] = "tf.Const"() {value = dense<0.000000e+00> : tensor<1xf32>} : () -> tensor +// CHECK: [[VAL_25:%.*]] = "tf.Const"() {value = dense<0.000000e+00> : tensor<1xf32>} : () -> tensor<8x10xf32> +// CHECK: [[VAL_26:%.*]] = "tf.Const"() {value = dense<0.000000e+00> : tensor<1xf32>} : () -> tensor<8x10xf32> // CHECK: [[VAL_27:%.*]] = "tf.Const"() {value = dense<0.000000e+00> : tensor<1xf32>} : () -> tensor -// CHECK: return [[VAL_24]], [[VAL_20]], [[VAL_25]], [[VAL_26]], [[VAL_27]] : tensor<8x10xf32>, tensor, tensor, tensor, tensor +// CHECK: return [[VAL_24]], [[VAL_20]], [[VAL_25]], [[VAL_26]], [[VAL_27]] : tensor<8x10xf32>, tensor, tensor<8x10xf32>, tensor<8x10xf32>, tensor // CHECK: } } @@ -456,6 +456,32 @@ func @inference_standard_lstm_time_major_cannot_fuse(%arg0: tensor, % // ----- +module { +func @dynamic_shape_non_fuse_standard_lstm(%arg0: tensor, %arg1: tensor, %arg2: tensor, %arg3: tensor<8x40xf32>, %arg4: tensor<10x40xf32>, %arg5: tensor<40xf32>) -> (tensor, tensor, tensor, tensor, tensor) attributes {tf._input_shapes = ["tfshape$dim { size: -1 } dim { size: 8 } dim { size: 8 }", "tfshape$dim { size: -1 } dim { size: 10 }", "tfshape$dim { size: -1 } dim { size: 10 }", "tfshape$unknown_rank: true", "tfshape$unknown_rank: true", "tfshape$unknown_rank: true"], tf.api_implements = "lstm_b4e9f0e7-ac55-42bc-8ef2-8496419a608c", tf.api_preferred_device = "CPU", tf.go_backwards = false, tf.time_major = true} { + %0 = "tf.BatchMatMulV2"(%arg0, %arg3) {adj_x = false, adj_y = false} : (tensor, tensor<8x40xf32>) -> tensor + %1 = "tf.Add"(%0, %arg5) : (tensor, tensor<40xf32>) -> tensor + %2 = "tf.BatchMatMulV2"(%1, %arg4) {adj_x = false, adj_y = true} : (tensor, tensor<10x40xf32>) -> tensor + %3 = "tf.Add"(%2, %arg1) : (tensor, tensor) -> tensor + %4 = "tf.Add"(%2, %arg2) : (tensor, tensor) -> tensor + %5 = "tf.Add"(%arg1, %arg2) : (tensor, tensor) -> tensor + %6 = "tf.Const"() {_output_shapes = ["tfshape$"], device = "/device:CPU:0", dtype = f32, value = dense<1.000000e+00> : tensor} : () -> tensor + return %5, %4, %5, %5, %6 : tensor, tensor, tensor, tensor, tensor +} + +// CHECK: func @dynamic_shape_non_fuse_standard_lstm(%[[VAL_0:.*]]: tensor, %[[VAL_1:.*]]: tensor, %[[VAL_2:.*]]: tensor, %[[VAL_3:.*]]: tensor<8x40xf32>, %[[VAL_4:.*]]: tensor<10x40xf32>, %[[VAL_5:.*]]: tensor<40xf32>) -> (tensor, tensor, tensor, tensor, tensor) attributes {tf._input_shapes = ["tfshape$dim { size: -1 } dim { size: 8 } dim { size: 8 }", "tfshape$dim { size: -1 } dim { size: 10 }", "tfshape$dim { size: -1 } dim { size: 10 }", "tfshape$unknown_rank: true", "tfshape$unknown_rank: true", "tfshape$unknown_rank: true"], tf.api_implements = "lstm_b4e9f0e7-ac55-42bc-8ef2-8496419a608c", tf.api_preferred_device = "CPU", tf.go_backwards = false, tf.time_major = true} { +// CHECK: %[[VAL_6:.*]] = "tf.BatchMatMulV2"(%[[VAL_0]], %[[VAL_3]]) {adj_x = false, adj_y = false} : (tensor, tensor<8x40xf32>) -> tensor +// CHECK: %[[VAL_7:.*]] = "tf.Add"(%[[VAL_6]], 
%[[VAL_5]]) : (tensor, tensor<40xf32>) -> tensor +// CHECK: %[[VAL_8:.*]] = "tf.BatchMatMulV2"(%[[VAL_7]], %[[VAL_4]]) {adj_x = false, adj_y = true} : (tensor, tensor<10x40xf32>) -> tensor +// CHECK: %[[VAL_9:.*]] = "tf.Add"(%[[VAL_8]], %[[VAL_1]]) : (tensor, tensor) -> tensor +// CHECK: %[[VAL_10:.*]] = "tf.Add"(%[[VAL_8]], %[[VAL_2]]) : (tensor, tensor) -> tensor +// CHECK: %[[VAL_11:.*]] = "tf.Add"(%[[VAL_1]], %[[VAL_2]]) : (tensor, tensor) -> tensor +// CHECK: %[[VAL_12:.*]] = "tf.Const"() {_output_shapes = ["tfshape$"], device = "/device:CPU:0", dtype = f32, value = dense<1.000000e+00> : tensor} : () -> tensor +// CHECK: return %[[VAL_11]], %[[VAL_10]], %[[VAL_11]], %[[VAL_11]], %[[VAL_12]] : tensor, tensor, tensor, tensor, tensor +// CHECK: } +} + +// ----- + module { func @nms_padded(%arg0: tensor<100x4xf32>, %arg1: tensor<100xf32>, %arg2: tensor, %arg3: tensor, %arg4: tensor, %arg5: tensor, %arg6: tensor, %arg7: tensor, %arg8: tensor) -> (tensor<1x10xi32>, tensor) attributes {tf._implements = "non_max_suppression_padded_v2", tf._reference = "mlir"} { %0 = "tf.Const"() {value = dense<1> : tensor<1x10xi32>} : () -> tensor<1x10xi32> diff --git a/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc b/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc index f7923847835..4f5c87a8a4c 100644 --- a/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc +++ b/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc @@ -258,6 +258,15 @@ LogicalResult CheckFusableKerasLstm(FuncOp lstm_func, ModuleOp module) { if (result.wasInterrupted()) return failure(); } + // We should know the batch size in advance for the lstm fusion. + // A good indicator of batch size is both cell state and input state have + // fixed shape. (indices 1 & 2). + for (int i = 1; i < 3; ++i) { + auto input = lstm_func.getArgument(i); + auto input_type = input.getType().dyn_cast_or_null(); + if (!input_type || !input_type.hasStaticShape()) return failure(); + } + return success(); } From 040baca5460f63dd57a757bef55a79866e6f4565 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Jul 2020 02:02:12 -0700 Subject: [PATCH 1366/2522] compat: Update forward compatibility horizon to 2020-07-27 PiperOrigin-RevId: 323320822 Change-Id: Ieaaad51b40db7fef2cabab4bd15779527e772fa3 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 9ca436a4c18..8cb19c601cf 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 26) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 27) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From e113bcbd5a8e344b5382554fb0ff67515d247ffc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Jul 2020 02:02:12 -0700 Subject: [PATCH 1367/2522] Update GraphDef version to 475. 
PiperOrigin-RevId: 323320826
Change-Id: I3d220330389f03da6eb11b0cd8facdaedbd62d67
---
 tensorflow/core/public/version.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h
index 9e7abdbf45a..1382ab7bbe9 100644
--- a/tensorflow/core/public/version.h
+++ b/tensorflow/core/public/version.h
@@ -108,7 +108,7 @@ limitations under the License.
 #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0
 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0
-#define TF_GRAPH_DEF_VERSION 474 // Updated: 2020/7/26
+#define TF_GRAPH_DEF_VERSION 475 // Updated: 2020/7/27

 // Checkpoint compatibility versions (the versions field in SavedSliceMeta).
 //

From 8c6f2d55762f3fc94f98fdd8b3c5d59ee1276dba Mon Sep 17 00:00:00 2001
From: Chao Mei
Date: Mon, 27 Jul 2020 02:32:03 -0700
Subject: [PATCH 1368/2522] Create a short description for the external delegate.

PiperOrigin-RevId: 323323861
Change-Id: I53248ba0444aea395541c225e533b2dddd828089
---
 tensorflow/lite/delegates/external/README.md | 33 ++++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100644 tensorflow/lite/delegates/external/README.md

diff --git a/tensorflow/lite/delegates/external/README.md b/tensorflow/lite/delegates/external/README.md
new file mode 100644
index 00000000000..d110dede5b7
--- /dev/null
+++ b/tensorflow/lite/delegates/external/README.md
@@ -0,0 +1,33 @@
+# What is an External Delegate?
+An external delegate is a special Tensorflow Lite delegate that is simply
+initialized by loading a dynamic library that encapsulates an actual
+Tensorflow Lite delegate implementation. The actual delegate exposes the
+following two creation and deletion C APIs:
+
+* __tflite_plugin_create_delegate__ (declaration seen below) creates a delegate
+object based on provided key-value options. It may return NULL to indicate an
+error, with the detailed information reported by calling `report_error` if
+provided. Each option key and value should be null-terminated.
+
+```
+TfLiteDelegate* tflite_plugin_create_delegate(
+    char** options_keys, char** options_values, size_t num_options,
+    void (*report_error)(const char *))
+```
+
+* __tflite_plugin_destroy_delegate__ (declaration seen below) destroys the
+delegate object that is created by the previous API. NULL as an argument value
+is allowed.
+
+```
+void tflite_plugin_destroy_delegate(TfLiteDelegate* delegate)
+```
+
+The external delegate provides an opaque and transparent way to utilize a
+Tensorflow Lite delegate when performing inference. In other words, one may
+replace the actual Tensorflow Lite delegate by simply updating the dynamic
+library without changing the application code. We developed this mainly for
+delegate evaluation.
+
+Note that this delegate is the corresponding C++ implementation of the one for
+the Tensorflow Lite Python binding as shown [here](https://github.com/tensorflow/tensorflow/blob/7145fc0e49be01ef6943f4df386ce38567e37797/tensorflow/lite/python/interpreter.py#L42).

From 7c0ed7acaab059246a58ea063efc610bdeeca7ce Mon Sep 17 00:00:00 2001
From: Adrian Kuegel
Date: Mon, 27 Jul 2020 03:43:12 -0700
Subject: [PATCH 1369/2522] Only instantiate abs kernel templates if needed.

When MLIR generated GPU kernels are used, there is no need to instantiate
the GPU kernels based on Eigen. This would just lead to unused code being
linked in.
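As an aside on the external-delegate README introduced above, a minimal sketch of a shared library implementing the two documented entry points might look like the following. Only the two exported signatures come from the README; the no-op `Prepare`, the option validation, and the error message are illustrative placeholders rather than TensorFlow Lite APIs.

```
// external_delegate_sketch.cc -- illustrative only. Implements the two entry
// points from the README with a "null" delegate that claims no nodes, so all
// ops keep running on the default CPU kernels.
#include <cstddef>

#include "tensorflow/lite/c/common.h"

namespace {
// A real delegate would inspect the graph here and claim the nodes it can run.
TfLiteStatus NullDelegatePrepare(TfLiteContext* context,
                                 TfLiteDelegate* delegate) {
  return kTfLiteOk;
}
}  // namespace

extern "C" {

TfLiteDelegate* tflite_plugin_create_delegate(
    char** options_keys, char** options_values, size_t num_options,
    void (*report_error)(const char*)) {
  // Options arrive as parallel arrays of null-terminated strings; a real
  // delegate would parse them here and report problems via report_error.
  for (size_t i = 0; i < num_options; ++i) {
    if (options_keys[i] == nullptr || options_values[i] == nullptr) {
      if (report_error) report_error("external delegate: malformed option");
      return nullptr;
    }
  }
  auto* delegate = new TfLiteDelegate{};
  delegate->flags = kTfLiteDelegateFlagsNone;
  delegate->Prepare = NullDelegatePrepare;
  return delegate;
}

void tflite_plugin_destroy_delegate(TfLiteDelegate* delegate) {
  // NULL is explicitly allowed by the contract described in the README.
  delete delegate;
}

}  // extern "C"
```

Such a library would be built as a shared object and handed to the interpreter through the external-delegate loading path (for example, the Python delegate-loading helper referenced at the end of the README), with no change to application code.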
PiperOrigin-RevId: 323332367 Change-Id: Id504c6db307d4729f470bf18e508833537848fb9 --- tensorflow/core/kernels/cwise_op_gpu_abs.cu.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/core/kernels/cwise_op_gpu_abs.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_abs.cu.cc index f5b4f014c2c..ee14f2950d6 100644 --- a/tensorflow/core/kernels/cwise_op_gpu_abs.cu.cc +++ b/tensorflow/core/kernels/cwise_op_gpu_abs.cu.cc @@ -19,7 +19,11 @@ limitations under the License. namespace tensorflow { namespace functor { +#ifdef MLIR_GENERATED_GPU_KERNELS_ENABLED +DEFINE_UNARY2(abs, complex64, complex128); +#else DEFINE_UNARY6(abs, Eigen::half, float, double, int64, complex64, complex128); +#endif } // namespace functor } // namespace tensorflow From 8fb96913762119ba4e1f1321c1e712adb9aab75f Mon Sep 17 00:00:00 2001 From: Alexander Belyaev Date: Mon, 27 Jul 2020 06:13:00 -0700 Subject: [PATCH 1370/2522] [MLIR][KERNEL_GEN] Add TFFramework MLIR dialect. PiperOrigin-RevId: 323347540 Change-Id: I247e2b4eaa7f072dd03ba2462ec981dc9d31fd1a --- tensorflow/compiler/mlir/runlit.cfg.py | 2 +- tensorflow/compiler/mlir/runlit.site.cfg.py | 1 + .../compiler/mlir/tools/kernel_gen/BUILD | 12 ++ .../compiler/mlir/tools/kernel_gen/ir/BUILD | 55 +++++++ .../kernel_gen/ir/dialect_registration.cc | 21 +++ .../tools/kernel_gen/ir/tf_framework_ops.cc | 87 ++++++++++ .../tools/kernel_gen/ir/tf_framework_ops.h | 70 ++++++++ .../tools/kernel_gen/ir/tf_framework_ops.td | 151 ++++++++++++++++++ .../mlir/tools/kernel_gen/tests/BUILD | 19 +++ .../mlir/tools/kernel_gen/tests/invalid.mlir | 15 ++ .../mlir/tools/kernel_gen/tests/ops.mlir | 21 +++ 11 files changed, 453 insertions(+), 1 deletion(-) create mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/ir/BUILD create mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/ir/dialect_registration.cc create mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.cc create mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.h create mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.td create mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/tests/BUILD create mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/tests/invalid.mlir create mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/tests/ops.mlir diff --git a/tensorflow/compiler/mlir/runlit.cfg.py b/tensorflow/compiler/mlir/runlit.cfg.py index 718b6c3045a..29536788679 100644 --- a/tensorflow/compiler/mlir/runlit.cfg.py +++ b/tensorflow/compiler/mlir/runlit.cfg.py @@ -74,7 +74,7 @@ tool_names = [ 'tf_tfjs_translate', 'flatbuffer_to_string', 'flatbuffer_translate', 'tf-mlir-translate', 'mlir-tflite-runner', 'tfcompile', 'json_to_flatbuffer', 'xla-gpu-opt', 'xla-opt', 'hlo_to_llvm_ir', - 'xla-thunks-opt' + 'kernel-gen-opt' ] tools = [ToolSubst(s, unresolved='ignore') for s in tool_names] llvm_config.add_tool_substitutions(tools, tool_dirs) diff --git a/tensorflow/compiler/mlir/runlit.site.cfg.py b/tensorflow/compiler/mlir/runlit.site.cfg.py index 82175d7f680..b4d3e6185a6 100644 --- a/tensorflow/compiler/mlir/runlit.site.cfg.py +++ b/tensorflow/compiler/mlir/runlit.site.cfg.py @@ -47,6 +47,7 @@ mlir_tf_tools_dirs = [ 'tensorflow/compiler/mlir/tensorflow', 'tensorflow/compiler/mlir/tfjs', 'tensorflow/compiler/mlir/xla', + 'tensorflow/compiler/mlir/tools/kernel_gen', 'tensorflow/compiler/aot', 'tensorflow/compiler/xla/service/mlir_gpu', 'tensorflow/compiler/xla/service/gpu/tests', diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/BUILD 
b/tensorflow/compiler/mlir/tools/kernel_gen/BUILD index b5735f823e4..de5926301dd 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/BUILD +++ b/tensorflow/compiler/mlir/tools/kernel_gen/BUILD @@ -50,3 +50,15 @@ tf_cc_binary( "@llvm-project//llvm:Support", ], ) + +tf_cc_binary( + name = "kernel-gen-opt", + visibility = ["//tensorflow/compiler/mlir/tools/kernel_gen/tests:__pkg__"], + deps = [ + "//tensorflow/compiler/mlir/tensorflow:tensorflow_dialect_registration", + "//tensorflow/compiler/mlir/tools/kernel_gen/ir:tf_framework_dialect_registration", + "@llvm-project//mlir:AllPassesAndDialects", + "@llvm-project//mlir:MlirOptLib", + "@llvm-project//mlir:MlirOptMain", + ], +) diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/ir/BUILD b/tensorflow/compiler/mlir/tools/kernel_gen/ir/BUILD new file mode 100644 index 00000000000..11fe9159e6d --- /dev/null +++ b/tensorflow/compiler/mlir/tools/kernel_gen/ir/BUILD @@ -0,0 +1,55 @@ +load("//third_party/mlir:tblgen.bzl", "gentbl") + +package( + default_visibility = [":friends"], + licenses = ["notice"], # Apache 2.0 +) + +package_group( + name = "friends", + includes = ["//third_party/mlir:subpackages"], + packages = ["//tensorflow/compiler/mlir/..."], +) + +gentbl( + name = "tf_framework_ops_inc_gen", + tbl_outs = [ + ("-gen-op-decls", "tf_framework_ops.h.inc"), + ("-gen-op-defs", "tf_framework_ops.cc.inc"), + ("-gen-struct-attr-decls", "tf_framework_structs.h.inc"), + ("-gen-struct-attr-defs", "tf_framework_structs.cc.inc"), + ("-gen-dialect-decls", "tf_framework_dialect.h.inc"), + ], + tblgen = "@llvm-project//mlir:mlir-tblgen", + td_file = "tf_framework_ops.td", + td_srcs = [ + "tf_framework_ops.td", + "@llvm-project//mlir:OpBaseTdFiles", + "@llvm-project//mlir:include/mlir/Interfaces/SideEffectInterfaces.td", + ], +) + +cc_library( + name = "tf_framework_ops", + srcs = [ + "tf_framework_ops.cc", + "tf_framework_ops.cc.inc", + "tf_framework_ops.h.inc", + ], + hdrs = ["tf_framework_ops.h"], + deps = [ + ":tf_framework_ops_inc_gen", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:SideEffects", + ], +) + +cc_library( + name = "tf_framework_dialect_registration", + srcs = ["dialect_registration.cc"], + deps = [ + ":tf_framework_ops", + "@llvm-project//mlir:IR", + ], + alwayslink = 1, +) diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/ir/dialect_registration.cc b/tensorflow/compiler/mlir/tools/kernel_gen/ir/dialect_registration.cc new file mode 100644 index 00000000000..a2e5955b570 --- /dev/null +++ b/tensorflow/compiler/mlir/tools/kernel_gen/ir/dialect_registration.cc @@ -0,0 +1,21 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.h" + +// Static initialization for TF Framework dialect registration. 
+static mlir::DialectRegistration< + mlir::kernel_gen::tf_framework::TFFrameworkDialect> + tf_framework_ops; diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.cc b/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.cc new file mode 100644 index 00000000000..9a8a6ea006b --- /dev/null +++ b/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.cc @@ -0,0 +1,87 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// This file defines the operations used in the tf_framework dialect. + +#include "tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.h" + +#include "mlir/IR/Builders.h" // from @llvm-project +#include "mlir/IR/DialectImplementation.h" // from @llvm-project + +namespace mlir { +#include "tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_structs.cc.inc" + +namespace kernel_gen { +namespace tf_framework { + +TFFrameworkDialect::TFFrameworkDialect(MLIRContext *context) + : Dialect(getDialectNamespace(), context) { + addOperations< +#define GET_OP_LIST +#include "tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.cc.inc" + >(); + addTypes(); +} + +/// Parse a type registered to this dialect. +Type TFFrameworkDialect::parseType(DialectAsmParser &parser) const { + StringRef keyword; + if (parser.parseKeyword(&keyword)) return Type(); + + if (keyword == "op_kernel_context") { + return OpKernelContextType::get(getContext()); + } + + parser.emitError(parser.getNameLoc(), "unknown TF Framework type: ") + << keyword; + return Type(); +} + +/// Print a type registered to this dialect. +void TFFrameworkDialect::printType(Type type, DialectAsmPrinter &os) const { + switch (type.getKind()) { + case TFFrameworkTypes::OpKernelContextType: + os << "op_kernel_context"; + return; + default: + llvm_unreachable("unexpected TF Framework type kind"); + } +} + +//===----------------------------------------------------------------------===// +// AllocLikeOp +//===----------------------------------------------------------------------===// +template +static LogicalResult Verify(AllocLikeOp op) { + static_assert(llvm::is_one_of::value, + "applies to only alloc_output or alloc_temp"); + // Check that the total number of operands matches the number of dynamic + // dimensions specified in the memref type. 
+ unsigned result_dyn_dims = op.getType().getNumDynamicDims(); + unsigned dyn_sizes_count = op.dyn_sizes().size(); + if (dyn_sizes_count != result_dyn_dims) + return op.emitOpError() + << "`dyn_sizes` count " << dyn_sizes_count + << " does not match dynamic dimensions count in the result type" + << op.getType(); + return success(); +} + +#define GET_OP_CLASSES +#include "tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.cc.inc" + +} // namespace tf_framework +} // namespace kernel_gen +} // namespace mlir diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.h b/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.h new file mode 100644 index 00000000000..6fde701d4c0 --- /dev/null +++ b/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.h @@ -0,0 +1,70 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// This file defines the operations used in the TFFramework dialect. +// +#ifndef TENSORFLOW_COMPILER_MLIR_TOOLS_KERNEL_GEN_IR_TF_FRAMEWORK_OPS_H_ +#define TENSORFLOW_COMPILER_MLIR_TOOLS_KERNEL_GEN_IR_TF_FRAMEWORK_OPS_H_ + +#include "mlir/IR/Attributes.h" // from @llvm-project +#include "mlir/IR/Dialect.h" // from @llvm-project +#include "mlir/IR/MLIRContext.h" // from @llvm-project +#include "mlir/IR/OpDefinition.h" // from @llvm-project +#include "mlir/IR/OpImplementation.h" // from @llvm-project +#include "mlir/IR/StandardTypes.h" // from @llvm-project +#include "mlir/Interfaces/SideEffectInterfaces.h" // from @llvm-project + +namespace mlir { + +#include "tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_structs.h.inc" + +namespace kernel_gen { +namespace tf_framework { + +namespace TFFrameworkTypes { +enum Kind { + // TODO(pifon): Replace enum value with + // OpKernelContextType = Type::FIRST_TF_FRAMEWORK_TYPE, + // after DialectSymbolRegistry.def is updated. + OpKernelContextType = Type::FIRST_PRIVATE_EXPERIMENTAL_0_TYPE, +}; +} // namespace TFFrameworkTypes + +/// OpKernelContextType corresponds to C++ class OpKernelContext defined in +/// tensorflow/core/framework/op_kernel.h +class OpKernelContextType + : public Type::TypeBase { + public: + using Base::Base; + + static OpKernelContextType get(MLIRContext *context) { + return Base::get(context, TFFrameworkTypes::Kind::OpKernelContextType); + } + + /// Support method to enable LLVM-style type casting. 
+  static bool kindof(unsigned kind) {
+    return kind == TFFrameworkTypes::Kind::OpKernelContextType;
+  }
+};
+
+#define GET_OP_CLASSES
+#include "tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_dialect.h.inc"
+#include "tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.h.inc"
+
+}  // namespace tf_framework
+}  // namespace kernel_gen
+}  // namespace mlir
+
+#endif  // TENSORFLOW_COMPILER_MLIR_TOOLS_KERNEL_GEN_IR_TF_FRAMEWORK_OPS_H_
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.td b/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.td
new file mode 100644
index 00000000000..e5488346008
--- /dev/null
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.td
@@ -0,0 +1,151 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This is the operation definition file for TF Framework ops.
+
+#ifndef TF_FRAMEWORK_OPS
+#define TF_FRAMEWORK_OPS
+
+include "mlir/IR/OpBase.td"
+include "mlir/Interfaces/SideEffectInterfaces.td"
+
+def TFFramework_Dialect : Dialect {
+  let name = "tf_framework";
+
+  let summary = "Types and operations for tf_framework dialect";
+  let description = [{
+    This dialect contains operations and types that correspond to the
+    TensorFlow C++ Framework.
+  }];
+  let cppNamespace = "kernel_gen::tf_framework";
+}
+
+def TFFramework_OpKernelContextType : DialectType()">,
+                "op_kernel_construction">,
+    BuildableType<"$_builder.getType<::mlir::kernel_gen::tf_framework::OpKernelContextType>()"> {
+  let typeDescription = [{
+    OpKernelContextType corresponds to C++ class OpKernelContext defined in
+    tensorflow/core/framework/op_kernel.h
+  }];
+}
+
+def AllocatorAttributes : StructAttr<"AllocatorAttributes",
+  TFFramework_Dialect, [
+    StructFieldAttr<"on_host", BoolAttr>,
+    StructFieldAttr<"nic_compatible", BoolAttr>,
+    StructFieldAttr<"gpu_compatible", BoolAttr>]> {
+  let description = "Equivalent to `tensorflow::AllocatorAttributes` in C++";
+}
+
+def AllocationAttributes : StructAttr<"AllocationAttributes",
+  TFFramework_Dialect, [
+    StructFieldAttr<"no_retry_on_failure",
+                    DefaultValuedAttr>,
+    StructFieldAttr<"allocation_will_be_logged",
+                    DefaultValuedAttr>]> {
+  let description = "Equivalent to `tensorflow::AllocationAttributes` in C++";
+}
+
+
+// Base class for TF Framework dialect ops.
+class TFFramework_Op traits = []> :
+    Op {
+  let verifier = "return Verify(*this);";
+}
+
+
+// Base class for TF Framework alloc ops.
+class TFFramework_AllocLikeOp traits = []> :
+    TFFramework_Op]>], traits)> {
+
+  let arguments = (ins TFFramework_OpKernelContextType:$op_kernel_ctx,
+                   Variadic:$dyn_sizes,
+                   OptionalAttr:$allocator_attrs,
+                   OptionalAttr:$allocation_attrs);
+  let results = (outs Res]>:$result);
+
+  let builders = [
+    OpBuilder<[{
+      OpBuilder &builder, OperationState &result, MemRefType memref_type,
+      Value op_kernel_ctx,
+      AllocatorAttributes allocator_attrs = AllocatorAttributes(),
+      AllocationAttributes allocation_attrs = AllocationAttributes()
+    }], [{
+      result.addOperands(op_kernel_ctx);
+      result.types.push_back(memref_type);
+      if (allocator_attrs)
+        result.addAttribute("allocator_attrs", allocator_attrs);
+      if (allocation_attrs)
+        result.addAttribute("allocation_attrs", allocation_attrs);
+    }]>,
+
+    OpBuilder<[{
+      OpBuilder &builder, OperationState &result, MemRefType memref_type,
+      Value op_kernel_ctx, ValueRange dyn_sizes,
+      AllocatorAttributes allocator_attrs = AllocatorAttributes(),
+      AllocationAttributes allocation_attrs = AllocationAttributes()
+    }], [{
+      build(builder, result, memref_type, op_kernel_ctx, allocator_attrs,
+            allocation_attrs);
+      result.addOperands(dyn_sizes);
+    }]>];
+
+  let extraClassDeclaration = [{
+    MemRefType getType() { return getResult().getType().cast(); }
+  }];
+  let assemblyFormat = [{
+    `(` $op_kernel_ctx (`,` $dyn_sizes^ )? `)` attr-dict `:` type($result)
+  }];
+}
+
+//===----------------------------------------------------------------------===//
+// AllocOutputOp
+//===----------------------------------------------------------------------===//
+
+def TFFramework_AllocOutputOp
+    : TFFramework_AllocLikeOp<"alloc_output", DefaultResource> {
+  let summary = "output tensor memory allocation operation";
+  let description = [{
+    Allocation of output tensors during kernel execution in the Compute method.
+
+    This should be used to allocate any tensor that is going to be used as an
+    output from the kernel at the end of the current execution.
+
+    Defined in third_party/tensorflow/core/framework/op_kernel.cc.
+  }];
+}
+
+//===----------------------------------------------------------------------===//
+// AllocTempOp
+//===----------------------------------------------------------------------===//
+
+def TFFramework_AllocTempOp
+    : TFFramework_AllocLikeOp<"alloc_temp", DefaultResource> {
+  let summary = "temporary memory allocation operation";
+  let description = [{
+    Allocation of temp tensors during kernel execution in the Compute method.
+
+    This should be used to allocate any scratch storage that is needed while
+    the kernel is executing, and will not be retained.
+
+    Defined in third_party/tensorflow/core/framework/op_kernel.cc.
+  }];
+}
+
+#endif // TF_FRAMEWORK_OPS
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tests/BUILD b/tensorflow/compiler/mlir/tools/kernel_gen/tests/BUILD
new file mode 100644
index 00000000000..db878df991b
--- /dev/null
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/tests/BUILD
@@ -0,0 +1,19 @@
+load("//tensorflow/compiler/mlir:glob_lit_test.bzl", "glob_lit_tests")
+
+package(licenses = ["notice"])
+
+glob_lit_tests(
+    data = [":test_utilities"],
+    driver = "@llvm-project//mlir:run_lit.sh",
+    test_file_exts = ["mlir"],
+)
+
+# Bundle together all of the test utilities that are used by tests.
+filegroup(
+    name = "test_utilities",
+    testonly = True,
+    data = [
+        "//tensorflow/compiler/mlir/tools/kernel_gen:kernel-gen-opt",
+        "@llvm-project//llvm:FileCheck",
+    ],
+)
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tests/invalid.mlir b/tensorflow/compiler/mlir/tools/kernel_gen/tests/invalid.mlir
new file mode 100644
index 00000000000..e8983b701c8
--- /dev/null
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/tests/invalid.mlir
@@ -0,0 +1,15 @@
+// RUN: kernel-gen-opt %s -split-input-file -verify-diagnostics
+
+func @alloc_output(%ctx: !tf_framework.op_kernel_context, %size : index) {
+  // expected-error @+1 {{`dyn_sizes` count 1 does not match dynamic dimensions}}
+  %buf = tf_framework.alloc_output(%ctx, %size) : memref
+  return
+}
+
+// -----
+
+func @alloc_temp(%ctx: !tf_framework.op_kernel_context, %size : index) {
+  // expected-error @+1 {{`dyn_sizes` count 1 does not match dynamic dimensions}}
+  %buf = tf_framework.alloc_temp(%ctx, %size) : memref<10xi8>
+  return
+}
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tests/ops.mlir b/tensorflow/compiler/mlir/tools/kernel_gen/tests/ops.mlir
new file mode 100644
index 00000000000..d2a4a8866ce
--- /dev/null
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/tests/ops.mlir
@@ -0,0 +1,21 @@
+// RUN: kernel-gen-opt %s | FileCheck %s
+// Verify the printed output can be parsed.
+// RUN: kernel-gen-opt %s | kernel-gen-opt -allow-unregistered-dialect | FileCheck %s
+// Verify the generic form can be parsed.
+// RUN: kernel-gen-opt -mlir-print-op-generic %s | kernel-gen-opt -allow-unregistered-dialect | FileCheck %s
+
+// CHECK-LABEL: func @alloc_output
+func @alloc_output(%ctx: !tf_framework.op_kernel_context,
+                   %size_0 : index , %size_2 : index) {
+  %buf_0 = tf_framework.alloc_output(%ctx) : memref<10xi8>
+  %buf_1 = tf_framework.alloc_output(%ctx, %size_0, %size_2) : memref
+  return
+}
+
+// CHECK-LABEL: func @alloc_temp
+func @alloc_temp(%ctx: !tf_framework.op_kernel_context,
+                 %size_0 : index , %size_2 : index) {
+  %buf_0 = tf_framework.alloc_temp(%ctx) : memref<10xi8>
+  %buf_1 = tf_framework.alloc_temp(%ctx, %size_0, %size_2) : memref
+  return
+}

From 0b175ea29563da11dcfed3c70fbb93beff36f118 Mon Sep 17 00:00:00 2001
From: Nick Kreeger
Date: Mon, 27 Jul 2020 06:43:24 -0700
Subject: [PATCH 1371/2522] Cleanup, store quant data, and port the mul kernel to the new TfLiteEvalTensor API.

The current implementation duplicated some fields in OpData and recalculated
them on every call to Eval(). This change cleans that up, stores the data
once, and uses the new TfLiteEvalTensor API.

PiperOrigin-RevId: 323350992
Change-Id: Icc3892a8a6cee33110111e5a3bd253e7656e6595
---
 tensorflow/lite/micro/kernels/BUILD | 1 +
 tensorflow/lite/micro/kernels/mul.cc | 188 ++++++++++++++--------
 tensorflow/lite/micro/kernels/mul_test.cc | 72 ++-------
 3 files changed, 131 insertions(+), 130 deletions(-)

diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD
index cc719feb4bf..717ab188f2c 100644
--- a/tensorflow/lite/micro/kernels/BUILD
+++ b/tensorflow/lite/micro/kernels/BUILD
@@ -304,6 +304,7 @@ tflite_micro_cc_test(
         "mul_test.cc",
     ],
     deps = [
+        ":kernel_runner",
         "//tensorflow/lite/c:common",
         "//tensorflow/lite/micro:op_resolvers",
         "//tensorflow/lite/micro/testing:micro_test",
     ],
 )
diff --git a/tensorflow/lite/micro/kernels/mul.cc b/tensorflow/lite/micro/kernels/mul.cc
index 951ae9c99ee..36e41a36456 100644
--- a/tensorflow/lite/micro/kernels/mul.cc
+++ b/tensorflow/lite/micro/kernels/mul.cc
@@ -21,23 +21,31 @@ limitations under the License.
#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" #include "tensorflow/lite/micro/memory_helpers.h" namespace tflite { namespace ops { namespace micro { namespace mul { +namespace { constexpr int kInput1Tensor = 0; constexpr int kInput2Tensor = 1; constexpr int kOutputTensor = 0; struct OpData { + int32_t input1_zero_point; + int32_t input2_zero_point; + int32_t output_activation_min; int32_t output_activation_max; - + int32_t output_zero_point; int32_t output_multiplier; int output_shift; + + float output_activation_min_f32; + float output_activation_max_f32; }; TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, @@ -61,105 +69,143 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, static_cast(output->params.scale); QuantizeMultiplier(real_multiplier, &data->output_multiplier, &data->output_shift); + + data->input1_zero_point = input1->params.zero_point; + data->input2_zero_point = input2->params.zero_point; + data->output_zero_point = output->params.zero_point; + } else { + CalculateActivationRange(params->activation, + &data->output_activation_min_f32, + &data->output_activation_max_f32); } return kTfLiteOk; } -TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { - const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor); - const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); +} // namespace - if (output->dims->size == 0) { - return AllocateOutputDimensionsFromInput(context, input1, input2, output); - } +void EvalQuantized(TfLiteContext* context, TfLiteNode* node, const OpData* data, + const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) { + tflite::ArithmeticParams op_params = {}; + op_params.quantized_activation_min = data->output_activation_min; + op_params.quantized_activation_max = data->output_activation_max; + op_params.float_activation_max = data->output_activation_max_f32; + op_params.input1_offset = -data->input1_zero_point; + op_params.input2_offset = -data->input2_zero_point; + op_params.output_offset = data->output_zero_point; + op_params.output_multiplier = data->output_multiplier; + op_params.output_shift = data->output_shift; - return kTfLiteOk; -} + bool need_broadcast = reference_ops::ProcessBroadcastShapes( + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorShape(input2), &op_params); -void EvalQuantized(TfLiteContext* context, TfLiteNode* node, - TfLiteMulParams* params, OpData* data, - const TfLiteTensor* input1, const TfLiteTensor* input2, - TfLiteTensor* output) { - if (output->type == kTfLiteInt8 || output->type == kTfLiteUInt8) { - tflite::ArithmeticParams op_params; - SetActivationParams(data->output_activation_min, - data->output_activation_max, &op_params); - op_params.input1_offset = -input1->params.zero_point; - op_params.input2_offset = -input2->params.zero_point; - op_params.output_offset = output->params.zero_point; - op_params.output_multiplier = data->output_multiplier; - op_params.output_shift = data->output_shift; - bool need_broadcast = reference_ops::ProcessBroadcastShapes( - GetTensorShape(input1), GetTensorShape(input2), &op_params); - -#define TF_LITE_MUL(type, opname, dtype) \ - type::opname(op_params, GetTensorShape(input1), \ - GetTensorData(input1), 
GetTensorShape(input2), \ - GetTensorData(input2), GetTensorShape(output), \ - GetTensorData(output)); - - if (output->type == kTfLiteInt8) { - if (need_broadcast) { - TF_LITE_MUL(reference_integer_ops, BroadcastMul4DSlow, int8_t); - } else { - TF_LITE_MUL(reference_integer_ops, Mul, int8_t); - } - } else if (output->type == kTfLiteUInt8) { - if (need_broadcast) { - TF_LITE_MUL(reference_ops, BroadcastMul4DSlow, uint8_t); - } else { - TF_LITE_MUL(reference_ops, Mul, uint8_t); - } + if (output->type == kTfLiteInt8) { + if (need_broadcast) { + reference_integer_ops::BroadcastMul4DSlow( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } else { + reference_integer_ops::Mul(op_params, + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } + } else if (output->type == kTfLiteUInt8) { + if (need_broadcast) { + reference_integer_ops::BroadcastMul4DSlow( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); + } else { + reference_integer_ops::Mul(op_params, + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } -#undef TF_LITE_MUL } } void EvalFloat(TfLiteContext* context, TfLiteNode* node, - TfLiteMulParams* params, OpData* data, - const TfLiteTensor* input1, const TfLiteTensor* input2, - TfLiteTensor* output) { - float output_activation_min, output_activation_max; - CalculateActivationRange(params->activation, &output_activation_min, - &output_activation_max); - tflite::ArithmeticParams op_params; - SetActivationParams(output_activation_min, output_activation_max, &op_params); + TfLiteMulParams* params, const OpData* data, + const TfLiteEvalTensor* input1, const TfLiteEvalTensor* input2, + TfLiteEvalTensor* output) { + tflite::ArithmeticParams op_params = {}; + op_params.float_activation_min = data->output_activation_min_f32; + op_params.float_activation_max = data->output_activation_max_f32; bool need_broadcast = reference_ops::ProcessBroadcastShapes( - GetTensorShape(input1), GetTensorShape(input2), &op_params); -#define TF_LITE_MUL(opname) \ - reference_ops::opname(op_params, GetTensorShape(input1), \ - GetTensorData(input1), GetTensorShape(input2), \ - GetTensorData(input2), GetTensorShape(output), \ - GetTensorData(output)); + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorShape(input2), &op_params); if (need_broadcast) { - TF_LITE_MUL(BroadcastMul4DSlow); + reference_ops::BroadcastMul4DSlow( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } else { - TF_LITE_MUL(Mul); + reference_ops::Mul(op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + 
tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } -#undef TF_LITE_MUL +} + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpData)); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->builtin_data != nullptr); + auto* params = reinterpret_cast(node->builtin_data); + + TFLITE_DCHECK(node->user_data != nullptr); + OpData* data = static_cast(node->user_data); + + return CalculateOpData(context, node, params, data); } TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->builtin_data != nullptr); auto* params = reinterpret_cast(node->builtin_data); - OpData data; - const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor); - const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TFLITE_DCHECK(node->user_data != nullptr); + const OpData* data = static_cast(node->user_data); - TF_LITE_ENSURE_STATUS(CalculateOpData(context, node, params, &data)); + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kInput1Tensor); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kInput2Tensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); switch (input1->type) { case kTfLiteUInt8: case kTfLiteInt8: - EvalQuantized(context, node, params, &data, input1, input2, output); + EvalQuantized(context, node, data, input1, input2, output); break; case kTfLiteFloat32: - EvalFloat(context, node, params, &data, input1, input2, output); + EvalFloat(context, node, params, data, input1, input2, output); break; default: TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", @@ -172,9 +218,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace mul TfLiteRegistration Register_MUL() { - return {/*init=*/nullptr, + return {/*init=*/mul::Init, /*free=*/nullptr, - /*prepare=*/nullptr, + /*prepare=*/mul::Prepare, /*invoke=*/mul::Eval, /*profiling_string=*/nullptr, /*builtin_code=*/0, diff --git a/tensorflow/lite/micro/kernels/mul_test.cc b/tensorflow/lite/micro/kernels/mul_test.cc index 3601d91d8f7..e39f6693fcd 100644 --- a/tensorflow/lite/micro/kernels/mul_test.cc +++ b/tensorflow/lite/micro/kernels/mul_test.cc @@ -15,7 +15,7 @@ limitations under the License. 
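// A hedged usage sketch of the KernelRunner pattern the updated test relies
// on (surrounding tensor/array setup is assumed; names mirror the test):
TfLiteMulParams builtin_data = {
    .activation = kTfLiteActNone,
};
const TfLiteRegistration registration = tflite::ops::micro::Register_MUL();
tflite::micro::KernelRunner runner(
    registration, tensors, tensors_size, inputs_array, outputs_array,
    reinterpret_cast<void*>(&builtin_data), micro_test::reporter);
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare());
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke());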
#include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/micro/all_ops_resolver.h" +#include "tensorflow/lite/micro/kernels/kernel_runner.h" #include "tensorflow/lite/micro/testing/micro_test.h" #include "tensorflow/lite/micro/testing/test_utils.h" @@ -35,8 +35,6 @@ void TestMulFloat(std::initializer_list input1_dims_data, TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); const int output_dims_count = ElementCount(*output_dims); - ::tflite::AllOpsResolver resolver; - constexpr int inputs_size = 2; constexpr int outputs_size = 1; constexpr int tensors_size = inputs_size + outputs_size; @@ -46,43 +44,22 @@ void TestMulFloat(std::initializer_list input1_dims_data, CreateFloatTensor(output_data, output_dims), }; - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_MUL); - - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - TfLiteMulParams builtin_data = { .activation = activation, }; - const char* init_data = reinterpret_cast(&builtin_data); - size_t init_data_size = 0; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, init_data, init_data_size); - } - int inputs_array_data[] = {2, 0, 1}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 2}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = reinterpret_cast(&builtin_data); - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; + const TfLiteRegistration registration = tflite::ops::micro::Register_MUL(); + micro::KernelRunner runner( + registration, tensors, tensors_size, inputs_array, outputs_array, + reinterpret_cast(&builtin_data), micro_test::reporter); - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); for (int i = 0; i < output_dims_count; i++) { TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i], @@ -106,8 +83,6 @@ void TestMulQuantized(std::initializer_list input1_dims_data, TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); const int output_dims_count = ElementCount(*output_dims); - ::tflite::AllOpsResolver resolver; - constexpr int inputs_size = 2; constexpr int outputs_size = 1; constexpr int tensors_size = inputs_size + outputs_size; @@ -117,43 +92,22 @@ void TestMulQuantized(std::initializer_list input1_dims_data, CreateQuantizedTensor(output_data, output_dims, output_min, output_max), }; - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_MUL); - - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - TfLiteMulParams builtin_data = { .activation = activation, }; - const char* init_data = reinterpret_cast(&builtin_data); - size_t init_data_size = 0; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, init_data, init_data_size); - } 
- int inputs_array_data[] = {2, 0, 1}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 2}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = reinterpret_cast(&builtin_data); - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; + const TfLiteRegistration registration = tflite::ops::micro::Register_MUL(); + micro::KernelRunner runner( + registration, tensors, tensors_size, inputs_array, outputs_array, + reinterpret_cast(&builtin_data), micro_test::reporter); - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); for (int i = 0; i < output_dims_count; i++) { TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i], From 0c257f78ec12483352f2efdc4bacef473d3c3b03 Mon Sep 17 00:00:00 2001 From: Thomas Joerg Date: Mon, 27 Jul 2020 07:13:38 -0700 Subject: [PATCH 1372/2522] Integrate LLVM at llvm/llvm-project@eed333149d17 Updates LLVM usage to match [eed333149d17](https://github.com/llvm/llvm-project/commit/eed333149d17) PiperOrigin-RevId: 323354988 Change-Id: I158cfc8b7706bfb298eb0b3423c3371d3ff0d3b5 --- .../mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc | 3 +- .../hlo/lib/Dialect/mhlo/IR/hlo_patterns.td | 9 +++-- .../mhlo/transforms/chlo_legalize_to_hlo.cc | 1 - .../mhlo/transforms/transform_unranked_hlo.cc | 7 ++-- .../compiler/mlir/hlo/tests/canonicalize.mlir | 19 +++++++--- .../chlo_legalize_to_hlo_broadcasts.mlir | 38 +++++++++++-------- .../hlo/tests/mhlo-transform-unranked.mlir | 12 +++--- .../xla/tests/legalize-tf-BatchMatMulV2.mlir | 8 ++-- .../compiler/mlir/xla/tests/legalize-tf.mlir | 10 ++--- tensorflow/workspace.bzl | 4 +- third_party/mlir/BUILD | 20 ---------- 11 files changed, 65 insertions(+), 66 deletions(-) diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc index 411c6583861..fe14e40b3eb 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc @@ -747,7 +747,8 @@ class DynamicBroadcastInDimOpNotActuallyDynamic void DynamicBroadcastInDimOp::getCanonicalizationPatterns( OwningRewritePatternList& results, MLIRContext* context) { results.insert(context); + DynamicBroadcastToOwnShape_1, DynamicBroadcastToOwnShape_2>( + context); } //===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_patterns.td b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_patterns.td index e598465f034..dab49740b4b 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_patterns.td +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_patterns.td @@ -22,8 +22,11 @@ def EqualBinaryOperands : Constraint>; // Canonicalization patterns. 
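// Illustrative IR for what the patterns below fold (element types are
// assumptions based on this change's canonicalize.mlir tests): a
// dynamic_broadcast_in_dim whose target shape is the shape of the value being
// broadcast, e.g.
//   %s = shape.shape_of %arg : tensor<?xf32> -> tensor<1xindex>
//   %b = "mhlo.dynamic_broadcast_in_dim"(%arg, %s)
//          {broadcast_dimensions = dense<0> : tensor<1xi64>}
//          : (tensor<?xf32>, tensor<1xindex>) -> tensor<?xf32>
// is replaced by %arg. Pattern _1 matches the form with an intervening
// shape.to_extent_tensor; pattern _2 matches the form where shape_of feeds
// the op directly.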
-def DynamicBroadcastToOwnShape : Pat< - (HLO_DynamicBroadcastInDimOp:$op $arg0, - (Shape_ToExtentTensorOp (Shape_ShapeOfOp $arg1)), $attr), +def DynamicBroadcastToOwnShape_1 : Pat< + (HLO_DynamicBroadcastInDimOp:$op $arg0, + (Shape_ToExtentTensorOp (Shape_ShapeOfOp $arg1)), $attr), + (replaceWithValue $arg0), [(EqualBinaryOperands $arg0, $arg1)]>; +def DynamicBroadcastToOwnShape_2 : Pat< + (HLO_DynamicBroadcastInDimOp:$op $arg0, (Shape_ShapeOfOp $arg1), $attr), (replaceWithValue $arg0), [(EqualBinaryOperands $arg0, $arg1)]>; diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo.cc index a5923f270d3..d86d01df3b3 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo.cc @@ -29,7 +29,6 @@ limitations under the License. namespace mlir { namespace chlo { - namespace { // Converts binary ops that statically are determined to not broadcast directly diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/transform_unranked_hlo.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/transform_unranked_hlo.cc index 53947855cc7..d2852394ee8 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/transform_unranked_hlo.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/transform_unranked_hlo.cc @@ -61,10 +61,9 @@ struct UnaryElementwiseOpConversion : public OpRewritePattern { // Generate IR to flatten the operand. auto loc = op.getLoc(); Value shape = rewriter.create(loc, operand); - Value numElements = rewriter.create( - loc, rewriter.getType(), shape); - Value numElementsAsIndex = rewriter.create( - loc, rewriter.getIndexType(), numElements); + Value numElements = rewriter.create(loc, shape); + Value numElementsAsIndex = + rewriter.create(loc, numElements); Value flatShapeAsDimTensor = rewriter.create(loc, numElementsAsIndex); auto flatTensorTy = RankedTensorType::get({ShapedType::kDynamicSize}, diff --git a/tensorflow/compiler/mlir/hlo/tests/canonicalize.mlir b/tensorflow/compiler/mlir/hlo/tests/canonicalize.mlir index f773c95237e..f0fe52266f0 100644 --- a/tensorflow/compiler/mlir/hlo/tests/canonicalize.mlir +++ b/tensorflow/compiler/mlir/hlo/tests/canonicalize.mlir @@ -365,11 +365,20 @@ func @dynamic_broadcast_in_dim_op_not_actually_dynamic(%arg0: tensor<4xf32>, %ar return %0 : tensor<5x4xf32> } -// CHECK-LABEL: func @dynamic_broadcast_in_dim_to_same_shape -func @dynamic_broadcast_in_dim_to_same_shape(%arg0: tensor) -> tensor { -// CHECK-SAME: %[[ARG:.*]]: tensor - %0 = shape.shape_of %arg0 : tensor - %1 = shape.to_extent_tensor %0 : tensor<1xindex> +// CHECK-LABEL: func @dynamic_broadcast_in_dim_to_same_shape_1 +func @dynamic_broadcast_in_dim_to_same_shape_1(%arg0: tensor) -> tensor { + // CHECK-SAME: %[[ARG:.*]]: tensor + %0 = shape.shape_of %arg0 : tensor -> tensor<1xindex> + %2 = "mhlo.dynamic_broadcast_in_dim"(%arg0, %0) { broadcast_dimensions = dense<0> : tensor<1xi64> } : (tensor, tensor<1xindex>) -> tensor + // CHECK: return %[[ARG]] : tensor + return %2 : tensor +} + +// CHECK-LABEL: func @dynamic_broadcast_in_dim_to_same_shape_2 +func @dynamic_broadcast_in_dim_to_same_shape_2(%arg0: tensor) -> tensor { + // CHECK-SAME: %[[ARG:.*]]: tensor + %0 = shape.shape_of %arg0 : tensor -> !shape.shape + %1 = shape.to_extent_tensor %0 : !shape.shape -> tensor<1xindex> %2 = "mhlo.dynamic_broadcast_in_dim"(%arg0, 
%1) { broadcast_dimensions = dense<0> : tensor<1xi64> } : (tensor, tensor<1xindex>) -> tensor // CHECK: return %[[ARG]] : tensor return %2 : tensor diff --git a/tensorflow/compiler/mlir/hlo/tests/chlo_legalize_to_hlo_broadcasts.mlir b/tensorflow/compiler/mlir/hlo/tests/chlo_legalize_to_hlo_broadcasts.mlir index 7782b4dcf6b..997136e58cc 100644 --- a/tensorflow/compiler/mlir/hlo/tests/chlo_legalize_to_hlo_broadcasts.mlir +++ b/tensorflow/compiler/mlir/hlo/tests/chlo_legalize_to_hlo_broadcasts.mlir @@ -18,7 +18,9 @@ func @dynamicBroadcast(%arg0: tensor, %arg1: tensor) -> tensor : tensor<1xi64>} // CHECK-DAG: %[[ARG1_B:.+]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG1]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} @@ -39,7 +41,9 @@ func @dynamicBroadcastComplex(%arg0: tensor, %arg1: tensor) -> t // CHECK-DAG: %[[ARG1_S:.+]] = shape.shape_of %[[ARG1]] // CHECK-NEXT: %[[WITNESS:.+]] = shape.cstr_broadcastable %[[ARG0_S]], %[[ARG1_S]] // CHECK-NEXT: %[[FINAL_RESULT:.+]] = shape.assuming %[[WITNESS]] - // CHECK-NEXT: %[[RESULT_S:.+]] = shape.broadcast %[[ARG0_S]], %[[ARG1_S]] + // CHECK-DAG: %[[ARG0_SS:.+]] = shape.shape_of %[[ARG0]] + // CHECK-DAG: %[[ARG1_SS:.+]] = shape.shape_of %[[ARG1]] + // CHECK-NEXT: %[[RESULT_S:.+]] = shape.broadcast %[[ARG0_SS]], %[[ARG1_SS]] // CHECK-NEXT: %[[RESULT_EXTENTS:.+]] = shape.to_extent_tensor %[[RESULT_S]] // CHECK-DAG: %[[ARG0_B:.+]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG0]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor, tensor<2xindex>) -> tensor // CHECK-DAG: %[[ARG1_B:.+]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG1]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} : (tensor, tensor<2xindex>) -> tensor @@ -60,7 +64,9 @@ func @dynamicBroadcastCompare(%arg0: tensor, %arg1: tensor) -> t // CHECK-DAG: %[[ARG1_S:.+]] = shape.shape_of %[[ARG1]] // CHECK: %[[WITNESS:.+]] = shape.cstr_broadcastable %[[ARG0_S]], %[[ARG1_S]] // CHECK: %[[FINAL_RESULT:.+]] = shape.assuming %[[WITNESS]] - // CHECK: %[[RESULT_S:.+]] = shape.broadcast %[[ARG0_S]], %[[ARG1_S]] + // CHECK-DAG: %[[ARG0_SS:.+]] = shape.shape_of %[[ARG0]] + // CHECK-DAG: %[[ARG1_SS:.+]] = shape.shape_of %[[ARG1]] + // CHECK: %[[RESULT_S:.+]] = shape.broadcast %[[ARG0_SS]], %[[ARG1_SS]] // CHECK: %[[RESULT_EXTENTS:.+]] = shape.to_extent_tensor %[[RESULT_S]] // CHECK-DAG: %[[ARG0_B:.+]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG0]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor, tensor<2xindex>) -> tensor // CHECK-DAG: %[[ARG1_B:.+]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG1]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} : (tensor, tensor<2xindex>) -> tensor @@ -253,8 +259,8 @@ func @addScalarUnranked(%arg0: tensor, %arg1: tensor<*xf32>) -> tensor<*xf3 // to a 1D tensor. // CHECK: %[[SHAPE_1:.*]] = shape.shape_of %[[ARG_1]] : tensor<*xf32> // CHECK: %[[NUM_ELEMENTS:.*]] = shape.num_elements %[[SHAPE_1]] -// CHECK: %[[SIZE:.*]] = shape.size_to_index %[[NUM_ELEMENTS]] -// CHECK: %[[SIZE_TENSOR:.*]] = tensor_from_elements(%[[SIZE]]) : tensor<1xindex> +// CHECK: %[[NUM_ELEMENTS_INDEX:.*]] = shape.size_to_index %[[NUM_ELEMENTS]] +// CHECK: %[[SIZE_TENSOR:.*]] = tensor_from_elements(%[[NUM_ELEMENTS_INDEX]]) : tensor<1xindex> // CHECK: %[[RESHAPED:.*]] = "mhlo.dynamic_reshape"(%[[ARG_1]], %[[SIZE_TENSOR]]) : (tensor<*xf32>, tensor<1xindex>) -> tensor // The assuming region is part of the second stage of lowering // with ranked broadcasting logic. 
@@ -263,8 +269,9 @@ func @addScalarUnranked(%arg0: tensor, %arg1: tensor<*xf32>) -> tensor<*xf3 // CHECK: %[[WITNESS:.*]] = shape.cstr_broadcastable %[[SHAPE_0]], %[[SHAPE_RESHAPED]] // CHECK: %[[ASSUMING_RESULT:.*]] = shape.assuming %[[WITNESS]] -> (tensor) { // CHECK: %[[SCALAR_SHAPE:.*]] = shape.const_shape [] +// CHECK: %[[SHAPE_RESHAPED:.*]] = shape.shape_of %[[RESHAPED]] : tensor // CHECK: %[[BROADCASTED_SHAPE:.*]] = shape.broadcast %[[SCALAR_SHAPE]], %[[SHAPE_RESHAPED]] -// CHECK: %[[SHAPE_TENSOR:.*]] = shape.to_extent_tensor %[[BROADCASTED_SHAPE]] : tensor<1xindex> +// CHECK: %[[SHAPE_TENSOR:.*]] = shape.to_extent_tensor %[[BROADCASTED_SHAPE]] : !shape.shape -> tensor<1xindex> // CHECK: %[[BROADCASTED_LHS:.*]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG_0]], %[[SHAPE_TENSOR]]) {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor, tensor<1xindex>) -> tensor // CHECK: %[[BROADCASTED_RHS:.*]] = "mhlo.dynamic_broadcast_in_dim"(%[[RESHAPED]], %[[SHAPE_TENSOR]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} : (tensor, tensor<1xindex>) -> tensor // CHECK: %[[BROADCASTED_RESULT:.*]] = mhlo.add %[[BROADCASTED_LHS]], %[[BROADCASTED_RHS]] : tensor @@ -272,8 +279,8 @@ func @addScalarUnranked(%arg0: tensor, %arg1: tensor<*xf32>) -> tensor<*xf3 // CHECK: } // As part of the unranked logic, the result is reshaped back // to an unranked tensor. -// CHECK: %[[PROPER_SHAPE_TENSOR:.*]] = shape.to_extent_tensor %[[SHAPE_1]] : tensor -// CHECK: %[[RESHAPED_RESULT:.*]] = "mhlo.dynamic_reshape"(%[[VAL_19:.*]], %[[PROPER_SHAPE_TENSOR]]) : (tensor, tensor) -> tensor<*xf32> +// CHECK: %[[SHAPE_2:.*]] = shape.to_extent_tensor %[[SHAPE_1]] : tensor -> tensor +// CHECK: %[[RESHAPED_RESULT:.*]] = "mhlo.dynamic_reshape"(%[[ASSUMING_RESULT:.*]], %[[SHAPE_2]]) : (tensor, tensor) -> tensor<*xf32> // CHECK: return %[[RESHAPED_RESULT]] : tensor<*xf32> // CHECK: } @@ -290,8 +297,8 @@ func @addUnrankedScalar(%arg0: tensor<*xf32>, %arg1: tensor) -> tensor<*xf3 // to a 1D tensor. // CHECK: %[[SHAPE_0:.*]] = shape.shape_of %[[ARG_0]] : tensor<*xf32> // CHECK: %[[NUM_ELEMENTS:.*]] = shape.num_elements %[[SHAPE_0]] -// CHECK: %[[SIZE:.*]] = shape.size_to_index %[[NUM_ELEMENTS]] -// CHECK: %[[SIZE_TENSOR:.*]] = tensor_from_elements(%[[SIZE]]) : tensor<1xindex> +// CHECK: %[[NUM_ELEMENTS_INDEX:.*]] = shape.size_to_index %[[NUM_ELEMENTS]] +// CHECK: %[[SIZE_TENSOR:.*]] = tensor_from_elements(%[[NUM_ELEMENTS_INDEX]]) : tensor<1xindex> // CHECK: %[[RESHAPED:.*]] = "mhlo.dynamic_reshape"(%[[ARG_0]], %[[SIZE_TENSOR]]) : (tensor<*xf32>, tensor<1xindex>) -> tensor // The assuming region is part of the second stage of lowering // with ranked broadcasting logic. 
@@ -299,15 +306,16 @@ func @addUnrankedScalar(%arg0: tensor<*xf32>, %arg1: tensor) -> tensor<*xf3 // CHECK: %[[SHAPE_1:.*]] = shape.shape_of %[[ARG_1]] : tensor // CHECK: %[[WITNESS:.*]] = shape.cstr_broadcastable %[[SHAPE_RESHAPED]], %[[SHAPE_1]] // CHECK: %[[ASSUMING_RESULT:.*]] = shape.assuming %[[WITNESS]] -> (tensor) { -// CHECK: %[[SHAPE_TENSOR:.*]] = shape.to_extent_tensor %[[SHAPE_RESHAPED]] : tensor<1xindex> -// CHECK: %[[BROADCASTED_LHS:.*]] = "mhlo.dynamic_broadcast_in_dim"(%[[RESHAPED]], %[[SHAPE_TENSOR]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} : (tensor, tensor<1xindex>) -> tensor -// CHECK: %[[BROADCASTED_RHS:.*]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG_1]], %[[SHAPE_TENSOR]]) {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor, tensor<1xindex>) -> tensor +// CHECK: %[[SHAPE_OF:.*]] = shape.shape_of %[[RESHAPED]] : tensor +// CHECK: %[[SHAPE_RESHAPED:.*]] = shape.to_extent_tensor %[[SHAPE_OF]] +// CHECK: %[[BROADCASTED_LHS:.*]] = "mhlo.dynamic_broadcast_in_dim"(%[[RESHAPED]], %[[SHAPE_RESHAPED]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} : (tensor, tensor<1xindex>) -> tensor +// CHECK: %[[BROADCASTED_RHS:.*]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG_1]], %[[SHAPE_RESHAPED]]) {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor, tensor<1xindex>) -> tensor // CHECK: %[[BROADCASTED_RESULT:.*]] = mhlo.add %[[BROADCASTED_LHS]], %[[BROADCASTED_RHS]] : tensor // CHECK: shape.assuming_yield %[[BROADCASTED_RESULT]] : tensor // CHECK: } // As part of the unranked logic, the result is reshaped back // to an unranked tensor. -// CHECK: %[[PROPER_SHAPE_TENSOR:.*]] = shape.to_extent_tensor %[[SHAPE_0]] : tensor -// CHECK: %[[RESHAPED_RESULT:.*]] = "mhlo.dynamic_reshape"(%[[VAL_19:.*]], %[[PROPER_SHAPE_TENSOR]]) : (tensor, tensor) -> tensor<*xf32> +// CHECK: %[[SHAPE_2:.*]] = shape.to_extent_tensor %[[SHAPE_0]] +// CHECK: %[[RESHAPED_RESULT:.*]] = "mhlo.dynamic_reshape"(%[[ASSUMING_RESULT:.*]], %[[SHAPE_2]]) : (tensor, tensor) -> tensor<*xf32> // CHECK: return %[[RESHAPED_RESULT]] : tensor<*xf32> // CHECK: } diff --git a/tensorflow/compiler/mlir/hlo/tests/mhlo-transform-unranked.mlir b/tensorflow/compiler/mlir/hlo/tests/mhlo-transform-unranked.mlir index 80474156f29..6cc07e0460c 100644 --- a/tensorflow/compiler/mlir/hlo/tests/mhlo-transform-unranked.mlir +++ b/tensorflow/compiler/mlir/hlo/tests/mhlo-transform-unranked.mlir @@ -5,9 +5,9 @@ func @sqr_transform_result(%a: tensor<*xf32>) -> tensor<*xf32> { // Flatten operand shape. - %shape = shape.shape_of %a : tensor<*xf32> - %num_elements = shape.num_elements %shape - %num_elements_as_index = shape.size_to_index %num_elements + %shape = shape.shape_of %a : tensor<*xf32> -> tensor + %num_elements = shape.num_elements %shape : tensor -> index + %num_elements_as_index = shape.size_to_index %num_elements : index %flat_shape = tensor_from_elements(%num_elements_as_index) : tensor<1xindex> %flat_a = "mhlo.dynamic_reshape"(%a, %flat_shape) : (tensor<*xf32>, tensor<1xindex>) -> tensor @@ -16,7 +16,7 @@ func @sqr_transform_result(%a: tensor<*xf32>) -> tensor<*xf32> { %flat_b = "mhlo.sqrt"(%flat_a) : (tensor) -> tensor // Restore original shape. 
- %shape_as_extent_tensor = shape.to_extent_tensor %shape : tensor + %shape_as_extent_tensor = shape.to_extent_tensor %shape : tensor -> tensor %b = "mhlo.dynamic_reshape"(%flat_b, %shape_as_extent_tensor) : (tensor, tensor) -> tensor<*xf32> @@ -73,14 +73,14 @@ func @sqrt_static(%a: tensor<2x3xf32>) -> tensor<2x3xf32> { func @add_unranked(%a : tensor<*xf32>, %b : tensor<*xf32>) -> tensor<*xf32> { // CHECK: %[[SHAPE_A:.*]] = shape.shape_of %[[A]] // CHECK: %[[SHAPE_B:.*]] = shape.shape_of %[[B]] - // CHECK: %[[SHAPE:.*]] = shape.any %[[SHAPE_A]], %[[SHAPE_B]] + // CHECK: %[[SHAPE:.*]] = "shape.any"(%[[SHAPE_A]], %[[SHAPE_B]]) // CHECK: %[[NUM_ELEMENTS:.*]] = shape.num_elements %[[SHAPE]] // CHECK: %[[NUM_ELEMENTS_AS_INDEX:.*]] = shape.size_to_index %[[NUM_ELEMENTS]] // CHECK: %[[FLAT_SHAPE:.*]] = tensor_from_elements(%[[NUM_ELEMENTS_AS_INDEX]]) : tensor<1xindex> // CHECK: %[[FLAT_A:.*]] = "mhlo.dynamic_reshape"(%[[A]], %[[FLAT_SHAPE]]) : (tensor<*xf32>, tensor<1xindex>) -> tensor // CHECK: %[[FLAT_B:.*]] = "mhlo.dynamic_reshape"(%[[B]], %[[FLAT_SHAPE]]) : (tensor<*xf32>, tensor<1xindex>) -> tensor // CHECK: %[[FLAT_RESULT:.*]] = mhlo.add %[[FLAT_A]], %[[FLAT_B]] : tensor - // CHECK: %[[SHAPE_AS_EXTENT_TENSOR:.*]] = shape.to_extent_tensor %[[SHAPE]] : tensor + // CHECK: %[[SHAPE_AS_EXTENT_TENSOR:.*]] = shape.to_extent_tensor %[[SHAPE]] // CHECK: %[[RESULT:.*]] = "mhlo.dynamic_reshape"(%[[FLAT_RESULT]], %[[SHAPE_AS_EXTENT_TENSOR]]) : (tensor, tensor) -> tensor<*xf32> // CHECK: return %[[RESULT]] : tensor<*xf32> %result = mhlo.add %a, %b : tensor<*xf32> diff --git a/tensorflow/compiler/mlir/xla/tests/legalize-tf-BatchMatMulV2.mlir b/tensorflow/compiler/mlir/xla/tests/legalize-tf-BatchMatMulV2.mlir index ddfc02af7c4..69eaeeb946d 100644 --- a/tensorflow/compiler/mlir/xla/tests/legalize-tf-BatchMatMulV2.mlir +++ b/tensorflow/compiler/mlir/xla/tests/legalize-tf-BatchMatMulV2.mlir @@ -10,14 +10,14 @@ func @batchmatmulv2_basic(%arg0: tensor<1x4x2xf32>, %arg1: tensor<3x2x4xf32>) -> // CHECK: [[LHSSHAPE:%.*]] = shape.shape_of [[LHS]] : tensor<1x4x2xf32> // CHECK: [[RHSSHAPE:%.*]] = shape.shape_of [[RHS]] : tensor<3x2x4xf32> // CHECK: [[CM2:%.*]] = constant -2 : i32 -// CHECK: [[LHSHEAD:%.*]], [[LHSTAIL:%.*]] = "shape.split_at"([[LHSSHAPE]], [[CM2]]) : (!shape.shape, i32) -> (!shape.shape, !shape.shape) -// CHECK: [[RHSHEAD:%.*]], [[RHSTAIL:%.*]] = "shape.split_at"([[RHSSHAPE]], [[CM2]]) : (!shape.shape, i32) -> (!shape.shape, !shape.shape) +// CHECK: [[LHSHEAD:%.*]], [[LHSTAIL:%.*]] = "shape.split_at"([[LHSSHAPE]], [[CM2]]) +// CHECK: [[RHSHEAD:%.*]], [[RHSTAIL:%.*]] = "shape.split_at"([[RHSSHAPE]], [[CM2]]) // CHECK: [[BCASTHEAD:%.*]] = shape.broadcast [[LHSHEAD]], [[RHSHEAD]] // CHECK: [[LHSBCASTSHAPE:%.*]] = shape.concat [[BCASTHEAD]], [[LHSTAIL]] -// CHECK: [[LHSSHAPEEXTENTS:%.*]] = shape.to_extent_tensor [[LHSBCASTSHAPE]] : tensor<3xindex> +// CHECK: [[LHSSHAPEEXTENTS:%.*]] = shape.to_extent_tensor [[LHSBCASTSHAPE]] // CHECK: [[LHSBCAST:%.*]] = "mhlo.dynamic_broadcast_in_dim"([[LHS]], [[LHSSHAPEEXTENTS]]) {broadcast_dimensions = dense<[0, 1, 2]> : tensor<3xi64>} : (tensor<1x4x2xf32>, tensor<3xindex>) -> tensor<3x4x2xf32> // CHECK: [[RHSBCASTSHAPE:%.*]] = shape.concat [[BCASTHEAD]], [[RHSTAIL]] -// CHECK: [[RHSSHAPEEXTENTS:%.*]] = shape.to_extent_tensor [[RHSBCASTSHAPE]] : tensor<3xindex> +// CHECK: [[RHSSHAPEEXTENTS:%.*]] = shape.to_extent_tensor [[RHSBCASTSHAPE]] // CHECK: [[RHSBCAST:%.*]] = "mhlo.dynamic_broadcast_in_dim"([[RHS]], [[RHSSHAPEEXTENTS]]) {broadcast_dimensions = dense<[0, 1, 2]> : 
tensor<3xi64>} : (tensor<3x2x4xf32>, tensor<3xindex>) -> tensor<3x2x4xf32> // CHECK: [[RESULT:%.*]] = "mhlo.dot_general"([[LHSBCAST]], [[RHSBCAST]]) {dot_dimension_numbers = {lhs_batching_dimensions = dense<0> : tensor<1xi64>, lhs_contracting_dimensions = dense<2> : tensor<1xi64>, rhs_batching_dimensions = dense<0> : tensor<1xi64>, rhs_contracting_dimensions = dense<1> : tensor<1xi64>}} : (tensor<3x4x2xf32>, tensor<3x2x4xf32>) -> tensor<3x4x4xf32> // CHECK: return [[RESULT]] : tensor<3x4x4xf32> diff --git a/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir b/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir index 7e897c36fe7..95acf5d8875 100644 --- a/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir +++ b/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir @@ -1505,7 +1505,7 @@ func @simple_softmax(%arg0: tensor<2x3xf32>) -> tensor<2x3xf32> { // CHECK: %[[CASTED_MAX:.*]] = "mhlo.convert"(%[[MAX]]) : (tensor<2xf32>) -> tensor<2xf32> // CHECK: %[[RESULT_SHAPE:.+]] = shape.shape_of %[[ARG0]] - // CHECK: %[[RESULT_EXTENTS:.+]] = shape.to_extent_tensor %[[RESULT_SHAPE]] : tensor<2xindex> + // CHECK: %[[RESULT_EXTENTS:.+]] = shape.to_extent_tensor %[[RESULT_SHAPE]] // CHECK: %[[BCAST_MAX:.+]] = "mhlo.dynamic_broadcast_in_dim"(%[[CASTED_MAX]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} // CHECK: %[[SHIFTED_INP:.*]] = mhlo.subtract %[[ARG0]], %[[BCAST_MAX]] // CHECK: %[[EXP:.*]] = "mhlo.exponential"(%[[SHIFTED_INP]]) @@ -1520,7 +1520,7 @@ func @simple_softmax(%arg0: tensor<2x3xf32>) -> tensor<2x3xf32> { // CHECK: %[[CASTED_SUM:.*]] = "mhlo.convert"(%[[SUM]]) : (tensor<2xf32>) -> tensor<2xf32> // CHECK: %[[RESULT_SHAPE:.+]] = shape.shape_of %[[ARG0]] - // CHECK: %[[RESULT_EXTENTS:.+]] = shape.to_extent_tensor %[[RESULT_SHAPE]] : tensor<2xindex> + // CHECK: %[[RESULT_EXTENTS:.+]] = shape.to_extent_tensor %[[RESULT_SHAPE]] // CHECK: %[[BCAST_SUM:.+]] = "mhlo.dynamic_broadcast_in_dim"(%[[CASTED_SUM]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} // CHECK: %[[RESULT:.*]] = mhlo.divide %[[EXP]], %[[BCAST_SUM]] // CHECK: return %[[RESULT]] @@ -1577,7 +1577,7 @@ func @simple_logsoftmax(%arg0: tensor<2x3xf32>) -> tensor<2x3xf32> { // CHECK: %[[CASTED_SUM:.*]] = "mhlo.convert"(%[[SUM]]) : (tensor<2xf32>) -> tensor<2xf32> // CHECK: %[[LOG:.*]] = "mhlo.log"(%[[CASTED_SUM]]) : (tensor<2xf32>) -> tensor<2xf32> // CHECK: %[[RESULT_SHAPE:.+]] = shape.shape_of %[[ARG0]] - // CHECK: %[[RESULT_EXTENTS:.+]] = shape.to_extent_tensor %[[RESULT_SHAPE]] : tensor<2xindex> + // CHECK: %[[RESULT_EXTENTS:.+]] = shape.to_extent_tensor %[[RESULT_SHAPE]] // CHECK: %[[BCAST_SUM:.+]] = "mhlo.dynamic_broadcast_in_dim"(%[[LOG]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} // CHECK: %[[RESULT:.*]] = mhlo.subtract {{.*}}, %[[BCAST_SUM]] // CHECK: return %[[RESULT]] @@ -1920,7 +1920,7 @@ func @neg_unranked(%arg0: tensor<*xf32>) -> tensor<*xf32> { func @sigmoid(%arg0: tensor<2xf32>) -> tensor<2xf32> { // CHECK-DAG: [[SCALAR:%.+]] = mhlo.constant dense<5.000000e-01> : tensor // CHECK-DAG: [[SHAPE:%.+]] = shape.shape_of %arg0 : tensor<2xf32> - // CHECK-DAG: [[SHAPE_VAL:%.+]] = shape.to_extent_tensor [[SHAPE]] : tensor<1xindex> + // CHECK-DAG: [[SHAPE_VAL:%.+]] = shape.to_extent_tensor [[SHAPE]] // CHECK-DAG: [[HALF:%.+]] = "mhlo.dynamic_broadcast_in_dim"([[SCALAR]], [[SHAPE_VAL]]) {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor, tensor<1xindex>) -> tensor<2xf32> // CHECK-DAG: [[R1:%.+]] = mhlo.multiply %arg0, [[HALF]] : tensor<2xf32> // 
CHECK-DAG: [[R2:%.+]] = "mhlo.tanh"([[R1]]) : (tensor<2xf32>) -> tensor<2xf32> @@ -1942,7 +1942,7 @@ func @sigmoid_complex(%arg0: tensor<2xcomplex>) -> tensor<2xcomplex> { func @sigmoid_unranked(%arg0: tensor<*xf32>) -> tensor<*xf32> { // CHECK-DAG: [[SCALAR:%.+]] = mhlo.constant dense<5.000000e-01> : tensor // CHECK-DAG: [[SHAPE:%.+]] = shape.shape_of %arg0 : tensor<*xf32> - // CHECK-DAG: [[SHAPE_VAL:%.+]] = shape.to_extent_tensor [[SHAPE]] : tensor + // CHECK-DAG: [[SHAPE_VAL:%.+]] = shape.to_extent_tensor [[SHAPE]] // CHECK-DAG: [[HALF:%.+]] = "mhlo.dynamic_broadcast_in_dim"([[SCALAR]], [[SHAPE_VAL]]) {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor, tensor) -> tensor<*xf32> // CHECK-DAG: [[R1:%.+]] = mhlo.multiply %arg0, [[HALF]] : tensor<*xf32> // CHECK-DAG: [[R2:%.+]] = "mhlo.tanh"([[R1]]) : (tensor<*xf32>) -> tensor<*xf32> diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 580bcc73265..630f8b80c6d 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -711,8 +711,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "f7ffb122d08e7a8203557898c67eaac3a857b152" - LLVM_SHA256 = "386d0f7c69f7ac341157a85f3cb1bd45f0c04f82b6bc8f65ca055cf382fd0424" + LLVM_COMMIT = "eed333149d178b69fdaf39b9419b7ca032520182" + LLVM_SHA256 = "ee6ba5d5f25dfbfe524ef58f15ab64c772b3bd6d45b0b01b9b88aa222992d3c2" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), diff --git a/third_party/mlir/BUILD b/third_party/mlir/BUILD index 9245404a8c9..7e42a4b40f2 100644 --- a/third_party/mlir/BUILD +++ b/third_party/mlir/BUILD @@ -759,32 +759,12 @@ cc_library( ":Pass", ":SCFDialect", ":Shape", - ":ShapeToStandardPatternsIncGen", ":StandardOps", ":Support", ":Transforms", ], ) -gentbl( - name = "ShapeToStandardPatternsIncGen", - strip_include_prefix = "include/mlir/Conversion/ShapeToStandard", - tbl_outs = [ - ( - "-gen-rewriters", - "include/mlir/Conversion/ShapeToStandard/ShapeToStandardPatterns.inc", - ), - ], - tblgen = ":mlir-tblgen", - td_file = "lib/Conversion/ShapeToStandard/ShapeToStandardPatterns.td", - td_srcs = [ - ":StdOpsTdFiles", - "include/mlir/Dialect/Shape/IR/ShapeBase.td", - "include/mlir/Dialect/Shape/IR/ShapeOps.td", - "include/mlir/Interfaces/InferTypeOpInterface.td", - ], -) - cc_library( name = "ShapeToSCF", srcs = glob([ From 76319741cd303273a542eae0cdf78df61e2c4e83 Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Mon, 27 Jul 2020 08:33:05 -0700 Subject: [PATCH 1373/2522] [MLIR][NFC] Use CallOpInterface::resolveCallable() to reduce some code clutter - Use this to reduce the nesting of if's needed to get to the FuncOp for a call - Add helper functions to get attached FuncOp for WhileOp PiperOrigin-RevId: 323365892 Change-Id: If6c6d4f1c8359c5df50366f90cbcb67fc9311771 --- .../prepare_composite_functions_tf.cc | 20 +++--- .../compiler/mlir/tensorflow/ir/tf_ops.td | 16 ++++- .../executor_tpuv1_inline_tpu_island.cc | 8 +-- .../transforms/optimize_global_tensors.cc | 29 +++----- .../tensorflow/transforms/shape_inference.cc | 69 ++++++++----------- 5 files changed, 67 insertions(+), 75 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc b/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc index 4f5c87a8a4c..30444a6c774 100644 --- 
a/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc +++ b/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc @@ -239,18 +239,14 @@ LogicalResult CheckOutputConsumer( LogicalResult CheckFusableKerasLstm(FuncOp lstm_func, ModuleOp module) { for (auto func : module.getOps()) { - auto result = func.walk([&](Operation* op) { - if (auto call_op = dyn_cast(op)) { - CallInterfaceCallable callable = call_op.getCallableForCallee(); - if (auto sym = callable.dyn_cast()) { - if (sym.getRootReference() == lstm_func.getName()) { - // Keras LSTM have 5 outputs. - // We should make sure only the first or the second output are - // consumed. - if (failed(CheckOutputConsumer(call_op, 5, {0, 1}))) - return WalkResult::interrupt(); - } - } + if (func == lstm_func) continue; + auto result = func.walk([&](CallOpInterface op) { + if (dyn_cast(op.resolveCallable()) == lstm_func) { + // Keras LSTM have 5 outputs. + // We should make sure only the first or the second output are + // consumed. + if (failed(CheckOutputConsumer(op.getOperation(), 5, {0, 1}))) + return WalkResult::interrupt(); } return WalkResult::advance(); }); diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td index ac2d4ad44e6..7dd8609eea9 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td @@ -236,12 +236,12 @@ else_branch: A function that takes 'inputs' and returns a list of let extraClassDeclaration = [{ // Get the then branch function. FuncOp then_func() { - return getParentOfType().lookupSymbol(then_branch()); + return SymbolTable::lookupNearestSymbolFrom(*this, then_branch()); } // Get the else branch function. FuncOp else_func() { - return getParentOfType().lookupSymbol(else_branch()); + return SymbolTable::lookupNearestSymbolFrom(*this, else_branch()); } }]; } @@ -655,6 +655,18 @@ body: A function that takes a list of tensors and returns another return getParentOfType().lookupSymbol(body()); } }]; + + let extraClassDeclaration = [{ + // Get the condition function. + FuncOp cond_func() { + return SymbolTable::lookupNearestSymbolFrom(*this, cond()); + } + + // Get the body function. 
+ FuncOp body_func() { + return SymbolTable::lookupNearestSymbolFrom(*this, body()); + } + }]; } def TL_WhileRegionOp : TF_Op<"WhileRegion", diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/executor_tpuv1_inline_tpu_island.cc b/tensorflow/compiler/mlir/tensorflow/transforms/executor_tpuv1_inline_tpu_island.cc index 9a533798208..f624d6cad58 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/executor_tpuv1_inline_tpu_island.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/executor_tpuv1_inline_tpu_island.cc @@ -61,11 +61,11 @@ void TPUBridgeExecutorIslandInlining::runOnOperation() { LLVM_DEBUG(llvm::dbgs() << "Found call to inline: " << *call_op.getOperation() << "\n"); - FuncOp called_func = dyn_cast_or_null( - symbol_table.lookupSymbolIn(getOperation(), call_op.f())); + auto call_interface = cast(call_op.getOperation()); + auto called_func = + dyn_cast_or_null(call_interface.resolveCallable()); - if (failed(inlineCall(inliner, - cast(call_op.getOperation()), + if (failed(inlineCall(inliner, call_interface, cast(called_func.getOperation()), called_func.getCallableRegion(), /* shouldCloneInlinedRegion = */ false))) { diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/optimize_global_tensors.cc b/tensorflow/compiler/mlir/tensorflow/transforms/optimize_global_tensors.cc index 67a6c8dd6dd..6fee693554e 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/optimize_global_tensors.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/optimize_global_tensors.cc @@ -68,9 +68,8 @@ bool IsResource(Value value) { return IsResourceType(value.getType()); } class ResourceAnalyzer { public: explicit ResourceAnalyzer(ModuleOp module) { - SymbolTable symbol_table(module); for (auto func : module.getOps()) { - AnalyzeFunc(func, symbol_table); + AnalyzeFunc(func); } } @@ -89,7 +88,7 @@ class ResourceAnalyzer { // written". Do this recursively across the chain of funcs via call or control // flow ops. // TODO(ashwinm): Move to iterative traversal. - LogicalResult AnalyzeFunc(FuncOp func, const SymbolTable& symbol_table) { + LogicalResult AnalyzeFunc(FuncOp func) { // Avoid infinite recursion. 
if (!discovered_.insert(func).second) { return success(); @@ -104,24 +103,20 @@ class ResourceAnalyzer { return; } if (auto call = dyn_cast(op)) { - if (auto sym = op->getAttrOfType("f")) { - PropagatePotentiallyWrittenUpFromCallee( - sym.cast().getValue(), call.getArgOperands(), - symbol_table); + if (auto func = dyn_cast(call.resolveCallable())) { + PropagatePotentiallyWrittenUpFromCallee(func, call.getArgOperands()); } return; } if (auto if_op = dyn_cast(op)) { - for (auto callee : {if_op.then_branch(), if_op.else_branch()}) { - PropagatePotentiallyWrittenUpFromCallee(callee, if_op.input(), - symbol_table); + for (auto callee : {if_op.then_func(), if_op.else_func()}) { + PropagatePotentiallyWrittenUpFromCallee(callee, if_op.input()); } return; } if (auto while_op = dyn_cast(op)) { - for (auto callee : {while_op.cond(), while_op.body()}) { - PropagatePotentiallyWrittenUpFromCallee(callee, while_op.input(), - symbol_table); + for (auto callee : {while_op.cond_func(), while_op.body_func()}) { + PropagatePotentiallyWrittenUpFromCallee(callee, while_op.input()); } return; } @@ -149,15 +144,13 @@ class ResourceAnalyzer { }); } - // Given a funcOp associated with the callee and operands from the + // Given a FuncOp associated with the callee and operands from the // corresponding callOp, propagate the potentially written decision to the // callOp's operands, if the corresponding func's arguments are potentially // written resources. void PropagatePotentiallyWrittenUpFromCallee( - StringRef callee, Operation::operand_range propagate_to, - const SymbolTable& symbol_table) { - auto func = symbol_table.lookup(callee); - AnalyzeFunc(func, symbol_table); + FuncOp func, Operation::operand_range propagate_to) { + AnalyzeFunc(func); for (auto t : llvm::zip(func.getArguments(), propagate_to)) { if (!IsResource(std::get<0>(t))) { continue; diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc index 9732dac082a..597fbe2c0b1 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc @@ -39,6 +39,7 @@ limitations under the License. #include "mlir/IR/StandardTypes.h" // from @llvm-project #include "mlir/IR/SymbolTable.h" // from @llvm-project #include "mlir/IR/Value.h" // from @llvm-project +#include "mlir/Interfaces/CallInterfaces.h" // from @llvm-project #include "mlir/Pass/Pass.h" // from @llvm-project #include "mlir/Pass/PassRegistry.h" // from @llvm-project #include "mlir/Support/LLVM.h" // from @llvm-project @@ -243,14 +244,11 @@ bool RefineResultType(Operation* op, Value result, // Infers the shape from a (Stateful)PartionedCall operation by looking up the // called function and propagating the return type. -bool InferShapeForCall(Operation* op) { - auto call_op = cast(op); - CallInterfaceCallable callable = call_op.getCallableForCallee(); - SymbolRefAttr sym = callable.dyn_cast(); - if (!sym) return false; - FuncOp func = dyn_cast(SymbolTable::lookupNearestSymbolFrom(op, sym)); +bool InferShapeForCall(CallOpInterface call_op) { + FuncOp func = dyn_cast(call_op.resolveCallable()); if (!func) return false; + Operation* op = call_op.getOperation(); bool changed = false; // Map each of the results of the call to the returned type of the // function. @@ -533,7 +531,7 @@ class ShapeInference { // like predicate). 
LogicalResult PropagateShapeToFunctions( ModuleOp module, Operation::operand_type_range input_types, - ArrayRef func_names, int64_t max_iteration); + ArrayRef functions, int64_t max_iteration); // Propagates shapes to regions given the shapes of the inputs of the regions. // All regions provided in `regions` are assumed to have inputs of type @@ -555,13 +553,13 @@ class ShapeInference { // // TODO(b/154065712): Move this to a more general inter-procedural constant // folding pass. - void PropagateConstantToCallee(CallOpInterface call_op, - SymbolRefAttr callee_sym, ModuleOp module); + void PropagateConstantToCallee(CallOpInterface call_op, FuncOp func, + ModuleOp module); // Propagates any constant return value of the callee function to the call // op's corresponding result. - void PropagateConstantFromCallee(CallOpInterface call_op, - SymbolRefAttr callee_sym, ModuleOp module); + void PropagateConstantFromCallee(CallOpInterface call_op, FuncOp func, + ModuleOp module); // Tries to compute the result of folding the op. This doesn't actually // perform constant folding, it is just computes the equivalent constants. @@ -779,9 +777,7 @@ bool ShapeInference::InferShapeForSingleOperation(Operation* op) { // Handle call operations by looking up callee and infering return shape as // needed. - if (isa( - op)) - return InferShapeForCall(op); + if (auto call = dyn_cast(op)) return InferShapeForCall(call); // tf.Cast are only inferred if they have at least one user in the TF dialect // or feeding into the function return. This is necessary to avoid inserting @@ -984,14 +980,13 @@ bool ShapeInference::InferShapeForSingleOperation(Operation* op) { LogicalResult ShapeInference::PropagateShapeToFunctions( ModuleOp module, Operation::operand_type_range input_types, - ArrayRef func_names, int64_t max_iteration) { + ArrayRef functions, int64_t max_iteration) { bool all_succeeded = true; auto types = llvm::to_vector<4>(input_types); // If shape propagation fails for one function, return failure, but do not // early exit and attempt to propagate shapes for all provided functions to // have a best-effort propagation. - for (auto func_name : func_names) { - FuncOp func = module.lookupSymbol(func_name); + for (FuncOp func : functions) { auto func_uses = SymbolTable::getSymbolUses(func, &module.getBodyRegion()); if (!llvm::hasSingleElement(func_uses.getValue())) { int num_uses = std::distance(func_uses->begin(), func_uses->end()); @@ -1046,12 +1041,9 @@ LogicalResult ShapeInference::PropagateShapeToRegions( } void ShapeInference::PropagateConstantToCallee(CallOpInterface call_op, - SymbolRefAttr callee_sym, - ModuleOp module) { - auto func = module.lookupSymbol(callee_sym.getRootReference()); + FuncOp func, ModuleOp module) { auto func_uses = SymbolTable::getSymbolUses(func, &module.getBodyRegion()); - int num_uses = std::distance(func_uses->begin(), func_uses->end()); - if (num_uses != 1) return; + if (!llvm::hasSingleElement(func_uses.getValue())) return; OpBuilder builder(&func.front().front()); Operation* op = call_op.getOperation(); @@ -1077,9 +1069,7 @@ void ShapeInference::PropagateConstantToCallee(CallOpInterface call_op, } void ShapeInference::PropagateConstantFromCallee(CallOpInterface call_op, - SymbolRefAttr callee_sym, - ModuleOp module) { - auto func = module.lookupSymbol(callee_sym.getRootReference()); + FuncOp func, ModuleOp module) { // If the return value is a constant, use the constant as the value of // the call return. 
Operation* op = call_op.getOperation(); @@ -1111,28 +1101,29 @@ LogicalResult ShapeInference::PropagateShapeIntoAttachedFunctions( if (auto if_op = dyn_cast(op)) { return PropagateShapeToFunctions( module, drop_begin(if_op.getOperandTypes(), 1), - {if_op.then_branch(), if_op.else_branch()}, max_iteration); + {if_op.then_func(), if_op.else_func()}, max_iteration); } else if (auto case_op = dyn_cast(op)) { - SmallVector branches; - for (Attribute branch : case_op.branches()) - branches.push_back(branch.cast().getValue()); + SmallVector branches; + for (Attribute branch : case_op.branches()) { + auto sym = branch.cast(); + branches.push_back(SymbolTable::lookupNearestSymbolFrom(op, sym)); + } return PropagateShapeToFunctions(module, drop_begin(case_op.getOperandTypes(), 1), branches, max_iteration); } else if (auto while_op = dyn_cast(op)) { - return PropagateShapeToFunctions(module, while_op.getOperandTypes(), - {while_op.cond(), while_op.body()}, - max_iteration); + return PropagateShapeToFunctions( + module, while_op.getOperandTypes(), + {while_op.cond_func(), while_op.body_func()}, max_iteration); } else if (auto call_op = dyn_cast(op)) { - CallInterfaceCallable callable = call_op.getCallableForCallee(); - if (SymbolRefAttr sym = callable.dyn_cast()) { - PropagateConstantToCallee(call_op, sym, module); - if (failed(PropagateShapeToFunctions( - module, call_op.getArgOperands().getTypes(), - {sym.getRootReference()}, max_iteration))) { + if (auto func = dyn_cast(call_op.resolveCallable())) { + PropagateConstantToCallee(call_op, func, module); + if (failed(PropagateShapeToFunctions(module, + call_op.getArgOperands().getTypes(), + {func}, max_iteration))) { return failure(); } - PropagateConstantFromCallee(call_op, sym, module); + PropagateConstantFromCallee(call_op, func, module); return success(); } } From e29e1f4e574caab071e93cfb91fa9ee0944cd87c Mon Sep 17 00:00:00 2001 From: Pankaj Kanwar Date: Mon, 27 Jul 2020 09:01:03 -0700 Subject: [PATCH 1374/2522] Enable the new version of non-max suppression. PiperOrigin-RevId: 323370581 Change-Id: I060e61f253e271da6ab42a2815e085e8fa172e5f --- .../core/kernels/non_max_suppression_op.cu.cc | 31 +++++++++---------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/tensorflow/core/kernels/non_max_suppression_op.cu.cc b/tensorflow/core/kernels/non_max_suppression_op.cu.cc index 8c833be75ae..8ec26ba13d7 100644 --- a/tensorflow/core/kernels/non_max_suppression_op.cu.cc +++ b/tensorflow/core/kernels/non_max_suppression_op.cu.cc @@ -726,24 +726,21 @@ REGISTER_KERNEL_BUILDER(Name("NonMaxSuppressionV2") .HostMemory("max_output_size"), NonMaxSuppressionV2GPUOp); -// TODO(laigd): enable once b/141559125 is fixed. -// REGISTER_KERNEL_BUILDER(Name("NonMaxSuppressionV3") -// .TypeConstraint("T") -// .Device(DEVICE_GPU) -// .HostMemory("iou_threshold") -// .HostMemory("max_output_size") -// .HostMemory("score_threshold"), -// NonMaxSuppressionV3GPUOp); +REGISTER_KERNEL_BUILDER(Name("NonMaxSuppressionV3") + .TypeConstraint("T") + .Device(DEVICE_GPU) + .HostMemory("iou_threshold") + .HostMemory("max_output_size") + .HostMemory("score_threshold"), + NonMaxSuppressionV3GPUOp); -// TODO(b/143610288): this op tries to allocate 4GB of memory for the mask for -// some model and cause OOM. 
-// REGISTER_KERNEL_BUILDER(Name("NonMaxSuppressionV4") -// .TypeConstraint("T") -// .Device(DEVICE_GPU) -// .HostMemory("iou_threshold") -// .HostMemory("max_output_size") -// .HostMemory("score_threshold"), -// NonMaxSuppressionV4GPUOp); +REGISTER_KERNEL_BUILDER(Name("NonMaxSuppressionV4") + .TypeConstraint("T") + .Device(DEVICE_GPU) + .HostMemory("iou_threshold") + .HostMemory("max_output_size") + .HostMemory("score_threshold"), + NonMaxSuppressionV4GPUOp); } // namespace tensorflow #endif From 7eb5039543dab1d1186e5cc2ec875326d624f0d5 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Mon, 27 Jul 2020 09:04:58 -0700 Subject: [PATCH 1375/2522] [XLA] Refactor run_hlo_module to avoid depending on TensorFlow test utilities. PiperOrigin-RevId: 323371427 Change-Id: If0f7693752975722a511fa97c43cd3d60e22cdff --- tensorflow/compiler/xla/tools/BUILD | 6 +- .../compiler/xla/tools/run_hlo_module.cc | 62 ++++++++++++++++--- .../compiler/xla/tools/run_hlo_module.h | 3 +- .../compiler/xla/tools/run_hlo_module_main.cc | 6 +- 4 files changed, 60 insertions(+), 17 deletions(-) diff --git a/tensorflow/compiler/xla/tools/BUILD b/tensorflow/compiler/xla/tools/BUILD index b113b498e22..fc1ca7d3105 100644 --- a/tensorflow/compiler/xla/tools/BUILD +++ b/tensorflow/compiler/xla/tools/BUILD @@ -308,17 +308,18 @@ cc_library( ":prepare_reference_module", "//tensorflow/compiler/xla:debug_options_flags", "//tensorflow/compiler/xla:error_spec", + "//tensorflow/compiler/xla:literal_comparison", "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla/client/lib:testing", "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:hlo_runner", "//tensorflow/compiler/xla/service:hlo_verifier", "//tensorflow/compiler/xla/service:platform_util", - "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:test_utils", + "//tensorflow/core:lib", "//tensorflow/core/platform:logging", + "//tensorflow/core/platform:path", "//tensorflow/core/platform:status", - "//tensorflow/core/platform:test", "//tensorflow/stream_executor:platform", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:span", @@ -339,6 +340,7 @@ tf_cc_binary( "//tensorflow/core:framework_internal", "//tensorflow/core/platform:logging", "//tensorflow/core/platform:platform_port", + "//tensorflow/core/platform:path", "//tensorflow/core/platform:status", "//tensorflow/core/platform:test", ] + if_cuda_or_rocm([ diff --git a/tensorflow/compiler/xla/tools/run_hlo_module.cc b/tensorflow/compiler/xla/tools/run_hlo_module.cc index 39b545af393..be9b23efb12 100644 --- a/tensorflow/compiler/xla/tools/run_hlo_module.cc +++ b/tensorflow/compiler/xla/tools/run_hlo_module.cc @@ -27,24 +27,66 @@ limitations under the License. 
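// Caller-side sketch of the new contract (the surrounding driver code is an
// assumption): RunAndCompare now reports mismatches as an xla::Status instead
// of ::testing::AssertionResult, so a non-test binary can consume it without
// gtest:
//   xla::Status matched =
//       xla::RunAndCompare(hlo_filename, test_platform_name,
//                          reference_platform_name, &engine, opts);
//   if (!matched.ok()) {
//     std::cerr << matched << "\n";  // literals were already dumped to temp
//   }                                // files by the OnMiscompare callback.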
#include "tensorflow/compiler/xla/client/lib/testing.h" #include "tensorflow/compiler/xla/debug_options_flags.h" #include "tensorflow/compiler/xla/error_spec.h" +#include "tensorflow/compiler/xla/literal_comparison.h" #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/hlo_runner.h" #include "tensorflow/compiler/xla/service/hlo_verifier.h" #include "tensorflow/compiler/xla/service/platform_util.h" -#include "tensorflow/compiler/xla/tests/literal_test_util.h" #include "tensorflow/compiler/xla/tests/test_utils.h" #include "tensorflow/compiler/xla/tools/hlo_module_loader.h" #include "tensorflow/compiler/xla/tools/prepare_reference_module.h" #include "tensorflow/compiler/xla/util.h" +#include "tensorflow/core/lib/io/path.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/path.h" #include "tensorflow/core/platform/status.h" -#include "tensorflow/core/platform/test.h" - -namespace se = ::stream_executor; namespace xla { namespace { +// Writes the given literal to a file in the test temporary directory. +void WriteLiteralToTempFile(const LiteralSlice& literal, const string& name) { + // Bazel likes for tests to write "debugging outputs" like these to + // TEST_UNDECLARED_OUTPUTS_DIR. This plays well with tools that inspect test + // results, especially when they're run on remote machines. + auto* env = tensorflow::Env::Default(); + string binary_filename; + string text_filename; + string outdir; + if (tensorflow::io::GetTestUndeclaredOutputsDir(&outdir)) { + string filename = tensorflow::io::JoinPath( + outdir, absl::StrFormat("tempfile-%d-%s", env->NowMicros(), name)); + binary_filename = absl::StrCat(filename, ".pb"); + text_filename = absl::StrCat(filename, ".txt"); + } else { + binary_filename = + tensorflow::io::GetTempFilename(absl::StrCat(name, ".pb")); + text_filename = tensorflow::io::GetTempFilename(absl::StrCat(name, ".txt")); + } + + TF_CHECK_OK( + tensorflow::WriteBinaryProto(env, binary_filename, literal.ToProto())); + TF_CHECK_OK( + tensorflow::WriteStringToFile(env, text_filename, literal.ToString())); + LOG(ERROR) << "wrote Literal to " << name << " binary: " << binary_filename + << " text: " << text_filename; +} + +// Callback helper that dumps literals to temporary files in the event of a +// miscomparison. 
+void OnMiscompare(const LiteralSlice& expected, const LiteralSlice& actual, + const LiteralSlice& mismatches, + const ShapeIndex& /*shape_index*/) { + LOG(INFO) << "expected: " << ShapeUtil::HumanString(expected.shape()) << " " + << literal_comparison::ToStringTruncated(expected); + LOG(INFO) << "actual: " << ShapeUtil::HumanString(actual.shape()) << " " + << literal_comparison::ToStringTruncated(actual); + LOG(INFO) << "Dumping literals to temp files..."; + WriteLiteralToTempFile(expected, "expected"); + WriteLiteralToTempFile(actual, "actual"); + WriteLiteralToTempFile(mismatches, "mismatches"); +} + Literal ExecuteOnPlatform(std::unique_ptr module, absl::Span args, se::Platform* platform, bool run_hlo_passes) { @@ -69,7 +111,7 @@ Literal ExecuteOnPlatform(std::unique_ptr module, } } // namespace -::testing::AssertionResult RunAndCompare( +Status RunAndCompare( const std::string& hlo_filename, const std::string& test_platform_name, const std::string& reference_platform_name, std::minstd_rand0* engine, const RunHloModuleOptions& options, @@ -122,7 +164,7 @@ Literal ExecuteOnPlatform(std::unique_ptr module, if (reference_module == nullptr) { std::cerr << "Skipping reference platform\n"; - return ::testing::AssertionSuccess(); + return Status::OK(); } Literal reference_result = @@ -136,10 +178,10 @@ Literal ExecuteOnPlatform(std::unique_ptr module, } ErrorSpec error_spec(static_cast(options.abs_error_bound), static_cast(options.rel_error_bound)); - return LiteralTestUtil::Near(/*expected=*/reference_result, - /*actual=*/test_result, - /*error_spec=*/error_spec, - /*detailed_message=*/true); + return literal_comparison::Near(/*expected=*/reference_result, + /*actual=*/test_result, + /*error=*/error_spec, + /*detailed_message=*/true, &OnMiscompare); } } // namespace xla diff --git a/tensorflow/compiler/xla/tools/run_hlo_module.h b/tensorflow/compiler/xla/tools/run_hlo_module.h index 932cc22f4dd..57f81cc7c94 100644 --- a/tensorflow/compiler/xla/tools/run_hlo_module.h +++ b/tensorflow/compiler/xla/tools/run_hlo_module.h @@ -22,7 +22,6 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/core/platform/status.h" -#include "tensorflow/core/platform/test.h" #include "tensorflow/stream_executor/platform.h" namespace xla { @@ -63,7 +62,7 @@ struct RunHloModuleOptions { // the results. 'reference_module_modifier_hook' can be used to transform the // HloModule before it is run on the reference platform. This may be necessary // to match the numerics of the test platform. -::testing::AssertionResult RunAndCompare( +Status RunAndCompare( const std::string& hlo_filename, const std::string& test_platform_name, const std::string& reference_platform_name, std::minstd_rand0* engine, const RunHloModuleOptions& options, diff --git a/tensorflow/compiler/xla/tools/run_hlo_module_main.cc b/tensorflow/compiler/xla/tools/run_hlo_module_main.cc index 39d7826e162..9d153491862 100644 --- a/tensorflow/compiler/xla/tools/run_hlo_module_main.cc +++ b/tensorflow/compiler/xla/tools/run_hlo_module_main.cc @@ -156,7 +156,7 @@ int main(int argc, char** argv) { if (iteration_count != 1) { std::cerr << "\n=== Iteration " << i << "\n"; } - ::testing::AssertionResult matched = + xla::Status matched = xla::RunAndCompare(hlo_filename, test_platform_name, reference_platform_name, &engine, opts); @@ -164,13 +164,13 @@ int main(int argc, char** argv) { // used. Without a reference, the test just verifies that nothing blew up // when running the module. 
if (!reference_platform_name.empty()) { - if (matched) { + if (matched.ok()) { // Success. std::cerr << "\n** Results on " << test_platform_name << " and " << reference_platform_name << " are close enough. **\n"; } else { failure_count++; - std::cerr << matched.message() << "\n"; + std::cerr << matched << "\n"; } } } From 3b8ffcaa8a171ef707b5ef342db84f576a0d4944 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Mon, 27 Jul 2020 09:37:37 -0700 Subject: [PATCH 1376/2522] Fix bad merge that dropped tool PiperOrigin-RevId: 323377442 Change-Id: I85e9318f862553cdb499d988114624f751097647 --- tensorflow/compiler/mlir/runlit.cfg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/mlir/runlit.cfg.py b/tensorflow/compiler/mlir/runlit.cfg.py index 29536788679..45c8dce8422 100644 --- a/tensorflow/compiler/mlir/runlit.cfg.py +++ b/tensorflow/compiler/mlir/runlit.cfg.py @@ -74,7 +74,7 @@ tool_names = [ 'tf_tfjs_translate', 'flatbuffer_to_string', 'flatbuffer_translate', 'tf-mlir-translate', 'mlir-tflite-runner', 'tfcompile', 'json_to_flatbuffer', 'xla-gpu-opt', 'xla-opt', 'hlo_to_llvm_ir', - 'kernel-gen-opt' + 'kernel-gen-opt', 'xla-thunks-opt' ] tools = [ToolSubst(s, unresolved='ignore') for s in tool_names] llvm_config.add_tool_substitutions(tools, tool_dirs) From c47f1d6ba58e7c5f6cf07a6842a5f3925a089cd2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Jul 2020 10:17:46 -0700 Subject: [PATCH 1377/2522] Fix scoped annotation for CUPTI_DRIVER_TRACE_CBID_cuLaunchCooperativeKernelMultiDevice. AnnotationMap are thread-safe. PiperOrigin-RevId: 323386124 Change-Id: I83d9376ea5a2300479db9b7667b709fdfaa73159 --- .../core/profiler/internal/gpu/cupti_tracer.cc | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/profiler/internal/gpu/cupti_tracer.cc b/tensorflow/core/profiler/internal/gpu/cupti_tracer.cc index b620b51cc99..bda7d5840ab 100644 --- a/tensorflow/core/profiler/internal/gpu/cupti_tracer.cc +++ b/tensorflow/core/profiler/internal/gpu/cupti_tracer.cc @@ -1564,8 +1564,18 @@ Status CuptiTracer::HandleCallback(CUpti_CallbackDomain domain, // Set up the map from correlation id to annotation string. const auto &annotation = AnnotationStack::Get(); if (!annotation.empty()) { - collector_->annotation_map()->Add(device_id, cbdata->correlationId, - annotation); + if (cbid == + CUPTI_DRIVER_TRACE_CBID_cuLaunchCooperativeKernelMultiDevice) { + // Kernels are launched on different devices by this API call, therefore + // we need to populate per device annotation map respectively. + for (int i = 0; i < num_gpus_; ++i) { + collector_->annotation_map()->Add(i, cbdata->correlationId, + annotation); + } + } else { + collector_->annotation_map()->Add(device_id, cbdata->correlationId, + annotation); + } } TF_RETURN_IF_ERROR(cupti_driver_api_hook_->OnDriverApiExit( From 62a0d7ce5e1a4ff0c2a18a154afb0ddce560189c Mon Sep 17 00:00:00 2001 From: Xingyu Long Date: Mon, 27 Jul 2020 13:26:45 -0400 Subject: [PATCH 1378/2522] Change the number of gpu. 
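The Keras example benchmarks encode their run configuration in the method name, so raising `num_gpus` from 1 to 3 also renames the `benchmark_*_gpu_1` methods to `benchmark_*_gpu_3` to keep the two in sync. The snippet below is only a rough illustration of how a `num_gpus` value is typically turned into a mirrored strategy spanning that many GPUs; the helper name `make_strategy` is hypothetical and is not the benchmark utility these tests call.

import tensorflow as tf

def make_strategy(num_gpus):
  # Mirror variables across the first `num_gpus` GPUs; with num_gpus=3 this
  # corresponds to the renamed benchmark_*_gpu_3 methods in this change.
  devices = ["/gpu:%d" % i for i in range(num_gpus)]
  return tf.distribute.MirroredStrategy(devices=devices)

strategy = make_strategy(3)
print(strategy.num_replicas_in_sync)  # prints 3 when three GPUs are visible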
--- .../keras_examples_benchmarks/mnist_irnn_benchmark_test.py | 6 +++--- .../keras_examples_benchmarks/reuters_mlp_benchmark_test.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_irnn_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_irnn_benchmark_test.py index ffed31190e0..0823c9a2d6a 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_irnn_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_irnn_benchmark_test.py @@ -112,8 +112,8 @@ class IRNNMnistBenchmark(tf.test.Benchmark): self.report_benchmark( iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras) - def benchmark_irnn_mnist_bs_1024_gpu_1(self): - """Measure performance with batch_size=1024, run_iters=3, gpu=1 and + def benchmark_irnn_mnist_bs_1024_gpu_3(self): + """Measure performance with batch_size=1024, run_iters=3, gpu=3 and distribution_strategy='mirrored'""" batch_size = 1024 run_iters = 3 @@ -123,7 +123,7 @@ class IRNNMnistBenchmark(tf.test.Benchmark): y=self.y_train, batch_size=batch_size, run_iters=run_iters, - num_gpus=1, + num_gpus=3, distribution_strategy="mirrored", optimizer=tf.keras.optimizers.RMSprop(learning_rate=self.learning_rate), loss='categorical_crossentropy', diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/reuters_mlp_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/reuters_mlp_benchmark_test.py index 25b947c4d8a..49b200e0751 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/reuters_mlp_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/reuters_mlp_benchmark_test.py @@ -78,8 +78,8 @@ class MLPReutersBenchmark(tf.test.Benchmark): self.report_benchmark( iters=run_iters, wall_time=wall_time, metrics=metrics, extras=extras) - def benchmark_mlp_reuters_bs_128_gpu_1(self): - """Measure performance with batch_size=128, run_iters=2, gpu=1 and + def benchmark_mlp_reuters_bs_128_gpu_3(self): + """Measure performance with batch_size=128, run_iters=2, gpu=3 and distribution_strategy='mirrored'""" batch_size = 128 run_iters = 2 @@ -89,7 +89,7 @@ class MLPReutersBenchmark(tf.test.Benchmark): y=self.y_train, batch_size=batch_size, run_iters=run_iters, - num_gpus=1, + num_gpus=3, distribution_strategy="mirrored", epochs=self.epochs, optimizer='adam', From d003f7f34a06adcb34ba6304ad1c3b268ad64208 Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Mon, 27 Jul 2020 17:35:30 +0000 Subject: [PATCH 1379/2522] return status in TensorMapDeviceCopy --- tensorflow/core/kernels/tensor_map.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/tensor_map.cc b/tensorflow/core/kernels/tensor_map.cc index 4f694a37b17..8776add400c 100644 --- a/tensorflow/core/kernels/tensor_map.cc +++ b/tensorflow/core/kernels/tensor_map.cc @@ -58,8 +58,8 @@ static Status TensorMapDeviceCopy( for (const std::pair& p : from.tensors()) { TensorKey to_key(p.first.dtype()); Tensor to_val(p.second.dtype()); - copy(p.first, &to_key); - copy(p.second, &to_val); + TF_RETURN_IF_ERROR(copy(p.first, &to_key)); + TF_RETURN_IF_ERROR(copy(p.second, &to_val)); to->tensors().emplace(to_key, to_val); } return Status::OK(); From 62decd1734d42bbf6fb9f261b0fbe3cccff972a5 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Mon, 27 Jul 2020 10:34:19 -0700 Subject: [PATCH 1380/2522] Added script for running 
delegate testing on Linux/Android. Fixes to delegate testing. PiperOrigin-RevId: 323390232 Change-Id: Iafc6e97c399933a0e45a83f3276ac2a44f82611e --- .../gpu/cl/testing/delegate_testing.cc | 19 +++- .../gpu/cl/testing/run_delegate_testing.sh | 96 +++++++++++++++++++ 2 files changed, 110 insertions(+), 5 deletions(-) create mode 100755 tensorflow/lite/delegates/gpu/cl/testing/run_delegate_testing.sh diff --git a/tensorflow/lite/delegates/gpu/cl/testing/delegate_testing.cc b/tensorflow/lite/delegates/gpu/cl/testing/delegate_testing.cc index 4e92f897d96..10b7ac34404 100644 --- a/tensorflow/lite/delegates/gpu/cl/testing/delegate_testing.cc +++ b/tensorflow/lite/delegates/gpu/cl/testing/delegate_testing.cc @@ -32,11 +32,19 @@ namespace { void FillInputTensor(tflite::Interpreter* interpreter) { for (int k = 0; k < interpreter->inputs().size(); ++k) { - float* p = interpreter->typed_input_tensor(k); - const auto n = - tflite::NumElements(interpreter->tensor(interpreter->inputs()[k])); - for (int i = 0; i < n; ++i) { - p[i] = std::sin(i); + TfLiteTensor* tensor_ptr = interpreter->tensor(interpreter->inputs()[k]); + const auto tensor_elements_count = tflite::NumElements(tensor_ptr); + if (tensor_ptr->type == kTfLiteFloat32) { + float* p = interpreter->typed_input_tensor(k); + for (int i = 0; i < tensor_elements_count; ++i) { + p[i] = std::sin(i); + } + } + if (tensor_ptr->type == kTfLiteInt32) { + int* p = interpreter->typed_input_tensor(k); + for (int i = 0; i < tensor_elements_count; ++i) { + p[i] = i % 2; + } } } } @@ -124,6 +132,7 @@ int main(int argc, char** argv) { options.inference_priority1 = TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY; options.inference_priority2 = TFLITE_GPU_INFERENCE_PRIORITY_MIN_MEMORY_USAGE; options.inference_priority3 = TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION; + options.max_delegated_partitions = 1; auto* gpu_delegate = TfLiteGpuDelegateV2Create(&options); status = gpu_inference->ModifyGraphWithDelegate(gpu_delegate); if (status != kTfLiteOk) { diff --git a/tensorflow/lite/delegates/gpu/cl/testing/run_delegate_testing.sh b/tensorflow/lite/delegates/gpu/cl/testing/run_delegate_testing.sh new file mode 100755 index 00000000000..7b86407dbad --- /dev/null +++ b/tensorflow/lite/delegates/gpu/cl/testing/run_delegate_testing.sh @@ -0,0 +1,96 @@ +#!/bin/bash +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +shopt -s expand_aliases # to work with commands aliases in .sh + +description="Delegate testing sample: +Compares GPU backend vs TFLite CPU(speed/correctness). 
+How to use: +[-h or --help, print instructions] +[-m or --model_path, path to the model in .tflite format] +[-d or --device, select device](optional, if you have few connected devices)" + +model_path="" +alias ADB='adb' +host="" + +while [[ "$1" != "" ]]; do + case $1 in + -m | --model_path) + shift + model_path=$1 + ;; + -d | --device) + shift + if [[ "$1" == "HOST" ]] + then + host="HOST" + fi + alias ADB='adb -s '$1'' + ;; + -h | --help) + echo "$description" + exit + ;; + esac + shift +done + +if [ "$model_path" = "" ] +then +echo "No model provided." +echo "$description" +exit +fi + +SHELL_DIR=$(dirname "$0") +BINARY_NAME=delegate_testing + +if [[ "$host" == "HOST" ]] +then +bazel build -c opt --copt -DCL_DELEGATE_NO_GL //"$SHELL_DIR":"$BINARY_NAME" +chmod +x bazel-bin/"$SHELL_DIR"/"$BINARY_NAME" +./bazel-bin/"$SHELL_DIR"/"$BINARY_NAME" "$model_path" +exit +fi + +model_name=${model_path##*/} # finds last token after '/' + +OPENCL_DIR=/data/local/tmp/delegate_testing/ + +ADB shell mkdir -p $OPENCL_DIR + +ADB push "$model_path" "$OPENCL_DIR" + +declare -a BUILD_CONFIG +abi_version=$(ADB shell getprop ro.product.cpu.abi | tr -d '\r') +if [[ "$abi_version" == "armeabi-v7a" ]]; then +#"32 bit" +BUILD_CONFIG=( --config=android_arm -c opt --copt=-fPIE --linkopt=-pie ) +else +#"64 bit" +BUILD_CONFIG=( --config=android_arm64 -c opt ) +fi + +bazel build "${BUILD_CONFIG[@]}" //$SHELL_DIR:$BINARY_NAME + +ADB push bazel-bin/$SHELL_DIR/$BINARY_NAME $OPENCL_DIR + +ADB shell chmod +x $OPENCL_DIR/$BINARY_NAME +ADB shell "cd $OPENCL_DIR && ./$BINARY_NAME $model_name" + +# clean up files from device +ADB shell rm -rf $OPENCL_DIR From f4619e69b8e97af4528e5dbf2c78f85e1c4266f8 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Tue, 28 Jul 2020 00:34:04 +0700 Subject: [PATCH 1381/2522] Add s3_filesystem_test init --- .../experimental/filesystem/plugins/s3/BUILD | 17 +++ .../filesystem/plugins/s3/s3_filesystem.cc | 5 +- .../filesystem/plugins/s3/s3_filesystem.h | 3 + .../plugins/s3/s3_filesystem_test.cc | 106 ++++++++++++++++++ 4 files changed, 128 insertions(+), 3 deletions(-) create mode 100644 tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/BUILD b/tensorflow/c/experimental/filesystem/plugins/s3/BUILD index 280fb677e4c..ec48f5de2b6 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/BUILD +++ b/tensorflow/c/experimental/filesystem/plugins/s3/BUILD @@ -44,3 +44,20 @@ cc_library( ], alwayslink = 1, ) + +tf_cc_test( + name = "s3_filesystem_test", + srcs = [ + "s3_filesystem_test.cc", + ], + tags = [ + "manual", + "notap", + ], + deps = [ + ":s3_filesystem_impl", + "//tensorflow/core/platform:path", + "//tensorflow/core/platform:stacktrace_handler", + "//tensorflow/core/platform:test", + ], +) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc index ff47210ed1b..9555fe61881 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc @@ -82,9 +82,8 @@ static inline void TF_SetStatusFromAWSError( } } -static void ParseS3Path(const Aws::String& fname, bool object_empty_ok, - Aws::String* bucket, Aws::String* object, - TF_Status* status) { +void ParseS3Path(const Aws::String& fname, bool object_empty_ok, + Aws::String* bucket, Aws::String* object, TF_Status* status) { size_t scheme_end = fname.find("://") + 2; if (fname.substr(0, scheme_end + 
1) != "s3://") { TF_SetStatus(status, TF_INVALID_ARGUMENT, diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h index 42cf3134b47..8360d018b46 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h @@ -26,6 +26,9 @@ limitations under the License. #include "tensorflow/c/experimental/filesystem/filesystem_interface.h" #include "tensorflow/c/tf_status.h" +void ParseS3Path(const Aws::String& fname, bool object_empty_ok, + Aws::String* bucket, Aws::String* object, TF_Status* status); + namespace tf_s3_filesystem { typedef struct S3File { std::shared_ptr s3_client; diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc new file mode 100644 index 00000000000..431da1419f7 --- /dev/null +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc @@ -0,0 +1,106 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h" + +#include + +#include "tensorflow/core/platform/path.h" +#include "tensorflow/core/platform/stacktrace_handler.h" +#include "tensorflow/core/platform/test.h" + +#define ASSERT_TF_OK(x) ASSERT_EQ(TF_OK, TF_GetCode(x)) << TF_Message(x) + +static std::string InitializeTmpDir() { + // This env should be something like `s3://bucket/path` + const char* test_dir = getenv("S3_TEST_TMPDIR"); + if (test_dir != nullptr) { + Aws::String bucket, object; + TF_Status* status = TF_NewStatus(); + ParseS3Path(test_dir, true, &bucket, &object, status); + if (TF_GetCode(status) != TF_OK) { + TF_DeleteStatus(status); + return ""; + } + TF_DeleteStatus(status); + + // We add a random value into `test_dir` to ensures that two consecutive + // runs are unlikely to clash. + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<> distribution; + std::string rng_val = std::to_string(distribution(gen)); + return tensorflow::io::JoinPath(std::string(test_dir), rng_val); + } else { + return ""; + } +} + +static std::string* GetTmpDir() { + static std::string tmp_dir = InitializeTmpDir(); + if (tmp_dir == "") + return nullptr; + else + return &tmp_dir; +} + +namespace tensorflow { +namespace { + +class S3FilesystemTest : public ::testing::Test { + public: + void SetUp() override { + root_dir_ = io::JoinPath( + *GetTmpDir(), + ::testing::UnitTest::GetInstance()->current_test_info()->name()); + status_ = TF_NewStatus(); + filesystem_ = new TF_Filesystem; + tf_s3_filesystem::Init(filesystem_, status_); + ASSERT_TF_OK(status_) << "Could not initialize filesystem. 
" + << TF_Message(status_); + } + void TearDown() override { + TF_DeleteStatus(status_); + tf_s3_filesystem::Cleanup(filesystem_); + delete filesystem_; + } + + std::string GetURIForPath(const std::string& path) { + const std::string translated_name = + tensorflow::io::JoinPath(root_dir_, path); + return translated_name; + } + + protected: + TF_Filesystem* filesystem_; + TF_Status* status_; + + private: + std::string root_dir_; +}; + +TEST_F(S3FilesystemTest, Init) { ASSERT_TF_OK(status_); } + +} // namespace +} // namespace tensorflow + +GTEST_API_ int main(int argc, char** argv) { + tensorflow::testing::InstallStacktraceHandler(); + if (!GetTmpDir()) { + std::cerr << "Could not read S3_TEST_TMPDIR env"; + return -1; + } + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} From 765c642088113944a6c1fef5c4a6f141a7be3b55 Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Mon, 27 Jul 2020 10:37:57 -0700 Subject: [PATCH 1382/2522] Add support for variable policy to be used by MirroredStrategy and TPUStrategy. Refactor existing values test and add an option to test variable policy. PiperOrigin-RevId: 323391056 Change-Id: I4585e3a8e0a09300c09de95191c034a10255a58c --- tensorflow/python/distribute/BUILD | 41 + tensorflow/python/distribute/combinations.py | 10 +- .../python/distribute/distribute_utils.py | 124 +- .../python/distribute/mirrored_strategy.py | 20 +- .../distribute/mirrored_variable_test.py | 6 +- tensorflow/python/distribute/tpu_strategy.py | 7 +- tensorflow/python/distribute/tpu_values.py | 316 +++- tensorflow/python/distribute/values.py | 14 +- tensorflow/python/distribute/values_test.py | 1092 +------------- tensorflow/python/distribute/vars_test.py | 1269 +++++++++++++++++ 10 files changed, 1795 insertions(+), 1104 deletions(-) create mode 100644 tensorflow/python/distribute/vars_test.py diff --git a/tensorflow/python/distribute/BUILD b/tensorflow/python/distribute/BUILD index 356fb3a7a9f..185b4568868 100644 --- a/tensorflow/python/distribute/BUILD +++ b/tensorflow/python/distribute/BUILD @@ -302,6 +302,7 @@ py_library( ":distribute_lib", ":reduce_util", ":shared_variable_creator", + ":tpu_values", ":values", "//tensorflow/python:array_ops", "//tensorflow/python:config", @@ -1224,6 +1225,46 @@ distribute_py_test( ], ) +distribute_py_test( + name = "vars_test", + size = "medium", + srcs = ["vars_test.py"], + main = "vars_test.py", + shard_count = 5, + tags = [ + "multi_and_single_gpu", + "no_rocm", + ], + tpu_tags = [ + "no_oss", # b/150954621 Target too big to run serially reliably. 
+ ], + deps = [ + ":combinations", + ":distribute_lib", + ":strategy_combinations", + ":tpu_strategy", + ":tpu_values", + ":values", + "//tensorflow/python:array_ops", + "//tensorflow/python:checkpoint_management", + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python:indexed_slices", + "//tensorflow/python:math_ops", + "//tensorflow/python:random_ops", + "//tensorflow/python:training", + "//tensorflow/python:variable_scope", + "//tensorflow/python:variables", + "//tensorflow/python/distribute/cluster_resolver:tpu_cluster_resolver_py", + "//tensorflow/python/eager:context", + "//tensorflow/python/eager:def_function", + "//tensorflow/python/eager:test", + "//tensorflow/python/tpu:tpu_lib", + "@absl_py//absl/testing:parameterized", + ], +) + distribute_py_test( name = "ps_values_test", size = "medium", diff --git a/tensorflow/python/distribute/combinations.py b/tensorflow/python/distribute/combinations.py index ad8bb879b93..a86c751ec79 100644 --- a/tensorflow/python/distribute/combinations.py +++ b/tensorflow/python/distribute/combinations.py @@ -58,11 +58,17 @@ class DistributionParameter(combinations_lib.ParameterModifier): """ def modified_arguments(self, kwargs, requested_parameters): - del requested_parameters + # Get the parameter that indicates if we need to set the `_use_policy` flag + # on the strategy object. This is a temporary flag for testing the variable + # policy rollout. + use_var_policy = kwargs.get("use_var_policy", None) distribution_arguments = {} for k, v in kwargs.items(): if isinstance(v, NamedDistribution): - distribution_arguments[k] = v.strategy + strategy = v.strategy + if use_var_policy: + strategy.extended._use_var_policy = use_var_policy + distribution_arguments[k] = strategy return distribution_arguments diff --git a/tensorflow/python/distribute/distribute_utils.py b/tensorflow/python/distribute/distribute_utils.py index 89848b91318..916ebafd8ac 100644 --- a/tensorflow/python/distribute/distribute_utils.py +++ b/tensorflow/python/distribute/distribute_utils.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.distribute import tpu_values as tpu_values_lib from tensorflow.python.distribute import values as values_lib from tensorflow.python.eager import context from tensorflow.python.eager import tape @@ -145,7 +146,7 @@ def select_replica_mirrored(replica_id, structured): def _get_mirrored(x): if isinstance(x, values_lib.DistributedValues): - if not isinstance(x, values_lib.Mirrored): + if not is_mirrored(x): raise TypeError( "Expected value to be mirrored across replicas: %s in %s." % (x, structured)) @@ -245,34 +246,25 @@ def validate_colocate(v, extended): # Variable creation function for sync strategies. -def create_mirrored_variable( # pylint: disable=missing-docstring - strategy, real_mirrored_creator, mirrored_cls, sync_on_read_cls, **kwargs): - # Figure out what collections this variable should be added to. - # We'll add the MirroredVariable to those collections instead. 
- var_collections = kwargs.pop("collections", None) - if var_collections is None: - var_collections = [ops.GraphKeys.GLOBAL_VARIABLES] - kwargs["collections"] = [] - +def _get_and_validate_synchronization(kwargs): + """Validate that given synchronization value is valid.""" synchronization = kwargs.get("synchronization", - vs.VariableSynchronization.ON_WRITE) - + vs.VariableSynchronization.AUTO) if synchronization == vs.VariableSynchronization.NONE: raise ValueError( - "`NONE` variable synchronization mode is not supported with `Mirrored` " - "distribution strategy. Please change the `synchronization` for " + "`NONE` variable synchronization mode is not supported with " + "tf.distribute strategy. Please change the `synchronization` for " "variable: " + str(kwargs["name"])) - elif synchronization == vs.VariableSynchronization.ON_READ: - is_sync_on_read = True - elif synchronization in (vs.VariableSynchronization.ON_WRITE, - vs.VariableSynchronization.AUTO): - # `AUTO` synchronization defaults to `ON_WRITE`. - is_sync_on_read = False - else: + if synchronization not in (vs.VariableSynchronization.ON_READ, + vs.VariableSynchronization.ON_WRITE, + vs.VariableSynchronization.AUTO): raise ValueError( "Invalid variable synchronization mode: %s for variable: %s" % (synchronization, kwargs["name"])) + return synchronization + +def _validate_aggregation(kwargs): aggregation = kwargs.pop("aggregation", vs.VariableAggregation.NONE) if aggregation not in (vs.VariableAggregation.NONE, @@ -281,6 +273,33 @@ def create_mirrored_variable( # pylint: disable=missing-docstring vs.VariableAggregation.ONLY_FIRST_REPLICA): raise ValueError("Invalid variable aggregation mode: %s for variable: %s" % (aggregation, kwargs["name"])) + return aggregation + + +def _get_variable_policy_class(synchronization, aggregation, policy_mapping): + if synchronization == vs.VariableSynchronization.AUTO: + if aggregation == vs.VariableAggregation.NONE: + # Use AutoPolicy. + return policy_mapping.get(synchronization) + else: + # Revert to OnWritePolicy + return policy_mapping.get(vs.VariableSynchronization.ON_WRITE) + return policy_mapping.get(synchronization) + + +def create_mirrored_variable(strategy, real_mirrored_creator, class_mapping, + policy_mapping, **kwargs): + """Create distributed variables with given synchronization and aggregation.""" + # Figure out what collections this variable should be added to. + # We'll add the MirroredVariable to those collections instead. + var_collections = kwargs.pop("collections", None) + if var_collections is None: + var_collections = [ops.GraphKeys.GLOBAL_VARIABLES] + kwargs["collections"] = [] + + synchronization = _get_and_validate_synchronization(kwargs) + aggregation = _validate_aggregation(kwargs) + use_var_policy = getattr(strategy.extended, "_use_var_policy", False) # Ignore user-specified caching device, not needed for mirrored variables. kwargs.pop("caching_device", None) @@ -290,8 +309,15 @@ def create_mirrored_variable( # pylint: disable=missing-docstring # here. 
with tape.stop_recording(): value_list = real_mirrored_creator(**kwargs) - var_cls = sync_on_read_cls if is_sync_on_read else mirrored_cls - result = var_cls(strategy, value_list, aggregation) + if use_var_policy: + var_policy_cls = _get_variable_policy_class(synchronization, aggregation, + policy_mapping) + var_policy = var_policy_cls(aggregation=aggregation) + var_cls = class_mapping.get("VariableClass") + result = var_cls(strategy, value_list, aggregation, var_policy=var_policy) + else: + var_cls = class_mapping.get(synchronization) + result = var_cls(strategy, value_list, aggregation) # Install the created DistributedVariable as _distributed_container property # of the underlying variables, to make it easy to map back to the container. for v in result.values: @@ -324,3 +350,55 @@ def create_mirrored_variable( # pylint: disable=missing-docstring ops.add_to_collections(ops.GraphKeys.GLOBAL_STEP, result) return result + + +# Utility functions +# Return True if the Value is Mirrored or the Variable is replicated and kept in +# sync. +def is_mirrored(val): + if isinstance(val, values_lib.DistributedVariable): + if val._policy: # pylint: disable=protected-access + return val._policy._is_mirrored() # pylint: disable=protected-access + return isinstance(val, values_lib.Mirrored) + + +def is_sync_on_read(val): + if isinstance(val, values_lib.DistributedVariable): + if val._policy: # pylint: disable=protected-access + return not val._policy._is_mirrored() # pylint: disable=protected-access + return not isinstance(val, values_lib.Mirrored) + +# The following mapping indicates the policy that you must use for a given +# variable `synchronization` and `aggregation` pair. +# AutoPolicy is used for: +# (synchronization=Auto, aggregation=None) +# OnWritePolicy is used for: +# (synchronization=Auto, aggregation=SUM,MEAN,ONLY_FIRST_REPLICA) +# (synchronization=ON_WRITE, aggregation=NONE,SUM,MEAN,ONLY_FIRST_REPLICA) +# OnReadPolicy is used for: +# (synchronization=ON_READ, aggregation=NONE,SUM,MEAN,ONLY_FIRST_REPLICA) +VARIABLE_POLICY_MAPPING = { + vs.VariableSynchronization.AUTO: values_lib.AutoPolicy, + vs.VariableSynchronization.ON_WRITE: values_lib.OnWritePolicy, + vs.VariableSynchronization.ON_READ: values_lib.OnReadPolicy, +} + +VARIABLE_CLASS_MAPPING = { + "VariableClass": values_lib.DistributedVariable, + vs.VariableSynchronization.AUTO: values_lib.MirroredVariable, + vs.VariableSynchronization.ON_WRITE: values_lib.MirroredVariable, + vs.VariableSynchronization.ON_READ: values_lib.SyncOnReadVariable, +} + +TPU_VARIABLE_POLICY_MAPPING = { + vs.VariableSynchronization.AUTO: tpu_values_lib.TPUAutoPolicy, + vs.VariableSynchronization.ON_WRITE: tpu_values_lib.TPUOnWritePolicy, + vs.VariableSynchronization.ON_READ: tpu_values_lib.TPUOnReadPolicy, +} + +TPU_VARIABLE_CLASS_MAPPING = { + "VariableClass": tpu_values_lib.TPUDistributedVariable, + vs.VariableSynchronization.AUTO: tpu_values_lib.TPUMirroredVariable, + vs.VariableSynchronization.ON_WRITE: tpu_values_lib.TPUMirroredVariable, + vs.VariableSynchronization.ON_READ: tpu_values_lib.TPUSyncOnReadVariable, +} diff --git a/tensorflow/python/distribute/mirrored_strategy.py b/tensorflow/python/distribute/mirrored_strategy.py index b424f798476..5323f6131ee 100644 --- a/tensorflow/python/distribute/mirrored_strategy.py +++ b/tensorflow/python/distribute/mirrored_strategy.py @@ -319,6 +319,9 @@ class MirroredExtended(distribute_lib.StrategyExtendedV1): if ops.executing_eagerly_outside_functions(): self.experimental_enable_get_next_as_optional = True + # 
Flag to turn on VariablePolicy. + self._use_var_policy = False + def _initialize_strategy(self, devices): # The _initialize_strategy method is intended to be used by distribute # coordinator as well. @@ -462,7 +465,8 @@ class MirroredExtended(distribute_lib.StrategyExtendedV1): return distribute_utils.create_mirrored_variable( self._container_strategy(), _real_mirrored_creator, - values.MirroredVariable, values.SyncOnReadVariable, **kwargs) + distribute_utils.VARIABLE_CLASS_MAPPING, + distribute_utils.VARIABLE_POLICY_MAPPING, **kwargs) def _validate_colocate_with_variable(self, colocate_with_variable): distribute_utils.validate_colocate_distributed_variable( @@ -628,10 +632,10 @@ class MirroredExtended(distribute_lib.StrategyExtendedV1): return self._cross_device_ops or self._inferred_cross_device_ops def _reduce_to(self, reduce_op, value, destinations, experimental_hints): - if (isinstance(value, values.Mirrored) and + if (distribute_utils.is_mirrored(value) and reduce_op == reduce_util.ReduceOp.MEAN): return value - assert not isinstance(value, values.Mirrored) + assert not distribute_utils.is_mirrored(value) if not isinstance(value, values.DistributedValues): # This function handles reducing values that are not PerReplica or # Mirrored values. For example, the same value could be present on all @@ -686,10 +690,12 @@ class MirroredExtended(distribute_lib.StrategyExtendedV1): def read_var(self, replica_local_var): """Read the aggregate value of a replica-local variable.""" - if isinstance(replica_local_var, values.SyncOnReadVariable): - return replica_local_var._get_cross_replica() # pylint: disable=protected-access - assert isinstance(replica_local_var, values.Mirrored) - return array_ops.identity(replica_local_var._get()) # pylint: disable=protected-access + # pylint: disable=protected-access + if values._is_sync_on_read(replica_local_var): + return replica_local_var._get_cross_replica() + assert values._is_mirrored(replica_local_var) + return array_ops.identity(replica_local_var._get()) + # pylint: enable=protected-access def _local_results(self, val): if isinstance(val, values.DistributedValues): diff --git a/tensorflow/python/distribute/mirrored_variable_test.py b/tensorflow/python/distribute/mirrored_variable_test.py index 8e7d674947e..03d697fe1eb 100644 --- a/tensorflow/python/distribute/mirrored_variable_test.py +++ b/tensorflow/python/distribute/mirrored_variable_test.py @@ -379,8 +379,7 @@ class MirroredVariableCreationTest(test.TestCase): with distribution.scope(): with self.assertRaisesRegex( ValueError, "`NONE` variable synchronization mode is not " - "supported with `Mirrored` distribution strategy. Please change " - "the `synchronization` for variable: v"): + "supported with "): variable_scope.get_variable( "v", [1], synchronization=variable_scope.VariableSynchronization.NONE) @@ -389,8 +388,7 @@ class MirroredVariableCreationTest(test.TestCase): with distribution.scope(): with self.assertRaisesRegex( ValueError, "`NONE` variable synchronization mode is not " - "supported with `Mirrored` distribution strategy. 
Please change " - "the `synchronization` for variable: v"): + "supported with "): variable_scope.variable( 1.0, name="v", diff --git a/tensorflow/python/distribute/tpu_strategy.py b/tensorflow/python/distribute/tpu_strategy.py index 8e5ef061dcf..bad6e6aa39f 100644 --- a/tensorflow/python/distribute/tpu_strategy.py +++ b/tensorflow/python/distribute/tpu_strategy.py @@ -544,6 +544,9 @@ class TPUExtended(distribute_lib.StrategyExtendedV1): context.async_wait() atexit.register(async_wait) + # Flag to turn on VariablePolicy + self._use_var_policy = False + def _validate_colocate_with_variable(self, colocate_with_variable): distribute_utils. validate_colocate(colocate_with_variable, self) @@ -870,8 +873,8 @@ class TPUExtended(distribute_lib.StrategyExtendedV1): return distribute_utils.create_mirrored_variable( self._container_strategy(), _real_mirrored_creator, - tpu_values.TPUMirroredVariable, tpu_values.TPUSyncOnReadVariable, - **kwargs) + distribute_utils.TPU_VARIABLE_CLASS_MAPPING, + distribute_utils.TPU_VARIABLE_POLICY_MAPPING, **kwargs) def _reduce_to(self, reduce_op, value, destinations, experimental_hints): if (isinstance(value, values.DistributedValues) or diff --git a/tensorflow/python/distribute/tpu_values.py b/tensorflow/python/distribute/tpu_values.py index 33885531966..ce6d2e7029b 100644 --- a/tensorflow/python/distribute/tpu_values.py +++ b/tensorflow/python/distribute/tpu_values.py @@ -197,10 +197,58 @@ def enclosing_tpu_context(): return None +class TPUDistributedVariable(TPUVariableMixin, values.DistributedVariable): + """DistributedVariable subclass for TPUStrategy.""" + + def _is_mirrored(self): + self._policy._is_mirrored() # pylint: disable=protected-access + + def assign_sub(self, value, use_locking=False, name=None, read_value=True): + return self._policy.assign_sub( + self, value, use_locking=use_locking, name=name, read_value=read_value) + + def assign_add(self, value, use_locking=False, name=None, read_value=True): + return self._policy.assign_add( + self, value, use_locking=use_locking, name=name, read_value=read_value) + + def assign(self, value, use_locking=False, name=None, read_value=True): + return self._policy.assign( + self, value, use_locking=use_locking, name=name, read_value=read_value) + + def scatter_sub(self, sparse_delta, use_locking=False, name=None): + return self._policy.scatter_sub( + self, sparse_delta, use_locking=use_locking, name=name) + + def scatter_add(self, sparse_delta, use_locking=False, name=None): + return self._policy.scatter_add( + self, sparse_delta, use_locking=use_locking, name=name) + + def scatter_mul(self, sparse_delta, use_locking=False, name=None): + return self._policy.scatter_mul( + self, sparse_delta, use_locking=use_locking, name=name) + + def scatter_div(self, sparse_delta, use_locking=False, name=None): + return self._policy.scatter_div( + self, sparse_delta, use_locking=use_locking, name=name) + + def scatter_min(self, sparse_delta, use_locking=False, name=None): + return self._policy.scatter_min( + self, sparse_delta, use_locking=use_locking, name=name) + + def scatter_max(self, sparse_delta, use_locking=False, name=None): + return self._policy.scatter_max( + self, sparse_delta, use_locking=use_locking, name=name) + + def scatter_update(self, sparse_delta, use_locking=False, name=None): + return self._policy.scatter_update( + self, sparse_delta, use_locking=use_locking, name=name) + + class TPUMirroredVariable(TPUVariableMixin, values.MirroredVariable): """Holds a map from replica to TPU variables whose values are kept 
in sync.""" - def assign_sub(self, value, use_locking=False, name=None, read_value=True): + def assign_sub(self, value, use_locking=False, name=None, + read_value=True): if (enclosing_tpu_context() and self.aggregation == variable_scope.VariableAggregation.NONE): return _make_raw_assign_fn( @@ -210,17 +258,11 @@ class TPUMirroredVariable(TPUVariableMixin, values.MirroredVariable): use_locking=use_locking, name=name, read_value=read_value) + return assign_sub(self, value, use_locking=use_locking, name=name, + read_value=read_value) - assign_sub_fn = _make_raw_assign_fn( - gen_resource_variable_ops.assign_sub_variable_op) - return self._update( - update_fn=assign_sub_fn, - value=value, - use_locking=use_locking, - name=name, - read_value=read_value) - - def assign_add(self, value, use_locking=False, name=None, read_value=True): + def assign_add(self, value, use_locking=False, name=None, + read_value=True): if (enclosing_tpu_context() and self.aggregation == variable_scope.VariableAggregation.NONE): return _make_raw_assign_fn( @@ -230,34 +272,21 @@ class TPUMirroredVariable(TPUVariableMixin, values.MirroredVariable): use_locking=use_locking, name=name, read_value=read_value) - - assign_add_fn = _make_raw_assign_fn( - gen_resource_variable_ops.assign_add_variable_op) - return self._update( - update_fn=assign_add_fn, - value=value, - use_locking=use_locking, - name=name, - read_value=read_value) + return assign_add(self, value, use_locking=use_locking, name=name, + read_value=read_value) def assign(self, value, use_locking=False, name=None, read_value=True): if (enclosing_tpu_context() and self.aggregation == variable_scope.VariableAggregation.NONE): - return _make_raw_assign_fn(gen_resource_variable_ops.assign_variable_op)( - self, - value=value, - use_locking=use_locking, - name=name, - read_value=read_value) - - assign_fn = _make_raw_assign_fn( - gen_resource_variable_ops.assign_variable_op) - return self._update( - update_fn=assign_fn, - value=value, - use_locking=use_locking, - name=name, - read_value=read_value) + return _make_raw_assign_fn( + gen_resource_variable_ops.assign_variable_op)( + self, + value=value, + use_locking=use_locking, + name=name, + read_value=read_value) + return assign(self, value, use_locking=use_locking, name=name, + read_value=read_value) def scatter_sub(self, *args, **kwargs): raise NotImplementedError @@ -312,3 +341,220 @@ class TPUSyncOnReadVariable(TPUVariableMixin, values.SyncOnReadVariable): def _is_mirrored(self): return False + + +# Common method between AutoPolicy, OnWrite and Mirrored variables. 
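+# Each helper wraps the corresponding raw gen_resource_variable_ops assign op
+# via _make_raw_assign_fn() and routes it through var._update(), so
+# TPUMirroredVariable, TPUAutoPolicy and TPUOnWritePolicy can share a single
+# update path instead of duplicating it in each class.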
+def assign_sub(var, value, use_locking=False, name=None, read_value=True): + assign_sub_fn = _make_raw_assign_fn( + gen_resource_variable_ops.assign_sub_variable_op) + return var._update( # pylint: disable=protected-access + update_fn=assign_sub_fn, + value=value, + use_locking=use_locking, + name=name, + read_value=read_value) + + +def assign_add(var, value, use_locking=False, name=None, read_value=True): + assign_add_fn = _make_raw_assign_fn( + gen_resource_variable_ops.assign_add_variable_op) + return var._update( # pylint: disable=protected-access + update_fn=assign_add_fn, + value=value, + use_locking=use_locking, + name=name, + read_value=read_value) + + +def assign(var, value, use_locking=False, name=None, read_value=True): + assign_fn = _make_raw_assign_fn( + gen_resource_variable_ops.assign_variable_op) + return var._update( # pylint: disable=protected-access + update_fn=assign_fn, + value=value, + use_locking=use_locking, + name=name, + read_value=read_value) + + +class TPUAutoPolicy(values.AutoPolicy): + """Policy defined for `tf.VariableSynchronization.AUTO` synchronization. + + This policy is created when `synchronization` is set to + `tf.VariableSynchronization.AUTO` and `aggregation` is set to + `tf.VariableAggregation.NONE` when creating a `tf.Variable` in `tf.distribute` + scope. + """ + + def assign_sub(self, var, value, use_locking=False, name=None, + read_value=True): + if enclosing_tpu_context(): + return _make_raw_assign_fn( + gen_resource_variable_ops.assign_sub_variable_op)( + var, + value=value, + use_locking=use_locking, + name=name, + read_value=read_value) + return assign_sub(var, value, use_locking=use_locking, name=name, + read_value=read_value) + + def assign_add(self, var, value, use_locking=False, name=None, + read_value=True): + if enclosing_tpu_context(): + return _make_raw_assign_fn( + gen_resource_variable_ops.assign_add_variable_op)( + var, + value=value, + use_locking=use_locking, + name=name, + read_value=read_value) + return assign_add(var, value, use_locking=use_locking, name=name, + read_value=read_value) + + def assign(self, var, value, use_locking=False, name=None, read_value=True): + if enclosing_tpu_context(): + return _make_raw_assign_fn( + gen_resource_variable_ops.assign_variable_op)( + var, + value=value, + use_locking=use_locking, + name=name, + read_value=read_value) + return assign(var, value, use_locking=use_locking, name=name, + read_value=read_value) + + def scatter_sub(self, *args, **kwargs): + raise NotImplementedError + + def scatter_add(self, *args, **kwargs): + raise NotImplementedError + + def scatter_max(self, *args, **kwargs): + raise NotImplementedError + + def scatter_min(self, *args, **kwargs): + raise NotImplementedError + + def scatter_mul(self, *args, **kwargs): + raise NotImplementedError + + def scatter_div(self, *args, **kwargs): + raise NotImplementedError + + def scatter_update(self, *args, **kwargs): + raise NotImplementedError + + def _is_mirrored(self): + return True + + +class TPUOnWritePolicy(values.OnWritePolicy): + """Policy defined for `tf.VariableSynchronization.ON_WRITE` synchronization. + + This policy is created when the following `synchronization` and + `aggregation` parameters are specified when creating a `tf.Variable` in + `tf.distribute` scope: + * `synchronization` is equal to `tf.VariableSynchronization.AUTO` and + aggregation can be any of the following `tf.VariableAggregation` enum + values such as `SUM`, `MEAN` or `ONLY_FIRST_REPLICA`. 
+ * `synchronization` is equal to `tf.VariableSynchronization.ON_WRITE` and + aggregation can be any of the following `tf.VariableAggregation` enum + values such as `NONE`, `SUM`, `MEAN` or `ONLY_FIRST_REPLICA`. + """ + + def assign_sub(self, var, value, use_locking=False, name=None, + read_value=True): + return assign_sub(var, value, use_locking=use_locking, name=name, + read_value=read_value) + + def assign_add(self, var, value, use_locking=False, name=None, + read_value=True): + return assign_add(var, value, use_locking=use_locking, name=name, + read_value=read_value) + + def assign(self, var, value, use_locking=False, name=None, read_value=True): + return assign(var, value, use_locking=use_locking, name=name, + read_value=read_value) + + def scatter_sub(self, *args, **kwargs): + raise NotImplementedError + + def scatter_add(self, *args, **kwargs): + raise NotImplementedError + + def scatter_max(self, *args, **kwargs): + raise NotImplementedError + + def scatter_min(self, *args, **kwargs): + raise NotImplementedError + + def scatter_mul(self, *args, **kwargs): + raise NotImplementedError + + def scatter_div(self, *args, **kwargs): + raise NotImplementedError + + def scatter_update(self, *args, **kwargs): + raise NotImplementedError + + def _is_mirrored(self): + return True + + +class TPUOnReadPolicy(values.OnReadPolicy): + """Policy defined for `tf.VariableSynchronization.ON_READ` synchronization. + + This policy is created when `synchronization` is set to + `tf.VariableSynchronization.ON_READ` and `aggregation` is set to any of the + values allowed by the `tf.VariableAggregation` enum such as `NONE`, `SUM`, + `MEAN` or `ONLY_FIRST_REPLICA`when creating a `tf.Variable` in `tf.distribute` + scope. + """ + + def assign_sub(self, var, *args, **kwargs): + if enclosing_tpu_context() is None: + return super(TPUOnReadPolicy, self).assign_sub(var, *args, **kwargs) + else: + return _make_raw_assign_fn( + gen_resource_variable_ops.assign_sub_variable_op)(var, *args, + **kwargs) + + def assign_add(self, var, *args, **kwargs): + if enclosing_tpu_context() is None: + return super(TPUOnReadPolicy, self).assign_add(var, *args, **kwargs) + else: + return _make_raw_assign_fn( + gen_resource_variable_ops.assign_add_variable_op)(var, *args, + **kwargs) + + def assign(self, var, *args, **kwargs): + if enclosing_tpu_context() is None: + return super(TPUOnReadPolicy, self).assign(var, *args, **kwargs) + else: + return _make_raw_assign_fn(gen_resource_variable_ops.assign_variable_op)( + var, *args, **kwargs) + + def _is_mirrored(self): + return False + + def scatter_sub(self, *args, **kwargs): + raise NotImplementedError + + def scatter_add(self, *args, **kwargs): + raise NotImplementedError + + def scatter_max(self, *args, **kwargs): + raise NotImplementedError + + def scatter_min(self, *args, **kwargs): + raise NotImplementedError + + def scatter_mul(self, *args, **kwargs): + raise NotImplementedError + + def scatter_div(self, *args, **kwargs): + raise NotImplementedError + + def scatter_update(self, *args, **kwargs): + raise NotImplementedError diff --git a/tensorflow/python/distribute/values.py b/tensorflow/python/distribute/values.py index 50cd8d73e73..7dedbee2041 100644 --- a/tensorflow/python/distribute/values.py +++ b/tensorflow/python/distribute/values.py @@ -700,49 +700,49 @@ class DistributedVariable(DistributedDelegate, variables_lib.Variable, def scatter_sub(self, sparse_delta, use_locking=False, name=None): if self._policy: - self._policy.scatter_sub( + return self._policy.scatter_sub( self, 
sparse_delta, use_locking=use_locking, name=name) return values_util.scatter_sub( self, sparse_delta, use_locking=use_locking, name=name) def scatter_add(self, sparse_delta, use_locking=False, name=None): if self._policy: - self._policy.scatter_add( + return self._policy.scatter_add( self, sparse_delta, use_locking=use_locking, name=name) return values_util.scatter_add( self, sparse_delta, use_locking=use_locking, name=name) def scatter_mul(self, sparse_delta, use_locking=False, name=None): if self._policy: - self._policy.scatter_mul( + return self._policy.scatter_mul( self, sparse_delta, use_locking=use_locking, name=name) return values_util.scatter_mul( self, sparse_delta, use_locking=use_locking, name=name) def scatter_div(self, sparse_delta, use_locking=False, name=None): if self._policy: - self._policy.scatter_div( + return self._policy.scatter_div( self, sparse_delta, use_locking=use_locking, name=name) return values_util.scatter_div( self, sparse_delta, use_locking=use_locking, name=name) def scatter_min(self, sparse_delta, use_locking=False, name=None): if self._policy: - self._policy.scatter_min( + return self._policy.scatter_min( self, sparse_delta, use_locking=use_locking, name=name) return values_util.scatter_min( self, sparse_delta, use_locking=use_locking, name=name) def scatter_max(self, sparse_delta, use_locking=False, name=None): if self._policy: - self._policy.scatter_max( + return self._policy.scatter_max( self, sparse_delta, use_locking=use_locking, name=name) return values_util.scatter_max( self, sparse_delta, use_locking=use_locking, name=name) def scatter_update(self, sparse_delta, use_locking=False, name=None): if self._policy: - self._policy.scatter_update( + return self._policy.scatter_update( self, sparse_delta, use_locking=use_locking, name=name) return values_util.scatter_update( self, sparse_delta, use_locking=use_locking, name=name) diff --git a/tensorflow/python/distribute/values_test.py b/tensorflow/python/distribute/values_test.py index 1c090737d8f..e445c1195be 100644 --- a/tensorflow/python/distribute/values_test.py +++ b/tensorflow/python/distribute/values_test.py @@ -19,7 +19,6 @@ from __future__ import division from __future__ import print_function import copy -import itertools import os from absl.testing import parameterized @@ -30,14 +29,12 @@ from tensorflow.python import tf2 from tensorflow.python.distribute import combinations from tensorflow.python.distribute import distribute_lib from tensorflow.python.distribute import distribute_utils -from tensorflow.python.distribute import distribution_strategy_context from tensorflow.python.distribute import packed_distributed_variable as packed from tensorflow.python.distribute import strategy_combinations from tensorflow.python.distribute import test_util as ds_test_util from tensorflow.python.distribute import tpu_strategy from tensorflow.python.distribute import tpu_values from tensorflow.python.distribute import values as values_lib -from tensorflow.python.distribute.cluster_resolver import tpu_cluster_resolver from tensorflow.python.eager import context from tensorflow.python.eager import def_function from tensorflow.python.eager import test @@ -51,19 +48,56 @@ from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops -from tensorflow.python.ops import random_ops from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import variable_scope from 
tensorflow.python.ops import variables as variables_lib from tensorflow.python.saved_model import save_context from tensorflow.python.saved_model import save_options -from tensorflow.python.tpu import tpu_strategy_util from tensorflow.python.training import saver as saver_lib from tensorflow.python.training.tracking import util as trackable_utils from tensorflow.python.types import core from tensorflow.python.util import nest +def _device_str(d): + return "/device:GPU:" + str(d) + + +def _nested_value(d): + return ("a" + d, ["b" + d, {"c": "d" + d, "e": "f" + d}, "g" + d], "h" + d) + + +def _make_mirrored_val(init_val=5.0): + v = [] + devices = ["/device:GPU:0", "/device:CPU:0"] + for d, _ in zip(devices, ["v", "v/replica"]): + with ops.device(d): + v.append(constant_op.constant(init_val)) + return values_lib.Mirrored(v) + + +def _make_mirrored(): + v = [] + devices = ["/device:GPU:0", "/device:CPU:0"] + for d, n, init in zip(devices, ["v", "v/replica"], [1., 2.]): + with ops.device(d): + v.append(variable_scope.get_variable( + name=n, initializer=init, use_resource=True)) + mirrored = values_lib.MirroredVariable( + None, v, variable_scope.VariableAggregation.SUM) + return mirrored + + +def mirrored_and_tpu_strategy_combinations(): + return combinations.combine( + distribution=[ + strategy_combinations.mirrored_strategy_with_gpu_and_cpu, + strategy_combinations.tpu_strategy, + strategy_combinations.tpu_strategy_packed_var, + ], + mode=["graph", "eager"]) + + class DistributedValuesTest(test.TestCase, parameterized.TestCase): def testGetEager(self): @@ -363,45 +397,6 @@ class DistributedDelegateTest(test.TestCase): self.assertEqual(v.x, v_deep_copy.x) -def _device_str(d): - return "/device:GPU:" + str(d) - - -def _nested_value(d): - return ("a" + d, ["b" + d, {"c": "d" + d, "e": "f" + d}, "g" + d], "h" + d) - - -def _make_mirrored_val(init_val=5.0): - v = [] - devices = ["/device:GPU:0", "/device:CPU:0"] - for d, _ in zip(devices, ["v", "v/replica"]): - with ops.device(d): - v.append(constant_op.constant(init_val)) - return values_lib.Mirrored(v) - - -def _make_mirrored(): - v = [] - devices = ["/device:GPU:0", "/device:CPU:0"] - for d, n, init in zip(devices, ["v", "v/replica"], [1., 2.]): - with ops.device(d): - v.append(variable_scope.get_variable( - name=n, initializer=init, use_resource=True)) - mirrored = values_lib.MirroredVariable( - None, v, variable_scope.VariableAggregation.SUM) - return mirrored - - -def mirrored_and_tpu_strategy_combinations(): - return combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - strategy_combinations.tpu_strategy, - strategy_combinations.tpu_strategy_packed_var, - ], - mode=["graph", "eager"]) - - @combinations.generate( combinations.combine( distribution=[ @@ -796,507 +791,6 @@ class MirroredVariableTest(test.TestCase, parameterized.TestCase): save_path = self._save_normal() self._restore_mirrored(save_path) - @combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_one_gpu, - ], - mode=["graph"])) - def testFetchAMirroredVariable(self, distribution): - with self.session(graph=ops.Graph()) as sess, distribution.scope(): - with ops.device("/device:GPU:0"): - v = variable_scope.get_variable( - name="v", initializer=1., use_resource=True) - mirrored = values_lib.MirroredVariable( - distribution, (v,), variable_scope.VariableAggregation.MEAN) - sess.run(variables_lib.global_variables_initializer()) - sess.run({"complicated": mirrored}) - - 
@combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - strategy_combinations.tpu_strategy, - strategy_combinations.tpu_strategy_packed_var, - ], - mode=["eager"])) - def testAssignValueInReplicaContextWithoutAggregation(self, distribution): - with distribution.scope(): - v = variables_lib.Variable(1.0, name="foo") - - @def_function.function - def mytest(): - def model_fn(): - v.assign(5.0) - return v.read_value() - - return distribution.run(model_fn) - - mytest() - self.assertAllEqual([5.0, 5.0], self.evaluate(v.values)) - - @combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_one_cpu, - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - strategy_combinations.tpu_strategy, - strategy_combinations.tpu_strategy_packed_var, - ], - mode=["graph", "eager"])) - def testValueInReplicaContext(self, distribution): - with distribution.scope(): - v = variables_lib.Variable( - 1., aggregation=variables_lib.VariableAggregation.MEAN) - self.evaluate(variables_lib.global_variables_initializer()) - - @def_function.function - def f(): - with ops.control_dependencies([v.assign_add(1.)]): - return v.value() - - results = self.evaluate( - distribution.experimental_local_results( - distribution.run(f))) - for value in results: - self.assertEqual(2., value) - - @combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_one_cpu, - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - strategy_combinations.tpu_strategy, - strategy_combinations.tpu_strategy_packed_var, - ], - mode=["graph", "eager"])) - def testAssignOutOfScope(self, distribution): - with distribution.scope(): - mirrored = variables_lib.Variable(1.) - self.evaluate(mirrored.assign(3.)) - self.assertEqual(self.evaluate(mirrored.read_value()), 3.) - for component in mirrored.values: - self.assertEqual(self.evaluate(component.read_value()), 3.) - - @combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - ], - mode=["graph", "eager"])) - def testAssignAggregationMeanDTypeNonFloat(self, distribution): - with distribution.scope(): - v = variables_lib.Variable( - 1, - aggregation=variable_scope.VariableAggregation.MEAN, - dtype=dtypes.int32) - self.evaluate(v.initializer) - - @def_function.function - def assign(): - ctx = distribution_strategy_context.get_replica_context() - return v.assign(ctx.replica_id_in_sync_group) - - # disallow assign() with distributed value in replica context. - with self.assertRaisesRegex(ValueError, - "Cannot update non-float variables"): - self.evaluate( - distribution.experimental_local_results( - distribution.run(assign))) - - # allow assign() with same value in replica context. - @def_function.function - def assign_same(): - return v.assign(2) - - self.evaluate( - distribution.experimental_local_results( - distribution.run(assign_same))) - self.assertEqual(self.evaluate(v.read_value()), 2) - - # allow assign() with mirrored variable in replica context. 
- with distribution.scope(): - v2 = variables_lib.Variable( - 3, - aggregation=variable_scope.VariableAggregation.SUM, - dtype=dtypes.int32) - self.evaluate(v2.initializer) - - @def_function.function - def assign_mirrored(): - return v.assign(v2) - - self.evaluate( - distribution.experimental_local_results( - distribution.run(assign_mirrored))) - self.assertEqual(self.evaluate(v.read_value()), 3) - - # allow assign() in cross replica context. - with distribution.scope(): - self.evaluate(v.assign(4)) - self.assertEqual(self.evaluate(v.read_value()), 4) - - @combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - strategy_combinations.tpu_strategy, - strategy_combinations.tpu_strategy_packed_var, - ], - mode=["eager"])) - def testInitializedToSameValueInsideEagerRun(self, distribution): - v = [None] - - @def_function.function - def step(): - - def f(): - if v[0] is None: - v[0] = variables_lib.Variable(random_ops.random_normal([])) - - distribution.run(f) - - context.set_global_seed(None) - step() - vals = self.evaluate(v[0].values) - self.assertAllEqual(vals[0], vals[1]) - - @combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_one_cpu, - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - strategy_combinations.tpu_strategy, - strategy_combinations.tpu_strategy_packed_var, - ], - mode=["graph", "eager"])) - def testAggregationOnlyFirstReplica(self, distribution): - with distribution.scope(): - v = variable_scope.variable( - 15., - synchronization=variables_lib.VariableSynchronization.ON_WRITE, - aggregation=variables_lib.VariableAggregation.ONLY_FIRST_REPLICA) - self.evaluate(variables_lib.global_variables_initializer()) - - @def_function.function - def assign(): - ctx = distribution_strategy_context.get_replica_context() - replica_id = ctx.replica_id_in_sync_group - return v.assign(math_ops.cast(replica_id, dtypes.float32)) - per_replica_results = self.evaluate(distribution.experimental_local_results( - distribution.run(assign))) - # The per-replica values should always match the first replicas value. 
- self.assertAllEqual( - array_ops.zeros(distribution.num_replicas_in_sync, dtypes.float32), - per_replica_results) - - @combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - strategy_combinations.tpu_strategy, - strategy_combinations.tpu_strategy_packed_var, - ], - mode=["eager"])) - def testInitScope(self, distribution): - - class C(object): - pass - - obj = C() - obj.w = None - obj.v = None - - @def_function.function - def assign(): - with ops.init_scope(): - if obj.w is None: - obj.w = variables_lib.Variable( - 0, aggregation=variables_lib.VariableAggregation.MEAN) - obj.v = variables_lib.Variable( - obj.w.read_value(), - aggregation=variables_lib.VariableAggregation.MEAN) - - return obj.v.assign_add(2) - - per_replica_results = self.evaluate( - distribution.experimental_local_results(distribution.run(assign))) - self.assertAllEqual([2, 2], per_replica_results) - - @combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - strategy_combinations.tpu_strategy, - ], - mode=["eager"])) - def testOperatorOverride(self, distribution): - - with distribution.scope(): - v = variable_scope.variable( - 1, aggregation=variables_lib.VariableAggregation.MEAN) - - self.assertEqual(2, self.evaluate(v + 1)) - - @def_function.function - def add(): - return v + 1 - - per_replica_results = self.evaluate( - distribution.experimental_local_results(distribution.run(add))) - self.assertAllEqual([2, 2], per_replica_results) - - @combinations.generate(mirrored_and_tpu_strategy_combinations()) - def testAssignAdd(self, distribution): - with distribution.scope(): - v = variable_scope.variable( - 1, aggregation=variables_lib.VariableAggregation.MEAN) - self.evaluate(variables_lib.global_variables_initializer()) - - @def_function.function - def assign(): - return v.assign_add(2) - - per_replica_results = self.evaluate( - distribution.experimental_local_results(distribution.run(assign))) - # The per-replica values should always match the first replicas value. 
- self.assertAllEqual([3, 3], per_replica_results) - - @combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - ], - mode=["graph", "eager"])) - def testScatterSub(self, distribution): - with distribution.scope(): - v = variables_lib.Variable( - [0., 0., 0.], aggregation=variables_lib.VariableAggregation.MEAN) - self.evaluate(v.initializer) - - @def_function.function - def scatter_sub(): - ctx = distribution_strategy_context.get_replica_context() - replica_id = ctx.replica_id_in_sync_group - value = indexed_slices.IndexedSlices( - values=array_ops.stack([ - math_ops.cast(replica_id, dtypes.float32), - math_ops.cast(replica_id + 1, dtypes.float32) - ]), - indices=array_ops.stack([replica_id, replica_id + 1]), - dense_shape=(3,)) - return v.scatter_sub(value) - - per_replica_results = self.evaluate( - distribution.experimental_local_results( - distribution.run(scatter_sub))) - self.assertAllEqual([[0., -1., -1.], [0., -1., -1.]], per_replica_results) - - @combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - ], - mode=["graph", "eager"])) - def testScatterAdd(self, distribution): - with distribution.scope(): - v = variables_lib.Variable( - [0, 0, 0], aggregation=variables_lib.VariableAggregation.SUM) - self.evaluate(v.initializer) - - @def_function.function - def scatter_add(): - ctx = distribution_strategy_context.get_replica_context() - replica_id = ctx.replica_id_in_sync_group - value = indexed_slices.IndexedSlices( - values=array_ops.stack([replica_id, replica_id + 1]), - indices=array_ops.stack([replica_id, replica_id + 1]), - dense_shape=(3,)) - return v.scatter_add(value) - - per_replica_results = self.evaluate( - distribution.experimental_local_results( - distribution.run(scatter_add))) - self.assertAllEqual([[0, 2, 2], [0, 2, 2]], per_replica_results) - - @combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - ], - mode=["graph", "eager"])) - def testScatterDiv(self, distribution): - with distribution.scope(): - v = variables_lib.Variable( - [1, 6, 1], aggregation=variables_lib.VariableAggregation.SUM) - self.evaluate(v.initializer) - - @def_function.function - def scatter_div(): - ctx = distribution_strategy_context.get_replica_context() - replica_id = ctx.replica_id_in_sync_group - value = indexed_slices.IndexedSlices( - values=array_ops.reshape(replica_id + 2, [1]), - indices=array_ops.reshape(replica_id, [1]), - dense_shape=(3,)) - return v.scatter_div(value) - - per_replica_results = self.evaluate( - distribution.experimental_local_results( - distribution.run(scatter_div))) - self.assertAllEqual([[0, 2, 1], [0, 2, 1]], per_replica_results) - - @combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - ], - mode=["graph", "eager"])) - def testScatterMul(self, distribution): - with distribution.scope(): - v = variables_lib.Variable( - [2., 1., 1.], aggregation=variables_lib.VariableAggregation.MEAN) - self.evaluate(v.initializer) - - @def_function.function - def scatter_mul(): - ctx = distribution_strategy_context.get_replica_context() - replica_id = ctx.replica_id_in_sync_group - value = indexed_slices.IndexedSlices( - values=array_ops.reshape( - math_ops.cast(replica_id + 2, dtypes.float32), [1]), - indices=array_ops.reshape(replica_id, [1]), - dense_shape=(3,)) - return v.scatter_mul(value) - - 
per_replica_results = self.evaluate( - distribution.experimental_local_results( - distribution.run(scatter_mul))) - self.assertAllClose([[2., 1.5, 1.], [2., 1.5, 1.]], per_replica_results) - - @combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - ], - mode=["graph", "eager"])) - def testScatterMin(self, distribution): - with distribution.scope(): - v1 = variables_lib.Variable( - [0, 2, 0], aggregation=variables_lib.VariableAggregation.SUM) - v2 = variables_lib.Variable( - [0, 2, 0], - aggregation=variables_lib.VariableAggregation.ONLY_FIRST_REPLICA) - self.evaluate(variables_lib.global_variables_initializer()) - - @def_function.function - def scatter_min(v): - value = indexed_slices.IndexedSlices( - values=array_ops.identity([1]), - indices=array_ops.identity([1]), - dense_shape=(3,)) - return v.scatter_min(value) - - with self.assertRaisesRegex(NotImplementedError, "scatter_min.*"): - self.evaluate( - distribution.experimental_local_results( - distribution.run(scatter_min, args=(v1,)))) - - per_replica_results = self.evaluate( - distribution.experimental_local_results( - distribution.run(scatter_min, args=(v2,)))) - self.assertAllClose([[0, 1, 0], [0, 1, 0]], per_replica_results) - - @combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - ], - mode=["graph", "eager"])) - def testScatterMax(self, distribution): - with distribution.scope(): - v1 = variables_lib.Variable( - [0, 0, 0], aggregation=variables_lib.VariableAggregation.SUM) - v2 = variables_lib.Variable( - [0, 0, 0], - aggregation=variables_lib.VariableAggregation.ONLY_FIRST_REPLICA) - self.evaluate(variables_lib.global_variables_initializer()) - - @def_function.function - def scatter_max(v): - value = indexed_slices.IndexedSlices( - values=array_ops.identity([1]), - indices=array_ops.identity([0]), - dense_shape=(3,)) - return v.scatter_max(value) - - with self.assertRaisesRegex(NotImplementedError, "scatter_max.*"): - self.evaluate( - distribution.experimental_local_results( - distribution.run(scatter_max, args=(v1,)))) - - per_replica_results = self.evaluate( - distribution.experimental_local_results( - distribution.run(scatter_max, args=(v2,)))) - self.assertAllClose([[1, 0, 0], [1, 0, 0]], per_replica_results) - - @combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - ], - mode=["graph", "eager"])) - def testScatterUpdate(self, distribution): - with distribution.scope(): - v1 = variables_lib.Variable( - [0, 0, 0], aggregation=variables_lib.VariableAggregation.SUM) - v2 = variables_lib.Variable( - [0, 0, 0], - aggregation=variables_lib.VariableAggregation.ONLY_FIRST_REPLICA) - self.evaluate(variables_lib.global_variables_initializer()) - - @def_function.function - def scatter_update(v): - value = indexed_slices.IndexedSlices( - values=array_ops.identity([3]), - indices=array_ops.identity([1]), - dense_shape=(3,)) - return v.scatter_update(value) - - with self.assertRaisesRegex(NotImplementedError, "scatter_update.*"): - self.evaluate( - distribution.experimental_local_results( - distribution.run(scatter_update, args=(v1,)))) - - per_replica_results = self.evaluate( - distribution.experimental_local_results( - distribution.run(scatter_update, args=(v2,)))) - self.assertAllClose([[0, 3, 0], [0, 3, 0]], per_replica_results) - - @combinations.generate( - combinations.combine( - distribution=[ - 
strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - ], - mode=["graph", "eager"])) - def testScatterOpsInCrossReplicaContext(self, distribution): - with distribution.scope(): - v1 = variables_lib.Variable( - [1, 1, 1], aggregation=variables_lib.VariableAggregation.SUM) - v2 = variables_lib.Variable([1, 1, 1]) - self.evaluate(variables_lib.global_variables_initializer()) - - value = indexed_slices.IndexedSlices( - values=array_ops.identity([2]), - indices=array_ops.identity([0]), - dense_shape=(3,)) - with distribution.scope(): - self.evaluate(v1.scatter_add(value)) - self.assertAllEqual([3, 1, 1], self.evaluate(v1.read_value())) - - self.evaluate(v2.scatter_min(value)) - self.assertAllEqual([1, 1, 1], self.evaluate(v2.read_value())) - _TPU_STRATEGIES = (tpu_strategy.TPUStrategy, tpu_strategy.TPUStrategyV1) @@ -1321,38 +815,6 @@ def _make_replica_local(method, strategy=None): return v, replica_local -class SyncOnReadVariablePropertiesTest(test.TestCase): - - config = config_pb2.ConfigProto() - config.allow_soft_placement = True - - @test_util.run_in_graph_and_eager_modes(config=config) - def testProperties(self): - if context.num_gpus() < 1 and context.executing_eagerly(): - self.skipTest("A GPU is not available for this test in eager mode.") - v, replica_local = _make_replica_local( - variable_scope.VariableAggregation.SUM) - - self.assertEqual(v[0].constraint, replica_local.constraint) - self.assertEqual(v[0].name, replica_local.name) - self.assertEqual(v[0].dtype, replica_local.dtype) - self.assertEqual(v[0].shape, replica_local.shape) - self.assertEqual(variable_scope.VariableAggregation.SUM, - replica_local.aggregation) - - @test_util.run_v2_only - def testCanPassToDefFun(self): - @def_function.function - def add1(x): - return x + 1 - - v = variable_scope.get_variable( - name="v", initializer=[1.], use_resource=True) - replica_local = values_lib.SyncOnReadVariable( - None, (v,), variable_scope.VariableAggregation.MEAN) - self.assertEqual(2., self.evaluate(add1(replica_local))) - - # TODO(b/144432582): Add variable aggregation type to combinations to simplify # tests. 
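All of the tests touched by this patch, both the ones that remain in values_test.py and the ones moved into the new vars_test.py, are parametrized through the combinations utilities visible in the hunks above. As a minimal sketch of that pattern (not part of the diff; the class and method names below are hypothetical, and the strategy object and decorators are the ones these files already import):

    # Illustrative sketch only, not part of this patch. Shows how
    # combinations.generate/combine parametrize one test method over
    # (distribution, mode) pairs, mirroring the assign-then-read pattern
    # used throughout the tests in this file.
    from absl.testing import parameterized

    from tensorflow.python.distribute import combinations
    from tensorflow.python.distribute import strategy_combinations
    from tensorflow.python.eager import test
    from tensorflow.python.ops import variables as variables_lib


    class ExampleVariableTest(test.TestCase, parameterized.TestCase):

      @combinations.generate(
          combinations.combine(
              distribution=[
                  strategy_combinations.mirrored_strategy_with_one_cpu,
              ],
              mode=["graph", "eager"]))
      def testAssignThenRead(self, distribution):
        # One case is generated per (distribution, mode) combination; the
        # decorator passes the strategy object in as `distribution`.
        with distribution.scope():
          v = variables_lib.Variable(1.)
        self.evaluate(variables_lib.global_variables_initializer())
        self.evaluate(v.assign(3.))
        self.assertEqual(3., self.evaluate(v.read_value()))


    if __name__ == "__main__":
      test.main()
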
def strategy_and_run_tf_function_combinations(): @@ -1389,6 +851,35 @@ class SyncOnReadVariableTest(test.TestCase, parameterized.TestCase): save_path, _ = self._save_return_saver(sess, var) return save_path + config = config_pb2.ConfigProto() + config.allow_soft_placement = True + + @test_util.run_in_graph_and_eager_modes(config=config) + def testProperties(self): + if context.num_gpus() < 1 and context.executing_eagerly(): + self.skipTest("A GPU is not available for this test in eager mode.") + v, replica_local = _make_replica_local( + variable_scope.VariableAggregation.SUM) + + self.assertEqual(v[0].constraint, replica_local.constraint) + self.assertEqual(v[0].name, replica_local.name) + self.assertEqual(v[0].dtype, replica_local.dtype) + self.assertEqual(v[0].shape, replica_local.shape) + self.assertEqual(variable_scope.VariableAggregation.SUM, + replica_local.aggregation) + + @test_util.run_v2_only + def testCanPassToDefFun(self): + @def_function.function + def add1(x): + return x + 1 + + v = variable_scope.get_variable( + name="v", initializer=[1.], use_resource=True) + replica_local = values_lib.SyncOnReadVariable( + None, (v,), variable_scope.VariableAggregation.MEAN) + self.assertEqual(2., self.evaluate(add1(replica_local))) + @combinations.generate(mirrored_and_tpu_strategy_combinations()) def testTensorConversion(self, distribution): with context.graph_mode(): @@ -1585,453 +1076,6 @@ class SyncOnReadVariableTest(test.TestCase, parameterized.TestCase): save_path = self._save_normal() self._restore_replica_local_sum(save_path, distribution) - @combinations.generate(strategy_and_run_tf_function_combinations()) - def testAssign(self, distribution, experimental_run_tf_function): - - def assign(fn, v, update_value, cross_replica): - update_fn = lambda: getattr(v, fn)(update_value) - if cross_replica: - return update_fn() - else: - if experimental_run_tf_function: - update_fn = def_function.function(update_fn) - return distribution.experimental_local_results( - distribution.run(update_fn)) - - updates = [("assign", 1.), ("assign_add", 1.), ("assign_sub", -1.)] - aggregations = [ - variables_lib.VariableAggregation.NONE, - variables_lib.VariableAggregation.SUM, - variables_lib.VariableAggregation.MEAN, - variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, - ] - options = list( - x for x in itertools.product(updates, aggregations, [True, False])) - for update, aggregation, cross_replica in options: - # VariableAggregation.SUM in cross-replica mode is tested below, - # VariableAggregation.NONE in cross-replica mode is not supported. 
- if cross_replica and aggregation in [ - variables_lib.VariableAggregation.SUM, - variables_lib.VariableAggregation.NONE, - ]: - continue - with distribution.scope(): - v = variable_scope.variable( - 0., - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(variables_lib.global_variables_initializer()) - fn, update_value = update - self.evaluate(assign(fn, v, update_value, cross_replica)) - for component in v._values: - self.assertAllEqual(self.evaluate(component.read_value()), - self.evaluate(array_ops.ones_like(component))) - - @combinations.generate(strategy_and_run_tf_function_combinations()) - def testAssignDtypeConversion(self, distribution, - experimental_run_tf_function): - - def assign(fn, v, update_value, cross_replica): - update_fn = lambda: getattr(v, fn)(update_value) - if cross_replica: - return update_fn() - else: - if experimental_run_tf_function: - update_fn = def_function.function(update_fn) - return distribution.experimental_local_results( - distribution.run(update_fn)) - - updates = [("assign", 1), ("assign_add", 1), ("assign_sub", -1)] - aggregations = [ - variables_lib.VariableAggregation.NONE, - variables_lib.VariableAggregation.SUM, - variables_lib.VariableAggregation.MEAN, - variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, - ] - options = list( - x for x in itertools.product(updates, aggregations, [True, False])) - for update, aggregation, cross_replica in options: - # VariableAggregation.SUM in cross-replica mode is tested below, - # VariableAggregation.NONE in cross-replica mode is not supported. - if cross_replica and aggregation in [ - variables_lib.VariableAggregation.SUM, - variables_lib.VariableAggregation.NONE, - ]: - continue - with distribution.scope(): - v = variable_scope.variable( - 0., - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(variables_lib.global_variables_initializer()) - fn, update_value = update - self.evaluate(assign(fn, v, update_value, cross_replica)) - for component in v._values: - self.assertAllEqual(self.evaluate(component.read_value()), - self.evaluate(array_ops.ones_like(component))) - - @combinations.generate(mirrored_and_tpu_strategy_combinations()) - def testAssignWithAggregationSum(self, distribution): - with distribution.scope(): - v = variable_scope.variable( - 0., - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=variables_lib.VariableAggregation.SUM) - self.evaluate(variables_lib.global_variables_initializer()) - self.evaluate(v.assign(1. 
* distribution.num_replicas_in_sync)) - for component in v._values: - self.assertAllEqual(self.evaluate(component.read_value()), - self.evaluate(array_ops.ones_like(component))) - - @combinations.generate(mirrored_and_tpu_strategy_combinations()) - def testAssignAddSubWithAggregationSum(self, distribution): - with distribution.scope(): - v = variable_scope.variable( - 0., - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=variables_lib.VariableAggregation.SUM) - self.evaluate(variables_lib.global_variables_initializer()) - with self.assertRaisesRegex( - ValueError, "SyncOnReadVariable does not support "): - self.evaluate(v.assign_add(1.)) - with self.assertRaisesRegex( - ValueError, "SyncOnReadVariable does not support "): - self.evaluate(v.assign_sub(1.)) - - @combinations.generate(strategy_and_run_tf_function_combinations()) - def testReadValueInReplicaContext(self, distribution, - experimental_run_tf_function): - aggregations = [ - variables_lib.VariableAggregation.NONE, - variables_lib.VariableAggregation.SUM, - variables_lib.VariableAggregation.MEAN, - variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, - ] - for aggregation in aggregations: - with distribution.scope(): - v = variable_scope.variable( - 0., - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(variables_lib.global_variables_initializer()) - if experimental_run_tf_function: - read_var_fn = def_function.function(v.read_value) - else: - read_var_fn = v.read_value - results = self.evaluate( - distribution.experimental_local_results( - distribution.run(read_var_fn))) - for component, value in zip(v._values, results): - self.assertAllEqual(self.evaluate(component.read_value()), value) - - @combinations.generate(strategy_and_run_tf_function_combinations()) - def testReadValueInCrossReplicaContext(self, distribution, - experimental_run_tf_function): - aggregations = [ - variables_lib.VariableAggregation.SUM, - variables_lib.VariableAggregation.MEAN, - variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, - ] - for aggregation in aggregations: - if isinstance(distribution, _TPU_STRATEGIES): - resolver = tpu_cluster_resolver.TPUClusterResolver("") - tpu_strategy_util.initialize_tpu_system(resolver) - with distribution.scope(): - v = variable_scope.variable( - 0., - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(variables_lib.global_variables_initializer()) - - def assign(v=v): - ctx = distribution_strategy_context.get_replica_context() - replica_id = ctx.replica_id_in_sync_group - return v.assign(math_ops.cast(replica_id, dtypes.float32)) - - if experimental_run_tf_function: - assign = def_function.function(assign) - - self.evaluate( - distribution.experimental_local_results(distribution.run(assign))) - num_replicas = distribution.num_replicas_in_sync - sum_of_replica_values = num_replicas * (num_replicas - 1) / 2. 
- if aggregation == variables_lib.VariableAggregation.SUM: - expected = sum_of_replica_values - elif aggregation == variables_lib.VariableAggregation.MEAN: - expected = sum_of_replica_values / num_replicas - else: - expected = 0 - self.assertEqual(expected, self.evaluate(v.read_value()), aggregation) - self.assertEqual(expected, self.evaluate(v.value()), aggregation) - self.assertEqual(expected, self.evaluate(v), aggregation) - self.assertEqual(expected, self.evaluate(array_ops.identity(v)), - aggregation) - - # TODO(b/145574622): Re-enable this test once ReduceOp argument is - # respected on GPUs. - @combinations.generate(strategy_and_run_tf_function_combinations()) - def disable_testAllReduce(self, distribution, - experimental_run_tf_function): - with distribution.scope(): - v = variable_scope.variable( - 2., - synchronization=variables_lib.VariableSynchronization.ON_WRITE, - aggregation=variables_lib.VariableAggregation.MEAN) - self.evaluate(variables_lib.global_variables_initializer()) - - def all_reduce(): - ctx = distribution_strategy_context.get_replica_context() - replica_id = ctx.replica_id_in_sync_group - return ctx.all_reduce("SUM", v) + math_ops.cast(replica_id, - dtypes.float32) - - if experimental_run_tf_function: - all_reduce = def_function.function(all_reduce) - - per_replica_results = self.evaluate( - distribution.experimental_local_results(distribution.run(all_reduce))) - expected_result = [] - for i in range(distribution.num_replicas_in_sync): - expected_result.append(2.0 * distribution.num_replicas_in_sync + - 1.0 * i) - self.assertEqual(per_replica_results, tuple(expected_result)) - - @combinations.generate(strategy_and_run_tf_function_combinations()) - def testAssignPerReplicaBeforeRead(self, distribution, - experimental_run_tf_function): - aggregations = [ - variables_lib.VariableAggregation.SUM, - variables_lib.VariableAggregation.MEAN, - variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, - ] - for aggregation in aggregations: - with distribution.scope(): - v = variable_scope.variable( - 0., - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(variables_lib.global_variables_initializer()) - - def assign(var=v): - ctx = distribution_strategy_context.get_replica_context() - replica_id = ctx.replica_id_in_sync_group - return var.assign(math_ops.cast(replica_id, dtypes.float32)) - - if experimental_run_tf_function: - assign = def_function.function(assign) - - per_replica_results = self.evaluate( - distribution.experimental_local_results(distribution.run(assign))) - expected_result = [] - for i in range(distribution.num_replicas_in_sync): - expected_result.append(1.0 * i) - self.assertEqual(per_replica_results, tuple(expected_result)) - - @combinations.generate(mirrored_and_tpu_strategy_combinations()) - def testReadValueWithAggregationNoneInCrossReplicaContext(self, distribution): - with distribution.scope(): - v = variable_scope.variable( - 0., - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=variables_lib.VariableAggregation.NONE) - self.evaluate(variables_lib.global_variables_initializer()) - with self.assertRaisesRegex( - ValueError, "Could not convert from .* VariableAggregation\\.NONE"): - self.evaluate(v.read_value()) - - @combinations.generate(mirrored_and_tpu_strategy_combinations()) - def testInitializedToSameValueInsideEagerRun(self, distribution): - if not context.executing_eagerly(): self.skipTest("eager only") - - v = [None] - @def_function.function - def step(): - def 
f(): - if v[0] is None: - v[0] = variables_lib.Variable( - random_ops.random_normal([]), - synchronization=variables_lib.VariableSynchronization.ON_READ) - - distribution.run(f) - - context.set_global_seed(None) - step() - vals = self.evaluate(v[0].values) - self.assertAllEqual(vals[0], vals[1]) - - @combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.tpu_strategy, - ], - mode=["eager"])) - def testOperatorOverride(self, distribution): - - with distribution.scope(): - v = variable_scope.variable( - 0.0, - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=variables_lib.VariableAggregation.MEAN) - - @def_function.function - def assign(): - ctx = distribution_strategy_context.get_replica_context() - replica_id = ctx.replica_id_in_sync_group - return v.assign(math_ops.cast(replica_id, dtypes.float32)) - - # Assign different replicas with different values. - distribution.run(assign) - - self.assertEqual(1.5, self.evaluate(v + 1)) - - @def_function.function - def add(): - return v + 1 - - per_replica_results = self.evaluate( - distribution.experimental_local_results(distribution.run(add))) - self.assertAllEqual([1, 2], per_replica_results) - - -@combinations.generate( - combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - ], - aggregation=[ - variables_lib.VariableAggregation.MEAN, - variables_lib.VariableAggregation.SUM, - variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, - ], - mode=["graph", "eager"])) -class SyncOnReadScatterReplicaTest(test.TestCase, parameterized.TestCase): - - def testScatterSub(self, distribution, aggregation): - with distribution.scope(): - v = variables_lib.Variable( - [1., 1., 1.], - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(v.initializer) - - delta = values_lib.PerReplica([ - indexed_slices.IndexedSlices( - values=[[0.], [1.]], indices=[0, 1], dense_shape=(3,)), - indexed_slices.IndexedSlices( - values=[[1.], [2.]], indices=[1, 2], dense_shape=(3,)), - ]) - - with self.assertRaises(NotImplementedError): - self.evaluate(distribution.run(v.scatter_sub, args=(delta,))) - - def testScatterAdd(self, distribution, aggregation): - with distribution.scope(): - v = variables_lib.Variable( - [1., 1., 1.], - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(v.initializer) - - delta = values_lib.PerReplica([ - indexed_slices.IndexedSlices( - values=[[0.], [1.]], indices=[0, 1], dense_shape=(3,)), - indexed_slices.IndexedSlices( - values=[[1.], [2.]], indices=[1, 2], dense_shape=(3,)), - ]) - - with self.assertRaises(NotImplementedError): - self.evaluate(distribution.run(v.scatter_add, args=(delta,))) - - def testScatterDiv(self, distribution, aggregation): - with distribution.scope(): - v = variables_lib.Variable( - [2., 6., 1.], - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(v.initializer) - - delta = values_lib.PerReplica([ - indexed_slices.IndexedSlices( - values=[[2.], [2.]], indices=[0, 1], dense_shape=(3,)), - indexed_slices.IndexedSlices( - values=[[3.], [3.]], indices=[1, 2], dense_shape=(3,)), - ]) - - with self.assertRaises(NotImplementedError): - self.evaluate(distribution.run(v.scatter_div, args=(delta,))) - - def testScatterMul(self, distribution, aggregation): - with distribution.scope(): - v = variables_lib.Variable( - [2., 1., 1.], - 
synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(v.initializer) - - delta = values_lib.PerReplica([ - indexed_slices.IndexedSlices( - values=[[2.], [3.]], indices=[0, 1], dense_shape=(3,)), - indexed_slices.IndexedSlices( - values=[[4.], [5.]], indices=[1, 2], dense_shape=(3,)), - ]) - - with self.assertRaises(NotImplementedError): - self.evaluate(distribution.run(v.scatter_mul, args=(delta,))) - - def testScatterMin(self, distribution, aggregation): - with distribution.scope(): - v = variables_lib.Variable( - [3., 4., 5.], - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(v.initializer) - - delta = values_lib.PerReplica([ - indexed_slices.IndexedSlices( - values=[[1.], [8.]], indices=[0, 1], dense_shape=(3,)), - indexed_slices.IndexedSlices( - values=[[9.], [2.]], indices=[1, 2], dense_shape=(3,)), - ]) - - with self.assertRaises(NotImplementedError): - self.evaluate(distribution.run(v.scatter_min, args=(delta,))) - - def testScatterMax(self, distribution, aggregation): - with distribution.scope(): - v = variables_lib.Variable( - [3., 4., 5.], - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(v.initializer) - - delta = values_lib.PerReplica([ - indexed_slices.IndexedSlices( - values=[[1.], [8.]], indices=[0, 1], dense_shape=(3,)), - indexed_slices.IndexedSlices( - values=[[9.], [2.]], indices=[1, 2], dense_shape=(3,)), - ]) - - with self.assertRaises(NotImplementedError): - self.evaluate(distribution.run(v.scatter_max, args=(delta,))) - - def testScatterUpdate(self, distribution, aggregation): - with distribution.scope(): - v = variables_lib.Variable( - [0., 0., 0.], - synchronization=variables_lib.VariableSynchronization.ON_READ, - aggregation=aggregation) - self.evaluate(v.initializer) - - delta = values_lib.PerReplica([ - indexed_slices.IndexedSlices( - values=[[1.], [2.]], indices=[0, 1], dense_shape=(3,)), - indexed_slices.IndexedSlices( - values=[[3.], [4.]], indices=[1, 2], dense_shape=(3,)), - ]) - - with self.assertRaises(NotImplementedError): - self.evaluate(distribution.run(v.scatter_min, args=(delta,))) - class MirroredTest(test.TestCase): diff --git a/tensorflow/python/distribute/vars_test.py b/tensorflow/python/distribute/vars_test.py new file mode 100644 index 00000000000..98d0c1bb2d2 --- /dev/null +++ b/tensorflow/python/distribute/vars_test.py @@ -0,0 +1,1269 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for the distributed values library.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import itertools + +from absl.testing import parameterized + +from tensorflow.python.distribute import combinations +from tensorflow.python.distribute import distribution_strategy_context +from tensorflow.python.distribute import strategy_combinations +from tensorflow.python.distribute import tpu_strategy +from tensorflow.python.distribute import tpu_values +from tensorflow.python.distribute import values +from tensorflow.python.distribute.cluster_resolver import tpu_cluster_resolver +from tensorflow.python.eager import context +from tensorflow.python.eager import def_function +from tensorflow.python.eager import test +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import indexed_slices +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import random_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.ops import variables as variables_lib +from tensorflow.python.tpu import tpu_strategy_util + + +_TPU_STRATEGIES = (tpu_strategy.TPUStrategy, tpu_strategy.TPUStrategyV1) + + +def strategy_and_run_tf_function_combinations(): + # Test the combination of different strategies and whether a tf.function + # is passed into strategy.run.""" + return combinations.combine( + distribution=[ + strategy_combinations.mirrored_strategy_with_gpu_and_cpu, + ], + mode=["graph", "eager"], + experimental_run_tf_function=[True, False], + use_var_policy=[True, False]) + combinations.combine( + distribution=[ + strategy_combinations.tpu_strategy, + strategy_combinations.tpu_strategy_packed_var, + ], + mode=["graph", "eager"], + experimental_run_tf_function=[True], + use_var_policy=[True, False]) + + +def strategy_with_var_policy(): + return combinations.combine( + distribution=[ + strategy_combinations.mirrored_strategy_with_gpu_and_cpu, + strategy_combinations.tpu_strategy, + strategy_combinations.tpu_strategy_packed_var, + ], + mode=["graph", "eager"], + use_var_policy=[True, False]) + + +class OnWriteVariableSync(test.TestCase, parameterized.TestCase): + + @combinations.generate( + combinations.combine( + distribution=[ + strategy_combinations.mirrored_strategy_with_one_gpu, + ], + mode=["graph"])) + def testFetchAMirroredVariable(self, distribution): + with self.session(graph=ops.Graph()) as sess, distribution.scope(): + with ops.device("/device:GPU:0"): + v = variable_scope.get_variable( + name="v", initializer=1., use_resource=True) + mirrored = values.MirroredVariable( + distribution, (v,), variable_scope.VariableAggregation.MEAN) + sess.run(variables_lib.global_variables_initializer()) + sess.run({"complicated": mirrored}) + + @combinations.generate(strategy_and_run_tf_function_combinations()) + def testAssign(self, distribution, experimental_run_tf_function, + use_var_policy): + + def assign(fn, v, update_value, cross_replica): + update_fn = lambda: getattr(v, fn)(update_value) + if cross_replica: + return update_fn() + else: + if experimental_run_tf_function: + update_fn = def_function.function(update_fn) + return distribution.experimental_local_results( + distribution.run(update_fn)) + + updates = [("assign", 1.), ("assign_add", 1.), 
("assign_sub", -1.)] + aggregations = [ + variables_lib.VariableAggregation.NONE, + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.MEAN, + variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, + ] + options = list( + x for x in itertools.product(updates, aggregations, [True, False])) + for update, aggregation, cross_replica in options: + # assign in replica context with SUM does not make sense cause you can + # just do value * num replicas error is 1. is not a distributed value and + # is unsupported for aggregation SUM + if (not cross_replica and aggregation == + variables_lib.VariableAggregation.SUM): + continue + with distribution.scope(): + v = variable_scope.variable( + 0., + aggregation=aggregation) + self.evaluate(variables_lib.global_variables_initializer()) + fn, update_value = update + self.evaluate(assign(fn, v, update_value, cross_replica)) + for component in v._values: + self.assertAllEqual(self.evaluate(component.read_value()), + self.evaluate(array_ops.ones_like(component))) + + @combinations.generate(strategy_and_run_tf_function_combinations()) + def testAssignOnWriteVar(self, distribution, experimental_run_tf_function, + use_var_policy): + + with distribution.scope(): + v_to_assign = variable_scope.variable( + 2., aggregation=variables_lib.VariableAggregation.MEAN) + v_to_assign_sub = variable_scope.variable( + -2., aggregation=variables_lib.VariableAggregation.MEAN) + + def assign(fn, v, update_value, cross_replica): + update_fn = lambda: getattr(v, fn)(update_value) + if cross_replica: + return update_fn() + else: + if experimental_run_tf_function: + update_fn = def_function.function(update_fn) + return distribution.experimental_local_results( + distribution.run(update_fn)) + + updates = [("assign", v_to_assign), ("assign_add", v_to_assign), + ("assign_sub", v_to_assign_sub)] + aggregations = [ + variables_lib.VariableAggregation.NONE, + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.MEAN, + variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, + ] + options = list( + x for x in itertools.product(updates, aggregations, [True, False])) + for update, aggregation, cross_replica in options: + # assign in replica context with SUM does not make sense cause you can + # just do value * num replicas error is 1. is not a distributed value and + # is unsupported for aggregation SUM + if aggregation == variables_lib.VariableAggregation.SUM: + continue + with distribution.scope(): + v = variable_scope.variable( + 0., + aggregation=aggregation) + self.evaluate(variables_lib.global_variables_initializer()) + fn, update_value = update + self.evaluate(assign(fn, v, update_value, cross_replica)) + for component in v._values: + self.assertAllEqual(2.0, self.evaluate(component.read_value())) + + @combinations.generate(strategy_and_run_tf_function_combinations()) + def testAssignPerReplicaVal(self, distribution, experimental_run_tf_function, + use_var_policy): + + if isinstance(distribution, _TPU_STRATEGIES): + self.skipTest("Assigning PerReplica values is not supported. 
See" + " sponge/80ba41f8-4220-4516-98ce-bbad48f9f11a.") + + with distribution.scope(): + per_replica_value = values.PerReplica( + [constant_op.constant(2.0), + constant_op.constant(2.0)]) + per_replica_sub_value = values.PerReplica( + [constant_op.constant(-2.0), + constant_op.constant(-2.0)]) + + def assign(fn, v, update_value, cross_replica): + update_fn = lambda: getattr(v, fn)(update_value) + if cross_replica: + return update_fn() + else: + if experimental_run_tf_function: + update_fn = def_function.function(update_fn) + return distribution.experimental_local_results( + distribution.run(update_fn)) + + updates = [("assign", per_replica_value), ("assign_add", per_replica_value), + ("assign_sub", per_replica_sub_value)] + # We don't support assigning PerReplica valus to vars in replica context + # with aggregation=NONE. + aggregations = [ + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.MEAN, + variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, + ] + options = list( + x for x in itertools.product(updates, aggregations, [True, False])) + for update, aggregation, cross_replica in options: + # assign in replica context with SUM does not make sense cause you can + # just do value * num replicas error is 1. is not a distributed value and + # is unsupported for aggregation SUM + if cross_replica: + # We don't support assigning PerReplica values to MirroredVariables in + # cross replica context + continue + with distribution.scope(): + v = variable_scope.variable( + 0., + aggregation=aggregation) + self.evaluate(variables_lib.global_variables_initializer()) + fn, update_value = update + self.evaluate(assign(fn, v, update_value, cross_replica)) + if aggregation == variables_lib.VariableAggregation.SUM: + expected = 4.0 + else: + expected = 2.0 + for component in v._values: + self.assertAllEqual(expected, self.evaluate(component.read_value())) + + @combinations.generate(strategy_with_var_policy()) + def testValueInReplicaContext(self, distribution, use_var_policy): + with distribution.scope(): + v = variables_lib.Variable( + 1., aggregation=variables_lib.VariableAggregation.MEAN) + self.evaluate(variables_lib.global_variables_initializer()) + + @def_function.function + def f(): + with ops.control_dependencies([v.assign_add(1.)]): + return v.value() + + results = self.evaluate( + distribution.experimental_local_results( + distribution.run(f))) + for value in results: + self.assertEqual(2., value) + + @combinations.generate(strategy_and_run_tf_function_combinations()) + def testReadValueInReplicaContext(self, distribution, + experimental_run_tf_function, + use_var_policy): + aggregations = [ + variables_lib.VariableAggregation.NONE, + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.MEAN, + variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, + ] + for aggregation in aggregations: + with distribution.scope(): + v = variable_scope.variable( + 0., + aggregation=aggregation) + self.evaluate(variables_lib.global_variables_initializer()) + if experimental_run_tf_function: + read_var_fn = def_function.function(v.read_value) + else: + read_var_fn = v.read_value + results = self.evaluate( + distribution.experimental_local_results( + distribution.run(read_var_fn))) + for component, value in zip(v._values, results): + self.assertAllEqual(self.evaluate(component.read_value()), value) + + @combinations.generate(strategy_and_run_tf_function_combinations()) + def testReadValueInCrossReplicaContext(self, distribution, + experimental_run_tf_function, + 
use_var_policy): + aggregations = [ + variables_lib.VariableAggregation.NONE, + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.MEAN, + variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, + ] + for aggregation in aggregations: + with distribution.scope(): + v = variable_scope.variable( + 2., + aggregation=aggregation) + self.evaluate(variables_lib.global_variables_initializer()) + + if experimental_run_tf_function: + read_var_fn = def_function.function(v.read_value) + else: + read_var_fn = v.read_value + + results = read_var_fn() + for component in v._values: + self.assertEqual(self.evaluate(component.read_value()), + self.evaluate(results)) + + @combinations.generate(strategy_with_var_policy()) + def testAssignOutOfScope(self, distribution, use_var_policy): + with distribution.scope(): + mirrored = variables_lib.Variable(1.) + self.evaluate(mirrored.assign(3.)) + self.assertEqual(self.evaluate(mirrored.read_value()), 3.) + for component in mirrored.values: + self.assertEqual(self.evaluate(component.read_value()), 3.) + + @combinations.generate(strategy_with_var_policy()) + def testAssignAggregationMeanDTypeNonFloat(self, distribution, + use_var_policy): + if isinstance(distribution, _TPU_STRATEGIES): + self.skipTest("Fix sponge/6e8ab540-4c0f-4da5-aedf-86505ff810c9 before " + "reenabling test.") + + with distribution.scope(): + v = variables_lib.Variable( + 1, + aggregation=variable_scope.VariableAggregation.MEAN, + dtype=dtypes.int32) + self.evaluate(v.initializer) + + @def_function.function + def assign(): + ctx = distribution_strategy_context.get_replica_context() + return v.assign(ctx.replica_id_in_sync_group) + + # disallow assign() with distributed value in replica context. + with self.assertRaisesRegex(ValueError, + "Cannot update non-float variables"): + self.evaluate( + distribution.experimental_local_results( + distribution.run(assign))) + + # allow assign() with same value in replica context. + @def_function.function + def assign_same(): + return v.assign(2) + + self.evaluate( + distribution.experimental_local_results( + distribution.run(assign_same))) + self.assertEqual(self.evaluate(v.read_value()), 2) + + # allow assign() with mirrored variable in replica context. + with distribution.scope(): + v2 = variables_lib.Variable( + 3, + aggregation=variable_scope.VariableAggregation.SUM, + dtype=dtypes.int32) + self.evaluate(v2.initializer) + + @def_function.function + def assign_mirrored(): + return v.assign(v2) + + self.evaluate( + distribution.experimental_local_results( + distribution.run(assign_mirrored))) + self.assertEqual(self.evaluate(v.read_value()), 3) + + # allow assign() in cross replica context. 
+ with distribution.scope(): + self.evaluate(v.assign(4)) + self.assertEqual(self.evaluate(v.read_value()), 4) + + @combinations.generate(strategy_with_var_policy()) + def testInitializedToSameValueInsideEagerRun(self, distribution, + use_var_policy): + if not context.executing_eagerly(): self.skipTest("eager only test") + v = [None] + + @def_function.function + def step(): + + def f(): + if v[0] is None: + v[0] = variables_lib.Variable(random_ops.random_normal([])) + + distribution.run(f) + + context.set_global_seed(None) + step() + vals = self.evaluate(v[0].values) + self.assertAllEqual(vals[0], vals[1]) + + @combinations.generate(strategy_with_var_policy()) + def testAggregationOnlyFirstReplica(self, distribution, use_var_policy): + with distribution.scope(): + v = variable_scope.variable( + 15., + synchronization=variables_lib.VariableSynchronization.ON_WRITE, + aggregation=variables_lib.VariableAggregation.ONLY_FIRST_REPLICA) + self.evaluate(variables_lib.global_variables_initializer()) + + @def_function.function + def assign(): + ctx = distribution_strategy_context.get_replica_context() + replica_id = ctx.replica_id_in_sync_group + return v.assign(math_ops.cast(replica_id, dtypes.float32)) + per_replica_results = self.evaluate(distribution.experimental_local_results( + distribution.run(assign))) + # The per-replica values should always match the first replicas value. + self.assertAllEqual( + array_ops.zeros(distribution.num_replicas_in_sync, dtypes.float32), + per_replica_results) + + @combinations.generate(strategy_with_var_policy()) + def testInitScope(self, distribution, use_var_policy): + if not context.executing_eagerly(): self.skipTest("eager only") + + class C(object): + pass + + obj = C() + obj.w = None + obj.v = None + + @def_function.function + def assign(): + with ops.init_scope(): + if obj.w is None: + obj.w = variables_lib.Variable( + 0, aggregation=variables_lib.VariableAggregation.MEAN) + obj.v = variables_lib.Variable( + obj.w.read_value(), + aggregation=variables_lib.VariableAggregation.MEAN) + self.evaluate(variables_lib.global_variables_initializer()) + + return obj.v.assign_add(2) + + per_replica_results = self.evaluate( + distribution.experimental_local_results(distribution.run(assign))) + self.assertAllEqual([2, 2], per_replica_results) + + @combinations.generate(strategy_with_var_policy()) + def testOperatorOverride(self, distribution, use_var_policy): + + with distribution.scope(): + v = variable_scope.variable( + 1, aggregation=variables_lib.VariableAggregation.MEAN) + self.evaluate(variables_lib.global_variables_initializer()) + + self.assertEqual(2, self.evaluate(v + 1)) + + @def_function.function + def add(): + return v + 1 + + per_replica_results = self.evaluate( + distribution.experimental_local_results(distribution.run(add))) + self.assertAllEqual([2, 2], per_replica_results) + + +@combinations.generate( + combinations.combine( + distribution=[ + strategy_combinations.mirrored_strategy_with_gpu_and_cpu, + ], + mode=["graph", "eager"], + use_var_policy=[True, False])) +class OnWriteVariableSyncScatterTests(test.TestCase, parameterized.TestCase): + + def testScatterSub(self, distribution, use_var_policy): + with distribution.scope(): + v = variables_lib.Variable( + [0., 0., 0.], aggregation=variables_lib.VariableAggregation.MEAN) + self.evaluate(v.initializer) + + @def_function.function + def scatter_sub(): + ctx = distribution_strategy_context.get_replica_context() + replica_id = ctx.replica_id_in_sync_group + value = indexed_slices.IndexedSlices( + 
values=array_ops.stack([ + math_ops.cast(replica_id, dtypes.float32), + math_ops.cast(replica_id + 1, dtypes.float32) + ]), + indices=array_ops.stack([replica_id, replica_id + 1]), + dense_shape=(3,)) + return v.scatter_sub(value) + + per_replica_results = self.evaluate( + distribution.experimental_local_results( + distribution.run(scatter_sub))) + self.assertAllEqual([[0., -1., -1.], [0., -1., -1.]], per_replica_results) + + def testScatterAdd(self, distribution, use_var_policy): + with distribution.scope(): + v = variables_lib.Variable( + [0, 0, 0], aggregation=variables_lib.VariableAggregation.SUM) + self.evaluate(v.initializer) + + @def_function.function + def scatter_add(): + ctx = distribution_strategy_context.get_replica_context() + replica_id = ctx.replica_id_in_sync_group + value = indexed_slices.IndexedSlices( + values=array_ops.stack([replica_id, replica_id + 1]), + indices=array_ops.stack([replica_id, replica_id + 1]), + dense_shape=(3,)) + return v.scatter_add(value) + + per_replica_results = self.evaluate( + distribution.experimental_local_results( + distribution.run(scatter_add))) + self.assertAllEqual([[0, 2, 2], [0, 2, 2]], per_replica_results) + + def testScatterDiv(self, distribution, use_var_policy): + with distribution.scope(): + v = variables_lib.Variable( + [1, 6, 1], aggregation=variables_lib.VariableAggregation.SUM) + self.evaluate(v.initializer) + + @def_function.function + def scatter_div(): + ctx = distribution_strategy_context.get_replica_context() + replica_id = ctx.replica_id_in_sync_group + value = indexed_slices.IndexedSlices( + values=array_ops.reshape(replica_id + 2, [1]), + indices=array_ops.reshape(replica_id, [1]), + dense_shape=(3,)) + return v.scatter_div(value) + + per_replica_results = self.evaluate( + distribution.experimental_local_results( + distribution.run(scatter_div))) + self.assertAllEqual([[0, 2, 1], [0, 2, 1]], per_replica_results) + + def testScatterMul(self, distribution, use_var_policy): + with distribution.scope(): + v = variables_lib.Variable( + [2., 1., 1.], aggregation=variables_lib.VariableAggregation.MEAN) + self.evaluate(v.initializer) + + @def_function.function + def scatter_mul(): + ctx = distribution_strategy_context.get_replica_context() + replica_id = ctx.replica_id_in_sync_group + value = indexed_slices.IndexedSlices( + values=array_ops.reshape( + math_ops.cast(replica_id + 2, dtypes.float32), [1]), + indices=array_ops.reshape(replica_id, [1]), + dense_shape=(3,)) + return v.scatter_mul(value) + + per_replica_results = self.evaluate( + distribution.experimental_local_results( + distribution.run(scatter_mul))) + self.assertAllClose([[2., 1.5, 1.], [2., 1.5, 1.]], per_replica_results) + + def testScatterMin(self, distribution, use_var_policy): + with distribution.scope(): + v1 = variables_lib.Variable( + [0, 2, 0], aggregation=variables_lib.VariableAggregation.SUM) + v2 = variables_lib.Variable( + [0, 2, 0], + aggregation=variables_lib.VariableAggregation.ONLY_FIRST_REPLICA) + self.evaluate(variables_lib.global_variables_initializer()) + + @def_function.function + def scatter_min(v): + value = indexed_slices.IndexedSlices( + values=array_ops.identity([1]), + indices=array_ops.identity([1]), + dense_shape=(3,)) + return v.scatter_min(value) + + with self.assertRaisesRegex(NotImplementedError, "scatter_min.*"): + self.evaluate( + distribution.experimental_local_results( + distribution.run(scatter_min, args=(v1,)))) + + per_replica_results = self.evaluate( + distribution.experimental_local_results( + 
distribution.run(scatter_min, args=(v2,)))) + self.assertAllClose([[0, 1, 0], [0, 1, 0]], per_replica_results) + + def testScatterMax(self, distribution, use_var_policy): + with distribution.scope(): + v1 = variables_lib.Variable( + [0, 0, 0], aggregation=variables_lib.VariableAggregation.SUM) + v2 = variables_lib.Variable( + [0, 0, 0], + aggregation=variables_lib.VariableAggregation.ONLY_FIRST_REPLICA) + self.evaluate(variables_lib.global_variables_initializer()) + + @def_function.function + def scatter_max(v): + value = indexed_slices.IndexedSlices( + values=array_ops.identity([1]), + indices=array_ops.identity([0]), + dense_shape=(3,)) + return v.scatter_max(value) + + with self.assertRaisesRegex(NotImplementedError, "scatter_max.*"): + self.evaluate( + distribution.experimental_local_results( + distribution.run(scatter_max, args=(v1,)))) + + per_replica_results = self.evaluate( + distribution.experimental_local_results( + distribution.run(scatter_max, args=(v2,)))) + self.assertAllClose([[1, 0, 0], [1, 0, 0]], per_replica_results) + + def testScatterUpdate(self, distribution, use_var_policy): + with distribution.scope(): + v1 = variables_lib.Variable( + [0, 0, 0], aggregation=variables_lib.VariableAggregation.SUM) + v2 = variables_lib.Variable( + [0, 0, 0], + aggregation=variables_lib.VariableAggregation.ONLY_FIRST_REPLICA) + self.evaluate(variables_lib.global_variables_initializer()) + + @def_function.function + def scatter_update(v): + value = indexed_slices.IndexedSlices( + values=array_ops.identity([3]), + indices=array_ops.identity([1]), + dense_shape=(3,)) + return v.scatter_update(value) + + with self.assertRaisesRegex(NotImplementedError, "scatter_update.*"): + self.evaluate( + distribution.experimental_local_results( + distribution.run(scatter_update, args=(v1,)))) + + per_replica_results = self.evaluate( + distribution.experimental_local_results( + distribution.run(scatter_update, args=(v2,)))) + self.assertAllClose([[0, 3, 0], [0, 3, 0]], per_replica_results) + + def testScatterOpsInCrossReplicaContext(self, distribution, use_var_policy): + with distribution.scope(): + v1 = variables_lib.Variable( + [1, 1, 1], aggregation=variables_lib.VariableAggregation.SUM) + v2 = variables_lib.Variable([1, 1, 1]) + self.evaluate(variables_lib.global_variables_initializer()) + + value = indexed_slices.IndexedSlices( + values=array_ops.identity([2]), + indices=array_ops.identity([0]), + dense_shape=(3,)) + with distribution.scope(): + self.evaluate(v1.scatter_add(value)) + self.assertAllEqual([3, 1, 1], self.evaluate(v1.read_value())) + + self.evaluate(v2.scatter_min(value)) + self.assertAllEqual([1, 1, 1], self.evaluate(v2.read_value())) + + +def _make_replica_local(method, strategy=None): + if strategy is None: + devices = ("/device:GPU:0", "/device:CPU:0") + else: + devices = strategy.extended.worker_devices + + v = [] + for d, n, init in zip(devices, ["v", "v/replica"], [1., 2.]): + with ops.device(d): + v.append(variable_scope.get_variable( + name=n, initializer=init, use_resource=True)) + + if (strategy is not None) and isinstance(strategy, _TPU_STRATEGIES): + var_cls = tpu_values.TPUSyncOnReadVariable + else: + var_cls = values.SyncOnReadVariable + replica_local = var_cls(strategy, v, method) + return v, replica_local + + +class OnReadVariableSyncTest(test.TestCase, parameterized.TestCase): + + @combinations.generate(strategy_and_run_tf_function_combinations()) + def testAssign(self, distribution, experimental_run_tf_function, + use_var_policy): + + def assign(fn, v, update_value, 
cross_replica): + update_fn = lambda: getattr(v, fn)(update_value) + if cross_replica: + return update_fn() + else: + if experimental_run_tf_function: + update_fn = def_function.function(update_fn) + return distribution.experimental_local_results( + distribution.run(update_fn)) + + updates = [("assign", 1.), ("assign_add", 1.), ("assign_sub", -1.)] + aggregations = [ + variables_lib.VariableAggregation.NONE, + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.MEAN, + variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, + ] + options = list( + x for x in itertools.product(updates, aggregations, [True, False])) + for update, aggregation, cross_replica in options: + # VariableAggregation.SUM in cross-replica mode is tested below, + # VariableAggregation.NONE in cross-replica mode is not supported. + if cross_replica and aggregation in [ + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.NONE, + ]: + continue + with distribution.scope(): + v = variable_scope.variable( + 0., + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=aggregation) + self.evaluate(variables_lib.global_variables_initializer()) + fn, update_value = update + self.evaluate(assign(fn, v, update_value, cross_replica)) + for component in v._values: + self.assertAllEqual(self.evaluate(component.read_value()), + self.evaluate(array_ops.ones_like(component))) + + @combinations.generate(strategy_and_run_tf_function_combinations()) + def testAssignOnReadVar(self, distribution, experimental_run_tf_function, + use_var_policy): + + with distribution.scope(): + v_to_assign = variable_scope.variable( + 2., aggregation=variables_lib.VariableAggregation.MEAN) + v_to_assign_sub = variable_scope.variable( + -2., aggregation=variables_lib.VariableAggregation.MEAN) + + def assign(fn, v, update_value, cross_replica): + update_fn = lambda: getattr(v, fn)(update_value) + if cross_replica: + return update_fn() + else: + if experimental_run_tf_function: + update_fn = def_function.function(update_fn) + return distribution.experimental_local_results( + distribution.run(update_fn)) + + updates = [("assign", v_to_assign), ("assign_add", v_to_assign), + ("assign_sub", v_to_assign_sub)] + expected_cross_replica = { + variables_lib.VariableAggregation.SUM: 1.0, + variables_lib.VariableAggregation.MEAN: 2.0, + variables_lib.VariableAggregation.ONLY_FIRST_REPLICA: 2.0 + } + expected_replica = { + variables_lib.VariableAggregation.SUM: 2.0, + variables_lib.VariableAggregation.MEAN: 2.0, + variables_lib.VariableAggregation.ONLY_FIRST_REPLICA: 2.0 + } + # aggregation=NONE is not supported for OnReadVariables. + aggregations = [ + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.MEAN, + variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, + ] + options = list( + x for x in itertools.product(updates, aggregations, [True, False])) + for update, aggregation, cross_replica in options: + # assign in replica context with SUM does not make sense cause you can + # just do value * num replicas error is 1. 
is not a distributed value and + # is unsupported for aggregation SUM + if aggregation == variables_lib.VariableAggregation.SUM: + continue + with distribution.scope(): + v = variable_scope.variable( + 0., + aggregation=aggregation) + self.evaluate(variables_lib.global_variables_initializer()) + fn, update_value = update + self.evaluate(assign(fn, v, update_value, cross_replica)) + if cross_replica: + for component in v._values: + self.assertAllEqual(expected_cross_replica.get(aggregation), + self.evaluate(component.read_value())) + else: + for component in v._values: + self.assertAllEqual(expected_replica.get(aggregation), + self.evaluate(component.read_value())) + + @combinations.generate(strategy_and_run_tf_function_combinations()) + def testAssignPerReplicaVal(self, distribution, experimental_run_tf_function, + use_var_policy): + + if isinstance(distribution, _TPU_STRATEGIES): + self.skipTest("Assigning PerReplica values is not supported. See" + " sponge/80ba41f8-4220-4516-98ce-bbad48f9f11a.") + + self.skipTest("We don't support assiging PerReplica values in cross " + "replica context or replica context. see error in " + "sponge/2b2e54c1-eda6-4534-82e1-c73b1dcd517f.") + + with distribution.scope(): + per_replica_value = values.PerReplica( + [constant_op.constant(2.0), + constant_op.constant(2.0)]) + + def assign(fn, v, update_value, cross_replica): + update_fn = lambda: getattr(v, fn)(update_value) + if cross_replica: + return update_fn() + else: + if experimental_run_tf_function: + update_fn = def_function.function(update_fn) + return distribution.experimental_local_results( + distribution.run(update_fn)) + + updates = [("assign", per_replica_value)] + # We don't support assigning PerReplica valus to vars in replica context + # with aggregation=NONE. + aggregations = [ + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.MEAN, + variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, + ] + options = list( + x for x in itertools.product(updates, aggregations, [True, False])) + for update, aggregation, cross_replica in options: + # assign in replica context with SUM does not make sense cause you can + # just do value * num replicas error is 1. 
is not a distributed value and + # is unsupported for aggregation SUM + with distribution.scope(): + v = variable_scope.variable( + 0., + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=aggregation) + self.evaluate(variables_lib.global_variables_initializer()) + fn, update_value = update + # with self.assertRaisesRegex(ValueError, "Attempt to convert a value "): + self.evaluate(assign(fn, v, update_value, cross_replica)) + if aggregation == variables_lib.VariableAggregation.SUM: + expected = 4.0 + else: + expected = 2.0 + for component in v._values: + self.assertAllEqual(expected, self.evaluate(component.read_value())) + + @combinations.generate(strategy_and_run_tf_function_combinations()) + def testAssignDtypeConversion(self, distribution, + experimental_run_tf_function, + use_var_policy): + + def assign(fn, v, update_value, cross_replica): + update_fn = lambda: getattr(v, fn)(update_value) + if cross_replica: + return update_fn() + else: + if experimental_run_tf_function: + update_fn = def_function.function(update_fn) + return distribution.experimental_local_results( + distribution.run(update_fn)) + + updates = [("assign", 1), ("assign_add", 1), ("assign_sub", -1)] + aggregations = [ + variables_lib.VariableAggregation.NONE, + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.MEAN, + variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, + ] + options = list( + x for x in itertools.product(updates, aggregations, [True, False])) + for update, aggregation, cross_replica in options: + # VariableAggregation.SUM in cross-replica mode is tested below, + # VariableAggregation.NONE in cross-replica mode is not supported. + if cross_replica and aggregation in [ + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.NONE, + ]: + continue + with distribution.scope(): + v = variable_scope.variable( + 0., + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=aggregation) + self.evaluate(variables_lib.global_variables_initializer()) + fn, update_value = update + self.evaluate(assign(fn, v, update_value, cross_replica)) + for component in v._values: + self.assertAllEqual(self.evaluate(component.read_value()), + self.evaluate(array_ops.ones_like(component))) + + @combinations.generate(strategy_with_var_policy()) + def testAssignWithAggregationSum(self, distribution, use_var_policy): + with distribution.scope(): + v = variable_scope.variable( + 0., + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=variables_lib.VariableAggregation.SUM) + self.evaluate(variables_lib.global_variables_initializer()) + self.evaluate(v.assign(1. 
* distribution.num_replicas_in_sync)) + for component in v._values: + self.assertAllEqual(self.evaluate(component.read_value()), + self.evaluate(array_ops.ones_like(component))) + + @combinations.generate(strategy_with_var_policy()) + def testAssignAddSubWithAggregationSum(self, distribution, use_var_policy): + with distribution.scope(): + v = variable_scope.variable( + 0., + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=variables_lib.VariableAggregation.SUM) + self.evaluate(variables_lib.global_variables_initializer()) + with self.assertRaisesRegex( + ValueError, "SyncOnReadVariable does not support "): + self.evaluate(v.assign_add(1.)) + with self.assertRaisesRegex( + ValueError, "SyncOnReadVariable does not support "): + self.evaluate(v.assign_sub(1.)) + + @combinations.generate(strategy_and_run_tf_function_combinations()) + def testReadValueInReplicaContext(self, distribution, + experimental_run_tf_function, + use_var_policy): + aggregations = [ + variables_lib.VariableAggregation.NONE, + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.MEAN, + variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, + ] + for aggregation in aggregations: + with distribution.scope(): + v = variable_scope.variable( + 0., + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=aggregation) + self.evaluate(variables_lib.global_variables_initializer()) + if experimental_run_tf_function: + read_var_fn = def_function.function(v.read_value) + else: + read_var_fn = v.read_value + results = self.evaluate( + distribution.experimental_local_results( + distribution.run(read_var_fn))) + for component, value in zip(v._values, results): + self.assertAllEqual(self.evaluate(component.read_value()), value) + + @combinations.generate(strategy_and_run_tf_function_combinations()) + def testReadValueInCrossReplicaContext(self, distribution, + experimental_run_tf_function, + use_var_policy): + aggregations = [ + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.MEAN, + variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, + ] + for aggregation in aggregations: + if isinstance(distribution, _TPU_STRATEGIES): + resolver = tpu_cluster_resolver.TPUClusterResolver("") + tpu_strategy_util.initialize_tpu_system(resolver) + with distribution.scope(): + v = variable_scope.variable( + 0., + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=aggregation) + self.evaluate(variables_lib.global_variables_initializer()) + + def assign(v=v): + ctx = distribution_strategy_context.get_replica_context() + replica_id = ctx.replica_id_in_sync_group + return v.assign(math_ops.cast(replica_id, dtypes.float32)) + + if experimental_run_tf_function: + assign = def_function.function(assign) + + self.evaluate( + distribution.experimental_local_results(distribution.run(assign))) + num_replicas = distribution.num_replicas_in_sync + sum_of_replica_values = num_replicas * (num_replicas - 1) / 2. 
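      # Each replica assigned its own replica id above, so the component values
      # are 0, 1, ..., num_replicas - 1 and their total is
      # num_replicas * (num_replicas - 1) / 2; e.g. with 2 replicas the SUM
      # read is 1.0 and the MEAN read is 0.5.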
+ if aggregation == variables_lib.VariableAggregation.SUM: + expected = sum_of_replica_values + elif aggregation == variables_lib.VariableAggregation.MEAN: + expected = sum_of_replica_values / num_replicas + else: + expected = 0 + self.assertEqual(expected, self.evaluate(v.read_value()), aggregation) + self.assertEqual(expected, self.evaluate(v.value()), aggregation) + self.assertEqual(expected, self.evaluate(v), aggregation) + self.assertEqual(expected, self.evaluate(array_ops.identity(v)), + aggregation) + + # TODO(b/145574622): Re-enable this test once ReduceOp argument is + # respected on GPUs. + @combinations.generate(strategy_and_run_tf_function_combinations()) + def disable_testAllReduce(self, distribution, + experimental_run_tf_function, + use_var_policy): + with distribution.scope(): + v = variable_scope.variable( + 2., + synchronization=variables_lib.VariableSynchronization.ON_WRITE, + aggregation=variables_lib.VariableAggregation.MEAN) + self.evaluate(variables_lib.global_variables_initializer()) + + def all_reduce(): + ctx = distribution_strategy_context.get_replica_context() + replica_id = ctx.replica_id_in_sync_group + return ctx.all_reduce("SUM", v) + math_ops.cast(replica_id, + dtypes.float32) + + if experimental_run_tf_function: + all_reduce = def_function.function(all_reduce) + + per_replica_results = self.evaluate( + distribution.experimental_local_results(distribution.run(all_reduce))) + expected_result = [] + for i in range(distribution.num_replicas_in_sync): + expected_result.append(2.0 * distribution.num_replicas_in_sync + + 1.0 * i) + self.assertEqual(per_replica_results, tuple(expected_result)) + + @combinations.generate(strategy_and_run_tf_function_combinations()) + def testAssignPerReplicaBeforeRead(self, distribution, + experimental_run_tf_function, + use_var_policy): + aggregations = [ + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.MEAN, + variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, + ] + for aggregation in aggregations: + with distribution.scope(): + v = variable_scope.variable( + 0., + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=aggregation) + self.evaluate(variables_lib.global_variables_initializer()) + + def assign(var=v): + ctx = distribution_strategy_context.get_replica_context() + replica_id = ctx.replica_id_in_sync_group + return var.assign(math_ops.cast(replica_id, dtypes.float32)) + + if experimental_run_tf_function: + assign = def_function.function(assign) + + per_replica_results = self.evaluate( + distribution.experimental_local_results(distribution.run(assign))) + expected_result = [] + for i in range(distribution.num_replicas_in_sync): + expected_result.append(1.0 * i) + self.assertEqual(per_replica_results, tuple(expected_result)) + + @combinations.generate(strategy_with_var_policy()) + def testReadValueWithAggregationNoneInCrossReplicaContext(self, distribution, + use_var_policy): + with distribution.scope(): + v = variable_scope.variable( + 0., + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=variables_lib.VariableAggregation.NONE) + self.evaluate(variables_lib.global_variables_initializer()) + with self.assertRaisesRegex( + ValueError, "Could not convert from .* VariableAggregation\\.NONE"): + self.evaluate(v.read_value()) + + @combinations.generate(strategy_with_var_policy()) + def testInitializedToSameValueInsideEagerRun(self, distribution, + use_var_policy): + if not context.executing_eagerly(): self.skipTest("eager only") + + v = [None] + 
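    # `v` caches the variable that is created lazily on the first trace of
    # `step`; creating it inside `distribution.run` lets the test check that
    # every replica component starts from the same initial value.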
@def_function.function + def step(): + def f(): + if v[0] is None: + v[0] = variables_lib.Variable( + random_ops.random_normal([]), + synchronization=variables_lib.VariableSynchronization.ON_READ) + + distribution.run(f) + + context.set_global_seed(None) + step() + vals = self.evaluate(v[0].values) + self.assertAllEqual(vals[0], vals[1]) + + @combinations.generate(strategy_with_var_policy()) + def testOperatorOverride(self, distribution, use_var_policy): + + with distribution.scope(): + v = variable_scope.variable( + 0.0, + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=variables_lib.VariableAggregation.MEAN) + self.evaluate(variables_lib.global_variables_initializer()) + + @def_function.function + def assign(): + ctx = distribution_strategy_context.get_replica_context() + replica_id = ctx.replica_id_in_sync_group + return v.assign(math_ops.cast(replica_id, dtypes.float32)) + + # Assign different replicas with different values. + self.evaluate(distribution.experimental_local_results( + distribution.run(assign))) + self.assertEqual(1.5, self.evaluate(v + 1)) + + @def_function.function + def add(): + return v + 1 + + per_replica_results = self.evaluate( + distribution.experimental_local_results(distribution.run(add))) + self.assertAllEqual([1, 2], per_replica_results) + + +@combinations.generate( + combinations.combine( + distribution=[ + strategy_combinations.mirrored_strategy_with_gpu_and_cpu, + ], + aggregation=[ + variables_lib.VariableAggregation.MEAN, + variables_lib.VariableAggregation.SUM, + variables_lib.VariableAggregation.ONLY_FIRST_REPLICA, + ], + mode=["graph", "eager"], + use_var_policy=[True, False])) +class SyncOnReadScatterReplicaTest(test.TestCase, parameterized.TestCase): + + def testScatterSub(self, distribution, aggregation, use_var_policy): + with distribution.scope(): + v = variables_lib.Variable( + [1., 1., 1.], + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=aggregation) + self.evaluate(v.initializer) + + delta = values.PerReplica([ + indexed_slices.IndexedSlices( + values=[[0.], [1.]], indices=[0, 1], dense_shape=(3,)), + indexed_slices.IndexedSlices( + values=[[1.], [2.]], indices=[1, 2], dense_shape=(3,)), + ]) + + with self.assertRaises(NotImplementedError): + self.evaluate(distribution.run(v.scatter_sub, args=(delta,))) + + def testScatterAdd(self, distribution, aggregation, use_var_policy): + with distribution.scope(): + v = variables_lib.Variable( + [1., 1., 1.], + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=aggregation) + self.evaluate(v.initializer) + + delta = values.PerReplica([ + indexed_slices.IndexedSlices( + values=[[0.], [1.]], indices=[0, 1], dense_shape=(3,)), + indexed_slices.IndexedSlices( + values=[[1.], [2.]], indices=[1, 2], dense_shape=(3,)), + ]) + + with self.assertRaises(NotImplementedError): + self.evaluate(distribution.run(v.scatter_add, args=(delta,))) + + def testScatterDiv(self, distribution, aggregation, use_var_policy): + with distribution.scope(): + v = variables_lib.Variable( + [2., 6., 1.], + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=aggregation) + self.evaluate(v.initializer) + + delta = values.PerReplica([ + indexed_slices.IndexedSlices( + values=[[2.], [2.]], indices=[0, 1], dense_shape=(3,)), + indexed_slices.IndexedSlices( + values=[[3.], [3.]], indices=[1, 2], dense_shape=(3,)), + ]) + + with self.assertRaises(NotImplementedError): + self.evaluate(distribution.run(v.scatter_div, args=(delta,))) + + 
def testScatterMul(self, distribution, aggregation, use_var_policy): + with distribution.scope(): + v = variables_lib.Variable( + [2., 1., 1.], + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=aggregation) + self.evaluate(v.initializer) + + delta = values.PerReplica([ + indexed_slices.IndexedSlices( + values=[[2.], [3.]], indices=[0, 1], dense_shape=(3,)), + indexed_slices.IndexedSlices( + values=[[4.], [5.]], indices=[1, 2], dense_shape=(3,)), + ]) + + with self.assertRaises(NotImplementedError): + self.evaluate(distribution.run(v.scatter_mul, args=(delta,))) + + def testScatterMin(self, distribution, aggregation, use_var_policy): + with distribution.scope(): + v = variables_lib.Variable( + [3., 4., 5.], + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=aggregation) + self.evaluate(v.initializer) + + delta = values.PerReplica([ + indexed_slices.IndexedSlices( + values=[[1.], [8.]], indices=[0, 1], dense_shape=(3,)), + indexed_slices.IndexedSlices( + values=[[9.], [2.]], indices=[1, 2], dense_shape=(3,)), + ]) + + with self.assertRaises(NotImplementedError): + self.evaluate(distribution.run(v.scatter_min, args=(delta,))) + + def testScatterMax(self, distribution, aggregation, use_var_policy): + with distribution.scope(): + v = variables_lib.Variable( + [3., 4., 5.], + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=aggregation) + self.evaluate(v.initializer) + + delta = values.PerReplica([ + indexed_slices.IndexedSlices( + values=[[1.], [8.]], indices=[0, 1], dense_shape=(3,)), + indexed_slices.IndexedSlices( + values=[[9.], [2.]], indices=[1, 2], dense_shape=(3,)), + ]) + + with self.assertRaises(NotImplementedError): + self.evaluate(distribution.run(v.scatter_max, args=(delta,))) + + def testScatterUpdate(self, distribution, aggregation, use_var_policy): + with distribution.scope(): + v = variables_lib.Variable( + [0., 0., 0.], + synchronization=variables_lib.VariableSynchronization.ON_READ, + aggregation=aggregation) + self.evaluate(v.initializer) + + delta = values.PerReplica([ + indexed_slices.IndexedSlices( + values=[[1.], [2.]], indices=[0, 1], dense_shape=(3,)), + indexed_slices.IndexedSlices( + values=[[3.], [4.]], indices=[1, 2], dense_shape=(3,)), + ]) + + with self.assertRaises(NotImplementedError): + self.evaluate(distribution.run(v.scatter_min, args=(delta,))) + + +def _make_index_slices(vals, indices, dense_shape=None): + if dense_shape: + dense_shape = array_ops.identity(dense_shape) + return indexed_slices.IndexedSlices( + array_ops.identity(vals), array_ops.identity(indices), dense_shape) + + +if __name__ == "__main__": + test.main() From 9b1caab3d7c59be936fe7c640635bb740f2a6275 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Tue, 28 Jul 2020 00:54:15 +0700 Subject: [PATCH 1383/2522] typo --- tensorflow/c/experimental/filesystem/plugins/s3/BUILD | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/BUILD b/tensorflow/c/experimental/filesystem/plugins/s3/BUILD index ec48f5de2b6..56bd3b4a75c 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/BUILD +++ b/tensorflow/c/experimental/filesystem/plugins/s3/BUILD @@ -1,11 +1,11 @@ -# Experimental gcs filesystem plugin. -load("//tensorflow:tensorflow.bzl", "get_win_copts", "tf_cc_shared_object") +# Experimental s3 filesystem plugin. 
+load("//tensorflow:tensorflow.bzl", "get_win_copts", "tf_cc_shared_object", "tf_cc_test") package( licenses = ["notice"], # Apache 2.0 ) -# Filesystem implementation for GCS environments +# Filesystem implementation for S3 environments tf_cc_shared_object( name = "s3_filesystem", framework_so = [], From d13436a0ce9ff22b32893710e8c1dd39588b757e Mon Sep 17 00:00:00 2001 From: Lucy Fox Date: Mon, 27 Jul 2020 10:46:12 -0700 Subject: [PATCH 1384/2522] Carry forward attributes when creating LegacyCall ops during import to MLIR. Attributes should be carried forward when importing from GraphDef to TF MLIR dialects, including when importing to LegacyCall op. A LegacyCall op may ignore these attributes (e.g. not honor TPU device placement), but the attributes should be maintained nonetheless. This is important, for example, in the TF:XLA bridge, where LegacyCall ops are lowered to call operations which do honor device placement. PiperOrigin-RevId: 323393025 Change-Id: Ic099ebe60546764a1de2fddd273900f15f11f989 --- .../compiler/mlir/tensorflow/ir/tf_ops.td | 4 +- .../graph-custom-operation.pbtxt | 2 +- .../graphdef2mlir/graph-function-call.pbtxt | 8 ++- .../graph-function-name-bug.pbtxt | 4 +- .../graph-function-resource-args.pbtxt | 2 +- .../tests/graphdef2mlir/graph-library.pbtxt | 6 +- .../tests/mlir2graphdef/tf-legacy-call.mlir | 8 ++- .../mlir/tensorflow/translate/import_model.cc | 61 ++++++------------- 8 files changed, 42 insertions(+), 53 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td index 7dd8609eea9..c65db14ed69 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td @@ -346,8 +346,8 @@ def TF_LegacyCallOp : TF_Op<"LegacyCall", within the same symbol scope as the call and is mapped to a GraphDef node with the function name as the op name. Unlike a PartitionedCall which represents asynchronously executing a function across multiple devices, a - LegacyCall represents a function call with the only attribute - _diable_call_shape_inference. + LegacyCall ignores specification for ops in the attached function and + instead executes it on the device assigned to this op. }]; let arguments = (ins diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-custom-operation.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-custom-operation.pbtxt index cf08d55b3cb..304429c8783 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-custom-operation.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-custom-operation.pbtxt @@ -54,5 +54,5 @@ versions { # the names are matching between the function definition and the uses / call # site (a numerical suffix may be appended). 
-# CHECK: "tf.LegacyCall"(%outputs) {_disable_call_shape_inference = false, f = @foo0} +# CHECK: "tf.LegacyCall"(%outputs) {_disable_call_shape_inference = false, device = "", f = @foo0} # CHECK: func @foo0 diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-call.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-call.pbtxt index fa6f63e27a5..f954657765a 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-call.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-call.pbtxt @@ -34,6 +34,12 @@ node { b: true } } + attr { + key: "_tpu_replicate" + value { + s: "cluster" + } + } } library { function { @@ -62,4 +68,4 @@ library { } # CHECK: func @main -# CHECK: "tf.LegacyCall"(%arg0) {_disable_call_shape_inference = true, f = @test_func_name0} +# CHECK: "tf.LegacyCall"(%arg0) {_disable_call_shape_inference = true, _tpu_replicate = "cluster", device = "", f = @test_func_name0} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-name-bug.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-name-bug.pbtxt index 8cf6d4ed5d5..326e7b1ecd4 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-name-bug.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-name-bug.pbtxt @@ -121,8 +121,8 @@ versions { # Verify that functions from the library are properly imported. # CHECK-LABEL: func @main() { -# CHECK: "tf.LegacyCall"() {_disable_call_shape_inference = false, f = @foo110} -# CHECK: "tf.LegacyCall"() {_disable_call_shape_inference = false, f = @foo111} +# CHECK: "tf.LegacyCall"() {_disable_call_shape_inference = false, device = "", f = @foo110} +# CHECK: "tf.LegacyCall"() {_disable_call_shape_inference = false, device = "", f = @foo111} # CHECK-LABEL: func @foo110() attributes {sym_visibility = "private"} # CHECK-LABEL: func @foo111() attributes {sym_visibility = "private"} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-resource-args.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-resource-args.pbtxt index eb358d52b26..7cb7ac7e008 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-resource-args.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-resource-args.pbtxt @@ -88,7 +88,7 @@ library { # CHECK: tf_executor.graph # CHECK: "tf.VarHandleOp"() # CHECK: "tf.LegacyCall" -# CHECK-SAME: {_disable_call_shape_inference = true, f = @test_func_name0} +# CHECK-SAME: {_disable_call_shape_inference = true, device = "", f = @test_func_name0} # CHECK: tf_executor.fetch # CHECK: return # CHECK: func @test_func_name0 diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-library.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-library.pbtxt index 55a76b1b668..53e951473d0 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-library.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-library.pbtxt @@ -54,10 +54,10 @@ versions { # Verify that functions from the library are properly imported. 
# CHECK-LABEL: func @main() { -# CHECK: "tf.LegacyCall"() {_disable_call_shape_inference = true, f = @foo0} -# CHECK: "tf.LegacyCall"() {_disable_call_shape_inference = false, f = @bar0} +# CHECK: "tf.LegacyCall"() {_disable_call_shape_inference = true, device = "", f = @foo0} +# CHECK: "tf.LegacyCall"() {_disable_call_shape_inference = false, device = "", f = @bar0} # CHECK-LABEL: func @foo0() attributes {sym_visibility = "private"} -# CHECK: "tf.LegacyCall"() {_disable_call_shape_inference = false, f = @bar0} +# CHECK: "tf.LegacyCall"() {_disable_call_shape_inference = false, device = "", f = @bar0} # CHECK-LABEL: func @bar0() attributes {sym_visibility = "private"} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/tf-legacy-call.mlir b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/tf-legacy-call.mlir index 5f92d789066..3e50aa18098 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/tf-legacy-call.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/tf-legacy-call.mlir @@ -3,7 +3,7 @@ func @main() { tf_executor.graph { %outputs, %control = tf_executor.island wraps "tf.Const"() {device = "", dtype = "tfdtype$DT_INT32", name = "Constant", value = dense<0> : tensor} : () -> tensor - %outputs_0, %control_1 = tf_executor.island wraps "tf.LegacyCall"(%outputs) {f = @foo0} : (tensor) -> tensor + %outputs_0, %control_1 = tf_executor.island wraps "tf.LegacyCall"(%outputs) {_tpu_replicate = "cluster", device = "", f = @foo0} : (tensor) -> tensor tf_executor.fetch } return @@ -23,6 +23,12 @@ func @foo0(%arg0: tensor<*xi32>) -> tensor<*xi32> { // CHECK-NEXT: value { // CHECK-NEXT: list { // CHECK-NEXT: shape { +// CHECK: attr { +// CHECK-NEXT: key: "_tpu_replicate" +// CHECK-NEXT: value { +// CHECK-NEXT: s: "cluster" +// CHECK-NEXT: } +// CHECK-NEXT: } // CHECK: library { // CHECK-NEXT: function { diff --git a/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc b/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc index a12378b66ba..8f5613d9019 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc @@ -119,7 +119,6 @@ static inline absl::string_view StringRefToView(llvm::StringRef ref) { namespace tensorflow { using mlir::NamedAttrList; using mlir::TensorType; -using mlir::TF::VarHandleOp; using mlir::tf_saved_model::AssetOp; using mlir::tf_saved_model::GlobalTensorOp; using mlir::tf_saved_model::SessionInitializerOp; @@ -129,12 +128,6 @@ namespace { constexpr char kTpuReplicateAttr[] = "_tpu_replicate"; -bool IsDisableCallShapeInferenceAttribute(const AttrValue& attr_value, - llvm::StringRef attr_name) { - return attr_name.compare("_disable_call_shape_inference") == 0 && - attr_value.value_case() == AttrValue::kB; -} - bool IsOutputShapesAttribute(const AttrValue& attr_value, llvm::StringRef attr_name) { return attr_name.compare("_output_shapes") == 0 && @@ -336,14 +329,11 @@ class ImporterBase { NamedAttrList* attributes); // Helper to create either a tf_executor operation or a TF operation wrapped - // in an island. When convert_to_legacy_call is true, converts the operation - // representing a call to a library function with a name represented in - // node_type_name to LegacyCallOp. + // in an island. 
mlir::Operation* CreateOperation( const Node& node, llvm::StringRef node_type_name, const mlir::OperationState& result, - const llvm::SmallVectorImpl& control_operands, - bool convert_to_legacy_call = false); + const llvm::SmallVectorImpl& control_operands); // Converts one NodeDef from the input GraphDef into an Operation and // inserts it into the MLIR module using builder_. @@ -1680,8 +1670,7 @@ Status ImporterBase::EmitErrorWithLocationStr(const Node& node, mlir::Operation* ImporterBase::CreateOperation( const Node& node, llvm::StringRef node_type_name, const mlir::OperationState& result, - const llvm::SmallVectorImpl& control_operands, - bool convert_to_legacy_call) { + const llvm::SmallVectorImpl& control_operands) { // For the tf.executor specific operations (not wrapped in an island), we // have an extra returned value for the control result, and we concatenate // control and non-control operands. @@ -1744,25 +1733,7 @@ mlir::Operation* ImporterBase::CreateOperation( mlir::OpBuilder::atBlockEnd(&island.GetBody()); // Create the operation inside the island now. - mlir::Operation* inner_op; - if (convert_to_legacy_call) { - bool disable_call_shape_inference = false; - for (const auto& name_and_value : node.attrs()) { - const auto& attr_name = name_and_value.first; - const AttrValue& attr_value = name_and_value.second; - if (IsDisableCallShapeInferenceAttribute(attr_value, attr_name)) { - disable_call_shape_inference = attr_value.b(); - } - } - - mlir::BoolAttr attribute = - builder_.getBoolAttr(disable_call_shape_inference); - inner_op = island_builder.create( - result.location, result.types, result.operands, - island_builder.getSymbolRefAttr(node_type_name), attribute); - } else { - inner_op = island_builder.createOperation(result); - } + mlir::Operation* inner_op = island_builder.createOperation(result); // Sets operand_segment_sizes or result_segment_sizes attribute to the op. const auto set_segment_sizes_attr = @@ -1927,13 +1898,6 @@ Status ImporterBase::ConvertNode(const Node& node) { // Remove _output_shapes attribute that will be added by the exporter. if (IsOutputShapesAttribute(attr_value, attr_name)) continue; - // We represent the _diable_call_shape_inference attribute and remove - // the _output_shapes attribute for LegacyCall. If a call has other - // attributes, we can't convert it to LegacyCall. - if (convert_to_legacy_call && - !IsDisableCallShapeInferenceAttribute(attr_value, attr_name)) { - convert_to_legacy_call = false; - } if (attr_value.value_case() == AttrValue::kFunc) { // Attribute iteration order is not defined for protocol buffer Map. // Process function attributes separately in the lexicographical order to @@ -1957,6 +1921,19 @@ Status ImporterBase::ConvertNode(const Node& node) { result.attributes.push_back(builder_.getNamedAttr( "device", builder_.getStringAttr(std::string(node_def.device())))); + // Map user function calls to LegacyCall ops and add the user function name + // as an attribute. + if (convert_to_legacy_call) { + result.name = mlir::OperationName(get_full_op_name("LegacyCall"), context_); + mlir::SymbolRefAttr val = builder_.getSymbolRefAttr(node_type_name); + result.addAttribute("f", val); + + if (!result.attributes.get("_disable_call_shape_inference")) { + result.addAttribute("_disable_call_shape_inference", + builder_.getBoolAttr(false)); + } + } + // Map If and StatelessIf op in TensorFlow to the common If op in MLIR and add // the differentiating attribute. 
if (node.IsIfNode()) { @@ -1975,8 +1952,8 @@ Status ImporterBase::ConvertNode(const Node& node) { } // Register the mapping between the TF node and the newly created operation. - node_values_[node.id()] = CreateOperation( - node, node_type_name, result, control_operands, convert_to_legacy_call); + node_values_[node.id()] = + CreateOperation(node, node_type_name, result, control_operands); return Status::OK(); } From 4486dced0c79f7348165e3de6423a8c8f19a3c57 Mon Sep 17 00:00:00 2001 From: Tomer Kaftan Date: Mon, 27 Jul 2020 10:48:43 -0700 Subject: [PATCH 1385/2522] Make sure tf.categorical adds dispatching (like all the other random ops). PiperOrigin-RevId: 323393634 Change-Id: I2c28ff302c8bbf4728f7c701f630a3c41a15a896 --- tensorflow/python/ops/random_ops.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/ops/random_ops.py b/tensorflow/python/ops/random_ops.py index 6aad3fd43f0..0bb4b78c29f 100644 --- a/tensorflow/python/ops/random_ops.py +++ b/tensorflow/python/ops/random_ops.py @@ -426,6 +426,7 @@ def multinomial(logits, num_samples, seed=None, name=None, output_dtype=None): @tf_export("random.categorical") +@dispatch.add_dispatch_support def categorical(logits, num_samples, dtype=None, seed=None, name=None): """Draws samples from a categorical distribution. From b49d5c60477b6a8797bf1bcfd43076bb2b1f3e44 Mon Sep 17 00:00:00 2001 From: Hubert Eichner Date: Mon, 27 Jul 2020 10:52:45 -0700 Subject: [PATCH 1386/2522] Change error message format to append, not prepend, version warning. PiperOrigin-RevId: 323394517 Change-Id: I0ee8046b2935c2100bc2e30a7d62823932c97a7b --- tensorflow/core/common_runtime/graph_constructor.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/common_runtime/graph_constructor.cc b/tensorflow/core/common_runtime/graph_constructor.cc index ab5b086b25c..5b2cc0d0045 100644 --- a/tensorflow/core/common_runtime/graph_constructor.cc +++ b/tensorflow/core/common_runtime/graph_constructor.cc @@ -470,17 +470,17 @@ Status MaybeAppendVersionWarning(const VersionDef* versions, return Status( import_status.code(), absl::StrCat( - "Converting GraphDef to Graph has failed. The binary trying to " - "import the GraphDef was built when GraphDef version was ", + "Converting GraphDef to Graph has failed with an error: '", + import_status.error_message(), + "' The binary trying to import the GraphDef was built when " + "GraphDef version was ", TF_GRAPH_DEF_VERSION, ". The GraphDef was produced by a binary built when GraphDef " "version was ", versions->producer(), ". The difference between these versions is larger than " - "TensorFlow's forward compatibility guarantee. The following error " - "might be due to the binary trying to import the GraphDef being " - "too old: ", - import_status.error_message())); + "TensorFlow's forward compatibility guarantee, and might be the " + "root cause for failing to import the GraphDef.")); } return import_status; } From 93668bb0c71a54c1ed8a511300578970b7aa5f5f Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: Mon, 27 Jul 2020 11:06:45 -0700 Subject: [PATCH 1387/2522] Add tf.data service journal reader and writer. This change adds reader and writer implementations for journaling dispatcher state. The implementations read and write protocol buffers representing state changes. The records are written in TFRecord format so that we can detect corruption (TFRecord takes a CRC checksum for each record). 
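As an illustration only (not part of this change), a dispatcher-side round trip
with these classes might look like the sketch below; `journal_dir` is a
placeholder directory and the surrounding Status plumbing is elided:

    // Record a state change as it is applied to the in-memory state.
    JournalWriter writer(Env::Default(), journal_dir);
    Update update;
    CreateJob* create_job = update.mutable_create_job();
    create_job->set_job_id(1);
    create_job->set_dataset_id(2);
    create_job->set_processing_mode(ProcessingModeDef::PARALLEL_EPOCHS);
    TF_RETURN_IF_ERROR(writer.Write(update));  // Appends and syncs one record.

    // On restart, replay the journal to rebuild the in-memory state.
    JournalReader reader(Env::Default(), journal_dir);
    while (true) {
      Update replayed;
      bool end_of_journal = false;
      TF_RETURN_IF_ERROR(reader.Read(&replayed, &end_of_journal));
      if (end_of_journal) break;
      // Apply `replayed` to the dispatcher state here.
    }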
Later CLs will update the dispatcher to write to the journal during state changes and read from the journal on restart. PiperOrigin-RevId: 323398037 Change-Id: I62eaab40219992c26df5029075a5875f212f92da --- tensorflow/core/data/service/BUILD | 36 ++++ tensorflow/core/data/service/common.proto | 7 + tensorflow/core/data/service/dispatcher.proto | 7 - tensorflow/core/data/service/journal.cc | 94 ++++++++++ tensorflow/core/data/service/journal.h | 81 +++++++++ tensorflow/core/data/service/journal.proto | 42 +++++ tensorflow/core/data/service/journal_test.cc | 162 ++++++++++++++++++ 7 files changed, 422 insertions(+), 7 deletions(-) create mode 100644 tensorflow/core/data/service/journal.cc create mode 100644 tensorflow/core/data/service/journal.h create mode 100644 tensorflow/core/data/service/journal.proto create mode 100644 tensorflow/core/data/service/journal_test.cc diff --git a/tensorflow/core/data/service/BUILD b/tensorflow/core/data/service/BUILD index d7cc7a3e528..7a12f1079cc 100644 --- a/tensorflow/core/data/service/BUILD +++ b/tensorflow/core/data/service/BUILD @@ -130,6 +130,42 @@ tf_cc_test( ], ) +cc_library( + name = "journal", + srcs = ["journal.cc"], + hdrs = ["journal.h"], + deps = [ + ":journal_proto_cc", + "//tensorflow/core:lib", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + ], +) + +tf_cc_test( + name = "journal_test", + srcs = ["journal_test.cc"], + deps = [ + ":common_proto_cc", + ":journal", + ":journal_proto_cc", + "//tensorflow/core:lib", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "@com_google_absl//absl/memory", + ], +) + +tf_proto_library( + name = "journal_proto", + srcs = ["journal.proto"], + cc_api_version = 2, + protodeps = [ + ":common_proto", + ], +) + cc_library( name = "credentials_factory", srcs = ["credentials_factory.cc"], diff --git a/tensorflow/core/data/service/common.proto b/tensorflow/core/data/service/common.proto index b9a6f72ddf4..6d5398d9cd9 100644 --- a/tensorflow/core/data/service/common.proto +++ b/tensorflow/core/data/service/common.proto @@ -18,3 +18,10 @@ message TaskDef { int64 task_id = 3; int64 job_id = 4; } + +enum ProcessingModeDef { + // Each tf.data worker processes an entire epoch. + PARALLEL_EPOCHS = 0; + // Processing of an epoch is distributed across all tf.data workers. + ONE_EPOCH = 1; +} diff --git a/tensorflow/core/data/service/dispatcher.proto b/tensorflow/core/data/service/dispatcher.proto index 119fe675f2a..2a2d48ab93d 100644 --- a/tensorflow/core/data/service/dispatcher.proto +++ b/tensorflow/core/data/service/dispatcher.proto @@ -41,13 +41,6 @@ message GetOrRegisterDatasetResponse { int64 dataset_id = 1; } -enum ProcessingModeDef { - // Each tf.data worker processes an entire epoch. - PARALLEL_EPOCHS = 0; - // Processing of an epoch is distributed across all tf.data workers. - ONE_EPOCH = 1; -} - message CreateJobRequest { // The id of the dataset to create a job for. int64 dataset_id = 1; diff --git a/tensorflow/core/data/service/journal.cc b/tensorflow/core/data/service/journal.cc new file mode 100644 index 00000000000..9f2d4908f05 --- /dev/null +++ b/tensorflow/core/data/service/journal.cc @@ -0,0 +1,94 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/data/service/journal.h" + +#include "absl/memory/memory.h" +#include "tensorflow/core/data/service/journal.pb.h" +#include "tensorflow/core/lib/io/record_reader.h" +#include "tensorflow/core/lib/io/record_writer.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/platform/path.h" + +namespace tensorflow { +namespace data { + +namespace { +constexpr StringPiece kJournal = "journal"; +} // namespace + +std::string DataServiceJournalFile(StringPiece journal_dir) { + return io::JoinPath(journal_dir, kJournal); +} + +JournalWriter::JournalWriter(Env* env, StringPiece journal_dir) + : env_(env), journal_dir_(journal_dir) {} + +Status JournalWriter::EnsureInitialized() { + if (writer_) { + return Status::OK(); + } + TF_RETURN_IF_ERROR(env_->RecursivelyCreateDir(journal_dir_)); + TF_RETURN_IF_ERROR( + env_->NewAppendableFile(DataServiceJournalFile(journal_dir_), &file_)); + writer_ = absl::make_unique(file_.get()); + return Status::OK(); +} + +Status JournalWriter::Write(Update update) { + TF_RETURN_IF_ERROR(EnsureInitialized()); + std::string s = update.SerializeAsString(); + if (s.empty()) { + return errors::Internal("Failed to serialize update ", update.DebugString(), + " to string"); + } + TF_RETURN_IF_ERROR(writer_->WriteRecord(s)); + TF_RETURN_IF_ERROR(writer_->Flush()); + TF_RETURN_IF_ERROR(file_->Sync()); + return Status::OK(); +} + +JournalReader::JournalReader(Env* env, StringPiece journal_dir) + : env_(env), journal_dir_(journal_dir) {} + +Status JournalReader::EnsureInitialized() { + if (reader_) { + return Status::OK(); + } + TF_RETURN_IF_ERROR( + env_->NewRandomAccessFile(DataServiceJournalFile(journal_dir_), &file_)); + reader_ = absl::make_unique(file_.get()); + return Status::OK(); +} + +Status JournalReader::Read(Update* update, bool* end_of_journal) { + TF_RETURN_IF_ERROR(EnsureInitialized()); + tstring record; + Status s = reader_->ReadRecord(&offset_, &record); + if (errors::IsOutOfRange(s)) { + *end_of_journal = true; + return Status::OK(); + } + TF_RETURN_IF_ERROR(s); + if (!update->ParseFromString(record)) { + return errors::DataLoss("Failed to parse journal record."); + } + *end_of_journal = false; + return Status::OK(); +} + +} // namespace data +} // namespace tensorflow diff --git a/tensorflow/core/data/service/journal.h b/tensorflow/core/data/service/journal.h new file mode 100644 index 00000000000..b2d718ad652 --- /dev/null +++ b/tensorflow/core/data/service/journal.h @@ -0,0 +1,81 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CORE_DATA_SERVICE_JOURNAL_H_ +#define TENSORFLOW_CORE_DATA_SERVICE_JOURNAL_H_ + +#include "tensorflow/core/data/service/journal.pb.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/io/record_reader.h" +#include "tensorflow/core/lib/io/record_writer.h" +#include "tensorflow/core/platform/env.h" + +namespace tensorflow { +namespace data { + +// Returns the location of the journal file within the journal directory. +std::string DataServiceJournalFile(StringPiece journal_dir); + +// JournalWriter is not thread-safe, requiring external synchronization when +// used by multiple threads. +class JournalWriter { + public: + // Creates a journal writer to write to the given journal directory. + // If there is already journal data there, the journal writer will append to + // the existing journal. + explicit JournalWriter(Env* env, StringPiece journal_dir); + JournalWriter(const JournalWriter&) = delete; + JournalWriter& operator=(const JournalWriter&) = delete; + + // Writes and syncs an update to the journal. + Status Write(Update update); + + private: + // Initializes the writer if it is not yet initialized. + Status EnsureInitialized(); + + Env* env_; + const std::string journal_dir_; + std::unique_ptr file_; + std::unique_ptr writer_; +}; + +// JournalReader is not thread-safe, requiring external synchronization when +// used by multiple threads. +class JournalReader { + public: + explicit JournalReader(Env* env, StringPiece journal_dir); + JournalReader(const JournalReader&) = delete; + JournalReader& operator=(const JournalReader&) = delete; + + // Reads the next update from the journal. Sets `*end_of_journal=true` if + // there are no more updates left in the journal. + Status Read(Update* update, bool* end_of_journal); + + private: + // Initializes the reader if it is not yet initialized. + Status EnsureInitialized(); + + Env* env_; + const std::string journal_dir_; + // Current offset into `file_`. + uint64 offset_ = 0; + std::unique_ptr file_; + std::unique_ptr reader_; +}; + +} // namespace data +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_DATA_SERVICE_JOURNAL_H_ diff --git a/tensorflow/core/data/service/journal.proto b/tensorflow/core/data/service/journal.proto new file mode 100644 index 00000000000..b378011db4b --- /dev/null +++ b/tensorflow/core/data/service/journal.proto @@ -0,0 +1,42 @@ +syntax = "proto3"; + +package tensorflow.data; + +import "google/protobuf/wrappers.proto"; +import "tensorflow/core/data/service/common.proto"; + +// Message representing journaled dispatcher metadata updates. When we apply +// one of these changes to the dispatcher's in-memory state, we also write an +// Update message to the journal. +message Update { + oneof update_type { + RegisterDataset register_dataset = 1; + CreateJob create_job = 2; + FinishJob finish_job = 3; + CreateTask create_task = 4; + } +} + +message RegisterDataset { + int64 dataset_id = 1; + // A file where the dataset graph has been written. + string filename = 2; +} + +message CreateJob { + int64 job_id = 1; + int64 dataset_id = 2; + ProcessingModeDef processing_mode = 3; + // Only some jobs have names, so this may be null. 
+ google.protobuf.StringValue job_name = 4; +} + +message FinishJob { + int64 job_id = 1; +} + +message CreateTask { + int64 task_id = 1; + int64 job_id = 2; + int64 dataset_id = 3; +} diff --git a/tensorflow/core/data/service/journal_test.cc b/tensorflow/core/data/service/journal_test.cc new file mode 100644 index 00000000000..ece33970eb8 --- /dev/null +++ b/tensorflow/core/data/service/journal_test.cc @@ -0,0 +1,162 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/data/service/journal.h" + +#include "absl/memory/memory.h" +#include "tensorflow/core/data/service/common.pb.h" +#include "tensorflow/core/data/service/journal.pb.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/platform/path.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace data { + +namespace { +using ::testing::HasSubstr; + +bool NewJournalDir(std::string* journal_dir) { + std::string filename; + if (!Env::Default()->LocalTempFilename(&filename)) { + return false; + } + *journal_dir = io::JoinPath(testing::TmpDir(), filename); + return true; +} + +Update MakeCreateJobUpdate() { + Update update; + CreateJob* create_job = update.mutable_create_job(); + create_job->set_dataset_id(3); + create_job->set_job_id(8); + create_job->set_processing_mode(ProcessingModeDef::PARALLEL_EPOCHS); + return update; +} + +Update MakeFinishJobUpdate() { + Update update; + FinishJob* finish_job = update.mutable_finish_job(); + finish_job->set_job_id(8); + return update; +} + +Update MakeCreateTaskUpdate() { + Update update; + CreateTask* create_task = update.mutable_create_task(); + create_task->set_task_id(2); + create_task->set_dataset_id(4); + create_task->set_job_id(5); + return update; +} + +Status CheckJournalContent(StringPiece journal_dir, + const std::vector& expected) { + JournalReader reader(Env::Default(), journal_dir); + for (const auto& update : expected) { + Update result; + bool end_of_journal = true; + TF_RETURN_IF_ERROR(reader.Read(&result, &end_of_journal)); + EXPECT_FALSE(end_of_journal); + // We can't use the testing::EqualsProto matcher because it is not available + // in OSS. 
+ EXPECT_EQ(result.SerializeAsString(), update.SerializeAsString()); + } + Update result; + bool end_of_journal = false; + TF_RETURN_IF_ERROR(reader.Read(&result, &end_of_journal)); + EXPECT_TRUE(end_of_journal); + return Status::OK(); +} +} // namespace + +TEST(Journal, RoundTripMultiple) { + std::string journal_dir; + EXPECT_TRUE(NewJournalDir(&journal_dir)); + std::vector updates = {MakeCreateJobUpdate(), MakeCreateTaskUpdate(), + MakeFinishJobUpdate()}; + JournalWriter writer(Env::Default(), journal_dir); + for (const auto& update : updates) { + TF_EXPECT_OK(writer.Write(update)); + } + + TF_EXPECT_OK(CheckJournalContent(journal_dir, updates)); +} + +TEST(Journal, AppendExistingFile) { + std::string journal_dir; + EXPECT_TRUE(NewJournalDir(&journal_dir)); + std::vector updates = {MakeCreateJobUpdate(), MakeCreateTaskUpdate(), + MakeFinishJobUpdate()}; + for (const auto& update : updates) { + JournalWriter writer(Env::Default(), journal_dir); + TF_EXPECT_OK(writer.Write(update)); + } + + TF_EXPECT_OK(CheckJournalContent(journal_dir, updates)); +} + +TEST(Journal, MissingFile) { + std::string journal_dir; + EXPECT_TRUE(NewJournalDir(&journal_dir)); + JournalReader reader(Env::Default(), journal_dir); + Update result; + bool end_of_journal = true; + Status s = reader.Read(&result, &end_of_journal); + EXPECT_TRUE(errors::IsNotFound(s)); +} + +TEST(Journal, NonRecordData) { + std::string journal_dir; + EXPECT_TRUE(NewJournalDir(&journal_dir)); + + TF_ASSERT_OK(Env::Default()->RecursivelyCreateDir(journal_dir)); + { + std::unique_ptr file; + TF_ASSERT_OK(Env::Default()->NewAppendableFile( + DataServiceJournalFile(journal_dir), &file)); + TF_ASSERT_OK(file->Append("not record data")); + } + + JournalReader reader(Env::Default(), journal_dir); + Update result; + bool end_of_journal = true; + Status s = reader.Read(&result, &end_of_journal); + EXPECT_THAT(s.error_message(), HasSubstr("corrupted record")); + EXPECT_EQ(s.code(), error::DATA_LOSS); +} + +TEST(Journal, InvalidRecordData) { + std::string journal_dir; + EXPECT_TRUE(NewJournalDir(&journal_dir)); + + TF_ASSERT_OK(Env::Default()->RecursivelyCreateDir(journal_dir)); + { + std::unique_ptr file; + TF_ASSERT_OK(Env::Default()->NewAppendableFile( + DataServiceJournalFile(journal_dir), &file)); + auto writer = absl::make_unique(file.get()); + TF_ASSERT_OK(writer->WriteRecord("not serializd proto")); + } + + JournalReader reader(Env::Default(), journal_dir); + Update result; + bool end_of_journal = true; + Status s = reader.Read(&result, &end_of_journal); + EXPECT_THAT(s.error_message(), HasSubstr("Failed to parse journal record")); + EXPECT_EQ(s.code(), error::DATA_LOSS); +} +} // namespace data +} // namespace tensorflow From aa22295c816de288b4df1ad4b3fbd16ad39ab641 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Jul 2020 11:23:31 -0700 Subject: [PATCH 1388/2522] Updated documentation and external source code links. 
PiperOrigin-RevId: 323401937 Change-Id: Ice4238aa02583a364f03410ccddaedee9246d8d2 --- .../tpu/optimization_parameters.proto | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/tensorflow/core/protobuf/tpu/optimization_parameters.proto b/tensorflow/core/protobuf/tpu/optimization_parameters.proto index 853dbe886be..0425762164f 100644 --- a/tensorflow/core/protobuf/tpu/optimization_parameters.proto +++ b/tensorflow/core/protobuf/tpu/optimization_parameters.proto @@ -54,8 +54,8 @@ message LearningRate { // Each optimizer's parameter proto has a link to its documentation and CPU // implementation (if available) for user reference. -// https://www.tensorflow.org/api_docs/python/tf/train/AdagradOptimizer -// https://github.com/tensorflow/tensorflow/blob/c19e29306ce1777456b2dbb3a14f511edf7883a8/tensorflow/core/kernels/training_ops.cc#L151 +// https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Adagrad +// https://github.com/tensorflow/tensorflow/blob/6b6471f3ffb7f1fefe42d814aa5fb9ab7a535b58/tensorflow/core/kernels/training_ops.cc#L1634 message AdagradParameters { float initial_accumulator = 1; } @@ -74,12 +74,12 @@ message BoundedAdagradParameters { float max_accumulator = 3; } -// https://www.tensorflow.org/api_docs/python/tf/train/GradientDescentOptimizer -// https://github.com/tensorflow/tensorflow/blob/c19e29306ce1777456b2dbb3a14f511edf7883a8/tensorflow/core/kernels/training_ops.cc#L423 +// https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/SGD +// https://github.com/tensorflow/tensorflow/blob/6b6471f3ffb7f1fefe42d814aa5fb9ab7a535b58/tensorflow/core/kernels/training_ops.cc#L629 message StochasticGradientDescentParameters {} -// https://www.tensorflow.org/api_docs/python/tf/train/FtrlOptimizer -// https://github.com/tensorflow/tensorflow/blob/c19e29306ce1777456b2dbb3a14f511edf7883a8/tensorflow/core/kernels/training_ops.cc#L192 +// https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Ftrl +// https://github.com/tensorflow/tensorflow/blob/6b6471f3ffb7f1fefe42d814aa5fb9ab7a535b58/tensorflow/core/kernels/training_ops.cc#L2646 message FtrlParameters { float l1 = 1; float l2 = 2; @@ -94,8 +94,8 @@ message FtrlParameters { // user learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t) // Here, t is the current timestep. 
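// For example, with the common defaults beta1 = 0.9 and beta2 = 0.999, the
// rate applied at t = 1 is roughly 0.32 * learning_rate
// (sqrt(1 - 0.999) / (1 - 0.9) ~= 0.316), and the factor approaches 1 as t
// grows.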
// -// https://www.tensorflow.org/api_docs/python/tf/train/AdamOptimizer -// https://github.com/tensorflow/tensorflow/blob/ab51450c817674c8ff08a7ae4f8ac50cdc4bed8b/tensorflow/python/training/adam.py#L54 +// https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Adam +// https://github.com/tensorflow/tensorflow/blob/ab51450c817674c8ff08a7ae4f8ac50cdc4bed8b/tensorflow/python/training/adam.py#L32 // // Note that the code by default implements the lazy version of Adam // (https://www.tensorflow.org/api_docs/python/tf/contrib/opt/LazyAdamOptimizer) @@ -119,16 +119,16 @@ message AdamParameters { bool use_sum_inside_sqrt = 10; } -// https://www.tensorflow.org/api_docs/python/tf/train/MomentumOptimizer -// https://github.com/tensorflow/tensorflow/blob/c19e29306ce1777456b2dbb3a14f511edf7883a8/tensorflow/core/kernels/training_ops.cc#L271 +// https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/SGD +// https://github.com/tensorflow/tensorflow/blob/6b6471f3ffb7f1fefe42d814aa5fb9ab7a535b58/tensorflow/core/kernels/training_ops.cc#L3068 message MomentumParameters { float momentum = 1; bool use_nesterov = 2; float initial_accum = 3; } -// https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer -// https://github.com/tensorflow/tensorflow/blob/c19e29306ce1777456b2dbb3a14f511edf7883a8/tensorflow/core/kernels/training_ops.cc#L356 +// https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/RMSprop +// https://github.com/tensorflow/tensorflow/blob/6b6471f3ffb7f1fefe42d814aa5fb9ab7a535b58/tensorflow/core/kernels/training_ops.cc#L4229 message RmsPropParameters { float rho = 1; float momentum = 2; @@ -137,8 +137,8 @@ message RmsPropParameters { float initial_mom = 5; } -// https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer -// https://github.com/tensorflow/tensorflow/blob/c19e29306ce1777456b2dbb3a14f511edf7883a8/tensorflow/core/kernels/training_ops.cc#L372 +// https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/RMSprop +// https://github.com/tensorflow/tensorflow/blob/6b6471f3ffb7f1fefe42d814aa5fb9ab7a535b58/tensorflow/core/kernels/training_ops.cc#L4358 message CenteredRmsPropParameters { float rho = 1; float momentum = 2; @@ -167,8 +167,8 @@ message MdlAdagradLightParameters { float initial_benefit = 15; } -// https://www.tensorflow.org/api_docs/python/tf/train/AdadeltaOptimizer -// https://github.com/tensorflow/tensorflow/blob/c19e29306ce1777456b2dbb3a14f511edf7883a8/tensorflow/core/kernels/training_ops.cc#L68 +// https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Adadelta +// https://github.com/tensorflow/tensorflow/blob/6b6471f3ffb7f1fefe42d814aa5fb9ab7a535b58/tensorflow/core/kernels/training_ops.cc#L933 message AdadeltaParameters { float rho = 1; float epsilon = 2; @@ -176,8 +176,8 @@ message AdadeltaParameters { float initial_update = 4; } -// https://www.tensorflow.org/api_docs/python/tf/train/ProximalAdagradOptimizer -// https://github.com/tensorflow/tensorflow/blob/c19e29306ce1777456b2dbb3a14f511edf7883a8/tensorflow/core/kernels/training_ops.cc#L164 +// https://www.tensorflow.org/api_docs/python/tf/compat/v1/train/ProximalAdagradOptimizer +// https://github.com/tensorflow/tensorflow/blob/6b6471f3ffb7f1fefe42d814aa5fb9ab7a535b58/tensorflow/core/kernels/training_ops.cc#L1961 message ProximalAdagradParameters { float l1 = 1; float l2 = 2; From 45a987d572b315eaafbe969a2f821b7d941125f1 Mon Sep 17 00:00:00 2001 From: Rick Chao Date: Mon, 27 Jul 2020 11:32:57 -0700 Subject: [PATCH 1389/2522] Correct docstring in IndexLookup. 
PiperOrigin-RevId: 323404117 Change-Id: I6275357317559285a4e3848652e79a62d75fd792 --- tensorflow/python/keras/layers/preprocessing/index_lookup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/keras/layers/preprocessing/index_lookup.py b/tensorflow/python/keras/layers/preprocessing/index_lookup.py index 7d11feae341..c25ff2c0d05 100644 --- a/tensorflow/python/keras/layers/preprocessing/index_lookup.py +++ b/tensorflow/python/keras/layers/preprocessing/index_lookup.py @@ -46,7 +46,7 @@ class IndexLookup(base_preprocessing_layer.CombinerPreprocessingLayer): This layer translates a set of arbitrary hashables into an integer output via a table-based lookup, with optional out-of-vocabulary handling. This is the - basis layer for both IntegerLookup and IndexLookup; it holds the common + basis layer for both IntegerLookup and StringLookup; it holds the common logic but is not intended to be exported as part of the Keras API. If desired, the user can call this layer's `adapt()` method on a data set, From 75801da4cd321aabbf79e78da1e5de1a10ba4c2a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Jul 2020 11:44:42 -0700 Subject: [PATCH 1390/2522] Fixes GitHub 41712 PiperOrigin-RevId: 323406781 Change-Id: I3182e17b5f949b7995b9613a0cf5e76de1dd307e --- tensorflow/python/keras/layers/convolutional.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/keras/layers/convolutional.py b/tensorflow/python/keras/layers/convolutional.py index 3207689339c..05efbd23c1e 100644 --- a/tensorflow/python/keras/layers/convolutional.py +++ b/tensorflow/python/keras/layers/convolutional.py @@ -862,6 +862,7 @@ class Conv1DTranspose(Conv1D): the dilation rate to use for dilated convolution. Currently, specifying a `dilation_rate` value != 1 is incompatible with specifying a stride value != 1. + Also dilation rate larger than 1 is not currently supported. activation: Activation function to use. If you don't specify anything, no activation is applied ( see `keras.activations`). From 997d077052395c1af42f547e5d6bf33323fae848 Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Mon, 27 Jul 2020 11:46:24 -0700 Subject: [PATCH 1391/2522] Port the pad op to the new TfLiteEvalTensor API. PiperOrigin-RevId: 323407199 Change-Id: I0ad9d3a4be5a54f0009bf578b04016c094b226ad --- tensorflow/lite/micro/kernels/BUILD | 1 + tensorflow/lite/micro/kernels/pad.cc | 274 ++++++++++++---------- tensorflow/lite/micro/kernels/pad_test.cc | 64 +++-- 3 files changed, 173 insertions(+), 166 deletions(-) diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index 717ab188f2c..3a63a6fdcf9 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD @@ -572,6 +572,7 @@ tflite_micro_cc_test( "pad_test.cc", ], deps = [ + ":kernel_runner", "//tensorflow/lite/c:common", "//tensorflow/lite/micro:micro_framework", "//tensorflow/lite/micro:op_resolvers", diff --git a/tensorflow/lite/micro/kernels/pad.cc b/tensorflow/lite/micro/kernels/pad.cc index b0ddcfda0de..39f86cbf9a3 100644 --- a/tensorflow/lite/micro/kernels/pad.cc +++ b/tensorflow/lite/micro/kernels/pad.cc @@ -16,189 +16,205 @@ limitations under the License. 
#include -#include "tensorflow/lite/kernels/internal/types.h" - -#ifdef MEMORY_SANITIZER -#include -#else -#define __msan_check_mem_is_initialized(ptr, size) -#endif - #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/internal/tensor.h" +#include "tensorflow/lite/kernels/internal/types.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/op_macros.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { namespace ops { namespace micro { namespace pad { +namespace { -struct PadContext { - PadContext(TfLiteContext* context, TfLiteNode* node) { - input = GetInput(context, node, 0); - paddings = GetInput(context, node, 1); - constant_values = nullptr; - if (NumInputs(node) == 3) { - constant_values = GetOptionalInputTensor(context, node, 2); - } else { - constant_values = nullptr; - } - output = GetOutput(context, node, 0); - dims = NumDimensions(input); - - resizing_category = ResizingCategory::kGenericResize; - const int paddings_total = GetTensorShape(paddings).FlatSize(); - const int32_t* paddings_data = GetTensorData(paddings); - // Paddings will be a n,2 array, and we need to detect 4D arrays with the - // pattern { {0,0}, {a, b}, {c, d}, {0,0} }. - if (IsConstantTensor(paddings) && paddings_total == 8 && - (paddings_data[0] == 0 && paddings_data[1] == 0) && - (paddings_data[6] == 0 && paddings_data[7] == 0)) { - resizing_category = ResizingCategory::kImageStyle; - } - } - const TfLiteTensor* constant_values; - const TfLiteTensor* input; - const TfLiteTensor* paddings; - TfLiteTensor* output; - int dims; - ResizingCategory resizing_category; +struct OpData { + PadParams params; + int32_t output_zero_point; }; +} // namespace + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpData)); +} + TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + OpData* data = static_cast(node->user_data); + TF_LITE_ENSURE(context, NumInputs(node) == 2 || NumInputs(node) == 3); TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); - PadContext op_context(context, node); - TF_LITE_ENSURE_EQ(context, op_context.input->type, op_context.output->type); - if (op_context.constant_values != nullptr) { - TF_LITE_ENSURE_EQ(context, op_context.input->type, - op_context.constant_values->type); + const TfLiteTensor* input = GetInput(context, node, /*index=*/0); + const TfLiteTensor* paddings = GetInput(context, node, /*index=*/1); + const TfLiteTensor* constant_values = + NumInputs(node) == 3 ? GetInput(context, node, /*index=*/2) : nullptr; + TfLiteTensor* output = GetOutput(context, node, /*index=*/0); + + TF_LITE_ENSURE_EQ(context, input->type, output->type); + + // Current implementations rely on the inputs being <= 4D. + TF_LITE_ENSURE(context, NumDimensions(input) <= + reference_ops::PadKernelMaxDimensionCount()); + + if (constant_values != nullptr) { + TF_LITE_ENSURE_EQ(context, input->type, constant_values->type); + // Ensure that constant_values is a scalar. + TF_LITE_ENSURE_EQ(context, NumElements(constant_values), 1); } // There must be a pair of paddings for each output dimension. 
- TF_LITE_ENSURE_EQ(context, GetTensorShape(op_context.paddings).FlatSize(), - op_context.output->dims->size * 2); + TF_LITE_ENSURE_EQ(context, GetTensorShape(paddings).FlatSize(), + output->dims->size * 2); // On Micro, outputs must be properly sized by the converter. - const int32_t* paddings_data = GetTensorData(op_context.paddings); - for (int i = 0; i < op_context.output->dims->size; i++) { - int output_dim = op_context.output->dims->data[i]; - int expected_dim = op_context.input->dims->data[i] + paddings_data[i * 2] + - paddings_data[i * 2 + 1]; + // NOTE: This data is only available because the paddings buffer is stored in + // the flatbuffer: + TF_LITE_ENSURE(context, IsConstantTensor(paddings)); + const int32_t* paddings_data = GetTensorData(paddings); + for (int i = 0; i < output->dims->size; i++) { + int output_dim = output->dims->data[i]; + int expected_dim = + input->dims->data[i] + paddings_data[i * 2] + paddings_data[i * 2 + 1]; TF_LITE_ENSURE_EQ(context, output_dim, expected_dim); } - // Current implementations rely on the inputs being <= 4D. - TF_LITE_ENSURE( - context, op_context.dims <= reference_ops::PadKernelMaxDimensionCount()); - TF_LITE_ENSURE(context, IsConstantTensor(op_context.paddings)); + // Calculate OpData: + data->params.resizing_category = ResizingCategory::kGenericResize; + const int paddings_total = GetTensorShape(paddings).FlatSize(); + if (paddings_total == 8 && (paddings_data[0] == 0 && paddings_data[1] == 0) && + (paddings_data[6] == 0 && paddings_data[7] == 0)) { + data->params.resizing_category = ResizingCategory::kImageStyle; + } + + const int num_input_dimensions = NumDimensions(input); + data->params.left_padding_count = num_input_dimensions; + data->params.right_padding_count = num_input_dimensions; + + for (int idx = num_input_dimensions - 1; idx >= 0; --idx) { + data->params.left_padding[idx] = paddings_data[idx * 2]; + data->params.right_padding[idx] = paddings_data[idx * 2 + 1]; + } + + if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8) { + if (constant_values == nullptr) { + // Quantized Pad requires that 0 is represented in the quantized + // range. + if (input->type == kTfLiteUInt8) { + TF_LITE_ENSURE(context, output->params.zero_point >= + std::numeric_limits::min()); + TF_LITE_ENSURE(context, output->params.zero_point <= + std::numeric_limits::max()); + } else { + TF_LITE_ENSURE(context, output->params.zero_point >= + std::numeric_limits::min()); + TF_LITE_ENSURE(context, output->params.zero_point <= + std::numeric_limits::max()); + } + } else { + // Quantized Pad requires that 'constant_values' is represented in the + // same quantized range as the input and output tensors. + TF_LITE_ENSURE_EQ(context, output->params.zero_point, + constant_values->params.zero_point); + TF_LITE_ENSURE_EQ(context, static_cast(output->params.scale), + static_cast(constant_values->params.scale)); + } + data->output_zero_point = output->params.zero_point; + } + return kTfLiteOk; } TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - PadContext op_context(context, node); + TFLITE_DCHECK(node->user_data != nullptr); + const OpData* data = static_cast(node->user_data); - if (op_context.constant_values != nullptr) { - // Ensure that constant_values is a scalar. - TF_LITE_ENSURE_EQ(context, NumElements(op_context.constant_values), 1); - } + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, /*index=*/0); + const TfLiteEvalTensor* constant_values = + NumInputs(node) == 3 + ? 
tflite::micro::GetEvalInput(context, node, /*index=*/2) + : nullptr; + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, /*index=*/0); - // Create before and after padding arrays that are accepted by the kernel. - const int32_t* paddings_data = GetTensorData(op_context.paddings); - - tflite::PadParams op_params; - memset(&op_params, 0, sizeof(PadParams)); - op_params.left_padding_count = op_context.dims; - op_params.right_padding_count = op_context.dims; - - for (int idx = op_context.dims - 1; idx >= 0; --idx) { - op_params.left_padding[idx] = paddings_data[idx * 2]; - op_params.right_padding[idx] = paddings_data[idx * 2 + 1]; - } - -#define TF_LITE_PAD(type, op_name, scalar, pad_value) \ - const scalar pad_value_copy = pad_value; \ - \ - type::op_name(op_params, GetTensorShape(op_context.input), \ - GetTensorData(op_context.input), &pad_value_copy, \ - GetTensorShape(op_context.output), \ - GetTensorData(op_context.output)) - switch (op_context.input->type) { + switch (input->type) { case kTfLiteFloat32: { - float pad_value = op_context.constant_values == nullptr - ? 0.f - : *GetTensorData(op_context.constant_values); - if (op_context.resizing_category == ResizingCategory::kImageStyle) { - TF_LITE_PAD(reference_ops, PadImageStyle, float, pad_value); + float pad_value = + constant_values == nullptr + ? 0.f + : *tflite::micro::GetTensorData(constant_values); + if (data->params.resizing_category == ResizingCategory::kImageStyle) { + reference_ops::PadImageStyle( + data->params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), &pad_value, + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } else { - TF_LITE_PAD(reference_ops, Pad, float, pad_value); + reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + &pad_value, tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } } break; case kTfLiteUInt8: { uint8_t pad_value; - if (op_context.constant_values == nullptr) { - // Quantized Pad requires that 0 is represented in the quantized - // range. - TF_LITE_ENSURE(context, op_context.output->params.zero_point >= - std::numeric_limits::min()); - TF_LITE_ENSURE(context, op_context.output->params.zero_point <= - std::numeric_limits::max()); - pad_value = static_cast(op_context.output->params.zero_point); + if (constant_values == nullptr) { + pad_value = static_cast(data->output_zero_point); } else { - // Quantized Pad requires that 'constant_values' is represented in the - // same quantized range as the input and output tensors. 
- TF_LITE_ENSURE_EQ(context, op_context.output->params.zero_point, - op_context.constant_values->params.zero_point); - TF_LITE_ENSURE_EQ( - context, static_cast(op_context.output->params.scale), - static_cast(op_context.constant_values->params.scale)); - pad_value = *GetTensorData(op_context.constant_values); + pad_value = *tflite::micro::GetTensorData(constant_values); } - if (op_context.resizing_category == ResizingCategory::kImageStyle) { - TF_LITE_PAD(reference_ops, PadImageStyle, uint8_t, pad_value); + if (data->params.resizing_category == ResizingCategory::kImageStyle) { + reference_ops::PadImageStyle( + data->params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), &pad_value, + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } else { - TF_LITE_PAD(reference_ops, Pad, uint8_t, pad_value); + reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + &pad_value, tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } } break; case kTfLiteInt8: { int8_t pad_value; - if (op_context.constant_values == nullptr) { - // Quantized Pad requires that 0 is represented in the quantized - // range. - TF_LITE_ENSURE(context, op_context.output->params.zero_point >= - std::numeric_limits::min()); - TF_LITE_ENSURE(context, op_context.output->params.zero_point <= - std::numeric_limits::max()); - pad_value = static_cast(op_context.output->params.zero_point); + if (constant_values == nullptr) { + pad_value = static_cast(data->output_zero_point); } else { - // Quantized Pad requires that 'constant_values' is represented in the - // same quantized range as the input and output tensors. - TF_LITE_ENSURE_EQ(context, op_context.output->params.zero_point, - op_context.constant_values->params.zero_point); - TF_LITE_ENSURE(context, op_context.output->params.scale == - op_context.constant_values->params.scale); - pad_value = *GetTensorData(op_context.constant_values); + pad_value = *tflite::micro::GetTensorData(constant_values); } - if (op_context.resizing_category == ResizingCategory::kImageStyle) { - TF_LITE_PAD(reference_ops, PadImageStyle, int8_t, pad_value); + if (data->params.resizing_category == ResizingCategory::kImageStyle) { + reference_ops::PadImageStyle( + data->params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), &pad_value, + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } else { - TF_LITE_PAD(reference_ops, Pad, int8_t, pad_value); + reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + &pad_value, tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } } break; case kTfLiteInt32: { int32_t pad_value = - op_context.constant_values == nullptr + constant_values == nullptr ? 
0 - : *GetTensorData(op_context.constant_values); - TF_LITE_PAD(reference_ops, Pad, int32_t, pad_value); + : *tflite::micro::GetTensorData(constant_values); + reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + &pad_value, tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } break; default: TF_LITE_KERNEL_LOG(context, "Type %s not currently supported by Pad.", - TfLiteTypeGetName(op_context.input->type)); + TfLiteTypeGetName(input->type)); return kTfLiteError; } #undef TF_LITE_PAD @@ -208,7 +224,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace pad TfLiteRegistration Register_PAD() { - return {/*init=*/nullptr, + return {/*init=*/pad::Init, /*free=*/nullptr, /*prepare=*/pad::Prepare, /*invoke=*/pad::Eval, @@ -220,7 +236,7 @@ TfLiteRegistration Register_PAD() { // Also register Pad as PadV2. TfLiteRegistration Register_PADV2() { - return {/*init=*/nullptr, + return {/*init=*/pad::Init, /*free=*/nullptr, /*prepare=*/pad::Prepare, /*invoke=*/pad::Eval, diff --git a/tensorflow/lite/micro/kernels/pad_test.cc b/tensorflow/lite/micro/kernels/pad_test.cc index 9b1f4db30cd..4d391057858 100644 --- a/tensorflow/lite/micro/kernels/pad_test.cc +++ b/tensorflow/lite/micro/kernels/pad_test.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/micro/all_ops_resolver.h" +#include "tensorflow/lite/micro/kernels/kernel_runner.h" #include "tensorflow/lite/micro/test_helpers.h" #include "tensorflow/lite/micro/testing/micro_test.h" #include "tensorflow/lite/micro/testing/test_utils.h" @@ -28,29 +29,28 @@ template TfLiteStatus ValidatePadGoldens(TfLiteTensor* tensors, int tensors_size, const T* golden, T* output_data, int output_length) { - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_PAD); - TF_LITE_ENSURE(&context, registration != nullptr); - int inputs_array_data[] = {2, 0, 1}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 2}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = nullptr; - node.builtin_data = nullptr; - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->prepare); - TF_LITE_ENSURE_EQ(&context, kTfLiteOk, - registration->prepare(&context, &node)); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_ENSURE_EQ(&context, kTfLiteOk, registration->invoke(&context, &node)); + + const TfLiteRegistration registration = tflite::ops::micro::Register_PAD(); + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, + /*builtin_data=*/nullptr, micro_test::reporter); + + // Prepare should catch dimension mismatches. + TfLiteStatus prepare_status = runner.InitAndPrepare(); + if (prepare_status != kTfLiteOk) { + return prepare_status; + } + + // Eval should catch quantization mismatches. 
+ TfLiteStatus invoke_status = runner.Invoke(); + if (invoke_status != kTfLiteOk) { + return invoke_status; + } + for (int i = 0; i < output_length; ++i) { TF_LITE_MICRO_EXPECT_EQ(golden[i], output_data[i]); } @@ -61,34 +61,24 @@ template TfLiteStatus ValidatePadV2Goldens(TfLiteTensor* tensors, int tensors_size, const T* golden, T* output_data, int output_length) { - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_PADV2); - TF_LITE_ENSURE(&context, registration != nullptr); - int inputs_array_data[] = {3, 0, 1, 2}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 3}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = nullptr; - node.builtin_data = nullptr; - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->prepare); + + const TfLiteRegistration registration = tflite::ops::micro::Register_PADV2(); + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, + /*builtin_data=*/nullptr, micro_test::reporter); + // Prepare should catch dimension mismatches. - TfLiteStatus prepare_status = registration->prepare(&context, &node); + TfLiteStatus prepare_status = runner.InitAndPrepare(); if (prepare_status != kTfLiteOk) { return prepare_status; } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); // Eval should catch quantization mismatches. - TfLiteStatus invoke_status = registration->invoke(&context, &node); + TfLiteStatus invoke_status = runner.Invoke(); if (invoke_status != kTfLiteOk) { return invoke_status; } From dc6bbb8e778a9f617d07db9a29be9a95a3cc8b69 Mon Sep 17 00:00:00 2001 From: Tomer Kaftan Date: Mon, 27 Jul 2020 11:50:10 -0700 Subject: [PATCH 1392/2522] Enable the KerasTensors refactoring of the Keras functional API. Replaces symbolic tensors during Functional API construction with lightweight `tensor-like` objects that have a very similar API, but do not require a global graph workspace. This should improve the reliability and performance of the Keras functional API and of automatic TF op -> Lambda layer conversions during functional API construction. E.g. ~8-10% faster functional model construction time, dramatically lower memory usage If this causes a breakage in your code, you are likely experiencing one of the following: * code that uses map_fn/tf.cond/tf.while_loop/control flow as op layers and happens to maybe work right now (I wouldn't trust any model that uses those during functional model construction) * code that uses get_concrete_function to trace keras inputs directly. * code that uses isinstance(x, tf.Tensor) instead of tf.is_tensor * Any code already susceptible to leaking tensors outside of a graph becomes slightly more susceptible now (though it would be an issue anyway) * Any code that is overly dependent on the exact names attached to symbolic tensors (e.g. assumes there will be `:0` at the end of the inputs, treating names as unique identifiers instead of using .ref(), etc.) * Your code relies on the exact # and names of the op layers not changing from what it was * You have code that has very tricky shape manipulation via automatically converted tf op layers, and the KerasTensor shape inference is insufficient. 
* You have code that tries manually walking a model layer by layer and assumes layers only ever have 1 positional argument (This doesn't hold true in head either, but it becomes marginally more likely to cause issues w/ the newer op lambda layers) * Your code manually enters keras.backend.get_graph() before building your functional model (Usually this was done to work around fragility in the functional api/ op -> layer conversions. This should no longer be needed, and in fact will now cause tensors to leak out of a graph if you do it) * direct asserts might break in cases where an op like tf.rank used to return a static or symbolic value depending on if the input had a fully static shape or not. Now it is always symbolic. To see if this refactoring is causing you problems, you can disable the refactoring as follows for as long as we have an internal flag available: ``` from tensorflow.python.keras.engine import keras_tensor keras_tensor.disable_keras_tensors() ``` PiperOrigin-RevId: 323407977 Change-Id: Ie83a77e44769794ef3454bd4a2a7d0758682878c --- RELEASE.md | 21 ++++++++++++++++++- tensorflow/python/keras/backend.py | 12 +++++------ .../python/keras/engine/keras_tensor.py | 2 +- 3 files changed, 27 insertions(+), 8 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 1d5a7d3e99d..12a2f3eb799 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -14,6 +14,18 @@ * Removed `tf.distribute.Strategy.experimental_run_v2` method, which was deprecated in TF 2.2. * `tensorflow.python`, `tensorflow.core` and `tensorflow.compiler` modules are now hidden. These modules are not part of TensorFlow public API. +* A major refactoring of the internals of the Keras Functional API may affect code that is relying on certain internal details: + * Code that uses `isinstance(x, tf.Tensor)` instead of `tf.is_tensor` when checking Keras symbolic inputs/outputs should switch to using `tf.is_tensor`. + * Code that is overly dependent on the exact names attached to symbolic tensors (e.g. assumes there will be ":0" at the end of the inputs, treats names as unique identifiers instead of using `tensor.ref()`, etc.) + * Code that uses `get_concrete_function` to trace Keras symbolic inputs directly should switch to building matching `tf.TensorSpec`s directly and tracing the `TensorSpec` objects. + * Code that relies on the exact number and names of the op layers that TensorFlow operations were converted into. These may have changed. + * Code that uses `tf.map_fn`/`tf.cond`/`tf.while_loop`/control flow as op layers and happens to work before TF 2.4. These will explicitly be unsupported now. Converting these ops to Functional API op layers was unreliable before TF 2.4, and prone to erroring incomprehensibly or being silently buggy. + * Code that directly asserts on a Keras symbolic value in cases where ops like `tf.rank` used to return a static or symbolic value depending on if the input had a fully static shape or not. Now these ops always return symbolic values. + * Code already susceptible to leaking tensors outside of graphs becomes slightly more likely to do so now. + * Code that requires very tricky shape manipulation via converted op layers in order to work, where the Keras symbolic shape inference proves insufficient. + * Code that tries manually walking a `tf.keras.Model` layer by layer and assumes layers only ever have one positional argument. This assumption doesn't hold true before TF 2.4 either, but is more likely to cause issues now.
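To make the first and third migration points in the notes above concrete, here is a minimal, hypothetical sketch (not part of this patch; the model, shapes, and layer choices are illustrative assumptions) of the recommended check and tracing pattern:

```python
import tensorflow as tf

# A Keras symbolic input. After this refactoring it is a lightweight
# KerasTensor rather than a graph-backed tf.Tensor.
inputs = tf.keras.Input(shape=(4,))
outputs = tf.keras.layers.Dense(2)(inputs)
model = tf.keras.Model(inputs, outputs)

# Brittle: isinstance(inputs, tf.Tensor) may no longer match.
# Preferred: tf.is_tensor covers both graph tensors and KerasTensors.
assert tf.is_tensor(inputs)

# Brittle: calling get_concrete_function on the Keras symbolic input itself.
# Preferred: build a matching TensorSpec and trace that instead.
concrete = tf.function(model).get_concrete_function(
    tf.TensorSpec(shape=(None, 4), dtype=tf.float32))
```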
+ * Code that manually enters `keras.backend.get_graph()` before building a functional model. This is no longer needed. + ## Known Caveats @@ -24,6 +36,7 @@ * * * A new module named `tf.experimental.numpy` is added, which is a NumPy-compatible API for writing TF programs. This module provides class `ndarray`, which mimics the `ndarray` class in NumPy, and wraps an immutable `tf.Tensor` under the hood. A subset of NumPy functions (e.g. `numpy.add`) are provided. Their inter-operation with TF facilities is seamless in most cases. See tensorflow/python/ops/numpy_ops/README.md for details of what are supported and what are the differences with NumPy. +* A major refactoring of the internals of the Keras Functional API has been completed, that should improve the reliability, stability, and performance of constructing Functional models. ## Bug Fixes and Other Changes @@ -57,7 +70,13 @@ option. * `tf.distribute`: * -* `tf.keras`: +* `tf.keras`: + * Improvements from the functional API refactoring: + * Functional model construction does not need to maintain a global workspace graph, removing memory leaks especially when building many models or very large models. + * Functional model construction should be ~8-10% faster on average. + * Functional models can now contain non-symbolic values in their call inputs inside of the first positional argument. + * Several classes of TF ops that were not reliably converted to Keras layers during functional API construction should now work, e.g. `tf.image.ssim_multiscale` + * Error messages when Functional API construction goes wrong (and when ops cannot be converted to Keras layers automatically) should be clearer and easier to understand. * * `tf.function` / AutoGraph: * Added `experimental_follow_type_hints` argument for `tf.function`. 
When diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py index 07da09049c5..3861d49254d 100644 --- a/tensorflow/python/keras/backend.py +++ b/tensorflow/python/keras/backend.py @@ -1180,7 +1180,7 @@ def placeholder(shape=None, >>> input_ph = tf.keras.backend.placeholder(shape=(2, 4, 5)) >>> input_ph - + """ if sparse and ragged: @@ -1281,7 +1281,7 @@ def shape(x): >>> input = tf.keras.backend.placeholder(shape=(2, 4, 5)) >>> tf.keras.backend.shape(input) - + """ return array_ops.shape(x) @@ -1796,13 +1796,13 @@ def dot(x, y): >>> y = tf.keras.backend.placeholder(shape=(3, 4)) >>> xy = tf.keras.backend.dot(x, y) >>> xy - + >>> x = tf.keras.backend.placeholder(shape=(32, 28, 3)) >>> y = tf.keras.backend.placeholder(shape=(3, 4)) >>> xy = tf.keras.backend.dot(x, y) >>> xy - + >>> x = tf.keras.backend.random_uniform_variable(shape=(2, 3), low=0, high=1) >>> y = tf.keras.backend.ones((4, 3, 5)) @@ -2052,10 +2052,10 @@ def transpose(x): [3., 6.]], dtype=float32) >>> input = tf.keras.backend.placeholder((2, 3)) >>> input - + >>> input_transposed = tf.keras.backend.transpose(input) >>> input_transposed - + """ return array_ops.transpose(x) diff --git a/tensorflow/python/keras/engine/keras_tensor.py b/tensorflow/python/keras/engine/keras_tensor.py index 4266c6dbee6..840aaa72441 100644 --- a/tensorflow/python/keras/engine/keras_tensor.py +++ b/tensorflow/python/keras/engine/keras_tensor.py @@ -30,7 +30,7 @@ from tensorflow.python.util import object_identity # pylint: disable=g-classes-have-attributes -_KERAS_TENSORS_ENABLED = False +_KERAS_TENSORS_ENABLED = True def enable_keras_tensors(): From 127cda80da7832d1228166031111663bdf2eca05 Mon Sep 17 00:00:00 2001 From: Ken Franko Date: Mon, 27 Jul 2020 12:03:45 -0700 Subject: [PATCH 1393/2522] Add more automatic Outside Compilation tests. Adds test for string ops and random ops for automatic outside compilation.
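As a rough, user-level illustration of what "automatic outside compilation" means in these tests (a sketch only, not code from this patch; it assumes `strategy` is a TPUStrategy built from a resolver, as in the test's `get_tpu_strategy()` helper):

```python
import tensorflow as tf

# With soft device placement enabled, ops without TPU kernels (such as
# tf.strings ops) are automatically run on the host CPU instead of
# requiring an explicit tpu.outside_compilation wrapper.
tf.config.set_soft_device_placement(True)

@tf.function
def train_step(x):
  def computation(x):
    s = tf.strings.format("1{}", x)   # no TPU kernel; runs on the host
    return tf.strings.to_number(s)
  return strategy.run(computation, args=(x,))
```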
PiperOrigin-RevId: 323410994 Change-Id: Ia8124acc43d4f7426bb9f8c60b6be4bb5338dda7 --- .../tpu/tpu_outside_compilation_test.py | 83 ++++++++++++++++++- 1 file changed, 82 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/tpu/tpu_outside_compilation_test.py b/tensorflow/python/tpu/tpu_outside_compilation_test.py index bdba57ca000..291ab7f8d53 100644 --- a/tensorflow/python/tpu/tpu_outside_compilation_test.py +++ b/tensorflow/python/tpu/tpu_outside_compilation_test.py @@ -28,11 +28,15 @@ from tensorflow.python.distribute.cluster_resolver import tpu_cluster_resolver from tensorflow.python.eager import def_function from tensorflow.python.eager import remote from tensorflow.python.eager import test +from tensorflow.python.framework import config from tensorflow.python.framework import constant_op from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import logging_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import random_ops +from tensorflow.python.ops import string_ops from tensorflow.python.ops import summary_ops_v2 as summary from tensorflow.python.ops import variables from tensorflow.python.platform import flags @@ -61,6 +65,11 @@ def get_tpu_strategy(): return tpu_lib.TPUStrategyV2(resolver) +def computation_with_string_ops(x): + output = string_ops.string_format("1{}", x) + return string_ops.string_to_number(output) + + class TpuOutsideCompilationTest(test.TestCase, parameterized.TestCase): def testResourceVariableAssignOnHost(self): @@ -416,7 +425,7 @@ class TpuOutsideCompilationTest(test.TestCase, parameterized.TestCase): strategy.experimental_local_results(train_step()), constant_op.constant(8748., shape=(strategy.num_replicas_in_sync))) - def testiGradientOfGradientAcrossOutsideCompilation(self): + def testGradientOfGradientAcrossOutsideCompilation(self): """Tests compiled gradients of gradients can contain host computations.""" strategy = get_tpu_strategy() @@ -443,5 +452,77 @@ class TpuOutsideCompilationTest(test.TestCase, parameterized.TestCase): constant_op.constant(2916., shape=(strategy.num_replicas_in_sync))) +class OutsideCompilationOnUnsupportedOpTest(test.TestCase): + + def setUp(self): + super(OutsideCompilationOnUnsupportedOpTest, self).setUp() + config.set_soft_device_placement(True) + + def testStringOpWithManualOutsideCompilation(self): + strategy = get_tpu_strategy() + + @def_function.function + def train_step(x): + + def computation(x): + return tpu.outside_compilation(computation_with_string_ops, x) + + return strategy.run(computation, args=(x,)) + + self.assertAllEqual( + strategy.experimental_local_results(train_step(0)), + constant_op.constant(10, shape=(strategy.num_replicas_in_sync))) + + def testStringOpWithAutoOutsideCompilation(self): + strategy = get_tpu_strategy() + + @def_function.function + def train_step(x): + + def computation(x): + return computation_with_string_ops(x) + + return strategy.run(computation, args=(x,)) + + self.assertAllEqual( + strategy.experimental_local_results(train_step(0)), + constant_op.constant(10, shape=(strategy.num_replicas_in_sync))) + + def testAutoOutsideCompilationWithFunctionalNodes(self): + strategy = get_tpu_strategy() + + @def_function.function + def train_step(a, b): + + def fn(a, b): + fn1 = lambda: computation_with_string_ops(a * 100) + fn2 = lambda: computation_with_string_ops(a) + pred = math_ops.greater_equal(a, b) + result = array_ops.identity( + 
control_flow_ops.cond(pred, fn1, fn2), + name="uncompilable_control_flow") + return result + + return strategy.run(fn, args=(a, b)) + + self.assertAllEqual( + strategy.experimental_local_results(train_step(0.0, -1.0)), + constant_op.constant(10, shape=(strategy.num_replicas_in_sync))) + + def testRandomOpsWithAutoOutsideCompilation(self): + strategy = get_tpu_strategy() + + @def_function.function + def train_step(): + + def computation(): + return random_ops.random_normal(shape=[1, 2, 3]) + + return strategy.run(computation, args=()) + + self.assertAllEqual( + strategy.experimental_local_results(train_step())[0].shape, [1, 2, 3]) + + if __name__ == "__main__": test.main() From 19207dd8714601a9d7d21ccea0cced53f0d119cc Mon Sep 17 00:00:00 2001 From: Brian Patton Date: Mon, 27 Jul 2020 12:18:00 -0700 Subject: [PATCH 1394/2522] Add pfor strategy for stateless truncated normal. PiperOrigin-RevId: 323413961 Change-Id: I3afc1395e844c8ddb6cd4eb439db35a26fa7e6e0 --- tensorflow/python/ops/parallel_for/pfor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/ops/parallel_for/pfor.py b/tensorflow/python/ops/parallel_for/pfor.py index ef06ebe61ec..d14ad1e5cba 100644 --- a/tensorflow/python/ops/parallel_for/pfor.py +++ b/tensorflow/python/ops/parallel_for/pfor.py @@ -3064,6 +3064,7 @@ def _convert_multinomial(pfor_input): @RegisterPFor("StatelessMultinomial") +@RegisterPFor("StatelessParameterizedTruncatedNormal") @RegisterPFor("StatelessRandomBinomial") @RegisterPFor("StatelessRandomGammaV2") @RegisterPFor("StatelessRandomNormal") From 2f27a0b6b59aad5a2bc870c2cd82d945aefe5e9e Mon Sep 17 00:00:00 2001 From: Ran Chen Date: Mon, 27 Jul 2020 12:21:27 -0700 Subject: [PATCH 1395/2522] Wait for eager collectives to finish before returning The background is that we use async executors to launch collective ops in eager, so calling the strategy.reduce() in eager no longer blocks until the collective finishes. This changes that with the following motivations: - By default the user is using a sync executor. Having strategy.reduce behave asynchronously is inconsistent. - This may lead to issues if strategy.reduce is the last line in the program, e.g.: ``` metrics = strategy.reduce() if is_chief(): write(metrics) ``` Other workers will exit before the collective finishes, and cause a deadlock or failure. - This doesn't work with single client where collectives are all on a remote worker, because there's no guaranteed RPC ordering between the collective op and the op that consumes the output of the collective. Note that we need to use more than one stream. PiperOrigin-RevId: 323414702 Change-Id: I4a04d194ba53769c9df7a9ce3867eadea8dc9dea --- tensorflow/python/distribute/cross_device_ops.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/python/distribute/cross_device_ops.py b/tensorflow/python/distribute/cross_device_ops.py index ed6b0558b46..f82ddf8ab79 100644 --- a/tensorflow/python/distribute/cross_device_ops.py +++ b/tensorflow/python/distribute/cross_device_ops.py @@ -1110,6 +1110,9 @@ class CollectiveAllReduce(CrossDeviceOps): control_inputs, executors=self._executors)) + for e in self._executors: + e.wait() + mirrored = [] # Reverse the order of reduced value to recover the order in the input. for value in reversed(reduced_values): From adf835dd3ba875ce3795cc37a68245a4ea360624 Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: Mon, 27 Jul 2020 12:31:55 -0700 Subject: [PATCH 1396/2522] Use proto to configure tf.data service worker server.
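A minimal sketch of the user-visible guarantee described in the collective-reduce patch above (assumptions: a multi-worker setup, the TF 2.3-era experimental strategy constructor, and hypothetical `is_chief`/`write_metrics` helpers):

```python
import tensorflow as tf

strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()

@tf.function
def step_fn():
  # Produces a per-replica value via strategy.run.
  return strategy.run(lambda: tf.constant(1.0))

per_replica = step_fn()
# After this change, reduce() blocks until the collective has finished on
# every worker, so it is safe even as the last statement of a program.
metrics = strategy.reduce(tf.distribute.ReduceOp.SUM, per_replica, axis=None)

if is_chief():            # hypothetical helper
  write_metrics(metrics)  # hypothetical helper
```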
This simplifies adding new configuration properties, so that we don't need to plumb new properties through. This also gives us a single place to document all configuration options (in the .proto file). PiperOrigin-RevId: 323416829 Change-Id: I99c1a351e0e283fdcddfa2a6d8e5768ab53da21a --- tensorflow/core/data/service/BUILD | 1 + .../core/data/service/grpc_worker_impl.cc | 5 ++-- .../core/data/service/grpc_worker_impl.h | 4 +-- tensorflow/core/data/service/server_lib.cc | 30 +++++-------------- tensorflow/core/data/service/server_lib.h | 25 ++-------------- tensorflow/core/data/service/test_cluster.cc | 8 +++-- tensorflow/core/data/service/worker_impl.cc | 16 +++++----- tensorflow/core/data/service/worker_impl.h | 13 ++++---- .../data/experimental/service_config.proto | 15 ++++++++++ .../data/experimental/service/server_lib.py | 7 ++++- .../service/server_lib_wrapper.cc | 12 +++++--- 11 files changed, 65 insertions(+), 71 deletions(-) diff --git a/tensorflow/core/data/service/BUILD b/tensorflow/core/data/service/BUILD index 7a12f1079cc..2a23445a518 100644 --- a/tensorflow/core/data/service/BUILD +++ b/tensorflow/core/data/service/BUILD @@ -263,6 +263,7 @@ cc_library( deps = [ ":worker_cc_grpc_proto", ":worker_impl", + "//tensorflow/core:protos_all_cc", "//tensorflow/core/distributed_runtime/rpc:grpc_util", tf_grpc_cc_dependency(), ], diff --git a/tensorflow/core/data/service/grpc_worker_impl.cc b/tensorflow/core/data/service/grpc_worker_impl.cc index 0cddfce4e0b..c76e1062753 100644 --- a/tensorflow/core/data/service/grpc_worker_impl.cc +++ b/tensorflow/core/data/service/grpc_worker_impl.cc @@ -26,9 +26,8 @@ using ::grpc::ServerContext; using ::grpc::Status; GrpcWorkerImpl::GrpcWorkerImpl(ServerBuilder* server_builder, - const std::string& dispatcher_address, - const std::string& protocol) - : impl_(dispatcher_address, protocol) { + const experimental::WorkerConfig& config) + : impl_(config) { server_builder->RegisterService(this); VLOG(1) << "Registered data service worker"; } diff --git a/tensorflow/core/data/service/grpc_worker_impl.h b/tensorflow/core/data/service/grpc_worker_impl.h index 169ae29ea37..b0881143a57 100644 --- a/tensorflow/core/data/service/grpc_worker_impl.h +++ b/tensorflow/core/data/service/grpc_worker_impl.h @@ -19,6 +19,7 @@ limitations under the License. 
#include "grpcpp/server_builder.h" #include "tensorflow/core/data/service/worker.grpc.pb.h" #include "tensorflow/core/data/service/worker_impl.h" +#include "tensorflow/core/protobuf/data/experimental/service_config.pb.h" namespace tensorflow { namespace data { @@ -35,8 +36,7 @@ namespace data { class GrpcWorkerImpl : public WorkerService::Service { public: explicit GrpcWorkerImpl(grpc::ServerBuilder* server_builder, - const std::string& dispatcher_address, - const std::string& protocol); + const experimental::WorkerConfig& config); ~GrpcWorkerImpl() override {} void Start(const std::string& worker_address); diff --git a/tensorflow/core/data/service/server_lib.cc b/tensorflow/core/data/service/server_lib.cc index 6d912b1c802..648a189717e 100644 --- a/tensorflow/core/data/service/server_lib.cc +++ b/tensorflow/core/data/service/server_lib.cc @@ -79,8 +79,7 @@ DispatchGrpcDataServer::DispatchGrpcDataServer( DispatchGrpcDataServer::~DispatchGrpcDataServer() { delete service_; } void DispatchGrpcDataServer::AddServiceToBuilder(grpc::ServerBuilder* builder) { - auto service = absl::make_unique(builder, config_); - service_ = service.release(); + service_ = absl::make_unique(builder, config_).release(); } Status DispatchGrpcDataServer::NumWorkers(int* num_workers) { @@ -96,22 +95,17 @@ Status DispatchGrpcDataServer::NumWorkers(int* num_workers) { } WorkerGrpcDataServer::WorkerGrpcDataServer( - int port, const std::string& protocol, - const std::string& dispatcher_address, const std::string& worker_address) - : GrpcDataServerBase(port, protocol), - dispatcher_address_(dispatcher_address), - worker_address_(worker_address) {} + const experimental::WorkerConfig& config) + : GrpcDataServerBase(config.port(), config.protocol()), config_(config) {} WorkerGrpcDataServer::~WorkerGrpcDataServer() { delete service_; } void WorkerGrpcDataServer::AddServiceToBuilder(grpc::ServerBuilder* builder) { - auto service = absl::make_unique(builder, dispatcher_address_, - protocol_); - service_ = service.release(); + service_ = absl::make_unique(builder, config_).release(); } Status WorkerGrpcDataServer::StartServiceInternal() { - std::string worker_address = worker_address_; + std::string worker_address = config_.worker_address(); if (worker_address.empty()) { worker_address = absl::StrCat("localhost:", kPortPlaceholder); } @@ -128,19 +122,9 @@ Status NewDispatchServer(const experimental::DispatcherConfig& config, return Status::OK(); } -Status NewWorkerServer(int port, const std::string& protocol, - const std::string& dispatcher_address, +Status NewWorkerServer(const experimental::WorkerConfig& config, std::unique_ptr* out_server) { - return NewWorkerServer(port, protocol, dispatcher_address, - /*worker_address=*/"", out_server); -} - -Status NewWorkerServer(int port, const std::string& protocol, - const std::string& dispatcher_address, - const std::string& worker_address, - std::unique_ptr* out_server) { - *out_server = absl::make_unique( - port, protocol, dispatcher_address, worker_address); + *out_server = absl::make_unique(config); return Status::OK(); } diff --git a/tensorflow/core/data/service/server_lib.h b/tensorflow/core/data/service/server_lib.h index d147f47c5e4..365241753fb 100644 --- a/tensorflow/core/data/service/server_lib.h +++ b/tensorflow/core/data/service/server_lib.h @@ -91,9 +91,7 @@ class DispatchGrpcDataServer : public GrpcDataServerBase { class WorkerGrpcDataServer : public GrpcDataServerBase { public: - WorkerGrpcDataServer(int requested_port, const std::string& protocol, - const 
std::string& dispatcher_address, - const std::string& worker_address); + explicit WorkerGrpcDataServer(const experimental::WorkerConfig& config); ~WorkerGrpcDataServer() override; protected: @@ -101,8 +99,7 @@ class WorkerGrpcDataServer : public GrpcDataServerBase { Status StartServiceInternal() override; private: - const std::string dispatcher_address_; - const std::string worker_address_; + const experimental::WorkerConfig config_; // Owned. We use a raw pointer because GrpcWorkerImpl is forward-declared. GrpcWorkerImpl* service_; }; @@ -112,23 +109,7 @@ Status NewDispatchServer(const experimental::DispatcherConfig& config, std::unique_ptr* out_server); // Creates a worker tf.data server and stores it in `*out_server`. -// -// The port can be a specific port or 0. If the port is 0, an available port -// will be chosen in Start(). This value can be queried with BoundPort(). -// -// The worker_address argument is optional. If left empty, it will default to -// "localhost:%port%". When the worker registers with the dispatcher, the worker -// will report the worker address, so that the dispatcher can tell clients where -// to read from. The address may contain the placeholder "%port%", which will be -// replaced with the value of BoundPort(). -Status NewWorkerServer(int port, const std::string& protocol, - const std::string& dispatcher_address, - const std::string& worker_address, - std::unique_ptr* out_server); - -// Creates a worker using the default worker_address. -Status NewWorkerServer(int port, const std::string& protocol, - const std::string& dispatcher_address, +Status NewWorkerServer(const experimental::WorkerConfig& config, std::unique_ptr* out_server); } // namespace data diff --git a/tensorflow/core/data/service/test_cluster.cc b/tensorflow/core/data/service/test_cluster.cc index ad0d2be87d8..8ae3f191407 100644 --- a/tensorflow/core/data/service/test_cluster.cc +++ b/tensorflow/core/data/service/test_cluster.cc @@ -62,8 +62,12 @@ Status TestCluster::Initialize() { Status TestCluster::AddWorker() { std::unique_ptr worker; - TF_RETURN_IF_ERROR( - NewWorkerServer(/*port=*/0, kProtocol, dispatcher_address_, &worker)); + experimental::WorkerConfig config; + config.set_port(0); + config.set_protocol(kProtocol); + config.set_dispatcher_address(dispatcher_address_); + config.set_worker_address("localhost:%port%"); + TF_RETURN_IF_ERROR(NewWorkerServer(config, &worker)); TF_RETURN_IF_ERROR(worker->Start()); worker_addresses_.push_back(absl::StrCat("localhost:", worker->BoundPort())); workers_.push_back(std::move(worker)); diff --git a/tensorflow/core/data/service/worker_impl.cc b/tensorflow/core/data/service/worker_impl.cc index 00659e1d048..39508b1eab0 100644 --- a/tensorflow/core/data/service/worker_impl.cc +++ b/tensorflow/core/data/service/worker_impl.cc @@ -46,8 +46,8 @@ auto* tf_data_service_created = } // namespace DataServiceWorkerImpl::DataServiceWorkerImpl( - const std::string& dispatcher_address, const std::string& protocol) - : dispatcher_address_(dispatcher_address), protocol_(protocol) { + const experimental::WorkerConfig& config) + : config_(config) { tf_data_service_created->GetCell()->Set(true); } @@ -68,7 +68,7 @@ void DataServiceWorkerImpl::Start(const std::string& worker_address) { Status s = Register(); while (!s.ok()) { LOG(WARNING) << "Failed to register with dispatcher at " - << dispatcher_address_ << ": " << s; + << config_.dispatcher_address() << ": " << s; Env::Default()->SleepForMicroseconds(kHeartbeatIntervalMicros); s = Register(); } @@ -173,17 +173,17 @@ 
Status DataServiceWorkerImpl::EnsureDispatcherStubInitialized() if (!dispatcher_stub_) { ::grpc::ChannelArguments args; std::shared_ptr<::grpc::ChannelCredentials> credentials; - TF_RETURN_IF_ERROR( - CredentialsFactory::CreateClientCredentials(protocol_, &credentials)); - auto channel = - ::grpc::CreateCustomChannel(dispatcher_address_, credentials, args); + TF_RETURN_IF_ERROR(CredentialsFactory::CreateClientCredentials( + config_.protocol(), &credentials)); + auto channel = ::grpc::CreateCustomChannel(config_.dispatcher_address(), + credentials, args); dispatcher_stub_ = DispatcherService::NewStub(channel); } return Status::OK(); } Status DataServiceWorkerImpl::Register() EXCLUSIVE_LOCKS_REQUIRED(mu_) { - VLOG(3) << "Registering with dispatcher at " << dispatcher_address_; + VLOG(3) << "Registering with dispatcher at " << config_.dispatcher_address(); TF_RETURN_IF_ERROR(EnsureDispatcherStubInitialized()); RegisterWorkerRequest req; req.set_worker_address(worker_address_); diff --git a/tensorflow/core/data/service/worker_impl.h b/tensorflow/core/data/service/worker_impl.h index adb3e97bbea..6961312ee34 100644 --- a/tensorflow/core/data/service/worker_impl.h +++ b/tensorflow/core/data/service/worker_impl.h @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/core/data/service/worker.pb.h" #include "tensorflow/core/data/standalone.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/protobuf/data/experimental/service_config.pb.h" #include "tensorflow/core/public/session.h" namespace tensorflow { @@ -29,12 +30,14 @@ namespace data { // A TensorFlow DataService serves dataset elements over RPC. class DataServiceWorkerImpl { public: - explicit DataServiceWorkerImpl(const std::string& dispatcher_address, - const std::string& protocol); + explicit DataServiceWorkerImpl(const experimental::WorkerConfig& config); ~DataServiceWorkerImpl(); // Starts the worker. The worker needs to know its own address so that it can - // register with the dispatcher. + // register with the dispatcher. This is set in `Start` instead of in the + // constructor because the worker may be binding to port `0`, in which case + // the address isn't known until the worker has started and decided which port + // to bind to. void Start(const std::string& worker_address); // See worker.proto for API documentation. @@ -67,9 +70,7 @@ class DataServiceWorkerImpl { std::unique_ptr iterator; } Task; - const std::string dispatcher_address_; - // Protocol for communicating with the dispatcher. - const std::string protocol_; + const experimental::WorkerConfig config_; // The worker's own address. std::string worker_address_; diff --git a/tensorflow/core/protobuf/data/experimental/service_config.proto b/tensorflow/core/protobuf/data/experimental/service_config.proto index 5dcc3c69083..8708b923720 100644 --- a/tensorflow/core/protobuf/data/experimental/service_config.proto +++ b/tensorflow/core/protobuf/data/experimental/service_config.proto @@ -10,3 +10,18 @@ message DispatcherConfig { // The protocol for the dispatcher to use when connecting to workers. string protocol = 2; } + +// Configuration for a tf.data service WorkerServer. +message WorkerConfig { + // The port for the worker to bind to. A value of 0 indicates that the + // worker may bind to any available port. + int64 port = 1; + // The protocol for the worker to use when connecting to the dispatcher. + string protocol = 2; + // The address of the dispatcher to register with. 
+ string dispatcher_address = 3; + // The address of the worker server. The substring "%port%", if specified, + // will be replaced with the worker's bound port. This is useful when the port + // is set to `0`. + string worker_address = 4; +} diff --git a/tensorflow/python/data/experimental/service/server_lib.py b/tensorflow/python/data/experimental/service/server_lib.py index 3e355565308..99dc9297901 100644 --- a/tensorflow/python/data/experimental/service/server_lib.py +++ b/tensorflow/python/data/experimental/service/server_lib.py @@ -205,8 +205,13 @@ class WorkerServer(object): protocol = "grpc" self._protocol = protocol + config = service_config_pb2.WorkerConfig( + port=port, + protocol=protocol, + dispatcher_address=dispatcher_address, + worker_address=worker_address) self._server = _pywrap_server_lib.TF_DATA_NewWorkerServer( - port, protocol, dispatcher_address, worker_address) + config.SerializeToString()) if start: self._server.start() diff --git a/tensorflow/python/data/experimental/service/server_lib_wrapper.cc b/tensorflow/python/data/experimental/service/server_lib_wrapper.cc index b8250aaeda6..f59c1fb90bf 100644 --- a/tensorflow/python/data/experimental/service/server_lib_wrapper.cc +++ b/tensorflow/python/data/experimental/service/server_lib_wrapper.cc @@ -69,12 +69,16 @@ PYBIND11_MODULE(_pywrap_server_lib, m) { m.def( "TF_DATA_NewWorkerServer", - [](int port, std::string protocol, std::string dispatcher_address, - std::string worker_address) + [](std::string serialized_worker_config) -> std::unique_ptr { + tensorflow::data::experimental::WorkerConfig config; + if (!config.ParseFromString(serialized_worker_config)) { + tensorflow::MaybeRaiseFromStatus(tensorflow::errors::InvalidArgument( + "Failed to deserialize worker config.")); + } std::unique_ptr server; - tensorflow::Status status = tensorflow::data::NewWorkerServer( - port, protocol, dispatcher_address, worker_address, &server); + tensorflow::Status status = + tensorflow::data::NewWorkerServer(config, &server); tensorflow::MaybeRaiseFromStatus(status); return server; }, From c88b0e463bacf4da5d61f5b1e604ba7df445b309 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Jul 2020 12:45:46 -0700 Subject: [PATCH 1397/2522] track total time besides per pass time for KPI consumption. 
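To show how the new `WorkerConfig` fields surface through the Python `server_lib` change above, here is a minimal sketch (ports and hostnames are placeholders; the `DispatchServer` construction is assumed from the same experimental module and is not part of this patch):

```python
from tensorflow.python.data.experimental.service import server_lib

# Dispatcher that workers register with (assumed API from the same module).
dispatcher = server_lib.DispatchServer(port=5050)

# Worker bound to an OS-chosen port; "%port%" in worker_address is replaced
# with the bound port, mirroring the WorkerConfig proto fields above.
worker = server_lib.WorkerServer(
    port=0,
    dispatcher_address="localhost:5050",
    worker_address="localhost:%port%",
    protocol="grpc")
```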
PiperOrigin-RevId: 323419746 Change-Id: Id0dc6f0af3390f5461622c48ff34d6eec23b3135 --- .../core/common_runtime/optimization_registry.cc | 9 ++++++--- tensorflow/core/grappler/optimizers/meta_optimizer.cc | 11 +++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/common_runtime/optimization_registry.cc b/tensorflow/core/common_runtime/optimization_registry.cc index cfaeb05d66f..23e6959ab24 100644 --- a/tensorflow/core/common_runtime/optimization_registry.cc +++ b/tensorflow/core/common_runtime/optimization_registry.cc @@ -36,15 +36,16 @@ Status OptimizationPassRegistry::RunGrouping( Grouping grouping, const GraphOptimizationPassOptions& options) { auto group = groups_.find(grouping); if (group != groups_.end()) { + const uint64 start_us = Env::Default()->NowMicros(); for (auto& phase : group->second) { VLOG(1) << "Running optimization phase " << phase.first; for (auto& pass : phase.second) { VLOG(1) << "Running optimization pass: " << pass->name(); - const uint64 start_us = Env::Default()->NowMicros(); + const uint64 pass_start_us = Env::Default()->NowMicros(); Status s = pass->Run(options); - const uint64 end_us = Env::Default()->NowMicros(); + const uint64 pass_end_us = Env::Default()->NowMicros(); metrics::UpdateGraphOptimizationPassTime(pass->name(), - end_us - start_us); + pass_end_us - pass_start_us); if (!s.ok()) return s; if (VLOG_IS_ON(1)) { if (options.graph) { @@ -67,6 +68,8 @@ Status OptimizationPassRegistry::RunGrouping( } } } + const uint64 end_us = Env::Default()->NowMicros(); + metrics::UpdateGraphOptimizationPassTime("*", end_us - start_us); } return Status::OK(); } diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 2f1c869965d..a82ee3dbb87 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -401,6 +401,8 @@ Status MetaOptimizer::OptimizeGraph(Cluster* cluster, GrapplerItem&& item, return Status::OK(); } + const uint64 start_us = Env::Default()->NowMicros(); + std::vector> optimizers; if (cfg_.optimizers().empty()) { TF_RETURN_IF_ERROR(InitializeOptimizers(&optimizers)); @@ -530,6 +532,9 @@ Status MetaOptimizer::OptimizeGraph(Cluster* cluster, GrapplerItem&& item, DCHECK_EQ(optimized_graph->versions().producer(), original_producer); } + const uint64 end_us = Env::Default()->NowMicros(); + metrics::UpdateGrapplerPassTime("OptimizeMainGraph", end_us - start_us); + return Status::OK(); } @@ -602,6 +607,8 @@ Status MetaOptimizer::RunOptimizer( Status MetaOptimizer::OptimizeConsumeItem(Cluster* cluster, GrapplerItem&& item, GraphDef* optimized_graph) { + const uint64 start_us = Env::Default()->NowMicros(); + VLOG(1) << "Starting optimization for grappler item: " << item.id; optimization_results_.clear(); @@ -808,6 +815,10 @@ Status MetaOptimizer::OptimizeConsumeItem(Cluster* cluster, GrapplerItem&& item, reinterpret_cast(optimized_graph)), *optimized_graph); } + + const uint64 end_us = Env::Default()->NowMicros(); + metrics::UpdateGrapplerPassTime("*", end_us - start_us); + return Status::OK(); } From d21a236a62d5539a1b72c89edbaa0304fe13a49b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Jul 2020 12:47:52 -0700 Subject: [PATCH 1398/2522] Removed initial accumulator value fields since they are unused. 
PiperOrigin-RevId: 323420206 Change-Id: Ib1fb128d20485a689042eda8b360811334a2643e --- .../tpu/optimization_parameters.proto | 52 +++++++++++++------ tensorflow/python/tpu/tpu_embedding.py | 4 -- 2 files changed, 35 insertions(+), 21 deletions(-) diff --git a/tensorflow/core/protobuf/tpu/optimization_parameters.proto b/tensorflow/core/protobuf/tpu/optimization_parameters.proto index 0425762164f..1699a26abfd 100644 --- a/tensorflow/core/protobuf/tpu/optimization_parameters.proto +++ b/tensorflow/core/protobuf/tpu/optimization_parameters.proto @@ -57,7 +57,9 @@ message LearningRate { // https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Adagrad // https://github.com/tensorflow/tensorflow/blob/6b6471f3ffb7f1fefe42d814aa5fb9ab7a535b58/tensorflow/core/kernels/training_ops.cc#L1634 message AdagradParameters { - float initial_accumulator = 1; + // Old initial accumulator parameter. + reserved "initial_accumulator"; + reserved 1; } // Algorithm in http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf. @@ -84,9 +86,11 @@ message FtrlParameters { float l1 = 1; float l2 = 2; float lr_power = 3; - float initial_accum = 4; - float initial_linear = 5; bool multiply_linear_by_lr = 6; + + // Old initial accumulator parameters. + reserved "initial_accum", "initial_linear"; + reserved 4, 5; } // The Adam optimizer does not implement hyper-parameter update; use the dynamic @@ -113,10 +117,12 @@ message AdamParameters { float beta1 = 3; float beta2 = 4; float epsilon = 5; - float initial_m = 6; - float initial_v = 7; bool use_non_lazy_adam = 8; bool use_sum_inside_sqrt = 10; + + // Old initial accumulator parameters. + reserved "initial_m", "initial_v"; + reserved 6, 7; } // https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/SGD @@ -124,7 +130,10 @@ message AdamParameters { message MomentumParameters { float momentum = 1; bool use_nesterov = 2; - float initial_accum = 3; + + // Old initial accumulator parameter. + reserved "initial_accum"; + reserved 3; } // https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/RMSprop @@ -133,8 +142,10 @@ message RmsPropParameters { float rho = 1; float momentum = 2; float epsilon = 3; - float initial_ms = 4; - float initial_mom = 5; + + // Old initial accumulator parameters. + reserved "initial_ms", "initial_mom"; + reserved 4, 5; } // https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/RMSprop @@ -143,9 +154,10 @@ message CenteredRmsPropParameters { float rho = 1; float momentum = 2; float epsilon = 3; - float initial_ms = 4; - float initial_mom = 5; - float initial_mg = 6; + + // Old initial accumulator parameters. + reserved "initial_ms", "initial_mom", "initial_mg"; + reserved 4, 5, 6; } // Variant of algorithm in http://proceedings.mlr.press/v44/shamir15.pdf @@ -162,9 +174,10 @@ message MdlAdagradLightParameters { float mdl_hard_limit = 10; bool hard_limit_min_benefit = 11; bool mdl_regularize = 12; - float initial_accumulator = 13; - float initial_weight = 14; - float initial_benefit = 15; + + // Old initial accumulator parameters. + reserved "initial_accumulator", "initial_weight", "initial_benefit"; + reserved 13, 14, 15; } // https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Adadelta @@ -172,8 +185,10 @@ message MdlAdagradLightParameters { message AdadeltaParameters { float rho = 1; float epsilon = 2; - float initial_accumulator = 3; - float initial_update = 4; + + // Old initial accumulator parameters. 
+ reserved "initial_accumulator", "initial_update"; + reserved 3, 4; } // https://www.tensorflow.org/api_docs/python/tf/compat/v1/train/ProximalAdagradOptimizer @@ -181,7 +196,10 @@ message AdadeltaParameters { message ProximalAdagradParameters { float l1 = 1; float l2 = 2; - float initial_accumulator = 3; + + // Old initial accumulator parameter. + reserved "initial_accumulator"; + reserved 3; } // The online Yogi optimizer does not implement hyper-parameter update; use the diff --git a/tensorflow/python/tpu/tpu_embedding.py b/tensorflow/python/tpu/tpu_embedding.py index d1848f34502..13afe1a2147 100644 --- a/tensorflow/python/tpu/tpu_embedding.py +++ b/tensorflow/python/tpu/tpu_embedding.py @@ -1896,10 +1896,6 @@ class _FtrlHandler(_OptimizerHandler): self._optimization_parameters.l1_regularization_strength) table_descriptor.optimization_parameters.ftrl.l2 = ( self._optimization_parameters.l2_regularization_strength) - table_descriptor.optimization_parameters.ftrl.initial_accum = ( - self._optimization_parameters.initial_accumulator_value) - table_descriptor.optimization_parameters.ftrl.initial_linear = ( - self._optimization_parameters.initial_linear_value) def get_default_slot_variable_names(self, table): # These match the default slot variable names created by From c7a0ed1edc3b44971751d454bbc13ccc0bdb2765 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 27 Jul 2020 12:49:58 -0700 Subject: [PATCH 1399/2522] Allow running tests with FILECHECK_OPTS=-enable-var-scope PiperOrigin-RevId: 323420636 Change-Id: Iccb76bd1536b2c44b29834edc77dc8bf0c9aa987 --- tensorflow/compiler/mlir/hlo/tests/hlo-legalize-to-lhlo.mlir | 4 ++-- .../compiler/mlir/hlo/tests/hlo-legalize-to-linalg.mlir | 2 +- .../compiler/mlir/hlo/tests/lhlo-legalize-to-linalg.mlir | 2 +- .../mlir/hlo/tests/lhlo-legalize-to-parallel-loops.mlir | 2 +- tensorflow/compiler/mlir/hlo/tests/unfuse_batch_norm.mlir | 2 +- tensorflow/compiler/mlir/lite/tests/const-fold.mlir | 2 +- .../mlir/lite/tests/prepare-composite-functions-tf.mlir | 2 +- tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir | 2 +- .../mlir/tensorflow/tests/parallel_execute_to_islands.mlir | 2 +- .../mlir/tensorflow/tests/promote_resources_to_args.mlir | 2 +- .../compiler/mlir/tensorflow/tests/resource_op_lifting.mlir | 2 +- .../tensorflow/tests/tpu_extract_outside_compilation.mlir | 2 +- tensorflow/compiler/mlir/tensorflow/tests/tpu_rewrite.mlir | 2 +- .../compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/ops.mlir | 2 +- .../mlir/xla/tests/hlo_to_lhlo_with_xla/passthrough.mlir | 2 +- tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir | 4 ++-- tensorflow/compiler/mlir/xla/tests/translate/case.mlir | 2 +- tensorflow/compiler/mlir/xla/tests/translate/import.hlotxt | 2 +- 18 files changed, 20 insertions(+), 20 deletions(-) diff --git a/tensorflow/compiler/mlir/hlo/tests/hlo-legalize-to-lhlo.mlir b/tensorflow/compiler/mlir/hlo/tests/hlo-legalize-to-lhlo.mlir index 789ab82761a..018711e33cb 100644 --- a/tensorflow/compiler/mlir/hlo/tests/hlo-legalize-to-lhlo.mlir +++ b/tensorflow/compiler/mlir/hlo/tests/hlo-legalize-to-lhlo.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-hlo-opt -hlo-legalize-to-lhlo -buffer-placement -split-input-file %s -o - | FileCheck --check-prefixes=PRE,BOTH %s -// RUN: mlir-hlo-opt -hlo-legalize-to-lhlo=results-escape-function=true -buffer-placement -split-input-file %s -o - | FileCheck --check-prefixes=ESC,BOTH %s +// RUN: mlir-hlo-opt -hlo-legalize-to-lhlo -buffer-placement -split-input-file %s -o - | FILECHECK_OPTS="" FileCheck --check-prefixes=PRE,BOTH %s 
+// RUN: mlir-hlo-opt -hlo-legalize-to-lhlo=results-escape-function=true -buffer-placement -split-input-file %s -o - | FILECHECK_OPTS="" FileCheck --check-prefixes=ESC,BOTH %s // BOTH-LABEL: func @attrs func @attrs_copy(%operand: memref<2x2xf32>, %result: memref<2x2xf32>) { diff --git a/tensorflow/compiler/mlir/hlo/tests/hlo-legalize-to-linalg.mlir b/tensorflow/compiler/mlir/hlo/tests/hlo-legalize-to-linalg.mlir index db06708bce7..46725e0bd09 100644 --- a/tensorflow/compiler/mlir/hlo/tests/hlo-legalize-to-linalg.mlir +++ b/tensorflow/compiler/mlir/hlo/tests/hlo-legalize-to-linalg.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-hlo-opt %s -hlo-legalize-to-linalg -split-input-file | FileCheck %s +// RUN: mlir-hlo-opt %s -hlo-legalize-to-linalg -split-input-file | FILECHECK_OPTS="" FileCheck %s // CHECK: #map0 = affine_map<(d0, d1) -> (d0, d1)> // CHECK-LABEL: func @float_add diff --git a/tensorflow/compiler/mlir/hlo/tests/lhlo-legalize-to-linalg.mlir b/tensorflow/compiler/mlir/hlo/tests/lhlo-legalize-to-linalg.mlir index dd88e5c80bf..768d8da22bd 100644 --- a/tensorflow/compiler/mlir/hlo/tests/lhlo-legalize-to-linalg.mlir +++ b/tensorflow/compiler/mlir/hlo/tests/lhlo-legalize-to-linalg.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-hlo-opt %s -lhlo-legalize-to-linalg -split-input-file | FileCheck %s +// RUN: mlir-hlo-opt %s -lhlo-legalize-to-linalg -split-input-file | FILECHECK_OPTS="" FileCheck %s // CHECK: #map0 = affine_map<(d0, d1) -> (d0, d1)> // CHECK-LABEL: func @element_wise diff --git a/tensorflow/compiler/mlir/hlo/tests/lhlo-legalize-to-parallel-loops.mlir b/tensorflow/compiler/mlir/hlo/tests/lhlo-legalize-to-parallel-loops.mlir index 1530f59317d..47ef99bcac0 100644 --- a/tensorflow/compiler/mlir/hlo/tests/lhlo-legalize-to-parallel-loops.mlir +++ b/tensorflow/compiler/mlir/hlo/tests/lhlo-legalize-to-parallel-loops.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-hlo-opt %s -lhlo-legalize-to-parallel-loops -canonicalize -split-input-file | FileCheck %s +// RUN: mlir-hlo-opt %s -lhlo-legalize-to-parallel-loops -canonicalize -split-input-file | FILECHECK_OPTS="" FileCheck %s func @reduce(%arg: memref<100x10x5xf32>, %init: memref, diff --git a/tensorflow/compiler/mlir/hlo/tests/unfuse_batch_norm.mlir b/tensorflow/compiler/mlir/hlo/tests/unfuse_batch_norm.mlir index c1930721218..f903dbb7080 100644 --- a/tensorflow/compiler/mlir/hlo/tests/unfuse_batch_norm.mlir +++ b/tensorflow/compiler/mlir/hlo/tests/unfuse_batch_norm.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-hlo-opt -split-input-file -mhlo-test-unfuse-batch-norm -verify-diagnostics %s | FileCheck --enable-var-scope %s +// RUN: mlir-hlo-opt -split-input-file -mhlo-test-unfuse-batch-norm -verify-diagnostics %s | FILECHECK_OPTS="" FileCheck --enable-var-scope %s // CHECK-LABEL: @batchNormInference_2D_inner_features // CHECK-SAME: %[[X:[^:[:space:]]+]] diff --git a/tensorflow/compiler/mlir/lite/tests/const-fold.mlir b/tensorflow/compiler/mlir/lite/tests/const-fold.mlir index b37fdb9aa7b..ff7c47fb621 100644 --- a/tensorflow/compiler/mlir/lite/tests/const-fold.mlir +++ b/tensorflow/compiler/mlir/lite/tests/const-fold.mlir @@ -1,4 +1,4 @@ -// RUN: tf-opt %s -canonicalize | FileCheck %s +// RUN: tf-opt %s -canonicalize | FILECHECK_OPTS="" FileCheck %s // CHECK-LABEL: @add_float func @add_float() -> (tensor, tensor<4xf32>, tensor<4xf32>, tensor<4xf32>, tensor<4xf32>) { diff --git a/tensorflow/compiler/mlir/lite/tests/prepare-composite-functions-tf.mlir b/tensorflow/compiler/mlir/lite/tests/prepare-composite-functions-tf.mlir index a596595b2eb..6847cdd5874 100644 --- 
a/tensorflow/compiler/mlir/lite/tests/prepare-composite-functions-tf.mlir +++ b/tensorflow/compiler/mlir/lite/tests/prepare-composite-functions-tf.mlir @@ -1,4 +1,4 @@ -// RUN: tf-opt -tfl-prepare-composite-funcs-tf %s -split-input-file -verify-diagnostics | FileCheck %s +// RUN: tf-opt -tfl-prepare-composite-funcs-tf %s -split-input-file -verify-diagnostics | FILECHECK_OPTS="" FileCheck %s module{ func @embedding(%arg0: tensor<*xf32>, %arg1: tensor<*xi32>) -> tensor<*xf32> attributes {tf._implements = "embedding_matmul", tf._reference = "mlir"} { diff --git a/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir b/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir index 78e10fa797f..e7e07845fcc 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir @@ -1,4 +1,4 @@ -// RUN: tf-opt %s -test-tf-lower-tf | FileCheck %s +// RUN: tf-opt %s -test-tf-lower-tf | FILECHECK_OPTS="" FileCheck %s // CHECK-LABEL: invert_permutation func @invert_permutation(%arg0: tensor<5xi32>) -> tensor<5xi32> { diff --git a/tensorflow/compiler/mlir/tensorflow/tests/parallel_execute_to_islands.mlir b/tensorflow/compiler/mlir/tensorflow/tests/parallel_execute_to_islands.mlir index 99e029d52c2..52dc06cd393 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/parallel_execute_to_islands.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/parallel_execute_to_islands.mlir @@ -1,4 +1,4 @@ -// RUN: tf-opt %s -tf-parallel-execute-to-islands | FileCheck %s +// RUN: tf-opt %s -tf-parallel-execute-to-islands | FILECHECK_OPTS="" FileCheck %s // CHECK-LABEL: func @check_regions_to_islands func @check_regions_to_islands() { diff --git a/tensorflow/compiler/mlir/tensorflow/tests/promote_resources_to_args.mlir b/tensorflow/compiler/mlir/tensorflow/tests/promote_resources_to_args.mlir index 40cfc03b8e6..3e6d4f37bac 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/promote_resources_to_args.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/promote_resources_to_args.mlir @@ -1,4 +1,4 @@ -// RUN: tf-opt %s -split-input-file -verify-diagnostics -tf-promote-resources-to-args | FileCheck %s +// RUN: tf-opt %s -split-input-file -verify-diagnostics -tf-promote-resources-to-args | FILECHECK_OPTS="" FileCheck %s // One resource, one read. The initial value of the resource is read. // CHECK-LABEL: func @main(%arg0: tensor, %arg1: tensor {tf.resource_name = "x"}) -> tensor<2xf32> diff --git a/tensorflow/compiler/mlir/tensorflow/tests/resource_op_lifting.mlir b/tensorflow/compiler/mlir/tensorflow/tests/resource_op_lifting.mlir index d8a87b9bdb4..ac5c2df8f7e 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/resource_op_lifting.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/resource_op_lifting.mlir @@ -1,4 +1,4 @@ -// RUN: tf-opt %s -split-input-file -verify-diagnostics -tf-resource-op-lifting | FileCheck %s +// RUN: tf-opt %s -split-input-file -verify-diagnostics -tf-resource-op-lifting | FILECHECK_OPTS="" FileCheck %s // Tests that resource load operations are hoisted. 
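Every RUN-line edit in this patch follows the same pattern: the FileCheck invocation is prefixed with an explicit FILECHECK_OPTS="" so a FILECHECK_OPTS value exported in the developer's environment cannot change how these particular tests behave, while tests that want strict variable scoping still request it explicitly with --enable-var-scope. The net effect, per the subject line, is that the whole suite stays green when FILECHECK_OPTS=-enable-var-scope is exported globally. A small Python illustration of that workflow follows; the llvm-lit entry point and the chosen test path are assumptions for the sketch (in this repository the lit tests are normally driven through the build system), not part of the patch:

# Illustration only: export FILECHECK_OPTS so FileCheck enforces variable
# scoping; tests whose RUN lines clear the variable with FILECHECK_OPTS=""
# are unaffected by this environment override.
import os
import subprocess

env = dict(os.environ, FILECHECK_OPTS="-enable-var-scope")
subprocess.run(
    ["llvm-lit", "-v",
     "tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir"],
    env=env, check=True)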
diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tpu_extract_outside_compilation.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tpu_extract_outside_compilation.mlir index 208146a1226..7cabd0ea61a 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tpu_extract_outside_compilation.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tpu_extract_outside_compilation.mlir @@ -1,4 +1,4 @@ -// RUN: tf-opt %s -split-input-file -verify-diagnostics -tf-tpu-extract-outside-compilation | FileCheck %s +// RUN: tf-opt %s -split-input-file -verify-diagnostics -tf-tpu-extract-outside-compilation | FILECHECK_OPTS="" FileCheck %s // Tests that missing `_xla_outside_compilation` attribute value results in an error. diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tpu_rewrite.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tpu_rewrite.mlir index fa70ca85419..14bffb0f4d4 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tpu_rewrite.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tpu_rewrite.mlir @@ -1,4 +1,4 @@ -// RUN: tf-opt %s -split-input-file -verify-diagnostics -tf-tpu-rewrite -tpu_compile_metadata_debug | FileCheck %s +// RUN: tf-opt %s -split-input-file -verify-diagnostics -tf-tpu-rewrite -tpu_compile_metadata_debug | FILECHECK_OPTS="" FileCheck %s // Tests module with missing `tf.versions` attribute. diff --git a/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/ops.mlir b/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/ops.mlir index 09a85177fae..f8213897c28 100644 --- a/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/ops.mlir +++ b/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/ops.mlir @@ -1,4 +1,4 @@ -// RUN: xla-opt -split-input-file -xla-hlo-to-lhlo-with-xla %s | FileCheck --enable-var-scope %s +// RUN: xla-opt -split-input-file -xla-hlo-to-lhlo-with-xla %s | FILECHECK_OPTS="" FileCheck --enable-var-scope %s // CHECK-LABEL: func @main // CHECK-SAME: %[[ARG0:.*]]: memref<2x2xf32> {lmhlo.params = 0 diff --git a/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/passthrough.mlir b/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/passthrough.mlir index cc07624d63d..f39fc4f29fe 100644 --- a/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/passthrough.mlir +++ b/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/passthrough.mlir @@ -1,4 +1,4 @@ -// RUN: xla-opt -xla-hlo-to-lhlo-with-xla %s | FileCheck --enable-var-scope %s +// RUN: xla-opt -xla-hlo-to-lhlo-with-xla %s | FILECHECK_OPTS="" FileCheck --enable-var-scope %s // Current allocation will lead to one buffer argument for the "value" and // another one for the output, an no returned values. diff --git a/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir b/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir index 95acf5d8875..b09ccf025b0 100644 --- a/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir +++ b/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir @@ -1,7 +1,7 @@ -// RUN: tf-opt "-xla-legalize-tf=allow-partial-conversion legalize-chlo=false" %s | FileCheck %s +// RUN: tf-opt "-xla-legalize-tf=allow-partial-conversion legalize-chlo=false" %s | FILECHECK_OPTS="" FileCheck %s // RUN: tf-opt "-xla-legalize-tf=allow-partial-conversion legalize-chlo=true" -verify-diagnostics %s // This test runs twice: -// 1. Through FileCheck with chlo legalization disabled since verifying +// 1. Through FILECHECK_OPTS="" FileCheck with chlo legalization disabled since verifying // that the chlo ops emit produces more useful tests. // 2. 
With chlo legalization enabled, verifying diagnostics to pick up any // issues with the full lowering (can catch some broadcasting corner diff --git a/tensorflow/compiler/mlir/xla/tests/translate/case.mlir b/tensorflow/compiler/mlir/xla/tests/translate/case.mlir index 57959568287..1032bb723c5 100644 --- a/tensorflow/compiler/mlir/xla/tests/translate/case.mlir +++ b/tensorflow/compiler/mlir/xla/tests/translate/case.mlir @@ -1,4 +1,4 @@ -// RUN: tf-mlir-translate -split-input-file -mlir-hlo-to-hlo-text %s | FileCheck %s +// RUN: tf-mlir-translate -split-input-file -mlir-hlo-to-hlo-text %s | FILECHECK_OPTS="" FileCheck %s func @main() -> tensor { %cst = constant {name = "constant"} dense<1> : tensor diff --git a/tensorflow/compiler/mlir/xla/tests/translate/import.hlotxt b/tensorflow/compiler/mlir/xla/tests/translate/import.hlotxt index 2b7d44f4522..d89b1fa44e1 100644 --- a/tensorflow/compiler/mlir/xla/tests/translate/import.hlotxt +++ b/tensorflow/compiler/mlir/xla/tests/translate/import.hlotxt @@ -1,4 +1,4 @@ -// RUN: tf-mlir-translate -hlo-text-to-mlir-hlo %s -o - | FileCheck %s -DPRIVATE="attributes {sym_visibility = \"private\"}" +// RUN: tf-mlir-translate -hlo-text-to-mlir-hlo %s -o - | FILECHECK_OPTS="" FileCheck %s -DPRIVATE="attributes {sym_visibility = \"private\"}" HloModule main From f413da875ce3e599cd299379dc4d7ad282e03dcc Mon Sep 17 00:00:00 2001 From: Ran Chen Date: Mon, 27 Jul 2020 12:50:29 -0700 Subject: [PATCH 1400/2522] Abort collectives immediately after timeout instead of waiting for a second. This is to avoid crashes in two corner cases: - We now aborts the ParamResolver, which is not owned by BaseCollectiveExecutor, but the CollectiveExecutorMgr which is owned by EagerContext/Session. It's possible that it's gone when the abortion happens. - If the collective finishes before the aborting happens, i.e. within one second after the timeout, some callbacks in the stack will be called with OK and may access OP related data structures. Those structures are already gone after the timeout. PiperOrigin-RevId: 323420766 Change-Id: I8e10dc5a30d22dc0d88268bd28751e93e9108dd1 --- .../base_collective_executor.cc | 11 +--- tensorflow/python/ops/collective_ops_test.py | 62 +++++++++++++++++-- 2 files changed, 60 insertions(+), 13 deletions(-) diff --git a/tensorflow/core/common_runtime/base_collective_executor.cc b/tensorflow/core/common_runtime/base_collective_executor.cc index 85d911da24f..cd46ab76b62 100644 --- a/tensorflow/core/common_runtime/base_collective_executor.cc +++ b/tensorflow/core/common_runtime/base_collective_executor.cc @@ -226,19 +226,12 @@ void BaseCollectiveExecutor::ExecuteAsync(OpKernelContext* ctx, // On any individual collective Op failure we need to abort the // BufRendezvous so that other Ops in the instance don't hang - // waiting for transmissions that will never happen. Do so after a - // delay so that the original error status is more likely to - // propagate up, and peers are unlikely to re-create the purged - // BufRendezvous by late-arriving requests. + // waiting for transmissions that will never happen. StatusCallback done_safe = [this, done, is_callback_called](const Status& s) { auto should_call_callback = !is_callback_called->exchange(true); if (should_call_callback) { if (!s.ok()) { - Ref(); // Ensure this lasts until the closure executes. 
- SchedNonBlockingClosureAfter(1000000, [this, s] { - remote_access_->buf_rendezvous()->StartAbort(s); - Unref(); - }); + remote_access_->buf_rendezvous()->StartAbort(s); } done(s); } diff --git a/tensorflow/python/ops/collective_ops_test.py b/tensorflow/python/ops/collective_ops_test.py index 6ddcdb3f31e..fd93da34847 100644 --- a/tensorflow/python/ops/collective_ops_test.py +++ b/tensorflow/python/ops/collective_ops_test.py @@ -18,10 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import os import threading import time -import unittest from tensorflow.core.protobuf import config_pb2 from tensorflow.core.protobuf import rewriter_config_pb2 @@ -256,6 +254,64 @@ class CollectiveOpTest(test.TestCase): final_op='Id', timeout=timeout) + @test_util.run_v2_only + def testExecutionAfterTimeoutV2(self): + timeout = 1.5 + cpus = config.list_physical_devices('CPU') + self.assertEqual(len(cpus), 1) + config.set_logical_device_configuration(cpus[0], [ + context.LogicalDeviceConfiguration(), + context.LogicalDeviceConfiguration() + ]) + context.ensure_initialized() + + group_key = 20 + instance_key = 30 + input_data = constant_op.constant([1, 2, 3, 4]) + + @def_function.function + def run_all_reduce(): + for device in ['CPU:0', 'CPU:1']: + with ops.device(device): + collective_ops.all_reduce( + input_data, + group_size=2, + group_key=group_key, + instance_key=instance_key, + merge_op='Add', + final_op='Id', + timeout=timeout) + + # Run a normal all-reduce to complete param resolution. + run_all_reduce() + + with self.assertRaisesRegex(errors.DeadlineExceededError, + 'Collective has timed out during execution'): + with ops.device('CPU:0'): + collective_ops.all_reduce( + input_data, + group_size=2, + group_key=group_key, + instance_key=instance_key, + merge_op='Add', + final_op='Id', + timeout=timeout) + + # We launch the second device after the first device times out. This is to + # simulate the situation when other workers are slow and the timeout is + # short. It should error immediately. + with self.assertRaisesRegex(errors.DeadlineExceededError, + 'Collective has timed out during execution'): + with ops.device('CPU:1'): + # No timeout. + collective_ops.all_reduce( + input_data, + group_size=2, + group_key=group_key, + merge_op='Add', + final_op='Id', + instance_key=instance_key) + def testNcclHintFallbackToRingReduce(self): """Tests that setting `communication_hint=nccl` works on non-GPU builds.""" if kernels.get_registered_kernels_for_op('NcclAllReduce'): @@ -742,7 +798,6 @@ class CollectiveOpTest(test.TestCase): def_function.function(collective_fn)() @test_util.run_v2_only - @unittest.skipIf(os.name == 'nt', 'b/161922535: Flaky on Windows') def testAbortInstanceParamsResolution(self): cpus = config.list_physical_devices('CPU') config.set_logical_device_configuration(cpus[0], [ @@ -802,7 +857,6 @@ class CollectiveOpTest(test.TestCase): def_function.function(collective_fn)() @test_util.run_v2_only - @unittest.skipIf(os.name == 'nt', 'b/161922535: Flaky on Windows') def testAbortRing(self): cpus = config.list_physical_devices('CPU') config.set_logical_device_configuration(cpus[0], [ From d475b4f7a5cc3235489cb21253fd537accb3ac5e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Jul 2020 13:01:19 -0700 Subject: [PATCH 1401/2522] Make generated build_info source file deterministic. The order of a plain dict is not guaranteed. 
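A minimal, self-contained sketch of the fix (plain Python, separate from the real gen_build_info.py): the repr of a plain dict follows insertion order on recent Pythons and is not guaranteed at all on older ones, so interpolating it into the generated module made the output depend on how the build collected its keys; sorting the pairs and emitting an OrderedDict pins both the generated text and the runtime key order. The key names below are hypothetical inputs for the illustration.

# Illustration only, not the generator script itself: sorting the pairs before
# formatting them makes the emitted source text independent of insertion order.
import collections

build_info = {"is_rocm_build": False, "is_cuda_build": True}  # hypothetical inputs
sorted_pairs = sorted(build_info.items())
contents = "build_info = collections.OrderedDict(%s)\n" % sorted_pairs
print(contents)
# -> build_info = collections.OrderedDict([('is_cuda_build', True), ('is_rocm_build', False)])
assert list(collections.OrderedDict(sorted_pairs)) == ["is_cuda_build", "is_rocm_build"]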
PiperOrigin-RevId: 323423187 Change-Id: I7a086ce8cb6995b0da9e2eddceab671349ad41b6 --- tensorflow/python/platform/build_info_test.py | 4 ++++ tensorflow/tools/build_info/gen_build_info.py | 9 +++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/platform/build_info_test.py b/tensorflow/python/platform/build_info_test.py index be253885715..8eac0de2cbc 100644 --- a/tensorflow/python/platform/build_info_test.py +++ b/tensorflow/python/platform/build_info_test.py @@ -30,6 +30,10 @@ class BuildInfoTest(test.TestCase): self.assertEqual(build_info.build_info['is_cuda_build'], test.is_built_with_cuda()) + def testDeterministicOrder(self): + self.assertEqual(['is_cuda_build', 'is_rocm_build'], + list(build_info.build_info.keys())) + if __name__ == '__main__': test.main() diff --git a/tensorflow/tools/build_info/gen_build_info.py b/tensorflow/tools/build_info/gen_build_info.py index 19478ab4dc2..8f84ac4584c 100755 --- a/tensorflow/tools/build_info/gen_build_info.py +++ b/tensorflow/tools/build_info/gen_build_info.py @@ -52,6 +52,9 @@ def write_build_info(filename, key_value_list): else: build_info[key] = value.format(**build_info) + # Sort the build info to ensure deterministic output. + sorted_build_info_pairs = sorted(build_info.items()) + contents = """ # Copyright 2020 The TensorFlow Authors. All Rights Reserved. # @@ -72,8 +75,10 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -build_info = {build_info} -""".format(build_info=build_info) +import collections + +build_info = collections.OrderedDict(%s) +""" % sorted_build_info_pairs open(filename, "w").write(contents) From c9ec28eebea0d368999ea4f7ee8de84ef3d1759e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Jul 2020 13:06:23 -0700 Subject: [PATCH 1402/2522] add buffer size and collective type to nccl collective annotation to be consumed by cupti tracer. PiperOrigin-RevId: 323424357 Change-Id: Ifbeab89b39c8946d4de17c910ad4dd496b5c7654 --- tensorflow/core/nccl/BUILD | 1 + tensorflow/core/nccl/nccl_manager.cc | 43 +++++++++++++++++++++++++--- 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/nccl/BUILD b/tensorflow/core/nccl/BUILD index a63e862c621..388b5e62c18 100644 --- a/tensorflow/core/nccl/BUILD +++ b/tensorflow/core/nccl/BUILD @@ -45,6 +45,7 @@ cc_library( "//tensorflow/core:stream_executor", "//tensorflow/core/profiler/lib:traceme", "//tensorflow/core/profiler/lib:connected_traceme", + "//tensorflow/core/profiler/lib:annotated_traceme", ]), alwayslink = 1, ) diff --git a/tensorflow/core/nccl/nccl_manager.cc b/tensorflow/core/nccl/nccl_manager.cc index 619885e9f34..bb4e7c90a06 100644 --- a/tensorflow/core/nccl/nccl_manager.cc +++ b/tensorflow/core/nccl/nccl_manager.cc @@ -23,6 +23,7 @@ limitations under the License. #include "tensorflow/core/lib/core/refcount.h" #include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/platform/env.h" +#include "tensorflow/core/profiler/lib/annotated_traceme.h" #include "tensorflow/core/profiler/lib/connected_traceme.h" #include "tensorflow/core/profiler/lib/traceme.h" #if GOOGLE_CUDA @@ -666,6 +667,20 @@ void NcclManager::RunCollective(Collective* collective) { collective->Unref(); } +namespace { +// For tracing purpose. 
+size_t ComputeBufferSize(const NcclManager::Participant* p, + DataType data_type) { + size_t num_elements = 0; + if (p->output) { + num_elements += p->output->NumElements(); + } else if (p->input) { + num_elements += p->input->NumElements(); + } + return num_elements * DataTypeSize(data_type); +} +} // namespace + void NcclManager::LoopKernelLaunches(NcclStream* nccl_stream) { #if TENSORFLOW_USE_ROCM se::Stream* comm_stream = nccl_stream->stream; @@ -713,7 +728,12 @@ void NcclManager::LoopKernelLaunches(NcclStream* nccl_stream) { << " sendbuff " << sendbuff << " recvbuff " << recvbuff << " nccl_comm " << nccl_comm << " comm_stream " << comm_stream << " cuda_stream " << cu_stream; - profiler::TraceMe trace_me("ncclAllReduce"); + profiler::AnnotatedTraceMe traceme([&] { + return profiler::TraceMeEncode( + "ncclAllReduce", + {{"buffer_size", ComputeBufferSize(p, collective->data_type)}, + {"collective_type", "all_reduce"}}); + }); nccl_result = ncclAllReduce(sendbuff, recvbuff, p->input->NumElements(), data_type, collective->reduction_op, nccl_comm, *cu_stream); @@ -745,7 +765,12 @@ void NcclManager::LoopKernelLaunches(NcclStream* nccl_stream) { << " sendbuff " << sendbuff << " recvbuff " << recvbuff << " nccl_comm " << nccl_comm << " comm_stream " << comm_stream << " cuda_stream " << cu_stream; - profiler::TraceMe trace_me("ncclBroadcast"); + profiler::AnnotatedTraceMe traceme([&] { + return profiler::TraceMeEncode( + "ncclBroadcast", + {{"buffer_size", ComputeBufferSize(p, collective->data_type)}, + {"collective_type", "broadcast"}}); + }); nccl_result = ncclBroadcast(sendbuff, recvbuff, num_elements, data_type, collective->root_rank, nccl_comm, *cu_stream); @@ -756,7 +781,12 @@ void NcclManager::LoopKernelLaunches(NcclStream* nccl_stream) { void* recvbuff = p->output ? const_cast(p->output->tensor_data().data()) : nullptr; - profiler::TraceMe trace_me("ncclReduce"); + profiler::AnnotatedTraceMe traceme([&] { + return profiler::TraceMeEncode( + "buffer_size", + {{"output_size", ComputeBufferSize(p, collective->data_type)}, + {"collective_type", "reduce"}}); + }); nccl_result = ncclReduce(sendbuff, recvbuff, p->input->NumElements(), data_type, collective->reduction_op, collective->root_rank, nccl_comm, *cu_stream); @@ -773,7 +803,12 @@ void NcclManager::LoopKernelLaunches(NcclStream* nccl_stream) { << " recvcount " << p->output->NumElements() << " nccl_comm " << nccl_comm << " comm_stream " << comm_stream << " cuda_stream " << cu_stream; - profiler::TraceMe trace_me("ncclAllGather"); + profiler::AnnotatedTraceMe traceme([&] { + return profiler::TraceMeEncode( + "ncclAllGather", + {{"buffer_size", ComputeBufferSize(p, collective->data_type)}, + {"collective_type", "all_gather"}}); + }); nccl_result = ncclAllGather(sendbuff, recvbuff, p->input->NumElements(), data_type, nccl_comm, *cu_stream); break; From f27f2de368f56136035c3d3630616c8a82bc2b21 Mon Sep 17 00:00:00 2001 From: Smit Hinsu Date: Mon, 27 Jul 2020 13:19:53 -0700 Subject: [PATCH 1403/2522] Add a pass to parallelize TPU embedding params ops on different shards This pass moves LoadTPUEmbedding* and corresponding ReadVariable ops to different regions using the parallel_execute op. This parallel_execute op is later broken in different islands by ParallelExecuteToIslands pass and these islands can progress in parallel. This pass is required to avoid control dependencies between ops on different shards during export to the tf_executor dialect. Also, added this pass to the pass pipeline. 
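A rough sketch of the bucketing this pass performs, written as plain Python over (op name, shard_id) pairs rather than the MLIR API (all names below are illustrative): ops are grouped by the shard_id attribute of their LoadTPUEmbedding* op, the shard ids must form the continuous range 0..num_shards-1, and each bucket then becomes one region of the generated parallel_execute op.

# Conceptual sketch only: group per-shard ops and check the shard ids form a
# continuous range, mirroring the precondition the pass enforces before it
# emits one parallel_execute region per shard.
from collections import defaultdict

def group_by_shard(ops):
    buckets = defaultdict(list)
    for name, shard_id in ops:
        buckets[shard_id].append(name)
    num_shards = len(buckets)
    if sorted(buckets) != list(range(num_shards)):
        raise ValueError("load tpu embedding ops require continuous range of shards")
    return [buckets[shard] for shard in range(num_shards)]

regions = group_by_shard([
    ("LoadTPUEmbeddingAdagradParameters_shard0", 0),
    ("LoadTPUEmbeddingAdagradParameters_shard1", 1),
])
assert len(regions) == 2  # one region per shard, free of cross-shard control deps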
PiperOrigin-RevId: 323426936 Change-Id: If250a57dfdd137ba25e265581a653dfa104323d3 --- tensorflow/compiler/mlir/tensorflow/BUILD | 2 + ...parallelize_embedding_params_ops_pass.mlir | 96 +++++++++++ .../mlir/tensorflow/transforms/bridge.cc | 1 + .../parallelize_embedding_params_ops_pass.cc | 152 ++++++++++++++++++ .../mlir/tensorflow/transforms/passes.h | 5 + 5 files changed, 256 insertions(+) create mode 100644 tensorflow/compiler/mlir/tensorflow/tests/parallelize_embedding_params_ops_pass.mlir create mode 100644 tensorflow/compiler/mlir/tensorflow/transforms/parallelize_embedding_params_ops_pass.cc diff --git a/tensorflow/compiler/mlir/tensorflow/BUILD b/tensorflow/compiler/mlir/tensorflow/BUILD index 26c47e580e8..2a800cfc8c4 100644 --- a/tensorflow/compiler/mlir/tensorflow/BUILD +++ b/tensorflow/compiler/mlir/tensorflow/BUILD @@ -734,6 +734,7 @@ cc_library( "transforms/materialize_mlir_passthrough_op.cc", "transforms/optimize.cc", "transforms/parallel_execute_to_islands.cc", + "transforms/parallelize_embedding_params_ops_pass.cc", "transforms/promote_resources_to_args.cc", "transforms/readonly_references_to_resources.cc", "transforms/region_control_flow_to_functional.cc", @@ -808,6 +809,7 @@ cc_library( "//tensorflow/core/platform:random", "//tensorflow/core/protobuf/tpu:compile_metadata_proto_cc", "//tensorflow/core/protobuf/tpu:dynamic_padding_proto_cc", + "//tensorflow/core/tpu:tpu_embedding_optimization_parameters_utils", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@llvm-project//llvm:Support", diff --git a/tensorflow/compiler/mlir/tensorflow/tests/parallelize_embedding_params_ops_pass.mlir b/tensorflow/compiler/mlir/tensorflow/tests/parallelize_embedding_params_ops_pass.mlir new file mode 100644 index 00000000000..e1cfaba5dcc --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/tests/parallelize_embedding_params_ops_pass.mlir @@ -0,0 +1,96 @@ +// RUN: tf-opt %s -tf-parallize-embedding-params-ops -verify-diagnostics -split-input-file | FileCheck %s + +// CHECK-LABEL: func @two_shards +func @two_shards(%arg0: tensor<*x!tf.resource>>, %arg1: tensor<*x!tf.resource>>, %arg2: tensor<*x!tf.resource>>, %arg3: tensor<*x!tf.resource>>) { + tf_executor.graph { + %control = tf_executor.island { + // CHECK: "tf_device.parallel_execute" + // CHECK: "tf.ReadVariableOp" + // CHECK: "tf.ReadVariableOp" + // CHECK: "tf.LoadTPUEmbeddingAdagradParameters" + // CHECK: tf_device.return + // CHECK: "tf.ReadVariableOp" + // CHECK: "tf.ReadVariableOp" + // CHECK: "tf.LoadTPUEmbeddingAdagradParameters" + // CHECK: tf_device.return + %0 = "tf.ReadVariableOp"(%arg0) {device = ""} : (tensor<*x!tf.resource>>) -> tensor<8xf32> + %1 = "tf.ReadVariableOp"(%arg1) {device = ""} : (tensor<*x!tf.resource>>) -> tensor<8xf32> + %2 = "tf.ReadVariableOp"(%arg2) {device = ""} : (tensor<*x!tf.resource>>) -> tensor<8xf32> + %3 = "tf.ReadVariableOp"(%arg3) {device = ""} : (tensor<*x!tf.resource>>) -> tensor<8xf32> + "tf.LoadTPUEmbeddingAdagradParameters"(%0, %1) {config = "", device = "/job:worker/replica:0/task:0/device:CPU:0", num_shards = 2 : i64, shard_id = 0 : i64, table_id = -1 : i64, table_name = "param_table"} : (tensor<8xf32>, tensor<8xf32>) -> () + "tf.LoadTPUEmbeddingAdagradParameters"(%2, %3) {config = "", device = "/job:worker/replica:0/task:1/device:CPU:0", num_shards = 2 : i64, shard_id = 1 : i64, table_id = -1 : i64, table_name = "param_table"} : (tensor<8xf32>, tensor<8xf32>) -> () + tf_executor.yield + } + tf_executor.fetch %control : !tf_executor.control + } + return +} + +// 
Verifies that resource reads shared across two shards are kept outside the +// parallel_execute op. + +// CHECK-LABEL: func @shared_reads +func @shared_reads(%arg0: tensor<*x!tf.resource>>, %arg1: tensor<*x!tf.resource>>) { + tf_executor.graph { + %control = tf_executor.island { + // CHECK: "tf.ReadVariableOp" + %0 = "tf.ReadVariableOp"(%arg0) {device = ""} : (tensor<*x!tf.resource>>) -> tensor<8xf32> + // CHECK: "tf.ReadVariableOp" + %1 = "tf.ReadVariableOp"(%arg1) {device = ""} : (tensor<*x!tf.resource>>) -> tensor<8xf32> + + // CHECK: "tf_device.parallel_execute" + // CHECK: "tf.LoadTPUEmbeddingAdagradParameters" + // CHECK: tf_device.return + // CHECK: "tf.LoadTPUEmbeddingAdagradParameters" + // CHECK: tf_device.return + "tf.LoadTPUEmbeddingAdagradParameters"(%0, %1) {config = "", device = "/job:worker/replica:0/task:0/device:CPU:0", num_shards = 2 : i64, shard_id = 0 : i64, table_id = -1 : i64, table_name = "param_table"} : (tensor<8xf32>, tensor<8xf32>) -> () + "tf.LoadTPUEmbeddingAdagradParameters"(%0, %1) {config = "", device = "/job:worker/replica:0/task:1/device:CPU:0", num_shards = 2 : i64, shard_id = 1 : i64, table_id = -1 : i64, table_name = "param_table"} : (tensor<8xf32>, tensor<8xf32>) -> () + tf_executor.yield + } + tf_executor.fetch %control : !tf_executor.control + } + return +} + +// Verifies that if the resource variables are used in ops other than read +// variable op whose semantics are not known then the function is kept +// unchanged. + +// CHECK-LABEL: func @update_var +func @update_var(%arg0: tensor<*x!tf.resource>>, %arg1: tensor<*x!tf.resource>>, %arg2: tensor<*x!tf.resource>>) { + tf_executor.graph { + // CHECK-NOT: tf_device.parallel_execute + %control = tf_executor.island { + %0 = "tf.ReadVariableOp"(%arg0) {device = ""} : (tensor<*x!tf.resource>>) -> tensor<8xf32> + %1 = "tf.ReadVariableOp"(%arg1) {device = ""} : (tensor<*x!tf.resource>>) -> tensor<8xf32> + "tf.LoadTPUEmbeddingAdagradParameters"(%0, %1) {config = "", device = "/job:worker/replica:0/task:0/device:CPU:0", num_shards = 2 : i64, shard_id = 0 : i64, table_id = -1 : i64, table_name = "param_table"} : (tensor<8xf32>, tensor<8xf32>) -> () + + %2 = "tf.ReadVariableOp"(%arg2) {device = ""} : (tensor<*x!tf.resource>>) -> tensor<8xf32> + %zeros = "tf.Const"() {value = dense<1.0> : tensor<8xf32>} : () -> tensor<8xf32> + "tf.AssignVariableOp"(%arg2, %zeros) : (tensor<*x!tf.resource>>, tensor<8xf32>) -> () + %3 = "tf.ReadVariableOp"(%arg2) {device = ""} : (tensor<*x!tf.resource>>) -> tensor<8xf32> + "tf.LoadTPUEmbeddingAdagradParameters"(%2, %3) {config = "", device = "/job:worker/replica:0/task:1/device:CPU:0", num_shards = 2 : i64, shard_id = 1 : i64, table_id = -1 : i64, table_name = "param_table"} : (tensor<8xf32>, tensor<8xf32>) -> () + tf_executor.yield + } + tf_executor.fetch %control : !tf_executor.control + } + return +} + +// ----- + +func @invalid_shard_range(%arg0: tensor<*x!tf.resource>>, %arg1: tensor<*x!tf.resource>>) { + tf_executor.graph { + %control = tf_executor.island { + // expected-error @-1 {{require continuous range of shards}} + %0 = "tf.ReadVariableOp"(%arg0) {device = ""} : (tensor<*x!tf.resource>>) -> tensor<8xf32> + %1 = "tf.ReadVariableOp"(%arg1) {device = ""} : (tensor<*x!tf.resource>>) -> tensor<8xf32> + + "tf.LoadTPUEmbeddingAdagradParameters"(%0, %1) {config = "", device = "/job:worker/replica:0/task:0/device:CPU:0", num_shards = 3 : i64, shard_id = 0 : i64, table_id = -1 : i64, table_name = "param_table"} : (tensor<8xf32>, tensor<8xf32>) -> () + 
"tf.LoadTPUEmbeddingAdagradParameters"(%0, %1) {config = "", device = "/job:worker/replica:0/task:1/device:CPU:0", num_shards = 3 : i64, shard_id = 3 : i64, table_id = -1 : i64, table_name = "param_table"} : (tensor<8xf32>, tensor<8xf32>) -> () + tf_executor.yield + } + tf_executor.fetch %control : !tf_executor.control + } + return +} diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc b/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc index 35ffabb9131..783664960bc 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc @@ -41,6 +41,7 @@ namespace TFTPU { namespace { void AddGraphExportLoweringPasses(OpPassManager &pm) { pm.addNestedPass(CreateFunctionalToExecutorDialectConversionPass()); + pm.addNestedPass(TFDevice::CreateParallelizeEmbeddingParamsOpsPass()); pm.addNestedPass(CreateBreakUpIslandsPass()); pm.addNestedPass(TFDevice::CreateReplicateToIslandPass()); pm.addNestedPass(CreateBreakUpIslandsPass()); diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/parallelize_embedding_params_ops_pass.cc b/tensorflow/compiler/mlir/tensorflow/transforms/parallelize_embedding_params_ops_pass.cc new file mode 100644 index 00000000000..527af0934ea --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/transforms/parallelize_embedding_params_ops_pass.cc @@ -0,0 +1,152 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// This transformation parallelizes TPU embedding params assigned to different +// shards using the parallel execute op. This is useful to avoid introducing +// control dependency between these ops that are known to be independent. 
+ +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "mlir/IR/Attributes.h" // from @llvm-project +#include "mlir/IR/Block.h" // from @llvm-project +#include "mlir/IR/Builders.h" // from @llvm-project +#include "mlir/IR/Operation.h" // from @llvm-project +#include "mlir/Pass/Pass.h" // from @llvm-project +#include "mlir/Pass/PassRegistry.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_device.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" +#include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" +#include "tensorflow/core/tpu/tpu_embedding_optimization_parameters_utils.h" + +namespace mlir { +namespace TFDevice { + +namespace { + +struct ParallelizeEmbeddingParamsOpsPass + : public PassWrapper { + void runOnFunction() override; +}; + +bool IsLoadTPUEmbeddingParmasOp(Operation& op) { + static const auto* algorithms = []() { + auto* algorithms = new llvm::SmallSet(); + for (tensorflow::tpu::OptimizationAlgorithm alg : + tensorflow::tpu::GetOptimizationAlgorithms()) { + const auto alg_name = tensorflow::tpu::GetOptimizationAlgorithmName(alg); + algorithms->insert(alg_name); + } + return algorithms; + }(); + StringRef op_name = op.getName().getStringRef(); + return op_name.consume_front("tf.LoadTPUEmbedding") && + op_name.consume_back("Parameters") && + algorithms->contains(op_name.str()); +} + +static LogicalResult RunOnIsland(tf_executor::IslandOp island) { + Block* block = island.getBody(); + + // Map from op to the id of the shard it is assigned for ops that can execute + // in parallel across shards. + llvm::SmallMapVector assigned_shard; + llvm::SmallVector resources; + llvm::SmallSet shard_ids; + for (Operation& op : llvm::reverse(*block)) { + int64_t shard = -1; + if (IsLoadTPUEmbeddingParmasOp(op)) { + auto shard_id = op.getAttrOfType("shard_id"); + if (!shard_id) { + return op.emitOpError("requires 'shard_id' integer attribute"); + } + shard = shard_id.getInt(); + shard_ids.insert(shard); + } else if (auto read_op = llvm::dyn_cast(op)) { + if (assigned_shard.empty()) continue; + + for (Operation* user : op.getUsers()) { + auto iter = assigned_shard.find(user); + if (iter == assigned_shard.end() || + (shard != -1 && shard != iter->second)) { + shard = -1; + break; + } + shard = iter->second; + } + if (shard != -1) resources.push_back(read_op.resource()); + } + + if (shard != -1) assigned_shard.insert(std::make_pair(&op, shard)); + } + + // No transformations are required. + int num_shards = shard_ids.size(); + if (num_shards <= 1) return success(); + + // If the resources are used for ops other than read variable op, then moving + // read variable ops to the parallel_execute may not preserve the semantics. + for (Value resource : resources) { + for (Operation* user : resource.getUsers()) + if (!llvm::isa(*user)) return success(); + } + + // Create parallel_execute op at the end of the block and move operations + // to their corresponding shard. 
+ auto builder = OpBuilder::atBlockTerminator(block); + auto parallel_execute_op = builder.create( + island.getLoc(), num_shards, llvm::ArrayRef()); + for (int shard_id = 0; shard_id < num_shards; ++shard_id) { + mlir::Block& b = parallel_execute_op.GetRegionBlockWithIndex(shard_id); + builder.setInsertionPointToStart(&b); + builder.create(island.getLoc()); + } + + for (auto op_shard : assigned_shard) { + int64_t shard = op_shard.second; + if (shard >= num_shards) { + return island.emitOpError( + "load tpu embedding ops require continuous range of shards"); + } + mlir::Block& b = parallel_execute_op.GetRegionBlockWithIndex(shard); + op_shard.first->moveBefore(&b, b.begin()); + } + return success(); +} + +void ParallelizeEmbeddingParamsOpsPass::runOnFunction() { + getFunction().walk([&](tf_executor::IslandOp island) { + if (failed(RunOnIsland(island))) { + signalPassFailure(); + return WalkResult::interrupt(); + } + return WalkResult::advance(); + }); +} + +} // namespace + +std::unique_ptr> +CreateParallelizeEmbeddingParamsOpsPass() { + return std::make_unique(); +} +} // namespace TFDevice +} // namespace mlir + +static mlir::PassRegistration + pass("tf-parallize-embedding-params-ops", + "Parallelizes TPU embedding params assigned to different shards using " + "the parallel_execte op"); diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/passes.h b/tensorflow/compiler/mlir/tensorflow/transforms/passes.h index 68bc9d09e91..9c8790afa1d 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/passes.h +++ b/tensorflow/compiler/mlir/tensorflow/transforms/passes.h @@ -242,6 +242,11 @@ std::unique_ptr> CreateReplicateToIslandPass(); // `tf_device.parallel_execute` island. std::unique_ptr> CreateParallelExecuteToIslandsPass(); +// Create a pass to parallelize TPU embedding params assigned to different +// shards using the parallel_execte op. +std::unique_ptr> +CreateParallelizeEmbeddingParamsOpsPass(); + // Creates a pass that annotates whether a LaunchFuncOp's parameters have the // same data across replicas. std::unique_ptr> From 24d4ff08e93d7f9490bb5b11f483130665bd7904 Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Mon, 27 Jul 2020 13:40:17 -0700 Subject: [PATCH 1404/2522] Fix conditional canonicalizer to use clone instead of mutating existing conditional's shape. Today I learned that it's not a good idea to mess up with existing instruction's shape.. Because: 1. It makes the code more confusing. 2. It may mess up with existing computation's root shape, and if we change root shape, we discard all aliasing info. 
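Sketched in plain Python rather than XLA's C++ (the toy Node type below is purely illustrative), the pattern the fix adopts is: never assign a new shape to a node that other code may still reference; instead build a clone carrying the tuple-wrapped shape, read the original value back out through a get-tuple-element, and rewire users to that.

# Toy illustration of clone-and-replace versus in-place mutation; none of this
# is the XLA API.
from dataclasses import dataclass

@dataclass(frozen=True)            # frozen: shapes cannot be silently rewritten
class Node:
    name: str
    shape: tuple

conditional = Node("conditional", ("f32", 2, 2))
computation_root = conditional     # some other structure still points here

new_conditional = Node("conditional.clone", (("f32", 2, 2),))  # tuple-wrapped shape
gte = Node("get-tuple-element", ("f32", 2, 2))                 # element 0 of the clone
users = {"old_user_of_conditional": gte}                       # rewired to the GTE

assert computation_root.shape == ("f32", 2, 2)  # the old reference never changed

The location cited next is where a changed root shape causes the aliasing information to be discarded.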
See tensorflow/compiler/xla/service/hlo_computation.cc:347 PiperOrigin-RevId: 323431445 Change-Id: I64f0bd68a4e9832aa5d2aa22959a4083d9ae23f4 --- .../xla/service/conditional_canonicalizer.cc | 11 +++++++---- tensorflow/compiler/xla/service/hlo_instruction.cc | 13 ++++++++++--- tensorflow/compiler/xla/service/hlo_instruction.h | 5 +++++ 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/xla/service/conditional_canonicalizer.cc b/tensorflow/compiler/xla/service/conditional_canonicalizer.cc index 3d917eb39fe..8af8e11febd 100644 --- a/tensorflow/compiler/xla/service/conditional_canonicalizer.cc +++ b/tensorflow/compiler/xla/service/conditional_canonicalizer.cc @@ -31,11 +31,14 @@ Status CanonicalizeNonTupleConditional(HloInstruction* conditional) { branch->AddInstruction(HloInstruction::CreateTuple({root})); branch->set_root_instruction(tuple, /*accept_different_shape=*/true); } + auto parent = conditional->parent(); auto root_shape = conditional->shape(); - *conditional->mutable_shape() = ShapeUtil::MakeTupleShape({root_shape}); - auto gte = conditional->parent()->AddInstruction( - HloInstruction::CreateGetTupleElement(root_shape, conditional, 0)); - TF_RETURN_IF_ERROR(conditional->ReplaceAllUsesWithDifferentShape(gte)); + auto new_shape = ShapeUtil::MakeTupleShape({root_shape}); + auto new_conditional = + parent->AddInstruction(conditional->CloneWithNewShape(new_shape)); + auto gte = parent->AddInstruction( + HloInstruction::CreateGetTupleElement(root_shape, new_conditional, 0)); + TF_RETURN_IF_ERROR(parent->ReplaceInstruction(conditional, gte)); return Status::OK(); } } // namespace diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 6ffb149b893..4335ed312c3 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -1750,10 +1750,10 @@ void HloInstruction::DetachFromOperandsAndUsers() { } } -std::unique_ptr HloInstruction::Clone( - const string& suffix, HloCloneContext* context) const { +std::unique_ptr HloInstruction::CloneWithNewShape( + const Shape& shape, const string& suffix, HloCloneContext* context) const { std::unique_ptr clone = - CloneWithNewOperands(shape_, operands_, context); + CloneWithNewOperands(shape, operands_, context); if (suffix.empty()) { clone->name_ = name(); } else { @@ -1790,6 +1790,13 @@ std::unique_ptr HloInstruction::Clone( return clone; } +std::unique_ptr HloInstruction::Clone( + const string& suffix, HloCloneContext* context) const { + std::unique_ptr clone = + CloneWithNewShape(shape_, suffix, context); + return clone; +} + std::pair HloInstruction::LatestNonGteAncestorAndIndex() const { const HloInstruction* hlo = this; diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index 95a56d7595b..e29323c25b4 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -1419,6 +1419,11 @@ class HloInstruction { std::unique_ptr Clone( const string& suffix = "clone", HloCloneContext* context = nullptr) const; + // Clones the HLO instruction as above but with new shape. + std::unique_ptr CloneWithNewShape( + const Shape& shape, const string& suffix = "clone", + HloCloneContext* context = nullptr) const; + // Clones the HLO instruction as above but with new shape and operands. 
std::unique_ptr CloneWithNewOperands( const Shape& shape, absl::Span new_operands, From 3f4e18fd9354de8106d37f10382fc49be146b577 Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Mon, 27 Jul 2020 14:07:40 -0700 Subject: [PATCH 1405/2522] Use utility to identify OnWrite and OnRead synchronized variables. PiperOrigin-RevId: 323437049 Change-Id: I7d1370a55cdbea18d2ebcb7eeebdc572d77fd0f6 --- .../distribute/mirrored_strategy_test.py | 34 +++++++++---------- .../distribute/mirrored_variable_test.py | 29 ++++++++-------- .../distribute/mirrored_variable_test.py | 6 ++-- 3 files changed, 35 insertions(+), 34 deletions(-) diff --git a/tensorflow/python/distribute/mirrored_strategy_test.py b/tensorflow/python/distribute/mirrored_strategy_test.py index d2a567589b9..5c86cbea1a4 100644 --- a/tensorflow/python/distribute/mirrored_strategy_test.py +++ b/tensorflow/python/distribute/mirrored_strategy_test.py @@ -658,7 +658,7 @@ class MirroredThreeDeviceDistributionTest( with distribution.scope(): result = distribution.extended.call_for_each_replica(model_fn) - self.assertIsInstance(result, values.MirroredVariable) + self.assertTrue(distribute_utils.is_mirrored(result)) self.assertEqual("foo:0", result.name) @@ -680,7 +680,7 @@ class MirroredVariableUpdateTest(test.TestCase): with distribution.scope(): mirrored_var = distribution.extended.call_for_each_replica(var_fn) - self.assertIsInstance(mirrored_var, values.MirroredVariable) + self.assertTrue(distribute_utils.is_mirrored(mirrored_var)) self.evaluate(variables.global_variables_initializer()) def model_fn(): @@ -700,7 +700,7 @@ class MirroredVariableUpdateTest(test.TestCase): with distribution.scope(): mirrored_var = distribution.extended.call_for_each_replica(var_fn) - self.assertIsInstance(mirrored_var, values.MirroredVariable) + self.assertTrue(distribute_utils.is_mirrored(mirrored_var)) self.evaluate(variables.global_variables_initializer()) def model_fn(): @@ -718,7 +718,7 @@ class MirroredVariableUpdateTest(test.TestCase): with distribution.scope(): mirrored_var = distribution.extended.call_for_each_replica(var_fn) - self.assertIsInstance(mirrored_var, values.MirroredVariable) + self.assertTrue(distribute_utils.is_mirrored(mirrored_var)) self.evaluate(variables.global_variables_initializer()) self.assertEqual(1.0, self.evaluate(mirrored_var)) mirrored_var_result = self.evaluate(mirrored_var.assign(6.0)) @@ -731,7 +731,7 @@ class MirroredVariableUpdateTest(test.TestCase): with distribution.scope(): mirrored_var = distribution.extended.call_for_each_replica(var_fn) - self.assertIsInstance(mirrored_var, values.MirroredVariable) + self.assertTrue(distribute_utils.is_mirrored(mirrored_var)) self.evaluate(variables.global_variables_initializer()) self.assertEqual(1.0, self.evaluate(mirrored_var)) @@ -752,7 +752,7 @@ class MirroredVariableUpdateTest(test.TestCase): with distribution.scope(): mirrored_var = distribution.extended.call_for_each_replica(var_fn) - self.assertIsInstance(mirrored_var, values.MirroredVariable) + self.assertTrue(distribute_utils.is_mirrored(mirrored_var)) self.evaluate(variables.global_variables_initializer()) self.assertEqual(1.0, self.evaluate(mirrored_var)) @@ -769,7 +769,7 @@ class MirroredVariableUpdateTest(test.TestCase): with distribution.scope(): mirrored_var = distribution.extended.call_for_each_replica(var_fn) - self.assertIsInstance(mirrored_var, values.MirroredVariable) + self.assertTrue(distribute_utils.is_mirrored(mirrored_var)) self.evaluate(variables.global_variables_initializer()) self.assertEqual(1.0, 
self.evaluate(mirrored_var)) @@ -812,7 +812,7 @@ class MirroredVariableUpdateTest(test.TestCase): with distribution.scope(): mirrored_var = distribution.extended.call_for_each_replica(var_fn) - self.assertIsInstance(mirrored_var, values.MirroredVariable) + self.assertTrue(distribute_utils.is_mirrored(mirrored_var)) self.evaluate(variables.global_variables_initializer()) self.assertEqual(1.0, self.evaluate(mirrored_var)) @@ -833,7 +833,7 @@ class MirroredVariableUpdateTest(test.TestCase): with distribution.scope(): mirrored_var = distribution.extended.call_for_each_replica(var_fn) - self.assertIsInstance(mirrored_var, values.MirroredVariable) + self.assertTrue(distribute_utils.is_mirrored(mirrored_var)) self.evaluate(variables.global_variables_initializer()) self.assertEqual(1.0, self.evaluate(mirrored_var)) @@ -850,7 +850,7 @@ class MirroredVariableUpdateTest(test.TestCase): with distribution.scope(): mirrored_var = distribution.extended.call_for_each_replica(var_fn) - self.assertIsInstance(mirrored_var, values.MirroredVariable) + self.assertTrue(distribute_utils.is_mirrored(mirrored_var)) self.evaluate(variables.global_variables_initializer()) self.assertEqual(5.0, self.evaluate(mirrored_var)) mirrored_var_result = self.evaluate(mirrored_var.assign_sub(2.0)) @@ -875,7 +875,7 @@ class MirroredVariableUpdateTest(test.TestCase): with distribution.scope(): mirrored_var = distribution.extended.call_for_each_replica(var_fn) - self.assertIsInstance(mirrored_var, values.MirroredVariable) + self.assertTrue(distribute_utils.is_mirrored(mirrored_var)) self.evaluate(variables.global_variables_initializer()) self.assertEqual(5.0, self.evaluate(mirrored_var)) @@ -896,7 +896,7 @@ class MirroredVariableUpdateTest(test.TestCase): with distribution.scope(): mirrored_var = distribution.extended.call_for_each_replica(var_fn) - self.assertIsInstance(mirrored_var, values.MirroredVariable) + self.assertTrue(distribute_utils.is_mirrored(mirrored_var)) self.evaluate(variables.global_variables_initializer()) self.assertEqual(5.0, self.evaluate(mirrored_var)) @@ -926,7 +926,7 @@ class MirroredAndSyncOnReadVariableInitializerTest(test.TestCase): with distribution.scope(): mirrored_var = distribution.extended.call_for_each_replica(var_fn) - self.assertIsInstance(mirrored_var, values.MirroredVariable) + self.assertTrue(distribute_utils.is_mirrored(mirrored_var)) self.assertFalse(self.evaluate(mirrored_var.is_initialized())) self.evaluate(mirrored_var.initializer) self.assertTrue(self.evaluate(mirrored_var.is_initialized())) @@ -940,13 +940,13 @@ class MirroredAndSyncOnReadVariableInitializerTest(test.TestCase): 1.0, synchronization=variable_scope.VariableSynchronization.ON_READ, aggregation=variable_scope.VariableAggregation.SUM) - self.assertIsInstance(v_sum, values.SyncOnReadVariable) + self.assertTrue(distribute_utils.is_sync_on_read(v_sum)) return v_sum with distribution.scope(): sync_on_read_var = distribution.extended.call_for_each_replica( model_fn) - self.assertIsInstance(sync_on_read_var, values.SyncOnReadVariable) + self.assertTrue(distribute_utils.is_sync_on_read(sync_on_read_var)) self.assertFalse(self.evaluate(sync_on_read_var.is_initialized())) self.evaluate(sync_on_read_var.initializer) self.assertTrue(self.evaluate(sync_on_read_var.is_initialized())) @@ -970,7 +970,7 @@ class SyncOnReadVariableAssignTest(test.TestCase): with distribution.scope(): sync_on_read_var = distribution.extended.call_for_each_replica(model_fn) - self.assertIsInstance(sync_on_read_var, values.SyncOnReadVariable) + 
self.assertTrue(distribute_utils.is_sync_on_read(sync_on_read_var)) self.evaluate(variables.global_variables_initializer()) # Each replica has a value of 1.0 assigned to it in replica context. # When we read the value using `read_var` we should see the SUM of each of @@ -997,7 +997,7 @@ class SyncOnReadVariableAssignTest(test.TestCase): with distribution.scope(): sync_on_read_var = distribution.extended.call_for_each_replica(model_fn) - self.assertIsInstance(sync_on_read_var, values.SyncOnReadVariable) + self.assertTrue(distribute_utils.is_sync_on_read(sync_on_read_var)) self.evaluate(variables.global_variables_initializer()) # Each replica has a value of 1.0 assigned to it in replica context. # When we read the value using `read_var` we should see the MEAN of values diff --git a/tensorflow/python/distribute/mirrored_variable_test.py b/tensorflow/python/distribute/mirrored_variable_test.py index 03d697fe1eb..53a18fb271b 100644 --- a/tensorflow/python/distribute/mirrored_variable_test.py +++ b/tensorflow/python/distribute/mirrored_variable_test.py @@ -20,6 +20,7 @@ from __future__ import print_function from tensorflow.python.distribute import collective_all_reduce_strategy from tensorflow.python.distribute import combinations +from tensorflow.python.distribute import distribute_utils from tensorflow.python.distribute import distribution_strategy_context as ds_context from tensorflow.python.distribute import strategy_combinations from tensorflow.python.distribute import values @@ -89,7 +90,7 @@ class MirroredVariableCreationTest(test.TestCase): # TODO(priyag): Modify more tests to use this helper and check more # properties. def _test_mv_properties(self, var, name, strategy): - self.assertIsInstance(var, values.MirroredVariable) + self.assertTrue(distribute_utils.is_mirrored(var)) self.assertEqual(name, var.name) self.assertIs(strategy, var.distribute_strategy) for i, d in enumerate(var._devices): @@ -185,7 +186,7 @@ class MirroredVariableCreationTest(test.TestCase): with distribution.scope(): result = distribution.extended.call_for_each_replica(model_fn) for v in result: - self.assertIsInstance(v, values.MirroredVariable) + self.assertTrue(distribute_utils.is_mirrored(v)) self.assertEqual(4, len(result)) self.assertEqual("foo/bar:0", result[0].name) self.assertEqual("foo_1/bar:0", result[1].name) @@ -202,7 +203,7 @@ class MirroredVariableCreationTest(test.TestCase): with distribution.scope(): result = distribution.extended.call_for_each_replica(model_fn) - self.assertIsInstance(result, values.MirroredVariable) + self.assertTrue(distribute_utils.is_mirrored(result)) # The resulting mirrored variable will use the name from the first device. 
self.assertEqual("foo_0:0", result.name) @@ -234,14 +235,14 @@ class MirroredVariableCreationTest(test.TestCase): result = distribution.extended.call_for_each_replica(model_fn) self.assertEqual(4, len(result)) v0, v1, v2, v3 = result - self.assertIsInstance(v0, values.MirroredVariable) + self.assertTrue(distribute_utils.is_mirrored(v0)) self.assertEqual("var0:0", v0.name) - self.assertIsInstance(v1, values.MirroredVariable) + self.assertTrue(distribute_utils.is_mirrored(v1)) self.assertEqual("common/var1:0", v1.name) - self.assertIsInstance(v2, values.SyncOnReadVariable) + self.assertTrue(distribute_utils.is_sync_on_read(v2)) self.assertEqual("common/var2:0", v2.name) self.assertEqual(variable_scope.VariableAggregation.SUM, v2.aggregation) - self.assertIsInstance(v3, values.MirroredVariable) + self.assertTrue(distribute_utils.is_mirrored(v3)) self.assertEqual("common/var3:0", v3.name) self.assertEqual(variable_scope.VariableAggregation.MEAN, v3.aggregation) @@ -272,14 +273,14 @@ class MirroredVariableCreationTest(test.TestCase): result = distribution.extended.call_for_each_replica(model_fn) self.assertEqual(4, len(result)) v0, v1, v2, v3 = result - self.assertIsInstance(v0, values.MirroredVariable) + self.assertTrue(distribute_utils.is_mirrored(v0)) self.assertEqual("main/var0:0", v0.name) - self.assertIsInstance(v1, values.MirroredVariable) + self.assertTrue(distribute_utils.is_mirrored(v1)) self.assertEqual("main/common/var1:0", v1.name) - self.assertIsInstance(v2, values.SyncOnReadVariable) + self.assertTrue(distribute_utils.is_sync_on_read(v2)) self.assertEqual("main/common/var2:0", v2.name) self.assertEqual(variable_scope.VariableAggregation.SUM, v2.aggregation) - self.assertIsInstance(v3, values.MirroredVariable) + self.assertTrue(distribute_utils.is_mirrored(v3)) self.assertEqual("main/common/var3:0", v3.name) self.assertEqual(variable_scope.VariableAggregation.MEAN, v3.aggregation) @@ -453,8 +454,8 @@ class MirroredVariableCreationTest(test.TestCase): 4.0, synchronization=variable_scope.VariableSynchronization.ON_READ, aggregation=variable_scope.VariableAggregation.MEAN) - self.assertIsInstance(v_sum, values.SyncOnReadVariable) - self.assertIsInstance(v_mean, values.SyncOnReadVariable) + self.assertTrue(distribute_utils.is_sync_on_read(v_sum)) + self.assertTrue(distribute_utils.is_sync_on_read(v_mean)) updates = [ v_sum.assign_add(2.0 + replica_id), v_mean.assign(6.0 * replica_id) @@ -548,7 +549,7 @@ class MirroredVariableCreationTest(test.TestCase): 1.0, synchronization=variable_scope.VariableSynchronization.ON_READ, aggregation=variable_scope.VariableAggregation.SUM) - self.assertIsInstance(v_sum, values.SyncOnReadVariable) + self.assertTrue(distribute_utils.is_sync_on_read(v_sum)) return v_sum def update(var, value): diff --git a/tensorflow/python/keras/distribute/mirrored_variable_test.py b/tensorflow/python/keras/distribute/mirrored_variable_test.py index 0edfa4806f2..e24420ffc4c 100644 --- a/tensorflow/python/keras/distribute/mirrored_variable_test.py +++ b/tensorflow/python/keras/distribute/mirrored_variable_test.py @@ -21,9 +21,9 @@ from __future__ import print_function from tensorflow.python.data.ops import dataset_ops from tensorflow.python.distribute import collective_all_reduce_strategy from tensorflow.python.distribute import combinations +from tensorflow.python.distribute import distribute_utils from tensorflow.python.distribute import distribution_strategy_context as ds_context from tensorflow.python.distribute import strategy_combinations -from 
tensorflow.python.distribute import values from tensorflow.python.eager import context from tensorflow.python.eager import test from tensorflow.python.framework import config @@ -96,9 +96,9 @@ class MirroredVariableCreationTest(test.TestCase): result = distribution.extended.call_for_each_replica( model_fn, args=(features,)) for kernel, bias in result: - self.assertIsInstance(kernel, values.MirroredVariable) + self.assertTrue(distribute_utils.is_mirrored(kernel)) self.assertAllDifferent(distribution.experimental_local_results(kernel)) - self.assertIsInstance(bias, values.MirroredVariable) + self.assertTrue(distribute_utils.is_mirrored(bias)) self.assertAllDifferent(distribution.experimental_local_results(kernel)) From 15e1a0802a1cd38c33af6b2626d6f5414690e3c9 Mon Sep 17 00:00:00 2001 From: Frederic Bastien Date: Mon, 27 Jul 2020 14:22:26 -0700 Subject: [PATCH 1406/2522] Make LLVM code easier to read. --- tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc index eee0fc83481..3f000a2491d 100644 --- a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc @@ -289,7 +289,7 @@ StatusOr GpuElementalIrEmitter::EmitTanh(PrimitiveType prim_type, auto one_with_sign = llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::copysign, {one, input}, {type}, b_); return FPCast(Select(FCmpULT(abs_value, max_value), fast_tanh, one_with_sign), - value->getType()); + value->getType(), "tanh"); } StatusOr GpuElementalIrEmitter::EmitComplexAbs( From ac4bda59d60f22ab5bc902b367d6ce1cbbe8a889 Mon Sep 17 00:00:00 2001 From: Bixia Zheng Date: Mon, 27 Jul 2020 14:36:31 -0700 Subject: [PATCH 1407/2522] [TF] Fix test_group_conv flakiness. Increase the error tolerance from rtol=1e-05 and atol=1e-06 to rtol=3e-5 and atol=3e-5. This fixes the flakiness of the test. Enable the test. PiperOrigin-RevId: 323443013 Change-Id: Ia36784ad44f7b6a2fa48a83435a66942646b3c48 --- tensorflow/python/keras/layers/convolutional_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/keras/layers/convolutional_test.py b/tensorflow/python/keras/layers/convolutional_test.py index 3ab3acd0ff9..0bc869160ec 100644 --- a/tensorflow/python/keras/layers/convolutional_test.py +++ b/tensorflow/python/keras/layers/convolutional_test.py @@ -433,7 +433,7 @@ class GroupedConvTest(keras_parameterized.TestCase): ('Conv2D', keras.layers.Conv2D, (32, 12, 12, 32)), ('Conv3D', keras.layers.Conv3D, (32, 12, 12, 12, 32)), ) - def disable_test_group_conv(self, layer_cls, input_shape): + def test_group_conv(self, layer_cls, input_shape): if test.is_gpu_available(cuda_only=True): with testing_utils.use_gpu(): inputs = random_ops.random_uniform(shape=input_shape) @@ -448,8 +448,8 @@ class GroupedConvTest(keras_parameterized.TestCase): for inputs, weights in zip(input_slices, weight_slices) ], axis=-1) - - self.assertAllClose(layer(inputs), expected_outputs, rtol=1e-5) + self.assertAllClose( + layer(inputs), expected_outputs, rtol=3e-5, atol=3e-5) def test_group_conv_depthwise(self): if test.is_gpu_available(cuda_only=True): From dd3ce26d7ba4a628cf08902942e5f376808e2052 Mon Sep 17 00:00:00 2001 From: Karim Nosir Date: Mon, 27 Jul 2020 14:48:33 -0700 Subject: [PATCH 1408/2522] Add member function in OpBuilder to compute min/max and add them to the graph. 
Also, updated activation and arg min/max builders. PiperOrigin-RevId: 323445668 Change-Id: I558bfe8b1fe3055762022457668770345c8367c1 --- .../hexagon/builders/activation_builder.cc | 9 ++------- .../hexagon/builders/arg_min_max_builder.cc | 10 +--------- .../lite/delegates/hexagon/builders/op_builder.cc | 15 +++++++++++++++ .../lite/delegates/hexagon/builders/op_builder.h | 8 ++++++++ 4 files changed, 26 insertions(+), 16 deletions(-) diff --git a/tensorflow/lite/delegates/hexagon/builders/activation_builder.cc b/tensorflow/lite/delegates/hexagon/builders/activation_builder.cc index feb061158ea..896c7e30fd4 100644 --- a/tensorflow/lite/delegates/hexagon/builders/activation_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/activation_builder.cc @@ -19,6 +19,7 @@ limitations under the License. #include #include "tensorflow/lite/c/builtin_op_data.h" +#include "tensorflow/lite/c/common.h" #include "tensorflow/lite/delegates/hexagon/hexagon_nn/hexagon_nn.h" #include "tensorflow/lite/kernels/kernel_util.h" @@ -32,13 +33,7 @@ TfLiteStatus ActivationOpBuilder::PopulateSubGraph( int tensor_id = inputs->data[0]; const auto& input_tensor = context->tensors[tensor_id]; AddInput(graph_builder_->GetHexagonTensorId(tensor_id)); - ComputeMinAndMaxQuantValues(input_tensor, &input_min_, &input_max_); - auto* input_min_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&input_min_), sizeof(input_min_)); - auto* input_max_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&input_max_), sizeof(input_max_)); - AddInput(TensorID(input_min_const->GetID(), 0)); - AddInput(TensorID(input_max_const->GetID(), 0)); + TF_LITE_ENSURE_STATUS(ComputeAndAddMinAndMax(context, input_tensor)); if (op_node_.op_type == OP_QuantizedReluX_8) { auto* relu_value_const = graph_builder_->AddConstNodeWithData( diff --git a/tensorflow/lite/delegates/hexagon/builders/arg_min_max_builder.cc b/tensorflow/lite/delegates/hexagon/builders/arg_min_max_builder.cc index 4cd2dc1f897..cc4b97a6222 100644 --- a/tensorflow/lite/delegates/hexagon/builders/arg_min_max_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/arg_min_max_builder.cc @@ -54,15 +54,7 @@ TfLiteStatus ArgMinMaxOpBuilder::PopulateSubGraph(const TfLiteIntArray* inputs, AddInput(TensorID(input_axis_const->GetID(), 0)); // Compute Min/Max - TF_LITE_ENSURE_STATUS( - ComputeMinAndMaxQuantValues(input_tensor, &input_min_, &input_max_)); - auto* input_min_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&input_min_), sizeof(input_min_)); - auto* input_max_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&input_max_), sizeof(input_max_)); - - AddInput(TensorID(input_min_const->GetID(), 0)); - AddInput(TensorID(input_max_const->GetID(), 0)); + TF_LITE_ENSURE_STATUS(ComputeAndAddMinAndMax(context, input_tensor)); // Output Node int output_batch_size, output_height_size, output_width_size, diff --git a/tensorflow/lite/delegates/hexagon/builders/op_builder.cc b/tensorflow/lite/delegates/hexagon/builders/op_builder.cc index 95cec93a41a..0f32a4de6e1 100644 --- a/tensorflow/lite/delegates/hexagon/builders/op_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/op_builder.cc @@ -279,6 +279,21 @@ const OpNode* OpBuilder::Build() { return &op_node_; } +TfLiteStatus OpBuilder::ComputeAndAddMinAndMax(TfLiteContext* context, + const TfLiteTensor& tensor) { + float tensor_min, tensor_max; + TF_LITE_ENSURE_STATUS( + ComputeMinAndMaxQuantValues(tensor, &tensor_min, 
&tensor_max)); + auto* min_const_node = graph_builder_->AddConstNodeWithData( + kScalarShape, reinterpret_cast(&tensor_min), sizeof(tensor_min)); + auto* max_const_node = graph_builder_->AddConstNodeWithData( + kScalarShape, reinterpret_cast(&tensor_max), sizeof(tensor_max)); + AddInput(TensorID(min_const_node->GetID(), 0)); + AddInput(TensorID(max_const_node->GetID(), 0)); + + return kTfLiteOk; +} + // Static constexpr int OpBuilder::kScalarShape[]; diff --git a/tensorflow/lite/delegates/hexagon/builders/op_builder.h b/tensorflow/lite/delegates/hexagon/builders/op_builder.h index 8cfa90565bd..52b130c756f 100644 --- a/tensorflow/lite/delegates/hexagon/builders/op_builder.h +++ b/tensorflow/lite/delegates/hexagon/builders/op_builder.h @@ -182,6 +182,14 @@ class OpBuilder { } } + // Computes the min and max for 'tensor' and adds them as input + // to the node. + TfLiteStatus ComputeAndAddMinAndMax(TfLiteContext* context, + const TfLiteTensor& tensor); + + // Computes the float min and max for 'tensor', given 'min_value' and + // 'max_value' data range. The float min and max will be set in 'min' and + // 'max' params template static TfLiteStatus ComputeMinAndMaxQuantValues(const TfLiteTensor& tensor, float* min, float* max, From 32ab3697b55c9369a975d0a106930b2b73a8b940 Mon Sep 17 00:00:00 2001 From: Jonathan Chu Date: Mon, 27 Jul 2020 21:57:24 +0000 Subject: [PATCH 1409/2522] Fix pylint sanity check style errors --- tensorflow/python/eager/benchmarks_test.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/eager/benchmarks_test.py b/tensorflow/python/eager/benchmarks_test.py index b8c083f0ada..8f5edf5eabb 100644 --- a/tensorflow/python/eager/benchmarks_test.py +++ b/tensorflow/python/eager/benchmarks_test.py @@ -476,12 +476,15 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): func = lambda: f(m, m, transpose_b=transpose_b) self._run(func, num_iters, execution_mode=execution_mode) - def _benchmark_defun_matmul_with_signature(self, m, num_iters, execution_mode=None): - + def _benchmark_defun_matmul_with_signature(self, + m, + num_iters, + execution_mode=None): def func_matmul(m): return math_ops.matmul(m, m) f = function.defun( - func_matmul, input_signature=[tensor_spec.TensorSpec([2, 2], dtypes.float32)]) + func_matmul, + input_signature=[tensor_spec.TensorSpec([2, 2], dtypes.float32)]) func = lambda: f(m) self._run(func, num_iters, execution_mode=execution_mode) @@ -589,7 +592,8 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): def benchmark_defun_matmul_2_by_2_CPU_with_signature(self): with context.device(CPU): m = self._m_2_by_2.cpu() - self._benchmark_defun_matmul_with_signature(m, num_iters=self._num_iters_2_by_2) + self._benchmark_defun_matmul_with_signature( + m, num_iters=self._num_iters_2_by_2) @test_util.disable_tfrt("Graph is not supported yet. b/156187905") def benchmark_defun_args_matmul_2_by_2_CPU(self): @@ -677,7 +681,8 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): return with context.device(GPU): m = self._m_2_by_2.gpu() - self._benchmark_defun_matmul_with_signature(m, num_iters=self._num_iters_2_by_2) + self._benchmark_defun_matmul_with_signature( + m, num_iters=self._num_iters_2_by_2) @test_util.disable_tfrt("Graph is not supported yet. 
b/156187905") def benchmark_defun_args_matmul_2_by_2_GPU(self): From 247db5c30ea6aa144f791ba9eaf2e60e449d55a4 Mon Sep 17 00:00:00 2001 From: Sachin Joglekar Date: Mon, 27 Jul 2020 15:02:02 -0700 Subject: [PATCH 1410/2522] Perform MEAN axis check while choosing nodes to delegate PiperOrigin-RevId: 323448703 Change-Id: If1282ca7bb3faf66769b3f4e8324082a540ffa43 --- .../delegates/gpu/common/model_builder.cc | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/common/model_builder.cc b/tensorflow/lite/delegates/gpu/common/model_builder.cc index 386743d8a3c..f1df46810d7 100644 --- a/tensorflow/lite/delegates/gpu/common/model_builder.cc +++ b/tensorflow/lite/delegates/gpu/common/model_builder.cc @@ -2562,8 +2562,23 @@ class MeanOperationParser : public TFLiteOperationParser { absl::Status IsSupported(const TfLiteContext* context, const TfLiteNode* tflite_node, const TfLiteRegistration* registration) final { - return CheckInputsOutputs(context, tflite_node, /*runtime_inputs=*/1, - /*outputs=*/1); + RETURN_IF_ERROR(CheckInputsOutputs(context, tflite_node, + /*runtime_inputs=*/1, + /*outputs=*/1)); + + // Simple mechanism to check if MEAN is to be performed only on HW plane. + auto* axes = &context->tensors[tflite_node->inputs->data[1]]; + if (axes->allocation_type != kTfLiteMmapRo || axes->type != kTfLiteInt32) { + return absl::UnimplementedError("Mean has unsupported tensor for axes"); + } + auto* axes_data = axes->data.i32; + const bool is_hw_mean = tflite::NumElements(axes) == 2 && + ((axes_data[0] == 1 && axes_data[1] == 2) || + (axes_data[0] == 2 && axes_data[1] == 1)); + if (!is_hw_mean) { + return absl::UnimplementedError("Mean operation supports only HW plane"); + } + return absl::OkStatus(); } absl::Status Parse(const TfLiteNode* tflite_node, From 64d3dbdba3897188ec66317bd46f06c7abd4589a Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Mon, 27 Jul 2020 22:12:15 +0000 Subject: [PATCH 1411/2522] added includes for each symbol used in summary_op --- tensorflow/c/kernels/BUILD | 11 +++++++++-- tensorflow/c/kernels/ops/summary.cc | 1 + tensorflow/c/kernels/summary_op.cc | 13 ++++++++++--- tensorflow/c/kernels/summary_op_test.cc | 16 +++++++++++++++- 4 files changed, 35 insertions(+), 6 deletions(-) diff --git a/tensorflow/c/kernels/BUILD b/tensorflow/c/kernels/BUILD index 87ae0339c6f..fb1c7deb054 100644 --- a/tensorflow/c/kernels/BUILD +++ b/tensorflow/c/kernels/BUILD @@ -31,7 +31,9 @@ tf_kernel_library( "//tensorflow/c/kernels:tensor_shape_utils", "//tensorflow/c:kernels", "//tensorflow/c:tf_tensor", - "//tensorflow/core:framework" + "//tensorflow/c:tf_status", + "//tensorflow/core:framework", + "//tensorflow/core:lib", ], ) @@ -51,6 +53,7 @@ tf_gen_op_libs( op_lib_names = ["summary"], deps = [ "//tensorflow/c:ops", + "//tensorflow/c:tf_status", "//tensorflow/core:lib", ], ) @@ -73,7 +76,10 @@ tf_cc_test( srcs = ["summary_op_test.cc"], deps = [ ":summary_op", - ":summary_op_lib", + "//tensorflow/c:kernels", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core:testlib" ], @@ -94,6 +100,7 @@ tf_cc_test( ":tensor_shape_utils", "//tensorflow/core:lib", "//tensorflow/core:test", + "//tensorflow/core:framework", "//tensorflow/core:test_main", ], ) diff --git a/tensorflow/c/kernels/ops/summary.cc b/tensorflow/c/kernels/ops/summary.cc index a9c3b697f14..36f23684ef2 100644 --- a/tensorflow/c/kernels/ops/summary.cc +++ 
b/tensorflow/c/kernels/ops/summary.cc @@ -14,6 +14,7 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/c/ops.h" +#include "tensorflow/c/tf_status.h" #include "tensorflow/core/framework/selective_registration.h" #include "tensorflow/core/platform/logging.h" diff --git a/tensorflow/c/kernels/summary_op.cc b/tensorflow/c/kernels/summary_op.cc index 5d98d0c0477..925521c7c73 100644 --- a/tensorflow/c/kernels/summary_op.cc +++ b/tensorflow/c/kernels/summary_op.cc @@ -15,10 +15,17 @@ limitations under the License. ==============================================================================*/ #include +#include +#include "tensorflow/c/kernels/tensor_shape_utils.h" #include "tensorflow/c/kernels.h" #include "tensorflow/c/tf_tensor.h" -#include "tensorflow/c/kernels/tensor_shape_utils.h" +#include "tensorflow/c/tf_status.h" +#include "tensorflow/core/platform/protobuf.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/platform/tstring.h" +#include "tensorflow/core/platform/strcat.h" #include "tensorflow/core/framework/selective_registration.h" #include "tensorflow/core/framework/summary.pb.h" #include "tensorflow/core/framework/types.h" @@ -60,7 +67,7 @@ void ScalarSummaryOp_Delete(void* kernel) { bool IsSameSize(TF_Tensor* tensor1, TF_Tensor* tensor2); // Returns a string representation of a single tag or empty string if there // are multiple tags -tensorflow::string SingleTag(TF_Tensor* tags); +std::string SingleTag(TF_Tensor* tags); template void ScalarSummaryOp_Compute(void* kernel, TF_OpKernelContext* ctx) { @@ -117,7 +124,7 @@ bool IsSameSize(TF_Tensor* tensor1, TF_Tensor* tensor2) { return true; } -tensorflow::string SingleTag(TF_Tensor* tags) { +std::string SingleTag(TF_Tensor* tags) { if (TF_TensorElementCount(tags) == 1) { const char* single_tag = static_cast( TF_TensorData(tags))->c_str(); diff --git a/tensorflow/c/kernels/summary_op_test.cc b/tensorflow/c/kernels/summary_op_test.cc index 4c691379bed..b19c19ad225 100644 --- a/tensorflow/c/kernels/summary_op_test.cc +++ b/tensorflow/c/kernels/summary_op_test.cc @@ -13,12 +13,26 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ +#include "tensorflow/c/kernels.h" +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/device_base.h" #include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/framework/attr_value_util.h" #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_def_builder.h" #include "tensorflow/core/framework/summary.pb.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/status.h" +#include "tensorflow/core/platform/strcat.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/protobuf.h" +#include "tensorflow/core/protobuf/error_codes.pb.h" +#include "tensorflow/core/lib/gtl/inlined_vector.h" + namespace tensorflow { namespace { From a9b6a48489ac1177c7923492eceedf27a1b8288c Mon Sep 17 00:00:00 2001 From: Meghna Natraj Date: Mon, 27 Jul 2020 15:19:12 -0700 Subject: [PATCH 1412/2522] Fix errors related to flatbuffers python library in Kokoro ubuntu_16 GCS build PiperOrigin-RevId: 323452135 Change-Id: I3d602969e74dfbca5d83f153f107742dd9685f5b --- third_party/flatbuffers/build_defs.bzl | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/third_party/flatbuffers/build_defs.bzl b/third_party/flatbuffers/build_defs.bzl index 02027aa09f5..1fbe629e66a 100644 --- a/third_party/flatbuffers/build_defs.bzl +++ b/third_party/flatbuffers/build_defs.bzl @@ -358,11 +358,8 @@ def _concat_flatbuffer_py_srcs_impl(ctx): outputs = [ctx.outputs.out], command = ( "find '%s' -name '*.py' -exec cat {} + |" + - "sed '/import flatbuffers/d' |" + - "sed 's/from flatbuffers." + - "/from flatbuffers.python.flatbuffers./' |" + - "sed '1s/^/from flatbuffers.python " + - "import flatbuffers\\'$'\\n/' > %s" + "sed 's/from flatbuffers.compat import import_numpy/import numpy as np' |" + + "sed '/np = import_numpy()/d' > %s" ) % ( ctx.attr.deps[0].files.to_list()[0].path, ctx.outputs.out.path, From 20cd718248ceb09a4d770c85af7fd6d4b8fc843f Mon Sep 17 00:00:00 2001 From: Nat Jeffries Date: Mon, 27 Jul 2020 15:24:16 -0700 Subject: [PATCH 1413/2522] Port the split, strided_slice and sub kernels to the new TfLiteEvalTensor API along with removing initializer lists from their tests. 
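For orientation, a minimal sketch of the Eval shape these kernels move to (simplified: an element-wise copy stands in for the real per-kernel math, and the helper names follow tensorflow/lite/micro/kernels/kernel_util.h as used in the hunks below):

  #include "tensorflow/lite/c/common.h"
  #include "tensorflow/lite/micro/kernels/kernel_util.h"

  TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
    const TfLiteEvalTensor* input =
        tflite::micro::GetEvalInput(context, node, 0);
    TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
    // TfLiteEvalTensor carries only dims, type and data; anything that needs
    // the full TfLiteTensor (e.g. the constant-axis check in split) moves to
    // Prepare.
    const float* in = tflite::micro::GetTensorData<float>(input);
    float* out = tflite::micro::GetTensorData<float>(output);
    int flat_size = 1;
    for (int d = 0; d < input->dims->size; ++d) {
      flat_size *= input->dims->data[d];
    }
    for (int i = 0; i < flat_size; ++i) {
      out[i] = in[i];
    }
    return kTfLiteOk;
  }
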
PiperOrigin-RevId: 323453073 Change-Id: I3ecbaa4716870618c2f052826f4bde178fe38366 --- tensorflow/lite/micro/kernels/BUILD | 3 + tensorflow/lite/micro/kernels/split.cc | 29 +- tensorflow/lite/micro/kernels/split_test.cc | 473 ++--- .../lite/micro/kernels/strided_slice.cc | 67 +- .../lite/micro/kernels/strided_slice_test.cc | 1814 ++++++++--------- tensorflow/lite/micro/kernels/sub.cc | 113 +- tensorflow/lite/micro/kernels/sub_test.cc | 40 +- 7 files changed, 1187 insertions(+), 1352 deletions(-) diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index 3a63a6fdcf9..0e4289073ec 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD @@ -317,6 +317,7 @@ tflite_micro_cc_test( "sub_test.cc", ], deps = [ + ":kernel_runner", "//tensorflow/lite/c:common", "//tensorflow/lite/micro:op_resolvers", "//tensorflow/lite/micro/testing:micro_test", @@ -380,6 +381,7 @@ tflite_micro_cc_test( "strided_slice_test.cc", ], deps = [ + ":kernel_runner", "//tensorflow/lite/c:common", "//tensorflow/lite/micro:op_resolvers", "//tensorflow/lite/micro/testing:micro_test", @@ -420,6 +422,7 @@ tflite_micro_cc_test( "split_test.cc", ], deps = [ + ":kernel_runner", "//tensorflow/lite/c:common", "//tensorflow/lite/micro:debug_log", "//tensorflow/lite/micro:op_resolvers", diff --git a/tensorflow/lite/micro/kernels/split.cc b/tensorflow/lite/micro/kernels/split.cc index 926280af997..9bff0b700e7 100644 --- a/tensorflow/lite/micro/kernels/split.cc +++ b/tensorflow/lite/micro/kernels/split.cc @@ -17,6 +17,7 @@ limitations under the License. #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { namespace ops { @@ -25,10 +26,11 @@ namespace split { template TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node, - const TfLiteTensor* input, int axis_value) { + const TfLiteEvalTensor* input, int axis_value) { const int output_count = NumOutputs(node); const TfLiteIntArray* input_dims = input->dims; - const TfLiteTensor* output0 = GetOutput(context, node, 0); + const TfLiteEvalTensor* output0 = + tflite::micro::GetEvalOutput(context, node, 0); const TfLiteIntArray* output_dims = output0->dims; const int split_dimensions = input_dims->size; @@ -50,11 +52,11 @@ TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node, base_inner_size *= input_dims->data[i]; } - const T* input_ptr = GetTensorData(input); + const T* input_ptr = tflite::micro::GetTensorData(input); for (int k = 0; k < outer_size; ++k) { for (int i = 0; i < output_count; ++i) { - TfLiteTensor* t = GetOutput(context, node, i); - T* output_data = GetTensorData(t); + TfLiteEvalTensor* t = tflite::micro::GetEvalOutput(context, node, i); + T* output_data = tflite::micro::GetTensorData(t); const int copy_size = output_dims->data[axis] * base_inner_size; T* output_ptr = output_data + k * copy_size; for (int j = 0; j < copy_size; ++j) output_ptr[j] = input_ptr[j]; @@ -65,23 +67,28 @@ TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node, return kTfLiteOk; } -TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* axis = GetInput(context, node, 0); - const TfLiteTensor* input = GetInput(context, node, 1); // Dynamic output tensors are needed if axis tensor is not constant. 
// But Micro doesn't support dynamic memory allocation, so we only support // constant axis tensor for now. TF_LITE_ENSURE_MSG(context, IsConstantTensor(axis), "Non constant axis tensor not supported"); + return kTfLiteOk; +} - int axis_value = GetTensorData(axis)[0]; +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteEvalTensor* axis = tflite::micro::GetEvalInput(context, node, 0); + const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 1); + + int axis_value = tflite::micro::GetTensorData(axis)[0]; if (axis_value < 0) { - axis_value += NumDimensions(input); + axis_value += input->dims->size; } TF_LITE_ENSURE(context, axis_value >= 0); - TF_LITE_ENSURE(context, axis_value < NumDimensions(input)); + TF_LITE_ENSURE(context, axis_value < input->dims->size); switch (input->type) { case kTfLiteFloat32: { @@ -114,7 +121,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { TfLiteRegistration Register_SPLIT() { return {/*init=*/nullptr, /*free=*/nullptr, - /*prepare=*/nullptr, + /*prepare=*/split::Prepare, /*invoke=*/split::Eval, /*profiling_string=*/nullptr, /*builtin_code=*/0, diff --git a/tensorflow/lite/micro/kernels/split_test.cc b/tensorflow/lite/micro/kernels/split_test.cc index 3a51665ed56..711e807b2e8 100644 --- a/tensorflow/lite/micro/kernels/split_test.cc +++ b/tensorflow/lite/micro/kernels/split_test.cc @@ -17,6 +17,7 @@ limitations under the License. #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/micro/all_ops_resolver.h" #include "tensorflow/lite/micro/debug_log.h" +#include "tensorflow/lite/micro/kernels/kernel_runner.h" #include "tensorflow/lite/micro/testing/micro_test.h" #include "tensorflow/lite/micro/testing/test_utils.h" @@ -24,19 +25,15 @@ namespace tflite { namespace testing { void TestSplitTwoOutputsFloat( - std::initializer_list input_dims_data, - std::initializer_list input_data, - std::initializer_list axis_dims_data, - std::initializer_list axis_data, - std::initializer_list output1_dims_data, - std::initializer_list expected_output1_data, - std::initializer_list output2_dims_data, - std::initializer_list expected_output2_data, float* output1_data, - float* output2_data) { - TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data); - TfLiteIntArray* axis_dims = IntArrayFromInitializer(axis_dims_data); - TfLiteIntArray* output1_dims = IntArrayFromInitializer(output1_dims_data); - TfLiteIntArray* output2_dims = IntArrayFromInitializer(output2_dims_data); + const int* input_dims_data, const float* input_data, + const int* axis_dims_data, const int32_t* axis_data, + const int* output1_dims_data, const float* expected_output1_data, + const int* output2_dims_data, const float* expected_output2_data, + float* output1_data, float* output2_data) { + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); + TfLiteIntArray* axis_dims = IntArrayFromInts(axis_dims_data); + TfLiteIntArray* output1_dims = IntArrayFromInts(output1_dims_data); + TfLiteIntArray* output2_dims = IntArrayFromInts(output2_dims_data); const int output1_dims_count = ElementCount(*output1_dims); const int output2_dims_count = ElementCount(*output2_dims); @@ -61,76 +58,42 @@ void TestSplitTwoOutputsFloat( output2_data[i] = 23; } - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_SPLIT); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - - 
TfLiteSplitParams builtin_data = { - .num_splits = 2, - }; - - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, nullptr, 0); - } - int inputs_array_data[] = {2, 0, 1}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {2, 2, 3}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = reinterpret_cast(&builtin_data); - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; + const TfLiteRegistration registration = tflite::ops::micro::Register_SPLIT(); + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, nullptr, micro_test::reporter); - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - if (registration->free) { - registration->free(&context, user_data); - } + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); for (int i = 0; i < output1_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_NEAR(expected_output1_data.begin()[i], output1_data[i], - 1e-5f); + TF_LITE_MICRO_EXPECT_NEAR(expected_output1_data[i], output1_data[i], 1e-5f); } for (int i = 0; i < output2_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_NEAR(expected_output2_data.begin()[i], output2_data[i], - 1e-5f); + TF_LITE_MICRO_EXPECT_NEAR(expected_output2_data[i], output2_data[i], 1e-5f); } } void TestSplitFourOutputsFloat( - std::initializer_list input_dims_data, - std::initializer_list input_data, - std::initializer_list axis_dims_data, - std::initializer_list axis_data, - std::initializer_list output1_dims_data, - std::initializer_list expected_output1_data, - std::initializer_list output2_dims_data, - std::initializer_list expected_output2_data, - std::initializer_list output3_dims_data, - std::initializer_list expected_output3_data, - std::initializer_list output4_dims_data, - std::initializer_list expected_output4_data, float* output1_data, - float* output2_data, float* output3_data, float* output4_data) { - TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data); - TfLiteIntArray* axis_dims = IntArrayFromInitializer(axis_dims_data); - TfLiteIntArray* output1_dims = IntArrayFromInitializer(output1_dims_data); - TfLiteIntArray* output2_dims = IntArrayFromInitializer(output2_dims_data); - TfLiteIntArray* output3_dims = IntArrayFromInitializer(output3_dims_data); - TfLiteIntArray* output4_dims = IntArrayFromInitializer(output4_dims_data); + const int* input_dims_data, const float* input_data, + const int* axis_dims_data, const int32_t* axis_data, + const int* output1_dims_data, const float* expected_output1_data, + const int* output2_dims_data, const float* expected_output2_data, + const int* output3_dims_data, const float* expected_output3_data, + const int* output4_dims_data, const float* expected_output4_data, + float* output1_data, float* output2_data, float* output3_data, + float* output4_data) { + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); + TfLiteIntArray* axis_dims = IntArrayFromInts(axis_dims_data); + TfLiteIntArray* output1_dims = IntArrayFromInts(output1_dims_data); + TfLiteIntArray* output2_dims = IntArrayFromInts(output2_dims_data); + 
TfLiteIntArray* output3_dims = IntArrayFromInts(output3_dims_data); + TfLiteIntArray* output4_dims = IntArrayFromInts(output4_dims_data); const int output1_dims_count = ElementCount(*output1_dims); const int output2_dims_count = ElementCount(*output2_dims); const int output3_dims_count = ElementCount(*output3_dims); @@ -164,77 +127,42 @@ void TestSplitFourOutputsFloat( output4_data[i] = 23; } - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_SPLIT); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - - TfLiteSplitParams builtin_data = { - .num_splits = 4, - }; - - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, nullptr, 0); - } - int inputs_array_data[] = {2, 0, 1}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {4, 2, 3, 4, 5}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = reinterpret_cast(&builtin_data); - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; + const TfLiteRegistration registration = tflite::ops::micro::Register_SPLIT(); + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, nullptr, micro_test::reporter); - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - if (registration->free) { - registration->free(&context, user_data); - } + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); for (int i = 0; i < output1_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_NEAR(expected_output1_data.begin()[i], output1_data[i], - 1e-5f); + TF_LITE_MICRO_EXPECT_NEAR(expected_output1_data[i], output1_data[i], 1e-5f); } for (int i = 0; i < output2_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_NEAR(expected_output2_data.begin()[i], output2_data[i], - 1e-5f); + TF_LITE_MICRO_EXPECT_NEAR(expected_output2_data[i], output2_data[i], 1e-5f); } for (int i = 0; i < output3_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_NEAR(expected_output3_data.begin()[i], output3_data[i], - 1e-5f); + TF_LITE_MICRO_EXPECT_NEAR(expected_output3_data[i], output3_data[i], 1e-5f); } for (int i = 0; i < output4_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_NEAR(expected_output4_data.begin()[i], output4_data[i], - 1e-5f); + TF_LITE_MICRO_EXPECT_NEAR(expected_output4_data[i], output4_data[i], 1e-5f); } } void TestSplitTwoOutputsQuantized( - std::initializer_list input_dims_data, - std::initializer_list input_data, - std::initializer_list axis_dims_data, - std::initializer_list axis_data, - std::initializer_list output1_dims_data, - std::initializer_list expected_output1_data, - std::initializer_list output2_dims_data, - std::initializer_list expected_output2_data, uint8_t* output1_data, - uint8_t* output2_data) { - TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data); - TfLiteIntArray* axis_dims = IntArrayFromInitializer(axis_dims_data); - TfLiteIntArray* output1_dims = IntArrayFromInitializer(output1_dims_data); - TfLiteIntArray* output2_dims = IntArrayFromInitializer(output2_dims_data); 
+ const int* input_dims_data, const uint8_t* input_data, + const int* axis_dims_data, const int32_t* axis_data, + const int* output1_dims_data, const uint8_t* expected_output1_data, + const int* output2_dims_data, const uint8_t* expected_output2_data, + uint8_t* output1_data, uint8_t* output2_data) { + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); + TfLiteIntArray* axis_dims = IntArrayFromInts(axis_dims_data); + TfLiteIntArray* output1_dims = IntArrayFromInts(output1_dims_data); + TfLiteIntArray* output2_dims = IntArrayFromInts(output2_dims_data); const int output1_dims_count = ElementCount(*output1_dims); const int output2_dims_count = ElementCount(*output2_dims); @@ -260,68 +188,37 @@ void TestSplitTwoOutputsQuantized( output2_data[i] = 23; } - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_SPLIT); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - - TfLiteSplitParams builtin_data = { - .num_splits = 2, - }; - - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, nullptr, 0); - } - int inputs_array_data[] = {2, 0, 1}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {2, 2, 3}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = reinterpret_cast(&builtin_data); - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; + const TfLiteRegistration registration = tflite::ops::micro::Register_SPLIT(); + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, nullptr, micro_test::reporter); - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - if (registration->free) { - registration->free(&context, user_data); - } + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); for (int i = 0; i < output1_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_EQ(expected_output1_data.begin()[i], output1_data[i]); + TF_LITE_MICRO_EXPECT_EQ(expected_output1_data[i], output1_data[i]); } for (int i = 0; i < output2_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_EQ(expected_output2_data.begin()[i], output2_data[i]); + TF_LITE_MICRO_EXPECT_EQ(expected_output2_data[i], output2_data[i]); } } void TestSplitTwoOutputsQuantized32( - std::initializer_list input_dims_data, - std::initializer_list input_data, - std::initializer_list axis_dims_data, - std::initializer_list axis_data, - std::initializer_list output1_dims_data, - std::initializer_list expected_output1_data, - std::initializer_list output2_dims_data, - std::initializer_list expected_output2_data, int32_t* output1_data, - int32_t* output2_data) { - TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data); - TfLiteIntArray* axis_dims = IntArrayFromInitializer(axis_dims_data); - TfLiteIntArray* output1_dims = IntArrayFromInitializer(output1_dims_data); - TfLiteIntArray* output2_dims = IntArrayFromInitializer(output2_dims_data); + const int* input_dims_data, const int32_t* input_data, + const int* axis_dims_data, const int32_t* 
axis_data, + const int* output1_dims_data, const int32_t* expected_output1_data, + const int* output2_dims_data, const int32_t* expected_output2_data, + int32_t* output1_data, int32_t* output2_data) { + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); + TfLiteIntArray* axis_dims = IntArrayFromInts(axis_dims_data); + TfLiteIntArray* output1_dims = IntArrayFromInts(output1_dims_data); + TfLiteIntArray* output2_dims = IntArrayFromInts(output2_dims_data); const int output1_dims_count = ElementCount(*output1_dims); const int output2_dims_count = ElementCount(*output2_dims); @@ -347,51 +244,24 @@ void TestSplitTwoOutputsQuantized32( output2_data[i] = 23; } - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_SPLIT); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - - TfLiteSplitParams builtin_data = { - .num_splits = 2, - }; - - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, nullptr, 0); - } - int inputs_array_data[] = {2, 0, 1}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {2, 2, 3}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = reinterpret_cast(&builtin_data); - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; + const TfLiteRegistration registration = tflite::ops::micro::Register_SPLIT(); + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, nullptr, micro_test::reporter); - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - if (registration->free) { - registration->free(&context, user_data); - } + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); for (int i = 0; i < output1_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_EQ(expected_output1_data.begin()[i], output1_data[i]); + TF_LITE_MICRO_EXPECT_EQ(expected_output1_data[i], output1_data[i]); } for (int i = 0; i < output2_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_EQ(expected_output2_data.begin()[i], output2_data[i]); + TF_LITE_MICRO_EXPECT_EQ(expected_output2_data[i], output2_data[i]); } } @@ -401,91 +271,119 @@ void TestSplitTwoOutputsQuantized32( TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(TwoSplitFourDimensionalAxisZero) { + const int input_shape[] = {4, 2, 2, 2, 2}; + const float input_data[] = {1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16}; + const int axis_shape[] = {1, 1}; + const int32_t axis_data[] = {0}; + const int output1_shape[] = {4, 1, 2, 2, 2}; + const float golden1[] = {1, 2, 3, 4, 5, 6, 7, 8}; + const int output2_shape[] = {4, 1, 2, 2, 2}; + const float golden2[] = {9, 10, 11, 12, 13, 14, 15, 16}; + constexpr int output1_dims_count = 8; constexpr int output2_dims_count = 8; float output1_data[output1_dims_count]; float output2_data[output2_dims_count]; tflite::testing::TestSplitTwoOutputsFloat( - {4, 2, 2, 2, 2}, // Input shape - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, // Input values - {1, 1}, // Axis shape - {0}, // Axis value - {4, 1, 2, 2, 
2}, // Output1 shape - {1, 2, 3, 4, 5, 6, 7, 8}, // Output1 values - {4, 1, 2, 2, 2}, // Output2 shape - {9, 10, 11, 12, 13, 14, 15, 16}, // Output2 values - output1_data, output2_data); + input_shape, input_data, axis_shape, axis_data, output1_shape, golden1, + output2_shape, golden2, output1_data, output2_data); } TF_LITE_MICRO_TEST(TwoSplitFourDimensionalAxisOne) { + const int input_shape[] = {4, 2, 2, 2, 2}; + const float input_data[] = {1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16}; + const int axis_shape[] = {1, 1}; + const int32_t axis_data[] = {1}; + const int output1_shape[] = {4, 2, 1, 2, 2}; + const float golden1[] = {1, 2, 3, 4, 9, 10, 11, 12}; + const int output2_shape[] = {4, 2, 1, 2, 2}; + const float golden2[] = {5, 6, 7, 8, 13, 14, 15, 16}; + constexpr int output1_dims_count = 8; constexpr int output2_dims_count = 8; float output1_data[output1_dims_count]; float output2_data[output2_dims_count]; tflite::testing::TestSplitTwoOutputsFloat( - {4, 2, 2, 2, 2}, // Input shape - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, // Input values - {1, 1}, // Axis shape - {1}, // Axis value - {4, 2, 1, 2, 2}, // Output1 shape - {1, 2, 3, 4, 9, 10, 11, 12}, // Output1 values - {4, 2, 1, 2, 2}, // Output2 shape - {5, 6, 7, 8, 13, 14, 15, 16}, // Output2 values - output1_data, output2_data); + input_shape, input_data, axis_shape, axis_data, output1_shape, golden1, + output2_shape, golden2, output1_data, output2_data); } TF_LITE_MICRO_TEST(TwoSplitFourDimensionalAxisTwo) { + const int input_shape[] = {4, 2, 2, 2, 2}; + const float input_data[] = {1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16}; + const int axis_shape[] = {1, 1}; + const int32_t axis_data[] = {2}; + const int output1_shape[] = {4, 2, 2, 1, 2}; + const float golden1[] = {1, 2, 5, 6, 9, 10, 13, 14}; + const int output2_shape[] = {4, 2, 2, 1, 2}; + const float golden2[] = {3, 4, 7, 8, 11, 12, 15, 16}; + constexpr int output1_dims_count = 8; constexpr int output2_dims_count = 8; float output1_data[output1_dims_count]; float output2_data[output2_dims_count]; tflite::testing::TestSplitTwoOutputsFloat( - {4, 2, 2, 2, 2}, // Input shape - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, // Input values - {1, 1}, // Axis shape - {2}, // Axis value - {4, 2, 2, 1, 2}, // Output1 shape - {1, 2, 5, 6, 9, 10, 13, 14}, // Output1 values - {4, 2, 2, 1, 2}, // Output2 shape - {3, 4, 7, 8, 11, 12, 15, 16}, // Output2 values - output1_data, output2_data); + input_shape, input_data, axis_shape, axis_data, output1_shape, golden1, + output2_shape, golden2, output1_data, output2_data); } TF_LITE_MICRO_TEST(TwoSplitFourDimensionalAxisThree) { + const int input_shape[] = {4, 2, 2, 2, 2}; + const float input_data[] = {1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16}; + const int axis_shape[] = {1, 1}; + const int32_t axis_data[] = {3}; + const int output1_shape[] = {4, 2, 2, 2, 1}; + const float golden1[] = {1, 3, 5, 7, 9, 11, 13, 15}; + const int output2_shape[] = {4, 2, 2, 2, 1}; + const float golden2[] = {2, 4, 6, 8, 10, 12, 14, 16}; + constexpr int output1_dims_count = 8; constexpr int output2_dims_count = 8; float output1_data[output1_dims_count]; float output2_data[output2_dims_count]; tflite::testing::TestSplitTwoOutputsFloat( - {4, 2, 2, 2, 2}, // Input shape - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, // Input values - {1, 1}, // Axis shape - {3}, // Axis value - {4, 2, 2, 2, 1}, // Output1 shape - {1, 3, 5, 7, 9, 11, 13, 15}, // Output1 values - {4, 2, 2, 2, 1}, // Output2 shape 
- {2, 4, 6, 8, 10, 12, 14, 16}, // Output2 values - output1_data, output2_data); + input_shape, input_data, axis_shape, axis_data, output1_shape, golden1, + output2_shape, golden2, output1_data, output2_data); } TF_LITE_MICRO_TEST(TwoSplitFourDimensionalNegativeAxis) { + const int input_shape[] = {4, 2, 2, 2, 2}; + const float input_data[] = {1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16}; + const int axis_shape[] = {1, 1}; + const int32_t axis_data[] = {-4}; + const int output1_shape[] = {4, 1, 2, 2, 2}; + const float golden1[] = {1, 2, 3, 4, 5, 6, 7, 8}; + const int output2_shape[] = {4, 1, 2, 2, 2}; + const float golden2[] = {9, 10, 11, 12, 13, 14, 15, 16}; + constexpr int output1_dims_count = 8; constexpr int output2_dims_count = 8; float output1_data[output1_dims_count]; float output2_data[output2_dims_count]; tflite::testing::TestSplitTwoOutputsFloat( - {4, 2, 2, 2, 2}, // Input shape - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, // Input values - {1, 1}, // Axis shape - {-4}, // Axis value - {4, 1, 2, 2, 2}, // Output1 shape - {1, 2, 3, 4, 5, 6, 7, 8}, // Output1 values - {4, 1, 2, 2, 2}, // Output2 shape - {9, 10, 11, 12, 13, 14, 15, 16}, // Output2 values - output1_data, output2_data); + input_shape, input_data, axis_shape, axis_data, output1_shape, golden1, + output2_shape, golden2, output1_data, output2_data); } TF_LITE_MICRO_TEST(FourSplit) { + const int input_shape[] = {1, 4}; + const float input_data[] = {1, 2, 3, 4}; + const int axis_shape[] = {1, 1}; + const int32_t axis_data[] = {0}; + const int output1_shape[] = {1, 1}; + const float golden1[] = {1}; + const int output2_shape[] = {1, 1}; + const float golden2[] = {2}; + const int output3_shape[] = {1, 1}; + const float golden3[] = {3}; + const int output4_shape[] = {1, 1}; + const float golden4[] = {4}; + constexpr int output1_dims_count = 1; constexpr int output2_dims_count = 1; constexpr int output3_dims_count = 1; @@ -494,70 +392,69 @@ TF_LITE_MICRO_TEST(FourSplit) { float output2_data[output2_dims_count]; float output3_data[output3_dims_count]; float output4_data[output4_dims_count]; - tflite::testing::TestSplitFourOutputsFloat({1, 4}, // Input shape - {1, 2, 3, 4}, // Input values - {1, 1}, // Axis shape - {0}, // Axis value - {1, 1}, // Output1 shape - {1}, // Output1 values - {1, 1}, // Output2 shape - {2}, // Output2 values - {1, 1}, // Output3 shape - {3}, // Output3 values - {1, 1}, // Output4 shape - {4}, // Output4 values - output1_data, output2_data, - output3_data, output4_data); + tflite::testing::TestSplitFourOutputsFloat( + input_shape, input_data, axis_shape, axis_data, output1_shape, golden1, + output2_shape, golden2, output3_shape, golden3, output4_shape, golden4, + output1_data, output2_data, output3_data, output4_data); } TF_LITE_MICRO_TEST(TwoSplitOneDimensional) { + const int input_shape[] = {1, 2}; + const float input_data[] = {1, 2}; + const int axis_shape[] = {1, 1}; + const int32_t axis_data[] = {0}; + const int output1_shape[] = {1, 1}; + const float golden1[] = {1}; + const int output2_shape[] = {1, 1}; + const float golden2[] = {2}; + constexpr int output1_dims_count = 8; constexpr int output2_dims_count = 8; float output1_data[output1_dims_count]; float output2_data[output2_dims_count]; - tflite::testing::TestSplitTwoOutputsFloat({1, 2}, // Input shape - {1, 2}, // Input values - {1, 1}, // Axis shape - {0}, // Axis value - {1, 1}, // Output1 shape - {1}, // Output1 values - {1, 1}, // Output2 shape - {2}, // Output2 values - output1_data, output2_data); + 
tflite::testing::TestSplitTwoOutputsFloat( + input_shape, input_data, axis_shape, axis_data, output1_shape, golden1, + output2_shape, golden2, output1_data, output2_data); } TF_LITE_MICRO_TEST(TwoSplitFourDimensionalQuantized) { + const int input_shape[] = {4, 2, 2, 2, 2}; + const uint8_t input_data[] = {1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16}; + const int axis_shape[] = {1, 1}; + const int32_t axis_data[] = {1}; + const int output1_shape[] = {4, 2, 1, 2, 2}; + const uint8_t golden1[] = {1, 2, 3, 4, 9, 10, 11, 12}; + const int output2_shape[] = {4, 2, 1, 2, 2}; + const uint8_t golden2[] = {5, 6, 7, 8, 13, 14, 15, 16}; + constexpr int output1_dims_count = 8; constexpr int output2_dims_count = 8; uint8_t output1_data[output1_dims_count]; uint8_t output2_data[output2_dims_count]; tflite::testing::TestSplitTwoOutputsQuantized( - {4, 2, 2, 2, 2}, // Input shape - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, // Input values - {1, 1}, // Axis shape - {0}, // Axis value - {4, 1, 2, 2, 2}, // Output1 shape - {1, 2, 3, 4, 5, 6, 7, 8}, // Output1 values - {4, 1, 2, 2, 2}, // Output2 shape - {9, 10, 11, 12, 13, 14, 15, 16}, // Output2 values - output1_data, output2_data); + input_shape, input_data, axis_shape, axis_data, output1_shape, golden1, + output2_shape, golden2, output1_data, output2_data); } TF_LITE_MICRO_TEST(TwoSplitFourDimensionalQuantized32) { + const int input_shape[] = {4, 2, 2, 2, 2}; + const int32_t input_data[] = {1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16}; + const int axis_shape[] = {1, 1}; + const int32_t axis_data[] = {1}; + const int output1_shape[] = {4, 2, 1, 2, 2}; + const int32_t golden1[] = {1, 2, 3, 4, 9, 10, 11, 12}; + const int output2_shape[] = {4, 2, 1, 2, 2}; + const int32_t golden2[] = {5, 6, 7, 8, 13, 14, 15, 16}; + constexpr int output1_dims_count = 8; constexpr int output2_dims_count = 8; int32_t output1_data[output1_dims_count]; int32_t output2_data[output2_dims_count]; tflite::testing::TestSplitTwoOutputsQuantized32( - {4, 2, 2, 2, 2}, // Input shape - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, // Input values - {1, 1}, // Axis shape - {0}, // Axis value - {4, 1, 2, 2, 2}, // Output1 shape - {1, 2, 3, 4, 5, 6, 7, 8}, // Output1 values - {4, 1, 2, 2, 2}, // Output2 shape - {9, 10, 11, 12, 13, 14, 15, 16}, // Output2 values - output1_data, output2_data); + input_shape, input_data, axis_shape, axis_data, output1_shape, golden1, + output2_shape, golden2, output1_data, output2_data); } TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/kernels/strided_slice.cc b/tensorflow/lite/micro/kernels/strided_slice.cc index 3307d67a9ab..2dbe6e1debf 100644 --- a/tensorflow/lite/micro/kernels/strided_slice.cc +++ b/tensorflow/lite/micro/kernels/strided_slice.cc @@ -15,23 +15,20 @@ limitations under the License. 
#include "tensorflow/lite/kernels/internal/reference/strided_slice.h" #include +#include #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/op_macros.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { namespace ops { namespace micro { namespace strided_slice { -enum KernelType { - kReference, - // TODO(soroosh): add kGenericOptimized -}; - constexpr int kInputTensor = 0; constexpr int kBeginTensor = 1; constexpr int kEndTensor = 2; @@ -120,58 +117,70 @@ TfLiteStatus CheckOutputSize(TfLiteContext* context, return kTfLiteOk; } +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(StridedSliceParams)); +} + TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + StridedSliceParams* op_params = + static_cast(node->user_data); TF_LITE_ENSURE_EQ(context, NumInputs(node), 4); TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); StridedSliceContext op_context(context, node); TF_LITE_ENSURE_MSG(context, op_context.dims <= kMaxDim, "input dim should not exceed 4"); + auto params = BuildStridedSliceParams(&op_context); + memcpy(op_params, ¶ms, sizeof(StridedSliceParams)); return CheckOutputSize(context, &op_context); } -template TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - StridedSliceContext op_context(context, node); - auto op_params = BuildStridedSliceParams(&op_context); + TFLITE_DCHECK(node->user_data != nullptr); + const StridedSliceParams& op_params = + *(static_cast(node->user_data)); -#define TF_LITE_STRIDED_SLICE(kernel_type, data_type) \ - kernel_type::StridedSlice(op_params, GetTensorShape(op_context.input), \ - GetTensorData(op_context.input), \ - GetTensorShape(op_context.output), \ - GetTensorData(op_context.output)) - - switch (op_context.input->type) { + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + switch (output->type) { case kTfLiteFloat32: - if (kernel_type == kReference) { - TF_LITE_STRIDED_SLICE(reference_ops, float); - } + reference_ops::StridedSlice(op_params, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); break; case kTfLiteUInt8: - if (kernel_type == kReference) { - TF_LITE_STRIDED_SLICE(reference_ops, uint8_t); - } + reference_ops::StridedSlice( + op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); break; case kTfLiteInt8: - if (kernel_type == kReference) { - TF_LITE_STRIDED_SLICE(reference_ops, int8_t); - } + reference_ops::StridedSlice(op_params, + tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); break; default: TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", - TfLiteTypeGetName(op_context.input->type), - op_context.input->type); + TfLiteTypeGetName(input->type), input->type); return kTfLiteError; } -#undef TF_LITE_STRIDED_SLICE return kTfLiteOk; } } // namespace strided_slice 
TfLiteRegistration Register_STRIDED_SLICE() { - return {/*init=*/nullptr, + return {/*init=*/strided_slice::Init, /*free=*/nullptr, /*prepare=*/strided_slice::Prepare, - /*invoke=*/strided_slice::Eval, + /*invoke=*/strided_slice::Eval, /*profiling_string=*/nullptr, /*builtin_code=*/0, /*custom_name=*/nullptr, diff --git a/tensorflow/lite/micro/kernels/strided_slice_test.cc b/tensorflow/lite/micro/kernels/strided_slice_test.cc index 4387e4bdde3..a2a472af990 100644 --- a/tensorflow/lite/micro/kernels/strided_slice_test.cc +++ b/tensorflow/lite/micro/kernels/strided_slice_test.cc @@ -12,9 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include + #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/micro/all_ops_resolver.h" +#include "tensorflow/lite/micro/kernels/kernel_runner.h" #include "tensorflow/lite/micro/testing/micro_test.h" #include "tensorflow/lite/micro/testing/test_utils.h" @@ -22,1160 +25,1049 @@ namespace tflite { namespace testing { namespace { -template -inline TfLiteTensor CreateTensor(const input_type* data, TfLiteIntArray* dims, - bool is_variable = false) { - TfLiteTensor result; - result.type = tensor_input_type; - result.data.raw = reinterpret_cast(const_cast(data)); - result.dims = dims; - result.allocation_type = kTfLiteMemNone; - result.bytes = ElementCount(*dims) * sizeof(input_type); - result.is_variable = is_variable; - return result; -} - -template -inline TfLiteTensor CreateTensor(std::initializer_list data, - TfLiteIntArray* dims, - bool is_variable = false) { - return CreateTensor(data.begin(), dims, - is_variable); -} - -template -void TestStrideSlide(std::initializer_list input_shape, - std::initializer_list begin_shape, - std::initializer_list end_shape, - std::initializer_list strides_shape, int begin_mask, - int end_mask, int ellipsis_mask, int new_axis_mask, - int shrink_axis_mask, - std::initializer_list input_data, - std::initializer_list begin_data, - std::initializer_list end_data, - std::initializer_list strides_data, - std::initializer_list output_shape, - input_type* output_data, - std::initializer_list expected_output, - bool expect_prepare_err, bool expect_invoke_err, - int num_invoke = 1) { - TfLiteIntArray* input_dims = IntArrayFromInitializer(input_shape); - TfLiteIntArray* begin_dims = IntArrayFromInitializer(begin_shape); - TfLiteIntArray* end_dims = IntArrayFromInitializer(end_shape); - TfLiteIntArray* strides_dims = IntArrayFromInitializer(strides_shape); - TfLiteIntArray* output_dims = IntArrayFromInitializer(output_shape); - constexpr int inputs_size = 4; - constexpr int outputs_size = 1; - constexpr int tensors_size = inputs_size + outputs_size; - TfLiteTensor tensors[tensors_size] = { - CreateTensor(input_data, input_dims), - CreateTensor(begin_data, begin_dims), - CreateTensor(end_data, end_dims), - CreateTensor(strides_data, strides_dims), - CreateTensor(output_data, output_dims), - }; - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_STRIDED_SLICE); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - TfLiteStridedSliceParams builtin_data = {begin_mask, end_mask, ellipsis_mask, - new_axis_mask, 
shrink_axis_mask}; - const char* init_data = reinterpret_cast(&builtin_data); - size_t init_data_size = 0; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, init_data, init_data_size); - } - +template +void ValidateStridedSliceGoldens(TfLiteTensor* tensors, int tensors_size, + const T* golden, T* output, int output_len, + TfLiteStridedSliceParams* params, + const bool expect_prepare_err, int num_invoke, + float tolerance = 1e-5) { int inputs_array_data[] = {4, 0, 1, 2, 3}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 4}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = reinterpret_cast(&builtin_data); - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; - if (registration->prepare) { - if (expect_prepare_err) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteError, - registration->prepare(&context, &node)); - return; - } - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - if (expect_invoke_err) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteError, - registration->invoke(&context, &node)); + + const TfLiteRegistration registration = + tflite::ops::micro::Register_STRIDED_SLICE(); + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, reinterpret_cast(params), + micro_test::reporter); + if (expect_prepare_err) { + TF_LITE_MICRO_EXPECT_EQ(kTfLiteError, runner.InitAndPrepare()); return; + } else { + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); } - for (int i = 0; i < num_invoke; ++i) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); + + for (int i = 0; i < num_invoke; i++) { + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); } - if (registration->free) { - registration->free(&context, user_data); - } - for (size_t i = 0; i < expected_output.size(); ++i) { - TF_LITE_MICRO_EXPECT_NEAR(expected_output.begin()[i], output_data[i], - 1e-5f); + + for (int i = 0; i < output_len; ++i) { + TF_LITE_MICRO_EXPECT_NEAR(golden[i], output[i], 1e-5f); } } +void TestStridedSliceFloat(const int* input_shape, const int* begin_shape, + const int* end_shape, const int* strides_shape, + TfLiteStridedSliceParams* builtin_data, + float* input_data, const int32_t* begin_data, + const int32_t* end_data, const int32_t* strides_data, + const int* output_shape, float* output_data, + const float* expected_output, + bool expect_prepare_err, int num_invoke = 1) { + TfLiteIntArray* input_dims = IntArrayFromInts(input_shape); + TfLiteIntArray* begin_dims = IntArrayFromInts(begin_shape); + TfLiteIntArray* end_dims = IntArrayFromInts(end_shape); + TfLiteIntArray* strides_dims = IntArrayFromInts(strides_shape); + TfLiteIntArray* output_dims = IntArrayFromInts(output_shape); + constexpr int inputs_size = 4; + constexpr int outputs_size = 1; + constexpr int tensors_size = inputs_size + outputs_size; + TfLiteTensor tensors[tensors_size] = { + CreateFloatTensor(input_data, input_dims), + CreateQuantized32Tensor(begin_data, begin_dims, 1.0), + CreateQuantized32Tensor(end_data, end_dims, 1.0), + CreateQuantized32Tensor(strides_data, strides_dims, 1.0), + CreateFloatTensor(output_data, output_dims), + }; + + ValidateStridedSliceGoldens(tensors, tensors_size, expected_output, + output_data, 
ElementCount(*output_dims), + builtin_data, expect_prepare_err, num_invoke, + 1.0); +} + +template +void TestStridedSliceQuantized( + const int* input_shape, const int* begin_shape, const int* end_shape, + const int* strides_shape, TfLiteStridedSliceParams* builtin_data, + const T* input_data, const int32_t* begin_data, const int32_t* end_data, + const int32_t* strides_data, const int* output_shape, T* output_data, + const T* expected_output, bool expect_prepare_err, int num_invoke = 1) { + TfLiteIntArray* input_dims = IntArrayFromInts(input_shape); + TfLiteIntArray* begin_dims = IntArrayFromInts(begin_shape); + TfLiteIntArray* end_dims = IntArrayFromInts(end_shape); + TfLiteIntArray* strides_dims = IntArrayFromInts(strides_shape); + TfLiteIntArray* output_dims = IntArrayFromInts(output_shape); + constexpr int inputs_size = 4; + constexpr int outputs_size = 1; + constexpr int tensors_size = inputs_size + outputs_size; + int zero_point = + std::numeric_limits::max() + std::numeric_limits::min() / 2; + TfLiteTensor tensors[tensors_size] = { + CreateQuantizedTensor(input_data, input_dims, 1.0, zero_point), + CreateQuantized32Tensor(begin_data, begin_dims, 1.0), + CreateQuantized32Tensor(end_data, end_dims, 1.0), + CreateQuantized32Tensor(strides_data, strides_dims, 1.0), + CreateQuantizedTensor(output_data, output_dims, 1.0, zero_point), + }; + + ValidateStridedSliceGoldens(tensors, tensors_size, expected_output, + output_data, ElementCount(*output_dims), + builtin_data, expect_prepare_err, num_invoke, + 1.0); +} + } // namespace } // namespace testing } // namespace tflite TF_LITE_MICRO_TESTS_BEGIN -using tflite::testing::TestStrideSlide; TF_LITE_MICRO_TEST(UnsupportedInputSize) { + const int input_shape[] = {5, 2, 2, 2, 2, 2}; + const int begin_shape[] = {1, 5}; + const int end_shape[] = {1, 5}; + const int strides_shape[] = {1, 5}; + const int output_shape[] = {0}; + float input_data[] = {}; + int32_t begin_data[] = {}; + int32_t end_data[] = {}; + int32_t strides_data[] = {}; + float golden[] = {}; float output_data[4]; - TestStrideSlide({5, 2, 2, 2, 2, 2}, // input_shape - {1, 5}, // begin_shape - {1, 5}, // end_shape - {1, 5}, // strides_shape - 0, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 0, // shrink_axis_mask - {}, // input_data - {}, // begin_data - {}, // end_data - {}, // strides_data - {0}, // output_shape - output_data, // output_data - {}, // expected_output - true, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, true); } TF_LITE_MICRO_TEST(In1D) { + const int input_shape[] = {1, 4}; + const int begin_shape[] = {1, 1}; + const int end_shape[] = {1, 1}; + const int strides_shape[] = {1, 1}; + const int output_shape[] = {1, 2}; + float input_data[] = {1, 2, 3, 4}; + int32_t begin_data[] = {1}; + int32_t end_data[] = {3}; + int32_t strides_data[] = {1}; + float golden[] = {2, 3}; float output_data[4]; - TestStrideSlide({1, 4}, // input_shape - {1, 1}, // begin_shape - {1, 1}, // end_shape - {1, 1}, // strides_shape - 0, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 0, // shrink_axis_mask - {1, 2, 3, 4}, // input_data - {1}, // begin_data - {3}, // end_data - {1}, // strides_data - {1, 2}, // output_shape - output_data, // output_data - {2, 3}, // expected_output - 
false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In1D_EmptyOutput) { + const int input_shape[] = {1, 4}; + const int begin_shape[] = {1, 1}; + const int end_shape[] = {1, 1}; + const int strides_shape[] = {1, 1}; + const int output_shape[] = {1, 0}; + float input_data[] = {1, 2, 3, 4}; + int32_t begin_data[] = {10}; + int32_t end_data[] = {3}; + int32_t strides_data[] = {1}; + float golden[] = {}; float output_data[4]; - TestStrideSlide({1, 4}, // input_shape - {1, 1}, // begin_shape - {1, 1}, // end_shape - {1, 1}, // strides_shape - 0, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 0, // shrink_axis_mask - {1, 2, 3, 4}, // input_data - {10}, // begin_data - {3}, // end_data - {1}, // strides_data - {1, 0}, // output_shape - output_data, // output_data - {}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In1D_NegativeBegin) { + const int input_shape[] = {1, 4}; + const int begin_shape[] = {1, 1}; + const int end_shape[] = {1, 1}; + const int strides_shape[] = {1, 1}; + const int output_shape[] = {1, 2}; + float input_data[] = {1, 2, 3, 4}; + int32_t begin_data[] = {-3}; + int32_t end_data[] = {3}; + int32_t strides_data[] = {1}; + float golden[] = {2, 3}; float output_data[4]; - TestStrideSlide({1, 4}, // input_shape - {1, 1}, // begin_shape - {1, 1}, // end_shape - {1, 1}, // strides_shape - 0, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 0, // shrink_axis_mask - {1, 2, 3, 4}, // input_data - {-3}, // begin_data - {3}, // end_data - {1}, // strides_data - {1, 2}, // output_shape - output_data, // output_data - {2, 3}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In1D_OutOfRangeBegin) { + const int input_shape[] = {1, 4}; + const int begin_shape[] = {1, 1}; + const int end_shape[] = {1, 1}; + const int strides_shape[] = {1, 1}; + const int output_shape[] = {1, 3}; + float input_data[] = {1, 2, 3, 4}; + int32_t begin_data[] = {-5}; + int32_t end_data[] = {3}; + int32_t strides_data[] = {1}; + float golden[] = {1, 2, 3}; float output_data[4]; - TestStrideSlide({1, 4}, // input_shape - {1, 1}, // begin_shape - {1, 1}, // end_shape - {1, 1}, // strides_shape - 0, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 0, // shrink_axis_mask - {1, 2, 3, 4}, // input_data - {-5}, // begin_data - {3}, // end_data - {1}, // strides_data - {1, 3}, // output_shape - output_data, // output_data - {1, 2, 3}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, 
end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In1D_NegativeEnd) { + const int input_shape[] = {1, 4}; + const int begin_shape[] = {1, 1}; + const int end_shape[] = {1, 1}; + const int strides_shape[] = {1, 1}; + const int output_shape[] = {1, 1}; + float input_data[] = {1, 2, 3, 4}; + int32_t begin_data[] = {1}; + int32_t end_data[] = {-2}; + int32_t strides_data[] = {1}; + float golden[] = {2}; float output_data[4]; - TestStrideSlide({1, 4}, // input_shape - {1, 1}, // begin_shape - {1, 1}, // end_shape - {1, 1}, // strides_shape - 0, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 0, // shrink_axis_mask - {1, 2, 3, 4}, // input_data - {1}, // begin_data - {-2}, // end_data - {1}, // strides_data - {1, 1}, // output_shape - output_data, // output_data - {2}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In1D_OutOfRangeEnd) { + const int input_shape[] = {1, 4}; + const int begin_shape[] = {1, 1}; + const int end_shape[] = {1, 1}; + const int strides_shape[] = {1, 1}; + const int output_shape[] = {1, 3}; + float input_data[] = {1, 2, 3, 4}; + int32_t begin_data[] = {-3}; + int32_t end_data[] = {5}; + int32_t strides_data[] = {1}; + float golden[] = {2, 3, 4}; float output_data[4]; - TestStrideSlide({1, 4}, // input_shape - {1, 1}, // begin_shape - {1, 1}, // end_shape - {1, 1}, // strides_shape - 0, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 0, // shrink_axis_mask - {1, 2, 3, 4}, // input_data - {-3}, // begin_data - {5}, // end_data - {1}, // strides_data - {1, 3}, // output_shape - output_data, // output_data - {2, 3, 4}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In1D_BeginMask) { + const int input_shape[] = {1, 4}; + const int begin_shape[] = {1, 1}; + const int end_shape[] = {1, 1}; + const int strides_shape[] = {1, 1}; + const int output_shape[] = {1, 3}; + float input_data[] = {1, 2, 3, 4}; + int32_t begin_data[] = {1}; + int32_t end_data[] = {3}; + int32_t strides_data[] = {1}; + float golden[] = {1, 2, 3}; float output_data[4]; - TestStrideSlide({1, 4}, // input_shape - {1, 1}, // begin_shape - {1, 1}, // end_shape - {1, 1}, // strides_shape - 1, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 0, // shrink_axis_mask - {1, 2, 3, 4}, // input_data - {1}, // begin_data - {3}, // end_data - {1}, // strides_data - {1, 3}, // output_shape - output_data, // output_data - {1, 2, 3}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {1, 0, 0, 0, 0}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } 
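// Illustrative sketch only (hypothetical variable name begin_mask_params; not an
// added test): the brace-initialized TfLiteStridedSliceParams literals used by the
// converted tests map positionally onto the same mask fields that the removed
// TestStrideSlide helper took as separate arguments. Assuming the field order from
// the old {begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask}
// initialization above, the {1, 0, 0, 0, 0} literal in In1D_BeginMask reads as:
TfLiteStridedSliceParams begin_mask_params = {/*begin_mask=*/1,
                                              /*end_mask=*/0,
                                              /*ellipsis_mask=*/0,
                                              /*new_axis_mask=*/0,
                                              /*shrink_axis_mask=*/0};
// The empty initializers (builtin_data = {}) in the other tests zero every mask,
// matching the all-zero mask arguments of the old helper calls.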
TF_LITE_MICRO_TEST(In1D_NegativeBeginNegativeStride) { + const int input_shape[] = {1, 4}; + const int begin_shape[] = {1, 1}; + const int end_shape[] = {1, 1}; + const int strides_shape[] = {1, 1}; + const int output_shape[] = {1, 1}; + float input_data[] = {1, 2, 3, 4}; + int32_t begin_data[] = {-2}; + int32_t end_data[] = {-3}; + int32_t strides_data[] = {-1}; + float golden[] = {3}; float output_data[4]; - TestStrideSlide({1, 4}, // input_shape - {1, 1}, // begin_shape - {1, 1}, // end_shape - {1, 1}, // strides_shape - 0, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 0, // shrink_axis_mask - {1, 2, 3, 4}, // input_data - {-2}, // begin_data - {-3}, // end_data - {-1}, // strides_data - {1, 1}, // output_shape - output_data, // output_data - {3}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In1D_OutOfRangeBeginNegativeStride) { + const int input_shape[] = {1, 4}; + const int begin_shape[] = {1, 1}; + const int end_shape[] = {1, 1}; + const int strides_shape[] = {1, 1}; + const int output_shape[] = {1, 1}; + float input_data[] = {1, 2, 3, 4}; + int32_t begin_data[] = {5}; + int32_t end_data[] = {2}; + int32_t strides_data[] = {-1}; + float golden[] = {4}; float output_data[4]; - TestStrideSlide({1, 4}, // input_shape - {1, 1}, // begin_shape - {1, 1}, // end_shape - {1, 1}, // strides_shape - 0, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 0, // shrink_axis_mask - {1, 2, 3, 4}, // input_data - {5}, // begin_data - {2}, // end_data - {-1}, // strides_data - {1, 1}, // output_shape - output_data, // output_data - {4}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In1D_NegativeEndNegativeStride) { + const int input_shape[] = {1, 4}; + const int begin_shape[] = {1, 1}; + const int end_shape[] = {1, 1}; + const int strides_shape[] = {1, 1}; + const int output_shape[] = {1, 2}; + float input_data[] = {1, 2, 3, 4}; + int32_t begin_data[] = {2}; + int32_t end_data[] = {-4}; + int32_t strides_data[] = {-1}; + float golden[] = {3, 2}; float output_data[4]; - TestStrideSlide({1, 4}, // input_shape - {1, 1}, // begin_shape - {1, 1}, // end_shape - {1, 1}, // strides_shape - 0, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 0, // shrink_axis_mask - {1, 2, 3, 4}, // input_data - {2}, // begin_data - {-4}, // end_data - {-1}, // strides_data - {1, 2}, // output_shape - output_data, // output_data - {3, 2}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In1D_OutOfRangeEndNegativeStride) { + const int input_shape[] = {1, 4}; + const int begin_shape[] = {1, 1}; + const int end_shape[] = 
{1, 1}; + const int strides_shape[] = {1, 1}; + const int output_shape[] = {1, 2}; + float input_data[] = {1, 2, 3, 4}; + int32_t begin_data[] = {-3}; + int32_t end_data[] = {-5}; + int32_t strides_data[] = {-1}; + float golden[] = {2, 1}; float output_data[4]; - TestStrideSlide({1, 4}, // input_shape - {1, 1}, // begin_shape - {1, 1}, // end_shape - {1, 1}, // strides_shape - 0, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 0, // shrink_axis_mask - {1, 2, 3, 4}, // input_data - {-3}, // begin_data - {-5}, // end_data - {-1}, // strides_data - {1, 2}, // output_shape - output_data, // output_data - {2, 1}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In1D_EndMask) { + const int input_shape[] = {1, 4}; + const int begin_shape[] = {1, 1}; + const int end_shape[] = {1, 1}; + const int strides_shape[] = {1, 1}; + const int output_shape[] = {1, 3}; + float input_data[] = {1, 2, 3, 4}; + int32_t begin_data[] = {1}; + int32_t end_data[] = {3}; + int32_t strides_data[] = {1}; + float golden[] = {2, 3, 4}; float output_data[4]; - TestStrideSlide({1, 4}, // input_shape - {1, 1}, // begin_shape - {1, 1}, // end_shape - {1, 1}, // strides_shape - 0, // begin_mask - 1, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 0, // shrink_axis_mask - {1, 2, 3, 4}, // input_data - {1}, // begin_data - {3}, // end_data - {1}, // strides_data - {1, 3}, // output_shape - output_data, // output_data - {2, 3, 4}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {0, 1, 0, 0, 0}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In1D_NegStride) { + const int input_shape[] = {1, 3}; + const int begin_shape[] = {1, 1}; + const int end_shape[] = {1, 1}; + const int strides_shape[] = {1, 1}; + const int output_shape[] = {1, 3}; + float input_data[] = {1, 2, 3}; + int32_t begin_data[] = {-1}; + int32_t end_data[] = {-4}; + int32_t strides_data[] = {-1}; + float golden[] = {3, 2, 1}; float output_data[4]; - TestStrideSlide({1, 3}, // input_shape - {1, 1}, // begin_shape - {1, 1}, // end_shape - {1, 1}, // strides_shape - 0, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 0, // shrink_axis_mask - {1, 2, 3}, // input_data - {-1}, // begin_data - {-4}, // end_data - {-1}, // strides_data - {1, 3}, // output_shape - output_data, // output_data - {3, 2, 1}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In1D_EvenLenStride2) { + const int input_shape[] = {1, 2}; + const int begin_shape[] = {1, 1}; + const int end_shape[] = {1, 1}; + const int strides_shape[] = {1, 1}; + const int output_shape[] = {1, 1}; + float input_data[] = {1, 2, 3, 4}; + int32_t begin_data[] = {0}; + int32_t end_data[] = 
{4}; + int32_t strides_data[] = {2}; + float golden[] = {1}; float output_data[4]; - TestStrideSlide({1, 2}, // input_shape - {1, 1}, // begin_shape - {1, 1}, // end_shape - {1, 1}, // strides_shape - 0, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 0, // shrink_axis_mask - {1, 2}, // input_data - {0}, // begin_data - {4}, // end_data - {2}, // strides_data - {1, 1}, // output_shape - output_data, // output_data - {1}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In1D_OddLenStride2) { + const int input_shape[] = {1, 3}; + const int begin_shape[] = {1, 1}; + const int end_shape[] = {1, 1}; + const int strides_shape[] = {1, 1}; + const int output_shape[] = {1, 2}; + float input_data[] = {1, 2, 3, 4}; + int32_t begin_data[] = {0}; + int32_t end_data[] = {3}; + int32_t strides_data[] = {2}; + float golden[] = {1, 3}; float output_data[4]; - TestStrideSlide({1, 3}, // input_shape - {1, 1}, // begin_shape - {1, 1}, // end_shape - {1, 1}, // strides_shape - 0, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 0, // shrink_axis_mask - {1, 2, 3}, // input_data - {0}, // begin_data - {3}, // end_data - {2}, // strides_data - {1, 2}, // output_shape - output_data, // output_data - {1, 3}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In2D_Identity) { + const int input_shape[] = {2, 2, 3}; + const int begin_shape[] = {1, 2}; + const int end_shape[] = {1, 2}; + const int strides_shape[] = {1, 2}; + const int output_shape[] = {2, 2, 3}; + float input_data[] = {1, 2, 3, 4, 5, 6}; + int32_t begin_data[] = {0, 0}; + int32_t end_data[] = {2, 3}; + int32_t strides_data[] = {1, 1}; + float golden[] = {1, 2, 3, 4, 5, 6}; float output_data[8]; - TestStrideSlide({2, 2, 3}, // input_shape - {1, 2}, // begin_shape - {1, 2}, // end_shape - {1, 2}, // strides_shape - 0, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 0, // shrink_axis_mask - {1, 2, 3, 4, 5, 6}, // input_data - {0, 0}, // begin_data - {2, 3}, // end_data - {1, 1}, // strides_data - {2, 2, 3}, // output_shape - output_data, // output_data - {1, 2, 3, 4, 5, 6}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In2D) { + const int input_shape[] = {2, 2, 3}; + const int begin_shape[] = {1, 2}; + const int end_shape[] = {1, 2}; + const int strides_shape[] = {1, 2}; + const int output_shape[] = {2, 1, 2}; + float input_data[] = {1, 2, 3, 4, 5, 6}; + int32_t begin_data[] = {1, 0}; + int32_t end_data[] = {2, 2}; + int32_t strides_data[] = {1, 1}; + float golden[] = {4, 5}; float output_data[8]; - TestStrideSlide({2, 2, 3}, // input_shape - {1, 2}, 
// begin_shape - {1, 2}, // end_shape - {1, 2}, // strides_shape - 0, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 0, // shrink_axis_mask - {1, 2, 3, 4, 5, 6}, // input_data - {1, 0}, // begin_data - {2, 2}, // end_data - {1, 1}, // strides_data - {2, 1, 2}, // output_shape - output_data, // output_data - {4, 5}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In2D_Stride2) { + const int input_shape[] = {2, 2, 3}; + const int begin_shape[] = {1, 2}; + const int end_shape[] = {1, 2}; + const int strides_shape[] = {1, 2}; + const int output_shape[] = {2, 1, 2}; + float input_data[] = {1, 2, 3, 4, 5, 6}; + int32_t begin_data[] = {0, 0}; + int32_t end_data[] = {2, 3}; + int32_t strides_data[] = {2, 2}; + float golden[] = {1, 3}; float output_data[8]; - TestStrideSlide({2, 2, 3}, // input_shape - {1, 2}, // begin_shape - {1, 2}, // end_shape - {1, 2}, // strides_shape - 0, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 0, // shrink_axis_mask - {1, 2, 3, 4, 5, 6}, // input_data - {0, 0}, // begin_data - {2, 3}, // end_data - {2, 2}, // strides_data - {2, 1, 2}, // output_shape - output_data, // output_data - {1, 3}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In2D_NegStride) { + const int input_shape[] = {2, 2, 3}; + const int begin_shape[] = {1, 2}; + const int end_shape[] = {1, 2}; + const int strides_shape[] = {1, 2}; + const int output_shape[] = {2, 1, 3}; + float input_data[] = {1, 2, 3, 4, 5, 6}; + int32_t begin_data[] = {1, -1}; + int32_t end_data[] = {2, -4}; + int32_t strides_data[] = {2, -1}; + float golden[] = {6, 5, 4}; float output_data[8]; - TestStrideSlide({2, 2, 3}, // input_shape - {1, 2}, // begin_shape - {1, 2}, // end_shape - {1, 2}, // strides_shape - 0, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 0, // shrink_axis_mask - {1, 2, 3, 4, 5, 6}, // input_data - {1, -1}, // begin_data - {2, -4}, // end_data - {2, -1}, // strides_data - {2, 1, 3}, // output_shape - output_data, // output_data - {6, 5, 4}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In2D_BeginMask) { + const int input_shape[] = {2, 2, 3}; + const int begin_shape[] = {1, 2}; + const int end_shape[] = {1, 2}; + const int strides_shape[] = {1, 2}; + const int output_shape[] = {2, 2, 2}; + float input_data[] = {1, 2, 3, 4, 5, 6}; + int32_t begin_data[] = {1, 0}; + int32_t end_data[] = {2, 2}; + int32_t strides_data[] = {1, 1}; + float golden[] = {1, 2, 4, 5}; float output_data[8]; - TestStrideSlide({2, 2, 3}, // input_shape - {1, 2}, // begin_shape - {1, 2}, // end_shape - {1, 2}, // strides_shape 
- 1, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 0, // shrink_axis_mask - {1, 2, 3, 4, 5, 6}, // input_data - {1, 0}, // begin_data - {2, 2}, // end_data - {1, 1}, // strides_data - {2, 2, 2}, // output_shape - output_data, // output_data - {1, 2, 4, 5}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {1, 0, 0, 0, 0}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In2D_EndMask) { + const int input_shape[] = {2, 2, 3}; + const int begin_shape[] = {1, 2}; + const int end_shape[] = {1, 2}; + const int strides_shape[] = {1, 2}; + const int output_shape[] = {2, 1, 3}; + float input_data[] = {1, 2, 3, 4, 5, 6}; + int32_t begin_data[] = {1, 0}; + int32_t end_data[] = {2, 2}; + int32_t strides_data[] = {1, 1}; + float golden[] = {4, 5, 6}; float output_data[8]; - TestStrideSlide({2, 2, 3}, // input_shape - {1, 2}, // begin_shape - {1, 2}, // end_shape - {1, 2}, // strides_shape - 0, // begin_mask - 2, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 0, // shrink_axis_mask - {1, 2, 3, 4, 5, 6}, // input_data - {1, 0}, // begin_data - {2, 2}, // end_data - {1, 1}, // strides_data - {2, 1, 3}, // output_shape - output_data, // output_data - {4, 5, 6}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {0, 2, 0, 0, 0}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In2D_NegStrideBeginMask) { + const int input_shape[] = {2, 2, 3}; + const int begin_shape[] = {1, 2}; + const int end_shape[] = {1, 2}; + const int strides_shape[] = {1, 2}; + const int output_shape[] = {2, 1, 3}; + float input_data[] = {1, 2, 3, 4, 5, 6}; + int32_t begin_data[] = {1, -2}; + int32_t end_data[] = {2, -4}; + int32_t strides_data[] = {1, -1}; + float golden[] = {6, 5, 4}; float output_data[8]; - TestStrideSlide({2, 2, 3}, // input_shape - {1, 2}, // begin_shape - {1, 2}, // end_shape - {1, 2}, // strides_shape - 2, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 0, // shrink_axis_mask - {1, 2, 3, 4, 5, 6}, // input_data - {1, -2}, // begin_data - {2, -4}, // end_data - {1, -1}, // strides_data - {2, 1, 3}, // output_shape - output_data, // output_data - {6, 5, 4}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {2, 0, 0, 0, 0}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In2D_NegStrideEndMask) { + const int input_shape[] = {2, 2, 3}; + const int begin_shape[] = {1, 2}; + const int end_shape[] = {1, 2}; + const int strides_shape[] = {1, 2}; + const int output_shape[] = {2, 1, 2}; + float input_data[] = {1, 2, 3, 4, 5, 6}; + int32_t begin_data[] = {1, -2}; + int32_t end_data[] = {2, -3}; + int32_t strides_data[] = {1, -1}; + float golden[] = {5, 4}; float output_data[8]; - TestStrideSlide({2, 2, 3}, // input_shape - {1, 2}, // begin_shape - {1, 2}, // end_shape - {1, 2}, // strides_shape 
- 0, // begin_mask - 2, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 0, // shrink_axis_mask - {1, 2, 3, 4, 5, 6}, // input_data - {1, -2}, // begin_data - {2, -3}, // end_data - {1, -1}, // strides_data - {2, 1, 2}, // output_shape - output_data, // output_data - {5, 4}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {0, 2, 0, 0, 0}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In3D_Identity) { + const int input_shape[] = {3, 2, 3, 2}; + const int begin_shape[] = {1, 3}; + const int end_shape[] = {1, 3}; + const int strides_shape[] = {1, 3}; + const int output_shape[] = {3, 2, 3, 2}; + float input_data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + int32_t begin_data[] = {0, 0, 0}; + int32_t end_data[] = {2, 3, 2}; + int32_t strides_data[] = {1, 1, 1}; + float golden[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; float output_data[16]; - TestStrideSlide( - {3, 2, 3, 2}, // input_shape - {1, 3}, // begin_shape - {1, 3}, // end_shape - {1, 3}, // strides_shape - 0, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 0, // shrink_axis_mask - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, // input_data - {0, 0, 0}, // begin_data - {2, 3, 2}, // end_data - {1, 1, 1}, // strides_data - {3, 2, 3, 2}, // output_shape - output_data, // output_data - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In3D_NegStride) { + const int input_shape[] = {3, 2, 3, 2}; + const int begin_shape[] = {1, 3}; + const int end_shape[] = {1, 3}; + const int strides_shape[] = {1, 3}; + const int output_shape[] = {3, 2, 3, 2}; + float input_data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + int32_t begin_data[] = {0, 0, 0}; + int32_t end_data[] = {2, 3, 2}; + int32_t strides_data[] = {1, 1, 1}; + float golden[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; float output_data[16]; - TestStrideSlide( - {3, 2, 3, 2}, // input_shape - {1, 3}, // begin_shape - {1, 3}, // end_shape - {1, 3}, // strides_shape - 0, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 0, // shrink_axis_mask - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, // input_data - {-1, -1, -1}, // begin_data - {-3, -4, -3}, // end_data - {-1, -1, -1}, // strides_data - {3, 2, 3, 2}, // output_shape - output_data, // output_data - {12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In3D_Strided2) { + const int input_shape[] = {3, 2, 3, 2}; + const int begin_shape[] = {1, 3}; + const int end_shape[] = {1, 3}; + const int strides_shape[] = {1, 3}; + const int output_shape[] = {3, 1, 2, 1}; + float input_data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + int32_t 
begin_data[] = {0, 0, 0}; + int32_t end_data[] = {2, 3, 2}; + int32_t strides_data[] = {2, 2, 2}; + float golden[] = {1, 5}; float output_data[16]; - TestStrideSlide({3, 2, 3, 2}, // input_shape - {1, 3}, // begin_shape - {1, 3}, // end_shape - {1, 3}, // strides_shape - 0, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 0, // shrink_axis_mask - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, // input_data - {0, 0, 0}, // begin_data - {2, 3, 2}, // end_data - {2, 2, 2}, // strides_data - {3, 1, 2, 1}, // output_shape - output_data, // output_data - {1, 5}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In1D_ShrinkAxisMask1) { - float output_data[4]; - TestStrideSlide({1, 4}, // input_shape - {1, 1}, // begin_shape - {1, 1}, // end_shape - {1, 1}, // strides_shape - 0, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 1, // shrink_axis_mask - {1, 2, 3, 4}, // input_data - {1}, // begin_data - {2}, // end_data - {1}, // strides_data - {0}, // output_shape - output_data, // output_data - {2}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + const int input_shape[] = {3, 2, 3, 2}; + const int begin_shape[] = {1, 3}; + const int end_shape[] = {1, 3}; + const int strides_shape[] = {1, 3}; + const int output_shape[] = {3, 2, 3, 2}; + float input_data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + int32_t begin_data[] = {0, 0, 0}; + int32_t end_data[] = {2, 3, 2}; + int32_t strides_data[] = {1, 1, 1}; + float golden[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + float output_data[16]; + + TfLiteStridedSliceParams builtin_data = {}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In1D_ShrinkAxisMask1_NegativeSlice) { + const int input_shape[] = {1, 4}; + const int begin_shape[] = {1, 1}; + const int end_shape[] = {1, 1}; + const int strides_shape[] = {1, 1}; + const int output_shape[] = {0}; + float input_data[] = {0, 1, 2, 3}; + int32_t begin_data[] = {-1}; + int32_t end_data[] = {0}; + int32_t strides_data[] = {1}; + float golden[] = {3}; float output_data[4]; - TestStrideSlide({1, 4}, // input_shape - {1, 1}, // begin_shape - {1, 1}, // end_shape - {1, 1}, // strides_shape - 0, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 1, // shrink_axis_mask - {0, 1, 2, 3}, // input_data - {-1}, // begin_data - {0}, // end_data - {1}, // strides_data - {0}, // output_shape - output_data, // output_data - {3}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {0, 0, 0, 0, 1}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In2D_ShrinkAxis3_NegativeSlice) { + const int input_shape[] = {2, 4, 1}; + const int begin_shape[] = {1, 2}; + const int end_shape[] = {1, 2}; + const int strides_shape[] = {1, 2}; + const int output_shape[] = {0}; + float input_data[] 
= {0, 1, 2, 3}; + int32_t begin_data[] = {-2, -1}; + int32_t end_data[] = {-1, 0}; + int32_t strides_data[] = {1, 1}; + float golden[] = {2}; float output_data[4]; - TestStrideSlide({2, 4, 1}, // input_shape - {1, 2}, // begin_shape - {1, 2}, // end_shape - {1, 2}, // strides_shape - 0, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 3, // shrink_axis_mask - {0, 1, 2, 3}, // input_data - {-2, -1}, // begin_data - {-1, 0}, // end_data - {1, 1}, // strides_data - {0}, // output_shape - output_data, // output_data - {2}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {0, 0, 0, 0, 3}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In2D_ShrinkAxis2_BeginEndAxis1_NegativeSlice) { + const int input_shape[] = {2, 4, 1}; + const int begin_shape[] = {1, 2}; + const int end_shape[] = {1, 2}; + const int strides_shape[] = {1, 2}; + const int output_shape[] = {1, 4}; + float input_data[] = {0, 1, 2, 3}; + int32_t begin_data[] = {0, -1}; + int32_t end_data[] = {0, 0}; + int32_t strides_data[] = {1, 1}; + float golden[] = {0, 1, 2, 3}; float output_data[4]; - TestStrideSlide({2, 4, 1}, // input_shape - {1, 2}, // begin_shape - {1, 2}, // end_shape - {1, 2}, // strides_shape - 1, // begin_mask - 1, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 2, // shrink_axis_mask - {0, 1, 2, 3}, // input_data - {0, -1}, // begin_data - {0, 0}, // end_data - {1, 1}, // strides_data - {1, 4}, // output_shape - output_data, // output_data - {0, 1, 2, 3}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {1, 1, 0, 0, 2}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In1D_BeginMaskShrinkAxisMask1) { + const int input_shape[] = {1, 4}; + const int begin_shape[] = {1, 1}; + const int end_shape[] = {1, 1}; + const int strides_shape[] = {1, 1}; + const int output_shape[] = {0}; + float input_data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + int32_t begin_data[] = {1}; + int32_t end_data[] = {1}; + int32_t strides_data[] = {1}; + float golden[] = {1}; float output_data[4]; - TestStrideSlide({1, 4}, // input_shape - {1, 1}, // begin_shape - {1, 1}, // end_shape - {1, 1}, // strides_shape - 1, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 1, // shrink_axis_mask - {1, 2, 3, 4}, // input_data - {1}, // begin_data - {1}, // end_data - {1}, // strides_data - {0}, // output_shape - output_data, // output_data - {1}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {1, 0, 0, 0, 1}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In2D_ShrinkAxisMask1) { + const int input_shape[] = {2, 2, 3}; + const int begin_shape[] = {1, 2}; + const int end_shape[] = {1, 2}; + const int strides_shape[] = {1, 2}; + const int output_shape[] = {1, 3}; + float input_data[] = {1, 2, 3, 4, 5, 6}; + int32_t 
begin_data[] = {0, 0}; + int32_t end_data[] = {1, 3}; + int32_t strides_data[] = {1, 1}; + float golden[] = {1, 2, 3}; float output_data[6]; - TestStrideSlide({2, 2, 3}, // input_shape - {1, 2}, // begin_shape - {1, 2}, // end_shape - {1, 2}, // strides_shape - 0, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 1, // shrink_axis_mask - {1, 2, 3, 4, 5, 6}, // input_data - {0, 0}, // begin_data - {1, 3}, // end_data - {1, 1}, // strides_data - {1, 3}, // output_shape - output_data, // output_data - {1, 2, 3}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {0, 0, 0, 0, 1}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In2D_ShrinkAxisMask2) { + const int input_shape[] = {2, 2, 3}; + const int begin_shape[] = {1, 2}; + const int end_shape[] = {1, 2}; + const int strides_shape[] = {1, 2}; + const int output_shape[] = {1, 2}; + float input_data[] = {1, 2, 3, 4, 5, 6}; + int32_t begin_data[] = {0, 0}; + int32_t end_data[] = {2, 1}; + int32_t strides_data[] = {1, 1}; + float golden[] = {1, 4}; float output_data[6]; - TestStrideSlide({2, 2, 3}, // input_shape - {1, 2}, // begin_shape - {1, 2}, // end_shape - {1, 2}, // strides_shape - 0, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 2, // shrink_axis_mask - {1, 2, 3, 4, 5, 6}, // input_data - {0, 0}, // begin_data - {2, 1}, // end_data - {1, 1}, // strides_data - {1, 2}, // output_shape - output_data, // output_data - {1, 4}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {0, 0, 0, 0, 2}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In2D_ShrinkAxisMask3) { + const int input_shape[] = {2, 2, 3}; + const int begin_shape[] = {1, 2}; + const int end_shape[] = {1, 2}; + const int strides_shape[] = {1, 2}; + const int output_shape[] = {0}; + float input_data[] = {1, 2, 3, 4, 5, 6}; + int32_t begin_data[] = {0, 0}; + int32_t end_data[] = {1, 1}; + int32_t strides_data[] = {1, 1}; + float golden[] = {1}; float output_data[6]; - TestStrideSlide({2, 2, 3}, // input_shape - {1, 2}, // begin_shape - {1, 2}, // end_shape - {1, 2}, // strides_shape - 0, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 3, // shrink_axis_mask - {1, 2, 3, 4, 5, 6}, // input_data - {0, 0}, // begin_data - {1, 1}, // end_data - {1, 1}, // strides_data - {0}, // output_shape - output_data, // output_data - {1}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {0, 0, 0, 0, 3}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In3D_IdentityShrinkAxis1) { + const int input_shape[] = {3, 2, 3, 2}; + const int begin_shape[] = {1, 3}; + const int end_shape[] = {1, 3}; + const int strides_shape[] = {1, 3}; + const int output_shape[] = {2, 3, 2}; + float input_data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + int32_t begin_data[] = 
{0, 0, 0}; + int32_t end_data[] = {1, 3, 2}; + int32_t strides_data[] = {1, 1, 1}; + float golden[] = {1, 2, 3, 4, 5, 6}; float output_data[16]; - TestStrideSlide({3, 2, 3, 2}, // input_shape - {1, 3}, // begin_shape - {1, 3}, // end_shape - {1, 3}, // strides_shape - 0, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 1, // shrink_axis_mask - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, // input_data - {0, 0, 0}, // begin_data - {1, 3, 2}, // end_data - {1, 1, 1}, // strides_data - {2, 3, 2}, // output_shape - output_data, // output_data - {1, 2, 3, 4, 5, 6}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {0, 0, 0, 0, 1}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In3D_IdentityShrinkAxis2) { + const int input_shape[] = {3, 2, 3, 2}; + const int begin_shape[] = {1, 3}; + const int end_shape[] = {1, 3}; + const int strides_shape[] = {1, 3}; + const int output_shape[] = {2, 2, 2}; + float input_data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + int32_t begin_data[] = {0, 0, 0}; + int32_t end_data[] = {2, 1, 2}; + int32_t strides_data[] = {1, 1, 1}; + float golden[] = {1, 2, 7, 8}; float output_data[16]; - TestStrideSlide({3, 2, 3, 2}, // input_shape - {1, 3}, // begin_shape - {1, 3}, // end_shape - {1, 3}, // strides_shape - 0, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 2, // shrink_axis_mask - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, // input_data - {0, 0, 0}, // begin_data - {2, 1, 2}, // end_data - {1, 1, 1}, // strides_data - {2, 2, 2}, // output_shape - output_data, // output_data - {1, 2, 7, 8}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {0, 0, 0, 0, 2}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In3D_IdentityShrinkAxis3) { + const int input_shape[] = {3, 2, 3, 2}; + const int begin_shape[] = {1, 3}; + const int end_shape[] = {1, 3}; + const int strides_shape[] = {1, 3}; + const int output_shape[] = {1, 2}; + float input_data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + int32_t begin_data[] = {0, 0, 0}; + int32_t end_data[] = {1, 1, 2}; + int32_t strides_data[] = {1, 1, 1}; + float golden[] = {1, 2}; float output_data[16]; - TestStrideSlide({3, 2, 3, 2}, // input_shape - {1, 3}, // begin_shape - {1, 3}, // end_shape - {1, 3}, // strides_shape - 0, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 3, // shrink_axis_mask - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, // input_data - {0, 0, 0}, // begin_data - {1, 1, 2}, // end_data - {1, 1, 1}, // strides_data - {1, 2}, // output_shape - output_data, // output_data - {1, 2}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {0, 0, 0, 0, 3}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In3D_IdentityShrinkAxis4) { + const int input_shape[] = {3, 2, 3, 2}; + const int 
begin_shape[] = {1, 3}; + const int end_shape[] = {1, 3}; + const int strides_shape[] = {1, 3}; + const int output_shape[] = {2, 2, 3}; + float input_data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + int32_t begin_data[] = {0, 0, 0}; + int32_t end_data[] = {2, 3, 2}; + int32_t strides_data[] = {1, 1, 1}; + float golden[] = {1, 3, 5, 7, 9, 11}; float output_data[16]; - TestStrideSlide({3, 2, 3, 2}, // input_shape - {1, 3}, // begin_shape - {1, 3}, // end_shape - {1, 3}, // strides_shape - 0, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 4, // shrink_axis_mask - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, // input_data - {0, 0, 0}, // begin_data - {2, 3, 1}, // end_data - {1, 1, 1}, // strides_data - {2, 2, 3}, // output_shape - output_data, // output_data - {1, 3, 5, 7, 9, 11}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {0, 0, 0, 0, 4}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In3D_IdentityShrinkAxis5) { + const int input_shape[] = {3, 2, 3, 2}; + const int begin_shape[] = {1, 3}; + const int end_shape[] = {1, 3}; + const int strides_shape[] = {1, 3}; + const int output_shape[] = {1, 3}; + float input_data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + int32_t begin_data[] = {0, 0, 0}; + int32_t end_data[] = {1, 3, 1}; + int32_t strides_data[] = {1, 1, 1}; + float golden[] = {1, 3, 5}; float output_data[16]; - TestStrideSlide({3, 2, 3, 2}, // input_shape - {1, 3}, // begin_shape - {1, 3}, // end_shape - {1, 3}, // strides_shape - 0, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 5, // shrink_axis_mask - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, // input_data - {0, 0, 0}, // begin_data - {1, 3, 1}, // end_data - {1, 1, 1}, // strides_data - {1, 3}, // output_shape - output_data, // output_data - {1, 3, 5}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {0, 0, 0, 0, 5}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In3D_IdentityShrinkAxis6) { + const int input_shape[] = {3, 2, 3, 2}; + const int begin_shape[] = {1, 3}; + const int end_shape[] = {1, 3}; + const int strides_shape[] = {1, 3}; + const int output_shape[] = {1, 2}; + float input_data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + int32_t begin_data[] = {0, 0, 0}; + int32_t end_data[] = {2, 1, 1}; + int32_t strides_data[] = {1, 1, 1}; + float golden[] = {1, 7}; float output_data[16]; - TestStrideSlide({3, 2, 3, 2}, // input_shape - {1, 3}, // begin_shape - {1, 3}, // end_shape - {1, 3}, // strides_shape - 0, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 6, // shrink_axis_mask - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, // input_data - {0, 0, 0}, // begin_data - {2, 1, 1}, // end_data - {1, 1, 1}, // strides_data - {1, 2}, // output_shape - output_data, // output_data - {1, 7}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {0, 0, 0, 0, 6}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, 
&builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In3D_IdentityShrinkAxis7) { + const int input_shape[] = {3, 2, 3, 2}; + const int begin_shape[] = {1, 3}; + const int end_shape[] = {1, 3}; + const int strides_shape[] = {1, 3}; + const int output_shape[] = {0}; + float input_data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + int32_t begin_data[] = {0, 0, 0}; + int32_t end_data[] = {1, 1, 1}; + int32_t strides_data[] = {1, 1, 1}; + float golden[] = {1}; float output_data[16]; - TestStrideSlide({3, 2, 3, 2}, // input_shape - {1, 3}, // begin_shape - {1, 3}, // end_shape - {1, 3}, // strides_shape - 0, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 7, // shrink_axis_mask - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, // input_data - {0, 0, 0}, // begin_data - {1, 1, 1}, // end_data - {1, 1, 1}, // strides_data - {0}, // output_shape - output_data, // output_data - {1}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {0, 0, 0, 0, 7}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } // This tests catches a very subtle bug that was fixed by cl/188403234. TF_LITE_MICRO_TEST(RunTwice) { - float output_data[6]; - TestStrideSlide({2, 2, 3}, // input_shape - {1, 2}, // begin_shape - {1, 2}, // end_shape - {1, 2}, // strides_shape - 1, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 0, // shrink_axis_mask - {1, 2, 3, 4, 5, 6}, // input_data - {1, 0}, // begin_data - {2, 2}, // end_data - {1, 1}, // strides_data - {2, 2, 2}, // output_shape - output_data, // output_data - {1, 2, 4, 5}, // expected_output - false, // expect_prepare_err - false, // expect_invoke_err - 2 // num_invoke - ); + const int input_shape[] = {2, 2, 3}; + const int begin_shape[] = {1, 2}; + const int end_shape[] = {1, 2}; + const int strides_shape[] = {1, 2}; + const int output_shape[] = {2, 2, 2}; + float input_data[] = {1, 2, 3, 4, 5, 6}; + int32_t begin_data[] = {1, 0}; + int32_t end_data[] = {2, 2}; + int32_t strides_data[] = {1, 1}; + float golden[] = {1, 2, 4, 5}; + float output_data[16]; + + TfLiteStridedSliceParams builtin_data = {1, 0, 0, 0, 0}; + + tflite::testing::TestStridedSliceFloat( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false, 2); } TF_LITE_MICRO_TEST(In3D_IdentityShrinkAxis1Uint8) { + const int input_shape[] = {3, 2, 3, 2}; + const int begin_shape[] = {1, 3}; + const int end_shape[] = {1, 3}; + const int strides_shape[] = {1, 3}; + const int output_shape[] = {2, 3, 2}; + uint8_t input_data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + int32_t begin_data[] = {0, 0, 0}; + int32_t end_data[] = {1, 3, 2}; + int32_t strides_data[] = {1, 1, 1}; + uint8_t golden[] = {1, 2, 3, 4, 5, 6}; uint8_t output_data[12]; - TestStrideSlide( - {3, 2, 3, 2}, // input_shape - {1, 3}, // begin_shape - {1, 3}, // end_shape - {1, 3}, // strides_shape - 0, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 1, // shrink_axis_mask - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, // input_data - {0, 0, 0}, // begin_data - {1, 3, 2}, // end_data - {1, 1, 1}, // strides_data - {2, 3, 2}, // output_shape - output_data, // output_data - {1, 
2, 3, 4, 5, 6}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {0, 0, 0, 0, 1}; + + tflite::testing::TestStridedSliceQuantized( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TEST(In3D_IdentityShrinkAxis1int8) { + const int input_shape[] = {3, 2, 3, 2}; + const int begin_shape[] = {1, 3}; + const int end_shape[] = {1, 3}; + const int strides_shape[] = {1, 3}; + const int output_shape[] = {2, 3, 2}; + int8_t input_data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + int32_t begin_data[] = {0, 0, 0}; + int32_t end_data[] = {1, 3, 2}; + int32_t strides_data[] = {1, 1, 1}; + int8_t golden[] = {1, 2, 3, 4, 5, 6}; int8_t output_data[12]; - TestStrideSlide( - {3, 2, 3, 2}, // input_shape - {1, 3}, // begin_shape - {1, 3}, // end_shape - {1, 3}, // strides_shape - 0, // begin_mask - 0, // end_mask - 0, // ellipsis_mask - 0, // new_axis_mask - 1, // shrink_axis_mask - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, // input_data - {0, 0, 0}, // begin_data - {1, 3, 2}, // end_data - {1, 1, 1}, // strides_data - {2, 3, 2}, // output_shape - output_data, // output_data - {1, 2, 3, 4, 5, 6}, // expected_output - false, // expect_prepare_err - false // expect_invoke_err - ); + + TfLiteStridedSliceParams builtin_data = {0, 0, 0, 0, 1}; + + tflite::testing::TestStridedSliceQuantized( + input_shape, begin_shape, end_shape, strides_shape, &builtin_data, + input_data, begin_data, end_data, strides_data, output_shape, output_data, + golden, false); } TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/kernels/sub.cc b/tensorflow/lite/micro/kernels/sub.cc index 6c3dc5f917b..8ba1594932f 100644 --- a/tensorflow/lite/micro/kernels/sub.cc +++ b/tensorflow/lite/micro/kernels/sub.cc @@ -21,8 +21,10 @@ limitations under the License. 
#include "tensorflow/lite/kernels/internal/quantization_util.h" #include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "tensorflow/lite/kernels/internal/types.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/op_macros.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { namespace ops { @@ -93,31 +95,59 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteSubParams* params, return kTfLiteOk; } +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpData)); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + OpData* data = static_cast(node->user_data); + auto* params = reinterpret_cast(node->builtin_data); + + const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); + const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + TF_LITE_ENSURE_STATUS( + CalculateOpData(context, params, input1, input2, output, data)); + return kTfLiteOk; +} + void EvalSub(TfLiteContext* context, TfLiteNode* node, TfLiteSubParams* params, - const OpData* data, const TfLiteTensor* input1, - const TfLiteTensor* input2, TfLiteTensor* output) { + const OpData* data, const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) { float output_activation_min, output_activation_max; CalculateActivationRange(params->activation, &output_activation_min, &output_activation_max); tflite::ArithmeticParams op_params; SetActivationParams(output_activation_min, output_activation_max, &op_params); -#define TF_LITE_SUB(opname) \ - opname(op_params, GetTensorShape(input1), GetTensorData(input1), \ - GetTensorShape(input2), GetTensorData(input2), \ - GetTensorShape(output), GetTensorData(output)) if (data->requires_broadcast) { - TF_LITE_SUB(tflite::reference_ops::BroadcastSubSlow); + tflite::reference_ops::BroadcastSubSlow( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } else { - TF_LITE_SUB(tflite::reference_ops::SubWithActivation); + tflite::reference_ops::SubWithActivation( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } -#undef TF_LITE_SUB } TfLiteStatus EvalSubQuantized(TfLiteContext* context, TfLiteNode* node, TfLiteSubParams* params, const OpData* data, - const TfLiteTensor* input1, - const TfLiteTensor* input2, - TfLiteTensor* output) { + const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, + TfLiteEvalTensor* output) { if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) { tflite::ArithmeticParams op_params; op_params.left_shift = data->left_shift; @@ -133,25 +163,46 @@ TfLiteStatus EvalSubQuantized(TfLiteContext* context, TfLiteNode* node, SetActivationParams(data->output_activation_min, data->output_activation_max, &op_params); bool need_broadcast = 
reference_ops::ProcessBroadcastShapes( - GetTensorShape(input1), GetTensorShape(input2), &op_params); -#define TF_LITE_SUB(opname, dtype) \ - opname(op_params, GetTensorShape(input1), GetTensorData(input1), \ - GetTensorShape(input2), GetTensorData(input2), \ - GetTensorShape(output), GetTensorData(output)); + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorShape(input2), &op_params); + if (output->type == kTfLiteInt8) { if (need_broadcast) { - TF_LITE_SUB(tflite::reference_ops::BroadcastSubSlow, int8_t); + tflite::reference_ops::BroadcastSubSlow( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } else { - TF_LITE_SUB(tflite::reference_ops::Sub, int8_t); + tflite::reference_ops::Sub( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } } else { if (need_broadcast) { - TF_LITE_SUB(tflite::reference_ops::BroadcastSubSlow, uint8_t); + tflite::reference_ops::BroadcastSubSlow( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } else { - TF_LITE_SUB(tflite::reference_ops::Sub, uint8_t); + tflite::reference_ops::Sub( + op_params, tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorData(input2), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } } -#undef TF_LITE_SUB } return kTfLiteOk; @@ -160,13 +211,15 @@ TfLiteStatus EvalSubQuantized(TfLiteContext* context, TfLiteNode* node, TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { auto* params = reinterpret_cast(node->builtin_data); - const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); - const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kInputTensor1); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kInputTensor2); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); - OpData data; - TF_LITE_ENSURE_STATUS( - CalculateOpData(context, params, input1, input2, output, &data)); + TFLITE_DCHECK(node->user_data != nullptr); + const OpData& data = *(static_cast(node->user_data)); if (output->type == kTfLiteFloat32) { EvalSub(context, node, params, &data, input1, input2, output); @@ -185,9 +238,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace sub TfLiteRegistration Register_SUB() { - return {/*init=*/nullptr, + return {/*init=*/sub::Init, /*free=*/nullptr, - /*prepare=*/nullptr, + /*prepare=*/sub::Prepare, /*invoke=*/sub::Eval, /*profiling_string=*/nullptr, /*builtin_code=*/0, diff --git a/tensorflow/lite/micro/kernels/sub_test.cc b/tensorflow/lite/micro/kernels/sub_test.cc index 9c8d476352e..fdfe4234c64 100644 --- a/tensorflow/lite/micro/kernels/sub_test.cc +++ b/tensorflow/lite/micro/kernels/sub_test.cc @@ -17,7 +17,7 @@ 
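The sub.cc hunks above follow the TFLite Micro kernel lifecycle: Init allocates one OpData per node from the persistent arena, Prepare reads the full TfLiteTensors (which still carry quantization metadata) to fill that OpData, and Eval touches only lightweight TfLiteEvalTensors. A condensed sketch of the float path under those assumptions, not the verbatim kernel, with the template arguments spelled out; OpData, CalculateOpData and the tensor-index constants are the ones defined in the hunks above, and the same headers listed at the top of sub.cc are assumed:

  #include "tensorflow/lite/c/builtin_op_data.h"
  #include "tensorflow/lite/c/common.h"
  #include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
  #include "tensorflow/lite/kernels/kernel_util.h"
  #include "tensorflow/lite/micro/kernels/kernel_util.h"

  void* Init(TfLiteContext* context, const char* buffer, size_t length) {
    // One persistent OpData per node, allocated once and reused across Evals.
    return context->AllocatePersistentBuffer(context, sizeof(OpData));
  }

  TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
    OpData* data = static_cast<OpData*>(node->user_data);
    auto* params = reinterpret_cast<TfLiteSubParams*>(node->builtin_data);
    // Full tensors (with scale/zero-point) are only available at Prepare time.
    const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
    const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
    TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
    return CalculateOpData(context, params, input1, input2, output, data);
  }

  TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
    auto* params = reinterpret_cast<TfLiteSubParams*>(node->builtin_data);
    const TfLiteEvalTensor* input1 =
        tflite::micro::GetEvalInput(context, node, kInputTensor1);
    const TfLiteEvalTensor* input2 =
        tflite::micro::GetEvalInput(context, node, kInputTensor2);
    TfLiteEvalTensor* output =
        tflite::micro::GetEvalOutput(context, node, kOutputTensor);

    float activation_min, activation_max;
    CalculateActivationRange(params->activation, &activation_min,
                             &activation_max);
    tflite::ArithmeticParams op_params;
    SetActivationParams(activation_min, activation_max, &op_params);

    // Float path only; the quantized path mirrors this with int8_t/uint8_t
    // template arguments and the scale/zero-point values cached in OpData.
    tflite::reference_ops::SubWithActivation(
        op_params, tflite::micro::GetTensorShape(input1),
        tflite::micro::GetTensorData<float>(input1),
        tflite::micro::GetTensorShape(input2),
        tflite::micro::GetTensorData<float>(input2),
        tflite::micro::GetTensorShape(output),
        tflite::micro::GetTensorData<float>(output));
    return kTfLiteOk;
  }

On the test side, the sub_test.cc hunk that follows replaces the hand-rolled TfLiteContext/TfLiteNode plumbing with micro::KernelRunner, whose InitAndPrepare() and Invoke() drive exactly this Init/Prepare/Eval sequence.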
limitations under the License. #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/micro/all_ops_resolver.h" +#include "tensorflow/lite/micro/kernels/kernel_runner.h" #include "tensorflow/lite/micro/testing/micro_test.h" #include "tensorflow/lite/micro/testing/test_utils.h" @@ -66,47 +66,21 @@ void ValidateSubGoldens(TfLiteTensor* tensors, int tensors_size, const T* golden, T* output, int output_size, TfLiteFusedActivation activation, float tolerance = 1e-5) { - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(::tflite::BuiltinOperator_SUB); - - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - TfLiteSubParams builtin_data; builtin_data.activation = activation; - const char* init_data = reinterpret_cast(&builtin_data); - const size_t init_data_size = 0; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, init_data, init_data_size); - } - int inputs_array_data[] = {2, 0, 1}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 2}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = reinterpret_cast(&builtin_data); - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; + const TfLiteRegistration registration = tflite::ops::micro::Register_SUB(); + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, &builtin_data, + micro_test::reporter); - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - - if (registration->free) { - registration->free(&context, user_data); - } + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); for (int i = 0; i < output_size; ++i) { TF_LITE_MICRO_EXPECT_NEAR(golden[i], output[i], tolerance); From 9872ddd15a66986d1fe7c4f756609b78540db3d4 Mon Sep 17 00:00:00 2001 From: Russell Power Date: Mon, 27 Jul 2020 15:28:42 -0700 Subject: [PATCH 1414/2522] Adjust TPU build dependencies. 
PiperOrigin-RevId: 323453889 Change-Id: I6b33b57830d3414d45aaa066735c3b52f217e739 --- tensorflow/compiler/jit/BUILD | 2 + .../jit/encapsulate_subgraphs_pass_test.cc | 53 +++++++++++---- .../jit/extract_outside_compilation_pass.cc | 1 + .../extract_outside_compilation_pass_test.cc | 13 ---- tensorflow/core/BUILD | 1 + .../base_api/api_def_DataServiceDataset.pbtxt | 3 + .../base_api/api_def_KthOrderStatistic.pbtxt | 20 ++++++ .../api_def/base_api/api_def_MakeUnique.pbtxt | 10 +++ .../api_def/base_api/api_def_TPUCompile.pbtxt | 21 ++++++ .../api_def_TPUCompileSucceededAssert.pbtxt | 9 +++ .../api_def/base_api/api_def_TPUExecute.pbtxt | 7 ++ ...api_def_TPUExecuteAndUpdateVariables.pbtxt | 13 ++++ .../api_def_TPUPartitionedInput.pbtxt | 23 +++++++ .../api_def_TPUPartitionedOutput.pbtxt | 25 +++++++ .../api_def/base_api/api_def_TopKUnique.pbtxt | 18 +++++ .../base_api/api_def_TopKWithUnique.pbtxt | 10 +++ .../base_api/api_def_XlaHostCompute.pbtxt | 66 +++++++++++++++++++ .../base_api/api_def_XlaRecvFromHost.pbtxt | 3 + .../base_api/api_def_XlaSendToHost.pbtxt | 12 ++++ tensorflow/core/common_runtime/BUILD | 3 +- tensorflow/core/tpu/BUILD | 59 ++++++++--------- tensorflow/core/tpu/kernels/BUILD | 41 +++++++----- tensorflow/core/tpu/kernels/tpu_execute_op.cc | 21 +++--- tensorflow/core/tpu/ops/BUILD | 18 +++++ tensorflow/core/tpu/ops/host_compute_ops.cc | 55 +++------------- tensorflow/core/tpu/ops/topk_ops.cc | 53 ++------------- tensorflow/core/tpu/ops/tpu_compile_op.cc | 48 +------------- tensorflow/core/tpu/ops/tpu_execute_op.cc | 15 +---- .../core/tpu/ops/tpu_partitioned_input_op.cc | 10 +-- .../core/tpu/ops/tpu_partitioned_output_op.cc | 11 +--- tensorflow/core/tpu/tpu_execute.cc | 5 -- tensorflow/core/tpu/tpu_system_device.cc | 1 - tensorflow/python/BUILD | 4 +- tensorflow/stream_executor/tpu/BUILD | 18 ++--- .../stream_executor/tpu/c_api_conversions.cc | 2 + .../stream_executor/tpu/tpu_executor_c_api.h | 1 - 36 files changed, 393 insertions(+), 282 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_DataServiceDataset.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_KthOrderStatistic.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_MakeUnique.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_TPUCompile.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_TPUCompileSucceededAssert.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_TPUExecute.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_TPUExecuteAndUpdateVariables.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_TPUPartitionedInput.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_TPUPartitionedOutput.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_TopKUnique.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_TopKWithUnique.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_XlaHostCompute.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_XlaRecvFromHost.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_XlaSendToHost.pbtxt diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index ccb83954d24..b52a350dc48 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -225,6 +225,8 @@ cc_library( "xla_device_context.h", "xla_device_ops.h", ], + # Public visibility is needed for external TF/XLA backends. 
+ visibility = ["//visibility:public"], deps = XLA_DEVICE_DEPS, ) diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc index 6640a5d5dba..efd2ef24c3b 100644 --- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc +++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc @@ -295,19 +295,6 @@ bool EqualFunctionDefLibrary(const FunctionDefLibrary& expected, << diff << "\nActual: " << actual.DebugString(); \ } while (false) -// These dummy Op registrations are here because the real Op registrations live -// in contrib and there can't be a dependence from this test to contrib. -REGISTER_OP("XlaHostCompute") - .Input("inputs: Tinputs") - .Output("outputs: Toutputs") - .Attr("Tinputs: list(type) >= 0") - .Attr("Toutputs: list(type) >= 0") - .Attr("ancestors: list(string) >= 0") - .Attr("key: string") - .Attr("shape_inference_graph: func") - .Attr("shapes: list(shape) >= 0") - .SetShapeFn(::tensorflow::shape_inference::UnknownShape); - REGISTER_OP("InputTest") .Output("o: float") .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { @@ -947,6 +934,8 @@ TEST(EncapsulateSubgraphsTest, OneFunctionOneOutside) { {"ancestors", absl::Span({})}, {"key", "host_compute_channel_F1_F1_O1"}, {"shape_inference_graph", shape_inference_graph}, + {"tpu_core", 0}, + {"cost_estimate_ns", 1000000}, {"shapes", absl::Span({})}, {"_outside_compilation_subgraph", "O1"}, {"_xla_token_input_nodes", @@ -1114,6 +1103,8 @@ TEST(EncapsulateSubgraphsTest, OneFunctionTwoOutside) { {"ancestors", absl::Span({})}, {"key", "host_compute_channel_F1_F1_O2"}, {"shape_inference_graph", shape_inference_graph2}, + {"tpu_core", 0}, + {"cost_estimate_ns", 1000000}, {"shapes", absl::Span({})}, {"_outside_compilation_subgraph", "O2"}, {"_xla_token_input_nodes", @@ -1130,6 +1121,8 @@ TEST(EncapsulateSubgraphsTest, OneFunctionTwoOutside) { {"ancestors", absl::Span({})}, {"key", "host_compute_channel_F1_F1_O1"}, {"shape_inference_graph", shape_inference_graph1}, + {"tpu_core", 0}, + {"cost_estimate_ns", 1000000}, {"shapes", absl::Span({})}, {"_outside_compilation_subgraph", "O1"}, {"_xla_token_input_nodes", @@ -1266,6 +1259,8 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutside) { {"ancestors", absl::Span({})}, {"key", "host_compute_channel_F1_F1_O1"}, {"shape_inference_graph", NameAttrList()}, + {"tpu_core", 0}, + {"cost_estimate_ns", 1000000}, {"shapes", absl::Span({shape_proto_expected})}, {"_outside_compilation_subgraph", "O1"}, @@ -1295,6 +1290,8 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutside) { {"ancestors", absl::Span({})}, {"key", "host_compute_channel_F2_F2_O1"}, {"shape_inference_graph", NameAttrList()}, + {"tpu_core", 0}, + {"cost_estimate_ns", 1000000}, {"shapes", absl::Span({shape_proto_expected})}, {"_outside_compilation_subgraph", "O1"}, @@ -1428,6 +1425,8 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutsideDependencyFromOutside) { {"ancestors", absl::Span({})}, {"key", "host_compute_channel_F1_F1_O1"}, {"shape_inference_graph", NameAttrList()}, + {"tpu_core", 0}, + {"cost_estimate_ns", 1000000}, {"shapes", absl::Span({shape_proto_expected})}, {"_outside_compilation_subgraph", "O1"}, @@ -1454,6 +1453,8 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutsideDependencyFromOutside) { {"ancestors", absl::Span({})}, {"key", "host_compute_channel_F2_F2_O1"}, {"shape_inference_graph", NameAttrList()}, + {"tpu_core", 0}, + {"cost_estimate_ns", 1000000}, {"shapes", absl::Span({shape_proto_expected})}, 
{"_outside_compilation_subgraph", "O1"}, @@ -1566,6 +1567,8 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationNoInputs) { {"ancestors", absl::Span({})}, {"key", "host_compute_channel_F1_F1_O1"}, {"shape_inference_graph", NameAttrList()}, + {"tpu_core", 0}, + {"cost_estimate_ns", 1000000}, {"shapes", absl::Span({shape_proto_expected})}, {"_outside_compilation_subgraph", "O1"}, @@ -1658,6 +1661,8 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationControlInput) { {"ancestors", absl::Span({})}, {"key", "host_compute_channel_F1_F1_O1"}, {"shape_inference_graph", NameAttrList()}, + {"tpu_core", 0}, + {"cost_estimate_ns", 1000000}, {"shapes", absl::Span({shape_proto_expected})}, {"_outside_compilation_subgraph", "O1"}, @@ -1765,6 +1770,8 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationNoOutputs) { {"ancestors", absl::Span({})}, {"key", "host_compute_channel_F1_F1_O1"}, {"shape_inference_graph", shape_inference_graph}, + {"tpu_core", 0}, + {"cost_estimate_ns", 1000000}, {"shapes", absl::Span({})}, {"_outside_compilation_subgraph", "O1"}, {"_xla_token_input_nodes", @@ -1875,6 +1882,8 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationControlOutput) { {"ancestors", absl::Span({})}, {"key", "host_compute_channel_F1_F1_O1"}, {"shape_inference_graph", shape_inference_graph}, + {"tpu_core", 0}, + {"cost_estimate_ns", 1000000}, {"shapes", absl::Span({})}, {"_outside_compilation_subgraph", "O1"}, {"_xla_token_input_nodes", @@ -2009,6 +2018,8 @@ TEST(EncapsulateSubgraphsTest, {"ancestors", absl::Span({})}, {"key", "host_compute_channel_F1_F1_O1"}, {"shape_inference_graph", shape_inference_graph1}, + {"tpu_core", 0}, + {"cost_estimate_ns", 1000000}, {"shapes", absl::Span({})}, {"_outside_compilation_subgraph", "O1"}, {"_xla_token_input_nodes", @@ -2023,6 +2034,8 @@ TEST(EncapsulateSubgraphsTest, {"ancestors", absl::Span({})}, {"key", "host_compute_channel_F1_F1_O2"}, {"shape_inference_graph", shape_inference_graph2}, + {"tpu_core", 0}, + {"cost_estimate_ns", 1000000}, {"shapes", absl::Span({})}, {"_outside_compilation_subgraph", "O2"}, {"_xla_token_input_nodes", @@ -2153,6 +2166,8 @@ TEST(EncapsulateSubgraphsTest, {"ancestors", absl::Span({})}, {"key", "host_compute_channel_F1_F1_O2"}, {"shape_inference_graph", NameAttrList()}, + {"tpu_core", 0}, + {"cost_estimate_ns", 1000000}, {"shapes", absl::Span({})}, {"_outside_compilation_subgraph", "O2"}, {"_xla_token_input_nodes", @@ -2169,6 +2184,8 @@ TEST(EncapsulateSubgraphsTest, {"ancestors", absl::Span({})}, {"key", "host_compute_channel_F1_F1_O1"}, {"shape_inference_graph", shape_inference_graph}, + {"tpu_core", 0}, + {"cost_estimate_ns", 1000000}, {"shapes", absl::Span({})}, {"_outside_compilation_subgraph", "O1"}, {"_xla_token_input_nodes", @@ -2296,6 +2313,8 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationClusterDependency) { {"ancestors", absl::Span({})}, {"key", "host_compute_channel_F1_F1_O1"}, {"shape_inference_graph", shape_inference_graph}, + {"tpu_core", 0}, + {"cost_estimate_ns", 1000000}, {"shapes", absl::Span({})}, {"_outside_compilation_subgraph", "O1"}, {"_xla_token_input_nodes", @@ -2310,6 +2329,8 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationClusterDependency) { {"ancestors", absl::Span({})}, {"key", "host_compute_channel_F1_F1_O2"}, {"shape_inference_graph", NameAttrList()}, + {"tpu_core", 0}, + {"cost_estimate_ns", 1000000}, {"shapes", absl::Span({})}, {"_outside_compilation_subgraph", "O2"}, {"_xla_token_input_nodes", @@ -2325,6 +2346,8 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationClusterDependency) { {"ancestors", 
absl::Span({})}, {"key", "host_compute_channel_F1_F1_O3"}, {"shape_inference_graph", NameAttrList()}, + {"tpu_core", 0}, + {"cost_estimate_ns", 1000000}, {"shapes", absl::Span({})}, {"_outside_compilation_subgraph", "O3"}, {"_xla_token_input_nodes", @@ -2451,6 +2474,8 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationNoInputsOrOutputs) { {"ancestors", absl::Span({})}, {"key", "host_compute_channel_F1_F1_O1"}, {"shape_inference_graph", shape_inference_graph}, + {"tpu_core", 0}, + {"cost_estimate_ns", 1000000}, {"shapes", absl::Span({})}, {"_outside_compilation_subgraph", "O1"}, {"_xla_token_input_nodes", @@ -2567,6 +2592,8 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationShapeInference) { {"ancestors", absl::Span({})}, {"key", "host_compute_channel_F1_F1_O1"}, {"shape_inference_graph", shape_inference_graph}, + {"tpu_core", 0}, + {"cost_estimate_ns", 1000000}, {"shapes", absl::Span({})}, {"_outside_compilation_subgraph", "O1"}, {"_xla_token_input_nodes", diff --git a/tensorflow/compiler/jit/extract_outside_compilation_pass.cc b/tensorflow/compiler/jit/extract_outside_compilation_pass.cc index 5f1c3d536a8..f0d8a27b2c9 100644 --- a/tensorflow/compiler/jit/extract_outside_compilation_pass.cc +++ b/tensorflow/compiler/jit/extract_outside_compilation_pass.cc @@ -2420,6 +2420,7 @@ Status ExtractOutsideCompilationForFunction( auto updated_fdef = absl::make_unique(); TF_RETURN_IF_ERROR( GraphToFunctionDef(*g, new_func_name, updated_fdef.get())); + updated_fdef->mutable_signature()->set_is_stateful(true); const FunctionDef* original_fdef = fld->Find(func_name); if (original_fdef) { for (const auto& attr : original_fdef->attr()) { diff --git a/tensorflow/compiler/jit/extract_outside_compilation_pass_test.cc b/tensorflow/compiler/jit/extract_outside_compilation_pass_test.cc index a6f2bd41275..b727dfc72fc 100644 --- a/tensorflow/compiler/jit/extract_outside_compilation_pass_test.cc +++ b/tensorflow/compiler/jit/extract_outside_compilation_pass_test.cc @@ -422,19 +422,6 @@ TEST_F(ExtractOutsideCompilationForFunctionTest, NoHostGraph) { EXPECT_EQ(fld.Find("host_graph"), nullptr); } -REGISTER_OP("XlaSendToHost") - .Input("input: Tinput") - .Attr("Tinput: type") - .Attr("key: string") - .SetIsStateful(); - -REGISTER_OP("XlaRecvFromHost") - .Output("output: Toutput") - .Attr("Toutput: type") - .Attr("shape: shape") - .Attr("key: string") - .SetIsStateful(); - TEST_F(ExtractOutsideCompilationForFunctionTest, OutsideCompilationInIf) { // Build the XLA computation func. 
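The repeated {"tpu_core", 0} and {"cost_estimate_ns", 1000000} entries added to the expected NodeDefs above, together with the removal of the test-local dummy registrations, follow from these ops now being registered for real under tensorflow/core/tpu/ops, where those attrs carry default values. A minimal hedged sketch of how an op declares such defaults; the op name is illustrative and this is not the exact XlaHostCompute registration, but the attr-default syntax matches the "tpu_core: int = 0" attr visible on _HostComputeMlir later in this patch:

  #include "tensorflow/core/framework/common_shape_fns.h"
  #include "tensorflow/core/framework/op.h"

  namespace tensorflow {

  // Attrs declared with "= <value>" are filled in automatically when a
  // NodeDef omits them, which is why the expected NodeDefs above now list
  // tpu_core and cost_estimate_ns even though the test graphs never set them.
  REGISTER_OP("ExampleHostCompute")
      .Input("inputs: Tinputs")
      .Output("outputs: Toutputs")
      .Attr("Tinputs: list(type) >= 0")
      .Attr("Toutputs: list(type) >= 0")
      .Attr("key: string")
      .Attr("tpu_core: int = 0")
      .Attr("cost_estimate_ns: int = 1000000")
      .SetIsStateful()
      .SetShapeFn(shape_inference::UnknownShape);

  }  // namespace tensorflow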
// "const0" (bool) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 9c6fd9f3632..55c45a42272 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -880,6 +880,7 @@ cc_library( ":tpu_outfeed_ops_op_lib", ":tpu_ordinal_selector_ops_op_lib", ":tpu_replication_ops_op_lib", + "//tensorflow/core/tpu/ops", ], ) + if_mkl([ ":mkl_array_ops_op_lib", diff --git a/tensorflow/core/api_def/base_api/api_def_DataServiceDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_DataServiceDataset.pbtxt new file mode 100644 index 00000000000..3801878cd71 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_DataServiceDataset.pbtxt @@ -0,0 +1,3 @@ +op { + graph_op_name: "DataServiceDataset" +} diff --git a/tensorflow/core/api_def/base_api/api_def_KthOrderStatistic.pbtxt b/tensorflow/core/api_def/base_api/api_def_KthOrderStatistic.pbtxt new file mode 100644 index 00000000000..a7485f9a144 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_KthOrderStatistic.pbtxt @@ -0,0 +1,20 @@ +op { + graph_op_name: "KthOrderStatistic" + summary: "Computes the Kth order statistic of a data set. The current" + description: <Deallocate(buffers.device_ordinal(), buffer); - if (!status.ok()) { - LOG(ERROR) << "Error deallocating buffer " << status; - } - } - }); + buffers.buffers().ForEachElement( + [&](const xla::ShapeIndex& index, const se::DeviceMemoryBase& buffer) { + if (owned_buffers.element(index) && !buffer.is_null()) { + Status status = + memory_allocator->Deallocate(buffers.device_ordinal(), buffer); + if (!status.ok()) { + LOG(ERROR) << "Error deallocating buffer " << status; + } + } + }); } // Which of the buffers do we own? diff --git a/tensorflow/core/tpu/ops/BUILD b/tensorflow/core/tpu/ops/BUILD index 5bb2b644dd2..5b49f5abc78 100644 --- a/tensorflow/core/tpu/ops/BUILD +++ b/tensorflow/core/tpu/ops/BUILD @@ -3,12 +3,26 @@ package( licenses = ["notice"], # Apache 2.0 ) +cc_library( + name = "ops", + linkstatic = 1, + deps = [ + ":host_compute_ops", + ":topk_ops", + ":tpu_compile_op", + ":tpu_execute_op", + ":tpu_partitioned_ops", + ], + alwayslink = 1, +) + cc_library( name = "tpu_partitioned_ops", srcs = [ "tpu_partitioned_input_op.cc", "tpu_partitioned_output_op.cc", ], + linkstatic = 1, deps = [ "//tensorflow/core:framework", "//tensorflow/core:graph", @@ -22,6 +36,7 @@ cc_library( srcs = [ "tpu_compile_op.cc", ], + linkstatic = 1, deps = [ "//tensorflow/core:framework", "//tensorflow/core:graph", @@ -35,6 +50,7 @@ cc_library( srcs = [ "tpu_execute_op.cc", ], + linkstatic = 1, deps = [ "//tensorflow/core:framework", "//tensorflow/core:graph", @@ -48,6 +64,7 @@ cc_library( srcs = [ "host_compute_ops.cc", ], + linkstatic = 1, deps = [ "//tensorflow/core:framework", "//tensorflow/core:lib", @@ -61,6 +78,7 @@ cc_library( srcs = [ "topk_ops.cc", ], + linkstatic = 1, deps = [ "//tensorflow/core:framework", "//tensorflow/core:lib", diff --git a/tensorflow/core/tpu/ops/host_compute_ops.cc b/tensorflow/core/tpu/ops/host_compute_ops.cc index 3c7994ccf2e..c83c5159f62 100644 --- a/tensorflow/core/tpu/ops/host_compute_ops.cc +++ b/tensorflow/core/tpu/ops/host_compute_ops.cc @@ -30,17 +30,10 @@ REGISTER_OP("_HostComputeMlir") .Attr("Toutputs: list(type) >= 0") .Attr("key: string") .Attr("tpu_core: int = 0") - .SetIsStateful() - .Doc(R"doc( -A host-side computation called from a TPU device. - -inputs: A list of tensors that will be sent to the host. -outputs: A list of tensors that will be returned to the device. -Tinputs: The element types of each element in `inputs`. 
-Toutputs: The element types of each element in `outputs`. -key: A unique identifier for this region used to match up host transfers. -tpu_core: Default core to use for host to device transfers. -)doc"); + .SetShapeFn([](shape_inference::InferenceContext* c) { + return ::tensorflow::shape_inference::UnknownShape(c); + }) + .SetIsStateful(); REGISTER_OP("XlaHostCompute") .Input("inputs: Tinputs") @@ -78,36 +71,16 @@ REGISTER_OP("XlaHostCompute") // statically known. return ::tensorflow::shape_inference::UnknownShape(c); } - }) - .Doc(R"doc( -A pseudo-op to represent host-side computation in an XLA program. - -inputs: A list of tensors that will be sent to the host. -outputs: A list of tensors that will be returned to the device. -Tinputs: The element types of each element in `inputs`. -Toutputs: The element types of each element in `outputs`. -ancestors: A list of names of HostCompute computations that must be -sequenced before this computation. -shape_inference_graph: If non-empty, a serialized GraphDef representing a graph -that must be analyzed at compile time to determine the shapes of the outputs. -shapes: If shape_inference_graph is empty, a list of the shapes of `outputs`. -key: A unique identifier for this region used to match up host transfers. -cost_estimate_ns: Estimated duration of the host computation in nanoseconds. -tpu_core: Default core to use for host to device transfers. -)doc"); + }); REGISTER_OP("XlaSendToHost") .Input("input: Tinput") .Attr("Tinput: type") .Attr("key: string") - .SetIsStateful() - .Doc(R"doc( -An op to send a tensor to the host. - -input: the tensor that will be sent to the host. -Tinput: element type for input. -key: A unique identifier for this region used to match up host transfers. -)doc"); + .SetShapeFn([](shape_inference::InferenceContext* c) { + return ::tensorflow::shape_inference::UnknownShape(c); + }) + .SetIsStateful(); REGISTER_OP("XlaRecvFromHost") .Output("output: Toutput") @@ -127,14 +100,6 @@ REGISTER_OP("XlaRecvFromHost") c->MakeShapeFromShapeProto(shape_attr->shape(), &handle)); c->set_output(0, handle); return Status::OK(); - }) - .Doc(R"doc( -An op to receive a tensor from the host. - -output: the tensor that will be received from the host. -Toutput: element type for output. -shape: shape for output. -key: A unique identifier for this region used to match up host transfers. -)doc"); + }); } // namespace tensorflow diff --git a/tensorflow/core/tpu/ops/topk_ops.cc b/tensorflow/core/tpu/ops/topk_ops.cc index 1656351690d..56a00253759 100644 --- a/tensorflow/core/tpu/ops/topk_ops.cc +++ b/tensorflow/core/tpu/ops/topk_ops.cc @@ -33,24 +33,7 @@ REGISTER_OP("KthOrderStatistic") TF_RETURN_IF_ERROR(c->Subshape(input, 0, -1, &s)); c->set_output(0, s); return Status::OK(); - }) - .Doc(R"doc( -Computes the Kth order statistic of a data set. The current -implementation uses a binary search requiring exactly 32 passes over -the input data. The running time is linear with respect to input -size. The median-of-medians algorithm is probably faster, but is -difficult to implement efficiently in XLA. The implementation imposes -a total ordering on floats. The ordering is consistent with the usual -partial order. Positive NaNs are greater than positive -infinity. Negative NaNs are less than negative infinity. NaNs with -distinct payloads are treated as distinct. Subnormal numbers are -preserved (not flushed to zero). Positive infinity is greater than all -numbers. Negative infinity is less than all numbers. Positive is -greater than negative zero. 
There are less than k values greater than -the kth order statistic. There are at least k values greater than or -equal to the Kth order statistic. The semantics are not the same as -top_k_unique. -)doc"); + }); REGISTER_OP("TopKUnique") .Input("input: float32") @@ -69,22 +52,7 @@ REGISTER_OP("TopKUnique") c->set_output(0, s); c->set_output(1, s); return Status::OK(); - }) - .Doc(R"doc( -Returns the TopK unique values in the array in sorted order. The -running time is proportional to the product of K and the input -size. Sorting the whole array is more efficient for sufficiently large -values of K. The median-of-medians algorithm is probably faster, but -difficult to implement efficiently in XLA. If there are fewer than K -unique numbers (not NANs), the results are padded with negative -infinity. NaNs are never returned. Subnormal numbers are flushed to -zero. If an element appears at multiple indices, the highest index is -returned. If a TopK element never appears in the input due to padding -values, the indices are padded with negative one. If a padding value -appears in the input and padding is needed, the highest index of the -padding value will be returned. The semantics are not the same as -kth_order_statistic. -)doc"); + }); REGISTER_OP("MakeUnique") .Input("input: float32") @@ -94,14 +62,7 @@ REGISTER_OP("MakeUnique") TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &input)); c->set_output(0, input); return Status::OK(); - }) - .Doc(R"doc( -Make all elements in the non-Batch dimension unique, but \"close\" to -their initial value. Never returns a sub-normal number. Never returns -zero. The sign of each input element is always identical to the sign -of the corresponding output element. Behavior for infinite elements is -undefined. Behavior for subnormal elements is undefined. -)doc"); + }); REGISTER_OP("TopKWithUnique") .Input("input: float32") @@ -120,11 +81,5 @@ REGISTER_OP("TopKWithUnique") c->set_output(0, s); c->set_output(1, s); return Status::OK(); - }) - .Doc(R"doc( -Returns the TopK values in the array in sorted order. This is a combination -of MakeUnique and TopKUnique. The returned top-K will have its lower bits -replaced by iota, thus it will be close to the original value but not exactly -the same. The running time is proportional to the product of K and the input -size. NaNs are never returned. Subnormal numbers are flushed to zero.)doc"); + }); } // namespace tensorflow diff --git a/tensorflow/core/tpu/ops/tpu_compile_op.cc b/tensorflow/core/tpu/ops/tpu_compile_op.cc index 9f68406dbba..b18b34e5f88 100644 --- a/tensorflow/core/tpu/ops/tpu_compile_op.cc +++ b/tensorflow/core/tpu/ops/tpu_compile_op.cc @@ -43,23 +43,7 @@ REGISTER_OP("_TPUCompileMlir") c->set_output(i + 1, c->Vector(2)); } return Status::OK(); - }) - .Doc( - R"( -Compiles a computations for execution on one or more TPU devices. -For the internal use of the distributed TPU compiler. Note that currently only -single TPU device is supported. - -'mlir_module' is a serialized MLIR module with a `main` function that contains -target computation. -'dynamic_shapes' contains dynamic shapes of arguments whose shapes were not -known statically at TPUReplication rewrite time. -'metadata' is a serialized TPUCompileMetadataProto describing -the shapes and types of the inputs to the computation, as well as a mapping onto -the TPU pod topology. -'program' output is a string key that is passed to the _TPUExecute op and -used to look up the program in the compilation cache. 
-)"); + }); REGISTER_OP("TPUCompile") .Attr("num_computations: int >= 0") @@ -91,39 +75,13 @@ REGISTER_OP("TPUCompile") c->set_output(num_computations + i + 1, c->Scalar()); } return Status::OK(); - }) - .Doc( - R"( -Compiles a computations for execution on one or more TPU devices. -For the internal use of the distributed TPU compiler. - -'num_computations' is the number of computations to be compiled. -'function' is a function containing the computation to compile. -'dynamic_shapes' contains dynamic shapes of arguments whose shapes were not -known statically at TPUReplication rewrite time. -'guaranteed_constants' is a list of tensors which have been guaranteed to not -change their values during the session lifetime. These contain tensors marked as -constant using the GuaranteeConstOp. -'metadata' is a serialized TPUCompileMetadataProto describing -the shapes and types of the inputs to the computation, as well as a mapping onto -the TPU pod topology. -Each 'program' output is a string key that is passed to the _TPUExecute op and -used to look up the program in the compilation cache. -'may_modify_variables' indicates whether variables may be modified. -)"); + }); REGISTER_OP("TPUCompileSucceededAssert") .Input("compilation_status: string") // Do not optimize me away. Read the comment on TPUCompileOp for more // details. .SetIsStateful() - .SetShapeFn(shape_inference::NoOutputs) - .Doc( - R"( -Asserts that compilation succeeded. This op produces no output and closes the -device during failure to ensure all pending device interactions fail. - -'compilation_status' is a serialized CompilationResultProto. - )"); + .SetShapeFn(shape_inference::NoOutputs); } // namespace tensorflow diff --git a/tensorflow/core/tpu/ops/tpu_execute_op.cc b/tensorflow/core/tpu/ops/tpu_execute_op.cc index 6d42c35fc7b..68ddc862031 100644 --- a/tensorflow/core/tpu/ops/tpu_execute_op.cc +++ b/tensorflow/core/tpu/ops/tpu_execute_op.cc @@ -35,10 +35,7 @@ REGISTER_OP("TPUExecute") c->set_output(i, c->UnknownShape()); } return Status::OK(); - }) - .Doc(R"( -Op that loads and executes a TPU program on a TPU device. -For the internal use of the distributed TPU compiler.)"); + }); REGISTER_OP("TPUExecuteAndUpdateVariables") .Input("args: Targs") @@ -58,14 +55,6 @@ REGISTER_OP("TPUExecuteAndUpdateVariables") c->set_output(i, c->UnknownShape()); } return Status::OK(); - }) - .Doc(R"(Op that executes a program with optional in-place variable updates. -It (optionally) reads device variables, loads and executes a TPU program on a -TPU device, and then (optionally) in-place updates variables using the program -outputs, as specified in attributes device_var_reads_indices (program input -indices from directly reading variables) and device_var_updates_indices (program -output indices used to update variables, -1 means no-update/read-only). Such -program outputs are consumed by these variables will not appear in the op -output. For the internal use of the distributed TPU compiler.)"); + }); } // namespace tensorflow diff --git a/tensorflow/core/tpu/ops/tpu_partitioned_input_op.cc b/tensorflow/core/tpu/ops/tpu_partitioned_input_op.cc index acaed3d072c..f6ccf279956 100644 --- a/tensorflow/core/tpu/ops/tpu_partitioned_input_op.cc +++ b/tensorflow/core/tpu/ops/tpu_partitioned_input_op.cc @@ -94,14 +94,6 @@ REGISTER_OP("TPUPartitionedInput") } return Status::OK(); - }) - .Doc(R"doc( -An op that groups a list of partitioned inputs together. This op - -inputs: A list of partitioned inputs which must have the same shape. 
-output: A handle which represents the full shape of partitioned tensors. -partition_dim: An integer describles which dimension is partitioned. -1 means - those inputs are replicated. -)doc"); + }); } // namespace tensorflow diff --git a/tensorflow/core/tpu/ops/tpu_partitioned_output_op.cc b/tensorflow/core/tpu/ops/tpu_partitioned_output_op.cc index 69ea48d242a..5282abff679 100644 --- a/tensorflow/core/tpu/ops/tpu_partitioned_output_op.cc +++ b/tensorflow/core/tpu/ops/tpu_partitioned_output_op.cc @@ -23,7 +23,6 @@ namespace tensorflow { using shape_inference::InferenceContext; using shape_inference::ShapeHandle; - REGISTER_OP("TPUPartitionedOutput") .Input("inputs: T") .Output("output: num_splits * T") @@ -53,14 +52,6 @@ REGISTER_OP("TPUPartitionedOutput") c->set_output(i, newoutput0); } return Status::OK(); - }) - .Doc(R"doc( -An op that demultiplexes a tensor to be sharded by XLA to a list of partitioned -outputs outside the XLA computation. - -inputs: A tensor which represents the full shape of partitioned tensors. -output: A list of partitioned inputs which must have the same shape. -partition_dim: An integer describles which dimension is partitioned. -)doc"); + }); } // namespace tensorflow diff --git a/tensorflow/core/tpu/tpu_execute.cc b/tensorflow/core/tpu/tpu_execute.cc index 99547cafc82..29a05c0d538 100644 --- a/tensorflow/core/tpu/tpu_execute.cc +++ b/tensorflow/core/tpu/tpu_execute.cc @@ -23,7 +23,6 @@ limitations under the License. #include "absl/base/casts.h" #include "absl/memory/memory.h" -#include "tensorflow/compiler/jit/xla_device.h" #include "tensorflow/compiler/xla/executable_run_options.h" #include "tensorflow/compiler/xla/service/computation_layout.h" #include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h" @@ -419,10 +418,6 @@ xla::StatusOr TPUExecute( xla::Backend* backend = node_context->backend(); - XlaDevice* device = - tensorflow::down_cast(ctx->device()->UnderlyingDevice()); - TF_RET_CHECK(device); - // Create a HostTransferManager to handle Send/Recv operations from the TPU. std::shared_ptr host_transfer_manager = std::make_shared(node_context, backend); diff --git a/tensorflow/core/tpu/tpu_system_device.cc b/tensorflow/core/tpu/tpu_system_device.cc index de72021da06..7a6c4e949e3 100644 --- a/tensorflow/core/tpu/tpu_system_device.cc +++ b/tensorflow/core/tpu/tpu_system_device.cc @@ -15,7 +15,6 @@ limitations under the License. #include "tensorflow/core/common_runtime/device_factory.h" #include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/graph/types.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/public/session_options.h" #include "tensorflow/core/tpu/virtual_device.h" diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index a10913b6c4b..766bc35e4bd 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -3,7 +3,7 @@ # ":platform" - Low-level and platform-specific Python code. 
load("//tensorflow:tensorflow.bzl", "py_strict_library") -load("//tensorflow:tensorflow.bzl", "cc_header_only_library", "if_mlir", "if_not_windows", "if_tpu", "if_xla_available", "py_test", "py_tests", "tf_cc_shared_object", "tf_cc_test", "tf_cuda_library", "tf_enable_mlir_bridge", "tf_gen_op_wrapper_py") +load("//tensorflow:tensorflow.bzl", "cc_header_only_library", "if_mlir", "if_not_windows", "if_xla_available", "py_test", "py_tests", "tf_cc_shared_object", "tf_cc_test", "tf_cuda_library", "tf_enable_mlir_bridge", "tf_gen_op_wrapper_py") # buildifier: disable=same-origin-load load("//tensorflow:tensorflow.bzl", "tf_monitoring_python_deps") @@ -6093,8 +6093,6 @@ pywrap_tensorflow_macro( "@ngraph_tf//:ngraph_tf", ]) + if_xla_available([ "//tensorflow/compiler/aot:tfcompile_lib", - ]) + if_tpu([ - "//tensorflow/core/tpu:tpu_api_dlsym_initializer", ]) + if_static(extra_deps = ["//tensorflow/core/platform:tf32_utils"]), ) diff --git a/tensorflow/stream_executor/tpu/BUILD b/tensorflow/stream_executor/tpu/BUILD index adee7546c7c..813c0fa87a9 100644 --- a/tensorflow/stream_executor/tpu/BUILD +++ b/tensorflow/stream_executor/tpu/BUILD @@ -68,7 +68,7 @@ cc_library( deps = [ ":c_api_decl", ":tpu_executor_c_api_hdrs", - "//tensorflow/core/platform:status", + "//tensorflow/core:lib", "//tensorflow/core/tpu:tpu_api", ], ) @@ -103,9 +103,7 @@ cc_library( ":tpu_executor_interface", ":tpu_platform_interface", ":tpu_stream_interface", - "//tensorflow/core/platform:casts", - "//tensorflow/core/platform:mutex", - "//tensorflow/core/platform:types", + "//tensorflow/core:lib", "//tensorflow/core/tpu:tpu_api", "//tensorflow/stream_executor", "//tensorflow/stream_executor/lib", @@ -131,8 +129,6 @@ cc_library( ":status_helper", ":tpu_executor_c_api_hdrs", "//tensorflow/core:lib", - "//tensorflow/core/platform:mutex", - "//tensorflow/core/platform:types", "//tensorflow/core/tpu:tpu_api", "//tensorflow/stream_executor", "//tensorflow/stream_executor/lib", @@ -164,8 +160,6 @@ cc_library( "//tensorflow/c:tf_status", "//tensorflow/c:tf_status_helper", "//tensorflow/core:lib", - "//tensorflow/core/platform:mutex", - "//tensorflow/core/platform:types", "//tensorflow/core/tpu:tpu_api", "//tensorflow/stream_executor", "//tensorflow/stream_executor/lib", @@ -274,10 +268,8 @@ cc_library( hdrs = ["tpu_platform_interface.h"], visibility = ["//visibility:public"], deps = [ - "//tensorflow/core/platform:mutex", - "//tensorflow/core/platform:types", - "//tensorflow/stream_executor:multi_platform_manager", - "//tensorflow/stream_executor:stream_executor_headers", + "//tensorflow/core:lib", + "//tensorflow/stream_executor", ], ) @@ -334,7 +326,7 @@ cc_library( visibility = ["//visibility:public"], deps = [ ":c_api_decl", - "//tensorflow/core/platform:types", + "//tensorflow/core:lib", "//tensorflow/core/tpu:tpu_api", ], ) diff --git a/tensorflow/stream_executor/tpu/c_api_conversions.cc b/tensorflow/stream_executor/tpu/c_api_conversions.cc index 3c2180e2819..ddbd9ec2219 100644 --- a/tensorflow/stream_executor/tpu/c_api_conversions.cc +++ b/tensorflow/stream_executor/tpu/c_api_conversions.cc @@ -21,6 +21,7 @@ limitations under the License. 
#include "tensorflow/stream_executor/tpu/tpu_platform_interface.h" namespace ApiConverter { + xla::ShapedBuffer FromC(XLA_ShapedBuffer* c_buffer) { xla::Shape xla_on_host_shape = ApiConverter::FromC(&c_buffer->on_host_shape); xla::Shape xla_on_device_shape = @@ -114,6 +115,7 @@ SE_DeviceMemoryAllocator ToC( }; return se_allocator; } + SE_MaybeOwningDeviceMemory ToC(stream_executor::OwningDeviceMemory* mem) { SE_MaybeOwningDeviceMemory se_mem; se_mem.device_ordinal = mem->device_ordinal(); diff --git a/tensorflow/stream_executor/tpu/tpu_executor_c_api.h b/tensorflow/stream_executor/tpu/tpu_executor_c_api.h index 8bee19f16ed..46d908ac18a 100644 --- a/tensorflow/stream_executor/tpu/tpu_executor_c_api.h +++ b/tensorflow/stream_executor/tpu/tpu_executor_c_api.h @@ -349,7 +349,6 @@ struct TfTpu_ExecutorApiFn { TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_New); TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_Free); - TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_RunHloPasses); TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_RunBackend); TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_Compile); From 07e4db17ff666ffb3c9f9b2439485efbfa1e0cc0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Jul 2020 15:42:20 -0700 Subject: [PATCH 1415/2522] Fuse tensorflow_text.ngrams into a TFLite custom op PiperOrigin-RevId: 323456482 Change-Id: Idfd446c371e8a4a4f82b6da730d02b0897d35a8a --- tensorflow/compiler/mlir/lite/BUILD | 1 + .../compiler/mlir/lite/tests/fuse-tftext.mlir | 6631 +++++++++-------- .../compiler/mlir/lite/utils/tftext_utils.cc | 194 +- 3 files changed, 3602 insertions(+), 3224 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/BUILD b/tensorflow/compiler/mlir/lite/BUILD index 103009cddb3..e6c4964b5b1 100644 --- a/tensorflow/compiler/mlir/lite/BUILD +++ b/tensorflow/compiler/mlir/lite/BUILD @@ -270,6 +270,7 @@ cc_library( "//tensorflow/compiler/mlir/tensorflow", "//tensorflow/compiler/mlir/tensorflow:tensorflow_attributes", "//tensorflow/core:framework", + "@flatbuffers", "@llvm-project//llvm:Support", "@llvm-project//mlir:IR", "@llvm-project//mlir:StandardOps", diff --git a/tensorflow/compiler/mlir/lite/tests/fuse-tftext.mlir b/tensorflow/compiler/mlir/lite/tests/fuse-tftext.mlir index 97c0c7358ca..f6f32e7a069 100644 --- a/tensorflow/compiler/mlir/lite/tests/fuse-tftext.mlir +++ b/tensorflow/compiler/mlir/lite/tests/fuse-tftext.mlir @@ -1,3197 +1,3438 @@ -// RUN: tf-opt -tfl-prepare-composite-funcs-tf -tfl-fuse-tftext=true %s -split-input-file | FileCheck %s -module { +// RUN: tf-opt -tfl-prepare-composite-funcs-tf -tfl-fuse-tftext=true %s | FileCheck %s - func @whitespace_tokenizer_rank1(%arg0: tensor<1x!tf.string> {tf._user_specified_name = "input"}) -> (tensor, tensor) attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<1>], tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<[0, 1]> : tensor<2xi64>} : () -> tensor<2xi64> - %1 = "tf.Const"() {value = dense<[]> : tensor<0xi64>} : () -> tensor<0xi64> - %2 = "tf.Const"() {value = dense : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<-1> : tensor} : () -> tensor - %4 = "tf.Const"() {value = dense<[[0], [1]]> : tensor<2x1xi64>} : () -> tensor<2x1xi64> - %5 = "tf.Const"() {value = dense<-1> : tensor<1xi32>} : () -> tensor<1xi32> - %6 = "tf.Const"() {value = dense<2> : tensor<1xi32>} : () -> tensor<1xi32> - %7 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor - %8 = "tf.Const"() {value = dense<2> : tensor} : () -> tensor - %9 = "tf.Const"() {value = dense<[]> : 
tensor<0xi32>} : () -> tensor<0xi32> - %10 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor - %11 = "tf.Const"() {value = dense<0> : tensor<1xi64>} : () -> tensor<1xi64> - %12 = "tf.Const"() {value = dense<1> : tensor<1xi64>} : () -> tensor<1xi64> - %13 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor - %14 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor - %15 = "tf.Const"() {value = dense<0> : tensor<1xi32>} : () -> tensor<1xi32> - %16 = "tf.Const"() {value = dense<1> : tensor<1xi32>} : () -> tensor<1xi32> - %17 = "tf.If"(%2, %2, %13, %13) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedConcat_assert_equal_1_Assert_AssertGuard_false_3210, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedConcat_assert_equal_1_Assert_AssertGuard_true_3200} : (tensor, tensor, tensor, tensor) -> tensor - %18 = "tf.Identity"(%17) {device = ""} : (tensor) -> tensor - %19 = "tf.StringLength"(%arg0) {device = "", unit = "BYTE"} : (tensor<1x!tf.string>) -> tensor<1xi32> - %20 = "tf.ExpandDims"(%19, %7) {device = ""} : (tensor<1xi32>, tensor) -> tensor<1x1xi32> - %21 = "tf.Cast"(%20) {Truncate = false, device = ""} : (tensor<1x1xi32>) -> tensor<1x1xi64> - %22 = "tf.Reshape"(%21, %12) {device = ""} : (tensor<1x1xi64>, tensor<1xi64>) -> tensor<1xi64> - %23 = "tf.Reshape"(%arg0, %5) {device = ""} : (tensor<1x!tf.string>, tensor<1xi32>) -> tensor<1x!tf.string> - %24:3 = "tf.UnicodeDecodeWithOffsets"(%23) {Tsplits = i64, device = "", errors = "replace", input_encoding = "UTF-8", replace_control_characters = false, replacement_char = 65533 : i64} : (tensor<1x!tf.string>) -> (tensor<2xi64>, tensor, tensor) - %25 = "tf.StridedSlice"(%24#0, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> - %26 = "tf.AddV2"(%25, %13) {device = ""} : (tensor<1xi64>, tensor) -> tensor<1xi64> - %27 = "tf.StridedSlice"(%24#0, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> - %28 = "tf.Minimum"(%26, %27) {device = ""} : (tensor<1xi64>, tensor<1xi64>) -> tensor<1xi64> - %29:2 = "tf.RaggedRange"(%28, %27, %13) {T = i64, Tsplits = i64, device = ""} : (tensor<1xi64>, tensor<1xi64>, tensor) -> (tensor<2xi64>, tensor) - %30 = "tf.StridedSlice"(%29#0, %5, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %31 = "tf.AddV2"(%30, %12) {device = ""} : (tensor, tensor<1xi64>) -> tensor<1xi64> - %32 = "tf.ConcatV2"(%29#0, %31, %14) {device = ""} : (tensor<2xi64>, tensor<1xi64>, tensor) -> tensor<3xi64> - %33 = "tf.GatherV2"(%24#2, %29#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %34 = "tf.ConcatV2"(%33, %22, %14) {device = ""} : (tensor, tensor<1xi64>, tensor) -> tensor - %35:2 = "tf.RaggedGather"(%32, %34, %0) {OUTPUT_RAGGED_RANK = 1 : i64, PARAMS_RAGGED_RANK = 1 : i64, Tindices = i64, Tsplits = i64, Tvalues = i64, device = ""} : (tensor<3xi64>, tensor, tensor<2xi64>) -> (tensor, tensor) - %36:5 = "tf.WhitespaceTokenizeWithOffsets"(%24#1, %24#0) {Tsplits = 
i64, device = ""} : (tensor, tensor<2xi64>) -> (tensor, tensor, tensor, tensor, tensor) - %37 = "tf.StridedSlice"(%36#1, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %38 = "tf.Equal"(%37, %10) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %39 = "tf.All"(%38, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor - %40 = "tf.If"(%39, %39, %37, %10) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_3970, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_3960} : (tensor, tensor, tensor, tensor) -> tensor - %41 = "tf.Identity"(%40) {device = ""} : (tensor) -> tensor - %42 = "tf.StridedSlice"(%36#1, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %43 = "tf.StridedSlice"(%36#1, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %44 = "tf.Sub"(%42, %43) {device = ""} : (tensor, tensor) -> tensor - %45 = "tf.LessEqual"(%10, %44) {device = ""} : (tensor, tensor) -> tensor - %46 = "tf.All"(%45, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor - %47 = "tf.If"(%46, %46, %44) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_4330, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_4320} : (tensor, tensor, tensor) -> tensor - %48 = "tf.Identity"(%47) {device = ""} : (tensor) -> tensor - %49 = "tf.Identity"(%36#1) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor - %50 = "tf.StridedSlice"(%49, %5, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %51 = "tf.Shape"(%36#0) {device = ""} : (tensor) -> tensor<1xi64> - %52 = "tf.StridedSlice"(%51, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %53 = "tf.Equal"(%50, %52) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %54 = "tf.All"(%53, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor - %55 = "tf.If"(%54, %54, %50, %52) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = 
@WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_4670, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_4660} : (tensor, tensor, tensor, tensor) -> tensor - %56 = "tf.Identity"(%55) {device = ""} : (tensor) -> tensor - %57 = "tf.Identity"(%49) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor - %58 = "tf.Shape"(%57) {device = ""} : (tensor) -> tensor<1xi64> - %59 = "tf.StridedSlice"(%58, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %60 = "tf.Sub"(%59, %13) {device = ""} : (tensor, tensor) -> tensor - %61 = "tf.StridedSlice"(%36#4, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %62 = "tf.Equal"(%61, %10) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %63 = "tf.All"(%62, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor - %64 = "tf.If"(%63, %63, %61, %10) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_5040, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_5030} : (tensor, tensor, tensor, tensor) -> tensor - %65 = "tf.Identity"(%64) {device = ""} : (tensor) -> tensor - %66 = "tf.StridedSlice"(%36#4, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %67 = "tf.StridedSlice"(%36#4, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %68 = "tf.Sub"(%66, %67) {device = ""} : (tensor, tensor) -> tensor - %69 = "tf.LessEqual"(%10, %68) {device = ""} : (tensor, tensor) -> tensor - %70 = "tf.All"(%69, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor - %71 = "tf.If"(%70, %70, %68) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_5400, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_5390} : (tensor, tensor, tensor) -> tensor - %72 = "tf.Identity"(%71) {device = ""} : (tensor) -> tensor - %73 = "tf.Identity"(%36#4) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor - %74 = "tf.StridedSlice"(%73, %5, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : 
i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %75 = "tf.Equal"(%74, %60) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %76 = "tf.All"(%75, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor - %77 = "tf.If"(%76, %76, %74, %60) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_false_5760, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_true_5750} : (tensor, tensor, tensor, tensor) -> tensor - %78 = "tf.Identity"(%77) {device = ""} : (tensor) -> tensor - %79 = "tf.Identity"(%73) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor - %80 = "tf.StridedSlice"(%36#4, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %81 = "tf.Equal"(%80, %10) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %82 = "tf.All"(%81, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor - %83 = "tf.If"(%82, %82, %80, %10) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_6110, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_6100} : (tensor, tensor, tensor, tensor) -> tensor - %84 = "tf.Identity"(%83) {device = ""} : (tensor) -> tensor - %85 = "tf.StridedSlice"(%36#4, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %86 = "tf.StridedSlice"(%36#4, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %87 = "tf.Sub"(%85, %86) {device = ""} : (tensor, tensor) -> tensor - %88 = "tf.LessEqual"(%10, %87) {device = ""} : (tensor, tensor) -> tensor - %89 = "tf.All"(%88, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor - %90 = "tf.If"(%89, %89, %87) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_6470, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_6460} : (tensor, tensor, tensor) -> tensor - %91 = "tf.Identity"(%90) {device = ""} : (tensor) -> tensor - %92 = "tf.Identity"(%36#4) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor - %93 = "tf.StridedSlice"(%92, %5, %15, %16) 
{begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %94 = "tf.Shape"(%36#2) {device = ""} : (tensor) -> tensor<1xi64> - %95 = "tf.StridedSlice"(%94, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %96 = "tf.Equal"(%93, %95) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %97 = "tf.All"(%96, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor - %98 = "tf.If"(%97, %97, %93, %95) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_6810, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_6800} : (tensor, tensor, tensor, tensor) -> tensor - %99 = "tf.Identity"(%98) {device = ""} : (tensor) -> tensor - %100 = "tf.Identity"(%92) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor - %101 = "tf.Shape"(%100) {device = ""} : (tensor) -> tensor<1xi64> - %102 = "tf.StridedSlice"(%101, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %103 = "tf.Sub"(%102, %13) {device = ""} : (tensor, tensor) -> tensor - %104 = "tf.Equal"(%103, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %105 = "tf.LogicalOr"(%104, %2) {device = ""} : (tensor, tensor) -> tensor - %106 = "tf.Equal"(%103, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %107 = "tf.LogicalOr"(%105, %106) {device = ""} : (tensor, tensor) -> tensor - %108 = "tf.StridedSlice"(%100, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %109 = "tf.StridedSlice"(%100, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %110 = "tf.Sub"(%108, %109) {device = ""} : (tensor, tensor) -> tensor - %111 = "tf.Shape"(%100) {device = ""} : (tensor) -> tensor<1xi64> - %112 = "tf.StridedSlice"(%111, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %113 = "tf.Sub"(%112, %13) {device = ""} : (tensor, tensor) -> tensor - %114 = "tf.Equal"(%113, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %115 = "tf.ExpandDims"(%100, %7) {device = ""} : (tensor, tensor) -> tensor - %116 = "tf.Shape"(%100) {device = ""} : (tensor) -> tensor<1xi32> - %117 = "tf.StridedSlice"(%116, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : 
(tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %118 = "tf.StridedSlice"(%116, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %119 = "tf.StridedSlice"(%116, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %120 = "tf.StridedSlice"(%36#4, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %121 = "tf.Equal"(%120, %10) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %122 = "tf.All"(%121, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor - %123 = "tf.If"(%122, %122, %120, %10) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_7180, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_7170} : (tensor, tensor, tensor, tensor) -> tensor - %124 = "tf.Identity"(%123) {device = ""} : (tensor) -> tensor - %125 = "tf.StridedSlice"(%36#4, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %126 = "tf.StridedSlice"(%36#4, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %127 = "tf.Sub"(%125, %126) {device = ""} : (tensor, tensor) -> tensor - %128 = "tf.LessEqual"(%10, %127) {device = ""} : (tensor, tensor) -> tensor - %129 = "tf.All"(%128, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor - %130 = "tf.If"(%129, %129, %127) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_7540, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_7530} : (tensor, tensor, tensor) -> tensor - %131 = "tf.Identity"(%130) {device = ""} : (tensor) -> tensor - %132 = "tf.Identity"(%36#4) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor - %133 = "tf.StridedSlice"(%132, %5, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %134 = "tf.Shape"(%36#3) {device = ""} : (tensor) -> tensor<1xi64> - %135 = "tf.StridedSlice"(%134, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask 
= 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %136 = "tf.Equal"(%133, %135) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %137 = "tf.All"(%136, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor - %138 = "tf.If"(%137, %137, %133, %135) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_7880, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_7870} : (tensor, tensor, tensor, tensor) -> tensor - %139 = "tf.Identity"(%138) {device = ""} : (tensor) -> tensor - %140 = "tf.Identity"(%132) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor - %141 = "tf.Shape"(%140) {device = ""} : (tensor) -> tensor<1xi64> - %142 = "tf.StridedSlice"(%141, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %143 = "tf.Sub"(%142, %13) {device = ""} : (tensor, tensor) -> tensor - %144 = "tf.Equal"(%143, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %145 = "tf.LogicalOr"(%144, %2) {device = ""} : (tensor, tensor) -> tensor - %146 = "tf.Equal"(%143, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %147 = "tf.LogicalOr"(%145, %146) {device = ""} : (tensor, tensor) -> tensor - %148 = "tf.StridedSlice"(%140, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %149 = "tf.StridedSlice"(%140, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %150 = "tf.Sub"(%148, %149) {device = ""} : (tensor, tensor) -> tensor - %151 = "tf.Shape"(%140) {device = ""} : (tensor) -> tensor<1xi64> - %152 = "tf.StridedSlice"(%151, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %153 = "tf.Sub"(%152, %13) {device = ""} : (tensor, tensor) -> tensor - %154 = "tf.Equal"(%153, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %155 = "tf.ExpandDims"(%140, %7) {device = ""} : (tensor, tensor) -> tensor - %156 = "tf.Shape"(%140) {device = ""} : (tensor) -> tensor<1xi32> - %157 = "tf.StridedSlice"(%156, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %158 = "tf.StridedSlice"(%156, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %159 = 
"tf.StridedSlice"(%156, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %160 = "tf.StridedSlice"(%140, %5, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %161 = "tf.Range"(%10, %160, %13) {device = ""} : (tensor, tensor, tensor) -> tensor - %162 = "tf.StridedSlice"(%140, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %163 = "tf.StridedSlice"(%140, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %164 = "tf.Sub"(%162, %163) {device = ""} : (tensor, tensor) -> tensor - %165 = "tf.If"(%107, %107, %13, %103) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedGather_Assert_AssertGuard_false_8680, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedGather_Assert_AssertGuard_true_8670} : (tensor, tensor, tensor, tensor) -> tensor - %166 = "tf.Identity"(%165) {device = ""} : (tensor) -> tensor - %167 = "tf.Equal"(%103, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %168 = "tf.Select"(%167, %13, %103) {device = ""} : (tensor, tensor, tensor) -> tensor - %169 = "tf.Equal"(%168, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %170 = "tf.LogicalOr"(%169, %2) {device = ""} : (tensor, tensor) -> tensor - %171 = "tf.Equal"(%168, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %172 = "tf.LogicalOr"(%170, %171) {device = ""} : (tensor, tensor) -> tensor - %173 = "tf.Select"(%114, %168, %13) {device = ""} : (tensor, tensor, tensor) -> tensor - %174 = "tf.Pack"(%173, %13) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> - %175 = "tf.StridedSlice"(%174, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %176 = "tf.Cast"(%175) {Truncate = false, device = ""} : (tensor) -> tensor - %177 = "tf.Reshape"(%176, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor - %178 = "tf.Pack"(%7, %177) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %179 = "tf.Tile"(%115, %178) {device = ""} : (tensor, tensor<2xi32>) -> tensor - %180 = "tf.Mul"(%177, %118) {device = ""} : (tensor, tensor) -> tensor - %181 = "tf.Pack"(%180) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %182 = "tf.ConcatV2"(%117, %181, %119, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %183 = "tf.Reshape"(%179, %182) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %184 = "tf.Shape"(%183) {device = ""} : (tensor) -> tensor<1xi64> - %185 = "tf.StridedSlice"(%184, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : 
i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %186 = "tf.Pack"(%175) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %187 = "tf.StridedSlice"(%183, %186, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %188 = "tf.Sub"(%185, %175) {device = ""} : (tensor, tensor) -> tensor - %189 = "tf.Pack"(%188) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %190 = "tf.StridedSlice"(%183, %11, %189, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %191:2 = "tf.RaggedRange"(%190, %187, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %192 = "tf.Select"(%2, %168, %13) {device = ""} : (tensor, tensor, tensor) -> tensor - %193 = "tf.Pack"(%192, %13) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> - %194 = "tf.StridedSlice"(%193, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %195 = "tf.Cast"(%194) {Truncate = false, device = ""} : (tensor) -> tensor - %196 = "tf.Reshape"(%195, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor - %197 = "tf.Pack"(%7, %196) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %198 = "tf.Tile"(%4, %197) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> - %199 = "tf.Mul"(%196, %8) {device = ""} : (tensor, tensor) -> tensor - %200 = "tf.Pack"(%199) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %201 = "tf.ConcatV2"(%9, %200, %9, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %202 = "tf.Reshape"(%198, %201) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor - %203 = "tf.Shape"(%202) {device = ""} : (tensor) -> tensor<1xi64> - %204 = "tf.StridedSlice"(%203, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %205 = "tf.Pack"(%194) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %206 = "tf.StridedSlice"(%202, %205, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %207 = "tf.Sub"(%204, %194) {device = ""} : (tensor, tensor) -> tensor - %208 = "tf.Pack"(%207) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %209 = "tf.StridedSlice"(%202, %11, %208, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %210:2 = "tf.RaggedRange"(%209, %206, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %211 = "tf.StridedSlice"(%193, %15, %16, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> 
tensor<1xi64> - %212 = "tf.StridedSlice"(%193, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %213 = "tf.Mul"(%212, %12) {device = ""} : (tensor, tensor<1xi64>) -> tensor<1xi64> - %214 = "tf.Tile"(%213, %211) {device = ""} : (tensor<1xi64>, tensor<1xi64>) -> tensor - %215 = "tf.Cumsum"(%214, %14) {device = "", exclusive = false, reverse = false} : (tensor, tensor) -> tensor - %216 = "tf.ConcatV2"(%11, %215, %3) {device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor - %217 = "tf.StridedSlice"(%216, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %218 = "tf.ExpandDims"(%217, %7) {device = ""} : (tensor, tensor) -> tensor - %219 = "tf.Shape"(%217) {device = ""} : (tensor) -> tensor<1xi32> - %220 = "tf.StridedSlice"(%219, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %221 = "tf.Pack"(%220) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %222 = "tf.StridedSlice"(%216, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %223 = "tf.ExpandDims"(%222, %7) {device = ""} : (tensor, tensor) -> tensor - %224 = "tf.Shape"(%222) {device = ""} : (tensor) -> tensor<1xi32> - %225 = "tf.StridedSlice"(%224, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %226 = "tf.Pack"(%225) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %227 = "tf.Equal"(%103, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %228 = "tf.Select"(%227, %168, %13) {device = ""} : (tensor, tensor, tensor) -> tensor - %229 = "tf.Cast"(%228) {Truncate = false, device = ""} : (tensor) -> tensor - %230 = "tf.Reshape"(%229, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor - %231 = "tf.Pack"(%7, %230) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %232 = "tf.Mul"(%230, %8) {device = ""} : (tensor, tensor) -> tensor - %233 = "tf.Pack"(%232) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %234 = "tf.ConcatV2"(%9, %233, %9, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %235 = "tf.Pack"(%228) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %236 = "tf.Pack"(%10, %103) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> - %237 = "tf.ExpandDims"(%236, %7) {device = ""} : (tensor<2xi64>, tensor) -> tensor<2x1xi64> - %238 = "tf.Tile"(%237, %231) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> - %239 = "tf.Reshape"(%238, %234) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor - %240 = "tf.Shape"(%239) {device = ""} : (tensor) -> tensor<1xi64> - %241 = "tf.StridedSlice"(%240, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 
: i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %242 = "tf.Sub"(%241, %228) {device = ""} : (tensor, tensor) -> tensor - %243 = "tf.Pack"(%242) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %244 = "tf.StridedSlice"(%239, %11, %243, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %245 = "tf.StridedSlice"(%239, %235, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %246:2 = "tf.RaggedRange"(%244, %245, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %247 = "tf.GatherV2"(%110, %246#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %248 = "tf.Cast"(%247) {Truncate = false, device = ""} : (tensor) -> tensor - %249 = "tf.BroadcastTo"(%248, %221) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %250 = "tf.Max"(%249, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor - %251 = "tf.Maximum"(%14, %250) {device = ""} : (tensor, tensor) -> tensor - %252 = "tf.Range"(%14, %251, %7) {device = ""} : (tensor, tensor, tensor) -> tensor - %253 = "tf.Pack"(%7, %251) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %254 = "tf.Tile"(%218, %253) {device = ""} : (tensor, tensor<2xi32>) -> tensor - %255 = "tf.Shape"(%254) {device = ""} : (tensor) -> tensor<2xi32> - %256 = "tf.StridedSlice"(%255, %15, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> - %257 = "tf.Prod"(%256, %15) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor - %258 = "tf.Pack"(%257) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %259 = "tf.Shape"(%254) {device = ""} : (tensor) -> tensor<2xi32> - %260 = "tf.StridedSlice"(%259, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %261 = "tf.Shape"(%254) {device = ""} : (tensor) -> tensor<2xi32> - %262 = "tf.StridedSlice"(%261, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %263 = "tf.ConcatV2"(%260, %258, %262, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %264 = "tf.Reshape"(%254, %263) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %265 = "tf.ExpandDims"(%249, %3) {device = ""} : (tensor, tensor) -> tensor - %266 = "tf.Less"(%252, %265) {device = ""} : (tensor, tensor) -> tensor - %267 = "tf.Reshape"(%266, %5) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %268 = "tf.Where"(%267) {device = ""} : (tensor) -> tensor - %269 = "tf.Squeeze"(%268) {device = "", squeeze_dims = [1]} : (tensor) -> tensor - %270 = "tf.GatherV2"(%264, %269, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %271 = "tf.Cast"(%247) {Truncate = false, device = ""} : (tensor) -> tensor - %272 = 
"tf.BroadcastTo"(%271, %226) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %273 = "tf.Max"(%272, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor - %274 = "tf.Maximum"(%14, %273) {device = ""} : (tensor, tensor) -> tensor - %275 = "tf.Range"(%14, %274, %7) {device = ""} : (tensor, tensor, tensor) -> tensor - %276 = "tf.Pack"(%7, %274) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %277 = "tf.Tile"(%223, %276) {device = ""} : (tensor, tensor<2xi32>) -> tensor - %278 = "tf.Shape"(%277) {device = ""} : (tensor) -> tensor<2xi32> - %279 = "tf.StridedSlice"(%278, %15, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> - %280 = "tf.Prod"(%279, %15) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor - %281 = "tf.Pack"(%280) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %282 = "tf.Shape"(%277) {device = ""} : (tensor) -> tensor<2xi32> - %283 = "tf.StridedSlice"(%282, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %284 = "tf.Shape"(%277) {device = ""} : (tensor) -> tensor<2xi32> - %285 = "tf.StridedSlice"(%284, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %286 = "tf.ConcatV2"(%283, %281, %285, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %287 = "tf.Reshape"(%277, %286) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %288 = "tf.ExpandDims"(%272, %3) {device = ""} : (tensor, tensor) -> tensor - %289 = "tf.Less"(%275, %288) {device = ""} : (tensor, tensor) -> tensor - %290 = "tf.Reshape"(%289, %5) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %291 = "tf.Where"(%290) {device = ""} : (tensor) -> tensor - %292 = "tf.Squeeze"(%291) {device = "", squeeze_dims = [1]} : (tensor) -> tensor - %293 = "tf.GatherV2"(%287, %292, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %294:2 = "tf.RaggedRange"(%270, %293, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %295 = "tf.If"(%172, %172, %168, %13) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedGather_Assert_1_AssertGuard_false_9750, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedGather_Assert_1_AssertGuard_true_9740} : (tensor, tensor, tensor, tensor) -> tensor - %296 = "tf.Identity"(%295) {device = ""} : (tensor) -> tensor - %297 = "tf.Select"(%2, %168, %13) {device = ""} : (tensor, tensor, tensor) -> tensor - %298 = "tf.Pack"(%297) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %299 = "tf.ConcatV2"(%1, %298, %12, %14) {device = ""} : (tensor<0xi64>, tensor<1xi64>, tensor<1xi64>, tensor) -> tensor<2xi64> - %300 = "tf.StridedSlice"(%299, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %301 = "tf.Equal"(%300, 
%13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %302 = "tf.StridedSlice"(%299, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %303 = "tf.StridedSlice"(%299, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %304 = "tf.Equal"(%303, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %305 = "tf.If"(%304, %304, %303, %247) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedGather_Assert_2_AssertGuard_false_10240, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedGather_Assert_2_AssertGuard_true_10230} : (tensor, tensor, tensor, tensor) -> tensor - %306 = "tf.Identity"(%305) {device = ""} : (tensor) -> tensor - %307 = "tf.If"(%301, %301, %247, %302) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedGather_Assert_3_AssertGuard_false_10600, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedGather_Assert_3_AssertGuard_true_10590} : (tensor, tensor, tensor, tensor) -> tensor - %308 = "tf.If"(%147, %147, %13, %143) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_Assert_AssertGuard_false_15300, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_Assert_AssertGuard_true_15290} : (tensor, tensor, tensor, tensor) -> tensor - %309 = "tf.Identity"(%308) {device = ""} : (tensor) -> tensor - %310 = "tf.Equal"(%143, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %311 = "tf.Select"(%310, %13, %143) {device = ""} : (tensor, tensor, tensor) -> tensor - %312 = "tf.Equal"(%311, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %313 = "tf.LogicalOr"(%312, %2) {device = ""} : (tensor, tensor) -> tensor - %314 = "tf.Equal"(%311, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %315 = "tf.LogicalOr"(%313, %314) {device = ""} : (tensor, tensor) -> tensor - %316 = "tf.Select"(%154, %311, %13) {device = ""} : (tensor, tensor, tensor) -> tensor - %317 = "tf.Pack"(%316, %13) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> - %318 = "tf.StridedSlice"(%317, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %319 = "tf.Cast"(%318) {Truncate = false, device = ""} : (tensor) -> tensor - %320 = "tf.Reshape"(%319, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor - %321 = "tf.Pack"(%7, %320) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %322 = "tf.Tile"(%155, %321) {device = ""} : (tensor, tensor<2xi32>) -> tensor - %323 = "tf.Mul"(%320, %158) {device = ""} : (tensor, tensor) -> tensor - %324 = "tf.Pack"(%323) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %325 = "tf.ConcatV2"(%157, %324, %159, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> 
tensor<1xi32> - %326 = "tf.Reshape"(%322, %325) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %327 = "tf.Shape"(%326) {device = ""} : (tensor) -> tensor<1xi64> - %328 = "tf.StridedSlice"(%327, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %329 = "tf.Pack"(%318) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %330 = "tf.StridedSlice"(%326, %329, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %331 = "tf.Sub"(%328, %318) {device = ""} : (tensor, tensor) -> tensor - %332 = "tf.Pack"(%331) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %333 = "tf.StridedSlice"(%326, %11, %332, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %334:2 = "tf.RaggedRange"(%333, %330, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %335 = "tf.GatherV2"(%161, %334#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %336 = "tf.StridedSlice"(%317, %15, %16, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> - %337 = "tf.StridedSlice"(%317, %15, %16, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> - %338 = "tf.StridedSlice"(%317, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi64> - %339 = "tf.ConcatV2"(%337, %338, %14) {device = ""} : (tensor<1xi64>, tensor<0xi64>, tensor) -> tensor<1xi64> - %340 = "tf.StridedSlice"(%317, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %341 = "tf.Mul"(%164, %340) {device = ""} : (tensor, tensor) -> tensor - %342 = "tf.Tile"(%341, %336) {device = ""} : (tensor, tensor<1xi64>) -> tensor - %343 = "tf.Cumsum"(%342, %14) {device = "", exclusive = false, reverse = false} : (tensor, tensor) -> tensor - %344 = "tf.ConcatV2"(%11, %343, %3) {device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor - %345 = "tf.Shape"(%344) {device = ""} : (tensor) -> tensor<1xi64> - %346 = "tf.StridedSlice"(%345, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %347 = "tf.Sub"(%346, %13) {device = ""} : (tensor, tensor) -> tensor - %348 = "tf.Equal"(%347, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %349 = "tf.LogicalOr"(%348, %2) {device = ""} : (tensor, tensor) -> tensor - %350 = "tf.Equal"(%347, %13) {device = "", 
incompatible_shape_error = true} : (tensor, tensor) -> tensor - %351 = "tf.LogicalOr"(%349, %350) {device = ""} : (tensor, tensor) -> tensor - %352 = "tf.StridedSlice"(%344, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %353 = "tf.StridedSlice"(%344, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %354 = "tf.Sub"(%352, %353) {device = ""} : (tensor, tensor) -> tensor - %355 = "tf.Shape"(%344) {device = ""} : (tensor) -> tensor<1xi64> - %356 = "tf.StridedSlice"(%355, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %357 = "tf.Sub"(%356, %13) {device = ""} : (tensor, tensor) -> tensor - %358 = "tf.Equal"(%357, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %359 = "tf.ExpandDims"(%344, %7) {device = ""} : (tensor, tensor) -> tensor - %360 = "tf.Shape"(%344) {device = ""} : (tensor) -> tensor<1xi32> - %361 = "tf.StridedSlice"(%360, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %362 = "tf.StridedSlice"(%360, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %363 = "tf.StridedSlice"(%360, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %364 = "tf.Select"(%2, %311, %13) {device = ""} : (tensor, tensor, tensor) -> tensor - %365 = "tf.Pack"(%364, %13) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> - %366 = "tf.StridedSlice"(%365, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %367 = "tf.Cast"(%366) {Truncate = false, device = ""} : (tensor) -> tensor - %368 = "tf.Reshape"(%367, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor - %369 = "tf.Pack"(%7, %368) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %370 = "tf.Tile"(%4, %369) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> - %371 = "tf.Mul"(%368, %8) {device = ""} : (tensor, tensor) -> tensor - %372 = "tf.Pack"(%371) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %373 = "tf.ConcatV2"(%9, %372, %9, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %374 = "tf.Reshape"(%370, %373) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor - %375 = "tf.Shape"(%374) {device = ""} : (tensor) -> tensor<1xi64> - %376 = "tf.StridedSlice"(%375, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : 
(tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %377 = "tf.Pack"(%366) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %378 = "tf.StridedSlice"(%374, %377, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %379 = "tf.Sub"(%376, %366) {device = ""} : (tensor, tensor) -> tensor - %380 = "tf.Pack"(%379) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %381 = "tf.StridedSlice"(%374, %11, %380, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %382:2 = "tf.RaggedRange"(%381, %378, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %383 = "tf.GatherV2"(%11, %382#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor - %384 = "tf.GatherV2"(%12, %383, %14) {batch_dims = 0 : i64, device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor - %385 = "tf.StridedSlice"(%365, %15, %16, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> - %386 = "tf.StridedSlice"(%365, %15, %16, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> - %387 = "tf.StridedSlice"(%365, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi64> - %388 = "tf.ConcatV2"(%386, %387, %14) {device = ""} : (tensor<1xi64>, tensor<0xi64>, tensor) -> tensor<1xi64> - %389 = "tf.Tile"(%384, %388) {device = ""} : (tensor, tensor<1xi64>) -> tensor - %390 = "tf.StridedSlice"(%365, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %391 = "tf.Mul"(%390, %12) {device = ""} : (tensor, tensor<1xi64>) -> tensor<1xi64> - %392 = "tf.Tile"(%391, %385) {device = ""} : (tensor<1xi64>, tensor<1xi64>) -> tensor - %393 = "tf.Cumsum"(%392, %14) {device = "", exclusive = false, reverse = false} : (tensor, tensor) -> tensor - %394 = "tf.ConcatV2"(%11, %393, %3) {device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor - %395 = "tf.StridedSlice"(%394, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %396 = "tf.ExpandDims"(%395, %7) {device = ""} : (tensor, tensor) -> tensor - %397 = "tf.Shape"(%395) {device = ""} : (tensor) -> tensor<1xi32> - %398 = "tf.StridedSlice"(%397, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %399 = "tf.Pack"(%398) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %400 = "tf.StridedSlice"(%394, 
%16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %401 = "tf.ExpandDims"(%400, %7) {device = ""} : (tensor, tensor) -> tensor - %402 = "tf.Shape"(%400) {device = ""} : (tensor) -> tensor<1xi32> - %403 = "tf.StridedSlice"(%402, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %404 = "tf.Pack"(%403) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %405 = "tf.Equal"(%143, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %406 = "tf.Select"(%405, %311, %13) {device = ""} : (tensor, tensor, tensor) -> tensor - %407 = "tf.Cast"(%406) {Truncate = false, device = ""} : (tensor) -> tensor - %408 = "tf.Reshape"(%407, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor - %409 = "tf.Pack"(%7, %408) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %410 = "tf.Mul"(%408, %8) {device = ""} : (tensor, tensor) -> tensor - %411 = "tf.Pack"(%410) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %412 = "tf.ConcatV2"(%9, %411, %9, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %413 = "tf.Pack"(%406) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %414 = "tf.Pack"(%10, %143) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> - %415 = "tf.ExpandDims"(%414, %7) {device = ""} : (tensor<2xi64>, tensor) -> tensor<2x1xi64> - %416 = "tf.Tile"(%415, %409) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> - %417 = "tf.Reshape"(%416, %412) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor - %418 = "tf.Shape"(%417) {device = ""} : (tensor) -> tensor<1xi64> - %419 = "tf.StridedSlice"(%418, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %420 = "tf.Sub"(%419, %406) {device = ""} : (tensor, tensor) -> tensor - %421 = "tf.Pack"(%420) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %422 = "tf.StridedSlice"(%417, %11, %421, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %423 = "tf.StridedSlice"(%417, %413, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %424:2 = "tf.RaggedRange"(%422, %423, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %425 = "tf.GatherV2"(%150, %424#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %426 = "tf.Cast"(%425) {Truncate = false, device = ""} : (tensor) -> tensor - %427 = "tf.BroadcastTo"(%426, %399) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %428 = "tf.Max"(%427, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor - %429 = "tf.Maximum"(%14, %428) {device = ""} : (tensor, tensor) -> tensor - %430 = "tf.Range"(%14, %429, %7) {device = ""} : (tensor, tensor, tensor) -> tensor - %431 = 
"tf.Pack"(%7, %429) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %432 = "tf.Tile"(%396, %431) {device = ""} : (tensor, tensor<2xi32>) -> tensor - %433 = "tf.Shape"(%432) {device = ""} : (tensor) -> tensor<2xi32> - %434 = "tf.StridedSlice"(%433, %15, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> - %435 = "tf.Prod"(%434, %15) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor - %436 = "tf.Pack"(%435) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %437 = "tf.Shape"(%432) {device = ""} : (tensor) -> tensor<2xi32> - %438 = "tf.StridedSlice"(%437, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %439 = "tf.Shape"(%432) {device = ""} : (tensor) -> tensor<2xi32> - %440 = "tf.StridedSlice"(%439, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %441 = "tf.ConcatV2"(%438, %436, %440, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %442 = "tf.Reshape"(%432, %441) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %443 = "tf.ExpandDims"(%427, %3) {device = ""} : (tensor, tensor) -> tensor - %444 = "tf.Less"(%430, %443) {device = ""} : (tensor, tensor) -> tensor - %445 = "tf.Reshape"(%444, %5) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %446 = "tf.Where"(%445) {device = ""} : (tensor) -> tensor - %447 = "tf.Squeeze"(%446) {device = "", squeeze_dims = [1]} : (tensor) -> tensor - %448 = "tf.GatherV2"(%442, %447, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %449 = "tf.Cast"(%425) {Truncate = false, device = ""} : (tensor) -> tensor - %450 = "tf.BroadcastTo"(%449, %404) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %451 = "tf.Max"(%450, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor - %452 = "tf.Maximum"(%14, %451) {device = ""} : (tensor, tensor) -> tensor - %453 = "tf.Range"(%14, %452, %7) {device = ""} : (tensor, tensor, tensor) -> tensor - %454 = "tf.Pack"(%7, %452) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %455 = "tf.Tile"(%401, %454) {device = ""} : (tensor, tensor<2xi32>) -> tensor - %456 = "tf.Shape"(%455) {device = ""} : (tensor) -> tensor<2xi32> - %457 = "tf.StridedSlice"(%456, %15, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> - %458 = "tf.Prod"(%457, %15) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor - %459 = "tf.Pack"(%458) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %460 = "tf.Shape"(%455) {device = ""} : (tensor) -> tensor<2xi32> - %461 = "tf.StridedSlice"(%460, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %462 = "tf.Shape"(%455) {device = ""} : (tensor) 
-> tensor<2xi32> - %463 = "tf.StridedSlice"(%462, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %464 = "tf.ConcatV2"(%461, %459, %463, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %465 = "tf.Reshape"(%455, %464) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %466 = "tf.ExpandDims"(%450, %3) {device = ""} : (tensor, tensor) -> tensor - %467 = "tf.Less"(%453, %466) {device = ""} : (tensor, tensor) -> tensor - %468 = "tf.Reshape"(%467, %5) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %469 = "tf.Where"(%468) {device = ""} : (tensor) -> tensor - %470 = "tf.Squeeze"(%469) {device = "", squeeze_dims = [1]} : (tensor) -> tensor - %471 = "tf.GatherV2"(%465, %470, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %472:2 = "tf.RaggedRange"(%448, %471, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %473 = "tf.GatherV2"(%389, %472#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %474 = "tf.If"(%315, %315, %311, %13) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_Assert_1_AssertGuard_false_16370, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_Assert_1_AssertGuard_true_16360} : (tensor, tensor, tensor, tensor) -> tensor - %475 = "tf.Identity"(%474) {device = ""} : (tensor) -> tensor - %476 = "tf.Select"(%2, %311, %13) {device = ""} : (tensor, tensor, tensor) -> tensor - %477 = "tf.Pack"(%476) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %478 = "tf.ConcatV2"(%1, %477, %12, %14) {device = ""} : (tensor<0xi64>, tensor<1xi64>, tensor<1xi64>, tensor) -> tensor<2xi64> - %479 = "tf.StridedSlice"(%478, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %480 = "tf.Equal"(%479, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %481 = "tf.StridedSlice"(%478, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %482 = "tf.StridedSlice"(%478, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %483 = "tf.Equal"(%482, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %484 = "tf.If"(%483, %483, %482, %425) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_Assert_2_AssertGuard_false_16860, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_Assert_2_AssertGuard_true_16850} : (tensor, tensor, tensor, tensor) -> tensor - %485 = "tf.Identity"(%484) {device = ""} : (tensor) -> tensor - %486 = "tf.If"(%480, %480, %425, %481) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_Assert_3_AssertGuard_false_17220, is_stateless = 
false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_Assert_3_AssertGuard_true_17210} : (tensor, tensor, tensor, tensor) -> tensor - %487 = "tf.Identity"(%486) {device = ""} : (tensor) -> tensor - %488 = "tf.If"(%351, %351, %13, %347) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedGather_1_Assert_AssertGuard_false_21900, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedGather_1_Assert_AssertGuard_true_21890} : (tensor, tensor, tensor, tensor) -> tensor - %489 = "tf.Identity"(%488) {device = ""} : (tensor) -> tensor - %490 = "tf.Equal"(%347, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %491 = "tf.Select"(%490, %13, %347) {device = ""} : (tensor, tensor, tensor) -> tensor - %492 = "tf.Equal"(%491, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %493 = "tf.LogicalOr"(%492, %2) {device = ""} : (tensor, tensor) -> tensor - %494 = "tf.Equal"(%491, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %495 = "tf.LogicalOr"(%493, %494) {device = ""} : (tensor, tensor) -> tensor - %496 = "tf.Select"(%358, %491, %13) {device = ""} : (tensor, tensor, tensor) -> tensor - %497 = "tf.Pack"(%496, %13) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> - %498 = "tf.StridedSlice"(%497, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %499 = "tf.Cast"(%498) {Truncate = false, device = ""} : (tensor) -> tensor - %500 = "tf.Reshape"(%499, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor - %501 = "tf.Pack"(%7, %500) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %502 = "tf.Tile"(%359, %501) {device = ""} : (tensor, tensor<2xi32>) -> tensor - %503 = "tf.Mul"(%500, %362) {device = ""} : (tensor, tensor) -> tensor - %504 = "tf.Pack"(%503) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %505 = "tf.ConcatV2"(%361, %504, %363, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %506 = "tf.Reshape"(%502, %505) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %507 = "tf.Shape"(%506) {device = ""} : (tensor) -> tensor<1xi64> - %508 = "tf.StridedSlice"(%507, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %509 = "tf.Pack"(%498) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %510 = "tf.StridedSlice"(%506, %509, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %511 = "tf.Sub"(%508, %498) {device = ""} : (tensor, tensor) -> tensor - %512 = "tf.Pack"(%511) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %513 = "tf.StridedSlice"(%506, %11, %512, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %514:2 = "tf.RaggedRange"(%513, %510, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, 
tensor) - %515 = "tf.Select"(%2, %491, %13) {device = ""} : (tensor, tensor, tensor) -> tensor - %516 = "tf.Pack"(%515, %13) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> - %517 = "tf.StridedSlice"(%516, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %518 = "tf.Cast"(%517) {Truncate = false, device = ""} : (tensor) -> tensor - %519 = "tf.Reshape"(%518, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor - %520 = "tf.Pack"(%7, %519) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %521 = "tf.Tile"(%4, %520) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> - %522 = "tf.Mul"(%519, %8) {device = ""} : (tensor, tensor) -> tensor - %523 = "tf.Pack"(%522) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %524 = "tf.ConcatV2"(%9, %523, %9, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %525 = "tf.Reshape"(%521, %524) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor - %526 = "tf.Shape"(%525) {device = ""} : (tensor) -> tensor<1xi64> - %527 = "tf.StridedSlice"(%526, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %528 = "tf.Pack"(%517) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %529 = "tf.StridedSlice"(%525, %528, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %530 = "tf.Sub"(%527, %517) {device = ""} : (tensor, tensor) -> tensor - %531 = "tf.Pack"(%530) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %532 = "tf.StridedSlice"(%525, %11, %531, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %533:2 = "tf.RaggedRange"(%532, %529, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %534 = "tf.StridedSlice"(%516, %15, %16, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> - %535 = "tf.StridedSlice"(%516, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %536 = "tf.Mul"(%535, %12) {device = ""} : (tensor, tensor<1xi64>) -> tensor<1xi64> - %537 = "tf.Tile"(%536, %534) {device = ""} : (tensor<1xi64>, tensor<1xi64>) -> tensor - %538 = "tf.Cumsum"(%537, %14) {device = "", exclusive = false, reverse = false} : (tensor, tensor) -> tensor - %539 = "tf.ConcatV2"(%11, %538, %3) {device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor - %540 = "tf.StridedSlice"(%539, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %541 = "tf.ExpandDims"(%540, %7) {device = 
""} : (tensor, tensor) -> tensor - %542 = "tf.Shape"(%540) {device = ""} : (tensor) -> tensor<1xi32> - %543 = "tf.StridedSlice"(%542, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %544 = "tf.Pack"(%543) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %545 = "tf.StridedSlice"(%539, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %546 = "tf.ExpandDims"(%545, %7) {device = ""} : (tensor, tensor) -> tensor - %547 = "tf.Shape"(%545) {device = ""} : (tensor) -> tensor<1xi32> - %548 = "tf.StridedSlice"(%547, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %549 = "tf.Pack"(%548) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %550 = "tf.Equal"(%347, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %551 = "tf.Select"(%550, %491, %13) {device = ""} : (tensor, tensor, tensor) -> tensor - %552 = "tf.Cast"(%551) {Truncate = false, device = ""} : (tensor) -> tensor - %553 = "tf.Reshape"(%552, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor - %554 = "tf.Pack"(%7, %553) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %555 = "tf.Mul"(%553, %8) {device = ""} : (tensor, tensor) -> tensor - %556 = "tf.Pack"(%555) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %557 = "tf.ConcatV2"(%9, %556, %9, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %558 = "tf.Pack"(%551) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %559 = "tf.Pack"(%10, %347) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> - %560 = "tf.ExpandDims"(%559, %7) {device = ""} : (tensor<2xi64>, tensor) -> tensor<2x1xi64> - %561 = "tf.Tile"(%560, %554) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> - %562 = "tf.Reshape"(%561, %557) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor - %563 = "tf.Shape"(%562) {device = ""} : (tensor) -> tensor<1xi64> - %564 = "tf.StridedSlice"(%563, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %565 = "tf.Sub"(%564, %551) {device = ""} : (tensor, tensor) -> tensor - %566 = "tf.Pack"(%565) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %567 = "tf.StridedSlice"(%562, %11, %566, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %568 = "tf.StridedSlice"(%562, %558, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %569:2 = "tf.RaggedRange"(%567, %568, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %570 = "tf.GatherV2"(%354, %569#1, %14) {batch_dims = 0 : i64, device = ""} 
: (tensor, tensor, tensor) -> tensor - %571 = "tf.Cast"(%570) {Truncate = false, device = ""} : (tensor) -> tensor - %572 = "tf.BroadcastTo"(%571, %544) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %573 = "tf.Max"(%572, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor - %574 = "tf.Maximum"(%14, %573) {device = ""} : (tensor, tensor) -> tensor - %575 = "tf.Range"(%14, %574, %7) {device = ""} : (tensor, tensor, tensor) -> tensor - %576 = "tf.Pack"(%7, %574) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %577 = "tf.Tile"(%541, %576) {device = ""} : (tensor, tensor<2xi32>) -> tensor - %578 = "tf.Shape"(%577) {device = ""} : (tensor) -> tensor<2xi32> - %579 = "tf.StridedSlice"(%578, %15, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> - %580 = "tf.Prod"(%579, %15) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor - %581 = "tf.Pack"(%580) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %582 = "tf.Shape"(%577) {device = ""} : (tensor) -> tensor<2xi32> - %583 = "tf.StridedSlice"(%582, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %584 = "tf.Shape"(%577) {device = ""} : (tensor) -> tensor<2xi32> - %585 = "tf.StridedSlice"(%584, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %586 = "tf.ConcatV2"(%583, %581, %585, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %587 = "tf.Reshape"(%577, %586) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %588 = "tf.ExpandDims"(%572, %3) {device = ""} : (tensor, tensor) -> tensor - %589 = "tf.Less"(%575, %588) {device = ""} : (tensor, tensor) -> tensor - %590 = "tf.Reshape"(%589, %5) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %591 = "tf.Where"(%590) {device = ""} : (tensor) -> tensor - %592 = "tf.Squeeze"(%591) {device = "", squeeze_dims = [1]} : (tensor) -> tensor - %593 = "tf.GatherV2"(%587, %592, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %594 = "tf.Cast"(%570) {Truncate = false, device = ""} : (tensor) -> tensor - %595 = "tf.BroadcastTo"(%594, %549) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %596 = "tf.Max"(%595, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor - %597 = "tf.Maximum"(%14, %596) {device = ""} : (tensor, tensor) -> tensor - %598 = "tf.Range"(%14, %597, %7) {device = ""} : (tensor, tensor, tensor) -> tensor - %599 = "tf.Pack"(%7, %597) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %600 = "tf.Tile"(%546, %599) {device = ""} : (tensor, tensor<2xi32>) -> tensor - %601 = "tf.Shape"(%600) {device = ""} : (tensor) -> tensor<2xi32> - %602 = "tf.StridedSlice"(%601, %15, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> - %603 = "tf.Prod"(%602, %15) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> 
tensor - %604 = "tf.Pack"(%603) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %605 = "tf.Shape"(%600) {device = ""} : (tensor) -> tensor<2xi32> - %606 = "tf.StridedSlice"(%605, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %607 = "tf.Shape"(%600) {device = ""} : (tensor) -> tensor<2xi32> - %608 = "tf.StridedSlice"(%607, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %609 = "tf.ConcatV2"(%606, %604, %608, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %610 = "tf.Reshape"(%600, %609) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %611 = "tf.ExpandDims"(%595, %3) {device = ""} : (tensor, tensor) -> tensor - %612 = "tf.Less"(%598, %611) {device = ""} : (tensor, tensor) -> tensor - %613 = "tf.Reshape"(%612, %5) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %614 = "tf.Where"(%613) {device = ""} : (tensor) -> tensor - %615 = "tf.Squeeze"(%614) {device = "", squeeze_dims = [1]} : (tensor) -> tensor - %616 = "tf.GatherV2"(%610, %615, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %617:2 = "tf.RaggedRange"(%593, %616, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %618 = "tf.If"(%495, %495, %491, %13) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedGather_1_Assert_1_AssertGuard_false_22970, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedGather_1_Assert_1_AssertGuard_true_22960} : (tensor, tensor, tensor, tensor) -> tensor - %619 = "tf.Identity"(%618) {device = ""} : (tensor) -> tensor - %620 = "tf.Select"(%2, %491, %13) {device = ""} : (tensor, tensor, tensor) -> tensor - %621 = "tf.Pack"(%620) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %622 = "tf.ConcatV2"(%1, %621, %12, %14) {device = ""} : (tensor<0xi64>, tensor<1xi64>, tensor<1xi64>, tensor) -> tensor<2xi64> - %623 = "tf.StridedSlice"(%622, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %624 = "tf.Equal"(%623, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %625 = "tf.StridedSlice"(%622, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %626 = "tf.StridedSlice"(%622, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %627 = "tf.Equal"(%626, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %628 = "tf.If"(%627, %627, %626, %570) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedGather_1_Assert_2_AssertGuard_false_23460, is_stateless = false, output_shapes = 
[#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedGather_1_Assert_2_AssertGuard_true_23450} : (tensor, tensor, tensor, tensor) -> tensor - %629 = "tf.Identity"(%628) {device = ""} : (tensor) -> tensor - %630 = "tf.If"(%624, %624, %570, %625) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_false_23820, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_true_23810} : (tensor, tensor, tensor, tensor) -> tensor - %631 = "tf.Identity"(%79) {device = ""} : (tensor) -> tensor - %632 = "tf.Identity"(%630) {device = ""} : (tensor) -> tensor - %633 = "tf.Identity"(%307) {device = ""} : (tensor) -> tensor - %634 = "tf.Shape"(%36#2) {device = ""} : (tensor) -> tensor<1xi32> - %635 = "tf.StridedSlice"(%634, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %636 = "tf.Cast"(%635) {Truncate = false, device = ""} : (tensor<0xi32>) -> tensor<0xi64> - %637 = "tf.Identity"(%636) {device = ""} : (tensor<0xi64>) -> tensor<0xi64> - %638 = "tf.Shape"(%36#3) {device = ""} : (tensor) -> tensor<1xi32> - %639 = "tf.StridedSlice"(%638, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %640 = "tf.Cast"(%639) {Truncate = false, device = ""} : (tensor<0xi32>) -> tensor<0xi64> - %641 = "tf.Identity"(%640) {device = ""} : (tensor<0xi64>) -> tensor<0xi64> - %642 = "tf.GatherV2"(%36#3, %335, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %643 = "tf.Tile"(%642, %339) {device = ""} : (tensor, tensor<1xi64>) -> tensor - %644 = "tf.Sub"(%643, %473) {device = ""} : (tensor, tensor) -> tensor - %645 = "tf.Shape"(%644) {device = ""} : (tensor) -> tensor<1xi32> - %646 = "tf.StridedSlice"(%645, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %647 = "tf.Cast"(%646) {Truncate = false, device = ""} : (tensor<0xi32>) -> tensor<0xi64> - %648 = "tf.Identity"(%647) {device = ""} : (tensor<0xi64>) -> tensor<0xi64> - %649 = "tf.UnicodeEncode"(%36#0, %57) {Tsplits = i64, device = "", errors = "replace", output_encoding = "UTF-8", replacement_char = 65533 : i64} : (tensor, tensor) -> tensor - %650 = "tf.Identity"(%649) {device = ""} : (tensor) -> tensor - return %650, %631 : tensor, tensor - } - func @WhitespaceTokenize_RaggedConcat_assert_equal_1_Assert_AssertGuard_false_3210(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Input tensors have incompatible shapes."> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedConcat/RaggedFromTensor/Const:0) = "> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/RaggedConcat/RaggedNRows/Const:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, 
%0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_RaggedConcat_assert_equal_1_Assert_AssertGuard_true_3200(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_3970(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_3960(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_4330(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor) -> () - %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor - return %4 : tensor - } - func @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_4320(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = 
"tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_4670(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/strided_slice_1:0) = "> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_4660(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_5040(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_5030(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_5400(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do 
not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor) -> () - %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor - return %4 : tensor - } - func @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_5390(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_false_5760(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/strided_slice:0) = "> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RaggedNRows/sub:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_true_5750(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_6110(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, 
tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_6100(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_6470(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor) -> () - %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor - return %4 : tensor - } - func @WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_6460(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_6810(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/strided_slice_1:0) = "> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_6800(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func 
@WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_7180(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_7170(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_7540(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor) -> () - %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor - return %4 : tensor - } - func @WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_7530(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_7880(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x 
(WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/strided_slice_1:0) = "> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_7870(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_RaggedGather_Assert_AssertGuard_false_8680(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_RaggedGather_Assert_AssertGuard_true_8670(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_RaggedGather_Assert_1_AssertGuard_false_9750(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_RaggedGather_Assert_1_AssertGuard_true_9740(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_RaggedGather_Assert_2_AssertGuard_false_10240(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, 
#tf.shape], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_RaggedGather_Assert_2_AssertGuard_true_10230(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_RaggedGather_Assert_3_AssertGuard_false_10600(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_RaggedGather_Assert_3_AssertGuard_true_10590(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_Assert_AssertGuard_false_15300(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_Assert_AssertGuard_true_15290(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_Assert_1_AssertGuard_false_16370(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, 
#tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_Assert_1_AssertGuard_true_16360(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_Assert_2_AssertGuard_false_16860(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_Assert_2_AssertGuard_true_16850(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_Assert_3_AssertGuard_false_17220(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_Assert_3_AssertGuard_true_17210(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_RaggedGather_1_Assert_AssertGuard_false_21900(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, 
#tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_RaggedGather_1_Assert_AssertGuard_true_21890(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_RaggedGather_1_Assert_1_AssertGuard_false_22970(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_RaggedGather_1_Assert_1_AssertGuard_true_22960(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_RaggedGather_1_Assert_2_AssertGuard_false_23460(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_RaggedGather_1_Assert_2_AssertGuard_true_23450(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_false_23820(%arg0: tensor, %arg1: tensor, 
%arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_true_23810(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - - // CHECK: func @whitespace_tokenizer_rank1(%arg0: tensor<1x!tf.string> {tf._user_specified_name = "input"}) -> (tensor, tensor) attributes {sym_visibility = "private", tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf._input_shapes = [#tf.shape<1>], tf.signature.is_stateful} { - // CHECK: %0:2 = "tfl.custom"(%arg0) {custom_code = "tftext:WhitespaceTokenizer", custom_option = opaque<"tfl", "0x"> : tensor<0xi8>} : (tensor<1x!tf.string>) -> (tensor, tensor) - // CHECK: return %0#0, %0#1 : tensor, tensor - - func @whitespace_tokenizer_rank2(%arg0: tensor {tf._user_specified_name = "input"}) -> (tensor, tensor, tensor) attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape], tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<[]> : tensor<0xi64>} : () -> tensor<0xi64> - %1 = "tf.Const"() {value = dense : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<-1> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<[[0], [1]]> : tensor<2x1xi64>} : () -> tensor<2x1xi64> - %4 = "tf.Const"() {value = dense<[2, -1]> : tensor<2xi32>} : () -> tensor<2xi32> - %5 = "tf.Const"() {value = dense<2> : tensor} : () -> tensor - %6 = "tf.Const"() {value = dense<-1> : tensor<1xi32>} : () -> tensor<1xi32> - %7 = "tf.Const"() {value = dense<2> : tensor<1xi32>} : () -> tensor<1xi32> - %8 = "tf.Const"() {value = dense<[1, 0]> : tensor<2xi32>} : () -> tensor<2xi32> - %9 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor - %10 = "tf.Const"() {value = dense<2> : tensor} : () -> tensor - %11 = "tf.Const"() {value = dense<[]> : tensor<0xi32>} : () -> tensor<0xi32> - %12 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor - %13 = "tf.Const"() {value = dense<0> : tensor<1xi64>} : () -> tensor<1xi64> - %14 = "tf.Const"() {value = dense<1> : tensor<1xi64>} : () -> tensor<1xi64> - %15 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor - %16 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor - %17 = "tf.Const"() {value = dense<0> : tensor<1xi32>} : () -> tensor<1xi32> - %18 = "tf.Const"() {value = dense<1> : tensor<1xi32>} : () -> tensor<1xi32> - %19 = "tf.Shape"(%arg0) {device = ""} : (tensor) -> tensor<2xi64> - %20 = "tf.StridedSlice"(%19, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, 
tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %21 = "tf.StridedSlice"(%19, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %22 = "tf.Mul"(%20, %21) {device = ""} : (tensor, tensor) -> tensor - %23 = "tf.Pack"(%22) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %24 = "tf.StridedSlice"(%19, %7, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi64> - %25 = "tf.ConcatV2"(%23, %24, %16) {device = ""} : (tensor<1xi64>, tensor<0xi64>, tensor) -> tensor<1xi64> - %26 = "tf.Reshape"(%arg0, %25) {device = ""} : (tensor, tensor<1xi64>) -> tensor - %27 = "tf.StringLength"(%26) {device = "", unit = "BYTE"} : (tensor) -> tensor - %28 = "tf.ExpandDims"(%27, %9) {device = ""} : (tensor, tensor) -> tensor - %29 = "tf.Cast"(%28) {Truncate = false, device = ""} : (tensor) -> tensor - %30 = "tf.Shape"(%29) {device = ""} : (tensor) -> tensor<2xi64> - %31 = "tf.StridedSlice"(%30, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %32 = "tf.StridedSlice"(%30, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %33 = "tf.Mul"(%31, %32) {device = ""} : (tensor, tensor) -> tensor - %34 = "tf.Pack"(%33) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %35 = "tf.StridedSlice"(%30, %7, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi64> - %36 = "tf.ConcatV2"(%34, %35, %16) {device = ""} : (tensor<1xi64>, tensor<0xi64>, tensor) -> tensor<1xi64> - %37 = "tf.Reshape"(%29, %36) {device = ""} : (tensor, tensor<1xi64>) -> tensor - %38 = "tf.StridedSlice"(%30, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %39 = "tf.AddV2"(%38, %15) {device = ""} : (tensor, tensor) -> tensor - %40 = "tf.Range"(%12, %39, %15) {device = ""} : (tensor, tensor, tensor) -> tensor - %41 = "tf.Mul"(%40, %15) {device = ""} : (tensor, tensor) -> tensor - %42 = "tf.Reshape"(%26, %6) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %43:3 = "tf.UnicodeDecodeWithOffsets"(%42) {Tsplits = i64, device = "", errors = "replace", input_encoding = "UTF-8", replace_control_characters = false, replacement_char = 65533 : i64} : (tensor) -> (tensor, tensor, tensor) - %44 = "tf.StridedSlice"(%43#0, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %45 = "tf.Shape"(%44) {device = ""} : (tensor) -> tensor<1xi32> - %46 = "tf.ConcatV2"(%45, %18, %16) {device = ""} : (tensor<1xi32>, tensor<1xi32>, tensor) -> tensor<2xi32> - %47 = "tf.Reshape"(%44, %46) 
{device = ""} : (tensor, tensor<2xi32>) -> tensor - %48 = "tf.Shape"(%47) {device = ""} : (tensor) -> tensor<2xi64> - %49 = "tf.StridedSlice"(%48, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %50 = "tf.AddV2"(%49, %15) {device = ""} : (tensor, tensor) -> tensor - %51 = "tf.Range"(%12, %50, %15) {device = ""} : (tensor, tensor, tensor) -> tensor - %52 = "tf.Mul"(%51, %15) {device = ""} : (tensor, tensor) -> tensor - %53 = "tf.ExpandDims"(%52, %9) {device = ""} : (tensor, tensor) -> tensor - %54 = "tf.Shape"(%52) {device = ""} : (tensor) -> tensor<1xi32> - %55 = "tf.StridedSlice"(%54, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %56 = "tf.StridedSlice"(%54, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %57 = "tf.StridedSlice"(%54, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %58 = "tf.StridedSlice"(%52, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %59 = "tf.StridedSlice"(%52, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %60 = "tf.Sub"(%58, %59) {device = ""} : (tensor, tensor) -> tensor - %61 = "tf.Shape"(%47) {device = ""} : (tensor) -> tensor<2xi32> - %62 = "tf.Cast"(%61) {Truncate = false, device = ""} : (tensor<2xi32>) -> tensor<2xi64> - %63 = "tf.StridedSlice"(%62, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %64 = "tf.Equal"(%63, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %65 = "tf.StridedSlice"(%62, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %66 = "tf.Equal"(%65, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %67 = "tf.StridedSlice"(%62, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %68 = "tf.Shape"(%47) {device = ""} : (tensor) -> tensor<2xi32> - %69 = "tf.Cast"(%68) {Truncate = false, device = ""} : (tensor<2xi32>) -> tensor<2xi64> - %70 = "tf.StridedSlice"(%69, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, 
tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %71 = "tf.Equal"(%70, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %72 = "tf.StridedSlice"(%43#0, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %73 = "tf.AddV2"(%72, %15) {device = ""} : (tensor, tensor) -> tensor - %74 = "tf.StridedSlice"(%43#0, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %75 = "tf.Minimum"(%73, %74) {device = ""} : (tensor, tensor) -> tensor - %76:2 = "tf.RaggedRange"(%75, %74, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %77 = "tf.Shape"(%76#0) {device = ""} : (tensor) -> tensor<1xi64> - %78 = "tf.StridedSlice"(%77, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %79 = "tf.Sub"(%78, %15) {device = ""} : (tensor, tensor) -> tensor - %80 = "tf.Equal"(%38, %79) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %81 = "tf.All"(%80, %11) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor - %82 = "tf.If"(%81, %81, %38, %79) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedConcat_assert_equal_1_Assert_AssertGuard_false_99640, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedConcat_assert_equal_1_Assert_AssertGuard_true_99630} : (tensor, tensor, tensor, tensor) -> tensor - %83 = "tf.Identity"(%82) {device = ""} : (tensor) -> tensor - %84 = "tf.StridedSlice"(%41, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %85 = "tf.Mul"(%79, %5) {device = ""} : (tensor, tensor) -> tensor - %86 = "tf.Range"(%12, %85, %15) {device = ""} : (tensor, tensor, tensor) -> tensor - %87 = "tf.Reshape"(%86, %4) {device = ""} : (tensor, tensor<2xi32>) -> tensor<2x?xi64> - %88 = "tf.Transpose"(%87, %8) {device = ""} : (tensor<2x?xi64>, tensor<2xi32>) -> tensor - %89 = "tf.Reshape"(%88, %6) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %90 = "tf.StridedSlice"(%76#0, %6, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %91 = "tf.AddV2"(%84, %90) {device = ""} : (tensor, tensor) -> tensor - %92 = "tf.ConcatV2"(%76#0, %91, %16) {device = ""} : (tensor, tensor, tensor) -> tensor - %93 = "tf.GatherV2"(%43#2, %76#1, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %94 = "tf.ConcatV2"(%93, %37, %16) {device = ""} : (tensor, tensor, tensor) -> tensor - %95:2 = "tf.RaggedGather"(%92, %94, %89) {OUTPUT_RAGGED_RANK = 1 : i64, PARAMS_RAGGED_RANK = 1 : i64, Tindices = i64, Tsplits = i64, Tvalues = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %96 = 
"tf.StridedSlice"(%95#0, %17, %17, %7) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %97 = "tf.StridedSlice"(%96, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %98 = "tf.Shape"(%97) {device = ""} : (tensor) -> tensor<1xi32> - %99 = "tf.ConcatV2"(%98, %18, %16) {device = ""} : (tensor<1xi32>, tensor<1xi32>, tensor) -> tensor<2xi32> - %100 = "tf.Reshape"(%97, %99) {device = ""} : (tensor, tensor<2xi32>) -> tensor - %101 = "tf.Shape"(%100) {device = ""} : (tensor) -> tensor<2xi64> - %102 = "tf.StridedSlice"(%101, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %103 = "tf.AddV2"(%102, %15) {device = ""} : (tensor, tensor) -> tensor - %104 = "tf.Range"(%12, %103, %15) {device = ""} : (tensor, tensor, tensor) -> tensor - %105 = "tf.Mul"(%104, %15) {device = ""} : (tensor, tensor) -> tensor - %106 = "tf.ExpandDims"(%105, %9) {device = ""} : (tensor, tensor) -> tensor - %107 = "tf.Shape"(%105) {device = ""} : (tensor) -> tensor<1xi32> - %108 = "tf.StridedSlice"(%107, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %109 = "tf.StridedSlice"(%107, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %110 = "tf.StridedSlice"(%107, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %111 = "tf.StridedSlice"(%105, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %112 = "tf.StridedSlice"(%105, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %113 = "tf.Sub"(%111, %112) {device = ""} : (tensor, tensor) -> tensor - %114 = "tf.Shape"(%100) {device = ""} : (tensor) -> tensor<2xi32> - %115 = "tf.Cast"(%114) {Truncate = false, device = ""} : (tensor<2xi32>) -> tensor<2xi64> - %116 = "tf.StridedSlice"(%115, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %117 = "tf.Equal"(%116, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %118 = "tf.StridedSlice"(%115, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, 
tensor<1xi32>) -> tensor - %119 = "tf.Equal"(%118, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %120 = "tf.StridedSlice"(%115, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %121 = "tf.Shape"(%100) {device = ""} : (tensor) -> tensor<2xi32> - %122 = "tf.Cast"(%121) {Truncate = false, device = ""} : (tensor<2xi32>) -> tensor<2xi64> - %123 = "tf.StridedSlice"(%122, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %124 = "tf.Equal"(%123, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %125:5 = "tf.WhitespaceTokenizeWithOffsets"(%43#1, %43#0) {Tsplits = i64, device = ""} : (tensor, tensor) -> (tensor, tensor, tensor, tensor, tensor) - %126 = "tf.StridedSlice"(%125#1, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %127 = "tf.Equal"(%126, %12) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %128 = "tf.All"(%127, %11) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor - %129 = "tf.If"(%128, %128, %126, %12) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_100400, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_100390} : (tensor, tensor, tensor, tensor) -> tensor - %130 = "tf.Identity"(%129) {device = ""} : (tensor) -> tensor - %131 = "tf.StridedSlice"(%125#1, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %132 = "tf.StridedSlice"(%125#1, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %133 = "tf.Sub"(%131, %132) {device = ""} : (tensor, tensor) -> tensor - %134 = "tf.LessEqual"(%12, %133) {device = ""} : (tensor, tensor) -> tensor - %135 = "tf.All"(%134, %17) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor - %136 = "tf.If"(%135, %135, %133) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_100760, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_100750} : (tensor, tensor, tensor) 
-> tensor - %137 = "tf.Identity"(%136) {device = ""} : (tensor) -> tensor - %138 = "tf.Identity"(%125#1) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor - %139 = "tf.StridedSlice"(%138, %6, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %140 = "tf.Shape"(%125#0) {device = ""} : (tensor) -> tensor<1xi64> - %141 = "tf.StridedSlice"(%140, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %142 = "tf.Equal"(%139, %141) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %143 = "tf.All"(%142, %11) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor - %144 = "tf.If"(%143, %143, %139, %141) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_101100, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_101090} : (tensor, tensor, tensor, tensor) -> tensor - %145 = "tf.Identity"(%144) {device = ""} : (tensor) -> tensor - %146 = "tf.Identity"(%138) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor - %147 = "tf.Shape"(%146) {device = ""} : (tensor) -> tensor<1xi64> - %148 = "tf.StridedSlice"(%147, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %149 = "tf.Sub"(%148, %15) {device = ""} : (tensor, tensor) -> tensor - %150 = "tf.StridedSlice"(%125#4, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %151 = "tf.Equal"(%150, %12) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %152 = "tf.All"(%151, %11) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor - %153 = "tf.If"(%152, %152, %150, %12) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_101470, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_101460} : (tensor, tensor, tensor, tensor) -> tensor - %154 = "tf.Identity"(%153) {device = ""} : (tensor) -> tensor - %155 = "tf.StridedSlice"(%125#4, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, 
tensor<1xi32>) -> tensor - %156 = "tf.StridedSlice"(%125#4, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %157 = "tf.Sub"(%155, %156) {device = ""} : (tensor, tensor) -> tensor - %158 = "tf.LessEqual"(%12, %157) {device = ""} : (tensor, tensor) -> tensor - %159 = "tf.All"(%158, %17) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor - %160 = "tf.If"(%159, %159, %157) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_101830, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_101820} : (tensor, tensor, tensor) -> tensor - %161 = "tf.Identity"(%160) {device = ""} : (tensor) -> tensor - %162 = "tf.Identity"(%125#4) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor - %163 = "tf.StridedSlice"(%162, %6, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %164 = "tf.Equal"(%163, %149) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %165 = "tf.All"(%164, %11) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor - %166 = "tf.If"(%165, %165, %163, %149) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_false_102190, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_true_102180} : (tensor, tensor, tensor, tensor) -> tensor - %167 = "tf.Identity"(%166) {device = ""} : (tensor) -> tensor - %168 = "tf.Identity"(%162) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor - %169 = "tf.StridedSlice"(%125#4, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %170 = "tf.Equal"(%169, %12) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %171 = "tf.All"(%170, %11) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor - %172 = "tf.If"(%171, %171, %169, %12) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_102540, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = 
@WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_102530} : (tensor, tensor, tensor, tensor) -> tensor - %173 = "tf.Identity"(%172) {device = ""} : (tensor) -> tensor - %174 = "tf.StridedSlice"(%125#4, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %175 = "tf.StridedSlice"(%125#4, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %176 = "tf.Sub"(%174, %175) {device = ""} : (tensor, tensor) -> tensor - %177 = "tf.LessEqual"(%12, %176) {device = ""} : (tensor, tensor) -> tensor - %178 = "tf.All"(%177, %17) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor - %179 = "tf.If"(%178, %178, %176) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_102900, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_102890} : (tensor, tensor, tensor) -> tensor - %180 = "tf.Identity"(%179) {device = ""} : (tensor) -> tensor - %181 = "tf.Identity"(%125#4) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor - %182 = "tf.StridedSlice"(%181, %6, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %183 = "tf.Shape"(%125#2) {device = ""} : (tensor) -> tensor<1xi64> - %184 = "tf.StridedSlice"(%183, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %185 = "tf.Equal"(%182, %184) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %186 = "tf.All"(%185, %11) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor - %187 = "tf.If"(%186, %186, %182, %184) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_103240, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_103230} : (tensor, tensor, tensor, tensor) -> tensor - %188 = "tf.Identity"(%187) {device = ""} : (tensor) -> tensor - %189 = "tf.Identity"(%181) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor - %190 = "tf.Shape"(%189) {device = ""} : (tensor) -> tensor<1xi64> - %191 = 
"tf.StridedSlice"(%190, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %192 = "tf.Sub"(%191, %15) {device = ""} : (tensor, tensor) -> tensor - %193 = "tf.Equal"(%192, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %194 = "tf.LogicalOr"(%64, %193) {device = ""} : (tensor, tensor) -> tensor - %195 = "tf.Equal"(%192, %63) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %196 = "tf.LogicalOr"(%194, %195) {device = ""} : (tensor, tensor) -> tensor - %197 = "tf.StridedSlice"(%189, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %198 = "tf.StridedSlice"(%189, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %199 = "tf.Sub"(%197, %198) {device = ""} : (tensor, tensor) -> tensor - %200 = "tf.Shape"(%189) {device = ""} : (tensor) -> tensor<1xi64> - %201 = "tf.StridedSlice"(%200, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %202 = "tf.Sub"(%201, %15) {device = ""} : (tensor, tensor) -> tensor - %203 = "tf.Equal"(%202, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %204 = "tf.ExpandDims"(%189, %9) {device = ""} : (tensor, tensor) -> tensor - %205 = "tf.Shape"(%189) {device = ""} : (tensor) -> tensor<1xi32> - %206 = "tf.StridedSlice"(%205, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %207 = "tf.StridedSlice"(%205, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %208 = "tf.StridedSlice"(%205, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %209 = "tf.StridedSlice"(%125#4, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %210 = "tf.Equal"(%209, %12) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %211 = "tf.All"(%210, %11) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor - %212 = "tf.If"(%211, %211, %209, %12) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_103610, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = 
@WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_103600} : (tensor, tensor, tensor, tensor) -> tensor - %213 = "tf.Identity"(%212) {device = ""} : (tensor) -> tensor - %214 = "tf.StridedSlice"(%125#4, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %215 = "tf.StridedSlice"(%125#4, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %216 = "tf.Sub"(%214, %215) {device = ""} : (tensor, tensor) -> tensor - %217 = "tf.LessEqual"(%12, %216) {device = ""} : (tensor, tensor) -> tensor - %218 = "tf.All"(%217, %17) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor - %219 = "tf.If"(%218, %218, %216) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_103970, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_103960} : (tensor, tensor, tensor) -> tensor - %220 = "tf.Identity"(%219) {device = ""} : (tensor) -> tensor - %221 = "tf.Identity"(%125#4) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor - %222 = "tf.StridedSlice"(%221, %6, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %223 = "tf.Shape"(%125#3) {device = ""} : (tensor) -> tensor<1xi64> - %224 = "tf.StridedSlice"(%223, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %225 = "tf.Equal"(%222, %224) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %226 = "tf.All"(%225, %11) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor - %227 = "tf.If"(%226, %226, %222, %224) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_104310, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_104300} : (tensor, tensor, tensor, tensor) -> tensor - %228 = "tf.Identity"(%227) {device = ""} : (tensor) -> tensor - %229 = "tf.Identity"(%221) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor - %230 = "tf.Shape"(%229) {device = ""} : (tensor) -> tensor<1xi64> - %231 = 
"tf.StridedSlice"(%230, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %232 = "tf.Sub"(%231, %15) {device = ""} : (tensor, tensor) -> tensor - %233 = "tf.Equal"(%232, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %234 = "tf.LogicalOr"(%233, %1) {device = ""} : (tensor, tensor) -> tensor - %235 = "tf.Equal"(%232, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %236 = "tf.LogicalOr"(%234, %235) {device = ""} : (tensor, tensor) -> tensor - %237 = "tf.StridedSlice"(%229, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %238 = "tf.StridedSlice"(%229, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %239 = "tf.Sub"(%237, %238) {device = ""} : (tensor, tensor) -> tensor - %240 = "tf.Shape"(%229) {device = ""} : (tensor) -> tensor<1xi64> - %241 = "tf.StridedSlice"(%240, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %242 = "tf.Sub"(%241, %15) {device = ""} : (tensor, tensor) -> tensor - %243 = "tf.Equal"(%242, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %244 = "tf.ExpandDims"(%229, %9) {device = ""} : (tensor, tensor) -> tensor - %245 = "tf.Shape"(%229) {device = ""} : (tensor) -> tensor<1xi32> - %246 = "tf.StridedSlice"(%245, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %247 = "tf.StridedSlice"(%245, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %248 = "tf.StridedSlice"(%245, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %249 = "tf.StridedSlice"(%229, %6, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %250 = "tf.Range"(%12, %249, %15) {device = ""} : (tensor, tensor, tensor) -> tensor - %251 = "tf.StridedSlice"(%229, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %252 = "tf.StridedSlice"(%229, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %253 = "tf.Sub"(%251, %252) {device = 
""} : (tensor, tensor) -> tensor - %254 = "tf.If"(%196, %196, %63, %192) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_AssertGuard_false_105110, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_AssertGuard_true_105100} : (tensor, tensor, tensor, tensor) -> tensor - %255 = "tf.Identity"(%254) {device = ""} : (tensor) -> tensor - %256 = "tf.Equal"(%192, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %257 = "tf.Select"(%256, %63, %192) {device = ""} : (tensor, tensor, tensor) -> tensor - %258 = "tf.Equal"(%257, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %259 = "tf.LogicalOr"(%258, %66) {device = ""} : (tensor, tensor) -> tensor - %260 = "tf.Equal"(%65, %257) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %261 = "tf.LogicalOr"(%259, %260) {device = ""} : (tensor, tensor) -> tensor - %262 = "tf.Select"(%203, %257, %15) {device = ""} : (tensor, tensor, tensor) -> tensor - %263 = "tf.Pack"(%262, %15) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> - %264 = "tf.StridedSlice"(%263, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %265 = "tf.Cast"(%264) {Truncate = false, device = ""} : (tensor) -> tensor - %266 = "tf.Reshape"(%265, %11) {device = ""} : (tensor, tensor<0xi32>) -> tensor - %267 = "tf.Pack"(%9, %266) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %268 = "tf.Tile"(%204, %267) {device = ""} : (tensor, tensor<2xi32>) -> tensor - %269 = "tf.Mul"(%266, %207) {device = ""} : (tensor, tensor) -> tensor - %270 = "tf.Pack"(%269) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %271 = "tf.ConcatV2"(%206, %270, %208, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %272 = "tf.Reshape"(%268, %271) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %273 = "tf.Shape"(%272) {device = ""} : (tensor) -> tensor<1xi64> - %274 = "tf.StridedSlice"(%273, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %275 = "tf.Pack"(%264) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %276 = "tf.StridedSlice"(%272, %275, %13, %14) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %277 = "tf.Sub"(%274, %264) {device = ""} : (tensor, tensor) -> tensor - %278 = "tf.Pack"(%277) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %279 = "tf.StridedSlice"(%272, %13, %278, %14) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %280:2 = "tf.RaggedRange"(%279, %276, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %281 = "tf.Select"(%71, %257, %15) {device = ""} : (tensor, tensor, tensor) -> tensor - %282 = 
"tf.Pack"(%281, %15) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> - %283 = "tf.StridedSlice"(%282, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %284 = "tf.Cast"(%283) {Truncate = false, device = ""} : (tensor) -> tensor - %285 = "tf.Reshape"(%284, %11) {device = ""} : (tensor, tensor<0xi32>) -> tensor - %286 = "tf.Pack"(%9, %285) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %287 = "tf.Tile"(%53, %286) {device = ""} : (tensor, tensor<2xi32>) -> tensor - %288 = "tf.Mul"(%285, %56) {device = ""} : (tensor, tensor) -> tensor - %289 = "tf.Pack"(%288) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %290 = "tf.ConcatV2"(%55, %289, %57, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %291 = "tf.Reshape"(%287, %290) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %292 = "tf.Shape"(%291) {device = ""} : (tensor) -> tensor<1xi64> - %293 = "tf.StridedSlice"(%292, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %294 = "tf.Pack"(%283) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %295 = "tf.StridedSlice"(%291, %294, %13, %14) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %296 = "tf.Sub"(%293, %283) {device = ""} : (tensor, tensor) -> tensor - %297 = "tf.Pack"(%296) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %298 = "tf.StridedSlice"(%291, %13, %297, %14) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %299:2 = "tf.RaggedRange"(%298, %295, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %300 = "tf.StridedSlice"(%282, %17, %18, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> - %301 = "tf.StridedSlice"(%282, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %302 = "tf.Mul"(%60, %301) {device = ""} : (tensor, tensor) -> tensor - %303 = "tf.Tile"(%302, %300) {device = ""} : (tensor, tensor<1xi64>) -> tensor - %304 = "tf.Cumsum"(%303, %16) {device = "", exclusive = false, reverse = false} : (tensor, tensor) -> tensor - %305 = "tf.ConcatV2"(%13, %304, %2) {device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor - %306 = "tf.StridedSlice"(%305, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %307 = "tf.ExpandDims"(%306, %9) {device = ""} : (tensor, tensor) -> tensor - %308 = "tf.Shape"(%306) {device = ""} : (tensor) -> tensor<1xi32> - %309 = "tf.StridedSlice"(%308, %17, %18, %18) 
{begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %310 = "tf.Pack"(%309) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %311 = "tf.StridedSlice"(%305, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %312 = "tf.ExpandDims"(%311, %9) {device = ""} : (tensor, tensor) -> tensor - %313 = "tf.Shape"(%311) {device = ""} : (tensor) -> tensor<1xi32> - %314 = "tf.StridedSlice"(%313, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %315 = "tf.Pack"(%314) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %316 = "tf.Equal"(%192, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %317 = "tf.Select"(%316, %257, %15) {device = ""} : (tensor, tensor, tensor) -> tensor - %318 = "tf.Cast"(%317) {Truncate = false, device = ""} : (tensor) -> tensor - %319 = "tf.Reshape"(%318, %11) {device = ""} : (tensor, tensor<0xi32>) -> tensor - %320 = "tf.Pack"(%9, %319) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %321 = "tf.Mul"(%319, %10) {device = ""} : (tensor, tensor) -> tensor - %322 = "tf.Pack"(%321) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %323 = "tf.ConcatV2"(%11, %322, %11, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %324 = "tf.Pack"(%317) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %325 = "tf.Pack"(%12, %192) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> - %326 = "tf.ExpandDims"(%325, %9) {device = ""} : (tensor<2xi64>, tensor) -> tensor<2x1xi64> - %327 = "tf.Tile"(%326, %320) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> - %328 = "tf.Reshape"(%327, %323) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor - %329 = "tf.Shape"(%328) {device = ""} : (tensor) -> tensor<1xi64> - %330 = "tf.StridedSlice"(%329, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %331 = "tf.Sub"(%330, %317) {device = ""} : (tensor, tensor) -> tensor - %332 = "tf.Pack"(%331) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %333 = "tf.StridedSlice"(%328, %13, %332, %14) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %334 = "tf.StridedSlice"(%328, %324, %13, %14) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %335:2 = "tf.RaggedRange"(%333, %334, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %336 = "tf.GatherV2"(%199, %335#1, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %337 = "tf.Cast"(%336) {Truncate = false, device = ""} : (tensor) -> tensor - %338 = 
"tf.BroadcastTo"(%337, %310) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %339 = "tf.Max"(%338, %17) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor - %340 = "tf.Maximum"(%16, %339) {device = ""} : (tensor, tensor) -> tensor - %341 = "tf.Range"(%16, %340, %9) {device = ""} : (tensor, tensor, tensor) -> tensor - %342 = "tf.Pack"(%9, %340) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %343 = "tf.Tile"(%307, %342) {device = ""} : (tensor, tensor<2xi32>) -> tensor - %344 = "tf.Shape"(%343) {device = ""} : (tensor) -> tensor<2xi32> - %345 = "tf.StridedSlice"(%344, %17, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> - %346 = "tf.Prod"(%345, %17) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor - %347 = "tf.Pack"(%346) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %348 = "tf.Shape"(%343) {device = ""} : (tensor) -> tensor<2xi32> - %349 = "tf.StridedSlice"(%348, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %350 = "tf.Shape"(%343) {device = ""} : (tensor) -> tensor<2xi32> - %351 = "tf.StridedSlice"(%350, %7, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %352 = "tf.ConcatV2"(%349, %347, %351, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %353 = "tf.Reshape"(%343, %352) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %354 = "tf.ExpandDims"(%338, %2) {device = ""} : (tensor, tensor) -> tensor - %355 = "tf.Less"(%341, %354) {device = ""} : (tensor, tensor) -> tensor - %356 = "tf.Reshape"(%355, %6) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %357 = "tf.Where"(%356) {device = ""} : (tensor) -> tensor - %358 = "tf.Squeeze"(%357) {device = "", squeeze_dims = [1]} : (tensor) -> tensor - %359 = "tf.GatherV2"(%353, %358, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %360 = "tf.Cast"(%336) {Truncate = false, device = ""} : (tensor) -> tensor - %361 = "tf.BroadcastTo"(%360, %315) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %362 = "tf.Max"(%361, %17) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor - %363 = "tf.Maximum"(%16, %362) {device = ""} : (tensor, tensor) -> tensor - %364 = "tf.Range"(%16, %363, %9) {device = ""} : (tensor, tensor, tensor) -> tensor - %365 = "tf.Pack"(%9, %363) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %366 = "tf.Tile"(%312, %365) {device = ""} : (tensor, tensor<2xi32>) -> tensor - %367 = "tf.Shape"(%366) {device = ""} : (tensor) -> tensor<2xi32> - %368 = "tf.StridedSlice"(%367, %17, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> - %369 = "tf.Prod"(%368, %17) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor - %370 = "tf.Pack"(%369) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %371 = "tf.Shape"(%366) {device 
= ""} : (tensor) -> tensor<2xi32> - %372 = "tf.StridedSlice"(%371, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %373 = "tf.Shape"(%366) {device = ""} : (tensor) -> tensor<2xi32> - %374 = "tf.StridedSlice"(%373, %7, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %375 = "tf.ConcatV2"(%372, %370, %374, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %376 = "tf.Reshape"(%366, %375) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %377 = "tf.ExpandDims"(%361, %2) {device = ""} : (tensor, tensor) -> tensor - %378 = "tf.Less"(%364, %377) {device = ""} : (tensor, tensor) -> tensor - %379 = "tf.Reshape"(%378, %6) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %380 = "tf.Where"(%379) {device = ""} : (tensor) -> tensor - %381 = "tf.Squeeze"(%380) {device = "", squeeze_dims = [1]} : (tensor) -> tensor - %382 = "tf.GatherV2"(%376, %381, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %383:2 = "tf.RaggedRange"(%359, %382, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %384 = "tf.If"(%261, %261, %257, %67) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_1_AssertGuard_false_106180, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_1_AssertGuard_true_106170} : (tensor, tensor, tensor, tensor) -> tensor - %385 = "tf.Identity"(%384) {device = ""} : (tensor) -> tensor - %386 = "tf.StridedSlice"(%62, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %387 = "tf.Equal"(%386, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %388 = "tf.Select"(%387, %257, %386) {device = ""} : (tensor, tensor, tensor) -> tensor - %389 = "tf.Pack"(%388) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %390 = "tf.StridedSlice"(%62, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi64> - %391 = "tf.StridedSlice"(%62, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> - %392 = "tf.ConcatV2"(%390, %389, %391, %16) {device = ""} : (tensor<0xi64>, tensor<1xi64>, tensor<1xi64>, tensor) -> tensor<2xi64> - %393 = "tf.StridedSlice"(%392, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %394 = "tf.Equal"(%393, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %395 = 
"tf.StridedSlice"(%392, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %396 = "tf.StridedSlice"(%392, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %397 = "tf.Equal"(%396, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %398 = "tf.If"(%397, %397, %396, %336) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_2_AssertGuard_false_106670, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_2_AssertGuard_true_106660} : (tensor, tensor, tensor, tensor) -> tensor - %399 = "tf.Identity"(%398) {device = ""} : (tensor) -> tensor - %400 = "tf.If"(%394, %394, %336, %395) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_3_AssertGuard_false_107030, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_3_AssertGuard_true_107020} : (tensor, tensor, tensor, tensor) -> tensor - %401 = "tf.If"(%236, %236, %15, %232) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_AssertGuard_false_111870, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_AssertGuard_true_111860} : (tensor, tensor, tensor, tensor) -> tensor - %402 = "tf.Identity"(%401) {device = ""} : (tensor) -> tensor - %403 = "tf.Equal"(%232, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %404 = "tf.Select"(%403, %15, %232) {device = ""} : (tensor, tensor, tensor) -> tensor - %405 = "tf.Equal"(%404, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %406 = "tf.LogicalOr"(%405, %1) {device = ""} : (tensor, tensor) -> tensor - %407 = "tf.Equal"(%404, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %408 = "tf.LogicalOr"(%406, %407) {device = ""} : (tensor, tensor) -> tensor - %409 = "tf.Select"(%243, %404, %15) {device = ""} : (tensor, tensor, tensor) -> tensor - %410 = "tf.Pack"(%409, %15) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> - %411 = "tf.StridedSlice"(%410, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %412 = "tf.Cast"(%411) {Truncate = false, device = ""} : (tensor) -> tensor - %413 = "tf.Reshape"(%412, %11) {device = ""} : (tensor, tensor<0xi32>) -> tensor - %414 = "tf.Pack"(%9, %413) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %415 = "tf.Tile"(%244, %414) {device = ""} : (tensor, tensor<2xi32>) -> tensor - %416 = "tf.Mul"(%413, %247) {device = ""} : (tensor, tensor) -> tensor - %417 = "tf.Pack"(%416) {axis = 0 : i64, device = ""} : 
(tensor) -> tensor<1xi32> - %418 = "tf.ConcatV2"(%246, %417, %248, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %419 = "tf.Reshape"(%415, %418) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %420 = "tf.Shape"(%419) {device = ""} : (tensor) -> tensor<1xi64> - %421 = "tf.StridedSlice"(%420, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %422 = "tf.Pack"(%411) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %423 = "tf.StridedSlice"(%419, %422, %13, %14) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %424 = "tf.Sub"(%421, %411) {device = ""} : (tensor, tensor) -> tensor - %425 = "tf.Pack"(%424) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %426 = "tf.StridedSlice"(%419, %13, %425, %14) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %427:2 = "tf.RaggedRange"(%426, %423, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %428 = "tf.GatherV2"(%250, %427#1, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %429 = "tf.StridedSlice"(%410, %17, %18, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> - %430 = "tf.StridedSlice"(%410, %17, %18, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> - %431 = "tf.StridedSlice"(%410, %7, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi64> - %432 = "tf.ConcatV2"(%430, %431, %16) {device = ""} : (tensor<1xi64>, tensor<0xi64>, tensor) -> tensor<1xi64> - %433 = "tf.StridedSlice"(%410, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %434 = "tf.Mul"(%253, %433) {device = ""} : (tensor, tensor) -> tensor - %435 = "tf.Tile"(%434, %429) {device = ""} : (tensor, tensor<1xi64>) -> tensor - %436 = "tf.Cumsum"(%435, %16) {device = "", exclusive = false, reverse = false} : (tensor, tensor) -> tensor - %437 = "tf.ConcatV2"(%13, %436, %2) {device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor - %438 = "tf.Shape"(%437) {device = ""} : (tensor) -> tensor<1xi64> - %439 = "tf.StridedSlice"(%438, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %440 = "tf.Sub"(%439, %15) {device = ""} : (tensor, tensor) -> tensor - %441 = "tf.Equal"(%440, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> 
tensor - %442 = "tf.LogicalOr"(%117, %441) {device = ""} : (tensor, tensor) -> tensor - %443 = "tf.Equal"(%440, %116) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %444 = "tf.LogicalOr"(%442, %443) {device = ""} : (tensor, tensor) -> tensor - %445 = "tf.StridedSlice"(%437, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %446 = "tf.StridedSlice"(%437, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %447 = "tf.Sub"(%445, %446) {device = ""} : (tensor, tensor) -> tensor - %448 = "tf.Shape"(%437) {device = ""} : (tensor) -> tensor<1xi64> - %449 = "tf.StridedSlice"(%448, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %450 = "tf.Sub"(%449, %15) {device = ""} : (tensor, tensor) -> tensor - %451 = "tf.Equal"(%450, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %452 = "tf.ExpandDims"(%437, %9) {device = ""} : (tensor, tensor) -> tensor - %453 = "tf.Shape"(%437) {device = ""} : (tensor) -> tensor<1xi32> - %454 = "tf.StridedSlice"(%453, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %455 = "tf.StridedSlice"(%453, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %456 = "tf.StridedSlice"(%453, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %457 = "tf.Select"(%1, %404, %15) {device = ""} : (tensor, tensor, tensor) -> tensor - %458 = "tf.Pack"(%457, %15) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> - %459 = "tf.StridedSlice"(%458, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %460 = "tf.Cast"(%459) {Truncate = false, device = ""} : (tensor) -> tensor - %461 = "tf.Reshape"(%460, %11) {device = ""} : (tensor, tensor<0xi32>) -> tensor - %462 = "tf.Pack"(%9, %461) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %463 = "tf.Tile"(%3, %462) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> - %464 = "tf.Mul"(%461, %10) {device = ""} : (tensor, tensor) -> tensor - %465 = "tf.Pack"(%464) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %466 = "tf.ConcatV2"(%11, %465, %11, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %467 = "tf.Reshape"(%463, %466) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor - %468 = "tf.Shape"(%467) {device = ""} : (tensor) -> tensor<1xi64> - %469 = "tf.StridedSlice"(%468, %17, %18, %18) 
{begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %470 = "tf.Pack"(%459) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %471 = "tf.StridedSlice"(%467, %470, %13, %14) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %472 = "tf.Sub"(%469, %459) {device = ""} : (tensor, tensor) -> tensor - %473 = "tf.Pack"(%472) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %474 = "tf.StridedSlice"(%467, %13, %473, %14) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %475:2 = "tf.RaggedRange"(%474, %471, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %476 = "tf.GatherV2"(%13, %475#1, %16) {batch_dims = 0 : i64, device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor - %477 = "tf.GatherV2"(%14, %476, %16) {batch_dims = 0 : i64, device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor - %478 = "tf.StridedSlice"(%458, %17, %18, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> - %479 = "tf.StridedSlice"(%458, %17, %18, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> - %480 = "tf.StridedSlice"(%458, %7, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi64> - %481 = "tf.ConcatV2"(%479, %480, %16) {device = ""} : (tensor<1xi64>, tensor<0xi64>, tensor) -> tensor<1xi64> - %482 = "tf.Tile"(%477, %481) {device = ""} : (tensor, tensor<1xi64>) -> tensor - %483 = "tf.StridedSlice"(%458, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %484 = "tf.Mul"(%483, %14) {device = ""} : (tensor, tensor<1xi64>) -> tensor<1xi64> - %485 = "tf.Tile"(%484, %478) {device = ""} : (tensor<1xi64>, tensor<1xi64>) -> tensor - %486 = "tf.Cumsum"(%485, %16) {device = "", exclusive = false, reverse = false} : (tensor, tensor) -> tensor - %487 = "tf.ConcatV2"(%13, %486, %2) {device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor - %488 = "tf.StridedSlice"(%487, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %489 = "tf.ExpandDims"(%488, %9) {device = ""} : (tensor, tensor) -> tensor - %490 = "tf.Shape"(%488) {device = ""} : (tensor) -> tensor<1xi32> - %491 = "tf.StridedSlice"(%490, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, 
tensor<1xi32>) -> tensor - %492 = "tf.Pack"(%491) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %493 = "tf.StridedSlice"(%487, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %494 = "tf.ExpandDims"(%493, %9) {device = ""} : (tensor, tensor) -> tensor - %495 = "tf.Shape"(%493) {device = ""} : (tensor) -> tensor<1xi32> - %496 = "tf.StridedSlice"(%495, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %497 = "tf.Pack"(%496) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %498 = "tf.Equal"(%232, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %499 = "tf.Select"(%498, %404, %15) {device = ""} : (tensor, tensor, tensor) -> tensor - %500 = "tf.Cast"(%499) {Truncate = false, device = ""} : (tensor) -> tensor - %501 = "tf.Reshape"(%500, %11) {device = ""} : (tensor, tensor<0xi32>) -> tensor - %502 = "tf.Pack"(%9, %501) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %503 = "tf.Mul"(%501, %10) {device = ""} : (tensor, tensor) -> tensor - %504 = "tf.Pack"(%503) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %505 = "tf.ConcatV2"(%11, %504, %11, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %506 = "tf.Pack"(%499) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %507 = "tf.Pack"(%12, %232) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> - %508 = "tf.ExpandDims"(%507, %9) {device = ""} : (tensor<2xi64>, tensor) -> tensor<2x1xi64> - %509 = "tf.Tile"(%508, %502) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> - %510 = "tf.Reshape"(%509, %505) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor - %511 = "tf.Shape"(%510) {device = ""} : (tensor) -> tensor<1xi64> - %512 = "tf.StridedSlice"(%511, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %513 = "tf.Sub"(%512, %499) {device = ""} : (tensor, tensor) -> tensor - %514 = "tf.Pack"(%513) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %515 = "tf.StridedSlice"(%510, %13, %514, %14) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %516 = "tf.StridedSlice"(%510, %506, %13, %14) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %517:2 = "tf.RaggedRange"(%515, %516, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %518 = "tf.GatherV2"(%239, %517#1, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %519 = "tf.Cast"(%518) {Truncate = false, device = ""} : (tensor) -> tensor - %520 = "tf.BroadcastTo"(%519, %492) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %521 = "tf.Max"(%520, %17) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor - %522 = 
"tf.Maximum"(%16, %521) {device = ""} : (tensor, tensor) -> tensor - %523 = "tf.Range"(%16, %522, %9) {device = ""} : (tensor, tensor, tensor) -> tensor - %524 = "tf.Pack"(%9, %522) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %525 = "tf.Tile"(%489, %524) {device = ""} : (tensor, tensor<2xi32>) -> tensor - %526 = "tf.Shape"(%525) {device = ""} : (tensor) -> tensor<2xi32> - %527 = "tf.StridedSlice"(%526, %17, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> - %528 = "tf.Prod"(%527, %17) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor - %529 = "tf.Pack"(%528) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %530 = "tf.Shape"(%525) {device = ""} : (tensor) -> tensor<2xi32> - %531 = "tf.StridedSlice"(%530, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %532 = "tf.Shape"(%525) {device = ""} : (tensor) -> tensor<2xi32> - %533 = "tf.StridedSlice"(%532, %7, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %534 = "tf.ConcatV2"(%531, %529, %533, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %535 = "tf.Reshape"(%525, %534) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %536 = "tf.ExpandDims"(%520, %2) {device = ""} : (tensor, tensor) -> tensor - %537 = "tf.Less"(%523, %536) {device = ""} : (tensor, tensor) -> tensor - %538 = "tf.Reshape"(%537, %6) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %539 = "tf.Where"(%538) {device = ""} : (tensor) -> tensor - %540 = "tf.Squeeze"(%539) {device = "", squeeze_dims = [1]} : (tensor) -> tensor - %541 = "tf.GatherV2"(%535, %540, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %542 = "tf.Cast"(%518) {Truncate = false, device = ""} : (tensor) -> tensor - %543 = "tf.BroadcastTo"(%542, %497) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %544 = "tf.Max"(%543, %17) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor - %545 = "tf.Maximum"(%16, %544) {device = ""} : (tensor, tensor) -> tensor - %546 = "tf.Range"(%16, %545, %9) {device = ""} : (tensor, tensor, tensor) -> tensor - %547 = "tf.Pack"(%9, %545) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %548 = "tf.Tile"(%494, %547) {device = ""} : (tensor, tensor<2xi32>) -> tensor - %549 = "tf.Shape"(%548) {device = ""} : (tensor) -> tensor<2xi32> - %550 = "tf.StridedSlice"(%549, %17, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> - %551 = "tf.Prod"(%550, %17) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor - %552 = "tf.Pack"(%551) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %553 = "tf.Shape"(%548) {device = ""} : (tensor) -> tensor<2xi32> - %554 = "tf.StridedSlice"(%553, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : 
i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %555 = "tf.Shape"(%548) {device = ""} : (tensor) -> tensor<2xi32> - %556 = "tf.StridedSlice"(%555, %7, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %557 = "tf.ConcatV2"(%554, %552, %556, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %558 = "tf.Reshape"(%548, %557) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %559 = "tf.ExpandDims"(%543, %2) {device = ""} : (tensor, tensor) -> tensor - %560 = "tf.Less"(%546, %559) {device = ""} : (tensor, tensor) -> tensor - %561 = "tf.Reshape"(%560, %6) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %562 = "tf.Where"(%561) {device = ""} : (tensor) -> tensor - %563 = "tf.Squeeze"(%562) {device = "", squeeze_dims = [1]} : (tensor) -> tensor - %564 = "tf.GatherV2"(%558, %563, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %565:2 = "tf.RaggedRange"(%541, %564, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %566 = "tf.GatherV2"(%482, %565#1, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %567 = "tf.If"(%408, %408, %404, %15) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_1_AssertGuard_false_112940, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_1_AssertGuard_true_112930} : (tensor, tensor, tensor, tensor) -> tensor - %568 = "tf.Identity"(%567) {device = ""} : (tensor) -> tensor - %569 = "tf.Select"(%1, %404, %15) {device = ""} : (tensor, tensor, tensor) -> tensor - %570 = "tf.Pack"(%569) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %571 = "tf.ConcatV2"(%0, %570, %14, %16) {device = ""} : (tensor<0xi64>, tensor<1xi64>, tensor<1xi64>, tensor) -> tensor<2xi64> - %572 = "tf.StridedSlice"(%571, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %573 = "tf.Equal"(%572, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %574 = "tf.StridedSlice"(%571, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %575 = "tf.StridedSlice"(%571, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %576 = "tf.Equal"(%575, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %577 = "tf.If"(%576, %576, %575, %518) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_2_AssertGuard_false_113430, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_2_AssertGuard_true_113420} 
: (tensor, tensor, tensor, tensor) -> tensor - %578 = "tf.Identity"(%577) {device = ""} : (tensor) -> tensor - %579 = "tf.If"(%573, %573, %518, %574) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_3_AssertGuard_false_113790, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_3_AssertGuard_true_113780} : (tensor, tensor, tensor, tensor) -> tensor - %580 = "tf.Identity"(%579) {device = ""} : (tensor) -> tensor - %581 = "tf.If"(%444, %444, %116, %440) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_AssertGuard_false_118470, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_AssertGuard_true_118460} : (tensor, tensor, tensor, tensor) -> tensor - %582 = "tf.Identity"(%581) {device = ""} : (tensor) -> tensor - %583 = "tf.Equal"(%440, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %584 = "tf.Select"(%583, %116, %440) {device = ""} : (tensor, tensor, tensor) -> tensor - %585 = "tf.Equal"(%584, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %586 = "tf.LogicalOr"(%585, %119) {device = ""} : (tensor, tensor) -> tensor - %587 = "tf.Equal"(%118, %584) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %588 = "tf.LogicalOr"(%586, %587) {device = ""} : (tensor, tensor) -> tensor - %589 = "tf.Select"(%451, %584, %15) {device = ""} : (tensor, tensor, tensor) -> tensor - %590 = "tf.Pack"(%589, %15) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> - %591 = "tf.StridedSlice"(%590, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %592 = "tf.Cast"(%591) {Truncate = false, device = ""} : (tensor) -> tensor - %593 = "tf.Reshape"(%592, %11) {device = ""} : (tensor, tensor<0xi32>) -> tensor - %594 = "tf.Pack"(%9, %593) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %595 = "tf.Tile"(%452, %594) {device = ""} : (tensor, tensor<2xi32>) -> tensor - %596 = "tf.Mul"(%593, %455) {device = ""} : (tensor, tensor) -> tensor - %597 = "tf.Pack"(%596) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %598 = "tf.ConcatV2"(%454, %597, %456, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %599 = "tf.Reshape"(%595, %598) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %600 = "tf.Shape"(%599) {device = ""} : (tensor) -> tensor<1xi64> - %601 = "tf.StridedSlice"(%600, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %602 = "tf.Pack"(%591) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %603 = "tf.StridedSlice"(%599, %602, %13, %14) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %604 = "tf.Sub"(%601, %591) {device = ""} : 
(tensor, tensor) -> tensor - %605 = "tf.Pack"(%604) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %606 = "tf.StridedSlice"(%599, %13, %605, %14) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %607:2 = "tf.RaggedRange"(%606, %603, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %608 = "tf.Select"(%124, %584, %15) {device = ""} : (tensor, tensor, tensor) -> tensor - %609 = "tf.Pack"(%608, %15) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> - %610 = "tf.StridedSlice"(%609, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %611 = "tf.Cast"(%610) {Truncate = false, device = ""} : (tensor) -> tensor - %612 = "tf.Reshape"(%611, %11) {device = ""} : (tensor, tensor<0xi32>) -> tensor - %613 = "tf.Pack"(%9, %612) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %614 = "tf.Tile"(%106, %613) {device = ""} : (tensor, tensor<2xi32>) -> tensor - %615 = "tf.Mul"(%612, %109) {device = ""} : (tensor, tensor) -> tensor - %616 = "tf.Pack"(%615) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %617 = "tf.ConcatV2"(%108, %616, %110, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %618 = "tf.Reshape"(%614, %617) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %619 = "tf.Shape"(%618) {device = ""} : (tensor) -> tensor<1xi64> - %620 = "tf.StridedSlice"(%619, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %621 = "tf.Pack"(%610) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %622 = "tf.StridedSlice"(%618, %621, %13, %14) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %623 = "tf.Sub"(%620, %610) {device = ""} : (tensor, tensor) -> tensor - %624 = "tf.Pack"(%623) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %625 = "tf.StridedSlice"(%618, %13, %624, %14) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %626:2 = "tf.RaggedRange"(%625, %622, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %627 = "tf.StridedSlice"(%609, %17, %18, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> - %628 = "tf.StridedSlice"(%609, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %629 = "tf.Mul"(%113, %628) {device = ""} : (tensor, tensor) -> tensor - %630 = "tf.Tile"(%629, %627) {device = ""} : (tensor, tensor<1xi64>) -> tensor - %631 = "tf.Cumsum"(%630, %16) {device = "", exclusive 
= false, reverse = false} : (tensor, tensor) -> tensor - %632 = "tf.ConcatV2"(%13, %631, %2) {device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor - %633 = "tf.StridedSlice"(%632, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %634 = "tf.ExpandDims"(%633, %9) {device = ""} : (tensor, tensor) -> tensor - %635 = "tf.Shape"(%633) {device = ""} : (tensor) -> tensor<1xi32> - %636 = "tf.StridedSlice"(%635, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %637 = "tf.Pack"(%636) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %638 = "tf.StridedSlice"(%632, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %639 = "tf.ExpandDims"(%638, %9) {device = ""} : (tensor, tensor) -> tensor - %640 = "tf.Shape"(%638) {device = ""} : (tensor) -> tensor<1xi32> - %641 = "tf.StridedSlice"(%640, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %642 = "tf.Pack"(%641) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %643 = "tf.Equal"(%440, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %644 = "tf.Select"(%643, %584, %15) {device = ""} : (tensor, tensor, tensor) -> tensor - %645 = "tf.Cast"(%644) {Truncate = false, device = ""} : (tensor) -> tensor - %646 = "tf.Reshape"(%645, %11) {device = ""} : (tensor, tensor<0xi32>) -> tensor - %647 = "tf.Pack"(%9, %646) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %648 = "tf.Mul"(%646, %10) {device = ""} : (tensor, tensor) -> tensor - %649 = "tf.Pack"(%648) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %650 = "tf.ConcatV2"(%11, %649, %11, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %651 = "tf.Pack"(%644) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %652 = "tf.Pack"(%12, %440) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> - %653 = "tf.ExpandDims"(%652, %9) {device = ""} : (tensor<2xi64>, tensor) -> tensor<2x1xi64> - %654 = "tf.Tile"(%653, %647) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> - %655 = "tf.Reshape"(%654, %650) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor - %656 = "tf.Shape"(%655) {device = ""} : (tensor) -> tensor<1xi64> - %657 = "tf.StridedSlice"(%656, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %658 = "tf.Sub"(%657, %644) {device = ""} : (tensor, tensor) -> tensor - %659 = "tf.Pack"(%658) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %660 = "tf.StridedSlice"(%655, %13, %659, %14) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %661 = 
"tf.StridedSlice"(%655, %651, %13, %14) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %662:2 = "tf.RaggedRange"(%660, %661, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %663 = "tf.GatherV2"(%447, %662#1, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %664 = "tf.Cast"(%663) {Truncate = false, device = ""} : (tensor) -> tensor - %665 = "tf.BroadcastTo"(%664, %637) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %666 = "tf.Max"(%665, %17) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor - %667 = "tf.Maximum"(%16, %666) {device = ""} : (tensor, tensor) -> tensor - %668 = "tf.Range"(%16, %667, %9) {device = ""} : (tensor, tensor, tensor) -> tensor - %669 = "tf.Pack"(%9, %667) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %670 = "tf.Tile"(%634, %669) {device = ""} : (tensor, tensor<2xi32>) -> tensor - %671 = "tf.Shape"(%670) {device = ""} : (tensor) -> tensor<2xi32> - %672 = "tf.StridedSlice"(%671, %17, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> - %673 = "tf.Prod"(%672, %17) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor - %674 = "tf.Pack"(%673) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %675 = "tf.Shape"(%670) {device = ""} : (tensor) -> tensor<2xi32> - %676 = "tf.StridedSlice"(%675, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %677 = "tf.Shape"(%670) {device = ""} : (tensor) -> tensor<2xi32> - %678 = "tf.StridedSlice"(%677, %7, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %679 = "tf.ConcatV2"(%676, %674, %678, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %680 = "tf.Reshape"(%670, %679) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %681 = "tf.ExpandDims"(%665, %2) {device = ""} : (tensor, tensor) -> tensor - %682 = "tf.Less"(%668, %681) {device = ""} : (tensor, tensor) -> tensor - %683 = "tf.Reshape"(%682, %6) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %684 = "tf.Where"(%683) {device = ""} : (tensor) -> tensor - %685 = "tf.Squeeze"(%684) {device = "", squeeze_dims = [1]} : (tensor) -> tensor - %686 = "tf.GatherV2"(%680, %685, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %687 = "tf.Cast"(%663) {Truncate = false, device = ""} : (tensor) -> tensor - %688 = "tf.BroadcastTo"(%687, %642) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %689 = "tf.Max"(%688, %17) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor - %690 = "tf.Maximum"(%16, %689) {device = ""} : (tensor, tensor) -> tensor - %691 = "tf.Range"(%16, %690, %9) {device = ""} : (tensor, tensor, tensor) -> tensor - %692 = "tf.Pack"(%9, %690) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %693 = "tf.Tile"(%639, %692) {device = ""} : (tensor, 
tensor<2xi32>) -> tensor - %694 = "tf.Shape"(%693) {device = ""} : (tensor) -> tensor<2xi32> - %695 = "tf.StridedSlice"(%694, %17, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> - %696 = "tf.Prod"(%695, %17) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor - %697 = "tf.Pack"(%696) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %698 = "tf.Shape"(%693) {device = ""} : (tensor) -> tensor<2xi32> - %699 = "tf.StridedSlice"(%698, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %700 = "tf.Shape"(%693) {device = ""} : (tensor) -> tensor<2xi32> - %701 = "tf.StridedSlice"(%700, %7, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %702 = "tf.ConcatV2"(%699, %697, %701, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %703 = "tf.Reshape"(%693, %702) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %704 = "tf.ExpandDims"(%688, %2) {device = ""} : (tensor, tensor) -> tensor - %705 = "tf.Less"(%691, %704) {device = ""} : (tensor, tensor) -> tensor - %706 = "tf.Reshape"(%705, %6) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %707 = "tf.Where"(%706) {device = ""} : (tensor) -> tensor - %708 = "tf.Squeeze"(%707) {device = "", squeeze_dims = [1]} : (tensor) -> tensor - %709 = "tf.GatherV2"(%703, %708, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %710:2 = "tf.RaggedRange"(%686, %709, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %711 = "tf.If"(%588, %588, %584, %120) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_1_AssertGuard_false_119540, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_1_AssertGuard_true_119530} : (tensor, tensor, tensor, tensor) -> tensor - %712 = "tf.Identity"(%711) {device = ""} : (tensor) -> tensor - %713 = "tf.StridedSlice"(%115, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %714 = "tf.Equal"(%713, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %715 = "tf.Select"(%714, %584, %713) {device = ""} : (tensor, tensor, tensor) -> tensor - %716 = "tf.Pack"(%715) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %717 = "tf.StridedSlice"(%115, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi64> - %718 = "tf.StridedSlice"(%115, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : 
(tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> - %719 = "tf.ConcatV2"(%717, %716, %718, %16) {device = ""} : (tensor<0xi64>, tensor<1xi64>, tensor<1xi64>, tensor) -> tensor<2xi64> - %720 = "tf.StridedSlice"(%719, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %721 = "tf.Equal"(%720, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %722 = "tf.StridedSlice"(%719, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %723 = "tf.StridedSlice"(%719, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %724 = "tf.Equal"(%723, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %725 = "tf.If"(%724, %724, %723, %663) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_2_AssertGuard_false_120030, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_2_AssertGuard_true_120020} : (tensor, tensor, tensor, tensor) -> tensor - %726 = "tf.Identity"(%725) {device = ""} : (tensor) -> tensor - %727 = "tf.If"(%721, %721, %663, %722) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_false_120390, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_true_120380} : (tensor, tensor, tensor, tensor) -> tensor - %728 = "tf.Identity"(%168) {device = ""} : (tensor) -> tensor - %729 = "tf.Identity"(%727) {device = ""} : (tensor) -> tensor - %730 = "tf.Identity"(%400) {device = ""} : (tensor) -> tensor - %731 = "tf.Shape"(%125#2) {device = ""} : (tensor) -> tensor<1xi32> - %732 = "tf.StridedSlice"(%731, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %733 = "tf.Cast"(%732) {Truncate = false, device = ""} : (tensor<0xi32>) -> tensor<0xi64> - %734 = "tf.Identity"(%733) {device = ""} : (tensor<0xi64>) -> tensor<0xi64> - %735 = "tf.Shape"(%125#3) {device = ""} : (tensor) -> tensor<1xi32> - %736 = "tf.StridedSlice"(%735, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %737 = "tf.Cast"(%736) {Truncate = false, device = ""} : (tensor<0xi32>) -> tensor<0xi64> - %738 = "tf.Identity"(%737) {device = ""} : (tensor<0xi64>) -> tensor<0xi64> - %739 = "tf.GatherV2"(%125#3, %428, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %740 = "tf.Tile"(%739, %432) {device = ""} : (tensor, 
tensor<1xi64>) -> tensor - %741 = "tf.Sub"(%740, %566) {device = ""} : (tensor, tensor) -> tensor - %742 = "tf.Shape"(%741) {device = ""} : (tensor) -> tensor<1xi32> - %743 = "tf.StridedSlice"(%742, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %744 = "tf.Cast"(%743) {Truncate = false, device = ""} : (tensor<0xi32>) -> tensor<0xi64> - %745 = "tf.Identity"(%744) {device = ""} : (tensor<0xi64>) -> tensor<0xi64> - %746 = "tf.UnicodeEncode"(%125#0, %146) {Tsplits = i64, device = "", errors = "replace", output_encoding = "UTF-8", replacement_char = 65533 : i64} : (tensor, tensor) -> tensor - %747 = "tf.Identity"(%746) {device = ""} : (tensor) -> tensor - %748 = "tf.StridedSlice"(%19, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %749 = "tf.AddV2"(%748, %15) {device = ""} : (tensor, tensor) -> tensor - %750 = "tf.Range"(%12, %749, %15) {device = ""} : (tensor, tensor, tensor) -> tensor - %751 = "tf.Mul"(%750, %15) {device = ""} : (tensor, tensor) -> tensor - %752 = "tf.Identity"(%751) {device = ""} : (tensor) -> tensor - return %747, %752, %728 : tensor, tensor, tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedConcat_assert_equal_1_Assert_AssertGuard_false_99640(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Input tensors have incompatible shapes."> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedConcat/RaggedFromTensor/strided_slice_4:0) = "> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedConcat/RaggedNRows/sub:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedConcat_assert_equal_1_Assert_AssertGuard_true_99630(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_100400(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor - %2 = 
"tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_100390(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_100760(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor) -> () - %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor - return %4 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_100750(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_101100(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/strided_slice_1:0) = "> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"y 
(WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_101090(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_101470(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_101460(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_101830(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor) -> 
() - %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor - return %4 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_101820(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_false_102190(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/strided_slice:0) = "> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RaggedNRows/sub:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_true_102180(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_102540(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor 
- return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_102530(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_102900(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor) -> () - %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor - return %4 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_102890(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_103240(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/strided_slice_1:0) = "> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_103230(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device 
= ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_103610(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_103600(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_103970(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor) -> () - %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor - return %4 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_103960(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_104310(%arg0: 
tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/strided_slice_1:0) = "> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_104300(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_AssertGuard_false_105110(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_AssertGuard_true_105100(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_1_AssertGuard_false_106180(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = 
"tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_1_AssertGuard_true_106170(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_2_AssertGuard_false_106670(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_2_AssertGuard_true_106660(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_3_AssertGuard_false_107030(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_3_AssertGuard_true_107020(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_AssertGuard_false_111870(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor - %2 = "tf.Const"() {value = 
dense<"lengths="> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_AssertGuard_true_111860(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_1_AssertGuard_false_112940(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_1_AssertGuard_true_112930(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_2_AssertGuard_false_113430(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_2_AssertGuard_true_113420(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_3_AssertGuard_false_113790(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>], tf.signature.is_stateful} { - 
%0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_3_AssertGuard_true_113780(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_AssertGuard_false_118470(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_AssertGuard_true_118460(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_1_AssertGuard_false_119540(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_1_AssertGuard_true_119530(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - 
return %1 : tensor
- }
- func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_2_AssertGuard_false_120030(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape], tf.signature.is_stateful} {
- %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor
- %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor
- %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor
- %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor
- "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> ()
- %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor
- %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor
- return %5 : tensor
- }
- func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_2_AssertGuard_true_120020(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape]} {
- %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor
- %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor
- return %1 : tensor
- }
- func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_false_120390(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>], tf.signature.is_stateful} {
- %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor
- %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor
- %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor
- %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor
- "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> ()
- %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor
- %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor
- return %5 : tensor
- }
- func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_true_120380(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>]} {
- %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor
- %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor
- return %1 : tensor
- }
-
-
-
- // CHECK: func @whitespace_tokenizer_rank2(%arg0: tensor {tf._user_specified_name = "input"}) -> (tensor, tensor, tensor) attributes {sym_visibility = "private", tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf._input_shapes = [#tf.shape], tf.signature.is_stateful} {
- // CHECK: %0:3 = "tfl.custom"(%arg0) {custom_code = "tftext:WhitespaceTokenizer", custom_option = opaque<"tfl", "0x"> : tensor<0xi8>} : (tensor) -> (tensor, tensor, tensor)
- // CHECK: return %0#0, %0#1, %0#2 : tensor, tensor, tensor
-
- func @whitespace_tokenizer_rank0(%arg0: tensor {tf._user_specified_name = "input"}) -> tensor attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<>], tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf.signature.is_stateful} {
- %0 = "tf.Const"() {value = dense<[0, 1]> : tensor<2xi64>} : () -> tensor<2xi64>
- %1 = "tf.Const"() {value = dense<[]> : tensor<0xi64>} : () -> tensor<0xi64>
- %2 =
"tf.Const"() {value = dense : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<-1> : tensor} : () -> tensor - %4 = "tf.Const"() {value = dense<[[0], [1]]> : tensor<2x1xi64>} : () -> tensor<2x1xi64> - %5 = "tf.Const"() {value = dense<-1> : tensor<1xi32>} : () -> tensor<1xi32> - %6 = "tf.Const"() {value = dense<2> : tensor<1xi32>} : () -> tensor<1xi32> - %7 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor - %8 = "tf.Const"() {value = dense<2> : tensor} : () -> tensor - %9 = "tf.Const"() {value = dense<[]> : tensor<0xi32>} : () -> tensor<0xi32> - %10 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor - %11 = "tf.Const"() {value = dense<0> : tensor<1xi64>} : () -> tensor<1xi64> - %12 = "tf.Const"() {value = dense<1> : tensor<1xi64>} : () -> tensor<1xi64> - %13 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor - %14 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor - %15 = "tf.Const"() {value = dense<0> : tensor<1xi32>} : () -> tensor<1xi32> - %16 = "tf.Const"() {value = dense<1> : tensor<1xi32>} : () -> tensor<1xi32> - %17 = "tf.If"(%2, %2, %13, %13) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedConcat_assert_equal_1_Assert_AssertGuard_false_3220, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedConcat_assert_equal_1_Assert_AssertGuard_true_3210} : (tensor, tensor, tensor, tensor) -> tensor - %18 = "tf.Identity"(%17) {device = ""} : (tensor) -> tensor - %19 = "tf.Pack"(%arg0) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1x!tf.string> - %20 = "tf.StringLength"(%19) {device = "", unit = "BYTE"} : (tensor<1x!tf.string>) -> tensor<1xi32> - %21 = "tf.ExpandDims"(%20, %7) {device = ""} : (tensor<1xi32>, tensor) -> tensor<1x1xi32> - %22 = "tf.Cast"(%21) {Truncate = false, device = ""} : (tensor<1x1xi32>) -> tensor<1x1xi64> - %23 = "tf.Reshape"(%22, %12) {device = ""} : (tensor<1x1xi64>, tensor<1xi64>) -> tensor<1xi64> - %24 = "tf.Reshape"(%19, %5) {device = ""} : (tensor<1x!tf.string>, tensor<1xi32>) -> tensor<1x!tf.string> - %25:3 = "tf.UnicodeDecodeWithOffsets"(%24) {Tsplits = i64, device = "", errors = "replace", input_encoding = "UTF-8", replace_control_characters = false, replacement_char = 65533 : i64} : (tensor<1x!tf.string>) -> (tensor<2xi64>, tensor, tensor) - %26 = "tf.StridedSlice"(%25#0, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> - %27 = "tf.AddV2"(%26, %13) {device = ""} : (tensor<1xi64>, tensor) -> tensor<1xi64> - %28 = "tf.StridedSlice"(%25#0, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> - %29 = "tf.Minimum"(%27, %28) {device = ""} : (tensor<1xi64>, tensor<1xi64>) -> tensor<1xi64> - %30:2 = "tf.RaggedRange"(%29, %28, %13) {T = i64, Tsplits = i64, device = ""} : (tensor<1xi64>, tensor<1xi64>, tensor) -> (tensor<2xi64>, tensor) - %31 = "tf.StridedSlice"(%30#0, %5, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %32 = "tf.AddV2"(%31, %12) {device = ""} : 
(tensor, tensor<1xi64>) -> tensor<1xi64> - %33 = "tf.ConcatV2"(%30#0, %32, %14) {device = ""} : (tensor<2xi64>, tensor<1xi64>, tensor) -> tensor<3xi64> - %34 = "tf.GatherV2"(%25#2, %30#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %35 = "tf.ConcatV2"(%34, %23, %14) {device = ""} : (tensor, tensor<1xi64>, tensor) -> tensor - %36:2 = "tf.RaggedGather"(%33, %35, %0) {OUTPUT_RAGGED_RANK = 1 : i64, PARAMS_RAGGED_RANK = 1 : i64, Tindices = i64, Tsplits = i64, Tvalues = i64, device = ""} : (tensor<3xi64>, tensor, tensor<2xi64>) -> (tensor, tensor) - %37:5 = "tf.WhitespaceTokenizeWithOffsets"(%25#1, %25#0) {Tsplits = i64, device = ""} : (tensor, tensor<2xi64>) -> (tensor, tensor, tensor, tensor, tensor) - %38 = "tf.StridedSlice"(%37#1, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %39 = "tf.Equal"(%38, %10) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %40 = "tf.All"(%39, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor - %41 = "tf.If"(%40, %40, %38, %10) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_3980, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_3970} : (tensor, tensor, tensor, tensor) -> tensor - %42 = "tf.Identity"(%41) {device = ""} : (tensor) -> tensor - %43 = "tf.StridedSlice"(%37#1, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %44 = "tf.StridedSlice"(%37#1, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %45 = "tf.Sub"(%43, %44) {device = ""} : (tensor, tensor) -> tensor - %46 = "tf.LessEqual"(%10, %45) {device = ""} : (tensor, tensor) -> tensor - %47 = "tf.All"(%46, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor - %48 = "tf.If"(%47, %47, %45) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_4340, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_4330} : (tensor, tensor, tensor) -> tensor - %49 = "tf.Identity"(%48) {device = ""} : (tensor) -> tensor - %50 = "tf.Identity"(%37#1) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor - %51 = "tf.StridedSlice"(%50, %5, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, 
tensor<1xi32>, tensor<1xi32>) -> tensor - %52 = "tf.Shape"(%37#0) {device = ""} : (tensor) -> tensor<1xi64> - %53 = "tf.StridedSlice"(%52, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %54 = "tf.Equal"(%51, %53) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %55 = "tf.All"(%54, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor - %56 = "tf.If"(%55, %55, %51, %53) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_4680, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_4670} : (tensor, tensor, tensor, tensor) -> tensor - %57 = "tf.Identity"(%56) {device = ""} : (tensor) -> tensor - %58 = "tf.Identity"(%50) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor - %59 = "tf.Shape"(%58) {device = ""} : (tensor) -> tensor<1xi64> - %60 = "tf.StridedSlice"(%59, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %61 = "tf.Sub"(%60, %13) {device = ""} : (tensor, tensor) -> tensor - %62 = "tf.StridedSlice"(%37#4, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %63 = "tf.Equal"(%62, %10) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %64 = "tf.All"(%63, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor - %65 = "tf.If"(%64, %64, %62, %10) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_5050, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_5040} : (tensor, tensor, tensor, tensor) -> tensor - %66 = "tf.Identity"(%65) {device = ""} : (tensor) -> tensor - %67 = "tf.StridedSlice"(%37#4, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %68 = "tf.StridedSlice"(%37#4, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %69 = "tf.Sub"(%67, %68) {device = ""} : (tensor, tensor) -> tensor - %70 = "tf.LessEqual"(%10, %69) {device = ""} : (tensor, tensor) -> tensor - %71 = "tf.All"(%70, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor - %72 = "tf.If"(%71, %71, %69) {_lower_using_switch_merge = true, 
_read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_5410, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_5400} : (tensor, tensor, tensor) -> tensor - %73 = "tf.Identity"(%72) {device = ""} : (tensor) -> tensor - %74 = "tf.Identity"(%37#4) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor - %75 = "tf.StridedSlice"(%74, %5, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %76 = "tf.Equal"(%75, %61) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %77 = "tf.All"(%76, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor - %78 = "tf.If"(%77, %77, %75, %61) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_false_5770, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_true_5760} : (tensor, tensor, tensor, tensor) -> tensor - %79 = "tf.Identity"(%78) {device = ""} : (tensor) -> tensor - %80 = "tf.Identity"(%74) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor - %81 = "tf.StridedSlice"(%37#4, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %82 = "tf.Equal"(%81, %10) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %83 = "tf.All"(%82, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor - %84 = "tf.If"(%83, %83, %81, %10) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_6120, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_6110} : (tensor, tensor, tensor, tensor) -> tensor - %85 = "tf.Identity"(%84) {device = ""} : (tensor) -> tensor - %86 = "tf.StridedSlice"(%37#4, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %87 = "tf.StridedSlice"(%37#4, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %88 = "tf.Sub"(%86, %87) {device = ""} : (tensor, tensor) -> tensor - %89 = 
"tf.LessEqual"(%10, %88) {device = ""} : (tensor, tensor) -> tensor - %90 = "tf.All"(%89, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor - %91 = "tf.If"(%90, %90, %88) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_6480, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_6470} : (tensor, tensor, tensor) -> tensor - %92 = "tf.Identity"(%91) {device = ""} : (tensor) -> tensor - %93 = "tf.Identity"(%37#4) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor - %94 = "tf.StridedSlice"(%93, %5, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %95 = "tf.Shape"(%37#2) {device = ""} : (tensor) -> tensor<1xi64> - %96 = "tf.StridedSlice"(%95, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %97 = "tf.Equal"(%94, %96) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %98 = "tf.All"(%97, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor - %99 = "tf.If"(%98, %98, %94, %96) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_6820, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_6810} : (tensor, tensor, tensor, tensor) -> tensor - %100 = "tf.Identity"(%99) {device = ""} : (tensor) -> tensor - %101 = "tf.Identity"(%93) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor - %102 = "tf.Shape"(%101) {device = ""} : (tensor) -> tensor<1xi64> - %103 = "tf.StridedSlice"(%102, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %104 = "tf.Sub"(%103, %13) {device = ""} : (tensor, tensor) -> tensor - %105 = "tf.Equal"(%104, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %106 = "tf.LogicalOr"(%105, %2) {device = ""} : (tensor, tensor) -> tensor - %107 = "tf.Equal"(%104, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %108 = "tf.LogicalOr"(%106, %107) {device = ""} : (tensor, tensor) -> tensor - %109 = "tf.StridedSlice"(%101, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %110 = "tf.StridedSlice"(%101, %15, %5, %16) {begin_mask = 1 : i64, device = "", 
ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %111 = "tf.Sub"(%109, %110) {device = ""} : (tensor, tensor) -> tensor - %112 = "tf.Shape"(%101) {device = ""} : (tensor) -> tensor<1xi64> - %113 = "tf.StridedSlice"(%112, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %114 = "tf.Sub"(%113, %13) {device = ""} : (tensor, tensor) -> tensor - %115 = "tf.Equal"(%114, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %116 = "tf.ExpandDims"(%101, %7) {device = ""} : (tensor, tensor) -> tensor - %117 = "tf.Shape"(%101) {device = ""} : (tensor) -> tensor<1xi32> - %118 = "tf.StridedSlice"(%117, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %119 = "tf.StridedSlice"(%117, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %120 = "tf.StridedSlice"(%117, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %121 = "tf.StridedSlice"(%37#4, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %122 = "tf.Equal"(%121, %10) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %123 = "tf.All"(%122, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor - %124 = "tf.If"(%123, %123, %121, %10) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_7190, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_7180} : (tensor, tensor, tensor, tensor) -> tensor - %125 = "tf.Identity"(%124) {device = ""} : (tensor) -> tensor - %126 = "tf.StridedSlice"(%37#4, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %127 = "tf.StridedSlice"(%37#4, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %128 = "tf.Sub"(%126, %127) {device = ""} : (tensor, tensor) -> tensor - %129 = "tf.LessEqual"(%10, %128) {device = ""} : (tensor, tensor) -> tensor - %130 = "tf.All"(%129, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor - %131 = "tf.If"(%130, %130, %128) {_lower_using_switch_merge = 
true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_7550, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_7540} : (tensor, tensor, tensor) -> tensor - %132 = "tf.Identity"(%131) {device = ""} : (tensor) -> tensor - %133 = "tf.Identity"(%37#4) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor - %134 = "tf.StridedSlice"(%133, %5, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %135 = "tf.Shape"(%37#3) {device = ""} : (tensor) -> tensor<1xi64> - %136 = "tf.StridedSlice"(%135, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %137 = "tf.Equal"(%134, %136) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %138 = "tf.All"(%137, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor - %139 = "tf.If"(%138, %138, %134, %136) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_7890, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_7880} : (tensor, tensor, tensor, tensor) -> tensor - %140 = "tf.Identity"(%139) {device = ""} : (tensor) -> tensor - %141 = "tf.Identity"(%133) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor - %142 = "tf.Shape"(%141) {device = ""} : (tensor) -> tensor<1xi64> - %143 = "tf.StridedSlice"(%142, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %144 = "tf.Sub"(%143, %13) {device = ""} : (tensor, tensor) -> tensor - %145 = "tf.Equal"(%144, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %146 = "tf.LogicalOr"(%145, %2) {device = ""} : (tensor, tensor) -> tensor - %147 = "tf.Equal"(%144, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %148 = "tf.LogicalOr"(%146, %147) {device = ""} : (tensor, tensor) -> tensor - %149 = "tf.StridedSlice"(%141, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %150 = "tf.StridedSlice"(%141, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %151 = "tf.Sub"(%149, %150) {device = 
""} : (tensor, tensor) -> tensor - %152 = "tf.Shape"(%141) {device = ""} : (tensor) -> tensor<1xi64> - %153 = "tf.StridedSlice"(%152, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %154 = "tf.Sub"(%153, %13) {device = ""} : (tensor, tensor) -> tensor - %155 = "tf.Equal"(%154, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %156 = "tf.ExpandDims"(%141, %7) {device = ""} : (tensor, tensor) -> tensor - %157 = "tf.Shape"(%141) {device = ""} : (tensor) -> tensor<1xi32> - %158 = "tf.StridedSlice"(%157, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %159 = "tf.StridedSlice"(%157, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %160 = "tf.StridedSlice"(%157, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %161 = "tf.StridedSlice"(%141, %5, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %162 = "tf.Range"(%10, %161, %13) {device = ""} : (tensor, tensor, tensor) -> tensor - %163 = "tf.StridedSlice"(%141, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %164 = "tf.StridedSlice"(%141, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %165 = "tf.Sub"(%163, %164) {device = ""} : (tensor, tensor) -> tensor - %166 = "tf.If"(%108, %108, %13, %104) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_AssertGuard_false_8690, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_AssertGuard_true_8680} : (tensor, tensor, tensor, tensor) -> tensor - %167 = "tf.Identity"(%166) {device = ""} : (tensor) -> tensor - %168 = "tf.Equal"(%104, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %169 = "tf.Select"(%168, %13, %104) {device = ""} : (tensor, tensor, tensor) -> tensor - %170 = "tf.Equal"(%169, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %171 = "tf.LogicalOr"(%170, %2) {device = ""} : (tensor, tensor) -> tensor - %172 = "tf.Equal"(%169, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %173 = "tf.LogicalOr"(%171, %172) {device = ""} : (tensor, tensor) -> tensor - %174 = "tf.Select"(%115, %169, %13) {device = ""} : (tensor, tensor, tensor) -> tensor - %175 = "tf.Pack"(%174, %13) {axis = 0 : i64, device = ""} : 
(tensor, tensor) -> tensor<2xi64> - %176 = "tf.StridedSlice"(%175, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %177 = "tf.Cast"(%176) {Truncate = false, device = ""} : (tensor) -> tensor - %178 = "tf.Reshape"(%177, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor - %179 = "tf.Pack"(%7, %178) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %180 = "tf.Tile"(%116, %179) {device = ""} : (tensor, tensor<2xi32>) -> tensor - %181 = "tf.Mul"(%178, %119) {device = ""} : (tensor, tensor) -> tensor - %182 = "tf.Pack"(%181) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %183 = "tf.ConcatV2"(%118, %182, %120, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %184 = "tf.Reshape"(%180, %183) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %185 = "tf.Shape"(%184) {device = ""} : (tensor) -> tensor<1xi64> - %186 = "tf.StridedSlice"(%185, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %187 = "tf.Pack"(%176) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %188 = "tf.StridedSlice"(%184, %187, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %189 = "tf.Sub"(%186, %176) {device = ""} : (tensor, tensor) -> tensor - %190 = "tf.Pack"(%189) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %191 = "tf.StridedSlice"(%184, %11, %190, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %192:2 = "tf.RaggedRange"(%191, %188, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %193 = "tf.Select"(%2, %169, %13) {device = ""} : (tensor, tensor, tensor) -> tensor - %194 = "tf.Pack"(%193, %13) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> - %195 = "tf.StridedSlice"(%194, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %196 = "tf.Cast"(%195) {Truncate = false, device = ""} : (tensor) -> tensor - %197 = "tf.Reshape"(%196, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor - %198 = "tf.Pack"(%7, %197) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %199 = "tf.Tile"(%4, %198) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> - %200 = "tf.Mul"(%197, %8) {device = ""} : (tensor, tensor) -> tensor - %201 = "tf.Pack"(%200) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %202 = "tf.ConcatV2"(%9, %201, %9, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %203 = "tf.Reshape"(%199, %202) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor - %204 = "tf.Shape"(%203) {device = ""} : (tensor) -> tensor<1xi64> - %205 = "tf.StridedSlice"(%204, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, 
new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %206 = "tf.Pack"(%195) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %207 = "tf.StridedSlice"(%203, %206, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %208 = "tf.Sub"(%205, %195) {device = ""} : (tensor, tensor) -> tensor - %209 = "tf.Pack"(%208) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %210 = "tf.StridedSlice"(%203, %11, %209, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %211:2 = "tf.RaggedRange"(%210, %207, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %212 = "tf.StridedSlice"(%194, %15, %16, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> - %213 = "tf.StridedSlice"(%194, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %214 = "tf.Mul"(%213, %12) {device = ""} : (tensor, tensor<1xi64>) -> tensor<1xi64> - %215 = "tf.Tile"(%214, %212) {device = ""} : (tensor<1xi64>, tensor<1xi64>) -> tensor - %216 = "tf.Cumsum"(%215, %14) {device = "", exclusive = false, reverse = false} : (tensor, tensor) -> tensor - %217 = "tf.ConcatV2"(%11, %216, %3) {device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor - %218 = "tf.StridedSlice"(%217, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %219 = "tf.ExpandDims"(%218, %7) {device = ""} : (tensor, tensor) -> tensor - %220 = "tf.Shape"(%218) {device = ""} : (tensor) -> tensor<1xi32> - %221 = "tf.StridedSlice"(%220, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %222 = "tf.Pack"(%221) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %223 = "tf.StridedSlice"(%217, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %224 = "tf.ExpandDims"(%223, %7) {device = ""} : (tensor, tensor) -> tensor - %225 = "tf.Shape"(%223) {device = ""} : (tensor) -> tensor<1xi32> - %226 = "tf.StridedSlice"(%225, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %227 = "tf.Pack"(%226) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %228 = "tf.Equal"(%104, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %229 = "tf.Select"(%228, %169, %13) {device = ""} : (tensor, tensor, tensor) -> tensor - 
%230 = "tf.Cast"(%229) {Truncate = false, device = ""} : (tensor) -> tensor - %231 = "tf.Reshape"(%230, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor - %232 = "tf.Pack"(%7, %231) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %233 = "tf.Mul"(%231, %8) {device = ""} : (tensor, tensor) -> tensor - %234 = "tf.Pack"(%233) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %235 = "tf.ConcatV2"(%9, %234, %9, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %236 = "tf.Pack"(%229) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %237 = "tf.Pack"(%10, %104) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> - %238 = "tf.ExpandDims"(%237, %7) {device = ""} : (tensor<2xi64>, tensor) -> tensor<2x1xi64> - %239 = "tf.Tile"(%238, %232) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> - %240 = "tf.Reshape"(%239, %235) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor - %241 = "tf.Shape"(%240) {device = ""} : (tensor) -> tensor<1xi64> - %242 = "tf.StridedSlice"(%241, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %243 = "tf.Sub"(%242, %229) {device = ""} : (tensor, tensor) -> tensor - %244 = "tf.Pack"(%243) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %245 = "tf.StridedSlice"(%240, %11, %244, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %246 = "tf.StridedSlice"(%240, %236, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %247:2 = "tf.RaggedRange"(%245, %246, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %248 = "tf.GatherV2"(%111, %247#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %249 = "tf.Cast"(%248) {Truncate = false, device = ""} : (tensor) -> tensor - %250 = "tf.BroadcastTo"(%249, %222) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %251 = "tf.Max"(%250, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor - %252 = "tf.Maximum"(%14, %251) {device = ""} : (tensor, tensor) -> tensor - %253 = "tf.Range"(%14, %252, %7) {device = ""} : (tensor, tensor, tensor) -> tensor - %254 = "tf.Pack"(%7, %252) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %255 = "tf.Tile"(%219, %254) {device = ""} : (tensor, tensor<2xi32>) -> tensor - %256 = "tf.Shape"(%255) {device = ""} : (tensor) -> tensor<2xi32> - %257 = "tf.StridedSlice"(%256, %15, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> - %258 = "tf.Prod"(%257, %15) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor - %259 = "tf.Pack"(%258) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %260 = "tf.Shape"(%255) {device = ""} : (tensor) -> tensor<2xi32> - %261 = "tf.StridedSlice"(%260, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask 
= 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %262 = "tf.Shape"(%255) {device = ""} : (tensor) -> tensor<2xi32> - %263 = "tf.StridedSlice"(%262, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %264 = "tf.ConcatV2"(%261, %259, %263, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %265 = "tf.Reshape"(%255, %264) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %266 = "tf.ExpandDims"(%250, %3) {device = ""} : (tensor, tensor) -> tensor - %267 = "tf.Less"(%253, %266) {device = ""} : (tensor, tensor) -> tensor - %268 = "tf.Reshape"(%267, %5) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %269 = "tf.Where"(%268) {device = ""} : (tensor) -> tensor - %270 = "tf.Squeeze"(%269) {device = "", squeeze_dims = [1]} : (tensor) -> tensor - %271 = "tf.GatherV2"(%265, %270, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %272 = "tf.Cast"(%248) {Truncate = false, device = ""} : (tensor) -> tensor - %273 = "tf.BroadcastTo"(%272, %227) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %274 = "tf.Max"(%273, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor - %275 = "tf.Maximum"(%14, %274) {device = ""} : (tensor, tensor) -> tensor - %276 = "tf.Range"(%14, %275, %7) {device = ""} : (tensor, tensor, tensor) -> tensor - %277 = "tf.Pack"(%7, %275) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %278 = "tf.Tile"(%224, %277) {device = ""} : (tensor, tensor<2xi32>) -> tensor - %279 = "tf.Shape"(%278) {device = ""} : (tensor) -> tensor<2xi32> - %280 = "tf.StridedSlice"(%279, %15, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> - %281 = "tf.Prod"(%280, %15) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor - %282 = "tf.Pack"(%281) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %283 = "tf.Shape"(%278) {device = ""} : (tensor) -> tensor<2xi32> - %284 = "tf.StridedSlice"(%283, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %285 = "tf.Shape"(%278) {device = ""} : (tensor) -> tensor<2xi32> - %286 = "tf.StridedSlice"(%285, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %287 = "tf.ConcatV2"(%284, %282, %286, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %288 = "tf.Reshape"(%278, %287) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %289 = "tf.ExpandDims"(%273, %3) {device = ""} : (tensor, tensor) -> tensor - %290 = "tf.Less"(%276, %289) {device = ""} : (tensor, tensor) -> tensor - %291 = "tf.Reshape"(%290, %5) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %292 = "tf.Where"(%291) {device = ""} : (tensor) -> tensor - %293 = "tf.Squeeze"(%292) {device = "", squeeze_dims = [1]} : (tensor) -> tensor - %294 = 
"tf.GatherV2"(%288, %293, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %295:2 = "tf.RaggedRange"(%271, %294, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %296 = "tf.If"(%173, %173, %169, %13) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_1_AssertGuard_false_9760, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_1_AssertGuard_true_9750} : (tensor, tensor, tensor, tensor) -> tensor - %297 = "tf.Identity"(%296) {device = ""} : (tensor) -> tensor - %298 = "tf.Select"(%2, %169, %13) {device = ""} : (tensor, tensor, tensor) -> tensor - %299 = "tf.Pack"(%298) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %300 = "tf.ConcatV2"(%1, %299, %12, %14) {device = ""} : (tensor<0xi64>, tensor<1xi64>, tensor<1xi64>, tensor) -> tensor<2xi64> - %301 = "tf.StridedSlice"(%300, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %302 = "tf.Equal"(%301, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %303 = "tf.StridedSlice"(%300, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %304 = "tf.StridedSlice"(%300, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %305 = "tf.Equal"(%304, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %306 = "tf.If"(%305, %305, %304, %248) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_2_AssertGuard_false_10250, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_2_AssertGuard_true_10240} : (tensor, tensor, tensor, tensor) -> tensor - %307 = "tf.Identity"(%306) {device = ""} : (tensor) -> tensor - %308 = "tf.If"(%302, %302, %248, %303) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_3_AssertGuard_false_10610, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_3_AssertGuard_true_10600} : (tensor, tensor, tensor, tensor) -> tensor - %309 = "tf.If"(%148, %148, %13, %144) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_Assert_AssertGuard_false_15310, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_Assert_AssertGuard_true_15300} : (tensor, tensor, tensor, tensor) -> tensor - %310 = "tf.Identity"(%309) {device = ""} : (tensor) -> tensor - %311 = "tf.Equal"(%144, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %312 = "tf.Select"(%311, %13, %144) {device = ""} : (tensor, tensor, tensor) -> tensor - %313 = 
"tf.Equal"(%312, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %314 = "tf.LogicalOr"(%313, %2) {device = ""} : (tensor, tensor) -> tensor - %315 = "tf.Equal"(%312, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %316 = "tf.LogicalOr"(%314, %315) {device = ""} : (tensor, tensor) -> tensor - %317 = "tf.Select"(%155, %312, %13) {device = ""} : (tensor, tensor, tensor) -> tensor - %318 = "tf.Pack"(%317, %13) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> - %319 = "tf.StridedSlice"(%318, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %320 = "tf.Cast"(%319) {Truncate = false, device = ""} : (tensor) -> tensor - %321 = "tf.Reshape"(%320, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor - %322 = "tf.Pack"(%7, %321) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %323 = "tf.Tile"(%156, %322) {device = ""} : (tensor, tensor<2xi32>) -> tensor - %324 = "tf.Mul"(%321, %159) {device = ""} : (tensor, tensor) -> tensor - %325 = "tf.Pack"(%324) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %326 = "tf.ConcatV2"(%158, %325, %160, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %327 = "tf.Reshape"(%323, %326) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %328 = "tf.Shape"(%327) {device = ""} : (tensor) -> tensor<1xi64> - %329 = "tf.StridedSlice"(%328, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %330 = "tf.Pack"(%319) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %331 = "tf.StridedSlice"(%327, %330, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %332 = "tf.Sub"(%329, %319) {device = ""} : (tensor, tensor) -> tensor - %333 = "tf.Pack"(%332) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %334 = "tf.StridedSlice"(%327, %11, %333, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %335:2 = "tf.RaggedRange"(%334, %331, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %336 = "tf.GatherV2"(%162, %335#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %337 = "tf.StridedSlice"(%318, %15, %16, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> - %338 = "tf.StridedSlice"(%318, %15, %16, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> - %339 = "tf.StridedSlice"(%318, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, 
tensor<1xi32>, tensor<1xi32>) -> tensor<0xi64> - %340 = "tf.ConcatV2"(%338, %339, %14) {device = ""} : (tensor<1xi64>, tensor<0xi64>, tensor) -> tensor<1xi64> - %341 = "tf.StridedSlice"(%318, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %342 = "tf.Mul"(%165, %341) {device = ""} : (tensor, tensor) -> tensor - %343 = "tf.Tile"(%342, %337) {device = ""} : (tensor, tensor<1xi64>) -> tensor - %344 = "tf.Cumsum"(%343, %14) {device = "", exclusive = false, reverse = false} : (tensor, tensor) -> tensor - %345 = "tf.ConcatV2"(%11, %344, %3) {device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor - %346 = "tf.Shape"(%345) {device = ""} : (tensor) -> tensor<1xi64> - %347 = "tf.StridedSlice"(%346, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %348 = "tf.Sub"(%347, %13) {device = ""} : (tensor, tensor) -> tensor - %349 = "tf.Equal"(%348, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %350 = "tf.LogicalOr"(%349, %2) {device = ""} : (tensor, tensor) -> tensor - %351 = "tf.Equal"(%348, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %352 = "tf.LogicalOr"(%350, %351) {device = ""} : (tensor, tensor) -> tensor - %353 = "tf.StridedSlice"(%345, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %354 = "tf.StridedSlice"(%345, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %355 = "tf.Sub"(%353, %354) {device = ""} : (tensor, tensor) -> tensor - %356 = "tf.Shape"(%345) {device = ""} : (tensor) -> tensor<1xi64> - %357 = "tf.StridedSlice"(%356, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %358 = "tf.Sub"(%357, %13) {device = ""} : (tensor, tensor) -> tensor - %359 = "tf.Equal"(%358, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %360 = "tf.ExpandDims"(%345, %7) {device = ""} : (tensor, tensor) -> tensor - %361 = "tf.Shape"(%345) {device = ""} : (tensor) -> tensor<1xi32> - %362 = "tf.StridedSlice"(%361, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %363 = "tf.StridedSlice"(%361, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %364 = "tf.StridedSlice"(%361, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %365 = 
"tf.Select"(%2, %312, %13) {device = ""} : (tensor, tensor, tensor) -> tensor - %366 = "tf.Pack"(%365, %13) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> - %367 = "tf.StridedSlice"(%366, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %368 = "tf.Cast"(%367) {Truncate = false, device = ""} : (tensor) -> tensor - %369 = "tf.Reshape"(%368, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor - %370 = "tf.Pack"(%7, %369) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %371 = "tf.Tile"(%4, %370) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> - %372 = "tf.Mul"(%369, %8) {device = ""} : (tensor, tensor) -> tensor - %373 = "tf.Pack"(%372) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %374 = "tf.ConcatV2"(%9, %373, %9, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %375 = "tf.Reshape"(%371, %374) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor - %376 = "tf.Shape"(%375) {device = ""} : (tensor) -> tensor<1xi64> - %377 = "tf.StridedSlice"(%376, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %378 = "tf.Pack"(%367) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %379 = "tf.StridedSlice"(%375, %378, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %380 = "tf.Sub"(%377, %367) {device = ""} : (tensor, tensor) -> tensor - %381 = "tf.Pack"(%380) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %382 = "tf.StridedSlice"(%375, %11, %381, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %383:2 = "tf.RaggedRange"(%382, %379, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %384 = "tf.GatherV2"(%11, %383#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor - %385 = "tf.GatherV2"(%12, %384, %14) {batch_dims = 0 : i64, device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor - %386 = "tf.StridedSlice"(%366, %15, %16, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> - %387 = "tf.StridedSlice"(%366, %15, %16, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> - %388 = "tf.StridedSlice"(%366, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi64> - %389 = "tf.ConcatV2"(%387, %388, %14) {device = ""} : (tensor<1xi64>, tensor<0xi64>, tensor) -> tensor<1xi64> - %390 = "tf.Tile"(%385, %389) {device = ""} : (tensor, tensor<1xi64>) 
-> tensor - %391 = "tf.StridedSlice"(%366, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %392 = "tf.Mul"(%391, %12) {device = ""} : (tensor, tensor<1xi64>) -> tensor<1xi64> - %393 = "tf.Tile"(%392, %386) {device = ""} : (tensor<1xi64>, tensor<1xi64>) -> tensor - %394 = "tf.Cumsum"(%393, %14) {device = "", exclusive = false, reverse = false} : (tensor, tensor) -> tensor - %395 = "tf.ConcatV2"(%11, %394, %3) {device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor - %396 = "tf.StridedSlice"(%395, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %397 = "tf.ExpandDims"(%396, %7) {device = ""} : (tensor, tensor) -> tensor - %398 = "tf.Shape"(%396) {device = ""} : (tensor) -> tensor<1xi32> - %399 = "tf.StridedSlice"(%398, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %400 = "tf.Pack"(%399) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %401 = "tf.StridedSlice"(%395, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %402 = "tf.ExpandDims"(%401, %7) {device = ""} : (tensor, tensor) -> tensor - %403 = "tf.Shape"(%401) {device = ""} : (tensor) -> tensor<1xi32> - %404 = "tf.StridedSlice"(%403, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %405 = "tf.Pack"(%404) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %406 = "tf.Equal"(%144, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %407 = "tf.Select"(%406, %312, %13) {device = ""} : (tensor, tensor, tensor) -> tensor - %408 = "tf.Cast"(%407) {Truncate = false, device = ""} : (tensor) -> tensor - %409 = "tf.Reshape"(%408, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor - %410 = "tf.Pack"(%7, %409) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %411 = "tf.Mul"(%409, %8) {device = ""} : (tensor, tensor) -> tensor - %412 = "tf.Pack"(%411) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %413 = "tf.ConcatV2"(%9, %412, %9, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %414 = "tf.Pack"(%407) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %415 = "tf.Pack"(%10, %144) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> - %416 = "tf.ExpandDims"(%415, %7) {device = ""} : (tensor<2xi64>, tensor) -> tensor<2x1xi64> - %417 = "tf.Tile"(%416, %410) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> - %418 = "tf.Reshape"(%417, %413) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor - %419 = "tf.Shape"(%418) {device = ""} : (tensor) -> tensor<1xi64> - %420 = "tf.StridedSlice"(%419, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : 
i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %421 = "tf.Sub"(%420, %407) {device = ""} : (tensor, tensor) -> tensor - %422 = "tf.Pack"(%421) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %423 = "tf.StridedSlice"(%418, %11, %422, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %424 = "tf.StridedSlice"(%418, %414, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %425:2 = "tf.RaggedRange"(%423, %424, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %426 = "tf.GatherV2"(%151, %425#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %427 = "tf.Cast"(%426) {Truncate = false, device = ""} : (tensor) -> tensor - %428 = "tf.BroadcastTo"(%427, %400) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %429 = "tf.Max"(%428, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor - %430 = "tf.Maximum"(%14, %429) {device = ""} : (tensor, tensor) -> tensor - %431 = "tf.Range"(%14, %430, %7) {device = ""} : (tensor, tensor, tensor) -> tensor - %432 = "tf.Pack"(%7, %430) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %433 = "tf.Tile"(%397, %432) {device = ""} : (tensor, tensor<2xi32>) -> tensor - %434 = "tf.Shape"(%433) {device = ""} : (tensor) -> tensor<2xi32> - %435 = "tf.StridedSlice"(%434, %15, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> - %436 = "tf.Prod"(%435, %15) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor - %437 = "tf.Pack"(%436) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %438 = "tf.Shape"(%433) {device = ""} : (tensor) -> tensor<2xi32> - %439 = "tf.StridedSlice"(%438, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %440 = "tf.Shape"(%433) {device = ""} : (tensor) -> tensor<2xi32> - %441 = "tf.StridedSlice"(%440, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %442 = "tf.ConcatV2"(%439, %437, %441, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %443 = "tf.Reshape"(%433, %442) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %444 = "tf.ExpandDims"(%428, %3) {device = ""} : (tensor, tensor) -> tensor - %445 = "tf.Less"(%431, %444) {device = ""} : (tensor, tensor) -> tensor - %446 = "tf.Reshape"(%445, %5) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %447 = "tf.Where"(%446) {device = ""} : (tensor) -> tensor - %448 = "tf.Squeeze"(%447) {device = "", squeeze_dims = [1]} : (tensor) -> tensor - %449 = "tf.GatherV2"(%443, %448, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %450 = "tf.Cast"(%426) {Truncate = false, device = ""} : (tensor) -> tensor - %451 = 
"tf.BroadcastTo"(%450, %405) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %452 = "tf.Max"(%451, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor - %453 = "tf.Maximum"(%14, %452) {device = ""} : (tensor, tensor) -> tensor - %454 = "tf.Range"(%14, %453, %7) {device = ""} : (tensor, tensor, tensor) -> tensor - %455 = "tf.Pack"(%7, %453) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %456 = "tf.Tile"(%402, %455) {device = ""} : (tensor, tensor<2xi32>) -> tensor - %457 = "tf.Shape"(%456) {device = ""} : (tensor) -> tensor<2xi32> - %458 = "tf.StridedSlice"(%457, %15, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> - %459 = "tf.Prod"(%458, %15) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor - %460 = "tf.Pack"(%459) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %461 = "tf.Shape"(%456) {device = ""} : (tensor) -> tensor<2xi32> - %462 = "tf.StridedSlice"(%461, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %463 = "tf.Shape"(%456) {device = ""} : (tensor) -> tensor<2xi32> - %464 = "tf.StridedSlice"(%463, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %465 = "tf.ConcatV2"(%462, %460, %464, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %466 = "tf.Reshape"(%456, %465) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %467 = "tf.ExpandDims"(%451, %3) {device = ""} : (tensor, tensor) -> tensor - %468 = "tf.Less"(%454, %467) {device = ""} : (tensor, tensor) -> tensor - %469 = "tf.Reshape"(%468, %5) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %470 = "tf.Where"(%469) {device = ""} : (tensor) -> tensor - %471 = "tf.Squeeze"(%470) {device = "", squeeze_dims = [1]} : (tensor) -> tensor - %472 = "tf.GatherV2"(%466, %471, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %473:2 = "tf.RaggedRange"(%449, %472, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %474 = "tf.GatherV2"(%390, %473#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %475 = "tf.If"(%316, %316, %312, %13) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_Assert_1_AssertGuard_false_16380, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_Assert_1_AssertGuard_true_16370} : (tensor, tensor, tensor, tensor) -> tensor - %476 = "tf.Identity"(%475) {device = ""} : (tensor) -> tensor - %477 = "tf.Select"(%2, %312, %13) {device = ""} : (tensor, tensor, tensor) -> tensor - %478 = "tf.Pack"(%477) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %479 = "tf.ConcatV2"(%1, %478, %12, %14) {device = ""} : (tensor<0xi64>, tensor<1xi64>, tensor<1xi64>, tensor) -> tensor<2xi64> - %480 = "tf.StridedSlice"(%479, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, 
shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %481 = "tf.Equal"(%480, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %482 = "tf.StridedSlice"(%479, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %483 = "tf.StridedSlice"(%479, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %484 = "tf.Equal"(%483, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %485 = "tf.If"(%484, %484, %483, %426) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_Assert_2_AssertGuard_false_16870, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_Assert_2_AssertGuard_true_16860} : (tensor, tensor, tensor, tensor) -> tensor - %486 = "tf.Identity"(%485) {device = ""} : (tensor) -> tensor - %487 = "tf.If"(%481, %481, %426, %482) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_Assert_3_AssertGuard_false_17230, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_Assert_3_AssertGuard_true_17220} : (tensor, tensor, tensor, tensor) -> tensor - %488 = "tf.Identity"(%487) {device = ""} : (tensor) -> tensor - %489 = "tf.If"(%352, %352, %13, %348) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_AssertGuard_false_21910, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_AssertGuard_true_21900} : (tensor, tensor, tensor, tensor) -> tensor - %490 = "tf.Identity"(%489) {device = ""} : (tensor) -> tensor - %491 = "tf.Equal"(%348, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %492 = "tf.Select"(%491, %13, %348) {device = ""} : (tensor, tensor, tensor) -> tensor - %493 = "tf.Equal"(%492, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %494 = "tf.LogicalOr"(%493, %2) {device = ""} : (tensor, tensor) -> tensor - %495 = "tf.Equal"(%492, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %496 = "tf.LogicalOr"(%494, %495) {device = ""} : (tensor, tensor) -> tensor - %497 = "tf.Select"(%359, %492, %13) {device = ""} : (tensor, tensor, tensor) -> tensor - %498 = "tf.Pack"(%497, %13) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> - %499 = "tf.StridedSlice"(%498, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %500 = "tf.Cast"(%499) {Truncate = false, device = ""} : (tensor) -> tensor - %501 = "tf.Reshape"(%500, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor - %502 = "tf.Pack"(%7, %501) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %503 = "tf.Tile"(%360, %502) {device = ""} : (tensor, tensor<2xi32>) -> 
tensor - %504 = "tf.Mul"(%501, %363) {device = ""} : (tensor, tensor) -> tensor - %505 = "tf.Pack"(%504) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %506 = "tf.ConcatV2"(%362, %505, %364, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %507 = "tf.Reshape"(%503, %506) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %508 = "tf.Shape"(%507) {device = ""} : (tensor) -> tensor<1xi64> - %509 = "tf.StridedSlice"(%508, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %510 = "tf.Pack"(%499) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %511 = "tf.StridedSlice"(%507, %510, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %512 = "tf.Sub"(%509, %499) {device = ""} : (tensor, tensor) -> tensor - %513 = "tf.Pack"(%512) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %514 = "tf.StridedSlice"(%507, %11, %513, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %515:2 = "tf.RaggedRange"(%514, %511, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %516 = "tf.Select"(%2, %492, %13) {device = ""} : (tensor, tensor, tensor) -> tensor - %517 = "tf.Pack"(%516, %13) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> - %518 = "tf.StridedSlice"(%517, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %519 = "tf.Cast"(%518) {Truncate = false, device = ""} : (tensor) -> tensor - %520 = "tf.Reshape"(%519, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor - %521 = "tf.Pack"(%7, %520) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %522 = "tf.Tile"(%4, %521) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> - %523 = "tf.Mul"(%520, %8) {device = ""} : (tensor, tensor) -> tensor - %524 = "tf.Pack"(%523) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %525 = "tf.ConcatV2"(%9, %524, %9, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %526 = "tf.Reshape"(%522, %525) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor - %527 = "tf.Shape"(%526) {device = ""} : (tensor) -> tensor<1xi64> - %528 = "tf.StridedSlice"(%527, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %529 = "tf.Pack"(%518) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %530 = "tf.StridedSlice"(%526, %529, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %531 = "tf.Sub"(%528, %518) {device = ""} : (tensor, tensor) -> tensor - %532 = "tf.Pack"(%531) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %533 = 
"tf.StridedSlice"(%526, %11, %532, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %534:2 = "tf.RaggedRange"(%533, %530, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %535 = "tf.StridedSlice"(%517, %15, %16, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> - %536 = "tf.StridedSlice"(%517, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %537 = "tf.Mul"(%536, %12) {device = ""} : (tensor, tensor<1xi64>) -> tensor<1xi64> - %538 = "tf.Tile"(%537, %535) {device = ""} : (tensor<1xi64>, tensor<1xi64>) -> tensor - %539 = "tf.Cumsum"(%538, %14) {device = "", exclusive = false, reverse = false} : (tensor, tensor) -> tensor - %540 = "tf.ConcatV2"(%11, %539, %3) {device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor - %541 = "tf.StridedSlice"(%540, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %542 = "tf.ExpandDims"(%541, %7) {device = ""} : (tensor, tensor) -> tensor - %543 = "tf.Shape"(%541) {device = ""} : (tensor) -> tensor<1xi32> - %544 = "tf.StridedSlice"(%543, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %545 = "tf.Pack"(%544) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %546 = "tf.StridedSlice"(%540, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %547 = "tf.ExpandDims"(%546, %7) {device = ""} : (tensor, tensor) -> tensor - %548 = "tf.Shape"(%546) {device = ""} : (tensor) -> tensor<1xi32> - %549 = "tf.StridedSlice"(%548, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %550 = "tf.Pack"(%549) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %551 = "tf.Equal"(%348, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %552 = "tf.Select"(%551, %492, %13) {device = ""} : (tensor, tensor, tensor) -> tensor - %553 = "tf.Cast"(%552) {Truncate = false, device = ""} : (tensor) -> tensor - %554 = "tf.Reshape"(%553, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor - %555 = "tf.Pack"(%7, %554) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %556 = "tf.Mul"(%554, %8) {device = ""} : (tensor, tensor) -> tensor - %557 = "tf.Pack"(%556) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %558 = "tf.ConcatV2"(%9, %557, %9, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %559 = "tf.Pack"(%552) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %560 
= "tf.Pack"(%10, %348) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> - %561 = "tf.ExpandDims"(%560, %7) {device = ""} : (tensor<2xi64>, tensor) -> tensor<2x1xi64> - %562 = "tf.Tile"(%561, %555) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> - %563 = "tf.Reshape"(%562, %558) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor - %564 = "tf.Shape"(%563) {device = ""} : (tensor) -> tensor<1xi64> - %565 = "tf.StridedSlice"(%564, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %566 = "tf.Sub"(%565, %552) {device = ""} : (tensor, tensor) -> tensor - %567 = "tf.Pack"(%566) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %568 = "tf.StridedSlice"(%563, %11, %567, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %569 = "tf.StridedSlice"(%563, %559, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor - %570:2 = "tf.RaggedRange"(%568, %569, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %571 = "tf.GatherV2"(%355, %570#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %572 = "tf.Cast"(%571) {Truncate = false, device = ""} : (tensor) -> tensor - %573 = "tf.BroadcastTo"(%572, %545) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %574 = "tf.Max"(%573, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor - %575 = "tf.Maximum"(%14, %574) {device = ""} : (tensor, tensor) -> tensor - %576 = "tf.Range"(%14, %575, %7) {device = ""} : (tensor, tensor, tensor) -> tensor - %577 = "tf.Pack"(%7, %575) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %578 = "tf.Tile"(%542, %577) {device = ""} : (tensor, tensor<2xi32>) -> tensor - %579 = "tf.Shape"(%578) {device = ""} : (tensor) -> tensor<2xi32> - %580 = "tf.StridedSlice"(%579, %15, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> - %581 = "tf.Prod"(%580, %15) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor - %582 = "tf.Pack"(%581) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %583 = "tf.Shape"(%578) {device = ""} : (tensor) -> tensor<2xi32> - %584 = "tf.StridedSlice"(%583, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %585 = "tf.Shape"(%578) {device = ""} : (tensor) -> tensor<2xi32> - %586 = "tf.StridedSlice"(%585, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %587 = "tf.ConcatV2"(%584, %582, %586, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %588 = "tf.Reshape"(%578, %587) {device = 
""} : (tensor, tensor<1xi32>) -> tensor - %589 = "tf.ExpandDims"(%573, %3) {device = ""} : (tensor, tensor) -> tensor - %590 = "tf.Less"(%576, %589) {device = ""} : (tensor, tensor) -> tensor - %591 = "tf.Reshape"(%590, %5) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %592 = "tf.Where"(%591) {device = ""} : (tensor) -> tensor - %593 = "tf.Squeeze"(%592) {device = "", squeeze_dims = [1]} : (tensor) -> tensor - %594 = "tf.GatherV2"(%588, %593, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %595 = "tf.Cast"(%571) {Truncate = false, device = ""} : (tensor) -> tensor - %596 = "tf.BroadcastTo"(%595, %550) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %597 = "tf.Max"(%596, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor - %598 = "tf.Maximum"(%14, %597) {device = ""} : (tensor, tensor) -> tensor - %599 = "tf.Range"(%14, %598, %7) {device = ""} : (tensor, tensor, tensor) -> tensor - %600 = "tf.Pack"(%7, %598) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> - %601 = "tf.Tile"(%547, %600) {device = ""} : (tensor, tensor<2xi32>) -> tensor - %602 = "tf.Shape"(%601) {device = ""} : (tensor) -> tensor<2xi32> - %603 = "tf.StridedSlice"(%602, %15, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> - %604 = "tf.Prod"(%603, %15) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor - %605 = "tf.Pack"(%604) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> - %606 = "tf.Shape"(%601) {device = ""} : (tensor) -> tensor<2xi32> - %607 = "tf.StridedSlice"(%606, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %608 = "tf.Shape"(%601) {device = ""} : (tensor) -> tensor<2xi32> - %609 = "tf.StridedSlice"(%608, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %610 = "tf.ConcatV2"(%607, %605, %609, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> - %611 = "tf.Reshape"(%601, %610) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %612 = "tf.ExpandDims"(%596, %3) {device = ""} : (tensor, tensor) -> tensor - %613 = "tf.Less"(%599, %612) {device = ""} : (tensor, tensor) -> tensor - %614 = "tf.Reshape"(%613, %5) {device = ""} : (tensor, tensor<1xi32>) -> tensor - %615 = "tf.Where"(%614) {device = ""} : (tensor) -> tensor - %616 = "tf.Squeeze"(%615) {device = "", squeeze_dims = [1]} : (tensor) -> tensor - %617 = "tf.GatherV2"(%611, %616, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %618:2 = "tf.RaggedRange"(%594, %617, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) - %619 = "tf.If"(%496, %496, %492, %13) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_1_AssertGuard_false_22980, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_1_AssertGuard_true_22970} : (tensor, tensor, tensor, 
tensor) -> tensor - %620 = "tf.Identity"(%619) {device = ""} : (tensor) -> tensor - %621 = "tf.Select"(%2, %492, %13) {device = ""} : (tensor, tensor, tensor) -> tensor - %622 = "tf.Pack"(%621) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> - %623 = "tf.ConcatV2"(%1, %622, %12, %14) {device = ""} : (tensor<0xi64>, tensor<1xi64>, tensor<1xi64>, tensor) -> tensor<2xi64> - %624 = "tf.StridedSlice"(%623, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %625 = "tf.Equal"(%624, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %626 = "tf.StridedSlice"(%623, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %627 = "tf.StridedSlice"(%623, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor - %628 = "tf.Equal"(%627, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor - %629 = "tf.If"(%628, %628, %627, %571) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_2_AssertGuard_false_23470, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_2_AssertGuard_true_23460} : (tensor, tensor, tensor, tensor) -> tensor - %630 = "tf.Identity"(%629) {device = ""} : (tensor) -> tensor - %631 = "tf.If"(%625, %625, %571, %626) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_false_23830, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_true_23820} : (tensor, tensor, tensor, tensor) -> tensor - %632 = "tf.Identity"(%631) {device = ""} : (tensor) -> tensor - %633 = "tf.Identity"(%308) {device = ""} : (tensor) -> tensor - %634 = "tf.Shape"(%37#2) {device = ""} : (tensor) -> tensor<1xi32> - %635 = "tf.StridedSlice"(%634, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %636 = "tf.Cast"(%635) {Truncate = false, device = ""} : (tensor<0xi32>) -> tensor<0xi64> - %637 = "tf.Identity"(%636) {device = ""} : (tensor<0xi64>) -> tensor<0xi64> - %638 = "tf.Shape"(%37#3) {device = ""} : (tensor) -> tensor<1xi32> - %639 = "tf.StridedSlice"(%638, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %640 = "tf.Cast"(%639) {Truncate = false, device = ""} : (tensor<0xi32>) -> tensor<0xi64> - %641 = "tf.Identity"(%640) {device = ""} : (tensor<0xi64>) -> tensor<0xi64> - %642 = "tf.GatherV2"(%37#3, %336, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor - %643 = "tf.Tile"(%642, %340) {device = 
""} : (tensor, tensor<1xi64>) -> tensor - %644 = "tf.Sub"(%643, %474) {device = ""} : (tensor, tensor) -> tensor - %645 = "tf.Shape"(%644) {device = ""} : (tensor) -> tensor<1xi32> - %646 = "tf.StridedSlice"(%645, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> - %647 = "tf.Cast"(%646) {Truncate = false, device = ""} : (tensor<0xi32>) -> tensor<0xi64> - %648 = "tf.Identity"(%647) {device = ""} : (tensor<0xi64>) -> tensor<0xi64> - %649 = "tf.UnicodeEncode"(%37#0, %58) {Tsplits = i64, device = "", errors = "replace", output_encoding = "UTF-8", replacement_char = 65533 : i64} : (tensor, tensor) -> tensor - %650 = "tf.Identity"(%649) {device = ""} : (tensor) -> tensor - return %650 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedConcat_assert_equal_1_Assert_AssertGuard_false_3220(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Input tensors have incompatible shapes."> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedConcat/RaggedFromTensor/Const:0) = "> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/RaggedConcat/RaggedNRows/Const:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedConcat_assert_equal_1_Assert_AssertGuard_true_3210(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_3980(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } 
- func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_3970(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_4340(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor) -> () - %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor - return %4 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_4330(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_4680(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/strided_slice_1:0) = "> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_4670(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func 
@WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_5050(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_5040(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_5410(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor) -> () - %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor - return %4 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_5400(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_false_5770(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor - %1 
= "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/strided_slice:0) = "> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RaggedNRows/sub:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_true_5760(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_6120(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_6110(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_6480(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x 
(WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor) -> () - %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor - return %4 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_6470(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_6820(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/strided_slice_1:0) = "> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_6810(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_7190(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = 
"tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_7180(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_7550(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor) -> () - %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor - return %4 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_7540(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_7890(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/strided_slice_1:0) = "> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_7880(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = 
"tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_AssertGuard_false_8690(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_AssertGuard_true_8680(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_1_AssertGuard_false_9760(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_1_AssertGuard_true_9750(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_2_AssertGuard_false_10250(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_2_AssertGuard_true_10240(%arg0: tensor, %arg1: tensor, 
%arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_3_AssertGuard_false_10610(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_3_AssertGuard_true_10600(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_Assert_AssertGuard_false_15310(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_Assert_AssertGuard_true_15300(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_Assert_1_AssertGuard_false_16380(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : 
tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_Assert_1_AssertGuard_true_16370(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_Assert_2_AssertGuard_false_16870(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_Assert_2_AssertGuard_true_16860(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_Assert_3_AssertGuard_false_17230(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_Assert_3_AssertGuard_true_17220(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_AssertGuard_false_21910(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) 
{device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_AssertGuard_true_21900(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_1_AssertGuard_false_22980(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_1_AssertGuard_true_22970(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_2_AssertGuard_false_23470(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_2_AssertGuard_true_23460(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_false_23830(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>], tf.signature.is_stateful} { - %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor - %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor - %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor - %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> 
tensor - "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () - %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor - return %5 : tensor - } - func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_true_23820(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>]} { - %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor - %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor - return %1 : tensor - } - - // CHECK: func @whitespace_tokenizer_rank0(%arg0: tensor {tf._user_specified_name = "input"}) -> tensor attributes {sym_visibility = "private", tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf._input_shapes = [#tf.shape<>], tf.signature.is_stateful} { - // CHECK: %0 = "tfl.custom"(%arg0) {custom_code = "tftext:WhitespaceTokenizer", custom_option = opaque<"tfl", "0x"> : tensor<0xi8>} : (tensor) -> tensor - // CHECK: return %0 : tensor +func @whitespace_tokenizer_rank1(%arg0: tensor<1x!tf.string> {tf._user_specified_name = "input"}) -> (tensor, tensor) attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<1>], tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<[0, 1]> : tensor<2xi64>} : () -> tensor<2xi64> + %1 = "tf.Const"() {value = dense<[]> : tensor<0xi64>} : () -> tensor<0xi64> + %2 = "tf.Const"() {value = dense : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<-1> : tensor} : () -> tensor + %4 = "tf.Const"() {value = dense<[[0], [1]]> : tensor<2x1xi64>} : () -> tensor<2x1xi64> + %5 = "tf.Const"() {value = dense<-1> : tensor<1xi32>} : () -> tensor<1xi32> + %6 = "tf.Const"() {value = dense<2> : tensor<1xi32>} : () -> tensor<1xi32> + %7 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %8 = "tf.Const"() {value = dense<2> : tensor} : () -> tensor + %9 = "tf.Const"() {value = dense<[]> : tensor<0xi32>} : () -> tensor<0xi32> + %10 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %11 = "tf.Const"() {value = dense<0> : tensor<1xi64>} : () -> tensor<1xi64> + %12 = "tf.Const"() {value = dense<1> : tensor<1xi64>} : () -> tensor<1xi64> + %13 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %14 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %15 = "tf.Const"() {value = dense<0> : tensor<1xi32>} : () -> tensor<1xi32> + %16 = "tf.Const"() {value = dense<1> : tensor<1xi32>} : () -> tensor<1xi32> + %17 = "tf.If"(%2, %2, %13, %13) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedConcat_assert_equal_1_Assert_AssertGuard_false_3210, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedConcat_assert_equal_1_Assert_AssertGuard_true_3200} : (tensor, tensor, tensor, tensor) -> tensor + %18 = "tf.Identity"(%17) {device = ""} : (tensor) -> tensor + %19 = "tf.StringLength"(%arg0) {device = "", unit = "BYTE"} : (tensor<1x!tf.string>) -> tensor<1xi32> + %20 = "tf.ExpandDims"(%19, %7) {device = ""} : (tensor<1xi32>, tensor) -> tensor<1x1xi32> + %21 = "tf.Cast"(%20) {Truncate = false, device = ""} : (tensor<1x1xi32>) -> tensor<1x1xi64> + %22 = "tf.Reshape"(%21, %12) {device = ""} : (tensor<1x1xi64>, tensor<1xi64>) -> tensor<1xi64> + %23 = "tf.Reshape"(%arg0, %5) {device = ""} : 
(tensor<1x!tf.string>, tensor<1xi32>) -> tensor<1x!tf.string> + %24:3 = "tf.UnicodeDecodeWithOffsets"(%23) {Tsplits = i64, device = "", errors = "replace", input_encoding = "UTF-8", replace_control_characters = false, replacement_char = 65533 : i64} : (tensor<1x!tf.string>) -> (tensor<2xi64>, tensor, tensor) + %25 = "tf.StridedSlice"(%24#0, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %26 = "tf.AddV2"(%25, %13) {device = ""} : (tensor<1xi64>, tensor) -> tensor<1xi64> + %27 = "tf.StridedSlice"(%24#0, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %28 = "tf.Minimum"(%26, %27) {device = ""} : (tensor<1xi64>, tensor<1xi64>) -> tensor<1xi64> + %29:2 = "tf.RaggedRange"(%28, %27, %13) {T = i64, Tsplits = i64, device = ""} : (tensor<1xi64>, tensor<1xi64>, tensor) -> (tensor<2xi64>, tensor) + %30 = "tf.StridedSlice"(%29#0, %5, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %31 = "tf.AddV2"(%30, %12) {device = ""} : (tensor, tensor<1xi64>) -> tensor<1xi64> + %32 = "tf.ConcatV2"(%29#0, %31, %14) {device = ""} : (tensor<2xi64>, tensor<1xi64>, tensor) -> tensor<3xi64> + %33 = "tf.GatherV2"(%24#2, %29#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %34 = "tf.ConcatV2"(%33, %22, %14) {device = ""} : (tensor, tensor<1xi64>, tensor) -> tensor + %35:2 = "tf.RaggedGather"(%32, %34, %0) {OUTPUT_RAGGED_RANK = 1 : i64, PARAMS_RAGGED_RANK = 1 : i64, Tindices = i64, Tsplits = i64, Tvalues = i64, device = ""} : (tensor<3xi64>, tensor, tensor<2xi64>) -> (tensor, tensor) + %36:5 = "tf.WhitespaceTokenizeWithOffsets"(%24#1, %24#0) {Tsplits = i64, device = ""} : (tensor, tensor<2xi64>) -> (tensor, tensor, tensor, tensor, tensor) + %37 = "tf.StridedSlice"(%36#1, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %38 = "tf.Equal"(%37, %10) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %39 = "tf.All"(%38, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %40 = "tf.If"(%39, %39, %37, %10) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_3970, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_3960} : (tensor, tensor, tensor, tensor) -> tensor + %41 = "tf.Identity"(%40) {device = ""} : (tensor) -> tensor + %42 = "tf.StridedSlice"(%36#1, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %43 = "tf.StridedSlice"(%36#1, %15, %5, %16) {begin_mask = 1 : i64, 
device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %44 = "tf.Sub"(%42, %43) {device = ""} : (tensor, tensor) -> tensor + %45 = "tf.LessEqual"(%10, %44) {device = ""} : (tensor, tensor) -> tensor + %46 = "tf.All"(%45, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %47 = "tf.If"(%46, %46, %44) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_4330, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_4320} : (tensor, tensor, tensor) -> tensor + %48 = "tf.Identity"(%47) {device = ""} : (tensor) -> tensor + %49 = "tf.Identity"(%36#1) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %50 = "tf.StridedSlice"(%49, %5, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %51 = "tf.Shape"(%36#0) {device = ""} : (tensor) -> tensor<1xi64> + %52 = "tf.StridedSlice"(%51, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %53 = "tf.Equal"(%50, %52) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %54 = "tf.All"(%53, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %55 = "tf.If"(%54, %54, %50, %52) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_4670, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_4660} : (tensor, tensor, tensor, tensor) -> tensor + %56 = "tf.Identity"(%55) {device = ""} : (tensor) -> tensor + %57 = "tf.Identity"(%49) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %58 = "tf.Shape"(%57) {device = ""} : (tensor) -> tensor<1xi64> + %59 = "tf.StridedSlice"(%58, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %60 = "tf.Sub"(%59, %13) {device = ""} : (tensor, tensor) -> tensor + %61 = "tf.StridedSlice"(%36#4, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %62 = "tf.Equal"(%61, %10) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %63 = "tf.All"(%62, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %64 = "tf.If"(%63, %63, %61, %10) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch 
= @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_5040, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_5030} : (tensor, tensor, tensor, tensor) -> tensor + %65 = "tf.Identity"(%64) {device = ""} : (tensor) -> tensor + %66 = "tf.StridedSlice"(%36#4, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %67 = "tf.StridedSlice"(%36#4, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %68 = "tf.Sub"(%66, %67) {device = ""} : (tensor, tensor) -> tensor + %69 = "tf.LessEqual"(%10, %68) {device = ""} : (tensor, tensor) -> tensor + %70 = "tf.All"(%69, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %71 = "tf.If"(%70, %70, %68) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_5400, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_5390} : (tensor, tensor, tensor) -> tensor + %72 = "tf.Identity"(%71) {device = ""} : (tensor) -> tensor + %73 = "tf.Identity"(%36#4) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %74 = "tf.StridedSlice"(%73, %5, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %75 = "tf.Equal"(%74, %60) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %76 = "tf.All"(%75, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %77 = "tf.If"(%76, %76, %74, %60) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_false_5760, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_true_5750} : (tensor, tensor, tensor, tensor) -> tensor + %78 = "tf.Identity"(%77) {device = ""} : (tensor) -> tensor + %79 = "tf.Identity"(%73) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %80 = "tf.StridedSlice"(%36#4, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %81 = "tf.Equal"(%80, %10) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %82 = "tf.All"(%81, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %83 = "tf.If"(%82, %82, %80, %10) 
{_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_6110, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_6100} : (tensor, tensor, tensor, tensor) -> tensor + %84 = "tf.Identity"(%83) {device = ""} : (tensor) -> tensor + %85 = "tf.StridedSlice"(%36#4, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %86 = "tf.StridedSlice"(%36#4, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %87 = "tf.Sub"(%85, %86) {device = ""} : (tensor, tensor) -> tensor + %88 = "tf.LessEqual"(%10, %87) {device = ""} : (tensor, tensor) -> tensor + %89 = "tf.All"(%88, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %90 = "tf.If"(%89, %89, %87) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_6470, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_6460} : (tensor, tensor, tensor) -> tensor + %91 = "tf.Identity"(%90) {device = ""} : (tensor) -> tensor + %92 = "tf.Identity"(%36#4) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %93 = "tf.StridedSlice"(%92, %5, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %94 = "tf.Shape"(%36#2) {device = ""} : (tensor) -> tensor<1xi64> + %95 = "tf.StridedSlice"(%94, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %96 = "tf.Equal"(%93, %95) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %97 = "tf.All"(%96, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %98 = "tf.If"(%97, %97, %93, %95) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_6810, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_6800} : (tensor, tensor, tensor, tensor) -> tensor + %99 = "tf.Identity"(%98) {device = ""} : (tensor) -> tensor + %100 = "tf.Identity"(%92) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %101 = "tf.Shape"(%100) {device = ""} : (tensor) -> tensor<1xi64> + %102 = 
"tf.StridedSlice"(%101, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %103 = "tf.Sub"(%102, %13) {device = ""} : (tensor, tensor) -> tensor + %104 = "tf.Equal"(%103, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %105 = "tf.LogicalOr"(%104, %2) {device = ""} : (tensor, tensor) -> tensor + %106 = "tf.Equal"(%103, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %107 = "tf.LogicalOr"(%105, %106) {device = ""} : (tensor, tensor) -> tensor + %108 = "tf.StridedSlice"(%100, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %109 = "tf.StridedSlice"(%100, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %110 = "tf.Sub"(%108, %109) {device = ""} : (tensor, tensor) -> tensor + %111 = "tf.Shape"(%100) {device = ""} : (tensor) -> tensor<1xi64> + %112 = "tf.StridedSlice"(%111, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %113 = "tf.Sub"(%112, %13) {device = ""} : (tensor, tensor) -> tensor + %114 = "tf.Equal"(%113, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %115 = "tf.ExpandDims"(%100, %7) {device = ""} : (tensor, tensor) -> tensor + %116 = "tf.Shape"(%100) {device = ""} : (tensor) -> tensor<1xi32> + %117 = "tf.StridedSlice"(%116, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %118 = "tf.StridedSlice"(%116, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %119 = "tf.StridedSlice"(%116, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %120 = "tf.StridedSlice"(%36#4, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %121 = "tf.Equal"(%120, %10) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %122 = "tf.All"(%121, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %123 = "tf.If"(%122, %122, %120, %10) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_7180, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = 
@WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_7170} : (tensor, tensor, tensor, tensor) -> tensor + %124 = "tf.Identity"(%123) {device = ""} : (tensor) -> tensor + %125 = "tf.StridedSlice"(%36#4, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %126 = "tf.StridedSlice"(%36#4, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %127 = "tf.Sub"(%125, %126) {device = ""} : (tensor, tensor) -> tensor + %128 = "tf.LessEqual"(%10, %127) {device = ""} : (tensor, tensor) -> tensor + %129 = "tf.All"(%128, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %130 = "tf.If"(%129, %129, %127) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_7540, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_7530} : (tensor, tensor, tensor) -> tensor + %131 = "tf.Identity"(%130) {device = ""} : (tensor) -> tensor + %132 = "tf.Identity"(%36#4) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %133 = "tf.StridedSlice"(%132, %5, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %134 = "tf.Shape"(%36#3) {device = ""} : (tensor) -> tensor<1xi64> + %135 = "tf.StridedSlice"(%134, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %136 = "tf.Equal"(%133, %135) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %137 = "tf.All"(%136, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %138 = "tf.If"(%137, %137, %133, %135) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_7880, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_7870} : (tensor, tensor, tensor, tensor) -> tensor + %139 = "tf.Identity"(%138) {device = ""} : (tensor) -> tensor + %140 = "tf.Identity"(%132) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %141 = "tf.Shape"(%140) {device = ""} : (tensor) -> tensor<1xi64> + %142 = "tf.StridedSlice"(%141, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %143 = "tf.Sub"(%142, %13) 
{device = ""} : (tensor, tensor) -> tensor + %144 = "tf.Equal"(%143, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %145 = "tf.LogicalOr"(%144, %2) {device = ""} : (tensor, tensor) -> tensor + %146 = "tf.Equal"(%143, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %147 = "tf.LogicalOr"(%145, %146) {device = ""} : (tensor, tensor) -> tensor + %148 = "tf.StridedSlice"(%140, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %149 = "tf.StridedSlice"(%140, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %150 = "tf.Sub"(%148, %149) {device = ""} : (tensor, tensor) -> tensor + %151 = "tf.Shape"(%140) {device = ""} : (tensor) -> tensor<1xi64> + %152 = "tf.StridedSlice"(%151, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %153 = "tf.Sub"(%152, %13) {device = ""} : (tensor, tensor) -> tensor + %154 = "tf.Equal"(%153, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %155 = "tf.ExpandDims"(%140, %7) {device = ""} : (tensor, tensor) -> tensor + %156 = "tf.Shape"(%140) {device = ""} : (tensor) -> tensor<1xi32> + %157 = "tf.StridedSlice"(%156, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %158 = "tf.StridedSlice"(%156, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %159 = "tf.StridedSlice"(%156, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %160 = "tf.StridedSlice"(%140, %5, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %161 = "tf.Range"(%10, %160, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %162 = "tf.StridedSlice"(%140, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %163 = "tf.StridedSlice"(%140, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %164 = "tf.Sub"(%162, %163) {device = ""} : (tensor, tensor) -> tensor + %165 = "tf.If"(%107, %107, %13, %103) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedGather_Assert_AssertGuard_false_8680, is_stateless = false, output_shapes = 
[#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedGather_Assert_AssertGuard_true_8670} : (tensor, tensor, tensor, tensor) -> tensor + %166 = "tf.Identity"(%165) {device = ""} : (tensor) -> tensor + %167 = "tf.Equal"(%103, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %168 = "tf.Select"(%167, %13, %103) {device = ""} : (tensor, tensor, tensor) -> tensor + %169 = "tf.Equal"(%168, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %170 = "tf.LogicalOr"(%169, %2) {device = ""} : (tensor, tensor) -> tensor + %171 = "tf.Equal"(%168, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %172 = "tf.LogicalOr"(%170, %171) {device = ""} : (tensor, tensor) -> tensor + %173 = "tf.Select"(%114, %168, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %174 = "tf.Pack"(%173, %13) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %175 = "tf.StridedSlice"(%174, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %176 = "tf.Cast"(%175) {Truncate = false, device = ""} : (tensor) -> tensor + %177 = "tf.Reshape"(%176, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %178 = "tf.Pack"(%7, %177) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %179 = "tf.Tile"(%115, %178) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %180 = "tf.Mul"(%177, %118) {device = ""} : (tensor, tensor) -> tensor + %181 = "tf.Pack"(%180) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %182 = "tf.ConcatV2"(%117, %181, %119, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %183 = "tf.Reshape"(%179, %182) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %184 = "tf.Shape"(%183) {device = ""} : (tensor) -> tensor<1xi64> + %185 = "tf.StridedSlice"(%184, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %186 = "tf.Pack"(%175) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %187 = "tf.StridedSlice"(%183, %186, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %188 = "tf.Sub"(%185, %175) {device = ""} : (tensor, tensor) -> tensor + %189 = "tf.Pack"(%188) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %190 = "tf.StridedSlice"(%183, %11, %189, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %191:2 = "tf.RaggedRange"(%190, %187, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %192 = "tf.Select"(%2, %168, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %193 = "tf.Pack"(%192, %13) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %194 = "tf.StridedSlice"(%193, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %195 = 
"tf.Cast"(%194) {Truncate = false, device = ""} : (tensor) -> tensor + %196 = "tf.Reshape"(%195, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %197 = "tf.Pack"(%7, %196) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %198 = "tf.Tile"(%4, %197) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> + %199 = "tf.Mul"(%196, %8) {device = ""} : (tensor, tensor) -> tensor + %200 = "tf.Pack"(%199) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %201 = "tf.ConcatV2"(%9, %200, %9, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %202 = "tf.Reshape"(%198, %201) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor + %203 = "tf.Shape"(%202) {device = ""} : (tensor) -> tensor<1xi64> + %204 = "tf.StridedSlice"(%203, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %205 = "tf.Pack"(%194) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %206 = "tf.StridedSlice"(%202, %205, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %207 = "tf.Sub"(%204, %194) {device = ""} : (tensor, tensor) -> tensor + %208 = "tf.Pack"(%207) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %209 = "tf.StridedSlice"(%202, %11, %208, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %210:2 = "tf.RaggedRange"(%209, %206, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %211 = "tf.StridedSlice"(%193, %15, %16, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %212 = "tf.StridedSlice"(%193, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %213 = "tf.Mul"(%212, %12) {device = ""} : (tensor, tensor<1xi64>) -> tensor<1xi64> + %214 = "tf.Tile"(%213, %211) {device = ""} : (tensor<1xi64>, tensor<1xi64>) -> tensor + %215 = "tf.Cumsum"(%214, %14) {device = "", exclusive = false, reverse = false} : (tensor, tensor) -> tensor + %216 = "tf.ConcatV2"(%11, %215, %3) {device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor + %217 = "tf.StridedSlice"(%216, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %218 = "tf.ExpandDims"(%217, %7) {device = ""} : (tensor, tensor) -> tensor + %219 = "tf.Shape"(%217) {device = ""} : (tensor) -> tensor<1xi32> + %220 = "tf.StridedSlice"(%219, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %221 = "tf.Pack"(%220) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %222 = 
"tf.StridedSlice"(%216, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %223 = "tf.ExpandDims"(%222, %7) {device = ""} : (tensor, tensor) -> tensor + %224 = "tf.Shape"(%222) {device = ""} : (tensor) -> tensor<1xi32> + %225 = "tf.StridedSlice"(%224, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %226 = "tf.Pack"(%225) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %227 = "tf.Equal"(%103, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %228 = "tf.Select"(%227, %168, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %229 = "tf.Cast"(%228) {Truncate = false, device = ""} : (tensor) -> tensor + %230 = "tf.Reshape"(%229, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %231 = "tf.Pack"(%7, %230) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %232 = "tf.Mul"(%230, %8) {device = ""} : (tensor, tensor) -> tensor + %233 = "tf.Pack"(%232) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %234 = "tf.ConcatV2"(%9, %233, %9, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %235 = "tf.Pack"(%228) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %236 = "tf.Pack"(%10, %103) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %237 = "tf.ExpandDims"(%236, %7) {device = ""} : (tensor<2xi64>, tensor) -> tensor<2x1xi64> + %238 = "tf.Tile"(%237, %231) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> + %239 = "tf.Reshape"(%238, %234) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor + %240 = "tf.Shape"(%239) {device = ""} : (tensor) -> tensor<1xi64> + %241 = "tf.StridedSlice"(%240, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %242 = "tf.Sub"(%241, %228) {device = ""} : (tensor, tensor) -> tensor + %243 = "tf.Pack"(%242) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %244 = "tf.StridedSlice"(%239, %11, %243, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %245 = "tf.StridedSlice"(%239, %235, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %246:2 = "tf.RaggedRange"(%244, %245, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %247 = "tf.GatherV2"(%110, %246#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %248 = "tf.Cast"(%247) {Truncate = false, device = ""} : (tensor) -> tensor + %249 = "tf.BroadcastTo"(%248, %221) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %250 = "tf.Max"(%249, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %251 = "tf.Maximum"(%14, %250) {device = ""} : (tensor, tensor) -> tensor + %252 = "tf.Range"(%14, %251, %7) {device = ""} : (tensor, tensor, 
tensor) -> tensor + %253 = "tf.Pack"(%7, %251) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %254 = "tf.Tile"(%218, %253) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %255 = "tf.Shape"(%254) {device = ""} : (tensor) -> tensor<2xi32> + %256 = "tf.StridedSlice"(%255, %15, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %257 = "tf.Prod"(%256, %15) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor + %258 = "tf.Pack"(%257) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %259 = "tf.Shape"(%254) {device = ""} : (tensor) -> tensor<2xi32> + %260 = "tf.StridedSlice"(%259, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %261 = "tf.Shape"(%254) {device = ""} : (tensor) -> tensor<2xi32> + %262 = "tf.StridedSlice"(%261, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %263 = "tf.ConcatV2"(%260, %258, %262, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %264 = "tf.Reshape"(%254, %263) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %265 = "tf.ExpandDims"(%249, %3) {device = ""} : (tensor, tensor) -> tensor + %266 = "tf.Less"(%252, %265) {device = ""} : (tensor, tensor) -> tensor + %267 = "tf.Reshape"(%266, %5) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %268 = "tf.Where"(%267) {device = ""} : (tensor) -> tensor + %269 = "tf.Squeeze"(%268) {device = "", squeeze_dims = [1]} : (tensor) -> tensor + %270 = "tf.GatherV2"(%264, %269, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %271 = "tf.Cast"(%247) {Truncate = false, device = ""} : (tensor) -> tensor + %272 = "tf.BroadcastTo"(%271, %226) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %273 = "tf.Max"(%272, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %274 = "tf.Maximum"(%14, %273) {device = ""} : (tensor, tensor) -> tensor + %275 = "tf.Range"(%14, %274, %7) {device = ""} : (tensor, tensor, tensor) -> tensor + %276 = "tf.Pack"(%7, %274) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %277 = "tf.Tile"(%223, %276) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %278 = "tf.Shape"(%277) {device = ""} : (tensor) -> tensor<2xi32> + %279 = "tf.StridedSlice"(%278, %15, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %280 = "tf.Prod"(%279, %15) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor + %281 = "tf.Pack"(%280) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %282 = "tf.Shape"(%277) {device = ""} : (tensor) -> tensor<2xi32> + %283 = "tf.StridedSlice"(%282, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %284 = "tf.Shape"(%277) 
{device = ""} : (tensor) -> tensor<2xi32> + %285 = "tf.StridedSlice"(%284, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %286 = "tf.ConcatV2"(%283, %281, %285, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %287 = "tf.Reshape"(%277, %286) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %288 = "tf.ExpandDims"(%272, %3) {device = ""} : (tensor, tensor) -> tensor + %289 = "tf.Less"(%275, %288) {device = ""} : (tensor, tensor) -> tensor + %290 = "tf.Reshape"(%289, %5) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %291 = "tf.Where"(%290) {device = ""} : (tensor) -> tensor + %292 = "tf.Squeeze"(%291) {device = "", squeeze_dims = [1]} : (tensor) -> tensor + %293 = "tf.GatherV2"(%287, %292, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %294:2 = "tf.RaggedRange"(%270, %293, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %295 = "tf.If"(%172, %172, %168, %13) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedGather_Assert_1_AssertGuard_false_9750, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedGather_Assert_1_AssertGuard_true_9740} : (tensor, tensor, tensor, tensor) -> tensor + %296 = "tf.Identity"(%295) {device = ""} : (tensor) -> tensor + %297 = "tf.Select"(%2, %168, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %298 = "tf.Pack"(%297) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %299 = "tf.ConcatV2"(%1, %298, %12, %14) {device = ""} : (tensor<0xi64>, tensor<1xi64>, tensor<1xi64>, tensor) -> tensor<2xi64> + %300 = "tf.StridedSlice"(%299, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %301 = "tf.Equal"(%300, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %302 = "tf.StridedSlice"(%299, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %303 = "tf.StridedSlice"(%299, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %304 = "tf.Equal"(%303, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %305 = "tf.If"(%304, %304, %303, %247) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedGather_Assert_2_AssertGuard_false_10240, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedGather_Assert_2_AssertGuard_true_10230} : (tensor, tensor, tensor, tensor) -> tensor + %306 = "tf.Identity"(%305) {device = ""} : (tensor) -> tensor + %307 = "tf.If"(%301, %301, %247, %302) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedGather_Assert_3_AssertGuard_false_10600, is_stateless = false, output_shapes = 
[#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedGather_Assert_3_AssertGuard_true_10590} : (tensor, tensor, tensor, tensor) -> tensor + %308 = "tf.If"(%147, %147, %13, %143) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_Assert_AssertGuard_false_15300, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_Assert_AssertGuard_true_15290} : (tensor, tensor, tensor, tensor) -> tensor + %309 = "tf.Identity"(%308) {device = ""} : (tensor) -> tensor + %310 = "tf.Equal"(%143, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %311 = "tf.Select"(%310, %13, %143) {device = ""} : (tensor, tensor, tensor) -> tensor + %312 = "tf.Equal"(%311, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %313 = "tf.LogicalOr"(%312, %2) {device = ""} : (tensor, tensor) -> tensor + %314 = "tf.Equal"(%311, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %315 = "tf.LogicalOr"(%313, %314) {device = ""} : (tensor, tensor) -> tensor + %316 = "tf.Select"(%154, %311, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %317 = "tf.Pack"(%316, %13) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %318 = "tf.StridedSlice"(%317, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %319 = "tf.Cast"(%318) {Truncate = false, device = ""} : (tensor) -> tensor + %320 = "tf.Reshape"(%319, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %321 = "tf.Pack"(%7, %320) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %322 = "tf.Tile"(%155, %321) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %323 = "tf.Mul"(%320, %158) {device = ""} : (tensor, tensor) -> tensor + %324 = "tf.Pack"(%323) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %325 = "tf.ConcatV2"(%157, %324, %159, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %326 = "tf.Reshape"(%322, %325) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %327 = "tf.Shape"(%326) {device = ""} : (tensor) -> tensor<1xi64> + %328 = "tf.StridedSlice"(%327, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %329 = "tf.Pack"(%318) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %330 = "tf.StridedSlice"(%326, %329, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %331 = "tf.Sub"(%328, %318) {device = ""} : (tensor, tensor) -> tensor + %332 = "tf.Pack"(%331) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %333 = "tf.StridedSlice"(%326, %11, %332, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %334:2 = "tf.RaggedRange"(%333, %330, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %335 = "tf.GatherV2"(%161, %334#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, 
tensor) -> tensor + %336 = "tf.StridedSlice"(%317, %15, %16, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %337 = "tf.StridedSlice"(%317, %15, %16, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %338 = "tf.StridedSlice"(%317, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi64> + %339 = "tf.ConcatV2"(%337, %338, %14) {device = ""} : (tensor<1xi64>, tensor<0xi64>, tensor) -> tensor<1xi64> + %340 = "tf.StridedSlice"(%317, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %341 = "tf.Mul"(%164, %340) {device = ""} : (tensor, tensor) -> tensor + %342 = "tf.Tile"(%341, %336) {device = ""} : (tensor, tensor<1xi64>) -> tensor + %343 = "tf.Cumsum"(%342, %14) {device = "", exclusive = false, reverse = false} : (tensor, tensor) -> tensor + %344 = "tf.ConcatV2"(%11, %343, %3) {device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor + %345 = "tf.Shape"(%344) {device = ""} : (tensor) -> tensor<1xi64> + %346 = "tf.StridedSlice"(%345, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %347 = "tf.Sub"(%346, %13) {device = ""} : (tensor, tensor) -> tensor + %348 = "tf.Equal"(%347, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %349 = "tf.LogicalOr"(%348, %2) {device = ""} : (tensor, tensor) -> tensor + %350 = "tf.Equal"(%347, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %351 = "tf.LogicalOr"(%349, %350) {device = ""} : (tensor, tensor) -> tensor + %352 = "tf.StridedSlice"(%344, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %353 = "tf.StridedSlice"(%344, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %354 = "tf.Sub"(%352, %353) {device = ""} : (tensor, tensor) -> tensor + %355 = "tf.Shape"(%344) {device = ""} : (tensor) -> tensor<1xi64> + %356 = "tf.StridedSlice"(%355, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %357 = "tf.Sub"(%356, %13) {device = ""} : (tensor, tensor) -> tensor + %358 = "tf.Equal"(%357, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %359 = "tf.ExpandDims"(%344, %7) {device = ""} : (tensor, tensor) -> tensor + %360 = "tf.Shape"(%344) {device = ""} : (tensor) -> tensor<1xi32> + %361 = "tf.StridedSlice"(%360, %15, %15, 
%16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %362 = "tf.StridedSlice"(%360, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %363 = "tf.StridedSlice"(%360, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %364 = "tf.Select"(%2, %311, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %365 = "tf.Pack"(%364, %13) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %366 = "tf.StridedSlice"(%365, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %367 = "tf.Cast"(%366) {Truncate = false, device = ""} : (tensor) -> tensor + %368 = "tf.Reshape"(%367, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %369 = "tf.Pack"(%7, %368) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %370 = "tf.Tile"(%4, %369) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> + %371 = "tf.Mul"(%368, %8) {device = ""} : (tensor, tensor) -> tensor + %372 = "tf.Pack"(%371) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %373 = "tf.ConcatV2"(%9, %372, %9, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %374 = "tf.Reshape"(%370, %373) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor + %375 = "tf.Shape"(%374) {device = ""} : (tensor) -> tensor<1xi64> + %376 = "tf.StridedSlice"(%375, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %377 = "tf.Pack"(%366) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %378 = "tf.StridedSlice"(%374, %377, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %379 = "tf.Sub"(%376, %366) {device = ""} : (tensor, tensor) -> tensor + %380 = "tf.Pack"(%379) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %381 = "tf.StridedSlice"(%374, %11, %380, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %382:2 = "tf.RaggedRange"(%381, %378, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %383 = "tf.GatherV2"(%11, %382#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor + %384 = "tf.GatherV2"(%12, %383, %14) {batch_dims = 0 : i64, device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor + %385 = "tf.StridedSlice"(%365, %15, %16, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, 
tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %386 = "tf.StridedSlice"(%365, %15, %16, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %387 = "tf.StridedSlice"(%365, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi64> + %388 = "tf.ConcatV2"(%386, %387, %14) {device = ""} : (tensor<1xi64>, tensor<0xi64>, tensor) -> tensor<1xi64> + %389 = "tf.Tile"(%384, %388) {device = ""} : (tensor, tensor<1xi64>) -> tensor + %390 = "tf.StridedSlice"(%365, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %391 = "tf.Mul"(%390, %12) {device = ""} : (tensor, tensor<1xi64>) -> tensor<1xi64> + %392 = "tf.Tile"(%391, %385) {device = ""} : (tensor<1xi64>, tensor<1xi64>) -> tensor + %393 = "tf.Cumsum"(%392, %14) {device = "", exclusive = false, reverse = false} : (tensor, tensor) -> tensor + %394 = "tf.ConcatV2"(%11, %393, %3) {device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor + %395 = "tf.StridedSlice"(%394, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %396 = "tf.ExpandDims"(%395, %7) {device = ""} : (tensor, tensor) -> tensor + %397 = "tf.Shape"(%395) {device = ""} : (tensor) -> tensor<1xi32> + %398 = "tf.StridedSlice"(%397, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %399 = "tf.Pack"(%398) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %400 = "tf.StridedSlice"(%394, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %401 = "tf.ExpandDims"(%400, %7) {device = ""} : (tensor, tensor) -> tensor + %402 = "tf.Shape"(%400) {device = ""} : (tensor) -> tensor<1xi32> + %403 = "tf.StridedSlice"(%402, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %404 = "tf.Pack"(%403) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %405 = "tf.Equal"(%143, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %406 = "tf.Select"(%405, %311, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %407 = "tf.Cast"(%406) {Truncate = false, device = ""} : (tensor) -> tensor + %408 = "tf.Reshape"(%407, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %409 = "tf.Pack"(%7, %408) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %410 = "tf.Mul"(%408, %8) {device = ""} : (tensor, tensor) -> tensor + %411 = "tf.Pack"(%410) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %412 = "tf.ConcatV2"(%9, %411, %9, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, 
tensor<0xi32>, tensor) -> tensor<1xi32> + %413 = "tf.Pack"(%406) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %414 = "tf.Pack"(%10, %143) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %415 = "tf.ExpandDims"(%414, %7) {device = ""} : (tensor<2xi64>, tensor) -> tensor<2x1xi64> + %416 = "tf.Tile"(%415, %409) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> + %417 = "tf.Reshape"(%416, %412) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor + %418 = "tf.Shape"(%417) {device = ""} : (tensor) -> tensor<1xi64> + %419 = "tf.StridedSlice"(%418, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %420 = "tf.Sub"(%419, %406) {device = ""} : (tensor, tensor) -> tensor + %421 = "tf.Pack"(%420) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %422 = "tf.StridedSlice"(%417, %11, %421, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %423 = "tf.StridedSlice"(%417, %413, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %424:2 = "tf.RaggedRange"(%422, %423, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %425 = "tf.GatherV2"(%150, %424#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %426 = "tf.Cast"(%425) {Truncate = false, device = ""} : (tensor) -> tensor + %427 = "tf.BroadcastTo"(%426, %399) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %428 = "tf.Max"(%427, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %429 = "tf.Maximum"(%14, %428) {device = ""} : (tensor, tensor) -> tensor + %430 = "tf.Range"(%14, %429, %7) {device = ""} : (tensor, tensor, tensor) -> tensor + %431 = "tf.Pack"(%7, %429) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %432 = "tf.Tile"(%396, %431) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %433 = "tf.Shape"(%432) {device = ""} : (tensor) -> tensor<2xi32> + %434 = "tf.StridedSlice"(%433, %15, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %435 = "tf.Prod"(%434, %15) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor + %436 = "tf.Pack"(%435) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %437 = "tf.Shape"(%432) {device = ""} : (tensor) -> tensor<2xi32> + %438 = "tf.StridedSlice"(%437, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %439 = "tf.Shape"(%432) {device = ""} : (tensor) -> tensor<2xi32> + %440 = "tf.StridedSlice"(%439, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %441 = "tf.ConcatV2"(%438, %436, %440, %14) 
{device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %442 = "tf.Reshape"(%432, %441) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %443 = "tf.ExpandDims"(%427, %3) {device = ""} : (tensor, tensor) -> tensor + %444 = "tf.Less"(%430, %443) {device = ""} : (tensor, tensor) -> tensor + %445 = "tf.Reshape"(%444, %5) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %446 = "tf.Where"(%445) {device = ""} : (tensor) -> tensor + %447 = "tf.Squeeze"(%446) {device = "", squeeze_dims = [1]} : (tensor) -> tensor + %448 = "tf.GatherV2"(%442, %447, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %449 = "tf.Cast"(%425) {Truncate = false, device = ""} : (tensor) -> tensor + %450 = "tf.BroadcastTo"(%449, %404) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %451 = "tf.Max"(%450, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %452 = "tf.Maximum"(%14, %451) {device = ""} : (tensor, tensor) -> tensor + %453 = "tf.Range"(%14, %452, %7) {device = ""} : (tensor, tensor, tensor) -> tensor + %454 = "tf.Pack"(%7, %452) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %455 = "tf.Tile"(%401, %454) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %456 = "tf.Shape"(%455) {device = ""} : (tensor) -> tensor<2xi32> + %457 = "tf.StridedSlice"(%456, %15, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %458 = "tf.Prod"(%457, %15) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor + %459 = "tf.Pack"(%458) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %460 = "tf.Shape"(%455) {device = ""} : (tensor) -> tensor<2xi32> + %461 = "tf.StridedSlice"(%460, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %462 = "tf.Shape"(%455) {device = ""} : (tensor) -> tensor<2xi32> + %463 = "tf.StridedSlice"(%462, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %464 = "tf.ConcatV2"(%461, %459, %463, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %465 = "tf.Reshape"(%455, %464) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %466 = "tf.ExpandDims"(%450, %3) {device = ""} : (tensor, tensor) -> tensor + %467 = "tf.Less"(%453, %466) {device = ""} : (tensor, tensor) -> tensor + %468 = "tf.Reshape"(%467, %5) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %469 = "tf.Where"(%468) {device = ""} : (tensor) -> tensor + %470 = "tf.Squeeze"(%469) {device = "", squeeze_dims = [1]} : (tensor) -> tensor + %471 = "tf.GatherV2"(%465, %470, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %472:2 = "tf.RaggedRange"(%448, %471, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %473 = "tf.GatherV2"(%389, %472#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %474 = "tf.If"(%315, %315, %311, %13) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = 
@WhitespaceTokenize_Assert_1_AssertGuard_false_16370, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_Assert_1_AssertGuard_true_16360} : (tensor, tensor, tensor, tensor) -> tensor + %475 = "tf.Identity"(%474) {device = ""} : (tensor) -> tensor + %476 = "tf.Select"(%2, %311, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %477 = "tf.Pack"(%476) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %478 = "tf.ConcatV2"(%1, %477, %12, %14) {device = ""} : (tensor<0xi64>, tensor<1xi64>, tensor<1xi64>, tensor) -> tensor<2xi64> + %479 = "tf.StridedSlice"(%478, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %480 = "tf.Equal"(%479, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %481 = "tf.StridedSlice"(%478, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %482 = "tf.StridedSlice"(%478, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %483 = "tf.Equal"(%482, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %484 = "tf.If"(%483, %483, %482, %425) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_Assert_2_AssertGuard_false_16860, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_Assert_2_AssertGuard_true_16850} : (tensor, tensor, tensor, tensor) -> tensor + %485 = "tf.Identity"(%484) {device = ""} : (tensor) -> tensor + %486 = "tf.If"(%480, %480, %425, %481) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_Assert_3_AssertGuard_false_17220, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_Assert_3_AssertGuard_true_17210} : (tensor, tensor, tensor, tensor) -> tensor + %487 = "tf.Identity"(%486) {device = ""} : (tensor) -> tensor + %488 = "tf.If"(%351, %351, %13, %347) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedGather_1_Assert_AssertGuard_false_21900, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedGather_1_Assert_AssertGuard_true_21890} : (tensor, tensor, tensor, tensor) -> tensor + %489 = "tf.Identity"(%488) {device = ""} : (tensor) -> tensor + %490 = "tf.Equal"(%347, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %491 = "tf.Select"(%490, %13, %347) {device = ""} : (tensor, tensor, tensor) -> tensor + %492 = "tf.Equal"(%491, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %493 = "tf.LogicalOr"(%492, %2) {device = ""} : (tensor, tensor) -> tensor + %494 = "tf.Equal"(%491, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %495 = "tf.LogicalOr"(%493, %494) {device = ""} : (tensor, tensor) -> tensor + %496 = "tf.Select"(%358, %491, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %497 = "tf.Pack"(%496, %13) {axis = 0 : i64, device = 
""} : (tensor, tensor) -> tensor<2xi64> + %498 = "tf.StridedSlice"(%497, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %499 = "tf.Cast"(%498) {Truncate = false, device = ""} : (tensor) -> tensor + %500 = "tf.Reshape"(%499, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %501 = "tf.Pack"(%7, %500) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %502 = "tf.Tile"(%359, %501) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %503 = "tf.Mul"(%500, %362) {device = ""} : (tensor, tensor) -> tensor + %504 = "tf.Pack"(%503) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %505 = "tf.ConcatV2"(%361, %504, %363, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %506 = "tf.Reshape"(%502, %505) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %507 = "tf.Shape"(%506) {device = ""} : (tensor) -> tensor<1xi64> + %508 = "tf.StridedSlice"(%507, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %509 = "tf.Pack"(%498) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %510 = "tf.StridedSlice"(%506, %509, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %511 = "tf.Sub"(%508, %498) {device = ""} : (tensor, tensor) -> tensor + %512 = "tf.Pack"(%511) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %513 = "tf.StridedSlice"(%506, %11, %512, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %514:2 = "tf.RaggedRange"(%513, %510, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %515 = "tf.Select"(%2, %491, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %516 = "tf.Pack"(%515, %13) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %517 = "tf.StridedSlice"(%516, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %518 = "tf.Cast"(%517) {Truncate = false, device = ""} : (tensor) -> tensor + %519 = "tf.Reshape"(%518, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %520 = "tf.Pack"(%7, %519) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %521 = "tf.Tile"(%4, %520) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> + %522 = "tf.Mul"(%519, %8) {device = ""} : (tensor, tensor) -> tensor + %523 = "tf.Pack"(%522) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %524 = "tf.ConcatV2"(%9, %523, %9, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %525 = "tf.Reshape"(%521, %524) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor + %526 = "tf.Shape"(%525) {device = ""} : (tensor) -> tensor<1xi64> + %527 = "tf.StridedSlice"(%526, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, 
new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %528 = "tf.Pack"(%517) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %529 = "tf.StridedSlice"(%525, %528, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %530 = "tf.Sub"(%527, %517) {device = ""} : (tensor, tensor) -> tensor + %531 = "tf.Pack"(%530) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %532 = "tf.StridedSlice"(%525, %11, %531, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %533:2 = "tf.RaggedRange"(%532, %529, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %534 = "tf.StridedSlice"(%516, %15, %16, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %535 = "tf.StridedSlice"(%516, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %536 = "tf.Mul"(%535, %12) {device = ""} : (tensor, tensor<1xi64>) -> tensor<1xi64> + %537 = "tf.Tile"(%536, %534) {device = ""} : (tensor<1xi64>, tensor<1xi64>) -> tensor + %538 = "tf.Cumsum"(%537, %14) {device = "", exclusive = false, reverse = false} : (tensor, tensor) -> tensor + %539 = "tf.ConcatV2"(%11, %538, %3) {device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor + %540 = "tf.StridedSlice"(%539, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %541 = "tf.ExpandDims"(%540, %7) {device = ""} : (tensor, tensor) -> tensor + %542 = "tf.Shape"(%540) {device = ""} : (tensor) -> tensor<1xi32> + %543 = "tf.StridedSlice"(%542, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %544 = "tf.Pack"(%543) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %545 = "tf.StridedSlice"(%539, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %546 = "tf.ExpandDims"(%545, %7) {device = ""} : (tensor, tensor) -> tensor + %547 = "tf.Shape"(%545) {device = ""} : (tensor) -> tensor<1xi32> + %548 = "tf.StridedSlice"(%547, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %549 = "tf.Pack"(%548) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %550 = "tf.Equal"(%347, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %551 = "tf.Select"(%550, %491, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + 
%552 = "tf.Cast"(%551) {Truncate = false, device = ""} : (tensor) -> tensor + %553 = "tf.Reshape"(%552, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %554 = "tf.Pack"(%7, %553) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %555 = "tf.Mul"(%553, %8) {device = ""} : (tensor, tensor) -> tensor + %556 = "tf.Pack"(%555) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %557 = "tf.ConcatV2"(%9, %556, %9, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %558 = "tf.Pack"(%551) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %559 = "tf.Pack"(%10, %347) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %560 = "tf.ExpandDims"(%559, %7) {device = ""} : (tensor<2xi64>, tensor) -> tensor<2x1xi64> + %561 = "tf.Tile"(%560, %554) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> + %562 = "tf.Reshape"(%561, %557) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor + %563 = "tf.Shape"(%562) {device = ""} : (tensor) -> tensor<1xi64> + %564 = "tf.StridedSlice"(%563, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %565 = "tf.Sub"(%564, %551) {device = ""} : (tensor, tensor) -> tensor + %566 = "tf.Pack"(%565) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %567 = "tf.StridedSlice"(%562, %11, %566, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %568 = "tf.StridedSlice"(%562, %558, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %569:2 = "tf.RaggedRange"(%567, %568, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %570 = "tf.GatherV2"(%354, %569#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %571 = "tf.Cast"(%570) {Truncate = false, device = ""} : (tensor) -> tensor + %572 = "tf.BroadcastTo"(%571, %544) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %573 = "tf.Max"(%572, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %574 = "tf.Maximum"(%14, %573) {device = ""} : (tensor, tensor) -> tensor + %575 = "tf.Range"(%14, %574, %7) {device = ""} : (tensor, tensor, tensor) -> tensor + %576 = "tf.Pack"(%7, %574) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %577 = "tf.Tile"(%541, %576) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %578 = "tf.Shape"(%577) {device = ""} : (tensor) -> tensor<2xi32> + %579 = "tf.StridedSlice"(%578, %15, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %580 = "tf.Prod"(%579, %15) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor + %581 = "tf.Pack"(%580) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %582 = "tf.Shape"(%577) {device = ""} : (tensor) -> tensor<2xi32> + %583 = "tf.StridedSlice"(%582, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask 
= 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %584 = "tf.Shape"(%577) {device = ""} : (tensor) -> tensor<2xi32> + %585 = "tf.StridedSlice"(%584, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %586 = "tf.ConcatV2"(%583, %581, %585, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %587 = "tf.Reshape"(%577, %586) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %588 = "tf.ExpandDims"(%572, %3) {device = ""} : (tensor, tensor) -> tensor + %589 = "tf.Less"(%575, %588) {device = ""} : (tensor, tensor) -> tensor + %590 = "tf.Reshape"(%589, %5) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %591 = "tf.Where"(%590) {device = ""} : (tensor) -> tensor + %592 = "tf.Squeeze"(%591) {device = "", squeeze_dims = [1]} : (tensor) -> tensor + %593 = "tf.GatherV2"(%587, %592, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %594 = "tf.Cast"(%570) {Truncate = false, device = ""} : (tensor) -> tensor + %595 = "tf.BroadcastTo"(%594, %549) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %596 = "tf.Max"(%595, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %597 = "tf.Maximum"(%14, %596) {device = ""} : (tensor, tensor) -> tensor + %598 = "tf.Range"(%14, %597, %7) {device = ""} : (tensor, tensor, tensor) -> tensor + %599 = "tf.Pack"(%7, %597) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %600 = "tf.Tile"(%546, %599) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %601 = "tf.Shape"(%600) {device = ""} : (tensor) -> tensor<2xi32> + %602 = "tf.StridedSlice"(%601, %15, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %603 = "tf.Prod"(%602, %15) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor + %604 = "tf.Pack"(%603) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %605 = "tf.Shape"(%600) {device = ""} : (tensor) -> tensor<2xi32> + %606 = "tf.StridedSlice"(%605, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %607 = "tf.Shape"(%600) {device = ""} : (tensor) -> tensor<2xi32> + %608 = "tf.StridedSlice"(%607, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %609 = "tf.ConcatV2"(%606, %604, %608, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %610 = "tf.Reshape"(%600, %609) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %611 = "tf.ExpandDims"(%595, %3) {device = ""} : (tensor, tensor) -> tensor + %612 = "tf.Less"(%598, %611) {device = ""} : (tensor, tensor) -> tensor + %613 = "tf.Reshape"(%612, %5) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %614 = "tf.Where"(%613) {device = ""} : (tensor) -> tensor + %615 = "tf.Squeeze"(%614) {device = "", squeeze_dims = [1]} : (tensor) -> tensor + %616 = 
"tf.GatherV2"(%610, %615, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %617:2 = "tf.RaggedRange"(%593, %616, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %618 = "tf.If"(%495, %495, %491, %13) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedGather_1_Assert_1_AssertGuard_false_22970, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedGather_1_Assert_1_AssertGuard_true_22960} : (tensor, tensor, tensor, tensor) -> tensor + %619 = "tf.Identity"(%618) {device = ""} : (tensor) -> tensor + %620 = "tf.Select"(%2, %491, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %621 = "tf.Pack"(%620) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %622 = "tf.ConcatV2"(%1, %621, %12, %14) {device = ""} : (tensor<0xi64>, tensor<1xi64>, tensor<1xi64>, tensor) -> tensor<2xi64> + %623 = "tf.StridedSlice"(%622, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %624 = "tf.Equal"(%623, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %625 = "tf.StridedSlice"(%622, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %626 = "tf.StridedSlice"(%622, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %627 = "tf.Equal"(%626, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %628 = "tf.If"(%627, %627, %626, %570) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedGather_1_Assert_2_AssertGuard_false_23460, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedGather_1_Assert_2_AssertGuard_true_23450} : (tensor, tensor, tensor, tensor) -> tensor + %629 = "tf.Identity"(%628) {device = ""} : (tensor) -> tensor + %630 = "tf.If"(%624, %624, %570, %625) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_false_23820, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_true_23810} : (tensor, tensor, tensor, tensor) -> tensor + %631 = "tf.Identity"(%79) {device = ""} : (tensor) -> tensor + %632 = "tf.Identity"(%630) {device = ""} : (tensor) -> tensor + %633 = "tf.Identity"(%307) {device = ""} : (tensor) -> tensor + %634 = "tf.Shape"(%36#2) {device = ""} : (tensor) -> tensor<1xi32> + %635 = "tf.StridedSlice"(%634, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %636 = "tf.Cast"(%635) {Truncate = false, device = ""} : (tensor<0xi32>) -> tensor<0xi64> + %637 = "tf.Identity"(%636) {device = ""} : (tensor<0xi64>) -> tensor<0xi64> + %638 = "tf.Shape"(%36#3) {device = ""} : (tensor) -> tensor<1xi32> + 
%639 = "tf.StridedSlice"(%638, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %640 = "tf.Cast"(%639) {Truncate = false, device = ""} : (tensor<0xi32>) -> tensor<0xi64> + %641 = "tf.Identity"(%640) {device = ""} : (tensor<0xi64>) -> tensor<0xi64> + %642 = "tf.GatherV2"(%36#3, %335, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %643 = "tf.Tile"(%642, %339) {device = ""} : (tensor, tensor<1xi64>) -> tensor + %644 = "tf.Sub"(%643, %473) {device = ""} : (tensor, tensor) -> tensor + %645 = "tf.Shape"(%644) {device = ""} : (tensor) -> tensor<1xi32> + %646 = "tf.StridedSlice"(%645, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %647 = "tf.Cast"(%646) {Truncate = false, device = ""} : (tensor<0xi32>) -> tensor<0xi64> + %648 = "tf.Identity"(%647) {device = ""} : (tensor<0xi64>) -> tensor<0xi64> + %649 = "tf.UnicodeEncode"(%36#0, %57) {Tsplits = i64, device = "", errors = "replace", output_encoding = "UTF-8", replacement_char = 65533 : i64} : (tensor, tensor) -> tensor + %650 = "tf.Identity"(%649) {device = ""} : (tensor) -> tensor + return %650, %631 : tensor, tensor } +func @WhitespaceTokenize_RaggedConcat_assert_equal_1_Assert_AssertGuard_false_3210(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Input tensors have incompatible shapes."> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedConcat/RaggedFromTensor/Const:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/RaggedConcat/RaggedNRows/Const:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_RaggedConcat_assert_equal_1_Assert_AssertGuard_true_3200(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_3970(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + %3 = 
"tf.Const"() {value = dense<"y (WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_3960(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_4330(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor) -> () + %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor + return %4 : tensor +} +func @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_4320(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_4670(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/strided_slice_1:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_4660(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes 
{tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_5040(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_5030(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_5400(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor) -> () + %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor + return %4 : tensor +} +func @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_5390(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_false_5760(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not 
form a valid RaggedTensor"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/strided_slice:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RaggedNRows/sub:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_true_5750(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_6110(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_6100(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_6470(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, 
tensor, tensor, tensor, tensor) -> () + %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor + return %4 : tensor +} +func @WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_6460(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_6810(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/strided_slice_1:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_6800(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_7180(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_7170(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, 
#tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_7540(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor) -> () + %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor + return %4 : tensor +} +func @WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_7530(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_7880(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/strided_slice_1:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_7870(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_RaggedGather_Assert_AssertGuard_false_8680(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = 
"tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_RaggedGather_Assert_AssertGuard_true_8670(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_RaggedGather_Assert_1_AssertGuard_false_9750(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_RaggedGather_Assert_1_AssertGuard_true_9740(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_RaggedGather_Assert_2_AssertGuard_false_10240(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_RaggedGather_Assert_2_AssertGuard_true_10230(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_RaggedGather_Assert_3_AssertGuard_false_10600(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = 
dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_RaggedGather_Assert_3_AssertGuard_true_10590(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_Assert_AssertGuard_false_15300(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_Assert_AssertGuard_true_15290(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_Assert_1_AssertGuard_false_16370(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_Assert_1_AssertGuard_true_16360(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_Assert_2_AssertGuard_false_16860(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : 
tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_Assert_2_AssertGuard_true_16850(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_Assert_3_AssertGuard_false_17220(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_Assert_3_AssertGuard_true_17210(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_RaggedGather_1_Assert_AssertGuard_false_21900(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_RaggedGather_1_Assert_AssertGuard_true_21890(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_RaggedGather_1_Assert_1_AssertGuard_false_22970(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %2 = "tf.Const"() {value = 
dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_RaggedGather_1_Assert_1_AssertGuard_true_22960(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_RaggedGather_1_Assert_2_AssertGuard_false_23460(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_RaggedGather_1_Assert_2_AssertGuard_true_23450(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_false_23820(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_true_23810(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} + +// CHECK: func @whitespace_tokenizer_rank1(%arg0: tensor<1x!tf.string> {tf._user_specified_name = "input"}) -> (tensor, tensor) attributes {sym_visibility = "private", tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf._input_shapes = [#tf.shape<1>], tf.signature.is_stateful} { +// CHECK: %0:2 = "tfl.custom"(%arg0) {custom_code = "tftext:WhitespaceTokenizer", 
custom_option = opaque<"tfl", "0x"> : tensor<0xi8>} : (tensor<1x!tf.string>) -> (tensor, tensor) +// CHECK: return %0#0, %0#1 : tensor, tensor + +func @whitespace_tokenizer_rank2(%arg0: tensor {tf._user_specified_name = "input"}) -> (tensor, tensor, tensor) attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape], tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<[]> : tensor<0xi64>} : () -> tensor<0xi64> + %1 = "tf.Const"() {value = dense : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<-1> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<[[0], [1]]> : tensor<2x1xi64>} : () -> tensor<2x1xi64> + %4 = "tf.Const"() {value = dense<[2, -1]> : tensor<2xi32>} : () -> tensor<2xi32> + %5 = "tf.Const"() {value = dense<2> : tensor} : () -> tensor + %6 = "tf.Const"() {value = dense<-1> : tensor<1xi32>} : () -> tensor<1xi32> + %7 = "tf.Const"() {value = dense<2> : tensor<1xi32>} : () -> tensor<1xi32> + %8 = "tf.Const"() {value = dense<[1, 0]> : tensor<2xi32>} : () -> tensor<2xi32> + %9 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %10 = "tf.Const"() {value = dense<2> : tensor} : () -> tensor + %11 = "tf.Const"() {value = dense<[]> : tensor<0xi32>} : () -> tensor<0xi32> + %12 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %13 = "tf.Const"() {value = dense<0> : tensor<1xi64>} : () -> tensor<1xi64> + %14 = "tf.Const"() {value = dense<1> : tensor<1xi64>} : () -> tensor<1xi64> + %15 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %16 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %17 = "tf.Const"() {value = dense<0> : tensor<1xi32>} : () -> tensor<1xi32> + %18 = "tf.Const"() {value = dense<1> : tensor<1xi32>} : () -> tensor<1xi32> + %19 = "tf.Shape"(%arg0) {device = ""} : (tensor) -> tensor<2xi64> + %20 = "tf.StridedSlice"(%19, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %21 = "tf.StridedSlice"(%19, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %22 = "tf.Mul"(%20, %21) {device = ""} : (tensor, tensor) -> tensor + %23 = "tf.Pack"(%22) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %24 = "tf.StridedSlice"(%19, %7, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi64> + %25 = "tf.ConcatV2"(%23, %24, %16) {device = ""} : (tensor<1xi64>, tensor<0xi64>, tensor) -> tensor<1xi64> + %26 = "tf.Reshape"(%arg0, %25) {device = ""} : (tensor, tensor<1xi64>) -> tensor + %27 = "tf.StringLength"(%26) {device = "", unit = "BYTE"} : (tensor) -> tensor + %28 = "tf.ExpandDims"(%27, %9) {device = ""} : (tensor, tensor) -> tensor + %29 = "tf.Cast"(%28) {Truncate = false, device = ""} : (tensor) -> tensor + %30 = "tf.Shape"(%29) {device = ""} : (tensor) -> tensor<2xi64> + %31 = "tf.StridedSlice"(%30, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %32 = 
"tf.StridedSlice"(%30, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %33 = "tf.Mul"(%31, %32) {device = ""} : (tensor, tensor) -> tensor + %34 = "tf.Pack"(%33) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %35 = "tf.StridedSlice"(%30, %7, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi64> + %36 = "tf.ConcatV2"(%34, %35, %16) {device = ""} : (tensor<1xi64>, tensor<0xi64>, tensor) -> tensor<1xi64> + %37 = "tf.Reshape"(%29, %36) {device = ""} : (tensor, tensor<1xi64>) -> tensor + %38 = "tf.StridedSlice"(%30, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %39 = "tf.AddV2"(%38, %15) {device = ""} : (tensor, tensor) -> tensor + %40 = "tf.Range"(%12, %39, %15) {device = ""} : (tensor, tensor, tensor) -> tensor + %41 = "tf.Mul"(%40, %15) {device = ""} : (tensor, tensor) -> tensor + %42 = "tf.Reshape"(%26, %6) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %43:3 = "tf.UnicodeDecodeWithOffsets"(%42) {Tsplits = i64, device = "", errors = "replace", input_encoding = "UTF-8", replace_control_characters = false, replacement_char = 65533 : i64} : (tensor) -> (tensor, tensor, tensor) + %44 = "tf.StridedSlice"(%43#0, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %45 = "tf.Shape"(%44) {device = ""} : (tensor) -> tensor<1xi32> + %46 = "tf.ConcatV2"(%45, %18, %16) {device = ""} : (tensor<1xi32>, tensor<1xi32>, tensor) -> tensor<2xi32> + %47 = "tf.Reshape"(%44, %46) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %48 = "tf.Shape"(%47) {device = ""} : (tensor) -> tensor<2xi64> + %49 = "tf.StridedSlice"(%48, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %50 = "tf.AddV2"(%49, %15) {device = ""} : (tensor, tensor) -> tensor + %51 = "tf.Range"(%12, %50, %15) {device = ""} : (tensor, tensor, tensor) -> tensor + %52 = "tf.Mul"(%51, %15) {device = ""} : (tensor, tensor) -> tensor + %53 = "tf.ExpandDims"(%52, %9) {device = ""} : (tensor, tensor) -> tensor + %54 = "tf.Shape"(%52) {device = ""} : (tensor) -> tensor<1xi32> + %55 = "tf.StridedSlice"(%54, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %56 = "tf.StridedSlice"(%54, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %57 = "tf.StridedSlice"(%54, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, 
tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %58 = "tf.StridedSlice"(%52, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %59 = "tf.StridedSlice"(%52, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %60 = "tf.Sub"(%58, %59) {device = ""} : (tensor, tensor) -> tensor + %61 = "tf.Shape"(%47) {device = ""} : (tensor) -> tensor<2xi32> + %62 = "tf.Cast"(%61) {Truncate = false, device = ""} : (tensor<2xi32>) -> tensor<2xi64> + %63 = "tf.StridedSlice"(%62, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %64 = "tf.Equal"(%63, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %65 = "tf.StridedSlice"(%62, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %66 = "tf.Equal"(%65, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %67 = "tf.StridedSlice"(%62, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %68 = "tf.Shape"(%47) {device = ""} : (tensor) -> tensor<2xi32> + %69 = "tf.Cast"(%68) {Truncate = false, device = ""} : (tensor<2xi32>) -> tensor<2xi64> + %70 = "tf.StridedSlice"(%69, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %71 = "tf.Equal"(%70, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %72 = "tf.StridedSlice"(%43#0, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %73 = "tf.AddV2"(%72, %15) {device = ""} : (tensor, tensor) -> tensor + %74 = "tf.StridedSlice"(%43#0, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %75 = "tf.Minimum"(%73, %74) {device = ""} : (tensor, tensor) -> tensor + %76:2 = "tf.RaggedRange"(%75, %74, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %77 = "tf.Shape"(%76#0) {device = ""} : (tensor) -> tensor<1xi64> + %78 = "tf.StridedSlice"(%77, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %79 = "tf.Sub"(%78, %15) {device = ""} : (tensor, tensor) -> tensor + %80 = "tf.Equal"(%38, %79) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %81 = "tf.All"(%80, %11) {device = "", 
keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %82 = "tf.If"(%81, %81, %38, %79) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedConcat_assert_equal_1_Assert_AssertGuard_false_99640, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedConcat_assert_equal_1_Assert_AssertGuard_true_99630} : (tensor, tensor, tensor, tensor) -> tensor + %83 = "tf.Identity"(%82) {device = ""} : (tensor) -> tensor + %84 = "tf.StridedSlice"(%41, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %85 = "tf.Mul"(%79, %5) {device = ""} : (tensor, tensor) -> tensor + %86 = "tf.Range"(%12, %85, %15) {device = ""} : (tensor, tensor, tensor) -> tensor + %87 = "tf.Reshape"(%86, %4) {device = ""} : (tensor, tensor<2xi32>) -> tensor<2x?xi64> + %88 = "tf.Transpose"(%87, %8) {device = ""} : (tensor<2x?xi64>, tensor<2xi32>) -> tensor + %89 = "tf.Reshape"(%88, %6) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %90 = "tf.StridedSlice"(%76#0, %6, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %91 = "tf.AddV2"(%84, %90) {device = ""} : (tensor, tensor) -> tensor + %92 = "tf.ConcatV2"(%76#0, %91, %16) {device = ""} : (tensor, tensor, tensor) -> tensor + %93 = "tf.GatherV2"(%43#2, %76#1, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %94 = "tf.ConcatV2"(%93, %37, %16) {device = ""} : (tensor, tensor, tensor) -> tensor + %95:2 = "tf.RaggedGather"(%92, %94, %89) {OUTPUT_RAGGED_RANK = 1 : i64, PARAMS_RAGGED_RANK = 1 : i64, Tindices = i64, Tsplits = i64, Tvalues = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %96 = "tf.StridedSlice"(%95#0, %17, %17, %7) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %97 = "tf.StridedSlice"(%96, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %98 = "tf.Shape"(%97) {device = ""} : (tensor) -> tensor<1xi32> + %99 = "tf.ConcatV2"(%98, %18, %16) {device = ""} : (tensor<1xi32>, tensor<1xi32>, tensor) -> tensor<2xi32> + %100 = "tf.Reshape"(%97, %99) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %101 = "tf.Shape"(%100) {device = ""} : (tensor) -> tensor<2xi64> + %102 = "tf.StridedSlice"(%101, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %103 = "tf.AddV2"(%102, %15) {device = ""} : (tensor, tensor) -> tensor + %104 = "tf.Range"(%12, %103, %15) {device = ""} : (tensor, tensor, tensor) -> tensor + %105 = "tf.Mul"(%104, %15) {device = ""} : (tensor, tensor) -> tensor + %106 = "tf.ExpandDims"(%105, %9) {device = ""} : (tensor, tensor) -> tensor + %107 = "tf.Shape"(%105) {device = ""} : (tensor) -> tensor<1xi32> + %108 = 
"tf.StridedSlice"(%107, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %109 = "tf.StridedSlice"(%107, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %110 = "tf.StridedSlice"(%107, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %111 = "tf.StridedSlice"(%105, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %112 = "tf.StridedSlice"(%105, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %113 = "tf.Sub"(%111, %112) {device = ""} : (tensor, tensor) -> tensor + %114 = "tf.Shape"(%100) {device = ""} : (tensor) -> tensor<2xi32> + %115 = "tf.Cast"(%114) {Truncate = false, device = ""} : (tensor<2xi32>) -> tensor<2xi64> + %116 = "tf.StridedSlice"(%115, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %117 = "tf.Equal"(%116, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %118 = "tf.StridedSlice"(%115, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %119 = "tf.Equal"(%118, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %120 = "tf.StridedSlice"(%115, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %121 = "tf.Shape"(%100) {device = ""} : (tensor) -> tensor<2xi32> + %122 = "tf.Cast"(%121) {Truncate = false, device = ""} : (tensor<2xi32>) -> tensor<2xi64> + %123 = "tf.StridedSlice"(%122, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %124 = "tf.Equal"(%123, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %125:5 = "tf.WhitespaceTokenizeWithOffsets"(%43#1, %43#0) {Tsplits = i64, device = ""} : (tensor, tensor) -> (tensor, tensor, tensor, tensor, tensor) + %126 = "tf.StridedSlice"(%125#1, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %127 = "tf.Equal"(%126, %12) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %128 = "tf.All"(%127, %11) {device = "", 
keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %129 = "tf.If"(%128, %128, %126, %12) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_100400, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_100390} : (tensor, tensor, tensor, tensor) -> tensor + %130 = "tf.Identity"(%129) {device = ""} : (tensor) -> tensor + %131 = "tf.StridedSlice"(%125#1, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %132 = "tf.StridedSlice"(%125#1, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %133 = "tf.Sub"(%131, %132) {device = ""} : (tensor, tensor) -> tensor + %134 = "tf.LessEqual"(%12, %133) {device = ""} : (tensor, tensor) -> tensor + %135 = "tf.All"(%134, %17) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %136 = "tf.If"(%135, %135, %133) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_100760, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_100750} : (tensor, tensor, tensor) -> tensor + %137 = "tf.Identity"(%136) {device = ""} : (tensor) -> tensor + %138 = "tf.Identity"(%125#1) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %139 = "tf.StridedSlice"(%138, %6, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %140 = "tf.Shape"(%125#0) {device = ""} : (tensor) -> tensor<1xi64> + %141 = "tf.StridedSlice"(%140, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %142 = "tf.Equal"(%139, %141) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %143 = "tf.All"(%142, %11) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %144 = "tf.If"(%143, %143, %139, %141) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_101100, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = 
@WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_101090} : (tensor, tensor, tensor, tensor) -> tensor + %145 = "tf.Identity"(%144) {device = ""} : (tensor) -> tensor + %146 = "tf.Identity"(%138) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %147 = "tf.Shape"(%146) {device = ""} : (tensor) -> tensor<1xi64> + %148 = "tf.StridedSlice"(%147, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %149 = "tf.Sub"(%148, %15) {device = ""} : (tensor, tensor) -> tensor + %150 = "tf.StridedSlice"(%125#4, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %151 = "tf.Equal"(%150, %12) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %152 = "tf.All"(%151, %11) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %153 = "tf.If"(%152, %152, %150, %12) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_101470, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_101460} : (tensor, tensor, tensor, tensor) -> tensor + %154 = "tf.Identity"(%153) {device = ""} : (tensor) -> tensor + %155 = "tf.StridedSlice"(%125#4, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %156 = "tf.StridedSlice"(%125#4, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %157 = "tf.Sub"(%155, %156) {device = ""} : (tensor, tensor) -> tensor + %158 = "tf.LessEqual"(%12, %157) {device = ""} : (tensor, tensor) -> tensor + %159 = "tf.All"(%158, %17) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %160 = "tf.If"(%159, %159, %157) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_101830, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_101820} : (tensor, tensor, tensor) -> tensor + %161 = "tf.Identity"(%160) {device = ""} : (tensor) -> tensor + %162 = "tf.Identity"(%125#4) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : 
(tensor) -> tensor + %163 = "tf.StridedSlice"(%162, %6, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %164 = "tf.Equal"(%163, %149) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %165 = "tf.All"(%164, %11) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %166 = "tf.If"(%165, %165, %163, %149) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_false_102190, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_true_102180} : (tensor, tensor, tensor, tensor) -> tensor + %167 = "tf.Identity"(%166) {device = ""} : (tensor) -> tensor + %168 = "tf.Identity"(%162) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %169 = "tf.StridedSlice"(%125#4, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %170 = "tf.Equal"(%169, %12) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %171 = "tf.All"(%170, %11) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %172 = "tf.If"(%171, %171, %169, %12) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_102540, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_102530} : (tensor, tensor, tensor, tensor) -> tensor + %173 = "tf.Identity"(%172) {device = ""} : (tensor) -> tensor + %174 = "tf.StridedSlice"(%125#4, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %175 = "tf.StridedSlice"(%125#4, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %176 = "tf.Sub"(%174, %175) {device = ""} : (tensor, tensor) -> tensor + %177 = "tf.LessEqual"(%12, %176) {device = ""} : (tensor, tensor) -> tensor + %178 = "tf.All"(%177, %17) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %179 = "tf.If"(%178, %178, %176) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_102900, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = 
@WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_102890} : (tensor, tensor, tensor) -> tensor + %180 = "tf.Identity"(%179) {device = ""} : (tensor) -> tensor + %181 = "tf.Identity"(%125#4) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %182 = "tf.StridedSlice"(%181, %6, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %183 = "tf.Shape"(%125#2) {device = ""} : (tensor) -> tensor<1xi64> + %184 = "tf.StridedSlice"(%183, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %185 = "tf.Equal"(%182, %184) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %186 = "tf.All"(%185, %11) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %187 = "tf.If"(%186, %186, %182, %184) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_103240, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_103230} : (tensor, tensor, tensor, tensor) -> tensor + %188 = "tf.Identity"(%187) {device = ""} : (tensor) -> tensor + %189 = "tf.Identity"(%181) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %190 = "tf.Shape"(%189) {device = ""} : (tensor) -> tensor<1xi64> + %191 = "tf.StridedSlice"(%190, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %192 = "tf.Sub"(%191, %15) {device = ""} : (tensor, tensor) -> tensor + %193 = "tf.Equal"(%192, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %194 = "tf.LogicalOr"(%64, %193) {device = ""} : (tensor, tensor) -> tensor + %195 = "tf.Equal"(%192, %63) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %196 = "tf.LogicalOr"(%194, %195) {device = ""} : (tensor, tensor) -> tensor + %197 = "tf.StridedSlice"(%189, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %198 = "tf.StridedSlice"(%189, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %199 = "tf.Sub"(%197, %198) {device = ""} : (tensor, tensor) -> tensor + %200 = "tf.Shape"(%189) {device = ""} : (tensor) -> tensor<1xi64> + %201 = "tf.StridedSlice"(%200, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, 
end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %202 = "tf.Sub"(%201, %15) {device = ""} : (tensor, tensor) -> tensor + %203 = "tf.Equal"(%202, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %204 = "tf.ExpandDims"(%189, %9) {device = ""} : (tensor, tensor) -> tensor + %205 = "tf.Shape"(%189) {device = ""} : (tensor) -> tensor<1xi32> + %206 = "tf.StridedSlice"(%205, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %207 = "tf.StridedSlice"(%205, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %208 = "tf.StridedSlice"(%205, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %209 = "tf.StridedSlice"(%125#4, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %210 = "tf.Equal"(%209, %12) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %211 = "tf.All"(%210, %11) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %212 = "tf.If"(%211, %211, %209, %12) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_103610, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_103600} : (tensor, tensor, tensor, tensor) -> tensor + %213 = "tf.Identity"(%212) {device = ""} : (tensor) -> tensor + %214 = "tf.StridedSlice"(%125#4, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %215 = "tf.StridedSlice"(%125#4, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %216 = "tf.Sub"(%214, %215) {device = ""} : (tensor, tensor) -> tensor + %217 = "tf.LessEqual"(%12, %216) {device = ""} : (tensor, tensor) -> tensor + %218 = "tf.All"(%217, %17) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %219 = "tf.If"(%218, %218, %216) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_103970, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = 
@WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_103960} : (tensor, tensor, tensor) -> tensor + %220 = "tf.Identity"(%219) {device = ""} : (tensor) -> tensor + %221 = "tf.Identity"(%125#4) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %222 = "tf.StridedSlice"(%221, %6, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %223 = "tf.Shape"(%125#3) {device = ""} : (tensor) -> tensor<1xi64> + %224 = "tf.StridedSlice"(%223, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %225 = "tf.Equal"(%222, %224) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %226 = "tf.All"(%225, %11) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %227 = "tf.If"(%226, %226, %222, %224) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_104310, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_104300} : (tensor, tensor, tensor, tensor) -> tensor + %228 = "tf.Identity"(%227) {device = ""} : (tensor) -> tensor + %229 = "tf.Identity"(%221) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %230 = "tf.Shape"(%229) {device = ""} : (tensor) -> tensor<1xi64> + %231 = "tf.StridedSlice"(%230, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %232 = "tf.Sub"(%231, %15) {device = ""} : (tensor, tensor) -> tensor + %233 = "tf.Equal"(%232, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %234 = "tf.LogicalOr"(%233, %1) {device = ""} : (tensor, tensor) -> tensor + %235 = "tf.Equal"(%232, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %236 = "tf.LogicalOr"(%234, %235) {device = ""} : (tensor, tensor) -> tensor + %237 = "tf.StridedSlice"(%229, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %238 = "tf.StridedSlice"(%229, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %239 = "tf.Sub"(%237, %238) {device = ""} : (tensor, tensor) -> tensor + %240 = "tf.Shape"(%229) {device = ""} : (tensor) -> tensor<1xi64> + %241 = "tf.StridedSlice"(%240, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, 
end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %242 = "tf.Sub"(%241, %15) {device = ""} : (tensor, tensor) -> tensor + %243 = "tf.Equal"(%242, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %244 = "tf.ExpandDims"(%229, %9) {device = ""} : (tensor, tensor) -> tensor + %245 = "tf.Shape"(%229) {device = ""} : (tensor) -> tensor<1xi32> + %246 = "tf.StridedSlice"(%245, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %247 = "tf.StridedSlice"(%245, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %248 = "tf.StridedSlice"(%245, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %249 = "tf.StridedSlice"(%229, %6, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %250 = "tf.Range"(%12, %249, %15) {device = ""} : (tensor, tensor, tensor) -> tensor + %251 = "tf.StridedSlice"(%229, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %252 = "tf.StridedSlice"(%229, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %253 = "tf.Sub"(%251, %252) {device = ""} : (tensor, tensor) -> tensor + %254 = "tf.If"(%196, %196, %63, %192) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_AssertGuard_false_105110, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_AssertGuard_true_105100} : (tensor, tensor, tensor, tensor) -> tensor + %255 = "tf.Identity"(%254) {device = ""} : (tensor) -> tensor + %256 = "tf.Equal"(%192, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %257 = "tf.Select"(%256, %63, %192) {device = ""} : (tensor, tensor, tensor) -> tensor + %258 = "tf.Equal"(%257, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %259 = "tf.LogicalOr"(%258, %66) {device = ""} : (tensor, tensor) -> tensor + %260 = "tf.Equal"(%65, %257) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %261 = "tf.LogicalOr"(%259, %260) {device = ""} : (tensor, tensor) -> tensor + %262 = "tf.Select"(%203, %257, %15) {device = ""} : (tensor, tensor, tensor) -> tensor + %263 = "tf.Pack"(%262, %15) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %264 = "tf.StridedSlice"(%263, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, 
new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %265 = "tf.Cast"(%264) {Truncate = false, device = ""} : (tensor) -> tensor + %266 = "tf.Reshape"(%265, %11) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %267 = "tf.Pack"(%9, %266) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %268 = "tf.Tile"(%204, %267) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %269 = "tf.Mul"(%266, %207) {device = ""} : (tensor, tensor) -> tensor + %270 = "tf.Pack"(%269) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %271 = "tf.ConcatV2"(%206, %270, %208, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %272 = "tf.Reshape"(%268, %271) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %273 = "tf.Shape"(%272) {device = ""} : (tensor) -> tensor<1xi64> + %274 = "tf.StridedSlice"(%273, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %275 = "tf.Pack"(%264) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %276 = "tf.StridedSlice"(%272, %275, %13, %14) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %277 = "tf.Sub"(%274, %264) {device = ""} : (tensor, tensor) -> tensor + %278 = "tf.Pack"(%277) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %279 = "tf.StridedSlice"(%272, %13, %278, %14) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %280:2 = "tf.RaggedRange"(%279, %276, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %281 = "tf.Select"(%71, %257, %15) {device = ""} : (tensor, tensor, tensor) -> tensor + %282 = "tf.Pack"(%281, %15) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %283 = "tf.StridedSlice"(%282, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %284 = "tf.Cast"(%283) {Truncate = false, device = ""} : (tensor) -> tensor + %285 = "tf.Reshape"(%284, %11) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %286 = "tf.Pack"(%9, %285) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %287 = "tf.Tile"(%53, %286) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %288 = "tf.Mul"(%285, %56) {device = ""} : (tensor, tensor) -> tensor + %289 = "tf.Pack"(%288) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %290 = "tf.ConcatV2"(%55, %289, %57, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %291 = "tf.Reshape"(%287, %290) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %292 = "tf.Shape"(%291) {device = ""} : (tensor) -> tensor<1xi64> + %293 = "tf.StridedSlice"(%292, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %294 = "tf.Pack"(%283) {axis = 0 : i64, device = ""} : 
(tensor) -> tensor<1xi64> + %295 = "tf.StridedSlice"(%291, %294, %13, %14) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %296 = "tf.Sub"(%293, %283) {device = ""} : (tensor, tensor) -> tensor + %297 = "tf.Pack"(%296) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %298 = "tf.StridedSlice"(%291, %13, %297, %14) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %299:2 = "tf.RaggedRange"(%298, %295, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %300 = "tf.StridedSlice"(%282, %17, %18, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %301 = "tf.StridedSlice"(%282, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %302 = "tf.Mul"(%60, %301) {device = ""} : (tensor, tensor) -> tensor + %303 = "tf.Tile"(%302, %300) {device = ""} : (tensor, tensor<1xi64>) -> tensor + %304 = "tf.Cumsum"(%303, %16) {device = "", exclusive = false, reverse = false} : (tensor, tensor) -> tensor + %305 = "tf.ConcatV2"(%13, %304, %2) {device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor + %306 = "tf.StridedSlice"(%305, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %307 = "tf.ExpandDims"(%306, %9) {device = ""} : (tensor, tensor) -> tensor + %308 = "tf.Shape"(%306) {device = ""} : (tensor) -> tensor<1xi32> + %309 = "tf.StridedSlice"(%308, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %310 = "tf.Pack"(%309) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %311 = "tf.StridedSlice"(%305, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %312 = "tf.ExpandDims"(%311, %9) {device = ""} : (tensor, tensor) -> tensor + %313 = "tf.Shape"(%311) {device = ""} : (tensor) -> tensor<1xi32> + %314 = "tf.StridedSlice"(%313, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %315 = "tf.Pack"(%314) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %316 = "tf.Equal"(%192, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %317 = "tf.Select"(%316, %257, %15) {device = ""} : (tensor, tensor, tensor) -> tensor + %318 = "tf.Cast"(%317) {Truncate = false, device = ""} : (tensor) -> tensor + %319 = "tf.Reshape"(%318, %11) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %320 = "tf.Pack"(%9, %319) {axis = 0 : i64, 
device = ""} : (tensor, tensor) -> tensor<2xi32> + %321 = "tf.Mul"(%319, %10) {device = ""} : (tensor, tensor) -> tensor + %322 = "tf.Pack"(%321) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %323 = "tf.ConcatV2"(%11, %322, %11, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %324 = "tf.Pack"(%317) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %325 = "tf.Pack"(%12, %192) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %326 = "tf.ExpandDims"(%325, %9) {device = ""} : (tensor<2xi64>, tensor) -> tensor<2x1xi64> + %327 = "tf.Tile"(%326, %320) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> + %328 = "tf.Reshape"(%327, %323) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor + %329 = "tf.Shape"(%328) {device = ""} : (tensor) -> tensor<1xi64> + %330 = "tf.StridedSlice"(%329, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %331 = "tf.Sub"(%330, %317) {device = ""} : (tensor, tensor) -> tensor + %332 = "tf.Pack"(%331) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %333 = "tf.StridedSlice"(%328, %13, %332, %14) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %334 = "tf.StridedSlice"(%328, %324, %13, %14) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %335:2 = "tf.RaggedRange"(%333, %334, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %336 = "tf.GatherV2"(%199, %335#1, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %337 = "tf.Cast"(%336) {Truncate = false, device = ""} : (tensor) -> tensor + %338 = "tf.BroadcastTo"(%337, %310) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %339 = "tf.Max"(%338, %17) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %340 = "tf.Maximum"(%16, %339) {device = ""} : (tensor, tensor) -> tensor + %341 = "tf.Range"(%16, %340, %9) {device = ""} : (tensor, tensor, tensor) -> tensor + %342 = "tf.Pack"(%9, %340) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %343 = "tf.Tile"(%307, %342) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %344 = "tf.Shape"(%343) {device = ""} : (tensor) -> tensor<2xi32> + %345 = "tf.StridedSlice"(%344, %17, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %346 = "tf.Prod"(%345, %17) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor + %347 = "tf.Pack"(%346) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %348 = "tf.Shape"(%343) {device = ""} : (tensor) -> tensor<2xi32> + %349 = "tf.StridedSlice"(%348, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %350 = "tf.Shape"(%343) {device = ""} : (tensor) -> tensor<2xi32> + %351 = 
"tf.StridedSlice"(%350, %7, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %352 = "tf.ConcatV2"(%349, %347, %351, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %353 = "tf.Reshape"(%343, %352) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %354 = "tf.ExpandDims"(%338, %2) {device = ""} : (tensor, tensor) -> tensor + %355 = "tf.Less"(%341, %354) {device = ""} : (tensor, tensor) -> tensor + %356 = "tf.Reshape"(%355, %6) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %357 = "tf.Where"(%356) {device = ""} : (tensor) -> tensor + %358 = "tf.Squeeze"(%357) {device = "", squeeze_dims = [1]} : (tensor) -> tensor + %359 = "tf.GatherV2"(%353, %358, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %360 = "tf.Cast"(%336) {Truncate = false, device = ""} : (tensor) -> tensor + %361 = "tf.BroadcastTo"(%360, %315) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %362 = "tf.Max"(%361, %17) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %363 = "tf.Maximum"(%16, %362) {device = ""} : (tensor, tensor) -> tensor + %364 = "tf.Range"(%16, %363, %9) {device = ""} : (tensor, tensor, tensor) -> tensor + %365 = "tf.Pack"(%9, %363) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %366 = "tf.Tile"(%312, %365) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %367 = "tf.Shape"(%366) {device = ""} : (tensor) -> tensor<2xi32> + %368 = "tf.StridedSlice"(%367, %17, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %369 = "tf.Prod"(%368, %17) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor + %370 = "tf.Pack"(%369) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %371 = "tf.Shape"(%366) {device = ""} : (tensor) -> tensor<2xi32> + %372 = "tf.StridedSlice"(%371, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %373 = "tf.Shape"(%366) {device = ""} : (tensor) -> tensor<2xi32> + %374 = "tf.StridedSlice"(%373, %7, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %375 = "tf.ConcatV2"(%372, %370, %374, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %376 = "tf.Reshape"(%366, %375) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %377 = "tf.ExpandDims"(%361, %2) {device = ""} : (tensor, tensor) -> tensor + %378 = "tf.Less"(%364, %377) {device = ""} : (tensor, tensor) -> tensor + %379 = "tf.Reshape"(%378, %6) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %380 = "tf.Where"(%379) {device = ""} : (tensor) -> tensor + %381 = "tf.Squeeze"(%380) {device = "", squeeze_dims = [1]} : (tensor) -> tensor + %382 = "tf.GatherV2"(%376, %381, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %383:2 = "tf.RaggedRange"(%359, %382, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, 
tensor) -> (tensor, tensor) + %384 = "tf.If"(%261, %261, %257, %67) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_1_AssertGuard_false_106180, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_1_AssertGuard_true_106170} : (tensor, tensor, tensor, tensor) -> tensor + %385 = "tf.Identity"(%384) {device = ""} : (tensor) -> tensor + %386 = "tf.StridedSlice"(%62, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %387 = "tf.Equal"(%386, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %388 = "tf.Select"(%387, %257, %386) {device = ""} : (tensor, tensor, tensor) -> tensor + %389 = "tf.Pack"(%388) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %390 = "tf.StridedSlice"(%62, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi64> + %391 = "tf.StridedSlice"(%62, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %392 = "tf.ConcatV2"(%390, %389, %391, %16) {device = ""} : (tensor<0xi64>, tensor<1xi64>, tensor<1xi64>, tensor) -> tensor<2xi64> + %393 = "tf.StridedSlice"(%392, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %394 = "tf.Equal"(%393, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %395 = "tf.StridedSlice"(%392, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %396 = "tf.StridedSlice"(%392, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %397 = "tf.Equal"(%396, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %398 = "tf.If"(%397, %397, %396, %336) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_2_AssertGuard_false_106670, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_2_AssertGuard_true_106660} : (tensor, tensor, tensor, tensor) -> tensor + %399 = "tf.Identity"(%398) {device = ""} : (tensor) -> tensor + %400 = "tf.If"(%394, %394, %336, %395) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_3_AssertGuard_false_107030, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = 
@WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_3_AssertGuard_true_107020} : (tensor, tensor, tensor, tensor) -> tensor + %401 = "tf.If"(%236, %236, %15, %232) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_AssertGuard_false_111870, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_AssertGuard_true_111860} : (tensor, tensor, tensor, tensor) -> tensor + %402 = "tf.Identity"(%401) {device = ""} : (tensor) -> tensor + %403 = "tf.Equal"(%232, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %404 = "tf.Select"(%403, %15, %232) {device = ""} : (tensor, tensor, tensor) -> tensor + %405 = "tf.Equal"(%404, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %406 = "tf.LogicalOr"(%405, %1) {device = ""} : (tensor, tensor) -> tensor + %407 = "tf.Equal"(%404, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %408 = "tf.LogicalOr"(%406, %407) {device = ""} : (tensor, tensor) -> tensor + %409 = "tf.Select"(%243, %404, %15) {device = ""} : (tensor, tensor, tensor) -> tensor + %410 = "tf.Pack"(%409, %15) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %411 = "tf.StridedSlice"(%410, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %412 = "tf.Cast"(%411) {Truncate = false, device = ""} : (tensor) -> tensor + %413 = "tf.Reshape"(%412, %11) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %414 = "tf.Pack"(%9, %413) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %415 = "tf.Tile"(%244, %414) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %416 = "tf.Mul"(%413, %247) {device = ""} : (tensor, tensor) -> tensor + %417 = "tf.Pack"(%416) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %418 = "tf.ConcatV2"(%246, %417, %248, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %419 = "tf.Reshape"(%415, %418) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %420 = "tf.Shape"(%419) {device = ""} : (tensor) -> tensor<1xi64> + %421 = "tf.StridedSlice"(%420, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %422 = "tf.Pack"(%411) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %423 = "tf.StridedSlice"(%419, %422, %13, %14) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %424 = "tf.Sub"(%421, %411) {device = ""} : (tensor, tensor) -> tensor + %425 = "tf.Pack"(%424) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %426 = "tf.StridedSlice"(%419, %13, %425, %14) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %427:2 = "tf.RaggedRange"(%426, %423, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %428 
= "tf.GatherV2"(%250, %427#1, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %429 = "tf.StridedSlice"(%410, %17, %18, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %430 = "tf.StridedSlice"(%410, %17, %18, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %431 = "tf.StridedSlice"(%410, %7, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi64> + %432 = "tf.ConcatV2"(%430, %431, %16) {device = ""} : (tensor<1xi64>, tensor<0xi64>, tensor) -> tensor<1xi64> + %433 = "tf.StridedSlice"(%410, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %434 = "tf.Mul"(%253, %433) {device = ""} : (tensor, tensor) -> tensor + %435 = "tf.Tile"(%434, %429) {device = ""} : (tensor, tensor<1xi64>) -> tensor + %436 = "tf.Cumsum"(%435, %16) {device = "", exclusive = false, reverse = false} : (tensor, tensor) -> tensor + %437 = "tf.ConcatV2"(%13, %436, %2) {device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor + %438 = "tf.Shape"(%437) {device = ""} : (tensor) -> tensor<1xi64> + %439 = "tf.StridedSlice"(%438, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %440 = "tf.Sub"(%439, %15) {device = ""} : (tensor, tensor) -> tensor + %441 = "tf.Equal"(%440, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %442 = "tf.LogicalOr"(%117, %441) {device = ""} : (tensor, tensor) -> tensor + %443 = "tf.Equal"(%440, %116) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %444 = "tf.LogicalOr"(%442, %443) {device = ""} : (tensor, tensor) -> tensor + %445 = "tf.StridedSlice"(%437, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %446 = "tf.StridedSlice"(%437, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %447 = "tf.Sub"(%445, %446) {device = ""} : (tensor, tensor) -> tensor + %448 = "tf.Shape"(%437) {device = ""} : (tensor) -> tensor<1xi64> + %449 = "tf.StridedSlice"(%448, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %450 = "tf.Sub"(%449, %15) {device = ""} : (tensor, tensor) -> tensor + %451 = "tf.Equal"(%450, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %452 = "tf.ExpandDims"(%437, %9) {device = ""} : (tensor, tensor) -> tensor + %453 = 
"tf.Shape"(%437) {device = ""} : (tensor) -> tensor<1xi32> + %454 = "tf.StridedSlice"(%453, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %455 = "tf.StridedSlice"(%453, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %456 = "tf.StridedSlice"(%453, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %457 = "tf.Select"(%1, %404, %15) {device = ""} : (tensor, tensor, tensor) -> tensor + %458 = "tf.Pack"(%457, %15) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %459 = "tf.StridedSlice"(%458, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %460 = "tf.Cast"(%459) {Truncate = false, device = ""} : (tensor) -> tensor + %461 = "tf.Reshape"(%460, %11) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %462 = "tf.Pack"(%9, %461) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %463 = "tf.Tile"(%3, %462) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> + %464 = "tf.Mul"(%461, %10) {device = ""} : (tensor, tensor) -> tensor + %465 = "tf.Pack"(%464) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %466 = "tf.ConcatV2"(%11, %465, %11, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %467 = "tf.Reshape"(%463, %466) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor + %468 = "tf.Shape"(%467) {device = ""} : (tensor) -> tensor<1xi64> + %469 = "tf.StridedSlice"(%468, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %470 = "tf.Pack"(%459) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %471 = "tf.StridedSlice"(%467, %470, %13, %14) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %472 = "tf.Sub"(%469, %459) {device = ""} : (tensor, tensor) -> tensor + %473 = "tf.Pack"(%472) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %474 = "tf.StridedSlice"(%467, %13, %473, %14) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %475:2 = "tf.RaggedRange"(%474, %471, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %476 = "tf.GatherV2"(%13, %475#1, %16) {batch_dims = 0 : i64, device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor + %477 = "tf.GatherV2"(%14, %476, %16) {batch_dims = 0 : i64, device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor + %478 = "tf.StridedSlice"(%458, %17, %18, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask 
= 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %479 = "tf.StridedSlice"(%458, %17, %18, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %480 = "tf.StridedSlice"(%458, %7, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi64> + %481 = "tf.ConcatV2"(%479, %480, %16) {device = ""} : (tensor<1xi64>, tensor<0xi64>, tensor) -> tensor<1xi64> + %482 = "tf.Tile"(%477, %481) {device = ""} : (tensor, tensor<1xi64>) -> tensor + %483 = "tf.StridedSlice"(%458, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %484 = "tf.Mul"(%483, %14) {device = ""} : (tensor, tensor<1xi64>) -> tensor<1xi64> + %485 = "tf.Tile"(%484, %478) {device = ""} : (tensor<1xi64>, tensor<1xi64>) -> tensor + %486 = "tf.Cumsum"(%485, %16) {device = "", exclusive = false, reverse = false} : (tensor, tensor) -> tensor + %487 = "tf.ConcatV2"(%13, %486, %2) {device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor + %488 = "tf.StridedSlice"(%487, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %489 = "tf.ExpandDims"(%488, %9) {device = ""} : (tensor, tensor) -> tensor + %490 = "tf.Shape"(%488) {device = ""} : (tensor) -> tensor<1xi32> + %491 = "tf.StridedSlice"(%490, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %492 = "tf.Pack"(%491) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %493 = "tf.StridedSlice"(%487, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %494 = "tf.ExpandDims"(%493, %9) {device = ""} : (tensor, tensor) -> tensor + %495 = "tf.Shape"(%493) {device = ""} : (tensor) -> tensor<1xi32> + %496 = "tf.StridedSlice"(%495, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %497 = "tf.Pack"(%496) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %498 = "tf.Equal"(%232, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %499 = "tf.Select"(%498, %404, %15) {device = ""} : (tensor, tensor, tensor) -> tensor + %500 = "tf.Cast"(%499) {Truncate = false, device = ""} : (tensor) -> tensor + %501 = "tf.Reshape"(%500, %11) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %502 = "tf.Pack"(%9, %501) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %503 = "tf.Mul"(%501, %10) {device = ""} : (tensor, tensor) -> tensor + %504 = "tf.Pack"(%503) {axis = 0 : i64, device = ""} : (tensor) -> 
tensor<1xi32> + %505 = "tf.ConcatV2"(%11, %504, %11, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %506 = "tf.Pack"(%499) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %507 = "tf.Pack"(%12, %232) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %508 = "tf.ExpandDims"(%507, %9) {device = ""} : (tensor<2xi64>, tensor) -> tensor<2x1xi64> + %509 = "tf.Tile"(%508, %502) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> + %510 = "tf.Reshape"(%509, %505) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor + %511 = "tf.Shape"(%510) {device = ""} : (tensor) -> tensor<1xi64> + %512 = "tf.StridedSlice"(%511, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %513 = "tf.Sub"(%512, %499) {device = ""} : (tensor, tensor) -> tensor + %514 = "tf.Pack"(%513) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %515 = "tf.StridedSlice"(%510, %13, %514, %14) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %516 = "tf.StridedSlice"(%510, %506, %13, %14) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %517:2 = "tf.RaggedRange"(%515, %516, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %518 = "tf.GatherV2"(%239, %517#1, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %519 = "tf.Cast"(%518) {Truncate = false, device = ""} : (tensor) -> tensor + %520 = "tf.BroadcastTo"(%519, %492) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %521 = "tf.Max"(%520, %17) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %522 = "tf.Maximum"(%16, %521) {device = ""} : (tensor, tensor) -> tensor + %523 = "tf.Range"(%16, %522, %9) {device = ""} : (tensor, tensor, tensor) -> tensor + %524 = "tf.Pack"(%9, %522) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %525 = "tf.Tile"(%489, %524) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %526 = "tf.Shape"(%525) {device = ""} : (tensor) -> tensor<2xi32> + %527 = "tf.StridedSlice"(%526, %17, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %528 = "tf.Prod"(%527, %17) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor + %529 = "tf.Pack"(%528) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %530 = "tf.Shape"(%525) {device = ""} : (tensor) -> tensor<2xi32> + %531 = "tf.StridedSlice"(%530, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %532 = "tf.Shape"(%525) {device = ""} : (tensor) -> tensor<2xi32> + %533 = "tf.StridedSlice"(%532, %7, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, 
tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %534 = "tf.ConcatV2"(%531, %529, %533, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %535 = "tf.Reshape"(%525, %534) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %536 = "tf.ExpandDims"(%520, %2) {device = ""} : (tensor, tensor) -> tensor + %537 = "tf.Less"(%523, %536) {device = ""} : (tensor, tensor) -> tensor + %538 = "tf.Reshape"(%537, %6) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %539 = "tf.Where"(%538) {device = ""} : (tensor) -> tensor + %540 = "tf.Squeeze"(%539) {device = "", squeeze_dims = [1]} : (tensor) -> tensor + %541 = "tf.GatherV2"(%535, %540, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %542 = "tf.Cast"(%518) {Truncate = false, device = ""} : (tensor) -> tensor + %543 = "tf.BroadcastTo"(%542, %497) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %544 = "tf.Max"(%543, %17) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %545 = "tf.Maximum"(%16, %544) {device = ""} : (tensor, tensor) -> tensor + %546 = "tf.Range"(%16, %545, %9) {device = ""} : (tensor, tensor, tensor) -> tensor + %547 = "tf.Pack"(%9, %545) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %548 = "tf.Tile"(%494, %547) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %549 = "tf.Shape"(%548) {device = ""} : (tensor) -> tensor<2xi32> + %550 = "tf.StridedSlice"(%549, %17, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %551 = "tf.Prod"(%550, %17) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor + %552 = "tf.Pack"(%551) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %553 = "tf.Shape"(%548) {device = ""} : (tensor) -> tensor<2xi32> + %554 = "tf.StridedSlice"(%553, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %555 = "tf.Shape"(%548) {device = ""} : (tensor) -> tensor<2xi32> + %556 = "tf.StridedSlice"(%555, %7, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %557 = "tf.ConcatV2"(%554, %552, %556, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %558 = "tf.Reshape"(%548, %557) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %559 = "tf.ExpandDims"(%543, %2) {device = ""} : (tensor, tensor) -> tensor + %560 = "tf.Less"(%546, %559) {device = ""} : (tensor, tensor) -> tensor + %561 = "tf.Reshape"(%560, %6) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %562 = "tf.Where"(%561) {device = ""} : (tensor) -> tensor + %563 = "tf.Squeeze"(%562) {device = "", squeeze_dims = [1]} : (tensor) -> tensor + %564 = "tf.GatherV2"(%558, %563, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %565:2 = "tf.RaggedRange"(%541, %564, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %566 = "tf.GatherV2"(%482, %565#1, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %567 = "tf.If"(%408, %408, %404, %15) 
{_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_1_AssertGuard_false_112940, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_1_AssertGuard_true_112930} : (tensor, tensor, tensor, tensor) -> tensor + %568 = "tf.Identity"(%567) {device = ""} : (tensor) -> tensor + %569 = "tf.Select"(%1, %404, %15) {device = ""} : (tensor, tensor, tensor) -> tensor + %570 = "tf.Pack"(%569) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %571 = "tf.ConcatV2"(%0, %570, %14, %16) {device = ""} : (tensor<0xi64>, tensor<1xi64>, tensor<1xi64>, tensor) -> tensor<2xi64> + %572 = "tf.StridedSlice"(%571, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %573 = "tf.Equal"(%572, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %574 = "tf.StridedSlice"(%571, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %575 = "tf.StridedSlice"(%571, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %576 = "tf.Equal"(%575, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %577 = "tf.If"(%576, %576, %575, %518) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_2_AssertGuard_false_113430, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_2_AssertGuard_true_113420} : (tensor, tensor, tensor, tensor) -> tensor + %578 = "tf.Identity"(%577) {device = ""} : (tensor) -> tensor + %579 = "tf.If"(%573, %573, %518, %574) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_3_AssertGuard_false_113790, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_3_AssertGuard_true_113780} : (tensor, tensor, tensor, tensor) -> tensor + %580 = "tf.Identity"(%579) {device = ""} : (tensor) -> tensor + %581 = "tf.If"(%444, %444, %116, %440) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_AssertGuard_false_118470, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_AssertGuard_true_118460} : (tensor, tensor, tensor, tensor) -> tensor + %582 = "tf.Identity"(%581) {device = ""} : (tensor) -> tensor + %583 = "tf.Equal"(%440, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %584 = "tf.Select"(%583, %116, %440) {device = ""} : (tensor, tensor, tensor) -> tensor + %585 = "tf.Equal"(%584, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + 
%586 = "tf.LogicalOr"(%585, %119) {device = ""} : (tensor, tensor) -> tensor + %587 = "tf.Equal"(%118, %584) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %588 = "tf.LogicalOr"(%586, %587) {device = ""} : (tensor, tensor) -> tensor + %589 = "tf.Select"(%451, %584, %15) {device = ""} : (tensor, tensor, tensor) -> tensor + %590 = "tf.Pack"(%589, %15) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %591 = "tf.StridedSlice"(%590, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %592 = "tf.Cast"(%591) {Truncate = false, device = ""} : (tensor) -> tensor + %593 = "tf.Reshape"(%592, %11) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %594 = "tf.Pack"(%9, %593) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %595 = "tf.Tile"(%452, %594) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %596 = "tf.Mul"(%593, %455) {device = ""} : (tensor, tensor) -> tensor + %597 = "tf.Pack"(%596) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %598 = "tf.ConcatV2"(%454, %597, %456, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %599 = "tf.Reshape"(%595, %598) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %600 = "tf.Shape"(%599) {device = ""} : (tensor) -> tensor<1xi64> + %601 = "tf.StridedSlice"(%600, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %602 = "tf.Pack"(%591) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %603 = "tf.StridedSlice"(%599, %602, %13, %14) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %604 = "tf.Sub"(%601, %591) {device = ""} : (tensor, tensor) -> tensor + %605 = "tf.Pack"(%604) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %606 = "tf.StridedSlice"(%599, %13, %605, %14) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %607:2 = "tf.RaggedRange"(%606, %603, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %608 = "tf.Select"(%124, %584, %15) {device = ""} : (tensor, tensor, tensor) -> tensor + %609 = "tf.Pack"(%608, %15) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %610 = "tf.StridedSlice"(%609, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %611 = "tf.Cast"(%610) {Truncate = false, device = ""} : (tensor) -> tensor + %612 = "tf.Reshape"(%611, %11) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %613 = "tf.Pack"(%9, %612) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %614 = "tf.Tile"(%106, %613) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %615 = "tf.Mul"(%612, %109) {device = ""} : (tensor, tensor) -> tensor + %616 = "tf.Pack"(%615) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %617 = 
"tf.ConcatV2"(%108, %616, %110, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %618 = "tf.Reshape"(%614, %617) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %619 = "tf.Shape"(%618) {device = ""} : (tensor) -> tensor<1xi64> + %620 = "tf.StridedSlice"(%619, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %621 = "tf.Pack"(%610) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %622 = "tf.StridedSlice"(%618, %621, %13, %14) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %623 = "tf.Sub"(%620, %610) {device = ""} : (tensor, tensor) -> tensor + %624 = "tf.Pack"(%623) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %625 = "tf.StridedSlice"(%618, %13, %624, %14) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %626:2 = "tf.RaggedRange"(%625, %622, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %627 = "tf.StridedSlice"(%609, %17, %18, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %628 = "tf.StridedSlice"(%609, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %629 = "tf.Mul"(%113, %628) {device = ""} : (tensor, tensor) -> tensor + %630 = "tf.Tile"(%629, %627) {device = ""} : (tensor, tensor<1xi64>) -> tensor + %631 = "tf.Cumsum"(%630, %16) {device = "", exclusive = false, reverse = false} : (tensor, tensor) -> tensor + %632 = "tf.ConcatV2"(%13, %631, %2) {device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor + %633 = "tf.StridedSlice"(%632, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %634 = "tf.ExpandDims"(%633, %9) {device = ""} : (tensor, tensor) -> tensor + %635 = "tf.Shape"(%633) {device = ""} : (tensor) -> tensor<1xi32> + %636 = "tf.StridedSlice"(%635, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %637 = "tf.Pack"(%636) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %638 = "tf.StridedSlice"(%632, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %639 = "tf.ExpandDims"(%638, %9) {device = ""} : (tensor, tensor) -> tensor + %640 = "tf.Shape"(%638) {device = ""} : (tensor) -> tensor<1xi32> + %641 = "tf.StridedSlice"(%640, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, 
shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %642 = "tf.Pack"(%641) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %643 = "tf.Equal"(%440, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %644 = "tf.Select"(%643, %584, %15) {device = ""} : (tensor, tensor, tensor) -> tensor + %645 = "tf.Cast"(%644) {Truncate = false, device = ""} : (tensor) -> tensor + %646 = "tf.Reshape"(%645, %11) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %647 = "tf.Pack"(%9, %646) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %648 = "tf.Mul"(%646, %10) {device = ""} : (tensor, tensor) -> tensor + %649 = "tf.Pack"(%648) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %650 = "tf.ConcatV2"(%11, %649, %11, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %651 = "tf.Pack"(%644) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %652 = "tf.Pack"(%12, %440) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %653 = "tf.ExpandDims"(%652, %9) {device = ""} : (tensor<2xi64>, tensor) -> tensor<2x1xi64> + %654 = "tf.Tile"(%653, %647) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> + %655 = "tf.Reshape"(%654, %650) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor + %656 = "tf.Shape"(%655) {device = ""} : (tensor) -> tensor<1xi64> + %657 = "tf.StridedSlice"(%656, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %658 = "tf.Sub"(%657, %644) {device = ""} : (tensor, tensor) -> tensor + %659 = "tf.Pack"(%658) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %660 = "tf.StridedSlice"(%655, %13, %659, %14) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %661 = "tf.StridedSlice"(%655, %651, %13, %14) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %662:2 = "tf.RaggedRange"(%660, %661, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %663 = "tf.GatherV2"(%447, %662#1, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %664 = "tf.Cast"(%663) {Truncate = false, device = ""} : (tensor) -> tensor + %665 = "tf.BroadcastTo"(%664, %637) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %666 = "tf.Max"(%665, %17) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %667 = "tf.Maximum"(%16, %666) {device = ""} : (tensor, tensor) -> tensor + %668 = "tf.Range"(%16, %667, %9) {device = ""} : (tensor, tensor, tensor) -> tensor + %669 = "tf.Pack"(%9, %667) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %670 = "tf.Tile"(%634, %669) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %671 = "tf.Shape"(%670) {device = ""} : (tensor) -> tensor<2xi32> + %672 = "tf.StridedSlice"(%671, %17, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %673 = 
"tf.Prod"(%672, %17) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor + %674 = "tf.Pack"(%673) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %675 = "tf.Shape"(%670) {device = ""} : (tensor) -> tensor<2xi32> + %676 = "tf.StridedSlice"(%675, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %677 = "tf.Shape"(%670) {device = ""} : (tensor) -> tensor<2xi32> + %678 = "tf.StridedSlice"(%677, %7, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %679 = "tf.ConcatV2"(%676, %674, %678, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %680 = "tf.Reshape"(%670, %679) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %681 = "tf.ExpandDims"(%665, %2) {device = ""} : (tensor, tensor) -> tensor + %682 = "tf.Less"(%668, %681) {device = ""} : (tensor, tensor) -> tensor + %683 = "tf.Reshape"(%682, %6) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %684 = "tf.Where"(%683) {device = ""} : (tensor) -> tensor + %685 = "tf.Squeeze"(%684) {device = "", squeeze_dims = [1]} : (tensor) -> tensor + %686 = "tf.GatherV2"(%680, %685, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %687 = "tf.Cast"(%663) {Truncate = false, device = ""} : (tensor) -> tensor + %688 = "tf.BroadcastTo"(%687, %642) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %689 = "tf.Max"(%688, %17) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %690 = "tf.Maximum"(%16, %689) {device = ""} : (tensor, tensor) -> tensor + %691 = "tf.Range"(%16, %690, %9) {device = ""} : (tensor, tensor, tensor) -> tensor + %692 = "tf.Pack"(%9, %690) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %693 = "tf.Tile"(%639, %692) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %694 = "tf.Shape"(%693) {device = ""} : (tensor) -> tensor<2xi32> + %695 = "tf.StridedSlice"(%694, %17, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %696 = "tf.Prod"(%695, %17) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor + %697 = "tf.Pack"(%696) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %698 = "tf.Shape"(%693) {device = ""} : (tensor) -> tensor<2xi32> + %699 = "tf.StridedSlice"(%698, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %700 = "tf.Shape"(%693) {device = ""} : (tensor) -> tensor<2xi32> + %701 = "tf.StridedSlice"(%700, %7, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %702 = "tf.ConcatV2"(%699, %697, %701, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %703 = "tf.Reshape"(%693, %702) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %704 = 
"tf.ExpandDims"(%688, %2) {device = ""} : (tensor, tensor) -> tensor + %705 = "tf.Less"(%691, %704) {device = ""} : (tensor, tensor) -> tensor + %706 = "tf.Reshape"(%705, %6) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %707 = "tf.Where"(%706) {device = ""} : (tensor) -> tensor + %708 = "tf.Squeeze"(%707) {device = "", squeeze_dims = [1]} : (tensor) -> tensor + %709 = "tf.GatherV2"(%703, %708, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %710:2 = "tf.RaggedRange"(%686, %709, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %711 = "tf.If"(%588, %588, %584, %120) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_1_AssertGuard_false_119540, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_1_AssertGuard_true_119530} : (tensor, tensor, tensor, tensor) -> tensor + %712 = "tf.Identity"(%711) {device = ""} : (tensor) -> tensor + %713 = "tf.StridedSlice"(%115, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %714 = "tf.Equal"(%713, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %715 = "tf.Select"(%714, %584, %713) {device = ""} : (tensor, tensor, tensor) -> tensor + %716 = "tf.Pack"(%715) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %717 = "tf.StridedSlice"(%115, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi64> + %718 = "tf.StridedSlice"(%115, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %719 = "tf.ConcatV2"(%717, %716, %718, %16) {device = ""} : (tensor<0xi64>, tensor<1xi64>, tensor<1xi64>, tensor) -> tensor<2xi64> + %720 = "tf.StridedSlice"(%719, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %721 = "tf.Equal"(%720, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %722 = "tf.StridedSlice"(%719, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %723 = "tf.StridedSlice"(%719, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %724 = "tf.Equal"(%723, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %725 = "tf.If"(%724, %724, %723, %663) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_2_AssertGuard_false_120030, 
is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_2_AssertGuard_true_120020} : (tensor, tensor, tensor, tensor) -> tensor + %726 = "tf.Identity"(%725) {device = ""} : (tensor) -> tensor + %727 = "tf.If"(%721, %721, %663, %722) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_false_120390, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_true_120380} : (tensor, tensor, tensor, tensor) -> tensor + %728 = "tf.Identity"(%168) {device = ""} : (tensor) -> tensor + %729 = "tf.Identity"(%727) {device = ""} : (tensor) -> tensor + %730 = "tf.Identity"(%400) {device = ""} : (tensor) -> tensor + %731 = "tf.Shape"(%125#2) {device = ""} : (tensor) -> tensor<1xi32> + %732 = "tf.StridedSlice"(%731, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %733 = "tf.Cast"(%732) {Truncate = false, device = ""} : (tensor<0xi32>) -> tensor<0xi64> + %734 = "tf.Identity"(%733) {device = ""} : (tensor<0xi64>) -> tensor<0xi64> + %735 = "tf.Shape"(%125#3) {device = ""} : (tensor) -> tensor<1xi32> + %736 = "tf.StridedSlice"(%735, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %737 = "tf.Cast"(%736) {Truncate = false, device = ""} : (tensor<0xi32>) -> tensor<0xi64> + %738 = "tf.Identity"(%737) {device = ""} : (tensor<0xi64>) -> tensor<0xi64> + %739 = "tf.GatherV2"(%125#3, %428, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %740 = "tf.Tile"(%739, %432) {device = ""} : (tensor, tensor<1xi64>) -> tensor + %741 = "tf.Sub"(%740, %566) {device = ""} : (tensor, tensor) -> tensor + %742 = "tf.Shape"(%741) {device = ""} : (tensor) -> tensor<1xi32> + %743 = "tf.StridedSlice"(%742, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %744 = "tf.Cast"(%743) {Truncate = false, device = ""} : (tensor<0xi32>) -> tensor<0xi64> + %745 = "tf.Identity"(%744) {device = ""} : (tensor<0xi64>) -> tensor<0xi64> + %746 = "tf.UnicodeEncode"(%125#0, %146) {Tsplits = i64, device = "", errors = "replace", output_encoding = "UTF-8", replacement_char = 65533 : i64} : (tensor, tensor) -> tensor + %747 = "tf.Identity"(%746) {device = ""} : (tensor) -> tensor + %748 = "tf.StridedSlice"(%19, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %749 = "tf.AddV2"(%748, %15) {device = ""} : (tensor, tensor) -> tensor + %750 = "tf.Range"(%12, %749, %15) {device = ""} : (tensor, tensor, tensor) -> tensor + %751 = "tf.Mul"(%750, %15) {device = ""} : (tensor, tensor) -> tensor + %752 = "tf.Identity"(%751) {device = ""} : (tensor) -> tensor + return %747, %752, %728 : tensor, tensor, 
tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedConcat_assert_equal_1_Assert_AssertGuard_false_99640(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Input tensors have incompatible shapes."> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedConcat/RaggedFromTensor/strided_slice_4:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedConcat/RaggedNRows/sub:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedConcat_assert_equal_1_Assert_AssertGuard_true_99630(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_100400(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_100390(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_100760(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes 
= [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor) -> () + %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor + return %4 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_100750(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_101100(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/strided_slice_1:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_101090(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_101470(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x 
(WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_101460(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_101830(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor) -> () + %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor + return %4 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_101820(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_false_102190(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/strided_slice:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y 
(WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RaggedNRows/sub:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_true_102180(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_102540(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_102530(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_102900(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor) -> () 
+ %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor + return %4 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_102890(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_103240(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/strided_slice_1:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_103230(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_103610(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 
: tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_103600(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_103970(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor) -> () + %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor + return %4 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_103960(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_104310(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/strided_slice_1:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_104300(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> 
tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_AssertGuard_false_105110(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_AssertGuard_true_105100(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_1_AssertGuard_false_106180(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_1_AssertGuard_true_106170(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_2_AssertGuard_false_106670(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func 
@WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_2_AssertGuard_true_106660(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_3_AssertGuard_false_107030(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_3_AssertGuard_true_107020(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_AssertGuard_false_111870(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_AssertGuard_true_111860(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_1_AssertGuard_false_112940(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", 
summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_1_AssertGuard_true_112930(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_2_AssertGuard_false_113430(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_2_AssertGuard_true_113420(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_3_AssertGuard_false_113790(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_3_AssertGuard_true_113780(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_AssertGuard_false_118470(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<0> : tensor} : () -> 
tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_AssertGuard_true_118460(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_1_AssertGuard_false_119540(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_1_AssertGuard_true_119530(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_2_AssertGuard_false_120030(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_2_AssertGuard_true_120020(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_false_120390(%arg0: tensor, %arg1: tensor, %arg2: 
tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_true_120380(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} + + + +// CHECK: func @whitespace_tokenizer_rank2(%arg0: tensor {tf._user_specified_name = "input"}) -> (tensor, tensor, tensor) attributes {sym_visibility = "private", tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf._input_shapes = [#tf.shape], tf.signature.is_stateful} { +// CHECK: %0:3 = "tfl.custom"(%arg0) {custom_code = "tftext:WhitespaceTokenizer", custom_option = opaque<"tfl", "0x"> : tensor<0xi8>} : (tensor) -> (tensor, tensor, tensor) +// CHECK: return %0#0, %0#1, %0#2 : tensor, tensor, tensor + +func @whitespace_tokenizer_rank0(%arg0: tensor {tf._user_specified_name = "input"}) -> tensor attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<>], tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<[0, 1]> : tensor<2xi64>} : () -> tensor<2xi64> + %1 = "tf.Const"() {value = dense<[]> : tensor<0xi64>} : () -> tensor<0xi64> + %2 = "tf.Const"() {value = dense : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<-1> : tensor} : () -> tensor + %4 = "tf.Const"() {value = dense<[[0], [1]]> : tensor<2x1xi64>} : () -> tensor<2x1xi64> + %5 = "tf.Const"() {value = dense<-1> : tensor<1xi32>} : () -> tensor<1xi32> + %6 = "tf.Const"() {value = dense<2> : tensor<1xi32>} : () -> tensor<1xi32> + %7 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %8 = "tf.Const"() {value = dense<2> : tensor} : () -> tensor + %9 = "tf.Const"() {value = dense<[]> : tensor<0xi32>} : () -> tensor<0xi32> + %10 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %11 = "tf.Const"() {value = dense<0> : tensor<1xi64>} : () -> tensor<1xi64> + %12 = "tf.Const"() {value = dense<1> : tensor<1xi64>} : () -> tensor<1xi64> + %13 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %14 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %15 = "tf.Const"() {value = dense<0> : tensor<1xi32>} : () -> tensor<1xi32> + %16 = "tf.Const"() {value = dense<1> : tensor<1xi32>} : () -> tensor<1xi32> + %17 = "tf.If"(%2, %2, %13, %13) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedConcat_assert_equal_1_Assert_AssertGuard_false_3220, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedConcat_assert_equal_1_Assert_AssertGuard_true_3210} : (tensor, 
tensor, tensor, tensor) -> tensor + %18 = "tf.Identity"(%17) {device = ""} : (tensor) -> tensor + %19 = "tf.Pack"(%arg0) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1x!tf.string> + %20 = "tf.StringLength"(%19) {device = "", unit = "BYTE"} : (tensor<1x!tf.string>) -> tensor<1xi32> + %21 = "tf.ExpandDims"(%20, %7) {device = ""} : (tensor<1xi32>, tensor) -> tensor<1x1xi32> + %22 = "tf.Cast"(%21) {Truncate = false, device = ""} : (tensor<1x1xi32>) -> tensor<1x1xi64> + %23 = "tf.Reshape"(%22, %12) {device = ""} : (tensor<1x1xi64>, tensor<1xi64>) -> tensor<1xi64> + %24 = "tf.Reshape"(%19, %5) {device = ""} : (tensor<1x!tf.string>, tensor<1xi32>) -> tensor<1x!tf.string> + %25:3 = "tf.UnicodeDecodeWithOffsets"(%24) {Tsplits = i64, device = "", errors = "replace", input_encoding = "UTF-8", replace_control_characters = false, replacement_char = 65533 : i64} : (tensor<1x!tf.string>) -> (tensor<2xi64>, tensor, tensor) + %26 = "tf.StridedSlice"(%25#0, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %27 = "tf.AddV2"(%26, %13) {device = ""} : (tensor<1xi64>, tensor) -> tensor<1xi64> + %28 = "tf.StridedSlice"(%25#0, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %29 = "tf.Minimum"(%27, %28) {device = ""} : (tensor<1xi64>, tensor<1xi64>) -> tensor<1xi64> + %30:2 = "tf.RaggedRange"(%29, %28, %13) {T = i64, Tsplits = i64, device = ""} : (tensor<1xi64>, tensor<1xi64>, tensor) -> (tensor<2xi64>, tensor) + %31 = "tf.StridedSlice"(%30#0, %5, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %32 = "tf.AddV2"(%31, %12) {device = ""} : (tensor, tensor<1xi64>) -> tensor<1xi64> + %33 = "tf.ConcatV2"(%30#0, %32, %14) {device = ""} : (tensor<2xi64>, tensor<1xi64>, tensor) -> tensor<3xi64> + %34 = "tf.GatherV2"(%25#2, %30#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %35 = "tf.ConcatV2"(%34, %23, %14) {device = ""} : (tensor, tensor<1xi64>, tensor) -> tensor + %36:2 = "tf.RaggedGather"(%33, %35, %0) {OUTPUT_RAGGED_RANK = 1 : i64, PARAMS_RAGGED_RANK = 1 : i64, Tindices = i64, Tsplits = i64, Tvalues = i64, device = ""} : (tensor<3xi64>, tensor, tensor<2xi64>) -> (tensor, tensor) + %37:5 = "tf.WhitespaceTokenizeWithOffsets"(%25#1, %25#0) {Tsplits = i64, device = ""} : (tensor, tensor<2xi64>) -> (tensor, tensor, tensor, tensor, tensor) + %38 = "tf.StridedSlice"(%37#1, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %39 = "tf.Equal"(%38, %10) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %40 = "tf.All"(%39, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %41 = "tf.If"(%40, %40, %38, %10) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = 
@WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_3980, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_3970} : (tensor, tensor, tensor, tensor) -> tensor + %42 = "tf.Identity"(%41) {device = ""} : (tensor) -> tensor + %43 = "tf.StridedSlice"(%37#1, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %44 = "tf.StridedSlice"(%37#1, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %45 = "tf.Sub"(%43, %44) {device = ""} : (tensor, tensor) -> tensor + %46 = "tf.LessEqual"(%10, %45) {device = ""} : (tensor, tensor) -> tensor + %47 = "tf.All"(%46, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %48 = "tf.If"(%47, %47, %45) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_4340, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_4330} : (tensor, tensor, tensor) -> tensor + %49 = "tf.Identity"(%48) {device = ""} : (tensor) -> tensor + %50 = "tf.Identity"(%37#1) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %51 = "tf.StridedSlice"(%50, %5, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %52 = "tf.Shape"(%37#0) {device = ""} : (tensor) -> tensor<1xi64> + %53 = "tf.StridedSlice"(%52, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %54 = "tf.Equal"(%51, %53) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %55 = "tf.All"(%54, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %56 = "tf.If"(%55, %55, %51, %53) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_4680, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_4670} : (tensor, tensor, tensor, tensor) -> tensor + %57 = "tf.Identity"(%56) {device = ""} : (tensor) -> tensor + %58 = "tf.Identity"(%50) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %59 = "tf.Shape"(%58) {device = ""} : 
(tensor) -> tensor<1xi64> + %60 = "tf.StridedSlice"(%59, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %61 = "tf.Sub"(%60, %13) {device = ""} : (tensor, tensor) -> tensor + %62 = "tf.StridedSlice"(%37#4, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %63 = "tf.Equal"(%62, %10) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %64 = "tf.All"(%63, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %65 = "tf.If"(%64, %64, %62, %10) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_5050, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_5040} : (tensor, tensor, tensor, tensor) -> tensor + %66 = "tf.Identity"(%65) {device = ""} : (tensor) -> tensor + %67 = "tf.StridedSlice"(%37#4, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %68 = "tf.StridedSlice"(%37#4, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %69 = "tf.Sub"(%67, %68) {device = ""} : (tensor, tensor) -> tensor + %70 = "tf.LessEqual"(%10, %69) {device = ""} : (tensor, tensor) -> tensor + %71 = "tf.All"(%70, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %72 = "tf.If"(%71, %71, %69) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_5410, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_5400} : (tensor, tensor, tensor) -> tensor + %73 = "tf.Identity"(%72) {device = ""} : (tensor) -> tensor + %74 = "tf.Identity"(%37#4) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %75 = "tf.StridedSlice"(%74, %5, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %76 = "tf.Equal"(%75, %61) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %77 = "tf.All"(%76, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %78 = "tf.If"(%77, %77, %75, %61) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = 
@WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_false_5770, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_true_5760} : (tensor, tensor, tensor, tensor) -> tensor + %79 = "tf.Identity"(%78) {device = ""} : (tensor) -> tensor + %80 = "tf.Identity"(%74) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %81 = "tf.StridedSlice"(%37#4, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %82 = "tf.Equal"(%81, %10) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %83 = "tf.All"(%82, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %84 = "tf.If"(%83, %83, %81, %10) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_6120, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_6110} : (tensor, tensor, tensor, tensor) -> tensor + %85 = "tf.Identity"(%84) {device = ""} : (tensor) -> tensor + %86 = "tf.StridedSlice"(%37#4, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %87 = "tf.StridedSlice"(%37#4, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %88 = "tf.Sub"(%86, %87) {device = ""} : (tensor, tensor) -> tensor + %89 = "tf.LessEqual"(%10, %88) {device = ""} : (tensor, tensor) -> tensor + %90 = "tf.All"(%89, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %91 = "tf.If"(%90, %90, %88) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_6480, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_6470} : (tensor, tensor, tensor) -> tensor + %92 = "tf.Identity"(%91) {device = ""} : (tensor) -> tensor + %93 = "tf.Identity"(%37#4) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %94 = "tf.StridedSlice"(%93, %5, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %95 = "tf.Shape"(%37#2) {device = ""} : (tensor) -> tensor<1xi64> + %96 = "tf.StridedSlice"(%95, %15, %16, 
%16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %97 = "tf.Equal"(%94, %96) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %98 = "tf.All"(%97, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %99 = "tf.If"(%98, %98, %94, %96) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_6820, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_6810} : (tensor, tensor, tensor, tensor) -> tensor + %100 = "tf.Identity"(%99) {device = ""} : (tensor) -> tensor + %101 = "tf.Identity"(%93) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %102 = "tf.Shape"(%101) {device = ""} : (tensor) -> tensor<1xi64> + %103 = "tf.StridedSlice"(%102, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %104 = "tf.Sub"(%103, %13) {device = ""} : (tensor, tensor) -> tensor + %105 = "tf.Equal"(%104, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %106 = "tf.LogicalOr"(%105, %2) {device = ""} : (tensor, tensor) -> tensor + %107 = "tf.Equal"(%104, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %108 = "tf.LogicalOr"(%106, %107) {device = ""} : (tensor, tensor) -> tensor + %109 = "tf.StridedSlice"(%101, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %110 = "tf.StridedSlice"(%101, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %111 = "tf.Sub"(%109, %110) {device = ""} : (tensor, tensor) -> tensor + %112 = "tf.Shape"(%101) {device = ""} : (tensor) -> tensor<1xi64> + %113 = "tf.StridedSlice"(%112, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %114 = "tf.Sub"(%113, %13) {device = ""} : (tensor, tensor) -> tensor + %115 = "tf.Equal"(%114, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %116 = "tf.ExpandDims"(%101, %7) {device = ""} : (tensor, tensor) -> tensor + %117 = "tf.Shape"(%101) {device = ""} : (tensor) -> tensor<1xi32> + %118 = "tf.StridedSlice"(%117, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %119 = "tf.StridedSlice"(%117, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : 
i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %120 = "tf.StridedSlice"(%117, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %121 = "tf.StridedSlice"(%37#4, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %122 = "tf.Equal"(%121, %10) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %123 = "tf.All"(%122, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %124 = "tf.If"(%123, %123, %121, %10) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_7190, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_7180} : (tensor, tensor, tensor, tensor) -> tensor + %125 = "tf.Identity"(%124) {device = ""} : (tensor) -> tensor + %126 = "tf.StridedSlice"(%37#4, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %127 = "tf.StridedSlice"(%37#4, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %128 = "tf.Sub"(%126, %127) {device = ""} : (tensor, tensor) -> tensor + %129 = "tf.LessEqual"(%10, %128) {device = ""} : (tensor, tensor) -> tensor + %130 = "tf.All"(%129, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %131 = "tf.If"(%130, %130, %128) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_7550, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_7540} : (tensor, tensor, tensor) -> tensor + %132 = "tf.Identity"(%131) {device = ""} : (tensor) -> tensor + %133 = "tf.Identity"(%37#4) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %134 = "tf.StridedSlice"(%133, %5, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %135 = "tf.Shape"(%37#3) {device = ""} : (tensor) -> tensor<1xi64> + %136 = "tf.StridedSlice"(%135, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + 
%137 = "tf.Equal"(%134, %136) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %138 = "tf.All"(%137, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %139 = "tf.If"(%138, %138, %134, %136) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_7890, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_7880} : (tensor, tensor, tensor, tensor) -> tensor + %140 = "tf.Identity"(%139) {device = ""} : (tensor) -> tensor + %141 = "tf.Identity"(%133) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %142 = "tf.Shape"(%141) {device = ""} : (tensor) -> tensor<1xi64> + %143 = "tf.StridedSlice"(%142, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %144 = "tf.Sub"(%143, %13) {device = ""} : (tensor, tensor) -> tensor + %145 = "tf.Equal"(%144, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %146 = "tf.LogicalOr"(%145, %2) {device = ""} : (tensor, tensor) -> tensor + %147 = "tf.Equal"(%144, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %148 = "tf.LogicalOr"(%146, %147) {device = ""} : (tensor, tensor) -> tensor + %149 = "tf.StridedSlice"(%141, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %150 = "tf.StridedSlice"(%141, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %151 = "tf.Sub"(%149, %150) {device = ""} : (tensor, tensor) -> tensor + %152 = "tf.Shape"(%141) {device = ""} : (tensor) -> tensor<1xi64> + %153 = "tf.StridedSlice"(%152, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %154 = "tf.Sub"(%153, %13) {device = ""} : (tensor, tensor) -> tensor + %155 = "tf.Equal"(%154, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %156 = "tf.ExpandDims"(%141, %7) {device = ""} : (tensor, tensor) -> tensor + %157 = "tf.Shape"(%141) {device = ""} : (tensor) -> tensor<1xi32> + %158 = "tf.StridedSlice"(%157, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %159 = "tf.StridedSlice"(%157, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %160 = "tf.StridedSlice"(%157, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : 
i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %161 = "tf.StridedSlice"(%141, %5, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %162 = "tf.Range"(%10, %161, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %163 = "tf.StridedSlice"(%141, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %164 = "tf.StridedSlice"(%141, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %165 = "tf.Sub"(%163, %164) {device = ""} : (tensor, tensor) -> tensor + %166 = "tf.If"(%108, %108, %13, %104) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_AssertGuard_false_8690, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_AssertGuard_true_8680} : (tensor, tensor, tensor, tensor) -> tensor + %167 = "tf.Identity"(%166) {device = ""} : (tensor) -> tensor + %168 = "tf.Equal"(%104, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %169 = "tf.Select"(%168, %13, %104) {device = ""} : (tensor, tensor, tensor) -> tensor + %170 = "tf.Equal"(%169, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %171 = "tf.LogicalOr"(%170, %2) {device = ""} : (tensor, tensor) -> tensor + %172 = "tf.Equal"(%169, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %173 = "tf.LogicalOr"(%171, %172) {device = ""} : (tensor, tensor) -> tensor + %174 = "tf.Select"(%115, %169, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %175 = "tf.Pack"(%174, %13) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %176 = "tf.StridedSlice"(%175, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %177 = "tf.Cast"(%176) {Truncate = false, device = ""} : (tensor) -> tensor + %178 = "tf.Reshape"(%177, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %179 = "tf.Pack"(%7, %178) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %180 = "tf.Tile"(%116, %179) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %181 = "tf.Mul"(%178, %119) {device = ""} : (tensor, tensor) -> tensor + %182 = "tf.Pack"(%181) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %183 = "tf.ConcatV2"(%118, %182, %120, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %184 = "tf.Reshape"(%180, %183) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %185 = "tf.Shape"(%184) {device = ""} : (tensor) -> tensor<1xi64> + %186 = "tf.StridedSlice"(%185, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + 
%187 = "tf.Pack"(%176) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %188 = "tf.StridedSlice"(%184, %187, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %189 = "tf.Sub"(%186, %176) {device = ""} : (tensor, tensor) -> tensor + %190 = "tf.Pack"(%189) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %191 = "tf.StridedSlice"(%184, %11, %190, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %192:2 = "tf.RaggedRange"(%191, %188, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %193 = "tf.Select"(%2, %169, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %194 = "tf.Pack"(%193, %13) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %195 = "tf.StridedSlice"(%194, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %196 = "tf.Cast"(%195) {Truncate = false, device = ""} : (tensor) -> tensor + %197 = "tf.Reshape"(%196, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %198 = "tf.Pack"(%7, %197) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %199 = "tf.Tile"(%4, %198) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> + %200 = "tf.Mul"(%197, %8) {device = ""} : (tensor, tensor) -> tensor + %201 = "tf.Pack"(%200) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %202 = "tf.ConcatV2"(%9, %201, %9, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %203 = "tf.Reshape"(%199, %202) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor + %204 = "tf.Shape"(%203) {device = ""} : (tensor) -> tensor<1xi64> + %205 = "tf.StridedSlice"(%204, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %206 = "tf.Pack"(%195) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %207 = "tf.StridedSlice"(%203, %206, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %208 = "tf.Sub"(%205, %195) {device = ""} : (tensor, tensor) -> tensor + %209 = "tf.Pack"(%208) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %210 = "tf.StridedSlice"(%203, %11, %209, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %211:2 = "tf.RaggedRange"(%210, %207, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %212 = "tf.StridedSlice"(%194, %15, %16, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %213 = "tf.StridedSlice"(%194, %16, %6, %16) {begin_mask = 0 : i64, 
device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %214 = "tf.Mul"(%213, %12) {device = ""} : (tensor, tensor<1xi64>) -> tensor<1xi64> + %215 = "tf.Tile"(%214, %212) {device = ""} : (tensor<1xi64>, tensor<1xi64>) -> tensor + %216 = "tf.Cumsum"(%215, %14) {device = "", exclusive = false, reverse = false} : (tensor, tensor) -> tensor + %217 = "tf.ConcatV2"(%11, %216, %3) {device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor + %218 = "tf.StridedSlice"(%217, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %219 = "tf.ExpandDims"(%218, %7) {device = ""} : (tensor, tensor) -> tensor + %220 = "tf.Shape"(%218) {device = ""} : (tensor) -> tensor<1xi32> + %221 = "tf.StridedSlice"(%220, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %222 = "tf.Pack"(%221) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %223 = "tf.StridedSlice"(%217, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %224 = "tf.ExpandDims"(%223, %7) {device = ""} : (tensor, tensor) -> tensor + %225 = "tf.Shape"(%223) {device = ""} : (tensor) -> tensor<1xi32> + %226 = "tf.StridedSlice"(%225, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %227 = "tf.Pack"(%226) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %228 = "tf.Equal"(%104, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %229 = "tf.Select"(%228, %169, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %230 = "tf.Cast"(%229) {Truncate = false, device = ""} : (tensor) -> tensor + %231 = "tf.Reshape"(%230, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %232 = "tf.Pack"(%7, %231) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %233 = "tf.Mul"(%231, %8) {device = ""} : (tensor, tensor) -> tensor + %234 = "tf.Pack"(%233) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %235 = "tf.ConcatV2"(%9, %234, %9, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %236 = "tf.Pack"(%229) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %237 = "tf.Pack"(%10, %104) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %238 = "tf.ExpandDims"(%237, %7) {device = ""} : (tensor<2xi64>, tensor) -> tensor<2x1xi64> + %239 = "tf.Tile"(%238, %232) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> + %240 = "tf.Reshape"(%239, %235) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor + %241 = "tf.Shape"(%240) {device = ""} : (tensor) -> tensor<1xi64> + %242 = "tf.StridedSlice"(%241, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + 
%243 = "tf.Sub"(%242, %229) {device = ""} : (tensor, tensor) -> tensor + %244 = "tf.Pack"(%243) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %245 = "tf.StridedSlice"(%240, %11, %244, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %246 = "tf.StridedSlice"(%240, %236, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %247:2 = "tf.RaggedRange"(%245, %246, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %248 = "tf.GatherV2"(%111, %247#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %249 = "tf.Cast"(%248) {Truncate = false, device = ""} : (tensor) -> tensor + %250 = "tf.BroadcastTo"(%249, %222) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %251 = "tf.Max"(%250, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %252 = "tf.Maximum"(%14, %251) {device = ""} : (tensor, tensor) -> tensor + %253 = "tf.Range"(%14, %252, %7) {device = ""} : (tensor, tensor, tensor) -> tensor + %254 = "tf.Pack"(%7, %252) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %255 = "tf.Tile"(%219, %254) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %256 = "tf.Shape"(%255) {device = ""} : (tensor) -> tensor<2xi32> + %257 = "tf.StridedSlice"(%256, %15, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %258 = "tf.Prod"(%257, %15) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor + %259 = "tf.Pack"(%258) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %260 = "tf.Shape"(%255) {device = ""} : (tensor) -> tensor<2xi32> + %261 = "tf.StridedSlice"(%260, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %262 = "tf.Shape"(%255) {device = ""} : (tensor) -> tensor<2xi32> + %263 = "tf.StridedSlice"(%262, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %264 = "tf.ConcatV2"(%261, %259, %263, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %265 = "tf.Reshape"(%255, %264) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %266 = "tf.ExpandDims"(%250, %3) {device = ""} : (tensor, tensor) -> tensor + %267 = "tf.Less"(%253, %266) {device = ""} : (tensor, tensor) -> tensor + %268 = "tf.Reshape"(%267, %5) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %269 = "tf.Where"(%268) {device = ""} : (tensor) -> tensor + %270 = "tf.Squeeze"(%269) {device = "", squeeze_dims = [1]} : (tensor) -> tensor + %271 = "tf.GatherV2"(%265, %270, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %272 = "tf.Cast"(%248) {Truncate = false, device = ""} : (tensor) -> tensor + %273 = "tf.BroadcastTo"(%272, %227) {device = ""} : (tensor, tensor<1xi32>) -> tensor + 
%274 = "tf.Max"(%273, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %275 = "tf.Maximum"(%14, %274) {device = ""} : (tensor, tensor) -> tensor + %276 = "tf.Range"(%14, %275, %7) {device = ""} : (tensor, tensor, tensor) -> tensor + %277 = "tf.Pack"(%7, %275) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %278 = "tf.Tile"(%224, %277) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %279 = "tf.Shape"(%278) {device = ""} : (tensor) -> tensor<2xi32> + %280 = "tf.StridedSlice"(%279, %15, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %281 = "tf.Prod"(%280, %15) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor + %282 = "tf.Pack"(%281) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %283 = "tf.Shape"(%278) {device = ""} : (tensor) -> tensor<2xi32> + %284 = "tf.StridedSlice"(%283, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %285 = "tf.Shape"(%278) {device = ""} : (tensor) -> tensor<2xi32> + %286 = "tf.StridedSlice"(%285, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %287 = "tf.ConcatV2"(%284, %282, %286, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %288 = "tf.Reshape"(%278, %287) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %289 = "tf.ExpandDims"(%273, %3) {device = ""} : (tensor, tensor) -> tensor + %290 = "tf.Less"(%276, %289) {device = ""} : (tensor, tensor) -> tensor + %291 = "tf.Reshape"(%290, %5) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %292 = "tf.Where"(%291) {device = ""} : (tensor) -> tensor + %293 = "tf.Squeeze"(%292) {device = "", squeeze_dims = [1]} : (tensor) -> tensor + %294 = "tf.GatherV2"(%288, %293, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %295:2 = "tf.RaggedRange"(%271, %294, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %296 = "tf.If"(%173, %173, %169, %13) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_1_AssertGuard_false_9760, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_1_AssertGuard_true_9750} : (tensor, tensor, tensor, tensor) -> tensor + %297 = "tf.Identity"(%296) {device = ""} : (tensor) -> tensor + %298 = "tf.Select"(%2, %169, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %299 = "tf.Pack"(%298) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %300 = "tf.ConcatV2"(%1, %299, %12, %14) {device = ""} : (tensor<0xi64>, tensor<1xi64>, tensor<1xi64>, tensor) -> tensor<2xi64> + %301 = "tf.StridedSlice"(%300, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %302 = "tf.Equal"(%301, %13) {device = "", incompatible_shape_error = 
true} : (tensor, tensor) -> tensor + %303 = "tf.StridedSlice"(%300, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %304 = "tf.StridedSlice"(%300, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %305 = "tf.Equal"(%304, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %306 = "tf.If"(%305, %305, %304, %248) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_2_AssertGuard_false_10250, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_2_AssertGuard_true_10240} : (tensor, tensor, tensor, tensor) -> tensor + %307 = "tf.Identity"(%306) {device = ""} : (tensor) -> tensor + %308 = "tf.If"(%302, %302, %248, %303) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_3_AssertGuard_false_10610, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_3_AssertGuard_true_10600} : (tensor, tensor, tensor, tensor) -> tensor + %309 = "tf.If"(%148, %148, %13, %144) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_Assert_AssertGuard_false_15310, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_Assert_AssertGuard_true_15300} : (tensor, tensor, tensor, tensor) -> tensor + %310 = "tf.Identity"(%309) {device = ""} : (tensor) -> tensor + %311 = "tf.Equal"(%144, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %312 = "tf.Select"(%311, %13, %144) {device = ""} : (tensor, tensor, tensor) -> tensor + %313 = "tf.Equal"(%312, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %314 = "tf.LogicalOr"(%313, %2) {device = ""} : (tensor, tensor) -> tensor + %315 = "tf.Equal"(%312, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %316 = "tf.LogicalOr"(%314, %315) {device = ""} : (tensor, tensor) -> tensor + %317 = "tf.Select"(%155, %312, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %318 = "tf.Pack"(%317, %13) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %319 = "tf.StridedSlice"(%318, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %320 = "tf.Cast"(%319) {Truncate = false, device = ""} : (tensor) -> tensor + %321 = "tf.Reshape"(%320, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %322 = "tf.Pack"(%7, %321) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %323 = "tf.Tile"(%156, %322) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %324 = "tf.Mul"(%321, %159) {device = ""} : (tensor, tensor) -> tensor + %325 = "tf.Pack"(%324) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %326 = "tf.ConcatV2"(%158, %325, %160, %14) {device 
= ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %327 = "tf.Reshape"(%323, %326) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %328 = "tf.Shape"(%327) {device = ""} : (tensor) -> tensor<1xi64> + %329 = "tf.StridedSlice"(%328, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %330 = "tf.Pack"(%319) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %331 = "tf.StridedSlice"(%327, %330, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %332 = "tf.Sub"(%329, %319) {device = ""} : (tensor, tensor) -> tensor + %333 = "tf.Pack"(%332) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %334 = "tf.StridedSlice"(%327, %11, %333, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %335:2 = "tf.RaggedRange"(%334, %331, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %336 = "tf.GatherV2"(%162, %335#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %337 = "tf.StridedSlice"(%318, %15, %16, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %338 = "tf.StridedSlice"(%318, %15, %16, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %339 = "tf.StridedSlice"(%318, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi64> + %340 = "tf.ConcatV2"(%338, %339, %14) {device = ""} : (tensor<1xi64>, tensor<0xi64>, tensor) -> tensor<1xi64> + %341 = "tf.StridedSlice"(%318, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %342 = "tf.Mul"(%165, %341) {device = ""} : (tensor, tensor) -> tensor + %343 = "tf.Tile"(%342, %337) {device = ""} : (tensor, tensor<1xi64>) -> tensor + %344 = "tf.Cumsum"(%343, %14) {device = "", exclusive = false, reverse = false} : (tensor, tensor) -> tensor + %345 = "tf.ConcatV2"(%11, %344, %3) {device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor + %346 = "tf.Shape"(%345) {device = ""} : (tensor) -> tensor<1xi64> + %347 = "tf.StridedSlice"(%346, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %348 = "tf.Sub"(%347, %13) {device = ""} : (tensor, tensor) -> tensor + %349 = "tf.Equal"(%348, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %350 = "tf.LogicalOr"(%349, %2) {device = ""} : (tensor, tensor) -> 
tensor + %351 = "tf.Equal"(%348, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %352 = "tf.LogicalOr"(%350, %351) {device = ""} : (tensor, tensor) -> tensor + %353 = "tf.StridedSlice"(%345, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %354 = "tf.StridedSlice"(%345, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %355 = "tf.Sub"(%353, %354) {device = ""} : (tensor, tensor) -> tensor + %356 = "tf.Shape"(%345) {device = ""} : (tensor) -> tensor<1xi64> + %357 = "tf.StridedSlice"(%356, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %358 = "tf.Sub"(%357, %13) {device = ""} : (tensor, tensor) -> tensor + %359 = "tf.Equal"(%358, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %360 = "tf.ExpandDims"(%345, %7) {device = ""} : (tensor, tensor) -> tensor + %361 = "tf.Shape"(%345) {device = ""} : (tensor) -> tensor<1xi32> + %362 = "tf.StridedSlice"(%361, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %363 = "tf.StridedSlice"(%361, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %364 = "tf.StridedSlice"(%361, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %365 = "tf.Select"(%2, %312, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %366 = "tf.Pack"(%365, %13) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %367 = "tf.StridedSlice"(%366, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %368 = "tf.Cast"(%367) {Truncate = false, device = ""} : (tensor) -> tensor + %369 = "tf.Reshape"(%368, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %370 = "tf.Pack"(%7, %369) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %371 = "tf.Tile"(%4, %370) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> + %372 = "tf.Mul"(%369, %8) {device = ""} : (tensor, tensor) -> tensor + %373 = "tf.Pack"(%372) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %374 = "tf.ConcatV2"(%9, %373, %9, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %375 = "tf.Reshape"(%371, %374) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor + %376 = "tf.Shape"(%375) {device = ""} : (tensor) -> tensor<1xi64> + %377 = "tf.StridedSlice"(%376, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, 
new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %378 = "tf.Pack"(%367) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %379 = "tf.StridedSlice"(%375, %378, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %380 = "tf.Sub"(%377, %367) {device = ""} : (tensor, tensor) -> tensor + %381 = "tf.Pack"(%380) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %382 = "tf.StridedSlice"(%375, %11, %381, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %383:2 = "tf.RaggedRange"(%382, %379, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %384 = "tf.GatherV2"(%11, %383#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor + %385 = "tf.GatherV2"(%12, %384, %14) {batch_dims = 0 : i64, device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor + %386 = "tf.StridedSlice"(%366, %15, %16, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %387 = "tf.StridedSlice"(%366, %15, %16, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %388 = "tf.StridedSlice"(%366, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi64> + %389 = "tf.ConcatV2"(%387, %388, %14) {device = ""} : (tensor<1xi64>, tensor<0xi64>, tensor) -> tensor<1xi64> + %390 = "tf.Tile"(%385, %389) {device = ""} : (tensor, tensor<1xi64>) -> tensor + %391 = "tf.StridedSlice"(%366, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %392 = "tf.Mul"(%391, %12) {device = ""} : (tensor, tensor<1xi64>) -> tensor<1xi64> + %393 = "tf.Tile"(%392, %386) {device = ""} : (tensor<1xi64>, tensor<1xi64>) -> tensor + %394 = "tf.Cumsum"(%393, %14) {device = "", exclusive = false, reverse = false} : (tensor, tensor) -> tensor + %395 = "tf.ConcatV2"(%11, %394, %3) {device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor + %396 = "tf.StridedSlice"(%395, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %397 = "tf.ExpandDims"(%396, %7) {device = ""} : (tensor, tensor) -> tensor + %398 = "tf.Shape"(%396) {device = ""} : (tensor) -> tensor<1xi32> + %399 = "tf.StridedSlice"(%398, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %400 = "tf.Pack"(%399) {axis = 0 : i64, device = ""} : 
(tensor) -> tensor<1xi32> + %401 = "tf.StridedSlice"(%395, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %402 = "tf.ExpandDims"(%401, %7) {device = ""} : (tensor, tensor) -> tensor + %403 = "tf.Shape"(%401) {device = ""} : (tensor) -> tensor<1xi32> + %404 = "tf.StridedSlice"(%403, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %405 = "tf.Pack"(%404) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %406 = "tf.Equal"(%144, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %407 = "tf.Select"(%406, %312, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %408 = "tf.Cast"(%407) {Truncate = false, device = ""} : (tensor) -> tensor + %409 = "tf.Reshape"(%408, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %410 = "tf.Pack"(%7, %409) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %411 = "tf.Mul"(%409, %8) {device = ""} : (tensor, tensor) -> tensor + %412 = "tf.Pack"(%411) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %413 = "tf.ConcatV2"(%9, %412, %9, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %414 = "tf.Pack"(%407) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %415 = "tf.Pack"(%10, %144) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %416 = "tf.ExpandDims"(%415, %7) {device = ""} : (tensor<2xi64>, tensor) -> tensor<2x1xi64> + %417 = "tf.Tile"(%416, %410) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> + %418 = "tf.Reshape"(%417, %413) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor + %419 = "tf.Shape"(%418) {device = ""} : (tensor) -> tensor<1xi64> + %420 = "tf.StridedSlice"(%419, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %421 = "tf.Sub"(%420, %407) {device = ""} : (tensor, tensor) -> tensor + %422 = "tf.Pack"(%421) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %423 = "tf.StridedSlice"(%418, %11, %422, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %424 = "tf.StridedSlice"(%418, %414, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %425:2 = "tf.RaggedRange"(%423, %424, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %426 = "tf.GatherV2"(%151, %425#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %427 = "tf.Cast"(%426) {Truncate = false, device = ""} : (tensor) -> tensor + %428 = "tf.BroadcastTo"(%427, %400) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %429 = "tf.Max"(%428, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %430 = "tf.Maximum"(%14, %429) {device = ""} : (tensor, tensor) -> tensor + %431 = "tf.Range"(%14, %430, %7) 
{device = ""} : (tensor, tensor, tensor) -> tensor + %432 = "tf.Pack"(%7, %430) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %433 = "tf.Tile"(%397, %432) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %434 = "tf.Shape"(%433) {device = ""} : (tensor) -> tensor<2xi32> + %435 = "tf.StridedSlice"(%434, %15, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %436 = "tf.Prod"(%435, %15) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor + %437 = "tf.Pack"(%436) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %438 = "tf.Shape"(%433) {device = ""} : (tensor) -> tensor<2xi32> + %439 = "tf.StridedSlice"(%438, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %440 = "tf.Shape"(%433) {device = ""} : (tensor) -> tensor<2xi32> + %441 = "tf.StridedSlice"(%440, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %442 = "tf.ConcatV2"(%439, %437, %441, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %443 = "tf.Reshape"(%433, %442) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %444 = "tf.ExpandDims"(%428, %3) {device = ""} : (tensor, tensor) -> tensor + %445 = "tf.Less"(%431, %444) {device = ""} : (tensor, tensor) -> tensor + %446 = "tf.Reshape"(%445, %5) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %447 = "tf.Where"(%446) {device = ""} : (tensor) -> tensor + %448 = "tf.Squeeze"(%447) {device = "", squeeze_dims = [1]} : (tensor) -> tensor + %449 = "tf.GatherV2"(%443, %448, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %450 = "tf.Cast"(%426) {Truncate = false, device = ""} : (tensor) -> tensor + %451 = "tf.BroadcastTo"(%450, %405) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %452 = "tf.Max"(%451, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %453 = "tf.Maximum"(%14, %452) {device = ""} : (tensor, tensor) -> tensor + %454 = "tf.Range"(%14, %453, %7) {device = ""} : (tensor, tensor, tensor) -> tensor + %455 = "tf.Pack"(%7, %453) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %456 = "tf.Tile"(%402, %455) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %457 = "tf.Shape"(%456) {device = ""} : (tensor) -> tensor<2xi32> + %458 = "tf.StridedSlice"(%457, %15, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %459 = "tf.Prod"(%458, %15) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor + %460 = "tf.Pack"(%459) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %461 = "tf.Shape"(%456) {device = ""} : (tensor) -> tensor<2xi32> + %462 = "tf.StridedSlice"(%461, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> 
tensor<0xi32> + %463 = "tf.Shape"(%456) {device = ""} : (tensor) -> tensor<2xi32> + %464 = "tf.StridedSlice"(%463, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %465 = "tf.ConcatV2"(%462, %460, %464, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %466 = "tf.Reshape"(%456, %465) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %467 = "tf.ExpandDims"(%451, %3) {device = ""} : (tensor, tensor) -> tensor + %468 = "tf.Less"(%454, %467) {device = ""} : (tensor, tensor) -> tensor + %469 = "tf.Reshape"(%468, %5) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %470 = "tf.Where"(%469) {device = ""} : (tensor) -> tensor + %471 = "tf.Squeeze"(%470) {device = "", squeeze_dims = [1]} : (tensor) -> tensor + %472 = "tf.GatherV2"(%466, %471, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %473:2 = "tf.RaggedRange"(%449, %472, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %474 = "tf.GatherV2"(%390, %473#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %475 = "tf.If"(%316, %316, %312, %13) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_Assert_1_AssertGuard_false_16380, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_Assert_1_AssertGuard_true_16370} : (tensor, tensor, tensor, tensor) -> tensor + %476 = "tf.Identity"(%475) {device = ""} : (tensor) -> tensor + %477 = "tf.Select"(%2, %312, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %478 = "tf.Pack"(%477) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %479 = "tf.ConcatV2"(%1, %478, %12, %14) {device = ""} : (tensor<0xi64>, tensor<1xi64>, tensor<1xi64>, tensor) -> tensor<2xi64> + %480 = "tf.StridedSlice"(%479, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %481 = "tf.Equal"(%480, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %482 = "tf.StridedSlice"(%479, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %483 = "tf.StridedSlice"(%479, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %484 = "tf.Equal"(%483, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %485 = "tf.If"(%484, %484, %483, %426) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_Assert_2_AssertGuard_false_16870, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_Assert_2_AssertGuard_true_16860} : (tensor, tensor, tensor, tensor) -> tensor + %486 = "tf.Identity"(%485) {device = ""} : (tensor) -> tensor + %487 = "tf.If"(%481, %481, %426, %482) {_lower_using_switch_merge = 
true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_Assert_3_AssertGuard_false_17230, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_Assert_3_AssertGuard_true_17220} : (tensor, tensor, tensor, tensor) -> tensor + %488 = "tf.Identity"(%487) {device = ""} : (tensor) -> tensor + %489 = "tf.If"(%352, %352, %13, %348) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_AssertGuard_false_21910, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_AssertGuard_true_21900} : (tensor, tensor, tensor, tensor) -> tensor + %490 = "tf.Identity"(%489) {device = ""} : (tensor) -> tensor + %491 = "tf.Equal"(%348, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %492 = "tf.Select"(%491, %13, %348) {device = ""} : (tensor, tensor, tensor) -> tensor + %493 = "tf.Equal"(%492, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %494 = "tf.LogicalOr"(%493, %2) {device = ""} : (tensor, tensor) -> tensor + %495 = "tf.Equal"(%492, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %496 = "tf.LogicalOr"(%494, %495) {device = ""} : (tensor, tensor) -> tensor + %497 = "tf.Select"(%359, %492, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %498 = "tf.Pack"(%497, %13) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %499 = "tf.StridedSlice"(%498, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %500 = "tf.Cast"(%499) {Truncate = false, device = ""} : (tensor) -> tensor + %501 = "tf.Reshape"(%500, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %502 = "tf.Pack"(%7, %501) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %503 = "tf.Tile"(%360, %502) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %504 = "tf.Mul"(%501, %363) {device = ""} : (tensor, tensor) -> tensor + %505 = "tf.Pack"(%504) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %506 = "tf.ConcatV2"(%362, %505, %364, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %507 = "tf.Reshape"(%503, %506) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %508 = "tf.Shape"(%507) {device = ""} : (tensor) -> tensor<1xi64> + %509 = "tf.StridedSlice"(%508, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %510 = "tf.Pack"(%499) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %511 = "tf.StridedSlice"(%507, %510, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %512 = "tf.Sub"(%509, %499) {device = ""} : (tensor, tensor) -> tensor + %513 = "tf.Pack"(%512) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %514 = "tf.StridedSlice"(%507, %11, %513, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, 
shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %515:2 = "tf.RaggedRange"(%514, %511, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %516 = "tf.Select"(%2, %492, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %517 = "tf.Pack"(%516, %13) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %518 = "tf.StridedSlice"(%517, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %519 = "tf.Cast"(%518) {Truncate = false, device = ""} : (tensor) -> tensor + %520 = "tf.Reshape"(%519, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %521 = "tf.Pack"(%7, %520) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %522 = "tf.Tile"(%4, %521) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> + %523 = "tf.Mul"(%520, %8) {device = ""} : (tensor, tensor) -> tensor + %524 = "tf.Pack"(%523) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %525 = "tf.ConcatV2"(%9, %524, %9, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %526 = "tf.Reshape"(%522, %525) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor + %527 = "tf.Shape"(%526) {device = ""} : (tensor) -> tensor<1xi64> + %528 = "tf.StridedSlice"(%527, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %529 = "tf.Pack"(%518) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %530 = "tf.StridedSlice"(%526, %529, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %531 = "tf.Sub"(%528, %518) {device = ""} : (tensor, tensor) -> tensor + %532 = "tf.Pack"(%531) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %533 = "tf.StridedSlice"(%526, %11, %532, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %534:2 = "tf.RaggedRange"(%533, %530, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %535 = "tf.StridedSlice"(%517, %15, %16, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %536 = "tf.StridedSlice"(%517, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %537 = "tf.Mul"(%536, %12) {device = ""} : (tensor, tensor<1xi64>) -> tensor<1xi64> + %538 = "tf.Tile"(%537, %535) {device = ""} : (tensor<1xi64>, tensor<1xi64>) -> tensor + %539 = "tf.Cumsum"(%538, %14) {device = "", exclusive = false, reverse = false} : (tensor, tensor) -> tensor + %540 = "tf.ConcatV2"(%11, %539, %3) {device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor + %541 = "tf.StridedSlice"(%540, %15, %5, %16) {begin_mask = 1 : i64, device = 
"", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %542 = "tf.ExpandDims"(%541, %7) {device = ""} : (tensor, tensor) -> tensor + %543 = "tf.Shape"(%541) {device = ""} : (tensor) -> tensor<1xi32> + %544 = "tf.StridedSlice"(%543, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %545 = "tf.Pack"(%544) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %546 = "tf.StridedSlice"(%540, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %547 = "tf.ExpandDims"(%546, %7) {device = ""} : (tensor, tensor) -> tensor + %548 = "tf.Shape"(%546) {device = ""} : (tensor) -> tensor<1xi32> + %549 = "tf.StridedSlice"(%548, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %550 = "tf.Pack"(%549) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %551 = "tf.Equal"(%348, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %552 = "tf.Select"(%551, %492, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %553 = "tf.Cast"(%552) {Truncate = false, device = ""} : (tensor) -> tensor + %554 = "tf.Reshape"(%553, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %555 = "tf.Pack"(%7, %554) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %556 = "tf.Mul"(%554, %8) {device = ""} : (tensor, tensor) -> tensor + %557 = "tf.Pack"(%556) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %558 = "tf.ConcatV2"(%9, %557, %9, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %559 = "tf.Pack"(%552) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %560 = "tf.Pack"(%10, %348) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %561 = "tf.ExpandDims"(%560, %7) {device = ""} : (tensor<2xi64>, tensor) -> tensor<2x1xi64> + %562 = "tf.Tile"(%561, %555) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> + %563 = "tf.Reshape"(%562, %558) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor + %564 = "tf.Shape"(%563) {device = ""} : (tensor) -> tensor<1xi64> + %565 = "tf.StridedSlice"(%564, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %566 = "tf.Sub"(%565, %552) {device = ""} : (tensor, tensor) -> tensor + %567 = "tf.Pack"(%566) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %568 = "tf.StridedSlice"(%563, %11, %567, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %569 = "tf.StridedSlice"(%563, %559, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> 
tensor + %570:2 = "tf.RaggedRange"(%568, %569, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %571 = "tf.GatherV2"(%355, %570#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %572 = "tf.Cast"(%571) {Truncate = false, device = ""} : (tensor) -> tensor + %573 = "tf.BroadcastTo"(%572, %545) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %574 = "tf.Max"(%573, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %575 = "tf.Maximum"(%14, %574) {device = ""} : (tensor, tensor) -> tensor + %576 = "tf.Range"(%14, %575, %7) {device = ""} : (tensor, tensor, tensor) -> tensor + %577 = "tf.Pack"(%7, %575) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %578 = "tf.Tile"(%542, %577) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %579 = "tf.Shape"(%578) {device = ""} : (tensor) -> tensor<2xi32> + %580 = "tf.StridedSlice"(%579, %15, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %581 = "tf.Prod"(%580, %15) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor + %582 = "tf.Pack"(%581) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %583 = "tf.Shape"(%578) {device = ""} : (tensor) -> tensor<2xi32> + %584 = "tf.StridedSlice"(%583, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %585 = "tf.Shape"(%578) {device = ""} : (tensor) -> tensor<2xi32> + %586 = "tf.StridedSlice"(%585, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %587 = "tf.ConcatV2"(%584, %582, %586, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %588 = "tf.Reshape"(%578, %587) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %589 = "tf.ExpandDims"(%573, %3) {device = ""} : (tensor, tensor) -> tensor + %590 = "tf.Less"(%576, %589) {device = ""} : (tensor, tensor) -> tensor + %591 = "tf.Reshape"(%590, %5) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %592 = "tf.Where"(%591) {device = ""} : (tensor) -> tensor + %593 = "tf.Squeeze"(%592) {device = "", squeeze_dims = [1]} : (tensor) -> tensor + %594 = "tf.GatherV2"(%588, %593, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %595 = "tf.Cast"(%571) {Truncate = false, device = ""} : (tensor) -> tensor + %596 = "tf.BroadcastTo"(%595, %550) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %597 = "tf.Max"(%596, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %598 = "tf.Maximum"(%14, %597) {device = ""} : (tensor, tensor) -> tensor + %599 = "tf.Range"(%14, %598, %7) {device = ""} : (tensor, tensor, tensor) -> tensor + %600 = "tf.Pack"(%7, %598) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %601 = "tf.Tile"(%547, %600) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %602 = "tf.Shape"(%601) {device = ""} : (tensor) -> tensor<2xi32> + %603 = "tf.StridedSlice"(%602, %15, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 
: i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %604 = "tf.Prod"(%603, %15) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor + %605 = "tf.Pack"(%604) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %606 = "tf.Shape"(%601) {device = ""} : (tensor) -> tensor<2xi32> + %607 = "tf.StridedSlice"(%606, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %608 = "tf.Shape"(%601) {device = ""} : (tensor) -> tensor<2xi32> + %609 = "tf.StridedSlice"(%608, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %610 = "tf.ConcatV2"(%607, %605, %609, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %611 = "tf.Reshape"(%601, %610) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %612 = "tf.ExpandDims"(%596, %3) {device = ""} : (tensor, tensor) -> tensor + %613 = "tf.Less"(%599, %612) {device = ""} : (tensor, tensor) -> tensor + %614 = "tf.Reshape"(%613, %5) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %615 = "tf.Where"(%614) {device = ""} : (tensor) -> tensor + %616 = "tf.Squeeze"(%615) {device = "", squeeze_dims = [1]} : (tensor) -> tensor + %617 = "tf.GatherV2"(%611, %616, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %618:2 = "tf.RaggedRange"(%594, %617, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %619 = "tf.If"(%496, %496, %492, %13) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_1_AssertGuard_false_22980, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_1_AssertGuard_true_22970} : (tensor, tensor, tensor, tensor) -> tensor + %620 = "tf.Identity"(%619) {device = ""} : (tensor) -> tensor + %621 = "tf.Select"(%2, %492, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %622 = "tf.Pack"(%621) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %623 = "tf.ConcatV2"(%1, %622, %12, %14) {device = ""} : (tensor<0xi64>, tensor<1xi64>, tensor<1xi64>, tensor) -> tensor<2xi64> + %624 = "tf.StridedSlice"(%623, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %625 = "tf.Equal"(%624, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %626 = "tf.StridedSlice"(%623, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %627 = "tf.StridedSlice"(%623, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %628 = "tf.Equal"(%627, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) 
-> tensor + %629 = "tf.If"(%628, %628, %627, %571) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_2_AssertGuard_false_23470, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_2_AssertGuard_true_23460} : (tensor, tensor, tensor, tensor) -> tensor + %630 = "tf.Identity"(%629) {device = ""} : (tensor) -> tensor + %631 = "tf.If"(%625, %625, %571, %626) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_false_23830, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_true_23820} : (tensor, tensor, tensor, tensor) -> tensor + %632 = "tf.Identity"(%631) {device = ""} : (tensor) -> tensor + %633 = "tf.Identity"(%308) {device = ""} : (tensor) -> tensor + %634 = "tf.Shape"(%37#2) {device = ""} : (tensor) -> tensor<1xi32> + %635 = "tf.StridedSlice"(%634, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %636 = "tf.Cast"(%635) {Truncate = false, device = ""} : (tensor<0xi32>) -> tensor<0xi64> + %637 = "tf.Identity"(%636) {device = ""} : (tensor<0xi64>) -> tensor<0xi64> + %638 = "tf.Shape"(%37#3) {device = ""} : (tensor) -> tensor<1xi32> + %639 = "tf.StridedSlice"(%638, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %640 = "tf.Cast"(%639) {Truncate = false, device = ""} : (tensor<0xi32>) -> tensor<0xi64> + %641 = "tf.Identity"(%640) {device = ""} : (tensor<0xi64>) -> tensor<0xi64> + %642 = "tf.GatherV2"(%37#3, %336, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %643 = "tf.Tile"(%642, %340) {device = ""} : (tensor, tensor<1xi64>) -> tensor + %644 = "tf.Sub"(%643, %474) {device = ""} : (tensor, tensor) -> tensor + %645 = "tf.Shape"(%644) {device = ""} : (tensor) -> tensor<1xi32> + %646 = "tf.StridedSlice"(%645, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %647 = "tf.Cast"(%646) {Truncate = false, device = ""} : (tensor<0xi32>) -> tensor<0xi64> + %648 = "tf.Identity"(%647) {device = ""} : (tensor<0xi64>) -> tensor<0xi64> + %649 = "tf.UnicodeEncode"(%37#0, %58) {Tsplits = i64, device = "", errors = "replace", output_encoding = "UTF-8", replacement_char = 65533 : i64} : (tensor, tensor) -> tensor + %650 = "tf.Identity"(%649) {device = ""} : (tensor) -> tensor + return %650 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedConcat_assert_equal_1_Assert_AssertGuard_false_3220(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Input tensors have incompatible shapes."> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + 
%2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedConcat/RaggedFromTensor/Const:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/RaggedConcat/RaggedNRows/Const:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedConcat_assert_equal_1_Assert_AssertGuard_true_3210(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_3980(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_3970(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_4340(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor) -> () + %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> 
tensor + %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor + return %4 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_4330(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_4680(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/strided_slice_1:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_4670(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_5050(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_5040(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> 
tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_5410(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor) -> () + %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor + return %4 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_5400(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_false_5770(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/strided_slice:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RaggedNRows/sub:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_true_5760(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_6120(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], 
tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_6110(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_6480(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor) -> () + %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor + return %4 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_6470(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_6820(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/strided_slice_1:0) = "> : tensor} : () -> tensor + %3 = 
"tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_6810(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_7190(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_7180(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_7550(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor) -> () + %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor + return 
%4 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_7540(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_7890(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/strided_slice_1:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_7880(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_AssertGuard_false_8690(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_AssertGuard_true_8680(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_1_AssertGuard_false_9760(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable 
to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_1_AssertGuard_true_9750(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_2_AssertGuard_false_10250(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_2_AssertGuard_true_10240(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_3_AssertGuard_false_10610(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_3_AssertGuard_true_10600(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_Assert_AssertGuard_false_15310(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor 
attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_Assert_AssertGuard_true_15300(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_Assert_1_AssertGuard_false_16380(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_Assert_1_AssertGuard_true_16370(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_Assert_2_AssertGuard_false_16870(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_Assert_2_AssertGuard_true_16860(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func 
@WhitespaceTokenize_WhitespaceTokenize_Assert_3_AssertGuard_false_17230(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_Assert_3_AssertGuard_true_17220(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_AssertGuard_false_21910(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_AssertGuard_true_21900(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_1_AssertGuard_false_22980(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_1_AssertGuard_true_22970(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = 
"tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_2_AssertGuard_false_23470(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_2_AssertGuard_true_23460(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_false_23830(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_true_23820(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} + +// CHECK: func @whitespace_tokenizer_rank0(%arg0: tensor {tf._user_specified_name = "input"}) -> tensor attributes {sym_visibility = "private", tf._implements = #tf.func<@"tftext:WhitespaceTokenizer", {}>, tf._input_shapes = [#tf.shape<>], tf.signature.is_stateful} { +// CHECK: %0 = "tfl.custom"(%arg0) {custom_code = "tftext:WhitespaceTokenizer", custom_option = opaque<"tfl", "0x"> : tensor<0xi8>} : (tensor) -> tensor +// CHECK: return %0 : tensor + +func @ngrams(%arg0: tensor {tf._user_specified_name = "input"}) -> tensor attributes {tf._input_shapes = [#tf.shape], tf._implements = #tf.func<@"tftext:Ngrams", {axis = -1 : i64, reduction_type = "STRING_JOIN", string_separator = " ", width = 2 : i64}>} { + %0 = "tf.Const"() {value = dense<-1> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<[0, -1]> : tensor<2xi32>} : () -> tensor<2xi32> + %2 = "tf.Const"() {value = dense<[0, 1]> : tensor<2xi32>} : () -> 
tensor<2xi32> + %3 = "tf.Const"() {value = dense<0> : tensor<2xi32>} : () -> tensor<2xi32> + %4 = "tf.Const"() {value = dense<1> : tensor<2xi32>} : () -> tensor<2xi32> + %5 = "tf.StridedSlice"(%arg0, %3, %1, %4) {begin_mask = 0 : i64, device = "", ellipsis_mask = 1 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<2xi32>, tensor<2xi32>, tensor<2xi32>) -> tensor + %6 = "tf.StridedSlice"(%arg0, %2, %3, %4) {begin_mask = 0 : i64, device = "", ellipsis_mask = 1 : i64, end_mask = 2 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<2xi32>, tensor<2xi32>, tensor<2xi32>) -> tensor + %7 = "tf.Pack"(%5, %6) {axis = -1 : i64, device = ""} : (tensor, tensor) -> tensor + %8 = "tf.ReduceJoin"(%7, %0) {device = "", keep_dims = false, separator = " "} : (tensor, tensor) -> tensor + %9 = "tf.Identity"(%8) {device = ""} : (tensor) -> tensor + return %9 : tensor +} + +// CHECK: func @ngrams(%arg0: tensor {tf._user_specified_name = "input"}) -> tensor attributes {tf._implements = #tf.func<@"tftext:Ngrams", {axis = -1 : i64, reduction_type = "STRING_JOIN", string_separator = " ", width = 2 : i64}>, tf._input_shapes = [#tf.shape]} { +// CHECK: %0 = "tfl.custom"(%arg0) {custom_code = "tftext:Ngrams", custom_option = opaque<"tfl", "0x776964746800737472696E675F736570617261746F72000120006178697300726564756374696F6E5F74797065000B535452494E475F4A4F494E0004221E383F040104FF152D0204141404082401"> : tensor<78xi8>} : (tensor) -> tensor +// CHECK: return %0 : tensor +// CHECK: } + +func @ngrams_ragged_rank_2(%arg0: tensor {tf._user_specified_name = "values"}, %arg1: tensor<3xi64> {tf._user_specified_name = "args_0"}, %arg2: tensor {tf._user_specified_name = "args_1"}) -> (tensor, tensor<3xi64>, tensor) attributes {sym_visibility = "private", tf._implements = #tf.func<@"tftext:Ngrams", {axis = -1 : i64, reduction_type = "STRING_JOIN", string_separator = "", width = 2 : i64}>, tf._input_shapes = [#tf.shape, #tf.shape<3>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<-1> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<-1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %4 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %5 = "tf.Const"() {value = dense<> : tensor<0xi32>} : () -> tensor<0xi32> + %6 = "tf.Const"() {value = dense<-1> : tensor<1xi32>} : () -> tensor<1xi32> + %7 = "tf.Const"() {value = dense<0> : tensor<1xi32>} : () -> tensor<1xi32> + %8 = "tf.Const"() {value = dense<1> : tensor<1xi32>} : () -> tensor<1xi32> + %9 = "tf.StridedSlice"(%arg1, %7, %8, %8) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<3xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %10 = "tf.Equal"(%9, %4) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %11 = "tf.All"(%10, %5) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %12 = "tf.StridedSlice"(%arg1, %8, %7, %8) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<3xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi64> + %13 = "tf.StridedSlice"(%arg1, %7, %6, %8) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : 
(tensor<3xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi64> + %14 = "tf.Sub"(%12, %13) {device = ""} : (tensor<2xi64>, tensor<2xi64>) -> tensor<2xi64> + %15 = "tf.LessEqual"(%4, %14) {device = ""} : (tensor, tensor<2xi64>) -> tensor<2xi1> + %16 = "tf.All"(%15, %7) {device = "", keep_dims = false} : (tensor<2xi1>, tensor<1xi32>) -> tensor + %17 = "tf.StridedSlice"(%arg2, %7, %8, %8) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %18 = "tf.Equal"(%17, %4) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %19 = "tf.All"(%18, %5) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %20 = "tf.IfRegion"(%19) ( { + %72 = "std.call"(%19, %17, %4) {callee = @RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_27770} : (tensor, tensor, tensor) -> tensor + "tf.Yield"(%72) : (tensor) -> () + }, { + %72 = "std.call"(%19, %17, %4) {callee = @RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_27780} : (tensor, tensor, tensor) -> tensor + "tf.Yield"(%72) : (tensor) -> () + }) {is_stateless = false} : (tensor) -> tensor + %21 = "tf.Identity"(%20) {device = ""} : (tensor) -> tensor + %22 = "tf.StridedSlice"(%arg2, %8, %7, %8) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %23 = "tf.StridedSlice"(%arg2, %7, %6, %8) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %24 = "tf.Sub"(%22, %23) {device = ""} : (tensor, tensor) -> tensor + %25 = "tf.LessEqual"(%4, %24) {device = ""} : (tensor, tensor) -> tensor + %26 = "tf.All"(%25, %7) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %27 = "tf.IfRegion"(%26) ( { + %72 = "std.call"(%26, %24) {callee = @RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_28130} : (tensor, tensor) -> tensor + "tf.Yield"(%72) : (tensor) -> () + }, { + %72 = "std.call"(%26, %24) {callee = @RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_28140} : (tensor, tensor) -> tensor + "tf.Yield"(%72) : (tensor) -> () + }) {is_stateless = false} : (tensor) -> tensor + %28 = "tf.Identity"(%27) {device = ""} : (tensor) -> tensor + %29 = "tf.Identity"(%arg2) {_class = ["loc:@args_1"], device = ""} : (tensor) -> tensor + %30 = "tf.StridedSlice"(%29, %6, %7, %8) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %31 = "tf.Shape"(%arg0) {device = ""} : (tensor) -> tensor<1xi64> + %32 = "tf.StridedSlice"(%31, %7, %8, %8) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %33 = "tf.Equal"(%30, %32) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %34 = 
"tf.All"(%33, %5) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %35 = "tf.IfRegion"(%34) ( { + %72 = "std.call"(%34, %30, %32) {callee = @RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_28500} : (tensor, tensor, tensor) -> tensor + "tf.Yield"(%72) : (tensor) -> () + }, { + %72 = "std.call"(%34, %30, %32) {callee = @RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_28510} : (tensor, tensor, tensor) -> tensor + "tf.Yield"(%72) : (tensor) -> () + }) {is_stateless = false} : (tensor) -> tensor + %36 = "tf.Identity"(%35) {device = ""} : (tensor) -> tensor + %37 = "tf.Identity"(%29) {_class = ["loc:@args_1"], device = ""} : (tensor) -> tensor + %38 = "tf.StridedSlice"(%37, %7, %6, %8) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %39 = "tf.StridedSlice"(%37, %8, %7, %8) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %40 = "tf.Minimum"(%38, %39) {device = ""} : (tensor, tensor) -> tensor + %41 = "tf.AddV2"(%39, %1) {device = ""} : (tensor, tensor) -> tensor + %42 = "tf.Maximum"(%41, %38) {device = ""} : (tensor, tensor) -> tensor + %43:2 = "tf.RaggedRange"(%40, %42, %3) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %44 = "tf.GatherV2"(%arg0, %43#1, %2) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %45 = "tf.AddV2"(%38, %3) {device = ""} : (tensor, tensor) -> tensor + %46 = "tf.Minimum"(%45, %39) {device = ""} : (tensor, tensor) -> tensor + %47:2 = "tf.RaggedRange"(%46, %39, %3) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %48 = "tf.Equal"(%43#0, %47#0) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %49 = "tf.All"(%48, %7) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %50 = "tf.GatherV2"(%arg0, %47#1, %2) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %51 = "tf.Shape"(%37) {device = ""} : (tensor) -> tensor<1xi64> + %52 = "tf.StridedSlice"(%51, %7, %8, %8) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %53 = "tf.Sub"(%52, %3) {device = ""} : (tensor, tensor) -> tensor + %54 = "tf.IfRegion"(%11) ( { + %72 = "std.call"(%11, %9, %4) {callee = @RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_28900} : (tensor, tensor, tensor) -> tensor + "tf.Yield"(%72) : (tensor) -> () + }, { + %72 = "std.call"(%11, %9, %4) {callee = @RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_28910} : (tensor, tensor, tensor) -> tensor + "tf.Yield"(%72) : (tensor) -> () + }) {is_stateless = false} : (tensor) -> tensor + %55 = "tf.Identity"(%54) {device = ""} : (tensor) -> tensor + %56 = "tf.IfRegion"(%16) ( { + %72 = "std.call"(%16, %14) {callee = @RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_29260} : (tensor, tensor<2xi64>) -> tensor + "tf.Yield"(%72) : 
(tensor) -> () + }, { + %72 = "std.call"(%16, %14) {callee = @RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_29270} : (tensor, tensor<2xi64>) -> tensor + "tf.Yield"(%72) : (tensor) -> () + }) {is_stateless = false} : (tensor) -> tensor + %57 = "tf.Identity"(%56) {device = ""} : (tensor) -> tensor + %58 = "tf.Identity"(%arg1) {_class = ["loc:@args_0"], device = ""} : (tensor<3xi64>) -> tensor<3xi64> + %59 = "tf.StridedSlice"(%58, %6, %7, %8) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<3xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %60 = "tf.Equal"(%59, %53) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %61 = "tf.All"(%60, %5) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %62 = "tf.IfRegion"(%61) ( { + %72 = "std.call"(%61, %59, %53) {callee = @RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_true_29650} : (tensor, tensor, tensor) -> tensor + "tf.Yield"(%72) : (tensor) -> () + }, { + %72 = "std.call"(%61, %59, %53) {callee = @RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_false_29660} : (tensor, tensor, tensor) -> tensor + "tf.Yield"(%72) : (tensor) -> () + }) {is_stateless = false} : (tensor) -> tensor + %63 = "tf.IfRegion"(%49) ( { + %72 = "std.call"(%49, %43#0, %47#0) {callee = @NGrams_SlidingWindow_RaggedConcat_assert_equal_2_Assert_AssertGuard_true_30330} : (tensor, tensor, tensor) -> tensor + "tf.Yield"(%72) : (tensor) -> () + }, { + %72 = "std.call"(%49, %43#0, %47#0) {callee = @NGrams_SlidingWindow_RaggedConcat_assert_equal_2_Assert_AssertGuard_false_30340} : (tensor, tensor, tensor) -> tensor + "tf.Yield"(%72) : (tensor) -> () + }) {is_stateless = false} : (tensor) -> tensor + %64 = "tf.Identity"(%43#0) {device = ""} : (tensor) -> tensor + %65 = "tf.Identity"(%63) {device = ""} : (tensor) -> tensor + %66 = "tf.Pack"(%44, %50) {axis = 1 : i64, device = ""} : (tensor, tensor) -> tensor + %67 = "tf.ReduceJoin"(%66, %0) {device = "", keep_dims = false, separator = ""} : (tensor, tensor) -> tensor + %68 = "tf.Identity"(%67) {device = ""} : (tensor) -> tensor + %69 = "tf.Identity"(%62) {device = ""} : (tensor) -> tensor + %70 = "tf.Identity"(%58) {_class = ["loc:@args_0"], device = ""} : (tensor<3xi64>) -> tensor<3xi64> + %71 = "tf.Identity"(%70) {device = ""} : (tensor<3xi64>) -> tensor<3xi64> + return %68, %71, %64 : tensor, tensor<3xi64>, tensor +} +func @RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_27770(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_27780(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not 
hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (RaggedFromNestedRowSplits/RaggedFromRowSplits/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (RaggedFromNestedRowSplits/RaggedFromRowSplits/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_28130(%arg0: tensor, %arg1: tensor) -> tensor attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_28140(%arg0: tensor, %arg1: tensor) -> tensor attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (RaggedFromNestedRowSplits/RaggedFromRowSplits/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor) -> () + %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor + return %4 : tensor +} +func @RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_28500(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_28510(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (RaggedFromNestedRowSplits/RaggedFromRowSplits/strided_slice_1:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (RaggedFromNestedRowSplits/RaggedFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor +"tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func 
@RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_28900(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_28910(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_29260(%arg0: tensor, %arg1: tensor<2xi64>) -> tensor attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<>, #tf.shape<2>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_29270(%arg0: tensor, %arg1: tensor<2xi64>) -> tensor attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<>, #tf.shape<2>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor<2xi64>) -> () + %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor + return %4 : tensor +} +func @RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_true_29650(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_false_29660(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> 
tensor attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (RaggedFromNestedRowSplits/RaggedFromRowSplits_1/strided_slice:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RaggedNRows/sub:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +func @NGrams_SlidingWindow_RaggedConcat_assert_equal_2_Assert_AssertGuard_true_30330(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor +} +func @NGrams_SlidingWindow_RaggedConcat_assert_equal_2_Assert_AssertGuard_false_30340(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {sym_visibility = "private", tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Inputs must have identical ragged splits"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (NGrams/SlidingWindow/RaggedGetItem/RaggedRange:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (NGrams/SlidingWindow/RaggedGetItem_1/RaggedRange:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor +} +// CHECK: func @ngrams_ragged_rank_2(%arg0: tensor {tf._user_specified_name = "values"}, %arg1: tensor<3xi64> {tf._user_specified_name = "args_0"}, %arg2: tensor {tf._user_specified_name = "args_1"}) -> (tensor, tensor<3xi64>, tensor) attributes {sym_visibility = "private", tf._implements = #tf.func<@"tftext:Ngrams", {axis = -1 : i64, reduction_type = "STRING_JOIN", string_separator = "", width = 2 : i64}>, tf._input_shapes = [#tf.shape, #tf.shape<3>, #tf.shape], tf.signature.is_stateful} { +// CHECK: %0:3 = "tfl.custom"(%arg0, %arg1, %arg2) {custom_code = "tftext:Ngrams", custom_option = opaque<"tfl", "0x776964746800737472696E675F736570617261746F720000006178697300726564756374696F6E5F74797065000B535452494E475F4A4F494E0004221E373E040104FF152C0204141404082401"> : tensor<77xi8>} : (tensor, tensor<3xi64>, tensor) -> (tensor, tensor<3xi64>, tensor) +// CHECK: return %0#0, %0#1, %0#2 : tensor, tensor<3xi64>, tensor \ No newline at end of file diff --git a/tensorflow/compiler/mlir/lite/utils/tftext_utils.cc b/tensorflow/compiler/mlir/lite/utils/tftext_utils.cc index 1681f654b92..96d22cb51e9 100644 --- a/tensorflow/compiler/mlir/lite/utils/tftext_utils.cc +++ 
b/tensorflow/compiler/mlir/lite/utils/tftext_utils.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/compiler/mlir/lite/utils/tftext_utils.h" +#include "flatbuffers/flexbuffers.h" // from @flatbuffers #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/None.h" #include "llvm/ADT/SmallVector.h" @@ -28,6 +29,7 @@ limitations under the License. #include "mlir/IR/Identifier.h" // from @llvm-project #include "mlir/IR/Location.h" // from @llvm-project #include "mlir/IR/MLIRContext.h" // from @llvm-project +#include "mlir/IR/Matchers.h" // from @llvm-project #include "mlir/IR/OpDefinition.h" // from @llvm-project #include "mlir/IR/Operation.h" // from @llvm-project #include "mlir/IR/StandardTypes.h" // from @llvm-project @@ -43,32 +45,35 @@ namespace TFL { namespace { +constexpr char kNgrams[] = "tftext:Ngrams"; constexpr char kWhitespaceTokenizer[] = "tftext:WhitespaceTokenizer"; constexpr char kTFImplements[] = "tf._implements"; using mlir::TF::FuncAttr; +using mlir::TF::StringType; -inline OpaqueElementsAttr emptyCustomOption(OpBuilder* builder) { - std::string content = ""; +inline OpaqueElementsAttr CustomOption(OpBuilder* builder, + const std::string& content) { ShapedType type = RankedTensorType::get( {static_cast(content.size())}, builder->getIntegerType(8)); return OpaqueElementsAttr::get( - builder->getContext()->getRegisteredDialect("tfl"), type, content); + builder->getContext()->getRegisteredDialect("tfl"), type, + StringRef(content.data(), content.size())); } -inline RankedTensorType getInputType(mlir::FuncOp func, int idx) { - return func.getType() - .getInput(idx) - .dyn_cast_or_null(); +inline TensorType GetInputType(FuncOp func, int idx) { + return func.getType().getInput(idx).dyn_cast_or_null(); } -inline RankedTensorType getResultType(mlir::FuncOp func, int idx) { - return func.getType() - .getResult(idx) - .dyn_cast_or_null(); +inline TensorType GetResultType(FuncOp func, int idx) { + return func.getType().getResult(idx).dyn_cast_or_null(); } -LogicalResult VerifyWhitespaceTokenizer(mlir::FuncOp func) { +inline bool RankEquals(const TensorType& type, int rank) { + return type && type.hasRank() && type.getRank() == rank; +} + +LogicalResult VerifyWhitespaceTokenizer(FuncOp func) { // In the case of input tensor with 0 rank. // Whitespace tokenizer generates 1 output: // * String tensor for tokens. @@ -83,8 +88,8 @@ LogicalResult VerifyWhitespaceTokenizer(mlir::FuncOp func) { // * 1st output is the value of ragged tensor; // * 2nd output is the inner offset; // * 3rd output is the outer offset. 
- auto input_type = getInputType(func, 0); - if (!input_type || !input_type.getElementType().isa() || + auto input_type = GetInputType(func, 0); + if (!input_type || !input_type.getElementType().isa() || !input_type.hasRank()) { return func.emitError() << "Input should be a string tensor"; } @@ -100,21 +105,21 @@ LogicalResult VerifyWhitespaceTokenizer(mlir::FuncOp func) { << "output(s) when input has rank " << input_type.getRank(); } - auto value_type = getResultType(func, 0); - if (!value_type || !value_type.hasRank() || value_type.getRank() != 1 || - !value_type.getElementType().isa()) { + auto value_type = GetResultType(func, 0); + if (!RankEquals(value_type, 1) || + !value_type.getElementType().isa()) { return func.emitError() << "1st output should be string tensor"; } if (func.getNumResults() > 1) { - auto offset_type = getResultType(func, 1); - if (!offset_type || !offset_type.hasRank() || offset_type.getRank() != 1 || + auto offset_type = GetResultType(func, 1); + if (!RankEquals(offset_type, 1) || !offset_type.getElementType().isInteger(64)) { return func.emitError() << "2nd output should be int64 tensor"; } } if (func.getNumResults() > 2) { - auto offset_type = getResultType(func, 2); - if (!offset_type || !offset_type.hasRank() || offset_type.getRank() != 1 || + auto offset_type = GetResultType(func, 2); + if (!RankEquals(offset_type, 1) || !offset_type.getElementType().isInteger(64)) { return func.emitError() << "3rd output should be int64 tensor"; } @@ -123,28 +128,159 @@ LogicalResult VerifyWhitespaceTokenizer(mlir::FuncOp func) { return success(); } -LogicalResult ConvertWhitespaceTokenizer(mlir::FuncOp func, llvm::StringRef api, +LogicalResult ConvertWhitespaceTokenizer(FuncOp func, llvm::StringRef api, FuncAttr attr) { func.eraseBody(); func.addEntryBlock(); func.setAttr(kTFImplements, attr); - Value text = func.getArgument(0); OpBuilder builder(func.getBody()); - - auto op = builder.create( - func.getLoc(), func.getType().getResults(), ValueRange(text), api, - emptyCustomOption(&builder)); - builder.create(func.getLoc(), op.getResults()); + std::string empty_option_buffer; + auto op = builder.create( + func.getLoc(), func.getType().getResults(), func.getArguments(), api, + CustomOption(&builder, empty_option_buffer)); + builder.create(func.getLoc(), op.getResults()); return success(); } + +LogicalResult VerifyNgrams(FuncOp func) { + // The inputs and outputs should be the same: + // * A string tensor for tokens/ragged tensor values. + // * Zero or more row_split tensors. 
+ constexpr int kValues = 0; + constexpr int kRowSplits = 1; + + if (func.getType().getInputs().size() != func.getType().getResults().size()) { + return func.emitError() << "Mismatched number of inputs and outputs."; + } + + int row_splits = func.getType().getInputs().size() - kRowSplits; + if (row_splits == 0) { + auto input_values = GetInputType(func, kValues); + if (!input_values || !input_values.getElementType().isa()) { + return func.emitError() + << "Input " << kValues << " should be a string tensor"; + } + auto output_values = GetResultType(func, kValues); + if (!output_values || !output_values.getElementType().isa()) { + return func.emitError() + << "Output " << kValues << " should be a string tensor"; + } + + if (input_values.hasRank() && output_values.hasRank() && + input_values.getRank() != output_values.getRank()) { + return func.emitError() << "Input " << kValues << " and output " + << kValues << " should have the same rank"; + } + } else { + auto input_values = GetInputType(func, kValues); + if (!RankEquals(input_values, 1) || + !input_values.getElementType().isa()) { + return func.emitError() + << "Input " << kValues << " should be a 1D string tensor"; + } + auto output_values = GetResultType(func, kValues); + if (!RankEquals(output_values, 1) || + !output_values.getElementType().isa()) { + return func.emitError() + << "Output " << kValues << " should be a 1D string tensor"; + } + + for (int i = 0; i < row_splits; ++i) { + const int row_index = i + kRowSplits; + auto input_row_splits = GetInputType(func, row_index); + if (!RankEquals(input_row_splits, 1) || + !input_row_splits.getElementType().isInteger(64)) { + return func.emitError() + << "Input " << row_index << " should be a 1D int64 tensor"; + } + auto output_row_splits = GetResultType(func, row_index); + if (!RankEquals(output_row_splits, 1) || + !output_row_splits.getElementType().isInteger(64)) { + return func.emitError() + << "Output " << row_index << " should be a 1D int64 tensor"; + } + } + } + + return success(); +} + +LogicalResult CreateNgramsCustomOption(FuncOp func, DictionaryAttr attrs, + std::string& custom_option_buffer) { + flexbuffers::Builder fbb; + size_t start_map = fbb.StartMap(); + + auto width = attrs.get("width").dyn_cast_or_null(); + if (!width) { + return func.emitError() << "'width' attribute is not set or not an integer"; + } + fbb.Int("width", width.getInt()); + + auto string_separator = + attrs.get("string_separator").dyn_cast_or_null(); + if (!string_separator) { + return func.emitError() + << "'string_separator' attribute is not set or not a string"; + } + // StringAttrs are not guaranteed to be NUL terminated, but flexbuffers + // strings expect NUL terminated strings. + std::string string_separator_str(string_separator.getValue().data(), + string_separator.getValue().size()); + fbb.String("string_separator", string_separator_str); + + auto axis = attrs.get("axis").dyn_cast_or_null(); + if (!axis) { + return func.emitError() << "'axis' attribute is not set or not an integer"; + } + fbb.Int("axis", axis.getInt()); + + auto reduction_type = + attrs.get("reduction_type").dyn_cast_or_null(); + if (!reduction_type) { + return func.emitError() + << "'reduction_type' attribute is not set or not a string"; + } + // StringAttrs are not guaranteed to be NUL terminated, but flexbuffers + // strings expect NUL terminated strings. 
+ std::string reduction_type_str(reduction_type.getValue().data(), + reduction_type.getValue().size()); + fbb.String("reduction_type", reduction_type_str); + + fbb.EndMap(start_map); + fbb.Finish(); + custom_option_buffer.assign(fbb.GetBuffer().begin(), fbb.GetBuffer().end()); + return success(); +} + +LogicalResult ConvertNgrams(FuncOp func, llvm::StringRef api, FuncAttr attr) { + func.eraseBody(); + func.addEntryBlock(); + func.setAttr(kTFImplements, attr); + OpBuilder builder(func.getBody()); + std::string custom_option_buffer; + if (failed(CreateNgramsCustomOption(func, attr.GetAttrs(), + custom_option_buffer))) { + return failure(); + } + auto op = builder.create( + func.getLoc(), func.getType().getResults(), func.getArguments(), api, + CustomOption(&builder, custom_option_buffer)); + builder.create(func.getLoc(), op.getResults()); + return success(); +} + } // namespace -LogicalResult ConvertTFTextAPI(mlir::FuncOp func, llvm::StringRef api, +LogicalResult ConvertTFTextAPI(FuncOp func, llvm::StringRef api, FuncAttr attr) { if (api.str() == kWhitespaceTokenizer) { if (succeeded(VerifyWhitespaceTokenizer(func))) { return ConvertWhitespaceTokenizer(func, api, attr); } + } else if (api.str() == kNgrams) { + if (succeeded(VerifyNgrams(func))) { + return ConvertNgrams(func, api, attr); + } } return failure(); } From 11ae78cc4db61c37eab5fdcd45096d0ae927af5f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Jul 2020 15:45:35 -0700 Subject: [PATCH 1416/2522] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 323457060 Change-Id: I29394a15c1bfbcee806053ff47a6a8f23aacd520 --- tensorflow/go/op/wrappers.go | 13163 +++++++++++++++++---------------- 1 file changed, 6719 insertions(+), 6444 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index fdec7e2580f..687bc4ced33 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -2472,79 +2472,6 @@ func ConcatOffset(scope *Scope, concat_dim tf.Output, shape []tf.Output) (offset return offset } -// Checks a tensor for NaN and Inf values. -// -// When run, reports an `InvalidArgument` error if `tensor` has any values -// that are not a number (NaN) or infinity (Inf). Otherwise, passes `tensor` as-is. -// -// Arguments: -// -// message: Prefix of the error message. -func CheckNumerics(scope *Scope, tensor tf.Output, message string) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"message": message} - opspec := tf.OpSpec{ - Type: "CheckNumerics", - Input: []tf.Input{ - tensor, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Broadcast an array for a compatible shape. -// -// Broadcasting is the process of making arrays to have compatible shapes -// for arithmetic operations. Two shapes are compatible if for each -// dimension pair they are either equal or one of them is one. When trying -// to broadcast a Tensor to a shape, it starts with the trailing dimensions, -// and works its way forward. -// -// For example, -// -// >>> x = tf.constant([1, 2, 3]) -// >>> y = tf.broadcast_to(x, [3, 3]) -// >>> print(y) -// tf.Tensor( -// [[1 2 3] -// [1 2 3] -// [1 2 3]], shape=(3, 3), dtype=int32) -// -// In the above example, the input Tensor with the shape of `[1, 3]` -// is broadcasted to output Tensor with shape of `[3, 3]`. 
-// -// When doing broadcasted operations such as multiplying a tensor -// by a scalar, broadcasting (usually) confers some time or space -// benefit, as the broadcasted tensor is never materialized. -// -// However, `broadcast_to` does not carry with it any such benefits. -// The newly-created tensor takes the full memory of the broadcasted -// shape. (In a graph context, `broadcast_to` might be fused to -// subsequent operation and then be optimized away, however.) -// -// Arguments: -// input: A Tensor to broadcast. -// shape: An 1-D `int` Tensor. The shape of the desired output. -// -// Returns A Tensor. -func BroadcastTo(scope *Scope, input tf.Output, shape tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "BroadcastTo", - Input: []tf.Input{ - input, shape, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Converts an array of flat indices into a tuple of coordinate arrays. // // @@ -13228,119 +13155,40 @@ func BoostedTreesPredict(scope *Scope, tree_ensemble_handle tf.Output, bucketize return op.Output(0) } -// RandomGammaAttr is an optional argument to RandomGamma. -type RandomGammaAttr func(optionalAttr) +// ResizeBilinearAttr is an optional argument to ResizeBilinear. +type ResizeBilinearAttr func(optionalAttr) -// RandomGammaSeed sets the optional seed attribute to value. -// -// value: If either `seed` or `seed2` are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func RandomGammaSeed(value int64) RandomGammaAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// RandomGammaSeed2 sets the optional seed2 attribute to value. -// -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func RandomGammaSeed2(value int64) RandomGammaAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Outputs random values from the Gamma distribution(s) described by alpha. -// -// This op uses the algorithm by Marsaglia et al. to acquire samples via -// transformation-rejection from pairs of uniform and normal random variables. -// See http://dl.acm.org/citation.cfm?id=358414 -// -// Arguments: -// shape: 1-D integer tensor. Shape of independent samples to draw from each -// distribution described by the shape parameters given in alpha. -// alpha: A tensor in which each scalar is a "shape" parameter describing the -// associated gamma distribution. -// -// Returns A tensor with shape `shape + shape(alpha)`. Each slice -// `[:, ..., :, i0, i1, ...iN]` contains the samples drawn for -// `alpha[i0, i1, ...iN]`. The dtype of the output matches the dtype of alpha. -func RandomGamma(scope *Scope, shape tf.Output, alpha tf.Output, optional ...RandomGammaAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RandomGamma", - Input: []tf.Input{ - shape, alpha, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns 0 if x == 0, and x * log1p(y) otherwise, elementwise. 
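The wrappers in this file are plain graph-building helpers on an op.Scope. As a hedged illustration only (assuming the standard tensorflow/go bindings, import paths, and the Const helper from the same op package), a minimal program that drives the BroadcastTo wrapper shown above might look like this:

package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	x := op.Const(s, []int32{1, 2, 3})
	// Broadcast the 1-D constant to shape [3, 3], as in the BroadcastTo doc above.
	y := op.BroadcastTo(s, x, op.Const(s, []int32{3, 3}))
	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()
	out, err := sess.Run(nil, []tf.Output{y}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // [[1 2 3] [1 2 3] [1 2 3]]
}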
-func Xlog1py(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Xlog1py", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// QuantizedResizeBilinearAttr is an optional argument to QuantizedResizeBilinear. -type QuantizedResizeBilinearAttr func(optionalAttr) - -// QuantizedResizeBilinearAlignCorners sets the optional align_corners attribute to value. +// ResizeBilinearAlignCorners sets the optional align_corners attribute to value. // // value: If true, the centers of the 4 corner pixels of the input and output tensors are // aligned, preserving the values at the corner pixels. Defaults to false. // If not specified, defaults to false -func QuantizedResizeBilinearAlignCorners(value bool) QuantizedResizeBilinearAttr { +func ResizeBilinearAlignCorners(value bool) ResizeBilinearAttr { return func(m optionalAttr) { m["align_corners"] = value } } -// QuantizedResizeBilinearHalfPixelCenters sets the optional half_pixel_centers attribute to value. +// ResizeBilinearHalfPixelCenters sets the optional half_pixel_centers attribute to value. // If not specified, defaults to false -func QuantizedResizeBilinearHalfPixelCenters(value bool) QuantizedResizeBilinearAttr { +func ResizeBilinearHalfPixelCenters(value bool) ResizeBilinearAttr { return func(m optionalAttr) { m["half_pixel_centers"] = value } } -// Resize quantized `images` to `size` using quantized bilinear interpolation. +// Resize `images` to `size` using bilinear interpolation. // -// Input images and output images must be quantized types. +// Input images can be of different types but output images are always float. // // Arguments: // images: 4-D with shape `[batch, height, width, channels]`. // size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The // new size for the images. // -// -// -// Returns: -// resized_images: 4-D with shape +// Returns 4-D with shape // `[batch, new_height, new_width, channels]`. -// out_min -// out_max -func QuantizedResizeBilinear(scope *Scope, images tf.Output, size tf.Output, min tf.Output, max tf.Output, optional ...QuantizedResizeBilinearAttr) (resized_images tf.Output, out_min tf.Output, out_max tf.Output) { +func ResizeBilinear(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeBilinearAttr) (resized_images tf.Output) { if scope.Err() != nil { return } @@ -13349,14 +13197,81 @@ func QuantizedResizeBilinear(scope *Scope, images tf.Output, size tf.Output, min a(attrs) } opspec := tf.OpSpec{ - Type: "QuantizedResizeBilinear", + Type: "ResizeBilinear", Input: []tf.Input{ - images, size, min, max, + images, size, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) +} + +// Returns the set of files matching one or more glob patterns. +// +// Note that this routine only supports wildcard characters in the +// basename portion of the pattern, not in the directory portion. +// Note also that the order of filenames returned is deterministic. +// +// Arguments: +// pattern: Shell wildcard pattern(s). Scalar or vector of type string. +// +// Returns A vector of matching filenames. 
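Optional attributes such as align_corners are exposed through the functional-option setters shown above rather than extra positional arguments. A minimal sketch, assuming the stock Placeholder/Const helpers and tf.MakeShape from the standard Go bindings:

package main

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

// buildResize wires a ResizeBilinear node that upsamples a 1x64x64x3 float
// image to 128x128, with align_corners enabled via the option setter.
func buildResize() (*tf.Graph, tf.Output, tf.Output, error) {
	s := op.NewScope()
	images := op.Placeholder(s, tf.Float, op.PlaceholderShape(tf.MakeShape(1, 64, 64, 3)))
	size := op.Const(s, []int32{128, 128})
	resized := op.ResizeBilinear(s, images, size, op.ResizeBilinearAlignCorners(true))
	g, err := s.Finalize()
	return g, images, resized, err
}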
+func MatchingFiles(scope *Scope, pattern tf.Output) (filenames tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "MatchingFiles", + Input: []tf.Input{ + pattern, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Restore a Reader to its initial clean state. +// +// Arguments: +// reader_handle: Handle to a Reader. +// +// Returns the created operation. +func ReaderResetV2(scope *Scope, reader_handle tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ReaderResetV2", + Input: []tf.Input{ + reader_handle, + }, + } + return scope.AddOperation(opspec) +} + +// Restore a reader to a previously saved state. +// +// Not all Readers support being restored, so this can produce an +// Unimplemented error. +// +// Arguments: +// reader_handle: Handle to a Reader. +// state: Result of a ReaderSerializeState of a Reader with type +// matching reader_handle. +// +// Returns the created operation. +func ReaderRestoreStateV2(scope *Scope, reader_handle tf.Output, state tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ReaderRestoreStateV2", + Input: []tf.Input{ + reader_handle, state, + }, + } + return scope.AddOperation(opspec) } // ResizeAreaAttr is an optional argument to ResizeArea. @@ -13413,192 +13328,6 @@ func ResizeArea(scope *Scope, images tf.Output, size tf.Output, optional ...Resi return op.Output(0) } -// Restore a reader to a previously saved state. -// -// Not all Readers support being restored, so this can produce an -// Unimplemented error. -// -// Arguments: -// reader_handle: Handle to a Reader. -// state: Result of a ReaderSerializeState of a Reader with type -// matching reader_handle. -// -// Returns the created operation. -func ReaderRestoreStateV2(scope *Scope, reader_handle tf.Output, state tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ReaderRestoreStateV2", - Input: []tf.Input{ - reader_handle, state, - }, - } - return scope.AddOperation(opspec) -} - -// Computes rectified linear 6: `min(max(features, 0), 6)`. -func Relu6(scope *Scope, features tf.Output) (activations tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Relu6", - Input: []tf.Input{ - features, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RaggedRangeAttr is an optional argument to RaggedRange. -type RaggedRangeAttr func(optionalAttr) - -// RaggedRangeTsplits sets the optional Tsplits attribute to value. -// If not specified, defaults to DT_INT64 -func RaggedRangeTsplits(value tf.DataType) RaggedRangeAttr { - return func(m optionalAttr) { - m["Tsplits"] = value - } -} - -// Returns a `RaggedTensor` containing the specified sequences of numbers. -// -// -// Returns a `RaggedTensor` `result` composed from `rt_dense_values` and -// `rt_nested_splits`, such that -// `result[i] = range(starts[i], limits[i], deltas[i])`. -// -// ```python -// (rt_nested_splits, rt_dense_values) = ragged_range( -// starts=[2, 5, 8], limits=[3, 5, 12], deltas=1) -// result = tf.ragged.from_row_splits(rt_dense_values, rt_nested_splits) -// print(result) -// -// ``` -// -// The input tensors `starts`, `limits`, and `deltas` may be scalars or vectors. -// The vector inputs must all have the same size. Scalar inputs are broadcast -// to match the size of the vector inputs. -// -// Arguments: -// starts: The starts of each range. -// limits: The limits of each range. 
-// deltas: The deltas of each range. -// -// Returns: -// rt_nested_splits: The `row_splits` for the returned `RaggedTensor`. -// rt_dense_values: The `flat_values` for the returned `RaggedTensor`. -func RaggedRange(scope *Scope, starts tf.Output, limits tf.Output, deltas tf.Output, optional ...RaggedRangeAttr) (rt_nested_splits tf.Output, rt_dense_values tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RaggedRange", - Input: []tf.Input{ - starts, limits, deltas, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Split a `SparseTensor` into `num_split` tensors along one dimension. -// -// If the `shape[split_dim]` is not an integer multiple of `num_split`. Slices -// `[0 : shape[split_dim] % num_split]` gets one extra dimension. -// For example, if `split_dim = 1` and `num_split = 2` and the input is -// -// input_tensor = shape = [2, 7] -// [ a d e ] -// [b c ] -// -// Graphically the output tensors are: -// -// output_tensor[0] = shape = [2, 4] -// [ a ] -// [b c ] -// -// output_tensor[1] = shape = [2, 3] -// [ d e ] -// [ ] -// -// Arguments: -// split_dim: 0-D. The dimension along which to split. Must be in the range -// `[0, rank(shape))`. -// indices: 2-D tensor represents the indices of the sparse tensor. -// values: 1-D tensor represents the values of the sparse tensor. -// shape: 1-D. tensor represents the shape of the sparse tensor. -// output indices: A list of 1-D tensors represents the indices of the output -// sparse tensors. -// num_split: The number of ways to split. -// -// Returns: -// output_indices -// output_values: A list of 1-D tensors represents the values of the output sparse -// tensors. -// output_shape: A list of 1-D tensors represents the shape of the output sparse -// tensors. -func SparseSplit(scope *Scope, split_dim tf.Output, indices tf.Output, values tf.Output, shape tf.Output, num_split int64) (output_indices []tf.Output, output_values []tf.Output, output_shape []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_split": num_split} - opspec := tf.OpSpec{ - Type: "SparseSplit", - Input: []tf.Input{ - split_dim, indices, values, shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if output_indices, idx, err = makeOutputList(op, idx, "output_indices"); err != nil { - scope.UpdateErr("SparseSplit", err) - return - } - if output_values, idx, err = makeOutputList(op, idx, "output_values"); err != nil { - scope.UpdateErr("SparseSplit", err) - return - } - if output_shape, idx, err = makeOutputList(op, idx, "output_shape"); err != nil { - scope.UpdateErr("SparseSplit", err) - return - } - return output_indices, output_values, output_shape -} - -// Produce a string tensor that encodes the state of a Reader. -// -// Not all Readers support being serialized, so this can produce an -// Unimplemented error. -// -// Arguments: -// reader_handle: Handle to a Reader. -func ReaderSerializeStateV2(scope *Scope, reader_handle tf.Output) (state tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ReaderSerializeStateV2", - Input: []tf.Input{ - reader_handle, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Returns up to `num_records` (key, value) pairs produced by a Reader. 
// // Will dequeue from the input queue if necessary (e.g. when the @@ -15085,58 +14814,6 @@ func MatrixDeterminant(scope *Scope, input tf.Output) (output tf.Output) { return op.Output(0) } -// ResizeBilinearAttr is an optional argument to ResizeBilinear. -type ResizeBilinearAttr func(optionalAttr) - -// ResizeBilinearAlignCorners sets the optional align_corners attribute to value. -// -// value: If true, the centers of the 4 corner pixels of the input and output tensors are -// aligned, preserving the values at the corner pixels. Defaults to false. -// If not specified, defaults to false -func ResizeBilinearAlignCorners(value bool) ResizeBilinearAttr { - return func(m optionalAttr) { - m["align_corners"] = value - } -} - -// ResizeBilinearHalfPixelCenters sets the optional half_pixel_centers attribute to value. -// If not specified, defaults to false -func ResizeBilinearHalfPixelCenters(value bool) ResizeBilinearAttr { - return func(m optionalAttr) { - m["half_pixel_centers"] = value - } -} - -// Resize `images` to `size` using bilinear interpolation. -// -// Input images can be of different types but output images are always float. -// -// Arguments: -// images: 4-D with shape `[batch, height, width, channels]`. -// size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The -// new size for the images. -// -// Returns 4-D with shape -// `[batch, new_height, new_width, channels]`. -func ResizeBilinear(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeBilinearAttr) (resized_images tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResizeBilinear", - Input: []tf.Input{ - images, size, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Creates a TensorList by indexing into a Tensor. // // Each member of the TensorList corresponds to one row of the input tensor, @@ -16559,36 +16236,6 @@ func TensorListSplit(scope *Scope, tensor tf.Output, element_shape tf.Output, le return op.Output(0) } -// Check if the input matches the regex pattern. -// -// The input is a string tensor of any shape. The pattern is the -// regular expression to be matched with every element of the input tensor. -// The boolean values (True or False) of the output tensor indicate -// if the input matches the regex pattern provided. -// -// The pattern follows the re2 syntax (https://github.com/google/re2/wiki/Syntax) -// -// Arguments: -// input: A string tensor of the text to be processed. -// pattern: The regular expression to match the input. -// -// Returns A bool tensor with the same shape as `input`. -func StaticRegexFullMatch(scope *Scope, input tf.Output, pattern string) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"pattern": pattern} - opspec := tf.OpSpec{ - Type: "StaticRegexFullMatch", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // ParseSingleSequenceExampleAttr is an optional argument to ParseSingleSequenceExample. 
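StaticRegexFullMatch (moved above) bakes the RE2 pattern into the node as a string attribute, so only the text tensor is fed at run time. A minimal sketch, assuming the standard tensorflow/go bindings:

package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	lines := op.Const(s, []string{"abc123", "no digits here"})
	// The pattern is a node attribute, not an input tensor; the match is a full match.
	matched := op.StaticRegexFullMatch(s, lines, "[a-z]+[0-9]+")
	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()
	out, err := sess.Run(nil, []tf.Output{matched}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // [true false]
}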
type ParseSingleSequenceExampleAttr func(optionalAttr) @@ -16753,6 +16400,36 @@ func ParseSingleSequenceExample(scope *Scope, serialized tf.Output, feature_list return context_sparse_indices, context_sparse_values, context_sparse_shapes, context_dense_values, feature_list_sparse_indices, feature_list_sparse_values, feature_list_sparse_shapes, feature_list_dense_values } +// Check if the input matches the regex pattern. +// +// The input is a string tensor of any shape. The pattern is the +// regular expression to be matched with every element of the input tensor. +// The boolean values (True or False) of the output tensor indicate +// if the input matches the regex pattern provided. +// +// The pattern follows the re2 syntax (https://github.com/google/re2/wiki/Syntax) +// +// Arguments: +// input: A string tensor of the text to be processed. +// pattern: The regular expression to match the input. +// +// Returns A bool tensor with the same shape as `input`. +func StaticRegexFullMatch(scope *Scope, input tf.Output, pattern string) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"pattern": pattern} + opspec := tf.OpSpec{ + Type: "StaticRegexFullMatch", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Computes the number of elements in the given table. // // Arguments: @@ -17277,6 +16954,45 @@ func RequantizationRange(scope *Scope, input tf.Output, input_min tf.Output, inp return op.Output(0), op.Output(1) } +// TPUPartitionedInputAttr is an optional argument to TPUPartitionedInput. +type TPUPartitionedInputAttr func(optionalAttr) + +// TPUPartitionedInputPartitionDim sets the optional partition_dim attribute to value. +// +// value: An integer describles which dimension is partitioned. -1 means +// those inputs are replicated. +// If not specified, defaults to 0 +func TPUPartitionedInputPartitionDim(value int64) TPUPartitionedInputAttr { + return func(m optionalAttr) { + m["partition_dim"] = value + } +} + +// An op that groups a list of partitioned inputs together. This op +// +// Arguments: +// inputs: A list of partitioned inputs which must have the same shape. +// +// Returns A handle which represents the full shape of partitioned tensors. +func TPUPartitionedInput(scope *Scope, inputs []tf.Output, optional ...TPUPartitionedInputAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "TPUPartitionedInput", + Input: []tf.Input{ + tf.OutputList(inputs), + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Compare values of `input` to `threshold` and pack resulting bits into a `uint8`. // // Each comparison returns a boolean `true` (if `input_value > threshold`) @@ -19852,6 +19568,28 @@ func ShapeN(scope *Scope, input []tf.Output, optional ...ShapeNAttr) (output []t return output } +// Returns the TopK values in the array in sorted order. This is a combination +// +// of MakeUnique and TopKUnique. The returned top-K will have its lower bits +// replaced by iota, thus it will be close to the original value but not exactly +// the same. The running time is proportional to the product of K and the input +// size. NaNs are never returned. Subnormal numbers are flushed to zero. 
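TopKWithUnique takes k as an attribute and returns both the (approximately de-duplicated) values and their indices. A sketch under the assumption that the input is a [batch, n] float32 score matrix, which is how this TPU-oriented op is normally used:

package main

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

// buildTopK selects the two largest scores per row. Per the doc above, the
// returned values have their low bits replaced by iota, so they are close to,
// but not bit-identical with, the originals.
func buildTopK() (*tf.Graph, tf.Output, tf.Output, error) {
	s := op.NewScope()
	scores := op.Const(s, [][]float32{{0.1, 0.7, 0.3, 0.9}})
	topk, indices := op.TopKWithUnique(s, scores, 2)
	g, err := s.Finalize()
	return g, topk, indices, err
}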
+func TopKWithUnique(scope *Scope, input tf.Output, k int64) (topk tf.Output, topk_indices tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"k": k} + opspec := tf.OpSpec{ + Type: "TopKWithUnique", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + // ImageSummaryAttr is an optional argument to ImageSummary. type ImageSummaryAttr func(optionalAttr) @@ -20431,6 +20169,137 @@ func Maximum(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { return op.Output(0) } +// QuantizedResizeBilinearAttr is an optional argument to QuantizedResizeBilinear. +type QuantizedResizeBilinearAttr func(optionalAttr) + +// QuantizedResizeBilinearAlignCorners sets the optional align_corners attribute to value. +// +// value: If true, the centers of the 4 corner pixels of the input and output tensors are +// aligned, preserving the values at the corner pixels. Defaults to false. +// If not specified, defaults to false +func QuantizedResizeBilinearAlignCorners(value bool) QuantizedResizeBilinearAttr { + return func(m optionalAttr) { + m["align_corners"] = value + } +} + +// QuantizedResizeBilinearHalfPixelCenters sets the optional half_pixel_centers attribute to value. +// If not specified, defaults to false +func QuantizedResizeBilinearHalfPixelCenters(value bool) QuantizedResizeBilinearAttr { + return func(m optionalAttr) { + m["half_pixel_centers"] = value + } +} + +// Resize quantized `images` to `size` using quantized bilinear interpolation. +// +// Input images and output images must be quantized types. +// +// Arguments: +// images: 4-D with shape `[batch, height, width, channels]`. +// size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The +// new size for the images. +// +// +// +// Returns: +// resized_images: 4-D with shape +// `[batch, new_height, new_width, channels]`. +// out_min +// out_max +func QuantizedResizeBilinear(scope *Scope, images tf.Output, size tf.Output, min tf.Output, max tf.Output, optional ...QuantizedResizeBilinearAttr) (resized_images tf.Output, out_min tf.Output, out_max tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "QuantizedResizeBilinear", + Input: []tf.Input{ + images, size, min, max, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// RandomGammaAttr is an optional argument to RandomGamma. +type RandomGammaAttr func(optionalAttr) + +// RandomGammaSeed sets the optional seed attribute to value. +// +// value: If either `seed` or `seed2` are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func RandomGammaSeed(value int64) RandomGammaAttr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// RandomGammaSeed2 sets the optional seed2 attribute to value. +// +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func RandomGammaSeed2(value int64) RandomGammaAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Outputs random values from the Gamma distribution(s) described by alpha. +// +// This op uses the algorithm by Marsaglia et al. to acquire samples via +// transformation-rejection from pairs of uniform and normal random variables. 
+// See http://dl.acm.org/citation.cfm?id=358414 +// +// Arguments: +// shape: 1-D integer tensor. Shape of independent samples to draw from each +// distribution described by the shape parameters given in alpha. +// alpha: A tensor in which each scalar is a "shape" parameter describing the +// associated gamma distribution. +// +// Returns A tensor with shape `shape + shape(alpha)`. Each slice +// `[:, ..., :, i0, i1, ...iN]` contains the samples drawn for +// `alpha[i0, i1, ...iN]`. The dtype of the output matches the dtype of alpha. +func RandomGamma(scope *Scope, shape tf.Output, alpha tf.Output, optional ...RandomGammaAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "RandomGamma", + Input: []tf.Input{ + shape, alpha, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Returns 0 if x == 0, and x * log1p(y) otherwise, elementwise. +func Xlog1py(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Xlog1py", + Input: []tf.Input{ + x, y, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Returns 0 if x == 0, and x * log(y) otherwise, elementwise. func Xlogy(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { @@ -21602,202 +21471,6 @@ func TanhGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { return op.Output(0) } -// FusedBatchNormAttr is an optional argument to FusedBatchNorm. -type FusedBatchNormAttr func(optionalAttr) - -// FusedBatchNormEpsilon sets the optional epsilon attribute to value. -// -// value: A small float number added to the variance of x. -// If not specified, defaults to 0.0001 -func FusedBatchNormEpsilon(value float32) FusedBatchNormAttr { - return func(m optionalAttr) { - m["epsilon"] = value - } -} - -// FusedBatchNormExponentialAvgFactor sets the optional exponential_avg_factor attribute to value. -// If not specified, defaults to 1 -func FusedBatchNormExponentialAvgFactor(value float32) FusedBatchNormAttr { - return func(m optionalAttr) { - m["exponential_avg_factor"] = value - } -} - -// FusedBatchNormDataFormat sets the optional data_format attribute to value. -// -// value: The data format for x and y. Either "NHWC" (default) or "NCHW". -// If not specified, defaults to "NHWC" -func FusedBatchNormDataFormat(value string) FusedBatchNormAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// FusedBatchNormIsTraining sets the optional is_training attribute to value. -// -// value: A bool value to indicate the operation is for training (default) -// or inference. -// If not specified, defaults to true -func FusedBatchNormIsTraining(value bool) FusedBatchNormAttr { - return func(m optionalAttr) { - m["is_training"] = value - } -} - -// Batch normalization. -// -// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW". -// The size of 1D Tensors matches the dimension C of the 4D Tensors. -// -// Arguments: -// x: A 4D Tensor for input data. -// scale: A 1D Tensor for scaling factor, to scale the normalized x. -// offset: A 1D Tensor for offset, to shift to the normalized x. -// mean: A 1D Tensor for population mean. Used for inference only; -// must be empty for training. -// variance: A 1D Tensor for population variance. Used for inference only; -// must be empty for training. 
-// -// Returns: -// y: A 4D Tensor for output data. -// batch_mean: A 1D Tensor for the computed batch mean, to be used by TensorFlow -// to compute the running mean. -// batch_variance: A 1D Tensor for the computed batch variance, to be used by -// TensorFlow to compute the running variance. -// reserve_space_1: A 1D Tensor for the computed batch mean, to be reused -// in the gradient computation. -// reserve_space_2: A 1D Tensor for the computed batch variance (inverted variance -// in the cuDNN case), to be reused in the gradient computation. -func FusedBatchNorm(scope *Scope, x tf.Output, scale tf.Output, offset tf.Output, mean tf.Output, variance tf.Output, optional ...FusedBatchNormAttr) (y tf.Output, batch_mean tf.Output, batch_variance tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "FusedBatchNorm", - Input: []tf.Input{ - x, scale, offset, mean, variance, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) -} - -// SparseMatMulAttr is an optional argument to SparseMatMul. -type SparseMatMulAttr func(optionalAttr) - -// SparseMatMulTransposeA sets the optional transpose_a attribute to value. -// If not specified, defaults to false -func SparseMatMulTransposeA(value bool) SparseMatMulAttr { - return func(m optionalAttr) { - m["transpose_a"] = value - } -} - -// SparseMatMulTransposeB sets the optional transpose_b attribute to value. -// If not specified, defaults to false -func SparseMatMulTransposeB(value bool) SparseMatMulAttr { - return func(m optionalAttr) { - m["transpose_b"] = value - } -} - -// SparseMatMulAIsSparse sets the optional a_is_sparse attribute to value. -// If not specified, defaults to false -func SparseMatMulAIsSparse(value bool) SparseMatMulAttr { - return func(m optionalAttr) { - m["a_is_sparse"] = value - } -} - -// SparseMatMulBIsSparse sets the optional b_is_sparse attribute to value. -// If not specified, defaults to false -func SparseMatMulBIsSparse(value bool) SparseMatMulAttr { - return func(m optionalAttr) { - m["b_is_sparse"] = value - } -} - -// Multiply matrix "a" by matrix "b". -// -// The inputs must be two-dimensional matrices and the inner dimension of "a" must -// match the outer dimension of "b". Both "a" and "b" must be `Tensor`s not -// `SparseTensor`s. This op is optimized for the case where at least one of "a" or -// "b" is sparse, in the sense that they have a large proportion of zero values. -// The breakeven for using this versus a dense matrix multiply on one platform was -// 30% zero values in the sparse matrix. -// -// The gradient computation of this operation will only take advantage of sparsity -// in the input gradient when that gradient comes from a Relu. -func SparseMatMul(scope *Scope, a tf.Output, b tf.Output, optional ...SparseMatMulAttr) (product tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SparseMatMul", - Input: []tf.Input{ - a, b, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Resizes the list. 
-// -// -// input_handle: the input list -// size: size of the output list -// -func TensorListResize(scope *Scope, input_handle tf.Output, size tf.Output) (output_handle tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorListResize", - Input: []tf.Input{ - input_handle, size, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes inverse hyperbolic tangent of x element-wise. -// -// Given an input tensor, this function computes inverse hyperbolic tangent -// for every element in the tensor. Input range is `[-1,1]` and output range is -// `[-inf, inf]`. If input is `-1`, output will be `-inf` and if the -// input is `1`, output will be `inf`. Values outside the range will have -// `nan` as output. -// -// ```python -// x = tf.constant([-float("inf"), -1, -0.5, 1, 0, 0.5, 10, float("inf")]) -// tf.math.atanh(x) ==> [nan -inf -0.54930615 inf 0. 0.54930615 nan nan] -// ``` -func Atanh(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Atanh", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Computes hyperbolic tangent of `x` element-wise. // // Given an input tensor, this function computes hyperbolic tangent of every @@ -22961,75 +22634,6 @@ func NcclReduce(scope *Scope, input []tf.Output, reduction string) (data tf.Outp return op.Output(0) } -// QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizeAttr is an optional argument to QuantizedDepthwiseConv2DWithBiasAndReluAndRequantize. -type QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizeAttr func(optionalAttr) - -// QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizeOutType sets the optional out_type attribute to value. -// -// value: The type of the output. -// If not specified, defaults to DT_QUINT8 -func QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizeOutType(value tf.DataType) QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizeAttr { - return func(m optionalAttr) { - m["out_type"] = value - } -} - -// QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizeDilations sets the optional dilations attribute to value. -// -// value: List of dilation values. -// If not specified, defaults to -func QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizeDilations(value []int64) QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizeAttr { - return func(m optionalAttr) { - m["dilations"] = value - } -} - -// QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizePaddingList sets the optional padding_list attribute to value. -// If not specified, defaults to <> -func QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizePaddingList(value []int64) QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizeAttr { - return func(m optionalAttr) { - m["padding_list"] = value - } -} - -// Computes quantized depthwise Conv2D with Bias, Relu and Requantize. -// -// Arguments: -// input: The original input tensor. -// filter: The original filter tensor. -// bias: The original bias tensor. -// min_input: The float value that the minimum quantized input value represents. -// max_input: The float value that the maximum quantized input value represents. -// min_filter: The float value that the minimum quantized filter value represents. -// max_filter: The float value that the maximum quantized filter value represents. -// min_freezed_output: The minimum float value of the output tensor. -// max_freezed_output: The maximum float value of the output tensor. -// strides: List of stride values. 
-// -// -// Returns: -// output: The output tensor. -// min_output: The float value that the minimum quantized output value represents. -// max_output: The float value that the maximum quantized output value represents. -func QuantizedDepthwiseConv2DWithBiasAndReluAndRequantize(scope *Scope, input tf.Output, filter tf.Output, bias tf.Output, min_input tf.Output, max_input tf.Output, min_filter tf.Output, max_filter tf.Output, min_freezed_output tf.Output, max_freezed_output tf.Output, strides []int64, padding string, optional ...QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizeAttr) (output tf.Output, min_output tf.Output, max_output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "QuantizedDepthwiseConv2DWithBiasAndReluAndRequantize", - Input: []tf.Input{ - input, filter, bias, min_input, max_input, min_filter, max_filter, min_freezed_output, max_freezed_output, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - // QuantizedDepthwiseConv2DWithBiasAndReluAttr is an optional argument to QuantizedDepthwiseConv2DWithBiasAndRelu. type QuantizedDepthwiseConv2DWithBiasAndReluAttr func(optionalAttr) @@ -23320,131 +22924,6 @@ func QuantizedConv2DPerChannel(scope *Scope, input tf.Output, filter tf.Output, return op.Output(0), op.Output(1), op.Output(2) } -// Concatenates quantized tensors along one dimension. -// -// Arguments: -// concat_dim: 0-D. The dimension along which to concatenate. Must be in the -// range [0, rank(values)). -// values: The `N` Tensors to concatenate. Their ranks and types must match, -// and their sizes must match in all dimensions except `concat_dim`. -// input_mins: The minimum scalar values for each of the input tensors. -// input_maxes: The maximum scalar values for each of the input tensors. -// -// Returns: -// output: A `Tensor` with the concatenation of values stacked along the -// `concat_dim` dimension. This tensor's shape matches that of `values` except -// in `concat_dim` where it has the sum of the sizes. -// output_min: The float value that the minimum quantized output value represents. -// output_max: The float value that the maximum quantized output value represents. -func QuantizedConcat(scope *Scope, concat_dim tf.Output, values []tf.Output, input_mins []tf.Output, input_maxes []tf.Output) (output tf.Output, output_min tf.Output, output_max tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "QuantizedConcat", - Input: []tf.Input{ - concat_dim, tf.OutputList(values), tf.OutputList(input_mins), tf.OutputList(input_maxes), - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// Returns the batched diagonal part of a batched tensor. -// -// Returns a tensor with the `k[0]`-th to `k[1]`-th diagonals of the batched -// `input`. -// -// Assume `input` has `r` dimensions `[I, J, ..., L, M, N]`. -// Let `max_diag_len` be the maximum length among all diagonals to be extracted, -// `max_diag_len = min(M + min(k[1], 0), N + min(-k[0], 0))` -// Let `num_diags` be the number of diagonals to extract, -// `num_diags = k[1] - k[0] + 1`. 
-// -// If `num_diags == 1`, the output tensor is of rank `r - 1` with shape -// `[I, J, ..., L, max_diag_len]` and values: -// -// ``` -// diagonal[i, j, ..., l, n] -// = input[i, j, ..., l, n+y, n+x] ; if 0 <= n+y < M and 0 <= n+x < N, -// padding_value ; otherwise. -// ``` -// where `y = max(-k[1], 0)`, `x = max(k[1], 0)`. -// -// Otherwise, the output tensor has rank `r` with dimensions -// `[I, J, ..., L, num_diags, max_diag_len]` with values: -// -// ``` -// diagonal[i, j, ..., l, m, n] -// = input[i, j, ..., l, n+y, n+x] ; if 0 <= n+y < M and 0 <= n+x < N, -// padding_value ; otherwise. -// ``` -// where `d = k[1] - m`, `y = max(-d, 0)`, and `x = max(d, 0)`. -// -// The input must be at least a matrix. -// -// For example: -// -// ``` -// input = np.array([[[1, 2, 3, 4], # Input shape: (2, 3, 4) -// [5, 6, 7, 8], -// [9, 8, 7, 6]], -// [[5, 4, 3, 2], -// [1, 2, 3, 4], -// [5, 6, 7, 8]]]) -// -// # A main diagonal from each batch. -// tf.matrix_diag_part(input) ==> [[1, 6, 7], # Output shape: (2, 3) -// [5, 2, 7]] -// -// # A superdiagonal from each batch. -// tf.matrix_diag_part(input, k = 1) -// ==> [[2, 7, 6], # Output shape: (2, 3) -// [4, 3, 8]] -// -// # A tridiagonal band from each batch. -// tf.matrix_diag_part(input, k = (-1, 1)) -// ==> [[[2, 7, 6], # Output shape: (2, 3, 3) -// [1, 6, 7], -// [5, 8, 0]], -// [[4, 3, 8], -// [5, 2, 7], -// [1, 6, 0]]] -// -// # Padding value = 9 -// tf.matrix_diag_part(input, k = (1, 3), padding_value = 9) -// ==> [[[4, 9, 9], # Output shape: (2, 3, 3) -// [3, 8, 9], -// [2, 7, 6]], -// [[2, 9, 9], -// [3, 4, 9], -// [4, 3, 8]]] -// ``` -// -// Arguments: -// input: Rank `r` tensor where `r >= 2`. -// k: Diagonal offset(s). Positive value means superdiagonal, 0 refers to the main -// diagonal, and negative value means subdiagonals. `k` can be a single integer -// (for a single diagonal) or a pair of integers specifying the low and high ends -// of a matrix band. `k[0]` must not be larger than `k[1]`. -// padding_value: The value to fill the area outside the specified diagonal band with. -// Default is 0. -// -// Returns The extracted diagonal(s). -func MatrixDiagPartV2(scope *Scope, input tf.Output, k tf.Output, padding_value tf.Output) (diagonal tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "MatrixDiagPartV2", - Input: []tf.Input{ - input, k, padding_value, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // A container for a multi device iterator resource. // // Returns: @@ -25019,6 +24498,176 @@ func SoftsignGrad(scope *Scope, gradients tf.Output, features tf.Output) (backpr return op.Output(0) } +// Resizes the list. +// +// +// input_handle: the input list +// size: size of the output list +// +func TensorListResize(scope *Scope, input_handle tf.Output, size tf.Output) (output_handle tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "TensorListResize", + Input: []tf.Input{ + input_handle, size, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// FusedBatchNormAttr is an optional argument to FusedBatchNorm. +type FusedBatchNormAttr func(optionalAttr) + +// FusedBatchNormEpsilon sets the optional epsilon attribute to value. +// +// value: A small float number added to the variance of x. 
+// If not specified, defaults to 0.0001 +func FusedBatchNormEpsilon(value float32) FusedBatchNormAttr { + return func(m optionalAttr) { + m["epsilon"] = value + } +} + +// FusedBatchNormExponentialAvgFactor sets the optional exponential_avg_factor attribute to value. +// If not specified, defaults to 1 +func FusedBatchNormExponentialAvgFactor(value float32) FusedBatchNormAttr { + return func(m optionalAttr) { + m["exponential_avg_factor"] = value + } +} + +// FusedBatchNormDataFormat sets the optional data_format attribute to value. +// +// value: The data format for x and y. Either "NHWC" (default) or "NCHW". +// If not specified, defaults to "NHWC" +func FusedBatchNormDataFormat(value string) FusedBatchNormAttr { + return func(m optionalAttr) { + m["data_format"] = value + } +} + +// FusedBatchNormIsTraining sets the optional is_training attribute to value. +// +// value: A bool value to indicate the operation is for training (default) +// or inference. +// If not specified, defaults to true +func FusedBatchNormIsTraining(value bool) FusedBatchNormAttr { + return func(m optionalAttr) { + m["is_training"] = value + } +} + +// Batch normalization. +// +// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW". +// The size of 1D Tensors matches the dimension C of the 4D Tensors. +// +// Arguments: +// x: A 4D Tensor for input data. +// scale: A 1D Tensor for scaling factor, to scale the normalized x. +// offset: A 1D Tensor for offset, to shift to the normalized x. +// mean: A 1D Tensor for population mean. Used for inference only; +// must be empty for training. +// variance: A 1D Tensor for population variance. Used for inference only; +// must be empty for training. +// +// Returns: +// y: A 4D Tensor for output data. +// batch_mean: A 1D Tensor for the computed batch mean, to be used by TensorFlow +// to compute the running mean. +// batch_variance: A 1D Tensor for the computed batch variance, to be used by +// TensorFlow to compute the running variance. +// reserve_space_1: A 1D Tensor for the computed batch mean, to be reused +// in the gradient computation. +// reserve_space_2: A 1D Tensor for the computed batch variance (inverted variance +// in the cuDNN case), to be reused in the gradient computation. +func FusedBatchNorm(scope *Scope, x tf.Output, scale tf.Output, offset tf.Output, mean tf.Output, variance tf.Output, optional ...FusedBatchNormAttr) (y tf.Output, batch_mean tf.Output, batch_variance tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "FusedBatchNorm", + Input: []tf.Input{ + x, scale, offset, mean, variance, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) +} + +// SparseMatMulAttr is an optional argument to SparseMatMul. +type SparseMatMulAttr func(optionalAttr) + +// SparseMatMulTransposeA sets the optional transpose_a attribute to value. +// If not specified, defaults to false +func SparseMatMulTransposeA(value bool) SparseMatMulAttr { + return func(m optionalAttr) { + m["transpose_a"] = value + } +} + +// SparseMatMulTransposeB sets the optional transpose_b attribute to value. 
+// If not specified, defaults to false +func SparseMatMulTransposeB(value bool) SparseMatMulAttr { + return func(m optionalAttr) { + m["transpose_b"] = value + } +} + +// SparseMatMulAIsSparse sets the optional a_is_sparse attribute to value. +// If not specified, defaults to false +func SparseMatMulAIsSparse(value bool) SparseMatMulAttr { + return func(m optionalAttr) { + m["a_is_sparse"] = value + } +} + +// SparseMatMulBIsSparse sets the optional b_is_sparse attribute to value. +// If not specified, defaults to false +func SparseMatMulBIsSparse(value bool) SparseMatMulAttr { + return func(m optionalAttr) { + m["b_is_sparse"] = value + } +} + +// Multiply matrix "a" by matrix "b". +// +// The inputs must be two-dimensional matrices and the inner dimension of "a" must +// match the outer dimension of "b". Both "a" and "b" must be `Tensor`s not +// `SparseTensor`s. This op is optimized for the case where at least one of "a" or +// "b" is sparse, in the sense that they have a large proportion of zero values. +// The breakeven for using this versus a dense matrix multiply on one platform was +// 30% zero values in the sparse matrix. +// +// The gradient computation of this operation will only take advantage of sparsity +// in the input gradient when that gradient comes from a Relu. +func SparseMatMul(scope *Scope, a tf.Output, b tf.Output, optional ...SparseMatMulAttr) (product tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "SparseMatMul", + Input: []tf.Input{ + a, b, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Computes the LSTM cell backward propagation for 1 timestep. // // This implementation is to be used in conjunction of LSTMBlockCell. @@ -25842,6 +25491,38 @@ func TensorArrayCloseV2(scope *Scope, handle tf.Output) (o *tf.Operation) { return scope.AddOperation(opspec) } +// Computes the Kth order statistic of a data set. The current +// +// implementation uses a binary search requiring exactly 32 passes over +// the input data. The running time is linear with respect to input +// size. The median-of-medians algorithm is probably faster, but is +// difficult to implement efficiently in XLA. The implementation imposes +// a total ordering on floats. The ordering is consistent with the usual +// partial order. Positive NaNs are greater than positive +// infinity. Negative NaNs are less than negative infinity. NaNs with +// distinct payloads are treated as distinct. Subnormal numbers are +// preserved (not flushed to zero). Positive infinity is greater than all +// numbers. Negative infinity is less than all numbers. Positive is +// greater than negative zero. There are less than k values greater than +// the kth order statistic. There are at least k values greater than or +// equal to the Kth order statistic. The semantics are not the same as +// top_k_unique. +func KthOrderStatistic(scope *Scope, input tf.Output, k int64) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"k": k} + opspec := tf.OpSpec{ + Type: "KthOrderStatistic", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // AvgPool3DGradAttr is an optional argument to AvgPool3DGrad. 
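The hunk above regenerates, among others, the `SparseMatMul` wrapper, whose doc comment explains when the op pays off (at least one operand with a large share of zero values). Below is a minimal driver sketch, assuming the standard TensorFlow Go binding import paths and the generated `op` package API shown in this diff; the matrices are invented illustrative values.

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()

	// "a" is mostly zeros, which is the case SparseMatMul is optimized for.
	a := op.Const(s, [][]float32{{0, 0, 2}, {0, 0, 0}})
	b := op.Const(s, [][]float32{{1, 1}, {1, 1}, {1, 1}})

	// Optional attributes are passed through the generated functional options.
	product := op.SparseMatMul(s, a, b, op.SparseMatMulAIsSparse(true))

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	res, err := sess.Run(nil, []tf.Output{product}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(res[0].Value()) // [[2 2] [0 0]]
}
```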
type AvgPool3DGradAttr func(optionalAttr) @@ -25998,114 +25679,6 @@ func Conv3D(scope *Scope, input tf.Output, filter tf.Output, strides []int64, pa return op.Output(0) } -// UniqueWithCountsAttr is an optional argument to UniqueWithCounts. -type UniqueWithCountsAttr func(optionalAttr) - -// UniqueWithCountsOutIdx sets the optional out_idx attribute to value. -// If not specified, defaults to DT_INT32 -func UniqueWithCountsOutIdx(value tf.DataType) UniqueWithCountsAttr { - return func(m optionalAttr) { - m["out_idx"] = value - } -} - -// Finds unique elements in a 1-D tensor. -// -// This operation returns a tensor `y` containing all of the unique elements of `x` -// sorted in the same order that they occur in `x`. This operation also returns a -// tensor `idx` the same size as `x` that contains the index of each value of `x` -// in the unique output `y`. Finally, it returns a third tensor `count` that -// contains the count of each element of `y` in `x`. In other words: -// -// `y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]` -// -// For example: -// -// ``` -// # tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8] -// y, idx, count = unique_with_counts(x) -// y ==> [1, 2, 4, 7, 8] -// idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] -// count ==> [2, 1, 3, 1, 2] -// ``` -// -// Arguments: -// x: 1-D. -// -// Returns: -// y: 1-D. -// idx: 1-D. -// count: 1-D. -func UniqueWithCounts(scope *Scope, x tf.Output, optional ...UniqueWithCountsAttr) (y tf.Output, idx tf.Output, count tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "UniqueWithCounts", - Input: []tf.Input{ - x, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// ResizeBicubicGradAttr is an optional argument to ResizeBicubicGrad. -type ResizeBicubicGradAttr func(optionalAttr) - -// ResizeBicubicGradAlignCorners sets the optional align_corners attribute to value. -// -// value: If true, the centers of the 4 corner pixels of the input and grad tensors are -// aligned. Defaults to false. -// If not specified, defaults to false -func ResizeBicubicGradAlignCorners(value bool) ResizeBicubicGradAttr { - return func(m optionalAttr) { - m["align_corners"] = value - } -} - -// ResizeBicubicGradHalfPixelCenters sets the optional half_pixel_centers attribute to value. -// If not specified, defaults to false -func ResizeBicubicGradHalfPixelCenters(value bool) ResizeBicubicGradAttr { - return func(m optionalAttr) { - m["half_pixel_centers"] = value - } -} - -// Computes the gradient of bicubic interpolation. -// -// Arguments: -// grads: 4-D with shape `[batch, height, width, channels]`. -// original_image: 4-D with shape `[batch, orig_height, orig_width, channels]`, -// The image tensor that was resized. -// -// Returns 4-D with shape `[batch, orig_height, orig_width, channels]`. -// Gradients with respect to the input image. Input image must have been -// float or double. 
-func ResizeBicubicGrad(scope *Scope, grads tf.Output, original_image tf.Output, optional ...ResizeBicubicGradAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResizeBicubicGrad", - Input: []tf.Input{ - grads, original_image, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Returns a list which has the passed-in `Tensor` as last element and the other elements of the given list in `input_handle`. // // tensor: The tensor to put on the list. @@ -27122,91 +26695,107 @@ func NoOp(scope *Scope) (o *tf.Operation) { return scope.AddOperation(opspec) } -// ResourceSparseApplyRMSPropAttr is an optional argument to ResourceSparseApplyRMSProp. -type ResourceSparseApplyRMSPropAttr func(optionalAttr) - -// ResourceSparseApplyRMSPropUseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, updating of the var, ms, and mom tensors is protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceSparseApplyRMSPropUseLocking(value bool) ResourceSparseApplyRMSPropAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// Update '*var' according to the RMSProp algorithm. -// -// Note that in dense implementation of this algorithm, ms and mom will -// update even if the grad is zero, but in this sparse implementation, ms -// and mom will not update in iterations during which the grad is zero. -// -// mean_square = decay * mean_square + (1-decay) * gradient ** 2 -// Delta = learning_rate * gradient / sqrt(mean_square + epsilon) -// -// ms <- rho * ms_{t-1} + (1-rho) * grad * grad -// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) -// var <- var - mom -// -// Arguments: -// var_: Should be from a Variable(). -// ms: Should be from a Variable(). -// mom: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// rho: Decay rate. Must be a scalar. -// -// epsilon: Ridge term. Must be a scalar. -// grad: The gradient. -// indices: A vector of indices into the first dimension of var, ms and mom. +// Set a summary_writer_interface to record statistics using given stats_aggregator. // // Returns the created operation. -func ResourceSparseApplyRMSProp(scope *Scope, var_ tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyRMSPropAttr) (o *tf.Operation) { +func StatsAggregatorSetSummaryWriter(scope *Scope, stats_aggregator tf.Output, summary tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "ResourceSparseApplyRMSProp", + Type: "StatsAggregatorSetSummaryWriter", Input: []tf.Input{ - var_, ms, mom, lr, rho, momentum, epsilon, grad, indices, + stats_aggregator, summary, }, - Attrs: attrs, } return scope.AddOperation(opspec) } -// StringToNumberAttr is an optional argument to StringToNumber. -type StringToNumberAttr func(optionalAttr) - -// StringToNumberOutType sets the optional out_type attribute to value. +// Subtracts a value from the current value of a variable. // -// value: The numeric type to interpret each string in `string_tensor` as. 
-// If not specified, defaults to DT_FLOAT -func StringToNumberOutType(value tf.DataType) StringToNumberAttr { +// Any ReadVariableOp with a control dependency on this op is guaranteed to +// see the decremented value or a subsequent newer one. +// +// Arguments: +// resource: handle to the resource in which to store the variable. +// value: the value by which the variable will be incremented. +// +// Returns the created operation. +func AssignSubVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "AssignSubVariableOp", + Input: []tf.Input{ + resource, value, + }, + } + return scope.AddOperation(opspec) +} + +// FusedBatchNormGradAttr is an optional argument to FusedBatchNormGrad. +type FusedBatchNormGradAttr func(optionalAttr) + +// FusedBatchNormGradEpsilon sets the optional epsilon attribute to value. +// +// value: A small float number added to the variance of x. +// If not specified, defaults to 0.0001 +func FusedBatchNormGradEpsilon(value float32) FusedBatchNormGradAttr { return func(m optionalAttr) { - m["out_type"] = value + m["epsilon"] = value } } -// Converts each string in the input Tensor to the specified numeric type. +// FusedBatchNormGradDataFormat sets the optional data_format attribute to value. // -// (Note that int32 overflow results in an error while float overflow -// results in a rounded value.) +// value: The data format for y_backprop, x, x_backprop. +// Either "NHWC" (default) or "NCHW". +// If not specified, defaults to "NHWC" +func FusedBatchNormGradDataFormat(value string) FusedBatchNormGradAttr { + return func(m optionalAttr) { + m["data_format"] = value + } +} + +// FusedBatchNormGradIsTraining sets the optional is_training attribute to value. // -// Example: +// value: A bool value to indicate the operation is for training (default) +// or inference. +// If not specified, defaults to true +func FusedBatchNormGradIsTraining(value bool) FusedBatchNormGradAttr { + return func(m optionalAttr) { + m["is_training"] = value + } +} + +// Gradient for batch normalization. // -// >>> strings = ["5.0", "3.0", "7.0"] -// >>> tf.strings.to_number(strings) -// +// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW". +// The size of 1D Tensors matches the dimension C of the 4D Tensors. // +// Arguments: +// y_backprop: A 4D Tensor for the gradient with respect to y. +// x: A 4D Tensor for input data. +// scale: A 1D Tensor for scaling factor, to scale the normalized x. +// reserve_space_1: When is_training is True, a 1D Tensor for the computed batch +// mean to be reused in gradient computation. When is_training is +// False, a 1D Tensor for the population mean to be reused in both +// 1st and 2nd order gradient computation. +// reserve_space_2: When is_training is True, a 1D Tensor for the computed batch +// variance (inverted variance in the cuDNN case) to be reused in +// gradient computation. When is_training is False, a 1D Tensor +// for the population variance to be reused in both 1st and 2nd +// order gradient computation. // -// Returns A Tensor of the same shape as the input `string_tensor`. -func StringToNumber(scope *Scope, string_tensor tf.Output, optional ...StringToNumberAttr) (output tf.Output) { +// Returns: +// x_backprop: A 4D Tensor for the gradient with respect to x. +// scale_backprop: A 1D Tensor for the gradient with respect to scale. +// offset_backprop: A 1D Tensor for the gradient with respect to offset. 
+// reserve_space_3: Unused placeholder to match the mean input in FusedBatchNorm. +// reserve_space_4: Unused placeholder to match the variance input +// in FusedBatchNorm. +func FusedBatchNormGrad(scope *Scope, y_backprop tf.Output, x tf.Output, scale tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output, optional ...FusedBatchNormGradAttr) (x_backprop tf.Output, scale_backprop tf.Output, offset_backprop tf.Output, reserve_space_3 tf.Output, reserve_space_4 tf.Output) { if scope.Err() != nil { return } @@ -27215,30 +26804,124 @@ func StringToNumber(scope *Scope, string_tensor tf.Output, optional ...StringToN a(attrs) } opspec := tf.OpSpec{ - Type: "StringToNumber", + Type: "FusedBatchNormGrad", Input: []tf.Input{ - string_tensor, + y_backprop, x, scale, reserve_space_1, reserve_space_2, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) } -// Transforms a Tensor into a serialized TensorProto proto. +// DecodeCSVAttr is an optional argument to DecodeCSV. +type DecodeCSVAttr func(optionalAttr) + +// DecodeCSVFieldDelim sets the optional field_delim attribute to value. +// +// value: char delimiter to separate fields in a record. +// If not specified, defaults to "," +func DecodeCSVFieldDelim(value string) DecodeCSVAttr { + return func(m optionalAttr) { + m["field_delim"] = value + } +} + +// DecodeCSVUseQuoteDelim sets the optional use_quote_delim attribute to value. +// +// value: If false, treats double quotation marks as regular +// characters inside of the string fields (ignoring RFC 4180, Section 2, +// Bullet 5). +// If not specified, defaults to true +func DecodeCSVUseQuoteDelim(value bool) DecodeCSVAttr { + return func(m optionalAttr) { + m["use_quote_delim"] = value + } +} + +// DecodeCSVNaValue sets the optional na_value attribute to value. +// +// value: Additional string to recognize as NA/NaN. +// If not specified, defaults to "" +func DecodeCSVNaValue(value string) DecodeCSVAttr { + return func(m optionalAttr) { + m["na_value"] = value + } +} + +// DecodeCSVSelectCols sets the optional select_cols attribute to value. +// If not specified, defaults to <> +func DecodeCSVSelectCols(value []int64) DecodeCSVAttr { + return func(m optionalAttr) { + m["select_cols"] = value + } +} + +// Convert CSV records to tensors. Each column maps to one tensor. +// +// RFC 4180 format is expected for the CSV records. +// (https://tools.ietf.org/html/rfc4180) +// Note that we allow leading and trailing spaces with int or float field. // // Arguments: -// tensor: A Tensor of type `T`. +// records: Each string is a record/row in the csv and all records should have +// the same format. +// record_defaults: One tensor per column of the input record, with either a +// scalar default value for that column or an empty vector if the column is +// required. // -// Returns A serialized TensorProto proto of the input tensor. -func SerializeTensor(scope *Scope, tensor tf.Output) (serialized tf.Output) { +// Returns Each tensor will have the same shape as records. 
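The `DecodeCSV` wrapper defined just below takes one default tensor per column, and the dtype of each default fixes the dtype of the corresponding output column. A small usage sketch under the same assumptions as the other examples in this section (standard Go binding import paths; the CSV records and defaults are invented for illustration):

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()

	// Two CSV records, each with an int64 column, a float column and a string column.
	records := op.Const(s, []string{"1,2.5,foo", "4,0.5,bar"})

	// One default per column; the dtype of each default fixes that column's dtype.
	defaults := []tf.Output{
		op.Const(s, int64(0)),
		op.Const(s, float32(0)),
		op.Const(s, ""),
	}

	cols := op.DecodeCSV(s, records, defaults)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	res, err := sess.Run(nil, []tf.Output{cols[0], cols[1], cols[2]}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(res[0].Value(), res[1].Value(), res[2].Value())
}
```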
+func DecodeCSV(scope *Scope, records tf.Output, record_defaults []tf.Output, optional ...DecodeCSVAttr) (output []tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "DecodeCSV", + Input: []tf.Input{ + records, tf.OutputList(record_defaults), + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if output, idx, err = makeOutputList(op, idx, "output"); err != nil { + scope.UpdateErr("DecodeCSV", err) + return + } + return output +} + +// Convert JSON-encoded Example records to binary protocol buffer strings. +// +// This op translates a tensor containing Example records, encoded using +// the [standard JSON +// mapping](https://developers.google.com/protocol-buffers/docs/proto3#json), +// into a tensor containing the same records encoded as binary protocol +// buffers. The resulting tensor can then be fed to any of the other +// Example-parsing ops. +// +// Arguments: +// json_examples: Each string is a JSON object serialized according to the JSON +// mapping of the Example proto. +// +// Returns Each string is a binary Example protocol buffer corresponding +// to the respective element of `json_examples`. +func DecodeJSONExample(scope *Scope, json_examples tf.Output) (binary_examples tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SerializeTensor", + Type: "DecodeJSONExample", Input: []tf.Input{ - tensor, + json_examples, }, } op := scope.AddOperation(opspec) @@ -27269,204 +26952,50 @@ func ParseTensor(scope *Scope, serialized tf.Output, out_type tf.DataType) (outp return op.Output(0) } -// Returns element-wise integer closest to x. -// -// If the result is midway between two representable values, -// the even representable is chosen. -// For example: -// -// ``` -// rint(-1.5) ==> -2.0 -// rint(0.5000001) ==> 1.0 -// rint([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) ==> [-2., -2., -0., 0., 2., 2., 2.] -// ``` -func Rint(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Rint", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} +// ParseSequenceExampleAttr is an optional argument to ParseSequenceExample. +type ParseSequenceExampleAttr func(optionalAttr) -// Reverses specific dimensions of a tensor. -// -// Given a `tensor`, and a `bool` tensor `dims` representing the dimensions -// of `tensor`, this operation reverses each dimension i of `tensor` where -// `dims[i]` is `True`. -// -// `tensor` can have up to 8 dimensions. The number of dimensions -// of `tensor` must equal the number of elements in `dims`. 
In other words: -// -// `rank(tensor) = size(dims)` -// -// For example: -// -// ``` -// # tensor 't' is [[[[ 0, 1, 2, 3], -// # [ 4, 5, 6, 7], -// # [ 8, 9, 10, 11]], -// # [[12, 13, 14, 15], -// # [16, 17, 18, 19], -// # [20, 21, 22, 23]]]] -// # tensor 't' shape is [1, 2, 3, 4] -// -// # 'dims' is [False, False, False, True] -// reverse(t, dims) ==> [[[[ 3, 2, 1, 0], -// [ 7, 6, 5, 4], -// [ 11, 10, 9, 8]], -// [[15, 14, 13, 12], -// [19, 18, 17, 16], -// [23, 22, 21, 20]]]] -// -// # 'dims' is [False, True, False, False] -// reverse(t, dims) ==> [[[[12, 13, 14, 15], -// [16, 17, 18, 19], -// [20, 21, 22, 23] -// [[ 0, 1, 2, 3], -// [ 4, 5, 6, 7], -// [ 8, 9, 10, 11]]]] -// -// # 'dims' is [False, False, True, False] -// reverse(t, dims) ==> [[[[8, 9, 10, 11], -// [4, 5, 6, 7], -// [0, 1, 2, 3]] -// [[20, 21, 22, 23], -// [16, 17, 18, 19], -// [12, 13, 14, 15]]]] -// ``` -// -// Arguments: -// tensor: Up to 8-D. -// dims: 1-D. The dimensions to reverse. -// -// Returns The same shape as `tensor`. -func Reverse(scope *Scope, tensor tf.Output, dims tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Reverse", - Input: []tf.Input{ - tensor, dims, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Wraps an arbitrary MLIR computation expressed as a module with a main() function. -// -// This operation does not have an associated kernel and is not intended to be -// executed in a regular TensorFlow session. Instead it is intended to be used for -// testing or for special case where a user intends to pass custom MLIR computation -// through a TensorFlow graph with the intent of having custom tooling processing -// it downstream (when targeting a different environment, like TensorFlow lite for -// example). -// The MLIR module is expected to have a main() function that will be used as an -// entry point. The inputs to the operations will be passed as argument to the -// main() function and the returned values of the main function mapped to the -// outputs. -// Example usage: -// -// ``` -// import tensorflow as tf -// from tensorflow.compiler.mlir.tensorflow.gen_mlir_passthrough_op import mlir_passthrough_op -// -// mlir_module = '''python -// func @main(%arg0 : tensor<10xf32>, %arg1 : tensor<10xf32>) -> tensor<10x10xf32> { -// %add = "magic.op"(%arg0, %arg1) : (tensor<10xf32>, tensor<10xf32>) -> tensor<10x10xf32> -// return %ret : tensor<10x10xf32> -// } -// ''' -// -// @tf.function -// def foo(x, y): -// return mlir_passthrough_op([x, y], mlir_module, Toutputs=[tf.float32]) -// -// graph_def = foo.get_concrete_function(tf.TensorSpec([10], tf.float32), tf.TensorSpec([10], tf.float32)).graph.as_graph_def() -// ``` -func MlirPassthroughOp(scope *Scope, inputs []tf.Output, mlir_module string, Toutputs []tf.DataType) (outputs []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"mlir_module": mlir_module, "Toutputs": Toutputs} - opspec := tf.OpSpec{ - Type: "MlirPassthroughOp", - Input: []tf.Input{ - tf.OutputList(inputs), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil { - scope.UpdateErr("MlirPassthroughOp", err) - return - } - return outputs -} - -// StringLowerAttr is an optional argument to StringLower. -type StringLowerAttr func(optionalAttr) - -// StringLowerEncoding sets the optional encoding attribute to value. 
-// If not specified, defaults to "" -func StringLowerEncoding(value string) StringLowerAttr { - return func(m optionalAttr) { - m["encoding"] = value - } -} - -// Converts all uppercase characters into their respective lowercase replacements. -// -// Example: -// -// >>> tf.strings.lower("CamelCase string and ALL CAPS") -// -// -func StringLower(scope *Scope, input tf.Output, optional ...StringLowerAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StringLower", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ParseSequenceExampleV2Attr is an optional argument to ParseSequenceExampleV2. -type ParseSequenceExampleV2Attr func(optionalAttr) - -// ParseSequenceExampleV2NcontextSparse sets the optional Ncontext_sparse attribute to value. +// ParseSequenceExampleNcontextSparse sets the optional Ncontext_sparse attribute to value. // If not specified, defaults to 0 // // REQUIRES: value >= 0 -func ParseSequenceExampleV2NcontextSparse(value int64) ParseSequenceExampleV2Attr { +func ParseSequenceExampleNcontextSparse(value int64) ParseSequenceExampleAttr { return func(m optionalAttr) { m["Ncontext_sparse"] = value } } -// ParseSequenceExampleV2ContextSparseTypes sets the optional context_sparse_types attribute to value. +// ParseSequenceExampleNcontextDense sets the optional Ncontext_dense attribute to value. +// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func ParseSequenceExampleNcontextDense(value int64) ParseSequenceExampleAttr { + return func(m optionalAttr) { + m["Ncontext_dense"] = value + } +} + +// ParseSequenceExampleNfeatureListSparse sets the optional Nfeature_list_sparse attribute to value. +// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func ParseSequenceExampleNfeatureListSparse(value int64) ParseSequenceExampleAttr { + return func(m optionalAttr) { + m["Nfeature_list_sparse"] = value + } +} + +// ParseSequenceExampleNfeatureListDense sets the optional Nfeature_list_dense attribute to value. +// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func ParseSequenceExampleNfeatureListDense(value int64) ParseSequenceExampleAttr { + return func(m optionalAttr) { + m["Nfeature_list_dense"] = value + } +} + +// ParseSequenceExampleContextSparseTypes sets the optional context_sparse_types attribute to value. // // value: A list of Ncontext_sparse types; the data types of data in // each context Feature given in context_sparse_keys. @@ -27475,37 +27004,23 @@ func ParseSequenceExampleV2NcontextSparse(value int64) ParseSequenceExampleV2Att // If not specified, defaults to <> // // REQUIRES: len(value) >= 0 -func ParseSequenceExampleV2ContextSparseTypes(value []tf.DataType) ParseSequenceExampleV2Attr { +func ParseSequenceExampleContextSparseTypes(value []tf.DataType) ParseSequenceExampleAttr { return func(m optionalAttr) { m["context_sparse_types"] = value } } -// ParseSequenceExampleV2ContextRaggedValueTypes sets the optional context_ragged_value_types attribute to value. -// -// value: RaggedTensor.value dtypes for the ragged context features. +// ParseSequenceExampleFeatureListDenseTypes sets the optional feature_list_dense_types attribute to value. 
// If not specified, defaults to <> // // REQUIRES: len(value) >= 0 -func ParseSequenceExampleV2ContextRaggedValueTypes(value []tf.DataType) ParseSequenceExampleV2Attr { +func ParseSequenceExampleFeatureListDenseTypes(value []tf.DataType) ParseSequenceExampleAttr { return func(m optionalAttr) { - m["context_ragged_value_types"] = value + m["feature_list_dense_types"] = value } } -// ParseSequenceExampleV2ContextRaggedSplitTypes sets the optional context_ragged_split_types attribute to value. -// -// value: RaggedTensor.row_split dtypes for the ragged context features. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func ParseSequenceExampleV2ContextRaggedSplitTypes(value []tf.DataType) ParseSequenceExampleV2Attr { - return func(m optionalAttr) { - m["context_ragged_split_types"] = value - } -} - -// ParseSequenceExampleV2ContextDenseShapes sets the optional context_dense_shapes attribute to value. +// ParseSequenceExampleContextDenseShapes sets the optional context_dense_shapes attribute to value. // // value: A list of Ncontext_dense shapes; the shapes of data in // each context Feature given in context_dense_keys. @@ -27515,43 +27030,13 @@ func ParseSequenceExampleV2ContextRaggedSplitTypes(value []tf.DataType) ParseSeq // If not specified, defaults to <> // // REQUIRES: len(value) >= 0 -func ParseSequenceExampleV2ContextDenseShapes(value []tf.Shape) ParseSequenceExampleV2Attr { +func ParseSequenceExampleContextDenseShapes(value []tf.Shape) ParseSequenceExampleAttr { return func(m optionalAttr) { m["context_dense_shapes"] = value } } -// ParseSequenceExampleV2NfeatureListSparse sets the optional Nfeature_list_sparse attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func ParseSequenceExampleV2NfeatureListSparse(value int64) ParseSequenceExampleV2Attr { - return func(m optionalAttr) { - m["Nfeature_list_sparse"] = value - } -} - -// ParseSequenceExampleV2NfeatureListDense sets the optional Nfeature_list_dense attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func ParseSequenceExampleV2NfeatureListDense(value int64) ParseSequenceExampleV2Attr { - return func(m optionalAttr) { - m["Nfeature_list_dense"] = value - } -} - -// ParseSequenceExampleV2FeatureListDenseTypes sets the optional feature_list_dense_types attribute to value. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func ParseSequenceExampleV2FeatureListDenseTypes(value []tf.DataType) ParseSequenceExampleV2Attr { - return func(m optionalAttr) { - m["feature_list_dense_types"] = value - } -} - -// ParseSequenceExampleV2FeatureListSparseTypes sets the optional feature_list_sparse_types attribute to value. +// ParseSequenceExampleFeatureListSparseTypes sets the optional feature_list_sparse_types attribute to value. // // value: A list of Nfeature_list_sparse types; the data types // of data in each FeatureList given in feature_list_sparse_keys. @@ -27560,37 +27045,13 @@ func ParseSequenceExampleV2FeatureListDenseTypes(value []tf.DataType) ParseSeque // If not specified, defaults to <> // // REQUIRES: len(value) >= 0 -func ParseSequenceExampleV2FeatureListSparseTypes(value []tf.DataType) ParseSequenceExampleV2Attr { +func ParseSequenceExampleFeatureListSparseTypes(value []tf.DataType) ParseSequenceExampleAttr { return func(m optionalAttr) { m["feature_list_sparse_types"] = value } } -// ParseSequenceExampleV2FeatureListRaggedValueTypes sets the optional feature_list_ragged_value_types attribute to value. 
-// -// value: RaggedTensor.value dtypes for the ragged FeatureList features. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func ParseSequenceExampleV2FeatureListRaggedValueTypes(value []tf.DataType) ParseSequenceExampleV2Attr { - return func(m optionalAttr) { - m["feature_list_ragged_value_types"] = value - } -} - -// ParseSequenceExampleV2FeatureListRaggedSplitTypes sets the optional feature_list_ragged_split_types attribute to value. -// -// value: RaggedTensor.row_split dtypes for the ragged FeatureList features. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func ParseSequenceExampleV2FeatureListRaggedSplitTypes(value []tf.DataType) ParseSequenceExampleV2Attr { - return func(m optionalAttr) { - m["feature_list_ragged_split_types"] = value - } -} - -// ParseSequenceExampleV2FeatureListDenseShapes sets the optional feature_list_dense_shapes attribute to value. +// ParseSequenceExampleFeatureListDenseShapes sets the optional feature_list_dense_shapes attribute to value. // // value: A list of Nfeature_list_dense shapes; the shapes of // data in each FeatureList given in feature_list_dense_keys. @@ -27600,35 +27061,21 @@ func ParseSequenceExampleV2FeatureListRaggedSplitTypes(value []tf.DataType) Pars // If not specified, defaults to <> // // REQUIRES: len(value) >= 0 -func ParseSequenceExampleV2FeatureListDenseShapes(value []tf.Shape) ParseSequenceExampleV2Attr { +func ParseSequenceExampleFeatureListDenseShapes(value []tf.Shape) ParseSequenceExampleAttr { return func(m optionalAttr) { m["feature_list_dense_shapes"] = value } } -// Transforms a vector of tf.io.SequenceExample protos (as strings) into -// typed tensors. +// Transforms a vector of brain.SequenceExample protos (as strings) into typed tensors. // // Arguments: -// serialized: A scalar or vector containing binary serialized SequenceExample protos. -// debug_name: A scalar or vector containing the names of the serialized protos. +// serialized: A vector containing binary serialized SequenceExample protos. +// debug_name: A vector containing the names of the serialized protos. // May contain, for example, table key (descriptive) name for the // corresponding serialized proto. This is purely useful for debugging // purposes, and the presence of values here has no effect on the output. // May also be an empty vector if no name is available. -// context_sparse_keys: The keys expected in the Examples' features associated with context_sparse -// values. -// context_dense_keys: The keys expected in the SequenceExamples' context features associated with -// dense values. -// context_ragged_keys: The keys expected in the Examples' features associated with context_ragged -// values. -// feature_list_sparse_keys: The keys expected in the FeatureLists associated with sparse values. -// feature_list_dense_keys: The keys expected in the SequenceExamples' feature_lists associated -// with lists of dense values. -// feature_list_ragged_keys: The keys expected in the FeatureLists associated with ragged values. -// feature_list_dense_missing_assumed_empty: A vector corresponding 1:1 with feature_list_dense_keys, indicating which -// features may be missing from the SequenceExamples. If the associated -// FeatureList is missing, it is treated as empty. // context_dense_defaults: A list of Ncontext_dense Tensors (some may be empty). // context_dense_defaults[j] provides default values // when the SequenceExample's context map lacks context_dense_key[j]. 
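The hunks around here replace the `ParseSequenceExampleV2` wrapper with `ParseSequenceExample`, whose full signature appears a few hunks further down: the feature keys become plain Go `[]string` attributes instead of `tf.Output` inputs. The sketch below illustrates that difference at graph-construction time, assuming the standard Go binding import paths; the context feature name "label" and the placeholder serialized data are made up, and the graph is only finalized, not run.

```go
package main

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()

	// A real program would feed binary-serialized SequenceExample protos here;
	// an empty string is used only so the graph can be constructed.
	serialized := op.Const(s, []string{""})
	debugName := op.Const(s, []string{"example-0"})

	// Unlike the removed V2 wrapper, the keys are compile-time string slices
	// (op attributes), not tf.Output inputs.
	ctxIdx, ctxVals, ctxShapes, _, _, _, _, _, _ := op.ParseSequenceExample(
		s, serialized, debugName,
		nil,               // context_dense_defaults: none needed here
		[]string{},        // feature_list_dense_missing_assumed_empty
		[]string{"label"}, // context_sparse_keys
		[]string{},        // context_dense_keys
		[]string{},        // feature_list_sparse_keys
		[]string{},        // feature_list_dense_keys
		op.ParseSequenceExampleNcontextSparse(1),
		op.ParseSequenceExampleContextSparseTypes([]tf.DataType{tf.Int64}),
	)
	_, _, _ = ctxIdx, ctxVals, ctxShapes

	if _, err := s.Finalize(); err != nil {
		panic(err)
	}
}
```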
@@ -27637,18 +27084,34 @@ func ParseSequenceExampleV2FeatureListDenseShapes(value []tf.Shape) ParseSequenc // The input type is inferred from context_dense_defaults[j], even when it's // empty. If context_dense_defaults[j] is not empty, its shape must match // context_dense_shapes[j]. -func ParseSequenceExampleV2(scope *Scope, serialized tf.Output, debug_name tf.Output, context_sparse_keys tf.Output, context_dense_keys tf.Output, context_ragged_keys tf.Output, feature_list_sparse_keys tf.Output, feature_list_dense_keys tf.Output, feature_list_ragged_keys tf.Output, feature_list_dense_missing_assumed_empty tf.Output, context_dense_defaults []tf.Output, optional ...ParseSequenceExampleV2Attr) (context_sparse_indices []tf.Output, context_sparse_values []tf.Output, context_sparse_shapes []tf.Output, context_dense_values []tf.Output, context_ragged_values []tf.Output, context_ragged_row_splits []tf.Output, feature_list_sparse_indices []tf.Output, feature_list_sparse_values []tf.Output, feature_list_sparse_shapes []tf.Output, feature_list_dense_values []tf.Output, feature_list_dense_lengths []tf.Output, feature_list_ragged_values []tf.Output, feature_list_ragged_outer_splits []tf.Output, feature_list_ragged_inner_splits []tf.Output) { +// feature_list_dense_missing_assumed_empty: A vector listing the +// FeatureList keys which may be missing from the SequenceExamples. If the +// associated FeatureList is missing, it is treated as empty. By default, +// any FeatureList not listed in this vector must exist in the SequenceExamples. +// context_sparse_keys: A list of Ncontext_sparse string Tensors (scalars). +// The keys expected in the Examples' features associated with context_sparse +// values. +// context_dense_keys: A list of Ncontext_dense string Tensors (scalars). +// The keys expected in the SequenceExamples' context features associated with +// dense values. +// feature_list_sparse_keys: A list of Nfeature_list_sparse string Tensors +// (scalars). The keys expected in the FeatureLists associated with sparse +// values. +// feature_list_dense_keys: A list of Nfeature_list_dense string Tensors (scalars). +// The keys expected in the SequenceExamples' feature_lists associated +// with lists of dense values. 
+func ParseSequenceExample(scope *Scope, serialized tf.Output, debug_name tf.Output, context_dense_defaults []tf.Output, feature_list_dense_missing_assumed_empty []string, context_sparse_keys []string, context_dense_keys []string, feature_list_sparse_keys []string, feature_list_dense_keys []string, optional ...ParseSequenceExampleAttr) (context_sparse_indices []tf.Output, context_sparse_values []tf.Output, context_sparse_shapes []tf.Output, context_dense_values []tf.Output, feature_list_sparse_indices []tf.Output, feature_list_sparse_values []tf.Output, feature_list_sparse_shapes []tf.Output, feature_list_dense_values []tf.Output, feature_list_dense_lengths []tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"feature_list_dense_missing_assumed_empty": feature_list_dense_missing_assumed_empty, "context_sparse_keys": context_sparse_keys, "context_dense_keys": context_dense_keys, "feature_list_sparse_keys": feature_list_sparse_keys, "feature_list_dense_keys": feature_list_dense_keys} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ParseSequenceExampleV2", + Type: "ParseSequenceExample", Input: []tf.Input{ - serialized, debug_name, context_sparse_keys, context_dense_keys, context_ragged_keys, feature_list_sparse_keys, feature_list_dense_keys, feature_list_ragged_keys, feature_list_dense_missing_assumed_empty, tf.OutputList(context_dense_defaults), + serialized, debug_name, tf.OutputList(context_dense_defaults), }, Attrs: attrs, } @@ -27659,281 +27122,68 @@ func ParseSequenceExampleV2(scope *Scope, serialized tf.Output, debug_name tf.Ou var idx int var err error if context_sparse_indices, idx, err = makeOutputList(op, idx, "context_sparse_indices"); err != nil { - scope.UpdateErr("ParseSequenceExampleV2", err) + scope.UpdateErr("ParseSequenceExample", err) return } if context_sparse_values, idx, err = makeOutputList(op, idx, "context_sparse_values"); err != nil { - scope.UpdateErr("ParseSequenceExampleV2", err) + scope.UpdateErr("ParseSequenceExample", err) return } if context_sparse_shapes, idx, err = makeOutputList(op, idx, "context_sparse_shapes"); err != nil { - scope.UpdateErr("ParseSequenceExampleV2", err) + scope.UpdateErr("ParseSequenceExample", err) return } if context_dense_values, idx, err = makeOutputList(op, idx, "context_dense_values"); err != nil { - scope.UpdateErr("ParseSequenceExampleV2", err) - return - } - if context_ragged_values, idx, err = makeOutputList(op, idx, "context_ragged_values"); err != nil { - scope.UpdateErr("ParseSequenceExampleV2", err) - return - } - if context_ragged_row_splits, idx, err = makeOutputList(op, idx, "context_ragged_row_splits"); err != nil { - scope.UpdateErr("ParseSequenceExampleV2", err) + scope.UpdateErr("ParseSequenceExample", err) return } if feature_list_sparse_indices, idx, err = makeOutputList(op, idx, "feature_list_sparse_indices"); err != nil { - scope.UpdateErr("ParseSequenceExampleV2", err) + scope.UpdateErr("ParseSequenceExample", err) return } if feature_list_sparse_values, idx, err = makeOutputList(op, idx, "feature_list_sparse_values"); err != nil { - scope.UpdateErr("ParseSequenceExampleV2", err) + scope.UpdateErr("ParseSequenceExample", err) return } if feature_list_sparse_shapes, idx, err = makeOutputList(op, idx, "feature_list_sparse_shapes"); err != nil { - scope.UpdateErr("ParseSequenceExampleV2", err) + scope.UpdateErr("ParseSequenceExample", err) return } if feature_list_dense_values, idx, err = makeOutputList(op, idx, 
"feature_list_dense_values"); err != nil { - scope.UpdateErr("ParseSequenceExampleV2", err) + scope.UpdateErr("ParseSequenceExample", err) return } if feature_list_dense_lengths, idx, err = makeOutputList(op, idx, "feature_list_dense_lengths"); err != nil { - scope.UpdateErr("ParseSequenceExampleV2", err) + scope.UpdateErr("ParseSequenceExample", err) return } - if feature_list_ragged_values, idx, err = makeOutputList(op, idx, "feature_list_ragged_values"); err != nil { - scope.UpdateErr("ParseSequenceExampleV2", err) - return - } - if feature_list_ragged_outer_splits, idx, err = makeOutputList(op, idx, "feature_list_ragged_outer_splits"); err != nil { - scope.UpdateErr("ParseSequenceExampleV2", err) - return - } - if feature_list_ragged_inner_splits, idx, err = makeOutputList(op, idx, "feature_list_ragged_inner_splits"); err != nil { - scope.UpdateErr("ParseSequenceExampleV2", err) - return - } - return context_sparse_indices, context_sparse_values, context_sparse_shapes, context_dense_values, context_ragged_values, context_ragged_row_splits, feature_list_sparse_indices, feature_list_sparse_values, feature_list_sparse_shapes, feature_list_dense_values, feature_list_dense_lengths, feature_list_ragged_values, feature_list_ragged_outer_splits, feature_list_ragged_inner_splits + return context_sparse_indices, context_sparse_values, context_sparse_shapes, context_dense_values, feature_list_sparse_indices, feature_list_sparse_values, feature_list_sparse_shapes, feature_list_dense_values, feature_list_dense_lengths } -// Gives a guarantee to the TF runtime that the input tensor is a constant. -// -// The runtime is then free to make optimizations based on this. -// -// Only accepts value typed tensors as inputs and rejects resource variable handles -// as input. -// -// Returns the input tensor without modification. -func GuaranteeConst(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "GuaranteeConst", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} +// UniqueV2Attr is an optional argument to UniqueV2. +type UniqueV2Attr func(optionalAttr) -// Transforms a tf.Example proto (as a string) into typed tensors. -// -// Arguments: -// serialized: A vector containing a batch of binary serialized Example protos. -// dense_defaults: A list of Tensors (some may be empty), whose length matches -// the length of `dense_keys`. dense_defaults[j] provides default values -// when the example's feature_map lacks dense_key[j]. If an empty Tensor is -// provided for dense_defaults[j], then the Feature dense_keys[j] is required. -// The input type is inferred from dense_defaults[j], even when it's empty. -// If dense_defaults[j] is not empty, and dense_shapes[j] is fully defined, -// then the shape of dense_defaults[j] must match that of dense_shapes[j]. -// If dense_shapes[j] has an undefined major dimension (variable strides dense -// feature), dense_defaults[j] must contain a single element: -// the padding element. -// num_sparse: The number of sparse features to be parsed from the example. This -// must match the lengths of `sparse_keys` and `sparse_types`. -// sparse_keys: A list of `num_sparse` strings. -// The keys expected in the Examples' features associated with sparse values. -// dense_keys: The keys expected in the Examples' features associated with dense -// values. -// sparse_types: A list of `num_sparse` types; the data types of data in each -// Feature given in sparse_keys. 
-// Currently the ParseSingleExample op supports DT_FLOAT (FloatList), -// DT_INT64 (Int64List), and DT_STRING (BytesList). -// dense_shapes: The shapes of data in each Feature given in dense_keys. -// The length of this list must match the length of `dense_keys`. The -// number of elements in the Feature corresponding to dense_key[j] must -// always equal dense_shapes[j].NumEntries(). If dense_shapes[j] == -// (D0, D1, ..., DN) then the shape of output Tensor dense_values[j] -// will be (D0, D1, ..., DN): In the case dense_shapes[j] = (-1, D1, -// ..., DN), the shape of the output Tensor dense_values[j] will be (M, -// D1, .., DN), where M is the number of blocks of elements of length -// D1 * .... * DN, in the input. -func ParseSingleExample(scope *Scope, serialized tf.Output, dense_defaults []tf.Output, num_sparse int64, sparse_keys []string, dense_keys []string, sparse_types []tf.DataType, dense_shapes []tf.Shape) (sparse_indices []tf.Output, sparse_values []tf.Output, sparse_shapes []tf.Output, dense_values []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_sparse": num_sparse, "sparse_keys": sparse_keys, "dense_keys": dense_keys, "sparse_types": sparse_types, "dense_shapes": dense_shapes} - opspec := tf.OpSpec{ - Type: "ParseSingleExample", - Input: []tf.Input{ - serialized, tf.OutputList(dense_defaults), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if sparse_indices, idx, err = makeOutputList(op, idx, "sparse_indices"); err != nil { - scope.UpdateErr("ParseSingleExample", err) - return - } - if sparse_values, idx, err = makeOutputList(op, idx, "sparse_values"); err != nil { - scope.UpdateErr("ParseSingleExample", err) - return - } - if sparse_shapes, idx, err = makeOutputList(op, idx, "sparse_shapes"); err != nil { - scope.UpdateErr("ParseSingleExample", err) - return - } - if dense_values, idx, err = makeOutputList(op, idx, "dense_values"); err != nil { - scope.UpdateErr("ParseSingleExample", err) - return - } - return sparse_indices, sparse_values, sparse_shapes, dense_values -} - -// Scatter `updates` into a new tensor according to `indices`. -// -// Creates a new tensor by applying sparse `updates` to individual values or -// slices within a tensor (initially zero for numeric, empty for string) of -// the given `shape` according to indices. This operator is the inverse of the -// `tf.gather_nd` operator which extracts values or slices from a given tensor. -// -// This operation is similar to tensor_scatter_add, except that the tensor is -// zero-initialized. Calling `tf.scatter_nd(indices, values, shape)` is identical -// to `tensor_scatter_add(tf.zeros(shape, values.dtype), indices, values)` -// -// If `indices` contains duplicates, then their updates are accumulated (summed). -// -// **WARNING**: The order in which updates are applied is nondeterministic, so the -// output will be nondeterministic if `indices` contains duplicates -- because -// of some numerical approximation issues, numbers summed in different order -// may yield different results. -// -// `indices` is an integer tensor containing indices into a new tensor of shape -// `shape`. 
The last dimension of `indices` can be at most the rank of `shape`: -// -// indices.shape[-1] <= shape.rank -// -// The last dimension of `indices` corresponds to indices into elements -// (if `indices.shape[-1] = shape.rank`) or slices -// (if `indices.shape[-1] < shape.rank`) along dimension `indices.shape[-1]` of -// `shape`. `updates` is a tensor with shape -// -// indices.shape[:-1] + shape[indices.shape[-1]:] -// -// The simplest form of scatter is to insert individual elements in a tensor by -// index. For example, say we want to insert 4 scattered elements in a rank-1 -// tensor with 8 elements. -// -//
-// [figure: the four scattered updates placed into the rank-1 output tensor]
-//
-// -// In Python, this scatter operation would look like this: -// -// ```python -// indices = tf.constant([[4], [3], [1], [7]]) -// updates = tf.constant([9, 10, 11, 12]) -// shape = tf.constant([8]) -// scatter = tf.scatter_nd(indices, updates, shape) -// print(scatter) -// ``` -// -// The resulting tensor would look like this: -// -// [0, 11, 0, 10, 9, 0, 0, 12] -// -// We can also, insert entire slices of a higher rank tensor all at once. For -// example, if we wanted to insert two slices in the first dimension of a -// rank-3 tensor with two matrices of new values. -// -//
-// [figure: the two update slices placed into the first dimension of the rank-3 output tensor]
-//
-// -// In Python, this scatter operation would look like this: -// -// ```python -// indices = tf.constant([[0], [2]]) -// updates = tf.constant([[[5, 5, 5, 5], [6, 6, 6, 6], -// [7, 7, 7, 7], [8, 8, 8, 8]], -// [[5, 5, 5, 5], [6, 6, 6, 6], -// [7, 7, 7, 7], [8, 8, 8, 8]]]) -// shape = tf.constant([4, 4, 4]) -// scatter = tf.scatter_nd(indices, updates, shape) -// print(scatter) -// ``` -// -// The resulting tensor would look like this: -// -// [[[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]], -// [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], -// [[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]], -// [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]] -// -// Note that on CPU, if an out of bound index is found, an error is returned. -// On GPU, if an out of bound index is found, the index is ignored. -// -// Arguments: -// indices: Index tensor. -// updates: Updates to scatter into output. -// shape: 1-D. The shape of the resulting tensor. -// -// Returns A new tensor with the given shape and updates applied according -// to the indices. -func ScatterNd(scope *Scope, indices tf.Output, updates tf.Output, shape tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ScatterNd", - Input: []tf.Input{ - indices, updates, shape, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// UniqueAttr is an optional argument to Unique. -type UniqueAttr func(optionalAttr) - -// UniqueOutIdx sets the optional out_idx attribute to value. +// UniqueV2OutIdx sets the optional out_idx attribute to value. // If not specified, defaults to DT_INT32 -func UniqueOutIdx(value tf.DataType) UniqueAttr { +func UniqueV2OutIdx(value tf.DataType) UniqueV2Attr { return func(m optionalAttr) { m["out_idx"] = value } } -// Finds unique elements in a 1-D tensor. +// Finds unique elements along an axis of a tensor. // -// This operation returns a tensor `y` containing all of the unique elements of `x` -// sorted in the same order that they occur in `x`; `x` does not need to be sorted. -// This operation also returns a tensor `idx` the same size as `x` that contains -// the index of each value of `x` in the unique output `y`. In other words: +// This operation either returns a tensor `y` containing unique elements +// along the `axis` of a tensor. The returned unique elements is sorted +// in the same order as they occur along `axis` in `x`. +// This operation also returns a tensor `idx` that is the same size as +// the number of the elements in `x` along the `axis` dimension. It +// contains the index in the unique output `y`. 
+// In other words, for an `1-D` tensor `x` with `axis = None: // // `y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]` // -// Examples: +// For example: // // ``` // # tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8] @@ -27942,20 +27192,41 @@ func UniqueOutIdx(value tf.DataType) UniqueAttr { // idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] // ``` // +// For an `2-D` tensor `x` with `axis = 0`: +// // ``` -// # tensor 'x' is [4, 5, 1, 2, 3, 3, 4, 5] -// y, idx = unique(x) -// y ==> [4, 5, 1, 2, 3] -// idx ==> [0, 1, 2, 3, 4, 4, 0, 1] +// # tensor 'x' is [[1, 0, 0], +// # [1, 0, 0], +// # [2, 0, 0]] +// y, idx = unique(x, axis=0) +// y ==> [[1, 0, 0], +// [2, 0, 0]] +// idx ==> [0, 0, 1] +// ``` +// +// For an `2-D` tensor `x` with `axis = 1`: +// +// ``` +// # tensor 'x' is [[1, 0, 0], +// # [1, 0, 0], +// # [2, 0, 0]] +// y, idx = unique(x, axis=1) +// y ==> [[1, 0], +// [1, 0], +// [2, 0]] +// idx ==> [0, 1, 1] // ``` // // Arguments: -// x: 1-D. +// x: A `Tensor`. +// axis: A `Tensor` of type `int32` (default: None). The axis of the Tensor to +// find the unique elements. // // Returns: -// y: 1-D. -// idx: 1-D. -func Unique(scope *Scope, x tf.Output, optional ...UniqueAttr) (y tf.Output, idx tf.Output) { +// y: A `Tensor`. Unique elements along the `axis` of `Tensor` x. +// idx: A 1-D Tensor. Has the same type as x that contains the index of each +// value of x in the output y. +func UniqueV2(scope *Scope, x tf.Output, axis tf.Output, optional ...UniqueV2Attr) (y tf.Output, idx tf.Output) { if scope.Err() != nil { return } @@ -27964,9 +27235,9 @@ func Unique(scope *Scope, x tf.Output, optional ...UniqueAttr) (y tf.Output, idx a(attrs) } opspec := tf.OpSpec{ - Type: "Unique", + Type: "UniqueV2", Input: []tf.Input{ - x, + x, axis, }, Attrs: attrs, } @@ -27974,133 +27245,90 @@ func Unique(scope *Scope, x tf.Output, optional ...UniqueAttr) (y tf.Output, idx return op.Output(0), op.Output(1) } -// Converts a `RaggedTensor` into a `SparseTensor` with the same values. +// RetrieveTPUEmbeddingADAMParametersAttr is an optional argument to RetrieveTPUEmbeddingADAMParameters. +type RetrieveTPUEmbeddingADAMParametersAttr func(optionalAttr) + +// RetrieveTPUEmbeddingADAMParametersTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 +func RetrieveTPUEmbeddingADAMParametersTableId(value int64) RetrieveTPUEmbeddingADAMParametersAttr { + return func(m optionalAttr) { + m["table_id"] = value + } +} + +// RetrieveTPUEmbeddingADAMParametersTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func RetrieveTPUEmbeddingADAMParametersTableName(value string) RetrieveTPUEmbeddingADAMParametersAttr { + return func(m optionalAttr) { + m["table_name"] = value + } +} + +// RetrieveTPUEmbeddingADAMParametersConfig sets the optional config attribute to value. +// If not specified, defaults to "" +func RetrieveTPUEmbeddingADAMParametersConfig(value string) RetrieveTPUEmbeddingADAMParametersAttr { + return func(m optionalAttr) { + m["config"] = value + } +} + +// Retrieve ADAM embedding parameters. // -// input=ragged.from_nested_row_splits(rt_dense_values, rt_nested_splits) -// output=SparseTensor(indices=sparse_indices, values=sparse_values, -// dense_shape=sparse_dense_shape) -// -// Arguments: -// rt_nested_splits: The `row_splits` for the `RaggedTensor`. -// rt_dense_values: The `flat_values` for the `RaggedTensor`. +// An op that retrieves optimization parameters from embedding to host +// memory. 
Must be preceded by a ConfigureTPUEmbeddingHost op that sets up +// the correct embedding table configuration. For example, this op is +// used to retrieve updated parameters before saving a checkpoint. // // Returns: -// sparse_indices: The indices for the `SparseTensor`. -// sparse_values: The values of the `SparseTensor`. -// sparse_dense_shape: `sparse_dense_shape` is a tight bounding box of the input `RaggedTensor`. -func RaggedTensorToSparse(scope *Scope, rt_nested_splits []tf.Output, rt_dense_values tf.Output) (sparse_indices tf.Output, sparse_values tf.Output, sparse_dense_shape tf.Output) { +// parameters: Parameter parameters updated by the ADAM optimization algorithm. +// momenta: Parameter momenta updated by the ADAM optimization algorithm. +// velocities: Parameter velocities updated by the ADAM optimization algorithm. +func RetrieveTPUEmbeddingADAMParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingADAMParametersAttr) (parameters tf.Output, momenta tf.Output, velocities tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "RaggedTensorToSparse", - Input: []tf.Input{ - tf.OutputList(rt_nested_splits), rt_dense_values, - }, + Type: "RetrieveTPUEmbeddingADAMParameters", + + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0), op.Output(1), op.Output(2) } -// Returns the name of the device on which `resource` has been placed. -func ExperimentalIteratorGetDevice(scope *Scope, resource tf.Output) (device tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ExperimentalIteratorGetDevice", - Input: []tf.Input{ - resource, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} +// StatelessRandomBinomialAttr is an optional argument to StatelessRandomBinomial. +type StatelessRandomBinomialAttr func(optionalAttr) -// Records the bytes size of each element of `input_dataset` in a StatsAggregator. -func ExperimentalBytesProducedStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "ExperimentalBytesProducedStatsDataset", - Input: []tf.Input{ - input_dataset, tag, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes exponential linear: `exp(features) - 1` if < 0, `features` otherwise. +// StatelessRandomBinomialDtype sets the optional dtype attribute to value. // -// See [Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs) -// ](http://arxiv.org/abs/1511.07289) -func Elu(scope *Scope, features tf.Output) (activations tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Elu", - Input: []tf.Input{ - features, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// AddSparseToTensorsMapAttr is an optional argument to AddSparseToTensorsMap. -type AddSparseToTensorsMapAttr func(optionalAttr) - -// AddSparseToTensorsMapContainer sets the optional container attribute to value. -// -// value: The container name for the `SparseTensorsMap` created by this op. 
-// If not specified, defaults to "" -func AddSparseToTensorsMapContainer(value string) AddSparseToTensorsMapAttr { +// value: The type of the output. +// If not specified, defaults to DT_INT64 +func StatelessRandomBinomialDtype(value tf.DataType) StatelessRandomBinomialAttr { return func(m optionalAttr) { - m["container"] = value + m["dtype"] = value } } -// AddSparseToTensorsMapSharedName sets the optional shared_name attribute to value. +// Outputs deterministic pseudorandom random numbers from a binomial distribution. // -// value: The shared name for the `SparseTensorsMap` created by this op. -// If blank, the new Operation's unique name is used. -// If not specified, defaults to "" -func AddSparseToTensorsMapSharedName(value string) AddSparseToTensorsMapAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Add a `SparseTensor` to a `SparseTensorsMap` return its handle. +// Outputs random values from a binomial distribution. // -// A `SparseTensor` is represented by three tensors: `sparse_indices`, -// `sparse_values`, and `sparse_shape`. -// -// This operator takes the given `SparseTensor` and adds it to a container -// object (a `SparseTensorsMap`). A unique key within this container is generated -// in the form of an `int64`, and this is the value that is returned. -// -// The `SparseTensor` can then be read out as part of a minibatch by passing -// the key as a vector element to `TakeManySparseFromTensorsMap`. To ensure -// the correct `SparseTensorsMap` is accessed, ensure that the same -// `container` and `shared_name` are passed to that Op. If no `shared_name` -// is provided here, instead use the *name* of the Operation created by calling -// `AddSparseToTensorsMap` as the `shared_name` passed to -// `TakeManySparseFromTensorsMap`. Ensure the Operations are colocated. +// The outputs are a deterministic function of `shape`, `seed`, `counts`, and `probs`. // // Arguments: -// sparse_indices: 2-D. The `indices` of the `SparseTensor`. -// sparse_values: 1-D. The `values` of the `SparseTensor`. -// sparse_shape: 1-D. The `shape` of the `SparseTensor`. +// shape: The shape of the output tensor. +// seed: 2 seeds (shape [2]). +// counts: The counts of the binomial distribution. Must be broadcastable with `probs`, +// and broadcastable with the rightmost dimensions of `shape`. +// probs: The probability of success for the binomial distribution. Must be broadcastable +// with `counts` and broadcastable with the rightmost dimensions of `shape`. // -// Returns 0-D. The handle of the `SparseTensor` now stored in the -// `SparseTensorsMap`. -func AddSparseToTensorsMap(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...AddSparseToTensorsMapAttr) (sparse_handle tf.Output) { +// Returns Random values with specified shape. 
+func StatelessRandomBinomial(scope *Scope, shape tf.Output, seed tf.Output, counts tf.Output, probs tf.Output, optional ...StatelessRandomBinomialAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -28109,9 +27337,9 @@ func AddSparseToTensorsMap(scope *Scope, sparse_indices tf.Output, sparse_values a(attrs) } opspec := tf.OpSpec{ - Type: "AddSparseToTensorsMap", + Type: "StatelessRandomBinomial", Input: []tf.Input{ - sparse_indices, sparse_values, sparse_shape, + shape, seed, counts, probs, }, Attrs: attrs, } @@ -28119,314 +27347,15 @@ func AddSparseToTensorsMap(scope *Scope, sparse_indices tf.Output, sparse_values return op.Output(0) } -// Transforms a vector of tf.Example protos (as strings) into typed tensors. -// -// Arguments: -// serialized: A scalar or vector containing binary serialized Example protos. -// names: A tensor containing the names of the serialized protos. -// Corresponds 1:1 with the `serialized` tensor. -// May contain, for example, table key (descriptive) names for the -// corresponding serialized protos. These are purely useful for debugging -// purposes, and the presence of values here has no effect on the output. -// May also be an empty vector if no names are available. -// If non-empty, this tensor must have the same shape as "serialized". -// sparse_keys: Vector of strings. -// The keys expected in the Examples' features associated with sparse values. -// dense_keys: Vector of strings. -// The keys expected in the Examples' features associated with dense values. -// ragged_keys: Vector of strings. -// The keys expected in the Examples' features associated with ragged values. -// dense_defaults: A list of Tensors (some may be empty). Corresponds 1:1 with `dense_keys`. -// dense_defaults[j] provides default values -// when the example's feature_map lacks dense_key[j]. If an empty Tensor is -// provided for dense_defaults[j], then the Feature dense_keys[j] is required. -// The input type is inferred from dense_defaults[j], even when it's empty. -// If dense_defaults[j] is not empty, and dense_shapes[j] is fully defined, -// then the shape of dense_defaults[j] must match that of dense_shapes[j]. -// If dense_shapes[j] has an undefined major dimension (variable strides dense -// feature), dense_defaults[j] must contain a single element: -// the padding element. -// num_sparse: The number of sparse keys. -// sparse_types: A list of `num_sparse` types; the data types of data in each Feature -// given in sparse_keys. -// Currently the ParseExample supports DT_FLOAT (FloatList), -// DT_INT64 (Int64List), and DT_STRING (BytesList). -// ragged_value_types: A list of `num_ragged` types; the data types of data in each Feature -// given in ragged_keys (where `num_ragged = sparse_keys.size()`). -// Currently the ParseExample supports DT_FLOAT (FloatList), -// DT_INT64 (Int64List), and DT_STRING (BytesList). -// ragged_split_types: A list of `num_ragged` types; the data types of row_splits in each Feature -// given in ragged_keys (where `num_ragged = sparse_keys.size()`). -// May be DT_INT32 or DT_INT64. -// dense_shapes: A list of `num_dense` shapes; the shapes of data in each Feature -// given in dense_keys (where `num_dense = dense_keys.size()`). -// The number of elements in the Feature corresponding to dense_key[j] -// must always equal dense_shapes[j].NumEntries(). 
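A similar sketch for the `StatelessRandomBinomial` wrapper added above, showing that `shape`, `seed`, `counts`, and `probs` are all supplied as tensors. The concrete seed and distribution parameters are illustrative assumptions; only graph construction is shown, and execution would follow the same Session pattern as the `UniqueV2` sketch.

```go
package main

import (
	"fmt"

	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	shape := op.Const(s, []int32{5})    // draw 5 samples
	seed := op.Const(s, []int64{7, 42}) // 2 seeds, shape [2]
	counts := op.Const(s, float32(10))  // number of trials, broadcast against the output shape
	probs := op.Const(s, float32(0.5))  // success probability, broadcast likewise
	samples := op.StatelessRandomBinomial(s, shape, seed, counts, probs)
	_ = samples // int64 output by default; fetch it in a Session as in the UniqueV2 sketch

	if _, err := s.Finalize(); err != nil {
		panic(err)
	}
	fmt.Println("StatelessRandomBinomial graph constructed")
}
```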
-// If dense_shapes[j] == (D0, D1, ..., DN) then the shape of output -// Tensor dense_values[j] will be (|serialized|, D0, D1, ..., DN): -// The dense outputs are just the inputs row-stacked by batch. -// This works for dense_shapes[j] = (-1, D1, ..., DN). In this case -// the shape of the output Tensor dense_values[j] will be -// (|serialized|, M, D1, .., DN), where M is the maximum number of blocks -// of elements of length D1 * .... * DN, across all minibatch entries -// in the input. Any minibatch entry with less than M blocks of elements of -// length D1 * ... * DN will be padded with the corresponding default_value -// scalar element along the second dimension. -func ParseExampleV2(scope *Scope, serialized tf.Output, names tf.Output, sparse_keys tf.Output, dense_keys tf.Output, ragged_keys tf.Output, dense_defaults []tf.Output, num_sparse int64, sparse_types []tf.DataType, ragged_value_types []tf.DataType, ragged_split_types []tf.DataType, dense_shapes []tf.Shape) (sparse_indices []tf.Output, sparse_values []tf.Output, sparse_shapes []tf.Output, dense_values []tf.Output, ragged_values []tf.Output, ragged_row_splits []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_sparse": num_sparse, "sparse_types": sparse_types, "ragged_value_types": ragged_value_types, "ragged_split_types": ragged_split_types, "dense_shapes": dense_shapes} - opspec := tf.OpSpec{ - Type: "ParseExampleV2", - Input: []tf.Input{ - serialized, names, sparse_keys, dense_keys, ragged_keys, tf.OutputList(dense_defaults), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if sparse_indices, idx, err = makeOutputList(op, idx, "sparse_indices"); err != nil { - scope.UpdateErr("ParseExampleV2", err) - return - } - if sparse_values, idx, err = makeOutputList(op, idx, "sparse_values"); err != nil { - scope.UpdateErr("ParseExampleV2", err) - return - } - if sparse_shapes, idx, err = makeOutputList(op, idx, "sparse_shapes"); err != nil { - scope.UpdateErr("ParseExampleV2", err) - return - } - if dense_values, idx, err = makeOutputList(op, idx, "dense_values"); err != nil { - scope.UpdateErr("ParseExampleV2", err) - return - } - if ragged_values, idx, err = makeOutputList(op, idx, "ragged_values"); err != nil { - scope.UpdateErr("ParseExampleV2", err) - return - } - if ragged_row_splits, idx, err = makeOutputList(op, idx, "ragged_row_splits"); err != nil { - scope.UpdateErr("ParseExampleV2", err) - return - } - return sparse_indices, sparse_values, sparse_shapes, dense_values, ragged_values, ragged_row_splits -} +// DecodePaddedRawAttr is an optional argument to DecodePaddedRaw. +type DecodePaddedRawAttr func(optionalAttr) -// Saves input tensors slices to disk. +// DecodePaddedRawLittleEndian sets the optional little_endian attribute to value. // -// This is like `Save` except that tensors can be listed in the saved file as being -// a slice of a larger tensor. `shapes_and_slices` specifies the shape of the -// larger tensor and the slice that this tensor covers. `shapes_and_slices` must -// have as many elements as `tensor_names`. -// -// Elements of the `shapes_and_slices` input must either be: -// -// * The empty string, in which case the corresponding tensor is -// saved normally. -// * A string of the form `dim0 dim1 ... dimN-1 slice-spec` where the -// `dimI` are the dimensions of the larger tensor and `slice-spec` -// specifies what part is covered by the tensor to save. 
-// -// `slice-spec` itself is a `:`-separated list: `slice0:slice1:...:sliceN-1` -// where each `sliceI` is either: -// -// * The string `-` meaning that the slice covers all indices of this dimension -// * `start,length` where `start` and `length` are integers. In that -// case the slice covers `length` indices starting at `start`. -// -// See also `Save`. -// -// Arguments: -// filename: Must have a single element. The name of the file to which we write the -// tensor. -// tensor_names: Shape `[N]`. The names of the tensors to be saved. -// shapes_and_slices: Shape `[N]`. The shapes and slice specifications to use when -// saving the tensors. -// data: `N` tensors to save. -// -// Returns the created operation. -func SaveSlices(scope *Scope, filename tf.Output, tensor_names tf.Output, shapes_and_slices tf.Output, data []tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SaveSlices", - Input: []tf.Input{ - filename, tensor_names, shapes_and_slices, tf.OutputList(data), - }, - } - return scope.AddOperation(opspec) -} - -// FusedBatchNormGradV3Attr is an optional argument to FusedBatchNormGradV3. -type FusedBatchNormGradV3Attr func(optionalAttr) - -// FusedBatchNormGradV3Epsilon sets the optional epsilon attribute to value. -// -// value: A small float number added to the variance of x. -// If not specified, defaults to 0.0001 -func FusedBatchNormGradV3Epsilon(value float32) FusedBatchNormGradV3Attr { - return func(m optionalAttr) { - m["epsilon"] = value - } -} - -// FusedBatchNormGradV3DataFormat sets the optional data_format attribute to value. -// -// value: The data format for y_backprop, x, x_backprop. -// Either "NHWC" (default) or "NCHW". -// If not specified, defaults to "NHWC" -func FusedBatchNormGradV3DataFormat(value string) FusedBatchNormGradV3Attr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// FusedBatchNormGradV3IsTraining sets the optional is_training attribute to value. -// -// value: A bool value to indicate the operation is for training (default) -// or inference. +// value: Whether the input `input_bytes` is in little-endian order. Ignored for +// `out_type` values that are stored in a single byte, like `uint8` // If not specified, defaults to true -func FusedBatchNormGradV3IsTraining(value bool) FusedBatchNormGradV3Attr { - return func(m optionalAttr) { - m["is_training"] = value - } -} - -// Gradient for batch normalization. -// -// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW". -// The size of 1D Tensors matches the dimension C of the 4D Tensors. -// -// Arguments: -// y_backprop: A 4D Tensor for the gradient with respect to y. -// x: A 4D Tensor for input data. -// scale: A 1D Tensor for scaling factor, to scale the normalized x. -// reserve_space_1: When is_training is True, a 1D Tensor for the computed batch -// mean to be reused in gradient computation. When is_training is -// False, a 1D Tensor for the population mean to be reused in both -// 1st and 2nd order gradient computation. -// reserve_space_2: When is_training is True, a 1D Tensor for the computed batch -// variance (inverted variance in the cuDNN case) to be reused in -// gradient computation. When is_training is False, a 1D Tensor -// for the population variance to be reused in both 1st and 2nd -// order gradient computation. -// reserve_space_3: When is_training is True, a 1D Tensor for some intermediate results to be reused -// in gradient computation. 
When is_training is False, a dummy empty Tensor will be -// created. -// -// Returns: -// x_backprop: A 4D Tensor for the gradient with respect to x. -// scale_backprop: A 1D Tensor for the gradient with respect to scale. -// offset_backprop: A 1D Tensor for the gradient with respect to offset. -// reserve_space_4: Unused placeholder to match the mean input in FusedBatchNorm. -// reserve_space_5: Unused placeholder to match the variance input -// in FusedBatchNorm. -func FusedBatchNormGradV3(scope *Scope, y_backprop tf.Output, x tf.Output, scale tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output, reserve_space_3 tf.Output, optional ...FusedBatchNormGradV3Attr) (x_backprop tf.Output, scale_backprop tf.Output, offset_backprop tf.Output, reserve_space_4 tf.Output, reserve_space_5 tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "FusedBatchNormGradV3", - Input: []tf.Input{ - y_backprop, x, scale, reserve_space_1, reserve_space_2, reserve_space_3, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) -} - -// AvgPool3DAttr is an optional argument to AvgPool3D. -type AvgPool3DAttr func(optionalAttr) - -// AvgPool3DDataFormat sets the optional data_format attribute to value. -// -// value: The data format of the input and output data. With the -// default format "NDHWC", the data is stored in the order of: -// [batch, in_depth, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCDHW", the data storage order is: -// [batch, in_channels, in_depth, in_height, in_width]. -// If not specified, defaults to "NDHWC" -func AvgPool3DDataFormat(value string) AvgPool3DAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Performs 3D average pooling on the input. -// -// Each entry in `output` is the mean of the corresponding size `ksize` window in -// `value`. -// -// Arguments: -// input: Shape `[batch, depth, rows, cols, channels]` tensor to pool over. -// ksize: 1-D tensor of length 5. The size of the window for each dimension of -// the input tensor. Must have `ksize[0] = ksize[4] = 1`. -// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. -// -// Returns The average pooled output tensor. -func AvgPool3D(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPool3DAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "AvgPool3D", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns the number of records this Reader has produced. -// -// This is the same as the number of ReaderRead executions that have -// succeeded. -// -// Arguments: -// reader_handle: Handle to a Reader. 
-func ReaderNumRecordsProducedV2(scope *Scope, reader_handle tf.Output) (records_produced tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ReaderNumRecordsProducedV2", - Input: []tf.Input{ - reader_handle, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// DecodeRawAttr is an optional argument to DecodeRaw. -type DecodeRawAttr func(optionalAttr) - -// DecodeRawLittleEndian sets the optional little_endian attribute to value. -// -// value: Whether the input `bytes` are in little-endian order. -// Ignored for `out_type` values that are stored in a single byte like -// `uint8`. -// If not specified, defaults to true -func DecodeRawLittleEndian(value bool) DecodeRawAttr { +func DecodePaddedRawLittleEndian(value bool) DecodePaddedRawAttr { return func(m optionalAttr) { m["little_endian"] = value } @@ -28435,13 +27364,15 @@ func DecodeRawLittleEndian(value bool) DecodeRawAttr { // Reinterpret the bytes of a string as a vector of numbers. // // Arguments: -// bytes: All the elements must have the same length. +// input_bytes: Tensor of string to be decoded. +// fixed_length: Length in bytes for each element of the decoded output. Must be a multiple +// of the size of the output type. // // -// Returns A Tensor with one more dimension than the input `bytes`. The -// added dimension will have size equal to the length of the elements -// of `bytes` divided by the number of bytes to represent `out_type`. -func DecodeRaw(scope *Scope, bytes tf.Output, out_type tf.DataType, optional ...DecodeRawAttr) (output tf.Output) { +// Returns A Tensor with one more dimension than the input `bytes`. The added dimension +// will have size equal to the length of the elements of `bytes` divided by the +// number of bytes to represent `out_type`. +func DecodePaddedRaw(scope *Scope, input_bytes tf.Output, fixed_length tf.Output, out_type tf.DataType, optional ...DecodePaddedRawAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -28450,9 +27381,9 @@ func DecodeRaw(scope *Scope, bytes tf.Output, out_type tf.DataType, optional ... a(attrs) } opspec := tf.OpSpec{ - Type: "DecodeRaw", + Type: "DecodePaddedRaw", Input: []tf.Input{ - bytes, + input_bytes, fixed_length, }, Attrs: attrs, } @@ -31547,57 +30478,6 @@ func VariableShape(scope *Scope, input tf.Output, optional ...VariableShapeAttr) return op.Output(0) } -// Computes the minimum along segments of a tensor. -// -// Read -// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) -// for an explanation of segments. -// -// This operator is similar to the unsorted segment sum operator found -// [(here)](../../../api_docs/python/math_ops.md#UnsortedSegmentSum). -// Instead of computing the sum over segments, it computes the minimum such that: -// -// \\(output_i = \min_{j...} data_[j...]\\) where min is over tuples `j...` such -// that `segment_ids[j...] == i`. -// -// If the minimum is empty for a given segment ID `i`, it outputs the largest -// possible value for the specific numeric type, -// `output[i] = numeric_limits::max()`. -// -// For example: -// -// ``` python -// c = tf.constant([[1,2,3,4], [5,6,7,8], [4,3,2,1]]) -// tf.unsorted_segment_min(c, tf.constant([0, 1, 0]), num_segments=2) -// # ==> [[ 1, 2, 2, 1], -// # [5, 6, 7, 8]] -// ``` -// -// If the given segment ID `i` is negative, then the corresponding value is -// dropped, and will not be included in the result. -// -// Arguments: -// -// segment_ids: A tensor whose shape is a prefix of `data.shape`. 
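To make the `fixed_length` semantics of the new `DecodePaddedRaw` wrapper concrete, a small sketch that decodes two little-endian byte strings into `int16` values. The import paths, byte strings, and expected output are illustrative assumptions rather than part of the patch.

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// Two little-endian byte strings of different lengths; fixed_length pads or
	// truncates each one to exactly 4 bytes before reinterpreting as int16.
	input := op.Const(s, []string{"\x01\x00\x02\x00", "\x03\x00"})
	fixedLength := op.Const(s, int32(4)) // must be a multiple of sizeof(int16)
	decoded := op.DecodePaddedRaw(s, input, fixedLength, tf.Int16)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, []tf.Output{decoded}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // expected: [[1 2] [3 0]] (second element zero-padded)
}
```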
-// -// -// Returns Has same shape as data, except for the first `segment_ids.rank` -// dimensions, which are replaced with a single dimension which has size -// `num_segments`. -func UnsortedSegmentMin(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "UnsortedSegmentMin", - Input: []tf.Input{ - data, segment_ids, num_segments, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // ResourceScatterNdSubAttr is an optional argument to ResourceScatterNdSub. type ResourceScatterNdSubAttr func(optionalAttr) @@ -31675,6 +30555,107 @@ func ResourceScatterNdSub(scope *Scope, ref tf.Output, indices tf.Output, update return scope.AddOperation(opspec) } +// Computes the minimum along segments of a tensor. +// +// Read +// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) +// for an explanation of segments. +// +// This operator is similar to the unsorted segment sum operator found +// [(here)](../../../api_docs/python/math_ops.md#UnsortedSegmentSum). +// Instead of computing the sum over segments, it computes the minimum such that: +// +// \\(output_i = \min_{j...} data_[j...]\\) where min is over tuples `j...` such +// that `segment_ids[j...] == i`. +// +// If the minimum is empty for a given segment ID `i`, it outputs the largest +// possible value for the specific numeric type, +// `output[i] = numeric_limits::max()`. +// +// For example: +// +// ``` python +// c = tf.constant([[1,2,3,4], [5,6,7,8], [4,3,2,1]]) +// tf.unsorted_segment_min(c, tf.constant([0, 1, 0]), num_segments=2) +// # ==> [[ 1, 2, 2, 1], +// # [5, 6, 7, 8]] +// ``` +// +// If the given segment ID `i` is negative, then the corresponding value is +// dropped, and will not be included in the result. +// +// Arguments: +// +// segment_ids: A tensor whose shape is a prefix of `data.shape`. +// +// +// Returns Has same shape as data, except for the first `segment_ids.rank` +// dimensions, which are replaced with a single dimension which has size +// `num_segments`. +func UnsortedSegmentMin(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "UnsortedSegmentMin", + Input: []tf.Input{ + data, segment_ids, num_segments, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// AvgPool3DAttr is an optional argument to AvgPool3D. +type AvgPool3DAttr func(optionalAttr) + +// AvgPool3DDataFormat sets the optional data_format attribute to value. +// +// value: The data format of the input and output data. With the +// default format "NDHWC", the data is stored in the order of: +// [batch, in_depth, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCDHW", the data storage order is: +// [batch, in_channels, in_depth, in_height, in_width]. +// If not specified, defaults to "NDHWC" +func AvgPool3DDataFormat(value string) AvgPool3DAttr { + return func(m optionalAttr) { + m["data_format"] = value + } +} + +// Performs 3D average pooling on the input. +// +// Each entry in `output` is the mean of the corresponding size `ksize` window in +// `value`. +// +// Arguments: +// input: Shape `[batch, depth, rows, cols, channels]` tensor to pool over. +// ksize: 1-D tensor of length 5. The size of the window for each dimension of +// the input tensor. Must have `ksize[0] = ksize[4] = 1`. +// strides: 1-D tensor of length 5. 
The stride of the sliding window for each +// dimension of `input`. Must have `strides[0] = strides[4] = 1`. +// padding: The type of padding algorithm to use. +// +// Returns The average pooled output tensor. +func AvgPool3D(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPool3DAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "AvgPool3D", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // DataFormatDimMapAttr is an optional argument to DataFormatDimMap. type DataFormatDimMapAttr func(optionalAttr) @@ -31749,144 +30730,26 @@ func AssignVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf. return scope.AddOperation(opspec) } -// UpperBoundAttr is an optional argument to UpperBound. -type UpperBoundAttr func(optionalAttr) - -// UpperBoundOutType sets the optional out_type attribute to value. -// If not specified, defaults to DT_INT32 -func UpperBoundOutType(value tf.DataType) UpperBoundAttr { - return func(m optionalAttr) { - m["out_type"] = value - } -} - -// Applies upper_bound(sorted_search_values, values) along each row. +// Asserts that compilation succeeded. This op produces no output and closes the // -// Each set of rows with the same index in (sorted_inputs, values) is treated -// independently. The resulting row is the equivalent of calling -// `np.searchsorted(sorted_inputs, values, side='right')`. +// device during failure to ensure all pending device interactions fail. // -// The result is not a global index to the entire -// `Tensor`, but rather just the index in the last dimension. -// -// A 2-D example: -// sorted_sequence = [[0, 3, 9, 9, 10], -// [1, 2, 3, 4, 5]] -// values = [[2, 4, 9], -// [0, 2, 6]] -// -// result = UpperBound(sorted_sequence, values) -// -// result == [[1, 2, 4], -// [0, 2, 5]] -// -// Arguments: -// sorted_inputs: 2-D Tensor where each row is ordered. -// values: 2-D Tensor with the same numbers of rows as `sorted_search_values`. Contains -// the values that will be searched for in `sorted_search_values`. -// -// Returns A `Tensor` with the same shape as `values`. It contains the last scalar index -// into the last dimension where values can be inserted without changing the -// ordered property. -func UpperBound(scope *Scope, sorted_inputs tf.Output, values tf.Output, optional ...UpperBoundAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "UpperBound", - Input: []tf.Input{ - sorted_inputs, values, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ResourceApplyFtrlV2Attr is an optional argument to ResourceApplyFtrlV2. -type ResourceApplyFtrlV2Attr func(optionalAttr) - -// ResourceApplyFtrlV2UseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. 
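A brief sketch of wiring the relocated `AvgPool3D` wrapper with a placeholder input in the default NDHWC layout. The placeholder shape and window/stride values are illustrative; only graph construction is shown.

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// NDHWC input: batch=1, depth=4, height=4, width=4, channels=1.
	input := op.Placeholder(s, tf.Float, op.PlaceholderShape(tf.MakeShape(1, 4, 4, 4, 1)))
	// 2x2x2 window, stride 2 in each spatial dimension; ksize[0] = ksize[4] = 1 as required.
	pooled := op.AvgPool3D(s, input,
		[]int64{1, 2, 2, 2, 1}, // ksize
		[]int64{1, 2, 2, 2, 1}, // strides
		"VALID")
	_ = pooled // output shape: [1, 2, 2, 2, 1]

	if _, err := s.Finalize(); err != nil {
		panic(err)
	}
	fmt.Println("AvgPool3D graph constructed")
}
```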
-// If not specified, defaults to false -func ResourceApplyFtrlV2UseLocking(value bool) ResourceApplyFtrlV2Attr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// ResourceApplyFtrlV2MultiplyLinearByLr sets the optional multiply_linear_by_lr attribute to value. -// If not specified, defaults to false -func ResourceApplyFtrlV2MultiplyLinearByLr(value bool) ResourceApplyFtrlV2Attr { - return func(m optionalAttr) { - m["multiply_linear_by_lr"] = value - } -} - -// Update '*var' according to the Ftrl-proximal scheme. -// -// grad_with_shrinkage = grad + 2 * l2_shrinkage * var -// accum_new = accum + grad_with_shrinkage * grad_with_shrinkage -// linear += grad_with_shrinkage + -// (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var -// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 -// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 -// accum = accum_new -// -// Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// linear: Should be from a Variable(). -// grad: The gradient. -// lr: Scaling factor. Must be a scalar. -// l1: L1 regularization. Must be a scalar. -// l2: L2 shrinkage regularization. Must be a scalar. -// -// lr_power: Scaling factor. Must be a scalar. +// 'compilation_status' is a serialized CompilationResultProto. // // Returns the created operation. -func ResourceApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, l2_shrinkage tf.Output, lr_power tf.Output, optional ...ResourceApplyFtrlV2Attr) (o *tf.Operation) { +func TPUCompileSucceededAssert(scope *Scope, compilation_status tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "ResourceApplyFtrlV2", + Type: "TPUCompileSucceededAssert", Input: []tf.Input{ - var_, accum, linear, grad, lr, l1, l2, l2_shrinkage, lr_power, + compilation_status, }, - Attrs: attrs, } return scope.AddOperation(opspec) } -// Deprecated. Use TensorArraySplitV3 -// -// DEPRECATED at GraphDef version 26: Use TensorArraySplitV3 -func TensorArraySplitV2(scope *Scope, handle tf.Output, value tf.Output, lengths tf.Output, flow_in tf.Output) (flow_out tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorArraySplitV2", - Input: []tf.Input{ - handle, value, lengths, flow_in, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // ComputeAccidentalHitsAttr is an optional argument to ComputeAccidentalHits. type ComputeAccidentalHitsAttr func(optionalAttr) @@ -32203,61 +31066,6 @@ func BoostedTreesQuantileStreamResourceHandleOp(scope *Scope, optional ...Booste return op.Output(0) } -// ResourceSparseApplyAdagradAttr is an optional argument to ResourceSparseApplyAdagrad. -type ResourceSparseApplyAdagradAttr func(optionalAttr) - -// ResourceSparseApplyAdagradUseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceSparseApplyAdagradUseLocking(value bool) ResourceSparseApplyAdagradAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// ResourceSparseApplyAdagradUpdateSlots sets the optional update_slots attribute to value. 
-// If not specified, defaults to true -func ResourceSparseApplyAdagradUpdateSlots(value bool) ResourceSparseApplyAdagradAttr { - return func(m optionalAttr) { - m["update_slots"] = value - } -} - -// Update relevant entries in '*var' and '*accum' according to the adagrad scheme. -// -// That is for rows we have grad for, we update var and accum as follows: -// accum += grad * grad -// var -= lr * grad * (1 / sqrt(accum)) -// -// Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// lr: Learning rate. Must be a scalar. -// grad: The gradient. -// indices: A vector of indices into the first dimension of var and accum. -// -// Returns the created operation. -func ResourceSparseApplyAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyAdagradAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceSparseApplyAdagrad", - Input: []tf.Input{ - var_, accum, lr, grad, indices, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - // EagerPyFuncAttr is an optional argument to EagerPyFunc. type EagerPyFuncAttr func(optionalAttr) @@ -33072,30 +31880,6 @@ func DenseToDenseSetOperation(scope *Scope, set1 tf.Output, set2 tf.Output, set_ return op.Output(0), op.Output(1), op.Output(2) } -// Returns the set of files matching one or more glob patterns. -// -// Note that this routine only supports wildcard characters in the -// basename portion of the pattern, not in the directory portion. -// Note also that the order of filenames returned is deterministic. -// -// Arguments: -// pattern: Shell wildcard pattern(s). Scalar or vector of type string. -// -// Returns A vector of matching filenames. -func MatchingFiles(scope *Scope, pattern tf.Output) (filenames tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "MatchingFiles", - Input: []tf.Input{ - pattern, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Returns element-wise remainder of division. When `x < 0` xor `y < 0` is // // true, this follows Python semantics in that the result here is consistent @@ -33701,6 +32485,230 @@ func CSRSparseMatrixToSparseTensor(scope *Scope, sparse_matrix tf.Output, type_ return op.Output(0), op.Output(1), op.Output(2) } +// OrderedMapStageAttr is an optional argument to OrderedMapStage. +type OrderedMapStageAttr func(optionalAttr) + +// OrderedMapStageCapacity sets the optional capacity attribute to value. +// +// value: Maximum number of elements in the Staging Area. If > 0, inserts +// on the container will block when the capacity is reached. +// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func OrderedMapStageCapacity(value int64) OrderedMapStageAttr { + return func(m optionalAttr) { + m["capacity"] = value + } +} + +// OrderedMapStageMemoryLimit sets the optional memory_limit attribute to value. +// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func OrderedMapStageMemoryLimit(value int64) OrderedMapStageAttr { + return func(m optionalAttr) { + m["memory_limit"] = value + } +} + +// OrderedMapStageContainer sets the optional container attribute to value. +// +// value: If non-empty, this queue is placed in the given container. Otherwise, +// a default container is used. 
+// If not specified, defaults to "" +func OrderedMapStageContainer(value string) OrderedMapStageAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// OrderedMapStageSharedName sets the optional shared_name attribute to value. +// +// value: It is necessary to match this name to the matching Unstage Op. +// If not specified, defaults to "" +func OrderedMapStageSharedName(value string) OrderedMapStageAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Stage (key, values) in the underlying container which behaves like a ordered +// +// associative container. Elements are ordered by key. +// +// Arguments: +// key: int64 +// +// values: a list of tensors +// dtypes A list of data types that inserted values should adhere to. +// +// +// Returns the created operation. +func OrderedMapStage(scope *Scope, key tf.Output, indices tf.Output, values []tf.Output, dtypes []tf.DataType, optional ...OrderedMapStageAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dtypes": dtypes} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "OrderedMapStage", + Input: []tf.Input{ + key, indices, tf.OutputList(values), + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// TPUReplicateMetadataAttr is an optional argument to TPUReplicateMetadata. +type TPUReplicateMetadataAttr func(optionalAttr) + +// TPUReplicateMetadataNumCoresPerReplica sets the optional num_cores_per_replica attribute to value. +// +// value: Number of cores per replica. Used for model parallelism. +// If not specified, defaults to 1 +func TPUReplicateMetadataNumCoresPerReplica(value int64) TPUReplicateMetadataAttr { + return func(m optionalAttr) { + m["num_cores_per_replica"] = value + } +} + +// TPUReplicateMetadataTopology sets the optional topology attribute to value. +// +// value: TopologyProto indicating the topology of the TPU pod slice. +// If not specified, defaults to "" +func TPUReplicateMetadataTopology(value string) TPUReplicateMetadataAttr { + return func(m optionalAttr) { + m["topology"] = value + } +} + +// TPUReplicateMetadataUseTpu sets the optional use_tpu attribute to value. +// +// value: Whether to place the computation on the TPU. +// If not specified, defaults to true +func TPUReplicateMetadataUseTpu(value bool) TPUReplicateMetadataAttr { + return func(m optionalAttr) { + m["use_tpu"] = value + } +} + +// TPUReplicateMetadataDeviceAssignment sets the optional device_assignment attribute to value. +// +// value: The assignment of devices for the computation. +// If not specified, defaults to <> +func TPUReplicateMetadataDeviceAssignment(value []int64) TPUReplicateMetadataAttr { + return func(m optionalAttr) { + m["device_assignment"] = value + } +} + +// TPUReplicateMetadataComputationShape sets the optional computation_shape attribute to value. +// +// value: DEPRECATED. Use num_cores_per_replica instead. +// If not specified, defaults to <> +func TPUReplicateMetadataComputationShape(value []int64) TPUReplicateMetadataAttr { + return func(m optionalAttr) { + m["computation_shape"] = value + } +} + +// TPUReplicateMetadataHostComputeCore sets the optional host_compute_core attribute to value. +// If not specified, defaults to <> +func TPUReplicateMetadataHostComputeCore(value []string) TPUReplicateMetadataAttr { + return func(m optionalAttr) { + m["host_compute_core"] = value + } +} + +// TPUReplicateMetadataPaddingMap sets the optional padding_map attribute to value. 
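A hedged sketch of staging a single tuple with the newly added `OrderedMapStage` wrapper. The key, `indices`, value, and `dtypes` signature used here are illustrative assumptions about how the staging map might be configured, and the matching unstage side is omitted; only graph construction is shown.

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	key := op.Const(s, int64(1))       // entries are ordered by this key
	indices := op.Const(s, []int32{0}) // position of each value within the staged tuple
	values := []tf.Output{op.Const(s, []float32{1, 2, 3})}
	dtypes := []tf.DataType{tf.Float} // dtype signature of the staged tuple

	stage := op.OrderedMapStage(s, key, indices, values, dtypes,
		op.OrderedMapStageCapacity(16), // block inserts once 16 entries are staged
		op.OrderedMapStageSharedName("example_map"))
	_ = stage // run this *tf.Operation as a Session target to perform the insert

	if _, err := s.Finalize(); err != nil {
		panic(err)
	}
	fmt.Println("OrderedMapStage graph constructed")
}
```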
+// If not specified, defaults to <> +func TPUReplicateMetadataPaddingMap(value []string) TPUReplicateMetadataAttr { + return func(m optionalAttr) { + m["padding_map"] = value + } +} + +// TPUReplicateMetadataStepMarkerLocation sets the optional step_marker_location attribute to value. +// If not specified, defaults to "STEP_MARK_AT_ENTRY" +func TPUReplicateMetadataStepMarkerLocation(value string) TPUReplicateMetadataAttr { + return func(m optionalAttr) { + m["step_marker_location"] = value + } +} + +// TPUReplicateMetadataAllowSoftPlacement sets the optional allow_soft_placement attribute to value. +// If not specified, defaults to false +func TPUReplicateMetadataAllowSoftPlacement(value bool) TPUReplicateMetadataAttr { + return func(m optionalAttr) { + m["allow_soft_placement"] = value + } +} + +// TPUReplicateMetadataUseSpmdForXlaPartitioning sets the optional use_spmd_for_xla_partitioning attribute to value. +// If not specified, defaults to false +func TPUReplicateMetadataUseSpmdForXlaPartitioning(value bool) TPUReplicateMetadataAttr { + return func(m optionalAttr) { + m["use_spmd_for_xla_partitioning"] = value + } +} + +// Metadata indicating how the TPU computation should be replicated. +// +// This operation holds the metadata common to operations of a `tpu.replicate()` computation subgraph. +// +// Arguments: +// num_replicas: Number of replicas of the computation +// +// Returns the created operation. +func TPUReplicateMetadata(scope *Scope, num_replicas int64, optional ...TPUReplicateMetadataAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_replicas": num_replicas} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "TPUReplicateMetadata", + + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// Returns the TopK unique values in the array in sorted order. The +// +// running time is proportional to the product of K and the input +// size. Sorting the whole array is more efficient for sufficiently large +// values of K. The median-of-medians algorithm is probably faster, but +// difficult to implement efficiently in XLA. If there are fewer than K +// unique numbers (not NANs), the results are padded with negative +// infinity. NaNs are never returned. Subnormal numbers are flushed to +// zero. If an element appears at multiple indices, the highest index is +// returned. If a TopK element never appears in the input due to padding +// values, the indices are padded with negative one. If a padding value +// appears in the input and padding is needed, the highest index of the +// padding value will be returned. The semantics are not the same as +// kth_order_statistic. +func TopKUnique(scope *Scope, input tf.Output, k int64) (topk tf.Output, topk_indices tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"k": k} + opspec := tf.OpSpec{ + Type: "TopKUnique", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + // SizeAttr is an optional argument to Size. type SizeAttr func(optionalAttr) @@ -34395,6 +33403,221 @@ func BiasAddGrad(scope *Scope, out_backprop tf.Output, optional ...BiasAddGradAt return op.Output(0) } +// Returns the name of the device on which `resource` has been placed. 
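A sketch of the newly added `TopKUnique` wrapper on a small 2-D input. This op is primarily intended for TPU/XLA execution, so running the finalized graph on a plain CPU build may not be supported; the input values and `k` are illustrative.

```go
package main

import (
	"fmt"

	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// One row of scores; duplicates collapse, so the unique top-3 are 9, 7, and 3.
	input := op.Const(s, [][]float32{{3, 9, 9, 7, 1}})
	topk, topkIndices := op.TopKUnique(s, input, 3)
	_, _ = topk, topkIndices // fetch both outputs in a Session, as in the UniqueV2 sketch

	if _, err := s.Finalize(); err != nil {
		panic(err)
	}
	fmt.Println("TopKUnique graph constructed")
}
```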
+func ExperimentalIteratorGetDevice(scope *Scope, resource tf.Output) (device tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ExperimentalIteratorGetDevice", + Input: []tf.Input{ + resource, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Records the bytes size of each element of `input_dataset` in a StatsAggregator. +func ExperimentalBytesProducedStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "ExperimentalBytesProducedStatsDataset", + Input: []tf.Input{ + input_dataset, tag, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Transforms a vector of tf.Example protos (as strings) into typed tensors. +// +// Arguments: +// serialized: A scalar or vector containing binary serialized Example protos. +// names: A tensor containing the names of the serialized protos. +// Corresponds 1:1 with the `serialized` tensor. +// May contain, for example, table key (descriptive) names for the +// corresponding serialized protos. These are purely useful for debugging +// purposes, and the presence of values here has no effect on the output. +// May also be an empty vector if no names are available. +// If non-empty, this tensor must have the same shape as "serialized". +// sparse_keys: Vector of strings. +// The keys expected in the Examples' features associated with sparse values. +// dense_keys: Vector of strings. +// The keys expected in the Examples' features associated with dense values. +// ragged_keys: Vector of strings. +// The keys expected in the Examples' features associated with ragged values. +// dense_defaults: A list of Tensors (some may be empty). Corresponds 1:1 with `dense_keys`. +// dense_defaults[j] provides default values +// when the example's feature_map lacks dense_key[j]. If an empty Tensor is +// provided for dense_defaults[j], then the Feature dense_keys[j] is required. +// The input type is inferred from dense_defaults[j], even when it's empty. +// If dense_defaults[j] is not empty, and dense_shapes[j] is fully defined, +// then the shape of dense_defaults[j] must match that of dense_shapes[j]. +// If dense_shapes[j] has an undefined major dimension (variable strides dense +// feature), dense_defaults[j] must contain a single element: +// the padding element. +// num_sparse: The number of sparse keys. +// sparse_types: A list of `num_sparse` types; the data types of data in each Feature +// given in sparse_keys. +// Currently the ParseExample supports DT_FLOAT (FloatList), +// DT_INT64 (Int64List), and DT_STRING (BytesList). +// ragged_value_types: A list of `num_ragged` types; the data types of data in each Feature +// given in ragged_keys (where `num_ragged = sparse_keys.size()`). +// Currently the ParseExample supports DT_FLOAT (FloatList), +// DT_INT64 (Int64List), and DT_STRING (BytesList). +// ragged_split_types: A list of `num_ragged` types; the data types of row_splits in each Feature +// given in ragged_keys (where `num_ragged = sparse_keys.size()`). +// May be DT_INT32 or DT_INT64. +// dense_shapes: A list of `num_dense` shapes; the shapes of data in each Feature +// given in dense_keys (where `num_dense = dense_keys.size()`). 
+// The number of elements in the Feature corresponding to dense_key[j] +// must always equal dense_shapes[j].NumEntries(). +// If dense_shapes[j] == (D0, D1, ..., DN) then the shape of output +// Tensor dense_values[j] will be (|serialized|, D0, D1, ..., DN): +// The dense outputs are just the inputs row-stacked by batch. +// This works for dense_shapes[j] = (-1, D1, ..., DN). In this case +// the shape of the output Tensor dense_values[j] will be +// (|serialized|, M, D1, .., DN), where M is the maximum number of blocks +// of elements of length D1 * .... * DN, across all minibatch entries +// in the input. Any minibatch entry with less than M blocks of elements of +// length D1 * ... * DN will be padded with the corresponding default_value +// scalar element along the second dimension. +func ParseExampleV2(scope *Scope, serialized tf.Output, names tf.Output, sparse_keys tf.Output, dense_keys tf.Output, ragged_keys tf.Output, dense_defaults []tf.Output, num_sparse int64, sparse_types []tf.DataType, ragged_value_types []tf.DataType, ragged_split_types []tf.DataType, dense_shapes []tf.Shape) (sparse_indices []tf.Output, sparse_values []tf.Output, sparse_shapes []tf.Output, dense_values []tf.Output, ragged_values []tf.Output, ragged_row_splits []tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_sparse": num_sparse, "sparse_types": sparse_types, "ragged_value_types": ragged_value_types, "ragged_split_types": ragged_split_types, "dense_shapes": dense_shapes} + opspec := tf.OpSpec{ + Type: "ParseExampleV2", + Input: []tf.Input{ + serialized, names, sparse_keys, dense_keys, ragged_keys, tf.OutputList(dense_defaults), + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if sparse_indices, idx, err = makeOutputList(op, idx, "sparse_indices"); err != nil { + scope.UpdateErr("ParseExampleV2", err) + return + } + if sparse_values, idx, err = makeOutputList(op, idx, "sparse_values"); err != nil { + scope.UpdateErr("ParseExampleV2", err) + return + } + if sparse_shapes, idx, err = makeOutputList(op, idx, "sparse_shapes"); err != nil { + scope.UpdateErr("ParseExampleV2", err) + return + } + if dense_values, idx, err = makeOutputList(op, idx, "dense_values"); err != nil { + scope.UpdateErr("ParseExampleV2", err) + return + } + if ragged_values, idx, err = makeOutputList(op, idx, "ragged_values"); err != nil { + scope.UpdateErr("ParseExampleV2", err) + return + } + if ragged_row_splits, idx, err = makeOutputList(op, idx, "ragged_row_splits"); err != nil { + scope.UpdateErr("ParseExampleV2", err) + return + } + return sparse_indices, sparse_values, sparse_shapes, dense_values, ragged_values, ragged_row_splits +} + +// Computes exponential linear: `exp(features) - 1` if < 0, `features` otherwise. +// +// See [Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs) +// ](http://arxiv.org/abs/1511.07289) +func Elu(scope *Scope, features tf.Output) (activations tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Elu", + Input: []tf.Input{ + features, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// AddSparseToTensorsMapAttr is an optional argument to AddSparseToTensorsMap. +type AddSparseToTensorsMapAttr func(optionalAttr) + +// AddSparseToTensorsMapContainer sets the optional container attribute to value. +// +// value: The container name for the `SparseTensorsMap` created by this op. 
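Finally, a small end-to-end sketch of the relocated `Elu` wrapper, confirming the `exp(features) - 1` branch for negative inputs. The import paths, Session pattern, and input values are the same assumptions as in the earlier sketches.

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	features := op.Const(s, []float32{-1, 0, 2})
	activations := op.Elu(s, features)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, []tf.Output{activations}, nil)
	if err != nil {
		panic(err)
	}
	// Negative inputs map to exp(x)-1, non-negative inputs pass through:
	// approximately [-0.632 0 2].
	fmt.Println(out[0].Value())
}
```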
+// If not specified, defaults to "" +func AddSparseToTensorsMapContainer(value string) AddSparseToTensorsMapAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// AddSparseToTensorsMapSharedName sets the optional shared_name attribute to value. +// +// value: The shared name for the `SparseTensorsMap` created by this op. +// If blank, the new Operation's unique name is used. +// If not specified, defaults to "" +func AddSparseToTensorsMapSharedName(value string) AddSparseToTensorsMapAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Add a `SparseTensor` to a `SparseTensorsMap` return its handle. +// +// A `SparseTensor` is represented by three tensors: `sparse_indices`, +// `sparse_values`, and `sparse_shape`. +// +// This operator takes the given `SparseTensor` and adds it to a container +// object (a `SparseTensorsMap`). A unique key within this container is generated +// in the form of an `int64`, and this is the value that is returned. +// +// The `SparseTensor` can then be read out as part of a minibatch by passing +// the key as a vector element to `TakeManySparseFromTensorsMap`. To ensure +// the correct `SparseTensorsMap` is accessed, ensure that the same +// `container` and `shared_name` are passed to that Op. If no `shared_name` +// is provided here, instead use the *name* of the Operation created by calling +// `AddSparseToTensorsMap` as the `shared_name` passed to +// `TakeManySparseFromTensorsMap`. Ensure the Operations are colocated. +// +// Arguments: +// sparse_indices: 2-D. The `indices` of the `SparseTensor`. +// sparse_values: 1-D. The `values` of the `SparseTensor`. +// sparse_shape: 1-D. The `shape` of the `SparseTensor`. +// +// Returns 0-D. The handle of the `SparseTensor` now stored in the +// `SparseTensorsMap`. +func AddSparseToTensorsMap(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...AddSparseToTensorsMapAttr) (sparse_handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "AddSparseToTensorsMap", + Input: []tf.Input{ + sparse_indices, sparse_values, sparse_shape, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Advance the counter of a counter-based RNG. // // The state of the RNG after @@ -34421,277 +33644,6 @@ func RngSkip(scope *Scope, resource tf.Output, algorithm tf.Output, delta tf.Out return scope.AddOperation(opspec) } -// Generates values in an interval. -// -// A sequence of `num` evenly-spaced values are generated beginning at `start`. -// If `num > 1`, the values in the sequence increase by `stop - start / num - 1`, -// so that the last one is exactly `stop`. -// -// For example: -// -// ``` -// tf.linspace(10.0, 12.0, 3, name="linspace") => [ 10.0 11.0 12.0] -// ``` -// -// Arguments: -// start: 0-D tensor. First entry in the range. -// stop: 0-D tensor. Last entry in the range. -// num: 0-D tensor. Number of values to generate. -// -// Returns 1-D. The generated values. -func LinSpace(scope *Scope, start tf.Output, stop tf.Output, num tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "LinSpace", - Input: []tf.Input{ - start, stop, num, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// MultinomialAttr is an optional argument to Multinomial. 
-type MultinomialAttr func(optionalAttr) - -// MultinomialSeed sets the optional seed attribute to value. -// -// value: If either seed or seed2 is set to be non-zero, the internal random number -// generator is seeded by the given seed. Otherwise, a random seed is used. -// If not specified, defaults to 0 -func MultinomialSeed(value int64) MultinomialAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// MultinomialSeed2 sets the optional seed2 attribute to value. -// -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func MultinomialSeed2(value int64) MultinomialAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// MultinomialOutputDtype sets the optional output_dtype attribute to value. -// If not specified, defaults to DT_INT64 -func MultinomialOutputDtype(value tf.DataType) MultinomialAttr { - return func(m optionalAttr) { - m["output_dtype"] = value - } -} - -// Draws samples from a multinomial distribution. -// -// Arguments: -// logits: 2-D Tensor with shape `[batch_size, num_classes]`. Each slice `[i, :]` -// represents the unnormalized log probabilities for all classes. -// num_samples: 0-D. Number of independent samples to draw for each row slice. -// -// Returns 2-D Tensor with shape `[batch_size, num_samples]`. Each slice `[i, :]` -// contains the drawn class labels with range `[0, num_classes)`. -func Multinomial(scope *Scope, logits tf.Output, num_samples tf.Output, optional ...MultinomialAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Multinomial", - Input: []tf.Input{ - logits, num_samples, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// NonDeterministicIntsAttr is an optional argument to NonDeterministicInts. -type NonDeterministicIntsAttr func(optionalAttr) - -// NonDeterministicIntsDtype sets the optional dtype attribute to value. -// -// value: The type of the output. -// If not specified, defaults to DT_INT64 -func NonDeterministicIntsDtype(value tf.DataType) NonDeterministicIntsAttr { - return func(m optionalAttr) { - m["dtype"] = value - } -} - -// Non-deterministically generates some integers. -// -// This op may use some OS-provided source of non-determinism (e.g. an RNG), so each execution will give different results. -// -// Arguments: -// shape: The shape of the output tensor. -// -// Returns Non-deterministic integer values with specified shape. -func NonDeterministicInts(scope *Scope, shape tf.Output, optional ...NonDeterministicIntsAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "NonDeterministicInts", - Input: []tf.Input{ - shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a dataset that caches elements from `input_dataset`. -// -// A CacheDataset will iterate over the input_dataset, and store tensors. If the -// cache already exists, the cache will be used. If the cache is inappropriate -// (e.g. cannot be opened, contains tensors of the wrong shape / size), an error -// will the returned when used. -// -// Arguments: -// -// filename: A path on the filesystem where we should cache the dataset. Note: this -// will be a directory. 
-// -// -func CacheDataset(scope *Scope, input_dataset tf.Output, filename tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "CacheDataset", - Input: []tf.Input{ - input_dataset, filename, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ThreadPoolHandleAttr is an optional argument to ThreadPoolHandle. -type ThreadPoolHandleAttr func(optionalAttr) - -// ThreadPoolHandleMaxIntraOpParallelism sets the optional max_intra_op_parallelism attribute to value. -// -// value: The maximum degree of parallelism to use within operations that execute on this -// threadpool. -// If not specified, defaults to 1 -func ThreadPoolHandleMaxIntraOpParallelism(value int64) ThreadPoolHandleAttr { - return func(m optionalAttr) { - m["max_intra_op_parallelism"] = value - } -} - -// ThreadPoolHandleContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func ThreadPoolHandleContainer(value string) ThreadPoolHandleAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// ThreadPoolHandleSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func ThreadPoolHandleSharedName(value string) ThreadPoolHandleAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Creates a dataset that uses a custom thread pool to compute `input_dataset`. -// -// Arguments: -// num_threads: The number of threads in the thread pool. -// display_name: A human-readable name for the threads that may be visible in some -// visualizations. -// threadpool. -// -// Returns A resource that can be consumed by one or more ExperimentalThreadPoolDataset -// ops. -func ThreadPoolHandle(scope *Scope, num_threads int64, display_name string, optional ...ThreadPoolHandleAttr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_threads": num_threads, "display_name": display_name} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ThreadPoolHandle", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// SparseReduceMaxSparseAttr is an optional argument to SparseReduceMaxSparse. -type SparseReduceMaxSparseAttr func(optionalAttr) - -// SparseReduceMaxSparseKeepDims sets the optional keep_dims attribute to value. -// -// value: If true, retain reduced dimensions with length 1. -// If not specified, defaults to false -func SparseReduceMaxSparseKeepDims(value bool) SparseReduceMaxSparseAttr { - return func(m optionalAttr) { - m["keep_dims"] = value - } -} - -// Computes the max of elements across dimensions of a SparseTensor. -// -// This Op takes a SparseTensor and is the sparse counterpart to -// `tf.reduce_max()`. In contrast to SparseReduceMax, this Op returns a -// SparseTensor. -// -// Reduces `sp_input` along the dimensions given in `reduction_axes`. Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained -// with length 1. -// -// If `reduction_axes` has no entries, all dimensions are reduced, and a tensor -// with a single element is returned. Additionally, the axes can be negative, -// which are interpreted according to the indexing rules in Python. 
-// -// Arguments: -// input_indices: 2-D. `N x R` matrix with the indices of non-empty values in a -// SparseTensor, possibly not in canonical ordering. -// input_values: 1-D. `N` non-empty values corresponding to `input_indices`. -// input_shape: 1-D. Shape of the input SparseTensor. -// reduction_axes: 1-D. Length-`K` vector containing the reduction axes. -func SparseReduceMaxSparse(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output, reduction_axes tf.Output, optional ...SparseReduceMaxSparseAttr) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SparseReduceMaxSparse", - Input: []tf.Input{ - input_indices, input_values, input_shape, reduction_axes, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - // Computes the maximum along segments of a tensor. // // Read @@ -34785,125 +33737,6 @@ func StringUpper(scope *Scope, input tf.Output, optional ...StringUpperAttr) (ou return op.Output(0) } -// Set a summary_writer_interface to record statistics using given stats_aggregator. -// -// Returns the created operation. -func StatsAggregatorSetSummaryWriter(scope *Scope, stats_aggregator tf.Output, summary tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "StatsAggregatorSetSummaryWriter", - Input: []tf.Input{ - stats_aggregator, summary, - }, - } - return scope.AddOperation(opspec) -} - -// FusedBatchNormGradAttr is an optional argument to FusedBatchNormGrad. -type FusedBatchNormGradAttr func(optionalAttr) - -// FusedBatchNormGradEpsilon sets the optional epsilon attribute to value. -// -// value: A small float number added to the variance of x. -// If not specified, defaults to 0.0001 -func FusedBatchNormGradEpsilon(value float32) FusedBatchNormGradAttr { - return func(m optionalAttr) { - m["epsilon"] = value - } -} - -// FusedBatchNormGradDataFormat sets the optional data_format attribute to value. -// -// value: The data format for y_backprop, x, x_backprop. -// Either "NHWC" (default) or "NCHW". -// If not specified, defaults to "NHWC" -func FusedBatchNormGradDataFormat(value string) FusedBatchNormGradAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// FusedBatchNormGradIsTraining sets the optional is_training attribute to value. -// -// value: A bool value to indicate the operation is for training (default) -// or inference. -// If not specified, defaults to true -func FusedBatchNormGradIsTraining(value bool) FusedBatchNormGradAttr { - return func(m optionalAttr) { - m["is_training"] = value - } -} - -// Gradient for batch normalization. -// -// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW". -// The size of 1D Tensors matches the dimension C of the 4D Tensors. -// -// Arguments: -// y_backprop: A 4D Tensor for the gradient with respect to y. -// x: A 4D Tensor for input data. -// scale: A 1D Tensor for scaling factor, to scale the normalized x. -// reserve_space_1: When is_training is True, a 1D Tensor for the computed batch -// mean to be reused in gradient computation. When is_training is -// False, a 1D Tensor for the population mean to be reused in both -// 1st and 2nd order gradient computation. 
-// reserve_space_2: When is_training is True, a 1D Tensor for the computed batch -// variance (inverted variance in the cuDNN case) to be reused in -// gradient computation. When is_training is False, a 1D Tensor -// for the population variance to be reused in both 1st and 2nd -// order gradient computation. -// -// Returns: -// x_backprop: A 4D Tensor for the gradient with respect to x. -// scale_backprop: A 1D Tensor for the gradient with respect to scale. -// offset_backprop: A 1D Tensor for the gradient with respect to offset. -// reserve_space_3: Unused placeholder to match the mean input in FusedBatchNorm. -// reserve_space_4: Unused placeholder to match the variance input -// in FusedBatchNorm. -func FusedBatchNormGrad(scope *Scope, y_backprop tf.Output, x tf.Output, scale tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output, optional ...FusedBatchNormGradAttr) (x_backprop tf.Output, scale_backprop tf.Output, offset_backprop tf.Output, reserve_space_3 tf.Output, reserve_space_4 tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "FusedBatchNormGrad", - Input: []tf.Input{ - y_backprop, x, scale, reserve_space_1, reserve_space_2, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) -} - -// Subtracts a value from the current value of a variable. -// -// Any ReadVariableOp with a control dependency on this op is guaranteed to -// see the decremented value or a subsequent newer one. -// -// Arguments: -// resource: handle to the resource in which to store the variable. -// value: the value by which the variable will be incremented. -// -// Returns the created operation. -func AssignSubVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "AssignSubVariableOp", - Input: []tf.Input{ - resource, value, - }, - } - return scope.AddOperation(opspec) -} - // SparseReduceMaxAttr is an optional argument to SparseReduceMax. type SparseReduceMaxAttr func(optionalAttr) @@ -37183,6 +36016,199 @@ func SdcaFprint(scope *Scope, input tf.Output) (output tf.Output) { return op.Output(0) } +// Saves input tensors slices to disk. +// +// This is like `Save` except that tensors can be listed in the saved file as being +// a slice of a larger tensor. `shapes_and_slices` specifies the shape of the +// larger tensor and the slice that this tensor covers. `shapes_and_slices` must +// have as many elements as `tensor_names`. +// +// Elements of the `shapes_and_slices` input must either be: +// +// * The empty string, in which case the corresponding tensor is +// saved normally. +// * A string of the form `dim0 dim1 ... dimN-1 slice-spec` where the +// `dimI` are the dimensions of the larger tensor and `slice-spec` +// specifies what part is covered by the tensor to save. +// +// `slice-spec` itself is a `:`-separated list: `slice0:slice1:...:sliceN-1` +// where each `sliceI` is either: +// +// * The string `-` meaning that the slice covers all indices of this dimension +// * `start,length` where `start` and `length` are integers. In that +// case the slice covers `length` indices starting at `start`. +// +// See also `Save`. +// +// Arguments: +// filename: Must have a single element. The name of the file to which we write the +// tensor. +// tensor_names: Shape `[N]`. The names of the tensors to be saved. 
+// shapes_and_slices: Shape `[N]`. The shapes and slice specifications to use when +// saving the tensors. +// data: `N` tensors to save. +// +// Returns the created operation. +func SaveSlices(scope *Scope, filename tf.Output, tensor_names tf.Output, shapes_and_slices tf.Output, data []tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SaveSlices", + Input: []tf.Input{ + filename, tensor_names, shapes_and_slices, tf.OutputList(data), + }, + } + return scope.AddOperation(opspec) +} + +// FusedBatchNormGradV3Attr is an optional argument to FusedBatchNormGradV3. +type FusedBatchNormGradV3Attr func(optionalAttr) + +// FusedBatchNormGradV3Epsilon sets the optional epsilon attribute to value. +// +// value: A small float number added to the variance of x. +// If not specified, defaults to 0.0001 +func FusedBatchNormGradV3Epsilon(value float32) FusedBatchNormGradV3Attr { + return func(m optionalAttr) { + m["epsilon"] = value + } +} + +// FusedBatchNormGradV3DataFormat sets the optional data_format attribute to value. +// +// value: The data format for y_backprop, x, x_backprop. +// Either "NHWC" (default) or "NCHW". +// If not specified, defaults to "NHWC" +func FusedBatchNormGradV3DataFormat(value string) FusedBatchNormGradV3Attr { + return func(m optionalAttr) { + m["data_format"] = value + } +} + +// FusedBatchNormGradV3IsTraining sets the optional is_training attribute to value. +// +// value: A bool value to indicate the operation is for training (default) +// or inference. +// If not specified, defaults to true +func FusedBatchNormGradV3IsTraining(value bool) FusedBatchNormGradV3Attr { + return func(m optionalAttr) { + m["is_training"] = value + } +} + +// Gradient for batch normalization. +// +// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW". +// The size of 1D Tensors matches the dimension C of the 4D Tensors. +// +// Arguments: +// y_backprop: A 4D Tensor for the gradient with respect to y. +// x: A 4D Tensor for input data. +// scale: A 1D Tensor for scaling factor, to scale the normalized x. +// reserve_space_1: When is_training is True, a 1D Tensor for the computed batch +// mean to be reused in gradient computation. When is_training is +// False, a 1D Tensor for the population mean to be reused in both +// 1st and 2nd order gradient computation. +// reserve_space_2: When is_training is True, a 1D Tensor for the computed batch +// variance (inverted variance in the cuDNN case) to be reused in +// gradient computation. When is_training is False, a 1D Tensor +// for the population variance to be reused in both 1st and 2nd +// order gradient computation. +// reserve_space_3: When is_training is True, a 1D Tensor for some intermediate results to be reused +// in gradient computation. When is_training is False, a dummy empty Tensor will be +// created. +// +// Returns: +// x_backprop: A 4D Tensor for the gradient with respect to x. +// scale_backprop: A 1D Tensor for the gradient with respect to scale. +// offset_backprop: A 1D Tensor for the gradient with respect to offset. +// reserve_space_4: Unused placeholder to match the mean input in FusedBatchNorm. +// reserve_space_5: Unused placeholder to match the variance input +// in FusedBatchNorm. 
+func FusedBatchNormGradV3(scope *Scope, y_backprop tf.Output, x tf.Output, scale tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output, reserve_space_3 tf.Output, optional ...FusedBatchNormGradV3Attr) (x_backprop tf.Output, scale_backprop tf.Output, offset_backprop tf.Output, reserve_space_4 tf.Output, reserve_space_5 tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "FusedBatchNormGradV3", + Input: []tf.Input{ + y_backprop, x, scale, reserve_space_1, reserve_space_2, reserve_space_3, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) +} + +// Returns the number of records this Reader has produced. +// +// This is the same as the number of ReaderRead executions that have +// succeeded. +// +// Arguments: +// reader_handle: Handle to a Reader. +func ReaderNumRecordsProducedV2(scope *Scope, reader_handle tf.Output) (records_produced tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ReaderNumRecordsProducedV2", + Input: []tf.Input{ + reader_handle, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// DecodeRawAttr is an optional argument to DecodeRaw. +type DecodeRawAttr func(optionalAttr) + +// DecodeRawLittleEndian sets the optional little_endian attribute to value. +// +// value: Whether the input `bytes` are in little-endian order. +// Ignored for `out_type` values that are stored in a single byte like +// `uint8`. +// If not specified, defaults to true +func DecodeRawLittleEndian(value bool) DecodeRawAttr { + return func(m optionalAttr) { + m["little_endian"] = value + } +} + +// Reinterpret the bytes of a string as a vector of numbers. +// +// Arguments: +// bytes: All the elements must have the same length. +// +// +// Returns A Tensor with one more dimension than the input `bytes`. The +// added dimension will have size equal to the length of the elements +// of `bytes` divided by the number of bytes to represent `out_type`. +func DecodeRaw(scope *Scope, bytes tf.Output, out_type tf.DataType, optional ...DecodeRawAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"out_type": out_type} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "DecodeRaw", + Input: []tf.Input{ + bytes, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Inverse 3D fast Fourier transform. // // Computes the inverse 3-dimensional discrete Fourier transform over the @@ -37280,121 +36306,6 @@ func QueueDequeueUpToV2(scope *Scope, handle tf.Output, n tf.Output, component_t return components } -// Says whether the targets are in the top `K` predictions. -// -// This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the -// prediction for the target class is among the top `k` predictions among -// all predictions for example `i`. Note that the behavior of `InTopK` differs -// from the `TopK` op in its handling of ties; if multiple classes have the -// same prediction value and straddle the top-`k` boundary, all of those -// classes are considered to be in the top `k`. 
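As a companion to the DecodeRaw wrapper added above, the following sketch reinterprets two 8-byte strings as little-endian int32 values. It is illustrative only, not part of the generated file or of this patch, and assumes the standard tensorflow/go and tensorflow/go/op packages; the input bytes are made up.

package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()

	// Two 8-byte strings; each decodes to two little-endian int32 values,
	// so the output has shape [2, 2].
	raw := op.Const(s, []string{
		"\x01\x00\x00\x00\x02\x00\x00\x00",
		"\x03\x00\x00\x00\x04\x00\x00\x00",
	})
	decoded := op.DecodeRaw(s, raw, tf.Int32,
		op.DecodeRawLittleEndian(true))

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, []tf.Output{decoded}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // [[1 2] [3 4]]
}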
-// -// More formally, let -// -// \\(predictions_i\\) be the predictions for all classes for example `i`, -// \\(targets_i\\) be the target class for example `i`, -// \\(out_i\\) be the output for example `i`, -// -// $$out_i = predictions_{i, targets_i} \in TopKIncludingTies(predictions_i)$$ -// -// Arguments: -// predictions: A `batch_size` x `classes` tensor. -// targets: A `batch_size` vector of class ids. -// k: Number of top elements to look at for computing precision. -// -// Returns Computed Precision at `k` as a `bool Tensor`. -func InTopK(scope *Scope, predictions tf.Output, targets tf.Output, k int64) (precision tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"k": k} - opspec := tf.OpSpec{ - Type: "InTopK", - Input: []tf.Input{ - predictions, targets, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns x - y element-wise. -// -// *NOTE*: `Subtract` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Sub(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Sub", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// FusedResizeAndPadConv2DAttr is an optional argument to FusedResizeAndPadConv2D. -type FusedResizeAndPadConv2DAttr func(optionalAttr) - -// FusedResizeAndPadConv2DResizeAlignCorners sets the optional resize_align_corners attribute to value. -// -// value: If true, the centers of the 4 corner pixels of the input and output tensors are -// aligned, preserving the values at the corner pixels. Defaults to false. -// If not specified, defaults to false -func FusedResizeAndPadConv2DResizeAlignCorners(value bool) FusedResizeAndPadConv2DAttr { - return func(m optionalAttr) { - m["resize_align_corners"] = value - } -} - -// Performs a resize and padding as a preprocess during a convolution. -// -// It's often possible to do spatial transformations more efficiently as part of -// the packing stage of a convolution, so this op allows for an optimized -// implementation where these stages are fused together. This prevents the need to -// write out the intermediate results as whole tensors, reducing memory pressure, -// and we can get some latency gains by merging the transformation calculations. -// The data_format attribute for Conv2D isn't supported by this op, and defaults to -// 'NHWC' order. -// Internally this op uses a single per-graph scratch buffer, which means that it -// will block if multiple versions are being run in parallel. This is because this -// operator is primarily an optimization to minimize memory usage. -// -// Arguments: -// input: 4-D with shape `[batch, in_height, in_width, in_channels]`. -// size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The -// new size for the images. -// paddings: A two-column matrix specifying the padding sizes. The number of -// rows must be the same as the rank of `input`. -// filter: 4-D with shape -// `[filter_height, filter_width, in_channels, out_channels]`. -// -// strides: 1-D of length 4. The stride of the sliding window for each dimension -// of `input`. Must be in the same order as the dimension specified with format. -// padding: The type of padding algorithm to use. 
-func FusedResizeAndPadConv2D(scope *Scope, input tf.Output, size tf.Output, paddings tf.Output, filter tf.Output, mode string, strides []int64, padding string, optional ...FusedResizeAndPadConv2DAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"mode": mode, "strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "FusedResizeAndPadConv2D", - Input: []tf.Input{ - input, size, paddings, filter, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Computes the product along segments of a tensor. // // Read @@ -38444,6 +37355,68 @@ func Prelinearize(scope *Scope, input tf.Output, optional ...PrelinearizeAttr) ( return op.Output(0) } +// StatefulUniformFullIntAttr is an optional argument to StatefulUniformFullInt. +type StatefulUniformFullIntAttr func(optionalAttr) + +// StatefulUniformFullIntDtype sets the optional dtype attribute to value. +// +// value: The type of the output. +// If not specified, defaults to DT_UINT64 +func StatefulUniformFullIntDtype(value tf.DataType) StatefulUniformFullIntAttr { + return func(m optionalAttr) { + m["dtype"] = value + } +} + +// Outputs random integers from a uniform distribution. +// +// The generated values are uniform integers covering the whole range of `dtype`. +// +// Arguments: +// resource: The handle of the resource variable that stores the state of the RNG. +// algorithm: The RNG algorithm. +// shape: The shape of the output tensor. +// +// Returns Random values with specified shape. +func StatefulUniformFullInt(scope *Scope, resource tf.Output, algorithm tf.Output, shape tf.Output, optional ...StatefulUniformFullIntAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "StatefulUniformFullInt", + Input: []tf.Input{ + resource, algorithm, shape, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Transforms a Tensor into a serialized TensorProto proto. +// +// Arguments: +// tensor: A Tensor of type `T`. +// +// Returns A serialized TensorProto proto of the input tensor. +func SerializeTensor(scope *Scope, tensor tf.Output) (serialized tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SerializeTensor", + Input: []tf.Input{ + tensor, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Computes the sparse Cholesky decomposition of `input`. // // Computes the Sparse Cholesky decomposition of a sparse matrix, with the given @@ -38590,6 +37563,88 @@ func StatefulTruncatedNormal(scope *Scope, resource tf.Output, algorithm tf.Outp return op.Output(0) } +// PaddingFIFOQueueV2Attr is an optional argument to PaddingFIFOQueueV2. +type PaddingFIFOQueueV2Attr func(optionalAttr) + +// PaddingFIFOQueueV2Shapes sets the optional shapes attribute to value. +// +// value: The shape of each component in a value. The length of this attr must +// be either 0 or the same as the length of component_types. +// Shapes of fixed rank but variable size are allowed by setting +// any shape dimension to -1. In this case, the inputs' shape may vary along +// the given dimension, and DequeueMany will pad the given dimension with +// zeros up to the maximum shape of all elements in the given batch. 
+// If the length of this attr is 0, different queue elements may have +// different ranks and shapes, but only one element may be dequeued at a time. +// If not specified, defaults to <> +// +// REQUIRES: len(value) >= 0 +func PaddingFIFOQueueV2Shapes(value []tf.Shape) PaddingFIFOQueueV2Attr { + return func(m optionalAttr) { + m["shapes"] = value + } +} + +// PaddingFIFOQueueV2Capacity sets the optional capacity attribute to value. +// +// value: The upper bound on the number of elements in this queue. +// Negative numbers mean no limit. +// If not specified, defaults to -1 +func PaddingFIFOQueueV2Capacity(value int64) PaddingFIFOQueueV2Attr { + return func(m optionalAttr) { + m["capacity"] = value + } +} + +// PaddingFIFOQueueV2Container sets the optional container attribute to value. +// +// value: If non-empty, this queue is placed in the given container. +// Otherwise, a default container is used. +// If not specified, defaults to "" +func PaddingFIFOQueueV2Container(value string) PaddingFIFOQueueV2Attr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// PaddingFIFOQueueV2SharedName sets the optional shared_name attribute to value. +// +// value: If non-empty, this queue will be shared under the given name +// across multiple sessions. +// If not specified, defaults to "" +func PaddingFIFOQueueV2SharedName(value string) PaddingFIFOQueueV2Attr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// A queue that produces elements in first-in first-out order. +// +// Variable-size shapes are allowed by setting the corresponding shape dimensions +// to 0 in the shape attr. In this case DequeueMany will pad up to the maximum +// size of any given element in the minibatch. See below for details. +// +// Arguments: +// component_types: The type of each component in a value. +// +// Returns The handle to the queue. +func PaddingFIFOQueueV2(scope *Scope, component_types []tf.DataType, optional ...PaddingFIFOQueueV2Attr) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"component_types": component_types} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "PaddingFIFOQueueV2", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Returns true if queue is closed. // // This operation returns true if the queue is closed and false if the queue @@ -38633,166 +37688,137 @@ func IsBoostedTreesQuantileStreamResourceInitialized(scope *Scope, quantile_stre return op.Output(0) } -// ParseSequenceExampleAttr is an optional argument to ParseSequenceExample. -type ParseSequenceExampleAttr func(optionalAttr) - -// ParseSequenceExampleNcontextSparse sets the optional Ncontext_sparse attribute to value. -// If not specified, defaults to 0 +// Applies softmax to a batched N-D `SparseTensor`. // -// REQUIRES: value >= 0 -func ParseSequenceExampleNcontextSparse(value int64) ParseSequenceExampleAttr { - return func(m optionalAttr) { - m["Ncontext_sparse"] = value - } -} - -// ParseSequenceExampleNcontextDense sets the optional Ncontext_dense attribute to value. -// If not specified, defaults to 0 +// The inputs represent an N-D SparseTensor with logical shape `[..., B, C]` +// (where `N >= 2`), and with indices sorted in the canonical lexicographic order. 
// -// REQUIRES: value >= 0 -func ParseSequenceExampleNcontextDense(value int64) ParseSequenceExampleAttr { - return func(m optionalAttr) { - m["Ncontext_dense"] = value - } -} - -// ParseSequenceExampleNfeatureListSparse sets the optional Nfeature_list_sparse attribute to value. -// If not specified, defaults to 0 +// This op is equivalent to applying the normal `tf.nn.softmax()` to each innermost +// logical submatrix with shape `[B, C]`, but with the catch that *the implicitly +// zero elements do not participate*. Specifically, the algorithm is equivalent +// to the following: // -// REQUIRES: value >= 0 -func ParseSequenceExampleNfeatureListSparse(value int64) ParseSequenceExampleAttr { - return func(m optionalAttr) { - m["Nfeature_list_sparse"] = value - } -} - -// ParseSequenceExampleNfeatureListDense sets the optional Nfeature_list_dense attribute to value. -// If not specified, defaults to 0 +// (1) Applies `tf.nn.softmax()` to a densified view of each innermost submatrix +// with shape `[B, C]`, along the size-C dimension; +// (2) Masks out the original implicitly-zero locations; +// (3) Renormalizes the remaining elements. // -// REQUIRES: value >= 0 -func ParseSequenceExampleNfeatureListDense(value int64) ParseSequenceExampleAttr { - return func(m optionalAttr) { - m["Nfeature_list_dense"] = value - } -} - -// ParseSequenceExampleContextSparseTypes sets the optional context_sparse_types attribute to value. -// -// value: A list of Ncontext_sparse types; the data types of data in -// each context Feature given in context_sparse_keys. -// Currently the ParseSingleSequenceExample supports DT_FLOAT (FloatList), -// DT_INT64 (Int64List), and DT_STRING (BytesList). -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func ParseSequenceExampleContextSparseTypes(value []tf.DataType) ParseSequenceExampleAttr { - return func(m optionalAttr) { - m["context_sparse_types"] = value - } -} - -// ParseSequenceExampleFeatureListDenseTypes sets the optional feature_list_dense_types attribute to value. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func ParseSequenceExampleFeatureListDenseTypes(value []tf.DataType) ParseSequenceExampleAttr { - return func(m optionalAttr) { - m["feature_list_dense_types"] = value - } -} - -// ParseSequenceExampleContextDenseShapes sets the optional context_dense_shapes attribute to value. -// -// value: A list of Ncontext_dense shapes; the shapes of data in -// each context Feature given in context_dense_keys. -// The number of elements in the Feature corresponding to context_dense_key[j] -// must always equal context_dense_shapes[j].NumEntries(). -// The shape of context_dense_values[j] will match context_dense_shapes[j]. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func ParseSequenceExampleContextDenseShapes(value []tf.Shape) ParseSequenceExampleAttr { - return func(m optionalAttr) { - m["context_dense_shapes"] = value - } -} - -// ParseSequenceExampleFeatureListSparseTypes sets the optional feature_list_sparse_types attribute to value. -// -// value: A list of Nfeature_list_sparse types; the data types -// of data in each FeatureList given in feature_list_sparse_keys. -// Currently the ParseSingleSequenceExample supports DT_FLOAT (FloatList), -// DT_INT64 (Int64List), and DT_STRING (BytesList). 
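The SparseSoftmax wrapper introduced above can be exercised with a small sketch like the one below: softmax is applied per row over only the explicitly stored values, and the sparsity pattern is preserved. Illustrative only, not part of the generated file or of this patch; assumes the standard tensorflow/go and tensorflow/go/op packages.

package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()

	// A 2x3 SparseTensor with three non-zero entries; indices are in
	// canonical row-major order, as the op requires.
	indices := op.Const(s.SubScope("indices"), [][]int64{{0, 0}, {0, 2}, {1, 1}})
	values := op.Const(s.SubScope("values"), []float32{1.0, 2.0, 3.0})
	shape := op.Const(s.SubScope("shape"), []int64{2, 3})

	// Softmax over each logical row, ignoring the implicitly-zero entries;
	// the result has exactly the same non-zero indices and shape.
	soft := op.SparseSoftmax(s, indices, values, shape)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, []tf.Output{soft}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // row 0: softmax of [1, 2]; row 1: [1]
}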
-// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func ParseSequenceExampleFeatureListSparseTypes(value []tf.DataType) ParseSequenceExampleAttr { - return func(m optionalAttr) { - m["feature_list_sparse_types"] = value - } -} - -// ParseSequenceExampleFeatureListDenseShapes sets the optional feature_list_dense_shapes attribute to value. -// -// value: A list of Nfeature_list_dense shapes; the shapes of -// data in each FeatureList given in feature_list_dense_keys. -// The shape of each Feature in the FeatureList corresponding to -// feature_list_dense_key[j] must always equal -// feature_list_dense_shapes[j].NumEntries(). -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func ParseSequenceExampleFeatureListDenseShapes(value []tf.Shape) ParseSequenceExampleAttr { - return func(m optionalAttr) { - m["feature_list_dense_shapes"] = value - } -} - -// Transforms a vector of brain.SequenceExample protos (as strings) into typed tensors. +// Hence, the `SparseTensor` result has exactly the same non-zero indices and +// shape. // // Arguments: -// serialized: A vector containing binary serialized SequenceExample protos. -// debug_name: A vector containing the names of the serialized protos. -// May contain, for example, table key (descriptive) name for the -// corresponding serialized proto. This is purely useful for debugging -// purposes, and the presence of values here has no effect on the output. -// May also be an empty vector if no name is available. -// context_dense_defaults: A list of Ncontext_dense Tensors (some may be empty). -// context_dense_defaults[j] provides default values -// when the SequenceExample's context map lacks context_dense_key[j]. -// If an empty Tensor is provided for context_dense_defaults[j], -// then the Feature context_dense_keys[j] is required. -// The input type is inferred from context_dense_defaults[j], even when it's -// empty. If context_dense_defaults[j] is not empty, its shape must match -// context_dense_shapes[j]. -// feature_list_dense_missing_assumed_empty: A vector listing the -// FeatureList keys which may be missing from the SequenceExamples. If the -// associated FeatureList is missing, it is treated as empty. By default, -// any FeatureList not listed in this vector must exist in the SequenceExamples. -// context_sparse_keys: A list of Ncontext_sparse string Tensors (scalars). -// The keys expected in the Examples' features associated with context_sparse -// values. -// context_dense_keys: A list of Ncontext_dense string Tensors (scalars). -// The keys expected in the SequenceExamples' context features associated with -// dense values. -// feature_list_sparse_keys: A list of Nfeature_list_sparse string Tensors -// (scalars). The keys expected in the FeatureLists associated with sparse -// values. -// feature_list_dense_keys: A list of Nfeature_list_dense string Tensors (scalars). -// The keys expected in the SequenceExamples' feature_lists associated -// with lists of dense values. 
-func ParseSequenceExample(scope *Scope, serialized tf.Output, debug_name tf.Output, context_dense_defaults []tf.Output, feature_list_dense_missing_assumed_empty []string, context_sparse_keys []string, context_dense_keys []string, feature_list_sparse_keys []string, feature_list_dense_keys []string, optional ...ParseSequenceExampleAttr) (context_sparse_indices []tf.Output, context_sparse_values []tf.Output, context_sparse_shapes []tf.Output, context_dense_values []tf.Output, feature_list_sparse_indices []tf.Output, feature_list_sparse_values []tf.Output, feature_list_sparse_shapes []tf.Output, feature_list_dense_values []tf.Output, feature_list_dense_lengths []tf.Output) { +// sp_indices: 2-D. `NNZ x R` matrix with the indices of non-empty values in a +// SparseTensor, in canonical ordering. +// sp_values: 1-D. `NNZ` non-empty values corresponding to `sp_indices`. +// sp_shape: 1-D. Shape of the input SparseTensor. +// +// Returns 1-D. The `NNZ` values for the result `SparseTensor`. +func SparseSoftmax(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"feature_list_dense_missing_assumed_empty": feature_list_dense_missing_assumed_empty, "context_sparse_keys": context_sparse_keys, "context_dense_keys": context_dense_keys, "feature_list_sparse_keys": feature_list_sparse_keys, "feature_list_dense_keys": feature_list_dense_keys} - for _, a := range optional { - a(attrs) + opspec := tf.OpSpec{ + Type: "SparseSoftmax", + Input: []tf.Input{ + sp_indices, sp_values, sp_shape, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// An Op to permute tensors across replicated TPU instances. +// +// Each instance supplies its own input. +// +// For example, suppose there are 4 TPU instances: `[A, B, C, D]`. Passing +// source_target_pairs=`[[0,1],[1,2],[2,3],[3,0]]` gets the outputs: +// `[D, A, B, C]`. +// +// Arguments: +// input: The local input to be permuted. Currently only supports float and +// bfloat16. +// source_target_pairs: A tensor with shape [num_pairs, 2]. +// +// Returns The permuted input. +func CollectivePermute(scope *Scope, input tf.Output, source_target_pairs tf.Output) (output tf.Output) { + if scope.Err() != nil { + return } opspec := tf.OpSpec{ - Type: "ParseSequenceExample", + Type: "CollectivePermute", Input: []tf.Input{ - serialized, debug_name, tf.OutputList(context_dense_defaults), + input, source_target_pairs, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Gives a guarantee to the TF runtime that the input tensor is a constant. +// +// The runtime is then free to make optimizations based on this. +// +// Only accepts value typed tensors as inputs and rejects resource variable handles +// as input. +// +// Returns the input tensor without modification. +func GuaranteeConst(scope *Scope, input tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "GuaranteeConst", + Input: []tf.Input{ + input, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Transforms a tf.Example proto (as a string) into typed tensors. +// +// Arguments: +// serialized: A vector containing a batch of binary serialized Example protos. +// dense_defaults: A list of Tensors (some may be empty), whose length matches +// the length of `dense_keys`. dense_defaults[j] provides default values +// when the example's feature_map lacks dense_key[j]. 
If an empty Tensor is +// provided for dense_defaults[j], then the Feature dense_keys[j] is required. +// The input type is inferred from dense_defaults[j], even when it's empty. +// If dense_defaults[j] is not empty, and dense_shapes[j] is fully defined, +// then the shape of dense_defaults[j] must match that of dense_shapes[j]. +// If dense_shapes[j] has an undefined major dimension (variable strides dense +// feature), dense_defaults[j] must contain a single element: +// the padding element. +// num_sparse: The number of sparse features to be parsed from the example. This +// must match the lengths of `sparse_keys` and `sparse_types`. +// sparse_keys: A list of `num_sparse` strings. +// The keys expected in the Examples' features associated with sparse values. +// dense_keys: The keys expected in the Examples' features associated with dense +// values. +// sparse_types: A list of `num_sparse` types; the data types of data in each +// Feature given in sparse_keys. +// Currently the ParseSingleExample op supports DT_FLOAT (FloatList), +// DT_INT64 (Int64List), and DT_STRING (BytesList). +// dense_shapes: The shapes of data in each Feature given in dense_keys. +// The length of this list must match the length of `dense_keys`. The +// number of elements in the Feature corresponding to dense_key[j] must +// always equal dense_shapes[j].NumEntries(). If dense_shapes[j] == +// (D0, D1, ..., DN) then the shape of output Tensor dense_values[j] +// will be (D0, D1, ..., DN): In the case dense_shapes[j] = (-1, D1, +// ..., DN), the shape of the output Tensor dense_values[j] will be (M, +// D1, .., DN), where M is the number of blocks of elements of length +// D1 * .... * DN, in the input. +func ParseSingleExample(scope *Scope, serialized tf.Output, dense_defaults []tf.Output, num_sparse int64, sparse_keys []string, dense_keys []string, sparse_types []tf.DataType, dense_shapes []tf.Shape) (sparse_indices []tf.Output, sparse_values []tf.Output, sparse_shapes []tf.Output, dense_values []tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_sparse": num_sparse, "sparse_keys": sparse_keys, "dense_keys": dense_keys, "sparse_types": sparse_types, "dense_shapes": dense_shapes} + opspec := tf.OpSpec{ + Type: "ParseSingleExample", + Input: []tf.Input{ + serialized, tf.OutputList(dense_defaults), }, Attrs: attrs, } @@ -38802,43 +37828,68 @@ func ParseSequenceExample(scope *Scope, serialized tf.Output, debug_name tf.Outp } var idx int var err error - if context_sparse_indices, idx, err = makeOutputList(op, idx, "context_sparse_indices"); err != nil { - scope.UpdateErr("ParseSequenceExample", err) + if sparse_indices, idx, err = makeOutputList(op, idx, "sparse_indices"); err != nil { + scope.UpdateErr("ParseSingleExample", err) return } - if context_sparse_values, idx, err = makeOutputList(op, idx, "context_sparse_values"); err != nil { - scope.UpdateErr("ParseSequenceExample", err) + if sparse_values, idx, err = makeOutputList(op, idx, "sparse_values"); err != nil { + scope.UpdateErr("ParseSingleExample", err) return } - if context_sparse_shapes, idx, err = makeOutputList(op, idx, "context_sparse_shapes"); err != nil { - scope.UpdateErr("ParseSequenceExample", err) + if sparse_shapes, idx, err = makeOutputList(op, idx, "sparse_shapes"); err != nil { + scope.UpdateErr("ParseSingleExample", err) return } - if context_dense_values, idx, err = makeOutputList(op, idx, "context_dense_values"); err != nil { - scope.UpdateErr("ParseSequenceExample", err) + if dense_values, idx, 
err = makeOutputList(op, idx, "dense_values"); err != nil { + scope.UpdateErr("ParseSingleExample", err) return } - if feature_list_sparse_indices, idx, err = makeOutputList(op, idx, "feature_list_sparse_indices"); err != nil { - scope.UpdateErr("ParseSequenceExample", err) + return sparse_indices, sparse_values, sparse_shapes, dense_values +} + +// StringToNumberAttr is an optional argument to StringToNumber. +type StringToNumberAttr func(optionalAttr) + +// StringToNumberOutType sets the optional out_type attribute to value. +// +// value: The numeric type to interpret each string in `string_tensor` as. +// If not specified, defaults to DT_FLOAT +func StringToNumberOutType(value tf.DataType) StringToNumberAttr { + return func(m optionalAttr) { + m["out_type"] = value + } +} + +// Converts each string in the input Tensor to the specified numeric type. +// +// (Note that int32 overflow results in an error while float overflow +// results in a rounded value.) +// +// Example: +// +// >>> strings = ["5.0", "3.0", "7.0"] +// >>> tf.strings.to_number(strings) +// +// +// +// Returns A Tensor of the same shape as the input `string_tensor`. +func StringToNumber(scope *Scope, string_tensor tf.Output, optional ...StringToNumberAttr) (output tf.Output) { + if scope.Err() != nil { return } - if feature_list_sparse_values, idx, err = makeOutputList(op, idx, "feature_list_sparse_values"); err != nil { - scope.UpdateErr("ParseSequenceExample", err) - return + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) } - if feature_list_sparse_shapes, idx, err = makeOutputList(op, idx, "feature_list_sparse_shapes"); err != nil { - scope.UpdateErr("ParseSequenceExample", err) - return + opspec := tf.OpSpec{ + Type: "StringToNumber", + Input: []tf.Input{ + string_tensor, + }, + Attrs: attrs, } - if feature_list_dense_values, idx, err = makeOutputList(op, idx, "feature_list_dense_values"); err != nil { - scope.UpdateErr("ParseSequenceExample", err) - return - } - if feature_list_dense_lengths, idx, err = makeOutputList(op, idx, "feature_list_dense_lengths"); err != nil { - scope.UpdateErr("ParseSequenceExample", err) - return - } - return context_sparse_indices, context_sparse_values, context_sparse_shapes, context_dense_values, feature_list_sparse_indices, feature_list_sparse_values, feature_list_sparse_shapes, feature_list_dense_values, feature_list_dense_lengths + op := scope.AddOperation(opspec) + return op.Output(0) } // Fast Fourier transform. @@ -38869,237 +37920,6 @@ func FFT(scope *Scope, input tf.Output) (output tf.Output) { return op.Output(0) } -// UniqueV2Attr is an optional argument to UniqueV2. -type UniqueV2Attr func(optionalAttr) - -// UniqueV2OutIdx sets the optional out_idx attribute to value. -// If not specified, defaults to DT_INT32 -func UniqueV2OutIdx(value tf.DataType) UniqueV2Attr { - return func(m optionalAttr) { - m["out_idx"] = value - } -} - -// Finds unique elements along an axis of a tensor. -// -// This operation either returns a tensor `y` containing unique elements -// along the `axis` of a tensor. The returned unique elements is sorted -// in the same order as they occur along `axis` in `x`. -// This operation also returns a tensor `idx` that is the same size as -// the number of the elements in `x` along the `axis` dimension. It -// contains the index in the unique output `y`. 
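The StringToNumber wrapper added above, together with its StringToNumberOutType option, can be driven as in the short sketch below (the input strings mirror the example in its doc comment). Illustrative only, not part of the generated file or of this patch; assumes the standard tensorflow/go and tensorflow/go/op packages.

package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()

	// Parse a vector of decimal strings into float32 values.
	strings := op.Const(s, []string{"5.0", "3.0", "7.0"})
	numbers := op.StringToNumber(s, strings,
		op.StringToNumberOutType(tf.Float))

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, []tf.Output{numbers}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // [5 3 7]
}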
-// In other words, for an `1-D` tensor `x` with `axis = None: -// -// `y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]` -// -// For example: -// -// ``` -// # tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8] -// y, idx = unique(x) -// y ==> [1, 2, 4, 7, 8] -// idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] -// ``` -// -// For an `2-D` tensor `x` with `axis = 0`: -// -// ``` -// # tensor 'x' is [[1, 0, 0], -// # [1, 0, 0], -// # [2, 0, 0]] -// y, idx = unique(x, axis=0) -// y ==> [[1, 0, 0], -// [2, 0, 0]] -// idx ==> [0, 0, 1] -// ``` -// -// For an `2-D` tensor `x` with `axis = 1`: -// -// ``` -// # tensor 'x' is [[1, 0, 0], -// # [1, 0, 0], -// # [2, 0, 0]] -// y, idx = unique(x, axis=1) -// y ==> [[1, 0], -// [1, 0], -// [2, 0]] -// idx ==> [0, 1, 1] -// ``` -// -// Arguments: -// x: A `Tensor`. -// axis: A `Tensor` of type `int32` (default: None). The axis of the Tensor to -// find the unique elements. -// -// Returns: -// y: A `Tensor`. Unique elements along the `axis` of `Tensor` x. -// idx: A 1-D Tensor. Has the same type as x that contains the index of each -// value of x in the output y. -func UniqueV2(scope *Scope, x tf.Output, axis tf.Output, optional ...UniqueV2Attr) (y tf.Output, idx tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "UniqueV2", - Input: []tf.Input{ - x, axis, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// DecodePaddedRawAttr is an optional argument to DecodePaddedRaw. -type DecodePaddedRawAttr func(optionalAttr) - -// DecodePaddedRawLittleEndian sets the optional little_endian attribute to value. -// -// value: Whether the input `input_bytes` is in little-endian order. Ignored for -// `out_type` values that are stored in a single byte, like `uint8` -// If not specified, defaults to true -func DecodePaddedRawLittleEndian(value bool) DecodePaddedRawAttr { - return func(m optionalAttr) { - m["little_endian"] = value - } -} - -// Reinterpret the bytes of a string as a vector of numbers. -// -// Arguments: -// input_bytes: Tensor of string to be decoded. -// fixed_length: Length in bytes for each element of the decoded output. Must be a multiple -// of the size of the output type. -// -// -// Returns A Tensor with one more dimension than the input `bytes`. The added dimension -// will have size equal to the length of the elements of `bytes` divided by the -// number of bytes to represent `out_type`. -func DecodePaddedRaw(scope *Scope, input_bytes tf.Output, fixed_length tf.Output, out_type tf.DataType, optional ...DecodePaddedRawAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"out_type": out_type} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DecodePaddedRaw", - Input: []tf.Input{ - input_bytes, fixed_length, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RetrieveTPUEmbeddingADAMParametersAttr is an optional argument to RetrieveTPUEmbeddingADAMParameters. -type RetrieveTPUEmbeddingADAMParametersAttr func(optionalAttr) - -// RetrieveTPUEmbeddingADAMParametersTableId sets the optional table_id attribute to value. 
-// If not specified, defaults to -1 -func RetrieveTPUEmbeddingADAMParametersTableId(value int64) RetrieveTPUEmbeddingADAMParametersAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// RetrieveTPUEmbeddingADAMParametersTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingADAMParametersTableName(value string) RetrieveTPUEmbeddingADAMParametersAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// RetrieveTPUEmbeddingADAMParametersConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingADAMParametersConfig(value string) RetrieveTPUEmbeddingADAMParametersAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Retrieve ADAM embedding parameters. -// -// An op that retrieves optimization parameters from embedding to host -// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up -// the correct embedding table configuration. For example, this op is -// used to retrieve updated parameters before saving a checkpoint. -// -// Returns: -// parameters: Parameter parameters updated by the ADAM optimization algorithm. -// momenta: Parameter momenta updated by the ADAM optimization algorithm. -// velocities: Parameter velocities updated by the ADAM optimization algorithm. -func RetrieveTPUEmbeddingADAMParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingADAMParametersAttr) (parameters tf.Output, momenta tf.Output, velocities tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RetrieveTPUEmbeddingADAMParameters", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// StatelessRandomBinomialAttr is an optional argument to StatelessRandomBinomial. -type StatelessRandomBinomialAttr func(optionalAttr) - -// StatelessRandomBinomialDtype sets the optional dtype attribute to value. -// -// value: The type of the output. -// If not specified, defaults to DT_INT64 -func StatelessRandomBinomialDtype(value tf.DataType) StatelessRandomBinomialAttr { - return func(m optionalAttr) { - m["dtype"] = value - } -} - -// Outputs deterministic pseudorandom random numbers from a binomial distribution. -// -// Outputs random values from a binomial distribution. -// -// The outputs are a deterministic function of `shape`, `seed`, `counts`, and `probs`. -// -// Arguments: -// shape: The shape of the output tensor. -// seed: 2 seeds (shape [2]). -// counts: The counts of the binomial distribution. Must be broadcastable with `probs`, -// and broadcastable with the rightmost dimensions of `shape`. -// probs: The probability of success for the binomial distribution. Must be broadcastable -// with `counts` and broadcastable with the rightmost dimensions of `shape`. -// -// Returns Random values with specified shape. 
-func StatelessRandomBinomial(scope *Scope, shape tf.Output, seed tf.Output, counts tf.Output, probs tf.Output, optional ...StatelessRandomBinomialAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StatelessRandomBinomial", - Input: []tf.Input{ - shape, seed, counts, probs, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // RetrieveTPUEmbeddingAdagradParametersGradAccumDebugAttr is an optional argument to RetrieveTPUEmbeddingAdagradParametersGradAccumDebug. type RetrieveTPUEmbeddingAdagradParametersGradAccumDebugAttr func(optionalAttr) @@ -39328,135 +38148,6 @@ func StatelessRandomNormal(scope *Scope, shape tf.Output, seed tf.Output, option return op.Output(0) } -// Computes the sum along sparse segments of a tensor divided by the sqrt of N. -// -// N is the size of the segment being reduced. -// -// See `tf.sparse.segment_sum` for usage examples. -// -// -// Arguments: -// -// indices: A 1-D tensor. Has same rank as `segment_ids`. -// segment_ids: A 1-D tensor. Values should be sorted and can be repeated. -// -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. -func SparseSegmentSqrtN(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseSegmentSqrtN", - Input: []tf.Input{ - data, indices, segment_ids, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// UnicodeDecodeWithOffsetsAttr is an optional argument to UnicodeDecodeWithOffsets. -type UnicodeDecodeWithOffsetsAttr func(optionalAttr) - -// UnicodeDecodeWithOffsetsErrors sets the optional errors attribute to value. -// -// value: Error handling policy when there is invalid formatting found in the input. -// The value of 'strict' will cause the operation to produce a InvalidArgument -// error on any invalid input formatting. A value of 'replace' (the default) will -// cause the operation to replace any invalid formatting in the input with the -// `replacement_char` codepoint. A value of 'ignore' will cause the operation to -// skip any invalid formatting in the input and produce no corresponding output -// character. -// If not specified, defaults to "replace" -func UnicodeDecodeWithOffsetsErrors(value string) UnicodeDecodeWithOffsetsAttr { - return func(m optionalAttr) { - m["errors"] = value - } -} - -// UnicodeDecodeWithOffsetsReplacementChar sets the optional replacement_char attribute to value. -// -// value: The replacement character codepoint to be used in place of any invalid -// formatting in the input when `errors='replace'`. Any valid unicode codepoint may -// be used. The default value is the default unicode replacement character is -// 0xFFFD or U+65533.) -// If not specified, defaults to 65533 -func UnicodeDecodeWithOffsetsReplacementChar(value int64) UnicodeDecodeWithOffsetsAttr { - return func(m optionalAttr) { - m["replacement_char"] = value - } -} - -// UnicodeDecodeWithOffsetsReplaceControlCharacters sets the optional replace_control_characters attribute to value. -// -// value: Whether to replace the C0 control characters (00-1F) with the -// `replacement_char`. Default is false. 
-// If not specified, defaults to false -func UnicodeDecodeWithOffsetsReplaceControlCharacters(value bool) UnicodeDecodeWithOffsetsAttr { - return func(m optionalAttr) { - m["replace_control_characters"] = value - } -} - -// UnicodeDecodeWithOffsetsTsplits sets the optional Tsplits attribute to value. -// If not specified, defaults to DT_INT64 -func UnicodeDecodeWithOffsetsTsplits(value tf.DataType) UnicodeDecodeWithOffsetsAttr { - return func(m optionalAttr) { - m["Tsplits"] = value - } -} - -// Decodes each string in `input` into a sequence of Unicode code points. -// -// The character codepoints for all strings are returned using a single vector -// `char_values`, with strings expanded to characters in row-major order. -// Similarly, the character start byte offsets are returned using a single vector -// `char_to_byte_starts`, with strings expanded in row-major order. -// -// The `row_splits` tensor indicates where the codepoints and start offsets for -// each input string begin and end within the `char_values` and -// `char_to_byte_starts` tensors. In particular, the values for the `i`th -// string (in row-major order) are stored in the slice -// `[row_splits[i]:row_splits[i+1]]`. Thus: -// -// * `char_values[row_splits[i]+j]` is the Unicode codepoint for the `j`th -// character in the `i`th string (in row-major order). -// * `char_to_bytes_starts[row_splits[i]+j]` is the start byte offset for the `j`th -// character in the `i`th string (in row-major order). -// * `row_splits[i+1] - row_splits[i]` is the number of characters in the `i`th -// string (in row-major order). -// -// Arguments: -// input: The text to be decoded. Can have any shape. Note that the output is flattened -// to a vector of char values. -// input_encoding: Text encoding of the input strings. This is any of the encodings supported -// by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`. -// -// Returns: -// row_splits: A 1D int32 tensor containing the row splits. -// char_values: A 1D int32 Tensor containing the decoded codepoints. -// char_to_byte_starts: A 1D int32 Tensor containing the byte index in the input string where each -// character in `char_values` starts. -func UnicodeDecodeWithOffsets(scope *Scope, input tf.Output, input_encoding string, optional ...UnicodeDecodeWithOffsetsAttr) (row_splits tf.Output, char_values tf.Output, char_to_byte_starts tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"input_encoding": input_encoding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "UnicodeDecodeWithOffsets", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - // This op is used as a placeholder in If branch functions. It doesn't provide a // valid output when run, so must either be removed (e.g. replaced with a // function input) or guaranteed not to be used (e.g. if mirroring an @@ -39612,34 +38303,6 @@ func FloorDiv(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { return op.Output(0) } -// An Op to permute tensors across replicated TPU instances. -// -// Each instance supplies its own input. -// -// For example, suppose there are 4 TPU instances: `[A, B, C, D]`. Passing -// source_target_pairs=`[[0,1],[1,2],[2,3],[3,0]]` gets the outputs: -// `[D, A, B, C]`. -// -// Arguments: -// input: The local input to be permuted. Currently only supports float and -// bfloat16. 
-// source_target_pairs: A tensor with shape [num_pairs, 2]. -// -// Returns The permuted input. -func CollectivePermute(scope *Scope, input tf.Output, source_target_pairs tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "CollectivePermute", - Input: []tf.Input{ - input, source_target_pairs, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // QuantizedReluXAttr is an optional argument to QuantizedReluX. type QuantizedReluXAttr func(optionalAttr) @@ -39935,6 +38598,95 @@ func BlockLSTMGrad(scope *Scope, seq_len_max tf.Output, x tf.Output, cs_prev tf. return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4), op.Output(5), op.Output(6), op.Output(7) } +// Op that executes a program with optional in-place variable updates. +// +// It (optionally) reads device variables, loads and executes a TPU program on a +// TPU device, and then (optionally) in-place updates variables using the program +// outputs, as specified in attributes device_var_reads_indices (program input +// indices from directly reading variables) and device_var_updates_indices (program +// output indices used to update variables, -1 means no-update/read-only). Such +// program outputs are consumed by these variables will not appear in the op +// output. For the internal use of the distributed TPU compiler. +func TPUExecuteAndUpdateVariables(scope *Scope, args []tf.Output, key tf.Output, Tresults []tf.DataType, device_var_reads_indices []int64, device_var_updates_indices []int64) (results []tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"Tresults": Tresults, "device_var_reads_indices": device_var_reads_indices, "device_var_updates_indices": device_var_updates_indices} + opspec := tf.OpSpec{ + Type: "TPUExecuteAndUpdateVariables", + Input: []tf.Input{ + tf.OutputList(args), key, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if results, idx, err = makeOutputList(op, idx, "results"); err != nil { + scope.UpdateErr("TPUExecuteAndUpdateVariables", err) + return + } + return results +} + +// ResourceSparseApplyAdagradAttr is an optional argument to ResourceSparseApplyAdagrad. +type ResourceSparseApplyAdagradAttr func(optionalAttr) + +// ResourceSparseApplyAdagradUseLocking sets the optional use_locking attribute to value. +// +// value: If `True`, updating of the var and accum tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceSparseApplyAdagradUseLocking(value bool) ResourceSparseApplyAdagradAttr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} + +// ResourceSparseApplyAdagradUpdateSlots sets the optional update_slots attribute to value. +// If not specified, defaults to true +func ResourceSparseApplyAdagradUpdateSlots(value bool) ResourceSparseApplyAdagradAttr { + return func(m optionalAttr) { + m["update_slots"] = value + } +} + +// Update relevant entries in '*var' and '*accum' according to the adagrad scheme. +// +// That is for rows we have grad for, we update var and accum as follows: +// accum += grad * grad +// var -= lr * grad * (1 / sqrt(accum)) +// +// Arguments: +// var_: Should be from a Variable(). +// accum: Should be from a Variable(). +// lr: Learning rate. Must be a scalar. +// grad: The gradient. 
+// indices: A vector of indices into the first dimension of var and accum. +// +// Returns the created operation. +func ResourceSparseApplyAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyAdagradAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ResourceSparseApplyAdagrad", + Input: []tf.Input{ + var_, accum, lr, grad, indices, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + // OutfeedDequeueTupleAttr is an optional argument to OutfeedDequeueTuple. type OutfeedDequeueTupleAttr func(optionalAttr) @@ -39986,97 +38738,6 @@ func OutfeedDequeueTuple(scope *Scope, dtypes []tf.DataType, shapes []tf.Shape, return outputs } -// DecodeCompressedAttr is an optional argument to DecodeCompressed. -type DecodeCompressedAttr func(optionalAttr) - -// DecodeCompressedCompressionType sets the optional compression_type attribute to value. -// -// value: A scalar containing either (i) the empty string (no -// compression), (ii) "ZLIB", or (iii) "GZIP". -// If not specified, defaults to "" -func DecodeCompressedCompressionType(value string) DecodeCompressedAttr { - return func(m optionalAttr) { - m["compression_type"] = value - } -} - -// Decompress strings. -// -// This op decompresses each element of the `bytes` input `Tensor`, which -// is assumed to be compressed using the given `compression_type`. -// -// The `output` is a string `Tensor` of the same shape as `bytes`, -// each element containing the decompressed data from the corresponding -// element in `bytes`. -// -// Arguments: -// bytes: A Tensor of string which is compressed. -// -// Returns A Tensor with the same shape as input `bytes`, uncompressed -// from bytes. -func DecodeCompressed(scope *Scope, bytes tf.Output, optional ...DecodeCompressedAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DecodeCompressed", - Input: []tf.Input{ - bytes, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Converts each string in the input Tensor to its hash mod by a number of buckets. -// -// The hash function is deterministic on the content of the string within the -// process. The hash function is a keyed hash function, where attribute `key` -// defines the key of the hash function. `key` is an array of 2 elements. -// -// A strong hash is important when inputs may be malicious, e.g. URLs with -// additional components. Adversaries could try to make their inputs hash to the -// same bucket for a denial-of-service attack or to skew the results. A strong -// hash can be used to make it difficult to find inputs with a skewed hash value -// distribution over buckets. This requires that the hash function is -// seeded by a high-entropy (random) "key" unknown to the adversary. -// -// The additional robustness comes at a cost of roughly 4x higher compute -// time than `tf.string_to_hash_bucket_fast`. -// -// Examples: -// -// >>> tf.strings.to_hash_bucket_strong(["Hello", "TF"], 3, [1, 2]).numpy() -// array([2, 0]) -// -// Arguments: -// input: The strings to assign a hash bucket. -// num_buckets: The number of buckets. -// key: The key used to seed the hash function, passed as a list of two uint64 -// elements. 
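Editor's aside: the ResourceSparseApplyAdagrad wrapper added above states the update rule `accum += grad * grad; var -= lr * grad * (1 / sqrt(accum))`, applied only to the rows named by `indices`. A minimal plain-Go sketch of that rule follows; the slice-based names are the editor's own and stand in for the resource variables the op actually mutates.

package main

import (
	"fmt"
	"math"
)

// sparseAdagrad applies the documented per-row update in place; rows not
// listed in indices are left untouched.
func sparseAdagrad(vars, accum []float64, lr float64, grad []float64, indices []int) {
	for i, row := range indices {
		g := grad[i]
		accum[row] += g * g                         // accum += grad * grad
		vars[row] -= lr * g / math.Sqrt(accum[row]) // var -= lr * grad / sqrt(accum)
	}
}

func main() {
	v := []float64{1, 1, 1}
	acc := []float64{0.1, 0.1, 0.1}
	sparseAdagrad(v, acc, 0.1, []float64{0.5, -0.2}, []int{0, 2})
	fmt.Println(v)   // row 1 is unchanged
	fmt.Println(acc)
}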
-// -// Returns A Tensor of the same shape as the input `string_tensor`. -func StringToHashBucketStrong(scope *Scope, input tf.Output, num_buckets int64, key []int64) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_buckets": num_buckets, "key": key} - opspec := tf.OpSpec{ - Type: "StringToHashBucketStrong", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Deserialize bucket boundaries and ready flag into current QuantileAccumulator. // // An op that deserializes bucket boundaries and are boundaries ready flag into current QuantileAccumulator. @@ -40229,6 +38890,168 @@ func StringJoin(scope *Scope, inputs []tf.Output, optional ...StringJoinAttr) (o return op.Output(0) } +// Produce a string tensor that encodes the state of a Reader. +// +// Not all Readers support being serialized, so this can produce an +// Unimplemented error. +// +// Arguments: +// reader_handle: Handle to a Reader. +func ReaderSerializeStateV2(scope *Scope, reader_handle tf.Output) (state tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ReaderSerializeStateV2", + Input: []tf.Input{ + reader_handle, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Split a `SparseTensor` into `num_split` tensors along one dimension. +// +// If the `shape[split_dim]` is not an integer multiple of `num_split`. Slices +// `[0 : shape[split_dim] % num_split]` gets one extra dimension. +// For example, if `split_dim = 1` and `num_split = 2` and the input is +// +// input_tensor = shape = [2, 7] +// [ a d e ] +// [b c ] +// +// Graphically the output tensors are: +// +// output_tensor[0] = shape = [2, 4] +// [ a ] +// [b c ] +// +// output_tensor[1] = shape = [2, 3] +// [ d e ] +// [ ] +// +// Arguments: +// split_dim: 0-D. The dimension along which to split. Must be in the range +// `[0, rank(shape))`. +// indices: 2-D tensor represents the indices of the sparse tensor. +// values: 1-D tensor represents the values of the sparse tensor. +// shape: 1-D. tensor represents the shape of the sparse tensor. +// output indices: A list of 1-D tensors represents the indices of the output +// sparse tensors. +// num_split: The number of ways to split. +// +// Returns: +// output_indices +// output_values: A list of 1-D tensors represents the values of the output sparse +// tensors. +// output_shape: A list of 1-D tensors represents the shape of the output sparse +// tensors. 
+func SparseSplit(scope *Scope, split_dim tf.Output, indices tf.Output, values tf.Output, shape tf.Output, num_split int64) (output_indices []tf.Output, output_values []tf.Output, output_shape []tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_split": num_split} + opspec := tf.OpSpec{ + Type: "SparseSplit", + Input: []tf.Input{ + split_dim, indices, values, shape, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if output_indices, idx, err = makeOutputList(op, idx, "output_indices"); err != nil { + scope.UpdateErr("SparseSplit", err) + return + } + if output_values, idx, err = makeOutputList(op, idx, "output_values"); err != nil { + scope.UpdateErr("SparseSplit", err) + return + } + if output_shape, idx, err = makeOutputList(op, idx, "output_shape"); err != nil { + scope.UpdateErr("SparseSplit", err) + return + } + return output_indices, output_values, output_shape +} + +// Computes rectified linear 6: `min(max(features, 0), 6)`. +func Relu6(scope *Scope, features tf.Output) (activations tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Relu6", + Input: []tf.Input{ + features, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// RaggedRangeAttr is an optional argument to RaggedRange. +type RaggedRangeAttr func(optionalAttr) + +// RaggedRangeTsplits sets the optional Tsplits attribute to value. +// If not specified, defaults to DT_INT64 +func RaggedRangeTsplits(value tf.DataType) RaggedRangeAttr { + return func(m optionalAttr) { + m["Tsplits"] = value + } +} + +// Returns a `RaggedTensor` containing the specified sequences of numbers. +// +// +// Returns a `RaggedTensor` `result` composed from `rt_dense_values` and +// `rt_nested_splits`, such that +// `result[i] = range(starts[i], limits[i], deltas[i])`. +// +// ```python +// (rt_nested_splits, rt_dense_values) = ragged_range( +// starts=[2, 5, 8], limits=[3, 5, 12], deltas=1) +// result = tf.ragged.from_row_splits(rt_dense_values, rt_nested_splits) +// print(result) +// +// ``` +// +// The input tensors `starts`, `limits`, and `deltas` may be scalars or vectors. +// The vector inputs must all have the same size. Scalar inputs are broadcast +// to match the size of the vector inputs. +// +// Arguments: +// starts: The starts of each range. +// limits: The limits of each range. +// deltas: The deltas of each range. +// +// Returns: +// rt_nested_splits: The `row_splits` for the returned `RaggedTensor`. +// rt_dense_values: The `flat_values` for the returned `RaggedTensor`. +func RaggedRange(scope *Scope, starts tf.Output, limits tf.Output, deltas tf.Output, optional ...RaggedRangeAttr) (rt_nested_splits tf.Output, rt_dense_values tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "RaggedRange", + Input: []tf.Input{ + starts, limits, deltas, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + // Replaces the contents of the table with the specified keys and values. // // The tensor `keys` must be of the same type as the keys of the table. @@ -40840,6 +39663,32 @@ func InitializeTableFromTextFileV2(scope *Scope, table_handle tf.Output, filenam return scope.AddOperation(opspec) } +// Computes inverse hyperbolic tangent of x element-wise. 
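Editor's aside: the RaggedRange wrapper added above defines `result[i] = range(starts[i], limits[i], deltas[i])`, encoded as a `(rt_nested_splits, rt_dense_values)` pair. Below is a minimal plain-Go sketch of that encoding, assuming positive deltas and already-broadcast vector inputs (the real op also handles negative deltas and scalar broadcasting); names are the editor's own.

package main

import "fmt"

// raggedRange builds the flat values plus the row splits that delimit each range.
func raggedRange(starts, limits, deltas []int64) (splits, values []int64) {
	splits = []int64{0}
	for i := range starts {
		for v := starts[i]; v < limits[i]; v += deltas[i] {
			values = append(values, v)
		}
		splits = append(splits, int64(len(values)))
	}
	return splits, values
}

func main() {
	splits, values := raggedRange(
		[]int64{2, 5, 8}, []int64{3, 5, 12}, []int64{1, 1, 1})
	fmt.Println(splits) // [0 1 1 5]  ->  rows [2], [], [8 9 10 11]
	fmt.Println(values) // [2 8 9 10 11]
}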
+// +// Given an input tensor, this function computes inverse hyperbolic tangent +// for every element in the tensor. Input range is `[-1,1]` and output range is +// `[-inf, inf]`. If input is `-1`, output will be `-inf` and if the +// input is `1`, output will be `inf`. Values outside the range will have +// `nan` as output. +// +// ```python +// x = tf.constant([-float("inf"), -1, -0.5, 1, 0, 0.5, 10, float("inf")]) +// tf.math.atanh(x) ==> [nan -inf -0.54930615 inf 0. 0.54930615 nan nan] +// ``` +func Atanh(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Atanh", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Returns an element-wise indication of the sign of a number. // // `y = sign(x) = -1` if `x < 0`; 0 if `x == 0`; 1 if `x > 0`. @@ -41314,212 +40163,6 @@ func LoadTPUEmbeddingMDLAdagradLightParameters(scope *Scope, parameters tf.Outpu return scope.AddOperation(opspec) } -// MapPeekAttr is an optional argument to MapPeek. -type MapPeekAttr func(optionalAttr) - -// MapPeekCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func MapPeekCapacity(value int64) MapPeekAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// MapPeekMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func MapPeekMemoryLimit(value int64) MapPeekAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// MapPeekContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func MapPeekContainer(value string) MapPeekAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// MapPeekSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func MapPeekSharedName(value string) MapPeekAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Op peeks at the values at the specified key. If the -// -// underlying container does not contain this key -// this op will block until it does. -func MapPeek(scope *Scope, key tf.Output, indices tf.Output, dtypes []tf.DataType, optional ...MapPeekAttr) (values []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MapPeek", - Input: []tf.Input{ - key, indices, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if values, idx, err = makeOutputList(op, idx, "values"); err != nil { - scope.UpdateErr("MapPeek", err) - return - } - return values -} - -// RetrieveTPUEmbeddingCenteredRMSPropParametersAttr is an optional argument to RetrieveTPUEmbeddingCenteredRMSPropParameters. -type RetrieveTPUEmbeddingCenteredRMSPropParametersAttr func(optionalAttr) - -// RetrieveTPUEmbeddingCenteredRMSPropParametersTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func RetrieveTPUEmbeddingCenteredRMSPropParametersTableId(value int64) RetrieveTPUEmbeddingCenteredRMSPropParametersAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// RetrieveTPUEmbeddingCenteredRMSPropParametersTableName sets the optional table_name attribute to value. 
-// If not specified, defaults to "" -func RetrieveTPUEmbeddingCenteredRMSPropParametersTableName(value string) RetrieveTPUEmbeddingCenteredRMSPropParametersAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// RetrieveTPUEmbeddingCenteredRMSPropParametersConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingCenteredRMSPropParametersConfig(value string) RetrieveTPUEmbeddingCenteredRMSPropParametersAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Retrieve centered RMSProp embedding parameters. -// -// An op that retrieves optimization parameters from embedding to host -// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up -// the correct embedding table configuration. For example, this op is -// used to retrieve updated parameters before saving a checkpoint. -// -// Returns: -// parameters: Parameter parameters updated by the centered RMSProp optimization algorithm. -// ms: Parameter ms updated by the centered RMSProp optimization algorithm. -// mom: Parameter mom updated by the centered RMSProp optimization algorithm. -// mg: Parameter mg updated by the centered RMSProp optimization algorithm. -func RetrieveTPUEmbeddingCenteredRMSPropParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingCenteredRMSPropParametersAttr) (parameters tf.Output, ms tf.Output, mom tf.Output, mg tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RetrieveTPUEmbeddingCenteredRMSPropParameters", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3) -} - -// Transforms a vector of brain.Example protos (as strings) into typed tensors. -// -// Arguments: -// serialized: A vector containing a batch of binary serialized Example protos. -// names: A vector containing the names of the serialized protos. -// May contain, for example, table key (descriptive) names for the -// corresponding serialized protos. These are purely useful for debugging -// purposes, and the presence of values here has no effect on the output. -// May also be an empty vector if no names are available. -// If non-empty, this vector must be the same length as "serialized". -// sparse_keys: A list of Nsparse string Tensors (scalars). -// The keys expected in the Examples' features associated with sparse values. -// dense_keys: A list of Ndense string Tensors (scalars). -// The keys expected in the Examples' features associated with dense values. -// dense_defaults: A list of Ndense Tensors (some may be empty). -// dense_defaults[j] provides default values -// when the example's feature_map lacks dense_key[j]. If an empty Tensor is -// provided for dense_defaults[j], then the Feature dense_keys[j] is required. -// The input type is inferred from dense_defaults[j], even when it's empty. -// If dense_defaults[j] is not empty, and dense_shapes[j] is fully defined, -// then the shape of dense_defaults[j] must match that of dense_shapes[j]. -// If dense_shapes[j] has an undefined major dimension (variable strides dense -// feature), dense_defaults[j] must contain a single element: -// the padding element. -// sparse_types: A list of Nsparse types; the data types of data in each Feature -// given in sparse_keys. 
-// Currently the ParseExample supports DT_FLOAT (FloatList), -// DT_INT64 (Int64List), and DT_STRING (BytesList). -// dense_shapes: A list of Ndense shapes; the shapes of data in each Feature -// given in dense_keys. -// The number of elements in the Feature corresponding to dense_key[j] -// must always equal dense_shapes[j].NumEntries(). -// If dense_shapes[j] == (D0, D1, ..., DN) then the shape of output -// Tensor dense_values[j] will be (|serialized|, D0, D1, ..., DN): -// The dense outputs are just the inputs row-stacked by batch. -// This works for dense_shapes[j] = (-1, D1, ..., DN). In this case -// the shape of the output Tensor dense_values[j] will be -// (|serialized|, M, D1, .., DN), where M is the maximum number of blocks -// of elements of length D1 * .... * DN, across all minibatch entries -// in the input. Any minibatch entry with less than M blocks of elements of -// length D1 * ... * DN will be padded with the corresponding default_value -// scalar element along the second dimension. -func ParseExample(scope *Scope, serialized tf.Output, names tf.Output, sparse_keys []tf.Output, dense_keys []tf.Output, dense_defaults []tf.Output, sparse_types []tf.DataType, dense_shapes []tf.Shape) (sparse_indices []tf.Output, sparse_values []tf.Output, sparse_shapes []tf.Output, dense_values []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"sparse_types": sparse_types, "dense_shapes": dense_shapes} - opspec := tf.OpSpec{ - Type: "ParseExample", - Input: []tf.Input{ - serialized, names, tf.OutputList(sparse_keys), tf.OutputList(dense_keys), tf.OutputList(dense_defaults), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if sparse_indices, idx, err = makeOutputList(op, idx, "sparse_indices"); err != nil { - scope.UpdateErr("ParseExample", err) - return - } - if sparse_values, idx, err = makeOutputList(op, idx, "sparse_values"); err != nil { - scope.UpdateErr("ParseExample", err) - return - } - if sparse_shapes, idx, err = makeOutputList(op, idx, "sparse_shapes"); err != nil { - scope.UpdateErr("ParseExample", err) - return - } - if dense_values, idx, err = makeOutputList(op, idx, "dense_values"); err != nil { - scope.UpdateErr("ParseExample", err) - return - } - return sparse_indices, sparse_values, sparse_shapes, dense_values -} - // DatasetToGraphAttr is an optional argument to DatasetToGraph. type DatasetToGraphAttr func(optionalAttr) @@ -41926,106 +40569,6 @@ func ResourceApplyKerasMomentum(scope *Scope, var_ tf.Output, accum tf.Output, l return scope.AddOperation(opspec) } -// TensorArrayConcatV2Attr is an optional argument to TensorArrayConcatV2. -type TensorArrayConcatV2Attr func(optionalAttr) - -// TensorArrayConcatV2ElementShapeExcept0 sets the optional element_shape_except0 attribute to value. -// If not specified, defaults to -func TensorArrayConcatV2ElementShapeExcept0(value tf.Shape) TensorArrayConcatV2Attr { - return func(m optionalAttr) { - m["element_shape_except0"] = value - } -} - -// Deprecated. 
Use TensorArrayConcatV3 -func TensorArrayConcatV2(scope *Scope, handle tf.Output, flow_in tf.Output, dtype tf.DataType, optional ...TensorArrayConcatV2Attr) (value tf.Output, lengths tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "TensorArrayConcatV2", - Input: []tf.Input{ - handle, flow_in, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// MatrixSolveAttr is an optional argument to MatrixSolve. -type MatrixSolveAttr func(optionalAttr) - -// MatrixSolveAdjoint sets the optional adjoint attribute to value. -// -// value: Boolean indicating whether to solve with `matrix` or its (block-wise) -// adjoint. -// If not specified, defaults to false -func MatrixSolveAdjoint(value bool) MatrixSolveAttr { - return func(m optionalAttr) { - m["adjoint"] = value - } -} - -// Solves systems of linear equations. -// -// `Matrix` is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions -// form square matrices. `Rhs` is a tensor of shape `[..., M, K]`. The `output` is -// a tensor shape `[..., M, K]`. If `adjoint` is `False` then each output matrix -// satisfies `matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]`. -// If `adjoint` is `True` then each output matrix satisfies -// `adjoint(matrix[..., :, :]) * output[..., :, :] = rhs[..., :, :]`. -// -// Arguments: -// matrix: Shape is `[..., M, M]`. -// rhs: Shape is `[..., M, K]`. -// -// Returns Shape is `[..., M, K]`. -func MatrixSolve(scope *Scope, matrix tf.Output, rhs tf.Output, optional ...MatrixSolveAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MatrixSolve", - Input: []tf.Input{ - matrix, rhs, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Writes contents to the file at input filename. Creates file and recursively -// -// creates directory if not existing. -// -// Arguments: -// filename: scalar. The name of the file to which we write the contents. -// contents: scalar. The content to be written to the output file. -// -// Returns the created operation. -func WriteFile(scope *Scope, filename tf.Output, contents tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "WriteFile", - Input: []tf.Input{ - filename, contents, - }, - } - return scope.AddOperation(opspec) -} - // ResourceSparseApplyMomentumAttr is an optional argument to ResourceSparseApplyMomentum. type ResourceSparseApplyMomentumAttr func(optionalAttr) @@ -42132,198 +40675,112 @@ func Recv(scope *Scope, tensor_type tf.DataType, tensor_name string, send_device return op.Output(0) } -// OrderedMapStageAttr is an optional argument to OrderedMapStage. -type OrderedMapStageAttr func(optionalAttr) +// UniqueWithCountsAttr is an optional argument to UniqueWithCounts. +type UniqueWithCountsAttr func(optionalAttr) -// OrderedMapStageCapacity sets the optional capacity attribute to value. -// -// value: Maximum number of elements in the Staging Area. If > 0, inserts -// on the container will block when the capacity is reached. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func OrderedMapStageCapacity(value int64) OrderedMapStageAttr { +// UniqueWithCountsOutIdx sets the optional out_idx attribute to value. 
+// If not specified, defaults to DT_INT32 +func UniqueWithCountsOutIdx(value tf.DataType) UniqueWithCountsAttr { return func(m optionalAttr) { - m["capacity"] = value + m["out_idx"] = value } } -// OrderedMapStageMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 +// Finds unique elements in a 1-D tensor. // -// REQUIRES: value >= 0 -func OrderedMapStageMemoryLimit(value int64) OrderedMapStageAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// OrderedMapStageContainer sets the optional container attribute to value. +// This operation returns a tensor `y` containing all of the unique elements of `x` +// sorted in the same order that they occur in `x`. This operation also returns a +// tensor `idx` the same size as `x` that contains the index of each value of `x` +// in the unique output `y`. Finally, it returns a third tensor `count` that +// contains the count of each element of `y` in `x`. In other words: // -// value: If non-empty, this queue is placed in the given container. Otherwise, -// a default container is used. -// If not specified, defaults to "" -func OrderedMapStageContainer(value string) OrderedMapStageAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// OrderedMapStageSharedName sets the optional shared_name attribute to value. +// `y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]` // -// value: It is necessary to match this name to the matching Unstage Op. -// If not specified, defaults to "" -func OrderedMapStageSharedName(value string) OrderedMapStageAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Stage (key, values) in the underlying container which behaves like a ordered +// For example: // -// associative container. Elements are ordered by key. +// ``` +// # tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8] +// y, idx, count = unique_with_counts(x) +// y ==> [1, 2, 4, 7, 8] +// idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] +// count ==> [2, 1, 3, 1, 2] +// ``` // // Arguments: -// key: int64 +// x: 1-D. // -// values: a list of tensors -// dtypes A list of data types that inserted values should adhere to. -// -// -// Returns the created operation. -func OrderedMapStage(scope *Scope, key tf.Output, indices tf.Output, values []tf.Output, dtypes []tf.DataType, optional ...OrderedMapStageAttr) (o *tf.Operation) { +// Returns: +// y: 1-D. +// idx: 1-D. +// count: 1-D. +func UniqueWithCounts(scope *Scope, x tf.Output, optional ...UniqueWithCountsAttr) (y tf.Output, idx tf.Output, count tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtypes": dtypes} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "OrderedMapStage", + Type: "UniqueWithCounts", Input: []tf.Input{ - key, indices, tf.OutputList(values), + x, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) } -// TPUReplicateMetadataAttr is an optional argument to TPUReplicateMetadata. -type TPUReplicateMetadataAttr func(optionalAttr) +// ResizeBicubicGradAttr is an optional argument to ResizeBicubicGrad. +type ResizeBicubicGradAttr func(optionalAttr) -// TPUReplicateMetadataNumCoresPerReplica sets the optional num_cores_per_replica attribute to value. +// ResizeBicubicGradAlignCorners sets the optional align_corners attribute to value. // -// value: Number of cores per replica. Used for model parallelism. 
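Editor's aside: the UniqueWithCounts wrapper added above documents the relationship between `y`, `idx`, and `count`: `y` holds the distinct values in order of first occurrence, `idx` maps every input element to its slot in `y`, and `count` tallies occurrences. A minimal plain-Go sketch of that relationship for int inputs, with names chosen by the editor:

package main

import "fmt"

func uniqueWithCounts(x []int) (y, idx, count []int) {
	pos := map[int]int{} // value -> slot in y
	for _, v := range x {
		p, ok := pos[v]
		if !ok {
			p = len(y)
			pos[v] = p
			y = append(y, v)
			count = append(count, 0)
		}
		idx = append(idx, p)
		count[p]++
	}
	return y, idx, count
}

func main() {
	y, idx, count := uniqueWithCounts([]int{1, 1, 2, 4, 4, 4, 7, 8, 8})
	fmt.Println(y)     // [1 2 4 7 8]
	fmt.Println(idx)   // [0 0 1 2 2 2 3 4 4]
	fmt.Println(count) // [2 1 3 1 2]
}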
-// If not specified, defaults to 1 -func TPUReplicateMetadataNumCoresPerReplica(value int64) TPUReplicateMetadataAttr { - return func(m optionalAttr) { - m["num_cores_per_replica"] = value - } -} - -// TPUReplicateMetadataTopology sets the optional topology attribute to value. -// -// value: TopologyProto indicating the topology of the TPU pod slice. -// If not specified, defaults to "" -func TPUReplicateMetadataTopology(value string) TPUReplicateMetadataAttr { - return func(m optionalAttr) { - m["topology"] = value - } -} - -// TPUReplicateMetadataUseTpu sets the optional use_tpu attribute to value. -// -// value: Whether to place the computation on the TPU. -// If not specified, defaults to true -func TPUReplicateMetadataUseTpu(value bool) TPUReplicateMetadataAttr { - return func(m optionalAttr) { - m["use_tpu"] = value - } -} - -// TPUReplicateMetadataDeviceAssignment sets the optional device_assignment attribute to value. -// -// value: The assignment of devices for the computation. -// If not specified, defaults to <> -func TPUReplicateMetadataDeviceAssignment(value []int64) TPUReplicateMetadataAttr { - return func(m optionalAttr) { - m["device_assignment"] = value - } -} - -// TPUReplicateMetadataComputationShape sets the optional computation_shape attribute to value. -// -// value: DEPRECATED. Use num_cores_per_replica instead. -// If not specified, defaults to <> -func TPUReplicateMetadataComputationShape(value []int64) TPUReplicateMetadataAttr { - return func(m optionalAttr) { - m["computation_shape"] = value - } -} - -// TPUReplicateMetadataHostComputeCore sets the optional host_compute_core attribute to value. -// If not specified, defaults to <> -func TPUReplicateMetadataHostComputeCore(value []string) TPUReplicateMetadataAttr { - return func(m optionalAttr) { - m["host_compute_core"] = value - } -} - -// TPUReplicateMetadataPaddingMap sets the optional padding_map attribute to value. -// If not specified, defaults to <> -func TPUReplicateMetadataPaddingMap(value []string) TPUReplicateMetadataAttr { - return func(m optionalAttr) { - m["padding_map"] = value - } -} - -// TPUReplicateMetadataStepMarkerLocation sets the optional step_marker_location attribute to value. -// If not specified, defaults to "STEP_MARK_AT_ENTRY" -func TPUReplicateMetadataStepMarkerLocation(value string) TPUReplicateMetadataAttr { - return func(m optionalAttr) { - m["step_marker_location"] = value - } -} - -// TPUReplicateMetadataAllowSoftPlacement sets the optional allow_soft_placement attribute to value. +// value: If true, the centers of the 4 corner pixels of the input and grad tensors are +// aligned. Defaults to false. // If not specified, defaults to false -func TPUReplicateMetadataAllowSoftPlacement(value bool) TPUReplicateMetadataAttr { +func ResizeBicubicGradAlignCorners(value bool) ResizeBicubicGradAttr { return func(m optionalAttr) { - m["allow_soft_placement"] = value + m["align_corners"] = value } } -// TPUReplicateMetadataUseSpmdForXlaPartitioning sets the optional use_spmd_for_xla_partitioning attribute to value. +// ResizeBicubicGradHalfPixelCenters sets the optional half_pixel_centers attribute to value. // If not specified, defaults to false -func TPUReplicateMetadataUseSpmdForXlaPartitioning(value bool) TPUReplicateMetadataAttr { +func ResizeBicubicGradHalfPixelCenters(value bool) ResizeBicubicGradAttr { return func(m optionalAttr) { - m["use_spmd_for_xla_partitioning"] = value + m["half_pixel_centers"] = value } } -// Metadata indicating how the TPU computation should be replicated. 
-// -// This operation holds the metadata common to operations of a `tpu.replicate()` computation subgraph. +// Computes the gradient of bicubic interpolation. // // Arguments: -// num_replicas: Number of replicas of the computation +// grads: 4-D with shape `[batch, height, width, channels]`. +// original_image: 4-D with shape `[batch, orig_height, orig_width, channels]`, +// The image tensor that was resized. // -// Returns the created operation. -func TPUReplicateMetadata(scope *Scope, num_replicas int64, optional ...TPUReplicateMetadataAttr) (o *tf.Operation) { +// Returns 4-D with shape `[batch, orig_height, orig_width, channels]`. +// Gradients with respect to the input image. Input image must have been +// float or double. +func ResizeBicubicGrad(scope *Scope, grads tf.Output, original_image tf.Output, optional ...ResizeBicubicGradAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"num_replicas": num_replicas} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "TPUReplicateMetadata", - + Type: "ResizeBicubicGrad", + Input: []tf.Input{ + grads, original_image, + }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } // TensorListConcatAttr is an optional argument to TensorListConcat. @@ -42426,6 +40883,67 @@ func LoadTPUEmbeddingRMSPropParametersGradAccumDebug(scope *Scope, parameters tf return scope.AddOperation(opspec) } +// LoadTPUEmbeddingAdadeltaParametersAttr is an optional argument to LoadTPUEmbeddingAdadeltaParameters. +type LoadTPUEmbeddingAdadeltaParametersAttr func(optionalAttr) + +// LoadTPUEmbeddingAdadeltaParametersTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 +func LoadTPUEmbeddingAdadeltaParametersTableId(value int64) LoadTPUEmbeddingAdadeltaParametersAttr { + return func(m optionalAttr) { + m["table_id"] = value + } +} + +// LoadTPUEmbeddingAdadeltaParametersTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func LoadTPUEmbeddingAdadeltaParametersTableName(value string) LoadTPUEmbeddingAdadeltaParametersAttr { + return func(m optionalAttr) { + m["table_name"] = value + } +} + +// LoadTPUEmbeddingAdadeltaParametersConfig sets the optional config attribute to value. +// If not specified, defaults to "" +func LoadTPUEmbeddingAdadeltaParametersConfig(value string) LoadTPUEmbeddingAdadeltaParametersAttr { + return func(m optionalAttr) { + m["config"] = value + } +} + +// Load Adadelta embedding parameters. +// +// An op that loads optimization parameters into HBM for embedding. Must be +// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct +// embedding table configuration. For example, this op is used to install +// parameters that are loaded from a checkpoint before a training loop is +// executed. +// +// Arguments: +// parameters: Value of parameters used in the Adadelta optimization algorithm. +// accumulators: Value of accumulators used in the Adadelta optimization algorithm. +// updates: Value of updates used in the Adadelta optimization algorithm. +// +// +// +// Returns the created operation. 
+func LoadTPUEmbeddingAdadeltaParameters(scope *Scope, parameters tf.Output, accumulators tf.Output, updates tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingAdadeltaParametersAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "LoadTPUEmbeddingAdadeltaParameters", + Input: []tf.Input{ + parameters, accumulators, updates, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + // LoadTPUEmbeddingFTRLParametersGradAccumDebugAttr is an optional argument to LoadTPUEmbeddingFTRLParametersGradAccumDebug. type LoadTPUEmbeddingFTRLParametersGradAccumDebugAttr func(optionalAttr) @@ -42488,67 +41006,6 @@ func LoadTPUEmbeddingFTRLParametersGradAccumDebug(scope *Scope, parameters tf.Ou return scope.AddOperation(opspec) } -// LoadTPUEmbeddingAdadeltaParametersAttr is an optional argument to LoadTPUEmbeddingAdadeltaParameters. -type LoadTPUEmbeddingAdadeltaParametersAttr func(optionalAttr) - -// LoadTPUEmbeddingAdadeltaParametersTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func LoadTPUEmbeddingAdadeltaParametersTableId(value int64) LoadTPUEmbeddingAdadeltaParametersAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// LoadTPUEmbeddingAdadeltaParametersTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingAdadeltaParametersTableName(value string) LoadTPUEmbeddingAdadeltaParametersAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// LoadTPUEmbeddingAdadeltaParametersConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingAdadeltaParametersConfig(value string) LoadTPUEmbeddingAdadeltaParametersAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Load Adadelta embedding parameters. -// -// An op that loads optimization parameters into HBM for embedding. Must be -// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct -// embedding table configuration. For example, this op is used to install -// parameters that are loaded from a checkpoint before a training loop is -// executed. -// -// Arguments: -// parameters: Value of parameters used in the Adadelta optimization algorithm. -// accumulators: Value of accumulators used in the Adadelta optimization algorithm. -// updates: Value of updates used in the Adadelta optimization algorithm. -// -// -// -// Returns the created operation. -func LoadTPUEmbeddingAdadeltaParameters(scope *Scope, parameters tf.Output, accumulators tf.Output, updates tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingAdadeltaParametersAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "LoadTPUEmbeddingAdadeltaParameters", - Input: []tf.Input{ - parameters, accumulators, updates, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - // ResourceSparseApplyFtrlV2Attr is an optional argument to ResourceSparseApplyFtrlV2. 
type ResourceSparseApplyFtrlV2Attr func(optionalAttr) @@ -42670,25 +41127,6 @@ func ResourceSparseApplyAdagradV2(scope *Scope, var_ tf.Output, accum tf.Output, return scope.AddOperation(opspec) } -// Restore a Reader to its initial clean state. -// -// Arguments: -// reader_handle: Handle to a Reader. -// -// Returns the created operation. -func ReaderResetV2(scope *Scope, reader_handle tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ReaderResetV2", - Input: []tf.Input{ - reader_handle, - }, - } - return scope.AddOperation(opspec) -} - // FakeQuantWithMinMaxVarsPerChannelGradientAttr is an optional argument to FakeQuantWithMinMaxVarsPerChannelGradient. type FakeQuantWithMinMaxVarsPerChannelGradientAttr func(optionalAttr) @@ -43047,45 +41485,6 @@ func ResourceSparseApplyAdagradDA(scope *Scope, var_ tf.Output, gradient_accumul return scope.AddOperation(opspec) } -// Applies softmax to a batched N-D `SparseTensor`. -// -// The inputs represent an N-D SparseTensor with logical shape `[..., B, C]` -// (where `N >= 2`), and with indices sorted in the canonical lexicographic order. -// -// This op is equivalent to applying the normal `tf.nn.softmax()` to each innermost -// logical submatrix with shape `[B, C]`, but with the catch that *the implicitly -// zero elements do not participate*. Specifically, the algorithm is equivalent -// to the following: -// -// (1) Applies `tf.nn.softmax()` to a densified view of each innermost submatrix -// with shape `[B, C]`, along the size-C dimension; -// (2) Masks out the original implicitly-zero locations; -// (3) Renormalizes the remaining elements. -// -// Hence, the `SparseTensor` result has exactly the same non-zero indices and -// shape. -// -// Arguments: -// sp_indices: 2-D. `NNZ x R` matrix with the indices of non-empty values in a -// SparseTensor, in canonical ordering. -// sp_values: 1-D. `NNZ` non-empty values corresponding to `sp_indices`. -// sp_shape: 1-D. Shape of the input SparseTensor. -// -// Returns 1-D. The `NNZ` values for the result `SparseTensor`. -func SparseSoftmax(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseSoftmax", - Input: []tf.Input{ - sp_indices, sp_values, sp_shape, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Adjust the hue of one or more images. // // `images` is a tensor of at least 3 dimensions. The last dimension is @@ -43185,6 +41584,106 @@ func CollectiveReduce(scope *Scope, input tf.Output, group_size int64, group_key return op.Output(0) } +// TensorArrayConcatV2Attr is an optional argument to TensorArrayConcatV2. +type TensorArrayConcatV2Attr func(optionalAttr) + +// TensorArrayConcatV2ElementShapeExcept0 sets the optional element_shape_except0 attribute to value. +// If not specified, defaults to +func TensorArrayConcatV2ElementShapeExcept0(value tf.Shape) TensorArrayConcatV2Attr { + return func(m optionalAttr) { + m["element_shape_except0"] = value + } +} + +// Deprecated. 
Use TensorArrayConcatV3 +func TensorArrayConcatV2(scope *Scope, handle tf.Output, flow_in tf.Output, dtype tf.DataType, optional ...TensorArrayConcatV2Attr) (value tf.Output, lengths tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dtype": dtype} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "TensorArrayConcatV2", + Input: []tf.Input{ + handle, flow_in, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + +// Writes contents to the file at input filename. Creates file and recursively +// +// creates directory if not existing. +// +// Arguments: +// filename: scalar. The name of the file to which we write the contents. +// contents: scalar. The content to be written to the output file. +// +// Returns the created operation. +func WriteFile(scope *Scope, filename tf.Output, contents tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "WriteFile", + Input: []tf.Input{ + filename, contents, + }, + } + return scope.AddOperation(opspec) +} + +// MatrixSolveAttr is an optional argument to MatrixSolve. +type MatrixSolveAttr func(optionalAttr) + +// MatrixSolveAdjoint sets the optional adjoint attribute to value. +// +// value: Boolean indicating whether to solve with `matrix` or its (block-wise) +// adjoint. +// If not specified, defaults to false +func MatrixSolveAdjoint(value bool) MatrixSolveAttr { + return func(m optionalAttr) { + m["adjoint"] = value + } +} + +// Solves systems of linear equations. +// +// `Matrix` is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions +// form square matrices. `Rhs` is a tensor of shape `[..., M, K]`. The `output` is +// a tensor shape `[..., M, K]`. If `adjoint` is `False` then each output matrix +// satisfies `matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]`. +// If `adjoint` is `True` then each output matrix satisfies +// `adjoint(matrix[..., :, :]) * output[..., :, :] = rhs[..., :, :]`. +// +// Arguments: +// matrix: Shape is `[..., M, M]`. +// rhs: Shape is `[..., M, K]`. +// +// Returns Shape is `[..., M, K]`. +func MatrixSolve(scope *Scope, matrix tf.Output, rhs tf.Output, optional ...MatrixSolveAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "MatrixSolve", + Input: []tf.Input{ + matrix, rhs, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // ResourceApplyAdaMaxAttr is an optional argument to ResourceApplyAdaMax. type ResourceApplyAdaMaxAttr func(optionalAttr) @@ -43414,6 +41913,63 @@ func LSTMBlockCell(scope *Scope, x tf.Output, cs_prev tf.Output, h_prev tf.Outpu return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4), op.Output(5), op.Output(6) } +// Returns x - y element-wise. +// +// *NOTE*: `Subtract` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func Sub(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Sub", + Input: []tf.Input{ + x, y, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Says whether the targets are in the top `K` predictions. 
+// +// This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the +// prediction for the target class is among the top `k` predictions among +// all predictions for example `i`. Note that the behavior of `InTopK` differs +// from the `TopK` op in its handling of ties; if multiple classes have the +// same prediction value and straddle the top-`k` boundary, all of those +// classes are considered to be in the top `k`. +// +// More formally, let +// +// \\(predictions_i\\) be the predictions for all classes for example `i`, +// \\(targets_i\\) be the target class for example `i`, +// \\(out_i\\) be the output for example `i`, +// +// $$out_i = predictions_{i, targets_i} \in TopKIncludingTies(predictions_i)$$ +// +// Arguments: +// predictions: A `batch_size` x `classes` tensor. +// targets: A `batch_size` vector of class ids. +// k: Number of top elements to look at for computing precision. +// +// Returns Computed Precision at `k` as a `bool Tensor`. +func InTopK(scope *Scope, predictions tf.Output, targets tf.Output, k int64) (precision tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"k": k} + opspec := tf.OpSpec{ + Type: "InTopK", + Input: []tf.Input{ + predictions, targets, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // A TPU core selector Op. // // This Op produces a set of TPU cores (for warm-up) or a single TPU core @@ -43560,35 +42116,6 @@ func Substr(scope *Scope, input tf.Output, pos tf.Output, len tf.Output, optiona return op.Output(0) } -// Convert JSON-encoded Example records to binary protocol buffer strings. -// -// This op translates a tensor containing Example records, encoded using -// the [standard JSON -// mapping](https://developers.google.com/protocol-buffers/docs/proto3#json), -// into a tensor containing the same records encoded as binary protocol -// buffers. The resulting tensor can then be fed to any of the other -// Example-parsing ops. -// -// Arguments: -// json_examples: Each string is a JSON object serialized according to the JSON -// mapping of the Example proto. -// -// Returns Each string is a binary Example protocol buffer corresponding -// to the respective element of `json_examples`. -func DecodeJSONExample(scope *Scope, json_examples tf.Output) (binary_examples tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "DecodeJSONExample", - Input: []tf.Input{ - json_examples, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Elementwise computes the bitwise AND of `x` and `y`. // // The result will have those bits set, that are set in both `x` and `y`. The @@ -43625,91 +42152,6 @@ func BitwiseAnd(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { return op.Output(0) } -// DecodeCSVAttr is an optional argument to DecodeCSV. -type DecodeCSVAttr func(optionalAttr) - -// DecodeCSVFieldDelim sets the optional field_delim attribute to value. -// -// value: char delimiter to separate fields in a record. -// If not specified, defaults to "," -func DecodeCSVFieldDelim(value string) DecodeCSVAttr { - return func(m optionalAttr) { - m["field_delim"] = value - } -} - -// DecodeCSVUseQuoteDelim sets the optional use_quote_delim attribute to value. -// -// value: If false, treats double quotation marks as regular -// characters inside of the string fields (ignoring RFC 4180, Section 2, -// Bullet 5). 
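Editor's aside: the InTopK wrapper added above defines membership in the top `k` so that ties straddling the boundary all count as inside it. Under that definition, a target is in the top `k` exactly when fewer than `k` classes score strictly higher than it. A minimal plain-Go sketch for one example row follows, assuming finite prediction values; names are the editor's own.

package main

import "fmt"

// inTopK reports whether predictions[target] is among the top k values of
// predictions, counting ties as inside the boundary.
func inTopK(predictions []float64, target, k int) bool {
	higher := 0
	for _, p := range predictions {
		if p > predictions[target] {
			higher++
		}
	}
	return higher < k
}

func main() {
	preds := []float64{0.1, 0.3, 0.3, 0.2}
	// Classes 1 and 2 tie at 0.3 and straddle the k=1 boundary, so both count
	// as being in the top 1.
	fmt.Println(inTopK(preds, 1, 1), inTopK(preds, 2, 1)) // true true
	fmt.Println(inTopK(preds, 3, 1))                      // false
}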
-// If not specified, defaults to true -func DecodeCSVUseQuoteDelim(value bool) DecodeCSVAttr { - return func(m optionalAttr) { - m["use_quote_delim"] = value - } -} - -// DecodeCSVNaValue sets the optional na_value attribute to value. -// -// value: Additional string to recognize as NA/NaN. -// If not specified, defaults to "" -func DecodeCSVNaValue(value string) DecodeCSVAttr { - return func(m optionalAttr) { - m["na_value"] = value - } -} - -// DecodeCSVSelectCols sets the optional select_cols attribute to value. -// If not specified, defaults to <> -func DecodeCSVSelectCols(value []int64) DecodeCSVAttr { - return func(m optionalAttr) { - m["select_cols"] = value - } -} - -// Convert CSV records to tensors. Each column maps to one tensor. -// -// RFC 4180 format is expected for the CSV records. -// (https://tools.ietf.org/html/rfc4180) -// Note that we allow leading and trailing spaces with int or float field. -// -// Arguments: -// records: Each string is a record/row in the csv and all records should have -// the same format. -// record_defaults: One tensor per column of the input record, with either a -// scalar default value for that column or an empty vector if the column is -// required. -// -// Returns Each tensor will have the same shape as records. -func DecodeCSV(scope *Scope, records tf.Output, record_defaults []tf.Output, optional ...DecodeCSVAttr) (output []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DecodeCSV", - Input: []tf.Input{ - records, tf.OutputList(record_defaults), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if output, idx, err = makeOutputList(op, idx, "output"); err != nil { - scope.UpdateErr("DecodeCSV", err) - return - } - return output -} - // SerializeIteratorAttr is an optional argument to SerializeIterator. type SerializeIteratorAttr func(optionalAttr) @@ -44045,71 +42487,125 @@ func ImageProjectiveTransformV2(scope *Scope, images tf.Output, transforms tf.Ou return op.Output(0) } -// Computes rectified linear gradients for a Relu operation. +// The gradient of SparseFillEmptyRows. +// +// Takes vectors reverse_index_map, shaped `[N]`, and grad_values, +// shaped `[N_full]`, where `N_full >= N` and copies data into either +// `d_values` or `d_default_value`. Here `d_values` is shaped `[N]` and +// `d_default_value` is a scalar. +// +// d_values[j] = grad_values[reverse_index_map[j]] +// d_default_value = sum_{k : 0 .. N_full - 1} ( +// grad_values[k] * 1{k not in reverse_index_map}) // // Arguments: -// gradients: The backpropagated gradients to the corresponding Relu operation. -// features: The features passed as input to the corresponding Relu operation, OR -// the outputs of that operation (both work equivalently). +// reverse_index_map: 1-D. The reverse index map from SparseFillEmptyRows. +// grad_values: 1-D. The gradients from backprop. // -// Returns `gradients * (features > 0)`. -func ReluGrad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) { +// Returns: +// d_values: 1-D. The backprop into values. +// d_default_value: 0-D. The backprop into default_value. 
+func SparseFillEmptyRowsGrad(scope *Scope, reverse_index_map tf.Output, grad_values tf.Output) (d_values tf.Output, d_default_value tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "ReluGrad", + Type: "SparseFillEmptyRowsGrad", Input: []tf.Input{ - gradients, features, + reverse_index_map, grad_values, }, } op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + +// MaxPool3DGradAttr is an optional argument to MaxPool3DGrad. +type MaxPool3DGradAttr func(optionalAttr) + +// MaxPool3DGradDataFormat sets the optional data_format attribute to value. +// +// value: The data format of the input and output data. With the +// default format "NDHWC", the data is stored in the order of: +// [batch, in_depth, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCDHW", the data storage order is: +// [batch, in_channels, in_depth, in_height, in_width]. +// If not specified, defaults to "NDHWC" +func MaxPool3DGradDataFormat(value string) MaxPool3DGradAttr { + return func(m optionalAttr) { + m["data_format"] = value + } +} + +// Computes gradients of 3D max pooling function. +// +// Arguments: +// orig_input: The original input tensor. +// orig_output: The original output tensor. +// grad: Output backprop of shape `[batch, depth, rows, cols, channels]`. +// ksize: 1-D tensor of length 5. The size of the window for each dimension of +// the input tensor. Must have `ksize[0] = ksize[4] = 1`. +// strides: 1-D tensor of length 5. The stride of the sliding window for each +// dimension of `input`. Must have `strides[0] = strides[4] = 1`. +// padding: The type of padding algorithm to use. +func MaxPool3DGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPool3DGradAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "MaxPool3DGrad", + Input: []tf.Input{ + orig_input, orig_output, grad, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) return op.Output(0) } -// ResourceApplyMomentumAttr is an optional argument to ResourceApplyMomentum. -type ResourceApplyMomentumAttr func(optionalAttr) +// ResourceApplyRMSPropAttr is an optional argument to ResourceApplyRMSProp. +type ResourceApplyRMSPropAttr func(optionalAttr) -// ResourceApplyMomentumUseLocking sets the optional use_locking attribute to value. +// ResourceApplyRMSPropUseLocking sets the optional use_locking attribute to value. // -// value: If `True`, updating of the var and accum tensors will be protected +// value: If `True`, updating of the var, ms, and mom tensors is protected // by a lock; otherwise the behavior is undefined, but may exhibit less // contention. // If not specified, defaults to false -func ResourceApplyMomentumUseLocking(value bool) ResourceApplyMomentumAttr { +func ResourceApplyRMSPropUseLocking(value bool) ResourceApplyRMSPropAttr { return func(m optionalAttr) { m["use_locking"] = value } } -// ResourceApplyMomentumUseNesterov sets the optional use_nesterov attribute to value. +// Update '*var' according to the RMSProp algorithm. // -// value: If `True`, the tensor passed to compute grad will be -// var - lr * momentum * accum, so in the end, the var you get is actually -// var - lr * momentum * accum. 
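Editor's aside: the SparseFillEmptyRowsGrad wrapper added above gives the backprop rule d_values[j] = grad_values[reverse_index_map[j]], with d_default_value accumulating the grad_values entries whose index never appears in reverse_index_map. A minimal plain-Go sketch of that rule; names are the editor's own.

package main

import "fmt"

func sparseFillEmptyRowsGrad(reverseIndexMap []int, gradValues []float64) (dValues []float64, dDefault float64) {
	used := make([]bool, len(gradValues))
	dValues = make([]float64, len(reverseIndexMap))
	for j, k := range reverseIndexMap {
		dValues[j] = gradValues[k] // copy the gradient back to its original slot
		used[k] = true
	}
	for k, g := range gradValues {
		if !used[k] { // entries created for filled-in (previously empty) rows
			dDefault += g
		}
	}
	return dValues, dDefault
}

func main() {
	dv, dd := sparseFillEmptyRowsGrad([]int{0, 2}, []float64{1, 10, 100})
	fmt.Println(dv, dd) // [1 100] 10
}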
-// If not specified, defaults to false -func ResourceApplyMomentumUseNesterov(value bool) ResourceApplyMomentumAttr { - return func(m optionalAttr) { - m["use_nesterov"] = value - } -} - -// Update '*var' according to the momentum scheme. +// Note that in dense implementation of this algorithm, ms and mom will +// update even if the grad is zero, but in this sparse implementation, ms +// and mom will not update in iterations during which the grad is zero. // -// Set use_nesterov = True if you want to use Nesterov momentum. +// mean_square = decay * mean_square + (1-decay) * gradient ** 2 +// Delta = learning_rate * gradient / sqrt(mean_square + epsilon) // -// accum = accum * momentum + grad -// var -= lr * accum +// ms <- rho * ms_{t-1} + (1-rho) * grad * grad +// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) +// var <- var - mom // // Arguments: // var_: Should be from a Variable(). -// accum: Should be from a Variable(). +// ms: Should be from a Variable(). +// mom: Should be from a Variable(). // lr: Scaling factor. Must be a scalar. +// rho: Decay rate. Must be a scalar. +// +// epsilon: Ridge term. Must be a scalar. // grad: The gradient. -// momentum: Momentum. Must be a scalar. // // Returns the created operation. -func ResourceApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, momentum tf.Output, optional ...ResourceApplyMomentumAttr) (o *tf.Operation) { +func ResourceApplyRMSProp(scope *Scope, var_ tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyRMSPropAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -44118,15 +42614,597 @@ func ResourceApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf. a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceApplyMomentum", + Type: "ResourceApplyRMSProp", Input: []tf.Input{ - var_, accum, lr, grad, momentum, + var_, ms, mom, lr, rho, momentum, epsilon, grad, }, Attrs: attrs, } return scope.AddOperation(opspec) } +// Reshapes a SparseTensor to represent values in a new dense shape. +// +// This operation has the same semantics as reshape on the represented dense +// tensor. The `input_indices` are recomputed based on the requested `new_shape`. +// +// If one component of `new_shape` is the special value -1, the size of that +// dimension is computed so that the total dense size remains constant. At +// most one component of `new_shape` can be -1. The number of dense elements +// implied by `new_shape` must be the same as the number of dense elements +// originally implied by `input_shape`. +// +// Reshaping does not affect the order of values in the SparseTensor. +// +// If the input tensor has rank `R_in` and `N` non-empty values, and `new_shape` +// has length `R_out`, then `input_indices` has shape `[N, R_in]`, +// `input_shape` has length `R_in`, `output_indices` has shape `[N, R_out]`, and +// `output_shape` has length `R_out`. +// +// Arguments: +// input_indices: 2-D. `N x R_in` matrix with the indices of non-empty values in a +// SparseTensor. +// input_shape: 1-D. `R_in` vector with the input SparseTensor's dense shape. +// new_shape: 1-D. `R_out` vector with the requested new dense shape. +// +// Returns: +// output_indices: 2-D. `N x R_out` matrix with the updated indices of non-empty +// values in the output SparseTensor. +// output_shape: 1-D. `R_out` vector with the full dense shape of the output +// SparseTensor. 
This is the same as `new_shape` but with any -1 dimensions +// filled in. +func SparseReshape(scope *Scope, input_indices tf.Output, input_shape tf.Output, new_shape tf.Output) (output_indices tf.Output, output_shape tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SparseReshape", + Input: []tf.Input{ + input_indices, input_shape, new_shape, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + +// Says whether the targets are in the top `K` predictions. +// +// This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the +// prediction for the target class is among the top `k` predictions among +// all predictions for example `i`. Note that the behavior of `InTopK` differs +// from the `TopK` op in its handling of ties; if multiple classes have the +// same prediction value and straddle the top-`k` boundary, all of those +// classes are considered to be in the top `k`. +// +// More formally, let +// +// \\(predictions_i\\) be the predictions for all classes for example `i`, +// \\(targets_i\\) be the target class for example `i`, +// \\(out_i\\) be the output for example `i`, +// +// $$out_i = predictions_{i, targets_i} \in TopKIncludingTies(predictions_i)$$ +// +// Arguments: +// predictions: A `batch_size` x `classes` tensor. +// targets: A `batch_size` vector of class ids. +// k: Number of top elements to look at for computing precision. +// +// Returns Computed precision at `k` as a `bool Tensor`. +func InTopKV2(scope *Scope, predictions tf.Output, targets tf.Output, k tf.Output) (precision tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "InTopKV2", + Input: []tf.Input{ + predictions, targets, k, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Creates an Optional variant with no value. +func OptionalNone(scope *Scope) (optional tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "OptionalNone", + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// RetrieveTPUEmbeddingStochasticGradientDescentParametersAttr is an optional argument to RetrieveTPUEmbeddingStochasticGradientDescentParameters. +type RetrieveTPUEmbeddingStochasticGradientDescentParametersAttr func(optionalAttr) + +// RetrieveTPUEmbeddingStochasticGradientDescentParametersTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 +func RetrieveTPUEmbeddingStochasticGradientDescentParametersTableId(value int64) RetrieveTPUEmbeddingStochasticGradientDescentParametersAttr { + return func(m optionalAttr) { + m["table_id"] = value + } +} + +// RetrieveTPUEmbeddingStochasticGradientDescentParametersTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func RetrieveTPUEmbeddingStochasticGradientDescentParametersTableName(value string) RetrieveTPUEmbeddingStochasticGradientDescentParametersAttr { + return func(m optionalAttr) { + m["table_name"] = value + } +} + +// RetrieveTPUEmbeddingStochasticGradientDescentParametersConfig sets the optional config attribute to value. +// If not specified, defaults to "" +func RetrieveTPUEmbeddingStochasticGradientDescentParametersConfig(value string) RetrieveTPUEmbeddingStochasticGradientDescentParametersAttr { + return func(m optionalAttr) { + m["config"] = value + } +} + +// Retrieve SGD embedding parameters. +// +// An op that retrieves optimization parameters from embedding to host +// memory. 
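// Editor's illustration, not part of the generated wrappers or of this patch:
// a minimal sketch of driving the InTopKV2 wrapper shown above from the Go
// client. It assumes only the standard tensorflow/go packages ("tf" and "op");
// the literal inputs and the program itself are invented for illustration.
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// Two examples, three classes each; targets hold the true class id per row.
	predictions := op.Const(s, [][]float32{{0.1, 0.7, 0.2}, {0.4, 0.3, 0.3}})
	targets := op.Const(s, []int32{1, 2})
	k := op.Const(s, int32(1))
	precision := op.InTopKV2(s, predictions, targets, k) // bool vector, one entry per row

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()
	out, err := sess.Run(nil, []tf.Output{precision}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // row 0 target is the top-1 class, row 1 is not: [true false]
}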
Must be preceded by a ConfigureTPUEmbeddingHost op that sets up +// the correct embedding table configuration. For example, this op is +// used to retrieve updated parameters before saving a checkpoint. +// +// Returns Parameter parameters updated by the stochastic gradient descent optimization algorithm. +func RetrieveTPUEmbeddingStochasticGradientDescentParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingStochasticGradientDescentParametersAttr) (parameters tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "RetrieveTPUEmbeddingStochasticGradientDescentParameters", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Deprecated. Use TensorArraySplitV3 +// +// DEPRECATED at GraphDef version 26: Use TensorArraySplitV3 +func TensorArraySplitV2(scope *Scope, handle tf.Output, value tf.Output, lengths tf.Output, flow_in tf.Output) (flow_out tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "TensorArraySplitV2", + Input: []tf.Input{ + handle, value, lengths, flow_in, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// UpperBoundAttr is an optional argument to UpperBound. +type UpperBoundAttr func(optionalAttr) + +// UpperBoundOutType sets the optional out_type attribute to value. +// If not specified, defaults to DT_INT32 +func UpperBoundOutType(value tf.DataType) UpperBoundAttr { + return func(m optionalAttr) { + m["out_type"] = value + } +} + +// Applies upper_bound(sorted_search_values, values) along each row. +// +// Each set of rows with the same index in (sorted_inputs, values) is treated +// independently. The resulting row is the equivalent of calling +// `np.searchsorted(sorted_inputs, values, side='right')`. +// +// The result is not a global index to the entire +// `Tensor`, but rather just the index in the last dimension. +// +// A 2-D example: +// sorted_sequence = [[0, 3, 9, 9, 10], +// [1, 2, 3, 4, 5]] +// values = [[2, 4, 9], +// [0, 2, 6]] +// +// result = UpperBound(sorted_sequence, values) +// +// result == [[1, 2, 4], +// [0, 2, 5]] +// +// Arguments: +// sorted_inputs: 2-D Tensor where each row is ordered. +// values: 2-D Tensor with the same numbers of rows as `sorted_search_values`. Contains +// the values that will be searched for in `sorted_search_values`. +// +// Returns A `Tensor` with the same shape as `values`. It contains the last scalar index +// into the last dimension where values can be inserted without changing the +// ordered property. +func UpperBound(scope *Scope, sorted_inputs tf.Output, values tf.Output, optional ...UpperBoundAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "UpperBound", + Input: []tf.Input{ + sorted_inputs, values, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// ResourceApplyFtrlV2Attr is an optional argument to ResourceApplyFtrlV2. +type ResourceApplyFtrlV2Attr func(optionalAttr) + +// ResourceApplyFtrlV2UseLocking sets the optional use_locking attribute to value. +// +// value: If `True`, updating of the var and accum tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. 
+// If not specified, defaults to false +func ResourceApplyFtrlV2UseLocking(value bool) ResourceApplyFtrlV2Attr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} + +// ResourceApplyFtrlV2MultiplyLinearByLr sets the optional multiply_linear_by_lr attribute to value. +// If not specified, defaults to false +func ResourceApplyFtrlV2MultiplyLinearByLr(value bool) ResourceApplyFtrlV2Attr { + return func(m optionalAttr) { + m["multiply_linear_by_lr"] = value + } +} + +// Update '*var' according to the Ftrl-proximal scheme. +// +// grad_with_shrinkage = grad + 2 * l2_shrinkage * var +// accum_new = accum + grad_with_shrinkage * grad_with_shrinkage +// linear += grad_with_shrinkage + +// (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var +// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 +// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 +// accum = accum_new +// +// Arguments: +// var_: Should be from a Variable(). +// accum: Should be from a Variable(). +// linear: Should be from a Variable(). +// grad: The gradient. +// lr: Scaling factor. Must be a scalar. +// l1: L1 regularization. Must be a scalar. +// l2: L2 shrinkage regularization. Must be a scalar. +// +// lr_power: Scaling factor. Must be a scalar. +// +// Returns the created operation. +func ResourceApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, l2_shrinkage tf.Output, lr_power tf.Output, optional ...ResourceApplyFtrlV2Attr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ResourceApplyFtrlV2", + Input: []tf.Input{ + var_, accum, linear, grad, lr, l1, l2, l2_shrinkage, lr_power, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// Constructs a tensor by tiling a given tensor. +// +// This operation creates a new tensor by replicating `input` `multiples` times. +// The output tensor's i'th dimension has `input.dims(i) * multiples[i]` elements, +// and the values of `input` are replicated `multiples[i]` times along the 'i'th +// dimension. For example, tiling `[a b c d]` by `[2]` produces +// `[a b c d a b c d]`. +// +// >>> a = tf.constant([[1,2,3],[4,5,6]], tf.int32) +// >>> b = tf.constant([1,2], tf.int32) +// >>> tf.tile(a, b) +// +// >>> c = tf.constant([2,1], tf.int32) +// >>> tf.tile(a, c) +// +// >>> d = tf.constant([2,2], tf.int32) +// >>> tf.tile(a, d) +// +// +// Arguments: +// input: 1-D or higher. +// multiples: 1-D. Length must be the same as the number of dimensions in `input` +func Tile(scope *Scope, input tf.Output, multiples tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Tile", + Input: []tf.Input{ + input, multiples, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes softmax cross entropy cost and gradients to backpropagate. +// +// Unlike `SoftmaxCrossEntropyWithLogits`, this operation does not accept +// a matrix of label probabilities, but rather a single label per row +// of features. This label is considered to have probability 1.0 for the +// given row. +// +// Inputs are the logits, not probabilities. +// +// Arguments: +// features: batch_size x num_classes matrix +// labels: batch_size vector with values in [0, num_classes). +// This is the label for the given minibatch entry. +// +// Returns: +// loss: Per example loss (batch_size vector). 
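// Editor's illustration, not part of the generated wrappers or of this patch:
// a compile-only sketch of the Tile wrapper documented above, assuming the
// standard tensorflow/go "tf" and "op" packages. The helper name and literal
// data are invented; the node plugs into a graph/session in the usual way.
package example

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

// buildTile replicates each dimension i of the input `multiples[i]` times,
// exactly as the Tile doc comment above describes.
func buildTile(s *op.Scope) tf.Output {
	input := op.Const(s, [][]int32{{1, 2, 3}, {4, 5, 6}}) // shape [2 3]
	multiples := op.Const(s, []int32{2, 1})               // repeat the row dimension twice
	return op.Tile(s, input, multiples)                   // result has shape [4 3]
}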
+// backprop: backpropagated gradients (batch_size x num_classes matrix). +func SparseSoftmaxCrossEntropyWithLogits(scope *Scope, features tf.Output, labels tf.Output) (loss tf.Output, backprop tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SparseSoftmaxCrossEntropyWithLogits", + Input: []tf.Input{ + features, labels, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + +// Worker heartbeat op. +// +// Heartbeats may be sent periodically to indicate the coordinator is still active, +// to retrieve the current worker status and to expedite shutdown when necessary. +// +// Arguments: +// request: A string tensor containing a serialized WorkerHeartbeatRequest +// +// Returns A string tensor containing a serialized WorkerHeartbeatResponse +func WorkerHeartbeat(scope *Scope, request tf.Output) (response tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "WorkerHeartbeat", + Input: []tf.Input{ + request, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// ResourceApplyProximalGradientDescentAttr is an optional argument to ResourceApplyProximalGradientDescent. +type ResourceApplyProximalGradientDescentAttr func(optionalAttr) + +// ResourceApplyProximalGradientDescentUseLocking sets the optional use_locking attribute to value. +// +// value: If True, the subtraction will be protected by a lock; +// otherwise the behavior is undefined, but may exhibit less contention. +// If not specified, defaults to false +func ResourceApplyProximalGradientDescentUseLocking(value bool) ResourceApplyProximalGradientDescentAttr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} + +// Update '*var' as FOBOS algorithm with fixed learning rate. +// +// prox_v = var - alpha * delta +// var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0} +// +// Arguments: +// var_: Should be from a Variable(). +// alpha: Scaling factor. Must be a scalar. +// l1: L1 regularization. Must be a scalar. +// l2: L2 regularization. Must be a scalar. +// delta: The change. +// +// Returns the created operation. +func ResourceApplyProximalGradientDescent(scope *Scope, var_ tf.Output, alpha tf.Output, l1 tf.Output, l2 tf.Output, delta tf.Output, optional ...ResourceApplyProximalGradientDescentAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ResourceApplyProximalGradientDescent", + Input: []tf.Input{ + var_, alpha, l1, l2, delta, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// Elementwise computes the bitwise left-shift of `x` and `y`. +// +// If `y` is negative, or greater than or equal to the width of `x` in bits the +// result is implementation defined. 
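// Editor's illustration, not part of the generated wrappers or of this patch:
// a compile-only sketch of SparseSoftmaxCrossEntropyWithLogits as wrapped
// above, assuming the standard tensorflow/go "tf" and "op" packages. Labels
// are integer class ids (one per row), not one-hot rows; helper name and
// literal data are invented.
package example

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

// buildSparseXent wires the op for a 2x3 batch of logits with integer labels
// and returns the per-example loss and the gradient w.r.t. the logits.
func buildSparseXent(s *op.Scope) (loss, backprop tf.Output) {
	features := op.Const(s, [][]float32{{2.0, 1.0, 0.1}, {0.5, 2.5, 0.3}}) // logits, not probabilities
	labels := op.Const(s, []int64{0, 1})                                   // class ids in [0, 3)
	return op.SparseSoftmaxCrossEntropyWithLogits(s, features, labels)
}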
+// +// Example: +// +// ```python +// import tensorflow as tf +// from tensorflow.python.ops import bitwise_ops +// import numpy as np +// dtype_list = [tf.int8, tf.int16, tf.int32, tf.int64] +// +// for dtype in dtype_list: +// lhs = tf.constant([-1, -5, -3, -14], dtype=dtype) +// rhs = tf.constant([5, 0, 7, 11], dtype=dtype) +// +// left_shift_result = bitwise_ops.left_shift(lhs, rhs) +// +// print(left_shift_result) +// +// # This will print: +// # tf.Tensor([ -32 -5 -128 0], shape=(4,), dtype=int8) +// # tf.Tensor([ -32 -5 -384 -28672], shape=(4,), dtype=int16) +// # tf.Tensor([ -32 -5 -384 -28672], shape=(4,), dtype=int32) +// # tf.Tensor([ -32 -5 -384 -28672], shape=(4,), dtype=int64) +// +// lhs = np.array([-2, 64, 101, 32], dtype=np.int8) +// rhs = np.array([-1, -5, -3, -14], dtype=np.int8) +// bitwise_ops.left_shift(lhs, rhs) +// # +// ``` +// +func LeftShift(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "LeftShift", + Input: []tf.Input{ + x, y, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Generates a feature cross from a list of tensors, and returns it as a +// RaggedTensor. See `tf.ragged.cross` for more details. +// +// Arguments: +// ragged_values: The values tensor for each RaggedTensor input. +// ragged_row_splits: The row_splits tensor for each RaggedTensor input. +// sparse_indices: The indices tensor for each SparseTensor input. +// sparse_values: The values tensor for each SparseTensor input. +// sparse_shape: The dense_shape tensor for each SparseTensor input. +// dense_inputs: The tf.Tensor inputs. +// input_order: String specifying the tensor type for each input. The `i`th character in +// this string specifies the type of the `i`th input, and is one of: 'R' (ragged), +// 'D' (dense), or 'S' (sparse). This attr is used to ensure that the crossed +// values are combined in the order of the inputs from the call to tf.ragged.cross. +// +// +// +// +// +// +// Returns: +// output_values: The `values` for the returned `RaggedTensor`. +// output_row_splits: The `row_splits` for the returned `RaggedTensor`. +func RaggedCross(scope *Scope, ragged_values []tf.Output, ragged_row_splits []tf.Output, sparse_indices []tf.Output, sparse_values []tf.Output, sparse_shape []tf.Output, dense_inputs []tf.Output, input_order string, hashed_output bool, num_buckets int64, hash_key int64, out_values_type tf.DataType, out_row_splits_type tf.DataType) (output_values tf.Output, output_row_splits tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"input_order": input_order, "hashed_output": hashed_output, "num_buckets": num_buckets, "hash_key": hash_key, "out_values_type": out_values_type, "out_row_splits_type": out_row_splits_type} + opspec := tf.OpSpec{ + Type: "RaggedCross", + Input: []tf.Input{ + tf.OutputList(ragged_values), tf.OutputList(ragged_row_splits), tf.OutputList(sparse_indices), tf.OutputList(sparse_values), tf.OutputList(sparse_shape), tf.OutputList(dense_inputs), + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + +// Output a fact about factorials. +func Fact(scope *Scope) (fact tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Fact", + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugAttr is an optional argument to RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebug. 
+type RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugAttr func(optionalAttr) + +// RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 +func RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugTableId(value int64) RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugAttr { + return func(m optionalAttr) { + m["table_id"] = value + } +} + +// RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugTableName(value string) RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugAttr { + return func(m optionalAttr) { + m["table_name"] = value + } +} + +// RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugConfig sets the optional config attribute to value. +// If not specified, defaults to "" +func RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugConfig(value string) RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugAttr { + return func(m optionalAttr) { + m["config"] = value + } +} + +// Retrieve Adadelta embedding parameters with debug support. +// +// An op that retrieves optimization parameters from embedding to host +// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up +// the correct embedding table configuration. For example, this op is +// used to retrieve updated parameters before saving a checkpoint. +// +// Returns: +// parameters: Parameter parameters updated by the Adadelta optimization algorithm. +// accumulators: Parameter accumulators updated by the Adadelta optimization algorithm. +// updates: Parameter updates updated by the Adadelta optimization algorithm. +// gradient_accumulators: Parameter gradient_accumulators updated by the Adadelta optimization algorithm. +func RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebug(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugAttr) (parameters tf.Output, accumulators tf.Output, updates tf.Output, gradient_accumulators tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebug", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2), op.Output(3) +} + // IRFFTAttr is an optional argument to IRFFT. type IRFFTAttr func(optionalAttr) @@ -44337,6 +43415,34 @@ func ResourceScatterNdUpdate(scope *Scope, ref tf.Output, indices tf.Output, upd return scope.AddOperation(opspec) } +// Computes the sum along sparse segments of a tensor divided by the sqrt of N. +// +// N is the size of the segment being reduced. +// +// See `tf.sparse.segment_sum` for usage examples. +// +// +// Arguments: +// +// indices: A 1-D tensor. Has same rank as `segment_ids`. +// segment_ids: A 1-D tensor. Values should be sorted and can be repeated. +// +// Returns Has same shape as data, except for dimension 0 which +// has size `k`, the number of segments. 
+func SparseSegmentSqrtN(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SparseSegmentSqrtN", + Input: []tf.Input{ + data, indices, segment_ids, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Computes square root of x element-wise. // // I.e., \\(y = \sqrt{x} = x^{1/2}\\). @@ -44483,125 +43589,71 @@ func ResourceApplyAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.O return scope.AddOperation(opspec) } -// The gradient of SparseFillEmptyRows. -// -// Takes vectors reverse_index_map, shaped `[N]`, and grad_values, -// shaped `[N_full]`, where `N_full >= N` and copies data into either -// `d_values` or `d_default_value`. Here `d_values` is shaped `[N]` and -// `d_default_value` is a scalar. -// -// d_values[j] = grad_values[reverse_index_map[j]] -// d_default_value = sum_{k : 0 .. N_full - 1} ( -// grad_values[k] * 1{k not in reverse_index_map}) +// Computes rectified linear gradients for a Relu operation. // // Arguments: -// reverse_index_map: 1-D. The reverse index map from SparseFillEmptyRows. -// grad_values: 1-D. The gradients from backprop. +// gradients: The backpropagated gradients to the corresponding Relu operation. +// features: The features passed as input to the corresponding Relu operation, OR +// the outputs of that operation (both work equivalently). // -// Returns: -// d_values: 1-D. The backprop into values. -// d_default_value: 0-D. The backprop into default_value. -func SparseFillEmptyRowsGrad(scope *Scope, reverse_index_map tf.Output, grad_values tf.Output) (d_values tf.Output, d_default_value tf.Output) { +// Returns `gradients * (features > 0)`. +func ReluGrad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SparseFillEmptyRowsGrad", + Type: "ReluGrad", Input: []tf.Input{ - reverse_index_map, grad_values, + gradients, features, }, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// MaxPool3DGradAttr is an optional argument to MaxPool3DGrad. -type MaxPool3DGradAttr func(optionalAttr) - -// MaxPool3DGradDataFormat sets the optional data_format attribute to value. -// -// value: The data format of the input and output data. With the -// default format "NDHWC", the data is stored in the order of: -// [batch, in_depth, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCDHW", the data storage order is: -// [batch, in_channels, in_depth, in_height, in_width]. -// If not specified, defaults to "NDHWC" -func MaxPool3DGradDataFormat(value string) MaxPool3DGradAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Computes gradients of 3D max pooling function. -// -// Arguments: -// orig_input: The original input tensor. -// orig_output: The original output tensor. -// grad: Output backprop of shape `[batch, depth, rows, cols, channels]`. -// ksize: 1-D tensor of length 5. The size of the window for each dimension of -// the input tensor. Must have `ksize[0] = ksize[4] = 1`. -// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. 
-func MaxPool3DGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPool3DGradAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MaxPool3DGrad", - Input: []tf.Input{ - orig_input, orig_output, grad, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) return op.Output(0) } -// ResourceApplyRMSPropAttr is an optional argument to ResourceApplyRMSProp. -type ResourceApplyRMSPropAttr func(optionalAttr) +// ResourceApplyMomentumAttr is an optional argument to ResourceApplyMomentum. +type ResourceApplyMomentumAttr func(optionalAttr) -// ResourceApplyRMSPropUseLocking sets the optional use_locking attribute to value. +// ResourceApplyMomentumUseLocking sets the optional use_locking attribute to value. // -// value: If `True`, updating of the var, ms, and mom tensors is protected +// value: If `True`, updating of the var and accum tensors will be protected // by a lock; otherwise the behavior is undefined, but may exhibit less // contention. // If not specified, defaults to false -func ResourceApplyRMSPropUseLocking(value bool) ResourceApplyRMSPropAttr { +func ResourceApplyMomentumUseLocking(value bool) ResourceApplyMomentumAttr { return func(m optionalAttr) { m["use_locking"] = value } } -// Update '*var' according to the RMSProp algorithm. +// ResourceApplyMomentumUseNesterov sets the optional use_nesterov attribute to value. // -// Note that in dense implementation of this algorithm, ms and mom will -// update even if the grad is zero, but in this sparse implementation, ms -// and mom will not update in iterations during which the grad is zero. +// value: If `True`, the tensor passed to compute grad will be +// var - lr * momentum * accum, so in the end, the var you get is actually +// var - lr * momentum * accum. +// If not specified, defaults to false +func ResourceApplyMomentumUseNesterov(value bool) ResourceApplyMomentumAttr { + return func(m optionalAttr) { + m["use_nesterov"] = value + } +} + +// Update '*var' according to the momentum scheme. // -// mean_square = decay * mean_square + (1-decay) * gradient ** 2 -// Delta = learning_rate * gradient / sqrt(mean_square + epsilon) +// Set use_nesterov = True if you want to use Nesterov momentum. // -// ms <- rho * ms_{t-1} + (1-rho) * grad * grad -// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) -// var <- var - mom +// accum = accum * momentum + grad +// var -= lr * accum // // Arguments: // var_: Should be from a Variable(). -// ms: Should be from a Variable(). -// mom: Should be from a Variable(). +// accum: Should be from a Variable(). // lr: Scaling factor. Must be a scalar. -// rho: Decay rate. Must be a scalar. -// -// epsilon: Ridge term. Must be a scalar. // grad: The gradient. +// momentum: Momentum. Must be a scalar. // // Returns the created operation. 
-func ResourceApplyRMSProp(scope *Scope, var_ tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyRMSPropAttr) (o *tf.Operation) { +func ResourceApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, momentum tf.Output, optional ...ResourceApplyMomentumAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -44610,239 +43662,100 @@ func ResourceApplyRMSProp(scope *Scope, var_ tf.Output, ms tf.Output, mom tf.Out a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceApplyRMSProp", + Type: "ResourceApplyMomentum", Input: []tf.Input{ - var_, ms, mom, lr, rho, momentum, epsilon, grad, + var_, accum, lr, grad, momentum, }, Attrs: attrs, } return scope.AddOperation(opspec) } -// Reshapes a SparseTensor to represent values in a new dense shape. +// Reverses specific dimensions of a tensor. // -// This operation has the same semantics as reshape on the represented dense -// tensor. The `input_indices` are recomputed based on the requested `new_shape`. +// Given a `tensor`, and a `bool` tensor `dims` representing the dimensions +// of `tensor`, this operation reverses each dimension i of `tensor` where +// `dims[i]` is `True`. // -// If one component of `new_shape` is the special value -1, the size of that -// dimension is computed so that the total dense size remains constant. At -// most one component of `new_shape` can be -1. The number of dense elements -// implied by `new_shape` must be the same as the number of dense elements -// originally implied by `input_shape`. +// `tensor` can have up to 8 dimensions. The number of dimensions +// of `tensor` must equal the number of elements in `dims`. In other words: // -// Reshaping does not affect the order of values in the SparseTensor. +// `rank(tensor) = size(dims)` // -// If the input tensor has rank `R_in` and `N` non-empty values, and `new_shape` -// has length `R_out`, then `input_indices` has shape `[N, R_in]`, -// `input_shape` has length `R_in`, `output_indices` has shape `[N, R_out]`, and -// `output_shape` has length `R_out`. +// For example: +// +// ``` +// # tensor 't' is [[[[ 0, 1, 2, 3], +// # [ 4, 5, 6, 7], +// # [ 8, 9, 10, 11]], +// # [[12, 13, 14, 15], +// # [16, 17, 18, 19], +// # [20, 21, 22, 23]]]] +// # tensor 't' shape is [1, 2, 3, 4] +// +// # 'dims' is [False, False, False, True] +// reverse(t, dims) ==> [[[[ 3, 2, 1, 0], +// [ 7, 6, 5, 4], +// [ 11, 10, 9, 8]], +// [[15, 14, 13, 12], +// [19, 18, 17, 16], +// [23, 22, 21, 20]]]] +// +// # 'dims' is [False, True, False, False] +// reverse(t, dims) ==> [[[[12, 13, 14, 15], +// [16, 17, 18, 19], +// [20, 21, 22, 23] +// [[ 0, 1, 2, 3], +// [ 4, 5, 6, 7], +// [ 8, 9, 10, 11]]]] +// +// # 'dims' is [False, False, True, False] +// reverse(t, dims) ==> [[[[8, 9, 10, 11], +// [4, 5, 6, 7], +// [0, 1, 2, 3]] +// [[20, 21, 22, 23], +// [16, 17, 18, 19], +// [12, 13, 14, 15]]]] +// ``` // // Arguments: -// input_indices: 2-D. `N x R_in` matrix with the indices of non-empty values in a -// SparseTensor. -// input_shape: 1-D. `R_in` vector with the input SparseTensor's dense shape. -// new_shape: 1-D. `R_out` vector with the requested new dense shape. +// tensor: Up to 8-D. +// dims: 1-D. The dimensions to reverse. // -// Returns: -// output_indices: 2-D. `N x R_out` matrix with the updated indices of non-empty -// values in the output SparseTensor. -// output_shape: 1-D. `R_out` vector with the full dense shape of the output -// SparseTensor. 
This is the same as `new_shape` but with any -1 dimensions -// filled in. -func SparseReshape(scope *Scope, input_indices tf.Output, input_shape tf.Output, new_shape tf.Output) (output_indices tf.Output, output_shape tf.Output) { +// Returns The same shape as `tensor`. +func Reverse(scope *Scope, tensor tf.Output, dims tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SparseReshape", + Type: "Reverse", Input: []tf.Input{ - input_indices, input_shape, new_shape, + tensor, dims, }, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0) } -// Elementwise computes the bitwise left-shift of `x` and `y`. -// -// If `y` is negative, or greater than or equal to the width of `x` in bits the -// result is implementation defined. +// StringLowerAttr is an optional argument to StringLower. +type StringLowerAttr func(optionalAttr) + +// StringLowerEncoding sets the optional encoding attribute to value. +// If not specified, defaults to "" +func StringLowerEncoding(value string) StringLowerAttr { + return func(m optionalAttr) { + m["encoding"] = value + } +} + +// Converts all uppercase characters into their respective lowercase replacements. // // Example: // -// ```python -// import tensorflow as tf -// from tensorflow.python.ops import bitwise_ops -// import numpy as np -// dtype_list = [tf.int8, tf.int16, tf.int32, tf.int64] +// >>> tf.strings.lower("CamelCase string and ALL CAPS") +// // -// for dtype in dtype_list: -// lhs = tf.constant([-1, -5, -3, -14], dtype=dtype) -// rhs = tf.constant([5, 0, 7, 11], dtype=dtype) -// -// left_shift_result = bitwise_ops.left_shift(lhs, rhs) -// -// print(left_shift_result) -// -// # This will print: -// # tf.Tensor([ -32 -5 -128 0], shape=(4,), dtype=int8) -// # tf.Tensor([ -32 -5 -384 -28672], shape=(4,), dtype=int16) -// # tf.Tensor([ -32 -5 -384 -28672], shape=(4,), dtype=int32) -// # tf.Tensor([ -32 -5 -384 -28672], shape=(4,), dtype=int64) -// -// lhs = np.array([-2, 64, 101, 32], dtype=np.int8) -// rhs = np.array([-1, -5, -3, -14], dtype=np.int8) -// bitwise_ops.left_shift(lhs, rhs) -// # -// ``` -// -func LeftShift(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "LeftShift", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Generates a feature cross from a list of tensors, and returns it as a -// RaggedTensor. See `tf.ragged.cross` for more details. -// -// Arguments: -// ragged_values: The values tensor for each RaggedTensor input. -// ragged_row_splits: The row_splits tensor for each RaggedTensor input. -// sparse_indices: The indices tensor for each SparseTensor input. -// sparse_values: The values tensor for each SparseTensor input. -// sparse_shape: The dense_shape tensor for each SparseTensor input. -// dense_inputs: The tf.Tensor inputs. -// input_order: String specifying the tensor type for each input. The `i`th character in -// this string specifies the type of the `i`th input, and is one of: 'R' (ragged), -// 'D' (dense), or 'S' (sparse). This attr is used to ensure that the crossed -// values are combined in the order of the inputs from the call to tf.ragged.cross. -// -// -// -// -// -// -// Returns: -// output_values: The `values` for the returned `RaggedTensor`. -// output_row_splits: The `row_splits` for the returned `RaggedTensor`. 
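// Editor's illustration, not part of the generated wrappers or of this patch:
// a compile-only sketch of the Reverse wrapper added above (the bool-dims
// variant), assuming the standard tensorflow/go "tf" and "op" packages.
// Helper name and literal data are invented for illustration.
package example

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

// buildReverse flips only dimension 1 of a [2 3] tensor; `dims` must have one
// bool per dimension of the input, as the Reverse doc comment above states.
func buildReverse(s *op.Scope) tf.Output {
	t := op.Const(s, [][]int32{{1, 2, 3}, {4, 5, 6}})
	dims := op.Const(s, []bool{false, true}) // reverse along dimension 1 only
	return op.Reverse(s, t, dims)            // yields [[3 2 1] [6 5 4]]
}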
-func RaggedCross(scope *Scope, ragged_values []tf.Output, ragged_row_splits []tf.Output, sparse_indices []tf.Output, sparse_values []tf.Output, sparse_shape []tf.Output, dense_inputs []tf.Output, input_order string, hashed_output bool, num_buckets int64, hash_key int64, out_values_type tf.DataType, out_row_splits_type tf.DataType) (output_values tf.Output, output_row_splits tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"input_order": input_order, "hashed_output": hashed_output, "num_buckets": num_buckets, "hash_key": hash_key, "out_values_type": out_values_type, "out_row_splits_type": out_row_splits_type} - opspec := tf.OpSpec{ - Type: "RaggedCross", - Input: []tf.Input{ - tf.OutputList(ragged_values), tf.OutputList(ragged_row_splits), tf.OutputList(sparse_indices), tf.OutputList(sparse_values), tf.OutputList(sparse_shape), tf.OutputList(dense_inputs), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Output a fact about factorials. -func Fact(scope *Scope) (fact tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Fact", - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes softmax cross entropy cost and gradients to backpropagate. -// -// Unlike `SoftmaxCrossEntropyWithLogits`, this operation does not accept -// a matrix of label probabilities, but rather a single label per row -// of features. This label is considered to have probability 1.0 for the -// given row. -// -// Inputs are the logits, not probabilities. -// -// Arguments: -// features: batch_size x num_classes matrix -// labels: batch_size vector with values in [0, num_classes). -// This is the label for the given minibatch entry. -// -// Returns: -// loss: Per example loss (batch_size vector). -// backprop: backpropagated gradients (batch_size x num_classes matrix). -func SparseSoftmaxCrossEntropyWithLogits(scope *Scope, features tf.Output, labels tf.Output) (loss tf.Output, backprop tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseSoftmaxCrossEntropyWithLogits", - Input: []tf.Input{ - features, labels, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Worker heartbeat op. -// -// Heartbeats may be sent periodically to indicate the coordinator is still active, -// to retrieve the current worker status and to expedite shutdown when necessary. -// -// Arguments: -// request: A string tensor containing a serialized WorkerHeartbeatRequest -// -// Returns A string tensor containing a serialized WorkerHeartbeatResponse -func WorkerHeartbeat(scope *Scope, request tf.Output) (response tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "WorkerHeartbeat", - Input: []tf.Input{ - request, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ResourceApplyProximalGradientDescentAttr is an optional argument to ResourceApplyProximalGradientDescent. -type ResourceApplyProximalGradientDescentAttr func(optionalAttr) - -// ResourceApplyProximalGradientDescentUseLocking sets the optional use_locking attribute to value. -// -// value: If True, the subtraction will be protected by a lock; -// otherwise the behavior is undefined, but may exhibit less contention. 
-// If not specified, defaults to false -func ResourceApplyProximalGradientDescentUseLocking(value bool) ResourceApplyProximalGradientDescentAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// Update '*var' as FOBOS algorithm with fixed learning rate. -// -// prox_v = var - alpha * delta -// var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0} -// -// Arguments: -// var_: Should be from a Variable(). -// alpha: Scaling factor. Must be a scalar. -// l1: L1 regularization. Must be a scalar. -// l2: L2 regularization. Must be a scalar. -// delta: The change. -// -// Returns the created operation. -func ResourceApplyProximalGradientDescent(scope *Scope, var_ tf.Output, alpha tf.Output, l1 tf.Output, l2 tf.Output, delta tf.Output, optional ...ResourceApplyProximalGradientDescentAttr) (o *tf.Operation) { +func StringLower(scope *Scope, input tf.Output, optional ...StringLowerAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -44851,62 +43764,9 @@ func ResourceApplyProximalGradientDescent(scope *Scope, var_ tf.Output, alpha tf a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceApplyProximalGradientDescent", + Type: "StringLower", Input: []tf.Input{ - var_, alpha, l1, l2, delta, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// RandomUniformAttr is an optional argument to RandomUniform. -type RandomUniformAttr func(optionalAttr) - -// RandomUniformSeed sets the optional seed attribute to value. -// -// value: If either `seed` or `seed2` are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func RandomUniformSeed(value int64) RandomUniformAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// RandomUniformSeed2 sets the optional seed2 attribute to value. -// -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func RandomUniformSeed2(value int64) RandomUniformAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Outputs random values from a uniform distribution. -// -// The generated values follow a uniform distribution in the range `[0, 1)`. The -// lower bound 0 is included in the range, while the upper bound 1 is excluded. -// -// Arguments: -// shape: The shape of the output tensor. -// dtype: The type of the output. -// -// Returns A tensor of the specified shape filled with uniform random values. -func RandomUniform(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...RandomUniformAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RandomUniform", - Input: []tf.Input{ - shape, + input, }, Attrs: attrs, } @@ -44914,60 +43774,104 @@ func RandomUniform(scope *Scope, shape tf.Output, dtype tf.DataType, optional .. return op.Output(0) } -// RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugAttr is an optional argument to RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebug. -type RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugAttr func(optionalAttr) - -// RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugTableId sets the optional table_id attribute to value. 
-// If not specified, defaults to -1 -func RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugTableId(value int64) RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugTableName(value string) RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugConfig(value string) RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Retrieve Adadelta embedding parameters with debug support. +// Wraps an arbitrary MLIR computation expressed as a module with a main() function. // -// An op that retrieves optimization parameters from embedding to host -// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up -// the correct embedding table configuration. For example, this op is -// used to retrieve updated parameters before saving a checkpoint. +// This operation does not have an associated kernel and is not intended to be +// executed in a regular TensorFlow session. Instead it is intended to be used for +// testing or for special case where a user intends to pass custom MLIR computation +// through a TensorFlow graph with the intent of having custom tooling processing +// it downstream (when targeting a different environment, like TensorFlow lite for +// example). +// The MLIR module is expected to have a main() function that will be used as an +// entry point. The inputs to the operations will be passed as argument to the +// main() function and the returned values of the main function mapped to the +// outputs. +// Example usage: // -// Returns: -// parameters: Parameter parameters updated by the Adadelta optimization algorithm. -// accumulators: Parameter accumulators updated by the Adadelta optimization algorithm. -// updates: Parameter updates updated by the Adadelta optimization algorithm. -// gradient_accumulators: Parameter gradient_accumulators updated by the Adadelta optimization algorithm. 
-func RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebug(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugAttr) (parameters tf.Output, accumulators tf.Output, updates tf.Output, gradient_accumulators tf.Output) { +// ``` +// import tensorflow as tf +// from tensorflow.compiler.mlir.tensorflow.gen_mlir_passthrough_op import mlir_passthrough_op +// +// mlir_module = '''python +// func @main(%arg0 : tensor<10xf32>, %arg1 : tensor<10xf32>) -> tensor<10x10xf32> { +// %add = "magic.op"(%arg0, %arg1) : (tensor<10xf32>, tensor<10xf32>) -> tensor<10x10xf32> +// return %ret : tensor<10x10xf32> +// } +// ''' +// +// @tf.function +// def foo(x, y): +// return mlir_passthrough_op([x, y], mlir_module, Toutputs=[tf.float32]) +// +// graph_def = foo.get_concrete_function(tf.TensorSpec([10], tf.float32), tf.TensorSpec([10], tf.float32)).graph.as_graph_def() +// ``` +func MlirPassthroughOp(scope *Scope, inputs []tf.Output, mlir_module string, Toutputs []tf.DataType) (outputs []tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"mlir_module": mlir_module, "Toutputs": Toutputs} opspec := tf.OpSpec{ - Type: "RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebug", - + Type: "MlirPassthroughOp", + Input: []tf.Input{ + tf.OutputList(inputs), + }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3) + if scope.Err() != nil { + return + } + var idx int + var err error + if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil { + scope.UpdateErr("MlirPassthroughOp", err) + return + } + return outputs +} + +// Converts each string in the input Tensor to its hash mod by a number of buckets. +// +// The hash function is deterministic on the content of the string within the +// process. The hash function is a keyed hash function, where attribute `key` +// defines the key of the hash function. `key` is an array of 2 elements. +// +// A strong hash is important when inputs may be malicious, e.g. URLs with +// additional components. Adversaries could try to make their inputs hash to the +// same bucket for a denial-of-service attack or to skew the results. A strong +// hash can be used to make it difficult to find inputs with a skewed hash value +// distribution over buckets. This requires that the hash function is +// seeded by a high-entropy (random) "key" unknown to the adversary. +// +// The additional robustness comes at a cost of roughly 4x higher compute +// time than `tf.string_to_hash_bucket_fast`. +// +// Examples: +// +// >>> tf.strings.to_hash_bucket_strong(["Hello", "TF"], 3, [1, 2]).numpy() +// array([2, 0]) +// +// Arguments: +// input: The strings to assign a hash bucket. +// num_buckets: The number of buckets. +// key: The key used to seed the hash function, passed as a list of two uint64 +// elements. +// +// Returns A Tensor of the same shape as the input `string_tensor`. 
+func StringToHashBucketStrong(scope *Scope, input tf.Output, num_buckets int64, key []int64) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_buckets": num_buckets, "key": key} + opspec := tf.OpSpec{ + Type: "StringToHashBucketStrong", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) } // Retrieves the tree ensemble resource stamp token, number of trees and growing statistics. @@ -45073,102 +43977,6 @@ func ResourceScatterNdAdd(scope *Scope, ref tf.Output, indices tf.Output, update return scope.AddOperation(opspec) } -// CropAndResizeGradImageAttr is an optional argument to CropAndResizeGradImage. -type CropAndResizeGradImageAttr func(optionalAttr) - -// CropAndResizeGradImageMethod sets the optional method attribute to value. -// -// value: A string specifying the interpolation method. Only 'bilinear' is -// supported for now. -// If not specified, defaults to "bilinear" -func CropAndResizeGradImageMethod(value string) CropAndResizeGradImageAttr { - return func(m optionalAttr) { - m["method"] = value - } -} - -// Computes the gradient of the crop_and_resize op wrt the input image tensor. -// -// Arguments: -// grads: A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`. -// boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor -// specifies the coordinates of a box in the `box_ind[i]` image and is specified -// in normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of -// `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the -// `[0, 1]` interval of normalized image height is mapped to -// `[0, image_height - 1] in image height coordinates. We do allow y1 > y2, in -// which case the sampled crop is an up-down flipped version of the original -// image. The width dimension is treated similarly. Normalized coordinates -// outside the `[0, 1]` range are allowed, in which case we use -// `extrapolation_value` to extrapolate the input image values. -// box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`. -// The value of `box_ind[i]` specifies the image that the `i`-th box refers to. -// image_size: A 1-D tensor with value `[batch, image_height, image_width, depth]` -// containing the original image size. Both `image_height` and `image_width` need -// to be positive. -// -// -// Returns A 4-D tensor of shape `[batch, image_height, image_width, depth]`. -func CropAndResizeGradImage(scope *Scope, grads tf.Output, boxes tf.Output, box_ind tf.Output, image_size tf.Output, T tf.DataType, optional ...CropAndResizeGradImageAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"T": T} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "CropAndResizeGradImage", - Input: []tf.Input{ - grads, boxes, box_ind, image_size, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// OutfeedDequeueAttr is an optional argument to OutfeedDequeue. -type OutfeedDequeueAttr func(optionalAttr) - -// OutfeedDequeueDeviceOrdinal sets the optional device_ordinal attribute to value. -// -// value: The TPU device to use. This should be -1 when the Op -// is running on a TPU device, and >= 0 when the Op is running on the CPU -// device. 
-// If not specified, defaults to -1 -func OutfeedDequeueDeviceOrdinal(value int64) OutfeedDequeueAttr { - return func(m optionalAttr) { - m["device_ordinal"] = value - } -} - -// Retrieves a single tensor from the computation outfeed. -// -// This operation will block indefinitely until data is available. -// -// Arguments: -// dtype: The type of elements in the tensor. -// shape: The shape of the tensor. -// -// Returns A tensor that will be read from the device outfeed. -func OutfeedDequeue(scope *Scope, dtype tf.DataType, shape tf.Shape, optional ...OutfeedDequeueAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype, "shape": shape} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "OutfeedDequeue", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // An Op to sum inputs across replicated TPU instances. // // Each instance supplies its own input. @@ -45199,94 +44007,6 @@ func CrossReplicaSum(scope *Scope, input tf.Output, group_assignment tf.Output) return op.Output(0) } -// EnqueueTPUEmbeddingRaggedTensorBatchAttr is an optional argument to EnqueueTPUEmbeddingRaggedTensorBatch. -type EnqueueTPUEmbeddingRaggedTensorBatchAttr func(optionalAttr) - -// EnqueueTPUEmbeddingRaggedTensorBatchDeviceOrdinal sets the optional device_ordinal attribute to value. -// -// value: The TPU device to use. Should be >= 0 and less than the number -// of TPU cores in the task on which the node is placed. -// If not specified, defaults to -1 -func EnqueueTPUEmbeddingRaggedTensorBatchDeviceOrdinal(value int64) EnqueueTPUEmbeddingRaggedTensorBatchAttr { - return func(m optionalAttr) { - m["device_ordinal"] = value - } -} - -// EnqueueTPUEmbeddingRaggedTensorBatchCombiners sets the optional combiners attribute to value. -// -// value: A list of string scalars, one for each embedding table that specify -// how to normalize the embedding activations after weighted summation. -// Supported combiners are 'mean', 'sum', or 'sqrtn'. It is invalid to have -// the sum of the weights be 0 for 'mean' or the sum of the squared weights be -// 0 for 'sqrtn'. If combiners isn't passed, the default is to use 'sum' for -// all tables. -// If not specified, defaults to <> -func EnqueueTPUEmbeddingRaggedTensorBatchCombiners(value []string) EnqueueTPUEmbeddingRaggedTensorBatchAttr { - return func(m optionalAttr) { - m["combiners"] = value - } -} - -// EnqueueTPUEmbeddingRaggedTensorBatchMaxSequenceLengths sets the optional max_sequence_lengths attribute to value. -// If not specified, defaults to <> -func EnqueueTPUEmbeddingRaggedTensorBatchMaxSequenceLengths(value []int64) EnqueueTPUEmbeddingRaggedTensorBatchAttr { - return func(m optionalAttr) { - m["max_sequence_lengths"] = value - } -} - -// Eases the porting of code that uses tf.nn.embedding_lookup(). -// -// sample_splits[i], embedding_indices[i] and aggregation_weights[i] correspond -// to the ith feature. table_ids[i] indicates which embedding table to look up ith -// feature. -// -// The tensors at corresponding positions in two of the input lists, -// embedding_indices and aggregation_weights, must have the same shape, i.e. rank 1 -// with dim_size() equal to the total number of lookups into the table described by -// the corresponding feature. -// -// Arguments: -// sample_splits: A list of rank 1 Tensors specifying the break points for splitting -// embedding_indices and aggregation_weights into rows. 
-// It corresponds to ids.row_splits in embedding_lookup(), when ids is a -// RaggedTensor. -// embedding_indices: A list of rank 1 Tensors, indices into the embedding tables. -// It corresponds to ids.values in embedding_lookup(), when ids is a RaggedTensor. -// aggregation_weights: A list of rank 1 Tensors containing per training example -// aggregation weights. It corresponds to the values field of a RaggedTensor -// with the same row_splits as ids in embedding_lookup(), when ids is a -// RaggedTensor. -// mode_override: A string input that overrides the mode specified in the -// TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference', -// 'training', 'backward_pass_only'}. When set to 'unspecified', the mode set -// in TPUEmbeddingConfiguration is used, otherwise mode_override is used. -// table_ids: A list of integers specifying the identifier of the embedding table -// (offset of TableDescriptor in the TPUEmbeddingConfiguration) to lookup the -// corresponding input. The ith input is looked up using table_ids[i]. The size -// of the table_ids list must be equal to that of sample_indices, -// embedding_indices and aggregation_weights. -// -// Returns the created operation. -func EnqueueTPUEmbeddingRaggedTensorBatch(scope *Scope, sample_splits []tf.Output, embedding_indices []tf.Output, aggregation_weights []tf.Output, mode_override tf.Output, table_ids []int64, optional ...EnqueueTPUEmbeddingRaggedTensorBatchAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"table_ids": table_ids} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "EnqueueTPUEmbeddingRaggedTensorBatch", - Input: []tf.Input{ - tf.OutputList(sample_splits), tf.OutputList(embedding_indices), tf.OutputList(aggregation_weights), mode_override, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - // FakeQuantWithMinMaxVarsAttr is an optional argument to FakeQuantWithMinMaxVars. type FakeQuantWithMinMaxVarsAttr func(optionalAttr) @@ -45432,6 +44152,212 @@ func LoadTPUEmbeddingADAMParameters(scope *Scope, parameters tf.Output, momenta return scope.AddOperation(opspec) } +// Transforms a vector of brain.Example protos (as strings) into typed tensors. +// +// Arguments: +// serialized: A vector containing a batch of binary serialized Example protos. +// names: A vector containing the names of the serialized protos. +// May contain, for example, table key (descriptive) names for the +// corresponding serialized protos. These are purely useful for debugging +// purposes, and the presence of values here has no effect on the output. +// May also be an empty vector if no names are available. +// If non-empty, this vector must be the same length as "serialized". +// sparse_keys: A list of Nsparse string Tensors (scalars). +// The keys expected in the Examples' features associated with sparse values. +// dense_keys: A list of Ndense string Tensors (scalars). +// The keys expected in the Examples' features associated with dense values. +// dense_defaults: A list of Ndense Tensors (some may be empty). +// dense_defaults[j] provides default values +// when the example's feature_map lacks dense_key[j]. If an empty Tensor is +// provided for dense_defaults[j], then the Feature dense_keys[j] is required. +// The input type is inferred from dense_defaults[j], even when it's empty. +// If dense_defaults[j] is not empty, and dense_shapes[j] is fully defined, +// then the shape of dense_defaults[j] must match that of dense_shapes[j]. 
+// If dense_shapes[j] has an undefined major dimension (variable strides dense +// feature), dense_defaults[j] must contain a single element: +// the padding element. +// sparse_types: A list of Nsparse types; the data types of data in each Feature +// given in sparse_keys. +// Currently the ParseExample supports DT_FLOAT (FloatList), +// DT_INT64 (Int64List), and DT_STRING (BytesList). +// dense_shapes: A list of Ndense shapes; the shapes of data in each Feature +// given in dense_keys. +// The number of elements in the Feature corresponding to dense_key[j] +// must always equal dense_shapes[j].NumEntries(). +// If dense_shapes[j] == (D0, D1, ..., DN) then the shape of output +// Tensor dense_values[j] will be (|serialized|, D0, D1, ..., DN): +// The dense outputs are just the inputs row-stacked by batch. +// This works for dense_shapes[j] = (-1, D1, ..., DN). In this case +// the shape of the output Tensor dense_values[j] will be +// (|serialized|, M, D1, .., DN), where M is the maximum number of blocks +// of elements of length D1 * .... * DN, across all minibatch entries +// in the input. Any minibatch entry with less than M blocks of elements of +// length D1 * ... * DN will be padded with the corresponding default_value +// scalar element along the second dimension. +func ParseExample(scope *Scope, serialized tf.Output, names tf.Output, sparse_keys []tf.Output, dense_keys []tf.Output, dense_defaults []tf.Output, sparse_types []tf.DataType, dense_shapes []tf.Shape) (sparse_indices []tf.Output, sparse_values []tf.Output, sparse_shapes []tf.Output, dense_values []tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"sparse_types": sparse_types, "dense_shapes": dense_shapes} + opspec := tf.OpSpec{ + Type: "ParseExample", + Input: []tf.Input{ + serialized, names, tf.OutputList(sparse_keys), tf.OutputList(dense_keys), tf.OutputList(dense_defaults), + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if sparse_indices, idx, err = makeOutputList(op, idx, "sparse_indices"); err != nil { + scope.UpdateErr("ParseExample", err) + return + } + if sparse_values, idx, err = makeOutputList(op, idx, "sparse_values"); err != nil { + scope.UpdateErr("ParseExample", err) + return + } + if sparse_shapes, idx, err = makeOutputList(op, idx, "sparse_shapes"); err != nil { + scope.UpdateErr("ParseExample", err) + return + } + if dense_values, idx, err = makeOutputList(op, idx, "dense_values"); err != nil { + scope.UpdateErr("ParseExample", err) + return + } + return sparse_indices, sparse_values, sparse_shapes, dense_values +} + +// MapPeekAttr is an optional argument to MapPeek. +type MapPeekAttr func(optionalAttr) + +// MapPeekCapacity sets the optional capacity attribute to value. +// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func MapPeekCapacity(value int64) MapPeekAttr { + return func(m optionalAttr) { + m["capacity"] = value + } +} + +// MapPeekMemoryLimit sets the optional memory_limit attribute to value. +// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func MapPeekMemoryLimit(value int64) MapPeekAttr { + return func(m optionalAttr) { + m["memory_limit"] = value + } +} + +// MapPeekContainer sets the optional container attribute to value. 
+// If not specified, defaults to "" +func MapPeekContainer(value string) MapPeekAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// MapPeekSharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func MapPeekSharedName(value string) MapPeekAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Op peeks at the values at the specified key. If the +// +// underlying container does not contain this key +// this op will block until it does. +func MapPeek(scope *Scope, key tf.Output, indices tf.Output, dtypes []tf.DataType, optional ...MapPeekAttr) (values []tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dtypes": dtypes} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "MapPeek", + Input: []tf.Input{ + key, indices, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if values, idx, err = makeOutputList(op, idx, "values"); err != nil { + scope.UpdateErr("MapPeek", err) + return + } + return values +} + +// RetrieveTPUEmbeddingCenteredRMSPropParametersAttr is an optional argument to RetrieveTPUEmbeddingCenteredRMSPropParameters. +type RetrieveTPUEmbeddingCenteredRMSPropParametersAttr func(optionalAttr) + +// RetrieveTPUEmbeddingCenteredRMSPropParametersTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 +func RetrieveTPUEmbeddingCenteredRMSPropParametersTableId(value int64) RetrieveTPUEmbeddingCenteredRMSPropParametersAttr { + return func(m optionalAttr) { + m["table_id"] = value + } +} + +// RetrieveTPUEmbeddingCenteredRMSPropParametersTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func RetrieveTPUEmbeddingCenteredRMSPropParametersTableName(value string) RetrieveTPUEmbeddingCenteredRMSPropParametersAttr { + return func(m optionalAttr) { + m["table_name"] = value + } +} + +// RetrieveTPUEmbeddingCenteredRMSPropParametersConfig sets the optional config attribute to value. +// If not specified, defaults to "" +func RetrieveTPUEmbeddingCenteredRMSPropParametersConfig(value string) RetrieveTPUEmbeddingCenteredRMSPropParametersAttr { + return func(m optionalAttr) { + m["config"] = value + } +} + +// Retrieve centered RMSProp embedding parameters. +// +// An op that retrieves optimization parameters from embedding to host +// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up +// the correct embedding table configuration. For example, this op is +// used to retrieve updated parameters before saving a checkpoint. +// +// Returns: +// parameters: Parameter parameters updated by the centered RMSProp optimization algorithm. +// ms: Parameter ms updated by the centered RMSProp optimization algorithm. +// mom: Parameter mom updated by the centered RMSProp optimization algorithm. +// mg: Parameter mg updated by the centered RMSProp optimization algorithm. 
+func RetrieveTPUEmbeddingCenteredRMSPropParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingCenteredRMSPropParametersAttr) (parameters tf.Output, ms tf.Output, mom tf.Output, mg tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "RetrieveTPUEmbeddingCenteredRMSPropParameters", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2), op.Output(3) +} + // Records the latency of producing `input_dataset` elements in a StatsAggregator. func LatencyStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { @@ -46438,6 +45364,88 @@ func EncodeBase64(scope *Scope, input tf.Output, optional ...EncodeBase64Attr) ( return op.Output(0) } +// ResourceApplyAdagradV2Attr is an optional argument to ResourceApplyAdagradV2. +type ResourceApplyAdagradV2Attr func(optionalAttr) + +// ResourceApplyAdagradV2UseLocking sets the optional use_locking attribute to value. +// +// value: If `True`, updating of the var and accum tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceApplyAdagradV2UseLocking(value bool) ResourceApplyAdagradV2Attr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} + +// ResourceApplyAdagradV2UpdateSlots sets the optional update_slots attribute to value. +// If not specified, defaults to true +func ResourceApplyAdagradV2UpdateSlots(value bool) ResourceApplyAdagradV2Attr { + return func(m optionalAttr) { + m["update_slots"] = value + } +} + +// Update '*var' according to the adagrad scheme. +// +// accum += grad * grad +// var -= lr * grad * (1 / (sqrt(accum) + epsilon)) +// +// Arguments: +// var_: Should be from a Variable(). +// accum: Should be from a Variable(). +// lr: Scaling factor. Must be a scalar. +// epsilon: Constant factor. Must be a scalar. +// grad: The gradient. +// +// Returns the created operation. +func ResourceApplyAdagradV2(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdagradV2Attr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ResourceApplyAdagradV2", + Input: []tf.Input{ + var_, accum, lr, epsilon, grad, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// Op that loads and executes a TPU program on a TPU device. +// +// For the internal use of the distributed TPU compiler. +func TPUExecute(scope *Scope, args []tf.Output, key tf.Output, Tresults []tf.DataType) (results []tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"Tresults": Tresults} + opspec := tf.OpSpec{ + Type: "TPUExecute", + Input: []tf.Input{ + tf.OutputList(args), key, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if results, idx, err = makeOutputList(op, idx, "results"); err != nil { + scope.UpdateErr("TPUExecute", err) + return + } + return results +} + // Creates a dataset that batches input elements into a SparseTensor. 
// // Arguments: @@ -46527,140 +45535,140 @@ func LoadTPUEmbeddingAdadeltaParametersGradAccumDebug(scope *Scope, parameters t return scope.AddOperation(opspec) } -// Returns x / y element-wise. +// DepthwiseConv2dNativeAttr is an optional argument to DepthwiseConv2dNative. +type DepthwiseConv2dNativeAttr func(optionalAttr) + +// DepthwiseConv2dNativeExplicitPaddings sets the optional explicit_paddings attribute to value. +// If not specified, defaults to <> +func DepthwiseConv2dNativeExplicitPaddings(value []int64) DepthwiseConv2dNativeAttr { + return func(m optionalAttr) { + m["explicit_paddings"] = value + } +} + +// DepthwiseConv2dNativeDataFormat sets the optional data_format attribute to value. // -// *NOTE*: `Div` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Div(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the data is stored in the order of: +// [batch, height, width, channels]. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, channels, height, width]. +// If not specified, defaults to "NHWC" +func DepthwiseConv2dNativeDataFormat(value string) DepthwiseConv2dNativeAttr { + return func(m optionalAttr) { + m["data_format"] = value + } +} + +// DepthwiseConv2dNativeDilations sets the optional dilations attribute to value. +// +// value: 1-D tensor of length 4. The dilation factor for each dimension of +// `input`. If set to k > 1, there will be k-1 skipped cells between each filter +// element on that dimension. The dimension order is determined by the value of +// `data_format`, see above for details. Dilations in the batch and depth +// dimensions must be 1. +// If not specified, defaults to +func DepthwiseConv2dNativeDilations(value []int64) DepthwiseConv2dNativeAttr { + return func(m optionalAttr) { + m["dilations"] = value + } +} + +// Computes a 2-D depthwise convolution given 4-D `input` and `filter` tensors. +// +// Given an input tensor of shape `[batch, in_height, in_width, in_channels]` +// and a filter / kernel tensor of shape +// `[filter_height, filter_width, in_channels, channel_multiplier]`, containing +// `in_channels` convolutional filters of depth 1, `depthwise_conv2d` applies +// a different filter to each input channel (expanding from 1 channel to +// `channel_multiplier` channels for each), then concatenates the results +// together. Thus, the output has `in_channels * channel_multiplier` channels. +// +// ``` +// for k in 0..in_channels-1 +// for q in 0..channel_multiplier-1 +// output[b, i, j, k * channel_multiplier + q] = +// sum_{di, dj} input[b, strides[1] * i + di, strides[2] * j + dj, k] * +// filter[di, dj, k, q] +// ``` +// +// Must have `strides[0] = strides[3] = 1`. For the most common case of the same +// horizontal and vertices strides, `strides = [1, stride, stride, 1]`. +// +// Arguments: +// +// +// strides: 1-D of length 4. The stride of the sliding window for each dimension +// of `input`. +// padding: The type of padding algorithm to use. 
+func DepthwiseConv2dNative(scope *Scope, input tf.Output, filter tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Div", + Type: "DepthwiseConv2dNative", Input: []tf.Input{ - x, y, + input, filter, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Enqueue a Tensor on the computation outfeed. +// Creates an all-zeros CSRSparseMatrix with shape `dense_shape`. // // Arguments: -// input: A tensor that will be inserted into the outfeed queue. +// dense_shape: The desired matrix shape. // -// Returns the created operation. -func OutfeedEnqueue(scope *Scope, input tf.Output) (o *tf.Operation) { +// +// Returns An empty CSR matrix with shape `dense_shape`. +func SparseMatrixZeros(scope *Scope, dense_shape tf.Output, type_ tf.DataType) (sparse_matrix tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"type": type_} opspec := tf.OpSpec{ - Type: "OutfeedEnqueue", + Type: "SparseMatrixZeros", Input: []tf.Input{ - input, + dense_shape, }, + Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// DecodeJpegAttr is an optional argument to DecodeJpeg. -type DecodeJpegAttr func(optionalAttr) +// EqualAttr is an optional argument to Equal. +type EqualAttr func(optionalAttr) -// DecodeJpegChannels sets the optional channels attribute to value. -// -// value: Number of color channels for the decoded image. -// If not specified, defaults to 0 -func DecodeJpegChannels(value int64) DecodeJpegAttr { - return func(m optionalAttr) { - m["channels"] = value - } -} - -// DecodeJpegRatio sets the optional ratio attribute to value. -// -// value: Downscaling ratio. -// If not specified, defaults to 1 -func DecodeJpegRatio(value int64) DecodeJpegAttr { - return func(m optionalAttr) { - m["ratio"] = value - } -} - -// DecodeJpegFancyUpscaling sets the optional fancy_upscaling attribute to value. -// -// value: If true use a slower but nicer upscaling of the -// chroma planes (yuv420/422 only). +// EqualIncompatibleShapeError sets the optional incompatible_shape_error attribute to value. // If not specified, defaults to true -func DecodeJpegFancyUpscaling(value bool) DecodeJpegAttr { +func EqualIncompatibleShapeError(value bool) EqualAttr { return func(m optionalAttr) { - m["fancy_upscaling"] = value + m["incompatible_shape_error"] = value } } -// DecodeJpegTryRecoverTruncated sets the optional try_recover_truncated attribute to value. +// Returns the truth value of (x == y) element-wise. // -// value: If true try to recover an image from truncated input. -// If not specified, defaults to false -func DecodeJpegTryRecoverTruncated(value bool) DecodeJpegAttr { - return func(m optionalAttr) { - m["try_recover_truncated"] = value - } -} - -// DecodeJpegAcceptableFraction sets the optional acceptable_fraction attribute to value. +// *NOTE*: `Equal` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) // -// value: The minimum required fraction of lines before a truncated -// input is accepted. 
-// If not specified, defaults to 1 -func DecodeJpegAcceptableFraction(value float32) DecodeJpegAttr { - return func(m optionalAttr) { - m["acceptable_fraction"] = value - } -} - -// DecodeJpegDctMethod sets the optional dct_method attribute to value. +// ```python +// x = tf.constant([2, 4]) +// y = tf.constant(2) +// tf.math.equal(x, y) ==> array([True, False]) // -// value: string specifying a hint about the algorithm used for -// decompression. Defaults to "" which maps to a system-specific -// default. Currently valid values are ["INTEGER_FAST", -// "INTEGER_ACCURATE"]. The hint may be ignored (e.g., the internal -// jpeg library changes to a version that does not have that specific -// option.) -// If not specified, defaults to "" -func DecodeJpegDctMethod(value string) DecodeJpegAttr { - return func(m optionalAttr) { - m["dct_method"] = value - } -} - -// Decode a JPEG-encoded image to a uint8 tensor. -// -// The attr `channels` indicates the desired number of color channels for the -// decoded image. -// -// Accepted values are: -// -// * 0: Use the number of channels in the JPEG-encoded image. -// * 1: output a grayscale image. -// * 3: output an RGB image. -// -// If needed, the JPEG-encoded image is transformed to match the requested number -// of color channels. -// -// The attr `ratio` allows downscaling the image by an integer factor during -// decoding. Allowed values are: 1, 2, 4, and 8. This is much faster than -// downscaling the image later. -// -// -// This op also supports decoding PNGs and non-animated GIFs since the interface is -// the same, though it is cleaner to use `tf.io.decode_image`. -// -// Arguments: -// contents: 0-D. The JPEG-encoded image. -// -// Returns 3-D with shape `[height, width, channels]`.. -func DecodeJpeg(scope *Scope, contents tf.Output, optional ...DecodeJpegAttr) (image tf.Output) { +// x = tf.constant([2, 4]) +// y = tf.constant([2, 4]) +// tf.math.equal(x, y) ==> array([True, True]) +// ``` +func Equal(scope *Scope, x tf.Output, y tf.Output, optional ...EqualAttr) (z tf.Output) { if scope.Err() != nil { return } @@ -46669,9 +45677,9 @@ func DecodeJpeg(scope *Scope, contents tf.Output, optional ...DecodeJpegAttr) (i a(attrs) } opspec := tf.OpSpec{ - Type: "DecodeJpeg", + Type: "Equal", Input: []tf.Input{ - contents, + x, y, }, Attrs: attrs, } @@ -46679,208 +45687,83 @@ func DecodeJpeg(scope *Scope, contents tf.Output, optional ...DecodeJpegAttr) (i return op.Output(0) } -// Returns the number of nonzeroes of `sparse_matrix`. +// SparseToSparseSetOperationAttr is an optional argument to SparseToSparseSetOperation. +type SparseToSparseSetOperationAttr func(optionalAttr) + +// SparseToSparseSetOperationValidateIndices sets the optional validate_indices attribute to value. +// If not specified, defaults to true +func SparseToSparseSetOperationValidateIndices(value bool) SparseToSparseSetOperationAttr { + return func(m optionalAttr) { + m["validate_indices"] = value + } +} + +// Applies set operation along last dimension of 2 `SparseTensor` inputs. +// +// See SetOperationOp::SetOperationFromContext for values of `set_operation`. +// +// If `validate_indices` is `True`, `SparseToSparseSetOperation` validates the +// order and range of `set1` and `set2` indices. +// +// Input `set1` is a `SparseTensor` represented by `set1_indices`, `set1_values`, +// and `set1_shape`. For `set1` ranked `n`, 1st `n-1` dimensions must be the same +// as `set2`. Dimension `n` contains values in a set, duplicates are allowed but +// ignored. 
+// +// Input `set2` is a `SparseTensor` represented by `set2_indices`, `set2_values`, +// and `set2_shape`. For `set2` ranked `n`, 1st `n-1` dimensions must be the same +// as `set1`. Dimension `n` contains values in a set, duplicates are allowed but +// ignored. +// +// If `validate_indices` is `True`, this op validates the order and range of `set1` +// and `set2` indices. +// +// Output `result` is a `SparseTensor` represented by `result_indices`, +// `result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this +// has rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth` +// dimension contains the result of `set_operation` applied to the corresponding +// `[0...n-1]` dimension of `set`. // // Arguments: -// sparse_matrix: A CSRSparseMatrix. +// set1_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major +// order. +// set1_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major +// order. +// set1_shape: 1D `Tensor`, shape of a `SparseTensor`. `set1_shape[0...n-1]` must +// be the same as `set2_shape[0...n-1]`, `set1_shape[n]` is the +// max set size across `0...n-1` dimensions. +// set2_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major +// order. +// set2_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major +// order. +// set2_shape: 1D `Tensor`, shape of a `SparseTensor`. `set2_shape[0...n-1]` must +// be the same as `set1_shape[0...n-1]`, `set2_shape[n]` is the +// max set size across `0...n-1` dimensions. // -// Returns The number of nonzeroes of `sparse_matrix`. -func SparseMatrixNNZ(scope *Scope, sparse_matrix tf.Output) (nnz tf.Output) { +// +// Returns: +// result_indices: 2D indices of a `SparseTensor`. +// result_values: 1D values of a `SparseTensor`. +// result_shape: 1D `Tensor` shape of a `SparseTensor`. `result_shape[0...n-1]` is +// the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]` +// is the max result set size across all `0...n-1` dimensions. +func SparseToSparseSetOperation(scope *Scope, set1_indices tf.Output, set1_values tf.Output, set1_shape tf.Output, set2_indices tf.Output, set2_values tf.Output, set2_shape tf.Output, set_operation string, optional ...SparseToSparseSetOperationAttr) (result_indices tf.Output, result_values tf.Output, result_shape tf.Output) { if scope.Err() != nil { return } - opspec := tf.OpSpec{ - Type: "SparseMatrixNNZ", - Input: []tf.Input{ - sparse_matrix, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr is an optional argument to LoadTPUEmbeddingProximalAdagradParametersGradAccumDebug. -type LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr func(optionalAttr) - -// LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugTableId(value int64) LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugTableName sets the optional table_name attribute to value. 
-// If not specified, defaults to "" -func LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugTableName(value string) LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugConfig(value string) LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Load proximal Adagrad embedding parameters with debug support. -// -// An op that loads optimization parameters into HBM for embedding. Must be -// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct -// embedding table configuration. For example, this op is used to install -// parameters that are loaded from a checkpoint before a training loop is -// executed. -// -// Arguments: -// parameters: Value of parameters used in the proximal Adagrad optimization algorithm. -// accumulators: Value of accumulators used in the proximal Adagrad optimization algorithm. -// gradient_accumulators: Value of gradient_accumulators used in the proximal Adagrad optimization algorithm. -// -// -// -// Returns the created operation. -func LoadTPUEmbeddingProximalAdagradParametersGradAccumDebug(scope *Scope, parameters tf.Output, accumulators tf.Output, gradient_accumulators tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} + attrs := map[string]interface{}{"set_operation": set_operation} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "LoadTPUEmbeddingProximalAdagradParametersGradAccumDebug", + Type: "SparseToSparseSetOperation", Input: []tf.Input{ - parameters, accumulators, gradient_accumulators, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Serializes the tree ensemble to a proto. -// -// Arguments: -// tree_ensemble_handle: Handle to the tree ensemble. -// -// Returns: -// stamp_token: Stamp token of the tree ensemble resource. -// tree_ensemble_serialized: Serialized proto of the ensemble. -func BoostedTreesSerializeEnsemble(scope *Scope, tree_ensemble_handle tf.Output) (stamp_token tf.Output, tree_ensemble_serialized tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "BoostedTreesSerializeEnsemble", - Input: []tf.Input{ - tree_ensemble_handle, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Computes inverse hyperbolic cosine of x element-wise. -// -// Given an input tensor, the function computes inverse hyperbolic cosine of every element. -// Input range is `[1, inf]`. It returns `nan` if the input lies outside the range. -// -// ```python -// x = tf.constant([-2, -0.5, 1, 1.2, 200, 10000, float("inf")]) -// tf.math.acosh(x) ==> [nan nan 0. 0.62236255 5.9914584 9.903487 inf] -// ``` -func Acosh(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Acosh", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Outputs deterministic pseudorandom random numbers from a gamma distribution. -// -// Outputs random values from a gamma distribution. 
-// -// The outputs are a deterministic function of `shape`, `seed`, and `alpha`. -// -// Arguments: -// shape: The shape of the output tensor. -// seed: 2 seeds (shape [2]). -// alpha: The concentration of the gamma distribution. Shape must match the rightmost -// dimensions of `shape`. -// -// Returns Random values with specified shape. -func StatelessRandomGammaV2(scope *Scope, shape tf.Output, seed tf.Output, alpha tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "StatelessRandomGammaV2", - Input: []tf.Input{ - shape, seed, alpha, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a dataset that executes a SQL query and emits rows of the result set. -// -// Arguments: -// driver_name: The database type. Currently, the only supported type is 'sqlite'. -// data_source_name: A connection string to connect to the database. -// query: A SQL query to execute. -// -// -func SqlDataset(scope *Scope, driver_name tf.Output, data_source_name tf.Output, query tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "SqlDataset", - Input: []tf.Input{ - driver_name, data_source_name, query, + set1_indices, set1_values, set1_shape, set2_indices, set2_values, set2_shape, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Outputs deterministic pseudorandom random integers from a uniform distribution. -// -// The generated values follow a uniform distribution in the range `[minval, maxval)`. -// -// The outputs are a deterministic function of `shape`, `seed`, `minval`, and `maxval`. -// -// Arguments: -// shape: The shape of the output tensor. -// seed: 2 seeds (shape [2]). -// minval: Minimum value (inclusive, scalar). -// maxval: Maximum value (exclusive, scalar). -// -// Returns Random values with specified shape. -func StatelessRandomUniformInt(scope *Scope, shape tf.Output, seed tf.Output, minval tf.Output, maxval tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "StatelessRandomUniformInt", - Input: []tf.Input{ - shape, seed, minval, maxval, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } // Returns a batched diagonal tensor with a given batched diagonal values. @@ -47171,32 +46054,95 @@ func RetrieveTPUEmbeddingMomentumParametersGradAccumDebug(scope *Scope, num_shar return op.Output(0), op.Output(1), op.Output(2) } -// EqualAttr is an optional argument to Equal. -type EqualAttr func(optionalAttr) +// MaxPoolGradGradV2Attr is an optional argument to MaxPoolGradGradV2. +type MaxPoolGradGradV2Attr func(optionalAttr) -// EqualIncompatibleShapeError sets the optional incompatible_shape_error attribute to value. -// If not specified, defaults to true -func EqualIncompatibleShapeError(value bool) EqualAttr { +// MaxPoolGradGradV2DataFormat sets the optional data_format attribute to value. +// +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the data is stored in the order of: +// [batch, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, in_channels, in_height, in_width]. 
+// If not specified, defaults to "NHWC" +func MaxPoolGradGradV2DataFormat(value string) MaxPoolGradGradV2Attr { return func(m optionalAttr) { - m["incompatible_shape_error"] = value + m["data_format"] = value } } -// Returns the truth value of (x == y) element-wise. +// Computes second-order gradients of the maxpooling function. // -// *NOTE*: `Equal` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +// Arguments: +// orig_input: The original input tensor. +// orig_output: The original output tensor. +// grad: 4-D. Gradients of gradients w.r.t. the input of `max_pool`. +// ksize: The size of the window for each dimension of the input tensor. +// strides: The stride of the sliding window for each dimension of the +// input tensor. +// padding: The type of padding algorithm to use. // -// ```python -// x = tf.constant([2, 4]) -// y = tf.constant(2) -// tf.math.equal(x, y) ==> array([True, False]) +// Returns Gradients of gradients w.r.t. the input to `max_pool`. +func MaxPoolGradGradV2(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolGradGradV2Attr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"padding": padding} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "MaxPoolGradGradV2", + Input: []tf.Input{ + orig_input, orig_output, grad, ksize, strides, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// ResourceSparseApplyRMSPropAttr is an optional argument to ResourceSparseApplyRMSProp. +type ResourceSparseApplyRMSPropAttr func(optionalAttr) + +// ResourceSparseApplyRMSPropUseLocking sets the optional use_locking attribute to value. // -// x = tf.constant([2, 4]) -// y = tf.constant([2, 4]) -// tf.math.equal(x, y) ==> array([True, True]) -// ``` -func Equal(scope *Scope, x tf.Output, y tf.Output, optional ...EqualAttr) (z tf.Output) { +// value: If `True`, updating of the var, ms, and mom tensors is protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceSparseApplyRMSPropUseLocking(value bool) ResourceSparseApplyRMSPropAttr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} + +// Update '*var' according to the RMSProp algorithm. +// +// Note that in dense implementation of this algorithm, ms and mom will +// update even if the grad is zero, but in this sparse implementation, ms +// and mom will not update in iterations during which the grad is zero. +// +// mean_square = decay * mean_square + (1-decay) * gradient ** 2 +// Delta = learning_rate * gradient / sqrt(mean_square + epsilon) +// +// ms <- rho * ms_{t-1} + (1-rho) * grad * grad +// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) +// var <- var - mom +// +// Arguments: +// var_: Should be from a Variable(). +// ms: Should be from a Variable(). +// mom: Should be from a Variable(). +// lr: Scaling factor. Must be a scalar. +// rho: Decay rate. Must be a scalar. +// +// epsilon: Ridge term. Must be a scalar. +// grad: The gradient. +// indices: A vector of indices into the first dimension of var, ms and mom. +// +// Returns the created operation. 
+func ResourceSparseApplyRMSProp(scope *Scope, var_ tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyRMSPropAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -47205,192 +46151,53 @@ func Equal(scope *Scope, x tf.Output, y tf.Output, optional ...EqualAttr) (z tf. a(attrs) } opspec := tf.OpSpec{ - Type: "Equal", + Type: "ResourceSparseApplyRMSProp", Input: []tf.Input{ - x, y, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// SparseToSparseSetOperationAttr is an optional argument to SparseToSparseSetOperation. -type SparseToSparseSetOperationAttr func(optionalAttr) - -// SparseToSparseSetOperationValidateIndices sets the optional validate_indices attribute to value. -// If not specified, defaults to true -func SparseToSparseSetOperationValidateIndices(value bool) SparseToSparseSetOperationAttr { - return func(m optionalAttr) { - m["validate_indices"] = value - } -} - -// Applies set operation along last dimension of 2 `SparseTensor` inputs. -// -// See SetOperationOp::SetOperationFromContext for values of `set_operation`. -// -// If `validate_indices` is `True`, `SparseToSparseSetOperation` validates the -// order and range of `set1` and `set2` indices. -// -// Input `set1` is a `SparseTensor` represented by `set1_indices`, `set1_values`, -// and `set1_shape`. For `set1` ranked `n`, 1st `n-1` dimensions must be the same -// as `set2`. Dimension `n` contains values in a set, duplicates are allowed but -// ignored. -// -// Input `set2` is a `SparseTensor` represented by `set2_indices`, `set2_values`, -// and `set2_shape`. For `set2` ranked `n`, 1st `n-1` dimensions must be the same -// as `set1`. Dimension `n` contains values in a set, duplicates are allowed but -// ignored. -// -// If `validate_indices` is `True`, this op validates the order and range of `set1` -// and `set2` indices. -// -// Output `result` is a `SparseTensor` represented by `result_indices`, -// `result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this -// has rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth` -// dimension contains the result of `set_operation` applied to the corresponding -// `[0...n-1]` dimension of `set`. -// -// Arguments: -// set1_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major -// order. -// set1_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major -// order. -// set1_shape: 1D `Tensor`, shape of a `SparseTensor`. `set1_shape[0...n-1]` must -// be the same as `set2_shape[0...n-1]`, `set1_shape[n]` is the -// max set size across `0...n-1` dimensions. -// set2_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major -// order. -// set2_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major -// order. -// set2_shape: 1D `Tensor`, shape of a `SparseTensor`. `set2_shape[0...n-1]` must -// be the same as `set1_shape[0...n-1]`, `set2_shape[n]` is the -// max set size across `0...n-1` dimensions. -// -// -// Returns: -// result_indices: 2D indices of a `SparseTensor`. -// result_values: 1D values of a `SparseTensor`. -// result_shape: 1D `Tensor` shape of a `SparseTensor`. `result_shape[0...n-1]` is -// the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]` -// is the max result set size across all `0...n-1` dimensions. 
-func SparseToSparseSetOperation(scope *Scope, set1_indices tf.Output, set1_values tf.Output, set1_shape tf.Output, set2_indices tf.Output, set2_values tf.Output, set2_shape tf.Output, set_operation string, optional ...SparseToSparseSetOperationAttr) (result_indices tf.Output, result_values tf.Output, result_shape tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"set_operation": set_operation} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SparseToSparseSetOperation", - Input: []tf.Input{ - set1_indices, set1_values, set1_shape, set2_indices, set2_values, set2_shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// InfeedEnqueueTupleAttr is an optional argument to InfeedEnqueueTuple. -type InfeedEnqueueTupleAttr func(optionalAttr) - -// InfeedEnqueueTupleLayouts sets the optional layouts attribute to value. -// -// value: A vector holding the requested layout in minor-to-major sequence for -// all the tuple shapes, in the order the shapes appear in the "shapes" input. -// The layout elements for a sub-shape can be set to -1, in which case the -// corresponding layout will be computed by the infeed operation. -// If not specified, defaults to <> -func InfeedEnqueueTupleLayouts(value []int64) InfeedEnqueueTupleAttr { - return func(m optionalAttr) { - m["layouts"] = value - } -} - -// InfeedEnqueueTupleDeviceOrdinal sets the optional device_ordinal attribute to value. -// -// value: The TPU device to use. This should be -1 when the Op -// is running on a TPU device, and >= 0 when the Op is running on the CPU -// device. -// If not specified, defaults to -1 -func InfeedEnqueueTupleDeviceOrdinal(value int64) InfeedEnqueueTupleAttr { - return func(m optionalAttr) { - m["device_ordinal"] = value - } -} - -// Feeds multiple Tensor values into the computation as an XLA tuple. -// -// Arguments: -// inputs: A list of tensors that will be provided using the infeed mechanism. -// shapes: The shapes of each tensor in `inputs`. -// -// Returns the created operation. -func InfeedEnqueueTuple(scope *Scope, inputs []tf.Output, shapes []tf.Shape, optional ...InfeedEnqueueTupleAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"shapes": shapes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "InfeedEnqueueTuple", - Input: []tf.Input{ - tf.OutputList(inputs), + var_, ms, mom, lr, rho, momentum, epsilon, grad, indices, }, Attrs: attrs, } return scope.AddOperation(opspec) } -// LoadTPUEmbeddingRMSPropParametersAttr is an optional argument to LoadTPUEmbeddingRMSPropParameters. -type LoadTPUEmbeddingRMSPropParametersAttr func(optionalAttr) +// RetrieveTPUEmbeddingMomentumParametersAttr is an optional argument to RetrieveTPUEmbeddingMomentumParameters. +type RetrieveTPUEmbeddingMomentumParametersAttr func(optionalAttr) -// LoadTPUEmbeddingRMSPropParametersTableId sets the optional table_id attribute to value. +// RetrieveTPUEmbeddingMomentumParametersTableId sets the optional table_id attribute to value. // If not specified, defaults to -1 -func LoadTPUEmbeddingRMSPropParametersTableId(value int64) LoadTPUEmbeddingRMSPropParametersAttr { +func RetrieveTPUEmbeddingMomentumParametersTableId(value int64) RetrieveTPUEmbeddingMomentumParametersAttr { return func(m optionalAttr) { m["table_id"] = value } } -// LoadTPUEmbeddingRMSPropParametersTableName sets the optional table_name attribute to value. 
+// RetrieveTPUEmbeddingMomentumParametersTableName sets the optional table_name attribute to value. // If not specified, defaults to "" -func LoadTPUEmbeddingRMSPropParametersTableName(value string) LoadTPUEmbeddingRMSPropParametersAttr { +func RetrieveTPUEmbeddingMomentumParametersTableName(value string) RetrieveTPUEmbeddingMomentumParametersAttr { return func(m optionalAttr) { m["table_name"] = value } } -// LoadTPUEmbeddingRMSPropParametersConfig sets the optional config attribute to value. +// RetrieveTPUEmbeddingMomentumParametersConfig sets the optional config attribute to value. // If not specified, defaults to "" -func LoadTPUEmbeddingRMSPropParametersConfig(value string) LoadTPUEmbeddingRMSPropParametersAttr { +func RetrieveTPUEmbeddingMomentumParametersConfig(value string) RetrieveTPUEmbeddingMomentumParametersAttr { return func(m optionalAttr) { m["config"] = value } } -// Load RMSProp embedding parameters. +// Retrieve Momentum embedding parameters. // -// An op that loads optimization parameters into HBM for embedding. Must be -// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct -// embedding table configuration. For example, this op is used to install -// parameters that are loaded from a checkpoint before a training loop is -// executed. +// An op that retrieves optimization parameters from embedding to host +// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up +// the correct embedding table configuration. For example, this op is +// used to retrieve updated parameters before saving a checkpoint. // -// Arguments: -// parameters: Value of parameters used in the RMSProp optimization algorithm. -// ms: Value of ms used in the RMSProp optimization algorithm. -// mom: Value of mom used in the RMSProp optimization algorithm. -// -// -// -// Returns the created operation. -func LoadTPUEmbeddingRMSPropParameters(scope *Scope, parameters tf.Output, ms tf.Output, mom tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingRMSPropParametersAttr) (o *tf.Operation) { +// Returns: +// parameters: Parameter parameters updated by the Momentum optimization algorithm. +// momenta: Parameter momenta updated by the Momentum optimization algorithm. +func RetrieveTPUEmbeddingMomentumParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingMomentumParametersAttr) (parameters tf.Output, momenta tf.Output) { if scope.Err() != nil { return } @@ -47399,39 +46206,144 @@ func LoadTPUEmbeddingRMSPropParameters(scope *Scope, parameters tf.Output, ms tf a(attrs) } opspec := tf.OpSpec{ - Type: "LoadTPUEmbeddingRMSPropParameters", + Type: "RetrieveTPUEmbeddingMomentumParameters", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + +// LoadTPUEmbeddingMomentumParametersGradAccumDebugAttr is an optional argument to LoadTPUEmbeddingMomentumParametersGradAccumDebug. +type LoadTPUEmbeddingMomentumParametersGradAccumDebugAttr func(optionalAttr) + +// LoadTPUEmbeddingMomentumParametersGradAccumDebugTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 +func LoadTPUEmbeddingMomentumParametersGradAccumDebugTableId(value int64) LoadTPUEmbeddingMomentumParametersGradAccumDebugAttr { + return func(m optionalAttr) { + m["table_id"] = value + } +} + +// LoadTPUEmbeddingMomentumParametersGradAccumDebugTableName sets the optional table_name attribute to value. 
+// If not specified, defaults to "" +func LoadTPUEmbeddingMomentumParametersGradAccumDebugTableName(value string) LoadTPUEmbeddingMomentumParametersGradAccumDebugAttr { + return func(m optionalAttr) { + m["table_name"] = value + } +} + +// LoadTPUEmbeddingMomentumParametersGradAccumDebugConfig sets the optional config attribute to value. +// If not specified, defaults to "" +func LoadTPUEmbeddingMomentumParametersGradAccumDebugConfig(value string) LoadTPUEmbeddingMomentumParametersGradAccumDebugAttr { + return func(m optionalAttr) { + m["config"] = value + } +} + +// Load Momentum embedding parameters with debug support. +// +// An op that loads optimization parameters into HBM for embedding. Must be +// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct +// embedding table configuration. For example, this op is used to install +// parameters that are loaded from a checkpoint before a training loop is +// executed. +// +// Arguments: +// parameters: Value of parameters used in the Momentum optimization algorithm. +// momenta: Value of momenta used in the Momentum optimization algorithm. +// gradient_accumulators: Value of gradient_accumulators used in the Momentum optimization algorithm. +// +// +// +// Returns the created operation. +func LoadTPUEmbeddingMomentumParametersGradAccumDebug(scope *Scope, parameters tf.Output, momenta tf.Output, gradient_accumulators tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingMomentumParametersGradAccumDebugAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "LoadTPUEmbeddingMomentumParametersGradAccumDebug", Input: []tf.Input{ - parameters, ms, mom, + parameters, momenta, gradient_accumulators, }, Attrs: attrs, } return scope.AddOperation(opspec) } -// StatefulUniformFullIntAttr is an optional argument to StatefulUniformFullInt. -type StatefulUniformFullIntAttr func(optionalAttr) +// DecodeCompressedAttr is an optional argument to DecodeCompressed. +type DecodeCompressedAttr func(optionalAttr) -// StatefulUniformFullIntDtype sets the optional dtype attribute to value. +// DecodeCompressedCompressionType sets the optional compression_type attribute to value. +// +// value: A scalar containing either (i) the empty string (no +// compression), (ii) "ZLIB", or (iii) "GZIP". +// If not specified, defaults to "" +func DecodeCompressedCompressionType(value string) DecodeCompressedAttr { + return func(m optionalAttr) { + m["compression_type"] = value + } +} + +// Decompress strings. +// +// This op decompresses each element of the `bytes` input `Tensor`, which +// is assumed to be compressed using the given `compression_type`. +// +// The `output` is a string `Tensor` of the same shape as `bytes`, +// each element containing the decompressed data from the corresponding +// element in `bytes`. +// +// Arguments: +// bytes: A Tensor of string which is compressed. +// +// Returns A Tensor with the same shape as input `bytes`, uncompressed +// from bytes. 
+func DecodeCompressed(scope *Scope, bytes tf.Output, optional ...DecodeCompressedAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "DecodeCompressed", + Input: []tf.Input{ + bytes, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// NonDeterministicIntsAttr is an optional argument to NonDeterministicInts. +type NonDeterministicIntsAttr func(optionalAttr) + +// NonDeterministicIntsDtype sets the optional dtype attribute to value. // // value: The type of the output. -// If not specified, defaults to DT_UINT64 -func StatefulUniformFullIntDtype(value tf.DataType) StatefulUniformFullIntAttr { +// If not specified, defaults to DT_INT64 +func NonDeterministicIntsDtype(value tf.DataType) NonDeterministicIntsAttr { return func(m optionalAttr) { m["dtype"] = value } } -// Outputs random integers from a uniform distribution. +// Non-deterministically generates some integers. // -// The generated values are uniform integers covering the whole range of `dtype`. +// This op may use some OS-provided source of non-determinism (e.g. an RNG), so each execution will give different results. // // Arguments: -// resource: The handle of the resource variable that stores the state of the RNG. -// algorithm: The RNG algorithm. // shape: The shape of the output tensor. // -// Returns Random values with specified shape. -func StatefulUniformFullInt(scope *Scope, resource tf.Output, algorithm tf.Output, shape tf.Output, optional ...StatefulUniformFullIntAttr) (output tf.Output) { +// Returns Non-deterministic integer values with specified shape. +func NonDeterministicInts(scope *Scope, shape tf.Output, optional ...NonDeterministicIntsAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -47440,9 +46352,9 @@ func StatefulUniformFullInt(scope *Scope, resource tf.Output, algorithm tf.Outpu a(attrs) } opspec := tf.OpSpec{ - Type: "StatefulUniformFullInt", + Type: "NonDeterministicInts", Input: []tf.Input{ - resource, algorithm, shape, + shape, }, Attrs: attrs, } @@ -47450,92 +46362,87 @@ func StatefulUniformFullInt(scope *Scope, resource tf.Output, algorithm tf.Outpu return op.Output(0) } -// LoadTPUEmbeddingStochasticGradientDescentParametersAttr is an optional argument to LoadTPUEmbeddingStochasticGradientDescentParameters. -type LoadTPUEmbeddingStochasticGradientDescentParametersAttr func(optionalAttr) +// MultinomialAttr is an optional argument to Multinomial. +type MultinomialAttr func(optionalAttr) -// LoadTPUEmbeddingStochasticGradientDescentParametersTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func LoadTPUEmbeddingStochasticGradientDescentParametersTableId(value int64) LoadTPUEmbeddingStochasticGradientDescentParametersAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// LoadTPUEmbeddingStochasticGradientDescentParametersTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingStochasticGradientDescentParametersTableName(value string) LoadTPUEmbeddingStochasticGradientDescentParametersAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// LoadTPUEmbeddingStochasticGradientDescentParametersConfig sets the optional config attribute to value. 
-// If not specified, defaults to "" -func LoadTPUEmbeddingStochasticGradientDescentParametersConfig(value string) LoadTPUEmbeddingStochasticGradientDescentParametersAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Load SGD embedding parameters. +// MultinomialSeed sets the optional seed attribute to value. // -// An op that loads optimization parameters into HBM for embedding. Must be -// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct -// embedding table configuration. For example, this op is used to install -// parameters that are loaded from a checkpoint before a training loop is -// executed. +// value: If either seed or seed2 is set to be non-zero, the internal random number +// generator is seeded by the given seed. Otherwise, a random seed is used. +// If not specified, defaults to 0 +func MultinomialSeed(value int64) MultinomialAttr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// MultinomialSeed2 sets the optional seed2 attribute to value. +// +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func MultinomialSeed2(value int64) MultinomialAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// MultinomialOutputDtype sets the optional output_dtype attribute to value. +// If not specified, defaults to DT_INT64 +func MultinomialOutputDtype(value tf.DataType) MultinomialAttr { + return func(m optionalAttr) { + m["output_dtype"] = value + } +} + +// Draws samples from a multinomial distribution. // // Arguments: -// parameters: Value of parameters used in the stochastic gradient descent optimization algorithm. +// logits: 2-D Tensor with shape `[batch_size, num_classes]`. Each slice `[i, :]` +// represents the unnormalized log probabilities for all classes. +// num_samples: 0-D. Number of independent samples to draw for each row slice. // -// -// -// Returns the created operation. -func LoadTPUEmbeddingStochasticGradientDescentParameters(scope *Scope, parameters tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingStochasticGradientDescentParametersAttr) (o *tf.Operation) { +// Returns 2-D Tensor with shape `[batch_size, num_samples]`. Each slice `[i, :]` +// contains the drawn class labels with range `[0, num_classes)`. +func Multinomial(scope *Scope, logits tf.Output, num_samples tf.Output, optional ...MultinomialAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "LoadTPUEmbeddingStochasticGradientDescentParameters", + Type: "Multinomial", Input: []tf.Input{ - parameters, + logits, num_samples, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// RequantizePerChannelAttr is an optional argument to RequantizePerChannel. -type RequantizePerChannelAttr func(optionalAttr) +// SerializeSparseAttr is an optional argument to SerializeSparse. +type SerializeSparseAttr func(optionalAttr) -// RequantizePerChannelOutType sets the optional out_type attribute to value. +// SerializeSparseOutType sets the optional out_type attribute to value. // -// value: The quantized type of output tensor that needs to be converted. 
-// If not specified, defaults to DT_QUINT8 -func RequantizePerChannelOutType(value tf.DataType) RequantizePerChannelAttr { +// value: The `dtype` to use for serialization; the supported types are `string` +// (default) and `variant`. +// If not specified, defaults to DT_STRING +func SerializeSparseOutType(value tf.DataType) SerializeSparseAttr { return func(m optionalAttr) { m["out_type"] = value } } -// Requantizes input with min and max values known per channel. +// Serialize a `SparseTensor` into a `[3]` `Tensor` object. // // Arguments: -// input: The original input tensor. -// input_min: The minimum value of the input tensor -// input_max: The maximum value of the input tensor. -// requested_output_min: The minimum value of the output tensor requested. -// requested_output_max: The maximum value of the output tensor requested. -// -// Returns: -// output: Output tensor. -// output_min: The minimum value of the final output tensor -// output_max: The maximum value of the final output tensor. -func RequantizePerChannel(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, requested_output_min tf.Output, requested_output_max tf.Output, optional ...RequantizePerChannelAttr) (output tf.Output, output_min tf.Output, output_max tf.Output) { +// sparse_indices: 2-D. The `indices` of the `SparseTensor`. +// sparse_values: 1-D. The `values` of the `SparseTensor`. +// sparse_shape: 1-D. The `shape` of the `SparseTensor`. +func SerializeSparse(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...SerializeSparseAttr) (serialized_sparse tf.Output) { if scope.Err() != nil { return } @@ -47544,29 +46451,91 @@ func RequantizePerChannel(scope *Scope, input tf.Output, input_min tf.Output, in a(attrs) } opspec := tf.OpSpec{ - Type: "RequantizePerChannel", + Type: "SerializeSparse", Input: []tf.Input{ - input, input_min, input_max, requested_output_min, requested_output_max, + sparse_indices, sparse_values, sparse_shape, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// LeakyReluAttr is an optional argument to LeakyRelu. -type LeakyReluAttr func(optionalAttr) +// Extracts the average gradient in the given ConditionalAccumulator. +// +// The op blocks until sufficient (i.e., more than num_required) +// gradients have been accumulated. If the accumulator has already +// aggregated more than num_required gradients, it returns the average of +// the accumulated gradients. Also automatically increments the recorded +// global_step in the accumulator by 1, and resets the aggregate to 0. +// +// Arguments: +// handle: The handle to an accumulator. +// num_required: Number of gradients required before we return an aggregate. +// dtype: The data type of accumulated gradients. Needs to correspond to the type +// of the accumulator. +// +// Returns The average of the accumulated gradients. +func ResourceAccumulatorTakeGradient(scope *Scope, handle tf.Output, num_required tf.Output, dtype tf.DataType) (average tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dtype": dtype} + opspec := tf.OpSpec{ + Type: "ResourceAccumulatorTakeGradient", + Input: []tf.Input{ + handle, num_required, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} -// LeakyReluAlpha sets the optional alpha attribute to value. 
-// If not specified, defaults to 0.2 -func LeakyReluAlpha(value float32) LeakyReluAttr { +// InfeedEnqueueAttr is an optional argument to InfeedEnqueue. +type InfeedEnqueueAttr func(optionalAttr) + +// InfeedEnqueueShape sets the optional shape attribute to value. +// +// value: The shape of the tensor. +// If not specified, defaults to <> +func InfeedEnqueueShape(value tf.Shape) InfeedEnqueueAttr { return func(m optionalAttr) { - m["alpha"] = value + m["shape"] = value } } -// Computes rectified linear: `max(features, features * alpha)`. -func LeakyRelu(scope *Scope, features tf.Output, optional ...LeakyReluAttr) (activations tf.Output) { +// InfeedEnqueueLayout sets the optional layout attribute to value. +// +// value: A vector holding the requested layout in minor-to-major sequence. +// If a layout attribute is passed, but its values are all -1, the layout will +// be computed by the infeed operation. +// If not specified, defaults to <> +func InfeedEnqueueLayout(value []int64) InfeedEnqueueAttr { + return func(m optionalAttr) { + m["layout"] = value + } +} + +// InfeedEnqueueDeviceOrdinal sets the optional device_ordinal attribute to value. +// +// value: The TPU device to use. This should be -1 when the Op +// is running on a TPU device, and >= 0 when the Op is running on the CPU +// device. +// If not specified, defaults to -1 +func InfeedEnqueueDeviceOrdinal(value int64) InfeedEnqueueAttr { + return func(m optionalAttr) { + m["device_ordinal"] = value + } +} + +// An op which feeds a single Tensor value into the computation. +// +// Arguments: +// input: A tensor that will be provided using the infeed mechanism. +// +// Returns the created operation. +func InfeedEnqueue(scope *Scope, input tf.Output, optional ...InfeedEnqueueAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -47575,41 +46544,13 @@ func LeakyRelu(scope *Scope, features tf.Output, optional ...LeakyReluAttr) (act a(attrs) } opspec := tf.OpSpec{ - Type: "LeakyRelu", + Type: "InfeedEnqueue", Input: []tf.Input{ - features, + input, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Component-wise divides a SparseTensor by a dense Tensor. -// -// *Limitation*: this Op only broadcasts the dense side to the sparse side, but not -// the other direction. -// -// Arguments: -// sp_indices: 2-D. `N x R` matrix with the indices of non-empty values in a -// SparseTensor, possibly not in canonical ordering. -// sp_values: 1-D. `N` non-empty values corresponding to `sp_indices`. -// sp_shape: 1-D. Shape of the input SparseTensor. -// dense: `R`-D. The dense Tensor operand. -// -// Returns 1-D. The `N` values that are operated on. -func SparseDenseCwiseDiv(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output, dense tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseDenseCwiseDiv", - Input: []tf.Input{ - sp_indices, sp_values, sp_shape, dense, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } // EnqueueTPUEmbeddingIntegerBatchAttr is an optional argument to EnqueueTPUEmbeddingIntegerBatch. @@ -47655,211 +46596,33 @@ func EnqueueTPUEmbeddingIntegerBatch(scope *Scope, batch []tf.Output, mode_overr return scope.AddOperation(opspec) } -// MapClearAttr is an optional argument to MapClear. -type MapClearAttr func(optionalAttr) - -// MapClearCapacity sets the optional capacity attribute to value. 
-// If not specified, defaults to 0 +// Component-wise divides a SparseTensor by a dense Tensor. // -// REQUIRES: value >= 0 -func MapClearCapacity(value int64) MapClearAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// MapClearMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func MapClearMemoryLimit(value int64) MapClearAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// MapClearContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func MapClearContainer(value string) MapClearAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// MapClearSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func MapClearSharedName(value string) MapClearAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Op removes all elements in the underlying container. -// -// Returns the created operation. -func MapClear(scope *Scope, dtypes []tf.DataType, optional ...MapClearAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MapClear", - - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Deserialize `SparseTensor` objects. -// -// The input `serialized_sparse` must have the shape `[?, ?, ..., ?, 3]` where -// the last dimension stores serialized `SparseTensor` objects and the other N -// dimensions (N >= 0) correspond to a batch. The ranks of the original -// `SparseTensor` objects must all match. When the final `SparseTensor` is -// created, its rank is the rank of the incoming `SparseTensor` objects plus N; -// the sparse tensors have been concatenated along new dimensions, one for each -// batch. -// -// The output `SparseTensor` object's shape values for the original dimensions -// are the max across the input `SparseTensor` objects' shape values for the -// corresponding dimensions. The new dimensions match the size of the batch. -// -// The input `SparseTensor` objects' indices are assumed ordered in -// standard lexicographic order. If this is not the case, after this -// step run `SparseReorder` to restore index ordering. -// -// For example, if the serialized input is a `[2 x 3]` matrix representing two -// original `SparseTensor` objects: -// -// index = [ 0] -// [10] -// [20] -// values = [1, 2, 3] -// shape = [50] -// -// and -// -// index = [ 2] -// [10] -// values = [4, 5] -// shape = [30] -// -// then the final deserialized `SparseTensor` will be: -// -// index = [0 0] -// [0 10] -// [0 20] -// [1 2] -// [1 10] -// values = [1, 2, 3, 4, 5] -// shape = [2 50] +// *Limitation*: this Op only broadcasts the dense side to the sparse side, but not +// the other direction. // // Arguments: -// serialized_sparse: The serialized `SparseTensor` objects. The last dimension -// must have 3 columns. -// dtype: The `dtype` of the serialized `SparseTensor` objects. 
-func DeserializeSparse(scope *Scope, serialized_sparse tf.Output, dtype tf.DataType) (sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype} - opspec := tf.OpSpec{ - Type: "DeserializeSparse", - Input: []tf.Input{ - serialized_sparse, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// Decode web-safe base64-encoded strings. +// sp_indices: 2-D. `N x R` matrix with the indices of non-empty values in a +// SparseTensor, possibly not in canonical ordering. +// sp_values: 1-D. `N` non-empty values corresponding to `sp_indices`. +// sp_shape: 1-D. Shape of the input SparseTensor. +// dense: `R`-D. The dense Tensor operand. // -// Input may or may not have padding at the end. See EncodeBase64 for padding. -// Web-safe means that input must use - and _ instead of + and /. -// -// Arguments: -// input: Base64 strings to decode. -// -// Returns Decoded strings. -func DecodeBase64(scope *Scope, input tf.Output) (output tf.Output) { +// Returns 1-D. The `N` values that are operated on. +func SparseDenseCwiseDiv(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output, dense tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "DecodeBase64", + Type: "SparseDenseCwiseDiv", Input: []tf.Input{ - input, + sp_indices, sp_values, sp_shape, dense, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// LoadTPUEmbeddingAdagradParametersAttr is an optional argument to LoadTPUEmbeddingAdagradParameters. -type LoadTPUEmbeddingAdagradParametersAttr func(optionalAttr) - -// LoadTPUEmbeddingAdagradParametersTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func LoadTPUEmbeddingAdagradParametersTableId(value int64) LoadTPUEmbeddingAdagradParametersAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// LoadTPUEmbeddingAdagradParametersTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingAdagradParametersTableName(value string) LoadTPUEmbeddingAdagradParametersAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// LoadTPUEmbeddingAdagradParametersConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingAdagradParametersConfig(value string) LoadTPUEmbeddingAdagradParametersAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Load Adagrad embedding parameters. -// -// An op that loads optimization parameters into HBM for embedding. Must be -// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct -// embedding table configuration. For example, this op is used to install -// parameters that are loaded from a checkpoint before a training loop is -// executed. -// -// Arguments: -// parameters: Value of parameters used in the Adagrad optimization algorithm. -// accumulators: Value of accumulators used in the Adagrad optimization algorithm. -// -// -// -// Returns the created operation. 
-func LoadTPUEmbeddingAdagradParameters(scope *Scope, parameters tf.Output, accumulators tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingAdagradParametersAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "LoadTPUEmbeddingAdagradParameters", - Input: []tf.Input{ - parameters, accumulators, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - // Returns the gradient of `Tile`. // // DEPRECATED at GraphDef version 3: TileGrad has been replaced with reduce_sum @@ -47998,432 +46761,155 @@ func LoadTPUEmbeddingFTRLParameters(scope *Scope, parameters tf.Output, accumula return scope.AddOperation(opspec) } -// DepthwiseConv2dNativeAttr is an optional argument to DepthwiseConv2dNative. -type DepthwiseConv2dNativeAttr func(optionalAttr) +// UnicodeDecodeWithOffsetsAttr is an optional argument to UnicodeDecodeWithOffsets. +type UnicodeDecodeWithOffsetsAttr func(optionalAttr) -// DepthwiseConv2dNativeExplicitPaddings sets the optional explicit_paddings attribute to value. -// If not specified, defaults to <> -func DepthwiseConv2dNativeExplicitPaddings(value []int64) DepthwiseConv2dNativeAttr { +// UnicodeDecodeWithOffsetsErrors sets the optional errors attribute to value. +// +// value: Error handling policy when there is invalid formatting found in the input. +// The value of 'strict' will cause the operation to produce a InvalidArgument +// error on any invalid input formatting. A value of 'replace' (the default) will +// cause the operation to replace any invalid formatting in the input with the +// `replacement_char` codepoint. A value of 'ignore' will cause the operation to +// skip any invalid formatting in the input and produce no corresponding output +// character. +// If not specified, defaults to "replace" +func UnicodeDecodeWithOffsetsErrors(value string) UnicodeDecodeWithOffsetsAttr { return func(m optionalAttr) { - m["explicit_paddings"] = value + m["errors"] = value } } -// DepthwiseConv2dNativeDataFormat sets the optional data_format attribute to value. +// UnicodeDecodeWithOffsetsReplacementChar sets the optional replacement_char attribute to value. // -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, height, width, channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, channels, height, width]. -// If not specified, defaults to "NHWC" -func DepthwiseConv2dNativeDataFormat(value string) DepthwiseConv2dNativeAttr { +// value: The replacement character codepoint to be used in place of any invalid +// formatting in the input when `errors='replace'`. Any valid unicode codepoint may +// be used. The default value is the default unicode replacement character is +// 0xFFFD or U+65533.) +// If not specified, defaults to 65533 +func UnicodeDecodeWithOffsetsReplacementChar(value int64) UnicodeDecodeWithOffsetsAttr { return func(m optionalAttr) { - m["data_format"] = value + m["replacement_char"] = value } } -// DepthwiseConv2dNativeDilations sets the optional dilations attribute to value. +// UnicodeDecodeWithOffsetsReplaceControlCharacters sets the optional replace_control_characters attribute to value. // -// value: 1-D tensor of length 4. The dilation factor for each dimension of -// `input`. 
If set to k > 1, there will be k-1 skipped cells between each filter -// element on that dimension. The dimension order is determined by the value of -// `data_format`, see above for details. Dilations in the batch and depth -// dimensions must be 1. -// If not specified, defaults to -func DepthwiseConv2dNativeDilations(value []int64) DepthwiseConv2dNativeAttr { +// value: Whether to replace the C0 control characters (00-1F) with the +// `replacement_char`. Default is false. +// If not specified, defaults to false +func UnicodeDecodeWithOffsetsReplaceControlCharacters(value bool) UnicodeDecodeWithOffsetsAttr { return func(m optionalAttr) { - m["dilations"] = value + m["replace_control_characters"] = value } } -// Computes a 2-D depthwise convolution given 4-D `input` and `filter` tensors. +// UnicodeDecodeWithOffsetsTsplits sets the optional Tsplits attribute to value. +// If not specified, defaults to DT_INT64 +func UnicodeDecodeWithOffsetsTsplits(value tf.DataType) UnicodeDecodeWithOffsetsAttr { + return func(m optionalAttr) { + m["Tsplits"] = value + } +} + +// Decodes each string in `input` into a sequence of Unicode code points. // -// Given an input tensor of shape `[batch, in_height, in_width, in_channels]` -// and a filter / kernel tensor of shape -// `[filter_height, filter_width, in_channels, channel_multiplier]`, containing -// `in_channels` convolutional filters of depth 1, `depthwise_conv2d` applies -// a different filter to each input channel (expanding from 1 channel to -// `channel_multiplier` channels for each), then concatenates the results -// together. Thus, the output has `in_channels * channel_multiplier` channels. +// The character codepoints for all strings are returned using a single vector +// `char_values`, with strings expanded to characters in row-major order. +// Similarly, the character start byte offsets are returned using a single vector +// `char_to_byte_starts`, with strings expanded in row-major order. // -// ``` -// for k in 0..in_channels-1 -// for q in 0..channel_multiplier-1 -// output[b, i, j, k * channel_multiplier + q] = -// sum_{di, dj} input[b, strides[1] * i + di, strides[2] * j + dj, k] * -// filter[di, dj, k, q] -// ``` +// The `row_splits` tensor indicates where the codepoints and start offsets for +// each input string begin and end within the `char_values` and +// `char_to_byte_starts` tensors. In particular, the values for the `i`th +// string (in row-major order) are stored in the slice +// `[row_splits[i]:row_splits[i+1]]`. Thus: // -// Must have `strides[0] = strides[3] = 1`. For the most common case of the same -// horizontal and vertices strides, `strides = [1, stride, stride, 1]`. +// * `char_values[row_splits[i]+j]` is the Unicode codepoint for the `j`th +// character in the `i`th string (in row-major order). +// * `char_to_bytes_starts[row_splits[i]+j]` is the start byte offset for the `j`th +// character in the `i`th string (in row-major order). +// * `row_splits[i+1] - row_splits[i]` is the number of characters in the `i`th +// string (in row-major order). // // Arguments: +// input: The text to be decoded. Can have any shape. Note that the output is flattened +// to a vector of char values. +// input_encoding: Text encoding of the input strings. This is any of the encodings supported +// by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`. // -// -// strides: 1-D of length 4. The stride of the sliding window for each dimension -// of `input`. -// padding: The type of padding algorithm to use. 
-func DepthwiseConv2dNative(scope *Scope, input tf.Output, filter tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeAttr) (output tf.Output) { +// Returns: +// row_splits: A 1D int32 tensor containing the row splits. +// char_values: A 1D int32 Tensor containing the decoded codepoints. +// char_to_byte_starts: A 1D int32 Tensor containing the byte index in the input string where each +// character in `char_values` starts. +func UnicodeDecodeWithOffsets(scope *Scope, input tf.Output, input_encoding string, optional ...UnicodeDecodeWithOffsetsAttr) (row_splits tf.Output, char_values tf.Output, char_to_byte_starts tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"strides": strides, "padding": padding} + attrs := map[string]interface{}{"input_encoding": input_encoding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "DepthwiseConv2dNative", - Input: []tf.Input{ - input, filter, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates an all-zeros CSRSparseMatrix with shape `dense_shape`. -// -// Arguments: -// dense_shape: The desired matrix shape. -// -// -// Returns An empty CSR matrix with shape `dense_shape`. -func SparseMatrixZeros(scope *Scope, dense_shape tf.Output, type_ tf.DataType) (sparse_matrix tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"type": type_} - opspec := tf.OpSpec{ - Type: "SparseMatrixZeros", - Input: []tf.Input{ - dense_shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// PaddingFIFOQueueV2Attr is an optional argument to PaddingFIFOQueueV2. -type PaddingFIFOQueueV2Attr func(optionalAttr) - -// PaddingFIFOQueueV2Shapes sets the optional shapes attribute to value. -// -// value: The shape of each component in a value. The length of this attr must -// be either 0 or the same as the length of component_types. -// Shapes of fixed rank but variable size are allowed by setting -// any shape dimension to -1. In this case, the inputs' shape may vary along -// the given dimension, and DequeueMany will pad the given dimension with -// zeros up to the maximum shape of all elements in the given batch. -// If the length of this attr is 0, different queue elements may have -// different ranks and shapes, but only one element may be dequeued at a time. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func PaddingFIFOQueueV2Shapes(value []tf.Shape) PaddingFIFOQueueV2Attr { - return func(m optionalAttr) { - m["shapes"] = value - } -} - -// PaddingFIFOQueueV2Capacity sets the optional capacity attribute to value. -// -// value: The upper bound on the number of elements in this queue. -// Negative numbers mean no limit. -// If not specified, defaults to -1 -func PaddingFIFOQueueV2Capacity(value int64) PaddingFIFOQueueV2Attr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// PaddingFIFOQueueV2Container sets the optional container attribute to value. -// -// value: If non-empty, this queue is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func PaddingFIFOQueueV2Container(value string) PaddingFIFOQueueV2Attr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// PaddingFIFOQueueV2SharedName sets the optional shared_name attribute to value. -// -// value: If non-empty, this queue will be shared under the given name -// across multiple sessions. 
-// If not specified, defaults to "" -func PaddingFIFOQueueV2SharedName(value string) PaddingFIFOQueueV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// A queue that produces elements in first-in first-out order. -// -// Variable-size shapes are allowed by setting the corresponding shape dimensions -// to 0 in the shape attr. In this case DequeueMany will pad up to the maximum -// size of any given element in the minibatch. See below for details. -// -// Arguments: -// component_types: The type of each component in a value. -// -// Returns The handle to the queue. -func PaddingFIFOQueueV2(scope *Scope, component_types []tf.DataType, optional ...PaddingFIFOQueueV2Attr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"component_types": component_types} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "PaddingFIFOQueueV2", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// LoadTPUEmbeddingMomentumParametersGradAccumDebugAttr is an optional argument to LoadTPUEmbeddingMomentumParametersGradAccumDebug. -type LoadTPUEmbeddingMomentumParametersGradAccumDebugAttr func(optionalAttr) - -// LoadTPUEmbeddingMomentumParametersGradAccumDebugTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func LoadTPUEmbeddingMomentumParametersGradAccumDebugTableId(value int64) LoadTPUEmbeddingMomentumParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// LoadTPUEmbeddingMomentumParametersGradAccumDebugTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingMomentumParametersGradAccumDebugTableName(value string) LoadTPUEmbeddingMomentumParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// LoadTPUEmbeddingMomentumParametersGradAccumDebugConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingMomentumParametersGradAccumDebugConfig(value string) LoadTPUEmbeddingMomentumParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Load Momentum embedding parameters with debug support. -// -// An op that loads optimization parameters into HBM for embedding. Must be -// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct -// embedding table configuration. For example, this op is used to install -// parameters that are loaded from a checkpoint before a training loop is -// executed. -// -// Arguments: -// parameters: Value of parameters used in the Momentum optimization algorithm. -// momenta: Value of momenta used in the Momentum optimization algorithm. -// gradient_accumulators: Value of gradient_accumulators used in the Momentum optimization algorithm. -// -// -// -// Returns the created operation. 
-func LoadTPUEmbeddingMomentumParametersGradAccumDebug(scope *Scope, parameters tf.Output, momenta tf.Output, gradient_accumulators tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingMomentumParametersGradAccumDebugAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "LoadTPUEmbeddingMomentumParametersGradAccumDebug", - Input: []tf.Input{ - parameters, momenta, gradient_accumulators, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Constructs a tensor by tiling a given tensor. -// -// This operation creates a new tensor by replicating `input` `multiples` times. -// The output tensor's i'th dimension has `input.dims(i) * multiples[i]` elements, -// and the values of `input` are replicated `multiples[i]` times along the 'i'th -// dimension. For example, tiling `[a b c d]` by `[2]` produces -// `[a b c d a b c d]`. -// -// >>> a = tf.constant([[1,2,3],[4,5,6]], tf.int32) -// >>> b = tf.constant([1,2], tf.int32) -// >>> tf.tile(a, b) -// -// >>> c = tf.constant([2,1], tf.int32) -// >>> tf.tile(a, c) -// -// >>> d = tf.constant([2,2], tf.int32) -// >>> tf.tile(a, d) -// -// -// Arguments: -// input: 1-D or higher. -// multiples: 1-D. Length must be the same as the number of dimensions in `input` -func Tile(scope *Scope, input tf.Output, multiples tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Tile", - Input: []tf.Input{ - input, multiples, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// SerializeSparseAttr is an optional argument to SerializeSparse. -type SerializeSparseAttr func(optionalAttr) - -// SerializeSparseOutType sets the optional out_type attribute to value. -// -// value: The `dtype` to use for serialization; the supported types are `string` -// (default) and `variant`. -// If not specified, defaults to DT_STRING -func SerializeSparseOutType(value tf.DataType) SerializeSparseAttr { - return func(m optionalAttr) { - m["out_type"] = value - } -} - -// Serialize a `SparseTensor` into a `[3]` `Tensor` object. -// -// Arguments: -// sparse_indices: 2-D. The `indices` of the `SparseTensor`. -// sparse_values: 1-D. The `values` of the `SparseTensor`. -// sparse_shape: 1-D. The `shape` of the `SparseTensor`. -func SerializeSparse(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...SerializeSparseAttr) (serialized_sparse tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SerializeSparse", - Input: []tf.Input{ - sparse_indices, sparse_values, sparse_shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Extracts the average gradient in the given ConditionalAccumulator. -// -// The op blocks until sufficient (i.e., more than num_required) -// gradients have been accumulated. If the accumulator has already -// aggregated more than num_required gradients, it returns the average of -// the accumulated gradients. Also automatically increments the recorded -// global_step in the accumulator by 1, and resets the aggregate to 0. -// -// Arguments: -// handle: The handle to an accumulator. -// num_required: Number of gradients required before we return an aggregate. 
-// dtype: The data type of accumulated gradients. Needs to correspond to the type -// of the accumulator. -// -// Returns The average of the accumulated gradients. -func ResourceAccumulatorTakeGradient(scope *Scope, handle tf.Output, num_required tf.Output, dtype tf.DataType) (average tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype} - opspec := tf.OpSpec{ - Type: "ResourceAccumulatorTakeGradient", - Input: []tf.Input{ - handle, num_required, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// InfeedEnqueueAttr is an optional argument to InfeedEnqueue. -type InfeedEnqueueAttr func(optionalAttr) - -// InfeedEnqueueShape sets the optional shape attribute to value. -// -// value: The shape of the tensor. -// If not specified, defaults to <> -func InfeedEnqueueShape(value tf.Shape) InfeedEnqueueAttr { - return func(m optionalAttr) { - m["shape"] = value - } -} - -// InfeedEnqueueLayout sets the optional layout attribute to value. -// -// value: A vector holding the requested layout in minor-to-major sequence. -// If a layout attribute is passed, but its values are all -1, the layout will -// be computed by the infeed operation. -// If not specified, defaults to <> -func InfeedEnqueueLayout(value []int64) InfeedEnqueueAttr { - return func(m optionalAttr) { - m["layout"] = value - } -} - -// InfeedEnqueueDeviceOrdinal sets the optional device_ordinal attribute to value. -// -// value: The TPU device to use. This should be -1 when the Op -// is running on a TPU device, and >= 0 when the Op is running on the CPU -// device. -// If not specified, defaults to -1 -func InfeedEnqueueDeviceOrdinal(value int64) InfeedEnqueueAttr { - return func(m optionalAttr) { - m["device_ordinal"] = value - } -} - -// An op which feeds a single Tensor value into the computation. -// -// Arguments: -// input: A tensor that will be provided using the infeed mechanism. -// -// Returns the created operation. -func InfeedEnqueue(scope *Scope, input tf.Output, optional ...InfeedEnqueueAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "InfeedEnqueue", + Type: "UnicodeDecodeWithOffsets", Input: []tf.Input{ input, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// TPUPartitionedOutputAttr is an optional argument to TPUPartitionedOutput. +type TPUPartitionedOutputAttr func(optionalAttr) + +// TPUPartitionedOutputPartitionDim sets the optional partition_dim attribute to value. +// +// value: An integer describles which dimension is partitioned. +// If not specified, defaults to 0 +func TPUPartitionedOutputPartitionDim(value int64) TPUPartitionedOutputAttr { + return func(m optionalAttr) { + m["partition_dim"] = value + } +} + +// An op that demultiplexes a tensor to be sharded by XLA to a list of partitioned +// +// outputs outside the XLA computation. +// +// Arguments: +// inputs: A tensor which represents the full shape of partitioned tensors. +// +// +// Returns A list of partitioned inputs which must have the same shape. 
+func TPUPartitionedOutput(scope *Scope, inputs tf.Output, num_splits int64, optional ...TPUPartitionedOutputAttr) (output []tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_splits": num_splits} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "TPUPartitionedOutput", + Input: []tf.Input{ + inputs, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if output, idx, err = makeOutputList(op, idx, "output"); err != nil { + scope.UpdateErr("TPUPartitionedOutput", err) + return + } + return output } // Computes the mean along segments of a tensor. @@ -48612,346 +47098,6 @@ func ResourceSparseApplyKerasMomentum(scope *Scope, var_ tf.Output, accum tf.Out return scope.AddOperation(opspec) } -// MaxPoolGradGradV2Attr is an optional argument to MaxPoolGradGradV2. -type MaxPoolGradGradV2Attr func(optionalAttr) - -// MaxPoolGradGradV2DataFormat sets the optional data_format attribute to value. -// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// If not specified, defaults to "NHWC" -func MaxPoolGradGradV2DataFormat(value string) MaxPoolGradGradV2Attr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Computes second-order gradients of the maxpooling function. -// -// Arguments: -// orig_input: The original input tensor. -// orig_output: The original output tensor. -// grad: 4-D. Gradients of gradients w.r.t. the input of `max_pool`. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. -// -// Returns Gradients of gradients w.r.t. the input to `max_pool`. -func MaxPoolGradGradV2(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolGradGradV2Attr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MaxPoolGradGradV2", - Input: []tf.Input{ - orig_input, orig_output, grad, ksize, strides, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RetrieveTPUEmbeddingMomentumParametersAttr is an optional argument to RetrieveTPUEmbeddingMomentumParameters. -type RetrieveTPUEmbeddingMomentumParametersAttr func(optionalAttr) - -// RetrieveTPUEmbeddingMomentumParametersTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func RetrieveTPUEmbeddingMomentumParametersTableId(value int64) RetrieveTPUEmbeddingMomentumParametersAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// RetrieveTPUEmbeddingMomentumParametersTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingMomentumParametersTableName(value string) RetrieveTPUEmbeddingMomentumParametersAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// RetrieveTPUEmbeddingMomentumParametersConfig sets the optional config attribute to value. 
-// If not specified, defaults to "" -func RetrieveTPUEmbeddingMomentumParametersConfig(value string) RetrieveTPUEmbeddingMomentumParametersAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Retrieve Momentum embedding parameters. -// -// An op that retrieves optimization parameters from embedding to host -// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up -// the correct embedding table configuration. For example, this op is -// used to retrieve updated parameters before saving a checkpoint. -// -// Returns: -// parameters: Parameter parameters updated by the Momentum optimization algorithm. -// momenta: Parameter momenta updated by the Momentum optimization algorithm. -func RetrieveTPUEmbeddingMomentumParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingMomentumParametersAttr) (parameters tf.Output, momenta tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RetrieveTPUEmbeddingMomentumParameters", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// ConfigureDistributedTPUAttr is an optional argument to ConfigureDistributedTPU. -type ConfigureDistributedTPUAttr func(optionalAttr) - -// ConfigureDistributedTPUEmbeddingConfig sets the optional embedding_config attribute to value. -// -// value: Reserved. Do not use. -// If not specified, defaults to "" -func ConfigureDistributedTPUEmbeddingConfig(value string) ConfigureDistributedTPUAttr { - return func(m optionalAttr) { - m["embedding_config"] = value - } -} - -// ConfigureDistributedTPUTpuEmbeddingConfig sets the optional tpu_embedding_config attribute to value. -// -// value: Serialized tensorflow.tpu.TPUEmbeddingConfiguration that -// describes the embedding lookups of the program. -// If not specified, defaults to "" -func ConfigureDistributedTPUTpuEmbeddingConfig(value string) ConfigureDistributedTPUAttr { - return func(m optionalAttr) { - m["tpu_embedding_config"] = value - } -} - -// ConfigureDistributedTPUIsGlobalInit sets the optional is_global_init attribute to value. -// -// value: Reserved. Do not use. -// If not specified, defaults to false -func ConfigureDistributedTPUIsGlobalInit(value bool) ConfigureDistributedTPUAttr { - return func(m optionalAttr) { - m["is_global_init"] = value - } -} - -// ConfigureDistributedTPUEnableWholeMeshCompilations sets the optional enable_whole_mesh_compilations attribute to value. -// If not specified, defaults to false -func ConfigureDistributedTPUEnableWholeMeshCompilations(value bool) ConfigureDistributedTPUAttr { - return func(m optionalAttr) { - m["enable_whole_mesh_compilations"] = value - } -} - -// ConfigureDistributedTPUCompilationFailureClosesChips sets the optional compilation_failure_closes_chips attribute to value. -// If not specified, defaults to true -func ConfigureDistributedTPUCompilationFailureClosesChips(value bool) ConfigureDistributedTPUAttr { - return func(m optionalAttr) { - m["compilation_failure_closes_chips"] = value - } -} - -// Sets up the centralized structures for a distributed TPU system. -// -// Returns A serialized tensorflow.tpu.TopologyProto that describes the TPU -// topology. 
-func ConfigureDistributedTPU(scope *Scope, optional ...ConfigureDistributedTPUAttr) (topology tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ConfigureDistributedTPU", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Combines (nests of) input elements into a dataset of (nests of) windows. -// -// A "window" is a finite dataset of flat elements of size `size` (or possibly -// fewer if there are not enough input elements to fill the window and -// `drop_remainder` evaluates to false). -// -// The `shift` argument determines the number of input elements by which -// the window moves on each iteration. The first element in the `k`th window -// will be element -// -// ``` -// 1 + (k-1) * shift -// ``` -// -// of the input dataset. In particular, the first element of the first window -// will always be the first element of the input dataset. -// -// If the `stride` parameter is greater than 1, then each window will skip -// `(stride - 1)` input elements between each element that appears in the -// window. Output windows will still contain `size` elements regardless of -// the value of `stride`. -// -// The `stride` argument determines the stride of the input elements, and the -// `shift` argument determines the shift of the window. -// -// For example, letting `{...}` to represent a Dataset: -// -// - `tf.data.Dataset.range(7).window(2)` produces -// `{{0, 1}, {2, 3}, {4, 5}, {6}}` -// - `tf.data.Dataset.range(7).window(3, 2, 1, True)` produces -// `{{0, 1, 2}, {2, 3, 4}, {4, 5, 6}}` -// - `tf.data.Dataset.range(7).window(3, 1, 2, True)` produces -// `{{0, 2, 4}, {1, 3, 5}, {2, 4, 6}}` -// -// Note that when the `window` transformation is applied to a dataset of -// nested elements, it produces a dataset of nested windows. -// -// For example: -// -// - `tf.data.Dataset.from_tensor_slices((range(4), range(4))).window(2)` -// produces `{({0, 1}, {0, 1}), ({2, 3}, {2, 3})}` -// - `tf.data.Dataset.from_tensor_slices({"a": range(4)}).window(2)` -// produces `{{"a": {0, 1}}, {"a": {2, 3}}}` -// -// Arguments: -// -// size: An integer scalar, representing the number of elements -// of the input dataset to combine into a window. Must be positive. -// shift: An integer scalar, representing the number of input elements -// by which the window moves in each iteration. Defaults to `size`. -// Must be positive. -// stride: An integer scalar, representing the stride of the input elements -// in the sliding window. Must be positive. The default value of 1 means -// "retain every input element". -// drop_remainder: A Boolean scalar, representing whether the last window should be -// dropped if its size is smaller than `window_size`. -// -// -func WindowDataset(scope *Scope, input_dataset tf.Output, size tf.Output, shift tf.Output, stride tf.Output, drop_remainder tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "WindowDataset", - Input: []tf.Input{ - input_dataset, size, shift, stride, drop_remainder, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// SetSizeAttr is an optional argument to SetSize. -type SetSizeAttr func(optionalAttr) - -// SetSizeValidateIndices sets the optional validate_indices attribute to value. 
-// If not specified, defaults to true -func SetSizeValidateIndices(value bool) SetSizeAttr { - return func(m optionalAttr) { - m["validate_indices"] = value - } -} - -// Number of unique elements along last dimension of input `set`. -// -// Input `set` is a `SparseTensor` represented by `set_indices`, `set_values`, -// and `set_shape`. The last dimension contains values in a set, duplicates are -// allowed but ignored. -// -// If `validate_indices` is `True`, this op validates the order and range of `set` -// indices. -// -// Arguments: -// set_indices: 2D `Tensor`, indices of a `SparseTensor`. -// set_values: 1D `Tensor`, values of a `SparseTensor`. -// set_shape: 1D `Tensor`, shape of a `SparseTensor`. -// -// Returns For `set` ranked `n`, this is a `Tensor` with rank `n-1`, and the same 1st -// `n-1` dimensions as `set`. Each value is the number of unique elements in -// the corresponding `[0...n-1]` dimension of `set`. -func SetSize(scope *Scope, set_indices tf.Output, set_values tf.Output, set_shape tf.Output, optional ...SetSizeAttr) (size tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SetSize", - Input: []tf.Input{ - set_indices, set_values, set_shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// AutoShardDatasetAttr is an optional argument to AutoShardDataset. -type AutoShardDatasetAttr func(optionalAttr) - -// AutoShardDatasetAutoShardPolicy sets the optional auto_shard_policy attribute to value. -// If not specified, defaults to 0 -func AutoShardDatasetAutoShardPolicy(value int64) AutoShardDatasetAttr { - return func(m optionalAttr) { - m["auto_shard_policy"] = value - } -} - -// Creates a dataset that shards the input dataset. -// -// Creates a dataset that shards the input dataset by num_workers, returning a -// sharded dataset for the index-th worker. This attempts to automatically shard -// a dataset by examining the Dataset graph and inserting a shard op before the -// inputs to a reader Dataset (e.g. CSVDataset, TFRecordDataset). -// -// This dataset will throw a NotFound error if we cannot shard the dataset -// automatically. -// -// Arguments: -// input_dataset: A variant tensor representing the input dataset. -// num_workers: A scalar representing the number of workers to distribute this dataset across. -// index: A scalar representing the index of the current worker out of num_workers. -// -// -func AutoShardDataset(scope *Scope, input_dataset tf.Output, num_workers tf.Output, index tf.Output, output_types []tf.DataType, output_shapes []tf.Shape, optional ...AutoShardDatasetAttr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "AutoShardDataset", - Input: []tf.Input{ - input_dataset, num_workers, index, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // RetrieveTPUEmbeddingFTRLParametersAttr is an optional argument to RetrieveTPUEmbeddingFTRLParameters. type RetrieveTPUEmbeddingFTRLParametersAttr func(optionalAttr) @@ -49023,43 +47169,216 @@ func TPUCompilationResult(scope *Scope) (output tf.Output) { return op.Output(0) } -// ResourceApplyAdagradV2Attr is an optional argument to ResourceApplyAdagradV2. 
-type ResourceApplyAdagradV2Attr func(optionalAttr) - -// ResourceApplyAdagradV2UseLocking sets the optional use_locking attribute to value. +// Returns element-wise integer closest to x. // -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyAdagradV2UseLocking(value bool) ResourceApplyAdagradV2Attr { +// If the result is midway between two representable values, +// the even representable is chosen. +// For example: +// +// ``` +// rint(-1.5) ==> -2.0 +// rint(0.5000001) ==> 1.0 +// rint([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) ==> [-2., -2., -0., 0., 2., 2., 2.] +// ``` +func Rint(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Rint", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// ParseSequenceExampleV2Attr is an optional argument to ParseSequenceExampleV2. +type ParseSequenceExampleV2Attr func(optionalAttr) + +// ParseSequenceExampleV2NcontextSparse sets the optional Ncontext_sparse attribute to value. +// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func ParseSequenceExampleV2NcontextSparse(value int64) ParseSequenceExampleV2Attr { return func(m optionalAttr) { - m["use_locking"] = value + m["Ncontext_sparse"] = value } } -// ResourceApplyAdagradV2UpdateSlots sets the optional update_slots attribute to value. -// If not specified, defaults to true -func ResourceApplyAdagradV2UpdateSlots(value bool) ResourceApplyAdagradV2Attr { +// ParseSequenceExampleV2ContextSparseTypes sets the optional context_sparse_types attribute to value. +// +// value: A list of Ncontext_sparse types; the data types of data in +// each context Feature given in context_sparse_keys. +// Currently the ParseSingleSequenceExample supports DT_FLOAT (FloatList), +// DT_INT64 (Int64List), and DT_STRING (BytesList). +// If not specified, defaults to <> +// +// REQUIRES: len(value) >= 0 +func ParseSequenceExampleV2ContextSparseTypes(value []tf.DataType) ParseSequenceExampleV2Attr { return func(m optionalAttr) { - m["update_slots"] = value + m["context_sparse_types"] = value } } -// Update '*var' according to the adagrad scheme. +// ParseSequenceExampleV2ContextRaggedValueTypes sets the optional context_ragged_value_types attribute to value. // -// accum += grad * grad -// var -= lr * grad * (1 / (sqrt(accum) + epsilon)) +// value: RaggedTensor.value dtypes for the ragged context features. +// If not specified, defaults to <> +// +// REQUIRES: len(value) >= 0 +func ParseSequenceExampleV2ContextRaggedValueTypes(value []tf.DataType) ParseSequenceExampleV2Attr { + return func(m optionalAttr) { + m["context_ragged_value_types"] = value + } +} + +// ParseSequenceExampleV2ContextRaggedSplitTypes sets the optional context_ragged_split_types attribute to value. +// +// value: RaggedTensor.row_split dtypes for the ragged context features. +// If not specified, defaults to <> +// +// REQUIRES: len(value) >= 0 +func ParseSequenceExampleV2ContextRaggedSplitTypes(value []tf.DataType) ParseSequenceExampleV2Attr { + return func(m optionalAttr) { + m["context_ragged_split_types"] = value + } +} + +// ParseSequenceExampleV2ContextDenseShapes sets the optional context_dense_shapes attribute to value. +// +// value: A list of Ncontext_dense shapes; the shapes of data in +// each context Feature given in context_dense_keys. 
+// The number of elements in the Feature corresponding to context_dense_key[j] +// must always equal context_dense_shapes[j].NumEntries(). +// The shape of context_dense_values[j] will match context_dense_shapes[j]. +// If not specified, defaults to <> +// +// REQUIRES: len(value) >= 0 +func ParseSequenceExampleV2ContextDenseShapes(value []tf.Shape) ParseSequenceExampleV2Attr { + return func(m optionalAttr) { + m["context_dense_shapes"] = value + } +} + +// ParseSequenceExampleV2NfeatureListSparse sets the optional Nfeature_list_sparse attribute to value. +// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func ParseSequenceExampleV2NfeatureListSparse(value int64) ParseSequenceExampleV2Attr { + return func(m optionalAttr) { + m["Nfeature_list_sparse"] = value + } +} + +// ParseSequenceExampleV2NfeatureListDense sets the optional Nfeature_list_dense attribute to value. +// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func ParseSequenceExampleV2NfeatureListDense(value int64) ParseSequenceExampleV2Attr { + return func(m optionalAttr) { + m["Nfeature_list_dense"] = value + } +} + +// ParseSequenceExampleV2FeatureListDenseTypes sets the optional feature_list_dense_types attribute to value. +// If not specified, defaults to <> +// +// REQUIRES: len(value) >= 0 +func ParseSequenceExampleV2FeatureListDenseTypes(value []tf.DataType) ParseSequenceExampleV2Attr { + return func(m optionalAttr) { + m["feature_list_dense_types"] = value + } +} + +// ParseSequenceExampleV2FeatureListSparseTypes sets the optional feature_list_sparse_types attribute to value. +// +// value: A list of Nfeature_list_sparse types; the data types +// of data in each FeatureList given in feature_list_sparse_keys. +// Currently the ParseSingleSequenceExample supports DT_FLOAT (FloatList), +// DT_INT64 (Int64List), and DT_STRING (BytesList). +// If not specified, defaults to <> +// +// REQUIRES: len(value) >= 0 +func ParseSequenceExampleV2FeatureListSparseTypes(value []tf.DataType) ParseSequenceExampleV2Attr { + return func(m optionalAttr) { + m["feature_list_sparse_types"] = value + } +} + +// ParseSequenceExampleV2FeatureListRaggedValueTypes sets the optional feature_list_ragged_value_types attribute to value. +// +// value: RaggedTensor.value dtypes for the ragged FeatureList features. +// If not specified, defaults to <> +// +// REQUIRES: len(value) >= 0 +func ParseSequenceExampleV2FeatureListRaggedValueTypes(value []tf.DataType) ParseSequenceExampleV2Attr { + return func(m optionalAttr) { + m["feature_list_ragged_value_types"] = value + } +} + +// ParseSequenceExampleV2FeatureListRaggedSplitTypes sets the optional feature_list_ragged_split_types attribute to value. +// +// value: RaggedTensor.row_split dtypes for the ragged FeatureList features. +// If not specified, defaults to <> +// +// REQUIRES: len(value) >= 0 +func ParseSequenceExampleV2FeatureListRaggedSplitTypes(value []tf.DataType) ParseSequenceExampleV2Attr { + return func(m optionalAttr) { + m["feature_list_ragged_split_types"] = value + } +} + +// ParseSequenceExampleV2FeatureListDenseShapes sets the optional feature_list_dense_shapes attribute to value. +// +// value: A list of Nfeature_list_dense shapes; the shapes of +// data in each FeatureList given in feature_list_dense_keys. +// The shape of each Feature in the FeatureList corresponding to +// feature_list_dense_key[j] must always equal +// feature_list_dense_shapes[j].NumEntries(). 
+// If not specified, defaults to <> +// +// REQUIRES: len(value) >= 0 +func ParseSequenceExampleV2FeatureListDenseShapes(value []tf.Shape) ParseSequenceExampleV2Attr { + return func(m optionalAttr) { + m["feature_list_dense_shapes"] = value + } +} + +// Transforms a vector of tf.io.SequenceExample protos (as strings) into +// typed tensors. // // Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// epsilon: Constant factor. Must be a scalar. -// grad: The gradient. -// -// Returns the created operation. -func ResourceApplyAdagradV2(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdagradV2Attr) (o *tf.Operation) { +// serialized: A scalar or vector containing binary serialized SequenceExample protos. +// debug_name: A scalar or vector containing the names of the serialized protos. +// May contain, for example, table key (descriptive) name for the +// corresponding serialized proto. This is purely useful for debugging +// purposes, and the presence of values here has no effect on the output. +// May also be an empty vector if no name is available. +// context_sparse_keys: The keys expected in the Examples' features associated with context_sparse +// values. +// context_dense_keys: The keys expected in the SequenceExamples' context features associated with +// dense values. +// context_ragged_keys: The keys expected in the Examples' features associated with context_ragged +// values. +// feature_list_sparse_keys: The keys expected in the FeatureLists associated with sparse values. +// feature_list_dense_keys: The keys expected in the SequenceExamples' feature_lists associated +// with lists of dense values. +// feature_list_ragged_keys: The keys expected in the FeatureLists associated with ragged values. +// feature_list_dense_missing_assumed_empty: A vector corresponding 1:1 with feature_list_dense_keys, indicating which +// features may be missing from the SequenceExamples. If the associated +// FeatureList is missing, it is treated as empty. +// context_dense_defaults: A list of Ncontext_dense Tensors (some may be empty). +// context_dense_defaults[j] provides default values +// when the SequenceExample's context map lacks context_dense_key[j]. +// If an empty Tensor is provided for context_dense_defaults[j], +// then the Feature context_dense_keys[j] is required. +// The input type is inferred from context_dense_defaults[j], even when it's +// empty. If context_dense_defaults[j] is not empty, its shape must match +// context_dense_shapes[j]. 
+func ParseSequenceExampleV2(scope *Scope, serialized tf.Output, debug_name tf.Output, context_sparse_keys tf.Output, context_dense_keys tf.Output, context_ragged_keys tf.Output, feature_list_sparse_keys tf.Output, feature_list_dense_keys tf.Output, feature_list_ragged_keys tf.Output, feature_list_dense_missing_assumed_empty tf.Output, context_dense_defaults []tf.Output, optional ...ParseSequenceExampleV2Attr) (context_sparse_indices []tf.Output, context_sparse_values []tf.Output, context_sparse_shapes []tf.Output, context_dense_values []tf.Output, context_ragged_values []tf.Output, context_ragged_row_splits []tf.Output, feature_list_sparse_indices []tf.Output, feature_list_sparse_values []tf.Output, feature_list_sparse_shapes []tf.Output, feature_list_dense_values []tf.Output, feature_list_dense_lengths []tf.Output, feature_list_ragged_values []tf.Output, feature_list_ragged_outer_splits []tf.Output, feature_list_ragged_inner_splits []tf.Output) { if scope.Err() != nil { return } @@ -49068,260 +47387,75 @@ func ResourceApplyAdagradV2(scope *Scope, var_ tf.Output, accum tf.Output, lr tf a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceApplyAdagradV2", + Type: "ParseSequenceExampleV2", Input: []tf.Input{ - var_, accum, lr, epsilon, grad, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// QuantizedMatMulWithBiasAndReluAttr is an optional argument to QuantizedMatMulWithBiasAndRelu. -type QuantizedMatMulWithBiasAndReluAttr func(optionalAttr) - -// QuantizedMatMulWithBiasAndReluToutput sets the optional Toutput attribute to value. -// If not specified, defaults to DT_QINT32 -func QuantizedMatMulWithBiasAndReluToutput(value tf.DataType) QuantizedMatMulWithBiasAndReluAttr { - return func(m optionalAttr) { - m["Toutput"] = value - } -} - -// QuantizedMatMulWithBiasAndReluTransposeA sets the optional transpose_a attribute to value. -// -// value: If true, `a` is transposed before multiplication. -// If not specified, defaults to false -func QuantizedMatMulWithBiasAndReluTransposeA(value bool) QuantizedMatMulWithBiasAndReluAttr { - return func(m optionalAttr) { - m["transpose_a"] = value - } -} - -// QuantizedMatMulWithBiasAndReluTransposeB sets the optional transpose_b attribute to value. -// -// value: If true, `b` is transposed before multiplication. -// If not specified, defaults to false -func QuantizedMatMulWithBiasAndReluTransposeB(value bool) QuantizedMatMulWithBiasAndReluAttr { - return func(m optionalAttr) { - m["transpose_b"] = value - } -} - -// QuantizedMatMulWithBiasAndReluInputQuantMode sets the optional input_quant_mode attribute to value. -// -// value: Input data quantization mode. Either MIN_FIRST(default) or SCALED. -// If not specified, defaults to "MIN_FIRST" -func QuantizedMatMulWithBiasAndReluInputQuantMode(value string) QuantizedMatMulWithBiasAndReluAttr { - return func(m optionalAttr) { - m["input_quant_mode"] = value - } -} - -// Perform a quantized matrix multiplication of `a` by the matrix `b` with bias -// add and relu fusion. -// -// The inputs must be two-dimensional matrices and 1D bias vector. And the inner -// dimension of `a` (after being transposed if `transpose_a` is non-zero) must -// match the outer dimension of `b` (after being transposed if `transposed_b` is -// non-zero). Then do broadcast add operation with bias values on the matrix -// multiplication result. The bias size must match inner dimension of `b`. Then do -// relu activation to get non-negative result. -// -// Arguments: -// a: A matrix to be multiplied. 
Must be a two-dimensional tensor of type `quint8`. -// b: A matrix to be multiplied and must be a two-dimensional tensor of type `qint8`. -// bias: A 1D bias tensor with size matching with inner dimension of `b` (after being -// transposed if `transposed_b` is non-zero). -// min_a: The float value that the lowest quantized `a` value represents. -// max_a: The float value that the highest quantized `a` value represents. -// min_b: The float value that the lowest quantized `b` value represents. -// max_b: The float value that the highest quantized `b` value represents. -// -// Returns: -// out -// min_out: The float value that the lowest quantized output value represents. -// max_out: The float value that the highest quantized output value represents. -func QuantizedMatMulWithBiasAndRelu(scope *Scope, a tf.Output, b tf.Output, bias tf.Output, min_a tf.Output, max_a tf.Output, min_b tf.Output, max_b tf.Output, optional ...QuantizedMatMulWithBiasAndReluAttr) (out tf.Output, min_out tf.Output, max_out tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "QuantizedMatMulWithBiasAndRelu", - Input: []tf.Input{ - a, b, bias, min_a, max_a, min_b, max_b, + serialized, debug_name, context_sparse_keys, context_dense_keys, context_ragged_keys, feature_list_sparse_keys, feature_list_dense_keys, feature_list_ragged_keys, feature_list_dense_missing_assumed_empty, tf.OutputList(context_dense_defaults), }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// Shuts down a running distributed TPU system. -// -// The op returns an error if no system is running. -// -// Returns the created operation. -func ShutdownDistributedTPU(scope *Scope) (o *tf.Operation) { if scope.Err() != nil { return } - opspec := tf.OpSpec{ - Type: "ShutdownDistributedTPU", - } - return scope.AddOperation(opspec) -} - -// SerializeManySparseAttr is an optional argument to SerializeManySparse. -type SerializeManySparseAttr func(optionalAttr) - -// SerializeManySparseOutType sets the optional out_type attribute to value. -// -// value: The `dtype` to use for serialization; the supported types are `string` -// (default) and `variant`. -// If not specified, defaults to DT_STRING -func SerializeManySparseOutType(value tf.DataType) SerializeManySparseAttr { - return func(m optionalAttr) { - m["out_type"] = value - } -} - -// Serialize an `N`-minibatch `SparseTensor` into an `[N, 3]` `Tensor` object. -// -// The `SparseTensor` must have rank `R` greater than 1, and the first dimension -// is treated as the minibatch dimension. Elements of the `SparseTensor` -// must be sorted in increasing order of this first dimension. The serialized -// `SparseTensor` objects going into each row of `serialized_sparse` will have -// rank `R-1`. -// -// The minibatch size `N` is extracted from `sparse_shape[0]`. -// -// Arguments: -// sparse_indices: 2-D. The `indices` of the minibatch `SparseTensor`. -// sparse_values: 1-D. The `values` of the minibatch `SparseTensor`. -// sparse_shape: 1-D. The `shape` of the minibatch `SparseTensor`. 
-func SerializeManySparse(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...SerializeManySparseAttr) (serialized_sparse tf.Output) { - if scope.Err() != nil { + var idx int + var err error + if context_sparse_indices, idx, err = makeOutputList(op, idx, "context_sparse_indices"); err != nil { + scope.UpdateErr("ParseSequenceExampleV2", err) return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SerializeManySparse", - Input: []tf.Input{ - sparse_indices, sparse_values, sparse_shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Says whether the targets are in the top `K` predictions. -// -// This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the -// prediction for the target class is among the top `k` predictions among -// all predictions for example `i`. Note that the behavior of `InTopK` differs -// from the `TopK` op in its handling of ties; if multiple classes have the -// same prediction value and straddle the top-`k` boundary, all of those -// classes are considered to be in the top `k`. -// -// More formally, let -// -// \\(predictions_i\\) be the predictions for all classes for example `i`, -// \\(targets_i\\) be the target class for example `i`, -// \\(out_i\\) be the output for example `i`, -// -// $$out_i = predictions_{i, targets_i} \in TopKIncludingTies(predictions_i)$$ -// -// Arguments: -// predictions: A `batch_size` x `classes` tensor. -// targets: A `batch_size` vector of class ids. -// k: Number of top elements to look at for computing precision. -// -// Returns Computed precision at `k` as a `bool Tensor`. -func InTopKV2(scope *Scope, predictions tf.Output, targets tf.Output, k tf.Output) (precision tf.Output) { - if scope.Err() != nil { + if context_sparse_values, idx, err = makeOutputList(op, idx, "context_sparse_values"); err != nil { + scope.UpdateErr("ParseSequenceExampleV2", err) return } - opspec := tf.OpSpec{ - Type: "InTopKV2", - Input: []tf.Input{ - predictions, targets, k, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates an Optional variant with no value. -func OptionalNone(scope *Scope) (optional tf.Output) { - if scope.Err() != nil { + if context_sparse_shapes, idx, err = makeOutputList(op, idx, "context_sparse_shapes"); err != nil { + scope.UpdateErr("ParseSequenceExampleV2", err) return } - opspec := tf.OpSpec{ - Type: "OptionalNone", - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RetrieveTPUEmbeddingStochasticGradientDescentParametersAttr is an optional argument to RetrieveTPUEmbeddingStochasticGradientDescentParameters. -type RetrieveTPUEmbeddingStochasticGradientDescentParametersAttr func(optionalAttr) - -// RetrieveTPUEmbeddingStochasticGradientDescentParametersTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func RetrieveTPUEmbeddingStochasticGradientDescentParametersTableId(value int64) RetrieveTPUEmbeddingStochasticGradientDescentParametersAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// RetrieveTPUEmbeddingStochasticGradientDescentParametersTableName sets the optional table_name attribute to value. 
-// If not specified, defaults to "" -func RetrieveTPUEmbeddingStochasticGradientDescentParametersTableName(value string) RetrieveTPUEmbeddingStochasticGradientDescentParametersAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// RetrieveTPUEmbeddingStochasticGradientDescentParametersConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingStochasticGradientDescentParametersConfig(value string) RetrieveTPUEmbeddingStochasticGradientDescentParametersAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Retrieve SGD embedding parameters. -// -// An op that retrieves optimization parameters from embedding to host -// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up -// the correct embedding table configuration. For example, this op is -// used to retrieve updated parameters before saving a checkpoint. -// -// Returns Parameter parameters updated by the stochastic gradient descent optimization algorithm. -func RetrieveTPUEmbeddingStochasticGradientDescentParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingStochasticGradientDescentParametersAttr) (parameters tf.Output) { - if scope.Err() != nil { + if context_dense_values, idx, err = makeOutputList(op, idx, "context_dense_values"); err != nil { + scope.UpdateErr("ParseSequenceExampleV2", err) return } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) + if context_ragged_values, idx, err = makeOutputList(op, idx, "context_ragged_values"); err != nil { + scope.UpdateErr("ParseSequenceExampleV2", err) + return } - opspec := tf.OpSpec{ - Type: "RetrieveTPUEmbeddingStochasticGradientDescentParameters", - - Attrs: attrs, + if context_ragged_row_splits, idx, err = makeOutputList(op, idx, "context_ragged_row_splits"); err != nil { + scope.UpdateErr("ParseSequenceExampleV2", err) + return } - op := scope.AddOperation(opspec) - return op.Output(0) + if feature_list_sparse_indices, idx, err = makeOutputList(op, idx, "feature_list_sparse_indices"); err != nil { + scope.UpdateErr("ParseSequenceExampleV2", err) + return + } + if feature_list_sparse_values, idx, err = makeOutputList(op, idx, "feature_list_sparse_values"); err != nil { + scope.UpdateErr("ParseSequenceExampleV2", err) + return + } + if feature_list_sparse_shapes, idx, err = makeOutputList(op, idx, "feature_list_sparse_shapes"); err != nil { + scope.UpdateErr("ParseSequenceExampleV2", err) + return + } + if feature_list_dense_values, idx, err = makeOutputList(op, idx, "feature_list_dense_values"); err != nil { + scope.UpdateErr("ParseSequenceExampleV2", err) + return + } + if feature_list_dense_lengths, idx, err = makeOutputList(op, idx, "feature_list_dense_lengths"); err != nil { + scope.UpdateErr("ParseSequenceExampleV2", err) + return + } + if feature_list_ragged_values, idx, err = makeOutputList(op, idx, "feature_list_ragged_values"); err != nil { + scope.UpdateErr("ParseSequenceExampleV2", err) + return + } + if feature_list_ragged_outer_splits, idx, err = makeOutputList(op, idx, "feature_list_ragged_outer_splits"); err != nil { + scope.UpdateErr("ParseSequenceExampleV2", err) + return + } + if feature_list_ragged_inner_splits, idx, err = makeOutputList(op, idx, "feature_list_ragged_inner_splits"); err != nil { + scope.UpdateErr("ParseSequenceExampleV2", err) + return + } + return context_sparse_indices, context_sparse_values, context_sparse_shapes, 
context_dense_values, context_ragged_values, context_ragged_row_splits, feature_list_sparse_indices, feature_list_sparse_values, feature_list_sparse_shapes, feature_list_dense_values, feature_list_dense_lengths, feature_list_ragged_values, feature_list_ragged_outer_splits, feature_list_ragged_inner_splits } // CudnnRNNAttr is an optional argument to CudnnRNN. @@ -49670,6 +47804,2011 @@ func InfeedDequeueTuple(scope *Scope, dtypes []tf.DataType, shapes []tf.Shape) ( return outputs } +// Serializes the tree ensemble to a proto. +// +// Arguments: +// tree_ensemble_handle: Handle to the tree ensemble. +// +// Returns: +// stamp_token: Stamp token of the tree ensemble resource. +// tree_ensemble_serialized: Serialized proto of the ensemble. +func BoostedTreesSerializeEnsemble(scope *Scope, tree_ensemble_handle tf.Output) (stamp_token tf.Output, tree_ensemble_serialized tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "BoostedTreesSerializeEnsemble", + Input: []tf.Input{ + tree_ensemble_handle, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + +// Computes inverse hyperbolic cosine of x element-wise. +// +// Given an input tensor, the function computes inverse hyperbolic cosine of every element. +// Input range is `[1, inf]`. It returns `nan` if the input lies outside the range. +// +// ```python +// x = tf.constant([-2, -0.5, 1, 1.2, 200, 10000, float("inf")]) +// tf.math.acosh(x) ==> [nan nan 0. 0.62236255 5.9914584 9.903487 inf] +// ``` +func Acosh(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Acosh", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Outputs deterministic pseudorandom random numbers from a gamma distribution. +// +// Outputs random values from a gamma distribution. +// +// The outputs are a deterministic function of `shape`, `seed`, and `alpha`. +// +// Arguments: +// shape: The shape of the output tensor. +// seed: 2 seeds (shape [2]). +// alpha: The concentration of the gamma distribution. Shape must match the rightmost +// dimensions of `shape`. +// +// Returns Random values with specified shape. +func StatelessRandomGammaV2(scope *Scope, shape tf.Output, seed tf.Output, alpha tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "StatelessRandomGammaV2", + Input: []tf.Input{ + shape, seed, alpha, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// ConfigureDistributedTPUAttr is an optional argument to ConfigureDistributedTPU. +type ConfigureDistributedTPUAttr func(optionalAttr) + +// ConfigureDistributedTPUEmbeddingConfig sets the optional embedding_config attribute to value. +// +// value: Reserved. Do not use. +// If not specified, defaults to "" +func ConfigureDistributedTPUEmbeddingConfig(value string) ConfigureDistributedTPUAttr { + return func(m optionalAttr) { + m["embedding_config"] = value + } +} + +// ConfigureDistributedTPUTpuEmbeddingConfig sets the optional tpu_embedding_config attribute to value. +// +// value: Serialized tensorflow.tpu.TPUEmbeddingConfiguration that +// describes the embedding lookups of the program. 
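[Editorial note, not part of the patch] A minimal usage sketch for the Acosh wrapper shown in this hunk, assuming the standard tensorflow/go and tensorflow/go/op packages (NewScope, Const, Finalize and Session come from those bindings, not from this change):

package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// Acosh is documented above for inputs in [1, inf); smaller values yield NaN.
	x := op.Const(s, []float32{1, 1.2, 200, 10000})
	y := op.Acosh(s, x)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, []tf.Output{y}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // roughly [0 0.622 5.991 9.903]
}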
+// If not specified, defaults to "" +func ConfigureDistributedTPUTpuEmbeddingConfig(value string) ConfigureDistributedTPUAttr { + return func(m optionalAttr) { + m["tpu_embedding_config"] = value + } +} + +// ConfigureDistributedTPUIsGlobalInit sets the optional is_global_init attribute to value. +// +// value: Reserved. Do not use. +// If not specified, defaults to false +func ConfigureDistributedTPUIsGlobalInit(value bool) ConfigureDistributedTPUAttr { + return func(m optionalAttr) { + m["is_global_init"] = value + } +} + +// ConfigureDistributedTPUEnableWholeMeshCompilations sets the optional enable_whole_mesh_compilations attribute to value. +// If not specified, defaults to false +func ConfigureDistributedTPUEnableWholeMeshCompilations(value bool) ConfigureDistributedTPUAttr { + return func(m optionalAttr) { + m["enable_whole_mesh_compilations"] = value + } +} + +// ConfigureDistributedTPUCompilationFailureClosesChips sets the optional compilation_failure_closes_chips attribute to value. +// If not specified, defaults to true +func ConfigureDistributedTPUCompilationFailureClosesChips(value bool) ConfigureDistributedTPUAttr { + return func(m optionalAttr) { + m["compilation_failure_closes_chips"] = value + } +} + +// Sets up the centralized structures for a distributed TPU system. +// +// Returns A serialized tensorflow.tpu.TopologyProto that describes the TPU +// topology. +func ConfigureDistributedTPU(scope *Scope, optional ...ConfigureDistributedTPUAttr) (topology tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ConfigureDistributedTPU", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Creates a dataset that executes a SQL query and emits rows of the result set. +// +// Arguments: +// driver_name: The database type. Currently, the only supported type is 'sqlite'. +// data_source_name: A connection string to connect to the database. +// query: A SQL query to execute. +// +// +func SqlDataset(scope *Scope, driver_name tf.Output, data_source_name tf.Output, query tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "SqlDataset", + Input: []tf.Input{ + driver_name, data_source_name, query, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Outputs deterministic pseudorandom random integers from a uniform distribution. +// +// The generated values follow a uniform distribution in the range `[minval, maxval)`. +// +// The outputs are a deterministic function of `shape`, `seed`, `minval`, and `maxval`. +// +// Arguments: +// shape: The shape of the output tensor. +// seed: 2 seeds (shape [2]). +// minval: Minimum value (inclusive, scalar). +// maxval: Maximum value (exclusive, scalar). +// +// Returns Random values with specified shape. +func StatelessRandomUniformInt(scope *Scope, shape tf.Output, seed tf.Output, minval tf.Output, maxval tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "StatelessRandomUniformInt", + Input: []tf.Input{ + shape, seed, minval, maxval, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// LoadTPUEmbeddingRMSPropParametersAttr is an optional argument to LoadTPUEmbeddingRMSPropParameters. 
+type LoadTPUEmbeddingRMSPropParametersAttr func(optionalAttr) + +// LoadTPUEmbeddingRMSPropParametersTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 +func LoadTPUEmbeddingRMSPropParametersTableId(value int64) LoadTPUEmbeddingRMSPropParametersAttr { + return func(m optionalAttr) { + m["table_id"] = value + } +} + +// LoadTPUEmbeddingRMSPropParametersTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func LoadTPUEmbeddingRMSPropParametersTableName(value string) LoadTPUEmbeddingRMSPropParametersAttr { + return func(m optionalAttr) { + m["table_name"] = value + } +} + +// LoadTPUEmbeddingRMSPropParametersConfig sets the optional config attribute to value. +// If not specified, defaults to "" +func LoadTPUEmbeddingRMSPropParametersConfig(value string) LoadTPUEmbeddingRMSPropParametersAttr { + return func(m optionalAttr) { + m["config"] = value + } +} + +// Load RMSProp embedding parameters. +// +// An op that loads optimization parameters into HBM for embedding. Must be +// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct +// embedding table configuration. For example, this op is used to install +// parameters that are loaded from a checkpoint before a training loop is +// executed. +// +// Arguments: +// parameters: Value of parameters used in the RMSProp optimization algorithm. +// ms: Value of ms used in the RMSProp optimization algorithm. +// mom: Value of mom used in the RMSProp optimization algorithm. +// +// +// +// Returns the created operation. +func LoadTPUEmbeddingRMSPropParameters(scope *Scope, parameters tf.Output, ms tf.Output, mom tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingRMSPropParametersAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "LoadTPUEmbeddingRMSPropParameters", + Input: []tf.Input{ + parameters, ms, mom, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// InfeedEnqueueTupleAttr is an optional argument to InfeedEnqueueTuple. +type InfeedEnqueueTupleAttr func(optionalAttr) + +// InfeedEnqueueTupleLayouts sets the optional layouts attribute to value. +// +// value: A vector holding the requested layout in minor-to-major sequence for +// all the tuple shapes, in the order the shapes appear in the "shapes" input. +// The layout elements for a sub-shape can be set to -1, in which case the +// corresponding layout will be computed by the infeed operation. +// If not specified, defaults to <> +func InfeedEnqueueTupleLayouts(value []int64) InfeedEnqueueTupleAttr { + return func(m optionalAttr) { + m["layouts"] = value + } +} + +// InfeedEnqueueTupleDeviceOrdinal sets the optional device_ordinal attribute to value. +// +// value: The TPU device to use. This should be -1 when the Op +// is running on a TPU device, and >= 0 when the Op is running on the CPU +// device. +// If not specified, defaults to -1 +func InfeedEnqueueTupleDeviceOrdinal(value int64) InfeedEnqueueTupleAttr { + return func(m optionalAttr) { + m["device_ordinal"] = value + } +} + +// Feeds multiple Tensor values into the computation as an XLA tuple. +// +// Arguments: +// inputs: A list of tensors that will be provided using the infeed mechanism. +// shapes: The shapes of each tensor in `inputs`. +// +// Returns the created operation. 
+func InfeedEnqueueTuple(scope *Scope, inputs []tf.Output, shapes []tf.Shape, optional ...InfeedEnqueueTupleAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"shapes": shapes} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "InfeedEnqueueTuple", + Input: []tf.Input{ + tf.OutputList(inputs), + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// MapClearAttr is an optional argument to MapClear. +type MapClearAttr func(optionalAttr) + +// MapClearCapacity sets the optional capacity attribute to value. +// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func MapClearCapacity(value int64) MapClearAttr { + return func(m optionalAttr) { + m["capacity"] = value + } +} + +// MapClearMemoryLimit sets the optional memory_limit attribute to value. +// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func MapClearMemoryLimit(value int64) MapClearAttr { + return func(m optionalAttr) { + m["memory_limit"] = value + } +} + +// MapClearContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func MapClearContainer(value string) MapClearAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// MapClearSharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func MapClearSharedName(value string) MapClearAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Op removes all elements in the underlying container. +// +// Returns the created operation. +func MapClear(scope *Scope, dtypes []tf.DataType, optional ...MapClearAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dtypes": dtypes} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "MapClear", + + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// Deserialize `SparseTensor` objects. +// +// The input `serialized_sparse` must have the shape `[?, ?, ..., ?, 3]` where +// the last dimension stores serialized `SparseTensor` objects and the other N +// dimensions (N >= 0) correspond to a batch. The ranks of the original +// `SparseTensor` objects must all match. When the final `SparseTensor` is +// created, its rank is the rank of the incoming `SparseTensor` objects plus N; +// the sparse tensors have been concatenated along new dimensions, one for each +// batch. +// +// The output `SparseTensor` object's shape values for the original dimensions +// are the max across the input `SparseTensor` objects' shape values for the +// corresponding dimensions. The new dimensions match the size of the batch. +// +// The input `SparseTensor` objects' indices are assumed ordered in +// standard lexicographic order. If this is not the case, after this +// step run `SparseReorder` to restore index ordering. +// +// For example, if the serialized input is a `[2 x 3]` matrix representing two +// original `SparseTensor` objects: +// +// index = [ 0] +// [10] +// [20] +// values = [1, 2, 3] +// shape = [50] +// +// and +// +// index = [ 2] +// [10] +// values = [4, 5] +// shape = [30] +// +// then the final deserialized `SparseTensor` will be: +// +// index = [0 0] +// [0 10] +// [0 20] +// [1 2] +// [1 10] +// values = [1, 2, 3, 4, 5] +// shape = [2 50] +// +// Arguments: +// serialized_sparse: The serialized `SparseTensor` objects. The last dimension +// must have 3 columns. +// dtype: The `dtype` of the serialized `SparseTensor` objects. 
+func DeserializeSparse(scope *Scope, serialized_sparse tf.Output, dtype tf.DataType) (sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dtype": dtype} + opspec := tf.OpSpec{ + Type: "DeserializeSparse", + Input: []tf.Input{ + serialized_sparse, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// Decode web-safe base64-encoded strings. +// +// Input may or may not have padding at the end. See EncodeBase64 for padding. +// Web-safe means that input must use - and _ instead of + and /. +// +// Arguments: +// input: Base64 strings to decode. +// +// Returns Decoded strings. +func DecodeBase64(scope *Scope, input tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "DecodeBase64", + Input: []tf.Input{ + input, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// LoadTPUEmbeddingAdagradParametersAttr is an optional argument to LoadTPUEmbeddingAdagradParameters. +type LoadTPUEmbeddingAdagradParametersAttr func(optionalAttr) + +// LoadTPUEmbeddingAdagradParametersTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 +func LoadTPUEmbeddingAdagradParametersTableId(value int64) LoadTPUEmbeddingAdagradParametersAttr { + return func(m optionalAttr) { + m["table_id"] = value + } +} + +// LoadTPUEmbeddingAdagradParametersTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func LoadTPUEmbeddingAdagradParametersTableName(value string) LoadTPUEmbeddingAdagradParametersAttr { + return func(m optionalAttr) { + m["table_name"] = value + } +} + +// LoadTPUEmbeddingAdagradParametersConfig sets the optional config attribute to value. +// If not specified, defaults to "" +func LoadTPUEmbeddingAdagradParametersConfig(value string) LoadTPUEmbeddingAdagradParametersAttr { + return func(m optionalAttr) { + m["config"] = value + } +} + +// Load Adagrad embedding parameters. +// +// An op that loads optimization parameters into HBM for embedding. Must be +// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct +// embedding table configuration. For example, this op is used to install +// parameters that are loaded from a checkpoint before a training loop is +// executed. +// +// Arguments: +// parameters: Value of parameters used in the Adagrad optimization algorithm. +// accumulators: Value of accumulators used in the Adagrad optimization algorithm. +// +// +// +// Returns the created operation. +func LoadTPUEmbeddingAdagradParameters(scope *Scope, parameters tf.Output, accumulators tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingAdagradParametersAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "LoadTPUEmbeddingAdagradParameters", + Input: []tf.Input{ + parameters, accumulators, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// QuantizedMatMulWithBiasAndReluAttr is an optional argument to QuantizedMatMulWithBiasAndRelu. +type QuantizedMatMulWithBiasAndReluAttr func(optionalAttr) + +// QuantizedMatMulWithBiasAndReluToutput sets the optional Toutput attribute to value. 
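[Editorial note, not part of the patch] The DecodeBase64 wrapper in this hunk can be exercised with a scalar string constant. A short, hypothetical helper using the same imports and Scope/Session plumbing as the Acosh sketch earlier; the base64 literal is only an illustration:

func decodeBase64Example(s *op.Scope) tf.Output {
	// "aGVsbG8" is the unpadded web-safe base64 encoding of "hello".
	encoded := op.Const(s, "aGVsbG8")
	// Fetching the result through a Session yields the scalar string "hello".
	return op.DecodeBase64(s, encoded)
}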
+// If not specified, defaults to DT_QINT32 +func QuantizedMatMulWithBiasAndReluToutput(value tf.DataType) QuantizedMatMulWithBiasAndReluAttr { + return func(m optionalAttr) { + m["Toutput"] = value + } +} + +// QuantizedMatMulWithBiasAndReluTransposeA sets the optional transpose_a attribute to value. +// +// value: If true, `a` is transposed before multiplication. +// If not specified, defaults to false +func QuantizedMatMulWithBiasAndReluTransposeA(value bool) QuantizedMatMulWithBiasAndReluAttr { + return func(m optionalAttr) { + m["transpose_a"] = value + } +} + +// QuantizedMatMulWithBiasAndReluTransposeB sets the optional transpose_b attribute to value. +// +// value: If true, `b` is transposed before multiplication. +// If not specified, defaults to false +func QuantizedMatMulWithBiasAndReluTransposeB(value bool) QuantizedMatMulWithBiasAndReluAttr { + return func(m optionalAttr) { + m["transpose_b"] = value + } +} + +// QuantizedMatMulWithBiasAndReluInputQuantMode sets the optional input_quant_mode attribute to value. +// +// value: Input data quantization mode. Either MIN_FIRST(default) or SCALED. +// If not specified, defaults to "MIN_FIRST" +func QuantizedMatMulWithBiasAndReluInputQuantMode(value string) QuantizedMatMulWithBiasAndReluAttr { + return func(m optionalAttr) { + m["input_quant_mode"] = value + } +} + +// Perform a quantized matrix multiplication of `a` by the matrix `b` with bias +// add and relu fusion. +// +// The inputs must be two-dimensional matrices and 1D bias vector. And the inner +// dimension of `a` (after being transposed if `transpose_a` is non-zero) must +// match the outer dimension of `b` (after being transposed if `transposed_b` is +// non-zero). Then do broadcast add operation with bias values on the matrix +// multiplication result. The bias size must match inner dimension of `b`. Then do +// relu activation to get non-negative result. +// +// Arguments: +// a: A matrix to be multiplied. Must be a two-dimensional tensor of type `quint8`. +// b: A matrix to be multiplied and must be a two-dimensional tensor of type `qint8`. +// bias: A 1D bias tensor with size matching with inner dimension of `b` (after being +// transposed if `transposed_b` is non-zero). +// min_a: The float value that the lowest quantized `a` value represents. +// max_a: The float value that the highest quantized `a` value represents. +// min_b: The float value that the lowest quantized `b` value represents. +// max_b: The float value that the highest quantized `b` value represents. +// +// Returns: +// out +// min_out: The float value that the lowest quantized output value represents. +// max_out: The float value that the highest quantized output value represents. +func QuantizedMatMulWithBiasAndRelu(scope *Scope, a tf.Output, b tf.Output, bias tf.Output, min_a tf.Output, max_a tf.Output, min_b tf.Output, max_b tf.Output, optional ...QuantizedMatMulWithBiasAndReluAttr) (out tf.Output, min_out tf.Output, max_out tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "QuantizedMatMulWithBiasAndRelu", + Input: []tf.Input{ + a, b, bias, min_a, max_a, min_b, max_b, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// Shuts down a running distributed TPU system. +// +// The op returns an error if no system is running. +// +// Returns the created operation. 
+func ShutdownDistributedTPU(scope *Scope) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ShutdownDistributedTPU", + } + return scope.AddOperation(opspec) +} + +// SerializeManySparseAttr is an optional argument to SerializeManySparse. +type SerializeManySparseAttr func(optionalAttr) + +// SerializeManySparseOutType sets the optional out_type attribute to value. +// +// value: The `dtype` to use for serialization; the supported types are `string` +// (default) and `variant`. +// If not specified, defaults to DT_STRING +func SerializeManySparseOutType(value tf.DataType) SerializeManySparseAttr { + return func(m optionalAttr) { + m["out_type"] = value + } +} + +// Serialize an `N`-minibatch `SparseTensor` into an `[N, 3]` `Tensor` object. +// +// The `SparseTensor` must have rank `R` greater than 1, and the first dimension +// is treated as the minibatch dimension. Elements of the `SparseTensor` +// must be sorted in increasing order of this first dimension. The serialized +// `SparseTensor` objects going into each row of `serialized_sparse` will have +// rank `R-1`. +// +// The minibatch size `N` is extracted from `sparse_shape[0]`. +// +// Arguments: +// sparse_indices: 2-D. The `indices` of the minibatch `SparseTensor`. +// sparse_values: 1-D. The `values` of the minibatch `SparseTensor`. +// sparse_shape: 1-D. The `shape` of the minibatch `SparseTensor`. +func SerializeManySparse(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...SerializeManySparseAttr) (serialized_sparse tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "SerializeManySparse", + Input: []tf.Input{ + sparse_indices, sparse_values, sparse_shape, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// RequantizePerChannelAttr is an optional argument to RequantizePerChannel. +type RequantizePerChannelAttr func(optionalAttr) + +// RequantizePerChannelOutType sets the optional out_type attribute to value. +// +// value: The quantized type of output tensor that needs to be converted. +// If not specified, defaults to DT_QUINT8 +func RequantizePerChannelOutType(value tf.DataType) RequantizePerChannelAttr { + return func(m optionalAttr) { + m["out_type"] = value + } +} + +// Requantizes input with min and max values known per channel. +// +// Arguments: +// input: The original input tensor. +// input_min: The minimum value of the input tensor +// input_max: The maximum value of the input tensor. +// requested_output_min: The minimum value of the output tensor requested. +// requested_output_max: The maximum value of the output tensor requested. +// +// Returns: +// output: Output tensor. +// output_min: The minimum value of the final output tensor +// output_max: The maximum value of the final output tensor. 
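[Editorial note, not part of the patch] To make the SerializeManySparse contract above concrete, a small hypothetical graph-construction helper (same imports and Session plumbing as the Acosh sketch; the 2x3 minibatch values are made up for illustration):

func serializeManySparseExample(s *op.Scope) tf.Output {
	// A minibatch SparseTensor of dense shape [2, 3] with two non-zero entries:
	// element [0, 0] = 1 and element [1, 2] = 2. The first dimension (size 2)
	// is the minibatch dimension described in the comment above.
	indices := op.Const(s, [][]int64{{0, 0}, {1, 2}})
	values := op.Const(s, []float32{1, 2})
	shape := op.Const(s, []int64{2, 3})
	// The result is an [N, 3] string tensor (N = 2 here), one serialized rank-1
	// SparseTensor per minibatch row; pass SerializeManySparseOutType to request
	// DT_VARIANT instead of the default DT_STRING.
	return op.SerializeManySparse(s, indices, values, shape)
}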
+func RequantizePerChannel(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, requested_output_min tf.Output, requested_output_max tf.Output, optional ...RequantizePerChannelAttr) (output tf.Output, output_min tf.Output, output_max tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "RequantizePerChannel", + Input: []tf.Input{ + input, input_min, input_max, requested_output_min, requested_output_max, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// LeakyReluAttr is an optional argument to LeakyRelu. +type LeakyReluAttr func(optionalAttr) + +// LeakyReluAlpha sets the optional alpha attribute to value. +// If not specified, defaults to 0.2 +func LeakyReluAlpha(value float32) LeakyReluAttr { + return func(m optionalAttr) { + m["alpha"] = value + } +} + +// Computes rectified linear: `max(features, features * alpha)`. +func LeakyRelu(scope *Scope, features tf.Output, optional ...LeakyReluAttr) (activations tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "LeakyRelu", + Input: []tf.Input{ + features, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Generates values in an interval. +// +// A sequence of `num` evenly-spaced values are generated beginning at `start`. +// If `num > 1`, the values in the sequence increase by `stop - start / num - 1`, +// so that the last one is exactly `stop`. +// +// For example: +// +// ``` +// tf.linspace(10.0, 12.0, 3, name="linspace") => [ 10.0 11.0 12.0] +// ``` +// +// Arguments: +// start: 0-D tensor. First entry in the range. +// stop: 0-D tensor. Last entry in the range. +// num: 0-D tensor. Number of values to generate. +// +// Returns 1-D. The generated values. +func LinSpace(scope *Scope, start tf.Output, stop tf.Output, num tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "LinSpace", + Input: []tf.Input{ + start, stop, num, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Creates a dataset that caches elements from `input_dataset`. +// +// A CacheDataset will iterate over the input_dataset, and store tensors. If the +// cache already exists, the cache will be used. If the cache is inappropriate +// (e.g. cannot be opened, contains tensors of the wrong shape / size), an error +// will the returned when used. +// +// Arguments: +// +// filename: A path on the filesystem where we should cache the dataset. Note: this +// will be a directory. +// +// +func CacheDataset(scope *Scope, input_dataset tf.Output, filename tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "CacheDataset", + Input: []tf.Input{ + input_dataset, filename, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// ThreadPoolHandleAttr is an optional argument to ThreadPoolHandle. +type ThreadPoolHandleAttr func(optionalAttr) + +// ThreadPoolHandleMaxIntraOpParallelism sets the optional max_intra_op_parallelism attribute to value. 
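[Editorial note, not part of the patch] The LinSpace comment above elides parentheses: the step it describes is (stop - start) / (num - 1), so the last value is exactly `stop`. A tiny, hypothetical sketch of the wrapper, reusing the imports and Session plumbing from the Acosh example:

func linSpaceExample(s *op.Scope) tf.Output {
	start := op.Const(s, float32(10))
	stop := op.Const(s, float32(12))
	num := op.Const(s, int32(3))
	// Produces [10 11 12]: three evenly spaced values, step (12-10)/(3-1) = 1.
	return op.LinSpace(s, start, stop, num)
}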
+// +// value: The maximum degree of parallelism to use within operations that execute on this +// threadpool. +// If not specified, defaults to 1 +func ThreadPoolHandleMaxIntraOpParallelism(value int64) ThreadPoolHandleAttr { + return func(m optionalAttr) { + m["max_intra_op_parallelism"] = value + } +} + +// ThreadPoolHandleContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func ThreadPoolHandleContainer(value string) ThreadPoolHandleAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// ThreadPoolHandleSharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func ThreadPoolHandleSharedName(value string) ThreadPoolHandleAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Creates a dataset that uses a custom thread pool to compute `input_dataset`. +// +// Arguments: +// num_threads: The number of threads in the thread pool. +// display_name: A human-readable name for the threads that may be visible in some +// visualizations. +// threadpool. +// +// Returns A resource that can be consumed by one or more ExperimentalThreadPoolDataset +// ops. +func ThreadPoolHandle(scope *Scope, num_threads int64, display_name string, optional ...ThreadPoolHandleAttr) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_threads": num_threads, "display_name": display_name} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ThreadPoolHandle", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// SparseReduceMaxSparseAttr is an optional argument to SparseReduceMaxSparse. +type SparseReduceMaxSparseAttr func(optionalAttr) + +// SparseReduceMaxSparseKeepDims sets the optional keep_dims attribute to value. +// +// value: If true, retain reduced dimensions with length 1. +// If not specified, defaults to false +func SparseReduceMaxSparseKeepDims(value bool) SparseReduceMaxSparseAttr { + return func(m optionalAttr) { + m["keep_dims"] = value + } +} + +// Computes the max of elements across dimensions of a SparseTensor. +// +// This Op takes a SparseTensor and is the sparse counterpart to +// `tf.reduce_max()`. In contrast to SparseReduceMax, this Op returns a +// SparseTensor. +// +// Reduces `sp_input` along the dimensions given in `reduction_axes`. Unless +// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in +// `reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained +// with length 1. +// +// If `reduction_axes` has no entries, all dimensions are reduced, and a tensor +// with a single element is returned. Additionally, the axes can be negative, +// which are interpreted according to the indexing rules in Python. +// +// Arguments: +// input_indices: 2-D. `N x R` matrix with the indices of non-empty values in a +// SparseTensor, possibly not in canonical ordering. +// input_values: 1-D. `N` non-empty values corresponding to `input_indices`. +// input_shape: 1-D. Shape of the input SparseTensor. +// reduction_axes: 1-D. Length-`K` vector containing the reduction axes. 
+func SparseReduceMaxSparse(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output, reduction_axes tf.Output, optional ...SparseReduceMaxSparseAttr) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "SparseReduceMaxSparse", + Input: []tf.Input{ + input_indices, input_values, input_shape, reduction_axes, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// LoadTPUEmbeddingStochasticGradientDescentParametersAttr is an optional argument to LoadTPUEmbeddingStochasticGradientDescentParameters. +type LoadTPUEmbeddingStochasticGradientDescentParametersAttr func(optionalAttr) + +// LoadTPUEmbeddingStochasticGradientDescentParametersTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 +func LoadTPUEmbeddingStochasticGradientDescentParametersTableId(value int64) LoadTPUEmbeddingStochasticGradientDescentParametersAttr { + return func(m optionalAttr) { + m["table_id"] = value + } +} + +// LoadTPUEmbeddingStochasticGradientDescentParametersTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func LoadTPUEmbeddingStochasticGradientDescentParametersTableName(value string) LoadTPUEmbeddingStochasticGradientDescentParametersAttr { + return func(m optionalAttr) { + m["table_name"] = value + } +} + +// LoadTPUEmbeddingStochasticGradientDescentParametersConfig sets the optional config attribute to value. +// If not specified, defaults to "" +func LoadTPUEmbeddingStochasticGradientDescentParametersConfig(value string) LoadTPUEmbeddingStochasticGradientDescentParametersAttr { + return func(m optionalAttr) { + m["config"] = value + } +} + +// Load SGD embedding parameters. +// +// An op that loads optimization parameters into HBM for embedding. Must be +// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct +// embedding table configuration. For example, this op is used to install +// parameters that are loaded from a checkpoint before a training loop is +// executed. +// +// Arguments: +// parameters: Value of parameters used in the stochastic gradient descent optimization algorithm. +// +// +// +// Returns the created operation. +func LoadTPUEmbeddingStochasticGradientDescentParameters(scope *Scope, parameters tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingStochasticGradientDescentParametersAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "LoadTPUEmbeddingStochasticGradientDescentParameters", + Input: []tf.Input{ + parameters, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// CropAndResizeGradImageAttr is an optional argument to CropAndResizeGradImage. +type CropAndResizeGradImageAttr func(optionalAttr) + +// CropAndResizeGradImageMethod sets the optional method attribute to value. +// +// value: A string specifying the interpolation method. Only 'bilinear' is +// supported for now. 
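[Editorial note, not part of the patch] A hypothetical sketch of the SparseReduceMaxSparse wrapper above, with made-up values (same imports and Session plumbing as the Acosh example):

func sparseReduceMaxSparseExample(s *op.Scope) (tf.Output, tf.Output, tf.Output) {
	// A [2, 3] SparseTensor holding 5 at [0, 0], 3 at [0, 2] and 7 at [1, 1].
	indices := op.Const(s, [][]int64{{0, 0}, {0, 2}, {1, 1}})
	values := op.Const(s, []float32{5, 3, 7})
	shape := op.Const(s, []int64{2, 3})
	// Reduce over axis 1 (the columns); the result is again a SparseTensor,
	// here with dense shape [2] and values [5 7]. Pass
	// SparseReduceMaxSparseKeepDims(true) to retain the reduced dimension.
	axes := op.Const(s, []int32{1})
	return op.SparseReduceMaxSparse(s, indices, values, shape, axes)
}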
+// If not specified, defaults to "bilinear" +func CropAndResizeGradImageMethod(value string) CropAndResizeGradImageAttr { + return func(m optionalAttr) { + m["method"] = value + } +} + +// Computes the gradient of the crop_and_resize op wrt the input image tensor. +// +// Arguments: +// grads: A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`. +// boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor +// specifies the coordinates of a box in the `box_ind[i]` image and is specified +// in normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of +// `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the +// `[0, 1]` interval of normalized image height is mapped to +// `[0, image_height - 1] in image height coordinates. We do allow y1 > y2, in +// which case the sampled crop is an up-down flipped version of the original +// image. The width dimension is treated similarly. Normalized coordinates +// outside the `[0, 1]` range are allowed, in which case we use +// `extrapolation_value` to extrapolate the input image values. +// box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`. +// The value of `box_ind[i]` specifies the image that the `i`-th box refers to. +// image_size: A 1-D tensor with value `[batch, image_height, image_width, depth]` +// containing the original image size. Both `image_height` and `image_width` need +// to be positive. +// +// +// Returns A 4-D tensor of shape `[batch, image_height, image_width, depth]`. +func CropAndResizeGradImage(scope *Scope, grads tf.Output, boxes tf.Output, box_ind tf.Output, image_size tf.Output, T tf.DataType, optional ...CropAndResizeGradImageAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"T": T} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "CropAndResizeGradImage", + Input: []tf.Input{ + grads, boxes, box_ind, image_size, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// OutfeedDequeueAttr is an optional argument to OutfeedDequeue. +type OutfeedDequeueAttr func(optionalAttr) + +// OutfeedDequeueDeviceOrdinal sets the optional device_ordinal attribute to value. +// +// value: The TPU device to use. This should be -1 when the Op +// is running on a TPU device, and >= 0 when the Op is running on the CPU +// device. +// If not specified, defaults to -1 +func OutfeedDequeueDeviceOrdinal(value int64) OutfeedDequeueAttr { + return func(m optionalAttr) { + m["device_ordinal"] = value + } +} + +// Retrieves a single tensor from the computation outfeed. +// +// This operation will block indefinitely until data is available. +// +// Arguments: +// dtype: The type of elements in the tensor. +// shape: The shape of the tensor. +// +// Returns A tensor that will be read from the device outfeed. +func OutfeedDequeue(scope *Scope, dtype tf.DataType, shape tf.Shape, optional ...OutfeedDequeueAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dtype": dtype, "shape": shape} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "OutfeedDequeue", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Combines (nests of) input elements into a dataset of (nests of) windows. 
+// +// A "window" is a finite dataset of flat elements of size `size` (or possibly +// fewer if there are not enough input elements to fill the window and +// `drop_remainder` evaluates to false). +// +// The `shift` argument determines the number of input elements by which +// the window moves on each iteration. The first element in the `k`th window +// will be element +// +// ``` +// 1 + (k-1) * shift +// ``` +// +// of the input dataset. In particular, the first element of the first window +// will always be the first element of the input dataset. +// +// If the `stride` parameter is greater than 1, then each window will skip +// `(stride - 1)` input elements between each element that appears in the +// window. Output windows will still contain `size` elements regardless of +// the value of `stride`. +// +// The `stride` argument determines the stride of the input elements, and the +// `shift` argument determines the shift of the window. +// +// For example, letting `{...}` to represent a Dataset: +// +// - `tf.data.Dataset.range(7).window(2)` produces +// `{{0, 1}, {2, 3}, {4, 5}, {6}}` +// - `tf.data.Dataset.range(7).window(3, 2, 1, True)` produces +// `{{0, 1, 2}, {2, 3, 4}, {4, 5, 6}}` +// - `tf.data.Dataset.range(7).window(3, 1, 2, True)` produces +// `{{0, 2, 4}, {1, 3, 5}, {2, 4, 6}}` +// +// Note that when the `window` transformation is applied to a dataset of +// nested elements, it produces a dataset of nested windows. +// +// For example: +// +// - `tf.data.Dataset.from_tensor_slices((range(4), range(4))).window(2)` +// produces `{({0, 1}, {0, 1}), ({2, 3}, {2, 3})}` +// - `tf.data.Dataset.from_tensor_slices({"a": range(4)}).window(2)` +// produces `{{"a": {0, 1}}, {"a": {2, 3}}}` +// +// Arguments: +// +// size: An integer scalar, representing the number of elements +// of the input dataset to combine into a window. Must be positive. +// shift: An integer scalar, representing the number of input elements +// by which the window moves in each iteration. Defaults to `size`. +// Must be positive. +// stride: An integer scalar, representing the stride of the input elements +// in the sliding window. Must be positive. The default value of 1 means +// "retain every input element". +// drop_remainder: A Boolean scalar, representing whether the last window should be +// dropped if its size is smaller than `window_size`. +// +// +func WindowDataset(scope *Scope, input_dataset tf.Output, size tf.Output, shift tf.Output, stride tf.Output, drop_remainder tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "WindowDataset", + Input: []tf.Input{ + input_dataset, size, shift, stride, drop_remainder, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// SetSizeAttr is an optional argument to SetSize. +type SetSizeAttr func(optionalAttr) + +// SetSizeValidateIndices sets the optional validate_indices attribute to value. +// If not specified, defaults to true +func SetSizeValidateIndices(value bool) SetSizeAttr { + return func(m optionalAttr) { + m["validate_indices"] = value + } +} + +// Number of unique elements along last dimension of input `set`. +// +// Input `set` is a `SparseTensor` represented by `set_indices`, `set_values`, +// and `set_shape`. The last dimension contains values in a set, duplicates are +// allowed but ignored. 
+// +// If `validate_indices` is `True`, this op validates the order and range of `set` +// indices. +// +// Arguments: +// set_indices: 2D `Tensor`, indices of a `SparseTensor`. +// set_values: 1D `Tensor`, values of a `SparseTensor`. +// set_shape: 1D `Tensor`, shape of a `SparseTensor`. +// +// Returns For `set` ranked `n`, this is a `Tensor` with rank `n-1`, and the same 1st +// `n-1` dimensions as `set`. Each value is the number of unique elements in +// the corresponding `[0...n-1]` dimension of `set`. +func SetSize(scope *Scope, set_indices tf.Output, set_values tf.Output, set_shape tf.Output, optional ...SetSizeAttr) (size tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "SetSize", + Input: []tf.Input{ + set_indices, set_values, set_shape, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// AutoShardDatasetAttr is an optional argument to AutoShardDataset. +type AutoShardDatasetAttr func(optionalAttr) + +// AutoShardDatasetAutoShardPolicy sets the optional auto_shard_policy attribute to value. +// If not specified, defaults to 0 +func AutoShardDatasetAutoShardPolicy(value int64) AutoShardDatasetAttr { + return func(m optionalAttr) { + m["auto_shard_policy"] = value + } +} + +// Creates a dataset that shards the input dataset. +// +// Creates a dataset that shards the input dataset by num_workers, returning a +// sharded dataset for the index-th worker. This attempts to automatically shard +// a dataset by examining the Dataset graph and inserting a shard op before the +// inputs to a reader Dataset (e.g. CSVDataset, TFRecordDataset). +// +// This dataset will throw a NotFound error if we cannot shard the dataset +// automatically. +// +// Arguments: +// input_dataset: A variant tensor representing the input dataset. +// num_workers: A scalar representing the number of workers to distribute this dataset across. +// index: A scalar representing the index of the current worker out of num_workers. +// +// +func AutoShardDataset(scope *Scope, input_dataset tf.Output, num_workers tf.Output, index tf.Output, output_types []tf.DataType, output_shapes []tf.Shape, optional ...AutoShardDatasetAttr) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "AutoShardDataset", + Input: []tf.Input{ + input_dataset, num_workers, index, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// DecodeJpegAttr is an optional argument to DecodeJpeg. +type DecodeJpegAttr func(optionalAttr) + +// DecodeJpegChannels sets the optional channels attribute to value. +// +// value: Number of color channels for the decoded image. +// If not specified, defaults to 0 +func DecodeJpegChannels(value int64) DecodeJpegAttr { + return func(m optionalAttr) { + m["channels"] = value + } +} + +// DecodeJpegRatio sets the optional ratio attribute to value. +// +// value: Downscaling ratio. +// If not specified, defaults to 1 +func DecodeJpegRatio(value int64) DecodeJpegAttr { + return func(m optionalAttr) { + m["ratio"] = value + } +} + +// DecodeJpegFancyUpscaling sets the optional fancy_upscaling attribute to value. +// +// value: If true use a slower but nicer upscaling of the +// chroma planes (yuv420/422 only). 
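[Editorial note, not part of the patch] A hypothetical sketch of the SetSize wrapper above, illustrating that duplicates in the input set are counted once (imports and Session plumbing as in the Acosh example):

func setSizeExample(s *op.Scope) tf.Output {
	// A sparse `set` of dense shape [2, 3]: row 0 holds {7, 7} and row 1 holds {3};
	// duplicates are allowed in the input but ignored in the count.
	indices := op.Const(s, [][]int64{{0, 0}, {0, 1}, {1, 0}})
	values := op.Const(s, []int32{7, 7, 3})
	shape := op.Const(s, []int64{2, 3})
	// The result has rank n-1 = 1 and value [1 1]: one unique element per row.
	return op.SetSize(s, indices, values, shape)
}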
+// If not specified, defaults to true +func DecodeJpegFancyUpscaling(value bool) DecodeJpegAttr { + return func(m optionalAttr) { + m["fancy_upscaling"] = value + } +} + +// DecodeJpegTryRecoverTruncated sets the optional try_recover_truncated attribute to value. +// +// value: If true try to recover an image from truncated input. +// If not specified, defaults to false +func DecodeJpegTryRecoverTruncated(value bool) DecodeJpegAttr { + return func(m optionalAttr) { + m["try_recover_truncated"] = value + } +} + +// DecodeJpegAcceptableFraction sets the optional acceptable_fraction attribute to value. +// +// value: The minimum required fraction of lines before a truncated +// input is accepted. +// If not specified, defaults to 1 +func DecodeJpegAcceptableFraction(value float32) DecodeJpegAttr { + return func(m optionalAttr) { + m["acceptable_fraction"] = value + } +} + +// DecodeJpegDctMethod sets the optional dct_method attribute to value. +// +// value: string specifying a hint about the algorithm used for +// decompression. Defaults to "" which maps to a system-specific +// default. Currently valid values are ["INTEGER_FAST", +// "INTEGER_ACCURATE"]. The hint may be ignored (e.g., the internal +// jpeg library changes to a version that does not have that specific +// option.) +// If not specified, defaults to "" +func DecodeJpegDctMethod(value string) DecodeJpegAttr { + return func(m optionalAttr) { + m["dct_method"] = value + } +} + +// Decode a JPEG-encoded image to a uint8 tensor. +// +// The attr `channels` indicates the desired number of color channels for the +// decoded image. +// +// Accepted values are: +// +// * 0: Use the number of channels in the JPEG-encoded image. +// * 1: output a grayscale image. +// * 3: output an RGB image. +// +// If needed, the JPEG-encoded image is transformed to match the requested number +// of color channels. +// +// The attr `ratio` allows downscaling the image by an integer factor during +// decoding. Allowed values are: 1, 2, 4, and 8. This is much faster than +// downscaling the image later. +// +// +// This op also supports decoding PNGs and non-animated GIFs since the interface is +// the same, though it is cleaner to use `tf.io.decode_image`. +// +// Arguments: +// contents: 0-D. The JPEG-encoded image. +// +// Returns 3-D with shape `[height, width, channels]`.. +func DecodeJpeg(scope *Scope, contents tf.Output, optional ...DecodeJpegAttr) (image tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "DecodeJpeg", + Input: []tf.Input{ + contents, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Returns the number of nonzeroes of `sparse_matrix`. +// +// Arguments: +// sparse_matrix: A CSRSparseMatrix. +// +// Returns The number of nonzeroes of `sparse_matrix`. +func SparseMatrixNNZ(scope *Scope, sparse_matrix tf.Output) (nnz tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SparseMatrixNNZ", + Input: []tf.Input{ + sparse_matrix, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Enqueue a Tensor on the computation outfeed. +// +// Arguments: +// input: A tensor that will be inserted into the outfeed queue. +// +// Returns the created operation. 
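[Editorial note, not part of the patch] A hypothetical way to drive the DecodeJpeg wrapper above: feed the raw JPEG bytes through a scalar string Placeholder. Placeholder, tf.String and tf.NewTensor are assumed from the standard Go bindings, not from this hunk, and the file handling is only sketched in the comments:

func decodeJpegExample(s *op.Scope) (contents tf.Output, image tf.Output) {
	// A scalar string placeholder for the encoded bytes; at run time, feed it
	// with tf.NewTensor(string(raw)), where raw is the file read from disk.
	contents = op.Placeholder(s, tf.String)
	// channels=3 forces an RGB result; the fetched value is a
	// [height, width, 3] uint8 tensor.
	image = op.DecodeJpeg(s, contents, op.DecodeJpegChannels(3))
	return contents, image
}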
+func OutfeedEnqueue(scope *Scope, input tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "OutfeedEnqueue", + Input: []tf.Input{ + input, + }, + } + return scope.AddOperation(opspec) +} + +// Checks a tensor for NaN and Inf values. +// +// When run, reports an `InvalidArgument` error if `tensor` has any values +// that are not a number (NaN) or infinity (Inf). Otherwise, passes `tensor` as-is. +// +// Arguments: +// +// message: Prefix of the error message. +func CheckNumerics(scope *Scope, tensor tf.Output, message string) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"message": message} + opspec := tf.OpSpec{ + Type: "CheckNumerics", + Input: []tf.Input{ + tensor, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Broadcast an array for a compatible shape. +// +// Broadcasting is the process of making arrays to have compatible shapes +// for arithmetic operations. Two shapes are compatible if for each +// dimension pair they are either equal or one of them is one. When trying +// to broadcast a Tensor to a shape, it starts with the trailing dimensions, +// and works its way forward. +// +// For example, +// +// >>> x = tf.constant([1, 2, 3]) +// >>> y = tf.broadcast_to(x, [3, 3]) +// >>> print(y) +// tf.Tensor( +// [[1 2 3] +// [1 2 3] +// [1 2 3]], shape=(3, 3), dtype=int32) +// +// In the above example, the input Tensor with the shape of `[1, 3]` +// is broadcasted to output Tensor with shape of `[3, 3]`. +// +// When doing broadcasted operations such as multiplying a tensor +// by a scalar, broadcasting (usually) confers some time or space +// benefit, as the broadcasted tensor is never materialized. +// +// However, `broadcast_to` does not carry with it any such benefits. +// The newly-created tensor takes the full memory of the broadcasted +// shape. (In a graph context, `broadcast_to` might be fused to +// subsequent operation and then be optimized away, however.) +// +// Arguments: +// input: A Tensor to broadcast. +// shape: An 1-D `int` Tensor. The shape of the desired output. +// +// Returns A Tensor. +func BroadcastTo(scope *Scope, input tf.Output, shape tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "BroadcastTo", + Input: []tf.Input{ + input, shape, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Make all elements in the non-Batch dimension unique, but \"close\" to +// +// their initial value. Never returns a sub-normal number. Never returns +// zero. The sign of each input element is always identical to the sign +// of the corresponding output element. Behavior for infinite elements is +// undefined. Behavior for subnormal elements is undefined. +func MakeUnique(scope *Scope, input tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "MakeUnique", + Input: []tf.Input{ + input, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// RandomUniformAttr is an optional argument to RandomUniform. +type RandomUniformAttr func(optionalAttr) + +// RandomUniformSeed sets the optional seed attribute to value. +// +// value: If either `seed` or `seed2` are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. 
+// If not specified, defaults to 0 +func RandomUniformSeed(value int64) RandomUniformAttr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// RandomUniformSeed2 sets the optional seed2 attribute to value. +// +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func RandomUniformSeed2(value int64) RandomUniformAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Outputs random values from a uniform distribution. +// +// The generated values follow a uniform distribution in the range `[0, 1)`. The +// lower bound 0 is included in the range, while the upper bound 1 is excluded. +// +// Arguments: +// shape: The shape of the output tensor. +// dtype: The type of the output. +// +// Returns A tensor of the specified shape filled with uniform random values. +func RandomUniform(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...RandomUniformAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dtype": dtype} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "RandomUniform", + Input: []tf.Input{ + shape, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizeAttr is an optional argument to QuantizedDepthwiseConv2DWithBiasAndReluAndRequantize. +type QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizeAttr func(optionalAttr) + +// QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizeOutType sets the optional out_type attribute to value. +// +// value: The type of the output. +// If not specified, defaults to DT_QUINT8 +func QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizeOutType(value tf.DataType) QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizeAttr { + return func(m optionalAttr) { + m["out_type"] = value + } +} + +// QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizeDilations sets the optional dilations attribute to value. +// +// value: List of dilation values. +// If not specified, defaults to +func QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizeDilations(value []int64) QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizeAttr { + return func(m optionalAttr) { + m["dilations"] = value + } +} + +// QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizePaddingList sets the optional padding_list attribute to value. +// If not specified, defaults to <> +func QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizePaddingList(value []int64) QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizeAttr { + return func(m optionalAttr) { + m["padding_list"] = value + } +} + +// Computes quantized depthwise Conv2D with Bias, Relu and Requantize. +// +// Arguments: +// input: The original input tensor. +// filter: The original filter tensor. +// bias: The original bias tensor. +// min_input: The float value that the minimum quantized input value represents. +// max_input: The float value that the maximum quantized input value represents. +// min_filter: The float value that the minimum quantized filter value represents. +// max_filter: The float value that the maximum quantized filter value represents. +// min_freezed_output: The minimum float value of the output tensor. +// max_freezed_output: The maximum float value of the output tensor. +// strides: List of stride values. +// +// +// Returns: +// output: The output tensor. +// min_output: The float value that the minimum quantized output value represents. 
+// max_output: The float value that the maximum quantized output value represents. +func QuantizedDepthwiseConv2DWithBiasAndReluAndRequantize(scope *Scope, input tf.Output, filter tf.Output, bias tf.Output, min_input tf.Output, max_input tf.Output, min_filter tf.Output, max_filter tf.Output, min_freezed_output tf.Output, max_freezed_output tf.Output, strides []int64, padding string, optional ...QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizeAttr) (output tf.Output, min_output tf.Output, max_output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "QuantizedDepthwiseConv2DWithBiasAndReluAndRequantize", + Input: []tf.Input{ + input, filter, bias, min_input, max_input, min_filter, max_filter, min_freezed_output, max_freezed_output, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// Removes keys and its associated values from a table. +// +// The tensor `keys` must of the same type as the keys of the table. Keys not +// already in the table are silently ignored. +// +// Arguments: +// table_handle: Handle to the table. +// keys: Any shape. Keys of the elements to remove. +// +// Returns the created operation. +func LookupTableRemoveV2(scope *Scope, table_handle tf.Output, keys tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "LookupTableRemoveV2", + Input: []tf.Input{ + table_handle, keys, + }, + } + return scope.AddOperation(opspec) +} + +// NotEqualAttr is an optional argument to NotEqual. +type NotEqualAttr func(optionalAttr) + +// NotEqualIncompatibleShapeError sets the optional incompatible_shape_error attribute to value. +// If not specified, defaults to true +func NotEqualIncompatibleShapeError(value bool) NotEqualAttr { + return func(m optionalAttr) { + m["incompatible_shape_error"] = value + } +} + +// Returns the truth value of (x != y) element-wise. +// +// *NOTE*: `NotEqual` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func NotEqual(scope *Scope, x tf.Output, y tf.Output, optional ...NotEqualAttr) (z tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "NotEqual", + Input: []tf.Input{ + x, y, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Concatenates quantized tensors along one dimension. +// +// Arguments: +// concat_dim: 0-D. The dimension along which to concatenate. Must be in the +// range [0, rank(values)). +// values: The `N` Tensors to concatenate. Their ranks and types must match, +// and their sizes must match in all dimensions except `concat_dim`. +// input_mins: The minimum scalar values for each of the input tensors. +// input_maxes: The maximum scalar values for each of the input tensors. +// +// Returns: +// output: A `Tensor` with the concatenation of values stacked along the +// `concat_dim` dimension. This tensor's shape matches that of `values` except +// in `concat_dim` where it has the sum of the sizes. +// output_min: The float value that the minimum quantized output value represents. +// output_max: The float value that the maximum quantized output value represents. 
+func QuantizedConcat(scope *Scope, concat_dim tf.Output, values []tf.Output, input_mins []tf.Output, input_maxes []tf.Output) (output tf.Output, output_min tf.Output, output_max tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "QuantizedConcat", + Input: []tf.Input{ + concat_dim, tf.OutputList(values), tf.OutputList(input_mins), tf.OutputList(input_maxes), + }, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// Returns the batched diagonal part of a batched tensor. +// +// Returns a tensor with the `k[0]`-th to `k[1]`-th diagonals of the batched +// `input`. +// +// Assume `input` has `r` dimensions `[I, J, ..., L, M, N]`. +// Let `max_diag_len` be the maximum length among all diagonals to be extracted, +// `max_diag_len = min(M + min(k[1], 0), N + min(-k[0], 0))` +// Let `num_diags` be the number of diagonals to extract, +// `num_diags = k[1] - k[0] + 1`. +// +// If `num_diags == 1`, the output tensor is of rank `r - 1` with shape +// `[I, J, ..., L, max_diag_len]` and values: +// +// ``` +// diagonal[i, j, ..., l, n] +// = input[i, j, ..., l, n+y, n+x] ; if 0 <= n+y < M and 0 <= n+x < N, +// padding_value ; otherwise. +// ``` +// where `y = max(-k[1], 0)`, `x = max(k[1], 0)`. +// +// Otherwise, the output tensor has rank `r` with dimensions +// `[I, J, ..., L, num_diags, max_diag_len]` with values: +// +// ``` +// diagonal[i, j, ..., l, m, n] +// = input[i, j, ..., l, n+y, n+x] ; if 0 <= n+y < M and 0 <= n+x < N, +// padding_value ; otherwise. +// ``` +// where `d = k[1] - m`, `y = max(-d, 0)`, and `x = max(d, 0)`. +// +// The input must be at least a matrix. +// +// For example: +// +// ``` +// input = np.array([[[1, 2, 3, 4], # Input shape: (2, 3, 4) +// [5, 6, 7, 8], +// [9, 8, 7, 6]], +// [[5, 4, 3, 2], +// [1, 2, 3, 4], +// [5, 6, 7, 8]]]) +// +// # A main diagonal from each batch. +// tf.matrix_diag_part(input) ==> [[1, 6, 7], # Output shape: (2, 3) +// [5, 2, 7]] +// +// # A superdiagonal from each batch. +// tf.matrix_diag_part(input, k = 1) +// ==> [[2, 7, 6], # Output shape: (2, 3) +// [4, 3, 8]] +// +// # A tridiagonal band from each batch. +// tf.matrix_diag_part(input, k = (-1, 1)) +// ==> [[[2, 7, 6], # Output shape: (2, 3, 3) +// [1, 6, 7], +// [5, 8, 0]], +// [[4, 3, 8], +// [5, 2, 7], +// [1, 6, 0]]] +// +// # Padding value = 9 +// tf.matrix_diag_part(input, k = (1, 3), padding_value = 9) +// ==> [[[4, 9, 9], # Output shape: (2, 3, 3) +// [3, 8, 9], +// [2, 7, 6]], +// [[2, 9, 9], +// [3, 4, 9], +// [4, 3, 8]]] +// ``` +// +// Arguments: +// input: Rank `r` tensor where `r >= 2`. +// k: Diagonal offset(s). Positive value means superdiagonal, 0 refers to the main +// diagonal, and negative value means subdiagonals. `k` can be a single integer +// (for a single diagonal) or a pair of integers specifying the low and high ends +// of a matrix band. `k[0]` must not be larger than `k[1]`. +// padding_value: The value to fill the area outside the specified diagonal band with. +// Default is 0. +// +// Returns The extracted diagonal(s). +func MatrixDiagPartV2(scope *Scope, input tf.Output, k tf.Output, padding_value tf.Output) (diagonal tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "MatrixDiagPartV2", + Input: []tf.Input{ + input, k, padding_value, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Returns x / y element-wise. +// +// *NOTE*: `Div` supports broadcasting. 
More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func Div(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Div", + Input: []tf.Input{ + x, y, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr is an optional argument to LoadTPUEmbeddingProximalAdagradParametersGradAccumDebug. +type LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr func(optionalAttr) + +// LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 +func LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugTableId(value int64) LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr { + return func(m optionalAttr) { + m["table_id"] = value + } +} + +// LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugTableName(value string) LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr { + return func(m optionalAttr) { + m["table_name"] = value + } +} + +// LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugConfig sets the optional config attribute to value. +// If not specified, defaults to "" +func LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugConfig(value string) LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr { + return func(m optionalAttr) { + m["config"] = value + } +} + +// Load proximal Adagrad embedding parameters with debug support. +// +// An op that loads optimization parameters into HBM for embedding. Must be +// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct +// embedding table configuration. For example, this op is used to install +// parameters that are loaded from a checkpoint before a training loop is +// executed. +// +// Arguments: +// parameters: Value of parameters used in the proximal Adagrad optimization algorithm. +// accumulators: Value of accumulators used in the proximal Adagrad optimization algorithm. +// gradient_accumulators: Value of gradient_accumulators used in the proximal Adagrad optimization algorithm. +// +// +// +// Returns the created operation. +func LoadTPUEmbeddingProximalAdagradParametersGradAccumDebug(scope *Scope, parameters tf.Output, accumulators tf.Output, gradient_accumulators tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "LoadTPUEmbeddingProximalAdagradParametersGradAccumDebug", + Input: []tf.Input{ + parameters, accumulators, gradient_accumulators, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// FusedResizeAndPadConv2DAttr is an optional argument to FusedResizeAndPadConv2D. +type FusedResizeAndPadConv2DAttr func(optionalAttr) + +// FusedResizeAndPadConv2DResizeAlignCorners sets the optional resize_align_corners attribute to value. +// +// value: If true, the centers of the 4 corner pixels of the input and output tensors are +// aligned, preserving the values at the corner pixels. Defaults to false. 
+// If not specified, defaults to false +func FusedResizeAndPadConv2DResizeAlignCorners(value bool) FusedResizeAndPadConv2DAttr { + return func(m optionalAttr) { + m["resize_align_corners"] = value + } +} + +// Performs a resize and padding as a preprocess during a convolution. +// +// It's often possible to do spatial transformations more efficiently as part of +// the packing stage of a convolution, so this op allows for an optimized +// implementation where these stages are fused together. This prevents the need to +// write out the intermediate results as whole tensors, reducing memory pressure, +// and we can get some latency gains by merging the transformation calculations. +// The data_format attribute for Conv2D isn't supported by this op, and defaults to +// 'NHWC' order. +// Internally this op uses a single per-graph scratch buffer, which means that it +// will block if multiple versions are being run in parallel. This is because this +// operator is primarily an optimization to minimize memory usage. +// +// Arguments: +// input: 4-D with shape `[batch, in_height, in_width, in_channels]`. +// size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The +// new size for the images. +// paddings: A two-column matrix specifying the padding sizes. The number of +// rows must be the same as the rank of `input`. +// filter: 4-D with shape +// `[filter_height, filter_width, in_channels, out_channels]`. +// +// strides: 1-D of length 4. The stride of the sliding window for each dimension +// of `input`. Must be in the same order as the dimension specified with format. +// padding: The type of padding algorithm to use. +func FusedResizeAndPadConv2D(scope *Scope, input tf.Output, size tf.Output, paddings tf.Output, filter tf.Output, mode string, strides []int64, padding string, optional ...FusedResizeAndPadConv2DAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"mode": mode, "strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "FusedResizeAndPadConv2D", + Input: []tf.Input{ + input, size, paddings, filter, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// EnqueueTPUEmbeddingRaggedTensorBatchAttr is an optional argument to EnqueueTPUEmbeddingRaggedTensorBatch. +type EnqueueTPUEmbeddingRaggedTensorBatchAttr func(optionalAttr) + +// EnqueueTPUEmbeddingRaggedTensorBatchDeviceOrdinal sets the optional device_ordinal attribute to value. +// +// value: The TPU device to use. Should be >= 0 and less than the number +// of TPU cores in the task on which the node is placed. +// If not specified, defaults to -1 +func EnqueueTPUEmbeddingRaggedTensorBatchDeviceOrdinal(value int64) EnqueueTPUEmbeddingRaggedTensorBatchAttr { + return func(m optionalAttr) { + m["device_ordinal"] = value + } +} + +// EnqueueTPUEmbeddingRaggedTensorBatchCombiners sets the optional combiners attribute to value. +// +// value: A list of string scalars, one for each embedding table that specify +// how to normalize the embedding activations after weighted summation. +// Supported combiners are 'mean', 'sum', or 'sqrtn'. It is invalid to have +// the sum of the weights be 0 for 'mean' or the sum of the squared weights be +// 0 for 'sqrtn'. If combiners isn't passed, the default is to use 'sum' for +// all tables. 
+// If not specified, defaults to <> +func EnqueueTPUEmbeddingRaggedTensorBatchCombiners(value []string) EnqueueTPUEmbeddingRaggedTensorBatchAttr { + return func(m optionalAttr) { + m["combiners"] = value + } +} + +// EnqueueTPUEmbeddingRaggedTensorBatchMaxSequenceLengths sets the optional max_sequence_lengths attribute to value. +// If not specified, defaults to <> +func EnqueueTPUEmbeddingRaggedTensorBatchMaxSequenceLengths(value []int64) EnqueueTPUEmbeddingRaggedTensorBatchAttr { + return func(m optionalAttr) { + m["max_sequence_lengths"] = value + } +} + +// Eases the porting of code that uses tf.nn.embedding_lookup(). +// +// sample_splits[i], embedding_indices[i] and aggregation_weights[i] correspond +// to the ith feature. table_ids[i] indicates which embedding table to look up ith +// feature. +// +// The tensors at corresponding positions in two of the input lists, +// embedding_indices and aggregation_weights, must have the same shape, i.e. rank 1 +// with dim_size() equal to the total number of lookups into the table described by +// the corresponding feature. +// +// Arguments: +// sample_splits: A list of rank 1 Tensors specifying the break points for splitting +// embedding_indices and aggregation_weights into rows. +// It corresponds to ids.row_splits in embedding_lookup(), when ids is a +// RaggedTensor. +// embedding_indices: A list of rank 1 Tensors, indices into the embedding tables. +// It corresponds to ids.values in embedding_lookup(), when ids is a RaggedTensor. +// aggregation_weights: A list of rank 1 Tensors containing per training example +// aggregation weights. It corresponds to the values field of a RaggedTensor +// with the same row_splits as ids in embedding_lookup(), when ids is a +// RaggedTensor. +// mode_override: A string input that overrides the mode specified in the +// TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference', +// 'training', 'backward_pass_only'}. When set to 'unspecified', the mode set +// in TPUEmbeddingConfiguration is used, otherwise mode_override is used. +// table_ids: A list of integers specifying the identifier of the embedding table +// (offset of TableDescriptor in the TPUEmbeddingConfiguration) to lookup the +// corresponding input. The ith input is looked up using table_ids[i]. The size +// of the table_ids list must be equal to that of sample_indices, +// embedding_indices and aggregation_weights. +// +// Returns the created operation. +func EnqueueTPUEmbeddingRaggedTensorBatch(scope *Scope, sample_splits []tf.Output, embedding_indices []tf.Output, aggregation_weights []tf.Output, mode_override tf.Output, table_ids []int64, optional ...EnqueueTPUEmbeddingRaggedTensorBatchAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"table_ids": table_ids} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "EnqueueTPUEmbeddingRaggedTensorBatch", + Input: []tf.Input{ + tf.OutputList(sample_splits), tf.OutputList(embedding_indices), tf.OutputList(aggregation_weights), mode_override, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + // Creates and returns an empty tensor list. // // All list elements must be tensors of dtype element_dtype and shape compatible @@ -49770,45 +49909,153 @@ func TPUReplicatedOutput(scope *Scope, input tf.Output, num_replicas int64) (out return outputs } -// Removes keys and its associated values from a table. +// Scatter `updates` into a new tensor according to `indices`. 
// -// The tensor `keys` must of the same type as the keys of the table. Keys not -// already in the table are silently ignored. +// Creates a new tensor by applying sparse `updates` to individual values or +// slices within a tensor (initially zero for numeric, empty for string) of +// the given `shape` according to indices. This operator is the inverse of the +// `tf.gather_nd` operator which extracts values or slices from a given tensor. +// +// This operation is similar to tensor_scatter_add, except that the tensor is +// zero-initialized. Calling `tf.scatter_nd(indices, values, shape)` is identical +// to `tensor_scatter_add(tf.zeros(shape, values.dtype), indices, values)` +// +// If `indices` contains duplicates, then their updates are accumulated (summed). +// +// **WARNING**: The order in which updates are applied is nondeterministic, so the +// output will be nondeterministic if `indices` contains duplicates -- because +// of some numerical approximation issues, numbers summed in different order +// may yield different results. +// +// `indices` is an integer tensor containing indices into a new tensor of shape +// `shape`. The last dimension of `indices` can be at most the rank of `shape`: +// +// indices.shape[-1] <= shape.rank +// +// The last dimension of `indices` corresponds to indices into elements +// (if `indices.shape[-1] = shape.rank`) or slices +// (if `indices.shape[-1] < shape.rank`) along dimension `indices.shape[-1]` of +// `shape`. `updates` is a tensor with shape +// +// indices.shape[:-1] + shape[indices.shape[-1]:] +// +// The simplest form of scatter is to insert individual elements in a tensor by +// index. For example, say we want to insert 4 scattered elements in a rank-1 +// tensor with 8 elements. +// +//
+// (illustration of the element scatter omitted)
+// +// In Python, this scatter operation would look like this: +// +// ```python +// indices = tf.constant([[4], [3], [1], [7]]) +// updates = tf.constant([9, 10, 11, 12]) +// shape = tf.constant([8]) +// scatter = tf.scatter_nd(indices, updates, shape) +// print(scatter) +// ``` +// +// The resulting tensor would look like this: +// +// [0, 11, 0, 10, 9, 0, 0, 12] +// +// We can also, insert entire slices of a higher rank tensor all at once. For +// example, if we wanted to insert two slices in the first dimension of a +// rank-3 tensor with two matrices of new values. +// +//
+// (illustration of the slice scatter omitted)
+// +// In Python, this scatter operation would look like this: +// +// ```python +// indices = tf.constant([[0], [2]]) +// updates = tf.constant([[[5, 5, 5, 5], [6, 6, 6, 6], +// [7, 7, 7, 7], [8, 8, 8, 8]], +// [[5, 5, 5, 5], [6, 6, 6, 6], +// [7, 7, 7, 7], [8, 8, 8, 8]]]) +// shape = tf.constant([4, 4, 4]) +// scatter = tf.scatter_nd(indices, updates, shape) +// print(scatter) +// ``` +// +// The resulting tensor would look like this: +// +// [[[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]], +// [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], +// [[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]], +// [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]] +// +// Note that on CPU, if an out of bound index is found, an error is returned. +// On GPU, if an out of bound index is found, the index is ignored. // // Arguments: -// table_handle: Handle to the table. -// keys: Any shape. Keys of the elements to remove. +// indices: Index tensor. +// updates: Updates to scatter into output. +// shape: 1-D. The shape of the resulting tensor. // -// Returns the created operation. -func LookupTableRemoveV2(scope *Scope, table_handle tf.Output, keys tf.Output) (o *tf.Operation) { +// Returns A new tensor with the given shape and updates applied according +// to the indices. +func ScatterNd(scope *Scope, indices tf.Output, updates tf.Output, shape tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "LookupTableRemoveV2", + Type: "ScatterNd", Input: []tf.Input{ - table_handle, keys, + indices, updates, shape, }, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// NotEqualAttr is an optional argument to NotEqual. -type NotEqualAttr func(optionalAttr) +// UniqueAttr is an optional argument to Unique. +type UniqueAttr func(optionalAttr) -// NotEqualIncompatibleShapeError sets the optional incompatible_shape_error attribute to value. -// If not specified, defaults to true -func NotEqualIncompatibleShapeError(value bool) NotEqualAttr { +// UniqueOutIdx sets the optional out_idx attribute to value. +// If not specified, defaults to DT_INT32 +func UniqueOutIdx(value tf.DataType) UniqueAttr { return func(m optionalAttr) { - m["incompatible_shape_error"] = value + m["out_idx"] = value } } -// Returns the truth value of (x != y) element-wise. +// Finds unique elements in a 1-D tensor. // -// *NOTE*: `NotEqual` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func NotEqual(scope *Scope, x tf.Output, y tf.Output, optional ...NotEqualAttr) (z tf.Output) { +// This operation returns a tensor `y` containing all of the unique elements of `x` +// sorted in the same order that they occur in `x`; `x` does not need to be sorted. +// This operation also returns a tensor `idx` the same size as `x` that contains +// the index of each value of `x` in the unique output `y`. In other words: +// +// `y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]` +// +// Examples: +// +// ``` +// # tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8] +// y, idx = unique(x) +// y ==> [1, 2, 4, 7, 8] +// idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] +// ``` +// +// ``` +// # tensor 'x' is [4, 5, 1, 2, 3, 3, 4, 5] +// y, idx = unique(x) +// y ==> [4, 5, 1, 2, 3] +// idx ==> [0, 1, 2, 3, 4, 4, 0, 1] +// ``` +// +// Arguments: +// x: 1-D. +// +// Returns: +// y: 1-D. +// idx: 1-D. 
+func Unique(scope *Scope, x tf.Output, optional ...UniqueAttr) (y tf.Output, idx tf.Output) { if scope.Err() != nil { return } @@ -49817,12 +50064,40 @@ func NotEqual(scope *Scope, x tf.Output, y tf.Output, optional ...NotEqualAttr) a(attrs) } opspec := tf.OpSpec{ - Type: "NotEqual", + Type: "Unique", Input: []tf.Input{ - x, y, + x, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) +} + +// Converts a `RaggedTensor` into a `SparseTensor` with the same values. +// +// input=ragged.from_nested_row_splits(rt_dense_values, rt_nested_splits) +// output=SparseTensor(indices=sparse_indices, values=sparse_values, +// dense_shape=sparse_dense_shape) +// +// Arguments: +// rt_nested_splits: The `row_splits` for the `RaggedTensor`. +// rt_dense_values: The `flat_values` for the `RaggedTensor`. +// +// Returns: +// sparse_indices: The indices for the `SparseTensor`. +// sparse_values: The values of the `SparseTensor`. +// sparse_dense_shape: `sparse_dense_shape` is a tight bounding box of the input `RaggedTensor`. +func RaggedTensorToSparse(scope *Scope, rt_nested_splits []tf.Output, rt_dense_values tf.Output) (sparse_indices tf.Output, sparse_values tf.Output, sparse_dense_shape tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "RaggedTensorToSparse", + Input: []tf.Input{ + tf.OutputList(rt_nested_splits), rt_dense_values, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) } From 9be4774701e1d5032e0831ac82afc143ae1251f7 Mon Sep 17 00:00:00 2001 From: Jonathan Chu Date: Mon, 27 Jul 2020 23:11:03 +0000 Subject: [PATCH 1417/2522] Cache hashable input signature for _cache_key --- tensorflow/python/eager/function.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index f86e2889f3d..6662105cbeb 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -2902,6 +2902,8 @@ class Function(object): self._function_attributes = attributes or {} self._capture_by_value = capture_by_value self.tracing_count = 0 + self._hashable_input_signature = _make_input_signature_hashable( + self.flat_input_signature) self._lock = threading.Lock() # _descriptor_cache is a of instance of a class to an instance-specific @@ -2940,6 +2942,11 @@ class Function(object): """Returns the flattened input signature.""" return self._function_spec.flat_input_signature + @property + def hashable_input_signature(self): + """Returns a cached hashable object for the flattened input signature.""" + return self._hashable_input_signature + def _get_concrete_function_internal_garbage_collected(self, *args, **kwargs): """Returns a concrete function which cleans up its graph function.""" if self.input_signature: @@ -3072,10 +3079,11 @@ class Function(object): inputs = (args, kwargs) if kwargs else args input_signature = pywrap_tfe.TFE_Py_EncodeArg(inputs, include_tensor_ranks_only) + hashable_input_signature = _make_input_signature_hashable(input_signature) else: del args, kwargs assert not include_tensor_ranks_only - input_signature = self.flat_input_signature + hashable_input_signature = self.hashable_input_signature ctx = context.context() @@ -3144,9 +3152,9 @@ class Function(object): save_context.get_save_options().experimental_variable_policy) else: variable_policy = save_options.VariablePolicy.EXPAND_DISTRIBUTED_VARIABLES - + return CacheKey( - 
_make_input_signature_hashable(input_signature), parent_graph, + hashable_input_signature, parent_graph, device_functions, colocation_stack, in_cross_replica_context, variable_policy, xla_context_id) From a616fc34ebdb0bd2438f9cb8854611f9f11322e0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Jul 2020 16:10:58 -0700 Subject: [PATCH 1418/2522] Adds support for string separators to AddJoinedString Also fixes a bug where an initial empty string would not generate a separator. PiperOrigin-RevId: 323462049 Change-Id: I99703ff6b641bcca1b1a72b234ab4766b05a5ae5 --- RELEASE.md | 2 ++ tensorflow/lite/string_util.cc | 27 +++++++++++-------- tensorflow/lite/string_util.h | 2 ++ tensorflow/lite/string_util_test.cc | 42 +++++++++++++++++++++++------ 4 files changed, 54 insertions(+), 19 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 6c7562a80df..7182846a805 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -86,6 +86,8 @@ True, the function may use type annotations to optimize the tracing performance. * `tf.lite`: + * `DynamicBuffer::AddJoinedString()` will now add a separator if the first + string to be joined is empty. * * `tf.random`: * diff --git a/tensorflow/lite/string_util.cc b/tensorflow/lite/string_util.cc index 44719858f2a..799a850a0d4 100644 --- a/tensorflow/lite/string_util.cc +++ b/tensorflow/lite/string_util.cc @@ -35,27 +35,32 @@ void DynamicBuffer::AddString(const StringRef& string) { void DynamicBuffer::AddJoinedString(const std::vector& strings, char separator) { + StringRef ref; + ref.str = &separator; + ref.len = 1; + AddJoinedString(strings, ref); +} + +void DynamicBuffer::AddJoinedString(const std::vector& strings, + StringRef separator) { // Resize the data buffer. - int total_len = strings.size() - 1; + int total_len = (strings.size() - 1) * separator.len; for (StringRef ref : strings) { total_len += ref.len; } data_.resize(data_.size() + total_len); - int current_idx = 0; - for (StringRef ref : strings) { - char* dst = data_.data() + offset_.back() + current_idx; - + char* dst = data_.data() + offset_.back(); + for (int i = 0; i < strings.size(); ++i) { // Fill separator if not first string. - if (current_idx != 0) { - *dst = separator; - ++dst; - ++current_idx; + if (i != 0) { + memcpy(dst, separator.str, separator.len); + dst += separator.len; } // Fill content of the string. - memcpy(dst, ref.str, ref.len); - current_idx += ref.len; + memcpy(dst, strings[i].str, strings[i].len); + dst += strings[i].len; } offset_.push_back(offset_.back() + total_len); } diff --git a/tensorflow/lite/string_util.h b/tensorflow/lite/string_util.h index 879aa76b83b..2086f9badbf 100644 --- a/tensorflow/lite/string_util.h +++ b/tensorflow/lite/string_util.h @@ -69,6 +69,8 @@ class DynamicBuffer { // Join a list of string with separator, and add as a single string to the // buffer. void AddJoinedString(const std::vector& strings, char separator); + void AddJoinedString(const std::vector& strings, + StringRef separator); // Fill content into a buffer and returns the number of bytes stored. // The function allocates space for the buffer but does NOT take ownership. 
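A minimal usage sketch of the new overload (illustration only, not part of this patch; the function name and the literal strings are invented, while DynamicBuffer, StringRef, AddJoinedString, and WriteToTensorAsVector come from the header shown above):

    #include "tensorflow/lite/string_util.h"

    // Sketch: join {"", "ABC", "XYZ"} with the three-byte separator " - "
    // using the new StringRef-based overload of AddJoinedString().
    void ExampleJoinWithStringRefSeparator() {
      tflite::DynamicBuffer buf;
      char sep[] = " - ";
      char s0[] = "";
      char s1[] = "ABC";
      char s2[] = "XYZ";
      buf.AddJoinedString({{s0, 0}, {s1, 3}, {s2, 3}}, {sep, 3});
      // buf now holds the single string " - ABC - XYZ"; with the bug fix,
      // the leading empty string still contributes a separator.
      // A typical next step is buf.WriteToTensorAsVector(tensor) to store
      // the joined result in a string tensor.
    }

The test changes below exercise both the char separator and the StringRef separator overloads against this behavior.
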
diff --git a/tensorflow/lite/string_util_test.cc b/tensorflow/lite/string_util_test.cc index 28d93840c56..d5c4909fcad 100644 --- a/tensorflow/lite/string_util_test.cc +++ b/tensorflow/lite/string_util_test.cc @@ -97,27 +97,53 @@ TEST(StringUtil, TestStringUtil) { ASSERT_EQ(t2->bytes, 15); } -TEST(StringUtil, TestAddJoinedString) { +TEST(StringUtil, TestAddJoinedStringCharSeparator) { Interpreter interpreter; interpreter.AddTensors(1); TfLiteTensor* t0 = interpreter.tensor(0); t0->type = kTfLiteString; t0->allocation_type = kTfLiteDynamic; - char s0[] = "ABC"; - char s1[] = "DEFG"; - char s2[] = ""; - char s3[] = "XYZ"; + char s0[] = ""; + char s1[] = "ABC"; + char s2[] = "DEFG"; + char s3[] = ""; + char s4[] = "XYZ"; DynamicBuffer buf; - buf.AddJoinedString({{s0, 3}, {s1, 4}, {s2, 0}, {s3, 3}}, ' '); + buf.AddJoinedString({{s0, 0}, {s1, 3}, {s2, 4}, {s3, 0}, {s4, 3}}, ' '); buf.WriteToTensorAsVector(t0); ASSERT_EQ(GetStringCount(t0), 1); StringRef str_ref; str_ref = GetString(t0, 0); - ASSERT_EQ(string(str_ref.str, str_ref.len), "ABC DEFG XYZ"); - ASSERT_EQ(t0->bytes, 25); + ASSERT_EQ(string(str_ref.str, str_ref.len), " ABC DEFG XYZ"); + ASSERT_EQ(t0->bytes, 26); +} + +TEST(StringUtil, TestAddJoinedStringStringRefSeparator) { + Interpreter interpreter; + interpreter.AddTensors(1); + TfLiteTensor* t0 = interpreter.tensor(0); + t0->type = kTfLiteString; + t0->allocation_type = kTfLiteDynamic; + + char s[] = " - "; + char s0[] = ""; + char s1[] = "ABC"; + char s2[] = "DEFG"; + char s3[] = ""; + char s4[] = "XYZ"; + + DynamicBuffer buf; + buf.AddJoinedString({{s0, 0}, {s1, 3}, {s2, 4}, {s3, 0}, {s4, 3}}, {s, 3}); + buf.WriteToTensorAsVector(t0); + + ASSERT_EQ(GetStringCount(t0), 1); + StringRef str_ref; + str_ref = GetString(t0, 0); + ASSERT_EQ(string(str_ref.str, str_ref.len), " - ABC - DEFG - - XYZ"); + ASSERT_EQ(t0->bytes, 34); } TEST(StringUtil, TestEmptyList) { From 1e10e449aa0594a324d4ca239fafc841845a0a2d Mon Sep 17 00:00:00 2001 From: Pavithra Vijay Date: Mon, 27 Jul 2020 16:12:50 -0700 Subject: [PATCH 1419/2522] Remove the usage of TF private API image_ops.image_projective_transform_v2 from Keras. 
PiperOrigin-RevId: 323462404 Change-Id: Ic0c1ff99daed63daa5c40c9f18fc4221fd4585e3 --- .../python/keras/layers/preprocessing/image_preprocessing.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/keras/layers/preprocessing/image_preprocessing.py b/tensorflow/python/keras/layers/preprocessing/image_preprocessing.py index e9ec157de59..9b7772d02b1 100644 --- a/tensorflow/python/keras/layers/preprocessing/image_preprocessing.py +++ b/tensorflow/python/keras/layers/preprocessing/image_preprocessing.py @@ -33,6 +33,7 @@ from tensorflow.python.keras.utils import tf_utils from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import gen_image_ops from tensorflow.python.ops import image_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import stateful_random_ops @@ -688,8 +689,8 @@ def transform(images, 'new_height, new_width, instead got ' '{}'.format(output_shape)) - return image_ops.image_projective_transform_v2( - images, + return gen_image_ops.ImageProjectiveTransformV2( + images=images, output_shape=output_shape, transforms=transforms, fill_mode=fill_mode.upper(), From 0f6544c932fb6739641c7fe2438401891b1a8b70 Mon Sep 17 00:00:00 2001 From: Pavithra Vijay Date: Mon, 27 Jul 2020 16:18:13 -0700 Subject: [PATCH 1420/2522] Remove the usage of TF private API init_ops._compute_fans from Keras. PiperOrigin-RevId: 323463488 Change-Id: If0c71609a7ac111651abf9b3ebd46bc790fe993d --- tensorflow/python/keras/initializers_test.py | 40 ++++++++++++++++---- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/keras/initializers_test.py b/tensorflow/python/keras/initializers_test.py index 3e4502f14fc..e254f6340fc 100644 --- a/tensorflow/python/keras/initializers_test.py +++ b/tensorflow/python/keras/initializers_test.py @@ -28,10 +28,36 @@ from tensorflow.python.keras import models from tensorflow.python.keras.engine import input_layer from tensorflow.python.keras.layers import core from tensorflow.python.ops import array_ops -from tensorflow.python.ops import init_ops from tensorflow.python.platform import test +def _compute_fans(shape): + """Computes the number of input and output units for a weight shape. + + Args: + shape: Integer shape tuple or TF tensor shape. + + Returns: + A tuple of integer scalars (fan_in, fan_out). + """ + if len(shape) < 1: # Just to avoid errors for constants. + fan_in = fan_out = 1 + elif len(shape) == 1: + fan_in = fan_out = shape[0] + elif len(shape) == 2: + fan_in = shape[0] + fan_out = shape[1] + else: + # Assuming convolution kernels (2D, 3D, or more). + # kernel shape: (..., input_depth, depth) + receptive_field_size = 1 + for dim in shape[:-2]: + receptive_field_size *= dim + fan_in = shape[-2] * receptive_field_size + fan_out = shape[-1] * receptive_field_size + return int(fan_in), int(fan_out) + + @combinations.generate(combinations.combine(mode=['graph', 'eager'])) class KerasInitializersTest(test.TestCase): @@ -88,7 +114,7 @@ class KerasInitializersTest(test.TestCase): def test_lecun_uniform(self): tensor_shape = (5, 6, 4, 2) with self.cached_session(): - fan_in, _ = init_ops._compute_fans(tensor_shape) + fan_in, _ = _compute_fans(tensor_shape) std = np.sqrt(1. 
/ fan_in) self._runner( initializers.LecunUniformV2(seed=123), @@ -99,7 +125,7 @@ class KerasInitializersTest(test.TestCase): def test_glorot_uniform(self): tensor_shape = (5, 6, 4, 2) with self.cached_session(): - fan_in, fan_out = init_ops._compute_fans(tensor_shape) + fan_in, fan_out = _compute_fans(tensor_shape) std = np.sqrt(2. / (fan_in + fan_out)) self._runner( initializers.GlorotUniformV2(seed=123), @@ -110,7 +136,7 @@ class KerasInitializersTest(test.TestCase): def test_he_uniform(self): tensor_shape = (5, 6, 4, 2) with self.cached_session(): - fan_in, _ = init_ops._compute_fans(tensor_shape) + fan_in, _ = _compute_fans(tensor_shape) std = np.sqrt(2. / fan_in) self._runner( initializers.HeUniformV2(seed=123), @@ -121,7 +147,7 @@ class KerasInitializersTest(test.TestCase): def test_lecun_normal(self): tensor_shape = (5, 6, 4, 2) with self.cached_session(): - fan_in, _ = init_ops._compute_fans(tensor_shape) + fan_in, _ = _compute_fans(tensor_shape) std = np.sqrt(1. / fan_in) self._runner( initializers.LecunNormalV2(seed=123), @@ -132,7 +158,7 @@ class KerasInitializersTest(test.TestCase): def test_glorot_normal(self): tensor_shape = (5, 6, 4, 2) with self.cached_session(): - fan_in, fan_out = init_ops._compute_fans(tensor_shape) + fan_in, fan_out = _compute_fans(tensor_shape) std = np.sqrt(2. / (fan_in + fan_out)) self._runner( initializers.GlorotNormalV2(seed=123), @@ -143,7 +169,7 @@ class KerasInitializersTest(test.TestCase): def test_he_normal(self): tensor_shape = (5, 6, 4, 2) with self.cached_session(): - fan_in, _ = init_ops._compute_fans(tensor_shape) + fan_in, _ = _compute_fans(tensor_shape) std = np.sqrt(2. / fan_in) self._runner( initializers.HeNormalV2(seed=123), From dd2dbb0b32a1039cdeda30d09dc39e61a76ca473 Mon Sep 17 00:00:00 2001 From: Anna R Date: Mon, 27 Jul 2020 16:21:38 -0700 Subject: [PATCH 1421/2522] Disable //tensorflow/core/data/service:journal_test on windows since it is failing. 
PiperOrigin-RevId: 323464116 Change-Id: I55347346a5d06170243d7f0c7532268cc74e0193 --- tensorflow/core/data/service/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/core/data/service/BUILD b/tensorflow/core/data/service/BUILD index 2a23445a518..e4efcdde93b 100644 --- a/tensorflow/core/data/service/BUILD +++ b/tensorflow/core/data/service/BUILD @@ -145,6 +145,7 @@ cc_library( tf_cc_test( name = "journal_test", srcs = ["journal_test.cc"], + tags = ["no_windows"], # b/162268597 deps = [ ":common_proto_cc", ":journal", From 2edc77327649cc818d05a84a680135577dcd3300 Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Mon, 27 Jul 2020 23:31:57 +0000 Subject: [PATCH 1422/2522] removed scalar summary tests from tensorflow/core --- tensorflow/c/kernels/BUILD | 15 ++- tensorflow/c/kernels/summary_op.cc | 2 +- tensorflow/c/kernels/summary_op_test.cc | 18 ++-- tensorflow/core/kernels/summary_op_test.cc | 105 --------------------- 4 files changed, 20 insertions(+), 120 deletions(-) diff --git a/tensorflow/c/kernels/BUILD b/tensorflow/c/kernels/BUILD index fb1c7deb054..d61b2817b40 100644 --- a/tensorflow/c/kernels/BUILD +++ b/tensorflow/c/kernels/BUILD @@ -28,16 +28,15 @@ tf_kernel_library( name = "summary_op", prefix = "summary_op", deps = [ - "//tensorflow/c/kernels:tensor_shape_utils", "//tensorflow/c:kernels", "//tensorflow/c:tf_tensor", "//tensorflow/c:tf_status", + "//tensorflow/c/kernels:tensor_shape_utils", "//tensorflow/core:framework", "//tensorflow/core:lib", ], ) - tf_gen_op_libs( op_lib_names = ["bitcast"], deps = [ @@ -81,15 +80,15 @@ tf_cc_test( "//tensorflow/core:lib", "//tensorflow/core:test", "//tensorflow/core:test_main", - "//tensorflow/core:testlib" + "//tensorflow/core:testlib", ], ) cc_library( name = "tensor_shape_utils", - srcs = ["tensor_shape_utils.cc",], - hdrs = ["tensor_shape_utils.h",], - deps = [ "//tensorflow/c:tf_tensor",], + srcs = ["tensor_shape_utils.cc"], + hdrs = ["tensor_shape_utils.h"], + deps = [ "//tensorflow/c:tf_tensor"], visibility = ["//visibility:private"], ) @@ -116,7 +115,7 @@ filegroup( name = "android_all_op_kernels", srcs = [ "bitcast_op.cc", - "summary_op.cc" + "summary_op.cc", ], ) # LINT.ThenChange(//tensorflow/contrib/makefile/tf_op_files.txt) @@ -125,6 +124,6 @@ filegroup( name = "android_all_ops", srcs = [ "ops/bitcast.cc", - "ops/summary.cc" + "ops/summary.cc", ], ) diff --git a/tensorflow/c/kernels/summary_op.cc b/tensorflow/c/kernels/summary_op.cc index 925521c7c73..3ce3a1256a9 100644 --- a/tensorflow/c/kernels/summary_op.cc +++ b/tensorflow/c/kernels/summary_op.cc @@ -78,7 +78,7 @@ void ScalarSummaryOp_Compute(void* kernel, TF_OpKernelContext* ctx) { } if (!IsSameSize(params.tags, params.values)) { std::ostringstream err; - err << "tags and values not the same shape: " + err << "tags and values are not the same shape: " << tensorflow::ShapeDebugString(params.tags) << " != " << tensorflow::ShapeDebugString(params.values) << SingleTag(params.tags); diff --git a/tensorflow/c/kernels/summary_op_test.cc b/tensorflow/c/kernels/summary_op_test.cc index b19c19ad225..8e05f32a3ed 100644 --- a/tensorflow/c/kernels/summary_op_test.cc +++ b/tensorflow/c/kernels/summary_op_test.cc @@ -53,7 +53,7 @@ void ExpectSummaryMatches(const Summary& actual, const string& expected_str) { } -void TestScalarSummaryOp(Tensor* tags, Tensor* values, string expected_summary, +void TestScalarSummaryOp(Tensor* tags, Tensor* values, string expected_output, error::Code expected_code) { // Initialize node used to fetch OpKernel Status status; @@ -92,9 +92,12 @@ 
void TestScalarSummaryOp(Tensor* tags, Tensor* values, string expected_summary, Summary summary; ASSERT_TRUE(ParseProtoUnlimited(&summary, ctx.mutable_output(0)-> scalar()())); - ExpectSummaryMatches(summary, expected_summary); - + ExpectSummaryMatches(summary, expected_output); } + else { + EXPECT_TRUE(absl::StrContains(ctx.status().ToString(), expected_output)) + << ctx.status(); + } } TEST(ScalarSummaryOpTest, SimpleFloat) { @@ -153,7 +156,8 @@ TEST(ScalarSummaryOpTest, Error_WrongDimsTags) { tags.matrix()(1, 0) = "tag2"; values.vec()(0) = 1.0f; values.vec()(1) = -2.0f; - TestScalarSummaryOp(&tags, &values, R"()", error::INVALID_ARGUMENT); + TestScalarSummaryOp(&tags, &values, "tags and values are not the same shape", + error::INVALID_ARGUMENT); } TEST(ScalarSummaryOpTest, Error_WrongValuesTags) { @@ -163,7 +167,8 @@ TEST(ScalarSummaryOpTest, Error_WrongValuesTags) { tags.vec()(1) = "tag2"; values.matrix()(0, 0) = 1.0f; values.matrix()(1, 0) = -2.0f; - TestScalarSummaryOp(&tags, &values, R"()", error::INVALID_ARGUMENT); + TestScalarSummaryOp(&tags, &values, "tags and values are not the same shape", + error::INVALID_ARGUMENT); } TEST(ScalarSummaryOpTest, Error_WrongWithSingleTag) { @@ -172,7 +177,8 @@ TEST(ScalarSummaryOpTest, Error_WrongWithSingleTag) { tags.vec()(0) = "tag1"; values.matrix()(0, 0) = 1.0f; values.matrix()(1, 0) = -2.0f; - TestScalarSummaryOp(&tags, &values, R"()", error::INVALID_ARGUMENT); + TestScalarSummaryOp(&tags, &values, "tags and values are not the same shape", + error::INVALID_ARGUMENT); } diff --git a/tensorflow/core/kernels/summary_op_test.cc b/tensorflow/core/kernels/summary_op_test.cc index 1e5089bdeab..9c9e87581c6 100644 --- a/tensorflow/core/kernels/summary_op_test.cc +++ b/tensorflow/core/kernels/summary_op_test.cc @@ -45,111 +45,6 @@ static void EXPECT_SummaryMatches(const Summary& actual, EXPECT_EQ(expected.DebugString(), actual.DebugString()); } -class SummaryScalarOpTest : public OpsTestBase { - protected: - void MakeOp(DataType dt) { - TF_ASSERT_OK(NodeDefBuilder("myop", "ScalarSummary") - .Input(FakeInput()) - .Input(FakeInput(dt)) - .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - } -}; - -TEST_F(SummaryScalarOpTest, SimpleFloat) { - MakeOp(DT_FLOAT); - - // Feed and run - AddInputFromArray(TensorShape({3}), {"tag1", "tag2", "tag3"}); - AddInputFromArray(TensorShape({3}), {1.0f, -0.73f, 10000.0f}); - TF_ASSERT_OK(RunOpKernel()); - - // Check the output size. - Tensor* out_tensor = GetOutput(0); - ASSERT_EQ(0, out_tensor->dims()); - Summary summary; - ParseProtoUnlimited(&summary, out_tensor->scalar()()); - EXPECT_SummaryMatches(summary, R"( - value { tag: 'tag1' simple_value: 1.0 } - value { tag: 'tag2' simple_value: -0.73 } - value { tag: 'tag3' simple_value: 10000.0 } - )"); -} - -TEST_F(SummaryScalarOpTest, SimpleDouble) { - MakeOp(DT_DOUBLE); - - // Feed and run - AddInputFromArray(TensorShape({3}), {"tag1", "tag2", "tag3"}); - AddInputFromArray(TensorShape({3}), {1.0, -0.73, 10000.0}); - TF_ASSERT_OK(RunOpKernel()); - - // Check the output size. 
- Tensor* out_tensor = GetOutput(0); - ASSERT_EQ(0, out_tensor->dims()); - Summary summary; - ParseProtoUnlimited(&summary, out_tensor->scalar()()); - EXPECT_SummaryMatches(summary, R"( - value { tag: 'tag1' simple_value: 1.0 } - value { tag: 'tag2' simple_value: -0.73 } - value { tag: 'tag3' simple_value: 10000.0 } - )"); -} - -TEST_F(SummaryScalarOpTest, SimpleHalf) { - MakeOp(DT_HALF); - - // Feed and run - AddInputFromList(TensorShape({3}), {"tag1", "tag2", "tag3"}); - AddInputFromList(TensorShape({3}), {1.0, -2.0, 10000.0}); - TF_ASSERT_OK(RunOpKernel()); - - // Check the output size. - Tensor* out_tensor = GetOutput(0); - ASSERT_EQ(0, out_tensor->dims()); - Summary summary; - ParseProtoUnlimited(&summary, out_tensor->scalar()()); - EXPECT_SummaryMatches(summary, R"( - value { tag: 'tag1' simple_value: 1.0 } - value { tag: 'tag2' simple_value: -2.0 } - value { tag: 'tag3' simple_value: 10000.0 } - )"); -} - -TEST_F(SummaryScalarOpTest, Error_MismatchedSize) { - MakeOp(DT_FLOAT); - - // Feed and run - AddInputFromArray(TensorShape({2}), {"tag1", "tag2"}); - AddInputFromArray(TensorShape({3}), {1.0f, -0.73f, 10000.0f}); - Status s = RunOpKernel(); - EXPECT_TRUE(absl::StrContains(s.ToString(), "not the same shape")) << s; -} - -TEST_F(SummaryScalarOpTest, Error_WrongDimsTags) { - MakeOp(DT_FLOAT); - - // Feed and run - AddInputFromArray(TensorShape({2, 1}), {"tag1", "tag2"}); - AddInputFromArray(TensorShape({2}), {1.0f, -0.73f}); - Status s = RunOpKernel(); - EXPECT_TRUE( - absl::StrContains(s.ToString(), "tags and values not the same shape")) - << s; -} - -TEST_F(SummaryScalarOpTest, Error_WrongDimsValues) { - MakeOp(DT_FLOAT); - - // Feed and run - AddInputFromArray(TensorShape({2}), {"tag1", "tag2"}); - AddInputFromArray(TensorShape({2, 1}), {1.0f, -0.73f}); - Status s = RunOpKernel(); - EXPECT_TRUE( - absl::StrContains(s.ToString(), "tags and values not the same shape")) - << s; -} - // -------------------------------------------------------------------------- // SummaryHistoOp // -------------------------------------------------------------------------- From 84a4da0cdddebc9135231e833c91a18aee3412e0 Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Mon, 27 Jul 2020 16:32:00 -0700 Subject: [PATCH 1423/2522] Expose TF's internal tool for tracking the size of artifacts PiperOrigin-RevId: 323466096 Change-Id: Ied3034233e1804c6757593b339930f710491fba1 --- tensorflow/tools/ci_build/sizetrack_helper.py | 333 ++++++++++++++++++ 1 file changed, 333 insertions(+) create mode 100755 tensorflow/tools/ci_build/sizetrack_helper.py diff --git a/tensorflow/tools/ci_build/sizetrack_helper.py b/tensorflow/tools/ci_build/sizetrack_helper.py new file mode 100755 index 00000000000..d9b3bfadd4b --- /dev/null +++ b/tensorflow/tools/ci_build/sizetrack_helper.py @@ -0,0 +1,333 @@ +#!/usr/bin/env python3 +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +r"""Used for Google-internal artifact size tracking. + +See go/tf-devinfra/sizetrack. + +INVOCATION: The following flags are required: + + sizetrack_helper.py \ + --artifact=ARTIFACT, or --manual_bytes=MANUAL_BYTES + --artifact_id=ARTIFACT_ID \ + --team=TEAM \ + ... other optional args ... + +On Windows you might need something like: + + C:\Python38\python.exe C:\path\to\sizetrack_helper.py ... + +PREREQUISITES: + + 1. Your current activated GCP user must have access scopes and IAM permissions + to do the following: + + 1. Query and load data into BigQuery + 2. Upload files to GCS + + 2. Your environment must match the following criteria: + + 1. Current directory is a git repository + 2. CL-based commits have a PiperOrigin-RevId trailer. This is the case + for any use of Copybara Single-source-of-truth, e.g. TensorFlow. + Only these commits are considered when running commands. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import csv +import datetime +import os.path +import platform +import subprocess + +parser = argparse.ArgumentParser( + usage=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument( + "--project", + type=str, + default="tensorflow-testing", + help="GCP project you can access.") +parser.add_argument( + "--dataset", + type=str, + default="sizetracker", + help="BigQuery dataset containing --table") +parser.add_argument( + "--table", type=str, default="tensorflow_devinfra", help="BigQuery table.") +parser.add_argument( + "--upload", + action="store_true", + help="Upload the artifact to --bucket for analysis.") +parser.add_argument( + "--bucket", + type=str, + default="gs://tensorflow-testing-bucket", + help="GCS bucket for artifacts.") +parser.add_argument( + "--team", + type=str, + help="For grouping in the dashboard and buckets; e.g. 
tf-lite-team.") +parser.add_argument( + "--artifact_id", + type=str, + help="Unique ID for your artifact, used for sorting dashboards.") +parser.add_argument( + "--print_schema", + action="store_true", + help="Print the table schema and don't do anything else.") +size = parser.add_mutually_exclusive_group() +size.add_argument( + "--artifact", + type=argparse.FileType("r"), + help="Local to file you are measuring.") +size.add_argument( + "--manual_bytes", + type=int, + help="Manually set the recorded size instead of providing an artifact.") +FLAGS = parser.parse_args() + +TABLE_NAME = "{}.{}".format(FLAGS.dataset, FLAGS.table) +PROJECT_LEVEL_TABLE_NAME = "{}:{}".format(FLAGS.project, TABLE_NAME) +CL_TRAILER = "PiperOrigin-RevId" +PRETTY_COMMIT_DATE = "%cI" +PRETTY_CL = "%(trailers:key={},valueonly)".format(CL_TRAILER) +PRETTY_HEAD_INFO = "%h\t{cl}\t%s\t%ae\t%aI\t%ce\t%cI".format(cl=PRETTY_CL) +PRETTY_EARLY = "{cl}\t%aI\t%cI".format(cl=PRETTY_CL) +PRETTY_COMMIT = "%h" +# This is a BigQuery table schema defined as CSV +# See https://cloud.google.com/bigquery/docs/schemas +SCHEMA = ",".join([ + "id:string", + "filename:string", + # These 6 lines are from git's format=pretty + # %h $CL_PRETTY %s %ae %aI %ce %cI + "commit:string", + "cl:int64", + "description:string", + "author:string", + "author_date:timestamp", + "committer:string", + "commit_date:timestamp", + # Done with format=pretty + "earliest_commit:string", + "earliest_cl:int64", + "earliest_author_date:timestamp", + "earliest_commit_date:timestamp", + "all_commits:string", + "all_cls:string", + "bytes:int64", + "team:string", + "logged_date:timestamp", +]) +# Select the earliest recorded commit in the same table for the same artifact +# and team. Used to determine the full range of tested commits for each +# invocation. Returns empty string if there are no earlier records. +BQ_GET_EARLIEST_INCLUDED_COMMIT = """ + SELECT + commit + FROM {table} WHERE + commit_date < '{earlier_than_this_date}' + AND id = '{artifact_id}' + AND team = '{team}' + ORDER BY commit_date DESC LIMIT 1 +""" + + +# pylint: disable=unused-argument +def git_pretty(commit_range, pretty_format, n=None): + r"""Run git log and return the cleaned results. + + Git is assumed to be available in the PATH. + + The PiperOrigin-RevId trailer always picks up an extra newline, so this splits + entries on a null byte (\0, or %x00 for git log) and removes newlines. + + Args: + commit_range: Standard range given to git log, e.g. HEAD~1..HEAD + pretty_format: See https://git-scm.com/docs/pretty-formats + n: Number of commits to get. By default, get all within commit_range. + + Returns: + List of strings of whatever the format string was. + """ + n = [] if n is None else ["-n", "1"] + try: + ret = subprocess.run([ + "git", "log", *n, "--date", "iso", "--grep", CL_TRAILER, commit_range, + "--pretty=format:" + pretty_format + "%x00" + ], + check=True, + universal_newlines=True, + stderr=subprocess.PIPE, + stdout=subprocess.PIPE) + except subprocess.CalledProcessError as e: + print(e.stderr) + print(e.stdout) + raise e + out = ret.stdout.replace("\n", "") + return list(map(str.strip, out.split("\0"))) + + +def gcloud(tool, args, stdin=None): + r"""Run a Google cloud utility. + + On Linux and MacOS, utilities are assumed to be in the PATH. + On Windows, utilities are assumed to be available as + C:\Program Files (x86)\Google\Cloud SDK\google-cloud-sdk\bin\{tool}.cmd + + Args: + tool: CLI tool, e.g. 
bq, gcloud, gsutil + args: List of arguments, same format as subprocess.run + stdin: String to send to stdin + + Returns: + String, the stdout of the tool + """ + + if platform.system() == "Windows": + tool = (r"C:\Program Files (x86)\Google\Cloud " + r"SDK\google-cloud-sdk\bin\{}.cmd").format(tool) + + try: + ret = subprocess.run([tool, *args], + check=True, + universal_newlines=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + input=stdin) + except subprocess.CalledProcessError as e: + print(e.stderr) + print(e.stdout) + raise e + return ret.stdout.strip() + + +def get_all_tested_commits(): + """Get details about the full commit range tested by this invocation.""" + head_info = git_pretty("HEAD", PRETTY_HEAD_INFO, n=1) + _, _, _, _, _, _, current_commit_date = head_info[0].split("\t") + + query_earliest_included_commit = BQ_GET_EARLIEST_INCLUDED_COMMIT.format( + table=TABLE_NAME, + earlier_than_this_date=current_commit_date, + artifact_id=FLAGS.artifact_id, + team=FLAGS.team) + + # --format=csv returns an empty string if no results, or else two lines: + # commit + # COMMIT_HASH + earliest_commit = gcloud( + "bq", [ + "--project_id", FLAGS.project, "query", "--format", "csv", + "--nouse_legacy_sql" + ], + stdin=query_earliest_included_commit) + + # Compute the commit/CL range since the last test + if earliest_commit: + + earliest_commit = earliest_commit.splitlines()[-1] # Ignore CSV header + early_cl, early_author_date, early_commit_date = git_pretty( + earliest_commit, PRETTY_EARLY, n=1)[0].split("\t") + + all_range = "{commit}..HEAD".format(commit=earliest_commit) + all_commits = ",".join(git_pretty(all_range, PRETTY_COMMIT)) + all_changelists = ",".join(git_pretty(all_range, PRETTY_CL)) + + return [ + earliest_commit, early_cl, early_author_date, early_commit_date, + all_commits, all_changelists + ] + + # If the artifact has never been tracked before this commit + # Empty cells in CSV loads are loaded as NULL values + else: + return [""] * 6 + + +def build_row(): + """Assemble one row of data about this artifact.""" + (earliest_commit, early_cl, early_author_date, + early_commit_date, all_commits, all_changelists) = get_all_tested_commits() + + # Use UTC to make sure machines in different timezones load consistent data + current_time = datetime.datetime.now(datetime.timezone.utc).isoformat() + artifact_filename = ("NO_FILE" if not FLAGS.artifact + else os.path.basename(FLAGS.artifact.name)) + size_bytes = FLAGS.manual_bytes or os.path.getsize(FLAGS.artifact.name) + head_info = git_pretty("HEAD", PRETTY_HEAD_INFO, n=1) + all_head_info_items = head_info[0].split("\t") + return [ + FLAGS.artifact_id, + artifact_filename, + *all_head_info_items, + earliest_commit, + early_cl, + early_author_date, + early_commit_date, + all_commits, + all_changelists, + size_bytes, + FLAGS.team, + current_time, + ] + + +def main(): + + # Validate flags + if FLAGS.print_schema: + print(SCHEMA) + exit(0) + elif not FLAGS.team or not FLAGS.artifact_id or not (FLAGS.artifact or + FLAGS.manual_bytes): + print( + "--team and --artifact_id are required if --print_schema is not " + "specified.\nYou must also specify one of --artifact or --manual_bytes." + "\nPass -h or --help for usage." 
+ ) + exit(1) + + # Generate data about this artifact into a Tab Separated Value file + next_tsv_row = build_row() + with open("data.tsv", "w") as tsvfile: + writer = csv.writer(tsvfile, delimiter="\t", quoting=csv.QUOTE_MINIMAL) + writer.writerow(next_tsv_row) + + # Load into BigQuery + gcloud("bq", [ + "--project_id", FLAGS.project, "load", "--source_format", "CSV", + "--field_delimiter", "tab", PROJECT_LEVEL_TABLE_NAME, "data.tsv", SCHEMA + ]) + + # Upload artifact into GCS + if FLAGS.upload: + # note: not os.path.join here, because gsutil is always linux-style + head_info = git_pretty("HEAD", PRETTY_HEAD_INFO, n=1) + _, current_cl, _, _, _, _, _ = head_info[0].split("\t") + path = "{bucket}/{team}/{artifact_id}/{cl}.{artifact}".format( + bucket=FLAGS.bucket, + team=FLAGS.team, + artifact_id=FLAGS.artifact_id, + cl=current_cl, + artifact=FLAGS.artifact) + gcloud("gsutil", ["cp", FLAGS.artifact, path]) + +if __name__ == "__main__": + main() From b15d294cc6e37328b0115afeea5ba69bc545ed35 Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Mon, 27 Jul 2020 23:47:43 +0000 Subject: [PATCH 1424/2522] fixed BUILDIFIER errors in c/kernels/BUILD --- tensorflow/c/kernels/BUILD | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/c/kernels/BUILD b/tensorflow/c/kernels/BUILD index d61b2817b40..b2d72c74dc8 100644 --- a/tensorflow/c/kernels/BUILD +++ b/tensorflow/c/kernels/BUILD @@ -30,8 +30,8 @@ tf_kernel_library( deps = [ "//tensorflow/c:kernels", "//tensorflow/c:tf_tensor", - "//tensorflow/c:tf_status", "//tensorflow/c/kernels:tensor_shape_utils", + "//tensorflow/c:tf_status", "//tensorflow/core:framework", "//tensorflow/core:lib", ], @@ -96,10 +96,10 @@ tf_cc_test( name = "tensor_shape_utils_test", srcs = ["tensor_shape_utils_test.cc"], deps = [ + "//tensorflow/core:framework", ":tensor_shape_utils", "//tensorflow/core:lib", "//tensorflow/core:test", - "//tensorflow/core:framework", "//tensorflow/core:test_main", ], ) From 6a875d47610faeebc95b9875f83330299d141d73 Mon Sep 17 00:00:00 2001 From: Tiezhen WANG Date: Mon, 27 Jul 2020 16:55:03 -0700 Subject: [PATCH 1425/2522] TFLM: Implement the last piece for multi-tenant allocator. The major change is in SimpleMemoryAllocator to allow the head space to be reused among different models. PiperOrigin-RevId: 323470479 Change-Id: If709181da5e9b71222742b2850e6b08d25122a49 --- tensorflow/lite/micro/micro_allocator.cc | 18 ++-- tensorflow/lite/micro/micro_allocator_test.cc | 55 ++++++++++- .../lite/micro/micro_interpreter_test.cc | 97 ++++++++++++++++++- .../recording_simple_memory_allocator.cc | 6 +- .../micro/recording_simple_memory_allocator.h | 2 +- .../recording_simple_memory_allocator_test.cc | 6 +- .../lite/micro/simple_memory_allocator.cc | 48 +++++---- .../lite/micro/simple_memory_allocator.h | 17 ++-- .../micro/simple_memory_allocator_test.cc | 31 +++++- 9 files changed, 235 insertions(+), 45 deletions(-) diff --git a/tensorflow/lite/micro/micro_allocator.cc b/tensorflow/lite/micro/micro_allocator.cc index 39358b33b16..29a0c002cab 100644 --- a/tensorflow/lite/micro/micro_allocator.cc +++ b/tensorflow/lite/micro/micro_allocator.cc @@ -1011,6 +1011,7 @@ const SubGraph* MicroAllocator::GetSubGraphFromModel(const Model* model) { TfLiteStatus MicroAllocator::CommitStaticMemoryPlan( const Model* model, const SubGraph* subgraph, TfLiteEvalTensor* eval_tensors) { + size_t head_usage = 0; // Create static memory plan // 1. Calculate AllocationInfo to know the lifetime of each tensor/buffer. // 2. 
Add them into the planner (such as the GreedyMemoryPlanner). @@ -1020,7 +1021,7 @@ TfLiteStatus MicroAllocator::CommitStaticMemoryPlan( // thrown away when the child allocator (tmp_allocator) goes out of scope. { SimpleMemoryAllocator tmp_allocator(error_reporter_, - memory_allocator_->GetHead(), + memory_allocator_->GetBufferHead(), memory_allocator_->GetTail()); AllocationInfoBuilder builder(error_reporter_, &tmp_allocator); @@ -1039,7 +1040,7 @@ TfLiteStatus MicroAllocator::CommitStaticMemoryPlan( // Remaining arena size that memory planner can use for calculating offsets. size_t remaining_arena_size = tmp_allocator.GetAvailableMemory(); uint8_t* planner_arena = - tmp_allocator.AllocateFromHead(remaining_arena_size, /*alignment=*/1); + tmp_allocator.AdjustHead(remaining_arena_size, kBufferAlignment); TF_LITE_ENSURE(error_reporter_, planner_arena != nullptr); GreedyMemoryPlanner planner(planner_arena, remaining_arena_size); TF_LITE_ENSURE_STATUS( @@ -1059,14 +1060,15 @@ TfLiteStatus MicroAllocator::CommitStaticMemoryPlan( // Commit the plan. TF_LITE_ENSURE_STATUS(CommitPlan(error_reporter_, &planner, - memory_allocator_->GetHead(), + memory_allocator_->GetBufferHead(), allocation_info, builder.Size())); - // Allocate the planned area, so the allocator knows it's used. - uint8_t* allocated_tensor_memory = - memory_allocator_->AllocateFromHead(planner.GetMaximumMemorySize(), - /*alignment=*/1); - TF_LITE_ENSURE(error_reporter_, allocated_tensor_memory != nullptr); + head_usage = planner.GetMaximumMemorySize(); } + // Allocate the planned area, so the allocator knows it's used. + + uint8_t* allocated_tensor_memory = + memory_allocator_->AdjustHead(head_usage, kBufferAlignment); + TF_LITE_ENSURE(error_reporter_, allocated_tensor_memory != nullptr); return kTfLiteOk; } diff --git a/tensorflow/lite/micro/micro_allocator_test.cc b/tensorflow/lite/micro/micro_allocator_test.cc index 22eb999ec9b..32d52a994d9 100644 --- a/tensorflow/lite/micro/micro_allocator_test.cc +++ b/tensorflow/lite/micro/micro_allocator_test.cc @@ -243,17 +243,30 @@ TF_LITE_MICRO_TEST(TestFailsWhenModelStartsTwice) { &node_and_registration, &eval_tensors)); } -TF_LITE_MICRO_TEST(TestFailsWhenModelFinishesBeforeStart) { +TF_LITE_MICRO_TEST(TestFailsWithWrongSequence) { const tflite::Model* model = tflite::testing::GetSimpleMockModel(); TfLiteEvalTensor* eval_tensors = nullptr; tflite::AllOpsResolver op_resolver = tflite::testing::GetOpResolver(); + tflite::NodeAndRegistration* node_and_registration; constexpr size_t arena_size = 1024; uint8_t arena[arena_size]; tflite::MicroAllocator* allocator = tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter); TF_LITE_MICRO_EXPECT_NE(nullptr, allocator); + + // We can't finish allocation before it ever got started. TF_LITE_MICRO_EXPECT_EQ( kTfLiteError, allocator->FinishModelAllocation(model, eval_tensors)); + + // Start twice is not allowed. + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, + allocator->StartModelAllocation(model, op_resolver, + &node_and_registration, &eval_tensors)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteError, + allocator->StartModelAllocation(model, op_resolver, + &node_and_registration, &eval_tensors)); } TF_LITE_MICRO_TEST(TestMockModelAllocation) { @@ -294,6 +307,46 @@ TF_LITE_MICRO_TEST(TestMockModelAllocation) { /*count=*/2); } +TF_LITE_MICRO_TEST(TestMultiTenantAllocation) { + // The `OpResolver` is shared among different models in this test for + // simplicity but in practice you could have different `OpResolver`. 
+ tflite::AllOpsResolver op_resolver = tflite::testing::GetOpResolver(); + + // Create a shared allocator. + constexpr size_t arena_size = 4096; + uint8_t arena[arena_size]; + tflite::MicroAllocator* allocator = + tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter); + TF_LITE_MICRO_EXPECT_NE(nullptr, allocator); + TfLiteEvalTensor* eval_tensors = nullptr; + + // Allocate for model 1. We use ComplexMockModel here to cover the code path + // allocatig variables. + const tflite::Model* model1 = tflite::testing::GetComplexMockModel(); + tflite::NodeAndRegistration* node_and_registration1; + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, + allocator->StartModelAllocation(model1, op_resolver, + &node_and_registration1, &eval_tensors)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, allocator->FinishModelAllocation(model1, eval_tensors)); + const size_t single_model_used_bytes = allocator->used_bytes(); + + // Allocate for model 2. + const tflite::Model* model2 = tflite::testing::GetComplexMockModel(); + tflite::NodeAndRegistration* node_and_registration2; + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, + allocator->StartModelAllocation(model2, op_resolver, + &node_and_registration2, &eval_tensors)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, allocator->FinishModelAllocation(model2, eval_tensors)); + + // Allocation for two instances of the same model takes less memory as `head` + // of the arena is reused. + TF_LITE_MICRO_EXPECT_LE(allocator->used_bytes(), 2 * single_model_used_bytes); +} + TF_LITE_MICRO_TEST(TestAllocationForModelsWithBranches) { const tflite::Model* model = tflite::testing::GetSimpleModelWithBranch(); TfLiteEvalTensor* eval_tensors = nullptr; diff --git a/tensorflow/lite/micro/micro_interpreter_test.cc b/tensorflow/lite/micro/micro_interpreter_test.cc index 3037ffaada9..150dbead337 100644 --- a/tensorflow/lite/micro/micro_interpreter_test.cc +++ b/tensorflow/lite/micro/micro_interpreter_test.cc @@ -121,6 +121,99 @@ TF_LITE_MICRO_TEST(TestInterpreter) { TF_LITE_MICRO_EXPECT_EQ(tflite::testing::MockCustom::freed_, true); } +TF_LITE_MICRO_TEST(TestMultiTenantInterpreter) { + tflite::AllOpsResolver op_resolver = tflite::testing::GetOpResolver(); + constexpr size_t arena_size = 8192; + uint8_t arena[arena_size]; + + size_t simple_model_head_usage = 0, complex_model_head_usage = 0; + + // Get simple_model_head_usage. + { + tflite::RecordingMicroAllocator* allocator = + tflite::RecordingMicroAllocator::Create(arena, arena_size, + micro_test::reporter); + const tflite::Model* model0 = tflite::testing::GetSimpleMockModel(); + tflite::MicroInterpreter interpreter0(model0, op_resolver, allocator, + micro_test::reporter); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, interpreter0.AllocateTensors()); + simple_model_head_usage = + allocator->GetSimpleMemoryAllocator()->GetHeadUsedBytes(); + + TfLiteTensor* input = interpreter0.input(0); + TfLiteTensor* output = interpreter0.output(0); + input->data.i32[0] = 21; + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, interpreter0.Invoke()); + TF_LITE_MICRO_EXPECT_EQ(42, output->data.i32[0]); + } + + // Shared allocator for various models. + tflite::RecordingMicroAllocator* allocator = + tflite::RecordingMicroAllocator::Create(arena, arena_size, + micro_test::reporter); + + // Get complex_model_head_usage. No head space reuse since it's the first + // model allocated in the `allocator`. 
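The arena accounting that these multi-tenant tests verify can be summarized outside of the TFLM APIs. Below is a minimal standalone C++ sketch, using hypothetical byte counts rather than numbers taken from the mock models in this file, of why the head section of a shared arena is sized to the largest per-model plan while tail usage accumulates per model; the test body continues after the sketch.

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>

// Hypothetical per-model arena requirements, in bytes.
struct ModelPlan {
  std::size_t head_bytes;  // scratch/activation plan; reusable across models
  std::size_t tail_bytes;  // persistent allocations; kept per model
};

int main() {
  const ModelPlan small_model{/*head_bytes=*/512, /*tail_bytes=*/256};
  const ModelPlan large_model{/*head_bytes=*/1024, /*tail_bytes=*/640};

  // The head is a shared high-water mark, so only the largest per-model plan
  // matters. Tail allocations persist per model, so they add up.
  const std::size_t shared_head =
      std::max(small_model.head_bytes, large_model.head_bytes);
  const std::size_t shared_tail =
      small_model.tail_bytes + large_model.tail_bytes;

  // 1024 + 896 = 1920 bytes here, versus 1536 + 896 = 2432 if each model kept
  // its own head section.
  std::cout << "shared arena needs " << (shared_head + shared_tail)
            << " bytes\n";
  return 0;
}
```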
+ const tflite::Model* model1 = tflite::testing::GetComplexMockModel(); + tflite::MicroInterpreter interpreter1(model1, op_resolver, allocator, + micro_test::reporter); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, interpreter1.AllocateTensors()); + TfLiteTensor* input1 = interpreter1.input(0); + TfLiteTensor* output1 = interpreter1.output(0); + complex_model_head_usage = + allocator->GetSimpleMemoryAllocator()->GetHeadUsedBytes(); + + // Allocate simple model from the same `allocator`. Some head space will + // be reused thanks to multi-tenant TFLM support. Also makes sure that + // the output is correct. + const tflite::Model* model2 = tflite::testing::GetSimpleMockModel(); + tflite::MicroInterpreter interpreter2(model2, op_resolver, allocator, + micro_test::reporter); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, interpreter2.AllocateTensors()); + TfLiteTensor* input2 = interpreter2.input(0); + TfLiteTensor* output2 = interpreter2.output(0); + // Verify that 1 + 1 < 2. + size_t multi_tenant_head_usage = + allocator->GetSimpleMemoryAllocator()->GetHeadUsedBytes(); + TF_LITE_MICRO_EXPECT_LE(multi_tenant_head_usage, + complex_model_head_usage + simple_model_head_usage); + + // Now we have model1 and model2 sharing the same `allocator`. + // Let's make sure that they can produce correct results. + TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt32, input1->type); + input1->data.i32[0] = 10; + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, interpreter1.Invoke()); + // Output tensor for the first model. + TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt32, output1->type); + TF_LITE_MICRO_EXPECT_EQ(10, output1->data.i32[0]); + + TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt32, input2->type); + input2->data.i32[0] = 21; + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, interpreter2.Invoke()); + // Output for the second model. + TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt32, output2->type); + TF_LITE_MICRO_EXPECT_EQ(42, output2->data.i32[0]); + + // Allocate another complex model from the `allocator` will not increase + // head space usage. + const tflite::Model* model3 = tflite::testing::GetComplexMockModel(); + tflite::MicroInterpreter interpreter3(model3, op_resolver, allocator, + micro_test::reporter); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, interpreter3.AllocateTensors()); + TfLiteTensor* input3 = interpreter3.input(0); + TfLiteTensor* output3 = interpreter3.output(0); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt32, input3->type); + input3->data.i32[0] = 10; + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, interpreter3.Invoke()); + // Output tensor for the third model. + TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt32, output3->type); + TF_LITE_MICRO_EXPECT_EQ(10, output3->data.i32[0]); + // No increase on the head usage as we're reusing the space. + TF_LITE_MICRO_EXPECT_EQ( + multi_tenant_head_usage, + allocator->GetSimpleMemoryAllocator()->GetHeadUsedBytes()); +} + TF_LITE_MICRO_TEST(TestKernelMemoryPlanning) { const tflite::Model* model = tflite::testing::GetSimpleStatefulModel(); TF_LITE_MICRO_EXPECT_NE(nullptr, model); @@ -388,8 +481,8 @@ TF_LITE_MICRO_TEST(TestInterpreterDoesNotAllocateUntilInvoke) { static_cast(0)); // TODO(b/160160549): This check is mostly meaningless right now because the - // operator creation in our mock models is inconsistent. Revisit what this - // check should be once the mock models are properly created. + // operator creation in our mock models is inconsistent. Revisit what + // this check should be once the mock models are properly created. 
TF_LITE_MICRO_EXPECT_EQ( allocator->GetRecordedAllocation(tflite::RecordedAllocationType::kOpData) .used_bytes, diff --git a/tensorflow/lite/micro/recording_simple_memory_allocator.cc b/tensorflow/lite/micro/recording_simple_memory_allocator.cc index 5e7eb5754e7..f44afad5eb2 100644 --- a/tensorflow/lite/micro/recording_simple_memory_allocator.cc +++ b/tensorflow/lite/micro/recording_simple_memory_allocator.cc @@ -56,10 +56,10 @@ size_t RecordingSimpleMemoryAllocator::GetAllocatedCount() const { return alloc_count_; } -uint8_t* RecordingSimpleMemoryAllocator::AllocateFromHead(size_t size, - size_t alignment) { +uint8_t* RecordingSimpleMemoryAllocator::AdjustHead(size_t size, + size_t alignment) { const uint8_t* previous_head = GetHead(); - uint8_t* result = SimpleMemoryAllocator::AllocateFromHead(size, alignment); + uint8_t* result = SimpleMemoryAllocator::AdjustHead(size, alignment); if (result != nullptr) { used_bytes_ += GetHead() - previous_head; requested_bytes_ += size; diff --git a/tensorflow/lite/micro/recording_simple_memory_allocator.h b/tensorflow/lite/micro/recording_simple_memory_allocator.h index 270d9543404..e1ac0ebfd20 100644 --- a/tensorflow/lite/micro/recording_simple_memory_allocator.h +++ b/tensorflow/lite/micro/recording_simple_memory_allocator.h @@ -47,7 +47,7 @@ class RecordingSimpleMemoryAllocator : public SimpleMemoryAllocator { // Returns the number of alloc calls from the head or tail. size_t GetAllocatedCount() const; - uint8_t* AllocateFromHead(size_t size, size_t alignment) override; + uint8_t* AdjustHead(size_t size, size_t alignment) override; uint8_t* AllocateFromTail(size_t size, size_t alignment) override; private: diff --git a/tensorflow/lite/micro/recording_simple_memory_allocator_test.cc b/tensorflow/lite/micro/recording_simple_memory_allocator_test.cc index 16dbdb74437..ec37f399ed8 100644 --- a/tensorflow/lite/micro/recording_simple_memory_allocator_test.cc +++ b/tensorflow/lite/micro/recording_simple_memory_allocator_test.cc @@ -83,7 +83,7 @@ TF_LITE_MICRO_TEST(TestRecordsHeadAllocations) { tflite::RecordingSimpleMemoryAllocator allocator(micro_test::reporter, arena, arena_size); - uint8_t* result = allocator.AllocateFromHead(/*size=*/5, /*alignment=*/1); + uint8_t* result = allocator.AdjustHead(/*size=*/5, /*alignment=*/1); TF_LITE_MICRO_EXPECT_NE(result, nullptr); TF_LITE_MICRO_EXPECT_EQ(allocator.GetUsedBytes(), static_cast(5)); TF_LITE_MICRO_EXPECT_EQ(allocator.GetRequestedBytes(), @@ -106,7 +106,7 @@ TF_LITE_MICRO_TEST(TestRecordsMisalignedHeadAllocations) { tflite::RecordingSimpleMemoryAllocator allocator(micro_test::reporter, arena, arena_size); - uint8_t* result = allocator.AllocateFromHead(/*size=*/10, /*alignment=*/12); + uint8_t* result = allocator.AdjustHead(/*size=*/10, /*alignment=*/12); TF_LITE_MICRO_EXPECT_NE(result, nullptr); // Validate used bytes in 8 byte range that can included alignment of 12: TF_LITE_MICRO_EXPECT_GE(allocator.GetUsedBytes(), static_cast(10)); @@ -123,7 +123,7 @@ TF_LITE_MICRO_TEST(TestDoesNotRecordFailedTailAllocations) { tflite::RecordingSimpleMemoryAllocator allocator(micro_test::reporter, arena, arena_size); - uint8_t* result = allocator.AllocateFromHead(/*size=*/2048, /*alignment=*/1); + uint8_t* result = allocator.AdjustHead(/*size=*/2048, /*alignment=*/1); TF_LITE_MICRO_EXPECT(result == nullptr); TF_LITE_MICRO_EXPECT_EQ(allocator.GetUsedBytes(), static_cast(0)); TF_LITE_MICRO_EXPECT_EQ(allocator.GetRequestedBytes(), diff --git a/tensorflow/lite/micro/simple_memory_allocator.cc 
b/tensorflow/lite/micro/simple_memory_allocator.cc index 48cfdc02a34..37c5acd37d3 100644 --- a/tensorflow/lite/micro/simple_memory_allocator.cc +++ b/tensorflow/lite/micro/simple_memory_allocator.cc @@ -31,7 +31,7 @@ SimpleMemoryAllocator::SimpleMemoryAllocator(ErrorReporter* error_reporter, : error_reporter_(error_reporter), buffer_head_(buffer_head), buffer_tail_(buffer_tail), - head_(buffer_head), + head_watermark_(buffer_head), tail_(buffer_tail), temp_(buffer_head_) {} @@ -59,27 +59,39 @@ SimpleMemoryAllocator* SimpleMemoryAllocator::Create( SimpleMemoryAllocator::~SimpleMemoryAllocator() {} -uint8_t* SimpleMemoryAllocator::AllocateFromHead(size_t size, - size_t alignment) { - if (head_ != temp_) { - TF_LITE_REPORT_ERROR( - error_reporter_, - "Called AllocateFromHead() after AllocateTemp() without resetting temp " - "allocations with ResetTempAllocations()"); +uint8_t* SimpleMemoryAllocator::AdjustHead(size_t size, size_t alignment) { + if (head_watermark_ != temp_) { + TF_LITE_REPORT_ERROR(error_reporter_, + "Internal error: AdjustHead() needs to be called after" + "ResetTempAllocations()."); return nullptr; } - uint8_t* ret = AllocateTemp(size, alignment); - head_ = temp_; - return ret; + uint8_t* const aligned_result = AlignPointerUp(buffer_head_, alignment); + if (aligned_result + size < head_watermark_) { + return aligned_result; + } + + const size_t available_memory = tail_ - aligned_result; + if (available_memory < size) { + TF_LITE_REPORT_ERROR( + error_reporter_, + "Failed to allocate memory. Requested: %u, available %u, missing: %u", + size, available_memory, size - available_memory); + return nullptr; + } + head_watermark_ = aligned_result + size; + temp_ = head_watermark_; + + return aligned_result; } uint8_t* SimpleMemoryAllocator::AllocateFromTail(size_t size, size_t alignment) { uint8_t* const aligned_result = AlignPointerDown(tail_ - size, alignment); - if (aligned_result < head_) { + if (aligned_result < head_watermark_) { #ifndef TF_LITE_STRIP_ERROR_STRINGS - const size_t missing_memory = head_ - aligned_result; + const size_t missing_memory = head_watermark_ - aligned_result; TF_LITE_REPORT_ERROR( error_reporter_, "Failed to allocate memory. 
Requested: %u, available %u, missing: %u",
@@ -105,14 +117,16 @@ uint8_t* SimpleMemoryAllocator::AllocateTemp(size_t size, size_t alignment) {
   return aligned_result;
 }
 
-void SimpleMemoryAllocator::ResetTempAllocations() { temp_ = head_; }
+void SimpleMemoryAllocator::ResetTempAllocations() { temp_ = head_watermark_; }
 
-uint8_t* SimpleMemoryAllocator::GetHead() const { return head_; }
+uint8_t* SimpleMemoryAllocator::GetHead() const { return head_watermark_; }
+
+uint8_t* SimpleMemoryAllocator::GetBufferHead() const { return buffer_head_; }
 
 uint8_t* SimpleMemoryAllocator::GetTail() const { return tail_; }
 
 size_t SimpleMemoryAllocator::GetHeadUsedBytes() const {
-  return head_ - buffer_head_;
+  return head_watermark_ - buffer_head_;
 }
 
 size_t SimpleMemoryAllocator::GetTailUsedBytes() const {
@@ -120,7 +134,7 @@ size_t SimpleMemoryAllocator::GetTailUsedBytes() const {
 }
 
 size_t SimpleMemoryAllocator::GetAvailableMemory() const {
-  return tail_ - head_;
+  return tail_ - buffer_head_;
 }
 
 size_t SimpleMemoryAllocator::GetUsedBytes() const {
diff --git a/tensorflow/lite/micro/simple_memory_allocator.h b/tensorflow/lite/micro/simple_memory_allocator.h
index fd2363955f2..26e217096e5 100644
--- a/tensorflow/lite/micro/simple_memory_allocator.h
+++ b/tensorflow/lite/micro/simple_memory_allocator.h
@@ -42,13 +42,13 @@ class SimpleMemoryAllocator {
                                        uint8_t* buffer_head,
                                        size_t buffer_size);
 
-  // Allocates memory starting at the head of the arena (lowest address and
-  // moving upwards). Calls to this method will also invalidate all temporary
-  // allocation values. This call will fail if a chain allocation calls through
-  // AllocateTemp() have not been cleaned up with a call to
-  // ResetTempAllocations().
-  virtual uint8_t* AllocateFromHead(size_t size, size_t alignment);
-
+  // Adjusts memory allocations starting at the head of the arena (lowest
+  // address and moving upwards). It only tracks the maximum head usage and
+  // makes sure that memory used at the head does not overlap with memory
+  // reserved at the tail. Calls to this method will also invalidate all
+  // temporary allocation values. This call will fail if a chain of allocation
+  // calls through AllocateTemp() has not been cleaned up with a call to ResetTempAllocations().
+  virtual uint8_t* AdjustHead(size_t size, size_t alignment);
   // Allocates memory starting at the tail of the arena (highest address and
   // moving downwards).
   virtual uint8_t* AllocateFromTail(size_t size, size_t alignment);
@@ -68,6 +68,7 @@
   virtual void ResetTempAllocations();
 
   uint8_t* GetHead() const;
+  uint8_t* GetBufferHead() const;
   uint8_t* GetTail() const;
 
   size_t GetHeadUsedBytes() const;
@@ -82,7 +83,7 @@
   ErrorReporter* error_reporter_;
   uint8_t* buffer_head_;
   uint8_t* buffer_tail_;
-  uint8_t* head_;
+  uint8_t* head_watermark_;
   uint8_t* tail_;
   uint8_t* temp_;
 
diff --git a/tensorflow/lite/micro/simple_memory_allocator_test.cc b/tensorflow/lite/micro/simple_memory_allocator_test.cc
index ef97089b00b..0829c7766d4 100644
--- a/tensorflow/lite/micro/simple_memory_allocator_test.cc
+++ b/tensorflow/lite/micro/simple_memory_allocator_test.cc
@@ -22,6 +22,33 @@ limitations under the License.
 
 TF_LITE_MICRO_TESTS_BEGIN
 
+TF_LITE_MICRO_TEST(TestAdjustHead) {
+  constexpr size_t arena_size = 1024;
+  uint8_t arena[arena_size];
+  tflite::SimpleMemoryAllocator allocator(micro_test::reporter, arena,
+                                          arena_size);
+
+  // First allocation from head.
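For reference, AdjustHead() above aligns the candidate pointer with AlignPointerUp() before comparing it against the watermark; the test here passes alignment 1, so that step is a no-op. A minimal sketch of the usual power-of-two round-up such a helper performs — an illustration, not the TFLM AlignPointerUp() implementation — with the first head allocation of the test following the sketch:

```cpp
#include <cstddef>
#include <cstdint>

// Round `ptr` up to the next multiple of `alignment`.
// Assumes `alignment` is a non-zero power of two.
inline uint8_t* AlignUp(uint8_t* ptr, std::size_t alignment) {
  const std::uintptr_t align = static_cast<std::uintptr_t>(alignment);
  const std::uintptr_t value = reinterpret_cast<std::uintptr_t>(ptr);
  return reinterpret_cast<uint8_t*>((value + align - 1) & ~(align - 1));
}
```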
+ { + uint8_t* result = allocator.AdjustHead(100, 1); + TF_LITE_MICRO_EXPECT(arena == result); + TF_LITE_MICRO_EXPECT(arena + 100 == allocator.GetHead()); + } + // Second allocation doesn't require as much space so head pointer didn't + // move. + { + uint8_t* result = allocator.AdjustHead(10, 1); + TF_LITE_MICRO_EXPECT(arena == result); + TF_LITE_MICRO_EXPECT(arena + 100 == allocator.GetHead()); + } + // Third allocation increase head memory usage. + { + uint8_t* result = allocator.AdjustHead(1000, 1); + TF_LITE_MICRO_EXPECT(arena == result); + TF_LITE_MICRO_EXPECT(arena + 1000 == allocator.GetHead()); + } +} + TF_LITE_MICRO_TEST(TestJustFits) { constexpr size_t arena_size = 1024; uint8_t arena[arena_size]; @@ -105,12 +132,12 @@ TF_LITE_MICRO_TEST(TestAllocateHeadWithoutResettingTemp) { // Allocation should be null since temp allocation was not followed by a call // to ResetTempAllocations(). - uint8_t* head = allocator.AllocateFromHead(100, 1); + uint8_t* head = allocator.AdjustHead(100, 1); TF_LITE_MICRO_EXPECT(nullptr == head); allocator.ResetTempAllocations(); - head = allocator.AllocateFromHead(100, 1); + head = allocator.AdjustHead(100, 1); TF_LITE_MICRO_EXPECT(nullptr != head); // The most recent head allocation should be in the same location as the From 1da0ebb453374dd3b1b30272cda11c27dd9dc276 Mon Sep 17 00:00:00 2001 From: Penporn Koanantakool Date: Mon, 27 Jul 2020 16:55:40 -0700 Subject: [PATCH 1426/2522] Fix the docstring in random_flip_up_down. PiperOrigin-RevId: 323470604 Change-Id: I5fe1592a71443449f2774c4a9ed7e14144d8bc26 --- tensorflow/python/ops/image_ops_impl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index 9cc6a6d9c26..7569b35fe25 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -339,12 +339,12 @@ def random_flip_up_down(image, seed=None): Example usage: >>> import numpy as np - >>> image = np.array([[[1], [2]], [[3], [4]]]) >>> tf.image.random_flip_up_down(image, 3).numpy().tolist() [[[3], [4]], [[1], [2]]] Randomly flip multiple images. + >>> images = np.array( ... [ ... [[[1], [2]], [[3], [4]]], From 9443104e931c963ed871703995d4edb0d6aa46a3 Mon Sep 17 00:00:00 2001 From: Geeta Chavan Date: Mon, 27 Jul 2020 17:14:33 -0700 Subject: [PATCH 1427/2522] Merge release notes to master PiperOrigin-RevId: 323474098 Change-Id: I70b138be9164075c2210438775a8e9a276a59c4f --- RELEASE.md | 207 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 197 insertions(+), 10 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 7182846a805..56567abea2d 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -113,19 +113,206 @@ stjohnso98, , , , , # Release 2.3.0 -## Breaking Changes +## Major Features and Improvements + * `tf.data` adds two new mechanisms to solve input pipeline bottlenecks and save resources: + * [snapshot](https://www.tensorflow.org/api_docs/python/tf/data/experimental/snapshot) + * [tf.data service](https://www.tensorflow.org/api_docs/python/tf/data/experimental/service). -* `tf.image.extract_glimpse` has been updated to correctly process the case - where `centered=False` and `normalized=False`. This is a breaking change as - the output is different from (incorrect) previous versions. Note this - breaking change only impacts `tf.image.extract_glimpse` and - `tf.compat.v2.image.extract_glimpse` API endpoints. The behavior of - `tf.compat.v1.image.extract_glimpse` does not change. 
The behavior of - exsiting C++ kernel `ExtractGlimpse` does not change as well, so saved - models will not be impacted. + In addition checkout the detailed [guide](https://www.tensorflow.org/guide/data_performance_analysis) for analyzing input pipeline performance with TF Profiler. + + * [`tf.distribute.TPUStrategy`](https://www.tensorflow.org/api_docs/python/tf/distribute/TPUStrategy) is now a stable API and no longer considered experimental for TensorFlow. (earlier `tf.distribute.experimental.TPUStrategy`). + + * [TF Profiler](https://www.tensorflow.org/guide/profiler) introduces two new tools: a memory profiler to visualize your model’s memory usage over time and a [python tracer](https://www.tensorflow.org/guide/profiler#events) which allows you to trace python function calls in your model. Usability improvements include better diagnostic messages and [profile options](https://tensorflow.org/guide/profiler#collect_performance_data) to customize the host and device trace verbosity level. + + * Introduces experimental support for Keras Preprocessing Layers API ([`tf.keras.layers.experimental.preprocessing.*`](https://www.tensorflow.org/api_docs/python/tf/keras/layers/experimental/preprocessing?version=nightly)) to handle data preprocessing operations, with support for composite tensor inputs. Please see below for additional details on these layers. + + * TFLite now properly supports dynamic shapes during conversion and inference. We’ve also added opt-in support on Android and iOS for [XNNPACK](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/delegates/xnnpack), a highly optimized set of CPU kernels, as well as opt-in support for [executing quantized models on the GPU](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/g3doc/performance/gpu_advanced.md#running-quantized-models-experimental). + + * Libtensorflow packages are available in GCS starting this release. We have also started to [release a nightly version of these packages](https://github.com/tensorflow/tensorflow#official-builds). + + * The experimental Python API [`tf.debugging.experimental.enable_dump_debug_info()`](https://www.tensorflow.org/api_docs/python/tf/debugging/experimental/enable_dump_debug_info) now allows you to instrument a TensorFlow program and dump debugging information to a directory on the file system. The directory can be read and visualized by a new interactive dashboard in TensorBoard 2.3 called [Debugger V2](https://www.tensorflow.org/tensorboard/debugger_v2), which reveals the details of the TensorFlow program including graph structures, history of op executions at the Python (eager) and intra-graph levels, the runtime dtype, shape, and numerical composistion of tensors, as well as their code locations. + +## Breaking Changes +* Increases the **minimum bazel version** required to build TF to **3.1.0**. +* `tf.data` + * Makes the following (breaking) changes to the `tf.data`. + * C++ API: - `IteratorBase::RestoreInternal`, `IteratorBase::SaveInternal`, and `DatasetBase::CheckExternalState` become pure-virtual and subclasses are now expected to provide an implementation. + * The deprecated `DatasetBase::IsStateful` method is removed in favor of `DatasetBase::CheckExternalState`. + * Deprecated overrides of `DatasetBase::MakeIterator` and `MakeIteratorFromInputElement` are removed. 
+ * The signature of `tensorflow::data::IteratorBase::SaveInternal` and `tensorflow::data::IteratorBase::SaveInput` has been extended with `SerializationContext` argument to enable overriding the default policy for the handling external state during iterator checkpointing. This is not a backwards compatible change and all subclasses of `IteratorBase` *need to be updated* accordingly. +* `tf.keras` + * Add a new `BackupAndRestore` callback for handling distributed training failures & restarts. Please take a look at this [tutorial](https://www.tensorflow.org/tutorials/distribute/multi_worker_with_keras) for details on how to use the callback. +* `tf.image.extract_glimpse` has been updated to correctly process the case + where `centered=False` and `normalized=False`. This is a breaking change as + the output is different from (incorrect) previous versions. Note this + breaking change only impacts `tf.image.extract_glimpse` and + `tf.compat.v2.image.extract_glimpse` API endpoints. The behavior of + `tf.compat.v1.image.extract_glimpse` does not change. The behavior of + exsiting C++ kernel `ExtractGlimpse` does not change either, so saved + models using `tf.raw_ops.ExtractGlimpse` will not be impacted. + +## Known Caveats + * `tf.lite` + * Keras-based LSTM models must be converted with an explicit batch size in the input layer. ## Bug Fixes and Other Changes -* Mutable tables now restore checkpointed values when loaded from SavedModel. + +### TF Core: + * Set `tf2_behavior` to 1 to enable V2 for early loading cases. + * Add `execute_fn_for_device function` to dynamically choose the implementation based on underlying device placement. + * Eager: + * Add `reduce_logsumexp` benchmark with experiment compile. + * Give `EagerTensor`s a meaningful `__array__` implementation. + * Add another version of defun matmul for performance analysis. + * `tf.function`/AutoGraph: + * `AutoGraph` now includes into TensorFlow loops any variables that are closed over by local functions. Previously, such variables were sometimes incorrectly ignored. + * functions returned by the `get_concrete_function` method of `tf.function` objects can now be called with arguments consistent with the original arguments or type specs passed to `get_concrete_function`. This calling convention is now the preferred way to use concrete functions with nested values and composite tensors. Please check the [guide](https://www.tensorflow.org/guide/concrete_function) for more details on `concrete_ function`. + * Update `tf.function`'s `experimental_relax_shapes` to handle composite tensors appropriately. + * Optimize `tf.function` invocation, by removing redundant list converter. + * `tf.function` will retrace when called with a different variable instead of simply using the `dtype` & `shape`. + * [Improve support](https://github.com/tensorflow/tensorflow/issues/33862) for dynamically-sized TensorArray inside `tf.function`. + * `tf.math`: + * Narrow down `argmin`/`argmax` contract to always return the smallest index for ties. + * `tf.math.reduce_variance` and `tf.math.reduce_std` return correct computation for complex types and no longer support integer types. + * Add Bessel functions of order 0,1 to `tf.math.special`. + * `tf.divide` now always returns a tensor to be consistent with documentation and other APIs. 
+ * `tf.image`: + * Replaced [`tf.image.non_max_suppression_padded`](https://www.tensorflow.org/versions/r2.3/api_docs/python/tf/image/non_max_suppression_padded?hl=en) with a new implementation that supports batched inputs, which is considerably faster on TPUs and GPUs. Boxes with area=0 will be ignored. Existing usage with single inputs should still work as before. + * `tf.linalg` + * Add `tf.linalg.banded_triangular_solve`. + * `tf.random`: + * Add `tf.random.stateless_parameterized_truncated_normal`. + * `tf.ragged`: + * Add `tf.ragged.cross` and `tf.ragged.cross_hashed` operations. + * `tf.RaggedTensor`: + * `RaggedTensor.to_tensor()` now preserves static shape. + * Add `tf.strings.format()` and `tf.print()` to support RaggedTensors. + * `tf.saved_model`: + * `@tf.function` from SavedModel no longer ignores args after a `RaggedTensor` when selecting the concrete function to run. + * Fix save model issue for ops with a list of functions. + * Add `tf.saved_model.LoadOptions` with [`experimental_io_device`](https://www.tensorflow.org/versions/r2.3/api_docs/python/tf/saved_model/LoadOptions?hl=en) as arg with default value `None` to choose the I/O device for loading models and weights. + * Update `tf.saved_model.SaveOptions` with [`experimental_io_device`](https://www.tensorflow.org/versions/r2.3/api_docs/python/tf/saved_model/SaveOptions?hl=en) as arg with default value `None` to choose the I/O device for saving models and weights. + * Mutable tables now restore checkpointed values when loaded from SavedModel. + * GPU + * No longer includes PTX kernels for GPU except for sm_70 to reduce binary size. On systems with NVIDIA® Ampere GPUs (CUDA architecture 8.0) or newer, kernels are JIT-compiled from PTX and TensorFlow can take over 30 minutes to start up. This overhead can be limited to the first start up by increasing the default JIT cache size with: `export CUDA_CACHE_MAXSIZE=2147483648`.: + * Others + * Retain parent namescope for ops added inside `tf.while_loop`/`tf.cond`/`tf.switch_case`. + * Update `tf.vectorized_map` to support vectorizing `tf.while_loop` and TensorList operations. + * `tf.custom_gradient` can now be applied to functions that accept nested structures of `tensors` as inputs (instead of just a list of tensors). Note that Python structures such as tuples and lists now won't be treated as tensors, so if you still want them to be treated that way, you need to wrap them with `tf.convert_to_tensor`. + * No lowering on gradient case op when input is `DeviceIndex` op. + * Extend the ragged version of `tf.gather` to support `batch_dims` and `axis` args. + * Update `tf.map_fn` to support RaggedTensors and SparseTensors. + * Deprecate `tf.group`. It is not useful in eager mode. + * Add CPU and GPU implementation of modified variation of [`FTRL`](https://www.tensorflow.org/versions/r2.3/api_docs/python/tf/raw_ops/ApplyFtrl)/[`FTRLV2`](https://www.tensorflow.org/versions/r2.3/api_docs/python/tf/raw_ops/ApplyFtrlV2) that can triggerred by `multiply_linear_by_lr` allowing a learning rate of zero. + +### `tf.data`: + * `tf.data.experimental.dense_to_ragged_batch` works correctly with tuples. + * `tf.data.experimental.dense_to_ragged_batch` to output variable ragged rank. + * `tf.data.experimental.cardinality` is now a method on `tf.data.Dataset`. + * `tf.data.Dataset` now supports `len(Dataset)` when the cardinality is finite. 
+ +### `tf.distribute`: + * Expose experimental [`tf.distribute.DistributedDataset`](https://www.tensorflow.org/versions/r2.3/api_docs/python/tf/distribute/DistributedDataset?hl=en) and [`tf.distribute.DistributedIterator`](https://www.tensorflow.org/versions/r2.3/api_docs/python/tf/distribute/DistributedIterator) to distribute input data when using `tf.distribute` to scale training on multiple devices. + * Added a [`get_next_as_optional`](https://www.tensorflow.org/versions/r2.3/api_docs/python/tf/distribute/DistributedIterator?hl=en#get_next_as_optional) method for [`tf.distribute.DistributedIterator`](https://www.tensorflow.org/versions/r2.3/api_docs/python/tf/distribute/DistributedIterator?hl=en) class to return a `tf.experimental.Optional` instance that contains the next value for all replicas or none instead of raising an out of range error. Also see *new* [guide on input distribution](https://www.tensorflow.org/tutorials/distribute/input). + * Allow var.assign on MirroredVariables with aggregation=NONE in replica context. Previously this would raise an error. We now allow this because many users and library writers find using `.assign` in replica context to be more convenient, instead of having to use `Strategy.extended.update` which was the previous way of updating variables in this situation. + * `tf.distribute.experimental.MultiWorkerMirroredStrategy` adds support for partial batches. Workers running out of data now continue to participate in the training with empty inputs, instead of raising an error. Learn more about [partial batches here](https://www.tensorflow.org/tutorials/distribute/input#partial_batches). + * Improve the performance of reading metrics eagerly under `tf.distribute.experimental.MultiWorkerMirroredStrategy`. + * Fix the issue that `strategy.reduce()` inside `tf.function` may raise exceptions when the values to reduce are from loops or if-clauses. + * Fix the issue that `tf.distribute.MirroredStrategy` cannot be used together with `tf.distribute.experimental.MultiWorkerMirroredStrategy`. + * Add a `tf.distribute.cluster_resolver.TPUClusterResolver.connect` API to simplify TPU initialization. + +### `tf.keras`: + * Introduces experimental preprocessing layers API (`tf.keras.layers.experimental.preprocessing`) to handle data preprocessing operations such as categorical feature encoding, text vectorization, data normalization, and data discretization (binning). The newly added layers provide a replacement for the legacy feature column API, and support composite tensor inputs. 
+ * Added **categorical data** processing layers: + * `IntegerLookup` & `StringLookup`: build an index of categorical feature values + * `CategoryEncoding`: turn integer-encoded categories into one-hot, multi-hot, or tf-idf encoded representations + * `CategoryCrossing`: create new categorical features representing co-occurrences of previous categorical feature values + * `Hashing`: the hashing trick, for large-vocabulary categorical features + * `Discretization`: turn continuous numerical features into categorical features by binning their values + * Improved **image preprocessing** layers: `CenterCrop`, `Rescaling` + * Improved **image augmentation** layers: `RandomCrop`, `RandomFlip`, `RandomTranslation`, `RandomRotation`, `RandomHeight`, `RandomWidth`, `RandomZoom`, `RandomContrast` + * Improved **`TextVectorization`** layer, which handles string tokenization, n-gram generation, and token encoding + * The `TextVectorization` layer now accounts for the mask_token as part of the vocabulary size when output_mode='int'. This means that, if you have a max_tokens value of 5000, your output will have 5000 unique values (not 5001 as before). + * Change the return value of `TextVectorization.get_vocabulary()` from `byte` to `string`. Users who previously were calling 'decode' on the output of this method should no longer need to do so. + * Introduce new Keras dataset generation utilities : + * **[`image_dataset_from_directory`](https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image_dataset_from_directory)** is a utility based on `tf.data.Dataset`, meant to replace the legacy `ImageDataGenerator`. It takes you from a structured directory of images to a labeled dataset, in one function call. Note that it doesn't perform image data augmentation (which is meant to be done using preprocessing layers). + * **[`text_dataset_from_directory`](https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/text_dataset_from_directory)** takes you from a structured directory of text files to a labeled dataset, in one function call. + * **[`timeseries_dataset_from_array`](https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/timeseries_dataset_from_array)** is a `tf.data.Dataset`-based replacement of the legacy `TimeseriesGenerator`. It takes you from an array of timeseries data to a dataset of shifting windows with their targets. + * Added [`experimental_steps_per_execution`](https://www.tensorflow.org/versions/r2.3/api_docs/python/tf/keras/Model?hl=en#compile) + arg to `model.compile` to indicate the number of batches to run per `tf.function` call. This can speed up Keras Models on TPUs up to 3x. + * Extends `tf.keras.layers.Lambda` layers to support multi-argument lambdas, and keyword arguments when calling the layer. + * Functional models now get constructed if *any* tensor in a layer call's arguments/keyword arguments comes from a keras input. Previously the functional api would only work if all of the elements in the first argument to the layer came from a keras input. + * Clean up `BatchNormalization` layer's `trainable` property to act like standard python state when it's used inside `tf.functions` (frozen at tracing time), instead of acting like a pseudo-variable whose updates *kind of sometimes* get reflected in already-traced `tf.function` traces. + * Add the `Conv1DTranspose` layer. + * Refine the semantics of `SensitivitySpecificityBase` derived metrics. 
See the updated API docstrings for [`tf.keras.metrics.SensitivityAtSpecificity`](https://www.tensorflow.org/versions/r2.3/api_docs/python/tf/keras/metrics/SensitivityAtSpecificity) and [`tf.keras.metrics.SpecificityAtSensitivty`](https://www.tensorflow.org/versions/r2.3/api_docs/python/tf/keras/metrics/SpecificityAtSensitivity). + +### `tf.lite`: + * Converter + * Restored `inference_input_type` and `inference_output_type` flags in TF 2.x TFLiteConverter (backward compatible with TF 1.x) to support integer (tf.int8, tf.uint8) input and output types in post training full integer quantized models. + * Added support for converting and resizing models with dynamic (placeholder) dimensions. Previously, there was only limited support for dynamic batch size, and even that did not guarantee that the model could be properly resized at runtime. + * Enabled experimental support for a new quantization mode with 16-bit activations and 8-bit weights. See `lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8`. + * CPU + * Fix an issue w/ dynamic weights and `Conv2D` on x86. + * Add a runtime Android flag for enabling `XNNPACK` for optimized CPU performance. + * Add a runtime iOS flag for enabling `XNNPACK` for optimized CPU performance. + * Add a compiler flag to enable building a TFLite library that applies `XNNPACK` delegate automatically when the model has a `fp32` operation. + * GPU + * Allow GPU acceleration starting with internal graph nodes + * Experimental support for quantized models with the Android GPU delegate + * Add GPU delegate whitelist. + * Rename GPU whitelist -> compatibility (list). + * Improve GPU compatibility list entries from crash reports. + * NNAPI + * Set default value for `StatefulNnApiDelegate::Options::max_number_delegated_partitions` to 3. + * Add capability to disable `NNAPI` CPU and check `NNAPI` Errno. + * Fix crashes when using `NNAPI` with target accelerator specified with model containing Conv2d or FullyConnected or LSTM nodes with quantized weights. + * Fix `ANEURALNETWORKS_BAD_DATA` execution failures with `sum`/`max`/`min`/`reduce` operations with `scalar` inputs. + * Hexagon + * TFLite Hexagon Delegate out of experimental. + * Experimental `int8` support for most hexagon ops. + * Experimental per-channel quant support for `conv` in Hexagon delegate. + * Support dynamic batch size in C++ API. + * CoreML + * Opensource CoreML delegate + * Misc + * Enable building Android TFLite targets on Windows + * Add support for `BatchMatMul`. + * Add support for `half_pixel_centers` with `ResizeNearestNeighbor`. + * Add 3D support for `BatchToSpaceND`. + * Add 5D support for `BroadcastSub`, `Maximum`, `Minimum`, `Transpose` and `BroadcastDiv`. + * Rename `kTfLiteActRelu1` to `kTfLiteActReluN1To1`. + * Enable flex delegate on tensorflow.lite.Interpreter Python package. + * Add `Buckettize`, `SparseCross` and `BoostedTreesBucketize` to the flex whitelist. + * Add support for selective registration of flex ops. + * Add missing kernels for flex delegate whitelisted ops. + * Fix issue when using direct `ByteBuffer` inputs with graphs that have dynamic shapes. + * Fix error checking supported operations in a model containing `HardSwish`. + +### Packaging Support + * Added `tf.sysconfig.get_build_info()`. Returns a dict that describes the build environment of the currently installed TensorFlow package, e.g. the NVIDIA CUDA and NVIDIA CuDNN versions used when TensorFlow was built. + +### Profiler + * Fix a subtle use-after-free issue in `XStatVisitor::RefValue()`. 
+ +### TPU Enhancements + * Adds 3D mesh support in TPU configurations ops. + * Added TPU code for `FTRL` with `multiply_linear_by_lr`. + * Silently adds a new file system registry at `gstpu`. + * Support `restartType` in cloud tpu client. + * Depend on a specific version of google-api-python-client. + * Fixes apiclient import. + +### Tracing and Debugging + * Add a `TFE_Py_Execute` traceme. + +### XLA Support + * Implement stable `argmin` and `argmax` + +## Thanks to our Contributors + +This release contains contributions from many people at Google, as well as: + +902449@58880@bigcat_chen@ASIC, Abdul Baseer Khan, Abhineet Choudhary, Abolfazl Shahbazi, Adam Hillier, ag.ramesh, Agoniii, Ajay P, Alex Hoffman, Alexander Bayandin, Alexander Grund, Alexandre Abadie, Alexey Rogachevskiy, amoitra, Andrew Stevens, Angus-Luo, Anshuman Tripathy, Anush Elangovan, Artem Mavrin, Ashutosh Hathidara, autoih, Ayushman Kumar, ayushmankumar7, Bairen Yi, Bas Aarts, Bastian Eichenberger, Ben Barsdell, bhack, Bharat Raghunathan, Biagio Montaruli, Bigcat-Himax, blueyi, Bryan Cutler, Byambaa, Carlos Hernandez-Vaquero, Chen Lei, Chris Knorowski, Christian Clauss, chuanqiw, CuiYifeng, Daniel Situnayake, Daria Zhuravleva, Dayananda-V, Deven Desai, Devi Sandeep Endluri, Dmitry Zakharov, Dominic Jack, Duncan Riach, Edgar Liberis, Ehsan Toosi, ekuznetsov139, Elena Zhelezina, Eugene Kuznetsov, Eugene Mikhantiev, Evgenii Zheltonozhskii, Fabio Di Domenico, Fausto Morales, Fei Sun, feihugis, Felix E. Klee, flyingcat, Frederic Bastien, Fredrik Knutsson, frreiss, fsx950223, ganler, Gaurav Singh, Georgios Pinitas, Gian Marco Iodice, Giorgio Arena, Giuseppe Rossini, Gregory Keith, Guozhong Zhuang, gurushantj, Hahn Anselm, Harald Husum, Harjyot Bagga, Hristo Vrigazov, Ilya Persky, Ir1d, Itamar Turner-Trauring, jacco, Jake Tae, Janosh Riebesell, Jason Zaman, jayanth, Jeff Daily, Jens Elofsson, Jinzhe Zeng, JLZ, Jonas Skog, Jonathan Dekhtiar, Josh Meyer, Joshua Chia, Judd, justkw, Kaixi Hou, Kam D Kasravi, Kamil Rakoczy, Karol Gugala, Kayou, Kazuaki Ishizaki, Keith Smiley, Khaled Besrour, Kilaru Yasaswi Sri Chandra Gandhi, Kim, Young Soo, Kristian Hartikainen, Kwabena W. Agyeman, Leslie-Fang, Leslie-Fang-Intel, Li, Guizi, Lukas Geiger, Lutz Roeder, M\U00E5Ns Nilsson, Mahmoud Abuzaina, Manish, Marcel Koester, Marcin Sielski, marload, Martin Jul, Matt Conley, mdfaijul, Meng, Peng, Meteorix, Michael Käufl, Michael137, Milan Straka, Mitchell Vitez, Ml-0, Mokke Meguru, Mshr-H, nammbash, Nathan Luehr, naumkin, Neeraj Bhadani, ngc92, Nick Morgan, nihui, Niranjan Hasabnis, Niranjan Yadla, Nishidha Panpaliya, Oceania2018, oclyke, Ouyang Jin, OverLordGoldDragon, Owen Lyke, Patrick Hemmer, Paul Andrey, Peng Sun, periannath, Phil Pearl, Prashant Dandriyal, Prashant Kumar, Rahul Huilgol, Rajan Singh, Rajeshwar Reddy T, rangjiaheng, Rishit Dagli, Rohan Reddy, rpalakkal, rposts, Ruan Kunliang, Rushabh Vasani, Ryohei Ikegami, Semun Lee, Seo-Inyoung, Sergey Mironov, Sharada Shiddibhavi, ShengYang1, Shraiysh Vaishay, Shunya Ueta, shwetaoj, Siyavash Najafzade, Srinivasan Narayanamoorthy, Stephan Uphoff, storypku, sunchenggen, sunway513, Sven-Hendrik Haase, Swapnil Parekh, Tamas Bela Feher, Teng Lu, tigertang, tomas, Tomohiro Ubukata, tongxuan.ltx, Tony Tonev, Tzu-Wei Huang, Téo Bouvard, Uday Bondhugula, Vaibhav Jade, Vijay Tadikamalla, Vikram Dattu, Vincent Abriou, Vishnuvardhan Janapati, Vo Van Nghia, VoVAllen, Will Battel, William D. 
Irons, wyzhao, Xiaoming (Jason) Cui, Xiaoquan Kong, Xinan Jiang, xutianming, Yair Ehrenwald, Yasir Modak, Yasuhiro Matsumoto, Yixing Fu, Yong Tang, Yuan Tang, zhaozheng09, Zilin Zhu, zilinzhu, 张志豪 # Release 2.1.1 From b34ccacfea8993c733e81823b492b0b5f38c53fb Mon Sep 17 00:00:00 2001 From: Jay Shi Date: Mon, 27 Jul 2020 17:21:54 -0700 Subject: [PATCH 1428/2522] [tf.data] Implementation of tf.data experiment roll out framework. PiperOrigin-RevId: 323475203 Change-Id: I4fc60053deda12b1536c17a2dfe941cf9113aed6 --- .../base_api/api_def_OptimizeDatasetV2.pbtxt | 32 ++++ tensorflow/core/kernels/data/BUILD | 1 + tensorflow/core/kernels/data/dataset_utils.cc | 130 ++++++++++++++++ tensorflow/core/kernels/data/dataset_utils.h | 12 ++ .../core/kernels/data/dataset_utils_test.cc | 137 ++++++++++++++++ .../core/kernels/data/optimize_dataset_op.cc | 58 ++++++- .../core/kernels/data/optimize_dataset_op.h | 9 ++ tensorflow/core/ops/dataset_ops.cc | 11 ++ tensorflow/core/platform/default/port.cc | 2 + tensorflow/core/platform/host_info.h | 6 +- .../kernel_tests/optimize_dataset_test.py | 147 +++++++++++++++--- .../kernel_tests/prefetch_with_slack_test.py | 4 +- .../optimize_dataset_serialization_test.py | 6 +- .../data/experimental/ops/optimization.py | 20 ++- .../experimental/ops/optimization_options.py | 53 +++++-- tensorflow/python/data/ops/dataset_ops.py | 105 ++++++++++--- tensorflow/python/data/util/options.py | 8 + .../api/golden/v1/tensorflow.raw_ops.pbtxt | 4 + .../api/golden/v2/tensorflow.raw_ops.pbtxt | 4 + 19 files changed, 676 insertions(+), 73 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_OptimizeDatasetV2.pbtxt diff --git a/tensorflow/core/api_def/base_api/api_def_OptimizeDatasetV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_OptimizeDatasetV2.pbtxt new file mode 100644 index 00000000000..a8e66499471 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_OptimizeDatasetV2.pbtxt @@ -0,0 +1,32 @@ +op { + graph_op_name: "OptimizeDatasetV2" + visibility: HIDDEN + in_arg { + name: "input_dataset" + description: < SelectOptimizations( + const string& job_name, const string& opt_ins_raw, + const string& opt_outs_raw, + const absl::flat_hash_map& live_experiments, + const std::vector& optimizations_enabled, + const std::vector& optimizations_disabled, + const std::vector& optimizations_default, + std::function hash_func) { + // Creates a set of optimizations. + absl::flat_hash_set optimizations_set; + + // Creates the opt in and opt out settings. + std::vector opt_ins, opt_outs; + if (opt_ins_raw == "all") { + for (auto& pair : live_experiments) { + opt_ins.push_back(pair.first); + } + } else { + opt_ins = str_util::Split(opt_ins_raw, ',', str_util::SkipEmpty()); + } + if (opt_outs_raw == "all") { + for (auto& pair : live_experiments) { + opt_outs.push_back(pair.first); + } + } else { + opt_outs = str_util::Split(opt_outs_raw, ',', str_util::SkipEmpty()); + } + + // Checks if the opt in and opt out experiments are live experiments. + for (auto& optimization : opt_ins) { + if (live_experiments.find(optimization) == live_experiments.end()) { + LOG(WARNING) << "The experiment \"" << optimization + << "\" is opted in but it is not a live experiment."; + } + } + for (auto& optimization : opt_outs) { + if (live_experiments.find(optimization) == live_experiments.end()) { + LOG(WARNING) << "The experiment \"" << optimization + << "\" is opted out but it is not a live experiment."; + } + } + + // Checks if the opt in settings conflict with opt out settings. 
+ for (auto& optimization : opt_ins) { + if (std::find(opt_outs.begin(), opt_outs.end(), optimization) != + opt_outs.end()) { + LOG(WARNING) << "The experiment \"" << optimization + << "\" is set in both \"TF_DATA_EXPERIMENT_OPT_IN\" and " + "\"TF_DATA_EXPERIMENT_OPT_OUT\". Unless the experiment " + "corresponds to an explicitly enabled optimization, it " + "is not applied."; + } + } + + // Checks if the enable/disable settings from tf.data.Options conflict with + // user opt in/out settings. In which case we assume tf.data.Options settings + // have higher priority to overwrite. + for (auto& optimization : optimizations_enabled) { + if (std::find(opt_outs.begin(), opt_outs.end(), optimization) != + opt_outs.end()) { + LOG(WARNING) << "The optimization \"" << optimization + << "\" is opt out, but is still applied since" + " it is enabled through tf.data.Options."; + } + } + for (auto& optimization : optimizations_disabled) { + if (std::find(opt_ins.begin(), opt_ins.end(), optimization) != + opt_ins.end()) { + LOG(WARNING) << "The optimization \"" << optimization + << "\" is opt in, but is not applied since" + " it is disabled through tf.data.Options."; + } + } + + // Add the enabled optimizations. + optimizations_set.insert(optimizations_enabled.begin(), + optimizations_enabled.end()); + + // Add the default optimizations that are not explicitly opted out. + for (auto& optimization : optimizations_default) { + if (std::find(opt_outs.begin(), opt_outs.end(), optimization) == + opt_outs.end()) { + optimizations_set.insert(optimization); + } + } + + // Add the live experiments stochastically if they are neither opted in nor + // opted out. + for (auto& pair : live_experiments) { + string experiment = pair.first; + // Skip experiments that are explicitly opted out. + if (std::find(opt_outs.begin(), opt_outs.end(), experiment) != + opt_outs.end()) { + continue; + } + // Skip experiments whose transformations are explicitly disabled. + if (std::find(optimizations_disabled.begin(), optimizations_disabled.end(), + experiment) != optimizations_disabled.end()) { + continue; + } + // Apply experiments that are explicitly opted in. + if (std::find(opt_ins.begin(), opt_ins.end(), experiment) != + opt_ins.end()) { + optimizations_set.insert(experiment); + continue; + } + // Otherwise, apply experiment stochastically based on job name and + // experiment roll out percentage. + if (hash_func(strings::StrCat(job_name, experiment)) % 100 < pair.second) { + optimizations_set.insert(experiment); + } + } + + // Log the experiments that will be applied. 
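The stochastic roll-out applied above comes down to a stable hash-bucket check on the (job name, experiment) pair, so a given job keeps the same decision across runs. Below is a minimal standalone C++ sketch of just that check, with a placeholder hash in place of the hash function TensorFlow injects; the logging block of SelectOptimizations continues after the sketch.

```cpp
#include <cstdint>
#include <functional>
#include <iostream>
#include <string>

// Returns true if `job_name` should participate in `experiment`, which is
// rolled out to `rollout_percent` percent of jobs (0-100).
bool InExperiment(const std::string& job_name, const std::string& experiment,
                  std::uint64_t rollout_percent,
                  const std::function<std::uint64_t(const std::string&)>& hash) {
  // The same job + experiment string always lands in the same bucket, so the
  // decision is deterministic for a given job.
  return hash(job_name + experiment) % 100 < rollout_percent;
}

int main() {
  auto hash = [](const std::string& s) {
    return static_cast<std::uint64_t>(std::hash<std::string>{}(s));
  };
  // A 20% experiment: roughly one in five distinct job names opts in.
  std::cout << InExperiment("job", "exp2", /*rollout_percent=*/20, hash)
            << "\n";
  return 0;
}
```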
+ if (VLOG_IS_ON(1)) { + for (auto& pair : live_experiments) { + string experiment = pair.first; + if (std::find(optimizations_set.begin(), optimizations_set.end(), + experiment) != optimizations_set.end()) { + VLOG(1) << "The experiment \"" << experiment << "\" is applied."; + } + } + } + + std::vector optimizations; + optimizations.insert(optimizations.end(), optimizations_set.begin(), + optimizations_set.end()); + return optimizations; +} + } // namespace data } // namespace tensorflow diff --git a/tensorflow/core/kernels/data/dataset_utils.h b/tensorflow/core/kernels/data/dataset_utils.h index 5c6b14a8782..0fe3618f34b 100644 --- a/tensorflow/core/kernels/data/dataset_utils.h +++ b/tensorflow/core/kernels/data/dataset_utils.h @@ -304,6 +304,18 @@ class DummyResourceOp : public OpKernel { // MatchesAnyVersionRE("PaddedBatchDataset", "BatchDataset") == false bool MatchesAnyVersionRE(StringPiece op_prefix, StringPiece op_to_match); +// Based on `optimizations_enabled`, `optimizations_disabled`, and +// `optimizations_disabled`, returns the list of optimizations that will be +// applied. +std::vector SelectOptimizations( + const string& job_name, const string& opt_ins_raw, + const string& opt_outs_raw, + const absl::flat_hash_map& live_experiments, + const std::vector& optimizations_enabled, + const std::vector& optimizations_disabled, + const std::vector& optimizations_default, + std::function hash_func); + } // namespace data } // namespace tensorflow diff --git a/tensorflow/core/kernels/data/dataset_utils_test.cc b/tensorflow/core/kernels/data/dataset_utils_test.cc index 1a6e673c3f3..a1f624faeb6 100644 --- a/tensorflow/core/kernels/data/dataset_utils_test.cc +++ b/tensorflow/core/kernels/data/dataset_utils_test.cc @@ -30,6 +30,8 @@ namespace tensorflow { namespace data { namespace { +using ::testing::UnorderedElementsAre; + class DatasetHashUtilsTest : public ::testing::Test { protected: uint64 GetHash(const FunctionDefLibrary& library, const FunctionDef& fn) { @@ -1131,6 +1133,141 @@ TEST_F(DatasetHashUtilsTest, HashStringTensor) { EXPECT_NE(GetHash(v1), GetHash(v3)); } +class SelectOptimizationsHashTest : public ::testing::TestWithParam {}; + +TEST_P(SelectOptimizationsHashTest, DatasetUtils) { + const uint64 hash_result = GetParam(); + string job_name = "job"; + const string opt_ins_raw = ""; + const string opt_outs_raw = ""; + auto hash_func = [hash_result](const string& str) { return hash_result; }; + absl::flat_hash_map live_experiments = { + {"exp1", 0}, {"exp2", 20}, {"exp3", 33}, {"exp4", 45}, + {"exp5", 67}, {"exp6", 88}, {"exp7", 100}}; + std::vector optimizations_enabled, optimizations_disabled, + optimizations_default; + std::vector optimizations = + SelectOptimizations(job_name, opt_ins_raw, opt_outs_raw, live_experiments, + optimizations_enabled, optimizations_disabled, + optimizations_default, hash_func); + + int tested_times = 0; + switch (hash_result) { + case 0: + case 100: + case 200: + tested_times++; + EXPECT_THAT(optimizations, UnorderedElementsAre("exp2", "exp3", "exp4", + "exp5", "exp6", "exp7")); + break; + case 33: + case 133: + tested_times++; + EXPECT_THAT(optimizations, + UnorderedElementsAre("exp4", "exp5", "exp6", "exp7")); + break; + case 67: + case 167: + tested_times++; + EXPECT_THAT(optimizations, UnorderedElementsAre("exp6", "exp7")); + break; + } + EXPECT_EQ(tested_times, 1); +} + +INSTANTIATE_TEST_SUITE_P(Test, SelectOptimizationsHashTest, + ::testing::Values(0, 33, 67, 100, 133, 167, 200)); + +class SelectOptimizationsOptTest + : public 
::testing::TestWithParam> {}; + +TEST_P(SelectOptimizationsOptTest, DatasetUtils) { + string job_name = "job"; + const string opt_ins_raw = std::get<0>(GetParam()); + const string opt_outs_raw = std::get<1>(GetParam()); + auto hash_func = [](const string& str) { return 50; }; + absl::flat_hash_map live_experiments = { + {"exp1", 0}, {"exp2", 25}, {"exp3", 50}, {"exp4", 75}, {"exp5", 100}}; + std::vector optimizations_enabled, optimizations_disabled, + optimizations_default; + std::vector optimizations = + SelectOptimizations(job_name, opt_ins_raw, opt_outs_raw, live_experiments, + optimizations_enabled, optimizations_disabled, + optimizations_default, hash_func); + + int tested_times = 0; + if (opt_outs_raw == "all") { + EXPECT_THAT(optimizations, UnorderedElementsAre()); + tested_times++; + } else if (opt_outs_raw.empty()) { + if (opt_ins_raw == "all") { + EXPECT_THAT(optimizations, + UnorderedElementsAre("exp1", "exp2", "exp3", "exp4", "exp5")); + tested_times++; + } else if (opt_ins_raw.empty()) { + EXPECT_THAT(optimizations, UnorderedElementsAre("exp4", "exp5")); + tested_times++; + } else if (opt_ins_raw == "exp2,exp4") { + EXPECT_THAT(optimizations, UnorderedElementsAre("exp2", "exp4", "exp5")); + tested_times++; + } + } else if (opt_outs_raw == "exp1,exp5") { + if (opt_ins_raw == "all") { + EXPECT_THAT(optimizations, UnorderedElementsAre("exp2", "exp3", "exp4")); + tested_times++; + } else if (opt_ins_raw.empty()) { + EXPECT_THAT(optimizations, UnorderedElementsAre("exp4")); + tested_times++; + } else if (opt_ins_raw == "exp2,exp4") { + EXPECT_THAT(optimizations, UnorderedElementsAre("exp2", "exp4")); + tested_times++; + } + } + EXPECT_EQ(tested_times, 1); +} + +INSTANTIATE_TEST_SUITE_P( + Test, SelectOptimizationsOptTest, + ::testing::Combine(::testing::Values("all", "", "exp2,exp4"), + ::testing::Values("all", "", "exp1,exp5"))); + +class SelectOptimizationsConflictTest + : public ::testing::TestWithParam> {}; + +TEST_P(SelectOptimizationsConflictTest, DatasetUtils) { + string job_name = "job"; + const string opt_ins_raw = std::get<0>(GetParam()); + const string opt_outs_raw = std::get<1>(GetParam()); + const uint64 hash_result = std::get<2>(GetParam()); + auto hash_func = [hash_result](const string& str) { return hash_result; }; + absl::flat_hash_map live_experiments = { + {"exp1", 20}, {"exp2", 30}, {"exp3", 40}, + {"exp4", 60}, {"exp5", 70}, {"exp6", 80}}; + std::vector optimizations_enabled = {"exp1", "exp4"}, + optimizations_disabled = {"exp2", "exp5"}, + optimizations_default = {"exp3", "exp6"}; + std::vector optimizations = + SelectOptimizations(job_name, opt_ins_raw, opt_outs_raw, live_experiments, + optimizations_enabled, optimizations_disabled, + optimizations_default, hash_func); + + int tested_times = 0; + if (opt_outs_raw.empty()) { + EXPECT_THAT(optimizations, + UnorderedElementsAre("exp1", "exp3", "exp4", "exp6")); + tested_times++; + } else if (opt_outs_raw == "exp1,exp3") { + EXPECT_THAT(optimizations, UnorderedElementsAre("exp1", "exp4", "exp6")); + tested_times++; + } + EXPECT_EQ(tested_times, 1); +} + +INSTANTIATE_TEST_SUITE_P(Test, SelectOptimizationsConflictTest, + ::testing::Combine(::testing::Values("", "exp2"), + ::testing::Values("", "exp1,exp3"), + ::testing::Values(10, 50, 90))); + } // namespace } // namespace data } // namespace tensorflow diff --git a/tensorflow/core/kernels/data/optimize_dataset_op.cc b/tensorflow/core/kernels/data/optimize_dataset_op.cc index c976a8f7b08..a0101435794 100644 --- a/tensorflow/core/kernels/data/optimize_dataset_op.cc 
+++ b/tensorflow/core/kernels/data/optimize_dataset_op.cc @@ -18,8 +18,10 @@ limitations under the License. #include "tensorflow/core/framework/partial_tensor_shape.h" #include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/kernels/data/dataset_utils.h" #include "tensorflow/core/kernels/data/rewrite_utils.h" #include "tensorflow/core/lib/random/random.h" +#include "tensorflow/core/platform/host_info.h" #include "tensorflow/core/protobuf/rewriter_config.pb.h" namespace tensorflow { @@ -31,10 +33,18 @@ namespace data { /* static */ constexpr const char* const OptimizeDatasetOp::kDatasetType; /* static */ constexpr const char* const OptimizeDatasetOp::kInputDataset; /* static */ constexpr const char* const OptimizeDatasetOp::kOptimizations; +/* static */ constexpr const char* const + OptimizeDatasetOp::kOptimizationsEnabled; +/* static */ constexpr const char* const + OptimizeDatasetOp::kOptimizationsDisabled; +/* static */ constexpr const char* const + OptimizeDatasetOp::kOptimizationsDefault; /* static */ constexpr const char* const OptimizeDatasetOp::kOutputTypes; /* static */ constexpr const char* const OptimizeDatasetOp::kOutputShapes; /* static */ constexpr const char* const OptimizeDatasetOp::kOptimizationConfigs; +/* static */ constexpr const char* const OptimizeDatasetOp::kOptimizeDatasetV1; +/* static */ constexpr const char* const OptimizeDatasetOp::kOptimizeDatasetV2; constexpr char kOptimizerName[] = "tf_data_meta_optimizer"; constexpr char kOptimizers[] = "optimizers"; @@ -42,6 +52,12 @@ constexpr char kOptimizerConfigs[] = "optimizer_configs"; OptimizeDatasetOp::OptimizeDatasetOp(OpKernelConstruction* ctx) : UnaryDatasetOpKernel(ctx) { + auto& op_name = ctx->def().op(); + if (op_name == kOptimizeDatasetV1) { + op_version_ = 1; + } else if (op_name == kOptimizeDatasetV2) { + op_version_ = 2; + } OP_REQUIRES_OK(ctx, ctx->GetAttr(kOptimizationConfigs, &optimization_configs_)); } @@ -49,8 +65,44 @@ OptimizeDatasetOp::OptimizeDatasetOp(OpKernelConstruction* ctx) void OptimizeDatasetOp::MakeDataset(OpKernelContext* ctx, DatasetBase* input, DatasetBase** output) { std::vector optimizations; - OP_REQUIRES_OK( - ctx, ParseVectorArgument(ctx, kOptimizations, &optimizations)); + if (op_version_ == 1) { + OP_REQUIRES_OK( + ctx, ParseVectorArgument(ctx, kOptimizations, &optimizations)); + } else if (op_version_ == 2) { + std::vector optimizations_enabled, optimizations_disabled, + optimizations_default; + OP_REQUIRES_OK(ctx, ParseVectorArgument(ctx, kOptimizationsEnabled, + &optimizations_enabled)); + OP_REQUIRES_OK(ctx, + ParseVectorArgument(ctx, kOptimizationsDisabled, + &optimizations_disabled)); + OP_REQUIRES_OK(ctx, ParseVectorArgument(ctx, kOptimizationsDefault, + &optimizations_default)); + + string job_name = port::JobName(); + if (job_name.empty()) { + // If `job_name` is empty, apply the enabled and default optimizations + // directly. + optimizations.insert(optimizations.end(), optimizations_enabled.begin(), + optimizations_enabled.end()); + optimizations.insert(optimizations.end(), optimizations_default.begin(), + optimizations_default.end()); + } else { + // The map that stores the experiment names and for how much percentage + // of the jobs, the experiments will be randomly turned on. + // + // This is currently empty; we have no live experiments yet. 
+ absl::flat_hash_map live_experiments; + + const string opt_ins_raw = std::getenv("TF_DATA_EXPERIMENT_OPT_IN"); + const string opt_outs_raw = std::getenv("TF_DATA_EXPERIMENT_OPT_OUT"); + auto hash_func = [](const string& str) { return Hash64(str); }; + optimizations = SelectOptimizations( + job_name, opt_ins_raw, opt_outs_raw, live_experiments, + optimizations_enabled, optimizations_disabled, optimizations_default, + hash_func); + } + } auto config_factory = [this, &optimizations]() { return CreateConfig(optimizations, optimization_configs_); @@ -95,6 +147,8 @@ RewriterConfig OptimizeDatasetOp::CreateConfig( namespace { REGISTER_KERNEL_BUILDER(Name("OptimizeDataset").Device(DEVICE_CPU), OptimizeDatasetOp); +REGISTER_KERNEL_BUILDER(Name("OptimizeDatasetV2").Device(DEVICE_CPU), + OptimizeDatasetOp); } // namespace } // namespace data } // namespace tensorflow diff --git a/tensorflow/core/kernels/data/optimize_dataset_op.h b/tensorflow/core/kernels/data/optimize_dataset_op.h index a5fcc72260d..d9e366f1ad5 100644 --- a/tensorflow/core/kernels/data/optimize_dataset_op.h +++ b/tensorflow/core/kernels/data/optimize_dataset_op.h @@ -25,10 +25,18 @@ class OptimizeDatasetOp : public UnaryDatasetOpKernel { static constexpr const char* const kDatasetType = "Optimize"; static constexpr const char* const kInputDataset = "input_dataset"; static constexpr const char* const kOptimizations = "optimizations"; + static constexpr const char* const kOptimizationsEnabled = + "optimizations_enabled"; + static constexpr const char* const kOptimizationsDisabled = + "optimizations_disabled"; + static constexpr const char* const kOptimizationsDefault = + "optimizations_default"; static constexpr const char* const kOutputTypes = "output_types"; static constexpr const char* const kOutputShapes = "output_shapes"; static constexpr const char* const kOptimizationConfigs = "optimization_configs"; + static constexpr const char* const kOptimizeDatasetV1 = "OptimizeDataset"; + static constexpr const char* const kOptimizeDatasetV2 = "OptimizeDatasetV2"; explicit OptimizeDatasetOp(OpKernelConstruction* ctx); @@ -41,6 +49,7 @@ class OptimizeDatasetOp : public UnaryDatasetOpKernel { std::vector optimizations_configs); std::vector optimization_configs_; + int op_version_ = 0; }; } // namespace data diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc index 4f750cc938d..6ef5635e95a 100644 --- a/tensorflow/core/ops/dataset_ops.cc +++ b/tensorflow/core/ops/dataset_ops.cc @@ -837,6 +837,17 @@ REGISTER_OP("OptimizeDataset") .Attr("optimization_configs: list(string) = []") .SetShapeFn(shape_inference::ScalarShape); +REGISTER_OP("OptimizeDatasetV2") + .Input("input_dataset: variant") + .Input("optimizations_enabled: string") + .Input("optimizations_disabled: string") + .Input("optimizations_default: string") + .Output("handle: variant") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .Attr("optimization_configs: list(string) = []") + .SetShapeFn(shape_inference::ScalarShape); + REGISTER_OP("OptionalFromValue") .Input("components: Toutput_types") .Output("optional: variant") diff --git a/tensorflow/core/platform/default/port.cc b/tensorflow/core/platform/default/port.cc index 11b3cd7fd9a..5b96eec072c 100644 --- a/tensorflow/core/platform/default/port.cc +++ b/tensorflow/core/platform/default/port.cc @@ -61,6 +61,8 @@ string Hostname() { return string(hostname); } +string JobName() { return ""; } + int NumSchedulableCPUs() { #if defined(__linux__) && 
!defined(__ANDROID__) cpu_set_t cpuset; diff --git a/tensorflow/core/platform/host_info.h b/tensorflow/core/platform/host_info.h index e76b83adf34..3447b2e0330 100644 --- a/tensorflow/core/platform/host_info.h +++ b/tensorflow/core/platform/host_info.h @@ -21,9 +21,13 @@ limitations under the License. namespace tensorflow { namespace port { -// Return the hostname of the machine on which this process is running +// Return the hostname of the machine on which this process is running. string Hostname(); +// Return the job name as a string if it exists, otherwise return an empty +// string. +string JobName(); + } // namespace port } // namespace tensorflow diff --git a/tensorflow/python/data/experimental/kernel_tests/optimize_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/optimize_dataset_test.py index 59e41528ea4..e26e97dbd97 100644 --- a/tensorflow/python/data/experimental/kernel_tests/optimize_dataset_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/optimize_dataset_test.py @@ -225,11 +225,14 @@ class OptimizeDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): optimized_it = dataset_ops.make_initializable_iterator(optimized_dataset) self.assertGreaterEqual(len(w), 1) - expected = ("tf.data graph rewrites are not compatible with " - "tf.Variable. The following rewrites will be disabled: %s." - " To enable rewrites, use resource variables instead by " - "calling `tf.enable_resource_variables()` at the start of the " - "program." % (", ".join(options._graph_rewrites()))) + graph_rewrites = options._graph_rewrites() + expected = ( + "tf.data graph rewrites are not compatible with " + "tf.Variable. The following rewrites will be disabled: %s." + " To enable rewrites, use resource variables instead by " + "calling `tf.enable_resource_variables()` at the start of the " + "program." 
% + (", ".join(graph_rewrites.enabled + graph_rewrites.default))) self.assertTrue(any(expected in str(warning) for warning in w)) # Check that outputs are the same in the optimized and unoptimized cases, @@ -251,34 +254,136 @@ class OptimizeDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): break @combinations.generate(test_base.default_test_combinations()) - def testOptimizationEnabledByDefault(self): - """Tests that some optimizations are applied to datasets by default.""" + def testOptimizationDefault(self): + """Tests the optimization settings by default.""" options = dataset_ops.Options() - expected_optimizations = [ + expected_optimizations_enabled = [] + expected_optimizations_disabled = [] + expected_optimizations_default = [ "map_and_batch_fusion", "noop_elimination", "shuffle_and_repeat_fusion", ] - self.assertEqual( - set(options._graph_rewrites()), set(expected_optimizations)) + graph_rewrites = options._graph_rewrites() + self.assertEqual(set(graph_rewrites.enabled), + set(expected_optimizations_enabled)) + self.assertEqual(set(graph_rewrites.disabled), + set(expected_optimizations_disabled)) + self.assertEqual(set(graph_rewrites.default), + set(expected_optimizations_default)) + + options.experimental_optimization.apply_default_optimizations = True + graph_rewrites = options._graph_rewrites() + self.assertEqual(set(graph_rewrites.enabled), + set(expected_optimizations_enabled)) + self.assertEqual(set(graph_rewrites.disabled), + set(expected_optimizations_disabled)) + self.assertEqual(set(graph_rewrites.default), + set(expected_optimizations_default)) + + options.experimental_optimization.apply_default_optimizations = False + expected_optimizations_default = [] + graph_rewrites = options._graph_rewrites() + self.assertEqual(set(graph_rewrites.enabled), + set(expected_optimizations_enabled)) + self.assertEqual(set(graph_rewrites.disabled), + set(expected_optimizations_disabled)) + self.assertEqual(set(graph_rewrites.default), + set(expected_optimizations_default)) @combinations.generate(test_base.default_test_combinations()) - def testOptimizationDisableDefault(self): - """Tests that we can disable all graph optimizations enabled by default. - - If the `apply_default_optimizations` optimization options flag is False, - only explicitly enabled optimizations will be applied. 
- """ + def testOptimizationEnabled(self): + """Tests the optimization settings by enabling all.""" options = dataset_ops.Options() - options.experimental_optimization.apply_default_optimizations = False + options.experimental_optimization.filter_fusion = True + options.experimental_optimization.filter_with_random_uniform_fusion = True options.experimental_optimization.hoist_random_uniform = True + options.experimental_optimization.map_and_batch_fusion = True + options.experimental_optimization.map_and_filter_fusion = True + options.experimental_optimization.map_parallelization = True + options.experimental_optimization.map_fusion = True options.experimental_optimization.noop_elimination = True - expected_optimizations = [ + options.experimental_optimization.parallel_batch = True + options.experimental_optimization.shuffle_and_repeat_fusion = True + options.experimental_optimization.map_vectorization.enabled = True + options.experimental_optimization.autotune_buffers = True + options.experimental_deterministic = False + options.experimental_stats.latency_all_edges = True + options.experimental_slack = True + + expected_optimizations_enabled = [ + "filter_fusion", + "filter_with_random_uniform_fusion", "hoist_random_uniform", + "map_and_batch_fusion", + "map_and_filter_fusion", + "map_parallelization", + "map_fusion", "noop_elimination", + "parallel_batch", + "shuffle_and_repeat_fusion", + "map_vectorization", + "inject_prefetch", + "make_sloppy", + "latency_all_edges", + "slack", ] - self.assertEqual( - set(options._graph_rewrites()), set(expected_optimizations)) + expected_optimizations_disabled = [] + expected_optimizations_default = [] + graph_rewrites = options._graph_rewrites() + self.assertEqual(set(graph_rewrites.enabled), + set(expected_optimizations_enabled)) + self.assertEqual(set(graph_rewrites.disabled), + set(expected_optimizations_disabled)) + self.assertEqual(set(graph_rewrites.default), + set(expected_optimizations_default)) + + @combinations.generate(test_base.default_test_combinations()) + def testOptimizationDisabled(self): + """Tests the optimization settings by disabling all.""" + options = dataset_ops.Options() + options.experimental_optimization.filter_fusion = False + options.experimental_optimization.filter_with_random_uniform_fusion = False + options.experimental_optimization.hoist_random_uniform = False + options.experimental_optimization.map_and_batch_fusion = False + options.experimental_optimization.map_and_filter_fusion = False + options.experimental_optimization.map_parallelization = False + options.experimental_optimization.map_fusion = False + options.experimental_optimization.noop_elimination = False + options.experimental_optimization.parallel_batch = False + options.experimental_optimization.shuffle_and_repeat_fusion = False + options.experimental_optimization.map_vectorization.enabled = False + options.experimental_optimization.autotune = False + options.experimental_deterministic = True + options.experimental_stats.latency_all_edges = False + options.experimental_slack = False + + expected_optimizations_enabled = [] + expected_optimizations_disabled = [ + "filter_fusion", + "filter_with_random_uniform_fusion", + "hoist_random_uniform", + "map_and_batch_fusion", + "map_and_filter_fusion", + "map_parallelization", + "map_fusion", + "noop_elimination", + "parallel_batch", + "shuffle_and_repeat_fusion", + "map_vectorization", + "inject_prefetch", + "make_sloppy", + "latency_all_edges", + "slack", + ] + expected_optimizations_default = [] + graph_rewrites 
= options._graph_rewrites() + self.assertEqual(set(graph_rewrites.enabled), + set(expected_optimizations_enabled)) + self.assertEqual(set(graph_rewrites.disabled), + set(expected_optimizations_disabled)) + self.assertEqual(set(graph_rewrites.default), + set(expected_optimizations_default)) @combinations.generate(test_base.default_test_combinations()) def testAutotuningDefaults(self): @@ -295,7 +400,7 @@ class OptimizeDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): def testAutotuningBufferSizes(self): options = dataset_ops.Options() options.experimental_optimization.autotune_buffers = True - self.assertIn("inject_prefetch", options._graph_rewrites()) + self.assertIn("inject_prefetch", options._graph_rewrites().enabled) autotune, algorithm, cpu_budget = options._autotune_settings() self.assertTrue(autotune) self.assertEqual(algorithm, diff --git a/tensorflow/python/data/experimental/kernel_tests/prefetch_with_slack_test.py b/tensorflow/python/data/experimental/kernel_tests/prefetch_with_slack_test.py index ff1f1680a76..cbff39b90e5 100644 --- a/tensorflow/python/data/experimental/kernel_tests/prefetch_with_slack_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/prefetch_with_slack_test.py @@ -45,7 +45,7 @@ class PrefetchWithSlackTest(test_base.DatasetTestBase, parameterized.TestCase): multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator( dataset, ["/cpu:1", "/cpu:2"]) dataset = multi_device_iterator._dataset # pylint: disable=protected-access - self.assertIn("slack", dataset.options()._graph_rewrites()) + self.assertIn("slack", dataset.options()._graph_rewrites().enabled) self.assertIn("slack:slack_period:2", dataset.options()._graph_rewrite_configs()) @@ -69,7 +69,7 @@ class PrefetchWithSlackTest(test_base.DatasetTestBase, parameterized.TestCase): options = dataset_ops.Options() options.experimental_slack = True dataset = dataset.with_options(options) - self.assertIn("slack", dataset.options()._graph_rewrites()) + self.assertIn("slack", dataset.options()._graph_rewrites().enabled) self.assertIn("slack:slack_period:1", dataset.options()._graph_rewrite_configs()) self.assertDatasetProduces(dataset, range(10)) diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization/optimize_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/optimize_dataset_serialization_test.py index 385b1acd49c..30d53165f85 100644 --- a/tensorflow/python/data/experimental/kernel_tests/serialization/optimize_dataset_serialization_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/serialization/optimize_dataset_serialization_test.py @@ -36,7 +36,8 @@ class OptimizeDatasetSerializationTest( def build_dataset(num_elements, batch_size): return dataset_ops.Dataset.range(num_elements).map(lambda x: x * x).batch( - batch_size).apply(optimization.optimize(["map_and_batch_fusion"])) + batch_size).apply( + optimization.optimize(["map_and_batch_fusion"], None, None)) self.run_core_tests(lambda: build_dataset(200, 10), 20) @@ -50,7 +51,8 @@ class OptimizeDatasetSerializationTest( dataset = dataset.batch(5) # map_vectorization adds a new vectorized function to the function # library. 
- dataset = dataset.apply(optimization.optimize(["map_vectorization"])) + dataset = dataset.apply( + optimization.optimize(["map_vectorization"], None, None)) return dataset self.run_core_tests(build_dataset, 20) diff --git a/tensorflow/python/data/experimental/ops/optimization.py b/tensorflow/python/data/experimental/ops/optimization.py index 4581a612ed6..161850521de 100644 --- a/tensorflow/python/data/experimental/ops/optimization.py +++ b/tensorflow/python/data/experimental/ops/optimization.py @@ -36,13 +36,19 @@ def model(): return _apply_fn -def optimize(optimizations=None): +def optimize(optimizations_enabled=None, optimizations_disabled=None, + optimizations_default=None): """A transformation that applies optimizations. Args: - optimizations: (Optional.) A `tf.string` vector `tf.Tensor` identifying - optimizations to use. If not specified, the default set of optimizations - is applied. + optimizations_enabled: (Optional.) A `tf.string` vector `tf.Tensor` + identifying enabled optimizations. If not specified, set to be empty. + + optimizations_disabled: (Optional.) A `tf.string` vector `tf.Tensor` + identifying disabled optimizations. If not specified, set to be empty. + + optimizations_default: (Optional.) A `tf.string` vector `tf.Tensor` + identifying default optimizations. If not specified, set to be empty. Returns: A `Dataset` transformation function, which can be passed to @@ -51,7 +57,11 @@ def optimize(optimizations=None): def _apply_fn(dataset): """Function from `Dataset` to `Dataset` that applies the transformation.""" - return dataset_ops._OptimizeDataset(dataset, optimizations) # pylint: disable=protected-access + return dataset_ops._OptimizeDataset( # pylint: disable=protected-access + dataset, + optimizations_enabled, + optimizations_disabled, + optimizations_default) return _apply_fn diff --git a/tensorflow/python/data/experimental/ops/optimization_options.py b/tensorflow/python/data/experimental/ops/optimization_options.py index ab1c7b73212..fa7a0d23dea 100644 --- a/tensorflow/python/data/experimental/ops/optimization_options.py +++ b/tensorflow/python/data/experimental/ops/optimization_options.py @@ -53,9 +53,13 @@ class MapVectorizationOptions(options.OptionsBase): "defaults to False.") def _graph_rewrites(self): - if self.enabled: - return ["map_vectorization"] - return [] + graph_rewrites = options.graph_rewrites() + result = graph_rewrites(enabled=[], disabled=[], default=[]) + if self.enabled is True: # pylint: disable=g-bool-id-comparison + result.enabled.append("map_vectorization") + elif self.enabled is False: # pylint: disable=g-bool-id-comparison + result.disabled.append("map_vectorization") + return result def _graph_rewrite_configs(self): if not self.enabled: @@ -229,8 +233,20 @@ class OptimizationOptions(options.OptionsBase): return autotune, algorithm, cpu_budget def _graph_rewrites(self): - """Produces the list of enabled graph optimizations.""" - result = set() + """Produces lists of enabled, disabled and default graph optimizations. + + Returns: + result: a namedtuple with three attributes. `result.enabled` is the list + of user enabled optimizations. `result.disabled` is the list of user + disabled optimizations. `result.default` is the list of optimizations + that are enabled by default (the user has not explicitly enabled or + disabled them). 
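A hedged usage sketch of the updated three-argument optimize() transformation, mirroring the serialization-test call above; the dataset and the chosen rewrite are placeholders, and the module is internal/experimental rather than public API.

    import tensorflow as tf
    from tensorflow.python.data.experimental.ops import optimization

    dataset = tf.data.Dataset.range(100).map(lambda x: x * x).batch(10)
    dataset = dataset.apply(
        optimization.optimize(["map_and_batch_fusion"],  # explicitly enabled
                              None,                      # nothing explicitly disabled
                              None))                     # no extra default rewrites
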
+ """ + if self.map_vectorization is not None: + result = self.map_vectorization._graph_rewrites() # pylint: disable=protected-access + else: + result = MapVectorizationOptions()._graph_rewrites() # pylint: disable=protected-access + all_optimizations = [ "filter_fusion", "filter_with_random_uniform_fusion", @@ -244,11 +260,8 @@ class OptimizationOptions(options.OptionsBase): "reorder_data_discarding_ops", "shuffle_and_repeat_fusion", ] - for optimization in all_optimizations: - if getattr(self, optimization): - result.add(optimization) - if self.apply_default_optimizations is not False: + if self.apply_default_optimizations is not False: # pylint: disable=g-bool-id-comparison # The following optimizations are turned on by default, unless the user # explicitly disables them. optimizations_to_disable = [ @@ -257,21 +270,29 @@ class OptimizationOptions(options.OptionsBase): "shuffle_and_repeat_fusion", ] for optimization in optimizations_to_disable: - if getattr(self, optimization) is not False: - result.add(optimization) + if getattr(self, optimization) is None: + result.default.append(optimization) - if self.map_vectorization is not None: - result.update(self.map_vectorization._graph_rewrites()) # pylint: disable=protected-access + # Each of these attributes on the Options object is either True (explicitly + # enabled), False (explicitly disabled), or None (default). + for optimization in all_optimizations: + if getattr(self, optimization) is True: # pylint: disable=g-bool-id-comparison + result.enabled.append(optimization) + elif getattr(self, optimization) is False: # pylint: disable=g-bool-id-comparison + result.disabled.append(optimization) autotune_buffers = self._autotune_buffers() - if self.autotune is not False and autotune_buffers: # pylint: disable=g-bool-id-comparison + if self.autotune is not False and autotune_buffers is True: # pylint: disable=g-bool-id-comparison # When autotuning buffer sizes is enabled, we inject a `prefetch` # transformation after asynchronous dataset ops. Only the buffer sizes of # prefetch transformations will be autotuned, though this is practically # equivalent to tuning the buffer sizes of the other asynchronous # transformations. 
- result.add("inject_prefetch") - return sorted(list(result)) + result.enabled.append("inject_prefetch") + if self.autotune is False: # pylint: disable=g-bool-id-comparison + result.disabled.append("inject_prefetch") + + return result def _graph_rewrite_configs(self): if self.map_vectorization is not None: diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 491eb031931..bd75d0a735a 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -30,11 +30,13 @@ from six.moves import queue as Queue # pylint: disable=redefined-builtin from tensorflow.core.framework import graph_pb2 from tensorflow.python import tf2 +from tensorflow.python.compat import compat from tensorflow.python.data.experimental.ops import distribute_options from tensorflow.python.data.experimental.ops import optimization_options from tensorflow.python.data.experimental.ops import stats_options from tensorflow.python.data.experimental.ops import threading_options from tensorflow.python.data.ops import iterator_ops +from tensorflow.python.data.util import convert from tensorflow.python.data.util import nest from tensorflow.python.data.util import options as options_lib from tensorflow.python.data.util import random_seed @@ -374,16 +376,18 @@ class DatasetV2(collections_abc.Iterable, tracking_base.Trackable, graph_rewrites = options._graph_rewrites() graph_rewrite_configs = options._graph_rewrite_configs() # pylint: enable=protected-access - if graph_rewrites: + if graph_rewrites.enabled or graph_rewrites.default: if self._has_captured_ref(): warnings.warn( "tf.data graph rewrites are not compatible with tf.Variable. " "The following rewrites will be disabled: %s. To enable " "rewrites, use resource variables instead by calling " "`tf.enable_resource_variables()` at the start of the program." % - ", ".join(graph_rewrites)) + ", ".join(graph_rewrites.enabled + graph_rewrites.default)) else: - dataset = _OptimizeDataset(dataset, graph_rewrites, + dataset = _OptimizeDataset(dataset, graph_rewrites.enabled, + graph_rewrites.disabled, + graph_rewrites.default, graph_rewrite_configs) # (3) Apply autotune options @@ -2887,22 +2891,39 @@ class Options(options_lib.OptionsBase): "is being captured.") def _graph_rewrites(self): - """Produces the list of enabled static graph rewrites.""" - result = [] + """Produces lists of enabled, disabled, default static graph rewrites. + + Returns: + result: a namedtuple with three attributes. `result.enabled` is the list + of user enabled graph rewrites. `result.disabled` is the list of user + disabled graph rewrites. `result.default` is the list of graph + rewrites that are enabled by default (the user has not explicitly + enabled or disabled them). 
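A hedged sketch of the new three-way classification returned by the internal _graph_rewrites() helper; where each rewrite lands follows the test expectations earlier in this patch rather than a separate API guarantee.

    import tensorflow as tf

    options = tf.data.Options()
    options.experimental_optimization.map_parallelization = True   # explicitly enabled
    options.experimental_optimization.noop_elimination = False     # explicitly disabled
    rewrites = options._graph_rewrites()  # namedtuple with enabled/disabled/default
    # Per the tests above: "map_parallelization" lands in rewrites.enabled,
    # "noop_elimination" in rewrites.disabled, and untouched defaults such as
    # "map_and_batch_fusion" remain in rewrites.default.
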
+ """ if self.experimental_optimization is not None: - result.extend(self.experimental_optimization._graph_rewrites()) # pylint: disable=protected-access + result = self.experimental_optimization._graph_rewrites() # pylint: disable=protected-access else: # Apply default options - result.extend( - optimization_options.OptimizationOptions()._graph_rewrites()) # pylint: disable=protected-access + result = optimization_options.OptimizationOptions()._graph_rewrites() # pylint: disable=protected-access if self.experimental_deterministic is False: # pylint: disable=g-bool-id-comparison - result.append("make_sloppy") - if self.experimental_stats and self.experimental_stats.latency_all_edges: - result.append("latency_all_edges") - if self.experimental_slack: - result.append("slack") - return result + result.enabled.append("make_sloppy") + elif self.experimental_deterministic is True: # pylint: disable=g-bool-id-comparison + result.disabled.append("make_sloppy") + if self.experimental_stats: + if self.experimental_stats.latency_all_edges is True: # pylint: disable=g-bool-id-comparison + result.enabled.append("latency_all_edges") + elif self.experimental_stats.latency_all_edges is False: # pylint: disable=g-bool-id-comparison + result.disabled.append("latency_all_edges") + if self.experimental_slack is True: # pylint: disable=g-bool-id-comparison + result.enabled.append("slack") + elif self.experimental_slack is False: # pylint: disable=g-bool-id-comparison + result.disabled.append("slack") + + graph_rewrites = options_lib.graph_rewrites() + return graph_rewrites(enabled=list(set(result.enabled)), + disabled=list(set(result.disabled)), + default=list(set(result.default))) def _graph_rewrite_configs(self): """Produces the list of configurations for enabled graph optimizations.""" @@ -4387,19 +4408,55 @@ class _ModelDataset(UnaryUnchangedStructureDataset): class _OptimizeDataset(UnaryUnchangedStructureDataset): """A `Dataset` that acts as an identity, and applies optimizations.""" - def __init__(self, input_dataset, optimizations, optimization_configs=None): + def __init__(self, + input_dataset, + optimizations_enabled, + optimizations_disabled, + optimizations_default, + optimization_configs=None): self._input_dataset = input_dataset - if optimizations is None: - optimizations = [] if optimization_configs is None: optimization_configs = [] - self._optimizations = ops.convert_to_tensor( - optimizations, dtype=dtypes.string, name="optimizations") - variant_tensor = gen_dataset_ops.optimize_dataset( - input_dataset._variant_tensor, # pylint: disable=protected-access - self._optimizations, - optimization_configs=optimization_configs, - **self._flat_structure) + + if compat.forward_compatible(2020, 8, 6): + self._optimizations_enabled = convert.optional_param_to_tensor( + argument_name="optimizations_enabled", + argument_value=optimizations_enabled, + argument_default=[], + argument_dtype=dtypes.string) + self._optimizations_disabled = convert.optional_param_to_tensor( + argument_name="optimizations_disabled", + argument_value=optimizations_disabled, + argument_default=[], + argument_dtype=dtypes.string) + self._optimizations_default = convert.optional_param_to_tensor( + argument_name="optimizations_default", + argument_value=optimizations_default, + argument_default=[], + argument_dtype=dtypes.string) + + variant_tensor = gen_dataset_ops.optimize_dataset_v2( + input_dataset._variant_tensor, # pylint: disable=protected-access + self._optimizations_enabled, + self._optimizations_disabled, + 
self._optimizations_default, + optimization_configs=optimization_configs, + **self._flat_structure) + else: + if optimizations_enabled is None: + optimizations_enabled = [] + if optimizations_default is None: + optimizations_default = [] + + self._optimizations = ops.convert_to_tensor( + optimizations_enabled + optimizations_default, + dtype=dtypes.string, + name="optimizations") + variant_tensor = gen_dataset_ops.optimize_dataset( + input_dataset._variant_tensor, # pylint: disable=protected-access + self._optimizations, + optimization_configs=optimization_configs, + **self._flat_structure) super(_OptimizeDataset, self).__init__(input_dataset, variant_tensor) diff --git a/tensorflow/python/data/util/options.py b/tensorflow/python/data/util/options.py index 3c79197fae8..781ae6403fa 100644 --- a/tensorflow/python/data/util/options.py +++ b/tensorflow/python/data/util/options.py @@ -18,6 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import collections + def _internal_attr_name(name): return "_" + name @@ -56,6 +58,12 @@ class OptionsBase(object): "Cannot set the property %s on %s." % (name, type(self).__name__)) +# Creates a namedtuple with three keys for optimization graph rewrites settings. +def graph_rewrites(): + return collections.namedtuple("GraphRewrites", + ["enabled", "disabled", "default"]) + + def create_option(name, ty, docstring, default_factory=lambda: None): """Creates a type-checked property. diff --git a/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt index 4ad0c0d4448..3c47a392b7e 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt @@ -2660,6 +2660,10 @@ tf_module { name: "OptimizeDataset" argspec: "args=[\'input_dataset\', \'optimizations\', \'output_types\', \'output_shapes\', \'optimization_configs\', \'name\'], varargs=None, keywords=None, defaults=[\'[]\', \'None\'], " } + member_method { + name: "OptimizeDatasetV2" + argspec: "args=[\'input_dataset\', \'optimizations_enabled\', \'optimizations_disabled\', \'optimizations_default\', \'output_types\', \'output_shapes\', \'optimization_configs\', \'name\'], varargs=None, keywords=None, defaults=[\'[]\', \'None\'], " + } member_method { name: "OptionalFromValue" argspec: "args=[\'components\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt index 4ad0c0d4448..3c47a392b7e 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt @@ -2660,6 +2660,10 @@ tf_module { name: "OptimizeDataset" argspec: "args=[\'input_dataset\', \'optimizations\', \'output_types\', \'output_shapes\', \'optimization_configs\', \'name\'], varargs=None, keywords=None, defaults=[\'[]\', \'None\'], " } + member_method { + name: "OptimizeDatasetV2" + argspec: "args=[\'input_dataset\', \'optimizations_enabled\', \'optimizations_disabled\', \'optimizations_default\', \'output_types\', \'output_shapes\', \'optimization_configs\', \'name\'], varargs=None, keywords=None, defaults=[\'[]\', \'None\'], " + } member_method { name: "OptionalFromValue" argspec: "args=[\'components\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " From 42c80494771c7ac398a83491e059cf4abd3918f4 Mon Sep 17 00:00:00 2001 From: AG Ramesh Date: 
Mon, 27 Jul 2020 17:33:26 -0700 Subject: [PATCH 1429/2522] Making comments consistent. Co-authored-by: Penporn Koanantakool <38085909+penpornk@users.noreply.github.com> --- tensorflow/core/kernels/mkl_batch_matmul_op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/mkl_batch_matmul_op.cc b/tensorflow/core/kernels/mkl_batch_matmul_op.cc index 1a5821bc5af..f0226369daa 100644 --- a/tensorflow/core/kernels/mkl_batch_matmul_op.cc +++ b/tensorflow/core/kernels/mkl_batch_matmul_op.cc @@ -34,7 +34,7 @@ limitations under the License. #if !defined(INTEL_MKL_DNN_ONLY) #include "mkl_cblas.h" -#endif // INTEL_MKL_DNN_ONLY +#endif // !INTEL_MKL_DNN_ONLY #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" From 3965492046818d78708e751e08aff7f145ee8a22 Mon Sep 17 00:00:00 2001 From: AG Ramesh Date: Mon, 27 Jul 2020 17:33:53 -0700 Subject: [PATCH 1430/2522] Making comments consistent. Co-authored-by: Penporn Koanantakool <38085909+penpornk@users.noreply.github.com> --- tensorflow/core/kernels/mkl_batch_matmul_op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/mkl_batch_matmul_op.cc b/tensorflow/core/kernels/mkl_batch_matmul_op.cc index f0226369daa..5cd3ac3b084 100644 --- a/tensorflow/core/kernels/mkl_batch_matmul_op.cc +++ b/tensorflow/core/kernels/mkl_batch_matmul_op.cc @@ -255,7 +255,7 @@ class BatchMatMulMkl : public OpKernel { dnnl_gemm_batch(TransA_Array, TransB_Array, M_Array, N_Array, K_Array, alpha_Array, *A_Array, *B_Array, beta_Array, *C_Array, group_count, group_size, ctx); -#endif // INTEL_MKL_DNN_ONLY +#endif // !INTEL_MKL_DNN_ONLY } // BatchMatMul BFloat16 support only exists in DNNL 1.2 onwards. #if defined(ENABLE_MKLDNN_V1) && defined(ENABLE_INTEL_MKL_BFLOAT16) From a7df1d610bb3ae9a8d7b8ba4964b493906d94bed Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Jul 2020 17:24:01 -0700 Subject: [PATCH 1431/2522] Update ops-related pbtxt files. 
PiperOrigin-RevId: 323475628 Change-Id: If6ecfe1fa73ba1afe039ac06493f06f2304b0e8d --- .../ops_history_v2/KthOrderStatistic.pbtxt | 15 + .../compat/ops_history_v2/MakeUnique.pbtxt | 11 + .../compat/ops_history_v2/TPUCompile.pbtxt | 50 +++ .../TPUCompileSucceededAssert.pbtxt | 8 + .../compat/ops_history_v2/TPUExecute.pbtxt | 26 ++ .../TPUExecuteAndUpdateVariables.pbtxt | 36 ++ .../ops_history_v2/TPUPartitionedInput.pbtxt | 29 ++ .../ops_history_v2/TPUPartitionedOutput.pbtxt | 29 ++ .../compat/ops_history_v2/TopKUnique.pbtxt | 19 + .../ops_history_v2/TopKWithUnique.pbtxt | 19 + .../ops_history_v2/XlaHostCompute.pbtxt | 54 +++ .../ops_history_v2/XlaRecvFromHost.pbtxt | 20 ++ .../compat/ops_history_v2/XlaSendToHost.pbtxt | 16 + tensorflow/core/ops/ops.pbtxt | 332 ++++++++++++++++++ 14 files changed, 664 insertions(+) create mode 100644 tensorflow/core/ops/compat/ops_history_v2/KthOrderStatistic.pbtxt create mode 100644 tensorflow/core/ops/compat/ops_history_v2/MakeUnique.pbtxt create mode 100644 tensorflow/core/ops/compat/ops_history_v2/TPUCompile.pbtxt create mode 100644 tensorflow/core/ops/compat/ops_history_v2/TPUCompileSucceededAssert.pbtxt create mode 100644 tensorflow/core/ops/compat/ops_history_v2/TPUExecute.pbtxt create mode 100644 tensorflow/core/ops/compat/ops_history_v2/TPUExecuteAndUpdateVariables.pbtxt create mode 100644 tensorflow/core/ops/compat/ops_history_v2/TPUPartitionedInput.pbtxt create mode 100644 tensorflow/core/ops/compat/ops_history_v2/TPUPartitionedOutput.pbtxt create mode 100644 tensorflow/core/ops/compat/ops_history_v2/TopKUnique.pbtxt create mode 100644 tensorflow/core/ops/compat/ops_history_v2/TopKWithUnique.pbtxt create mode 100644 tensorflow/core/ops/compat/ops_history_v2/XlaHostCompute.pbtxt create mode 100644 tensorflow/core/ops/compat/ops_history_v2/XlaRecvFromHost.pbtxt create mode 100644 tensorflow/core/ops/compat/ops_history_v2/XlaSendToHost.pbtxt diff --git a/tensorflow/core/ops/compat/ops_history_v2/KthOrderStatistic.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/KthOrderStatistic.pbtxt new file mode 100644 index 00000000000..8e5b79cec04 --- /dev/null +++ b/tensorflow/core/ops/compat/ops_history_v2/KthOrderStatistic.pbtxt @@ -0,0 +1,15 @@ +op { + name: "KthOrderStatistic" + input_arg { + name: "input" + type: DT_FLOAT + } + output_arg { + name: "output" + type: DT_FLOAT + } + attr { + name: "k" + type: "int" + } +} diff --git a/tensorflow/core/ops/compat/ops_history_v2/MakeUnique.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/MakeUnique.pbtxt new file mode 100644 index 00000000000..685f52d66ea --- /dev/null +++ b/tensorflow/core/ops/compat/ops_history_v2/MakeUnique.pbtxt @@ -0,0 +1,11 @@ +op { + name: "MakeUnique" + input_arg { + name: "input" + type: DT_FLOAT + } + output_arg { + name: "output" + type: DT_FLOAT + } +} diff --git a/tensorflow/core/ops/compat/ops_history_v2/TPUCompile.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TPUCompile.pbtxt new file mode 100644 index 00000000000..be95091c809 --- /dev/null +++ b/tensorflow/core/ops/compat/ops_history_v2/TPUCompile.pbtxt @@ -0,0 +1,50 @@ +op { + name: "TPUCompile" + input_arg { + name: "dynamic_shapes" + type: DT_INT64 + number_attr: "NumDynamicShapes" + } + input_arg { + name: "guaranteed_constants" + type_list_attr: "Tguaranteed_constants" + } + output_arg { + name: "compilation_status" + type: DT_STRING + } + output_arg { + name: "program" + type: DT_STRING + number_attr: "num_computations" + } + output_arg { + name: "may_modify_variables" + type: DT_BOOL + number_attr: "num_computations" 
+ } + attr { + name: "num_computations" + type: "int" + has_minimum: true + } + attr { + name: "function" + type: "func" + } + attr { + name: "metadata" + type: "string" + } + attr { + name: "NumDynamicShapes" + type: "int" + has_minimum: true + } + attr { + name: "Tguaranteed_constants" + type: "list(type)" + has_minimum: true + } + is_stateful: true +} diff --git a/tensorflow/core/ops/compat/ops_history_v2/TPUCompileSucceededAssert.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TPUCompileSucceededAssert.pbtxt new file mode 100644 index 00000000000..bc1b3c153f1 --- /dev/null +++ b/tensorflow/core/ops/compat/ops_history_v2/TPUCompileSucceededAssert.pbtxt @@ -0,0 +1,8 @@ +op { + name: "TPUCompileSucceededAssert" + input_arg { + name: "compilation_status" + type: DT_STRING + } + is_stateful: true +} diff --git a/tensorflow/core/ops/compat/ops_history_v2/TPUExecute.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TPUExecute.pbtxt new file mode 100644 index 00000000000..f60da0ac5be --- /dev/null +++ b/tensorflow/core/ops/compat/ops_history_v2/TPUExecute.pbtxt @@ -0,0 +1,26 @@ +op { + name: "TPUExecute" + input_arg { + name: "args" + type_list_attr: "Targs" + } + input_arg { + name: "key" + type: DT_STRING + } + output_arg { + name: "results" + type_list_attr: "Tresults" + } + attr { + name: "Targs" + type: "list(type)" + has_minimum: true + } + attr { + name: "Tresults" + type: "list(type)" + has_minimum: true + } + is_stateful: true +} diff --git a/tensorflow/core/ops/compat/ops_history_v2/TPUExecuteAndUpdateVariables.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TPUExecuteAndUpdateVariables.pbtxt new file mode 100644 index 00000000000..8fab665120c --- /dev/null +++ b/tensorflow/core/ops/compat/ops_history_v2/TPUExecuteAndUpdateVariables.pbtxt @@ -0,0 +1,36 @@ +op { + name: "TPUExecuteAndUpdateVariables" + input_arg { + name: "args" + type_list_attr: "Targs" + } + input_arg { + name: "key" + type: DT_STRING + } + output_arg { + name: "results" + type_list_attr: "Tresults" + } + attr { + name: "Targs" + type: "list(type)" + has_minimum: true + } + attr { + name: "Tresults" + type: "list(type)" + has_minimum: true + } + attr { + name: "device_var_reads_indices" + type: "list(int)" + has_minimum: true + } + attr { + name: "device_var_updates_indices" + type: "list(int)" + has_minimum: true + } + is_stateful: true +} diff --git a/tensorflow/core/ops/compat/ops_history_v2/TPUPartitionedInput.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TPUPartitionedInput.pbtxt new file mode 100644 index 00000000000..aab0574d99e --- /dev/null +++ b/tensorflow/core/ops/compat/ops_history_v2/TPUPartitionedInput.pbtxt @@ -0,0 +1,29 @@ +op { + name: "TPUPartitionedInput" + input_arg { + name: "inputs" + type_attr: "T" + number_attr: "N" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "N" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "T" + type: "type" + } + attr { + name: "partition_dim" + type: "int" + default_value { + i: 0 + } + } +} diff --git a/tensorflow/core/ops/compat/ops_history_v2/TPUPartitionedOutput.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TPUPartitionedOutput.pbtxt new file mode 100644 index 00000000000..38a85e31964 --- /dev/null +++ b/tensorflow/core/ops/compat/ops_history_v2/TPUPartitionedOutput.pbtxt @@ -0,0 +1,29 @@ +op { + name: "TPUPartitionedOutput" + input_arg { + name: "inputs" + type_attr: "T" + } + output_arg { + name: "output" + type_attr: "T" + number_attr: "num_splits" + } + attr { + name: "T" + type: "type" + } + attr { + 
name: "num_splits" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "partition_dim" + type: "int" + default_value { + i: 0 + } + } +} diff --git a/tensorflow/core/ops/compat/ops_history_v2/TopKUnique.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TopKUnique.pbtxt new file mode 100644 index 00000000000..12463385bcc --- /dev/null +++ b/tensorflow/core/ops/compat/ops_history_v2/TopKUnique.pbtxt @@ -0,0 +1,19 @@ +op { + name: "TopKUnique" + input_arg { + name: "input" + type: DT_FLOAT + } + output_arg { + name: "topk" + type: DT_FLOAT + } + output_arg { + name: "topk_indices" + type: DT_INT32 + } + attr { + name: "k" + type: "int" + } +} diff --git a/tensorflow/core/ops/compat/ops_history_v2/TopKWithUnique.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TopKWithUnique.pbtxt new file mode 100644 index 00000000000..5e3216fa554 --- /dev/null +++ b/tensorflow/core/ops/compat/ops_history_v2/TopKWithUnique.pbtxt @@ -0,0 +1,19 @@ +op { + name: "TopKWithUnique" + input_arg { + name: "input" + type: DT_FLOAT + } + output_arg { + name: "topk" + type: DT_FLOAT + } + output_arg { + name: "topk_indices" + type: DT_INT32 + } + attr { + name: "k" + type: "int" + } +} diff --git a/tensorflow/core/ops/compat/ops_history_v2/XlaHostCompute.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/XlaHostCompute.pbtxt new file mode 100644 index 00000000000..87a8f639e6c --- /dev/null +++ b/tensorflow/core/ops/compat/ops_history_v2/XlaHostCompute.pbtxt @@ -0,0 +1,54 @@ +op { + name: "XlaHostCompute" + input_arg { + name: "inputs" + type_list_attr: "Tinputs" + } + output_arg { + name: "outputs" + type_list_attr: "Toutputs" + } + attr { + name: "Tinputs" + type: "list(type)" + has_minimum: true + } + attr { + name: "Toutputs" + type: "list(type)" + has_minimum: true + } + attr { + name: "ancestors" + type: "list(string)" + has_minimum: true + } + attr { + name: "shapes" + type: "list(shape)" + has_minimum: true + } + attr { + name: "shape_inference_graph" + type: "func" + } + attr { + name: "key" + type: "string" + } + attr { + name: "cost_estimate_ns" + type: "int" + default_value { + i: 1000000 + } + } + attr { + name: "tpu_core" + type: "int" + default_value { + i: 0 + } + } + is_stateful: true +} diff --git a/tensorflow/core/ops/compat/ops_history_v2/XlaRecvFromHost.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/XlaRecvFromHost.pbtxt new file mode 100644 index 00000000000..d3760ea79b1 --- /dev/null +++ b/tensorflow/core/ops/compat/ops_history_v2/XlaRecvFromHost.pbtxt @@ -0,0 +1,20 @@ +op { + name: "XlaRecvFromHost" + output_arg { + name: "output" + type_attr: "Toutput" + } + attr { + name: "Toutput" + type: "type" + } + attr { + name: "shape" + type: "shape" + } + attr { + name: "key" + type: "string" + } + is_stateful: true +} diff --git a/tensorflow/core/ops/compat/ops_history_v2/XlaSendToHost.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/XlaSendToHost.pbtxt new file mode 100644 index 00000000000..f2dfeaf4444 --- /dev/null +++ b/tensorflow/core/ops/compat/ops_history_v2/XlaSendToHost.pbtxt @@ -0,0 +1,16 @@ +op { + name: "XlaSendToHost" + input_arg { + name: "input" + type_attr: "Tinput" + } + attr { + name: "Tinput" + type: "type" + } + attr { + name: "key" + type: "string" + } + is_stateful: true +} diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 440b06b9465..fed598bdef4 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -20017,6 +20017,21 @@ op { type: DT_FLOAT } } +op { + name: "KthOrderStatistic" + input_arg { + name: 
"input" + type: DT_FLOAT + } + output_arg { + name: "output" + type: DT_FLOAT + } + attr { + name: "k" + type: "int" + } +} op { name: "L2Loss" input_arg { @@ -22342,6 +22357,17 @@ op { } is_stateful: true } +op { + name: "MakeUnique" + input_arg { + name: "input" + type: DT_FLOAT + } + output_arg { + name: "output" + type: DT_FLOAT + } +} op { name: "MapAndBatchDataset" input_arg { @@ -50669,6 +50695,64 @@ op { type: DT_STRING } } +op { + name: "TPUCompile" + input_arg { + name: "dynamic_shapes" + type: DT_INT64 + number_attr: "NumDynamicShapes" + } + input_arg { + name: "guaranteed_constants" + type_list_attr: "Tguaranteed_constants" + } + output_arg { + name: "compilation_status" + type: DT_STRING + } + output_arg { + name: "program" + type: DT_STRING + number_attr: "num_computations" + } + output_arg { + name: "may_modify_variables" + type: DT_BOOL + number_attr: "num_computations" + } + attr { + name: "num_computations" + type: "int" + has_minimum: true + } + attr { + name: "function" + type: "func" + } + attr { + name: "metadata" + type: "string" + } + attr { + name: "NumDynamicShapes" + type: "int" + has_minimum: true + } + attr { + name: "Tguaranteed_constants" + type: "list(type)" + has_minimum: true + } + is_stateful: true +} +op { + name: "TPUCompileSucceededAssert" + input_arg { + name: "compilation_status" + type: DT_STRING + } + is_stateful: true +} op { name: "TPUEmbeddingActivations" input_arg { @@ -50694,6 +50778,68 @@ op { has_minimum: true } } +op { + name: "TPUExecute" + input_arg { + name: "args" + type_list_attr: "Targs" + } + input_arg { + name: "key" + type: DT_STRING + } + output_arg { + name: "results" + type_list_attr: "Tresults" + } + attr { + name: "Targs" + type: "list(type)" + has_minimum: true + } + attr { + name: "Tresults" + type: "list(type)" + has_minimum: true + } + is_stateful: true +} +op { + name: "TPUExecuteAndUpdateVariables" + input_arg { + name: "args" + type_list_attr: "Targs" + } + input_arg { + name: "key" + type: DT_STRING + } + output_arg { + name: "results" + type_list_attr: "Tresults" + } + attr { + name: "Targs" + type: "list(type)" + has_minimum: true + } + attr { + name: "Tresults" + type: "list(type)" + has_minimum: true + } + attr { + name: "device_var_reads_indices" + type: "list(int)" + has_minimum: true + } + attr { + name: "device_var_updates_indices" + type: "list(int)" + has_minimum: true + } + is_stateful: true +} op { name: "TPUOrdinalSelector" output_arg { @@ -50738,6 +50884,64 @@ op { } } } +op { + name: "TPUPartitionedInput" + input_arg { + name: "inputs" + type_attr: "T" + number_attr: "N" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "N" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "T" + type: "type" + } + attr { + name: "partition_dim" + type: "int" + default_value { + i: 0 + } + } +} +op { + name: "TPUPartitionedOutput" + input_arg { + name: "inputs" + type_attr: "T" + } + output_arg { + name: "output" + type_attr: "T" + number_attr: "num_splits" + } + attr { + name: "T" + type: "type" + } + attr { + name: "num_splits" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "partition_dim" + type: "int" + default_value { + i: 0 + } + } +} op { name: "TPUReplicateMetadata" attr { @@ -53314,6 +53518,25 @@ op { explanation: "Use TopKV2 instead" } } +op { + name: "TopKUnique" + input_arg { + name: "input" + type: DT_FLOAT + } + output_arg { + name: "topk" + type: DT_FLOAT + } + output_arg { + name: "topk_indices" + type: DT_INT32 + } + attr { + name: "k" + type: 
"int" + } +} op { name: "TopKV2" input_arg { @@ -53360,6 +53583,25 @@ op { } } } +op { + name: "TopKWithUnique" + input_arg { + name: "input" + type: DT_FLOAT + } + output_arg { + name: "topk" + type: DT_FLOAT + } + output_arg { + name: "topk_indices" + type: DT_INT32 + } + attr { + name: "k" + type: "int" + } +} op { name: "Transpose" input_arg { @@ -55285,6 +55527,96 @@ op { } } } +op { + name: "XlaHostCompute" + input_arg { + name: "inputs" + type_list_attr: "Tinputs" + } + output_arg { + name: "outputs" + type_list_attr: "Toutputs" + } + attr { + name: "Tinputs" + type: "list(type)" + has_minimum: true + } + attr { + name: "Toutputs" + type: "list(type)" + has_minimum: true + } + attr { + name: "ancestors" + type: "list(string)" + has_minimum: true + } + attr { + name: "shapes" + type: "list(shape)" + has_minimum: true + } + attr { + name: "shape_inference_graph" + type: "func" + } + attr { + name: "key" + type: "string" + } + attr { + name: "cost_estimate_ns" + type: "int" + default_value { + i: 1000000 + } + } + attr { + name: "tpu_core" + type: "int" + default_value { + i: 0 + } + } + is_stateful: true +} +op { + name: "XlaRecvFromHost" + output_arg { + name: "output" + type_attr: "Toutput" + } + attr { + name: "Toutput" + type: "type" + } + attr { + name: "shape" + type: "shape" + } + attr { + name: "key" + type: "string" + } + is_stateful: true +} +op { + name: "XlaSendToHost" + input_arg { + name: "input" + type_attr: "Tinput" + } + attr { + name: "Tinput" + type: "type" + } + attr { + name: "key" + type: "string" + } + is_stateful: true +} op { name: "Xlog1py" input_arg { From d604689ea7a24dfc4f8994825b3ca9e0c63ddc9b Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Mon, 27 Jul 2020 17:37:50 -0700 Subject: [PATCH 1432/2522] Add some missing dependencies so that the TPU version of TensorFlow builds PiperOrigin-RevId: 323477747 Change-Id: I13393c728bda8f6c541955513a7e6315799ec844 --- tensorflow/core/tpu/BUILD | 3 +++ tensorflow/core/tpu/tpu_compilation_device.cc | 9 ++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/tpu/BUILD b/tensorflow/core/tpu/BUILD index b8cc603e079..62d411c0a67 100644 --- a/tensorflow/core/tpu/BUILD +++ b/tensorflow/core/tpu/BUILD @@ -143,8 +143,11 @@ cc_library( deps = [ ":libtftpu_header", ":tpu_api", + ":tpu_compilation_device", ":tpu_config_c_api", ":tpu_library_init_fns", + ":tpu_node_device", + ":tpu_system_device", "//tensorflow/core:lib", "//tensorflow/core/tpu/graph_rewrite:tpu_rewrite_pass_registration", "//tensorflow/core/tpu/kernels:tpu_compile_c_api_hdrs", diff --git a/tensorflow/core/tpu/tpu_compilation_device.cc b/tensorflow/core/tpu/tpu_compilation_device.cc index 2b2314820bc..f124000c9e3 100644 --- a/tensorflow/core/tpu/tpu_compilation_device.cc +++ b/tensorflow/core/tpu/tpu_compilation_device.cc @@ -18,7 +18,14 @@ limitations under the License. #include "tensorflow/core/tpu/tpu_node_device_util.h" namespace tensorflow { +namespace { -REGISTER_XLA_BACKEND(DEVICE_TPU_XLA_JIT, kTpuAllTypes, TpuOpFilter); +bool RegisterTpuXlaBackend() { + REGISTER_XLA_BACKEND(DEVICE_TPU_XLA_JIT, kTpuAllTypes, TpuOpFilter); + return true; +} +static bool tpu_xla_backend_registered = RegisterTpuXlaBackend(); + +} // namespace } // namespace tensorflow From 3da3eebc474bd0f0119a63db11522f279250d64c Mon Sep 17 00:00:00 2001 From: Reed Wanderman-Milne Date: Mon, 27 Jul 2020 17:41:33 -0700 Subject: [PATCH 1433/2522] Don't use LossScaleGradientTape in loss_scale_benchmark.py. 
This is because the symbol is not public and so it should not be used by Keras. Instead, the "gradient_tape" benchmarks now use a LossScaleOptimizer, but only to scale the loss, unscale the gradients, and apply the gradients. A tf.GradientTape is used to compute the gradients instead of a LossScaleGradientTape. Because of this change, "gradient_tape" benchmarks from before this change cannot be compared with the equivalent benchmarks after this change. Also remove graph mode, as its broken if TF is built with TF2 (despite the fact it explicitly uses a tf.Graph). PiperOrigin-RevId: 323478238 Change-Id: I045ee69743133273387733a54275405c7de9697e --- .../experimental/loss_scale_benchmark.py | 62 +++++++------------ 1 file changed, 23 insertions(+), 39 deletions(-) diff --git a/tensorflow/python/keras/mixed_precision/experimental/loss_scale_benchmark.py b/tensorflow/python/keras/mixed_precision/experimental/loss_scale_benchmark.py index 8f8f50b4052..4ebc360b973 100644 --- a/tensorflow/python/keras/mixed_precision/experimental/loss_scale_benchmark.py +++ b/tensorflow/python/keras/mixed_precision/experimental/loss_scale_benchmark.py @@ -12,27 +12,24 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Benchmarks for LossScaleOptimizer and LossScaleGradientTape.""" +"""Benchmarks for LossScaleOptimizer.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import time -from tensorflow.python.client import session as session_module from tensorflow.python.distribute import distribution_strategy_context from tensorflow.python.distribute import mirrored_strategy from tensorflow.python.eager import backprop from tensorflow.python.eager import context from tensorflow.python.eager import def_function -from tensorflow.python.framework import ops from tensorflow.python.keras.mixed_precision.experimental import loss_scale_optimizer from tensorflow.python.keras.optimizer_v2 import adam from tensorflow.python.ops import math_ops from tensorflow.python.ops import variables from tensorflow.python.platform import test from tensorflow.python.training.experimental import loss_scale as loss_scale_module -from tensorflow.python.training.experimental import loss_scaling_gradient_tape as lsgt_module def _get_strategy(num_gpus): @@ -56,28 +53,18 @@ class LossScaleBenchmark(test.Benchmark): Args: gradient_type: "optimizer" or "gradient_tape". How gradients are computed. "optimizer" uses Optimizer.minimize. "gradient_tape" uses - GradientTape.gradient. + GradientTape.gradient along with LossScaleOptimizer.get_scaled_loss and + LossScaleOptimizer.get_unscaled_gradients. num_gpus: The number of GPUs to use. Must be at least 1. - mode: "eager", "tf_function", or "graph". "eager" means to use eager mode. - "tf_function" means to use eager mode where all computations are wrapped - in a tf.function. "graph" means to use TensorFlow 1's graph mode with a - tf.compat.v1.Session. "graph" is unsupported with a - LossScaleGradientTape. + mode: "eager" or "tf_function". "tf_function" causes all computations to + be wrapped in a tf.function, while "eager" runs computations eagerly. loss_scaling: "fixed", "dynamic", or None. The type of loss scaling to use. None means use no loss scaling, which is useful as a baseline to see how much slower loss scaling is in comparison. 
""" - if mode == 'graph': - graph = ops.Graph() - ctx_mgr = graph.as_default() - elif mode == 'eager': - ctx_mgr = context.eager_mode() - else: - assert mode == 'tf_function' - ctx_mgr = context.eager_mode() ls_str = loss_scaling or 'no_loss_scaling' name = '%s_%d_GPU_%s_%s' % (gradient_type, num_gpus, mode, ls_str) - with ctx_mgr, _get_strategy(num_gpus).scope() as strategy: + with context.eager_mode(), _get_strategy(num_gpus).scope() as strategy: opt = adam.Adam() if loss_scaling == 'fixed': loss_scale = loss_scale_module.FixedLossScale(2.) @@ -93,6 +80,8 @@ class LossScaleBenchmark(test.Benchmark): else: assert loss_scaling is None loss_scale = None + if loss_scale: + opt = loss_scale_optimizer.LossScaleOptimizer(opt, loss_scale) num_vars = 200 num_warmup_iters = 1 @@ -112,29 +101,25 @@ class LossScaleBenchmark(test.Benchmark): return math_ops.add_n(var_list) if gradient_type == 'gradient_tape': - tape_cls = ((lambda: lsgt_module.LossScaleGradientTape(loss_scale)) - if loss_scale else backprop.GradientTape) - def minimize_fn(): - with tape_cls() as tape: - loss = get_loss() - grads = tape.gradient(loss, var_list) - return opt.apply_gradients(zip(grads, var_list)) + if loss_scale is None: + def minimize_fn(): + with backprop.GradientTape() as tape: + loss = get_loss() + grads = tape.gradient(loss, var_list) + return opt.apply_gradients(zip(grads, var_list)) + else: + def minimize_fn(): + with backprop.GradientTape() as tape: + loss = get_loss() + scaled_loss = opt.get_scaled_loss(loss) + scaled_grads = tape.gradient(scaled_loss, var_list) + grads = opt.get_unscaled_gradients(scaled_grads) + return opt.apply_gradients(zip(grads, var_list)) else: assert gradient_type == 'optimizer' - if loss_scale: - opt = loss_scale_optimizer.LossScaleOptimizer(opt, loss_scale) def minimize_fn(): return opt.minimize(get_loss, var_list) - if mode == 'graph': - run_op = strategy.run(minimize_fn) - init_op = variables.global_variables_initializer() - with session_module.Session() as sess: - sess.run(init_op) - self.run_op_benchmark(sess, run_op, min_iters=num_iters, - burn_iters=num_warmup_iters, name=name) - return - def run_fn(): strategy.run(minimize_fn) if mode == 'tf_function': @@ -163,13 +148,12 @@ class LossScaleBenchmark(test.Benchmark): def benchmark_optimizer(self): for num_gpus in self._gpus_to_test_with(): - for mode in 'eager', 'tf_function', 'graph': + for mode in 'eager', 'tf_function': for loss_scaling in None, 'fixed', 'dynamic': self._benchmark('optimizer', num_gpus, mode, loss_scaling) def benchmark_gradient_tape(self): for num_gpus in self._gpus_to_test_with(): - # LossScaleGradientTape doesn't support graph mode for mode in 'eager', 'tf_function': for loss_scaling in None, 'fixed', 'dynamic': self._benchmark('gradient_tape', num_gpus, mode, loss_scaling) From 1ba83bde2bb903a1c514931f3619402a11b398f3 Mon Sep 17 00:00:00 2001 From: Jiho Choi Date: Mon, 27 Jul 2020 17:43:16 -0700 Subject: [PATCH 1434/2522] Add a host event type for device input pipelines. 
PiperOrigin-RevId: 323478464 Change-Id: I265e9d3b2215df08bbec10006ed6f71cc36ee07c --- tensorflow/core/profiler/utils/xplane_schema.cc | 1 + tensorflow/core/profiler/utils/xplane_schema.h | 1 + 2 files changed, 2 insertions(+) diff --git a/tensorflow/core/profiler/utils/xplane_schema.cc b/tensorflow/core/profiler/utils/xplane_schema.cc index 46ee5db2f71..691712d893d 100644 --- a/tensorflow/core/profiler/utils/xplane_schema.cc +++ b/tensorflow/core/profiler/utils/xplane_schema.cc @@ -94,6 +94,7 @@ const HostEventTypeMap& GetHostEventTypeMap() { {"IteratorGetNextOp::DoCompute", kIteratorGetNextOp}, {"IteratorGetNextAsOptionalOp::DoCompute", kIteratorGetNextAsOptionalOp}, {"Iterator", kIterator}, + {"Iterator::Prefetch::Generator", kDeviceInputPipelineSecondIterator}, {"PrefetchProduce", kPrefetchProduce}, {"PrefetchConsume", kPrefetchConsume}, {"ParallelInterleaveProduce", kParallelInterleaveProduce}, diff --git a/tensorflow/core/profiler/utils/xplane_schema.h b/tensorflow/core/profiler/utils/xplane_schema.h index c435b46f6d1..4b07e8c0d6e 100644 --- a/tensorflow/core/profiler/utils/xplane_schema.h +++ b/tensorflow/core/profiler/utils/xplane_schema.h @@ -85,6 +85,7 @@ enum HostEventType { kIteratorGetNextOp, kIteratorGetNextAsOptionalOp, kIterator, + kDeviceInputPipelineSecondIterator, kPrefetchProduce, kPrefetchConsume, kParallelInterleaveProduce, From c5d24476a4561014f94c00af93e723fe2e63f774 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Jul 2020 17:45:55 -0700 Subject: [PATCH 1435/2522] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 323478890 Change-Id: Ibfcf4de5d745de709bbaf87a175fdb312e857248 --- tensorflow/go/op/wrappers.go | 41 ++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 687bc4ced33..9da46e94adb 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -8345,6 +8345,47 @@ func OptionalFromValue(scope *Scope, components []tf.Output) (optional tf.Output return op.Output(0) } +// OptimizeDatasetV2Attr is an optional argument to OptimizeDatasetV2. +type OptimizeDatasetV2Attr func(optionalAttr) + +// OptimizeDatasetV2OptimizationConfigs sets the optional optimization_configs attribute to value. +// If not specified, defaults to <> +func OptimizeDatasetV2OptimizationConfigs(value []string) OptimizeDatasetV2Attr { + return func(m optionalAttr) { + m["optimization_configs"] = value + } +} + +// Creates a dataset by applying related optimizations to `input_dataset`. +// +// Creates a dataset by applying related optimizations to `input_dataset`. +// +// Arguments: +// input_dataset: A variant tensor representing the input dataset. +// optimizations_enabled: A `tf.string` vector `tf.Tensor` identifying user enabled optimizations. +// optimizations_disabled: A `tf.string` vector `tf.Tensor` identifying user disabled optimizations. +// optimizations_default: A `tf.string` vector `tf.Tensor` identifying optimizations by default. 
+// +// +func OptimizeDatasetV2(scope *Scope, input_dataset tf.Output, optimizations_enabled tf.Output, optimizations_disabled tf.Output, optimizations_default tf.Output, output_types []tf.DataType, output_shapes []tf.Shape, optional ...OptimizeDatasetV2Attr) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "OptimizeDatasetV2", + Input: []tf.Input{ + input_dataset, optimizations_enabled, optimizations_disabled, optimizations_default, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // OptimizeDatasetAttr is an optional argument to OptimizeDataset. type OptimizeDatasetAttr func(optionalAttr) From 92c1f4902aca219993b347c4bb12818b76e1e205 Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: Mon, 27 Jul 2020 17:46:17 -0700 Subject: [PATCH 1436/2522] Create a DispatcherState class for managing dispatcher state. The goal of the class is to make it easier to keep the journal in sync with the in-memory state. Currently only the RegisterDataset state transition is implemented. Further CLs will implement the rest of the state transitions enumerated in journal.proto, and we may add more state transitions in the future. PiperOrigin-RevId: 323478950 Change-Id: I53433c6b2c45d01a3aabea26dcc62dae83e0af04 --- tensorflow/core/data/service/BUILD | 31 ++++++ .../core/data/service/dispatcher_state.cc | 76 +++++++++++++++ .../core/data/service/dispatcher_state.h | 94 +++++++++++++++++++ .../data/service/dispatcher_state_test.cc | 57 +++++++++++ tensorflow/core/data/service/journal.proto | 20 ++-- tensorflow/core/data/service/journal_test.cc | 6 +- 6 files changed, 271 insertions(+), 13 deletions(-) create mode 100644 tensorflow/core/data/service/dispatcher_state.cc create mode 100644 tensorflow/core/data/service/dispatcher_state.h create mode 100644 tensorflow/core/data/service/dispatcher_state_test.cc diff --git a/tensorflow/core/data/service/BUILD b/tensorflow/core/data/service/BUILD index e4efcdde93b..22b9f8a2a3a 100644 --- a/tensorflow/core/data/service/BUILD +++ b/tensorflow/core/data/service/BUILD @@ -77,6 +77,37 @@ cc_library( ], ) +cc_library( + name = "dispatcher_state", + srcs = ["dispatcher_state.cc"], + hdrs = [ + "dispatcher_state.h", + ], + deps = [ + ":common_proto_cc", + ":data_service", + ":journal_proto_cc", + "//tensorflow/core:lib", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/memory", + ], +) + +tf_cc_test( + name = "dispatcher_state_test", + srcs = ["dispatcher_state_test.cc"], + deps = [ + ":common_proto_cc", + ":dispatcher_state", + ":journal_proto_cc", + "//tensorflow/core:lib", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/platform:errors", + ], +) + cc_library( name = "worker_impl", srcs = ["worker_impl.cc"], diff --git a/tensorflow/core/data/service/dispatcher_state.cc b/tensorflow/core/data/service/dispatcher_state.cc new file mode 100644 index 00000000000..d1fd3f60f67 --- /dev/null +++ b/tensorflow/core/data/service/dispatcher_state.cc @@ -0,0 +1,76 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/data/service/dispatcher_state.h" + +#include "tensorflow/core/data/service/journal.pb.h" +#include "tensorflow/core/platform/errors.h" + +namespace tensorflow { +namespace data { + +DispatcherState::DispatcherState() {} + +Status DispatcherState::Apply(Update update) { + switch (update.update_type_case()) { + case Update::kRegisterDataset: + RegisterDataset(update.register_dataset()); + break; + // TODO(aaudibert): implement these. + case Update::kCreateJob: + case Update::kCreateTask: + case Update::kFinishJob: + return errors::Unimplemented("Update type ", update.update_type_case(), + " is not yet supported"); + case Update::UPDATE_TYPE_NOT_SET: + return errors::Internal("Update type not set."); + } + + return Status::OK(); +} + +void DispatcherState::RegisterDataset( + const RegisterDatasetUpdate& register_dataset) { + int64 id = register_dataset.dataset_id(); + int64 fingerprint = register_dataset.fingerprint(); + auto dataset = std::make_shared(id, fingerprint, + register_dataset.dataset_def()); + DCHECK(!datasets_by_id_.contains(id)); + datasets_by_id_[id] = dataset; + DCHECK(!datasets_by_fingerprint_.contains(fingerprint)); + datasets_by_fingerprint_[fingerprint] = dataset; +} + +Status DispatcherState::DatasetFromId( + int64 id, std::shared_ptr* dataset) const { + auto it = datasets_by_id_.find(id); + if (it == datasets_by_id_.end()) { + return errors::NotFound("Dataset id ", id, " not found"); + } + *dataset = it->second; + return Status::OK(); +} + +Status DispatcherState::DatasetFromFingerprint( + uint64 fingerprint, std::shared_ptr* dataset) const { + auto it = datasets_by_fingerprint_.find(fingerprint); + if (it == datasets_by_fingerprint_.end()) { + return errors::NotFound("Dataset fingerprint ", fingerprint, " not found"); + } + *dataset = it->second; + return Status::OK(); +} + +} // namespace data +} // namespace tensorflow diff --git a/tensorflow/core/data/service/dispatcher_state.h b/tensorflow/core/data/service/dispatcher_state.h new file mode 100644 index 00000000000..601c05126de --- /dev/null +++ b/tensorflow/core/data/service/dispatcher_state.h @@ -0,0 +1,94 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_CORE_DATA_SERVICE_DISPATCHER_STATE_H_ +#define TENSORFLOW_CORE_DATA_SERVICE_DISPATCHER_STATE_H_ + +#include "absl/container/flat_hash_map.h" +#include "tensorflow/core/data/service/common.pb.h" +#include "tensorflow/core/data/service/data_service.h" +#include "tensorflow/core/data/service/journal.pb.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { +namespace data { + +// A class encapsulating the journaled state of the dispatcher. All state +// modifications must be done via `ApplyUpdate`. This helps to ensure that +// replaying the journal will allow us to restore the exact same state. +// +// The following usage pattern will keep the journal in sync with the state of +// the dispatcher: +// { +// mutex_lock l(mu_); +// Update update = ... // create an update +// dispatcher_state.ApplyUpdate(update); +// journal_writer.write(Update); +// // Unlock mu_ +// } +// +// The division of functionality between DispatcherImpl and DispatcherState is +// as follows: +// - DispatcherImpl is responsible for handling RPC requests, reading from +// DispatcherState, and deciding what updates to apply to DispatcherState. +// DispatcherImpl handles all synchronization. +// - DispatcherState is responsible for making the state changes requested by +// DispatcherImpl and for providing DispatcherImpl with read-only access to +// the state. +// +// DispatcherState is thread-compatible but not thread-safe. +class DispatcherState { + public: + DispatcherState(); + DispatcherState(const DispatcherState&) = delete; + DispatcherState& operator=(const DispatcherState&) = delete; + + // Applies the given update to the dispatcher's state. + Status Apply(Update update); + + // A dataset registered with the dispatcher. + struct Dataset { + public: + Dataset(int64 dataset_id, int64 fingerprint, const DatasetDef& dataset_def) + : dataset_id(dataset_id), + fingerprint(fingerprint), + dataset_def(dataset_def) {} + + const int64 dataset_id; + const int64 fingerprint; + const DatasetDef dataset_def; + }; + + // Gets a dataset by id. Returns NOT_FOUND if there is no such dataset. + Status DatasetFromId(int64 id, std::shared_ptr* dataset) const; + // Gets a dataset by fingerprint. Returns NOT_FOUND if there is no such + // dataset. + Status DatasetFromFingerprint(uint64 fingerprint, + std::shared_ptr* dataset) const; + + private: + // Registers a dataset. The dataset must not already be registered. + void RegisterDataset(const RegisterDatasetUpdate& register_dataset); + + // Registered datasets, keyed by dataset ids. + absl::flat_hash_map> datasets_by_id_; + // Registered datasets, keyed by dataset fingerprints. + absl::flat_hash_map> + datasets_by_fingerprint_; +}; + +} // namespace data +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_DATA_SERVICE_DISPATCHER_STATE_H_ diff --git a/tensorflow/core/data/service/dispatcher_state_test.cc b/tensorflow/core/data/service/dispatcher_state_test.cc new file mode 100644 index 00000000000..629665e7643 --- /dev/null +++ b/tensorflow/core/data/service/dispatcher_state_test.cc @@ -0,0 +1,57 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/data/service/dispatcher_state.h" + +#include "tensorflow/core/data/service/common.pb.h" +#include "tensorflow/core/data/service/journal.pb.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/platform/path.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace data { + +TEST(DispatcherState, RegisterDataset) { + int64 id = 10; + uint64 fingerprint = 20; + DispatcherState state; + Update update; + RegisterDatasetUpdate* register_dataset = update.mutable_register_dataset(); + register_dataset->set_dataset_id(id); + register_dataset->set_fingerprint(fingerprint); + TF_EXPECT_OK(state.Apply(update)); + + { + std::shared_ptr dataset; + TF_EXPECT_OK(state.DatasetFromFingerprint(fingerprint, &dataset)); + EXPECT_EQ(id, dataset->dataset_id); + } + { + std::shared_ptr dataset; + TF_EXPECT_OK(state.DatasetFromId(id, &dataset)); + EXPECT_EQ(fingerprint, dataset->fingerprint); + } +} + +TEST(DispatcherState, UnknownUpdate) { + DispatcherState state; + Update update; + Status s = state.Apply(update); + EXPECT_EQ(s.code(), error::INTERNAL); +} + +} // namespace data +} // namespace tensorflow diff --git a/tensorflow/core/data/service/journal.proto b/tensorflow/core/data/service/journal.proto index b378011db4b..6d5168c8696 100644 --- a/tensorflow/core/data/service/journal.proto +++ b/tensorflow/core/data/service/journal.proto @@ -10,20 +10,20 @@ import "tensorflow/core/data/service/common.proto"; // Update message to the journal. message Update { oneof update_type { - RegisterDataset register_dataset = 1; - CreateJob create_job = 2; - FinishJob finish_job = 3; - CreateTask create_task = 4; + RegisterDatasetUpdate register_dataset = 1; + CreateJobUpdate create_job = 2; + FinishJobUpdate finish_job = 3; + CreateTaskUpdate create_task = 4; } } -message RegisterDataset { +message RegisterDatasetUpdate { int64 dataset_id = 1; - // A file where the dataset graph has been written. 
- string filename = 2; + DatasetDef dataset_def = 2; + uint64 fingerprint = 3; } -message CreateJob { +message CreateJobUpdate { int64 job_id = 1; int64 dataset_id = 2; ProcessingModeDef processing_mode = 3; @@ -31,11 +31,11 @@ message CreateJob { google.protobuf.StringValue job_name = 4; } -message FinishJob { +message FinishJobUpdate { int64 job_id = 1; } -message CreateTask { +message CreateTaskUpdate { int64 task_id = 1; int64 job_id = 2; int64 dataset_id = 3; diff --git a/tensorflow/core/data/service/journal_test.cc b/tensorflow/core/data/service/journal_test.cc index ece33970eb8..cc4cef67c4f 100644 --- a/tensorflow/core/data/service/journal_test.cc +++ b/tensorflow/core/data/service/journal_test.cc @@ -39,7 +39,7 @@ bool NewJournalDir(std::string* journal_dir) { Update MakeCreateJobUpdate() { Update update; - CreateJob* create_job = update.mutable_create_job(); + CreateJobUpdate* create_job = update.mutable_create_job(); create_job->set_dataset_id(3); create_job->set_job_id(8); create_job->set_processing_mode(ProcessingModeDef::PARALLEL_EPOCHS); @@ -48,14 +48,14 @@ Update MakeCreateJobUpdate() { Update MakeFinishJobUpdate() { Update update; - FinishJob* finish_job = update.mutable_finish_job(); + FinishJobUpdate* finish_job = update.mutable_finish_job(); finish_job->set_job_id(8); return update; } Update MakeCreateTaskUpdate() { Update update; - CreateTask* create_task = update.mutable_create_task(); + CreateTaskUpdate* create_task = update.mutable_create_task(); create_task->set_task_id(2); create_task->set_dataset_id(4); create_task->set_job_id(5); From e2173ef5ab8ec1b2bb1f0e843d1ad88a0bc39e15 Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Mon, 27 Jul 2020 18:13:33 -0700 Subject: [PATCH 1437/2522] Add transactional API as separate methods --- tensorflow/core/platform/file_system.h | 318 +++++++++++++------------ 1 file changed, 170 insertions(+), 148 deletions(-) diff --git a/tensorflow/core/platform/file_system.h b/tensorflow/core/platform/file_system.h index c9b4de63585..2d980c23986 100644 --- a/tensorflow/core/platform/file_system.h +++ b/tensorflow/core/platform/file_system.h @@ -68,9 +68,14 @@ class FileSystem { /// The ownership of the returned RandomAccessFile is passed to the caller /// and the object should be deleted when is not used. virtual tensorflow::Status NewRandomAccessFile( - const string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) = 0; + const string& fname, std::unique_ptr* result) = 0; + + virtual tensorflow::Status NewRandomAccessFile( + const string& fname, std::unique_ptr* result, + TransactionToken* token){ + // We duplicate these methods due to Google internal coding style prevents + // virtual functions with default arguments. See PR #41615. + }; /// \brief Creates an object that writes to a new file with the specified /// name. @@ -85,9 +90,11 @@ class FileSystem { /// The ownership of the returned WritableFile is passed to the caller /// and the object should be deleted when is not used. virtual tensorflow::Status NewWritableFile( - const string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) = 0; + const string& fname, std::unique_ptr* result) = 0; + + virtual tensorflow::Status NewWritableFile( + const string& fname, std::unique_ptr* result, + TransactionToken* token){}; /// \brief Creates an object that either appends to an existing file, or /// writes to a new file (if the file does not exist to begin with). 
@@ -101,9 +108,11 @@ class FileSystem { /// The ownership of the returned WritableFile is passed to the caller /// and the object should be deleted when is not used. virtual tensorflow::Status NewAppendableFile( - const string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) = 0; + const string& fname, std::unique_ptr* result) = 0; + + virtual tensorflow::Status NewAppendableFile( + const string& fname, std::unique_ptr* result, + TransactionToken* token){}; /// \brief Creates a readonly region of memory with the file context. /// @@ -116,26 +125,36 @@ class FileSystem { /// The ownership of the returned ReadOnlyMemoryRegion is passed to the caller /// and the object should be deleted when is not used. virtual tensorflow::Status NewReadOnlyMemoryRegionFromFile( - const string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) = 0; + const string& fname, std::unique_ptr* result) = 0; + + virtual tensorflow::Status NewReadOnlyMemoryRegionFromFile( + const string& fname, std::unique_ptr* result, + TransactionToken* token){}; /// Returns OK if the named path exists and NOT_FOUND otherwise. virtual tensorflow::Status FileExists(const string& fname) = 0; + virtual tensorflow::Status FileExists(const string& fname, + TransactionToken* token){}; /// Returns true if all the listed files exist, false otherwise. /// if status is not null, populate the vector with a detailed status /// for each file. - virtual bool FilesExist( - const std::vector& files, - std::vector* status /*, TransactionToken* token = nullptr */); + virtual bool FilesExist(const std::vector& files, + std::vector* status); + + virtual bool FilesExist(const std::vector& files, + std::vector* status, + TransactionToken* token){}; /// \brief Returns the immediate children in the given directory. /// /// The returned paths are relative to 'dir'. - virtual tensorflow::Status GetChildren( - const string& dir, - std::vector* result /*, TransactionToken* token = nullptr */) = 0; + virtual tensorflow::Status GetChildren(const string& dir, + std::vector* result) = 0; + + virtual tensorflow::Status GetChildren(const string& dir, + std::vector* result, + TransactionToken* token){}; /// \brief Given a pattern, stores in *results the set of paths that matches /// that pattern. *results is cleared. @@ -159,10 +178,12 @@ class FileSystem { /// * OK - no errors /// * UNIMPLEMENTED - Some underlying functions (like GetChildren) are not /// implemented - virtual tensorflow::Status GetMatchingPaths( - const string& pattern, - std::vector* - results /*, TransactionToken* token = nullptr */) = 0; + virtual tensorflow::Status GetMatchingPaths(const string& pattern, + std::vector* results) = 0; + + virtual tensorflow::Status GetMatchingPaths(const string& pattern, + std::vector* results, + TransactionToken* token){}; /// \brief Checks if the given filename matches the pattern. /// @@ -172,21 +193,27 @@ class FileSystem { virtual bool Match(const std::string& filename, const std::string& pattern); /// \brief Obtains statistics for the given path. - virtual tensorflow::Status Stat( - const string& fname, - FileStatistics* stat /*, TransactionToken* token = nullptr */) = 0; + virtual tensorflow::Status Stat(const string& fname, + FileStatistics* stat) = 0; + + virtual tensorflow::Status Stat(const string& fname, FileStatistics* stat, + TransactionToken* token){}; /// \brief Deletes the named file. 
- virtual tensorflow::Status DeleteFile( - const string& fname /*, TransactionToken* token = nullptr */) = 0; + virtual tensorflow::Status DeleteFile(const string& fname) = 0; + + virtual tensorflow::Status DeleteFile(const string& fname, + TransactionToken* token){}; /// \brief Creates the specified directory. /// Typical return codes: /// * OK - successfully created the directory. /// * ALREADY_EXISTS - directory with name dirname already exists. /// * PERMISSION_DENIED - dirname is not writable. - virtual tensorflow::Status CreateDir( - const string& dirname /*, TransactionToken* token = nullptr */) = 0; + virtual tensorflow::Status CreateDir(const string& dirname) = 0; + + virtual tensorflow::Status CreateDir(const string& dirname, + TransactionToken* token){}; /// \brief Creates the specified directory and all the necessary /// subdirectories. @@ -194,12 +221,16 @@ class FileSystem { /// * OK - successfully created the directory and sub directories, even if /// they were already created. /// * PERMISSION_DENIED - dirname or some subdirectory is not writable. - virtual tensorflow::Status RecursivelyCreateDir( - const string& dirname /*, TransactionToken* token = nullptr */); + virtual tensorflow::Status RecursivelyCreateDir(const string& dirname); + + virtual tensorflow::Status RecursivelyCreateDir(const string& dirname, + TransactionToken* token) {} /// \brief Deletes the specified directory. - virtual tensorflow::Status DeleteDir( - const string& dirname /*, TransactionToken* token = nullptr */) = 0; + virtual tensorflow::Status DeleteDir(const string& dirname) = 0; + + virtual tensorflow::Status DeleteDir(const string& dirname, + TransactionToken* token){}; /// \brief Deletes the specified directory and all subdirectories and files /// underneath it. This is accomplished by traversing the directory tree @@ -225,24 +256,34 @@ class FileSystem { /// * PERMISSION_DENIED - dirname or some descendant is not writable /// * UNIMPLEMENTED - Some underlying functions (like Delete) are not /// implemented - virtual tensorflow::Status DeleteRecursively( - const string& dirname, int64* undeleted_files, - int64* undeleted_dirs /*, TransactionToken* token = nullptr */); + virtual tensorflow::Status DeleteRecursively(const string& dirname, + int64* undeleted_files, + int64* undeleted_dirs); + + virtual tensorflow::Status DeleteRecursively(const string& dirname, + int64* undeleted_files, + int64* undeleted_dirs, + TransactionToken* token) {} /// \brief Stores the size of `fname` in `*file_size`. - virtual tensorflow::Status GetFileSize( - const string& fname, - uint64* file_size /*, TransactionToken* token = nullptr */) = 0; + virtual tensorflow::Status GetFileSize(const string& fname, + uint64* file_size) = 0; + + virtual tensorflow::Status GetFileSize(const string& fname, uint64* file_size, + TransactionToken* token){}; /// \brief Overwrites the target if it exists. - virtual tensorflow::Status RenameFile( - const string& src, - const string& target /*, TransactionToken* token = nullptr */) = 0; + virtual tensorflow::Status RenameFile(const string& src, + const string& target) = 0; + + virtual tensorflow::Status RenameFile(const string& src, const string& target, + TransactionToken* token){}; /// \brief Copy the src to target. 
- virtual tensorflow::Status CopyFile( - const string& src, - const string& target /*, TransactionToken* token = nullptr */); + virtual tensorflow::Status CopyFile(const string& src, const string& target); + + virtual tensorflow::Status CopyFile(const string& src, const string& target, + TransactionToken* token) {} /// \brief Translate an URI to a filename for the FileSystem implementation. /// @@ -262,8 +303,10 @@ class FileSystem { /// * NOT_FOUND - The path entry does not exist. /// * PERMISSION_DENIED - Insufficient permissions. /// * UNIMPLEMENTED - The file factory doesn't support directories. - virtual tensorflow::Status IsDirectory( - const string& fname /*, TransactionToken* token = nullptr */); + virtual tensorflow::Status IsDirectory(const string& fname); + + virtual tensorflow::Status IsDirectory(const string& fname, + TransactionToken* token){}; /// \brief Returns whether the given path is on a file system /// that has atomic move capabilities. This can be used @@ -278,8 +321,9 @@ class FileSystem { virtual Status HasAtomicMove(const string& path, bool* has_atomic_move); /// \brief Flushes any cached filesystem objects from memory. - virtual void FlushCaches(/* TransactionToken* token = nullptr */); + virtual void FlushCaches(); + virtual void FlushCaches(TransactionToken* token){}; /// \brief The separator this filesystem uses. /// /// This is implemented as a part of the filesystem, because even on windows, @@ -398,8 +442,8 @@ class FileSystem { /// \brief Return transaction for `path` or nullptr in `token` virtual tensorflow::Status GetTransactionForPath(const string& path, TransactionToken** token) { - return Status::OK(); token = nullptr; + return Status::OK(); }; /// \brief Decode transaction to human readable string. @@ -423,179 +467,157 @@ class FileSystem { class WrappedFileSystem : public FileSystem { public: virtual tensorflow::Status NewRandomAccessFile( - const string& fname, std::unique_ptr* result - /*, TransactionToken* token = nullptr */) /* override */ { - return fs_->NewRandomAccessFile(fname, - result /* , (token ? token : token_) */); + const string& fname, std::unique_ptr* result, + TransactionToken* token) override { + return fs_->NewRandomAccessFile(fname, result, (token ? token : token_)); } virtual tensorflow::Status NewWritableFile( - const string& fname, std::unique_ptr* result - /*, TransactionToken* token = nullptr */) /* override */ { - return fs_->NewWritableFile(fname, result /* , (token ? token : token_) */); + const string& fname, std::unique_ptr* result, + TransactionToken* token) override { + return fs_->NewWritableFile(fname, result, (token ? token : token_)); } virtual tensorflow::Status NewAppendableFile( - const string& fname, std::unique_ptr* result - /*, TransactionToken* token = nullptr */) /* override */ { - return fs_->NewAppendableFile(fname, - result /* , (token ? token : token_) */); + const string& fname, std::unique_ptr* result, + TransactionToken* token) override { + return fs_->NewAppendableFile(fname, result, (token ? token : token_)); } virtual tensorflow::Status NewReadOnlyMemoryRegionFromFile( - const string& fname, std::unique_ptr* result - /*, TransactionToken* token = nullptr */) /* override */ { - return fs_->NewReadOnlyMemoryRegionFromFile( - fname, result /* , (token ? token : token_) */); + const string& fname, std::unique_ptr* result, + TransactionToken* token) override { + return fs_->NewReadOnlyMemoryRegionFromFile(fname, result, + (token ? 
token : token_)); } - virtual tensorflow::Status FileExists( - const string& - fname /*, TransactionToken* token = nullptr */) /* override */ { - return fs_->FileExists(fname /* , (token ? token : token_) */); + virtual tensorflow::Status FileExists(const string& fname, + TransactionToken* token) override { + return fs_->FileExists(fname, (token ? token : token_)); } - virtual bool FilesExist( - const std::vector& files, std::vector* status - /*, TransactionToken* token = nullptr */) /* override */ { - return fs_->FilesExist(files, status /* , (token ? token : token_) */); + virtual bool FilesExist(const std::vector& files, + std::vector* status, + TransactionToken* token) override { + return fs_->FilesExist(files, status, (token ? token : token_)); } - virtual tensorflow::Status GetChildren( - const string& dir, std::vector* result - /*, TransactionToken* token = nullptr */) /* override */ { - return fs_->GetChildren(dir, result /* , (token ? token : token_) */); + virtual tensorflow::Status GetChildren(const string& dir, + std::vector* result, + TransactionToken* token) override { + return fs_->GetChildren(dir, result, (token ? token : token_)); } virtual tensorflow::Status GetMatchingPaths( - const string& pattern, std::vector* results - /*, TransactionToken* token = nullptr */) /* override */ { - return fs_->GetMatchingPaths(pattern, - results /* , (token ? token : token_) */); + const string& pattern, std::vector* results, + TransactionToken* token) override { + return fs_->GetMatchingPaths(pattern, results, (token ? token : token_)); } - virtual bool Match(const std::string& filename, const std::string& pattern - /*, TransactionToken* token = nullptr */) /* override */ { - return fs_->Match(filename, pattern /* , (token ? token : token_) */); + virtual bool Match(const std::string& filename, + const std::string& pattern) override { + return fs_->Match(filename, pattern); } - virtual tensorflow::Status Stat( - const string& fname, FileStatistics* stat - /*, TransactionToken* token = nullptr */) /* override */ { - return fs_->Stat(fname, stat /* , (token ? token : token_) */); + virtual tensorflow::Status Stat(const string& fname, FileStatistics* stat, + TransactionToken* token) override { + return fs_->Stat(fname, stat, (token ? token : token_)); } - virtual tensorflow::Status DeleteFile( - const string& - fname /*, TransactionToken* token = nullptr */) /* override */ { - return fs_->DeleteFile(fname /* , (token ? token : token_) */); + virtual tensorflow::Status DeleteFile(const string& fname, + TransactionToken* token) override { + return fs_->DeleteFile(fname, (token ? token : token_)); } - virtual tensorflow::Status CreateDir( - const string& - dirname /*, TransactionToken* token = nullptr */) /* override */ { - return fs_->CreateDir(dirname /* , (token ? token : token_) */); + virtual tensorflow::Status CreateDir(const string& dirname, + TransactionToken* token) override { + return fs_->CreateDir(dirname, (token ? token : token_)); } virtual tensorflow::Status RecursivelyCreateDir( - const string& - dirname /*, TransactionToken* token = nullptr */) /* override */ { - return fs_->RecursivelyCreateDir(dirname /* , (token ? token : token_) */); + const string& dirname, TransactionToken* token) override { + return fs_->RecursivelyCreateDir(dirname, (token ? token : token_)); } - virtual tensorflow::Status DeleteDir( - const string& - dirname /*, TransactionToken* token = nullptr */) /* override */ { - return fs_->DeleteDir(dirname /* , (token ? 
token : token_) */); + virtual tensorflow::Status DeleteDir(const string& dirname, + TransactionToken* token) override { + return fs_->DeleteDir(dirname, (token ? token : token_)); } virtual tensorflow::Status DeleteRecursively( - const string& dirname, int64* undeleted_files, int64* undeleted_dirs - /*, TransactionToken* token = nullptr */) /* override */ { + const string& dirname, int64* undeleted_files, int64* undeleted_dirs, + TransactionToken* token) override { return fs_->DeleteRecursively( dirname, undeleted_files, undeleted_dirs /*, (token ? token : token_) */); } - virtual tensorflow::Status GetFileSize( - const string& fname, uint64* file_size - /*, TransactionToken* token = nullptr */) /* override */ { - return fs_->GetFileSize(fname, file_size /* , (token ? token : token_) */); + virtual tensorflow::Status GetFileSize(const string& fname, uint64* file_size, + TransactionToken* token) override { + return fs_->GetFileSize(fname, file_size, (token ? token : token_)); } - virtual tensorflow::Status RenameFile( - const string& src, const string& target - /*, TransactionToken* token = nullptr */) /* override */ { - return fs_->RenameFile(src, target /* , (token ? token : token_) */); + virtual tensorflow::Status RenameFile(const string& src, const string& target, + TransactionToken* token) override { + return fs_->RenameFile(src, target, (token ? token : token_)); } - virtual tensorflow::Status CopyFile( - const string& src, const string& target - /*, TransactionToken* token = nullptr */) /* override */ { - return fs_->CopyFile(src, target /* , (token ? token : token_) */); + virtual tensorflow::Status CopyFile(const string& src, const string& target, + TransactionToken* token) override { + return fs_->CopyFile(src, target, (token ? token : token_)); } - virtual std::string TranslateName(const std::string& name) const - /* override */ { + virtual std::string TranslateName(const std::string& name) const override { return fs_->TranslateName(name); } - virtual tensorflow::Status IsDirectory( - const string& - fname /*, TransactionToken* token = nullptr */) /* override */ { - return fs_->IsDirectory(fname /* , (token ? token : token_) */); + virtual tensorflow::Status IsDirectory(const string& fname, + TransactionToken* token) override { + return fs_->IsDirectory(fname, (token ? token : token_)); } virtual Status HasAtomicMove(const string& path, - bool* has_atomic_move) /* override */ { + bool* has_atomic_move) override { return fs_->HasAtomicMove(path, has_atomic_move); } - virtual void FlushCaches( - /*TransactionToken* token = nullptr */) /* override */ { - return fs_->FlushCaches(/* (token ? token : token_) */); + virtual void FlushCaches(TransactionToken* token) override { + return fs_->FlushCaches((token ? token : token_)); } - virtual char Separator() const /* override */ { return fs_->Separator(); } + virtual char Separator() const override { return fs_->Separator(); } - virtual StringPiece Basename(StringPiece path) const /* override */ { + virtual StringPiece Basename(StringPiece path) const override { return fs_->Basename(path); } virtual tensorflow::Status StartTransaction( - TransactionToken** token) /* override */ { - /* return fs_->StartTransaction(token); */ - return Status::OK(); + TransactionToken** token) override { + return fs_->StartTransaction(token); } virtual tensorflow::Status AddToTransaction( - const string& path, TransactionToken* token) /* override */ { - /* return fs_->AddToTransaction(path, (token ? 
token : token_) ); */ - return Status::OK(); + const string& path, TransactionToken* token) override { + return fs_->AddToTransaction(path, (token ? token : token_)); } - virtual tensorflow::Status EndTransaction( - TransactionToken* token) /* override */ { - /* return fs_->EndTransaction(token); */ - return Status::OK(); + virtual tensorflow::Status EndTransaction(TransactionToken* token) override { + return fs_->EndTransaction(token); } virtual tensorflow::Status GetTransactionForPath( - const string& path, TransactionToken** token) /* override */ { - /* return fs_->GetTransactionForPath(path, token); */ - return Status::OK(); + const string& path, TransactionToken** token) override { + return fs_->GetTransactionForPath(path, token); } virtual tensorflow::Status GetTokenOrStartTransaction( - const string& path, TransactionToken** token) /* override */ { - /* return fs_->GetTokenOrStartTransaction(path, token); */ - return Status::OK(); + const string& path, TransactionToken** token) override { + return fs_->GetTokenOrStartTransaction(path, token); } - virtual string DecodeTransaction( - const TransactionToken* token) /* override */ { - return ""; - /*return fs_->DecodeTransaction((token ? token : token_)); */ + virtual std::string DecodeTransaction( + const TransactionToken* token) override { + return fs_->DecodeTransaction((token ? token : token_)); } WrappedFileSystem(FileSystem* file_system, TransactionToken* token) From a937d80421ab5d68a386a5740343b4ecf8dc87b8 Mon Sep 17 00:00:00 2001 From: Ran Chen Date: Mon, 27 Jul 2020 18:16:41 -0700 Subject: [PATCH 1438/2522] Make CollectiveKey instance key global instead of thread local There's no reason for instance key to be thread local, since we get one instance key and use it for all devices. This change also removes the lock from CollectiveKey since we already need synchronization when launching collectives. PiperOrigin-RevId: 323483277 Change-Id: I2e01519f1231c59686c77d64eebbb284f9c00f32 --- .../python/distribute/cross_device_ops.py | 69 +++++++++++-------- .../python/distribute/cross_device_utils.py | 31 ++------- 2 files changed, 48 insertions(+), 52 deletions(-) diff --git a/tensorflow/python/distribute/cross_device_ops.py b/tensorflow/python/distribute/cross_device_ops.py index f82ddf8ab79..81a401fb4a9 100644 --- a/tensorflow/python/distribute/cross_device_ops.py +++ b/tensorflow/python/distribute/cross_device_ops.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import collections +import copy import enum import threading @@ -958,8 +959,18 @@ class CollectiveAllReduce(CrossDeviceOps): self._collective_keys = (collective_keys or cross_device_utils.CollectiveKeys()) self._communication = communication + # This lock guards all collective launches, i.e. calls to + # cross_device_utils.build_collectve_*. + # # In a multi threaded eager program we need to ensure different groups of - # collectives don't interleave each other, otherwise there will be deadlock. + # collectives don't interleave each other, otherwise there couuld be + # deadlocks. E.g. + # user-thread-0 device0 device1 + # user-thread-1 device0 device1 + # Note that thanks to protection in the runtime, this is only an issue when + # the instance key is re-used. The instance key is reused if the user builds + # a tf.function and runs it in multiple threads, since the instance key is + # an attribute of the collective ops. self._lock = threading.Lock() # Collective ops requires all devices to participate and is blocking. 
In @@ -1085,30 +1096,31 @@ class CollectiveAllReduce(CrossDeviceOps): self._devices), self._group_size, communication, len(packs)), 10) reduced_values = [] - for pack in packs: - # By placing all CollectiveReduce ops in a pack under single name scope, - # we ensure they will be picked up by the `ScopedAllocator` grappler - # optimizer and packed into a single all-reduce. - with self._lock, ops.name_scope("allreduce"): - for per_replica in pack: - # Add control dependencies per device from the last gradients to the - # current set, in order to serialize NCCL launches. - if (communication == CollectiveCommunication.NCCL.value and - reduced_values): - control_inputs = list(reduced_values[-1]) - else: - control_inputs = None - reduced_values.append( - cross_device_utils.build_collective_reduce( - per_replica.values, - self._devices, - self._group_size, - self._collective_keys, - "Add", - "Id", - communication, - control_inputs, - executors=self._executors)) + with self._lock: + for pack in packs: + # By placing all CollectiveReduce ops in a pack under single name scope, + # we ensure they will be picked up by the `ScopedAllocator` grappler + # optimizer and packed into a single all-reduce. + with ops.name_scope("allreduce"): + for per_replica in pack: + # Add control dependencies per device from the last gradients to the + # current set, in order to serialize NCCL launches. + if (communication == CollectiveCommunication.NCCL.value and + reduced_values): + control_inputs = list(reduced_values[-1]) + else: + control_inputs = None + reduced_values.append( + cross_device_utils.build_collective_reduce( + per_replica.values, + self._devices, + self._group_size, + self._collective_keys, + "Add", + "Id", + communication, + control_inputs, + executors=self._executors)) for e in self._executors: e.wait() @@ -1142,7 +1154,7 @@ class CollectiveAllReduce(CrossDeviceOps): communication_hint = CollectiveCommunication.AUTO.value gathered_values = [] - with ops.name_scope("allreduce"): + with self._lock, ops.name_scope("allreduce"): for per_replica in per_replica_values: gathered_values.append( cross_device_utils.build_collective_gather_indexed_slices( @@ -1163,8 +1175,9 @@ class CollectiveAllReduce(CrossDeviceOps): def __deepcopy__(self, memo): # distribute_coordinator deep-copies the strategy object, so # CollectiveAllReduce needs to support deep copy as well. - return CollectiveAllReduce(self._devices, self._group_size, - self._collective_keys, self._communication) + collective_keys = copy.deepcopy(self._collective_keys, memo) + return CollectiveAllReduce(self._devices, self._group_size, collective_keys, + self._communication) def choose_the_best(devices, session_config=None): diff --git a/tensorflow/python/distribute/cross_device_utils.py b/tensorflow/python/distribute/cross_device_utils.py index 9dc24b16e6a..1d5c2c8f452 100644 --- a/tensorflow/python/distribute/cross_device_utils.py +++ b/tensorflow/python/distribute/cross_device_utils.py @@ -19,7 +19,6 @@ from __future__ import division from __future__ import print_function import collections as pycoll -import threading from tensorflow.python.distribute import all_reduce from tensorflow.python.distribute import values as value_lib @@ -229,14 +228,6 @@ def split_grads_by_size(threshold_size, device_grads): return small_grads, large_grads -# threading.Lock() and threading.local() cannot be pickled and therefore cannot -# be a field of CollectiveKeys. 
Right now _thread_local is not necessary to be -# an instance member of CollectiveKeys since we always create a new thread for -# each replica. -_lock = threading.Lock() -_thread_local = threading.local() - - # TODO(yuefengz): use random key starts to avoid reusing keys? class CollectiveKeys(object): """Class that manages collective keys. @@ -271,16 +262,9 @@ class CollectiveKeys(object): self._group_key_table = {} assert op_instance_key_start != variable_instance_key_start - self._op_instance_key_start = op_instance_key_start + self._op_instance_key = op_instance_key_start self._variable_instance_key = variable_instance_key_start - def _get_thread_local_object(self): - # We make instance key without key ids thread local so that it will work - # with MirroredStrategy and distribute coordinator. - if not hasattr(_thread_local, 'op_instance_key'): - _thread_local.op_instance_key = self._op_instance_key_start - return _thread_local - def get_group_key(self, devices): """Returns a group key for the set of devices. @@ -298,17 +282,16 @@ class CollectiveKeys(object): # task_type and task_id. names = sorted(['%s:%d' % (d.device_type, d.device_index) for d in parsed]) key_id = ','.join(names) - with _lock: - if key_id not in self._group_key_table: - new_key = self._group_key - self._group_key += 1 - self._group_key_table[key_id] = new_key + if key_id not in self._group_key_table: + new_key = self._group_key + self._group_key += 1 + self._group_key_table[key_id] = new_key return self._group_key_table[key_id] def get_op_instance_key(self): """Returns a new instance key for use in defining a collective op.""" - v = self._get_thread_local_object().op_instance_key - self._get_thread_local_object().op_instance_key += 1 + v = self._op_instance_key + self._op_instance_key += 1 return v def get_variable_instance_key(self): From 59288d2451f6cf6bf463f69913be2c47b84999a1 Mon Sep 17 00:00:00 2001 From: Michael Gester Date: Mon, 27 Jul 2020 18:32:56 -0700 Subject: [PATCH 1439/2522] Take reference variables into account for element type checking of TF ops Previously, many TF ops checked for same operand and result element type which failed in cases where one type was a reference variable and the other one not. PiperOrigin-RevId: 323485308 Change-Id: I48d30dacba71e68b726e507069673dc1ed867f27 --- .../mlir/tensorflow/ir/tf_generated_ops.td | 48 +++++++++---------- .../compiler/mlir/tensorflow/ir/tf_op_base.td | 6 +++ .../compiler/mlir/tensorflow/ir/tf_traits.h | 40 ++++++++++++++++ .../compiler/mlir/tensorflow/ir/tf_types.h | 12 +++++ .../mlir/tensorflow/tests/tf-ops.mlir | 18 +++++++ 5 files changed, 100 insertions(+), 24 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td index 0851975e8e1..e43e35b72ba 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td @@ -87,7 +87,7 @@ tf.math.acosh(x) ==> [nan nan 0. 0.62236255 5.9914584 9.903487 inf] TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } -def TF_AddOp : TF_Op<"Add", [NoSideEffect, ResultsBroadcastableShape, SameOperandsAndResultElementType, TF_LayoutAgnostic]>, +def TF_AddOp : TF_Op<"Add", [NoSideEffect, ResultsBroadcastableShape, TF_LayoutAgnostic, TF_SameOperandsAndResultElementTypeResolveRef]>, WithBroadcastableBinOpBuilder { let summary = "Returns x + y element-wise."; @@ -136,7 +136,7 @@ Inputs must be of same size and shape. 
let hasFolder = 1; } -def TF_AddV2Op : TF_Op<"AddV2", [Commutative, NoSideEffect, ResultsBroadcastableShape, SameOperandsAndResultElementType, TF_LayoutAgnostic]>, +def TF_AddV2Op : TF_Op<"AddV2", [Commutative, NoSideEffect, ResultsBroadcastableShape, TF_LayoutAgnostic, TF_SameOperandsAndResultElementTypeResolveRef]>, WithBroadcastableBinOpBuilder { let summary = "Returns x + y element-wise."; @@ -648,7 +648,7 @@ tf.math.atan(y) # [1.047, 0.785] = x TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } -def TF_Atan2Op : TF_Op<"Atan2", [NoSideEffect, ResultsBroadcastableShape, SameOperandsAndResultElementType]>, +def TF_Atan2Op : TF_Op<"Atan2", [NoSideEffect, ResultsBroadcastableShape, TF_SameOperandsAndResultElementTypeResolveRef]>, WithBroadcastableBinOpBuilder { let summary = [{ Computes arctangent of `y/x` element-wise, respecting signs of the arguments. @@ -789,7 +789,7 @@ def TF_AvgPoolGradOp : TF_Op<"AvgPoolGrad", [NoSideEffect]> { TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<1>; } -def TF_BatchMatMulOp : TF_Op<"BatchMatMul", [NoSideEffect, SameOperandsAndResultElementType]> { +def TF_BatchMatMulOp : TF_Op<"BatchMatMul", [NoSideEffect, TF_SameOperandsAndResultElementTypeResolveRef]> { let summary = "Multiplies slices of two tensors in batches."; let description = [{ @@ -830,7 +830,7 @@ It is computed as: let hasCanonicalizer = 1; } -def TF_BatchMatMulV2Op : TF_Op<"BatchMatMulV2", [NoSideEffect, SameOperandsAndResultElementType]> { +def TF_BatchMatMulV2Op : TF_Op<"BatchMatMulV2", [NoSideEffect, TF_SameOperandsAndResultElementTypeResolveRef]> { let summary = "Multiplies slices of two tensors in batches."; let description = [{ @@ -1446,7 +1446,7 @@ that are not a number (NaN) or infinity (Inf). Otherwise, passes `tensor` as-is. 
TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } -def TF_ClipByValueOp : TF_Op<"ClipByValue", [NoSideEffect, SameOperandsAndResultElementType]> { +def TF_ClipByValueOp : TF_Op<"ClipByValue", [NoSideEffect, TF_SameOperandsAndResultElementTypeResolveRef]> { let summary = "Clips tensor values to a specified min and max."; let description = [{ @@ -2493,7 +2493,7 @@ Computes Psi, the derivative of Lgamma (the log of the absolute value of TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } -def TF_DivOp : TF_Op<"Div", [NoSideEffect, ResultsBroadcastableShape, SameOperandsAndResultElementType]>, +def TF_DivOp : TF_Op<"Div", [NoSideEffect, ResultsBroadcastableShape, TF_SameOperandsAndResultElementTypeResolveRef]>, WithBroadcastableBinOpBuilder { let summary = "Returns x / y element-wise."; @@ -2518,7 +2518,7 @@ def TF_DivOp : TF_Op<"Div", [NoSideEffect, ResultsBroadcastableShape, SameOperan let hasFolder = 1; } -def TF_DivNoNanOp : TF_Op<"DivNoNan", [NoSideEffect, ResultsBroadcastableShape, SameOperandsAndResultElementType]>, +def TF_DivNoNanOp : TF_Op<"DivNoNan", [NoSideEffect, ResultsBroadcastableShape, TF_SameOperandsAndResultElementTypeResolveRef]>, WithBroadcastableBinOpBuilder { let summary = "Returns 0 if the denominator is zero."; @@ -3398,7 +3398,7 @@ def TF_FloorDivOp : TF_Op<"FloorDiv", [NoSideEffect, ResultsBroadcastableShape]> TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } -def TF_FloorModOp : TF_Op<"FloorMod", [NoSideEffect, ResultsBroadcastableShape, SameOperandsAndResultElementType]>, +def TF_FloorModOp : TF_Op<"FloorMod", [NoSideEffect, ResultsBroadcastableShape, TF_SameOperandsAndResultElementTypeResolveRef]>, WithBroadcastableBinOpBuilder { let summary = [{ Returns element-wise remainder of division. When `x < 0` xor `y < 0` is @@ -4090,7 +4090,7 @@ def ApplyG(op, dy, _): TF_DerivedOperandTypeListAttr T = TF_DerivedOperandTypeListAttr<0>; } -def TF_IgammaOp : TF_Op<"Igamma", [NoSideEffect, ResultsBroadcastableShape, SameOperandsAndResultElementType]>, +def TF_IgammaOp : TF_Op<"Igamma", [NoSideEffect, ResultsBroadcastableShape, TF_SameOperandsAndResultElementTypeResolveRef]>, WithBroadcastableBinOpBuilder { let summary = [{ Compute the lower regularized incomplete Gamma function `P(a, x)`. @@ -4124,7 +4124,7 @@ Gamma function. TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } -def TF_IgammaGradAOp : TF_Op<"IgammaGradA", [NoSideEffect, ResultsBroadcastableShape, SameOperandsAndResultElementType]>, +def TF_IgammaGradAOp : TF_Op<"IgammaGradA", [NoSideEffect, ResultsBroadcastableShape, TF_SameOperandsAndResultElementTypeResolveRef]>, WithBroadcastableBinOpBuilder { let summary = "Computes the gradient of `igamma(a, x)` wrt `a`."; @@ -4140,7 +4140,7 @@ def TF_IgammaGradAOp : TF_Op<"IgammaGradA", [NoSideEffect, ResultsBroadcastableS TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } -def TF_IgammacOp : TF_Op<"Igammac", [NoSideEffect, ResultsBroadcastableShape, SameOperandsAndResultElementType]>, +def TF_IgammacOp : TF_Op<"Igammac", [NoSideEffect, ResultsBroadcastableShape, TF_SameOperandsAndResultElementTypeResolveRef]>, WithBroadcastableBinOpBuilder { let summary = [{ Compute the upper regularized incomplete Gamma function `Q(a, x)`. 
@@ -4930,7 +4930,7 @@ def TF_LookupTableSizeV2Op : TF_Op<"LookupTableSizeV2", []> { ); } -def TF_MatMulOp : TF_Op<"MatMul", [NoSideEffect, SameOperandsAndResultElementType]> { +def TF_MatMulOp : TF_Op<"MatMul", [NoSideEffect, TF_SameOperandsAndResultElementTypeResolveRef]> { let summary = [{ Multiply the matrix "a" by the matrix "b". }]; @@ -5694,7 +5694,7 @@ def TF_MaxPoolGradOp : TF_Op<"MaxPoolGrad", [NoSideEffect]> { }]; } -def TF_MaximumOp : TF_Op<"Maximum", [NoSideEffect, ResultsBroadcastableShape, SameOperandsAndResultElementType]>, +def TF_MaximumOp : TF_Op<"Maximum", [NoSideEffect, ResultsBroadcastableShape, TF_SameOperandsAndResultElementTypeResolveRef]>, WithBroadcastableBinOpBuilder { let summary = "Returns the max of x and y (i.e. x > y ? x : y) element-wise."; @@ -5768,7 +5768,7 @@ retained with length 1. TF_DerivedOperandTypeAttr Tidx = TF_DerivedOperandTypeAttr<1>; } -def TF_MinimumOp : TF_Op<"Minimum", [NoSideEffect, ResultsBroadcastableShape, SameOperandsAndResultElementType]>, +def TF_MinimumOp : TF_Op<"Minimum", [NoSideEffect, ResultsBroadcastableShape, TF_SameOperandsAndResultElementTypeResolveRef]>, WithBroadcastableBinOpBuilder { let summary = "Returns the min of x and y (i.e. x < y ? x : y) element-wise."; @@ -5901,7 +5901,7 @@ graph_def = foo.get_concrete_function(tf.TensorSpec([10], tf.float32), tf.Tensor TF_DerivedResultTypeListAttr Toutputs = TF_DerivedResultTypeListAttr<0>; } -def TF_ModOp : TF_Op<"Mod", [NoSideEffect, ResultsBroadcastableShape, SameOperandsAndResultElementType]>, +def TF_ModOp : TF_Op<"Mod", [NoSideEffect, ResultsBroadcastableShape, TF_SameOperandsAndResultElementTypeResolveRef]>, WithBroadcastableBinOpBuilder { let summary = [{ Returns element-wise remainder of division. This emulates C semantics in that @@ -5927,7 +5927,7 @@ the result here is consistent with a truncating divide. E.g. TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } -def TF_MulOp : TF_Op<"Mul", [Commutative, NoSideEffect, ResultsBroadcastableShape, SameOperandsAndResultElementType]>, +def TF_MulOp : TF_Op<"Mul", [Commutative, NoSideEffect, ResultsBroadcastableShape, TF_SameOperandsAndResultElementTypeResolveRef]>, WithBroadcastableBinOpBuilder { let summary = "Returns x * y element-wise."; @@ -6430,7 +6430,7 @@ pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0] TF_DerivedOperandTypeAttr Tpaddings = TF_DerivedOperandTypeAttr<1>; } -def TF_PowOp : TF_Op<"Pow", [NoSideEffect, ResultsBroadcastableShape, SameOperandsAndResultElementType]>, +def TF_PowOp : TF_Op<"Pow", [NoSideEffect, ResultsBroadcastableShape, TF_SameOperandsAndResultElementTypeResolveRef]>, WithBroadcastableBinOpBuilder { let summary = "Computes the power of one value to another."; @@ -6909,7 +6909,7 @@ lower bound 0 is included in the range, while the upper bound 1 is excluded. }]; } -def TF_RangeOp : TF_Op<"Range", [NoSideEffect, SameOperandsAndResultElementType]> { +def TF_RangeOp : TF_Op<"Range", [NoSideEffect, TF_SameOperandsAndResultElementTypeResolveRef]> { let summary = "Creates a sequence of numbers."; let description = [{ @@ -9556,7 +9556,7 @@ Examples: TF_DerivedOperandSizeAttr N = TF_DerivedOperandSizeAttr<0>; } -def TF_SubOp : TF_Op<"Sub", [NoSideEffect, ResultsBroadcastableShape, SameOperandsAndResultElementType]>, +def TF_SubOp : TF_Op<"Sub", [NoSideEffect, ResultsBroadcastableShape, TF_SameOperandsAndResultElementTypeResolveRef]>, WithBroadcastableBinOpBuilder { let summary = "Returns x - y element-wise."; @@ -10729,7 +10729,7 @@ Python Semantics. 
let hasCanonicalizer = 1; } -def TF_TruncateModOp : TF_Op<"TruncateMod", [NoSideEffect, ResultsBroadcastableShape, SameOperandsAndResultElementType]>, +def TF_TruncateModOp : TF_Op<"TruncateMod", [NoSideEffect, ResultsBroadcastableShape, TF_SameOperandsAndResultElementTypeResolveRef]>, WithBroadcastableBinOpBuilder { let summary = [{ Returns element-wise remainder of division. This emulates C semantics in that @@ -11202,7 +11202,7 @@ where(input) ==> [[0, 0, 0], TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } -def TF_XdivyOp : TF_Op<"Xdivy", [NoSideEffect, ResultsBroadcastableShape, SameOperandsAndResultElementType]>, +def TF_XdivyOp : TF_Op<"Xdivy", [NoSideEffect, ResultsBroadcastableShape, TF_SameOperandsAndResultElementTypeResolveRef]>, WithBroadcastableBinOpBuilder { let summary = "Returns 0 if x == 0, and x / y otherwise, elementwise."; @@ -11568,7 +11568,7 @@ tensor such that tensor[...,:,:] = u[..., :, :] * Diag(s[..., :]) * Transpose(v[ TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } -def TF_Xlog1pyOp : TF_Op<"Xlog1py", [NoSideEffect, SameOperandsAndResultElementType]> { +def TF_Xlog1pyOp : TF_Op<"Xlog1py", [NoSideEffect, TF_SameOperandsAndResultElementTypeResolveRef]> { let summary = "Returns 0 if x == 0, and x * log1p(y) otherwise, elementwise."; let arguments = (ins @@ -11583,7 +11583,7 @@ def TF_Xlog1pyOp : TF_Op<"Xlog1py", [NoSideEffect, SameOperandsAndResultElementT TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } -def TF_XlogyOp : TF_Op<"Xlogy", [NoSideEffect, ResultsBroadcastableShape, SameOperandsAndResultElementType]>, +def TF_XlogyOp : TF_Op<"Xlogy", [NoSideEffect, ResultsBroadcastableShape, TF_SameOperandsAndResultElementTypeResolveRef]>, WithBroadcastableBinOpBuilder { let summary = "Returns 0 if x == 0, and x * log(y) otherwise, elementwise."; diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td index aaaf9c2fc5c..7aa4c1b54b5 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td @@ -59,6 +59,12 @@ TODO: Make invariants more structured so that we can reference them in ops. def TF_OperandsSameAsResultsTypeOrRef : NativeOpTrait< "TF::OperandsSameAsResultsTypeOrRef">; +// Op has the same operand and result element types (or type itself, if scalar) +// after resolving reference types (i.e., after converting reference types to +// their corresponding TensorFlow or standard types). +def TF_SameOperandsAndResultElementTypeResolveRef : NativeOpTrait< + "TF::SameOperandsAndResultElementTypeResolveRef">; + // Layout agnostic operations do not depend on the operands data layout (data // format), as an example all element wise operations are layout agnostic. def TF_LayoutAgnostic : NativeOpTrait<"TF::LayoutAgnostic">; diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_traits.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_traits.h index f488171d1e1..a94d7dbd219 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_traits.h +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_traits.h @@ -65,6 +65,46 @@ class OperandsSameAsResultsTypeOrRef } }; +// Verifies that op has the same operand and result element types (or type +// itself, if scalar) after resolving reference types (i.e., after converting +// reference types to their corresponding TensorFlow or standard types). 
+template +class SameOperandsAndResultElementTypeResolveRef + : public TraitBase { + public: + static LogicalResult verifyTrait(Operation* op) { + Type element_type; + if (op->getNumResults() > 0) { + element_type = + mlir::TF::GetElementTypeOrSelfResolveRef(op->getResult(0).getType()); + } else if (op->getNumOperands() > 0) { + element_type = + mlir::TF::GetElementTypeOrSelfResolveRef(op->getOperand(0).getType()); + } else { + // Nothing to check. + return success(); + } + // Verify that all result element types are compatible to `element_type`. + for (const auto& result_type : op->getResultTypes()) { + if (mlir::TF::GetElementTypeOrSelfResolveRef(result_type) != + element_type) { + return op->emitOpError( + "requires compatible element types for all operands and results"); + } + } + // Verify that all operand element types are compatible to `element_type`. + for (const auto& operand_type : op->getOperandTypes()) { + if (mlir::TF::GetElementTypeOrSelfResolveRef(operand_type) != + element_type) { + return op->emitOpError( + "requires compatible element types for all operands and results"); + } + } + return success(); + } +}; + // Layout agnostic operations do not depend on the operands data layout (data // format), as and example all element wise operations are layout agnostic. template diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_types.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_types.h index f352bc0eb47..125f6bb31df 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_types.h +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_types.h @@ -22,6 +22,7 @@ limitations under the License. #include "mlir/IR/Location.h" // from @llvm-project #include "mlir/IR/Operation.h" // from @llvm-project #include "mlir/IR/StandardTypes.h" // from @llvm-project +#include "mlir/IR/TypeUtilities.h" // from @llvm-project #include "mlir/IR/Types.h" // from @llvm-project namespace mlir { @@ -166,6 +167,17 @@ static inline Type GetDefaultTypeOf(TensorFlowRefType type) { return type.RemoveRef(); } +// Returns the element type if `type` is a `ShapedType` and the type itself +// otherwise, converting `TensorFlowRef` type to corresponding `TensorFlow` or +// standard type if necessary. +static inline Type GetElementTypeOrSelfResolveRef(Type type) { + Type element_type = mlir::getElementTypeOrSelf(type); + if (auto ref_type = element_type.dyn_cast()) { + element_type = ref_type.RemoveRef(); + } + return element_type; +} + #define HANDLE_TF_TYPE(tftype, enumerant, name) \ class tftype##Type : public detail::TensorFlowTypeImpl { \ public: \ diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf-ops.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tf-ops.mlir index 44646690519..ec28b32b485 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tf-ops.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf-ops.mlir @@ -191,6 +191,24 @@ func @testMul(%arg0: tensor<2xui16>) -> (tensor<2xui16>) { // ----- +// Test error message for incompatible element types. +func @testIncompatibleElementTypes(%arg0: tensor<3x2xf32>, %arg1: tensor<3x2xf64>) -> (tensor<3x2xf32>) { + // expected-error @+1 {{'tf.Mul' op requires compatible element types for all operands and results}} + %0 = "tf.Mul"(%arg0, %arg1) : (tensor<3x2xf32>, tensor<3x2xf64>) -> tensor<3x2xf32> + return %0 : tensor<3x2xf32> +} + +// ----- + +// Test error message for incompatible element types. 
+func @testIncompatibleElementTypes(%arg0: tensor<3x2xf32>, %arg1: tensor<3x2xf32>) -> (tensor<3x2xf64>) { + // expected-error @+1 {{'tf.Mul' op requires compatible element types for all operands and results}} + %0 = "tf.Mul"(%arg0, %arg1) : (tensor<3x2xf32>, tensor<3x2xf32>) -> tensor<3x2xf64> + return %0 : tensor<3x2xf64> +} + +// ----- + // CHECK-LABEL: func @testReshape(%arg0: tensor<*xf32>, %arg1: tensor<*xf32>, %arg2: tensor<10000xf32>, %arg3: tensor<*xi32>) func @testReshape(%arg0: tensor<*xf32>, %arg1: tensor<*xf32>, %arg2: tensor<10000xf32>, %arg3: tensor<*xi32>) -> (tensor<100x100xf32>, tensor<*xf32>, tensor<10000xf32>, tensor<100x100xf32>, tensor<*xf32>, tensor<*xf32>) { %shape1 = constant dense<100> : tensor<2xi32> From 74c0ea4ca8420080135027bb89f4aae5ab2e1040 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Mon, 27 Jul 2020 19:10:34 -0700 Subject: [PATCH 1440/2522] [XLA:TPU] Add support for half precision PiperOrigin-RevId: 323489799 Change-Id: I90e2a2f3a57e8e96105e58aea87c54a3ec98c750 --- tensorflow/compiler/xla/literal.cc | 6 ++++-- tensorflow/core/tpu/tpu_defs.h | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/xla/literal.cc b/tensorflow/compiler/xla/literal.cc index d2b300f0b2d..5c26929661e 100644 --- a/tensorflow/compiler/xla/literal.cc +++ b/tensorflow/compiler/xla/literal.cc @@ -1601,14 +1601,16 @@ bool LiteralBase::Piece::EqualElements(const LiteralBase::Piece& other) const { switch (subshape().element_type()) { case PRED: return EqualElementsInternal(other, &multi_index); - case U8: - return EqualElementsInternal(other, &multi_index); + case S8: + return EqualElementsInternal(other, &multi_index); case S16: return EqualElementsInternal(other, &multi_index); case S32: return EqualElementsInternal(other, &multi_index); case S64: return EqualElementsInternal(other, &multi_index); + case U8: + return EqualElementsInternal(other, &multi_index); case U16: return EqualElementsInternal(other, &multi_index); case U32: diff --git a/tensorflow/core/tpu/tpu_defs.h b/tensorflow/core/tpu/tpu_defs.h index 696fa8dbe3e..008e386dde6 100644 --- a/tensorflow/core/tpu/tpu_defs.h +++ b/tensorflow/core/tpu/tpu_defs.h @@ -51,8 +51,8 @@ extern const char* const kTPUReplicateAttr; extern const char* const kOutsideCompilationAttr; // Supported types for TPUs. -static constexpr std::array kTpuAllTypes = { - {DT_INT32, DT_UINT32, DT_BFLOAT16, DT_FLOAT, DT_DOUBLE, DT_BOOL, +static constexpr std::array kTpuAllTypes = { + {DT_INT32, DT_UINT32, DT_HALF, DT_BFLOAT16, DT_FLOAT, DT_DOUBLE, DT_BOOL, DT_COMPLEX64, DT_INT64, DT_UINT64, DT_QINT8, DT_QUINT8, DT_INT8, DT_UINT8, DT_INT16, DT_UINT16}}; From 427df02dbbdcc8806c15325aa277c6891abcfd73 Mon Sep 17 00:00:00 2001 From: Ran Chen Date: Mon, 27 Jul 2020 19:16:48 -0700 Subject: [PATCH 1441/2522] Generate replica_id tensor at call time Keeping Tensors as states doesn't work well with nested tf.function. It's possible that the Tensor is generated one func graph, and gets capture by other func graphs, which results an error that "an op outside of the function building code is being passed a Graph tensor". e.g. @tf.function def f(): ... strategy.run(g) ... @tf.function def g(): ... do_something(get_replica_context().replica_id_in_sync_group) ... Note that f() and g() may be traced multiple times, i.e. g() may capture the tensor from an ephemeral trace of f(). 
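The fix, visible in the diff that follows, is to keep a plain integer on the replica context and only build a constant tensor when the property is read. A minimal sketch of that pattern, using public TF APIs and an illustrative class name rather than the actual internal implementation:

import tensorflow as tf

class ReplicaContextSketch(object):
  """Illustrative only; not the real tf.distribute ReplicaContext."""

  def __init__(self, replica_id):
    # A plain int (or, for tpu.replicate, an already-built Tensor). No tensor
    # is created at construction time, so the object can be shared across
    # tf.function traces without capturing tensors from another func graph.
    self._replica_id = replica_id

  @property
  def replica_id_in_sync_group(self):
    if tf.is_tensor(self._replica_id):
      return self._replica_id
    # Materialize the constant at call time, inside the caller's own graph.
    return tf.constant(self._replica_id, tf.int32)
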
PiperOrigin-RevId: 323490593 Change-Id: I230fd036aaf05cfbd7c8bb7898921b83d2731e7d --- tensorflow/python/distribute/BUILD | 1 + .../python/distribute/distribute_lib.py | 49 ++++++++++++++++--- .../python/distribute/distribute_lib_test.py | 4 +- .../distribution_strategy_context.py | 4 +- .../distribute/mirrored_function_strategy.py | 6 +-- tensorflow/python/distribute/mirrored_run.py | 12 ++--- .../python/distribute/one_device_strategy.py | 4 +- .../python/distribute/strategy_common_test.py | 43 +++++++++++----- tensorflow/python/distribute/tpu_strategy.py | 6 +-- 9 files changed, 89 insertions(+), 40 deletions(-) diff --git a/tensorflow/python/distribute/BUILD b/tensorflow/python/distribute/BUILD index 185b4568868..a3279e84ac2 100644 --- a/tensorflow/python/distribute/BUILD +++ b/tensorflow/python/distribute/BUILD @@ -1727,6 +1727,7 @@ distribute_py_test( "//tensorflow/python:constant_op", "//tensorflow/python:dtypes", "//tensorflow/python:math_ops", + "//tensorflow/python/compat:v2_compat", "//tensorflow/python/data/ops:dataset_ops", "//tensorflow/python/eager:def_function", "@absl_py//absl/testing:parameterized", diff --git a/tensorflow/python/distribute/distribute_lib.py b/tensorflow/python/distribute/distribute_lib.py index 41ec725d3ab..16a44048534 100644 --- a/tensorflow/python/distribute/distribute_lib.py +++ b/tensorflow/python/distribute/distribute_lib.py @@ -213,6 +213,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import custom_gradient @@ -2845,9 +2846,22 @@ class ReplicaContext(object): """ def __init__(self, strategy, replica_id_in_sync_group): + """Creates a ReplicaContext. + + Args: + strategy: A `tf.distribute.Strategy`. + replica_id_in_sync_group: An integer, a `Tensor` or None. Prefer an + integer whenever possible to avoid issues with nested `tf.function`. It + accepts a `Tensor` only to be compatible with `tpu.replicate`. + """ self._strategy = strategy self._thread_context = distribution_strategy_context._InReplicaThreadMode( # pylint: disable=protected-access self) + if not (replica_id_in_sync_group is None or + tensor_util.is_tensor(replica_id_in_sync_group) or + isinstance(replica_id_in_sync_group, int)): + raise ValueError( + "replica_id_in_sync_group can only be an integer, a Tensor or None.") self._replica_id_in_sync_group = replica_id_in_sync_group self._summary_recording_distribution_strategy = None @@ -2856,7 +2870,7 @@ class ReplicaContext(object): _push_per_thread_mode(self._thread_context) def replica_id_is_zero(): - return math_ops.equal(self._replica_id_in_sync_group, + return math_ops.equal(self.replica_id_in_sync_group, constant_op.constant(0)) summary_state = summary_ops_v2._summary_state # pylint: disable=protected-access @@ -2929,9 +2943,24 @@ class ReplicaContext(object): NOTE: This is not guaranteed to be the same ID as the XLA replica ID use for low-level operations such as collective_permute. + + Returns: + a `Tensor`. """ - require_replica_context(self) - return self._replica_id_in_sync_group + # It's important to prefer making the Tensor at call time whenver possible. 
+ # Keeping Tensors in global states doesn't work well with nested + # tf.function, since it's possible that the tensor is generated in one func + # graph, and gets captured by another, which will result in a subtle "An op + # outside of the function building code is being passed a Graph tensor" + # error. Making the tensor at call time to ensure it is the same graph where + # it's used. However to be compatible with tpu.replicate(), + # self._replica_id_in_sync_group can also be a Tensor. + if tensor_util.is_tensor(self._replica_id_in_sync_group): + return self._replica_id_in_sync_group + return constant_op.constant( + self._replica_id_in_sync_group, + dtypes.int32, + name="replica_id_in_sync_group") @property def strategy(self): @@ -3155,9 +3184,7 @@ class _DefaultDistributionExtended(StrategyExtendedV1): raise NotImplementedError("TODO") def _call_for_each_replica(self, fn, args, kwargs): - with ReplicaContext( - self._container_strategy(), - replica_id_in_sync_group=constant_op.constant(0, dtypes.int32)): + with ReplicaContext(self._container_strategy(), replica_id_in_sync_group=0): return fn(*args, **kwargs) def _reduce_to(self, reduce_op, value, destinations, experimental_hints): @@ -3262,6 +3289,16 @@ class _DefaultDistributionExtended(StrategyExtendedV1): return True +class _DefaultReplicaContext(ReplicaContext): + """ReplicaContext for _DefaultDistributionStrategy.""" + + @property + def replica_id_in_sync_group(self): + # Return 0 instead of a constant tensor to avoid creating a new node for + # users who don't use distribution strategy. + return 0 + + # ------------------------------------------------------------------------------ # We haven't yet implemented deserialization for DistributedVariables. # So here we catch any attempts to deserialize variables diff --git a/tensorflow/python/distribute/distribute_lib_test.py b/tensorflow/python/distribute/distribute_lib_test.py index 1aa6911fee6..816ff0ce465 100644 --- a/tensorflow/python/distribute/distribute_lib_test.py +++ b/tensorflow/python/distribute/distribute_lib_test.py @@ -32,7 +32,6 @@ from tensorflow.python.distribute.cluster_resolver import SimpleClusterResolver from tensorflow.python.eager import context from tensorflow.python.eager import def_function from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables @@ -75,8 +74,7 @@ class _TestExtended(distribute_lib.StrategyExtendedV1): def _call_for_each_replica(self, fn, args, kwargs): with _TestReplicaContext( - self._container_strategy(), - replica_id_in_sync_group=constant_op.constant(0, dtypes.int32)): + self._container_strategy(), replica_id_in_sync_group=0): return fn(*args, **kwargs) def _create_variable(self, next_creator, **kwargs): diff --git a/tensorflow/python/distribute/distribution_strategy_context.py b/tensorflow/python/distribute/distribution_strategy_context.py index 89f9d3b964b..b08c2313b1c 100644 --- a/tensorflow/python/distribute/distribution_strategy_context.py +++ b/tensorflow/python/distribute/distribution_strategy_context.py @@ -334,8 +334,10 @@ def _get_default_replica_context(): # Avoid race condition causing two defaults to be created with _default_replica_context_lock: if _defaults["replica_context"] is None: - _defaults["replica_context"] = distribute_lib.ReplicaContext( + # pylint: disable=protected-access + _defaults["replica_context"] = 
distribute_lib._DefaultReplicaContext( _get_default_strategy(), replica_id_in_sync_group=0) + # pylint: enable=protected-access return _defaults["replica_context"] diff --git a/tensorflow/python/distribute/mirrored_function_strategy.py b/tensorflow/python/distribute/mirrored_function_strategy.py index bbe52984d1e..57e2f2e5c69 100644 --- a/tensorflow/python/distribute/mirrored_function_strategy.py +++ b/tensorflow/python/distribute/mirrored_function_strategy.py @@ -168,11 +168,11 @@ class MirroredFunctionReplicaContext(distribute_lib.ReplicaContext): distribute_lib.ReplicaContext.__init__(self, strategy, None) @property - def _replica_id_in_sync_group(self): + def replica_id_in_sync_group(self): return _replica_id_tensor() - @_replica_id_in_sync_group.setter - def _replica_id_in_sync_group(self, value): + @replica_id_in_sync_group.setter + def replica_id_in_sync_group(self, value): assert value is None def _merge_call(self, merge_fn, args, kwargs): diff --git a/tensorflow/python/distribute/mirrored_run.py b/tensorflow/python/distribute/mirrored_run.py index 05018450121..2cf23e96e67 100644 --- a/tensorflow/python/distribute/mirrored_run.py +++ b/tensorflow/python/distribute/mirrored_run.py @@ -31,11 +31,8 @@ from tensorflow.python.distribute import distribute_utils from tensorflow.python.distribute import shared_variable_creator from tensorflow.python.eager import context from tensorflow.python.eager import def_function -from tensorflow.python.framework import constant_op from tensorflow.python.framework import device as tf_device -from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_util from tensorflow.python.ops import summary_ops_v2 from tensorflow.python.ops import variable_scope from tensorflow.python.platform import tf_logging as logging @@ -313,8 +310,7 @@ class _MirroredReplicaThread(threading.Thread): _enter_graph(self.graph, self.in_eager, self._variable_creator_stack), \ context.device_policy(self.context_device_policy), \ - _MirroredReplicaContext(self.distribution, constant_op.constant( - self.replica_id, dtypes.int32)), \ + _MirroredReplicaContext(self.distribution, self.replica_id), \ ops.device(self.devices[self.replica_id]), \ ops.name_scope(self._name_scope), \ variable_scope.variable_scope( @@ -452,5 +448,7 @@ class _MirroredReplicaContext(distribute_lib.ReplicaContext): @property def devices(self): distribute_lib.require_replica_context(self) - replica_id = tensor_util.constant_value(self._replica_id_in_sync_group) - return [self._strategy.extended.worker_devices_by_replica[replica_id]] + return [ + self._strategy.extended.worker_devices_by_replica[ + self._replica_id_in_sync_group] + ] diff --git a/tensorflow/python/distribute/one_device_strategy.py b/tensorflow/python/distribute/one_device_strategy.py index 2a58df28c14..8f40a5f7991 100644 --- a/tensorflow/python/distribute/one_device_strategy.py +++ b/tensorflow/python/distribute/one_device_strategy.py @@ -24,7 +24,6 @@ from tensorflow.python.distribute import distribute_utils from tensorflow.python.distribute import input_lib from tensorflow.python.distribute import numpy_dataset from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops @@ -459,9 +458,8 @@ class _OneDeviceReplicaContext(distribute_lib.ReplicaContext): """ReplicaContext for 
OneDeviceStrategy.""" def __init__(self, strategy): - zero = constant_op.constant(0, dtypes.int32) distribute_lib.ReplicaContext.__init__( - self, strategy, replica_id_in_sync_group=zero) + self, strategy, replica_id_in_sync_group=0) @property def devices(self): diff --git a/tensorflow/python/distribute/strategy_common_test.py b/tensorflow/python/distribute/strategy_common_test.py index b1dfe78326c..ece8c573ed1 100644 --- a/tensorflow/python/distribute/strategy_common_test.py +++ b/tensorflow/python/distribute/strategy_common_test.py @@ -21,8 +21,10 @@ from __future__ import print_function from absl.testing import parameterized import numpy as np +from tensorflow.python.compat import v2_compat from tensorflow.python.data.ops import dataset_ops from tensorflow.python.distribute import combinations +from tensorflow.python.distribute import distribution_strategy_context as ds_context from tensorflow.python.distribute import multi_worker_test_base from tensorflow.python.distribute import reduce_util from tensorflow.python.distribute import strategy_combinations @@ -33,31 +35,29 @@ from tensorflow.python.eager import def_function from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import variables from tensorflow.python.platform import test -class StrategyReduceTest(test.TestCase, parameterized.TestCase): +@combinations.generate( + combinations.combine( + strategy=[ + strategy_combinations.multi_worker_mirrored_2x1_cpu, + strategy_combinations.multi_worker_mirrored_2x1_gpu, + ] + strategy_combinations.all_strategies, + mode=['eager'])) +class StrategyTest(test.TestCase, parameterized.TestCase): - @combinations.generate( - combinations.combine( - strategy=[ - strategy_combinations.multi_worker_mirrored_2x1_cpu, - strategy_combinations.multi_worker_mirrored_2x1_gpu, - ] + strategy_combinations.strategies_minus_tpu, - mode=['eager'])) def testSimpleReduce(self, strategy): + per_replica_value = strategy.experimental_distribute_values_from_function( + lambda _: array_ops.ones((), dtypes.float32)) def fn_eager(): - def replica_fn(): - return array_ops.ones((), dtypes.float32) - - per_replica_value = strategy.run(replica_fn) return strategy.reduce( reduce_util.ReduceOp.SUM, value=per_replica_value, axis=None) fn_graph = def_function.function(fn_eager) - # Run reduce under the strategy scope to explicitly enter # strategy default_device scope. with strategy.scope(): @@ -69,6 +69,22 @@ class StrategyReduceTest(test.TestCase, parameterized.TestCase): self.assertEqual(fn_eager().numpy(), 1.0 * strategy.num_replicas_in_sync) self.assertEqual(fn_graph().numpy(), 1.0 * strategy.num_replicas_in_sync) + def testCaptureReplicaId(self, strategy): + m = {} + + @def_function.function + def f(): + return ds_context.get_replica_context().replica_id_in_sync_group + + @def_function.function + def g(): + # Make g() a stateful function so it's traced twice. + if m.get('v', None) is None: + m['v'] = variables.Variable(0.) 
+ return strategy.run(f) + + g() + @combinations.generate( combinations.combine( @@ -222,4 +238,5 @@ class StrategyClusterResolverTest(test.TestCase, parameterized.TestCase): if __name__ == '__main__': + v2_compat.enable_v2_behavior() combinations.main() diff --git a/tensorflow/python/distribute/tpu_strategy.py b/tensorflow/python/distribute/tpu_strategy.py index bad6e6aa39f..3446f78288d 100644 --- a/tensorflow/python/distribute/tpu_strategy.py +++ b/tensorflow/python/distribute/tpu_strategy.py @@ -1205,9 +1205,7 @@ class _TPUReplicaContext(distribute_lib.ReplicaContext): # TODO(sourabhbajaj): Call for each replica should be updating this. # TODO(b/118385803): Always properly initialize replica_id. - def __init__(self, strategy, replica_id_in_sync_group=None): - if replica_id_in_sync_group is None: - replica_id_in_sync_group = constant_op.constant(0, dtypes.int32) + def __init__(self, strategy, replica_id_in_sync_group=0): distribute_lib.ReplicaContext.__init__( self, strategy, replica_id_in_sync_group=replica_id_in_sync_group) @@ -1215,7 +1213,7 @@ class _TPUReplicaContext(distribute_lib.ReplicaContext): def devices(self): distribute_lib.require_replica_context(self) ds = self._strategy - replica_id = tensor_util.constant_value(self._replica_id_in_sync_group) + replica_id = tensor_util.constant_value(self.replica_id_in_sync_group) if replica_id is None: # Non-constant `Tensor` inside `tpu.replicate`. # TODO(cjfj): Return other devices when model parallelism is supported. From 2f82a73c4186642dc7fc4c2a737c3792df137439 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Jul 2020 19:18:46 -0700 Subject: [PATCH 1442/2522] Update ops-related pbtxt files. PiperOrigin-RevId: 323490805 Change-Id: Ia9c319c8e816a40bf6130045c404737db8bd58d4 --- .../ops_history_v2/OptimizeDatasetV2.pbtxt | 43 +++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 43 +++++++++++++++++++ 2 files changed, 86 insertions(+) create mode 100644 tensorflow/core/ops/compat/ops_history_v2/OptimizeDatasetV2.pbtxt diff --git a/tensorflow/core/ops/compat/ops_history_v2/OptimizeDatasetV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/OptimizeDatasetV2.pbtxt new file mode 100644 index 00000000000..ee43df5bfd7 --- /dev/null +++ b/tensorflow/core/ops/compat/ops_history_v2/OptimizeDatasetV2.pbtxt @@ -0,0 +1,43 @@ +op { + name: "OptimizeDatasetV2" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "optimizations_enabled" + type: DT_STRING + } + input_arg { + name: "optimizations_disabled" + type: DT_STRING + } + input_arg { + name: "optimizations_default" + type: DT_STRING + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + attr { + name: "optimization_configs" + type: "list(string)" + default_value { + list { + } + } + } +} diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index fed598bdef4..a9a94580d86 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -26317,6 +26317,49 @@ op { } } } +op { + name: "OptimizeDatasetV2" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "optimizations_enabled" + type: DT_STRING + } + input_arg { + name: "optimizations_disabled" + type: DT_STRING + } + input_arg { + name: "optimizations_default" + type: DT_STRING + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + 
name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + attr { + name: "optimization_configs" + type: "list(string)" + default_value { + list { + } + } + } +} op { name: "OptionalFromValue" input_arg { From ec94fabb838b0934eeaf46f1fd22c36b2b3ec333 Mon Sep 17 00:00:00 2001 From: Meghna Natraj Date: Mon, 27 Jul 2020 19:20:31 -0700 Subject: [PATCH 1443/2522] Support integer input and output type for Quantize-Aware Trained models PiperOrigin-RevId: 323490985 Change-Id: Ife8e1b106bb4f44afe1cf7bc72bb21600439dfc1 --- RELEASE.md | 6 +- tensorflow/lite/python/BUILD | 16 ++ tensorflow/lite/python/lite.py | 29 ++- tensorflow/lite/python/lite_v2_test.py | 46 ++-- tensorflow/lite/python/util.py | 264 ++++++++++++++++++++++ tensorflow/lite/python/util_test.py | 163 +++++++++++++ tensorflow/lite/tools/flatbuffer_utils.py | 2 +- tensorflow/lite/tools/test_utils.py | 2 +- 8 files changed, 493 insertions(+), 35 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 56567abea2d..4ad67378c38 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -57,8 +57,8 @@ dispatch to logical ops. This brings them more in line with Python and NumPy benavior. * Added `tf.SparseTensor.with_values`. This returns a new SparseTensor with - the same sparsity pattern, but with new provided values. It is similar to - the `with_values` function of `RaggedTensor`. + the same sparsity pattern, but with new provided values. It is similar to + the `with_values` function of `RaggedTensor`. * `tf.data`: * Added new `tf.data.experimental.service.register_dataset` and `tf.data.experimental.service.from_dataset_id` APIs to enable one process @@ -88,6 +88,8 @@ * `tf.lite`: * `DynamicBuffer::AddJoinedString()` will now add a separator if the first string to be joined is empty. + * `TFLiteConverter`: + * Support optional flags `inference_input_type` and `inference_output_type` for full integer quantized models. This allows users to modify the model input and output type to integer types (`tf.int8`, `tf.uint8`) instead of defaulting to float type (`tf.float32`). * * `tf.random`: * diff --git a/tensorflow/lite/python/BUILD b/tensorflow/lite/python/BUILD index e26000c810a..3f4e187b4eb 100644 --- a/tensorflow/lite/python/BUILD +++ b/tensorflow/lite/python/BUILD @@ -162,6 +162,7 @@ py_test( shard_count = 4, srcs_version = "PY2AND3", tags = [ + "no_mac", # TODO(b/148247402): flatbuffers import broken on Mac OS. "no_windows", ], deps = [ @@ -212,8 +213,11 @@ py_library( deps = [ ":lite_constants", ":op_hint", + ":schema_py", "//tensorflow/python:tf_optimizer", "//tensorflow/python/eager:wrap_function", + "@absl_py//absl/logging", + "@flatbuffers//:runtime_py", "@six_archive//:six", ], ) @@ -224,12 +228,24 @@ py_test( python_version = "PY3", srcs_version = "PY2AND3", tags = [ + "no_mac", # TODO(b/148247402): flatbuffers import broken on Mac OS. 
"no_windows", ], deps = [ + ":lite_constants", ":util", + "//tensorflow:tensorflow_py", + "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", + "//tensorflow/python:control_flow_ops", + "//tensorflow/python:convert_to_constants", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", "//tensorflow/python:framework_test_lib", + "//tensorflow/python:math_ops", + "//tensorflow/python:session", + "//third_party/py/numpy", + "@absl_py//absl/testing:parameterized", "@six_archive//:six", ], ) diff --git a/tensorflow/lite/python/lite.py b/tensorflow/lite/python/lite.py index bbdb41778da..c0a8c33331b 100644 --- a/tensorflow/lite/python/lite.py +++ b/tensorflow/lite/python/lite.py @@ -61,6 +61,7 @@ from tensorflow.lite.python.util import get_grappler_config as _get_grappler_con from tensorflow.lite.python.util import get_tensor_name as _get_tensor_name from tensorflow.lite.python.util import get_tensors_from_tensor_names as _get_tensors_from_tensor_names from tensorflow.lite.python.util import is_frozen_graph as _is_frozen_graph +from tensorflow.lite.python.util import modify_integer_quantized_model_io_type as _modify_integer_quantized_model_io_type from tensorflow.lite.python.util import run_graph_optimizations as _run_graph_optimizations from tensorflow.lite.python.util import set_tensor_shapes as _set_tensor_shapes from tensorflow.python import keras as _keras @@ -324,6 +325,23 @@ class QuantizationMode(object): else: return False, None + def flags_modify_model_io_type( + self, input_type=constants.FLOAT, output_type=constants.FLOAT): + """Flags for modifying the input and output type of a tflite model.""" + is_post_training_quantize = self.quantizer_flags(input_type, output_type)[0] + is_training_time_only_quantize = self.training_time_int8_allow_float() and \ + not is_post_training_quantize + + # TODO(b/153576658): Consolidate post/during training quantization workflows + # to modify model input/output type after MLIR conversion. + if is_training_time_only_quantize: + return { + "inference_input_type": input_type, + "inference_output_type": output_type, + } + else: + return None + # Below are helpers for the above functions. def _validate_int8_required(self): @@ -567,9 +585,8 @@ class TFLiteConverterBaseV2(TFLiteConverterBase): def _validate_inference_input_output_types(self, quant_mode): """Validate inference_input_type and inference_output_type flags.""" default_types = [constants.FLOAT] - # We only support integer types for post training integer quantization - # as we have statistical information to quantize the input and output. - if quant_mode.is_post_training_integer_quantize(): + # We support integer input/output for integer quantized models only. 
+ if quant_mode.training_time_int8_allow_float(): if quant_mode.is_post_training_integer_quantize_16x8(): all_types = default_types + [constants.INT16] else: @@ -656,6 +673,12 @@ class TFLiteConverterBaseV2(TFLiteConverterBase): if calibrate_and_quantize: result = self._calibrate_quantize_model(result, **flags) + flags_modify_model_io_type = quant_mode.flags_modify_model_io_type( + self.inference_input_type, self.inference_output_type) + if flags_modify_model_io_type: + result = _modify_integer_quantized_model_io_type( + result, **flags_modify_model_io_type) + if self._experimental_sparsify_model: result = _mlir_sparsify(result) diff --git a/tensorflow/lite/python/lite_v2_test.py b/tensorflow/lite/python/lite_v2_test.py index 65d9dbb5ef0..c1b566ff8ad 100644 --- a/tensorflow/lite/python/lite_v2_test.py +++ b/tensorflow/lite/python/lite_v2_test.py @@ -445,8 +445,12 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): return tf.keras.Sequential(QLinear(3, input_shape=(2,))) + @parameterized.named_parameters( + ('_DefaultFLOAT32InputOutput', lite.constants.FLOAT), + ('_INT8InputOutput', lite.constants.INT8), + ('_UINT8InputOutput', lite.constants.QUANTIZED_UINT8)) @test_util.run_v2_only - def testTrainingTimeQuantization(self): + def testTrainingTimeQuantization(self, inference_input_output_type): model = self._getTrainingTimeQuantizedModel() float_converter = lite.TFLiteConverterV2.from_keras_model(model) @@ -455,38 +459,24 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): quantized_converter = lite.TFLiteConverterV2.from_keras_model(model) quantized_converter.optimizations = [lite.Optimize.DEFAULT] + quantized_converter.inference_input_type = inference_input_output_type + quantized_converter.inference_output_type = inference_input_output_type quantized_tflite = quantized_converter.convert() self.assertTrue(quantized_tflite) - # Ensure that the quantized weights tflite model is smaller. - self.assertLess(len(quantized_tflite), len(float_tflite)) - interpreter = Interpreter(model_content=quantized_tflite) - self.assertEqual(np.float32, interpreter.get_input_details()[0]['dtype']) + interpreter.allocate_tensors() + input_details = interpreter.get_input_details() + self.assertLen(input_details, 1) + self.assertEqual(inference_input_output_type.as_numpy_dtype, + input_details[0]['dtype']) + output_details = interpreter.get_output_details() + self.assertLen(output_details, 1) + self.assertEqual(inference_input_output_type.as_numpy_dtype, + output_details[0]['dtype']) - @parameterized.named_parameters( - ('_INT8InputOutput', lite.constants.INT8), - ('_UINT8InputOutput', lite.constants.QUANTIZED_UINT8), - ('_INT16InputOutput', lite.constants.INT16)) - def testInvalidTrainingTimeQuantization(self, inference_input_output_type): - # We currently don't support integer inference_input_type and - # inference_output_type flags for training time quantization. 
- - model = self._getTrainingTimeQuantizedModel() - - converter = lite.TFLiteConverterV2.from_keras_model(model) - tflite_model = converter.convert() - self.assertTrue(tflite_model) - - quantized_converter = lite.TFLiteConverterV2.from_keras_model(model) - quantized_converter.optimizations = [lite.Optimize.DEFAULT] - with self.assertRaises(ValueError) as error: - quantized_converter.inference_input_type = inference_input_output_type - quantized_converter.inference_output_type = inference_input_output_type - quantized_converter.convert() - self.assertEqual( - 'The inference_input_type and inference_output_type ' - 'must be tf.float32.', str(error.exception)) + # Ensure that the quantized tflite model is smaller. + self.assertLess(len(quantized_tflite), len(float_tflite)) @test_util.run_v2_only def testNewQuantizer(self): diff --git a/tensorflow/lite/python/util.py b/tensorflow/lite/python/util.py index ff7caad0f88..720e53de509 100644 --- a/tensorflow/lite/python/util.py +++ b/tensorflow/lite/python/util.py @@ -19,15 +19,20 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import copy import datetime import sys +from absl import logging import six from six.moves import range +import flatbuffers from tensorflow.core.protobuf import config_pb2 as _config_pb2 from tensorflow.core.protobuf import graph_debug_info_pb2 from tensorflow.core.protobuf import meta_graph_pb2 as _meta_graph_pb2 +from tensorflow.lite.python import lite_constants as _lite_constants +from tensorflow.lite.python import schema_py_generated as schema_fb from tensorflow.lite.python.op_hint import convert_op_hints_to_stubs from tensorflow.lite.python.op_hint import find_all_hinted_output_nodes from tensorflow.lite.toco import types_pb2 as _types_pb2 @@ -55,6 +60,25 @@ _MAP_TF_TO_TFLITE_TYPES = { dtypes.bool: _types_pb2.BOOL, } +_MAP_TFLITE_ENUM_TO_TF_TYPES = { + 0: dtypes.float32, + 1: dtypes.float16, + 2: dtypes.int32, + 3: dtypes.uint8, + 4: dtypes.int64, + 5: dtypes.string, + 6: dtypes.bool, + 7: dtypes.int16, + 8: dtypes.complex64, + 9: dtypes.int8, + 10: dtypes.float64, +} + +_TFLITE_FILE_IDENTIFIER = b"TFL3" + +_TFLITE_MODEL_INPUT_OUTPUT_TYPES = (_lite_constants.FLOAT, _lite_constants.INT8, + _lite_constants.QUANTIZED_UINT8) + def convert_dtype_to_tflite_type(tf_dtype): """Converts tf.dtype to TFLite proto type. @@ -74,6 +98,31 @@ def convert_dtype_to_tflite_type(tf_dtype): return result +def _convert_tflite_enum_type_to_tf_type(tflite_enum_type): + """Converts tflite enum type (eg: 0) to tf type (eg: tf.float32). + + Args: + tflite_enum_type: tflite enum type (eg: 0, that corresponds to float32) + + Raises: + ValueError: If an invalid tflite enum type is provided. + + Returns: + tf type (eg: tf.float32) + """ + tf_type = _MAP_TFLITE_ENUM_TO_TF_TYPES.get(tflite_enum_type) + if tf_type is None: + raise ValueError( + "Unsupported enum {}. The valid map of enum to tf.dtypes is : {}" + .format(tflite_enum_type, _MAP_TFLITE_ENUM_TO_TF_TYPES)) + return tf_type + + +def _get_dtype_name(tf_type): + """Converts tf.dtype (eg: tf.float32) to str (eg: "tf.float32").""" + return "tf." + tf_type.name + + def get_tensor_name(tensor): """Returns name of the input tensor. 
@@ -514,3 +563,218 @@ extern const int {array_name}_len; license_text=license_text) return source_text, header_text + + +def _convert_model_from_bytearray_to_object(model_bytearray): + """Converts a tflite model from a bytearray into a parsable object.""" + model_object = schema_fb.Model.GetRootAsModel(model_bytearray, 0) + model_object = schema_fb.ModelT.InitFromObj(model_object) + model_object = copy.deepcopy(model_object) + model_object.subgraphs[0].inputs[0] = model_object.subgraphs[0].inputs[0] + return model_object + + +def _convert_model_from_object_to_bytearray(model_object): + """Converts a tflite model from a parsable object into a bytearray.""" + # Initial size of the buffer, which will grow automatically if needed + builder = flatbuffers.Builder(1024) + model_offset = model_object.Pack(builder) + builder.Finish(model_offset, file_identifier=_TFLITE_FILE_IDENTIFIER) + return bytes(builder.Output()) + + +def _remove_tensors_from_model(model, remove_tensors_idxs): + """Remove tensors from model.""" + if not remove_tensors_idxs: + return + if len(model.subgraphs) > 1: + raise ValueError("Model must only have one subgraph. Instead, it has " + "{} subgraphs.".format(len(model.subgraphs))) + subgraph = model.subgraphs[0] + tensors = subgraph.tensors + operators = subgraph.operators + + logging.debug("Removing tensors at indices : %s", remove_tensors_idxs) + # An optimized check to validate if "remove_tensors_idxs" (eg: [4,5,6]) is an + # exact subset, with ordering, of "tensors" indices (eg: [0,1,2,3,4,5,6]). + if min(remove_tensors_idxs) == len(tensors) - len(remove_tensors_idxs): + logging.debug("Removing tensors only at the end of the tensor list") + del tensors[min(remove_tensors_idxs):] + else: + logging.debug("Removing tensors requires updating the model") + # Map the old tensor indices to new tensor indices + d_old_to_new_tensors = {} + left_shift_by = 0 + for idx in range(len(tensors)): + if idx in remove_tensors_idxs: + left_shift_by += 1 + else: + d_old_to_new_tensors[idx] = idx - left_shift_by + logging.debug("Old to new tensors map: %s", d_old_to_new_tensors.__str__()) + # Update tensor indices referenced throughout the model + def update_tensors(tensor_idxs): + for i, ti in enumerate(tensor_idxs): + tensor_idxs[i] = d_old_to_new_tensors.get(ti, -1) + update_tensors(subgraph.inputs) + update_tensors(subgraph.outputs) + for op in operators: + update_tensors(op.inputs) + update_tensors(op.outputs) + # Delete the tensors + for idx in sorted(remove_tensors_idxs, reverse=True): + tensors.pop(idx) + logging.debug("Removed tensors marked for deletion") + + +def _validate_and_find_int8_quantized_inputs_outputs(model): + """Validate that model input is quantized and output is dequantized.""" + if len(model.subgraphs) > 1: + raise ValueError("Model must only have one subgraph. 
Instead, it has " + "{} subgraphs.".format(len(model.subgraphs))) + subgraph = model.subgraphs[0] + tensors = subgraph.tensors + operators = subgraph.operators + + # Ensure model has atleast one quantize and dequantize operator + quant_opcode_idx, dequant_opcode_idx = None, None + for idx, opcode in enumerate(model.operatorCodes): + if opcode.builtinCode == schema_fb.BuiltinOperator.QUANTIZE: + quant_opcode_idx = idx + elif opcode.builtinCode == schema_fb.BuiltinOperator.DEQUANTIZE: + dequant_opcode_idx = idx + if quant_opcode_idx is not None and dequant_opcode_idx is not None: + break + if quant_opcode_idx is None and dequant_opcode_idx is None: + raise ValueError("Model is not integer quantized as it does not " + "contain quantize/dequantize operators.") + + # Ensure model inputs and outputs are integer quantized + input_quant_ops, output_dequant_ops = [], [] + for op in operators: + # Find input quantize operator + if op.opcodeIndex == quant_opcode_idx and op.inputs[0] in subgraph.inputs: + pos, float_tensor, int_tensor = \ + "input", tensors[op.inputs[0]], tensors[op.outputs[0]] + input_quant_ops.append(op) + # Find output dequantize operator + elif op.opcodeIndex == dequant_opcode_idx and \ + op.outputs[0] in subgraph.outputs: + pos, float_tensor, int_tensor = \ + "output", tensors[op.outputs[0]], tensors[op.inputs[0]] + output_dequant_ops.append(op) + # Otherwise, ignore + else: + continue + # If found, validate the input/output tensor type + if float_tensor.type != schema_fb.TensorType.FLOAT32: + raise ValueError( + "Model {} type must be tf.float32. Expected type for tensor with " + "name '{}' is tf.float32, instead type is tf.{}".format( + pos, float_tensor.name, + _convert_tflite_enum_type_to_tf_type(float_tensor.type).name)) + if int_tensor.type != schema_fb.TensorType.INT8: + raise ValueError( + "Model is not integer quantized. Expected type for tensor with " + "name '{}' is tf.int8, instead type is tf.{}".format( + int_tensor.name, + _convert_tflite_enum_type_to_tf_type(int_tensor.type).name)) + + return input_quant_ops, output_dequant_ops + + +def modify_integer_quantized_model_io_type( + model, inference_input_type=_lite_constants.FLOAT, + inference_output_type=_lite_constants.FLOAT): + """Modify the float input/output type of an integer quantized model. + + Args: + model: An int8 quantized tflite model with float input and output. + inference_input_type: tf.DType representing final input type. + (default tf.float32) + inference_output_type: tf.DType representing final output type. + (default tf.float32) + + Returns: + An int8 quantized tflite model with modified input and/or output type. + + Raises: + ValueError: If the model is not int8 quantized or the inference_input_type + and/or inference_input_type is unsupported. + RuntimeError: If the modification was unsuccessful. 
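+
+  Example (illustrative usage only, assuming `import tensorflow as tf` and
+  that `quantized_model` holds the serialized output of int8 post-training
+  quantization with the default float input/output):
+
+    int8_io_model = modify_integer_quantized_model_io_type(
+        quantized_model,
+        inference_input_type=tf.int8,
+        inference_output_type=tf.int8)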
+ + """ + # Return if input and output types default to float + if inference_input_type == _lite_constants.FLOAT and \ + inference_output_type == _lite_constants.FLOAT: + return model + + # Validate input and output types + if inference_input_type not in _TFLITE_MODEL_INPUT_OUTPUT_TYPES: + raise ValueError("The `inference_input_type` should be in {}".format( + tuple(_get_dtype_name(t) for t in _TFLITE_MODEL_INPUT_OUTPUT_TYPES))) + if inference_output_type not in _TFLITE_MODEL_INPUT_OUTPUT_TYPES: + raise ValueError("The `inference_output_type` should be in {}".format( + tuple(_get_dtype_name(t) for t in _TFLITE_MODEL_INPUT_OUTPUT_TYPES))) + + logging.debug(("Attempting to modify the model input from tf.float32 to %s " + "and output from tf.float32 to %s"), + _get_dtype_name(inference_input_type), + _get_dtype_name(inference_output_type)) + # Convert the model to an object + model = _convert_model_from_bytearray_to_object(model) + + # Validate the integer quantized model + input_quant_ops, output_dequant_ops = \ + _validate_and_find_int8_quantized_inputs_outputs(model) + + # Initialize references and variables + if len(model.subgraphs) > 1: + raise ValueError("Model must only have one subgraph. Instead, it has " + "{} subgraphs.".format(len(model.subgraphs))) + subgraph = model.subgraphs[0] + tensors = subgraph.tensors + operators = subgraph.operators + remove_tensors_idxs = set() + + # Modify model input type + if inference_input_type == _lite_constants.QUANTIZED_UINT8: + # Change quant op (float to int8) to quant op (uint8 to int8) + for op in input_quant_ops: + int8_quantization = tensors[op.outputs[0]].quantization + uint8_quantization = schema_fb.QuantizationParametersT() + uint8_quantization.scale = [int8_quantization.scale[0]] + uint8_quantization.zeroPoint = [int8_quantization.zeroPoint[0] + 128] + tensors[op.inputs[0]].quantization = uint8_quantization + tensors[op.inputs[0]].type = schema_fb.TensorType.UINT8 + elif inference_input_type == _lite_constants.INT8: + # Remove the inputs and the quant operator + for op in input_quant_ops: + subgraph.inputs[subgraph.inputs == op.inputs[0]] = op.outputs[0] + remove_tensors_idxs.add(op.inputs[0]) + operators.remove(op) + + # Modify model output type + if inference_output_type == _lite_constants.QUANTIZED_UINT8: + # Change dequant op (int8 to float) to quant op (int8 to uint8) + for op in output_dequant_ops: + op.opcodeIndex = input_quant_ops[0].opcodeIndex + int8_quantization = tensors[op.inputs[0]].quantization + uint8_quantization = schema_fb.QuantizationParametersT() + uint8_quantization.scale = [int8_quantization.scale[0]] + uint8_quantization.zeroPoint = [int8_quantization.zeroPoint[0] + 128] + tensors[op.outputs[0]].quantization = uint8_quantization + tensors[op.outputs[0]].type = schema_fb.TensorType.UINT8 + elif inference_output_type == _lite_constants.INT8: + # Remove the outputs and the dequant operator + for op in output_dequant_ops: + subgraph.outputs[subgraph.outputs == op.outputs[0]] = op.inputs[0] + remove_tensors_idxs.add(op.outputs[0]) + operators.remove(op) + + # Remove tensors marked for deletion. 
+ _remove_tensors_from_model(model, remove_tensors_idxs) + + # Convert the model to a bytearray + model = _convert_model_from_object_to_bytearray(model) + + return model diff --git a/tensorflow/lite/python/util_test.py b/tensorflow/lite/python/util_test.py index f3c287dd7fc..0e9cbc1e58a 100644 --- a/tensorflow/lite/python/util_test.py +++ b/tensorflow/lite/python/util_test.py @@ -19,7 +19,10 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from absl.testing import parameterized +import numpy as np from six.moves import range +import tensorflow as tf from tensorflow.lite.python import lite_constants from tensorflow.lite.python import util @@ -61,6 +64,31 @@ class UtilTest(test_util.TensorFlowTestCase): self.assertEqual( util.convert_dtype_to_tflite_type(dtypes.bool), _types_pb2.BOOL) + def testConvertEnumToDtype(self): + self.assertEqual( + util._convert_tflite_enum_type_to_tf_type(0), dtypes.float32) + self.assertEqual( + util._convert_tflite_enum_type_to_tf_type(1), dtypes.float16) + self.assertEqual(util._convert_tflite_enum_type_to_tf_type(2), dtypes.int32) + self.assertEqual(util._convert_tflite_enum_type_to_tf_type(3), dtypes.uint8) + self.assertEqual(util._convert_tflite_enum_type_to_tf_type(4), dtypes.int64) + self.assertEqual( + util._convert_tflite_enum_type_to_tf_type(5), dtypes.string) + self.assertEqual(util._convert_tflite_enum_type_to_tf_type(6), dtypes.bool) + self.assertEqual(util._convert_tflite_enum_type_to_tf_type(7), dtypes.int16) + self.assertEqual( + util._convert_tflite_enum_type_to_tf_type(8), dtypes.complex64) + self.assertEqual(util._convert_tflite_enum_type_to_tf_type(9), dtypes.int8) + self.assertEqual( + util._convert_tflite_enum_type_to_tf_type(10), dtypes.float64) + with self.assertRaises(ValueError) as error: + util._convert_tflite_enum_type_to_tf_type(11) + self.assertEqual( + "Unsupported enum 11. The valid map of enum to tf.dtypes is : " + "{0: tf.float32, 1: tf.float16, 2: tf.int32, 3: tf.uint8, 4: tf.int64, " + "5: tf.string, 6: tf.bool, 7: tf.int16, 8: tf.complex64, 9: tf.int8, " + "10: tf.float64}", str(error.exception)) + def testTensorName(self): with ops.Graph().as_default(): in_tensor = array_ops.placeholder(shape=[4], dtype=dtypes.float32) @@ -195,5 +223,140 @@ class TensorFunctionsTest(test_util.TensorFlowTestCase): self.assertEqual([None, 3, 5], tensor.shape.as_list()) +def _generate_integer_tflite_model(): + """Define an integer post-training quantized tflite model.""" + # Load MNIST dataset + n = 10 # Number of samples + (train_images, train_labels), (test_images, test_labels) = \ + tf.keras.datasets.mnist.load_data() + train_images, train_labels, test_images, test_labels = \ + train_images[:n], train_labels[:n], test_images[:n], test_labels[:n] + + # Normalize the input image so that each pixel value is between 0 to 1. 
+ train_images = train_images / 255.0 + test_images = test_images / 255.0 + + # Define TF model + model = tf.keras.Sequential([ + tf.keras.layers.InputLayer(input_shape=(28, 28)), + tf.keras.layers.Reshape(target_shape=(28, 28, 1)), + tf.keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation="relu"), + tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), + tf.keras.layers.Flatten(), + tf.keras.layers.Dense(10) + ]) + + # Train + model.compile( + optimizer="adam", + loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), + metrics=["accuracy"]) + + model.fit( + train_images, + train_labels, + epochs=1, + validation_split=0.1, + ) + + # Convert TF Model to an Integer Quantized TFLite Model + converter = tf.lite.TFLiteConverter.from_keras_model(model) + converter.optimizations = {tf.lite.Optimize.DEFAULT} + def representative_dataset_gen(): + for _ in range(2): + yield [ + np.random.uniform(low=0, high=1, size=(1, 28, 28)).astype( + np.float32) + ] + converter.representative_dataset = representative_dataset_gen + converter.target_spec.supported_ops = {tf.lite.OpsSet.TFLITE_BUILTINS_INT8} + tflite_model = converter.convert() + + return tflite_model + + +def _test_param_modify_integer_model_io_type(): + """Function to generate parameterized inputs for testing.""" + params = [] + str_template = "_{}{}{}" + map_model_type = { + "PostTraining": True, + # "DuringTraining": False, + } + map_types = { + "": lite_constants.FLOAT, + "INT8": lite_constants.INT8, + "UINT8": lite_constants.QUANTIZED_UINT8 + } + for k1, v1 in map_model_type.items(): + for k2, v2 in map_types.items(): + istr = "_Input{}".format(k2) if k2 else "" + for k3, v3 in map_types.items(): + ostr = "_Output{}".format(k3) if k3 else "" if istr else "_NoUpdate" + params.append((str_template.format(k1, istr, ostr), v1, v2, v3)) + return params + + +# TODO(b/161174063): Merge tests for integer input/output type +class UtilModifyIntegerQuantizedModelIOTypeTest( + test_util.TensorFlowTestCase, parameterized.TestCase): + + @classmethod + def setUpClass(cls): + super(UtilModifyIntegerQuantizedModelIOTypeTest, cls).setUpClass() + cls.post_train_integer_model = _generate_integer_tflite_model() + + @parameterized.named_parameters(_test_param_modify_integer_model_io_type()) + def test(self, is_post_train, in_tftype, out_tftype): + """Modify the float input/output type of an integer quantized model.""" + + def _run_tflite_inference(model, in_tftype, out_tftype): + """Run inference on a model with a specific input/output type.""" + # Load TFLite model and allocate tensors. 
+ interpreter = tf.lite.Interpreter(model_content=model) + interpreter.allocate_tensors() + input_details = interpreter.get_input_details()[0] + output_details = interpreter.get_output_details()[0] + + # Validate TFLite model input and output types + self.assertEqual(input_details["dtype"], in_tftype.as_numpy_dtype) + self.assertEqual(output_details["dtype"], out_tftype.as_numpy_dtype) + + # Define Input + np.random.seed(0) + input_data = np.random.uniform(low=0, high=1, size=(1, 28, 28)) + input_data = input_data.astype(np.float32) + if input_details["dtype"] != np.float32: + # quantize float to int + scale, zero_point = input_details["quantization"] + input_data = input_data / scale + zero_point + input_data = input_data.astype(input_details["dtype"]) + + # Run Inference + interpreter.set_tensor(input_details["index"], input_data) + interpreter.invoke() + + # Get output + output_data = interpreter.get_tensor(output_details["index"])[0] + if output_details["dtype"] != np.float32: + # dequantize int to float + scale, zero_point = output_details["quantization"] + output_data = output_data.astype(np.float32) + output_data = (output_data - zero_point) * scale + + return output_data + + model = self.__class__.post_train_integer_model if is_post_train else None + # Run model inference with float input output type + output_data = _run_tflite_inference(model, tf.float32, tf.float32) + # Run model inference with modified integer input output type + model_io = util.modify_integer_quantized_model_io_type( + model, in_tftype, out_tftype) + output_io_data = _run_tflite_inference(model_io, in_tftype, out_tftype) + + # Validate that both the outputs are the same + self.assertTrue(np.allclose(output_data, output_io_data, atol=1.0)) + + if __name__ == "__main__": test.main() diff --git a/tensorflow/lite/tools/flatbuffer_utils.py b/tensorflow/lite/tools/flatbuffer_utils.py index ce29a8e0e89..3171759201c 100644 --- a/tensorflow/lite/tools/flatbuffer_utils.py +++ b/tensorflow/lite/tools/flatbuffer_utils.py @@ -29,7 +29,7 @@ import copy import os import random -from flatbuffers.python import flatbuffers +import flatbuffers from tensorflow.lite.python import schema_py_generated as schema_fb _TFLITE_FILE_IDENTIFIER = b'TFL3' diff --git a/tensorflow/lite/tools/test_utils.py b/tensorflow/lite/tools/test_utils.py index 3950e3de35e..dde01a9872a 100644 --- a/tensorflow/lite/tools/test_utils.py +++ b/tensorflow/lite/tools/test_utils.py @@ -21,7 +21,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from flatbuffers.python import flatbuffers +import flatbuffers from tensorflow.lite.python import schema_py_generated as schema_fb TFLITE_SCHEMA_VERSION = 3 From 35d7f0af7c384a113610083dbba680ebe38c850c Mon Sep 17 00:00:00 2001 From: Anna R Date: Mon, 27 Jul 2020 19:31:43 -0700 Subject: [PATCH 1444/2522] Enable the KerasTensors refactoring of the Keras functional API. Replaces symbolic tensors during Functional API construction with lightweight `tensor-like` objects that have a very similar API, but do not require a global graph workspace. This should improve the reliability and performance of the Keras functional API and of automatic TF op -> Lambda layer conversions during functional API construction. E.g. ~8-10% fas... 
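For orientation, a brief illustrative sketch (not code from this change) of the functional-API construction path the refactoring targets, where a raw TF op applied to a symbolic Keras input is converted into a layer automatically:

import tensorflow as tf

inputs = tf.keras.Input(shape=(4,))
# A plain TF op on a symbolic Keras input; functional-API construction turns
# this into an op/Lambda layer of the resulting model.
x = tf.reduce_sum(inputs, axis=-1, keepdims=True)
outputs = tf.keras.layers.Dense(1)(x)
model = tf.keras.Model(inputs, outputs)

With the KerasTensors path enabled, `inputs` above is a lightweight symbolic value rather than a tensor in a global graph workspace; code inspecting such symbolic inputs/outputs should use tf.is_tensor rather than isinstance(x, tf.Tensor).
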
PiperOrigin-RevId: 323492034 Change-Id: Ie23afa55463b16dbc26e3b574e5f1e3814f2f612 --- RELEASE.md | 21 +------------------ tensorflow/python/keras/backend.py | 12 +++++------ .../python/keras/engine/keras_tensor.py | 2 +- 3 files changed, 8 insertions(+), 27 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 4ad67378c38..b248bae9a68 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -14,18 +14,6 @@ * Removed `tf.distribute.Strategy.experimental_run_v2` method, which was deprecated in TF 2.2. * `tensorflow.python`, `tensorflow.core` and `tensorflow.compiler` modules are now hidden. These modules are not part of TensorFlow public API. -* A major refactoring of the internals of the Keras Functional API may affect code that is relying on certain internal details: - * Code that uses `isinstance(x, tf.Tensor)` instead of `tf.is_tensor` when checking Keras symbolic inputs/outputs should switch to using `tf.is_tensor`. - * Code that is overly dependent on the exact names attached to symbolic tensors (e.g. assumes there will be ":0" at the end of the inputs, treats names as unique identifiers instead of using `tensor.ref()`, etc.) - * Code that uses `get_concrete_function` to trace Keras symbolic inputs directly should switch to building matching `tf.TensorSpec`s directly and tracing the `TensorSpec` objects. - * Code that relies on the exact number and names of the op layers that TensorFlow operations were converted into. These may have changed. - * Code that uses `tf.map_fn`/`tf.cond`/`tf.while_loop`/control flow as op layers and happens to work before TF 2.4. These will explicitly be unsupported now. Converting these ops to Functional API op layers was unreliable before TF 2.4, and prone to erroring incomprehensibly or being silently buggy. - * Code that directly asserts on a Keras symbolic value in cases where ops like `tf.rank` used to return a static or symbolic value depending on if the input had a fully static shape or not. Now these ops always return symbolic values. - * Code already susceptible to leaking tensors outside of graphs becomes slightly more likely to do so now. - * Code that requires very tricky shape manipulation via converted op layers in order to work, where the Keras symbolic shape inference proves insufficient. - * Code that tries manually walking a `tf.keras.Model` layer by layer and assumes layers only ever have one positional argument. This assumption doesn't hold true before TF 2.4 either, but is more likely to cause issues know. - * Code that manually enters `keras.backend.get_graph()` before building a functional model. This is no longer needed. - ## Known Caveats @@ -36,7 +24,6 @@ * * * A new module named `tf.experimental.numpy` is added, which is a NumPy-compatible API for writing TF programs. This module provides class `ndarray`, which mimics the `ndarray` class in NumPy, and wraps an immutable `tf.Tensor` under the hood. A subset of NumPy functions (e.g. `numpy.add`) are provided. Their inter-operation with TF facilities is seamless in most cases. See tensorflow/python/ops/numpy_ops/README.md for details of what are supported and what are the differences with NumPy. -* A major refactoring of the internals of the Keras Functional API has been completed, that should improve the reliability, stability, and performance of constructing Functional models. ## Bug Fixes and Other Changes @@ -73,13 +60,7 @@ option. 
* `tf.distribute`: * -* `tf.keras`: - * Improvements from the functional API refactoring: - * Functional model construction does not need to maintain a global workspace graph, removing memory leaks especially when building many models or very large models. - * Functional model construction should be ~8-10% faster on average. - * Functional models can now contain non-symbolic values in their call inputs inside of the first positional argument. - * Several classes of TF ops that were not reliably converted to Keras layers during functional API construction should now work, e.g. `tf.image.ssim_multiscale` - * Error messages when Functional API construction goes wrong (and when ops cannot be converted to Keras layers automatically) should be clearer and easier to understand. +* `tf.keras`: * * `tf.function` / AutoGraph: * Added `experimental_follow_type_hints` argument for `tf.function`. When diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py index 3861d49254d..07da09049c5 100644 --- a/tensorflow/python/keras/backend.py +++ b/tensorflow/python/keras/backend.py @@ -1180,7 +1180,7 @@ def placeholder(shape=None, >>> input_ph = tf.keras.backend.placeholder(shape=(2, 4, 5)) >>> input_ph - + """ if sparse and ragged: @@ -1281,7 +1281,7 @@ def shape(x): >>> input = tf.keras.backend.placeholder(shape=(2, 4, 5)) >>> tf.keras.backend.shape(input) - + """ return array_ops.shape(x) @@ -1796,13 +1796,13 @@ def dot(x, y): >>> y = tf.keras.backend.placeholder(shape=(3, 4)) >>> xy = tf.keras.backend.dot(x, y) >>> xy - + >>> x = tf.keras.backend.placeholder(shape=(32, 28, 3)) >>> y = tf.keras.backend.placeholder(shape=(3, 4)) >>> xy = tf.keras.backend.dot(x, y) >>> xy - + >>> x = tf.keras.backend.random_uniform_variable(shape=(2, 3), low=0, high=1) >>> y = tf.keras.backend.ones((4, 3, 5)) @@ -2052,10 +2052,10 @@ def transpose(x): [3., 6.]], dtype=float32) >>> input = tf.keras.backend.placeholder((2, 3)) >>> input - + >>> input_transposed = tf.keras.backend.transpose(input) >>> input_transposed - + """ return array_ops.transpose(x) diff --git a/tensorflow/python/keras/engine/keras_tensor.py b/tensorflow/python/keras/engine/keras_tensor.py index 840aaa72441..4266c6dbee6 100644 --- a/tensorflow/python/keras/engine/keras_tensor.py +++ b/tensorflow/python/keras/engine/keras_tensor.py @@ -30,7 +30,7 @@ from tensorflow.python.util import object_identity # pylint: disable=g-classes-have-attributes -_KERAS_TENSORS_ENABLED = True +_KERAS_TENSORS_ENABLED = False def enable_keras_tensors(): From fb4efe194dd9077481751cc60faa6992c793db26 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Jul 2020 19:41:05 -0700 Subject: [PATCH 1445/2522] Relax assertion in build_info_test. There may be other dict keys on other platforms. 
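For illustration only (this snippet is not part of the change, and the extra key shown is hypothetical), the relaxed check only requires the known keys to appear in order, so it keeps passing when a platform adds extra entries:

```python
# Minimal sketch of why an ordered-subsequence check is more portable than
# asserting exact key equality; the values and the extra key are made up.
build_info = {
    "is_cuda_build": False,
    "is_rocm_build": False,
    "cuda_version": "none",  # hypothetical platform-specific extra key
}

def contains_subsequence(seq, subseq):
    """Returns True if the items of `subseq` appear in `seq`, in order."""
    it = iter(seq)
    return all(item in it for item in subseq)

print(list(build_info.keys()) == ["is_cuda_build", "is_rocm_build"])  # False: brittle
print(contains_subsequence(build_info.keys(), ("is_cuda_build", "is_rocm_build")))  # True
```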
PiperOrigin-RevId: 323492935 Change-Id: I59c57fbe7015ae56f0d6b17fed6605b4c30aa9fb --- tensorflow/python/platform/build_info_test.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/platform/build_info_test.py b/tensorflow/python/platform/build_info_test.py index 8eac0de2cbc..5d4b3cfa251 100644 --- a/tensorflow/python/platform/build_info_test.py +++ b/tensorflow/python/platform/build_info_test.py @@ -31,8 +31,10 @@ class BuildInfoTest(test.TestCase): test.is_built_with_cuda()) def testDeterministicOrder(self): - self.assertEqual(['is_cuda_build', 'is_rocm_build'], - list(build_info.build_info.keys())) + # The dict may contain other keys depending on the platform, but the ones + # it always contains should be in order. + self.assertContainsSubsequence(build_info.build_info.keys(), + ('is_cuda_build', 'is_rocm_build')) if __name__ == '__main__': From 571594aedb2237d76bb0405d4d0895ee78f1d659 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Jul 2020 19:42:45 -0700 Subject: [PATCH 1446/2522] [tf.data] Implementation of tf.data experiment roll out framework. PiperOrigin-RevId: 323493103 Change-Id: I763bacdc55636287304f8f4f0f3a3acbe89e4198 --- .../base_api/api_def_OptimizeDatasetV2.pbtxt | 32 ---- tensorflow/core/kernels/data/BUILD | 1 - tensorflow/core/kernels/data/dataset_utils.cc | 130 ---------------- tensorflow/core/kernels/data/dataset_utils.h | 12 -- .../core/kernels/data/dataset_utils_test.cc | 137 ---------------- .../core/kernels/data/optimize_dataset_op.cc | 58 +------ .../core/kernels/data/optimize_dataset_op.h | 9 -- tensorflow/core/ops/dataset_ops.cc | 11 -- tensorflow/core/platform/default/port.cc | 2 - tensorflow/core/platform/host_info.h | 6 +- .../kernel_tests/optimize_dataset_test.py | 147 +++--------------- .../kernel_tests/prefetch_with_slack_test.py | 4 +- .../optimize_dataset_serialization_test.py | 6 +- .../data/experimental/ops/optimization.py | 20 +-- .../experimental/ops/optimization_options.py | 53 ++----- tensorflow/python/data/ops/dataset_ops.py | 105 +++---------- tensorflow/python/data/util/options.py | 8 - .../api/golden/v1/tensorflow.raw_ops.pbtxt | 4 - .../api/golden/v2/tensorflow.raw_ops.pbtxt | 4 - 19 files changed, 73 insertions(+), 676 deletions(-) delete mode 100644 tensorflow/core/api_def/base_api/api_def_OptimizeDatasetV2.pbtxt diff --git a/tensorflow/core/api_def/base_api/api_def_OptimizeDatasetV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_OptimizeDatasetV2.pbtxt deleted file mode 100644 index a8e66499471..00000000000 --- a/tensorflow/core/api_def/base_api/api_def_OptimizeDatasetV2.pbtxt +++ /dev/null @@ -1,32 +0,0 @@ -op { - graph_op_name: "OptimizeDatasetV2" - visibility: HIDDEN - in_arg { - name: "input_dataset" - description: < SelectOptimizations( - const string& job_name, const string& opt_ins_raw, - const string& opt_outs_raw, - const absl::flat_hash_map& live_experiments, - const std::vector& optimizations_enabled, - const std::vector& optimizations_disabled, - const std::vector& optimizations_default, - std::function hash_func) { - // Creates a set of optimizations. - absl::flat_hash_set optimizations_set; - - // Creates the opt in and opt out settings. 
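  // (Added explanatory note, not part of the code being removed here:
  // `opt_ins_raw` and `opt_outs_raw` carry the raw values of the
  // TF_DATA_EXPERIMENT_OPT_IN and TF_DATA_EXPERIMENT_OPT_OUT environment
  // variables; the special value "all" expands to every live experiment,
  // otherwise the value is split on commas into individual experiment names.)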
- std::vector opt_ins, opt_outs; - if (opt_ins_raw == "all") { - for (auto& pair : live_experiments) { - opt_ins.push_back(pair.first); - } - } else { - opt_ins = str_util::Split(opt_ins_raw, ',', str_util::SkipEmpty()); - } - if (opt_outs_raw == "all") { - for (auto& pair : live_experiments) { - opt_outs.push_back(pair.first); - } - } else { - opt_outs = str_util::Split(opt_outs_raw, ',', str_util::SkipEmpty()); - } - - // Checks if the opt in and opt out experiments are live experiments. - for (auto& optimization : opt_ins) { - if (live_experiments.find(optimization) == live_experiments.end()) { - LOG(WARNING) << "The experiment \"" << optimization - << "\" is opted in but it is not a live experiment."; - } - } - for (auto& optimization : opt_outs) { - if (live_experiments.find(optimization) == live_experiments.end()) { - LOG(WARNING) << "The experiment \"" << optimization - << "\" is opted out but it is not a live experiment."; - } - } - - // Checks if the opt in settings conflict with opt out settings. - for (auto& optimization : opt_ins) { - if (std::find(opt_outs.begin(), opt_outs.end(), optimization) != - opt_outs.end()) { - LOG(WARNING) << "The experiment \"" << optimization - << "\" is set in both \"TF_DATA_EXPERIMENT_OPT_IN\" and " - "\"TF_DATA_EXPERIMENT_OPT_OUT\". Unless the experiment " - "corresponds to an explicitly enabled optimization, it " - "is not applied."; - } - } - - // Checks if the enable/disable settings from tf.data.Options conflict with - // user opt in/out settings. In which case we assume tf.data.Options settings - // have higher priority to overwrite. - for (auto& optimization : optimizations_enabled) { - if (std::find(opt_outs.begin(), opt_outs.end(), optimization) != - opt_outs.end()) { - LOG(WARNING) << "The optimization \"" << optimization - << "\" is opt out, but is still applied since" - " it is enabled through tf.data.Options."; - } - } - for (auto& optimization : optimizations_disabled) { - if (std::find(opt_ins.begin(), opt_ins.end(), optimization) != - opt_ins.end()) { - LOG(WARNING) << "The optimization \"" << optimization - << "\" is opt in, but is not applied since" - " it is disabled through tf.data.Options."; - } - } - - // Add the enabled optimizations. - optimizations_set.insert(optimizations_enabled.begin(), - optimizations_enabled.end()); - - // Add the default optimizations that are not explicitly opted out. - for (auto& optimization : optimizations_default) { - if (std::find(opt_outs.begin(), opt_outs.end(), optimization) == - opt_outs.end()) { - optimizations_set.insert(optimization); - } - } - - // Add the live experiments stochastically if they are neither opted in nor - // opted out. - for (auto& pair : live_experiments) { - string experiment = pair.first; - // Skip experiments that are explicitly opted out. - if (std::find(opt_outs.begin(), opt_outs.end(), experiment) != - opt_outs.end()) { - continue; - } - // Skip experiments whose transformations are explicitly disabled. - if (std::find(optimizations_disabled.begin(), optimizations_disabled.end(), - experiment) != optimizations_disabled.end()) { - continue; - } - // Apply experiments that are explicitly opted in. - if (std::find(opt_ins.begin(), opt_ins.end(), experiment) != - opt_ins.end()) { - optimizations_set.insert(experiment); - continue; - } - // Otherwise, apply experiment stochastically based on job name and - // experiment roll out percentage. 
- if (hash_func(strings::StrCat(job_name, experiment)) % 100 < pair.second) { - optimizations_set.insert(experiment); - } - } - - // Log the experiments that will be applied. - if (VLOG_IS_ON(1)) { - for (auto& pair : live_experiments) { - string experiment = pair.first; - if (std::find(optimizations_set.begin(), optimizations_set.end(), - experiment) != optimizations_set.end()) { - VLOG(1) << "The experiment \"" << experiment << "\" is applied."; - } - } - } - - std::vector optimizations; - optimizations.insert(optimizations.end(), optimizations_set.begin(), - optimizations_set.end()); - return optimizations; -} - } // namespace data } // namespace tensorflow diff --git a/tensorflow/core/kernels/data/dataset_utils.h b/tensorflow/core/kernels/data/dataset_utils.h index 0fe3618f34b..5c6b14a8782 100644 --- a/tensorflow/core/kernels/data/dataset_utils.h +++ b/tensorflow/core/kernels/data/dataset_utils.h @@ -304,18 +304,6 @@ class DummyResourceOp : public OpKernel { // MatchesAnyVersionRE("PaddedBatchDataset", "BatchDataset") == false bool MatchesAnyVersionRE(StringPiece op_prefix, StringPiece op_to_match); -// Based on `optimizations_enabled`, `optimizations_disabled`, and -// `optimizations_disabled`, returns the list of optimizations that will be -// applied. -std::vector SelectOptimizations( - const string& job_name, const string& opt_ins_raw, - const string& opt_outs_raw, - const absl::flat_hash_map& live_experiments, - const std::vector& optimizations_enabled, - const std::vector& optimizations_disabled, - const std::vector& optimizations_default, - std::function hash_func); - } // namespace data } // namespace tensorflow diff --git a/tensorflow/core/kernels/data/dataset_utils_test.cc b/tensorflow/core/kernels/data/dataset_utils_test.cc index a1f624faeb6..1a6e673c3f3 100644 --- a/tensorflow/core/kernels/data/dataset_utils_test.cc +++ b/tensorflow/core/kernels/data/dataset_utils_test.cc @@ -30,8 +30,6 @@ namespace tensorflow { namespace data { namespace { -using ::testing::UnorderedElementsAre; - class DatasetHashUtilsTest : public ::testing::Test { protected: uint64 GetHash(const FunctionDefLibrary& library, const FunctionDef& fn) { @@ -1133,141 +1131,6 @@ TEST_F(DatasetHashUtilsTest, HashStringTensor) { EXPECT_NE(GetHash(v1), GetHash(v3)); } -class SelectOptimizationsHashTest : public ::testing::TestWithParam {}; - -TEST_P(SelectOptimizationsHashTest, DatasetUtils) { - const uint64 hash_result = GetParam(); - string job_name = "job"; - const string opt_ins_raw = ""; - const string opt_outs_raw = ""; - auto hash_func = [hash_result](const string& str) { return hash_result; }; - absl::flat_hash_map live_experiments = { - {"exp1", 0}, {"exp2", 20}, {"exp3", 33}, {"exp4", 45}, - {"exp5", 67}, {"exp6", 88}, {"exp7", 100}}; - std::vector optimizations_enabled, optimizations_disabled, - optimizations_default; - std::vector optimizations = - SelectOptimizations(job_name, opt_ins_raw, opt_outs_raw, live_experiments, - optimizations_enabled, optimizations_disabled, - optimizations_default, hash_func); - - int tested_times = 0; - switch (hash_result) { - case 0: - case 100: - case 200: - tested_times++; - EXPECT_THAT(optimizations, UnorderedElementsAre("exp2", "exp3", "exp4", - "exp5", "exp6", "exp7")); - break; - case 33: - case 133: - tested_times++; - EXPECT_THAT(optimizations, - UnorderedElementsAre("exp4", "exp5", "exp6", "exp7")); - break; - case 67: - case 167: - tested_times++; - EXPECT_THAT(optimizations, UnorderedElementsAre("exp6", "exp7")); - break; - } - EXPECT_EQ(tested_times, 1); -} 
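  // (Added explanatory note, not part of the removed test: the parameterized
  // cases above exercise the rollout rule
  //   hash(job_name + experiment_name) % 100 < rollout_percent.
  // With the live experiments {"exp1", 0} ... {"exp7", 100}, a hash value of
  // 33 selects exp4 (45), exp5 (67), exp6 (88) and exp7 (100), and a hash of
  // 67 selects only exp6 and exp7, which is exactly what the switch expects.)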
- -INSTANTIATE_TEST_SUITE_P(Test, SelectOptimizationsHashTest, - ::testing::Values(0, 33, 67, 100, 133, 167, 200)); - -class SelectOptimizationsOptTest - : public ::testing::TestWithParam> {}; - -TEST_P(SelectOptimizationsOptTest, DatasetUtils) { - string job_name = "job"; - const string opt_ins_raw = std::get<0>(GetParam()); - const string opt_outs_raw = std::get<1>(GetParam()); - auto hash_func = [](const string& str) { return 50; }; - absl::flat_hash_map live_experiments = { - {"exp1", 0}, {"exp2", 25}, {"exp3", 50}, {"exp4", 75}, {"exp5", 100}}; - std::vector optimizations_enabled, optimizations_disabled, - optimizations_default; - std::vector optimizations = - SelectOptimizations(job_name, opt_ins_raw, opt_outs_raw, live_experiments, - optimizations_enabled, optimizations_disabled, - optimizations_default, hash_func); - - int tested_times = 0; - if (opt_outs_raw == "all") { - EXPECT_THAT(optimizations, UnorderedElementsAre()); - tested_times++; - } else if (opt_outs_raw.empty()) { - if (opt_ins_raw == "all") { - EXPECT_THAT(optimizations, - UnorderedElementsAre("exp1", "exp2", "exp3", "exp4", "exp5")); - tested_times++; - } else if (opt_ins_raw.empty()) { - EXPECT_THAT(optimizations, UnorderedElementsAre("exp4", "exp5")); - tested_times++; - } else if (opt_ins_raw == "exp2,exp4") { - EXPECT_THAT(optimizations, UnorderedElementsAre("exp2", "exp4", "exp5")); - tested_times++; - } - } else if (opt_outs_raw == "exp1,exp5") { - if (opt_ins_raw == "all") { - EXPECT_THAT(optimizations, UnorderedElementsAre("exp2", "exp3", "exp4")); - tested_times++; - } else if (opt_ins_raw.empty()) { - EXPECT_THAT(optimizations, UnorderedElementsAre("exp4")); - tested_times++; - } else if (opt_ins_raw == "exp2,exp4") { - EXPECT_THAT(optimizations, UnorderedElementsAre("exp2", "exp4")); - tested_times++; - } - } - EXPECT_EQ(tested_times, 1); -} - -INSTANTIATE_TEST_SUITE_P( - Test, SelectOptimizationsOptTest, - ::testing::Combine(::testing::Values("all", "", "exp2,exp4"), - ::testing::Values("all", "", "exp1,exp5"))); - -class SelectOptimizationsConflictTest - : public ::testing::TestWithParam> {}; - -TEST_P(SelectOptimizationsConflictTest, DatasetUtils) { - string job_name = "job"; - const string opt_ins_raw = std::get<0>(GetParam()); - const string opt_outs_raw = std::get<1>(GetParam()); - const uint64 hash_result = std::get<2>(GetParam()); - auto hash_func = [hash_result](const string& str) { return hash_result; }; - absl::flat_hash_map live_experiments = { - {"exp1", 20}, {"exp2", 30}, {"exp3", 40}, - {"exp4", 60}, {"exp5", 70}, {"exp6", 80}}; - std::vector optimizations_enabled = {"exp1", "exp4"}, - optimizations_disabled = {"exp2", "exp5"}, - optimizations_default = {"exp3", "exp6"}; - std::vector optimizations = - SelectOptimizations(job_name, opt_ins_raw, opt_outs_raw, live_experiments, - optimizations_enabled, optimizations_disabled, - optimizations_default, hash_func); - - int tested_times = 0; - if (opt_outs_raw.empty()) { - EXPECT_THAT(optimizations, - UnorderedElementsAre("exp1", "exp3", "exp4", "exp6")); - tested_times++; - } else if (opt_outs_raw == "exp1,exp3") { - EXPECT_THAT(optimizations, UnorderedElementsAre("exp1", "exp4", "exp6")); - tested_times++; - } - EXPECT_EQ(tested_times, 1); -} - -INSTANTIATE_TEST_SUITE_P(Test, SelectOptimizationsConflictTest, - ::testing::Combine(::testing::Values("", "exp2"), - ::testing::Values("", "exp1,exp3"), - ::testing::Values(10, 50, 90))); - } // namespace } // namespace data } // namespace tensorflow diff --git 
a/tensorflow/core/kernels/data/optimize_dataset_op.cc b/tensorflow/core/kernels/data/optimize_dataset_op.cc index a0101435794..c976a8f7b08 100644 --- a/tensorflow/core/kernels/data/optimize_dataset_op.cc +++ b/tensorflow/core/kernels/data/optimize_dataset_op.cc @@ -18,10 +18,8 @@ limitations under the License. #include "tensorflow/core/framework/partial_tensor_shape.h" #include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/kernels/data/dataset_utils.h" #include "tensorflow/core/kernels/data/rewrite_utils.h" #include "tensorflow/core/lib/random/random.h" -#include "tensorflow/core/platform/host_info.h" #include "tensorflow/core/protobuf/rewriter_config.pb.h" namespace tensorflow { @@ -33,18 +31,10 @@ namespace data { /* static */ constexpr const char* const OptimizeDatasetOp::kDatasetType; /* static */ constexpr const char* const OptimizeDatasetOp::kInputDataset; /* static */ constexpr const char* const OptimizeDatasetOp::kOptimizations; -/* static */ constexpr const char* const - OptimizeDatasetOp::kOptimizationsEnabled; -/* static */ constexpr const char* const - OptimizeDatasetOp::kOptimizationsDisabled; -/* static */ constexpr const char* const - OptimizeDatasetOp::kOptimizationsDefault; /* static */ constexpr const char* const OptimizeDatasetOp::kOutputTypes; /* static */ constexpr const char* const OptimizeDatasetOp::kOutputShapes; /* static */ constexpr const char* const OptimizeDatasetOp::kOptimizationConfigs; -/* static */ constexpr const char* const OptimizeDatasetOp::kOptimizeDatasetV1; -/* static */ constexpr const char* const OptimizeDatasetOp::kOptimizeDatasetV2; constexpr char kOptimizerName[] = "tf_data_meta_optimizer"; constexpr char kOptimizers[] = "optimizers"; @@ -52,12 +42,6 @@ constexpr char kOptimizerConfigs[] = "optimizer_configs"; OptimizeDatasetOp::OptimizeDatasetOp(OpKernelConstruction* ctx) : UnaryDatasetOpKernel(ctx) { - auto& op_name = ctx->def().op(); - if (op_name == kOptimizeDatasetV1) { - op_version_ = 1; - } else if (op_name == kOptimizeDatasetV2) { - op_version_ = 2; - } OP_REQUIRES_OK(ctx, ctx->GetAttr(kOptimizationConfigs, &optimization_configs_)); } @@ -65,44 +49,8 @@ OptimizeDatasetOp::OptimizeDatasetOp(OpKernelConstruction* ctx) void OptimizeDatasetOp::MakeDataset(OpKernelContext* ctx, DatasetBase* input, DatasetBase** output) { std::vector optimizations; - if (op_version_ == 1) { - OP_REQUIRES_OK( - ctx, ParseVectorArgument(ctx, kOptimizations, &optimizations)); - } else if (op_version_ == 2) { - std::vector optimizations_enabled, optimizations_disabled, - optimizations_default; - OP_REQUIRES_OK(ctx, ParseVectorArgument(ctx, kOptimizationsEnabled, - &optimizations_enabled)); - OP_REQUIRES_OK(ctx, - ParseVectorArgument(ctx, kOptimizationsDisabled, - &optimizations_disabled)); - OP_REQUIRES_OK(ctx, ParseVectorArgument(ctx, kOptimizationsDefault, - &optimizations_default)); - - string job_name = port::JobName(); - if (job_name.empty()) { - // If `job_name` is empty, apply the enabled and default optimizations - // directly. - optimizations.insert(optimizations.end(), optimizations_enabled.begin(), - optimizations_enabled.end()); - optimizations.insert(optimizations.end(), optimizations_default.begin(), - optimizations_default.end()); - } else { - // The map that stores the experiment names and for how much percentage - // of the jobs, the experiments will be randomly turned on. - // - // This is currently empty; we have no live experiments yet. 
- absl::flat_hash_map live_experiments; - - const string opt_ins_raw = std::getenv("TF_DATA_EXPERIMENT_OPT_IN"); - const string opt_outs_raw = std::getenv("TF_DATA_EXPERIMENT_OPT_OUT"); - auto hash_func = [](const string& str) { return Hash64(str); }; - optimizations = SelectOptimizations( - job_name, opt_ins_raw, opt_outs_raw, live_experiments, - optimizations_enabled, optimizations_disabled, optimizations_default, - hash_func); - } - } + OP_REQUIRES_OK( + ctx, ParseVectorArgument(ctx, kOptimizations, &optimizations)); auto config_factory = [this, &optimizations]() { return CreateConfig(optimizations, optimization_configs_); @@ -147,8 +95,6 @@ RewriterConfig OptimizeDatasetOp::CreateConfig( namespace { REGISTER_KERNEL_BUILDER(Name("OptimizeDataset").Device(DEVICE_CPU), OptimizeDatasetOp); -REGISTER_KERNEL_BUILDER(Name("OptimizeDatasetV2").Device(DEVICE_CPU), - OptimizeDatasetOp); } // namespace } // namespace data } // namespace tensorflow diff --git a/tensorflow/core/kernels/data/optimize_dataset_op.h b/tensorflow/core/kernels/data/optimize_dataset_op.h index d9e366f1ad5..a5fcc72260d 100644 --- a/tensorflow/core/kernels/data/optimize_dataset_op.h +++ b/tensorflow/core/kernels/data/optimize_dataset_op.h @@ -25,18 +25,10 @@ class OptimizeDatasetOp : public UnaryDatasetOpKernel { static constexpr const char* const kDatasetType = "Optimize"; static constexpr const char* const kInputDataset = "input_dataset"; static constexpr const char* const kOptimizations = "optimizations"; - static constexpr const char* const kOptimizationsEnabled = - "optimizations_enabled"; - static constexpr const char* const kOptimizationsDisabled = - "optimizations_disabled"; - static constexpr const char* const kOptimizationsDefault = - "optimizations_default"; static constexpr const char* const kOutputTypes = "output_types"; static constexpr const char* const kOutputShapes = "output_shapes"; static constexpr const char* const kOptimizationConfigs = "optimization_configs"; - static constexpr const char* const kOptimizeDatasetV1 = "OptimizeDataset"; - static constexpr const char* const kOptimizeDatasetV2 = "OptimizeDatasetV2"; explicit OptimizeDatasetOp(OpKernelConstruction* ctx); @@ -49,7 +41,6 @@ class OptimizeDatasetOp : public UnaryDatasetOpKernel { std::vector optimizations_configs); std::vector optimization_configs_; - int op_version_ = 0; }; } // namespace data diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc index 6ef5635e95a..4f750cc938d 100644 --- a/tensorflow/core/ops/dataset_ops.cc +++ b/tensorflow/core/ops/dataset_ops.cc @@ -837,17 +837,6 @@ REGISTER_OP("OptimizeDataset") .Attr("optimization_configs: list(string) = []") .SetShapeFn(shape_inference::ScalarShape); -REGISTER_OP("OptimizeDatasetV2") - .Input("input_dataset: variant") - .Input("optimizations_enabled: string") - .Input("optimizations_disabled: string") - .Input("optimizations_default: string") - .Output("handle: variant") - .Attr("output_types: list(type) >= 1") - .Attr("output_shapes: list(shape) >= 1") - .Attr("optimization_configs: list(string) = []") - .SetShapeFn(shape_inference::ScalarShape); - REGISTER_OP("OptionalFromValue") .Input("components: Toutput_types") .Output("optional: variant") diff --git a/tensorflow/core/platform/default/port.cc b/tensorflow/core/platform/default/port.cc index 5b96eec072c..11b3cd7fd9a 100644 --- a/tensorflow/core/platform/default/port.cc +++ b/tensorflow/core/platform/default/port.cc @@ -61,8 +61,6 @@ string Hostname() { return string(hostname); } -string JobName() { 
return ""; } - int NumSchedulableCPUs() { #if defined(__linux__) && !defined(__ANDROID__) cpu_set_t cpuset; diff --git a/tensorflow/core/platform/host_info.h b/tensorflow/core/platform/host_info.h index 3447b2e0330..e76b83adf34 100644 --- a/tensorflow/core/platform/host_info.h +++ b/tensorflow/core/platform/host_info.h @@ -21,13 +21,9 @@ limitations under the License. namespace tensorflow { namespace port { -// Return the hostname of the machine on which this process is running. +// Return the hostname of the machine on which this process is running string Hostname(); -// Return the job name as a string if it exists, otherwise return an empty -// string. -string JobName(); - } // namespace port } // namespace tensorflow diff --git a/tensorflow/python/data/experimental/kernel_tests/optimize_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/optimize_dataset_test.py index e26e97dbd97..59e41528ea4 100644 --- a/tensorflow/python/data/experimental/kernel_tests/optimize_dataset_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/optimize_dataset_test.py @@ -225,14 +225,11 @@ class OptimizeDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): optimized_it = dataset_ops.make_initializable_iterator(optimized_dataset) self.assertGreaterEqual(len(w), 1) - graph_rewrites = options._graph_rewrites() - expected = ( - "tf.data graph rewrites are not compatible with " - "tf.Variable. The following rewrites will be disabled: %s." - " To enable rewrites, use resource variables instead by " - "calling `tf.enable_resource_variables()` at the start of the " - "program." % - (", ".join(graph_rewrites.enabled + graph_rewrites.default))) + expected = ("tf.data graph rewrites are not compatible with " + "tf.Variable. The following rewrites will be disabled: %s." + " To enable rewrites, use resource variables instead by " + "calling `tf.enable_resource_variables()` at the start of the " + "program." 
% (", ".join(options._graph_rewrites()))) self.assertTrue(any(expected in str(warning) for warning in w)) # Check that outputs are the same in the optimized and unoptimized cases, @@ -254,136 +251,34 @@ class OptimizeDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): break @combinations.generate(test_base.default_test_combinations()) - def testOptimizationDefault(self): - """Tests the optimization settings by default.""" + def testOptimizationEnabledByDefault(self): + """Tests that some optimizations are applied to datasets by default.""" options = dataset_ops.Options() - expected_optimizations_enabled = [] - expected_optimizations_disabled = [] - expected_optimizations_default = [ + expected_optimizations = [ "map_and_batch_fusion", "noop_elimination", "shuffle_and_repeat_fusion", ] - graph_rewrites = options._graph_rewrites() - self.assertEqual(set(graph_rewrites.enabled), - set(expected_optimizations_enabled)) - self.assertEqual(set(graph_rewrites.disabled), - set(expected_optimizations_disabled)) - self.assertEqual(set(graph_rewrites.default), - set(expected_optimizations_default)) + self.assertEqual( + set(options._graph_rewrites()), set(expected_optimizations)) - options.experimental_optimization.apply_default_optimizations = True - graph_rewrites = options._graph_rewrites() - self.assertEqual(set(graph_rewrites.enabled), - set(expected_optimizations_enabled)) - self.assertEqual(set(graph_rewrites.disabled), - set(expected_optimizations_disabled)) - self.assertEqual(set(graph_rewrites.default), - set(expected_optimizations_default)) + @combinations.generate(test_base.default_test_combinations()) + def testOptimizationDisableDefault(self): + """Tests that we can disable all graph optimizations enabled by default. + If the `apply_default_optimizations` optimization options flag is False, + only explicitly enabled optimizations will be applied. 
+ """ + options = dataset_ops.Options() options.experimental_optimization.apply_default_optimizations = False - expected_optimizations_default = [] - graph_rewrites = options._graph_rewrites() - self.assertEqual(set(graph_rewrites.enabled), - set(expected_optimizations_enabled)) - self.assertEqual(set(graph_rewrites.disabled), - set(expected_optimizations_disabled)) - self.assertEqual(set(graph_rewrites.default), - set(expected_optimizations_default)) - - @combinations.generate(test_base.default_test_combinations()) - def testOptimizationEnabled(self): - """Tests the optimization settings by enabling all.""" - options = dataset_ops.Options() - options.experimental_optimization.filter_fusion = True - options.experimental_optimization.filter_with_random_uniform_fusion = True options.experimental_optimization.hoist_random_uniform = True - options.experimental_optimization.map_and_batch_fusion = True - options.experimental_optimization.map_and_filter_fusion = True - options.experimental_optimization.map_parallelization = True - options.experimental_optimization.map_fusion = True options.experimental_optimization.noop_elimination = True - options.experimental_optimization.parallel_batch = True - options.experimental_optimization.shuffle_and_repeat_fusion = True - options.experimental_optimization.map_vectorization.enabled = True - options.experimental_optimization.autotune_buffers = True - options.experimental_deterministic = False - options.experimental_stats.latency_all_edges = True - options.experimental_slack = True - - expected_optimizations_enabled = [ - "filter_fusion", - "filter_with_random_uniform_fusion", + expected_optimizations = [ "hoist_random_uniform", - "map_and_batch_fusion", - "map_and_filter_fusion", - "map_parallelization", - "map_fusion", "noop_elimination", - "parallel_batch", - "shuffle_and_repeat_fusion", - "map_vectorization", - "inject_prefetch", - "make_sloppy", - "latency_all_edges", - "slack", ] - expected_optimizations_disabled = [] - expected_optimizations_default = [] - graph_rewrites = options._graph_rewrites() - self.assertEqual(set(graph_rewrites.enabled), - set(expected_optimizations_enabled)) - self.assertEqual(set(graph_rewrites.disabled), - set(expected_optimizations_disabled)) - self.assertEqual(set(graph_rewrites.default), - set(expected_optimizations_default)) - - @combinations.generate(test_base.default_test_combinations()) - def testOptimizationDisabled(self): - """Tests the optimization settings by disabling all.""" - options = dataset_ops.Options() - options.experimental_optimization.filter_fusion = False - options.experimental_optimization.filter_with_random_uniform_fusion = False - options.experimental_optimization.hoist_random_uniform = False - options.experimental_optimization.map_and_batch_fusion = False - options.experimental_optimization.map_and_filter_fusion = False - options.experimental_optimization.map_parallelization = False - options.experimental_optimization.map_fusion = False - options.experimental_optimization.noop_elimination = False - options.experimental_optimization.parallel_batch = False - options.experimental_optimization.shuffle_and_repeat_fusion = False - options.experimental_optimization.map_vectorization.enabled = False - options.experimental_optimization.autotune = False - options.experimental_deterministic = True - options.experimental_stats.latency_all_edges = False - options.experimental_slack = False - - expected_optimizations_enabled = [] - expected_optimizations_disabled = [ - "filter_fusion", - 
"filter_with_random_uniform_fusion", - "hoist_random_uniform", - "map_and_batch_fusion", - "map_and_filter_fusion", - "map_parallelization", - "map_fusion", - "noop_elimination", - "parallel_batch", - "shuffle_and_repeat_fusion", - "map_vectorization", - "inject_prefetch", - "make_sloppy", - "latency_all_edges", - "slack", - ] - expected_optimizations_default = [] - graph_rewrites = options._graph_rewrites() - self.assertEqual(set(graph_rewrites.enabled), - set(expected_optimizations_enabled)) - self.assertEqual(set(graph_rewrites.disabled), - set(expected_optimizations_disabled)) - self.assertEqual(set(graph_rewrites.default), - set(expected_optimizations_default)) + self.assertEqual( + set(options._graph_rewrites()), set(expected_optimizations)) @combinations.generate(test_base.default_test_combinations()) def testAutotuningDefaults(self): @@ -400,7 +295,7 @@ class OptimizeDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): def testAutotuningBufferSizes(self): options = dataset_ops.Options() options.experimental_optimization.autotune_buffers = True - self.assertIn("inject_prefetch", options._graph_rewrites().enabled) + self.assertIn("inject_prefetch", options._graph_rewrites()) autotune, algorithm, cpu_budget = options._autotune_settings() self.assertTrue(autotune) self.assertEqual(algorithm, diff --git a/tensorflow/python/data/experimental/kernel_tests/prefetch_with_slack_test.py b/tensorflow/python/data/experimental/kernel_tests/prefetch_with_slack_test.py index cbff39b90e5..ff1f1680a76 100644 --- a/tensorflow/python/data/experimental/kernel_tests/prefetch_with_slack_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/prefetch_with_slack_test.py @@ -45,7 +45,7 @@ class PrefetchWithSlackTest(test_base.DatasetTestBase, parameterized.TestCase): multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator( dataset, ["/cpu:1", "/cpu:2"]) dataset = multi_device_iterator._dataset # pylint: disable=protected-access - self.assertIn("slack", dataset.options()._graph_rewrites().enabled) + self.assertIn("slack", dataset.options()._graph_rewrites()) self.assertIn("slack:slack_period:2", dataset.options()._graph_rewrite_configs()) @@ -69,7 +69,7 @@ class PrefetchWithSlackTest(test_base.DatasetTestBase, parameterized.TestCase): options = dataset_ops.Options() options.experimental_slack = True dataset = dataset.with_options(options) - self.assertIn("slack", dataset.options()._graph_rewrites().enabled) + self.assertIn("slack", dataset.options()._graph_rewrites()) self.assertIn("slack:slack_period:1", dataset.options()._graph_rewrite_configs()) self.assertDatasetProduces(dataset, range(10)) diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization/optimize_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/optimize_dataset_serialization_test.py index 30d53165f85..385b1acd49c 100644 --- a/tensorflow/python/data/experimental/kernel_tests/serialization/optimize_dataset_serialization_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/serialization/optimize_dataset_serialization_test.py @@ -36,8 +36,7 @@ class OptimizeDatasetSerializationTest( def build_dataset(num_elements, batch_size): return dataset_ops.Dataset.range(num_elements).map(lambda x: x * x).batch( - batch_size).apply( - optimization.optimize(["map_and_batch_fusion"], None, None)) + batch_size).apply(optimization.optimize(["map_and_batch_fusion"])) self.run_core_tests(lambda: build_dataset(200, 10), 20) @@ -51,8 +50,7 @@ class 
OptimizeDatasetSerializationTest( dataset = dataset.batch(5) # map_vectorization adds a new vectorized function to the function # library. - dataset = dataset.apply( - optimization.optimize(["map_vectorization"], None, None)) + dataset = dataset.apply(optimization.optimize(["map_vectorization"])) return dataset self.run_core_tests(build_dataset, 20) diff --git a/tensorflow/python/data/experimental/ops/optimization.py b/tensorflow/python/data/experimental/ops/optimization.py index 161850521de..4581a612ed6 100644 --- a/tensorflow/python/data/experimental/ops/optimization.py +++ b/tensorflow/python/data/experimental/ops/optimization.py @@ -36,19 +36,13 @@ def model(): return _apply_fn -def optimize(optimizations_enabled=None, optimizations_disabled=None, - optimizations_default=None): +def optimize(optimizations=None): """A transformation that applies optimizations. Args: - optimizations_enabled: (Optional.) A `tf.string` vector `tf.Tensor` - identifying enabled optimizations. If not specified, set to be empty. - - optimizations_disabled: (Optional.) A `tf.string` vector `tf.Tensor` - identifying disabled optimizations. If not specified, set to be empty. - - optimizations_default: (Optional.) A `tf.string` vector `tf.Tensor` - identifying default optimizations. If not specified, set to be empty. + optimizations: (Optional.) A `tf.string` vector `tf.Tensor` identifying + optimizations to use. If not specified, the default set of optimizations + is applied. Returns: A `Dataset` transformation function, which can be passed to @@ -57,11 +51,7 @@ def optimize(optimizations_enabled=None, optimizations_disabled=None, def _apply_fn(dataset): """Function from `Dataset` to `Dataset` that applies the transformation.""" - return dataset_ops._OptimizeDataset( # pylint: disable=protected-access - dataset, - optimizations_enabled, - optimizations_disabled, - optimizations_default) + return dataset_ops._OptimizeDataset(dataset, optimizations) # pylint: disable=protected-access return _apply_fn diff --git a/tensorflow/python/data/experimental/ops/optimization_options.py b/tensorflow/python/data/experimental/ops/optimization_options.py index fa7a0d23dea..ab1c7b73212 100644 --- a/tensorflow/python/data/experimental/ops/optimization_options.py +++ b/tensorflow/python/data/experimental/ops/optimization_options.py @@ -53,13 +53,9 @@ class MapVectorizationOptions(options.OptionsBase): "defaults to False.") def _graph_rewrites(self): - graph_rewrites = options.graph_rewrites() - result = graph_rewrites(enabled=[], disabled=[], default=[]) - if self.enabled is True: # pylint: disable=g-bool-id-comparison - result.enabled.append("map_vectorization") - elif self.enabled is False: # pylint: disable=g-bool-id-comparison - result.disabled.append("map_vectorization") - return result + if self.enabled: + return ["map_vectorization"] + return [] def _graph_rewrite_configs(self): if not self.enabled: @@ -233,20 +229,8 @@ class OptimizationOptions(options.OptionsBase): return autotune, algorithm, cpu_budget def _graph_rewrites(self): - """Produces lists of enabled, disabled and default graph optimizations. - - Returns: - result: a namedtuple with three attributes. `result.enabled` is the list - of user enabled optimizations. `result.disabled` is the list of user - disabled optimizations. `result.default` is the list of optimizations - that are enabled by default (the user has not explicitly enabled or - disabled them). 
- """ - if self.map_vectorization is not None: - result = self.map_vectorization._graph_rewrites() # pylint: disable=protected-access - else: - result = MapVectorizationOptions()._graph_rewrites() # pylint: disable=protected-access - + """Produces the list of enabled graph optimizations.""" + result = set() all_optimizations = [ "filter_fusion", "filter_with_random_uniform_fusion", @@ -260,8 +244,11 @@ class OptimizationOptions(options.OptionsBase): "reorder_data_discarding_ops", "shuffle_and_repeat_fusion", ] + for optimization in all_optimizations: + if getattr(self, optimization): + result.add(optimization) - if self.apply_default_optimizations is not False: # pylint: disable=g-bool-id-comparison + if self.apply_default_optimizations is not False: # The following optimizations are turned on by default, unless the user # explicitly disables them. optimizations_to_disable = [ @@ -270,29 +257,21 @@ class OptimizationOptions(options.OptionsBase): "shuffle_and_repeat_fusion", ] for optimization in optimizations_to_disable: - if getattr(self, optimization) is None: - result.default.append(optimization) + if getattr(self, optimization) is not False: + result.add(optimization) - # Each of these attributes on the Options object is either True (explicitly - # enabled), False (explicitly disabled), or None (default). - for optimization in all_optimizations: - if getattr(self, optimization) is True: # pylint: disable=g-bool-id-comparison - result.enabled.append(optimization) - elif getattr(self, optimization) is False: # pylint: disable=g-bool-id-comparison - result.disabled.append(optimization) + if self.map_vectorization is not None: + result.update(self.map_vectorization._graph_rewrites()) # pylint: disable=protected-access autotune_buffers = self._autotune_buffers() - if self.autotune is not False and autotune_buffers is True: # pylint: disable=g-bool-id-comparison + if self.autotune is not False and autotune_buffers: # pylint: disable=g-bool-id-comparison # When autotuning buffer sizes is enabled, we inject a `prefetch` # transformation after asynchronous dataset ops. Only the buffer sizes of # prefetch transformations will be autotuned, though this is practically # equivalent to tuning the buffer sizes of the other asynchronous # transformations. 
- result.enabled.append("inject_prefetch") - if self.autotune is False: # pylint: disable=g-bool-id-comparison - result.disabled.append("inject_prefetch") - - return result + result.add("inject_prefetch") + return sorted(list(result)) def _graph_rewrite_configs(self): if self.map_vectorization is not None: diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index bd75d0a735a..491eb031931 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -30,13 +30,11 @@ from six.moves import queue as Queue # pylint: disable=redefined-builtin from tensorflow.core.framework import graph_pb2 from tensorflow.python import tf2 -from tensorflow.python.compat import compat from tensorflow.python.data.experimental.ops import distribute_options from tensorflow.python.data.experimental.ops import optimization_options from tensorflow.python.data.experimental.ops import stats_options from tensorflow.python.data.experimental.ops import threading_options from tensorflow.python.data.ops import iterator_ops -from tensorflow.python.data.util import convert from tensorflow.python.data.util import nest from tensorflow.python.data.util import options as options_lib from tensorflow.python.data.util import random_seed @@ -376,18 +374,16 @@ class DatasetV2(collections_abc.Iterable, tracking_base.Trackable, graph_rewrites = options._graph_rewrites() graph_rewrite_configs = options._graph_rewrite_configs() # pylint: enable=protected-access - if graph_rewrites.enabled or graph_rewrites.default: + if graph_rewrites: if self._has_captured_ref(): warnings.warn( "tf.data graph rewrites are not compatible with tf.Variable. " "The following rewrites will be disabled: %s. To enable " "rewrites, use resource variables instead by calling " "`tf.enable_resource_variables()` at the start of the program." % - ", ".join(graph_rewrites.enabled + graph_rewrites.default)) + ", ".join(graph_rewrites)) else: - dataset = _OptimizeDataset(dataset, graph_rewrites.enabled, - graph_rewrites.disabled, - graph_rewrites.default, + dataset = _OptimizeDataset(dataset, graph_rewrites, graph_rewrite_configs) # (3) Apply autotune options @@ -2891,39 +2887,22 @@ class Options(options_lib.OptionsBase): "is being captured.") def _graph_rewrites(self): - """Produces lists of enabled, disabled, default static graph rewrites. - - Returns: - result: a namedtuple with three attributes. `result.enabled` is the list - of user enabled graph rewrites. `result.disabled` is the list of user - disabled graph rewrites. `result.default` is the list of graph - rewrites that are enabled by default (the user has not explicitly - enabled or disabled them). 
- """ + """Produces the list of enabled static graph rewrites.""" + result = [] if self.experimental_optimization is not None: - result = self.experimental_optimization._graph_rewrites() # pylint: disable=protected-access + result.extend(self.experimental_optimization._graph_rewrites()) # pylint: disable=protected-access else: # Apply default options - result = optimization_options.OptimizationOptions()._graph_rewrites() # pylint: disable=protected-access + result.extend( + optimization_options.OptimizationOptions()._graph_rewrites()) # pylint: disable=protected-access if self.experimental_deterministic is False: # pylint: disable=g-bool-id-comparison - result.enabled.append("make_sloppy") - elif self.experimental_deterministic is True: # pylint: disable=g-bool-id-comparison - result.disabled.append("make_sloppy") - if self.experimental_stats: - if self.experimental_stats.latency_all_edges is True: # pylint: disable=g-bool-id-comparison - result.enabled.append("latency_all_edges") - elif self.experimental_stats.latency_all_edges is False: # pylint: disable=g-bool-id-comparison - result.disabled.append("latency_all_edges") - if self.experimental_slack is True: # pylint: disable=g-bool-id-comparison - result.enabled.append("slack") - elif self.experimental_slack is False: # pylint: disable=g-bool-id-comparison - result.disabled.append("slack") - - graph_rewrites = options_lib.graph_rewrites() - return graph_rewrites(enabled=list(set(result.enabled)), - disabled=list(set(result.disabled)), - default=list(set(result.default))) + result.append("make_sloppy") + if self.experimental_stats and self.experimental_stats.latency_all_edges: + result.append("latency_all_edges") + if self.experimental_slack: + result.append("slack") + return result def _graph_rewrite_configs(self): """Produces the list of configurations for enabled graph optimizations.""" @@ -4408,55 +4387,19 @@ class _ModelDataset(UnaryUnchangedStructureDataset): class _OptimizeDataset(UnaryUnchangedStructureDataset): """A `Dataset` that acts as an identity, and applies optimizations.""" - def __init__(self, - input_dataset, - optimizations_enabled, - optimizations_disabled, - optimizations_default, - optimization_configs=None): + def __init__(self, input_dataset, optimizations, optimization_configs=None): self._input_dataset = input_dataset + if optimizations is None: + optimizations = [] if optimization_configs is None: optimization_configs = [] - - if compat.forward_compatible(2020, 8, 6): - self._optimizations_enabled = convert.optional_param_to_tensor( - argument_name="optimizations_enabled", - argument_value=optimizations_enabled, - argument_default=[], - argument_dtype=dtypes.string) - self._optimizations_disabled = convert.optional_param_to_tensor( - argument_name="optimizations_disabled", - argument_value=optimizations_disabled, - argument_default=[], - argument_dtype=dtypes.string) - self._optimizations_default = convert.optional_param_to_tensor( - argument_name="optimizations_default", - argument_value=optimizations_default, - argument_default=[], - argument_dtype=dtypes.string) - - variant_tensor = gen_dataset_ops.optimize_dataset_v2( - input_dataset._variant_tensor, # pylint: disable=protected-access - self._optimizations_enabled, - self._optimizations_disabled, - self._optimizations_default, - optimization_configs=optimization_configs, - **self._flat_structure) - else: - if optimizations_enabled is None: - optimizations_enabled = [] - if optimizations_default is None: - optimizations_default = [] - - self._optimizations = 
ops.convert_to_tensor( - optimizations_enabled + optimizations_default, - dtype=dtypes.string, - name="optimizations") - variant_tensor = gen_dataset_ops.optimize_dataset( - input_dataset._variant_tensor, # pylint: disable=protected-access - self._optimizations, - optimization_configs=optimization_configs, - **self._flat_structure) + self._optimizations = ops.convert_to_tensor( + optimizations, dtype=dtypes.string, name="optimizations") + variant_tensor = gen_dataset_ops.optimize_dataset( + input_dataset._variant_tensor, # pylint: disable=protected-access + self._optimizations, + optimization_configs=optimization_configs, + **self._flat_structure) super(_OptimizeDataset, self).__init__(input_dataset, variant_tensor) diff --git a/tensorflow/python/data/util/options.py b/tensorflow/python/data/util/options.py index 781ae6403fa..3c79197fae8 100644 --- a/tensorflow/python/data/util/options.py +++ b/tensorflow/python/data/util/options.py @@ -18,8 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import collections - def _internal_attr_name(name): return "_" + name @@ -58,12 +56,6 @@ class OptionsBase(object): "Cannot set the property %s on %s." % (name, type(self).__name__)) -# Creates a namedtuple with three keys for optimization graph rewrites settings. -def graph_rewrites(): - return collections.namedtuple("GraphRewrites", - ["enabled", "disabled", "default"]) - - def create_option(name, ty, docstring, default_factory=lambda: None): """Creates a type-checked property. diff --git a/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt index 3c47a392b7e..4ad0c0d4448 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt @@ -2660,10 +2660,6 @@ tf_module { name: "OptimizeDataset" argspec: "args=[\'input_dataset\', \'optimizations\', \'output_types\', \'output_shapes\', \'optimization_configs\', \'name\'], varargs=None, keywords=None, defaults=[\'[]\', \'None\'], " } - member_method { - name: "OptimizeDatasetV2" - argspec: "args=[\'input_dataset\', \'optimizations_enabled\', \'optimizations_disabled\', \'optimizations_default\', \'output_types\', \'output_shapes\', \'optimization_configs\', \'name\'], varargs=None, keywords=None, defaults=[\'[]\', \'None\'], " - } member_method { name: "OptionalFromValue" argspec: "args=[\'components\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt index 3c47a392b7e..4ad0c0d4448 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt @@ -2660,10 +2660,6 @@ tf_module { name: "OptimizeDataset" argspec: "args=[\'input_dataset\', \'optimizations\', \'output_types\', \'output_shapes\', \'optimization_configs\', \'name\'], varargs=None, keywords=None, defaults=[\'[]\', \'None\'], " } - member_method { - name: "OptimizeDatasetV2" - argspec: "args=[\'input_dataset\', \'optimizations_enabled\', \'optimizations_disabled\', \'optimizations_default\', \'output_types\', \'output_shapes\', \'optimization_configs\', \'name\'], varargs=None, keywords=None, defaults=[\'[]\', \'None\'], " - } member_method { name: "OptionalFromValue" argspec: "args=[\'components\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " From 
bfd09eebf980cd740c30e6bd06c0e8e8c8da6e67 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Jul 2020 19:46:46 -0700 Subject: [PATCH 1447/2522] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 323493489 Change-Id: I7edb3427438934af9e97694fed8d5ff65ebc3d3d --- tensorflow/go/op/wrappers.go | 41 ------------------------------------ 1 file changed, 41 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 9da46e94adb..687bc4ced33 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -8345,47 +8345,6 @@ func OptionalFromValue(scope *Scope, components []tf.Output) (optional tf.Output return op.Output(0) } -// OptimizeDatasetV2Attr is an optional argument to OptimizeDatasetV2. -type OptimizeDatasetV2Attr func(optionalAttr) - -// OptimizeDatasetV2OptimizationConfigs sets the optional optimization_configs attribute to value. -// If not specified, defaults to <> -func OptimizeDatasetV2OptimizationConfigs(value []string) OptimizeDatasetV2Attr { - return func(m optionalAttr) { - m["optimization_configs"] = value - } -} - -// Creates a dataset by applying related optimizations to `input_dataset`. -// -// Creates a dataset by applying related optimizations to `input_dataset`. -// -// Arguments: -// input_dataset: A variant tensor representing the input dataset. -// optimizations_enabled: A `tf.string` vector `tf.Tensor` identifying user enabled optimizations. -// optimizations_disabled: A `tf.string` vector `tf.Tensor` identifying user disabled optimizations. -// optimizations_default: A `tf.string` vector `tf.Tensor` identifying optimizations by default. -// -// -func OptimizeDatasetV2(scope *Scope, input_dataset tf.Output, optimizations_enabled tf.Output, optimizations_disabled tf.Output, optimizations_default tf.Output, output_types []tf.DataType, output_shapes []tf.Shape, optional ...OptimizeDatasetV2Attr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "OptimizeDatasetV2", - Input: []tf.Input{ - input_dataset, optimizations_enabled, optimizations_disabled, optimizations_default, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // OptimizeDatasetAttr is an optional argument to OptimizeDataset. type OptimizeDatasetAttr func(optionalAttr) From 6890ebe07c6050a9a8728950ed2649a58c25649e Mon Sep 17 00:00:00 2001 From: Thai Nguyen Date: Mon, 27 Jul 2020 20:07:13 -0700 Subject: [PATCH 1448/2522] Fix typo on android sdk download command The typo caused a failure on downloading build-tools. PiperOrigin-RevId: 323495569 Change-Id: Ia476898470bc95d13f2b0d91aca769680df6a129 --- tensorflow/lite/g3doc/guide/android.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/lite/g3doc/guide/android.md b/tensorflow/lite/g3doc/guide/android.md index 41b4c213504..a1493090588 100644 --- a/tensorflow/lite/g3doc/guide/android.md +++ b/tensorflow/lite/g3doc/guide/android.md @@ -143,7 +143,7 @@ directory instead (-v hostDir:/tmp). license): ```shell -android update sdk --no-ui -a --filter tools,platform-tools,android-${ANDROID_API_LEVEL},build-tools-${ANDROID_BUILD_TOOLS_VERSION}’ +android update sdk --no-ui -a --filter tools,platform-tools,android-${ANDROID_API_LEVEL},build-tools-${ANDROID_BUILD_TOOLS_VERSION} ``` You can now proceed to the "Build and Install" section. 
After you are finished From 2bf0b82fe2abc058a62121b1e2c629e022168f4c Mon Sep 17 00:00:00 2001 From: Tim Shen Date: Mon, 27 Jul 2020 20:19:52 -0700 Subject: [PATCH 1449/2522] [XLA/GPU] Remove temp buffer from nested computation arg list. Nested computations (used by AllReduce, Map, Reduce, ReduceWindow, Scatter, SelectAndScatter, Sort, and Fusion) are small customization pointers for certain ops. This patch rejects HLOs that require temporary buffers in a nested computation. These nested computations are meant to be called per-element in each CUDA thread. It doesn't make sense for each thread to request a global buffer assigned to them, especially when they don't care about their own thread index at all. In all of these cases, they are reading and writing local allocas or constants. PiperOrigin-RevId: 323496918 Change-Id: I95f05b0f57f6275a6855a485c948a99125b75c3b --- .../xla/service/gpu/hlo_to_ir_bindings.cc | 55 ++++++------------- .../xla/service/gpu/hlo_to_ir_bindings.h | 2 +- .../compiler/xla/service/gpu/ir_emitter.cc | 3 - .../xla/service/gpu/ir_emitter_nested.cc | 8 +-- .../xla/service/gpu/tests/scatter.hlo | 2 +- 5 files changed, 21 insertions(+), 49 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc index 7b0686af910..986e7192859 100644 --- a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc +++ b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc @@ -35,16 +35,18 @@ namespace gpu { using absl::StrAppend; using absl::StrCat; -void HloToIrBindings::EmitBasePointersForHlos( +Status HloToIrBindings::EmitBasePointersForHlos( absl::Span io_hlos, absl::Span non_io_hlos) { + CHECK(is_nested_); + // I/O HLOs are bound to the arguments of the current IR function, // *excluding* the output argument, which is added to non-I/O HLOs. // I.e., // - // void IrFunction(io_0, io_1, ..., io_{m-1}, output_arg, temp_buffer_base) { + // void IrFunction(io_0, io_1, ..., io_{m-1}, output_arg); llvm::Function* function = b_->GetInsertBlock()->getParent(); - CHECK_EQ(io_hlos.size() + 2, function->arg_size()); + CHECK_EQ(io_hlos.size() + 1, function->arg_size()); // An HLO can have duplicated operands. This data structure remembers which // operand HLOs are already bound to avoid rebinding the same HLO. @@ -55,11 +57,7 @@ void HloToIrBindings::EmitBasePointersForHlos( !absl::c_count(non_io_hlos, io_hlo)) << "IO HLOs and non-IO HLOs should be disjoint"; if (!already_bound_for_this_function.contains(io_hlo)) { - if (!is_nested_ && io_hlo->opcode() == HloOpcode::kGetTupleElement) { - BindHloToIrValue(*io_hlo, EmitGetTupleElement(io_hlo, &*arg_iter)); - } else { - BindHloToIrValue(*io_hlo, &*arg_iter); - } + BindHloToIrValue(*io_hlo, &*arg_iter); already_bound_for_this_function.insert(io_hlo); } ++arg_iter; @@ -69,9 +67,6 @@ void HloToIrBindings::EmitBasePointersForHlos( arg_iter->setName("output_arg"); ++arg_iter; - temp_buffer_base_ = &*arg_iter; - temp_buffer_base_->setName("temp_buffer"); - for (const HloInstruction* non_io_hlo : non_io_hlos) { if (already_bound_for_this_function.contains(non_io_hlo)) { continue; @@ -79,22 +74,6 @@ void HloToIrBindings::EmitBasePointersForHlos( already_bound_for_this_function.insert(non_io_hlo); if (non_io_hlo->opcode() == HloOpcode::kGetTupleElement) { - if (!is_nested_) { - // Lookup allocation GetTupleElement operand. 
- const BufferAllocation::Slice slice = - buffer_assignment_ - ->GetUniqueTopLevelSlice(non_io_hlo->LatestNonGteAncestor()) - .ConsumeValueOrDie(); - // We are not in a nested context, so check non-thread-local allocation. - CHECK(!slice.allocation()->is_thread_local()); - const int64 offset = slice.offset(); - CHECK_NE(nullptr, temp_buffer_base_); - // Emit IR for GetTupleElement instruction and bind to emitted value. - llvm::Value* base_ptr = - b_->CreateInBoundsGEP(temp_buffer_base_, b_->getInt64(offset)); - BindHloToIrValue(*non_io_hlo, - EmitGetTupleElement(non_io_hlo, base_ptr)); - } continue; } @@ -102,17 +81,15 @@ void HloToIrBindings::EmitBasePointersForHlos( continue; } - ShapeUtil::ForEachSubshape( + auto status = ShapeUtil::ForEachSubshapeWithStatus( non_io_hlo->shape(), [&](const Shape& /*subshape*/, const ShapeIndex& index) { - // A non-IO HLO with a buffer is bound to - // (1) an alloca if it is thread-local, or - // (2) an internal pointer in temp_buffer_base according to its - // offset. + // A non-IO HLO with a buffer is bound to an alloca if it is + // thread-local. auto slice_result = buffer_assignment_->GetUniqueSlice(non_io_hlo, index); if (!slice_result.ok()) { - return; + return Status::OK(); } const BufferAllocation::Slice slice = slice_result.ConsumeValueOrDie(); @@ -129,15 +106,15 @@ void HloToIrBindings::EmitBasePointersForHlos( *slice.allocation())); BindHloToIrValue(*non_io_hlo, global_for_constant); } else { - const int64 offset = slice.offset(); - CHECK_NE(nullptr, temp_buffer_base_); - BindHloToIrValue( - *non_io_hlo, - b_->CreateInBoundsGEP(temp_buffer_base_, b_->getInt64(offset)), - index); + return InternalError( + "Nested computation are not expected to take the temporary " + "buffer. All buffers are either constant or thread-local."); } + return Status::OK(); }); + TF_RETURN_IF_ERROR(status); } + return Status::OK(); } llvm::Value* HloToIrBindings::EmitGetTupleElement(const HloInstruction* gte, diff --git a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h index 5eef6727801..c059d9f19d7 100644 --- a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h +++ b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h @@ -43,7 +43,7 @@ class HloToIrBindings { b_(b), module_(llvm_module) {} - void EmitBasePointersForHlos( + Status EmitBasePointersForHlos( absl::Span io_hlos, absl::Span non_io_hlos); diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc index 04e24733971..31203b9c5f0 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc @@ -192,9 +192,6 @@ Status IrEmitter::EmitCallToNestedComputation( llvm::Value* casted_output = AddrCastToDefault(output, b_); arguments.push_back(casted_output); - // It is not required to do address space cast because TempBufferBase - // is always in addrspace 0. 
- arguments.push_back(bindings_.GetTempBufferBase()); Call(emitted_function, arguments); return Status::OK(); diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_nested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_nested.cc index 72f48c49096..e85a71a43cf 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_nested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_nested.cc @@ -67,8 +67,6 @@ Status IrEmitterNested::CodegenNestedComputation() { root_shape, ir_emitter_context_->llvm_module()->getDataLayout()); argument_dereferenceable_bytes.push_back(root_size); } - // The base pointer of the memory block for all pre-allocated temp buffers. - argument_types.push_back(b_.getInt8PtrTy()); llvm::FunctionType* function_type = llvm::FunctionType::get(b_.getVoidTy(), argument_types, false); @@ -106,7 +104,7 @@ Status IrEmitterNested::CodegenNestedComputation() { non_io_hlos.push_back(hlo); } } - bindings_.EmitBasePointersForHlos(io_hlos, non_io_hlos); + TF_RETURN_IF_ERROR(bindings_.EmitBasePointersForHlos(io_hlos, non_io_hlos)); TF_RETURN_IF_ERROR(nested_computation_.root_instruction()->Accept(this)); b_.SetInsertPoint(ret_instr); @@ -119,8 +117,8 @@ Status IrEmitterNested::CodegenNestedComputation() { llvm::Value* root_value = bindings_.GetBasePointer(*root_instruction); const Shape& return_shape = root_instruction->shape(); - // Second last argument is the out parameter. - llvm::Argument* out_parameter = std::prev(function->arg_end(), 2); + // Last argument is the out parameter. + llvm::Argument* out_parameter = std::prev(function->arg_end(), 1); if (ShapeUtil::IsScalar(return_shape)) { llvm::Value* ret_value = Load(root_value, "load_ret_value"); diff --git a/tensorflow/compiler/xla/service/gpu/tests/scatter.hlo b/tensorflow/compiler/xla/service/gpu/tests/scatter.hlo index c5edec4b916..c9e7daeb3bc 100644 --- a/tensorflow/compiler/xla/service/gpu/tests/scatter.hlo +++ b/tensorflow/compiler/xla/service/gpu/tests/scatter.hlo @@ -180,7 +180,7 @@ ENTRY main { // CHECK: atomic_op_loop_body: ; preds = %[[VAL_104]], %[[VAL_95]] // CHECK: %[[VAL_105:.*]] = load i32, i32* %[[VAL_64]], align 4 // CHECK: store i32 %[[VAL_105]], i32* %[[VAL_63]], align 4 -// CHECK: call void @mul_s32(i32* %[[VAL_63]], i32* %[[VAL_98]], i32* %[[VAL_63]], i8* null) +// CHECK: call void @mul_s32(i32* %[[VAL_63]], i32* %[[VAL_98]], i32* %[[VAL_63]]) // CHECK: %[[VAL_106:.*]] = load i32, i32* %[[VAL_63]], align 4 // CHECK: %[[VAL_107:.*]] = cmpxchg i32* %[[VAL_97]], i32 %[[VAL_105]], i32 %[[VAL_106]] seq_cst seq_cst // CHECK: %[[VAL_108:.*]] = extractvalue { i32, i1 } %[[VAL_107]], 0 From 6ff4343a548a90c11ba3d7677cb25eb54fce8e85 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Jul 2020 20:23:47 -0700 Subject: [PATCH 1450/2522] Multiply and broadcast reassociation in algebraic simplifier. 
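The new rule moves a scalar constant across a broadcast, rewriting

    Mul(Mul(a, constant), Broadcast(b))  =>  Mul(a, Broadcast(Mul(b, constant)))

so the constant multiply is performed on the smaller, pre-broadcast operand and only one full-shape multiply remains (which presumably also exposes more constant folding). On the shapes used in the new test, the effect is roughly the following (a sketch; the names are illustrative only):

    before: z = f32[2,2] multiply(broadcast(p1), multiply(p0, broadcast(2.0)))
    after:  z = f32[2,2] multiply(p0, broadcast(multiply(p1, 2.0)))
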
PiperOrigin-RevId: 323497258 Change-Id: I48206228980f8dd031b82e7d4215f8a781cec8e0 --- .../xla/service/algebraic_simplifier.cc | 37 +++++++++++++++++++ .../xla/service/algebraic_simplifier_test.cc | 23 ++++++++++++ 2 files changed, 60 insertions(+) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 86beb3de694..3ea516674b6 100755 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -2502,6 +2502,43 @@ Status AlgebraicSimplifierVisitor::HandleMultiply(HloInstruction* multiply) { } } + { + HloInstruction *a, *b, *constant, *op; + // Mul(Mul(a, constant1), Broadcast(b)) => + // Mul(Broadcast(Mul(b, constant1), a)) + if (Match(multiply, + m::MultiplyAnyOrder(m::MultiplyAnyOrder(m::NonConstant(&a), + m::Constant(&constant)), + m::Op(&op))) || + Match(multiply, + m::MultiplyAnyOrder( + m::MultiplyAnyOrder(m::NonConstant(&a), + m::Broadcast(m::Constant(&constant))), + m::Op(&op)))) { + // Check that the other side was a broadcast, and not of a constant. + if (ShapeUtil::IsScalar(constant->shape()) && + Match(op, m::Broadcast(m::NonConstant()))) { + auto dims = op->dimensions(); + b = op->mutable_operand(0); + if (!ShapeUtil::IsScalar(b->shape())) { + constant = computation_->AddInstruction( + HloInstruction::CreateBroadcast(b->shape(), constant, {})); + } + + auto new_mul = + computation_->AddInstruction(HloInstruction::CreateBinary( + b->shape(), HloOpcode::kMultiply, b, constant)); + + return ReplaceWithNewInstruction( + multiply, + HloInstruction::CreateBinary( + multiply->shape(), HloOpcode::kMultiply, a, + computation_->AddInstruction(HloInstruction::CreateBroadcast( + multiply->shape(), new_mul, dims)))); + } + } + } + VLOG(10) << "trying transform [(A * C1) * C2 => A * (C1 * C2)]"; HloInstruction *a, *c1, *c2; if (Match(multiply, diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index 137fbcc01a6..034d8ec4361 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -140,6 +140,29 @@ TEST_F(AlgebraicSimplifierTest, MultiplyChain) { m::MultiplyAnyOrder(m::ConstantScalar(2), m::ConstantScalar(4))))); } +// MUL(MUL(X, BROADCAST(constant)), BROADCAST(Y)) ==> +// MUL(X, BROADCAST(MUL(Y, BROADCAST(constant)))) +TEST_F(AlgebraicSimplifierTest, MultiplyBroadcastReassoc) { + const char* kModuleStr = R"( + HloModule m + test { + p0 = f32[2,2] parameter(0) + p1 = f32[] parameter(1) + b = f32[] constant(2) + c = f32[2, 2] broadcast(b), dimensions={} + x = f32[2,2] multiply(p0, c) + y = f32[2,2] broadcast(p1), dimensions={} + ROOT z = f32[2,2] multiply(y, x) + } + )"; + TF_ASSERT_OK_AND_ASSIGN(auto m, ParseAndReturnVerifiedModule(kModuleStr)); + ASSERT_TRUE(AlgebraicSimplifier(default_options_).Run(m.get()).ValueOrDie()); + EXPECT_THAT(m->entry_computation()->root_instruction(), + GmockMatch(m::MultiplyAnyOrder( + m::Parameter(0), m::Broadcast(m::MultiplyAnyOrder( + m::Parameter(1), m::Constant()))))); +} + // A*C + B*C => (A+B)*C if C is a broadcast of a floating-point power of 2. 
TEST_F(AlgebraicSimplifierTest, FactorFpAdditionWithBroadcast) { const char* kModuleStr = R"( From e31577598b02caf6a5bf6ebff0a83d35c4739233 Mon Sep 17 00:00:00 2001 From: Brian Zhao Date: Mon, 27 Jul 2020 20:27:53 -0700 Subject: [PATCH 1451/2522] Move adding concrete function inputs to the GetCallOp method. This means we don't expose captures on the C API surface, and instead marshaling of captures to function inputs becomes an implementation detail. This makes the integration with MLIR traced functions easier. Credit goes to Saurabh for this idea :) PiperOrigin-RevId: 323497623 Change-Id: Ib30b00520d59701e2ed868b68cc658ab00749245 --- .../c/experimental/saved_model/core/BUILD | 2 ++ .../saved_model/core/concrete_function.h | 6 +++--- .../saved_model/core/revived_types/BUILD | 2 ++ .../revived_types/tf_concrete_function.cc | 20 +++++++++++++------ .../core/revived_types/tf_concrete_function.h | 6 ++---- .../c/experimental/saved_model/internal/BUILD | 3 +++ .../saved_model/internal/concrete_function.cc | 20 ++++++++++++------- .../internal/saved_model_api_test.cc | 19 +++++------------- .../saved_model/public/concrete_function.h | 14 +++++++------ 9 files changed, 52 insertions(+), 40 deletions(-) diff --git a/tensorflow/c/experimental/saved_model/core/BUILD b/tensorflow/c/experimental/saved_model/core/BUILD index 38bdbee1fdc..8078758328c 100644 --- a/tensorflow/c/experimental/saved_model/core/BUILD +++ b/tensorflow/c/experimental/saved_model/core/BUILD @@ -26,6 +26,7 @@ cc_library( ":function_metadata", "//tensorflow/c/eager:immediate_execution_operation", "//tensorflow/c/eager:immediate_execution_tensor_handle", + "@com_google_absl//absl/types:span", ], ) @@ -146,6 +147,7 @@ cc_library( "//tensorflow/c/eager:immediate_execution_operation", "//tensorflow/c/eager:immediate_execution_tensor_handle", "//tensorflow/core:lib", + "@com_google_absl//absl/types:span", ], ) diff --git a/tensorflow/c/experimental/saved_model/core/concrete_function.h b/tensorflow/c/experimental/saved_model/core/concrete_function.h index 2cc627bcf27..da3a64b91a3 100644 --- a/tensorflow/c/experimental/saved_model/core/concrete_function.h +++ b/tensorflow/c/experimental/saved_model/core/concrete_function.h @@ -19,6 +19,7 @@ limitations under the License. #include #include +#include "absl/types/span.h" #include "tensorflow/c/eager/immediate_execution_operation.h" #include "tensorflow/c/eager/immediate_execution_tensor_handle.h" #include "tensorflow/c/experimental/saved_model/core/function_metadata.h" @@ -38,10 +39,9 @@ class ConcreteFunction { virtual ~ConcreteFunction() = default; // This method returns the "Call" Op used to execute the function. 
- virtual Status GetCallOp(ImmediateOpPtr* out) = 0; + virtual Status GetCallOp(absl::Span inputs, + ImmediateOpPtr* out) = 0; - virtual const std::vector& GetCaptures() - const = 0; virtual const FunctionMetadata& GetFunctionMetadata() const = 0; }; diff --git a/tensorflow/c/experimental/saved_model/core/revived_types/BUILD b/tensorflow/c/experimental/saved_model/core/revived_types/BUILD index 8bb15674db0..2b883618c87 100644 --- a/tensorflow/c/experimental/saved_model/core/revived_types/BUILD +++ b/tensorflow/c/experimental/saved_model/core/revived_types/BUILD @@ -69,6 +69,7 @@ cc_library( ], deps = [ ":tensorhandle_convertible", + "//tensorflow/c/eager:abstract_tensor_handle", "//tensorflow/c/eager:immediate_execution_context", "//tensorflow/c/eager:immediate_execution_operation", "//tensorflow/c/eager:immediate_execution_tensor_handle", @@ -77,5 +78,6 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core/common_runtime/eager:context", + "@com_google_absl//absl/types:span", ], ) diff --git a/tensorflow/c/experimental/saved_model/core/revived_types/tf_concrete_function.cc b/tensorflow/c/experimental/saved_model/core/revived_types/tf_concrete_function.cc index aa6f0e7205e..f734f9eca66 100644 --- a/tensorflow/c/experimental/saved_model/core/revived_types/tf_concrete_function.cc +++ b/tensorflow/c/experimental/saved_model/core/revived_types/tf_concrete_function.cc @@ -18,6 +18,8 @@ limitations under the License. #include #include +#include "absl/types/span.h" +#include "tensorflow/c/eager/abstract_tensor_handle.h" #include "tensorflow/c/eager/immediate_execution_operation.h" #include "tensorflow/c/eager/immediate_execution_tensor_handle.h" #include "tensorflow/c/experimental/saved_model/core/revived_types/tensorhandle_convertible.h" @@ -60,16 +62,12 @@ Status TFConcreteFunction::Create( return Status(); } -const std::vector& -TFConcreteFunction::GetCaptures() const { - return captures_; -} - const FunctionMetadata& TFConcreteFunction::GetFunctionMetadata() const { return metadata_; } -Status TFConcreteFunction::GetCallOp(ImmediateOpPtr* out) { +Status TFConcreteFunction::GetCallOp( + absl::Span inputs, ImmediateOpPtr* out) { out->reset(ctx_->CreateOperation()); // In eager mode, TF2 python executes functions by constructing an op with // the name of the functiondef: @@ -81,6 +79,16 @@ Status TFConcreteFunction::GetCallOp(ImmediateOpPtr* out) { // PartitionedCallOp for compatibility with "tooling that assumes functions in // graphs are PartitionedCallOps". TF_RETURN_IF_ERROR((*out)->Reset(name_.c_str(), nullptr)); + + // Adding the user-provided inputs to the function. + TF_RETURN_IF_ERROR((*out)->AddInputList(inputs)); + + absl::Span captures( + reinterpret_cast(captures_.data()), + captures_.size()); + + // Adding the captures of the function. + TF_RETURN_IF_ERROR((*out)->AddInputList(captures)); return Status(); } diff --git a/tensorflow/c/experimental/saved_model/core/revived_types/tf_concrete_function.h b/tensorflow/c/experimental/saved_model/core/revived_types/tf_concrete_function.h index 71c8322414d..d38f3546f91 100644 --- a/tensorflow/c/experimental/saved_model/core/revived_types/tf_concrete_function.h +++ b/tensorflow/c/experimental/saved_model/core/revived_types/tf_concrete_function.h @@ -58,10 +58,8 @@ class TFConcreteFunction : public ConcreteFunction { std::unique_ptr* out); // This method returns the "Call" Op used to execute the function. 
- Status GetCallOp(ImmediateOpPtr* out) override; - - const std::vector& GetCaptures() - const override; + Status GetCallOp(absl::Span inputs, + ImmediateOpPtr* out) override; const FunctionMetadata& GetFunctionMetadata() const override; diff --git a/tensorflow/c/experimental/saved_model/internal/BUILD b/tensorflow/c/experimental/saved_model/internal/BUILD index b22718dfd04..60ca0134602 100644 --- a/tensorflow/c/experimental/saved_model/internal/BUILD +++ b/tensorflow/c/experimental/saved_model/internal/BUILD @@ -42,12 +42,15 @@ cc_library( ":tensorhandle_list_type", "//tensorflow/c:c_api_macros", "//tensorflow/c:tf_status_internal", + "//tensorflow/c/eager:abstract_tensor_handle", "//tensorflow/c/eager:c_api", "//tensorflow/c/eager:immediate_execution_operation", "//tensorflow/c/eager:tfe_op_internal", + "//tensorflow/c/eager:tfe_tensorhandle_internal", "//tensorflow/c/experimental/saved_model/core:concrete_function", "//tensorflow/c/experimental/saved_model/core:function_metadata", "//tensorflow/core:lib", + "@com_google_absl//absl/types:span", ], ) diff --git a/tensorflow/c/experimental/saved_model/internal/concrete_function.cc b/tensorflow/c/experimental/saved_model/internal/concrete_function.cc index 12d49212a88..9f421a7b9b7 100644 --- a/tensorflow/c/experimental/saved_model/internal/concrete_function.cc +++ b/tensorflow/c/experimental/saved_model/internal/concrete_function.cc @@ -15,8 +15,11 @@ limitations under the License. #include "tensorflow/c/experimental/saved_model/public/concrete_function.h" +#include "absl/types/span.h" +#include "tensorflow/c/eager/abstract_tensor_handle.h" #include "tensorflow/c/eager/immediate_execution_operation.h" #include "tensorflow/c/eager/tfe_op_internal.h" +#include "tensorflow/c/eager/tfe_tensorhandle_internal.h" #include "tensorflow/c/experimental/saved_model/core/concrete_function.h" #include "tensorflow/c/experimental/saved_model/core/function_metadata.h" #include "tensorflow/c/experimental/saved_model/internal/concrete_function_type.h" @@ -32,15 +35,18 @@ TF_FunctionMetadata* TF_ConcreteFunctionGetMetadata(TF_ConcreteFunction* func) { &tensorflow::unwrap(func)->GetFunctionMetadata())); } -const TF_TensorHandleList* TF_ConcreteFunctionGetCaptures( - TF_ConcreteFunction* func) { - return tensorflow::wrap(&tensorflow::unwrap(func)->GetCaptures()); -} - TFE_Op* TF_ConcreteFunctionGetCallOp(TF_ConcreteFunction* func, + TFE_TensorHandle** inputs, int num_inputs, TF_Status* status) { - tensorflow::ImmediateOpPtr call_op(nullptr); - status->status = tensorflow::unwrap(func)->GetCallOp(&call_op); + tensorflow::ImmediateOpPtr call_op; + absl::Span input_span( + reinterpret_cast( + tensorflow::unwrap(inputs)), + static_cast(num_inputs)); + status->status = tensorflow::unwrap(func)->GetCallOp(input_span, &call_op); + if (!status->status.ok()) { + return nullptr; + } return tensorflow::wrap(call_op.release()); } diff --git a/tensorflow/c/experimental/saved_model/internal/saved_model_api_test.cc b/tensorflow/c/experimental/saved_model/internal/saved_model_api_test.cc index 3d490fe7e08..10b5677a48b 100644 --- a/tensorflow/c/experimental/saved_model/internal/saved_model_api_test.cc +++ b/tensorflow/c/experimental/saved_model/internal/saved_model_api_test.cc @@ -102,27 +102,18 @@ TEST_P(CSavedModelAPITest, LoadsSavedModel) { TF_GetSavedModelConcreteFunction(saved_model, "compute", status); EXPECT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); - TFE_Op* compute_fn_op = TF_ConcreteFunctionGetCallOp(compute_fn, status); - EXPECT_EQ(TF_GetCode(status), 
TF_OK) << TF_Message(status); - - const TF_TensorHandleList* captures = - TF_ConcreteFunctionGetCaptures(compute_fn); - - // TODO(bmzhao): Finish API on FunctionMetadata args, so we know how many - // inputs + outputs a function has. std::vector compute_fn_inputs; TFE_TensorHandle* input_a = TestScalarTensorHandle(ctx, 2.0f); TFE_TensorHandle* input_b = TestScalarTensorHandle(ctx, 1.0f); - compute_fn_inputs.reserve(2 + TF_TensorHandleListSize(captures)); compute_fn_inputs.push_back(input_a); compute_fn_inputs.push_back(input_b); - for (int i = 0; i < TF_TensorHandleListSize(captures); ++i) { - compute_fn_inputs.push_back(TF_TensorHandleListGet(captures, i)); - } - TFE_OpAddInputList(compute_fn_op, compute_fn_inputs.data(), - compute_fn_inputs.size(), status); + + TFE_Op* compute_fn_op = TF_ConcreteFunctionGetCallOp( + compute_fn, compute_fn_inputs.data(), compute_fn_inputs.size(), status); EXPECT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); + // TODO(bmzhao): Finish API on FunctionMetadata args, so we know how many + // inputs + outputs a function has. TFE_TensorHandle* compute_fn_outputs[1] = {nullptr}; int num_retvals = 1; diff --git a/tensorflow/c/experimental/saved_model/public/concrete_function.h b/tensorflow/c/experimental/saved_model/public/concrete_function.h index 944ddecea16..4cc2a4b4f05 100644 --- a/tensorflow/c/experimental/saved_model/public/concrete_function.h +++ b/tensorflow/c/experimental/saved_model/public/concrete_function.h @@ -35,13 +35,15 @@ typedef struct TF_ConcreteFunction TF_ConcreteFunction; TF_CAPI_EXPORT extern TF_FunctionMetadata* TF_ConcreteFunctionGetMetadata( TF_ConcreteFunction* func); -// Returns a list of TensorHandles implicitly captured by this function. -TF_CAPI_EXPORT extern const TF_TensorHandleList* TF_ConcreteFunctionGetCaptures( - TF_ConcreteFunction* func); - -// Returns a TFE_Op suitable for executing this function. +// Returns a TFE_Op suitable for executing this function. Caller must provide +// all function inputs in `inputs`, and must not add any additional inputs on +// the returned op. (i.e. don't call TFE_OpAddInput or TFE_OpAddInputList). +// The caller is responsible for deleting the returned TFE_Op. If op +// construction fails, `status` will be non-OK and the returned pointer will be +// null. TF_CAPI_EXPORT extern TFE_Op* TF_ConcreteFunctionGetCallOp( - TF_ConcreteFunction* func, TF_Status* status); + TF_ConcreteFunction* func, TFE_TensorHandle** inputs, int num_inputs, + TF_Status* status); #ifdef __cplusplus } // end extern "C" From 22f8998de295d0d24501732bd8bbade7ac770b03 Mon Sep 17 00:00:00 2001 From: Blake Hechtman Date: Mon, 27 Jul 2020 21:55:50 -0700 Subject: [PATCH 1452/2522] [XLA] Add mixed precision dot and convolution support to the HLO evaluator. 
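HandleDot and HandleConvolution now compare each operand's element type against the result shape and, when they differ, evaluate on literal.Convert(result_element_type) copies instead of requiring lhs, rhs, and result to share an element type. Below is a standalone sketch of the idea in plain C++ (an analogy only, not the XLA Literal API): lower-precision operands are promoted to the result precision before the multiply-accumulate.

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // Hypothetical stand-in for a mixed-precision dot: float operands with a
    // double result type, so each element is converted to the result type
    // before accumulation (mirroring the evaluator's literal conversion).
    double MixedPrecisionDot(const std::vector<float>& lhs,
                             const std::vector<float>& rhs) {
      double acc = 0.0;
      for (std::size_t i = 0; i < lhs.size(); ++i) {
        acc += static_cast<double>(lhs[i]) * static_cast<double>(rhs[i]);
      }
      return acc;
    }

    int main() {
      std::printf("%f\n", MixedPrecisionDot({1.f, 2.f, 3.f}, {4.f, 5.f, 6.f}));
    }
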
PiperOrigin-RevId: 323506310 Change-Id: Ibaeead976b891776cb530864e30d088ab4425fa3 --- .../xla/service/algebraic_simplifier.cc | 23 ++- .../xla/service/hlo_evaluator_typed_visitor.h | 143 +++++++++++++----- .../compiler/xla/service/shape_inference.cc | 4 +- 3 files changed, 124 insertions(+), 46 deletions(-) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 3ea516674b6..1f82c062df9 100755 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -2101,11 +2101,8 @@ Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) { AsInt64Slice(dot->dot_dimension_numbers().lhs_batch_dimensions()), AsInt64Slice( dot->dot_dimension_numbers().lhs_contracting_dimensions()))); - if (dot->shape().rank() != lhs->shape().rank()) { - std::vector lhs_broadcast_dims(lhs->shape().rank()); - absl::c_iota(lhs_broadcast_dims, 0); - new_lhs = computation_->AddInstruction(HloInstruction::CreateBroadcast( - dot->shape(), new_lhs, lhs_broadcast_dims)); + if (!ShapeUtil::SameElementType(dot->shape(), new_lhs->shape())) { + new_lhs = MakeConvertToHlo(new_lhs, dot->shape().element_type()); } TF_ASSIGN_OR_RETURN( HloInstruction * new_rhs, @@ -2114,6 +2111,15 @@ Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) { AsInt64Slice(dot->dot_dimension_numbers().rhs_batch_dimensions()), AsInt64Slice( dot->dot_dimension_numbers().rhs_contracting_dimensions()))); + if (!ShapeUtil::SameElementType(dot->shape(), new_rhs->shape())) { + new_rhs = MakeConvertToHlo(new_rhs, dot->shape().element_type()); + } + if (dot->shape().rank() != lhs->shape().rank()) { + std::vector lhs_broadcast_dims(lhs->shape().rank()); + absl::c_iota(lhs_broadcast_dims, 0); + new_lhs = computation_->AddInstruction(HloInstruction::CreateBroadcast( + dot->shape(), new_lhs, lhs_broadcast_dims)); + } if (dot->shape().rank() != rhs->shape().rank()) { std::vector rhs_broadcast_dims( dot->dot_dimension_numbers().lhs_batch_dimensions_size()); @@ -2145,6 +2151,10 @@ Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) { AsInt64Slice(dot->dot_dimension_numbers().lhs_batch_dimensions()), AsInt64Slice( dot->dot_dimension_numbers().lhs_contracting_dimensions()))); + if (!ShapeUtil::SameElementType(dot->shape(), new_lhs->shape())) { + new_lhs = MakeConvertToHlo(new_lhs, dot->shape().element_type()); + } + TF_ASSIGN_OR_RETURN( HloInstruction * new_rhs, NormalizeDotOperandToBatchMajorAndContractingMinor( @@ -2152,6 +2162,9 @@ Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) { AsInt64Slice(dot->dot_dimension_numbers().rhs_batch_dimensions()), AsInt64Slice( dot->dot_dimension_numbers().rhs_contracting_dimensions()))); + if (!ShapeUtil::SameElementType(dot->shape(), new_rhs->shape())) { + new_rhs = MakeConvertToHlo(new_rhs, dot->shape().element_type()); + } int64 lhs_outer_dims = lhs->shape().rank() - diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h index 1a154f32a6f..250e2cf1f08 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h @@ -27,6 +27,7 @@ limitations under the License. 
#include "absl/meta/type_traits.h" #include "absl/types/optional.h" #include "tensorflow/compiler/xla/array2d.h" +#include "tensorflow/compiler/xla/literal.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/hlo_casting_utils.h" #include "tensorflow/compiler/xla/service/hlo_evaluator.h" @@ -1076,13 +1077,13 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { return Status::OK(); } - Status HandleConvolution(HloInstruction* conv) override { - auto lhs = conv->operand(0); - auto rhs = conv->operand(1); + Status HandleConvolutionWithLiterals(HloInstruction* conv, + const Literal& lhs_literal, + const Literal& rhs_literal) { const auto& window = conv->window(); const Shape& result_shape = conv->shape(); - const Shape& lhs_shape = lhs->shape(); - const Shape& rhs_shape = rhs->shape(); + const Shape& lhs_shape = lhs_literal.shape(); + const Shape& rhs_shape = rhs_literal.shape(); TF_CHECK_OK(ShapeUtil::ValidateShape(lhs_shape)); TF_CHECK_OK(ShapeUtil::ValidateShape(rhs_shape)); @@ -1098,24 +1099,6 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { CHECK_GE(num_spatial_dims, 0); CHECK_EQ(window.dimensions_size(), num_spatial_dims); - const auto lhs_rank = lhs_shape.rank(); - const auto rhs_rank = rhs_shape.rank(); - - CHECK_EQ(num_spatial_dims + 2, lhs_rank); - CHECK_EQ(num_spatial_dims + 2, rhs_rank); - - TF_ASSIGN_OR_RETURN(auto inferred_return_shape, - ShapeInference::InferConvolveShape( - lhs_shape, rhs_shape, conv->feature_group_count(), - conv->batch_group_count(), window, dnums)); - CHECK(ShapeUtil::Compatible(result_shape, inferred_return_shape)) - << "return shape set to: " << ShapeUtil::HumanString(result_shape) - << " but is inferred to be: " - << ShapeUtil::HumanString(inferred_return_shape); - - const Literal& lhs_literal = parent_->GetEvaluatedLiteralFor(lhs); - const Literal& rhs_literal = parent_->GetEvaluatedLiteralFor(rhs); - std::vector window_dimension_sizes; for (auto i : dnums.kernel_spatial_dimensions()) { window_dimension_sizes.push_back(ShapeUtil::GetDimension(rhs_shape, i)); @@ -1271,9 +1254,68 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { return Status::OK(); } + Status HandleConvolution(HloInstruction* conv) override { + auto lhs = conv->operand(0); + auto rhs = conv->operand(1); + const auto& window = conv->window(); + const Shape& result_shape = conv->shape(); + const Shape& lhs_shape = lhs->shape(); + const Shape& rhs_shape = rhs->shape(); + + TF_CHECK_OK(ShapeUtil::ValidateShape(lhs_shape)); + TF_CHECK_OK(ShapeUtil::ValidateShape(rhs_shape)); + CHECK(lhs_shape.IsArray()); + CHECK(rhs_shape.IsArray()); + + const auto& dnums = conv->convolution_dimension_numbers(); + const int64 num_spatial_dims = dnums.output_spatial_dimensions_size(); + CHECK_EQ(num_spatial_dims, dnums.input_spatial_dimensions_size()); + CHECK_EQ(num_spatial_dims, dnums.kernel_spatial_dimensions_size()); + CHECK_GE(num_spatial_dims, 0); + CHECK_EQ(window.dimensions_size(), num_spatial_dims); + + const auto lhs_rank = lhs_shape.rank(); + const auto rhs_rank = rhs_shape.rank(); + + CHECK_EQ(num_spatial_dims + 2, lhs_rank); + CHECK_EQ(num_spatial_dims + 2, rhs_rank); + + TF_ASSIGN_OR_RETURN(auto inferred_return_shape, + ShapeInference::InferConvolveShape( + lhs_shape, rhs_shape, conv->feature_group_count(), + conv->batch_group_count(), window, dnums)); + CHECK(ShapeUtil::Compatible(result_shape, inferred_return_shape)) + << "return shape set to: " << ShapeUtil::HumanString(result_shape) + << " but 
is inferred to be: " + << ShapeUtil::HumanString(inferred_return_shape); + + const Literal& lhs_literal = parent_->GetEvaluatedLiteralFor(lhs); + const Literal& rhs_literal = parent_->GetEvaluatedLiteralFor(rhs); + const bool lhs_same = ShapeUtil::SameElementType(lhs_shape, result_shape); + const bool rhs_same = ShapeUtil::SameElementType(rhs_shape, result_shape); + if (rhs_same && lhs_same) { + return HandleConvolutionWithLiterals(conv, lhs_literal, rhs_literal); + } + if (rhs_same) { + return HandleConvolutionWithLiterals( + conv, lhs_literal.Convert(result_shape.element_type()).ValueOrDie(), + rhs_literal); + } + if (lhs_same) { + return HandleConvolutionWithLiterals( + conv, lhs_literal, + rhs_literal.Convert(result_shape.element_type()).ValueOrDie()); + } + return HandleConvolutionWithLiterals( + conv, lhs_literal.Convert(result_shape.element_type()).ValueOrDie(), + rhs_literal.Convert(result_shape.element_type()).ValueOrDie()); + } + Status HandleDot(HloInstruction* dot) override { if (dot->dot_dimension_numbers().rhs_contracting_dimensions_size() == 1 && - parent_->use_fast_path_) { + parent_->use_fast_path_ && + ShapeUtil::SameElementType(dot->operand(0)->shape(), dot->shape()) && + ShapeUtil::SameElementType(dot->operand(1)->shape(), dot->shape())) { return HandleDot(dot); } return HandleDotSlowPath(dot); @@ -1342,23 +1384,16 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { return HandleDotSlowPath(dot); } - Status HandleDotSlowPath(HloInstruction* dot) { - auto lhs = dot->operand(0); - auto rhs = dot->operand(1); - CHECK(dot->shape().IsArray()); - CHECK(lhs->shape().IsArray()); - CHECK(rhs->shape().IsArray()); - + Status HandleDotSlowPathWithLiterals(HloInstruction* dot, + const Literal& lhs_literal, + const Literal& rhs_literal) { const auto& dnums = dot->dot_dimension_numbers(); - const auto lhs_rank = lhs->shape().rank(); - const auto rhs_rank = rhs->shape().rank(); + const auto lhs_rank = lhs_literal.shape().rank(); + const auto rhs_rank = rhs_literal.shape().rank(); - CHECK(ShapeUtil::SameElementType(lhs->shape(), rhs->shape())); - CHECK(ShapeUtil::SameElementType(lhs->shape(), dot->shape())); - - const Literal& lhs_literal = parent_->GetEvaluatedLiteralFor(lhs); - const Literal& rhs_literal = parent_->GetEvaluatedLiteralFor(rhs); + CHECK(ShapeUtil::SameElementType(lhs_literal.shape(), rhs_literal.shape())); + CHECK(ShapeUtil::SameElementType(lhs_literal.shape(), dot->shape())); CHECK_EQ(dnums.lhs_batch_dimensions_size(), dnums.rhs_batch_dimensions_size()); @@ -1406,7 +1441,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { const int64 rhs_dnum = dnums.rhs_contracting_dimensions(i); accumulate_index_locations.push_back( {&lhs_index[lhs_dnum], &rhs_index[rhs_dnum]}); - const int64 dim_size = lhs->shape().dimensions(lhs_dnum); + const int64 dim_size = lhs_literal.shape().dimensions(lhs_dnum); accumulate_index_sizes.push_back(dim_size); } const int64 total_contraction_size = Product(accumulate_index_sizes); @@ -1457,6 +1492,36 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { return Status::OK(); } + Status HandleDotSlowPath(HloInstruction* dot) { + auto lhs = dot->operand(0); + auto rhs = dot->operand(1); + CHECK(dot->shape().IsArray()); + CHECK(lhs->shape().IsArray()); + CHECK(rhs->shape().IsArray()); + const bool lhs_same = + ShapeUtil::SameElementType(lhs->shape(), dot->shape()); + const bool rhs_same = + ShapeUtil::SameElementType(rhs->shape(), dot->shape()); + const Literal& lhs_literal = 
parent_->GetEvaluatedLiteralFor(lhs); + const Literal& rhs_literal = parent_->GetEvaluatedLiteralFor(rhs); + if (lhs_same && rhs_same) { + return HandleDotSlowPathWithLiterals(dot, lhs_literal, rhs_literal); + } + if (lhs_same) { + return HandleDotSlowPathWithLiterals( + dot, lhs_literal, + rhs_literal.Convert(dot->shape().element_type()).ValueOrDie()); + } + if (rhs_same) { + return HandleDotSlowPathWithLiterals( + dot, lhs_literal.Convert(dot->shape().element_type()).ValueOrDie(), + rhs_literal); + } + return HandleDotSlowPathWithLiterals( + dot, lhs_literal.Convert(dot->shape().element_type()).ValueOrDie(), + rhs_literal.Convert(dot->shape().element_type()).ValueOrDie()); + } + Status HandlePad(HloInstruction* pad) override { CHECK(pad->operand(0)->shape().IsArray()); // Padding value must be scalar. diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index 29a728c068e..8e39e32e4c3 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -949,18 +949,18 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation, TF_RETURN_IF_ERROR(ExpectArray( rhs, absl::StrCat("rhs of binary operation ", HloOpcodeString(opcode)))); switch (opcode) { + case HloOpcode::kAdd: case HloOpcode::kMaximum: case HloOpcode::kMinimum: + case HloOpcode::kMultiply: return InferElementwiseBinaryOpShape(opcode, lhs, rhs, broadcast_dimensions); case HloOpcode::kSubtract: - case HloOpcode::kAdd: case HloOpcode::kAtan2: case HloOpcode::kPower: case HloOpcode::kDivide: case HloOpcode::kRemainder: - case HloOpcode::kMultiply: case HloOpcode::kShiftLeft: case HloOpcode::kShiftRightArithmetic: case HloOpcode::kShiftRightLogical: From 8443d693c053e0e058f677e3c2c00c6fd0b67f6b Mon Sep 17 00:00:00 2001 From: Anna R Date: Mon, 27 Jul 2020 21:58:24 -0700 Subject: [PATCH 1453/2522] Update ops-related pbtxt files. 
PiperOrigin-RevId: 323506525 Change-Id: I7680f880a1d40484429287b1c11e56f7e532090a --- .../ops_history_v2/OptimizeDatasetV2.pbtxt | 43 ------------------- tensorflow/core/ops/ops.pbtxt | 43 ------------------- 2 files changed, 86 deletions(-) delete mode 100644 tensorflow/core/ops/compat/ops_history_v2/OptimizeDatasetV2.pbtxt diff --git a/tensorflow/core/ops/compat/ops_history_v2/OptimizeDatasetV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/OptimizeDatasetV2.pbtxt deleted file mode 100644 index ee43df5bfd7..00000000000 --- a/tensorflow/core/ops/compat/ops_history_v2/OptimizeDatasetV2.pbtxt +++ /dev/null @@ -1,43 +0,0 @@ -op { - name: "OptimizeDatasetV2" - input_arg { - name: "input_dataset" - type: DT_VARIANT - } - input_arg { - name: "optimizations_enabled" - type: DT_STRING - } - input_arg { - name: "optimizations_disabled" - type: DT_STRING - } - input_arg { - name: "optimizations_default" - type: DT_STRING - } - output_arg { - name: "handle" - type: DT_VARIANT - } - attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 - } - attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 - } - attr { - name: "optimization_configs" - type: "list(string)" - default_value { - list { - } - } - } -} diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index a9a94580d86..fed598bdef4 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -26317,49 +26317,6 @@ op { } } } -op { - name: "OptimizeDatasetV2" - input_arg { - name: "input_dataset" - type: DT_VARIANT - } - input_arg { - name: "optimizations_enabled" - type: DT_STRING - } - input_arg { - name: "optimizations_disabled" - type: DT_STRING - } - input_arg { - name: "optimizations_default" - type: DT_STRING - } - output_arg { - name: "handle" - type: DT_VARIANT - } - attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 - } - attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 - } - attr { - name: "optimization_configs" - type: "list(string)" - default_value { - list { - } - } - } -} op { name: "OptionalFromValue" input_arg { From 83870cb25fa07fac8900aa9da7ecc7823f15cbcf Mon Sep 17 00:00:00 2001 From: Ayush Dubey Date: Mon, 27 Jul 2020 21:58:46 -0700 Subject: [PATCH 1454/2522] Share ownership of CollectiveImplementation in `BaseCollectiveExecutor::ExecuteAsync`. Without this change, we would sometimes get a segfault for a short-running collective. The reason is that the CUDA kernel can complete and trigger the done callback, which deletes `col_impl` object, before the TF kernel has finished execution. This change also updates collective_ops_gpu_test to use EagerContext for checking number of available GPUs. Resolves #41113. 
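The fix makes CollectiveImplementationInterface refcounted (it now derives from core::RefCounted), so ExecuteAsync, the enqueued closure, and the Run callback each hold their own reference and the last one to finish releases the object, instead of the done callback doing a bare delete while the launching code may still be using it. The following is a minimal standalone sketch of that ownership pattern, with simplified stand-ins for core::RefCounted / core::ScopedUnref and the work-queue closure collapsed away (illustrative names, not the real TF classes):

    #include <atomic>
    #include <cstdio>
    #include <functional>
    #include <thread>

    // Simplified stand-in for tensorflow::core::RefCounted.
    class RefCounted {
     public:
      virtual ~RefCounted() = default;
      void Ref() { count_.fetch_add(1); }
      void Unref() {
        if (count_.fetch_sub(1) == 1) delete this;
      }

     private:
      std::atomic<int> count_{1};
    };

    // Simplified stand-in for core::ScopedUnref.
    class ScopedUnref {
     public:
      explicit ScopedUnref(RefCounted* obj) : obj_(obj) {}
      ~ScopedUnref() { obj_->Unref(); }

     private:
      RefCounted* obj_;
    };

    class CollectiveImpl : public RefCounted {
     public:
      void Run(std::function<void()> done) {
        // In TF the completion may fire from a GPU stream callback on another
        // thread, possibly before ExecuteAsync has returned; joined here only
        // to keep the sketch deterministic.
        std::thread([done] { done(); }).join();
      }
    };

    void ExecuteAsync(CollectiveImpl* col_impl, std::function<void()> done) {
      ScopedUnref unref(col_impl);  // balances the reference from creation
      col_impl->Ref();              // reference owned by the Run callback
      col_impl->Run([col_impl, done] {
        ScopedUnref unref(col_impl);  // released when the callback finishes
        done();
      });
      // col_impl is still alive here even if the callback already ran, so the
      // launching code can keep using it until this scope ends.
    }

    int main() {
      ExecuteAsync(new CollectiveImpl, [] { std::puts("done"); });
    }
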
PiperOrigin-RevId: 323506550 Change-Id: I414f2b79875f51963e7d5e2b454e106d88b511be --- .../base_collective_executor.cc | 8 +- .../hierarchical_tree_broadcaster_test.cc | 13 +- .../core/common_runtime/ring_gatherer_test.cc | 12 +- .../core/common_runtime/ring_reducer_test.cc | 14 +- tensorflow/core/framework/collective.h | 2 +- tensorflow/python/BUILD | 4 +- .../python/ops/collective_ops_gpu_test.py | 215 +++++++++--------- 7 files changed, 134 insertions(+), 134 deletions(-) diff --git a/tensorflow/core/common_runtime/base_collective_executor.cc b/tensorflow/core/common_runtime/base_collective_executor.cc index cd46ab76b62..80820c9022c 100644 --- a/tensorflow/core/common_runtime/base_collective_executor.cc +++ b/tensorflow/core/common_runtime/base_collective_executor.cc @@ -33,6 +33,7 @@ limitations under the License. #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/platform/refcount.h" #include "tensorflow/core/platform/tracing.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/lib/traceme.h" @@ -265,18 +266,20 @@ void BaseCollectiveExecutor::ExecuteAsync(OpKernelContext* ctx, DCHECK_EQ(nullptr, col_impl); return; } + core::ScopedUnref unref(col_impl); auto col_ctx = std::make_shared( this, dev_mgr_, ctx, CtxParams(ctx), col_params, exec_key, step_id_, input, output); status = col_impl->InitializeCollectiveContext(col_ctx); if (!status.ok()) { done_safe(status); - delete col_impl; return; } // Run on an unbounded work queue that can handle blocking work so as to not // starve executor threads. + col_impl->Ref(); remote_access_->RunClosure([col_impl, col_ctx, done_safe, ctx]() { + core::ScopedUnref unref(col_impl); profiler::TraceMe activity( [ctx] { string op = profiler::TraceMeOp(ctx->op_kernel().name_view(), @@ -285,9 +288,10 @@ void BaseCollectiveExecutor::ExecuteAsync(OpKernelContext* ctx, {{"id", ctx->step_id()}}); }, profiler::TraceMeLevel::kInfo); + col_impl->Ref(); col_impl->Run([col_impl, col_ctx, done_safe](const Status& s) { + core::ScopedUnref unref(col_impl); done_safe(s); - delete col_impl; }); }); } diff --git a/tensorflow/core/common_runtime/hierarchical_tree_broadcaster_test.cc b/tensorflow/core/common_runtime/hierarchical_tree_broadcaster_test.cc index 333a70adc27..1a98a9adbb8 100644 --- a/tensorflow/core/common_runtime/hierarchical_tree_broadcaster_test.cc +++ b/tensorflow/core/common_runtime/hierarchical_tree_broadcaster_test.cc @@ -518,8 +518,9 @@ class HierarchicalTreeBroadcasterTest : public ::testing::Test { cp->subdiv_rank.clear(); cp->instance.impl_details.subdiv_source_rank.clear(); // Create a stub broadcaster only for testing param initialization. - HierarchicalTreeBroadcaster broadcaster; - TF_CHECK_OK(broadcaster.InitializeCollectiveParams(cp)); + HierarchicalTreeBroadcaster* broadcaster = new HierarchicalTreeBroadcaster; + core::ScopedUnref unref(broadcaster); + TF_CHECK_OK(broadcaster->InitializeCollectiveParams(cp)); EXPECT_EQ(expected_subdiv_perms, cp->instance.impl_details.subdiv_permutations); EXPECT_EQ(expected_subdiv_rank, cp->subdiv_rank); @@ -669,14 +670,16 @@ class HierarchicalTreeBroadcasterTest : public ::testing::Test { // Prepare a Broadcaster instance. 
string exec_key = strings::StrCat(col_params_.instance.instance_key, ":0:0"); - HierarchicalTreeBroadcaster broadcaster; + HierarchicalTreeBroadcaster* broadcaster = + new HierarchicalTreeBroadcaster; + core::ScopedUnref unref(broadcaster); auto col_ctx = std::make_shared( parent_->col_exec_, parent_->dev_mgr_.get(), &ctx, &op_params, col_params_, exec_key, kStepId, input_tensor_ptr, output_tensor_ptr); - TF_CHECK_OK(broadcaster.InitializeCollectiveContext(col_ctx)); + TF_CHECK_OK(broadcaster->InitializeCollectiveContext(col_ctx)); // Run the broadcast. - broadcaster.Run([this](Status s) { status_ = s; }); + broadcaster->Run([this](Status s) { status_ = s; }); if (status_.ok()) { CHECK(tensor_.CopyFrom(*ctx.mutable_output(0), tensor_.shape())); } diff --git a/tensorflow/core/common_runtime/ring_gatherer_test.cc b/tensorflow/core/common_runtime/ring_gatherer_test.cc index 124965b6c6a..3e70f523ff5 100644 --- a/tensorflow/core/common_runtime/ring_gatherer_test.cc +++ b/tensorflow/core/common_runtime/ring_gatherer_test.cc @@ -369,8 +369,9 @@ class RingGathererTest : public ::testing::Test { cp->instance.impl_details.subdiv_permutations.clear(); cp->subdiv_rank.clear(); // Create a stub ring gatherer only for testing param initialization. - RingGatherer gatherer; - TF_CHECK_OK(gatherer.InitializeCollectiveParams(cp)); + RingGatherer* gatherer = new RingGatherer; + core::ScopedUnref unref(gatherer); + TF_CHECK_OK(gatherer->InitializeCollectiveParams(cp)); EXPECT_EQ(expected_subdiv_perms, cp->instance.impl_details.subdiv_permutations); EXPECT_EQ(expected_subdiv_rank, cp->subdiv_rank); @@ -476,14 +477,15 @@ class RingGathererTest : public ::testing::Test { // Prepare a RingGatherer instance. string exec_key = strings::StrCat(col_params_.instance.instance_key, ":0:0"); - RingGatherer gatherer; + RingGatherer* gatherer = new RingGatherer; + core::ScopedUnref unref(gatherer); auto col_ctx = std::make_shared( parent_->col_exec_, parent_->dev_mgr_.get(), &ctx, &op_params, col_params_, exec_key, kStepId, &input_tensor_, output_tensor_ptr); - TF_CHECK_OK(gatherer.InitializeCollectiveContext(col_ctx)); + TF_CHECK_OK(gatherer->InitializeCollectiveContext(col_ctx)); // Run the all-gather. - gatherer.Run([this](Status s) { status_ = s; }); + gatherer->Run([this](Status s) { status_ = s; }); if (status_.ok()) { CHECK(output_tensor_.CopyFrom(*ctx.mutable_output(0), ctx.mutable_output(0)->shape())); diff --git a/tensorflow/core/common_runtime/ring_reducer_test.cc b/tensorflow/core/common_runtime/ring_reducer_test.cc index 678153c3603..a7f99cf0f45 100644 --- a/tensorflow/core/common_runtime/ring_reducer_test.cc +++ b/tensorflow/core/common_runtime/ring_reducer_test.cc @@ -393,12 +393,13 @@ class RingReducerTest : public ::testing::Test { cp->instance.impl_details.subdiv_permutations.clear(); cp->subdiv_rank.clear(); // Create a stub ring reducer only for testing param initialization. - RingReducer reducer; - TF_CHECK_OK(reducer.InitializeCollectiveParams(cp)); + RingReducer* reducer = new RingReducer; + core::ScopedUnref unref(reducer); + TF_CHECK_OK(reducer->InitializeCollectiveParams(cp)); EXPECT_EQ(expected_subdiv_perms, cp->instance.impl_details.subdiv_permutations); EXPECT_EQ(expected_subdiv_rank, cp->subdiv_rank); - reducer.group_size_tensor_ready_.Notify(); // To unblock destructor. + reducer->group_size_tensor_ready_.Notify(); // To unblock destructor. } class DeviceInstance { @@ -506,14 +507,15 @@ class RingReducerTest : public ::testing::Test { // Prepare a RingReducer instance. 
string exec_key = strings::StrCat(col_params_.instance.instance_key, ":0:0"); - RingReducer reducer; + RingReducer* reducer = new RingReducer; + core::ScopedUnref unref(reducer); auto col_ctx = std::make_shared( parent_->col_exec_, parent_->dev_mgr_.get(), &ctx, &op_params, col_params_, exec_key, kStepId, &tensor_, &tensor_); - TF_CHECK_OK(reducer.InitializeCollectiveContext(col_ctx)); + TF_CHECK_OK(reducer->InitializeCollectiveContext(col_ctx)); // Run the all-reduce. - reducer.Run([this](Status s) { status_ = s; }); + reducer->Run([this](Status s) { status_ = s; }); if (status_.ok()) { CHECK(tensor_.CopyFrom(*ctx.mutable_output(0), tensor_.shape())); } diff --git a/tensorflow/core/framework/collective.h b/tensorflow/core/framework/collective.h index 72920cfaa08..94e83fa2f08 100644 --- a/tensorflow/core/framework/collective.h +++ b/tensorflow/core/framework/collective.h @@ -384,7 +384,7 @@ class CollectiveContext { // implement this interface and register the implementation via the // CollectiveRegistry detailed below. See common_runtime/ring_reducer and // common_runtime/hierarchical_tree_broadcaster for examples. -class CollectiveImplementationInterface { +class CollectiveImplementationInterface : public core::RefCounted { public: virtual ~CollectiveImplementationInterface() = default; diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 766bc35e4bd..0c31a32531d 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -3346,11 +3346,9 @@ cuda_py_test( python_version = "PY3", tags = [ "guitar", - "manual", "multi_gpu", - "no_oss", "no_rocm", - "notap", + "no_windows", ], deps = [ ":client_testlib", diff --git a/tensorflow/python/ops/collective_ops_gpu_test.py b/tensorflow/python/ops/collective_ops_gpu_test.py index efa97bd9555..87758a314b2 100644 --- a/tensorflow/python/ops/collective_ops_gpu_test.py +++ b/tensorflow/python/ops/collective_ops_gpu_test.py @@ -20,7 +20,6 @@ from __future__ import print_function import os -from tensorflow.core.protobuf import config_pb2 from tensorflow.python.eager import context from tensorflow.python.eager import def_function from tensorflow.python.framework import config @@ -28,10 +27,8 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops -from tensorflow.python.framework import test_util from tensorflow.python.ops import collective_ops from tensorflow.python.platform import test -from tensorflow.python.platform import tf_logging as logging class CollectiveOpGPUTest(test.TestCase): @@ -43,232 +40,217 @@ class CollectiveOpGPUTest(test.TestCase): # Group size is the number of devices in a group communicating collectively. # This will be passed into the collective ops in the tests below. 
cls._group_size = 2 + cls._devices = ['/device:GPU:{}'.format(i) for i in range(2)] os.environ['NCCL_DEBUG'] = 'INFO' os.environ['NCCL_LAUNCH_MODE'] = 'PARALLEL' - def _configure(self, set_config_proto_nccl=True): - """Return `ConfigProto` for NCCL execution.""" - experimental = config_pb2.ConfigProto.Experimental() - if set_config_proto_nccl: - experimental.collective_nccl = True - return config_pb2.ConfigProto(experimental=experimental) - - def _ensure_context_initialized(self): + def _setup_context(self, num_gpus=2): + context._reset_context() gpus = config.list_physical_devices('GPU') - if len(gpus) < 2: - self.skipTest('Expected at least 2 GPUs but found {} GPUs'.format( - len(gpus))) + if len(gpus) < num_gpus: + self.skipTest('Expected at least {} GPUs but found {} GPUs'.format( + num_gpus, len(gpus))) context.ensure_initialized() def testBasicNcclAllReduce(self): + self._setup_context() + inputs = [[0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1], [0.3, 1.3, 2.3, 3.3, 4.3, 5.3, 6.3, 7.3]] expected = [0.2, 1.2, 2.2, 3.2, 4.2, 5.2, 6.2, 7.2] group_key = 1 instance_key = 1 - devices = ['/GPU:{}'.format(i) for i in range(self._group_size)] - # Tests that execute collectives need to be enclosed in graph or tf.function - with ops.Graph().as_default(), self.session( - config=self._configure()) as sess: - if not test_util.is_gpu_available(cuda_only=True): - self.skipTest('No GPU available') + @def_function.function + def run_basic_all_reduce(): collectives = [] for i in range(self._group_size): - with ops.device(devices[i]): + with ops.device(self._devices[i]): t = constant_op.constant(inputs[i]) collectives.append(collective_ops.all_reduce( t, self._group_size, group_key, instance_key, 'Add', 'Div')) - results = sess.run(collectives) - for result in results: + return collectives + + for result in run_basic_all_reduce(): self.assertAllClose(result, expected, rtol=1e-5, atol=1e-5) def testInt32Error(self): + self._setup_context() + inputs = [[0, 1], [2, 3]] group_key = 1 instance_key = 50 - devices = ['/GPU:{}'.format(i) for i in range(self._group_size)] - # Tests that execute collectives need to be enclosed in graph or tf.function - with ops.Graph().as_default(), self.session( - config=self._configure()) as sess: - if not test_util.is_gpu_available(cuda_only=True): - self.skipTest('No GPU available') - collectives = [] + @def_function.function + def run_int32_error(): for i in range(self._group_size): - with ops.device(devices[i]): + with ops.device(self._devices[i]): t = constant_op.constant(inputs[i], dtype=dtypes.int32) - collectives.append(collective_ops.all_reduce( - t, self._group_size, group_key, instance_key, 'Add', 'Div')) - with self.assertRaisesRegex( - errors.InternalError, - 'does not support datatype DT_INT32 on DEVICE_GPU'): - sess.run(collectives) + collective_ops.all_reduce( + t, self._group_size, group_key, instance_key, 'Add', 'Div') + + with self.assertRaisesRegex( + errors.InternalError, + 'does not support datatype DT_INT32 on DEVICE_GPU'): + run_int32_error() def testFp16Reduce(self): + self._setup_context() + inputs = [[0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1], [0.3, 1.3, 2.3, 3.3, 4.3, 5.3, 6.3, 7.3]] expected = [0.2, 1.2, 2.2, 3.2, 4.2, 5.2, 6.2, 7.2] group_key = 1 instance_key = 100 - devices = ['/GPU:{}'.format(i) for i in range(self._group_size)] - with ops.Graph().as_default(), self.session( - config=self._configure()) as sess: - if not test_util.is_gpu_available(cuda_only=True): - self.skipTest('No GPU available') + @def_function.function + def run_fp16_reduce(): 
collectives = [] for i in range(self._group_size): - with ops.device(devices[i]): + with ops.device(self._devices[i]): t = constant_op.constant(inputs[i], dtype=dtypes.float16) collectives.append(collective_ops.all_reduce( t, self._group_size, group_key, instance_key, 'Add', 'Div')) - results = sess.run(collectives) - for result in results: - logging.info('i {} result {} expected {}'.format(i, results[i], expected)) + return collectives + + for result in run_fp16_reduce(): self.assertAllClose(result, expected, rtol=1e-3, atol=1e-3) def testNcclHintAllReduce(self): + self._setup_context() + inputs = [[0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1], [0.3, 1.3, 2.3, 3.3, 4.3, 5.3, 6.3, 7.3]] expected = [0.2, 1.2, 2.2, 3.2, 4.2, 5.2, 6.2, 7.2] group_key = 1 instance_key = 1 - devices = ['/GPU:{}'.format(i) for i in range(self._group_size)] - with ops.Graph().as_default(), self.session( - config=self._configure(set_config_proto_nccl=False)) as sess: - if not test_util.is_gpu_available(cuda_only=True): - self.skipTest('No GPU available') + @def_function.function + def run_nccl_hint_all_reduce(): collectives = [] for i in range(self._group_size): - with ops.device(devices[i]): + with ops.device(self._devices[i]): t = constant_op.constant(inputs[i]) collectives.append(collective_ops.all_reduce( t, self._group_size, group_key, instance_key, 'Add', 'Div', communication_hint='nccl')) - results = sess.run(collectives) - for result in results: + return collectives + + for result in run_nccl_hint_all_reduce(): self.assertAllClose(result, expected, rtol=1e-5, atol=1e-5) def testBasicNcclBroadcast(self): + self._setup_context() + tensor_value = [0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1] group_key = 1 instance_key = 1 - devices = ['/GPU:{}'.format(i) for i in range(self._group_size)] - with ops.Graph().as_default(), self.session( - config=self._configure()) as sess: - if not test_util.is_gpu_available(cuda_only=True): - self.skipTest('No GPU available') + @def_function.function + def run_basic_nccl_broadcast(): collectives = [] - with ops.device(devices[0]): + with ops.device(self._devices[0]): t = constant_op.constant(tensor_value) collectives.append(collective_ops.broadcast_send( t, t.shape, t.dtype, self._group_size, group_key, instance_key)) - with ops.device(devices[1]): + with ops.device(self._devices[1]): t = constant_op.constant(tensor_value) collectives.append(collective_ops.broadcast_recv( t.shape, t.dtype, self._group_size, group_key, instance_key)) - results = sess.run(collectives) - for result in results: + return collectives + + for result in run_basic_nccl_broadcast(): self.assertAllClose(result, tensor_value, rtol=1e-5, atol=1e-5) def testNcclBroadcastDoubleRecv(self): + self._setup_context() + tensor_value = [0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1] group_key = 1 instance_key = 1 - devices = ['/GPU:{}'.format(i) for i in range(self._group_size)] - with ops.Graph().as_default(), self.session( - config=self._configure()) as sess: - if not test_util.is_gpu_available(cuda_only=True): - self.skipTest('No GPU available') - collectives = [] - for device in devices: + @def_function.function + def run_nccl_broadcast_double_recv(): + for device in self._devices: with ops.device(device): t = constant_op.constant(tensor_value) - collectives.append(collective_ops.broadcast_recv( - t.shape, t.dtype, self._group_size, group_key, instance_key)) - with self.assertRaisesRegex(errors.InternalError, 'found no source'): - sess.run(collectives) + collective_ops.broadcast_recv( + t.shape, t.dtype, self._group_size, 
group_key, instance_key) + + with self.assertRaisesRegex(errors.InternalError, 'found no source'): + run_nccl_broadcast_double_recv() def testNcclBroadcastDoubleSend(self): + self._setup_context() + tensor_value = [0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1] group_key = 1 instance_key = 1 - devices = ['/GPU:{}'.format(i) for i in range(self._group_size)] - with ops.Graph().as_default(), self.session( - config=self._configure()) as sess: - if not test_util.is_gpu_available(cuda_only=True): - self.skipTest('No GPU available') - collectives = [] - for device in devices: + @def_function.function + def run_nccl_broadcast_double_send(): + for device in self._devices: with ops.device(device): t = constant_op.constant(tensor_value) - collectives.append(collective_ops.broadcast_send( - t, t.shape, t.dtype, self._group_size, group_key, instance_key)) - with self.assertRaisesRegex(errors.InternalError, 'already has source'): - sess.run(collectives) + collective_ops.broadcast_send( + t, t.shape, t.dtype, self._group_size, group_key, instance_key) + + with self.assertRaisesRegex(errors.InternalError, 'already has source'): + run_nccl_broadcast_double_send() def testBasicNcclAllGather(self): + self._setup_context() + inputs = [[0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1], [0.3, 1.3, 2.3, 3.3, 4.3, 5.3, 6.3, 7.3]] expected = [0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1, 0.3, 1.3, 2.3, 3.3, 4.3, 5.3, 6.3, 7.3] group_key = 1 instance_key = 1 - devices = ['/GPU:{}'.format(i) for i in range(self._group_size)] - with ops.Graph().as_default(), self.session( - config=self._configure()) as sess: - if not test_util.is_gpu_available(cuda_only=True): - self.skipTest('No GPU available') + @def_function.function + def run_basic_nccl_all_gather(): collectives = [] for i in range(self._group_size): - with ops.device(devices[i]): + with ops.device(self._devices[i]): t = constant_op.constant(inputs[i]) collectives.append(collective_ops.all_gather(t, self._group_size, group_key, instance_key)) - results = sess.run(collectives) - for result in results: + return collectives + + for result in run_basic_nccl_all_gather(): self.assertAllClose(result, expected, rtol=1e-5, atol=1e-5) def testCollectiveDeviceMismatch(self): + self._setup_context() + group_key = 10 instance_key = 20 t0 = [1, 2, 3, 4] t1 = [5, 6, 7, 8] - with ops.Graph().as_default(), self.session( - config=self._configure(set_config_proto_nccl=False)) as sess: - if not test_util.is_gpu_available(cuda_only=True): - self.skipTest('No GPU available') + @def_function.function + def run_collective_device_mismatch(): with ops.device('/CPU:0'): in0 = constant_op.constant(t0) - c0 = collective_ops.all_reduce(in0, self._group_size, group_key, - instance_key, 'Add', 'Id') + collective_ops.all_reduce(in0, self._group_size, group_key, + instance_key, 'Add', 'Id') with ops.device('/GPU:0'): in1 = constant_op.constant(t1) - c1 = collective_ops.all_reduce(in1, self._group_size, group_key, - instance_key, 'Add', 'Id') - run_options = config_pb2.RunOptions() - run_options.experimental.collective_graph_key = 100 - with self.assertRaisesRegex(errors.InternalError, - 'but that group has type'): - sess.run([c0, c1], options=run_options) + collective_ops.all_reduce(in1, self._group_size, group_key, + instance_key, 'Add', 'Id') + + with self.assertRaisesRegex(errors.InternalError, + 'but that group has type'): + run_collective_device_mismatch() - @test_util.run_v2_only def testCollectiveReduceMinMax(self): - self._ensure_context_initialized() + self._setup_context() @def_function.function def 
run_all_reduce(group_key, instance_key, merge_op): t0 = [1., 20., 3., 40., 5.] t1 = [10., 2., 30., 4., 50.] - os.environ['NCCL_DEBUG'] = 'INFO' - os.environ['NCCL_LAUNCH_MODE'] = 'PARALLEL' with ops.device('/GPU:0'): in0 = constant_op.constant(t0) c0 = collective_ops.all_reduce( @@ -289,9 +271,8 @@ class CollectiveOpGPUTest(test.TestCase): for result in results: self.assertAllClose(result, expected, rtol=1e-5, atol=1e-5) - @test_util.run_v2_only def testCollectiveGroupSizeOne(self): - self._ensure_context_initialized() + self._setup_context() group_size = 1 group_key = 100 @@ -310,6 +291,16 @@ class CollectiveOpGPUTest(test.TestCase): in_tensor, group_size, group_key, instance_key) self.assertAllEqual(in_value, gathered_tensor.numpy()) + def testNcclStress(self): + self._setup_context(num_gpus=1) + + num_iters = 1000 + for _ in range(num_iters): + with ops.device('/device:GPU:0'): + collective_ops.all_reduce( + [1.], group_size=1, group_key=0, instance_key=0, merge_op='Add', + final_op='Id', communication_hint='NCCL') + if __name__ == '__main__': test.main() From 9efc52e47d98fb14886aafeb9aa5d7de7d287a42 Mon Sep 17 00:00:00 2001 From: Thai Nguyen Date: Mon, 27 Jul 2020 22:06:58 -0700 Subject: [PATCH 1455/2522] Renable SAN tests on selective_build_test Marking the test not portable for mobile platforms. PiperOrigin-RevId: 323507613 Change-Id: I26d29a57316a02726711460eaf56b6215bacb0e1 --- tensorflow/lite/testing/BUILD | 4 +--- tensorflow/lite/testing/selective_build_test.cc | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/tensorflow/lite/testing/BUILD b/tensorflow/lite/testing/BUILD index d0744c49445..3d4527e926e 100644 --- a/tensorflow/lite/testing/BUILD +++ b/tensorflow/lite/testing/BUILD @@ -555,9 +555,7 @@ cc_test( ], tags = [ "no_mac", # b/161990368 - "noasan", # b/162027436 - "nomsan", # b/162027436 - "notsan", # b/162027436 + "tflite_not_portable", ], deps = [ ":test_tflite_lib", diff --git a/tensorflow/lite/testing/selective_build_test.cc b/tensorflow/lite/testing/selective_build_test.cc index ad23e382a8d..1a9a5b2efdb 100644 --- a/tensorflow/lite/testing/selective_build_test.cc +++ b/tensorflow/lite/testing/selective_build_test.cc @@ -51,8 +51,8 @@ bool RunWithRandomInputs(const std::string& filename) { for (auto it = data.begin(); it != data.end(); ++it) { *it = random(); } - tensor->data.raw = reinterpret_cast(data.data()); sample.push_back(data); + tensor->data.raw = reinterpret_cast(sample.rbegin()->data()); } // Running inference. From 8d3585924325f572c71b790a70439db1bce73ff5 Mon Sep 17 00:00:00 2001 From: Mangpo Phothilimthana Date: Mon, 27 Jul 2020 22:07:35 -0700 Subject: [PATCH 1456/2522] Make MemoryUsageTracker::EndInstruction() return error status instead of crashing due to CHECK fails. This crashes the layout autotuner. 
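
A CHECK failure aborts the whole process, while a returned error status lets the caller (here, the layout autotuner) recover. A minimal sketch of that difference, using a simplified stand-in for tensorflow::Status and a hypothetical EndInstructionSketch helper rather than the real MemoryUsageTracker code:

    #include <iostream>
    #include <string>
    #include <utility>

    // Simplified stand-in for tensorflow::Status, only for illustration.
    struct Status {
      bool ok;
      std::string message;
      static Status OK() { return {true, ""}; }
      static Status Internal(std::string m) { return {false, std::move(m)}; }
    };

    // Before: CHECK_GE(buffer.unfinished_user_count, 0) aborts on failure.
    // After: the same invariant violation is reported as an error status the
    // caller can handle instead of crashing the process.
    Status EndInstructionSketch(int unfinished_user_count) {
      if (unfinished_user_count < 0) {
        return Status::Internal("buffer has negative unfinished user count");
      }
      return Status::OK();
    }

    int main() {
      Status s = EndInstructionSketch(-1);
      std::cout << (s.ok ? "ok" : "error: " + s.message) << "\n";
    }
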
PiperOrigin-RevId: 323507724 Change-Id: I5eafc0dbbb527164a9246131f1ad7f7d71ddcb44 --- tensorflow/compiler/xla/service/hlo_rematerialization.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.cc b/tensorflow/compiler/xla/service/hlo_rematerialization.cc index e1defa313e8..7f974a618a8 100644 --- a/tensorflow/compiler/xla/service/hlo_rematerialization.cc +++ b/tensorflow/compiler/xla/service/hlo_rematerialization.cc @@ -745,7 +745,7 @@ Status MemoryUsageTracker::EndInstruction() { for (BufferId buffer_id : in_progress_item_->buffers_used) { Buffer& buffer = buffers_.at(buffer_id); buffer.unfinished_user_count--; - CHECK_GE(buffer.unfinished_user_count, 0) + TF_RET_CHECK(buffer.unfinished_user_count >= 0) << buffer.ToString() << " has negative unfinished user count."; if (buffer.unfinished_user_count == 0) { // Buffer is now dead. From a1d78970aa14bc4bf85d1f62a238adc299ed86c1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Jul 2020 22:08:47 -0700 Subject: [PATCH 1457/2522] Internal change PiperOrigin-RevId: 323507899 Change-Id: I5ede4ed67d92f132a4e8b7fbe175084daa4181ec --- .../base_api/api_def_OptimizeDatasetV2.pbtxt | 32 ++++ tensorflow/core/kernels/data/BUILD | 1 + tensorflow/core/kernels/data/dataset_utils.cc | 130 ++++++++++++++++ tensorflow/core/kernels/data/dataset_utils.h | 12 ++ .../core/kernels/data/dataset_utils_test.cc | 137 ++++++++++++++++ .../core/kernels/data/optimize_dataset_op.cc | 58 ++++++- .../core/kernels/data/optimize_dataset_op.h | 9 ++ tensorflow/core/ops/dataset_ops.cc | 11 ++ tensorflow/core/platform/default/port.cc | 2 + tensorflow/core/platform/host_info.h | 6 +- .../kernel_tests/optimize_dataset_test.py | 147 +++++++++++++++--- .../kernel_tests/prefetch_with_slack_test.py | 4 +- .../optimize_dataset_serialization_test.py | 6 +- .../data/experimental/ops/optimization.py | 20 ++- .../experimental/ops/optimization_options.py | 53 +++++-- tensorflow/python/data/ops/dataset_ops.py | 105 ++++++++++--- tensorflow/python/data/util/options.py | 8 + .../api/golden/v1/tensorflow.raw_ops.pbtxt | 4 + .../api/golden/v2/tensorflow.raw_ops.pbtxt | 4 + 19 files changed, 676 insertions(+), 73 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_OptimizeDatasetV2.pbtxt diff --git a/tensorflow/core/api_def/base_api/api_def_OptimizeDatasetV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_OptimizeDatasetV2.pbtxt new file mode 100644 index 00000000000..a8e66499471 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_OptimizeDatasetV2.pbtxt @@ -0,0 +1,32 @@ +op { + graph_op_name: "OptimizeDatasetV2" + visibility: HIDDEN + in_arg { + name: "input_dataset" + description: < SelectOptimizations( + const string& job_name, const string& opt_ins_raw, + const string& opt_outs_raw, + const absl::flat_hash_map& live_experiments, + const std::vector& optimizations_enabled, + const std::vector& optimizations_disabled, + const std::vector& optimizations_default, + std::function hash_func) { + // Creates a set of optimizations. + absl::flat_hash_set optimizations_set; + + // Creates the opt in and opt out settings. 
+ std::vector opt_ins, opt_outs; + if (opt_ins_raw == "all") { + for (auto& pair : live_experiments) { + opt_ins.push_back(pair.first); + } + } else { + opt_ins = str_util::Split(opt_ins_raw, ',', str_util::SkipEmpty()); + } + if (opt_outs_raw == "all") { + for (auto& pair : live_experiments) { + opt_outs.push_back(pair.first); + } + } else { + opt_outs = str_util::Split(opt_outs_raw, ',', str_util::SkipEmpty()); + } + + // Checks if the opt in and opt out experiments are live experiments. + for (auto& optimization : opt_ins) { + if (live_experiments.find(optimization) == live_experiments.end()) { + LOG(WARNING) << "The experiment \"" << optimization + << "\" is opted in but it is not a live experiment."; + } + } + for (auto& optimization : opt_outs) { + if (live_experiments.find(optimization) == live_experiments.end()) { + LOG(WARNING) << "The experiment \"" << optimization + << "\" is opted out but it is not a live experiment."; + } + } + + // Checks if the opt in settings conflict with opt out settings. + for (auto& optimization : opt_ins) { + if (std::find(opt_outs.begin(), opt_outs.end(), optimization) != + opt_outs.end()) { + LOG(WARNING) << "The experiment \"" << optimization + << "\" is set in both \"TF_DATA_EXPERIMENT_OPT_IN\" and " + "\"TF_DATA_EXPERIMENT_OPT_OUT\". Unless the experiment " + "corresponds to an explicitly enabled optimization, it " + "is not applied."; + } + } + + // Checks if the enable/disable settings from tf.data.Options conflict with + // user opt in/out settings. In which case we assume tf.data.Options settings + // have higher priority to overwrite. + for (auto& optimization : optimizations_enabled) { + if (std::find(opt_outs.begin(), opt_outs.end(), optimization) != + opt_outs.end()) { + LOG(WARNING) << "The optimization \"" << optimization + << "\" is opt out, but is still applied since" + " it is enabled through tf.data.Options."; + } + } + for (auto& optimization : optimizations_disabled) { + if (std::find(opt_ins.begin(), opt_ins.end(), optimization) != + opt_ins.end()) { + LOG(WARNING) << "The optimization \"" << optimization + << "\" is opt in, but is not applied since" + " it is disabled through tf.data.Options."; + } + } + + // Add the enabled optimizations. + optimizations_set.insert(optimizations_enabled.begin(), + optimizations_enabled.end()); + + // Add the default optimizations that are not explicitly opted out. + for (auto& optimization : optimizations_default) { + if (std::find(opt_outs.begin(), opt_outs.end(), optimization) == + opt_outs.end()) { + optimizations_set.insert(optimization); + } + } + + // Add the live experiments stochastically if they are neither opted in nor + // opted out. + for (auto& pair : live_experiments) { + string experiment = pair.first; + // Skip experiments that are explicitly opted out. + if (std::find(opt_outs.begin(), opt_outs.end(), experiment) != + opt_outs.end()) { + continue; + } + // Skip experiments whose transformations are explicitly disabled. + if (std::find(optimizations_disabled.begin(), optimizations_disabled.end(), + experiment) != optimizations_disabled.end()) { + continue; + } + // Apply experiments that are explicitly opted in. + if (std::find(opt_ins.begin(), opt_ins.end(), experiment) != + opt_ins.end()) { + optimizations_set.insert(experiment); + continue; + } + // Otherwise, apply experiment stochastically based on job name and + // experiment roll out percentage. 
+ if (hash_func(strings::StrCat(job_name, experiment)) % 100 < pair.second) { + optimizations_set.insert(experiment); + } + } + + // Log the experiments that will be applied. + if (VLOG_IS_ON(1)) { + for (auto& pair : live_experiments) { + string experiment = pair.first; + if (std::find(optimizations_set.begin(), optimizations_set.end(), + experiment) != optimizations_set.end()) { + VLOG(1) << "The experiment \"" << experiment << "\" is applied."; + } + } + } + + std::vector optimizations; + optimizations.insert(optimizations.end(), optimizations_set.begin(), + optimizations_set.end()); + return optimizations; +} + } // namespace data } // namespace tensorflow diff --git a/tensorflow/core/kernels/data/dataset_utils.h b/tensorflow/core/kernels/data/dataset_utils.h index 5c6b14a8782..0fe3618f34b 100644 --- a/tensorflow/core/kernels/data/dataset_utils.h +++ b/tensorflow/core/kernels/data/dataset_utils.h @@ -304,6 +304,18 @@ class DummyResourceOp : public OpKernel { // MatchesAnyVersionRE("PaddedBatchDataset", "BatchDataset") == false bool MatchesAnyVersionRE(StringPiece op_prefix, StringPiece op_to_match); +// Based on `optimizations_enabled`, `optimizations_disabled`, and +// `optimizations_disabled`, returns the list of optimizations that will be +// applied. +std::vector SelectOptimizations( + const string& job_name, const string& opt_ins_raw, + const string& opt_outs_raw, + const absl::flat_hash_map& live_experiments, + const std::vector& optimizations_enabled, + const std::vector& optimizations_disabled, + const std::vector& optimizations_default, + std::function hash_func); + } // namespace data } // namespace tensorflow diff --git a/tensorflow/core/kernels/data/dataset_utils_test.cc b/tensorflow/core/kernels/data/dataset_utils_test.cc index 1a6e673c3f3..a1f624faeb6 100644 --- a/tensorflow/core/kernels/data/dataset_utils_test.cc +++ b/tensorflow/core/kernels/data/dataset_utils_test.cc @@ -30,6 +30,8 @@ namespace tensorflow { namespace data { namespace { +using ::testing::UnorderedElementsAre; + class DatasetHashUtilsTest : public ::testing::Test { protected: uint64 GetHash(const FunctionDefLibrary& library, const FunctionDef& fn) { @@ -1131,6 +1133,141 @@ TEST_F(DatasetHashUtilsTest, HashStringTensor) { EXPECT_NE(GetHash(v1), GetHash(v3)); } +class SelectOptimizationsHashTest : public ::testing::TestWithParam {}; + +TEST_P(SelectOptimizationsHashTest, DatasetUtils) { + const uint64 hash_result = GetParam(); + string job_name = "job"; + const string opt_ins_raw = ""; + const string opt_outs_raw = ""; + auto hash_func = [hash_result](const string& str) { return hash_result; }; + absl::flat_hash_map live_experiments = { + {"exp1", 0}, {"exp2", 20}, {"exp3", 33}, {"exp4", 45}, + {"exp5", 67}, {"exp6", 88}, {"exp7", 100}}; + std::vector optimizations_enabled, optimizations_disabled, + optimizations_default; + std::vector optimizations = + SelectOptimizations(job_name, opt_ins_raw, opt_outs_raw, live_experiments, + optimizations_enabled, optimizations_disabled, + optimizations_default, hash_func); + + int tested_times = 0; + switch (hash_result) { + case 0: + case 100: + case 200: + tested_times++; + EXPECT_THAT(optimizations, UnorderedElementsAre("exp2", "exp3", "exp4", + "exp5", "exp6", "exp7")); + break; + case 33: + case 133: + tested_times++; + EXPECT_THAT(optimizations, + UnorderedElementsAre("exp4", "exp5", "exp6", "exp7")); + break; + case 67: + case 167: + tested_times++; + EXPECT_THAT(optimizations, UnorderedElementsAre("exp6", "exp7")); + break; + } + EXPECT_EQ(tested_times, 1); +} 
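
The hash cases above follow from the rollout rule in SelectOptimizations: a live experiment that is neither opted in nor opted out, and not explicitly enabled or disabled, is applied when hash(job_name + experiment) % 100 falls below its rollout percentage. A minimal standalone sketch of just that rule, using std::hash in place of TensorFlow's Hash64 and hypothetical experiment names:

    #include <functional>
    #include <iostream>
    #include <map>
    #include <string>

    // Sketch of the rollout rule only; the enabled/disabled/default lists and
    // the TF_DATA_EXPERIMENT_OPT_IN/OUT overrides are omitted here.
    bool ApplyExperiment(const std::string& job_name,
                         const std::string& experiment, int rollout_percent) {
      std::size_t h = std::hash<std::string>{}(job_name + experiment);
      return static_cast<int>(h % 100) < rollout_percent;
    }

    int main() {
      // Hypothetical live experiments and their rollout percentages.
      std::map<std::string, int> live_experiments = {{"exp_a", 20}, {"exp_b", 80}};
      for (const auto& p : live_experiments) {
        // The decision is a pure function of (job_name, experiment), so every
        // worker of the same job makes the same choice.
        std::cout << p.first << ": "
                  << ApplyExperiment("job", p.first, p.second) << "\n";
      }
    }
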
+ +INSTANTIATE_TEST_SUITE_P(Test, SelectOptimizationsHashTest, + ::testing::Values(0, 33, 67, 100, 133, 167, 200)); + +class SelectOptimizationsOptTest + : public ::testing::TestWithParam> {}; + +TEST_P(SelectOptimizationsOptTest, DatasetUtils) { + string job_name = "job"; + const string opt_ins_raw = std::get<0>(GetParam()); + const string opt_outs_raw = std::get<1>(GetParam()); + auto hash_func = [](const string& str) { return 50; }; + absl::flat_hash_map live_experiments = { + {"exp1", 0}, {"exp2", 25}, {"exp3", 50}, {"exp4", 75}, {"exp5", 100}}; + std::vector optimizations_enabled, optimizations_disabled, + optimizations_default; + std::vector optimizations = + SelectOptimizations(job_name, opt_ins_raw, opt_outs_raw, live_experiments, + optimizations_enabled, optimizations_disabled, + optimizations_default, hash_func); + + int tested_times = 0; + if (opt_outs_raw == "all") { + EXPECT_THAT(optimizations, UnorderedElementsAre()); + tested_times++; + } else if (opt_outs_raw.empty()) { + if (opt_ins_raw == "all") { + EXPECT_THAT(optimizations, + UnorderedElementsAre("exp1", "exp2", "exp3", "exp4", "exp5")); + tested_times++; + } else if (opt_ins_raw.empty()) { + EXPECT_THAT(optimizations, UnorderedElementsAre("exp4", "exp5")); + tested_times++; + } else if (opt_ins_raw == "exp2,exp4") { + EXPECT_THAT(optimizations, UnorderedElementsAre("exp2", "exp4", "exp5")); + tested_times++; + } + } else if (opt_outs_raw == "exp1,exp5") { + if (opt_ins_raw == "all") { + EXPECT_THAT(optimizations, UnorderedElementsAre("exp2", "exp3", "exp4")); + tested_times++; + } else if (opt_ins_raw.empty()) { + EXPECT_THAT(optimizations, UnorderedElementsAre("exp4")); + tested_times++; + } else if (opt_ins_raw == "exp2,exp4") { + EXPECT_THAT(optimizations, UnorderedElementsAre("exp2", "exp4")); + tested_times++; + } + } + EXPECT_EQ(tested_times, 1); +} + +INSTANTIATE_TEST_SUITE_P( + Test, SelectOptimizationsOptTest, + ::testing::Combine(::testing::Values("all", "", "exp2,exp4"), + ::testing::Values("all", "", "exp1,exp5"))); + +class SelectOptimizationsConflictTest + : public ::testing::TestWithParam> {}; + +TEST_P(SelectOptimizationsConflictTest, DatasetUtils) { + string job_name = "job"; + const string opt_ins_raw = std::get<0>(GetParam()); + const string opt_outs_raw = std::get<1>(GetParam()); + const uint64 hash_result = std::get<2>(GetParam()); + auto hash_func = [hash_result](const string& str) { return hash_result; }; + absl::flat_hash_map live_experiments = { + {"exp1", 20}, {"exp2", 30}, {"exp3", 40}, + {"exp4", 60}, {"exp5", 70}, {"exp6", 80}}; + std::vector optimizations_enabled = {"exp1", "exp4"}, + optimizations_disabled = {"exp2", "exp5"}, + optimizations_default = {"exp3", "exp6"}; + std::vector optimizations = + SelectOptimizations(job_name, opt_ins_raw, opt_outs_raw, live_experiments, + optimizations_enabled, optimizations_disabled, + optimizations_default, hash_func); + + int tested_times = 0; + if (opt_outs_raw.empty()) { + EXPECT_THAT(optimizations, + UnorderedElementsAre("exp1", "exp3", "exp4", "exp6")); + tested_times++; + } else if (opt_outs_raw == "exp1,exp3") { + EXPECT_THAT(optimizations, UnorderedElementsAre("exp1", "exp4", "exp6")); + tested_times++; + } + EXPECT_EQ(tested_times, 1); +} + +INSTANTIATE_TEST_SUITE_P(Test, SelectOptimizationsConflictTest, + ::testing::Combine(::testing::Values("", "exp2"), + ::testing::Values("", "exp1,exp3"), + ::testing::Values(10, 50, 90))); + } // namespace } // namespace data } // namespace tensorflow diff --git 
a/tensorflow/core/kernels/data/optimize_dataset_op.cc b/tensorflow/core/kernels/data/optimize_dataset_op.cc index c976a8f7b08..a0101435794 100644 --- a/tensorflow/core/kernels/data/optimize_dataset_op.cc +++ b/tensorflow/core/kernels/data/optimize_dataset_op.cc @@ -18,8 +18,10 @@ limitations under the License. #include "tensorflow/core/framework/partial_tensor_shape.h" #include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/kernels/data/dataset_utils.h" #include "tensorflow/core/kernels/data/rewrite_utils.h" #include "tensorflow/core/lib/random/random.h" +#include "tensorflow/core/platform/host_info.h" #include "tensorflow/core/protobuf/rewriter_config.pb.h" namespace tensorflow { @@ -31,10 +33,18 @@ namespace data { /* static */ constexpr const char* const OptimizeDatasetOp::kDatasetType; /* static */ constexpr const char* const OptimizeDatasetOp::kInputDataset; /* static */ constexpr const char* const OptimizeDatasetOp::kOptimizations; +/* static */ constexpr const char* const + OptimizeDatasetOp::kOptimizationsEnabled; +/* static */ constexpr const char* const + OptimizeDatasetOp::kOptimizationsDisabled; +/* static */ constexpr const char* const + OptimizeDatasetOp::kOptimizationsDefault; /* static */ constexpr const char* const OptimizeDatasetOp::kOutputTypes; /* static */ constexpr const char* const OptimizeDatasetOp::kOutputShapes; /* static */ constexpr const char* const OptimizeDatasetOp::kOptimizationConfigs; +/* static */ constexpr const char* const OptimizeDatasetOp::kOptimizeDatasetV1; +/* static */ constexpr const char* const OptimizeDatasetOp::kOptimizeDatasetV2; constexpr char kOptimizerName[] = "tf_data_meta_optimizer"; constexpr char kOptimizers[] = "optimizers"; @@ -42,6 +52,12 @@ constexpr char kOptimizerConfigs[] = "optimizer_configs"; OptimizeDatasetOp::OptimizeDatasetOp(OpKernelConstruction* ctx) : UnaryDatasetOpKernel(ctx) { + auto& op_name = ctx->def().op(); + if (op_name == kOptimizeDatasetV1) { + op_version_ = 1; + } else if (op_name == kOptimizeDatasetV2) { + op_version_ = 2; + } OP_REQUIRES_OK(ctx, ctx->GetAttr(kOptimizationConfigs, &optimization_configs_)); } @@ -49,8 +65,44 @@ OptimizeDatasetOp::OptimizeDatasetOp(OpKernelConstruction* ctx) void OptimizeDatasetOp::MakeDataset(OpKernelContext* ctx, DatasetBase* input, DatasetBase** output) { std::vector optimizations; - OP_REQUIRES_OK( - ctx, ParseVectorArgument(ctx, kOptimizations, &optimizations)); + if (op_version_ == 1) { + OP_REQUIRES_OK( + ctx, ParseVectorArgument(ctx, kOptimizations, &optimizations)); + } else if (op_version_ == 2) { + std::vector optimizations_enabled, optimizations_disabled, + optimizations_default; + OP_REQUIRES_OK(ctx, ParseVectorArgument(ctx, kOptimizationsEnabled, + &optimizations_enabled)); + OP_REQUIRES_OK(ctx, + ParseVectorArgument(ctx, kOptimizationsDisabled, + &optimizations_disabled)); + OP_REQUIRES_OK(ctx, ParseVectorArgument(ctx, kOptimizationsDefault, + &optimizations_default)); + + string job_name = port::JobName(); + if (job_name.empty()) { + // If `job_name` is empty, apply the enabled and default optimizations + // directly. + optimizations.insert(optimizations.end(), optimizations_enabled.begin(), + optimizations_enabled.end()); + optimizations.insert(optimizations.end(), optimizations_default.begin(), + optimizations_default.end()); + } else { + // The map that stores the experiment names and for how much percentage + // of the jobs, the experiments will be randomly turned on. + // + // This is currently empty; we have no live experiments yet. 
+ absl::flat_hash_map live_experiments; + + const string opt_ins_raw = std::getenv("TF_DATA_EXPERIMENT_OPT_IN"); + const string opt_outs_raw = std::getenv("TF_DATA_EXPERIMENT_OPT_OUT"); + auto hash_func = [](const string& str) { return Hash64(str); }; + optimizations = SelectOptimizations( + job_name, opt_ins_raw, opt_outs_raw, live_experiments, + optimizations_enabled, optimizations_disabled, optimizations_default, + hash_func); + } + } auto config_factory = [this, &optimizations]() { return CreateConfig(optimizations, optimization_configs_); @@ -95,6 +147,8 @@ RewriterConfig OptimizeDatasetOp::CreateConfig( namespace { REGISTER_KERNEL_BUILDER(Name("OptimizeDataset").Device(DEVICE_CPU), OptimizeDatasetOp); +REGISTER_KERNEL_BUILDER(Name("OptimizeDatasetV2").Device(DEVICE_CPU), + OptimizeDatasetOp); } // namespace } // namespace data } // namespace tensorflow diff --git a/tensorflow/core/kernels/data/optimize_dataset_op.h b/tensorflow/core/kernels/data/optimize_dataset_op.h index a5fcc72260d..d9e366f1ad5 100644 --- a/tensorflow/core/kernels/data/optimize_dataset_op.h +++ b/tensorflow/core/kernels/data/optimize_dataset_op.h @@ -25,10 +25,18 @@ class OptimizeDatasetOp : public UnaryDatasetOpKernel { static constexpr const char* const kDatasetType = "Optimize"; static constexpr const char* const kInputDataset = "input_dataset"; static constexpr const char* const kOptimizations = "optimizations"; + static constexpr const char* const kOptimizationsEnabled = + "optimizations_enabled"; + static constexpr const char* const kOptimizationsDisabled = + "optimizations_disabled"; + static constexpr const char* const kOptimizationsDefault = + "optimizations_default"; static constexpr const char* const kOutputTypes = "output_types"; static constexpr const char* const kOutputShapes = "output_shapes"; static constexpr const char* const kOptimizationConfigs = "optimization_configs"; + static constexpr const char* const kOptimizeDatasetV1 = "OptimizeDataset"; + static constexpr const char* const kOptimizeDatasetV2 = "OptimizeDatasetV2"; explicit OptimizeDatasetOp(OpKernelConstruction* ctx); @@ -41,6 +49,7 @@ class OptimizeDatasetOp : public UnaryDatasetOpKernel { std::vector optimizations_configs); std::vector optimization_configs_; + int op_version_ = 0; }; } // namespace data diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc index 4f750cc938d..6ef5635e95a 100644 --- a/tensorflow/core/ops/dataset_ops.cc +++ b/tensorflow/core/ops/dataset_ops.cc @@ -837,6 +837,17 @@ REGISTER_OP("OptimizeDataset") .Attr("optimization_configs: list(string) = []") .SetShapeFn(shape_inference::ScalarShape); +REGISTER_OP("OptimizeDatasetV2") + .Input("input_dataset: variant") + .Input("optimizations_enabled: string") + .Input("optimizations_disabled: string") + .Input("optimizations_default: string") + .Output("handle: variant") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .Attr("optimization_configs: list(string) = []") + .SetShapeFn(shape_inference::ScalarShape); + REGISTER_OP("OptionalFromValue") .Input("components: Toutput_types") .Output("optional: variant") diff --git a/tensorflow/core/platform/default/port.cc b/tensorflow/core/platform/default/port.cc index 11b3cd7fd9a..5b96eec072c 100644 --- a/tensorflow/core/platform/default/port.cc +++ b/tensorflow/core/platform/default/port.cc @@ -61,6 +61,8 @@ string Hostname() { return string(hostname); } +string JobName() { return ""; } + int NumSchedulableCPUs() { #if defined(__linux__) && 
!defined(__ANDROID__) cpu_set_t cpuset; diff --git a/tensorflow/core/platform/host_info.h b/tensorflow/core/platform/host_info.h index e76b83adf34..3447b2e0330 100644 --- a/tensorflow/core/platform/host_info.h +++ b/tensorflow/core/platform/host_info.h @@ -21,9 +21,13 @@ limitations under the License. namespace tensorflow { namespace port { -// Return the hostname of the machine on which this process is running +// Return the hostname of the machine on which this process is running. string Hostname(); +// Return the job name as a string if it exists, otherwise return an empty +// string. +string JobName(); + } // namespace port } // namespace tensorflow diff --git a/tensorflow/python/data/experimental/kernel_tests/optimize_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/optimize_dataset_test.py index 59e41528ea4..e26e97dbd97 100644 --- a/tensorflow/python/data/experimental/kernel_tests/optimize_dataset_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/optimize_dataset_test.py @@ -225,11 +225,14 @@ class OptimizeDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): optimized_it = dataset_ops.make_initializable_iterator(optimized_dataset) self.assertGreaterEqual(len(w), 1) - expected = ("tf.data graph rewrites are not compatible with " - "tf.Variable. The following rewrites will be disabled: %s." - " To enable rewrites, use resource variables instead by " - "calling `tf.enable_resource_variables()` at the start of the " - "program." % (", ".join(options._graph_rewrites()))) + graph_rewrites = options._graph_rewrites() + expected = ( + "tf.data graph rewrites are not compatible with " + "tf.Variable. The following rewrites will be disabled: %s." + " To enable rewrites, use resource variables instead by " + "calling `tf.enable_resource_variables()` at the start of the " + "program." 
% + (", ".join(graph_rewrites.enabled + graph_rewrites.default))) self.assertTrue(any(expected in str(warning) for warning in w)) # Check that outputs are the same in the optimized and unoptimized cases, @@ -251,34 +254,136 @@ class OptimizeDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): break @combinations.generate(test_base.default_test_combinations()) - def testOptimizationEnabledByDefault(self): - """Tests that some optimizations are applied to datasets by default.""" + def testOptimizationDefault(self): + """Tests the optimization settings by default.""" options = dataset_ops.Options() - expected_optimizations = [ + expected_optimizations_enabled = [] + expected_optimizations_disabled = [] + expected_optimizations_default = [ "map_and_batch_fusion", "noop_elimination", "shuffle_and_repeat_fusion", ] - self.assertEqual( - set(options._graph_rewrites()), set(expected_optimizations)) + graph_rewrites = options._graph_rewrites() + self.assertEqual(set(graph_rewrites.enabled), + set(expected_optimizations_enabled)) + self.assertEqual(set(graph_rewrites.disabled), + set(expected_optimizations_disabled)) + self.assertEqual(set(graph_rewrites.default), + set(expected_optimizations_default)) + + options.experimental_optimization.apply_default_optimizations = True + graph_rewrites = options._graph_rewrites() + self.assertEqual(set(graph_rewrites.enabled), + set(expected_optimizations_enabled)) + self.assertEqual(set(graph_rewrites.disabled), + set(expected_optimizations_disabled)) + self.assertEqual(set(graph_rewrites.default), + set(expected_optimizations_default)) + + options.experimental_optimization.apply_default_optimizations = False + expected_optimizations_default = [] + graph_rewrites = options._graph_rewrites() + self.assertEqual(set(graph_rewrites.enabled), + set(expected_optimizations_enabled)) + self.assertEqual(set(graph_rewrites.disabled), + set(expected_optimizations_disabled)) + self.assertEqual(set(graph_rewrites.default), + set(expected_optimizations_default)) @combinations.generate(test_base.default_test_combinations()) - def testOptimizationDisableDefault(self): - """Tests that we can disable all graph optimizations enabled by default. - - If the `apply_default_optimizations` optimization options flag is False, - only explicitly enabled optimizations will be applied. 
- """ + def testOptimizationEnabled(self): + """Tests the optimization settings by enabling all.""" options = dataset_ops.Options() - options.experimental_optimization.apply_default_optimizations = False + options.experimental_optimization.filter_fusion = True + options.experimental_optimization.filter_with_random_uniform_fusion = True options.experimental_optimization.hoist_random_uniform = True + options.experimental_optimization.map_and_batch_fusion = True + options.experimental_optimization.map_and_filter_fusion = True + options.experimental_optimization.map_parallelization = True + options.experimental_optimization.map_fusion = True options.experimental_optimization.noop_elimination = True - expected_optimizations = [ + options.experimental_optimization.parallel_batch = True + options.experimental_optimization.shuffle_and_repeat_fusion = True + options.experimental_optimization.map_vectorization.enabled = True + options.experimental_optimization.autotune_buffers = True + options.experimental_deterministic = False + options.experimental_stats.latency_all_edges = True + options.experimental_slack = True + + expected_optimizations_enabled = [ + "filter_fusion", + "filter_with_random_uniform_fusion", "hoist_random_uniform", + "map_and_batch_fusion", + "map_and_filter_fusion", + "map_parallelization", + "map_fusion", "noop_elimination", + "parallel_batch", + "shuffle_and_repeat_fusion", + "map_vectorization", + "inject_prefetch", + "make_sloppy", + "latency_all_edges", + "slack", ] - self.assertEqual( - set(options._graph_rewrites()), set(expected_optimizations)) + expected_optimizations_disabled = [] + expected_optimizations_default = [] + graph_rewrites = options._graph_rewrites() + self.assertEqual(set(graph_rewrites.enabled), + set(expected_optimizations_enabled)) + self.assertEqual(set(graph_rewrites.disabled), + set(expected_optimizations_disabled)) + self.assertEqual(set(graph_rewrites.default), + set(expected_optimizations_default)) + + @combinations.generate(test_base.default_test_combinations()) + def testOptimizationDisabled(self): + """Tests the optimization settings by disabling all.""" + options = dataset_ops.Options() + options.experimental_optimization.filter_fusion = False + options.experimental_optimization.filter_with_random_uniform_fusion = False + options.experimental_optimization.hoist_random_uniform = False + options.experimental_optimization.map_and_batch_fusion = False + options.experimental_optimization.map_and_filter_fusion = False + options.experimental_optimization.map_parallelization = False + options.experimental_optimization.map_fusion = False + options.experimental_optimization.noop_elimination = False + options.experimental_optimization.parallel_batch = False + options.experimental_optimization.shuffle_and_repeat_fusion = False + options.experimental_optimization.map_vectorization.enabled = False + options.experimental_optimization.autotune = False + options.experimental_deterministic = True + options.experimental_stats.latency_all_edges = False + options.experimental_slack = False + + expected_optimizations_enabled = [] + expected_optimizations_disabled = [ + "filter_fusion", + "filter_with_random_uniform_fusion", + "hoist_random_uniform", + "map_and_batch_fusion", + "map_and_filter_fusion", + "map_parallelization", + "map_fusion", + "noop_elimination", + "parallel_batch", + "shuffle_and_repeat_fusion", + "map_vectorization", + "inject_prefetch", + "make_sloppy", + "latency_all_edges", + "slack", + ] + expected_optimizations_default = [] + graph_rewrites 
= options._graph_rewrites() + self.assertEqual(set(graph_rewrites.enabled), + set(expected_optimizations_enabled)) + self.assertEqual(set(graph_rewrites.disabled), + set(expected_optimizations_disabled)) + self.assertEqual(set(graph_rewrites.default), + set(expected_optimizations_default)) @combinations.generate(test_base.default_test_combinations()) def testAutotuningDefaults(self): @@ -295,7 +400,7 @@ class OptimizeDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): def testAutotuningBufferSizes(self): options = dataset_ops.Options() options.experimental_optimization.autotune_buffers = True - self.assertIn("inject_prefetch", options._graph_rewrites()) + self.assertIn("inject_prefetch", options._graph_rewrites().enabled) autotune, algorithm, cpu_budget = options._autotune_settings() self.assertTrue(autotune) self.assertEqual(algorithm, diff --git a/tensorflow/python/data/experimental/kernel_tests/prefetch_with_slack_test.py b/tensorflow/python/data/experimental/kernel_tests/prefetch_with_slack_test.py index ff1f1680a76..cbff39b90e5 100644 --- a/tensorflow/python/data/experimental/kernel_tests/prefetch_with_slack_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/prefetch_with_slack_test.py @@ -45,7 +45,7 @@ class PrefetchWithSlackTest(test_base.DatasetTestBase, parameterized.TestCase): multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator( dataset, ["/cpu:1", "/cpu:2"]) dataset = multi_device_iterator._dataset # pylint: disable=protected-access - self.assertIn("slack", dataset.options()._graph_rewrites()) + self.assertIn("slack", dataset.options()._graph_rewrites().enabled) self.assertIn("slack:slack_period:2", dataset.options()._graph_rewrite_configs()) @@ -69,7 +69,7 @@ class PrefetchWithSlackTest(test_base.DatasetTestBase, parameterized.TestCase): options = dataset_ops.Options() options.experimental_slack = True dataset = dataset.with_options(options) - self.assertIn("slack", dataset.options()._graph_rewrites()) + self.assertIn("slack", dataset.options()._graph_rewrites().enabled) self.assertIn("slack:slack_period:1", dataset.options()._graph_rewrite_configs()) self.assertDatasetProduces(dataset, range(10)) diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization/optimize_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/optimize_dataset_serialization_test.py index 385b1acd49c..30d53165f85 100644 --- a/tensorflow/python/data/experimental/kernel_tests/serialization/optimize_dataset_serialization_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/serialization/optimize_dataset_serialization_test.py @@ -36,7 +36,8 @@ class OptimizeDatasetSerializationTest( def build_dataset(num_elements, batch_size): return dataset_ops.Dataset.range(num_elements).map(lambda x: x * x).batch( - batch_size).apply(optimization.optimize(["map_and_batch_fusion"])) + batch_size).apply( + optimization.optimize(["map_and_batch_fusion"], None, None)) self.run_core_tests(lambda: build_dataset(200, 10), 20) @@ -50,7 +51,8 @@ class OptimizeDatasetSerializationTest( dataset = dataset.batch(5) # map_vectorization adds a new vectorized function to the function # library. 
- dataset = dataset.apply(optimization.optimize(["map_vectorization"])) + dataset = dataset.apply( + optimization.optimize(["map_vectorization"], None, None)) return dataset self.run_core_tests(build_dataset, 20) diff --git a/tensorflow/python/data/experimental/ops/optimization.py b/tensorflow/python/data/experimental/ops/optimization.py index 4581a612ed6..161850521de 100644 --- a/tensorflow/python/data/experimental/ops/optimization.py +++ b/tensorflow/python/data/experimental/ops/optimization.py @@ -36,13 +36,19 @@ def model(): return _apply_fn -def optimize(optimizations=None): +def optimize(optimizations_enabled=None, optimizations_disabled=None, + optimizations_default=None): """A transformation that applies optimizations. Args: - optimizations: (Optional.) A `tf.string` vector `tf.Tensor` identifying - optimizations to use. If not specified, the default set of optimizations - is applied. + optimizations_enabled: (Optional.) A `tf.string` vector `tf.Tensor` + identifying enabled optimizations. If not specified, set to be empty. + + optimizations_disabled: (Optional.) A `tf.string` vector `tf.Tensor` + identifying disabled optimizations. If not specified, set to be empty. + + optimizations_default: (Optional.) A `tf.string` vector `tf.Tensor` + identifying default optimizations. If not specified, set to be empty. Returns: A `Dataset` transformation function, which can be passed to @@ -51,7 +57,11 @@ def optimize(optimizations=None): def _apply_fn(dataset): """Function from `Dataset` to `Dataset` that applies the transformation.""" - return dataset_ops._OptimizeDataset(dataset, optimizations) # pylint: disable=protected-access + return dataset_ops._OptimizeDataset( # pylint: disable=protected-access + dataset, + optimizations_enabled, + optimizations_disabled, + optimizations_default) return _apply_fn diff --git a/tensorflow/python/data/experimental/ops/optimization_options.py b/tensorflow/python/data/experimental/ops/optimization_options.py index ab1c7b73212..fa7a0d23dea 100644 --- a/tensorflow/python/data/experimental/ops/optimization_options.py +++ b/tensorflow/python/data/experimental/ops/optimization_options.py @@ -53,9 +53,13 @@ class MapVectorizationOptions(options.OptionsBase): "defaults to False.") def _graph_rewrites(self): - if self.enabled: - return ["map_vectorization"] - return [] + graph_rewrites = options.graph_rewrites() + result = graph_rewrites(enabled=[], disabled=[], default=[]) + if self.enabled is True: # pylint: disable=g-bool-id-comparison + result.enabled.append("map_vectorization") + elif self.enabled is False: # pylint: disable=g-bool-id-comparison + result.disabled.append("map_vectorization") + return result def _graph_rewrite_configs(self): if not self.enabled: @@ -229,8 +233,20 @@ class OptimizationOptions(options.OptionsBase): return autotune, algorithm, cpu_budget def _graph_rewrites(self): - """Produces the list of enabled graph optimizations.""" - result = set() + """Produces lists of enabled, disabled and default graph optimizations. + + Returns: + result: a namedtuple with three attributes. `result.enabled` is the list + of user enabled optimizations. `result.disabled` is the list of user + disabled optimizations. `result.default` is the list of optimizations + that are enabled by default (the user has not explicitly enabled or + disabled them). 
+ """ + if self.map_vectorization is not None: + result = self.map_vectorization._graph_rewrites() # pylint: disable=protected-access + else: + result = MapVectorizationOptions()._graph_rewrites() # pylint: disable=protected-access + all_optimizations = [ "filter_fusion", "filter_with_random_uniform_fusion", @@ -244,11 +260,8 @@ class OptimizationOptions(options.OptionsBase): "reorder_data_discarding_ops", "shuffle_and_repeat_fusion", ] - for optimization in all_optimizations: - if getattr(self, optimization): - result.add(optimization) - if self.apply_default_optimizations is not False: + if self.apply_default_optimizations is not False: # pylint: disable=g-bool-id-comparison # The following optimizations are turned on by default, unless the user # explicitly disables them. optimizations_to_disable = [ @@ -257,21 +270,29 @@ class OptimizationOptions(options.OptionsBase): "shuffle_and_repeat_fusion", ] for optimization in optimizations_to_disable: - if getattr(self, optimization) is not False: - result.add(optimization) + if getattr(self, optimization) is None: + result.default.append(optimization) - if self.map_vectorization is not None: - result.update(self.map_vectorization._graph_rewrites()) # pylint: disable=protected-access + # Each of these attributes on the Options object is either True (explicitly + # enabled), False (explicitly disabled), or None (default). + for optimization in all_optimizations: + if getattr(self, optimization) is True: # pylint: disable=g-bool-id-comparison + result.enabled.append(optimization) + elif getattr(self, optimization) is False: # pylint: disable=g-bool-id-comparison + result.disabled.append(optimization) autotune_buffers = self._autotune_buffers() - if self.autotune is not False and autotune_buffers: # pylint: disable=g-bool-id-comparison + if self.autotune is not False and autotune_buffers is True: # pylint: disable=g-bool-id-comparison # When autotuning buffer sizes is enabled, we inject a `prefetch` # transformation after asynchronous dataset ops. Only the buffer sizes of # prefetch transformations will be autotuned, though this is practically # equivalent to tuning the buffer sizes of the other asynchronous # transformations. 
- result.add("inject_prefetch") - return sorted(list(result)) + result.enabled.append("inject_prefetch") + if self.autotune is False: # pylint: disable=g-bool-id-comparison + result.disabled.append("inject_prefetch") + + return result def _graph_rewrite_configs(self): if self.map_vectorization is not None: diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 491eb031931..bd75d0a735a 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -30,11 +30,13 @@ from six.moves import queue as Queue # pylint: disable=redefined-builtin from tensorflow.core.framework import graph_pb2 from tensorflow.python import tf2 +from tensorflow.python.compat import compat from tensorflow.python.data.experimental.ops import distribute_options from tensorflow.python.data.experimental.ops import optimization_options from tensorflow.python.data.experimental.ops import stats_options from tensorflow.python.data.experimental.ops import threading_options from tensorflow.python.data.ops import iterator_ops +from tensorflow.python.data.util import convert from tensorflow.python.data.util import nest from tensorflow.python.data.util import options as options_lib from tensorflow.python.data.util import random_seed @@ -374,16 +376,18 @@ class DatasetV2(collections_abc.Iterable, tracking_base.Trackable, graph_rewrites = options._graph_rewrites() graph_rewrite_configs = options._graph_rewrite_configs() # pylint: enable=protected-access - if graph_rewrites: + if graph_rewrites.enabled or graph_rewrites.default: if self._has_captured_ref(): warnings.warn( "tf.data graph rewrites are not compatible with tf.Variable. " "The following rewrites will be disabled: %s. To enable " "rewrites, use resource variables instead by calling " "`tf.enable_resource_variables()` at the start of the program." % - ", ".join(graph_rewrites)) + ", ".join(graph_rewrites.enabled + graph_rewrites.default)) else: - dataset = _OptimizeDataset(dataset, graph_rewrites, + dataset = _OptimizeDataset(dataset, graph_rewrites.enabled, + graph_rewrites.disabled, + graph_rewrites.default, graph_rewrite_configs) # (3) Apply autotune options @@ -2887,22 +2891,39 @@ class Options(options_lib.OptionsBase): "is being captured.") def _graph_rewrites(self): - """Produces the list of enabled static graph rewrites.""" - result = [] + """Produces lists of enabled, disabled, default static graph rewrites. + + Returns: + result: a namedtuple with three attributes. `result.enabled` is the list + of user enabled graph rewrites. `result.disabled` is the list of user + disabled graph rewrites. `result.default` is the list of graph + rewrites that are enabled by default (the user has not explicitly + enabled or disabled them). 
+ """ if self.experimental_optimization is not None: - result.extend(self.experimental_optimization._graph_rewrites()) # pylint: disable=protected-access + result = self.experimental_optimization._graph_rewrites() # pylint: disable=protected-access else: # Apply default options - result.extend( - optimization_options.OptimizationOptions()._graph_rewrites()) # pylint: disable=protected-access + result = optimization_options.OptimizationOptions()._graph_rewrites() # pylint: disable=protected-access if self.experimental_deterministic is False: # pylint: disable=g-bool-id-comparison - result.append("make_sloppy") - if self.experimental_stats and self.experimental_stats.latency_all_edges: - result.append("latency_all_edges") - if self.experimental_slack: - result.append("slack") - return result + result.enabled.append("make_sloppy") + elif self.experimental_deterministic is True: # pylint: disable=g-bool-id-comparison + result.disabled.append("make_sloppy") + if self.experimental_stats: + if self.experimental_stats.latency_all_edges is True: # pylint: disable=g-bool-id-comparison + result.enabled.append("latency_all_edges") + elif self.experimental_stats.latency_all_edges is False: # pylint: disable=g-bool-id-comparison + result.disabled.append("latency_all_edges") + if self.experimental_slack is True: # pylint: disable=g-bool-id-comparison + result.enabled.append("slack") + elif self.experimental_slack is False: # pylint: disable=g-bool-id-comparison + result.disabled.append("slack") + + graph_rewrites = options_lib.graph_rewrites() + return graph_rewrites(enabled=list(set(result.enabled)), + disabled=list(set(result.disabled)), + default=list(set(result.default))) def _graph_rewrite_configs(self): """Produces the list of configurations for enabled graph optimizations.""" @@ -4387,19 +4408,55 @@ class _ModelDataset(UnaryUnchangedStructureDataset): class _OptimizeDataset(UnaryUnchangedStructureDataset): """A `Dataset` that acts as an identity, and applies optimizations.""" - def __init__(self, input_dataset, optimizations, optimization_configs=None): + def __init__(self, + input_dataset, + optimizations_enabled, + optimizations_disabled, + optimizations_default, + optimization_configs=None): self._input_dataset = input_dataset - if optimizations is None: - optimizations = [] if optimization_configs is None: optimization_configs = [] - self._optimizations = ops.convert_to_tensor( - optimizations, dtype=dtypes.string, name="optimizations") - variant_tensor = gen_dataset_ops.optimize_dataset( - input_dataset._variant_tensor, # pylint: disable=protected-access - self._optimizations, - optimization_configs=optimization_configs, - **self._flat_structure) + + if compat.forward_compatible(2020, 8, 6): + self._optimizations_enabled = convert.optional_param_to_tensor( + argument_name="optimizations_enabled", + argument_value=optimizations_enabled, + argument_default=[], + argument_dtype=dtypes.string) + self._optimizations_disabled = convert.optional_param_to_tensor( + argument_name="optimizations_disabled", + argument_value=optimizations_disabled, + argument_default=[], + argument_dtype=dtypes.string) + self._optimizations_default = convert.optional_param_to_tensor( + argument_name="optimizations_default", + argument_value=optimizations_default, + argument_default=[], + argument_dtype=dtypes.string) + + variant_tensor = gen_dataset_ops.optimize_dataset_v2( + input_dataset._variant_tensor, # pylint: disable=protected-access + self._optimizations_enabled, + self._optimizations_disabled, + 
self._optimizations_default, + optimization_configs=optimization_configs, + **self._flat_structure) + else: + if optimizations_enabled is None: + optimizations_enabled = [] + if optimizations_default is None: + optimizations_default = [] + + self._optimizations = ops.convert_to_tensor( + optimizations_enabled + optimizations_default, + dtype=dtypes.string, + name="optimizations") + variant_tensor = gen_dataset_ops.optimize_dataset( + input_dataset._variant_tensor, # pylint: disable=protected-access + self._optimizations, + optimization_configs=optimization_configs, + **self._flat_structure) super(_OptimizeDataset, self).__init__(input_dataset, variant_tensor) diff --git a/tensorflow/python/data/util/options.py b/tensorflow/python/data/util/options.py index 3c79197fae8..781ae6403fa 100644 --- a/tensorflow/python/data/util/options.py +++ b/tensorflow/python/data/util/options.py @@ -18,6 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import collections + def _internal_attr_name(name): return "_" + name @@ -56,6 +58,12 @@ class OptionsBase(object): "Cannot set the property %s on %s." % (name, type(self).__name__)) +# Creates a namedtuple with three keys for optimization graph rewrites settings. +def graph_rewrites(): + return collections.namedtuple("GraphRewrites", + ["enabled", "disabled", "default"]) + + def create_option(name, ty, docstring, default_factory=lambda: None): """Creates a type-checked property. diff --git a/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt index 4ad0c0d4448..3c47a392b7e 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt @@ -2660,6 +2660,10 @@ tf_module { name: "OptimizeDataset" argspec: "args=[\'input_dataset\', \'optimizations\', \'output_types\', \'output_shapes\', \'optimization_configs\', \'name\'], varargs=None, keywords=None, defaults=[\'[]\', \'None\'], " } + member_method { + name: "OptimizeDatasetV2" + argspec: "args=[\'input_dataset\', \'optimizations_enabled\', \'optimizations_disabled\', \'optimizations_default\', \'output_types\', \'output_shapes\', \'optimization_configs\', \'name\'], varargs=None, keywords=None, defaults=[\'[]\', \'None\'], " + } member_method { name: "OptionalFromValue" argspec: "args=[\'components\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt index 4ad0c0d4448..3c47a392b7e 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt @@ -2660,6 +2660,10 @@ tf_module { name: "OptimizeDataset" argspec: "args=[\'input_dataset\', \'optimizations\', \'output_types\', \'output_shapes\', \'optimization_configs\', \'name\'], varargs=None, keywords=None, defaults=[\'[]\', \'None\'], " } + member_method { + name: "OptimizeDatasetV2" + argspec: "args=[\'input_dataset\', \'optimizations_enabled\', \'optimizations_disabled\', \'optimizations_default\', \'output_types\', \'output_shapes\', \'optimization_configs\', \'name\'], varargs=None, keywords=None, defaults=[\'[]\', \'None\'], " + } member_method { name: "OptionalFromValue" argspec: "args=[\'components\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " From ac84c5eb812097f1658170b60394db2a6b247d9c Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: 
Mon, 27 Jul 2020 22:24:59 -0700 Subject: [PATCH 1458/2522] Port the pooling op to the new TfLiteEvalTensor API. PiperOrigin-RevId: 323510133 Change-Id: Ie1510dc747f5922b29961cccd1e1d3d707d8bf55 --- tensorflow/lite/micro/kernels/BUILD | 1 + tensorflow/lite/micro/kernels/pooling.cc | 159 +++++++++------- tensorflow/lite/micro/kernels/pooling_test.cc | 174 +++++------------- .../lite/micro/memory_arena_threshold_test.cc | 8 +- 4 files changed, 145 insertions(+), 197 deletions(-) diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index 0e4289073ec..f74ba5e3865 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD @@ -147,6 +147,7 @@ tflite_micro_cc_test( "pooling_test.cc", ], deps = [ + ":kernel_runner", "//tensorflow/lite/c:common", "//tensorflow/lite/micro:op_resolvers", "//tensorflow/lite/micro/testing:micro_test", diff --git a/tensorflow/lite/micro/kernels/pooling.cc b/tensorflow/lite/micro/kernels/pooling.cc index 6d8e61ed755..90d48aaee5a 100644 --- a/tensorflow/lite/micro/kernels/pooling.cc +++ b/tensorflow/lite/micro/kernels/pooling.cc @@ -19,6 +19,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/padding.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { namespace ops { @@ -32,6 +33,10 @@ constexpr int kOutputTensor = 0; struct OpData { TfLitePaddingValues padding; + int32_t activation_min; + int32_t activation_max; + float activation_min_f32; + float activation_max_f32; }; TfLiteStatus CalculateOpData(const TfLiteContext* context, @@ -55,11 +60,7 @@ TfLiteStatus CalculateOpData(const TfLiteContext* context, void AverageEvalFloat(const TfLiteContext* context, const TfLiteNode* node, const TfLitePoolParams* params, const OpData* data, - const TfLiteTensor* input, TfLiteTensor* output) { - float activation_min, activation_max; - CalculateActivationRange(params->activation, &activation_min, - &activation_max); - + const TfLiteEvalTensor* input, TfLiteEvalTensor* output) { PoolParams op_params; op_params.stride_height = params->stride_height; op_params.stride_width = params->stride_width; @@ -67,20 +68,19 @@ void AverageEvalFloat(const TfLiteContext* context, const TfLiteNode* node, op_params.filter_width = params->filter_width; op_params.padding_values.height = data->padding.height; op_params.padding_values.width = data->padding.width; - op_params.float_activation_min = activation_min; - op_params.float_activation_max = activation_max; - reference_ops::AveragePool( - op_params, GetTensorShape(input), GetTensorData(input), - GetTensorShape(output), GetTensorData(output)); + op_params.float_activation_min = data->activation_min_f32; + op_params.float_activation_max = data->activation_max_f32; + reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node, const TfLitePoolParams* params, const OpData* data, - const TfLiteTensor* input, TfLiteTensor* output) { + const TfLiteEvalTensor* input, + TfLiteEvalTensor* output) { TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8); - int32_t activation_min, activation_max; - (void)CalculateActivationRangeQuantized(context, params->activation, output, - &activation_min, &activation_max); PoolParams 
op_params; op_params.stride_height = params->stride_height; @@ -89,27 +89,26 @@ void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node, op_params.filter_width = params->filter_width; op_params.padding_values.height = data->padding.height; op_params.padding_values.width = data->padding.width; - op_params.quantized_activation_min = activation_min; - op_params.quantized_activation_max = activation_max; + op_params.quantized_activation_min = data->activation_min; + op_params.quantized_activation_max = data->activation_max; if (input->type == kTfLiteUInt8) { - reference_ops::AveragePool( - op_params, GetTensorShape(input), GetTensorData(input), - GetTensorShape(output), GetTensorData(output)); + reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } else { reference_integer_ops::AveragePool( - op_params, GetTensorShape(input), GetTensorData(input), - GetTensorShape(output), GetTensorData(output)); + op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } } void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node, - TfLitePoolParams* params, OpData* data, - const TfLiteTensor* input, TfLiteTensor* output) { - float activation_min, activation_max; - CalculateActivationRange(params->activation, &activation_min, - &activation_max); - + TfLitePoolParams* params, const OpData* data, + const TfLiteEvalTensor* input, TfLiteEvalTensor* output) { tflite::PoolParams op_params; op_params.stride_height = params->stride_height; op_params.stride_width = params->stride_width; @@ -117,22 +116,17 @@ void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node, op_params.filter_width = params->filter_width; op_params.padding_values.height = data->padding.height; op_params.padding_values.width = data->padding.width; - op_params.float_activation_min = activation_min; - op_params.float_activation_max = activation_max; - reference_ops::MaxPool(op_params, GetTensorShape(input), - GetTensorData(input), GetTensorShape(output), - GetTensorData(output)); + op_params.float_activation_min = data->activation_min_f32; + op_params.float_activation_max = data->activation_max_f32; + reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node, - TfLitePoolParams* params, OpData* data, - const TfLiteTensor* input, TfLiteTensor* output) { - TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8); - - int32_t activation_min, activation_max; - (void)CalculateActivationRangeQuantized(context, params->activation, output, - &activation_min, &activation_max); - + TfLitePoolParams* params, const OpData* data, + const TfLiteEvalTensor* input, TfLiteEvalTensor* output) { tflite::PoolParams op_params; op_params.stride_height = params->stride_height; op_params.stride_width = params->stride_width; @@ -140,39 +134,44 @@ void MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node, op_params.filter_width = params->filter_width; op_params.padding_values.height = data->padding.height; op_params.padding_values.width = data->padding.width; - op_params.quantized_activation_min = activation_min; - op_params.quantized_activation_max = activation_max; + 
op_params.quantized_activation_min = data->activation_min; + op_params.quantized_activation_max = data->activation_max; if (input->type == kTfLiteUInt8) { - reference_ops::MaxPool( - op_params, GetTensorShape(input), GetTensorData(input), - GetTensorShape(output), GetTensorData(output)); + reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } else { reference_integer_ops::MaxPool( - op_params, GetTensorShape(input), GetTensorData(input), - GetTensorShape(output), GetTensorData(output)); + op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } } } // namespace - TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->builtin_data != nullptr); auto* params = reinterpret_cast(node->builtin_data); - OpData data; - const TfLiteTensor* input = GetInput(context, node, kInputTensor); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TFLITE_DCHECK(node->user_data != nullptr); + const OpData* data = static_cast(node->user_data); - TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, &data)); + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); // Inputs and outputs share the same type, guaranteed by the converter. switch (input->type) { case kTfLiteFloat32: - AverageEvalFloat(context, node, params, &data, input, output); + AverageEvalFloat(context, node, params, data, input, output); break; case kTfLiteUInt8: case kTfLiteInt8: - AverageEvalQuantized(context, node, params, &data, input, output); + AverageEvalQuantized(context, node, params, data, input, output); break; default: TF_LITE_KERNEL_LOG(context, "Input type %s is not currently supported", @@ -183,21 +182,24 @@ TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) { } TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->builtin_data != nullptr); auto* params = reinterpret_cast(node->builtin_data); - OpData data; - const TfLiteTensor* input = GetInput(context, node, kInputTensor); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TFLITE_DCHECK(node->user_data != nullptr); + const OpData* data = static_cast(node->user_data); - TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, &data)); + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); switch (input->type) { case kTfLiteFloat32: - MaxEvalFloat(context, node, params, &data, input, output); + MaxEvalFloat(context, node, params, data, input, output); break; case kTfLiteUInt8: case kTfLiteInt8: - MaxEvalQuantized(context, node, params, &data, input, output); + MaxEvalQuantized(context, node, params, data, input, output); break; default: TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.", @@ -207,12 +209,41 @@ TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpData)); +} + +TfLiteStatus 
Prepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->builtin_data != nullptr); + auto* params = reinterpret_cast(node->builtin_data); + + TFLITE_DCHECK(node->user_data != nullptr); + OpData* data = static_cast(node->user_data); + + const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, data)); + + if (input->type == kTfLiteFloat32) { + CalculateActivationRange(params->activation, &data->activation_min_f32, + &data->activation_max_f32); + } else if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8) { + CalculateActivationRangeQuantized(context, params->activation, output, + &data->activation_min, + &data->activation_max); + } + + return kTfLiteOk; +} + } // namespace pooling TfLiteRegistration Register_AVERAGE_POOL_2D() { - return {/*init=*/nullptr, + return {/*init=*/pooling::Init, /*free=*/nullptr, - /*prepare=*/nullptr, + /*prepare=*/pooling::Prepare, /*invoke=*/pooling::AverageEval, /*profiling_string=*/nullptr, /*builtin_code=*/0, @@ -221,9 +252,9 @@ TfLiteRegistration Register_AVERAGE_POOL_2D() { } TfLiteRegistration Register_MAX_POOL_2D() { - return {/*init=*/nullptr, + return {/*init=*/pooling::Init, /*free=*/nullptr, - /*prepare=*/nullptr, + /*prepare=*/pooling::Prepare, /*invoke=*/pooling::MaxEval, /*profiling_string=*/nullptr, /*builtin_code=*/0, diff --git a/tensorflow/lite/micro/kernels/pooling_test.cc b/tensorflow/lite/micro/kernels/pooling_test.cc index 73d5d80de7c..ec5eb47d0da 100644 --- a/tensorflow/lite/micro/kernels/pooling_test.cc +++ b/tensorflow/lite/micro/kernels/pooling_test.cc @@ -17,7 +17,7 @@ limitations under the License. #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/micro/all_ops_resolver.h" +#include "tensorflow/lite/micro/kernels/kernel_runner.h" #include "tensorflow/lite/micro/testing/micro_test.h" #include "tensorflow/lite/micro/testing/test_utils.h" @@ -46,13 +46,10 @@ void TestAveragePoolingFloat(std::initializer_list input_dims_data, CreateFloatTensor(output_data, output_dims), }; - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_AVERAGE_POOL_2D); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); + int inputs_array_data[] = {1, 0}; + TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); + int outputs_array_data[] = {1, 1}; + TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); TfLitePoolParams builtin_data = {padding, stride_width, @@ -61,33 +58,15 @@ void TestAveragePoolingFloat(std::initializer_list input_dims_data, filter_height, activation, {}}; - const char* init_data = reinterpret_cast(&builtin_data); - size_t init_data_size = 0; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, init_data, init_data_size); - } - int inputs_array_data[] = {1, 0}; - TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); - int outputs_array_data[] = {1, 1}; - TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = reinterpret_cast(&builtin_data); - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; 
+ const TfLiteRegistration registration = + tflite::ops::micro::Register_AVERAGE_POOL_2D(); + micro::KernelRunner runner( + registration, tensors, tensors_size, inputs_array, outputs_array, + reinterpret_cast(&builtin_data), micro_test::reporter); - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - if (registration->free) { - registration->free(&context, user_data); - } + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); for (int i = 0; i < output_dims_count; ++i) { TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i], @@ -119,13 +98,10 @@ void TestAveragePoolingQuantized( CreateQuantizedTensor(output_data, output_dims, output_min, output_max), }; - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_AVERAGE_POOL_2D); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); + int inputs_array_data[] = {1, 0}; + TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); + int outputs_array_data[] = {1, 1}; + TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); TfLitePoolParams builtin_data = {padding, stride_width, @@ -134,33 +110,15 @@ void TestAveragePoolingQuantized( filter_height, activation, {}}; - const char* init_data = reinterpret_cast(&builtin_data); - size_t init_data_size = 0; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, init_data, init_data_size); - } - int inputs_array_data[] = {1, 0}; - TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); - int outputs_array_data[] = {1, 1}; - TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = reinterpret_cast(&builtin_data); - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; + const TfLiteRegistration registration = + tflite::ops::micro::Register_AVERAGE_POOL_2D(); + micro::KernelRunner runner( + registration, tensors, tensors_size, inputs_array, outputs_array, + reinterpret_cast(&builtin_data), micro_test::reporter); - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - if (registration->free) { - registration->free(&context, user_data); - } + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); for (int i = 0; i < output_dims_count; ++i) { TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i], @@ -187,13 +145,10 @@ void TestMaxPoolFloat(std::initializer_list input_dims_data, CreateFloatTensor(output_data, output_dims), }; - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_MAX_POOL_2D); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); + int inputs_array_data[] = {1, 0}; + TfLiteIntArray* 
inputs_array = IntArrayFromInts(inputs_array_data); + int outputs_array_data[] = {1, 1}; + TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); TfLitePoolParams builtin_data = {padding, stride_width, @@ -203,33 +158,15 @@ void TestMaxPoolFloat(std::initializer_list input_dims_data, activation, {}}; - const char* init_data = reinterpret_cast(&builtin_data); - size_t init_data_size = 0; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, init_data, init_data_size); - } + const TfLiteRegistration registration = + tflite::ops::micro::Register_MAX_POOL_2D(); + micro::KernelRunner runner( + registration, tensors, tensors_size, inputs_array, outputs_array, + reinterpret_cast(&builtin_data), micro_test::reporter); - int inputs_array_data[] = {1, 0}; - TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); - int outputs_array_data[] = {1, 1}; - TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = reinterpret_cast(&builtin_data); - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - if (registration->free) { - registration->free(&context, user_data); - } for (int i = 0; i < output_dims_count; ++i) { TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i], 1e-5f); @@ -260,13 +197,10 @@ void TestMaxPoolQuantized(std::initializer_list input_dims_data, CreateQuantizedTensor(output_data, output_dims, output_min, output_max), }; - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_MAX_POOL_2D); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); + int inputs_array_data[] = {1, 0}; + TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); + int outputs_array_data[] = {1, 1}; + TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); TfLitePoolParams builtin_data = {padding, stride_width, @@ -276,33 +210,15 @@ void TestMaxPoolQuantized(std::initializer_list input_dims_data, activation, {}}; - const char* init_data = reinterpret_cast(&builtin_data); - size_t init_data_size = 0; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, init_data, init_data_size); - } + const TfLiteRegistration registration = + tflite::ops::micro::Register_MAX_POOL_2D(); + micro::KernelRunner runner( + registration, tensors, tensors_size, inputs_array, outputs_array, + reinterpret_cast(&builtin_data), micro_test::reporter); - int inputs_array_data[] = {1, 0}; - TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); - int outputs_array_data[] = {1, 1}; - TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - 
node.builtin_data = reinterpret_cast(&builtin_data); - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - if (registration->free) { - registration->free(&context, user_data); - } for (int i = 0; i < output_dims_count; ++i) { TF_LITE_MICRO_EXPECT_EQ(expected_output_data.begin()[i], output_data[i]); } diff --git a/tensorflow/lite/micro/memory_arena_threshold_test.cc b/tensorflow/lite/micro/memory_arena_threshold_test.cc index d783f3a970e..d2bb404051a 100644 --- a/tensorflow/lite/micro/memory_arena_threshold_test.cc +++ b/tensorflow/lite/micro/memory_arena_threshold_test.cc @@ -68,11 +68,11 @@ constexpr int kTestConvModelNodeAndRegistrationCount = 7; // NOTE: These values are measured on x86-64: // TODO(b/158651472): Consider auditing these values on non-64 bit systems. #ifdef TF_LITE_STATIC_MEMORY -constexpr int kTestConvModelTotalSize = 9488; -constexpr int kTestConvModelTailSize = 1744; +constexpr int kTestConvModelTotalSize = 9552; +constexpr int kTestConvModelTailSize = 1808; #else -constexpr int kTestConvModelTotalSize = 9648; -constexpr int kTestConvModelTailSize = 1904; +constexpr int kTestConvModelTotalSize = 9712; +constexpr int kTestConvModelTailSize = 1968; #endif constexpr int kTestConvModelHeadSize = 7744; constexpr int kTestConvModelOpRuntimeDataSize = 136; From c8dcee2b650ee92d15c46ab59afa946d49235281 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Mon, 27 Jul 2020 22:59:11 -0700 Subject: [PATCH 1459/2522] Tweak TPU computation placer to use the TPU C API PiperOrigin-RevId: 323514200 Change-Id: Idab6bc18a23b98c95e03d91a2ab00b818cb620a2 --- tensorflow/core/tpu/BUILD | 1 + tensorflow/stream_executor/tpu/BUILD | 1 + tensorflow/stream_executor/tpu/tpu_computation_placer.cc | 5 +++-- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/tpu/BUILD b/tensorflow/core/tpu/BUILD index 62d411c0a67..30a90c1da6c 100644 --- a/tensorflow/core/tpu/BUILD +++ b/tensorflow/core/tpu/BUILD @@ -154,6 +154,7 @@ cc_library( "//tensorflow/core/tpu/kernels:tpu_execute_c_api_hdrs", "//tensorflow/core/tpu/kernels:tpu_mesh_state_c_api_hdrs", "//tensorflow/core/tpu/kernels:tpu_util_c_api_hdrs", + "//tensorflow/stream_executor/tpu:tpu_computation_placer", "//tensorflow/stream_executor/tpu:tpu_executor_c_api_hdrs", "//tensorflow/stream_executor/tpu:tpu_node_context_c_api_hdrs", ], diff --git a/tensorflow/stream_executor/tpu/BUILD b/tensorflow/stream_executor/tpu/BUILD index 813c0fa87a9..1e5063f31f8 100644 --- a/tensorflow/stream_executor/tpu/BUILD +++ b/tensorflow/stream_executor/tpu/BUILD @@ -229,6 +229,7 @@ cc_library( ":tpu_executor_c_api_hdrs", "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla/service:computation_placer", + "//tensorflow/core/tpu:tpu_api", ], alwayslink = True, ) diff --git a/tensorflow/stream_executor/tpu/tpu_computation_placer.cc b/tensorflow/stream_executor/tpu/tpu_computation_placer.cc index 660b446d953..9d8aa3808b3 100644 --- a/tensorflow/stream_executor/tpu/tpu_computation_placer.cc +++ b/tensorflow/stream_executor/tpu/tpu_computation_placer.cc @@ -15,17 +15,18 @@ limitations under the License. 
#include "tensorflow/stream_executor/tpu/tpu_computation_placer.h" +#include "tensorflow/core/tpu/tpu_api.h" #include "tensorflow/stream_executor/tpu/tpu_platform.h" template using StatusOr = TpuComputationPlacer::StatusOr; TpuComputationPlacer::TpuComputationPlacer() { - placer_ = TpuComputationPlacer_New(); + placer_ = tensorflow::tpu::ExecutorApiFn()->TpuComputationPlacer_NewFn(); } TpuComputationPlacer::~TpuComputationPlacer() { - TpuComputationPlacer_Free(placer_); + tensorflow::tpu::ExecutorApiFn()->TpuComputationPlacer_FreeFn(placer_); } StatusOr TpuComputationPlacer::DeviceId(int replica, int computation, From e8029af3b2a9370acadd84270b41f75c4eb74f57 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Jul 2020 23:17:54 -0700 Subject: [PATCH 1460/2522] Update ops-related pbtxt files. PiperOrigin-RevId: 323515788 Change-Id: I5f1f509c2608be714a5951bccd447b73bd1b77e9 --- .../ops_history_v2/OptimizeDatasetV2.pbtxt | 43 +++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 43 +++++++++++++++++++ 2 files changed, 86 insertions(+) create mode 100644 tensorflow/core/ops/compat/ops_history_v2/OptimizeDatasetV2.pbtxt diff --git a/tensorflow/core/ops/compat/ops_history_v2/OptimizeDatasetV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/OptimizeDatasetV2.pbtxt new file mode 100644 index 00000000000..ee43df5bfd7 --- /dev/null +++ b/tensorflow/core/ops/compat/ops_history_v2/OptimizeDatasetV2.pbtxt @@ -0,0 +1,43 @@ +op { + name: "OptimizeDatasetV2" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "optimizations_enabled" + type: DT_STRING + } + input_arg { + name: "optimizations_disabled" + type: DT_STRING + } + input_arg { + name: "optimizations_default" + type: DT_STRING + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + attr { + name: "optimization_configs" + type: "list(string)" + default_value { + list { + } + } + } +} diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index fed598bdef4..a9a94580d86 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -26317,6 +26317,49 @@ op { } } } +op { + name: "OptimizeDatasetV2" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "optimizations_enabled" + type: DT_STRING + } + input_arg { + name: "optimizations_disabled" + type: DT_STRING + } + input_arg { + name: "optimizations_default" + type: DT_STRING + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + attr { + name: "optimization_configs" + type: "list(string)" + default_value { + list { + } + } + } +} op { name: "OptionalFromValue" input_arg { From 22b60f146e2315878df04ec37232ba0ec7ae0eed Mon Sep 17 00:00:00 2001 From: Andy Ly Date: Mon, 27 Jul 2020 23:26:39 -0700 Subject: [PATCH 1461/2522] Update tf._FusedMatMul to support BF16 element type. Updated _FusedMatMul op definition in TensorFlow op registry to always support bf16 regardless of build. 
PiperOrigin-RevId: 323516484 Change-Id: Idba319a22f2042beec5432cf696dd63207edb0dd --- .../mlir/tensorflow/ir/tf_generated_ops.td | 10 +- .../core/grappler/optimizers/remapper_test.cc | 257 ++++++++++-------- .../core/grappler/utils/grappler_test.h | 9 + tensorflow/core/ops/math_ops.cc | 4 - 4 files changed, 159 insertions(+), 121 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td index e43e35b72ba..0fe8dd647a7 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td @@ -11661,7 +11661,7 @@ create these operators. TF_DerivedOperandSizeAttr num_args = TF_DerivedOperandSizeAttr<2>; } -def TF__FusedMatMulOp : TF_Op<"_FusedMatMul", [NoSideEffect]> { +def TF__FusedMatMulOp : TF_Op<"_FusedMatMul", [NoSideEffect, SameOperandsAndResultElementType]> { let summary = [{ Performs a MatMul followed by a specified series of operations. }]; @@ -11687,9 +11687,9 @@ expected to create these operators. }]; let arguments = (ins - F32Tensor:$a, - F32Tensor:$b, - Variadic:$args, + TensorOf<[BF16, F32]>:$a, + TensorOf<[BF16, F32]>:$b, + Variadic>:$args, DefaultValuedAttr:$transpose_a, DefaultValuedAttr:$transpose_b, @@ -11698,7 +11698,7 @@ expected to create these operators. ); let results = (outs - F32Tensor:$product + TensorOf<[BF16, F32]>:$product ); TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; diff --git a/tensorflow/core/grappler/optimizers/remapper_test.cc b/tensorflow/core/grappler/optimizers/remapper_test.cc index da984a4fa19..f4bc5e38526 100644 --- a/tensorflow/core/grappler/optimizers/remapper_test.cc +++ b/tensorflow/core/grappler/optimizers/remapper_test.cc @@ -17,6 +17,7 @@ limitations under the License. 
#include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/framework/types.h" #include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/grappler/devices.h" #include "tensorflow/core/grappler/grappler_item.h" @@ -388,65 +389,80 @@ TEST_F(RemapperTest, FuseConv2DWithBias) { test::ExpectTensorNear(tensors[0], tensors_expected[0], 1e-6); } -TEST_F(RemapperTest, FuseMatMulWithBias) { - using ::tensorflow::ops::Placeholder; +class RemapperFuseMatMulWithBiasTest : public RemapperTest { + public: + template + void RunTest() { + using ::tensorflow::ops::Placeholder; - tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); - auto lhs_shape = ops::Placeholder::Shape({8, 32}); - auto rhs_shape = ops::Placeholder::Shape({32, 64}); - auto bias_shape = ops::Placeholder::Shape({64}); + auto lhs_shape = ops::Placeholder::Shape({8, 32}); + auto rhs_shape = ops::Placeholder::Shape({32, 64}); + auto bias_shape = ops::Placeholder::Shape({64}); - auto lhs = Placeholder(s.WithOpName("lhs"), DT_FLOAT, lhs_shape); - auto rhs = Placeholder(s.WithOpName("rhs"), DT_FLOAT, rhs_shape); - auto bias = Placeholder(s.WithOpName("bias"), DT_FLOAT, bias_shape); + auto lhs = Placeholder(s.WithOpName("lhs"), DTYPE, lhs_shape); + auto rhs = Placeholder(s.WithOpName("rhs"), DTYPE, rhs_shape); + auto bias = Placeholder(s.WithOpName("bias"), DTYPE, bias_shape); - auto matmul = ops::MatMul(s.WithOpName("matmul"), lhs, rhs); - auto bias_add = ops::BiasAdd(s.WithOpName("bias_add"), matmul, bias); - auto fetch = ops::Identity(s.WithOpName("fetch"), bias_add); + auto matmul = ops::MatMul(s.WithOpName("matmul"), lhs, rhs); + auto bias_add = ops::BiasAdd(s.WithOpName("bias_add"), matmul, bias); + auto fetch = ops::Identity(s.WithOpName("fetch"), bias_add); - auto lhs_t = GenerateRandomTensor({8, 32}); - auto rhs_t = GenerateRandomTensor({32, 64}); - auto bias_t = GenerateRandomTensor({64}); + auto lhs_t = GenerateTensorWithSetRandom({8, 32}); + auto rhs_t = GenerateTensorWithSetRandom({32, 64}); + auto bias_t = GenerateTensorWithSetRandom({64}); - GrapplerItem item; - item.fetch = {"fetch"}; - item.feed = {{"lhs", lhs_t}, {"rhs", rhs_t}, {"bias", bias_t}}; - TF_ASSERT_OK(s.ToGraphDef(&item.graph)); + GrapplerItem item; + item.fetch = {"fetch"}; + item.feed = {{"lhs", lhs_t}, {"rhs", rhs_t}, {"bias", bias_t}}; + TF_ASSERT_OK(s.ToGraphDef(&item.graph)); - // Place all nodes on CPU. - for (int i = 0; i < item.graph.node_size(); ++i) { - item.graph.mutable_node(i)->set_device("/device:CPU:0"); - } - - Remapper optimizer(RewriterConfig::ON); - GraphDef output; - TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output)); - - int found = 0; - for (const NodeDef& node : output.node()) { - if (node.name() == "bias_add") { - EXPECT_EQ(node.op(), "_FusedMatMul"); - ASSERT_GE(node.input_size(), 3); - EXPECT_EQ(node.input(0), "lhs"); - EXPECT_EQ(node.input(1), "rhs"); - - EXPECT_EQ(node.attr().at("num_args").i(), 1); - EXPECT_EQ(node.input(2), "bias"); - - const auto fused_ops = node.attr().at("fused_ops").list().s(); - ASSERT_EQ(fused_ops.size(), 1); - EXPECT_EQ(fused_ops[0], "BiasAdd"); - found++; + // Place all nodes on CPU. 
+ for (int i = 0; i < item.graph.node_size(); ++i) { + item.graph.mutable_node(i)->set_device("/device:CPU:0"); } - } - EXPECT_EQ(1, found); - auto tensors_expected = EvaluateNodes(item.graph, item.fetch, item.feed); - ASSERT_EQ(tensors_expected.size(), 1); - auto tensors = EvaluateNodes(output, item.fetch, item.feed); - ASSERT_EQ(tensors.size(), 1); - test::ExpectTensorNear(tensors[0], tensors_expected[0], 1e-6); + Remapper optimizer(RewriterConfig::ON); + GraphDef output; + TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output)); + + int found = 0; + for (const NodeDef& node : output.node()) { + if (node.name() == "bias_add") { + EXPECT_EQ(node.op(), "_FusedMatMul"); + ASSERT_GE(node.input_size(), 3); + EXPECT_EQ(node.input(0), "lhs"); + EXPECT_EQ(node.input(1), "rhs"); + + EXPECT_EQ(node.attr().at("num_args").i(), 1); + EXPECT_EQ(node.input(2), "bias"); + + const auto fused_ops = node.attr().at("fused_ops").list().s(); + ASSERT_EQ(fused_ops.size(), 1); + EXPECT_EQ(fused_ops[0], "BiasAdd"); + found++; + } + } + EXPECT_EQ(1, found); + + auto tensors_expected = EvaluateNodes(item.graph, item.fetch, item.feed); + ASSERT_EQ(tensors_expected.size(), 1); + auto tensors = EvaluateNodes(output, item.fetch, item.feed); + ASSERT_EQ(tensors.size(), 1); + typedef typename EnumToDataType::Type T; + test::ExpectTensorNear(tensors[0], tensors_expected[0], 1e-6); + } +}; + +TEST_F(RemapperFuseMatMulWithBiasTest, F32) { RunTest(); } + +TEST_F(RemapperFuseMatMulWithBiasTest, Bf16) { +#if !defined(INTEL_MKL) || !defined(ENABLE_INTEL_MKL_BFLOAT16) + GTEST_SKIP() << "Intel MKL with bfloat16 support is not enabled, skipping " + "FuseMatMulWithBias with bfloat16."; +#endif // !defined(INTEL_MKL) || !defined(ENABLE_INTEL_MKL_BFLOAT16) + RunTest(); // NOLINT } // TODO(b/161005848): Fix flaky test. 
@@ -602,82 +618,99 @@ TEST_F(RemapperTest, FuseConv2DWithBiasAndActivation) { } } -TEST_F(RemapperTest, FuseMatMulWithBiasAndActivation) { - using ::tensorflow::ops::Placeholder; +class RemapperFuseMatMulWithBiasAndActivationTest : public RemapperTest { + public: + template + void RunTest() { + using ::tensorflow::ops::Placeholder; - for (const string& activation : {"Relu", "Relu6", "Elu"}) { - tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + for (const string& activation : {"Relu", "Relu6", "Elu"}) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); - auto lhs_shape = ops::Placeholder::Shape({8, 32}); - auto rhs_shape = ops::Placeholder::Shape({32, 64}); - auto bias_shape = ops::Placeholder::Shape({64}); + auto lhs_shape = ops::Placeholder::Shape({8, 32}); + auto rhs_shape = ops::Placeholder::Shape({32, 64}); + auto bias_shape = ops::Placeholder::Shape({64}); - auto lhs = Placeholder(s.WithOpName("lhs"), DT_FLOAT, lhs_shape); - auto rhs = Placeholder(s.WithOpName("rhs"), DT_FLOAT, rhs_shape); - auto bias = Placeholder(s.WithOpName("bias"), DT_FLOAT, bias_shape); + auto lhs = Placeholder(s.WithOpName("lhs"), DTYPE, lhs_shape); + auto rhs = Placeholder(s.WithOpName("rhs"), DTYPE, rhs_shape); + auto bias = Placeholder(s.WithOpName("bias"), DTYPE, bias_shape); - auto matmul = ops::MatMul(s.WithOpName("matmul"), lhs, rhs); - auto bias_add = ops::BiasAdd(s.WithOpName("bias_add"), matmul, bias); + auto matmul = ops::MatMul(s.WithOpName("matmul"), lhs, rhs); + auto bias_add = ops::BiasAdd(s.WithOpName("bias_add"), matmul, bias); - ops::Identity fetch = [&]() -> ops::Identity { - auto activate = s.WithOpName("activation"); - auto fetch = s.WithOpName("fetch"); + ops::Identity fetch = [&]() -> ops::Identity { + auto activate = s.WithOpName("activation"); + auto fetch = s.WithOpName("fetch"); - if (activation == "Relu") { - return ops::Identity(fetch, ops::Relu(activate, bias_add)); - } else if (activation == "Relu6") { - return ops::Identity(fetch, ops::Relu6(activate, bias_add)); - } else if (activation == "Elu") { - return ops::Identity(fetch, ops::Elu(activate, bias_add)); + if (activation == "Relu") { + return ops::Identity(fetch, ops::Relu(activate, bias_add)); + } else if (activation == "Relu6") { + return ops::Identity(fetch, ops::Relu6(activate, bias_add)); + } else if (activation == "Elu") { + return ops::Identity(fetch, ops::Elu(activate, bias_add)); + } + + return ops::Identity(fetch, bias); + }(); + + auto lhs_t = GenerateTensorWithSetRandom({8, 32}); + auto rhs_t = GenerateTensorWithSetRandom({32, 64}); + auto bias_t = GenerateTensorWithSetRandom({64}); + + GrapplerItem item; + item.fetch = {"fetch"}; + item.feed = {{"lhs", lhs_t}, {"rhs", rhs_t}, {"bias", bias_t}}; + TF_ASSERT_OK(s.ToGraphDef(&item.graph)); + + // Place all nodes on CPU. 
+ for (int i = 0; i < item.graph.node_size(); ++i) { + item.graph.mutable_node(i)->set_device("/device:CPU:0"); } - return ops::Identity(fetch, bias); - }(); + Remapper optimizer(RewriterConfig::ON); + GraphDef output; + TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output)); - auto lhs_t = GenerateRandomTensor({8, 32}); - auto rhs_t = GenerateRandomTensor({32, 64}); - auto bias_t = GenerateRandomTensor({64}); + int found = 0; + for (const NodeDef& node : output.node()) { + if (node.name() == "activation") { + EXPECT_EQ(node.op(), "_FusedMatMul"); + ASSERT_GE(node.input_size(), 3); + EXPECT_EQ(node.input(0), "lhs"); + EXPECT_EQ(node.input(1), "rhs"); - GrapplerItem item; - item.fetch = {"fetch"}; - item.feed = {{"lhs", lhs_t}, {"rhs", rhs_t}, {"bias", bias_t}}; - TF_ASSERT_OK(s.ToGraphDef(&item.graph)); + EXPECT_EQ(node.attr().at("num_args").i(), 1); + EXPECT_EQ(node.input(2), "bias"); - // Place all nodes on CPU. - for (int i = 0; i < item.graph.node_size(); ++i) { - item.graph.mutable_node(i)->set_device("/device:CPU:0"); - } - - Remapper optimizer(RewriterConfig::ON); - GraphDef output; - TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output)); - - int found = 0; - for (const NodeDef& node : output.node()) { - if (node.name() == "activation") { - EXPECT_EQ(node.op(), "_FusedMatMul"); - ASSERT_GE(node.input_size(), 3); - EXPECT_EQ(node.input(0), "lhs"); - EXPECT_EQ(node.input(1), "rhs"); - - EXPECT_EQ(node.attr().at("num_args").i(), 1); - EXPECT_EQ(node.input(2), "bias"); - - const auto fused_ops = node.attr().at("fused_ops").list().s(); - ASSERT_EQ(fused_ops.size(), 2); - EXPECT_EQ(fused_ops[0], "BiasAdd"); - EXPECT_EQ(fused_ops[1], activation); - found++; + const auto fused_ops = node.attr().at("fused_ops").list().s(); + ASSERT_EQ(fused_ops.size(), 2); + EXPECT_EQ(fused_ops[0], "BiasAdd"); + EXPECT_EQ(fused_ops[1], activation); + found++; + } } - } - EXPECT_EQ(1, found); + EXPECT_EQ(1, found); - auto tensors_expected = EvaluateNodes(item.graph, item.fetch, item.feed); - ASSERT_EQ(tensors_expected.size(), 1); - auto tensors = EvaluateNodes(output, item.fetch, item.feed); - ASSERT_EQ(tensors.size(), 1); - test::ExpectTensorNear(tensors[0], tensors_expected[0], 1e-6); + auto tensors_expected = EvaluateNodes(item.graph, item.fetch, item.feed); + ASSERT_EQ(tensors_expected.size(), 1); + auto tensors = EvaluateNodes(output, item.fetch, item.feed); + ASSERT_EQ(tensors.size(), 1); + typedef typename EnumToDataType::Type T; + test::ExpectTensorNear(tensors[0], tensors_expected[0], 1e-6); + } } +}; + +TEST_F(RemapperFuseMatMulWithBiasAndActivationTest, F32) { + RunTest(); +} + +TEST_F(RemapperFuseMatMulWithBiasAndActivationTest, Bf16) { +#if !defined(INTEL_MKL) || !defined(ENABLE_INTEL_MKL_BFLOAT16) + GTEST_SKIP() << "Intel MKL with bfloat16 support is not enabled, skipping " + "FuseMatMulWithBiasAndActivation with bfloat16."; +#endif // !defined(INTEL_MKL) || !defined(ENABLE_INTEL_MKL_BFLOAT16) + RunTest(); // NOLINT } #ifndef INTEL_MKL diff --git a/tensorflow/core/grappler/utils/grappler_test.h b/tensorflow/core/grappler/utils/grappler_test.h index 7ac70356f2c..9225f9172e8 100644 --- a/tensorflow/core/grappler/utils/grappler_test.h +++ b/tensorflow/core/grappler/utils/grappler_test.h @@ -89,6 +89,15 @@ class GrapplerTest : public ::testing::Test { return tensor; } + // Creates a random tensor with given shape using `setRandom`. 
+ template + Tensor GenerateTensorWithSetRandom(const TensorShape& shape) const { + typedef typename EnumToDataType::Type T; + Tensor tensor(DTYPE, shape); + tensor.flat().setRandom(); + return tensor; + } + // Get a constant tensor with given shape. template Tensor GenerateConstantTensor( diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index 99be4e2fcd8..54d8a6add77 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -952,11 +952,7 @@ REGISTER_OP("_FusedMatMul") .Output("product: T") .Attr("transpose_a: bool = false") .Attr("transpose_b: bool = false") -#if defined(INTEL_MKL) && defined(ENABLE_INTEL_MKL_BFLOAT16) .Attr("T: {bfloat16, float}") -#else - .Attr("T: {float}") -#endif .Attr("num_args: int >= 0") .Attr("fused_ops: list(string) = []") // Attributes for the FusedBatchNorm ----------- // From fe94fda399eb73b9fbad75a8934f998af912c650 Mon Sep 17 00:00:00 2001 From: Hemal Mamtora Date: Tue, 28 Jul 2020 12:19:17 +0530 Subject: [PATCH 1462/2522] Corrected citation: Graves et. al -2016 -> +2006 --- tensorflow/python/ops/ctc_ops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/ctc_ops.py b/tensorflow/python/ops/ctc_ops.py index a33d9a1de1f..75d88cfe44f 100644 --- a/tensorflow/python/ops/ctc_ops.py +++ b/tensorflow/python/ops/ctc_ops.py @@ -842,7 +842,7 @@ def ctc_loss_v3(labels, name=None): """Computes CTC (Connectionist Temporal Classification) loss. - This op implements the CTC loss as presented in (Graves et al., 2016). + This op implements the CTC loss as presented in (Graves et al., 2006). Notes: @@ -882,7 +882,7 @@ def ctc_loss_v3(labels, References: Connectionist Temporal Classification - Labeling Unsegmented Sequence Data with Recurrent Neural Networks: - [Graves et al., 2016](https://dl.acm.org/citation.cfm?id=1143891) + [Graves et al., 2006](https://dl.acm.org/citation.cfm?id=1143891) ([pdf](http://www.cs.toronto.edu/~graves/icml_2006.pdf)) """ if isinstance(labels, sparse_tensor.SparseTensor): From e544dce3a3a43631811e0760db5c33fe0a7519ba Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Jul 2020 23:45:56 -0700 Subject: [PATCH 1463/2522] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 323518068 Change-Id: Ieffc5d5fb0ecefd88e010e46c971674a966185d7 --- tensorflow/go/op/wrappers.go | 41 ++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 687bc4ced33..9da46e94adb 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -8345,6 +8345,47 @@ func OptionalFromValue(scope *Scope, components []tf.Output) (optional tf.Output return op.Output(0) } +// OptimizeDatasetV2Attr is an optional argument to OptimizeDatasetV2. +type OptimizeDatasetV2Attr func(optionalAttr) + +// OptimizeDatasetV2OptimizationConfigs sets the optional optimization_configs attribute to value. +// If not specified, defaults to <> +func OptimizeDatasetV2OptimizationConfigs(value []string) OptimizeDatasetV2Attr { + return func(m optionalAttr) { + m["optimization_configs"] = value + } +} + +// Creates a dataset by applying related optimizations to `input_dataset`. +// +// Creates a dataset by applying related optimizations to `input_dataset`. +// +// Arguments: +// input_dataset: A variant tensor representing the input dataset. +// optimizations_enabled: A `tf.string` vector `tf.Tensor` identifying user enabled optimizations. 
+// optimizations_disabled: A `tf.string` vector `tf.Tensor` identifying user disabled optimizations. +// optimizations_default: A `tf.string` vector `tf.Tensor` identifying optimizations by default. +// +// +func OptimizeDatasetV2(scope *Scope, input_dataset tf.Output, optimizations_enabled tf.Output, optimizations_disabled tf.Output, optimizations_default tf.Output, output_types []tf.DataType, output_shapes []tf.Shape, optional ...OptimizeDatasetV2Attr) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "OptimizeDatasetV2", + Input: []tf.Input{ + input_dataset, optimizations_enabled, optimizations_disabled, optimizations_default, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // OptimizeDatasetAttr is an optional argument to OptimizeDataset. type OptimizeDatasetAttr func(optionalAttr) From 32068c58ac27f22798bed95414b238263f5999f2 Mon Sep 17 00:00:00 2001 From: YoungSeok Yoon Date: Tue, 28 Jul 2020 00:12:28 -0700 Subject: [PATCH 1464/2522] Bump the TFLite iOS version number to 2.3.0 PiperOrigin-RevId: 323520599 Change-Id: I7adfdf53d7b87f7e1ad5f604bc1b9badf62db19f --- .../experimental/ios/TensorFlowLiteC.podspec | 23 +++++++-- .../ios/TensorFlowLiteSelectTfOps.podspec | 4 +- .../objc/TensorFlowLiteObjC.podspec | 2 +- .../swift/TensorFlowLiteSwift.podspec | 48 +++++++++++++++---- 4 files changed, 62 insertions(+), 15 deletions(-) diff --git a/tensorflow/lite/experimental/ios/TensorFlowLiteC.podspec b/tensorflow/lite/experimental/ios/TensorFlowLiteC.podspec index f379799c8a9..1b986933f5f 100644 --- a/tensorflow/lite/experimental/ios/TensorFlowLiteC.podspec +++ b/tensorflow/lite/experimental/ios/TensorFlowLiteC.podspec @@ -1,10 +1,10 @@ Pod::Spec.new do |s| s.name = 'TensorFlowLiteC' - s.version = '2.2.0' + s.version = '2.3.0' s.authors = 'Google Inc.' s.license = { :type => 'Apache' } s.homepage = 'https://github.com/tensorflow/tensorflow' - s.source = { :http => "https://dl.google.com/dl/cpdc/b3338da8d8cfd06b/TensorFlowLiteC-#{s.version}.tar.gz" } + s.source = { :http => "https://dl.google.com/dl/cpdc/b03814d8b5a44ad2/TensorFlowLiteC-#{s.version}.tar.gz" } s.summary = 'TensorFlow Lite' s.description = <<-DESC @@ -19,5 +19,22 @@ Pod::Spec.new do |s| s.module_name = 'TensorFlowLiteC' s.library = 'c++' - s.vendored_frameworks = 'Frameworks/TensorFlowLiteC.framework' + + s.default_subspec = 'Core' + + s.subspec 'Core' do |core| + core.vendored_frameworks = 'Frameworks/TensorFlowLiteC.framework' + end + + s.subspec 'CoreML' do |coreml| + coreml.weak_framework = 'CoreML' + coreml.dependency 'TensorFlowLiteC/Core' + coreml.vendored_frameworks = 'Frameworks/TensorFlowLiteCCoreML.framework' + end + + s.subspec 'Metal' do |metal| + metal.weak_framework = 'Metal' + metal.dependency 'TensorFlowLiteC/Core' + metal.vendored_frameworks = 'Frameworks/TensorFlowLiteCMetal.framework' + end end diff --git a/tensorflow/lite/experimental/ios/TensorFlowLiteSelectTfOps.podspec b/tensorflow/lite/experimental/ios/TensorFlowLiteSelectTfOps.podspec index 788630a6d4f..393040b34b4 100644 --- a/tensorflow/lite/experimental/ios/TensorFlowLiteSelectTfOps.podspec +++ b/tensorflow/lite/experimental/ios/TensorFlowLiteSelectTfOps.podspec @@ -1,10 +1,10 @@ Pod::Spec.new do |s| s.name = 'TensorFlowLiteSelectTfOps' - s.version = '2.2.0' + s.version = '2.3.0' s.authors = 'Google Inc.' 
s.license = { :type => 'Apache' } s.homepage = 'https://github.com/tensorflow/tensorflow' - s.source = { :http => "https://dl.google.com/dl/cpdc/9604b128278441ac/TensorFlowLiteSelectTfOps-2.2.0.tar.gz" } + s.source = { :http => "https://dl.google.com/dl/cpdc/4f626bc24212fd61/TensorFlowLiteSelectTfOps-#{s.version}.tar.gz" } s.summary = 'TensorFlow Lite Select TF Ops' s.description = <<-DESC diff --git a/tensorflow/lite/experimental/objc/TensorFlowLiteObjC.podspec b/tensorflow/lite/experimental/objc/TensorFlowLiteObjC.podspec index 5817619a58f..145cf02a2e6 100644 --- a/tensorflow/lite/experimental/objc/TensorFlowLiteObjC.podspec +++ b/tensorflow/lite/experimental/objc/TensorFlowLiteObjC.podspec @@ -1,6 +1,6 @@ Pod::Spec.new do |s| s.name = 'TensorFlowLiteObjC' - s.version = '2.2.0' + s.version = '2.3.0' s.authors = 'Google Inc.' s.license = { :type => 'Apache' } s.homepage = 'https://github.com/tensorflow/tensorflow' diff --git a/tensorflow/lite/experimental/swift/TensorFlowLiteSwift.podspec b/tensorflow/lite/experimental/swift/TensorFlowLiteSwift.podspec index 679a894c414..8af52ef1a33 100644 --- a/tensorflow/lite/experimental/swift/TensorFlowLiteSwift.podspec +++ b/tensorflow/lite/experimental/swift/TensorFlowLiteSwift.podspec @@ -1,6 +1,6 @@ Pod::Spec.new do |s| s.name = 'TensorFlowLiteSwift' - s.version = '2.2.0' + s.version = '2.3.0' s.authors = 'Google Inc.' s.license = { :type => 'Apache' } s.homepage = 'https://github.com/tensorflow/tensorflow' @@ -20,14 +20,44 @@ Pod::Spec.new do |s| tfl_dir = 'tensorflow/lite/' swift_dir = tfl_dir + 'experimental/swift/' - s.source_files = swift_dir + 'Sources/*.swift' - s.dependency 'TensorFlowLiteC', "#{s.version}" - s.test_spec 'Tests' do |ts| - ts.source_files = swift_dir + 'Tests/*.swift' - ts.resources = [ - tfl_dir + 'testdata/add.bin', - tfl_dir + 'testdata/add_quantized.bin', - ] + tfl_dir = 'tensorflow/lite/' + swift_dir = tfl_dir + 'experimental/swift/' + + s.default_subspec = 'Core' + + s.subspec 'Core' do |core| + core.dependency 'TensorFlowLiteC', "#{s.version}" + core.source_files = swift_dir + 'Sources/*.swift' + core.exclude_files = swift_dir + 'Sources/{CoreML,Metal}Delegate.swift' + + core.test_spec 'Tests' do |ts| + ts.source_files = swift_dir + 'Tests/*.swift' + ts.exclude_files = swift_dir + 'Tests/MetalDelegateTests.swift' + ts.resources = [ + tfl_dir + 'testdata/add.bin', + tfl_dir + 'testdata/add_quantized.bin', + ] + end + end + + s.subspec 'CoreML' do |coreml| + coreml.source_files = swift_dir + 'Sources/CoreMLDelegate.swift' + coreml.dependency 'TensorFlowLiteC/CoreML', "#{s.version}" + coreml.dependency 'TensorFlowLiteSwift/Core', "#{s.version}" + end + + s.subspec 'Metal' do |metal| + metal.source_files = swift_dir + 'Sources/MetalDelegate.swift' + metal.dependency 'TensorFlowLiteC/Metal', "#{s.version}" + metal.dependency 'TensorFlowLiteSwift/Core', "#{s.version}" + + metal.test_spec 'Tests' do |ts| + ts.source_files = swift_dir + 'Tests/{Interpreter,MetalDelegate}Tests.swift' + ts.resources = [ + tfl_dir + 'testdata/add.bin', + tfl_dir + 'testdata/add_quantized.bin', + ] + end end end From 11d919f0a51aeb835230dc3a0a36df10346eb493 Mon Sep 17 00:00:00 2001 From: Thai Nguyen Date: Tue, 28 Jul 2020 00:13:59 -0700 Subject: [PATCH 1465/2522] Support bool in TensorflowLite java API PiperOrigin-RevId: 323520729 Change-Id: I71a9dc33cd80ce2401ca99928046cc608dc03821 --- tensorflow/lite/java/BUILD | 2 ++ .../java/org/tensorflow/lite/DataType.java | 9 ++++++- .../java/org/tensorflow/lite/Interpreter.java | 4 +++ 
.../main/java/org/tensorflow/lite/Tensor.java | 4 +++ .../native/nativeinterpreterwrapper_jni.cc | 2 ++ .../lite/java/src/main/native/tensor_jni.cc | 25 +++++++++++++----- .../org/tensorflow/lite/InterpreterTest.java | 24 +++++++++++++++++ .../src/testdata/tile_with_bool_input.bin | Bin 0 -> 596 bytes 8 files changed, 63 insertions(+), 7 deletions(-) create mode 100644 tensorflow/lite/java/src/testdata/tile_with_bool_input.bin diff --git a/tensorflow/lite/java/BUILD b/tensorflow/lite/java/BUILD index e8f9145065a..9bceb939c02 100644 --- a/tensorflow/lite/java/BUILD +++ b/tensorflow/lite/java/BUILD @@ -16,6 +16,7 @@ exports_files([ "src/testdata/add.bin", "src/testdata/add_unknown_dimensions.bin", "src/testdata/grace_hopper_224.jpg", + "src/testdata/tile_with_bool_input.bin", "AndroidManifest.xml", "proguard.flags", "tflite_version_script.lds", @@ -249,6 +250,7 @@ java_test( data = [ "src/testdata/add.bin", "src/testdata/add_unknown_dimensions.bin", + "src/testdata/tile_with_bool_input.bin", "//tensorflow/lite:testdata/dynamic_shapes.bin", "//tensorflow/lite:testdata/multi_add.bin", "//tensorflow/lite:testdata/multi_add_flex.bin", diff --git a/tensorflow/lite/java/src/main/java/org/tensorflow/lite/DataType.java b/tensorflow/lite/java/src/main/java/org/tensorflow/lite/DataType.java index 527346c3c9b..1c4d4176763 100644 --- a/tensorflow/lite/java/src/main/java/org/tensorflow/lite/DataType.java +++ b/tensorflow/lite/java/src/main/java/org/tensorflow/lite/DataType.java @@ -32,6 +32,9 @@ public enum DataType { /** Strings. */ STRING(5), + /** Bool. */ + BOOL(6), + /** 8-bit signed integer. */ INT8(9); @@ -45,7 +48,6 @@ public enum DataType { public int byteSize() { switch (this) { case FLOAT32: - return 4; case INT32: return 4; case INT8: @@ -53,6 +55,9 @@ public enum DataType { return 1; case INT64: return 8; + case BOOL: + // Boolean size is JVM-dependent. + return -1; case STRING: return -1; } @@ -92,6 +97,8 @@ public enum DataType { return "byte"; case INT64: return "long"; + case BOOL: + return "bool"; case STRING: return "string"; } diff --git a/tensorflow/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java b/tensorflow/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java index 5993ee7a037..59afc0c3608 100644 --- a/tensorflow/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java +++ b/tensorflow/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java @@ -281,6 +281,8 @@ public final class Interpreter implements AutoCloseable { *
  • {@link LongBuffer} - compatible with int64 Tensors. * * + * Note that boolean types are only supported as arrays, not {@link Buffer}s, or as scalar inputs. + * * @param input an array or multidimensional array, or a {@link Buffer} of primitive types * including int, float, long, and byte. {@link Buffer} is the preferred way to pass large * input data for primitive types, whereas string types require using the (multi-dimensional) @@ -319,6 +321,8 @@ public final class Interpreter implements AutoCloseable { *
  • {@link LongBuffer} - compatible with int64 Tensors. * * + * Note that boolean types are only supported as arrays, not {@link Buffer}s, or as scalar inputs. + * *

    Note: {@code null} values for invididual elements of {@code inputs} and {@code outputs} is * allowed only if the caller is using a {@link Delegate} that allows buffer handle interop, and * such a buffer has been bound to the corresponding input or output {@link Tensor}(s). diff --git a/tensorflow/lite/java/src/main/java/org/tensorflow/lite/Tensor.java b/tensorflow/lite/java/src/main/java/org/tensorflow/lite/Tensor.java index b69ab2072c1..f875c7424c9 100644 --- a/tensorflow/lite/java/src/main/java/org/tensorflow/lite/Tensor.java +++ b/tensorflow/lite/java/src/main/java/org/tensorflow/lite/Tensor.java @@ -323,6 +323,8 @@ public final class Tensor { return DataType.UINT8; } else if (long.class.equals(c)) { return DataType.INT64; + } else if (boolean.class.equals(c)) { + return DataType.BOOL; } else if (String.class.equals(c)) { return DataType.STRING; } @@ -339,6 +341,8 @@ public final class Tensor { return DataType.UINT8; } else if (Long.class.equals(c) || o instanceof LongBuffer) { return DataType.INT64; + } else if (Boolean.class.equals(c)) { + return DataType.BOOL; } else if (String.class.equals(c)) { return DataType.STRING; } diff --git a/tensorflow/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc b/tensorflow/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc index 7abe0f518f0..2d1844fbd39 100644 --- a/tensorflow/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc +++ b/tensorflow/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc @@ -102,6 +102,8 @@ int getDataType(TfLiteType data_type) { return 4; case kTfLiteString: return 5; + case kTfLiteBool: + return 6; default: return -1; } diff --git a/tensorflow/lite/java/src/main/native/tensor_jni.cc b/tensorflow/lite/java/src/main/native/tensor_jni.cc index 1f6fa3ed249..5dfca9ebe6c 100644 --- a/tensorflow/lite/java/src/main/native/tensor_jni.cc +++ b/tensorflow/lite/java/src/main/native/tensor_jni.cc @@ -142,13 +142,20 @@ size_t WriteOneDimensionalArray(JNIEnv* env, jobject object, TfLiteType type, env->GetByteArrayRegion(byte_array, 0, num_elements, byte_dst); return to_copy; } + case kTfLiteBool: { + jbooleanArray bool_array = static_cast(array); + jboolean* bool_dst = static_cast(dst); + env->GetBooleanArrayRegion(bool_array, 0, num_elements, bool_dst); + return to_copy; + } default: { - ThrowException(env, kUnsupportedOperationException, - "DataType error: TensorFlowLite currently supports float " - "(32 bits), int (32 bits), byte (8 bits), and long " - "(64 bits), support for other types (DataType %d in this " - "case) will be added in the future", - kTfLiteFloat32, type); + ThrowException( + env, kUnsupportedOperationException, + "DataType error: TensorFlowLite currently supports float " + "(32 bits), int (32 bits), byte (8 bits), bool (8 bits), and long " + "(64 bits), support for other types (DataType %d in this " + "case) will be added in the future", + kTfLiteFloat32, type); return 0; } } @@ -191,6 +198,12 @@ size_t ReadOneDimensionalArray(JNIEnv* env, TfLiteType data_type, static_cast(src)); return size; } + case kTfLiteBool: { + jbooleanArray bool_array = static_cast(dst); + env->SetBooleanArrayRegion(bool_array, 0, len, + static_cast(src)); + return size; + } default: { ThrowException(env, kIllegalStateException, "DataType error: invalid DataType(%d)", data_type); diff --git a/tensorflow/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java b/tensorflow/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java index f1d4ff147b1..8f52422dde0 100644 --- 
a/tensorflow/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java +++ b/tensorflow/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java @@ -42,6 +42,8 @@ public final class InterpreterTest { "tensorflow/lite/java/src/testdata/add_unknown_dimensions.bin"; private static final String DYNAMIC_SHAPES_MODEL_PATH = "tensorflow/lite/testdata/dynamic_shapes.bin"; + private static final String BOOL_MODEL = + "tensorflow/lite/java/src/testdata/tile_with_bool_input.bin"; private static final ByteBuffer MODEL_BUFFER = TestUtils.getTestFileAsBuffer(MODEL_PATH); private static final ByteBuffer MULTIPLE_INPUTS_MODEL_BUFFER = @@ -52,6 +54,7 @@ public final class InterpreterTest { TestUtils.getTestFileAsBuffer(UNKNOWN_DIMS_MODEL_PATH); private static final ByteBuffer DYNAMIC_SHAPES_MODEL_BUFFER = TestUtils.getTestFileAsBuffer(DYNAMIC_SHAPES_MODEL_PATH); + private static final ByteBuffer BOOL_MODEL_BUFFER = TestUtils.getTestFileAsBuffer(BOOL_MODEL); @Test public void testInterpreter() throws Exception { @@ -611,6 +614,27 @@ public final class InterpreterTest { } } + @Test + public void testBoolModel() throws Exception { + boolean[][][] inputs = {{{true, false}, {false, true}}, {{true, true}, {false, true}}}; + int[] multipliers = {1, 1, 2}; + boolean[][][] parsedOutputs = new boolean[2][2][4]; + + try (Interpreter interpreter = new Interpreter(BOOL_MODEL_BUFFER)) { + assertThat(interpreter.getInputTensor(0).dataType()).isEqualTo(DataType.BOOL); + Object[] inputsArray = {inputs, multipliers}; + Map outputsMap = new HashMap<>(); + outputsMap.put(0, parsedOutputs); + interpreter.runForMultipleInputsOutputs(inputsArray, outputsMap); + + boolean[][][] expectedOutputs = { + {{true, false, true, false}, {false, true, false, true}}, + {{true, true, true, true}, {false, true, false, true}} + }; + assertThat(parsedOutputs).isEqualTo(expectedOutputs); + } + } + private static FloatBuffer fill(FloatBuffer buffer, float value) { while (buffer.hasRemaining()) { buffer.put(value); diff --git a/tensorflow/lite/java/src/testdata/tile_with_bool_input.bin b/tensorflow/lite/java/src/testdata/tile_with_bool_input.bin new file mode 100644 index 0000000000000000000000000000000000000000..fadff5298ac92c5449a0b4483070c9926cd64564 GIT binary patch literal 596 zcmZ`$F;2rk5FCPuEo6lVTu`KNK|z76Sb2g46;dd!KnN*KT%2S(k+Es{0574U-~qf0 zGjq0qhLz^_ZufR)=WYSO+}y5!Q!L;iK!`aaM$Z`m4z-);1m=M&!Xpld?ciVOPYAQl z)^WD0p7XlNw+W6ITN0NhTF?a*C(MnhIk+k-BcQtQ#>Z5!B+dW zp;uhBBR*I&=Xrb&;3XTm-uix%c(|{An(LS-+oG}i!sc~9PtO-R4Z8PVPhiG34B!uc zG~->Gn_0Z7m2S;rY?0GBd*>|5>-{6Q>X&`Tqlju~D`^+E=2Q>asORt7dCV%W-0y$- J?mrp2{susmKl=ax literal 0 HcmV?d00001 From 8358f2c42f97b2f0fb1aaa3da3e99ea8de5be786 Mon Sep 17 00:00:00 2001 From: Stephan Herhut Date: Tue, 28 Jul 2020 00:55:58 -0700 Subject: [PATCH 1466/2522] Extend unranked to ranked pattern for hlo operations to all unary and binary ops. As this is essentially always the same pattern, only one operation is tested. 
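
The mechanics are plain X-macro expansion: the op list is written once and
replayed with different fn/sep arguments, once to mark each op legal on
ranked tensors and once to instantiate the conversion patterns. A
self-contained sketch of the technique (names below are illustrative, not
the ones used in transform_unranked_hlo.cc):

    #include <iostream>
    #include <string>
    #include <vector>

    // `fn` is applied to every entry and `sep` is spliced in between, so the
    // same list can emit statements (sep expands to a semicolon) or
    // comma-separated template argument lists (sep expands to a comma).
    #define FOR_EACH_UNARY_OP(fn, sep) fn(Abs) sep fn(Ceil) sep fn(Floor)

    #define AS_STRING(op) std::string(#op)
    #define COMMA ,

    int main() {
      // Expands to {"Abs", "Ceil", "Floor"}.
      std::vector<std::string> ops = {FOR_EACH_UNARY_OP(AS_STRING, COMMA)};
      for (const auto& op : ops) std::cout << op << "\n";
      return 0;
    }
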
PiperOrigin-RevId: 323525418 Change-Id: I9b15760294eb1fd8ba2b2e2c4832bd8d8cdb04d6 --- .../mhlo/transforms/transform_unranked_hlo.cc | 33 ++++++++++++++++--- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/transform_unranked_hlo.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/transform_unranked_hlo.cc index d2852394ee8..a973569d913 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/transform_unranked_hlo.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/transform_unranked_hlo.cc @@ -31,6 +31,22 @@ namespace mlir { namespace mhlo { namespace { +// TODO(herhut): Generate these out of op definitions. +#define MAP_XLA_OPERATION_CWISE_UNARY(fn, sep) \ + fn(AbsOp) sep fn(CeilOp) sep fn(ClzOp) sep fn(CosOp) sep fn(ExpOp) \ + sep fn(Expm1Op) sep fn(FloorOp) sep fn(ImagOp) sep fn(IsFiniteOp) \ + sep fn(LogOp) sep fn(Log1pOp) sep fn(LogisticOp) sep fn(NotOp) \ + sep fn(NegOp) sep fn(PopulationCountOp) sep fn(RealOp) \ + sep fn(RoundOp) sep fn(RsqrtOp) sep fn(SignOp) sep fn(SinOp) \ + sep fn(SqrtOp) sep fn(TanhOp) + +// TODO(herhut): Generate these out of op definitions. +#define MAP_XLA_OPERATION_CWISE_BINARY(fn, sep) \ + fn(AddOp) sep fn(Atan2Op) sep fn(ComplexOp) sep fn(DivOp) sep fn(MaxOp) \ + sep fn(MinOp) sep fn(MulOp) sep fn(PowOp) sep fn(RemOp) \ + sep fn(ShiftLeftOp) sep fn(ShiftRightArithmeticOp) \ + sep fn(ShiftRightLogicalOp) sep fn(SubOp) + // TODO(frgossen): Make it variadic. template inline void AddLegalOpOnRankedTensor(ConversionTarget *target) { @@ -154,8 +170,10 @@ struct TransformUnrankedHloPass target.addLegalDialect(); target.addLegalOp(); - AddLegalOpOnRankedTensor(&target); - AddLegalOpOnRankedTensor(&target); +#define ADD_LEGAL(op) AddLegalOpOnRankedTensor(&target) + MAP_XLA_OPERATION_CWISE_UNARY(ADD_LEGAL, ;); + MAP_XLA_OPERATION_CWISE_BINARY(ADD_LEGAL, ;); +#undef ADD_LEGAL // Populate rewrite patterns. OwningRewritePatternList patterns; @@ -173,9 +191,16 @@ void PopulateTransformUnrankedHloPatterns(MLIRContext *context, OwningRewritePatternList *patterns) { // TODO(frgossen): Populate all unary and binary operations. // clang-format off +#define MAP_UNARY(op) UnaryElementwiseOpConversion +#define MAP_BINARY(op) BinaryElementwiseOpConversion +#define COMMA , patterns->insert< - BinaryElementwiseOpConversion, - UnaryElementwiseOpConversion>(context); + MAP_XLA_OPERATION_CWISE_UNARY(MAP_UNARY, COMMA), + MAP_XLA_OPERATION_CWISE_BINARY(MAP_BINARY, COMMA) + >(context); +#undef MAP_UNARY +#undef MAP_BINARY +#undef COMMA // clang-format on } From 210cf0a0142af9d1bd21a7de82d5dd0afffc6c68 Mon Sep 17 00:00:00 2001 From: Stephan Herhut Date: Tue, 28 Jul 2020 01:05:10 -0700 Subject: [PATCH 1467/2522] Use elem_type instead of f99 in template files for mlir generated kernels. 
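
With the placeholder spelled elem_type rather than the type-like f99, the
template files read as templates. The substitution itself is unchanged;
roughly, per element type (command as in build_defs.bzl below, output path
and tensor shapes illustrative):

    # One generated op per requested element type:
    cat abs.mlir.tmpl | sed s/elem_type/f32/g > abs_f32.mlir

    # so a template signature written against the placeholder, e.g.
    #   func @Abs(%arg0: tensor<?xelem_type>) -> tensor<?xelem_type>
    # comes out as a concrete f32 kernel entry point:
    #   func @Abs(%arg0: tensor<?xf32>) -> tensor<?xf32>
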
PiperOrigin-RevId: 323526562 Change-Id: I6c12444d5ce2d16198c08f47c94dbddf8b9773b9 --- tensorflow/core/kernels/mlir_generated/build_defs.bzl | 2 +- .../kernels/mlir_generated/op_definitions/abs.mlir.tmpl | 6 +++--- .../mlir_generated/op_definitions/bias_add.mlir.tmpl | 8 ++++---- .../kernels/mlir_generated/op_definitions/relu.mlir.tmpl | 6 +++--- .../kernels/mlir_generated/op_definitions/tanh.mlir.tmpl | 6 +++--- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/tensorflow/core/kernels/mlir_generated/build_defs.bzl b/tensorflow/core/kernels/mlir_generated/build_defs.bzl index da2b4343d96..3426aba94a4 100644 --- a/tensorflow/core/kernels/mlir_generated/build_defs.bzl +++ b/tensorflow/core/kernels/mlir_generated/build_defs.bzl @@ -136,7 +136,7 @@ def _gen_mlir_op_impl(ctx): ctx.actions.run_shell( inputs = [ctx.file.template], outputs = [ctx.outputs.out], - command = "cat %s | sed s/f99/%s/g > %s" % ( + command = "cat %s | sed s/elem_type/%s/g > %s" % ( ctx.file.template.path, ctx.attr.type, ctx.outputs.out.path, diff --git a/tensorflow/core/kernels/mlir_generated/op_definitions/abs.mlir.tmpl b/tensorflow/core/kernels/mlir_generated/op_definitions/abs.mlir.tmpl index ed58f6279ce..bca0c59cd77 100644 --- a/tensorflow/core/kernels/mlir_generated/op_definitions/abs.mlir.tmpl +++ b/tensorflow/core/kernels/mlir_generated/op_definitions/abs.mlir.tmpl @@ -1,5 +1,5 @@ -func @Abs(%arg0: tensor) -> tensor { +func @Abs(%arg0: tensor) -> tensor { %0 = "tf.Abs"(%arg0) { } - : (tensor) -> tensor - return %0 : tensor + : (tensor) -> tensor + return %0 : tensor } diff --git a/tensorflow/core/kernels/mlir_generated/op_definitions/bias_add.mlir.tmpl b/tensorflow/core/kernels/mlir_generated/op_definitions/bias_add.mlir.tmpl index f685a5cde81..f58b6c0c1cb 100644 --- a/tensorflow/core/kernels/mlir_generated/op_definitions/bias_add.mlir.tmpl +++ b/tensorflow/core/kernels/mlir_generated/op_definitions/bias_add.mlir.tmpl @@ -1,6 +1,6 @@ -func @bias_add(%arg0: tensor, - %arg1: tensor) -> tensor { +func @bias_add(%arg0: tensor, + %arg1: tensor) -> tensor { %0 = "tf.BiasAdd"(%arg0, %arg1) { } - : (tensor, tensor) -> tensor - return %0 : tensor + : (tensor, tensor) -> tensor + return %0 : tensor } diff --git a/tensorflow/core/kernels/mlir_generated/op_definitions/relu.mlir.tmpl b/tensorflow/core/kernels/mlir_generated/op_definitions/relu.mlir.tmpl index 45aac242cee..7e7082ff295 100644 --- a/tensorflow/core/kernels/mlir_generated/op_definitions/relu.mlir.tmpl +++ b/tensorflow/core/kernels/mlir_generated/op_definitions/relu.mlir.tmpl @@ -1,5 +1,5 @@ -func @relu(%arg0: tensor) -> tensor { +func @relu(%arg0: tensor) -> tensor { %0 = "tf.Relu"(%arg0) { } - : (tensor) -> tensor - return %0 : tensor + : (tensor) -> tensor + return %0 : tensor } diff --git a/tensorflow/core/kernels/mlir_generated/op_definitions/tanh.mlir.tmpl b/tensorflow/core/kernels/mlir_generated/op_definitions/tanh.mlir.tmpl index 3188e86a233..a6000604210 100644 --- a/tensorflow/core/kernels/mlir_generated/op_definitions/tanh.mlir.tmpl +++ b/tensorflow/core/kernels/mlir_generated/op_definitions/tanh.mlir.tmpl @@ -1,5 +1,5 @@ -func @Tanh(%arg0: tensor) -> tensor { +func @Tanh(%arg0: tensor) -> tensor { %0 = "tf.Tanh"(%arg0) { } - : (tensor) -> tensor - return %0 : tensor + : (tensor) -> tensor + return %0 : tensor } From c7fd50db87b6cb88fdc46a6a277675ae1b30760a Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 28 Jul 2020 02:01:48 -0700 Subject: [PATCH 1468/2522] compat: Update forward compatibility horizon to 2020-07-28 PiperOrigin-RevId: 323532059 Change-Id: Ic36325353cd8075c49c19f4efdf87ee75d77c5c4 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 8cb19c601cf..9790cf62096 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 27) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 28) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From cf49bfdb2a177cb2c39834ff3e8ae6fd9dca2ad9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 28 Jul 2020 02:01:49 -0700 Subject: [PATCH 1469/2522] Update GraphDef version to 476. PiperOrigin-RevId: 323532061 Change-Id: Ie70c0461325f4ab1d88f6991866a9a5e889f3ff8 --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 1382ab7bbe9..47c32ac04c6 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 475 // Updated: 2020/7/27 +#define TF_GRAPH_DEF_VERSION 476 // Updated: 2020/7/28 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From f295633406569f9a6ee71467a9bb34ef1cc6852b Mon Sep 17 00:00:00 2001 From: Thomas Joerg Date: Tue, 28 Jul 2020 03:30:34 -0700 Subject: [PATCH 1470/2522] Integrate LLVM at llvm/llvm-project@a51829913dba Updates LLVM usage to match [a51829913dba](https://github.com/llvm/llvm-project/commit/a51829913dba) PiperOrigin-RevId: 323541562 Change-Id: I664c2f9c4889f7a6a97117234a1cac0639fa8f15 --- tensorflow/workspace.bzl | 4 ++-- third_party/mlir/test.BUILD | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 630f8b80c6d..98454b49ce6 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -711,8 +711,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. 
- LLVM_COMMIT = "eed333149d178b69fdaf39b9419b7ca032520182" - LLVM_SHA256 = "ee6ba5d5f25dfbfe524ef58f15ab64c772b3bd6d45b0b01b9b88aa222992d3c2" + LLVM_COMMIT = "a51829913dba28dae603fdcdddd242c7e20192a1" + LLVM_SHA256 = "53a6cb26b3716fb8ace65cb80ef37af1c9b53cb734d945dce3dee8d4f28dc219" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), diff --git a/third_party/mlir/test.BUILD b/third_party/mlir/test.BUILD index 36e68ac2751..7b670e3f50d 100644 --- a/third_party/mlir/test.BUILD +++ b/third_party/mlir/test.BUILD @@ -144,6 +144,7 @@ cc_library( "lib/IR/TestMatchers.cpp", "lib/IR/TestSideEffects.cpp", "lib/IR/TestSymbolUses.cpp", + "lib/IR/TestTypes.cpp", ], deps = [ ":TestDialect", From ae52b4c494d35f1330b8190f31caa28b5c653de3 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Tue, 28 Jul 2020 13:41:52 +0700 Subject: [PATCH 1471/2522] Add test NewRandomAccessFile --- .../filesystem/plugins/s3/s3_filesystem.cc | 6 +- .../filesystem/plugins/s3/s3_filesystem.h | 23 ++++++ .../plugins/s3/s3_filesystem_test.cc | 70 ++++++++++++++++++- 3 files changed, 96 insertions(+), 3 deletions(-) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc index 9555fe61881..8420b6ec013 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc @@ -227,11 +227,13 @@ static void GetExecutor(tf_s3_filesystem::S3File* s3_file) { static void GetTransferManager( const Aws::Transfer::TransferDirection& direction, tf_s3_filesystem::S3File* s3_file) { + // These functions should be called before holding `initialization_lock`. + GetS3Client(s3_file); + GetExecutor(s3_file); + absl::MutexLock l(&s3_file->initialization_lock); if (s3_file->transfer_managers[direction].get() == nullptr) { - GetS3Client(s3_file); - GetExecutor(s3_file); Aws::Transfer::TransferManagerConfiguration config(s3_file->executor.get()); config.s3Client = s3_file->s3_client; config.bufferSize = s3_file->multi_part_chunk_sizes[direction]; diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h index 8360d018b46..2c44fd2ce14 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h @@ -29,6 +29,22 @@ limitations under the License. 
void ParseS3Path(const Aws::String& fname, bool object_empty_ok, Aws::String* bucket, Aws::String* object, TF_Status* status); +namespace tf_random_access_file { +void Cleanup(TF_RandomAccessFile* file); +int64_t Read(const TF_RandomAccessFile* file, uint64_t offset, size_t n, + char* buffer, TF_Status* status); +} // namespace tf_random_access_file + +namespace tf_writable_file { +void Cleanup(TF_WritableFile* file); +void Append(const TF_WritableFile* file, const char* buffer, size_t n, + TF_Status* status); +int64_t Tell(const TF_WritableFile* file, TF_Status* status); +void Sync(const TF_WritableFile* file, TF_Status* status); +void Flush(const TF_WritableFile* file, TF_Status* status); +void Close(const TF_WritableFile* file, TF_Status* status); +} // namespace tf_writable_file + namespace tf_s3_filesystem { typedef struct S3File { std::shared_ptr s3_client; @@ -43,8 +59,15 @@ typedef struct S3File { absl::Mutex initialization_lock; S3File(); } S3File; + void Init(TF_Filesystem* filesystem, TF_Status* status); void Cleanup(TF_Filesystem* filesystem); +void NewRandomAccessFile(const TF_Filesystem* filesystem, const char* path, + TF_RandomAccessFile* file, TF_Status* status); +void NewWritableFile(const TF_Filesystem* filesystem, const char* path, + TF_WritableFile* file, TF_Status* status); +void NewAppendableFile(const TF_Filesystem* filesystem, const char* path, + TF_WritableFile* file, TF_Status* status); } // namespace tf_s3_filesystem #endif // TENSORFLOW_C_EXPERIMENTAL_FILESYSTEM_PLUGINS_S3_S3_FILESYSTEM_H_ diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc index 431da1419f7..db8a28b938d 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc @@ -21,6 +21,7 @@ limitations under the License. 
#include "tensorflow/core/platform/test.h" #define ASSERT_TF_OK(x) ASSERT_EQ(TF_OK, TF_GetCode(x)) << TF_Message(x) +#define EXPECT_TF_OK(x) EXPECT_EQ(TF_OK, TF_GetCode(x)) << TF_Message(x) static std::string InitializeTmpDir() { // This env should be something like `s3://bucket/path` @@ -82,6 +83,45 @@ class S3FilesystemTest : public ::testing::Test { return translated_name; } + std::unique_ptr + GetWriter() { + std::unique_ptr writer( + new TF_WritableFile, [](TF_WritableFile* file) { + if (file != nullptr) { + if (file->plugin_file != nullptr) tf_writable_file::Cleanup(file); + delete file; + } + }); + writer->plugin_file = nullptr; + return writer; + } + + std::unique_ptr + GetReader() { + std::unique_ptr + reader(new TF_RandomAccessFile, [](TF_RandomAccessFile* file) { + if (file != nullptr) { + if (file->plugin_file != nullptr) + tf_random_access_file::Cleanup(file); + delete file; + } + }); + reader->plugin_file = nullptr; + return reader; + } + + void WriteString(const std::string& path, const std::string& content) { + auto writer = GetWriter(); + tf_s3_filesystem::NewWritableFile(filesystem_, path.c_str(), writer.get(), + status_); + if (TF_GetCode(status_) != TF_OK) return; + tf_writable_file::Append(writer.get(), content.c_str(), content.length(), + status_); + if (TF_GetCode(status_) != TF_OK) return; + tf_writable_file::Close(writer.get(), status_); + if (TF_GetCode(status_) != TF_OK) return; + } + protected: TF_Filesystem* filesystem_; TF_Status* status_; @@ -90,7 +130,35 @@ class S3FilesystemTest : public ::testing::Test { std::string root_dir_; }; -TEST_F(S3FilesystemTest, Init) { ASSERT_TF_OK(status_); } +TEST_F(S3FilesystemTest, NewRandomAccessFile) { + const std::string path = GetURIForPath("RandomAccessFile"); + const std::string content = "abcdefghijklmn"; + + WriteString(path, content); + ASSERT_TF_OK(status_); + + auto reader = GetReader(); + tf_s3_filesystem::NewRandomAccessFile(filesystem_, path.c_str(), reader.get(), + status_); + EXPECT_TF_OK(status_); + + std::string result; + result.resize(content.size()); + auto read = tf_random_access_file::Read(reader.get(), 0, content.size(), + &result[0], status_); + result.resize(read); + EXPECT_TF_OK(status_); + EXPECT_EQ(content.size(), result.size()); + EXPECT_EQ(content, result); + + result.clear(); + result.resize(4); + read = tf_random_access_file::Read(reader.get(), 2, 4, &result[0], status_); + result.resize(read); + EXPECT_TF_OK(status_); + EXPECT_EQ(4, result.size()); + EXPECT_EQ(content.substr(2, 4), result); +} } // namespace } // namespace tensorflow From 8de99a351c72564144fbaffe58fd4d4ba6da01cf Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Tue, 28 Jul 2020 19:43:00 +0700 Subject: [PATCH 1472/2522] Add test NewWritableFile --- .../filesystem/plugins/s3/s3_filesystem.h | 2 + .../plugins/s3/s3_filesystem_test.cc | 49 +++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h index 2c44fd2ce14..05c79a333c1 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h @@ -68,6 +68,8 @@ void NewWritableFile(const TF_Filesystem* filesystem, const char* path, TF_WritableFile* file, TF_Status* status); void NewAppendableFile(const TF_Filesystem* filesystem, const char* path, TF_WritableFile* file, TF_Status* status); +int64_t GetFileSize(const TF_Filesystem* filesystem, const char* path, + 
TF_Status* status); } // namespace tf_s3_filesystem #endif // TENSORFLOW_C_EXPERIMENTAL_FILESYSTEM_PLUGINS_S3_S3_FILESYSTEM_H_ diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc index db8a28b938d..f367d0b6a98 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc @@ -122,6 +122,31 @@ class S3FilesystemTest : public ::testing::Test { if (TF_GetCode(status_) != TF_OK) return; } + std::string ReadAll(const string& path) { + auto reader = GetReader(); + tf_s3_filesystem::NewRandomAccessFile(filesystem_, path.c_str(), + reader.get(), status_); + if (TF_GetCode(status_) != TF_OK) return ""; + + auto file_size = + tf_s3_filesystem::GetFileSize(filesystem_, path.c_str(), status_); + if (TF_GetCode(status_) != TF_OK) return ""; + + std::string content; + content.resize(file_size); + auto read = tf_random_access_file::Read(reader.get(), 0, file_size, + &content[0], status_); + if (TF_GetCode(status_) != TF_OK) return ""; + if (read >= 0) content.resize(read); + if (file_size != content.size()) + TF_SetStatus( + status_, TF_DATA_LOSS, + std::string("expected " + std::to_string(file_size) + " got " + + std::to_string(content.size()) + " bytes") + .c_str()); + return content; + } + protected: TF_Filesystem* filesystem_; TF_Status* status_; @@ -160,6 +185,30 @@ TEST_F(S3FilesystemTest, NewRandomAccessFile) { EXPECT_EQ(content.substr(2, 4), result); } +TEST_F(S3FilesystemTest, NewWritableFile) { + auto writer = GetWriter(); + const std::string path = GetURIForPath("WritableFile"); + tf_s3_filesystem::NewWritableFile(filesystem_, path.c_str(), writer.get(), + status_); + EXPECT_TF_OK(status_); + tf_writable_file::Append(writer.get(), "content1,", strlen("content1,"), + status_); + EXPECT_TF_OK(status_); + tf_writable_file::Append(writer.get(), "content2", strlen("content2"), + status_); + EXPECT_TF_OK(status_); + tf_writable_file::Flush(writer.get(), status_); + EXPECT_TF_OK(status_); + tf_writable_file::Sync(writer.get(), status_); + EXPECT_TF_OK(status_); + tf_writable_file::Close(writer.get(), status_); + EXPECT_TF_OK(status_); + + auto content = ReadAll(path); + EXPECT_TF_OK(status_); + EXPECT_EQ("content1,content2", content); +} + } // namespace } // namespace tensorflow From ddf538bf79cc5cf575cad2aa5b3b8a6995e378b6 Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Tue, 28 Jul 2020 06:46:13 -0700 Subject: [PATCH 1473/2522] Add support for resolvers to model side effects of functions on local variables. Add support for static value annotations, which are needed to properly resolve names in modules. Add support for attributes, based on the static value if available. Clean up the resolver interface to reduce the amount of indirection. 
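Under the new interface, a resolver reports side effects of a call as the second element returned by `res_call`: a map from `qual_names.QN` to sets of types, which the statement inferrer merges into the symbol table for subsequent statements (the call expression itself still sees the pre-call types). A minimal sketch of such a resolver, not taken from the tree; the symbol name `x` and the `int`/`str` types are only for illustration:

from tensorflow.python.autograph.pyct import qual_names
from tensorflow.python.autograph.pyct.static_analysis import type_inference


class SideEffectAwareResolver(type_inference.Resolver):
  """Pretends every external call writes a str into the enclosing symbol `x`."""

  def res_name(self, ns, types_ns, name):
    # New contract: return (types, static_value); the static value, if any,
    # lets the inferrer resolve attribute accesses on this name.
    value = ns.get(str(name))
    return ({type(value)}, value) if value is not None else (None, None)

  def res_value(self, ns, value):
    return {type(value)}

  def res_arg(self, ns, types_ns, f_name, name, type_anno):
    # Trust the annotation when present, e.g. `x: int`.
    return {int} if type_anno == qual_names.QN('int') else None

  def res_call(self, ns, types_ns, node, args, keywords):
    # First element: the call's return types (unknown here).
    # Second element: side effects, keyed by qual_names.QN, in the same shape
    # exercised by test_external_function_side_effects below.
    return None, {qual_names.QN('x'): {str}}
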
PiperOrigin-RevId: 323562369 Change-Id: I934311ca47edf5d175e95940994e76b57d8a2c9c --- tensorflow/python/autograph/pyct/anno.py | 9 +- .../pyct/static_analysis/type_inference.py | 329 +++++++------ .../static_analysis/type_inference_test.py | 447 +++++++++++++++--- 3 files changed, 575 insertions(+), 210 deletions(-) diff --git a/tensorflow/python/autograph/pyct/anno.py b/tensorflow/python/autograph/pyct/anno.py index e6c40fc0cc2..90535ffd903 100644 --- a/tensorflow/python/autograph/pyct/anno.py +++ b/tensorflow/python/autograph/pyct/anno.py @@ -36,8 +36,14 @@ import gast class NoValue(enum.Enum): + def of(self, node, default=None): + return getanno(node, self, default=default) + + def exists(self, node): + return hasanno(node, self) + def __repr__(self): - return self.name + return str(self.name) class Basic(NoValue): @@ -102,6 +108,7 @@ class Static(NoValue): LIVE_VARS_IN = ('Symbols live when entering the node. See liveness.py.') TYPES = 'Static type information. See type_inference.py.' CLOSURE_TYPES = 'Types of closure symbols at each detected call site.' + VALUE = 'Static value information. See type_inference.py.' FAIL = object() diff --git a/tensorflow/python/autograph/pyct/static_analysis/type_inference.py b/tensorflow/python/autograph/pyct/static_analysis/type_inference.py index 4e8a9a90020..cf866ad3ec7 100644 --- a/tensorflow/python/autograph/pyct/static_analysis/type_inference.py +++ b/tensorflow/python/autograph/pyct/static_analysis/type_inference.py @@ -20,6 +20,10 @@ extracted from static sources: * global and local symbols visible to the function at analysis time * literals +Important: This analysis is static, and does not detect dynamic type changes. +The analysis attempts to use the values of external symbols, if available. These +values are also considered static for the purpose of analysis. + Requires reaching function definitions analysis. """ @@ -27,7 +31,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from typing import Any, Tuple +from typing import Tuple import gast @@ -41,42 +45,68 @@ from tensorflow.python.autograph.pyct.static_analysis import annos class Resolver(object): """Resolver objects handle the process of looking up actual names and types. - All resolve_* methods: + Unless noted otherwise, all resolve_* methods: * have a first namespace argument, mapping string to actual values + * have a second types_namespace argument, mapping string to actual inferred + types * specify names as QN objects * specify types as a Set of inferred types - All resolve_* methods must return either: + Unless noted otherwise, all resolve_* methods must return either: * a set of `type` objects * None """ - def res_name(self, ns, name): - """Resolves the type an external (e.g. closure, global) variable.""" + def res_name(self, ns, types_ns, name): + """Resolves the type/value an external (e.g. closure, global) variable. + + Args: + ns: namespace + types_ns: types namespace + name: symbol name + Returns: + Tuple (type, static_value). The first element is the type to use for + inferrence. The second is the static value to use. Return None to treat it + as unknown. + """ raise NotImplementedError('subclasses must implement') def res_value(self, ns, value): - """Resolves the type a literal value.""" + """Resolves the type a literal or static value.""" raise NotImplementedError('subclasses must implement') - # TODO(mdan): Allow caller to model side effects. 
- def res_call(self, ns, name, target, args, keywords, starargs, kwargs): + def res_arg(self, ns, types_ns, f_name, name, type_anno): + """Resolves the type of a (possibly annotated) function argument.""" + raise NotImplementedError('subclasses must implement') + + def res_call(self, ns, types_ns, node, args, keywords): """Resolves the return type an external function or method call. Args: ns: namespace - name: str, the function name - target: if this is a method call, the types of the method target, None - otherwise - args: list or argument types - keywords: dict of name to argument types - starargs: list of types of the *args arguments (should be at most one) - kwargs: list of types of the **kwargs arguments (in order of appearance) + types_ns: types namespace + node: str, the function name + args: types of each respective argument in node.args + keywords: types of each respective argument in node.keywords + + Returns: + Tuple (return_type, side_effect_types). The first element is just the + return types of the function. The second element is a map from + argument names to sets of types, and allow modelling side effects of + functions (for example via global or nonlocal). """ raise NotImplementedError('subclasses must implement') - def res_arg(self, ns, f_name, arg_name, type_anno): - """Resolves the type of a (possibly annotated) function argument.""" + def res_subscript(self, ns, types_ns, node, value, slice_): + """Resolves the return type of a unary operation.""" + raise NotImplementedError('subclasses must implement') + + def res_compare(self, ns, types_ns, node, left, right): + """Resolves the return type of a unary operation.""" + raise NotImplementedError('subclasses must implement') + + def res_binop(self, ns, types_ns, node, left, right): + """Resolves the return type of a unary operation.""" raise NotImplementedError('subclasses must implement') @@ -86,23 +116,23 @@ class _SymbolTable(object): This is a value type. Only implements the strictly necessary operators. Attributes: - value: Dict[qual_names.QN, Set[Type]], mapping symbols to the set of + types: Dict[qual_names.QN, Set[Type]], mapping symbols to the set of possible types. 
""" def __init__(self, init_from=None): if init_from: assert isinstance(init_from, _SymbolTable) - self.value = { - s: set(other_types) for s, other_types in init_from.value.items() + self.types = { + s: set(other_types) for s, other_types in init_from.types.items() } else: - self.value = {} + self.types = {} def __eq__(self, other): - if frozenset(self.value.keys()) != frozenset(other.value.keys()): + if frozenset(self.types.keys()) != frozenset(other.types.keys()): return False - ret = all(self.value[s] == other.value[s] for s in self.value) + ret = all(self.types[s] == other.types[s] for s in self.types) return ret def __ne__(self, other): @@ -111,52 +141,17 @@ class _SymbolTable(object): def __or__(self, other): assert isinstance(other, _SymbolTable) result = _SymbolTable(self) - for s, other_types in other.value.items(): - if s not in result.value: + for s, other_types in other.types.items(): + if s not in result.types: self_types = set() - result.value[s] = self_types + result.types[s] = self_types else: - self_types = result.value[s] + self_types = result.types[s] self_types.update(other_types) return result def __repr__(self): - return 'SymbolTable {}'.format(self.value) - - -_GETITEM = qual_names.QN('__getitem__') - -_HANDLERS = { - gast.Eq: qual_names.QN('__eq__'), - gast.NotEq: qual_names.QN('__ne__'), - gast.Lt: qual_names.QN('__lt__'), - gast.LtE: qual_names.QN('__le__'), - gast.Gt: qual_names.QN('__gt__'), - gast.GtE: qual_names.QN('__ge__'), - gast.In: qual_names.QN('__contains__'), - # TODO(mdan): Is this actually correct? - # NotIn(*) = Not(In(*)) - gast.NotIn: qual_names.QN('__not__'), - - gast.Add: qual_names.QN('__add__'), - gast.Sub: qual_names.QN('__sub__'), - gast.Mult: qual_names.QN('__mul__'), - gast.Div: qual_names.QN('__div__'), - gast.FloorDiv: qual_names.QN('__floordiv__'), - gast.Mod: qual_names.QN('__mod__'), - gast.Pow: qual_names.QN('__pow__'), - gast.LShift: qual_names.QN('__lshift__'), - gast.RShift: qual_names.QN('__rshift__'), - gast.BitOr: qual_names.QN('__or__'), - gast.BitXor: qual_names.QN('__xor__'), - gast.BitAnd: qual_names.QN('__and__'), - gast.MatMult: qual_names.QN('__matmul__'), -} - -_FIXED_RETTYPES = { - gast.Is: bool, - gast.IsNot: bool, -} + return 'SymbolTable {}'.format(self.types) class StmtInferrer(gast.NodeVisitor): @@ -164,6 +159,21 @@ class StmtInferrer(gast.NodeVisitor): This visitor annotates most nodes with type information. It also sets types for the symbols modified by this statement in its types_out property. + + Note: this inferrer is able to capture side effects of functions, however, + these side effects will not be applied to the current expression. Doing so + would create too much of a dependence on the runtime's internal rules about + execution order. + Example: + + def f(): + nonlocal a + a = 1 + return a + + a = 0.0 + b = f() + a # a = float; side effect of f() ignored + print(a) # a = int; side effect of f() accounted for """ def __init__(self, resolver, scope, namespace, closure_types, types_in): @@ -173,7 +183,7 @@ class StmtInferrer(gast.NodeVisitor): self.closure_types = closure_types self.types_in = types_in self.new_symbols = {} - self.rvalue = None + self.rtype = None def visit(self, node): types = super().visit(node) @@ -184,10 +194,19 @@ class StmtInferrer(gast.NodeVisitor): def visit_FunctionDef(self, node): # Skip local function definitions. They are analyzed separately. + # TODO(mdan): Don't skip. Analyze side effects instead. 
return None + def _check_set(self, value): + if value is not None and not isinstance(value, set): + raise ValueError('{} method expected to return set, got {}'.format( + self.resolver, value)) + def visit_Constant(self, node): - return self.resolver.res_value(self.namespace, node.value) + types = self.resolver.res_value(self.namespace, node.value) + if __debug__: + self._check_set(types) + return types def visit_Tuple(self, node): if isinstance(node.ctx, gast.Load): @@ -214,116 +233,156 @@ class StmtInferrer(gast.NodeVisitor): def visit_Name(self, node): name = anno.getanno(node, anno.Basic.QN) + if isinstance(node.ctx, gast.Load): - types = self.types_in.value.get(name, None) + types = self.types_in.types.get(name, None) if (types is None) and (name not in self.scope.bound): if name in self.closure_types: types = self.closure_types[name] else: - types = self.resolver.res_name(self.namespace, name) - return types + types, value = self.resolver.res_name( + self.namespace, self.types_in.types, name) + if value is not None: + anno.setanno(node, anno.Static.VALUE, value) elif isinstance(node.ctx, gast.Param): type_name = anno.getanno(node.annotation, anno.Basic.QN, None) - types = self.resolver.res_arg(self.namespace, self.scope.function_name, - name, type_name) + types = self.resolver.res_arg(self.namespace, self.types_in.types, + self.scope.function_name, name, type_name) if types is not None: self.new_symbols[name] = types - return types elif isinstance(node.ctx, gast.Store): - if self.rvalue is not None: - self.new_symbols[name] = self.rvalue - else: - # No type information, assume Any. - self.new_symbols[name] = {Any} - return self.rvalue + if self.rtype is not None: + self.new_symbols[name] = self.rtype + types = self.rtype - assert False, 'unknown ctx' + else: + assert False, 'unknown ctx' + + if __debug__: + self._check_set(types) + + return types + + def visit_Attribute(self, node): + parent_types = self.visit(node.value) + + # Attempt to use the static value if known. + parent_value = anno.Static.VALUE.of(node.value, None) + if parent_value is not None: + static_value = getattr(parent_value, node.attr, None) + + else: + # Fall back to the type if that is known. + if parent_types is None: + return None + + inferred_values = [getattr(t, node.attr, None) for t in parent_types] + if not inferred_values: + return None + + static_value = inferred_values[0] + if static_value is None: + return None + + if any(v is not static_value for v in inferred_values[1:]): + # Static value not stable, assume it's dynamic. + return None + + types = self.resolver.res_value(self.namespace, static_value) + anno.setanno(node, anno.Static.VALUE, static_value) + + if __debug__: + self._check_set(types) + + return types def visit_Call(self, node): + self.visit(node.func) + f_name = anno.getanno(node.func, anno.Basic.QN) - - kwargs = [self.visit(kw.value) for kw in node.keywords if kw.arg is None] - keywords = { - kw.arg: self.visit(kw.value) - for kw in node.keywords - if kw.arg is not None - } - is_starred = [isinstance(a, gast.Starred) for a in node.args] - args = [ - self.visit(a) - for a, starred in zip(node.args, is_starred) - if not starred - ] - starargs = [ - self.visit(a.value) - for a, starred in zip(node.args, is_starred) - if starred - ] - if f_name in self.scope.bound: # Don't attempt external resolution of local functions. # TODO(mdan): Use type annotations of the local definition. 
return None - return self.resolver.res_call( - self.namespace, f_name, None, args, keywords, starargs, kwargs) + arg_types = [self.visit(a) for a in node.args] + keyword_types = [self.visit(kw.value) for kw in node.keywords] + + ret_type, side_effects = self.resolver.res_call(self.namespace, + self.types_in.types, node, + arg_types, keyword_types) + if __debug__: + self._check_set(ret_type) + if side_effects: + if not isinstance(side_effects, dict): + raise ValueError( + 'side effects must be dict, got {}'.format(side_effects)) + for k, v in side_effects.items(): + if not isinstance(k, qual_names.QN): + raise ValueError('side effect keys must be QNs, got {}'.format(k)) + self._check_set(v) + + if side_effects: + self.new_symbols.update(side_effects) + return ret_type def visit_Index(self, node): return self.visit(node.value) def visit_Assign(self, node): - self.rvalue = self.visit(node.value) + self.rtype = self.visit(node.value) for t in node.targets: self.visit(t) - self.rvalue = None + self.rtype = None def visit_Subscript(self, node): - val_type = self.visit(node.value) - slice_type = self.visit(node.slice) + val_types = self.visit(node.value) + slice_types = self.visit(node.slice) - if val_type is None or slice_type is None: + if val_types is None or slice_types is None: return None - return self.resolver.res_call(self.namespace, _GETITEM, val_type, - (slice_type,), {}, (), ()) + types = self.resolver.res_subscript( + self.namespace, self.types_in.types, node, val_types, slice_types) + + if __debug__: + self._check_set(types) + + return types def visit_Compare(self, node): + left_types = self.visit(node.left) right_types = [self.visit(c) for c in node.comparators] - op_types = [type(o) for o in node.ops] - if len(op_types) > 1: - raise NotImplementedError('chained comparisons') - assert len(right_types) == 1 - left_type = self.visit(node.left) - right_type, = right_types - op_type, = op_types - - if left_type is None or right_type is None: + if left_types is None or any(t is None for t in right_types): return None - f_name = _HANDLERS.get(op_type, None) - if f_name is None: - # Python doesn't allow overriding these operators. Their return types are - # fixed. - return {_FIXED_RETTYPES[op_type]} - return self.resolver.res_call(self.namespace, _HANDLERS[op_type], - left_type, (right_type,), {}, (), ()) + types = self.resolver.res_compare( + self.namespace, self.types_in.types, node, left_types, right_types) + + if __debug__: + self._check_set(types) + + return types def visit_BinOp(self, node): - left_type = self.visit(node.left) - right_type = self.visit(node.right) + left_types = self.visit(node.left) + right_types = self.visit(node.right) - if left_type is None or right_type is None: + if left_types is None or right_types is None: return None - # TODO(mdan): This does not fully follow Python operator semantics. 
- # For example, in `a + b` Python will try `a.__add__`, but also `b.__radd__` - return self.resolver.res_call(self.namespace, _HANDLERS[type(node.op)], - left_type, (right_type,), {}, (), ()) + types = self.resolver.res_binop( + self.namespace, self.types_in.types, node, left_types, right_types) + + if __debug__: + self._check_set(types) + + return types class Analyzer(cfg.GraphVisitor): @@ -355,7 +414,7 @@ class Analyzer(cfg.GraphVisitor): existing_types = {} anno.setanno(ast_node, anno.Static.CLOSURE_TYPES, existing_types) - for k, v in types.value.items(): + for k, v in types.types.items(): if k in existing_types: existing_types[k].update(v) else: @@ -371,10 +430,10 @@ class Analyzer(cfg.GraphVisitor): types_out = _SymbolTable(types_in) ast_node = node.ast_node - inferrer = StmtInferrer( - self.resolver, self.scope, self.namespace, self.closure_types, types_in) + inferrer = StmtInferrer(self.resolver, self.scope, self.namespace, + self.closure_types, types_in) inferrer.visit(ast_node) - types_out.value.update(inferrer.new_symbols) + types_out.types.update(inferrer.new_symbols) reaching_fndefs = anno.getanno(ast_node, anno.Static.DEFINED_FNS_IN) node_scope = anno.getanno(ast_node, anno.Static.SCOPE, None) @@ -404,8 +463,8 @@ class FunctionVisitor(transformer.Base): scope = anno.getanno(node, annos.NodeAnno.ARGS_AND_BODY_SCOPE) closure_types = anno.getanno(node, anno.Static.CLOSURE_TYPES, {}) - analyzer = Analyzer( - subgraph, self.resolver, self.ctx.info.namespace, scope, closure_types) + analyzer = Analyzer(subgraph, self.resolver, self.ctx.info.namespace, scope, + closure_types) analyzer.visit_forward() # Recursively process any remaining subfunctions. diff --git a/tensorflow/python/autograph/pyct/static_analysis/type_inference_test.py b/tensorflow/python/autograph/pyct/static_analysis/type_inference_test.py index fb7324aedfa..e3cb7e04c61 100644 --- a/tensorflow/python/autograph/pyct/static_analysis/type_inference_test.py +++ b/tensorflow/python/autograph/pyct/static_analysis/type_inference_test.py @@ -29,37 +29,24 @@ from tensorflow.python.autograph.pyct.static_analysis import type_inference from tensorflow.python.platform import test -class TestResolver(type_inference.Resolver): +class BasicTestResolver(type_inference.Resolver): """A very basic resolver for testing.""" - def res_name(self, ns, name): - return {type(ns[str(name)])} + def res_name(self, ns, types_ns, name): + return {type(ns[str(name)])}, ns[str(name)] def res_value(self, ns, value): - del ns return {type(value)} - def res_call(self, ns, name, target, args, keywords, starargs, kwargs): - name_str = str(name) - if name_str in ns: - return {ns[name_str].__annotations__['return']} - if target is None: - return {'unk_{}'.format(name_str)} - return {'{}_{}'.format(list(target)[0], name_str)} - - def res_arg(self, ns, f_name, arg_name, type_anno): - if f_name == 'magic_no_types': - return None - if type_anno is not None: - return {{'int': int, 'float': float}[str(type_anno)]} - return {'{}_{}'.format(f_name, arg_name)} + def res_arg(self, ns, types_ns, f_name, name, type_anno): + return {str(type_anno)} class TestTranspiler(transpiler.GenericTranspiler): - def __init__(self): + def __init__(self, resolver_type): super().__init__() - self.resolver = TestResolver() + self.resolver = resolver_type() def get_transformed_name(self, _): return 'test_item' @@ -87,16 +74,58 @@ class TypeInferenceAnalyzerTest(test.TestCase): actual = {str(k): v for k, v in actual.items()} self.assertDictEqual(actual, expected) + def 
test_no_inference_on_unknown_operand_types(self): + + class Resolver(type_inference.Resolver): + + def res_arg(self, ns, types_ns, f_name, name, type_anno): + return None + + def test_fn(a, b): + return a < b, a - b + + node, _ = TestTranspiler(Resolver).transform(test_fn, None) + fn_body = node.body + + # With no information on operand types, the operators will infer nothing. + self.assertFalse( + anno.hasanno(fn_body[0].value.elts[0], anno.Static.TYPES)) + self.assertFalse( + anno.hasanno(fn_body[0].value.elts[1], anno.Static.TYPES)) + + def test_resolver_output_checked(self): + + class Resolver(type_inference.Resolver): + + def res_arg(self, ns, types_ns, f_name, name, type_anno): + return 1 + + def test_fn(a): + del a + pass + + with self.assertRaisesRegex(ValueError, 'expected to return set'): + TestTranspiler(Resolver).transform(test_fn, None) + def test_argument(self): + test_self = self + + class Resolver(type_inference.Resolver): + + def res_arg(self, ns, types_ns, f_name, name, type_anno): + if name == qual_names.QN('a'): + test_self.assertEqual(type_anno, qual_names.QN('int')) + return {str(name) + '_type'} + def test_fn(a: int, b): return a, b - node, _ = TestTranspiler().transform(test_fn, None) + node, _ = TestTranspiler(Resolver).transform(test_fn, None) fn_body = node.body - self.assertTypes(fn_body[0].value.elts[0], int) - self.assertTypes(fn_body[0].value.elts[1], 'test_fn_b') + self.assertTypes(fn_body[0].value.elts[0], 'a_type') + self.assertTypes(fn_body[0].value.elts[1], 'b_type') def test_argument_of_local_function(self): @@ -107,42 +136,238 @@ class TypeInferenceAnalyzerTest(test.TestCase): return foo(a) - tr = TestTranspiler() + tr = TestTranspiler(BasicTestResolver) node, _ = tr.transform(test_fn, None) fn_body = node.body - self.assertTypes(fn_body[0].body[0].value, float) - self.assertClosureTypes(fn_body[0], {'a': {int}}) + self.assertTypes(fn_body[0].body[0].value, 'float') + self.assertClosureTypes(fn_body[0], {'a': {'int'}}) - def test_straightline_assignment(self): + def test_assign_straightline(self): - def test_fn(a: int, c): + def test_fn(a: int, c: float): b = a return a, b, c - node, _ = TestTranspiler().transform(test_fn, None) + node, _ = TestTranspiler(BasicTestResolver).transform(test_fn, None) fn_body = node.body - self.assertTypes(fn_body[0].targets[0], int) - self.assertTypes(fn_body[0].value, int) - self.assertTypes(fn_body[1].value.elts[0], int) - self.assertTypes(fn_body[1].value.elts[1], int) - self.assertTypes(fn_body[1].value.elts[2], 'test_fn_c') + self.assertTypes(fn_body[0].targets[0], 'int') + self.assertTypes(fn_body[0].value, 'int') + self.assertTypes(fn_body[1].value.elts[0], 'int') + self.assertTypes(fn_body[1].value.elts[1], 'int') + self.assertTypes(fn_body[1].value.elts[2], 'float') - def test_assignment_overwrite(self): + def test_expr(self): + + self_test = self + + class Resolver(type_inference.Resolver): + + def res_value(self, ns, value): + self_test.assertEqual(value, tc.a) + return {str} + + def res_name(self, ns, types_ns, name): + self_test.assertEqual(name, qual_names.QN('tc')) + return {TestClass}, tc + + def res_call(self, ns, types_ns, node, args, keywords): + return {int}, None + + class TestClass: + + def a(self): + pass + + tc = TestClass() + + def test_fn(): + tc.a() + + node, _ = TestTranspiler(Resolver).transform(test_fn, None) + fn_body = node.body + + self.assertTypes(fn_body[0].value, int) + self.assertTypes(fn_body[0].value.func, str) + self.assertEqual( + anno.getanno(fn_body[0].value.func, 
anno.Static.VALUE), tc.a) + + def test_assign_overwriting(self): def test_fn(a: int, b: float): c = a c = b return c - node, _ = TestTranspiler().transform(test_fn, None) + node, _ = TestTranspiler(BasicTestResolver).transform(test_fn, None) fn_body = node.body - self.assertTypes(fn_body[0].targets[0], int) + self.assertTypes(fn_body[0].targets[0], 'int') + self.assertTypes(fn_body[0].value, 'int') + self.assertTypes(fn_body[1].targets[0], 'float') + self.assertTypes(fn_body[1].value, 'float') + + def test_dynamic_attribute_of_static_value(self): + + test_self = self + + class Resolver(type_inference.Resolver): + + def res_value(self, ns, value): + test_self.assertEqual(value, tc.a) + return {int} + + def res_name(self, ns, types_ns, name): + test_self.assertEqual(name, qual_names.QN('tc')) + return {TestClass}, tc + + class TestClass: + + def __init__(self): + self.a = 1 + + tc = TestClass() + + def test_fn(): + return tc.a + + node, _ = TestTranspiler(Resolver).transform(test_fn, None) + fn_body = node.body + + self.assertTypes(fn_body[0].value.value, TestClass) self.assertTypes(fn_body[0].value, int) - self.assertTypes(fn_body[1].targets[0], float) - self.assertTypes(fn_body[1].value, float) + self.assertIs(anno.getanno(fn_body[0].value.value, anno.Static.VALUE), tc) + self.assertEqual(anno.getanno(fn_body[0].value, anno.Static.VALUE), tc.a) + + def test_static_attribute_of_typed_value(self): + + test_self = self + + class TestClass: + + a = 1 + + tc = TestClass() + + class Resolver(type_inference.Resolver): + + def res_name(self, ns, types_ns, name): + test_self.assertEqual(name, qual_names.QN('tc')) + return {TestClass}, None + + def res_value(self, ns, value): + test_self.assertIs(value, tc.a) + return {str} + + def test_fn(): + return tc.a + + node, _ = TestTranspiler(Resolver).transform(test_fn, None) + fn_body = node.body + + self.assertTypes(fn_body[0].value.value, TestClass) + self.assertTypes(fn_body[0].value, str) # Resolver is SOT + self.assertFalse(anno.hasanno(fn_body[0].value.value, anno.Static.VALUE)) + self.assertEqual(anno.getanno(fn_body[0].value, anno.Static.VALUE), 1) + + def test_static_attribute_of_ambiguous_type(self): + + test_self = self + + class TestClass1: + + a = 1 + + class TestClass2: + + a = 2 + + tc = TestClass1() + + class Resolver(type_inference.Resolver): + + def res_name(self, ns, types_ns, name): + test_self.assertEqual(name, qual_names.QN('tc')) + return {TestClass1, TestClass2}, None + + def res_value(self, ns, value): + test_self.assertIn(value, (1, 2)) + return {str} + + def test_fn(): + return tc.a + + node, _ = TestTranspiler(Resolver).transform(test_fn, None) + fn_body = node.body + + self.assertTypes(fn_body[0].value.value, (TestClass1, TestClass2)) + self.assertFalse(anno.hasanno(fn_body[0].value, anno.Static.TYPES)) + self.assertFalse(anno.hasanno(fn_body[0].value.value, anno.Static.VALUE)) + self.assertFalse(anno.hasanno(fn_body[0].value, anno.Static.VALUE)) + + def test_property_of_typed_value(self): + + test_self = self + + class TestClass: + + @property + def a(self): + return 1 + + tc = TestClass() + + class Resolver(type_inference.Resolver): + + def res_name(self, ns, types_ns, name): + test_self.assertEqual(name, qual_names.QN('tc')) + return {TestClass}, None + + def res_value(self, ns, value): + test_self.assertIs(value, TestClass.a) + test_self.assertNotEqual(value, 1) # Can't evaluate property of class. 
+ return {property} + + def test_fn(): + return tc.a + + node, _ = TestTranspiler(Resolver).transform(test_fn, None) + fn_body = node.body + + self.assertTypes(fn_body[0].value.value, TestClass) + self.assertTypes(fn_body[0].value, property) + self.assertFalse(anno.hasanno(fn_body[0].value.value, anno.Static.VALUE)) + self.assertEqual( + anno.getanno(fn_body[0].value, anno.Static.VALUE), TestClass.a) + + def test_dynamic_attribute_of_typed_value(self): + + test_self = self + + class TestClass: + + def __init__(self): + self.a = 1 + + tc = TestClass() + + class Resolver(type_inference.Resolver): + + def res_name(self, ns, types_ns, name): + test_self.assertEqual(name, qual_names.QN('tc')) + return {TestClass}, None + + def test_fn(): + return tc.a + + node, _ = TestTranspiler(Resolver).transform(test_fn, None) + fn_body = node.body + + self.assertTypes(fn_body[0].value.value, TestClass) + self.assertFalse(anno.hasanno(fn_body[0].value, anno.Static.TYPES)) + self.assertFalse(anno.hasanno(fn_body[0].value.value, anno.Static.VALUE)) + self.assertFalse(anno.hasanno(fn_body[0].value, anno.Static.VALUE)) def test_external_value(self): @@ -152,7 +377,7 @@ class TypeInferenceAnalyzerTest(test.TestCase): b = a return b - node, _ = TestTranspiler().transform(test_fn, None) + node, _ = TestTranspiler(BasicTestResolver).transform(test_fn, None) fn_body = node.body self.assertTypes(fn_body[0].targets[0], str) @@ -160,6 +385,19 @@ class TypeInferenceAnalyzerTest(test.TestCase): def test_external_function(self): + test_self = self + + class Resolver(type_inference.Resolver): + + def res_name(self, ns, types_ns, name): + test_self.assertEqual(name, qual_names.QN('g')) + return {str}, g + + def res_call(self, ns, types_ns, node, args, keywords): + test_self.assertEqual( + anno.getanno(node.func, anno.Basic.QN), qual_names.QN('g')) + return {float}, None + def g() -> float: return 1.0 @@ -167,12 +405,49 @@ class TypeInferenceAnalyzerTest(test.TestCase): a = g() return a - node, _ = TestTranspiler().transform(test_fn, None) + node, _ = TestTranspiler(Resolver).transform(test_fn, None) fn_body = node.body + self.assertTypes(fn_body[0].value.func, str) self.assertTypes(fn_body[0].targets[0], float) self.assertTypes(fn_body[1].value, float) + def test_external_function_side_effects(self): + + test_self = self + + class Resolver(type_inference.Resolver): + + def res_name(self, ns, types_ns, name): + test_self.assertEqual(name, qual_names.QN('g')) + return None, g + + def res_arg(self, ns, types_ns, f_name, name, type_anno): + return {str(type_anno)} + + def res_call(self, ns, types_ns, node, args, keywords): + return None, {qual_names.QN('x'): {str}} + + def g(): + # The resolver will pretend that this function has the following body: + # + # nonlocal x + # x = 'a' + pass + + def test_fn(x: int): + y = x + g() + return x, y + + node, _ = TestTranspiler(Resolver).transform(test_fn, None) + fn_body = node.body + + self.assertTypes(fn_body[0].targets[0], 'int') + self.assertTypes(fn_body[0].value, 'int') + self.assertTypes(fn_body[2].value.elts[0], str) + self.assertTypes(fn_body[2].value.elts[1], 'int') + def test_local_function_closure(self): def test_fn(x: int): @@ -182,27 +457,27 @@ class TypeInferenceAnalyzerTest(test.TestCase): foo() - node, _ = TestTranspiler().transform(test_fn, None) + node, _ = TestTranspiler(BasicTestResolver).transform(test_fn, None) fn_body = node.body - self.assertTypes(fn_body[0].body[0].value, int) - self.assertClosureTypes(fn_body[0], {'x': {int}}) + 
self.assertTypes(fn_body[0].body[0].value, 'int') + self.assertClosureTypes(fn_body[0], {'x': {'int'}}) def test_local_function_closure_ignored_for_bound_symbols(self): - def test_fn(x: int): # pylint:disable=unused-argument + def test_fn(x: float): # pylint:disable=unused-argument def foo(): x = x + 1 # pylint:disable=used-before-assignment foo() - node, _ = TestTranspiler().transform(test_fn, None) + node, _ = TestTranspiler(BasicTestResolver).transform(test_fn, None) fn_body = node.body self.assertFalse( anno.hasanno(fn_body[0].body[0].value.left, anno.Static.TYPES)) - self.assertClosureTypes(fn_body[0], {'x': {int}}) + self.assertClosureTypes(fn_body[0], {'x': {'float'}}) def test_local_function_closure_uses_call_site_types(self): @@ -214,7 +489,7 @@ class TypeInferenceAnalyzerTest(test.TestCase): x = 1.0 foo() - node, _ = TestTranspiler().transform(test_fn, None) + node, _ = TestTranspiler(BasicTestResolver).transform(test_fn, None) fn_body = node.body self.assertTypes(fn_body[0].body[0].value, float) @@ -223,54 +498,78 @@ class TypeInferenceAnalyzerTest(test.TestCase): def test_subscript(self): + test_self = self + + class Resolver(type_inference.Resolver): + + def res_arg(self, ns, types_ns, f_name, name, type_anno): + return {list} + + def res_value(self, ns, value): + return {int} + + def res_subscript(self, ns, types_ns, node, value, slice_): + test_self.assertSetEqual(value, {list}) + test_self.assertSetEqual(slice_, {int}) + return {str} + def test_fn(a): return a[1] - node, _ = TestTranspiler().transform(test_fn, None) + node, _ = TestTranspiler(Resolver).transform(test_fn, None) fn_body = node.body - self.assertTypes(fn_body[0].value, 'test_fn_a___getitem__') - self.assertTypes(fn_body[0].value.value, 'test_fn_a') + self.assertTypes(fn_body[0].value, str) + self.assertTypes(fn_body[0].value.value, list) self.assertTypes(fn_body[0].value.slice.value, int) def test_compare(self): + test_self = self + + class Resolver(type_inference.Resolver): + + def res_arg(self, ns, types_ns, f_name, name, type_anno): + return {int} + + def res_compare(self, ns, types_ns, node, left, right): + test_self.assertSetEqual(left, {int}) + test_self.assertListEqual(right, [{int}]) + return {bool} + def test_fn(a, b): return a < b - node, _ = TestTranspiler().transform(test_fn, None) + node, _ = TestTranspiler(Resolver).transform(test_fn, None) fn_body = node.body - self.assertTypes(fn_body[0].value, 'test_fn_a___lt__') - self.assertTypes(fn_body[0].value.left, 'test_fn_a') - self.assertTypes(fn_body[0].value.comparators[0], 'test_fn_b') + self.assertTypes(fn_body[0].value, bool) + self.assertTypes(fn_body[0].value.left, int) + self.assertTypes(fn_body[0].value.comparators[0], int) def test_binop(self): + test_self = self + + class Resolver(type_inference.Resolver): + + def res_arg(self, ns, types_ns, f_name, name, type_anno): + return {list} + + def res_binop(self, ns, types_ns, node, left, right): + test_self.assertSetEqual(left, {list}) + test_self.assertSetEqual(right, {list}) + return {float} + def test_fn(a, b): return a @ b - node, _ = TestTranspiler().transform(test_fn, None) + node, _ = TestTranspiler(Resolver).transform(test_fn, None) fn_body = node.body - self.assertTypes(fn_body[0].value, 'test_fn_a___matmul__') - self.assertTypes(fn_body[0].value.left, 'test_fn_a') - self.assertTypes(fn_body[0].value.right, 'test_fn_b') - - def test_no_inference_on_unknown_operand_types(self): - - # No information on types of a and b, see TestResolver. 
- def magic_no_types(a, b): - return a < b, a - b - - node, _ = TestTranspiler().transform(magic_no_types, None) - fn_body = node.body - - # With no information on operand types, the operators will assert nothing. - self.assertFalse( - anno.hasanno(fn_body[0].value.elts[0], anno.Static.TYPES)) - self.assertFalse( - anno.hasanno(fn_body[0].value.elts[1], anno.Static.TYPES)) + self.assertTypes(fn_body[0].value, float) + self.assertTypes(fn_body[0].value.left, list) + self.assertTypes(fn_body[0].value.right, list) if __name__ == '__main__': From d480a3ad713e50057a2b1a611e7cacd6bbca1eb5 Mon Sep 17 00:00:00 2001 From: Alan Anderson Date: Tue, 28 Jul 2020 23:20:58 +0900 Subject: [PATCH 1474/2522] fix windows 10 build dependency error fix for windows 10 with CUDA 11.0 build --- tensorflow/stream_executor/cuda/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/stream_executor/cuda/BUILD b/tensorflow/stream_executor/cuda/BUILD index 3a14be9ad50..f3cffc04465 100644 --- a/tensorflow/stream_executor/cuda/BUILD +++ b/tensorflow/stream_executor/cuda/BUILD @@ -324,6 +324,7 @@ cc_library( "@local_config_cuda//cuda:cudnn_header", "//tensorflow/stream_executor/lib", "//tensorflow/stream_executor/platform:dso_loader", + ":cudnn_version", ]), ) From e82d258f17bb47ce5fe8daa5c88cbbd7fcc0924b Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Tue, 28 Jul 2020 08:03:22 -0700 Subject: [PATCH 1475/2522] [NFC] Add and use member function to get FuncOp's for attached functions. - Add member functions to various flavors of CallOps to get FuncOp for the attached function. - Use these and similar existing functions on WhileOp/IfOp where possible. PiperOrigin-RevId: 323572733 Change-Id: Id3982909791af8d767f916e31fe3478cacbdde70 --- .../mlir/lite/transforms/legalize_tf_while.cc | 12 ++-- .../transforms/lower_static_tensor_list.cc | 8 +-- .../transforms/optimize_functional_ops.cc | 9 ++- .../compiler/mlir/tensorflow/ir/tf_device.h | 1 + .../mlir/tensorflow/ir/tf_device_ops.td | 5 +- .../compiler/mlir/tensorflow/ir/tf_ops.td | 40 ++++++------ .../compiler/mlir/tensorflow/ir/tf_ops_a_m.cc | 5 +- .../compiler/mlir/tensorflow/ir/tf_ops_n_z.cc | 5 +- .../transforms/collection_ops_util.cc | 20 ++---- .../functional_control_flow_to_cfg.cc | 13 ++-- .../functional_control_flow_to_regions.cc | 15 ++--- .../transforms/resource_device_inference.cc | 62 +++++++++--------- .../transforms/resource_op_lifting.cc | 19 +++--- .../transforms/stack_ops_decomposition.cc | 31 +++++---- .../tensor_array_ops_decomposition.cc | 63 +++++++------------ .../tensor_list_ops_decomposition.cc | 23 +++---- .../transforms/tpu_space_to_depth_pass.cc | 3 +- .../tpu_variable_runtime_reformatting.cc | 5 +- .../transforms/legalize_tf_control_flow.cc | 14 ++--- 19 files changed, 148 insertions(+), 205 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/transforms/legalize_tf_while.cc b/tensorflow/compiler/mlir/lite/transforms/legalize_tf_while.cc index 31e3f6dd005..6202507ae91 100644 --- a/tensorflow/compiler/mlir/lite/transforms/legalize_tf_while.cc +++ b/tensorflow/compiler/mlir/lite/transforms/legalize_tf_while.cc @@ -49,23 +49,19 @@ void RunOnWhile(TF::WhileOp while_op) { op->getLoc(), op->getResultTypes(), op->getOperands(), while_op.is_stateless()); // Insert call to the given function into the 'region'. 
- auto create_region_with_call = [&while_op](FlatSymbolRefAttr symbol, - Region& region) { + auto create_region_with_call = [&while_op](FuncOp func, Region& region) { OpBuilder builder(region); auto block = builder.createBlock(®ion); SmallVector new_operands; - auto func = while_op.getParentOfType().lookupSymbol( - symbol.getValue()); for (Type t : func.getType().getInputs()) new_operands.push_back(block->addArgument(t)); - auto call = builder.create( - while_op.getLoc(), symbol, func.getType().getResults(), new_operands); + auto call = builder.create(while_op.getLoc(), func, new_operands); builder.create(while_op.getLoc(), call.getResults()); // Mark old function as private so that it can be DCE'd if not called. func.setVisibility(SymbolTable::Visibility::Private); }; - create_region_with_call(while_op.condAttr(), new_op.cond()); - create_region_with_call(while_op.bodyAttr(), new_op.body()); + create_region_with_call(while_op.cond_func(), new_op.cond()); + create_region_with_call(while_op.body_func(), new_op.body()); op->replaceAllUsesWith(new_op.getResults()); op->erase(); diff --git a/tensorflow/compiler/mlir/lite/transforms/lower_static_tensor_list.cc b/tensorflow/compiler/mlir/lite/transforms/lower_static_tensor_list.cc index 439c44dc77e..edddc7751ab 100644 --- a/tensorflow/compiler/mlir/lite/transforms/lower_static_tensor_list.cc +++ b/tensorflow/compiler/mlir/lite/transforms/lower_static_tensor_list.cc @@ -749,11 +749,7 @@ Type VariantToUnrankedTensorType(Type type, Value value) { // Changes the function type of `cond_func` and `body_func` for the given While // op. LogicalResult UpdateFunctionTypes(TF::WhileOp op) { - auto module = op.getParentOfType(); - auto *context = module.getContext(); - - for (StringRef func_name : {op.cond(), op.body()}) { - FuncOp func = module.lookupSymbol(func_name); + for (FuncOp func : {op.cond_func(), op.body_func()}) { if (!func) continue; FunctionType func_type = func.getType(); @@ -781,7 +777,7 @@ LogicalResult UpdateFunctionTypes(TF::WhileOp op) { // return types contain a `DT_VARIANT`, change it to the unranked type // derived from the corresponding argument. func.setType(FunctionType::get(updated_argument_types, updated_result_types, - context)); + op.getContext())); // Change the argument type for the first block. llvm::for_each(func.getArguments(), [&](BlockArgument &arg) { diff --git a/tensorflow/compiler/mlir/lite/transforms/optimize_functional_ops.cc b/tensorflow/compiler/mlir/lite/transforms/optimize_functional_ops.cc index 18c1912d4c7..2311ae0668c 100644 --- a/tensorflow/compiler/mlir/lite/transforms/optimize_functional_ops.cc +++ b/tensorflow/compiler/mlir/lite/transforms/optimize_functional_ops.cc @@ -83,16 +83,15 @@ class FoldIfOp : public OpRewritePattern { if (!llvm::hasSingleElement(parent_op)) return failure(); // Find the then and else branch functions. - SymbolTable table(op.getParentOfType()); - FuncOp then_branch = table.lookup(op.then_branch()); - FuncOp else_branch = table.lookup(op.else_branch()); + FuncOp then_func = op.then_func(); + FuncOp else_func = op.else_func(); // If the If has no uses and its functions are side-effect free, then // remove. // TODO(jpienaar): Remove once recusive side-effects are supported. 
if (op.use_empty() && (op.is_stateless() || - (IsSideEffectFree(then_branch) && IsSideEffectFree(else_branch)))) { + (IsSideEffectFree(then_func) && IsSideEffectFree(else_func)))) { rewriter.eraseOp(op.getOperation()); return success(); } @@ -109,7 +108,7 @@ class FoldIfOp : public OpRewritePattern { // Identify the branch to inline. bool cond_value = (*cond.int_value_begin()).getSExtValue(); - FuncOp func = cond_value ? then_branch : else_branch; + FuncOp func = cond_value ? then_func : else_func; // Make sure that the function has exactly one block to simplify inlining. // TFLite doesn't use control flow with blocks so functions with more than diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_device.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_device.h index 4c20d1ccc4f..d1ca07d85a7 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_device.h +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_device.h @@ -21,6 +21,7 @@ limitations under the License. #include "mlir/IR/Builders.h" // from @llvm-project #include "mlir/IR/Dialect.h" // from @llvm-project +#include "mlir/IR/Function.h" // from @llvm-project #include "mlir/IR/OpDefinition.h" // from @llvm-project #include "mlir/IR/Value.h" // from @llvm-project diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_device_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_device_ops.td index 3a92e3237dc..565be63a74f 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_device_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_device_ops.td @@ -354,7 +354,10 @@ This op is used for outlining a cluster. ); let extraClassDeclaration = [{ - StringRef getFunc() { return func(); } + // returns the function that this operation will launch. + FuncOp getFunc() { + return SymbolTable::lookupNearestSymbolFrom(*this, func()); + } }]; } diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td index c65db14ed69..1e99675d938 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td @@ -366,8 +366,11 @@ def TF_LegacyCallOp : TF_Op<"LegacyCall", operand_range getArgOperands() { return args(); } // Returns the callee of this operation. - CallInterfaceCallable getCallableForCallee() { - return getAttrOfType("f"); + CallInterfaceCallable getCallableForCallee() { return fAttr(); } + + // returns the callee of this operation. + FuncOp func() { + return SymbolTable::lookupNearestSymbolFrom(*this, f()); } }]; } @@ -484,8 +487,11 @@ underlying graph, and executes each of the partitioned subgraphs as a function. operand_range getArgOperands() { return args(); } // Returns the callee of this operation. - CallInterfaceCallable getCallableForCallee() { - return getAttrOfType("f"); + CallInterfaceCallable getCallableForCallee() { return fAttr(); } + + // returns the callee of this operation. + FuncOp func() { + return SymbolTable::lookupNearestSymbolFrom(*this, f()); } }]; @@ -590,8 +596,11 @@ underlying graph, and executes each of the partitioned subgraphs as a function. operand_range getArgOperands() { return args(); } // Returns the callee of this operation. - CallInterfaceCallable getCallableForCallee() { - return getAttrOfType("f"); + CallInterfaceCallable getCallableForCallee() { return fAttr(); } + + // returns the callee of this operation. 
+ FuncOp func() { + return SymbolTable::lookupNearestSymbolFrom(*this, f()); } }]; @@ -644,18 +653,6 @@ body: A function that takes a list of tensors and returns another }]; let hasCanonicalizer = 1; - let extraClassDeclaration = [{ - // Get the condition function. - FuncOp cond_func() { - return getParentOfType().lookupSymbol(cond()); - } - - // Get the body function. - FuncOp body_func() { - return getParentOfType().lookupSymbol(body()); - } - }]; - let extraClassDeclaration = [{ // Get the condition function. FuncOp cond_func() { @@ -1203,8 +1200,11 @@ def TF_TPUPartitionedCallOp : TF_Op<"TPUPartitionedCall", [CallOpInterface]> { operand_range getArgOperands() { return args(); } // Returns the callee of this operation. - CallInterfaceCallable getCallableForCallee() { - return getAttrOfType("f"); + CallInterfaceCallable getCallableForCallee() { return fAttr(); } + + // returns the callee of this operation. + FuncOp func() { + return SymbolTable::lookupNearestSymbolFrom(*this, f()); } }]; diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc index 6183dde8581..485e4fa5315 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc @@ -1545,12 +1545,11 @@ static LogicalResult Verify(GatherV2Op op) { //===----------------------------------------------------------------------===// static LogicalResult Verify(IfOp op) { - auto module = op.getParentOfType(); - auto then_fn = module.lookupSymbol(op.then_branch()); + auto then_fn = op.then_func(); if (!then_fn) return op.emitOpError("then_branch refers to an undefined function : ") << op.then_branch(); - auto else_fn = module.lookupSymbol(op.else_branch()); + auto else_fn = op.else_func(); if (!else_fn) return op.emitOpError("else_branch refers to an undefined function : ") << op.else_branch(); diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc index df9476b7fe5..6662b0fed8f 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc @@ -1989,9 +1989,8 @@ OpFoldResult VariableShapeOp::fold(ArrayRef operands) { //===----------------------------------------------------------------------===// static LogicalResult Verify(WhileOp op) { - auto module = op.getParentOfType(); - auto cond_fn = module.lookupSymbol(op.cond()); - auto body_fn = module.lookupSymbol(op.body()); + auto cond_fn = op.cond_func(); + auto body_fn = op.body_func(); if (!cond_fn) { return op.emitOpError("cond refers to an undefined function : ") << op.cond(); diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/collection_ops_util.cc b/tensorflow/compiler/mlir/tensorflow/transforms/collection_ops_util.cc index 96f50065886..57a5cd888a1 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/collection_ops_util.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/collection_ops_util.cc @@ -181,14 +181,14 @@ llvm::Optional GetElementTypeFromAccess( llvm::function_ref(Operation*)> infer_from_op) { for (auto& use : collection.getUses()) { if (auto while_op = llvm::dyn_cast(use.getOwner())) { - auto body = module.lookupSymbol(while_op.body()); + auto body = while_op.body_func(); assert(body); auto type_from_body = GetElementTypeFromAccess( body.getArgument(use.getOperandNumber()), module, infer_from_op); if (type_from_body.hasValue()) return type_from_body; } else if (auto if_op = llvm::dyn_cast(use.getOwner())) 
{ - auto then_branch = module.lookupSymbol(if_op.then_branch()); - auto else_branch = module.lookupSymbol(if_op.else_branch()); + auto then_branch = if_op.then_func(); + auto else_branch = if_op.else_func(); assert(then_branch && else_branch); auto type_from_then = GetElementTypeFromAccess( then_branch.getArgument(use.getOperandNumber() - 1), module, @@ -198,18 +198,8 @@ llvm::Optional GetElementTypeFromAccess( else_branch.getArgument(use.getOperandNumber() - 1), module, infer_from_op); if (type_from_else.hasValue()) return type_from_else; - } else if (auto pcall = - llvm::dyn_cast(use.getOwner())) { - if (!pcall.f().isa()) continue; - auto callee = module.lookupSymbol(pcall.f().getRootReference()); - assert(callee); - auto type_from_callee = GetElementTypeFromAccess( - callee.getArgument(use.getOperandNumber()), module, infer_from_op); - if (type_from_callee.hasValue()) return type_from_callee; - } else if (auto spcall = llvm::dyn_cast( - use.getOwner())) { - auto callee = module.lookupSymbol(spcall.f()); - assert(callee); + } else if (auto call = llvm::dyn_cast(use.getOwner())) { + auto callee = dyn_cast(call.resolveCallable()); auto type_from_callee = GetElementTypeFromAccess( callee.getArgument(use.getOperandNumber()), module, infer_from_op); if (type_from_callee.hasValue()) return type_from_callee; diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/functional_control_flow_to_cfg.cc b/tensorflow/compiler/mlir/tensorflow/transforms/functional_control_flow_to_cfg.cc index a0be88cc564..d8678e620f4 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/functional_control_flow_to_cfg.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/functional_control_flow_to_cfg.cc @@ -140,10 +140,6 @@ static LogicalResult LowerIfOp(IfOp op) { Value cond_i1 = LowerCondition(loc, op.cond(), &builder); if (!cond_i1) return failure(); - auto module = op_inst->getParentOfType(); - auto then_fn = module.lookupSymbol(op.then_branch()); - auto else_fn = module.lookupSymbol(op.else_branch()); - // Split the basic block before the 'if'. The new dest will be our merge // point. Block* orig_block = op_inst->getBlock(); @@ -161,14 +157,14 @@ static LogicalResult LowerIfOp(IfOp op) { // Set up the 'then' block. Block* then_block = builder.createBlock(merge_block); - Operation* call_op = CallFn(loc, get_operand, then_fn, &builder); + Operation* call_op = CallFn(loc, get_operand, op.then_func(), &builder); auto get_then_result = [&](int i) { return call_op->getResult(i); }; JumpToBlock(loc, get_then_result, merge_block, &builder); // Set up the 'else' block. Block* else_block = builder.createBlock(merge_block); - call_op = CallFn(loc, get_operand, else_fn, &builder); + call_op = CallFn(loc, get_operand, op.else_func(), &builder); auto get_else_result = [&](int i) { return call_op->getResult(i); }; JumpToBlock(loc, get_else_result, merge_block, &builder); @@ -194,9 +190,8 @@ static LogicalResult LowerWhileOp(WhileOp op) { OpBuilder builder(op_inst); - auto module = op_inst->getParentOfType(); - auto cond_fn = module.lookupSymbol(op.cond()); - auto body_fn = module.lookupSymbol(op.body()); + auto cond_fn = op.cond_func(); + auto body_fn = op.body_func(); // Split the block containing the While op into two blocks. One containing // operations before the While op and other containing the rest. 
Create two diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/functional_control_flow_to_regions.cc b/tensorflow/compiler/mlir/tensorflow/transforms/functional_control_flow_to_regions.cc index 3320a16a6ac..672e95efd30 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/functional_control_flow_to_regions.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/functional_control_flow_to_regions.cc @@ -46,19 +46,18 @@ struct FunctionalControlFlowToRegions void runOnOperation() override; }; -// Creates a call to function `callee` in region `caller_region`. Use `args` as +// Creates a call to function `func` in region `caller_region`. Use `args` as // the call arguments, and terminate the region with a yield. The arguments are // cast to the required type before the call. `use_region_args` control whether // the input arguments are used as is (for IfOp) or block arguments of the same // type as the input arguments are created and then used as call arguments (for // While). -void CreateCall(Operation* op, StringRef callee, Region& caller_region, +void CreateCall(Operation* op, FuncOp func, Region& caller_region, ValueRange args, bool use_region_args) { assert(caller_region.empty() && "Expected empty region for newly created ops"); OpBuilder builder(caller_region); Block* entry = builder.createBlock(&caller_region); - auto func = op->getParentOfType().lookupSymbol(callee); if (use_region_args) { entry->addArguments(args.getType()); @@ -85,10 +84,10 @@ LogicalResult ConvertIfOp(IfOp if_op) { if_op.getLoc(), if_op.getResultTypes(), if_op.cond(), if_op.is_stateless()); - CreateCall(if_op, /*callee=*/if_op.then_branch(), + CreateCall(if_op, if_op.then_func(), /*caller_region=*/if_region.then_branch(), if_op.input(), /*use_region_args=*/false); - CreateCall(if_op, /*callee=*/if_op.else_branch(), + CreateCall(if_op, if_op.else_func(), /*caller_region=*/if_region.else_branch(), if_op.input(), /*use_region_args=*/false); if_op.replaceAllUsesWith(if_region.getResults()); @@ -101,9 +100,11 @@ LogicalResult ConvertWhileOp(WhileOp while_op) { while_op.getLoc(), while_op.getResultTypes(), while_op.input(), while_op.is_stateless(), while_op.parallel_iterations()); - CreateCall(while_op, while_op.cond(), while_region.cond(), while_op.input(), + CreateCall(while_op, while_op.cond_func(), + /*caller_region=*/while_region.cond(), while_op.input(), /*use_region_args=*/true); - CreateCall(while_op, while_op.body(), while_region.body(), while_op.input(), + CreateCall(while_op, while_op.body_func(), + /*caller_region=*/while_region.body(), while_op.input(), /*use_region_args=*/true); while_op.replaceAllUsesWith(while_region.getResults()); while_op.erase(); diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/resource_device_inference.cc b/tensorflow/compiler/mlir/tensorflow/transforms/resource_device_inference.cc index 21d74d81b20..4c9fa32cfd6 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/resource_device_inference.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/resource_device_inference.cc @@ -203,36 +203,36 @@ void ResourceDeviceInference::runOnOperation() { // called function's arguments. 
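// A sketch of how the refactored helper below is meant to be invoked (it
// mirrors the call sites further down in this same diff; the names are the
// ones already used in this file). One call now covers every callee of a
// control-flow op:
//
//   if (failed(propagate_operands_to_callee_arguments(
//           while_op, while_op.getOperands(),
//           {while_op.body_func(), while_op.cond_func()}, func_res)))
//     return WalkResult::interrupt();
//
// so the per-callee iteration lives inside the helper rather than being
// repeated at each call site.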
auto propagate_operands_to_callee_arguments = [&](Operation* caller, Operation::operand_range caller_operands, - llvm::StringRef called_func_name, - const PerFunctionResult& caller_res) { - auto callee = - llvm::dyn_cast(module.lookupSymbol(called_func_name)); - assert(callee); - auto& callee_res = per_function_results.find(callee)->getSecond(); - bool callee_needs_recompute = false; - for (auto operand_and_argument : - llvm::zip(caller_operands, callee.getArguments())) { - if (!mlir::getElementTypeOrSelf( - std::get<0>(operand_and_argument).getType()) - .isa()) { - continue; + ArrayRef callees, const PerFunctionResult& caller_res) { + for (FuncOp callee : callees) { + assert(callee); + auto& callee_res = per_function_results.find(callee)->getSecond(); + bool callee_needs_recompute = false; + for (auto operand_and_argument : + llvm::zip(caller_operands, callee.getArguments())) { + if (!mlir::getElementTypeOrSelf( + std::get<0>(operand_and_argument).getType()) + .isa()) { + continue; + } + auto device = + caller_res.DeviceForResource(std::get<0>(operand_and_argument)); + if (!device) continue; + if (failed(AddResourceDeviceAndEmitError( + std::get<1>(operand_and_argument), *device, caller, + &callee_res, &callee_needs_recompute))) { + return failure(); + } } - auto device = - caller_res.DeviceForResource(std::get<0>(operand_and_argument)); - if (!device) continue; - if (failed(AddResourceDeviceAndEmitError( - std::get<1>(operand_and_argument), *device, caller, - &callee_res, &callee_needs_recompute))) { - return failure(); + // If the callee recording is modified, make sure that it will be + // reprocessed. + if (callee_needs_recompute) { + worklist.insert(callee); } } - // If the callee recording is modified, make sure that it will be - // reprocessed. 
- if (callee_needs_recompute) { - worklist.insert(callee); - } return success(); }; + while (!worklist.empty()) { auto func_op = worklist.back(); worklist.pop_back(); @@ -245,18 +245,14 @@ void ResourceDeviceInference::runOnOperation() { auto walk_res = func_op.walk([&](Operation* op) { if (auto while_op = llvm::dyn_cast(op)) { if (failed(propagate_operands_to_callee_arguments( - while_op, while_op.getOperands(), while_op.body(), func_res)) || - failed(propagate_operands_to_callee_arguments( - while_op, while_op.getOperands(), while_op.cond(), func_res))) { + while_op, while_op.getOperands(), + {while_op.body_func(), while_op.cond_func()}, func_res))) return WalkResult::interrupt(); - } } else if (auto if_op = llvm::dyn_cast(op)) { if (failed(propagate_operands_to_callee_arguments( - if_op, if_op.input(), if_op.then_branch(), func_res)) || - failed(propagate_operands_to_callee_arguments( - if_op, if_op.input(), if_op.else_branch(), func_res))) { + if_op, if_op.input(), {if_op.then_func(), if_op.else_func()}, + func_res))) return WalkResult::interrupt(); - } } return WalkResult::advance(); }); diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc b/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc index a9caeaac50d..de27251a7e8 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc @@ -982,8 +982,8 @@ LogicalResult HoistForFunctionalControlFlow( RemoveIdentity(block); for (Operation& op : llvm::make_early_inc_range(*block)) { if (auto while_op = llvm::dyn_cast(&op)) { - auto body = llvm::cast(module.lookupSymbol(while_op.body())); - auto cond = llvm::cast(module.lookupSymbol(while_op.cond())); + auto body = while_op.body_func(); + auto cond = while_op.cond_func(); // Recursively handle the nested control flow. HoistForFunctionalControlFlow(&body.front(), module, lifted_partitioned_call_callees); @@ -991,10 +991,8 @@ LogicalResult HoistForFunctionalControlFlow( lifted_partitioned_call_callees); if (failed(HandleWhileLoop(while_op, body, cond))) return failure(); } else if (auto if_op = llvm::dyn_cast(&op)) { - auto then_branch = - llvm::cast(module.lookupSymbol(if_op.then_branch())); - auto else_branch = - llvm::cast(module.lookupSymbol(if_op.else_branch())); + auto then_branch = if_op.then_func(); + auto else_branch = if_op.else_func(); // Recursively handle the nested control flow. HoistForFunctionalControlFlow(&then_branch.front(), module, lifted_partitioned_call_callees); @@ -1015,12 +1013,10 @@ LogicalResult HoistForFunctionalControlFlow( } if (failed(HandleCaseOrIfOp(case_op, branch_functions))) return failure(); } else if (auto call_op = llvm::dyn_cast(&op)) { - if (!call_op.f().isa()) { + auto callee = call_op.func(); + if (!callee) return call_op.emitOpError( "resource lifting does not support call with nested references."); - } - auto callee = llvm::cast( - module.lookupSymbol(call_op.f().getRootReference())); if (failed(HandlePartitionedCallOp(call_op, callee, module, lifted_partitioned_call_callees))) { // Nested control flow handling is done in HandlePartitionedCallOp(). 
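All of these call-site changes lean on the same accessor pattern introduced in the op definitions earlier in this change: instead of climbing to the enclosing module and looking a callee up by name, each op resolves its own symbol reference against the nearest enclosing symbol table. A minimal sketch of such an accessor, assuming a generated op class whose f() getter returns the callee's symbol name (as in the tf_ops.td hunks above):

  // Resolves the callee FuncOp directly from the op, with no ModuleOp walk.
  FuncOp func() {
    // Walk up from this operation to the closest symbol table and look up
    // the function named by the `f` attribute.
    return SymbolTable::lookupNearestSymbolFrom<FuncOp>(*this, f());
  }

Call sites then shrink from a module.lookupSymbol(...) lookup plus cast to a single op.func() (or body_func()/cond_func(), then_func()/else_func()) call, which is what the hunks above and below do.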
@@ -1028,8 +1024,7 @@ LogicalResult HoistForFunctionalControlFlow( } } else if (auto call_op = llvm::dyn_cast(&op)) { - auto callee = llvm::cast(module.lookupSymbol(call_op.f())); - if (failed(HandlePartitionedCallOp(call_op, callee, module, + if (failed(HandlePartitionedCallOp(call_op, call_op.func(), module, lifted_partitioned_call_callees))) { return failure(); } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/stack_ops_decomposition.cc b/tensorflow/compiler/mlir/tensorflow/transforms/stack_ops_decomposition.cc index 2dc45ee9816..d3755a4a7d0 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/stack_ops_decomposition.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/stack_ops_decomposition.cc @@ -163,7 +163,7 @@ LogicalResult HandleWhileOp( const llvm::SmallDenseMap& data_var_to_size_var, llvm::StringMap* decomposed_partitioned_call_callees) { - auto body = module.lookupSymbol(while_op.body()); + auto body = while_op.body_func(); llvm::SmallDenseMap body_map; auto find_arg_stack_type = [&](int64_t index) -> llvm::Optional { auto it = data_var_to_size_var.find(while_op.getOperand(index)); @@ -187,7 +187,7 @@ LogicalResult HandleWhileOp( return failure(); } // Cond should not change stacks in the arguments, so use an empty map. - auto cond = module.lookupSymbol(while_op.cond()); + auto cond = while_op.cond_func(); ModifyFunctionSignature(cond, nullptr, find_arg_stack_type); llvm::SmallDenseMap empty_map; if (failed(DecomposeStackOpsInternal(&cond.front(), module, &empty_map, @@ -231,8 +231,8 @@ LogicalResult HandleIfOp( const llvm::SmallDenseMap& data_var_to_size_var, llvm::StringMap* decomposed_partitioned_call_callees) { - auto then_branch = module.lookupSymbol(if_op.then_branch()); - auto else_branch = module.lookupSymbol(if_op.else_branch()); + auto then_func = if_op.then_func(); + auto else_func = if_op.else_func(); llvm::SmallDenseMap then_map; llvm::SmallDenseMap else_map; @@ -241,12 +241,12 @@ LogicalResult HandleIfOp( if (it == data_var_to_size_var.end()) return llvm::None; return it->getFirst().getType(); }; - ModifyFunctionSignature(then_branch, &then_map, find_arg_stack_type); - ModifyFunctionSignature(else_branch, &else_map, find_arg_stack_type); + ModifyFunctionSignature(then_func, &then_map, find_arg_stack_type); + ModifyFunctionSignature(else_func, &else_map, find_arg_stack_type); const bool signature_change = !then_map.empty() || !else_map.empty(); - if (failed(DecomposeStackOpsInternal(&then_branch.front(), module, &then_map, + if (failed(DecomposeStackOpsInternal(&then_func.front(), module, &then_map, decomposed_partitioned_call_callees)) || - failed(DecomposeStackOpsInternal(&else_branch.front(), module, &else_map, + failed(DecomposeStackOpsInternal(&else_func.front(), module, &else_map, decomposed_partitioned_call_callees))) { return failure(); } @@ -258,16 +258,16 @@ LogicalResult HandleIfOp( new_if_operands.push_back(it->getSecond()); } auto new_if = OpBuilder(if_op).create( - if_op.getLoc(), then_branch.getType().getResults(), new_if_operands, + if_op.getLoc(), then_func.getType().getResults(), new_if_operands, if_op.getAttrs()); for (auto result : if_op.getResults()) { if (!getElementTypeOrSelf(result.getType()).isa()) { continue; } int64_t then_aliased_input = - FindAliasedInput(then_branch, result.getResultNumber()); + FindAliasedInput(then_func, result.getResultNumber()); int64_t else_aliased_input = - FindAliasedInput(else_branch, result.getResultNumber()); + FindAliasedInput(else_func, result.getResultNumber()); if (then_aliased_input 
>= 0 && then_aliased_input == else_aliased_input) { // Replace aliased stack output uses with input. result.replaceAllUsesWith(if_op.getOperand(then_aliased_input + 1)); @@ -507,21 +507,20 @@ LogicalResult DecomposeStackOpsInternal( return failure(); } } else if (auto pcall = llvm::dyn_cast(&op)) { - if (!pcall.f().isa()) { + if (!pcall.func()) { return pcall.emitOpError( "stack decomposition does not support call with nested references"); } if (failed(HandlePartitionedCallOp( - pcall, module.lookupSymbol(pcall.f().getRootReference()), - module, *data_var_to_size_var, + pcall, pcall.func(), module, *data_var_to_size_var, decomposed_partitioned_call_callees))) { return failure(); } } else if (auto spcall = llvm::dyn_cast(&op)) { if (failed(HandlePartitionedCallOp( - spcall, module.lookupSymbol(spcall.f()), module, - *data_var_to_size_var, decomposed_partitioned_call_callees))) { + spcall, spcall.func(), module, *data_var_to_size_var, + decomposed_partitioned_call_callees))) { return failure(); } } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tensor_array_ops_decomposition.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tensor_array_ops_decomposition.cc index 2c3422e3e00..b3a05c06a67 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tensor_array_ops_decomposition.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tensor_array_ops_decomposition.cc @@ -442,38 +442,20 @@ llvm::SmallDenseMap> AccessedGradients( if (auto grad = llvm::dyn_cast(&op)) { insert(grad.handle(), grad.source().str()); } else if (auto while_op = llvm::dyn_cast(&op)) { - auto body = module.lookupSymbol(while_op.body()); - auto cond = module.lookupSymbol(while_op.cond()); - for (const auto& entry : AccessedGradients({body, cond}, module)) { - for (const string& source : entry.getSecond()) { + for (const auto& entry : AccessedGradients( + {while_op.body_func(), while_op.cond_func()}, module)) + for (const string& source : entry.getSecond()) insert(while_op.getOperand(entry.getFirst()), source); - } - } } else if (auto if_op = llvm::dyn_cast(&op)) { - auto then_branch = module.lookupSymbol(if_op.then_branch()); - auto else_branch = module.lookupSymbol(if_op.else_branch()); for (const auto& entry : - AccessedGradients({then_branch, else_branch}, module)) { - for (const string& source : entry.getSecond()) { + AccessedGradients({if_op.then_func(), if_op.else_func()}, module)) + for (const string& source : entry.getSecond()) insert(if_op.getOperand(entry.getFirst() + 1), source); - } - } - } else if (auto pc = llvm::dyn_cast(&op)) { - if (!pc.f().isa()) continue; - auto callee = module.lookupSymbol(pc.f().getRootReference()); - for (const auto& entry : AccessedGradients({callee}, module)) { - for (const string& source : entry.getSecond()) { - insert(pc.getOperand(entry.getFirst()), source); - } - } - } else if (auto spc = - llvm::dyn_cast(&op)) { - auto callee = module.lookupSymbol(spc.f()); - for (const auto& entry : AccessedGradients({callee}, module)) { - for (const string& source : entry.getSecond()) { - insert(spc.getOperand(entry.getFirst()), source); - } - } + } else if (auto call = llvm::dyn_cast(&op)) { + auto callee = dyn_cast(call.resolveCallable()); + for (const auto& entry : AccessedGradients({callee}, module)) + for (const string& source : entry.getSecond()) + insert(call.getArgOperands()[entry.getFirst()], source); } } } @@ -527,8 +509,8 @@ LogicalResult HandleWhileOp(TF::WhileOp while_op, ModuleOp module, llvm::SmallDenseMap* stats, llvm::StringMap* 
decomposed_partitioned_call_callees) { - auto body = module.lookupSymbol(while_op.body()); - auto cond = module.lookupSymbol(while_op.cond()); + auto body = while_op.body_func(); + auto cond = while_op.cond_func(); auto grads = AccessedGradients({body, cond}, module); auto ta_arg_buffer_type = [&](int64_t index) -> Type { auto it = stats->find(while_op.getOperand(index)); @@ -610,8 +592,8 @@ LogicalResult HandleIfOp(TF::IfOp if_op, ModuleOp module, llvm::SmallDenseMap* stats, llvm::StringMap* decomposed_partitioned_call_callees) { - auto then_branch = module.lookupSymbol(if_op.then_branch()); - auto else_branch = module.lookupSymbol(if_op.else_branch()); + auto then_branch = if_op.then_func(); + auto else_branch = if_op.else_func(); auto grads = AccessedGradients({then_branch, else_branch}, module); auto ta_arg_buffer_type = [&](int64_t index) -> Type { auto it = stats->find(if_op.getOperand(index + 1)); @@ -838,21 +820,22 @@ LogicalResult DecomposeTensorArrayOps( return failure(); } } else if (auto pcall = llvm::dyn_cast(&op)) { - if (!pcall.f().isa()) { + auto callee = pcall.func(); + if (!callee) return pcall.emitOpError( "TensorArray decomposition does not support call with nested " "references."); - } - if (failed(HandlePartitionedCallOp( - pcall, module.lookupSymbol(pcall.f().getRootReference()), - module, stats, decomposed_partitioned_call_callees))) { + + if (failed( + HandlePartitionedCallOp(pcall, callee, module, stats, + decomposed_partitioned_call_callees))) { return failure(); } } else if (auto spcall = llvm::dyn_cast(&op)) { - if (failed(HandlePartitionedCallOp( - spcall, module.lookupSymbol(spcall.f()), module, stats, - decomposed_partitioned_call_callees))) { + if (failed( + HandlePartitionedCallOp(spcall, spcall.func(), module, stats, + decomposed_partitioned_call_callees))) { return failure(); } } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tensor_list_ops_decomposition.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tensor_list_ops_decomposition.cc index cd055a8dc4a..9634e4a8be3 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tensor_list_ops_decomposition.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tensor_list_ops_decomposition.cc @@ -155,7 +155,7 @@ LogicalResult HandleWhileOp( llvm::StringMap* decomposed_partitioned_call_callees) { // Rewrite body. - auto body = module.lookupSymbol(while_op.body()); + auto body = while_op.body_func(); llvm::SmallDenseMap body_map; auto find_arg_tensor_list_type = [&](int64_t index) -> llvm::Optional { auto it = buffer_to_size->find(while_op.getOperand(index)); @@ -176,7 +176,7 @@ LogicalResult HandleWhileOp( auto output_buffer_to_size = AddTensorListSizesToReturn(body, body_map); // Rewrite cond. 
- auto cond = module.lookupSymbol(while_op.cond()); + auto cond = while_op.cond_func(); llvm::SmallDenseMap cond_map; ModifyFunctionSignature(cond, cutil::GetSizeType(builder), &cond_map, find_arg_tensor_list_type, arg_buffer_size_is_fixed); @@ -701,11 +701,8 @@ LogicalResult DecomposeTensorListOpsInternal( return failure(); } } else if (auto if_op = llvm::dyn_cast(&op)) { - auto then_branch = module.lookupSymbol(if_op.then_branch()); - auto else_branch = module.lookupSymbol(if_op.else_branch()); - - if (failed(HandleCaseOrIfOp(if_op, {then_branch, else_branch}, module, - buffer_to_size, + if (failed(HandleCaseOrIfOp(if_op, {if_op.then_func(), if_op.else_func()}, + module, buffer_to_size, decomposed_partitioned_call_callees))) { return failure(); } @@ -720,21 +717,21 @@ LogicalResult DecomposeTensorListOpsInternal( return failure(); } } else if (auto pcall = llvm::dyn_cast(&op)) { - if (!pcall.f().isa()) { + if (!pcall.func()) return pcall.emitOpError( "TensorList decomposition does not support call with nested " "references."); - } + if (failed(HandlePartitionedCallOp( - pcall, module.lookupSymbol(pcall.f().getRootReference()), - module, buffer_to_size, decomposed_partitioned_call_callees))) { + pcall, pcall.func(), module, buffer_to_size, + decomposed_partitioned_call_callees))) { return failure(); } } else if (auto spcall = llvm::dyn_cast(&op)) { if (failed(HandlePartitionedCallOp( - spcall, module.lookupSymbol(spcall.f()), module, - buffer_to_size, decomposed_partitioned_call_callees))) { + spcall, spcall.func(), module, buffer_to_size, + decomposed_partitioned_call_callees))) { return failure(); } } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_space_to_depth_pass.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_space_to_depth_pass.cc index 7befa68f3d8..204a674e632 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_space_to_depth_pass.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_space_to_depth_pass.cc @@ -604,8 +604,7 @@ void TPUSpaceToDepthPass::runOnOperation() { } // Get the function on device. - auto device_func = - getOperation().lookupSymbol(cluster_func->getFunc()); + auto device_func = cluster_func->getFunc(); if (!device_func) return; TF::Conv2DOp first_conv; diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_variable_runtime_reformatting.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_variable_runtime_reformatting.cc index 2b2a33b8bc2..29b2af93561 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_variable_runtime_reformatting.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_variable_runtime_reformatting.cc @@ -452,9 +452,8 @@ void HandleReplicateOp(TF::WhileOp while_op, tf_device::ReplicateOp replicate, !llvm::isa(compile_launch.GetBody().front())) return; - auto module = while_op.getParentOfType(); - auto body = llvm::cast(module.lookupSymbol(while_op.body())); - auto cond = llvm::cast(module.lookupSymbol(while_op.cond())); + FuncOp body = while_op.body_func(); + FuncOp cond = while_op.cond_func(); // Analyze the formattable inputs. 
auto execute_arg_to_outer_args = diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_control_flow.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_control_flow.cc index 09e94d9a84f..760252331e0 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_control_flow.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_control_flow.cc @@ -119,10 +119,8 @@ void LowerIf(TF::IfOp op, ModuleOp module) { // Import the regions for both the true and false cases. These regions // must be updated to tuple the return results together and use the xla hlo // return op. - auto then_branch = module.lookupSymbol(op.then_branch()); - auto else_branch = module.lookupSymbol(op.else_branch()); - ImportXlaRegion(then_branch, &if_op.true_branch(), loc); - ImportXlaRegion(else_branch, &if_op.false_branch(), loc); + ImportXlaRegion(op.then_func(), &if_op.true_branch(), loc); + ImportXlaRegion(op.else_func(), &if_op.false_branch(), loc); // De-tuple the results of the xla hlo if result. Detuple(if_op.getResult(), op.getResults(), &builder); @@ -174,11 +172,9 @@ void LowerWhile(TF::WhileOp op, ModuleOp module) { // Import the regions for both the cond and body. These regions must be // updated to tuple the return results together and use the xla hlo return op. - auto body_branch = module.lookupSymbol(op.body()); - auto cond_branch = module.lookupSymbol(op.cond()); - - ImportXlaRegion(body_branch, &while_op.body(), loc); - ImportXlaRegion(cond_branch, &while_op.cond(), loc, /*tuple_return=*/false); + ImportXlaRegion(op.body_func(), &while_op.body(), loc); + ImportXlaRegion(op.cond_func(), &while_op.cond(), loc, + /*tuple_return=*/false); // De-tuple the results of the xla hlo while. Detuple(while_op.getResult(), op.getResults(), &builder); From e428734be9e7b146877a05b50c242fab730e7021 Mon Sep 17 00:00:00 2001 From: Blake Hechtman Date: Tue, 28 Jul 2020 09:09:13 -0700 Subject: [PATCH 1476/2522] Internal change PiperOrigin-RevId: 323584034 Change-Id: I334fb8c13e8398039524274b321d92cef054c97a --- tensorflow/compiler/tests/BUILD | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index c4dd75de1dd..d9450cb6364 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -1188,6 +1188,10 @@ tf_xla_py_test( python_version = "PY3", tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip + "noasan", + "nomsan", + "notsan", + "optonly", ], deps = [ ":xla_test", From 3e21339b42cdb463ae715b3b349cc0373eda1fc3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 28 Jul 2020 09:39:41 -0700 Subject: [PATCH 1477/2522] Fix leak of Tensors when ExecuteTrtEngine() fails. You have to explicitly delete the Tensor contained in TensorValue. PiperOrigin-RevId: 323589787 Change-Id: I8408a7a502f410c1d591f231daa696fde63e2364 --- tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc index 1094555a622..58d1c611463 100644 --- a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc +++ b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc @@ -643,8 +643,10 @@ void TRTEngineOp::ComputeAsync(OpKernelContext* ctx, } // Release any outputs that are allocated, ExecuteNativeSegment will // re-allocate them and fail if they are currently allocated. 
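// A short illustration of the ownership rule the following lines rely on; it
// restates the commit message above in code form rather than adding anything
// to the kernel:
//
//   TensorValue val = ctx->release_output(i);
//   // val.tensor is a heap-allocated Tensor that the caller now owns, so
//   // dropping val without this delete is exactly the leak being fixed.
//   delete val.tensor;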
+ // The Tensor pointer in the returned TensorValue must be explicitly + // deleted. for (int i = 0; i < ctx->num_outputs(); i++) { - ctx->release_output(i); + delete ctx->release_output(i).tensor; } ExecuteNativeSegment(ctx, helper); return; From 8a1f870523e4037265c042ce894f001165074855 Mon Sep 17 00:00:00 2001 From: Bruce Fontaine Date: Tue, 28 Jul 2020 09:40:25 -0700 Subject: [PATCH 1478/2522] Make get next parallel when using PER_HOST_V2 input mode on TPUEstimator. PiperOrigin-RevId: 323589939 Change-Id: Ifb4622ba799b3d1616c5bdfbb5eff18eccc4287b --- .../golden/v1/tensorflow.estimator.tpu.-t-p-u-config.pbtxt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.tpu.-t-p-u-config.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.tpu.-t-p-u-config.pbtxt index d934b4013b2..e329045123e 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.tpu.-t-p-u-config.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.tpu.-t-p-u-config.pbtxt @@ -7,6 +7,10 @@ tf_class { name: "eval_training_input_configuration" mtype: "" } + member { + name: "experimental_allow_per_host_v2_parallel_get_next" + mtype: "" + } member { name: "experimental_host_call_every_n_steps" mtype: "" From 415774cedf4530193b72cc1d69acc73bc0b03f15 Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Tue, 28 Jul 2020 10:31:22 -0700 Subject: [PATCH 1479/2522] Removing run_deprecated_v1 from batch_gather_op_test and moving it to array_ops folder. Also removing the no_gpu tag on this test. PiperOrigin-RevId: 323601944 Change-Id: Ia574f7eb3c71f4c2bf90e0a8311842a8574036bd --- tensorflow/python/kernel_tests/BUILD | 15 -------------- .../python/kernel_tests/array_ops/BUILD | 20 +++++++++++++++++++ .../{ => array_ops}/batch_gather_op_test.py | 15 +++++++------- 3 files changed, 28 insertions(+), 22 deletions(-) create mode 100644 tensorflow/python/kernel_tests/array_ops/BUILD rename tensorflow/python/kernel_tests/{ => array_ops}/batch_gather_op_test.py (91%) diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 224c86878b4..5bd81aa26ef 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -77,21 +77,6 @@ tf_py_test( ], ) -tf_py_test( - name = "batch_gather_op_test", - srcs = ["batch_gather_op_test.py"], - tags = [ - "no_gpu", # b/127001953 - ], - deps = [ - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:constant_op", - "//tensorflow/python:dtypes", - "@absl_py//absl/testing:parameterized", - ], -) - tf_py_test( name = "batch_scatter_ops_test", srcs = ["batch_scatter_ops_test.py"], diff --git a/tensorflow/python/kernel_tests/array_ops/BUILD b/tensorflow/python/kernel_tests/array_ops/BUILD new file mode 100644 index 00000000000..6086cfcf449 --- /dev/null +++ b/tensorflow/python/kernel_tests/array_ops/BUILD @@ -0,0 +1,20 @@ +# Tests of TensorFlow array ops kernels written using the Python API. 
+ +load("//tensorflow:tensorflow.bzl", "cuda_py_test") + +package( + default_visibility = ["//tensorflow:internal"], + licenses = ["notice"], # Apache 2.0 +) + +cuda_py_test( + name = "batch_gather_op_test", + srcs = ["batch_gather_op_test.py"], + deps = [ + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "@absl_py//absl/testing:parameterized", + ], +) diff --git a/tensorflow/python/kernel_tests/batch_gather_op_test.py b/tensorflow/python/kernel_tests/array_ops/batch_gather_op_test.py similarity index 91% rename from tensorflow/python/kernel_tests/batch_gather_op_test.py rename to tensorflow/python/kernel_tests/array_ops/batch_gather_op_test.py index 8a7d8669d08..bd5645b8168 100644 --- a/tensorflow/python/kernel_tests/batch_gather_op_test.py +++ b/tensorflow/python/kernel_tests/array_ops/batch_gather_op_test.py @@ -23,7 +23,7 @@ import numpy as np from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes -from tensorflow.python.framework import test_util +from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.platform import test @@ -96,15 +96,16 @@ class GatherTest(test.TestCase, parameterized.TestCase): [[b"qwer", b"uiop"]], self.evaluate(array_ops.batch_gather(params, indices_tf))) - @test_util.run_deprecated_v1 def testUnknownIndices(self): - params = constant_op.constant([[0, 1, 2]]) - indices = array_ops.placeholder(dtypes.int32, shape=[None, None]) - gather_t = array_ops.batch_gather(params, indices) - self.assertEqual([1, None], gather_t.get_shape().as_list()) + # This test needs a placeholder which means we need to construct a graph. + with ops.Graph().as_default(): + params = constant_op.constant([[0, 1, 2]]) + indices = array_ops.placeholder(dtypes.int32, shape=[None, None]) + gather_t = array_ops.batch_gather(params, indices) + self.assertEqual([1, None], gather_t.get_shape().as_list()) def testBadIndicesCPU(self): - with self.session(use_gpu=False): + with ops.device_v2("cpu:0"): params = [[0, 1, 2], [3, 4, 5]] with self.assertRaisesOpError(r"indices\[0\] = 7 is not in \[0, 2\)"): self.evaluate(array_ops.batch_gather(params, [7])) From 0cbbc7a0bf68cebf36c6e54b3e96eca400309b45 Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Tue, 28 Jul 2020 10:33:53 -0700 Subject: [PATCH 1480/2522] Port the quantize kernel to the new TfLiteEvalTensor API. PiperOrigin-RevId: 323602599 Change-Id: I05382c4e7fa94dd3d746d99876f5bcdaf873bddd --- tensorflow/lite/micro/kernels/BUILD | 1 + tensorflow/lite/micro/kernels/quantize.cc | 41 +++++++++++------- .../lite/micro/kernels/quantize_test.cc | 43 ++++--------------- 3 files changed, 35 insertions(+), 50 deletions(-) diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index f74ba5e3865..26e907a04be 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD @@ -472,6 +472,7 @@ tflite_micro_cc_test( "quantize_test.cc", ], deps = [ + ":kernel_runner", "//tensorflow/lite/c:common", "//tensorflow/lite/micro:micro_framework", "//tensorflow/lite/micro:op_resolvers", diff --git a/tensorflow/lite/micro/kernels/quantize.cc b/tensorflow/lite/micro/kernels/quantize.cc index 309d2b59b7d..832379060dd 100644 --- a/tensorflow/lite/micro/kernels/quantize.cc +++ b/tensorflow/lite/micro/kernels/quantize.cc @@ -19,6 +19,7 @@ limitations under the License. 
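// The port below follows a pattern worth spelling out (a hedged summary of
// the hunks that follow, not additional kernel code): anything that needs the
// full TfLiteTensor (quantization scale and zero points) is read once in
// Prepare() and cached in the persistent OpData, e.g.
//
//   const TfLiteTensor* input = GetInput(context, node, 0);
//   data->input_zero_point = input->params.zero_point;
//
// while Eval() only touches the lighter TfLiteEvalTensor handles,
//
//   const TfLiteEvalTensor* input =
//       tflite::micro::GetEvalInput(context, node, 0);
//   const float* in = tflite::micro::GetTensorData<float>(input);
//
// because eval tensors carry only type, dims and data, not quantization
// parameters.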
#include "tensorflow/lite/kernels/internal/reference/requantize.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" #include "tensorflow/lite/micro/micro_utils.h" namespace tflite { @@ -32,6 +33,8 @@ struct OpData { // be represented as a fixed point multiplier plus a left shift. int32_t output_multiplier; int output_shift; + + int32_t input_zero_point; }; void* Init(TfLiteContext* context, const char* buffer, size_t length) { @@ -76,6 +79,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { data->quantization_params.zero_point = output->params.zero_point; data->quantization_params.scale = static_cast(output->params.scale); + + data->input_zero_point = input->params.zero_point; return kTfLiteOk; } @@ -83,22 +88,24 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { TFLITE_DCHECK(node->user_data != nullptr); OpData* data = static_cast(node->user_data); - const TfLiteTensor* input = GetInput(context, node, 0); - TfLiteTensor* output = GetOutput(context, node, 0); + const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0); + TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); if (input->type == kTfLiteFloat32) { switch (output->type) { case kTfLiteInt8: reference_ops::AffineQuantize( - data->quantization_params, GetTensorShape(input), - GetTensorData(input), GetTensorShape(output), - GetTensorData(output)); + data->quantization_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); break; case kTfLiteUInt8: reference_ops::AffineQuantize( - data->quantization_params, GetTensorShape(input), - GetTensorData(input), GetTensorShape(output), - GetTensorData(output)); + data->quantization_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); break; default: TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.", @@ -110,10 +117,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { size_t size = ElementCount(*input->dims); switch (output->type) { case kTfLiteInt8: - reference_ops::Requantize( - GetTensorData(input), size, data->output_multiplier, - data->output_shift, input->params.zero_point, - output->params.zero_point, GetTensorData(output)); + reference_ops::Requantize(tflite::micro::GetTensorData(input), + size, data->output_multiplier, + data->output_shift, data->input_zero_point, + data->quantization_params.zero_point, + tflite::micro::GetTensorData(output)); break; default: TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.", @@ -127,10 +135,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { size_t size = ElementCount(*input->dims); switch (output->type) { case kTfLiteInt8: - reference_ops::Requantize( - GetTensorData(input), size, data->output_multiplier, - data->output_shift, input->params.zero_point, - output->params.zero_point, GetTensorData(output)); + reference_ops::Requantize(tflite::micro::GetTensorData(input), + size, data->output_multiplier, + data->output_shift, data->input_zero_point, + data->quantization_params.zero_point, + tflite::micro::GetTensorData(output)); break; default: TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.", diff --git a/tensorflow/lite/micro/kernels/quantize_test.cc 
b/tensorflow/lite/micro/kernels/quantize_test.cc index 2e76fc566af..588b23ca834 100644 --- a/tensorflow/lite/micro/kernels/quantize_test.cc +++ b/tensorflow/lite/micro/kernels/quantize_test.cc @@ -15,7 +15,7 @@ limitations under the License. #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/micro/all_ops_resolver.h" +#include "tensorflow/lite/micro/kernels/kernel_runner.h" #include "tensorflow/lite/micro/test_helpers.h" #include "tensorflow/lite/micro/testing/micro_test.h" #include "tensorflow/lite/micro/testing/test_utils.h" @@ -29,45 +29,20 @@ void ValidateQuantizeGoldens(TfLiteTensor* tensors, int tensors_size, const float* golden, T* golden_quantized, float scale, int zero_point, int output_len, T* output_data) { - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - - // Version 1 of quantize supports int8_t and uint8_t quantization. - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_QUANTIZE); - - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - - const char* init_data = nullptr; - size_t init_data_size = 0; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, init_data, init_data_size); - } - int inputs_array_data[] = {1, 0}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 1}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = nullptr; - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; + // Version 1 of quantize supports int8_t and uint8_t quantization. + const TfLiteRegistration registration = + tflite::ops::micro::Register_QUANTIZE(); + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, + /*builtin_data=*/nullptr, micro_test::reporter); - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - - if (registration->free) { - registration->free(&context, user_data); - } + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); // Use reference quantization from test utils to compare against op output. AsymmetricQuantize(golden, golden_quantized, output_len, scale, zero_point); From 989b3bee78328f459de2aa6c22f273cf239a15d7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 28 Jul 2020 10:34:31 -0700 Subject: [PATCH 1481/2522] Make get next parallel when using PER_HOST_V2 input mode on TPUEstimator. 
PiperOrigin-RevId: 323602760 Change-Id: Id4eee4a696e06d6dd6ad1b3b7264ec93ee6abc67 --- .../golden/v1/tensorflow.estimator.tpu.-t-p-u-config.pbtxt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.tpu.-t-p-u-config.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.tpu.-t-p-u-config.pbtxt index e329045123e..d934b4013b2 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.tpu.-t-p-u-config.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.tpu.-t-p-u-config.pbtxt @@ -7,10 +7,6 @@ tf_class { name: "eval_training_input_configuration" mtype: "" } - member { - name: "experimental_allow_per_host_v2_parallel_get_next" - mtype: "" - } member { name: "experimental_host_call_every_n_steps" mtype: "" From 7c9d9448d92f806b19e74127455b947c13ddca3f Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Tue, 28 Jul 2020 10:57:27 -0700 Subject: [PATCH 1482/2522] Fix issues with size tracker PiperOrigin-RevId: 323608264 Change-Id: Ie778d8da54e8382bc1f5ae7ebaaf6370481dde14 --- tensorflow/tools/ci_build/sizetrack_helper.py | 42 ++++++++++++------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/tensorflow/tools/ci_build/sizetrack_helper.py b/tensorflow/tools/ci_build/sizetrack_helper.py index d9b3bfadd4b..4b7c83919b2 100755 --- a/tensorflow/tools/ci_build/sizetrack_helper.py +++ b/tensorflow/tools/ci_build/sizetrack_helper.py @@ -87,6 +87,10 @@ parser.add_argument( "--artifact_id", type=str, help="Unique ID for your artifact, used for sorting dashboards.") +parser.add_argument( + "-n", "--dry_run", + action="store_true", + help="Dry run: do not load to BigQuery or upload to GCS.") parser.add_argument( "--print_schema", action="store_true", @@ -181,7 +185,8 @@ def git_pretty(commit_range, pretty_format, n=None): print(e.stdout) raise e out = ret.stdout.replace("\n", "") - return list(map(str.strip, out.split("\0"))) + # Split by \0 and make list of text, extra whitespace and empty lines removed + return list(filter(None, map(str.strip, out.split("\0")))) def gcloud(tool, args, stdin=None): @@ -306,28 +311,37 @@ def main(): # Generate data about this artifact into a Tab Separated Value file next_tsv_row = build_row() - with open("data.tsv", "w") as tsvfile: - writer = csv.writer(tsvfile, delimiter="\t", quoting=csv.QUOTE_MINIMAL) - writer.writerow(next_tsv_row) # Load into BigQuery - gcloud("bq", [ - "--project_id", FLAGS.project, "load", "--source_format", "CSV", - "--field_delimiter", "tab", PROJECT_LEVEL_TABLE_NAME, "data.tsv", SCHEMA - ]) + if FLAGS.dry_run: + print("DRY RUN: Generated this TSV row:") + print("\t".join(map(str, next_tsv_row))) + else: + with open("data.tsv", "w") as tsvfile: + writer = csv.writer(tsvfile, delimiter="\t", quoting=csv.QUOTE_MINIMAL) + writer.writerow(next_tsv_row) + gcloud("bq", [ + "--project_id", FLAGS.project, "load", "--source_format", "CSV", + "--field_delimiter", "tab", PROJECT_LEVEL_TABLE_NAME, "data.tsv", SCHEMA + ]) - # Upload artifact into GCS - if FLAGS.upload: - # note: not os.path.join here, because gsutil is always linux-style + # Upload artifact into GCS if it exists + if FLAGS.upload and FLAGS.artifact: head_info = git_pretty("HEAD", PRETTY_HEAD_INFO, n=1) _, current_cl, _, _, _, _, _ = head_info[0].split("\t") - path = "{bucket}/{team}/{artifact_id}/{cl}.{artifact}".format( + artifact_filename = os.path.basename(FLAGS.artifact.name) + # note: not os.path.join here, because gsutil is always linux-style + path = 
"{bucket}/{team}/{artifact_id}/{cl}.{artifact_filename}".format( bucket=FLAGS.bucket, team=FLAGS.team, artifact_id=FLAGS.artifact_id, cl=current_cl, - artifact=FLAGS.artifact) - gcloud("gsutil", ["cp", FLAGS.artifact, path]) + artifact_filename=artifact_filename) + if FLAGS.dry_run: + print("DRY RUN: Would gsutil cp to:\n{}".format(path)) + else: + gcloud("gsutil", ["cp", FLAGS.artifact, path]) + if __name__ == "__main__": main() From bf837f21562f0853ebedac9857c7ccb9edf7dd4e Mon Sep 17 00:00:00 2001 From: Yanhui Liang Date: Tue, 28 Jul 2020 10:57:44 -0700 Subject: [PATCH 1483/2522] Remove the usage of TF private API `@deprecated` from Keras. PiperOrigin-RevId: 323608337 Change-Id: Ib2fb54696abb308d79aa847264e9aaab3caaa001 --- tensorflow/python/keras/backend.py | 18 +++++++------ tensorflow/python/keras/engine/sequential.py | 22 ++++++++------- .../keras/layers/legacy_rnn/rnn_cell_impl.py | 27 ++++++++++--------- 3 files changed, 37 insertions(+), 30 deletions(-) diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py index 07da09049c5..abf2657bc62 100644 --- a/tensorflow/python/keras/backend.py +++ b/tensorflow/python/keras/backend.py @@ -83,7 +83,6 @@ from tensorflow.python.util import nest from tensorflow.python.util import object_identity from tensorflow.python.util import tf_contextlib from tensorflow.python.util import tf_inspect -from tensorflow.python.util.deprecation import deprecated from tensorflow.python.util.tf_export import keras_export py_all = all @@ -395,9 +394,6 @@ def _default_learning_phase(): False, shape=(), name='keras_learning_phase') -@deprecated('2020-10-11', - 'Simply pass a True/False value to the `training` argument ' - 'of the `__call__` method of your layer or model.') @keras_export('keras.backend.set_learning_phase') def set_learning_phase(value): """Sets the learning phase to a fixed value. @@ -423,6 +419,10 @@ def set_learning_phase(value): Raises: ValueError: if `value` is neither `0` nor `1`. """ + logging.warning('`tf.keras.backend.set_learning_phase` is deprecated and ' + 'will be removed after 2020-10-11. To update it, simply ' + 'pass a True/False value to the `training` argument of the ' + '`__call__` method of your layer or model.') deprecated_internal_set_learning_phase(value) @@ -459,9 +459,6 @@ def deprecated_internal_set_learning_phase(value): _GRAPH_LEARNING_PHASES[get_graph()] = value -@deprecated('2020-10-11', - 'Simply pass a True/False value to the `training` argument ' - 'of the `__call__` method of your layer or model.') @keras_export('keras.backend.learning_phase_scope') @tf_contextlib.contextmanager def learning_phase_scope(value): @@ -479,6 +476,10 @@ def learning_phase_scope(value): Raises: ValueError: if `value` is neither `0` nor `1`. """ + logging.warning('`tf.keras.backend.learning_phase_scope` is deprecated and ' + 'will be removed after 2020-10-11. 
To update it, simply ' + 'pass a True/False value to the `training` argument of the ' + '`__call__` method of your layer or model.') with deprecated_internal_learning_phase_scope(value): try: yield @@ -5862,7 +5863,6 @@ def random_uniform(shape, minval=0.0, maxval=1.0, dtype=None, seed=None): shape, minval=minval, maxval=maxval, dtype=dtype, seed=seed) -@deprecated(None, 'Use `tf.keras.backend.random_bernoulli` instead.') @keras_export('keras.backend.random_binomial') @dispatch.add_dispatch_support def random_binomial(shape, p=0.0, dtype=None, seed=None): @@ -5891,6 +5891,8 @@ def random_binomial(shape, p=0.0, dtype=None, seed=None): """ + logging.warning('`tf.keras.backend.random_binomial` is deprecated. ' + 'Please use `tf.keras.backend.random_bernoulli` instead.') if dtype is None: dtype = floatx() if seed is None: diff --git a/tensorflow/python/keras/engine/sequential.py b/tensorflow/python/keras/engine/sequential.py index a79d541c4e4..979c1e47b2b 100644 --- a/tensorflow/python/keras/engine/sequential.py +++ b/tensorflow/python/keras/engine/sequential.py @@ -37,7 +37,6 @@ from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training.tracking import base as trackable from tensorflow.python.util import nest from tensorflow.python.util import tf_inspect -from tensorflow.python.util.deprecation import deprecated from tensorflow.python.util.tf_export import keras_export @@ -405,7 +404,6 @@ class Sequential(functional.Functional): outputs = self.call(inputs, mask=mask) return getattr(outputs, '_keras_mask', None) - @deprecated('2021-01-01', 'Please use `model.predict()` instead.') def predict_proba(self, x, batch_size=32, verbose=0): """Generates class probability predictions for the input samples. @@ -420,6 +418,9 @@ class Sequential(functional.Functional): Returns: A Numpy array of probability predictions. """ + logging.warning('`model.predict_proba()` is deprecated and ' + 'will be removed after 2021-01-01. ' + 'Please use `model.predict()` instead.') preds = self.predict(x, batch_size, verbose) if preds.min() < 0. or preds.max() > 1.: logging.warning('Network returning invalid probability values. ' @@ -428,14 +429,6 @@ class Sequential(functional.Functional): '(like softmax or sigmoid would).') return preds - @deprecated('2021-01-01', - 'Please use instead:' - '* `np.argmax(model.predict(x), axis=-1)`, ' - ' if your model does multi-class classification ' - ' (e.g. if it uses a `softmax` last-layer activation).' - '* `(model.predict(x) > 0.5).astype("int32")`, ' - ' if your model does binary classification ' - ' (e.g. if it uses a `sigmoid` last-layer activation).') def predict_classes(self, x, batch_size=32, verbose=0): """Generate class predictions for the input samples. @@ -450,6 +443,15 @@ class Sequential(functional.Functional): Returns: A numpy array of class predictions. """ + logging.warning('`model.predict_classes()` is deprecated and ' + 'will be removed after 2021-01-01. ' + 'Please use instead:' + '* `np.argmax(model.predict(x), axis=-1)`, ' + ' if your model does multi-class classification ' + ' (e.g. if it uses a `softmax` last-layer activation).' + '* `(model.predict(x) > 0.5).astype("int32")`, ' + ' if your model does binary classification ' + ' (e.g. 
if it uses a `sigmoid` last-layer activation).') proba = self.predict(x, batch_size=batch_size, verbose=verbose) if proba.shape[-1] > 1: return proba.argmax(axis=-1) diff --git a/tensorflow/python/keras/layers/legacy_rnn/rnn_cell_impl.py b/tensorflow/python/keras/layers/legacy_rnn/rnn_cell_impl.py index 96a33280d54..1e33edd497c 100644 --- a/tensorflow/python/keras/layers/legacy_rnn/rnn_cell_impl.py +++ b/tensorflow/python/keras/layers/legacy_rnn/rnn_cell_impl.py @@ -51,7 +51,6 @@ from tensorflow.python.ops import variables as tf_variables from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training.tracking import base as trackable from tensorflow.python.util import nest -from tensorflow.python.util.deprecation import deprecated from tensorflow.python.util.tf_export import tf_export _BIAS_VARIABLE_NAME = "bias" @@ -410,8 +409,6 @@ class BasicRNNCell(LayerRNNCell): `trainable` etc when constructing the cell from configs of get_config(). """ - @deprecated(None, "This class is equivalent as tf.keras.layers.SimpleRNNCell," - " and will be replaced by that in Tensorflow 2.0.") def __init__(self, num_units, activation=None, @@ -419,6 +416,9 @@ class BasicRNNCell(LayerRNNCell): name=None, dtype=None, **kwargs): + logging.warning("`tf.nn.rnn_cell.BasicRNNCell` is deprecated. This class " + "is equivalent as `tf.keras.layers.SimpleRNNCell`, " + "and will be replaced by that in Tensorflow 2.0.") super(BasicRNNCell, self).__init__( _reuse=reuse, name=name, dtype=dtype, **kwargs) _check_supported_dtypes(self.dtype) @@ -514,8 +514,6 @@ class GRUCell(LayerRNNCell): ([pdf](http://emnlp2014.org/papers/pdf/EMNLP2014179.pdf)) """ - @deprecated(None, "This class is equivalent as tf.keras.layers.GRUCell," - " and will be replaced by that in Tensorflow 2.0.") def __init__(self, num_units, activation=None, @@ -525,6 +523,9 @@ class GRUCell(LayerRNNCell): name=None, dtype=None, **kwargs): + logging.warning("`tf.nn.rnn_cell.GRUCell` is deprecated. This class " + "is equivalent as `tf.keras.layers.GRUCell`, " + "and will be replaced by that in Tensorflow 2.0.") super(GRUCell, self).__init__( _reuse=reuse, name=name, dtype=dtype, **kwargs) _check_supported_dtypes(self.dtype) @@ -662,8 +663,6 @@ class BasicLSTMCell(LayerRNNCell): better performance on CPU. """ - @deprecated(None, "This class is equivalent as tf.keras.layers.LSTMCell," - " and will be replaced by that in Tensorflow 2.0.") def __init__(self, num_units, forget_bias=1.0, @@ -696,6 +695,9 @@ class BasicLSTMCell(LayerRNNCell): When restoring from CudnnLSTM-trained checkpoints, must use `CudnnCompatibleLSTMCell` instead. """ + logging.warning("`tf.nn.rnn_cell.BasicLSTMCell` is deprecated. This class " + "is equivalent as `tf.keras.layers.LSTMCell`, " + "and will be replaced by that in Tensorflow 2.0.") super(BasicLSTMCell, self).__init__( _reuse=reuse, name=name, dtype=dtype, **kwargs) _check_supported_dtypes(self.dtype) @@ -838,8 +840,6 @@ class LSTMCell(LayerRNNCell): ([pdf](http://ml.jku.at/publications/older/3504.pdf)) """ - @deprecated(None, "This class is equivalent as tf.keras.layers.LSTMCell," - " and will be replaced by that in Tensorflow 2.0.") def __init__(self, num_units, use_peepholes=False, @@ -895,6 +895,9 @@ class LSTMCell(LayerRNNCell): When restoring from CudnnLSTM-trained checkpoints, use `CudnnCompatibleLSTMCell` instead. """ + logging.warning("`tf.nn.rnn_cell.LSTMCell` is deprecated. 
This class " + "is equivalent as `tf.keras.layers.LSTMCell`, " + "and will be replaced by that in Tensorflow 2.0.") super(LSTMCell, self).__init__( _reuse=reuse, name=name, dtype=dtype, **kwargs) _check_supported_dtypes(self.dtype) @@ -1217,9 +1220,6 @@ class MultiRNNCell(RNNCell): ``` """ - @deprecated(None, "This class is equivalent as " - "tf.keras.layers.StackedRNNCells, and will be replaced by " - "that in Tensorflow 2.0.") def __init__(self, cells, state_is_tuple=True): """Create a RNN cell composed sequentially of a number of RNNCells. @@ -1233,6 +1233,9 @@ class MultiRNNCell(RNNCell): ValueError: if cells is empty (not allowed), or at least one of the cells returns a state tuple but the flag `state_is_tuple` is `False`. """ + logging.warning("`tf.nn.rnn_cell.MultiRNNCell` is deprecated. This class " + "is equivalent as `tf.keras.layers.StackedRNNCells`, " + "and will be replaced by that in Tensorflow 2.0.") super(MultiRNNCell, self).__init__() if not cells: raise ValueError("Must specify at least one cell for MultiRNNCell.") From d98f120b8cd9eeb82bfc26002577782b1f05d8f6 Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Tue, 28 Jul 2020 11:02:52 -0700 Subject: [PATCH 1484/2522] Port the prelu kernel to the new TfLiteEvalTensor API. PiperOrigin-RevId: 323609674 Change-Id: I0817b06159cb1ba630ec344a3d4dc9cfa35c287b --- tensorflow/lite/kernels/internal/types.h | 4 +- tensorflow/lite/micro/kernels/BUILD | 1 + tensorflow/lite/micro/kernels/prelu.cc | 112 ++++++++++++-------- tensorflow/lite/micro/kernels/prelu_test.cc | 72 ++++--------- 4 files changed, 89 insertions(+), 100 deletions(-) diff --git a/tensorflow/lite/kernels/internal/types.h b/tensorflow/lite/kernels/internal/types.h index aabbc34bf5e..9db742ddf03 100644 --- a/tensorflow/lite/kernels/internal/types.h +++ b/tensorflow/lite/kernels/internal/types.h @@ -984,9 +984,9 @@ struct PreluParams { int32_t alpha_offset; int32_t output_offset; int32_t output_multiplier_1; - int32_t output_shift_1; + int output_shift_1; int32_t output_multiplier_2; - int32_t output_shift_2; + int output_shift_2; }; struct PoolParams { diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index 26e907a04be..a58b93f79a3 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD @@ -241,6 +241,7 @@ tflite_micro_cc_test( "prelu_test.cc", ], deps = [ + ":kernel_runner", "//tensorflow/lite/c:common", "//tensorflow/lite/micro:op_resolvers", "//tensorflow/lite/micro/testing:micro_test", diff --git a/tensorflow/lite/micro/kernels/prelu.cc b/tensorflow/lite/micro/kernels/prelu.cc index 3adb63312af..8665dbc2abb 100644 --- a/tensorflow/lite/micro/kernels/prelu.cc +++ b/tensorflow/lite/micro/kernels/prelu.cc @@ -15,20 +15,45 @@ limitations under the License. 
#include "tensorflow/lite/kernels/internal/reference/prelu.h" +#include + #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/internal/quantization_util.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { namespace ops { namespace micro { namespace activations { +namespace { + +TfLiteStatus CalculatePreluParams(const TfLiteTensor* input, + const TfLiteTensor* alpha, + TfLiteTensor* output, PreluParams* params) { + if (output->type == kTfLiteInt8 || output->type == kTfLiteUInt8 || + output->type == kTfLiteInt16) { + double real_multiplier_1 = static_cast(input->params.scale) / + static_cast(output->params.scale); + double real_multiplier_2 = static_cast(input->params.scale) * + static_cast(alpha->params.scale) / + static_cast(output->params.scale); + QuantizeMultiplier(real_multiplier_1, ¶ms->output_multiplier_1, + ¶ms->output_shift_1); + QuantizeMultiplier(real_multiplier_2, ¶ms->output_multiplier_2, + ¶ms->output_shift_2); + + params->input_offset = -input->params.zero_point; + params->alpha_offset = -alpha->params.zero_point; + params->output_offset = output->params.zero_point; + } -TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } +} // namespace + inline void BroadcastPrelu4DSlowFloat( const RuntimeShape& unextended_input1_shape, const float* input1_data, const RuntimeShape& unextended_input2_shape, const float* input2_data, @@ -60,62 +85,59 @@ inline void BroadcastPrelu4DSlowFloat( } } -TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) { +void* PreluInit(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(PreluParams)); +} + +TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + PreluParams* params = static_cast(node->user_data); + const TfLiteTensor* input = GetInput(context, node, 0); const TfLiteTensor* alpha = GetInput(context, node, 1); TfLiteTensor* output = GetOutput(context, node, 0); - int32_t output_multiplier_1 = 0; - int output_shift_1 = 0; - int32_t output_multiplier_2 = 0; - int output_shift_2 = 0; - if (output->type == kTfLiteInt8 || output->type == kTfLiteUInt8 || - output->type == kTfLiteInt16) { - double real_multiplier_1 = static_cast(input->params.scale) / - static_cast(output->params.scale); - double real_multiplier_2 = static_cast(input->params.scale) * - static_cast(alpha->params.scale) / - static_cast(output->params.scale); - QuantizeMultiplier(real_multiplier_1, &output_multiplier_1, - &output_shift_1); - QuantizeMultiplier(real_multiplier_2, &output_multiplier_2, - &output_shift_2); - } + + return CalculatePreluParams(input, alpha, output, params); +} + +TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + const PreluParams& params = + *(static_cast(node->user_data)); + + const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0); + const TfLiteEvalTensor* alpha = tflite::micro::GetEvalInput(context, node, 1); + TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); + switch (input->type) { case kTfLiteFloat32: { - BroadcastPrelu4DSlowFloat( - GetTensorShape(input), GetTensorData(input), - GetTensorShape(alpha), GetTensorData(alpha), - GetTensorShape(output), 
GetTensorData(output)); + BroadcastPrelu4DSlowFloat(tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(alpha), + tflite::micro::GetTensorData(alpha), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); return kTfLiteOk; } break; case kTfLiteUInt8: { - PreluParams op_params; - op_params.input_offset = -input->params.zero_point; - op_params.alpha_offset = -alpha->params.zero_point; - op_params.output_offset = output->params.zero_point; - op_params.output_multiplier_1 = output_multiplier_1; - op_params.output_shift_1 = output_shift_1; - op_params.output_multiplier_2 = output_multiplier_2; - op_params.output_shift_2 = output_shift_2; reference_ops::BroadcastPrelu4DSlow( - op_params, GetTensorShape(input), GetTensorData(input), - GetTensorShape(alpha), GetTensorData(alpha), - GetTensorShape(output), GetTensorData(output)); + params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(alpha), + tflite::micro::GetTensorData(alpha), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); return kTfLiteOk; } break; case kTfLiteInt8: { - PreluParams op_params; - op_params.input_offset = -input->params.zero_point; - op_params.alpha_offset = -alpha->params.zero_point; - op_params.output_offset = output->params.zero_point; - op_params.output_multiplier_1 = output_multiplier_1; - op_params.output_shift_1 = output_shift_1; - op_params.output_multiplier_2 = output_multiplier_2; - op_params.output_shift_2 = output_shift_2; reference_ops::BroadcastPrelu4DSlow( - op_params, GetTensorShape(input), GetTensorData(input), - GetTensorShape(alpha), GetTensorData(alpha), - GetTensorShape(output), GetTensorData(output)); + params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(alpha), + tflite::micro::GetTensorData(alpha), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); return kTfLiteOk; } break; default: @@ -129,7 +151,7 @@ TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) { } // namespace activations TfLiteRegistration Register_PRELU() { - return {/*init=*/nullptr, + return {/*init=*/activations::PreluInit, /*free=*/nullptr, /*prepare=*/activations::PreluPrepare, /*invoke=*/activations::PreluEval, diff --git a/tensorflow/lite/micro/kernels/prelu_test.cc b/tensorflow/lite/micro/kernels/prelu_test.cc index ae5bacca988..4e352763cec 100644 --- a/tensorflow/lite/micro/kernels/prelu_test.cc +++ b/tensorflow/lite/micro/kernels/prelu_test.cc @@ -15,7 +15,7 @@ limitations under the License. 
#include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/micro/all_ops_resolver.h" +#include "tensorflow/lite/micro/kernels/kernel_runner.h" #include "tensorflow/lite/micro/testing/micro_test.h" #include "tensorflow/lite/micro/testing/test_utils.h" @@ -42,37 +42,20 @@ void TestPreluFloat(std::initializer_list input_dims_data, CreateFloatTensor(alpha_data, alpha_dims), CreateFloatTensor(output_data, output_dims), }; - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_PRELU); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - size_t init_data_size = 0; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, nullptr, init_data_size); - } int inputs_array_data[] = {2, 0, 1}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 2}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = nullptr; - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - if (registration->free) { - registration->free(&context, user_data); - } + + const TfLiteRegistration registration = tflite::ops::micro::Register_PRELU(); + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, + /*builtin_data=*/nullptr, micro_test::reporter); + + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); + for (int i = 0; i < output_dims_count; ++i) { TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i], 1e-5f); @@ -103,37 +86,20 @@ void TestPreluQuantized(std::initializer_list input_dims_data, CreateQuantizedTensor(alpha_data, alpha_dims, alpha_min, alpha_max), CreateQuantizedTensor(output_data, output_dims, output_min, output_max), }; - TfLiteContext context; - PopulateContext(tensors, tensors_size, micro_test::reporter, &context); - ::tflite::AllOpsResolver resolver; - const TfLiteRegistration* registration = - resolver.FindOp(tflite::BuiltinOperator_PRELU); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - size_t init_data_size = 0; - void* user_data = nullptr; - if (registration->init) { - user_data = registration->init(&context, nullptr, init_data_size); - } int inputs_array_data[] = {2, 0, 1}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 2}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - TfLiteNode node; - node.inputs = inputs_array; - node.outputs = outputs_array; - node.user_data = user_data; - node.builtin_data = nullptr; - node.custom_initial_data = nullptr; - node.custom_initial_data_size = 0; - if (registration->prepare) { - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node)); - } - TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node)); - if (registration->free) { - registration->free(&context, 
user_data); - } + + const TfLiteRegistration registration = tflite::ops::micro::Register_PRELU(); + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, + /*builtin_data=*/nullptr, micro_test::reporter); + + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); + for (int i = 0; i < output_dims_count; ++i) { TF_LITE_MICRO_EXPECT_EQ(expected_output_data.begin()[i], output_data[i]); } From 111f48de6e46dabb0ad944e576dbb7789ec4be2b Mon Sep 17 00:00:00 2001 From: Cesar Crusius Date: Tue, 28 Jul 2020 11:18:23 -0700 Subject: [PATCH 1485/2522] Remove run_deprecated_v1 qualifier from saved_model:simple_save_test. PiperOrigin-RevId: 323613279 Change-Id: I96f174f589c203acb7303627a33131867d9ac5bb --- .../python/saved_model/simple_save_test.py | 75 ++++++++++--------- 1 file changed, 39 insertions(+), 36 deletions(-) diff --git a/tensorflow/python/saved_model/simple_save_test.py b/tensorflow/python/saved_model/simple_save_test.py index 21c2e9df2fa..21be3677aa8 100644 --- a/tensorflow/python/saved_model/simple_save_test.py +++ b/tensorflow/python/saved_model/simple_save_test.py @@ -21,7 +21,6 @@ from __future__ import print_function import os from tensorflow.python.framework import ops -from tensorflow.python.framework import test_util from tensorflow.python.ops import variables from tensorflow.python.platform import test from tensorflow.python.saved_model import loader @@ -32,7 +31,7 @@ from tensorflow.python.saved_model import tag_constants class SimpleSaveTest(test.TestCase): - def _init_and_validate_variable(self, sess, variable_name, variable_value): + def _init_and_validate_variable(self, variable_name, variable_value): v = variables.Variable(variable_value, name=variable_name) self.evaluate(variables.global_variables_initializer()) self.assertEqual(variable_value, self.evaluate(v)) @@ -54,50 +53,54 @@ class SimpleSaveTest(test.TestCase): self.assertEqual(actual_tensor_info.tensor_shape.dim[i].size, expected_tensor.shape[i]) - @test_util.run_deprecated_v1 def testSimpleSave(self): """Test simple_save that uses the default parameters.""" export_dir = os.path.join(test.get_temp_dir(), "test_simple_save") - # Initialize input and output variables and save a prediction graph using - # the default parameters. - with self.session(graph=ops.Graph()) as sess: - var_x = self._init_and_validate_variable(sess, "var_x", 1) - var_y = self._init_and_validate_variable(sess, "var_y", 2) - inputs = {"x": var_x} - outputs = {"y": var_y} - simple_save.simple_save(sess, export_dir, inputs, outputs) + # Force the test to run in graph mode. + # This tests a deprecated v1 API that both requires a session and uses + # functionality that does not work with eager tensors (such as + # build_tensor_info as called by predict_signature_def). + with ops.Graph().as_default(): + # Initialize input and output variables and save a prediction graph using + # the default parameters. + with self.session(graph=ops.Graph()) as sess: + var_x = self._init_and_validate_variable("var_x", 1) + var_y = self._init_and_validate_variable("var_y", 2) + inputs = {"x": var_x} + outputs = {"y": var_y} + simple_save.simple_save(sess, export_dir, inputs, outputs) - # Restore the graph with a valid tag and check the global variables and - # signature def map. 
- with self.session(graph=ops.Graph()) as sess: - graph = loader.load(sess, [tag_constants.SERVING], export_dir) - collection_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + # Restore the graph with a valid tag and check the global variables and + # signature def map. + with self.session(graph=ops.Graph()) as sess: + graph = loader.load(sess, [tag_constants.SERVING], export_dir) + collection_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) - # Check value and metadata of the saved variables. - self.assertEqual(len(collection_vars), 2) - self.assertEqual(1, collection_vars[0].eval()) - self.assertEqual(2, collection_vars[1].eval()) - self._check_variable_info(collection_vars[0], var_x) - self._check_variable_info(collection_vars[1], var_y) + # Check value and metadata of the saved variables. + self.assertEqual(len(collection_vars), 2) + self.assertEqual(1, collection_vars[0].eval()) + self.assertEqual(2, collection_vars[1].eval()) + self._check_variable_info(collection_vars[0], var_x) + self._check_variable_info(collection_vars[1], var_y) - # Check that the appropriate signature_def_map is created with the - # default key and method name, and the specified inputs and outputs. - signature_def_map = graph.signature_def - self.assertEqual(1, len(signature_def_map)) - self.assertEqual(signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY, - list(signature_def_map.keys())[0]) + # Check that the appropriate signature_def_map is created with the + # default key and method name, and the specified inputs and outputs. + signature_def_map = graph.signature_def + self.assertEqual(1, len(signature_def_map)) + self.assertEqual(signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY, + list(signature_def_map.keys())[0]) - signature_def = signature_def_map[ - signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY] - self.assertEqual(signature_constants.PREDICT_METHOD_NAME, - signature_def.method_name) + signature_def = signature_def_map[ + signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY] + self.assertEqual(signature_constants.PREDICT_METHOD_NAME, + signature_def.method_name) - self.assertEqual(1, len(signature_def.inputs)) - self._check_tensor_info(signature_def.inputs["x"], var_x) - self.assertEqual(1, len(signature_def.outputs)) - self._check_tensor_info(signature_def.outputs["y"], var_y) + self.assertEqual(1, len(signature_def.inputs)) + self._check_tensor_info(signature_def.inputs["x"], var_x) + self.assertEqual(1, len(signature_def.outputs)) + self._check_tensor_info(signature_def.outputs["y"], var_y) if __name__ == "__main__": From 79616c080e0eb4ada91ffcc018802f770c9baa9b Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Tue, 28 Jul 2020 11:18:26 -0700 Subject: [PATCH 1486/2522] Migrate Windows GPU official release and nightly testing builds to use the new bazel configs. 
PiperOrigin-RevId: 323613292 Change-Id: I2dd0ff4fa88ea89ec1e8968eb2b3e3983f8fc00c --- .../tools/ci_build/release/windows/gpu_py35_full/nightly.bat | 2 +- .../tools/ci_build/release/windows/gpu_py35_full/release.bat | 2 +- .../tools/ci_build/release/windows/gpu_py36_full/nightly.bat | 2 +- .../tools/ci_build/release/windows/gpu_py36_full/release.bat | 2 +- .../tools/ci_build/release/windows/gpu_py37_full/nightly.bat | 2 +- .../tools/ci_build/release/windows/gpu_py37_full/release.bat | 2 +- .../tools/ci_build/release/windows/gpu_py38_full/nightly.bat | 2 +- .../tools/ci_build/release/windows/gpu_py38_full/release.bat | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tensorflow/tools/ci_build/release/windows/gpu_py35_full/nightly.bat b/tensorflow/tools/ci_build/release/windows/gpu_py35_full/nightly.bat index 19e8ebcfabd..ba8dee59853 100644 --- a/tensorflow/tools/ci_build/release/windows/gpu_py35_full/nightly.bat +++ b/tensorflow/tools/ci_build/release/windows/gpu_py35_full/nightly.bat @@ -17,4 +17,4 @@ SET PYTHON_DIRECTORY=Python35 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --extra_build_flags "--config=v2" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" +call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --extra_test_flags "--test_env=TF2_BEHAVIOR=1" diff --git a/tensorflow/tools/ci_build/release/windows/gpu_py35_full/release.bat b/tensorflow/tools/ci_build/release/windows/gpu_py35_full/release.bat index cba62225bee..86c118b2f83 100644 --- a/tensorflow/tools/ci_build/release/windows/gpu_py35_full/release.bat +++ b/tensorflow/tools/ci_build/release/windows/gpu_py35_full/release.bat @@ -17,7 +17,7 @@ SET PYTHON_DIRECTORY=Python35 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --release_build --extra_build_flags "--config=v2" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow" +call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --release_build --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow" for %%a in ("%~dp0\.") do set "PARENT_DIR=%%~nxa" bash -l tensorflow\tools\ci_build\release\windows\%PARENT_DIR%\release_pip_rename.sh diff --git a/tensorflow/tools/ci_build/release/windows/gpu_py36_full/nightly.bat b/tensorflow/tools/ci_build/release/windows/gpu_py36_full/nightly.bat index dbd380f47e3..9624ca5f5b2 100644 --- a/tensorflow/tools/ci_build/release/windows/gpu_py36_full/nightly.bat +++ b/tensorflow/tools/ci_build/release/windows/gpu_py36_full/nightly.bat @@ -17,4 +17,4 @@ SET PYTHON_DIRECTORY=Python36 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --extra_build_flags "--config=v2" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" +call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --extra_test_flags "--test_env=TF2_BEHAVIOR=1" diff --git a/tensorflow/tools/ci_build/release/windows/gpu_py36_full/release.bat b/tensorflow/tools/ci_build/release/windows/gpu_py36_full/release.bat index ede8bd35f52..cc4f84afbee 100644 --- a/tensorflow/tools/ci_build/release/windows/gpu_py36_full/release.bat +++ b/tensorflow/tools/ci_build/release/windows/gpu_py36_full/release.bat @@ -17,7 +17,7 @@ SET PYTHON_DIRECTORY=Python36 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --release_build --extra_build_flags "--config=v2" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name 
"tensorflow" +call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --release_build --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow" for %%a in ("%~dp0\.") do set "PARENT_DIR=%%~nxa" bash -l tensorflow\tools\ci_build\release\windows\%PARENT_DIR%\release_pip_rename.sh \ No newline at end of file diff --git a/tensorflow/tools/ci_build/release/windows/gpu_py37_full/nightly.bat b/tensorflow/tools/ci_build/release/windows/gpu_py37_full/nightly.bat index 3d12b723048..c6141c42916 100644 --- a/tensorflow/tools/ci_build/release/windows/gpu_py37_full/nightly.bat +++ b/tensorflow/tools/ci_build/release/windows/gpu_py37_full/nightly.bat @@ -17,4 +17,4 @@ SET PYTHON_DIRECTORY=Python37 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --extra_build_flags "--config=v2" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" +call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --extra_test_flags "--test_env=TF2_BEHAVIOR=1" diff --git a/tensorflow/tools/ci_build/release/windows/gpu_py37_full/release.bat b/tensorflow/tools/ci_build/release/windows/gpu_py37_full/release.bat index 7509270fc43..5fa798e3eb8 100644 --- a/tensorflow/tools/ci_build/release/windows/gpu_py37_full/release.bat +++ b/tensorflow/tools/ci_build/release/windows/gpu_py37_full/release.bat @@ -17,7 +17,7 @@ SET PYTHON_DIRECTORY=Python37 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --release_build --extra_build_flags "--config=v2" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow" +call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --release_build --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow" for %%a in ("%~dp0\.") do set "PARENT_DIR=%%~nxa" bash -l tensorflow\tools\ci_build\release\windows\%PARENT_DIR%\release_pip_rename.sh \ No newline at end of file diff --git a/tensorflow/tools/ci_build/release/windows/gpu_py38_full/nightly.bat b/tensorflow/tools/ci_build/release/windows/gpu_py38_full/nightly.bat index 247487fa926..dcbed63089e 100644 --- a/tensorflow/tools/ci_build/release/windows/gpu_py38_full/nightly.bat +++ b/tensorflow/tools/ci_build/release/windows/gpu_py38_full/nightly.bat @@ -17,4 +17,4 @@ SET PYTHON_DIRECTORY=Python38 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --extra_build_flags "--config=v2" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" +call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --extra_test_flags "--test_env=TF2_BEHAVIOR=1" diff --git a/tensorflow/tools/ci_build/release/windows/gpu_py38_full/release.bat b/tensorflow/tools/ci_build/release/windows/gpu_py38_full/release.bat index fc1c600fa5e..fa1fc131145 100644 --- a/tensorflow/tools/ci_build/release/windows/gpu_py38_full/release.bat +++ b/tensorflow/tools/ci_build/release/windows/gpu_py38_full/release.bat @@ -17,7 +17,7 @@ SET PYTHON_DIRECTORY=Python38 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --release_build --extra_build_flags "--config=v2" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow" +call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --release_build --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow" for %%a in ("%~dp0\.") do set "PARENT_DIR=%%~nxa" bash -l tensorflow\tools\ci_build\release\windows\%PARENT_DIR%\release_pip_rename.sh From 
c58899efe94acb65fad1d5672484462a22af2195 Mon Sep 17 00:00:00 2001 From: Reed Wanderman-Milne Date: Tue, 28 Jul 2020 11:18:59 -0700 Subject: [PATCH 1487/2522] Support CPU bfloat16 SquaredDifference. This allows the Keras mean_squared_error function to use bfloat16 on CPU PiperOrigin-RevId: 323613429 Change-Id: I67996779738fb10a4c916aeb19704a599f2f2c12 --- tensorflow/core/kernels/cwise_op_squared_difference.cc | 5 +++-- tensorflow/python/ops/math_ops_test.py | 5 ++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/kernels/cwise_op_squared_difference.cc b/tensorflow/core/kernels/cwise_op_squared_difference.cc index 154c6adf258..12520b7e10b 100644 --- a/tensorflow/core/kernels/cwise_op_squared_difference.cc +++ b/tensorflow/core/kernels/cwise_op_squared_difference.cc @@ -16,8 +16,9 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER7(BinaryOp, CPU, "SquaredDifference", functor::squared_difference, - float, Eigen::half, double, int32, int64, complex64, complex128); +REGISTER8(BinaryOp, CPU, "SquaredDifference", functor::squared_difference, + float, Eigen::half, double, bfloat16, int32, int64, complex64, + complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER4(BinaryOp, GPU, "SquaredDifference", functor::squared_difference, float, Eigen::half, double, int64); diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py index bf15bf86ee2..296395d034f 100644 --- a/tensorflow/python/ops/math_ops_test.py +++ b/tensorflow/python/ops/math_ops_test.py @@ -261,7 +261,10 @@ class ModTest(test_util.TensorFlowTestCase): class SquaredDifferenceTest(test_util.TensorFlowTestCase): def testSquaredDifference(self): - for dtype in [np.float16, np.float32, np.float64, np.int32, np.int64]: + for dtype in [ + np.float16, np.float32, np.float64, dtypes.bfloat16.as_numpy_dtype, + np.int32, np.int64 + ]: x = np.array([[1, 2, 3], [4, 5, 6]], dtype=dtype) y = np.array([-3, -2, -1], dtype=dtype) z = (x - y) * (x - y) From f683d360788a4fb3c192e4d38cb23bbf26d79ef4 Mon Sep 17 00:00:00 2001 From: Rick Chao Date: Tue, 28 Jul 2020 11:20:38 -0700 Subject: [PATCH 1488/2522] Remove run_v1_only decorator in tracking_util_test as it can be run with both v1 and v2. 
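For background: the `combinations.generate(combinations.combine(mode=["graph", "eager"]))` decorator that remains on this test already parameterizes it to run once inside a graph and once eagerly, so a separate `run_v1_only` guard is redundant as long as the test body passes in both modes. A minimal sketch of that pattern, assuming the usual absl parameterized test setup (the import paths, class name and test body below are illustrative assumptions, not taken from this change):

    from absl.testing import parameterized

    from tensorflow.python.framework import combinations
    from tensorflow.python.platform import test


    class ExampleTrackingTest(test.TestCase, parameterized.TestCase):

      # The decorator runs this body twice: once under a graph and once
      # eagerly, so no @test_util.run_v1_only guard is required.
      @combinations.generate(combinations.combine(mode=["graph", "eager"]))
      def testRunsInBothModes(self):
        self.assertEqual(2, 1 + 1)


    if __name__ == "__main__":
      test.main()
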
PiperOrigin-RevId: 323613818 Change-Id: Iaab9732e5b9d56c03386ed022142686747defd18 --- tensorflow/python/keras/tests/tracking_util_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/python/keras/tests/tracking_util_test.py b/tensorflow/python/keras/tests/tracking_util_test.py index 32b3ceec6f6..0aa6b03ecb0 100644 --- a/tensorflow/python/keras/tests/tracking_util_test.py +++ b/tensorflow/python/keras/tests/tracking_util_test.py @@ -408,7 +408,6 @@ class CheckpointingTests(keras_parameterized.TestCase): # pylint: disable=cell-var-from-loop @combinations.generate(combinations.combine(mode=["graph", "eager"])) - @test_util.run_v1_only("b/120545219") def testWithDefun(self): with self.test_session(): num_training_steps = 2 From f4de8c6d5770c43701045397a171b238a9402235 Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Tue, 28 Jul 2020 11:21:45 -0700 Subject: [PATCH 1489/2522] Fix a typo PiperOrigin-RevId: 323614079 Change-Id: Iff91d64b193dec3ae0874321558fe8ca73ecc177 --- tensorflow/tools/ci_build/sizetrack_helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/sizetrack_helper.py b/tensorflow/tools/ci_build/sizetrack_helper.py index 4b7c83919b2..85bfa125828 100755 --- a/tensorflow/tools/ci_build/sizetrack_helper.py +++ b/tensorflow/tools/ci_build/sizetrack_helper.py @@ -99,7 +99,7 @@ size = parser.add_mutually_exclusive_group() size.add_argument( "--artifact", type=argparse.FileType("r"), - help="Local to file you are measuring.") + help="Local file you are measuring.") size.add_argument( "--manual_bytes", type=int, From 42d3b023d84bf7a83b3a723613f42e31770362e4 Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Tue, 28 Jul 2020 11:28:35 -0700 Subject: [PATCH 1490/2522] A new SPMD API to shard tensor on a device mesh The user can define a mesh of device IDs (from 0 to N-1), then map a tensor's dimensions to the mesh dimensions. This helps choosing tile assignment that avoids resharding. PiperOrigin-RevId: 323615551 Change-Id: I30523c8ac65c0adaac0f82873141ab592cfde13b --- .../experimental/xla_sharding/xla_sharding.py | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tensorflow/compiler/xla/experimental/xla_sharding/xla_sharding.py b/tensorflow/compiler/xla/experimental/xla_sharding/xla_sharding.py index 212ad87d94c..16563bab5bc 100644 --- a/tensorflow/compiler/xla/experimental/xla_sharding/xla_sharding.py +++ b/tensorflow/compiler/xla/experimental/xla_sharding/xla_sharding.py @@ -294,3 +294,39 @@ def manual_to_auto_spmd_partition(tensor, manual_sharding, full_shape): """ return tf2xla.spmd_shard_to_full_shape( tensor, manual_sharding=manual_sharding, full_shape=full_shape) + + +def mesh_split(tensor, + device_mesh, + tensor_split_dims_mapping, + use_sharding_op=False): + """Returns a tensor that is split along multiple dimensions in a device mesh. + + Args: + tensor: A tf.Tensor to split. + device_mesh: An np.ndarray describing the topology of the device mesh and + each element is the ID of the device in the topology. + tensor_split_dims_mapping: A list of integers that map each tensor axis to + the device mesh axis along which it is sharded. Its length is the tensor + rank, and tensor_split_dims_mapping[i] is device mesh axis for tensor + dimension i. Use -1 for tensor dimensions that are not sharded. + use_sharding_op: If true, adds a sharding op to set the sharding. + + Raises: + ValueError: The number of tensor split dimensions is different from device + mesh rank. 
+ """ + permutation = [d for d in tensor_split_dims_mapping if d >= 0] + if len(permutation) != len(device_mesh.shape): + raise ValueError( + 'Number of tensor split dimensions (%r) is different from device mesh ' + 'rank (%r). tensor_split_dims_mapping: %r, device_mesh.shape: %r' % + (len(permutation), len( + device_mesh.shape), tensor_split_dims_mapping, device_mesh.shape)) + tile_assignment = _np.transpose(device_mesh, permutation) + tile_shape = [ + 1 if d < 0 else device_mesh.shape[d] for d in tensor_split_dims_mapping + ] + tile_assignment = _np.reshape(tile_assignment, tile_shape) + + return tile(tensor, tile_assignment, use_sharding_op=use_sharding_op) From bc59b5d7347ae54f10932986a0bdf29f424f66a0 Mon Sep 17 00:00:00 2001 From: Bruce Fontaine Date: Tue, 28 Jul 2020 11:30:38 -0700 Subject: [PATCH 1491/2522] Allow setting optimizer in TPUEmbedding mid level API to None when using CPU strategy. PiperOrigin-RevId: 323616014 Change-Id: I75598a854738951cc7990db0cef090571f7d1e10 --- tensorflow/python/tpu/tpu_embedding_v2.py | 15 +++++++++++---- .../python/tpu/tpu_embedding_v2_cpu_test.py | 12 ++++++++++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/tpu/tpu_embedding_v2.py b/tensorflow/python/tpu/tpu_embedding_v2.py index 8e23812706b..fd3d1055618 100644 --- a/tensorflow/python/tpu/tpu_embedding_v2.py +++ b/tensorflow/python/tpu/tpu_embedding_v2.py @@ -251,7 +251,10 @@ class TPUEmbedding(tracking.AutoTrackable): evaluation. optimizer: An instance of one of `tf.tpu.experimental.embedding.SGD`, `tf.tpu.experimental.embedding.Adagrad` or - `tf.tpu.experimental.embedding.Adam`. + `tf.tpu.experimental.embedding.Adam`. When not created under + TPUStrategy may be set to None to avoid the creation of the optimizer + slot variables, useful for optimizing memory consumption when exporting + the model for serving where slot variables aren't needed. pipeline_execution_with_tensor_core: If True, the TPU embedding computations will overlap with the TensorCore computations (and hence will be one step old). Set to True for improved performance. @@ -262,7 +265,7 @@ class TPUEmbedding(tracking.AutoTrackable): Raises: ValueError: If optimizer is not one of tf.tpu.experimental.embedding.(SGD, - Adam or Adagrad). + Adam or Adagrad) or None when created under a TPUStrategy. """ self._strategy = distribution_strategy_context.get_strategy() self._using_tpu = isinstance(self._strategy, (tpu_strategy.TPUStrategy, @@ -299,7 +302,8 @@ class TPUEmbedding(tracking.AutoTrackable): if table.optimizer is None: # TODO(bfontain) Should we allow some sort of optimizer merging here? table.optimizer = optimizer - if not isinstance(table.optimizer, tpu_embedding_v2_utils._Optimizer): # pylint: disable=protected-access + if ((table.optimizer is not None or self._using_tpu) and + not isinstance(table.optimizer, tpu_embedding_v2_utils._Optimizer)): # pylint: disable=protected-access raise ValueError("{} is an unsupported optimizer class. 
Please pass an " "instance of one of the optimizer classes under " "tf.tpu.experimental.embedding.".format( @@ -740,7 +744,10 @@ class TPUEmbedding(tracking.AutoTrackable): initializer, False) - slot_vars = table.optimizer._create_slots(parameters, slot_creator) # pylint: disable=protected-access + if table.optimizer is not None: + slot_vars = table.optimizer._create_slots(parameters, slot_creator) # pylint: disable=protected-access + else: + slot_vars = {} slot_vars["parameters"] = parameters return slot_vars diff --git a/tensorflow/python/tpu/tpu_embedding_v2_cpu_test.py b/tensorflow/python/tpu/tpu_embedding_v2_cpu_test.py index 3177498deba..6c44b77b5ec 100644 --- a/tensorflow/python/tpu/tpu_embedding_v2_cpu_test.py +++ b/tensorflow/python/tpu/tpu_embedding_v2_cpu_test.py @@ -296,6 +296,18 @@ class CPUEmbeddingTest(test.TestCase): tables=mid_level.embedding_tables, feature_config=feature_config) + def test_cpu_no_optimizer(self): + feature_config = ( + tpu_embedding_v2_utils.FeatureConfig( + table=self.table_video, name='watched', max_sequence_length=2),) + mid_level = tpu_embedding_v2.TPUEmbedding( + feature_config=feature_config, + batch_size=self.batch_size, + optimizer=None) + self.assertEqual( + list(mid_level._variables[self.table_video.name].keys()), + ['parameters']) + if __name__ == '__main__': v2_compat.enable_v2_behavior() From 53de2cff12b69b8e65b3d7d349b77ca4fc99b5ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tar=C3=A9=20Gaskin?= Date: Tue, 28 Jul 2020 19:04:37 +0000 Subject: [PATCH 1492/2522] removing unrelated changes --- .../core/profiler/convert/op_stats_to_overview_page.cc | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc b/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc index 154d2897613..cd0f10543df 100644 --- a/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc +++ b/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc @@ -25,7 +25,6 @@ limitations under the License. #include "tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.h" #include "tensorflow/core/profiler/protobuf/hardware_types.pb.h" #include "tensorflow/core/profiler/protobuf/input_pipeline.pb.h" -#include "tensorflow/core/profiler/protobuf/kernel_stats.pb.h" #include "tensorflow/core/profiler/protobuf/op_metrics.pb.h" #include "tensorflow/core/profiler/protobuf/op_stats.pb.h" #include "tensorflow/core/profiler/protobuf/overview_page.pb.h" @@ -34,7 +33,6 @@ limitations under the License. 
#include "tensorflow/core/profiler/utils/diagnostics.h" #include "tensorflow/core/profiler/utils/hardware_type_utils.h" #include "tensorflow/core/profiler/utils/html_utils.h" -#include "tensorflow/core/profiler/utils/kernel_stats_utils.h" #include "tensorflow/core/profiler/utils/math_utils.h" #include "tensorflow/core/profiler/utils/op_metrics_db_utils.h" #include "tensorflow/core/profiler/utils/time_utils.h" @@ -165,9 +163,6 @@ OverviewPageAnalysis ComputeAnalysisResult(const OpStats& op_stats) { OverviewPageAnalysis analysis; OpMetricsDb device_tf_op_metrics_db = CreateTfMetricsDbFromDeviceOpMetricsDb( op_stats.device_op_metrics_db(), /*with_idle=*/false); - absl::flat_hash_map> - grouped_kernel_reports = - GroupKernelReportsByOpName(op_stats.kernel_stats_db()); uint64 total_device_time_ps = device_tf_op_metrics_db.total_time_ps(); constexpr int kNumTopOpsShown = 10; double device_cumulative_fraction = 0.0; @@ -182,11 +177,6 @@ OverviewPageAnalysis ComputeAnalysisResult(const OpStats& op_stats) { op->set_cumulative_time_fraction(device_cumulative_fraction); op->set_flop_rate( SafeDivide(metrics->flops(), PicosToNanos(metrics->time_ps()))); - auto iter = grouped_kernel_reports.find(op->name()); - if (iter != grouped_kernel_reports.end()) { - op->set_is_op_tensorcore_eligible( - iter->second.front()->is_op_tensor_core_eligible()); - } } uint64 total_device_compute_ps = op_stats.device_op_metrics_db().precision_stats().compute_16bit_ps() + From a912a8ed6cc873e1b4ed5de0fb0524d2e499ea34 Mon Sep 17 00:00:00 2001 From: Jay Shi Date: Tue, 28 Jul 2020 11:46:27 -0700 Subject: [PATCH 1493/2522] [tf.data] Add `JobName` function to the file. PiperOrigin-RevId: 323619463 Change-Id: I1ee9463b39dc8e5fc9fa9d657ac59768aa7609bf --- tensorflow/core/platform/windows/port.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/core/platform/windows/port.cc b/tensorflow/core/platform/windows/port.cc index 00f52f9b70c..52f9e479036 100644 --- a/tensorflow/core/platform/windows/port.cc +++ b/tensorflow/core/platform/windows/port.cc @@ -49,6 +49,8 @@ string Hostname() { return name; } +string JobName() { return ""; } + int NumSchedulableCPUs() { SYSTEM_INFO system_info; GetSystemInfo(&system_info); From dc3099c444d294b39cd79fe1d1a4bff59a0c6180 Mon Sep 17 00:00:00 2001 From: Mangpo Phothilimthana Date: Tue, 28 Jul 2020 11:47:43 -0700 Subject: [PATCH 1494/2522] Remove read limit of ReadBinaryProto. PiperOrigin-RevId: 323619709 Change-Id: I2b21c8c83d62f5f21c892ddad93c9d5a162fe6cf --- tensorflow/core/platform/env.cc | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tensorflow/core/platform/env.cc b/tensorflow/core/platform/env.cc index 05d95ba0425..4cc36e0b705 100644 --- a/tensorflow/core/platform/env.cc +++ b/tensorflow/core/platform/env.cc @@ -544,15 +544,7 @@ Status ReadBinaryProto(Env* env, const string& fname, std::unique_ptr file; TF_RETURN_IF_ERROR(env->NewRandomAccessFile(fname, &file)); std::unique_ptr stream(new FileStream(file.get())); - - // TODO(jiayq): the following coded stream is for debugging purposes to allow - // one to parse arbitrarily large messages for MessageLite. One most likely - // doesn't want to put protobufs larger than 64MB on Android, so we should - // eventually remove this and quit loud when a large protobuf is passed in. protobuf::io::CodedInputStream coded_stream(stream.get()); - // Total bytes hard limit / warning limit are set to 1GB and 512MB - // respectively. 
- coded_stream.SetTotalBytesLimit(1024LL << 20, 512LL << 20); if (!proto->ParseFromCodedStream(&coded_stream) || !coded_stream.ConsumedEntireMessage()) { From 33e8a866f137a494c3f30a554c1ba59e8c56da2e Mon Sep 17 00:00:00 2001 From: Bruce Fontaine Date: Tue, 28 Jul 2020 12:07:46 -0700 Subject: [PATCH 1495/2522] Add an option to make get next parallel when using PER_HOST_V2 input mode on TPUEstimator. PiperOrigin-RevId: 323624249 Change-Id: I239ff160a669ab53300158738cc3c881251b1f6d --- .../golden/v1/tensorflow.estimator.tpu.-t-p-u-config.pbtxt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.tpu.-t-p-u-config.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.tpu.-t-p-u-config.pbtxt index d934b4013b2..e329045123e 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.tpu.-t-p-u-config.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.tpu.-t-p-u-config.pbtxt @@ -7,6 +7,10 @@ tf_class { name: "eval_training_input_configuration" mtype: "" } + member { + name: "experimental_allow_per_host_v2_parallel_get_next" + mtype: "" + } member { name: "experimental_host_call_every_n_steps" mtype: "" From bf52f56d1f299d3e8ab17b35f57aca89a91d87e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tar=C3=A9=20Gaskin?= Date: Tue, 28 Jul 2020 19:20:28 +0000 Subject: [PATCH 1496/2522] recommended updates --- tensorflow/core/kernels/data/experimental/snapshot_util.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/kernels/data/experimental/snapshot_util.cc b/tensorflow/core/kernels/data/experimental/snapshot_util.cc index 7b4b68a6aec..c063217b8e0 100644 --- a/tensorflow/core/kernels/data/experimental/snapshot_util.cc +++ b/tensorflow/core/kernels/data/experimental/snapshot_util.cc @@ -906,9 +906,10 @@ Status DetermineOpState(const std::string& mode_string, bool file_exists, return Status::OK(); } - if (metadata->creation_timestamp() >= - static_cast(static_cast(EnvTime::NowMicros()) - - pending_snapshot_expiry_seconds * 1000000)) { + int64 expiration_timer = EnvTime::NowMicros() + - pending_snapshot_expiry_seconds * 1000000; + + if (metadata->creation_timestamp() >= expiration_timer) { // Someone else is already writing and time has not expired. *mode = PASSTHROUGH; return Status::OK(); From 6a8e5328c68b037a741b40bc538fecfb72980953 Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Tue, 28 Jul 2020 15:22:33 -0400 Subject: [PATCH 1497/2522] Update tensorflow/core/grappler/graph_analyzer/graph_analyzer.cc Co-authored-by: Mihai Maruseac --- tensorflow/core/grappler/graph_analyzer/graph_analyzer.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/grappler/graph_analyzer/graph_analyzer.cc b/tensorflow/core/grappler/graph_analyzer/graph_analyzer.cc index e0de3257b0d..b35099a6aae 100644 --- a/tensorflow/core/grappler/graph_analyzer/graph_analyzer.cc +++ b/tensorflow/core/grappler/graph_analyzer/graph_analyzer.cc @@ -92,8 +92,8 @@ void GraphAnalyzer::FindSubgraphs() { } void GraphAnalyzer::ExtendSubgraph(Subgraph* parent) { - const int parent_id_size_plus_one = parent->id().size() + 1; - bool will_complete = (parent_id_size_plus_one == subgraph_size_); + const int next_parent_id = parent->id().size() + 1; + bool will_complete = (next_parent_id == subgraph_size_); SubgraphPtrSet& sg_set = will_complete ? result_ : partial_; const GenNode* last_all_or_none_node = nullptr; From 32a84465d39ffe38b78dffb58cf7973b315786f3 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 28 Jul 2020 12:15:31 -0700 Subject: [PATCH 1498/2522] Fully qualify uses of tensorflow::int64. PiperOrigin-RevId: 323625977 Change-Id: I0d10e5c75f3ae7544d316ba0f866e5b0e9c159cc --- tensorflow/c/eager/c_api_debug.cc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tensorflow/c/eager/c_api_debug.cc b/tensorflow/c/eager/c_api_debug.cc index 6827021455b..dd55f05283b 100644 --- a/tensorflow/c/eager/c_api_debug.cc +++ b/tensorflow/c/eager/c_api_debug.cc @@ -26,14 +26,13 @@ limitations under the License. #include "tensorflow/compiler/jit/xla_device.h" #endif // TENSORFLOW_EAGER_USE_XLA -using tensorflow::int64; using tensorflow::string; namespace { -std::vector TensorShapeAsVector(const tensorflow::TensorHandle& handle, - tensorflow::Status* status) { - std::vector shape; +std::vector TensorShapeAsVector( + const tensorflow::TensorHandle& handle, tensorflow::Status* status) { + std::vector shape; int rank = -1; *status = handle.NumDims(&rank); if (!status->ok()) { @@ -79,7 +78,7 @@ TF_CAPI_EXPORT extern TFE_TensorDebugInfo* TFE_TensorHandleTensorDebugInfo( return nullptr; } if (VLOG_IS_ON(3)) { - std::vector shape_to_log = + std::vector shape_to_log = TensorShapeAsVector(*handle, &status->status); if (!status->status.ok()) { // Ignore the status here as we are simply logging. @@ -128,14 +127,14 @@ TF_CAPI_EXPORT extern TFE_TensorDebugInfo* TFE_TensorHandleTensorDebugInfo( } int rank = padded_shape.dimensions_size(); - std::vector dev_dims; + std::vector dev_dims; dev_dims.reserve(rank); if (rank == 1) { // Rank 1 tensors might not have padded_shape.layout.minor_to_major set, dev_dims.push_back(padded_shape.dimensions(0)); } else { for (int i = rank - 1; i >= 0; --i) { - int64 dim_index = padded_shape.layout().minor_to_major(i); + tensorflow::int64 dim_index = padded_shape.layout().minor_to_major(i); dev_dims.push_back(padded_shape.dimensions(dim_index)); } } @@ -146,7 +145,8 @@ TF_CAPI_EXPORT extern TFE_TensorDebugInfo* TFE_TensorHandleTensorDebugInfo( // If the tensor is not an XLA tensor, the device shape is // the same as regular tensor shape. 
- std::vector dev_dims = TensorShapeAsVector(*handle, &status->status); + std::vector dev_dims = + TensorShapeAsVector(*handle, &status->status); if (!status->status.ok()) { return nullptr; } From 38ebede5aae863dec8a6284c9e9d8b93971fb039 Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Tue, 28 Jul 2020 15:23:57 -0400 Subject: [PATCH 1499/2522] Update map_vectorization.cc --- tensorflow/core/grappler/optimizers/data/map_vectorization.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/optimizers/data/map_vectorization.cc b/tensorflow/core/grappler/optimizers/data/map_vectorization.cc index ce8830309e9..da0c8b3ddf4 100644 --- a/tensorflow/core/grappler/optimizers/data/map_vectorization.cc +++ b/tensorflow/core/grappler/optimizers/data/map_vectorization.cc @@ -103,7 +103,7 @@ FunctionDef* CreateMapDefunWrapper(const NodeDef& map_node, // Set return values to match output names string output_prefix = strings::StrCat(map_defun_node->name(), ":output:"); - for (size_t i = 0; i < static_cast(vectorized_func->signature().output_arg_size()); ++i) { + for (size_t i = 0, end = vectorized_func->signature().output_arg_size(); i < end; ++i) { const auto& output_arg = vectorized_func->signature().output_arg(i); (*vectorized_func->mutable_ret())[output_arg.name()] = strings::StrCat(output_prefix, i); From f72b707dfc0588d4ba43f696abc3263e5f9dcd19 Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Tue, 28 Jul 2020 12:26:00 -0700 Subject: [PATCH 1500/2522] [tf.data] Reverting portion of cl/322686110, changing back model input time to be zero (i.e. infinitely fast consumer). PiperOrigin-RevId: 323628170 Change-Id: Ia6dd4fe48985c431b84c6189e5622e29fde4f8f9 --- tensorflow/core/kernels/data/model_dataset_op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/data/model_dataset_op.cc b/tensorflow/core/kernels/data/model_dataset_op.cc index af9e1e59a35..d32ac368fa1 100644 --- a/tensorflow/core/kernels/data/model_dataset_op.cc +++ b/tensorflow/core/kernels/data/model_dataset_op.cc @@ -204,7 +204,7 @@ class ModelDatasetOp : public UnaryDatasetOpKernel { model_input_time = SelfInputTime(); } model_->Optimize(dataset()->algorithm_, dataset()->cpu_budget_, - dataset()->ram_budget_, model_input_time); + dataset()->ram_budget_, /*model_input_time=*/0); // Exponentially increase the period of running the optimization // until a threshold is reached. 
if (optimization_period_ms != kOptimizationPeriodThresholdMs) { From 7d2be7bf531984ea89889909662cc99e530cd76b Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Tue, 28 Jul 2020 15:36:35 -0400 Subject: [PATCH 1501/2522] Update tensorflow/core/distributed_runtime/collective_rma_distributed.cc Co-authored-by: Mihai Maruseac --- .../core/distributed_runtime/collective_rma_distributed.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/distributed_runtime/collective_rma_distributed.cc b/tensorflow/core/distributed_runtime/collective_rma_distributed.cc index d740a165797..46889e737e7 100644 --- a/tensorflow/core/distributed_runtime/collective_rma_distributed.cc +++ b/tensorflow/core/distributed_runtime/collective_rma_distributed.cc @@ -109,8 +109,8 @@ void CollectiveRemoteAccessDistributed::RecvFromPeer( for (const auto& chunk : extra.tensor_content()) { num_bytes += chunk.size(); } - const int64 to_tensor_TotalBytes = to_tensor->TotalBytes(); - if (num_bytes != to_tensor_TotalBytes) { + const int64 total_bytes = to_tensor->TotalBytes(); + if (num_bytes != total_bytes) { done(errors::Internal("RecvBufResponse returned ", num_bytes, " bytes where to_tensor expected ", to_tensor->TotalBytes())); From 6f2f7a243b7cc28a61db03696fccb17283aa14b8 Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Tue, 28 Jul 2020 15:36:42 -0400 Subject: [PATCH 1502/2522] Update tensorflow/core/distributed_runtime/rpc/grpc_remote_master.cc Co-authored-by: Mihai Maruseac --- tensorflow/core/distributed_runtime/rpc/grpc_remote_master.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_remote_master.cc b/tensorflow/core/distributed_runtime/rpc/grpc_remote_master.cc index 1781c643e1d..07ab6c69d2e 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_remote_master.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_remote_master.cc @@ -176,8 +176,8 @@ class GrpcRemoteMaster : public MasterInterface { ? deadline_with_backoff_micros : expired_time_micros; Env::Default()->SleepForMicroseconds(backoff_until - now_micros); - const int64 default_now_micros = Env::Default()->NowMicros(); - if (default_now_micros > expired_time_micros && timeout_in_ms > 0) { + const int64 now = Env::Default()->NowMicros(); + if (now > expired_time_micros && timeout_in_ms > 0) { // If timeout_in_ms is set, exit the retry loop on timeout. 
return errors::DeadlineExceeded(ctx.debug_error_string()); } From cdac4cf5c5ef1dcd6c449e1e4e607eac7f76251f Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Tue, 28 Jul 2020 12:27:16 -0700 Subject: [PATCH 1503/2522] Fix tf.map_fn interop with np ndarray PiperOrigin-RevId: 323628461 Change-Id: Ib56efd25a2cd200cb1f2cb2469940f24b93cfcd2 --- tensorflow/python/ops/map_fn.py | 11 ++++++++++- tensorflow/python/ops/numpy_ops/np_interop_test.py | 9 +++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/ops/map_fn.py b/tensorflow/python/ops/map_fn.py index e751ba1b25c..bf7b2cda7f3 100644 --- a/tensorflow/python/ops/map_fn.py +++ b/tensorflow/python/ops/map_fn.py @@ -38,10 +38,16 @@ from tensorflow.python.ops import variable_scope as vs from tensorflow.python.ops.ragged import ragged_tensor from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util import deprecation +from tensorflow.python.util import lazy_loader from tensorflow.python.util import nest from tensorflow.python.util.tf_export import tf_export +np_arrays = lazy_loader.LazyLoader( + "np_arrays", globals(), + "tensorflow.python.ops.numpy_ops.np_arrays") + + @tf_export(v1=["map_fn"]) @deprecation.deprecated_args(None, "Use fn_output_signature instead", "dtype") def map_fn(fn, @@ -419,7 +425,10 @@ def map_fn(fn, ] # Check that inputs are not scalars. - elems_static_shape = elems_flat[0].shape + first_elem = elems_flat[0] + if isinstance(first_elem, np_arrays.ndarray): + first_elem = first_elem.data + elems_static_shape = first_elem.shape if elems_static_shape.ndims is not None and elems_static_shape.ndims < 1: if len(elems_flat) == 1: raise ValueError("elems must be a 1+ dimensional Tensor, not a scalar") diff --git a/tensorflow/python/ops/numpy_ops/np_interop_test.py b/tensorflow/python/ops/numpy_ops/np_interop_test.py index ec350804e02..9074f377d0c 100644 --- a/tensorflow/python/ops/numpy_ops/np_interop_test.py +++ b/tensorflow/python/ops/numpy_ops/np_interop_test.py @@ -315,6 +315,15 @@ class InteropTest(tf.test.TestCase): self.assertIsInstance(batch_jacobian, np.ndarray) self.assertAllClose(batch_jacobian, answer) + def testMapFn(self): + x = np.asarray([1., 2.]) + mapped_x = tf.map_fn(lambda x: (x[0]+1, x[1]+1), (x, x)) + + self.assertIsInstance(mapped_x[0], np.ndarray) + self.assertIsInstance(mapped_x[1], np.ndarray) + self.assertAllClose(mapped_x[0], [2., 3.]) + self.assertAllClose(mapped_x[1], [2., 3.]) + class FunctionTest(InteropTest): From 7784fe3bc0b3e15f24374ad05f7e9bb4a98bf345 Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Tue, 28 Jul 2020 15:37:27 -0400 Subject: [PATCH 1504/2522] Update shape_inference.cc --- tensorflow/core/framework/shape_inference.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/core/framework/shape_inference.cc b/tensorflow/core/framework/shape_inference.cc index 72fff010d08..456c1826572 100644 --- a/tensorflow/core/framework/shape_inference.cc +++ b/tensorflow/core/framework/shape_inference.cc @@ -211,7 +211,6 @@ void InferenceContext::PostInputInit( } input_handle_shapes_and_types_ = std::move(input_handle_data); } - const int inputs_size = inputs_.size(); if (inputs_size != num_inputs_from_node_def) { construction_status_ = errors::InvalidArgument( From 3ad158c7b9d5cd62b2c955a3393272599230a065 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 28 Jul 2020 12:44:25 -0700 Subject: [PATCH 1505/2522] Add a clarifying comment; NFC PiperOrigin-RevId: 323632239 Change-Id: Ia6c34d90e2c446eedd1a3fee1000951a70c955b8 --- 
tensorflow/stream_executor/gpu/redzone_allocator.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/stream_executor/gpu/redzone_allocator.h b/tensorflow/stream_executor/gpu/redzone_allocator.h index e5e42df73bd..3e56ca386a9 100644 --- a/tensorflow/stream_executor/gpu/redzone_allocator.h +++ b/tensorflow/stream_executor/gpu/redzone_allocator.h @@ -118,6 +118,9 @@ class RedzoneAllocator : public ScratchAllocator { // isn't necessarily just first.size() - 2 * redzone_size_ because when the // user allocation size is not a multiple of 4 bytes, we round up the size of // the RHS redzone. + // + // ScratchAllocators need to free all allocated memory on destruction so we + // use `OwningDeviceMemory` here. std::vector> allocated_buffers_; int64 allocated_bytes_excluding_redzones_ = 0; From 982236961f580b6b6edf09d693a89e7ad799ce4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tar=C3=A9=20Gaskin?= Date: Tue, 28 Jul 2020 20:08:56 +0000 Subject: [PATCH 1506/2522] updates --- tensorflow/compiler/xla/client/lib/pooling.cc | 8 ++++---- tensorflow/compiler/xla/client/xla_builder.cc | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/compiler/xla/client/lib/pooling.cc b/tensorflow/compiler/xla/client/lib/pooling.cc index 6a0db64b834..460c1cff03a 100644 --- a/tensorflow/compiler/xla/client/lib/pooling.cc +++ b/tensorflow/compiler/xla/client/lib/pooling.cc @@ -198,16 +198,16 @@ XlaOp AvgPoolGrad(XlaOp out_backprop, absl::Span gradients_size, XlaBuilder* b = out_backprop.builder(); return b->ReportErrorOrReturn([&]() -> StatusOr { const int num_dims = kernel_size.size(); - const int gradients_size_size = gradients_size.size(); - if (gradients_size_size != num_dims) { + const int num_gradients = gradients_size.size(); + if (num_gradients != num_dims) { return tensorflow::errors::InvalidArgument("gradients must be ", num_dims, "-dimensional"); } TF_ASSIGN_OR_RETURN(Shape out_backprop_xla_shape, b->GetShape(out_backprop)); - const int obxsd_size = out_backprop_xla_shape.dimensions().size(); - if (obxsd_size != num_dims) { + const int backprop_xla_num_dims = out_backprop_xla_shape.dimensions().size(); + if (backprop_xla_num_dims != num_dims) { return tensorflow::errors::InvalidArgument("out_backprop must be ", num_dims, "-dimensional"); } diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc index c84d2b519dc..db437142665 100644 --- a/tensorflow/compiler/xla/client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_builder.cc @@ -774,9 +774,9 @@ XlaOp XlaBuilder::BroadcastInDim( operand_shape->rank(), broadcast_dimensions.size()); } for (int i = 0, end = broadcast_dimensions.size(); i < end; i++) { - const tensorflow::int64 out_dim_size_size = out_dim_size.size(); + const tensorflow::int64 num_dims = out_dim_size.size(); if (broadcast_dimensions[i] < 0 || - broadcast_dimensions[i] > out_dim_size_size) { + broadcast_dimensions[i] > num_dims) { return InvalidArgument("Broadcast dimension %lld is out of bound", broadcast_dimensions[i]); } From 6c9fad23bf194ba6970b9160531510cf2c77cdef Mon Sep 17 00:00:00 2001 From: Victor de Souza Date: Tue, 28 Jul 2020 13:05:06 -0700 Subject: [PATCH 1507/2522] Fix macro typo that prevented SYCL registrations for ScatterNdMin and ScatterNdMax. 
PiperOrigin-RevId: 323636493 Change-Id: I42237f4b6dadbdc7d6187283b0282eb6dca5e5e5 --- tensorflow/core/kernels/scatter_nd_op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/scatter_nd_op.cc b/tensorflow/core/kernels/scatter_nd_op.cc index c6c93077f01..88bf16d974e 100644 --- a/tensorflow/core/kernels/scatter_nd_op.cc +++ b/tensorflow/core/kernels/scatter_nd_op.cc @@ -513,7 +513,7 @@ TF_CALL_COMPLEX_TYPES(REGISTER_SCATTER_ND_ALL_GPU); #define REGISTER_SCATTER_ND_UPDATE_SYCL(type) \ REGISTER_SCATTER_ND_UPDATE(type, SYCL); -#define REGISTER_SCATTER_ND_MIN_MAX_GPU(type) \ +#define REGISTER_SCATTER_ND_MIN_MAX_SYCL(type) \ REGISTER_SCATTER_ND_MIN_MAX(type, SYCL); TF_CALL_int32(REGISTER_SCATTER_ND_ADD_SUB_SYCL); From 17017477142dbfc04cc2fdedf7ed064939bc294c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tar=C3=A9=20Gaskin?= Date: Tue, 28 Jul 2020 20:15:12 +0000 Subject: [PATCH 1508/2522] updates --- tensorflow/compiler/tf2xla/xla_compiler.cc | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tensorflow/compiler/tf2xla/xla_compiler.cc b/tensorflow/compiler/tf2xla/xla_compiler.cc index 97254c17b09..ea16020721d 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.cc +++ b/tensorflow/compiler/tf2xla/xla_compiler.cc @@ -1030,11 +1030,6 @@ Status XlaCompiler::BuildArguments( xla::XlaScopedShardingAssignment assign_sharding( builder, it == arg_shardings.end() ? absl::optional() : it->second); - auto& arg = args[input_to_args->at(i)]; - - xla::OpMetadata arg_metadata; - arg_metadata.set_op_name(arg.node_name); - builder->SetOneShotOpMetadata(arg_metadata); arg_handles[i] = xla::GetTupleElement(tuple, i); } } else { From 36f3e515a4b683279825d27b3ff3d0b36bc2140f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 28 Jul 2020 13:11:46 -0700 Subject: [PATCH 1509/2522] Compile out contents of DebugLog when building in release mode. PiperOrigin-RevId: 323637922 Change-Id: Icaa8cb1eb5738c75af696f53756377db236aac85 --- tensorflow/lite/micro/apollo3evb/debug_log.cc | 2 ++ tensorflow/lite/micro/sparkfun_edge/debug_log.cc | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tensorflow/lite/micro/apollo3evb/debug_log.cc b/tensorflow/lite/micro/apollo3evb/debug_log.cc index 2779d941784..1523d4bcc84 100644 --- a/tensorflow/lite/micro/apollo3evb/debug_log.cc +++ b/tensorflow/lite/micro/apollo3evb/debug_log.cc @@ -41,6 +41,7 @@ limitations under the License. #include "am_util.h" // NOLINT extern "C" void DebugLog(const char* s) { +#ifndef TF_LITE_STRIP_ERROR_STRINGS static bool is_initialized = false; if (!is_initialized) { am_bsp_itm_printf_enable(); @@ -48,4 +49,5 @@ extern "C" void DebugLog(const char* s) { } am_util_stdio_printf("%s", s); +#endif } diff --git a/tensorflow/lite/micro/sparkfun_edge/debug_log.cc b/tensorflow/lite/micro/sparkfun_edge/debug_log.cc index 1dc15aba529..984d2a90188 100644 --- a/tensorflow/lite/micro/sparkfun_edge/debug_log.cc +++ b/tensorflow/lite/micro/sparkfun_edge/debug_log.cc @@ -23,6 +23,7 @@ limitations under the License. 
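// Note: same idea as the apollo3evb hunk above. When TF_LITE_STRIP_ERROR_STRINGS
// is defined (size-optimized/release builds), DebugLog keeps its signature but
// its body (the UART init and am_util_stdio_printf call) compiles away, so the
// logging machinery can be dropped from the binary.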
#include "am_util.h" // NOLINT extern "C" void DebugLog(const char* s) { +#ifndef TF_LITE_STRIP_ERROR_STRINGS static bool is_initialized = false; if (!is_initialized) { am_bsp_uart_printf_enable(); @@ -30,4 +31,5 @@ extern "C" void DebugLog(const char* s) { } am_util_stdio_printf("%s", s); +#endif } From 2e4a0f193a868d1c368f4ce1185fd41e9f68a04a Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Tue, 28 Jul 2020 17:41:08 +0000 Subject: [PATCH 1510/2522] reordered build dependencies --- tensorflow/c/kernels/BUILD | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/c/kernels/BUILD b/tensorflow/c/kernels/BUILD index b2d72c74dc8..af6253f3403 100644 --- a/tensorflow/c/kernels/BUILD +++ b/tensorflow/c/kernels/BUILD @@ -29,9 +29,9 @@ tf_kernel_library( prefix = "summary_op", deps = [ "//tensorflow/c:kernels", + "//tensorflow/c:tf_status", "//tensorflow/c:tf_tensor", "//tensorflow/c/kernels:tensor_shape_utils", - "//tensorflow/c:tf_status", "//tensorflow/core:framework", "//tensorflow/core:lib", ], @@ -88,16 +88,16 @@ cc_library( name = "tensor_shape_utils", srcs = ["tensor_shape_utils.cc"], hdrs = ["tensor_shape_utils.h"], - deps = [ "//tensorflow/c:tf_tensor"], visibility = ["//visibility:private"], + deps = ["//tensorflow/c:tf_tensor"], ) tf_cc_test( name = "tensor_shape_utils_test", srcs = ["tensor_shape_utils_test.cc"], deps = [ - "//tensorflow/core:framework", ":tensor_shape_utils", + "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:test", "//tensorflow/core:test_main", @@ -114,7 +114,7 @@ tf_cc_test( filegroup( name = "android_all_op_kernels", srcs = [ - "bitcast_op.cc", + "bitcast_op.cc", "summary_op.cc", ], ) @@ -123,7 +123,7 @@ filegroup( filegroup( name = "android_all_ops", srcs = [ - "ops/bitcast.cc", + "ops/bitcast.cc", "ops/summary.cc", ], ) From 6142a2797261f442797ab18430d8e3bcb1b9741d Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Tue, 28 Jul 2020 16:17:21 -0400 Subject: [PATCH 1511/2522] Update tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.cc Co-authored-by: Mihai Maruseac --- .../compiler/mlir/tensorflow/translate/export_graphdef.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.cc b/tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.cc index e508f8fbd6b..571d5e3e715 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.cc @@ -519,11 +519,11 @@ StatusOr> Exporter::Convert( llvm::StringMap name_to_op; for (const auto& it : llvm::enumerate(graph_op.GetFetch().getOperands())) { // Skip control rets. - const int64 it_index = it.index(); - if (it_index >= num_data_results) break; + const int64 index = it.index(); + if (index >= num_data_results) break; // TODO(jpienaar): If there is a result index specified, ensure only one // and that it matches the result index of the op. 
- std::string orig_name(output_names[it.index()]); + std::string orig_name(output_names[index]); auto tensor_id = ParseTensorName(orig_name); auto name = LegalizeNodeName( llvm::StringRef(tensor_id.node().data(), tensor_id.node().size())); From 20eeb11ee0817ebe3ba7621f5f9c29c6e43786c7 Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Tue, 28 Jul 2020 16:17:29 -0400 Subject: [PATCH 1512/2522] Update tensorflow/compiler/mlir/tensorflow/translate/import_model.cc Co-authored-by: Mihai Maruseac --- tensorflow/compiler/mlir/tensorflow/translate/import_model.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc b/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc index 2bf2c900cd2..63366911f63 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc @@ -2387,8 +2387,8 @@ GraphDefImporter::GetArgsRetsAndTypesFromFunctionGraph( "' is missing attribute 'index'"); auto index = attr->i(); - const int nodes_size = nodes->size(); - if (nodes_size < index + 1) nodes->resize(index + 1); + const int num_nodes = nodes->size(); + if (num_nodes < index + 1) nodes->resize(index + 1); if ((*nodes)[index].node != nullptr) return errors::InvalidArgument(node->type_string(), " node '", From 68b5f1defde0a2f855b2172beb84c5bbed5b1fb3 Mon Sep 17 00:00:00 2001 From: Sachin Joglekar Date: Tue, 28 Jul 2020 13:15:45 -0700 Subject: [PATCH 1513/2522] Add support for half_pixel_centers in resize ops w/ Hexagon PiperOrigin-RevId: 323638839 Change-Id: Ic927930989d6bdc362642f111f028d7153d759ec --- .../builders/resize_bilinear_builder.cc | 7 +- .../resize_nearest_neighbor_builder.cc | 11 +- .../resize_nearest_neighbor_builder.h | 1 - .../hexagon/builders/tests/resize_test.cc | 101 ++++++++++++++++-- tensorflow/lite/delegates/hexagon/utils.cc | 4 +- 5 files changed, 107 insertions(+), 17 deletions(-) diff --git a/tensorflow/lite/delegates/hexagon/builders/resize_bilinear_builder.cc b/tensorflow/lite/delegates/hexagon/builders/resize_bilinear_builder.cc index dda1d83717e..dccbdc5fab5 100644 --- a/tensorflow/lite/delegates/hexagon/builders/resize_bilinear_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/resize_bilinear_builder.cc @@ -56,7 +56,7 @@ TfLiteStatus ResizeBilinearOpBuilder::PopulateSubGraph( AddInput(TensorID(input_min_const->GetID(), 0)); AddInput(TensorID(input_max_const->GetID(), 0)); - // Align Corners + // Align Corners & half-pixel-centers. const TfLiteResizeBilinearParams* params = reinterpret_cast(builtin_data_); int align_corners = params->align_corners ? 1 : 0; @@ -64,6 +64,11 @@ TfLiteStatus ResizeBilinearOpBuilder::PopulateSubGraph( kScalarShape, reinterpret_cast(&align_corners), sizeof(align_corners)); AddInput(TensorID(align_corners_const->GetID(), 0)); + int half_pixel_centers = params->half_pixel_centers ? 
1 : 0; + auto* half_pixel_centers_const = graph_builder_->AddConstNodeWithData( + kScalarShape, reinterpret_cast(&half_pixel_centers), + sizeof(half_pixel_centers)); + AddInput(TensorID(half_pixel_centers_const->GetID(), 0)); // Output int output_batch_size, output_height_size, output_width_size, diff --git a/tensorflow/lite/delegates/hexagon/builders/resize_nearest_neighbor_builder.cc b/tensorflow/lite/delegates/hexagon/builders/resize_nearest_neighbor_builder.cc index c8dc0d60363..735130f8fa2 100644 --- a/tensorflow/lite/delegates/hexagon/builders/resize_nearest_neighbor_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/resize_nearest_neighbor_builder.cc @@ -59,11 +59,16 @@ TfLiteStatus ResizeNearestNeighborOpBuilder::PopulateSubGraph( // Align corners. const TfLiteResizeNearestNeighborParams* params = reinterpret_cast(builtin_data_); - align_corners_ = params->align_corners; + int align_corners = params->align_corners ? 1 : 0; auto* align_corners_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&align_corners_), - sizeof(align_corners_)); + kScalarShape, reinterpret_cast(&align_corners), + sizeof(align_corners)); AddInput(TensorID(align_corners_const->GetID(), 0)); + int half_pixel_centers = params->half_pixel_centers ? 1 : 0; + auto* half_pixel_centers_const = graph_builder_->AddConstNodeWithData( + kScalarShape, reinterpret_cast(&half_pixel_centers), + sizeof(half_pixel_centers)); + AddInput(TensorID(half_pixel_centers_const->GetID(), 0)); // Hexagon outputs for this node. int output_batch_size, output_height_size, output_width_size, diff --git a/tensorflow/lite/delegates/hexagon/builders/resize_nearest_neighbor_builder.h b/tensorflow/lite/delegates/hexagon/builders/resize_nearest_neighbor_builder.h index f9fc9281080..3630257cb79 100644 --- a/tensorflow/lite/delegates/hexagon/builders/resize_nearest_neighbor_builder.h +++ b/tensorflow/lite/delegates/hexagon/builders/resize_nearest_neighbor_builder.h @@ -40,7 +40,6 @@ class ResizeNearestNeighborOpBuilder : public OpBuilder { private: TensorID node_output_; float input_min_, input_max_; - bool align_corners_; }; } // namespace hexagon diff --git a/tensorflow/lite/delegates/hexagon/builders/tests/resize_test.cc b/tensorflow/lite/delegates/hexagon/builders/tests/resize_test.cc index a34c65d7d45..57a7d762d8c 100644 --- a/tensorflow/lite/delegates/hexagon/builders/tests/resize_test.cc +++ b/tensorflow/lite/delegates/hexagon/builders/tests/resize_test.cc @@ -22,17 +22,24 @@ class ResizeOpModel : public SingleOpModelWithHexagon { public: explicit ResizeOpModel(BuiltinOperator op_type, const TensorData& input, std::initializer_list size_data, - const TensorData& output) { + const TensorData& output, bool align_corners = false, + bool half_pixel_centers = false) { input_ = AddInput(input); size_ = AddConstInput(TensorType_INT32, size_data, {2}); output_ = AddOutput(output); if (op_type == BuiltinOperator_RESIZE_NEAREST_NEIGHBOR) { SetBuiltinOp(BuiltinOperator_RESIZE_NEAREST_NEIGHBOR, BuiltinOptions_ResizeNearestNeighborOptions, - CreateResizeNearestNeighborOptions(builder_).Union()); + CreateResizeNearestNeighborOptions( + builder_, /*align_corners*/ align_corners, + /*half_pixel_centers*/ half_pixel_centers) + .Union()); } else { SetBuiltinOp(op_type, BuiltinOptions_ResizeBilinearOptions, - CreateResizeBilinearOptions(builder_).Union()); + CreateResizeBilinearOptions( + builder_, /**align_corners**/ align_corners, + /**half_pixel_centers**/ half_pixel_centers) + .Union()); } 
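// For reference (standard TF resize semantics, not code in this test): with
// half_pixel_centers, output pixel y samples the input at
// (y + 0.5) * in_size / out_size - 0.5; with align_corners, the scale is
// (in_size - 1) / (out_size - 1) so corner pixels of input and output map
// exactly onto each other. The new tests below exercise both modes through
// the Hexagon delegate.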
BuildInterpreter({GetShape(input_)}); } @@ -66,9 +73,6 @@ class ResizeOpModel : public SingleOpModelWithHexagon { int output_; }; -// TODO(b/154007913): Investigate why NearestNeighbor does not provide the same -// output always, requiring high allowed error. - TEST(ResizeOpModel, HorizontalResizeBiliear_UInt8) { ResizeOpModel m(BuiltinOperator_RESIZE_BILINEAR, {TensorType_UINT8, {1, 1, 2, 1}, -2.0, 10}, {1, 3}, @@ -87,7 +91,7 @@ TEST(ResizeOpModel, HorizontalResizeNearestNeighbor_Int8) { m.ApplyDelegateAndInvoke(); EXPECT_THAT(m.GetDequantizedOutput(), ElementsAreArray(ArrayFloatNear({3.01176, 3.01176, 6.02353}, - /*max_abs_error=*/4))); + /*max_abs_error=*/1))); } TEST(ResizeOpModel, VerticalResizeBiliear_Int8) { @@ -108,7 +112,7 @@ TEST(ResizeOpModel, VerticalResizeNearestNeighbor_UInt8) { m.ApplyDelegateAndInvoke(); EXPECT_THAT(m.GetDequantizedOutput(), ElementsAreArray(ArrayFloatNear({3.01961, 3.01961, 8.97255}, - /*max_abs_error=*/6))); + /*max_abs_error=*/1))); } TEST(ResizeOpModel, ThreeDimensionalResizeBiliear_UInt8) { @@ -146,7 +150,7 @@ TEST(ResizeOpModel, ThreeDimensionalResizeNearestNeighbor_Int8) { 3.01177, 4.01569, 3.01177, 4.01569, 6.02353, 10.0392, // 10.0392, 12.0471, 10.0392, 12.0471, 14.0549, 16.0627, // }, - /*max_abs_error=*/13))); + /*max_abs_error=*/1))); } TEST(ResizeOpModel, TwoDimensionalResizeBilinearWithTwoBatches_Int8) { @@ -193,7 +197,84 @@ TEST(ResizeOpModel, TwoDimensionalResizeNNWithTwoBatches_UInt8) { 4.01569, 4.01569, 10.0392, // 12.0471, 12.0471, 16.0627, // }, - /*max_abs_error=*/13))); + /*max_abs_error=*/1))); +} + +TEST(ResizeOpModel, TwoDimResizeBilinearWithTwoBatches_HalfPixelCenters_UInt8) { + ResizeOpModel m(BuiltinOperator_RESIZE_BILINEAR, + {TensorType_UINT8, {2, 2, 2, 1}, -2.0, 20}, {3, 3}, + {TensorType_UINT8, {}, -2.0, 20}, /**align_corners**/ false, + /**half_pixel_centers**/ true); + m.SetQuantizedInput({ + 3, 6, // + 9, 12, // + 4, 10, // + 12, 16 // + }); + m.ApplyDelegateAndInvoke(); + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear({2, 4, 6, // + 6, 7, 9, // + 9, 10, 12, // + 4, 7, 10, // + 8, 10, 13, // + 12, 14, 16}, + /*max_abs_error=*/2))); +} + +TEST(ResizeOpModel, TwoDimResizeBilinearWithTwoBatches_AlignCorners_UInt8) { + ResizeOpModel m(BuiltinOperator_RESIZE_BILINEAR, + {TensorType_UINT8, {2, 2, 2, 1}, -2.0, 20}, {3, 3}, + {TensorType_UINT8, {}, -2.0, 20}, /**align_corners**/ true, + /**half_pixel_centers**/ false); + m.SetQuantizedInput({ + 3, 6, // + 9, 12, // + 4, 10, // + 12, 16 // + }); + m.ApplyDelegateAndInvoke(); + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear({3, 5, 6, // + 7, 9, 10, // + 9, 11, 12, // + 4, 8, 10, // + 9, 12, 13, // + 12, 15, 16}, + /*max_abs_error=*/2))); +} + +TEST(ResizeOpModel, ThreeDimensionalResizeNN_AlignCorners_UInt8) { + ResizeOpModel m(BuiltinOperator_RESIZE_NEAREST_NEIGHBOR, + {TensorType_UINT8, {1, 2, 2, 2}, -2.0, 20}, {3, 3}, + {TensorType_UINT8, {}, -2.0, 20}, /**align_corners**/ true); + m.SetQuantizedInput({ + 3, 4, 6, 10, // + 10, 12, 14, 16, // + }); + m.ApplyDelegateAndInvoke(); + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear({3, 4, 6, 10, 6, 10, // + 10, 12, 14, 16, 14, 16, // + 10, 12, 14, 16, 14, 16}, + /*max_abs_error=*/1))); +} + +TEST(ResizeOpModel, ThreeDimensionalResizeNN_HalfPixelCenters_UInt8) { + ResizeOpModel m(BuiltinOperator_RESIZE_NEAREST_NEIGHBOR, + {TensorType_UINT8, {1, 2, 2, 2}, -2.0, 20}, {3, 3}, + {TensorType_UINT8, {}, -2.0, 20}, /**align_corners**/ false, + /**half_pixel_centers**/ 
true); + m.SetQuantizedInput({ + 3, 4, 6, 10, // + 10, 12, 14, 16, // + }); + m.ApplyDelegateAndInvoke(); + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear({3, 4, 6, 10, 6, 10, // + 10, 12, 14, 16, 14, 16, // + 10, 12, 14, 16, 14, 16}, + /*max_abs_error=*/1))); } } // namespace tflite diff --git a/tensorflow/lite/delegates/hexagon/utils.cc b/tensorflow/lite/delegates/hexagon/utils.cc index 14d651a9d7d..397400c81f0 100644 --- a/tensorflow/lite/delegates/hexagon/utils.cc +++ b/tensorflow/lite/delegates/hexagon/utils.cc @@ -91,8 +91,6 @@ bool CheckOpVersion(const TfLiteRegistration* registration) { case kTfLiteBuiltinPad: case kTfLiteBuiltinQuantize: case kTfLiteBuiltinRelu6: - case kTfLiteBuiltinResizeBilinear: - case kTfLiteBuiltinResizeNearestNeighbor: case kTfLiteBuiltinSlice: case kTfLiteBuiltinSoftmax: case kTfLiteBuiltinSpaceToDepth: @@ -107,6 +105,8 @@ bool CheckOpVersion(const TfLiteRegistration* registration) { return registration->version == 2; case kTfLiteBuiltinConv2d: case kTfLiteBuiltinDepthwiseConv2d: + case kTfLiteBuiltinResizeBilinear: + case kTfLiteBuiltinResizeNearestNeighbor: return registration->version <= 3; case kTfLiteBuiltinFullyConnected: return registration->version <= 4; From 5198b4467450139006300ba0bb2429580713a32b Mon Sep 17 00:00:00 2001 From: Yanhua Sun Date: Tue, 28 Jul 2020 13:20:19 -0700 Subject: [PATCH 1514/2522] Remove v1 only decorator PiperOrigin-RevId: 323639834 Change-Id: Ie65dfb649898e138f5b2aad046fd9fc6d3f231c0 --- tensorflow/python/ops/array_ops_test.py | 32 ++++++++++++------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/tensorflow/python/ops/array_ops_test.py b/tensorflow/python/ops/array_ops_test.py index d8e2dcd0fb3..87c05b47455 100644 --- a/tensorflow/python/ops/array_ops_test.py +++ b/tensorflow/python/ops/array_ops_test.py @@ -18,11 +18,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.eager import backprop from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gradients from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops from tensorflow.python.platform import test @@ -30,7 +28,6 @@ from tensorflow.python.platform import test class ArrayOpTest(test.TestCase): - @test_util.deprecated_graph_mode_only def testGatherGradHasPartialStaticShape(self): # Create a tensor with an unknown dim 1. 
x = random_ops.random_normal([4, 10, 10]) @@ -38,19 +35,22 @@ class ArrayOpTest(test.TestCase): x, array_ops.reshape(array_ops.where_v2(x[0, :, 0] > 0.5), [-1]), axis=1) - self.assertAllEqual(x.shape.as_list(), [4, None, 10]) + x.shape.assert_is_compatible_with([4, None, 10]) - a = array_ops.gather(array_ops.gather(x, [0, 1]), [0, 1]) - b = array_ops.gather(array_ops.gather(x, [2, 3], axis=2), [0, 1]) - grad_a = ops.convert_to_tensor(gradients.gradients(a, x)[0]) - grad_b = ops.convert_to_tensor(gradients.gradients(b, x)[0]) + with backprop.GradientTape() as tape: + tape.watch(x) + a = array_ops.gather(array_ops.gather(x, [0, 1]), [0, 1]) + grad_a = tape.gradient(a, x) + with backprop.GradientTape() as tape: + tape.watch(x) + b = array_ops.gather(array_ops.gather(x, [2, 3], axis=2), [0, 1]) + grad_b = tape.gradient(b, x) # We make sure that the representation of the shapes are correct; the shape # equality check will always eval to false due to the shapes being partial. - self.assertAllEqual(grad_a.shape.as_list(), [None, None, 10]) - self.assertAllEqual(grad_b.shape.as_list(), [4, None, 10]) + grad_a.shape.assert_is_compatible_with([None, None, 10]) + grad_b.shape.assert_is_compatible_with([4, None, 10]) - @test_util.deprecated_graph_mode_only def testReshapeShapeInference(self): # Create a tensor with an unknown dim 1. x = random_ops.random_normal([4, 10, 10]) @@ -58,11 +58,11 @@ class ArrayOpTest(test.TestCase): x, array_ops.reshape(array_ops.where_v2(x[0, :, 0] > 0.5), [-1]), axis=1) - self.assertAllEqual(x.shape.as_list(), [4, None, 10]) + x.shape.assert_is_compatible_with([4, None, 10]) a = array_ops.reshape(x, array_ops.shape(x)) - self.assertAllEqual(a.shape.as_list(), [4, None, 10]) + a.shape.assert_is_compatible_with([4, None, 10]) b = array_ops.reshape(x, math_ops.cast(array_ops.shape(x), dtypes.int64)) - self.assertAllEqual(b.shape.as_list(), [4, None, 10]) + b.shape.assert_is_compatible_with([4, None, 10]) # We do not shape-infer across a tf.cast into anything that's not tf.int32 # or tf.int64, since they might end up mangling the shape. @@ -70,7 +70,7 @@ class ArrayOpTest(test.TestCase): x, math_ops.cast( math_ops.cast(array_ops.shape(x), dtypes.float32), dtypes.int32)) - self.assertAllEqual(c.shape.as_list(), [None, None, None]) + c.shape.assert_is_compatible_with([None, None, None]) def testEmptyMeshgrid(self): self.assertEqual(array_ops.meshgrid(), []) From bcfb60d0a138d215980b0881e4619a2d9b20e489 Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Tue, 28 Jul 2020 13:23:04 -0700 Subject: [PATCH 1515/2522] [TF2XLA] [NFC] Break apart the [TF2XLA/MLIR] -> xla_compiler dependency edge This is needed for invoking the MLIR tf2xla bridge from xla_compiler. This CL breaks apart items from xla_compiler into individual build targets, which are then depended on from the MLIR TF bridge. 
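For orientation, the contract that moves with these new targets is unchanged: a shape representation callback still maps a TensorFlow shape and dtype to the XLA shape used on device, it is only re-homed from XlaCompiler to XlaHelpers (and XlaCompiler::Argument / CompilationResult become the standalone XlaArgument / XlaCompilationResult). A sketch of that callback, mirroring the IdentityShapeRepresentationFn this patch relocates out of xla_compiler.cc further down:

// Identity mapping: convert the TF shape/dtype to an XLA shape with no
// layout or fast-memory adjustments.
auto identity_fn = [](const TensorShape& shape, DataType dtype,
                      bool use_fast_memory) -> xla::StatusOr<xla::Shape> {
  xla::Shape xla_shape;
  TF_RETURN_IF_ERROR(TensorShapeToXLAShape(dtype, shape, &xla_shape));
  return xla_shape;
};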
PiperOrigin-RevId: 323640340 Change-Id: I78b972503db9e7b5254014ca7e889005490d8339 --- tensorflow/compiler/aot/BUILD | 2 + tensorflow/compiler/jit/BUILD | 8 + tensorflow/compiler/jit/kernels/BUILD | 1 + tensorflow/compiler/mlir/BUILD | 1 + tensorflow/compiler/mlir/tensorflow/BUILD | 7 +- .../tensorflow/utils/compile_mlir_util.cc | 33 ++- .../mlir/tensorflow/utils/compile_mlir_util.h | 17 +- tensorflow/compiler/mlir/xla/BUILD | 10 +- .../compiler/mlir/xla/mlir_hlo_to_hlo.cc | 7 +- .../compiler/mlir/xla/mlir_hlo_to_hlo.h | 5 +- .../xla/transforms/legalize_tf_with_tf2xla.cc | 7 +- .../xla/transforms/mhlo_to_lhlo_with_xla.cc | 2 + tensorflow/compiler/tf2xla/BUILD | 189 +++++++++++++++++- tensorflow/compiler/tf2xla/kernels/BUILD | 14 ++ tensorflow/compiler/tf2xla/lib/BUILD | 1 + tensorflow/compiler/tf2xla/xla_argument.cc | 53 +++++ tensorflow/compiler/tf2xla/xla_argument.h | 121 +++++++++++ tensorflow/compiler/tf2xla/xla_compiler.cc | 122 ----------- tensorflow/compiler/tf2xla/xla_compiler.h | 180 +---------------- tensorflow/compiler/tf2xla/xla_context.cc | 1 - tensorflow/compiler/tf2xla/xla_context.h | 2 +- tensorflow/compiler/tf2xla/xla_expression.cc | 19 ++ tensorflow/compiler/tf2xla/xla_expression.h | 7 + tensorflow/compiler/tf2xla/xla_helpers.cc | 91 ++++++++- tensorflow/compiler/tf2xla/xla_helpers.h | 95 ++++++++- tensorflow/compiler/tf2xla/xla_op_kernel.cc | 45 ++--- tensorflow/compiler/tf2xla/xla_op_kernel.h | 10 +- tensorflow/compiler/tf2xla/xla_resource.cc | 1 - tensorflow/core/tpu/BUILD | 3 +- tensorflow/core/tpu/kernels/BUILD | 2 + 30 files changed, 668 insertions(+), 388 deletions(-) create mode 100644 tensorflow/compiler/tf2xla/xla_argument.cc create mode 100644 tensorflow/compiler/tf2xla/xla_argument.h diff --git a/tensorflow/compiler/aot/BUILD b/tensorflow/compiler/aot/BUILD index d091146c75a..ff255dd9cc1 100644 --- a/tensorflow/compiler/aot/BUILD +++ b/tensorflow/compiler/aot/BUILD @@ -308,6 +308,8 @@ cc_library( ], deps = [ "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/tf2xla:xla_context", + "//tensorflow/compiler/tf2xla:xla_op_registry", "//tensorflow/core:framework", ], alwayslink = 1, diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index b52a350dc48..ecbb1a5d200 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -95,6 +95,7 @@ cc_library( ":xla_kernel_creator", # buildcleaner: keep "//tensorflow/compiler/jit/kernels:xla_ops", "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/tf2xla:xla_op_registry", "//tensorflow/compiler/tf2xla/kernels:xla_ops", "//tensorflow/compiler/xla/service:cpu_plugin", # buildcleaner: keep "//tensorflow/core:core_cpu_internal", @@ -115,6 +116,7 @@ cc_library( ":xla_kernel_creator", # buildcleaner: keep "//tensorflow/compiler/jit/kernels:xla_ops", "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/tf2xla:xla_op_registry", "//tensorflow/compiler/tf2xla/kernels:xla_ops", "//tensorflow/compiler/xla/service:gpu_plugin", # buildcleaner: keep "//tensorflow/core:core_cpu_internal", @@ -172,6 +174,7 @@ XLA_DEVICE_DEPS = [ "//tensorflow/compiler/tf2xla:common", "//tensorflow/compiler/tf2xla:tf2xla_util", "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/tf2xla:xla_op_registry", "//tensorflow/compiler/tf2xla/kernels:xla_ops", "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla/client:client_library", @@ -343,6 +346,7 @@ cc_library( "//tensorflow/compiler/mlir/tensorflow:compile_mlir_util_no_tf_dialect_passes", 
"//tensorflow/compiler/tf2xla:common", "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/tf2xla:xla_context", "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla/client:client_library", @@ -406,6 +410,7 @@ cc_library( ":compilation_passes", "//tensorflow/compiler/jit/kernels:xla_ops_no_jit_rewrite_registration", "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/tf2xla:xla_op_registry", "//tensorflow/core:core_cpu_internal", "//tensorflow/core:framework", "//tensorflow/core:lib", @@ -641,6 +646,7 @@ cc_library( "//tensorflow/compiler/tf2xla:side_effect_util", "//tensorflow/compiler/tf2xla:tf2xla_util", "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/tf2xla:xla_op_registry", "//tensorflow/compiler/tf2xla/cc:xla_jit_ops", "//tensorflow/compiler/tf2xla/cc:xla_ops", "//tensorflow/compiler/xla:status_macros", @@ -700,6 +706,7 @@ cc_library( hdrs = ["device_util.h"], deps = [ "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/tf2xla:xla_op_registry", "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:statusor", "//tensorflow/core:framework", @@ -914,6 +921,7 @@ cc_library( "//tensorflow/compiler/jit/graphcycles", "//tensorflow/compiler/tf2xla:resource_operation_table", "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/tf2xla:xla_op_registry", "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:util", "//tensorflow/core:core_cpu", diff --git a/tensorflow/compiler/jit/kernels/BUILD b/tensorflow/compiler/jit/kernels/BUILD index 347bae087df..eb9ad8a2e85 100644 --- a/tensorflow/compiler/jit/kernels/BUILD +++ b/tensorflow/compiler/jit/kernels/BUILD @@ -21,6 +21,7 @@ XLA_OPS_DEPS = [ "//tensorflow/compiler/tf2xla:common", "//tensorflow/compiler/tf2xla:tf2xla_util", "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/tf2xla:xla_op_registry", "//tensorflow/compiler/xla:executable_run_options", "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:statusor", diff --git a/tensorflow/compiler/mlir/BUILD b/tensorflow/compiler/mlir/BUILD index 57f923caa91..01c187790b7 100644 --- a/tensorflow/compiler/mlir/BUILD +++ b/tensorflow/compiler/mlir/BUILD @@ -150,6 +150,7 @@ tf_cc_binary( "//tensorflow/compiler/mlir/tensorflow:translate_registration", "//tensorflow/compiler/mlir/tensorflow:translate_tf_dialect_op", "//tensorflow/compiler/mlir/xla:xla_mlir_translate", + "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core:tensorflow", diff --git a/tensorflow/compiler/mlir/tensorflow/BUILD b/tensorflow/compiler/mlir/tensorflow/BUILD index 2a800cfc8c4..fe1f47d8d69 100644 --- a/tensorflow/compiler/mlir/tensorflow/BUILD +++ b/tensorflow/compiler/mlir/tensorflow/BUILD @@ -1477,10 +1477,13 @@ COMPILE_MLIR_UTIL_DEPS = [ "//tensorflow/compiler/mlir/xla:xla_legalize_tf", "//tensorflow/compiler/mlir/xla:xla_legalize_tf_with_tf2xla", "//tensorflow/compiler/tf2xla:common", - "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/tf2xla:xla_helpers", + "//tensorflow/compiler/tf2xla:xla_argument", + "//tensorflow/compiler/xla/client:xla_computation", + "//tensorflow/core/common_runtime:core_cpu_internal", + "//tensorflow/core/platform:logging", "//tensorflow/core:framework", "//tensorflow/core:protos_all_cc", - "//tensorflow/core/platform:logging", "//tensorflow/stream_executor/lib", "//tensorflow/compiler/xla:xla_data_proto_cc", 
"//tensorflow/compiler/xla/service:hlo", diff --git a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc index 5e548da55f1..16bc851d3a6 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc @@ -83,7 +83,7 @@ Status ParseMlirModule(llvm::StringRef mlir_module_string, Status GetXlaInputShapes( mlir::ModuleOp module, llvm::ArrayRef arg_shapes, bool use_tuple_args, - const XlaCompiler::ShapeRepresentationFn shape_representation_fn, + const XlaHelpers::ShapeRepresentationFn shape_representation_fn, std::vector* xla_input_shapes) { xla_input_shapes->clear(); @@ -135,9 +135,8 @@ Status GetXlaInputShapes( // output based on static shapes in MLIR module Status GetOutputInfo( mlir::ModuleOp module, - const XlaCompiler::ShapeRepresentationFn shape_representation_fn, - xla::Shape* xla_output_shape, - std::vector* outputs) { + const XlaHelpers::ShapeRepresentationFn shape_representation_fn, + xla::Shape* xla_output_shape, std::vector* outputs) { auto shape_representation_fn_no_fast_memory = [shape_representation_fn](const TensorShape& shape, DataType dtype) { return shape_representation_fn(shape, dtype, /*use_fast_memory=*/false); @@ -161,7 +160,7 @@ Status GetOutputInfo( // Construct OutputDescription for result. outputs->emplace_back(); - XlaCompiler::OutputDescription& out_desc = outputs->back(); + XlaOutputDescription& out_desc = outputs->back(); TF_RETURN_IF_ERROR(ConvertToDataType(tensor_type, &out_desc.type)); // TODO(ycao): Support constant output. out_desc.is_constant = false; @@ -185,7 +184,7 @@ Status GetOutputInfo( // TODO(ycao): Implement logic to compute resource updates when we need to // support graphs with resource updates in MLIR-based TF compiler bridge. 
void GetResourceUpdatesForMlir( - std::vector* resource_updates) { + std::vector* resource_updates) { resource_updates->clear(); } @@ -265,7 +264,7 @@ Status ConvertMLIRToXlaComputation( mlir::ModuleOp module_op, llvm::StringRef device_type, xla::XlaComputation* xla_computation, bool use_tuple_args, bool return_tuple, - const XlaCompiler::ShapeRepresentationFn shape_representation_fn, + const XlaHelpers::ShapeRepresentationFn shape_representation_fn, std::vector> custom_legalization_passes) { mlir::PassManager tf2xla(module_op.getContext()); tf2xla.addNestedPass(mlir::createCanonicalizerPass()); @@ -341,8 +340,8 @@ Status ConvertMLIRToXlaComputation( static Status CompileMlirToXlaHlo( mlir::ModuleOp module_op, llvm::ArrayRef arg_shapes, llvm::StringRef device_type, bool use_tuple_args, - XlaCompiler::ShapeRepresentationFn shape_representation_fn, - XlaCompiler::CompilationResult* compilation_result, + XlaHelpers::ShapeRepresentationFn shape_representation_fn, + XlaCompilationResult* compilation_result, std::vector> custom_legalization_passes) { if (VLOG_IS_ON(1)) tensorflow::DumpMlirOpToFile("mlir_compile_before", module_op); @@ -391,8 +390,8 @@ static Status CompileMlirToXlaHlo( Status CompileSerializedMlirToXlaHlo( llvm::StringRef mlir_module_string, llvm::ArrayRef arg_shapes, llvm::StringRef device_type, bool use_tuple_args, - const XlaCompiler::ShapeRepresentationFn shape_representation_fn, - XlaCompiler::CompilationResult* compilation_result, + const XlaHelpers::ShapeRepresentationFn shape_representation_fn, + XlaCompilationResult* compilation_result, std::vector> custom_legalization_passes) { RegisterDialects(); mlir::MLIRContext mlir_context; @@ -411,16 +410,16 @@ Status CompileSerializedMlirToXlaHlo( // removed from the signature. // Returns the original indices for the other arguments on success. static StatusOr> RewriteWithArgs( - mlir::ModuleOp module, llvm::ArrayRef args) { + mlir::ModuleOp module, llvm::ArrayRef args) { mlir::FuncOp main_fn = module.lookupSymbol("main"); std::vector params; auto builder = mlir::OpBuilder(main_fn.getBody()); std::vector args_to_erase; for (int idx = 0; idx < args.size(); idx++) { - const XlaCompiler::Argument& xla_arg = args[idx]; + const XlaArgument& xla_arg = args[idx]; mlir::BlockArgument mlir_arg = main_fn.getArgument(idx); - if (xla_arg.kind != XlaCompiler::Argument::kConstant) { + if (xla_arg.kind != XlaArgument::kConstant) { params.push_back(idx); continue; } @@ -439,11 +438,11 @@ static StatusOr> RewriteWithArgs( } Status CompileGraphToXlaHlo( - const Graph& graph, llvm::ArrayRef args, + const Graph& graph, llvm::ArrayRef args, llvm::StringRef device_type, bool use_tuple_args, const FunctionLibraryDefinition& flib_def, const GraphDebugInfo& debug_info, - const XlaCompiler::ShapeRepresentationFn shape_representation_fn, - XlaCompiler::CompilationResult* compilation_result, + const XlaHelpers::ShapeRepresentationFn shape_representation_fn, + XlaCompilationResult* compilation_result, std::vector> custom_legalization_passes) { RegisterDialects(); diff --git a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.h b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.h index 24b60dcb346..719a96f52d4 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.h +++ b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.h @@ -20,7 +20,10 @@ limitations under the License. 
#include "llvm/ADT/StringRef.h" #include "mlir/IR/Module.h" // from @llvm-project #include "mlir/Pass/Pass.h" // from @llvm-project -#include "tensorflow/compiler/tf2xla/xla_compiler.h" +#include "tensorflow/compiler/tf2xla/xla_argument.h" +#include "tensorflow/compiler/tf2xla/xla_helpers.h" +#include "tensorflow/compiler/xla/client/xla_computation.h" +#include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/protobuf/graph_debug_info.pb.h" #include "tensorflow/stream_executor/lib/statusor.h" @@ -57,7 +60,7 @@ Status ConvertMLIRToXlaComputation( mlir::ModuleOp module_op, llvm::StringRef device_type, xla::XlaComputation* xla_computation, bool use_tuple_args, bool return_tuple, - const XlaCompiler::ShapeRepresentationFn shape_representation_fn = nullptr, + const XlaHelpers::ShapeRepresentationFn shape_representation_fn = nullptr, std::vector> custom_legalization_passes = {}); // Compiles a serialized MLIR module into XLA HLO, generates all accompanying @@ -65,17 +68,17 @@ Status ConvertMLIRToXlaComputation( Status CompileSerializedMlirToXlaHlo( llvm::StringRef mlir_module_string, llvm::ArrayRef arg_shapes, llvm::StringRef device_type, bool use_tuple_args, - const XlaCompiler::ShapeRepresentationFn shape_representation_fn, - XlaCompiler::CompilationResult* compilation_result, + const XlaHelpers::ShapeRepresentationFn shape_representation_fn, + XlaCompilationResult* compilation_result, std::vector> custom_legalization_passes = {}); // Same as the above but takes input as TensorFlow Graph. Status CompileGraphToXlaHlo( - const Graph& graph, llvm::ArrayRef args, + const Graph& graph, llvm::ArrayRef args, llvm::StringRef device_type, bool use_tuple_args, const FunctionLibraryDefinition& flib_def, const GraphDebugInfo& debug_info, - const XlaCompiler::ShapeRepresentationFn shape_representation_fn, - XlaCompiler::CompilationResult* compilation_result, + const XlaHelpers::ShapeRepresentationFn shape_representation_fn, + XlaCompilationResult* compilation_result, std::vector> custom_legalization_passes = {}); } // namespace tensorflow diff --git a/tensorflow/compiler/mlir/xla/BUILD b/tensorflow/compiler/mlir/xla/BUILD index 838b060079c..55daec0395e 100644 --- a/tensorflow/compiler/mlir/xla/BUILD +++ b/tensorflow/compiler/mlir/xla/BUILD @@ -92,7 +92,11 @@ cc_library( "//tensorflow/compiler/mlir/tensorflow:export_tf_dialect_op", "//tensorflow/compiler/mlir/tensorflow:lower_tf_lib", "//tensorflow/compiler/mlir/tensorflow:translate_utils", - "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/tf2xla:xla_compilation_device", + "//tensorflow/compiler/tf2xla:xla_context", + "//tensorflow/compiler/tf2xla:xla_expression", + "//tensorflow/compiler/tf2xla:xla_helpers", + "//tensorflow/compiler/tf2xla:xla_op_registry", "//tensorflow/compiler/xla/client:xla_builder", "//tensorflow/core:core_cpu_lib", "//tensorflow/core:framework", @@ -125,8 +129,10 @@ cc_library( "//tensorflow/compiler/mlir/hlo", "//tensorflow/compiler/mlir/hlo:hlo_dialect_registration", "//tensorflow/compiler/mlir/hlo:lhlo", + "//tensorflow/compiler/xla:debug_options_flags", "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla/service:backend", "//tensorflow/compiler/xla/service:buffer_assignment", "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:hlo_casting_utils", @@ -228,7 +234,7 @@ cc_library( "//tensorflow/compiler/mlir/tensorflow:convert_type", 
"//tensorflow/compiler/mlir/tensorflow:error_util", "//tensorflow/compiler/tf2xla:common", - "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/tf2xla:xla_helpers", "//tensorflow/compiler/xla:comparison_util", "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", diff --git a/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc b/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc index a4c3c43cfbf..e45cf1b56ee 100644 --- a/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc +++ b/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc @@ -43,7 +43,6 @@ limitations under the License. #include "tensorflow/compiler/mlir/tensorflow/utils/convert_type.h" #include "tensorflow/compiler/mlir/xla/type_to_shape.h" #include "tensorflow/compiler/tf2xla/shape_util.h" -#include "tensorflow/compiler/tf2xla/xla_compiler.h" #include "tensorflow/compiler/xla/client/lib/matrix.h" #include "tensorflow/compiler/xla/client/lib/quantize.h" #include "tensorflow/compiler/xla/client/lib/slicing.h" @@ -463,7 +462,7 @@ class ConvertToHloModule { // single value. explicit ConvertToHloModule( mlir::ModuleOp module, bool use_tuple_args, bool return_tuple, - tensorflow::XlaCompiler::ShapeRepresentationFn shape_representation_fn) + tensorflow::XlaHelpers::ShapeRepresentationFn shape_representation_fn) : module_(module), module_builder_("main"), use_tuple_args_(use_tuple_args), @@ -545,7 +544,7 @@ class ConvertToHloModule { // Shape representation function to determine entry function argument and // result shapes. - tensorflow::XlaCompiler::ShapeRepresentationFn shape_representation_fn_; + tensorflow::XlaHelpers::ShapeRepresentationFn shape_representation_fn_; // Unique suffix to give to the name of the next lowered region. size_t region_id_ = 0; @@ -1500,7 +1499,7 @@ LogicalResult AddDynamicParameterBindings(mlir::ModuleOp module, Status ConvertMlirHloToHlo(mlir::ModuleOp module, xla::HloProto* hlo_proto, bool use_tuple_args, bool return_tuple, - const tensorflow::XlaCompiler::ShapeRepresentationFn + const tensorflow::XlaHelpers::ShapeRepresentationFn shape_representation_fn) { mlir::StatusScopedDiagnosticHandler diag_handler(module.getContext()); ConvertToHloModule converter(module, use_tuple_args, return_tuple, diff --git a/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.h b/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.h index 8bfe4c76b04..d84aa92d3e2 100644 --- a/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.h +++ b/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.h @@ -18,9 +18,10 @@ limitations under the License. #include "mlir/IR/Module.h" // from @llvm-project #include "tensorflow/compiler/mlir/tensorflow/utils/error_util.h" -#include "tensorflow/compiler/tf2xla/xla_compiler.h" +#include "tensorflow/compiler/tf2xla/xla_helpers.h" #include "tensorflow/compiler/xla/client/xla_builder.h" #include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/core/framework/tensor_shape.h" namespace mlir { @@ -33,7 +34,7 @@ namespace mlir { // single value. 
Status ConvertMlirHloToHlo(mlir::ModuleOp module, ::xla::HloProto* hlo_proto, bool use_tuple_args, bool return_tuple, - const tensorflow::XlaCompiler::ShapeRepresentationFn + const tensorflow::XlaHelpers::ShapeRepresentationFn shape_representation_fn = nullptr); // Creates XlaOp equivalent of a given MLIR operation using the operand info diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc index 34e12d3300e..1743ae7be17 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc @@ -48,7 +48,8 @@ limitations under the License. #include "tensorflow/compiler/tf2xla/xla_compilation_device.h" #include "tensorflow/compiler/tf2xla/xla_context.h" #include "tensorflow/compiler/tf2xla/xla_expression.h" -#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" +#include "tensorflow/compiler/tf2xla/xla_helpers.h" +#include "tensorflow/compiler/tf2xla/xla_op_registry.h" #include "tensorflow/compiler/xla/client/xla_builder.h" #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/common_runtime/device_factory.h" @@ -410,7 +411,7 @@ LogicalResult Tf2XlaRewriter::LegalizeOp() { device_->GetAllocator(tensorflow::AllocatorAttributes()), expr.dtype(), shape_or.ValueOrDie()); tensorflow::Tensor& tensor = tensors.back(); - tensorflow::XlaOpKernelContext::AssignExpressionToTensor(expr, &tensor); + tensorflow::XlaExpression::AssignExpressionToTensor(expr, &tensor); inputs.emplace_back(&tensor); } @@ -438,7 +439,7 @@ LogicalResult Tf2XlaRewriter::LegalizeOp() { for (int i = 0, e = op_->getNumResults(); i < e; i++) { tensorflow::Tensor* output = op_context.mutable_output(i); const tensorflow::XlaExpression* expr = - tensorflow::XlaOpKernelContext::CastExpressionFromTensor(*output); + tensorflow::XlaExpression::CastExpressionFromTensor(*output); if (expr->kind() != tensorflow::XlaExpression::Kind::kXlaOp) return op_->emitError( "expects XlaExpression of kind kXlaOp in compiled output"); diff --git a/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.cc b/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.cc index 519068893e7..d45f1ba8ec6 100644 --- a/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.cc +++ b/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.cc @@ -37,6 +37,8 @@ limitations under the License. 
#include "tensorflow/compiler/mlir/xla/hlo_function_importer.h" #include "tensorflow/compiler/mlir/xla/hlo_utils.h" #include "tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.h" +#include "tensorflow/compiler/xla/debug_options_flags.h" +#include "tensorflow/compiler/xla/service/backend.h" #include "tensorflow/compiler/xla/service/buffer_assignment.h" #include "tensorflow/compiler/xla/service/hlo_casting_utils.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD index 663e34c2b8e..1e57c11b2cf 100644 --- a/tensorflow/compiler/tf2xla/BUILD +++ b/tensorflow/compiler/tf2xla/BUILD @@ -50,6 +50,7 @@ cc_library( visibility = ["//visibility:public"], deps = [ ":xla_compiler", + ":xla_op_registry", "//tensorflow/compiler/tf2xla/kernels:xla_ops", "//tensorflow/core:framework", "//tensorflow/core:framework_internal", @@ -145,6 +146,7 @@ cc_library( ":tf2xla_proto_cc", ":tf2xla_util", ":xla_compiler", + ":xla_op_registry", "//tensorflow/compiler/aot:aot_only_var_handle_op", "//tensorflow/compiler/tf2xla/kernels:xla_ops", "//tensorflow/compiler/xla/client", @@ -316,14 +318,8 @@ cc_library( srcs = [ "const_analysis.cc", "graph_compiler.cc", - "xla_compilation_device.cc", "xla_compiler.cc", - "xla_context.cc", - "xla_expression.cc", - "xla_helpers.cc", "xla_op_kernel.cc", - "xla_op_registry.cc", - "xla_resource.cc", "xla_cpu_backend.cc", ] + if_cuda_is_configured([ "xla_gpu_backend.cc", @@ -333,14 +329,10 @@ cc_library( hdrs = [ "const_analysis.h", "graph_compiler.h", - "xla_compilation_device.h", "xla_compiler.h", - "xla_context.h", - "xla_expression.h", "xla_helpers.h", "xla_op_kernel.h", "xla_op_registry.h", - "xla_resource.h", ], visibility = [":friends"], deps = [ @@ -351,10 +343,18 @@ cc_library( ":sharding_util", ":side_effect_util", ":tf2xla_util", + ":xla_argument", + ":xla_compilation_device", + ":xla_context", + ":xla_expression", + ":xla_helpers", + ":xla_op_registry", + ":xla_resource", "//tensorflow/compiler/jit:common", "//tensorflow/compiler/jit:flags", "//tensorflow/compiler/jit:shape_inference", "//tensorflow/compiler/jit:xla_cluster_util", + "//tensorflow/compiler/mlir/tensorflow:compile_mlir_util_no_tf_dialect_passes", "//tensorflow/compiler/tf2xla/lib:util", "//tensorflow/compiler/xla:literal", "//tensorflow/compiler/xla:shape_util", @@ -370,6 +370,7 @@ cc_library( "//tensorflow/compiler/xla/client:xla_computation", "//tensorflow/compiler/xla/client/lib:arithmetic", "//tensorflow/compiler/xla/client/lib:constants", + "//tensorflow/compiler/xla/service:hlo", "//tensorflow/core:core_cpu", "//tensorflow/core:core_cpu_internal", "//tensorflow/core:framework", @@ -388,6 +389,172 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "xla_compilation_device", + srcs = [ + "xla_compilation_device.cc", + ], + hdrs = [ + "xla_compilation_device.h", + ], + deps = [ + ":common", + ":frontend_attributes_util", + ":sharding_util", + ":xla_context", + ":xla_helpers", + "//tensorflow/compiler/xla/client:xla_builder", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:session_options", + "//tensorflow/core/common_runtime:core_cpu_internal", + ], + alwayslink = 1, +) + +cc_library( + name = "xla_context", + srcs = [ + "xla_context.cc", + ], + hdrs = [ + "xla_context.h", + ], + deps = [ + ":common", + ":xla_expression", + ":xla_helpers", + "//tensorflow/compiler/xla:literal", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status_macros", + 
"//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:xla_data_proto_cc", + "//tensorflow/compiler/xla/client:client_library", + "//tensorflow/compiler/xla/client:xla_builder", + "//tensorflow/compiler/xla/client:xla_computation", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core/common_runtime:core_cpu_internal", + "@com_google_absl//absl/types:span", + ], + alwayslink = 1, +) + +cc_library( + name = "xla_op_registry", + srcs = [ + "xla_op_registry.cc", + ], + hdrs = [ + "xla_op_registry.h", + ], + visibility = [":friends"], + deps = [ + ":common", + ":xla_context", + "//tensorflow/compiler/jit:flags", + "//tensorflow/compiler/jit:xla_cluster_util", + "//tensorflow/compiler/xla/client:client_library", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:session_options", + "//tensorflow/core:stream_executor_no_cuda", + "//tensorflow/core/common_runtime:core_cpu_internal", + ], + alwayslink = 1, +) + +cc_library( + name = "xla_expression", + srcs = [ + "xla_expression.cc", + ], + hdrs = [ + "xla_expression.h", + ], + deps = [ + ":common", + ":xla_resource", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla/client", + "//tensorflow/compiler/xla/client:xla_builder", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "@com_google_absl//absl/types:optional", + ], + alwayslink = 1, +) + +cc_library( + name = "xla_resource", + srcs = [ + "xla_resource.cc", + ], + hdrs = [ + "xla_resource.h", + ], + deps = [ + ":common", + ":sharding_util", + ":xla_helpers", + "//tensorflow/compiler/xla:xla_data_proto_cc", + "//tensorflow/compiler/xla/client:xla_builder", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + ], + alwayslink = 1, +) + +cc_library( + name = "xla_helpers", + srcs = [ + "xla_helpers.cc", + ], + hdrs = [ + "xla_helpers.h", + ], + visibility = [":friends"], + deps = [ + ":common", + ":host_compute_metadata_proto_cc", + "//tensorflow/compiler/tf2xla/lib:util", + "//tensorflow/compiler/xla:types", + "//tensorflow/compiler/xla/client:xla_builder", + "//tensorflow/compiler/xla/client:xla_computation", + "//tensorflow/compiler/xla/client/lib:arithmetic", + "//tensorflow/compiler/xla/client/lib:constants", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "@com_google_absl//absl/types:span", + ], + alwayslink = 1, +) + +cc_library( + name = "xla_argument", + srcs = [ + "xla_argument.cc", + ], + hdrs = [ + "xla_argument.h", + ], + deps = [ + ":host_compute_metadata_proto_cc", + ":xla_resource", + "//tensorflow/compiler/xla/client:xla_builder", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/core:framework", + "@com_google_absl//absl/types:span", + ], + alwayslink = 1, +) + cc_library( name = "common", srcs = [ @@ -564,6 +731,8 @@ tf_cc_test( ":common", ":side_effect_util", ":xla_compiler", + ":xla_expression", + ":xla_resource", "//tensorflow/cc:cc_ops", "//tensorflow/cc:function_ops", "//tensorflow/cc:functional_ops", diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD index ec0cb9c0b66..26051c98cb7 100644 --- a/tensorflow/compiler/tf2xla/kernels/BUILD +++ b/tensorflow/compiler/tf2xla/kernels/BUILD @@ -145,7 +145,12 @@ tf_kernel_library( "//tensorflow/compiler/jit:xla_activity_listener", 
"//tensorflow/compiler/jit:xla_activity_proto_cc", "//tensorflow/compiler/tf2xla:common", + "//tensorflow/compiler/tf2xla:xla_compilation_device", "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/tf2xla:xla_context", + "//tensorflow/compiler/tf2xla:xla_helpers", + "//tensorflow/compiler/tf2xla:xla_op_registry", + "//tensorflow/compiler/tf2xla:xla_resource", "//tensorflow/compiler/tf2xla/lib:broadcast", "//tensorflow/compiler/tf2xla/lib:data_format", "//tensorflow/compiler/tf2xla/lib:random", @@ -223,6 +228,8 @@ cc_library( deps = [ "//tensorflow/compiler/tf2xla:common", "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/tf2xla:xla_helpers", + "//tensorflow/compiler/tf2xla:xla_op_registry", "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:util", @@ -276,6 +283,8 @@ tf_kernel_library( "//tensorflow/compiler/tf2xla:common", "//tensorflow/compiler/tf2xla:side_effect_util", "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/tf2xla:xla_helpers", + "//tensorflow/compiler/tf2xla:xla_op_registry", "//tensorflow/compiler/tf2xla/ops:xla_ops", "//tensorflow/compiler/xla:literal", "//tensorflow/compiler/xla:status_macros", @@ -296,6 +305,8 @@ tf_kernel_library( "//tensorflow/compiler/tf2xla:common", "//tensorflow/compiler/tf2xla:side_effect_util", "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/tf2xla:xla_context", + "//tensorflow/compiler/tf2xla:xla_op_registry", "//tensorflow/compiler/tf2xla/ops:xla_ops", "//tensorflow/compiler/xla:literal", "//tensorflow/compiler/xla/client:xla_builder", @@ -314,6 +325,8 @@ tf_kernel_library( "//tensorflow/compiler/tf2xla:common", "//tensorflow/compiler/tf2xla:side_effect_util", "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/tf2xla:xla_context", + "//tensorflow/compiler/tf2xla:xla_op_registry", "//tensorflow/compiler/tf2xla/ops:xla_ops", "//tensorflow/compiler/xla:literal", "//tensorflow/compiler/xla/client:xla_builder", @@ -333,6 +346,7 @@ tf_kernel_library( ], deps = [ "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/tf2xla:xla_op_registry", "//tensorflow/core:array_ops_op_lib", "//tensorflow/core:framework", "//tensorflow/core:lib", diff --git a/tensorflow/compiler/tf2xla/lib/BUILD b/tensorflow/compiler/tf2xla/lib/BUILD index f0bd97c85eb..531679d3905 100644 --- a/tensorflow/compiler/tf2xla/lib/BUILD +++ b/tensorflow/compiler/tf2xla/lib/BUILD @@ -38,6 +38,7 @@ cc_library( hdrs = ["random.h"], deps = [ "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/tf2xla:xla_helpers", "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla/client:xla_builder", diff --git a/tensorflow/compiler/tf2xla/xla_argument.cc b/tensorflow/compiler/tf2xla/xla_argument.cc new file mode 100644 index 00000000000..fe31025386e --- /dev/null +++ b/tensorflow/compiler/tf2xla/xla_argument.cc @@ -0,0 +1,53 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/tf2xla/xla_argument.h" + +namespace tensorflow { + +bool XlaArgument::operator==(const XlaArgument& other) const { + if (std::tie(kind, resource_kind, type, name, initialized, max_array_size, + tensor_array_gradients) != + std::tie(other.kind, other.resource_kind, other.type, other.name, + other.initialized, other.max_array_size, + other.tensor_array_gradients)) { + return false; + } + if (absl::holds_alternative(shape)) { + if (!absl::holds_alternative(other.shape)) { + return false; + } + if (!xla::Shape::Equal()(absl::get(shape), + absl::get(other.shape))) { + return false; + } + } else { + if (!absl::holds_alternative(other.shape)) { + return false; + } + if (absl::get(shape) != absl::get(other.shape)) { + return false; + } + } + if (constant_value.shape() != other.constant_value.shape()) { + return false; + } + if (is_same_data_across_replicas != other.is_same_data_across_replicas) { + return false; + } + return constant_value.tensor_data() == other.constant_value.tensor_data(); +} + +} // end namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/xla_argument.h b/tensorflow/compiler/tf2xla/xla_argument.h new file mode 100644 index 00000000000..e2cd634e1d5 --- /dev/null +++ b/tensorflow/compiler/tf2xla/xla_argument.h @@ -0,0 +1,121 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_TF2XLA_XLA_ARGUMENT_H_ +#define TENSORFLOW_COMPILER_TF2XLA_XLA_ARGUMENT_H_ + +#include "absl/types/span.h" +#include "tensorflow/compiler/tf2xla/host_compute_metadata.pb.h" +#include "tensorflow/compiler/tf2xla/xla_resource.h" +#include "tensorflow/compiler/xla/client/xla_builder.h" +#include "tensorflow/compiler/xla/service/hlo_sharding.h" +#include "tensorflow/core/framework/tensor.h" + +namespace tensorflow { + +// Describes how to derive the value of each _Arg node in the graph/function +// being compiled. There must be one Argument for each _Arg index. +struct XlaArgument { + enum Kind { + // Default value; not a valid kind. + kInvalid, + + // Argument is a compile-time constant. No associated runtime parameter. + kConstant, + + // Argument is a Variable, TensorArray, or Stack resource. Has an + // associated runtime parameter iff `initialized` is true. + kResource, + + // Argument is a run-time parameter. + kParameter, + + // Argument is an XLA token. + kToken, + + // Argument is a TensorList. + kTensorList, + }; + + Kind kind = kInvalid; + + // The type of the argument. If the argument is a resource, this + // is the type of the variable's value, not DT_RESOURCE. + DataType type = DT_INVALID; + + // The shape of the argument. For: + // * a parameter: the shape of the parameter. We allow setting the xla shape + // if known. 
+  // * a constant: ignored; the shape given by constant_value is used
+  //     instead.
+  // * an uninitialized resource: ignored. We don't yet know the shape of an
+  //     uninitialized resource (otherwise we would have initialized it!)
+  // * an initialized variable: the shape of the variable's value.
+  // * an initialized TensorArray or Stack resource: the shape of an entry in
+  //   the TensorArray/Stack. Note this is the size of a single entry, not the
+  //   XLA data structure that represents the complete stack/array.
+  absl::variant<TensorShape, xla::Shape> shape;
+
+  // The value of the argument, if it is a compile-time constant. Must be a
+  // host-memory tensor.
+  Tensor constant_value;
+
+  // The name of this argument, used for debugging.
+  string name;
+
+  // The name of TensorFlow _Arg node, used for debugging.
+  string node_name;
+
+  // For a kResource, what kind of resource is it?
+  XlaResource::Kind resource_kind = XlaResource::kInvalid;
+
+  // For a kResource, has this resource been initialized?
+  bool initialized = false;
+
+  // For a kResource, is this resource on Fast Memory.
+  bool fast_mem = false;
+
+  // For a TensorArray or Stack resource, what is the array's declared size?
+  // (Used for lazy initialization.)
+  int64 max_array_size = -1;
+
+  // TensorArray resource parameters are passed as (array, gradient array 0,
+  // ..., gradient array k), where the gradient arrays are in the same order
+  // as `tensor_array_gradients`.
+  std::set<string> tensor_array_gradients;
+
+  // dynamic dims to arg number map. Empty if no dynamic shapes.
+  std::map<int32, int32> dynamic_dim_to_arg_num_map;
+  bool is_pad_arg = false;
+
+  // Whether this argument will receive the same data across all replicas.
+  bool is_same_data_across_replicas = false;
+
+  bool operator==(const XlaArgument& other) const;
+
+  // Returns a human-readable summary of the argument.
+  string HumanString() const;
+
+  // Returns the dimension sizes for either TensorShape or xla::Shape.
+  std::vector<int64> DimensionSizes() const;
+  absl::InlinedVector<int64, 4> DimensionSizesAsInlinedVector() const;
+
+  // Returns the human-readable string for either TensorShape or xla::Shape.
+ string ShapeHumanString() const; +}; + +} // end namespace tensorflow + +#endif // TENSORFLOW_COMPILER_TF2XLA_XLA_ARGUMENT_H_ diff --git a/tensorflow/compiler/tf2xla/xla_compiler.cc b/tensorflow/compiler/tf2xla/xla_compiler.cc index 0722c30787f..db54f2f6563 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.cc +++ b/tensorflow/compiler/tf2xla/xla_compiler.cc @@ -422,39 +422,6 @@ Status BuildComputation( } // namespace -bool XlaCompiler::Argument::operator==( - const XlaCompiler::Argument& other) const { - if (std::tie(kind, resource_kind, type, name, initialized, max_array_size, - tensor_array_gradients) != - std::tie(other.kind, other.resource_kind, other.type, other.name, - other.initialized, other.max_array_size, - other.tensor_array_gradients)) { - return false; - } - if (absl::holds_alternative(shape)) { - if (!absl::holds_alternative(other.shape)) { - return false; - } - if (!xla::Shape::Equal()(absl::get(shape), - absl::get(other.shape))) { - return false; - } - } else { - if (!absl::holds_alternative(other.shape)) { - return false; - } - if (absl::get(shape) != absl::get(other.shape)) { - return false; - } - } - if (constant_value.shape() != other.constant_value.shape()) { - return false; - } - if (is_same_data_across_replicas != other.is_same_data_across_replicas) { - return false; - } - return constant_value.tensor_data() == other.constant_value.tensor_data(); -} string XlaCompiler::Argument::HumanString() const { string common; @@ -1494,93 +1461,4 @@ xla::StatusOr XlaCompiler::GetNodeToken(const string& node_name) { return iter->second; } -XlaCompiler::ShapeRepresentationFn IdentityShapeRepresentationFn() { - return [](const TensorShape& shape, DataType dtype, - bool use_fast_memory) -> xla::StatusOr { - xla::Shape xla_shape; - TF_RETURN_IF_ERROR(TensorShapeToXLAShape(dtype, shape, &xla_shape)); - return xla_shape; - }; -} - -// Rewrites the layout of xla_shape if there is tiled sharding. -Status RewriteLayoutWithShardedShape( - const absl::optional& sharding, bool use_fast_memory, - XlaCompiler::ShapeRepresentationFn shape_representation_fn, - xla::Shape* xla_shape) { - if (sharding && !sharding->IsTileMaximal()) { - // After sharding, per core shape might have different layout. For example, - // before sharding, a shape [128, 128] will be assigned default - // minor-to-major {1, 0}. But after we shard this shape to [128, 64] * 2, - // the sharded shapes will have minor-to-major {0, 1}. - // - // As a result, for sharded shapes, we set their layout to per core shape's - // layout. - // - // TODO(endlessroad): for variable input & update, we might have - // different layouts which will prevent input output aliasing and - // increase memory usage. Investigate such cases. 
- int64 device = *sharding->tile_assignment().begin(); - std::vector offset = - sharding->TileOffsetForDevice(*xla_shape, device); - std::vector limit = sharding->TileLimitForDevice(*xla_shape, device); - std::vector dimensions(xla_shape->rank()); - for (int64 i = 0; i < xla_shape->rank(); ++i) { - dimensions[i] = limit[i] - offset[i]; - } - xla::Shape per_device_xla_shape = - xla::ShapeUtil::MakeShape(xla_shape->element_type(), dimensions); - TensorShape per_device_tensor_shape; - TF_RETURN_IF_ERROR( - XLAShapeToTensorShape(per_device_xla_shape, &per_device_tensor_shape)); - TF_ASSIGN_OR_RETURN(DataType dtype, EncodePrimitiveTypeAsDataType( - xla_shape->element_type())); - TF_ASSIGN_OR_RETURN(per_device_xla_shape, - shape_representation_fn(per_device_tensor_shape, dtype, - use_fast_memory)); - *xla_shape->mutable_layout() = per_device_xla_shape.layout(); - } - return Status::OK(); -} - -// There is a shape_representation_fn or sharding for an output, this function -// uses a reshape to fix the layout. -xla::StatusOr ReshapeWithCorrectRepresentationAndSharding( - xla::XlaBuilder* builder, xla::XlaOp original, xla::Shape original_shape, - XlaCompiler::ShapeRepresentationFn shape_representation_fn, - absl::optional sharding, bool fast_mem) { - if (original_shape.IsTuple()) { - std::vector elements; - for (int64 i = 0; i < original_shape.tuple_shapes_size(); ++i) { - auto subsharding = sharding ? sharding->tuple_shardings(i) : sharding; - TF_ASSIGN_OR_RETURN(auto element, - ReshapeWithCorrectRepresentationAndSharding( - builder, xla::GetTupleElement(original, i), - original_shape.tuple_shapes(i), - shape_representation_fn, subsharding, fast_mem)); - elements.push_back(element); - } - return xla::Tuple(builder, elements); - } - if (!original_shape.IsArray()) return original; - TensorShape shape; - TF_RETURN_IF_ERROR(XLAShapeToTensorShape(original_shape, &shape)); - TF_ASSIGN_OR_RETURN(DataType dtype, EncodePrimitiveTypeAsDataType( - original_shape.element_type())); - TF_ASSIGN_OR_RETURN(auto to_shape, - shape_representation_fn(shape, dtype, fast_mem)); - if (sharding) { - TF_ASSIGN_OR_RETURN(auto hlo_sharding, - xla::HloSharding::FromProto(*sharding)); - TF_RETURN_IF_ERROR(RewriteLayoutWithShardedShape( - hlo_sharding, fast_mem, shape_representation_fn, &to_shape)); - } - if (xla::ShapeUtil::Compatible(original_shape, to_shape)) { - for (int64 i = 0; i < original_shape.rank(); ++i) { - to_shape.set_dynamic_dimension(i, original_shape.is_dynamic_dimension(i)); - } - } - return xla::Reshape(to_shape, original); -} - } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/xla_compiler.h b/tensorflow/compiler/tf2xla/xla_compiler.h index b95d250636a..b0d93cde846 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.h +++ b/tensorflow/compiler/tf2xla/xla_compiler.h @@ -21,8 +21,10 @@ limitations under the License. #include "absl/types/span.h" #include "absl/types/variant.h" #include "tensorflow/compiler/tf2xla/host_compute_metadata.pb.h" +#include "tensorflow/compiler/tf2xla/xla_argument.h" #include "tensorflow/compiler/tf2xla/xla_compilation_device.h" #include "tensorflow/compiler/tf2xla/xla_expression.h" +#include "tensorflow/compiler/tf2xla/xla_helpers.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" #include "tensorflow/compiler/xla/client/local_client.h" #include "tensorflow/compiler/xla/client/xla_builder.h" @@ -97,96 +99,7 @@ class XlaContext; // `tensor_array_gradients` ordered set. 
class XlaCompiler { public: - // Describes how to derive the value of each _Arg node in the graph/function - // being compiled. There must be one Argument for each _Arg index. - struct Argument { - enum Kind { - // Default value; not a valid kind. - kInvalid, - - // Argument is a compile-time constant. No associated runtime parameter. - kConstant, - - // Argument is a Variable, TensorArray, or Stack resource. Has an - // associated runtime parameter iff `initialized` is true. - kResource, - - // Argument is a run-time parameter. - kParameter, - - // Argument is an XLA token. - kToken, - - // Argument is a TensorList. - kTensorList, - }; - - Kind kind = kInvalid; - - // The type of the argument. If the argument is a resource, this - // is the type of the variable's value, not DT_RESOURCE. - DataType type = DT_INVALID; - - // The shape of the argument. For: - // * a parameter: the shape of the parameter. We allow setting the xla shape - // if known. This helps avoid conversions to and from TensorShape. - // * a constant: ignored; the shape given by constant_value is used - // instead. - // * an uninitialized resource: ignored. We don't yet know the shape of an - // uninitialized resource (otherwise we would have initialized it!) - // * an initialized variable: the shape of the variable's value. - // * an initialized TensorArray or Stack resource: the shape of an entry in - // the TensorArray/Stack. Note this is the size of a single entry, not the - // XLA data structure that represents the complete stack/array. - absl::variant shape; - - // The value of the argument, if it is a compile-time constant. Must be a - // host-memory tensor. - Tensor constant_value; - - // The name of this argument, used for debugging. - string name; - - // The name of TensorFlow _Arg node, used for debugging. - string node_name; - - // For a kResource, what kind of resource is it? - XlaResource::Kind resource_kind = XlaResource::kInvalid; - - // For a kResource, has this resource been initialized? - bool initialized = false; - - // For a kResource, is this resource on Fast Memory. - bool fast_mem = false; - - // For a TensorArray or Stack resource, what is the array's declared size? - // (Used for lazy initialization.) - int64 max_array_size = -1; - - // TensorArray resource parameters are passed as (array, gradient array 0, - // ..., gradient array k), where the gradient arrays are in the same order - // as `tensor_array_gradients`. - std::set tensor_array_gradients; - - // dynamic dims to arg number map. Empty if no dynamic shapes. - std::map dynamic_dim_to_arg_num_map; - bool is_pad_arg = false; - - // Whether this argument will receive the same data across all replicas. - bool is_same_data_across_replicas = false; - - bool operator==(const Argument& other) const; - - // Returns a human-readable summary of the argument. - string HumanString() const; - - // Returns the dimension sizes for either TensorShape or xla::Shape. - std::vector DimensionSizes() const; - absl::InlinedVector DimensionSizesAsInlinedVector() const; - - // Returns the human-readable string for either TensorShape or xla::Shape. - string ShapeHumanString() const; - }; + using Argument = ::tensorflow::XlaArgument; // Options pertaining to an individual call to CompileGraph() or // CompileFunction(). @@ -221,77 +134,11 @@ class XlaCompiler { bool alias_resource_update = false; }; - struct OutputDescription { - // Type and shape of the output. The shape is the unflattened shape. 
- // When `type` is DT_RESOURCE, `shape` is the shape of the resource - // variable's value. - DataType type; - TensorShape shape; + using OutputDescription = ::tensorflow::XlaOutputDescription; - // Constant output value, if known to be constant at JIT compilation time. - // 'Tensor' is in host memory. - bool is_constant = false; - Tensor constant_value; + using ResourceUpdate = ::tensorflow::XlaResourceUpdate; - // When this output is a resource, i.e. `type == DT_RESOURCE`, this is - // the index of the input that contains the resource. - int input_index; - - // Whether this output is a TensorList. - bool is_tensor_list = false; - }; - - // Describes a variable write side effect of the computation. - struct ResourceUpdate { - // Index of the input that contains the variable resource to write to. - int input_index; - - // Type and shape of the tensor to be written back. - // The `shape` field has the same meaning as the Argument::shape field. - DataType type; - TensorShape shape; - - // Was the value of the variable modified by the computation? - // (Always true, unless `return_updated_values_for_all_resources` is true.) - bool modified; - - // If the resource is a TensorArray, the set of gradients read or written. - std::set tensor_array_gradients_accessed; - }; - - struct CompilationResult { - // Vector that maps from the parameters of the XLA computation to their - // original argument positions. To handle compile-time constant inputs, the - // parameters to the XLA computation may be a subset of the original - // arguments. The relative ordering of parameters are maintained. - std::vector input_mapping; - - // Input shapes of the computation. If we are flattening inputs, these are - // the flattened shapes. - std::vector xla_input_shapes; - - // Output shape in XLA format. The output shape is always a tuple. If we - // are flattening outputs, these are the flattened shapes. - xla::Shape xla_output_shape; - - // TensorFlow shapes of outputs, together with the values of any - // constant arguments. Vector indexed by Tensorflow _Retval number, - // containing both constant and non-constant results. - std::vector outputs; - - // TensorFlow shapes and types of sends/recvs from HostCompute Ops to their - // matching RecvAtHost/SendFromHost Ops in the outer graph. - tf2xla::HostComputeMetadata host_compute_metadata; - - // Resources whose values were updated by the computation, ordered - // by return value position (which is the same as the order the resources - // were passed as arguments). Resource updates follow the non-constant - // results in the outputs of XLA computation. - std::vector resource_updates; - - // The XLA computation built from the tensorflow subgraph. - std::shared_ptr computation; - }; + using CompilationResult = ::tensorflow::XlaCompilationResult; typedef std::function(const TensorShape&, DataType, bool)> @@ -518,21 +365,6 @@ class XlaCompiler { TF_DISALLOW_COPY_AND_ASSIGN(XlaCompiler); }; -// Creates an identity shape representation function. -XlaCompiler::ShapeRepresentationFn IdentityShapeRepresentationFn(); - -// Rewrites the layout of xla_shape if there is tiled sharding. -Status RewriteLayoutWithShardedShape( - const absl::optional& sharding, bool use_fast_memory, - XlaCompiler::ShapeRepresentationFn shape_representation_fn, - xla::Shape* xla_shape); - -// Adds reshapes to fix the layout of an output, if a shape_representation_fn or -// sharding is present. 
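[Aside] The nested XlaCompiler types removed in this patch survive under their old names: each one is re-exported as an alias (`using Argument = ::tensorflow::XlaArgument;` and friends above), so code that still spells `XlaCompiler::Argument` keeps compiling while the definitions move to lighter headers. A simplified sketch of the pattern, with illustrative names only (`example_field` and the surrounding scaffolding are not actual TensorFlow declarations):

    namespace tensorflow {

    // The struct now lives at namespace scope in its own header...
    struct XlaArgument {
      int example_field = 0;  // stand-in member for illustration
    };

    class XlaCompiler {
     public:
      // ...and the old nested name becomes an alias to it.
      using Argument = ::tensorflow::XlaArgument;
    };

    }  // namespace tensorflow

    // Pre-existing call sites need no edits:
    //   tensorflow::XlaCompiler::Argument arg;
    //   arg.example_field = 1;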
-xla::StatusOr ReshapeWithCorrectRepresentationAndSharding( - xla::XlaBuilder* builder, xla::XlaOp original, xla::Shape original_shape, - XlaCompiler::ShapeRepresentationFn shape_representation_fn, - absl::optional sharding, bool fast_mem); } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/xla_context.cc b/tensorflow/compiler/tf2xla/xla_context.cc index c94c4805d53..cb5bf34208f 100644 --- a/tensorflow/compiler/tf2xla/xla_context.cc +++ b/tensorflow/compiler/tf2xla/xla_context.cc @@ -24,7 +24,6 @@ limitations under the License. #include "tensorflow/compiler/tf2xla/shape_util.h" #include "tensorflow/compiler/tf2xla/type_util.h" #include "tensorflow/compiler/tf2xla/xla_helpers.h" -#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" #include "tensorflow/compiler/xla/client/client_library.h" #include "tensorflow/compiler/xla/client/xla_builder.h" #include "tensorflow/compiler/xla/client/xla_computation.h" diff --git a/tensorflow/compiler/tf2xla/xla_context.h b/tensorflow/compiler/tf2xla/xla_context.h index eb4ad3fe6a1..e44ac05b702 100644 --- a/tensorflow/compiler/tf2xla/xla_context.h +++ b/tensorflow/compiler/tf2xla/xla_context.h @@ -20,7 +20,6 @@ limitations under the License. #include -#include "tensorflow/compiler/tf2xla/xla_compiler.h" #include "tensorflow/compiler/tf2xla/xla_expression.h" #include "tensorflow/compiler/xla/client/xla_builder.h" #include "tensorflow/compiler/xla/client/xla_computation.h" @@ -33,6 +32,7 @@ limitations under the License. namespace tensorflow { class XlaOpKernelContext; +class XlaCompiler; // The XlaContext is the data structure that holds the state of an XLA // compilation, that is accessible from OpKernelContexts when compiling a diff --git a/tensorflow/compiler/tf2xla/xla_expression.cc b/tensorflow/compiler/tf2xla/xla_expression.cc index 49f108ed6c8..34e108bb6bf 100644 --- a/tensorflow/compiler/tf2xla/xla_expression.cc +++ b/tensorflow/compiler/tf2xla/xla_expression.cc @@ -163,4 +163,23 @@ xla::StatusOr XlaExpression::GetShape() const { } } +const XlaExpression* XlaExpression::CastExpressionFromTensor( + const Tensor& tensor) { + const XlaExpression* expression = + reinterpret_cast(tensor.tensor_data().data()); + CHECK(expression->kind() != XlaExpression::Kind::kInvalid) + << expression->HumanString(); + return expression; +} + +// Assigns an XlaExpression to a tensor on an XLA compilation device. +void XlaExpression::AssignExpressionToTensor(const XlaExpression& value, + Tensor* tensor) { + const XlaExpression* expression = + reinterpret_cast(tensor->tensor_data().data()); + CHECK(expression->kind() == XlaExpression::Kind::kInvalid) + << expression->HumanString(); + *const_cast(expression) = value; +} + } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/xla_expression.h b/tensorflow/compiler/tf2xla/xla_expression.h index 5d0bb35b182..3010964c5b7 100644 --- a/tensorflow/compiler/tf2xla/xla_expression.h +++ b/tensorflow/compiler/tf2xla/xla_expression.h @@ -104,6 +104,13 @@ class XlaExpression { // not the shape of the resource's value. xla::StatusOr GetShape() const; + // Retrieves an XlaExpression that was allocated by a previous Op. + static const XlaExpression* CastExpressionFromTensor(const Tensor& tensor); + + // Assigns an XlaExpression to a tensor on an XLA compilation device. 
+ static void AssignExpressionToTensor(const XlaExpression& value, + Tensor* tensor); + private: Kind kind_ = Kind::kInvalid; diff --git a/tensorflow/compiler/tf2xla/xla_helpers.cc b/tensorflow/compiler/tf2xla/xla_helpers.cc index 74247bbaec7..8c4b55aec8a 100644 --- a/tensorflow/compiler/tf2xla/xla_helpers.cc +++ b/tensorflow/compiler/tf2xla/xla_helpers.cc @@ -22,8 +22,6 @@ limitations under the License. #include "tensorflow/compiler/tf2xla/literal_util.h" #include "tensorflow/compiler/tf2xla/shape_util.h" #include "tensorflow/compiler/tf2xla/type_util.h" -#include "tensorflow/compiler/tf2xla/xla_context.h" -#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" #include "tensorflow/compiler/xla/client/lib/arithmetic.h" #include "tensorflow/compiler/xla/client/lib/constants.h" #include "tensorflow/compiler/xla/client/xla_builder.h" @@ -128,4 +126,93 @@ xla::XlaOp XlaHelpers::ConvertElementType(const xla::XlaOp& operand, return xla::ConvertElementType(operand, convert_to); } +XlaHelpers::ShapeRepresentationFn IdentityShapeRepresentationFn() { + return [](const TensorShape& shape, DataType dtype, + bool use_fast_memory) -> xla::StatusOr { + xla::Shape xla_shape; + TF_RETURN_IF_ERROR(TensorShapeToXLAShape(dtype, shape, &xla_shape)); + return xla_shape; + }; +} + +// Rewrites the layout of xla_shape if there is tiled sharding. +Status RewriteLayoutWithShardedShape( + const absl::optional& sharding, bool use_fast_memory, + XlaHelpers::ShapeRepresentationFn shape_representation_fn, + xla::Shape* xla_shape) { + if (sharding && !sharding->IsTileMaximal()) { + // After sharding, per core shape might have different layout. For example, + // before sharding, a shape [128, 128] will be assigned default + // minor-to-major {1, 0}. But after we shard this shape to [128, 64] * 2, + // the sharded shapes will have minor-to-major {0, 1}. + // + // As a result, for sharded shapes, we set their layout to per core shape's + // layout. + // + // TODO(endlessroad): for variable input & update, we might have + // different layouts which will prevent input output aliasing and + // increase memory usage. Investigate such cases. + int64 device = *sharding->tile_assignment().begin(); + std::vector offset = + sharding->TileOffsetForDevice(*xla_shape, device); + std::vector limit = sharding->TileLimitForDevice(*xla_shape, device); + std::vector dimensions(xla_shape->rank()); + for (int64 i = 0; i < xla_shape->rank(); ++i) { + dimensions[i] = limit[i] - offset[i]; + } + xla::Shape per_device_xla_shape = + xla::ShapeUtil::MakeShape(xla_shape->element_type(), dimensions); + TensorShape per_device_tensor_shape; + TF_RETURN_IF_ERROR( + XLAShapeToTensorShape(per_device_xla_shape, &per_device_tensor_shape)); + TF_ASSIGN_OR_RETURN(DataType dtype, EncodePrimitiveTypeAsDataType( + xla_shape->element_type())); + TF_ASSIGN_OR_RETURN(per_device_xla_shape, + shape_representation_fn(per_device_tensor_shape, dtype, + use_fast_memory)); + *xla_shape->mutable_layout() = per_device_xla_shape.layout(); + } + return Status::OK(); +} + +// There is a shape_representation_fn or sharding for an output, this function +// uses a reshape to fix the layout. 
+xla::StatusOr<xla::XlaOp> ReshapeWithCorrectRepresentationAndSharding(
+    xla::XlaBuilder* builder, xla::XlaOp original, xla::Shape original_shape,
+    XlaHelpers::ShapeRepresentationFn shape_representation_fn,
+    absl::optional<xla::OpSharding> sharding, bool fast_mem) {
+  if (original_shape.IsTuple()) {
+    std::vector<xla::XlaOp> elements;
+    for (int64 i = 0; i < original_shape.tuple_shapes_size(); ++i) {
+      auto subsharding = sharding ? sharding->tuple_shardings(i) : sharding;
+      TF_ASSIGN_OR_RETURN(auto element,
+                          ReshapeWithCorrectRepresentationAndSharding(
+                              builder, xla::GetTupleElement(original, i),
+                              original_shape.tuple_shapes(i),
+                              shape_representation_fn, subsharding, fast_mem));
+      elements.push_back(element);
+    }
+    return xla::Tuple(builder, elements);
+  }
+  if (!original_shape.IsArray()) return original;
+  TensorShape shape;
+  TF_RETURN_IF_ERROR(XLAShapeToTensorShape(original_shape, &shape));
+  TF_ASSIGN_OR_RETURN(DataType dtype, EncodePrimitiveTypeAsDataType(
+                                          original_shape.element_type()));
+  TF_ASSIGN_OR_RETURN(auto to_shape,
+                      shape_representation_fn(shape, dtype, fast_mem));
+  if (sharding) {
+    TF_ASSIGN_OR_RETURN(auto hlo_sharding,
+                        xla::HloSharding::FromProto(*sharding));
+    TF_RETURN_IF_ERROR(RewriteLayoutWithShardedShape(
+        hlo_sharding, fast_mem, shape_representation_fn, &to_shape));
+  }
+  if (xla::ShapeUtil::Compatible(original_shape, to_shape)) {
+    for (int64 i = 0; i < original_shape.rank(); ++i) {
+      to_shape.set_dynamic_dimension(i, original_shape.is_dynamic_dimension(i));
+    }
+  }
+  return xla::Reshape(to_shape, original);
+}
+
 }  // end namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/xla_helpers.h b/tensorflow/compiler/tf2xla/xla_helpers.h
index 490923526bd..3a9375ec1f4 100644
--- a/tensorflow/compiler/tf2xla/xla_helpers.h
+++ b/tensorflow/compiler/tf2xla/xla_helpers.h
@@ -19,8 +19,9 @@ limitations under the License.
 #define TENSORFLOW_COMPILER_TF2XLA_XLA_HELPERS_H_
 
 #include "absl/types/span.h"
-#include "tensorflow/compiler/tf2xla/xla_context.h"
+#include "tensorflow/compiler/tf2xla/host_compute_metadata.pb.h"
 #include "tensorflow/compiler/xla/client/xla_builder.h"
+#include "tensorflow/compiler/xla/service/hlo_sharding.h"
 #include "tensorflow/core/framework/tensor.h"
 
 namespace tensorflow {
@@ -72,6 +73,98 @@ class XlaHelpers {
   // than the xla::PrimitiveType.
   static xla::XlaOp ConvertElementType(const xla::XlaOp& operand,
                                        const DataType new_element_type);
+
+  typedef std::function<xla::StatusOr<xla::Shape>(const TensorShape&, DataType,
+                                                  bool)>
+      ShapeRepresentationFn;
+};
+
+// Creates an identity shape representation function.
+XlaHelpers::ShapeRepresentationFn IdentityShapeRepresentationFn();
+
+// Rewrites the layout of xla_shape if there is tiled sharding.
+Status RewriteLayoutWithShardedShape(
+    const absl::optional<xla::HloSharding>& sharding, bool use_fast_memory,
+    XlaHelpers::ShapeRepresentationFn shape_representation_fn,
+    xla::Shape* xla_shape);
+
+// Adds reshapes to fix the layout of an output, if a shape_representation_fn or
+// sharding is present.
+xla::StatusOr<xla::XlaOp> ReshapeWithCorrectRepresentationAndSharding(
+    xla::XlaBuilder* builder, xla::XlaOp original, xla::Shape original_shape,
+    XlaHelpers::ShapeRepresentationFn shape_representation_fn,
+    absl::optional<xla::OpSharding> sharding, bool fast_mem);
+
+struct XlaOutputDescription {
+  // Type and shape of the output. The shape is the unflattened shape.
+  // When `type` is DT_RESOURCE, `shape` is the shape of the resource
+  // variable's value.
+  DataType type;
+  TensorShape shape;
+
+  // Constant output value, if known to be constant at JIT compilation time.
+  // 'Tensor' is in host memory.
+  bool is_constant = false;
+  Tensor constant_value;
+
+  // When this output is a resource, i.e. `type == DT_RESOURCE`, this is
+  // the index of the input that contains the resource.
+  int input_index;
+
+  // Whether this output is a TensorList.
+  bool is_tensor_list = false;
+};
+
+// Describes a variable write side effect of the computation.
+struct XlaResourceUpdate {
+  // Index of the input that contains the variable resource to write to.
+  int input_index;
+
+  // Type and shape of the tensor to be written back.
+  // The `shape` field has the same meaning as the Argument::shape field.
+  DataType type;
+  TensorShape shape;
+
+  // Was the value of the variable modified by the computation?
+  // (Always true, unless `return_updated_values_for_all_resources` is true.)
+  bool modified;
+
+  // If the resource is a TensorArray, the set of gradients read or written.
+  std::set<string> tensor_array_gradients_accessed;
+};
+
+struct XlaCompilationResult {
+  // Vector that maps from the parameters of the XLA computation to their
+  // original argument positions. To handle compile-time constant inputs, the
+  // parameters to the XLA computation may be a subset of the original
+  // arguments. The relative ordering of parameters are maintained.
+  std::vector<int> input_mapping;
+
+  // Input shapes of the computation. If we are flattening inputs, these are
+  // the flattened shapes.
+  std::vector<xla::Shape> xla_input_shapes;
+
+  // Output shape in XLA format. The output shape is always a tuple. If we
+  // are flattening outputs, these are the flattened shapes.
+  xla::Shape xla_output_shape;
+
+  // TensorFlow shapes of outputs, together with the values of any
+  // constant arguments. Vector indexed by Tensorflow _Retval number,
+  // containing both constant and non-constant results.
+  std::vector<XlaOutputDescription> outputs;
+
+  // TensorFlow shapes and types of sends/recvs from HostCompute Ops to their
+  // matching RecvAtHost/SendFromHost Ops in the outer graph.
+  tf2xla::HostComputeMetadata host_compute_metadata;
+
+  // Resources whose values were updated by the computation, ordered
+  // by return value position (which is the same as the order the resources
+  // were passed as arguments). Resource updates follow the non-constant
+  // results in the outputs of XLA computation.
+  std::vector<XlaResourceUpdate> resource_updates;
+
+  // The XLA computation built from the tensorflow subgraph.
+  std::shared_ptr<xla::XlaComputation> computation;
 };
 
 }  // end namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.cc b/tensorflow/compiler/tf2xla/xla_op_kernel.cc
index 27766408716..735a6c7291e 100644
--- a/tensorflow/compiler/tf2xla/xla_op_kernel.cc
+++ b/tensorflow/compiler/tf2xla/xla_op_kernel.cc
@@ -49,33 +49,13 @@ XlaCompiler* XlaOpKernelContext::compiler() const {
   return xla_context()->compiler();
 }
 
-// Retrieves an XlaExpression that was allocated by a previous Op.
-const XlaExpression* XlaOpKernelContext::CastExpressionFromTensor(
-    const Tensor& tensor) {
-  const XlaExpression* expression =
-      reinterpret_cast<const XlaExpression*>(tensor.tensor_data().data());
-  CHECK(expression->kind() != XlaExpression::Kind::kInvalid)
-      << expression->HumanString();
-  return expression;
-}
-
-// Assigns an XlaExpression to a tensor on an XLA compilation device.
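[Aside] The two static helpers being deleted from XlaOpKernelContext here reappear on XlaExpression itself (see the xla_expression.cc/.h hunks above), so kernels now reach the symbolic expression through the expression class rather than through the kernel context. A rough sketch of the call pattern after the move; this is not a real kernel, and only the CastExpressionFromTensor/HumanString calls are taken from the patch:

    // Recover the XlaExpression that an earlier op stored in the input
    // tensor's buffer on the XLA compilation device, then inspect it.
    void ExampleUse(tensorflow::XlaOpKernelContext* ctx) {
      const tensorflow::Tensor& input = ctx->op_kernel_context()->input(0);
      const tensorflow::XlaExpression* expr =
          tensorflow::XlaExpression::CastExpressionFromTensor(input);
      // The expression records how the value is derived: a constant, an
      // xla::XlaOp, a resource, or a tensor list.
      LOG(INFO) << expr->HumanString();
    }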
-void XlaOpKernelContext::AssignExpressionToTensor(const XlaExpression& value, - Tensor* tensor) { - const XlaExpression* expression = - reinterpret_cast(tensor->tensor_data().data()); - CHECK(expression->kind() == XlaExpression::Kind::kInvalid) - << expression->HumanString(); - *const_cast(expression) = value; -} - const XlaExpression& XlaOpKernelContext::InputExpression(int index) { - return *CastExpressionFromTensor(context_->input(index)); + return *XlaExpression::CastExpressionFromTensor(context_->input(index)); } const XlaExpression& XlaOpKernelContext::InputExpression( absl::string_view name) { - return *CastExpressionFromTensor(GetInputTensorByName(name)); + return *XlaExpression::CastExpressionFromTensor(GetInputTensorByName(name)); } xla::XlaOp XlaOpKernelContext::Input(int index) { @@ -108,7 +88,8 @@ DataType XlaOpKernelContext::input_type(int index) const { if (type == DT_UINT8) { // Masqueraded XlaExpression could have different type. See // XlaOpKernelContext::SetOutputExpression for details. - auto expression = CastExpressionFromTensor(context_->input(index)); + auto expression = + XlaExpression::CastExpressionFromTensor(context_->input(index)); type = expression->dtype(); } return type; @@ -120,7 +101,7 @@ DataType XlaOpKernelContext::InputType(absl::string_view name) { if (type == DT_UINT8) { // Masqueraded XlaExpression could have different type. See // XlaOpKernelContext::SetOutputExpression for details. - auto expression = CastExpressionFromTensor(tensor); + auto expression = XlaExpression::CastExpressionFromTensor(tensor); type = expression->dtype(); } return type; @@ -385,7 +366,8 @@ Status XlaOpKernelContext::InputList(absl::string_view name, handles->clear(); shapes->clear(); for (const Tensor& input : inputs) { - handles->push_back(CastExpressionFromTensor(input)->AsXlaOp(builder())); + handles->push_back( + XlaExpression::CastExpressionFromTensor(input)->AsXlaOp(builder())); shapes->push_back(input.shape()); } return Status::OK(); @@ -408,7 +390,7 @@ Status ReadVariableInputTensor(const Tensor& tensor, DataType type, const XlaOpKernelContext* ctx, TensorShape* shape, xla::XlaOp* value) { const XlaExpression* expression = - XlaOpKernelContext::CastExpressionFromTensor(tensor); + XlaExpression::CastExpressionFromTensor(tensor); XlaResource* variable = expression->resource(); TF_RET_CHECK(variable != nullptr); TF_RET_CHECK(variable->kind() == XlaResource::kVariable); @@ -461,7 +443,8 @@ Status XlaOpKernelContext::ReadVariableInput(absl::string_view name, Status XlaOpKernelContext::GetVariableTypeAndShape(int index, DataType* type, TensorShape* shape) const { const Tensor& tensor = context_->input(index); - const XlaExpression* expression = CastExpressionFromTensor(tensor); + const XlaExpression* expression = + XlaExpression::CastExpressionFromTensor(tensor); XlaResource* variable = expression->resource(); TF_RET_CHECK(variable != nullptr); TF_RET_CHECK(variable->kind() == XlaResource::kVariable); @@ -502,8 +485,8 @@ void XlaOpKernelContext::SetOutputExpression(int index, TF_ASSIGN_OR_RETURN(TensorShape shape, expression.GetShape()); TF_RETURN_IF_ERROR(context_->allocate_output(index, shape, &output)); } - XlaOpKernelContext::AssignExpressionToTensor( - expression, context_->mutable_output(index)); + XlaExpression::AssignExpressionToTensor(expression, + context_->mutable_output(index)); return Status::OK(); }(); if (!status.ok()) { @@ -542,7 +525,7 @@ void XlaOpKernelContext::SetResourceOutput(int index, XlaResource* resource) { Status 
XlaOpKernelContext::GetResourceInput(int index, XlaResource** resource) { const XlaExpression* expression = - CastExpressionFromTensor(context_->input(index)); + XlaExpression::CastExpressionFromTensor(context_->input(index)); TF_RET_CHECK(expression->resource() != nullptr); *resource = expression->resource(); return Status::OK(); @@ -554,7 +537,7 @@ Status AssignVariableTensor(const Tensor& tensor, DataType type, const XlaOpKernelContext* ctx, xla::XlaOp handle, xla::XlaBuilder* builder) { const XlaExpression* expression = - XlaOpKernelContext::CastExpressionFromTensor(tensor); + XlaExpression::CastExpressionFromTensor(tensor); XlaResource* variable = expression->resource(); TF_RET_CHECK(variable != nullptr); TF_RET_CHECK(variable->kind() == XlaResource::kVariable); diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.h b/tensorflow/compiler/tf2xla/xla_op_kernel.h index 6987b6fbb98..3cf51e6ec6f 100644 --- a/tensorflow/compiler/tf2xla/xla_op_kernel.h +++ b/tensorflow/compiler/tf2xla/xla_op_kernel.h @@ -17,6 +17,9 @@ limitations under the License. #define TENSORFLOW_COMPILER_TF2XLA_XLA_OP_KERNEL_H_ #include "tensorflow/compiler/tf2xla/xla_compiler.h" +#include "tensorflow/compiler/tf2xla/xla_context.h" +#include "tensorflow/compiler/tf2xla/xla_expression.h" +#include "tensorflow/compiler/tf2xla/xla_resource.h" #include "tensorflow/compiler/xla/client/xla_builder.h" #include "tensorflow/compiler/xla/client/xla_computation.h" #include "tensorflow/compiler/xla/xla_data.pb.h" @@ -284,13 +287,6 @@ class XlaOpKernelContext { // separate specialization of the computation for each DataType. const xla::XlaComputation* GetOrCreateMul(const DataType type); - // Assigns an XlaExpression to a tensor on an XLA compilation device. - static void AssignExpressionToTensor(const XlaExpression& value, - Tensor* tensor); - - // Retrieves an XlaExpression that was assigned to the specified tensor. - static const XlaExpression* CastExpressionFromTensor(const Tensor& tensor); - private: // Returns the tensor of input `name`. const Tensor& GetInputTensorByName(absl::string_view name); diff --git a/tensorflow/compiler/tf2xla/xla_resource.cc b/tensorflow/compiler/tf2xla/xla_resource.cc index 32d42cb8a42..bec0b46611d 100644 --- a/tensorflow/compiler/tf2xla/xla_resource.cc +++ b/tensorflow/compiler/tf2xla/xla_resource.cc @@ -21,7 +21,6 @@ limitations under the License. 
#include "absl/memory/memory.h" #include "tensorflow/compiler/tf2xla/shape_util.h" #include "tensorflow/compiler/tf2xla/sharding_util.h" -#include "tensorflow/compiler/tf2xla/xla_context.h" #include "tensorflow/compiler/tf2xla/xla_helpers.h" #include "tensorflow/compiler/xla/client/xla_builder.h" diff --git a/tensorflow/core/tpu/BUILD b/tensorflow/core/tpu/BUILD index 30a90c1da6c..0a17ba3d408 100644 --- a/tensorflow/core/tpu/BUILD +++ b/tensorflow/core/tpu/BUILD @@ -57,6 +57,7 @@ cc_library( ":tpu_defs", ":tpu_node_device_util", "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/tf2xla:xla_op_registry", ], alwayslink = 1, ) @@ -180,8 +181,8 @@ cc_library( "//tensorflow/compiler/tf2xla:common", "//tensorflow/compiler/tf2xla:tf2xla_util", "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/tf2xla:xla_op_registry", "//tensorflow/core:core_cpu_internal", - "//tensorflow/core:framework", "//tensorflow/core:framework_internal", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index 7f64758d238..e5f49158231 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -656,6 +656,7 @@ cc_library( deps = [ "//tensorflow/compiler/tf2xla:common", "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/tf2xla:xla_op_registry", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto_cc", @@ -673,6 +674,7 @@ cc_library( srcs = ["topk_ops.cc"], deps = [ "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/tf2xla:xla_op_registry", "//tensorflow/compiler/xla/client:xla_builder", "//tensorflow/compiler/xla/client/lib:arithmetic", "//tensorflow/core/tpu:tpu_defs", From 605eb0f9a2196c57fe2fcd0cf39f9f9341e95868 Mon Sep 17 00:00:00 2001 From: Mingming Liu Date: Tue, 28 Jul 2020 13:33:08 -0700 Subject: [PATCH 1516/2522] Internal change. Not for public. PiperOrigin-RevId: 323642355 Change-Id: I2459eec6b96881ff6ad0c37b779835ecce0faf86 --- tensorflow/core/kernels/batching_util/batch_resource_base.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/tensorflow/core/kernels/batching_util/batch_resource_base.cc b/tensorflow/core/kernels/batching_util/batch_resource_base.cc index b372f446f7a..adfa172cf1c 100644 --- a/tensorflow/core/kernels/batching_util/batch_resource_base.cc +++ b/tensorflow/core/kernels/batching_util/batch_resource_base.cc @@ -270,8 +270,6 @@ Status BatchResourceBase::ConcatInputTensors( // In this context, Concat can be further optimized to get rid of // some (probably all) memcpy when input tensors are slices of // another copy. - // TODO(b/154140947): - // Add a custom implementation of Split and then optimize Concat. 
std::vector to_concatenate; to_concatenate.reserve(output->size()); for (int j = 0; j < output->size(); ++j) { From 32a2c9c3d0426906c754ac65fe2ebc0b9d4c6520 Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Tue, 28 Jul 2020 13:35:22 -0700 Subject: [PATCH 1517/2522] [TF/XLA] Rename local variable to be more self-descriptive PiperOrigin-RevId: 323642819 Change-Id: I83006b9b6c56adadd389ead060bb688cdf284af6 --- tensorflow/compiler/jit/xla_launch_util.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/jit/xla_launch_util.cc b/tensorflow/compiler/jit/xla_launch_util.cc index 41abe86df6e..e0afc5d93a1 100644 --- a/tensorflow/compiler/jit/xla_launch_util.cc +++ b/tensorflow/compiler/jit/xla_launch_util.cc @@ -556,7 +556,7 @@ Status XlaComputationLaunchContext::PopulateOutputs( } Status XlaComputationLaunchContext::BuildXlaCompilerArguments( - const std::map& constant_args, + const std::map& must_be_constant_args, absl::Span variable_args, OpKernelContext* ctx, std::vector* args) { args->resize(ctx->num_inputs()); @@ -572,9 +572,9 @@ Status XlaComputationLaunchContext::BuildXlaCompilerArguments( for (int64 input_num = 0; input_num < ctx->num_inputs(); ++input_num) { XlaCompiler::Argument& arg = (*args)[input_num]; - if (constant_args.count(input_num) > 0) { + if (must_be_constant_args.count(input_num) > 0) { // Handles compile-time constants. - const Tensor& input = constant_args.at(input_num); + const Tensor& input = must_be_constant_args.at(input_num); TF_RET_CHECK(input.dtype() != DT_RESOURCE); arg.kind = XlaCompiler::Argument::kConstant; arg.type = input.dtype(); From 57135344849334c9019b9d4ce23b67843367958a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 28 Jul 2020 13:37:23 -0700 Subject: [PATCH 1518/2522] add 1.3 NNAPI DurationCode. PiperOrigin-RevId: 323643227 Change-Id: Ic1bb76e5ed9aab3318451ff99915b250b9742841 --- tensorflow/lite/nnapi/NeuralNetworksTypes.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tensorflow/lite/nnapi/NeuralNetworksTypes.h b/tensorflow/lite/nnapi/NeuralNetworksTypes.h index 27892f55631..934e8948b71 100644 --- a/tensorflow/lite/nnapi/NeuralNetworksTypes.h +++ b/tensorflow/lite/nnapi/NeuralNetworksTypes.h @@ -627,6 +627,26 @@ typedef enum { // such as that of the runtime itself and the IPC needed for the runtime to // communicate with the driver. ANEURALNETWORKS_DURATION_IN_DRIVER = 1, + // Execution time on hardware, after all dependencies have been signaled. + // If no dependencies specified (for example, if the execution was scheduled + // other + // than with {@link ANeuralNetworksExecution_startComputeWithDependencies}), + // the + // reported time will be the same as ANEURALNETWORKS_DURATION_ON_HARDWARE. + // Available since API level 30. + ANEURALNETWORKS_FENCED_DURATION_ON_HARDWARE = 2, + // Execution time in driver, after all dependencies have been signaled. + // Excludes + // overhead such as that of the runtime itself and the IPC needed for the + // runtime + // to communicate with the driver. + // If no dependencies specified (for example, if the execution was scheduled + // other + // than with {@link ANeuralNetworksExecution_startComputeWithDependencies}), + // the + // reported time will be the same as ANEURALNETWORKS_DURATION_IN_DRIVER. + // Available since API level 30. 
+ ANEURALNETWORKS_FENCED_DURATION_IN_DRIVER = 3, } DurationCode; typedef int (*ANeuralNetworksExecution_getDuration_fn)( From 3a70eef32d7846978437677ff42e77cc72391e2c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 28 Jul 2020 13:40:47 -0700 Subject: [PATCH 1519/2522] Add missing files to ARM Cortex M4 source list PiperOrigin-RevId: 323644035 Change-Id: If543e28c349d63f0db387bef086332d2a75043b9 --- .../lite/micro/tools/make/targets/apollo3evb_makefile.inc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/lite/micro/tools/make/targets/apollo3evb_makefile.inc b/tensorflow/lite/micro/tools/make/targets/apollo3evb_makefile.inc index 51163233074..68792496ec3 100644 --- a/tensorflow/lite/micro/tools/make/targets/apollo3evb_makefile.inc +++ b/tensorflow/lite/micro/tools/make/targets/apollo3evb_makefile.inc @@ -118,9 +118,11 @@ $(MAKEFILE_DIR)/downloads/$(AM_SDK_DEST)/$(SF_BSPS_DEST): $(MAKEFILE_DIR)/downlo CMSIS_SRC_DIR := $(MAKEFILE_DIR)/downloads/cmsis/CMSIS/DSP/Source THIRD_PARTY_CC_SRCS := \ + $(CMSIS_SRC_DIR)/BasicMathFunctions/arm_dot_prod_q15.c \ $(CMSIS_SRC_DIR)/BasicMathFunctions/arm_mult_q15.c \ $(CMSIS_SRC_DIR)/TransformFunctions/arm_rfft_init_q15.c \ $(CMSIS_SRC_DIR)/TransformFunctions/arm_rfft_q15.c \ + $(CMSIS_SRC_DIR)/TransformFunctions/arm_bitreversal2.c \ $(CMSIS_SRC_DIR)/TransformFunctions/arm_cfft_q15.c \ $(CMSIS_SRC_DIR)/TransformFunctions/arm_cfft_radix4_q15.c \ $(CMSIS_SRC_DIR)/CommonTables/arm_const_structs.c \ From 5f8e3e8d5404ebc6089aa5e83b8342256d0cf4ad Mon Sep 17 00:00:00 2001 From: Karim Nosir Date: Tue, 28 Jul 2020 13:42:30 -0700 Subject: [PATCH 1520/2522] Update other builders for hexagon delegate to use the method to compute and add min/max to the op. PiperOrigin-RevId: 323644369 Change-Id: I96cfe9cb506662111c7545194f998306b2b293fd --- .../hexagon/builders/activation_builder.h | 1 - .../hexagon/builders/arg_min_max_builder.h | 1 - .../hexagon/builders/arithmetic_builder.cc | 20 ++--------- .../hexagon/builders/arithmetic_builder.h | 3 +- .../hexagon/builders/hardswish_builder.cc | 19 ++--------- .../hexagon/builders/hardswish_builder.h | 1 - .../builders/l2_normalization_builder.cc | 9 +---- .../builders/l2_normalization_builder.h | 1 - .../hexagon/builders/min_max_builder.cc | 33 ++----------------- .../hexagon/builders/min_max_builder.h | 1 - .../hexagon/builders/mirror_pad_builder.cc | 9 +---- .../hexagon/builders/mirror_pad_builder.h | 1 - .../hexagon/builders/neg_op_builder.cc | 8 +---- .../hexagon/builders/neg_op_builder.h | 1 - .../delegates/hexagon/builders/pad_builder.cc | 9 +---- .../delegates/hexagon/builders/pad_builder.h | 1 - .../hexagon/builders/pool_2d_builder.cc | 9 +---- .../hexagon/builders/pool_2d_builder.h | 2 +- .../hexagon/builders/quantize_builder.cc | 24 ++------------ .../hexagon/builders/reduce_builder.cc | 10 +----- .../hexagon/builders/reduce_builder.h | 1 - .../builders/resize_bilinear_builder.cc | 9 +---- .../builders/resize_bilinear_builder.h | 1 - .../resize_nearest_neighbor_builder.cc | 9 +---- .../resize_nearest_neighbor_builder.h | 1 - .../hexagon/builders/slice_builder.cc | 9 +---- .../hexagon/builders/slice_builder.h | 1 - .../hexagon/builders/softmax_builder.cc | 9 +---- .../hexagon/builders/softmax_builder.h | 1 - .../builders/space_to_depth_builder.cc | 11 ++----- .../hexagon/builders/space_to_depth_builder.h | 1 - .../hexagon/builders/split_builder.cc | 9 +---- .../hexagon/builders/split_builder.h | 2 -- .../hexagon/builders/strided_slice_builder.cc | 9 +---- 
.../hexagon/builders/strided_slice_builder.h | 1 - .../hexagon/builders/transpose_builder.cc | 10 +----- .../hexagon/builders/transpose_builder.h | 1 - .../builders/transpose_conv_2d_builder.cc | 23 ++----------- 38 files changed, 31 insertions(+), 240 deletions(-) diff --git a/tensorflow/lite/delegates/hexagon/builders/activation_builder.h b/tensorflow/lite/delegates/hexagon/builders/activation_builder.h index ffd6ffdabb7..4537cd4aa01 100644 --- a/tensorflow/lite/delegates/hexagon/builders/activation_builder.h +++ b/tensorflow/lite/delegates/hexagon/builders/activation_builder.h @@ -41,7 +41,6 @@ class ActivationOpBuilder : public OpBuilder { private: TensorID node_output_; - float input_min_, input_max_; float relu_value_ = 6; }; diff --git a/tensorflow/lite/delegates/hexagon/builders/arg_min_max_builder.h b/tensorflow/lite/delegates/hexagon/builders/arg_min_max_builder.h index 0ffa4ac9505..54d85b5cb51 100644 --- a/tensorflow/lite/delegates/hexagon/builders/arg_min_max_builder.h +++ b/tensorflow/lite/delegates/hexagon/builders/arg_min_max_builder.h @@ -36,7 +36,6 @@ class ArgMinMaxOpBuilder : public OpBuilder { private: TensorID node_output_; - float input_min_, input_max_; }; } // namespace hexagon diff --git a/tensorflow/lite/delegates/hexagon/builders/arithmetic_builder.cc b/tensorflow/lite/delegates/hexagon/builders/arithmetic_builder.cc index 5b069ed4e24..1b6de1b8907 100644 --- a/tensorflow/lite/delegates/hexagon/builders/arithmetic_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/arithmetic_builder.cc @@ -33,29 +33,15 @@ TfLiteStatus ArithmeticOpBuilder::PopulateSubGraph( int tensor_id = inputs->data[0]; const auto& input1_tensor = context->tensors[tensor_id]; AddInput(graph_builder_->GetHexagonTensorId(tensor_id)); - TF_LITE_ENSURE_STATUS( - ComputeMinAndMaxQuantValues(input1_tensor, &input1_min_, &input1_max_)); - auto* input1_min_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&input1_min_), sizeof(input1_min_)); - auto* input1_max_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&input1_max_), sizeof(input1_max_)); // Second input data tensor. tensor_id = inputs->data[1]; const auto& input2_tensor = context->tensors[tensor_id]; AddInput(graph_builder_->GetHexagonTensorId(tensor_id)); - TF_LITE_ENSURE_STATUS( - ComputeMinAndMaxQuantValues(input2_tensor, &input2_min_, &input2_max_)); - auto* input2_min_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&input2_min_), sizeof(input2_min_)); - auto* input2_max_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&input2_max_), sizeof(input2_max_)); - // Min/max values for input tensors. - AddInput(TensorID(input1_min_const->GetID(), 0)); - AddInput(TensorID(input1_max_const->GetID(), 0)); - AddInput(TensorID(input2_min_const->GetID(), 0)); - AddInput(TensorID(input2_max_const->GetID(), 0)); + // Inputs min/max + TF_LITE_ENSURE_STATUS(ComputeAndAddMinAndMax(context, input1_tensor)); + TF_LITE_ENSURE_STATUS(ComputeAndAddMinAndMax(context, input2_tensor)); // Output details. 
TF_LITE_ENSURE_STATUS(ComputeMinAndMaxQuantValues( diff --git a/tensorflow/lite/delegates/hexagon/builders/arithmetic_builder.h b/tensorflow/lite/delegates/hexagon/builders/arithmetic_builder.h index e3cba846884..cc17a5c1426 100644 --- a/tensorflow/lite/delegates/hexagon/builders/arithmetic_builder.h +++ b/tensorflow/lite/delegates/hexagon/builders/arithmetic_builder.h @@ -38,8 +38,7 @@ class ArithmeticOpBuilder : public OpBuilder { private: TensorID node_output_; - float input1_min_, input1_max_, input2_min_, input2_max_, output_min_, - output_max_; + float output_min_, output_max_; }; } // namespace hexagon diff --git a/tensorflow/lite/delegates/hexagon/builders/hardswish_builder.cc b/tensorflow/lite/delegates/hexagon/builders/hardswish_builder.cc index af7daec7875..774f8759c6e 100644 --- a/tensorflow/lite/delegates/hexagon/builders/hardswish_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/hardswish_builder.cc @@ -31,24 +31,11 @@ TfLiteStatus HardSwishOpBuilder::PopulateSubGraph(const TfLiteIntArray* inputs, int tensor_id = inputs->data[0]; const auto& input1_tensor = context->tensors[tensor_id]; AddInput(graph_builder_->GetHexagonTensorId(tensor_id)); - TF_LITE_ENSURE_STATUS( - ComputeMinAndMaxQuantValues(input1_tensor, &input_min_, &input_max_)); - auto* input_min_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&input_min_), sizeof(input_min_)); - auto* input_max_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&input_max_), sizeof(input_max_)); - AddInput(TensorID(input_min_const->GetID(), 0)); - AddInput(TensorID(input_max_const->GetID(), 0)); + TF_LITE_ENSURE_STATUS(ComputeAndAddMinAndMax(context, input1_tensor)); // Output min/max - TF_LITE_ENSURE_STATUS(ComputeMinAndMaxQuantValues( - context->tensors[outputs->data[0]], &output_min_, &output_max_)); - auto* output_min_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&output_min_), sizeof(output_min_)); - auto* output_max_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&output_max_), sizeof(output_max_)); - AddInput(TensorID(output_min_const->GetID(), 0)); - AddInput(TensorID(output_max_const->GetID(), 0)); + TF_LITE_ENSURE_STATUS( + ComputeAndAddMinAndMax(context, context->tensors[outputs->data[0]])); int output_batch_size, output_height_size, output_width_size, output_depth_size; diff --git a/tensorflow/lite/delegates/hexagon/builders/hardswish_builder.h b/tensorflow/lite/delegates/hexagon/builders/hardswish_builder.h index ba173bec466..a48b0b0c2ec 100644 --- a/tensorflow/lite/delegates/hexagon/builders/hardswish_builder.h +++ b/tensorflow/lite/delegates/hexagon/builders/hardswish_builder.h @@ -40,7 +40,6 @@ class HardSwishOpBuilder : public OpBuilder { private: TensorID node_output_; - float input_min_, input_max_, output_min_, output_max_; }; } // namespace hexagon diff --git a/tensorflow/lite/delegates/hexagon/builders/l2_normalization_builder.cc b/tensorflow/lite/delegates/hexagon/builders/l2_normalization_builder.cc index 1adc5102ead..4565b299800 100644 --- a/tensorflow/lite/delegates/hexagon/builders/l2_normalization_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/l2_normalization_builder.cc @@ -32,14 +32,7 @@ TfLiteStatus L2NormalizationOpBuilder::PopulateSubGraph( int tensor_id = inputs->data[0]; const auto& input_tensor = context->tensors[tensor_id]; AddInput(graph_builder_->GetHexagonTensorId(tensor_id)); - TF_LITE_ENSURE_STATUS( - ComputeMinAndMaxQuantValues(input_tensor, 
&input_min_, &input_max_)); - auto* input_min_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&input_min_), sizeof(input_min_)); - auto* input_max_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&input_max_), sizeof(input_max_)); - AddInput(TensorID(input_min_const->GetID(), 0)); - AddInput(TensorID(input_max_const->GetID(), 0)); + TF_LITE_ENSURE_STATUS(ComputeAndAddMinAndMax(context, input_tensor)); // Hexagon outputs for this node. int output_batch_size, output_height_size, output_width_size, diff --git a/tensorflow/lite/delegates/hexagon/builders/l2_normalization_builder.h b/tensorflow/lite/delegates/hexagon/builders/l2_normalization_builder.h index d552d6f7a09..7a534ccd5bb 100644 --- a/tensorflow/lite/delegates/hexagon/builders/l2_normalization_builder.h +++ b/tensorflow/lite/delegates/hexagon/builders/l2_normalization_builder.h @@ -38,7 +38,6 @@ class L2NormalizationOpBuilder : public OpBuilder { private: TensorID node_output_; - float input_min_, input_max_; }; } // namespace hexagon diff --git a/tensorflow/lite/delegates/hexagon/builders/min_max_builder.cc b/tensorflow/lite/delegates/hexagon/builders/min_max_builder.cc index 67027619415..bcfae6032c8 100644 --- a/tensorflow/lite/delegates/hexagon/builders/min_max_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/min_max_builder.cc @@ -35,40 +35,13 @@ TfLiteStatus MinMaxOpBuilder::PopulateSubGraph(const TfLiteIntArray* inputs, AddInput(graph_builder_->GetHexagonTensorId(b_tensor_id)); // Add Inputs A & B min/max - TF_LITE_ENSURE_STATUS( - ComputeMinAndMaxQuantValues(a_tensor, &a_input_min_, &a_input_max_)); - auto* a_input_min_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&a_input_min_), - sizeof(a_input_min_)); - auto* a_input_max_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&a_input_max_), - sizeof(a_input_max_)); - AddInput(TensorID(a_input_min_const->GetID(), 0)); - AddInput(TensorID(a_input_max_const->GetID(), 0)); - - TF_LITE_ENSURE_STATUS( - ComputeMinAndMaxQuantValues(b_tensor, &b_input_min_, &b_input_max_)); - auto* b_input_min_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&b_input_min_), - sizeof(b_input_min_)); - auto* b_input_max_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&b_input_max_), - sizeof(b_input_max_)); - AddInput(TensorID(b_input_min_const->GetID(), 0)); - AddInput(TensorID(b_input_max_const->GetID(), 0)); + TF_LITE_ENSURE_STATUS(ComputeAndAddMinAndMax(context, a_tensor)); + TF_LITE_ENSURE_STATUS(ComputeAndAddMinAndMax(context, b_tensor)); // Add output min/max const int output_tensor_id = outputs->data[0]; const auto& output_tensor = context->tensors[output_tensor_id]; - float output_min, output_max; - TF_LITE_ENSURE_STATUS( - ComputeMinAndMaxQuantValues(output_tensor, &output_min, &output_max)); - auto* output_min_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&output_min), sizeof(output_min)); - auto* output_max_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&output_max), sizeof(output_max)); - AddInput(TensorID(output_min_const->GetID(), 0)); - AddInput(TensorID(output_max_const->GetID(), 0)); + TF_LITE_ENSURE_STATUS(ComputeAndAddMinAndMax(context, output_tensor)); // Add outputs. 
int output_batch_size, output_height_size, output_width_size, diff --git a/tensorflow/lite/delegates/hexagon/builders/min_max_builder.h b/tensorflow/lite/delegates/hexagon/builders/min_max_builder.h index c12cb5ee665..5683f331799 100644 --- a/tensorflow/lite/delegates/hexagon/builders/min_max_builder.h +++ b/tensorflow/lite/delegates/hexagon/builders/min_max_builder.h @@ -35,7 +35,6 @@ class MinMaxOpBuilder : public OpBuilder { private: TensorID node_output_; - float a_input_min_, a_input_max_, b_input_min_, b_input_max_; }; } // namespace hexagon diff --git a/tensorflow/lite/delegates/hexagon/builders/mirror_pad_builder.cc b/tensorflow/lite/delegates/hexagon/builders/mirror_pad_builder.cc index 441140cd570..9f16e7aba58 100644 --- a/tensorflow/lite/delegates/hexagon/builders/mirror_pad_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/mirror_pad_builder.cc @@ -65,14 +65,7 @@ TfLiteStatus MirrorPadOpBuilder::PopulateSubGraph(const TfLiteIntArray* inputs, } // Min/max values for input tensor. - TF_LITE_ENSURE_STATUS( - ComputeMinAndMaxQuantValues(input_tensor, &input_min_, &input_max_)); - auto* input_min_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&input_min_), sizeof(input_min_)); - auto* input_max_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&input_max_), sizeof(input_max_)); - AddInput(TensorID(input_min_const->GetID(), 0)); - AddInput(TensorID(input_max_const->GetID(), 0)); + TF_LITE_ENSURE_STATUS(ComputeAndAddMinAndMax(context, input_tensor)); // Hexagon outputs for this node. int output_batch_size, output_height_size, output_width_size, diff --git a/tensorflow/lite/delegates/hexagon/builders/mirror_pad_builder.h b/tensorflow/lite/delegates/hexagon/builders/mirror_pad_builder.h index 50af36b7417..3afbac2c788 100644 --- a/tensorflow/lite/delegates/hexagon/builders/mirror_pad_builder.h +++ b/tensorflow/lite/delegates/hexagon/builders/mirror_pad_builder.h @@ -38,7 +38,6 @@ class MirrorPadOpBuilder : public OpBuilder { private: TensorID node_output_; - float input_min_, input_max_; std::vector paddings_shape_; }; diff --git a/tensorflow/lite/delegates/hexagon/builders/neg_op_builder.cc b/tensorflow/lite/delegates/hexagon/builders/neg_op_builder.cc index 8454258f700..93511dc491d 100644 --- a/tensorflow/lite/delegates/hexagon/builders/neg_op_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/neg_op_builder.cc @@ -26,13 +26,7 @@ TfLiteStatus NegOpBuilder::PopulateSubGraph(const TfLiteIntArray* inputs, int tensor_id = inputs->data[0]; const auto& input_tensor = context->tensors[tensor_id]; AddInput(graph_builder_->GetHexagonTensorId(tensor_id)); - ComputeMinAndMaxQuantValues(input_tensor, &input_min_, &input_max_); - auto* input_min_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&input_min_), sizeof(input_min_)); - auto* input_max_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&input_max_), sizeof(input_max_)); - AddInput(TensorID(input_min_const->GetID(), 0)); - AddInput(TensorID(input_max_const->GetID(), 0)); + TF_LITE_ENSURE_STATUS(ComputeAndAddMinAndMax(context, input_tensor)); // Hexagon outputs for this node. 
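[Aside] Every hunk in this patch deletes the same boilerplate: compute the tensor's quantized min/max, wrap the two floats in scalar const nodes, and append those nodes as op inputs. The shared OpBuilder::ComputeAndAddMinAndMax helper that the builders now call encapsulates that sequence. A sketch of what the helper plausibly does, inferred from the removed code rather than copied from the actual op_builder sources (signature and details may differ):

    // Sketch only: consolidates the per-builder min/max boilerplate.
    TfLiteStatus OpBuilder::ComputeAndAddMinAndMax(TfLiteContext* context,
                                                   const TfLiteTensor& tensor) {
      float min_value = 0.0f;
      float max_value = 0.0f;
      // Derive the float range from the tensor's quantization parameters.
      TF_LITE_ENSURE_STATUS(
          ComputeMinAndMaxQuantValues(tensor, &min_value, &max_value));
      // Wrap the range in two scalar const nodes and wire them in as inputs;
      // AddConstNodeWithData copies the bytes, so locals are safe here.
      auto* min_const = graph_builder_->AddConstNodeWithData(
          kScalarShape, reinterpret_cast<char*>(&min_value), sizeof(min_value));
      auto* max_const = graph_builder_->AddConstNodeWithData(
          kScalarShape, reinterpret_cast<char*>(&max_value), sizeof(max_value));
      AddInput(TensorID(min_const->GetID(), 0));
      AddInput(TensorID(max_const->GetID(), 0));
      return kTfLiteOk;
    }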
int output_batch_size, output_height_size, output_width_size, diff --git a/tensorflow/lite/delegates/hexagon/builders/neg_op_builder.h b/tensorflow/lite/delegates/hexagon/builders/neg_op_builder.h index c7b535e7b29..578447ec791 100644 --- a/tensorflow/lite/delegates/hexagon/builders/neg_op_builder.h +++ b/tensorflow/lite/delegates/hexagon/builders/neg_op_builder.h @@ -34,7 +34,6 @@ class NegOpBuilder : public OpBuilder { private: TensorID node_output_; - float input_min_, input_max_; }; } // namespace hexagon diff --git a/tensorflow/lite/delegates/hexagon/builders/pad_builder.cc b/tensorflow/lite/delegates/hexagon/builders/pad_builder.cc index 08393cb8720..7473d686391 100644 --- a/tensorflow/lite/delegates/hexagon/builders/pad_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/pad_builder.cc @@ -32,16 +32,9 @@ TfLiteStatus PadOpBuilder::PopulateSubGraph(const TfLiteIntArray* inputs, int tensor_id = inputs->data[0]; const auto& input_tensor = context->tensors[tensor_id]; AddInput(graph_builder_->GetHexagonTensorId(tensor_id)); - TF_LITE_ENSURE_STATUS( - ComputeMinAndMaxQuantValues(input_tensor, &input_min_, &input_max_)); - auto* input_min_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&input_min_), sizeof(input_min_)); - auto* input_max_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&input_max_), sizeof(input_max_)); // Min/max values for input tensor. - AddInput(TensorID(input_min_const->GetID(), 0)); - AddInput(TensorID(input_max_const->GetID(), 0)); + TF_LITE_ENSURE_STATUS(ComputeAndAddMinAndMax(context, input_tensor)); // Padding tensor. tensor_id = inputs->data[1]; diff --git a/tensorflow/lite/delegates/hexagon/builders/pad_builder.h b/tensorflow/lite/delegates/hexagon/builders/pad_builder.h index 855d6d582bb..62bfaa88e65 100644 --- a/tensorflow/lite/delegates/hexagon/builders/pad_builder.h +++ b/tensorflow/lite/delegates/hexagon/builders/pad_builder.h @@ -38,7 +38,6 @@ class PadOpBuilder : public OpBuilder { private: TensorID node_output_; - float input_min_, input_max_; }; } // namespace hexagon diff --git a/tensorflow/lite/delegates/hexagon/builders/pool_2d_builder.cc b/tensorflow/lite/delegates/hexagon/builders/pool_2d_builder.cc index 5dddcbfb0b3..2aba6c7f164 100644 --- a/tensorflow/lite/delegates/hexagon/builders/pool_2d_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/pool_2d_builder.cc @@ -33,14 +33,7 @@ TfLiteStatus Pool2dOpBuilder::PopulateSubGraph(const TfLiteIntArray* inputs, int tensor_id = inputs->data[0]; const auto& data_tensor = context->tensors[tensor_id]; AddInput(graph_builder_->GetHexagonTensorId(tensor_id)); - TF_LITE_ENSURE_STATUS( - ComputeMinAndMaxQuantValues(data_tensor, &data_min_, &data_max_)); - auto* data_min_const = graph_builder_->AddConstNodeWithData( - kScalarShape, (char*)&data_min_, sizeof(data_min_)); - auto* data_max_const = graph_builder_->AddConstNodeWithData( - kScalarShape, (char*)&data_max_, sizeof(data_max_)); - AddInput(TensorID(data_min_const->GetID(), 0)); - AddInput(TensorID(data_max_const->GetID(), 0)); + TF_LITE_ENSURE_STATUS(ComputeAndAddMinAndMax(context, data_tensor)); const TfLitePoolParams* pool_params = reinterpret_cast(builtin_data_); diff --git a/tensorflow/lite/delegates/hexagon/builders/pool_2d_builder.h b/tensorflow/lite/delegates/hexagon/builders/pool_2d_builder.h index 53bb0eb1b29..470ea070acc 100644 --- a/tensorflow/lite/delegates/hexagon/builders/pool_2d_builder.h +++ b/tensorflow/lite/delegates/hexagon/builders/pool_2d_builder.h @@ -40,7 
+40,7 @@ class Pool2dOpBuilder : public OpBuilder { TensorID node_output_; std::vector stride_shape_; std::vector filter_shape_; - float data_min_, data_max_, output_min_, output_max_; + float output_min_, output_max_; }; } // namespace hexagon diff --git a/tensorflow/lite/delegates/hexagon/builders/quantize_builder.cc b/tensorflow/lite/delegates/hexagon/builders/quantize_builder.cc index cc8ab5e6313..66b86abb6a8 100644 --- a/tensorflow/lite/delegates/hexagon/builders/quantize_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/quantize_builder.cc @@ -28,36 +28,16 @@ namespace hexagon { TfLiteStatus QuantizeOpBuilder::PopulateSubGraph(const TfLiteIntArray* inputs, const TfLiteIntArray* outputs, TfLiteContext* context) { - // Input. - float input_min = 0; - float input_max = 0; const auto& input_tensor = context->tensors[inputs->data[0]]; - ComputeMinAndMaxQuantValues(input_tensor, &input_min, &input_max); - auto* input_min_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&input_min), sizeof(input_min)); - auto* input_max_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&input_max), sizeof(input_max)); - - // Output. - float output_min = 0; - float output_max = 0; const auto& output_tensor = context->tensors[outputs->data[0]]; - TF_LITE_ENSURE_STATUS( - ComputeMinAndMaxQuantValues(output_tensor, &output_min, &output_max)); int output_batch_size, output_height_size, output_width_size, output_depth_size; GetDims(&output_batch_size, &output_height_size, &output_width_size, &output_depth_size, output_tensor.dims); - auto* requantized_min_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&output_min), sizeof(output_min)); - auto* requantized_max_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&output_max), sizeof(output_max)); AddInput(graph_builder_->GetHexagonTensorId(inputs->data[0])); - AddInput(TensorID(input_min_const->GetID(), 0)); - AddInput(TensorID(input_max_const->GetID(), 0)); - AddInput(TensorID(requantized_min_const->GetID(), 0)); - AddInput(TensorID(requantized_max_const->GetID(), 0)); + TF_LITE_ENSURE_STATUS(ComputeAndAddMinAndMax(context, input_tensor)); + TF_LITE_ENSURE_STATUS(ComputeAndAddMinAndMax(context, output_tensor)); // Hexagon outputs for this node. node_output_ = AddOutput(sizeof(uint8_t), 4, diff --git a/tensorflow/lite/delegates/hexagon/builders/reduce_builder.cc b/tensorflow/lite/delegates/hexagon/builders/reduce_builder.cc index ddc492541f7..25742a8aab5 100644 --- a/tensorflow/lite/delegates/hexagon/builders/reduce_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/reduce_builder.cc @@ -33,15 +33,7 @@ TfLiteStatus ReduceOpBuilder::PopulateSubGraph(const TfLiteIntArray* inputs, int tensor_id = inputs->data[0]; const auto& input_tensor = context->tensors[tensor_id]; AddInput(graph_builder_->GetHexagonTensorId(tensor_id)); - ComputeMinAndMaxQuantValues(input_tensor, &input_min_, &input_max_); - auto* input_min_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&input_min_), sizeof(input_min_)); - auto* input_max_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&input_max_), sizeof(input_max_)); - - // Min/max values for input tensor. - AddInput(TensorID(input_min_const->GetID(), 0)); - AddInput(TensorID(input_max_const->GetID(), 0)); + TF_LITE_ENSURE_STATUS(ComputeAndAddMinAndMax(context, input_tensor)); // Axes tensor should be constant. 
tensor_id = inputs->data[1]; diff --git a/tensorflow/lite/delegates/hexagon/builders/reduce_builder.h b/tensorflow/lite/delegates/hexagon/builders/reduce_builder.h index 8cab32637ac..ef860db268a 100644 --- a/tensorflow/lite/delegates/hexagon/builders/reduce_builder.h +++ b/tensorflow/lite/delegates/hexagon/builders/reduce_builder.h @@ -38,7 +38,6 @@ class ReduceOpBuilder : public OpBuilder { private: TensorID node_output_; - float input_min_, input_max_; }; } // namespace hexagon diff --git a/tensorflow/lite/delegates/hexagon/builders/resize_bilinear_builder.cc b/tensorflow/lite/delegates/hexagon/builders/resize_bilinear_builder.cc index dccbdc5fab5..3387070e9b2 100644 --- a/tensorflow/lite/delegates/hexagon/builders/resize_bilinear_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/resize_bilinear_builder.cc @@ -47,15 +47,8 @@ TfLiteStatus ResizeBilinearOpBuilder::PopulateSubGraph( AddInput(TensorID(dims_const->GetID(), 0)); // Input min/max - TF_LITE_ENSURE_OK(context, ComputeMinAndMaxQuantValues( - input_tensor, &input_min_, &input_max_)); - auto* input_min_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&input_min_), sizeof(input_min_)); - auto* input_max_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&input_max_), sizeof(input_max_)); + TF_LITE_ENSURE_STATUS(ComputeAndAddMinAndMax(context, input_tensor)); - AddInput(TensorID(input_min_const->GetID(), 0)); - AddInput(TensorID(input_max_const->GetID(), 0)); // Align Corners & half-pixel-centers. const TfLiteResizeBilinearParams* params = reinterpret_cast(builtin_data_); diff --git a/tensorflow/lite/delegates/hexagon/builders/resize_bilinear_builder.h b/tensorflow/lite/delegates/hexagon/builders/resize_bilinear_builder.h index 3fb23f88542..3e8891e6855 100644 --- a/tensorflow/lite/delegates/hexagon/builders/resize_bilinear_builder.h +++ b/tensorflow/lite/delegates/hexagon/builders/resize_bilinear_builder.h @@ -36,7 +36,6 @@ class ResizeBilinearOpBuilder : public OpBuilder { private: TensorID node_output_; - float input_min_, input_max_; }; } // namespace hexagon diff --git a/tensorflow/lite/delegates/hexagon/builders/resize_nearest_neighbor_builder.cc b/tensorflow/lite/delegates/hexagon/builders/resize_nearest_neighbor_builder.cc index 735130f8fa2..5ec23d79bcb 100644 --- a/tensorflow/lite/delegates/hexagon/builders/resize_nearest_neighbor_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/resize_nearest_neighbor_builder.cc @@ -32,12 +32,6 @@ TfLiteStatus ResizeNearestNeighborOpBuilder::PopulateSubGraph( int tensor_id = inputs->data[0]; const auto& input_tensor = context->tensors[tensor_id]; AddInput(graph_builder_->GetHexagonTensorId(tensor_id)); - TF_LITE_ENSURE_STATUS( - ComputeMinAndMaxQuantValues(input_tensor, &input_min_, &input_max_)); - auto* input_min_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&input_min_), sizeof(input_min_)); - auto* input_max_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&input_max_), sizeof(input_max_)); // Output dimensions tensor. tensor_id = inputs->data[1]; @@ -53,8 +47,7 @@ TfLiteStatus ResizeNearestNeighborOpBuilder::PopulateSubGraph( } // Min/max values for input tensor. - AddInput(TensorID(input_min_const->GetID(), 0)); - AddInput(TensorID(input_max_const->GetID(), 0)); + TF_LITE_ENSURE_STATUS(ComputeAndAddMinAndMax(context, input_tensor)); // Align corners. 
const TfLiteResizeNearestNeighborParams* params = diff --git a/tensorflow/lite/delegates/hexagon/builders/resize_nearest_neighbor_builder.h b/tensorflow/lite/delegates/hexagon/builders/resize_nearest_neighbor_builder.h index 3630257cb79..2c16eff1f89 100644 --- a/tensorflow/lite/delegates/hexagon/builders/resize_nearest_neighbor_builder.h +++ b/tensorflow/lite/delegates/hexagon/builders/resize_nearest_neighbor_builder.h @@ -39,7 +39,6 @@ class ResizeNearestNeighborOpBuilder : public OpBuilder { private: TensorID node_output_; - float input_min_, input_max_; }; } // namespace hexagon diff --git a/tensorflow/lite/delegates/hexagon/builders/slice_builder.cc b/tensorflow/lite/delegates/hexagon/builders/slice_builder.cc index 4ef6c302ad8..05dfd3ffeb0 100644 --- a/tensorflow/lite/delegates/hexagon/builders/slice_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/slice_builder.cc @@ -64,14 +64,7 @@ TfLiteStatus SliceOpBuilder::PopulateSubGraph(const TfLiteIntArray* inputs, AddInput(TensorID(sizes_node->GetID(), 0)); // Input min/max - TF_LITE_ENSURE_STATUS( - ComputeMinAndMaxQuantValues(input_tensor, &input_min_, &input_max_)); - auto* input_min_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&input_min_), sizeof(input_min_)); - auto* input_max_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&input_max_), sizeof(input_max_)); - AddInput(TensorID(input_min_const->GetID(), 0)); - AddInput(TensorID(input_max_const->GetID(), 0)); + TF_LITE_ENSURE_STATUS(ComputeAndAddMinAndMax(context, input_tensor)); // Outputs int output_batch_size, output_height_size, output_width_size, diff --git a/tensorflow/lite/delegates/hexagon/builders/slice_builder.h b/tensorflow/lite/delegates/hexagon/builders/slice_builder.h index 700c3097bed..9bb0c586f51 100644 --- a/tensorflow/lite/delegates/hexagon/builders/slice_builder.h +++ b/tensorflow/lite/delegates/hexagon/builders/slice_builder.h @@ -35,7 +35,6 @@ class SliceOpBuilder : public OpBuilder { private: TensorID node_output_; - float input_min_, input_max_; }; } // namespace hexagon diff --git a/tensorflow/lite/delegates/hexagon/builders/softmax_builder.cc b/tensorflow/lite/delegates/hexagon/builders/softmax_builder.cc index fea696b7421..5c77dc710d6 100644 --- a/tensorflow/lite/delegates/hexagon/builders/softmax_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/softmax_builder.cc @@ -33,14 +33,7 @@ TfLiteStatus SoftmaxOpBuilder::PopulateSubGraph(const TfLiteIntArray* inputs, int tensor_id = inputs->data[0]; const auto& input_tensor = context->tensors[tensor_id]; AddInput(graph_builder_->GetHexagonTensorId(tensor_id)); - TF_LITE_ENSURE_STATUS( - ComputeMinAndMaxQuantValues(input_tensor, &input_min_, &input_max_)); - auto* input_min_const = graph_builder_->AddConstNodeWithData( - kScalarShape, (char*)&input_min_, sizeof(input_min_)); - auto* input_max_const = graph_builder_->AddConstNodeWithData( - kScalarShape, (char*)&input_max_, sizeof(input_max_)); - AddInput(TensorID(input_min_const->GetID(), 0)); - AddInput(TensorID(input_max_const->GetID(), 0)); + TF_LITE_ENSURE_STATUS(ComputeAndAddMinAndMax(context, input_tensor)); // beta value const TfLiteSoftmaxParams* softmax_params = diff --git a/tensorflow/lite/delegates/hexagon/builders/softmax_builder.h b/tensorflow/lite/delegates/hexagon/builders/softmax_builder.h index 1d86a82b5c5..cb35a2fe794 100644 --- a/tensorflow/lite/delegates/hexagon/builders/softmax_builder.h +++ b/tensorflow/lite/delegates/hexagon/builders/softmax_builder.h @@ -39,7 
+39,6 @@ class SoftmaxOpBuilder : public OpBuilder { private: TensorID node_output_; float beta_value_ = 1.0f; - float input_min_, input_max_; }; } // namespace hexagon diff --git a/tensorflow/lite/delegates/hexagon/builders/space_to_depth_builder.cc b/tensorflow/lite/delegates/hexagon/builders/space_to_depth_builder.cc index 4447ed9ca2b..7dbfd081f60 100644 --- a/tensorflow/lite/delegates/hexagon/builders/space_to_depth_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/space_to_depth_builder.cc @@ -30,13 +30,6 @@ TfLiteStatus SpaceToDepthOpBuilder::PopulateSubGraph( TfLiteContext* context) { // Input tensor. int tensor_id = inputs->data[0]; - const auto& input_tensor = context->tensors[tensor_id]; - TF_LITE_ENSURE_STATUS( - ComputeMinAndMaxQuantValues(input_tensor, &input_min_, &input_max_)); - auto* input_min_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&input_min_), sizeof(input_min_)); - auto* input_max_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&input_max_), sizeof(input_max_)); // Block size. const TfLiteSpaceToDepthParams* space_to_depth_params = @@ -48,8 +41,8 @@ TfLiteStatus SpaceToDepthOpBuilder::PopulateSubGraph( // All inputs. AddInput(graph_builder_->GetHexagonTensorId(tensor_id)); AddInput(TensorID(block_size_node->GetID(), 0)); - AddInput(TensorID(input_min_const->GetID(), 0)); - AddInput(TensorID(input_max_const->GetID(), 0)); + TF_LITE_ENSURE_STATUS( + ComputeAndAddMinAndMax(context, context->tensors[tensor_id])); // Hexagon outputs for this node. int output_batch_size, output_height_size, output_width_size, diff --git a/tensorflow/lite/delegates/hexagon/builders/space_to_depth_builder.h b/tensorflow/lite/delegates/hexagon/builders/space_to_depth_builder.h index 95d1a02feed..347c0aaed42 100644 --- a/tensorflow/lite/delegates/hexagon/builders/space_to_depth_builder.h +++ b/tensorflow/lite/delegates/hexagon/builders/space_to_depth_builder.h @@ -40,7 +40,6 @@ class SpaceToDepthOpBuilder : public OpBuilder { private: TensorID node_output_; - float input_min_, input_max_; int block_size_; }; diff --git a/tensorflow/lite/delegates/hexagon/builders/split_builder.cc b/tensorflow/lite/delegates/hexagon/builders/split_builder.cc index 833ef2f5d3d..af151e53913 100644 --- a/tensorflow/lite/delegates/hexagon/builders/split_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/split_builder.cc @@ -53,14 +53,7 @@ TfLiteStatus SplitOpBuilder::PopulateSubGraph(const TfLiteIntArray* inputs, // Input data tensor & min/max. AddInput(graph_builder_->GetHexagonTensorId(input_tensor_id)); - TF_LITE_ENSURE_STATUS( - ComputeMinAndMaxQuantValues(input_tensor, &input_min_, &input_max_)); - auto* input_min_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&input_min_), sizeof(input_min_)); - auto* input_max_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&input_max_), sizeof(input_max_)); - AddInput(TensorID(input_min_const->GetID(), 0)); - AddInput(TensorID(input_max_const->GetID(), 0)); + TF_LITE_ENSURE_STATUS(ComputeAndAddMinAndMax(context, input_tensor)); // Output data tensors. 
for (int i = 0; i < outputs->size; ++i) { diff --git a/tensorflow/lite/delegates/hexagon/builders/split_builder.h b/tensorflow/lite/delegates/hexagon/builders/split_builder.h index 6681158e7e7..e3ec45e8c70 100644 --- a/tensorflow/lite/delegates/hexagon/builders/split_builder.h +++ b/tensorflow/lite/delegates/hexagon/builders/split_builder.h @@ -38,8 +38,6 @@ class SplitOpBuilder : public OpBuilder { private: std::vector node_outputs_; - float input_min_; - float input_max_; }; } // namespace hexagon diff --git a/tensorflow/lite/delegates/hexagon/builders/strided_slice_builder.cc b/tensorflow/lite/delegates/hexagon/builders/strided_slice_builder.cc index 125c1f9ea87..ea59775f7f3 100644 --- a/tensorflow/lite/delegates/hexagon/builders/strided_slice_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/strided_slice_builder.cc @@ -67,14 +67,7 @@ TfLiteStatus StridedSliceOpBuilder::PopulateSubGraph( AddInput(TensorID(shrink_axis_mask_const->GetID(), 0)); // Input min/max - TF_LITE_ENSURE_STATUS( - ComputeMinAndMaxQuantValues(input_tensor, &input_min_, &input_max_)); - auto* input_min_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&input_min_), sizeof(input_min_)); - auto* input_max_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&input_max_), sizeof(input_max_)); - AddInput(TensorID(input_min_const->GetID(), 0)); - AddInput(TensorID(input_max_const->GetID(), 0)); + TF_LITE_ENSURE_STATUS(ComputeAndAddMinAndMax(context, input_tensor)); // Slice outputs. int output_batch_size, output_height_size, output_width_size, diff --git a/tensorflow/lite/delegates/hexagon/builders/strided_slice_builder.h b/tensorflow/lite/delegates/hexagon/builders/strided_slice_builder.h index 330e6636a1a..b7f042d7562 100644 --- a/tensorflow/lite/delegates/hexagon/builders/strided_slice_builder.h +++ b/tensorflow/lite/delegates/hexagon/builders/strided_slice_builder.h @@ -35,7 +35,6 @@ class StridedSliceOpBuilder : public OpBuilder { private: TensorID node_output_; - float input_min_, input_max_; }; } // namespace hexagon diff --git a/tensorflow/lite/delegates/hexagon/builders/transpose_builder.cc b/tensorflow/lite/delegates/hexagon/builders/transpose_builder.cc index 9ad0262fbc2..4a7304d011e 100644 --- a/tensorflow/lite/delegates/hexagon/builders/transpose_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/transpose_builder.cc @@ -39,15 +39,7 @@ TfLiteStatus TransposeOpBuilder::PopulateSubGraph(const TfLiteIntArray* inputs, AddInput(graph_builder_->GetHexagonTensorId(tensor_id)); } - TF_LITE_ENSURE_STATUS( - ComputeMinAndMaxQuantValues(input_tensor, &input_min_, &input_max_)); - auto* input_min_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&input_min_), sizeof(input_min_)); - auto* input_max_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&input_max_), sizeof(input_max_)); - // Min/max values for input tensor. - AddInput(TensorID(input_min_const->GetID(), 0)); - AddInput(TensorID(input_max_const->GetID(), 0)); + TF_LITE_ENSURE_STATUS(ComputeAndAddMinAndMax(context, input_tensor)); // Hexagon outputs for this node. 
int output_batch_size, output_height_size, output_width_size, diff --git a/tensorflow/lite/delegates/hexagon/builders/transpose_builder.h b/tensorflow/lite/delegates/hexagon/builders/transpose_builder.h index 5f99087b935..f9418300b00 100644 --- a/tensorflow/lite/delegates/hexagon/builders/transpose_builder.h +++ b/tensorflow/lite/delegates/hexagon/builders/transpose_builder.h @@ -34,7 +34,6 @@ class TransposeOpBuilder : public OpBuilder { private: TensorID node_output_; - float input_min_, input_max_; }; } // namespace hexagon } // namespace delegates diff --git a/tensorflow/lite/delegates/hexagon/builders/transpose_conv_2d_builder.cc b/tensorflow/lite/delegates/hexagon/builders/transpose_conv_2d_builder.cc index d7d700b2e77..d2620f71007 100644 --- a/tensorflow/lite/delegates/hexagon/builders/transpose_conv_2d_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/transpose_conv_2d_builder.cc @@ -83,14 +83,6 @@ TfLiteStatus TransposeConv2dOpBuilder::PopulateSubGraph( int tensor_id = inputs->data[2]; const auto& data_tensor = context->tensors[tensor_id]; AddInput(graph_builder_->GetHexagonTensorId(tensor_id)); - float data_min = 0; - float data_max = 0; - TF_LITE_ENSURE_STATUS( - ComputeMinAndMaxQuantValues(data_tensor, &data_min, &data_max)); - auto* data_min_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&data_min), sizeof(data_min)); - auto* data_max_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&data_max), sizeof(data_max)); // WEIGHTS. tensor_id = inputs->data[1]; @@ -150,8 +142,7 @@ TfLiteStatus TransposeConv2dOpBuilder::PopulateSubGraph( kScalarShape, reinterpret_cast(&weights_max), sizeof(weights_max)); // Min/max inputs for data & weights tensors. - AddInput(TensorID(data_min_const->GetID(), 0)); - AddInput(TensorID(data_max_const->GetID(), 0)); + TF_LITE_ENSURE_STATUS(ComputeAndAddMinAndMax(context, data_tensor)); AddInput(TensorID(weights_min_const->GetID(), 0)); AddInput(TensorID(weights_max_const->GetID(), 0)); @@ -206,16 +197,8 @@ TfLiteStatus TransposeConv2dOpBuilder::PopulateSubGraph( AddInput(TensorID(bias_max_const->GetID(), 0)); // Output quantization. - float output_min = 0; - float output_max = 0; - ComputeMinAndMaxQuantValues(context->tensors[outputs->data[0]], &output_min, - &output_max); - auto* output_min_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&output_min), sizeof(output_min)); - auto* output_max_const = graph_builder_->AddConstNodeWithData( - kScalarShape, reinterpret_cast(&output_max), sizeof(output_max)); - AddInput(TensorID(output_min_const->GetID(), 0)); - AddInput(TensorID(output_max_const->GetID(), 0)); + TF_LITE_ENSURE_STATUS( + ComputeAndAddMinAndMax(context, context->tensors[outputs->data[0]])); // Channel scales, if this op is per-channel quantized. if (channel_scales_node_ != nullptr) { From 69ca56e7f41910bb330f4d353a04f36080e606fe Mon Sep 17 00:00:00 2001 From: Edward Loper Date: Tue, 28 Jul 2020 13:47:45 -0700 Subject: [PATCH 1521/2522] Update np.data.Dataset.as_numpy_iterator to support ragged tensors. 
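A minimal usage sketch of the new behavior (illustrative only; it uses the public `tf.data` / `tf.ragged` spelling rather than the internal modules exercised in the updated test below, and the row values are made up):

    import tensorflow as tf  # assumes TF 2.x eager execution

    # A ragged tensor whose rows have different lengths.
    rt = tf.ragged.constant([[1, 2], [3], [4, 5, 6]])
    ds = tf.data.Dataset.from_tensor_slices(rt)

    # Previously this raised TypeError because the element spec was a
    # RaggedTensorSpec; with this change it yields one numpy array per row.
    for row in ds.as_numpy_iterator():
        print(row)  # array([1, 2]), then array([3]), then array([4, 5, 6])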
PiperOrigin-RevId: 323645392 Change-Id: Id80daa55a676bfd523f23f72394acb79f5088fd5 --- .../python/data/kernel_tests/as_numpy_iterator_test.py | 10 ++++++---- tensorflow/python/data/ops/dataset_ops.py | 5 ++++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/data/kernel_tests/as_numpy_iterator_test.py b/tensorflow/python/data/kernel_tests/as_numpy_iterator_test.py index ea80389b0a5..a69e49439c4 100644 --- a/tensorflow/python/data/kernel_tests/as_numpy_iterator_test.py +++ b/tensorflow/python/data/kernel_tests/as_numpy_iterator_test.py @@ -27,7 +27,7 @@ from tensorflow.python.data.ops import dataset_ops from tensorflow.python.framework import combinations from tensorflow.python.framework import constant_op from tensorflow.python.framework import sparse_tensor -from tensorflow.python.ops.ragged import ragged_tensor_value +from tensorflow.python.ops.ragged import ragged_factory_ops from tensorflow.python.platform import test @@ -74,9 +74,11 @@ class AsNumpyIteratorTest(test_base.DatasetTestBase, parameterized.TestCase): @combinations.generate(test_base.eager_only_combinations()) def testRaggedElement(self): - self._testInvalidElement( - ragged_tensor_value.RaggedTensorValue( - np.array([0, 1, 2]), np.array([0, 1, 3], dtype=np.int64))) + lst = [[1, 2], [3], [4, 5, 6]] + rt = ragged_factory_ops.constant(lst) + ds = dataset_ops.Dataset.from_tensor_slices(rt) + for actual, expected in zip(ds.as_numpy_iterator(), lst): + self.assertTrue(np.array_equal(actual, expected)) @combinations.generate(test_base.eager_only_combinations()) def testDatasetElement(self): diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index bd75d0a735a..512cd2db90a 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -67,6 +67,7 @@ from tensorflow.python.ops import gen_io_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import script_ops from tensorflow.python.ops import string_ops +from tensorflow.python.ops.ragged import ragged_tensor from tensorflow.python.training.tracking import base as tracking_base from tensorflow.python.training.tracking import tracking from tensorflow.python.util import deprecation @@ -522,7 +523,9 @@ class DatasetV2(collections_abc.Iterable, tracking_base.Trackable, raise RuntimeError("as_numpy_iterator() is not supported while tracing " "functions") for component_spec in nest.flatten(self.element_spec): - if not isinstance(component_spec, tensor_spec.TensorSpec): + if not isinstance( + component_spec, + (tensor_spec.TensorSpec, ragged_tensor.RaggedTensorSpec)): raise TypeError( "Dataset.as_numpy_iterator() does not support datasets containing " + str(component_spec.value_type)) From 9e475aa305ddab71752064d6e475fc8b20201028 Mon Sep 17 00:00:00 2001 From: Ayush Dubey Date: Tue, 28 Jul 2020 14:02:59 -0700 Subject: [PATCH 1522/2522] Ensure input shape validation always runs in ScopedAllocatorOptimizer. This change fixes a bug in the optimizer: the check for valid shape should always happen, irrespective of other checks. Also enable Adagrad optimizer in CTL correctness check which triggers this bug. 
PiperOrigin-RevId: 323648534 Change-Id: I2f785f6ca9e38ce63de12b6ceabfb7c405b341f7 --- .../optimizers/scoped_allocator_optimizer.cc | 12 ++++++------ .../python/keras/distribute/ctl_correctness_test.py | 6 ++++-- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.cc b/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.cc index 3b3a747fd18..a3328f12801 100644 --- a/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.cc @@ -118,18 +118,18 @@ Status CheckTypesAndGetShapes(const GraphProperties& graph_properties, *type = props.dtype(); } else if (*type != props.dtype()) { return errors::Internal("Group ops don't all have same type"); - } else if (!TensorShape::IsValid(props.shape()) || - props.shape().unknown_rank()) { - // TensorShape::IsValid may return true if unknown_rank is True, i.e. - // number of dimensions is unknown. But for ScopedAllocatorOptimizer we - // need to know the shape fully. - return errors::Internal("Complete shape not known for ", n->name()); } if (*type != dtype) { return errors::Internal( "Type mismatch: type in op attr = ", DataTypeString(dtype), ", type in output props = ", DataTypeString(*type)); } + if (!TensorShape::IsValid(props.shape()) || props.shape().unknown_rank()) { + // TensorShape::IsValid may return true if unknown_rank is True, i.e. + // number of dimensions is unknown. But for ScopedAllocatorOptimizer we + // need to know the shape fully. + return errors::Internal("Complete shape not known for ", n->name()); + } VLOG(2) << "Adding shape " << props.shape().DebugString(); shapes->push_back(TensorShape(props.shape())); } diff --git a/tensorflow/python/keras/distribute/ctl_correctness_test.py b/tensorflow/python/keras/distribute/ctl_correctness_test.py index a55f80e4bf2..dcac3f37e71 100644 --- a/tensorflow/python/keras/distribute/ctl_correctness_test.py +++ b/tensorflow/python/keras/distribute/ctl_correctness_test.py @@ -234,8 +234,10 @@ class TestDistributionStrategyDnnCorrectness(test.TestCase, sync_batchnorm=[True, False]) + combinations.combine( distribution=strategy_combinations.multiworker_strategies, - optimizer_fn= - optimizer_combinations.gradient_descent_optimizer_keras_v2_fn, + optimizer_fn=[ + optimizer_combinations.gradient_descent_optimizer_keras_v2_fn, + optimizer_combinations.adagrad_optimizer_keras_v2_fn + ], mode=['eager'], iteration_type=['iterator', 'dataset'], inside_func=[False, True], From 33bee8e716868542efdc087000462f9a9c544e3a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 28 Jul 2020 14:05:33 -0700 Subject: [PATCH 1523/2522] Fix the release notes message about compute capabilities. PiperOrigin-RevId: 323649173 Change-Id: Iaf6049ab4085c431d0a209faad1cb4e4dd331a53 --- RELEASE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE.md b/RELEASE.md index b248bae9a68..02550040c47 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -177,7 +177,7 @@ stjohnso98, , , , , * Update `tf.saved_model.SaveOptions` with [`experimental_io_device`](https://www.tensorflow.org/versions/r2.3/api_docs/python/tf/saved_model/SaveOptions?hl=en) as arg with default value `None` to choose the I/O device for saving models and weights. * Mutable tables now restore checkpointed values when loaded from SavedModel. * GPU - * No longer includes PTX kernels for GPU except for sm_70 to reduce binary size. 
On systems with NVIDIA® Ampere GPUs (CUDA architecture 8.0) or newer, kernels are JIT-compiled from PTX and TensorFlow can take over 30 minutes to start up. This overhead can be limited to the first start up by increasing the default JIT cache size with: `export CUDA_CACHE_MAXSIZE=2147483648`.: + * TF 2.3 includes PTX kernels only for [compute capability](https://developer.nvidia.com/cuda-gpus) 7.0 to reduce the TF pip binary size. Earlier releases included PTX for a variety of older compute capabilities. * Others * Retain parent namescope for ops added inside `tf.while_loop`/`tf.cond`/`tf.switch_case`. * Update `tf.vectorized_map` to support vectorizing `tf.while_loop` and TensorList operations. From 916e0023b988d97a1456aedc3c60b223ce494b25 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 28 Jul 2020 14:06:14 -0700 Subject: [PATCH 1524/2522] Prevent dereferencing nullptr in CopyIntoSparseTensor called with empty tensor. PiperOrigin-RevId: 323649311 Change-Id: I5f55bbd6a3c6be35a0ba9620e395e447e247e15e --- tensorflow/core/util/example_proto_helper.cc | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/util/example_proto_helper.cc b/tensorflow/core/util/example_proto_helper.cc index 117991a2f64..bc6d0b896fb 100644 --- a/tensorflow/core/util/example_proto_helper.cc +++ b/tensorflow/core/util/example_proto_helper.cc @@ -155,11 +155,13 @@ int64 CopyIntoSparseTensor(const Tensor& in, const int batch, CHECK_EQ(dtype, values->dtype()); // Update indices. - auto ix_t = indices->matrix(); - int64* ix_p = &ix_t(offset, 0); - for (int64 i = 0; i < num_elements; ++i, ix_p += 2) { - *ix_p = batch; // Column 0 stores the batch entry - *(ix_p + 1) = i; // Column 1 stores the index in the batch + if (num_elements > 0) { + auto ix_t = indices->matrix(); + int64* ix_p = &ix_t(offset, 0); + for (int64 i = 0; i < num_elements; ++i, ix_p += 2) { + *ix_p = batch; // Column 0 stores the batch entry + *(ix_p + 1) = i; // Column 1 stores the index in the batch + } } // Copy values over. From 711e05bd78497ffa941cae0a3dca97e42f1825f3 Mon Sep 17 00:00:00 2001 From: Henry Tan Date: Tue, 28 Jul 2020 14:16:41 -0700 Subject: [PATCH 1525/2522] Refactor `TpuCompilationCacheEntry` interface to return `TpuProgramGroupInterface` and `core_index` and makes CacheEntry less transparent and move application specific logics outside of cache. 
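A rough sketch of the intended call-site pattern after this refactoring (hypothetical helper, not part of this change; it assumes the caller includes tpu_program_group.h and platform/casts.h and mirrors the down_cast logic removed from the cache entry below):

    // The cache entry no longer interprets the program group; the caller does.
    const XLA_TpuProgram* GetTpuProgramForCore(
        const tensorflow::tpu::TpuCompilationCacheEntry& entry) {
      // Application-specific knowledge of the concrete type now lives at the
      // call site instead of inside the cache.
      const auto* group =
          tensorflow::down_cast<const tensorflow::tpu::TpuProgramGroup*>(
              entry.tpu_program_group());
      return group->tpu_programs()[entry.core_index()];
    }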
PiperOrigin-RevId: 323651431 Change-Id: Ia790cf3bc5b17fe9647ac93b960357cf48868efd --- tensorflow/core/tpu/kernels/BUILD | 7 +-- .../kernels/tpu_compilation_cache_entry.cc | 54 ------------------- .../tpu/kernels/tpu_compilation_cache_entry.h | 26 ++++----- 3 files changed, 15 insertions(+), 72 deletions(-) delete mode 100644 tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.cc diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index e5f49158231..31e64c77762 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -209,19 +209,14 @@ cc_library( cc_library( name = "tpu_compilation_cache_entry", - srcs = ["tpu_compilation_cache_entry.cc"], hdrs = [ "tpu_compilation_cache_entry.h", ], deps = [ - ":compiled_subgraph", - ":tpu_compilation_cache_proto_cc", ":tpu_executable_info_proto_cc", - ":tpu_program_group", + ":tpu_program_group_interface", "//tensorflow/compiler/xla/service:hlo_proto_cc", - "//tensorflow/core:framework", "//tensorflow/core/lib/core:refcount", - "//tensorflow/core/platform:casts", ], ) diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.cc b/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.cc deleted file mode 100644 index 73f55853306..00000000000 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.cc +++ /dev/null @@ -1,54 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h" - -#include "tensorflow/core/platform/casts.h" - -namespace tensorflow { -namespace tpu { - -TpuCompilationCacheEntry::TpuCompilationCacheEntry( - const TpuProgramGroupInterface* tpu_program_group, int core_index) - : tpu_program_group_( - tensorflow::down_cast(tpu_program_group)), - core_index_(core_index) {} - -// Constructor for an empty entry. -TpuCompilationCacheEntry::TpuCompilationCacheEntry() - : tpu_program_group_(nullptr) {} - -const TPUExecutableInfoProto* TpuCompilationCacheEntry::get_executable_info() - const { - return &(tpu_program_group_->executable_info()); -} - -const TPUHostTransferInfoProto* -TpuCompilationCacheEntry::get_host_transfer_info() const { - return &(tpu_program_group_->host_transfer_info()); -} - -const xla::HloProto* TpuCompilationCacheEntry::get_hlo_metadata() const { - return tpu_program_group_->hlo_metadatas()[core_index_]; -} - -// TODO(henrytan,jiawenhao): When should we expect more than one -// XLA_TpuProgram* per TpuProgram? Remove the program_count CHECK below then. 
-const XLA_TpuProgram* TpuCompilationCacheEntry::get_tpu_program() const { - CHECK_EQ(tpu_program_group_->program_count(), 1); - return tpu_program_group_->tpu_programs()[core_index_]; -} - -} // namespace tpu -} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h b/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h index b3766b8b4dd..832d76bfceb 100644 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h @@ -18,30 +18,32 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo.pb.h" #include "tensorflow/core/lib/core/refcount.h" #include "tensorflow/core/tpu/kernels/tpu_executable_info.pb.h" -#include "tensorflow/core/tpu/kernels/tpu_program_group.h" +#include "tensorflow/core/tpu/kernels/tpu_program_group_interface.h" namespace tensorflow { namespace tpu { -// A version of `CompilationCacheEntry` to access Tpu binary program -// `XLA_TpuProgram`. +// Cache entry to hold a `TpuProgramGroupInterface` object that can be used to +// fetch a TPU program for a given TPU core index. class TpuCompilationCacheEntry { public: explicit TpuCompilationCacheEntry( - const TpuProgramGroupInterface* tpu_program_group, int core_index); + const TpuProgramGroupInterface* tpu_program_group, int core_index) + : tpu_program_group_(tpu_program_group), core_index_(core_index) {} + // Constructor for an empty entry. - TpuCompilationCacheEntry(); - const TPUExecutableInfoProto* get_executable_info() const; - const TPUHostTransferInfoProto* get_host_transfer_info() const; - const xla::HloProto* get_hlo_metadata() const; - // TODO(henrytan): maybe nicer to return C++ wrapper of `XLA_TpuProgram` - const XLA_TpuProgram* get_tpu_program() const; + TpuCompilationCacheEntry() : tpu_program_group_(nullptr), core_index_(-1) {} + + const TpuProgramGroupInterface* tpu_program_group() const { + return tpu_program_group_; + } + + int core_index() const { return core_index_; } private: - const TpuProgramGroup* tpu_program_group_; + const TpuProgramGroupInterface* tpu_program_group_; int core_index_; }; - } // namespace tpu } // namespace tensorflow From a78f101f8e655948e442f1a471d296c9394a12e6 Mon Sep 17 00:00:00 2001 From: Tim Shen Date: Tue, 28 Jul 2020 14:24:31 -0700 Subject: [PATCH 1526/2522] [XLA/GPU] Remove uses of BufferAssignment in nested computation lowering. NFC BufferAssignment doesn't really do intelligent work in nested computations, because all allocations are either constants or local allocas. Removing it helps with XLA/GPU -> LHLO migration. 
PiperOrigin-RevId: 323653069 Change-Id: I50acf0bdf07072102b145e5ffb387d1215f3887c --- .../xla/service/gpu/hlo_to_ir_bindings.cc | 38 +++++-------------- .../xla/service/gpu/hlo_to_ir_bindings.h | 2 +- .../xla/service/gpu/ir_emitter_nested.cc | 2 +- .../service/llvm_ir/buffer_assignment_util.cc | 9 +++-- .../service/llvm_ir/buffer_assignment_util.h | 3 ++ 5 files changed, 20 insertions(+), 34 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc index 986e7192859..5d38d1b727c 100644 --- a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc +++ b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc @@ -35,7 +35,7 @@ namespace gpu { using absl::StrAppend; using absl::StrCat; -Status HloToIrBindings::EmitBasePointersForHlos( +void HloToIrBindings::EmitBasePointersForHlos( absl::Span io_hlos, absl::Span non_io_hlos) { CHECK(is_nested_); @@ -77,44 +77,23 @@ Status HloToIrBindings::EmitBasePointersForHlos( continue; } - if (!buffer_assignment_->HasTopLevelAllocation(non_io_hlo)) { - continue; - } - - auto status = ShapeUtil::ForEachSubshapeWithStatus( + ShapeUtil::ForEachSubshape( non_io_hlo->shape(), [&](const Shape& /*subshape*/, const ShapeIndex& index) { - // A non-IO HLO with a buffer is bound to an alloca if it is - // thread-local. - auto slice_result = - buffer_assignment_->GetUniqueSlice(non_io_hlo, index); - if (!slice_result.ok()) { - return Status::OK(); - } - const BufferAllocation::Slice slice = - slice_result.ConsumeValueOrDie(); - if (slice.allocation()->is_thread_local()) { + if (non_io_hlo->opcode() == HloOpcode::kConstant) { + llvm::Value* global_for_constant = module_->getGlobalVariable( + llvm_ir::ConstantHloToGlobalName(*non_io_hlo)); + BindHloToIrValue(*non_io_hlo, global_for_constant); + } else { llvm::Type* pointee_type = llvm_ir::ShapeToIrType(non_io_hlo->shape(), module_); BindHloToIrValue(*non_io_hlo, llvm_ir::EmitAllocaAtFunctionEntry( pointee_type, /*name=*/"", b_), index); - } else if (slice.allocation()->is_constant()) { - llvm::Value* global_for_constant = module_->getGlobalVariable( - llvm_ir::ConstantBufferAllocationToGlobalName( - *slice.allocation())); - BindHloToIrValue(*non_io_hlo, global_for_constant); - } else { - return InternalError( - "Nested computation are not expected to take the temporary " - "buffer. All buffers are either constant or thread-local."); } - return Status::OK(); }); - TF_RETURN_IF_ERROR(status); } - return Status::OK(); } llvm::Value* HloToIrBindings::EmitGetTupleElement(const HloInstruction* gte, @@ -214,7 +193,8 @@ llvm_ir::IrArray HloToIrBindings::GetIrArray(const HloInstruction& hlo, // Therefore if hlo's output buffer is not modified within consumer, and if // consumer runs hlo only once (so that it doesn't create two different // outputs), then we can mark ir_array as invariant over the whole program. 
- if (BuffersInvariantWithinConsumer(hlo, consumer, buffer_assignment_)) { + if (!is_nested_ && + BuffersInvariantWithinConsumer(hlo, consumer, buffer_assignment_)) { VLOG(2) << "Marking " << hlo.name() << " as invariant within " << consumer.name(); ir_array.MarkInvariantOverWholeProgram(&module_->getContext()); diff --git a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h index c059d9f19d7..5eef6727801 100644 --- a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h +++ b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h @@ -43,7 +43,7 @@ class HloToIrBindings { b_(b), module_(llvm_module) {} - Status EmitBasePointersForHlos( + void EmitBasePointersForHlos( absl::Span io_hlos, absl::Span non_io_hlos); diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_nested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_nested.cc index e85a71a43cf..e96c5f05e60 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_nested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_nested.cc @@ -104,7 +104,7 @@ Status IrEmitterNested::CodegenNestedComputation() { non_io_hlos.push_back(hlo); } } - TF_RETURN_IF_ERROR(bindings_.EmitBasePointersForHlos(io_hlos, non_io_hlos)); + bindings_.EmitBasePointersForHlos(io_hlos, non_io_hlos); TF_RETURN_IF_ERROR(nested_computation_.root_instruction()->Accept(this)); b_.SetInsertPoint(ret_instr); diff --git a/tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.cc b/tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.cc index f96c985da71..33121635b0b 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.cc @@ -54,9 +54,7 @@ string SanitizeConstantName(const HloInstruction& instr) { return instr_name; } -string ConstantBufferAllocationToGlobalName( - const BufferAllocation& allocation) { - const HloInstruction& instr = InstrForConstantBufferAllocation(allocation); +string ConstantHloToGlobalName(const HloInstruction& instr) { string instr_name = instr.name(); // Check that names are sanitized and stored in the HLO instructions // before constant buffer allocation. @@ -64,6 +62,11 @@ string ConstantBufferAllocationToGlobalName( return absl::StrCat("buffer_for_", instr_name); } +string ConstantBufferAllocationToGlobalName( + const BufferAllocation& allocation) { + return ConstantHloToGlobalName(InstrForConstantBufferAllocation(allocation)); +} + const Literal& LiteralForConstantAllocation( const BufferAllocation& allocation) { return InstrForConstantBufferAllocation(allocation).literal(); diff --git a/tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.h b/tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.h index 03e98a66900..2e2d3bf0b48 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.h +++ b/tensorflow/compiler/xla/service/llvm_ir/buffer_assignment_util.h @@ -24,6 +24,9 @@ namespace llvm_ir { // name of the corresponding constant buffer. In particular, it replaces . and // - with _. string SanitizeConstantName(const HloInstruction& instr); + +string ConstantHloToGlobalName(const HloInstruction& instr); + // In XLA:GPU we map constant buffer allocations to globals in the generated // LLVM IR. This function gives us the name of the global variable a constant // buffer is mapped to. Not used on XLA:CPU. From dd13d3b4c6851016037dd7372729d33109b43dd6 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 28 Jul 2020 14:34:30 -0700 Subject: [PATCH 1527/2522] add a metric to track bfc allocator delay. PiperOrigin-RevId: 323655357 Change-Id: Ic4be5e5af5634e2cb1be983f7f46f7f42cc72e36 --- tensorflow/core/common_runtime/BUILD | 2 ++ .../core/common_runtime/allocator_retry.cc | 27 +++++++++++++++++++ tensorflow/core/framework/metrics.cc | 12 +++++++++ tensorflow/core/framework/metrics.h | 3 +++ 4 files changed, 44 insertions(+) diff --git a/tensorflow/core/common_runtime/BUILD b/tensorflow/core/common_runtime/BUILD index 71815118bca..2dbcfdbee38 100644 --- a/tensorflow/core/common_runtime/BUILD +++ b/tensorflow/core/common_runtime/BUILD @@ -1671,6 +1671,7 @@ cc_library( deps = [ ":shared_counter", "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", @@ -1678,6 +1679,7 @@ cc_library( "//tensorflow/core/profiler/lib:traceme", "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:optional", ], ) diff --git a/tensorflow/core/common_runtime/allocator_retry.cc b/tensorflow/core/common_runtime/allocator_retry.cc index 3402b7fd919..966fcd1d36d 100644 --- a/tensorflow/core/common_runtime/allocator_retry.cc +++ b/tensorflow/core/common_runtime/allocator_retry.cc @@ -14,6 +14,9 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/core/common_runtime/allocator_retry.h" + +#include "absl/types/optional.h" +#include "tensorflow/core/framework/metrics.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/mutex.h" @@ -21,6 +24,28 @@ limitations under the License. namespace tensorflow { +namespace { +class ScopedTimeTracker { + public: + explicit ScopedTimeTracker(Env* env) : env_(env) {} + void Enable() { + if (!start_us_) { // Only override start_us when not set yet. 
+ start_us_ = env_->NowMicros(); + } + } + ~ScopedTimeTracker() { + if (start_us_) { + uint64 end_us = env_->NowMicros(); + metrics::UpdateBfcAllocatorDelayTime(end_us - *start_us_); + } + } + + private: + Env* env_; + absl::optional start_us_; +}; +} // namespace + AllocatorRetry::AllocatorRetry() : env_(Env::Default()) {} void* AllocatorRetry::AllocateRaw( @@ -31,6 +56,7 @@ void* AllocatorRetry::AllocateRaw( if (num_bytes == 0) { return nullptr; } + ScopedTimeTracker tracker(env_); uint64 deadline_micros = 0; bool first = true; void* ptr = nullptr; @@ -43,6 +69,7 @@ void* AllocatorRetry::AllocateRaw( first = false; } if (now < deadline_micros) { + tracker.Enable(); mutex_lock l(mu_); WaitForMilliseconds(&l, &memory_returned_, (deadline_micros - now) / 1000); diff --git a/tensorflow/core/framework/metrics.cc b/tensorflow/core/framework/metrics.cc index 738863f3646..8cbfcd5342a 100644 --- a/tensorflow/core/framework/metrics.cc +++ b/tensorflow/core/framework/metrics.cc @@ -148,6 +148,11 @@ auto* mlir_import_failure_count = monitoring::Counter<0>::New( "/tensorflow/mlir/import_failure_count", "The number of jobs that failed during mlir import or verification."); +auto* bfc_allocator_delay = + monitoring::Counter<0>::New("/tensorflow/core/bfc_allocator_delay", + "The total time spent running each graph " + "optimization pass in microseconds."); + } // namespace void RecordTFDataAutotune(const string& name) { @@ -274,6 +279,13 @@ void UpdateXlaCompilationTime(const uint64 compilation_time_usecs) { } } +void UpdateBfcAllocatorDelayTime(const uint64 delay_usecs) { + static auto* bfc_allocator_delay_cell = bfc_allocator_delay->GetCell(); + if (delay_usecs > 0) { + bfc_allocator_delay_cell->IncrementBy(delay_usecs); + } +} + void IncrementMLIRImportFailureCount() { static auto* mlir_import_failure_count_cell = mlir_import_failure_count->GetCell(); diff --git a/tensorflow/core/framework/metrics.h b/tensorflow/core/framework/metrics.h index 263fde272ab..7bc9a1bda0b 100644 --- a/tensorflow/core/framework/metrics.h +++ b/tensorflow/core/framework/metrics.h @@ -120,6 +120,9 @@ void UpdateGrapplerPassTime(const string& pass_name, // Updates the metrics stored about time XLA spents compiling graphs. void UpdateXlaCompilationTime(const uint64 compilation_time_usecs); +// Updates the metrics stored about time BFC allocator spents during delay. +void UpdateBfcAllocatorDelayTime(const uint64 delay_usecs); + // Increment the number of jobs that failed during import to mlir. void IncrementMLIRImportFailureCount(); From f6e74335831e8139175ac376ca9522efce8ea2f9 Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Tue, 28 Jul 2020 17:39:01 -0400 Subject: [PATCH 1528/2522] Update flatbuffer_import.cc --- tensorflow/compiler/mlir/lite/flatbuffer_import.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/compiler/mlir/lite/flatbuffer_import.cc b/tensorflow/compiler/mlir/lite/flatbuffer_import.cc index 29484fabbea..719f3c6ca79 100644 --- a/tensorflow/compiler/mlir/lite/flatbuffer_import.cc +++ b/tensorflow/compiler/mlir/lite/flatbuffer_import.cc @@ -342,6 +342,7 @@ StatusOr ConvertFloatBuffer( values.reserve(elem_count); const char* data = reinterpret_cast(buffer.data()); + for (int i = 0; i < elem_count; i++) { uint32_t bit_repr = llvm::support::endian::readNext Date: Tue, 28 Jul 2020 14:38:29 -0700 Subject: [PATCH 1529/2522] Revert TF Micro support for embedded build using proxy macros. 
PiperOrigin-RevId: 323656191 Change-Id: I5940e6ff40e3019372d2cf73265a02f47f8cc0c1 --- tensorflow/lite/BUILD | 6 -- tensorflow/lite/c/BUILD | 5 -- tensorflow/lite/core/api/BUILD | 3 +- tensorflow/lite/kernels/BUILD | 18 ++---- tensorflow/lite/kernels/hashtable/BUILD | 2 - tensorflow/lite/kernels/internal/BUILD | 63 +++++++------------ tensorflow/lite/micro/BUILD | 12 ---- tensorflow/lite/micro/build_def.bzl | 23 ------- .../lite/micro/examples/hello_world/BUILD | 2 - tensorflow/lite/micro/kernels/BUILD | 7 --- tensorflow/lite/micro/memory_planner/BUILD | 3 - tensorflow/lite/schema/BUILD | 3 +- tensorflow/lite/tools/optimize/sparsity/BUILD | 2 - tensorflow/lite/tools/signature/BUILD | 1 - 14 files changed, 32 insertions(+), 118 deletions(-) diff --git a/tensorflow/lite/BUILD b/tensorflow/lite/BUILD index 61b9972c4d9..fac85181231 100644 --- a/tensorflow/lite/BUILD +++ b/tensorflow/lite/BUILD @@ -1,6 +1,5 @@ load("//tensorflow:tensorflow.bzl", "if_not_windows", "tf_cc_test") load("//tensorflow/lite:build_def.bzl", "if_tflite_experimental_runtime", "tflite_cc_shared_object", "tflite_copts", "tflite_experimental_runtime_linkopts") -load("//tensorflow/lite/micro:build_def.bzl", "cc_library") load("//tensorflow/lite:special_rules.bzl", "tflite_portable_test_suite") package( @@ -83,7 +82,6 @@ FRAMEWORK_LIB_HDRS = [ cc_library( name = "version", hdrs = ["version.h"], - build_for_embedded = True, copts = TFLITE_DEFAULT_COPTS, # Note that we only use the header defines from :version_lib. deps = ["//tensorflow/core:version_lib"], @@ -139,7 +137,6 @@ cc_library( name = "external_cpu_backend_context", srcs = ["external_cpu_backend_context.cc"], hdrs = ["external_cpu_backend_context.h"], - build_for_embedded = True, copts = TFLITE_DEFAULT_COPTS, deps = [ "//tensorflow/lite/c:common", @@ -193,7 +190,6 @@ cc_library( hdrs = [ "string_type.h", ], - build_for_embedded = True, copts = TFLITE_DEFAULT_COPTS, ) @@ -309,7 +305,6 @@ cc_library( name = "string_util", srcs = ["string_util.cc"], hdrs = ["string_util.h"], - build_for_embedded = True, copts = TFLITE_DEFAULT_COPTS, deps = [ ":string", @@ -620,7 +615,6 @@ cc_library( cc_library( name = "type_to_tflitetype", hdrs = ["type_to_tflitetype.h"], - build_for_embedded = True, deps = ["//tensorflow/lite/c:common"], ) diff --git a/tensorflow/lite/c/BUILD b/tensorflow/lite/c/BUILD index 366b43336b9..bdf86d7904f 100644 --- a/tensorflow/lite/c/BUILD +++ b/tensorflow/lite/c/BUILD @@ -3,10 +3,6 @@ load( "tflite_cc_shared_object", "tflite_copts", ) -load( - "//tensorflow/lite/micro:build_def.bzl", - "cc_library", -) package( default_visibility = ["//visibility:public"], @@ -126,7 +122,6 @@ cc_library( "builtin_op_data.h", "common.h", ], - build_for_embedded = True, alwayslink = 1, ) diff --git a/tensorflow/lite/core/api/BUILD b/tensorflow/lite/core/api/BUILD index 97a3d3f78de..a1e6fc41cd9 100644 --- a/tensorflow/lite/core/api/BUILD +++ b/tensorflow/lite/core/api/BUILD @@ -1,5 +1,5 @@ load("//tensorflow/lite:build_def.bzl", "tflite_copts") -load("//tensorflow/lite/micro:build_def.bzl", "cc_library", "micro_copts") +load("//tensorflow/lite/micro:build_def.bzl", "micro_copts") package( default_visibility = ["//visibility:public"], @@ -21,7 +21,6 @@ cc_library( "profiler.h", "tensor_utils.h", ], - build_for_embedded = True, copts = tflite_copts() + micro_copts(), deps = [ "@flatbuffers//:runtime_cc", diff --git a/tensorflow/lite/kernels/BUILD b/tensorflow/lite/kernels/BUILD index e9ac9110869..a56d370afeb 100644 --- a/tensorflow/lite/kernels/BUILD +++ 
b/tensorflow/lite/kernels/BUILD @@ -1,5 +1,5 @@ load("//tensorflow/lite:build_def.bzl", "tflite_copts") -load("//tensorflow/lite/micro:build_def.bzl", "cc_library", "micro_copts") +load("//tensorflow/lite/micro:build_def.bzl", "micro_copts") load("//tensorflow/lite:special_rules.bzl", "tflite_portable_test_suite_combined") load("//tensorflow:tensorflow.bzl", "tf_opts_nortti_if_android") @@ -281,7 +281,6 @@ cc_test( cc_library( name = "tflite_with_ruy_enabled", - build_for_embedded = True, defines = ["TFLITE_WITH_RUY"], visibility = ["//visibility:private"], ) @@ -297,8 +296,8 @@ cc_library( cc_library( name = "tflite_with_ruy_default", - build_for_embedded = True, - select_deps = { + visibility = ["//visibility:private"], + deps = select({ ":chromiumos_arm64": [":tflite_with_ruy_enabled"], ":cpu_aarch64": [":tflite_with_ruy_enabled"], ":cpu_arm64": [":tflite_with_ruy_enabled"], @@ -308,18 +307,16 @@ cc_library( ":cpu_arm64_v8a": [":tflite_with_ruy_enabled"], "//tensorflow:android_arm": ["tflite_with_ruy_enabled"], "//conditions:default": [], - }, - visibility = ["//visibility:private"], + }), ) cc_library( name = "tflite_with_ruy", - build_for_embedded = True, - select_deps = { + deps = select({ ":tflite_with_ruy_explicit_true": [":tflite_with_ruy_enabled"], ":tflite_with_ruy_explicit_false": [], "//conditions:default": [":tflite_with_ruy_default"], - }, + }), ) cc_library( @@ -432,7 +429,6 @@ cc_library( hdrs = [ "op_macros.h", ], - build_for_embedded = True, copts = tflite_copts(), deps = ["//tensorflow/lite/micro:debug_log"], ) @@ -445,7 +441,6 @@ cc_library( hdrs = [ "kernel_util.h", ], - build_for_embedded = True, copts = tflite_copts() + micro_copts(), deps = [ "//tensorflow/lite/c:common", @@ -501,7 +496,6 @@ cc_library( name = "padding", srcs = [], hdrs = ["padding.h"], - build_for_embedded = True, copts = tflite_copts(), deps = [ "//tensorflow/lite/c:common", diff --git a/tensorflow/lite/kernels/hashtable/BUILD b/tensorflow/lite/kernels/hashtable/BUILD index d141abf4f95..73f6247a05e 100644 --- a/tensorflow/lite/kernels/hashtable/BUILD +++ b/tensorflow/lite/kernels/hashtable/BUILD @@ -1,5 +1,3 @@ -load("//tensorflow/lite/micro:build_def.bzl", "cc_library") - package( default_visibility = [ "//visibility:public", diff --git a/tensorflow/lite/kernels/internal/BUILD b/tensorflow/lite/kernels/internal/BUILD index f07918f05dc..2707871df16 100644 --- a/tensorflow/lite/kernels/internal/BUILD +++ b/tensorflow/lite/kernels/internal/BUILD @@ -1,6 +1,6 @@ load("//tensorflow:tensorflow.bzl", "transitive_hdrs") load("//tensorflow/lite:build_def.bzl", "tflite_copts") -load("//tensorflow/lite/micro:build_def.bzl", "cc_library", "micro_copts") +load("//tensorflow/lite/micro:build_def.bzl", "micro_copts") load("//tensorflow/lite:special_rules.bzl", "tflite_portable_test_suite_combined") package( @@ -46,7 +46,6 @@ NEON_FLAGS_IF_APPLICABLE = select({ cc_library( name = "compatibility", hdrs = ["compatibility.h"], - build_for_embedded = True, copts = tflite_copts(), deps = [ "//tensorflow/lite/kernels:op_macros", @@ -56,7 +55,6 @@ cc_library( cc_library( name = "types", hdrs = ["types.h"], - build_for_embedded = True, copts = tflite_copts(), deps = [ ":compatibility", @@ -66,7 +64,6 @@ cc_library( cc_library( name = "legacy_types", hdrs = ["legacy_types.h"], - build_for_embedded = True, copts = tflite_copts(), deps = [ ":types", @@ -221,7 +218,6 @@ cc_library( name = "common", srcs = [], hdrs = ["common.h"], - build_for_embedded = True, copts = tflite_copts(), deps = [ ":cppmath", @@ -375,7 +371,6 @@ 
cc_library( "max.h", "min.h", ], - build_for_embedded = True, copts = tflite_copts(), ) @@ -383,7 +378,6 @@ cc_library( name = "quantization_util", srcs = ["quantization_util.cc"], hdrs = ["quantization_util.h"], - build_for_embedded = True, copts = tflite_copts() + micro_copts(), deps = [ ":compatibility", @@ -411,7 +405,6 @@ cc_library( hdrs = [ "transpose_utils.h", ], - build_for_embedded = True, copts = tflite_copts(), deps = [ ":types", @@ -433,7 +426,6 @@ cc_library( hdrs = [ "strided_slice_logic.h", ], - build_for_embedded = True, copts = tflite_copts(), deps = [ ":compatibility", @@ -500,20 +492,7 @@ cc_library( "reference/sparse_ops/fully_connected.h", ], }), - build_for_embedded = True, copts = tflite_copts(), - select_deps = { - ":haswell": tflite_deps_intel, - ":ios_x86_64": tflite_deps_intel, - ":k8": tflite_deps_intel, - ":x86": tflite_deps_intel, - ":x86_64": tflite_deps_intel, - ":darwin": tflite_deps_intel, - ":darwin_x86_64": tflite_deps_intel, - ":freebsd": tflite_deps_intel, - ":windows": tflite_deps_intel, - "//conditions:default": [], - }, deps = [ ":common", ":compatibility", @@ -524,14 +503,25 @@ cc_library( ":tensor", ":tensor_utils", ":types", + "//third_party/eigen3", + "@gemmlowp//:fixedpoint", + "@ruy//ruy/profiler:instrumentation", "//tensorflow/lite:string_util", "//tensorflow/lite/c:common", "//tensorflow/lite/kernels:op_macros", "//tensorflow/lite/tools/optimize/sparsity:format_converter", - "//third_party/eigen3", - "@gemmlowp//:fixedpoint", - "@ruy//ruy/profiler:instrumentation", - ], + ] + select({ + ":haswell": tflite_deps_intel, + ":ios_x86_64": tflite_deps_intel, + ":k8": tflite_deps_intel, + ":x86": tflite_deps_intel, + ":x86_64": tflite_deps_intel, + ":darwin": tflite_deps_intel, + ":darwin_x86_64": tflite_deps_intel, + ":freebsd": tflite_deps_intel, + ":windows": tflite_deps_intel, + "//conditions:default": [], + }), ) cc_library( @@ -610,7 +600,6 @@ cc_library( "tensor.h", "tensor_ctypes.h", ], - build_for_embedded = True, copts = tflite_copts(), deps = [ ":types", @@ -704,7 +693,6 @@ cc_library( name = "kernel_utils", srcs = ["kernel_utils.cc"], hdrs = ["kernel_utils.h"], - build_for_embedded = True, copts = tflite_copts() + micro_copts(), deps = [ ":tensor_utils", @@ -742,9 +730,12 @@ cc_library( hdrs = [ "tensor_utils.h", ], - build_for_embedded = True, copts = tflite_copts() + NEON_FLAGS_IF_APPLICABLE, - select_deps = { + deps = [ + ":cpu_check", + "//third_party/eigen3", + "//tensorflow/lite/c:common", + ] + select({ ":aarch64": [ ":neon_tensor_utils", ], @@ -808,12 +799,7 @@ cc_library( "//conditions:default": [ ":portable_tensor_utils", ], - }, - deps = [ - ":cpu_check", - "//tensorflow/lite/c:common", - "//third_party/eigen3", - ], + }), ) cc_library( @@ -1071,9 +1057,8 @@ cc_library( "optimized/neon_check.h", "optimized/sse_check.h", ], - build_for_embedded = True, copts = tflite_copts(), - select_deps = { + deps = select({ ":haswell": tflite_deps_intel, ":ios_x86_64": tflite_deps_intel, ":k8": tflite_deps_intel, @@ -1084,7 +1069,7 @@ cc_library( ":freebsd": tflite_deps_intel, ":windows": tflite_deps_intel, "//conditions:default": [], - }, + }), ) cc_test( diff --git a/tensorflow/lite/micro/BUILD b/tensorflow/lite/micro/BUILD index 803c7718e77..9b3d0d623cc 100644 --- a/tensorflow/lite/micro/BUILD +++ b/tensorflow/lite/micro/BUILD @@ -4,7 +4,6 @@ load( ) load( "//tensorflow/lite/micro:build_def.bzl", - "cc_library", "micro_copts", ) @@ -23,7 +22,6 @@ cc_library( hdrs = [ "compatibility.h", ], - build_for_embedded = True, copts = 
micro_copts(), ) @@ -41,7 +39,6 @@ cc_library( "micro_optional_debug_tools.h", "simple_memory_allocator.h", ], - build_for_embedded = True, copts = micro_copts(), deps = [ ":memory_helpers", @@ -81,7 +78,6 @@ cc_library( hdrs = [ "test_helpers.h", ], - build_for_embedded = True, copts = micro_copts(), deps = [ ":micro_utils", @@ -106,7 +102,6 @@ cc_library( "micro_mutable_op_resolver.h", "micro_op_resolver.h", ], - build_for_embedded = True, copts = micro_copts(), deps = [ ":micro_compatibility", @@ -127,7 +122,6 @@ cc_library( hdrs = [ "debug_log.h", ], - build_for_embedded = True, copts = micro_copts(), ) @@ -139,7 +133,6 @@ cc_library( hdrs = [ "micro_error_reporter.h", ], - build_for_embedded = True, copts = micro_copts(), deps = [ ":debug_log", @@ -157,7 +150,6 @@ cc_library( hdrs = [ "micro_string.h", ], - build_for_embedded = True, copts = micro_copts(), deps = ["//tensorflow/lite/c:common"], ) @@ -170,7 +162,6 @@ cc_library( hdrs = [ "micro_time.h", ], - build_for_embedded = True, copts = micro_copts(), deps = ["//tensorflow/lite/c:common"], ) @@ -183,7 +174,6 @@ cc_library( hdrs = [ "micro_profiler.h", ], - build_for_embedded = True, copts = micro_copts(), deps = [ ":micro_compatibility", @@ -201,7 +191,6 @@ cc_library( hdrs = [ "micro_utils.h", ], - build_for_embedded = True, copts = micro_copts(), deps = [ "//tensorflow/lite/c:common", @@ -220,7 +209,6 @@ cc_library( "recording_micro_interpreter.h", "recording_simple_memory_allocator.h", ], - build_for_embedded = True, copts = micro_copts(), deps = [ ":micro_compatibility", diff --git a/tensorflow/lite/micro/build_def.bzl b/tensorflow/lite/micro/build_def.bzl index ef37c92d9cd..edca4cb5cea 100644 --- a/tensorflow/lite/micro/build_def.bzl +++ b/tensorflow/lite/micro/build_def.bzl @@ -1,25 +1,2 @@ -load( - "@rules_cc//cc:defs.bzl", - _cc_library = "cc_library", -) -load( - "@flatbuffers//:build_defs.bzl", - _flatbuffer_cc_library = "flatbuffer_cc_library", -) - def micro_copts(): return [] - -def cc_library(**kwargs): - kwargs.pop("build_for_embedded", False) - if "select_deps" in kwargs.keys(): - select_deps = kwargs.pop("select_deps", {}) - if "deps" in kwargs.keys(): - kwargs["deps"] += select(select_deps) - else: - kwargs["deps"] = select(select_deps) - _cc_library(**kwargs) - -def flatbuffer_cc_library(**kwargs): - kwargs.pop("build_for_embedded", False) - _flatbuffer_cc_library(**kwargs) diff --git a/tensorflow/lite/micro/examples/hello_world/BUILD b/tensorflow/lite/micro/examples/hello_world/BUILD index 8762e9d2f3e..b5541f15fa9 100644 --- a/tensorflow/lite/micro/examples/hello_world/BUILD +++ b/tensorflow/lite/micro/examples/hello_world/BUILD @@ -7,7 +7,6 @@ load( ) load( "//tensorflow/lite/micro:build_def.bzl", - "cc_library", "micro_copts", ) @@ -23,7 +22,6 @@ cc_library( hdrs = [ "model.h", ], - build_for_embedded = True, copts = micro_copts(), ) diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index a58b93f79a3..b25aca02bb6 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD @@ -4,7 +4,6 @@ load( ) load( "//tensorflow/lite/micro:build_def.bzl", - "cc_library", "micro_copts", ) @@ -93,8 +92,6 @@ cc_library( ], }), hdrs = ["micro_ops.h"], - # TODO(b/153609488): enable embedded build once we can properly support it. 
- #build_for_embedded = True, copts = micro_copts(), visibility = [ # Needed for micro:op_resolvers but visibility can not be finer-grained @@ -460,7 +457,6 @@ tflite_micro_cc_test( cc_library( name = "activation_utils", hdrs = ["activation_utils.h"], - build_for_embedded = True, deps = [ "//tensorflow/lite/c:common", "//tensorflow/lite/kernels/internal:cppmath", @@ -503,7 +499,6 @@ cc_library( "kernel_runner.cc", ], hdrs = ["kernel_runner.h"], - build_for_embedded = True, deps = [ "//tensorflow/lite/c:common", "//tensorflow/lite/kernels/internal:compatibility", @@ -517,7 +512,6 @@ cc_library( "kernel_util.cc", ], hdrs = ["kernel_util.h"], - build_for_embedded = True, deps = [ "//tensorflow/lite/c:common", "//tensorflow/lite/kernels/internal:types", @@ -527,7 +521,6 @@ cc_library( cc_library( name = "micro_utils", hdrs = ["micro_utils.h"], - build_for_embedded = True, ) tflite_micro_cc_test( diff --git a/tensorflow/lite/micro/memory_planner/BUILD b/tensorflow/lite/micro/memory_planner/BUILD index 9e53fb1f874..a674f075cb6 100644 --- a/tensorflow/lite/micro/memory_planner/BUILD +++ b/tensorflow/lite/micro/memory_planner/BUILD @@ -4,7 +4,6 @@ load( ) load( "//tensorflow/lite/micro:build_def.bzl", - "cc_library", "micro_copts", ) @@ -18,7 +17,6 @@ cc_library( hdrs = [ "memory_planner.h", ], - build_for_embedded = True, copts = micro_copts(), deps = [ "//tensorflow/lite/c:common", @@ -50,7 +48,6 @@ cc_library( hdrs = [ "greedy_memory_planner.h", ], - build_for_embedded = True, copts = micro_copts(), deps = [ ":memory_planner", diff --git a/tensorflow/lite/schema/BUILD b/tensorflow/lite/schema/BUILD index 33e7eec8421..0bbb2d5e95d 100644 --- a/tensorflow/lite/schema/BUILD +++ b/tensorflow/lite/schema/BUILD @@ -1,6 +1,6 @@ load("//tensorflow:tensorflow.bzl", "py_test") load("//tensorflow/lite:special_rules.bzl", "tflite_portable_test_suite") -load("//tensorflow/lite/micro:build_def.bzl", "flatbuffer_cc_library") +load("@flatbuffers//:build_defs.bzl", "flatbuffer_cc_library") package( default_visibility = [ @@ -64,7 +64,6 @@ exports_files([ flatbuffer_cc_library( name = "schema_fbs", srcs = ["schema.fbs"], - build_for_embedded = True, ) # Generic schema for flatbuffer converter (but with mutable makes bigger). 
diff --git a/tensorflow/lite/tools/optimize/sparsity/BUILD b/tensorflow/lite/tools/optimize/sparsity/BUILD index 4ea901f77f9..b68094849c1 100644 --- a/tensorflow/lite/tools/optimize/sparsity/BUILD +++ b/tensorflow/lite/tools/optimize/sparsity/BUILD @@ -1,5 +1,4 @@ load("//tensorflow/lite:build_def.bzl", "tflite_copts") -load("//tensorflow/lite/micro:build_def.bzl", "cc_library") package( default_visibility = [ @@ -12,7 +11,6 @@ cc_library( name = "format_converter", srcs = ["format_converter.cc"], hdrs = ["format_converter.h"], - build_for_embedded = True, copts = tflite_copts(), deps = [ "//tensorflow/lite/c:common", diff --git a/tensorflow/lite/tools/signature/BUILD b/tensorflow/lite/tools/signature/BUILD index cf28b2eab72..05fc106d759 100644 --- a/tensorflow/lite/tools/signature/BUILD +++ b/tensorflow/lite/tools/signature/BUILD @@ -2,7 +2,6 @@ load("//tensorflow:tensorflow.bzl", "pybind_extension") load("//tensorflow:tensorflow.bzl", "if_not_windows") load("//tensorflow/lite:build_def.bzl", "tflite_copts") -load("//tensorflow/lite/micro:build_def.bzl", "cc_library") load("//tensorflow/lite:special_rules.bzl", "tflite_portable_test_suite") package( From 11f56a157c159f51314e581e5b2027de52e291f4 Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Tue, 28 Jul 2020 14:45:48 -0700 Subject: [PATCH 1530/2522] Add upload path to BigQuery table PiperOrigin-RevId: 323657731 Change-Id: I12b5f81cfb2e4a8fd99482ae5e31de5d3951c38e --- tensorflow/tools/ci_build/sizetrack_helper.py | 59 +++++++++++-------- 1 file changed, 35 insertions(+), 24 deletions(-) diff --git a/tensorflow/tools/ci_build/sizetrack_helper.py b/tensorflow/tools/ci_build/sizetrack_helper.py index 85bfa125828..cc460806187 100755 --- a/tensorflow/tools/ci_build/sizetrack_helper.py +++ b/tensorflow/tools/ci_build/sizetrack_helper.py @@ -88,7 +88,8 @@ parser.add_argument( type=str, help="Unique ID for your artifact, used for sorting dashboards.") parser.add_argument( - "-n", "--dry_run", + "-n", + "--dry_run", action="store_true", help="Dry run: do not load to BigQuery or upload to GCS.") parser.add_argument( @@ -138,6 +139,7 @@ SCHEMA = ",".join([ "bytes:int64", "team:string", "logged_date:timestamp", + "uploaded_to:string", ]) # Select the earliest recorded commit in the same table for the same artifact # and team. 
Used to determine the full range of tested commits for each @@ -266,15 +268,33 @@ def get_all_tested_commits(): return [""] * 6 +def get_upload_path(): + """Generate URL for 'gsutil cp'.""" + if FLAGS.upload and FLAGS.artifact: + head_info = git_pretty("HEAD", PRETTY_HEAD_INFO, n=1) + _, current_cl, _, _, _, _, _ = head_info[0].split("\t") + artifact_filename = os.path.basename(FLAGS.artifact.name) + # note: not os.path.join here, because gsutil is always linux-style + path = "{bucket}/{team}/{artifact_id}/{cl}.{artifact_filename}".format( + bucket=FLAGS.bucket, + team=FLAGS.team, + artifact_id=FLAGS.artifact_id, + cl=current_cl, + artifact_filename=artifact_filename) + return path + else: + return "" + + def build_row(): """Assemble one row of data about this artifact.""" - (earliest_commit, early_cl, early_author_date, - early_commit_date, all_commits, all_changelists) = get_all_tested_commits() + (earliest_commit, early_cl, early_author_date, early_commit_date, all_commits, + all_changelists) = get_all_tested_commits() # Use UTC to make sure machines in different timezones load consistent data current_time = datetime.datetime.now(datetime.timezone.utc).isoformat() - artifact_filename = ("NO_FILE" if not FLAGS.artifact - else os.path.basename(FLAGS.artifact.name)) + artifact_filename = ("NO_FILE" if not FLAGS.artifact else os.path.basename( + FLAGS.artifact.name)) size_bytes = FLAGS.manual_bytes or os.path.getsize(FLAGS.artifact.name) head_info = git_pretty("HEAD", PRETTY_HEAD_INFO, n=1) all_head_info_items = head_info[0].split("\t") @@ -291,6 +311,7 @@ def build_row(): size_bytes, FLAGS.team, current_time, + get_upload_path(), ] @@ -305,13 +326,20 @@ def main(): print( "--team and --artifact_id are required if --print_schema is not " "specified.\nYou must also specify one of --artifact or --manual_bytes." - "\nPass -h or --help for usage." 
- ) + "\nPass -h or --help for usage.") exit(1) # Generate data about this artifact into a Tab Separated Value file next_tsv_row = build_row() + # Upload artifact into GCS if it exists + if FLAGS.upload and FLAGS.artifact: + upload_path = get_upload_path() + if FLAGS.dry_run: + print("DRY RUN: Would gsutil cp to:\n{}".format(upload_path)) + else: + gcloud("gsutil", ["cp", FLAGS.artifact.name, upload_path]) + # Load into BigQuery if FLAGS.dry_run: print("DRY RUN: Generated this TSV row:") @@ -325,23 +353,6 @@ def main(): "--field_delimiter", "tab", PROJECT_LEVEL_TABLE_NAME, "data.tsv", SCHEMA ]) - # Upload artifact into GCS if it exists - if FLAGS.upload and FLAGS.artifact: - head_info = git_pretty("HEAD", PRETTY_HEAD_INFO, n=1) - _, current_cl, _, _, _, _, _ = head_info[0].split("\t") - artifact_filename = os.path.basename(FLAGS.artifact.name) - # note: not os.path.join here, because gsutil is always linux-style - path = "{bucket}/{team}/{artifact_id}/{cl}.{artifact_filename}".format( - bucket=FLAGS.bucket, - team=FLAGS.team, - artifact_id=FLAGS.artifact_id, - cl=current_cl, - artifact_filename=artifact_filename) - if FLAGS.dry_run: - print("DRY RUN: Would gsutil cp to:\n{}".format(path)) - else: - gcloud("gsutil", ["cp", FLAGS.artifact, path]) - if __name__ == "__main__": main() From e6237e14b8fbddab19cfac75a8842fa1d9d9097f Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Tue, 28 Jul 2020 18:00:31 -0400 Subject: [PATCH 1531/2522] Update tfl_ops.cc --- tensorflow/compiler/mlir/lite/ir/tfl_ops.cc | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc b/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc index c7c3f5713f1..1361f4e501a 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc +++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc @@ -150,7 +150,15 @@ bool VerifyAddOpShapeConstraints(AddOp op) { // Allows F32, QI8, QUI8 and I32 outputs when the operands have valid shapes, // which are broadcastable shapes up to five dimension or have same shapes. if (element_type.isF32() || IsQI8Type(element_type) || - IsQUI8Type(element_type) || IsI32Type(element_type)) { + IsQUI8Type(element_type)) { + return VerifyOperandsHaveSameShapesOrBroadcastableShape( + /*op=*/op.getOperation(), /*indices=*/ArrayRef{0, 1}, + /*max_bcast_rank=*/5); + } + + // Allows I32 output when the operands have valid shapes, which are + // broadcastable shapes up to four dimension or have same shapes. 
+ if (IsI32Type(element_type)) { return VerifyOperandsHaveSameShapesOrBroadcastableShape( /*op=*/op.getOperation(), /*indices=*/ArrayRef{0, 1}, /*max_bcast_rank=*/4); From 20bcc54f35a32eaad00b91a90c88c1eefa3a3fe4 Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Tue, 28 Jul 2020 18:04:01 -0400 Subject: [PATCH 1532/2522] Update tfl_ops.cc --- tensorflow/compiler/mlir/lite/ir/tfl_ops.cc | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc b/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc index 1361f4e501a..3b81da9ca13 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc +++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc @@ -147,7 +147,7 @@ bool IsI64Type(Type element_type) { bool VerifyAddOpShapeConstraints(AddOp op) { auto element_type = getElementTypeOrSelf(op.output().getType()); - // Allows F32, QI8, QUI8 and I32 outputs when the operands have valid shapes, + // Allows F32, QI8, and QUI8 outputs when the operands have valid shapes, // which are broadcastable shapes up to five dimension or have same shapes. if (element_type.isF32() || IsQI8Type(element_type) || IsQUI8Type(element_type)) { @@ -210,13 +210,20 @@ bool VerifyMulOpShapeConstraints(MulOp op) { } return VerifyOperandsHaveSameShapesOrBroadcastableShape( /*op=*/op.getOperation(), /*indices=*/ArrayRef{0, 1}, - /*max_bcast_rank=*/4); + /*max_bcast_rank=*/5); } - // Allows I32, QI16 and F32 outputs when the operands have valid shapes, which - // are broadcastable shapes up to four dimension or have same shapes. - if (IsI32Type(element_type) || IsQI16Type(element_type) || - element_type.isF32()) { + // Allows F32 output when the operands have valid shapes, which are + // broadcastable shapes up to five dimension or have same shapes. + if (element_type.isF32()) { + return VerifyOperandsHaveSameShapesOrBroadcastableShape( + /*op=*/op.getOperation(), /*indices=*/ArrayRef{0, 1}, + /*max_bcast_rank=*/5); + } + + // Allows I32 and QI16 outputs when the operands have valid shapes, which are + // broadcastable shapes up to four dimension or have same shapes. 
+ if (IsI32Type(element_type) || IsQI16Type(element_type)) { return VerifyOperandsHaveSameShapesOrBroadcastableShape( /*op=*/op.getOperation(), /*indices=*/ArrayRef{0, 1}, /*max_bcast_rank=*/4); From 9cc5633717b2b4258c0129a6f6ce430e4dceef77 Mon Sep 17 00:00:00 2001 From: Jonathan Chu Date: Tue, 28 Jul 2020 22:04:14 +0000 Subject: [PATCH 1533/2522] Remove public hashable_input_signature property, add conditional in initialization --- tensorflow/python/eager/function.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 6662105cbeb..0f1912a5efd 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -2902,8 +2902,9 @@ class Function(object): self._function_attributes = attributes or {} self._capture_by_value = capture_by_value self.tracing_count = 0 - self._hashable_input_signature = _make_input_signature_hashable( - self.flat_input_signature) + if self.input_signature is not None: + self._hashable_input_signature = _make_input_signature_hashable( + self.flat_input_signature) self._lock = threading.Lock() # _descriptor_cache is a of instance of a class to an instance-specific @@ -2942,11 +2943,6 @@ class Function(object): """Returns the flattened input signature.""" return self._function_spec.flat_input_signature - @property - def hashable_input_signature(self): - """Returns a cached hashable object for the flattened input signature.""" - return self._hashable_input_signature - def _get_concrete_function_internal_garbage_collected(self, *args, **kwargs): """Returns a concrete function which cleans up its graph function.""" if self.input_signature: @@ -3083,7 +3079,8 @@ class Function(object): else: del args, kwargs assert not include_tensor_ranks_only - hashable_input_signature = self.hashable_input_signature + assert hasattr(self, '_hashable_input_signature') + hashable_input_signature = self._hashable_input_signature ctx = context.context() From afad47f5b3821d12f137e71f567c63a8cd5aa272 Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Tue, 28 Jul 2020 18:10:27 -0400 Subject: [PATCH 1534/2522] Update legalize_tf.cc --- .../mlir/lite/transforms/legalize_tf.cc | 191 ++---------------- 1 file changed, 12 insertions(+), 179 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc index c85e8259711..597c874ee04 100644 --- a/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc +++ b/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc @@ -158,7 +158,7 @@ LogicalResult ConvertTFRandomUniformOp::matchAndRewrite( random_uniform_op.seed().getSExtValue(), random_uniform_op.seed2().getSExtValue()); Distribution dist; - int num_elements = 0; + size_t num_elements = 0; if (auto output_type = random_uniform_op.output().getType().dyn_cast_or_null()) { if (auto ranked_output = output_type.dyn_cast_or_null()) { @@ -170,7 +170,7 @@ LogicalResult ConvertTFRandomUniformOp::matchAndRewrite( size_t num_samples = Distribution::kResultElementCount; llvm::SmallVector data; data.resize(num_elements); - while (static_cast(offset) < num_elements) { + while (offset < num_elements) { const typename Distribution::ResultType samples = dist(&generator); std::copy(&samples[0], &samples[0] + std::min(num_samples, data.size() - offset), @@ -631,156 +631,6 @@ struct LegalizeUnidirectionalSequenceRnn : public RewritePattern { } }; -// Put two TFL BroadcastTo ops in front of the given TF binary 
broadcast op to -// to make binary broadcast-able op conversion always successful and does not -// require flex delegate. -template -class ApplyExplicitBroadcasting : public OpRewritePattern { - public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(SourceOp src_op, - PatternRewriter& rewriter) const override { - Operation* op = static_cast(src_op); - auto lhs = op->getOperand(0); - auto rhs = op->getOperand(1); - - // Should have static shapes to calculate the broadcasted shape. - if (!lhs.getType().cast().hasStaticShape() || - !rhs.getType().cast().hasStaticShape()) { - return failure(); - } - - // Calculate the broadcasted shape. - SmallVector result_shape; - if (!OpTrait::util::getBroadcastedShape( - lhs.getType().cast().getShape(), - rhs.getType().cast().getShape(), result_shape)) { - return failure(); - } - - RankedTensorType result_type = RankedTensorType::get( - result_shape, getElementTypeOrSelf(op->getResult(0).getType())); - - // Create a const op, that stores the above broadcasted shape. - auto new_shape_attr = mlir::DenseIntElementsAttr::get( - RankedTensorType::get(result_shape.size(), rewriter.getIntegerType(64)), - result_shape); - auto new_shape = rewriter.create(op->getLoc(), new_shape_attr); - - // Apply BroadcastTo ops to each input. - auto broadcast_type = RankedTensorType::get( - result_shape, getElementTypeOrSelf(lhs.getType())); - - if (result_type.getShape() != lhs.getType().cast().getShape()) { - lhs = rewriter - .create(op->getLoc(), broadcast_type, lhs, - new_shape) - .output(); - } - if (result_type.getShape() != rhs.getType().cast().getShape()) { - rhs = rewriter - .create(op->getLoc(), broadcast_type, rhs, - new_shape) - .output(); - } - - // Recreate an op with the above Broadcast op results. - rewriter.replaceOpWithNewOp(op, result_type, lhs, rhs); - return success(); - } -}; - -// This specialization is for TF SelectV2 op. SelectV2 op have three inputs and -// they should have broadcastable shapes. -template <> -class ApplyExplicitBroadcasting - : public OpRewritePattern { - public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(TF::SelectV2Op src_op, - PatternRewriter& rewriter) const override { - Operation* op = static_cast(src_op); - auto cond = op->getOperand(0); - auto lhs = op->getOperand(1); - auto rhs = op->getOperand(2); - - // Should have static shapes to calculate the broadcasted shape. - if (!lhs.getType().cast().hasStaticShape() || - !rhs.getType().cast().hasStaticShape() || - !cond.getType().cast().hasStaticShape()) { - return failure(); - } - - // Calculate the broadcasted shape. - SmallVector broadcasted_shape; - if (!OpTrait::util::getBroadcastedShape( - lhs.getType().cast().getShape(), - rhs.getType().cast().getShape(), broadcasted_shape)) { - return failure(); - } - - SmallVector result_shape; - if (!OpTrait::util::getBroadcastedShape( - broadcasted_shape, cond.getType().cast().getShape(), - result_shape)) { - return failure(); - } - - // Create a const op, that stores the above broadcasted shape. - auto shape_type = - RankedTensorType::get(result_shape.size(), rewriter.getIntegerType(64)); - auto new_shape_attr = - mlir::DenseIntElementsAttr::get(shape_type, result_shape); - auto new_shape = rewriter.create(op->getLoc(), new_shape_attr); - - // Apply BroadcastTo ops to each input. 
- auto cond_result_type = - RankedTensorType::get(result_shape, rewriter.getIntegerType(1)); - auto result_type = RankedTensorType::get( - result_shape, getElementTypeOrSelf(lhs.getType())); - - if (result_shape != cond.getType().cast().getShape()) { - cond = rewriter - .create(op->getLoc(), cond_result_type, - cond, new_shape) - .output(); - } - if (result_shape != lhs.getType().cast().getShape()) { - lhs = rewriter - .create(op->getLoc(), result_type, lhs, - new_shape) - .output(); - } - if (result_shape != rhs.getType().cast().getShape()) { - rhs = rewriter - .create(op->getLoc(), result_type, rhs, - new_shape) - .output(); - } - - // Recreate an op with the above Broadcast op results. - rewriter.replaceOpWithNewOp(op, result_type, cond, lhs, - rhs); - return success(); - } -}; - -void applyPatterns(FuncOp func, ConversionTarget& target, - const OwningRewritePatternList& patterns) { - // Keep trying to convert. - // TODO(karimnosseir): This is similar to what apply greedy patterns does. - // Look if there is a function that tries until it converge. - // Currently unit-test doesn't do multiple tries, so we need this. - const int max_iterations = 15; - for (int i = 0; i < max_iterations; ++i) { - if (failed(applyPartialConversion(func, target, patterns))) { - return; - } - } -} - void LegalizeTF::runOnFunction() { OwningRewritePatternList patterns; auto* context = &getContext(); @@ -831,33 +681,16 @@ void LegalizeTF::runOnFunction() { return success(current_thread_id == llvm::get_threadid()); }); - applyPatterns(func, target, patterns); - - // Explict BroadcastTo addition for left-over broadcast-able ops. - // The following pattern matchings should be done after the other legalization - // rules in order not to add unnecessary BroadcastTo ops. - patterns.insert, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting, - ApplyExplicitBroadcasting>(context); - - applyPatterns(func, target, patterns); -} + // Keep trying to convert. + // TODO(karimnosseir): This is similar to what apply greedy patterns does. + // Look if there is a function that tries until it converge. + // Currently unit-test doesn't do multiple tries, so we need this. 
+  const int max_iterations = 15;
+  for (int i = 0; i < max_iterations; ++i) {
+    if (failed(applyPartialConversion(func, target, patterns))) {
+      return;
+    }
+  }
 
 }  // namespace

From 82f49ce173c5109ff084afd0b3bd0fbdc703c6c6 Mon Sep 17 00:00:00 2001
From: tg-at-google
Date: Tue, 28 Jul 2020 18:11:14 -0400
Subject: [PATCH 1535/2522] Update tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc

Co-authored-by: Mihai Maruseac
---
 .../mlir/lite/transforms/prepare_composite_functions_tf.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc b/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc
index 9261deab18b..ca70d8f4029 100644
--- a/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc
+++ b/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc
@@ -225,8 +225,8 @@ void PrepareCompositeFunctionsPass::ConvertTFImplementsWithAttributes(
 LogicalResult CheckOutputConsumer(
     Operation* call_op, int expected_num_outputs,
     llvm::DenseSet expected_consumer_indices) {
-  const int call_op_getNumResults = call_op->getNumResults();
-  if (call_op_getNumResults != expected_num_outputs) return failure();
+  const int num_results = call_op->getNumResults();
+  if (num_results != expected_num_outputs) return failure();
 
   for (int i = 0; i < expected_num_outputs; ++i) {
     auto it = expected_consumer_indices.find(i);

From d54a57f37d39610f4d5e6b167693f5d677c26a3e Mon Sep 17 00:00:00 2001
From: Yunxing Dai
Date: Tue, 28 Jul 2020 15:05:05 -0700
Subject: [PATCH 1536/2522] Don't add output copies to conditional if the output is non-phi.

Non-phi buffers (buffers with only one value) don't need a copy. In fact,
adding a copy can confuse other operations like while loops because it turns
their non-phi node into phi without notifying dataflow analysis.

PiperOrigin-RevId: 323661923
Change-Id: I793f71dbbd30b54eee13a429873f0f2fdc8eadda
---
 .../compiler/xla/service/copy_insertion.cc | 45 ++++++++++++++++---
 1 file changed, 38 insertions(+), 7 deletions(-)

diff --git a/tensorflow/compiler/xla/service/copy_insertion.cc b/tensorflow/compiler/xla/service/copy_insertion.cc
index 6bfd8c4db46..6d1f81ba896 100644
--- a/tensorflow/compiler/xla/service/copy_insertion.cc
+++ b/tensorflow/compiler/xla/service/copy_insertion.cc
@@ -191,6 +191,30 @@ bool IndicesToCopyForWhile(const HloDataflowAnalysis& dataflow,
   return any_copies;
 }
 
+// Compute the indices of the conditional outputs which need copies.
+// Unambiguous buffers (buffers with only one value) don't need copies.
+bool IndicesToCopyForConditional(const HloDataflowAnalysis& dataflow,
+                                 const HloInstruction* xla_conditional,
+                                 ShapeTree* indices_to_copy) {
+  DCHECK(ShapeUtil::Compatible(indices_to_copy->shape(),
+                               xla_conditional->shape()));
+
+  bool any_copies = false;
+  for (auto& pair : *indices_to_copy) {
+    const ShapeIndex& index = pair.first;
+    bool& should_copy = pair.second;
+
+    CHECK_EQ(dataflow.GetValueSet(xla_conditional, index).values().size(), 1);
+
+    auto value = dataflow.GetValueSet(xla_conditional, index).values()[0];
+    // The conditional must be copied if the value is a phi.
+    should_copy =
+        value->is_phi() && value->defining_instruction() == xla_conditional;
+    any_copies |= should_copy;
+  }
+  return any_copies;
+}
+
 // Add kCopy instructions around the given kWhile instruction to eliminate any
 // possible live range interference of HLO values assuming a dependency-based
 // ordering (HloDependencyOrdering). Copies are added conservatively. There
@@ -306,24 +330,30 @@ Status AddCopiesForWhile(const HloAliasAnalysis& alias_analysis,
   }
   body->set_root_instruction(root_copy);
-
   return Status::OK();
 }
 
-// We add copies for all the indices of the true and false computation roots, in
-// order to resolve interference. We later rely on RemoveUnnecessaryCopies to
-// drop the unnecessary ones.
+// We add copies for all non-phi indices of the true and false computation
+// roots, in order to resolve interference. We later rely on
+// RemoveUnnecessaryCopies to drop the unnecessary ones.
 Status AddCopiesForConditional(const HloAliasAnalysis& alias_analysis,
                                HloInstruction* conditional) {
   VLOG(2) << "Adding copies for kConditional instruction "
           << conditional->name();
+  ShapeTree indices_to_copy(conditional->shape());
   TF_RET_CHECK(conditional->opcode() == HloOpcode::kConditional);
-
+  if (!IndicesToCopyForConditional(alias_analysis.dataflow_analysis(),
+                                   conditional, &indices_to_copy)) {
+    VLOG(2) << "No copies necessary for kConditional instruction "
+            << conditional->name();
+    return Status::OK();
+  }
   for (HloComputation* computation : conditional->branch_computations()) {
     HloInstruction* root = computation->root_instruction();
     std::vector users = root->users();
-    TF_ASSIGN_OR_RETURN(HloInstruction * deep_copy,
-                        computation->DeepCopyInstruction(root));
+    TF_ASSIGN_OR_RETURN(
+        HloInstruction * deep_copy,
+        computation->DeepCopyInstruction(root, &indices_to_copy));
     for (HloInstruction* user : users) {
       TF_RETURN_IF_ERROR(root->ReplaceUseWith(user, deep_copy));
     }
@@ -1128,6 +1158,7 @@ static int64 GetNumExistingCopies(const HloModule* module) {
 
 Status CopyInsertion::RemoveUnnecessaryCopies(const HloOrdering& ordering,
                                               HloModule* module) {
+  XLA_LOG_LINES(4, module->ToString());
   TF_ASSIGN_OR_RETURN(std::unique_ptr alias_analysis,
                       HloAliasAnalysis::Run(module, can_share_buffer_));

From 8b23245d0932876ad4d3a036875c48c4cbdbd063 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Tue, 28 Jul 2020 15:15:50 -0700
Subject: [PATCH 1537/2522] Added multiply_linear_by_lr documentation for FTRL for TPU embeddings.

PiperOrigin-RevId: 323664047
Change-Id: I20b141e1d1eab9a3b5f6d72528d38ada350bf4ec
---
 .../protobuf/tpu/optimization_parameters.proto | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/protobuf/tpu/optimization_parameters.proto b/tensorflow/core/protobuf/tpu/optimization_parameters.proto
index 1699a26abfd..f29beb3bc48 100644
--- a/tensorflow/core/protobuf/tpu/optimization_parameters.proto
+++ b/tensorflow/core/protobuf/tpu/optimization_parameters.proto
@@ -81,7 +81,19 @@ message BoundedAdagradParameters {
 message StochasticGradientDescentParameters {}
 
 // https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Ftrl
+// https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/41159.pdf
 // https://github.com/tensorflow/tensorflow/blob/6b6471f3ffb7f1fefe42d814aa5fb9ab7a535b58/tensorflow/core/kernels/training_ops.cc#L2646
+//
+// The hyperparameters for FTRL are the same as for the Keras implementation,
+// with some additions.
When the multiply_linear_by_lr field is set to true, a +// modified formula is used for FTRL that treats the "linear" accumulator as +// being pre-multiplied by the learning rate (i.e., the accumulator named +// "linear" actually stores "linear * learning_rate"). Other than checkpoint +// compatibility, this is mathematically equivalent for a static learning rate; +// for a dynamic learning rate, it is nearly the same as long as the learning +// rate does not change quickly. The benefit of setting multiply_linear_by_lr to +// true is that the modified formula handles zero and near-zero learning rates +// without producing NaNs, improving flexibility for learning rate ramp-up. message FtrlParameters { float l1 = 1; float l2 = 2; @@ -93,9 +105,9 @@ message FtrlParameters { reserved 4, 5; } -// The Adam optimizer does not implement hyper-parameter update; use the dynamic -// learning rate feature instead, setting the learning rate to: -// user learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t) +// The Adam optimizer does not implement hyper-parameter update due to hardware +// limitations; use the dynamic learning rate feature instead, setting the +// learning rate to: user learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t) // Here, t is the current timestep. // // https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Adam From cbc19aacda24c8352bc21bfc4974bab5cf885067 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 28 Jul 2020 15:29:13 -0700 Subject: [PATCH 1538/2522] Integrate LLVM at llvm/llvm-project@754deffd11c7 Updates LLVM usage to match [754deffd11c7](https://github.com/llvm/llvm-project/commit/754deffd11c7) PiperOrigin-RevId: 323666587 Change-Id: I82ddc2113c4e501f92ccc8dff1095dde03e9c9cd --- tensorflow/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 98454b49ce6..04c6da2cad4 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -711,8 +711,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "a51829913dba28dae603fdcdddd242c7e20192a1" - LLVM_SHA256 = "53a6cb26b3716fb8ace65cb80ef37af1c9b53cb734d945dce3dee8d4f28dc219" + LLVM_COMMIT = "754deffd11c733d709c3ed66d3b9a6b54d081474" + LLVM_SHA256 = "c6e6f7f88f29de8a62eb0f5f70168259f9b5abacce02a1022f5944813b060b8f" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From c389e90e5ae4d1336c062c8f9a3780ea46b94efc Mon Sep 17 00:00:00 2001 From: Gabriel Rasskin Date: Tue, 28 Jul 2020 15:38:36 -0700 Subject: [PATCH 1539/2522] Create tstring_fuzz.cc --- tensorflow/security/fuzzing/tstring_fuzz.cc | 41 +++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 tensorflow/security/fuzzing/tstring_fuzz.cc diff --git a/tensorflow/security/fuzzing/tstring_fuzz.cc b/tensorflow/security/fuzzing/tstring_fuzz.cc new file mode 100644 index 00000000000..49aaa884374 --- /dev/null +++ b/tensorflow/security/fuzzing/tstring_fuzz.cc @@ -0,0 +1,41 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include + +#include +#include + +#include "tensorflow/core/platform/tstring.h" + +// This is a fuzzer for tensorflow::tstring + +namespace { + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + FuzzedDataProvider fuzzed_data(data, size); + + tensorflow::tstring base = fuzzed_data.ConsumeRandomLengthString(10); + + while(fuzzed_data.remaining_bytes() > 0) { + const size_t initial_size = base.size(); + tensorflow::tstring pair = fuzzed_data.ConsumeRandomLengthString(10); + base.append(pair); + assert(base.size() == pair.size() + initial_size); + } + + return 0; +} + +} // namespace From 4f0dbbef37e85c4b5c850b0a0ae3b2d9455b53fd Mon Sep 17 00:00:00 2001 From: Gabriel Rasskin Date: Tue, 28 Jul 2020 15:39:11 -0700 Subject: [PATCH 1540/2522] Add tstring_fuzz.cc to build --- tensorflow/security/fuzzing/BUILD | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tensorflow/security/fuzzing/BUILD b/tensorflow/security/fuzzing/BUILD index aa3d509af37..75c62ec8bf1 100644 --- a/tensorflow/security/fuzzing/BUILD +++ b/tensorflow/security/fuzzing/BUILD @@ -70,3 +70,11 @@ tf_fuzz_target( "//tensorflow/core/platform:stringprintf", ], ) + +tf_fuzz_target( + name = "tstring_fuzz", + srcs = ["tstring_fuzz.cc"], + deps = [ + "//tensorflow/core/platform:tstring", + ], +) From dd40d314668eddede28000c4c1691d8aa7d089d2 Mon Sep 17 00:00:00 2001 From: Peng Wang Date: Tue, 28 Jul 2020 15:30:17 -0700 Subject: [PATCH 1541/2522] Disables `xla_enable_strict_auto_jit` for tensorflow/python/kernel_tests/array_ops:batch_gather_op_test_xla_gpu because in xla-auto-jit mode an expected error is not raised. 
PiperOrigin-RevId: 323666769 Change-Id: I95bd49bc8f0b2f902b5d92d3890f71c6b7dfae8f --- tensorflow/python/kernel_tests/array_ops/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/kernel_tests/array_ops/BUILD b/tensorflow/python/kernel_tests/array_ops/BUILD index 6086cfcf449..e78f14d8ead 100644 --- a/tensorflow/python/kernel_tests/array_ops/BUILD +++ b/tensorflow/python/kernel_tests/array_ops/BUILD @@ -10,6 +10,7 @@ package( cuda_py_test( name = "batch_gather_op_test", srcs = ["batch_gather_op_test.py"], + xla_enable_strict_auto_jit = False, # b/162351094 deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", From 3aaf9e186bb6ff05d3370ad83b941f88b3efc006 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Tue, 28 Jul 2020 22:53:13 +0000 Subject: [PATCH 1542/2522] Update tensorflow/compiler/tf2xla/functionalize_cond.cc --- tensorflow/compiler/tf2xla/functionalize_cond.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/tf2xla/functionalize_cond.cc b/tensorflow/compiler/tf2xla/functionalize_cond.cc index ef8222853d2..58bbb704329 100644 --- a/tensorflow/compiler/tf2xla/functionalize_cond.cc +++ b/tensorflow/compiler/tf2xla/functionalize_cond.cc @@ -224,8 +224,8 @@ string DebugString(const CondArgNodes& nodes) { } StateMap::CondId StateMap::LookupCondId(const Node* node) const { - const int64 node_to_condid_map_size = node_to_condid_map_.size(); - if (node->id() < node_to_condid_map_size) + const int64 map_size = node_to_condid_map_.size(); + if (node->id() < map_size) return node_to_condid_map_[node->id()]; return added_node_condid_mapping_.at(node->id()); } From 1f6e4df5e05c73c030fba0b4cb9d2228aba30e1f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 28 Jul 2020 15:55:39 -0700 Subject: [PATCH 1543/2522] Checking tensor index for unset tensors when reading it. PiperOrigin-RevId: 323671321 Change-Id: If38fe986950af1775622d5d9d180ebffc6a8f785 --- tensorflow/lite/delegates/gpu/common/object_reader.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tensorflow/lite/delegates/gpu/common/object_reader.h b/tensorflow/lite/delegates/gpu/common/object_reader.h index a9fbf546bf6..f360bcf9302 100644 --- a/tensorflow/lite/delegates/gpu/common/object_reader.h +++ b/tensorflow/lite/delegates/gpu/common/object_reader.h @@ -59,6 +59,12 @@ class ObjectReader { template absl::Status ReadTensor(uint32_t idx, TensorT* t) const { const int32_t tensor_idx = node_->inputs->data[idx]; + if (tensor_idx < 0) { + return absl::InvalidArgumentError( + "Invalid data index found. Possibly an unset optional tensor is " + "being read."); + } + const TfLiteTensor* tflite_tensor = context_->tensors + tensor_idx; t->data.resize(NumElements(tflite_tensor)); RETURN_IF_ERROR(CreateVectorCopyData(*tflite_tensor, &t->data[0])); From 660c0e77d82962fd8d02a4b993320c348c99e960 Mon Sep 17 00:00:00 2001 From: Anna R Date: Tue, 28 Jul 2020 16:04:04 -0700 Subject: [PATCH 1544/2522] Disable tensorflow/c/eager:c_api_distributed_test since it is flaky. 
PiperOrigin-RevId: 323672949 Change-Id: Ib4210df2f41a79ca5f75986fc3678595a96bf88a --- tensorflow/c/eager/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index 61701bc8b21..2fc88f4a287 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -550,6 +550,7 @@ tf_cuda_cc_test( args = ["--heap_check=local"], extra_copts = tfe_xla_copts(), tags = [ + "no_oss", # b/162361408 "no_windows", "noasan", # leaks gRPC server instances ], From 3770e9cde4c71ab5387612c594d81de3880aa619 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Tue, 28 Jul 2020 16:12:08 -0700 Subject: [PATCH 1545/2522] More cleanup in mlir-hlo to prepare for the standalone build Shuffle files around, use TableGen to register passes, and introduce a `mlir-hlo-opt.cpp` file to hold the main entry point of the -opt tool and stop relying on static registration for dialect/passes. PiperOrigin-RevId: 323674455 Change-Id: I04629a39b33176ff6769fe211639b3c17f8be82a --- tensorflow/compiler/mlir/hlo/BUILD | 109 ++++++++++++---- .../mlir-hlo/Dialect/mhlo/IR/chlo_ops.h | 20 +-- .../mlir-hlo/Dialect/mhlo/IR/hlo_ops.h | 28 ++-- .../mlir-hlo/Dialect/mhlo/IR/hlo_ops.td | 6 +- .../mhlo/IR/infer_fusibility_op_interface.h | 2 +- .../mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h | 24 ++-- .../mlir-hlo/Dialect/mhlo/IR/register.h | 27 ++++ .../Dialect/mhlo/transforms/lmhlo_passes.td | 65 ++++++++++ .../mhlo/transforms/map_hlo_to_lhlo_op.h | 4 +- .../mhlo/transforms/map_lmhlo_to_scalar_op.h | 8 +- .../Dialect/mhlo/transforms/mhlo_passes.td | 108 ++++++++++++++++ .../mlir-hlo/Dialect/mhlo/transforms/passes.h | 21 +-- .../Dialect/mhlo/transforms/register_passes.h | 49 +++++++ .../Dialect/mhlo/transforms/rewriters.h | 19 ++- .../include/mlir-hlo/utils/broadcast_utils.h | 12 +- .../mlir-hlo/utils/convert_op_folder.h | 4 +- .../hlo/include/mlir-hlo/utils/hlo_utils.h | 10 +- .../mlir/hlo/lib/Dialect/mhlo/IR/chlo_ops.cc | 18 +-- .../Dialect/mhlo/IR/dialect_registration.cc | 6 +- .../mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc | 63 +++++---- .../hlo/lib/Dialect/mhlo/IR/hlo_patterns.td | 2 +- .../mhlo/IR/infer_fusibility_op_interface.cc | 4 +- .../mlir/hlo/lib/Dialect/mhlo/IR/init.cc | 33 +++++ .../mlir/hlo/lib/Dialect/mhlo/IR/lhlo_ops.cc | 38 +++--- .../mhlo_canonicalize.td} | 0 .../mhlo/transforms/chlo_legalize_to_hlo.cc | 26 ++-- .../transforms/chlo_legalize_to_hlo_pass.cc | 30 +++-- .../mhlo/transforms/hlo_legalize_to_lhlo.cc | 44 +++---- .../mhlo/transforms/legalize_control_flow.cc | 38 +++--- .../legalize_gather_to_torch_index_select.cc | 23 ++-- .../legalize_tanh_to_approximation.cc | 26 ++-- .../mhlo/transforms/legalize_to_linalg.cc | 57 ++++----- .../mhlo/transforms/legalize_to_standard.cc | 27 ++-- .../legalize_to_standard_patterns.td | 2 +- .../mhlo/transforms/lhlo_copy_removal.cc | 19 ++- .../mhlo/transforms/lhlo_fuse_linalg.cc | 31 ++--- .../transforms/lhlo_legalize_to_affine.cc | 32 ++--- .../mhlo/transforms/lhlo_legalize_to_gpu.cc | 45 +++---- .../mhlo/transforms/lhlo_legalize_to_llvm.cc | 12 +- .../transforms/lhlo_legalize_to_llvm_pass.cc | 25 ++-- .../lhlo_legalize_to_parallel_loops.cc | 26 ++-- .../Dialect/mhlo/transforms/lower_complex.cc | 36 +++--- .../mhlo/transforms/lower_complex_patterns.td | 2 +- .../mhlo/transforms/lower_general_dot.cc | 34 ++--- .../mhlo/transforms/materialize_broadcasts.cc | 12 +- .../transforms/materialize_broadcasts_pass.cc | 24 ++-- .../Dialect/mhlo/transforms/mhlo_fusion.cc | 15 +-- .../Dialect/mhlo/transforms/optimize_mhlo.cc | 22 ++-- 
.../mhlo/transforms/optimize_mhlo_pass.cc | 28 ++-- .../sink_constants_to_control_flow.cc | 23 ++-- .../transforms/test_infer_shaped_type_pass.cc | 27 ++-- .../mhlo/transforms/transform_unranked_hlo.cc | 29 ++--- .../mhlo/transforms/unfuse_batch_norm.cc | 18 +-- .../mhlo/transforms/unfuse_batch_norm_pass.cc | 24 ++-- .../mlir/hlo/lib/utils/broadcast_utils.cc | 8 +- .../mlir/hlo/lib/utils/convert_op_folder.cc | 8 +- .../mlir/hlo/lib/utils/cycle_detector.cc | 2 +- .../mlir/hlo/lib/utils/cycle_detector_test.cc | 2 +- .../compiler/mlir/hlo/lib/utils/hlo_utils.cc | 4 +- .../hlo/tools/mlir-hlo-opt/mlir-hlo-opt.cpp | 121 ++++++++++++++++++ tensorflow/compiler/mlir/xla/BUILD | 6 +- .../compiler/xla/service/mlir_gpu/BUILD | 2 +- .../xla/service/mlir_gpu/kernel_lowering.cc | 6 +- 63 files changed, 1004 insertions(+), 592 deletions(-) create mode 100644 tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/register.h create mode 100644 tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/lmhlo_passes.td create mode 100644 tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/mhlo_passes.td create mode 100644 tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/register_passes.h create mode 100644 tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/init.cc rename tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/{transforms/canonicalize.td => IR/mhlo_canonicalize.td} (100%) create mode 100644 tensorflow/compiler/mlir/hlo/tools/mlir-hlo-opt/mlir-hlo-opt.cpp diff --git a/tensorflow/compiler/mlir/hlo/BUILD b/tensorflow/compiler/mlir/hlo/BUILD index 9e835979829..c6909079113 100644 --- a/tensorflow/compiler/mlir/hlo/BUILD +++ b/tensorflow/compiler/mlir/hlo/BUILD @@ -55,6 +55,38 @@ filegroup( ], ) +gentbl( + name = "MhloPassIncGen", + strip_include_prefix = "include/mlir-hlo/Dialect/mhlo/transforms/", + tbl_outs = [ + ( + "-gen-pass-decls", + "include/mlir-hlo/Dialect/mhlo/transforms/mhlo_passes.h.inc", + ), + ], + tblgen = "@llvm-project//mlir:mlir-tblgen", + td_file = "include/mlir-hlo/Dialect/mhlo/transforms/mhlo_passes.td", + td_srcs = [ + "@llvm-project//mlir:PassBaseTdFiles", + ], +) + +gentbl( + name = "LmhloPassIncGen", + strip_include_prefix = "include/mlir-hlo/Dialect/mhlo/transforms/", + tbl_outs = [ + ( + "-gen-pass-decls", + "include/mlir-hlo/Dialect/mhlo/transforms/lmhlo_passes.h.inc", + ), + ], + tblgen = "@llvm-project//mlir:mlir-tblgen", + td_file = "include/mlir-hlo/Dialect/mhlo/transforms/lmhlo_passes.td", + td_srcs = [ + "@llvm-project//mlir:PassBaseTdFiles", + ], +) + gentbl( name = "chlo_ops_inc_gen", strip_include_prefix = "include", @@ -76,8 +108,8 @@ gentbl( tbl_outs = [ ("-gen-op-decls", "include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h.inc"), ("-gen-op-defs", "include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.cc.inc"), - ("-gen-struct-attr-decls", "include/mlir-hlo/Dialect/mhlo/IR/hlo_structs.h.inc"), - ("-gen-struct-attr-defs", "include/mlir-hlo/Dialect/mhlo/IR/hlo_structs.cc.inc"), + ("-gen-struct-attr-decls", "include/mlir-hlo/Dialect/mhlo/IR/hlo_ops_structs.h.inc"), + ("-gen-struct-attr-defs", "include/mlir-hlo/Dialect/mhlo/IR/hlo_ops_structs.cc.inc"), ], tblgen = "@llvm-project//mlir:mlir-tblgen", td_file = "include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td", @@ -108,15 +140,18 @@ gentbl( gentbl( name = "hlo_ops_pattern_gen", - strip_include_prefix = "include", + strip_include_prefix = "lib/Dialect/mhlo/IR/", tbl_outs = [ ( "-gen-rewriters", - "include/mlir-hlo/Dialect/mhlo/IR/hlo_patterns.cc.inc", + "lib/Dialect/mhlo/IR/hlo_patterns.cc.inc", ), 
], tblgen = "@llvm-project//mlir:mlir-tblgen", td_file = "lib/Dialect/mhlo/IR/hlo_patterns.td", + td_relative_includes = [ + "include", + ], td_srcs = [ ":hlo_ops_td_files", "@llvm-project//mlir:StdOpsTdFiles", @@ -131,8 +166,8 @@ gentbl( tbl_outs = [ ("-gen-op-decls", "include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h.inc"), ("-gen-op-defs", "include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.cc.inc"), - ("-gen-struct-attr-decls", "include/mlir-hlo/Dialect/mhlo/IR/lhlo_structs.h.inc"), - ("-gen-struct-attr-defs", "include/mlir-hlo/Dialect/mhlo/IR/lhlo_structs.cc.inc"), + ("-gen-struct-attr-decls", "include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops_structs.h.inc"), + ("-gen-struct-attr-defs", "include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops_structs.cc.inc"), ], tblgen = "@llvm-project//mlir:mlir-tblgen", td_file = "include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.td", @@ -145,11 +180,12 @@ gentbl( #TODO(aminim): revisit the naming and grouping of these rules post-move. gentbl( name = "canonicalize_inc_gen", + strip_include_prefix = "lib/Dialect/mhlo/IR/", tbl_outs = [ - ("-gen-rewriters", "lib/Dialect/mhlo/transforms/generated_canonicalize.inc"), + ("-gen-rewriters", "lib/Dialect/mhlo/IR/mhlo_canonicalize.inc"), ], tblgen = "@llvm-project//mlir:mlir-tblgen", - td_file = "lib/Dialect/mhlo/transforms/canonicalize.td", + td_file = "lib/Dialect/mhlo/IR/mhlo_canonicalize.td", td_relative_includes = [ "include", ], @@ -165,7 +201,7 @@ gentbl( ), ( "-gen-op-interface-defs", - "include/mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.cc.inc", + "include/mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.cpp.inc", ), ], tblgen = "@llvm-project//mlir:mlir-tblgen", @@ -187,6 +223,7 @@ cc_library( "include/mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.h", "include/mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.h.inc", ], + includes = ["include"], deps = [ ":infer_fusibility_op_interface_gen", "@llvm-project//mlir:IR", @@ -199,6 +236,7 @@ cc_library( name = "convert_op_folder", srcs = ["lib/utils/convert_op_folder.cc"], hdrs = ["include/mlir-hlo/utils/convert_op_folder.h"], + includes = ["include"], deps = [ "@llvm-project//mlir:IR", ], @@ -229,7 +267,6 @@ cc_library( ":hlo_ops_base_inc_gen", ":hlo_ops_inc_gen", ":infer_fusibility_op_interface", - "@com_google_absl//absl/container:flat_hash_set", "@llvm-project//llvm:Support", "@llvm-project//mlir:Analysis", "@llvm-project//mlir:IR", @@ -274,7 +311,7 @@ cc_library( ) cc_library( - name = "hlo_dialect_registration", + name = "hlo_dialect_force_registration", srcs = ["lib/Dialect/mhlo/IR/dialect_registration.cc"], deps = [ ":hlo", @@ -284,6 +321,17 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "hlo_dialect_registration", + srcs = ["lib/Dialect/mhlo/IR/init.cc"], + hdrs = ["include/mlir-hlo/Dialect/mhlo/IR/register.h"], + deps = [ + ":hlo", + ":lhlo", + "@llvm-project//mlir:IR", + ], +) + cc_library( name = "sink_constants_to_control_flow", srcs = ["lib/Dialect/mhlo/transforms/sink_constants_to_control_flow.cc"], @@ -327,7 +375,6 @@ cc_library( ":hlo", ":lhlo", ":map_lmhlo_to_scalar_op", - "@com_google_absl//absl/memory", "@llvm-project//llvm:Support", "@llvm-project//mlir:Affine", "@llvm-project//mlir:IR", @@ -342,7 +389,6 @@ cc_library( srcs = ["lib/Dialect/mhlo/transforms/lhlo_legalize_to_parallel_loops.cc"], deps = [ ":lhlo", - "@com_google_absl//absl/memory", "@llvm-project//llvm:Support", "@llvm-project//mlir:IR", "@llvm-project//mlir:LinalgOps", @@ -377,7 +423,6 @@ cc_library( ":hlo", ":lhlo", ":map_lmhlo_to_scalar_op", - 
"@com_google_absl//absl/memory", "@llvm-project//llvm:Support", "@llvm-project//mlir:Affine", "@llvm-project//mlir:IR", @@ -395,7 +440,6 @@ cc_library( hdrs = ["include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h"], deps = [ ":hlo", - "@com_google_absl//absl/memory", "@llvm-project//mlir:IR", "@llvm-project//mlir:Pass", "@llvm-project//mlir:Shape", @@ -412,7 +456,6 @@ cc_library( ":hlo", ":lhlo", ":map_lmhlo_to_scalar_op", - "@com_google_absl//absl/memory", "@llvm-project//llvm:Support", "@llvm-project//mlir:GPUDialect", "@llvm-project//mlir:IR", @@ -431,7 +474,6 @@ cc_library( hdrs = ["include/mlir-hlo/Dialect/mhlo/transforms/passes.h"], deps = [ ":lhlo", - "@com_google_absl//absl/memory", "@llvm-project//llvm:Support", "@llvm-project//mlir:IR", "@llvm-project//mlir:LinalgTransforms", @@ -449,7 +491,6 @@ cc_library( hdrs = ["include/mlir-hlo/Dialect/mhlo/transforms/passes.h"], deps = [ ":lhlo", - "@com_google_absl//absl/memory", "@llvm-project//llvm:Support", "@llvm-project//mlir:IR", "@llvm-project//mlir:Pass", @@ -470,7 +511,6 @@ cc_library( ":hlo", ":lhlo", ":map_hlo_to_lhlo_op", - "@com_google_absl//absl/memory", "@llvm-project//llvm:Support", "@llvm-project//mlir:IR", "@llvm-project//mlir:Pass", @@ -485,6 +525,7 @@ cc_library( name = "cycle_detector", srcs = ["lib/utils/cycle_detector.cc"], hdrs = ["include/mlir-hlo/utils/cycle_detector.h"], + includes = ["include"], deps = [ "@llvm-project//llvm:Support", ], @@ -521,13 +562,14 @@ cc_library( gentbl( name = "legalize_to_standard_inc_gen", + strip_include_prefix = "lib/Dialect/mhlo/transforms/", tbl_outs = [ ("-gen-rewriters", "lib/Dialect/mhlo/transforms/generated_legalize_to_standard.inc"), ], tblgen = "@llvm-project//mlir:mlir-tblgen", td_file = "lib/Dialect/mhlo/transforms/legalize_to_standard_patterns.td", td_relative_includes = [ - "../hlo/include", + "include", ], td_srcs = [ ":hlo_ops_td_files", @@ -577,7 +619,6 @@ cc_library( ], deps = [ ":hlo", - "@com_google_absl//absl/memory", "@llvm-project//llvm:Support", "@llvm-project//mlir:IR", "@llvm-project//mlir:Pass", @@ -595,6 +636,7 @@ cc_library( "include/mlir-hlo/Dialect/mhlo/transforms/passes.h", "include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h", ], + includes = ["include"], deps = [ "@llvm-project//llvm:Support", "@llvm-project//mlir:IR", @@ -608,13 +650,14 @@ cc_library( gentbl( name = "lower_complex_inc_gen", + strip_include_prefix = "lib/Dialect/mhlo/transforms/", tbl_outs = [ ("-gen-rewriters", "lib/Dialect/mhlo/transforms/generated_lower_complex.inc"), ], tblgen = "@llvm-project//mlir:mlir-tblgen", td_file = "lib/Dialect/mhlo/transforms/lower_complex_patterns.td", td_relative_includes = [ - "../hlo/include", + "include", ], td_srcs = [ ":hlo_ops_td_files", @@ -627,7 +670,6 @@ cc_library( #TODO(aminim): find a better name here? 
name = "mhlo_to_mhlo_lowering_patterns", srcs = [ - "lib/Dialect/mhlo/transforms/generated_lower_complex.inc", "lib/Dialect/mhlo/transforms/lower_complex.cc", "lib/Dialect/mhlo/transforms/lower_general_dot.cc", "lib/Dialect/mhlo/transforms/optimize_mhlo.cc", @@ -638,7 +680,8 @@ cc_library( ], deps = [ ":hlo", - ":hlo_dialect_registration", + ":hlo_dialect_force_registration", + ":lower_complex_inc_gen", "@llvm-project//llvm:Support", "@llvm-project//mlir:Analysis", "@llvm-project//mlir:IR", @@ -732,12 +775,16 @@ cc_library( cc_library( name = "all_passes_for_testing", + hdrs = [ + "include/mlir-hlo/Dialect/mhlo/transforms/register_passes.h", + ], visibility = [ "//tensorflow/compiler/mlir:__subpackages__", ], deps = [ + ":LmhloPassIncGen", + ":MhloPassIncGen", ":chlo_legalize_to_hlo", - ":hlo_dialect_registration", ":hlo_legalize_to_lhlo", ":legalize_control_flow", ":legalize_gather_to_torch_index_select", @@ -755,15 +802,23 @@ cc_library( ":sink_constants_to_control_flow", ":test_passes", ":transform_unranked_hlo", + "@llvm-project//mlir:Pass", ], ) cc_binary( name = "mlir-hlo-opt", + srcs = [ + "tools/mlir-hlo-opt/mlir-hlo-opt.cpp", + ], deps = [ ":all_passes_for_testing", - "@llvm-project//mlir:AllPassesAndDialects", + ":hlo_dialect_registration", + "@llvm-project//llvm:Support", + "@llvm-project//mlir:AllPassesAndDialectsNoRegistration", + "@llvm-project//mlir:IR", "@llvm-project//mlir:MlirOptLib", - "@llvm-project//mlir:MlirOptMain", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:Support", ], ) diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.h b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.h index 1fbf55ded83..c5483e978ec 100644 --- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.h +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.h @@ -17,15 +17,15 @@ limitations under the License. 
#define TENSORFLOW_COMPILER_MLIR_HLO_INCLUDE_MLIR_HLO_DIALECT_MHLO_IR_CHLO_OPS_H_ #include "llvm/ADT/StringRef.h" -#include "mlir/IR/Dialect.h" // from @llvm-project -#include "mlir/IR/DialectImplementation.h" // from @llvm-project -#include "mlir/IR/MLIRContext.h" // from @llvm-project -#include "mlir/IR/OpDefinition.h" // from @llvm-project -#include "mlir/IR/Operation.h" // from @llvm-project -#include "mlir/IR/StandardTypes.h" // from @llvm-project -#include "mlir/IR/Types.h" // from @llvm-project -#include "mlir/Interfaces/InferTypeOpInterface.h" // from @llvm-project -#include "mlir/Interfaces/SideEffectInterfaces.h" // from @llvm-project +#include "mlir/IR/Dialect.h" +#include "mlir/IR/DialectImplementation.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/OpDefinition.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/StandardTypes.h" +#include "mlir/IR/Types.h" +#include "mlir/Interfaces/InferTypeOpInterface.h" +#include "mlir/Interfaces/SideEffectInterfaces.h" namespace mlir { namespace chlo { @@ -37,7 +37,7 @@ class HloClientDialect : public Dialect { }; #define GET_OP_CLASSES -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.h.inc" +#include "mlir-hlo/Dialect/mhlo/IR/chlo_ops.h.inc" } // namespace chlo } // namespace mlir diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h index 4de52639bca..0036cc0dc19 100644 --- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h @@ -19,23 +19,23 @@ limitations under the License. #define TENSORFLOW_COMPILER_MLIR_HLO_INCLUDE_MLIR_HLO_DIALECT_MHLO_IR_HLO_OPS_H_ #include "llvm/ADT/StringRef.h" -#include "mlir/IR/Attributes.h" // from @llvm-project -#include "mlir/IR/Dialect.h" // from @llvm-project -#include "mlir/IR/DialectImplementation.h" // from @llvm-project -#include "mlir/IR/Location.h" // from @llvm-project -#include "mlir/IR/MLIRContext.h" // from @llvm-project -#include "mlir/IR/OpDefinition.h" // from @llvm-project -#include "mlir/IR/Operation.h" // from @llvm-project -#include "mlir/IR/StandardTypes.h" // from @llvm-project -#include "mlir/IR/Types.h" // from @llvm-project -#include "mlir/Interfaces/InferTypeOpInterface.h" // from @llvm-project -#include "mlir/Interfaces/SideEffectInterfaces.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.h" +#include "mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.h" +#include "mlir/IR/Attributes.h" +#include "mlir/IR/Dialect.h" +#include "mlir/IR/DialectImplementation.h" +#include "mlir/IR/Location.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/OpDefinition.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/StandardTypes.h" +#include "mlir/IR/Types.h" +#include "mlir/Interfaces/InferTypeOpInterface.h" +#include "mlir/Interfaces/SideEffectInterfaces.h" namespace mlir { class OpBuilder; -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_structs.h.inc" +#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops_structs.h.inc" namespace mhlo { @@ -91,7 +91,7 @@ LogicalResult deriveShapeFromFirstOperand( SmallVectorImpl *reifiedReturnShapes); #define GET_OP_CLASSES -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h.inc" +#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h.inc" } // end namespace mhlo } // end namespace mlir 
diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td index c88a6138b95..0ed4235e23f 100644 --- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td @@ -21,9 +21,9 @@ limitations under the License. include "mlir/IR/OpBase.td" include "mlir/Interfaces/InferTypeOpInterface.td" include "mlir/Interfaces/SideEffectInterfaces.td" -include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops_base.td" -include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_utils.td" -include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.td" +include "mlir-hlo/Dialect/mhlo/IR/hlo_ops_base.td" +include "mlir-hlo/Dialect/mhlo/IR/hlo_utils.td" +include "mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.td" def HLO_Dialect : Dialect { let name = "mhlo"; diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.h b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.h index ecbf2e05000..00de1170f8a 100644 --- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.h +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.h @@ -21,7 +21,7 @@ limitations under the License. namespace mlir { -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.h.inc" +#include "mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.h.inc" } // namespace mlir diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h index fd31bec44c0..bb9b29096f3 100644 --- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h @@ -19,21 +19,21 @@ limitations under the License. 
#define TENSORFLOW_COMPILER_MLIR_HLO_INCLUDE_MLIR_HLO_DIALECT_MHLO_IR_LHLO_OPS_H_ #include "llvm/ADT/StringRef.h" -#include "mlir/IR/Attributes.h" // from @llvm-project -#include "mlir/IR/Dialect.h" // from @llvm-project -#include "mlir/IR/Location.h" // from @llvm-project -#include "mlir/IR/MLIRContext.h" // from @llvm-project -#include "mlir/IR/OpDefinition.h" // from @llvm-project -#include "mlir/IR/Operation.h" // from @llvm-project -#include "mlir/IR/StandardTypes.h" // from @llvm-project -#include "mlir/IR/Types.h" // from @llvm-project -#include "mlir/Interfaces/SideEffectInterfaces.h" // from @llvm-project -#include "mlir/Interfaces/ViewLikeInterface.h" // from @llvm-project +#include "mlir/IR/Attributes.h" +#include "mlir/IR/Dialect.h" +#include "mlir/IR/Location.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/OpDefinition.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/StandardTypes.h" +#include "mlir/IR/Types.h" +#include "mlir/Interfaces/SideEffectInterfaces.h" +#include "mlir/Interfaces/ViewLikeInterface.h" namespace mlir { class OpBuilder; -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_structs.h.inc" +#include "mlir-hlo/Dialect/mhlo/IR/lhlo_ops_structs.h.inc" namespace lmhlo { @@ -44,7 +44,7 @@ class LmhloDialect : public Dialect { }; #define GET_OP_CLASSES -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h.inc" +#include "mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h.inc" } // namespace lmhlo } // end namespace mlir diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/register.h b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/register.h new file mode 100644 index 00000000000..5773901ad78 --- /dev/null +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/register.h @@ -0,0 +1,27 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef MLIR_HLO_DIALECT_MHLO_IR_REGISTER_H_ +#define MLIR_HLO_DIALECT_MHLO_IR_REGISTER_H_ + +namespace mlir { +namespace mhlo { + +void registerAllDialects(); + +} +} // namespace mlir + +#endif // MLIR_HLO_DIALECT_MHLO_IR_REGISTER_H_ diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/lmhlo_passes.td b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/lmhlo_passes.td new file mode 100644 index 00000000000..963ff5dbacf --- /dev/null +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/lmhlo_passes.td @@ -0,0 +1,65 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +include "mlir/Pass/PassBase.td" + +def LhloCopyRemovalPass : Pass<"lhlo-copy-removal", "FuncOp"> { + let summary = "Removes redundant LHLO copy operations."; + let constructor = "createLhloCopyRemovalPass()"; +} + + +def LhloLegalizeToLinalgPass : Pass<"lhlo-legalize-to-linalg", "FuncOp"> { + let summary = "Legalize from LHLO dialect to Linalg dialect."; + let constructor = "createLegalizeLhloToLinalgPass()"; +} + + +def LhloFuseLinalgPass : Pass<"lhlo-fuse-linalg", "FuncOp"> { + let summary = "Greedily fuse linalg ops obtained after LHLO lowering."; + let constructor = "createLhloFuseLinalgPass()"; + let options = [ + Option<"use_parallel_loops_", "use-parallel-loops", "bool", + /*default=*/"false", "Tiles GenericOp consumer to parallel loops before linalg fusion">, + ListOption<"tile_sizes_", "tile-sizes", "unsigned", + "Faster memory space number to promote fusion buffers to", + "llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated">, + ]; +} + + +def LhloLegalizeToAffinePass : Pass<"lhlo-legalize-to-affine", "FuncOp"> { + let summary = "Legalize from LHLO dialect to affine dialect."; + let constructor = "createLhloLegalizeToAffinePass()"; +} + + +def LhloLegalizeToGpuPass : Pass<"lhlo-legalize-to-gpu", "FuncOp"> { + let summary = "Legalize from LHLO dialect to GPU dialect."; + let constructor = "createLegalizeToGpuPass()"; +} + + +def TestLhloToLLVMPass : Pass<"test-lhlo-legalize-to-llvm", "FuncOp"> { + let summary = "Legalize from LHLO dialect to LLVM."; + let constructor = "createTestLhloToLLVMPass()"; +} + + +def LhloLegalizeToParallelLoopsPass : Pass<"lhlo-legalize-to-parallel-loops", "FuncOp"> { + let summary = "Legalize from LHLO dialect to parallel loops."; + let constructor = "createLegalizeLhloToParallelLoopsPass()"; +} + diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/map_hlo_to_lhlo_op.h b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/map_hlo_to_lhlo_op.h index a0246f93180..c51bcfcfe89 100644 --- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/map_hlo_to_lhlo_op.h +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/map_hlo_to_lhlo_op.h @@ -18,8 +18,8 @@ limitations under the License. #include -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" namespace mlir { namespace mhlo { diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/map_lmhlo_to_scalar_op.h b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/map_lmhlo_to_scalar_op.h index 5d2bffcec2a..2bb5ab2888d 100644 --- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/map_lmhlo_to_scalar_op.h +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/map_lmhlo_to_scalar_op.h @@ -18,10 +18,10 @@ limitations under the License. 
#include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" -#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/map_hlo_to_lhlo_op.h" +#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/transforms/map_hlo_to_lhlo_op.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" namespace mlir { namespace lmhlo { diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/mhlo_passes.td b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/mhlo_passes.td new file mode 100644 index 00000000000..fa3bde24df1 --- /dev/null +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/mhlo_passes.td @@ -0,0 +1,108 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +include "mlir/Pass/PassBase.td" + +def TestChloLegalizeToHloPass : Pass<"mhlo-test-chlo-legalize-to-hlo", "FuncOp"> { + let summary = "Test pass for applying chlo -> hlo legalization patterns."; + let constructor = "createTestChloLegalizeToHloPass()"; +} + +def HloLegalizeToLhloPass : Pass<"hlo-legalize-to-lhlo", "ModuleOp"> { + let summary = "Legalize from HLO dialect to LHLO dialect."; + let constructor = "createLegalizeToLhloPass()"; +} + +def LegalizeControlFlowPass : Pass<"mhlo-legalize-control-flow", "FuncOp"> { + let summary = "Legalize from MHLO control flow to CFG control flow."; + let constructor = "createLegalizeControlFlowPass()"; +} + +def LegalizeGatherToTorchIndexSelectPass : Pass<"mhlo-legalize-gather-to-torch-index-select", "FuncOp"> { + let summary = "Legalizes gathers to a torch index select."; + let constructor = "createLegalizeGatherToTorchIndexSelectPass()"; +} + + +def LegalizeTanhToApproximationPass : Pass<"mhlo-legalize-tanh-to-approximation", "FuncOp"> { + let summary = "Legalize tanh from standard dialect to an approximation."; + let constructor = "createLegalizeTanhToApproximationPass()"; +} + + +def HloLegalizeToLinalgPass : Pass<"hlo-legalize-to-linalg", "FuncOp"> { + let summary = "Legalize from HLO dialect to Linalg dialect."; + let constructor = "createLegalizeHloToLinalgPass()"; +} + + +def LegalizeToStandardPass : Pass<"mhlo-legalize-to-std", "FuncOp"> { + let summary = "Legalize from MHLO dialect to standard dialect."; + let constructor = "createLegalizeToStdPass()"; +} + +def LowerComplexPass : Pass<"mhlo-test-lower-complex", "FuncOp"> { + let summary = "Lower complex operations into non-complex operations."; + let constructor = "createLowerComplexPass()"; +} + + +def LegalizeGeneralDotPass : Pass<"mhlo-test-lower-general-dot", "FuncOp"> { + let summary = "Tests lowering general dot to a 
non-batched dot when possible."; + let constructor = "createLegalizeGeneralDotPass()"; +} + + +def TestMaterializeBroadcastsPass : Pass<"mhlo-test-materialize-broadcasts", "FuncOp"> { + let summary = "Test pass for materializing 'broadcast_dimensions' attributes."; + let constructor = "createTestMaterializeBroadcastsPass()"; +} + + +def MhloFusionPass : Pass<"mhlo-fusion", "FuncOp"> { + let summary = "Fuse mhlo ops to kLoop/kInput fusion patterns."; + let constructor = "createMhloFusionPass()"; +} + + +def OptimizeMhloPass : Pass<"mhlo-test-optimize", "FuncOp"> { + let summary = "Run optional HLO optimizations."; + let constructor = "createOptimizeMhloPass()"; +} + + +def SinkConstantsToControlFlowPass : Pass<"mhlo-sink-constants-to-control-flow", "FuncOp"> { + let summary = "Sink constants implicitly captured in control flow regions. This " + "is necessary to export to XLA."; + let constructor = "createSinkConstantsToControlFlowPass()"; +} + + +def TestInferShapedTypeMethodsPass : Pass<"mhlo-test-infer-shaped-type-methods", "FuncOp"> { + let summary = "Uses test ops to invoke InferShapedTypeOpInterface methods."; + let constructor = "createTestInferShapedTypeMethodsPass()"; +} + + +def TransformUnrankedHloPass : Pass<"transform-unranked-hlo", "FuncOp"> { + let summary = "Realize element-wise operations on ranked tensors where possible."; + let constructor = "createTransformUnrankedHloPass()"; +} + + +def TestUnfuseBatchNormPass : Pass<"mhlo-test-unfuse-batch-norm", "FuncOp"> { + let summary = "Test pass for materializing 'broadcast_dimensions' attributes."; + let constructor = "createTestUnfuseBatchNormPass()"; +} diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/passes.h b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/passes.h index 9ea39e95fef..efa116f3f0d 100644 --- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/passes.h +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/passes.h @@ -23,6 +23,7 @@ limitations under the License. namespace mlir { class FuncOp; +class FunctionPass; class ModuleOp; class Operation; template @@ -58,18 +59,26 @@ std::unique_ptr> createSinkConstantsToControlFlowPass(); // fuse mhlo ops to kLoop/kInput fusion patterns std::unique_ptr> createMhloFusionPass(); +/// Lowers the standard TanhOp to an approximation that does not use intrinsics. +std::unique_ptr> createLegalizeTanhToApproximationPass(); + +std::unique_ptr createOptimizeMhloPass(); +std::unique_ptr createLowerComplexPass(); +std::unique_ptr<::mlir::Pass> createLegalizeGeneralDotPass(); +std::unique_ptr createLegalizeGatherToTorchIndexSelectPass(); + } // namespace mhlo namespace lmhlo { // Lowers from LHLO dialect to Affine dialect. -std::unique_ptr> createLegalizeToAffinePass(); +std::unique_ptr> createLhloLegalizeToAffinePass(); // Lowers from LHLO dialect to Linalg dialect. std::unique_ptr> createLegalizeLhloToLinalgPass(); // Lowers from LHLO dialect to GPU dialect. -std::unique_ptr> createLegalizeToGpuPass(); +std::unique_ptr createLegalizeToGpuPass(); // Fuses linalg ops obtained after LHLO lowering. To enable fusion, // operations are first tiled. @@ -80,7 +89,7 @@ std::unique_ptr> createLegalizeToGpuPass(); // 'tile_sizes' provides the tile sizes to use for tiling. If the linalg // operation has more dimensions than tile sizes provided, 1 is used as // default. 
-std::unique_ptr> createLhloFuseLinalg( +std::unique_ptr createLhloFuseLinalgPass( bool use_parallel_loops = false, llvm::ArrayRef tile_sizes = {}); // Removes unnecessary LHLO copies which copy from the allocated buffers to the @@ -94,12 +103,6 @@ std::unique_ptr> createLegalizeLhloToParallelLoopsPass(); } // namespace lmhlo -namespace hlo { - -/// Lowers the standard TanhOp to an approximation that does not use intrinsics. -std::unique_ptr> createLegalizeTanhToApproximationPass(); - -} // namespace hlo } // namespace mlir #endif // TENSORFLOW_COMPILER_MLIR_HLO_INCLUDE_MLIR_HLO_DIALECT_MHLO_TRANSFORMS_PASSES_H_ diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/register_passes.h b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/register_passes.h new file mode 100644 index 00000000000..5c862d83fee --- /dev/null +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/register_passes.h @@ -0,0 +1,49 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef MLIR_HLO_DIALECT_MHLO_TRANSFORMS_REGISTER_PASSES_H_ +#define MLIR_HLO_DIALECT_MHLO_TRANSFORMS_REGISTER_PASSES_H_ + +#include "mlir-hlo/Dialect/mhlo/transforms/passes.h" +#include "mlir/Pass/Pass.h" + +namespace mlir { +namespace mhlo { + +std::unique_ptr createTestChloLegalizeToHloPass(); +std::unique_ptr createTestInferShapedTypeMethodsPass(); +std::unique_ptr createTestMaterializeBroadcastsPass(); +std::unique_ptr createTestUnfuseBatchNormPass(); + +inline void registerAllMhloPasses() { +#define GEN_PASS_REGISTRATION +#include "mlir-hlo/Dialect/mhlo/transforms/mhlo_passes.h.inc" +} + +} // namespace mhlo + +namespace lmhlo { + +std::unique_ptr createTestLhloToLLVMPass(); + +inline void registerAllLmhloPasses() { +#define GEN_PASS_REGISTRATION +#include "mlir-hlo/Dialect/mhlo/transforms/lmhlo_passes.h.inc" +} + +} // namespace lmhlo +} // namespace mlir + +#endif // MLIR_HLO_DIALECT_MHLO_TRANSFORMS_REGISTER_PASSES_H_ diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h index ddc6417b9ec..e5ca4f727a3 100644 --- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h @@ -18,9 +18,9 @@ limitations under the License. 
#include -#include "mlir/IR/MLIRContext.h" // from @llvm-project -#include "mlir/IR/PatternMatch.h" // from @llvm-project -#include "mlir/Transforms/DialectConversion.h" // from @llvm-project +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Transforms/DialectConversion.h" namespace mlir { class LLVMTypeConverter; @@ -80,6 +80,11 @@ void PopulateTransformUnrankedHloPatterns(MLIRContext *context, void PopulateUnfuseBatchNormPatterns(MLIRContext *context, OwningRewritePatternList *patterns); +// Populates a pattern that translates the standard TanhOp to an approximation +// that does not use intrinsics. +void PopulateTanhToApproximationPatterns(MLIRContext *context, + OwningRewritePatternList *patterns); + } // namespace mhlo namespace lmhlo { @@ -100,14 +105,6 @@ void PopulateLegalizeChloToHloPatterns(MLIRContext *context, } // namespace chlo -namespace hlo { - -// Populates a pattern that translates the standard TanhOp to an approximation -// that does not use intrinsics. -void PopulateTanhToApproximationPatterns(MLIRContext *context, - OwningRewritePatternList *patterns); - -} // namespace hlo } // namespace mlir #endif // TENSORFLOW_COMPILER_MLIR_HLO_INCLUDE_MLIR_HLO_DIALECT_MHLO_TRANSFORMS_REWRITERS_H_ diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/broadcast_utils.h b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/broadcast_utils.h index 3be7d42cc25..1e2404299b2 100644 --- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/broadcast_utils.h +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/broadcast_utils.h @@ -19,12 +19,12 @@ limitations under the License. // Utilities relating to implementing HLO broadcasting. // Note: This file should not depend on any non-MLIR TensorFlow libraries. -#include "mlir/IR/Attributes.h" // from @llvm-project -#include "mlir/IR/Builders.h" // from @llvm-project -#include "mlir/IR/Location.h" // from @llvm-project -#include "mlir/IR/StandardTypes.h" // from @llvm-project -#include "mlir/Interfaces/InferTypeOpInterface.h" // from @llvm-project -#include "mlir/Support/LLVM.h" // from @llvm-project +#include "mlir/IR/Attributes.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/Location.h" +#include "mlir/IR/StandardTypes.h" +#include "mlir/Interfaces/InferTypeOpInterface.h" +#include "mlir/Support/LLVM.h" namespace mlir { namespace hlo { diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/convert_op_folder.h b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/convert_op_folder.h index a63df336d8f..4cf74385843 100644 --- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/convert_op_folder.h +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/convert_op_folder.h @@ -16,8 +16,8 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_MLIR_HLO_INCLUDE_MLIR_HLO_UTILS_CONVERT_OP_FOLDER_H_ #define TENSORFLOW_COMPILER_MLIR_HLO_INCLUDE_MLIR_HLO_UTILS_CONVERT_OP_FOLDER_H_ -#include "mlir/IR/Attributes.h" // from @llvm-project -#include "mlir/IR/StandardTypes.h" // from @llvm-project +#include "mlir/IR/Attributes.h" +#include "mlir/IR/StandardTypes.h" namespace mlir { namespace hlo { diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/hlo_utils.h b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/hlo_utils.h index b31ba231acd..1e335ae6b82 100644 --- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/hlo_utils.h +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/hlo_utils.h @@ -16,11 +16,11 @@ limitations under the License. 
#ifndef TENSORFLOW_COMPILER_MLIR_HLO_INCLUDE_MLIR_HLO_UTILS_HLO_UTILS_H_ #define TENSORFLOW_COMPILER_MLIR_HLO_INCLUDE_MLIR_HLO_UTILS_HLO_UTILS_H_ -#include "mlir/IR/Attributes.h" // from @llvm-project -#include "mlir/IR/Builders.h" // from @llvm-project -#include "mlir/IR/PatternMatch.h" // from @llvm-project -#include "mlir/IR/StandardTypes.h" // from @llvm-project -#include "mlir/IR/TypeUtilities.h" // from @llvm-project +#include "mlir/IR/Attributes.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/StandardTypes.h" +#include "mlir/IR/TypeUtilities.h" namespace mlir { namespace hlo { diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/chlo_ops.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/chlo_ops.cc index c6c193a9d89..99ed8bcb849 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/chlo_ops.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/chlo_ops.cc @@ -13,14 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/IR/chlo_ops.h" -#include "mlir/IR/Attributes.h" // from @llvm-project -#include "mlir/IR/Builders.h" // from @llvm-project -#include "mlir/IR/Diagnostics.h" // from @llvm-project -#include "mlir/IR/StandardTypes.h" // from @llvm-project -#include "mlir/IR/TypeUtilities.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/broadcast_utils.h" +#include "mlir-hlo/utils/broadcast_utils.h" +#include "mlir/IR/Attributes.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/Diagnostics.h" +#include "mlir/IR/StandardTypes.h" +#include "mlir/IR/TypeUtilities.h" namespace mlir { namespace chlo { @@ -260,7 +260,7 @@ BROADCAST_BINARY_OP_DEFS(BroadcastXorOp); #undef BROADCAST_BINARY_OP_DEFS #define GET_OP_CLASSES -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.cc.inc" +#include "mlir-hlo/Dialect/mhlo/IR/chlo_ops.cc.inc" //===----------------------------------------------------------------------===// // chlo Dialect Constructor @@ -270,7 +270,7 @@ HloClientDialect::HloClientDialect(MLIRContext* context) : Dialect(getDialectNamespace(), context) { addOperations< #define GET_OP_LIST -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.cc.inc" +#include "mlir-hlo/Dialect/mhlo/IR/chlo_ops.cc.inc" >(); } diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/dialect_registration.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/dialect_registration.cc index f4df946d11a..9d1c354690a 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/dialect_registration.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/dialect_registration.cc @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/IR/chlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" // Static initialization for *HLO dialects registration. static mlir::DialectRegistration mhlo_ops; diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc index fe14e40b3eb..69b01009a0d 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc @@ -15,7 +15,7 @@ limitations under the License. // This file defines the operations used in the MHLO dialect. -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" #include #include @@ -24,7 +24,6 @@ limitations under the License. #include #include -#include "absl/container/flat_hash_set.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" @@ -35,33 +34,33 @@ limitations under the License. #include "llvm/Support/Casting.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MathExtras.h" -#include "mlir/Dialect/Shape/IR/Shape.h" // from @llvm-project -#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project -#include "mlir/IR/Attributes.h" // from @llvm-project -#include "mlir/IR/Builders.h" // from @llvm-project -#include "mlir/IR/Dialect.h" // from @llvm-project -#include "mlir/IR/Location.h" // from @llvm-project -#include "mlir/IR/MLIRContext.h" // from @llvm-project -#include "mlir/IR/Matchers.h" // from @llvm-project -#include "mlir/IR/OpDefinition.h" // from @llvm-project -#include "mlir/IR/OpImplementation.h" // from @llvm-project -#include "mlir/IR/Operation.h" // from @llvm-project -#include "mlir/IR/OperationSupport.h" // from @llvm-project -#include "mlir/IR/PatternMatch.h" // from @llvm-project -#include "mlir/IR/StandardTypes.h" // from @llvm-project -#include "mlir/IR/TypeUtilities.h" // from @llvm-project -#include "mlir/IR/Types.h" // from @llvm-project -#include "mlir/IR/Value.h" // from @llvm-project -#include "mlir/Support/LLVM.h" // from @llvm-project -#include "mlir/Support/LogicalResult.h" // from @llvm-project -#include "mlir/Transforms/InliningUtils.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h.inc" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/convert_op_folder.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/hlo_utils.h" +#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h.inc" +#include "mlir-hlo/utils/convert_op_folder.h" +#include "mlir-hlo/utils/hlo_utils.h" +#include "mlir/Dialect/Shape/IR/Shape.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/Attributes.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/Dialect.h" +#include "mlir/IR/Location.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/Matchers.h" +#include "mlir/IR/OpDefinition.h" +#include "mlir/IR/OpImplementation.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/OperationSupport.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/StandardTypes.h" +#include 
"mlir/IR/TypeUtilities.h" +#include "mlir/IR/Types.h" +#include "mlir/IR/Value.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Support/LogicalResult.h" +#include "mlir/Transforms/InliningUtils.h" namespace mlir { -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_patterns.cc.inc" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_structs.cc.inc" +#include "hlo_patterns.cc.inc" +#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops_structs.cc.inc" namespace mhlo { Operation* MhloDialect::materializeConstant(OpBuilder& builder, Attribute value, @@ -106,7 +105,7 @@ DenseIntElementsAttr BuildSliceLimits(DenseIntElementsAttr start_indices, return GetI64ElementsAttr(slice_limits, builder); } -#include "tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/generated_canonicalize.inc" +#include "mhlo_canonicalize.inc" } // namespace //===----------------------------------------------------------------------===// @@ -375,8 +374,8 @@ static LogicalResult Verify(CollectivePermuteOp op) { << "expect source_target_pairs attribute of shape (N, 2), but got (" << type.getShape() << ")"; // Check source target pairs for duplicate sources or targets - absl::flat_hash_set sources; - absl::flat_hash_set targets; + llvm::DenseSet sources; + llvm::DenseSet targets; for (auto i = op.source_target_pairs().begin(), e = op.source_target_pairs().end(); i != e; ++i) { @@ -2123,7 +2122,7 @@ void CompareOp::build(OpBuilder& builder, OperationState& result, Value lhs, } #define GET_OP_CLASSES -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.cc.inc" +#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.cc.inc" //===----------------------------------------------------------------------===// // mhlo Dialect Interfaces @@ -2154,7 +2153,7 @@ MhloDialect::MhloDialect(MLIRContext* context) : Dialect(getDialectNamespace(), context) { addOperations< #define GET_OP_LIST -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.cc.inc" +#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.cc.inc" >(); addInterfaces(); addTypes(); diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_patterns.td b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_patterns.td index dab49740b4b..b8b6cb80fba 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_patterns.td +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_patterns.td @@ -16,7 +16,7 @@ limitations under the License. // Canonicalization patterns for the MHLO dialect. include "mlir/Dialect/Shape/IR/ShapeOps.td" -include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td" +include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.td" def EqualBinaryOperands : Constraint>; diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/infer_fusibility_op_interface.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/infer_fusibility_op_interface.cc index eaa3414b36a..e93a6cfce3d 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/infer_fusibility_op_interface.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/infer_fusibility_op_interface.cc @@ -13,10 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.h" +#include "mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.h" namespace mlir { -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.cc.inc" +#include "mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.cpp.inc" } // namespace mlir diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/init.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/init.cc new file mode 100644 index 00000000000..9fffeae1cc5 --- /dev/null +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/init.cc @@ -0,0 +1,33 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "mlir-hlo/Dialect/mhlo/IR/chlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/IR/register.h" + +// Static initialization for *HLO dialects registration. + +void mlir::mhlo::registerAllDialects() { + static bool init_once = []() { + registerDialect(); + registerDialect(); + registerDialect(); + return true; + }(); + (void)init_once; + + // Dependent dialects +} diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/lhlo_ops.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/lhlo_ops.cc index bd0dc224ccc..bbb463cd1a9 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/lhlo_ops.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/lhlo_ops.cc @@ -15,7 +15,7 @@ limitations under the License. // This file defines the operations used in the LMHLO dialect. -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" #include #include @@ -28,31 +28,31 @@ limitations under the License. 
#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/FormatVariadic.h" -#include "mlir/IR/Attributes.h" // from @llvm-project -#include "mlir/IR/Builders.h" // from @llvm-project -#include "mlir/IR/Dialect.h" // from @llvm-project -#include "mlir/IR/Location.h" // from @llvm-project -#include "mlir/IR/MLIRContext.h" // from @llvm-project -#include "mlir/IR/OpDefinition.h" // from @llvm-project -#include "mlir/IR/OpImplementation.h" // from @llvm-project -#include "mlir/IR/Operation.h" // from @llvm-project -#include "mlir/IR/OperationSupport.h" // from @llvm-project -#include "mlir/IR/PatternMatch.h" // from @llvm-project -#include "mlir/IR/StandardTypes.h" // from @llvm-project -#include "mlir/IR/TypeUtilities.h" // from @llvm-project -#include "mlir/IR/Types.h" // from @llvm-project -#include "mlir/IR/Value.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h.inc" +#include "mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h.inc" +#include "mlir/IR/Attributes.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/Dialect.h" +#include "mlir/IR/Location.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/OpDefinition.h" +#include "mlir/IR/OpImplementation.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/OperationSupport.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/StandardTypes.h" +#include "mlir/IR/TypeUtilities.h" +#include "mlir/IR/Types.h" +#include "mlir/IR/Value.h" namespace mlir { -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_structs.cc.inc" +#include "mlir-hlo/Dialect/mhlo/IR/lhlo_ops_structs.cc.inc" namespace lmhlo { LmhloDialect::LmhloDialect(MLIRContext *context) : Dialect(getDialectNamespace(), context) { addOperations< #define GET_OP_LIST -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.cc.inc" +#include "mlir-hlo/Dialect/mhlo/IR/lhlo_ops.cc.inc" >(); } @@ -127,7 +127,7 @@ static LogicalResult Verify(ReshapeMemRefCastOp op) { } #define GET_OP_CLASSES -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.cc.inc" +#include "mlir-hlo/Dialect/mhlo/IR/lhlo_ops.cc.inc" // TODO(cheshire): Support folding, reuse code from hlo_ops.cc. diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/canonicalize.td b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/mhlo_canonicalize.td similarity index 100% rename from tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/canonicalize.td rename to tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/mhlo_canonicalize.td diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo.cc index d86d01df3b3..2a8482b813e 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo.cc @@ -13,19 +13,19 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "mlir/Dialect/SCF/SCF.h" // from @llvm-project -#include "mlir/Dialect/Shape/IR/Shape.h" // from @llvm-project -#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project -#include "mlir/IR/Attributes.h" // from @llvm-project -#include "mlir/IR/MLIRContext.h" // from @llvm-project -#include "mlir/IR/OperationSupport.h" // from @llvm-project -#include "mlir/IR/PatternMatch.h" // from @llvm-project -#include "mlir/IR/StandardTypes.h" // from @llvm-project -#include "mlir/Transforms/DialectConversion.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/broadcast_utils.h" +#include "mlir-hlo/Dialect/mhlo/IR/chlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/transforms/rewriters.h" +#include "mlir-hlo/utils/broadcast_utils.h" +#include "mlir/Dialect/SCF/SCF.h" +#include "mlir/Dialect/Shape/IR/Shape.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/Attributes.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/OperationSupport.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/StandardTypes.h" +#include "mlir/Transforms/DialectConversion.h" namespace mlir { namespace chlo { diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo_pass.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo_pass.cc index 89aa9bad997..50cd6df5c99 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo_pass.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo_pass.cc @@ -13,16 +13,17 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "mlir/Dialect/SCF/SCF.h" // from @llvm-project -#include "mlir/Dialect/Shape/IR/Shape.h" // from @llvm-project -#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project -#include "mlir/Pass/Pass.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h" +#include "mlir-hlo/Dialect/mhlo/IR/chlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/transforms/passes.h" +#include "mlir-hlo/Dialect/mhlo/transforms/rewriters.h" +#include "mlir/Dialect/SCF/SCF.h" +#include "mlir/Dialect/Shape/IR/Shape.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/Pass/Pass.h" namespace mlir { -namespace chlo { +namespace mhlo { namespace { @@ -32,7 +33,7 @@ struct TestChloLegalizeToHloPass ConversionTarget conversionTarget(getContext()); OwningRewritePatternList conversionPatterns; - conversionTarget.addIllegalDialect(); + conversionTarget.addIllegalDialect(); // Consider the mhlo dialect legal for tests. conversionTarget.addLegalDialect(); // The conversion uses helpers from the Standard dialect. 
@@ -40,7 +41,7 @@ struct TestChloLegalizeToHloPass conversionTarget.addLegalDialect(); conversionTarget.addLegalDialect(); - PopulateLegalizeChloToHloPatterns(&getContext(), &conversionPatterns); + chlo::PopulateLegalizeChloToHloPatterns(&getContext(), &conversionPatterns); if (failed(applyPartialConversion(getFunction(), conversionTarget, conversionPatterns))) { @@ -51,9 +52,10 @@ struct TestChloLegalizeToHloPass } // namespace -} // namespace chlo +std::unique_ptr createTestChloLegalizeToHloPass() { + return std::make_unique(); +} + +} // namespace mhlo } // namespace mlir -static mlir::PassRegistration pass( - "mhlo-test-chlo-legalize-to-hlo", - "Test pass for applying chlo -> hlo legalization patterns"); diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/hlo_legalize_to_lhlo.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/hlo_legalize_to_lhlo.cc index 108689c28d9..a8c3ad17ebb 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/hlo_legalize_to_lhlo.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/hlo_legalize_to_lhlo.cc @@ -15,26 +15,25 @@ limitations under the License. // This file implements logic for lowering HLO dialect to LHLO dialect. -#include "absl/memory/memory.h" -#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project -#include "mlir/IR/AffineMap.h" // from @llvm-project -#include "mlir/IR/Attributes.h" // from @llvm-project -#include "mlir/IR/BlockAndValueMapping.h" // from @llvm-project -#include "mlir/IR/Builders.h" // from @llvm-project -#include "mlir/IR/Function.h" // from @llvm-project -#include "mlir/IR/Location.h" // from @llvm-project -#include "mlir/IR/MLIRContext.h" // from @llvm-project -#include "mlir/IR/Operation.h" // from @llvm-project -#include "mlir/IR/PatternMatch.h" // from @llvm-project -#include "mlir/IR/StandardTypes.h" // from @llvm-project -#include "mlir/Pass/Pass.h" // from @llvm-project -#include "mlir/Transforms/BufferPlacement.h" // from @llvm-project -#include "mlir/Transforms/DialectConversion.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/map_hlo_to_lhlo_op.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/passes.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h" +#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/transforms/map_hlo_to_lhlo_op.h" +#include "mlir-hlo/Dialect/mhlo/transforms/passes.h" +#include "mlir-hlo/Dialect/mhlo/transforms/rewriters.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/AffineMap.h" +#include "mlir/IR/Attributes.h" +#include "mlir/IR/BlockAndValueMapping.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/Function.h" +#include "mlir/IR/Location.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/StandardTypes.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/BufferPlacement.h" +#include "mlir/Transforms/DialectConversion.h" namespace mlir { namespace mhlo { @@ -511,11 +510,8 @@ void populateHLOToLHLOConversionPattern( std::unique_ptr> createLegalizeToLhloPass( bool results_escape_function) { - return absl::make_unique(results_escape_function); + return 
std::make_unique(results_escape_function); } -static PassRegistration legalize_pass( - "hlo-legalize-to-lhlo", "Legalize from HLO dialect to LHLO dialect"); - } // namespace mhlo } // namespace mlir diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_control_flow.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_control_flow.cc index 440df7ec23f..b6e23a6b131 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_control_flow.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_control_flow.cc @@ -18,27 +18,27 @@ limitations under the License. #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Casting.h" -#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project -#include "mlir/IR/Block.h" // from @llvm-project -#include "mlir/IR/BlockAndValueMapping.h" // from @llvm-project -#include "mlir/IR/Builders.h" // from @llvm-project -#include "mlir/IR/Function.h" // from @llvm-project -#include "mlir/IR/PatternMatch.h" // from @llvm-project -#include "mlir/IR/StandardTypes.h" // from @llvm-project -#include "mlir/IR/TypeUtilities.h" // from @llvm-project -#include "mlir/Pass/Pass.h" // from @llvm-project -#include "mlir/Pass/PassRegistry.h" // from @llvm-project -#include "mlir/Support/LogicalResult.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/passes.h" +#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/transforms/passes.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/Block.h" +#include "mlir/IR/BlockAndValueMapping.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/Function.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/StandardTypes.h" +#include "mlir/IR/TypeUtilities.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Pass/PassRegistry.h" +#include "mlir/Support/LogicalResult.h" using mlir::PassRegistration; namespace mlir { namespace mhlo { namespace { -struct LegalizeControlFlow - : public mlir::PassWrapper { +struct LegalizeControlFlowPass + : public mlir::PassWrapper { // Perform the lowering to MLIR control flow. void runOnFunction() override; }; @@ -206,7 +206,7 @@ LogicalResult LowerWhileOp(mlir::mhlo::WhileOp while_op) { return success(); } -void LegalizeControlFlow::runOnFunction() { +void LegalizeControlFlowPass::runOnFunction() { auto func = getFunction(); llvm::SmallVector if_ops; func.walk([&](IfOp op) { if_ops.push_back(op); }); @@ -228,9 +228,5 @@ void LegalizeControlFlow::runOnFunction() { std::unique_ptr> mlir::mhlo::createLegalizeControlFlowPass() { - return std::make_unique(); + return std::make_unique(); } - -static PassRegistration legalize_cf_pass( - "mhlo-legalize-control-flow", - "Legalize from MHLO control flow to CFG control flow"); diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_gather_to_torch_index_select.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_gather_to_torch_index_select.cc index 391829865c6..59cd3381133 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_gather_to_torch_index_select.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_gather_to_torch_index_select.cc @@ -13,13 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "absl/memory/memory.h" -#include "mlir/IR/Function.h" // from @llvm-project -#include "mlir/IR/PatternMatch.h" // from @llvm-project -#include "mlir/Pass/Pass.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/passes.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h" +#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/transforms/passes.h" +#include "mlir-hlo/Dialect/mhlo/transforms/rewriters.h" +#include "mlir/IR/Function.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Pass/Pass.h" namespace mlir { @@ -128,8 +127,8 @@ struct GatherIsTorchIndexSelect : public OpRewritePattern { } }; -struct LegalizeGatherToTorchIndexSelect - : public PassWrapper { +struct LegalizeGatherToTorchIndexSelectPass + : public PassWrapper { /// Perform the lowering of standard dialect operations to approximations. void runOnFunction() override { OwningRewritePatternList patterns; @@ -144,9 +143,9 @@ void PopulateGatherToTorchIndexSelectPatterns( patterns->insert(context); } -static PassRegistration legalize_hlo_pass( - "mhlo-legalize-gather-to-torch-index-select", - "Legalizes gathers to a torch index select."); +std::unique_ptr createLegalizeGatherToTorchIndexSelectPass() { + return std::make_unique(); +} } // namespace mhlo } // namespace mlir diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_tanh_to_approximation.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_tanh_to_approximation.cc index 1890646160e..57c494f536b 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_tanh_to_approximation.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_tanh_to_approximation.cc @@ -16,15 +16,15 @@ limitations under the License. // This file implements logic for lowering the tanh standard ops to an // approximation. -#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project -#include "mlir/IR/Function.h" // from @llvm-project -#include "mlir/IR/PatternMatch.h" // from @llvm-project -#include "mlir/Pass/Pass.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/passes.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h" +#include "mlir-hlo/Dialect/mhlo/transforms/passes.h" +#include "mlir-hlo/Dialect/mhlo/transforms/rewriters.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/Function.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Pass/Pass.h" namespace mlir { -namespace hlo { +namespace mhlo { namespace { /// Emits the fast tanh approximation that is also used by XLA. @@ -126,8 +126,8 @@ class ApproximateTanhLowering : public OpRewritePattern { } }; -struct LegalizeTanhToApproximation - : public PassWrapper { +struct LegalizeTanhToApproximationPass + : public PassWrapper { /// Perform the lowering of standard dialect operations to approximations. 
void runOnFunction() override { OwningRewritePatternList patterns; @@ -140,7 +140,7 @@ struct LegalizeTanhToApproximation std::unique_ptr> createLegalizeTanhToApproximationPass() { - return std::make_unique(); + return std::make_unique(); } void PopulateTanhToApproximationPatterns(mlir::MLIRContext *context, @@ -148,9 +148,5 @@ void PopulateTanhToApproximationPatterns(mlir::MLIRContext *context, patterns->insert(context); } -static PassRegistration legalize_pass( - "mhlo-legalize-tanh-to-approximation", - "Legalize tanh from standard dialect to an approximation"); - -} // namespace hlo +} // namespace mhlo } // namespace mlir diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_to_linalg.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_to_linalg.cc index 223baf420f9..f47f2c2fbdc 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_to_linalg.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_to_linalg.cc @@ -15,26 +15,25 @@ limitations under the License. // This file implements logic for lowering HLO/LHLO dialect to Linalg dialect. -#include "absl/memory/memory.h" -#include "mlir/Dialect/Affine/IR/AffineOps.h" // from @llvm-project -#include "mlir/Dialect/Linalg/IR/LinalgOps.h" // from @llvm-project -#include "mlir/Dialect/Linalg/IR/LinalgTypes.h" // from @llvm-project -#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project -#include "mlir/IR/AffineExpr.h" // from @llvm-project -#include "mlir/IR/Attributes.h" // from @llvm-project -#include "mlir/IR/Builders.h" // from @llvm-project -#include "mlir/IR/Function.h" // from @llvm-project -#include "mlir/IR/Location.h" // from @llvm-project -#include "mlir/IR/MLIRContext.h" // from @llvm-project -#include "mlir/IR/Operation.h" // from @llvm-project -#include "mlir/IR/PatternMatch.h" // from @llvm-project -#include "mlir/IR/StandardTypes.h" // from @llvm-project -#include "mlir/Pass/Pass.h" // from @llvm-project -#include "mlir/Transforms/DialectConversion.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/map_lmhlo_to_scalar_op.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h" +#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/transforms/map_lmhlo_to_scalar_op.h" +#include "mlir-hlo/Dialect/mhlo/transforms/rewriters.h" +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Linalg/IR/LinalgOps.h" +#include "mlir/Dialect/Linalg/IR/LinalgTypes.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/AffineExpr.h" +#include "mlir/IR/Attributes.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/Function.h" +#include "mlir/IR/Location.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/StandardTypes.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/DialectConversion.h" namespace mlir { namespace { @@ -826,8 +825,8 @@ void populateLHLOToLinalgConversionPattern(MLIRContext* context, // indexing_maps = [#map0, #map0, #map0], // iterator_types = ["parallel", "parallel"], // } : (memref<2x2xf32>, memref<2x2xf32>, memref<2x2xf32>) -> () -struct LhloLegalizeToLinalg - : public PassWrapper { +struct 
LhloLegalizeToLinalgPass + : public PassWrapper { void runOnFunction() override { OwningRewritePatternList patterns; ConversionTarget target(getContext()); @@ -842,8 +841,8 @@ struct LhloLegalizeToLinalg } }; -struct HloLegalizeToLinalg - : public PassWrapper { +struct HloLegalizeToLinalgPass + : public PassWrapper { void runOnFunction() override { OwningRewritePatternList patterns; ConversionTarget target(getContext()); @@ -861,11 +860,8 @@ struct HloLegalizeToLinalg namespace lmhlo { std::unique_ptr> createLegalizeLhloToLinalgPass() { - return absl::make_unique(); + return std::make_unique(); } - -static PassRegistration legalize_lhlo_pass( - "lhlo-legalize-to-linalg", "Legalize from LHLO dialect to Linalg dialect"); } // namespace lmhlo namespace mhlo { @@ -906,10 +902,7 @@ void populateHLOToLinalgConversionPattern(MLIRContext* context, } std::unique_ptr> createLegalizeHloToLinalgPass() { - return absl::make_unique(); + return std::make_unique(); } - -static PassRegistration legalize_hlo_pass( - "hlo-legalize-to-linalg", "Legalize from HLO dialect to Linalg dialect"); } // namespace mhlo } // namespace mlir diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_to_standard.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_to_standard.cc index c71aa1d0460..cc574e008d5 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_to_standard.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_to_standard.cc @@ -16,17 +16,17 @@ limitations under the License. // This file implements logic for lowering MHLO dialect to Standard dialect. #include "llvm/ADT/StringSwitch.h" -#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project -#include "mlir/IR/Function.h" // from @llvm-project -#include "mlir/IR/PatternMatch.h" // from @llvm-project -#include "mlir/Pass/Pass.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/passes.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h" +#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/transforms/passes.h" +#include "mlir-hlo/Dialect/mhlo/transforms/rewriters.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/Function.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Pass/Pass.h" namespace mlir { namespace { -#include "tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/generated_legalize_to_standard.inc" +#include "generated_legalize_to_standard.inc" } // end anonymous namespace namespace mhlo { namespace { @@ -176,15 +176,15 @@ class ConvertIotaOp : public OpRewritePattern { } // end anonymous namespace namespace { -struct LegalizeToStandard - : public PassWrapper { +struct LegalizeToStandardPass + : public PassWrapper { /// Perform the lowering to Standard dialect. void runOnFunction() override; }; } // end anonymous namespace std::unique_ptr> createLegalizeToStdPass() { - return std::make_unique(); + return std::make_unique(); } void PopulateMhloToStdPatterns(OwningRewritePatternList *patterns, @@ -194,14 +194,11 @@ void PopulateMhloToStdPatterns(OwningRewritePatternList *patterns, } /// Perform the lowering to standard dialect. 
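With the static PassRegistration objects gone, these passes are now obtained through the new create*Pass() factories; command-line registration is presumably handled centrally (see the registerAllMhloPasses() call in the new mlir-hlo-opt tool further down). A minimal sketch of programmatic use, modeled on the kernel_lowering.cc call sites updated at the end of this patch; the enclosing pipeline is an assumption:

#include "mlir-hlo/Dialect/mhlo/transforms/passes.h"
#include "mlir/IR/Function.h"
#include "mlir/Pass/PassManager.h"

void AddExampleMhloPasses(mlir::PassManager& pm) {
  // Factory functions replace the removed static registrations.
  pm.addNestedPass<mlir::FuncOp>(mlir::mhlo::createLegalizeToStdPass());
  pm.addNestedPass<mlir::FuncOp>(
      mlir::mhlo::createLegalizeGatherToTorchIndexSelectPass());
}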
-void LegalizeToStandard::runOnFunction() { +void LegalizeToStandardPass::runOnFunction() { OwningRewritePatternList patterns; mlir::mhlo::PopulateMhloToStdPatterns(&patterns, &getContext()); applyPatternsAndFoldGreedily(getFunction(), patterns); } -static PassRegistration legalize_pass( - "mhlo-legalize-to-std", "Legalize from MHLO dialect to standard dialect"); - } // end namespace mhlo } // end namespace mlir diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_to_standard_patterns.td b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_to_standard_patterns.td index 0e6fdf06701..ea67c052c5c 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_to_standard_patterns.td +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_to_standard_patterns.td @@ -17,7 +17,7 @@ limitations under the License. include "mlir/IR/OpBase.td" include "mlir/Dialect/StandardOps/IR/Ops.td" -include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td" +include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.td" //===----------------------------------------------------------------------===// // Nullary op patterns. diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_copy_removal.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_copy_removal.cc index d2607887482..7a4418466b5 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_copy_removal.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_copy_removal.cc @@ -15,12 +15,11 @@ limitations under the License. // This file implements a pass to remove redundant LHLO copy operations. -#include "absl/memory/memory.h" -#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project -#include "mlir/IR/Operation.h" // from @llvm-project -#include "mlir/Pass/Pass.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/passes.h" +#include "mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/transforms/passes.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/Operation.h" +#include "mlir/Pass/Pass.h" namespace mlir { namespace lmhlo { @@ -30,7 +29,8 @@ namespace { // arguments. All uses of each buffer are replaced with the corresponding block // argument and the buffer is freed. Note that this pass only works in regions // with a single block. -struct LhloCopyRemoval : mlir::PassWrapper> { +struct LhloCopyRemovalPass + : mlir::PassWrapper> { void runOnOperation() override { llvm::SmallVector eraseList; auto operation = getOperation(); @@ -95,11 +95,8 @@ struct LhloCopyRemoval : mlir::PassWrapper> { } // namespace std::unique_ptr createLhloCopyRemovalPass() { - return absl::make_unique(); + return std::make_unique(); } -static PassRegistration copy_removal_pass( - "lhlo-copy-removal", "Removes redundant LHLO copy operations"); - } // namespace lmhlo } // namespace mlir diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_fuse_linalg.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_fuse_linalg.cc index d832b96bf7b..1467f015dc9 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_fuse_linalg.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_fuse_linalg.cc @@ -16,15 +16,14 @@ limitations under the License. 
// This file implements logic for fusing linalg ops obtained after LHLO // lowering. -#include "mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h" -#include "absl/memory/memory.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" -#include "mlir/Dialect/Linalg/Transforms/Transforms.h" // from @llvm-project -#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project -#include "mlir/Pass/Pass.h" // from @llvm-project -#include "mlir/Transforms/FoldUtils.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/passes.h" +#include "mlir-hlo/Dialect/mhlo/transforms/passes.h" +#include "mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h" +#include "mlir/Dialect/Linalg/Transforms/Transforms.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/FoldUtils.h" namespace mlir { namespace lmhlo { @@ -32,11 +31,13 @@ namespace { using linalg::LinalgOp; -class LhloFuseLinalg : public PassWrapper { +class LhloFuseLinalgPass + : public PassWrapper { public: - LhloFuseLinalg() = default; - LhloFuseLinalg(const LhloFuseLinalg&) {} - LhloFuseLinalg(bool use_parallel_loops, llvm::ArrayRef tile_sizes) { + LhloFuseLinalgPass() = default; + LhloFuseLinalgPass(const LhloFuseLinalgPass&) {} + LhloFuseLinalgPass(bool use_parallel_loops, + llvm::ArrayRef tile_sizes) { tile_sizes_ = tile_sizes; use_parallel_loops_.setValue(use_parallel_loops); } @@ -138,14 +139,10 @@ class LhloFuseLinalg : public PassWrapper { } // namespace -std::unique_ptr> createLhloFuseLinalg( +std::unique_ptr createLhloFuseLinalgPass( bool use_parallel_loops, ArrayRef tile_sizes) { - return absl::make_unique(use_parallel_loops, tile_sizes); + return std::make_unique(use_parallel_loops, tile_sizes); } -static PassRegistration legalize_pass( - "lhlo-fuse-linalg", - "Greedily fuse linalg ops obtained after LHLO lowering."); - } // namespace lmhlo } // namespace mlir diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_affine.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_affine.cc index a353472be4b..07891327775 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_affine.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_affine.cc @@ -15,17 +15,16 @@ limitations under the License. // This file implements logic for lowering LHLO dialect to Affine dialect. 
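The LhloFuseLinalgPass rename above keeps both constructor options; only the factory changes, from createLhloFuseLinalg to createLhloFuseLinalgPass. A sketch of a call site with made-up tile sizes (the real caller is kernel_lowering.cc, updated at the end of this patch); the header providing the declaration is assumed:

#include "mlir-hlo/Dialect/mhlo/transforms/passes.h"
#include "mlir/Pass/PassManager.h"

void AddLinalgFusion(mlir::PassManager& pm) {
  // use_parallel_loops mirrors the kernel_lowering.cc call; the tile sizes
  // here are illustrative values only.
  pm.addPass(mlir::lmhlo::createLhloFuseLinalgPass(
      /*use_parallel_loops=*/true, /*tile_sizes=*/{16, 64}));
}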
-#include "absl/memory/memory.h" -#include "mlir/Dialect/Affine/IR/AffineOps.h" // from @llvm-project -#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project -#include "mlir/IR/Attributes.h" // from @llvm-project -#include "mlir/IR/Location.h" // from @llvm-project -#include "mlir/IR/MLIRContext.h" // from @llvm-project -#include "mlir/IR/PatternMatch.h" // from @llvm-project -#include "mlir/IR/StandardTypes.h" // from @llvm-project -#include "mlir/Pass/Pass.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/map_lmhlo_to_scalar_op.h" +#include "mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/transforms/map_lmhlo_to_scalar_op.h" +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/Attributes.h" +#include "mlir/IR/Location.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/StandardTypes.h" +#include "mlir/Pass/Pass.h" namespace mlir { namespace lmhlo { @@ -138,8 +137,8 @@ void populateLHLOToAffineConversionPattern(MLIRContext* context, // clang-format on } -struct LhloLegalizeToAffine - : public PassWrapper { +struct LhloLegalizeToAffinePass + : public PassWrapper { void runOnFunction() override { OwningRewritePatternList patterns; auto func = getFunction(); @@ -150,12 +149,9 @@ struct LhloLegalizeToAffine } // namespace -std::unique_ptr> createLegalizeToAffinePass() { - return absl::make_unique(); +std::unique_ptr> createLhloLegalizeToAffinePass() { + return std::make_unique(); } -static PassRegistration legalize_pass( - "lhlo-legalize-to-affine", "Legalize from LHLO dialect to affine dialect"); - } // namespace lmhlo } // namespace mlir diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_gpu.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_gpu.cc index 0ff491a93c3..0d0b8b0ab6e 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_gpu.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_gpu.cc @@ -17,25 +17,24 @@ limitations under the License. 
#include -#include "absl/memory/memory.h" #include "llvm/ADT/ArrayRef.h" -#include "mlir/Dialect/GPU/GPUDialect.h" // from @llvm-project -#include "mlir/Dialect/Linalg/IR/LinalgOps.h" // from @llvm-project -#include "mlir/Dialect/SCF/SCF.h" // from @llvm-project -#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project -#include "mlir/IR/Attributes.h" // from @llvm-project -#include "mlir/IR/BlockAndValueMapping.h" // from @llvm-project -#include "mlir/IR/Builders.h" // from @llvm-project -#include "mlir/IR/Function.h" // from @llvm-project -#include "mlir/IR/Location.h" // from @llvm-project -#include "mlir/IR/MLIRContext.h" // from @llvm-project -#include "mlir/IR/Operation.h" // from @llvm-project -#include "mlir/IR/PatternMatch.h" // from @llvm-project -#include "mlir/IR/StandardTypes.h" // from @llvm-project -#include "mlir/Pass/Pass.h" // from @llvm-project -#include "mlir/Transforms/DialectConversion.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/map_lmhlo_to_scalar_op.h" +#include "mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/transforms/map_lmhlo_to_scalar_op.h" +#include "mlir/Dialect/GPU/GPUDialect.h" +#include "mlir/Dialect/Linalg/IR/LinalgOps.h" +#include "mlir/Dialect/SCF/SCF.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/Attributes.h" +#include "mlir/IR/BlockAndValueMapping.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/Function.h" +#include "mlir/IR/Location.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/StandardTypes.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/DialectConversion.h" namespace mlir { namespace lmhlo { @@ -168,7 +167,8 @@ class LhloReduceToGPULaunchConverter : public OpConversionPattern { }; }; -struct LhloLegalizeToGpu : public PassWrapper { +struct LhloLegalizeToGpuPass + : public PassWrapper { void runOnFunction() override { OwningRewritePatternList patterns; ConversionTarget target(getContext()); @@ -185,12 +185,9 @@ struct LhloLegalizeToGpu : public PassWrapper { } // namespace -std::unique_ptr> createLegalizeToGpuPass() { - return absl::make_unique(); +std::unique_ptr createLegalizeToGpuPass() { + return std::make_unique(); } -static PassRegistration legalize_pass( - "lhlo-legalize-to-gpu", "Legalize from LHLO dialect to GPU dialect"); - } // namespace lmhlo } // namespace mlir diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_llvm.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_llvm.cc index dd3e2f8697d..35bbea7ccd4 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_llvm.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_llvm.cc @@ -13,12 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h" // from @llvm-project -#include "mlir/Dialect/LLVMIR/LLVMDialect.h" // from @llvm-project -#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project -#include "mlir/IR/StandardTypes.h" // from @llvm-project -#include "mlir/Transforms/DialectConversion.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" +#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/StandardTypes.h" +#include "mlir/Transforms/DialectConversion.h" namespace mlir { namespace lmhlo { diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_llvm_pass.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_llvm_pass.cc index d6cda99a912..2ed0182319b 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_llvm_pass.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_llvm_pass.cc @@ -13,16 +13,16 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "mlir/Conversion/AffineToStandard/AffineToStandard.h" // from @llvm-project -#include "mlir/Conversion/SCFToStandard/SCFToStandard.h" // from @llvm-project -#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h" // from @llvm-project -#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h" // from @llvm-project -#include "mlir/Dialect/LLVMIR/LLVMDialect.h" // from @llvm-project -#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project -#include "mlir/IR/StandardTypes.h" // from @llvm-project -#include "mlir/Pass/Pass.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h" +#include "mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/transforms/rewriters.h" +#include "mlir/Conversion/AffineToStandard/AffineToStandard.h" +#include "mlir/Conversion/SCFToStandard/SCFToStandard.h" +#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h" +#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/StandardTypes.h" +#include "mlir/Pass/Pass.h" namespace mlir { namespace lmhlo { @@ -57,8 +57,9 @@ class TestLhloToLLVMPass } // namespace -static PassRegistration legalize_lhlo_pass( - "test-lhlo-legalize-to-llvm", "Legalize from LHLO dialect to LLVM."); +std::unique_ptr createTestLhloToLLVMPass() { + return std::make_unique(); +} } // namespace lmhlo } // namespace mlir diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_parallel_loops.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_parallel_loops.cc index 4255d87d48e..19f47d08c0d 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_parallel_loops.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_parallel_loops.cc @@ -13,17 +13,16 @@ See the License for 
the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "absl/memory/memory.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" -#include "mlir/Dialect/Linalg/IR/LinalgOps.h" // from @llvm-project -#include "mlir/Dialect/SCF/SCF.h" // from @llvm-project -#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project -#include "mlir/IR/StandardTypes.h" // from @llvm-project -#include "mlir/Pass/Pass.h" // from @llvm-project -#include "mlir/Transforms/DialectConversion.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" +#include "mlir/Dialect/Linalg/IR/LinalgOps.h" +#include "mlir/Dialect/SCF/SCF.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/StandardTypes.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/DialectConversion.h" namespace mlir { namespace lmhlo { @@ -690,8 +689,8 @@ class SelectAndScatterOpConverter } }; -struct LhloLegalizeToParallelLoops - : public PassWrapper { +struct LhloLegalizeToParallelLoopsPass + : public PassWrapper { void runOnFunction() override { auto func = getFunction(); @@ -715,16 +714,11 @@ struct LhloLegalizeToParallelLoops } } }; - } // namespace std::unique_ptr> createLegalizeLhloToParallelLoopsPass() { - return absl::make_unique(); + return std::make_unique(); } -static PassRegistration legalize_lhlo_pass( - "lhlo-legalize-to-parallel-loops", - "Legalize from LHLO dialect to parallel loops."); - } // namespace lmhlo } // namespace mlir diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lower_complex.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lower_complex.cc index 23d146afeee..9f7c946577d 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lower_complex.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lower_complex.cc @@ -23,17 +23,17 @@ limitations under the License. 
#include #include "llvm/ADT/STLExtras.h" -#include "mlir/IR/Attributes.h" // from @llvm-project -#include "mlir/IR/MLIRContext.h" // from @llvm-project -#include "mlir/IR/Operation.h" // from @llvm-project -#include "mlir/IR/PatternMatch.h" // from @llvm-project -#include "mlir/IR/TypeUtilities.h" // from @llvm-project -#include "mlir/IR/Types.h" // from @llvm-project -#include "mlir/Pass/Pass.h" // from @llvm-project -#include "mlir/Pass/PassRegistry.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/passes.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/hlo_utils.h" +#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/transforms/passes.h" +#include "mlir-hlo/utils/hlo_utils.h" +#include "mlir/IR/Attributes.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/TypeUtilities.h" +#include "mlir/IR/Types.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Pass/PassRegistry.h" using mlir::FunctionPass; using mlir::OwningRewritePatternList; @@ -41,9 +41,9 @@ using mlir::PassRegistration; using mlir::PassWrapper; namespace { -class LowerComplex : public PassWrapper { +class LowerComplexPass : public PassWrapper { public: - explicit LowerComplex() : PassWrapper() {} + explicit LowerComplexPass() : PassWrapper() {} /// Performs the lowering to MHLO dialect. void runOnFunction() override; @@ -54,7 +54,7 @@ namespace mlir { namespace mhlo { namespace { -#include "tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/generated_lower_complex.inc" +#include "generated_lower_complex.inc" } // end anonymous namespace @@ -66,7 +66,7 @@ void PopulateComplexLoweringPatterns(MLIRContext* context, } // end namespace mlir // Lowers the complex operations that can be represented using other operations. -void LowerComplex::runOnFunction() { +void LowerComplexPass::runOnFunction() { // Add lowering patterns to the list. OwningRewritePatternList patterns; mlir::mhlo::PopulateComplexLoweringPatterns(&getContext(), &patterns); @@ -74,6 +74,6 @@ void LowerComplex::runOnFunction() { applyPatternsAndFoldGreedily(getFunction(), patterns); } -static PassRegistration pass( - "mhlo-test-lower-complex", - "Lower complex operations into non-complex operations"); +std::unique_ptr mlir::mhlo::createLowerComplexPass() { + return std::make_unique(); +} diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lower_complex_patterns.td b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lower_complex_patterns.td index eadfebf2fac..2cc97c90d1c 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lower_complex_patterns.td +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lower_complex_patterns.td @@ -18,7 +18,7 @@ limitations under the License. include "mlir/IR/OpBase.td" include "mlir/Dialect/StandardOps/IR/Ops.td" -include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td" +include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.td" //===----------------------------------------------------------------------===// // Binary op patterns. 
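The lowering implemented by lower_complex.cc and these .td patterns is plain complex arithmetic carried out on separate real and imaginary parts, e.g. (a+bi)(c+di) = (ac-bd) + (ad+bc)i for multiplication. A scalar sketch of that decomposition (the actual patterns emit mhlo tensor ops, so this is only an illustration):

struct Complex {
  float re;
  float im;
};

// (a.re + a.im*i) * (b.re + b.im*i), expanded into real-valued ops only.
Complex ComplexMul(Complex a, Complex b) {
  return {a.re * b.re - a.im * b.im,
          a.re * b.im + a.im * b.re};
}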
diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lower_general_dot.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lower_general_dot.cc index 32a6ce42e5e..2bbd4691f95 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lower_general_dot.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lower_general_dot.cc @@ -17,18 +17,18 @@ limitations under the License. #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringSwitch.h" -#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project -#include "mlir/IR/Attributes.h" // from @llvm-project -#include "mlir/IR/Function.h" // from @llvm-project -#include "mlir/IR/Location.h" // from @llvm-project -#include "mlir/IR/Operation.h" // from @llvm-project -#include "mlir/IR/PatternMatch.h" // from @llvm-project -#include "mlir/IR/StandardTypes.h" // from @llvm-project -#include "mlir/IR/TypeUtilities.h" // from @llvm-project -#include "mlir/Pass/Pass.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/passes.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h" +#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/transforms/passes.h" +#include "mlir-hlo/Dialect/mhlo/transforms/rewriters.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/Attributes.h" +#include "mlir/IR/Function.h" +#include "mlir/IR/Location.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/StandardTypes.h" +#include "mlir/IR/TypeUtilities.h" +#include "mlir/Pass/Pass.h" using mlir::DenseIntElementsAttr; using mlir::ElementsAttr; @@ -170,8 +170,8 @@ struct GeneralDotConvert : public OpRewritePattern { } }; -struct LegalizeGeneralDot - : public PassWrapper { +struct LegalizeGeneralDotPass + : public PassWrapper { /// Lower all general dots that can be represented as a non-batched matmul. void runOnFunction() override { OwningRewritePatternList patterns; @@ -187,6 +187,6 @@ void mlir::mhlo::PopulateGeneralDotOpLoweringPatterns( patterns->insert(ctx); } -static PassRegistration legalize_pass( - "mhlo-test-lower-general-dot", - "Tests lowering general dot to a non-batched dot when possible"); +std::unique_ptr<::mlir::Pass> mlir::mhlo::createLegalizeGeneralDotPass() { + return std::make_unique(); +} diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/materialize_broadcasts.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/materialize_broadcasts.cc index c2f88ad5e31..445cf2e79fe 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/materialize_broadcasts.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/materialize_broadcasts.cc @@ -15,12 +15,12 @@ limitations under the License. 
#include -#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project -#include "mlir/IR/MLIRContext.h" // from @llvm-project -#include "mlir/IR/Operation.h" // from @llvm-project -#include "mlir/IR/PatternMatch.h" // from @llvm-project -#include "mlir/Transforms/DialectConversion.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Transforms/DialectConversion.h" namespace mlir { namespace mhlo { diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/materialize_broadcasts_pass.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/materialize_broadcasts_pass.cc index 1d5d593bd43..3909f046007 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/materialize_broadcasts_pass.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/materialize_broadcasts_pass.cc @@ -13,14 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project -#include "mlir/IR/MLIRContext.h" // from @llvm-project -#include "mlir/IR/Operation.h" // from @llvm-project -#include "mlir/IR/PatternMatch.h" // from @llvm-project -#include "mlir/Pass/Pass.h" // from @llvm-project -#include "mlir/Transforms/DialectConversion.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h" +#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/transforms/rewriters.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/DialectConversion.h" namespace mlir { namespace mhlo { @@ -50,9 +50,9 @@ struct TestMaterializeBroadcastsPass } // namespace +std::unique_ptr<::mlir::Pass> createTestMaterializeBroadcastsPass() { + return std::make_unique(); +} + } // namespace mhlo } // namespace mlir - -static mlir::PassRegistration pass( - "mhlo-test-materialize-broadcasts", - "Test pass for materializing 'broadcast_dimensions' attributes"); diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/mhlo_fusion.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/mhlo_fusion.cc index 91f9344b8c5..233d95a1a65 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/mhlo_fusion.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/mhlo_fusion.cc @@ -18,14 +18,14 @@ limitations under the License. 
#include #include +#include "llvm/ADT/EquivalenceClasses.h" +#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" +#include "mlir-hlo/utils/cycle_detector.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" // TF:llvm-project #include "mlir/IR/MLIRContext.h" // TF:llvm-project #include "mlir/IR/Matchers.h" #include "mlir/Pass/Pass.h" // TF:local_config_mlir #include "mlir/Transforms/RegionUtils.h" // TF:llvm-project -#include "llvm/ADT/EquivalenceClasses.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/cycle_detector.h" // This pass has similar functionality of the fusion pass in XLA stack. // However, unlike XLA, it targets the fully dynamic shape scenario. @@ -479,7 +479,7 @@ class FusionPlanner { EquivalenceClasses leader_for_node_; }; -struct MhloFusion : public mlir::PassWrapper { +struct MhloFusionPass : public mlir::PassWrapper { void runOnFunction() override { FuncOp func = getFunction(); if (!IsTargetFunc(func)) { @@ -568,12 +568,9 @@ struct MhloFusion : public mlir::PassWrapper { } // namespace -std::unique_ptr> createMhloFusion() { - return std::make_unique(); +std::unique_ptr> createMhloFusionPass() { + return std::make_unique(); } -static PassRegistration mhlo_fusion_pass( - "mhlo-fusion", "fuse mhlo ops to kLoop/kInput fusion patterns."); - } // namespace mhlo } // namespace mlir diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/optimize_mhlo.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/optimize_mhlo.cc index dfed951e19f..43de47086bf 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/optimize_mhlo.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/optimize_mhlo.cc @@ -22,17 +22,17 @@ limitations under the License. #include #include "llvm/ADT/STLExtras.h" -#include "mlir/IR/Attributes.h" // from @llvm-project -#include "mlir/IR/MLIRContext.h" // from @llvm-project -#include "mlir/IR/Operation.h" // from @llvm-project -#include "mlir/IR/PatternMatch.h" // from @llvm-project -#include "mlir/IR/TypeUtilities.h" // from @llvm-project -#include "mlir/IR/Types.h" // from @llvm-project -#include "mlir/Pass/Pass.h" // from @llvm-project -#include "mlir/Pass/PassRegistry.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/passes.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/hlo_utils.h" +#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/transforms/passes.h" +#include "mlir-hlo/utils/hlo_utils.h" +#include "mlir/IR/Attributes.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/TypeUtilities.h" +#include "mlir/IR/Types.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Pass/PassRegistry.h" using mlir::OwningRewritePatternList; diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/optimize_mhlo_pass.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/optimize_mhlo_pass.cc index 3d1f29e0ca6..32a846e79ef 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/optimize_mhlo_pass.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/optimize_mhlo_pass.cc @@ -13,23 +13,24 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project -#include "mlir/IR/MLIRContext.h" // from @llvm-project -#include "mlir/IR/Operation.h" // from @llvm-project -#include "mlir/IR/PatternMatch.h" // from @llvm-project -#include "mlir/Pass/Pass.h" // from @llvm-project -#include "mlir/Transforms/DialectConversion.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h" +#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/transforms/passes.h" +#include "mlir-hlo/Dialect/mhlo/transforms/rewriters.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/DialectConversion.h" using mlir::FunctionPass; using mlir::PassRegistration; using mlir::PassWrapper; namespace { -class OptimizeMhlo : public PassWrapper { +class OptimizeMhloPass : public PassWrapper { public: - explicit OptimizeMhlo() : PassWrapper() {} + explicit OptimizeMhloPass() : PassWrapper() {} /// Performs the lowering to MHLO dialect. void runOnFunction() override; @@ -37,7 +38,7 @@ class OptimizeMhlo : public PassWrapper { } // end anonymous namespace // Lowers the complex operations that can be represented using other operations. -void OptimizeMhlo::runOnFunction() { +void OptimizeMhloPass::runOnFunction() { // Add lowering patterns to the list. mlir::OwningRewritePatternList patterns; mlir::mhlo::PopulateOptimizeMHLOPatterns(&getContext(), &patterns); @@ -45,5 +46,6 @@ void OptimizeMhlo::runOnFunction() { applyPatternsAndFoldGreedily(getFunction(), patterns); } -static PassRegistration pass("mhlo-test-optimize", - "Run optional HLO optimizations."); +std::unique_ptr mlir::mhlo::createOptimizeMhloPass() { + return std::make_unique(); +} diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/sink_constants_to_control_flow.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/sink_constants_to_control_flow.cc index b05918030e9..0f31e613afe 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/sink_constants_to_control_flow.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/sink_constants_to_control_flow.cc @@ -15,12 +15,12 @@ limitations under the License. #include "llvm/ADT/DenseMap.h" #include "llvm/Support/Casting.h" -#include "mlir/IR/Operation.h" // from @llvm-project -#include "mlir/Pass/Pass.h" // from @llvm-project -#include "mlir/Pass/PassManager.h" // from @llvm-project -#include "mlir/Support/LLVM.h" // from @llvm-project -#include "mlir/Transforms/RegionUtils.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" +#include "mlir/IR/Operation.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Pass/PassManager.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Transforms/RegionUtils.h" namespace mlir { namespace mhlo { @@ -29,8 +29,8 @@ namespace { // A pass that sinks constants implicitly captured in control flow regions. This // is necessary to export to XLA. 
-class SinkConstantsToControlFlow - : public mlir::PassWrapper { +class SinkConstantsToControlFlowPass + : public mlir::PassWrapper { void runOnFunction() override { getFunction().walk([](Operation* op) { if (auto while_op = llvm::dyn_cast(op)) { @@ -70,15 +70,10 @@ class SinkConstantsToControlFlow } }; -static mlir::PassRegistration pass( - "mhlo-sink-constants-to-control-flow", - "Sink constants implicitly captured in control flow regions. This is " - "necessary to export to XLA."); - } // anonymous namespace std::unique_ptr> createSinkConstantsToControlFlowPass() { - return std::make_unique(); + return std::make_unique(); } } // namespace mhlo diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/test_infer_shaped_type_pass.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/test_infer_shaped_type_pass.cc index 184420bb8f7..35e5a184472 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/test_infer_shaped_type_pass.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/test_infer_shaped_type_pass.cc @@ -13,16 +13,16 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "mlir/IR/Attributes.h" // from @llvm-project -#include "mlir/IR/Identifier.h" // from @llvm-project -#include "mlir/IR/MLIRContext.h" // from @llvm-project -#include "mlir/IR/OperationSupport.h" // from @llvm-project -#include "mlir/IR/PatternMatch.h" // from @llvm-project -#include "mlir/Interfaces/InferTypeOpInterface.h" // from @llvm-project -#include "mlir/Pass/Pass.h" // from @llvm-project +#include "mlir/IR/Attributes.h" +#include "mlir/IR/Identifier.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/OperationSupport.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Interfaces/InferTypeOpInterface.h" +#include "mlir/Pass/Pass.h" namespace mlir { -namespace hlo { +namespace mhlo { namespace { struct InferReturnTypeComponentsPattern : public RewritePattern { @@ -92,9 +92,10 @@ struct TestInferShapedTypeMethodsPass }; } // namespace -} // namespace hlo -} // namespace mlir -static mlir::PassRegistration pass( - "mhlo-test-infer-shaped-type-methods", - "Uses test ops to invoke InferShapedTypeOpInterface methods"); +std::unique_ptr createTestInferShapedTypeMethodsPass() { + return std::make_unique(); +} + +} // namespace mhlo +} // namespace mlir diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/transform_unranked_hlo.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/transform_unranked_hlo.cc index a973569d913..8db5d849322 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/transform_unranked_hlo.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/transform_unranked_hlo.cc @@ -14,18 +14,17 @@ limitations under the License. 
==============================================================================*/ -#include "absl/memory/memory.h" -#include "mlir/Dialect/Shape/IR/Shape.h" // from @llvm-project -#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project -#include "mlir/IR/Function.h" // from @llvm-project -#include "mlir/IR/MLIRContext.h" // from @llvm-project -#include "mlir/IR/Operation.h" // from @llvm-project -#include "mlir/IR/PatternMatch.h" // from @llvm-project -#include "mlir/IR/StandardTypes.h" // from @llvm-project -#include "mlir/Pass/Pass.h" // from @llvm-project -#include "mlir/Transforms/DialectConversion.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h" +#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/transforms/rewriters.h" +#include "mlir/Dialect/Shape/IR/Shape.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/Function.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/StandardTypes.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/DialectConversion.h" namespace mlir { namespace mhlo { @@ -204,9 +203,9 @@ void PopulateTransformUnrankedHloPatterns(MLIRContext *context, // clang-format on } -static PassRegistration transform_unranked_hlo_pass( - "transform-unranked-hlo", - "Realize element-wise operations on ranked tensors where possible"); +std::unique_ptr<::mlir::Pass> createTransformUnrankedHloPass() { + return std::make_unique(); +} } // namespace mhlo } // namespace mlir diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/unfuse_batch_norm.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/unfuse_batch_norm.cc index 09c9c61119e..1458e5f3d63 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/unfuse_batch_norm.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/unfuse_batch_norm.cc @@ -14,15 +14,15 @@ limitations under the License. 
==============================================================================*/ #include "llvm/ADT/SmallVector.h" -#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project -#include "mlir/IR/Attributes.h" // from @llvm-project -#include "mlir/IR/Builders.h" // from @llvm-project -#include "mlir/IR/MLIRContext.h" // from @llvm-project -#include "mlir/IR/PatternMatch.h" // from @llvm-project -#include "mlir/IR/StandardTypes.h" // from @llvm-project -#include "mlir/IR/Types.h" // from @llvm-project -#include "mlir/Transforms/DialectConversion.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/Attributes.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/StandardTypes.h" +#include "mlir/IR/Types.h" +#include "mlir/Transforms/DialectConversion.h" namespace mlir { namespace mhlo { diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/unfuse_batch_norm_pass.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/unfuse_batch_norm_pass.cc index c26d73f3306..f187a7470cf 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/unfuse_batch_norm_pass.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/unfuse_batch_norm_pass.cc @@ -13,14 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project -#include "mlir/IR/MLIRContext.h" // from @llvm-project -#include "mlir/IR/Operation.h" // from @llvm-project -#include "mlir/IR/PatternMatch.h" // from @llvm-project -#include "mlir/Pass/Pass.h" // from @llvm-project -#include "mlir/Transforms/DialectConversion.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h" +#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/transforms/rewriters.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/DialectConversion.h" namespace mlir { namespace mhlo { @@ -38,9 +38,9 @@ struct TestUnfuseBatchNormPass } // namespace +std::unique_ptr<::mlir::Pass> createTestUnfuseBatchNormPass() { + return std::make_unique(); +} + } // namespace mhlo } // namespace mlir - -static mlir::PassRegistration pass( - "mhlo-test-unfuse-batch-norm", - "Test pass for materializing 'broadcast_dimensions' attributes"); diff --git a/tensorflow/compiler/mlir/hlo/lib/utils/broadcast_utils.cc b/tensorflow/compiler/mlir/hlo/lib/utils/broadcast_utils.cc index e05ec3c3481..73111c02dbd 100644 --- a/tensorflow/compiler/mlir/hlo/lib/utils/broadcast_utils.cc +++ b/tensorflow/compiler/mlir/hlo/lib/utils/broadcast_utils.cc @@ -13,15 +13,15 @@ See the License for the specific language governing permissions and limitations under the License. 
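For reference on the unfuse_batch_norm changes above: unfusing batch_norm_inference means materializing the usual normalization arithmetic, y = (x - mean) / sqrt(variance + epsilon) * scale + offset, as individual ops. A scalar sketch (the pass itself emits broadcasted element-wise mhlo ops over tensors, so this is an illustration only):

#include <cmath>

// Scalar form of the unfused batch-norm-inference computation.
float BatchNormInference(float x, float mean, float variance, float scale,
                         float offset, float epsilon) {
  return (x - mean) / std::sqrt(variance + epsilon) * scale + offset;
}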
==============================================================================*/ -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/broadcast_utils.h" +#include "mlir-hlo/utils/broadcast_utils.h" #include #include "llvm/ADT/Sequence.h" #include "llvm/ADT/SmallVector.h" -#include "mlir/Dialect/Shape/IR/Shape.h" // from @llvm-project -#include "mlir/IR/Diagnostics.h" // from @llvm-project -#include "mlir/IR/StandardTypes.h" // from @llvm-project +#include "mlir/Dialect/Shape/IR/Shape.h" +#include "mlir/IR/Diagnostics.h" +#include "mlir/IR/StandardTypes.h" namespace mlir { namespace hlo { diff --git a/tensorflow/compiler/mlir/hlo/lib/utils/convert_op_folder.cc b/tensorflow/compiler/mlir/hlo/lib/utils/convert_op_folder.cc index ea074c4907d..0751d2c626c 100644 --- a/tensorflow/compiler/mlir/hlo/lib/utils/convert_op_folder.cc +++ b/tensorflow/compiler/mlir/hlo/lib/utils/convert_op_folder.cc @@ -15,11 +15,11 @@ limitations under the License. // This file defines helpers useful when creating or manipulating lhlo/hlo. -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/convert_op_folder.h" +#include "mlir-hlo/utils/convert_op_folder.h" -#include "mlir/IR/Attributes.h" // from @llvm-project -#include "mlir/IR/StandardTypes.h" // from @llvm-project -#include "mlir/IR/TypeUtilities.h" // from @llvm-project +#include "mlir/IR/Attributes.h" +#include "mlir/IR/StandardTypes.h" +#include "mlir/IR/TypeUtilities.h" namespace mlir { namespace hlo { diff --git a/tensorflow/compiler/mlir/hlo/lib/utils/cycle_detector.cc b/tensorflow/compiler/mlir/hlo/lib/utils/cycle_detector.cc index 6145391a608..0914460236d 100644 --- a/tensorflow/compiler/mlir/hlo/lib/utils/cycle_detector.cc +++ b/tensorflow/compiler/mlir/hlo/lib/utils/cycle_detector.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/cycle_detector.h" +#include "mlir-hlo/utils/cycle_detector.h" #include diff --git a/tensorflow/compiler/mlir/hlo/lib/utils/cycle_detector_test.cc b/tensorflow/compiler/mlir/hlo/lib/utils/cycle_detector_test.cc index 314bbd699c7..263321c17d1 100644 --- a/tensorflow/compiler/mlir/hlo/lib/utils/cycle_detector_test.cc +++ b/tensorflow/compiler/mlir/hlo/lib/utils/cycle_detector_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/cycle_detector.h" +#include "mlir-hlo/utils/cycle_detector.h" #include "tensorflow/compiler/xla/test.h" diff --git a/tensorflow/compiler/mlir/hlo/lib/utils/hlo_utils.cc b/tensorflow/compiler/mlir/hlo/lib/utils/hlo_utils.cc index 184d113fb9d..df2442cc4b6 100644 --- a/tensorflow/compiler/mlir/hlo/lib/utils/hlo_utils.cc +++ b/tensorflow/compiler/mlir/hlo/lib/utils/hlo_utils.cc @@ -13,11 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/hlo_utils.h" +#include "mlir-hlo/utils/hlo_utils.h" #include -#include "mlir/IR/Attributes.h" // from @llvm-project +#include "mlir/IR/Attributes.h" namespace mlir { namespace hlo { diff --git a/tensorflow/compiler/mlir/hlo/tools/mlir-hlo-opt/mlir-hlo-opt.cpp b/tensorflow/compiler/mlir/hlo/tools/mlir-hlo-opt/mlir-hlo-opt.cpp new file mode 100644 index 00000000000..70fc21d6959 --- /dev/null +++ b/tensorflow/compiler/mlir/hlo/tools/mlir-hlo-opt/mlir-hlo-opt.cpp @@ -0,0 +1,121 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/InitLLVM.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/ToolOutputFile.h" +#include "mlir-hlo/Dialect/mhlo/IR/register.h" +#include "mlir-hlo/Dialect/mhlo/transforms/register_passes.h" +#include "mlir/IR/Dialect.h" +#include "mlir/IR/MLIRContext.h" +#include "mlir/InitAllDialects.h" +#include "mlir/InitAllPasses.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Pass/PassManager.h" +#include "mlir/Support/FileUtilities.h" +#include "mlir/Support/MlirOptMain.h" + +// NOLINTNEXTLINE +static llvm::cl::opt inputFilename(llvm::cl::Positional, + llvm::cl::desc(""), + llvm::cl::init("-")); + +// NOLINTNEXTLINE +static llvm::cl::opt outputFilename( + "o", llvm::cl::desc("Output filename"), llvm::cl::value_desc("filename"), + llvm::cl::init("-")); + +// NOLINTNEXTLINE +static llvm::cl::opt splitInputFile( + "split-input-file", + llvm::cl::desc("Split the input file into pieces and process each " + "chunk independently"), + llvm::cl::init(false)); + +// NOLINTNEXTLINE +static llvm::cl::opt verifyDiagnostics( + "verify-diagnostics", + llvm::cl::desc("Check that emitted diagnostics match " + "expected-* lines on the corresponding line"), + llvm::cl::init(false)); + +// NOLINTNEXTLINE +static llvm::cl::opt verifyPasses( + "verify-each", + llvm::cl::desc("Run the verifier after each transformation pass"), + llvm::cl::init(true)); + +// NOLINTNEXTLINE +static llvm::cl::opt allowUnregisteredDialects( + "allow-unregistered-dialect", + llvm::cl::desc("Allow operation with no registered dialects"), + llvm::cl::init(false)); + +// NOLINTNEXTLINE +static llvm::cl::opt showDialects( + "show-dialects", llvm::cl::desc("Print the list of registered dialects"), + llvm::cl::init(false)); + +int main(int argc, char **argv) { + mlir::registerAllDialects(); + mlir::registerAllPasses(); + + mlir::mhlo::registerAllDialects(); + mlir::mhlo::registerAllMhloPasses(); + mlir::lmhlo::registerAllLmhloPasses(); + + llvm::InitLLVM y(argc, argv); + + // Register any pass manager command line options. 
+ mlir::registerPassManagerCLOptions(); + mlir::PassPipelineCLParser passPipeline("", "Compiler passes to run"); + + // Parse pass names in main to ensure static initialization completed. + llvm::cl::ParseCommandLineOptions(argc, argv, + "MLIR modular optimizer driver\n"); + + if (showDialects) { + mlir::MLIRContext context; + llvm::outs() << "Registered Dialects:\n"; + for (mlir::Dialect *dialect : context.getRegisteredDialects()) { + llvm::outs() << dialect->getNamespace() << "\n"; + } + return 0; + } + + // Set up the input file. + std::string errorMessage; + auto file = mlir::openInputFile(inputFilename, &errorMessage); + if (!file) { + llvm::errs() << errorMessage << "\n"; + return 1; + } + + auto output = mlir::openOutputFile(outputFilename, &errorMessage); + if (!output) { + llvm::errs() << errorMessage << "\n"; + exit(1); + } + + if (failed(MlirOptMain(output->os(), std::move(file), passPipeline, + splitInputFile, verifyDiagnostics, verifyPasses, + allowUnregisteredDialects))) { + return 1; + } + // Keep the output file if the invocation of MlirOptMain was successful. + output->keep(); + return 0; +} diff --git a/tensorflow/compiler/mlir/xla/BUILD b/tensorflow/compiler/mlir/xla/BUILD index 55daec0395e..a6eb9f2fe1c 100644 --- a/tensorflow/compiler/mlir/xla/BUILD +++ b/tensorflow/compiler/mlir/xla/BUILD @@ -127,7 +127,7 @@ cc_library( ":hlo_utils", ":mlir_hlo_to_hlo", "//tensorflow/compiler/mlir/hlo", - "//tensorflow/compiler/mlir/hlo:hlo_dialect_registration", + "//tensorflow/compiler/mlir/hlo:hlo_dialect_force_registration", "//tensorflow/compiler/mlir/hlo:lhlo", "//tensorflow/compiler/xla:debug_options_flags", "//tensorflow/compiler/xla:statusor", @@ -230,7 +230,7 @@ cc_library( deps = [ ":type_to_shape", "//tensorflow/compiler/mlir/hlo", - "//tensorflow/compiler/mlir/hlo:hlo_dialect_registration", + "//tensorflow/compiler/mlir/hlo:hlo_dialect_force_registration", "//tensorflow/compiler/mlir/tensorflow:convert_type", "//tensorflow/compiler/mlir/tensorflow:error_util", "//tensorflow/compiler/tf2xla:common", @@ -376,7 +376,7 @@ cc_library( ":xla_legalize_tf_with_tf2xla", "//tensorflow/compiler/mlir/hlo", "//tensorflow/compiler/mlir/hlo:chlo_legalize_to_hlo", - "//tensorflow/compiler/mlir/hlo:hlo_dialect_registration", + "//tensorflow/compiler/mlir/hlo:hlo_dialect_force_registration", "//tensorflow/compiler/mlir/hlo:hlo_legalize_to_lhlo", "//tensorflow/compiler/mlir/hlo:legalize_control_flow", "//tensorflow/compiler/mlir/hlo:legalize_tanh_to_approximation", diff --git a/tensorflow/compiler/xla/service/mlir_gpu/BUILD b/tensorflow/compiler/xla/service/mlir_gpu/BUILD index 113c9764b40..2bcf5fa7dae 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/BUILD +++ b/tensorflow/compiler/xla/service/mlir_gpu/BUILD @@ -160,7 +160,7 @@ cc_library( hdrs = ["kernel_lowering.h"], deps = [ "//tensorflow/compiler/mlir/hlo", - "//tensorflow/compiler/mlir/hlo:hlo_dialect_registration", + "//tensorflow/compiler/mlir/hlo:hlo_dialect_force_registration", "//tensorflow/compiler/mlir/hlo:hlo_legalize_to_lhlo", "//tensorflow/compiler/mlir/hlo:legalize_tanh_to_approximation", "//tensorflow/compiler/mlir/hlo:legalize_to_linalg", diff --git a/tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.cc b/tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.cc index 648c44d9ac1..2e3fa00ca86 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.cc +++ b/tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.cc @@ -467,8 +467,8 @@ Status LowerLHLOToGPU(mlir::ModuleOp module, 
LowerLHLOToGPUOptions options) { // Transform LHLO operations to LinAlg. pm.addPass(::mlir::lmhlo::createLegalizeLhloToLinalgPass()); // Fuse linalg operations. - pm.addPass(::mlir::lmhlo::createLhloFuseLinalg(/*use_parallel_loops=*/true, - tiling_for_unrolling)); + pm.addPass(::mlir::lmhlo::createLhloFuseLinalgPass( + /*use_parallel_loops=*/true, tiling_for_unrolling)); // Legalize reduce operations directly to GPU dialect. pm.addPass(::mlir::lmhlo::createLegalizeToGpuPass()); // Transform the Linalg operations inside of the loop nest into parallel @@ -512,7 +512,7 @@ Status LowerLHLOToGPU(mlir::ModuleOp module, LowerLHLOToGPUOptions options) { // Approximate of requested. if (options.use_approximations) { pm.addNestedPass<::mlir::FuncOp>( - ::mlir::hlo::createLegalizeTanhToApproximationPass()); + ::mlir::mhlo::createLegalizeTanhToApproximationPass()); } // Move scalar operations into the launch to ensure smaller signatures. pm.addPass(absl::make_unique()); From 9dc7dc24685087e6f5d0806966cb00176449360e Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Tue, 28 Jul 2020 16:30:26 -0700 Subject: [PATCH 1546/2522] [tflite] Don't check for buffers on every subgraph. Buffers in the model are allocated globally, hence it makes sense to check for their presence only once (O(1)) instead of on every subgraph (O(n)). PiperOrigin-RevId: 323677724 Change-Id: I2da0c381093006828cc4c80f03dec8a917782861 --- tensorflow/lite/interpreter_builder.cc | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tensorflow/lite/interpreter_builder.cc b/tensorflow/lite/interpreter_builder.cc index 96871e740c0..07c5251fab3 100644 --- a/tensorflow/lite/interpreter_builder.cc +++ b/tensorflow/lite/interpreter_builder.cc @@ -616,7 +616,12 @@ TfLiteStatus InterpreterBuilder::operator()( auto* buffers = model_->buffers(); if (subgraphs->size() == 0) { - error_reporter_->Report("No subgraph in the model.\n"); + TF_LITE_REPORT_ERROR(error_reporter_, "No subgraph in the model.\n"); + return cleanup_and_error(); + } + + if (!buffers) { + TF_LITE_REPORT_ERROR(error_reporter_, "No buffers in the model.\n"); return cleanup_and_error(); } @@ -637,10 +642,10 @@ TfLiteStatus InterpreterBuilder::operator()( (*interpreter)->subgraph(subgraph_index); auto operators = subgraph->operators(); auto tensors = subgraph->tensors(); - if (!operators || !tensors || !buffers) { - error_reporter_->Report( - "Did not get operators, tensors, or buffers in subgraph %d.\n", - subgraph_index); + if (!operators || !tensors) { + TF_LITE_REPORT_ERROR(error_reporter_, + "Did not get operators or tensors in subgraph %d.\n", + subgraph_index); return cleanup_and_error(); } if (modified_subgraph->AddTensors(tensors->size()) != kTfLiteOk) { From 3cd34c55f5d45871808a76b87024ae7d36049a92 Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Tue, 28 Jul 2020 19:41:18 -0400 Subject: [PATCH 1547/2522] Update flatbuffer_import.cc --- tensorflow/compiler/mlir/lite/flatbuffer_import.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/mlir/lite/flatbuffer_import.cc b/tensorflow/compiler/mlir/lite/flatbuffer_import.cc index 719f3c6ca79..3c8bf26aa14 100644 --- a/tensorflow/compiler/mlir/lite/flatbuffer_import.cc +++ b/tensorflow/compiler/mlir/lite/flatbuffer_import.cc @@ -342,7 +342,7 @@ StatusOr ConvertFloatBuffer( values.reserve(elem_count); const char* data = reinterpret_cast(buffer.data()); - + for (int i = 0; i < elem_count; i++) { uint32_t bit_repr = llvm::support::endian::readNext Date: Tue, 28 Jul 2020 
19:43:00 -0400 Subject: [PATCH 1548/2522] Update legalize_tf.cc --- tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc index 597c874ee04..297b1459fc5 100644 --- a/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc +++ b/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc @@ -691,6 +691,7 @@ void LegalizeTF::runOnFunction() { return; } } +} } // namespace From 42a9b7f7ae9f8951ec8ba26098ae63cd6182ff37 Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Tue, 28 Jul 2020 17:01:25 -0700 Subject: [PATCH 1549/2522] [TF2XLA] Enable using MLIR bridge when TF_XLA_FLAGS=--tf_mlir_enable_mlir_bridge is on for tf.function(compile=True) PiperOrigin-RevId: 323683301 Change-Id: Ib1cfaec1bd27c3bf691820c616cdca1721aabe25 --- .../compiler/jit/xla_kernel_creator_util.cc | 48 ++++++++++--------- tensorflow/compiler/tests/BUILD | 8 ---- tensorflow/compiler/tf2xla/xla_compiler.cc | 16 ++++++- 3 files changed, 40 insertions(+), 32 deletions(-) diff --git a/tensorflow/compiler/jit/xla_kernel_creator_util.cc b/tensorflow/compiler/jit/xla_kernel_creator_util.cc index 3cc68f2a1a4..61c89d8a67a 100644 --- a/tensorflow/compiler/jit/xla_kernel_creator_util.cc +++ b/tensorflow/compiler/jit/xla_kernel_creator_util.cc @@ -80,31 +80,35 @@ Status CreateXlaKernel(FunctionLibraryRuntime* flr, const NodeDef& node_def, // Make sure that kernels have been registered on the JIT device. XlaOpRegistry::RegisterCompilationKernels(); - RecursiveCompilabilityChecker::UncompilableNodesMap uncompilable_nodes_map; - if (!IsCompilable(flr, node_def, &uncompilable_nodes_map)) { - std::vector - uncompilable_node_info; - for (const auto& it : uncompilable_nodes_map) { - for (const auto& info : it.second.second) { - uncompilable_node_info.emplace_back(info); + + // Only check for compilability if the MLIR bridge is not enabled. 
+ if (!GetMlirCommonFlags()->tf_mlir_enable_mlir_bridge) { + RecursiveCompilabilityChecker::UncompilableNodesMap uncompilable_nodes_map; + if (!IsCompilable(flr, node_def, &uncompilable_nodes_map)) { + std::vector + uncompilable_node_info; + for (const auto& it : uncompilable_nodes_map) { + for (const auto& info : it.second.second) { + uncompilable_node_info.emplace_back(info); + } } - } - string message = absl::StrCat( - "Function invoked by the following node is not compilable: ", - SummarizeNodeDef(node_def, /*max_inputs_in_summary=*/10), ".\n"); - absl::StrAppend(&message, "Uncompilable nodes:"); - for (const auto& node_info : uncompilable_node_info) { - string node_message = - absl::StrCat("\n", node_info.name, ": ", - node_info.uncompilable_reason, "\n", "\tStacktrace:\n"); - for (const auto& stack_frame : node_info.stack_trace) { - absl::StrAppendFormat(&node_message, "\t\tNode: %s, function: %s\n", - stack_frame.name, stack_frame.function_name); + string message = absl::StrCat( + "Function invoked by the following node is not compilable: ", + SummarizeNodeDef(node_def, /*max_inputs_in_summary=*/10), ".\n"); + absl::StrAppend(&message, "Uncompilable nodes:"); + for (const auto& node_info : uncompilable_node_info) { + string node_message = absl::StrCat("\n", node_info.name, ": ", + node_info.uncompilable_reason, "\n", + "\tStacktrace:\n"); + for (const auto& stack_frame : node_info.stack_trace) { + absl::StrAppendFormat(&node_message, "\t\tNode: %s, function: %s\n", + stack_frame.name, stack_frame.function_name); + } + absl::StrAppend(&message, node_message); } - absl::StrAppend(&message, node_message); + VLOG(1) << message; + return errors::InvalidArgument(message); } - VLOG(1) << message; - return errors::InvalidArgument(message); } // Get function body, constant args, and resource args. diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index d9450cb6364..c2b5000647d 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -123,7 +123,6 @@ tf_xla_py_test( name = "adagrad_da_test", size = "small", srcs = ["adagrad_da_test.py"], - enable_mlir_bridge = True, python_version = "PY3", tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip @@ -161,7 +160,6 @@ tf_xla_py_test( srcs = ["add_n_test.py"], # TensorList ops are not implemented in the on-demand compilation model yet. disabled_backends = ["cpu_ondemand"], - enable_mlir_bridge = True, python_version = "PY3", tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip @@ -689,7 +687,6 @@ tf_xla_py_test( name = "fft_test", size = "medium", srcs = ["fft_test.py"], - enable_mlir_bridge = True, python_version = "PY3", shard_count = 6, tags = [ @@ -929,7 +926,6 @@ tf_xla_py_test( name = "pooling_ops_test", size = "medium", srcs = ["pooling_ops_test.py"], - enable_mlir_bridge = True, python_version = "PY3", shard_count = 20, tags = [ @@ -1243,7 +1239,6 @@ tf_xla_py_test( name = "stack_ops_test", size = "small", srcs = ["stack_ops_test.py"], - enable_mlir_bridge = True, python_version = "PY3", tags = [ "config-cuda-only", @@ -1304,7 +1299,6 @@ tf_xla_py_test( srcs = ["tensor_array_ops_test.py"], # TensorArray ops are not implemented in the on-demand compilation model yet. 
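The functional core of PATCH 1549 is the dispatch added to XlaCompiler::CompileFunction in the xla_compiler.cc hunk further down: when --tf_mlir_enable_mlir_bridge is switched on through TF_XLA_FLAGS, compilation goes through CompileGraphToXlaHlo instead of the legacy CompileGraph path. Below is a minimal, self-contained sketch of that flag-gated dispatch; the flag struct and the two compile functions are hypothetical stand-ins, and only the control flow mirrors the diff.

```c++
#include <iostream>

// Hypothetical stand-ins for GetMlirCommonFlags()->tf_mlir_enable_mlir_bridge
// and the two compilation paths chosen in xla_compiler.cc; the real functions
// take Graph/argument parameters and return tensorflow::Status.
struct MlirCommonFlags { bool tf_mlir_enable_mlir_bridge = false; };
MlirCommonFlags* GetFlagsForDemo() { static MlirCommonFlags f; return &f; }

bool CompileViaMlirBridge() { std::cout << "MLIR bridge (CompileGraphToXlaHlo)\n"; return true; }
bool CompileViaLegacyPath() { std::cout << "legacy CompileGraph path\n"; return true; }

// Mirrors the branch added in PATCH 1549: use the bridge only when the flag
// is on, otherwise fall back to the existing compiler.
bool CompileFunctionSketch() {
  return GetFlagsForDemo()->tf_mlir_enable_mlir_bridge ? CompileViaMlirBridge()
                                                       : CompileViaLegacyPath();
}

int main() {
  // In TensorFlow this would be set via TF_XLA_FLAGS=--tf_mlir_enable_mlir_bridge.
  GetFlagsForDemo()->tf_mlir_enable_mlir_bridge = true;
  return CompileFunctionSketch() ? 0 : 1;
}
```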
disabled_backends = ["cpu_ondemand"], - enable_mlir_bridge = True, python_version = "PY3", tags = [ "config-cuda-only", @@ -1333,7 +1327,6 @@ tf_xla_py_test( srcs = ["tensor_list_ops_test.py"], # TensorList ops are not implemented in the on-demand compilation model yet. disabled_backends = ["cpu_ondemand"], - enable_mlir_bridge = True, python_version = "PY3", tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip @@ -1889,7 +1882,6 @@ tf_xla_py_test( name = "special_math_test", size = "medium", srcs = ["special_math_test.py"], - enable_mlir_bridge = True, shard_count = 5, tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip diff --git a/tensorflow/compiler/tf2xla/xla_compiler.cc b/tensorflow/compiler/tf2xla/xla_compiler.cc index db54f2f6563..0045a7958b4 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.cc +++ b/tensorflow/compiler/tf2xla/xla_compiler.cc @@ -23,6 +23,7 @@ limitations under the License. #include "tensorflow/compiler/jit/defs.h" #include "tensorflow/compiler/jit/flags.h" #include "tensorflow/compiler/jit/shape_inference.h" +#include "tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.h" #include "tensorflow/compiler/tf2xla/graph_compiler.h" #include "tensorflow/compiler/tf2xla/rearrange_function_argument.h" #include "tensorflow/compiler/tf2xla/shape_util.h" @@ -52,6 +53,7 @@ limitations under the License. #include "tensorflow/core/lib/hash/hash.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/protobuf/error_codes.pb.h" +#include "tensorflow/core/protobuf/graph_debug_info.pb.h" #include "tensorflow/core/util/dump_graph.h" namespace tensorflow { @@ -726,8 +728,18 @@ Status XlaCompiler::CompileFunction( } VLOG(1) << "===================================================="; - TF_RETURN_IF_ERROR( - CompileGraph(options, function_id, std::move(graph), args, result)); + if (GetMlirCommonFlags()->tf_mlir_enable_mlir_bridge) { + VLOG(1) << "Using MLIR bridge"; + GraphDebugInfo debug_info; + TF_RETURN_IF_ERROR(CompileGraphToXlaHlo( + std::move(*graph), {args.data(), args.size()}, + options_.device_type.type_string(), options.use_tuple_arg, + *options_.flib_def, debug_info, options_.shape_representation_fn, + result)); + } else { + TF_RETURN_IF_ERROR( + CompileGraph(options, function_id, std::move(graph), args, result)); + } VLOG(1) << "===================================================="; cache_[{function_id, arg_vector}] = *result; From 88d5304b45d17e4aca2d542969ef52cb3dcf9f69 Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Tue, 28 Jul 2020 17:08:30 -0700 Subject: [PATCH 1550/2522] [TF2XLA] [MLIR] Run TF standard transformation passes in CompileGraphToXlaHlo PiperOrigin-RevId: 323684680 Change-Id: I8f03b42a9e39f4ccc280b705dc268e0fb8aee86a --- .../compiler/mlir/tensorflow/utils/compile_mlir_util.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc index 16bc851d3a6..9d6cc8809ff 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc @@ -461,6 +461,14 @@ Status CompileGraphToXlaHlo( for (unsigned idx : remaining_params) arg_shapes.push_back(absl::get(args[idx].shape)); + mlir::PassManager pm(&context); + mlir::TF::StandardPipelineOptions tf_options; + mlir::TF::CreateTFStandardPipeline(pm, tf_options); + { + mlir::StatusScopedDiagnosticHandler 
diag_handler(module.getContext()); + if (failed(pm.run(module))) return diag_handler.ConsumeStatus(); + } + auto status = CompileMlirToXlaHlo( module, arg_shapes, device_type, use_tuple_args, shape_representation_fn, compilation_result, std::move(custom_legalization_passes)); From 54746b9a2aecb202b3a22ef3d3e7df0d21253429 Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Tue, 28 Jul 2020 20:20:20 -0400 Subject: [PATCH 1551/2522] Update prepare_tf.cc --- .../mlir/lite/transforms/prepare_tf.cc | 26 ------------------- 1 file changed, 26 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc b/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc index 0a7802c3229..0e1be1ecfe1 100644 --- a/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc +++ b/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc @@ -41,9 +41,7 @@ limitations under the License. #include "mlir/Analysis/LoopAnalysis.h" // from @llvm-project #include "mlir/Dialect/Quant/FakeQuantSupport.h" // from @llvm-project #include "mlir/Dialect/Quant/UniformSupport.h" // from @llvm-project -#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project #include "mlir/IR/Attributes.h" // from @llvm-project -#include "mlir/IR/Function.h" // from @llvm-project #include "mlir/IR/MLIRContext.h" // from @llvm-project #include "mlir/IR/PatternMatch.h" // from @llvm-project #include "mlir/IR/StandardTypes.h" // from @llvm-project @@ -51,7 +49,6 @@ limitations under the License. #include "mlir/Support/LLVM.h" // from @llvm-project #include "mlir/Support/LogicalResult.h" // from @llvm-project #include "mlir/Transforms/DialectConversion.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h" #include "tensorflow/compiler/mlir/lite/quantization/quantization_utils.h" #include "tensorflow/compiler/mlir/lite/transforms/dilated_conv.h" @@ -61,7 +58,6 @@ limitations under the License. #include "tensorflow/compiler/mlir/lite/utils/validators.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #include "tensorflow/compiler/mlir/tensorflow/transforms/einsum.h" -#include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" #include "tensorflow/compiler/mlir/tensorflow/transforms/unroll_batch_matmul.h" #include "tensorflow/compiler/mlir/xla/transforms/passes.h" @@ -743,23 +739,6 @@ LogicalResult ValidateOp(Operation *op) { return failure(has_illegal_ops); } -// Converts a set of TF2XLA ops into pure TF ops for future legalizations as -// TF2XLA ops aren't supported by later stages. 
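Pulled out of the surrounding deletions, the whole of PATCH 1550 above amounts to this: before CompileGraphToXlaHlo hands the module to CompileMlirToXlaHlo, it now runs the standard TF pipeline over it and turns any pass failure into a Status. A compressed sketch of that step follows, using only the calls that appear in the hunk; includes and the enclosing function are elided, so treat it as a fragment rather than a complete translation unit.

```c++
// Fragment mirroring the lines added to CompileGraphToXlaHlo in
// compile_mlir_util.cc; the types and calls below are the ones in the hunk.
Status RunTfStandardPipeline(mlir::ModuleOp module, mlir::MLIRContext* context) {
  mlir::PassManager pm(context);
  mlir::TF::StandardPipelineOptions tf_options;
  mlir::TF::CreateTFStandardPipeline(pm, tf_options);

  // Capture pass diagnostics and surface them as a tensorflow::Status
  // instead of letting them print to stderr.
  mlir::StatusScopedDiagnosticHandler diag_handler(module.getContext());
  if (failed(pm.run(module))) return diag_handler.ConsumeStatus();
  return Status::OK();
}
```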
-LogicalResult ConvertTf2XlaOps(FuncOp func, MLIRContext *context) { - ConversionTarget target(*context); - target.addLegalDialect(); - target.addLegalDialect(); - target.addLegalOp(); - target.addLegalOp(); - target.addIllegalOp(); - - OwningRewritePatternList patterns; - mhlo::PopulateLegalizeTfWithTf2XlaPatterns("XLA_CPU_JIT", patterns); - TF::PopulateLegalizeHloToTfPatterns(&patterns, context); - - return applyPartialConversion(func, target, patterns); -} - void PrepareTFPass::runOnFunction() { OwningRewritePatternList patterns; auto func = getFunction(); @@ -775,11 +754,6 @@ void PrepareTFPass::runOnFunction() { return; } - if (failed(ConvertTf2XlaOps(func, ctx))) { - signalPassFailure(); - return; - } - // This pattern was intented to uses TFL QDQs to preserve the quantization // parameters from the TF Quant ops, thus this pattern should run with the // first `applyPatternsGreedily` method, which would otherwise removes the From 135b642ade756b8dc83624ff2b15f073b9ac5394 Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Tue, 28 Jul 2020 20:21:31 -0400 Subject: [PATCH 1552/2522] Update prepare_tf.cc --- tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc b/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc index 0e1be1ecfe1..753e4c4fe7f 100644 --- a/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc +++ b/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc @@ -59,7 +59,6 @@ limitations under the License. #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #include "tensorflow/compiler/mlir/tensorflow/transforms/einsum.h" #include "tensorflow/compiler/mlir/tensorflow/transforms/unroll_batch_matmul.h" -#include "tensorflow/compiler/mlir/xla/transforms/passes.h" #define DEBUG_TYPE "tf-tfl-legalization" From 4845b298f861a33079c20df9b08f20f437ebf6e2 Mon Sep 17 00:00:00 2001 From: Anna R Date: Tue, 28 Jul 2020 17:15:51 -0700 Subject: [PATCH 1553/2522] Removing networking C API from experimental directory. It is not used and probably lacks functionality to make it more useful. PiperOrigin-RevId: 323685902 Change-Id: I088623a51c92df4f3482a1a5518ce08742342bb9 --- tensorflow/c/experimental/BUILD | 124 --------- tensorflow/c/experimental/network.cc | 166 ------------ tensorflow/c/experimental/network.h | 97 ------- tensorflow/c/experimental/network_internal.h | 77 ------ tensorflow/c/experimental/network_test.cc | 256 ------------------ tensorflow/c/experimental/rendezvous.cc | 127 --------- tensorflow/c/experimental/rendezvous.h | 67 ----- .../c/experimental/rendezvous_internal.h | 135 --------- tensorflow/tools/pip_package/BUILD | 1 - 9 files changed, 1050 deletions(-) delete mode 100644 tensorflow/c/experimental/BUILD delete mode 100644 tensorflow/c/experimental/network.cc delete mode 100644 tensorflow/c/experimental/network.h delete mode 100644 tensorflow/c/experimental/network_internal.h delete mode 100644 tensorflow/c/experimental/network_test.cc delete mode 100644 tensorflow/c/experimental/rendezvous.cc delete mode 100644 tensorflow/c/experimental/rendezvous.h delete mode 100644 tensorflow/c/experimental/rendezvous_internal.h diff --git a/tensorflow/c/experimental/BUILD b/tensorflow/c/experimental/BUILD deleted file mode 100644 index 53cd99f18a6..00000000000 --- a/tensorflow/c/experimental/BUILD +++ /dev/null @@ -1,124 +0,0 @@ -# Description: -# Experimental C APIs for TensorFlow. 
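Stepping back to PATCHes 1551/1552: the ConvertTf2XlaOps helper deleted above was a textbook MLIR partial-conversion body. The angle-bracketed template arguments were lost in this copy of the diff, so the reconstruction below fills them with plausible dialect and op names purely for illustration; only the overall recipe (declare legal and illegal targets, collect rewrite patterns, run applyPartialConversion) is taken from the deleted lines.

```c++
// Illustrative reconstruction of the removed helper; the template arguments
// (TF::TensorFlowDialect, mhlo::MhloDialect, ModuleOp, FuncOp, the illegal op)
// are guesses standing in for the stripped originals.
LogicalResult ConvertTf2XlaOpsSketch(FuncOp func, MLIRContext *context) {
  ConversionTarget target(*context);
  target.addLegalDialect<TF::TensorFlowDialect>();  // hypothetical argument
  target.addLegalDialect<mhlo::MhloDialect>();      // hypothetical argument
  target.addLegalOp<ModuleOp>();                    // hypothetical argument
  target.addLegalOp<FuncOp>();                      // hypothetical argument
  target.addIllegalOp<TF::SomeTf2XlaOp>();          // hypothetical argument

  OwningRewritePatternList patterns;
  mhlo::PopulateLegalizeTfWithTf2XlaPatterns("XLA_CPU_JIT", patterns);
  TF::PopulateLegalizeHloToTfPatterns(&patterns, context);

  return applyPartialConversion(func, target, patterns);
}
```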
- -load( - "//tensorflow:tensorflow.bzl", - "tf_copts", - "tf_cuda_library", -) -load("//tensorflow:tensorflow.bzl", "tf_cuda_cc_test") - -package( - licenses = ["notice"], # Apache 2.0 -) - -tf_cuda_library( - name = "rendezvous_internal", - srcs = [ - "rendezvous.cc", - ], - hdrs = [ - "rendezvous.h", - "rendezvous_internal.h", - ], - copts = tf_copts(), - visibility = ["//tensorflow/c:__subpackages__"], - deps = [ - "//tensorflow/c:c_api_internal", - "//tensorflow/core:framework", - "//tensorflow/core:framework_internal", - "//tensorflow/core:lib", - "//tensorflow/core:lib_internal", - "//tensorflow/core/distributed_runtime:base_rendezvous_mgr", - "//tensorflow/core/distributed_runtime:worker_env", - "//tensorflow/core/distributed_runtime/rpc:grpc_server_lib", - ], -) - -tf_cuda_library( - name = "rendezvous", - hdrs = [ - "rendezvous.h", - ], - copts = tf_copts(), - visibility = ["//visibility:public"], - deps = [ - ":rendezvous_internal", - "//tensorflow/c:c_api", - ], -) - -tf_cuda_library( - name = "network_internal", - srcs = [ - "network.cc", - ], - hdrs = [ - "network.h", - "network_internal.h", - ], - copts = tf_copts(), - visibility = ["//tensorflow/c:__subpackages__"], - deps = [ - ":rendezvous_internal", - "//tensorflow/c:c_api_internal", - "//tensorflow/core:framework_internal", - "//tensorflow/core:lib", - "//tensorflow/core:lib_internal", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core/distributed_runtime:server_lib", - "//tensorflow/core/distributed_runtime:worker_env", - "//tensorflow/core/distributed_runtime/rpc:grpc_server_lib", - ], -) - -tf_cuda_library( - name = "network", - hdrs = [ - "network.h", - ], - copts = tf_copts(), - visibility = ["//visibility:public"], - deps = [ - ":network_internal", - ":rendezvous", - "//tensorflow/c:c_api", - ], -) - -# ----------------------------------------------------------------------------- -# Tests - -tf_cuda_cc_test( - name = "network_test", - size = "medium", - srcs = ["network_test.cc"], - tags = ["noasan"], - # We must ensure that the dependencies can be dynamically linked since - # the shared library must be able to use core:framework. - # linkstatic = tf_kernel_tests_linkstatic(), - deps = [ - ":network", - ":network_internal", - ":rendezvous", - ":rendezvous_internal", - "//tensorflow/c:c_api", - "//tensorflow/c:env", - "//tensorflow/core:framework", - "//tensorflow/core:framework_internal", - "//tensorflow/core:lib", - "//tensorflow/core:lib_internal", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core/distributed_runtime:rendezvous_mgr_interface", - "//tensorflow/core/distributed_runtime:server_lib", - "//tensorflow/core/distributed_runtime:session_mgr", - "//tensorflow/core/distributed_runtime:worker_env", - "//tensorflow/core/distributed_runtime:worker_session", - "//tensorflow/core/distributed_runtime/rpc:async_service_interface", - "//tensorflow/core/distributed_runtime/rpc:grpc_server_lib", - "@com_google_absl//absl/synchronization", - "@com_google_absl//absl/time", - ], -) diff --git a/tensorflow/c/experimental/network.cc b/tensorflow/c/experimental/network.cc deleted file mode 100644 index 97e63ec6259..00000000000 --- a/tensorflow/c/experimental/network.cc +++ /dev/null @@ -1,166 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include "tensorflow/c/experimental/network.h" - -#include -#include - -#include "tensorflow/c/c_api.h" -#include "tensorflow/c/c_api_internal.h" -#include "tensorflow/c/experimental/network_internal.h" -#include "tensorflow/c/experimental/rendezvous_internal.h" -#include "tensorflow/core/distributed_runtime/rpc/grpc_server_lib.h" -#include "tensorflow/core/distributed_runtime/server_lib.h" -#include "tensorflow/core/distributed_runtime/worker_env.h" -#include "tensorflow/core/platform/errors.h" -#include "tensorflow/core/platform/status.h" - -using tensorflow::ServerFactory; - -namespace tensorflow { - -/* static */ Status CGrpcServer::Create( - const ServerDef& server_def, - void* (*init_function)(const TF_GrpcServer*, TF_Status*), - void (*start_function)(const TF_GrpcServer*, void*, TF_Status*), - void (*stop_function)(const TF_GrpcServer*, void*, TF_Status*), - void (*join_function)(const TF_GrpcServer*, void*, TF_Status*), - void (*delete_function)(void*), - TF_RemoteRendezvousBuilder* rendezvous_builder, - std::unique_ptr* out_server) { - auto* grpc_server = new CGrpcServer(server_def, start_function, stop_function, - join_function, delete_function); - - GrpcServerOptions options; - options.rendezvous_mgr_func = [rendezvous_builder](const WorkerEnv* env) { - return new CRendezvousMgr(env, rendezvous_builder); - }; - TF_RETURN_IF_ERROR(grpc_server->Init(options)); - TF_Status* tf_status = TF_NewStatus(); - grpc_server->SetContext(init_function( - reinterpret_cast(grpc_server), tf_status)); - TF_RETURN_IF_ERROR(tf_status->status); - TF_DeleteStatus(tf_status); - - out_server->reset(grpc_server); - return Status::OK(); -} - -Status CGrpcServer::Start() { - Status status = GrpcServer::Start(); - TF_Status* tf_status = TF_NewStatus(); - (*start_function_)(reinterpret_cast(this), context_, - tf_status); - status.Update(tf_status->status); - TF_DeleteStatus(tf_status); - return status; -} - -Status CGrpcServer::Stop() { - Status status = GrpcServer::Stop(); - TF_Status* tf_status = TF_NewStatus(); - (*stop_function_)(reinterpret_cast(this), context_, - tf_status); - status.Update(tf_status->status); - TF_DeleteStatus(tf_status); - return status; -} - -Status CGrpcServer::Join() { - Status status = GrpcServer::Join(); - TF_Status* tf_status = TF_NewStatus(); - (*join_function_)(reinterpret_cast(this), context_, - tf_status); - status.Update(tf_status->status); - TF_DeleteStatus(tf_status); - return status; -} - -namespace { -// Factory that creates CGrpcServer instances. 
-class CServerFactory : public ServerFactory { - public: - CServerFactory(bool (*accept_function)(const char*), - void* (*init_function)(const TF_GrpcServer*, TF_Status*), - void (*start_function)(const TF_GrpcServer*, void*, - TF_Status*), - void (*stop_function)(const TF_GrpcServer*, void*, TF_Status*), - void (*join_function)(const TF_GrpcServer*, void*, TF_Status*), - void (*delete_function)(void*), - TF_RemoteRendezvousBuilder* rendezvous_builder) - : accept_function_(accept_function), - init_function_(init_function), - start_function_(start_function), - stop_function_(stop_function), - join_function_(join_function), - delete_function_(delete_function), - rendezvous_builder_(rendezvous_builder) {} - - Status NewServer(const ServerDef& server_def, const Options& options, - std::unique_ptr* out_server) override { - TF_RETURN_IF_ERROR(CGrpcServer::Create( - server_def, init_function_, start_function_, stop_function_, - join_function_, delete_function_, rendezvous_builder_, out_server)); - return Status::OK(); - } - - // Returns true if and only if this factory can create a server - // based on the given `server_def`. - bool AcceptsOptions(const ServerDef& server_def) override { - return (*accept_function_)(server_def.protocol().c_str()); - } - - private: - bool (*accept_function_)(const char* protocol); - void* (*init_function_)(const TF_GrpcServer*, TF_Status*); - void (*start_function_)(const TF_GrpcServer*, void*, TF_Status*); - void (*stop_function_)(const TF_GrpcServer*, void*, TF_Status*); - void (*join_function_)(const TF_GrpcServer*, void*, TF_Status*); - void (*delete_function_)(void*); - TF_RemoteRendezvousBuilder* rendezvous_builder_; -}; -} // namespace -} // namespace tensorflow - -// Server factory representation to use in C API. -// Holds CServerFactory pointer. -struct TF_GrpcServerFactory { - ::tensorflow::CServerFactory* factory; -}; - -TF_GrpcServerFactory* TF_NewGrpcServerFactory( - bool (*accept_function)(const char*), - void* (*init_function)(const TF_GrpcServer*, TF_Status*), - void (*start_function)(const TF_GrpcServer*, void*, TF_Status*), - void (*stop_function)(const TF_GrpcServer*, void*, TF_Status*), - void (*join_function)(const TF_GrpcServer*, void*, TF_Status*), - void (*delete_function)(void*), - TF_RemoteRendezvousBuilder* rendezvous_builder) { - TF_GrpcServerFactory* server_factory = new TF_GrpcServerFactory; - server_factory->factory = new ::tensorflow::CServerFactory( - accept_function, init_function, start_function, stop_function, - join_function, delete_function, rendezvous_builder); - return server_factory; -} - -void TF_DeleteGrpcServerFactory(TF_GrpcServerFactory* server_factory) { - DCHECK_NE(server_factory, nullptr); - delete server_factory; -} - -void TF_RegisterGrpcServerFactory(const char* server_type, - TF_GrpcServerFactory* server_factory) { - ServerFactory::Register(server_type, server_factory->factory); -} diff --git a/tensorflow/c/experimental/network.h b/tensorflow/c/experimental/network.h deleted file mode 100644 index bd74ec8ffec..00000000000 --- a/tensorflow/c/experimental/network.h +++ /dev/null @@ -1,97 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_C_EXPERIMENTAL_NETWORK_H_ -#define TENSORFLOW_C_EXPERIMENTAL_NETWORK_H_ - -#include "tensorflow/c/c_api.h" -#include "tensorflow/c/experimental/rendezvous.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// -------------------------------------------------------------------------- -// C API for TensorFlow Networking. -// NOTE: This API is unstable and almost certainly will change in the near -// future. -// -// Users wishing to register a custom GrpcServer should call -// TF_NewServerFactory and then TF_RegisterGrpcServerFactory. -// -// Example: -// ```c++ -// auto* rendezvous_builder = TF_NewRemoteRendezvousBuilder( -// rendezvous_init_function, -// receive_from_remote_async_function, -// rendezvous_delete_function); -// -// TF_GrpcServerFactory* factory = TF_NewGrpcServerFactory( -// accept_function, -// init_function, -// start_function, -// stop_function, -// join_function, -// delete_function, -// rendezvous_builder); -// TF_RegisterGrpcServerFactory("customfactory", factory); -// ... -// TF_DeleteGrpcServerFactory(factory); -// ``` - -typedef struct TF_GrpcServerFactory TF_GrpcServerFactory; -typedef struct TF_GrpcServerOptions TF_GrpcServerOptions; -typedef struct TF_GrpcServer TF_GrpcServer; -typedef struct TF_ServerContext { - TF_GrpcServer* const server; - void* context; -} TF_ServerContext; - -// Creates a new TF_GrpcServerFactory instance. Caller takes ownership -// of TF_GrpcServerFactory instance and should deallocate it by calling -// TF_GrpcDeleteServerFactory. -// accept_function should return true if this ServerFactory can create -// server instances for the given protocol name (for e.g. grpc+verbs). -// GRPC servers created by this factory will call provided -// init_function, start_function, stop_function, join_function and -// delete_function. -// -// Note that clean shutdown is currently not implemented for GrpcServer. -// So, stop_function will never be called now but may be in the future -// when stop mechanism is supported. -TF_CAPI_EXPORT extern TF_GrpcServerFactory* TF_NewGrpcServerFactory( - bool (*accept_function)(const char*), - void* (*init_function)(const TF_GrpcServer*, TF_Status*), - void (*start_function)(const TF_GrpcServer*, void*, TF_Status*), - void (*stop_function)(const TF_GrpcServer*, void*, TF_Status*), - void (*join_function)(const TF_GrpcServer*, void*, TF_Status*), - void (*delete_function)(void*), - TF_RemoteRendezvousBuilder* rendezvous_builder); - -// Deletes TF_GrpcServerFactory instances. -// Note that this function only deletes TF_GrpcServerFactory wrapper. -// Actual underlying server factory would not be deleted and will -// remain registered. -TF_CAPI_EXPORT extern void TF_DeleteGrpcServerFactory( - TF_GrpcServerFactory* server_factory); - -// Registers provided server_factory for the given server_type. -// server_type must be unique to the server factory. 
-TF_CAPI_EXPORT extern void TF_RegisterGrpcServerFactory( - const char* server_type, TF_GrpcServerFactory* server_factory); - -#ifdef __cplusplus -} /* end extern "C" */ -#endif -#endif // TENSORFLOW_C_EXPERIMENTAL_NETWORK_H_ diff --git a/tensorflow/c/experimental/network_internal.h b/tensorflow/c/experimental/network_internal.h deleted file mode 100644 index 389de440b70..00000000000 --- a/tensorflow/c/experimental/network_internal.h +++ /dev/null @@ -1,77 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_C_EXPERIMENTAL_NETWORK_INTERNAL_H_ -#define TENSORFLOW_C_EXPERIMENTAL_NETWORK_INTERNAL_H_ - -#include - -#include "tensorflow/c/c_api.h" -#include "tensorflow/c/experimental/network.h" -#include "tensorflow/c/experimental/rendezvous.h" -#include "tensorflow/core/distributed_runtime/rpc/grpc_server_lib.h" -#include "tensorflow/core/distributed_runtime/server_lib.h" -#include "tensorflow/core/platform/env.h" -#include "tensorflow/core/platform/status.h" -#include "tensorflow/core/protobuf/tensorflow_server.pb.h" - -namespace tensorflow { - -// GrpcServer implementation that forwards calls to callbacks. -class CGrpcServer : public GrpcServer { - protected: - CGrpcServer(const ServerDef& server_def, - void (*start_function)(const TF_GrpcServer*, void*, TF_Status*), - void (*stop_function)(const TF_GrpcServer*, void*, TF_Status*), - void (*join_function)(const TF_GrpcServer*, void*, TF_Status*), - void (*delete_function)(void*)) - : GrpcServer(server_def, ::tensorflow::Env::Default()), - start_function_(start_function), - stop_function_(stop_function), - join_function_(join_function), - delete_function_(delete_function), - context_(nullptr) {} - - public: - static Status Create( - const ServerDef& server_def, - void* (*init_function)(const TF_GrpcServer*, TF_Status*), - void (*start_function)(const TF_GrpcServer*, void*, TF_Status*), - void (*stop_function)(const TF_GrpcServer*, void*, TF_Status*), - void (*join_function)(const TF_GrpcServer*, void*, TF_Status*), - void (*delete_function)(void*), - TF_RemoteRendezvousBuilder* rendezvous_builder, - std::unique_ptr* out_server); - - Status Start() override; - Status Stop() override; - Status Join() override; - - ~CGrpcServer() override { delete_function_(context_); } - - protected: - void SetContext(void* context) { context_ = context; } - - private: - void (*start_function_)(const TF_GrpcServer*, void*, TF_Status*); - void (*stop_function_)(const TF_GrpcServer*, void*, TF_Status*); - void (*join_function_)(const TF_GrpcServer*, void*, TF_Status*); - void (*delete_function_)(void*); - void* context_; - - friend class NetworksTest; -}; - -} // namespace tensorflow -#endif // TENSORFLOW_C_EXPERIMENTAL_NETWORK_INTERNAL_H_ diff --git a/tensorflow/c/experimental/network_test.cc b/tensorflow/c/experimental/network_test.cc deleted file mode 100644 index b7a50008c37..00000000000 --- 
a/tensorflow/c/experimental/network_test.cc +++ /dev/null @@ -1,256 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include "tensorflow/c/experimental/network.h" - -#include -#include -#include - -#include -#include - -#include "absl/synchronization/notification.h" -#include "absl/time/time.h" -#include "tensorflow/c/c_api.h" -#include "tensorflow/c/experimental/network_internal.h" -#include "tensorflow/c/experimental/rendezvous.h" -#include "tensorflow/c/experimental/rendezvous_internal.h" -#include "tensorflow/core/distributed_runtime/rendezvous_mgr_interface.h" -#include "tensorflow/core/distributed_runtime/rpc/async_service_interface.h" -#include "tensorflow/core/distributed_runtime/rpc/grpc_server_lib.h" -#include "tensorflow/core/distributed_runtime/server_lib.h" -#include "tensorflow/core/distributed_runtime/session_mgr.h" -#include "tensorflow/core/distributed_runtime/worker_env.h" -#include "tensorflow/core/distributed_runtime/worker_session.h" -#include "tensorflow/core/framework/allocator.h" -#include "tensorflow/core/framework/rendezvous.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow/core/platform/status.h" -#include "tensorflow/core/platform/strcat.h" -#include "tensorflow/core/platform/test.h" -#include "tensorflow/core/protobuf/cluster.pb.h" -#include "tensorflow/core/protobuf/tensorflow_server.pb.h" - -namespace tensorflow { - -bool accept_functionA(const char* protocol_name) { - return strcmp(protocol_name, "grpc+A") == 0; -} - -bool accept_functionB(const char* protocol_name) { - return strcmp(protocol_name, "grpc+B") == 0; -} - -struct SomeServerData { - bool server_started = false; -}; - -struct SomeRendezvousData { - int test = 0; -}; - -void* init_function(const TF_GrpcServer* server, TF_Status* status) { - SomeServerData* server_data = new SomeServerData(); - TF_SetStatus(status, TF_OK, ""); - return server_data; -} - -void start_function(const TF_GrpcServer* server, void* context, - TF_Status* status) { - auto* server_data = static_cast(context); - server_data->server_started = true; - TF_SetStatus(status, TF_OK, ""); -} - -void stop_function(const TF_GrpcServer* server, void* context, - TF_Status* status) { - TF_SetStatus(status, TF_OK, ""); -} - -void join_function(const TF_GrpcServer* server, void* context, - TF_Status* status) { - TF_SetStatus(status, TF_OK, ""); -} - -void delete_function(void* context) { - auto* server_data = static_cast(context); - delete server_data; -} - -void* rendezvous_init_function(void* server_context) { - return new SomeRendezvousData(); -} - -void Deallocator(void* data, size_t, void* arg) { - tensorflow::cpu_allocator()->DeallocateRaw(data); - *reinterpret_cast(arg) = true; -} - -void receive_from_remote_async_function(TF_ParsedKey* key, - TF_RendezvousArgs* args, - TF_RendezvousDoneCallback* callback, - void* context) { - 
// Create dummy tensor - const int num_bytes = 6 * sizeof(float); - float* values = - reinterpret_cast(tensorflow::cpu_allocator()->AllocateRaw( - EIGEN_MAX_ALIGN_BYTES, num_bytes)); - int64_t dims[] = {2, 3}; - bool deallocator_called = false; - auto* tensor = TF_NewTensor(TF_FLOAT, dims, 2, values, num_bytes, - &Deallocator, &deallocator_called); - callback->tensor = tensor; - auto* tf_status = TF_NewStatus(); - TF_SetStatus(tf_status, TF_OK, ""); - callback->status = tf_status; - TF_RendezvousDone(callback); - TF_DeleteStatus(tf_status); - TF_DeleteTensor(tensor); -} - -void rendezvous_delete_function(void* context) { - auto* rendezvous_data = static_cast(context); - delete rendezvous_data; -} - -tensorflow::ServerDef GetServerDef(const string& protocol, - const string& job_name, int num_tasks) { - tensorflow::ServerDef server_def; - server_def.set_protocol(protocol); - server_def.set_job_name(job_name); - server_def.set_task_index(0); - tensorflow::ClusterDef* cluster_def = server_def.mutable_cluster(); - tensorflow::JobDef* job_def = cluster_def->add_job(); - job_def->set_name(job_name); - for (int i = 0; i < num_tasks; i++) { - int port = tensorflow::testing::PickUnusedPortOrDie(); - job_def->mutable_tasks()->insert( - {i, tensorflow::strings::StrCat("localhost:", port)}); - } - return server_def; -} - -class NetworksTest : public ::testing::Test { - public: - ~NetworksTest() override {} - - SomeServerData* GetServerData(CGrpcServer* server) { - EXPECT_NE(server->context_, nullptr); - return static_cast(server->context_); - } -}; - -Rendezvous::ParsedKey Key(const string& sender, const uint64 incarnation, - const string& receiver, const string& name) { - Rendezvous::ParsedKey result; - CHECK( - Rendezvous::ParseKey(Rendezvous::CreateKey(sender, incarnation, receiver, - name, FrameAndIter(0, 0)), - &result) - .ok()); - return result; -} - -void InitializeRendezvous(GrpcServer* grpc_server, ServerDef* server_def, - RemoteRendezvous* remote_rendezvous) { - int rendezvous_id = 0; - auto session_name = tensorflow::strings::StrCat("test_", rendezvous_id); - TF_EXPECT_OK(grpc_server->worker_env()->session_mgr->CreateSession( - session_name, *server_def, true)); - - std::shared_ptr worker_session; - TF_EXPECT_OK(grpc_server->worker_env()->session_mgr->WorkerSessionForSession( - session_name, &worker_session)); - - TF_EXPECT_OK(remote_rendezvous->Initialize(worker_session.get())); -} - -TEST_F(NetworksTest, TestStartServer) { - auto* rendezvous_builder = TF_NewRemoteRendezvousBuilder( - rendezvous_init_function, receive_from_remote_async_function, - rendezvous_delete_function); - - TF_Status* tf_status = TF_NewStatus(); - TF_GrpcServerFactory* factory = TF_NewGrpcServerFactory( - accept_functionA, init_function, start_function, stop_function, - join_function, delete_function, rendezvous_builder); - TF_RegisterGrpcServerFactory("testfactoryA", factory); - - ServerDef server_def = GetServerDef("grpc+A", "localhost", 1); - std::unique_ptr server; - TF_EXPECT_OK(NewServer(server_def, &server)); - auto* grpc_server = static_cast(server.get()); - auto* server_data = GetServerData(grpc_server); - ASSERT_FALSE(server_data->server_started); - - TF_EXPECT_OK(server->Start()); - ASSERT_TRUE(server_data->server_started); - - TF_DeleteStatus(tf_status); - TF_DeleteGrpcServerFactory(factory); - TF_DeleteRemoteRendezvousBuilder(rendezvous_builder); - // TODO(annarev): find a clean way to shutdown server. 
- server.release(); -} - -TEST_F(NetworksTest, TestReceiveData) { - auto* rendezvous_builder = TF_NewRemoteRendezvousBuilder( - rendezvous_init_function, receive_from_remote_async_function, - rendezvous_delete_function); - - TF_Status* tf_status = TF_NewStatus(); - TF_GrpcServerFactory* factory = TF_NewGrpcServerFactory( - accept_functionB, init_function, start_function, stop_function, - join_function, delete_function, rendezvous_builder); - TF_RegisterGrpcServerFactory("testfactoryB", factory); - - ServerDef server_def = GetServerDef("grpc+B", "localhost", 1); - std::unique_ptr server; - TF_EXPECT_OK(NewServer(server_def, &server)); - auto* grpc_server = static_cast(server.get()); - - TF_EXPECT_OK(server->Start()); - auto* rendezvous_mgr = grpc_server->worker_env()->rendezvous_mgr; - auto* remote_rendezvous = rendezvous_mgr->Find(0); - - auto key = Key("/job:localhost/replica:1/task:2/device:CPU:0", 1, - "/job:localhost/replica:0/task:0/device:CPU:0", "test"); - Rendezvous::Args args; - bool done_callback_called = false; - auto* done_callback_called_ptr = &done_callback_called; - absl::Notification notification; - auto* notification_ptr = ¬ification; - - InitializeRendezvous(grpc_server, &server_def, remote_rendezvous); - remote_rendezvous->RecvAsync( - key, args, - [done_callback_called_ptr, notification_ptr]( - const Status&, const Rendezvous::Args&, const Rendezvous::Args&, - const Tensor&, const bool) mutable { - *done_callback_called_ptr = true; - notification_ptr->Notify(); - }); - notification.WaitForNotificationWithTimeout(absl::Seconds(10)); - ASSERT_EQ(done_callback_called, true); - - TF_DeleteStatus(tf_status); - TF_DeleteGrpcServerFactory(factory); - TF_DeleteRemoteRendezvousBuilder(rendezvous_builder); - // Server doesn't have a clean shutdown. - server.release(); -} - -} // namespace tensorflow diff --git a/tensorflow/c/experimental/rendezvous.cc b/tensorflow/c/experimental/rendezvous.cc deleted file mode 100644 index c996cfb44f3..00000000000 --- a/tensorflow/c/experimental/rendezvous.cc +++ /dev/null @@ -1,127 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#include "tensorflow/c/experimental/rendezvous.h" - -#include - -#include "tensorflow/c/c_api.h" -#include "tensorflow/c/c_api_internal.h" -#include "tensorflow/c/experimental/rendezvous_internal.h" -#include "tensorflow/core/distributed_runtime/base_rendezvous_mgr.h" -#include "tensorflow/core/distributed_runtime/worker_env.h" -#include "tensorflow/core/framework/allocator.h" -#include "tensorflow/core/framework/rendezvous.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/platform/errors.h" -#include "tensorflow/core/platform/status.h" -#include "tensorflow/core/platform/stringpiece.h" - -namespace tensorflow { - -CRemoteRendezvous::CRemoteRendezvous(const WorkerEnv* env, int64 step_id, - void (*receive_from_remote_async_function)( - TF_ParsedKey*, TF_RendezvousArgs*, - TF_RendezvousDoneCallback*, - void* context), - void (*delete_function)(void* context), - void* server_context) - : BaseRemoteRendezvous(env, step_id), - receive_from_remote_async_function_(receive_from_remote_async_function), - delete_function_(delete_function), - context_(nullptr) {} - -void CRemoteRendezvous::RecvFromRemoteAsync(const Rendezvous::ParsedKey& parsed, - const Rendezvous::Args& args, - DoneCallback done) { - if (args.cancellation_manager != nullptr) { - VLOG(1) << "WARNING: CRemoteRendezvous does not support cancellation."; - } - TF_ParsedKey key; - key.src_device = parsed.src_device.data(); - key.src_device_len = parsed.src_device.size(); - key.dst_device = parsed.dst_device.data(); - key.dst_device_len = parsed.dst_device.size(); - key.full_key = parsed.FullKey().data(); - key.full_key_len = parsed.FullKey().size(); - - TF_DeviceContext* device_context = new TF_DeviceContext(); - device_context->context = args.device_context; - - TF_AllocatorAttributes* alloc_attrs = new TF_AllocatorAttributes(); - alloc_attrs->value = args.alloc_attrs.value; - alloc_attrs->scope_id = args.alloc_attrs.scope_id; - alloc_attrs->on_host = args.alloc_attrs.on_host(); - alloc_attrs->nic_compatible = args.alloc_attrs.nic_compatible(); - - TF_RendezvousArgs* cargs = new TF_RendezvousArgs(); - cargs->device_context = device_context; - cargs->alloc_attrs = alloc_attrs; - - TF_RendezvousDoneCallback* done_callback = new TF_RendezvousDoneCallback(); - done_callback->done_callback = done; - done_callback->recv_args = cargs; - - receive_from_remote_async_function_(&key, cargs, done_callback, context_); -} - -CRemoteRendezvous::~CRemoteRendezvous() { delete_function_(context_); } -} // namespace tensorflow - -TF_RemoteRendezvousBuilder* TF_NewRemoteRendezvousBuilder( - void* (*init_function)(void* server_context), - void (*receive_from_remote_async_function)(TF_ParsedKey*, - TF_RendezvousArgs*, - TF_RendezvousDoneCallback*, - void* context), - void (*delete_function)(void* context)) { - TF_RemoteRendezvousBuilder* builder = new TF_RemoteRendezvousBuilder(); - builder->init_function = init_function; - builder->delete_function = delete_function; - builder->receive_from_remote_async_function = - receive_from_remote_async_function; - return builder; -} - -void TF_DeleteRemoteRendezvousBuilder( - TF_RemoteRendezvousBuilder* rendezvous_builder) { - DCHECK_NE(rendezvous_builder, nullptr); - delete rendezvous_builder; -} - -TF_CAPI_EXPORT extern void TF_RendezvousDone( - TF_RendezvousDoneCallback* callback) { - DCHECK_NE(callback, nullptr); - ::tensorflow::Tensor tensor; - TF_CHECK_OK(TF_TensorToTensor(callback->tensor, &tensor)); - 
::tensorflow::Rendezvous::Args recv_args; - recv_args.alloc_attrs.value = callback->recv_args->alloc_attrs->value; - recv_args.alloc_attrs.scope_id = callback->recv_args->alloc_attrs->scope_id; - recv_args.device_context = callback->recv_args->device_context->context; - ::tensorflow::Rendezvous::Args sent_args; - - callback->done_callback(callback->status->status, sent_args, recv_args, - tensor, callback->dead); - - if (callback->recv_args) { - DCHECK_NE(callback->recv_args, nullptr); - DCHECK_NE(callback->recv_args->alloc_attrs, nullptr); - DCHECK_NE(callback->recv_args->device_context, nullptr); - delete callback->recv_args->alloc_attrs; - delete callback->recv_args->device_context; - delete callback->recv_args; - } - delete callback; - callback = nullptr; -} diff --git a/tensorflow/c/experimental/rendezvous.h b/tensorflow/c/experimental/rendezvous.h deleted file mode 100644 index 5b007d52429..00000000000 --- a/tensorflow/c/experimental/rendezvous.h +++ /dev/null @@ -1,67 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_C_EXPERIMENTAL_RENDEZVOUS_H_ -#define TENSORFLOW_C_EXPERIMENTAL_RENDEZVOUS_H_ - -#include "tensorflow/c/c_api.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// -------------------------------------------------------------------------- -// C API for Rendezvous. -// NOTE: This API is unstable and almost certainly will change in the near -// future. -// -// Custom rendezvous allows for custom implementations of Recv call. -// -// Users wishing to create custom rendezvous objects should call -// TF_NewRemoteRendezvousBuilder and pass returned TF_RemoteRendezvousBuilder -// to to TF_NewServerFactory. - -typedef struct TF_RemoteRendezvousBuilder TF_RemoteRendezvousBuilder; -typedef struct TF_ParsedKey TF_ParsedKey; -typedef struct TF_RendezvousArgs TF_RendezvousArgs; -typedef struct TF_RendezvousDoneCallback TF_RendezvousDoneCallback; - -// Creates a new TF_RemoteRendezvousBuilder instance. -// Rendezvous instances will forward calls to init_function, -// receive_from_remote_async_function and delete_function passed here. -// -// Note that receive_from_remote_async_function implementation must call -// TF_Done with the TF_DoneCallback passed as an argument. -TF_CAPI_EXPORT extern TF_RemoteRendezvousBuilder* TF_NewRemoteRendezvousBuilder( - void* (*init_function)(void* server_context), - void (*receive_from_remote_async_function)(TF_ParsedKey*, - TF_RendezvousArgs*, - TF_RendezvousDoneCallback*, - void* context), - void (*delete_function)(void* context)); - -// Deletes TF_RemoteRendezvousBuilder instances. -TF_CAPI_EXPORT extern void TF_DeleteRemoteRendezvousBuilder( - TF_RemoteRendezvousBuilder* rendezvous_builder); - -// Calls TF_DoneCallback and destroys callback instance and -// TF_DoneCallback members except `tensor` and `status`. Caller is -// responsible for deleting `tensor` and `status` after TF_Done returns. 
-TF_CAPI_EXPORT extern void TF_RendezvousDone( - TF_RendezvousDoneCallback* callback); - -#ifdef __cplusplus -} /* end extern "C" */ -#endif -#endif // TENSORFLOW_C_EXPERIMENTAL_RENDEZVOUS_H_ diff --git a/tensorflow/c/experimental/rendezvous_internal.h b/tensorflow/c/experimental/rendezvous_internal.h deleted file mode 100644 index f06686023e6..00000000000 --- a/tensorflow/c/experimental/rendezvous_internal.h +++ /dev/null @@ -1,135 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_C_EXPERIMENTAL_RENDEZVOUS_INTERNAL_H_ -#define TENSORFLOW_C_EXPERIMENTAL_RENDEZVOUS_INTERNAL_H_ - -#include - -#include "tensorflow/c/c_api.h" -#include "tensorflow/c/experimental/rendezvous.h" -#include "tensorflow/core/distributed_runtime/base_rendezvous_mgr.h" -#include "tensorflow/core/distributed_runtime/worker_env.h" -#include "tensorflow/core/framework/device_base.h" -#include "tensorflow/core/framework/rendezvous.h" -#include "tensorflow/core/platform/macros.h" - -struct TF_ParsedKey { - // char* members might not be null-terminated. - const char* src_device; - size_t src_device_len; - const char* dst_device; - size_t dst_device_len; - const char* full_key; - size_t full_key_len; -}; - -struct TF_AllocatorAttributes { - bool on_host; - bool nic_compatible; - // NOTE: The upper 8 bits of the value are reserved for - // device-specific uses. Implementors of a device can interpret these - // upper 8 bits in device-specific ways, and ops implemented for those - // devices are responsible for setting those 8 bits appropriately. - tensorflow::uint32 value = 0; - // EXPERIMENTAL: If this is greater than zero, then allocation is delegated to - // a named special-purpose allocator on the same device. - tensorflow::int32 scope_id = 0; -}; - -struct TF_DeviceContext { - ::tensorflow::DeviceContext* context; -}; - -struct TF_RendezvousArgs { - const TF_DeviceContext* device_context; - const TF_AllocatorAttributes* alloc_attrs; -}; - -struct TF_RendezvousDoneCallback { - ::tensorflow::Rendezvous::DoneCallback done_callback; - - // TODO(annarev): figure out if we should also support sent_args. 
- const TF_RendezvousArgs* recv_args; - TF_Tensor* tensor = nullptr; - TF_Status* status; - bool dead; -}; - -struct TF_RemoteRendezvousBuilder { - void* (*init_function)(void* server_context); - void (*receive_from_remote_async_function)(TF_ParsedKey*, TF_RendezvousArgs*, - TF_RendezvousDoneCallback*, - void* context); - void (*delete_function)(void* context); - void* server_context; -}; - -namespace tensorflow { - -class CRemoteRendezvous : public BaseRemoteRendezvous { - public: - CRemoteRendezvous(const WorkerEnv* env, int64 step_id, - void (*receive_from_remote_async_function)( - TF_ParsedKey*, TF_RendezvousArgs*, - TF_RendezvousDoneCallback*, void* context), - void (*delete_function)(void* context), - void* server_context); - - void SetContext(void* context) { context_ = context; } - - protected: - void RecvFromRemoteAsync(const Rendezvous::ParsedKey& parsed, - const Rendezvous::Args& args, - DoneCallback done) override; - - private: - ~CRemoteRendezvous() override; - - void (*receive_from_remote_async_function_)(TF_ParsedKey*, TF_RendezvousArgs*, - TF_RendezvousDoneCallback*, - void* context); - void (*delete_function_)(void* context); - void* context_; - TF_DISALLOW_COPY_AND_ASSIGN(CRemoteRendezvous); -}; - -class CRendezvousMgr : public BaseRendezvousMgr { - public: - CRendezvousMgr(const WorkerEnv* env, - const TF_RemoteRendezvousBuilder* rendezvous_builder) - : BaseRendezvousMgr(env), rendezvous_builder_(rendezvous_builder) {} - - protected: - BaseRemoteRendezvous* Create(int64 step_id, - const WorkerEnv* worker_env) override { - auto* rendezvous = new CRemoteRendezvous( - worker_env, step_id, - rendezvous_builder_->receive_from_remote_async_function, - rendezvous_builder_->delete_function, - rendezvous_builder_->server_context); - - rendezvous->SetContext(rendezvous_builder_->init_function( - rendezvous_builder_->server_context)); - return rendezvous; - } - - private: - const TF_RemoteRendezvousBuilder* rendezvous_builder_; - TF_DISALLOW_COPY_AND_ASSIGN(CRendezvousMgr); -}; - -} // namespace tensorflow - -#endif // TENSORFLOW_C_EXPERIMENTAL_RENDEZVOUS_INTERNAL_H_ diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index 38ff12b100e..bbd4f49d95b 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -26,7 +26,6 @@ package(default_visibility = ["//visibility:private"]) transitive_hdrs( name = "included_headers", deps = [ - "//tensorflow/c/experimental:network", "//tensorflow/compiler/tf2xla:xla_compiled_cpu_function", "//tensorflow/compiler/mlir:mlir_graph_optimization_pass", "//tensorflow/core:core_cpu", From 34fe8146037c9074218b529ba50c55e71be51cfa Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 28 Jul 2020 17:31:01 -0700 Subject: [PATCH 1554/2522] Legalize matrix-diag-part to xla. 
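Before the diff of PATCH 1554 below: the MatrixDiagPartV3 description added to tf_generated_ops.td defines the output shape through two quantities, num_diags = k[1] - k[0] + 1 and max_diag_len = min(M + min(k[1], 0), N + min(-k[0], 0)). The following small self-contained check runs that arithmetic against the 7x140x128 input with k = [-10, 11] used in the new legalize-tf.mlir test; the helper name is ours, while the formulas and the expected 7x22x128 output come from the patch.

```c++
#include <algorithm>
#include <cassert>

// Shape arithmetic from the MatrixDiagPartV3 description:
//   num_diags    = k[1] - k[0] + 1
//   max_diag_len = min(M + min(k[1], 0), N + min(-k[0], 0))
// where the innermost input dims are M x N.
struct DiagPartShape { int num_diags; int max_diag_len; };

DiagPartShape ComputeDiagPartShape(int M, int N, int k0, int k1) {
  DiagPartShape s;
  s.num_diags = k1 - k0 + 1;
  s.max_diag_len = std::min(M + std::min(k1, 0), N + std::min(-k0, 0));
  return s;
}

int main() {
  // Matches the 7x140x128 input with k = [-10, 11] from the legalize-tf test:
  // 22 diagonals of length 128, i.e. an output of 7x22x128.
  DiagPartShape s = ComputeDiagPartShape(/*M=*/140, /*N=*/128, /*k0=*/-10, /*k1=*/11);
  assert(s.num_diags == 22);
  assert(s.max_diag_len == 128);
  return 0;
}
```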
PiperOrigin-RevId: 323688154 Change-Id: I011a57681cb27163e2942f4d3ce87c8de68c2d19 --- .../mlir/tensorflow/ir/tf_generated_ops.td | 120 ++++++++ .../compiler/mlir/xla/tests/legalize-tf.mlir | 134 +++++++++ .../mlir/xla/transforms/legalize_tf.cc | 266 +++++++++++++++++- 3 files changed, 518 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td index 0fe8dd647a7..721513aa039 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td @@ -5068,6 +5068,126 @@ which has shape (2, 4, 4) TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } +def TF_MatrixDiagPartV3Op : TF_Op<"MatrixDiagPartV3", [NoSideEffect]> { + let summary = "Returns the batched diagonal part of a batched tensor."; + + let description = [{ +Returns a tensor with the `k[0]`-th to `k[1]`-th diagonals of the batched +`input`. + +Assume `input` has `r` dimensions `[I, J, ..., L, M, N]`. +Let `max_diag_len` be the maximum length among all diagonals to be extracted, +`max_diag_len = min(M + min(k[1], 0), N + min(-k[0], 0))` +Let `num_diags` be the number of diagonals to extract, +`num_diags = k[1] - k[0] + 1`. + +If `num_diags == 1`, the output tensor is of rank `r - 1` with shape +`[I, J, ..., L, max_diag_len]` and values: + +``` +diagonal[i, j, ..., l, n] + = input[i, j, ..., l, n+y, n+x] ; if 0 <= n+y < M and 0 <= n+x < N, + padding_value ; otherwise. +``` +where `y = max(-k[1], 0)`, `x = max(k[1], 0)`. + +Otherwise, the output tensor has rank `r` with dimensions +`[I, J, ..., L, num_diags, max_diag_len]` with values: + +``` +diagonal[i, j, ..., l, m, n] + = input[i, j, ..., l, n+y, n+x] ; if 0 <= n+y < M and 0 <= n+x < N, + padding_value ; otherwise. +``` +where `d = k[1] - m`, `y = max(-d, 0) - offset`, and `x = max(d, 0) - offset`. + +`offset` is zero except when the alignment of the diagonal is to the right. +``` +offset = max_diag_len - diag_len(d) ; if (`align` in {RIGHT_LEFT, RIGHT_RIGHT} + and `d >= 0`) or + (`align` in {LEFT_RIGHT, RIGHT_RIGHT} + and `d <= 0`) + 0 ; otherwise +``` +where `diag_len(d) = min(cols - max(d, 0), rows + min(d, 0))`. + +The input must be at least a matrix. + +For example: + +``` +input = np.array([[[1, 2, 3, 4], # Input shape: (2, 3, 4) + [5, 6, 7, 8], + [9, 8, 7, 6]], + [[5, 4, 3, 2], + [1, 2, 3, 4], + [5, 6, 7, 8]]]) + +# A main diagonal from each batch. +tf.matrix_diag_part(input) ==> [[1, 6, 7], # Output shape: (2, 3) + [5, 2, 7]] + +# A superdiagonal from each batch. +tf.matrix_diag_part(input, k = 1) + ==> [[2, 7, 6], # Output shape: (2, 3) + [4, 3, 8]] + +# A band from each batch. +tf.matrix_diag_part(input, k = (-1, 2)) + ==> [[[0, 3, 8], # Output shape: (2, 4, 3) + [2, 7, 6], + [1, 6, 7], + [5, 8, 0]], + [[0, 3, 4], + [4, 3, 8], + [5, 2, 7], + [1, 6, 0]]] + +# LEFT_RIGHT alignment. +tf.matrix_diag_part(input, k = (-1, 2), align="LEFT_RIGHT") + ==> [[[3, 8, 0], # Output shape: (2, 4, 3) + [2, 7, 6], + [1, 6, 7], + [0, 5, 8]], + [[3, 4, 0], + [4, 3, 8], + [5, 2, 7], + [0, 1, 6]]] + +# max_diag_len can be shorter than the main diagonal. 
+tf.matrix_diag_part(input, k = (-2, -1)) + ==> [[[5, 8], + [9, 0]], + [[1, 6], + [5, 0]]] + +# padding_value = 9 +tf.matrix_diag_part(input, k = (1, 3), padding_value = 9) + ==> [[[9, 9, 4], # Output shape: (2, 3, 3) + [9, 3, 8], + [2, 7, 6]], + [[9, 9, 2], + [9, 3, 4], + [4, 3, 8]]] + +``` + }]; + + let arguments = (ins + TF_Tensor:$input, + I32Tensor:$k, + TF_Tensor:$padding_value, + + DefaultValuedAttr, "RIGHT_LEFT">:$align + ); + + let results = (outs + TF_Tensor:$diagonal + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; +} + def TF_MatrixDiagV2Op : TF_Op<"MatrixDiagV2", [NoSideEffect]> { let summary = [{ Returns a batched diagonal tensor with given batched diagonal values. diff --git a/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir b/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir index b09ccf025b0..3ce6e63d53d 100644 --- a/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir +++ b/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir @@ -495,6 +495,140 @@ func @diag_part(%arg0: tensor<4x3x4x3xf32>) -> tensor<4x3xf32> { return %0: tensor<4x3xf32> } +//===----------------------------------------------------------------------===// +// MatrixDiagPart +//===----------------------------------------------------------------------===// + +// CHECK-LABEL: func @matrix_diag_part +// CHECK-SAME: %[[ARG:.*]]: tensor<7x140x128xi32> +func @matrix_diag_part(%arg0: tensor<7x140x128xi32>) -> tensor<7x22x128xi32> { + // CHECK: %[[V0:.*]] = mhlo.constant dense<42> : tensor + // CHECK: %[[V1:.*]] = mhlo.constant dense<[-10, 11]> : tensor<2xi32> + // CHECK: %[[V2:.*]] = "mhlo.iota"() {iota_dimension = 1 : i64} : () -> tensor<1x22x128xi32> + // CHECK: %[[V3:.*]] = "mhlo.iota"() {iota_dimension = 2 : i64} : () -> tensor<1x22x128xi32> + // CHECK: %[[V4:.*]] = mhlo.constant dense<0> : tensor + // CHECK: %[[V5:.*]] = "mhlo.broadcast"(%[[V4]]) {broadcast_sizes = dense<[1, 22, 128]> : tensor<3xi64>} : (tensor) -> tensor<1x22x128xi32> + // CHECK: %[[V6:.*]] = mhlo.constant dense : tensor + // CHECK: %[[V7:.*]] = "mhlo.broadcast"(%[[V6]]) {broadcast_sizes = dense<[1, 22, 128]> : tensor<3xi64>} : (tensor) -> tensor<1x22x128xi1> + // CHECK: %[[V8:.*]] = mhlo.constant dense : tensor + // CHECK: %[[V9:.*]] = "mhlo.broadcast"(%[[V8]]) {broadcast_sizes = dense<[1, 22, 128]> : tensor<3xi64>} : (tensor) -> tensor<1x22x128xi1> + // CHECK: %[[V10:.*]] = mhlo.constant dense<11> : tensor + // CHECK: %[[V11:.*]] = "mhlo.broadcast"(%[[V10]]) {broadcast_sizes = dense<[1, 22, 128]> : tensor<3xi64>} : (tensor) -> tensor<1x22x128xi32> + // CHECK: %[[V12:.*]] = mhlo.constant dense<140> : tensor + // CHECK: %[[V13:.*]] = "mhlo.broadcast"(%[[V12]]) {broadcast_sizes = dense<[1, 22, 128]> : tensor<3xi64>} : (tensor) -> tensor<1x22x128xi32> + // CHECK: %[[V14:.*]] = mhlo.constant dense<128> : tensor + // CHECK: %[[V15:.*]] = "mhlo.broadcast"(%[[V14]]) {broadcast_sizes = dense<[1, 22, 128]> : tensor<3xi64>} : (tensor) -> tensor<1x22x128xi32> + // CHECK: %[[V16:.*]] = mhlo.constant dense<128> : tensor + // CHECK: %[[V17:.*]] = "mhlo.broadcast"(%[[V16]]) {broadcast_sizes = dense<[1, 22, 128]> : tensor<3xi64>} : (tensor) -> tensor<1x22x128xi32> + // CHECK-DAG: %[[V18:.*]] = mhlo.subtract %[[V11]], %[[V2]] : tensor<1x22x128xi32> + // CHECK-DAG: %[[V19:.*]] = "mhlo.negate"(%[[V18]]) : (tensor<1x22x128xi32>) -> tensor<1x22x128xi32> + // CHECK-DAG: %[[V20:.*]] = mhlo.minimum %[[V18]], %[[V5]] : tensor<1x22x128xi32> + // CHECK-DAG: %[[V21:.*]] = mhlo.maximum %[[V18]], %[[V5]] : tensor<1x22x128xi32> + // CHECK-DAG: 
%[[V22:.*]] = mhlo.add %[[V13]], %[[V20]] : tensor<1x22x128xi32> + // CHECK-DAG: %[[V24:.*]] = mhlo.minimum %[[V22]], %{{[0-9]*}} : tensor<1x22x128xi32> + // CHECK-DAG: %[[V25:.*]] = chlo.broadcast_compare %[[V18]], %[[V5]] {comparison_direction = "GE"} : (tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<1x22x128xi1> + // CHECK-DAG: %[[V26:.*]] = mhlo.subtract %[[V17]], %[[V24]] : tensor<1x22x128xi32> + // CHECK-DAG: %[[V27:.*]] = "mhlo.select"(%[[V25]], %[[V26]], %[[V5]]) : (tensor<1x22x128xi1>, tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<1x22x128xi32> + // CHECK-DAG: %[[V28:.*]] = mhlo.subtract %[[V21]], %[[V27]] : tensor<1x22x128xi32> + // CHECK-DAG: %[[V29:.*]] = mhlo.maximum %[[V19]], %[[V5]] : tensor<1x22x128xi32> + // CHECK-DAG: %[[V30:.*]] = mhlo.subtract %[[V29]], %[[V27]] : tensor<1x22x128xi32> + // CHECK-DAG: %[[V31:.*]] = mhlo.add %[[V3]], %[[V28]] : tensor<1x22x128xi32> + // CHECK-DAG: %[[V32:.*]] = mhlo.add %[[V3]], %[[V30]] : tensor<1x22x128xi32> + // CHECK-DAG: %[[V33:.*]] = chlo.broadcast_compare %[[V31]], %[[V5]] {comparison_direction = "GE"} : (tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<1x22x128xi1> + // CHECK-DAG: %[[V34:.*]] = chlo.broadcast_compare %[[V31]], %[[V15]] {comparison_direction = "LT"} : (tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<1x22x128xi1> + // CHECK-DAG: %[[V35:.*]] = mhlo.and %[[V33]], %[[V34]] : tensor<1x22x128xi1> + // CHECK-DAG: %[[V36:.*]] = chlo.broadcast_compare %[[V32]], %[[V5]] {comparison_direction = "GE"} : (tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<1x22x128xi1> + // CHECK-DAG: %[[V37:.*]] = chlo.broadcast_compare %[[V32]], %[[V13]] {comparison_direction = "LT"} : (tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<1x22x128xi1> + // CHECK-DAG: %[[V38:.*]] = mhlo.and %[[V36]], %[[V37]] : tensor<1x22x128xi1> + // CHECK-DAG: %[[V39:.*]] = mhlo.and %[[V35]], %[[V38]] : tensor<1x22x128xi1> + // CHECK-DAG: %[[V40:.*]] = "mhlo.reshape"(%[[V39]]) : (tensor<1x22x128xi1>) -> tensor<22x128xi1> + // CHECK-DAG: %[[V41:.*]] = "mhlo.concatenate"(%[[V32]], %[[V31]]) {dimension = 0 : i64} : (tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<2x22x128xi32> + // CHECK-DAG: %[[V42:.*]] = "mhlo.gather"(%[[ARG]], %[[V41]]) {dimension_numbers = {collapsed_slice_dims = dense<[1, 2]> : tensor<2xi64>, index_vector_dim = 0 : i64, offset_dims = dense<0> : tensor<1xi64>, start_index_map = dense<[1, 2]> : tensor<2xi64>}, indices_are_sorted = false, slice_sizes = dense<[7, 1, 1]> : tensor<3xi64>} : (tensor<7x140x128xi32>, tensor<2x22x128xi32>) -> tensor<7x22x128xi32> + // CHECK-DAG: %[[V43:.*]] = "mhlo.broadcast"(%[[V40]]) {broadcast_sizes = dense<7> : tensor<1xi64>} : (tensor<22x128xi1>) -> tensor<7x22x128xi1> + // CHECK-DAG: %[[V44:.*]] = "mhlo.broadcast"(%[[V0]]) {broadcast_sizes = dense<[7, 22, 128]> : tensor<3xi64>} : (tensor) -> tensor<7x22x128xi32> + // CHECK-DAG: %[[V45:.*]] = "mhlo.select"(%[[V43]], %[[V42]], %[[V44]]) : (tensor<7x22x128xi1>, tensor<7x22x128xi32>, tensor<7x22x128xi32>) -> tensor<7x22x128xi32> + // CHECK: return %[[V45]] : tensor<7x22x128xi32> + %0 = mhlo.constant dense<42> : tensor // padding value + %1 = mhlo.constant dense<[-10, 11]> : tensor<2xi32> // k + %2 = "tf.MatrixDiagPartV3"(%arg0, %1, %0) { + T = i32, align = "RIGHT_LEFT" + } : (tensor<7x140x128xi32>, tensor<2xi32>, tensor) -> tensor<7x22x128xi32> + return %2: tensor<7x22x128xi32> +} + +// CHECK-LABEL: func @matrix_diag_part_single_diagonal +func @matrix_diag_part_single_diagonal(%arg0: tensor<7x140x128xi32>) -> tensor<7x128xi32> { 
+ %0 = mhlo.constant dense<42> : tensor // padding value + %1 = mhlo.constant dense<0> : tensor<2xi32> // k + %2 = "tf.MatrixDiagPartV3"(%arg0, %1, %0) { + T = i32, align = "RIGHT_LEFT" + } : (tensor<7x140x128xi32>, tensor<2xi32>, tensor) -> tensor<7x128xi32> + // CHECK: %[[result:.*]] = "mhlo.reshape"({{.*}}) : (tensor<7x1x128xi32>) -> tensor<7x128xi32> + // CHECK: return %[[result]] : tensor<7x128xi32> + return %2: tensor<7x128xi32> +} + +// CHECK-LABEL: func @matrix_diag_part_align_ll +func @matrix_diag_part_align_ll(%arg0: tensor<7x140x128xi32>) -> tensor<7x22x128xi32> { + %0 = mhlo.constant dense<42> : tensor // padding value + %1 = mhlo.constant dense<[-10, 11]> : tensor<2xi32> // k + %2 = "tf.MatrixDiagPartV3"(%arg0, %1, %0) { + T = i32, align = "LEFT_LEFT" + } : (tensor<7x140x128xi32>, tensor<2xi32>, tensor) -> tensor<7x22x128xi32> + // CHECK: %[[false:.*]] = mhlo.constant dense : tensor + // CHECK: %[[b_false:.*]] = "mhlo.broadcast"(%[[false]]) {broadcast_sizes = dense<[1, 22, 128]> : tensor<3xi64>} : (tensor) -> tensor<1x22x128xi1> + // CHECK: %{{[0-9]*}} = "mhlo.select"(%[[b_false]], %{{[0-9]*}}, %{{[0-9]*}}) : (tensor<1x22x128xi1>, tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<1x22x128xi32> + return %2: tensor<7x22x128xi32> +} + +// CHECK-LABEL: func @matrix_diag_part_align_lr +func @matrix_diag_part_align_lr(%arg0: tensor<7x140x128xi32>) -> tensor<7x22x128xi32> { + %0 = mhlo.constant dense<42> : tensor // padding value + %1 = mhlo.constant dense<[-10, 11]> : tensor<2xi32> // k + %2 = "tf.MatrixDiagPartV3"(%arg0, %1, %0) { + T = i32, align = "LEFT_RIGHT" + } : (tensor<7x140x128xi32>, tensor<2xi32>, tensor) -> tensor<7x22x128xi32> + // CHECK: %[[le:.*]] = chlo.broadcast_compare %{{[0-9]*}}, %{{[0-9]*}} {comparison_direction = "LE"} : (tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<1x22x128xi1> + // CHECK: %{{[0-9]*}} = "mhlo.select"(%[[le]], %{{[0-9]*}}, %{{[0-9]*}}) : (tensor<1x22x128xi1>, tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<1x22x128xi32> + return %2: tensor<7x22x128xi32> +} + +// CHECK-LABEL: func @matrix_diag_part_align_rl +func @matrix_diag_part_align_rl(%arg0: tensor<7x140x128xi32>) -> tensor<7x22x128xi32> { + %0 = mhlo.constant dense<42> : tensor // padding value + %1 = mhlo.constant dense<[-10, 11]> : tensor<2xi32> // k + %2 = "tf.MatrixDiagPartV3"(%arg0, %1, %0) { + T = i32, align = "RIGHT_LEFT" + } : (tensor<7x140x128xi32>, tensor<2xi32>, tensor) -> tensor<7x22x128xi32> + // CHECK: %[[ge:.*]] = chlo.broadcast_compare %{{[0-9]*}}, %{{[0-9]*}} {comparison_direction = "GE"} : (tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<1x22x128xi1> + // CHECK: %{{[0-9]*}} = "mhlo.select"(%[[ge]], %{{[0-9]*}}, %{{[0-9]*}}) : (tensor<1x22x128xi1>, tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<1x22x128xi32> + return %2: tensor<7x22x128xi32> +} + +// CHECK-LABEL: func @matrix_diag_part_align_rr +func @matrix_diag_part_align_rr(%arg0: tensor<7x140x128xi32>) -> tensor<7x22x128xi32> { + %0 = mhlo.constant dense<42> : tensor // padding value + %1 = mhlo.constant dense<[-10, 11]> : tensor<2xi32> // k + %2 = "tf.MatrixDiagPartV3"(%arg0, %1, %0) { + T = i32, align = "RIGHT_RIGHT" + } : (tensor<7x140x128xi32>, tensor<2xi32>, tensor) -> tensor<7x22x128xi32> + // CHECK: %[[true:.*]] = mhlo.constant dense : tensor + // CHECK: %[[b_true:.*]] = "mhlo.broadcast"(%[[true]]) {broadcast_sizes = dense<[1, 22, 128]> : tensor<3xi64>} : (tensor) -> tensor<1x22x128xi1> + // CHECK: %{{[0-9]*}} = "mhlo.select"(%[[b_true]], %{{[0-9]*}}, %{{[0-9]*}}) : 
(tensor<1x22x128xi1>, tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<1x22x128xi32> + return %2: tensor<7x22x128xi32> +} + +// CHECK-LABEL: func @matrix_diag_part_align_7d +// CHECK: (%arg0: tensor<3x5x7x9x11x13x17xf32>) -> tensor<3x5x7x9x11x4x10xf32> +func @matrix_diag_part_align_7d(%arg0: tensor<3x5x7x9x11x13x17xf32>) -> tensor<3x5x7x9x11x4x10xf32> { + %0 = mhlo.constant dense<-1.> : tensor // padding value + %1 = mhlo.constant dense<[-6, -3]> : tensor<2xi32> // k + %2 = "tf.MatrixDiagPartV3"(%arg0, %1, %0) { + T = f32, align = "LEFT_RIGHT" + } : (tensor<3x5x7x9x11x13x17xf32>, tensor<2xi32>, tensor) -> tensor<3x5x7x9x11x4x10xf32> + return %2: tensor<3x5x7x9x11x4x10xf32> +} + //===----------------------------------------------------------------------===// // Einsum. //===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc index b1e74e354fe..e387ad43b89 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc @@ -1387,6 +1387,268 @@ class ConvertDiagPartOp : public OpRewritePattern { } }; +// Converts TensorFlow MatrixDiagPartOp to HLO ops. +class ConvertMatrixDiagPartV3Op + : public OpRewritePattern { + using Shape = llvm::SmallVector; + + // Parse the "k" parameter. MatrixDiagPartV3 allows to specify the diagonal(s) + // with k. This can be either a single value (for a single diagonal) or a + // tuple of two values (starting and ending diagonal, for a band). + LogicalResult ExtractK(TF::MatrixDiagPartV3Op op, int64_t (*k)[2]) const { + DenseIntElementsAttr kattr; + if (!matchPattern(op.k(), m_Constant(&kattr))) { + return failure(); + } + DenseIntElementsAttr::iterator it = kattr.begin(); + (*k)[0] = (*it).getSExtValue(); + it++; + if (it == kattr.end()) { + // Handle input like e.g. "k = 5", in which case we extract a single + // diagonal. + (*k)[1] = (*k)[0]; + } else { + // Handle input like e.g. "k = [-1, 1]", in which case we extract a + // band (multiple diagonals). + (*k)[1] = (*it).getSExtValue(); + } + return success(); + } + + // Utility method for broadcasting integer constants to a given shape. + BroadcastOp BroadcastConstant(Location loc, Shape shape, int32_t constant, + int int_size, PatternRewriter &rewriter) const { + return rewriter.create( + loc, RankedTensorType::get(shape, rewriter.getIntegerType(int_size)), + GetScalarConstOfType(rewriter.getIntegerType(int_size), loc, constant, + &rewriter), + GetI64ElementsAttr(shape, &rewriter)); + } + + public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(TF::MatrixDiagPartV3Op op, + PatternRewriter &rewriter) const override { + Location loc = op.getLoc(); + ShapedType input_type = op.input().getType().dyn_cast(); + auto element_type = input_type.getElementType(); + + // Align is a string specifying how superdiagonals and subdiagonals should + // be aligned/padded for diagonals that are shorter than max_diag_len. The + // format is "{super}_{sub}", with {super} the superdiagonal alignment and + // {sub} the subdiagonal alignment. "LEFT" means rows will be padded to the + // left, "RIGHT" means rows will be padded ot the right. The default is + // "RIGHT_LEFT". 
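For reference, the parsing of `k` and the alignment rule described in the comment above can be summarized in a small standalone sketch. This is illustrative only and not part of the patch; `ExtractBand` and `AlignmentOffset` are hypothetical names, and the formulas are the ones quoted in the MatrixDiagPartV3 op description.

// Standalone sketch (illustrative only): how a scalar or two-element `k`
// selects a band of diagonals, and how `align` shifts short diagonals.
#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>

// k holds one value (single diagonal) or two values (band [k0, k1]).
std::pair<int64_t, int64_t> ExtractBand(const std::vector<int64_t>& k) {
  return k.size() == 1 ? std::make_pair(k[0], k[0])
                       : std::make_pair(k[0], k[1]);
}

// offset = max_diag_len - diag_len(d) when the relevant side is right-aligned,
// i.e. for superdiagonals (d >= 0) under RIGHT_* and for subdiagonals
// (d <= 0) under *_RIGHT; otherwise 0.
int64_t AlignmentOffset(int64_t d, int64_t rows, int64_t cols,
                        int64_t max_diag_len, bool super_right,
                        bool sub_right) {
  const int64_t diag_len = std::min(cols - std::max<int64_t>(d, 0),
                                    rows + std::min<int64_t>(d, 0));
  const bool right_aligned = (d >= 0 && super_right) || (d <= 0 && sub_right);
  return right_aligned ? max_diag_len - diag_len : 0;
}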
+ StringRef align = op.getAttrOfType("align").getValue(); + enum Alignment { kLeft, kRight }; + + // default is RIGHT_LEFT + Alignment superdiagonal_align = kRight; + Alignment subdiagonal_align = kLeft; + + if (align == "RIGHT_LEFT") { + superdiagonal_align = kRight; + subdiagonal_align = kLeft; + } else if (align == "RIGHT_RIGHT") { + superdiagonal_align = kRight; + subdiagonal_align = kRight; + } else if (align == "LEFT_RIGHT") { + superdiagonal_align = kLeft; + subdiagonal_align = kRight; + } else if (align == "LEFT_LEFT") { + superdiagonal_align = kLeft; + subdiagonal_align = kLeft; + } else { + return failure(); // unsupported alignment + } + + // MatrixDiagPart operates on a matrix of shape [I, J, ..., L, M, N], and + // will extract the diagonal(s) out of [M, N], for all [I, J, ..., L]. + if (!input_type || !input_type.hasStaticShape()) return failure(); + int64_t num_dims = input_type.getRank(); + if (num_dims < 2) return failure(); + int rows = input_type.getDimSize(num_dims - 2); // rows + int cols = input_type.getDimSize(num_dims - 1); // cols + + // We extract the diagonals from k[0] up to and including k[1]. + // Addressing is 0 for the main diagonal. (So k = [0, 0] would just extract + // the main diagonal). It's negative for subdiagonals (under and to the left + // of the main diagonal) and positive for superdiagonals (above and to the + // right of the main diagonal). + int64_t k[2]; + if (failed(ExtractK(op, &k))) return failure(); + int num_diags = k[1] - k[0] + 1; + + // Shifting diagonals away from the main diagonal might shorten them. This + // is the longest diagonal we will see. We make this the last dimension of + // the output shape. + int64_t max_diag_len = + std::min(rows + std::min(k[1], 0l), cols + std::min(-k[0], 0l)); + + // The first dimension is the index vector dimension we'll use for gather. + // It's 1 here, but will be 2 once we glue x and y together. + Shape indices_shape({1, num_diags, max_diag_len}); + + RankedTensorType iota_type = + RankedTensorType::get(indices_shape, rewriter.getIntegerType(32)); + Value iotaM = + rewriter.create(loc, iota_type, rewriter.getI64IntegerAttr(1)); + Value iotaN = + rewriter.create(loc, iota_type, rewriter.getI64IntegerAttr(2)); + + // Boradcasted constants, of the same shape as iotaM and iotaN. + Value b_zero = BroadcastConstant(loc, indices_shape, 0, 32, rewriter); + Value b_false = BroadcastConstant(loc, indices_shape, 0, 1, rewriter); + Value b_true = BroadcastConstant(loc, indices_shape, 1, 1, rewriter); + Value b_k1 = BroadcastConstant(loc, indices_shape, k[1], 32, rewriter); + Value b_rows = BroadcastConstant(loc, indices_shape, rows, 32, rewriter); + Value b_cols = BroadcastConstant(loc, indices_shape, cols, 32, rewriter); + Value b_max_diag_len = + BroadcastConstant(loc, indices_shape, max_diag_len, 32, rewriter); + + // d = k[1] - m + // (A.k.a. the number of the diagonal, depending on m. Note that we + // subtract m here. This means we start with the superdiagonals and + // move downwards towards the subdiagonals. So the start indices will + // be decreasing.) + Value d = rewriter.create(loc, b_k1, iotaM); + Value neg_d = rewriter.create(loc, d); + + // Common subexpressions. + Value min_d_zero = rewriter.create(loc, d, b_zero); + Value max_d_zero = rewriter.create(loc, d, b_zero); + + // diag_len_d = min(rows + min(d, 0), cols - max(d, 0)) + // (Length of a diagonal for a given d. Same as max_diag_len for m = 0.) 
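Plugging the shapes from the legalize-tf.mlir test above into these formulas shows where the 22 and 128 in the CHECK lines come from (a worked example; the values are taken from that test):

  rows = 140, cols = 128, k = [-10, 11]
  num_diags    = k[1] - k[0] + 1 = 11 - (-10) + 1 = 22
  max_diag_len = min(rows + min(k[1], 0), cols + min(-k[0], 0))
               = min(140 + 0, 128 + 0) = 128

so indices_shape is [1, 22, 128] and the op's result type is tensor<7x22x128xi32>, matching the test.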
+ Value diag_len_d = rewriter.create( + loc, rewriter.create(loc, b_rows, min_d_zero), + rewriter.create(loc, b_cols, max_d_zero)); + + // offset is max_diag_len - diag_len_d if we're padding, 0 otherwise. + Value cmp; + if (subdiagonal_align == kRight && superdiagonal_align == kRight) { + cmp = b_true; + } else if (superdiagonal_align == kRight) { + // offset = d>=0 ? max_diag_len - diag_len_d : 0 + cmp = rewriter.create(loc, d, b_zero); + } else if (subdiagonal_align == kRight) { + // offset = d<=0 ? max_diag_len - diag_len_d : 0 + cmp = rewriter.create(loc, d, b_zero); + } else { + // offset = 0 + cmp = b_false; + } + + // This offset shifts the diagonals to the "left" or "right", depending + // on alignment. + Value offset = rewriter.create( + loc, b_zero.getType(), cmp, + rewriter.create(loc, b_max_diag_len, diag_len_d), b_zero); + + // x = max(d, 0) - offset + // y = max(-d, 0) - offset + Value x = rewriter.create(loc, max_d_zero, offset); + Value y = rewriter.create( + loc, rewriter.create(loc, neg_d, b_zero), offset); + + Value n_plus_x = rewriter.create(loc, iotaN, x); + Value n_plus_y = rewriter.create(loc, iotaN, y); + + // GatherOp is happy about letting us index out of bounds values, but those + // values will be undefined. So we mask them later. Set up the boolean + // expression that tells us which entries, in the output shape, are out of + // bounds and thus become the padding_value. + Value x_in_bounds = rewriter.create( + loc, + rewriter.create(loc, b_false.getType(), n_plus_x, + b_zero), + rewriter.create(loc, b_false.getType(), n_plus_x, b_cols)); + Value y_in_bounds = rewriter.create( + loc, + rewriter.create(loc, b_false.getType(), n_plus_y, + b_zero), + rewriter.create(loc, b_false.getType(), n_plus_y, b_rows)); + Value in_bounds = rewriter.create( + loc, + RankedTensorType::get(Shape({num_diags, max_diag_len}), + rewriter.getIntegerType(1)), + rewriter.create(loc, x_in_bounds, y_in_bounds)); + + // Now combine x and y into the index data structure needed for gather. + Shape concat_shape({2, num_diags, max_diag_len}); + Value start_indices = rewriter.create( + loc, RankedTensorType::get(concat_shape, rewriter.getIntegerType(32)), + mlir::ValueRange({n_plus_y, n_plus_x}), + mlir::IntegerAttr::get(rewriter.getIntegerType(64), 0)); + + // Shape of the final output. (Except for dimension folding in the + // single diagonal case.) + Shape output_shape; + for (int i = 0; i < num_dims - 2; i++) { + output_shape.push_back(input_type.getDimSize(i)); + } + output_shape.push_back(num_diags); + output_shape.push_back(max_diag_len); + auto output_type = RankedTensorType::get(output_shape, element_type); + + // A slice is the shape of what GatherOp copies per lookup. So the last + // two dimensions (M, N in the matrix-diag-part docs) are where we go + // through entry by entry. + ArrayRef input_shape = input_type.getShape(); + Shape slice_sizes(input_shape.begin(), input_shape.end()); + int slice_dimensions = slice_sizes.size(); + slice_sizes[slice_dimensions - 2] = 1; + slice_sizes[slice_dimensions - 1] = 1; + + // Dimensions of the input we won't see in the output (M and N). + SmallVector collapsed_dims( + {slice_dimensions - 2, slice_dimensions - 1}); + + // Which dimensions (in the input) the two offset "columns" map to. + SmallVector start_index_map({num_dims - 2, num_dims - 1}); + + // Gather the diagonal entries. + // TODO(kramm): For a single diagonal, this might be slower than the + // mask + sum approach. Special-case num_diags==1? 
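To make the gather-and-mask semantics concrete, the following standalone loop computes, for a single matrix, the same values that the lowered iota/gather/select sequence produces. It is a sketch, not part of the patch; `MatrixDiagPartRef` is a hypothetical name, and the index math follows the op description quoted earlier.

// Reference loop (illustrative only): out-of-bounds positions get
// padding_value, exactly as the final mhlo.select does in the lowering.
#include <algorithm>
#include <cstdint>
#include <vector>

// input is rows x cols; returns num_diags x max_diag_len.
std::vector<std::vector<int32_t>> MatrixDiagPartRef(
    const std::vector<std::vector<int32_t>>& input, int64_t k0, int64_t k1,
    int32_t padding_value, bool super_right, bool sub_right) {
  const int64_t rows = input.size(), cols = input[0].size();
  const int64_t num_diags = k1 - k0 + 1;
  const int64_t max_diag_len = std::min(rows + std::min<int64_t>(k1, 0),
                                        cols + std::min<int64_t>(-k0, 0));
  std::vector<std::vector<int32_t>> out(num_diags,
                                        std::vector<int32_t>(max_diag_len));
  for (int64_t m = 0; m < num_diags; ++m) {
    const int64_t d = k1 - m;  // Start with superdiagonals, move down.
    const int64_t diag_len = std::min(cols - std::max<int64_t>(d, 0),
                                      rows + std::min<int64_t>(d, 0));
    const bool right = (d >= 0 && super_right) || (d <= 0 && sub_right);
    const int64_t offset = right ? max_diag_len - diag_len : 0;
    const int64_t x0 = std::max<int64_t>(d, 0) - offset;
    const int64_t y0 = std::max<int64_t>(-d, 0) - offset;
    for (int64_t n = 0; n < max_diag_len; ++n) {
      const int64_t y = n + y0, x = n + x0;
      out[m][n] = (y >= 0 && y < rows && x >= 0 && x < cols) ? input[y][x]
                                                             : padding_value;
    }
  }
  return out;
}

Running this on the 3x4 example matrix from the op description with k = (-1, 2), padding_value 0, and RIGHT_LEFT alignment reproduces the band [[0, 3, 8], [2, 7, 6], [1, 6, 7], [5, 8, 0]] shown there.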
+ auto dims_attr = GatherDimensionNumbers::get( + /*offset_dims=*/GetI64ElementsAttrForSeq(0, num_dims - 2, &rewriter), + /*collapsed_slice_dims=*/GetI64ElementsAttr(collapsed_dims, &rewriter), + /*start_index_map=*/GetI64ElementsAttr(start_index_map, &rewriter), + /*index_vector_dim=*/rewriter.getI64IntegerAttr(0), + rewriter.getContext()); + Value gather = rewriter.create( + loc, output_type, op.input(), start_indices, dims_attr, + GetI64ElementsAttr(slice_sizes, &rewriter)); + + // We now need to broadcast the "in_bounds" boolean expression, as well as + // the padding value, to do the final select. + Shape broadcast_bounds; + for (int i = 0; i < output_shape.size() - 2; i++) { + broadcast_bounds.push_back(output_shape[i]); + } + Value b_in_bounds = rewriter.create( + loc, RankedTensorType::get(output_shape, rewriter.getIntegerType(1)), + in_bounds, GetI64ElementsAttr(broadcast_bounds, &rewriter)); + Value b_padding = rewriter.create( + loc, output_type, op.padding_value(), + GetI64ElementsAttr(output_shape, &rewriter)); + + // Replace all out-of-bounds values in the result with padding_value. + Value result = rewriter.create(loc, output_type, b_in_bounds, + gather, b_padding); + + if (num_diags == 1) { + // matrix_diag_part folds away the 1-sized band dimension if we only + // extract a single diagonal. + result = rewriter.create(loc, op.getType(), result); + } + + rewriter.replaceOp(op, result); + return success(); + } +}; + // Converts TensorFlow EinsumOp to either HLO EinsumOp or UnaryEinsumOp // depending on arity of the op. class ConvertEinsumOp : public OpRewritePattern { @@ -5486,8 +5748,8 @@ LogicalResult legalizeTF(Operation *op, bool allow_partial_conversion, ConvertAvgPool3DGradOp, ConvertMaxPool2DOp, ConvertMaxPool3DOp, ConvertMaxPool2DGradOp, ConvertMaxPool3DGradOp, ConvertMeanOp, ConvertOneHotOp, ConvertOutfeedEnqueueTupleOp, ConvertProdOp, ConvertQrOp, - ConvertDynamicRangeOp, ConvertRangeOp, ConvertSelectV2Op, - ConvertSigmoidOp, ConvertShapeOp, ConvertSizeOp, + ConvertMatrixDiagPartV3Op, ConvertDynamicRangeOp, ConvertRangeOp, + ConvertSelectV2Op, ConvertSigmoidOp, ConvertShapeOp, ConvertSizeOp, ConvertSoftmaxOp, ConvertSoftmaxOp, ConvertSplitOp, ConvertSplitVOp, ConvertStridedSliceOp, ConvertStridedSliceGradOp, ConvertSumOp, From ac9c2c749313422a90a0a11e09405cc9b12153c6 Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Tue, 28 Jul 2020 20:47:18 -0400 Subject: [PATCH 1555/2522] Update optimize.cc --- tensorflow/compiler/mlir/lite/transforms/optimize.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/transforms/optimize.cc b/tensorflow/compiler/mlir/lite/transforms/optimize.cc index 751c5266f65..c0c53935d6a 100644 --- a/tensorflow/compiler/mlir/lite/transforms/optimize.cc +++ b/tensorflow/compiler/mlir/lite/transforms/optimize.cc @@ -197,9 +197,9 @@ TypeAttr RescaleQtype(Type input, Attribute factor) { DenseElementsAttr GetShape(Value output_val) { auto output_type = output_val.getType().cast(); auto shape_vector = output_type.getShape(); - std::vector shape(shape_vector.size()); - for (int i = 0, end = shape_vector.size(); i < end; ++i) { - shape[i] = shape_vector[i]; + std::vector shape; + for (auto shape_object : shape_vector) { + shape.push_back(shape_object); } return mlir::DenseElementsAttr::get( RankedTensorType::get( From 15c52a21aa7c35d645982a76ed56b58795090098 Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Tue, 28 Jul 2020 20:47:25 -0400 Subject: [PATCH 1556/2522] Update optimize.cc From 
35c4ca71bee0bd18ddb9a5e40433a27bed0f9f29 Mon Sep 17 00:00:00 2001 From: Henry Tan Date: Tue, 28 Jul 2020 17:48:53 -0700 Subject: [PATCH 1557/2522] Refactor compilation cache library. PiperOrigin-RevId: 323690659 Change-Id: Iaca0a76778d9d94af248e932ac0ed1b7cdb0dfc1 --- tensorflow/core/tpu/kernels/BUILD | 14 +- .../tpu_compilation_cache_entry_impl.h | 108 ------------- .../kernels/tpu_compilation_cache_external.cc | 15 -- .../kernels/tpu_compilation_cache_external.h | 12 -- .../tpu_compilation_cache_interface.cc | 144 ++++++++++++++++-- .../kernels/tpu_compilation_cache_interface.h | 111 +++++--------- .../tpu_compilation_cache_local_lookup.cc | 43 ++---- .../tpu_compilation_cache_local_lookup.h | 13 +- .../kernels/tpu_compilation_cache_lookup.h | 18 +-- tensorflow/core/tpu/kernels/tpu_execute_op.cc | 58 +++---- .../core/tpu/kernels/tpu_program_group.cc | 61 ++++++++ .../core/tpu/kernels/tpu_program_group.h | 45 ++---- 12 files changed, 299 insertions(+), 343 deletions(-) delete mode 100644 tensorflow/core/tpu/kernels/tpu_compilation_cache_entry_impl.h diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index 31e64c77762..06beab6b016 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -220,17 +220,6 @@ cc_library( ], ) -cc_library( - name = "tpu_compilation_cache_entry_impl", - srcs = [], - hdrs = ["tpu_compilation_cache_entry_impl.h"], - deps = [ - ":compiled_subgraph", - ":tpu_compilation_cache_interface", - ":tpu_executable_info_proto_cc", - ], -) - cc_library( name = "tpu_compilation_cache_lookup", hdrs = [ @@ -337,6 +326,7 @@ cc_library( hdrs = ["tpu_compilation_cache_interface.h"], deps = [ ":compiled_subgraph", + ":tpu_compilation_cache_entry", ":tpu_compilation_cache_key", ":tpu_compilation_cache_proto_cc", ":tpu_compilation_metrics_hdrs", @@ -368,7 +358,6 @@ cc_library( deps = [ ":compiled_subgraph", ":tpu_compilation_cache_entry", - ":tpu_compilation_cache_entry_impl", ":tpu_compilation_cache_interface", ":tpu_compilation_cache_key", ":tpu_compilation_cache_proto_cc", @@ -611,6 +600,7 @@ cc_library( deps = [ ":tpu_compilation_cache_entry", ":tpu_compilation_cache_external", + ":tpu_compilation_cache_interface", ":tpu_compilation_cache_local_lookup", ":tpu_compilation_cache_lookup", ":tpu_executable_info_proto_cc", diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry_impl.h b/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry_impl.h deleted file mode 100644 index 501f802b01f..00000000000 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry_impl.h +++ /dev/null @@ -1,108 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILATION_CACHE_ENTRY_IMPL_H_ -#define TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILATION_CACHE_ENTRY_IMPL_H_ - -#include "tensorflow/core/tpu/kernels/compiled_subgraph.h" -#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h" -#include "tensorflow/core/tpu/kernels/tpu_executable_info.pb.h" - -namespace tensorflow { -namespace tpu { - -// Wrapper for a cache entry that holds a reference to the entry until the -// wrapper is deleted. This wrapper is the concrete type of -// CompilationCacheEntryRef returned by Lookup. -template -class CompilationCacheEntryRefImpl - : public CompilationCacheEntryRef { - public: - CompilationCacheEntryRefImpl(TpuCompilationCacheInterface* parent, - CompiledSubgraph* entry, int index); - - ~CompilationCacheEntryRefImpl() override; - - Status ToSubEntryRef(CompilationCacheFetchTarget fetch_target) override; - - protected: - TpuCompilationCacheInterface* parent_; // Not owned. - // A reference to entry_ is acquired in the constructor and released via - // parent->DiscardEntryRefs in the destructor. - CompiledSubgraph* entry_; - // The index of the program in entry_ that is returned by the get method. - int index_; -}; - -template -CompilationCacheEntryRefImpl::CompilationCacheEntryRefImpl( - TpuCompilationCacheInterface* parent, CompiledSubgraph* entry, int index) - : parent_(parent), entry_(entry), index_(index) { - if (entry_ == nullptr) { - return; - } - if (entry_->main_entry == nullptr) { - entry_->Ref(); - } else { - // This is a sharding/unsharding entry nested in a main entry. Only - // refcount the main entry. - entry_->main_entry->Ref(); - } -} - -template -CompilationCacheEntryRefImpl::~CompilationCacheEntryRefImpl() { - if (entry_ == nullptr) { - return; - } - if (entry_->main_entry == nullptr) { - parent_->DiscardEntryRefs({entry_}); - } else { - parent_->DiscardEntryRefs({entry_->main_entry}); - } -} - -template -Status CompilationCacheEntryRefImpl::ToSubEntryRef( - CompilationCacheFetchTarget fetch_target) { - CompiledSubgraph* target = nullptr; - switch (fetch_target) { - case CompilationCacheFetchTarget::MAIN: - target = entry_; - break; - case CompilationCacheFetchTarget::SHARDING: - target = entry_->sharding_entry.get(); - break; - case CompilationCacheFetchTarget::UNSHARDING: - target = entry_->unsharding_entry.get(); - break; - default: - return xla::InvalidArgument("Invalid fetch target: %d", fetch_target); - } - - if (target == nullptr) { - // Cache entry does not have an unsharding subentry. Unref and replace - // with nullptr. - parent_->DiscardEntryRefs({entry_}); - } - // Otherwise, since the refcount is always on the main entry, we don't - // need ref/unref. 
- entry_ = target; - return Status::OK(); -} - -} // namespace tpu -} // namespace tensorflow - -#endif // TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILATION_CACHE_ENTRY_IMPL_H_ diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.cc b/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.cc index b4b18d1743b..a58eb21f81d 100644 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.cc +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.cc @@ -51,20 +51,6 @@ void PopulateEntry(const std::string& key, CompiledSubgraph* entry, } } // namespace -TpuCompilationCacheExternal::EntryRefImpl::EntryRefImpl( - TpuCompilationCacheInterface* parent, CompiledSubgraph* entry, int index) - : CompilationCacheEntryRefImpl(parent, entry, - index) {} - -TpuCompilationCacheEntry TpuCompilationCacheExternal::EntryRefImpl::get() { - if (entry_ == nullptr) { - // Create an empty entry if the entry is nullptr. This corresponds to - // non-existing sharding/unsharding entries. - return TpuCompilationCacheEntry(); - } - return TpuCompilationCacheEntry(entry_->tpu_program_group.get(), index_); -} - CompiledSubgraph* TpuCompilationCacheExternal::InitializeEntry( const string& key, const std::function& initialize_program, @@ -73,7 +59,6 @@ CompiledSubgraph* TpuCompilationCacheExternal::InitializeEntry( main_entry->parent = this; main_entry->subgraph_key = key; main_entry->uid = get_uid(); - // TODO(henrytan): implement TpuCompilationCacheKey.debug_string. main_entry->cache_entry_debug_string = subgraph_key.prefix; VLOG(1) << "Cache Initializing Entry Session Debug " << main_entry->cache_entry_debug_string; diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.h b/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.h index 86615b15d4c..51b5ffbed0d 100644 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.h +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.h @@ -32,7 +32,6 @@ limitations under the License. 
#include "tensorflow/core/tpu/kernels/compiled_subgraph.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache.pb.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h" -#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_entry_impl.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_key.h" #include "tensorflow/core/tpu/kernels/tpu_compile_c_api.h" @@ -46,17 +45,6 @@ namespace tpu { class TpuCompilationCacheExternal : public TpuCompilationCacheInterface { public: - using Status = ::stream_executor::port::Status; - - class EntryRefImpl - : public CompilationCacheEntryRefImpl { - public: - EntryRefImpl(TpuCompilationCacheInterface* parent, CompiledSubgraph* entry, - int index); - - TpuCompilationCacheEntry get() override; - }; - explicit TpuCompilationCacheExternal(int64 max_cache_size) : TpuCompilationCacheInterface(max_cache_size) {} diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.cc b/tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.cc index 9e1aedf92ce..4cd2b864203 100644 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.cc +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.cc @@ -38,10 +38,77 @@ void TpuCompilationCacheInterface::RefHolder::AddRef(CompiledSubgraph* entry) { entries_.push_back(entry); } -string TpuCompilationCacheInterface::RefHolder::DebugString() const { +std::string TpuCompilationCacheInterface::RefHolder::DebugString() const { return "TpuCompilationCacheRefHolder"; } +CompilationCacheEntryRef::CompilationCacheEntryRef() + : parent_(nullptr), entry_(nullptr), index_(0) {} + +CompilationCacheEntryRef::CompilationCacheEntryRef( + TpuCompilationCacheInterface* parent, CompiledSubgraph* entry, int index) + : parent_(parent), entry_(entry), index_(index) { + if (entry_ == nullptr) { + return; + } + if (entry_->main_entry == nullptr) { + entry_->Ref(); + } else { + // This is a sharding/unsharding entry nested in a main entry. Only + // refcount the main entry. + entry_->main_entry->Ref(); + } +} + +CompilationCacheEntryRef::~CompilationCacheEntryRef() { + if (entry_ == nullptr) { + return; + } + if (entry_->main_entry == nullptr) { + parent_->DiscardEntryRefs({entry_}); + } else { + parent_->DiscardEntryRefs({entry_->main_entry}); + } +} + +TpuCompilationCacheEntry CompilationCacheEntryRef::get() { + if (entry_ == nullptr) { + // Create an empty entry if the entry is nullptr. This corresponds to + // non-existing sharding/unsharding entries. + return TpuCompilationCacheEntry(); + } + + return TpuCompilationCacheEntry(entry_->tpu_program_group.get(), index_); +} + +Status CompilationCacheEntryRef::ToSubEntryRef( + CompilationCacheFetchTarget fetch_target) { + CompiledSubgraph* target = nullptr; + switch (fetch_target) { + case CompilationCacheFetchTarget::MAIN: + target = entry_; + break; + case CompilationCacheFetchTarget::SHARDING: + target = entry_->sharding_entry.get(); + break; + case CompilationCacheFetchTarget::UNSHARDING: + target = entry_->unsharding_entry.get(); + break; + default: + return xla::InvalidArgument("Invalid fetch target: %d", fetch_target); + } + + if (target == nullptr) { + // Cache entry does not have an unsharding subentry. Unref and replace + // with nullptr. + parent_->DiscardEntryRefs({entry_}); + } + // Otherwise, since the refcount is always on the main entry, we don't + // need ref/unref. 
+ entry_ = target; + return Status::OK(); +} + TpuCompilationCacheInterface::TpuCompilationCacheInterface(int64 max_cache_size) : max_cache_size_(max_cache_size) { CHECK_GE(max_cache_size_, 0); @@ -156,7 +223,7 @@ void TpuCompilationCacheInterface::UnloadAndDestroy(CompiledSubgraph* entry) { entry->Unref(); } -size_t TpuCompilationCacheInterface::RemoveEntry(const string& key) { +size_t TpuCompilationCacheInterface::RemoveEntry(const std::string& key) { auto erased = cache_.erase(key); TpuCompilationMetrics::SetCacheEntryCount(cache_.size()); @@ -196,7 +263,7 @@ CompiledSubgraph* TpuCompilationCacheInterface::DiscardEntryRef( } erased = entries_by_uid_.erase(entry->uid); CHECK_EQ(erased, 1); - for (const string& key : entry->proto_key) { + for (const std::string& key : entry->proto_key) { erased = entries_by_proto_key_.erase(key); CHECK_EQ(erased, 1); } @@ -269,10 +336,10 @@ void TpuCompilationCacheInterface::LookupEntryMarkedForEviction( } } -void TpuCompilationCacheInterface::InsertEntry(const string& key, +void TpuCompilationCacheInterface::InsertEntry(const std::string& key, CompiledSubgraph* entry) { auto cache_inserted = - cache_.insert(std::pair(key, entry)); + cache_.insert(std::pair(key, entry)); CHECK(cache_inserted.second); TpuCompilationMetrics::SetCacheEntryCount(cache_.size()); @@ -295,7 +362,8 @@ Status TpuCompilationCacheInterface::CompileIfKeyAbsent( const TpuCompilationCacheKey& subgraph_key, const SessionMetadata* session_metadata, CompilationRefHolder* per_step_ref_holder, int64* uid, - std::vector* proto_key, std::vector* may_modify_variables, + std::vector* proto_key, + std::vector* may_modify_variables, absl::Span* hlo_metadatas, const std::function& compile_function) { std::vector removed_entries; @@ -308,7 +376,7 @@ Status TpuCompilationCacheInterface::CompileIfKeyAbsent( return status; } -string TpuCompilationCacheInterface::FindCacheKey( +std::string TpuCompilationCacheInterface::FindCacheKey( const TpuCompilationCacheKey& subgraph_key) { if (!subgraph_key.has_guaranteed_const) { return subgraph_key.prefix; @@ -331,7 +399,8 @@ Status TpuCompilationCacheInterface::CompileIfKeyAbsentHelper( const TpuCompilationCacheKey& subgraph_key, const SessionMetadata* session_metadata, CompilationRefHolder* per_step_ref_holder, int64* uid, - std::vector* proto_key, std::vector* may_modify_variables, + std::vector* proto_key, + std::vector* may_modify_variables, std::vector* removed_entries, absl::Span* hlo_metadatas, const std::function& compile_function) { @@ -345,17 +414,18 @@ Status TpuCompilationCacheInterface::CompileIfKeyAbsentHelper( // for the lifetime of the object, see InitializeEntry() call below. absl::MutexLock lock(&mu_); - string cache_key = FindCacheKey(subgraph_key); + std::string cache_key = FindCacheKey(subgraph_key); auto iter = cache_.find(cache_key); bool is_new_key = iter == cache_.end(); - const string session_name = tpu::SessionNameFromMetadata(session_metadata); + const std::string session_name = + tpu::SessionNameFromMetadata(session_metadata); if (is_new_key) { cache_key = subgraph_key.ToString(); TpuCompilationMetrics::IncrementCacheLookupCount( /*is_cache_hit=*/false, session_name); - const string msg = + const std::string msg = strings::StrCat("TPU host compilation cache miss: cache_key(", cache_key, "), session_name(", session_name, ")"); TRACESTRING(msg); @@ -364,7 +434,7 @@ Status TpuCompilationCacheInterface::CompileIfKeyAbsentHelper( // Check if caller has disabled compilation. Set using // internal::ScopedTpuCompileDisabler. 
if (!UtilApiFn()->TpuCompile_IsTpuCompilationEnabledFn()) { - const string error_msg = strings::StrCat( + const std::string error_msg = strings::StrCat( "[TpuCompilationDisabled]: Compilation cache miss, but compilation " "disabled, session_name(", session_name, ") Debug String: ", subgraph_key.debug_string); @@ -403,7 +473,7 @@ Status TpuCompilationCacheInterface::CompileIfKeyAbsentHelper( } else { TpuCompilationMetrics::IncrementCacheLookupCount( /*is_cache_hit=*/true, session_name); - const string msg = + const std::string msg = strings::StrCat("TPU host compilation cache hit: cache_key(", cache_key, "), session_name(", session_name, ")"); TRACESTRING(msg); @@ -466,8 +536,8 @@ Status TpuCompilationCacheInterface::CompileIfKeyAbsentHelper( return entry->initialization_status; } -Status TpuCompilationCacheInterface::GetKeysFromUid(int64 uid, - std::vector* keys) { +Status TpuCompilationCacheInterface::GetKeysFromUid( + int64 uid, std::vector* keys) { keys->clear(); absl::MutexLock lock(&mu_); @@ -479,5 +549,49 @@ Status TpuCompilationCacheInterface::GetKeysFromUid(int64 uid, return Status::OK(); } +Status TpuCompilationCacheInterface::Lookup( + int64 uid, int proto_index, + std::unique_ptr* entry) { + entry->reset(); + + profiler::TraceMe proto_lookup_traceme( + "TPU compilation cache proto lookup by uid", + /*level=*/2); + + absl::MutexLock lock(&mu_); + const auto iter = entries_by_uid_.find(uid); + if (iter == entries_by_uid_.end()) { + return errors::NotFound("No subgraph found for uid ", uid); + } + CompiledSubgraph* cache_entry = iter->second; + if (proto_index < 0 || + proto_index >= cache_entry->tpu_program_group->program_count()) { + return errors::NotFound("No proto found for core index ", proto_index, + " in subgraph with uid ", uid); + } + *entry = absl::make_unique(this, cache_entry, + proto_index); + return Status::OK(); +} + +Status TpuCompilationCacheInterface::Lookup( + const std::string& proto_key, + std::unique_ptr* entry) { + entry->reset(); + + profiler::TraceMe proto_lookup_traceme("TPU compilation cache proto lookup", + /*level=*/2); + + absl::MutexLock lock(&mu_); + const auto iter = entries_by_proto_key_.find(proto_key); + if (iter == entries_by_proto_key_.end()) { + return errors::NotFound("No proto found for key ", proto_key); + } + CompiledSubgraph* cache_entry = iter->second.first; + int proto_index = iter->second.second; + *entry = absl::make_unique(this, cache_entry, + proto_index); + return Status::OK(); +} } // namespace tpu } // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h b/tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h index cde6467b7af..7b206fb1cf4 100644 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h @@ -32,6 +32,7 @@ limitations under the License. #include "tensorflow/core/protobuf/config.pb.h" #include "tensorflow/core/tpu/kernels/compiled_subgraph.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache.pb.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_key.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_metrics.h" #include "tensorflow/core/tpu/kernels/trace_util.h" @@ -48,18 +49,20 @@ class CompilationRefHolder : public ResourceBase { ~CompilationRefHolder() override = default; }; -// Base class for a reference to a cached tpu program. 
A unique_ptr to a -// CompilationCacheEntryRef is returned by all the cache Lookup methods below, -// and ensures the underlying proto is not garbage-collected until the client -// discards the ptr. -template +// Wrapper for a cache entry returned by all the TpuCompilationCacheInterface +// `Lookup` methods, and ensures the underlying proto is not garbage-collected +// until the client discards the ptr. class CompilationCacheEntryRef { public: - virtual ~CompilationCacheEntryRef() = default; + CompilationCacheEntryRef(); + CompilationCacheEntryRef(TpuCompilationCacheInterface* parent, + CompiledSubgraph* entry, int index); - // Returns a CompilationCacheEntry that should not be used beyond the lifetime - // of the tpu::CompilationCacheEntryRef. - virtual CacheEntryType get() = 0; + virtual ~CompilationCacheEntryRef(); + + // Returns a TpuCompilationCacheEntry that should not be used beyond the + // lifetime of the CompilationCacheEntryRef. + virtual TpuCompilationCacheEntry get(); // Mutates this ref to point to the entry's subentry (for // sharding/unsharding) or main entry (unchanged) as specified by @@ -69,7 +72,15 @@ class CompilationCacheEntryRef { // // If the requested subentry does not exist, the ref will point to a nullptr // entry, and the original entry will be unref'ed. - virtual Status ToSubEntryRef(CompilationCacheFetchTarget fetch_target) = 0; + virtual Status ToSubEntryRef(CompilationCacheFetchTarget fetch_target); + + protected: + TpuCompilationCacheInterface* parent_; // Not owned. + // A reference to entry_ is acquired in the constructor and released via + // parent->DiscardEntryRefs in the destructor. + CompiledSubgraph* entry_; + // The index of the program in entry_ that is returned by the get method. + int index_; }; class TpuCompilationCacheInterface : public ResourceBase { @@ -97,7 +108,8 @@ class TpuCompilationCacheInterface : public ResourceBase { const TpuCompilationCacheKey& subgraph_key, const SessionMetadata* session_metadata, CompilationRefHolder* per_step_ref_holder, int64* uid, - std::vector* proto_key, std::vector* may_modify_variables, + std::vector* proto_key, + std::vector* may_modify_variables, absl::Span* hlo_metadatas, const std::function& compile_function); @@ -124,19 +136,18 @@ class TpuCompilationCacheInterface : public ResourceBase { // Looks up an executable corresponding to the model-parallel core index of // the subgraph represented by key. On success a pointer to an EntryRef // holding the program is returned in entry. - template - Status Lookup(const string& proto_key, std::unique_ptr* entry); + Status Lookup(const std::string& proto_key, + std::unique_ptr* entry); // Looks up an executable corresponding to the model-parallel core index of // the subgraph represented by uid. On success a pointer to an EntryRef // holding the program is returned in entry. - template Status Lookup(int64 uid, int proto_index, - std::unique_ptr* entry); + std::unique_ptr* entry); // Looks up the subgraph represented by uid, and returns the vector of keys, // one per core, corresponding to that subgraph. - Status GetKeysFromUid(int64 uid, std::vector* keys); + Status GetKeysFromUid(int64 uid, std::vector* keys); // Makes a reference holder for this cache, that can be stored in the per-step // resource manager and will ensure that compiled entries persist until the @@ -170,7 +181,7 @@ class TpuCompilationCacheInterface : public ResourceBase { // parent_->DiscardEntryRefs. 
void AddRef(CompiledSubgraph* entry); - string DebugString() const override; + std::string DebugString() const override; private: TpuCompilationCacheInterface* parent_; // Not owned. @@ -185,7 +196,8 @@ class TpuCompilationCacheInterface : public ResourceBase { const TpuCompilationCacheKey& subgraph_key, const SessionMetadata* session_metadata, CompilationRefHolder* per_step_ref_holder, int64* uid, - std::vector* proto_key, std::vector* may_modify_variables, + std::vector* proto_key, + std::vector* may_modify_variables, std::vector* removed_entries, absl::Span* hlo_metadatas, const std::function& compile_function); @@ -230,14 +242,14 @@ class TpuCompilationCacheInterface : public ResourceBase { ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); // Removes the entry with given key from cache. - size_t RemoveEntry(const string& key) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); + size_t RemoveEntry(const std::string& key) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); // Inserts the given key and entry to cache. - void InsertEntry(const string& key, CompiledSubgraph* entry) + void InsertEntry(const std::string& key, CompiledSubgraph* entry) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); // Returns the cache key matching given subgraph_key. - string FindCacheKey(const TpuCompilationCacheKey& subgraph_key) + std::string FindCacheKey(const TpuCompilationCacheKey& subgraph_key) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); // Creates a new entry by running initialize_programs and places it in the @@ -247,7 +259,7 @@ class TpuCompilationCacheInterface : public ResourceBase { // // **InitializeEntry releases mu_ during the call to initialize_programs.** virtual CompiledSubgraph* InitializeEntry( - const string& key, + const std::string& key, const std::function& initialize_programs, const TpuCompilationCacheKey& subgraph_key) @@ -276,13 +288,16 @@ class TpuCompilationCacheInterface : public ResourceBase { // cache_ key matching a given subgraph key. When doing a lookup, check // session_key_map_ first to avoid unnecessay fingerprint computation. // Map from key prefix + session_handle to a cache_ key. - absl::node_hash_map session_key_map_ ABSL_GUARDED_BY(mu_); + absl::node_hash_map session_key_map_ + ABSL_GUARDED_BY(mu_); // Map from key prefix + fingerprint to a cache_ key. - absl::node_hash_map fingerprint_key_map_ ABSL_GUARDED_BY(mu_); + absl::node_hash_map fingerprint_key_map_ + ABSL_GUARDED_BY(mu_); // All the subgraph entries that can be looked up in the cache. An entry is // marked for eviction iff it is present in cache_ and not in // entries_by_last_use_. - std::unordered_map cache_ ABSL_GUARDED_BY(mu_); + std::unordered_map cache_ + ABSL_GUARDED_BY(mu_); // All the subgraph entries that can be looked up in the cache, indexed by // uid. absl::node_hash_map entries_by_uid_ @@ -290,7 +305,7 @@ class TpuCompilationCacheInterface : public ResourceBase { // All the protos that can be looked up in the cache, indexed by proto // key. The value of the map is a subgraph and the index of the proto compiled // for that subgraph. - std::unordered_map> + std::unordered_map> entries_by_proto_key_ ABSL_GUARDED_BY(mu_); // Map from last_use to entry, used to mark entries for eviction in LRU // order. 
If an entry's last_use counter is not present as a key in @@ -304,50 +319,6 @@ class TpuCompilationCacheInterface : public ResourceBase { TpuCompilationCacheInterface& operator=(const TpuCompilationCacheInterface&) = delete; }; - -template -Status TpuCompilationCacheInterface::Lookup( - int64 uid, int proto_index, std::unique_ptr* entry) { - entry->reset(); - - profiler::TraceMe proto_lookup_traceme( - "TPU compilation cache proto lookup by uid", - /*level=*/2); - - absl::MutexLock lock(&mu_); - const auto iter = entries_by_uid_.find(uid); - if (iter == entries_by_uid_.end()) { - return errors::NotFound("No subgraph found for uid ", uid); - } - CompiledSubgraph* cache_entry = iter->second; - if (proto_index < 0 || - proto_index >= cache_entry->tpu_program_group->program_count()) { - return errors::NotFound("No proto found for core index ", proto_index, - " in subgraph with uid ", uid); - } - *entry = absl::make_unique(this, cache_entry, proto_index); - return Status::OK(); -} - -template -Status TpuCompilationCacheInterface::Lookup( - const string& proto_key, std::unique_ptr* entry) { - entry->reset(); - - profiler::TraceMe proto_lookup_traceme("TPU compilation cache proto lookup", - /*level=*/2); - - absl::MutexLock lock(&mu_); - const auto iter = entries_by_proto_key_.find(proto_key); - if (iter == entries_by_proto_key_.end()) { - return errors::NotFound("No proto found for key ", proto_key); - } - CompiledSubgraph* cache_entry = iter->second.first; - int proto_index = iter->second.second; - *entry = absl::make_unique(this, cache_entry, proto_index); - return Status::OK(); -} - } // namespace tpu } // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_local_lookup.cc b/tensorflow/core/tpu/kernels/tpu_compilation_cache_local_lookup.cc index f30a503d2d2..29864a310d1 100644 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_local_lookup.cc +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_local_lookup.cc @@ -16,70 +16,50 @@ limitations under the License. 
namespace tensorflow { namespace tpu { -namespace { -class CompilationCacheFetchTargetUtility { - public: - CompilationCacheFetchTargetUtility() - : names_({"Invalid", "Main", "Sharding", "Unsharding"}) {} - - std::string name(CompilationCacheFetchTarget target) const { - return names_[static_cast(target)]; - } - - private: - const std::vector names_; -}; - -std::string GetName(CompilationCacheFetchTarget target) { - static const auto* util = new CompilationCacheFetchTargetUtility(); - return util->name(target); -} - -} // namespace TpuCompilationCacheLocalLookup::TpuCompilationCacheLocalLookup( TpuCompilationCacheInterface* cache) - : cache_(cache) {} + : cache_(cache) { + cache_->Ref(); +} TpuCompilationCacheLocalLookup::~TpuCompilationCacheLocalLookup() { cache_->Unref(); } Status TpuCompilationCacheLocalLookup::Lookup( - const string& proto_key, - std::unique_ptr* entry, + const string& proto_key, std::unique_ptr* entry, CompilationCacheFetchTarget fetch_target) { profiler::TraceMe proto_lookup_traceme("Local TPU proto cache lookup", /*level=*/2); - Status s = cache_->Lookup( - proto_key, entry); + Status s = cache_->Lookup(proto_key, entry); VLOG(1) << "Looked up key " << proto_key << " in local subgraph cache status " << s; if (!s.ok()) { return s; } s = (*entry)->ToSubEntryRef(fetch_target); - - VLOG(1) << "Fetched subentry: " << GetName(fetch_target) << " with status " + VLOG(1) << "Fetched subentry: " + << CompilationCacheFetchTarget_Name(fetch_target) << " with status " << s; return s; } Status TpuCompilationCacheLocalLookup::Lookup( int64 uid, int proto_index, - std::unique_ptr* entry, + std::unique_ptr* entry, CompilationCacheFetchTarget fetch_target) { profiler::TraceMe proto_lookup_traceme("Local TPU proto cache lookup by uid", /*level=*/2); - Status s = cache_->Lookup( - uid, proto_index, entry); + Status s = cache_->Lookup(uid, proto_index, entry); VLOG(1) << "Looked up uid " << uid << ", index " << proto_index << " in local subgraph cache status " << s; if (!s.ok()) { return s; } s = (*entry)->ToSubEntryRef(fetch_target); - VLOG(1) << "Fetched subentry: " << GetName(fetch_target) << " with status " + VLOG(1) << "Fetched subentry: " + << CompilationCacheFetchTarget_Name(fetch_target) << " with status " << s; return s; } @@ -87,6 +67,5 @@ Status TpuCompilationCacheLocalLookup::Lookup( string TpuCompilationCacheLocalLookup::DebugString() const { return "TpuCompilationCacheLocalLookup"; } - } // namespace tpu } // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_local_lookup.h b/tensorflow/core/tpu/kernels/tpu_compilation_cache_local_lookup.h index eb5aadcd3e2..8db4c11ebea 100644 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_local_lookup.h +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_local_lookup.h @@ -28,24 +28,17 @@ namespace tpu { // Class for looking up TPU programs when the execute and compile Op are in the // same address space. The proto is simply looked up in the compilation cache, // without any serialization taking place. 
-class TpuCompilationCacheLocalLookup - : public TpuCompilationCacheLookup< - CompilationCacheEntryRef> { +class TpuCompilationCacheLocalLookup : public TpuCompilationCacheLookup { public: - using TpuCompilationCacheEntryRef = - ::tensorflow::tpu::CompilationCacheEntryRef; - using EntryRefImpl = - ::tensorflow::tpu::TpuCompilationCacheExternal::EntryRefImpl; - explicit TpuCompilationCacheLocalLookup(TpuCompilationCacheInterface* cache); ~TpuCompilationCacheLocalLookup() override; Status Lookup(const string& proto_key, - std::unique_ptr* entry, + std::unique_ptr* entry, CompilationCacheFetchTarget fetch_target) override; Status Lookup(int64 uid, int proto_index, - std::unique_ptr* entry, + std::unique_ptr* entry, CompilationCacheFetchTarget fetch_target) override; string DebugString() const override; diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.h b/tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.h index 0d1a53d31d2..ab476322a8a 100644 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.h +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.h @@ -23,10 +23,11 @@ limitations under the License. namespace tensorflow { namespace tpu { +// TODO(b/162241759): consider merging TpuCompilationCacheLookup and +// TpuCompilationCacheInterface. // Base class allowing Execute Ops to look up TPU programs. Different subclasses // are used when the execute Op is in the same address space as the compile Op, // and when they need to communicate over RPC. -template class TpuCompilationCacheLookup : public ResourceBase { public: ~TpuCompilationCacheLookup() override = default; @@ -43,12 +44,11 @@ class TpuCompilationCacheLookup : public ResourceBase { // fetch_target requests one of them, then after this call // (*entry)->get().get_executable() will return nullptr. virtual Status Lookup(const string& proto_key, - std::unique_ptr* entry, + std::unique_ptr* entry, CompilationCacheFetchTarget fetch_target) = 0; - virtual Status Lookup( - const string& proto_key, - std::unique_ptr* entry) { + virtual Status Lookup(const string& proto_key, + std::unique_ptr* entry) { return Lookup(proto_key, std::move(entry), CompilationCacheFetchTarget::MAIN); } @@ -58,17 +58,15 @@ class TpuCompilationCacheLookup : public ResourceBase { // returned in program. The wrapper is guaranteed to be valid only during the // execution of the Op requesting the proto. virtual Status Lookup(int64 uid, int proto_index, - std::unique_ptr* entry, + std::unique_ptr* entry, CompilationCacheFetchTarget fetch_target) = 0; - virtual Status Lookup( - int64 uid, int proto_index, - std::unique_ptr* entry) { + virtual Status Lookup(int64 uid, int proto_index, + std::unique_ptr* entry) { return Lookup(uid, proto_index, std::move(entry), CompilationCacheFetchTarget::MAIN); } }; - } // namespace tpu } // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_execute_op.cc b/tensorflow/core/tpu/kernels/tpu_execute_op.cc index 51c9dd481a3..0f451e52242 100644 --- a/tensorflow/core/tpu/kernels/tpu_execute_op.cc +++ b/tensorflow/core/tpu/kernels/tpu_execute_op.cc @@ -40,10 +40,12 @@ limitations under the License. 
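Because the interface is no longer templated on the entry type, every lookup implementation now hands callers the same CompilationCacheEntryRef. A minimal caller sketch, assuming the TF internal headers; the helper name and error message are hypothetical, while the Lookup, get, and fetch-target names are the ones declared here:

tensorflow::Status LookupMainEntry(
    tensorflow::tpu::TpuCompilationCacheLookup* lookup,
    const tensorflow::string& proto_key) {
  std::unique_ptr<tensorflow::tpu::CompilationCacheEntryRef> entry_ref;
  TF_RETURN_IF_ERROR(lookup->Lookup(
      proto_key, &entry_ref,
      tensorflow::tpu::CompilationCacheFetchTarget::MAIN));
  // get() is only valid while entry_ref is alive; for a missing
  // sharding/unsharding subentry it wraps a null program group.
  tensorflow::tpu::TpuCompilationCacheEntry entry = entry_ref->get();
  if (entry.tpu_program_group() == nullptr) {
    return tensorflow::errors::NotFound("No compiled program group in entry");
  }
  return tensorflow::Status::OK();
}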
#include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/platform/casts.h" #include "tensorflow/core/platform/tracing.h" #include "tensorflow/core/profiler/lib/traceme.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_external.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_local_lookup.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.h" #include "tensorflow/core/tpu/kernels/tpu_executable_info.pb.h" @@ -56,14 +58,10 @@ limitations under the License. #include "tensorflow/stream_executor/tpu/tpu_node_context.h" namespace tensorflow { - namespace { - +using ::tensorflow::tpu::CompilationCacheEntryRef; +using ::tensorflow::tpu::TpuCompilationCacheLookup; using ::tensorflow::tpu::TpuNodeContext; -using CompilationCacheEntryRef = ::tensorflow::tpu::CompilationCacheEntryRef< - ::tensorflow::tpu::TpuCompilationCacheEntry>; -using TpuCompilationCacheLookup = - ::tensorflow::tpu::TpuCompilationCacheLookup; // Looks up the input `key` in the compilation cache, populating // `*rendezvous_key_base` and `*entry`. @@ -641,28 +639,34 @@ Status TPUExecuteOp::DoWork(OpKernelContext* context) { profiler::TraceMe trace_me_init("TPUExecuteOp::Init", /*level=*/2); string rendezvous_key_base; - std::unique_ptr entry; + std::unique_ptr entry_ref; TF_RETURN_IF_ERROR( - GetComputationCacheEntry(context, &rendezvous_key_base, &entry)); + GetComputationCacheEntry(context, &rendezvous_key_base, &entry_ref)); // Shapes of the inputs and outputs, in xla::Shape form. - const TPUExecutableInfoProto* proto = entry->get().get_executable_info(); + tpu::TpuCompilationCacheEntry entry = entry_ref->get(); + const tpu::TpuProgramGroup* tpu_program_group = + tensorflow::down_cast( + entry.tpu_program_group()); + CHECK_NE(tpu_program_group, nullptr); + const TPUExecutableInfoProto& executable = + tpu_program_group->executable_info(); xla::Backend* const backend = node_context->backend(); xla::TransferManager* const transfer_manager = backend->transfer_manager(); TF_RET_CHECK(context->op_device_context()); se::Stream* stream = context->op_device_context()->stream(); - TF_RET_CHECK(proto->input_shapes_size() == 1); + TF_RET_CHECK(executable.input_shapes_size() == 1); - xla::Shape host_shape(proto->input_shapes(0)); + xla::Shape host_shape(executable.input_shapes(0)); TF_ASSIGN_OR_RETURN( auto variable_update_map, - BuildVariableUpdateMap(proto->variable_indices(), + BuildVariableUpdateMap(executable.variable_indices(), fused_device_var_reads_in_computation_inputs_, fused_device_var_updates_in_computation_outputs_, - proto->output_tensor_shapes().size())); + executable.output_tensor_shapes().size())); TF_ASSIGN_OR_RETURN( std::unique_ptr input_buffers, BuildComputationInputs(context, host_shape, variable_update_map, backend, @@ -697,8 +701,9 @@ Status TPUExecuteOp::DoWork(OpKernelContext* context) { // Snapshot the inputs, if a snapshot was requested. 
std::shared_ptr hlo_snapshot; - if (proto->has_session_module()) { - hlo_snapshot = std::make_shared(proto->session_module()); + if (executable.has_session_module()) { + hlo_snapshot = + std::make_shared(executable.session_module()); auto literal = std::make_shared(shaped_buffer.on_host_shape()); transfer_manager->TransferLiteralFromDevice( @@ -723,9 +728,9 @@ Status TPUExecuteOp::DoWork(OpKernelContext* context) { const uint32 rng_seed = GetXLARandomSeed(); std::unique_ptr device_assignment; - if (proto->has_device_assignment()) { + if (executable.has_device_assignment()) { TF_ASSIGN_OR_RETURN(device_assignment, xla::DeviceAssignment::Deserialize( - proto->device_assignment())); + executable.device_assignment())); } VLOG(4) << "Input buffers after alias resolution: " @@ -743,24 +748,25 @@ Status TPUExecuteOp::DoWork(OpKernelContext* context) { // we free a memory and reassign it to other users while a program is running, // all subsequent writes to the program that could possibly clobber the memory // will depend on the program to finish. - const TPUHostTransferInfoProto* host_transfer_info = - entry->get().get_host_transfer_info(); - const xla::HloProto* hlo_metadata = entry->get().get_hlo_metadata(); + const TPUHostTransferInfoProto& host_transfer_info = + tpu_program_group->host_transfer_info(); + const int core_index = entry.core_index(); TF_ASSIGN_OR_RETURN( xla::ExecutionOutput output, - TPUExecute(*proto, *host_transfer_info, *hlo_metadata, std::move(input), + TPUExecute(executable, host_transfer_info, + *tpu_program_group->hlo_metadata(core_index), std::move(input), rendezvous_key_base, rng_seed, node_context.get(), device_assignment.get(), context->cancellation_manager(), context, stream, transfer_stream_ptr.get(), - entry->get().get_tpu_program())); + tpu_program_group->tpu_program(core_index))); stream->ThenRecordEvent(definition_event.get()); TF_ASSIGN_OR_RETURN( std::unique_ptr output_buffers, - AllocateOutputTensors(context, output.ConsumeResult(), - proto->output_tensor_shapes(), variable_update_map, - node_context.get(), stream, device_ordinal, - input_buffers.get(), definition_event)); + AllocateOutputTensors( + context, output.ConsumeResult(), executable.output_tensor_shapes(), + variable_update_map, node_context.get(), stream, device_ordinal, + input_buffers.get(), definition_event)); // Transfer the outputs and save the snapshot to disk. 
if (hlo_snapshot) { diff --git a/tensorflow/core/tpu/kernels/tpu_program_group.cc b/tensorflow/core/tpu/kernels/tpu_program_group.cc index e22175af270..27b699e1acd 100644 --- a/tensorflow/core/tpu/kernels/tpu_program_group.cc +++ b/tensorflow/core/tpu/kernels/tpu_program_group.cc @@ -248,6 +248,12 @@ absl::Span TpuProgramGroup::hlo_metadatas() const { return hlo_metadatas_ptrs_; } +const xla::HloProto* TpuProgramGroup::hlo_metadata(int index) const { + CHECK_GE(index, 0); + CHECK_LT(index, hlo_metadatas_ptrs_.size()); + return hlo_metadatas_ptrs_[index]; +} + void TpuProgramGroup::RefreshHloMetadatasPtrs() { hlo_metadatas_ptrs_.reserve(hlo_metadatas_.size()); for (const auto& hlo_metadata_internal_ : hlo_metadatas_) { @@ -262,6 +268,61 @@ Status TpuProgramGroup::LogCompilationStats(const TpuCompilationCacheKey& key, return Status::OK(); } +const std::vector& TpuProgramGroup::may_modify_variables() const { + return may_modify_variables_; +} + +void TpuProgramGroup::set_may_modify_variables( + const std::vector& may_modify_variables) { + may_modify_variables_ = may_modify_variables; +} + +const tf2xla::HostComputeMetadata& TpuProgramGroup::host_compute_metadata() + const { + return host_compute_metadata_; +} + +void TpuProgramGroup::set_host_compute_metadata( + const tf2xla::HostComputeMetadata& host_compute_metadata) { + host_compute_metadata_ = host_compute_metadata; +} + +const std::vector& TpuProgramGroup::tpu_programs() const { + return tpu_programs_; +} + +const XLA_TpuProgram* TpuProgramGroup::tpu_program(int index) const { + CHECK_GE(index, 0); + CHECK_LT(index, tpu_programs_.size()); + return tpu_programs_[index]; +} + +void TpuProgramGroup::set_tpu_programs( + absl::Span tpu_programs) { + tpu_programs_.resize(tpu_programs.size()); + for (size_t i = 0; i < tpu_programs.size(); ++i) { + tpu_programs_[i] = tpu_programs[i]; + } +} + +const TPUExecutableInfoProto& TpuProgramGroup::executable_info() const { + return executable_info_; +} + +void TpuProgramGroup::set_executable_info( + const TPUExecutableInfoProto& executable_info) { + executable_info_ = executable_info; +} + +const TPUHostTransferInfoProto& TpuProgramGroup::host_transfer_info() const { + return host_transfer_info_; +} + +void TpuProgramGroup::set_host_transfer_info( + const TPUHostTransferInfoProto& host_transfer_info) { + host_transfer_info_ = host_transfer_info; +} + /*static*/ Status TpuProgramGroup::CompileAndBuild( const TpuCompilationRequestProto& compilation_request, diff --git a/tensorflow/core/tpu/kernels/tpu_program_group.h b/tensorflow/core/tpu/kernels/tpu_program_group.h index 4bc8cdd003a..5a36fa4e78d 100644 --- a/tensorflow/core/tpu/kernels/tpu_program_group.h +++ b/tensorflow/core/tpu/kernels/tpu_program_group.h @@ -117,47 +117,26 @@ class TpuProgramGroup : public TpuProgramGroupInterface { Status LogCompilationStats(const TpuCompilationCacheKey& key, absl::Duration duration) override; - const std::vector& may_modify_variables() const override { - return may_modify_variables_; - } - void set_may_modify_variables(const std::vector& may_modify_variables) { - may_modify_variables_ = may_modify_variables; - } + const std::vector& may_modify_variables() const override; + void set_may_modify_variables(const std::vector& may_modify_variables); - const tf2xla::HostComputeMetadata& host_compute_metadata() const { - return host_compute_metadata_; - } + const tf2xla::HostComputeMetadata& host_compute_metadata() const; void set_host_compute_metadata( - const tf2xla::HostComputeMetadata& host_compute_metadata) { - 
host_compute_metadata_ = host_compute_metadata; - } + const tf2xla::HostComputeMetadata& host_compute_metadata); - const std::vector& tpu_programs() const { - return tpu_programs_; - } - void set_tpu_programs(absl::Span tpu_programs) { - tpu_programs_.resize(tpu_programs.size()); - for (size_t i = 0; i < tpu_programs.size(); ++i) { - tpu_programs_[i] = tpu_programs[i]; - } - } + const std::vector& tpu_programs() const; + const XLA_TpuProgram* tpu_program(int index) const; + void set_tpu_programs(absl::Span tpu_programs); - const TPUExecutableInfoProto& executable_info() const { - return executable_info_; - } - void set_executable_info(const TPUExecutableInfoProto& executable_info) { - executable_info_ = executable_info; - } + const TPUExecutableInfoProto& executable_info() const; + void set_executable_info(const TPUExecutableInfoProto& executable_info); - const TPUHostTransferInfoProto& host_transfer_info() const { - return host_transfer_info_; - } + const TPUHostTransferInfoProto& host_transfer_info() const; void set_host_transfer_info( - const TPUHostTransferInfoProto& host_transfer_info) { - host_transfer_info_ = host_transfer_info; - } + const TPUHostTransferInfoProto& host_transfer_info); void set_hlo_metadata(const xla::HloProto& hlo_metadata); + const xla::HloProto* hlo_metadata(int index) const; absl::Span hlo_metadatas() const override; private: From 95b8235761baa6abc876fc4658488ab57cf64d5e Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Tue, 28 Jul 2020 20:56:10 -0400 Subject: [PATCH 1558/2522] Update gradients.cc --- tensorflow/cc/framework/gradients.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/cc/framework/gradients.cc b/tensorflow/cc/framework/gradients.cc index 4229c356eff..3195a357186 100644 --- a/tensorflow/cc/framework/gradients.cc +++ b/tensorflow/cc/framework/gradients.cc @@ -524,9 +524,8 @@ Status SymbolicGradientBuilder::AddGradients() { // make this association explicit. 
for (const Edge* e : n->in_edges()) { if (e->IsControlEdge()) continue; - int dx_index = e->dst_input(); - const int dx_size = dx.size(); - if (dx_index >= dx_size) { + size_t dx_index = e->dst_input(); + if (dx_index >= dx.size()) { return errors::Internal( "Invalid gradient output index: ", dx_index, " size: ", dx.size()); } From 5bbedc7ec0310e0df19978401efe36b381ba4712 Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Tue, 28 Jul 2020 20:58:19 -0400 Subject: [PATCH 1559/2522] Update tensorflow/core/profiler/convert/xplane_to_memory_profile.cc Co-authored-by: Mihai Maruseac --- tensorflow/core/profiler/convert/xplane_to_memory_profile.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/profiler/convert/xplane_to_memory_profile.cc b/tensorflow/core/profiler/convert/xplane_to_memory_profile.cc index e8783a71709..cc1da272851 100644 --- a/tensorflow/core/profiler/convert/xplane_to_memory_profile.cc +++ b/tensorflow/core/profiler/convert/xplane_to_memory_profile.cc @@ -413,8 +413,8 @@ void ProcessActiveAllocations(int64 peak_bytes_profile_step_id, allocation->set_special_index(-1); } allocation->set_num_occurrences(1); - const int active_allocs_size = active_allocs.size() - 1; - while (i < active_allocs_size && + const int last_alloc = active_allocs.size() - 1; + while (i < last_alloc && active_allocs[i] == active_allocs[i + 1]) { allocation->set_num_occurrences(allocation->num_occurrences() + 1); i++; From 3fb90eb802523cb6a878308e904e3c0535db9691 Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Tue, 28 Jul 2020 20:59:05 -0400 Subject: [PATCH 1560/2522] Update tensorflow/core/profiler/internal/tfprof_code.cc Co-authored-by: Mihai Maruseac --- tensorflow/core/profiler/internal/tfprof_code.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/profiler/internal/tfprof_code.cc b/tensorflow/core/profiler/internal/tfprof_code.cc index c37efcf222c..eb3501bc07b 100644 --- a/tensorflow/core/profiler/internal/tfprof_code.cc +++ b/tensorflow/core/profiler/internal/tfprof_code.cc @@ -452,8 +452,8 @@ void TFCode::Build() { GetTraceString(fn->call_stack()->traces().at(i)) + kGradientSuffix; pre_code_node = pre_code_node->AddChildren( trace, &fn->call_stack()->traces().at(i), kGradientSuffix); - const int64 max_stack_trace_allowed_size = fn->call_stack()->traces().size() - 1; - if (i == max_stack_trace_allowed_size) { + const int64 last_trace = fn->call_stack()->traces().size() - 1; + if (i == last_trace) { leaf = pre_code_node; } } From fc7228cdd403268d43f0c9fcb02d5350d33e3ad6 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Tue, 28 Jul 2020 17:52:45 -0700 Subject: [PATCH 1561/2522] [XLA] Change XLA_LOG_LINES to XLA_VLOG_LINES PiperOrigin-RevId: 323691228 Change-Id: I3e2c6da00e00dbf0b7513aa0750aa9eb061b6946 --- tensorflow/compiler/xla/service/copy_insertion.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/copy_insertion.cc b/tensorflow/compiler/xla/service/copy_insertion.cc index 6d1f81ba896..b88120d8128 100644 --- a/tensorflow/compiler/xla/service/copy_insertion.cc +++ b/tensorflow/compiler/xla/service/copy_insertion.cc @@ -1158,7 +1158,7 @@ static int64 GetNumExistingCopies(const HloModule* module) { Status CopyInsertion::RemoveUnnecessaryCopies(const HloOrdering& ordering, HloModule* module) { - XLA_LOG_LINES(4, module->ToString()); + XLA_VLOG_LINES(4, module->ToString()); TF_ASSIGN_OR_RETURN(std::unique_ptr alias_analysis, HloAliasAnalysis::Run(module, can_share_buffer_)); 
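Note on the bounds-check fixes in the patches above and below (gradients.cc, xplane_to_memory_profile.cc, tfprof_code.cc, padding.cc, op_stats_to_overview_page.cc, snapshot_util.cc): they all silence signed/unsigned comparison warnings in one of two ways, either by snapshotting the container's size() into a local of a deliberately chosen type before the comparison, or by converting the index itself. A minimal standalone sketch of both idioms follows; it is illustrative only and not part of any patch, and the names ('values', 'wanted') are invented:

  #include <cstddef>
  #include <vector>

  // Idiom 1: snapshot size() into a signed local so the comparison is
  // int vs. int (the style used in padding.cc and tfprof_code.cc).
  bool IndexValidSignedSize(const std::vector<int>& values, int wanted) {
    const int values_size = values.size();
    return wanted >= 0 && wanted < values_size;
  }

  // Idiom 2: convert the index to size_t so the comparison is unsigned vs.
  // unsigned (the style used in the gradients.cc change above); a negative
  // index wraps to a huge value and is rejected by the bound check.
  bool IndexValidUnsignedIndex(const std::vector<int>& values, int wanted) {
    const size_t index = static_cast<size_t>(wanted);
    return index < values.size();
  }
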
From 5d14e877b9703c1cd9df8f7011ba7526754be62d Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Tue, 28 Jul 2020 20:59:14 -0400 Subject: [PATCH 1562/2522] Update tensorflow/core/util/padding.cc Co-authored-by: Mihai Maruseac --- tensorflow/core/util/padding.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/util/padding.cc b/tensorflow/core/util/padding.cc index 6ffbc8040ed..3948f1af9b3 100644 --- a/tensorflow/core/util/padding.cc +++ b/tensorflow/core/util/padding.cc @@ -37,8 +37,8 @@ Status CheckValidPadding(Padding padding_type, const std::vector& explicit_paddings, int num_dims, TensorFormat data_format) { if (padding_type == Padding::EXPLICIT) { - const int explicit_paddings_size = explicit_paddings.size(); - if (explicit_paddings_size != 2 * num_dims) { + const int num_paddings = explicit_paddings.size(); + if (num_paddings != 2 * num_dims) { return errors::InvalidArgument( "explicit_paddings attribute must contain ", 2 * num_dims, " values, but got: ", explicit_paddings.size()); From 774441f2b9c8b5a6688bf552dbd8a5faf985dc8d Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Tue, 28 Jul 2020 21:05:23 -0400 Subject: [PATCH 1563/2522] Update op_stats_to_overview_page.cc --- tensorflow/core/profiler/convert/op_stats_to_overview_page.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc b/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc index cd0f10543df..196262e7d96 100644 --- a/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc +++ b/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc @@ -287,10 +287,10 @@ std::string TfFunctionRecommendationHtml(const TfFunctionDb& tf_function_db) { // Sorts candidates in descending order of expensive_call_percent. absl::c_sort(candidates, cmp); std::string expensive_functions = ""; - const int64 num_functions_shown = std::min( + auto num_functions_shown = std::min( static_cast(3), candidates.size()); - for (int64 i = 0; i < num_functions_shown; i++) { + for (int64 i = 0, end = num_functions_shown; i < end; i++) { if (i > 0) absl::StrAppend(&expensive_functions, ", "); absl::StrAppend(&expensive_functions, "\"", candidates[i].function_name, "\""); From 5bb7ef8fb94d07d2a6ca8d09cb43bda4f3846e9d Mon Sep 17 00:00:00 2001 From: Andy Ly Date: Tue, 28 Jul 2020 18:02:42 -0700 Subject: [PATCH 1564/2522] Update CompileGraphToXlaHlo to support resource XlaArguments and populate XlaCompilationResult.resource_updates instead of XlaCompilationResult.outputs for resource writes. This is necessary to enable MLIR support when compiling tf.functions with XLA that use resources. 
PiperOrigin-RevId: 323692573 Change-Id: If3af722e5beb98dfd8260a4cb2df8db8a2a550ff --- tensorflow/compiler/mlir/tensorflow/BUILD | 4 + .../tensorflow/utils/compile_mlir_util.cc | 140 +++++++++++++----- .../mlir/tensorflow/utils/compile_mlir_util.h | 1 + .../utils/compile_mlir_util_test.cc | 61 +++++++- 4 files changed, 166 insertions(+), 40 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/BUILD b/tensorflow/compiler/mlir/tensorflow/BUILD index fe1f47d8d69..8dcaf23a6b3 100644 --- a/tensorflow/compiler/mlir/tensorflow/BUILD +++ b/tensorflow/compiler/mlir/tensorflow/BUILD @@ -1459,6 +1459,7 @@ COMPILE_MLIR_UTIL_DEPS = [ ":mlir_roundtrip_flags", ":tensorflow", ":tensorflow_dialect_registration", + ":tensorflow_types", ":tensorflow_passes", ":translate_utils", "@com_google_absl//absl/types:optional", @@ -1520,6 +1521,9 @@ tf_cc_test( srcs = ["utils/compile_mlir_util_test.cc"], deps = [ ":compile_mlir_util", + "//tensorflow/cc:function_ops", + "//tensorflow/cc:resource_variable_ops", + "//tensorflow/cc:scope", "//tensorflow/compiler/jit", "//tensorflow/compiler/tf2xla:common", "//tensorflow/compiler/tf2xla:xla_compiler", diff --git a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc index 9d6cc8809ff..e27302071fa 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc @@ -17,11 +17,14 @@ limitations under the License. #include "absl/types/optional.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/raw_ostream.h" #include "mlir/Dialect/Shape/IR/Shape.h" // from @llvm-project #include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project +#include "mlir/IR/Attributes.h" // from @llvm-project #include "mlir/IR/Dialect.h" // from @llvm-project #include "mlir/IR/Function.h" // from @llvm-project #include "mlir/IR/Location.h" // from @llvm-project @@ -36,6 +39,7 @@ limitations under the License. #include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/passes.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" #include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" #include "tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.h" #include "tensorflow/compiler/mlir/tensorflow/translate/import_model.h" @@ -52,6 +56,7 @@ limitations under the License. #include "tensorflow/compiler/tf2xla/shape_util.h" #include "tensorflow/compiler/xla/service/hlo_sharding.h" #include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/platform/logging.h" namespace tensorflow { @@ -79,9 +84,15 @@ Status ParseMlirModule(llvm::StringRef mlir_module_string, return Status::OK(); } +// Arguments to a computation can be either a tensor or resource. +struct TensorOrResourceShape { + TensorShape shape; + bool is_resource = false; +}; + // Converts arg_shapes to xla::Shape's and store into xla_input_shapes. 
Status GetXlaInputShapes( - mlir::ModuleOp module, llvm::ArrayRef arg_shapes, + mlir::ModuleOp module, llvm::ArrayRef arg_shapes, bool use_tuple_args, const XlaHelpers::ShapeRepresentationFn shape_representation_fn, std::vector* xla_input_shapes) { @@ -103,7 +114,7 @@ Status GetXlaInputShapes( DataType dtype; TF_RETURN_IF_ERROR(ConvertToDataType(func_type.getInput(i), &dtype)); TF_ASSIGN_OR_RETURN(xla_shape, - shape_representation_fn(arg_shapes[i], dtype, + shape_representation_fn(arg_shapes[i].shape, dtype, /*use_fast_memory=*/false)); // Rewrite layout with sharding, if sharding is set. @@ -132,11 +143,13 @@ Status GetXlaInputShapes( } // Calculates computation output shape and build OutputDescription for each -// output based on static shapes in MLIR module +// output based on static shapes in MLIR module. If an output is a resource +// write, `resource_updates` is populated insead of `outputs` for that output. Status GetOutputInfo( mlir::ModuleOp module, const XlaHelpers::ShapeRepresentationFn shape_representation_fn, - xla::Shape* xla_output_shape, std::vector* outputs) { + xla::Shape* xla_output_shape, std::vector* outputs, + std::vector* resource_updates) { auto shape_representation_fn_no_fast_memory = [shape_representation_fn](const TensorShape& shape, DataType dtype) { return shape_representation_fn(shape, dtype, /*use_fast_memory=*/false); @@ -147,17 +160,37 @@ Status GetOutputInfo( outputs->clear(); outputs->reserve(func_type.getNumResults()); + resource_updates->reserve(func_type.getNumResults()); std::vector shapes; shapes.reserve(func_type.getNumResults()); - for (mlir::Type type : func_type.getResults()) { + llvm::SmallDenseMap resource_arg_to_write; + for (unsigned i = 0; i < main_func.getNumArguments(); ++i) + if (auto aliasing_output = main_func.getArgAttrOfType( + i, "tf.aliasing_output")) + resource_arg_to_write.insert({aliasing_output.getInt(), i}); + + for (auto type_and_idx : llvm::enumerate(func_type.getResults())) { TF_ASSIGN_OR_RETURN( xla::Shape shape, - xla::TypeToShape(type, shape_representation_fn_no_fast_memory)); - auto tensor_type = type.dyn_cast(); + xla::TypeToShape(type_and_idx.value(), + shape_representation_fn_no_fast_memory)); + auto tensor_type = type_and_idx.value().dyn_cast(); shapes.push_back(shape); + auto it = resource_arg_to_write.find(type_and_idx.index()); + if (it != resource_arg_to_write.end()) { + // Add resource write. + resource_updates->emplace_back(); + XlaResourceUpdate& resource_update = resource_updates->back(); + resource_update.input_index = it->getSecond(); + resource_update.modified = true; + TF_RETURN_IF_ERROR(ConvertToDataType(tensor_type, &resource_update.type)); + TF_RETURN_IF_ERROR(XLAShapeToTensorShape(shape, &resource_update.shape)); + continue; + } + // Construct OutputDescription for result. outputs->emplace_back(); XlaOutputDescription& out_desc = outputs->back(); @@ -180,14 +213,6 @@ Status GetOutputInfo( return Status::OK(); } -// Gets information about how computation updates Tensorflow resources. -// TODO(ycao): Implement logic to compute resource updates when we need to -// support graphs with resource updates in MLIR-based TF compiler bridge. -void GetResourceUpdatesForMlir( - std::vector* resource_updates) { - resource_updates->clear(); -} - // Creates a vector that maps from the parameters of the XLA computation to // their original argument positions. 
// MLIR-based TF-Compiler bridge doesn't have constant analysis yet, thus no @@ -201,7 +226,7 @@ void GetInputMappingForMlir(int num_inputs, std::vector* input_mapping) { } // Refine MLIR types based on new shape information. -Status RefineShapes(llvm::ArrayRef arg_shapes, +Status RefineShapes(llvm::ArrayRef arg_shapes, mlir::ModuleOp module) { auto producer_or = GetTfGraphProducerVersion(module); if (!producer_or.ok()) return producer_or.status(); @@ -212,15 +237,20 @@ Status RefineShapes(llvm::ArrayRef arg_shapes, { // Convert arg_shapes to a mlir friendly format. size_t count = 0; - for (const TensorShape& shape : arg_shapes) { - count += shape.dims(); + for (const TensorOrResourceShape& tensor_resource_shape : arg_shapes) { + if (tensor_resource_shape.is_resource) continue; + count += tensor_resource_shape.shape.dims(); } shape_backing.resize(count); arg_shapes_copy.reserve(arg_shapes.size()); size_t offset = 0; - for (const TensorShape& shape : arg_shapes) { + for (const TensorOrResourceShape& tensor_resource_shape : arg_shapes) { + if (tensor_resource_shape.is_resource) { + arg_shapes_copy.push_back(llvm::ArrayRef()); + continue; + } size_t start = offset; - for (tensorflow::TensorShapeDim dim : shape) { + for (tensorflow::TensorShapeDim dim : tensor_resource_shape.shape) { shape_backing[offset] = dim.size; ++offset; } @@ -338,7 +368,7 @@ Status ConvertMLIRToXlaComputation( } static Status CompileMlirToXlaHlo( - mlir::ModuleOp module_op, llvm::ArrayRef arg_shapes, + mlir::ModuleOp module_op, llvm::ArrayRef arg_shapes, llvm::StringRef device_type, bool use_tuple_args, XlaHelpers::ShapeRepresentationFn shape_representation_fn, XlaCompilationResult* compilation_result, @@ -372,14 +402,10 @@ static Status CompileMlirToXlaHlo( shape_representation_fn, &compilation_result->xla_input_shapes)); - // Compute all output descriptions. - TF_RETURN_IF_ERROR(GetOutputInfo(module_op, shape_representation_fn, - &compilation_result->xla_output_shape, - &compilation_result->outputs)); - - // Compute what resource variables need to be updated after XlaComputation's - // execution. - GetResourceUpdatesForMlir(&compilation_result->resource_updates); + // Compute all output descriptions and resource writes + TF_RETURN_IF_ERROR(GetOutputInfo( + module_op, shape_representation_fn, &compilation_result->xla_output_shape, + &compilation_result->outputs, &compilation_result->resource_updates)); if (VLOG_IS_ON(1)) tensorflow::DumpMlirOpToFile("mlir_compile_after", module_op); @@ -399,26 +425,51 @@ Status CompileSerializedMlirToXlaHlo( TF_RETURN_IF_ERROR( ParseMlirModule(mlir_module_string, &mlir_context, &mlir_module)); - return CompileMlirToXlaHlo(mlir_module.get(), arg_shapes, device_type, - use_tuple_args, shape_representation_fn, - compilation_result, + llvm::SmallVector tensor_or_resource_shapes; + tensor_or_resource_shapes.reserve(arg_shapes.size()); + for (const auto& arg_shape : arg_shapes) + tensor_or_resource_shapes.push_back({arg_shape}); + return CompileMlirToXlaHlo(mlir_module.get(), tensor_or_resource_shapes, + device_type, use_tuple_args, + shape_representation_fn, compilation_result, std::move(custom_legalization_passes)); } // Rewrites the given module with specified args. For each of the constant args, // it gets inlined in the "main' function and the corresponding argument is -// removed from the signature. +// removed from the signature. For resource args, their subtypes are populated. // Returns the original indices for the other arguments on success. 
static StatusOr> RewriteWithArgs( mlir::ModuleOp module, llvm::ArrayRef args) { mlir::FuncOp main_fn = module.lookupSymbol("main"); std::vector params; + bool has_resource_args = false; auto builder = mlir::OpBuilder(main_fn.getBody()); std::vector args_to_erase; for (int idx = 0; idx < args.size(); idx++) { const XlaArgument& xla_arg = args[idx]; mlir::BlockArgument mlir_arg = main_fn.getArgument(idx); + if (xla_arg.kind == XlaArgument::kResource) { + mlir::Type element_type; + TF_RETURN_IF_ERROR(ConvertDataType(xla_arg.type, builder, &element_type)); + auto resource_shape = absl::get(xla_arg.shape).dim_sizes(); + llvm::SmallVector resource_subtype_shape( + resource_shape.begin(), resource_shape.end()); + auto resource_subtype = + mlir::RankedTensorType::get(resource_subtype_shape, element_type); + auto resource_type = + mlir::TF::ResourceType::get({resource_subtype}, builder.getContext()); + + auto tensor_type = mlir_arg.getType().cast(); + if (tensor_type.hasRank()) { + mlir_arg.setType( + mlir::RankedTensorType::get(tensor_type.getShape(), resource_type)); + } else { + mlir_arg.setType(mlir::UnrankedTensorType::get(resource_type)); + } + has_resource_args = true; + } if (xla_arg.kind != XlaArgument::kConstant) { params.push_back(idx); continue; @@ -433,7 +484,19 @@ static StatusOr> RewriteWithArgs( args_to_erase.push_back(idx); } + if (has_resource_args) { + llvm::SmallVector updated_argument_types; + updated_argument_types.reserve(main_fn.getNumArguments()); + for (mlir::BlockArgument& arg : main_fn.getArguments()) + updated_argument_types.push_back(arg.getType()); + + main_fn.setType(mlir::FunctionType::get(updated_argument_types, + main_fn.getType().getResults(), + main_fn.getContext())); + } + for (int idx : llvm::reverse(args_to_erase)) main_fn.eraseArgument(idx); + return params; } @@ -456,10 +519,13 @@ Status CompileGraphToXlaHlo( mlir::ModuleOp module = module_or.ValueOrDie().get(); TF_ASSIGN_OR_RETURN(std::vector remaining_params, RewriteWithArgs(module, {args.data(), args.size()})); - llvm::SmallVector arg_shapes; - arg_shapes.reserve(args.size()); - for (unsigned idx : remaining_params) - arg_shapes.push_back(absl::get(args[idx].shape)); + llvm::SmallVector arg_shapes; + arg_shapes.reserve(remaining_params.size()); + for (unsigned idx : remaining_params) { + const auto& arg = args[idx]; + arg_shapes.push_back({absl::get(arg.shape), + /*is_resource=*/arg.kind == XlaArgument::kResource}); + } mlir::PassManager pm(&context); mlir::TF::StandardPipelineOptions tf_options; diff --git a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.h b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.h index 719a96f52d4..5c64a65ecbd 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.h +++ b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.h @@ -73,6 +73,7 @@ Status CompileSerializedMlirToXlaHlo( std::vector> custom_legalization_passes = {}); // Same as the above but takes input as TensorFlow Graph. +// TODO(lyandy): Allow populating of targets/control outputs. 
Status CompileGraphToXlaHlo( const Graph& graph, llvm::ArrayRef args, llvm::StringRef device_type, bool use_tuple_args, diff --git a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util_test.cc b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util_test.cc index dde2408c83a..6ebf6897bb1 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util_test.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util_test.cc @@ -15,6 +15,9 @@ limitations under the License. #include "tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.h" +#include "tensorflow/cc/framework/scope.h" +#include "tensorflow/cc/ops/function_ops.h" +#include "tensorflow/cc/ops/resource_variable_ops.h" #include "tensorflow/compiler/tf2xla/shape_util.h" #include "tensorflow/compiler/tf2xla/xla_compiler.h" #include "tensorflow/compiler/xla/service/hlo_module.h" @@ -448,9 +451,6 @@ TEST(CompileGraphToXlaHlo, Basic) { FunctionLibraryDefinition flib_def(OpRegistry::Global(), {}); Graph graph(OpRegistry::Global()); - Tensor dummy_tensor(DT_FLOAT, TensorShape({1})); - test::FillValues(&dummy_tensor, {-1.0}); - Node* arg = test::graph::Arg(&graph, 0, DT_FLOAT); test::graph::Retval(&graph, 0, arg); @@ -483,5 +483,60 @@ ENTRY %main.3 (Arg_0.1: f32[]) -> (f32[]) { status_or_hlo_module.ValueOrDie()->ToString()); } +// Tests a conversion from Graph to MLIR with resource arguments. +TEST(CompileGraphToXlaHlo, Resources) { + FunctionLibraryDefinition flib_def(OpRegistry::Global(), {}); + Graph graph(OpRegistry::Global()); + + Scope scope = Scope::NewRootScope().ExitOnError(); + auto val = ops::_Arg(scope.WithOpName("arg0"), DT_FLOAT, 0); + auto var = ops::_Arg(scope.WithOpName("arg1"), DT_RESOURCE, 1); + auto assign = + ops::AssignVariableOp(scope.WithOpName("assign_variable"), var, val); + TF_ASSERT_OK(scope.ToGraph(&graph)); + + XlaCompiler::CompilationResult result; + XlaCompiler::Argument arg0; + arg0.kind = XlaCompiler::Argument::kParameter; + arg0.shape = TensorShape({2}); + XlaCompiler::Argument arg1; + arg1.kind = XlaCompiler::Argument::kResource; + arg1.shape = TensorShape({2}); + arg1.type = DT_FLOAT; + + TF_ASSERT_OK( + CompileGraphToXlaHlo(graph, /*args=*/{arg0, arg1}, "XLA_CPU_JIT", + /*use_tuple_args=*/false, flib_def, GraphDebugInfo(), + /*shape_representation_fn=*/nullptr, &result)); + + EXPECT_EQ(result.outputs.size(), 0); + ASSERT_EQ(result.resource_updates.size(), 1); + const auto& resource_update = result.resource_updates[0]; + EXPECT_EQ(resource_update.input_index, 1); + EXPECT_EQ(resource_update.modified, true); + EXPECT_EQ(resource_update.shape, TensorShape({2})); + EXPECT_EQ(resource_update.type, DT_FLOAT); + + const xla::HloModuleConfig module_config( + result.computation->GetProgramShape().ValueOrDie()); + auto status_or_hlo_module = xla::HloModule::CreateFromProto( + result.computation->proto(), module_config); + ASSERT_TRUE(status_or_hlo_module.ok()); + + constexpr char expected_hlo_module_string[] = + R"(HloModule main.4, input_output_alias={ {0}: 1 } + +ENTRY %main.4 (Arg_0.1: f32[2], Arg_1.2: f32[2]) -> (f32[2]) { + %Arg_1.2 = f32[2]{0} parameter(1) + %Arg_0.1 = f32[2]{0} parameter(0) + ROOT %tuple.3 = (f32[2]{0}) tuple(f32[2]{0} %Arg_0.1) +} + +)"; + + EXPECT_EQ(expected_hlo_module_string, + status_or_hlo_module.ValueOrDie()->ToString()); +} + } // namespace } // namespace tensorflow From d2ce989ee65ce40d8cba8e446eaf64f8a5105adf Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Tue, 28 Jul 2020 21:07:38 -0400 Subject: [PATCH 1565/2522] Update 
tensorflow/core/kernels/data/experimental/snapshot_util.cc Co-authored-by: Mihai Maruseac --- tensorflow/core/kernels/data/experimental/snapshot_util.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/data/experimental/snapshot_util.cc b/tensorflow/core/kernels/data/experimental/snapshot_util.cc index c063217b8e0..4c5ab7f9e70 100644 --- a/tensorflow/core/kernels/data/experimental/snapshot_util.cc +++ b/tensorflow/core/kernels/data/experimental/snapshot_util.cc @@ -514,8 +514,8 @@ class Reader::NestedDataset : public DatasetBase { Status GetNextInternal(IteratorContext* ctx, std::vector* out_tensors, bool* end_of_sequence) override { - const int64 dataset_datasets_size = dataset()->datasets_.size(); - *end_of_sequence = dataset_datasets_size == index_; + const int64 num_datasets = dataset()->datasets_.size(); + *end_of_sequence = num_datasets == index_; if (!*end_of_sequence) { Tensor tensor(DT_VARIANT, TensorShape({})); From 3564540369fba835c613f7fed3ef9707ae34aa6e Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Tue, 28 Jul 2020 21:09:12 -0400 Subject: [PATCH 1566/2522] Update xplane_to_memory_profile.cc --- tensorflow/core/profiler/convert/xplane_to_memory_profile.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/core/profiler/convert/xplane_to_memory_profile.cc b/tensorflow/core/profiler/convert/xplane_to_memory_profile.cc index cc1da272851..9a5130f63be 100644 --- a/tensorflow/core/profiler/convert/xplane_to_memory_profile.cc +++ b/tensorflow/core/profiler/convert/xplane_to_memory_profile.cc @@ -414,8 +414,7 @@ void ProcessActiveAllocations(int64 peak_bytes_profile_step_id, } allocation->set_num_occurrences(1); const int last_alloc = active_allocs.size() - 1; - while (i < last_alloc && - active_allocs[i] == active_allocs[i + 1]) { + while (i < last_alloc && active_allocs[i] == active_allocs[i + 1]) { allocation->set_num_occurrences(allocation->num_occurrences() + 1); i++; } From 234ddcfdd56d19696d97fa705b8dfbb29a3224f9 Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Tue, 28 Jul 2020 21:13:41 -0400 Subject: [PATCH 1567/2522] Update snapshot_util.cc --- tensorflow/core/kernels/data/experimental/snapshot_util.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/data/experimental/snapshot_util.cc b/tensorflow/core/kernels/data/experimental/snapshot_util.cc index 4c5ab7f9e70..2e6c901b79f 100644 --- a/tensorflow/core/kernels/data/experimental/snapshot_util.cc +++ b/tensorflow/core/kernels/data/experimental/snapshot_util.cc @@ -906,8 +906,8 @@ Status DetermineOpState(const std::string& mode_string, bool file_exists, return Status::OK(); } - int64 expiration_timer = EnvTime::NowMicros() - - pending_snapshot_expiry_seconds * 1000000; + int64 expiration_timer = static_cast(EnvTime::NowMicros()) + - pending_snapshot_expiry_seconds * 1000000; if (metadata->creation_timestamp() >= expiration_timer) { // Someone else is already writing and time has not expired. 
From c95202254288b7df371f996150803679d6280d14 Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Tue, 28 Jul 2020 21:27:13 -0400 Subject: [PATCH 1568/2522] Update xla_builder.cc --- tensorflow/compiler/xla/client/xla_builder.cc | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc index db437142665..84843ad821f 100644 --- a/tensorflow/compiler/xla/client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_builder.cc @@ -3024,12 +3024,7 @@ StatusOr XlaBuilder::AddInstruction(HloInstructionProto&& instr, instr.add_operand_ids(operand.handle()); } - if (one_shot_metadata_.has_value()) { - *instr.mutable_metadata() = one_shot_metadata_.value(); - one_shot_metadata_.reset(); - } else { - *instr.mutable_metadata() = metadata_; - } + *instr.mutable_metadata() = metadata_; if (sharding_) { *instr.mutable_sharding() = *sharding_; } From a1568444559ee2dbdc98e82cd98dc097318adfbc Mon Sep 17 00:00:00 2001 From: Anna R Date: Tue, 28 Jul 2020 18:42:45 -0700 Subject: [PATCH 1569/2522] Legalize matrix-diag-part to xla. PiperOrigin-RevId: 323698391 Change-Id: Ie8d898ddf2309b16019afafba590f3461ecd8ca1 --- .../mlir/tensorflow/ir/tf_generated_ops.td | 120 -------- .../compiler/mlir/xla/tests/legalize-tf.mlir | 134 --------- .../mlir/xla/transforms/legalize_tf.cc | 266 +----------------- 3 files changed, 2 insertions(+), 518 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td index 721513aa039..0fe8dd647a7 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td @@ -5068,126 +5068,6 @@ which has shape (2, 4, 4) TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } -def TF_MatrixDiagPartV3Op : TF_Op<"MatrixDiagPartV3", [NoSideEffect]> { - let summary = "Returns the batched diagonal part of a batched tensor."; - - let description = [{ -Returns a tensor with the `k[0]`-th to `k[1]`-th diagonals of the batched -`input`. - -Assume `input` has `r` dimensions `[I, J, ..., L, M, N]`. -Let `max_diag_len` be the maximum length among all diagonals to be extracted, -`max_diag_len = min(M + min(k[1], 0), N + min(-k[0], 0))` -Let `num_diags` be the number of diagonals to extract, -`num_diags = k[1] - k[0] + 1`. - -If `num_diags == 1`, the output tensor is of rank `r - 1` with shape -`[I, J, ..., L, max_diag_len]` and values: - -``` -diagonal[i, j, ..., l, n] - = input[i, j, ..., l, n+y, n+x] ; if 0 <= n+y < M and 0 <= n+x < N, - padding_value ; otherwise. -``` -where `y = max(-k[1], 0)`, `x = max(k[1], 0)`. - -Otherwise, the output tensor has rank `r` with dimensions -`[I, J, ..., L, num_diags, max_diag_len]` with values: - -``` -diagonal[i, j, ..., l, m, n] - = input[i, j, ..., l, n+y, n+x] ; if 0 <= n+y < M and 0 <= n+x < N, - padding_value ; otherwise. -``` -where `d = k[1] - m`, `y = max(-d, 0) - offset`, and `x = max(d, 0) - offset`. - -`offset` is zero except when the alignment of the diagonal is to the right. -``` -offset = max_diag_len - diag_len(d) ; if (`align` in {RIGHT_LEFT, RIGHT_RIGHT} - and `d >= 0`) or - (`align` in {LEFT_RIGHT, RIGHT_RIGHT} - and `d <= 0`) - 0 ; otherwise -``` -where `diag_len(d) = min(cols - max(d, 0), rows + min(d, 0))`. - -The input must be at least a matrix. 
- -For example: - -``` -input = np.array([[[1, 2, 3, 4], # Input shape: (2, 3, 4) - [5, 6, 7, 8], - [9, 8, 7, 6]], - [[5, 4, 3, 2], - [1, 2, 3, 4], - [5, 6, 7, 8]]]) - -# A main diagonal from each batch. -tf.matrix_diag_part(input) ==> [[1, 6, 7], # Output shape: (2, 3) - [5, 2, 7]] - -# A superdiagonal from each batch. -tf.matrix_diag_part(input, k = 1) - ==> [[2, 7, 6], # Output shape: (2, 3) - [4, 3, 8]] - -# A band from each batch. -tf.matrix_diag_part(input, k = (-1, 2)) - ==> [[[0, 3, 8], # Output shape: (2, 4, 3) - [2, 7, 6], - [1, 6, 7], - [5, 8, 0]], - [[0, 3, 4], - [4, 3, 8], - [5, 2, 7], - [1, 6, 0]]] - -# LEFT_RIGHT alignment. -tf.matrix_diag_part(input, k = (-1, 2), align="LEFT_RIGHT") - ==> [[[3, 8, 0], # Output shape: (2, 4, 3) - [2, 7, 6], - [1, 6, 7], - [0, 5, 8]], - [[3, 4, 0], - [4, 3, 8], - [5, 2, 7], - [0, 1, 6]]] - -# max_diag_len can be shorter than the main diagonal. -tf.matrix_diag_part(input, k = (-2, -1)) - ==> [[[5, 8], - [9, 0]], - [[1, 6], - [5, 0]]] - -# padding_value = 9 -tf.matrix_diag_part(input, k = (1, 3), padding_value = 9) - ==> [[[9, 9, 4], # Output shape: (2, 3, 3) - [9, 3, 8], - [2, 7, 6]], - [[9, 9, 2], - [9, 3, 4], - [4, 3, 8]]] - -``` - }]; - - let arguments = (ins - TF_Tensor:$input, - I32Tensor:$k, - TF_Tensor:$padding_value, - - DefaultValuedAttr, "RIGHT_LEFT">:$align - ); - - let results = (outs - TF_Tensor:$diagonal - ); - - TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; -} - def TF_MatrixDiagV2Op : TF_Op<"MatrixDiagV2", [NoSideEffect]> { let summary = [{ Returns a batched diagonal tensor with given batched diagonal values. diff --git a/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir b/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir index 3ce6e63d53d..b09ccf025b0 100644 --- a/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir +++ b/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir @@ -495,140 +495,6 @@ func @diag_part(%arg0: tensor<4x3x4x3xf32>) -> tensor<4x3xf32> { return %0: tensor<4x3xf32> } -//===----------------------------------------------------------------------===// -// MatrixDiagPart -//===----------------------------------------------------------------------===// - -// CHECK-LABEL: func @matrix_diag_part -// CHECK-SAME: %[[ARG:.*]]: tensor<7x140x128xi32> -func @matrix_diag_part(%arg0: tensor<7x140x128xi32>) -> tensor<7x22x128xi32> { - // CHECK: %[[V0:.*]] = mhlo.constant dense<42> : tensor - // CHECK: %[[V1:.*]] = mhlo.constant dense<[-10, 11]> : tensor<2xi32> - // CHECK: %[[V2:.*]] = "mhlo.iota"() {iota_dimension = 1 : i64} : () -> tensor<1x22x128xi32> - // CHECK: %[[V3:.*]] = "mhlo.iota"() {iota_dimension = 2 : i64} : () -> tensor<1x22x128xi32> - // CHECK: %[[V4:.*]] = mhlo.constant dense<0> : tensor - // CHECK: %[[V5:.*]] = "mhlo.broadcast"(%[[V4]]) {broadcast_sizes = dense<[1, 22, 128]> : tensor<3xi64>} : (tensor) -> tensor<1x22x128xi32> - // CHECK: %[[V6:.*]] = mhlo.constant dense : tensor - // CHECK: %[[V7:.*]] = "mhlo.broadcast"(%[[V6]]) {broadcast_sizes = dense<[1, 22, 128]> : tensor<3xi64>} : (tensor) -> tensor<1x22x128xi1> - // CHECK: %[[V8:.*]] = mhlo.constant dense : tensor - // CHECK: %[[V9:.*]] = "mhlo.broadcast"(%[[V8]]) {broadcast_sizes = dense<[1, 22, 128]> : tensor<3xi64>} : (tensor) -> tensor<1x22x128xi1> - // CHECK: %[[V10:.*]] = mhlo.constant dense<11> : tensor - // CHECK: %[[V11:.*]] = "mhlo.broadcast"(%[[V10]]) {broadcast_sizes = dense<[1, 22, 128]> : tensor<3xi64>} : (tensor) -> tensor<1x22x128xi32> - // CHECK: %[[V12:.*]] = mhlo.constant dense<140> : tensor - // CHECK: %[[V13:.*]] = 
"mhlo.broadcast"(%[[V12]]) {broadcast_sizes = dense<[1, 22, 128]> : tensor<3xi64>} : (tensor) -> tensor<1x22x128xi32> - // CHECK: %[[V14:.*]] = mhlo.constant dense<128> : tensor - // CHECK: %[[V15:.*]] = "mhlo.broadcast"(%[[V14]]) {broadcast_sizes = dense<[1, 22, 128]> : tensor<3xi64>} : (tensor) -> tensor<1x22x128xi32> - // CHECK: %[[V16:.*]] = mhlo.constant dense<128> : tensor - // CHECK: %[[V17:.*]] = "mhlo.broadcast"(%[[V16]]) {broadcast_sizes = dense<[1, 22, 128]> : tensor<3xi64>} : (tensor) -> tensor<1x22x128xi32> - // CHECK-DAG: %[[V18:.*]] = mhlo.subtract %[[V11]], %[[V2]] : tensor<1x22x128xi32> - // CHECK-DAG: %[[V19:.*]] = "mhlo.negate"(%[[V18]]) : (tensor<1x22x128xi32>) -> tensor<1x22x128xi32> - // CHECK-DAG: %[[V20:.*]] = mhlo.minimum %[[V18]], %[[V5]] : tensor<1x22x128xi32> - // CHECK-DAG: %[[V21:.*]] = mhlo.maximum %[[V18]], %[[V5]] : tensor<1x22x128xi32> - // CHECK-DAG: %[[V22:.*]] = mhlo.add %[[V13]], %[[V20]] : tensor<1x22x128xi32> - // CHECK-DAG: %[[V24:.*]] = mhlo.minimum %[[V22]], %{{[0-9]*}} : tensor<1x22x128xi32> - // CHECK-DAG: %[[V25:.*]] = chlo.broadcast_compare %[[V18]], %[[V5]] {comparison_direction = "GE"} : (tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<1x22x128xi1> - // CHECK-DAG: %[[V26:.*]] = mhlo.subtract %[[V17]], %[[V24]] : tensor<1x22x128xi32> - // CHECK-DAG: %[[V27:.*]] = "mhlo.select"(%[[V25]], %[[V26]], %[[V5]]) : (tensor<1x22x128xi1>, tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<1x22x128xi32> - // CHECK-DAG: %[[V28:.*]] = mhlo.subtract %[[V21]], %[[V27]] : tensor<1x22x128xi32> - // CHECK-DAG: %[[V29:.*]] = mhlo.maximum %[[V19]], %[[V5]] : tensor<1x22x128xi32> - // CHECK-DAG: %[[V30:.*]] = mhlo.subtract %[[V29]], %[[V27]] : tensor<1x22x128xi32> - // CHECK-DAG: %[[V31:.*]] = mhlo.add %[[V3]], %[[V28]] : tensor<1x22x128xi32> - // CHECK-DAG: %[[V32:.*]] = mhlo.add %[[V3]], %[[V30]] : tensor<1x22x128xi32> - // CHECK-DAG: %[[V33:.*]] = chlo.broadcast_compare %[[V31]], %[[V5]] {comparison_direction = "GE"} : (tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<1x22x128xi1> - // CHECK-DAG: %[[V34:.*]] = chlo.broadcast_compare %[[V31]], %[[V15]] {comparison_direction = "LT"} : (tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<1x22x128xi1> - // CHECK-DAG: %[[V35:.*]] = mhlo.and %[[V33]], %[[V34]] : tensor<1x22x128xi1> - // CHECK-DAG: %[[V36:.*]] = chlo.broadcast_compare %[[V32]], %[[V5]] {comparison_direction = "GE"} : (tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<1x22x128xi1> - // CHECK-DAG: %[[V37:.*]] = chlo.broadcast_compare %[[V32]], %[[V13]] {comparison_direction = "LT"} : (tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<1x22x128xi1> - // CHECK-DAG: %[[V38:.*]] = mhlo.and %[[V36]], %[[V37]] : tensor<1x22x128xi1> - // CHECK-DAG: %[[V39:.*]] = mhlo.and %[[V35]], %[[V38]] : tensor<1x22x128xi1> - // CHECK-DAG: %[[V40:.*]] = "mhlo.reshape"(%[[V39]]) : (tensor<1x22x128xi1>) -> tensor<22x128xi1> - // CHECK-DAG: %[[V41:.*]] = "mhlo.concatenate"(%[[V32]], %[[V31]]) {dimension = 0 : i64} : (tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<2x22x128xi32> - // CHECK-DAG: %[[V42:.*]] = "mhlo.gather"(%[[ARG]], %[[V41]]) {dimension_numbers = {collapsed_slice_dims = dense<[1, 2]> : tensor<2xi64>, index_vector_dim = 0 : i64, offset_dims = dense<0> : tensor<1xi64>, start_index_map = dense<[1, 2]> : tensor<2xi64>}, indices_are_sorted = false, slice_sizes = dense<[7, 1, 1]> : tensor<3xi64>} : (tensor<7x140x128xi32>, tensor<2x22x128xi32>) -> tensor<7x22x128xi32> - // CHECK-DAG: %[[V43:.*]] = "mhlo.broadcast"(%[[V40]]) 
{broadcast_sizes = dense<7> : tensor<1xi64>} : (tensor<22x128xi1>) -> tensor<7x22x128xi1> - // CHECK-DAG: %[[V44:.*]] = "mhlo.broadcast"(%[[V0]]) {broadcast_sizes = dense<[7, 22, 128]> : tensor<3xi64>} : (tensor) -> tensor<7x22x128xi32> - // CHECK-DAG: %[[V45:.*]] = "mhlo.select"(%[[V43]], %[[V42]], %[[V44]]) : (tensor<7x22x128xi1>, tensor<7x22x128xi32>, tensor<7x22x128xi32>) -> tensor<7x22x128xi32> - // CHECK: return %[[V45]] : tensor<7x22x128xi32> - %0 = mhlo.constant dense<42> : tensor // padding value - %1 = mhlo.constant dense<[-10, 11]> : tensor<2xi32> // k - %2 = "tf.MatrixDiagPartV3"(%arg0, %1, %0) { - T = i32, align = "RIGHT_LEFT" - } : (tensor<7x140x128xi32>, tensor<2xi32>, tensor) -> tensor<7x22x128xi32> - return %2: tensor<7x22x128xi32> -} - -// CHECK-LABEL: func @matrix_diag_part_single_diagonal -func @matrix_diag_part_single_diagonal(%arg0: tensor<7x140x128xi32>) -> tensor<7x128xi32> { - %0 = mhlo.constant dense<42> : tensor // padding value - %1 = mhlo.constant dense<0> : tensor<2xi32> // k - %2 = "tf.MatrixDiagPartV3"(%arg0, %1, %0) { - T = i32, align = "RIGHT_LEFT" - } : (tensor<7x140x128xi32>, tensor<2xi32>, tensor) -> tensor<7x128xi32> - // CHECK: %[[result:.*]] = "mhlo.reshape"({{.*}}) : (tensor<7x1x128xi32>) -> tensor<7x128xi32> - // CHECK: return %[[result]] : tensor<7x128xi32> - return %2: tensor<7x128xi32> -} - -// CHECK-LABEL: func @matrix_diag_part_align_ll -func @matrix_diag_part_align_ll(%arg0: tensor<7x140x128xi32>) -> tensor<7x22x128xi32> { - %0 = mhlo.constant dense<42> : tensor // padding value - %1 = mhlo.constant dense<[-10, 11]> : tensor<2xi32> // k - %2 = "tf.MatrixDiagPartV3"(%arg0, %1, %0) { - T = i32, align = "LEFT_LEFT" - } : (tensor<7x140x128xi32>, tensor<2xi32>, tensor) -> tensor<7x22x128xi32> - // CHECK: %[[false:.*]] = mhlo.constant dense : tensor - // CHECK: %[[b_false:.*]] = "mhlo.broadcast"(%[[false]]) {broadcast_sizes = dense<[1, 22, 128]> : tensor<3xi64>} : (tensor) -> tensor<1x22x128xi1> - // CHECK: %{{[0-9]*}} = "mhlo.select"(%[[b_false]], %{{[0-9]*}}, %{{[0-9]*}}) : (tensor<1x22x128xi1>, tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<1x22x128xi32> - return %2: tensor<7x22x128xi32> -} - -// CHECK-LABEL: func @matrix_diag_part_align_lr -func @matrix_diag_part_align_lr(%arg0: tensor<7x140x128xi32>) -> tensor<7x22x128xi32> { - %0 = mhlo.constant dense<42> : tensor // padding value - %1 = mhlo.constant dense<[-10, 11]> : tensor<2xi32> // k - %2 = "tf.MatrixDiagPartV3"(%arg0, %1, %0) { - T = i32, align = "LEFT_RIGHT" - } : (tensor<7x140x128xi32>, tensor<2xi32>, tensor) -> tensor<7x22x128xi32> - // CHECK: %[[le:.*]] = chlo.broadcast_compare %{{[0-9]*}}, %{{[0-9]*}} {comparison_direction = "LE"} : (tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<1x22x128xi1> - // CHECK: %{{[0-9]*}} = "mhlo.select"(%[[le]], %{{[0-9]*}}, %{{[0-9]*}}) : (tensor<1x22x128xi1>, tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<1x22x128xi32> - return %2: tensor<7x22x128xi32> -} - -// CHECK-LABEL: func @matrix_diag_part_align_rl -func @matrix_diag_part_align_rl(%arg0: tensor<7x140x128xi32>) -> tensor<7x22x128xi32> { - %0 = mhlo.constant dense<42> : tensor // padding value - %1 = mhlo.constant dense<[-10, 11]> : tensor<2xi32> // k - %2 = "tf.MatrixDiagPartV3"(%arg0, %1, %0) { - T = i32, align = "RIGHT_LEFT" - } : (tensor<7x140x128xi32>, tensor<2xi32>, tensor) -> tensor<7x22x128xi32> - // CHECK: %[[ge:.*]] = chlo.broadcast_compare %{{[0-9]*}}, %{{[0-9]*}} {comparison_direction = "GE"} : (tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<1x22x128xi1> - 
// CHECK: %{{[0-9]*}} = "mhlo.select"(%[[ge]], %{{[0-9]*}}, %{{[0-9]*}}) : (tensor<1x22x128xi1>, tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<1x22x128xi32> - return %2: tensor<7x22x128xi32> -} - -// CHECK-LABEL: func @matrix_diag_part_align_rr -func @matrix_diag_part_align_rr(%arg0: tensor<7x140x128xi32>) -> tensor<7x22x128xi32> { - %0 = mhlo.constant dense<42> : tensor // padding value - %1 = mhlo.constant dense<[-10, 11]> : tensor<2xi32> // k - %2 = "tf.MatrixDiagPartV3"(%arg0, %1, %0) { - T = i32, align = "RIGHT_RIGHT" - } : (tensor<7x140x128xi32>, tensor<2xi32>, tensor) -> tensor<7x22x128xi32> - // CHECK: %[[true:.*]] = mhlo.constant dense : tensor - // CHECK: %[[b_true:.*]] = "mhlo.broadcast"(%[[true]]) {broadcast_sizes = dense<[1, 22, 128]> : tensor<3xi64>} : (tensor) -> tensor<1x22x128xi1> - // CHECK: %{{[0-9]*}} = "mhlo.select"(%[[b_true]], %{{[0-9]*}}, %{{[0-9]*}}) : (tensor<1x22x128xi1>, tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<1x22x128xi32> - return %2: tensor<7x22x128xi32> -} - -// CHECK-LABEL: func @matrix_diag_part_align_7d -// CHECK: (%arg0: tensor<3x5x7x9x11x13x17xf32>) -> tensor<3x5x7x9x11x4x10xf32> -func @matrix_diag_part_align_7d(%arg0: tensor<3x5x7x9x11x13x17xf32>) -> tensor<3x5x7x9x11x4x10xf32> { - %0 = mhlo.constant dense<-1.> : tensor // padding value - %1 = mhlo.constant dense<[-6, -3]> : tensor<2xi32> // k - %2 = "tf.MatrixDiagPartV3"(%arg0, %1, %0) { - T = f32, align = "LEFT_RIGHT" - } : (tensor<3x5x7x9x11x13x17xf32>, tensor<2xi32>, tensor) -> tensor<3x5x7x9x11x4x10xf32> - return %2: tensor<3x5x7x9x11x4x10xf32> -} - //===----------------------------------------------------------------------===// // Einsum. //===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc index e387ad43b89..b1e74e354fe 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc @@ -1387,268 +1387,6 @@ class ConvertDiagPartOp : public OpRewritePattern { } }; -// Converts TensorFlow MatrixDiagPartOp to HLO ops. -class ConvertMatrixDiagPartV3Op - : public OpRewritePattern { - using Shape = llvm::SmallVector; - - // Parse the "k" parameter. MatrixDiagPartV3 allows to specify the diagonal(s) - // with k. This can be either a single value (for a single diagonal) or a - // tuple of two values (starting and ending diagonal, for a band). - LogicalResult ExtractK(TF::MatrixDiagPartV3Op op, int64_t (*k)[2]) const { - DenseIntElementsAttr kattr; - if (!matchPattern(op.k(), m_Constant(&kattr))) { - return failure(); - } - DenseIntElementsAttr::iterator it = kattr.begin(); - (*k)[0] = (*it).getSExtValue(); - it++; - if (it == kattr.end()) { - // Handle input like e.g. "k = 5", in which case we extract a single - // diagonal. - (*k)[1] = (*k)[0]; - } else { - // Handle input like e.g. "k = [-1, 1]", in which case we extract a - // band (multiple diagonals). - (*k)[1] = (*it).getSExtValue(); - } - return success(); - } - - // Utility method for broadcasting integer constants to a given shape. 
- BroadcastOp BroadcastConstant(Location loc, Shape shape, int32_t constant, - int int_size, PatternRewriter &rewriter) const { - return rewriter.create( - loc, RankedTensorType::get(shape, rewriter.getIntegerType(int_size)), - GetScalarConstOfType(rewriter.getIntegerType(int_size), loc, constant, - &rewriter), - GetI64ElementsAttr(shape, &rewriter)); - } - - public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(TF::MatrixDiagPartV3Op op, - PatternRewriter &rewriter) const override { - Location loc = op.getLoc(); - ShapedType input_type = op.input().getType().dyn_cast(); - auto element_type = input_type.getElementType(); - - // Align is a string specifying how superdiagonals and subdiagonals should - // be aligned/padded for diagonals that are shorter than max_diag_len. The - // format is "{super}_{sub}", with {super} the superdiagonal alignment and - // {sub} the subdiagonal alignment. "LEFT" means rows will be padded to the - // left, "RIGHT" means rows will be padded ot the right. The default is - // "RIGHT_LEFT". - StringRef align = op.getAttrOfType("align").getValue(); - enum Alignment { kLeft, kRight }; - - // default is RIGHT_LEFT - Alignment superdiagonal_align = kRight; - Alignment subdiagonal_align = kLeft; - - if (align == "RIGHT_LEFT") { - superdiagonal_align = kRight; - subdiagonal_align = kLeft; - } else if (align == "RIGHT_RIGHT") { - superdiagonal_align = kRight; - subdiagonal_align = kRight; - } else if (align == "LEFT_RIGHT") { - superdiagonal_align = kLeft; - subdiagonal_align = kRight; - } else if (align == "LEFT_LEFT") { - superdiagonal_align = kLeft; - subdiagonal_align = kLeft; - } else { - return failure(); // unsupported alignment - } - - // MatrixDiagPart operates on a matrix of shape [I, J, ..., L, M, N], and - // will extract the diagonal(s) out of [M, N], for all [I, J, ..., L]. - if (!input_type || !input_type.hasStaticShape()) return failure(); - int64_t num_dims = input_type.getRank(); - if (num_dims < 2) return failure(); - int rows = input_type.getDimSize(num_dims - 2); // rows - int cols = input_type.getDimSize(num_dims - 1); // cols - - // We extract the diagonals from k[0] up to and including k[1]. - // Addressing is 0 for the main diagonal. (So k = [0, 0] would just extract - // the main diagonal). It's negative for subdiagonals (under and to the left - // of the main diagonal) and positive for superdiagonals (above and to the - // right of the main diagonal). - int64_t k[2]; - if (failed(ExtractK(op, &k))) return failure(); - int num_diags = k[1] - k[0] + 1; - - // Shifting diagonals away from the main diagonal might shorten them. This - // is the longest diagonal we will see. We make this the last dimension of - // the output shape. - int64_t max_diag_len = - std::min(rows + std::min(k[1], 0l), cols + std::min(-k[0], 0l)); - - // The first dimension is the index vector dimension we'll use for gather. - // It's 1 here, but will be 2 once we glue x and y together. - Shape indices_shape({1, num_diags, max_diag_len}); - - RankedTensorType iota_type = - RankedTensorType::get(indices_shape, rewriter.getIntegerType(32)); - Value iotaM = - rewriter.create(loc, iota_type, rewriter.getI64IntegerAttr(1)); - Value iotaN = - rewriter.create(loc, iota_type, rewriter.getI64IntegerAttr(2)); - - // Boradcasted constants, of the same shape as iotaM and iotaN. 
- Value b_zero = BroadcastConstant(loc, indices_shape, 0, 32, rewriter); - Value b_false = BroadcastConstant(loc, indices_shape, 0, 1, rewriter); - Value b_true = BroadcastConstant(loc, indices_shape, 1, 1, rewriter); - Value b_k1 = BroadcastConstant(loc, indices_shape, k[1], 32, rewriter); - Value b_rows = BroadcastConstant(loc, indices_shape, rows, 32, rewriter); - Value b_cols = BroadcastConstant(loc, indices_shape, cols, 32, rewriter); - Value b_max_diag_len = - BroadcastConstant(loc, indices_shape, max_diag_len, 32, rewriter); - - // d = k[1] - m - // (A.k.a. the number of the diagonal, depending on m. Note that we - // subtract m here. This means we start with the superdiagonals and - // move downwards towards the subdiagonals. So the start indices will - // be decreasing.) - Value d = rewriter.create(loc, b_k1, iotaM); - Value neg_d = rewriter.create(loc, d); - - // Common subexpressions. - Value min_d_zero = rewriter.create(loc, d, b_zero); - Value max_d_zero = rewriter.create(loc, d, b_zero); - - // diag_len_d = min(rows + min(d, 0), cols - max(d, 0)) - // (Length of a diagonal for a given d. Same as max_diag_len for m = 0.) - Value diag_len_d = rewriter.create( - loc, rewriter.create(loc, b_rows, min_d_zero), - rewriter.create(loc, b_cols, max_d_zero)); - - // offset is max_diag_len - diag_len_d if we're padding, 0 otherwise. - Value cmp; - if (subdiagonal_align == kRight && superdiagonal_align == kRight) { - cmp = b_true; - } else if (superdiagonal_align == kRight) { - // offset = d>=0 ? max_diag_len - diag_len_d : 0 - cmp = rewriter.create(loc, d, b_zero); - } else if (subdiagonal_align == kRight) { - // offset = d<=0 ? max_diag_len - diag_len_d : 0 - cmp = rewriter.create(loc, d, b_zero); - } else { - // offset = 0 - cmp = b_false; - } - - // This offset shifts the diagonals to the "left" or "right", depending - // on alignment. - Value offset = rewriter.create( - loc, b_zero.getType(), cmp, - rewriter.create(loc, b_max_diag_len, diag_len_d), b_zero); - - // x = max(d, 0) - offset - // y = max(-d, 0) - offset - Value x = rewriter.create(loc, max_d_zero, offset); - Value y = rewriter.create( - loc, rewriter.create(loc, neg_d, b_zero), offset); - - Value n_plus_x = rewriter.create(loc, iotaN, x); - Value n_plus_y = rewriter.create(loc, iotaN, y); - - // GatherOp is happy about letting us index out of bounds values, but those - // values will be undefined. So we mask them later. Set up the boolean - // expression that tells us which entries, in the output shape, are out of - // bounds and thus become the padding_value. - Value x_in_bounds = rewriter.create( - loc, - rewriter.create(loc, b_false.getType(), n_plus_x, - b_zero), - rewriter.create(loc, b_false.getType(), n_plus_x, b_cols)); - Value y_in_bounds = rewriter.create( - loc, - rewriter.create(loc, b_false.getType(), n_plus_y, - b_zero), - rewriter.create(loc, b_false.getType(), n_plus_y, b_rows)); - Value in_bounds = rewriter.create( - loc, - RankedTensorType::get(Shape({num_diags, max_diag_len}), - rewriter.getIntegerType(1)), - rewriter.create(loc, x_in_bounds, y_in_bounds)); - - // Now combine x and y into the index data structure needed for gather. - Shape concat_shape({2, num_diags, max_diag_len}); - Value start_indices = rewriter.create( - loc, RankedTensorType::get(concat_shape, rewriter.getIntegerType(32)), - mlir::ValueRange({n_plus_y, n_plus_x}), - mlir::IntegerAttr::get(rewriter.getIntegerType(64), 0)); - - // Shape of the final output. (Except for dimension folding in the - // single diagonal case.) 
- Shape output_shape; - for (int i = 0; i < num_dims - 2; i++) { - output_shape.push_back(input_type.getDimSize(i)); - } - output_shape.push_back(num_diags); - output_shape.push_back(max_diag_len); - auto output_type = RankedTensorType::get(output_shape, element_type); - - // A slice is the shape of what GatherOp copies per lookup. So the last - // two dimensions (M, N in the matrix-diag-part docs) are where we go - // through entry by entry. - ArrayRef input_shape = input_type.getShape(); - Shape slice_sizes(input_shape.begin(), input_shape.end()); - int slice_dimensions = slice_sizes.size(); - slice_sizes[slice_dimensions - 2] = 1; - slice_sizes[slice_dimensions - 1] = 1; - - // Dimensions of the input we won't see in the output (M and N). - SmallVector collapsed_dims( - {slice_dimensions - 2, slice_dimensions - 1}); - - // Which dimensions (in the input) the two offset "columns" map to. - SmallVector start_index_map({num_dims - 2, num_dims - 1}); - - // Gather the diagonal entries. - // TODO(kramm): For a single diagonal, this might be slower than the - // mask + sum approach. Special-case num_diags==1? - auto dims_attr = GatherDimensionNumbers::get( - /*offset_dims=*/GetI64ElementsAttrForSeq(0, num_dims - 2, &rewriter), - /*collapsed_slice_dims=*/GetI64ElementsAttr(collapsed_dims, &rewriter), - /*start_index_map=*/GetI64ElementsAttr(start_index_map, &rewriter), - /*index_vector_dim=*/rewriter.getI64IntegerAttr(0), - rewriter.getContext()); - Value gather = rewriter.create( - loc, output_type, op.input(), start_indices, dims_attr, - GetI64ElementsAttr(slice_sizes, &rewriter)); - - // We now need to broadcast the "in_bounds" boolean expression, as well as - // the padding value, to do the final select. - Shape broadcast_bounds; - for (int i = 0; i < output_shape.size() - 2; i++) { - broadcast_bounds.push_back(output_shape[i]); - } - Value b_in_bounds = rewriter.create( - loc, RankedTensorType::get(output_shape, rewriter.getIntegerType(1)), - in_bounds, GetI64ElementsAttr(broadcast_bounds, &rewriter)); - Value b_padding = rewriter.create( - loc, output_type, op.padding_value(), - GetI64ElementsAttr(output_shape, &rewriter)); - - // Replace all out-of-bounds values in the result with padding_value. - Value result = rewriter.create(loc, output_type, b_in_bounds, - gather, b_padding); - - if (num_diags == 1) { - // matrix_diag_part folds away the 1-sized band dimension if we only - // extract a single diagonal. - result = rewriter.create(loc, op.getType(), result); - } - - rewriter.replaceOp(op, result); - return success(); - } -}; - // Converts TensorFlow EinsumOp to either HLO EinsumOp or UnaryEinsumOp // depending on arity of the op. class ConvertEinsumOp : public OpRewritePattern { @@ -5748,8 +5486,8 @@ LogicalResult legalizeTF(Operation *op, bool allow_partial_conversion, ConvertAvgPool3DGradOp, ConvertMaxPool2DOp, ConvertMaxPool3DOp, ConvertMaxPool2DGradOp, ConvertMaxPool3DGradOp, ConvertMeanOp, ConvertOneHotOp, ConvertOutfeedEnqueueTupleOp, ConvertProdOp, ConvertQrOp, - ConvertMatrixDiagPartV3Op, ConvertDynamicRangeOp, ConvertRangeOp, - ConvertSelectV2Op, ConvertSigmoidOp, ConvertShapeOp, ConvertSizeOp, + ConvertDynamicRangeOp, ConvertRangeOp, ConvertSelectV2Op, + ConvertSigmoidOp, ConvertShapeOp, ConvertSizeOp, ConvertSoftmaxOp, ConvertSoftmaxOp, ConvertSplitOp, ConvertSplitVOp, ConvertStridedSliceOp, ConvertStridedSliceGradOp, ConvertSumOp, From b2604b0fdb4872ea54f8c772511d7c0d663e2cc2 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 28 Jul 2020 18:51:51 -0700 Subject: [PATCH 1570/2522] Added BitCast to the estimator. PiperOrigin-RevId: 323699500 Change-Id: I9ee34697b0651134d8423f499cfb282bbb150d48 --- .../grappler/costs/op_level_cost_estimator.cc | 3 +++ .../costs/op_level_cost_estimator_test.cc | 26 +++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc index d15aa698f39..b0e76ff4943 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc @@ -32,6 +32,7 @@ namespace grappler { constexpr int kOpsPerMac = 2; constexpr char kGuaranteeConst[] = "GuaranteeConst"; +constexpr char kBitCast[] = "BitCast"; constexpr char kConv2d[] = "Conv2D"; constexpr char kConv2dBackpropFilter[] = "Conv2DBackpropFilter"; constexpr char kConv2dBackpropInput[] = "Conv2DBackpropInput"; @@ -440,6 +441,8 @@ OpLevelCostEstimator::OpLevelCostEstimator() { wrap(&OpLevelCostEstimator::PredictIdentity)); device_cost_impl_.emplace(kNextIteration, wrap(&OpLevelCostEstimator::PredictIdentity)); + device_cost_impl_.emplace(kBitCast, + wrap(&OpLevelCostEstimator::PredictIdentity)); device_cost_impl_.emplace(kRank, wrap(&OpLevelCostEstimator::PredictMetadata)); diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc index 39140e03139..4cc24df1936 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc @@ -1838,5 +1838,31 @@ TEST_F(OpLevelCostEstimatorTest, PredictResourceVariableOps) { } } +TEST_F(OpLevelCostEstimatorTest, IdentityOpExecutionTime) { + std::vector identity_ops = { + "_Recv", "_Send", "BitCast", "Identity", + "Enter", "Exit", "IdentityN", "Merge", + "NextIteration", "Placeholder", "PreventGradient", "RefIdentity", + "Reshape", "StopGradient", "Switch"}; + + const int kTensorSize = 1000; + for (auto identity_op : identity_ops) { + OpContext op_context = DescribeUnaryOp(identity_op, kTensorSize); + + const int kExpectedMemoryTime = 0; + const int kExpectedComputeTime = 1; + + auto cost = PredictCosts(op_context); + EXPECT_EQ(Costs::Duration(kExpectedMemoryTime), cost.memory_time); + EXPECT_EQ(Costs::Duration(kExpectedComputeTime), cost.compute_time); + EXPECT_EQ(Costs::Duration(kExpectedComputeTime + kExpectedMemoryTime), + cost.execution_time); + EXPECT_EQ(cost.max_memory, kTensorSize * 4); + EXPECT_EQ(1, cost.num_ops_total); + EXPECT_FALSE(cost.inaccurate); + EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); + } +} + } // end namespace grappler } // end namespace tensorflow From 259128e7fbe1bb1749f83bd6aa5a5bfe15efa213 Mon Sep 17 00:00:00 2001 From: Renjie Liu Date: Tue, 28 Jul 2020 18:54:15 -0700 Subject: [PATCH 1571/2522] Fix typo of the ifdef. 
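The misspelled macro in the old guard was presumably never defined by any toolchain, so the ARMv8 fast path was effectively dead code and every build took the slower emulation branch. A minimal standalone sketch (illustrative only; the emulation shown here is simplified and is not the kernel's actual fallback) of what the corrected guard selects:

  #include <arm_neon.h>

  // Rounds each lane to the nearest integer, ties away from zero.
  inline int32x4_t RoundToNearestSketch(const float32x4_t input) {
  #if __ARM_ARCH >= 8
    // ARMv8 exposes a single convert-with-rounding intrinsic for this.
    return vcvtaq_s32_f32(input);
  #else
    // Pre-ARMv8 emulation: bias by +/-0.5, then truncate toward zero.
    const uint32x4_t negative = vcltq_f32(input, vdupq_n_f32(0.0f));
    const float32x4_t bias =
        vbslq_f32(negative, vdupq_n_f32(-0.5f), vdupq_n_f32(0.5f));
    return vcvtq_s32_f32(vaddq_f32(input, bias));
  #endif
  }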
PiperOrigin-RevId: 323699851 Change-Id: I51e6d326dc896d303f3009490aa35dad8320909c --- tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc b/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc index 800d7008b4b..0172ba690e4 100644 --- a/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc +++ b/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc @@ -2292,7 +2292,7 @@ void NeonVectorScalarMultiply(const int8_t* vector, const int v_size, // Also consider changing the rounding stragey from "ties to away" to // "ties to even" since vcvtnq_s32_f32 is generally more available. inline int32x4_t RoundToNearest(const float32x4_t input) { -#if defined(_ACAT_ARM64) +#if __ARM_ARCH >= 8 return vcvtaq_s32_f32(input); #else static const float32x4_t zero_val_dup = vdupq_n_f32(0.0f); From 966bbfa4c51b12e6edbdc303aa43da3246c37833 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 28 Jul 2020 19:21:12 -0700 Subject: [PATCH 1572/2522] Added reshape op cost computation to the estimator. PiperOrigin-RevId: 323702969 Change-Id: I40b56a80d814f867a208307efd3027b6de26093d --- .../grappler/costs/op_level_cost_estimator.cc | 42 ++++++++++++++++++- .../grappler/costs/op_level_cost_estimator.h | 1 + .../costs/op_level_cost_estimator_test.cc | 28 +++++++++++++ 3 files changed, 69 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc index b0e76ff4943..39db86e8481 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc @@ -33,10 +33,13 @@ namespace grappler { constexpr int kOpsPerMac = 2; constexpr char kGuaranteeConst[] = "GuaranteeConst"; constexpr char kBitCast[] = "BitCast"; +constexpr char kConcatV2[] = "ConcatV2"; constexpr char kConv2d[] = "Conv2D"; constexpr char kConv2dBackpropFilter[] = "Conv2DBackpropFilter"; constexpr char kConv2dBackpropInput[] = "Conv2DBackpropInput"; constexpr char kFusedConv2dBiasActivation[] = "FusedConv2DBiasActivation"; +constexpr char kDataFormatVecPermute[] = "DataFormatVecPermute"; +constexpr char kDepthToSpace[] = "DepthToSpace"; constexpr char kDepthwiseConv2dNative[] = "DepthwiseConv2dNative"; constexpr char kDepthwiseConv2dNativeBackpropFilter[] = "DepthwiseConv2dNativeBackpropFilter"; @@ -45,6 +48,8 @@ constexpr char kDepthwiseConv2dNativeBackpropInput[] = constexpr char kMatMul[] = "MatMul"; constexpr char kXlaEinsum[] = "XlaEinsum"; constexpr char kEinsum[] = "Einsum"; +constexpr char kExpandDims[] = "ExpandDims"; +constexpr char kFill[] = "Fill"; constexpr char kSparseMatMul[] = "SparseMatMul"; constexpr char kSparseTensorDenseMatMul[] = "SparseTensorDenseMatMul"; constexpr char kPlaceholder[] = "Placeholder"; @@ -53,11 +58,13 @@ constexpr char kIdentityN[] = "IdentityN"; constexpr char kRefIdentity[] = "RefIdentity"; constexpr char kNoOp[] = "NoOp"; constexpr char kReshape[] = "Reshape"; +constexpr char kSplit[] = "Split"; constexpr char kSqueeze[] = "Squeeze"; constexpr char kRecv[] = "_Recv"; constexpr char kSend[] = "_Send"; constexpr char kBatchMatMul[] = "BatchMatMul"; constexpr char kBatchMatMulV2[] = "BatchMatMulV2"; +constexpr char kPack[] = "Pack"; constexpr char kRank[] = "Rank"; constexpr char kShape[] = "Shape"; constexpr char kShapeN[] = "ShapeN"; @@ -74,6 +81,8 @@ constexpr char 
kScatterMul[] = "ScatterMul"; constexpr char kScatterSub[] = "ScatterSub"; constexpr char kScatterUpdate[] = "ScatterUpdate"; constexpr char kSlice[] = "Slice"; +constexpr char kSpaceToDepth[] = "SpaceToDepth"; +constexpr char kTranspose[] = "Transpose"; constexpr char kMaxPool[] = "MaxPool"; constexpr char kMaxPoolGrad[] = "MaxPoolGrad"; constexpr char kAvgPool[] = "AvgPool"; @@ -82,6 +91,7 @@ constexpr char kFusedBatchNorm[] = "FusedBatchNorm"; constexpr char kFusedBatchNormGrad[] = "FusedBatchNormGrad"; constexpr char kQuantizedMatMul[] = "QuantizedMatMul"; constexpr char kQuantizedMatMulV2[] = "QuantizedMatMulV2"; +constexpr char kUnpack[] = "Unpack"; // Dynamic control flow ops. constexpr char kSwitch[] = "Switch"; constexpr char kMerge[] = "Merge"; @@ -425,8 +435,6 @@ OpLevelCostEstimator::OpLevelCostEstimator() { wrap(&OpLevelCostEstimator::PredictIdentity)); device_cost_impl_.emplace(kReshape, wrap(&OpLevelCostEstimator::PredictIdentity)); - device_cost_impl_.emplace(kSqueeze, - wrap(&OpLevelCostEstimator::PredictIdentity)); device_cost_impl_.emplace(kRecv, wrap(&OpLevelCostEstimator::PredictIdentity)); device_cost_impl_.emplace(kSend, @@ -444,6 +452,29 @@ OpLevelCostEstimator::OpLevelCostEstimator() { device_cost_impl_.emplace(kBitCast, wrap(&OpLevelCostEstimator::PredictIdentity)); + device_cost_impl_.emplace(kConcatV2, + wrap(&OpLevelCostEstimator::PredictPureMemoryOp)); + device_cost_impl_.emplace(kDataFormatVecPermute, + wrap(&OpLevelCostEstimator::PredictPureMemoryOp)); + device_cost_impl_.emplace(kDepthToSpace, + wrap(&OpLevelCostEstimator::PredictPureMemoryOp)); + device_cost_impl_.emplace(kExpandDims, + wrap(&OpLevelCostEstimator::PredictPureMemoryOp)); + device_cost_impl_.emplace(kFill, + wrap(&OpLevelCostEstimator::PredictPureMemoryOp)); + device_cost_impl_.emplace(kPack, + wrap(&OpLevelCostEstimator::PredictPureMemoryOp)); + device_cost_impl_.emplace(kSpaceToDepth, + wrap(&OpLevelCostEstimator::PredictPureMemoryOp)); + device_cost_impl_.emplace(kSplit, + wrap(&OpLevelCostEstimator::PredictPureMemoryOp)); + device_cost_impl_.emplace(kSqueeze, + wrap(&OpLevelCostEstimator::PredictPureMemoryOp)); + device_cost_impl_.emplace(kTranspose, + wrap(&OpLevelCostEstimator::PredictPureMemoryOp)); + device_cost_impl_.emplace(kUnpack, + wrap(&OpLevelCostEstimator::PredictPureMemoryOp)); + device_cost_impl_.emplace(kRank, wrap(&OpLevelCostEstimator::PredictMetadata)); device_cost_impl_.emplace(kShape, @@ -1677,6 +1708,13 @@ Costs OpLevelCostEstimator::PredictNoOp(const OpContext& op_context) const { return Costs::ZeroCosts(); } +Costs OpLevelCostEstimator::PredictPureMemoryOp( + const OpContext& op_context) const { + // Each output element is a copy of some element from input, with no required + // computation, so just compute memory costs. 
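+  // Illustrative numbers (mirroring the unit test added below): a unary op
+  // on a 1000-element float tensor reads 4000 bytes and writes 4000 bytes,
+  // so the predicted compute time is zero and the whole cost is memory time.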
+ return PredictOpCountBasedCost(0, op_context.op_info); +} + Costs OpLevelCostEstimator::PredictIdentity(const OpContext& op_context) const { const auto& op_info = op_context.op_info; VLOG(1) << "Op:" << op_info.op() << " Execution Time 0 (ns)"; diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.h b/tensorflow/core/grappler/costs/op_level_cost_estimator.h index 2bf3c5bb920..8bcf51b4010 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator.h +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.h @@ -86,6 +86,7 @@ class OpLevelCostEstimator { Costs PredictFusedBatchNormGrad(const OpContext& op_context) const; Costs PredictEinsum(const OpContext& op_context) const; Costs PredictAssignVariableOps(const OpContext& op_context) const; + Costs PredictPureMemoryOp(const OpContext& op_context) const; // Generic cost prediction method for fused operations. Costs PredictFusedOp(const OpContext& op_context, diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc index 4cc24df1936..82fc86ac580 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc @@ -1864,5 +1864,33 @@ TEST_F(OpLevelCostEstimatorTest, IdentityOpExecutionTime) { } } +TEST_F(OpLevelCostEstimatorTest, PureMemoryOpExecutionTime) { + std::vector reshape_ops = { + "ConcatV2", "DataFormatVecPermute", + "DepthToSpace", "ExpandDims", + "Fill", "Pack", + "SpaceToDepth", "Split", + "Squeeze", "Transpose", + "Unpack"}; + + const int kTensorSize = 1000; + for (auto reshape_op : reshape_ops) { + OpContext op_context = DescribeUnaryOp(reshape_op, kTensorSize); + + const int kExpectedMemoryTime = 800; + const int kExpectedComputeTime = 0; + + auto cost = PredictCosts(op_context); + EXPECT_EQ(Costs::Duration(kExpectedMemoryTime), cost.memory_time); + EXPECT_EQ(Costs::Duration(kExpectedComputeTime), cost.compute_time); + EXPECT_EQ(Costs::Duration(kExpectedComputeTime + kExpectedMemoryTime), + cost.execution_time); + EXPECT_EQ(cost.max_memory, kTensorSize * 4); + EXPECT_EQ(1, cost.num_ops_total); + EXPECT_FALSE(cost.inaccurate); + EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); + } +} + } // end namespace grappler } // end namespace tensorflow From d684ef2f408fe7cd6c1b55a5ecdae3d7c4198bb0 Mon Sep 17 00:00:00 2001 From: Sean Silva Date: Tue, 28 Jul 2020 19:40:25 -0700 Subject: [PATCH 1573/2522] Make comments consistent on GroupedFilterShapeForDepthwiseConvolution PiperOrigin-RevId: 323704723 Change-Id: I00d6fb12ea053a6a8826a976833d752718980e34 --- tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc b/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc index e0bc2ba5052..a7a8b8bcb52 100644 --- a/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc +++ b/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc @@ -44,7 +44,7 @@ namespace tensorflow { namespace { // Returns the expanded size of a filter used for depthwise convolution. -// If `shape` is [H, W, ..., M, N] returns [H, W, ..., M, M*N]. +// If `shape` is [H, W, ..., M, N] returns [H, W, ..., 1, M*N]. 
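+// (For example, a depthwise filter of shape [3, 3, 8, 4] is regrouped to
+// [3, 3, 1, 32].)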
xla::Shape GroupedFilterShapeForDepthwiseConvolution( const xla::Shape& filter_shape) { int64 input_feature_dim = filter_shape.dimensions_size() - 2; @@ -52,7 +52,7 @@ xla::Shape GroupedFilterShapeForDepthwiseConvolution( int64 depthwise_multiplier = filter_shape.dimensions(output_feature_dim); int64 input_feature = filter_shape.dimensions(input_feature_dim); - // Create a [H, W, ..., 1, N*M] reshape of the filter. + // Create a [H, W, ..., 1, M*N] reshape of the filter. xla::Shape grouped_filter_shape = filter_shape; grouped_filter_shape.set_dimensions(input_feature_dim, 1); grouped_filter_shape.set_dimensions(output_feature_dim, From 3877059a38f5a9b8237f84951c4d410dd3eaae00 Mon Sep 17 00:00:00 2001 From: Kibeom Kim Date: Tue, 28 Jul 2020 19:40:49 -0700 Subject: [PATCH 1574/2522] Fix @test_util.run_deprecated_v1 in //tensorflow/python/kernel_tests:softmax_op_test PiperOrigin-RevId: 323704760 Change-Id: Ia1c8245cd9bbff0af66b87fd59ef050d3d6a61f2 --- .../python/kernel_tests/softmax_op_test.py | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/tensorflow/python/kernel_tests/softmax_op_test.py b/tensorflow/python/kernel_tests/softmax_op_test.py index 4debe180690..2efa60993be 100644 --- a/tensorflow/python/kernel_tests/softmax_op_test.py +++ b/tensorflow/python/kernel_tests/softmax_op_test.py @@ -23,9 +23,9 @@ import unittest import numpy as np +from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors_impl -from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops @@ -242,14 +242,11 @@ class SoftmaxTest(test.TestCase): [[5., 4., 3., 2.], [1., 2., 3., 4.]]]) self.assertEqual([3, 2, 4], op.get_shape()) - @test_util.run_deprecated_v1 def testEmptyInput(self): - with self.cached_session(): - x = array_ops.placeholder(dtypes.float32, shape=[0, 3]) - self.assertEqual(0, array_ops.size(x).eval()) - # reshape would raise if logits is empty - with self.assertRaises(errors_impl.InvalidArgumentError): - nn_ops.softmax(x, axis=0).eval() + x = array_ops.ones(shape=[0, 3], dtype=dtypes.float32) + y = np.zeros(shape=[0, 3], dtype=np.float32) + self.assertEqual(0, self.evaluate(array_ops.size(x))) + self.assertAllEqual(y, self.evaluate(nn_ops.softmax(x, axis=0))) def testDimTooLarge(self): with self.cached_session(): @@ -266,7 +263,6 @@ class SoftmaxTest(test.TestCase): with self.assertRaises(errors_impl.InvalidArgumentError): nn_ops.softmax(ones, axis=2).eval() - @test_util.run_deprecated_v1 def testLargeDims(self): # Make sure that we properly handle large inputs. See # https://github.com/tensorflow/tensorflow/issues/4425 for details @@ -275,10 +271,10 @@ class SoftmaxTest(test.TestCase): np_softmax = self._npSoftmax(ones) for use_gpu in [True, False]: - with self.cached_session(use_gpu=use_gpu) as sess: - x = array_ops.placeholder(dtypes.float32) + with self.cached_session(use_gpu=use_gpu): + x = constant_op.constant(ones) y = nn_ops.softmax(x) - tf_softmax = sess.run(y, feed_dict={x: ones}) + tf_softmax = self.evaluate(y) self.assertAllClose(tf_softmax, np_softmax) From 492b0355cd0d04a617075804bbd9bc6ab0fddc13 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 28 Jul 2020 19:48:53 -0700 Subject: [PATCH 1575/2522] Added AddN to the estimator. 
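The cost is modeled as one scalar addition per element for every input beyond the first: the element count of the largest input (or of the output, when its shape is known) times (number of inputs - 1), priced with Eigen's scalar_sum_op cost. A rough standalone sketch of that arithmetic; the AddNOpCount helper below is hypothetical and is not part of the estimator's interface:

  #include <cstdint>
  #include <iostream>

  // One scalar addition per element, per input beyond the first.
  int64_t AddNOpCount(int64_t elements_per_input, int64_t num_inputs) {
    return elements_per_input * (num_inputs - 1);
  }

  int main() {
    // Mirrors the unit test in this change: three 1x10x10x10 float inputs.
    std::cout << AddNOpCount(1000, 3) << " additions\n";  // prints "2000 additions"
    return 0;
  }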
PiperOrigin-RevId: 323705484 Change-Id: I5e2f8a8260db794c3d5d724fe692cfad37091450 --- .../grappler/costs/op_level_cost_estimator.cc | 36 ++++++++++++++++++- .../grappler/costs/op_level_cost_estimator.h | 1 + .../costs/op_level_cost_estimator_test.cc | 18 ++++++++++ 3 files changed, 54 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc index 39db86e8481..d2e56cd2f1c 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc @@ -32,6 +32,7 @@ namespace grappler { constexpr int kOpsPerMac = 2; constexpr char kGuaranteeConst[] = "GuaranteeConst"; +constexpr char kAddN[] = "AddN"; constexpr char kBitCast[] = "BitCast"; constexpr char kConcatV2[] = "ConcatV2"; constexpr char kConv2d[] = "Conv2D"; @@ -504,6 +505,7 @@ OpLevelCostEstimator::OpLevelCostEstimator() { device_cost_impl_.emplace( kAssignSubVariableOp, wrap(&OpLevelCostEstimator::PredictAssignVariableOps)); + device_cost_impl_.emplace(kAddN, wrap(&OpLevelCostEstimator::PredictNaryOp)); persistent_ops_ = { kConst, kVariable, kVariableV2, kAutoReloadVariable, @@ -686,7 +688,7 @@ Costs OpLevelCostEstimator::PredictCwiseOp(const OpContext& op_context) const { // use the count for the largest input here to be more robust in case that the // shape is unknown or partially known for other input. int64 op_count = CalculateLargestInputCount(op_info, &found_unknown_shapes); - // If output shape is available, try use the element count calculated from + // If output shape is available, try to use the element count calculated from // that. if (op_info.outputs_size() > 0) { op_count = std::max( @@ -2231,5 +2233,37 @@ Costs OpLevelCostEstimator::PredictFusedBatchNormGrad( costs.max_memory = total_output_size; return costs; } + +Costs OpLevelCostEstimator::PredictNaryOp(const OpContext& op_context) const { + const auto& op_info = op_context.op_info; + bool found_unknown_shapes = false; + // Calculate the largest known tensor size across all inputs and output. + int64 op_count = CalculateLargestInputCount(op_info, &found_unknown_shapes); + // If output shape is available, try to use the element count calculated from + // that. + if (op_info.outputs_size() > 0) { + op_count = std::max( + op_count, + CalculateTensorElementCount(op_info.outputs(0), &found_unknown_shapes)); + } + // Also calculate the output shape possibly resulting from broadcasting. + // Note that the some Nary ops (such as AddN) do not support broadcasting, + // but we're including this here for completeness. + if (op_info.inputs_size() >= 2) { + op_count = std::max(op_count, CwiseOutputElementCount(op_info)); + } + + // Nary ops perform one operation for every element in every input tensor. 
+ op_count *= op_info.inputs_size() - 1; + + const auto sum_cost = Eigen::internal::functor_traits< + Eigen::internal::scalar_sum_op>::Cost; + Costs costs = PredictOpCountBasedCost(op_count * sum_cost, op_info); + if (found_unknown_shapes) { + costs.inaccurate = true; + } + costs.num_ops_with_unknown_shapes = found_unknown_shapes; + return costs; +} } // end namespace grappler } // end namespace tensorflow diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.h b/tensorflow/core/grappler/costs/op_level_cost_estimator.h index 8bcf51b4010..f44f4ee19e5 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator.h +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.h @@ -64,6 +64,7 @@ class OpLevelCostEstimator { // Implementation of costs other than // execution_time is optional, depending on the // device. + Costs PredictNaryOp(const OpContext& op_context) const; Costs PredictConv2D(const OpContext& op_context) const; Costs PredictCwiseOp(const OpContext& op_context) const; Costs PredictConv2DBackpropInput(const OpContext& op_context) const; diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc index 82fc86ac580..d24533cf532 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc @@ -1838,6 +1838,24 @@ TEST_F(OpLevelCostEstimatorTest, PredictResourceVariableOps) { } } +TEST_F(OpLevelCostEstimatorTest, AddNExecutionTime) { + OpContext op_context; + SetCpuDevice(&op_context.op_info); + op_context.op_info.set_op("AddN"); + + DescribeTensor4D(1, 10, 10, 10, op_context.op_info.add_inputs()); + DescribeTensor4D(1, 10, 10, 10, op_context.op_info.add_inputs()); + DescribeTensor4D(1, 10, 10, 10, op_context.op_info.add_inputs()); + + auto cost = PredictCosts(op_context); + EXPECT_EQ(Costs::Duration(1200), cost.memory_time); + EXPECT_EQ(Costs::Duration(200), cost.compute_time); + EXPECT_EQ(Costs::Duration(1400), cost.execution_time); + EXPECT_EQ(1, cost.num_ops_total); + EXPECT_FALSE(cost.inaccurate); + EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); +} + TEST_F(OpLevelCostEstimatorTest, IdentityOpExecutionTime) { std::vector identity_ops = { "_Recv", "_Send", "BitCast", "Identity", From 316acba0b1d74e071323da1a6901cda38fde329f Mon Sep 17 00:00:00 2001 From: Renjie Liu Date: Tue, 28 Jul 2020 19:49:00 -0700 Subject: [PATCH 1576/2522] Fix regression caused by short-circuit the quantized pad optimized path. 
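This restores dispatch through optimized_ops for quantized image-style pads; to make that safe, the generic optimized_ops::PadImageStyle template now forwards to the reference kernel instead of hitting TFLITE_ASSERT_FALSE when no type-specialized fast path exists. A toy sketch of that dispatch pattern, with deliberately simplified signatures that do not match the real TFLite interfaces:

  #include <cstddef>

  namespace reference_ops {
  // Stand-in for the slow-but-always-correct implementation.
  template <typename T>
  void PadImageStyle(const T* input, T* output, size_t count) {
    for (size_t i = 0; i < count; ++i) output[i] = input[i];
  }
  }  // namespace reference_ops

  namespace optimized_ops {
  // Generic fallback: forward to the reference kernel instead of asserting.
  // Type-specialized overloads can still provide a genuine fast path.
  template <typename T>
  void PadImageStyle(const T* input, T* output, size_t count) {
    reference_ops::PadImageStyle(input, output, count);
  }
  }  // namespace optimized_ops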
PiperOrigin-RevId: 323705501 Change-Id: I71f4b69dce0d6353a70e6b2f8b72b897050c993b --- tensorflow/lite/kernels/internal/optimized/optimized_ops.h | 3 ++- tensorflow/lite/kernels/pad.cc | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h index 5d922fe3c4e..6ca8b781805 100644 --- a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h @@ -5559,7 +5559,8 @@ inline void PadImageStyle(const tflite::PadParams& op_params, const RuntimeShape& input_shape, const T* input_data, const P* pad_value_ptr, const RuntimeShape& output_shape, T* output_data) { - TFLITE_ASSERT_FALSE; + reference_ops::PadImageStyle(op_params, input_shape, input_data, + pad_value_ptr, output_shape, output_data); } template diff --git a/tensorflow/lite/kernels/pad.cc b/tensorflow/lite/kernels/pad.cc index 7a864cface0..e522ae06bfb 100644 --- a/tensorflow/lite/kernels/pad.cc +++ b/tensorflow/lite/kernels/pad.cc @@ -156,7 +156,7 @@ TfLiteStatus EvalInt(TfLiteContext* context, const PadContext& op_context, } const integer_type pad_value_copy = pad_value; if (op_context.resizing_category == ResizingCategory::kImageStyle) { - reference_ops::PadImageStyle( + optimized_ops::PadImageStyle( op_params, GetTensorShape(op_context.input), GetTensorData(op_context.input), &pad_value_copy, GetTensorShape(op_context.output), From 37c793e9b89bd6a91b89a988275a4caf492e12de Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Tue, 28 Jul 2020 19:50:57 -0700 Subject: [PATCH 1577/2522] Don't rewrite conditionals with empty branches into select. Executing an empty branch is faster than materializing the select. PiperOrigin-RevId: 323705674 Change-Id: Ibab748535d8dd136764f5b3bc30ca2097cf75151 --- .../xla/service/conditional_simplifier.cc | 50 +++++++++++++++++-- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/xla/service/conditional_simplifier.cc b/tensorflow/compiler/xla/service/conditional_simplifier.cc index bb19a63a9ce..199bc787b83 100644 --- a/tensorflow/compiler/xla/service/conditional_simplifier.cc +++ b/tensorflow/compiler/xla/service/conditional_simplifier.cc @@ -41,6 +41,26 @@ limitations under the License. namespace xla { namespace { + +// A computation with array type that only contains parameters and tuples is +// considered emtpy. +bool ComputationIsEmptyWithArrayRoot(const HloComputation* computation) { + bool empty_operations = absl::c_all_of( + computation->MakeInstructionPostOrder(), [](const HloInstruction* inst) { + return inst->opcode() == HloOpcode::kTuple || + inst->opcode() == HloOpcode::kGetTupleElement || + inst->opcode() == HloOpcode::kParameter; + }); + bool contains_array = false; + ShapeUtil::ForEachSubshape(computation->root_instruction()->shape(), + [&](const Shape& shape, const ShapeIndex& index) { + if (shape.IsArray()) { + contains_array = true; + } + }); + return empty_operations && contains_array; +} + // Tries to replace a conditional with a call operation of the corresponding // computation. 
If the given conditional has a constant branch_index, tries to // replace it with a call to its corresponding branch computation and then @@ -124,7 +144,6 @@ StatusOr TryRemoveConditional(HloInstruction* conditional) { << conditional->ToShortString(); return false; } - HloInstruction* true_call_op = create_call(0); HloInstruction* false_call_op = create_call(1); auto condition_broadcast = [&](const Shape& shape) { @@ -140,6 +159,14 @@ StatusOr TryRemoveConditional(HloInstruction* conditional) { return computation->AddInstruction(HloInstruction::CreateGetTupleElement( hlo->shape().tuple_shapes(i), hlo, i)); }; + + bool branch_empty = + ComputationIsEmptyWithArrayRoot(conditional->branch_computation(0)) || + ComputationIsEmptyWithArrayRoot(conditional->branch_computation(1)); + // Empty branch is faster to execute than select. + if (branch_empty) { + return false; + } std::function select = [&](HloInstruction* t, HloInstruction* f) { if (f->shape().IsToken()) { @@ -559,6 +586,10 @@ StatusOr ConditionalSimplifier::Run(HloModule* module) { absl::flat_hash_set removed_conditionals; for (HloInstruction* conditional_op : conditional_ops) { + if (conditional_op->has_sharding()) { + // The code below doesn't handle sharding properly. + continue; + } changed |= MergeDuplicateTupleElements(conditional_op); changed |= RemoveUnusedTupleElements(conditional_op); changed |= ReplaceRootWithEmptyTupleIfNoUsers(conditional_op); @@ -573,18 +604,27 @@ StatusOr ConditionalSimplifier::Run(HloModule* module) { // lets collect them first. absl::flat_hash_map> calling_conditionals; + // Keys of calling_conditionals to get a deterministic ordering. + std::vector calling_computationals_vector; for (HloInstruction* conditional : conditional_ops) { if (removed_conditionals.contains(conditional)) { continue; } + for (int64 branch = 0; branch < conditional->branch_count(); ++branch) { - calling_conditionals[conditional->branch_computation(branch)].insert( - conditional); + auto* branch_comp = conditional->branch_computation(branch); + if (!calling_conditionals.contains(branch_comp)) { + calling_computationals_vector.push_back(branch_comp); + } + calling_conditionals[branch_comp].insert(conditional); } } - for (const auto& entry : calling_conditionals) { + + for (auto* comp : calling_computationals_vector) { + auto entry = calling_conditionals.find(comp); + CHECK(entry != calling_conditionals.end()); TF_ASSIGN_OR_RETURN(bool result, TryRemoveUnusedConditionalOperands( - entry.first, entry.second)); + entry->first, entry->second)); changed |= result; } From 4353b9cd4d2a31924e1bd47e2649378401f12929 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 28 Jul 2020 20:09:13 -0700 Subject: [PATCH 1578/2522] [TF2XLA] Enable using MLIR bridge when TF_XLA_FLAGS=--tf_mlir_enable_mlir_bridge is on for tf.function(compile=True) PiperOrigin-RevId: 323707882 Change-Id: I34a513fad8a5119b8a68180fc7277ff80fc6a555 --- .../compiler/jit/xla_kernel_creator_util.cc | 50 +++++++++---------- tensorflow/compiler/tests/BUILD | 8 +++ tensorflow/compiler/tf2xla/xla_compiler.cc | 16 +----- 3 files changed, 33 insertions(+), 41 deletions(-) diff --git a/tensorflow/compiler/jit/xla_kernel_creator_util.cc b/tensorflow/compiler/jit/xla_kernel_creator_util.cc index 61c89d8a67a..3cc68f2a1a4 100644 --- a/tensorflow/compiler/jit/xla_kernel_creator_util.cc +++ b/tensorflow/compiler/jit/xla_kernel_creator_util.cc @@ -80,35 +80,31 @@ Status CreateXlaKernel(FunctionLibraryRuntime* flr, const NodeDef& node_def, // Make sure that kernels have been registered on the JIT device. XlaOpRegistry::RegisterCompilationKernels(); - - // Only check for compilability if the MLIR bridge is not enabled. - if (!GetMlirCommonFlags()->tf_mlir_enable_mlir_bridge) { - RecursiveCompilabilityChecker::UncompilableNodesMap uncompilable_nodes_map; - if (!IsCompilable(flr, node_def, &uncompilable_nodes_map)) { - std::vector - uncompilable_node_info; - for (const auto& it : uncompilable_nodes_map) { - for (const auto& info : it.second.second) { - uncompilable_node_info.emplace_back(info); - } + RecursiveCompilabilityChecker::UncompilableNodesMap uncompilable_nodes_map; + if (!IsCompilable(flr, node_def, &uncompilable_nodes_map)) { + std::vector + uncompilable_node_info; + for (const auto& it : uncompilable_nodes_map) { + for (const auto& info : it.second.second) { + uncompilable_node_info.emplace_back(info); } - string message = absl::StrCat( - "Function invoked by the following node is not compilable: ", - SummarizeNodeDef(node_def, /*max_inputs_in_summary=*/10), ".\n"); - absl::StrAppend(&message, "Uncompilable nodes:"); - for (const auto& node_info : uncompilable_node_info) { - string node_message = absl::StrCat("\n", node_info.name, ": ", - node_info.uncompilable_reason, "\n", - "\tStacktrace:\n"); - for (const auto& stack_frame : node_info.stack_trace) { - absl::StrAppendFormat(&node_message, "\t\tNode: %s, function: %s\n", - stack_frame.name, stack_frame.function_name); - } - absl::StrAppend(&message, node_message); - } - VLOG(1) << message; - return errors::InvalidArgument(message); } + string message = absl::StrCat( + "Function invoked by the following node is not compilable: ", + SummarizeNodeDef(node_def, /*max_inputs_in_summary=*/10), ".\n"); + absl::StrAppend(&message, "Uncompilable nodes:"); + for (const auto& node_info : uncompilable_node_info) { + string node_message = + absl::StrCat("\n", node_info.name, ": ", + node_info.uncompilable_reason, "\n", "\tStacktrace:\n"); + for (const auto& stack_frame : node_info.stack_trace) { + absl::StrAppendFormat(&node_message, "\t\tNode: %s, function: %s\n", + stack_frame.name, stack_frame.function_name); + } + absl::StrAppend(&message, node_message); + } + VLOG(1) << message; + return errors::InvalidArgument(message); } // Get function body, constant args, and resource args. 
diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index c2b5000647d..d9450cb6364 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -123,6 +123,7 @@ tf_xla_py_test( name = "adagrad_da_test", size = "small", srcs = ["adagrad_da_test.py"], + enable_mlir_bridge = True, python_version = "PY3", tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip @@ -160,6 +161,7 @@ tf_xla_py_test( srcs = ["add_n_test.py"], # TensorList ops are not implemented in the on-demand compilation model yet. disabled_backends = ["cpu_ondemand"], + enable_mlir_bridge = True, python_version = "PY3", tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip @@ -687,6 +689,7 @@ tf_xla_py_test( name = "fft_test", size = "medium", srcs = ["fft_test.py"], + enable_mlir_bridge = True, python_version = "PY3", shard_count = 6, tags = [ @@ -926,6 +929,7 @@ tf_xla_py_test( name = "pooling_ops_test", size = "medium", srcs = ["pooling_ops_test.py"], + enable_mlir_bridge = True, python_version = "PY3", shard_count = 20, tags = [ @@ -1239,6 +1243,7 @@ tf_xla_py_test( name = "stack_ops_test", size = "small", srcs = ["stack_ops_test.py"], + enable_mlir_bridge = True, python_version = "PY3", tags = [ "config-cuda-only", @@ -1299,6 +1304,7 @@ tf_xla_py_test( srcs = ["tensor_array_ops_test.py"], # TensorArray ops are not implemented in the on-demand compilation model yet. disabled_backends = ["cpu_ondemand"], + enable_mlir_bridge = True, python_version = "PY3", tags = [ "config-cuda-only", @@ -1327,6 +1333,7 @@ tf_xla_py_test( srcs = ["tensor_list_ops_test.py"], # TensorList ops are not implemented in the on-demand compilation model yet. disabled_backends = ["cpu_ondemand"], + enable_mlir_bridge = True, python_version = "PY3", tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip @@ -1882,6 +1889,7 @@ tf_xla_py_test( name = "special_math_test", size = "medium", srcs = ["special_math_test.py"], + enable_mlir_bridge = True, shard_count = 5, tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip diff --git a/tensorflow/compiler/tf2xla/xla_compiler.cc b/tensorflow/compiler/tf2xla/xla_compiler.cc index 0045a7958b4..db54f2f6563 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.cc +++ b/tensorflow/compiler/tf2xla/xla_compiler.cc @@ -23,7 +23,6 @@ limitations under the License. #include "tensorflow/compiler/jit/defs.h" #include "tensorflow/compiler/jit/flags.h" #include "tensorflow/compiler/jit/shape_inference.h" -#include "tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.h" #include "tensorflow/compiler/tf2xla/graph_compiler.h" #include "tensorflow/compiler/tf2xla/rearrange_function_argument.h" #include "tensorflow/compiler/tf2xla/shape_util.h" @@ -53,7 +52,6 @@ limitations under the License. 
#include "tensorflow/core/lib/hash/hash.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/protobuf/error_codes.pb.h" -#include "tensorflow/core/protobuf/graph_debug_info.pb.h" #include "tensorflow/core/util/dump_graph.h" namespace tensorflow { @@ -728,18 +726,8 @@ Status XlaCompiler::CompileFunction( } VLOG(1) << "===================================================="; - if (GetMlirCommonFlags()->tf_mlir_enable_mlir_bridge) { - VLOG(1) << "Using MLIR bridge"; - GraphDebugInfo debug_info; - TF_RETURN_IF_ERROR(CompileGraphToXlaHlo( - std::move(*graph), {args.data(), args.size()}, - options_.device_type.type_string(), options.use_tuple_arg, - *options_.flib_def, debug_info, options_.shape_representation_fn, - result)); - } else { - TF_RETURN_IF_ERROR( - CompileGraph(options, function_id, std::move(graph), args, result)); - } + TF_RETURN_IF_ERROR( + CompileGraph(options, function_id, std::move(graph), args, result)); VLOG(1) << "===================================================="; cache_[{function_id, arg_vector}] = *result; From d227b141aa2bf36d62f58c741afb267f55ee4512 Mon Sep 17 00:00:00 2001 From: Hye Soo Yang Date: Tue, 28 Jul 2020 20:51:23 -0700 Subject: [PATCH 1579/2522] Add `tf.image.stateless_random_flip_left_right` and `tf.image.stateless_random_flip_up_down` to tf.image ops. These functions are deterministic unlike the existing `tf.image_random_flip_*` functions which reuse same kernel if called with the same arguments and keep internal counter. The new functions guarantee the same results independent of how many times the they are called, and independent of global seed settings. PiperOrigin-RevId: 323711929 Change-Id: I2f972f40607a919ea10f747167b3505df3e9aad2 --- RELEASE.md | 6 + tensorflow/python/BUILD | 3 +- tensorflow/python/ops/image_ops_impl.py | 89 +++++++++++++-- tensorflow/python/ops/image_ops_test.py | 108 +++++++++++++++++- .../api/golden/v2/tensorflow.image.pbtxt | 8 ++ 5 files changed, 201 insertions(+), 13 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 02550040c47..0e5d7f11733 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -58,6 +58,12 @@ dataset when it is safe to do so. The optimization can be disabled via the `experimental_optimization.reorder_data_discarding_ops` dataset option. +* `tf.image`: + * Added `tf.image.stateless_random_flip_left_right` and + `tf.image.stateless_random_flip_up_down` functions that are deterministic. + That is, given the same seed, they produce the same results independent of + how many times the function is called, and independent of global seed + settings. 
* `tf.distribute`: * * `tf.keras`: diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 0c31a32531d..feadf029137 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -5066,7 +5066,7 @@ cuda_py_test( srcs = ["ops/image_ops_test.py"], data = ["//tensorflow/core:image_testdata"], python_version = "PY3", - shard_count = 5, + shard_count = 16, deps = [ ":array_ops", ":client", @@ -5083,6 +5083,7 @@ cuda_py_test( ":variables", "//tensorflow/core:protos_all_py", "//third_party/py/numpy", + "@absl_py//absl/testing:parameterized", ], ) diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index ed5a27c0868..2155248e358 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import functools import numpy as np from tensorflow.python.compat import compat @@ -37,6 +38,7 @@ from tensorflow.python.ops import nn from tensorflow.python.ops import nn_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import sort_ops +from tensorflow.python.ops import stateless_random_ops from tensorflow.python.ops import string_ops from tensorflow.python.ops import variables from tensorflow.python.util import deprecation @@ -364,7 +366,8 @@ def random_flip_up_down(image, seed=None): Raises: ValueError: if the shape of `image` not supported. """ - return _random_flip(image, 0, seed, 'random_flip_up_down') + random_func = functools.partial(random_ops.random_uniform, seed=seed) + return _random_flip(image, 0, random_func, 'random_flip_up_down') @tf_export('image.random_flip_left_right') @@ -407,19 +410,86 @@ def random_flip_left_right(image, seed=None): Raises: ValueError: if the shape of `image` not supported. """ - return _random_flip(image, 1, seed, 'random_flip_left_right') + random_func = functools.partial(random_ops.random_uniform, seed=seed) + return _random_flip(image, 1, random_func, 'random_flip_left_right') -def _random_flip(image, flip_index, seed, scope_name): +@tf_export('image.stateless_random_flip_left_right', v1=[]) +@dispatch.add_dispatch_support +def stateless_random_flip_left_right(image, seed): + """Randomly flip an image horizontally (left to right) deterministically. + + Guarantees the same results given the same `seed` independent of how many + times the function is called, and independent of global seed settings (e.g. + `tf.random.set_seed`). + + Example usage: + + >>> import numpy as np + + >>> image = np.array([[[1], [2]], [[3], [4]]]) + >>> seed = (2, 3) + >>> tf.image.stateless_random_flip_left_right(image, seed).numpy().tolist() + [[[2], [1]], [[4], [3]]] + + Args: + image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor + of shape `[height, width, channels]`. + seed: A shape [2] Tensor, the seed to the random number generator. Must have + dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.) + + Returns: + A tensor of the same type and shape as `image`. + """ + random_func = functools.partial( + stateless_random_ops.stateless_random_uniform, seed=seed) + return _random_flip( + image, 1, random_func, 'stateless_random_flip_left_right') + + +@tf_export('image.stateless_random_flip_up_down', v1=[]) +@dispatch.add_dispatch_support +def stateless_random_flip_up_down(image, seed): + """Randomly flip an image vertically (upside down) deterministically. 
+ + Guarantees the same results given the same `seed` independent of how many + times the function is called, and independent of global seed settings (e.g. + `tf.random.set_seed`). + + Example usage: + + >>> import numpy as np + + >>> image = np.array([[[1], [2]], [[3], [4]]]) + >>> seed = (2, 3) + >>> tf.image.stateless_random_flip_up_down(image, seed).numpy().tolist() + [[[3], [4]], [[1], [2]]] + + Args: + image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor + of shape `[height, width, channels]`. + seed: A shape [2] Tensor, the seed to the random number generator. Must have + dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.) + + Returns: + A tensor of the same type and shape as `image`. + """ + random_func = functools.partial( + stateless_random_ops.stateless_random_uniform, seed=seed) + return _random_flip( + image, 0, random_func, 'stateless_random_flip_up_down') + + +def _random_flip(image, flip_index, random_func, scope_name): """Randomly (50% chance) flip an image along axis `flip_index`. Args: image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor of shape `[height, width, channels]`. flip_index: Dimension along which to flip the image. - Vertical: 0, Horizontal: 1 - seed: A Python integer. Used to create a random seed. See - `tf.compat.v1.set_random_seed` for behavior. + Vertical is 0, Horizontal is 1. + random_func: partial function for calling either stateful or stateless + random ops with `seed` parameter specified. scope_name: Name of the scope in which the ops are added. Returns: @@ -434,7 +504,7 @@ def _random_flip(image, flip_index, seed, scope_name): shape = image.get_shape() def f_rank3(): - uniform_random = random_ops.random_uniform([], 0, 1.0, seed=seed) + uniform_random = random_func(shape=[], minval=0, maxval=1.0) mirror_cond = math_ops.less(uniform_random, .5) result = control_flow_ops.cond( mirror_cond, @@ -445,10 +515,7 @@ def _random_flip(image, flip_index, seed, scope_name): def f_rank4(): batch_size = array_ops.shape(image)[0] - uniform_random = random_ops.random_uniform([batch_size], - 0, - 1.0, - seed=seed) + uniform_random = random_func(shape=[batch_size], minval=0, maxval=1.0) flips = math_ops.round( array_ops.reshape(uniform_random, [batch_size, 1, 1, 1])) flips = math_ops.cast(flips, image.dtype) diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py index cd753db2f0b..10d81c47874 100644 --- a/tensorflow/python/ops/image_ops_test.py +++ b/tensorflow/python/ops/image_ops_test.py @@ -25,6 +25,7 @@ import math import os import time +from absl.testing import parameterized import numpy as np from six.moves import xrange # pylint: disable=redefined-builtin @@ -48,6 +49,7 @@ from tensorflow.python.ops import image_ops_impl from tensorflow.python.ops import io_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops +from tensorflow.python.ops import stateless_random_ops from tensorflow.python.ops import variables from tensorflow.python.platform import googletest from tensorflow.python.platform import test @@ -1001,7 +1003,8 @@ class AdjustSaturationTest(test_util.TensorFlowTestCase): self.assertAllClose(y_fused, y_baseline, rtol=2e-5, atol=1e-5) -class FlipTransposeRotateTest(test_util.TensorFlowTestCase): +class FlipTransposeRotateTest(test_util.TensorFlowTestCase, + parameterized.TestCase): def testInvolutionLeftRight(self): x_np = np.array([[1, 2, 3], [1, 2, 3]], dtype=np.uint8).reshape([2, 3, 1]) @@ -1076,6 +1079,109 @@ 
class FlipTransposeRotateTest(test_util.TensorFlowTestCase): self.assertGreaterEqual(count_flipped, 20) self.assertGreaterEqual(count_unflipped, 20) + # TODO(b/162345082): stateless random op generates different random number + # with xla_gpu. Update tests such that there is a single ground truth result + # to test against. + @parameterized.named_parameters( + ("_RandomFlipLeftRight", image_ops.stateless_random_flip_left_right), + ("_RandomFlipUpDown", image_ops.stateless_random_flip_up_down), + ) + def testRandomFlipStateless(self, func): + with test_util.use_gpu(): + x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8).reshape([2, 3, 1]) + y_np = np.array([[3, 2, 1], [6, 5, 4]], dtype=np.uint8).reshape([2, 3, 1]) + if "RandomFlipUpDown" in self.id(): + y_np = np.array( + [[4, 5, 6], [1, 2, 3]], dtype=np.uint8).reshape([2, 3, 1]) + + x_tf = constant_op.constant(x_np, shape=x_np.shape) + + iterations = 2 + flip_counts = [None for _ in range(iterations)] + flip_sequences = ["" for _ in range(iterations)] + test_seed = (1, 2) + split_seeds = stateless_random_ops.split(test_seed, 10) + seeds_list = self.evaluate(split_seeds) + for i in range(iterations): + count_flipped = 0 + count_unflipped = 0 + flip_seq = "" + for seed in seeds_list: + y_tf = func(x_tf, seed=seed) + y_tf_eval = self.evaluate(y_tf) + if y_tf_eval[0][0] == 1: + self.assertAllEqual(y_tf_eval, x_np) + count_unflipped += 1 + flip_seq += "U" + else: + self.assertAllEqual(y_tf_eval, y_np) + count_flipped += 1 + flip_seq += "F" + + flip_counts[i] = (count_flipped, count_unflipped) + flip_sequences[i] = flip_seq + + # Verify that results are deterministic. + for i in range(1, iterations): + self.assertAllEqual(flip_counts[0], flip_counts[i]) + self.assertAllEqual(flip_sequences[0], flip_sequences[i]) + + # TODO(b/162345082): stateless random op generates different random number + # with xla_gpu. Update tests such that there is a single ground truth result + # to test against. 
+ @parameterized.named_parameters( + ("_RandomFlipLeftRight", image_ops.stateless_random_flip_left_right), + ("_RandomFlipUpDown", image_ops.stateless_random_flip_up_down) + ) + def testRandomFlipStatelessWithBatch(self, func): + with test_util.use_gpu(): + batch_size = 16 + + # create single item of test data + x_np_raw = np.array( + [[1, 2, 3], [4, 5, 6]], dtype=np.uint8).reshape([1, 2, 3, 1]) + y_np_raw = np.array( + [[3, 2, 1], [6, 5, 4]], dtype=np.uint8).reshape([1, 2, 3, 1]) + if "RandomFlipUpDown" in self.id(): + y_np_raw = np.array( + [[4, 5, 6], [1, 2, 3]], dtype=np.uint8).reshape([1, 2, 3, 1]) + + # create batched test data + x_np = np.vstack([x_np_raw for _ in range(batch_size)]) + y_np = np.vstack([y_np_raw for _ in range(batch_size)]) + + x_tf = constant_op.constant(x_np, shape=x_np.shape) + + iterations = 2 + flip_counts = [None for _ in range(iterations)] + flip_sequences = ["" for _ in range(iterations)] + test_seed = (1, 2) + split_seeds = stateless_random_ops.split(test_seed, 10) + seeds_list = self.evaluate(split_seeds) + for i in range(iterations): + count_flipped = 0 + count_unflipped = 0 + flip_seq = "" + for seed in seeds_list: + y_tf = func(x_tf, seed=seed) + y_tf_eval = self.evaluate(y_tf) + for j in range(batch_size): + if y_tf_eval[j][0][0] == 1: + self.assertAllEqual(y_tf_eval[j], x_np[j]) + count_unflipped += 1 + flip_seq += "U" + else: + self.assertAllEqual(y_tf_eval[j], y_np[j]) + count_flipped += 1 + flip_seq += "F" + + flip_counts[i] = (count_flipped, count_unflipped) + flip_sequences[i] = flip_seq + + for i in range(1, iterations): + self.assertAllEqual(flip_counts[0], flip_counts[i]) + self.assertAllEqual(flip_sequences[0], flip_sequences[i]) + @test_util.run_deprecated_v1 def testRandomFlipLeftRightWithBatch(self): batch_size = 16 diff --git a/tensorflow/tools/api/golden/v2/tensorflow.image.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.image.pbtxt index 3d01153895b..93bf9bd4e59 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.image.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.image.pbtxt @@ -232,6 +232,14 @@ tf_module { name: "ssim_multiscale" argspec: "args=[\'img1\', \'img2\', \'max_val\', \'power_factors\', \'filter_size\', \'filter_sigma\', \'k1\', \'k2\'], varargs=None, keywords=None, defaults=[\'(0.0448, 0.2856, 0.3001, 0.2363, 0.1333)\', \'11\', \'1.5\', \'0.01\', \'0.03\'], " } + member_method { + name: "stateless_random_flip_left_right" + argspec: "args=[\'image\', \'seed\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "stateless_random_flip_up_down" + argspec: "args=[\'image\', \'seed\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "total_variation" argspec: "args=[\'images\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " From 5a710936251a14adf28316d68f9be599aa27750f Mon Sep 17 00:00:00 2001 From: Gaurav Jain Date: Tue, 28 Jul 2020 21:52:27 -0700 Subject: [PATCH 1580/2522] Fix tf.bincount errors in eager mode PiperOrigin-RevId: 323718056 Change-Id: Ic454b2b76a313d68b18e22e705cd7cbe9ae107b7 --- tensorflow/core/kernels/bincount_op.cc | 10 ++++++-- .../python/kernel_tests/bincount_op_test.py | 24 ++++++++++--------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/tensorflow/core/kernels/bincount_op.cc b/tensorflow/core/kernels/bincount_op.cc index a84b25f2541..35911ee5d55 100644 --- a/tensorflow/core/kernels/bincount_op.cc +++ b/tensorflow/core/kernels/bincount_op.cc @@ -175,13 +175,15 @@ class BincountOp : public OpKernel { void 
Compute(OpKernelContext* ctx) override { const Tensor& arr_t = ctx->input(0); const Tensor& size_tensor = ctx->input(1); - const Tensor& weights_t = ctx->input(2); - + OP_REQUIRES(ctx, size_tensor.dims() == 0, + errors::InvalidArgument("Shape must be rank 0 but is rank ", + size_tensor.dims())); int32 size = size_tensor.scalar()(); OP_REQUIRES( ctx, size >= 0, errors::InvalidArgument("size (", size, ") must be non-negative")); + const Tensor& weights_t = ctx->input(2); const auto arr = arr_t.flat(); const auto weights = weights_t.flat(); Tensor* output_t; @@ -226,6 +228,10 @@ class DenseBincountOp : public OpKernel { void Compute(OpKernelContext* ctx) override { const Tensor& data = ctx->input(0); + OP_REQUIRES(ctx, data.dims() <= 2, + errors::InvalidArgument( + "Shape must be at most rank 2 but is rank ", data.dims())); + const Tensor& size_t = ctx->input(1); const Tensor& weights = ctx->input(2); diff --git a/tensorflow/python/kernel_tests/bincount_op_test.py b/tensorflow/python/kernel_tests/bincount_op_test.py index efa68fd6521..133d33996f9 100644 --- a/tensorflow/python/kernel_tests/bincount_op_test.py +++ b/tensorflow/python/kernel_tests/bincount_op_test.py @@ -119,22 +119,24 @@ class BincountTest(test_util.TensorFlowTestCase): with self.assertRaises(errors.InvalidArgumentError): self.evaluate(bincount_ops.bincount([1, 2, 3, -1, 6, 8])) - @test_util.run_deprecated_v1 def test_shape_function(self): # size must be scalar. with self.assertRaisesRegex( - ValueError, "Shape must be rank 0 but is rank 1 for .*Bincount"): - gen_math_ops.bincount([1, 2, 3, -1, 6, 8], [1], []) + (ValueError, errors.InvalidArgumentError), + "Shape must be rank 0 but is rank 1 .*Bincount"): + gen_math_ops.bincount([1, 2, 3, 1, 6, 8], [1], []) # size must be positive. - with self.assertRaisesRegex(ValueError, "must be non-negative"): - gen_math_ops.bincount([1, 2, 3, -1, 6, 8], -5, []) + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be non-negative"): + gen_math_ops.bincount([1, 2, 3, 1, 6, 8], -5, []) # if size is a constant then the shape is known. - v1 = gen_math_ops.bincount([1, 2, 3, -1, 6, 8], 5, []) + v1 = gen_math_ops.bincount([1, 2, 3, 1, 6, 8], 5, []) self.assertAllEqual(v1.get_shape().as_list(), [5]) # if size is a placeholder then the shape is unknown. 
- s = array_ops.placeholder(dtype=dtypes.int32) - v2 = gen_math_ops.bincount([1, 2, 3, -1, 6, 8], s, []) - self.assertAllEqual(v2.get_shape().as_list(), [None]) + with ops.Graph().as_default(): + s = array_ops.placeholder(dtype=dtypes.int32) + v2 = gen_math_ops.bincount([1, 2, 3, 1, 6, 8], s, []) + self.assertAllEqual(v2.get_shape().as_list(), [None]) class BincountOpTest(test_util.TensorFlowTestCase, parameterized.TestCase): @@ -322,9 +324,9 @@ class BincountOpTest(test_util.TensorFlowTestCase, parameterized.TestCase): size = 10 self._test_bincount_col_binary(num_rows, num_cols, size, dtype) - @test_util.run_deprecated_v1 def test_invalid_rank(self): - with self.assertRaisesRegex(ValueError, "at most rank 2"): + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "at most rank 2"): with test_util.use_gpu(): self.evaluate( gen_math_ops.dense_bincount( From 5f21c0472e4405269d94ab8b295bf07a3267f992 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Tue, 28 Jul 2020 22:00:07 -0700 Subject: [PATCH 1581/2522] Make keras sequential work with tfnumpy.ndarray PiperOrigin-RevId: 323718791 Change-Id: I1000d893e92144136abcc93e5c2718f9835065a4 --- tensorflow/python/keras/engine/base_layer.py | 11 +++++--- tensorflow/python/keras/engine/sequential.py | 2 ++ .../python/ops/numpy_ops/np_interop_test.py | 27 +++++++++++++++++++ 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/keras/engine/base_layer.py b/tensorflow/python/keras/engine/base_layer.py index ac6c1a9a739..a63d499400a 100644 --- a/tensorflow/python/keras/engine/base_layer.py +++ b/tensorflow/python/keras/engine/base_layer.py @@ -71,6 +71,7 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import variables as tf_variables +from tensorflow.python.ops.numpy_ops import np_arrays from tensorflow.python.ops.ragged import ragged_tensor from tensorflow.python.platform import tf_logging from tensorflow.python.training.tracking import base as trackable @@ -929,7 +930,8 @@ class Layer(module.Module, version_utils.LayerVersionSelector): call_context = base_layer_utils.call_context() # Accept NumPy and scalar inputs by converting to Tensors. - if any(isinstance(x, (np.ndarray, float, int)) for x in input_list): + if any(isinstance(x, ( + np_arrays.ndarray, np.ndarray, float, int)) for x in input_list): inputs = nest.map_structure(_convert_numpy_or_python_types, inputs) input_list = nest.flatten(inputs) @@ -997,12 +999,13 @@ class Layer(module.Module, version_utils.LayerVersionSelector): call_context = base_layer_utils.call_context() # Accept NumPy and scalar inputs by converting to Tensors. - if any(isinstance(x, (np.ndarray, float, int)) for x in input_list): + if any(isinstance(x, ( + np_arrays.ndarray, np.ndarray, float, int)) for x in input_list): def _convert_non_tensor(x): # Don't call `ops.convert_to_tensor_v2` on all `inputs` because # `SparseTensors` can't be converted to `Tensor`. 
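# Illustrative sketch, not part of this change, and assuming the public
# tf.experimental.numpy endpoint for the same ndarray type: once
# np_arrays.ndarray joins the isinstance checks below, TF NumPy arrays fed to
# a Keras layer are converted to Tensors just like plain numpy arrays or
# Python scalars.
import tensorflow as tf
import tensorflow.experimental.numpy as tnp  # assumed endpoint

dense = tf.keras.layers.Dense(4)
out = dense(tnp.ones((2, 8)))  # accepted and converted, no manual cast needed
print(out.shape)  # (2, 4)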
- if isinstance(x, (np.ndarray, float, int)): + if isinstance(x, (np_arrays.ndarray, np.ndarray, float, int)): return ops.convert_to_tensor_v2(x) return x @@ -3242,7 +3245,7 @@ def _in_functional_construction_mode(layer, inputs, args, kwargs, input_list): def _convert_numpy_or_python_types(x): - if isinstance(x, (np.ndarray, float, int)): + if isinstance(x, (np_arrays.ndarray, np.ndarray, float, int)): return ops.convert_to_tensor_v2(x) return x diff --git a/tensorflow/python/keras/engine/sequential.py b/tensorflow/python/keras/engine/sequential.py index 979c1e47b2b..595757672ce 100644 --- a/tensorflow/python/keras/engine/sequential.py +++ b/tensorflow/python/keras/engine/sequential.py @@ -520,6 +520,8 @@ class Sequential(functional.Functional): def _get_shape_tuple(t): if hasattr(t, 'shape'): shape = t.shape + if isinstance(shape, tuple): + return shape if shape.rank is not None: return tuple(shape.as_list()) return None diff --git a/tensorflow/python/ops/numpy_ops/np_interop_test.py b/tensorflow/python/ops/numpy_ops/np_interop_test.py index 9074f377d0c..c0cd8e8d141 100644 --- a/tensorflow/python/ops/numpy_ops/np_interop_test.py +++ b/tensorflow/python/ops/numpy_ops/np_interop_test.py @@ -273,6 +273,33 @@ class InteropTest(tf.test.TestCase): self.assertIsInstance(result, np.ndarray) self.assertAllClose(result, onp.square(values)) + def testKerasInteropSequential(self): + class ProjectionLayer(tf.keras.layers.Layer): + """Linear projection layer using TF NumPy.""" + + def __init__(self, units): + super(ProjectionLayer, self).__init__() + self._units = units + + def build(self, input_shape): + stddev = np.sqrt(self._units).astype(np.float32) + initial_value = np.random.randn(input_shape[1], self._units).astype( + np.float32) / stddev + # Note that TF NumPy can interoperate with tf.Variable. + self.w = tf.Variable(initial_value, trainable=True) + + def call(self, inputs): + return np.matmul(inputs, self.w) + + model = tf.keras.Sequential( + [tf.keras.layers.Dense(100), ProjectionLayer(2)]) + output = model.call(np.random.randn(10, 100)) + + self.assertIsInstance(output, np.ndarray) + + dense_layer = tf.keras.layers.Dense(100) + output = dense_layer(np.random.randn(10, 100)) + def testPForInterop(self): def outer_product(a): return np.tensordot(a, a, 0) From 78a7560b41d4359fc449967a5a232be9960bfb65 Mon Sep 17 00:00:00 2001 From: Hye Soo Yang Date: Tue, 28 Jul 2020 22:07:52 -0700 Subject: [PATCH 1582/2522] Remove `import numpy as np` from example docstrings. 
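The examples keep working without the explicit import because the docstring test harness is expected to provide `np` alongside `tf` in the doctest namespace, so the import line only added noise. For illustration, one of the examples as it reads after the cleanup (output unchanged from the existing docstring):

    >>> image = np.array([[[1], [2]], [[3], [4]]])
    >>> tf.image.random_flip_up_down(image, 3).numpy().tolist()
    [[[3], [4]], [[1], [2]]]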
PiperOrigin-RevId: 323719817 Change-Id: Ie0c00c2f3f0524c32906576957de1df025298975 --- tensorflow/python/ops/image_ops_impl.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index 2155248e358..7a9342762e6 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -340,7 +340,6 @@ def random_flip_up_down(image, seed=None): Example usage: - >>> import numpy as np >>> image = np.array([[[1], [2]], [[3], [4]]]) >>> tf.image.random_flip_up_down(image, 3).numpy().tolist() [[[3], [4]], [[1], [2]]] @@ -382,8 +381,6 @@ def random_flip_left_right(image, seed=None): Example usage: - >>> import numpy as np - >>> image = np.array([[[1], [2]], [[3], [4]]]) >>> tf.image.random_flip_left_right(image, 5).numpy().tolist() [[[2], [1]], [[4], [3]]] @@ -425,8 +422,6 @@ def stateless_random_flip_left_right(image, seed): Example usage: - >>> import numpy as np - >>> image = np.array([[[1], [2]], [[3], [4]]]) >>> seed = (2, 3) >>> tf.image.stateless_random_flip_left_right(image, seed).numpy().tolist() @@ -458,8 +453,6 @@ def stateless_random_flip_up_down(image, seed): Example usage: - >>> import numpy as np - >>> image = np.array([[[1], [2]], [[3], [4]]]) >>> seed = (2, 3) >>> tf.image.stateless_random_flip_up_down(image, seed).numpy().tolist() From 3484f3cee456eaa4da3e17521768be2d4f70f31b Mon Sep 17 00:00:00 2001 From: Kibeom Kim Date: Tue, 28 Jul 2020 22:31:24 -0700 Subject: [PATCH 1583/2522] Fix @test_util.run_deprecated_v1 in third_party/tensorflow/python/kernel_tests/cast_op_test.py PiperOrigin-RevId: 323721922 Change-Id: I673451d1ba0e5df48a180fc74bacf5726de66ac3 --- tensorflow/python/kernel_tests/cast_op_test.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/kernel_tests/cast_op_test.py b/tensorflow/python/kernel_tests/cast_op_test.py index 3149aaadf57..7f5474ccf19 100644 --- a/tensorflow/python/kernel_tests/cast_op_test.py +++ b/tensorflow/python/kernel_tests/cast_op_test.py @@ -25,7 +25,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gradient_checker +from tensorflow.python.ops import gradient_checker_v2 from tensorflow.python.ops import math_ops from tensorflow.python.ops import variables from tensorflow.python.platform import test @@ -168,17 +168,19 @@ class CastOpTest(test.TestCase): self.evaluate(variables.global_variables_initializer()) self.assertEqual(1.0, self.evaluate(cast)) - @test_util.run_deprecated_v1 def testGradients(self): t = [dtypes.float32, dtypes.float64, dtypes.complex64, dtypes.complex128] for src_t in t: for dst_t in t: with self.cached_session(): x = constant_op.constant(1.0, src_t) - z = array_ops.identity(x) - y = math_ops.cast(z, dst_t) - err = gradient_checker.compute_gradient_error(x, [], y, []) - self.assertLess(err, 1e-3) + + def cast(x, dst_t=dst_t): + x = array_ops.identity(x) + x = math_ops.cast(x, dst_t) + return x + + gradient_checker_v2.compute_gradient(cast, [x]) class SparseTensorCastTest(test.TestCase): From f802e37f2a559d9dace8d116e5f71b6917d58295 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Tue, 28 Jul 2020 22:59:06 -0700 Subject: [PATCH 1584/2522] Fix compilation errors with TensorFlow TPU support by adding relevant headers PiperOrigin-RevId: 323724718 Change-Id: 
I73f44619643f8ef2010e4b775e796c1a48d66e21 --- tensorflow/core/tpu/kernels/BUILD | 4 ++++ .../core/tpu/kernels/tpu_configuration_ops.cc | 13 +++++++++++++ .../core/tpu/kernels/tpu_program_group_interface.h | 2 ++ 3 files changed, 19 insertions(+) diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index 06beab6b016..ed3227065ca 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -93,6 +93,8 @@ tf_kernel_library( deps = [ ":tpu_compilation_cache_factory", ":tpu_compilation_cache_interface", + ":tpu_compilation_cache_local_lookup", + ":tpu_compilation_cache_lookup", ":tpu_mesh_state_interface", ":tpu_op_consts", "//tensorflow/c:tf_status", @@ -287,6 +289,8 @@ cc_library( "//tensorflow/compiler/tf2xla:host_compute_metadata_proto_cc", "//tensorflow/compiler/xla/service:hlo_proto_cc", "//tensorflow/core/lib/core:status", + "@com_google_absl//absl/time", + "@com_google_absl//absl/types:span", ], ) diff --git a/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc b/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc index e098dbd682c..5a8c283c7c2 100644 --- a/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc +++ b/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc @@ -25,6 +25,8 @@ limitations under the License. #include "tensorflow/core/platform/refcount.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_factory.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_local_lookup.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.h" #include "tensorflow/core/tpu/kernels/tpu_mesh_state_interface.h" #include "tensorflow/core/tpu/kernels/tpu_op_consts.h" #include "tensorflow/core/tpu/tpu_api.h" @@ -253,6 +255,10 @@ void InitializeHostForDistributedTpuOp::Compute(OpKernelContext* ctx) { mesh_state_interface)); } + VLOG(1) << "Removing existing proto compilation cache lookup if it exists"; + OP_REQUIRES_OK(ctx, DeleteIfExists( + rmgr, tpu::kCompiledProtoCacheResourceName)); + if (enable_whole_mesh_compilations_) { // If this is a whole mesh compilation mode, create the compilation cache, // if missing. @@ -276,6 +282,13 @@ void InitializeHostForDistributedTpuOp::Compute(OpKernelContext* ctx) { if (local_compilation_cache != nullptr) { local_compilation_cache->Unref(); + + tpu::TpuCompilationCacheLookup* proto_lookup; + proto_lookup = + new tpu::TpuCompilationCacheLocalLookup(local_compilation_cache); + OP_REQUIRES_OK( + ctx, rmgr->Create(rmgr->default_container(), + tpu::kCompiledProtoCacheResourceName, proto_lookup)); } Tensor* ctx_output; diff --git a/tensorflow/core/tpu/kernels/tpu_program_group_interface.h b/tensorflow/core/tpu/kernels/tpu_program_group_interface.h index cb7347783b1..9a2bf8d9f02 100644 --- a/tensorflow/core/tpu/kernels/tpu_program_group_interface.h +++ b/tensorflow/core/tpu/kernels/tpu_program_group_interface.h @@ -20,6 +20,8 @@ limitations under the License. #include #include +#include "absl/time/time.h" +#include "absl/types/span.h" #include "tensorflow/compiler/tf2xla/host_compute_metadata.pb.h" #include "tensorflow/compiler/xla/service/hlo.pb.h" #include "tensorflow/core/lib/core/status.h" From fba496dd121dea0c5d05223313dbd04b1059a7b3 Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Tue, 28 Jul 2020 23:45:07 -0700 Subject: [PATCH 1585/2522] Disabling the single test for xla since in XLA gather doesn't error out. 
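On CPU (and the other non-XLA kernels) an out-of-range index makes gather fail with InvalidArgumentError, which is what testBadIndicesCPU expects after pinning the op to cpu:0; under the xla_gpu configuration that placement cannot be forced and XLA's gather does not error out, so the single test is disabled for XLA instead of keeping xla_enable_strict_auto_jit off for the whole target. A rough sketch of the behavior the test relies on (plain eager TF, not the test harness):

    import tensorflow as tf

    with tf.device("cpu:0"):
      params = tf.constant([[0, 1, 2], [3, 4, 5]])
      indices = tf.constant([[7], [2]])  # 7 is out of range for an axis of size 3
      try:
        tf.gather(params, indices, batch_dims=1)
      except tf.errors.InvalidArgumentError:
        print("out-of-range index rejected on CPU")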
PiperOrigin-RevId: 323728896 Change-Id: If880f1c27014c7c57073f5b9b14112ec18e7c547 --- tensorflow/python/kernel_tests/array_ops/BUILD | 1 - .../python/kernel_tests/array_ops/batch_gather_op_test.py | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/kernel_tests/array_ops/BUILD b/tensorflow/python/kernel_tests/array_ops/BUILD index e78f14d8ead..6086cfcf449 100644 --- a/tensorflow/python/kernel_tests/array_ops/BUILD +++ b/tensorflow/python/kernel_tests/array_ops/BUILD @@ -10,7 +10,6 @@ package( cuda_py_test( name = "batch_gather_op_test", srcs = ["batch_gather_op_test.py"], - xla_enable_strict_auto_jit = False, # b/162351094 deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", diff --git a/tensorflow/python/kernel_tests/array_ops/batch_gather_op_test.py b/tensorflow/python/kernel_tests/array_ops/batch_gather_op_test.py index bd5645b8168..e41053b3182 100644 --- a/tensorflow/python/kernel_tests/array_ops/batch_gather_op_test.py +++ b/tensorflow/python/kernel_tests/array_ops/batch_gather_op_test.py @@ -24,6 +24,7 @@ import numpy as np from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.platform import test @@ -104,6 +105,7 @@ class GatherTest(test.TestCase, parameterized.TestCase): gather_t = array_ops.batch_gather(params, indices) self.assertEqual([1, None], gather_t.get_shape().as_list()) + @test_util.disable_xla("Cannot force cpu placement for xla_gpu test") def testBadIndicesCPU(self): with ops.device_v2("cpu:0"): params = [[0, 1, 2], [3, 4, 5]] From a95897414e0e1e8cc22a5f10dfb71840edb1160c Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Wed, 29 Jul 2020 00:10:12 -0700 Subject: [PATCH 1586/2522] Enable mlir generated GPU kernels by default for cuda builds. PiperOrigin-RevId: 323731317 Change-Id: Icb7e52396d6634683fa23f97a345860af699a262 --- .bazelrc | 5 +++++ tensorflow/core/kernels/mlir_generated/BUILD | 4 ++-- tensorflow/core/kernels/mlir_generated/build_defs.bzl | 4 ++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/.bazelrc b/.bazelrc index 6a448b267e0..73152ba3c5c 100644 --- a/.bazelrc +++ b/.bazelrc @@ -173,6 +173,11 @@ build:using_cuda --define=using_cuda=true build:using_cuda --action_env TF_NEED_CUDA=1 build:using_cuda --crosstool_top=@local_config_cuda//crosstool:toolchain +# Enable the mlir generated GPU kernels only for cuda builds. +build --define=tensorflow_enable_mlir_generated_kernels=0 +# This is a more specific option, so it takes precedence over the line above for cuda builds. +build:using_cuda --define=tensorflow_enable_mlir_generated_kernels=1 + # This config refers to building CUDA op kernels with nvcc. 
build:cuda --config=using_cuda build:cuda --define=using_cuda_nvcc=true diff --git a/tensorflow/core/kernels/mlir_generated/BUILD b/tensorflow/core/kernels/mlir_generated/BUILD index 79ccda50c87..9f3efe9d972 100644 --- a/tensorflow/core/kernels/mlir_generated/BUILD +++ b/tensorflow/core/kernels/mlir_generated/BUILD @@ -18,9 +18,9 @@ package( ) config_setting( - name = "mlir_generated_gpu_kernels_enabled", + name = "mlir_generated_gpu_kernels_disabled", define_values = { - "tensorflow_enable_mlir_generated_gpu_kernels": "1", + "tensorflow_enable_mlir_generated_gpu_kernels": "0", }, ) diff --git a/tensorflow/core/kernels/mlir_generated/build_defs.bzl b/tensorflow/core/kernels/mlir_generated/build_defs.bzl index 3426aba94a4..2bf6e8fa3bb 100644 --- a/tensorflow/core/kernels/mlir_generated/build_defs.bzl +++ b/tensorflow/core/kernels/mlir_generated/build_defs.bzl @@ -4,8 +4,8 @@ load("@local_config_cuda//cuda:build_defs.bzl", "cuda_gpu_architectures", "if_cu def if_mlir_generated_gpu_kernels_enabled(if_true, if_false = []): return select({ - "//tensorflow/core/kernels/mlir_generated:mlir_generated_gpu_kernels_enabled": if_true, - "//conditions:default": if_false, + "//tensorflow/core/kernels/mlir_generated:mlir_generated_gpu_kernels_disabled": if_false, + "//conditions:default": if_true, }) def _lookup_file(filegroup, path): From 7b090a4e269440447c0d76f71249183fce6d8b2a Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Wed, 29 Jul 2020 01:45:43 -0700 Subject: [PATCH 1587/2522] Enable mlir generated GPU kernels by default for cuda builds. PiperOrigin-RevId: 323742641 Change-Id: I97f7cc8743eafe85fbbb26672131df0cb4388c52 --- .bazelrc | 5 ----- tensorflow/core/kernels/mlir_generated/BUILD | 4 ++-- tensorflow/core/kernels/mlir_generated/build_defs.bzl | 4 ++-- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/.bazelrc b/.bazelrc index 73152ba3c5c..6a448b267e0 100644 --- a/.bazelrc +++ b/.bazelrc @@ -173,11 +173,6 @@ build:using_cuda --define=using_cuda=true build:using_cuda --action_env TF_NEED_CUDA=1 build:using_cuda --crosstool_top=@local_config_cuda//crosstool:toolchain -# Enable the mlir generated GPU kernels only for cuda builds. -build --define=tensorflow_enable_mlir_generated_kernels=0 -# This is a more specific option, so it takes precedence over the line above for cuda builds. -build:using_cuda --define=tensorflow_enable_mlir_generated_kernels=1 - # This config refers to building CUDA op kernels with nvcc. 
build:cuda --config=using_cuda build:cuda --define=using_cuda_nvcc=true diff --git a/tensorflow/core/kernels/mlir_generated/BUILD b/tensorflow/core/kernels/mlir_generated/BUILD index 9f3efe9d972..79ccda50c87 100644 --- a/tensorflow/core/kernels/mlir_generated/BUILD +++ b/tensorflow/core/kernels/mlir_generated/BUILD @@ -18,9 +18,9 @@ package( ) config_setting( - name = "mlir_generated_gpu_kernels_disabled", + name = "mlir_generated_gpu_kernels_enabled", define_values = { - "tensorflow_enable_mlir_generated_gpu_kernels": "0", + "tensorflow_enable_mlir_generated_gpu_kernels": "1", }, ) diff --git a/tensorflow/core/kernels/mlir_generated/build_defs.bzl b/tensorflow/core/kernels/mlir_generated/build_defs.bzl index 2bf6e8fa3bb..3426aba94a4 100644 --- a/tensorflow/core/kernels/mlir_generated/build_defs.bzl +++ b/tensorflow/core/kernels/mlir_generated/build_defs.bzl @@ -4,8 +4,8 @@ load("@local_config_cuda//cuda:build_defs.bzl", "cuda_gpu_architectures", "if_cu def if_mlir_generated_gpu_kernels_enabled(if_true, if_false = []): return select({ - "//tensorflow/core/kernels/mlir_generated:mlir_generated_gpu_kernels_disabled": if_false, - "//conditions:default": if_true, + "//tensorflow/core/kernels/mlir_generated:mlir_generated_gpu_kernels_enabled": if_true, + "//conditions:default": if_false, }) def _lookup_file(filegroup, path): From 3bdc374cb0eb8592ee808ad4c4401b09aad19d54 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 29 Jul 2020 02:01:39 -0700 Subject: [PATCH 1588/2522] Update GraphDef version to 477. PiperOrigin-RevId: 323744249 Change-Id: I7e4a915f2ed632c88c476aaf243d137bd736a695 --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 47c32ac04c6..bfe008bb87f 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 476 // Updated: 2020/7/28 +#define TF_GRAPH_DEF_VERSION 477 // Updated: 2020/7/29 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From 39cf466ff9f364ae4d88e94491eee73d33a2cf7b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 29 Jul 2020 02:01:39 -0700 Subject: [PATCH 1589/2522] compat: Update forward compatibility horizon to 2020-07-29 PiperOrigin-RevId: 323744251 Change-Id: I95815ddcdbc021ad052114f0181dbafb0fc0ea2c --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 9790cf62096..c8f0b19eb0d 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. 
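# Illustrative sketch, not part of this change: how the horizon is consumed.
# Op authors gate new graph behavior on tf.compat.forward_compatible(), and
# tests can pretend the horizon has already passed with the context manager.
import tensorflow as tf

if tf.compat.forward_compatible(2020, 7, 29):
  pass  # safe to emit the new op or attribute here
else:
  pass  # keep generating the old graph

with tf.compat.forward_compatibility_horizon(2020, 8, 1):
  pass  # code in this block sees the advanced horizon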
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 28) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 29) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From 8a0e1711f4def103d1c06036c27701213262f0a1 Mon Sep 17 00:00:00 2001 From: Ran Chen Date: Wed, 29 Jul 2020 02:20:16 -0700 Subject: [PATCH 1590/2522] Save a non-distributed model correctly We used to save in either the default replica context or the cross replica context. In either the variables behave in the desired way, which is as if there's no distribute strategy. This change make this behavior explicitly implemented. Note that now you will be able to optionally save a distributed version of the model by setting experimental_variable_policy to EXPAND_DISTRIBUTED_VARIABLES in SaveOptions. This change is somewhat messy due to the ongoing refactoring to DistributedVariable, but the fix is important and we can clean up later. PiperOrigin-RevId: 323746817 Change-Id: I5ec5db232d86be97c93a2d54c9d3b1ceb344b3df --- tensorflow/python/distribute/BUILD | 9 + .../integration_test/saved_model_test.py | 163 +++++++------- tensorflow/python/distribute/tpu_values.py | 17 ++ tensorflow/python/distribute/values.py | 78 ++++++- tensorflow/python/distribute/values_test.py | 204 ++++++++++++++++++ tensorflow/python/distribute/values_util.py | 19 ++ tensorflow/python/saved_model/save_test.py | 3 +- 7 files changed, 415 insertions(+), 78 deletions(-) diff --git a/tensorflow/python/distribute/BUILD b/tensorflow/python/distribute/BUILD index a3279e84ac2..947ec987de1 100644 --- a/tensorflow/python/distribute/BUILD +++ b/tensorflow/python/distribute/BUILD @@ -794,9 +794,13 @@ py_library( deps = [ ":distribute_lib", ":reduce_util", + "//tensorflow/python:control_flow_ops", "//tensorflow/python:framework_ops", + "//tensorflow/python:math_ops", "//tensorflow/python:tensor_util", "//tensorflow/python:variable_scope", + "//tensorflow/python/saved_model:save_context", + "//tensorflow/python/saved_model:save_options", ], ) @@ -806,6 +810,7 @@ py_library( deps = [ ":packed_distributed_variable", ":values", + ":values_util", "//tensorflow/python:framework_ops", "//tensorflow/python:math_ops", "//tensorflow/python:resource_variable_ops_gen", @@ -1165,6 +1170,7 @@ distribute_py_test( ":distribute_lib", ":distribute_utils", ":packed_distributed_variable", + ":parameter_server_strategy", ":strategy_combinations", ":test_util", ":tpu_strategy", @@ -1172,6 +1178,7 @@ distribute_py_test( ":values", "//tensorflow/core:protos_all_py", "//tensorflow/python:array_ops", + "//tensorflow/python:check_ops", "//tensorflow/python:constant_op", "//tensorflow/python:control_flow_ops", "//tensorflow/python:dtypes", @@ -1183,6 +1190,7 @@ distribute_py_test( "//tensorflow/python:saver", "//tensorflow/python:sparse_ops", "//tensorflow/python:sparse_tensor", + "//tensorflow/python:tensor_shape", "//tensorflow/python:tensor_spec", "//tensorflow/python:tf2", "//tensorflow/python:training", @@ -1195,6 +1203,7 @@ distribute_py_test( "//tensorflow/python/eager:test", "//tensorflow/python/saved_model:save_context", "//tensorflow/python/saved_model:save_options", + "//tensorflow/python/saved_model/model_utils:mode_keys", "//tensorflow/python/tpu:tpu_lib", "//tensorflow/python/types", "@absl_py//absl/testing:parameterized", diff --git a/tensorflow/python/distribute/integration_test/saved_model_test.py b/tensorflow/python/distribute/integration_test/saved_model_test.py index 60de590bb48..4455e1f79f5 100644 --- 
a/tensorflow/python/distribute/integration_test/saved_model_test.py +++ b/tensorflow/python/distribute/integration_test/saved_model_test.py @@ -68,25 +68,9 @@ class SaveAndLoadForServingTest(test.TestCase, parameterized.TestCase): # context and the cross-replica context. Saving happens in the cross replica # context or the default startegy's replica context. - def test_read_sync_on_read_variable_broken(self, strategy): + def test_read_sync_on_read_variable(self, strategy): # synchronizaiton=ON_READ variables are typically used in Keras metrics and # batch norm layers. - # - # This is broken now since the saved variable already has the aggregated - # value, but the saved tf.function is traced under the cross-replica context - # and contains the aggregation. - # - # Impacts: - # - MirroredStrategy, TPUStrategy - # - aggregation=NONE: error when saving. - # - aggregation=SUM: incorrect results. - # - aggregation=MEAN: slight computation overhead. - # - aggregation=ONLY_FIRST_REPLICA: none. - # - MultiWorkerMirroredStrategy: - # - aggregation=NONE: error when saving - # - aggregation=MEAN, SUM: error or hanging when using the loaded model. - # - aggregation=ONLY_FIRST_REPLICA: none. - # Note that batch norm uses aggregation=MEAN. class Model(tf.Module): @@ -113,9 +97,7 @@ class SaveAndLoadForServingTest(test.TestCase, parameterized.TestCase): loaded = tf.saved_model.load(export_dir) # The variable already has the aggregated value. self.assertEqual(self.evaluate(loaded.v.read_value()), 1.) - # TODO(b/159752793): reading the variable aggregates the values again. - # got 2., want 1. - self.assertEqual(self.evaluate(loaded()), 2.) + self.assertEqual(self.evaluate(loaded()), 1.) def test_read_mirrored_variable(self, strategy): # synchronizaiton=ON_WRITE is the default variable created under @@ -142,14 +124,10 @@ class SaveAndLoadForServingTest(test.TestCase, parameterized.TestCase): loaded = tf.saved_model.load(export_dir) self.assertEqual(self.evaluate(loaded()), 1.) - def test_update_sync_on_read_variable_broken(self, strategy): + def test_update_sync_on_read_variable(self, strategy): # It's rare to update aggregation=ON_READ variables in serving, but it's # possible that the SavedModel contains both serving and training graphs, # and the training may contain metrics layers. - # - # This is now partially broken since assign_add() and assign_sub() are not - # allowed in the cross-replica context if aggregation=SUM, which blocks - # saving the model. class Model(tf.Module): @@ -167,23 +145,15 @@ class SaveAndLoadForServingTest(test.TestCase, parameterized.TestCase): export_dir = self.get_temp_dir() with strategy.scope(): m = Model() - # got error, want no error. - with self.assertRaisesRegex(ValueError, - "SyncOnReadVariable does not support"): - tf.saved_model.save(m, export_dir) + tf.saved_model.save(m, export_dir) - # TODO(b/159752793): Uncomment after fix. - # loaded = tf.saved_model.load(export_dir) - # loaded.update() - # self.assertEqual(self.evaluate(loaded.v), 1.) + loaded = tf.saved_model.load(export_dir) + loaded.update() + self.assertEqual(self.evaluate(loaded.v), 1.) - def test_update_mirrored_variable_broken(self, strategy): + def test_update_mirrored_variable(self, strategy): # It's very rare to update aggregation=ON_WRITE variables in the forward # path, and this test case is mainly for completeness. 
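# Illustrative sketch, not part of this change: the default these tests now
# exercise is that tf.saved_model.save() writes a non-distributed model,
# behaving as if no tf.distribute.Strategy were present. Expanding distributed
# variables remains available as an opt-in through the variable policy checked
# by this change:
from tensorflow.python.saved_model import save_options

default_opts = save_options.SaveOptions()  # saved as a non-distributed model
expanded_opts = save_options.SaveOptions(
    experimental_variable_policy=save_options.VariablePolicy
    .EXPAND_DISTRIBUTED_VARIABLES)
# tf.saved_model.save(model, export_dir, options=expanded_opts)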
- # - # The saved tf.function updates each components of the distributed variable, - # which effectively updates the variable in the saved model N times where N - # equals the number of local replicas during training. class Model(tf.Module): @@ -205,9 +175,7 @@ class SaveAndLoadForServingTest(test.TestCase, parameterized.TestCase): loaded = tf.saved_model.load(export_dir) self.assertEqual(self.evaluate(loaded.v), 0.) loaded.update() - # TODO(b/159752793): Change after fix. - # got 2., want 1. - self.assertEqual(self.evaluate(loaded.v), 2.) + self.assertEqual(self.evaluate(loaded.v), 1.) def test_training_only_device(self, strategy): # tf.distribute APIs may enter device scopes, but the saved model should not @@ -314,12 +282,14 @@ class SaveAndLoadForTrainingTest(test.TestCase, parameterized.TestCase): # can workaround most issues since Keras loader restructs the layers with # saved configs if possible, in which case the saved graph is not used. - def test_read_sync_on_read_variable_broken(self, strategy): - # Reading a synchronizaiton=ON_READ in the replica context should only read - # the local value, however with a loaded model, reading in the replica - # context triggers aggregation as well. While one may argue the behavior is - # desirable, note that aggregation can cause hanging if the originall model - # is trained with MultiWorkerMirroredStrategy. + def test_read_sync_on_read_variable(self, strategy): + # Reading a synchronizaiton=ON_READ in the replica context should just read + # the local value. Reading it in the cross replica context aggregates the + # value from all replicas. Both are true with a loaded model. + # + # Note that if aggregation=SUM, the value of each replica is the saved value + # divided by the number of replicas. In this way if you load a model and + # save it again, the values of the variables don't change. class Model(tf.Module): @@ -334,26 +304,45 @@ class SaveAndLoadForTrainingTest(test.TestCase, parameterized.TestCase): return self.v.read_value() export_dir = self.get_temp_dir() + value = strategy.experimental_distribute_values_from_function( + lambda ctx: tf.identity([3., 7.][ctx.replica_id_in_sync_group])) with strategy.scope(): m = Model() - m.v.assign(1.) + strategy.run(m.v.assign, args=(value,)) self.assertAllEqual( - self.evaluate(strategy.experimental_local_results(m.v)), [0.5, 0.5]) + self.evaluate(strategy.experimental_local_results(m.v)), [3., 7.]) + self.assertEqual(self.evaluate(m.v.read_value()), 10.) tf.saved_model.save(m, export_dir) + del m with strategy.scope(): loaded = tf.saved_model.load(export_dir) - # After loading, reading in the replica context is the same as reading in - # the cross-replica context. - # TODO(b/159752793): change after fix. + # It's intended that we don't save the each replica, but just the aggregated + # value. self.assertAllEqual( self.evaluate( strategy.experimental_local_results(strategy.run(loaded))), - [1., 1.]) - self.assertEqual(self.evaluate(loaded.v.read_value()), 1.) + [5., 5.]) + self.assertEqual(self.evaluate(loaded.v.read_value()), 10.) - def test_update_sync_on_read_variable_broken(self, strategy): - # Can't even save. + # save and load again. + export_dir2 = self.get_temp_dir() + tf.saved_model.save(loaded, export_dir2) + # loaded.v.read_value() is still 1., both with and without strategy. + loaded = tf.saved_model.load(export_dir2) + self.assertEqual(self.evaluate(loaded.v.read_value()), 10.) 
+ with strategy.scope(): + loaded = tf.saved_model.load(export_dir2) + self.assertEqual(self.evaluate(loaded.v.read_value()), 10.) + + def test_update_sync_on_read_variable(self, strategy): + # Updating a synchronizaiton=ON_READ in the replica context should just + # update the local value. Updating it in the cross replica context updates + # each component of the variable. Both are true with a loaded model. + # + # Note that if assigning a variable whose aggregation=SUM in the cross + # replica context, each replica is assigned with the value divided by the + # number of replicas. class Model(tf.Module): @@ -363,19 +352,36 @@ class SaveAndLoadForTrainingTest(test.TestCase, parameterized.TestCase): synchronization=tf.VariableSynchronization.ON_READ, aggregation=tf.VariableAggregation.SUM) - @tf.function(input_signature=[tf.TensorSpec(shape=[1], dtype=tf.float32)]) + @tf.function(input_signature=[tf.TensorSpec(shape=(), dtype=tf.float32)]) def update(self, value): self.v.assign_add(value) export_dir = self.get_temp_dir() + value = strategy.experimental_distribute_values_from_function( + lambda ctx: tf.identity([3., 7.][ctx.replica_id_in_sync_group])) with strategy.scope(): m = Model() - # got error, want no error. - with self.assertRaisesRegex(ValueError, - "SyncOnReadVariable does not support"): - tf.saved_model.save(m, export_dir) + tf.saved_model.save(m, export_dir) + self.evaluate(m.v.assign(10.)) + self.assertAllEqual( + self.evaluate(strategy.experimental_local_results(m.v)), [5., 5.]) + del m + # TODO(b/161488560): strategy.run doesn't work with tf.function with + # input_signature. + # self.evaluate(strategy.run(m.update, args=(value,))) + # self.assertAllEqual( + # self.evaluate(strategy.experimental_local_results(m.v)), [8., 12.]) - # TODO(b/159752793): Complete the test after the saving issue is fixed. + with strategy.scope(): + loaded = tf.saved_model.load(export_dir) + self.evaluate(loaded.v.assign(10.)) + self.assertAllEqual( + self.evaluate(strategy.experimental_local_results(loaded.v)), + [5., 5.]) + self.evaluate(strategy.run(loaded.update, args=(value,))) + self.assertAllEqual( + self.evaluate(strategy.experimental_local_results(loaded.v)), + [8., 12.]) def test_read_mirrored_variable(self, strategy): @@ -402,13 +408,18 @@ class SaveAndLoadForTrainingTest(test.TestCase, parameterized.TestCase): strategy.experimental_local_results(strategy.run(loaded))), [1., 1.]) - def test_update_mirrored_variable_broken(self, strategy): + def test_update_mirrored_variable(self, strategy): # This is also uncommon since most model parameters should be updated by # optimizer, and this test case is for completeness. # - # It's broken the saved model may not contain the aggregation logic. Even if - # it does, it's wrong since all inputs to the aggregation are the same - # variable. + # In the cross replica context, assigning to the variable assigns the same + # value to all replicas. This is true with the loaded model as well. + # + # However in replica context, MirroredVariable (synchronization=ON_WRITE) + # in a loaded model behaves differently. Updating MirroredVariable only + # update the current replica's variable with the current replica's value. + # There's no aggregation. This doesn't affect variables that are updated + # through optimizer. This is work as intended but can be surprising. 
class Model(tf.Module): @@ -418,24 +429,28 @@ class SaveAndLoadForTrainingTest(test.TestCase, parameterized.TestCase): synchronization=tf.VariableSynchronization.ON_WRITE, aggregation=tf.VariableAggregation.MEAN) - @tf.function(input_signature=[tf.TensorSpec(shape=[1], dtype=tf.float32)]) + @tf.function(input_signature=[tf.TensorSpec(shape=(), dtype=tf.float32)]) def update(self, value): - self.v.assign_add(value[0]) + self.v.assign_add(value) export_dir = self.get_temp_dir() + value = strategy.experimental_distribute_values_from_function( + lambda ctx: tf.identity([1., 2.][ctx.replica_id_in_sync_group])) with strategy.scope(): m = Model() tf.saved_model.save(m, export_dir) + del m with strategy.scope(): loaded = tf.saved_model.load(export_dir) - value = strategy.experimental_distribute_dataset( - tf.data.Dataset.from_tensor_slices([1., 2.]).batch(2)) - strategy.run(loaded.update, args=(next(iter(value)),)) - # TODO(b/159752793): Change after fix. - # got [2., 4.], want [1.5, 1.5]. self.assertAllEqual( - self.evaluate(strategy.experimental_local_results(loaded.v)), [2., 4.]) + self.evaluate(strategy.experimental_local_results(loaded.v)), [0., 0.]) + self.evaluate(loaded.v.assign(1.)) + self.assertAllEqual( + self.evaluate(strategy.experimental_local_results(loaded.v)), [1., 1.]) + strategy.run(loaded.update, args=(value,)) + self.assertAllEqual( + self.evaluate(strategy.experimental_local_results(loaded.v)), [2., 3.]) # TODO(crccw): add a test case that trains a saved model with optimizer. diff --git a/tensorflow/python/distribute/tpu_values.py b/tensorflow/python/distribute/tpu_values.py index ce6d2e7029b..901b906e4d9 100644 --- a/tensorflow/python/distribute/tpu_values.py +++ b/tensorflow/python/distribute/tpu_values.py @@ -26,6 +26,7 @@ import contextlib from tensorflow.python.distribute import packed_distributed_variable as packed from tensorflow.python.distribute import values +from tensorflow.python.distribute import values_util from tensorflow.python.eager import context from tensorflow.python.eager import tape from tensorflow.python.framework import ops @@ -162,6 +163,8 @@ class TPUVariableMixin(object): @property def op(self): + if values_util.is_saving_non_distributed(): + return self._primary.op return values.DistributedVarOp(self._primary.op.name, self._primary.op.graph, self._primary.op.traceback, @@ -289,24 +292,38 @@ class TPUMirroredVariable(TPUVariableMixin, values.MirroredVariable): read_value=read_value) def scatter_sub(self, *args, **kwargs): + if values_util.is_saving_non_distributed(): + return self._primary.scatter_sub(*args, **kwargs) raise NotImplementedError def scatter_add(self, *args, **kwargs): + if values_util.is_saving_non_distributed(): + return self._primary.scatter_add(*args, **kwargs) raise NotImplementedError def scatter_max(self, *args, **kwargs): + if values_util.is_saving_non_distributed(): + return self._primary.scatter_max(*args, **kwargs) raise NotImplementedError def scatter_min(self, *args, **kwargs): + if values_util.is_saving_non_distributed(): + return self._primary.scatter_min(*args, **kwargs) raise NotImplementedError def scatter_mul(self, *args, **kwargs): + if values_util.is_saving_non_distributed(): + return self._primary.scatter_mul(*args, **kwargs) raise NotImplementedError def scatter_div(self, *args, **kwargs): + if values_util.is_saving_non_distributed(): + return self._primary.scatter_div(*args, **kwargs) raise NotImplementedError def scatter_update(self, *args, **kwargs): + if values_util.is_saving_non_distributed(): + return 
self._primary.scatter_update(*args, **kwargs) raise NotImplementedError def _is_mirrored(self): diff --git a/tensorflow/python/distribute/values.py b/tensorflow/python/distribute/values.py index 7dedbee2041..e6b77ad48cb 100644 --- a/tensorflow/python/distribute/values.py +++ b/tensorflow/python/distribute/values.py @@ -487,6 +487,8 @@ class DistributedVariable(DistributedDelegate, variables_lib.Variable, The op that evaluates to True or False depending on if all the component variables are initialized. """ + if values_util.is_saving_non_distributed(): + return self._primary.is_initialized() if self._use_packed_variable(): return self._packed_var.is_initialized() result = self._primary.is_initialized() @@ -502,6 +504,8 @@ class DistributedVariable(DistributedDelegate, variables_lib.Variable, @property def initializer(self): + if values_util.is_saving_non_distributed(): + return self._primary.initializer if self._initializer_op: init_op = self._initializer_op else: @@ -567,6 +571,8 @@ class DistributedVariable(DistributedDelegate, variables_lib.Variable, @property def handle(self): + if values_util.is_saving_non_distributed(): + return self._primary.handle replica_id = values_util.get_current_replica_id_as_int() if replica_id is None: raise ValueError("`handle` is not available outside the replica context" @@ -610,6 +616,8 @@ class DistributedVariable(DistributedDelegate, variables_lib.Variable, @property def op(self): + if values_util.is_saving_non_distributed(): + return self._primary.op # We want cross-replica code that does some var.op.X calls # to work (even if the current device isn't in self._devices), but # other uses of var.op in a cross-replica context to fail. @@ -630,6 +638,8 @@ class DistributedVariable(DistributedDelegate, variables_lib.Variable, def _get(self): """Returns the value for the current device or raises a ValueError.""" + if values_util.is_saving_non_distributed(): + return self._primary replica_id = values_util.get_current_replica_id_as_int() if replica_id is None: return self._get_cross_replica() @@ -638,6 +648,8 @@ class DistributedVariable(DistributedDelegate, variables_lib.Variable, def _get_on_device_or_primary(self): """Returns value in same replica or device if possible, else the _primary.""" + if values_util.is_saving_non_distributed(): + return self._primary replica_id = values_util.get_current_replica_id_as_int() if replica_id is None: # Try to find a value on the current device. 
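# Illustrative sketch, not part of this change: the end-to-end effect of the
# is_saving_non_distributed() short-circuits added throughout this file. A
# variable created under a strategy round-trips through SavedModel as a single
# ordinary resource variable ("/tmp/non_distributed_model" is a placeholder).
import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
  m = tf.Module()
  m.v = tf.Variable(3.)

path = "/tmp/non_distributed_model"
tf.saved_model.save(m, path)    # default options: non-distributed save
loaded = tf.saved_model.load(path)
print(loaded.v.numpy())         # 3.0, one component, no device annotations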
@@ -654,6 +666,8 @@ class DistributedVariable(DistributedDelegate, variables_lib.Variable, return array_ops.identity(self._get()) def value(self): + if values_util.is_saving_non_distributed(): + return self._primary.value() if self._policy: return self._policy.value(self) return self._get_on_device_or_primary().value() @@ -666,6 +680,8 @@ class DistributedVariable(DistributedDelegate, variables_lib.Variable, "numpy() is only available when eager execution is enabled.") def assign_sub(self, value, use_locking=False, name=None, read_value=True): + if values_util.is_saving_non_distributed(): + return self._primary.assign_sub(value, use_locking, name, read_value) if self._policy: return self._policy.assign_sub( self, @@ -677,6 +693,8 @@ class DistributedVariable(DistributedDelegate, variables_lib.Variable, self, value, use_locking=use_locking, name=name, read_value=read_value) def assign_add(self, value, use_locking=False, name=None, read_value=True): + if values_util.is_saving_non_distributed(): + return self._primary.assign_add(value, use_locking, name, read_value) if self._policy: return self._policy.assign_add( self, @@ -688,6 +706,8 @@ class DistributedVariable(DistributedDelegate, variables_lib.Variable, self, value, use_locking=use_locking, name=name, read_value=read_value) def assign(self, value, use_locking=False, name=None, read_value=True): + if values_util.is_saving_non_distributed(): + return self._primary.assign(value, use_locking, name, read_value) if self._policy: return self._policy.assign( self, @@ -699,6 +719,8 @@ class DistributedVariable(DistributedDelegate, variables_lib.Variable, self, value, use_locking=use_locking, name=name, read_value=read_value) def scatter_sub(self, sparse_delta, use_locking=False, name=None): + if values_util.is_saving_non_distributed(): + return self._primary.scatter_sub(sparse_delta, use_locking, name) if self._policy: return self._policy.scatter_sub( self, sparse_delta, use_locking=use_locking, name=name) @@ -706,6 +728,8 @@ class DistributedVariable(DistributedDelegate, variables_lib.Variable, self, sparse_delta, use_locking=use_locking, name=name) def scatter_add(self, sparse_delta, use_locking=False, name=None): + if values_util.is_saving_non_distributed(): + return self._primary.scatter_add(sparse_delta, use_locking, name) if self._policy: return self._policy.scatter_add( self, sparse_delta, use_locking=use_locking, name=name) @@ -713,6 +737,8 @@ class DistributedVariable(DistributedDelegate, variables_lib.Variable, self, sparse_delta, use_locking=use_locking, name=name) def scatter_mul(self, sparse_delta, use_locking=False, name=None): + if values_util.is_saving_non_distributed(): + return self._primary.scatter_mul(sparse_delta, use_locking, name) if self._policy: return self._policy.scatter_mul( self, sparse_delta, use_locking=use_locking, name=name) @@ -720,6 +746,8 @@ class DistributedVariable(DistributedDelegate, variables_lib.Variable, self, sparse_delta, use_locking=use_locking, name=name) def scatter_div(self, sparse_delta, use_locking=False, name=None): + if values_util.is_saving_non_distributed(): + return self._primary.scatter_div(sparse_delta, use_locking, name) if self._policy: return self._policy.scatter_div( self, sparse_delta, use_locking=use_locking, name=name) @@ -727,6 +755,8 @@ class DistributedVariable(DistributedDelegate, variables_lib.Variable, self, sparse_delta, use_locking=use_locking, name=name) def scatter_min(self, sparse_delta, use_locking=False, name=None): + if values_util.is_saving_non_distributed(): + 
return self._primary.scatter_min(sparse_delta, use_locking, name) if self._policy: return self._policy.scatter_min( self, sparse_delta, use_locking=use_locking, name=name) @@ -734,6 +764,8 @@ class DistributedVariable(DistributedDelegate, variables_lib.Variable, self, sparse_delta, use_locking=use_locking, name=name) def scatter_max(self, sparse_delta, use_locking=False, name=None): + if values_util.is_saving_non_distributed(): + return self._primary.scatter_max(sparse_delta, use_locking, name) if self._policy: return self._policy.scatter_max( self, sparse_delta, use_locking=use_locking, name=name) @@ -741,6 +773,8 @@ class DistributedVariable(DistributedDelegate, variables_lib.Variable, self, sparse_delta, use_locking=use_locking, name=name) def scatter_update(self, sparse_delta, use_locking=False, name=None): + if values_util.is_saving_non_distributed(): + return self._primary.scatter_update(sparse_delta, use_locking, name) if self._policy: return self._policy.scatter_update( self, sparse_delta, use_locking=use_locking, name=name) @@ -763,12 +797,16 @@ class DistributedVariable(DistributedDelegate, variables_lib.Variable, return {trackable.VARIABLE_VALUE_KEY: _saveable_factory} def _as_graph_element(self): + if values_util.is_saving_non_distributed(): + return self._primary._as_graph_element() # pylint: disable=protected-access if self._policy: return self._policy._as_graph_element(self) # pylint: disable=protected-access raise NotImplementedError("No policy set for calling _as_graph_element.") def _get_cross_replica(self): + if values_util.is_saving_non_distributed(): + return self._primary if self._policy: return self._policy._get_cross_replica(self) # pylint: disable=protected-access @@ -827,6 +865,8 @@ class DistributedVariable(DistributedDelegate, variables_lib.Variable, Updated variable or `tf.Operation`. 
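# Illustrative sketch, not part of this change: outside of saving, the
# scatter_* guards above keep restricting mirrored variables to NONE or
# ONLY_FIRST_REPLICA aggregation, for example:
import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
  w = tf.Variable([1., 2., 3.],
                  aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA)

w.scatter_update(tf.IndexedSlices(values=[9.], indices=[1]))
print(w.numpy())  # [1., 9., 3.]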
""" + if values_util.is_saving_non_distributed(): + return update_fn(self._primary, value, **kwargs) with ds_context.enter_or_assert_strategy(self.distribute_strategy): if ds_context.in_cross_replica_context(): update_replica_id = distribute_lib.get_update_replica_id() @@ -919,6 +959,8 @@ class MirroredVariable(DistributedVariable, Mirrored): return _on_write_update_replica(self, update_fn, value, **kwargs) def scatter_min(self, *args, **kwargs): + if values_util.is_saving_non_distributed(): + return self._primary.scatter_min(*args, **kwargs) if (self._aggregation != vs.VariableAggregation.ONLY_FIRST_REPLICA and self._aggregation != vs.VariableAggregation.NONE): raise NotImplementedError(values_util.scatter_error_msg.format( @@ -926,20 +968,26 @@ class MirroredVariable(DistributedVariable, Mirrored): return super(MirroredVariable, self).scatter_min(*args, **kwargs) def scatter_max(self, *args, **kwargs): + if values_util.is_saving_non_distributed(): + return self._primary.scatter_max(*args, **kwargs) if (self._aggregation != vs.VariableAggregation.ONLY_FIRST_REPLICA and self._aggregation != vs.VariableAggregation.NONE): raise NotImplementedError(values_util.scatter_error_msg.format( - op_name="scatter_min", aggregation=self._aggregation)) + op_name="scatter_max", aggregation=self._aggregation)) return super(MirroredVariable, self).scatter_max(*args, **kwargs) def scatter_update(self, *args, **kwargs): + if values_util.is_saving_non_distributed(): + return self._primary.scatter_update(*args, **kwargs) if (self._aggregation != vs.VariableAggregation.ONLY_FIRST_REPLICA and self._aggregation != vs.VariableAggregation.NONE): raise NotImplementedError(values_util.scatter_error_msg.format( - op_name="scatter_min", aggregation=self._aggregation)) + op_name="scatter_update", aggregation=self._aggregation)) return super(MirroredVariable, self).scatter_update(*args, **kwargs) def _get_cross_replica(self): + if values_util.is_saving_non_distributed(): + return self._primary.read_value() # Return identity, to avoid directly exposing the variable to the user and # allowing it to be modified by mistake. return array_ops.identity(Mirrored._get_cross_replica(self)) @@ -1022,6 +1070,8 @@ class SyncOnReadVariable(DistributedVariable): # TODO(b/154017756): Make assign behaivor in cross replica context consistent # with MirroredVariable. 
def assign_sub(self, value, use_locking=False, name=None, read_value=True): + if values_util.is_saving_non_distributed(): + return self._primary.assign_sub(value, use_locking, name, read_value) with ds_context.enter_or_assert_strategy(self._distribute_strategy): if ds_context.in_cross_replica_context() and not _in_update_replica(): return values_util.on_read_assign_sub_cross_replica( @@ -1031,6 +1081,8 @@ class SyncOnReadVariable(DistributedVariable): self).assign_sub(value, use_locking, name, read_value) def assign_add(self, value, use_locking=False, name=None, read_value=True): + if values_util.is_saving_non_distributed(): + return self._primary.assign_add(value, use_locking, name, read_value) with ds_context.enter_or_assert_strategy(self._distribute_strategy): if ds_context.in_cross_replica_context() and not _in_update_replica(): return values_util.on_read_assign_add_cross_replica( @@ -1040,6 +1092,8 @@ class SyncOnReadVariable(DistributedVariable): self).assign_add(value, use_locking, name, read_value) def assign(self, value, use_locking=False, name=None, read_value=True): + if values_util.is_saving_non_distributed(): + return self._primary.assign(value, use_locking, name, read_value) with ds_context.enter_or_assert_strategy(self._distribute_strategy): if ds_context.in_cross_replica_context() and not _in_update_replica(): return values_util.on_read_assign_cross_replica( @@ -1054,27 +1108,43 @@ class SyncOnReadVariable(DistributedVariable): method) def scatter_sub(self, *args, **kwargs): + if values_util.is_saving_non_distributed(): + return self._primary.scatter_sub(*args, **kwargs) self._scatter_not_implemented("scatter_sub") def scatter_add(self, *args, **kwargs): + if values_util.is_saving_non_distributed(): + return self._primary.scatter_add(*args, **kwargs) self._scatter_not_implemented("scatter_add") def scatter_mul(self, *args, **kwargs): + if values_util.is_saving_non_distributed(): + return self._primary.scatter_mul(*args, **kwargs) self._scatter_not_implemented("scatter_mul") def scatter_div(self, *args, **kwargs): + if values_util.is_saving_non_distributed(): + return self._primary.scatter_div(*args, **kwargs) self._scatter_not_implemented("scatter_div") def scatter_min(self, *args, **kwargs): + if values_util.is_saving_non_distributed(): + return self._primary.scatter_min(*args, **kwargs) self._scatter_not_implemented("scatter_min") def scatter_max(self, *args, **kwargs): + if values_util.is_saving_non_distributed(): + return self._primary.scatter_max(*args, **kwargs) self._scatter_not_implemented("scatter_max") def scatter_update(self, *args, **kwargs): + if values_util.is_saving_non_distributed(): + return self._primary.scatter_update(*args, **kwargs) self._scatter_not_implemented("scatter_update") def value(self): + if values_util.is_saving_non_distributed(): + return self._primary.value() with ds_context.enter_or_assert_strategy(self._distribute_strategy): if ds_context.in_cross_replica_context() and not _in_update_replica(): if self._aggregation == vs.VariableAggregation.ONLY_FIRST_REPLICA: @@ -1085,6 +1155,8 @@ class SyncOnReadVariable(DistributedVariable): return self._get_on_device_or_primary().value() def _get_cross_replica(self): + if values_util.is_saving_non_distributed(): + return self._primary.read_value() if self._aggregation == vs.VariableAggregation.ONLY_FIRST_REPLICA: # Consider returning a tensor value here to make the return value of # _get_cross_replica consistent. 
@@ -1097,6 +1169,8 @@ class SyncOnReadVariable(DistributedVariable): axis=None) def _as_graph_element(self): + if values_util.is_saving_non_distributed(): + return self._primary._as_graph_element() # pylint: disable=protected-access # pylint: disable=protected-access with ds_context.enter_or_assert_strategy(self._distribute_strategy): if ds_context.in_cross_replica_context(): diff --git a/tensorflow/python/distribute/values_test.py b/tensorflow/python/distribute/values_test.py index e445c1195be..48b6b973a6b 100644 --- a/tensorflow/python/distribute/values_test.py +++ b/tensorflow/python/distribute/values_test.py @@ -30,6 +30,7 @@ from tensorflow.python.distribute import combinations from tensorflow.python.distribute import distribute_lib from tensorflow.python.distribute import distribute_utils from tensorflow.python.distribute import packed_distributed_variable as packed +from tensorflow.python.distribute import parameter_server_strategy from tensorflow.python.distribute import strategy_combinations from tensorflow.python.distribute import test_util as ds_test_util from tensorflow.python.distribute import tpu_strategy @@ -43,9 +44,11 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import indexed_slices from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor +from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_spec from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import sparse_ops @@ -578,6 +581,207 @@ class DistributedVariableTest(test.TestCase, parameterized.TestCase): distribution.extended.update(v, read_assign_fn, args=(value,))) self.assertAllEqual(self.evaluate(v.values), [3., 6.]) + def testSaveNonDistributed(self, distribution, synchronization, aggregation): + # This test verifies that the DistributedVariable behave like the primary + # variable when saving a non-distributed version of the model (the default). + # The test asserts that the function traced under SaveContext has no device + # annotations and only reference the primary component of the variable. Note + # that please avoid capturing other eager tensors in this test to make the + # assertion easy. + + if isinstance(distribution.extended, + parameter_server_strategy.ParameterServerStrategyExtended): + self.skipTest("b/148689177: AggregatingVariable doesn't " + "conform to Variable interface well") + + # tf.function requires the return value to be Tensors, which is not always + # case for properties and methods of Variable, so we simply discard the + # return values. + def _discard_return(f): + f() + return + + def _test(f, v): + # This verifies that the function under SaveContext: + # - contains no device annotations. + # - only references the primary component of the variable. + g = def_function.function(lambda: _discard_return(f)) + options = save_options.SaveOptions( + experimental_variable_policy=save_options.VariablePolicy.NONE) + with save_context.save_context(options): + # The graph should contain no device. + graph = g.get_concrete_function().graph + for op in graph.get_operations(): + self.assertEqual(op.device, "", msg=str(op)) + # The function should only capture the primary variable. Note that it + # may not have captures, e.g. v.aggregation. 
+ captures = list(graph.captures) + self.assertLessEqual(len(captures), 1) + if graph.captures: + self.assertIs(captures[0][0], v._primary.handle) + + def _assert(cond): + return control_flow_ops.Assert(cond, [cond]) + + with distribution.scope(): + # We use four variables for convenience reasons. They have no special + # meaning. + # - v is used whenever possible, and for the methods that require the + # dtype to be integer. + # - w is used for scatter and gather, which require the variable to be + # non-scalar. + # - y is used when the dtype needs to be float. + v = variables_lib.Variable( + 0, + synchronization=synchronization, + aggregation=aggregation, + trainable=True) + w = variables_lib.Variable([0., 0., 0.], + synchronization=synchronization, + aggregation=aggregation, + trainable=True) + y = variables_lib.Variable( + 7., + synchronization=synchronization, + aggregation=aggregation) + + # pylint: disable=g-long-lambda + + # tf.Variable properties. + _test(lambda: self.assertEqual(v.aggregation, aggregation), v) + _test(lambda: self.assertIs(v.constraint, None), v) + # TODO(crccw): should we raise an error instead? + _test(lambda: self.assertEqual(v.device, v._primary.device), v) + _test(lambda: self.assertEqual(v.dtype, dtypes.int32), v) + if not context.executing_eagerly(): + _test(lambda: self.assertIs(v.graph, v._primary.graph), v) + if not context.executing_eagerly(): + _test(lambda: _assert(v.initial_value == 0), v) + _test(lambda: self.assertIs(v.initializer, v._primary.initializer), v) + _test(lambda: self.assertEqual(v.name, "Variable:0"), v) + if not context.executing_eagerly(): + _test(lambda: self.assertIs(v.op, v._primary.op), v) + _test(lambda: self.assertEqual(v.shape, tensor_shape.TensorShape(())), v) + _test(lambda: self.assertEqual(v.synchronization, synchronization), v) + _test(lambda: self.assertTrue(v.trainable, True), v) + + # tf.Variable methods. + _test(lambda: check_ops.assert_equal_v2(v.assign(1), 1), v) + _test(lambda: check_ops.assert_equal_v2(v.assign_add(1), 2), v) + _test(lambda: check_ops.assert_equal_v2(v.assign_sub(1), 1), v) + # TODO(b/148689177): Implement batch_scatter_update. + # count_up_to() is skipped since it's deprecated. + # eval() is skipped since it shouldn't called in a tf.function. + # experimental_ref() is skipped since it's deprecated. + # from_proto() is skipped since it shouldn't called in a tf.function. + # TODO(b/148689177): Implement gather_nd. + _test( + lambda: check_ops.assert_equal_v2(v.get_shape(), + tensor_shape.TensorShape(())), v) + # initialized_value() is skipped since it shouldn't called in a tf.function. + # load() is skipped since it shouldn't called in a tf.function. + _test(lambda: check_ops.assert_equal_v2(v.read_value(), 1), v) + # ref() is skipped since it shouldn't called in a tf.function. 
+    _test(
+        lambda: check_ops.assert_equal_v2(
+            w.scatter_add(_make_index_slices(values=[1., 2.], indices=[0, 2])),
+            [1., 0., 2.]), w)
+    _test(
+        lambda: check_ops.assert_equal_v2(
+            w.scatter_div(_make_index_slices(values=[4., 2.], indices=[0, 2])),
+            [0.25, 0., 1.]), w)
+    _test(
+        lambda: check_ops.assert_equal_v2(
+            w.scatter_max(_make_index_slices(values=[1., 0.5], indices=[1, 2])),
+            [0.25, 1., 1.]), w)
+    _test(
+        lambda: check_ops.assert_equal_v2(
+            w.scatter_min(_make_index_slices(values=[1., 0.5], indices=[0, 1])),
+            [0.25, 0.5, 1.]), w)
+    _test(
+        lambda: check_ops.assert_equal_v2(
+            w.scatter_mul(_make_index_slices(values=[2., 0.5], indices=[0, 1])),
+            [0.5, 0.25, 1.]), w)
+    # TODO(b/148689177): Implement scatter_nd_*
+    _test(
+        lambda: check_ops.assert_equal_v2(
+            w.scatter_sub(_make_index_slices(values=[2., 0.5], indices=[0, 1])),
+            [-1.5, -0.25, 1.]), w)
+    _test(
+        lambda: check_ops.assert_equal_v2(
+            w.scatter_update(
+                _make_index_slices(values=[2., 0.5], indices=[0, 1])),
+            [2., 0.5, 1.]), w)
+    # set_shape() is skipped since ResourceVariable doesn't implement it.
+    # to_proto() is skipped since it shouldn't be called in a tf.function.
+    _test(lambda: check_ops.assert_equal_v2(v.value(), 1), v)
+
+    # DistributedVariable should be treated as ResourceVariable, so it needs to
+    # conform to the ResourceVariable interface as well.
+    _test(lambda: self.assertIs(v.handle, v._primary.handle), v)
+
+    # Convert to tensor.
+    _test(lambda: check_ops.assert_equal_v2(ops.convert_to_tensor(v), 1), v)
+
+    # Control dependency.
+    def _with_control_dep():
+      with ops.control_dependencies([v.assign(1)]):
+        return array_ops.identity(1)
+
+    _test(_with_control_dep, v)
+
+    # Operator overloads.
+    _test(lambda: check_ops.assert_equal_v2(v.assign(7), 7), v)
+    _test(lambda: check_ops.assert_equal_v2(v + 1, 8), v)
+    _test(lambda: check_ops.assert_equal_v2(3 + v, 10), v)
+    _test(lambda: check_ops.assert_equal_v2(v + v, 14), v)
+    _test(lambda: check_ops.assert_equal_v2(v - 2, 5), v)
+    _test(lambda: check_ops.assert_equal_v2(v - v, 0), v)
+    _test(lambda: check_ops.assert_equal_v2(v * 2, 14), v)
+    _test(lambda: check_ops.assert_equal_v2(3 * v, 21), v)
+    _test(lambda: check_ops.assert_equal_v2(v * v, 49), v)
+    _test(
+        lambda: check_ops.assert_equal_v2(
+            math_ops.cast(v / 2, dtypes.float32), 3.5), v)
+    _test(
+        lambda: check_ops.assert_equal_v2(
+            math_ops.cast(14 / v, dtypes.float32), 2.), v)
+    _test(lambda: check_ops.assert_equal_v2(v // 2, 3), v)
+    _test(lambda: check_ops.assert_equal_v2(15 // v, 2), v)
+    _test(lambda: check_ops.assert_equal_v2(v % 2, 1), v)
+    _test(lambda: check_ops.assert_equal_v2(16 % v, 2), v)
+    _test(lambda: _assert(v < 12), v)
+    _test(lambda: _assert(v <= 12), v)
+    _test(lambda: _assert(not v > 12), v)
+    _test(lambda: _assert(not v >= 12), v)
+    _test(lambda: _assert(not 12 < v), v)
+    _test(lambda: _assert(not 12 <= v), v)
+    _test(lambda: _assert(12 > v), v)
+    _test(lambda: _assert(12 >= v), v)
+    # XLA doesn't implement pow() with integers.
+ _test(lambda: check_ops.assert_near_v2(pow(y, 3.), 343.), y) + _test(lambda: check_ops.assert_near_v2(pow(2., y), 128.), y) + _test(lambda: check_ops.assert_equal_v2(abs(v), 7), v) + _test(lambda: check_ops.assert_equal_v2(v & 3, 3), v) + _test(lambda: check_ops.assert_equal_v2(3 & v, 3), v) + _test(lambda: check_ops.assert_equal_v2(v | 8, 15), v) + _test(lambda: check_ops.assert_equal_v2(16 | v, 23), v) + _test(lambda: check_ops.assert_equal_v2(v ^ 3, 4), v) + _test(lambda: check_ops.assert_equal_v2(11 ^ v, 12), v) + _test(lambda: check_ops.assert_equal_v2(-v, -7), v) + _test(lambda: check_ops.assert_equal_v2(~v, ~7), v) + + # Index. + if isinstance(distribution.extended, tpu_strategy.TPUExtended): + # TODO(b/161572567): slice assignment doesn't work for TPU. + _test(lambda: check_ops.assert_equal_v2(w[0], 2.), w) + else: + _test(lambda: check_ops.assert_equal_v2(w[0].assign(1.), [1., 0.5, 1.]), + w) + _test(lambda: check_ops.assert_equal_v2(w[0], 1.), w) + + # pylint: enable=g-long-lambda + @combinations.generate( combinations.combine( diff --git a/tensorflow/python/distribute/values_util.py b/tensorflow/python/distribute/values_util.py index 5909bdd229e..099184d8dbb 100644 --- a/tensorflow/python/distribute/values_util.py +++ b/tensorflow/python/distribute/values_util.py @@ -26,6 +26,8 @@ from tensorflow.python.framework import tensor_util from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import variable_scope as vs +from tensorflow.python.saved_model import save_context +from tensorflow.python.saved_model import save_options def on_write_assign(var, value, use_locking=False, name=None, read_value=True): @@ -247,3 +249,20 @@ scatter_error_msg = ("{op_name} is only supported for mirrored " "variable (variable created within certain " "`tf.distribute.Strategy` scope) with NONE or " "`ONLY_FIRST_REPLICA` aggregation, got: {aggregation}.") + + +def is_saving_non_distributed(): + """Returns whether we're saving a non-distributed version of the model. + + It returns True iff we are in saving context and are saving a non-distributed + version of the model. That is, SaveOptions.experimental_variable_policy is + NONE. + + Returns: + A boolean. + """ + if not save_context.in_save_context(): + return False + options = save_context.get_save_options() + return (options is not None and options.experimental_variable_policy != + save_options.VariablePolicy.EXPAND_DISTRIBUTED_VARIABLES) diff --git a/tensorflow/python/saved_model/save_test.py b/tensorflow/python/saved_model/save_test.py index a5171f3eee2..28b8fa907e0 100644 --- a/tensorflow/python/saved_model/save_test.py +++ b/tensorflow/python/saved_model/save_test.py @@ -580,8 +580,7 @@ class SaveTest(test.TestCase, parameterized.TestCase): else: self.assertIsNone(v1) self.assertEmpty(v0.device) - # TODO(b/159752793): There should be only one input here. 
- self.assertLen(saved_function.signature.input_arg, 2) + self.assertLen(saved_function.signature.input_arg, 1) def test_expand_distributed_variables_not_allowed(self): root = tracking.AutoTrackable() From 9f5d815c031213f0620ccee1723b9c9b9cf5103e Mon Sep 17 00:00:00 2001 From: ShengYang1 Date: Wed, 29 Jul 2020 18:47:11 +0800 Subject: [PATCH 1591/2522] change device --- tensorflow/core/grappler/optimizers/remapper.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/remapper.cc b/tensorflow/core/grappler/optimizers/remapper.cc index 661ad7895c2..9d3fc920d23 100644 --- a/tensorflow/core/grappler/optimizers/remapper.cc +++ b/tensorflow/core/grappler/optimizers/remapper.cc @@ -929,7 +929,7 @@ bool FindComparisonWithCast(const RemapperContext& ctx, int node_index, const auto* node_def = node_view->node(); if (!IsCast(*node_def) || HasControlFaninOrFanout(*node_view)) return false; - if (NodeIsOnGpu(node_def)) return false; + if (!NodeIsOnCpu(node_def)) return false; if (node_view->NumRegularFanins() != 1) return false; const auto& regular_fanin_0 = node_view->GetRegularFanin(0); @@ -938,7 +938,7 @@ bool FindComparisonWithCast(const RemapperContext& ctx, int node_index, if (!IsComparison(*comparison_node_def) || HasControlFaninOrFanout(*comparison)) return false; - if (NodeIsOnGpu(comparison_node_def)) return false; + if (!NodeIsOnCpu(comparison_node_def)) return false; DataType comparator_dtype = GetDataTypeFromAttr(*comparison_node_def, "T"); DataType src_dtype = GetDataTypeFromAttr(*node_def, "SrcT"); From f49af531eff85c9f636ae99aab1c2dd2aec36a16 Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Wed, 29 Jul 2020 05:42:14 -0700 Subject: [PATCH 1592/2522] Test get_next_as_optional in a tf.while_loop. PiperOrigin-RevId: 323769441 Change-Id: Ia09b543b27eda55a52890328a937426d29a3a425 --- .../python/distribute/input_lib_test.py | 87 +++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/tensorflow/python/distribute/input_lib_test.py b/tensorflow/python/distribute/input_lib_test.py index 23397bf5070..a70eb50dbba 100644 --- a/tensorflow/python/distribute/input_lib_test.py +++ b/tensorflow/python/distribute/input_lib_test.py @@ -45,6 +45,7 @@ from tensorflow.python.eager import context from tensorflow.python.eager import def_function from tensorflow.python.eager import test from tensorflow.python.framework import composite_tensor +from tensorflow.python.framework import constant_op from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor @@ -860,6 +861,92 @@ class DistributedIteratorTensorTypeTest(DistributedIteratorTestBase, self.assertEqual(iterator._enable_get_next_as_optional, (not drop_remainder) and enable_get_next_as_optional) + @combinations.generate( + combinations.combine( + tf_api_version=2, + mode=["eager"], + distribution=[ + strategy_combinations.mirrored_strategy_with_gpu_and_cpu, + strategy_combinations.central_storage_strategy_with_gpu_and_cpu, + strategy_combinations.one_device_strategy, + strategy_combinations.mirrored_strategy_with_one_cpu, + # TODO(mdan): Add these? 
+ # strategy_combinations.multi_worker_mirrored_2x1_cpu, + # strategy_combinations.multi_worker_mirrored_2x1_gpu, + # strategy_combinations.multi_worker_mirrored_2x2_gpu, + ], + input_type=["dataset", "input_fn"], + drop_remainder=[False, True], + )) + def testRaggedSparseGetNextAsOptionalInLoop( + self, distribution, input_type, drop_remainder): + """Test with `RaggedTensor`s and `SparseTensor`s.""" + self.skipTest("b/323359921") + + global_batch_size = 8 + + def dataset_fn(ctx=None): + ctx = ctx or distribute_lib.InputContext() + batch_size = ctx.get_per_replica_batch_size(global_batch_size) + # Use 20 which isn't divisible by 8 to test partial batch behavior. + row_lengths = np.mod(np.arange(20), 4).astype(np.int64) + ragged_tensor = ragged_tensor_lib.RaggedTensor.from_row_lengths( + np.repeat(np.arange(20, dtype=np.float32), row_lengths), row_lengths) + dataset = dataset_ops.DatasetV2.from_tensor_slices({ + "dense": ragged_tensor.to_tensor(), + "ragged": ragged_tensor, + "sparse": ragged_tensor.to_sparse(), + }) + dataset = dataset.shard(ctx.num_input_pipelines, ctx.input_pipeline_id) + return dataset.batch(batch_size, drop_remainder=drop_remainder) + + if input_type == "dataset": + ds = distribution.experimental_distribute_dataset( + dataset_fn(distribute_lib.InputContext())) + else: + ds = distribution.experimental_distribute_datasets_from_function( + dataset_fn) + + # Iterate through all the batches and sum them up. + def sum_batch(per_replica_features): + """Sums the `PerReplica` values in the `per_replica_features` map.""" + + def map_fn(per_replica_values): + per_replica_sums = distribution.run( + (lambda x: math_ops.reduce_sum(x.values)) if all( + map(sparse_tensor.is_sparse, per_replica_values.values)) else + math_ops.reduce_sum, (per_replica_values,)) + return distribution.reduce( + reduce_util.ReduceOp.SUM, per_replica_sums, axis=None) + + return nest.map_structure(map_fn, per_replica_features) + + def _reduce(state, batch): + sums = sum_batch(batch) + return {name: value + sums[name] for name, value in state.items()} + + def sum_while_loop(ds): + iterator = iter(ds) + sums = {"dense": 0., "ragged": 0., "sparse": 0.} + try_next = constant_op.constant(True) + + while try_next: + opt_iterate = iterator.get_next_as_optional() + if opt_iterate.has_value(): + sums = _reduce(sums, opt_iterate.get_value()) + else: + try_next = False + return sums + + sums = def_function.function(sum_while_loop)(ds) + # For loops always call get next as optional inside tf functions, so we + # expect 310 here when using an input function (as there are 5 batches of + # size 4 round robined over 2 replicas. + expected_for_sum = 200. + if not drop_remainder or input_type == "input_fn": + expected_for_sum = 310. 
+ self.assertAllEqual(nest.flatten(sums), [expected_for_sum] * 3) + class DistributedIteratorMultiWorkerTest( multi_worker_test_base.MultiWorkerTestBase, DistributedIteratorTestBase, From ec93a9b75d66a93587bacc13fb777a8ca1497c76 Mon Sep 17 00:00:00 2001 From: Kibeom Kim Date: Wed, 29 Jul 2020 05:42:24 -0700 Subject: [PATCH 1593/2522] Fix @test_util.run_deprecated_v1 in third_party/tensorflow/python/kernel_tests/basic_gpu_test.py PiperOrigin-RevId: 323769468 Change-Id: If7dfc7cce3f50e28910f6499484fe2ef9ff9e1ad --- .../python/kernel_tests/basic_gpu_test.py | 70 +++++-------------- 1 file changed, 17 insertions(+), 53 deletions(-) diff --git a/tensorflow/python/kernel_tests/basic_gpu_test.py b/tensorflow/python/kernel_tests/basic_gpu_test.py index df27e8afbba..6db3717f08a 100644 --- a/tensorflow/python/kernel_tests/basic_gpu_test.py +++ b/tensorflow/python/kernel_tests/basic_gpu_test.py @@ -29,7 +29,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gradient_checker +from tensorflow.python.ops import gradient_checker_v2 from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import variables @@ -156,10 +156,8 @@ class MathBuiltinUnaryTest(test.TestCase): class BroadcastSimpleTest(test.TestCase): def _GetGradientArgs(self, xs, ys): - with self.cached_session(use_gpu=True) as sess: - return sess.run(broadcast_gradient_args(xs, ys)) + return self.evaluate(broadcast_gradient_args(xs, ys)) - @test_util.run_deprecated_v1 def testBroadcast(self): r0, r1 = self._GetGradientArgs([2, 3, 5], [1]) self.assertAllEqual(r0, []) @@ -167,48 +165,6 @@ class BroadcastSimpleTest(test.TestCase): _GRAD_TOL = {dtypes.float32: 1e-3} - def _compareGradientX(self, - x, - y, - np_func, - tf_func, - numeric_gradient_type=None): - z = np_func(x, y) - zs = list(z.shape) - with self.cached_session(): - inx = ops.convert_to_tensor(x) - iny = ops.convert_to_tensor(y) - if x.dtype in (np.float32, np.float64): - out = 1.1 * tf_func(inx, iny) - else: - out = tf_func(inx, iny) - xs = list(x.shape) - jacob_t, jacob_n = gradient_checker.compute_gradient( - inx, xs, out, zs, x_init_value=x) - tol = self._GRAD_TOL[dtypes.as_dtype(x.dtype)] - self.assertAllClose(jacob_t, jacob_n, rtol=tol, atol=tol) - - def _compareGradientY(self, - x, - y, - np_func, - tf_func, - numeric_gradient_type=None): - z = np_func(x, y) - zs = list(z.shape) - with self.cached_session(): - inx = ops.convert_to_tensor(x) - iny = ops.convert_to_tensor(y) - if x.dtype in (np.float32, np.float64): - out = 1.1 * tf_func(inx, iny) - else: - out = tf_func(inx, iny) - ys = list(np.shape(y)) - jacob_t, jacob_n = gradient_checker.compute_gradient( - iny, ys, out, zs, x_init_value=y) - tol = self._GRAD_TOL[dtypes.as_dtype(x.dtype)] - self.assertAllClose(jacob_t, jacob_n, rtol=tol, atol=tol) - def _compareGpu(self, x, y, np_func, tf_func): np_ans = np_func(x, y) with self.cached_session(use_gpu=True): @@ -220,17 +176,25 @@ class BroadcastSimpleTest(test.TestCase): self.assertShapeEqual(np_ans, out) # TODO(zhifengc/ke): make gradient checker work on GPU. 
- @test_util.run_deprecated_v1 def testGradient(self): - x = (1 + np.linspace(0, 5, np.prod([1, 3, 2]))).astype(np.float32).reshape( + x1 = (1 + np.linspace(0, 5, np.prod([1, 3, 2]))).astype(np.float32).reshape( [1, 3, 2]) - y = (1 + np.linspace(0, 5, np.prod([1, 3, 2]))).astype(np.float32).reshape( + x2 = (1 + np.linspace(0, 5, np.prod([1, 3, 2]))).astype(np.float32).reshape( [1, 3, 2]) - self._compareGradientX(x, y, np.true_divide, math_ops.truediv) - self._compareGradientY(x, y, np.true_divide, math_ops.truediv) - self._compareGpu(x, y, np.true_divide, math_ops.truediv) - self._compareGpu(x, y + 0.1, np.floor_divide, math_ops.floordiv) + def div_x1(x1): + return math_ops.truediv(x1, x2) * math_ops.cast(1.1, dtype=x1.dtype) + + def div_x2(x2): + return math_ops.truediv(x1, x2) * math_ops.cast(1.1, dtype=x2.dtype) + + gradient_checker_v2.compute_gradient( + div_x1, [x1], self._GRAD_TOL[dtypes.as_dtype(x1.dtype)]) + gradient_checker_v2.compute_gradient( + div_x2, [x2], self._GRAD_TOL[dtypes.as_dtype(x2.dtype)]) + + self._compareGpu(x1, x2, np.true_divide, math_ops.truediv) + self._compareGpu(x1, x2 + 0.1, np.floor_divide, math_ops.floordiv) class GpuMultiSessionMemoryTest(test_util.TensorFlowTestCase): From 905db751b969951db51001087b0d7dfef80c3fd9 Mon Sep 17 00:00:00 2001 From: Edward Loper Date: Wed, 29 Jul 2020 06:06:44 -0700 Subject: [PATCH 1594/2522] Move declaration of GetRegisteredPyObject into 'tensorflow::swig' namespace (to match location of its definition). PiperOrigin-RevId: 323772084 Change-Id: I897e324d23ab2e1fbc868d33f7523eb4dede65e6 --- tensorflow/python/util/util.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/util/util.h b/tensorflow/python/util/util.h index fc0b864416e..370db8e0a20 100644 --- a/tensorflow/python/util/util.h +++ b/tensorflow/python/util/util.h @@ -280,12 +280,11 @@ PyObject* RegisterPyObject(PyObject* name, PyObject* value); // Variant of RegisterPyObject that requires the object's value to be a type. PyObject* RegisterType(PyObject* type_name, PyObject* type); -} // namespace swig - // Returns a borrowed reference to an object that was registered with -// RegisterPyObject. (Do not call PY_DECREF on the result). +// RegisterPyObject. (Do not call Py_DECREF on the result). PyObject* GetRegisteredPyObject(const std::string& name); +} // namespace swig } // namespace tensorflow #endif // TENSORFLOW_PYTHON_UTIL_UTIL_H_ From e27cb981bed52e3d24573b61566e7d71b0118ba5 Mon Sep 17 00:00:00 2001 From: Alexander Belyaev Date: Wed, 29 Jul 2020 06:08:15 -0700 Subject: [PATCH 1595/2522] [MLIR][KERNEL_GEN] Simplify TF Framework alloc op and add dealloc. 
PiperOrigin-RevId: 323772270 Change-Id: I2f6003fe063ad4d7bfbd6999608bcc1749ace7ed --- .../compiler/mlir/tools/kernel_gen/ir/BUILD | 2 - .../tools/kernel_gen/ir/tf_framework_ops.cc | 14 +-- .../tools/kernel_gen/ir/tf_framework_ops.h | 3 - .../tools/kernel_gen/ir/tf_framework_ops.td | 101 ++++++------------ .../mlir/tools/kernel_gen/tests/invalid.mlir | 12 +-- .../mlir/tools/kernel_gen/tests/ops.mlir | 20 ++-- 6 files changed, 50 insertions(+), 102 deletions(-) diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/ir/BUILD b/tensorflow/compiler/mlir/tools/kernel_gen/ir/BUILD index 11fe9159e6d..0c3db5fa4ab 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/ir/BUILD +++ b/tensorflow/compiler/mlir/tools/kernel_gen/ir/BUILD @@ -16,8 +16,6 @@ gentbl( tbl_outs = [ ("-gen-op-decls", "tf_framework_ops.h.inc"), ("-gen-op-defs", "tf_framework_ops.cc.inc"), - ("-gen-struct-attr-decls", "tf_framework_structs.h.inc"), - ("-gen-struct-attr-defs", "tf_framework_structs.cc.inc"), ("-gen-dialect-decls", "tf_framework_dialect.h.inc"), ], tblgen = "@llvm-project//mlir:mlir-tblgen", diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.cc b/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.cc index 9a8a6ea006b..e67b5fd7f85 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.cc @@ -21,8 +21,6 @@ limitations under the License. #include "mlir/IR/DialectImplementation.h" // from @llvm-project namespace mlir { -#include "tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_structs.cc.inc" - namespace kernel_gen { namespace tf_framework { @@ -61,12 +59,9 @@ void TFFrameworkDialect::printType(Type type, DialectAsmPrinter &os) const { } //===----------------------------------------------------------------------===// -// AllocLikeOp +// AllocRawOp //===----------------------------------------------------------------------===// -template -static LogicalResult Verify(AllocLikeOp op) { - static_assert(llvm::is_one_of::value, - "applies to only alloc_output or alloc_temp"); +static LogicalResult Verify(AllocRawOp op) { // Check that the total number of operands matches the number of dynamic // dimensions specified in the memref type. unsigned result_dyn_dims = op.getType().getNumDynamicDims(); @@ -79,6 +74,11 @@ static LogicalResult Verify(AllocLikeOp op) { return success(); } +//===----------------------------------------------------------------------===// +// DeallocRawOp +//===----------------------------------------------------------------------===// +static LogicalResult Verify(DeallocRawOp op) { return success(); } + #define GET_OP_CLASSES #include "tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.cc.inc" diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.h b/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.h index 6fde701d4c0..ae621cb386a 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.h +++ b/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.h @@ -27,9 +27,6 @@ limitations under the License. 
#include "mlir/Interfaces/SideEffectInterfaces.h" // from @llvm-project namespace mlir { - -#include "tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_structs.h.inc" - namespace kernel_gen { namespace tf_framework { diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.td b/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.td index e5488346008..65481ad377f 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.td +++ b/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.td @@ -42,67 +42,44 @@ def TFFramework_OpKernelContextType : DialectType, - StructFieldAttr<"nic_compatible", BoolAttr>, - StructFieldAttr<"gpu_compatible", BoolAttr>]> { - let description = "Equivalent to `tensorflow::AllocatorAttributes` in C++"; -} - -def AllocationAttributes : StructAttr<"AllocationAttributes", - TFFramework_Dialect, [ - StructFieldAttr<"no_retry_on_failure", - DefaultValuedAttr>, - StructFieldAttr<"allocation_will_be_logged", - DefaultValuedAttr>]> { - let description = "Equivalent to `tensorflow::AllocationAttributes` in C++"; -} - - // Base class for TF Framework dialect ops. class TFFramework_Op traits = []> : Op { let verifier = "return Verify(*this);"; } +//===----------------------------------------------------------------------===// +// AllocRawOp +//===----------------------------------------------------------------------===// +def TFFramework_AllocRawOp : TFFramework_Op<"alloc_raw", + [MemoryEffects<[MemAlloc]>]> { + let summary = "allocation of tensors that uses TF Framework"; + let description = [{ + Allocation of tensors during kernel execution in the Compute method. -// Base class for TF Framework alloc ops. -class TFFramework_AllocLikeOp traits = []> : - TFFramework_Op]>], traits)> { + This should be used to allocate any temporary or output memref. + Corresponds to `Allocator::AllocateRaw` in + tensorflow/core/framework/allocator.h. + }]; - let arguments = (ins TFFramework_OpKernelContextType:$op_kernel_ctx, - Variadic:$dyn_sizes, - OptionalAttr:$allocator_attrs, - OptionalAttr:$allocation_attrs); - let results = (outs Res]>:$result); + let arguments = (ins TFFramework_OpKernelContextType:$ctx, + Variadic:$dyn_sizes); + let results = (outs Res]>:$result); let builders = [ OpBuilder<[{ OpBuilder &builder, OperationState &result, MemRefType memref_type, - Value op_kernel_ctx, - AllocatorAttributes allocator_attrs = AllocatorAttributes(), - AllocationAttributes allocation_attrs = AllocationAttributes() + Value ctx }], [{ - result.addOperands(op_kernel_ctx); + result.addOperands(ctx); result.types.push_back(memref_type); - if (allocator_attrs) - result.addAttribute("allocator_attrs", allocator_attrs); - if (allocation_attrs) - result.addAttribute("allocation_attrs", allocation_attrs); }]>, OpBuilder<[{ OpBuilder &builder, OperationState &result, MemRefType memref_type, - Value op_kernel_ctx, ValueRange dyn_sizes, - AllocatorAttributes allocator_attrs = AllocatorAttributes(), - AllocationAttributes allocation_attrs = AllocationAttributes() + Value ctx, ValueRange dyn_sizes }], [{ - build(builder, result, memref_type, op_kernel_ctx, allocator_attrs, - allocation_attrs); + build(builder, result, memref_type, ctx); result.addOperands(dyn_sizes); }]>]; @@ -110,42 +87,28 @@ class TFFramework_AllocLikeOp(); } }]; let assemblyFormat = [{ - `(` $op_kernel_ctx (`,` $dyn_sizes^ )? `)` attr-dict `:` type($result) + `(` $ctx (`,` $dyn_sizes^ )? 
`)` attr-dict `:` type($result) }]; } //===----------------------------------------------------------------------===// -// AllocOutputOp +// DeallocRawOp //===----------------------------------------------------------------------===// - -def TFFramework_AllocOutputOp - : TFFramework_AllocLikeOp<"alloc_output", DefaultResource> { - let summary = "allocation of tensorsmemory allocation operation"; +def TFFramework_DeallocRawOp : TFFramework_Op<"dealloc_raw", + [MemoryEffects<[MemFree]>]> { + let summary = "deallocation of tensors that uses TF Framework"; let description = [{ - Allocation of output tensors during kernel execution in the Compute method. + Deallocation of tensors during kernel execution in the Compute method. - This should be used to allocate any tensor that is going to be used as an - output from the kernel at the end of the current execution. - - Defined in third_party/tensorflow/core/framework/op_kernel.cc. + This should be used to deallocate any temporary memref that was allocated + with `tf_framework.alloc_raw`. + Corresponds to `Allocator::DeallocateRaw` in + tensorflow/core/framework/allocator.h. }]; -} -//===----------------------------------------------------------------------===// -// AllocTempOp -//===----------------------------------------------------------------------===// - -def TFFramework_AllocTempOp - : TFFramework_AllocLikeOp<"alloc_temp", DefaultResource> { - let summary = "memory allocation operation"; - let description = [{ - Allocation of temp tensors during kernel execution in the Compute method. - - This should be used to allocate any scratch storage that is needed while - the kernel is executing, and will not be retained. - - Defined in third_party/tensorflow/core/framework/op_kernel.cc. - }]; + let arguments = (ins TFFramework_OpKernelContextType:$ctx, + Arg:$memref); + let assemblyFormat = "`(` $ctx `,` $memref `)` attr-dict `:` type($memref)"; } #endif // TF_FRAMEWORK_OPS diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tests/invalid.mlir b/tensorflow/compiler/mlir/tools/kernel_gen/tests/invalid.mlir index e8983b701c8..1d1b3319515 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/tests/invalid.mlir +++ b/tensorflow/compiler/mlir/tools/kernel_gen/tests/invalid.mlir @@ -1,15 +1,7 @@ // RUN: kernel-gen-opt %s -split-input-file -verify-diagnostics -func @alloc_output(%ctx: !tf_framework.op_kernel_context, %size : index) { +func @alloc_raw(%ctx: !tf_framework.op_kernel_context, %size : index) { // expected-error @+1 {{`dyn_sizes` count 1 does not match dynamic dimensions}} - %buf = tf_framework.alloc_output(%ctx, %size) : memref - return -} - -// ----- - -func @alloc_temp(%ctx: !tf_framework.op_kernel_context, %size : index) { - // expected-error @+1 {{`dyn_sizes` count 1 does not match dynamic dimensions}} - %buf = tf_framework.alloc_temp(%ctx, %size) : memref<10xi8> + %buf = tf_framework.alloc_raw(%ctx, %size) : memref return } diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tests/ops.mlir b/tensorflow/compiler/mlir/tools/kernel_gen/tests/ops.mlir index d2a4a8866ce..19974ec9482 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/tests/ops.mlir +++ b/tensorflow/compiler/mlir/tools/kernel_gen/tests/ops.mlir @@ -1,21 +1,19 @@ // RUN: kernel-gen-opt %s | FileCheck %s // Verify the printed output can be parsed. -// RUN: kernel-gen-opt %s | kernel-gen-opt -allow-unregistered-dialect | FileCheck %s +// RUN: kernel-gen-opt %s | kernel-gen-opt | FileCheck %s // Verify the generic form can be parsed. 
-// RUN: kernel-gen-opt -mlir-print-op-generic %s | kernel-gen-opt -allow-unregistered-dialect | FileCheck %s +// RUN: kernel-gen-opt -mlir-print-op-generic %s | kernel-gen-opt | FileCheck %s -// CHECK-LABEL: func @alloc_output -func @alloc_output(%ctx: !tf_framework.op_kernel_context, +// CHECK-LABEL: func @alloc_raw +func @alloc_raw(%ctx: !tf_framework.op_kernel_context, %size_0 : index , %size_2 : index) { - %buf_0 = tf_framework.alloc_output(%ctx) : memref<10xi8> - %buf_1 = tf_framework.alloc_output(%ctx, %size_0, %size_2) : memref + %buf_0 = tf_framework.alloc_raw(%ctx) : memref<10xi8> + %buf_1 = tf_framework.alloc_raw(%ctx, %size_0, %size_2) : memref return } -// CHECK-LABEL: func @alloc_temp -func @alloc_temp(%ctx: !tf_framework.op_kernel_context, - %size_0 : index , %size_2 : index) { - %buf_0 = tf_framework.alloc_temp(%ctx) : memref<10xi8> - %buf_1 = tf_framework.alloc_temp(%ctx, %size_0, %size_2) : memref +// CHECK-LABEL: func @dealloc_raw +func @dealloc_raw(%ctx: !tf_framework.op_kernel_context, %memref : memref) { + tf_framework.dealloc_raw(%ctx, %memref) : memref return } From 5ed1a8f082b0a7c2fc58f5147ed57f17ad9f8db6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 29 Jul 2020 06:40:09 -0700 Subject: [PATCH 1596/2522] PR #41745: Fix the usage of uninitialized variable in adaptive_shared_batch_scheduler Imported from GitHub PR https://github.com/tensorflow/tensorflow/pull/41745 The variable best_score is uninitialized. Copybara import of the project: -- 68f9891f2f58dde14f6350ed824b28f92ceb3997 by Cheng CHEN : Fix the usage of uninitialized variable. PiperOrigin-RevId: 323776083 Change-Id: I06abbc952059c804c19379d928bfdbd529b73233 --- .../kernels/batching_util/adaptive_shared_batch_scheduler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h index f4dc47757d3..fedea93849c 100644 --- a/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h +++ b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h @@ -425,7 +425,7 @@ void AdaptiveSharedBatchScheduler::MaybeScheduleNextBatch() { return; } auto best_it = batches_.end(); - double best_score = std::numeric_limits::max; + double best_score; int64 now_micros = GetEnv()->NowMicros(); for (auto it = batches_.begin(); it != batches_.end(); it++) { if ((*it)->schedulable_time_micros() > now_micros) continue; From 6f86cb552d26a1ecdcea290ad5bbc630ac0a4c21 Mon Sep 17 00:00:00 2001 From: Stephan Herhut Date: Wed, 29 Jul 2020 07:35:52 -0700 Subject: [PATCH 1597/2522] Do not mandate the result type of shape computations but have it be inferred from context. The computation of a broadcasted shape forced the use of the shape type unnecessarily, which blocked further canonicalizations. 
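For illustration, the change boils down to no longer pinning the result type of
the shape ops in broadcast_utils.cc; a minimal before/after sketch of the
builder calls, mirroring the diff below:

    // Before: the result type was forced to !shape.shape explicitly.
    auto shape_type = shape::ShapeType::get(builder.getContext());
    Value lhs_shape_v =
        builder.createOrFold<shape::ShapeOfOp>(loc, shape_type, lhs);

    // After: the result type is omitted and inferred from context, which
    // keeps later canonicalization patterns applicable.
    Value lhs_shape_v = builder.createOrFold<shape::ShapeOfOp>(loc, lhs);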
PiperOrigin-RevId: 323783998 Change-Id: I55f317b739bad134a5cde7facb81f0b50e6180bf --- .../mlir/hlo/lib/utils/broadcast_utils.cc | 9 +++------ .../chlo_legalize_to_hlo_broadcasts.mlir | 20 ++++++------------- 2 files changed, 9 insertions(+), 20 deletions(-) diff --git a/tensorflow/compiler/mlir/hlo/lib/utils/broadcast_utils.cc b/tensorflow/compiler/mlir/hlo/lib/utils/broadcast_utils.cc index 73111c02dbd..c4466260042 100644 --- a/tensorflow/compiler/mlir/hlo/lib/utils/broadcast_utils.cc +++ b/tensorflow/compiler/mlir/hlo/lib/utils/broadcast_utils.cc @@ -58,13 +58,10 @@ Value ComputeBinaryElementwiseBroadcastingResultExtents(Location loc, Value lhs, } int64_t result_rank = std::max(lhs_type.getRank(), rhs_type.getRank()); - auto shape_type = shape::ShapeType::get(builder.getContext()); - Value lhs_shape_v = - builder.createOrFold(loc, shape_type, lhs); - Value rhs_shape_v = - builder.createOrFold(loc, shape_type, rhs); + Value lhs_shape_v = builder.createOrFold(loc, lhs); + Value rhs_shape_v = builder.createOrFold(loc, rhs); Value result_shape_v = builder.createOrFold( - loc, shape_type, lhs_shape_v, rhs_shape_v, nullptr /* error */); + loc, lhs_shape_v, rhs_shape_v, nullptr /* error */); return builder.createOrFold( loc, RankedTensorType::get({result_rank}, builder.getIndexType()), result_shape_v); diff --git a/tensorflow/compiler/mlir/hlo/tests/chlo_legalize_to_hlo_broadcasts.mlir b/tensorflow/compiler/mlir/hlo/tests/chlo_legalize_to_hlo_broadcasts.mlir index 997136e58cc..96e4fa3bd29 100644 --- a/tensorflow/compiler/mlir/hlo/tests/chlo_legalize_to_hlo_broadcasts.mlir +++ b/tensorflow/compiler/mlir/hlo/tests/chlo_legalize_to_hlo_broadcasts.mlir @@ -18,9 +18,7 @@ func @dynamicBroadcast(%arg0: tensor, %arg1: tensor) -> tensor : tensor<1xi64>} // CHECK-DAG: %[[ARG1_B:.+]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG1]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} @@ -41,9 +39,7 @@ func @dynamicBroadcastComplex(%arg0: tensor, %arg1: tensor) -> t // CHECK-DAG: %[[ARG1_S:.+]] = shape.shape_of %[[ARG1]] // CHECK-NEXT: %[[WITNESS:.+]] = shape.cstr_broadcastable %[[ARG0_S]], %[[ARG1_S]] // CHECK-NEXT: %[[FINAL_RESULT:.+]] = shape.assuming %[[WITNESS]] - // CHECK-DAG: %[[ARG0_SS:.+]] = shape.shape_of %[[ARG0]] - // CHECK-DAG: %[[ARG1_SS:.+]] = shape.shape_of %[[ARG1]] - // CHECK-NEXT: %[[RESULT_S:.+]] = shape.broadcast %[[ARG0_SS]], %[[ARG1_SS]] + // CHECK-NEXT: %[[RESULT_S:.+]] = shape.broadcast %[[ARG0_S]], %[[ARG1_S]] // CHECK-NEXT: %[[RESULT_EXTENTS:.+]] = shape.to_extent_tensor %[[RESULT_S]] // CHECK-DAG: %[[ARG0_B:.+]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG0]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor, tensor<2xindex>) -> tensor // CHECK-DAG: %[[ARG1_B:.+]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG1]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} : (tensor, tensor<2xindex>) -> tensor @@ -64,9 +60,7 @@ func @dynamicBroadcastCompare(%arg0: tensor, %arg1: tensor) -> t // CHECK-DAG: %[[ARG1_S:.+]] = shape.shape_of %[[ARG1]] // CHECK: %[[WITNESS:.+]] = shape.cstr_broadcastable %[[ARG0_S]], %[[ARG1_S]] // CHECK: %[[FINAL_RESULT:.+]] = shape.assuming %[[WITNESS]] - // CHECK-DAG: %[[ARG0_SS:.+]] = shape.shape_of %[[ARG0]] - // CHECK-DAG: %[[ARG1_SS:.+]] = shape.shape_of %[[ARG1]] - // CHECK: %[[RESULT_S:.+]] = shape.broadcast %[[ARG0_SS]], %[[ARG1_SS]] + // CHECK: %[[RESULT_S:.+]] = shape.broadcast %[[ARG0_S]], %[[ARG1_S]] // CHECK: %[[RESULT_EXTENTS:.+]] = shape.to_extent_tensor %[[RESULT_S]] // 
CHECK-DAG: %[[ARG0_B:.+]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG0]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor, tensor<2xindex>) -> tensor // CHECK-DAG: %[[ARG1_B:.+]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG1]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} : (tensor, tensor<2xindex>) -> tensor @@ -269,7 +263,6 @@ func @addScalarUnranked(%arg0: tensor, %arg1: tensor<*xf32>) -> tensor<*xf3 // CHECK: %[[WITNESS:.*]] = shape.cstr_broadcastable %[[SHAPE_0]], %[[SHAPE_RESHAPED]] // CHECK: %[[ASSUMING_RESULT:.*]] = shape.assuming %[[WITNESS]] -> (tensor) { // CHECK: %[[SCALAR_SHAPE:.*]] = shape.const_shape [] -// CHECK: %[[SHAPE_RESHAPED:.*]] = shape.shape_of %[[RESHAPED]] : tensor // CHECK: %[[BROADCASTED_SHAPE:.*]] = shape.broadcast %[[SCALAR_SHAPE]], %[[SHAPE_RESHAPED]] // CHECK: %[[SHAPE_TENSOR:.*]] = shape.to_extent_tensor %[[BROADCASTED_SHAPE]] : !shape.shape -> tensor<1xindex> // CHECK: %[[BROADCASTED_LHS:.*]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG_0]], %[[SHAPE_TENSOR]]) {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor, tensor<1xindex>) -> tensor @@ -306,10 +299,9 @@ func @addUnrankedScalar(%arg0: tensor<*xf32>, %arg1: tensor) -> tensor<*xf3 // CHECK: %[[SHAPE_1:.*]] = shape.shape_of %[[ARG_1]] : tensor // CHECK: %[[WITNESS:.*]] = shape.cstr_broadcastable %[[SHAPE_RESHAPED]], %[[SHAPE_1]] // CHECK: %[[ASSUMING_RESULT:.*]] = shape.assuming %[[WITNESS]] -> (tensor) { -// CHECK: %[[SHAPE_OF:.*]] = shape.shape_of %[[RESHAPED]] : tensor -// CHECK: %[[SHAPE_RESHAPED:.*]] = shape.to_extent_tensor %[[SHAPE_OF]] -// CHECK: %[[BROADCASTED_LHS:.*]] = "mhlo.dynamic_broadcast_in_dim"(%[[RESHAPED]], %[[SHAPE_RESHAPED]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} : (tensor, tensor<1xindex>) -> tensor -// CHECK: %[[BROADCASTED_RHS:.*]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG_1]], %[[SHAPE_RESHAPED]]) {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor, tensor<1xindex>) -> tensor +// CHECK: %[[ASTENSOR:.*]] = shape.to_extent_tensor %[[SHAPE_RESHAPED]] +// CHECK: %[[BROADCASTED_LHS:.*]] = "mhlo.dynamic_broadcast_in_dim"(%[[RESHAPED]], %[[ASTENSOR]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} : (tensor, tensor<1xindex>) -> tensor +// CHECK: %[[BROADCASTED_RHS:.*]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG_1]], %[[ASTENSOR]]) {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor, tensor<1xindex>) -> tensor // CHECK: %[[BROADCASTED_RESULT:.*]] = mhlo.add %[[BROADCASTED_LHS]], %[[BROADCASTED_RHS]] : tensor // CHECK: shape.assuming_yield %[[BROADCASTED_RESULT]] : tensor // CHECK: } From a3fc5b10c0bc105ecd4c8d217d050f8438d7e8d0 Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Wed, 29 Jul 2020 07:36:32 -0700 Subject: [PATCH 1598/2522] Bump the references to github.com/google/ruy to commit 5bb02fbf90824c2eb6cd7418f766c593106a332b. 
PiperOrigin-RevId: 323784071 Change-Id: I8ed4d6e4447193c0eff03395e75fe70b9b572cc1 --- .../lite/micro/tools/make/third_party_downloads.inc | 4 ++-- tensorflow/lite/tools/make/download_dependencies.sh | 4 ++-- third_party/ruy/workspace.bzl | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tensorflow/lite/micro/tools/make/third_party_downloads.inc b/tensorflow/lite/micro/tools/make/third_party_downloads.inc index 99ab34daab8..0a85995efd0 100644 --- a/tensorflow/lite/micro/tools/make/third_party_downloads.inc +++ b/tensorflow/lite/micro/tools/make/third_party_downloads.inc @@ -56,8 +56,8 @@ SIFIVE_FE310_LIB_MD5 := "06ee24c4956f8e21670ab3395861fe64" KISSFFT_URL="https://github.com/mborgerding/kissfft/archive/v130.zip" KISSFFT_MD5="438ba1fef5783cc5f5f201395cc477ca" -RUY_URL="https://github.com/google/ruy/archive/d492ac890d982d7a153a326922f362b10de8d2ad.zip" -RUY_MD5="3a5c19abc60c3d9a8045ddf6b114067f" +RUY_URL="https://github.com/google/ruy/archive/5bb02fbf90824c2eb6cd7418f766c593106a332b.zip" +RUY_MD5="c720b1743360259ac45809a321f8f26c" CIFAR10_DATASET_URL="https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz" CIFAR10_DATASET_MD5="c32a1d4ab5d03f1284b67883e8d87530" diff --git a/tensorflow/lite/tools/make/download_dependencies.sh b/tensorflow/lite/tools/make/download_dependencies.sh index f60b937bb96..27537823be2 100755 --- a/tensorflow/lite/tools/make/download_dependencies.sh +++ b/tensorflow/lite/tools/make/download_dependencies.sh @@ -37,8 +37,8 @@ EIGEN_URL="$(grep -o 'https.*gitlab.com/libeigen/eigen/-/archive/.*tar\.gz' "${B EIGEN_SHA="$(eval echo $(grep '# SHARED_EIGEN_SHA' "${BZL_FILE_PATH}" | grep -o '\".*\"'))" GEMMLOWP_URL="$(grep -o 'https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" GEMMLOWP_SHA="$(eval echo $(grep '# SHARED_GEMMLOWP_SHA' "${BZL_FILE_PATH}" | grep -o '\".*\"'))" -RUY_URL="https://github.com/google/ruy/archive/d492ac890d982d7a153a326922f362b10de8d2ad.zip" -RUY_SHA="e1b38265ab36662c921be260c68dbe28349a539873baabd974a5140ea64f1fe0" +RUY_URL="https://github.com/google/ruy/archive/5bb02fbf90824c2eb6cd7418f766c593106a332b.zip" +RUY_SHA="d8f9dc52c0a52c8470e2e0b60bc16cba91853d812846c075f7ed8404990b003d" GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz" GOOGLETEST_SHA="58a6f4277ca2bc8565222b3bbd58a177609e9c488e8a72649359ba51450db7d8" ABSL_URL="$(grep -o 'https://github.com/abseil/abseil-cpp/.*tar.gz' "${BZL_FILE_PATH}" | head -n1)" diff --git a/third_party/ruy/workspace.bzl b/third_party/ruy/workspace.bzl index 35943b04e58..c2b8f0531de 100644 --- a/third_party/ruy/workspace.bzl +++ b/third_party/ruy/workspace.bzl @@ -5,11 +5,11 @@ load("//third_party:repo.bzl", "third_party_http_archive") def repo(): third_party_http_archive( name = "ruy", - sha256 = "e1b38265ab36662c921be260c68dbe28349a539873baabd974a5140ea64f1fe0", - strip_prefix = "ruy-d492ac890d982d7a153a326922f362b10de8d2ad", + sha256 = "d8f9dc52c0a52c8470e2e0b60bc16cba91853d812846c075f7ed8404990b003d", + strip_prefix = "ruy-5bb02fbf90824c2eb6cd7418f766c593106a332b", urls = [ - "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/ruy/archive/d492ac890d982d7a153a326922f362b10de8d2ad.zip", - "https://github.com/google/ruy/archive/d492ac890d982d7a153a326922f362b10de8d2ad.zip", + "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/ruy/archive/5bb02fbf90824c2eb6cd7418f766c593106a332b.zip", + 
"https://github.com/google/ruy/archive/5bb02fbf90824c2eb6cd7418f766c593106a332b.zip", ], build_file = "//third_party/ruy:BUILD", ) From 967eb7ae2353a0e32341ac60d21ebf4e1f49a52f Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Wed, 29 Jul 2020 21:48:03 +0700 Subject: [PATCH 1599/2522] Add test NewAppendableFile --- .../filesystem/plugins/s3/s3_filesystem_test.cc | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc index f367d0b6a98..e24af071405 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc @@ -209,6 +209,21 @@ TEST_F(S3FilesystemTest, NewWritableFile) { EXPECT_EQ("content1,content2", content); } +TEST_F(S3FilesystemTest, NewAppendableFile) { + const std::string path = GetURIForPath("AppendableFile"); + WriteString(path, "test"); + ASSERT_TF_OK(status_); + + auto writer = GetWriter(); + tf_s3_filesystem::NewAppendableFile(filesystem_, path.c_str(), writer.get(), + status_); + EXPECT_TF_OK(status_); + tf_writable_file::Append(writer.get(), "content", strlen("content"), status_); + EXPECT_TF_OK(status_); + tf_writable_file::Close(writer.get(), status_); + EXPECT_TF_OK(status_); +} + } // namespace } // namespace tensorflow From 9f1aadb48fab3b8cc8c4f57f9851cbac632703b5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 29 Jul 2020 07:40:48 -0700 Subject: [PATCH 1600/2522] Fix @test_util.run_deprecated_v1 in third_party/tensorflow/python/kernel_tests/basic_gpu_test.py PiperOrigin-RevId: 323784664 Change-Id: I286870945ddcee9073fc65ef54aa3fcf173f97bb --- .../python/kernel_tests/basic_gpu_test.py | 70 ++++++++++++++----- 1 file changed, 53 insertions(+), 17 deletions(-) diff --git a/tensorflow/python/kernel_tests/basic_gpu_test.py b/tensorflow/python/kernel_tests/basic_gpu_test.py index 6db3717f08a..df27e8afbba 100644 --- a/tensorflow/python/kernel_tests/basic_gpu_test.py +++ b/tensorflow/python/kernel_tests/basic_gpu_test.py @@ -29,7 +29,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gradient_checker_v2 +from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import variables @@ -156,8 +156,10 @@ class MathBuiltinUnaryTest(test.TestCase): class BroadcastSimpleTest(test.TestCase): def _GetGradientArgs(self, xs, ys): - return self.evaluate(broadcast_gradient_args(xs, ys)) + with self.cached_session(use_gpu=True) as sess: + return sess.run(broadcast_gradient_args(xs, ys)) + @test_util.run_deprecated_v1 def testBroadcast(self): r0, r1 = self._GetGradientArgs([2, 3, 5], [1]) self.assertAllEqual(r0, []) @@ -165,6 +167,48 @@ class BroadcastSimpleTest(test.TestCase): _GRAD_TOL = {dtypes.float32: 1e-3} + def _compareGradientX(self, + x, + y, + np_func, + tf_func, + numeric_gradient_type=None): + z = np_func(x, y) + zs = list(z.shape) + with self.cached_session(): + inx = ops.convert_to_tensor(x) + iny = ops.convert_to_tensor(y) + if x.dtype in (np.float32, np.float64): + out = 1.1 * tf_func(inx, iny) + else: + out = tf_func(inx, iny) + xs = list(x.shape) + jacob_t, jacob_n = gradient_checker.compute_gradient( + inx, xs, out, zs, 
x_init_value=x) + tol = self._GRAD_TOL[dtypes.as_dtype(x.dtype)] + self.assertAllClose(jacob_t, jacob_n, rtol=tol, atol=tol) + + def _compareGradientY(self, + x, + y, + np_func, + tf_func, + numeric_gradient_type=None): + z = np_func(x, y) + zs = list(z.shape) + with self.cached_session(): + inx = ops.convert_to_tensor(x) + iny = ops.convert_to_tensor(y) + if x.dtype in (np.float32, np.float64): + out = 1.1 * tf_func(inx, iny) + else: + out = tf_func(inx, iny) + ys = list(np.shape(y)) + jacob_t, jacob_n = gradient_checker.compute_gradient( + iny, ys, out, zs, x_init_value=y) + tol = self._GRAD_TOL[dtypes.as_dtype(x.dtype)] + self.assertAllClose(jacob_t, jacob_n, rtol=tol, atol=tol) + def _compareGpu(self, x, y, np_func, tf_func): np_ans = np_func(x, y) with self.cached_session(use_gpu=True): @@ -176,25 +220,17 @@ class BroadcastSimpleTest(test.TestCase): self.assertShapeEqual(np_ans, out) # TODO(zhifengc/ke): make gradient checker work on GPU. + @test_util.run_deprecated_v1 def testGradient(self): - x1 = (1 + np.linspace(0, 5, np.prod([1, 3, 2]))).astype(np.float32).reshape( + x = (1 + np.linspace(0, 5, np.prod([1, 3, 2]))).astype(np.float32).reshape( [1, 3, 2]) - x2 = (1 + np.linspace(0, 5, np.prod([1, 3, 2]))).astype(np.float32).reshape( + y = (1 + np.linspace(0, 5, np.prod([1, 3, 2]))).astype(np.float32).reshape( [1, 3, 2]) - def div_x1(x1): - return math_ops.truediv(x1, x2) * math_ops.cast(1.1, dtype=x1.dtype) - - def div_x2(x2): - return math_ops.truediv(x1, x2) * math_ops.cast(1.1, dtype=x2.dtype) - - gradient_checker_v2.compute_gradient( - div_x1, [x1], self._GRAD_TOL[dtypes.as_dtype(x1.dtype)]) - gradient_checker_v2.compute_gradient( - div_x2, [x2], self._GRAD_TOL[dtypes.as_dtype(x2.dtype)]) - - self._compareGpu(x1, x2, np.true_divide, math_ops.truediv) - self._compareGpu(x1, x2 + 0.1, np.floor_divide, math_ops.floordiv) + self._compareGradientX(x, y, np.true_divide, math_ops.truediv) + self._compareGradientY(x, y, np.true_divide, math_ops.truediv) + self._compareGpu(x, y, np.true_divide, math_ops.truediv) + self._compareGpu(x, y + 0.1, np.floor_divide, math_ops.floordiv) class GpuMultiSessionMemoryTest(test_util.TensorFlowTestCase): From 59d53b742551314b0a2acd369d9e1ac32053f929 Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Wed, 29 Jul 2020 07:57:51 -0700 Subject: [PATCH 1601/2522] Let cpu_backend_gemm support all storage order combinations, unconditionally using ruy as the backend in combinations other than RowMajor*ColMajor->ColMajor, which were so far not supported. Ruy is different from other back-ends in that it supports all combinations as runtime parameters without a code size increase. 
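For illustration, a rough sketch of a caller that now takes the ruy path purely
because of its storage orders (the float instantiation, buffer sizes and values
are illustrative assumptions, not part of this change; usual includes assumed):

    // All three matrices stored row-major; previously only
    // RowMajor lhs * ColMajor rhs -> ColMajor dst was accepted here.
    std::vector<float> lhs_data(64 * 32, 1.0f), rhs_data(32 * 16, 1.0f);
    std::vector<float> dst_data(64 * 16);
    cpu_backend_gemm::MatrixParams<float> lhs_params;
    lhs_params.order = cpu_backend_gemm::Order::kRowMajor;
    lhs_params.rows = 64;
    lhs_params.cols = 32;
    cpu_backend_gemm::MatrixParams<float> rhs_params;
    rhs_params.order = cpu_backend_gemm::Order::kRowMajor;
    rhs_params.rows = 32;
    rhs_params.cols = 16;
    cpu_backend_gemm::MatrixParams<float> dst_params;
    dst_params.order = cpu_backend_gemm::Order::kRowMajor;
    dst_params.rows = 64;
    dst_params.cols = 16;
    cpu_backend_gemm::GemmParams<float, float> gemm_params;
    CpuBackendContext context;
    cpu_backend_gemm::Gemm(lhs_params, lhs_data.data(), rhs_params,
                           rhs_data.data(), dst_params, dst_data.data(),
                           gemm_params, &context);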
PiperOrigin-RevId: 323786939 Change-Id: Ib81abb5ca621a01cd8453a4a08b27601ad75c7dc --- tensorflow/lite/kernels/cpu_backend_gemm.h | 32 +++++++++++++++---- .../lite/kernels/cpu_backend_gemm_params.h | 11 ------- .../lite/kernels/cpu_backend_gemm_test.cc | 13 ++++++-- 3 files changed, 36 insertions(+), 20 deletions(-) diff --git a/tensorflow/lite/kernels/cpu_backend_gemm.h b/tensorflow/lite/kernels/cpu_backend_gemm.h index a95c4d15a82..14ff571e7da 100644 --- a/tensorflow/lite/kernels/cpu_backend_gemm.h +++ b/tensorflow/lite/kernels/cpu_backend_gemm.h @@ -95,9 +95,26 @@ void Gemm(const MatrixParams& lhs_params, const LhsScalar* lhs_data, CpuBackendContext* context) { ruy::profiler::ScopeLabel label("cpu_backend_gemm::Gemm"); ValidateParams(lhs_params, rhs_params, dst_params, params); + // In some cases we want to unconditionally use ruy as the backend, overriding + // the `tflite_with_ruy` setting and the platform default. + bool must_use_ruy = false; if (context->use_caching()) { - // Dispatch to backend that supports caching of prepacked weights - // matrices. + // Only ruy supports caching of pre-packed matrices. Due to the large + // performance impact in the cases where it's typically used, this overrides + // the default. + must_use_ruy = true; + } + if (lhs_params.order != Order::kRowMajor || + rhs_params.order != Order::kColMajor || + dst_params.order != Order::kColMajor) { + // ruy supports all 2^3=8 combinations of storage orders with comparable + // performance. In ruy, it's only a runtime switch. In other backends + // (gemmlowp, Eigen), storage orders are template parameters, supporting + // all 8 combinations would be up to a 8-fold code size increase, so we + // prefer to force usage of ruy in these cases. + must_use_ruy = true; + } + if (must_use_ruy) { detail::GemmImplUsingRuy::Run(lhs_params, lhs_data, rhs_params, rhs_data, @@ -105,15 +122,18 @@ void Gemm(const MatrixParams& lhs_params, const LhsScalar* lhs_data, params, context); return; } - const bool do_custom_gemv = (dst_params.cols == 1); - if (do_custom_gemv) { - // GEMV case: try a custom fast GEMV path. + // If we did not choose to force usage of ruy above, then we may now consider + // using custom GEMV code for the matrix*vector cases. + const bool try_custom_gemv = (dst_params.cols == 1); + if (try_custom_gemv) { + // GEMV case: try a custom fast GEMV path. It will return true if it + // actually handled it. if (detail::CustomGemv(lhs_params, lhs_data, rhs_params, rhs_data, dst_params, dst_data, params, context)) { return; } } - ruy::profiler::ScopeLabel label2("cpu_backend_gemm::Gemm: general GEMM"); + // Generic case: dispatch to any backend as a general GEMM. GemmImpl::Run(lhs_params, lhs_data, rhs_params, rhs_data, dst_params, dst_data, params, context); diff --git a/tensorflow/lite/kernels/cpu_backend_gemm_params.h b/tensorflow/lite/kernels/cpu_backend_gemm_params.h index 0040f40cd50..ef06d97331e 100644 --- a/tensorflow/lite/kernels/cpu_backend_gemm_params.h +++ b/tensorflow/lite/kernels/cpu_backend_gemm_params.h @@ -236,17 +236,6 @@ void ValidateParams( (void)detail::ValidateTypes(); ValidateGemmParams(params); - // For now, Gemm only supports this particular combination of storage orders. - // Actually the generic ruy path already supports all combinations (with - // various performance penalties). On the other hand, gemmlowp and Eigen - // paths would require more source code and larger binary code to handle - // other combinations (because orders are template parameters in gemmlowp - // and Eigen). 
Since this is TFLite's own internal Gemm library, there is - // no point in supporting more than what TFlite currently uses, and that - // is for now this single combination. - TFLITE_DCHECK(lhs_params.order == Order::kRowMajor); - TFLITE_DCHECK(rhs_params.order == Order::kColMajor); - TFLITE_DCHECK(dst_params.order == Order::kColMajor); } } // namespace cpu_backend_gemm diff --git a/tensorflow/lite/kernels/cpu_backend_gemm_test.cc b/tensorflow/lite/kernels/cpu_backend_gemm_test.cc index d79d1357696..521e7bb03fd 100644 --- a/tensorflow/lite/kernels/cpu_backend_gemm_test.cc +++ b/tensorflow/lite/kernels/cpu_backend_gemm_test.cc @@ -389,8 +389,13 @@ void TestSomeGemm(int rows, int depth, int cols, } MakeDeterministicPseudoRandomVector(rows * cols, &dst_data); + auto random_order = [&]() { + return random_engine() % 2 ? cpu_backend_gemm::Order::kRowMajor + : cpu_backend_gemm::Order::kColMajor; + }; MatrixParams lhs_params; - lhs_params.order = cpu_backend_gemm::Order::kRowMajor; + lhs_params.order = + use_golden ? cpu_backend_gemm::Order::kRowMajor : random_order(); lhs_params.rows = rows; lhs_params.cols = depth; if (!std::is_floating_point::value) { @@ -401,7 +406,8 @@ void TestSomeGemm(int rows, int depth, int cols, } MatrixParams rhs_params; - rhs_params.order = cpu_backend_gemm::Order::kColMajor; + rhs_params.order = + use_golden ? cpu_backend_gemm::Order::kColMajor : random_order(); rhs_params.rows = depth; rhs_params.cols = cols; if (!std::is_floating_point::value) { @@ -412,7 +418,8 @@ void TestSomeGemm(int rows, int depth, int cols, } MatrixParams dst_params; - dst_params.order = cpu_backend_gemm::Order::kColMajor; + dst_params.order = + use_golden ? cpu_backend_gemm::Order::kColMajor : random_order(); dst_params.rows = rows; dst_params.cols = cols; if (!std::is_floating_point::value) { From ff2a8f17ae5c002d78a3564b2b00b762025fa9af Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Wed, 29 Jul 2020 22:05:19 +0700 Subject: [PATCH 1602/2522] Add test NewReadOnlyMemoryRegionFromFile --- .../filesystem/plugins/s3/s3_filesystem.h | 10 ++++++++ .../plugins/s3/s3_filesystem_test.cc | 25 +++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h index 05c79a333c1..52a444a0973 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h @@ -45,6 +45,12 @@ void Flush(const TF_WritableFile* file, TF_Status* status); void Close(const TF_WritableFile* file, TF_Status* status); } // namespace tf_writable_file +namespace tf_read_only_memory_region { +void Cleanup(TF_ReadOnlyMemoryRegion* region); +const void* Data(const TF_ReadOnlyMemoryRegion* region); +uint64_t Length(const TF_ReadOnlyMemoryRegion* region); +} // namespace tf_read_only_memory_region + namespace tf_s3_filesystem { typedef struct S3File { std::shared_ptr s3_client; @@ -70,6 +76,10 @@ void NewAppendableFile(const TF_Filesystem* filesystem, const char* path, TF_WritableFile* file, TF_Status* status); int64_t GetFileSize(const TF_Filesystem* filesystem, const char* path, TF_Status* status); +void NewReadOnlyMemoryRegionFromFile(const TF_Filesystem* filesystem, + const char* path, + TF_ReadOnlyMemoryRegion* region, + TF_Status* status); } // namespace tf_s3_filesystem #endif // TENSORFLOW_C_EXPERIMENTAL_FILESYSTEM_PLUGINS_S3_S3_FILESYSTEM_H_ diff --git 
a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc index e24af071405..71ef7d5de5f 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc @@ -224,6 +224,31 @@ TEST_F(S3FilesystemTest, NewAppendableFile) { EXPECT_TF_OK(status_); } +TEST_F(S3FilesystemTest, NewReadOnlyMemoryRegionFromFile) { + const std::string path = GetURIForPath("MemoryFile"); + const std::string content = "content"; + WriteString(path, content); + ASSERT_TF_OK(status_); + + std::unique_ptr + region(new TF_ReadOnlyMemoryRegion, [](TF_ReadOnlyMemoryRegion* file) { + if (file != nullptr) { + if (file->plugin_memory_region != nullptr) + tf_read_only_memory_region::Cleanup(file); + delete file; + } + }); + region->plugin_memory_region = nullptr; + tf_s3_filesystem::NewReadOnlyMemoryRegionFromFile(filesystem_, path.c_str(), + region.get(), status_); + EXPECT_TF_OK(status_); + std::string result(reinterpret_cast( + tf_read_only_memory_region::Data(region.get())), + tf_read_only_memory_region::Length(region.get())); + EXPECT_EQ(content, result); +} + } // namespace } // namespace tensorflow From 3de8d771d23bc64a91c0a822c4ab952b3d4ed911 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Wed, 29 Jul 2020 22:24:05 +0700 Subject: [PATCH 1603/2522] Add test PathExists --- .../filesystem/plugins/s3/s3_filesystem.h | 2 ++ .../filesystem/plugins/s3/s3_filesystem_test.cc | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h index 52a444a0973..1e0b1060898 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h @@ -80,6 +80,8 @@ void NewReadOnlyMemoryRegionFromFile(const TF_Filesystem* filesystem, const char* path, TF_ReadOnlyMemoryRegion* region, TF_Status* status); +void PathExists(const TF_Filesystem* filesystem, const char* path, + TF_Status* status); } // namespace tf_s3_filesystem #endif // TENSORFLOW_C_EXPERIMENTAL_FILESYSTEM_PLUGINS_S3_S3_FILESYSTEM_H_ diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc index 71ef7d5de5f..2542488a292 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc @@ -249,6 +249,17 @@ TEST_F(S3FilesystemTest, NewReadOnlyMemoryRegionFromFile) { EXPECT_EQ(content, result); } +TEST_F(S3FilesystemTest, PathExists) { + const std::string path = GetURIForPath("PathExists"); + tf_s3_filesystem::PathExists(filesystem_, path.c_str(), status_); + EXPECT_EQ(TF_NOT_FOUND, TF_GetCode(status_)) << TF_Message(status_); + TF_SetStatus(status_, TF_OK, ""); + WriteString(path, "test"); + ASSERT_TF_OK(status_); + tf_s3_filesystem::PathExists(filesystem_, path.c_str(), status_); + EXPECT_TF_OK(status_); +} + } // namespace } // namespace tensorflow From f7ab85c9bb9106f3f34a8f06f83eb04dfd197579 Mon Sep 17 00:00:00 2001 From: Thomas Joerg Date: Wed, 29 Jul 2020 08:23:45 -0700 Subject: [PATCH 1604/2522] Integrate LLVM at llvm/llvm-project@834133c950fc Updates LLVM usage to match [834133c950fc](https://github.com/llvm/llvm-project/commit/834133c950fc) PiperOrigin-RevId: 323791166 Change-Id: 
I562eecddd25a64e0e6ce20c689381457bb7291a6 --- tensorflow/workspace.bzl | 4 ++-- third_party/mlir/BUILD | 31 +------------------------------ third_party/mlir/test.BUILD | 17 ----------------- 3 files changed, 3 insertions(+), 49 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 04c6da2cad4..76d23dd81ab 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -711,8 +711,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "754deffd11c733d709c3ed66d3b9a6b54d081474" - LLVM_SHA256 = "c6e6f7f88f29de8a62eb0f5f70168259f9b5abacce02a1022f5944813b060b8f" + LLVM_COMMIT = "834133c950fce120d0378d09718d32a320cbcd72" + LLVM_SHA256 = "a9f9eb7aa73b79ad0c7c8b6bc4c15f8109f05dce40b64411c7a05f3a1c6c7853" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), diff --git a/third_party/mlir/BUILD b/third_party/mlir/BUILD index 7e42a4b40f2..94e8f4520a6 100644 --- a/third_party/mlir/BUILD +++ b/third_party/mlir/BUILD @@ -809,7 +809,7 @@ cc_library( ":Pass", ":Shape", ":ShapeTransformsPassIncGen", - ":Support", + ":StandardOps", ":Transforms", ], ) @@ -902,7 +902,6 @@ cc_library( ":Support", ":VectorInterfaces", ":VectorOpsIncGen", - ":VectorTransformPatternsIncGen", "@llvm-project//llvm:Support", ], ) @@ -3603,34 +3602,6 @@ gentbl( ], ) -filegroup( - name = "VectorTransformPatternsTdFiles", - srcs = [ - "include/mlir/Dialect/Vector/VectorTransformPatterns.td", - ":AffineOpsTdFiles", - ":LinalgOpsTdFiles", - ":LinalgStructuredOpsTdFiles", - ":OpBaseTdFiles", - ":StdOpsTdFiles", - ":VectorOpsTdFiles", - ], -) - -gentbl( - name = "VectorTransformPatternsIncGen", - tbl_outs = [ - ( - "-gen-rewriters", - "include/mlir/Dialect/Vector/VectorTransformPatterns.h.inc", - ), - ], - tblgen = ":mlir-tblgen", - td_file = "include/mlir/Dialect/Vector/VectorTransformPatterns.td", - td_srcs = [ - ":VectorTransformPatternsTdFiles", - ], -) - cc_library( name = "VectorToLLVM", srcs = glob([ diff --git a/third_party/mlir/test.BUILD b/third_party/mlir/test.BUILD index 7b670e3f50d..9ba0c29e981 100644 --- a/third_party/mlir/test.BUILD +++ b/third_party/mlir/test.BUILD @@ -16,21 +16,6 @@ cc_library( includes = ["."], ) -gentbl( - name = "TestVectorTransformPatternsIncGen", - tbl_outs = [ - ( - "-gen-rewriters", - "lib/DeclarativeTransforms/TestVectorTransformPatterns.h.inc", - ), - ], - tblgen = "@llvm-project//mlir:mlir-tblgen", - td_file = "lib/DeclarativeTransforms/TestVectorTransformPatterns.td", - td_srcs = [ - "@llvm-project//mlir:VectorTransformPatternsTdFiles", - ], -) - gentbl( name = "TestOpsIncGen", strip_include_prefix = "lib/Dialect/Test", @@ -115,7 +100,6 @@ cc_library( "lib/Dialect/Test/TestTypes.h", ], includes = [ - "lib/DeclarativeTransforms", "lib/Dialect/Test", ], deps = [ @@ -188,7 +172,6 @@ cc_library( includes = ["lib/Dialect/Test"], deps = [ ":TestDialect", - ":TestVectorTransformPatternsIncGen", "@llvm-project//llvm:Support", "@llvm-project//mlir:Affine", "@llvm-project//mlir:Analysis", From 7d5b34df972d7c9b73a866f128ed847d929ca69d Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Wed, 29 Jul 2020 22:40:04 +0700 Subject: [PATCH 1605/2522] Add test GetChildren --- .../filesystem/plugins/s3/s3_filesystem.cc | 1 + .../filesystem/plugins/s3/s3_filesystem.h | 4 +++ .../plugins/s3/s3_filesystem_test.cc | 29 
+++++++++++++++++++ 3 files changed, 34 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc index 8420b6ec013..1ab6145ec88 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc @@ -1110,6 +1110,7 @@ int GetChildren(const TF_Filesystem* filesystem, const char* path, for (int i = 0; i < num_entries; i++) (*entries)[i] = strdup(result[i].c_str()); TF_SetStatus(status, TF_OK, ""); + return num_entries; } static char* TranslateName(const TF_Filesystem* filesystem, const char* uri) { diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h index 1e0b1060898..d63c1d29ed1 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h @@ -82,6 +82,10 @@ void NewReadOnlyMemoryRegionFromFile(const TF_Filesystem* filesystem, TF_Status* status); void PathExists(const TF_Filesystem* filesystem, const char* path, TF_Status* status); +void CreateDir(const TF_Filesystem* filesystem, const char* path, + TF_Status* status); +int GetChildren(const TF_Filesystem* filesystem, const char* path, + char*** entries, TF_Status* status); } // namespace tf_s3_filesystem #endif // TENSORFLOW_C_EXPERIMENTAL_FILESYSTEM_PLUGINS_S3_S3_FILESYSTEM_H_ diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc index 2542488a292..f9fc307f272 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc @@ -260,6 +260,35 @@ TEST_F(S3FilesystemTest, PathExists) { EXPECT_TF_OK(status_); } +TEST_F(S3FilesystemTest, GetChildren) { + const std::string base = GetURIForPath("GetChildren"); + tf_s3_filesystem::CreateDir(filesystem_, base.c_str(), status_); + EXPECT_TF_OK(status_); + + const std::string file = io::JoinPath(base, "TestFile.csv"); + WriteString(file, "test"); + EXPECT_TF_OK(status_); + + const std::string subdir = io::JoinPath(base, "SubDir"); + tf_s3_filesystem::CreateDir(filesystem_, subdir.c_str(), status_); + EXPECT_TF_OK(status_); + const std::string subfile = io::JoinPath(subdir, "TestSubFile.csv"); + WriteString(subfile, "test"); + EXPECT_TF_OK(status_); + + char** entries; + auto num_entries = tf_s3_filesystem::GetChildren(filesystem_, base.c_str(), + &entries, status_); + EXPECT_TF_OK(status_); + + std::vector childrens; + for (int i = 0; i < num_entries; ++i) { + childrens.push_back(entries[i]); + } + std::sort(childrens.begin(), childrens.end()); + EXPECT_EQ(std::vector({"SubDir", "TestFile.csv"}), childrens); +} + } // namespace } // namespace tensorflow From e8fa2bd14011e50c97426b6d328cc6569b607dc3 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Wed, 29 Jul 2020 22:44:04 +0700 Subject: [PATCH 1606/2522] Add test DeleteFile --- .../c/experimental/filesystem/plugins/s3/s3_filesystem.h | 2 ++ .../filesystem/plugins/s3/s3_filesystem_test.cc | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h index d63c1d29ed1..f1c3e876195 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h +++ 
b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h @@ -86,6 +86,8 @@ void CreateDir(const TF_Filesystem* filesystem, const char* path, TF_Status* status); int GetChildren(const TF_Filesystem* filesystem, const char* path, char*** entries, TF_Status* status); +void DeleteFile(const TF_Filesystem* filesystem, const char* path, + TF_Status* status); } // namespace tf_s3_filesystem #endif // TENSORFLOW_C_EXPERIMENTAL_FILESYSTEM_PLUGINS_S3_S3_FILESYSTEM_H_ diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc index f9fc307f272..536c910b41b 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc @@ -289,6 +289,14 @@ TEST_F(S3FilesystemTest, GetChildren) { EXPECT_EQ(std::vector({"SubDir", "TestFile.csv"}), childrens); } +TEST_F(S3FilesystemTest, DeleteFile) { + const std::string path = GetURIForPath("DeleteFile"); + WriteString(path, "test"); + ASSERT_TF_OK(status_); + tf_s3_filesystem::DeleteFile(filesystem_, path.c_str(), status_); + EXPECT_TF_OK(status_); +} + } // namespace } // namespace tensorflow From b646fc746788f8214b5f83233193f0bbc80d0b9b Mon Sep 17 00:00:00 2001 From: Edward Loper Date: Wed, 29 Jul 2020 08:43:41 -0700 Subject: [PATCH 1607/2522] Fix typo in FastTensorDType() helper in pywrap_tfe_src.cc (testing wrong variable against nullptr) PiperOrigin-RevId: 323794682 Change-Id: I415bdaa25aa76b2b0d18bb17a25247b69398dbef --- tensorflow/python/eager/pywrap_tfe_src.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index dcaaafeda5c..1221c1dbac9 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -1118,7 +1118,7 @@ static tensorflow::DataType FastTensorDtype(PyObject* tensor) { } PyObject* enum_field = PyObject_GetAttrString(dtype_field, "_type_enum"); Py_DECREF(dtype_field); - if (dtype_field == nullptr) { + if (enum_field == nullptr) { return tensorflow::DT_INVALID; } tensorflow::int64 id = MakeInt(enum_field); From 25942a2c2b52eda3008da771833528409abbcd0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tar=C3=A9=20Gaskin?= Date: Wed, 29 Jul 2020 16:27:57 +0000 Subject: [PATCH 1608/2522] recommonded updates --- tensorflow/compiler/tf2xla/functionalize_cond.cc | 12 ++++++------ .../compiler/tf2xla/rearrange_function_argument.cc | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/tf2xla/functionalize_cond.cc b/tensorflow/compiler/tf2xla/functionalize_cond.cc index 58bbb704329..8f6b4eff83b 100644 --- a/tensorflow/compiler/tf2xla/functionalize_cond.cc +++ b/tensorflow/compiler/tf2xla/functionalize_cond.cc @@ -236,16 +236,16 @@ StateMap::CondId StateMap::GetCondId(const StateMap::CondState& state) { } void StateMap::ResetCondId(const Node* node, StateMap::CondId id) { - const int64 node_to_condid_map_size = node_to_condid_map_.size(); - if (node->id() < node_to_condid_map_size) + const int64 node_to_map_size = node_to_condid_map_.size(); + if (node->id() < node_to_map_size) node_to_condid_map_[node->id()] = id; else added_node_condid_mapping_[node->id()] = id; } StateMap::AncestorId StateMap::LookupAncestorId(const Node* node) const { - const int64 node_to_ancestorid_map_size = node_to_ancestorid_map_.size(); - if (node->id() < node_to_ancestorid_map_size) + const int64 node_to_map_size = 
node_to_ancestorid_map_.size(); + if (node->id() < node_to_map_size) return node_to_ancestorid_map_[node->id()]; return added_node_ancestorid_mapping_.at(node->id()); } @@ -257,8 +257,8 @@ StateMap::AncestorId StateMap::GetAncestorId( } void StateMap::ResetAncestorId(const Node* node, StateMap::AncestorId id) { - const int64 node_to_ancestorid_map_size = node_to_ancestorid_map_.size(); - if (node->id() < node_to_ancestorid_map_size) + const int64 node_to_map_size = node_to_ancestorid_map_.size(); + if (node->id() < node_to_map_size) node_to_ancestorid_map_[node->id()] = id; else added_node_ancestorid_mapping_[node->id()] = id; diff --git a/tensorflow/compiler/tf2xla/rearrange_function_argument.cc b/tensorflow/compiler/tf2xla/rearrange_function_argument.cc index 635585639a8..ed7927a9999 100644 --- a/tensorflow/compiler/tf2xla/rearrange_function_argument.cc +++ b/tensorflow/compiler/tf2xla/rearrange_function_argument.cc @@ -146,7 +146,7 @@ Status ReorderOutputEdges(Graph* g, Node* n, int input_count, int dst_input = e->dst_input(); g->RemoveEdge(e); - if (new_src_output < static_cast(input_count - resource_input_count)) { + if (new_src_output < input_count - resource_input_count) { g->AddEdge(n, new_src_output, dst, dst_input); } else { const Edge* input_edge; From 222fdf5a6871e09c8b66503a85cc5ffd2f8368c6 Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Wed, 29 Jul 2020 12:30:44 -0400 Subject: [PATCH 1609/2522] Update tensorflow/core/profiler/convert/op_stats_to_overview_page.cc Co-authored-by: Mihai Maruseac --- tensorflow/core/profiler/convert/op_stats_to_overview_page.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc b/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc index 196262e7d96..c87059e6ab9 100644 --- a/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc +++ b/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc @@ -290,7 +290,7 @@ std::string TfFunctionRecommendationHtml(const TfFunctionDb& tf_function_db) { auto num_functions_shown = std::min( static_cast(3), candidates.size()); - for (int64 i = 0, end = num_functions_shown; i < end; i++) { + for (decltype(candidates)::size_type i = 0; i < num_functions_shown; i++) { if (i > 0) absl::StrAppend(&expensive_functions, ", "); absl::StrAppend(&expensive_functions, "\"", candidates[i].function_name, "\""); From 6c4027ffa5cbe9b96308a150dfc8ab4cf39a3a10 Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Wed, 29 Jul 2020 12:32:30 -0400 Subject: [PATCH 1610/2522] Update op_stats_to_overview_page.cc --- tensorflow/core/profiler/convert/op_stats_to_overview_page.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc b/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc index c87059e6ab9..7e67714de8d 100644 --- a/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc +++ b/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc @@ -295,8 +295,7 @@ std::string TfFunctionRecommendationHtml(const TfFunctionDb& tf_function_db) { absl::StrAppend(&expensive_functions, "\"", candidates[i].function_name, "\""); } - const int64 candidates_size = candidates.size(); - if (candidates_size > num_functions_shown) + if (candidates.size() > num_functions_shown) absl::StrAppend(&expensive_functions, " and more"); return absl::StrCat("Expensive tf-functions detected (", expensive_functions, ") due to either retracing or eager execution."); From 
e8d82106dc7af10a9ba37e79edb69b27403f97bc Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Wed, 29 Jul 2020 12:33:58 -0400 Subject: [PATCH 1611/2522] Update tensorflow/compiler/xla/client/xla_builder.cc Co-authored-by: Mihai Maruseac --- tensorflow/compiler/xla/client/xla_builder.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc index 84843ad821f..167a835a769 100644 --- a/tensorflow/compiler/xla/client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_builder.cc @@ -766,8 +766,8 @@ XlaOp XlaBuilder::BroadcastInDim( TF_ASSIGN_OR_RETURN(auto output_shape, ShapeUtil::MakeValidatedShape( operand_shape->element_type(), out_dim_size)); - tensorflow::int64 broadcast_dimensions_size = broadcast_dimensions.size(); - if (operand_shape->rank() != broadcast_dimensions_size) { + tensorflow::int64 broadcast_rank = broadcast_dimensions.size(); + if (operand_shape->rank() != broadcast_rank) { return InvalidArgument( "Size of broadcast_dimensions has to match operand's rank; operand " "rank: %lld, size of broadcast_dimensions %u.", From 1712c1053f5d9fbb4abfc49d9b90ab835171c912 Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Wed, 29 Jul 2020 12:34:08 -0400 Subject: [PATCH 1612/2522] Update tensorflow/compiler/xla/client/xla_builder.cc Co-authored-by: Mihai Maruseac --- tensorflow/compiler/xla/client/xla_builder.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc index 167a835a769..4fbdc7b8dc9 100644 --- a/tensorflow/compiler/xla/client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_builder.cc @@ -788,7 +788,7 @@ XlaOp XlaBuilder::BroadcastInDim( *operand_shape, output_shape, broadcast_dimensions) .status()); std::vector in_dim_size(out_dim_size.begin(), out_dim_size.end()); - for (int i = 0, end = broadcast_dimensions.size(); i < end; i++) { + for (int i = 0; i < broadcast_rank; i++) { in_dim_size[broadcast_dimensions[i]] = operand_shape->dimensions(i); } const auto& in_dim_shape = From 41b7aff167e8cbd1ac3a2093e8df4079d2edea30 Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Wed, 29 Jul 2020 12:34:17 -0400 Subject: [PATCH 1613/2522] Update tensorflow/compiler/xla/client/xla_builder.cc Co-authored-by: Mihai Maruseac --- tensorflow/compiler/xla/client/xla_builder.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc index 4fbdc7b8dc9..1c79555aff3 100644 --- a/tensorflow/compiler/xla/client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_builder.cc @@ -773,7 +773,7 @@ XlaOp XlaBuilder::BroadcastInDim( "rank: %lld, size of broadcast_dimensions %u.", operand_shape->rank(), broadcast_dimensions.size()); } - for (int i = 0, end = broadcast_dimensions.size(); i < end; i++) { + for (int i = 0; i < broadcast_rank; i++) { const tensorflow::int64 num_dims = out_dim_size.size(); if (broadcast_dimensions[i] < 0 || broadcast_dimensions[i] > num_dims) { From a9d4eaace064c3fafa1ed11870479729a7e2006f Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Wed, 29 Jul 2020 23:35:39 +0700 Subject: [PATCH 1614/2522] Expose LoadLibrary and GetSymbolFromLibrary --- tensorflow/c/env.cc | 19 +++++++++++++++++++ tensorflow/c/env.h | 20 ++++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/tensorflow/c/env.cc b/tensorflow/c/env.cc index ce715c43acb..a8e9adca83a 100644 
--- a/tensorflow/c/env.cc +++ b/tensorflow/c/env.cc @@ -186,3 +186,22 @@ void TF_JoinThread(TF_Thread* thread) { // ::tensorflow::Thread joins on destruction delete reinterpret_cast<::tensorflow::Thread*>(thread); } + +void* TF_LoadLibraryFromEnv(const char* library_filename, TF_Status* status) { + void* handle = nullptr; + TF_SetStatus(status, TF_OK, ""); + ::tensorflow::Set_TF_Status_from_Status( + status, + ::tensorflow::Env::Default()->LoadLibrary(library_filename, &handle)); + return handle; +} + +void* TF_GetSymbolFromLibrary(void* handle, const char* symbol_name, + TF_Status* status) { + void* symbol = nullptr; + TF_SetStatus(status, TF_OK, ""); + ::tensorflow::Set_TF_Status_from_Status( + status, ::tensorflow::Env::Default()->GetSymbolFromLibrary( + handle, symbol_name, &symbol)); + return symbol; +} diff --git a/tensorflow/c/env.h b/tensorflow/c/env.h index 7dc7ac32f08..048dca9eb43 100644 --- a/tensorflow/c/env.h +++ b/tensorflow/c/env.h @@ -184,6 +184,26 @@ TF_CAPI_EXPORT extern TF_Thread* TF_StartThread(const TF_ThreadOptions* options, // Waits for the given thread to finish execution, then deletes it. TF_CAPI_EXPORT extern void TF_JoinThread(TF_Thread* thread); +// \brief Load a dynamic library. +// +// Pass "library_filename" to a platform-specific mechanism for dynamically +// loading a library. The rules for determining the exact location of the +// library are platform-specific and are not documented here. +// +// On success, place OK in status and return the newly created library handle. +// Otherwise returns nullptr and set error status. +TF_CAPI_EXPORT extern void* TF_LoadLibraryFromEnv(const char* library_filename, + TF_Status* status); + +// \brief Get a pointer to a symbol from a dynamic library. +// +// "handle" should be a pointer returned from a previous call to +// TF_LoadLibraryFromEnv. On success, place OK in status and return a pointer to +// the located symbol. Otherwise returns nullptr and set error status. +TF_CAPI_EXPORT extern void* TF_GetSymbolFromLibrary(void* handle, + const char* symbol_name, + TF_Status* status); + #ifdef __cplusplus } #endif From 66b1247f10bf37a33e5c495eb1523e0895a04ae9 Mon Sep 17 00:00:00 2001 From: Cesar Crusius Date: Wed, 29 Jul 2020 09:49:19 -0700 Subject: [PATCH 1615/2522] Remove run_deprecated_v1 decorators from signature_def_utilts_test. The tests that had that decorator all depended on build_tensor_info and build_tensor_info_from_op, which in turn depend on tensor.name, which is meaningless in eager mode. Migrating the tests to TF2 would defeat their purpose completely, as they exercise V1 signature building utilities. "with ops.Graph().as_default()"ed them. 
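For illustration, the migration pattern applied to each test is roughly the following; this is a hypothetical sketch of the new shape of the tests, not an excerpt from the patch itself:

from tensorflow.python.framework import constant_op
from tensorflow.python.framework import ops
from tensorflow.python.saved_model import signature_def_utils_impl


def regression_signature_pattern():
  # Build the tensors inside an explicit graph scope so that
  # build_tensor_info can rely on tensor.name, which has no meaning
  # for eager tensors.
  with ops.Graph().as_default():
    input1 = constant_op.constant("a", name="input-1")
    output1 = constant_op.constant(2.2, name="output-1")
    signature_def = signature_def_utils_impl.regression_signature_def(
        input1, output1)
  # Assertions on the resulting proto can run outside the graph scope.
  return signature_def
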
PiperOrigin-RevId: 323807112 Change-Id: Iab1f20a4561acaa235d8ffd3fea436ac7cfddb91 --- .../saved_model/signature_def_utils_test.py | 198 ++++++++++-------- tensorflow/python/saved_model/utils_impl.py | 6 + 2 files changed, 121 insertions(+), 83 deletions(-) diff --git a/tensorflow/python/saved_model/signature_def_utils_test.py b/tensorflow/python/saved_model/signature_def_utils_test.py index 9a18f185d0b..98ae4b2f891 100644 --- a/tensorflow/python/saved_model/signature_def_utils_test.py +++ b/tensorflow/python/saved_model/signature_def_utils_test.py @@ -22,7 +22,7 @@ from tensorflow.core.framework import types_pb2 from tensorflow.core.protobuf import meta_graph_pb2 from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes -from tensorflow.python.framework import test_util +from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.platform import test @@ -60,17 +60,20 @@ def _make_signature(inputs, outputs, name=None): class SignatureDefUtilsTest(test.TestCase): - @test_util.run_deprecated_v1 def testBuildSignatureDef(self): - x = array_ops.placeholder(dtypes.float32, 1, name="x") - x_tensor_info = utils.build_tensor_info(x) - inputs = {} - inputs["foo-input"] = x_tensor_info + # Force the test to run in graph mode. + # This tests a deprecated v1 API that uses functionality that does not work + # with eager tensors (namely build_tensor_info). + with ops.Graph().as_default(): + x = array_ops.placeholder(dtypes.float32, 1, name="x") + x_tensor_info = utils.build_tensor_info(x) + inputs = {} + inputs["foo-input"] = x_tensor_info - y = array_ops.placeholder(dtypes.float32, name="y") - y_tensor_info = utils.build_tensor_info(y) - outputs = {} - outputs["foo-output"] = y_tensor_info + y = array_ops.placeholder(dtypes.float32, name="y") + y_tensor_info = utils.build_tensor_info(y) + outputs = {} + outputs["foo-output"] = y_tensor_info signature_def = signature_def_utils_impl.build_signature_def( inputs, outputs, "foo-method-name") @@ -91,12 +94,15 @@ class SignatureDefUtilsTest(test.TestCase): self.assertEqual(types_pb2.DT_FLOAT, y_tensor_info_actual.dtype) self.assertEqual(0, len(y_tensor_info_actual.tensor_shape.dim)) - @test_util.run_deprecated_v1 def testRegressionSignatureDef(self): - input1 = constant_op.constant("a", name="input-1") - output1 = constant_op.constant(2.2, name="output-1") - signature_def = signature_def_utils_impl.regression_signature_def( - input1, output1) + # Force the test to run in graph mode. + # This tests a deprecated v1 API that uses functionality that does not work + # with eager tensors (namely build_tensor_info). 
+ with ops.Graph().as_default(): + input1 = constant_op.constant("a", name="input-1") + output1 = constant_op.constant(2.2, name="output-1") + signature_def = signature_def_utils_impl.regression_signature_def( + input1, output1) self.assertEqual(signature_constants.REGRESS_METHOD_NAME, signature_def.method_name) @@ -117,13 +123,16 @@ class SignatureDefUtilsTest(test.TestCase): self.assertEqual(types_pb2.DT_FLOAT, y_tensor_info_actual.dtype) self.assertEqual(0, len(y_tensor_info_actual.tensor_shape.dim)) - @test_util.run_deprecated_v1 def testClassificationSignatureDef(self): - input1 = constant_op.constant("a", name="input-1") - output1 = constant_op.constant("b", name="output-1") - output2 = constant_op.constant(3.3, name="output-2") - signature_def = signature_def_utils_impl.classification_signature_def( - input1, output1, output2) + # Force the test to run in graph mode. + # This tests a deprecated v1 API that uses functionality that does not work + # with eager tensors (namely build_tensor_info). + with ops.Graph().as_default(): + input1 = constant_op.constant("a", name="input-1") + output1 = constant_op.constant("b", name="output-1") + output2 = constant_op.constant(3.3, name="output-2") + signature_def = signature_def_utils_impl.classification_signature_def( + input1, output1, output2) self.assertEqual(signature_constants.CLASSIFY_METHOD_NAME, signature_def.method_name) @@ -149,17 +158,23 @@ class SignatureDefUtilsTest(test.TestCase): self.assertEqual(types_pb2.DT_FLOAT, scores_tensor_info_actual.dtype) self.assertEqual(0, len(scores_tensor_info_actual.tensor_shape.dim)) - @test_util.run_deprecated_v1 def testPredictionSignatureDef(self): - input1 = constant_op.constant("a", name="input-1") - input2 = constant_op.constant("b", name="input-2") - output1 = constant_op.constant("c", name="output-1") - output2 = constant_op.constant("d", name="output-2") - signature_def = signature_def_utils_impl.predict_signature_def({ - "input-1": input1, - "input-2": input2 - }, {"output-1": output1, - "output-2": output2}) + # Force the test to run in graph mode. + # This tests a deprecated v1 API that uses functionality that does not work + # with eager tensors (namely build_tensor_info). 
+ with ops.Graph().as_default(): + input1 = constant_op.constant("a", name="input-1") + input2 = constant_op.constant("b", name="input-2") + output1 = constant_op.constant("c", name="output-1") + output2 = constant_op.constant("d", name="output-2") + signature_def = signature_def_utils_impl.predict_signature_def( + { + "input-1": input1, + "input-2": input2 + }, { + "output-1": output1, + "output-2": output2 + }) self.assertEqual(signature_constants.PREDICT_METHOD_NAME, signature_def.method_name) @@ -186,34 +201,38 @@ class SignatureDefUtilsTest(test.TestCase): self.assertEqual(types_pb2.DT_STRING, output2_tensor_info_actual.dtype) self.assertEqual(0, len(output2_tensor_info_actual.tensor_shape.dim)) - @test_util.run_deprecated_v1 def testTrainSignatureDef(self): self._testSupervisedSignatureDef( signature_def_utils_impl.supervised_train_signature_def, signature_constants.SUPERVISED_TRAIN_METHOD_NAME) - @test_util.run_deprecated_v1 def testEvalSignatureDef(self): self._testSupervisedSignatureDef( signature_def_utils_impl.supervised_eval_signature_def, signature_constants.SUPERVISED_EVAL_METHOD_NAME) def _testSupervisedSignatureDef(self, fn_to_test, method_name): - inputs = { - "input-1": constant_op.constant("a", name="input-1"), - "input-2": constant_op.constant("b", name="input-2"), - } - loss = {"loss-1": constant_op.constant(0.45, name="loss-1")} - predictions = { - "classes": constant_op.constant([100], name="classes"), - } - metrics_val = constant_op.constant(100.0, name="metrics_val") - metrics = { - "metrics/value": metrics_val, - "metrics/update_op": array_ops.identity(metrics_val, name="metrics_op"), - } + # Force the test to run in graph mode. + # This tests a deprecated v1 API that uses functionality that does not work + # with eager tensors (namely build_tensor_info). 
+ with ops.Graph().as_default(): + inputs = { + "input-1": constant_op.constant("a", name="input-1"), + "input-2": constant_op.constant("b", name="input-2"), + } + loss = {"loss-1": constant_op.constant(0.45, name="loss-1")} + predictions = { + "classes": constant_op.constant([100], name="classes"), + } + metrics_val = constant_op.constant(100.0, name="metrics_val") + metrics = { + "metrics/value": + metrics_val, + "metrics/update_op": + array_ops.identity(metrics_val, name="metrics_op"), + } - signature_def = fn_to_test(inputs, loss, predictions, metrics) + signature_def = fn_to_test(inputs, loss, predictions, metrics) self.assertEqual(method_name, signature_def.method_name) @@ -246,44 +265,50 @@ class SignatureDefUtilsTest(test.TestCase): self.assertEqual( types_pb2.DT_FLOAT, signature_def.outputs["metrics/value"].dtype) - @test_util.run_deprecated_v1 def testTrainSignatureDefMissingInputs(self): self._testSupervisedSignatureDefMissingInputs( signature_def_utils_impl.supervised_train_signature_def, signature_constants.SUPERVISED_TRAIN_METHOD_NAME) - @test_util.run_deprecated_v1 def testEvalSignatureDefMissingInputs(self): self._testSupervisedSignatureDefMissingInputs( signature_def_utils_impl.supervised_eval_signature_def, signature_constants.SUPERVISED_EVAL_METHOD_NAME) def _testSupervisedSignatureDefMissingInputs(self, fn_to_test, method_name): - inputs = { - "input-1": constant_op.constant("a", name="input-1"), - "input-2": constant_op.constant("b", name="input-2"), - } - loss = {"loss-1": constant_op.constant(0.45, name="loss-1")} - predictions = { - "classes": constant_op.constant([100], name="classes"), - } - metrics_val = constant_op.constant(100, name="metrics_val") - metrics = { - "metrics/value": metrics_val, - "metrics/update_op": array_ops.identity(metrics_val, name="metrics_op"), - } + # Force the test to run in graph mode. + # This tests a deprecated v1 API that uses functionality that does not work + # with eager tensors (namely build_tensor_info). 
+ with ops.Graph().as_default(): + inputs = { + "input-1": constant_op.constant("a", name="input-1"), + "input-2": constant_op.constant("b", name="input-2"), + } + loss = {"loss-1": constant_op.constant(0.45, name="loss-1")} + predictions = { + "classes": constant_op.constant([100], name="classes"), + } + metrics_val = constant_op.constant(100, name="metrics_val") + metrics = { + "metrics/value": + metrics_val, + "metrics/update_op": + array_ops.identity(metrics_val, name="metrics_op"), + } - with self.assertRaises(ValueError): - signature_def = fn_to_test( - {}, loss=loss, predictions=predictions, metrics=metrics) + with self.assertRaises(ValueError): + signature_def = fn_to_test({}, + loss=loss, + predictions=predictions, + metrics=metrics) - signature_def = fn_to_test(inputs, loss=loss) - self.assertEqual(method_name, signature_def.method_name) - self.assertEqual(1, len(signature_def.outputs)) + signature_def = fn_to_test(inputs, loss=loss) + self.assertEqual(method_name, signature_def.method_name) + self.assertEqual(1, len(signature_def.outputs)) - signature_def = fn_to_test(inputs, metrics=metrics, loss=loss) - self.assertEqual(method_name, signature_def.method_name) - self.assertEqual(3, len(signature_def.outputs)) + signature_def = fn_to_test(inputs, metrics=metrics, loss=loss) + self.assertEqual(method_name, signature_def.method_name) + self.assertEqual(3, len(signature_def.outputs)) def _assertValidSignature(self, inputs, outputs, method_name): signature_def = signature_def_utils_impl.build_signature_def( @@ -423,23 +448,30 @@ class SignatureDefUtilsTest(test.TestCase): {}, signature_constants.PREDICT_METHOD_NAME) - @test_util.run_v1_only("b/120545219") def testOpSignatureDef(self): - key = "adding_1_and_2_key" - add_op = math_ops.add(1, 2, name="adding_1_and_2") - signature_def = signature_def_utils_impl.op_signature_def(add_op, key) + # Force the test to run in graph mode. + # This tests a deprecated v1 API that uses functionality that does not work + # with eager tensors (namely build_tensor_info_from_op). + with ops.Graph().as_default(): + key = "adding_1_and_2_key" + add_op = math_ops.add(1, 2, name="adding_1_and_2") + signature_def = signature_def_utils_impl.op_signature_def(add_op, key) + self.assertIn(key, signature_def.outputs) self.assertEqual(add_op.name, signature_def.outputs[key].name) - @test_util.run_v1_only("b/120545219") def testLoadOpFromSignatureDef(self): - key = "adding_1_and_2_key" - add_op = math_ops.add(1, 2, name="adding_1_and_2") - signature_def = signature_def_utils_impl.op_signature_def(add_op, key) - - self.assertEqual( - add_op, - signature_def_utils_impl.load_op_from_signature_def(signature_def, key)) + # Force the test to run in graph mode. + # This tests a deprecated v1 API that uses functionality that does not work + # with eager tensors (namely build_tensor_info_from_op). 
+ with ops.Graph().as_default(): + key = "adding_1_and_2_key" + add_op = math_ops.add(1, 2, name="adding_1_and_2") + signature_def = signature_def_utils_impl.op_signature_def(add_op, key) + self.assertEqual( + add_op, + signature_def_utils_impl.load_op_from_signature_def( + signature_def, key)) if __name__ == "__main__": diff --git a/tensorflow/python/saved_model/utils_impl.py b/tensorflow/python/saved_model/utils_impl.py index 0f635b6bf85..899dd61d172 100644 --- a/tensorflow/python/saved_model/utils_impl.py +++ b/tensorflow/python/saved_model/utils_impl.py @@ -126,7 +126,13 @@ def build_tensor_info_from_op(op): Returns: A TensorInfo protocol buffer constructed based on the supplied argument. + + Raises: + RuntimeError: If eager execution is enabled. """ + if context.executing_eagerly(): + raise RuntimeError( + "build_tensor_info_from_op is not supported in Eager mode.") return meta_graph_pb2.TensorInfo( dtype=types_pb2.DT_INVALID, tensor_shape=tensor_shape.unknown_shape().as_proto(), From 796b9673ae21fb5a4df65967e9a699c8ea2016ce Mon Sep 17 00:00:00 2001 From: Allen Wang Date: Wed, 29 Jul 2020 10:15:16 -0700 Subject: [PATCH 1616/2522] Add in traces for GCS buffer loading. PiperOrigin-RevId: 323812874 Change-Id: I69715431cde1d856a35be7ec2fa331f8ba848d62 --- tensorflow/core/platform/cloud/BUILD | 2 ++ tensorflow/core/platform/cloud/gcs_file_system.cc | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/tensorflow/core/platform/cloud/BUILD b/tensorflow/core/platform/cloud/BUILD index 2440549a353..38ced40d8fd 100644 --- a/tensorflow/core/platform/cloud/BUILD +++ b/tensorflow/core/platform/cloud/BUILD @@ -103,6 +103,8 @@ cc_library( "//tensorflow/core/platform:retrying_utils", "//tensorflow/core/platform:str_util", "//tensorflow/core/platform:stringprintf", + "//tensorflow/core/profiler/lib:traceme", + "@com_google_absl//absl/strings:str_format", "@jsoncpp_git//:jsoncpp", ], alwayslink = 1, diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc index 43ece688034..62a6d9f4531 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system.cc @@ -47,6 +47,7 @@ limitations under the License. #include "tensorflow/core/platform/str_util.h" #include "tensorflow/core/platform/stringprintf.h" #include "tensorflow/core/platform/thread_annotations.h" +#include "tensorflow/core/profiler/lib/traceme.h" #ifdef _WIN32 #ifdef DeleteFile @@ -1070,6 +1071,9 @@ Status GcsFileSystem::LoadBufferFromGCS(const string& fname, size_t offset, string bucket, object; TF_RETURN_IF_ERROR(ParseGcsPath(fname, false, &bucket, &object)); + profiler::TraceMe activity( + [fname]() { return absl::StrCat("LoadBufferFromGCS ", fname); }); + std::unique_ptr request; TF_RETURN_WITH_CONTEXT_IF_ERROR(CreateHttpRequest(&request), "when reading gs://", bucket, "/", object); @@ -1091,6 +1095,9 @@ Status GcsFileSystem::LoadBufferFromGCS(const string& fname, size_t offset, *bytes_transferred = bytes_read; VLOG(1) << "Successful read of gs://" << bucket << "/" << object << " @ " << offset << " of size: " << bytes_read; + activity.AppendMetadata([bytes_read]() { + return profiler::TraceMeEncode({{"block_size", bytes_read}}); + }); if (stats_ != nullptr) { stats_->RecordBlockRetrieved(fname, offset, bytes_read); From f5fb417ebc18485b7e2493e766d658da539f007c Mon Sep 17 00:00:00 2001 From: Lucy Fox Date: Wed, 29 Jul 2020 10:20:09 -0700 Subject: [PATCH 1617/2522] Legalize tf.LegacyCall op to standard Call op. 
PiperOrigin-RevId: 323813962 Change-Id: If0f5ffd7a6bc6fe979ed494141bd465ad825d790 --- .../compiler/mlir/xla/tests/legalize-tf.mlir | 28 +++++++++++++++++++ .../xla/transforms/legalize_tf_patterns.td | 8 +++++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir b/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir index b09ccf025b0..a2c4159db1a 100644 --- a/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir +++ b/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir @@ -3049,6 +3049,34 @@ func @linspace_invalid_num(%arg0: tensor, %arg1: tensor) -> tensor } +//===----------------------------------------------------------------------===// +// LegacyCall op legalizations. +//===----------------------------------------------------------------------===// + +func @identity_func(%arg0: tensor<10x2xf32>) -> tensor<10x2xf32> { + return %arg0: tensor<10x2xf32> +} + +// CHECK-LABEL: testSimpleLegacyCallOp +func @testSimpleLegacyCallOp(%arg0: tensor<10x2xf32>) -> tensor<10x2xf32> { + // CHECK: %[[RESULT:.*]] = call @identity_func(%arg0) : (tensor<10x2xf32>) -> tensor<10x2xf32> + %0 = "tf.LegacyCall"(%arg0) {f = @identity_func} : (tensor<10x2xf32>) -> tensor<10x2xf32> + // CHECK: return %[[RESULT]] + return %0: tensor<10x2xf32> +} + +func @select_first(%arg0: tensor<10x2xf32>, %arg1: tensor<10x2xf32>) -> tensor<10x2xf32> { + return %arg0: tensor<10x2xf32> +} + +// CHECK-LABEL: testMultiInputLegacyCallOp +func @testMultiInputLegacyCallOp(%arg0: tensor<10x2xf32>, %arg1: tensor<10x2xf32>) -> tensor<10x2xf32> { + // CHECK: %[[RESULT:.*]] = call @select_first(%arg0, %arg1) : (tensor<10x2xf32>, tensor<10x2xf32>) -> tensor<10x2xf32> + %0 = "tf.LegacyCall"(%arg0, %arg1) {_disable_call_shape_inference = true, _tpu_replicate = "cluster", device = "", f = @select_first} : (tensor<10x2xf32>, tensor<10x2xf32>) -> tensor<10x2xf32> + // CHECK: return %[[RESULT]] + return %0: tensor<10x2xf32> +} + //===----------------------------------------------------------------------===// // Conv op legalizations. //===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_patterns.td b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_patterns.td index 05e061337c7..897549648cd 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_patterns.td +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_patterns.td @@ -489,7 +489,7 @@ def : Pat<(TF_SliceOp:$op HLO_Tensor:$input, HLO_Tensor:$starting_indices, $slice_sizes)]>; //===----------------------------------------------------------------------===// -// PartitionedCall op patterns. +// PartitionedCall and LegacyCall op patterns. //===----------------------------------------------------------------------===// def ArgTypesMatchCallee : Constraint< @@ -502,6 +502,12 @@ foreach callOp = [TF_PartitionedCallOp, TF_StatefulPartitionedCallOp] in { [(ArgTypesMatchCallee $op, $args, $f)]>; } +// The extra attr on this op is _disable_call_shape_inference, which we ignore +// in the bridge. +def : Pat<(TF_LegacyCallOp:$op $args, FlatSymbolRefAttr:$f, $attr), + (CallOp $f, $args), + [(ArgTypesMatchCallee $op, $args, $f)]>; + //===----------------------------------------------------------------------===// // Reverse op patterns. 
//===----------------------------------------------------------------------===// From e56d6d5619062163cefe815a835d7b3a8baa8867 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Thu, 30 Jul 2020 00:44:22 +0700 Subject: [PATCH 1618/2522] Add test CreateDir --- .../filesystem/plugins/s3/s3_filesystem.cc | 3 ++- .../filesystem/plugins/s3/s3_filesystem.h | 2 ++ .../filesystem/plugins/s3/s3_filesystem_test.cc | 17 +++++++++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc index 1ab6145ec88..c7f2ef8b03b 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc @@ -646,7 +646,8 @@ void Stat(const TF_Filesystem* filesystem, const char* path, head_object_outcome.GetResult().GetLastModified().Millis() * 1e6; found = true; } else { - return TF_SetStatusFromAWSError(head_object_outcome.GetError(), status); + TF_SetStatusFromAWSError(head_object_outcome.GetError(), status); + if (TF_GetCode(status) == TF_FAILED_PRECONDITION) return; } auto prefix = object; diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h index f1c3e876195..23a07fc6405 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h @@ -88,6 +88,8 @@ int GetChildren(const TF_Filesystem* filesystem, const char* path, char*** entries, TF_Status* status); void DeleteFile(const TF_Filesystem* filesystem, const char* path, TF_Status* status); +void Stat(const TF_Filesystem* filesystem, const char* path, + TF_FileStatistics* stats, TF_Status* status); } // namespace tf_s3_filesystem #endif // TENSORFLOW_C_EXPERIMENTAL_FILESYSTEM_PLUGINS_S3_S3_FILESYSTEM_H_ diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc index 536c910b41b..d66fb552b79 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc @@ -297,6 +297,23 @@ TEST_F(S3FilesystemTest, DeleteFile) { EXPECT_TF_OK(status_); } +TEST_F(S3FilesystemTest, CreateDir) { + // s3 object storage doesn't support empty directory, we create file in the + // directory + const std::string dir = GetURIForPath("CreateDir"); + tf_s3_filesystem::CreateDir(filesystem_, dir.c_str(), status_); + EXPECT_TF_OK(status_); + + const std::string file = io::JoinPath(dir, "CreateDirFile.csv"); + WriteString(file, "test"); + ASSERT_TF_OK(status_); + + TF_FileStatistics stat; + tf_s3_filesystem::Stat(filesystem_, dir.c_str(), &stat, status_); + EXPECT_TF_OK(status_); + EXPECT_TRUE(stat.is_directory); +} + } // namespace } // namespace tensorflow From 69518728586b7c293a52f4a05a7a276528829142 Mon Sep 17 00:00:00 2001 From: Yanhua Sun Date: Wed, 29 Jul 2020 10:54:22 -0700 Subject: [PATCH 1619/2522] remove v1 only decorator PiperOrigin-RevId: 323821989 Change-Id: Id0cb6221990644f70170891a6a9dbaa6b45e64a8 --- .../python/framework/tensor_spec_test.py | 90 ++++++++++--------- 1 file changed, 47 insertions(+), 43 deletions(-) diff --git a/tensorflow/python/framework/tensor_spec_test.py b/tensorflow/python/framework/tensor_spec_test.py index f67aa4c9013..d8932275f88 100644 --- 
a/tensorflow/python/framework/tensor_spec_test.py +++ b/tensorflow/python/framework/tensor_spec_test.py @@ -22,6 +22,7 @@ import pickle import numpy as np +from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -51,41 +52,41 @@ class TensorSpecTest(test_util.TensorFlowTestCase): desc = tensor_spec.TensorSpec(shape=None, dtype=dtypes.float32) self.assertEqual(desc.shape, tensor_shape.TensorShape(None)) - @test_util.run_deprecated_v1 def testShapeCompatibility(self): - unknown = array_ops.placeholder(dtypes.int64) - partial = array_ops.placeholder(dtypes.int64, shape=[None, 1]) - full = array_ops.placeholder(dtypes.int64, shape=[2, 3]) - rank3 = array_ops.placeholder(dtypes.int64, shape=[4, 5, 6]) + # This test needs a placeholder which means we need to construct a graph. + with ops.Graph().as_default(): + unknown = array_ops.placeholder(dtypes.int64) + partial = array_ops.placeholder(dtypes.int64, shape=[None, 1]) + full = array_ops.placeholder(dtypes.int64, shape=[2, 3]) + rank3 = array_ops.placeholder(dtypes.int64, shape=[4, 5, 6]) - desc_unknown = tensor_spec.TensorSpec(None, dtypes.int64) - self.assertTrue(desc_unknown.is_compatible_with(unknown)) - self.assertTrue(desc_unknown.is_compatible_with(partial)) - self.assertTrue(desc_unknown.is_compatible_with(full)) - self.assertTrue(desc_unknown.is_compatible_with(rank3)) + desc_unknown = tensor_spec.TensorSpec(None, dtypes.int64) + self.assertTrue(desc_unknown.is_compatible_with(unknown)) + self.assertTrue(desc_unknown.is_compatible_with(partial)) + self.assertTrue(desc_unknown.is_compatible_with(full)) + self.assertTrue(desc_unknown.is_compatible_with(rank3)) - desc_partial = tensor_spec.TensorSpec([2, None], dtypes.int64) - self.assertTrue(desc_partial.is_compatible_with(unknown)) - self.assertTrue(desc_partial.is_compatible_with(partial)) - self.assertTrue(desc_partial.is_compatible_with(full)) - self.assertFalse(desc_partial.is_compatible_with(rank3)) + desc_partial = tensor_spec.TensorSpec([2, None], dtypes.int64) + self.assertTrue(desc_partial.is_compatible_with(unknown)) + self.assertTrue(desc_partial.is_compatible_with(partial)) + self.assertTrue(desc_partial.is_compatible_with(full)) + self.assertFalse(desc_partial.is_compatible_with(rank3)) - desc_full = tensor_spec.TensorSpec([2, 3], dtypes.int64) - self.assertTrue(desc_full.is_compatible_with(unknown)) - self.assertFalse(desc_full.is_compatible_with(partial)) - self.assertTrue(desc_full.is_compatible_with(full)) - self.assertFalse(desc_full.is_compatible_with(rank3)) + desc_full = tensor_spec.TensorSpec([2, 3], dtypes.int64) + self.assertTrue(desc_full.is_compatible_with(unknown)) + self.assertFalse(desc_full.is_compatible_with(partial)) + self.assertTrue(desc_full.is_compatible_with(full)) + self.assertFalse(desc_full.is_compatible_with(rank3)) - desc_rank3 = tensor_spec.TensorSpec([4, 5, 6], dtypes.int64) - self.assertTrue(desc_rank3.is_compatible_with(unknown)) - self.assertFalse(desc_rank3.is_compatible_with(partial)) - self.assertFalse(desc_rank3.is_compatible_with(full)) - self.assertTrue(desc_rank3.is_compatible_with(rank3)) + desc_rank3 = tensor_spec.TensorSpec([4, 5, 6], dtypes.int64) + self.assertTrue(desc_rank3.is_compatible_with(unknown)) + self.assertFalse(desc_rank3.is_compatible_with(partial)) + self.assertFalse(desc_rank3.is_compatible_with(full)) + self.assertTrue(desc_rank3.is_compatible_with(rank3)) - 
@test_util.run_deprecated_v1 def testTypeCompatibility(self): - floats = array_ops.placeholder(dtypes.float32, shape=[10, 10]) - ints = array_ops.placeholder(dtypes.int32, shape=[10, 10]) + floats = constant_op.constant(1, dtype=dtypes.float32, shape=[10, 10]) + ints = constant_op.constant(1, dtype=dtypes.int32, shape=[10, 10]) desc = tensor_spec.TensorSpec(shape=(10, 10), dtype=dtypes.float32) self.assertTrue(desc.is_compatible_with(floats)) self.assertFalse(desc.is_compatible_with(ints)) @@ -118,28 +119,31 @@ class TensorSpecTest(test_util.TensorFlowTestCase): spec_2 = tensor_spec.TensorSpec.from_spec(spec_1) self.assertEqual(spec_1, spec_2) - @test_util.run_deprecated_v1 def testFromTensor(self): zero = constant_op.constant(0) spec = tensor_spec.TensorSpec.from_tensor(zero) self.assertEqual(spec.dtype, dtypes.int32) self.assertEqual(spec.shape, []) - self.assertEqual(spec.name, "Const") + # Tensor.name is meaningless when eager execution is enabled. + if not context.executing_eagerly(): + self.assertEqual(spec.name, "Const") - @test_util.run_deprecated_v1 def testFromPlaceholder(self): - unknown = array_ops.placeholder(dtypes.int64, name="unknown") - partial = array_ops.placeholder(dtypes.float32, - shape=[None, 1], - name="partial") - spec_1 = tensor_spec.TensorSpec.from_tensor(unknown) - self.assertEqual(spec_1.dtype, dtypes.int64) - self.assertEqual(spec_1.shape, None) - self.assertEqual(spec_1.name, "unknown") - spec_2 = tensor_spec.TensorSpec.from_tensor(partial) - self.assertEqual(spec_2.dtype, dtypes.float32) - self.assertEqual(spec_2.shape.as_list(), [None, 1]) - self.assertEqual(spec_2.name, "partial") + # This test needs a placeholder which means we need to construct a graph. + with ops.Graph().as_default(): + unknown = array_ops.placeholder(dtypes.int64, name="unknown") + partial = array_ops.placeholder(dtypes.float32, + shape=[None, 1], + name="partial") + + spec_1 = tensor_spec.TensorSpec.from_tensor(unknown) + self.assertEqual(spec_1.dtype, dtypes.int64) + self.assertEqual(spec_1.shape, None) + self.assertEqual(spec_1.name, "unknown") + spec_2 = tensor_spec.TensorSpec.from_tensor(partial) + self.assertEqual(spec_2.dtype, dtypes.float32) + self.assertEqual(spec_2.shape.as_list(), [None, 1]) + self.assertEqual(spec_2.name, "partial") def testFromBoundedTensorSpec(self): bounded_spec = tensor_spec.BoundedTensorSpec((1, 2), dtypes.int32, 0, 1) From 2bced7cca9983e1d72303144d8cec5f873d9f51b Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Thu, 30 Jul 2020 01:09:07 +0700 Subject: [PATCH 1620/2522] Add test DeleteDir --- .../filesystem/plugins/s3/s3_filesystem.h | 2 ++ .../plugins/s3/s3_filesystem_test.cc | 20 +++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h index 23a07fc6405..daaeacedceb 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h @@ -90,6 +90,8 @@ void DeleteFile(const TF_Filesystem* filesystem, const char* path, TF_Status* status); void Stat(const TF_Filesystem* filesystem, const char* path, TF_FileStatistics* stats, TF_Status* status); +void DeleteDir(const TF_Filesystem* filesystem, const char* path, + TF_Status* status); } // namespace tf_s3_filesystem #endif // TENSORFLOW_C_EXPERIMENTAL_FILESYSTEM_PLUGINS_S3_S3_FILESYSTEM_H_ diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc 
b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc index d66fb552b79..f7265f6a218 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc @@ -314,6 +314,26 @@ TEST_F(S3FilesystemTest, CreateDir) { EXPECT_TRUE(stat.is_directory); } +TEST_F(S3FilesystemTest, DeleteDir) { + // s3 object storage doesn't support empty directory, we create file in the + // directory + const std::string dir = GetURIForPath("DeleteDir"); + const std::string file = io::JoinPath(dir, "DeleteDirFile.csv"); + WriteString(file, "test"); + ASSERT_TF_OK(status_); + tf_s3_filesystem::DeleteDir(filesystem_, dir.c_str(), status_); + EXPECT_NE(TF_GetCode(status_), TF_OK); + + TF_SetStatus(status_, TF_OK, ""); + tf_s3_filesystem::DeleteFile(filesystem_, file.c_str(), status_); + EXPECT_TF_OK(status_); + tf_s3_filesystem::DeleteDir(filesystem_, dir.c_str(), status_); + EXPECT_TF_OK(status_); + TF_FileStatistics stat; + tf_s3_filesystem::Stat(filesystem_, dir.c_str(), &stat, status_); + EXPECT_EQ(TF_GetCode(status_), TF_NOT_FOUND) << TF_Message(status_); +} + } // namespace } // namespace tensorflow From 6a50a3c81be8dac73723d7c608d59732254bc41b Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Thu, 30 Jul 2020 01:14:32 +0700 Subject: [PATCH 1621/2522] Add test StatFile --- .../filesystem/plugins/s3/s3_filesystem_test.cc | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc index f7265f6a218..4610e0b3b53 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem_test.cc @@ -334,6 +334,18 @@ TEST_F(S3FilesystemTest, DeleteDir) { EXPECT_EQ(TF_GetCode(status_), TF_NOT_FOUND) << TF_Message(status_); } +TEST_F(S3FilesystemTest, StatFile) { + const std::string path = GetURIForPath("StatFile"); + WriteString(path, "test"); + ASSERT_TF_OK(status_); + + TF_FileStatistics stat; + tf_s3_filesystem::Stat(filesystem_, path.c_str(), &stat, status_); + EXPECT_TF_OK(status_); + EXPECT_EQ(4, stat.length); + EXPECT_FALSE(stat.is_directory); +} + } // namespace } // namespace tensorflow From 24b36f123d71147e9c22dafd6dc25c8bfe66fc39 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 29 Jul 2020 11:25:12 -0700 Subject: [PATCH 1622/2522] Add int16 to BatchMatMulV2 on CPU. Just sending to get feedback. Let me know if we should change other ops. 
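As a sketch of the intended usage (not part of this patch), a batched matmul on int16 inputs should now resolve to the newly registered CPU kernel; the exact dispatch path is assumed here:

import numpy as np
import tensorflow as tf

# Two batches of 2x3 and 3x2 int16 matrices.
x = tf.constant(np.arange(12, dtype=np.int16).reshape(2, 2, 3))
y = tf.constant(np.ones((2, 3, 2), dtype=np.int16))

# For inputs of rank > 2, tf.matmul lowers to BatchMatMulV2, so this
# should hit the int16 CPU kernel registered by this change.
z = tf.matmul(x, y)
print(z.dtype)   # int16
print(z.shape)   # (2, 2, 2)
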
PiperOrigin-RevId: 323829276 Change-Id: I5a72b265923fdb6ddcd57db618c6f0fc018ff8b7 --- tensorflow/core/kernels/batch_matmul_op_real.cc | 1 + tensorflow/core/ops/math_ops.cc | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/batch_matmul_op_real.cc b/tensorflow/core/kernels/batch_matmul_op_real.cc index 12c1f48a3c8..075666c1dc3 100644 --- a/tensorflow/core/kernels/batch_matmul_op_real.cc +++ b/tensorflow/core/kernels/batch_matmul_op_real.cc @@ -24,6 +24,7 @@ namespace tensorflow { TF_CALL_float(REGISTER_BATCH_MATMUL_CPU); TF_CALL_double(REGISTER_BATCH_MATMUL_CPU); TF_CALL_half(REGISTER_BATCH_MATMUL_CPU); +TF_CALL_int16(REGISTER_BATCH_MATMUL_CPU); TF_CALL_int32(REGISTER_BATCH_MATMUL_CPU); TF_CALL_int64(REGISTER_BATCH_MATMUL_CPU); diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index 54d8a6add77..cbf1ef53dde 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -131,7 +131,7 @@ REGISTER_OP("BatchMatMulV2") .Input("y: T") .Output("output: T") .Attr( - "T: {bfloat16, half, float, double, int32, int64, complex64, " + "T: {bfloat16, half, float, double, int16, int32, int64, complex64, " "complex128}") .Attr("adj_x: bool = false") .Attr("adj_y: bool = false") From 225d851dad37cb036b37b59da7d6e091e4f08006 Mon Sep 17 00:00:00 2001 From: Jonathan Chu Date: Wed, 29 Jul 2020 18:34:50 +0000 Subject: [PATCH 1623/2522] Remove extraneous assert --- tensorflow/python/eager/function.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 0f1912a5efd..0c06e0425cd 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -3079,7 +3079,6 @@ class Function(object): else: del args, kwargs assert not include_tensor_ranks_only - assert hasattr(self, '_hashable_input_signature') hashable_input_signature = self._hashable_input_signature ctx = context.context() From aa462dd2507703ebbafc3ffe1e207f3a4ca0b53e Mon Sep 17 00:00:00 2001 From: Thomas O'Malley Date: Wed, 29 Jul 2020 11:36:14 -0700 Subject: [PATCH 1624/2522] Create Optimizer utilities file and move gradient aggregation and filtering functions to this file. 
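For context, the moved helpers can be used on their own by code that needs the same aggregation behaviour outside of apply_gradients; the snippet below is an illustrative sketch built on the functions introduced in the new utils module, and the surrounding distribution setup is assumed:

from tensorflow.python.keras.optimizer_v2 import utils as optimizer_utils


def aggregate(grads_and_vars):
  # Drop (None, var) pairs, mirroring what apply_gradients does before
  # building update ops; raises if every gradient is None.
  filtered = optimizer_utils.filter_empty_gradients(grads_and_vars)
  # Sum-reduce the remaining gradients across replicas. This is meant to
  # run in a replica context under a tf.distribute strategy.
  return optimizer_utils.all_reduce_sum_gradients(filtered)
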
PiperOrigin-RevId: 323831904 Change-Id: I59ff3f42771eba8c7fb27ea68c53296bd2425011 --- tensorflow/python/keras/optimizer_v2/BUILD | 1 + .../python/keras/optimizer_v2/optimizer_v2.py | 60 ++----------- tensorflow/python/keras/optimizer_v2/utils.py | 87 +++++++++++++++++++ 3 files changed, 95 insertions(+), 53 deletions(-) create mode 100644 tensorflow/python/keras/optimizer_v2/utils.py diff --git a/tensorflow/python/keras/optimizer_v2/BUILD b/tensorflow/python/keras/optimizer_v2/BUILD index 42a1e8ac93a..b519ec7fb3d 100644 --- a/tensorflow/python/keras/optimizer_v2/BUILD +++ b/tensorflow/python/keras/optimizer_v2/BUILD @@ -28,6 +28,7 @@ py_library( "nadam.py", "optimizer_v2.py", "rmsprop.py", + "utils.py", ], srcs_version = "PY2AND3", deps = [ diff --git a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py index 4e1aba1f3b4..71b58739fb2 100644 --- a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py +++ b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py @@ -27,7 +27,6 @@ import six from tensorflow.python.distribute import distribution_strategy_context as distribute_ctx from tensorflow.python.distribute import parameter_server_strategy -from tensorflow.python.distribute import reduce_util as ds_reduce_util from tensorflow.python.distribute import values as ds_values from tensorflow.python.eager import backprop from tensorflow.python.eager import context @@ -38,6 +37,7 @@ from tensorflow.python.keras import backend from tensorflow.python.keras import initializers from tensorflow.python.keras.engine import base_layer_utils from tensorflow.python.keras.optimizer_v2 import learning_rate_schedule +from tensorflow.python.keras.optimizer_v2 import utils as optimizer_utils from tensorflow.python.keras.utils import generic_utils from tensorflow.python.keras.utils import tf_utils from tensorflow.python.ops import array_ops @@ -47,7 +47,6 @@ from tensorflow.python.ops import gen_resource_variable_ops from tensorflow.python.ops import gradients from tensorflow.python.ops import math_ops from tensorflow.python.ops import variables as tf_variables -from tensorflow.python.platform import tf_logging as logging from tensorflow.python.saved_model import revived_types from tensorflow.python.training.tracking import base as trackable from tensorflow.python.training.tracking import tracking @@ -510,7 +509,7 @@ class OptimizerV2(trackable.Trackable): TypeError: If `grads_and_vars` is malformed. ValueError: If none of the variables have gradients. """ - grads_and_vars = _filter_grads(grads_and_vars) + grads_and_vars = optimizer_utils.filter_empty_gradients(grads_and_vars) var_list = [v for (_, v) in grads_and_vars] with backend.name_scope(self._name): @@ -550,7 +549,10 @@ class OptimizerV2(trackable.Trackable): }) def _aggregate_gradients(self, grads_and_vars): - """Returns all-reduced gradients. + """Returns aggregated gradients. + + This method must be preserved to maintain backwards compatibility with + Horovod aggregation. Args: grads_and_vars: List of (gradient, variable) pairs. @@ -558,32 +560,7 @@ class OptimizerV2(trackable.Trackable): Returns: A list of all-reduced gradients. 
""" - grads_and_vars = list(grads_and_vars) - filtered_grads_and_vars = _filter_grads(grads_and_vars) - def all_reduce_fn(distribution, grads_and_vars): - return distribution.extended.batch_reduce_to( - ds_reduce_util.ReduceOp.SUM, grads_and_vars) - # We switch to a cross-replica context since there is a bug which causes - # IndexedSlices to be converted to dense tensors when all-reduced in a - # replica context. - # TODO(b/150507409): Do not switch to a cross-replica context once the bug - # is fixed. - if filtered_grads_and_vars: - reduced = distribute_ctx.get_replica_context().merge_call( - all_reduce_fn, args=(filtered_grads_and_vars,)) - else: - reduced = [] - # Copy 'reduced' but add None gradients back in - reduced_with_nones = [] - reduced_pos = 0 - for g, _ in grads_and_vars: - if g is None: - reduced_with_nones.append(None) - else: - reduced_with_nones.append(reduced[reduced_pos]) - reduced_pos += 1 - assert reduced_pos == len(reduced), "Failed to add all gradients" - return reduced_with_nones + return optimizer_utils.all_reduce_sum_gradients(grads_and_vars) def _distributed_apply(self, distribution, grads_and_vars, name, apply_state): """`apply_gradients` using a `DistributionStrategy`.""" @@ -1259,29 +1236,6 @@ class OptimizerV2(trackable.Trackable): yield -def _filter_grads(grads_and_vars): - """Filter out iterable with grad equal to None.""" - grads_and_vars = tuple(grads_and_vars) - if not grads_and_vars: - return grads_and_vars - filtered = [] - vars_with_empty_grads = [] - for grad, var in grads_and_vars: - if grad is None: - vars_with_empty_grads.append(var) - else: - filtered.append((grad, var)) - filtered = tuple(filtered) - if not filtered: - raise ValueError("No gradients provided for any variable: %s." % - ([v.name for _, v in grads_and_vars],)) - if vars_with_empty_grads: - logging.warning( - ("Gradients do not exist for variables %s when minimizing the loss."), - ([v.name for v in vars_with_empty_grads])) - return filtered - - def _var_key(var): """Key for representing a primary variable, for looking up slots. diff --git a/tensorflow/python/keras/optimizer_v2/utils.py b/tensorflow/python/keras/optimizer_v2/utils.py new file mode 100644 index 00000000000..9f680e04dd6 --- /dev/null +++ b/tensorflow/python/keras/optimizer_v2/utils.py @@ -0,0 +1,87 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Optimizer utilities.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.distribute import distribution_strategy_context as distribute_ctx +from tensorflow.python.distribute import reduce_util as ds_reduce_util +from tensorflow.python.platform import tf_logging as logging + + +def all_reduce_sum_gradients(grads_and_vars): + """Returns all-reduced gradients aggregated via summation. 
+ + Args: + grads_and_vars: List of (gradient, variable) pairs. + + Returns: + A list of all-reduced gradients. + """ + grads_and_vars = list(grads_and_vars) + filtered_grads_and_vars = filter_empty_gradients(grads_and_vars) + # We switch to a cross-replica context since there is a bug which causes + # IndexedSlices to be converted to dense tensors when all-reduced in a + # replica context. + # TODO(b/150507409): Do not switch to a cross-replica context once the bug + # is fixed. + if filtered_grads_and_vars: + reduced = distribute_ctx.get_replica_context().merge_call( + _all_reduce_sum_fn, args=(filtered_grads_and_vars,)) + else: + reduced = [] + # Copy 'reduced' but add None gradients back in + reduced_with_nones = [] + reduced_pos = 0 + for g, _ in grads_and_vars: + if g is None: + reduced_with_nones.append(None) + else: + reduced_with_nones.append(reduced[reduced_pos]) + reduced_pos += 1 + assert reduced_pos == len(reduced), "Failed to add all gradients" + return reduced_with_nones + + +def filter_empty_gradients(grads_and_vars): + """Filter out `(grad, var)` pairs that have a gradient equal to `None`.""" + grads_and_vars = tuple(grads_and_vars) + if not grads_and_vars: + return grads_and_vars + + filtered = [] + vars_with_empty_grads = [] + for grad, var in grads_and_vars: + if grad is None: + vars_with_empty_grads.append(var) + else: + filtered.append((grad, var)) + filtered = tuple(filtered) + + if not filtered: + raise ValueError("No gradients provided for any variable: %s." % + ([v.name for _, v in grads_and_vars],)) + if vars_with_empty_grads: + logging.warning( + ("Gradients do not exist for variables %s when minimizing the loss."), + ([v.name for v in vars_with_empty_grads])) + return filtered + + +def _all_reduce_sum_fn(distribution, grads_and_vars): + return distribution.extended.batch_reduce_to(ds_reduce_util.ReduceOp.SUM, + grads_and_vars) From 124fb2b2bd1c6fc49c3529a8497c575a787120a2 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Thu, 30 Jul 2020 01:46:38 +0700 Subject: [PATCH 1625/2522] Change name of LoadLibrary Co-authored-by: Mihai Maruseac --- tensorflow/c/env.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/c/env.cc b/tensorflow/c/env.cc index a8e9adca83a..e731c0659a7 100644 --- a/tensorflow/c/env.cc +++ b/tensorflow/c/env.cc @@ -187,7 +187,7 @@ void TF_JoinThread(TF_Thread* thread) { delete reinterpret_cast<::tensorflow::Thread*>(thread); } -void* TF_LoadLibraryFromEnv(const char* library_filename, TF_Status* status) { +void* TF_LoadSharedLibrary(const char* library_filename, TF_Status* status) { void* handle = nullptr; TF_SetStatus(status, TF_OK, ""); ::tensorflow::Set_TF_Status_from_Status( From 233d855f9cfcd458672cda5922c26084a501009b Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Wed, 29 Jul 2020 11:37:33 -0700 Subject: [PATCH 1626/2522] Add in traces for GCS buffer loading. 
PiperOrigin-RevId: 323832189 Change-Id: I801261b1488da9ea933203d0328b05f4037bd7a4 --- tensorflow/core/platform/cloud/BUILD | 2 -- tensorflow/core/platform/cloud/gcs_file_system.cc | 7 ------- 2 files changed, 9 deletions(-) diff --git a/tensorflow/core/platform/cloud/BUILD b/tensorflow/core/platform/cloud/BUILD index 38ced40d8fd..2440549a353 100644 --- a/tensorflow/core/platform/cloud/BUILD +++ b/tensorflow/core/platform/cloud/BUILD @@ -103,8 +103,6 @@ cc_library( "//tensorflow/core/platform:retrying_utils", "//tensorflow/core/platform:str_util", "//tensorflow/core/platform:stringprintf", - "//tensorflow/core/profiler/lib:traceme", - "@com_google_absl//absl/strings:str_format", "@jsoncpp_git//:jsoncpp", ], alwayslink = 1, diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc index 62a6d9f4531..43ece688034 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system.cc @@ -47,7 +47,6 @@ limitations under the License. #include "tensorflow/core/platform/str_util.h" #include "tensorflow/core/platform/stringprintf.h" #include "tensorflow/core/platform/thread_annotations.h" -#include "tensorflow/core/profiler/lib/traceme.h" #ifdef _WIN32 #ifdef DeleteFile @@ -1071,9 +1070,6 @@ Status GcsFileSystem::LoadBufferFromGCS(const string& fname, size_t offset, string bucket, object; TF_RETURN_IF_ERROR(ParseGcsPath(fname, false, &bucket, &object)); - profiler::TraceMe activity( - [fname]() { return absl::StrCat("LoadBufferFromGCS ", fname); }); - std::unique_ptr request; TF_RETURN_WITH_CONTEXT_IF_ERROR(CreateHttpRequest(&request), "when reading gs://", bucket, "/", object); @@ -1095,9 +1091,6 @@ Status GcsFileSystem::LoadBufferFromGCS(const string& fname, size_t offset, *bytes_transferred = bytes_read; VLOG(1) << "Successful read of gs://" << bucket << "/" << object << " @ " << offset << " of size: " << bytes_read; - activity.AppendMetadata([bytes_read]() { - return profiler::TraceMeEncode({{"block_size", bytes_read}}); - }); if (stats_ != nullptr) { stats_->RecordBlockRetrieved(fname, offset, bytes_read); From a4bfe896cf41ea4870bda18c5adea23b9634b9ed Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Thu, 30 Jul 2020 01:48:09 +0700 Subject: [PATCH 1627/2522] Change name of LoadLibrary --- tensorflow/c/env.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/c/env.h b/tensorflow/c/env.h index 048dca9eb43..895d3b57240 100644 --- a/tensorflow/c/env.h +++ b/tensorflow/c/env.h @@ -192,7 +192,7 @@ TF_CAPI_EXPORT extern void TF_JoinThread(TF_Thread* thread); // // On success, place OK in status and return the newly created library handle. // Otherwise returns nullptr and set error status. -TF_CAPI_EXPORT extern void* TF_LoadLibraryFromEnv(const char* library_filename, +TF_CAPI_EXPORT extern void* TF_LoadSharedLibrary(const char* library_filename, TF_Status* status); // \brief Get a pointer to a symbol from a dynamic library. From 18521423aef6f61867aeecae8d7934e62f5f948e Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Thu, 30 Jul 2020 01:50:49 +0700 Subject: [PATCH 1628/2522] Run clang-format --- tensorflow/c/env.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/c/env.h b/tensorflow/c/env.h index 895d3b57240..63e2c86ad44 100644 --- a/tensorflow/c/env.h +++ b/tensorflow/c/env.h @@ -193,7 +193,7 @@ TF_CAPI_EXPORT extern void TF_JoinThread(TF_Thread* thread); // On success, place OK in status and return the newly created library handle. 
// Otherwise returns nullptr and set error status. TF_CAPI_EXPORT extern void* TF_LoadSharedLibrary(const char* library_filename, - TF_Status* status); + TF_Status* status); // \brief Get a pointer to a symbol from a dynamic library. // From d1a607adf20fbb244aba7e7927859599737a1082 Mon Sep 17 00:00:00 2001 From: Yanhua Sun Date: Wed, 29 Jul 2020 11:37:38 -0700 Subject: [PATCH 1629/2522] remove v1 decorator PiperOrigin-RevId: 323832211 Change-Id: I0e9d4bcd867ae6eeee933e3a29ebe69c94208145 --- tensorflow/python/eager/graph_only_ops_test.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/eager/graph_only_ops_test.py b/tensorflow/python/eager/graph_only_ops_test.py index 97cf69f1905..2c5e478558c 100644 --- a/tensorflow/python/eager/graph_only_ops_test.py +++ b/tensorflow/python/eager/graph_only_ops_test.py @@ -22,6 +22,7 @@ import numpy as np from tensorflow.python.eager import graph_only_ops from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import math_ops from tensorflow.python.platform import test @@ -29,14 +30,14 @@ from tensorflow.python.platform import test class GraphOnlyOpsTest(test_util.TensorFlowTestCase): - @test_util.deprecated_graph_mode_only def testGraphPlaceholder(self): - x_tf = graph_only_ops.graph_placeholder(dtypes.int32, shape=(1,)) - y_tf = math_ops.square(x_tf) - with self.cached_session() as sess: - x = np.array([42]) - y = sess.run(y_tf, feed_dict={x_tf: np.array([42])}) - self.assertAllClose(np.square(x), y) + with ops.Graph().as_default(): + x_tf = graph_only_ops.graph_placeholder(dtypes.int32, shape=(1,)) + y_tf = math_ops.square(x_tf) + with self.cached_session() as sess: + x = np.array([42]) + y = sess.run(y_tf, feed_dict={x_tf: np.array([42])}) + self.assertAllClose(np.square(x), y) if __name__ == '__main__': From d430738860a1fb57404d6985e714559e8b7bea5d Mon Sep 17 00:00:00 2001 From: Kibeom Kim Date: Wed, 29 Jul 2020 11:38:13 -0700 Subject: [PATCH 1630/2522] Fix @test_util.run_deprecated_v1 in third_party/tensorflow/python/kernel_tests/basic_gpu_test.py PiperOrigin-RevId: 323832344 Change-Id: I173fff3f748811adb1c7ded116eca590f2d2c809 --- .../python/kernel_tests/basic_gpu_test.py | 71 +++++-------------- 1 file changed, 18 insertions(+), 53 deletions(-) diff --git a/tensorflow/python/kernel_tests/basic_gpu_test.py b/tensorflow/python/kernel_tests/basic_gpu_test.py index df27e8afbba..b4f964d2daa 100644 --- a/tensorflow/python/kernel_tests/basic_gpu_test.py +++ b/tensorflow/python/kernel_tests/basic_gpu_test.py @@ -29,7 +29,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gradient_checker +from tensorflow.python.ops import gradient_checker_v2 from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import variables @@ -156,10 +156,8 @@ class MathBuiltinUnaryTest(test.TestCase): class BroadcastSimpleTest(test.TestCase): def _GetGradientArgs(self, xs, ys): - with self.cached_session(use_gpu=True) as sess: - return sess.run(broadcast_gradient_args(xs, ys)) + return self.evaluate(broadcast_gradient_args(xs, ys)) - @test_util.run_deprecated_v1 def testBroadcast(self): r0, r1 = self._GetGradientArgs([2, 3, 5], [1]) self.assertAllEqual(r0, []) @@ -167,48 +165,6 @@ class 
BroadcastSimpleTest(test.TestCase): _GRAD_TOL = {dtypes.float32: 1e-3} - def _compareGradientX(self, - x, - y, - np_func, - tf_func, - numeric_gradient_type=None): - z = np_func(x, y) - zs = list(z.shape) - with self.cached_session(): - inx = ops.convert_to_tensor(x) - iny = ops.convert_to_tensor(y) - if x.dtype in (np.float32, np.float64): - out = 1.1 * tf_func(inx, iny) - else: - out = tf_func(inx, iny) - xs = list(x.shape) - jacob_t, jacob_n = gradient_checker.compute_gradient( - inx, xs, out, zs, x_init_value=x) - tol = self._GRAD_TOL[dtypes.as_dtype(x.dtype)] - self.assertAllClose(jacob_t, jacob_n, rtol=tol, atol=tol) - - def _compareGradientY(self, - x, - y, - np_func, - tf_func, - numeric_gradient_type=None): - z = np_func(x, y) - zs = list(z.shape) - with self.cached_session(): - inx = ops.convert_to_tensor(x) - iny = ops.convert_to_tensor(y) - if x.dtype in (np.float32, np.float64): - out = 1.1 * tf_func(inx, iny) - else: - out = tf_func(inx, iny) - ys = list(np.shape(y)) - jacob_t, jacob_n = gradient_checker.compute_gradient( - iny, ys, out, zs, x_init_value=y) - tol = self._GRAD_TOL[dtypes.as_dtype(x.dtype)] - self.assertAllClose(jacob_t, jacob_n, rtol=tol, atol=tol) - def _compareGpu(self, x, y, np_func, tf_func): np_ans = np_func(x, y) with self.cached_session(use_gpu=True): @@ -220,17 +176,26 @@ class BroadcastSimpleTest(test.TestCase): self.assertShapeEqual(np_ans, out) # TODO(zhifengc/ke): make gradient checker work on GPU. - @test_util.run_deprecated_v1 def testGradient(self): - x = (1 + np.linspace(0, 5, np.prod([1, 3, 2]))).astype(np.float32).reshape( + x1 = (1 + np.linspace(0, 5, np.prod([1, 3, 2]))).astype(np.float32).reshape( [1, 3, 2]) - y = (1 + np.linspace(0, 5, np.prod([1, 3, 2]))).astype(np.float32).reshape( + x2 = (1 + np.linspace(0, 5, np.prod([1, 3, 2]))).astype(np.float32).reshape( [1, 3, 2]) - self._compareGradientX(x, y, np.true_divide, math_ops.truediv) - self._compareGradientY(x, y, np.true_divide, math_ops.truediv) - self._compareGpu(x, y, np.true_divide, math_ops.truediv) - self._compareGpu(x, y + 0.1, np.floor_divide, math_ops.floordiv) + def div_x1(x1): + return math_ops.truediv(x1, x2) * math_ops.cast(1.1, dtype=x1.dtype) + + def div_x2(x2): + return math_ops.truediv(x1, x2) * math_ops.cast(1.1, dtype=x2.dtype) + + with self.cached_session(): + gradient_checker_v2.compute_gradient( + div_x1, [x1], self._GRAD_TOL[dtypes.as_dtype(x1.dtype)]) + gradient_checker_v2.compute_gradient( + div_x2, [x2], self._GRAD_TOL[dtypes.as_dtype(x2.dtype)]) + + self._compareGpu(x1, x2, np.true_divide, math_ops.truediv) + self._compareGpu(x1, x2 + 0.1, np.floor_divide, math_ops.floordiv) class GpuMultiSessionMemoryTest(test_util.TensorFlowTestCase): From 532e00e6bc5bea4ebadf5b2002cfa3db4cd7e7e0 Mon Sep 17 00:00:00 2001 From: Qiao Zhang Date: Wed, 29 Jul 2020 11:49:22 -0700 Subject: [PATCH 1631/2522] Change EagerContext::FindCustomDeviceFromName to return bool instead of Status. EagerContext::FindCustomDeviceFromName lookup failure case is the common case, and the string copy for error status is on the critical path. There is no current consumer of the actual error message. Making it return bool shaves off 0.34us. 
PiperOrigin-RevId: 323834762 Change-Id: Ie96dbb8aae53017efc7a9bb185459af03732901e --- tensorflow/c/eager/c_api.cc | 8 +++++--- tensorflow/core/common_runtime/eager/context.cc | 8 ++++---- tensorflow/core/common_runtime/eager/context.h | 4 ++-- tensorflow/core/common_runtime/eager/core.cc | 9 +++++---- tensorflow/core/common_runtime/eager/eager_operation.cc | 2 +- tensorflow/core/common_runtime/eager/placement_utils.cc | 2 +- 6 files changed, 18 insertions(+), 15 deletions(-) diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index 39b09348642..76d603694e3 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -1072,11 +1072,13 @@ TFE_TensorHandle* TFE_NewTensorHandleFromDeviceMemory( status->status = context->FindDeviceFromName(device_name, &device); tensorflow::CustomDevice* custom_device = nullptr; if (!status->status.ok()) { - status->status = - context->FindCustomDeviceFromName(device_name, &custom_device); - if (!status->status.ok()) { + if (!context->FindCustomDeviceFromName(device_name, &custom_device)) { deallocator(data, len, deallocator_arg); + status->status = + tensorflow::errors::InvalidArgument(device_name, " unknown device."); return nullptr; + } else { + status->status = tensorflow::Status::OK(); } } std::vector dimvec(num_dims); diff --git a/tensorflow/core/common_runtime/eager/context.cc b/tensorflow/core/common_runtime/eager/context.cc index 106e6bb87db..93b78ed6a26 100644 --- a/tensorflow/core/common_runtime/eager/context.cc +++ b/tensorflow/core/common_runtime/eager/context.cc @@ -907,14 +907,14 @@ Status EagerContext::FindCompositeDeviceFromName( return errors::NotFound("Unknown composite device: ", device_name); } -Status EagerContext::FindCustomDeviceFromName(const string& device_name, - CustomDevice** dev) const { +bool EagerContext::FindCustomDeviceFromName(const string& device_name, + CustomDevice** dev) const { auto dev_it = custom_devices_.find(device_name); if (dev_it == custom_devices_.end()) { - return errors::InvalidArgument(device_name, " unknown device."); + return false; } *dev = dev_it->second.get(); - return Status::OK(); + return true; } Status EagerContext::RegisterCustomDevice( diff --git a/tensorflow/core/common_runtime/eager/context.h b/tensorflow/core/common_runtime/eager/context.h index 36aa2f18292..286eb44fbeb 100644 --- a/tensorflow/core/common_runtime/eager/context.h +++ b/tensorflow/core/common_runtime/eager/context.h @@ -481,8 +481,8 @@ class EagerContext : public ImmediateExecutionContext, public core::RefCounted { Status FindCompositeDeviceFromName(StringPiece device_name, CompositeDevice** device) const; - Status FindCustomDeviceFromName(const string& device_name, - CustomDevice** dev) const; + bool FindCustomDeviceFromName(const string& device_name, + CustomDevice** dev) const; Status RegisterCustomDevice(const string& name, std::unique_ptr device); diff --git a/tensorflow/core/common_runtime/eager/core.cc b/tensorflow/core/common_runtime/eager/core.cc index 28f25d643b6..c0fe1b4fe42 100644 --- a/tensorflow/core/common_runtime/eager/core.cc +++ b/tensorflow/core/common_runtime/eager/core.cc @@ -126,12 +126,14 @@ ImmediateExecutionTensorHandle* EagerContext::CopyTensorHandleToDevice( *status = this->FindDeviceFromName(device_name, &device); if (!status->ok()) { tensorflow::CustomDevice* dev; - *status = this->FindCustomDeviceFromName(device_name, &dev); - if (status->ok()) { + if (this->FindCustomDeviceFromName(device_name, &dev)) { *status = dev->CopyTensorToDevice(input, &result); if 
(status->ok()) { return result; } + } else { + *status = + tensorflow::errors::InvalidArgument(device_name, " unknown device."); } return nullptr; } @@ -141,8 +143,7 @@ ImmediateExecutionTensorHandle* EagerContext::CopyTensorHandleToDevice( return nullptr; } tensorflow::CustomDevice* dev; - *status = this->FindCustomDeviceFromName(handle_device_name, &dev); - if (status->ok()) { + if (this->FindCustomDeviceFromName(handle_device_name, &dev)) { *status = dev->CopyTensorFromDevice(input, device_name, &result); if (status->ok()) { return result; diff --git a/tensorflow/core/common_runtime/eager/eager_operation.cc b/tensorflow/core/common_runtime/eager/eager_operation.cc index 6dbc342c1bd..df3b3727b60 100644 --- a/tensorflow/core/common_runtime/eager/eager_operation.cc +++ b/tensorflow/core/common_runtime/eager/eager_operation.cc @@ -392,7 +392,7 @@ Status EagerOperation::SetDeviceName(const char* c_name) { last_set_device_name_ = name; device_name_ = DeviceNameUtils::ParsedNameToString(device_parsed_name_); CustomDevice* custom_device; - if (ctx_.FindCustomDeviceFromName(device_name_, &custom_device).ok()) { + if (ctx_.FindCustomDeviceFromName(device_name_, &custom_device)) { device_ = custom_device; } else { // Device placement for physical devices happens lazily in diff --git a/tensorflow/core/common_runtime/eager/placement_utils.cc b/tensorflow/core/common_runtime/eager/placement_utils.cc index 8898516612f..dd99c0fca83 100644 --- a/tensorflow/core/common_runtime/eager/placement_utils.cc +++ b/tensorflow/core/common_runtime/eager/placement_utils.cc @@ -78,7 +78,7 @@ bool IsFunction(StringPiece op_name) { bool IsCustomDevice(StringPiece device_name, const EagerContext& ctx) { CustomDevice* custom_device; - return ctx.FindCustomDeviceFromName(string(device_name), &custom_device).ok(); + return ctx.FindCustomDeviceFromName(string(device_name), &custom_device); } Status MaybePinSmallOpsToCpu(bool* result, StringPiece op_name, From d85aceb8627316ff5fcbaee2b0931b00624a5f35 Mon Sep 17 00:00:00 2001 From: Cesar Crusius Date: Wed, 29 Jul 2020 11:54:22 -0700 Subject: [PATCH 1632/2522] Remove deprecated_graph_mode_only annotations from model_utils:export_test. One of the tests actually did not need the annotation. The others all used APIs that found their way to build_tensor_info, which is a graph-mode only function, and were forced to run in graph mode. 
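For illustration only (not part of this patch), the mechanical rewrite applied to each affected test is roughly the following; the test class and test name here are hypothetical, but the imports match the ones used in the diff below:

    from tensorflow.python.framework import dtypes
    from tensorflow.python.framework import ops
    from tensorflow.python.framework import test_util
    from tensorflow.python.ops import array_ops


    class ExampleTest(test_util.TensorFlowTestCase):

      # Before (the pattern this patch removes):
      #
      #   @test_util.deprecated_graph_mode_only
      #   def test_placeholder(self):
      #     x = array_ops.placeholder(dtypes.string)
      #     ...
      #
      # After: pin only the graph-only API usage (here a v1 placeholder, in the
      # real tests the build_tensor_info-based signature utilities) to an
      # explicit graph, so the rest of the suite runs in the default eager mode.
      def test_placeholder(self):
        with ops.Graph().as_default():
          x = array_ops.placeholder(dtypes.string)
          self.assertEqual(dtypes.string, x.dtype)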
PiperOrigin-RevId: 323835871 Change-Id: Ie97b638db3f3205f55be268bf96af32009d0103a --- .../python/saved_model/model_utils/BUILD | 1 + .../saved_model/model_utils/export_test.py | 291 +++++++++--------- 2 files changed, 155 insertions(+), 137 deletions(-) diff --git a/tensorflow/python/saved_model/model_utils/BUILD b/tensorflow/python/saved_model/model_utils/BUILD index 82a33c8e522..775d81a86bc 100644 --- a/tensorflow/python/saved_model/model_utils/BUILD +++ b/tensorflow/python/saved_model/model_utils/BUILD @@ -103,6 +103,7 @@ py_strict_test( "//tensorflow/python:client_testlib", "//tensorflow/python:constant_op", "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", "//tensorflow/python:framework_test_lib", "//tensorflow/python/saved_model:signature_constants", "//tensorflow/python/saved_model:signature_def_utils", diff --git a/tensorflow/python/saved_model/model_utils/export_test.py b/tensorflow/python/saved_model/model_utils/export_test.py index 8620a3a6a06..f62f4150bf6 100644 --- a/tensorflow/python/saved_model/model_utils/export_test.py +++ b/tensorflow/python/saved_model/model_utils/export_test.py @@ -24,6 +24,7 @@ import time from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.platform import test @@ -36,136 +37,147 @@ from tensorflow.python.saved_model.model_utils.mode_keys import KerasModeKeys class ExportTest(test_util.TensorFlowTestCase): - @test_util.deprecated_graph_mode_only def test_build_all_signature_defs_without_receiver_alternatives(self): - receiver_tensor = array_ops.placeholder(dtypes.string) - output_1 = constant_op.constant([1.]) - output_2 = constant_op.constant(["2"]) - output_3 = constant_op.constant(["3"]) - export_outputs = { - signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: - export_output.RegressionOutput(value=output_1), - "head-2": export_output.ClassificationOutput(classes=output_2), - "head-3": export_output.PredictOutput(outputs={ - "some_output_3": output_3 - }), - } + # Force the test to run in graph mode. + # This tests a deprecated v1 API that depends on graph-only functions such + # as build_tensor_info. 
+ with ops.Graph().as_default(): + receiver_tensor = array_ops.placeholder(dtypes.string) + output_1 = constant_op.constant([1.]) + output_2 = constant_op.constant(["2"]) + output_3 = constant_op.constant(["3"]) + export_outputs = { + signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: + export_output.RegressionOutput(value=output_1), + "head-2": + export_output.ClassificationOutput(classes=output_2), + "head-3": + export_output.PredictOutput(outputs={"some_output_3": output_3}), + } - signature_defs = export_utils.build_all_signature_defs( - receiver_tensor, export_outputs) + signature_defs = export_utils.build_all_signature_defs( + receiver_tensor, export_outputs) - expected_signature_defs = { - "serving_default": - signature_def_utils.regression_signature_def(receiver_tensor, - output_1), - "head-2": - signature_def_utils.classification_signature_def(receiver_tensor, - output_2, None), - "head-3": - signature_def_utils.predict_signature_def({ - "input": receiver_tensor - }, {"some_output_3": output_3}) - } + expected_signature_defs = { + "serving_default": + signature_def_utils.regression_signature_def( + receiver_tensor, output_1), + "head-2": + signature_def_utils.classification_signature_def( + receiver_tensor, output_2, None), + "head-3": + signature_def_utils.predict_signature_def( + {"input": receiver_tensor}, {"some_output_3": output_3}) + } - self.assertDictEqual(expected_signature_defs, signature_defs) + self.assertDictEqual(expected_signature_defs, signature_defs) - @test_util.deprecated_graph_mode_only def test_build_all_signature_defs_with_dict_alternatives(self): - receiver_tensor = array_ops.placeholder(dtypes.string) - receiver_tensors_alternative_1 = { - "foo": array_ops.placeholder(dtypes.int64), - "bar": array_ops.sparse_placeholder(dtypes.float32)} - receiver_tensors_alternatives = {"other": receiver_tensors_alternative_1} - output_1 = constant_op.constant([1.]) - output_2 = constant_op.constant(["2"]) - output_3 = constant_op.constant(["3"]) - export_outputs = { - signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: - export_output.RegressionOutput(value=output_1), - "head-2": export_output.ClassificationOutput(classes=output_2), - "head-3": export_output.PredictOutput(outputs={ - "some_output_3": output_3 - }), - } + # Force the test to run in graph mode. + # This tests a deprecated v1 API that depends on graph-only functions such + # as build_tensor_info. 
+ with ops.Graph().as_default(): + receiver_tensor = array_ops.placeholder(dtypes.string) + receiver_tensors_alternative_1 = { + "foo": array_ops.placeholder(dtypes.int64), + "bar": array_ops.sparse_placeholder(dtypes.float32) + } + receiver_tensors_alternatives = {"other": receiver_tensors_alternative_1} + output_1 = constant_op.constant([1.]) + output_2 = constant_op.constant(["2"]) + output_3 = constant_op.constant(["3"]) + export_outputs = { + signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: + export_output.RegressionOutput(value=output_1), + "head-2": + export_output.ClassificationOutput(classes=output_2), + "head-3": + export_output.PredictOutput(outputs={"some_output_3": output_3}), + } - signature_defs = export_utils.build_all_signature_defs( - receiver_tensor, export_outputs, receiver_tensors_alternatives) + signature_defs = export_utils.build_all_signature_defs( + receiver_tensor, export_outputs, receiver_tensors_alternatives) - expected_signature_defs = { - "serving_default": - signature_def_utils.regression_signature_def( - receiver_tensor, output_1), - "head-2": - signature_def_utils.classification_signature_def( - receiver_tensor, output_2, None), - "head-3": - signature_def_utils.predict_signature_def( - {"input": receiver_tensor}, {"some_output_3": output_3}), - "other:head-3": - signature_def_utils.predict_signature_def( - receiver_tensors_alternative_1, {"some_output_3": output_3}) + expected_signature_defs = { + "serving_default": + signature_def_utils.regression_signature_def( + receiver_tensor, output_1), + "head-2": + signature_def_utils.classification_signature_def( + receiver_tensor, output_2, None), + "head-3": + signature_def_utils.predict_signature_def( + {"input": receiver_tensor}, {"some_output_3": output_3}), + "other:head-3": + signature_def_utils.predict_signature_def( + receiver_tensors_alternative_1, {"some_output_3": output_3}) - # Note that the alternatives 'other:serving_default' and - # 'other:head-2' are invalid, because regression and classification - # signatures must take a single string input. Here we verify that - # these invalid signatures are not included in the export_utils. - } + # Note that the alternatives 'other:serving_default' and + # 'other:head-2' are invalid, because regression and classification + # signatures must take a single string input. Here we verify that + # these invalid signatures are not included in the export_utils. + } - self.assertDictEqual(expected_signature_defs, signature_defs) + self.assertDictEqual(expected_signature_defs, signature_defs) - @test_util.deprecated_graph_mode_only def test_build_all_signature_defs_with_single_alternatives(self): - receiver_tensor = array_ops.placeholder(dtypes.string) - receiver_tensors_alternative_1 = array_ops.placeholder(dtypes.int64) - receiver_tensors_alternative_2 = array_ops.sparse_placeholder( - dtypes.float32) - # Note we are passing single Tensors as values of - # receiver_tensors_alternatives, where normally that is a dict. - # In this case a dict will be created using the default receiver tensor - # name "input". 
- receiver_tensors_alternatives = {"other1": receiver_tensors_alternative_1, - "other2": receiver_tensors_alternative_2} - output_1 = constant_op.constant([1.]) - output_2 = constant_op.constant(["2"]) - output_3 = constant_op.constant(["3"]) - export_outputs = { - signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: - export_output.RegressionOutput(value=output_1), - "head-2": export_output.ClassificationOutput(classes=output_2), - "head-3": export_output.PredictOutput(outputs={ - "some_output_3": output_3 - }), - } + # Force the test to run in graph mode. + # This tests a deprecated v1 API that depends on graph-only functions such + # as build_tensor_info. + with ops.Graph().as_default(): + receiver_tensor = array_ops.placeholder(dtypes.string) + receiver_tensors_alternative_1 = array_ops.placeholder(dtypes.int64) + receiver_tensors_alternative_2 = array_ops.sparse_placeholder( + dtypes.float32) + # Note we are passing single Tensors as values of + # receiver_tensors_alternatives, where normally that is a dict. + # In this case a dict will be created using the default receiver tensor + # name "input". + receiver_tensors_alternatives = { + "other1": receiver_tensors_alternative_1, + "other2": receiver_tensors_alternative_2 + } + output_1 = constant_op.constant([1.]) + output_2 = constant_op.constant(["2"]) + output_3 = constant_op.constant(["3"]) + export_outputs = { + signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: + export_output.RegressionOutput(value=output_1), + "head-2": + export_output.ClassificationOutput(classes=output_2), + "head-3": + export_output.PredictOutput(outputs={"some_output_3": output_3}), + } - signature_defs = export_utils.build_all_signature_defs( - receiver_tensor, export_outputs, receiver_tensors_alternatives) + signature_defs = export_utils.build_all_signature_defs( + receiver_tensor, export_outputs, receiver_tensors_alternatives) - expected_signature_defs = { - "serving_default": - signature_def_utils.regression_signature_def( - receiver_tensor, output_1), - "head-2": - signature_def_utils.classification_signature_def( - receiver_tensor, output_2, None), - "head-3": - signature_def_utils.predict_signature_def( - {"input": receiver_tensor}, {"some_output_3": output_3}), - "other1:head-3": - signature_def_utils.predict_signature_def( - {"input": receiver_tensors_alternative_1}, - {"some_output_3": output_3}), - "other2:head-3": - signature_def_utils.predict_signature_def( - {"input": receiver_tensors_alternative_2}, - {"some_output_3": output_3}) + expected_signature_defs = { + "serving_default": + signature_def_utils.regression_signature_def( + receiver_tensor, output_1), + "head-2": + signature_def_utils.classification_signature_def( + receiver_tensor, output_2, None), + "head-3": + signature_def_utils.predict_signature_def( + {"input": receiver_tensor}, {"some_output_3": output_3}), + "other1:head-3": + signature_def_utils.predict_signature_def( + {"input": receiver_tensors_alternative_1}, + {"some_output_3": output_3}), + "other2:head-3": + signature_def_utils.predict_signature_def( + {"input": receiver_tensors_alternative_2}, + {"some_output_3": output_3}) - # Note that the alternatives 'other:serving_default' and 'other:head-2' - # are invalid, because regression and classification signatures must take - # a single string input. Here we verify that these invalid signatures - # are not included in the export_utils. 
- } + # Note that the alternatives 'other:serving_default' and + # 'other:head-2' are invalid, because regression and classification + # signatures must take a single string input. Here we verify that + # these invalid signatures are not included in the export_utils. + } - self.assertDictEqual(expected_signature_defs, signature_defs) + self.assertDictEqual(expected_signature_defs, signature_defs) def test_build_all_signature_defs_export_outputs_required(self): receiver_tensor = constant_op.constant(["11"]) @@ -210,37 +222,42 @@ class ExportTest(test_util.TensorFlowTestCase): self.assertEqual(tmp_export_dir, os.path.join(b"tmp", b"export", b"temp-1576013284")) - @test_util.deprecated_graph_mode_only def test_build_all_signature_defs_serving_only(self): - receiver_tensor = {"input": array_ops.placeholder(dtypes.string)} - output_1 = constant_op.constant([1.]) - export_outputs = { - signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: - export_output.PredictOutput(outputs=output_1), - "train": export_output.TrainOutput(loss=output_1), - } + # Force the test to run in graph mode. + # This tests a deprecated v1 API that depends on graph-only functions such + # as build_tensor_info. + with ops.Graph().as_default(): + receiver_tensor = {"input": array_ops.placeholder(dtypes.string)} + output_1 = constant_op.constant([1.]) + export_outputs = { + signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: + export_output.PredictOutput(outputs=output_1), + "train": + export_output.TrainOutput(loss=output_1), + } - signature_defs = export_utils.build_all_signature_defs( - receiver_tensor, export_outputs) + signature_defs = export_utils.build_all_signature_defs( + receiver_tensor, export_outputs) - expected_signature_defs = { - "serving_default": signature_def_utils.predict_signature_def( - receiver_tensor, {"output": output_1}) - } + expected_signature_defs = { + "serving_default": + signature_def_utils.predict_signature_def(receiver_tensor, + {"output": output_1}) + } - self.assertDictEqual(expected_signature_defs, signature_defs) + self.assertDictEqual(expected_signature_defs, signature_defs) - signature_defs = export_utils.build_all_signature_defs( - receiver_tensor, export_outputs, serving_only=False) + signature_defs = export_utils.build_all_signature_defs( + receiver_tensor, export_outputs, serving_only=False) - expected_signature_defs.update({ - "train": signature_def_utils.supervised_train_signature_def( - receiver_tensor, loss={"loss": output_1}) - }) + expected_signature_defs.update({ + "train": + signature_def_utils.supervised_train_signature_def( + receiver_tensor, loss={"loss": output_1}) + }) - self.assertDictEqual(expected_signature_defs, signature_defs) + self.assertDictEqual(expected_signature_defs, signature_defs) - @test_util.deprecated_graph_mode_only def test_export_outputs_for_mode(self): predictions = {"predictions": constant_op.constant([1.])} loss = {"loss": constant_op.constant([2.])} From 78a6718d949c1f2375f6703f5c6ffb3391de1a97 Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Wed, 29 Jul 2020 11:54:52 -0700 Subject: [PATCH 1633/2522] [TF2XLA] [NFC] Correct the documentation for self.test_scope for XLATestCase PiperOrigin-RevId: 323835976 Change-Id: I0967b0d50730704cf0a48aa427edd09f079131cf --- tensorflow/compiler/tests/xla_test.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tensorflow/compiler/tests/xla_test.py b/tensorflow/compiler/tests/xla_test.py index f5f63cb60aa..3b057ed8b17 100644 --- a/tensorflow/compiler/tests/xla_test.py +++ 
b/tensorflow/compiler/tests/xla_test.py @@ -236,9 +236,7 @@ class XLATestCase(test.TestCase): @contextlib.contextmanager def test_scope(self): - """Test scope that runs tests on a Tensorflow/XLA device. - - Uses a compilation_scope() to mark operators to compile. + """Test scope that runs tests on `self.device`. Yields: A scope to apply to the operators under test. From 20939a4b51e72fd8e4263e8cc79f01f63a544f62 Mon Sep 17 00:00:00 2001 From: Jonathan Chu Date: Wed, 29 Jul 2020 19:27:03 +0000 Subject: [PATCH 1634/2522] Remove whitespace --- tensorflow/python/eager/function.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 0c06e0425cd..40df3e33e27 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -3148,7 +3148,7 @@ class Function(object): save_context.get_save_options().experimental_variable_policy) else: variable_policy = save_options.VariablePolicy.EXPAND_DISTRIBUTED_VARIABLES - + return CacheKey( hashable_input_signature, parent_graph, device_functions, colocation_stack, in_cross_replica_context, From d9e90e0437a4851ba2852e405d8267b3b630931d Mon Sep 17 00:00:00 2001 From: Robert Suderman Date: Wed, 29 Jul 2020 12:31:15 -0700 Subject: [PATCH 1635/2522] HLO Random operations should match shape constraints. PiperOrigin-RevId: 323844002 Change-Id: I63b9b7be0ebc74db668307a00405cf9635024886 --- .../mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td index 0ed4235e23f..93c5388ad5d 100644 --- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td @@ -1324,7 +1324,7 @@ def HLO_RngUniformOp : HLO_Op<"rng_uniform", []>, BASE_HLO_RngUniformOp { let arguments = (ins HLO_PredIntOrFpTensor:$a, HLO_PredIntOrFpTensor:$b, - I64Tensor:$shape + HLO_DimensionTensor:$shape ); let results = (outs HLO_PredIntOrFpTensor); @@ -1336,7 +1336,7 @@ def HLO_RngNormalOp : HLO_Op<"rng_normal", []>, BASE_HLO_RngNormalOp { let arguments = (ins HLO_FpTensor:$mu, HLO_FpTensor:$sigma, - I64Tensor:$shape + HLO_DimensionTensor:$shape ); let results = (outs HLO_FpTensor); From 4fe224f94738c1f9f0dad7adf90ae6f8f1583758 Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Wed, 29 Jul 2020 15:35:39 -0400 Subject: [PATCH 1636/2522] Update tensorflow/compiler/tf2xla/functionalize_cond.cc Co-authored-by: Mihai Maruseac --- tensorflow/compiler/tf2xla/functionalize_cond.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/tf2xla/functionalize_cond.cc b/tensorflow/compiler/tf2xla/functionalize_cond.cc index 8f6b4eff83b..548bf20128c 100644 --- a/tensorflow/compiler/tf2xla/functionalize_cond.cc +++ b/tensorflow/compiler/tf2xla/functionalize_cond.cc @@ -236,8 +236,8 @@ StateMap::CondId StateMap::GetCondId(const StateMap::CondState& state) { } void StateMap::ResetCondId(const Node* node, StateMap::CondId id) { - const int64 node_to_map_size = node_to_condid_map_.size(); - if (node->id() < node_to_map_size) + const int64 map_size = node_to_condid_map_.size(); + if (node->id() < map_size) node_to_condid_map_[node->id()] = id; else added_node_condid_mapping_[node->id()] = id; From 7beca73b99512205113eea45af5da5a97218c174 Mon Sep 17 00:00:00 2001 From: 
tg-at-google Date: Wed, 29 Jul 2020 15:35:50 -0400 Subject: [PATCH 1637/2522] Update tensorflow/compiler/tf2xla/functionalize_cond.cc Co-authored-by: Mihai Maruseac --- tensorflow/compiler/tf2xla/functionalize_cond.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/tf2xla/functionalize_cond.cc b/tensorflow/compiler/tf2xla/functionalize_cond.cc index 548bf20128c..1a512ccff8c 100644 --- a/tensorflow/compiler/tf2xla/functionalize_cond.cc +++ b/tensorflow/compiler/tf2xla/functionalize_cond.cc @@ -244,8 +244,8 @@ void StateMap::ResetCondId(const Node* node, StateMap::CondId id) { } StateMap::AncestorId StateMap::LookupAncestorId(const Node* node) const { - const int64 node_to_map_size = node_to_ancestorid_map_.size(); - if (node->id() < node_to_map_size) + const int64 map_size = node_to_ancestorid_map_.size(); + if (node->id() < map_size) return node_to_ancestorid_map_[node->id()]; return added_node_ancestorid_mapping_.at(node->id()); } From fe0a4687f06f46b9689abbe8d537f15eedd6be12 Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Wed, 29 Jul 2020 15:35:58 -0400 Subject: [PATCH 1638/2522] Update tensorflow/compiler/tf2xla/functionalize_cond.cc Co-authored-by: Mihai Maruseac --- tensorflow/compiler/tf2xla/functionalize_cond.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/tf2xla/functionalize_cond.cc b/tensorflow/compiler/tf2xla/functionalize_cond.cc index 1a512ccff8c..6eb2ed04421 100644 --- a/tensorflow/compiler/tf2xla/functionalize_cond.cc +++ b/tensorflow/compiler/tf2xla/functionalize_cond.cc @@ -257,8 +257,8 @@ StateMap::AncestorId StateMap::GetAncestorId( } void StateMap::ResetAncestorId(const Node* node, StateMap::AncestorId id) { - const int64 node_to_map_size = node_to_ancestorid_map_.size(); - if (node->id() < node_to_map_size) + const int64 map_size = node_to_ancestorid_map_.size(); + if (node->id() < map_size) node_to_ancestorid_map_[node->id()] = id; else added_node_ancestorid_mapping_[node->id()] = id; From 2423a7fd914c5f5f79312483076b368ac4dc997a Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Wed, 29 Jul 2020 19:42:05 +0000 Subject: [PATCH 1639/2522] added missing include --- tensorflow/c/kernels/ops/summary.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/c/kernels/ops/summary.cc b/tensorflow/c/kernels/ops/summary.cc index 36f23684ef2..b3b0d6bdc95 100644 --- a/tensorflow/c/kernels/ops/summary.cc +++ b/tensorflow/c/kernels/ops/summary.cc @@ -17,6 +17,7 @@ limitations under the License. #include "tensorflow/c/tf_status.h" #include "tensorflow/core/framework/selective_registration.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/macros.h" static void scalar_summary_shape_inference_fn(TF_ShapeInferenceContext* ctx, TF_Status* status) { From acd4240283237384cafc85bff801f54330efe461 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Wed, 29 Jul 2020 12:44:12 -0700 Subject: [PATCH 1640/2522] Support calling `model.build()` with a shape passed a list of int/None for subclassed models. 
PiperOrigin-RevId: 323846785 Change-Id: I7f3260b4a3309527f665ef04b608a5c00a15c352 --- tensorflow/python/keras/engine/training.py | 3 ++ .../python/keras/engine/training_test.py | 47 +++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index 18dfc4c1642..15f77ab8a96 100644 --- a/tensorflow/python/keras/engine/training.py +++ b/tensorflow/python/keras/engine/training.py @@ -393,6 +393,9 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector): else: graph = backend.get_graph() with graph.as_default(): + if (isinstance(input_shape, list) and + all(d is None or isinstance(d, int) for d in input_shape)): + input_shape = tuple(input_shape) if isinstance(input_shape, list): x = [base_layer_utils.generate_placeholders_from_shape(shape) for shape in input_shape] diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py index 93e9b66b196..15976c0a072 100644 --- a/tensorflow/python/keras/engine/training_test.py +++ b/tensorflow/python/keras/engine/training_test.py @@ -3604,5 +3604,52 @@ class TestFunctionTracing(keras_parameterized.TestCase): self.assertEqual(sum(new_func_graph in log for log in logs.output), 9) +class TestBuildCustomModel(keras_parameterized.TestCase): + + @keras_parameterized.run_all_keras_modes + def test_build_list_of_inputs(self): + + class MyModel(training_module.Model): + + def __init__(self): + super(MyModel, self).__init__() + self.l1 = layers_module.Dense(1) + self.l2 = layers_module.Dense(2) + + def call(self, x): + a, b = x + return self.l1(a) + self.l2(b) + + # List of tuples + model = MyModel() + model.build([(None, 1), (None, 2)]) + self.assertEqual(model.l1.kernel.shape.as_list(), [1, 1]) + self.assertEqual(model.l2.kernel.shape.as_list(), [2, 2]) + # List of lists + model = MyModel() + model.build([[None, 1], [None, 2]]) + self.assertEqual(model.l1.kernel.shape.as_list(), [1, 1]) + self.assertEqual(model.l2.kernel.shape.as_list(), [2, 2]) + + @keras_parameterized.run_all_keras_modes + def test_build_single_inputs(self): + + class MyModel(training_module.Model): + + def __init__(self): + super(MyModel, self).__init__() + self.l1 = layers_module.Dense(1) + + def call(self, x): + return self.l1(x) + + model = MyModel() + model.build((None, 1)) + self.assertEqual(model.l1.kernel.shape.as_list(), [1, 1]) + model = MyModel() + model.build([None, 1]) + self.assertEqual(model.l1.kernel.shape.as_list(), [1, 1]) + + if __name__ == '__main__': test.main() From 3617076dbf895c51c275dbcc9de907cb45e07aa0 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Wed, 29 Jul 2020 20:09:09 +0000 Subject: [PATCH 1641/2522] Update tensorflow/security/fuzzing/tstring_fuzz.cc --- tensorflow/security/fuzzing/tstring_fuzz.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/security/fuzzing/tstring_fuzz.cc b/tensorflow/security/fuzzing/tstring_fuzz.cc index 49aaa884374..0358034c334 100644 --- a/tensorflow/security/fuzzing/tstring_fuzz.cc +++ b/tensorflow/security/fuzzing/tstring_fuzz.cc @@ -32,7 +32,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { const size_t initial_size = base.size(); tensorflow::tstring pair = fuzzed_data.ConsumeRandomLengthString(10); base.append(pair); - assert(base.size() == pair.size() + initial_size); + assert(base.size() <= base.capacity()); } return 0; From 3fbfc9351ae6a2bb719d24458ee2c95214682302 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: 
Wed, 29 Jul 2020 20:09:16 +0000 Subject: [PATCH 1642/2522] Update tensorflow/security/fuzzing/tstring_fuzz.cc --- tensorflow/security/fuzzing/tstring_fuzz.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/security/fuzzing/tstring_fuzz.cc b/tensorflow/security/fuzzing/tstring_fuzz.cc index 0358034c334..65c711b8a8f 100644 --- a/tensorflow/security/fuzzing/tstring_fuzz.cc +++ b/tensorflow/security/fuzzing/tstring_fuzz.cc @@ -29,7 +29,6 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { tensorflow::tstring base = fuzzed_data.ConsumeRandomLengthString(10); while(fuzzed_data.remaining_bytes() > 0) { - const size_t initial_size = base.size(); tensorflow::tstring pair = fuzzed_data.ConsumeRandomLengthString(10); base.append(pair); assert(base.size() <= base.capacity()); From 41968353ef623161e355af2f3cf9acf7b1938a6b Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Wed, 29 Jul 2020 16:14:54 -0400 Subject: [PATCH 1643/2522] Update tensorflow/compiler/mlir/lite/ir/tfl_ops.cc Co-authored-by: Mihai Maruseac --- tensorflow/compiler/mlir/lite/ir/tfl_ops.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc b/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc index 3b81da9ca13..da2d0b76d4e 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc +++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc @@ -1193,8 +1193,8 @@ struct RemoveRedundantUnpackPack : public RewritePattern { return failure(); const int total_pack_inputs = pack_op.getNumOperands(); - const int input_unpack_op_getNumResults = input_unpack_op.getNumResults(); - if (total_pack_inputs != input_unpack_op_getNumResults) return failure(); + const int num_results = input_unpack_op.getNumResults(); + if (total_pack_inputs != num_results) return failure(); for (auto input_output : llvm::zip(pack_op.getOperands(), input_unpack_op.getResults())) { Value pack_input = std::get<0>(input_output); From 5c16494cccdb54ad46eab487e1d1afa9ce285a36 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 29 Jul 2020 13:13:39 -0700 Subject: [PATCH 1644/2522] Update constant folding to allow disabling compressed tensor optimization PiperOrigin-RevId: 323853391 Change-Id: I675c26909dae99333962219482aae180f7a89cd9 --- .../grappler/optimizers/constant_folding.cc | 20 +++++-- .../grappler/optimizers/constant_folding.h | 7 ++- .../optimizers/constant_folding_test.cc | 52 +++++++++++++++++++ .../grappler/optimizers/meta_optimizer.cc | 10 ++-- .../core/protobuf/rewriter_config.proto | 4 ++ 5 files changed, 84 insertions(+), 9 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index ce4e101e419..869ecbef691 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -187,13 +187,19 @@ float QuantizedTypeMaxAsFloat(DataType data_type) { } // namespace ConstantFolding::ConstantFolding(RewriterConfig::Toggle opt_level, - DeviceBase* cpu_device) - : opt_level_(opt_level), cpu_device_(cpu_device) { + DeviceBase* cpu_device, + bool disable_compressed_tensor_optimization) + : opt_level_(opt_level), + cpu_device_(cpu_device), + disable_compressed_tensor_optimization_( + disable_compressed_tensor_optimization) { resource_mgr_.reset(new ResourceMgr()); } -ConstantFolding::ConstantFolding(DeviceBase* cpu_device) - : ConstantFolding(RewriterConfig::ON, cpu_device) {} +ConstantFolding::ConstantFolding(DeviceBase* cpu_device, + bool disable_compressed_tensor_optimization) + : ConstantFolding(RewriterConfig::ON, cpu_device, + disable_compressed_tensor_optimization) {} // static string ConstantFolding::AddControlDependency(const string& input_name, @@ -813,6 +819,9 @@ Status ConstantFolding::MaterializeReductionIndices( Status ConstantFolding::MaterializeConstantValuedNode( NodeDef* node, const GraphProperties& properties) { + if (disable_compressed_tensor_optimization_) { + return Status::OK(); + } // Nodes that generate constant-valued outputs can be represented compactly in // compressed format, regardless of their shape. const std::vector& output_props = @@ -974,6 +983,9 @@ bool ConstantFolding::IsFoldableUncached( } } if (is_merge && !merge_has_constant_input) return false; + if (disable_compressed_tensor_optimization_ && + (IsFill(node) || IsZerosLike(node) || IsOnesLike(node))) + return false; // If we know the output shapes, make sure that the outputs are small enough // to materialize. 
diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h index 4e3deb40d15..398e16947ec 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.h +++ b/tensorflow/core/grappler/optimizers/constant_folding.h @@ -45,8 +45,10 @@ class ConstantFolding : public GraphOptimizer { static string AddControlDependency(const string& input_name, GraphDef* graph, NodeMap* node_map); - explicit ConstantFolding(DeviceBase* cpu_device); - ConstantFolding(RewriterConfig::Toggle opt_level, DeviceBase* cpu_device); + explicit ConstantFolding(DeviceBase* cpu_device, + bool disable_compressed_tensor_optimization = false); + ConstantFolding(RewriterConfig::Toggle opt_level, DeviceBase* cpu_device, + bool disable_compressed_tensor_optimization = false); ~ConstantFolding() override {} @@ -334,6 +336,7 @@ class ConstantFolding : public GraphOptimizer { bool has_fetch_; bool graph_modified_; bool graph_contains_assign_or_inplace_op_; + bool disable_compressed_tensor_optimization_; }; } // end namespace grappler diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index 59e236d2454..cb1ad87de60 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -4030,6 +4030,58 @@ TEST_F(ConstantFoldingTest, MaterializeConstantValuedNode) { } } +TEST_F(ConstantFoldingTest, MaterializeConstantValuedNodeDisableCompression) { + tensorflow::Scope scope = tensorflow::Scope::NewRootScope(); + + Output x = + ops::Placeholder(scope.WithOpName("x"), DT_FLOAT, + ops::Placeholder::Shape(TensorShape({1, 2, 3, 4}))); + Output ones_like = ops::OnesLike(scope.WithOpName("ones_like"), x); + Output zeros_like = ops::ZerosLike(scope.WithOpName("zeros_like"), x); + Output fill = ops::Fill(scope.WithOpName("fill"), {4, 3, 2, 1}, 42); + + GrapplerItem item; + TF_CHECK_OK(scope.ToGraphDef(&item.graph)); + item.fetch = {"ones_like", "zeros_like", "fill"}; + auto x_t = GenerateRandomTensor(TensorShape({1, 2, 3, 4})); + auto tensors_expected = EvaluateNodes(item.graph, item.fetch, {{"x", x_t}}); + + ConstantFolding optimizer(/*cpu_device=*/nullptr, true); + GraphDef output; + Status status = optimizer.Optimize(/*cluster=*/nullptr, item, &output); + TF_EXPECT_OK(status); + + EXPECT_EQ(output.node_size(), 6); + for (const auto& node : output.node()) { + if (node.name() == "ones_like") { + EXPECT_EQ(node.op(), "OnesLike"); + ASSERT_EQ(node.input_size(), 1); + EXPECT_EQ(node.input(0), "x"); + } + if (node.name() == "zeros_like") { + EXPECT_EQ(node.op(), "ZerosLike"); + ASSERT_EQ(node.input_size(), 1); + EXPECT_EQ(node.input(0), "x"); + } + if (node.name() == "fill") { + EXPECT_EQ(node.op(), "Fill"); + ASSERT_EQ(node.input_size(), 2); + EXPECT_EQ(node.input(0), "Const/Const"); + EXPECT_EQ(node.input(1), "Const_1/Const"); + } + } + auto tensors = EvaluateNodes(output, item.fetch, {{"x", x_t}}); + ASSERT_EQ(item.fetch.size(), tensors.size()); + ASSERT_EQ(tensors_expected.size(), tensors.size()); + for (int i = 0; i < tensors.size(); i++) { + if (item.fetch[i] == "fill") { + test::ExpectTensorEqual(tensors_expected[i], tensors[i]); + } else { + test::ExpectTensorEqual(tensors_expected[i], tensors[i]); + } + } +} + TEST_F(ConstantFoldingTest, MaterializeConstantValuedNodeHugeFill) { tensorflow::Scope scope = tensorflow::Scope::NewRootScope(); Output value = ops::Const(scope.WithOpName("value"), 42, {}); diff --git 
a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index a82ee3dbb87..b4d6f8cee41 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -183,7 +183,10 @@ std::unique_ptr MetaOptimizer::MakeNewOptimizer( MK_OPT("function", new FunctionOptimizer( cfg_.function_optimization(), /*lower_control_flow=*/!IsSingleThreadedExecutor())); - MK_OPT("constfold", new ConstantFolding(cpu_device_)); + MK_OPT("constfold", + new ConstantFolding( + cpu_device_, + cfg_.experimental_disable_compressed_tensor_optimization())); MK_OPT("shape", new ShapeOptimizer()); MK_OPT("remap", new Remapper(cfg_.remapping())); MK_OPT("layout", new GenericLayoutOptimizer()); @@ -243,8 +246,9 @@ Status MetaOptimizer::InitializeOptimizers( optimizers->push_back(MakeUnique()); } if (cfg_.constant_folding() != RewriterConfig::OFF) { - optimizers->push_back( - MakeUnique(cfg_.constant_folding(), cpu_device_)); + optimizers->push_back(MakeUnique( + cfg_.constant_folding(), cpu_device_, + cfg_.experimental_disable_compressed_tensor_optimization())); } if (cfg_.shape_optimization() != RewriterConfig::OFF) { optimizers->push_back(MakeUnique()); diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto index 2b7830f8bef..695e73f62e8 100644 --- a/tensorflow/core/protobuf/rewriter_config.proto +++ b/tensorflow/core/protobuf/rewriter_config.proto @@ -107,6 +107,10 @@ message RewriterConfig { // < 0 means do not skip optimization. int32 min_graph_nodes = 17; + // Disable optimizations that assume compressed tensors. Note that this flag + // is experimental and may be removed in the future. + bool experimental_disable_compressed_tensor_optimization = 26; + enum MemOptType { // The default setting (SCHEDULING and SWAPPING HEURISTICS only) DEFAULT_MEM_OPT = 0; From 02f600eb92dce044c75a89dddfede9e6da2c0f44 Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Wed, 29 Jul 2020 16:18:42 -0400 Subject: [PATCH 1645/2522] Update tensorflow/compiler/mlir/lite/transforms/optimize.cc Co-authored-by: Mihai Maruseac --- tensorflow/compiler/mlir/lite/transforms/optimize.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/compiler/mlir/lite/transforms/optimize.cc b/tensorflow/compiler/mlir/lite/transforms/optimize.cc index c0c53935d6a..6de6187d81a 100644 --- a/tensorflow/compiler/mlir/lite/transforms/optimize.cc +++ b/tensorflow/compiler/mlir/lite/transforms/optimize.cc @@ -198,6 +198,7 @@ DenseElementsAttr GetShape(Value output_val) { auto output_type = output_val.getType().cast(); auto shape_vector = output_type.getShape(); std::vector shape; + shape.reserve(shape_vector.size()); for (auto shape_object : shape_vector) { shape.push_back(shape_object); } From 3706f63bcc31f1930e52ab45d4f56a960d30a97b Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Wed, 29 Jul 2020 16:21:33 -0400 Subject: [PATCH 1646/2522] Update tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc Co-authored-by: Mihai Maruseac --- tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc b/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc index 753e4c4fe7f..c6fb02cff83 100644 --- a/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc +++ b/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc @@ -495,8 +495,8 @@ struct ConvertTFStridedSlice : public RewritePattern { 
original_input_type.getShape(); SmallVector new_shape; int index = 0; - const int original_input_shape_size = original_input_shape.size(); - while (index < original_input_shape_size || new_axis_mask) { + const int original_input_rank = original_input_shape.size(); + while (index < original_input_rank || new_axis_mask) { if (new_axis_mask & 1) { new_shape.emplace_back(1); } else { From 2a7d5a7f7176f78fa4bed9f61babd9c30bc5044e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tar=C3=A9=20Gaskin?= Date: Wed, 29 Jul 2020 20:22:07 +0000 Subject: [PATCH 1647/2522] update 3 --- tensorflow/compiler/mlir/lite/ir/tfl_ops.cc | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc b/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc index c7c3f5713f1..df87fa074b7 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc +++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc @@ -758,8 +758,8 @@ static LogicalResult Verify(CustomOp op) { op.custom_option().cast(); if (!opaque_attr.getType().hasStaticShape()) return op.emitOpError("custom_option should have a static shape."); - const int opaque_attr_getValue_size = opaque_attr.getValue().size(); - if (opaque_attr_getValue_size != + const int attribute_size = opaque_attr.getValue().size(); + if (attribute_size != opaque_attr.getType().cast().getDimSize(0)) return op.emitOpError( "custom_option should have the same length of content with shape."); @@ -941,7 +941,7 @@ static LogicalResult Verify(ScatterNdOp op) { // Checks whether the last `(shape_type.getDimSize(0) - outermost_dim)` // dimensions of `updates` and `shape` are equal. for (auto shape_it : llvm::enumerate(shape_value)) { - long int i = shape_it.index(); + int64_t i = shape_it.index(); auto value = shape_it.value().getSExtValue(); if (i >= outermost_dim) { auto corresponding_dim = i - outermost_dim + outer_dims; @@ -1248,8 +1248,7 @@ static LogicalResult Verify(SliceOp op) { } if (begin && size && input_type.hasStaticShape()) { - const uint64_t input_rank = begin.getNumElements(); - for (uint64_t i = 0; i < input_rank; i++) { + for (uint64_t i = 0, end = begin.getNumElements(); i < end; i++) { int begin_i = begin.getValue({i}).cast().getValue().getSExtValue(); int size_i = From b4b90b346b796b3546aee29818edc9b365a71bf3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 29 Jul 2020 13:18:07 -0700 Subject: [PATCH 1648/2522] Update ops-related pbtxt files. 
PiperOrigin-RevId: 323854345 Change-Id: Ie693ac3ddbdc526b6016b7b567ad55cca12e0af7 --- .../compat/ops_history_v2/BatchMatMulV2.pbtxt | 46 +++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 1 + 2 files changed, 47 insertions(+) diff --git a/tensorflow/core/ops/compat/ops_history_v2/BatchMatMulV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/BatchMatMulV2.pbtxt index 77224c111ba..a32cad6c148 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/BatchMatMulV2.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/BatchMatMulV2.pbtxt @@ -43,3 +43,49 @@ op { } } } +op { + name: "BatchMatMulV2" + input_arg { + name: "x" + type_attr: "T" + } + input_arg { + name: "y" + type_attr: "T" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_BFLOAT16 + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT16 + type: DT_INT32 + type: DT_INT64 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + } + } + } + attr { + name: "adj_x" + type: "bool" + default_value { + b: false + } + } + attr { + name: "adj_y" + type: "bool" + default_value { + b: false + } + } +} diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index a9a94580d86..91e0a3b74a3 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -3722,6 +3722,7 @@ op { type: DT_HALF type: DT_FLOAT type: DT_DOUBLE + type: DT_INT16 type: DT_INT32 type: DT_INT64 type: DT_COMPLEX64 From 206ed7a37f0e5e80b1f62e2172f96a9a2f7041c8 Mon Sep 17 00:00:00 2001 From: Andy Ly Date: Wed, 29 Jul 2020 13:18:24 -0700 Subject: [PATCH 1649/2522] Update outside compilation related passes to use tf._XlaHostComputeMlir and tf._TPUCompileMlirPlaceholderProgramKey instead of tf._HostComputeMlir and tf._TPUCompileMlir. This is in preparation of matching up TF/XLA communication ops with their respective tf_device dialect ops. tf._HostComputeMlir has been updated to have separate send and receive keys. An explicit op tf._TPUCompileMlirPlaceholderProgramKey is now used instead of a placeholder invalid tf._TPUCompileMlir. PiperOrigin-RevId: 323854389 Change-Id: I0d16e9ab78701d705919e8af5c53f0e8fda73925 --- .../mlir/tensorflow/ir/tf_generated_ops.td | 57 ++++++++---- .../tpu_extract_outside_compilation.mlir | 89 ++++++++++--------- .../mlir/tensorflow/tests/tpu_rewrite.mlir | 12 +-- .../tpu_extract_outside_compilation.cc | 64 ++++++------- .../tensorflow/transforms/tpu_rewrite_pass.cc | 11 +-- tensorflow/core/tpu/ops/host_compute_ops.cc | 18 +++- tensorflow/core/tpu/ops/tpu_compile_op.cc | 17 ++++ 7 files changed, 159 insertions(+), 109 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td index 0fe8dd647a7..54b85525346 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td @@ -11705,24 +11705,6 @@ expected to create these operators. 
TF_DerivedOperandSizeAttr num_args = TF_DerivedOperandSizeAttr<2>; } -def TF__HostComputeMlirOp : TF_Op<"_HostComputeMlir", []> { - let summary = "A host-side computation called from a TPU device."; - - let arguments = (ins - Variadic:$inputs, - - StrAttr:$key, - DefaultValuedAttr:$tpu_core - ); - - let results = (outs - Variadic:$outputs - ); - - TF_DerivedOperandTypeListAttr Tinputs = TF_DerivedOperandTypeListAttr<0>; - TF_DerivedResultTypeListAttr Toutputs = TF_DerivedResultTypeListAttr<0>; -} - def TF__RecvTPUEmbeddingActivationsOp : TF_Op<"_RecvTPUEmbeddingActivations", []> { let summary = "An op that receives embeddng activations on the TPU."; @@ -11782,6 +11764,45 @@ used to look up the program in the compilation cache. TF_DerivedOperandSizeAttr NumDynamicShapes = TF_DerivedOperandSizeAttr<0>; } +def TF__TPUCompileMlirPlaceholderProgramKeyOp : TF_Op<"_TPUCompileMlirPlaceholderProgramKey", []> { + let summary = [{ +Placeholder program key (compilation cache key) of a _TPUCompileMlir `program`. + }]; + + let description = [{ +This op can be used when certain rewrite passes materialize ops that require a +program key but the _TPUCompileMlir op has not been added yet. Subsequent +rewrite passes must replace this op with a _TPUCompileMlir op `program` output. + }]; + + let arguments = (ins); + + let results = (outs + TF_StrTensor:$program + ); +} + +def TF__XlaHostComputeMlirOp : TF_Op<"_XlaHostComputeMlir", []> { + let summary = [{ +A pseudo-op to represent host-side computation in an XLA program. + }]; + + let arguments = (ins + Variadic:$inputs, + + StrAttr:$send_key, + StrAttr:$recv_key, + DefaultValuedAttr:$tpu_core + ); + + let results = (outs + Variadic:$outputs + ); + + TF_DerivedOperandTypeListAttr Tinputs = TF_DerivedOperandTypeListAttr<0>; + TF_DerivedResultTypeListAttr Toutputs = TF_DerivedResultTypeListAttr<0>; +} + def TF__XlaRecvAtHostOp : TF_Op<"_XlaRecvAtHost", []> { let summary = [{ A placeholder op to receive values from a running XLA computation. 
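As a readability aid (separate from the diff hunks above and below, not part of the patch itself): a minimal hypothetical sketch of how the two new pseudo-ops defined above pair up once outside compilation has been extracted. The op and attribute names (`_TPUCompileMlirPlaceholderProgramKey`, `_XlaRecvAtHost`, `_XlaSendFromHost`, `_XlaHostComputeMlir`, `send_key`, `recv_key`, `device_ordinal`) come from the definitions in this patch; the value names, tensor types, the `cluster1` channel strings, and the host device string are illustrative assumptions only.

  // Host side (assumed sketch): a launch region obtains a placeholder program
  // key and uses it to receive the cluster's arguments and send back results.
  "tf_device.launch"() ( {
    %key = "tf._TPUCompileMlirPlaceholderProgramKey"() : () -> tensor<3x!tf.string>
    %arg = "tf._XlaRecvAtHost"(%key) {key = "host_compute_channel_cluster1_args", device_ordinal = 0 : i64} : (tensor<3x!tf.string>) -> tensor<?xi32>
    %out = "tf.B"(%arg) : (tensor<?xi32>) -> tensor<?xi32>
    "tf._XlaSendFromHost"(%out, %key) {key = "host_compute_channel_cluster1_retvals", device_ordinal = 0 : i64} : (tensor<?xi32>, tensor<3x!tf.string>) -> ()
    tf_device.return
  }) {device = "/job:worker/replica:0/task:0/device:CPU:0"} : () -> ()

  // Device side (assumed sketch): the TPU cluster calls the host-compute
  // pseudo-op with send/recv keys matching the host-side channel names.
  "tf_device.cluster"() ( {
    %a = "tf.A"() : () -> tensor<?xi32>
    %r = "tf._XlaHostComputeMlir"(%a) {send_key = "host_compute_channel_cluster1_args", recv_key = "host_compute_channel_cluster1_retvals", tpu_core = 0 : i64} : (tensor<?xi32>) -> tensor<?xi32>
    "tf.C"(%r) : (tensor<?xi32>) -> ()
    tf_device.return
  }) : () -> ()

The lit tests updated in the next file of this patch check exactly this pairing, with the placeholder program key later replaced by the real `_TPUCompileMlir` `program` output in the rewrite pass.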
diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tpu_extract_outside_compilation.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tpu_extract_outside_compilation.mlir index 7cabd0ea61a..732e34fce90 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tpu_extract_outside_compilation.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tpu_extract_outside_compilation.mlir @@ -143,14 +143,14 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK: %[[REPLICATE:[0-9]*]]:2 = tf_device.replicate // CHECK: %[[PARALLEL_EXECUTE_OUTPUT:[0-9]*]] = "tf_device.parallel_execute" // CHECK-NEXT: "tf_device.launch" - // CHECK: %[[STATUS_OUTPUT:[a-z_0-9]*]], %[[PROGRAM_OUTPUT:[a-z_0-9]*]] = "tf._TPUCompileMlir" + // CHECK: %[[PROGRAM_OUTPUT:[a-z_0-9]*]] = "tf._TPUCompileMlirPlaceholderProgramKey" // CHECK: %[[RECV_OUTPUT:[0-9]*]] = "tf._XlaRecvAtHost"(%[[PROGRAM_OUTPUT]]) - // CHECK-SAME: key = "host_compute_channel_cluster1" + // CHECK-SAME: key = "host_compute_channel_cluster1_args" // CHECK: "tf.B"(%[[RECV_OUTPUT]]) // CHECK: "tf_device.cluster" // CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A" - // CHECK: "tf._HostComputeMlir"(%[[A_OUTPUT]]) - // CHECK-SAME: key = "host_compute_channel_cluster1" + // CHECK: "tf._XlaHostComputeMlir"(%[[A_OUTPUT]]) + // CHECK-SAME: send_key = "host_compute_channel_cluster1_args" %1:2 = tf_device.replicate([%0, %arg0] as %ri_0: tensor) {n = 2 : i32} { %2 = "tf_device.cluster"() ( { %3 = "tf.A"() : () -> (tensor) @@ -172,15 +172,17 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK: %[[REPLICATE:[0-9]*]]:2 = tf_device.replicate // CHECK: %[[PARALLEL_EXECUTE_OUTPUT:[0-9]*]] = "tf_device.parallel_execute" // CHECK-NEXT: "tf_device.launch" - // CHECK: %[[STATUS_OUTPUT:[a-z_0-9]*]], %[[PROGRAM_OUTPUT:[a-z_0-9]*]] = "tf._TPUCompileMlir" + // CHECK: %[[PROGRAM_OUTPUT:[a-z_0-9]*]] = "tf._TPUCompileMlirPlaceholderProgramKey" // CHECK: "tf._XlaRecvAtHost"(%[[PROGRAM_OUTPUT]]) + // CHECK-SAME: key = "host_compute_channel_cluster1_args" // CHECK: %[[B_OUTPUT:[0-9]*]] = "tf.B"() // CHECK: "tf._XlaSendFromHost"(%[[B_OUTPUT]], %[[PROGRAM_OUTPUT]]) - // CHECK-SAME: key = "host_compute_channel_cluster1" + // CHECK-SAME: key = "host_compute_channel_cluster1_retvals" // CHECK: "tf_device.cluster" // CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A" - // CHECK: %[[HOST_OUTPUT:[0-9]*]] = "tf._HostComputeMlir"() - // CHECK-SAME: key = "host_compute_channel_cluster1" + // CHECK: %[[HOST_OUTPUT:[0-9]*]] = "tf._XlaHostComputeMlir"() + // CHECK-SAME: recv_key = "host_compute_channel_cluster1_retvals" + // CHECK-SAME: send_key = "host_compute_channel_cluster1_args" // CHECK: "tf.C"(%[[HOST_OUTPUT]]) %1:2 = tf_device.replicate([%0, %arg0] as %ri_0: tensor) {n = 2 : i32} { %2 = "tf_device.cluster"() ( { @@ -203,14 +205,15 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK: %[[REPLICATE:[0-9]*]]:2 = tf_device.replicate // CHECK: %[[PARALLEL_EXECUTE_OUTPUT:[0-9]*]] = "tf_device.parallel_execute" // CHECK-NEXT: "tf_device.launch" - // CHECK: %[[STATUS_OUTPUT:[a-z_0-9]*]], %[[PROGRAM_OUTPUT:[a-z_0-9]*]] = "tf._TPUCompileMlir" + // CHECK: %[[PROGRAM_OUTPUT:[a-z_0-9]*]] = "tf._TPUCompileMlirPlaceholderProgramKey" // CHECK: %[[RECV_OUTPUT:[0-9]*]] = "tf._XlaRecvAtHost"(%[[PROGRAM_OUTPUT]]) // CHECK: %[[B_OUTPUT:[0-9]*]] = "tf.B"(%[[RECV_OUTPUT]]) // CHECK: "tf._XlaSendFromHost"(%[[B_OUTPUT]], %[[PROGRAM_OUTPUT]]) + // CHECK-SAME: key = "host_compute_channel_cluster1_retvals" // CHECK: "tf_device.cluster" 
// CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A" - // CHECK: %[[HOST_OUTPUT:[0-9]*]] = "tf._HostComputeMlir"(%[[A_OUTPUT]]) - // CHECK-SAME: key = "host_compute_channel_cluster1" + // CHECK: %[[HOST_OUTPUT:[0-9]*]] = "tf._XlaHostComputeMlir"(%[[A_OUTPUT]]) + // CHECK-SAME: recv_key = "host_compute_channel_cluster1_retvals" // CHECK: tf_device.return %[[HOST_OUTPUT]] %1:2 = tf_device.replicate([%0, %arg0] as %ri_0: tensor) {n = 2 : i32} { %2 = "tf_device.cluster"() ( { @@ -233,15 +236,15 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK: %[[REPLICATE:[0-9]*]]:2 = tf_device.replicate // CHECK: %[[PARALLEL_EXECUTE_OUTPUT:[0-9]*]] = "tf_device.parallel_execute" // CHECK-NEXT: "tf_device.launch" - // CHECK: %[[STATUS_OUTPUT:[a-z_0-9]*]], %[[PROGRAM_OUTPUT:[a-z_0-9]*]] = "tf._TPUCompileMlir" + // CHECK: %[[PROGRAM_OUTPUT:[a-z_0-9]*]] = "tf._TPUCompileMlirPlaceholderProgramKey" // CHECK: %[[RECV_OUTPUT:[0-9]*]] = "tf._XlaRecvAtHost"(%[[PROGRAM_OUTPUT]]) // CHECK: %[[B_OUTPUT:[0-9]*]] = "tf.B"(%[[RECV_OUTPUT]]) // CHECK: "tf._XlaSendFromHost"(%[[B_OUTPUT]], %[[PROGRAM_OUTPUT]]) - // CHECK-SAME: key = "host_compute_channel_cluster1" + // CHECK-SAME: key = "host_compute_channel_cluster1_retvals" // CHECK: "tf_device.cluster" // CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A" - // CHECK: %[[HOST_OUTPUT:[0-9]*]] = "tf._HostComputeMlir"(%[[A_OUTPUT]]) - // CHECK-SAME: key = "host_compute_channel_cluster1" + // CHECK: %[[HOST_OUTPUT:[0-9]*]] = "tf._XlaHostComputeMlir"(%[[A_OUTPUT]]) + // CHECK-SAME: recv_key = "host_compute_channel_cluster1_retvals" // CHECK: "tf.C"(%[[HOST_OUTPUT]]) %1:2 = tf_device.replicate([%0, %arg0] as %ri_0: tensor) {n = 2 : i32} { %2 = "tf_device.cluster"() ( { @@ -264,16 +267,16 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK: %[[REPLICATE:[0-9]*]]:2 = tf_device.replicate // CHECK: %[[PARALLEL_EXECUTE_OUTPUT:[0-9]*]] = "tf_device.parallel_execute" // CHECK-NEXT: "tf_device.launch" - // CHECK: %[[STATUS_OUTPUT:[a-z_0-9]*]], %[[PROGRAM_OUTPUT:[a-z_0-9]*]] = "tf._TPUCompileMlir" + // CHECK: %[[PROGRAM_OUTPUT:[a-z_0-9]*]] = "tf._TPUCompileMlirPlaceholderProgramKey" // CHECK: %[[RECV_OUTPUT:[0-9]*]]:2 = "tf._XlaRecvAtHost"(%[[PROGRAM_OUTPUT]]) // CHECK: %[[B_OUTPUT:[0-9]*]]:2 = "tf.C"(%[[RECV_OUTPUT]]#0, %[[RECV_OUTPUT]]#1) // CHECK: "tf._XlaSendFromHost"(%[[B_OUTPUT]]#0, %[[B_OUTPUT]]#1, %[[PROGRAM_OUTPUT]]) - // CHECK-SAME: key = "host_compute_channel_cluster1" + // CHECK-SAME: key = "host_compute_channel_cluster1_retvals" // CHECK: "tf_device.cluster" // CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A" // CHECK: %[[B_OUTPUT:[0-9]*]] = "tf.B" - // CHECK: %[[HOST_OUTPUT:[0-9]*]]:2 = "tf._HostComputeMlir"(%[[A_OUTPUT]], %[[B_OUTPUT]]) - // CHECK-SAME: key = "host_compute_channel_cluster1" + // CHECK: %[[HOST_OUTPUT:[0-9]*]]:2 = "tf._XlaHostComputeMlir"(%[[A_OUTPUT]], %[[B_OUTPUT]]) + // CHECK-SAME: recv_key = "host_compute_channel_cluster1_retvals" // CHECK: "tf.D"(%[[HOST_OUTPUT]]#0) // CHECK: "tf.E"(%[[HOST_OUTPUT]]#1) %1:2 = tf_device.replicate([%0, %arg0] as %ri_0: tensor) {n = 2 : i32} { @@ -299,24 +302,24 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK: %[[REPLICATE:[0-9]*]]:2 = tf_device.replicate // CHECK: %[[PARALLEL_EXECUTE_OUTPUT:[0-9]*]] = "tf_device.parallel_execute" // CHECK-NEXT: "tf_device.launch" - // CHECK: %[[STATUS_OUTPUT2:[a-z_0-9]*]], %[[PROGRAM_OUTPUT2:[a-z_0-9]*]] = "tf._TPUCompileMlir" + // CHECK: %[[PROGRAM_OUTPUT2:[a-z_0-9]*]] = 
"tf._TPUCompileMlirPlaceholderProgramKey" // CHECK: %[[RECV_OUTPUT2:[0-9]*]] = "tf._XlaRecvAtHost"(%[[PROGRAM_OUTPUT2]]) // CHECK: %[[D_OUTPUT:[0-9]*]] = "tf.D"(%[[RECV_OUTPUT2]]) // CHECK: "tf._XlaSendFromHost"(%[[D_OUTPUT]], %[[PROGRAM_OUTPUT]]) - // CHECK-SAME: key = "host_compute_channel_cluster2" + // CHECK-SAME: key = "host_compute_channel_cluster2_retvals" // CHECK: "tf_device.launch" - // CHECK: %[[STATUS_OUTPUT1:[a-z_0-9]*]], %[[PROGRAM_OUTPUT1:[a-z_0-9]*]] = "tf._TPUCompileMlir" + // CHECK: %[[PROGRAM_OUTPUT1:[a-z_0-9]*]] = "tf._TPUCompileMlirPlaceholderProgramKey" // CHECK: %[[RECV_OUTPUT1:[0-9]*]] = "tf._XlaRecvAtHost"(%[[PROGRAM_OUTPUT1]]) // CHECK: %[[B_OUTPUT:[0-9]*]] = "tf.B"(%[[RECV_OUTPUT1]]) // CHECK: "tf._XlaSendFromHost"(%[[B_OUTPUT]], %[[PROGRAM_OUTPUT]]) - // CHECK-SAME: key = "host_compute_channel_cluster1" + // CHECK-SAME: key = "host_compute_channel_cluster1_retvals" // CHECK: "tf_device.cluster" // CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A" - // CHECK: %[[HOST_OUTPUT1:[0-9]*]] = "tf._HostComputeMlir"(%[[A_OUTPUT]]) - // CHECK-SAME: key = "host_compute_channel_cluster1" + // CHECK: %[[HOST_OUTPUT1:[0-9]*]] = "tf._XlaHostComputeMlir"(%[[A_OUTPUT]]) + // CHECK-SAME: recv_key = "host_compute_channel_cluster1_retvals" // CHECK: %[[C_OUTPUT:[0-9]*]] = "tf.C"(%[[HOST_OUTPUT1]]) - // CHECK: %[[HOST_OUTPUT2:[0-9]*]] = "tf._HostComputeMlir"(%[[C_OUTPUT]]) - // CHECK-SAME: key = "host_compute_channel_cluster2" + // CHECK: %[[HOST_OUTPUT2:[0-9]*]] = "tf._XlaHostComputeMlir"(%[[C_OUTPUT]]) + // CHECK-SAME: recv_key = "host_compute_channel_cluster2_retvals" // CHECK: "tf.E"(%[[HOST_OUTPUT2]]) %1:2 = tf_device.replicate([%0, %arg0] as %ri_0: tensor) {n = 2 : i32} { %2 = "tf_device.cluster"() ( { @@ -341,14 +344,14 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK: %[[REPLICATE:[0-9]*]]:2 = tf_device.replicate // CHECK: %[[PARALLEL_EXECUTE_OUTPUT:[0-9]*]] = "tf_device.parallel_execute" // CHECK-NEXT: "tf_device.launch" - // CHECK: %[[STATUS_OUTPUT:[a-z_0-9]*]], %[[PROGRAM_OUTPUT:[a-z_0-9]*]] = "tf._TPUCompileMlir" + // CHECK: %[[PROGRAM_OUTPUT:[a-z_0-9]*]] = "tf._TPUCompileMlirPlaceholderProgramKey" // CHECK: %[[RECV_OUTPUT:[0-9]*]] = "tf._XlaRecvAtHost"(%[[PROGRAM_OUTPUT]]) - // CHECK-SAME: key = "host_compute_channel_cluster1" + // CHECK-SAME: key = "host_compute_channel_cluster1_args" // CHECK: "tf.B"(%arg0, %[[RECV_OUTPUT]]) // CHECK: "tf_device.cluster" // CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A" - // CHECK: "tf._HostComputeMlir"(%[[A_OUTPUT]]) - // CHECK-SAME: key = "host_compute_channel_cluster1" + // CHECK: "tf._XlaHostComputeMlir"(%[[A_OUTPUT]]) + // CHECK-SAME: send_key = "host_compute_channel_cluster1_args" %1:2 = tf_device.replicate([%0, %arg0] as %ri_0: tensor) {n = 2 : i32} { %2 = "tf_device.cluster"() ( { %3 = "tf.A"() : () -> (tensor) @@ -370,22 +373,22 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK: %[[REPLICATE:[0-9]*]]:2 = tf_device.replicate // CHECK: %[[PARALLEL_EXECUTE_OUTPUT:[0-9]*]] = "tf_device.parallel_execute" // CHECK-NEXT: "tf_device.launch" - // CHECK: %[[STATUS_OUTPUT_2:[a-z_0-9]*]], %[[PROGRAM_OUTPUT_2:[a-z_0-9]*]] = "tf._TPUCompileMlir" + // CHECK: %[[PROGRAM_OUTPUT_2:[a-z_0-9]*]] = "tf._TPUCompileMlirPlaceholderProgramKey" // CHECK: %[[RECV_OUTPUT_2:[0-9]*]] = "tf._XlaRecvAtHost"(%[[PROGRAM_OUTPUT_2]]) - // CHECK-SAME: key = "host_compute_channel_cluster2" + // CHECK-SAME: key = "host_compute_channel_cluster2_args" // CHECK: "tf.D"(%[[RECV_OUTPUT_2]]) // CHECK: 
"tf_device.launch" - // CHECK: %[[STATUS_OUTPUT_1:[a-z_0-9]*]], %[[PROGRAM_OUTPUT_1:[a-z_0-9]*]] = "tf._TPUCompileMlir" + // CHECK: %[[PROGRAM_OUTPUT_1:[a-z_0-9]*]] = "tf._TPUCompileMlirPlaceholderProgramKey" // CHECK: %[[RECV_OUTPUT_1:[0-9]*]] = "tf._XlaRecvAtHost"(%[[PROGRAM_OUTPUT_1]]) - // CHECK-SAME: key = "host_compute_channel_cluster1" + // CHECK-SAME: key = "host_compute_channel_cluster1_args" // CHECK: "tf.B"(%[[RECV_OUTPUT_1]]) // CHECK: "tf_device.cluster" // CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A" - // CHECK: "tf._HostComputeMlir"(%[[A_OUTPUT]]) - // CHECK-SAME: key = "host_compute_channel_cluster1" + // CHECK: "tf._XlaHostComputeMlir"(%[[A_OUTPUT]]) + // CHECK-SAME: send_key = "host_compute_channel_cluster1_args" // CHECK: %[[C_OUTPUT:[0-9]*]] = "tf.C" - // CHECK: "tf._HostComputeMlir"(%[[C_OUTPUT]]) - // CHECK-SAME: key = "host_compute_channel_cluster2" + // CHECK: "tf._XlaHostComputeMlir"(%[[C_OUTPUT]]) + // CHECK-SAME: send_key = "host_compute_channel_cluster2_args" %1:2 = tf_device.replicate([%0, %arg0] as %ri_0: tensor) {n = 2 : i32} { %2 = "tf_device.cluster"() ( { %3 = "tf.A"() : () -> (tensor) @@ -408,16 +411,16 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK: %[[REPLICATE:[0-9]*]]:2 = tf_device.replicate // CHECK: %[[PARALLEL_EXECUTE_OUTPUT:[0-9]*]] = "tf_device.parallel_execute" // CHECK-NEXT: "tf_device.launch" - // CHECK: %[[STATUS_OUTPUT:[a-z_0-9]*]], %[[PROGRAM_OUTPUT:[a-z_0-9]*]] = "tf._TPUCompileMlir" + // CHECK: %[[PROGRAM_OUTPUT:[a-z_0-9]*]] = "tf._TPUCompileMlirPlaceholderProgramKey" // CHECK: %[[RECV_OUTPUT:[0-9]*]]:2 = "tf._XlaRecvAtHost"(%[[PROGRAM_OUTPUT]]) - // CHECK-SAME: key = "host_compute_channel_cluster1" + // CHECK-SAME: key = "host_compute_channel_cluster1_args" // CHECK: "tf.C"(%[[RECV_OUTPUT]]#0) // CHECK: "tf.D"(%[[RECV_OUTPUT]]#1, %[[RECV_OUTPUT]]#0) // CHECK: "tf_device.cluster" // CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A" // CHECK: %[[B_OUTPUT:[0-9]*]] = "tf.B" - // CHECK: "tf._HostComputeMlir"(%[[A_OUTPUT]], %[[B_OUTPUT]]) - // CHECK-SAME: key = "host_compute_channel_cluster1" + // CHECK: "tf._XlaHostComputeMlir"(%[[A_OUTPUT]], %[[B_OUTPUT]]) + // CHECK-SAME: send_key = "host_compute_channel_cluster1_args" %1:2 = tf_device.replicate([%0, %arg0] as %ri_0: tensor) {n = 2 : i32} { %2 = "tf_device.cluster"() ( { %3 = "tf.A"() : () -> (tensor) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tpu_rewrite.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tpu_rewrite.mlir index 14bffb0f4d4..2a0091ce9bf 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tpu_rewrite.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tpu_rewrite.mlir @@ -1256,21 +1256,21 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor // CHECK: "tf._TPUCompileMlir" // CHECK: "tf.TPUCompileSucceededAssert" // CHECK: "tf_device.parallel_execute" - // CHECK-NOT:"tf._TPUCompileMlir" + // CHECK-NOT:"tf._TPUCompileMlirPlaceholderProgramKey" // CHECK: "tf.D"(%[[COMPILE_OUTPUT]]#1 // CHECK: "tf.TPUExecute" - // CHECK-NOT:"tf._TPUCompileMlir" + // CHECK-NOT:"tf._TPUCompileMlirPlaceholderProgramKey" // CHECK: "tf.E"(%[[COMPILE_OUTPUT]]#1 %3 = "tf_device.parallel_execute"() ( { - %status, %program = "tf._TPUCompileMlir"() {metadata = "...", mlir_module = "..."} : () -> (tensor, tensor) - "tf.D"(%program) : (tensor) -> () + %program = "tf._TPUCompileMlirPlaceholderProgramKey"() : () -> tensor + "tf.D"(%program) : (tensor) -> () tf_device.return }, { %4 = "tf_device.cluster_func"(%ri_0) {_tpu_replicate = 
"cluster0", func = @tpu0_func, num_cores_per_replica = 1, step_marker_location = "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP", padding_map = ["\08\01\10\02\18\03"], topology = "", device_assignment = [], input_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], output_sharding_configuration = ["\08\01\1A\01\01\22\01\00"], use_spmd_for_xla_partitioning = false} : (tensor) -> tensor tf_device.return %4 : tensor }, { - %status, %program = "tf._TPUCompileMlir"() {metadata = "...", mlir_module = "..."} : () -> (tensor, tensor) - "tf.E"(%program) : (tensor) -> () + %program = "tf._TPUCompileMlirPlaceholderProgramKey"() : () -> tensor + "tf.E"(%program) : (tensor) -> () tf_device.return }) : () -> (tensor) tf_device.return %3 : tensor diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_outside_compilation.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_outside_compilation.cc index 6a750addd4e..cbea4ae6544 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_outside_compilation.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_outside_compilation.cc @@ -34,10 +34,7 @@ namespace TFTPU { namespace { -constexpr char kAncestorsAttr[] = "ancestors"; constexpr char kDeviceAttr[] = "device"; -constexpr char kKeyAttr[] = "key"; -constexpr char kShapesAttr[] = "shapes"; constexpr char kXlaOutsideCompilationAttr[] = "_xla_outside_compilation"; // Mapping for `_xla_outside_compilation` attribute to ops of a cluster. @@ -196,20 +193,21 @@ void SetHostComputeInsertion( // Creates the HostCompute with `inputs` and `outputs` // using `communication_key`. -TF::_HostComputeMlirOp CreateHostCompute( +TF::_XlaHostComputeMlirOp CreateHostCompute( OpBuilder* builder, tf_device::ClusterOp tpu_cluster, llvm::ArrayRef cluster_ops, const llvm::SmallSetVector& inputs, llvm::ArrayRef outputs, - llvm::StringRef communication_key) { + llvm::StringRef args_communication_key, + llvm::StringRef retvals_communication_key) { llvm::SmallVector device_output_types; for (const auto& output : outputs) device_output_types.push_back(output.getType()); SetHostComputeInsertion(builder, cluster_ops, inputs); - auto host_compute = builder->create( - tpu_cluster.getLoc(), device_output_types, inputs.getArrayRef()); - host_compute.setAttr(kAncestorsAttr, builder->getArrayAttr({})); - host_compute.setAttr(kShapesAttr, builder->getArrayAttr({})); - host_compute.setAttr(kKeyAttr, builder->getStringAttr(communication_key)); + auto host_compute = builder->create( + tpu_cluster.getLoc(), device_output_types, inputs.getArrayRef(), + builder->getStringAttr(args_communication_key), + builder->getStringAttr(retvals_communication_key), + /*tpu_core=*/builder->getI64IntegerAttr(0)); return host_compute; } @@ -232,41 +230,43 @@ void MoveOutsideCompiledOps( // TODO(b/157054714): Use a better abstraction instead of _TPUCompileMlirOp // and _XlaRecvAtHostOp and _XlaSendFromHostOp. - // A placeholder _TpuCompileMlirOp is created because it is required input to - // XlaRecvAtHostOp and XlaSendFromHostOp but the _TpuCompileMlirOp has not yet - // been created for the TPU cluster that contains the outside compiled ops. - // This placeholder should be replaced by the TPU cluster _TPUCompileMlirOp in - // a subsequent pass. 
- auto compile_op = builder.create( - tpu_cluster.getLoc(), /*compilation_status=*/result_type, /*program=*/ - llvm::ArrayRef{result_type}, llvm::ArrayRef{}, txt_module, - txt_metadata); + // A placeholder compilation cache key is created because it is a required + // input to _XlaRecvAtHost and _XlaSendFromHost but the _TPUCompileMlir has + // not yet been created for the TPU cluster that contains the outside compiled + // ops. This placeholder should be replaced by the TPU cluster _TPUCompileMlir + // in a subsequent pass. + auto compilation_key = + builder.create( + tpu_cluster.getLoc(), /*program=*/result_type, + llvm::ArrayRef{}); llvm::SmallVector host_output_types; for (const auto& external_input : external_inputs) host_output_types.push_back(external_input.getType()); - std::string communication_key = - llvm::formatv("host_compute_channel_{0}", outside_cluster_name).str(); - // XlaRecvAtHostOp takes both the program key(dynamic_key) from the - // _TpuCompileMlirOp and the communication_key. + std::string args_communication_key = + llvm::formatv("host_compute_channel_{0}_args", outside_cluster_name) + .str(); + std::string retvals_communication_key = + llvm::formatv("host_compute_channel_{0}_retvals", outside_cluster_name) + .str(); auto recv_at_host = builder.create( tpu_cluster.getLoc(), host_output_types, - /*dynamic_key=*/compile_op.getResult(1), - builder.getStringAttr(communication_key), - builder.getIntegerAttr(builder.getIntegerType(64), 0)); + /*dynamic_key=*/compilation_key, + builder.getStringAttr(args_communication_key), + /*device_ordinal=*/builder.getI64IntegerAttr(0)); - auto host_compute = - CreateHostCompute(&builder, tpu_cluster, cluster_ops, external_inputs, - external_outputs, communication_key); + auto host_compute = CreateHostCompute( + &builder, tpu_cluster, cluster_ops, external_inputs, external_outputs, + args_communication_key, retvals_communication_key); MoveOutsideClusterOpsToLaunchOp(host_launch_op, cluster_ops); builder.setInsertionPoint(host_launch_op.GetBody().getTerminator()); builder.create( tpu_cluster.getLoc(), external_outputs, - /*dynamic_key=*/compile_op.getResult(1), - builder.getStringAttr(communication_key), - /*device_ordinal=*/builder.getIntegerAttr(builder.getIntegerType(64), 0)); + /*dynamic_key=*/compilation_key, + builder.getStringAttr(retvals_communication_key), + /*device_ordinal=*/builder.getI64IntegerAttr(0)); for (auto result : llvm::zip(external_inputs, recv_at_host.getResults())) mlir::replaceAllUsesInRegionWith(std::get<0>(result), std::get<1>(result), diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_rewrite_pass.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_rewrite_pass.cc index 7e6f2915502..ca77feafc05 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_rewrite_pass.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_rewrite_pass.cc @@ -643,10 +643,7 @@ LogicalResult Rewrite( // Collect `num_replicas` and `num_cores_per_replica` attributes. int num_replicas = 1; tf_device::ReplicateOp replicate = - cluster_func.getParentOp() - ? llvm::dyn_cast_or_null( - cluster_func.getParentOp()) - : nullptr; + cluster_func.getParentOfType(); if (replicate) num_replicas = replicate.n().getLimitedValue(); auto num_cores_per_replica_attr = cluster_func.getAttrOfType( @@ -715,9 +712,9 @@ LogicalResult Rewrite( // structured lowering. 
if (auto parallel_op = llvm::dyn_cast( cluster_func.getParentOp())) { - parallel_op.walk([&](TF::_TPUCompileMlirOp parallel_compile_op) { - parallel_compile_op.replaceAllUsesWith(compile_op); - parallel_compile_op.erase(); + parallel_op.walk([&](TF::_TPUCompileMlirPlaceholderProgramKeyOp key_op) { + key_op.replaceAllUsesWith(compile_op->getResult(1)); + key_op.erase(); }); } diff --git a/tensorflow/core/tpu/ops/host_compute_ops.cc b/tensorflow/core/tpu/ops/host_compute_ops.cc index c83c5159f62..c053ac948f9 100644 --- a/tensorflow/core/tpu/ops/host_compute_ops.cc +++ b/tensorflow/core/tpu/ops/host_compute_ops.cc @@ -23,17 +23,29 @@ namespace tensorflow { using shape_inference::InferenceContext; using shape_inference::ShapeHandle; -REGISTER_OP("_HostComputeMlir") +REGISTER_OP("_XlaHostComputeMlir") .Input("inputs: Tinputs") .Output("outputs: Toutputs") .Attr("Tinputs: list(type) >= 0") .Attr("Toutputs: list(type) >= 0") - .Attr("key: string") + .Attr("send_key: string") + .Attr("recv_key: string") .Attr("tpu_core: int = 0") .SetShapeFn([](shape_inference::InferenceContext* c) { return ::tensorflow::shape_inference::UnknownShape(c); }) - .SetIsStateful(); + .SetIsStateful() + .Doc(R"doc( +A pseudo-op to represent host-side computation in an XLA program. + +inputs: A list of tensors that will be sent to the host. +outputs: A list of tensors that will be returned to the device. +Tinputs: The element types of each element in `inputs`. +Toutputs: The element types of each element in `outputs`. +send_key: A unique identifier for this region used to match up host recv. +recv_key: A unique identifier for this region used to match up host send. +tpu_core: Default core to use for host to device transfers. +)doc"); REGISTER_OP("XlaHostCompute") .Input("inputs: Tinputs") diff --git a/tensorflow/core/tpu/ops/tpu_compile_op.cc b/tensorflow/core/tpu/ops/tpu_compile_op.cc index b18b34e5f88..6f62e36f857 100644 --- a/tensorflow/core/tpu/ops/tpu_compile_op.cc +++ b/tensorflow/core/tpu/ops/tpu_compile_op.cc @@ -45,6 +45,23 @@ REGISTER_OP("_TPUCompileMlir") return Status::OK(); }); +REGISTER_OP("_TPUCompileMlirPlaceholderProgramKey") + .SetIsStateful() + .Output("program: string") + .SetShapeFn([](shape_inference::InferenceContext* c) { + c->set_output(0, c->Vector(2)); + return Status::OK(); + }) + .SetIsStateful() + .Doc( + R"( +Placeholder program key (compilation cache key) of a _TPUCompileMlir `program`. + +This op can be used when certain rewrite passes materialize ops that require a +program key but the _TPUCompileMlir op has not been added yet. Subsequent +rewrite passes must replace this op with a _TPUCompileMlir op `program` output. 
+)"); + REGISTER_OP("TPUCompile") .Attr("num_computations: int >= 0") .Attr("function: func") From 1d0c8946535822ad8eb83267b7c77a03141ab619 Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Wed, 29 Jul 2020 16:27:39 -0400 Subject: [PATCH 1650/2522] Update tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc Co-authored-by: Mihai Maruseac --- tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc index 8230cde8660..e0d0ac57144 100644 --- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc +++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc @@ -1132,8 +1132,8 @@ static Status GetArgTypes(const Graph& graph, DataTypeVector* types) { if (n->type_string() == kArgOp) { int index; TF_RETURN_IF_ERROR(GetNodeAttr(n->attrs(), "index", &index)); - const int types_size = types->size(); - if (index < 0 || index >= types_size) { + const int num_types = types->size(); + if (index < 0 || index >= num_types) { return errors::InvalidArgument("Invalid argument number"); } (*types)[index] = n->output_type(0); From e4a3a4ece71995fb0e7deb15299c8f8b371f0c8c Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Wed, 29 Jul 2020 16:27:47 -0400 Subject: [PATCH 1651/2522] Update tensorflow/compiler/jit/extract_outside_compilation_pass.cc Co-authored-by: Mihai Maruseac --- tensorflow/compiler/jit/extract_outside_compilation_pass.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/jit/extract_outside_compilation_pass.cc b/tensorflow/compiler/jit/extract_outside_compilation_pass.cc index 4a2b6136d53..c84fca81420 100644 --- a/tensorflow/compiler/jit/extract_outside_compilation_pass.cc +++ b/tensorflow/compiler/jit/extract_outside_compilation_pass.cc @@ -226,8 +226,8 @@ xla::StatusOr BuildSendFromHostNode( for (auto* n : ret_nodes) { int index; TF_RETURN_IF_ERROR(GetNodeAttr(n->attrs(), "index", &index)); - const int send_from_host_dtypes_size = send_from_host_dtypes.size(); - if (index < 0 || index >= send_from_host_dtypes_size) { + const int num_dtypes = send_from_host_dtypes.size(); + if (index < 0 || index >= num_dtypes) { return errors::Internal("Invalid _Retval index: ", index); } for (auto edge : n->in_edges()) { From 16b353da53fc1e801286d69398fd818cba3c7264 Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Wed, 29 Jul 2020 13:24:11 -0700 Subject: [PATCH 1652/2522] [TF2XLA] Test MLIR bridge support in def_function_xla_jit_test PiperOrigin-RevId: 323855484 Change-Id: Ie8f26b0fd87013ff45bf2dd56451b69968bb4a19 --- tensorflow/python/eager/BUILD | 12 +- .../python/eager/def_function_xla_jit_test.py | 724 ++++++++++-------- 2 files changed, 397 insertions(+), 339 deletions(-) diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD index b9c43c2e4e9..7336e85b2de 100644 --- a/tensorflow/python/eager/BUILD +++ b/tensorflow/python/eager/BUILD @@ -846,18 +846,26 @@ tf_py_test( ], ) -cuda_py_test( +tf_xla_py_test( name = "def_function_xla_jit_test", srcs = ["def_function_xla_jit_test.py"], + enable_mlir_bridge = True, + enabled_backends = [ + # TODO(b/162438052): Enable the test on TPU. 
+ "cpu", + "gpu", + ], python_version = "PY3", tags = [ "no_mac", + "no_pip", "no_windows", ], - xla_enabled = True, + use_xla_device = False, deps = [ ":backprop", ":def_function", + "//tensorflow/compiler/tests:xla_test", "//tensorflow/python:client_testlib", "//tensorflow/python:constant_op", "//tensorflow/python:framework_ops", diff --git a/tensorflow/python/eager/def_function_xla_jit_test.py b/tensorflow/python/eager/def_function_xla_jit_test.py index bd7a6ec2279..3e324c90fbc 100644 --- a/tensorflow/python/eager/def_function_xla_jit_test.py +++ b/tensorflow/python/eager/def_function_xla_jit_test.py @@ -17,6 +17,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.compiler.tests import xla_test from tensorflow.python.eager import backprop from tensorflow.python.eager import context from tensorflow.python.eager import def_function @@ -36,43 +37,45 @@ from tensorflow.python.ops import variables from tensorflow.python.platform import test -class DefFunctionTest(test.TestCase): +class DefFunctionTest(xla_test.XLATestCase): def testAutoclusteringWithTfFunction(self): + with ops.device('device:{}:0'.format(self.device)): - @def_function.function(experimental_compile=False) - def outer(a, b, c): - return a * inner(b, c) + c + @def_function.function(experimental_compile=False) + def outer(a, b, c): + return a * inner(b, c) + c - @def_function.function(experimental_compile=True) - def inner(b, c): - return b + c * b + @def_function.function(experimental_compile=True) + def inner(b, c): + return b + c * b - i1 = constant_op.constant([1.0, 2.0, 3.0, 4.0, 5.0]) - i2 = constant_op.constant([1.0, 2.0, 3.0, 4.0, 5.0]) - i3 = constant_op.constant([1.0, 2.0, 3.0, 4.0, 5.0]) + i1 = constant_op.constant([1.0, 2.0, 3.0, 4.0, 5.0]) + i2 = constant_op.constant([1.0, 2.0, 3.0, 4.0, 5.0]) + i3 = constant_op.constant([1.0, 2.0, 3.0, 4.0, 5.0]) - with context.collect_graphs(optimized=True) as graphs: - outer(i1, i2, i3) + with context.collect_graphs(optimized=True) as graphs: + outer(i1, i2, i3) - if test_util.is_xla_enabled(): - self.assertIn('_XlaRun', [n.op for n in graphs[0].node]) - else: - self.assertNotIn('_XlaRun', [n.op for n in graphs[0].node]) + if test_util.is_xla_enabled(): + self.assertIn('_XlaRun', [n.op for n in graphs[0].node]) + else: + self.assertNotIn('_XlaRun', [n.op for n in graphs[0].node]) def testBasic(self): + with ops.device('device:{}:0'.format(self.device)): - def fn(x, a): - return x + a + def fn(x, a): + return x + a - func = def_function.function(fn, experimental_compile=False) - xla_func = def_function.function(fn, experimental_compile=True) + func = def_function.function(fn, experimental_compile=False) + xla_func = def_function.function(fn, experimental_compile=True) - inputs = constant_op.constant([1, 2, 2, 3, 3]) - self.assertAllClose([2, 3, 3, 4, 4], func(inputs, 1)) - if not test.is_built_with_rocm(): - # XLA support is not yet enabled for TF ROCm - self.assertAllClose([2, 3, 3, 4, 4], xla_func(inputs, 1)) + inputs = constant_op.constant([1, 2, 2, 3, 3]) + self.assertAllClose([2, 3, 3, 4, 4], func(inputs, 1)) + if not test.is_built_with_rocm(): + # XLA support is not yet enabled for TF ROCm + self.assertAllClose([2, 3, 3, 4, 4], xla_func(inputs, 1)) def testBasicInt32(self): @@ -87,385 +90,432 @@ class DefFunctionTest(test.TestCase): self.assertAllClose([2, 3, 3, 4, 4], xla_func(inputs, 1)) def testDerivative(self): - if test.is_built_with_rocm(): - return + with 
ops.device('device:{}:0'.format(self.device)): + if test.is_built_with_rocm(): + return - def fn(x, a): - return 2 * x + a + def fn(x, a): + return 2 * x + a - xla_func = def_function.function(fn, experimental_compile=True) + xla_func = def_function.function(fn, experimental_compile=True) - with backprop.GradientTape() as tape: - inputs = constant_op.constant([1., 2., 2., 3., 3.]) - tape.watch(inputs) - outputs = xla_func(inputs, 1) + with backprop.GradientTape() as tape: + inputs = constant_op.constant([1., 2., 2., 3., 3.]) + tape.watch(inputs) + outputs = xla_func(inputs, 1) - self.assertAllClose([2, 2, 2, 2, 2], tape.gradient(outputs, inputs)) + self.assertAllClose([2, 2, 2, 2, 2], tape.gradient(outputs, inputs)) - # pylint: disable=protected-access - (forward, backward) = xla_func.get_concrete_function( - inputs, 1)._delayed_rewrite_functions.forward_backward() + # pylint: disable=protected-access + (forward, backward) = xla_func.get_concrete_function( + inputs, 1)._delayed_rewrite_functions.forward_backward() - # Check that the must-compile attribute gets correctly propagated to the - # created derivatives. - self.assertTrue(backward.function_def.attr['_XlaMustCompile']) - self.assertTrue(forward.definition.attr['_XlaMustCompile']) + # Check that the must-compile attribute gets correctly propagated to the + # created derivatives. + self.assertTrue(backward.function_def.attr['_XlaMustCompile']) + self.assertTrue(forward.definition.attr['_XlaMustCompile']) # Calling function with experimental_compile=True from # experimental_compile=False should compile the inner func. def testNestedCall(self): + with ops.device('device:{}:0'.format(self.device)): - def fn(x, a): - return x + a + def fn(x, a): + return x + a - xla_func = def_function.function(fn, experimental_compile=True) + xla_func = def_function.function(fn, experimental_compile=True) - def fn2(x, a): - return xla_func(x, a) + def fn2(x, a): + return xla_func(x, a) - func = def_function.function(fn2, experimental_compile=False) + func = def_function.function(fn2, experimental_compile=False) - inputs = constant_op.constant([1, 2, 2, 3, 3]) - if not test.is_built_with_rocm(): - # XLA support is not yet enabled for TF ROCm - self.assertAllClose([2, 3, 3, 4, 4], func(inputs, 1)) + inputs = constant_op.constant([1, 2, 2, 3, 3]) + if not test.is_built_with_rocm(): + # XLA support is not yet enabled for TF ROCm + self.assertAllClose([2, 3, 3, 4, 4], func(inputs, 1)) + @test_util.disable_mlir_bridge('TODO(b/162272821): MLIR bridge returns' + ' wrong status type') def testNestedCallUnsupportedOps(self): + with ops.device('device:{}:0'.format(self.device)): - def fn(x): - return array_ops.unique(x).y + def fn(x): + return array_ops.unique(x).y - xla_func = def_function.function(fn, experimental_compile=True) + xla_func = def_function.function(fn, experimental_compile=True) - def fn2(x): - return xla_func(x) + def fn2(x): + return xla_func(x) - func = def_function.function(fn2, experimental_compile=False) - inputs = constant_op.constant([1, 2, 2, 3, 3]) - if not test.is_built_with_rocm(): + func = def_function.function(fn2, experimental_compile=False) + inputs = constant_op.constant([1, 2, 2, 3, 3]) + if not test.is_built_with_rocm(): + with self.assertRaisesRegex(errors.InvalidArgumentError, + 'not compilable'): + func(inputs) + + @test_util.disable_mlir_bridge('TODO(b/162272821): MLIR bridge returns' + ' wrong status type') + def testUnsupportedOps(self): + with ops.device('device:{}:0'.format(self.device)): + + def fn(x): + return 
array_ops.unique(x).y # Unique is not supported by XLA + + func = def_function.function(fn, experimental_compile=False) + xla_func = def_function.function(fn, experimental_compile=True) + + inputs = constant_op.constant([1, 2, 2, 3, 3]) + self.assertAllClose([1, 2, 3], func(inputs)) with self.assertRaisesRegex(errors.InvalidArgumentError, 'not compilable'): - func(inputs) - - def testUnsupportedOps(self): - - def fn(x): - return array_ops.unique(x).y # Unique is not supported by XLA - - func = def_function.function(fn, experimental_compile=False) - xla_func = def_function.function(fn, experimental_compile=True) - - inputs = constant_op.constant([1, 2, 2, 3, 3]) - self.assertAllClose([1, 2, 3], func(inputs)) - with self.assertRaisesRegex(errors.InvalidArgumentError, 'not compilable'): - xla_func(inputs) + xla_func(inputs) def testFunctionGradient(self): - v = resource_variable_ops.ResourceVariable(2.0) + with ops.device('device:{}:0'.format(self.device)): + v = resource_variable_ops.ResourceVariable(2.0) - def fn(x): - return v * x + def fn(x): + return v * x - func = def_function.function(fn, experimental_compile=False) - xla_func = def_function.function(fn, experimental_compile=True) + func = def_function.function(fn, experimental_compile=False) + xla_func = def_function.function(fn, experimental_compile=True) - def run_and_check(test_func): - x = constant_op.constant(3.0) - with backprop.GradientTape() as tape: - y = test_func(x) - dy = tape.gradient(y, v) + def run_and_check(test_func): + x = constant_op.constant(3.0) + with backprop.GradientTape() as tape: + y = test_func(x) + dy = tape.gradient(y, v) - self.assertAllClose(6.0, y) - self.assertAllClose(3.0, dy) + self.assertAllClose(6.0, y) + self.assertAllClose(3.0, dy) - run_and_check(func) - if not test.is_built_with_rocm(): - # XLA support is not yet enabled for TF ROCm - run_and_check(xla_func) + run_and_check(func) + if not test.is_built_with_rocm(): + # XLA support is not yet enabled for TF ROCm + run_and_check(xla_func) def testControlFlow(self): - @def_function.function(experimental_compile=True) - def f(x): - assert control_flow_util.GraphOrParentsInXlaContext( - ops.get_default_graph()) - x = ops.convert_to_tensor(x) + with ops.device('device:{}:0'.format(self.device)): - def body(i, a): - return i + 1, control_flow_ops.cond(i > 2, lambda: a + (x**2), - lambda: a + 3) + @def_function.function(experimental_compile=True) + def f(x): + assert control_flow_util.GraphOrParentsInXlaContext( + ops.get_default_graph()) + x = ops.convert_to_tensor(x) - return control_flow_ops.while_loop( - lambda i, *_: i < 10, - body, (constant_op.constant(0), constant_op.constant(3.)), - maximum_iterations=10)[1] + def body(i, a): + return i + 1, control_flow_ops.cond(i > 2, lambda: a + (x**2), + lambda: a + 3) - @def_function.function(experimental_compile=True) - def g(x): - x = ops.convert_to_tensor(x) - with backprop.GradientTape() as tape: - tape.watch(x) - y = f(x) - return y, tape.gradient(y, x) + return control_flow_ops.while_loop( + lambda i, *_: i < 10, + body, (constant_op.constant(0), constant_op.constant(3.)), + maximum_iterations=10)[1] - self.assertAllClose(40.0, f(2.0)) - self.assertAllClose([40.0, 28.0], g(2.0)) + @def_function.function(experimental_compile=True) + def g(x): + x = ops.convert_to_tensor(x) + with backprop.GradientTape() as tape: + tape.watch(x) + y = f(x) + return y, tape.gradient(y, x) + + self.assertAllClose(40.0, f(2.0)) + self.assertAllClose([40.0, 28.0], g(2.0)) def testMethodCompilation(self): if 
test.is_built_with_rocm(): return - class C(object): + with ops.device('device:{}:0'.format(self.device)): - @def_function.function(experimental_compile=True) - def f1(self, x, a): - return x + a + class C(object): - inputs = constant_op.constant([1, 2, 2, 3, 3]) - c = C() - self.assertAllClose([2, 3, 3, 4, 4], c.f1(inputs, 1)) + @def_function.function(experimental_compile=True) + def f1(self, x, a): + return x + a + inputs = constant_op.constant([1, 2, 2, 3, 3]) + c = C() + self.assertAllClose([2, 3, 3, 4, 4], c.f1(inputs, 1)) + + @test_util.disable_mlir_bridge('TODO(b/162272821): MLIR bridge returns ' + ' wrong status type') def testMethodCompilationUnsupportedFunc(self): if test.is_built_with_rocm(): return - class C(object): + with ops.device('device:{}:0'.format(self.device)): - @def_function.function(experimental_compile=True) - def f1(self, x): - return array_ops.unique(x).y + class C(object): - inputs = constant_op.constant([1, 2, 2, 3, 3]) - c = C() - with self.assertRaisesRegex(errors.InvalidArgumentError, 'not compilable'): - c.f1(inputs) + @def_function.function(experimental_compile=True) + def f1(self, x): + return array_ops.unique(x).y + + inputs = constant_op.constant([1, 2, 2, 3, 3]) + c = C() + with self.assertRaisesRegex(errors.InvalidArgumentError, + 'not compilable'): + c.f1(inputs) def testMustBeConstantPropagation(self): - if test.is_built_with_rocm(): - return - - @def_function.function(experimental_compile=True) - def f(): - return constant_op.constant([0, 2, 1], dtype=dtypes.int32) - - @def_function.function(experimental_compile=True) - def g(a, b): - return array_ops.transpose(a, b) - - @def_function.function - def z(): - return g(array_ops.ones([3, 4, 3], dtype=dtypes.float32), f()) - - z() - - def testArgMinMax(self): - - @def_function.function(experimental_compile=True) - def argmax(x): - return math_ops.argmax(x) - - @def_function.function(experimental_compile=True) - def argmin(x): - return math_ops.argmin(x) - - self.assertAllClose(0, argmax(array_ops.ones([10], dtype=dtypes.float32))) - self.assertAllClose(0, argmax(array_ops.ones([10]))) - self.assertAllClose(0, argmin(array_ops.ones([10], dtype=dtypes.float32))) - self.assertAllClose(0, argmin(array_ops.ones([10]))) - - def testErrorMessagePassingTensorArray(self): - - @def_function.function(experimental_compile=True) - def f(x): - ta = tensor_array_ops.TensorArray( - dtype=dtypes.float32, size=1, element_shape=[]) - ta = ta.write(0, 2 * x) - y = ta.read(0) - return y - - x = constant_op.constant(3.14) - with backprop.GradientTape() as tape: - tape.watch(x) - with self.assertRaisesRegex(errors.UnimplementedError, - 'TensorList crossing the XLA/TF boundary'): - y = f(x) - tape.gradient(y, x) - - def testTensorListConcatV2(self): - - def f(x): - ta = tensor_array_ops.TensorArray( - dtype=dtypes.float32, size=2, element_shape=[3]) - ta = ta.write(0, 2 * x) - ta = ta.write(1, 3 * x) - return ta.concat() - - compiled_f = def_function.function(experimental_compile=True)(f) - - inputs = constant_op.constant([3.14, 2.68, 7.69]) - - self.assertAllClose([6.28, 5.36, 15.38, 9.42, 8.04, 23.07], f(inputs)) - - self.assertAllClose(compiled_f(inputs), f(inputs)) - - def testTensorListConcatV2Multidim(self): - - def f(x): - ta = tensor_array_ops.TensorArray( - dtype=dtypes.float32, size=2, element_shape=[3, 2]) - ta = ta.write(0, 2 * x) - ta = ta.write(1, 3 * x) - return ta.concat() - - compiled_f = def_function.function(experimental_compile=True)(f) - - inputs = constant_op.constant([[3.14, 21.1], [2.68, 22.2], [7.69, 
23.3]]) - self.assertAllClose(f(inputs), compiled_f(inputs)) - - def testTensorListConcatV2Scalars(self): - - def f(x): - ta = tensor_array_ops.TensorArray( - dtype=dtypes.float32, size=2, element_shape=[1]) - ta = ta.write(0, 2 * x) - ta = ta.write(1, 3 * x) - return ta.concat() - - compiled_f = def_function.function(experimental_compile=True)(f) - inputs = constant_op.constant([3.14]) - self.assertAllClose(f(inputs), compiled_f(inputs)) - - def testTensorListConcatGrad(self): - - def f(x): - ta = tensor_array_ops.TensorArray( - dtype=dtypes.float32, size=2, element_shape=[3]) - ta = ta.write(0, 2 * x) - ta = ta.write(1, 3 * x) - return ta.concat() - - def g(): - x = constant_op.constant([3.14, 2.68, 7.69]) - with backprop.GradientTape() as tape: - tape.watch(x) - y = f(x) - return tape.gradient(y, x) - - compiled_g = def_function.function(experimental_compile=True)(g) - - self.assertAllClose([5.0, 5.0, 5.0], g()) - self.assertAllClose(compiled_g(), g()) - - def testTensorListConcatGradNestedCompile(self): - - @def_function.function(experimental_compile=True) - def f(x): - ta = tensor_array_ops.TensorArray( - dtype=dtypes.float32, size=2, element_shape=[3]) - ta = ta.write(0, 2 * x) - ta = ta.write(1, 3 * x) - return ta.concat() - - @def_function.function(experimental_compile=True) - def g(): - x = constant_op.constant([3.14, 2.68, 7.69]) - with backprop.GradientTape() as tape: - tape.watch(x) - y = f(x) - out = tape.gradient(y, x) - return out - - self.assertAllClose([5.0, 5.0, 5.0], g()) - - def testCumsum(self): - - @def_function.function(experimental_compile=True) - def f(x): - return math_ops.cumsum(x) - - f64_input = constant_op.constant([1.1, 2.2, 3.3], dtype=dtypes.float64) - self.assertAllClose([1.1, 3.3, 6.6], f(f64_input)) - - def testNoExcessiveRetracing(self): - inner_retracings = 0 - - @def_function.function(experimental_compile=True) - def inner(a, b): - nonlocal inner_retracings - inner_retracings += 1 - return a * b + a - - def outer(a, b): - return inner(a, b) - - func_input = random_ops.random_normal([10, 10]) - for _ in range(2): - def_function.function(outer)(func_input, func_input) - - self.assertEqual(inner_retracings, 1) - - def testUpdateVariable(self): - v = variables.Variable(3.1) - - @def_function.function(experimental_compile=True) - def update_var(a, b): - v.assign_add(a * b) - - update_var(constant_op.constant(0.7), constant_op.constant(0.6)) - self.assertAllClose(v, 3.52) - - def testUpdateVariableVector(self): - v = variables.Variable([3.1, 3.1]) - - @def_function.function(experimental_compile=True) - def update_var(a, b): - v.assign_add(a * b) - - update_var( - constant_op.constant([0.7, 0.7]), constant_op.constant([0.6, 0.6])) - self.assertAllClose(v, [3.52, 3.52]) - - def testUpdateVariableInClass(self): - - class C(object): + with ops.device('device:{}:0'.format(self.device)): + if test.is_built_with_rocm(): + return @def_function.function(experimental_compile=True) - def update_var(self, a, b): - if not hasattr(self, 'v'): - self.v = variables.Variable(3.1) - self.v.assign_add(a * b) + def f(): + return constant_op.constant([0, 2, 1], dtype=dtypes.int32) - c = C() + @def_function.function(experimental_compile=True) + def g(a, b): + return array_ops.transpose(a, b) - @def_function.function - def outer(): - c.update_var(constant_op.constant(0.7), constant_op.constant(0.6)) + @def_function.function + def z(): + return g(array_ops.ones([3, 4, 3], dtype=dtypes.float32), f()) - outer() - self.assertAllClose(c.v, 3.52) + z() + + 
@test_util.disable_mlir_bridge('TODO(b/162271237): argmax gives different' + ' results in MLIR-based bridge') + def testArgMinMax(self): + with ops.device('device:{}:0'.format(self.device)): + + @def_function.function(experimental_compile=True) + def argmax(x): + return math_ops.argmax(x) + + @def_function.function(experimental_compile=True) + def argmin(x): + return math_ops.argmin(x) + + self.assertAllClose(0, argmax(array_ops.ones([10], dtype=dtypes.float32))) + self.assertAllClose(0, argmax(array_ops.ones([10]))) + self.assertAllClose(0, argmin(array_ops.ones([10], dtype=dtypes.float32))) + self.assertAllClose(0, argmin(array_ops.ones([10]))) + + @test_util.disable_mlir_bridge('TensorArray support not implemented') + def testErrorMessagePassingTensorArray(self): + with ops.device('device:{}:0'.format(self.device)): + + @def_function.function(experimental_compile=True) + def f(x): + ta = tensor_array_ops.TensorArray( + dtype=dtypes.float32, size=1, element_shape=[]) + ta = ta.write(0, 2 * x) + y = ta.read(0) + return y + + x = constant_op.constant(3.14) + with backprop.GradientTape() as tape: + tape.watch(x) + with self.assertRaisesRegex(errors.UnimplementedError, + 'TensorList crossing the XLA/TF boundary'): + y = f(x) + tape.gradient(y, x) + + @test_util.disable_mlir_bridge('TODO(b/162281863): MLIR bridge errors out' + ' lowering TensorListConcatV2') + def testTensorListConcatV2(self): + with ops.device('device:{}:0'.format(self.device)): + + def f(x): + ta = tensor_array_ops.TensorArray( + dtype=dtypes.float32, size=2, element_shape=[3]) + ta = ta.write(0, 2 * x) + ta = ta.write(1, 3 * x) + return ta.concat() + + compiled_f = def_function.function(experimental_compile=True)(f) + + inputs = constant_op.constant([3.14, 2.68, 7.69]) + + self.assertAllClose([6.28, 5.36, 15.38, 9.42, 8.04, 23.07], f(inputs)) + + self.assertAllClose(compiled_f(inputs), f(inputs)) + + @test_util.disable_mlir_bridge('TODO(b/162281863): MLIR bridge errors out' + ' lowering TensorListConcatV2') + def testTensorListConcatV2Multidim(self): + with ops.device('device:{}:0'.format(self.device)): + + def f(x): + ta = tensor_array_ops.TensorArray( + dtype=dtypes.float32, size=2, element_shape=[3, 2]) + ta = ta.write(0, 2 * x) + ta = ta.write(1, 3 * x) + return ta.concat() + + compiled_f = def_function.function(experimental_compile=True)(f) + + inputs = constant_op.constant([[3.14, 21.1], [2.68, 22.2], [7.69, 23.3]]) + self.assertAllClose(f(inputs), compiled_f(inputs)) + + @test_util.disable_mlir_bridge('TODO(b/162281863): MLIR bridge errors out' + ' lowering TensorListConcatV2') + def testTensorListConcatV2Scalars(self): + with ops.device('device:{}:0'.format(self.device)): + + def f(x): + ta = tensor_array_ops.TensorArray( + dtype=dtypes.float32, size=2, element_shape=[1]) + ta = ta.write(0, 2 * x) + ta = ta.write(1, 3 * x) + return ta.concat() + + compiled_f = def_function.function(experimental_compile=True)(f) + inputs = constant_op.constant([3.14]) + self.assertAllClose(f(inputs), compiled_f(inputs)) + + @test_util.disable_mlir_bridge('TODO(b/162281863): MLIR bridge errors out' + ' lowering TensorListConcatV2') + def testTensorListConcatGrad(self): + with ops.device('device:{}:0'.format(self.device)): + + def f(x): + ta = tensor_array_ops.TensorArray( + dtype=dtypes.float32, size=2, element_shape=[3]) + ta = ta.write(0, 2 * x) + ta = ta.write(1, 3 * x) + return ta.concat() + + def g(): + x = constant_op.constant([3.14, 2.68, 7.69]) + with backprop.GradientTape() as tape: + tape.watch(x) + y = f(x) + return 
tape.gradient(y, x) + + compiled_g = def_function.function(experimental_compile=True)(g) + + self.assertAllClose([5.0, 5.0, 5.0], g()) + self.assertAllClose(compiled_g(), g()) + + @test_util.disable_mlir_bridge('TODO(b/162281863): MLIR bridge errors out' + ' lowering TensorListConcatV2') + def testTensorListConcatGradNestedCompile(self): + with ops.device('device:{}:0'.format(self.device)): + + @def_function.function(experimental_compile=True) + def f(x): + ta = tensor_array_ops.TensorArray( + dtype=dtypes.float32, size=2, element_shape=[3]) + ta = ta.write(0, 2 * x) + ta = ta.write(1, 3 * x) + return ta.concat() + + @def_function.function(experimental_compile=True) + def g(): + x = constant_op.constant([3.14, 2.68, 7.69]) + with backprop.GradientTape() as tape: + tape.watch(x) + y = f(x) + out = tape.gradient(y, x) + return out + + self.assertAllClose([5.0, 5.0, 5.0], g()) + + def testCumsum(self): + with ops.device('device:{}:0'.format(self.device)): + + @def_function.function(experimental_compile=True) + def f(x): + return math_ops.cumsum(x) + + f64_input = constant_op.constant([1.1, 2.2, 3.3], dtype=dtypes.float64) + self.assertAllClose([1.1, 3.3, 6.6], f(f64_input)) + + def testNoExcessiveRetracing(self): + with ops.device('device:{}:0'.format(self.device)): + inner_retracings = 0 + + @def_function.function(experimental_compile=True) + def inner(a, b): + nonlocal inner_retracings + inner_retracings += 1 + return a * b + a + + def outer(a, b): + return inner(a, b) + + func_input = random_ops.random_normal([10, 10]) + for _ in range(2): + def_function.function(outer)(func_input, func_input) + + self.assertEqual(inner_retracings, 1) + + def testUpdateVariable(self): + with ops.device('device:{}:0'.format(self.device)): + v = variables.Variable(3.1) + + @def_function.function(experimental_compile=True) + def update_var(a, b): + v.assign_add(a * b) + + update_var(constant_op.constant(0.7), constant_op.constant(0.6)) + self.assertAllClose(v, 3.52) + + def testUpdateVariableVector(self): + with ops.device('device:{}:0'.format(self.device)): + v = variables.Variable([3.1, 3.1]) + + @def_function.function(experimental_compile=True) + def update_var(a, b): + v.assign_add(a * b) + + update_var( + constant_op.constant([0.7, 0.7]), constant_op.constant([0.6, 0.6])) + self.assertAllClose(v, [3.52, 3.52]) + + def testUpdateVariableInClass(self): + with ops.device('device:{}:0'.format(self.device)): + + class C(object): + + @def_function.function(experimental_compile=True) + def update_var(self, a, b): + if not hasattr(self, 'v'): + self.v = variables.Variable(3.1) + self.v.assign_add(a * b) + + c = C() + + @def_function.function + def outer(): + c.update_var(constant_op.constant(0.7), constant_op.constant(0.6)) + + outer() + self.assertAllClose(c.v, 3.52) def testUpdateVariableMultipleOutputs(self): - v = variables.Variable(3.1) + with ops.device('device:{}:0'.format(self.device)): + v = variables.Variable(3.1) - @def_function.function(experimental_compile=True) - def update_var(a, b): - v.assign_add(a * b) - return a * b + v + @def_function.function(experimental_compile=True) + def update_var(a, b): + v.assign_add(a * b) + return a * b + v - out = update_var(constant_op.constant(0.7), constant_op.constant(0.6)) - self.assertAllClose(v, 3.52) - self.assertAllClose(out, 3.94) + out = update_var(constant_op.constant(0.7), constant_op.constant(0.6)) + self.assertAllClose(v, 3.52) + self.assertAllClose(out, 3.94) def testReturnIdentity(self): + with ops.device('device:{}:0'.format(self.device)): - 
@def_function.function(experimental_compile=True) - def f(a, b): - return (a, b) + @def_function.function(experimental_compile=True) + def f(a, b): + return (a, b) - a = constant_op.constant([0.7]) - b = constant_op.constant([0.6]) + a = constant_op.constant([0.7]) + b = constant_op.constant([0.6]) - f(a, b) + f(a, b) if __name__ == '__main__': From 82957428532230bf8b9b6a139862762098ee2cba Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 29 Jul 2020 13:30:20 -0700 Subject: [PATCH 1653/2522] Move file upload operation definitions from the file definition into the gcs file system. PiperOrigin-RevId: 323856572 Change-Id: Ibbafb222f604ddef9e919227bad5d2537d56d4e0 --- .../core/platform/cloud/gcs_file_system.cc | 327 ++++++++++++------ .../core/platform/cloud/gcs_file_system.h | 25 ++ 2 files changed, 247 insertions(+), 105 deletions(-) diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc index 43ece688034..0874c47142a 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system.cc @@ -384,6 +384,26 @@ class BufferedGcsRandomAccessFile : public RandomAccessFile { mutable string buffer_ TF_GUARDED_BY(buffer_mutex_); }; +// Function object declaration with params needed to create upload sessions. +typedef std::function + SessionCreator; + +// Function object declaration with params needed to upload objects. +typedef std::function + ObjectUploader; + +// Function object declaration with params needed to poll upload status. +typedef std::function + StatusPoller; + /// \brief GCS-based implementation of a writeable file. /// /// Since GCS objects are immutable, this implementation writes to a local @@ -394,7 +414,9 @@ class GcsWritableFile : public WritableFile { GcsFileSystem* filesystem, GcsFileSystem::TimeoutConfig* timeouts, std::function file_cache_erase, - RetryConfig retry_config, bool compose_append) + RetryConfig retry_config, bool compose_append, + SessionCreator session_creator, + ObjectUploader object_uploader, StatusPoller status_poller) : bucket_(bucket), object_(object), filesystem_(filesystem), @@ -403,7 +425,10 @@ class GcsWritableFile : public WritableFile { sync_needed_(true), retry_config_(retry_config), compose_append_(compose_append), - start_offset_(0) { + start_offset_(0), + session_creator_(std::move(session_creator)), + object_uploader_(std::move(object_uploader)), + status_poller_(std::move(status_poller)) { // TODO: to make it safer, outfile_ should be constructed from an FD VLOG(3) << "GcsWritableFile: " << GetGcsPath(); if (GetTmpFilename(&tmp_content_filename_).ok()) { @@ -421,7 +446,9 @@ class GcsWritableFile : public WritableFile { GcsFileSystem* filesystem, const string& tmp_content_filename, GcsFileSystem::TimeoutConfig* timeouts, std::function file_cache_erase, - RetryConfig retry_config, bool compose_append) + RetryConfig retry_config, bool compose_append, + SessionCreator session_creator, + ObjectUploader object_uploader, StatusPoller status_poller) : bucket_(bucket), object_(object), filesystem_(filesystem), @@ -430,7 +457,10 @@ class GcsWritableFile : public WritableFile { sync_needed_(true), retry_config_(retry_config), compose_append_(compose_append), - start_offset_(0) { + start_offset_(0), + session_creator_(std::move(session_creator)), + object_uploader_(std::move(object_uploader)), + status_poller_(std::move(status_poller)) { VLOG(3) << "GcsWritableFile: " << GetGcsPath() << "with existing file " << tmp_content_filename; 
tmp_content_filename_ = tmp_content_filename; @@ -526,7 +556,7 @@ class GcsWritableFile : public WritableFile { } } TF_RETURN_IF_ERROR( - CreateNewUploadSession(&session_uri, start_offset, object_to_upload)); + CreateNewUploadSession(start_offset, object_to_upload, &session_uri)); uint64 already_uploaded = 0; bool first_attempt = true; const Status upload_status = RetryingUtils::CallWithRetries( @@ -584,33 +614,13 @@ class GcsWritableFile : public WritableFile { } /// Initiates a new resumable upload session. - Status CreateNewUploadSession(string* session_uri, uint64 start_offset, - string object_to_upload) { + Status CreateNewUploadSession(uint64 start_offset, + std::string object_to_upload, + std::string* session_uri) { uint64 file_size; TF_RETURN_IF_ERROR(GetCurrentFileSize(&file_size)); - - std::vector output_buffer; - std::unique_ptr request; - TF_RETURN_IF_ERROR(filesystem_->CreateHttpRequest(&request)); - - request->SetUri(strings::StrCat(kGcsUploadUriBase, "b/", bucket_, - "/o?uploadType=resumable&name=", - request->EscapeString(object_to_upload))); - request->AddHeader("X-Upload-Content-Length", - std::to_string(file_size - start_offset)); - request->SetPostEmptyBody(); - request->SetResultBuffer(&output_buffer); - request->SetTimeouts(timeouts_->connect, timeouts_->idle, - timeouts_->metadata); - TF_RETURN_WITH_CONTEXT_IF_ERROR( - request->Send(), " when initiating an upload to ", GetGcsPath()); - *session_uri = request->GetResponseHeader("Location"); - if (session_uri->empty()) { - return errors::Internal("Unexpected response from GCS when writing to ", - GetGcsPath(), - ": 'Location' header not returned."); - } - return Status::OK(); + return session_creator_(start_offset, object_to_upload, bucket_, file_size, + GetGcsPath(), session_uri); } /// Appends the data of append_object to the original object and deletes @@ -653,87 +663,26 @@ class GcsWritableFile : public WritableFile { uint64* uploaded) { uint64 file_size; TF_RETURN_IF_ERROR(GetCurrentFileSize(&file_size)); - - std::unique_ptr request; - TF_RETURN_IF_ERROR(filesystem_->CreateHttpRequest(&request)); - request->SetUri(session_uri); - request->SetTimeouts(timeouts_->connect, timeouts_->idle, - timeouts_->metadata); - request->AddHeader("Content-Range", strings::StrCat("bytes */", file_size)); - request->SetPutEmptyBody(); - const Status& status = request->Send(); - if (status.ok()) { - *completed = true; - return Status::OK(); - } - *completed = false; - if (request->GetResponseCode() != HTTP_CODE_RESUME_INCOMPLETE) { - TF_RETURN_WITH_CONTEXT_IF_ERROR(status, " when resuming upload ", - GetGcsPath()); - } - const string& received_range = request->GetResponseHeader("Range"); - if (received_range.empty()) { - // This means GCS doesn't have any bytes of the file yet. - *uploaded = 0; - } else { - StringPiece range_piece(received_range); - absl::ConsumePrefix(&range_piece, - "bytes="); // May or may not be present. 
- - auto return_error = [this](string error_message) { - return errors::Internal("Unexpected response from GCS when writing ", - GetGcsPath(), ": ", error_message); - }; - - std::vector range_strs = str_util::Split(range_piece, '-'); - std::vector range_parts; - for (const string& range_str : range_strs) { - int64 tmp; - if (strings::safe_strto64(range_str, &tmp)) { - range_parts.push_back(tmp); - } else { - return return_error("Range header '" + received_range + - "' could not be parsed."); - } - } - if (range_parts.size() != 2) { - return return_error("Range header '" + received_range + - "' could not be parsed."); - } - - if (range_parts[0] != 0) { - return return_error("The returned range '" + received_range + - "' does not start at zero."); - } - // If GCS returned "Range: 0-10", this means 11 bytes were uploaded. - *uploaded = range_parts[1] + 1; - } - return Status::OK(); + return status_poller_(session_uri, file_size, GetGcsPath(), completed, + uploaded); } + /// Uploads data to object. Status UploadToSession(const string& session_uri, uint64 start_offset, uint64 already_uploaded) { uint64 file_size; TF_RETURN_IF_ERROR(GetCurrentFileSize(&file_size)); - - std::unique_ptr request; - TF_RETURN_IF_ERROR(filesystem_->CreateHttpRequest(&request)); - request->SetUri(session_uri); - if (file_size > 0) { - request->AddHeader("Content-Range", - strings::StrCat("bytes ", already_uploaded, "-", - file_size - start_offset - 1, "/", - file_size - start_offset)); + Status status = + object_uploader_(session_uri, start_offset, already_uploaded, + tmp_content_filename_, file_size, GetGcsPath()); + if (status.ok()) { + // Erase the file from the file cache on every successful write. + // Note: Only local cache, this does nothing on distributed cache. The + // distributed cache clears the cache as it is needed. + file_cache_erase_(); } - request->SetTimeouts(timeouts_->connect, timeouts_->idle, timeouts_->write); - TF_RETURN_IF_ERROR(request->SetPutFromFile( - tmp_content_filename_, start_offset + already_uploaded)); - TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(), " when uploading ", - GetGcsPath()); - // Erase the file from the file cache on every successful write. - file_cache_erase_(); - return Status::OK(); + return status; } string GetGcsPathWithObject(string object) const { @@ -752,6 +701,10 @@ class GcsWritableFile : public WritableFile { RetryConfig retry_config_; bool compose_append_; uint64 start_offset_; + // Callbacks to the file system used to upload object into GCS. + const SessionCreator session_creator_; + const ObjectUploader object_uploader_; + const StatusPoller status_poller_; }; class GcsReadOnlyMemoryRegion : public ReadOnlyMemoryRegion { @@ -1115,6 +1068,126 @@ Status GcsFileSystem::LoadBufferFromGCS(const string& fname, size_t offset, return Status::OK(); } +/// Initiates a new upload session. 
+Status GcsFileSystem::CreateNewUploadSession( + uint64 start_offset, const std::string& object_to_upload, + const std::string& bucket, uint64 file_size, const std::string& gcs_path, + std::string* session_uri) { + std::vector output_buffer; + std::unique_ptr request; + TF_RETURN_IF_ERROR(CreateHttpRequest(&request)); + + std::string uri = strings::StrCat( + kGcsUploadUriBase, "b/", bucket, + "/o?uploadType=resumable&name=", request->EscapeString(object_to_upload)); + request->SetUri(uri); + request->AddHeader("X-Upload-Content-Length", + absl::StrCat(file_size - start_offset)); + request->SetPostEmptyBody(); + request->SetResultBuffer(&output_buffer); + request->SetTimeouts(timeouts_.connect, timeouts_.idle, timeouts_.metadata); + TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(), + " when initiating an upload to ", gcs_path); + if (session_uri != nullptr) { + *session_uri = request->GetResponseHeader("Location"); + if (session_uri->empty()) { + return errors::Internal("Unexpected response from GCS when writing to ", + gcs_path, ": 'Location' header not returned."); + } + } + return Status::OK(); +} + +Status GcsFileSystem::UploadToSession(const std::string& session_uri, + uint64 start_offset, + uint64 already_uploaded, + const std::string& tmp_content_filename, + uint64 file_size, + const std::string& file_path) { + std::unique_ptr request; + TF_RETURN_IF_ERROR(CreateHttpRequest(&request)); + request->SetUri(session_uri); + if (file_size > 0) { + request->AddHeader("Content-Range", + strings::StrCat("bytes ", already_uploaded, "-", + file_size - start_offset - 1, "/", + file_size - start_offset)); + } + request->SetTimeouts(timeouts_.connect, timeouts_.idle, timeouts_.write); + + TF_RETURN_IF_ERROR(request->SetPutFromFile(tmp_content_filename, + start_offset + already_uploaded)); + TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(), " when uploading ", + file_path); + return Status::OK(); +} + +Status GcsFileSystem::RequestUploadSessionStatus(const string& session_uri, + uint64 file_size, + const std::string& gcs_path, + bool* completed, + uint64* uploaded) { + CHECK(completed != nullptr) << "RequestUploadSessionStatus() called with out " + "param 'completed' == nullptr."; // Crash ok + CHECK(uploaded != nullptr) << "RequestUploadSessionStatus() called with out " + "param 'uploaded' == nullptr."; // Crash ok + std::unique_ptr request; + TF_RETURN_IF_ERROR(CreateHttpRequest(&request)); + request->SetUri(session_uri); + request->SetTimeouts(timeouts_.connect, timeouts_.idle, timeouts_.metadata); + request->AddHeader("Content-Range", strings::StrCat("bytes */", file_size)); + request->SetPutEmptyBody(); + Status status = request->Send(); + if (status.ok()) { + *completed = true; + return Status::OK(); + } + *completed = false; + if (request->GetResponseCode() != HTTP_CODE_RESUME_INCOMPLETE) { + TF_RETURN_WITH_CONTEXT_IF_ERROR(status, " when resuming upload ", gcs_path); + } + const std::string received_range = request->GetResponseHeader("Range"); + if (received_range.empty()) { + // This means GCS doesn't have any bytes of the file yet. + *uploaded = 0; + } else { + StringPiece range_piece(received_range); + absl::ConsumePrefix(&range_piece, + "bytes="); // May or may not be present. 
+ + auto return_error = [](const std::string& gcs_path, + const std::string& error_message) { + return errors::Internal("Unexpected response from GCS when writing ", + gcs_path, ": ", error_message); + }; + + std::vector range_strs = str_util::Split(range_piece, '-'); + if (range_strs.size() != 2) { + return return_error(gcs_path, "Range header '" + received_range + + "' could not be parsed."); + } + + std::vector range_parts; + for (const std::string& range_str : range_strs) { + int64 tmp; + if (strings::safe_strto64(range_str, &tmp)) { + range_parts.push_back(tmp); + } else { + return return_error(gcs_path, "Range header '" + received_range + + "' could not be parsed."); + } + } + + if (range_parts[0] != 0) { + return return_error(gcs_path, "The returned range '" + received_range + + "' does not start at zero."); + } + // If GCS returned "Range: 0-10", this means 11 bytes were uploaded. + *uploaded = range_parts[1] + 1; + } + return Status::OK(); +} + Status GcsFileSystem::ParseGcsPathForScheme(StringPiece fname, string scheme, bool empty_object_ok, string* bucket, string* object) { @@ -1156,10 +1229,32 @@ Status GcsFileSystem::NewWritableFile( std::unique_ptr* result /*, TransactionToken* token */) { string bucket, object; TF_RETURN_IF_ERROR(ParseGcsPath(fname, false, &bucket, &object)); + + auto session_creator = + [this](uint64 start_offset, const std::string& object_to_upload, + const std::string& bucket, uint64 file_size, + const std::string& gcs_path, std::string* session_uri) { + return CreateNewUploadSession(start_offset, object_to_upload, bucket, + file_size, gcs_path, session_uri); + }; + auto object_uploader = + [this](const std::string& session_uri, uint64 start_offset, + uint64 already_uploaded, const std::string& tmp_content_filename, + uint64 file_size, const std::string& file_path) { + return UploadToSession(session_uri, start_offset, already_uploaded, + tmp_content_filename, file_size, file_path); + }; + auto status_poller = [this](const string& session_uri, uint64 file_size, + const std::string& gcs_path, bool* completed, + uint64* uploaded) { + return RequestUploadSessionStatus(session_uri, file_size, gcs_path, + completed, uploaded); + }; + result->reset(new GcsWritableFile( bucket, object, this, &timeouts_, [this, fname]() { ClearFileCaches(fname); }, retry_config_, - compose_append_)); + compose_append_, session_creator, object_uploader, status_poller)); return Status::OK(); } @@ -1195,13 +1290,35 @@ Status GcsFileSystem::NewAppendableFile( } old_content.close(); + auto session_creator = + [this](uint64 start_offset, const std::string& object_to_upload, + const std::string& bucket, uint64 file_size, + const std::string& gcs_path, std::string* session_uri) { + return CreateNewUploadSession(start_offset, object_to_upload, bucket, + file_size, gcs_path, session_uri); + }; + auto object_uploader = + [this](const std::string& session_uri, uint64 start_offset, + uint64 already_uploaded, const std::string& tmp_content_filename, + uint64 file_size, const std::string& file_path) { + return UploadToSession(session_uri, start_offset, already_uploaded, + tmp_content_filename, file_size, file_path); + }; + + auto status_poller = [this](const string& session_uri, uint64 file_size, + const std::string& gcs_path, bool* completed, + uint64* uploaded) { + return RequestUploadSessionStatus(session_uri, file_size, gcs_path, + completed, uploaded); + }; + // Create a writable file and pass the old content to it. 
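// The session_creator, object_uploader and status_poller lambdas above (and
// the identical ones in NewWritableFile) capture `this` and forward to the
// file system's now-virtual CreateNewUploadSession, UploadToSession and
// RequestUploadSessionStatus methods, so GcsWritableFile only depends on the
// SessionCreator/ObjectUploader/StatusPoller std::function types and no
// longer issues HTTP requests itself; making the methods virtual presumably
// also lets subclasses swap the upload transport. A minimal standalone
// sketch of the same injection pattern, with hypothetical names (Uploader,
// Writer, Fs are illustrative only, not the real GCS classes):
#include <functional>
#include <string>
#include <utility>

namespace injection_sketch {

// Callback type the writer depends on instead of talking to the network.
using Uploader = std::function<int(const std::string& data)>;

class Writer {
 public:
  explicit Writer(Uploader uploader) : uploader_(std::move(uploader)) {}
  // Delegates the actual transfer to the injected callback.
  int Flush(const std::string& data) { return uploader_(data); }

 private:
  Uploader uploader_;
};

class Fs {
 public:
  virtual ~Fs() = default;
  Writer NewWriter() {
    // Capturing `this` dispatches through the virtual method, so a test
    // subclass can override Upload() without touching Writer at all.
    return Writer([this](const std::string& data) { return Upload(data); });
  }
  virtual int Upload(const std::string& data) {
    return static_cast<int>(data.size());  // stand-in for the real transfer
  }
};

}  // namespace injection_sketch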
string bucket, object; TF_RETURN_IF_ERROR(ParseGcsPath(fname, false, &bucket, &object)); result->reset(new GcsWritableFile( bucket, object, this, old_content_filename, &timeouts_, [this, fname]() { ClearFileCaches(fname); }, retry_config_, - compose_append_)); + compose_append_, session_creator, object_uploader, status_poller)); return Status::OK(); } diff --git a/tensorflow/core/platform/cloud/gcs_file_system.h b/tensorflow/core/platform/cloud/gcs_file_system.h index e2ced8727fd..7bf95170cb1 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system.h +++ b/tensorflow/core/platform/cloud/gcs_file_system.h @@ -291,6 +291,31 @@ class GcsFileSystem : public FileSystem { virtual Status LoadBufferFromGCS(const string& fname, size_t offset, size_t n, char* buffer, size_t* bytes_transferred); + // Creates an upload session for an upcoming GCS object upload. + virtual Status CreateNewUploadSession(uint64 start_offset, + const std::string& object_to_upload, + const std::string& bucket, + uint64 file_size, + const std::string& gcs_path, + std::string* session_uri); + + // Uploads object data to session. + virtual Status UploadToSession(const std::string& session_uri, + uint64 start_offset, uint64 already_uploaded, + const std::string& tmp_content_filename, + uint64 file_size, + const std::string& file_path); + + /// \brief Requests status of a previously initiated upload session. + /// + /// If the upload has already succeeded, sets 'completed' to true. + /// Otherwise sets 'completed' to false and 'uploaded' to the currently + /// uploaded size in bytes. + virtual Status RequestUploadSessionStatus(const string& session_uri, + uint64 file_size, + const std::string& gcs_path, + bool* completed, uint64* uploaded); + Status ParseGcsPathForScheme(StringPiece fname, string scheme, bool empty_object_ok, string* bucket, string* object); From 181df6f3fcb67e1d1dbf8753ec807d106057b2d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tar=C3=A9=20Gaskin?= Date: Wed, 29 Jul 2020 20:50:51 +0000 Subject: [PATCH 1654/2522] removal of non- tensorflow/compiler/ changes --- tensorflow/c/eager/tape.h | 10 +++++----- tensorflow/cc/framework/gradients.cc | 6 +++--- tensorflow/cc/framework/while_gradients.cc | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h index f52a5e32c1a..40cfa87dd66 100644 --- a/tensorflow/c/eager/tape.h +++ b/tensorflow/c/eager/tape.h @@ -573,7 +573,7 @@ Status InitialGradients( gtl::ArraySlice output_gradients, const TensorTape& tensor_tape, const OpTape& op_tape, std::unordered_map>* result) { - for (int i = 0, end = target_tensor_ids.size(); i < end; ++i) { + for (int i = 0; i < target_tensor_ids.size(); ++i) { const int64 id = target_tensor_ids[i]; if (output_gradients.empty() || output_gradients[i] == nullptr) { auto tensor_it = tensor_tape.find(id); @@ -699,7 +699,7 @@ Status GradientTape::ComputeGradient( std::vector out_gradients; out_gradients.reserve(trace.output_tensor_info.size()); std::vector unneeded_gradients; - for (int i = 0, end = trace.input_tensor_id.size(); i < end; i++) { + for (int i = 0; i < trace.input_tensor_id.size(); i++) { const auto& in_tensor_id = trace.input_tensor_id[i]; if (tensor_tape_.find(in_tensor_id) == tensor_tape_.end() && sources_set.find(in_tensor_id) == sources_set.end()) { @@ -709,7 +709,7 @@ Status GradientTape::ComputeGradient( bool any_gradient_nonzero = false; std::vector zero_indices; - for (int i = 0, end = trace.output_tensor_info.size(); i < end; ++i) { + for (int i = 0; i < 
trace.output_tensor_info.size(); ++i) { const int64 id = trace.output_tensor_info[i].GetID(); auto grad_it = gradients.find(id); if (grad_it == gradients.end()) { @@ -775,7 +775,7 @@ Status GradientTape::ComputeGradient( } VLOG(1) << "Got " << in_gradients.size() << " in_gradients for " << trace.input_tensor_id.size() << " sources"; - for (int i = 0, end = in_gradients.size(); i < end; ++i) { + for (int i = 0; i < in_gradients.size(); ++i) { const int64 id = trace.input_tensor_id[i]; if (in_gradients[i] != nullptr) { auto& unaggregated_grads = gradients[id]; @@ -968,7 +968,7 @@ ForwardAccumulator::ForwardpropFromTape( targets.reserve(grad.size()); used_in_grads.reserve(grad.size()); std::unordered_map sources_that_are_targets; - for (int grad_index = 0, end = grad.size(); grad_index < end; ++grad_index) { + for (int grad_index = 0; grad_index < grad.size(); ++grad_index) { Gradient* grad_tensor = grad[grad_index]; if (grad_tensor != nullptr) { int64 tensor_id = vspace_.TensorId(grad_tensor); diff --git a/tensorflow/cc/framework/gradients.cc b/tensorflow/cc/framework/gradients.cc index 3195a357186..88cd3fe79d6 100644 --- a/tensorflow/cc/framework/gradients.cc +++ b/tensorflow/cc/framework/gradients.cc @@ -425,7 +425,7 @@ Status SymbolicGradientBuilder::ProcessWhileLoop(Node* exit_node, // Backprop along the in edges to the while loop (i.e. the inputs to the enter // nodes) DCHECK_EQ(dx.size(), while_ctx->enter_nodes().size()); - for (int i = 0, end = dx.size(); i < end; ++i) { + for (int i = 0; i < dx.size(); ++i) { Node* enter_node = while_ctx->enter_nodes()[i]; for (const Edge* e : enter_node->in_edges()) { if (e->IsControlEdge()) continue; @@ -489,7 +489,7 @@ Status SymbolicGradientBuilder::AddGradients() { // All loop-specific control flow ops should have been handled above DCHECK(!n->IsEnter() && !n->IsNextIteration()) << n->DebugString(); - const int num_no_grad = no_grad_dy_indices.size(); + const size_t num_no_grad = no_grad_dy_indices.size(); if (IsPrimitiveOpWithNoGrad(n->type_string()) || num_no_grad == num_y) { // No grad defined for this op, or all outputs returned 'NoGradient': // Backprop 'NoGradient' along the in edges. @@ -524,7 +524,7 @@ Status SymbolicGradientBuilder::AddGradients() { // make this association explicit. for (const Edge* e : n->in_edges()) { if (e->IsControlEdge()) continue; - size_t dx_index = e->dst_input(); + int dx_index = e->dst_input(); if (dx_index >= dx.size()) { return errors::Internal( "Invalid gradient output index: ", dx_index, " size: ", dx.size()); diff --git a/tensorflow/cc/framework/while_gradients.cc b/tensorflow/cc/framework/while_gradients.cc index e241cfaebe9..81870a0efa3 100644 --- a/tensorflow/cc/framework/while_gradients.cc +++ b/tensorflow/cc/framework/while_gradients.cc @@ -34,7 +34,7 @@ Output ToOutput(OutputTensor output_tensor) { std::vector ToOutputVector( const std::vector& output_tensors) { - const int n = output_tensors.size(); + size_t n = output_tensors.size(); std::vector result; result.reserve(n); for (int i = 0; i < n; ++i) result.push_back(ToOutput(output_tensors[i])); From d6066885d7547332df957f7bdb50cf26a090e2ac Mon Sep 17 00:00:00 2001 From: Tomer Kaftan Date: Wed, 29 Jul 2020 13:50:05 -0700 Subject: [PATCH 1655/2522] Put a size limit on the int32 tensors KerasTensors will try to infer values for. This is needed because there is a maximum rank limit (of 254) for Tensors, so int32 tensors with more than 254 elements cannot represent shapes. 
(Which before this cl would cause KerasTensor shape inference to crash) PiperOrigin-RevId: 323860520 Change-Id: Icbf10b8220739c5a9474f6f588174606402b9ca8 --- .../python/keras/engine/keras_tensor.py | 12 ++++- .../keras/layers/tensorflow_op_layer_test.py | 53 +++++++++++++++++++ 2 files changed, 64 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/keras/engine/keras_tensor.py b/tensorflow/python/keras/engine/keras_tensor.py index 4266c6dbee6..28f6ef34e4e 100644 --- a/tensorflow/python/keras/engine/keras_tensor.py +++ b/tensorflow/python/keras/engine/keras_tensor.py @@ -433,6 +433,12 @@ class UserRegisteredSpec(type_spec_module.TypeSpec): def value_type(self): raise NotImplementedError +# Tensorflow tensors have a maximum dimension of 254 +# (See //tensorflow/core/framework/tensor_shape.h ) +# So we do not try to infer values for int32 tensors larger than this, +# As they cannot represent shapes. +_MAX_TENSOR_DIMS = 254 + def keras_tensor_from_tensor(x): """Convert a traced (composite)tensor to a representative KerasTensor.""" @@ -461,7 +467,7 @@ def keras_tensor_from_tensor(x): and type_spec.dtype == dtypes.int32 and type_spec.shape.rank < 2): # If this tensor might be representing shape information, - # (dtype=int32, rank of 0 or 1) + # (dtype=int32, rank of 0 or 1, not too large to represent a shape) # we attempt to capture any value information tensorflow's # shape handling can extract from the current scratch graph. # @@ -476,9 +482,13 @@ def keras_tensor_from_tensor(x): # manipulated w/ floating point numbers then converted back # * cases where int32 tensors w/ rank > 2 are manipulated before being # used as a shape tensor + # * cases where int32 tensors too large to represent shapes are manipulated + # to a smaller size before being used as a shape tensor inferred_value = array_ops.ones(shape=x).shape if inferred_value.dims: inferred_value = inferred_value.as_list() + if len(inferred_value) > _MAX_TENSOR_DIMS: + inferred_value = None else: inferred_value = None diff --git a/tensorflow/python/keras/layers/tensorflow_op_layer_test.py b/tensorflow/python/keras/layers/tensorflow_op_layer_test.py index f43b758c33c..18eb82624c1 100644 --- a/tensorflow/python/keras/layers/tensorflow_op_layer_test.py +++ b/tensorflow/python/keras/layers/tensorflow_op_layer_test.py @@ -131,6 +131,55 @@ def _shape_op_slice_and_range_known_dim(): return keras.Model(inputs, inputs) +def _int32_manipulation_too_big_for_shape(): + # This test verifies that the Keras Functional API + # won't crash when manipulating int32 tensors that are too large + # to represent shapes. + inputs = keras.Input(batch_size=2, shape=(10,)) + batch_size = array_ops.shape(inputs)[0] + num_features = 3 * 1024 * 16 + x = math_ops.range(batch_size * num_features, dtype='int32') + assert x.shape.as_list() == [inputs.shape[0] * num_features] + x = array_ops.reshape(x, (batch_size, num_features)) + x = math_ops.cast(x, dtype='float32') + outputs = keras.layers.Dense(10)(x) + if context.executing_eagerly(): + return keras.Model(inputs, outputs) + else: + # In V1 the op layer fails for some reason, + # but we don't have access to the test case to call + # self.skip_test in this util method + return keras.Model(inputs, inputs) + + +def _int32_manipulation_at_max_shape_dims_limit(): + # This test verifies that the Keras Functional API + # won't crash when manipulating int32 tensors that are at the limit + # of the max tensor size Keras can try inferring values for. 
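# Worked numbers for these two helpers, assuming the batch_size=2 inputs they
# declare (boundary arithmetic only, not part of the tests themselves): the
# helper above builds x = range(2 * 3 * 1024 * 16) = range(98304), far past
# _MAX_TENSOR_DIMS, so keras_tensor_from_tensor() ends up with
# inferred_value = None; this helper builds x = range(2 * int(254 / 2)) =
# range(254), exactly _MAX_TENSOR_DIMS elements, so value inference is still
# attempted (any length above 254 would again yield inferred_value = None).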
+ inputs = keras.Input(batch_size=2, shape=(10,)) + batch_size = array_ops.shape(inputs)[0] + num_features = int(keras_tensor._MAX_TENSOR_DIMS / int(inputs.shape[0])) + x = math_ops.range(batch_size * num_features, dtype='int32') + assert x.shape.as_list() == [keras_tensor._MAX_TENSOR_DIMS] + + # Verify that a value was actually inferred for a tensor that *might* + # represent the shape, bying checking that a value in + # the range appears in the printed inferred value + if keras_tensor.keras_tensors_enabled(): + assert str(keras_tensor._MAX_TENSOR_DIMS - 1) in str(x) + + x = array_ops.reshape(x, (batch_size, num_features)) + x = math_ops.cast(x, dtype='float32') + outputs = keras.layers.Dense(10)(x) + if context.executing_eagerly(): + return keras.Model(inputs, outputs) + else: + # In V1 the op layer fails for some reason, + # but we don't have access to the test case to call + # self.skip_test in this util method + return keras.Model(inputs, inputs) + + def _single_standalone_branch(): inputs = keras.Input(shape=(10,)) x = keras.layers.Dense(10)(inputs) @@ -252,6 +301,10 @@ class AutoLambdaTest(keras_parameterized.TestCase): ('shape_op_slice_and_range', _shape_op_slice_and_range), ('shape_op_slice_and_range_known_dim', _shape_op_slice_and_range_known_dim), + ('int32_manipulation_too_big_for_shape', + _int32_manipulation_too_big_for_shape), + ('int32_manipulation_at_max_shape_dims_limit', + _int32_manipulation_at_max_shape_dims_limit), ('single_standalone_branch', _single_standalone_branch), ('single_op_with_attrs', _single_op_with_attrs), ('multiple_uses', _multiple_uses), From 57fe05f57dffc6d5ad356f8e1cc9d4f22cc12116 Mon Sep 17 00:00:00 2001 From: Tim Shen Date: Wed, 29 Jul 2020 13:54:33 -0700 Subject: [PATCH 1656/2522] [mlir] Enhance mlir::MemRefType -> xla::Shape conversion. PiperOrigin-RevId: 323861437 Change-Id: If60b33c5b69a81b7f05843f42b602ce2945bed95 --- tensorflow/compiler/mlir/xla/BUILD | 1 + tensorflow/compiler/mlir/xla/type_to_shape.cc | 34 ++++++++++++++++--- .../compiler/mlir/xla/type_to_shape_test.cc | 18 ++++++++++ 3 files changed, 49 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/mlir/xla/BUILD b/tensorflow/compiler/mlir/xla/BUILD index a6eb9f2fe1c..0a7e44a275f 100644 --- a/tensorflow/compiler/mlir/xla/BUILD +++ b/tensorflow/compiler/mlir/xla/BUILD @@ -209,6 +209,7 @@ tf_cc_test( name = "type_to_shape_test", srcs = ["type_to_shape_test.cc"], deps = [ + ":hlo_utils", ":type_to_shape", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:test", diff --git a/tensorflow/compiler/mlir/xla/type_to_shape.cc b/tensorflow/compiler/mlir/xla/type_to_shape.cc index b684abde7a5..afc36916348 100644 --- a/tensorflow/compiler/mlir/xla/type_to_shape.cc +++ b/tensorflow/compiler/mlir/xla/type_to_shape.cc @@ -145,11 +145,37 @@ Shape TypeToShape(mlir::Type type) { // For the primitive type case, the shape of the memref is similar to the // vector type case (i.e., it is, modulo the layout, the same dimensions // and primitive type). - // Currently we only return shapes for identity affine maps. - // TODO(andydavis) Map affine map layout function to XLA layout. 
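// The replacement logic below recovers an XLA layout from the memref's
// strides: it sorts (stride, dimension) pairs by stride, checks that each
// stride equals the running product of the dimension sizes visited so far,
// and emits the visited dimension indices as minor_to_major. A standalone
// sketch of that recovery, using hypothetical names rather than the real
// MLIR/XLA helpers, with the worked numbers from the new test afterwards:
#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>

std::vector<int64_t> MinorToMajorFromStrides(
    const std::vector<int64_t>& dims, const std::vector<int64_t>& strides) {
  std::vector<std::pair<int64_t, int64_t>> stride_and_dim;
  for (int64_t i = 0; i < static_cast<int64_t>(strides.size()); ++i) {
    stride_and_dim.push_back({strides[i], i});
  }
  std::sort(stride_and_dim.begin(), stride_and_dim.end());
  std::vector<int64_t> minor_to_major;
  int64_t expected_stride = 1;
  for (const auto& p : stride_and_dim) {
    if (p.first != expected_stride) return {};  // not perfectly strided
    minor_to_major.push_back(p.second);
    expected_stride *= dims[p.second];
  }
  return minor_to_major;
}
// For dims {10, 20, 30} with strides {30, 300, 1} the sorted pairs are
// (1, 2), (30, 0), (300, 1), so minor_to_major comes out as {2, 0, 1},
// the same layout the ConvertMemRefToShape test below round-trips.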
- if (m.getAffineMaps().empty() || - (m.getAffineMaps().size() == 1 && m.getAffineMaps()[0].isIdentity())) + if (m.getAffineMaps().empty()) return ShapeUtil::MakeShape(primitive_type, span); + + if (m.getAffineMaps().size() == 1) { + llvm::SmallVector strides; + int64_t offset; + if (failed(mlir::getStridesAndOffset(m, strides, offset))) return {}; + + llvm::SmallVector, 4> strides_with_indices; + for (const auto& e : llvm::enumerate(strides)) { + strides_with_indices.push_back({e.value(), e.index()}); + } + std::sort(strides_with_indices.begin(), strides_with_indices.end()); + + llvm::SmallVector minor_to_major; + int64_t stride = 1; + for (const auto& pr : strides_with_indices) { + minor_to_major.push_back(pr.second); + + // Either the affine map is not perfectly strided, or the dimensions + // recovered from strides don't match the actual dimensions in shapes. + if (stride != pr.first) return {}; + + stride *= m.getShape()[pr.second]; + } + + llvm::SmallVector dimensions(m.getShape().begin(), + m.getShape().end()); + return ::xla::ShapeUtil::MakeShapeWithLayout(primitive_type, dimensions, + minor_to_major); + } break; } case mlir::StandardTypes::RankedTensor: { diff --git a/tensorflow/compiler/mlir/xla/type_to_shape_test.cc b/tensorflow/compiler/mlir/xla/type_to_shape_test.cc index b2a7cb85686..a4a2bc42d99 100644 --- a/tensorflow/compiler/mlir/xla/type_to_shape_test.cc +++ b/tensorflow/compiler/mlir/xla/type_to_shape_test.cc @@ -20,6 +20,7 @@ limitations under the License. #include "mlir/IR/Builders.h" // from @llvm-project #include "mlir/IR/MLIRContext.h" // from @llvm-project #include "mlir/IR/StandardTypes.h" // from @llvm-project +#include "tensorflow/compiler/mlir/xla/hlo_utils.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/test.h" #include "tensorflow/compiler/xla/xla_data.pb.h" @@ -178,5 +179,22 @@ TEST(TypeToShapeTest, ConvertWithShapeRepresentationFn) { EXPECT_EQ(captured_tensor_shape, tensorflow::TensorShape({1, 2, 3})); } +TEST(TypeToShapeTest, ConvertMemRefToShape) { + Shape shape = ShapeUtil::MakeShapeWithLayout(PrimitiveType::F32, {10, 20, 30}, + {2, 0, 1}); + MLIRContext context; + mlir::Builder builder(&context); + + StatusOr mlir_type = + ConvertShapeToType(shape, builder); + ASSERT_TRUE(mlir_type.ok()); + mlir::Type type = mlir_type.ConsumeValueOrDie(); + Shape converted = TypeToShape(type); + EXPECT_TRUE(ShapeUtil::Equal( + converted, ShapeUtil::MakeShapeWithLayout(PrimitiveType::F32, + {10, 20, 30}, {2, 0, 1}))); + EXPECT_TRUE(ShapeUtil::Equal(converted, shape)); +} + } // namespace } // namespace xla From 5d61fb278ebd49f3b970a15f7fb8822a6cae9cf5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tar=C3=A9=20Gaskin?= Date: Wed, 29 Jul 2020 21:04:00 +0000 Subject: [PATCH 1657/2522] c, cc folder resolutions --- tensorflow/c/eager/tape.h | 10 +++++----- tensorflow/cc/framework/gradients.cc | 6 +++--- tensorflow/cc/framework/while_gradients.cc | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h index 40cfa87dd66..f52a5e32c1a 100644 --- a/tensorflow/c/eager/tape.h +++ b/tensorflow/c/eager/tape.h @@ -573,7 +573,7 @@ Status InitialGradients( gtl::ArraySlice output_gradients, const TensorTape& tensor_tape, const OpTape& op_tape, std::unordered_map>* result) { - for (int i = 0; i < target_tensor_ids.size(); ++i) { + for (int i = 0, end = target_tensor_ids.size(); i < end; ++i) { const int64 id = target_tensor_ids[i]; if (output_gradients.empty() || output_gradients[i] == 
nullptr) { auto tensor_it = tensor_tape.find(id); @@ -699,7 +699,7 @@ Status GradientTape::ComputeGradient( std::vector out_gradients; out_gradients.reserve(trace.output_tensor_info.size()); std::vector unneeded_gradients; - for (int i = 0; i < trace.input_tensor_id.size(); i++) { + for (int i = 0, end = trace.input_tensor_id.size(); i < end; i++) { const auto& in_tensor_id = trace.input_tensor_id[i]; if (tensor_tape_.find(in_tensor_id) == tensor_tape_.end() && sources_set.find(in_tensor_id) == sources_set.end()) { @@ -709,7 +709,7 @@ Status GradientTape::ComputeGradient( bool any_gradient_nonzero = false; std::vector zero_indices; - for (int i = 0; i < trace.output_tensor_info.size(); ++i) { + for (int i = 0, end = trace.output_tensor_info.size(); i < end; ++i) { const int64 id = trace.output_tensor_info[i].GetID(); auto grad_it = gradients.find(id); if (grad_it == gradients.end()) { @@ -775,7 +775,7 @@ Status GradientTape::ComputeGradient( } VLOG(1) << "Got " << in_gradients.size() << " in_gradients for " << trace.input_tensor_id.size() << " sources"; - for (int i = 0; i < in_gradients.size(); ++i) { + for (int i = 0, end = in_gradients.size(); i < end; ++i) { const int64 id = trace.input_tensor_id[i]; if (in_gradients[i] != nullptr) { auto& unaggregated_grads = gradients[id]; @@ -968,7 +968,7 @@ ForwardAccumulator::ForwardpropFromTape( targets.reserve(grad.size()); used_in_grads.reserve(grad.size()); std::unordered_map sources_that_are_targets; - for (int grad_index = 0; grad_index < grad.size(); ++grad_index) { + for (int grad_index = 0, end = grad.size(); grad_index < end; ++grad_index) { Gradient* grad_tensor = grad[grad_index]; if (grad_tensor != nullptr) { int64 tensor_id = vspace_.TensorId(grad_tensor); diff --git a/tensorflow/cc/framework/gradients.cc b/tensorflow/cc/framework/gradients.cc index 88cd3fe79d6..3195a357186 100644 --- a/tensorflow/cc/framework/gradients.cc +++ b/tensorflow/cc/framework/gradients.cc @@ -425,7 +425,7 @@ Status SymbolicGradientBuilder::ProcessWhileLoop(Node* exit_node, // Backprop along the in edges to the while loop (i.e. the inputs to the enter // nodes) DCHECK_EQ(dx.size(), while_ctx->enter_nodes().size()); - for (int i = 0; i < dx.size(); ++i) { + for (int i = 0, end = dx.size(); i < end; ++i) { Node* enter_node = while_ctx->enter_nodes()[i]; for (const Edge* e : enter_node->in_edges()) { if (e->IsControlEdge()) continue; @@ -489,7 +489,7 @@ Status SymbolicGradientBuilder::AddGradients() { // All loop-specific control flow ops should have been handled above DCHECK(!n->IsEnter() && !n->IsNextIteration()) << n->DebugString(); - const size_t num_no_grad = no_grad_dy_indices.size(); + const int num_no_grad = no_grad_dy_indices.size(); if (IsPrimitiveOpWithNoGrad(n->type_string()) || num_no_grad == num_y) { // No grad defined for this op, or all outputs returned 'NoGradient': // Backprop 'NoGradient' along the in edges. @@ -524,7 +524,7 @@ Status SymbolicGradientBuilder::AddGradients() { // make this association explicit. 
for (const Edge* e : n->in_edges()) { if (e->IsControlEdge()) continue; - int dx_index = e->dst_input(); + size_t dx_index = e->dst_input(); if (dx_index >= dx.size()) { return errors::Internal( "Invalid gradient output index: ", dx_index, " size: ", dx.size()); diff --git a/tensorflow/cc/framework/while_gradients.cc b/tensorflow/cc/framework/while_gradients.cc index 81870a0efa3..e241cfaebe9 100644 --- a/tensorflow/cc/framework/while_gradients.cc +++ b/tensorflow/cc/framework/while_gradients.cc @@ -34,7 +34,7 @@ Output ToOutput(OutputTensor output_tensor) { std::vector ToOutputVector( const std::vector& output_tensors) { - size_t n = output_tensors.size(); + const int n = output_tensors.size(); std::vector result; result.reserve(n); for (int i = 0; i < n; ++i) result.push_back(ToOutput(output_tensors[i])); From e991ea984bd5b2fb2f9a2e56088445c14420b329 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 29 Jul 2020 14:03:20 -0700 Subject: [PATCH 1658/2522] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 323863385 Change-Id: I0e3374b72d224f45b20b209841d36ab0e5edf985 --- tensorflow/go/op/wrappers.go | 2298 +++++++++++++++++----------------- 1 file changed, 1149 insertions(+), 1149 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 9da46e94adb..470d8fe43ed 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -27432,74 +27432,6 @@ func DecodePaddedRaw(scope *Scope, input_bytes tf.Output, fixed_length tf.Output return op.Output(0) } -// Gather ragged slices from `params` axis `0` according to `indices`. -// -// Outputs a `RaggedTensor` output composed from `output_dense_values` and -// `output_nested_splits`, such that: -// -// ```python -// output.shape = indices.shape + params.shape[1:] -// output.ragged_rank = indices.shape.ndims + params.ragged_rank -// output[i...j, d0...dn] = params[indices[i...j], d0...dn] -// ``` -// -// where -// -// * `params = -// ragged.from_nested_row_splits(params_dense_values, params_nested_splits)` -// provides the values that should be gathered. -// * `indices` ia a dense tensor with dtype `int32` or `int64`, indicating which -// values should be gathered. -// * `output = -// ragged.from_nested_row_splits(output_dense_values, output_nested_splits)` -// is the output tensor. -// -// (Note: This c++ op is used to implement the higher-level python -// `tf.ragged.gather` op, which also supports ragged indices.) -// -// -// Arguments: -// params_nested_splits: The `nested_row_splits` tensors that define the row-partitioning for the -// `params` RaggedTensor input. -// params_dense_values: The `flat_values` for the `params` RaggedTensor. There was a terminology change -// at the python level from dense_values to flat_values, so dense_values is the -// deprecated name. -// indices: Indices in the outermost dimension of `params` of the values that should be -// gathered. -// OUTPUT_RAGGED_RANK: The ragged rank of the output RaggedTensor. `output_nested_splits` will contain -// this number of `row_splits` tensors. This value should equal -// `indices.shape.ndims + params.ragged_rank - 1`. -// -// Returns: -// output_nested_splits: The `nested_row_splits` tensors that define the row-partitioning for the -// returned RaggedTensor. -// output_dense_values: The `flat_values` for the returned RaggedTensor. 
-func RaggedGather(scope *Scope, params_nested_splits []tf.Output, params_dense_values tf.Output, indices tf.Output, OUTPUT_RAGGED_RANK int64) (output_nested_splits []tf.Output, output_dense_values tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"OUTPUT_RAGGED_RANK": OUTPUT_RAGGED_RANK} - opspec := tf.OpSpec{ - Type: "RaggedGather", - Input: []tf.Input{ - tf.OutputList(params_nested_splits), params_dense_values, indices, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if output_nested_splits, idx, err = makeOutputList(op, idx, "output_nested_splits"); err != nil { - scope.UpdateErr("RaggedGather", err) - return - } - output_dense_values = op.Output(idx) - return output_nested_splits, output_dense_values -} - // QuantizeV2Attr is an optional argument to QuantizeV2. type QuantizeV2Attr func(optionalAttr) @@ -32364,6 +32296,74 @@ func CSRSparseMatrixComponents(scope *Scope, csr_sparse_matrix tf.Output, index return op.Output(0), op.Output(1), op.Output(2) } +// Gather ragged slices from `params` axis `0` according to `indices`. +// +// Outputs a `RaggedTensor` output composed from `output_dense_values` and +// `output_nested_splits`, such that: +// +// ```python +// output.shape = indices.shape + params.shape[1:] +// output.ragged_rank = indices.shape.ndims + params.ragged_rank +// output[i...j, d0...dn] = params[indices[i...j], d0...dn] +// ``` +// +// where +// +// * `params = +// ragged.from_nested_row_splits(params_dense_values, params_nested_splits)` +// provides the values that should be gathered. +// * `indices` ia a dense tensor with dtype `int32` or `int64`, indicating which +// values should be gathered. +// * `output = +// ragged.from_nested_row_splits(output_dense_values, output_nested_splits)` +// is the output tensor. +// +// (Note: This c++ op is used to implement the higher-level python +// `tf.ragged.gather` op, which also supports ragged indices.) +// +// +// Arguments: +// params_nested_splits: The `nested_row_splits` tensors that define the row-partitioning for the +// `params` RaggedTensor input. +// params_dense_values: The `flat_values` for the `params` RaggedTensor. There was a terminology change +// at the python level from dense_values to flat_values, so dense_values is the +// deprecated name. +// indices: Indices in the outermost dimension of `params` of the values that should be +// gathered. +// OUTPUT_RAGGED_RANK: The ragged rank of the output RaggedTensor. `output_nested_splits` will contain +// this number of `row_splits` tensors. This value should equal +// `indices.shape.ndims + params.ragged_rank - 1`. +// +// Returns: +// output_nested_splits: The `nested_row_splits` tensors that define the row-partitioning for the +// returned RaggedTensor. +// output_dense_values: The `flat_values` for the returned RaggedTensor. 
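// An illustrative worked case (hypothetical values, not from the original op
// documentation): a ragged `params` of [[1, 2], [3], [4, 5, 6]] is encoded as
// params_nested_splits = [[0, 2, 3, 6]] with params_dense_values =
// [1, 2, 3, 4, 5, 6]; gathering indices = [2, 0] yields [[4, 5, 6], [1, 2]],
// i.e. output_nested_splits = [[0, 3, 5]] and output_dense_values =
// [4, 5, 6, 1, 2]. The same row_splits bookkeeping, sketched with plain Go
// slices rather than tf.Output values:
//
//	func raggedGather(splits, values, indices []int) ([]int, []int) {
//		outSplits, outValues := []int{0}, []int(nil)
//		for _, idx := range indices {
//			outValues = append(outValues, values[splits[idx]:splits[idx+1]]...)
//			outSplits = append(outSplits, len(outValues))
//		}
//		return outSplits, outValues
//	}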
+func RaggedGather(scope *Scope, params_nested_splits []tf.Output, params_dense_values tf.Output, indices tf.Output, OUTPUT_RAGGED_RANK int64) (output_nested_splits []tf.Output, output_dense_values tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"OUTPUT_RAGGED_RANK": OUTPUT_RAGGED_RANK} + opspec := tf.OpSpec{ + Type: "RaggedGather", + Input: []tf.Input{ + tf.OutputList(params_nested_splits), params_dense_values, indices, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if output_nested_splits, idx, err = makeOutputList(op, idx, "output_nested_splits"); err != nil { + scope.UpdateErr("RaggedGather", err) + return + } + output_dense_values = op.Output(idx) + return output_nested_splits, output_dense_values +} + // StringSplitV2Attr is an optional argument to StringSplitV2. type StringSplitV2Attr func(optionalAttr) @@ -36014,28 +36014,6 @@ func StringFormat(scope *Scope, inputs []tf.Output, optional ...StringFormatAttr return op.Output(0) } -// Converts a SparseTensor to a (possibly batched) CSRSparseMatrix. -// -// Arguments: -// indices: SparseTensor indices. -// values: SparseTensor values. -// dense_shape: SparseTensor dense shape. -// -// Returns A (possibly batched) CSRSparseMatrix. -func SparseTensorToCSRSparseMatrix(scope *Scope, indices tf.Output, values tf.Output, dense_shape tf.Output) (sparse_matrix tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseTensorToCSRSparseMatrix", - Input: []tf.Input{ - indices, values, dense_shape, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Computes fingerprints of the input strings. // // Arguments: @@ -36347,6 +36325,28 @@ func QueueDequeueUpToV2(scope *Scope, handle tf.Output, n tf.Output, component_t return components } +// Converts a SparseTensor to a (possibly batched) CSRSparseMatrix. +// +// Arguments: +// indices: SparseTensor indices. +// values: SparseTensor values. +// dense_shape: SparseTensor dense shape. +// +// Returns A (possibly batched) CSRSparseMatrix. +func SparseTensorToCSRSparseMatrix(scope *Scope, indices tf.Output, values tf.Output, dense_shape tf.Output) (sparse_matrix tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SparseTensorToCSRSparseMatrix", + Input: []tf.Input{ + indices, values, dense_shape, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Computes the product along segments of a tensor. // // Read @@ -38639,40 +38639,6 @@ func BlockLSTMGrad(scope *Scope, seq_len_max tf.Output, x tf.Output, cs_prev tf. return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4), op.Output(5), op.Output(6), op.Output(7) } -// Op that executes a program with optional in-place variable updates. -// -// It (optionally) reads device variables, loads and executes a TPU program on a -// TPU device, and then (optionally) in-place updates variables using the program -// outputs, as specified in attributes device_var_reads_indices (program input -// indices from directly reading variables) and device_var_updates_indices (program -// output indices used to update variables, -1 means no-update/read-only). Such -// program outputs are consumed by these variables will not appear in the op -// output. For the internal use of the distributed TPU compiler. 
-func TPUExecuteAndUpdateVariables(scope *Scope, args []tf.Output, key tf.Output, Tresults []tf.DataType, device_var_reads_indices []int64, device_var_updates_indices []int64) (results []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"Tresults": Tresults, "device_var_reads_indices": device_var_reads_indices, "device_var_updates_indices": device_var_updates_indices} - opspec := tf.OpSpec{ - Type: "TPUExecuteAndUpdateVariables", - Input: []tf.Input{ - tf.OutputList(args), key, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if results, idx, err = makeOutputList(op, idx, "results"); err != nil { - scope.UpdateErr("TPUExecuteAndUpdateVariables", err) - return - } - return results -} - // ResourceSparseApplyAdagradAttr is an optional argument to ResourceSparseApplyAdagrad. type ResourceSparseApplyAdagradAttr func(optionalAttr) @@ -38728,6 +38694,40 @@ func ResourceSparseApplyAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, l return scope.AddOperation(opspec) } +// Op that executes a program with optional in-place variable updates. +// +// It (optionally) reads device variables, loads and executes a TPU program on a +// TPU device, and then (optionally) in-place updates variables using the program +// outputs, as specified in attributes device_var_reads_indices (program input +// indices from directly reading variables) and device_var_updates_indices (program +// output indices used to update variables, -1 means no-update/read-only). Such +// program outputs are consumed by these variables will not appear in the op +// output. For the internal use of the distributed TPU compiler. +func TPUExecuteAndUpdateVariables(scope *Scope, args []tf.Output, key tf.Output, Tresults []tf.DataType, device_var_reads_indices []int64, device_var_updates_indices []int64) (results []tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"Tresults": Tresults, "device_var_reads_indices": device_var_reads_indices, "device_var_updates_indices": device_var_updates_indices} + opspec := tf.OpSpec{ + Type: "TPUExecuteAndUpdateVariables", + Input: []tf.Input{ + tf.OutputList(args), key, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if results, idx, err = makeOutputList(op, idx, "results"); err != nil { + scope.UpdateErr("TPUExecuteAndUpdateVariables", err) + return + } + return results +} + // OutfeedDequeueTupleAttr is an optional argument to OutfeedDequeueTuple. type OutfeedDequeueTupleAttr func(optionalAttr) @@ -38952,6 +38952,21 @@ func ReaderSerializeStateV2(scope *Scope, reader_handle tf.Output) (state tf.Out return op.Output(0) } +// Computes rectified linear 6: `min(max(features, 0), 6)`. +func Relu6(scope *Scope, features tf.Output) (activations tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Relu6", + Input: []tf.Input{ + features, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Split a `SparseTensor` into `num_split` tensors along one dimension. // // If the `shape[split_dim]` is not an integer multiple of `num_split`. Slices @@ -39021,21 +39036,6 @@ func SparseSplit(scope *Scope, split_dim tf.Output, indices tf.Output, values tf return output_indices, output_values, output_shape } -// Computes rectified linear 6: `min(max(features, 0), 6)`. 
-func Relu6(scope *Scope, features tf.Output) (activations tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Relu6", - Input: []tf.Input{ - features, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // RaggedRangeAttr is an optional argument to RaggedRange. type RaggedRangeAttr func(optionalAttr) @@ -40142,68 +40142,6 @@ func RemoteFusedGraphExecute(scope *Scope, inputs []tf.Output, Toutputs []tf.Dat return outputs } -// LoadTPUEmbeddingMDLAdagradLightParametersAttr is an optional argument to LoadTPUEmbeddingMDLAdagradLightParameters. -type LoadTPUEmbeddingMDLAdagradLightParametersAttr func(optionalAttr) - -// LoadTPUEmbeddingMDLAdagradLightParametersTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func LoadTPUEmbeddingMDLAdagradLightParametersTableId(value int64) LoadTPUEmbeddingMDLAdagradLightParametersAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// LoadTPUEmbeddingMDLAdagradLightParametersTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingMDLAdagradLightParametersTableName(value string) LoadTPUEmbeddingMDLAdagradLightParametersAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// LoadTPUEmbeddingMDLAdagradLightParametersConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingMDLAdagradLightParametersConfig(value string) LoadTPUEmbeddingMDLAdagradLightParametersAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Load MDL Adagrad Light embedding parameters. -// -// An op that loads optimization parameters into HBM for embedding. Must be -// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct -// embedding table configuration. For example, this op is used to install -// parameters that are loaded from a checkpoint before a training loop is -// executed. -// -// Arguments: -// parameters: Value of parameters used in the MDL Adagrad Light optimization algorithm. -// accumulators: Value of accumulators used in the MDL Adagrad Light optimization algorithm. -// weights: Value of weights used in the MDL Adagrad Light optimization algorithm. -// benefits: Value of benefits used in the MDL Adagrad Light optimization algorithm. -// -// -// -// Returns the created operation. -func LoadTPUEmbeddingMDLAdagradLightParameters(scope *Scope, parameters tf.Output, accumulators tf.Output, weights tf.Output, benefits tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingMDLAdagradLightParametersAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "LoadTPUEmbeddingMDLAdagradLightParameters", - Input: []tf.Input{ - parameters, accumulators, weights, benefits, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - // DatasetToGraphAttr is an optional argument to DatasetToGraph. type DatasetToGraphAttr func(optionalAttr) @@ -43246,472 +43184,6 @@ func RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebug(scope *Scope, num_shar return op.Output(0), op.Output(1), op.Output(2), op.Output(3) } -// IRFFTAttr is an optional argument to IRFFT. -type IRFFTAttr func(optionalAttr) - -// IRFFTTreal sets the optional Treal attribute to value. 
-// If not specified, defaults to DT_FLOAT -func IRFFTTreal(value tf.DataType) IRFFTAttr { - return func(m optionalAttr) { - m["Treal"] = value - } -} - -// Inverse real-valued fast Fourier transform. -// -// Computes the inverse 1-dimensional discrete Fourier transform of a real-valued -// signal over the inner-most dimension of `input`. -// -// The inner-most dimension of `input` is assumed to be the result of `RFFT`: the -// `fft_length / 2 + 1` unique components of the DFT of a real-valued signal. If -// `fft_length` is not provided, it is computed from the size of the inner-most -// dimension of `input` (`fft_length = 2 * (inner - 1)`). If the FFT length used to -// compute `input` is odd, it should be provided since it cannot be inferred -// properly. -// -// Along the axis `IRFFT` is computed on, if `fft_length / 2 + 1` is smaller -// than the corresponding dimension of `input`, the dimension is cropped. If it is -// larger, the dimension is padded with zeros. -// -// Arguments: -// input: A complex tensor. -// fft_length: An int32 tensor of shape [1]. The FFT length. -// -// Returns A float32 tensor of the same rank as `input`. The inner-most -// dimension of `input` is replaced with the `fft_length` samples of its inverse -// 1D Fourier transform. -// -// @compatibility(numpy) -// Equivalent to np.fft.irfft -// @end_compatibility -func IRFFT(scope *Scope, input tf.Output, fft_length tf.Output, optional ...IRFFTAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "IRFFT", - Input: []tf.Input{ - input, fft_length, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// EnqueueTPUEmbeddingSparseBatchAttr is an optional argument to EnqueueTPUEmbeddingSparseBatch. -type EnqueueTPUEmbeddingSparseBatchAttr func(optionalAttr) - -// EnqueueTPUEmbeddingSparseBatchDeviceOrdinal sets the optional device_ordinal attribute to value. -// -// value: The TPU device to use. Should be >= 0 and less than the number -// of TPU cores in the task on which the node is placed. -// If not specified, defaults to -1 -func EnqueueTPUEmbeddingSparseBatchDeviceOrdinal(value int64) EnqueueTPUEmbeddingSparseBatchAttr { - return func(m optionalAttr) { - m["device_ordinal"] = value - } -} - -// EnqueueTPUEmbeddingSparseBatchCombiners sets the optional combiners attribute to value. -// -// value: A list of string scalars, one for each embedding table that specify -// how to normalize the embedding activations after weighted summation. -// Supported combiners are 'mean', 'sum', or 'sqrtn'. It is invalid to have -// the sum of the weights be 0 for 'mean' or the sum of the squared weights be -// 0 for 'sqrtn'. If combiners isn't passed, the default is to use 'sum' for -// all tables. -// If not specified, defaults to <> -func EnqueueTPUEmbeddingSparseBatchCombiners(value []string) EnqueueTPUEmbeddingSparseBatchAttr { - return func(m optionalAttr) { - m["combiners"] = value - } -} - -// An op that enqueues TPUEmbedding input indices from a SparseTensor. -// -// This Op eases the porting of code that uses embedding_lookup_sparse(), -// although some Python preprocessing of the SparseTensor arguments to -// embedding_lookup_sparse() is required to produce the arguments to this Op, -// since only a single EnqueueTPUEmbeddingSparseBatch Op is allowed per training -// step. 
-// -// The tensors at corresponding positions in the three input lists -// must have the same shape, i.e. rank 1 with dim_size() equal to the total -// number of lookups into the table described by the corresponding table_id. -// -// Arguments: -// sample_indices: A list of rank 1 Tensors specifying the training example and -// feature to which the corresponding embedding_indices and aggregation_weights -// values belong. sample_indices[i] must equal b * nf + f, where nf is the -// number of features from the corresponding table, f is in [0, nf), and -// b is in [0, batch size). -// embedding_indices: A list of rank 1 Tensors, indices into the embedding tables. -// aggregation_weights: A list of rank 1 Tensors containing per sample -- i.e. per -// (training example, feature) -- aggregation weights. -// mode_override: A string input that overrides the mode specified in the -// TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference', -// 'training', 'backward_pass_only'}. When set to 'unspecified', the mode set -// in TPUEmbeddingConfiguration is used, otherwise mode_override is used. -// -// Returns the created operation. -func EnqueueTPUEmbeddingSparseBatch(scope *Scope, sample_indices []tf.Output, embedding_indices []tf.Output, aggregation_weights []tf.Output, mode_override tf.Output, optional ...EnqueueTPUEmbeddingSparseBatchAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "EnqueueTPUEmbeddingSparseBatch", - Input: []tf.Input{ - tf.OutputList(sample_indices), tf.OutputList(embedding_indices), tf.OutputList(aggregation_weights), mode_override, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// ResourceScatterNdUpdateAttr is an optional argument to ResourceScatterNdUpdate. -type ResourceScatterNdUpdateAttr func(optionalAttr) - -// ResourceScatterNdUpdateUseLocking sets the optional use_locking attribute to value. -// -// value: An optional bool. Defaults to True. If True, the assignment will -// be protected by a lock; otherwise the behavior is undefined, -// but may exhibit less contention. -// If not specified, defaults to true -func ResourceScatterNdUpdateUseLocking(value bool) ResourceScatterNdUpdateAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// Applies sparse `updates` to individual values or slices within a given -// -// variable according to `indices`. -// -// `ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`. -// -// `indices` must be integer tensor, containing indices into `ref`. -// It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`. -// -// The innermost dimension of `indices` (with length `K`) corresponds to -// indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th -// dimension of `ref`. -// -// `updates` is `Tensor` of rank `Q-1+P-K` with shape: -// -// ``` -// [d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]]. -// ``` -// -// For example, say we want to update 4 scattered elements to a rank-1 tensor to -// 8 elements. 
In Python, that update would look like this: -// -// ```python -// ref = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8]) -// indices = tf.constant([[4], [3], [1] ,[7]]) -// updates = tf.constant([9, 10, 11, 12]) -// update = tf.scatter_nd_update(ref, indices, updates) -// with tf.Session() as sess: -// print sess.run(update) -// ``` -// -// The resulting update to ref would look like this: -// -// [1, 11, 3, 10, 9, 6, 7, 12] -// -// See `tf.scatter_nd` for more details about how to make updates to -// slices. -// -// Arguments: -// ref: A resource handle. Must be from a VarHandleOp. -// indices: A Tensor. Must be one of the following types: int32, int64. -// A tensor of indices into ref. -// updates: A Tensor. Must have the same type as ref. A tensor of updated -// values to add to ref. -// -// Returns the created operation. -func ResourceScatterNdUpdate(scope *Scope, ref tf.Output, indices tf.Output, updates tf.Output, optional ...ResourceScatterNdUpdateAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceScatterNdUpdate", - Input: []tf.Input{ - ref, indices, updates, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Computes the sum along sparse segments of a tensor divided by the sqrt of N. -// -// N is the size of the segment being reduced. -// -// See `tf.sparse.segment_sum` for usage examples. -// -// -// Arguments: -// -// indices: A 1-D tensor. Has same rank as `segment_ids`. -// segment_ids: A 1-D tensor. Values should be sorted and can be repeated. -// -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. -func SparseSegmentSqrtN(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseSegmentSqrtN", - Input: []tf.Input{ - data, indices, segment_ids, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes square root of x element-wise. -// -// I.e., \\(y = \sqrt{x} = x^{1/2}\\). -func Sqrt(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Sqrt", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Gradient op for `MirrorPad` op. This op folds a mirror-padded tensor. -// -// This operation folds the padded areas of `input` by `MirrorPad` according to the -// `paddings` you specify. `paddings` must be the same as `paddings` argument -// given to the corresponding `MirrorPad` op. -// -// The folded size of each dimension D of the output is: -// -// `input.dim_size(D) - paddings(D, 0) - paddings(D, 1)` -// -// For example: -// -// ``` -// # 't' is [[1, 2, 3], [4, 5, 6], [7, 8, 9]]. -// # 'paddings' is [[0, 1]], [0, 1]]. -// # 'mode' is SYMMETRIC. -// # rank of 't' is 2. -// pad(t, paddings) ==> [[ 1, 5] -// [11, 28]] -// ``` -// -// Arguments: -// input: The input tensor to be folded. -// paddings: A two-column matrix specifying the padding sizes. The number of -// rows must be the same as the rank of `input`. -// mode: The mode used in the `MirrorPad` op. -// -// Returns The folded tensor. 
-func MirrorPadGrad(scope *Scope, input tf.Output, paddings tf.Output, mode string) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"mode": mode} - opspec := tf.OpSpec{ - Type: "MirrorPadGrad", - Input: []tf.Input{ - input, paddings, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Produces the max pool of the input tensor for quantized types. -// -// Arguments: -// input: The 4D (batch x rows x cols x depth) Tensor to MaxReduce over. -// min_input: The float value that the lowest quantized input value represents. -// max_input: The float value that the highest quantized input value represents. -// ksize: The size of the window for each dimension of the input tensor. -// The length must be 4 to match the number of dimensions of the input. -// strides: The stride of the sliding window for each dimension of the input -// tensor. The length must be 4 to match the number of dimensions of the input. -// padding: The type of padding algorithm to use. -// -// Returns: -// output -// min_output: The float value that the lowest quantized output value represents. -// max_output: The float value that the highest quantized output value represents. -func QuantizedMaxPool(scope *Scope, input tf.Output, min_input tf.Output, max_input tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output, min_output tf.Output, max_output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - opspec := tf.OpSpec{ - Type: "QuantizedMaxPool", - Input: []tf.Input{ - input, min_input, max_input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// ResourceApplyAdagradAttr is an optional argument to ResourceApplyAdagrad. -type ResourceApplyAdagradAttr func(optionalAttr) - -// ResourceApplyAdagradUseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyAdagradUseLocking(value bool) ResourceApplyAdagradAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// ResourceApplyAdagradUpdateSlots sets the optional update_slots attribute to value. -// If not specified, defaults to true -func ResourceApplyAdagradUpdateSlots(value bool) ResourceApplyAdagradAttr { - return func(m optionalAttr) { - m["update_slots"] = value - } -} - -// Update '*var' according to the adagrad scheme. -// -// accum += grad * grad -// var -= lr * grad * (1 / sqrt(accum)) -// -// Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// grad: The gradient. -// -// Returns the created operation. -func ResourceApplyAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, optional ...ResourceApplyAdagradAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceApplyAdagrad", - Input: []tf.Input{ - var_, accum, lr, grad, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Computes rectified linear gradients for a Relu operation. 
-// -// Arguments: -// gradients: The backpropagated gradients to the corresponding Relu operation. -// features: The features passed as input to the corresponding Relu operation, OR -// the outputs of that operation (both work equivalently). -// -// Returns `gradients * (features > 0)`. -func ReluGrad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ReluGrad", - Input: []tf.Input{ - gradients, features, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ResourceApplyMomentumAttr is an optional argument to ResourceApplyMomentum. -type ResourceApplyMomentumAttr func(optionalAttr) - -// ResourceApplyMomentumUseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyMomentumUseLocking(value bool) ResourceApplyMomentumAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// ResourceApplyMomentumUseNesterov sets the optional use_nesterov attribute to value. -// -// value: If `True`, the tensor passed to compute grad will be -// var - lr * momentum * accum, so in the end, the var you get is actually -// var - lr * momentum * accum. -// If not specified, defaults to false -func ResourceApplyMomentumUseNesterov(value bool) ResourceApplyMomentumAttr { - return func(m optionalAttr) { - m["use_nesterov"] = value - } -} - -// Update '*var' according to the momentum scheme. -// -// Set use_nesterov = True if you want to use Nesterov momentum. -// -// accum = accum * momentum + grad -// var -= lr * accum -// -// Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// grad: The gradient. -// momentum: Momentum. Must be a scalar. -// -// Returns the created operation. -func ResourceApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, momentum tf.Output, optional ...ResourceApplyMomentumAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceApplyMomentum", - Input: []tf.Input{ - var_, accum, lr, grad, momentum, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - // Reverses specific dimensions of a tensor. // // Given a `tensor`, and a `bool` tensor `dims` representing the dimensions @@ -43915,6 +43387,28 @@ func StringToHashBucketStrong(scope *Scope, input tf.Output, num_buckets int64, return op.Output(0) } +// Computes rectified linear gradients for a Relu operation. +// +// Arguments: +// gradients: The backpropagated gradients to the corresponding Relu operation. +// features: The features passed as input to the corresponding Relu operation, OR +// the outputs of that operation (both work equivalently). +// +// Returns `gradients * (features > 0)`. +func ReluGrad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ReluGrad", + Input: []tf.Input{ + gradients, features, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Retrieves the tree ensemble resource stamp token, number of trees and growing statistics. 
// // Arguments: @@ -44018,6 +43512,66 @@ func ResourceScatterNdAdd(scope *Scope, ref tf.Output, indices tf.Output, update return scope.AddOperation(opspec) } +// ResourceApplyMomentumAttr is an optional argument to ResourceApplyMomentum. +type ResourceApplyMomentumAttr func(optionalAttr) + +// ResourceApplyMomentumUseLocking sets the optional use_locking attribute to value. +// +// value: If `True`, updating of the var and accum tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceApplyMomentumUseLocking(value bool) ResourceApplyMomentumAttr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} + +// ResourceApplyMomentumUseNesterov sets the optional use_nesterov attribute to value. +// +// value: If `True`, the tensor passed to compute grad will be +// var - lr * momentum * accum, so in the end, the var you get is actually +// var - lr * momentum * accum. +// If not specified, defaults to false +func ResourceApplyMomentumUseNesterov(value bool) ResourceApplyMomentumAttr { + return func(m optionalAttr) { + m["use_nesterov"] = value + } +} + +// Update '*var' according to the momentum scheme. +// +// Set use_nesterov = True if you want to use Nesterov momentum. +// +// accum = accum * momentum + grad +// var -= lr * accum +// +// Arguments: +// var_: Should be from a Variable(). +// accum: Should be from a Variable(). +// lr: Scaling factor. Must be a scalar. +// grad: The gradient. +// momentum: Momentum. Must be a scalar. +// +// Returns the created operation. +func ResourceApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, momentum tf.Output, optional ...ResourceApplyMomentumAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ResourceApplyMomentum", + Input: []tf.Input{ + var_, accum, lr, grad, momentum, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + // An Op to sum inputs across replicated TPU instances. // // Each instance supplies its own input. @@ -44193,6 +43747,159 @@ func LoadTPUEmbeddingADAMParameters(scope *Scope, parameters tf.Output, momenta return scope.AddOperation(opspec) } +// ResourceScatterNdUpdateAttr is an optional argument to ResourceScatterNdUpdate. +type ResourceScatterNdUpdateAttr func(optionalAttr) + +// ResourceScatterNdUpdateUseLocking sets the optional use_locking attribute to value. +// +// value: An optional bool. Defaults to True. If True, the assignment will +// be protected by a lock; otherwise the behavior is undefined, +// but may exhibit less contention. +// If not specified, defaults to true +func ResourceScatterNdUpdateUseLocking(value bool) ResourceScatterNdUpdateAttr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} + +// Applies sparse `updates` to individual values or slices within a given +// +// variable according to `indices`. +// +// `ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`. +// +// `indices` must be integer tensor, containing indices into `ref`. +// It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`. +// +// The innermost dimension of `indices` (with length `K`) corresponds to +// indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th +// dimension of `ref`. 
+// +// `updates` is `Tensor` of rank `Q-1+P-K` with shape: +// +// ``` +// [d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]]. +// ``` +// +// For example, say we want to update 4 scattered elements to a rank-1 tensor to +// 8 elements. In Python, that update would look like this: +// +// ```python +// ref = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8]) +// indices = tf.constant([[4], [3], [1] ,[7]]) +// updates = tf.constant([9, 10, 11, 12]) +// update = tf.scatter_nd_update(ref, indices, updates) +// with tf.Session() as sess: +// print sess.run(update) +// ``` +// +// The resulting update to ref would look like this: +// +// [1, 11, 3, 10, 9, 6, 7, 12] +// +// See `tf.scatter_nd` for more details about how to make updates to +// slices. +// +// Arguments: +// ref: A resource handle. Must be from a VarHandleOp. +// indices: A Tensor. Must be one of the following types: int32, int64. +// A tensor of indices into ref. +// updates: A Tensor. Must have the same type as ref. A tensor of updated +// values to add to ref. +// +// Returns the created operation. +func ResourceScatterNdUpdate(scope *Scope, ref tf.Output, indices tf.Output, updates tf.Output, optional ...ResourceScatterNdUpdateAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ResourceScatterNdUpdate", + Input: []tf.Input{ + ref, indices, updates, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// EnqueueTPUEmbeddingSparseBatchAttr is an optional argument to EnqueueTPUEmbeddingSparseBatch. +type EnqueueTPUEmbeddingSparseBatchAttr func(optionalAttr) + +// EnqueueTPUEmbeddingSparseBatchDeviceOrdinal sets the optional device_ordinal attribute to value. +// +// value: The TPU device to use. Should be >= 0 and less than the number +// of TPU cores in the task on which the node is placed. +// If not specified, defaults to -1 +func EnqueueTPUEmbeddingSparseBatchDeviceOrdinal(value int64) EnqueueTPUEmbeddingSparseBatchAttr { + return func(m optionalAttr) { + m["device_ordinal"] = value + } +} + +// EnqueueTPUEmbeddingSparseBatchCombiners sets the optional combiners attribute to value. +// +// value: A list of string scalars, one for each embedding table that specify +// how to normalize the embedding activations after weighted summation. +// Supported combiners are 'mean', 'sum', or 'sqrtn'. It is invalid to have +// the sum of the weights be 0 for 'mean' or the sum of the squared weights be +// 0 for 'sqrtn'. If combiners isn't passed, the default is to use 'sum' for +// all tables. +// If not specified, defaults to <> +func EnqueueTPUEmbeddingSparseBatchCombiners(value []string) EnqueueTPUEmbeddingSparseBatchAttr { + return func(m optionalAttr) { + m["combiners"] = value + } +} + +// An op that enqueues TPUEmbedding input indices from a SparseTensor. +// +// This Op eases the porting of code that uses embedding_lookup_sparse(), +// although some Python preprocessing of the SparseTensor arguments to +// embedding_lookup_sparse() is required to produce the arguments to this Op, +// since only a single EnqueueTPUEmbeddingSparseBatch Op is allowed per training +// step. +// +// The tensors at corresponding positions in the three input lists +// must have the same shape, i.e. rank 1 with dim_size() equal to the total +// number of lookups into the table described by the corresponding table_id. 
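[Editor's note, not part of this patch or of the generated wrappers.go] Returning briefly to `ResourceScatterNdUpdate` above: its doc comment only shows a graph-mode Python example, so the following is a hedged, illustrative Go sketch of the same update using this binding. It assumes the generated helpers `op.Const`, `op.VarHandleOp`, `op.AssignVariableOp` and `op.ReadVariableOp` from the same `op` package and the `github.com/tensorflow/tensorflow/tensorflow/go` client; treat it as a sketch under those assumptions, not as the canonical API usage.

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()

	// A rank-1 int32 resource variable with 8 elements, initialised to 1..8.
	ref := op.VarHandleOp(s, tf.Int32, tf.MakeShape(8))
	initVar := op.AssignVariableOp(s, ref, op.Const(s, []int32{1, 2, 3, 4, 5, 6, 7, 8}))

	// Scatter 4 updates into positions 4, 3, 1 and 7, then read the variable back.
	indices := op.Const(s, [][]int32{{4}, {3}, {1}, {7}})
	updates := op.Const(s, []int32{9, 10, 11, 12})
	scatter := op.ResourceScatterNdUpdate(s, ref, indices, updates)
	value := op.ReadVariableOp(s, ref, tf.Int32)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	// Run the init and scatter ops in order, then fetch the updated variable.
	if _, err := sess.Run(nil, nil, []*tf.Operation{initVar}); err != nil {
		panic(err)
	}
	if _, err := sess.Run(nil, nil, []*tf.Operation{scatter}); err != nil {
		panic(err)
	}
	out, err := sess.Run(nil, []tf.Output{value}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // expected, per the doc comment: [1 11 3 10 9 6 7 12]
}
```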
+// +// Arguments: +// sample_indices: A list of rank 1 Tensors specifying the training example and +// feature to which the corresponding embedding_indices and aggregation_weights +// values belong. sample_indices[i] must equal b * nf + f, where nf is the +// number of features from the corresponding table, f is in [0, nf), and +// b is in [0, batch size). +// embedding_indices: A list of rank 1 Tensors, indices into the embedding tables. +// aggregation_weights: A list of rank 1 Tensors containing per sample -- i.e. per +// (training example, feature) -- aggregation weights. +// mode_override: A string input that overrides the mode specified in the +// TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference', +// 'training', 'backward_pass_only'}. When set to 'unspecified', the mode set +// in TPUEmbeddingConfiguration is used, otherwise mode_override is used. +// +// Returns the created operation. +func EnqueueTPUEmbeddingSparseBatch(scope *Scope, sample_indices []tf.Output, embedding_indices []tf.Output, aggregation_weights []tf.Output, mode_override tf.Output, optional ...EnqueueTPUEmbeddingSparseBatchAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "EnqueueTPUEmbeddingSparseBatch", + Input: []tf.Input{ + tf.OutputList(sample_indices), tf.OutputList(embedding_indices), tf.OutputList(aggregation_weights), mode_override, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + // Transforms a vector of brain.Example protos (as strings) into typed tensors. // // Arguments: @@ -44706,128 +44413,45 @@ func RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebug(scope *Scope, n return op.Output(0), op.Output(1), op.Output(2) } -// Returns the value stored in an Optional variant or raises an error if none exists. -func OptionalGetValue(scope *Scope, optional tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (components []tf.Output) { - if scope.Err() != nil { - return +// IRFFTAttr is an optional argument to IRFFT. +type IRFFTAttr func(optionalAttr) + +// IRFFTTreal sets the optional Treal attribute to value. +// If not specified, defaults to DT_FLOAT +func IRFFTTreal(value tf.DataType) IRFFTAttr { + return func(m optionalAttr) { + m["Treal"] = value } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "OptionalGetValue", - Input: []tf.Input{ - optional, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if components, idx, err = makeOutputList(op, idx, "components"); err != nil { - scope.UpdateErr("OptionalGetValue", err) - return - } - return components } -// Determine the script codes of a given tensor of Unicode integer code points. +// Inverse real-valued fast Fourier transform. // -// This operation converts Unicode code points to script codes corresponding to -// each code point. Script codes correspond to International Components for -// Unicode (ICU) UScriptCode values. See http://icu-project.org/apiref/icu4c/uscript_8h.html. -// Returns -1 (USCRIPT_INVALID_CODE) for invalid codepoints. Output shape will -// match input shape. +// Computes the inverse 1-dimensional discrete Fourier transform of a real-valued +// signal over the inner-most dimension of `input`. 
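[Editor's note, not part of this patch or of the generated wrappers.go] The `fft_length` handling described in this `IRFFT` doc comment can be illustrated with the short, assumed Go sketch below: a length-8 real signal has 8/2 + 1 = 5 unique complex components after the forward transform, and passing the same `fft_length` back to `IRFFT` makes the round trip unambiguous. `op.RFFT` is assumed to be the corresponding generated forward-transform wrapper in the same package.

```go
package main

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()

	// Length-8 real signal; its RFFT has 8/2 + 1 = 5 unique complex components.
	signal := op.Const(s, []float32{0, 1, 2, 3, 4, 3, 2, 1})
	fftLength := op.Const(s, []int32{8})
	spectrum := op.RFFT(s, signal, fftLength)

	// Passing fft_length explicitly makes the inverse unambiguous; for an
	// odd-length signal it could not be inferred from the spectrum size alone.
	_ = op.IRFFT(s, spectrum, fftLength)

	if _, err := s.Finalize(); err != nil {
		panic(err)
	}
}
```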
// -// Examples: +// The inner-most dimension of `input` is assumed to be the result of `RFFT`: the +// `fft_length / 2 + 1` unique components of the DFT of a real-valued signal. If +// `fft_length` is not provided, it is computed from the size of the inner-most +// dimension of `input` (`fft_length = 2 * (inner - 1)`). If the FFT length used to +// compute `input` is odd, it should be provided since it cannot be inferred +// properly. // -// >>> tf.strings.unicode_script([1, 31, 38]) -// +// Along the axis `IRFFT` is computed on, if `fft_length / 2 + 1` is smaller +// than the corresponding dimension of `input`, the dimension is cropped. If it is +// larger, the dimension is padded with zeros. // // Arguments: -// input: A Tensor of int32 Unicode code points. +// input: A complex tensor. +// fft_length: An int32 tensor of shape [1]. The FFT length. // -// Returns A Tensor of int32 script codes corresponding to each input code point. -func UnicodeScript(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "UnicodeScript", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// CropAndResizeAttr is an optional argument to CropAndResize. -type CropAndResizeAttr func(optionalAttr) - -// CropAndResizeMethod sets the optional method attribute to value. +// Returns A float32 tensor of the same rank as `input`. The inner-most +// dimension of `input` is replaced with the `fft_length` samples of its inverse +// 1D Fourier transform. // -// value: A string specifying the sampling method for resizing. It can be either -// `"bilinear"` or `"nearest"` and default to `"bilinear"`. Currently two sampling -// methods are supported: Bilinear and Nearest Neighbor. -// If not specified, defaults to "bilinear" -func CropAndResizeMethod(value string) CropAndResizeAttr { - return func(m optionalAttr) { - m["method"] = value - } -} - -// CropAndResizeExtrapolationValue sets the optional extrapolation_value attribute to value. -// -// value: Value used for extrapolation, when applicable. -// If not specified, defaults to 0 -func CropAndResizeExtrapolationValue(value float32) CropAndResizeAttr { - return func(m optionalAttr) { - m["extrapolation_value"] = value - } -} - -// Extracts crops from the input image tensor and resizes them. -// -// Extracts crops from the input image tensor and resizes them using bilinear -// sampling or nearest neighbor sampling (possibly with aspect ratio change) to a -// common output size specified by `crop_size`. This is more general than the -// `crop_to_bounding_box` op which extracts a fixed size slice from the input image -// and does not allow resizing or aspect ratio change. -// -// Returns a tensor with `crops` from the input `image` at positions defined at the -// bounding box locations in `boxes`. The cropped boxes are all resized (with -// bilinear or nearest neighbor interpolation) to a fixed -// `size = [crop_height, crop_width]`. The result is a 4-D tensor -// `[num_boxes, crop_height, crop_width, depth]`. The resizing is corner aligned. -// In particular, if `boxes = [[0, 0, 1, 1]]`, the method will give identical -// results to using `tf.image.resize_bilinear()` or -// `tf.image.resize_nearest_neighbor()`(depends on the `method` argument) with -// `align_corners=True`. -// -// Arguments: -// image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`. -// Both `image_height` and `image_width` need to be positive. 
-// boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor -// specifies the coordinates of a box in the `box_ind[i]` image and is specified -// in normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of -// `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the -// `[0, 1]` interval of normalized image height is mapped to -// `[0, image_height - 1]` in image height coordinates. We do allow `y1` > `y2`, in -// which case the sampled crop is an up-down flipped version of the original -// image. The width dimension is treated similarly. Normalized coordinates -// outside the `[0, 1]` range are allowed, in which case we use -// `extrapolation_value` to extrapolate the input image values. -// box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`. -// The value of `box_ind[i]` specifies the image that the `i`-th box refers to. -// crop_size: A 1-D tensor of 2 elements, `size = [crop_height, crop_width]`. All -// cropped image patches are resized to this size. The aspect ratio of the image -// content is not preserved. Both `crop_height` and `crop_width` need to be -// positive. -// -// Returns A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`. -func CropAndResize(scope *Scope, image tf.Output, boxes tf.Output, box_ind tf.Output, crop_size tf.Output, optional ...CropAndResizeAttr) (crops tf.Output) { +// @compatibility(numpy) +// Equivalent to np.fft.irfft +// @end_compatibility +func IRFFT(scope *Scope, input tf.Output, fft_length tf.Output, optional ...IRFFTAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -44836,9 +44460,9 @@ func CropAndResize(scope *Scope, image tf.Output, boxes tf.Output, box_ind tf.Ou a(attrs) } opspec := tf.OpSpec{ - Type: "CropAndResize", + Type: "IRFFT", Input: []tf.Input{ - image, boxes, box_ind, crop_size, + input, fft_length, }, Attrs: attrs, } @@ -44846,122 +44470,43 @@ func CropAndResize(scope *Scope, image tf.Output, boxes tf.Output, box_ind tf.Ou return op.Output(0) } -// DepthwiseConv2dNativeBackpropFilterAttr is an optional argument to DepthwiseConv2dNativeBackpropFilter. -type DepthwiseConv2dNativeBackpropFilterAttr func(optionalAttr) - -// DepthwiseConv2dNativeBackpropFilterExplicitPaddings sets the optional explicit_paddings attribute to value. -// If not specified, defaults to <> -func DepthwiseConv2dNativeBackpropFilterExplicitPaddings(value []int64) DepthwiseConv2dNativeBackpropFilterAttr { - return func(m optionalAttr) { - m["explicit_paddings"] = value - } -} - -// DepthwiseConv2dNativeBackpropFilterDataFormat sets the optional data_format attribute to value. +// Computes the sum along sparse segments of a tensor divided by the sqrt of N. // -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, height, width, channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, channels, height, width]. -// If not specified, defaults to "NHWC" -func DepthwiseConv2dNativeBackpropFilterDataFormat(value string) DepthwiseConv2dNativeBackpropFilterAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// DepthwiseConv2dNativeBackpropFilterDilations sets the optional dilations attribute to value. +// N is the size of the segment being reduced. +// +// See `tf.sparse.segment_sum` for usage examples. // -// value: 1-D tensor of length 4. The dilation factor for each dimension of -// `input`. 
If set to k > 1, there will be k-1 skipped cells between each filter -// element on that dimension. The dimension order is determined by the value of -// `data_format`, see above for details. Dilations in the batch and depth -// dimensions must be 1. -// If not specified, defaults to -func DepthwiseConv2dNativeBackpropFilterDilations(value []int64) DepthwiseConv2dNativeBackpropFilterAttr { - return func(m optionalAttr) { - m["dilations"] = value - } -} - -// Computes the gradients of depthwise convolution with respect to the filter. // // Arguments: -// input: 4-D with shape based on `data_format`. For example, if -// `data_format` is 'NHWC' then `input` is a 4-D `[batch, in_height, -// in_width, in_channels]` tensor. -// filter_sizes: An integer vector representing the tensor shape of `filter`, -// where `filter` is a 4-D -// `[filter_height, filter_width, in_channels, depthwise_multiplier]` tensor. -// out_backprop: 4-D with shape based on `data_format`. -// For example, if `data_format` is 'NHWC' then -// out_backprop shape is `[batch, out_height, out_width, out_channels]`. -// Gradients w.r.t. the output of the convolution. -// strides: The stride of the sliding window for each dimension of the input -// of the convolution. -// padding: The type of padding algorithm to use. // -// Returns 4-D with shape -// `[filter_height, filter_width, in_channels, out_channels]`. Gradient w.r.t. -// the `filter` input of the convolution. -func DepthwiseConv2dNativeBackpropFilter(scope *Scope, input tf.Output, filter_sizes tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeBackpropFilterAttr) (output tf.Output) { +// indices: A 1-D tensor. Has same rank as `segment_ids`. +// segment_ids: A 1-D tensor. Values should be sorted and can be repeated. +// +// Returns Has same shape as data, except for dimension 0 which +// has size `k`, the number of segments. +func SparseSegmentSqrtN(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "DepthwiseConv2dNativeBackpropFilter", + Type: "SparseSegmentSqrtN", Input: []tf.Input{ - input, filter_sizes, out_backprop, + data, indices, segment_ids, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Creates a dataset that zips together `input_datasets`. +// Computes square root of x element-wise. // -// The elements of the resulting dataset are created by zipping corresponding -// elements from each of the input datasets. -// -// The size of the resulting dataset will match the size of the smallest input -// dataset, and no error will be raised if input datasets have different sizes. -// -// Arguments: -// input_datasets: List of `N` variant Tensors representing datasets to be zipped together. -// -// -func ZipDataset(scope *Scope, input_datasets []tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "ZipDataset", - Input: []tf.Input{ - tf.OutputList(input_datasets), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Rounds the values of a tensor to the nearest integer, element-wise. -// -// Rounds half to even. 
Also known as bankers rounding. If you want to round -// according to the current system rounding mode use std::cint. -func Round(scope *Scope, x tf.Output) (y tf.Output) { +// I.e., \\(y = \sqrt{x} = x^{1/2}\\). +func Sqrt(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Round", + Type: "Sqrt", Input: []tf.Input{ x, }, @@ -44970,49 +44515,43 @@ func Round(scope *Scope, x tf.Output) (y tf.Output) { return op.Output(0) } -// Creates a tree ensemble model and returns a handle to it. +// Gradient op for `MirrorPad` op. This op folds a mirror-padded tensor. +// +// This operation folds the padded areas of `input` by `MirrorPad` according to the +// `paddings` you specify. `paddings` must be the same as `paddings` argument +// given to the corresponding `MirrorPad` op. +// +// The folded size of each dimension D of the output is: +// +// `input.dim_size(D) - paddings(D, 0) - paddings(D, 1)` +// +// For example: +// +// ``` +// # 't' is [[1, 2, 3], [4, 5, 6], [7, 8, 9]]. +// # 'paddings' is [[0, 1]], [0, 1]]. +// # 'mode' is SYMMETRIC. +// # rank of 't' is 2. +// pad(t, paddings) ==> [[ 1, 5] +// [11, 28]] +// ``` // // Arguments: -// tree_ensemble_handle: Handle to the tree ensemble resource to be created. -// stamp_token: Token to use as the initial value of the resource stamp. -// tree_ensemble_serialized: Serialized proto of the tree ensemble. +// input: The input tensor to be folded. +// paddings: A two-column matrix specifying the padding sizes. The number of +// rows must be the same as the rank of `input`. +// mode: The mode used in the `MirrorPad` op. // -// Returns the created operation. -func BoostedTreesCreateEnsemble(scope *Scope, tree_ensemble_handle tf.Output, stamp_token tf.Output, tree_ensemble_serialized tf.Output) (o *tf.Operation) { +// Returns The folded tensor. +func MirrorPadGrad(scope *Scope, input tf.Output, paddings tf.Output, mode string) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"mode": mode} opspec := tf.OpSpec{ - Type: "BoostedTreesCreateEnsemble", + Type: "MirrorPadGrad", Input: []tf.Input{ - tree_ensemble_handle, stamp_token, tree_ensemble_serialized, - }, - } - return scope.AddOperation(opspec) -} - -// Calculates the softmax of a CSRSparseMatrix. -// -// Calculate the softmax of the innermost dimensions of a SparseMatrix. -// -// Missing values are treated as `-inf` (i.e., logits of zero probability); and -// the output has the same sparsity structure as the input (though missing values -// in the output may now be treated as having probability zero). -// -// Arguments: -// logits: A CSRSparseMatrix. -// -// -// Returns A CSRSparseMatrix. -func SparseMatrixSoftmax(scope *Scope, logits tf.Output, type_ tf.DataType) (softmax tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"type": type_} - opspec := tf.OpSpec{ - Type: "SparseMatrixSoftmax", - Input: []tf.Input{ - logits, + input, paddings, }, Attrs: attrs, } @@ -45020,154 +44559,74 @@ func SparseMatrixSoftmax(scope *Scope, logits tf.Output, type_ tf.DataType) (sof return op.Output(0) } -// RestoreAttr is an optional argument to Restore. -type RestoreAttr func(optionalAttr) - -// RestorePreferredShard sets the optional preferred_shard attribute to value. -// -// value: Index of file to open first if multiple files match -// `file_pattern`. 
-// If not specified, defaults to -1 -func RestorePreferredShard(value int64) RestoreAttr { - return func(m optionalAttr) { - m["preferred_shard"] = value - } -} - -// Restores a tensor from checkpoint files. -// -// Reads a tensor stored in one or several files. If there are several files (for -// instance because a tensor was saved as slices), `file_pattern` may contain -// wildcard symbols (`*` and `?`) in the filename portion only, not in the -// directory portion. -// -// If a `file_pattern` matches several files, `preferred_shard` can be used to hint -// in which file the requested tensor is likely to be found. This op will first -// open the file at index `preferred_shard` in the list of matching files and try -// to restore tensors from that file. Only if some tensors or tensor slices are -// not found in that first file, then the Op opens all the files. Setting -// `preferred_shard` to match the value passed as the `shard` input -// of a matching `Save` Op may speed up Restore. This attribute only affects -// performance, not correctness. The default value -1 means files are processed in -// order. -// -// See also `RestoreSlice`. +// Produces the max pool of the input tensor for quantized types. // // Arguments: -// file_pattern: Must have a single element. The pattern of the files from -// which we read the tensor. -// tensor_name: Must have a single element. The name of the tensor to be -// restored. -// dt: The type of the tensor to be restored. -// -// Returns The restored tensor. -func Restore(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, dt tf.DataType, optional ...RestoreAttr) (tensor tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dt": dt} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Restore", - Input: []tf.Input{ - file_pattern, tensor_name, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns the next record (key, value pair) produced by a Reader. -// -// Will dequeue from the input queue if necessary (e.g. when the -// Reader needs to start reading from a new file since it has finished -// with the previous file). -// -// Arguments: -// reader_handle: Handle to a Reader. -// queue_handle: Handle to a Queue, with string work items. +// input: The 4D (batch x rows x cols x depth) Tensor to MaxReduce over. +// min_input: The float value that the lowest quantized input value represents. +// max_input: The float value that the highest quantized input value represents. +// ksize: The size of the window for each dimension of the input tensor. +// The length must be 4 to match the number of dimensions of the input. +// strides: The stride of the sliding window for each dimension of the input +// tensor. The length must be 4 to match the number of dimensions of the input. +// padding: The type of padding algorithm to use. // // Returns: -// key: A scalar. -// value: A scalar. -func ReaderReadV2(scope *Scope, reader_handle tf.Output, queue_handle tf.Output) (key tf.Output, value tf.Output) { +// output +// min_output: The float value that the lowest quantized output value represents. +// max_output: The float value that the highest quantized output value represents. 
+func QuantizedMaxPool(scope *Scope, input tf.Output, min_input tf.Output, max_input tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output, min_output tf.Output, max_output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} opspec := tf.OpSpec{ - Type: "ReaderReadV2", + Type: "QuantizedMaxPool", Input: []tf.Input{ - reader_handle, queue_handle, + input, min_input, max_input, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0), op.Output(1), op.Output(2) } -// CumprodAttr is an optional argument to Cumprod. -type CumprodAttr func(optionalAttr) +// ResourceApplyAdagradAttr is an optional argument to ResourceApplyAdagrad. +type ResourceApplyAdagradAttr func(optionalAttr) -// CumprodExclusive sets the optional exclusive attribute to value. +// ResourceApplyAdagradUseLocking sets the optional use_locking attribute to value. // -// value: If `True`, perform exclusive cumprod. +// value: If `True`, updating of the var and accum tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. // If not specified, defaults to false -func CumprodExclusive(value bool) CumprodAttr { +func ResourceApplyAdagradUseLocking(value bool) ResourceApplyAdagradAttr { return func(m optionalAttr) { - m["exclusive"] = value + m["use_locking"] = value } } -// CumprodReverse sets the optional reverse attribute to value. -// -// value: A `bool` (default: False). -// If not specified, defaults to false -func CumprodReverse(value bool) CumprodAttr { +// ResourceApplyAdagradUpdateSlots sets the optional update_slots attribute to value. +// If not specified, defaults to true +func ResourceApplyAdagradUpdateSlots(value bool) ResourceApplyAdagradAttr { return func(m optionalAttr) { - m["reverse"] = value + m["update_slots"] = value } } -// Compute the cumulative product of the tensor `x` along `axis`. +// Update '*var' according to the adagrad scheme. // -// By default, this op performs an inclusive cumprod, which means that the first -// element of the input is identical to the first element of the output: -// -// ```python -// tf.cumprod([a, b, c]) # => [a, a * b, a * b * c] -// ``` -// -// By setting the `exclusive` kwarg to `True`, an exclusive cumprod is -// performed instead: -// -// ```python -// tf.cumprod([a, b, c], exclusive=True) # => [1, a, a * b] -// ``` -// -// By setting the `reverse` kwarg to `True`, the cumprod is performed in the -// opposite direction: -// -// ```python -// tf.cumprod([a, b, c], reverse=True) # => [a * b * c, b * c, c] -// ``` -// -// This is more efficient than using separate `tf.reverse` ops. -// -// The `reverse` and `exclusive` kwargs can also be combined: -// -// ```python -// tf.cumprod([a, b, c], exclusive=True, reverse=True) # => [b * c, c, 1] -// ``` +// accum += grad * grad +// var -= lr * grad * (1 / sqrt(accum)) // // Arguments: -// x: A `Tensor`. Must be one of the following types: `float32`, `float64`, -// `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`, -// `complex128`, `qint8`, `quint8`, `qint32`, `half`. -// axis: A `Tensor` of type `int32` (default: 0). Must be in the range -// `[-rank(x), rank(x))`. -func Cumprod(scope *Scope, x tf.Output, axis tf.Output, optional ...CumprodAttr) (out tf.Output) { +// var_: Should be from a Variable(). +// accum: Should be from a Variable(). +// lr: Scaling factor. Must be a scalar. 
+// grad: The gradient. +// +// Returns the created operation. +func ResourceApplyAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, optional ...ResourceApplyAdagradAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -45176,70 +44635,9 @@ func Cumprod(scope *Scope, x tf.Output, axis tf.Output, optional ...CumprodAttr) a(attrs) } opspec := tf.OpSpec{ - Type: "Cumprod", + Type: "ResourceApplyAdagrad", Input: []tf.Input{ - x, axis, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugAttr is an optional argument to LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebug. -type LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugAttr func(optionalAttr) - -// LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugTableId(value int64) LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugTableName(value string) LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugConfig(value string) LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Load SGD embedding parameters. -// -// An op that loads optimization parameters into HBM for embedding. Must be -// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct -// embedding table configuration. For example, this op is used to install -// parameters that are loaded from a checkpoint before a training loop is -// executed. -// -// Arguments: -// parameters: Value of parameters used in the stochastic gradient descent optimization algorithm. -// gradient_accumulators: Value of gradient_accumulators used in the Adadelta optimization algorithm. -// -// -// -// Returns the created operation. -func LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebug(scope *Scope, parameters tf.Output, gradient_accumulators tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebug", - Input: []tf.Input{ - parameters, gradient_accumulators, + var_, accum, lr, grad, }, Attrs: attrs, } @@ -46802,6 +46200,61 @@ func LoadTPUEmbeddingFTRLParameters(scope *Scope, parameters tf.Output, accumula return scope.AddOperation(opspec) } +// RetrieveTPUEmbeddingFTRLParametersAttr is an optional argument to RetrieveTPUEmbeddingFTRLParameters. 
+type RetrieveTPUEmbeddingFTRLParametersAttr func(optionalAttr) + +// RetrieveTPUEmbeddingFTRLParametersTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 +func RetrieveTPUEmbeddingFTRLParametersTableId(value int64) RetrieveTPUEmbeddingFTRLParametersAttr { + return func(m optionalAttr) { + m["table_id"] = value + } +} + +// RetrieveTPUEmbeddingFTRLParametersTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func RetrieveTPUEmbeddingFTRLParametersTableName(value string) RetrieveTPUEmbeddingFTRLParametersAttr { + return func(m optionalAttr) { + m["table_name"] = value + } +} + +// RetrieveTPUEmbeddingFTRLParametersConfig sets the optional config attribute to value. +// If not specified, defaults to "" +func RetrieveTPUEmbeddingFTRLParametersConfig(value string) RetrieveTPUEmbeddingFTRLParametersAttr { + return func(m optionalAttr) { + m["config"] = value + } +} + +// Retrieve FTRL embedding parameters. +// +// An op that retrieves optimization parameters from embedding to host +// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up +// the correct embedding table configuration. For example, this op is +// used to retrieve updated parameters before saving a checkpoint. +// +// Returns: +// parameters: Parameter parameters updated by the FTRL optimization algorithm. +// accumulators: Parameter accumulators updated by the FTRL optimization algorithm. +// linears: Parameter linears updated by the FTRL optimization algorithm. +func RetrieveTPUEmbeddingFTRLParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingFTRLParametersAttr) (parameters tf.Output, accumulators tf.Output, linears tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "RetrieveTPUEmbeddingFTRLParameters", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + // UnicodeDecodeWithOffsetsAttr is an optional argument to UnicodeDecodeWithOffsets. type UnicodeDecodeWithOffsetsAttr func(optionalAttr) @@ -47139,61 +46592,6 @@ func ResourceSparseApplyKerasMomentum(scope *Scope, var_ tf.Output, accum tf.Out return scope.AddOperation(opspec) } -// RetrieveTPUEmbeddingFTRLParametersAttr is an optional argument to RetrieveTPUEmbeddingFTRLParameters. -type RetrieveTPUEmbeddingFTRLParametersAttr func(optionalAttr) - -// RetrieveTPUEmbeddingFTRLParametersTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func RetrieveTPUEmbeddingFTRLParametersTableId(value int64) RetrieveTPUEmbeddingFTRLParametersAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// RetrieveTPUEmbeddingFTRLParametersTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingFTRLParametersTableName(value string) RetrieveTPUEmbeddingFTRLParametersAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// RetrieveTPUEmbeddingFTRLParametersConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingFTRLParametersConfig(value string) RetrieveTPUEmbeddingFTRLParametersAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Retrieve FTRL embedding parameters. 
-// -// An op that retrieves optimization parameters from embedding to host -// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up -// the correct embedding table configuration. For example, this op is -// used to retrieve updated parameters before saving a checkpoint. -// -// Returns: -// parameters: Parameter parameters updated by the FTRL optimization algorithm. -// accumulators: Parameter accumulators updated by the FTRL optimization algorithm. -// linears: Parameter linears updated by the FTRL optimization algorithm. -func RetrieveTPUEmbeddingFTRLParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingFTRLParametersAttr) (parameters tf.Output, accumulators tf.Output, linears tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RetrieveTPUEmbeddingFTRLParameters", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - // Returns the result of a TPU compilation. // // This operation returns the result of a TPU compilation as a serialized @@ -49320,6 +48718,608 @@ func MakeUnique(scope *Scope, input tf.Output) (output tf.Output) { return op.Output(0) } +// Returns the value stored in an Optional variant or raises an error if none exists. +func OptionalGetValue(scope *Scope, optional tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (components []tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "OptionalGetValue", + Input: []tf.Input{ + optional, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if components, idx, err = makeOutputList(op, idx, "components"); err != nil { + scope.UpdateErr("OptionalGetValue", err) + return + } + return components +} + +// Determine the script codes of a given tensor of Unicode integer code points. +// +// This operation converts Unicode code points to script codes corresponding to +// each code point. Script codes correspond to International Components for +// Unicode (ICU) UScriptCode values. See http://icu-project.org/apiref/icu4c/uscript_8h.html. +// Returns -1 (USCRIPT_INVALID_CODE) for invalid codepoints. Output shape will +// match input shape. +// +// Examples: +// +// >>> tf.strings.unicode_script([1, 31, 38]) +// +// +// Arguments: +// input: A Tensor of int32 Unicode code points. +// +// Returns A Tensor of int32 script codes corresponding to each input code point. +func UnicodeScript(scope *Scope, input tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "UnicodeScript", + Input: []tf.Input{ + input, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// CropAndResizeAttr is an optional argument to CropAndResize. +type CropAndResizeAttr func(optionalAttr) + +// CropAndResizeMethod sets the optional method attribute to value. +// +// value: A string specifying the sampling method for resizing. It can be either +// `"bilinear"` or `"nearest"` and default to `"bilinear"`. Currently two sampling +// methods are supported: Bilinear and Nearest Neighbor. 
+// If not specified, defaults to "bilinear" +func CropAndResizeMethod(value string) CropAndResizeAttr { + return func(m optionalAttr) { + m["method"] = value + } +} + +// CropAndResizeExtrapolationValue sets the optional extrapolation_value attribute to value. +// +// value: Value used for extrapolation, when applicable. +// If not specified, defaults to 0 +func CropAndResizeExtrapolationValue(value float32) CropAndResizeAttr { + return func(m optionalAttr) { + m["extrapolation_value"] = value + } +} + +// Extracts crops from the input image tensor and resizes them. +// +// Extracts crops from the input image tensor and resizes them using bilinear +// sampling or nearest neighbor sampling (possibly with aspect ratio change) to a +// common output size specified by `crop_size`. This is more general than the +// `crop_to_bounding_box` op which extracts a fixed size slice from the input image +// and does not allow resizing or aspect ratio change. +// +// Returns a tensor with `crops` from the input `image` at positions defined at the +// bounding box locations in `boxes`. The cropped boxes are all resized (with +// bilinear or nearest neighbor interpolation) to a fixed +// `size = [crop_height, crop_width]`. The result is a 4-D tensor +// `[num_boxes, crop_height, crop_width, depth]`. The resizing is corner aligned. +// In particular, if `boxes = [[0, 0, 1, 1]]`, the method will give identical +// results to using `tf.image.resize_bilinear()` or +// `tf.image.resize_nearest_neighbor()`(depends on the `method` argument) with +// `align_corners=True`. +// +// Arguments: +// image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`. +// Both `image_height` and `image_width` need to be positive. +// boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor +// specifies the coordinates of a box in the `box_ind[i]` image and is specified +// in normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of +// `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the +// `[0, 1]` interval of normalized image height is mapped to +// `[0, image_height - 1]` in image height coordinates. We do allow `y1` > `y2`, in +// which case the sampled crop is an up-down flipped version of the original +// image. The width dimension is treated similarly. Normalized coordinates +// outside the `[0, 1]` range are allowed, in which case we use +// `extrapolation_value` to extrapolate the input image values. +// box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`. +// The value of `box_ind[i]` specifies the image that the `i`-th box refers to. +// crop_size: A 1-D tensor of 2 elements, `size = [crop_height, crop_width]`. All +// cropped image patches are resized to this size. The aspect ratio of the image +// content is not preserved. Both `crop_height` and `crop_width` need to be +// positive. +// +// Returns A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`. 
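[Editor's note, not part of this patch or of the generated wrappers.go] As a hedged usage sketch for `CropAndResize`: two normalized boxes are cropped from a single 128x128 RGB image and resized to a common 24x24 output, with the default bilinear sampling made explicit via the `CropAndResizeMethod` attribute. `op.Placeholder` and `op.Const` are assumed to be the generated wrappers from the same package.

```go
package main

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()

	// A batch of one 128x128 RGB image, fed at run time.
	image := op.Placeholder(s, tf.Float, op.PlaceholderShape(tf.MakeShape(1, 128, 128, 3)))

	// Two boxes in normalized [y1, x1, y2, x2] coordinates, both taken from image 0.
	boxes := op.Const(s, [][]float32{{0, 0, 0.5, 0.5}, {0.25, 0.25, 1, 1}})
	boxInd := op.Const(s, []int32{0, 0})
	cropSize := op.Const(s, []int32{24, 24})

	// Result shape: [2, 24, 24, 3].
	_ = op.CropAndResize(s, image, boxes, boxInd, cropSize,
		op.CropAndResizeMethod("bilinear"))

	if _, err := s.Finalize(); err != nil {
		panic(err)
	}
}
```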
+func CropAndResize(scope *Scope, image tf.Output, boxes tf.Output, box_ind tf.Output, crop_size tf.Output, optional ...CropAndResizeAttr) (crops tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "CropAndResize", + Input: []tf.Input{ + image, boxes, box_ind, crop_size, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// DepthwiseConv2dNativeBackpropFilterAttr is an optional argument to DepthwiseConv2dNativeBackpropFilter. +type DepthwiseConv2dNativeBackpropFilterAttr func(optionalAttr) + +// DepthwiseConv2dNativeBackpropFilterExplicitPaddings sets the optional explicit_paddings attribute to value. +// If not specified, defaults to <> +func DepthwiseConv2dNativeBackpropFilterExplicitPaddings(value []int64) DepthwiseConv2dNativeBackpropFilterAttr { + return func(m optionalAttr) { + m["explicit_paddings"] = value + } +} + +// DepthwiseConv2dNativeBackpropFilterDataFormat sets the optional data_format attribute to value. +// +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the data is stored in the order of: +// [batch, height, width, channels]. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, channels, height, width]. +// If not specified, defaults to "NHWC" +func DepthwiseConv2dNativeBackpropFilterDataFormat(value string) DepthwiseConv2dNativeBackpropFilterAttr { + return func(m optionalAttr) { + m["data_format"] = value + } +} + +// DepthwiseConv2dNativeBackpropFilterDilations sets the optional dilations attribute to value. +// +// value: 1-D tensor of length 4. The dilation factor for each dimension of +// `input`. If set to k > 1, there will be k-1 skipped cells between each filter +// element on that dimension. The dimension order is determined by the value of +// `data_format`, see above for details. Dilations in the batch and depth +// dimensions must be 1. +// If not specified, defaults to +func DepthwiseConv2dNativeBackpropFilterDilations(value []int64) DepthwiseConv2dNativeBackpropFilterAttr { + return func(m optionalAttr) { + m["dilations"] = value + } +} + +// Computes the gradients of depthwise convolution with respect to the filter. +// +// Arguments: +// input: 4-D with shape based on `data_format`. For example, if +// `data_format` is 'NHWC' then `input` is a 4-D `[batch, in_height, +// in_width, in_channels]` tensor. +// filter_sizes: An integer vector representing the tensor shape of `filter`, +// where `filter` is a 4-D +// `[filter_height, filter_width, in_channels, depthwise_multiplier]` tensor. +// out_backprop: 4-D with shape based on `data_format`. +// For example, if `data_format` is 'NHWC' then +// out_backprop shape is `[batch, out_height, out_width, out_channels]`. +// Gradients w.r.t. the output of the convolution. +// strides: The stride of the sliding window for each dimension of the input +// of the convolution. +// padding: The type of padding algorithm to use. +// +// Returns 4-D with shape +// `[filter_height, filter_width, in_channels, out_channels]`. Gradient w.r.t. +// the `filter` input of the convolution. 
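[Editor's note, not part of this patch or of the generated wrappers.go] A similar assumed sketch for `DepthwiseConv2dNativeBackpropFilter` follows: with "SAME" padding and unit strides the out_backprop tensor keeps the input's spatial size, and its channel count is in_channels times the depthwise multiplier. The placeholder shapes and 3x3 filter size are illustrative choices only.

```go
package main

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()

	// NHWC input: one 32x32 image with 3 channels.
	input := op.Placeholder(s, tf.Float, op.PlaceholderShape(tf.MakeShape(1, 32, 32, 3)))

	// Gradients w.r.t. the convolution output: 3 in_channels * multiplier 2 = 6 channels.
	outBackprop := op.Placeholder(s, tf.Float, op.PlaceholderShape(tf.MakeShape(1, 32, 32, 6)))

	// Shape of the depthwise filter: [filter_height, filter_width, in_channels, multiplier].
	filterSizes := op.Const(s, []int32{3, 3, 3, 2})

	// Returns the gradient w.r.t. the filter, shaped [3, 3, 3, 2].
	_ = op.DepthwiseConv2dNativeBackpropFilter(s, input, filterSizes, outBackprop,
		[]int64{1, 1, 1, 1}, "SAME")

	if _, err := s.Finalize(); err != nil {
		panic(err)
	}
}
```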
+func DepthwiseConv2dNativeBackpropFilter(scope *Scope, input tf.Output, filter_sizes tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeBackpropFilterAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "DepthwiseConv2dNativeBackpropFilter", + Input: []tf.Input{ + input, filter_sizes, out_backprop, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Creates a dataset that zips together `input_datasets`. +// +// The elements of the resulting dataset are created by zipping corresponding +// elements from each of the input datasets. +// +// The size of the resulting dataset will match the size of the smallest input +// dataset, and no error will be raised if input datasets have different sizes. +// +// Arguments: +// input_datasets: List of `N` variant Tensors representing datasets to be zipped together. +// +// +func ZipDataset(scope *Scope, input_datasets []tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "ZipDataset", + Input: []tf.Input{ + tf.OutputList(input_datasets), + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Rounds the values of a tensor to the nearest integer, element-wise. +// +// Rounds half to even. Also known as bankers rounding. If you want to round +// according to the current system rounding mode use std::cint. +func Round(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Round", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Creates a tree ensemble model and returns a handle to it. +// +// Arguments: +// tree_ensemble_handle: Handle to the tree ensemble resource to be created. +// stamp_token: Token to use as the initial value of the resource stamp. +// tree_ensemble_serialized: Serialized proto of the tree ensemble. +// +// Returns the created operation. +func BoostedTreesCreateEnsemble(scope *Scope, tree_ensemble_handle tf.Output, stamp_token tf.Output, tree_ensemble_serialized tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "BoostedTreesCreateEnsemble", + Input: []tf.Input{ + tree_ensemble_handle, stamp_token, tree_ensemble_serialized, + }, + } + return scope.AddOperation(opspec) +} + +// Calculates the softmax of a CSRSparseMatrix. +// +// Calculate the softmax of the innermost dimensions of a SparseMatrix. +// +// Missing values are treated as `-inf` (i.e., logits of zero probability); and +// the output has the same sparsity structure as the input (though missing values +// in the output may now be treated as having probability zero). +// +// Arguments: +// logits: A CSRSparseMatrix. +// +// +// Returns A CSRSparseMatrix. 
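A minimal call sketch for the wrapper defined below (not part of the generated file; csrLogits is assumed to be a variant tf.Output holding a CSRSparseMatrix produced by another sparse-matrix op on the same scope s):

    // Softmax over the innermost dimension of the CSR matrix, computed in float32.
    softmax := op.SparseMatrixSoftmax(s, csrLogits, tf.Float)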
+func SparseMatrixSoftmax(scope *Scope, logits tf.Output, type_ tf.DataType) (softmax tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"type": type_} + opspec := tf.OpSpec{ + Type: "SparseMatrixSoftmax", + Input: []tf.Input{ + logits, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// RestoreAttr is an optional argument to Restore. +type RestoreAttr func(optionalAttr) + +// RestorePreferredShard sets the optional preferred_shard attribute to value. +// +// value: Index of file to open first if multiple files match +// `file_pattern`. +// If not specified, defaults to -1 +func RestorePreferredShard(value int64) RestoreAttr { + return func(m optionalAttr) { + m["preferred_shard"] = value + } +} + +// Restores a tensor from checkpoint files. +// +// Reads a tensor stored in one or several files. If there are several files (for +// instance because a tensor was saved as slices), `file_pattern` may contain +// wildcard symbols (`*` and `?`) in the filename portion only, not in the +// directory portion. +// +// If a `file_pattern` matches several files, `preferred_shard` can be used to hint +// in which file the requested tensor is likely to be found. This op will first +// open the file at index `preferred_shard` in the list of matching files and try +// to restore tensors from that file. Only if some tensors or tensor slices are +// not found in that first file, then the Op opens all the files. Setting +// `preferred_shard` to match the value passed as the `shard` input +// of a matching `Save` Op may speed up Restore. This attribute only affects +// performance, not correctness. The default value -1 means files are processed in +// order. +// +// See also `RestoreSlice`. +// +// Arguments: +// file_pattern: Must have a single element. The pattern of the files from +// which we read the tensor. +// tensor_name: Must have a single element. The name of the tensor to be +// restored. +// dt: The type of the tensor to be restored. +// +// Returns The restored tensor. +func Restore(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, dt tf.DataType, optional ...RestoreAttr) (tensor tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dt": dt} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Restore", + Input: []tf.Input{ + file_pattern, tensor_name, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// LoadTPUEmbeddingMDLAdagradLightParametersAttr is an optional argument to LoadTPUEmbeddingMDLAdagradLightParameters. +type LoadTPUEmbeddingMDLAdagradLightParametersAttr func(optionalAttr) + +// LoadTPUEmbeddingMDLAdagradLightParametersTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 +func LoadTPUEmbeddingMDLAdagradLightParametersTableId(value int64) LoadTPUEmbeddingMDLAdagradLightParametersAttr { + return func(m optionalAttr) { + m["table_id"] = value + } +} + +// LoadTPUEmbeddingMDLAdagradLightParametersTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func LoadTPUEmbeddingMDLAdagradLightParametersTableName(value string) LoadTPUEmbeddingMDLAdagradLightParametersAttr { + return func(m optionalAttr) { + m["table_name"] = value + } +} + +// LoadTPUEmbeddingMDLAdagradLightParametersConfig sets the optional config attribute to value. 
+// If not specified, defaults to "" +func LoadTPUEmbeddingMDLAdagradLightParametersConfig(value string) LoadTPUEmbeddingMDLAdagradLightParametersAttr { + return func(m optionalAttr) { + m["config"] = value + } +} + +// Load MDL Adagrad Light embedding parameters. +// +// An op that loads optimization parameters into HBM for embedding. Must be +// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct +// embedding table configuration. For example, this op is used to install +// parameters that are loaded from a checkpoint before a training loop is +// executed. +// +// Arguments: +// parameters: Value of parameters used in the MDL Adagrad Light optimization algorithm. +// accumulators: Value of accumulators used in the MDL Adagrad Light optimization algorithm. +// weights: Value of weights used in the MDL Adagrad Light optimization algorithm. +// benefits: Value of benefits used in the MDL Adagrad Light optimization algorithm. +// +// +// +// Returns the created operation. +func LoadTPUEmbeddingMDLAdagradLightParameters(scope *Scope, parameters tf.Output, accumulators tf.Output, weights tf.Output, benefits tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingMDLAdagradLightParametersAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "LoadTPUEmbeddingMDLAdagradLightParameters", + Input: []tf.Input{ + parameters, accumulators, weights, benefits, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// Returns the next record (key, value pair) produced by a Reader. +// +// Will dequeue from the input queue if necessary (e.g. when the +// Reader needs to start reading from a new file since it has finished +// with the previous file). +// +// Arguments: +// reader_handle: Handle to a Reader. +// queue_handle: Handle to a Queue, with string work items. +// +// Returns: +// key: A scalar. +// value: A scalar. +func ReaderReadV2(scope *Scope, reader_handle tf.Output, queue_handle tf.Output) (key tf.Output, value tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ReaderReadV2", + Input: []tf.Input{ + reader_handle, queue_handle, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + +// CumprodAttr is an optional argument to Cumprod. +type CumprodAttr func(optionalAttr) + +// CumprodExclusive sets the optional exclusive attribute to value. +// +// value: If `True`, perform exclusive cumprod. +// If not specified, defaults to false +func CumprodExclusive(value bool) CumprodAttr { + return func(m optionalAttr) { + m["exclusive"] = value + } +} + +// CumprodReverse sets the optional reverse attribute to value. +// +// value: A `bool` (default: False). +// If not specified, defaults to false +func CumprodReverse(value bool) CumprodAttr { + return func(m optionalAttr) { + m["reverse"] = value + } +} + +// Compute the cumulative product of the tensor `x` along `axis`. 
+// +// By default, this op performs an inclusive cumprod, which means that the first +// element of the input is identical to the first element of the output: +// +// ```python +// tf.cumprod([a, b, c]) # => [a, a * b, a * b * c] +// ``` +// +// By setting the `exclusive` kwarg to `True`, an exclusive cumprod is +// performed instead: +// +// ```python +// tf.cumprod([a, b, c], exclusive=True) # => [1, a, a * b] +// ``` +// +// By setting the `reverse` kwarg to `True`, the cumprod is performed in the +// opposite direction: +// +// ```python +// tf.cumprod([a, b, c], reverse=True) # => [a * b * c, b * c, c] +// ``` +// +// This is more efficient than using separate `tf.reverse` ops. +// +// The `reverse` and `exclusive` kwargs can also be combined: +// +// ```python +// tf.cumprod([a, b, c], exclusive=True, reverse=True) # => [b * c, c, 1] +// ``` +// +// Arguments: +// x: A `Tensor`. Must be one of the following types: `float32`, `float64`, +// `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`, +// `complex128`, `qint8`, `quint8`, `qint32`, `half`. +// axis: A `Tensor` of type `int32` (default: 0). Must be in the range +// `[-rank(x), rank(x))`. +func Cumprod(scope *Scope, x tf.Output, axis tf.Output, optional ...CumprodAttr) (out tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Cumprod", + Input: []tf.Input{ + x, axis, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugAttr is an optional argument to LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebug. +type LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugAttr func(optionalAttr) + +// LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 +func LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugTableId(value int64) LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugAttr { + return func(m optionalAttr) { + m["table_id"] = value + } +} + +// LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugTableName(value string) LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugAttr { + return func(m optionalAttr) { + m["table_name"] = value + } +} + +// LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugConfig sets the optional config attribute to value. +// If not specified, defaults to "" +func LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugConfig(value string) LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugAttr { + return func(m optionalAttr) { + m["config"] = value + } +} + +// Load SGD embedding parameters. +// +// An op that loads optimization parameters into HBM for embedding. Must be +// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct +// embedding table configuration. For example, this op is used to install +// parameters that are loaded from a checkpoint before a training loop is +// executed. +// +// Arguments: +// parameters: Value of parameters used in the stochastic gradient descent optimization algorithm. 
+// gradient_accumulators: Value of gradient_accumulators used in the Adadelta optimization algorithm. +// +// +// +// Returns the created operation. +func LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebug(scope *Scope, parameters tf.Output, gradient_accumulators tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebug", + Input: []tf.Input{ + parameters, gradient_accumulators, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + // RandomUniformAttr is an optional argument to RandomUniform. type RandomUniformAttr func(optionalAttr) From 3451ef8a16069a569afba20f3ed01ebf8e4d1f22 Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Wed, 29 Jul 2020 17:15:32 -0400 Subject: [PATCH 1659/2522] Update literal.cc --- tensorflow/compiler/xla/literal.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/xla/literal.cc b/tensorflow/compiler/xla/literal.cc index d03f3f8140f..50ab69b4efe 100644 --- a/tensorflow/compiler/xla/literal.cc +++ b/tensorflow/compiler/xla/literal.cc @@ -1894,13 +1894,13 @@ bool LiteralBase::IsR1Iota() const { auto is_iota_at_idx = [&](const int64 idx) { switch (shape().element_type()) { case U8: - return Get({idx}) == static_cast(idx); + return static_cast(Get({idx})) == idx; case U16: - return Get({idx}) == static_cast(idx); + return static_cast(Get({idx})) == idx; case U32: - return Get({idx}) == static_cast(idx); + return static_cast(Get({idx})) == idx; case U64: - return Get({idx}) == static_cast(idx); + return static_cast(Get({idx})) == idx; case S8: return Get({idx}) == idx; case S16: From 2158b9de8d6064d1a15e3e3a48b9c5c93c128144 Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Wed, 29 Jul 2020 17:20:29 -0400 Subject: [PATCH 1660/2522] Update literal.cc --- tensorflow/compiler/xla/literal.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/xla/literal.cc b/tensorflow/compiler/xla/literal.cc index 50ab69b4efe..543ea18155e 100644 --- a/tensorflow/compiler/xla/literal.cc +++ b/tensorflow/compiler/xla/literal.cc @@ -1894,13 +1894,13 @@ bool LiteralBase::IsR1Iota() const { auto is_iota_at_idx = [&](const int64 idx) { switch (shape().element_type()) { case U8: - return static_cast(Get({idx})) == idx; + return static_cast(Get({idx})) == idx; case U16: - return static_cast(Get({idx})) == idx; + return static_cast(Get({idx})) == idx; case U32: - return static_cast(Get({idx})) == idx; + return static_cast(Get({idx})) == idx; case U64: - return static_cast(Get({idx})) == idx; + return static_cast(Get({idx})) == idx; case S8: return Get({idx}) == idx; case S16: From 497525e2c426bd648f16c314c4eb4788d24535f0 Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Wed, 29 Jul 2020 17:27:29 -0400 Subject: [PATCH 1661/2522] Update build_xla_ops_pass.cc --- tensorflow/compiler/jit/build_xla_ops_pass.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/jit/build_xla_ops_pass.cc b/tensorflow/compiler/jit/build_xla_ops_pass.cc index d6f50532f62..a340b9d3f45 100644 --- a/tensorflow/compiler/jit/build_xla_ops_pass.cc +++ b/tensorflow/compiler/jit/build_xla_ops_pass.cc @@ -452,7 +452,7 @@ Status 
PredicateInt32Inputs(const Scope& root, Node* n, root.graph()->AddControlEdge(predicate_as_control.node(), identity_n.operation.node()); - for (int32 i = 0, end = int32_inputs.size(); i < end; i++) { + for (int i = 0, end = int32_inputs.size(); i < end; i++) { TF_RETURN_IF_ERROR(root.graph()->UpdateEdge(identity_n[i].node(), i, n, int32_inputs_input_idxs[i])); } From 20c3049f075e3f1b285be4969fb754ad0440e3af Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Wed, 29 Jul 2020 17:32:16 -0400 Subject: [PATCH 1662/2522] Update extract_outside_compilation_pass.cc --- tensorflow/compiler/jit/extract_outside_compilation_pass.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/jit/extract_outside_compilation_pass.cc b/tensorflow/compiler/jit/extract_outside_compilation_pass.cc index c84fca81420..f295efa5d8b 100644 --- a/tensorflow/compiler/jit/extract_outside_compilation_pass.cc +++ b/tensorflow/compiler/jit/extract_outside_compilation_pass.cc @@ -1806,7 +1806,7 @@ TF_ATTRIBUTE_NOINLINE Status ExtractOutsideCompilationForFuncCallNode( continue; } - const int input_size_check = e->dst_input() < inputs.size(); + const bool input_size_check = e->dst_input() < static_cast(inputs.size()); TF_RET_CHECK(e->dst_input() >= 0 && input_size_check); inputs[e->dst_input()] = NodeDefBuilder::NodeOut{e->src()->name(), e->src_output(), From 08b81eceedf62f68bc572d198126888075b190dd Mon Sep 17 00:00:00 2001 From: Pavithra Vijay Date: Wed, 29 Jul 2020 14:51:51 -0700 Subject: [PATCH 1663/2522] Remove usages of `smart_cond` module from Keras. We have a version of smart_cond in keras/utils/tf_utils.py, removing that and adding smart_cond from smart_cond TF module to keras/utils/control_flow_util.py PiperOrigin-RevId: 323873456 Change-Id: Ieb9ea7a7bea86e3ebcdcea4455b0d0c2f8111882 --- .../python/keras/engine/base_layer_test.py | 8 +-- .../python/keras/engine/data_adapter.py | 4 +- .../python/keras/engine/training_utils.py | 7 +- tensorflow/python/keras/layers/core.py | 6 +- .../python/keras/layers/dense_attention.py | 8 +-- .../python/keras/layers/normalization.py | 48 +++++++------- .../preprocessing/image_preprocessing.py | 36 ++++++----- tensorflow/python/keras/losses.py | 15 +++-- .../experimental/loss_scale_optimizer.py | 7 +- .../saving/saved_model/saved_model_test.py | 14 ++-- .../python/keras/saving/saved_model/utils.py | 7 +- .../saving/saved_model_experimental_test.py | 6 +- .../python/keras/utils/control_flow_util.py | 64 +++++++++++++++++++ tensorflow/python/keras/utils/tf_utils.py | 54 ---------------- 14 files changed, 147 insertions(+), 137 deletions(-) diff --git a/tensorflow/python/keras/engine/base_layer_test.py b/tensorflow/python/keras/engine/base_layer_test.py index efd0e490059..022718ea549 100644 --- a/tensorflow/python/keras/engine/base_layer_test.py +++ b/tensorflow/python/keras/engine/base_layer_test.py @@ -45,7 +45,7 @@ from tensorflow.python.keras.engine import sequential from tensorflow.python.keras.engine import training as training_lib from tensorflow.python.keras.mixed_precision.experimental import policy from tensorflow.python.keras.optimizer_v2 import rmsprop -from tensorflow.python.keras.utils import tf_utils +from tensorflow.python.keras.utils import control_flow_util from tensorflow.python.layers import core as legacy_core from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops @@ -376,9 +376,9 @@ class BaseLayerTest(keras_parameterized.TestCase): def call(self, inputs, training=None): if training is None: training = 
backend.learning_phase() - return tf_utils.smart_cond(training, - lambda: array_ops.ones_like(inputs), - lambda: array_ops.zeros_like(inputs)) + return control_flow_util.smart_cond( + training, lambda: array_ops.ones_like(inputs), + lambda: array_ops.zeros_like(inputs)) return TrainingLayer() diff --git a/tensorflow/python/keras/engine/data_adapter.py b/tensorflow/python/keras/engine/data_adapter.py index 3672ef64da3..74036054d79 100644 --- a/tensorflow/python/keras/engine/data_adapter.py +++ b/tensorflow/python/keras/engine/data_adapter.py @@ -37,12 +37,12 @@ from tensorflow.python.eager import context from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops -from tensorflow.python.framework import smart_cond from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_shape from tensorflow.python.framework.ops import composite_tensor from tensorflow.python.keras import backend from tensorflow.python.keras.engine import training_utils +from tensorflow.python.keras.utils import control_flow_util from tensorflow.python.keras.utils import data_utils from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops @@ -1308,7 +1308,7 @@ def _make_class_weight_map_fn(class_weight): raise ValueError("`class_weight` not supported for " "3+ dimensional targets.") - y_classes = smart_cond.smart_cond( + y_classes = control_flow_util.smart_cond( y.shape.rank == 2 and backend.shape(y)[1] > 1, lambda: backend.argmax(y, axis=1), lambda: math_ops.cast(backend.reshape(y, (-1,)), dtypes.int64)) diff --git a/tensorflow/python/keras/engine/training_utils.py b/tensorflow/python/keras/engine/training_utils.py index de686f2cb61..157a0c77ebf 100644 --- a/tensorflow/python/keras/engine/training_utils.py +++ b/tensorflow/python/keras/engine/training_utils.py @@ -40,7 +40,6 @@ from tensorflow.python.framework import composite_tensor_utils from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops -from tensorflow.python.framework import smart_cond from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_spec from tensorflow.python.framework import tensor_util @@ -48,6 +47,7 @@ from tensorflow.python.keras import backend as K from tensorflow.python.keras import callbacks as cbks from tensorflow.python.keras import losses from tensorflow.python.keras import metrics as metrics_module +from tensorflow.python.keras.utils import control_flow_util from tensorflow.python.keras.utils import data_utils from tensorflow.python.keras.utils import generic_utils from tensorflow.python.keras.utils import losses_utils @@ -997,11 +997,10 @@ def standardize_weights(y, weight_vector[:] = np.nan weight_vector[keys] = values - y_classes = smart_cond.smart_cond( + y_classes = control_flow_util.smart_cond( len(y.shape.as_list()) == 2 and K.shape(y)[1] > 1, lambda: K.argmax(y, axis=1), - lambda: math_ops.cast(K.reshape(y, (-1,)), dtypes.int64) - ) + lambda: math_ops.cast(K.reshape(y, (-1,)), dtypes.int64)) class_sample_weight = array_ops.gather(weight_vector, y_classes) gen_array_ops.check_numerics( class_sample_weight, diff --git a/tensorflow/python/keras/layers/core.py b/tensorflow/python/keras/layers/core.py index 37907b0ecea..36ac087ef64 100644 --- a/tensorflow/python/keras/layers/core.py +++ b/tensorflow/python/keras/layers/core.py @@ -43,6 +43,7 @@ from 
tensorflow.python.keras.engine import keras_tensor from tensorflow.python.keras.engine.base_layer import Layer from tensorflow.python.keras.engine.input_spec import InputSpec from tensorflow.python.keras.layers.ops import core as core_ops +from tensorflow.python.keras.utils import control_flow_util from tensorflow.python.keras.utils import conv_utils from tensorflow.python.keras.utils import generic_utils from tensorflow.python.keras.utils import tf_utils @@ -213,9 +214,8 @@ class Dropout(Layer): seed=self.seed, rate=self.rate) - output = tf_utils.smart_cond(training, - dropped_inputs, - lambda: array_ops.identity(inputs)) + output = control_flow_util.smart_cond(training, dropped_inputs, + lambda: array_ops.identity(inputs)) return output def compute_output_shape(self, input_shape): diff --git a/tensorflow/python/keras/layers/dense_attention.py b/tensorflow/python/keras/layers/dense_attention.py index 494f1c1c464..d3f204d661b 100644 --- a/tensorflow/python/keras/layers/dense_attention.py +++ b/tensorflow/python/keras/layers/dense_attention.py @@ -27,7 +27,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.keras import backend as K from tensorflow.python.keras.engine.base_layer import Layer -from tensorflow.python.keras.utils import tf_utils +from tensorflow.python.keras.utils import control_flow_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops @@ -127,10 +127,8 @@ class BaseDenseAttention(Layer): def dropped_weights(): return nn.dropout(weights, rate=self.dropout) - weights = tf_utils.smart_cond( - training, - dropped_weights, - lambda: array_ops.identity(weights)) + weights = control_flow_util.smart_cond(training, dropped_weights, + lambda: array_ops.identity(weights)) return math_ops.matmul(weights, value) # TODO(b/125916026): Consider exposing a __call__ method with named args. 
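The same mechanical replacement continues through the remaining Keras files in this patch: each `tf_utils.smart_cond(...)` call becomes `control_flow_util.smart_cond(...)` with unchanged arguments. A minimal sketch of the behaviour these call sites rely on, assuming the `smart_cond` definition added to control_flow_util.py at the end of this patch (a Python bool or constant-valued tensor selects a branch at trace time; anything else lowers to `tf.cond`), modelled on the training-arg test layers below:

    from tensorflow.python.keras.utils import control_flow_util
    from tensorflow.python.ops import array_ops

    def call(inputs, training):
      # `training` may be a Python bool or a scalar bool tensor; smart_cond
      # resolves the branch statically when it can, otherwise emits a cond op.
      return control_flow_util.smart_cond(
          training,
          lambda: inputs * 0,                    # training branch
          lambda: array_ops.identity(inputs))    # inference branch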
diff --git a/tensorflow/python/keras/layers/normalization.py b/tensorflow/python/keras/layers/normalization.py index e5723a3ef98..fd77cddb08d 100644 --- a/tensorflow/python/keras/layers/normalization.py +++ b/tensorflow/python/keras/layers/normalization.py @@ -28,7 +28,7 @@ from tensorflow.python.keras import initializers from tensorflow.python.keras import regularizers from tensorflow.python.keras.engine.base_layer import Layer from tensorflow.python.keras.engine.input_spec import InputSpec -from tensorflow.python.keras.utils import tf_utils +from tensorflow.python.keras.utils import control_flow_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops @@ -568,21 +568,22 @@ class BatchNormalizationBase(Layer): train_op = _fused_batch_norm_training if use_fused_avg_updates and input_batch_size is not None: # pylint: disable=g-long-lambda - train_op = lambda: tf_utils.smart_cond(input_batch_size > 0, - _fused_batch_norm_training, - _fused_batch_norm_training_empty) + train_op = lambda: control_flow_util.smart_cond( + input_batch_size > 0, _fused_batch_norm_training, + _fused_batch_norm_training_empty) # pylint: enable=g-long-lambda - output, mean, variance = tf_utils.smart_cond(training, train_op, - _fused_batch_norm_inference) + output, mean, variance = control_flow_util.smart_cond( + training, train_op, _fused_batch_norm_inference) variance = _maybe_add_or_remove_bessels_correction(variance, remove=True) - training_value = tf_utils.constant_value(training) + training_value = control_flow_util.smart_constant_value(training) if training_value or training_value is None: if not use_fused_avg_updates: if training_value is None: - momentum = tf_utils.smart_cond(training, lambda: self.momentum, - lambda: 1.0) + momentum = control_flow_util.smart_cond(training, + lambda: self.momentum, + lambda: 1.0) else: momentum = ops.convert_to_tensor_v2(self.momentum) @@ -635,9 +636,10 @@ class BatchNormalizationBase(Layer): d = math_ops.maximum(d, -dmax) d = math_ops.minimum(d, dmax) # When not training, use r=1, d=0. - r = tf_utils.smart_cond(training, lambda: r, lambda: array_ops.ones_like(r)) - d = tf_utils.smart_cond(training, lambda: d, - lambda: array_ops.zeros_like(d)) + r = control_flow_util.smart_cond(training, lambda: r, + lambda: array_ops.ones_like(r)) + d = control_flow_util.smart_cond(training, lambda: d, + lambda: array_ops.zeros_like(d)) def _update_renorm_variable(var, value, inputs_size): """Updates a moving average and weight, returns the unbiased value.""" @@ -652,7 +654,7 @@ class BatchNormalizationBase(Layer): def _fake_update(): return array_ops.identity(var) - return tf_utils.smart_cond(training, _do_update, _fake_update) + return control_flow_util.smart_cond(training, _do_update, _fake_update) # TODO(yuefengz): colocate the operations update_new_mean = _update_renorm_variable(self.renorm_mean, mean, @@ -760,17 +762,17 @@ class BatchNormalizationBase(Layer): return (scale, offset) # Determine a boolean value for `training`: could be True, False, or None. - training_value = tf_utils.constant_value(training) + training_value = control_flow_util.smart_constant_value(training) if training_value == False: # pylint: disable=singleton-comparison,g-explicit-bool-comparison mean, variance = self.moving_mean, self.moving_variance else: if self.adjustment: adj_scale, adj_bias = self.adjustment(array_ops.shape(inputs)) # Adjust only during training. 
- adj_scale = tf_utils.smart_cond(training, lambda: adj_scale, - lambda: array_ops.ones_like(adj_scale)) - adj_bias = tf_utils.smart_cond(training, lambda: adj_bias, - lambda: array_ops.zeros_like(adj_bias)) + adj_scale = control_flow_util.smart_cond( + training, lambda: adj_scale, lambda: array_ops.ones_like(adj_scale)) + adj_bias = control_flow_util.smart_cond( + training, lambda: adj_bias, lambda: array_ops.zeros_like(adj_bias)) scale, offset = _compose_transforms(adj_scale, adj_bias, scale, offset) # Some of the computations here are not necessary when training==False @@ -784,9 +786,9 @@ class BatchNormalizationBase(Layer): moving_mean = self.moving_mean moving_variance = self.moving_variance - mean = tf_utils.smart_cond(training, lambda: mean, - lambda: ops.convert_to_tensor_v2(moving_mean)) - variance = tf_utils.smart_cond( + mean = control_flow_util.smart_cond( + training, lambda: mean, lambda: ops.convert_to_tensor_v2(moving_mean)) + variance = control_flow_util.smart_cond( training, lambda: variance, lambda: ops.convert_to_tensor_v2(moving_variance)) @@ -826,7 +828,7 @@ class BatchNormalizationBase(Layer): def mean_update(): true_branch = lambda: _do_update(self.moving_mean, new_mean) false_branch = lambda: self.moving_mean - return tf_utils.smart_cond(training, true_branch, false_branch) + return control_flow_util.smart_cond(training, true_branch, false_branch) def variance_update(): """Update the moving variance.""" @@ -848,7 +850,7 @@ class BatchNormalizationBase(Layer): true_branch = lambda: _do_update(self.moving_variance, new_variance) false_branch = lambda: self.moving_variance - return tf_utils.smart_cond(training, true_branch, false_branch) + return control_flow_util.smart_cond(training, true_branch, false_branch) self.add_update(mean_update) self.add_update(variance_update) diff --git a/tensorflow/python/keras/layers/preprocessing/image_preprocessing.py b/tensorflow/python/keras/layers/preprocessing/image_preprocessing.py index 9b7772d02b1..87a18db31f3 100644 --- a/tensorflow/python/keras/layers/preprocessing/image_preprocessing.py +++ b/tensorflow/python/keras/layers/preprocessing/image_preprocessing.py @@ -29,7 +29,7 @@ from tensorflow.python.keras import backend as K from tensorflow.python.keras.engine import base_preprocessing_layer from tensorflow.python.keras.engine.base_preprocessing_layer import PreprocessingLayer from tensorflow.python.keras.engine.input_spec import InputSpec -from tensorflow.python.keras.utils import tf_utils +from tensorflow.python.keras.utils import control_flow_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops @@ -252,11 +252,11 @@ class RandomCrop(PreprocessingLayer): input_width_t = input_shape[W_AXIS] ratio_cond = (input_height_t / input_width_t > (self.height / self.width)) # pylint: disable=g-long-lambda - resized_height = tf_utils.smart_cond( + resized_height = control_flow_util.smart_cond( ratio_cond, lambda: math_ops.cast(self.width * input_height_t / input_width_t, input_height_t.dtype), lambda: self.height) - resized_width = tf_utils.smart_cond( + resized_width = control_flow_util.smart_cond( ratio_cond, lambda: self.width, lambda: math_ops.cast(self.height * input_width_t / input_height_t, input_width_t.dtype)) @@ -273,8 +273,8 @@ class RandomCrop(PreprocessingLayer): outputs = array_ops.slice(resized_inputs, bbox_begin, bbox_size) return outputs - output = tf_utils.smart_cond(training, random_cropped_inputs, - 
resize_and_center_cropped_inputs) + output = control_flow_util.smart_cond(training, random_cropped_inputs, + resize_and_center_cropped_inputs) original_shape = inputs.shape.as_list() batch_size, num_channels = original_shape[0], original_shape[3] output_shape = [batch_size] + [self.height, self.width] + [num_channels] @@ -414,8 +414,8 @@ class RandomFlip(PreprocessingLayer): flipped_outputs, self.seed) return flipped_outputs - output = tf_utils.smart_cond(training, random_flipped_inputs, - lambda: inputs) + output = control_flow_util.smart_cond(training, random_flipped_inputs, + lambda: inputs) output.set_shape(inputs.shape) return output @@ -561,8 +561,8 @@ class RandomTranslation(PreprocessingLayer): interpolation=self.interpolation, fill_mode=self.fill_mode) - output = tf_utils.smart_cond(training, random_translated_inputs, - lambda: inputs) + output = control_flow_util.smart_cond(training, random_translated_inputs, + lambda: inputs) output.set_shape(inputs.shape) return output @@ -836,8 +836,8 @@ class RandomRotation(PreprocessingLayer): fill_mode=self.fill_mode, interpolation=self.interpolation) - output = tf_utils.smart_cond(training, random_rotated_inputs, - lambda: inputs) + output = control_flow_util.smart_cond(training, random_rotated_inputs, + lambda: inputs) output.set_shape(inputs.shape) return output @@ -987,8 +987,8 @@ class RandomZoom(PreprocessingLayer): fill_mode=self.fill_mode, interpolation=self.interpolation) - output = tf_utils.smart_cond(training, random_zoomed_inputs, - lambda: inputs) + output = control_flow_util.smart_cond(training, random_zoomed_inputs, + lambda: inputs) output.set_shape(inputs.shape) return output @@ -1103,8 +1103,8 @@ class RandomContrast(PreprocessingLayer): return image_ops.random_contrast(inputs, 1. - self.lower, 1. 
+ self.upper, self.seed) - output = tf_utils.smart_cond(training, random_contrasted_inputs, - lambda: inputs) + output = control_flow_util.smart_cond(training, random_contrasted_inputs, + lambda: inputs) output.set_shape(inputs.shape) return output @@ -1201,7 +1201,8 @@ class RandomHeight(PreprocessingLayer): output.set_shape(output_shape) return output - return tf_utils.smart_cond(training, random_height_inputs, lambda: inputs) + return control_flow_util.smart_cond(training, random_height_inputs, + lambda: inputs) def compute_output_shape(self, input_shape): input_shape = tensor_shape.TensorShape(input_shape).as_list() @@ -1300,7 +1301,8 @@ class RandomWidth(PreprocessingLayer): output.set_shape(output_shape) return output - return tf_utils.smart_cond(training, random_width_inputs, lambda: inputs) + return control_flow_util.smart_cond(training, random_width_inputs, + lambda: inputs) def compute_output_shape(self, input_shape): input_shape = tensor_shape.TensorShape(input_shape).as_list() diff --git a/tensorflow/python/keras/losses.py b/tensorflow/python/keras/losses.py index 36402489feb..a149418fdd8 100644 --- a/tensorflow/python/keras/losses.py +++ b/tensorflow/python/keras/losses.py @@ -26,9 +26,9 @@ from tensorflow.python.autograph.core import ag_ctx from tensorflow.python.autograph.impl import api as autograph from tensorflow.python.distribute import distribution_strategy_context from tensorflow.python.framework import ops -from tensorflow.python.framework import smart_cond from tensorflow.python.framework import tensor_util from tensorflow.python.keras import backend as K +from tensorflow.python.keras.utils import control_flow_util from tensorflow.python.keras.utils import losses_utils from tensorflow.python.keras.utils import tf_utils from tensorflow.python.keras.utils.generic_utils import deserialize_keras_object @@ -1313,8 +1313,9 @@ def _maybe_convert_labels(y_true): # Convert the binary labels to -1 or 1. return 2. * y_true - 1. 
- updated_y_true = smart_cond.smart_cond(is_binary, - _convert_binary_labels, lambda: y_true) + updated_y_true = control_flow_util.smart_cond(is_binary, + _convert_binary_labels, + lambda: y_true) return updated_y_true @@ -1526,8 +1527,8 @@ def categorical_crossentropy(y_true, num_classes = math_ops.cast(array_ops.shape(y_true)[-1], y_pred.dtype) return y_true * (1.0 - label_smoothing) + (label_smoothing / num_classes) - y_true = smart_cond.smart_cond(label_smoothing, - _smooth_labels, lambda: y_true) + y_true = control_flow_util.smart_cond(label_smoothing, _smooth_labels, + lambda: y_true) return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits) @@ -1595,8 +1596,8 @@ def binary_crossentropy(y_true, y_pred, from_logits=False, label_smoothing=0): def _smooth_labels(): return y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing - y_true = smart_cond.smart_cond(label_smoothing, - _smooth_labels, lambda: y_true) + y_true = control_flow_util.smart_cond(label_smoothing, _smooth_labels, + lambda: y_true) return K.mean( K.binary_crossentropy(y_true, y_pred, from_logits=from_logits), axis=-1) diff --git a/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer.py b/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer.py index c2ae3b375d4..55737a9da9c 100644 --- a/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer.py +++ b/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer.py @@ -24,11 +24,11 @@ from tensorflow.python.distribute import one_device_strategy from tensorflow.python.distribute import tpu_strategy from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops -from tensorflow.python.framework import smart_cond from tensorflow.python.keras import backend from tensorflow.python.keras import optimizers from tensorflow.python.keras.mixed_precision.experimental import loss_scale as keras_loss_scale_module from tensorflow.python.keras.optimizer_v2 import optimizer_v2 +from tensorflow.python.keras.utils import control_flow_util from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.training.experimental import mixed_precision @@ -406,9 +406,8 @@ class LossScaleOptimizer(_DelegatingTrackableMixin, optimizer_v2.OptimizerV2): # DistributionStrategy does not support having a cond in a replica context # with a branch that calls `merge_call`, and self._optimizer.apply_gradients # calls `merge_call`. 
- maybe_apply_op = smart_cond.smart_cond(should_apply_grads, - apply_fn, - do_not_apply_fn) + maybe_apply_op = control_flow_util.smart_cond(should_apply_grads, apply_fn, + do_not_apply_fn) return control_flow_ops.group(maybe_apply_op, loss_scale_update_op) def _apply_gradients(self, grads, wrapped_vars, name, diff --git a/tensorflow/python/keras/saving/saved_model/saved_model_test.py b/tensorflow/python/keras/saving/saved_model/saved_model_test.py index e76e524f93b..1dff9a2e8cf 100644 --- a/tensorflow/python/keras/saving/saved_model/saved_model_test.py +++ b/tensorflow/python/keras/saving/saved_model/saved_model_test.py @@ -51,8 +51,8 @@ from tensorflow.python.keras import testing_utils from tensorflow.python.keras.feature_column.dense_features import DenseFeatures from tensorflow.python.keras.saving.saved_model import load as keras_load from tensorflow.python.keras.saving.saved_model import save_impl as keras_save +from tensorflow.python.keras.utils import control_flow_util from tensorflow.python.keras.utils import generic_utils -from tensorflow.python.keras.utils import tf_utils from tensorflow.python.ops import array_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops @@ -75,8 +75,8 @@ class LayerWithLearningPhase(keras.engine.base_layer.Layer): def call(self, x, training=None): if training is None: training = keras.backend.learning_phase() - output = tf_utils.smart_cond( - training, lambda: x * 0, lambda: array_ops.identity(x)) + output = control_flow_util.smart_cond(training, lambda: x * 0, + lambda: array_ops.identity(x)) if not context.executing_eagerly(): output._uses_learning_phase = True # pylint: disable=protected-access return output @@ -530,14 +530,14 @@ class TestModelSavingAndLoadingV2(keras_parameterized.TestCase): class LayerWithTrainingRequiredArg(keras.engine.base_layer.Layer): def call(self, inputs, training): - return tf_utils.smart_cond( - training, lambda: inputs * 0, lambda: array_ops.identity(inputs)) + return control_flow_util.smart_cond(training, lambda: inputs * 0, + lambda: array_ops.identity(inputs)) class LayerWithTrainingDefaultTrue(keras.engine.base_layer.Layer): def call(self, inputs, training=True): - return tf_utils.smart_cond( - training, lambda: inputs * 0, lambda: array_ops.identity(inputs)) + return control_flow_util.smart_cond(training, lambda: inputs * 0, + lambda: array_ops.identity(inputs)) class Model(keras.models.Model): diff --git a/tensorflow/python/keras/saving/saved_model/utils.py b/tensorflow/python/keras/saving/saved_model/utils.py index 9c1926b11ab..82547cc393d 100644 --- a/tensorflow/python/keras/saving/saved_model/utils.py +++ b/tensorflow/python/keras/saving/saved_model/utils.py @@ -23,7 +23,7 @@ import types from tensorflow.python.eager import context from tensorflow.python.keras import backend as K from tensorflow.python.keras.engine import base_layer_utils -from tensorflow.python.keras.utils import tf_utils +from tensorflow.python.keras.utils import control_flow_util from tensorflow.python.keras.utils.generic_utils import LazyLoader from tensorflow.python.training.tracking import layer_utils as trackable_layer_utils from tensorflow.python.util import tf_decorator @@ -164,9 +164,8 @@ def maybe_add_training_arg( set_training_arg(training, training_arg_index, args, kwargs) return wrapped_call(*args, **kwargs) - return tf_utils.smart_cond( - training, - lambda: replace_training_and_call(True), + return control_flow_util.smart_cond( + training, lambda: replace_training_and_call(True), lambda: 
replace_training_and_call(False)) # Create arg spec for decorated function. If 'training' is not defined in the diff --git a/tensorflow/python/keras/saving/saved_model_experimental_test.py b/tensorflow/python/keras/saving/saved_model_experimental_test.py index 527d2721481..f4b91298d10 100644 --- a/tensorflow/python/keras/saving/saved_model_experimental_test.py +++ b/tensorflow/python/keras/saving/saved_model_experimental_test.py @@ -35,8 +35,8 @@ from tensorflow.python.keras.engine import training as model_lib from tensorflow.python.keras.optimizer_v2 import adadelta from tensorflow.python.keras.optimizer_v2 import rmsprop from tensorflow.python.keras.saving import saved_model_experimental as keras_saved_model +from tensorflow.python.keras.utils import control_flow_util from tensorflow.python.keras.utils import mode_keys -from tensorflow.python.keras.utils import tf_utils from tensorflow.python.ops import array_ops from tensorflow.python.platform import test from tensorflow.python.saved_model import loader_impl @@ -209,8 +209,8 @@ class LayerWithLearningPhase(keras.engine.base_layer.Layer): def call(self, x, training=None): if training is None: training = keras.backend.learning_phase() - output = tf_utils.smart_cond( - training, lambda: x * 0, lambda: array_ops.identity(x)) + output = control_flow_util.smart_cond(training, lambda: x * 0, + lambda: array_ops.identity(x)) if not context.executing_eagerly(): output._uses_learning_phase = True # pylint: disable=protected-access return output diff --git a/tensorflow/python/keras/utils/control_flow_util.py b/tensorflow/python/keras/utils/control_flow_util.py index d30856b0342..c17dc30b0a2 100644 --- a/tensorflow/python/keras/utils/control_flow_util.py +++ b/tensorflow/python/keras/utils/control_flow_util.py @@ -22,6 +22,10 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_util +from tensorflow.python.ops import control_flow_ops + def InXlaContext(graph): ctxt = graph._get_control_flow_context() # pylint: disable=protected-access @@ -81,3 +85,63 @@ def GetContainingXLAContext(ctxt): if ctxt.IsXLAContext(): return ctxt ctxt = ctxt.outer_context return None + + +def smart_cond(pred, true_fn=None, false_fn=None, name=None): # pylint: disable=invalid-name + """Return either `true_fn()` if predicate `pred` is true else `false_fn()`. + + If `pred` is a bool or has a constant value, we return either `true_fn()` + or `false_fn()`, otherwise we use `tf.cond` to dynamically route to both. + + Arguments: + pred: A scalar determining whether to return the result of `true_fn` or + `false_fn`. + true_fn: The callable to be performed if pred is true. + false_fn: The callable to be performed if pred is false. + name: Optional name prefix when using `tf.cond`. + + Returns: + Tensors returned by the call to either `true_fn` or `false_fn`. + + Raises: + TypeError: If `true_fn` or `false_fn` is not callable. 
+ """ + if not callable(true_fn): + raise TypeError("`true_fn` must be callable.") + if not callable(false_fn): + raise TypeError("`false_fn` must be callable.") + + pred_value = smart_constant_value(pred) + if pred_value is not None: + if pred_value: + return true_fn() + else: + return false_fn() + else: + return control_flow_ops.cond( + pred, true_fn=true_fn, false_fn=false_fn, name=name) + + +def smart_constant_value(pred): # pylint: disable=invalid-name + """Return the bool value for `pred`, or None if `pred` had a dynamic value. + + Arguments: + pred: A scalar, either a Python bool or tensor. + + Returns: + True or False if `pred` has a constant boolean value, None otherwise. + + Raises: + TypeError: If `pred` is not a Tensor or bool. + """ + if isinstance(pred, ops.Tensor): + pred_value = tensor_util.constant_value(pred) + elif pred in {0, 1}: # Accept 1/0 as valid boolean values + pred_value = bool(pred) + elif isinstance(pred, bool): + pred_value = pred + else: + raise TypeError("`pred` must be a Tensor, or a Python bool, or 1 or 0. " + "Found instead: %s" % type(pred)) + + return pred_value diff --git a/tensorflow/python/keras/utils/tf_utils.py b/tensorflow/python/keras/utils/tf_utils.py index 3bc38d44398..51cb1acc899 100644 --- a/tensorflow/python/keras/utils/tf_utils.py +++ b/tensorflow/python/keras/utils/tf_utils.py @@ -25,13 +25,11 @@ from tensorflow.python.data.experimental.ops import cardinality from tensorflow.python.eager import context from tensorflow.python.framework import composite_tensor from tensorflow.python.framework import ops -from tensorflow.python.framework import smart_cond as smart_module from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_spec from tensorflow.python.framework import tensor_util from tensorflow.python.framework import type_spec from tensorflow.python.keras import backend as K -from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import variables from tensorflow.python.ops.ragged import ragged_tensor @@ -41,58 +39,6 @@ from tensorflow.python.util import object_identity from tensorflow.python.util import tf_contextlib -def smart_cond(pred, true_fn=None, false_fn=None, name=None): - """Return either `true_fn()` if predicate `pred` is true else `false_fn()`. - - If `pred` is a bool or has a constant value, we return either `true_fn()` - or `false_fn()`, otherwise we use `tf.cond` to dynamically route to both. - - Arguments: - pred: A scalar determining whether to return the result of `true_fn` or - `false_fn`. - true_fn: The callable to be performed if pred is true. - false_fn: The callable to be performed if pred is false. - name: Optional name prefix when using `tf.cond`. - - Returns: - Tensors returned by the call to either `true_fn` or `false_fn`. - - Raises: - TypeError: If `true_fn` or `false_fn` is not callable. - """ - if isinstance(pred, variables.Variable): - return control_flow_ops.cond( - pred, true_fn=true_fn, false_fn=false_fn, name=name) - return smart_module.smart_cond( - pred, true_fn=true_fn, false_fn=false_fn, name=name) - - -def constant_value(pred): - """Return the bool value for `pred`, or None if `pred` had a dynamic value. - - Arguments: - pred: A scalar, either a Python bool or a TensorFlow boolean variable - or tensor, or the Python integer 1 or 0. - - Returns: - True or False if `pred` has a constant boolean value, None otherwise. 
- - Raises: - TypeError: If `pred` is not a Variable, Tensor or bool, or Python - integer 1 or 0. - """ - # Allow integer booleans. - if isinstance(pred, int): - if pred == 1: - pred = True - elif pred == 0: - pred = False - - if isinstance(pred, variables.Variable): - return None - return smart_module.smart_constant_value(pred) - - def is_tensor_or_tensor_list(v): v = nest.flatten(v) if v and isinstance(v[0], ops.Tensor): From 933c88197f6c470828eaeb6d92442bb81694718a Mon Sep 17 00:00:00 2001 From: Robert David Date: Wed, 29 Jul 2020 15:13:10 -0700 Subject: [PATCH 1664/2522] Fix output tensor size. PiperOrigin-RevId: 323878012 Change-Id: Iee10c39a1cbf4b6c83a39015aa19d9867356619a --- tensorflow/lite/kernels/lstm_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/lite/kernels/lstm_test.cc b/tensorflow/lite/kernels/lstm_test.cc index 023df0e67b8..7d51cb39ae9 100644 --- a/tensorflow/lite/kernels/lstm_test.cc +++ b/tensorflow/lite/kernels/lstm_test.cc @@ -124,7 +124,7 @@ class LSTMOpModel : public SingleOpModel { } } - output_ = AddOutput({TensorType_FLOAT32, {n_output}}); + output_ = AddOutput({TensorType_FLOAT32, {n_batch, n_output}}); // TODO(b/161825581): Add tests where cell_clip and/or proj_clip is not the // default 0. From 90fee7bb9d14e769b13ebc90fec6ba2e1b9a850c Mon Sep 17 00:00:00 2001 From: Pete Warden Date: Wed, 29 Jul 2020 15:40:48 -0700 Subject: [PATCH 1665/2522] Speed up Arduino build presubmit and use mirror for dependency downloads The changes to slim down the number of build targets reduces the overall time for the Arduino build script by 4 minutes on my workstation. The switch to using our own mirror to cache download dependencies should also help with build speed and reliability, though this is harder to measure since it depends on network conditions. PiperOrigin-RevId: 323883386 Change-Id: I42aca54177e6bb2cb21c8451f86e630fbc811c1e --- .../lite/micro/tools/ci_build/test_all.sh | 7 +-- tensorflow/lite/micro/tools/make/Makefile | 6 +-- .../tools/make/third_party_downloads.inc | 49 ++++++++++--------- 3 files changed, 31 insertions(+), 31 deletions(-) diff --git a/tensorflow/lite/micro/tools/ci_build/test_all.sh b/tensorflow/lite/micro/tools/ci_build/test_all.sh index 345bf33e216..e0cb0b325ef 100755 --- a/tensorflow/lite/micro/tools/ci_build/test_all.sh +++ b/tensorflow/lite/micro/tools/ci_build/test_all.sh @@ -49,10 +49,7 @@ tensorflow/lite/micro/tools/ci_build/test_sparkfun.sh echo "Running stm32f4 tests at `date`" tensorflow/lite/micro/tools/ci_build/test_stm32f4.sh -# TODO(b/158607483): Disabling Arduino because it is slow (~20mins) and has also -# become very flaky from the download of cifar-10-binary.tar.gz which is 160 MB -# and has started failing a lot. 
-# echo "Running Arduino tests at `date`" -# tensorflow/lite/micro/tools/ci_build/test_arduino.sh +echo "Running Arduino tests at `date`" +tensorflow/lite/micro/tools/ci_build/test_arduino.sh echo "Finished all micro tests at `date`" diff --git a/tensorflow/lite/micro/tools/make/Makefile b/tensorflow/lite/micro/tools/make/Makefile index 6c6e89561f2..62510159547 100644 --- a/tensorflow/lite/micro/tools/make/Makefile +++ b/tensorflow/lite/micro/tools/make/Makefile @@ -368,11 +368,9 @@ build: $(MICROLITE_BUILD_TARGETS) generate_projects: $(ALL_PROJECT_TARGETS) -generate_non_kernel_projects: $(filter-out generate_kernel%,$(ALL_PROJECT_TARGETS)) +ARDUINO_PROJECT_TARGETS := $(foreach TARGET,$(ALL_PROJECT_TARGETS),$(if $(findstring _arduino,$(TARGET)),$(TARGET),)) -generate_non_test_projects: $(filter-out %_test%,$(ALL_PROJECT_TARGETS)) - -generate_arduino_zip: generate_non_kernel_projects $(ARDUINO_LIBRARY_ZIPS) +generate_arduino_zip: $(ARDUINO_PROJECT_TARGETS) $(ARDUINO_LIBRARY_ZIPS) python tensorflow/lite/micro/tools/make/merge_arduino_zips.py $(PRJDIR)/tensorflow_lite.zip $(ARDUINO_LIBRARY_ZIPS) # Gets rid of all generated files. diff --git a/tensorflow/lite/micro/tools/make/third_party_downloads.inc b/tensorflow/lite/micro/tools/make/third_party_downloads.inc index 0a85995efd0..7c6dc211963 100644 --- a/tensorflow/lite/micro/tools/make/third_party_downloads.inc +++ b/tensorflow/lite/micro/tools/make/third_party_downloads.inc @@ -1,65 +1,70 @@ # Add URLs and MD5 checksums for third-party libraries here. +# We use mirror.tensorflow.org to cache copies of third-party files, +# but this is just an optimization applied manually by TensorFlow +# engineers, so add non-mirrored URLs if you need to update this +# in a pull request and we'll periodically copy them and update +# the URL. 
GEMMLOWP_URL := "https://github.com/google/gemmlowp/archive/719139ce755a0f31cbf1c37f7f98adcc7fc9f425.zip" GEMMLOWP_MD5 := "7e8191b24853d75de2af87622ad293ba" ifeq ($(HOST_OS),windows) - FLATBUFFERS_URL := "https://github.com/google/flatbuffers/archive/v1.12.0.zip" + FLATBUFFERS_URL := "http://mirror.tensorflow.org/github.com/google/flatbuffers/archive/v1.12.0.zip" FLATBUFFERS_MD5 := "a1afdbf114dec01a861c1b8c917d0fc7" else - FLATBUFFERS_URL := "https://github.com/google/flatbuffers/archive/v1.12.0.tar.gz" + FLATBUFFERS_URL := "http://mirror.tensorflow.org/github.com/google/flatbuffers/archive/v1.12.0.tar.gz" FLATBUFFERS_MD5 := "c62ffefb3d4548b127cca14ce047f16c" endif ifeq ($(HOST_OS),osx) - GCC_EMBEDDED_URL := "https://developer.arm.com/-/media/Files/downloads/gnu-rm/7-2018q2/gcc-arm-none-eabi-7-2018-q2-update-mac.tar.bz2" + GCC_EMBEDDED_URL := "http://mirror.tensorflow.org/developer.arm.com/-/media/Files/downloads/gnu-rm/7-2018q2/gcc-arm-none-eabi-7-2018-q2-update-mac.tar.bz2" GCC_EMBEDDED_MD5 := "a66be9828cf3c57d7d21178e07cd8904" else ifeq ($(HOST_OS),windows) - GCC_EMBEDDED_URL := "https://developer.arm.com/-/media/Files/downloads/gnu-rm/7-2018q2/gcc-arm-none-eabi-7-2018-q2-update-win32.zip" + GCC_EMBEDDED_URL := "http://mirror.tensorflow.org/developer.arm.com/-/media/Files/downloads/gnu-rm/7-2018q2/gcc-arm-none-eabi-7-2018-q2-update-win32.zip" GCC_EMBEDDED_MD5 := "bc8ae26d7c429f30d583a605a4bcf9bc" else - GCC_EMBEDDED_URL := "https://developer.arm.com/-/media/Files/downloads/gnu-rm/7-2018q2/gcc-arm-none-eabi-7-2018-q2-update-linux.tar.bz2" + GCC_EMBEDDED_URL := "http://mirror.tensorflow.org/developer.arm.com/-/media/Files/downloads/gnu-rm/7-2018q2/gcc-arm-none-eabi-7-2018-q2-update-linux.tar.bz2" GCC_EMBEDDED_MD5 := "299ebd3f1c2c90930d28ab82e5d8d6c0" endif -LEON_BCC2_URL := "https://www.gaisler.com/anonftp/bcc2/bin/bcc-2.0.7-gcc-linux64.tar.xz" +LEON_BCC2_URL := "http://mirror.tensorflow.org/www.gaisler.com/anonftp/bcc2/bin/bcc-2.0.7-gcc-linux64.tar.xz" LEON_BCC2_MD5 := "cdf78082be4882da2a92c9baa82fe765" -TSIM_URL := "https://www.gaisler.com/anonftp/tsim/tsim-eval-2.0.63.tar.gz" +TSIM_URL := "http://mirror.tensorflow.org/www.gaisler.com/anonftp/tsim/tsim-eval-2.0.63.tar.gz" TSIM_MD5 := "afa0095d3ed989a949e1467f94e41d2f" -CMSIS_URL := "https://github.com/ARM-software/CMSIS_5/archive/9daaa7a34a5627a24009462b8fa8413a00c4fdb1.zip" +CMSIS_URL := "http://mirror.tensorflow.org/github.com/ARM-software/CMSIS_5/archive/9daaa7a34a5627a24009462b8fa8413a00c4fdb1.zip" CMSIS_MD5 := "b988dacff8925ffffcb7e5079cc713b7" -AM_SDK_URL := "http://s3.asia.ambiqmicro.com/downloads/AmbiqSuite-Rel2.2.0.zip" +AM_SDK_URL := "http://mirror.tensorflow.org/s3.asia.ambiqmicro.com/downloads/AmbiqSuite-Rel2.2.0.zip" AM_SDK_MD5 := "7605fa2d4d97e6bb7a1190c92b66b597" AM_SDK_DEST := AmbiqSuite-Rel2.2.0 -SF_BSPS_URL := "https://github.com/sparkfun/SparkFun_Apollo3_AmbiqSuite_BSPs/archive/v0.0.7.zip" +SF_BSPS_URL := "http://mirror.tensorflow.org/github.com/sparkfun/SparkFun_Apollo3_AmbiqSuite_BSPs/archive/v0.0.7.zip" SF_BSPS_MD5 := "34199f7e754735661d1c8a70a40ca7a3" SF_BSPS_DEST := boards_sfe -STM32_BARE_LIB_URL := "https://github.com/google/stm32_bare_lib/archive/c07d611fb0af58450c5a3e0ab4d52b47f99bc82d.zip" +STM32_BARE_LIB_URL := "http://mirror.tensorflow.org/github.com/google/stm32_bare_lib/archive/c07d611fb0af58450c5a3e0ab4d52b47f99bc82d.zip" STM32_BARE_LIB_MD5 := "282bff40d4d0b92278fd123a3b6e3123" ifeq ($(HOST_OS),osx) - RISCV_TOOLCHAIN_URL := 
"https://static.dev.sifive.com/dev-tools/riscv64-unknown-elf-gcc-8.1.0-2019.01.0-x86_64-apple-darwin.tar.gz" + RISCV_TOOLCHAIN_URL := "http://mirror.tensorflow.org/static.dev.sifive.com/dev-tools/riscv64-unknown-elf-gcc-8.1.0-2019.01.0-x86_64-apple-darwin.tar.gz" RISCV_TOOLCHAIN_MD5 := "2ac2fa00618b9ab7fa0c7d0ec173de94" else - RISCV_TOOLCHAIN_URL := "https://static.dev.sifive.com/dev-tools/riscv64-unknown-elf-gcc-20181030-x86_64-linux-ubuntu14.tar.gz" + RISCV_TOOLCHAIN_URL := "http://mirror.tensorflow.org/static.dev.sifive.com/dev-tools/riscv64-unknown-elf-gcc-20181030-x86_64-linux-ubuntu14.tar.gz" RISCV_TOOLCHAIN_MD5="2366b7afe36a54dc94fb0ff8a0830934" endif -SIFIVE_FE310_LIB_URL := "https://github.com/sifive/freedom-e-sdk/archive/baeeb8fd497a99b3c141d7494309ec2e64f19bdf.zip" +SIFIVE_FE310_LIB_URL := "http://mirror.tensorflow.org/github.com/sifive/freedom-e-sdk/archive/baeeb8fd497a99b3c141d7494309ec2e64f19bdf.zip" SIFIVE_FE310_LIB_MD5 := "06ee24c4956f8e21670ab3395861fe64" -KISSFFT_URL="https://github.com/mborgerding/kissfft/archive/v130.zip" +KISSFFT_URL="http://mirror.tensorflow.org/github.com/mborgerding/kissfft/archive/v130.zip" KISSFFT_MD5="438ba1fef5783cc5f5f201395cc477ca" RUY_URL="https://github.com/google/ruy/archive/5bb02fbf90824c2eb6cd7418f766c593106a332b.zip" RUY_MD5="c720b1743360259ac45809a321f8f26c" -CIFAR10_DATASET_URL="https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz" +CIFAR10_DATASET_URL="http://mirror.tensorflow.org/www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz" CIFAR10_DATASET_MD5="c32a1d4ab5d03f1284b67883e8d87530" IMAGE_RECOGNITION_MODEL_URL := "https://storage.googleapis.com/download.tensorflow.org/models/tflite/cifar_image_recognition_model_2020_05_27.zip" @@ -71,22 +76,22 @@ PERSON_MODEL_MD5 := "55b85f76e2995153e660391d4a209ef1" PERSON_MODEL_INT8_URL := "https://storage.googleapis.com/download.tensorflow.org/data/tf_lite_micro_person_data_int8_grayscale_2020_06_23.zip" PERSON_MODEL_INT8_MD5 := "9b5b6d4677dd0a91b1bb992d1c4c0417" -EMBARC_MLI_URL := "https://github.com/foss-for-synopsys-dwc-arc-processors/embarc_mli/archive/58284867ca52d1f43b25045e8601999d7359d986.zip" +EMBARC_MLI_URL := "http://mirror.tensorflow.org/github.com/foss-for-synopsys-dwc-arc-processors/embarc_mli/archive/58284867ca52d1f43b25045e8601999d7359d986.zip" EMBARC_MLI_MD5 := "2bf4982a327fdaa9d475803ce014d1ef" -EMBARC_MLI_PRE_COMPILED_URL := "https://github.com/foss-for-synopsys-dwc-arc-processors/embarc_mli/releases/download/Release_1.1_RC2/embARC_MLI_package.zip" +EMBARC_MLI_PRE_COMPILED_URL := "http://mirror.tensorflow.org/github.com/foss-for-synopsys-dwc-arc-processors/embarc_mli/releases/download/Release_1.1_RC2/embARC_MLI_package.zip" EMBARC_MLI_PRE_COMPILED_MD5 := "a95ff9e0370434484f14e7e4114327f6" -ZEPHYR_URL := "https://github.com/antmicro/zephyr/archive/55e36b9.zip" +ZEPHYR_URL := "http://mirror.tensorflow.org/github.com/antmicro/zephyr/archive/55e36b9.zip" ZEPHYR_MD5 := "755622eb4812fde918a6382b65d50c3b" -XTENSA_HIFI4_URL :="https://github.com/foss-xtensa/nnlib-hifi4/raw/master/archive/xa_nnlib_06_27.zip" +XTENSA_HIFI4_URL :="http://mirror.tensorflow.org/github.com/foss-xtensa/nnlib-hifi4/raw/master/archive/xa_nnlib_06_27.zip" XTENSA_HIFI4_MD5 :="45fdc1209a8da62ab568aa6040f7eabf" -ETHOSU_URL := "https://git.mlplatform.org/ml/ethos-u/ethos-u-core-driver.git/snapshot/ethos-u-core-driver-bcb5aaa99756f1b5c1295b079ebdd60996bc75a5.tar.gz" +ETHOSU_URL := 
"http://mirror.tensorflow.org/git.mlplatform.org/ml/ethos-u/ethos-u-core-driver.git/snapshot/ethos-u-core-driver-bcb5aaa99756f1b5c1295b079ebdd60996bc75a5.tar.gz" ETHOSU_MD5 := "d2073c8d88fc167fd5c46b5dcda58ea1" -HIMAX_WE1_SDK_URL ="https://www.himax.com.tw/we-i/himax_we1_sdk_v02.zip" +HIMAX_WE1_SDK_URL ="http://mirror.tensorflow.org/www.himax.com.tw/we-i/himax_we1_sdk_v02.zip" HIMAX_WE1_SDK_MD5 ="9a4b2f29b16052764e437b64bdcba816" From 0abee9f2eb645ff6045904b2bf9bcb0aa4b17be2 Mon Sep 17 00:00:00 2001 From: Xinyi Wang Date: Wed, 29 Jul 2020 15:49:23 -0700 Subject: [PATCH 1666/2522] Remove constraint of specifying steps_per_epoch for MWMS+Keras model.fit. PiperOrigin-RevId: 323885033 Change-Id: I6db8e98b401b09083a89edf2d779cbc14eb8a10d --- tensorflow/python/keras/distribute/BUILD | 2 + .../collective_all_reduce_strategy_test.py | 55 ++++++++++++++++++- .../python/keras/engine/data_adapter.py | 12 ---- 3 files changed, 56 insertions(+), 13 deletions(-) diff --git a/tensorflow/python/keras/distribute/BUILD b/tensorflow/python/keras/distribute/BUILD index bdd4cbc58aa..50a7bee7e49 100644 --- a/tensorflow/python/keras/distribute/BUILD +++ b/tensorflow/python/keras/distribute/BUILD @@ -176,11 +176,13 @@ cuda_py_test( "//tensorflow/python:training_lib", "//tensorflow/python:training_util", "//tensorflow/python:variables", + "//tensorflow/python/compat:v2_compat", "//tensorflow/python/distribute:collective_all_reduce_strategy", "//tensorflow/python/distribute:combinations", "//tensorflow/python/distribute:cross_device_utils", "//tensorflow/python/distribute:multi_worker_test_base", "//tensorflow/python/distribute:multi_worker_util", + "//tensorflow/python/distribute:strategy_combinations", "//tensorflow/python/distribute:strategy_test_lib", "//tensorflow/python/distribute/cluster_resolver:cluster_resolver_lib", "//tensorflow/python/eager:context", diff --git a/tensorflow/python/keras/distribute/collective_all_reduce_strategy_test.py b/tensorflow/python/keras/distribute/collective_all_reduce_strategy_test.py index f2869e4d478..60b7d4690bb 100644 --- a/tensorflow/python/keras/distribute/collective_all_reduce_strategy_test.py +++ b/tensorflow/python/keras/distribute/collective_all_reduce_strategy_test.py @@ -22,22 +22,29 @@ from absl.testing import parameterized import numpy as np from tensorflow.core.protobuf import config_pb2 +from tensorflow.python.compat import v2_compat +from tensorflow.python.data.ops import dataset_ops from tensorflow.python.distribute import collective_all_reduce_strategy from tensorflow.python.distribute import combinations from tensorflow.python.distribute import cross_device_utils from tensorflow.python.distribute import multi_worker_test_base from tensorflow.python.distribute import multi_worker_util +from tensorflow.python.distribute import strategy_combinations from tensorflow.python.distribute import strategy_test_lib from tensorflow.python.distribute.cluster_resolver import SimpleClusterResolver from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.keras import layers from tensorflow.python.keras import testing_utils from tensorflow.python.keras.engine import sequential +from tensorflow.python.keras.engine import training from tensorflow.python.keras.mixed_precision.experimental import policy from tensorflow.python.keras.mixed_precision.experimental import test_util 
as mp_test_util +from tensorflow.python.keras.optimizer_v2 import gradient_descent as gradient_descent_keras +from tensorflow.python.ops import array_ops from tensorflow.python.ops import nn from tensorflow.python.ops import random_ops from tensorflow.python.ops import variables @@ -316,5 +323,51 @@ class LocalCollectiveAllReduceStrategy( self._test_mixed_precision(None, None, required_gpus) +@combinations.generate( + combinations.combine( + strategy=[ + strategy_combinations.multi_worker_mirrored_2x1_cpu, + strategy_combinations.multi_worker_mirrored_2x1_gpu, + ], + mode=['eager'])) +class DistributedCollectiveAllReduceStrategyEagerTest(test.TestCase, + parameterized.TestCase): + + def testFitWithoutStepsPerEpochPartialBatch(self, strategy): + + def _model_fn(): + x = layers.Input(shape=(1,), name='input') + y = layers.Dense(1, name='dense')(x) + model = training.Model(x, y) + return model + + def _get_dataset(): + inputs = array_ops.expand_dims_v2(constant_op.constant(range(10)), axis=1) + targets = array_ops.expand_dims_v2( + constant_op.constant(range(10)), axis=1) + # Make global batch size 12 for 2 replicas and a non-repeated dataset with + # 10 elements so that we have partial batch + dataset = dataset_ops.Dataset.from_tensor_slices( + (inputs, targets)).batch(12, drop_remainder=False) + return dataset + + with strategy.scope(): + optimizer_fn = gradient_descent_keras.SGD + optimizer = optimizer_fn(0.001) + model = _model_fn() + loss = 'mse' + metrics = ['mae'] + model.compile( + optimizer, + loss, + metrics=metrics) + dataset = _get_dataset() + kernel_before = model.get_weights()[0][0] + model.fit(dataset, epochs=10) + kernel_after = model.get_weights()[0][0] + self.assertNotEqual(kernel_before, kernel_after) + self.assertGreater(abs(kernel_before-1), abs(kernel_after-1)) + if __name__ == '__main__': - test.main() + v2_compat.enable_v2_behavior() + combinations.main() diff --git a/tensorflow/python/keras/engine/data_adapter.py b/tensorflow/python/keras/engine/data_adapter.py index 74036054d79..0e4886fc8cb 100644 --- a/tensorflow/python/keras/engine/data_adapter.py +++ b/tensorflow/python/keras/engine/data_adapter.py @@ -1238,18 +1238,6 @@ class DataHandler(object): if adapter_steps is not None: return adapter_steps - if (ds_context.get_strategy().extended._in_multi_worker_mode() and # pylint: disable=protected-access - (dataset.options().experimental_distribute.auto_shard_policy != - distribute_options.AutoShardPolicy.OFF)): - # If the dataset would be auto-sharded, we should not infer a local - # steps_per_epoch due to the possible inbalanced sharding between workers. - raise ValueError("When dataset is sharded across workers, please " - "specify a reasonable `steps_per_epoch` such that all " - "workers will train the same number of steps and each " - "step can get data from dataset without EOF. This is " - "required for allreduce to succeed. 
We will handle the " - "last partial batch in the future.") - size = cardinality.cardinality(dataset) if size == cardinality.INFINITE and steps is None: raise ValueError("When passing an infinitely repeating dataset, you " From 25fd5e7afba67722f3f35f4b7e9bf85876bcc529 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Wed, 29 Jul 2020 15:58:53 -0700 Subject: [PATCH 1667/2522] [ci_build] Upgrade `setuptools` PiperOrigin-RevId: 323886961 Change-Id: I7c6603a2d49190ba81671e13bafa0d59f48fe4cc --- tensorflow/tools/ci_build/builds/pip_new.sh | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/tensorflow/tools/ci_build/builds/pip_new.sh b/tensorflow/tools/ci_build/builds/pip_new.sh index 330fa44b0de..32a4241ca01 100755 --- a/tensorflow/tools/ci_build/builds/pip_new.sh +++ b/tensorflow/tools/ci_build/builds/pip_new.sh @@ -448,11 +448,10 @@ install_tensorflow_pip() { # Check that requested python version matches configured one. check_python_pip_version - # Force upgrade of setuptools. This must happen before the pip install of the - # WHL_PATH, which pulls in absl-py, which uses install_requires notation - # introduced in setuptools >=20.5. The default version of setuptools is 5.5.1, - # which is too old for absl-py. - ${PIP_BIN_PATH} install --upgrade setuptools==39.1.0 || \ + # Force upgrade of setuptools. We need it to install pips using + # `install_requires` notation introduced in setuptools >=20.5. The default + # version of setuptools is 5.5.1. + ${PIP_BIN_PATH} install --upgrade setuptools || \ die "Error: setuptools install, upgrade FAILED" # Force tensorflow reinstallation. Otherwise it may not get installed from @@ -462,13 +461,6 @@ install_tensorflow_pip() { die "pip install (forcing to reinstall tensorflow) FAILED" echo "Successfully installed pip package ${WHL_PATH}" - # Force downgrade of setuptools. This must happen after the pip install of the - # WHL_PATH, which ends up upgrading to the latest version of setuptools. - # Versions of setuptools >= 39.1.0 will cause tests to fail like this: - # ImportError: cannot import name py31compat - ${PIP_BIN_PATH} install --upgrade setuptools==39.1.0 || \ - die "Error: setuptools install, upgrade FAILED" - # Install the future package in the virtualenv. Installing it in user system # packages does not appear to port it over when creating a virtualenv. # ImportError: No module named builtins From 9b7121e84439894a06e50c90a1bceebd3a7b1af2 Mon Sep 17 00:00:00 2001 From: Allen Wang Date: Wed, 29 Jul 2020 16:11:37 -0700 Subject: [PATCH 1668/2522] Add in GCS tracing in TF. 
PiperOrigin-RevId: 323889423 Change-Id: I06fc3d723660668ae8c4e1d42c30214f82c46d93 --- tensorflow/core/platform/cloud/BUILD | 2 ++ tensorflow/core/platform/cloud/gcs_file_system.cc | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/tensorflow/core/platform/cloud/BUILD b/tensorflow/core/platform/cloud/BUILD index 2440549a353..5553c9094cb 100644 --- a/tensorflow/core/platform/cloud/BUILD +++ b/tensorflow/core/platform/cloud/BUILD @@ -103,6 +103,7 @@ cc_library( "//tensorflow/core/platform:retrying_utils", "//tensorflow/core/platform:str_util", "//tensorflow/core/platform:stringprintf", + "//tensorflow/core/profiler/lib:traceme", "@jsoncpp_git//:jsoncpp", ], alwayslink = 1, @@ -139,6 +140,7 @@ cc_library( "//tensorflow/core/platform:retrying_utils", "//tensorflow/core/platform:str_util", "//tensorflow/core/platform:stringprintf", + "//tensorflow/core/profiler/lib:traceme", "@jsoncpp_git//:jsoncpp", ], alwayslink = 1, diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc index 0874c47142a..59a4f2558b3 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system.cc @@ -47,6 +47,7 @@ limitations under the License. #include "tensorflow/core/platform/str_util.h" #include "tensorflow/core/platform/stringprintf.h" #include "tensorflow/core/platform/thread_annotations.h" +#include "tensorflow/core/profiler/lib/traceme.h" #ifdef _WIN32 #ifdef DeleteFile @@ -1023,6 +1024,9 @@ Status GcsFileSystem::LoadBufferFromGCS(const string& fname, size_t offset, string bucket, object; TF_RETURN_IF_ERROR(ParseGcsPath(fname, false, &bucket, &object)); + profiler::TraceMe activity( + [fname]() { return absl::StrCat("LoadBufferFromGCS ", fname); }); + std::unique_ptr request; TF_RETURN_WITH_CONTEXT_IF_ERROR(CreateHttpRequest(&request), "when reading gs://", bucket, "/", object); @@ -1044,6 +1048,9 @@ Status GcsFileSystem::LoadBufferFromGCS(const string& fname, size_t offset, *bytes_transferred = bytes_read; VLOG(1) << "Successful read of gs://" << bucket << "/" << object << " @ " << offset << " of size: " << bytes_read; + activity.AppendMetadata([bytes_read]() { + return profiler::TraceMeEncode({{"block_size", bytes_read}}); + }); if (stats_ != nullptr) { stats_->RecordBlockRetrieved(fname, offset, bytes_read); From 22f03654ba1de1ce40ac32c50186abd43567014b Mon Sep 17 00:00:00 2001 From: Bruce Fontaine Date: Wed, 29 Jul 2020 16:21:57 -0700 Subject: [PATCH 1669/2522] Allow delayed creation of variables in TPU Embedding mid level API. 
PiperOrigin-RevId: 323891307 Change-Id: I8f47b64dbe3492cf550de6773cb3e7742d81d832 --- tensorflow/python/tpu/tpu_embedding_v2.py | 202 ++++++++++++++---- .../tpu/tpu_embedding_v2_correctness_test.py | 10 +- .../python/tpu/tpu_embedding_v2_cpu_test.py | 6 +- .../python/tpu/tpu_embedding_v2_test.py | 70 ++++-- ...erimental.embedding.-t-p-u-embedding.pbtxt | 6 +- ...erimental.embedding.-t-p-u-embedding.pbtxt | 6 +- 6 files changed, 229 insertions(+), 71 deletions(-) diff --git a/tensorflow/python/tpu/tpu_embedding_v2.py b/tensorflow/python/tpu/tpu_embedding_v2.py index fd3d1055618..5e316d35aa4 100644 --- a/tensorflow/python/tpu/tpu_embedding_v2.py +++ b/tensorflow/python/tpu/tpu_embedding_v2.py @@ -45,6 +45,7 @@ from tensorflow.python.tpu import tpu from tensorflow.python.tpu import tpu_embedding_v2_utils from tensorflow.python.tpu.ops import tpu_ops from tensorflow.python.training.saving import saveable_hook +from tensorflow.python.training.tracking import base from tensorflow.python.training.tracking import tracking from tensorflow.python.util import compat from tensorflow.python.util import nest @@ -136,7 +137,6 @@ class TPUEmbedding(tracking.AutoTrackable): with strategy.scope(): embedding = tf.tpu.experimental.embedding.TPUEmbedding( feature_config=feature_config, - batch_size=1024, optimizer=tf.tpu.experimental.embedding.SGD(0.1)) ``` @@ -152,6 +152,12 @@ class TPUEmbedding(tracking.AutoTrackable): dataset_iterator = iter(distributed_dataset) ``` + NOTE: All batches passed to the layer must have the same batch size for each + input, more over once you have called the layer with one batch size all + subsequent calls must use the same batch_size. In the event that the batch + size cannot be automatically determined by the enqueue method, you must call + the build method with the batch size to initialize the layer. + To use this API on TPU you should use a custom training loop. Below is an example of a training and evaluation step: @@ -228,9 +234,8 @@ class TPUEmbedding(tracking.AutoTrackable): """ - def __init__(self, feature_config, batch_size, optimizer, - pipeline_execution_with_tensor_core=False, - initialize_tpu_embedding=True): + def __init__(self, feature_config, optimizer, + pipeline_execution_with_tensor_core=False): """Creates the TPUEmbedding mid level API object. ```python @@ -246,9 +251,6 @@ class TPUEmbedding(tracking.AutoTrackable): Args: feature_config: A nested structure of `tf.tpu.experimental.embedding.FeatureConfig` configs. - batch_size: The global batch size that you indend to use. Note that is - fixed and the same batch size must be used for both training and - evaluation. optimizer: An instance of one of `tf.tpu.experimental.embedding.SGD`, `tf.tpu.experimental.embedding.Adagrad` or `tf.tpu.experimental.embedding.Adam`. When not created under @@ -258,10 +260,6 @@ class TPUEmbedding(tracking.AutoTrackable): pipeline_execution_with_tensor_core: If True, the TPU embedding computations will overlap with the TensorCore computations (and hence will be one step old). Set to True for improved performance. - initialize_tpu_embedding: If False, will not initialize the TPU embedding - engine. If this is set to False and another instance of this class has - not initialized the tpu embedding engine, the creation of this object - will fail. Raises: ValueError: If optimizer is not one of tf.tpu.experimental.embedding.(SGD, @@ -327,32 +325,73 @@ class TPUEmbedding(tracking.AutoTrackable): # We need to list of host devices for the load/retrieve operations. 
self._hosts = get_list_of_hosts(self._strategy) - # We generally use the per core batch size, but will have the user pass - # in a global batch size. - self._batch_size = batch_size // self._strategy.num_replicas_in_sync + self._built = False + + def build(self, per_replica_batch_size=None): + """Create the underlying variables and initializes the TPU for embeddings. + + This method creates the underlying variables (including slot variables). If + created under a TPUStrategy, this will also initialize the TPU for + embeddings. + + This function will automatically get called by enqueue, which will try to + determine your batch size automatically. If this fails, you must manually + call this method before you call enqueue. + + Args: + per_replica_batch_size: The per replica batch size that you intend to use. + Note that is fixed and the same batch size must be used for both + training and evaluation. If you want to calculate this from the global + batch size, you can use `num_replicas_in_sync` property of your strategy + object. May be set to None if not created under a TPUStrategy. + + Raises: + ValueError: If per_replica_batch_size is None and object was created in a + TPUStrategy scope. + """ + if self._built: + return + + if self._using_tpu: + if per_replica_batch_size is None: + raise ValueError("You must specify a per_replica_batch_size when " + "calling build if object is created under a " + "TPUStrategy.") + + self._batch_size = per_replica_batch_size self._config_proto = self._create_config_proto() - if initialize_tpu_embedding: - # This is mainly for testing purposes, sometimes we don't want to - # initialize the embedding engine, but just want a copy of the API - # which can interact with an already initialized engine. - logging.info("Initializing TPU Embedding engine with config: %s", - self._config_proto) - @def_function.function - def load_config(): - tpu.initialize_system_for_tpu_embedding(self._config_proto) - load_config() - logging.info("Done initializing TPU Embedding engine.") + logging.info("Initializing TPU Embedding engine with config: %s", + self._config_proto) + @def_function.function + def load_config(): + tpu.initialize_system_for_tpu_embedding(self._config_proto) + + load_config() + logging.info("Done initializing TPU Embedding engine.") # Create and load variables and slot variables into the TPU. # Note that this is a dict of dicts. Keys to the first dict are table names. # We would prefer to use TableConfigs, but then these variables won't be # properly tracked by the tracking API. self._variables = self._create_variables_and_slots() + if self._using_tpu: self._load_variables() + self._built = True + + def _maybe_build(self, batch_size): + if not self._built: + # This can be called while tracing a function, so we wrap the + # initialization code with init_scope so it runs eagerly, this means that + # it will not be included the function graph generated by tracing so that + # we can be sure that we only initialize the TPU for embeddings exactly + # once. + with ops.init_scope(): + self.build(batch_size) + @property def embedding_tables(self): """Returns a dict of embedding tables, keyed by `TableConfig`. @@ -376,6 +415,8 @@ class TPUEmbedding(tracking.AutoTrackable): "strategy. If you need access, save your model, " "create this object under a CPU strategy and restore.") + self._maybe_build(None) + # Only return the tables and not the slot variables. On CPU this are honest # tf.Variables. 
return {table: self._variables[table.name]["parameters"] @@ -553,7 +594,8 @@ class TPUEmbedding(tracking.AutoTrackable): name: A name for the underlying op. Raises: - RuntimeError: If called when object wasn't created under a `TPUStrategy`. + RuntimeError: If called when object wasn't created under a `TPUStrategy` + or if not built (either by manually calling build or calling enqueue). ValueError: If a non-`tf.Tensor` non-`None` gradient is passed in, or a `tf.Tensor` of the incorrect shape is passed in. Also if the size of any sequence in `gradients` does not match corresponding @@ -565,6 +607,11 @@ class TPUEmbedding(tracking.AutoTrackable): raise RuntimeError("apply_gradients is not valid when TPUEmbedding " "object is not created under a TPUStrategy.") + if not self._built: + raise RuntimeError("apply_gradients called on unbuilt TPUEmbedding " + "object. Please either call enqueue first or manually " + "call the build method.") + # send_tpu_embedding_gradients requires per table gradient, if we only have # one feature per table this isn't an issue. When multiple features share # the same table, the order of the features in per table tensor returned by @@ -646,12 +693,18 @@ class TPUEmbedding(tracking.AutoTrackable): passed to this instance of the `TPUEmbedding` object. Raises: - RuntimeError: If called when object wasn't created under a `TPUStrategy`. + RuntimeError: If called when object wasn't created under a `TPUStrategy` + or if not built (either by manually calling build or calling enqueue). """ if not self._using_tpu: raise RuntimeError("dequeue is not valid when TPUEmbedding object is not " "created under a TPUStrategy.") + if not self._built: + raise RuntimeError("dequeue called on unbuilt TPUEmbedding object. " + "Please either call enqueue first or manually call " + "the build method.") + # The activations returned by this op are per table. So we must separate # them out into per feature activations. The activations are interleaved: # for each table, we expect a [num_features*batch_size, dim] tensor. @@ -719,9 +772,16 @@ class TPUEmbedding(tracking.AutoTrackable): shape = (table.vocabulary_size, table.dim) def getter(name, shape, dtype, initializer, trainable): + # TODO(bfontain): make CheckpointInitialValue a callable rather than + # something that inherits from tensor. + if not isinstance(initializer, base.CheckpointInitialValue): + initial_value = functools.partial(initializer, shape, dtype=dtype) + else: + initial_value = initializer + return tf_variables.Variable( name=name, - initial_value=functools.partial(initializer, shape, dtype=dtype), + initial_value=initial_value, trainable=trainable) def variable_creator(name, initializer, trainable=True): @@ -827,11 +887,17 @@ class TPUEmbedding(tracking.AutoTrackable): # TODO(bfontain): Update restore logic in saver so that these hooks are # always executed. Once that is done, we can output an empty list when on # CPU. + + def _load_variables(): + if self._using_tpu and self._built: + self._load_variables() + + def _retrieve_variables(): + if self._using_tpu and self._built: + self._retrieve_variables() + def factory(name=_HOOK_KEY): - return TPUEmbeddingSaveable( - name, - self._load_variables if self._using_tpu else None, - self._retrieve_variables if self._using_tpu else None) + return TPUEmbeddingSaveable(name, _load_variables, _retrieve_variables) return {_HOOK_KEY: factory} # Some helper functions for the below enqueue function. 
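A minimal usage sketch of the delayed-build flow described in the docstrings above (not part of the diff; `strategy` and `feature_config` are assumed to exist as in the class docstring, and the batch sizes are illustrative):

```python
# Sketch of the new flow: batch_size is no longer a constructor argument.
import tensorflow as tf

global_batch_size = 128
per_replica_batch_size = global_batch_size // strategy.num_replicas_in_sync

with strategy.scope():
    embedding = tf.tpu.experimental.embedding.TPUEmbedding(
        feature_config=feature_config,
        optimizer=tf.tpu.experimental.embedding.SGD(0.1))

# Either let the first enqueue() call infer the per-replica batch size, or,
# when it cannot be inferred automatically, build the layer explicitly:
embedding.build(per_replica_batch_size)
```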
@@ -1124,8 +1190,10 @@ class TPUEmbedding(tracking.AutoTrackable): directly taken from the args of the `strategy.run` call. Also if the size of any sequence in `features` does not match corresponding sequence in `feature_config`. Similarly for `weights`, if not `None`. + If batch size of features is unequal or different from a previous call. RuntimeError: When called inside a strategy.run call and inside XLA - control flow. + control flow. If batch_size is not able to be determined and build was + not called. TypeError: If the type of any sequence in `features` does not match corresponding sequence in `feature_config`. Similarly for `weights`, if not `None`. @@ -1134,10 +1202,24 @@ class TPUEmbedding(tracking.AutoTrackable): raise RuntimeError("enqueue is not valid when TPUEmbedding object is not " "created under a TPUStrategy.") - nest.assert_same_structure(self._feature_config, features) + in_tpu_context = self._raise_error_for_incorrect_control_flow_context() - # TODO(bfontain): Add a check that the input batch_size matches the per core - # batch size that this instance of the API was initialized with. + # Should we also get batch_size from weights if they exist? + # Since features is assumed to be batched at the per replica batch size + # the returned batch size here is per replica an not global. + batch_size = self._get_batch_size(features, in_tpu_context) + if batch_size is None and not self._built: + raise RuntimeError("Unable to determine batch size from input features." + "Please call build() with global batch size to " + "initialize the TPU for embeddings.") + if batch_size is not None: + self._maybe_build(batch_size) + if self._batch_size != batch_size: + raise ValueError("Multiple calls to enqueue with different batch sizes " + "{} and {}.".format(self._batch_size, + batch_size)) + + nest.assert_same_structure(self._feature_config, features) flat_inputs = nest.flatten(features) flat_weights = [None] * len(flat_inputs) @@ -1147,7 +1229,6 @@ class TPUEmbedding(tracking.AutoTrackable): flat_features = nest.flatten_with_joined_string_paths(self._feature_config) self._raise_error_for_inputs_not_on_cpu(features) - in_tpu_context = self._raise_error_for_incorrect_control_flow_context() # If we are in a tpu_context, automatically apply outside compilation. if in_tpu_context: self._raise_error_for_non_direct_inputs(features) @@ -1206,6 +1287,34 @@ class TPUEmbedding(tracking.AutoTrackable): enqueue_ops.append(enqueue_op) ops.get_default_graph().control_outputs.extend(enqueue_ops) + def _get_batch_size(self, tensors, in_tpu_context): + """Gets the batch size from a nested structure of features.""" + batch_size = None + for path, maybe_tensor in nest.flatten_with_joined_string_paths(tensors): + tensor_list = [] + if not in_tpu_context: + # if we are not in a context, then this is PerReplica and we need to + # check each replica's batch size. + for replica_id in range(self._strategy.num_replicas_in_sync): + tensor_list.append(distribute_utils.select_replica(replica_id, + maybe_tensor)) + else: + tensor_list = [maybe_tensor] + + for tensor in tensor_list: + if tensor.shape.rank < 1: + raise ValueError( + "Input {} has rank 0, rank must be at least 1.".format(path)) + shape = tensor.shape.as_list() + if shape[0] is not None: + if batch_size is None: + batch_size = shape[0] + elif batch_size != shape[0]: + raise ValueError("Found multiple batch sizes {} and {}. 
All inputs " + "must have the same batch dimensions size.".format( + batch_size, shape[0])) + return batch_size + class TPUEmbeddingSaveable(saveable_hook.SaveableHook): """Save/Restore hook to Retrieve/Load TPUEmbedding variables.""" @@ -1346,6 +1455,9 @@ def extract_variable_info(kwargs): return (kwargs["name"], shape, kwargs["initial_value"].keywords.get("dtype", kwargs["dtype"]), kwargs["initial_value"].func) + elif isinstance(kwargs["initial_value"], base.CheckpointInitialValue): + return (kwargs["name"], kwargs["initial_value"].shape, + kwargs["initial_value"].dtype, kwargs["initial_value"]) elif "shape" not in kwargs or kwargs["shape"] is None: raise ValueError( "Unable to extract initializer function and shape from {}. Please " @@ -1384,12 +1496,24 @@ def make_sharded_variable_creator(hosts): variables = [] newkwargs = kwargs newkwargs["dtype"] = dtype + # TODO(bfontain): Remove this check once we can pass position and shape of + # shards to CheckpointInitialValue. + if isinstance(initial_value, base.CheckpointInitialValue) and num_hosts > 1: + raise RuntimeError("Delayed restoration of variables not available when " + "there are multiple TPU hosts, please ensure that the " + "api object is build before you restore.") + for i, p in enumerate(partitions): with ops.device(hosts[i]): newkwargs["shape"] = (p, cols) newkwargs["name"] = "{}_{}".format(name, i) - newkwargs["initial_value"] = ( - lambda: initial_value(newkwargs["shape"], dtype=dtype)) + if isinstance(initial_value, base.CheckpointInitialValue): + # TODO(bfontain): Patch CheckpointInitialValue to take in account the + # position and shape of this shard. + newkwargs["initial_value"] = initial_value + else: + newkwargs["initial_value"] = ( + lambda: initial_value(newkwargs["shape"], dtype=dtype)) variables.append(next_creator(*args, **kwargs)) return TPUShardedVariable(variables, name=name) return sharded_variable_creator diff --git a/tensorflow/python/tpu/tpu_embedding_v2_correctness_test.py b/tensorflow/python/tpu/tpu_embedding_v2_correctness_test.py index afb6743cbc2..7a9a727d956 100644 --- a/tensorflow/python/tpu/tpu_embedding_v2_correctness_test.py +++ b/tensorflow/python/tpu/tpu_embedding_v2_correctness_test.py @@ -28,7 +28,6 @@ import numpy as np from tensorflow.python.compat import v2_compat from tensorflow.python.data.ops import dataset_ops from tensorflow.python.distribute import distribute_lib -from tensorflow.python.distribute import distribution_strategy_context from tensorflow.python.distribute import tpu_strategy from tensorflow.python.distribute.cluster_resolver import tpu_cluster_resolver from tensorflow.python.eager import backprop @@ -211,11 +210,8 @@ class TPUEmbeddingCorrectness(parameterized.TestCase, test.TestCase): if optimizer is None: optimizer = tpu_embedding_v2_utils.SGD(learning_rate=0.1) - num_replicas = ( - distribution_strategy_context.get_strategy().num_replicas_in_sync) return tpu_embedding_v2.TPUEmbedding( feature_config=self.feature_config, - batch_size=self.batch_size * num_replicas, optimizer=optimizer) def _create_sparse_dataset(self, strategy, include_weights=False, weight=0.5): @@ -474,9 +470,11 @@ class TPUEmbeddingCorrectness(parameterized.TestCase, test.TestCase): with strategy.scope(): mid_level = tpu_embedding_v2.TPUEmbedding( feature_config=feature_config, - batch_size=self.batch_size * num_replicas, optimizer=optimizer) - + # Call build here. 
We call 'next' outside of the tf.function and this + # results in data where the shape of the sparse tensor is a tensor which we + # can't tell the shape of at tracing time. + mid_level.build(self.batch_size) dataset = self._create_sparse_dataset(strategy) data = next(iter(strategy.experimental_distribute_dataset( dataset, diff --git a/tensorflow/python/tpu/tpu_embedding_v2_cpu_test.py b/tensorflow/python/tpu/tpu_embedding_v2_cpu_test.py index 6c44b77b5ec..fa1e843179f 100644 --- a/tensorflow/python/tpu/tpu_embedding_v2_cpu_test.py +++ b/tensorflow/python/tpu/tpu_embedding_v2_cpu_test.py @@ -105,7 +105,6 @@ class CPUEmbeddingTest(test.TestCase): optimizer = tpu_embedding_v2_utils.SGD(learning_rate=0.1) return tpu_embedding_v2.TPUEmbedding( feature_config=self.feature_config, - batch_size=self.batch_size, optimizer=optimizer) def _get_dense_tensors(self, dtype=dtypes.int32): @@ -285,7 +284,6 @@ class CPUEmbeddingTest(test.TestCase): optimizer = tpu_embedding_v2_utils.SGD(learning_rate=0.1) mid_level = tpu_embedding_v2.TPUEmbedding( feature_config=feature_config, - batch_size=self.batch_size, optimizer=optimizer) features = tuple(self._get_sparse_tensors()[:1]) with self.assertRaisesRegex( @@ -302,8 +300,10 @@ class CPUEmbeddingTest(test.TestCase): table=self.table_video, name='watched', max_sequence_length=2),) mid_level = tpu_embedding_v2.TPUEmbedding( feature_config=feature_config, - batch_size=self.batch_size, optimizer=None) + # Build the layer manually to create the variables. Normally calling enqueue + # would do this. + mid_level.build() self.assertEqual( list(mid_level._variables[self.table_video.name].keys()), ['parameters']) diff --git a/tensorflow/python/tpu/tpu_embedding_v2_test.py b/tensorflow/python/tpu/tpu_embedding_v2_test.py index c9f9b90ebd0..5e081d6f9ef 100644 --- a/tensorflow/python/tpu/tpu_embedding_v2_test.py +++ b/tensorflow/python/tpu/tpu_embedding_v2_test.py @@ -50,6 +50,7 @@ from tensorflow.python.ops.ragged import ragged_tensor from tensorflow.python.platform import test from tensorflow.python.saved_model import load from tensorflow.python.saved_model import save +from tensorflow.python.tpu import tpu from tensorflow.python.tpu import tpu_embedding from tensorflow.python.tpu import tpu_embedding_v2 from tensorflow.python.tpu import tpu_embedding_v2_utils @@ -163,6 +164,9 @@ class TPUEmbeddingCheckpointTest(parameterized.TestCase, test.TestCase): ) def test_checkpoint_restore_before_variable_creation(self): + # This test works right now because we only have one TPU host in the unit + # environment. Initializing from checkpoint does not understand how to + # pass the sharding info to the restore op right now. class TestModule(module.Module): @@ -170,7 +174,6 @@ class TPUEmbeddingCheckpointTest(parameterized.TestCase, test.TestCase): self._initializer = initializer self._rows = rows - def create_embedding(self): table = tpu_embedding_v2_utils.TableConfig( vocabulary_size=self._rows, dim=4, initializer=self._initializer, combiner='sum', name='table') @@ -179,9 +182,13 @@ class TPUEmbeddingCheckpointTest(parameterized.TestCase, test.TestCase): optimizer = tpu_embedding_v2_utils.SGD() self.tpu_embedding = tpu_embedding_v2.TPUEmbedding( - feature_config, self._rows, optimizer) + feature_config, optimizer) - # We need to clear the already loaded config provided by setUp method. + def create_embedding(self): + # We aren't training so batch_size here doesn't matter. + self.tpu_embedding.build(64) + + # We need to clear the any already loaded config provided by setUp method. 
tpu_strategy_util.initialize_tpu_system(self.resolver) with self.strategy.scope(): @@ -227,11 +234,23 @@ class TPUEmbeddingCheckpointTest(parameterized.TestCase, test.TestCase): feature_config = (tpu_embedding_v2_utils.FeatureConfig( table=table, name='feature'),) + mid_level = tpu_embedding_v2.TPUEmbedding( + feature_config, optimizer) + + # We want to create a second object (with its own variables) but not + # initialize the TPU. + if not initialize_tpu_embedding: + saved_fn = tpu.initialize_system_for_tpu_embedding + tpu.initialize_system_for_tpu_embedding = lambda x: None + # batch_size here does not matter as we aren't training in any of these # tests. - return tpu_embedding_v2.TPUEmbedding( - feature_config, 64, optimizer, - initialize_tpu_embedding=initialize_tpu_embedding) + mid_level.build(64) + + if not initialize_tpu_embedding: + tpu.initialize_system_for_tpu_embedding = saved_fn + + return mid_level def make_checkpoint_and_get_embedding(self, name, model): """Saves model to checkpoint name, retrieves embedding variables.""" @@ -406,7 +425,6 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase): dim=2, initializer=self.initializer), name='favorited')), - self.batch_size, tpu_embedding_v2_utils.SGD(learning_rate=0.1)) def test_unsupported_optimizer(self): @@ -414,11 +432,14 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase): ValueError, 'is an unsupported optimizer class.'): with self._get_strategy().scope(): tpu_embedding_v2.TPUEmbedding( - self.feature_config, self.batch_size, + self.feature_config, tpu_embedding.AdagradParameters(learning_rate=0.1)) def test_pass_non_tensor_to_apply_gradients(self): strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd') + # We aren't going to actually run anything, so the batch_size here does not + # matter. + mid_level_api.build(64) @def_function.function def test_apply(): @@ -429,7 +450,9 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase): def test_pass_different_structure_to_apply_gradients(self): strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd') - + # We aren't going to actually run anything, so the batch_size here does not + # matter. + mid_level_api.build(64) @def_function.function def test_apply(): # This should be a tuple as feature_config is a tuple of 3 configs. 
@@ -442,6 +465,7 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase): def test_pass_none_to_apply_gradients(self): strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd') + mid_level_api.build(self.batch_size) dataset = self._create_sparse_dataset(strategy) data = next(iter(strategy.experimental_distribute_dataset( dataset, @@ -492,7 +516,9 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase): tpu=FLAGS.tpu, zone=FLAGS.zone, project=FLAGS.project) remote.connect_to_cluster(self.resolver) tpu_strategy_util.initialize_tpu_system(self.resolver) - return tpu_strategy.TPUStrategy(self.resolver) + strategy = tpu_strategy.TPUStrategy(self.resolver) + self.num_replicas = strategy.num_replicas_in_sync + return strategy def test_dequeue_on_cpu(self): mid_level_api = self._create_mid_level() @@ -767,6 +793,7 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase): else: dataset = self._create_sparse_dataset(strategy, include_weights=True, weight=weight) + mid_level_api.build(self.batch_size) dataset_iter = iter(strategy.experimental_distribute_dataset( dataset, @@ -808,6 +835,7 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase): config.enable_mlir_bridge() strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd') + mid_level_api.build(self.batch_size) dataset = self._create_sparse_dataset(strategy) dataset_iter = iter(strategy.experimental_distribute_dataset( dataset, @@ -872,6 +900,7 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase): def test_enqueue_with_outside_compilation_non_direct_input(self): strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd') + mid_level_api.build(self.batch_size) dataset = self._create_sparse_dataset(strategy) dataset_iter = iter(strategy.experimental_distribute_dataset( dataset, @@ -894,6 +923,7 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase): def test_enqueue_with_outside_compilation_auto_mode(self): strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd') + mid_level_api.build(self.batch_size) dataset = self._create_sparse_dataset(strategy) dataset_iter = iter(strategy.experimental_distribute_dataset( dataset, @@ -973,11 +1003,8 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase): if optimizer is None: optimizer = tpu_embedding_v2_utils.SGD(learning_rate=0.1) - num_replicas = ( - distribution_strategy_context.get_strategy().num_replicas_in_sync) return tpu_embedding_v2.TPUEmbedding( feature_config=self.feature_config, - batch_size=self.batch_size * num_replicas, optimizer=optimizer) def _create_sparse_dataset(self, strategy, include_weights=False, weight=0.5): @@ -1093,7 +1120,6 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase): feature_config={ 'feature': tpu_embedding_v2_utils.FeatureConfig( table=table_config, name='feature')}, - batch_size=num_replicas, optimizer=optimizer) feature = {'feature': constant_op.constant([0], dtype=dtypes.int32)} @@ -1164,12 +1190,13 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase): strategy = self._get_strategy() else: strategy = distribution_strategy_context.get_strategy() - num_replicas = strategy.num_replicas_in_sync with strategy.scope(): mid_level = tpu_embedding_v2.TPUEmbedding( feature_config=self.feature_config, - batch_size=self.batch_size * num_replicas, optimizer=optimizer) + # We aren't going to actually run anything, so the batch_size here does + # not matter. 
+ mid_level.build(self.batch_size) video_accumulator = mid_level._variables['video']['accumulators'] user_accumulator = mid_level._variables['user']['accumulators'] if use_tpu: @@ -1203,14 +1230,15 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase): learning_rate=0.1, slot_variable_creation_fn=slot_creation_fn) strategy = self._get_strategy() - num_replicas = strategy.num_replicas_in_sync with strategy.scope(): + mid_level_api = tpu_embedding_v2.TPUEmbedding( + feature_config=self.feature_config, + optimizer=optimizer) with self.assertRaisesRegex(ValueError, 'Unable to extract initializer function'): - tpu_embedding_v2.TPUEmbedding( - feature_config=self.feature_config, - batch_size=self.batch_size*num_replicas, - optimizer=optimizer) + # We aren't going to actually run anything, so the batch_size here does + # not matter. + mid_level_api.build(self.batch_size) def _unpack(strategy, per_replica_output): diff --git a/tensorflow/tools/api/golden/v1/tensorflow.tpu.experimental.embedding.-t-p-u-embedding.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.tpu.experimental.embedding.-t-p-u-embedding.pbtxt index 9cc8354b4bf..c15fdab977b 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.tpu.experimental.embedding.-t-p-u-embedding.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.tpu.experimental.embedding.-t-p-u-embedding.pbtxt @@ -10,12 +10,16 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'feature_config\', \'batch_size\', \'optimizer\', \'pipeline_execution_with_tensor_core\', \'initialize_tpu_embedding\'], varargs=None, keywords=None, defaults=[\'False\', \'True\'], " + argspec: "args=[\'self\', \'feature_config\', \'optimizer\', \'pipeline_execution_with_tensor_core\'], varargs=None, keywords=None, defaults=[\'False\'], " } member_method { name: "apply_gradients" argspec: "args=[\'self\', \'gradients\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "build" + argspec: "args=[\'self\', \'per_replica_batch_size\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "dequeue" argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/tensorflow/tools/api/golden/v2/tensorflow.tpu.experimental.embedding.-t-p-u-embedding.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.tpu.experimental.embedding.-t-p-u-embedding.pbtxt index 9cc8354b4bf..c15fdab977b 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.tpu.experimental.embedding.-t-p-u-embedding.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.tpu.experimental.embedding.-t-p-u-embedding.pbtxt @@ -10,12 +10,16 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'feature_config\', \'batch_size\', \'optimizer\', \'pipeline_execution_with_tensor_core\', \'initialize_tpu_embedding\'], varargs=None, keywords=None, defaults=[\'False\', \'True\'], " + argspec: "args=[\'self\', \'feature_config\', \'optimizer\', \'pipeline_execution_with_tensor_core\'], varargs=None, keywords=None, defaults=[\'False\'], " } member_method { name: "apply_gradients" argspec: "args=[\'self\', \'gradients\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "build" + argspec: "args=[\'self\', \'per_replica_batch_size\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "dequeue" argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " From 61ecd4c2816a07bcc9984a98dffb4db3929fefcd Mon 
Sep 17 00:00:00 2001 From: Thomas O'Malley Date: Wed, 29 Jul 2020 16:44:00 -0700 Subject: [PATCH 1670/2522] Allow loss to be passed as a Tensor to Optimizer.minimize PiperOrigin-RevId: 323895500 Change-Id: I49a35556f3b602b254b26584827f1d09aaeac30a --- RELEASE.md | 3 +- .../experimental/loss_scale_optimizer.py | 2 +- .../optimizer_v2/gradient_descent_test.py | 19 ++++++ .../python/keras/optimizer_v2/optimizer_v2.py | 66 ++++++++++++------- ...n.experimental.-loss-scale-optimizer.pbtxt | 2 +- ...ensorflow.keras.optimizers.-adadelta.pbtxt | 2 +- ...tensorflow.keras.optimizers.-adagrad.pbtxt | 2 +- .../tensorflow.keras.optimizers.-adam.pbtxt | 2 +- .../tensorflow.keras.optimizers.-adamax.pbtxt | 2 +- .../tensorflow.keras.optimizers.-ftrl.pbtxt | 2 +- .../tensorflow.keras.optimizers.-nadam.pbtxt | 2 +- ...nsorflow.keras.optimizers.-optimizer.pbtxt | 2 +- ...nsorflow.keras.optimizers.-r-m-sprop.pbtxt | 2 +- .../tensorflow.keras.optimizers.-s-g-d.pbtxt | 2 +- ...n.experimental.-loss-scale-optimizer.pbtxt | 2 +- ...ensorflow.keras.optimizers.-adadelta.pbtxt | 2 +- ...tensorflow.keras.optimizers.-adagrad.pbtxt | 2 +- .../tensorflow.keras.optimizers.-adam.pbtxt | 2 +- .../tensorflow.keras.optimizers.-adamax.pbtxt | 2 +- .../tensorflow.keras.optimizers.-ftrl.pbtxt | 2 +- .../tensorflow.keras.optimizers.-nadam.pbtxt | 2 +- ...nsorflow.keras.optimizers.-optimizer.pbtxt | 2 +- ...nsorflow.keras.optimizers.-r-m-sprop.pbtxt | 2 +- .../tensorflow.keras.optimizers.-s-g-d.pbtxt | 2 +- .../v2/tensorflow.optimizers.-adadelta.pbtxt | 2 +- .../v2/tensorflow.optimizers.-adagrad.pbtxt | 2 +- .../v2/tensorflow.optimizers.-adam.pbtxt | 2 +- .../v2/tensorflow.optimizers.-adamax.pbtxt | 2 +- .../v2/tensorflow.optimizers.-ftrl.pbtxt | 2 +- .../v2/tensorflow.optimizers.-nadam.pbtxt | 2 +- .../v2/tensorflow.optimizers.-optimizer.pbtxt | 2 +- .../v2/tensorflow.optimizers.-r-m-sprop.pbtxt | 2 +- .../v2/tensorflow.optimizers.-s-g-d.pbtxt | 2 +- 33 files changed, 95 insertions(+), 53 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 0e5d7f11733..8039e8244cf 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -67,7 +67,8 @@ * `tf.distribute`: * * `tf.keras`: - * + * `Optimizer.minimize` can now accept a loss `Tensor` and a `GradientTape` + as an alternative to accepting a `callable` loss. * `tf.function` / AutoGraph: * Added `experimental_follow_type_hints` argument for `tf.function`. 
When True, the function may use type annotations to optimize the tracing diff --git a/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer.py b/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer.py index 55737a9da9c..f09c8c92e8c 100644 --- a/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer.py +++ b/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer.py @@ -348,7 +348,7 @@ class LossScaleOptimizer(_DelegatingTrackableMixin, optimizer_v2.OptimizerV2): for g in grads ] - def _compute_gradients(self, loss, var_list, grad_loss=None): + def _compute_gradients(self, loss, var_list, grad_loss=None, tape=None): loss = self.get_scaled_loss(loss) grads_and_vars = self._optimizer._compute_gradients(loss, var_list, # pylint: disable=protected-access grad_loss) diff --git a/tensorflow/python/keras/optimizer_v2/gradient_descent_test.py b/tensorflow/python/keras/optimizer_v2/gradient_descent_test.py index 0084f04bdd9..0f25beacc9a 100644 --- a/tensorflow/python/keras/optimizer_v2/gradient_descent_test.py +++ b/tensorflow/python/keras/optimizer_v2/gradient_descent_test.py @@ -702,6 +702,25 @@ class MomentumOptimizerTest(test.TestCase, parameterized.TestCase): self.assertAllClose(self.evaluate(opt_2.lr), (1.0)) self.assertAllClose(self.evaluate(opt_3.lr), (0.1)) + @combinations.generate(combinations.combine(mode=["eager"])) + def testMinimizeLossTensor(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + var0 = variables.Variable([[1.0, 2.0]], dtype=dtype) + var1 = variables.Variable([3.0], dtype=dtype) + x = constant_op.constant([[4.0], [5.0]], dtype=dtype) + + tape = backprop.GradientTape() + with tape: + loss = math_ops.matmul(var0, x) + var1 + sgd = gradient_descent.SGD(1.0) + with self.assertRaisesRegex(ValueError, "`tape` is required"): + sgd.minimize(loss, [var0, var1]) + sgd.minimize(loss, [var0, var1], tape=tape) + + self.assertAllCloseAccordingToType([[1.0 - 4.0, 2.0 - 5.0]], + self.evaluate(var0)) + self.assertAllCloseAccordingToType([3.0 - 1.0], self.evaluate(var1)) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py index 71b58739fb2..18d94594542 100644 --- a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py +++ b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py @@ -55,6 +55,12 @@ from tensorflow.python.util import tf_inspect from tensorflow.python.util.tf_export import keras_export +_DEFAULT_VALID_DTYPES = frozenset([ + dtypes.float16, dtypes.bfloat16, dtypes.float32, dtypes.float64, + dtypes.complex64, dtypes.complex128 +]) + + def _deduplicate_indexed_slices(values, indices): """Sums `values` associated with any non-unique `indices`. @@ -344,7 +350,7 @@ class OptimizerV2(trackable.Trackable): else: self._distribution_strategy = None - def minimize(self, loss, var_list, grad_loss=None, name=None): + def minimize(self, loss, var_list, grad_loss=None, name=None, tape=None): """Minimize `loss` by updating `var_list`. This method simply computes gradient using `tf.GradientTape` and calls @@ -353,14 +359,19 @@ class OptimizerV2(trackable.Trackable): of using this function. Args: - loss: A callable taking no arguments which returns the value to minimize. + loss: `Tensor` or callable. If a callable, `loss` should take no arguments + and return the value to minimize. If a `Tensor`, the `tape` argument + must be passed. 
var_list: list or tuple of `Variable` objects to update to minimize `loss`, or a callable returning the list or tuple of `Variable` objects. Use callable when the variable list would otherwise be incomplete before `minimize` since the variables are created at the first time `loss` is called. - grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`. - name: Optional name for the returned operation. + grad_loss: (Optional). A `Tensor` holding the gradient computed for + `loss`. + name: (Optional) str. Name for the returned operation. + tape: (Optional) `tf.GradientTape`. If `loss` is provided as a `Tensor`, + the tape that computed the `loss` must be provided. Returns: An `Operation` that updates the variables in `var_list`. The `iterations` @@ -371,8 +382,7 @@ class OptimizerV2(trackable.Trackable): """ grads_and_vars = self._compute_gradients( - loss, var_list=var_list, grad_loss=grad_loss) - + loss, var_list=var_list, grad_loss=grad_loss, tape=tape) return self.apply_gradients(grads_and_vars, name=name) def _clip_gradients(self, grads): @@ -395,7 +405,7 @@ class OptimizerV2(trackable.Trackable): ] return grads - def _compute_gradients(self, loss, var_list, grad_loss=None): + def _compute_gradients(self, loss, var_list, grad_loss=None, tape=None): """Compute gradients of `loss` for the variables in `var_list`. This is the first part of `minimize()`. It returns a list @@ -405,13 +415,17 @@ class OptimizerV2(trackable.Trackable): given variable. Args: - loss: A callable taking no arguments which returns the value to minimize. + loss: `Tensor` or callable. If a callable, `loss` should take no + arguments and return the value to minimize. If a `Tensor`, the `tape` + argument must be passed. var_list: list or tuple of `Variable` objects to update to minimize `loss`, or a callable returning the list or tuple of `Variable` objects. Use callable when the variable list would otherwise be incomplete before `minimize` and the variables are created at the first time when `loss` is called. grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`. + tape: (Optional) `tf.GradientTape`. If `loss` is provided as a `Tensor`, + the tape that computed the `loss` must be provided. Returns: A list of (gradient, variable) pairs. Variable is always present, but @@ -422,18 +436,28 @@ class OptimizerV2(trackable.Trackable): ValueError: If some arguments are invalid, or var_list is None. """ # TODO(josh11b): Test that we handle weight decay in a reasonable way. - with backprop.GradientTape() as tape: - if not callable(var_list): - tape.watch(var_list) - loss_value = loss() - if callable(var_list): - var_list = var_list() - var_list = nest.flatten(var_list) - with backend.name_scope(self._name + "/gradients"): - grads = tape.gradient(loss_value, var_list, grad_loss) - grads = self._clip_gradients(grads) + if not callable(loss) and tape is None: + raise ValueError("`tape` is required when a `Tensor` loss is passed.") + tape = tape if tape is not None else backprop.GradientTape() + if callable(loss): + with tape: + if not callable(var_list): + tape.watch(var_list) + + if callable(loss): + loss = loss() + + if callable(var_list): + var_list = var_list() + + var_list = nest.flatten(var_list) + with ops.name_scope_v2(self._name + "/gradients"): + grads = tape.gradient(loss, var_list, grad_loss) + # TODO(omalleyt): Move to post-aggregation. 
+ grads = self._clip_gradients(grads) grads_and_vars = list(zip(grads, var_list)) + self._assert_valid_dtypes([ v for g, v in grads_and_vars if g is not None and v.dtype != dtypes.resource @@ -508,6 +532,7 @@ class OptimizerV2(trackable.Trackable): Raises: TypeError: If `grads_and_vars` is malformed. ValueError: If none of the variables have gradients. + RuntimeError: If called in a cross-replica context. """ grads_and_vars = optimizer_utils.filter_empty_gradients(grads_and_vars) var_list = [v for (_, v) in grads_and_vars] @@ -1054,10 +1079,7 @@ class OptimizerV2(trackable.Trackable): Returns: Valid types for loss, variables and gradients. """ - return set([ - dtypes.float16, dtypes.bfloat16, dtypes.float32, dtypes.float64, - dtypes.complex64, dtypes.complex128 - ]) + return _DEFAULT_VALID_DTYPES def _call_if_callable(self, param): """Call the function if param is callable.""" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.mixed_precision.experimental.-loss-scale-optimizer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.mixed_precision.experimental.-loss-scale-optimizer.pbtxt index 4f586cfc1ef..dbab3abae8e 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.mixed_precision.experimental.-loss-scale-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.mixed_precision.experimental.-loss-scale-optimizer.pbtxt @@ -79,7 +79,7 @@ tf_class { } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "set_weights" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adadelta.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adadelta.pbtxt index 3a4e965007e..af854e98013 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adadelta.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adadelta.pbtxt @@ -58,7 +58,7 @@ tf_class { } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "set_weights" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adagrad.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adagrad.pbtxt index 2cbc8b84800..e89cc5cef75 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adagrad.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adagrad.pbtxt @@ -58,7 +58,7 @@ tf_class { } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "set_weights" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adam.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adam.pbtxt index 19e805bd51c..15414d7234f 100644 --- 
a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adam.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adam.pbtxt @@ -58,7 +58,7 @@ tf_class { } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "set_weights" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adamax.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adamax.pbtxt index 9e8914aa432..8b3c429e6b5 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adamax.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adamax.pbtxt @@ -58,7 +58,7 @@ tf_class { } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "set_weights" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-ftrl.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-ftrl.pbtxt index e3656cef3e7..51ab675db74 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-ftrl.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-ftrl.pbtxt @@ -58,7 +58,7 @@ tf_class { } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "set_weights" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-nadam.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-nadam.pbtxt index ed7b6cf0eb8..342c0951bbe 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-nadam.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-nadam.pbtxt @@ -58,7 +58,7 @@ tf_class { } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "set_weights" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-optimizer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-optimizer.pbtxt index d5f60307028..f007b4b971a 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-optimizer.pbtxt @@ -57,7 +57,7 @@ tf_class { } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } 
member_method { name: "set_weights" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-r-m-sprop.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-r-m-sprop.pbtxt index 7bb57f46a34..d5bf6fa7f47 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-r-m-sprop.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-r-m-sprop.pbtxt @@ -58,7 +58,7 @@ tf_class { } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "set_weights" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-s-g-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-s-g-d.pbtxt index c14f081f444..df904f72511 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-s-g-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-s-g-d.pbtxt @@ -58,7 +58,7 @@ tf_class { } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "set_weights" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.mixed_precision.experimental.-loss-scale-optimizer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.mixed_precision.experimental.-loss-scale-optimizer.pbtxt index 4f586cfc1ef..dbab3abae8e 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.mixed_precision.experimental.-loss-scale-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.mixed_precision.experimental.-loss-scale-optimizer.pbtxt @@ -79,7 +79,7 @@ tf_class { } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "set_weights" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adadelta.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adadelta.pbtxt index 3a4e965007e..af854e98013 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adadelta.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adadelta.pbtxt @@ -58,7 +58,7 @@ tf_class { } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "set_weights" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adagrad.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adagrad.pbtxt index 2cbc8b84800..e89cc5cef75 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adagrad.pbtxt +++ 
b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adagrad.pbtxt @@ -58,7 +58,7 @@ tf_class { } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "set_weights" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adam.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adam.pbtxt index 19e805bd51c..15414d7234f 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adam.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adam.pbtxt @@ -58,7 +58,7 @@ tf_class { } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "set_weights" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adamax.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adamax.pbtxt index 9e8914aa432..8b3c429e6b5 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adamax.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adamax.pbtxt @@ -58,7 +58,7 @@ tf_class { } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "set_weights" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-ftrl.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-ftrl.pbtxt index e3656cef3e7..51ab675db74 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-ftrl.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-ftrl.pbtxt @@ -58,7 +58,7 @@ tf_class { } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "set_weights" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-nadam.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-nadam.pbtxt index ed7b6cf0eb8..342c0951bbe 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-nadam.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-nadam.pbtxt @@ -58,7 +58,7 @@ tf_class { } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "set_weights" diff --git 
a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-optimizer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-optimizer.pbtxt index d5f60307028..f007b4b971a 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-optimizer.pbtxt @@ -57,7 +57,7 @@ tf_class { } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "set_weights" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-r-m-sprop.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-r-m-sprop.pbtxt index 7bb57f46a34..d5bf6fa7f47 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-r-m-sprop.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-r-m-sprop.pbtxt @@ -58,7 +58,7 @@ tf_class { } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "set_weights" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-s-g-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-s-g-d.pbtxt index c14f081f444..df904f72511 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-s-g-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-s-g-d.pbtxt @@ -58,7 +58,7 @@ tf_class { } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "set_weights" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adadelta.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adadelta.pbtxt index a1a7c59dc48..cb3d38246a7 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adadelta.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adadelta.pbtxt @@ -58,7 +58,7 @@ tf_class { } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "set_weights" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adagrad.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adagrad.pbtxt index 95579a35612..c7b2bca4b6b 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adagrad.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adagrad.pbtxt @@ -58,7 +58,7 @@ tf_class { } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, 
defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "set_weights" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adam.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adam.pbtxt index 78e5f6af446..209c9fe6620 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adam.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adam.pbtxt @@ -58,7 +58,7 @@ tf_class { } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "set_weights" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adamax.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adamax.pbtxt index 29e50bd90dc..12bbb14fb71 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adamax.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adamax.pbtxt @@ -58,7 +58,7 @@ tf_class { } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "set_weights" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-ftrl.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-ftrl.pbtxt index 4ba2ee05ef4..1482ed54eb9 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-ftrl.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-ftrl.pbtxt @@ -58,7 +58,7 @@ tf_class { } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "set_weights" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-nadam.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-nadam.pbtxt index 419378e2174..2a422fa2340 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-nadam.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-nadam.pbtxt @@ -58,7 +58,7 @@ tf_class { } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "set_weights" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-optimizer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-optimizer.pbtxt index a390e41c6ea..e7021e02772 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-optimizer.pbtxt @@ -57,7 +57,7 @@ tf_class { } member_method { name: 
"minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "set_weights" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-r-m-sprop.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-r-m-sprop.pbtxt index 588dce59e1c..6543f4023a4 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-r-m-sprop.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-r-m-sprop.pbtxt @@ -58,7 +58,7 @@ tf_class { } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "set_weights" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-s-g-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-s-g-d.pbtxt index 5c4bcc4c17d..94ff8dfcdfc 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-s-g-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-s-g-d.pbtxt @@ -58,7 +58,7 @@ tf_class { } member_method { name: "minimize" - argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\', \'tape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "set_weights" From 91433ded8dd744cf772f88bce1ee3bdc259e0875 Mon Sep 17 00:00:00 2001 From: Ayush Dubey Date: Wed, 29 Jul 2020 16:46:46 -0700 Subject: [PATCH 1671/2522] [TF2XLA] Test MLIR bridge support in def_function_xla_jit_test PiperOrigin-RevId: 323896029 Change-Id: I614d2c76dd607e2a6c6fdb5edb55f9349874d546 --- tensorflow/python/eager/BUILD | 12 +- .../python/eager/def_function_xla_jit_test.py | 640 ++++++++---------- 2 files changed, 297 insertions(+), 355 deletions(-) diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD index 7336e85b2de..b9c43c2e4e9 100644 --- a/tensorflow/python/eager/BUILD +++ b/tensorflow/python/eager/BUILD @@ -846,26 +846,18 @@ tf_py_test( ], ) -tf_xla_py_test( +cuda_py_test( name = "def_function_xla_jit_test", srcs = ["def_function_xla_jit_test.py"], - enable_mlir_bridge = True, - enabled_backends = [ - # TODO(b/162438052): Enable the test on TPU. 
- "cpu", - "gpu", - ], python_version = "PY3", tags = [ "no_mac", - "no_pip", "no_windows", ], - use_xla_device = False, + xla_enabled = True, deps = [ ":backprop", ":def_function", - "//tensorflow/compiler/tests:xla_test", "//tensorflow/python:client_testlib", "//tensorflow/python:constant_op", "//tensorflow/python:framework_ops", diff --git a/tensorflow/python/eager/def_function_xla_jit_test.py b/tensorflow/python/eager/def_function_xla_jit_test.py index 3e324c90fbc..bd7a6ec2279 100644 --- a/tensorflow/python/eager/def_function_xla_jit_test.py +++ b/tensorflow/python/eager/def_function_xla_jit_test.py @@ -17,7 +17,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.compiler.tests import xla_test from tensorflow.python.eager import backprop from tensorflow.python.eager import context from tensorflow.python.eager import def_function @@ -37,45 +36,43 @@ from tensorflow.python.ops import variables from tensorflow.python.platform import test -class DefFunctionTest(xla_test.XLATestCase): +class DefFunctionTest(test.TestCase): def testAutoclusteringWithTfFunction(self): - with ops.device('device:{}:0'.format(self.device)): - @def_function.function(experimental_compile=False) - def outer(a, b, c): - return a * inner(b, c) + c + @def_function.function(experimental_compile=False) + def outer(a, b, c): + return a * inner(b, c) + c - @def_function.function(experimental_compile=True) - def inner(b, c): - return b + c * b + @def_function.function(experimental_compile=True) + def inner(b, c): + return b + c * b - i1 = constant_op.constant([1.0, 2.0, 3.0, 4.0, 5.0]) - i2 = constant_op.constant([1.0, 2.0, 3.0, 4.0, 5.0]) - i3 = constant_op.constant([1.0, 2.0, 3.0, 4.0, 5.0]) + i1 = constant_op.constant([1.0, 2.0, 3.0, 4.0, 5.0]) + i2 = constant_op.constant([1.0, 2.0, 3.0, 4.0, 5.0]) + i3 = constant_op.constant([1.0, 2.0, 3.0, 4.0, 5.0]) - with context.collect_graphs(optimized=True) as graphs: - outer(i1, i2, i3) + with context.collect_graphs(optimized=True) as graphs: + outer(i1, i2, i3) - if test_util.is_xla_enabled(): - self.assertIn('_XlaRun', [n.op for n in graphs[0].node]) - else: - self.assertNotIn('_XlaRun', [n.op for n in graphs[0].node]) + if test_util.is_xla_enabled(): + self.assertIn('_XlaRun', [n.op for n in graphs[0].node]) + else: + self.assertNotIn('_XlaRun', [n.op for n in graphs[0].node]) def testBasic(self): - with ops.device('device:{}:0'.format(self.device)): - def fn(x, a): - return x + a + def fn(x, a): + return x + a - func = def_function.function(fn, experimental_compile=False) - xla_func = def_function.function(fn, experimental_compile=True) + func = def_function.function(fn, experimental_compile=False) + xla_func = def_function.function(fn, experimental_compile=True) - inputs = constant_op.constant([1, 2, 2, 3, 3]) - self.assertAllClose([2, 3, 3, 4, 4], func(inputs, 1)) - if not test.is_built_with_rocm(): - # XLA support is not yet enabled for TF ROCm - self.assertAllClose([2, 3, 3, 4, 4], xla_func(inputs, 1)) + inputs = constant_op.constant([1, 2, 2, 3, 3]) + self.assertAllClose([2, 3, 3, 4, 4], func(inputs, 1)) + if not test.is_built_with_rocm(): + # XLA support is not yet enabled for TF ROCm + self.assertAllClose([2, 3, 3, 4, 4], xla_func(inputs, 1)) def testBasicInt32(self): @@ -90,432 +87,385 @@ class DefFunctionTest(xla_test.XLATestCase): self.assertAllClose([2, 3, 3, 4, 4], xla_func(inputs, 1)) def testDerivative(self): - with ops.device('device:{}:0'.format(self.device)): - if 
test.is_built_with_rocm(): - return + if test.is_built_with_rocm(): + return - def fn(x, a): - return 2 * x + a + def fn(x, a): + return 2 * x + a - xla_func = def_function.function(fn, experimental_compile=True) + xla_func = def_function.function(fn, experimental_compile=True) - with backprop.GradientTape() as tape: - inputs = constant_op.constant([1., 2., 2., 3., 3.]) - tape.watch(inputs) - outputs = xla_func(inputs, 1) + with backprop.GradientTape() as tape: + inputs = constant_op.constant([1., 2., 2., 3., 3.]) + tape.watch(inputs) + outputs = xla_func(inputs, 1) - self.assertAllClose([2, 2, 2, 2, 2], tape.gradient(outputs, inputs)) + self.assertAllClose([2, 2, 2, 2, 2], tape.gradient(outputs, inputs)) - # pylint: disable=protected-access - (forward, backward) = xla_func.get_concrete_function( - inputs, 1)._delayed_rewrite_functions.forward_backward() + # pylint: disable=protected-access + (forward, backward) = xla_func.get_concrete_function( + inputs, 1)._delayed_rewrite_functions.forward_backward() - # Check that the must-compile attribute gets correctly propagated to the - # created derivatives. - self.assertTrue(backward.function_def.attr['_XlaMustCompile']) - self.assertTrue(forward.definition.attr['_XlaMustCompile']) + # Check that the must-compile attribute gets correctly propagated to the + # created derivatives. + self.assertTrue(backward.function_def.attr['_XlaMustCompile']) + self.assertTrue(forward.definition.attr['_XlaMustCompile']) # Calling function with experimental_compile=True from # experimental_compile=False should compile the inner func. def testNestedCall(self): - with ops.device('device:{}:0'.format(self.device)): - def fn(x, a): - return x + a + def fn(x, a): + return x + a - xla_func = def_function.function(fn, experimental_compile=True) + xla_func = def_function.function(fn, experimental_compile=True) - def fn2(x, a): - return xla_func(x, a) + def fn2(x, a): + return xla_func(x, a) - func = def_function.function(fn2, experimental_compile=False) + func = def_function.function(fn2, experimental_compile=False) - inputs = constant_op.constant([1, 2, 2, 3, 3]) - if not test.is_built_with_rocm(): - # XLA support is not yet enabled for TF ROCm - self.assertAllClose([2, 3, 3, 4, 4], func(inputs, 1)) + inputs = constant_op.constant([1, 2, 2, 3, 3]) + if not test.is_built_with_rocm(): + # XLA support is not yet enabled for TF ROCm + self.assertAllClose([2, 3, 3, 4, 4], func(inputs, 1)) - @test_util.disable_mlir_bridge('TODO(b/162272821): MLIR bridge returns' - ' wrong status type') def testNestedCallUnsupportedOps(self): - with ops.device('device:{}:0'.format(self.device)): - def fn(x): - return array_ops.unique(x).y + def fn(x): + return array_ops.unique(x).y - xla_func = def_function.function(fn, experimental_compile=True) + xla_func = def_function.function(fn, experimental_compile=True) - def fn2(x): - return xla_func(x) + def fn2(x): + return xla_func(x) - func = def_function.function(fn2, experimental_compile=False) - inputs = constant_op.constant([1, 2, 2, 3, 3]) - if not test.is_built_with_rocm(): - with self.assertRaisesRegex(errors.InvalidArgumentError, - 'not compilable'): - func(inputs) - - @test_util.disable_mlir_bridge('TODO(b/162272821): MLIR bridge returns' - ' wrong status type') - def testUnsupportedOps(self): - with ops.device('device:{}:0'.format(self.device)): - - def fn(x): - return array_ops.unique(x).y # Unique is not supported by XLA - - func = def_function.function(fn, experimental_compile=False) - xla_func = def_function.function(fn, 
experimental_compile=True) - - inputs = constant_op.constant([1, 2, 2, 3, 3]) - self.assertAllClose([1, 2, 3], func(inputs)) + func = def_function.function(fn2, experimental_compile=False) + inputs = constant_op.constant([1, 2, 2, 3, 3]) + if not test.is_built_with_rocm(): with self.assertRaisesRegex(errors.InvalidArgumentError, 'not compilable'): - xla_func(inputs) + func(inputs) + + def testUnsupportedOps(self): + + def fn(x): + return array_ops.unique(x).y # Unique is not supported by XLA + + func = def_function.function(fn, experimental_compile=False) + xla_func = def_function.function(fn, experimental_compile=True) + + inputs = constant_op.constant([1, 2, 2, 3, 3]) + self.assertAllClose([1, 2, 3], func(inputs)) + with self.assertRaisesRegex(errors.InvalidArgumentError, 'not compilable'): + xla_func(inputs) def testFunctionGradient(self): - with ops.device('device:{}:0'.format(self.device)): - v = resource_variable_ops.ResourceVariable(2.0) + v = resource_variable_ops.ResourceVariable(2.0) - def fn(x): - return v * x + def fn(x): + return v * x - func = def_function.function(fn, experimental_compile=False) - xla_func = def_function.function(fn, experimental_compile=True) + func = def_function.function(fn, experimental_compile=False) + xla_func = def_function.function(fn, experimental_compile=True) - def run_and_check(test_func): - x = constant_op.constant(3.0) - with backprop.GradientTape() as tape: - y = test_func(x) - dy = tape.gradient(y, v) + def run_and_check(test_func): + x = constant_op.constant(3.0) + with backprop.GradientTape() as tape: + y = test_func(x) + dy = tape.gradient(y, v) - self.assertAllClose(6.0, y) - self.assertAllClose(3.0, dy) + self.assertAllClose(6.0, y) + self.assertAllClose(3.0, dy) - run_and_check(func) - if not test.is_built_with_rocm(): - # XLA support is not yet enabled for TF ROCm - run_and_check(xla_func) + run_and_check(func) + if not test.is_built_with_rocm(): + # XLA support is not yet enabled for TF ROCm + run_and_check(xla_func) def testControlFlow(self): - with ops.device('device:{}:0'.format(self.device)): + @def_function.function(experimental_compile=True) + def f(x): + assert control_flow_util.GraphOrParentsInXlaContext( + ops.get_default_graph()) + x = ops.convert_to_tensor(x) - @def_function.function(experimental_compile=True) - def f(x): - assert control_flow_util.GraphOrParentsInXlaContext( - ops.get_default_graph()) - x = ops.convert_to_tensor(x) + def body(i, a): + return i + 1, control_flow_ops.cond(i > 2, lambda: a + (x**2), + lambda: a + 3) - def body(i, a): - return i + 1, control_flow_ops.cond(i > 2, lambda: a + (x**2), - lambda: a + 3) + return control_flow_ops.while_loop( + lambda i, *_: i < 10, + body, (constant_op.constant(0), constant_op.constant(3.)), + maximum_iterations=10)[1] - return control_flow_ops.while_loop( - lambda i, *_: i < 10, - body, (constant_op.constant(0), constant_op.constant(3.)), - maximum_iterations=10)[1] + @def_function.function(experimental_compile=True) + def g(x): + x = ops.convert_to_tensor(x) + with backprop.GradientTape() as tape: + tape.watch(x) + y = f(x) + return y, tape.gradient(y, x) - @def_function.function(experimental_compile=True) - def g(x): - x = ops.convert_to_tensor(x) - with backprop.GradientTape() as tape: - tape.watch(x) - y = f(x) - return y, tape.gradient(y, x) - - self.assertAllClose(40.0, f(2.0)) - self.assertAllClose([40.0, 28.0], g(2.0)) + self.assertAllClose(40.0, f(2.0)) + self.assertAllClose([40.0, 28.0], g(2.0)) def testMethodCompilation(self): if 
test.is_built_with_rocm(): return - with ops.device('device:{}:0'.format(self.device)): + class C(object): - class C(object): + @def_function.function(experimental_compile=True) + def f1(self, x, a): + return x + a - @def_function.function(experimental_compile=True) - def f1(self, x, a): - return x + a + inputs = constant_op.constant([1, 2, 2, 3, 3]) + c = C() + self.assertAllClose([2, 3, 3, 4, 4], c.f1(inputs, 1)) - inputs = constant_op.constant([1, 2, 2, 3, 3]) - c = C() - self.assertAllClose([2, 3, 3, 4, 4], c.f1(inputs, 1)) - - @test_util.disable_mlir_bridge('TODO(b/162272821): MLIR bridge returns ' - ' wrong status type') def testMethodCompilationUnsupportedFunc(self): if test.is_built_with_rocm(): return - with ops.device('device:{}:0'.format(self.device)): + class C(object): - class C(object): + @def_function.function(experimental_compile=True) + def f1(self, x): + return array_ops.unique(x).y - @def_function.function(experimental_compile=True) - def f1(self, x): - return array_ops.unique(x).y - - inputs = constant_op.constant([1, 2, 2, 3, 3]) - c = C() - with self.assertRaisesRegex(errors.InvalidArgumentError, - 'not compilable'): - c.f1(inputs) + inputs = constant_op.constant([1, 2, 2, 3, 3]) + c = C() + with self.assertRaisesRegex(errors.InvalidArgumentError, 'not compilable'): + c.f1(inputs) def testMustBeConstantPropagation(self): - with ops.device('device:{}:0'.format(self.device)): - if test.is_built_with_rocm(): - return + if test.is_built_with_rocm(): + return - @def_function.function(experimental_compile=True) - def f(): - return constant_op.constant([0, 2, 1], dtype=dtypes.int32) + @def_function.function(experimental_compile=True) + def f(): + return constant_op.constant([0, 2, 1], dtype=dtypes.int32) - @def_function.function(experimental_compile=True) - def g(a, b): - return array_ops.transpose(a, b) + @def_function.function(experimental_compile=True) + def g(a, b): + return array_ops.transpose(a, b) - @def_function.function - def z(): - return g(array_ops.ones([3, 4, 3], dtype=dtypes.float32), f()) + @def_function.function + def z(): + return g(array_ops.ones([3, 4, 3], dtype=dtypes.float32), f()) - z() + z() - @test_util.disable_mlir_bridge('TODO(b/162271237): argmax gives different' - ' results in MLIR-based bridge') def testArgMinMax(self): - with ops.device('device:{}:0'.format(self.device)): - @def_function.function(experimental_compile=True) - def argmax(x): - return math_ops.argmax(x) + @def_function.function(experimental_compile=True) + def argmax(x): + return math_ops.argmax(x) - @def_function.function(experimental_compile=True) - def argmin(x): - return math_ops.argmin(x) + @def_function.function(experimental_compile=True) + def argmin(x): + return math_ops.argmin(x) - self.assertAllClose(0, argmax(array_ops.ones([10], dtype=dtypes.float32))) - self.assertAllClose(0, argmax(array_ops.ones([10]))) - self.assertAllClose(0, argmin(array_ops.ones([10], dtype=dtypes.float32))) - self.assertAllClose(0, argmin(array_ops.ones([10]))) + self.assertAllClose(0, argmax(array_ops.ones([10], dtype=dtypes.float32))) + self.assertAllClose(0, argmax(array_ops.ones([10]))) + self.assertAllClose(0, argmin(array_ops.ones([10], dtype=dtypes.float32))) + self.assertAllClose(0, argmin(array_ops.ones([10]))) - @test_util.disable_mlir_bridge('TensorArray support not implemented') def testErrorMessagePassingTensorArray(self): - with ops.device('device:{}:0'.format(self.device)): - @def_function.function(experimental_compile=True) - def f(x): - ta = tensor_array_ops.TensorArray( - 
dtype=dtypes.float32, size=1, element_shape=[]) - ta = ta.write(0, 2 * x) - y = ta.read(0) - return y + @def_function.function(experimental_compile=True) + def f(x): + ta = tensor_array_ops.TensorArray( + dtype=dtypes.float32, size=1, element_shape=[]) + ta = ta.write(0, 2 * x) + y = ta.read(0) + return y - x = constant_op.constant(3.14) + x = constant_op.constant(3.14) + with backprop.GradientTape() as tape: + tape.watch(x) + with self.assertRaisesRegex(errors.UnimplementedError, + 'TensorList crossing the XLA/TF boundary'): + y = f(x) + tape.gradient(y, x) + + def testTensorListConcatV2(self): + + def f(x): + ta = tensor_array_ops.TensorArray( + dtype=dtypes.float32, size=2, element_shape=[3]) + ta = ta.write(0, 2 * x) + ta = ta.write(1, 3 * x) + return ta.concat() + + compiled_f = def_function.function(experimental_compile=True)(f) + + inputs = constant_op.constant([3.14, 2.68, 7.69]) + + self.assertAllClose([6.28, 5.36, 15.38, 9.42, 8.04, 23.07], f(inputs)) + + self.assertAllClose(compiled_f(inputs), f(inputs)) + + def testTensorListConcatV2Multidim(self): + + def f(x): + ta = tensor_array_ops.TensorArray( + dtype=dtypes.float32, size=2, element_shape=[3, 2]) + ta = ta.write(0, 2 * x) + ta = ta.write(1, 3 * x) + return ta.concat() + + compiled_f = def_function.function(experimental_compile=True)(f) + + inputs = constant_op.constant([[3.14, 21.1], [2.68, 22.2], [7.69, 23.3]]) + self.assertAllClose(f(inputs), compiled_f(inputs)) + + def testTensorListConcatV2Scalars(self): + + def f(x): + ta = tensor_array_ops.TensorArray( + dtype=dtypes.float32, size=2, element_shape=[1]) + ta = ta.write(0, 2 * x) + ta = ta.write(1, 3 * x) + return ta.concat() + + compiled_f = def_function.function(experimental_compile=True)(f) + inputs = constant_op.constant([3.14]) + self.assertAllClose(f(inputs), compiled_f(inputs)) + + def testTensorListConcatGrad(self): + + def f(x): + ta = tensor_array_ops.TensorArray( + dtype=dtypes.float32, size=2, element_shape=[3]) + ta = ta.write(0, 2 * x) + ta = ta.write(1, 3 * x) + return ta.concat() + + def g(): + x = constant_op.constant([3.14, 2.68, 7.69]) with backprop.GradientTape() as tape: tape.watch(x) - with self.assertRaisesRegex(errors.UnimplementedError, - 'TensorList crossing the XLA/TF boundary'): - y = f(x) - tape.gradient(y, x) + y = f(x) + return tape.gradient(y, x) - @test_util.disable_mlir_bridge('TODO(b/162281863): MLIR bridge errors out' - ' lowering TensorListConcatV2') - def testTensorListConcatV2(self): - with ops.device('device:{}:0'.format(self.device)): + compiled_g = def_function.function(experimental_compile=True)(g) - def f(x): - ta = tensor_array_ops.TensorArray( - dtype=dtypes.float32, size=2, element_shape=[3]) - ta = ta.write(0, 2 * x) - ta = ta.write(1, 3 * x) - return ta.concat() + self.assertAllClose([5.0, 5.0, 5.0], g()) + self.assertAllClose(compiled_g(), g()) - compiled_f = def_function.function(experimental_compile=True)(f) - - inputs = constant_op.constant([3.14, 2.68, 7.69]) - - self.assertAllClose([6.28, 5.36, 15.38, 9.42, 8.04, 23.07], f(inputs)) - - self.assertAllClose(compiled_f(inputs), f(inputs)) - - @test_util.disable_mlir_bridge('TODO(b/162281863): MLIR bridge errors out' - ' lowering TensorListConcatV2') - def testTensorListConcatV2Multidim(self): - with ops.device('device:{}:0'.format(self.device)): - - def f(x): - ta = tensor_array_ops.TensorArray( - dtype=dtypes.float32, size=2, element_shape=[3, 2]) - ta = ta.write(0, 2 * x) - ta = ta.write(1, 3 * x) - return ta.concat() - - compiled_f = 
def_function.function(experimental_compile=True)(f) - - inputs = constant_op.constant([[3.14, 21.1], [2.68, 22.2], [7.69, 23.3]]) - self.assertAllClose(f(inputs), compiled_f(inputs)) - - @test_util.disable_mlir_bridge('TODO(b/162281863): MLIR bridge errors out' - ' lowering TensorListConcatV2') - def testTensorListConcatV2Scalars(self): - with ops.device('device:{}:0'.format(self.device)): - - def f(x): - ta = tensor_array_ops.TensorArray( - dtype=dtypes.float32, size=2, element_shape=[1]) - ta = ta.write(0, 2 * x) - ta = ta.write(1, 3 * x) - return ta.concat() - - compiled_f = def_function.function(experimental_compile=True)(f) - inputs = constant_op.constant([3.14]) - self.assertAllClose(f(inputs), compiled_f(inputs)) - - @test_util.disable_mlir_bridge('TODO(b/162281863): MLIR bridge errors out' - ' lowering TensorListConcatV2') - def testTensorListConcatGrad(self): - with ops.device('device:{}:0'.format(self.device)): - - def f(x): - ta = tensor_array_ops.TensorArray( - dtype=dtypes.float32, size=2, element_shape=[3]) - ta = ta.write(0, 2 * x) - ta = ta.write(1, 3 * x) - return ta.concat() - - def g(): - x = constant_op.constant([3.14, 2.68, 7.69]) - with backprop.GradientTape() as tape: - tape.watch(x) - y = f(x) - return tape.gradient(y, x) - - compiled_g = def_function.function(experimental_compile=True)(g) - - self.assertAllClose([5.0, 5.0, 5.0], g()) - self.assertAllClose(compiled_g(), g()) - - @test_util.disable_mlir_bridge('TODO(b/162281863): MLIR bridge errors out' - ' lowering TensorListConcatV2') def testTensorListConcatGradNestedCompile(self): - with ops.device('device:{}:0'.format(self.device)): - @def_function.function(experimental_compile=True) - def f(x): - ta = tensor_array_ops.TensorArray( - dtype=dtypes.float32, size=2, element_shape=[3]) - ta = ta.write(0, 2 * x) - ta = ta.write(1, 3 * x) - return ta.concat() + @def_function.function(experimental_compile=True) + def f(x): + ta = tensor_array_ops.TensorArray( + dtype=dtypes.float32, size=2, element_shape=[3]) + ta = ta.write(0, 2 * x) + ta = ta.write(1, 3 * x) + return ta.concat() - @def_function.function(experimental_compile=True) - def g(): - x = constant_op.constant([3.14, 2.68, 7.69]) - with backprop.GradientTape() as tape: - tape.watch(x) - y = f(x) - out = tape.gradient(y, x) - return out + @def_function.function(experimental_compile=True) + def g(): + x = constant_op.constant([3.14, 2.68, 7.69]) + with backprop.GradientTape() as tape: + tape.watch(x) + y = f(x) + out = tape.gradient(y, x) + return out - self.assertAllClose([5.0, 5.0, 5.0], g()) + self.assertAllClose([5.0, 5.0, 5.0], g()) def testCumsum(self): - with ops.device('device:{}:0'.format(self.device)): - @def_function.function(experimental_compile=True) - def f(x): - return math_ops.cumsum(x) + @def_function.function(experimental_compile=True) + def f(x): + return math_ops.cumsum(x) - f64_input = constant_op.constant([1.1, 2.2, 3.3], dtype=dtypes.float64) - self.assertAllClose([1.1, 3.3, 6.6], f(f64_input)) + f64_input = constant_op.constant([1.1, 2.2, 3.3], dtype=dtypes.float64) + self.assertAllClose([1.1, 3.3, 6.6], f(f64_input)) def testNoExcessiveRetracing(self): - with ops.device('device:{}:0'.format(self.device)): - inner_retracings = 0 + inner_retracings = 0 - @def_function.function(experimental_compile=True) - def inner(a, b): - nonlocal inner_retracings - inner_retracings += 1 - return a * b + a + @def_function.function(experimental_compile=True) + def inner(a, b): + nonlocal inner_retracings + inner_retracings += 1 + return a * b + a - def 
outer(a, b): - return inner(a, b) + def outer(a, b): + return inner(a, b) - func_input = random_ops.random_normal([10, 10]) - for _ in range(2): - def_function.function(outer)(func_input, func_input) + func_input = random_ops.random_normal([10, 10]) + for _ in range(2): + def_function.function(outer)(func_input, func_input) - self.assertEqual(inner_retracings, 1) + self.assertEqual(inner_retracings, 1) def testUpdateVariable(self): - with ops.device('device:{}:0'.format(self.device)): - v = variables.Variable(3.1) + v = variables.Variable(3.1) - @def_function.function(experimental_compile=True) - def update_var(a, b): - v.assign_add(a * b) + @def_function.function(experimental_compile=True) + def update_var(a, b): + v.assign_add(a * b) - update_var(constant_op.constant(0.7), constant_op.constant(0.6)) - self.assertAllClose(v, 3.52) + update_var(constant_op.constant(0.7), constant_op.constant(0.6)) + self.assertAllClose(v, 3.52) def testUpdateVariableVector(self): - with ops.device('device:{}:0'.format(self.device)): - v = variables.Variable([3.1, 3.1]) + v = variables.Variable([3.1, 3.1]) - @def_function.function(experimental_compile=True) - def update_var(a, b): - v.assign_add(a * b) + @def_function.function(experimental_compile=True) + def update_var(a, b): + v.assign_add(a * b) - update_var( - constant_op.constant([0.7, 0.7]), constant_op.constant([0.6, 0.6])) - self.assertAllClose(v, [3.52, 3.52]) + update_var( + constant_op.constant([0.7, 0.7]), constant_op.constant([0.6, 0.6])) + self.assertAllClose(v, [3.52, 3.52]) def testUpdateVariableInClass(self): - with ops.device('device:{}:0'.format(self.device)): - class C(object): + class C(object): - @def_function.function(experimental_compile=True) - def update_var(self, a, b): - if not hasattr(self, 'v'): - self.v = variables.Variable(3.1) - self.v.assign_add(a * b) + @def_function.function(experimental_compile=True) + def update_var(self, a, b): + if not hasattr(self, 'v'): + self.v = variables.Variable(3.1) + self.v.assign_add(a * b) - c = C() + c = C() - @def_function.function - def outer(): - c.update_var(constant_op.constant(0.7), constant_op.constant(0.6)) + @def_function.function + def outer(): + c.update_var(constant_op.constant(0.7), constant_op.constant(0.6)) - outer() - self.assertAllClose(c.v, 3.52) + outer() + self.assertAllClose(c.v, 3.52) def testUpdateVariableMultipleOutputs(self): - with ops.device('device:{}:0'.format(self.device)): - v = variables.Variable(3.1) + v = variables.Variable(3.1) - @def_function.function(experimental_compile=True) - def update_var(a, b): - v.assign_add(a * b) - return a * b + v + @def_function.function(experimental_compile=True) + def update_var(a, b): + v.assign_add(a * b) + return a * b + v - out = update_var(constant_op.constant(0.7), constant_op.constant(0.6)) - self.assertAllClose(v, 3.52) - self.assertAllClose(out, 3.94) + out = update_var(constant_op.constant(0.7), constant_op.constant(0.6)) + self.assertAllClose(v, 3.52) + self.assertAllClose(out, 3.94) def testReturnIdentity(self): - with ops.device('device:{}:0'.format(self.device)): - @def_function.function(experimental_compile=True) - def f(a, b): - return (a, b) + @def_function.function(experimental_compile=True) + def f(a, b): + return (a, b) - a = constant_op.constant([0.7]) - b = constant_op.constant([0.6]) + a = constant_op.constant([0.7]) + b = constant_op.constant([0.6]) - f(a, b) + f(a, b) if __name__ == '__main__': From 02fbff33097a51c3c837b3c6499e95cd108f54fb Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Wed, 29 
Jul 2020 23:55:11 +0000 Subject: [PATCH 1672/2522] amend to BUILD file --- tensorflow/c/kernels/BUILD | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/c/kernels/BUILD b/tensorflow/c/kernels/BUILD index af6253f3403..efb81749ec7 100644 --- a/tensorflow/c/kernels/BUILD +++ b/tensorflow/c/kernels/BUILD @@ -34,6 +34,7 @@ tf_kernel_library( "//tensorflow/c/kernels:tensor_shape_utils", "//tensorflow/core:framework", "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", ], ) @@ -89,7 +90,10 @@ cc_library( srcs = ["tensor_shape_utils.cc"], hdrs = ["tensor_shape_utils.h"], visibility = ["//visibility:private"], - deps = ["//tensorflow/c:tf_tensor"], + deps = [ + "//tensorflow/c:tf_tensor", + "//tensorflow/core:lib", + ], ) tf_cc_test( From 308e0670532ac2e6d171bdaecca127be2e0f1bdd Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 29 Jul 2020 16:51:18 -0700 Subject: [PATCH 1673/2522] Make GCS File System's timeouts_ protected so that subclasses can access it in overwritten methods. PiperOrigin-RevId: 323896915 Change-Id: Iaa5d8652ad64e2ce889c24a7a3db789c0ab5aa80 --- tensorflow/core/platform/cloud/gcs_file_system.cc | 4 ++-- tensorflow/core/platform/cloud/gcs_file_system.h | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc index 59a4f2558b3..63c601f2244 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system.cc @@ -911,7 +911,8 @@ GcsFileSystem::GcsFileSystem( TimeoutConfig timeouts, const std::unordered_set& allowed_locations, std::pair* additional_header, bool compose_append) - : auth_provider_(std::move(auth_provider)), + : timeouts_(timeouts), + auth_provider_(std::move(auth_provider)), http_request_factory_(std::move(http_request_factory)), zone_provider_(std::move(zone_provider)), block_size_(block_size), @@ -924,7 +925,6 @@ GcsFileSystem::GcsFileSystem( kCacheNeverExpire, kBucketLocationCacheMaxEntries)), allowed_locations_(allowed_locations), compose_append_(compose_append), - timeouts_(timeouts), retry_config_(retry_config), additional_header_(additional_header) {} diff --git a/tensorflow/core/platform/cloud/gcs_file_system.h b/tensorflow/core/platform/cloud/gcs_file_system.h index 7bf95170cb1..6f0e9535bfe 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system.h +++ b/tensorflow/core/platform/cloud/gcs_file_system.h @@ -331,6 +331,9 @@ class GcsFileSystem : public FileSystem { std::shared_ptr compute_engine_metadata_client_; + // Used by a subclass. + TimeoutConfig timeouts_; + private: // GCS file statistics. struct GcsFileStat { @@ -427,8 +430,6 @@ class GcsFileSystem : public FileSystem { std::unordered_set allowed_locations_; bool compose_append_; - TimeoutConfig timeouts_; - GcsStatsInterface* stats_ = nullptr; // Not owned. /// The initial delay for exponential backoffs when retrying failed calls. From 94b36bc3495ce2ee6a739a5a80d498edafc4fb5c Mon Sep 17 00:00:00 2001 From: Karim Nosir Date: Wed, 29 Jul 2020 16:52:49 -0700 Subject: [PATCH 1674/2522] Update pattern for changing squeeze to reshape to handle other shapes with dynamic dimensions. If shape has unknown rank it will not do the transformation. 
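This rewrite relies on squeezing a statically size-1 axis being expressible as a
reshape even when other dimensions are dynamic. A minimal TensorFlow-level sketch
of that equivalence (the shapes below are illustrative assumptions, not taken from
this change):

import tensorflow as tf

@tf.function(input_signature=[tf.TensorSpec([None, 1, 8, 3], tf.float32)])
def squeeze_axis_1(x):
  # Squeezing the static size-1 axis gives the same values as reshaping with -1
  # standing in for the dynamic batch dimension, which is the form the rewrite
  # pattern emits.
  return tf.squeeze(x, axis=[1])

x = tf.ones([2, 1, 8, 3])
tf.debugging.assert_equal(squeeze_axis_1(x), tf.reshape(x, [-1, 8, 3]))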
PiperOrigin-RevId: 323897190 Change-Id: Ifab5cdd76986329927cb0ae1f9618dec338611b0 --- .../compiler/mlir/lite/tests/optimize.mlir | 29 +++++++++++++++++++ .../mlir/lite/transforms/optimize_patterns.td | 7 +++-- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/tests/optimize.mlir b/tensorflow/compiler/mlir/lite/tests/optimize.mlir index cafa654f0c0..b2e2e57168b 100644 --- a/tensorflow/compiler/mlir/lite/tests/optimize.mlir +++ b/tensorflow/compiler/mlir/lite/tests/optimize.mlir @@ -1028,3 +1028,32 @@ func @squaredDifferenceReluRemoveRelu(%arg0: tensor<1xf32>, %arg1: tensor<1xf32> // CHECK: return %[[RESULT]] } +func @ConvertSqueezeToReshapeWithDynamicDimension(%arg0: tensor) -> tensor { + %0 = "tfl.squeeze"(%arg0) {squeeze_dims = [1]}: (tensor) -> tensor + return %0: tensor + +// CHECK-LABEL: ConvertSqueezeToReshapeWithDynamicDimension +// CHECK: [[CONST:.*]] = constant dense<[-1, 8, 3]> : tensor<3xi32> +// CHECK: %[[RESULT:.*]] = "tfl.reshape"(%arg0, %[[CONST:.*]]) : (tensor, tensor<3xi32>) -> tensor +// CHECK: return %[[RESULT]] +} + +func @ConvertSqueezeToReshapeWithDynamicDimension2(%arg0: tensor) -> tensor<1x8x3xf32> { + %0 = "tfl.squeeze"(%arg0) {squeeze_dims = [0]}: (tensor) -> tensor<1x8x3xf32> + return %0: tensor<1x8x3xf32> + +// CHECK-LABEL: ConvertSqueezeToReshapeWithDynamicDimension2 +// CHECK: [[CONST:.*]] = constant dense<[1, 8, 3]> : tensor<3xi32> +// CHECK: %[[RESULT:.*]] = "tfl.reshape"(%arg0, %[[CONST:.*]]) : (tensor, tensor<3xi32>) -> tensor<1x8x3xf32> +// CHECK: return %[[RESULT]] +} + +func @DontConvertSqueezeToReshape(%arg0: tensor<*xf32>) -> tensor<*xf32> { + %0 = "tfl.squeeze"(%arg0) {squeeze_dims = [0]}: (tensor<*xf32>) -> tensor<*xf32> + return %0: tensor<*xf32> + +// CHECK-LABEL: DontConvertSqueezeToReshape +// CHECK: %[[RESULT:.*]] = "tfl.squeeze"(%arg0) +// CHECK: return %[[RESULT]] +} + diff --git a/tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td b/tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td index bffcf975ccc..4e9fbcf7a0c 100644 --- a/tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td +++ b/tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td @@ -404,11 +404,14 @@ foreach ValueOp = [TFL_CeilOp, TFL_ExpOp, TFL_FloorOp, TFL_NegOp, // if called without a ranked tensor it will fail. def GetShape: NativeCodeCall<"GetShape($0)">; -// Convert squeeze to reshape if possible. +// Returns True if the operand type is RankedTensorType. +def HasRankedTensor : Constraint< + CPred<"$0.getType().isa()">>; + def ConvertSqueezeToReshape : Pat< (TFL_SqueezeOp:$squeeze_op $input, $squeeze_dims), (TFL_ReshapeOp $input, (ConstantOp (GetShape $squeeze_op))), - [(AnyStaticShapeTensor $squeeze_op)]>; + [(HasRankedTensor $squeeze_op)]>; // Convert expand_dims to reshape if possible. 
def ConvertExpandDimsToReshape : Pat< From 832592656fd10a93de70c929b490b9b3064be0d9 Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Thu, 30 Jul 2020 00:09:30 +0000 Subject: [PATCH 1675/2522] clean up only --- tensorflow/c/kernels/summary_op.cc | 14 ++++++-------- tensorflow/c/kernels/tensor_shape_utils.cc | 2 +- tensorflow/c/kernels/tensor_shape_utils.h | 1 - tensorflow/c/kernels/tensor_shape_utils_test.cc | 2 +- 4 files changed, 8 insertions(+), 11 deletions(-) diff --git a/tensorflow/c/kernels/summary_op.cc b/tensorflow/c/kernels/summary_op.cc index 3ce3a1256a9..48648733f08 100644 --- a/tensorflow/c/kernels/summary_op.cc +++ b/tensorflow/c/kernels/summary_op.cc @@ -38,15 +38,15 @@ struct Params { TF_Tensor* tags; TF_Tensor* values; TF_Status* status; - Params(TF_OpKernelContext* ctx) : tags(nullptr), - values(nullptr), - status(nullptr) { + explicit Params(TF_OpKernelContext* ctx) : tags(nullptr), + values(nullptr), + status(nullptr) { status = TF_NewStatus(); TF_GetInput(ctx, 0, &tags, status); if (TF_GetCode(status) == TF_OK) { TF_GetInput(ctx, 1, &values, status); } - }; + } ~Params() { TF_DeleteStatus(status); TF_DeleteTensor(tags); @@ -59,9 +59,7 @@ void* ScalarSummaryOp_Create(TF_OpKernelConstruction* ctx) { return nullptr; } -void ScalarSummaryOp_Delete(void* kernel) { - return; -} +void ScalarSummaryOp_Delete(void* kernel) {} // Helper functions for compute method bool IsSameSize(TF_Tensor* tensor1, TF_Tensor* tensor2); @@ -96,7 +94,7 @@ void ScalarSummaryOp_Compute(void* kernel, TF_OpKernelContext* ctx) { tensorflow::Summary::Value* v = s.add_value(); const tensorflow::tstring& Ttags_i = tags_array[i]; v->set_tag(Ttags_i.data(), Ttags_i.size()); - v->set_simple_value(float(values_array[i])); + v->set_simple_value(static_cast(values_array[i])); } TF_Tensor* summary_tensor = TF_AllocateOutput(ctx, 0, TF_ExpectedOutputDataType(ctx, 0), nullptr, 0, diff --git a/tensorflow/c/kernels/tensor_shape_utils.cc b/tensorflow/c/kernels/tensor_shape_utils.cc index 0720414dea6..40bdb78efd9 100644 --- a/tensorflow/c/kernels/tensor_shape_utils.cc +++ b/tensorflow/c/kernels/tensor_shape_utils.cc @@ -37,4 +37,4 @@ std::string ShapeDebugString(TF_Tensor* tensor) { tensorflow::strings::StrAppend(&s, "]"); return s; } -} // namespace tensorflow \ No newline at end of file +} // namespace tensorflow diff --git a/tensorflow/c/kernels/tensor_shape_utils.h b/tensorflow/c/kernels/tensor_shape_utils.h index 7b48a8939ae..1a9a9df187a 100644 --- a/tensorflow/c/kernels/tensor_shape_utils.h +++ b/tensorflow/c/kernels/tensor_shape_utils.h @@ -35,4 +35,3 @@ std::string ShapeDebugString(TF_Tensor* tensor); } // namespace tensorflow #endif // TENSORFLOW_C_TENSOR_SHAPE_UTILS_H_ - diff --git a/tensorflow/c/kernels/tensor_shape_utils_test.cc b/tensorflow/c/kernels/tensor_shape_utils_test.cc index 23e5940dc7b..35e572ad9bd 100644 --- a/tensorflow/c/kernels/tensor_shape_utils_test.cc +++ b/tensorflow/c/kernels/tensor_shape_utils_test.cc @@ -29,7 +29,7 @@ namespace { // once out of scope. struct TF_TensorWrapper { TF_Tensor* tf_tensor; - TF_TensorWrapper(TF_Tensor* tensor) { + explicit TF_TensorWrapper(TF_Tensor* tensor) { tf_tensor = tensor; } ~TF_TensorWrapper() { From 4f97cdddb63bac7e2c48a484fef4acb0cb7072c1 Mon Sep 17 00:00:00 2001 From: Ran Chen Date: Wed, 29 Jul 2020 17:05:16 -0700 Subject: [PATCH 1676/2522] Do not use pool runner by default It's required to shutdown pool runners, otherwise tsan may complain, since the faulthandler library is not thread safe. 
When the pool runners are global, it's needed to register atexit hooks to shutdown them. PiperOrigin-RevId: 323899619 Change-Id: I5eede1072f8dcd29b212ec404696310af350d905 --- tensorflow/python/distribute/BUILD | 3 --- tensorflow/python/distribute/combinations.py | 2 +- tensorflow/python/distribute/strategy_combinations.py | 4 ++++ 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/distribute/BUILD b/tensorflow/python/distribute/BUILD index 947ec987de1..c802a5b47e6 100644 --- a/tensorflow/python/distribute/BUILD +++ b/tensorflow/python/distribute/BUILD @@ -848,9 +848,6 @@ py_test( name = "combinations_test", srcs = ["combinations_test.py"], python_version = "PY3", - tags = [ - "notsan", # TODO(b/160006974) - ], deps = [ ":combinations", "//tensorflow/python:client_testlib", diff --git a/tensorflow/python/distribute/combinations.py b/tensorflow/python/distribute/combinations.py index a86c751ec79..17bc285b222 100644 --- a/tensorflow/python/distribute/combinations.py +++ b/tensorflow/python/distribute/combinations.py @@ -234,7 +234,7 @@ class NamedDistribution(object): use_cloud_tpu=False, has_chief=False, num_workers=1, - use_pool_runner=True): + use_pool_runner=False): """Initialize NamedDistribution. Args: diff --git a/tensorflow/python/distribute/strategy_combinations.py b/tensorflow/python/distribute/strategy_combinations.py index 7bb3aea5461..b72cdd77a0e 100644 --- a/tensorflow/python/distribute/strategy_combinations.py +++ b/tensorflow/python/distribute/strategy_combinations.py @@ -204,6 +204,7 @@ multi_worker_mirrored_2x1_cpu = combinations.NamedDistribution( _get_multi_worker_mirrored_creator(required_gpus=0), has_chief=True, num_workers=1, + use_pool_runner=True, ) # chief + 1 worker, with 1 GPU each. multi_worker_mirrored_2x1_gpu = combinations.NamedDistribution( @@ -212,6 +213,7 @@ multi_worker_mirrored_2x1_gpu = combinations.NamedDistribution( has_chief=True, num_workers=1, required_gpus=1, + use_pool_runner=True, ) # chief + 1 worker, with 2 GPU each. multi_worker_mirrored_2x2_gpu = combinations.NamedDistribution( @@ -220,6 +222,7 @@ multi_worker_mirrored_2x2_gpu = combinations.NamedDistribution( has_chief=True, num_workers=1, required_gpus=2, + use_pool_runner=True, ) # chief + 3 workers, with CPU. multi_worker_mirrored_4x1_cpu = combinations.NamedDistribution( @@ -227,6 +230,7 @@ multi_worker_mirrored_4x1_cpu = combinations.NamedDistribution( _get_multi_worker_mirrored_creator(required_gpus=0), has_chief=True, num_workers=3, + use_pool_runner=True, ) From cd60aa9b952de211e0d0ce14db744d7f2d997b2b Mon Sep 17 00:00:00 2001 From: Ayush Dubey Date: Wed, 29 Jul 2020 17:09:24 -0700 Subject: [PATCH 1677/2522] Introduce a CollectiveReduceV2 op that accepts keys and group size as input tensors. CollectiveReduce accepts the following inputs as attributes on the op: group_size, group_key, and instance_key. Attributes imply these values are embedded in the NodeDef. The use case motivating this change is a compact representation for SPMD computation. The goal is to change those inputs from the collective op which can be accepted during runtime to tensors rather than attributes. This enables the graph builder to avoid early explosion of the SPMD program. This op is not exposed in the `tf.` namespace for now, and should be considered experimental. 
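A hedged sketch of driving the new op from Python once it is registered (not taken
from this change): the tf.raw_ops argument names below are assumptions inferred
from the kernel, and every member of the group must issue a matching call with the
same keys, so a single standalone invocation would block waiting for its peers.

import tensorflow as tf

@tf.function
def all_reduce_sum(t):
  # group_size, group_key and instance_key are ordinary runtime tensors here
  # rather than NodeDef attributes, which is the point of the V2 op.
  return tf.raw_ops.CollectiveReduceV2(
      input=t,
      group_size=tf.constant(2),
      group_key=tf.constant(100),
      instance_key=tf.constant(7),
      merge_op='Add',
      final_op='Id',
      communication_hint='auto')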
PiperOrigin-RevId: 323900361 Change-Id: I5ad92b1a46834906d4d102cce3b4a52c9f4b0d73 --- .../base_api/api_def_CollectiveReduceV2.pbtxt | 5 + .../grappler/optimizers/function_optimizer.cc | 5 +- tensorflow/core/kernels/collective_ops.cc | 184 +++++++++++++++--- tensorflow/core/ops/collective_ops.cc | 13 ++ .../python/framework/auto_control_deps.py | 1 + tensorflow/python/kernel_tests/BUILD | 11 ++ .../kernel_tests/collective_ops_test.py | 83 ++++++++ .../api/golden/v1/tensorflow.raw_ops.pbtxt | 4 + .../api/golden/v2/tensorflow.raw_ops.pbtxt | 4 + 9 files changed, 286 insertions(+), 24 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_CollectiveReduceV2.pbtxt create mode 100644 tensorflow/python/kernel_tests/collective_ops_test.py diff --git a/tensorflow/core/api_def/base_api/api_def_CollectiveReduceV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_CollectiveReduceV2.pbtxt new file mode 100644 index 00000000000..7663ac0afa2 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_CollectiveReduceV2.pbtxt @@ -0,0 +1,5 @@ +op { + graph_op_name: "CollectiveReduceV2" + summary: "Mutually reduces multiple tensors of identical type and shape." + visibility: HIDDEN +} diff --git a/tensorflow/core/grappler/optimizers/function_optimizer.cc b/tensorflow/core/grappler/optimizers/function_optimizer.cc index 6312bd0880c..30fc8d71ba7 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer.cc @@ -828,8 +828,9 @@ const bool IsExemptFromSideEffectsExecutionValidation(const string& op) { {// LINT.IfChange // Op types that should not run in program order, e.g. because they need // to run asynchronously to avoid deadlock. - "CollectiveGather", "CollectiveReduce", "CollectiveBcastSend", - "CollectiveBcastRecv", "NcclAllReduce", "Send", "Recv", + "CollectiveGather", "CollectiveReduce", "CollectiveReduceV2", + "CollectiveBcastSend", "CollectiveBcastRecv", "NcclAllReduce", "Send", + "Recv", // Legacy random ops. // See details in tensorflow/python/framework/auto_control_deps.py. diff --git a/tensorflow/core/kernels/collective_ops.cc b/tensorflow/core/kernels/collective_ops.cc index 4951d0895c6..409694ec017 100644 --- a/tensorflow/core/kernels/collective_ops.cc +++ b/tensorflow/core/kernels/collective_ops.cc @@ -21,6 +21,30 @@ limitations under the License. namespace tensorflow { namespace { + +static string CollectiveKey(OpKernelContext* ctx, int32 instance_key) { + return strings::StrCat(instance_key, ":", ctx->frame_iter().frame_id, ":", + ctx->frame_iter().iter_id); +} + +static std::unique_ptr BuildOpKernel(OpKernelConstruction* c, + const string& name, + NodeDef* sub_node) { + std::unique_ptr k; + if (name.empty() || name == "Id") return k; + sub_node->set_name(name); + sub_node->set_op(name); + Status status; + k = CreateOpKernel(c->device_type(), c->device(), + c->device()->GetAllocator(AllocatorAttributes()), + *sub_node, c->graph_def_version(), &status); + if (!status.ok()) { + c->CtxFailureWithWarning(errors::Internal( + "Failed to build OpKernel for ", name, " : ", status.error_message())); + } + return k; +} + class CollectiveOpKernel : public AsyncOpKernel { public: explicit CollectiveOpKernel(OpKernelConstruction* c) : AsyncOpKernel(c) {} @@ -28,9 +52,7 @@ class CollectiveOpKernel : public AsyncOpKernel { // A string encoding instance, frame and iter to be handed off to // the implementation for use in generating RecvBuf keys. 
string GetCollectiveKey(OpKernelContext* c) { - return strings::StrCat(col_params_.instance.instance_key, ":", - c->frame_iter().frame_id, ":", - c->frame_iter().iter_id); + return CollectiveKey(c, col_params_.instance.instance_key); } // Returns false if calling invocation of ComputeAsync should return @@ -205,25 +227,6 @@ class CollectiveReduceOpKernel : public CollectiveOpKernel { col_params_.final_op = BuildOpKernel(c, final_op_name, &sub_node); } - std::unique_ptr BuildOpKernel(OpKernelConstruction* c, - const string& name, - NodeDef* sub_node) { - std::unique_ptr k; - if (name.empty() || name == "Id") return k; - sub_node->set_name(name); - sub_node->set_op(name); - Status status; - k = CreateOpKernel(c->device_type(), c->device(), - c->device()->GetAllocator(AllocatorAttributes()), - *sub_node, c->graph_def_version(), &status); - if (!status.ok()) { - c->CtxFailureWithWarning(errors::Internal("Failed to build OpKernel for ", - name, " : ", - status.error_message())); - } - return k; - } - void ComputeAsync(OpKernelContext* c, DoneCallback done) override { CollectiveExecutor* col_exec = c->collective_executor(); OP_REQUIRES_ASYNC( @@ -430,5 +433,142 @@ REGISTER_KERNEL_BUILDER(Name("CollectiveBcastRecv").Device(DEVICE_CPU), REGISTER_KERNEL_BUILDER(Name("CollectiveBcastRecv").Device(DEVICE_GPU), CollectiveBcastRecvOpKernel); +class CollectiveReduceV2OpKernel : public AsyncOpKernel { + public: + explicit CollectiveReduceV2OpKernel(OpKernelConstruction* c) + : AsyncOpKernel(c) { + col_params_ = std::make_shared(); + OP_REQUIRES_OK(c, c->GetAttr("T", &col_params_->instance.data_type)); + string merge_op_name; + OP_REQUIRES_OK(c, c->GetAttr("merge_op", &merge_op_name)); + string final_op_name; + OP_REQUIRES_OK(c, c->GetAttr("final_op", &final_op_name)); + OP_REQUIRES_OK( + c, c->GetAttr("communication_hint", + &col_params_->instance.impl_details.communication_hint)); + // Prepare OpKernels for reduction and final operations. + // The merge_op takes two inputs + NodeDef sub_node; + sub_node.add_input(c->def().input(0)); + sub_node.add_input(c->def().input(0)); + sub_node.set_device(c->def().device()); + SetAttrValue(col_params_->instance.data_type, + &(*sub_node.mutable_attr())["T"]); + col_params_->merge_op = BuildOpKernel(c, merge_op_name, &sub_node); + col_params_->final_op = BuildOpKernel(c, final_op_name, &sub_node); + + col_params_->name = strings::StrCat(c->def().name(), ": ReduceV2(", + merge_op_name, ",", final_op_name, ")"); + col_params_->group.device_type = c->device_type(); + // Add a default value for subdiv offsets, which is the same as the default + // value in the V1 op's attribute. 
+ col_params_->instance.impl_details.subdiv_offsets.push_back(0); + VLOG(2) << "CollectiveReduceV2 " << this << " name " << col_params_->name + << " communication_hint " + << col_params_->instance.impl_details.communication_hint; + } + + void ComputeAsync(OpKernelContext* c, DoneCallback done) override { + CollectiveExecutor* col_exec = c->collective_executor(); + OP_REQUIRES_ASYNC( + c, col_exec, + errors::Internal( + "Failed to get CollectiveExecutor from OpKernelContext for Op ", + col_params_->name), + done); + const Tensor& input = c->input(0); + const Tensor& group_size = c->input(1); + const Tensor& group_key = c->input(2); + const Tensor& instance_key = c->input(3); + OP_REQUIRES_ASYNC( + c, group_size.dims() == 0, + errors::Internal("Unexpected dimensions on input group_size"), done); + OP_REQUIRES_ASYNC( + c, group_key.dims() == 0, + errors::Internal("Unexpected dimensions on input group_key"), done); + OP_REQUIRES_ASYNC( + c, instance_key.dims() == 0, + errors::Internal("Unexpected dimensions on input instance_key"), done); + + auto col_params = std::make_shared(); + col_params->name = col_params_->name; + col_params->group.device_type = col_params_->group.device_type; + col_params->group.group_size = group_size.unaligned_flat()(0); + col_params->group.group_key = group_key.unaligned_flat()(0); + col_params->instance.type = REDUCTION_COLLECTIVE; + col_params->instance.instance_key = instance_key.unaligned_flat()(0); + col_params->instance.data_type = col_params_->instance.data_type; + col_params->instance.impl_details.communication_hint = + col_params_->instance.impl_details.communication_hint; + col_params->instance.impl_details.timeout_seconds = 0; + col_params->instance.impl_details.subdiv_offsets = + col_params_->instance.impl_details.subdiv_offsets; + col_params->merge_op = std::move(col_params_->merge_op); + col_params->final_op = std::move(col_params_->final_op); + VLOG(1) << "CollectiveReduceV2 group_size " << col_params->group.group_size + << " group_key " << col_params->group.group_key << " instance_key " + << col_params->instance.instance_key; + + // Allocate the output tensor, trying to reuse the input. + Tensor* output = nullptr; + OP_REQUIRES_OK_ASYNC( + c, c->forward_input_or_allocate_output({0}, 0, input.shape(), &output), + done); + col_params->instance.shape = input.shape(); + + // Store the updated params in this OpKernel. + col_params_ = col_params; + + // Resolve the collective params. + // Schedule the `CompleteParamsAsync` call on a work queue that can handle + // blocking work because it's not guaranteed that this call cannot block. 
+ c->collective_executor()->RunClosure([c, done = std::move(done), col_params, + col_exec]() { + VLOG(1) << "CollectiveReduceV2 CompleteParams for collective " + << col_params->name << " device " << c->device()->name() + << " group " << col_params->group.group_key << " instance " + << col_params->instance.instance_key; + col_exec->CompleteParamsAsync( + c->device()->name(), col_params.get(), c->cancellation_manager(), + [c, done = std::move(done), col_params, col_exec](const Status& s) { + if (s.ok()) { + auto actual_done = [c, group_key = col_params->group.group_key, + instance_key = + col_params->instance.instance_key, + done = std::move(done)](const Status& s) { + VLOG(1) << "CollectiveReduceV2 ExecuteAsync done for " + "collective " + << c->op_kernel().name() << " device " + << c->device()->name() << " group " << group_key + << " instance " << instance_key << " status " << s; + OP_REQUIRES_OK_ASYNC(c, s, done); + done(); + }; + VLOG(1) << "CollectiveReduceV2 ExecuteAsync start for " + "collective " + << col_params->name << " device " << c->device()->name() + << " group " << col_params->group.group_key + << " instance " << col_params->instance.instance_key; + col_exec->ExecuteAsync( + c, *col_params, + CollectiveKey(c, col_params->instance.instance_key), + actual_done); + } else { + c->SetStatus(s); + done(); + } + }); + }); + } + + private: + std::shared_ptr col_params_; +}; + +REGISTER_KERNEL_BUILDER(Name("CollectiveReduceV2").Device(DEVICE_CPU), + CollectiveReduceV2OpKernel); +REGISTER_KERNEL_BUILDER(Name("CollectiveReduceV2").Device(DEVICE_GPU), + CollectiveReduceV2OpKernel); + } // namespace } // namespace tensorflow diff --git a/tensorflow/core/ops/collective_ops.cc b/tensorflow/core/ops/collective_ops.cc index 23d09ff61ec..51b266d8f08 100644 --- a/tensorflow/core/ops/collective_ops.cc +++ b/tensorflow/core/ops/collective_ops.cc @@ -104,4 +104,17 @@ REGISTER_OP("CollectiveBcastRecv") .SetIsStateful() .SetShapeFn(shape_inference::ExplicitShape); +REGISTER_OP("CollectiveReduceV2") + .Input("input: T") + .Output("data: T") + .Attr("T: {float, float16, float64, int32, int64}") + .Input("group_size: int32") + .Input("group_key: int32") + .Input("instance_key: int32") + .Attr("merge_op: {'Min', 'Max', 'Mul', 'Add'}") + .Attr("final_op: {'Id', 'Div'}") + .Attr("communication_hint: string = 'auto'") + .SetIsStateful() + .SetShapeFn(shape_inference::UnchangedShape); + } // namespace tensorflow diff --git a/tensorflow/python/framework/auto_control_deps.py b/tensorflow/python/framework/auto_control_deps.py index ccf9877b08d..06a5b6dea33 100644 --- a/tensorflow/python/framework/auto_control_deps.py +++ b/tensorflow/python/framework/auto_control_deps.py @@ -42,6 +42,7 @@ from tensorflow.python.util import tf_decorator ASYNC_STATEFUL_OPS = [ "CollectiveGather", "CollectiveReduce", + "CollectiveReduceV2", "CollectiveBcastSend", "CollectiveBcastRecv", "NcclAllReduce", diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 5bd81aa26ef..10b67dee01c 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -244,6 +244,17 @@ tf_py_test( ], ) +tf_py_test( + name = "collective_ops_test", + size = "small", + srcs = ["collective_ops_test.py"], + deps = [ + "//tensorflow/python:client_testlib", + "//tensorflow/python:collective_ops_gen", + "//tensorflow/python:framework_for_generated_wrappers", + ], +) + tf_py_test( name = "conditional_accumulator_test", size = "small", diff --git 
a/tensorflow/python/kernel_tests/collective_ops_test.py b/tensorflow/python/kernel_tests/collective_ops_test.py new file mode 100644 index 00000000000..25d93672c7b --- /dev/null +++ b/tensorflow/python/kernel_tests/collective_ops_test.py @@ -0,0 +1,83 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for V2 Collective Operations.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.eager import context +from tensorflow.python.eager import def_function +from tensorflow.python.framework import config +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util +from tensorflow.python.ops import gen_collective_ops +from tensorflow.python.platform import test + + +class CollectiveOpsTest(test.TestCase): + + def _setup_context(self, num_cpus=2): + context._reset_context() + cpus = config.list_physical_devices('CPU') + self.assertEqual(len(cpus), 1) + config.set_logical_device_configuration(cpus[0], [ + context.LogicalDeviceConfiguration(), + context.LogicalDeviceConfiguration() + ]) + context.ensure_initialized() + + @test_util.run_v2_only + def testReduceV2(self): + self._setup_context() + + @def_function.function + def single_all_reduce(in_value, group_size, group_key, instance_key): + return gen_collective_ops.collective_reduce_v2( + in_value, group_size, group_key, instance_key, merge_op='Add', + final_op='Id', communication_hint='auto') + + @def_function.function + def run_all_reduce_1cpu(): + with ops.device('/device:CPU:0'): + in_value = constant_op.constant([1.]) + group_size = constant_op.constant(1) + group_key = constant_op.constant(1) + instance_key = constant_op.constant(1) + return single_all_reduce(in_value, group_size, group_key, instance_key) + + @def_function.function + def run_all_reduce_2cpus(): + in_value = constant_op.constant([1.]) + group_size = constant_op.constant(2) + group_key = constant_op.constant(2) + instance_key = constant_op.constant(2) + collectives = [] + with ops.device('/device:CPU:0'): + collectives.append(single_all_reduce(in_value, group_size, group_key, + instance_key)) + with ops.device('/device:CPU:1'): + collectives.append(single_all_reduce(in_value, group_size, group_key, + instance_key)) + return collectives + + self.assertAllClose(run_all_reduce_1cpu(), [1.], rtol=1e-5, atol=1e-5) + for result in run_all_reduce_2cpus(): + self.assertAllClose(result, [2.], rtol=1e-5, atol=1e-5) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt index 3c47a392b7e..c30783036c1 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt @@ -768,6 +768,10 
@@ tf_module { name: "CollectiveReduce" argspec: "args=[\'input\', \'group_size\', \'group_key\', \'instance_key\', \'merge_op\', \'final_op\', \'subdiv_offsets\', \'wait_for\', \'communication_hint\', \'timeout_seconds\', \'name\'], varargs=None, keywords=None, defaults=[\'[]\', \'auto\', \'0\', \'None\'], " } + member_method { + name: "CollectiveReduceV2" + argspec: "args=[\'input\', \'group_size\', \'group_key\', \'instance_key\', \'merge_op\', \'final_op\', \'communication_hint\', \'name\'], varargs=None, keywords=None, defaults=[\'auto\', \'None\'], " + } member_method { name: "CombinedNonMaxSuppression" argspec: "args=[\'boxes\', \'scores\', \'max_output_size_per_class\', \'max_total_size\', \'iou_threshold\', \'score_threshold\', \'pad_per_class\', \'clip_boxes\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'True\', \'None\'], " diff --git a/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt index 3c47a392b7e..c30783036c1 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt @@ -768,6 +768,10 @@ tf_module { name: "CollectiveReduce" argspec: "args=[\'input\', \'group_size\', \'group_key\', \'instance_key\', \'merge_op\', \'final_op\', \'subdiv_offsets\', \'wait_for\', \'communication_hint\', \'timeout_seconds\', \'name\'], varargs=None, keywords=None, defaults=[\'[]\', \'auto\', \'0\', \'None\'], " } + member_method { + name: "CollectiveReduceV2" + argspec: "args=[\'input\', \'group_size\', \'group_key\', \'instance_key\', \'merge_op\', \'final_op\', \'communication_hint\', \'name\'], varargs=None, keywords=None, defaults=[\'auto\', \'None\'], " + } member_method { name: "CombinedNonMaxSuppression" argspec: "args=[\'boxes\', \'scores\', \'max_output_size_per_class\', \'max_total_size\', \'iou_threshold\', \'score_threshold\', \'pad_per_class\', \'clip_boxes\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'True\', \'None\'], " From d17ee32fdf55145a7b62e74152eec730dee09e36 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Wed, 29 Jul 2020 17:15:40 -0700 Subject: [PATCH 1678/2522] Added additional structures for describing src/dst tensors in GPUOperation. Need for merge automatization. Fixed all ops. 
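The diff below repeatedly replaces per-kernel `args_.AddObjectRef(...)` declarations plus hand-written `BindArguments()` overrides with `AddSrcTensor`/`AddDstTensor` calls made while the kernel code is generated. A self-contained sketch of that bookkeeping, using simplified stand-in types rather than the real TFLite GPU classes:

    // Sketch only: the operation records named src/dst tensor descriptors once,
    // so individual kernels no longer need a BindArguments() override just to
    // wire src_[i]/dst_[0] to argument names.
    #include <string>
    #include <vector>

    struct TensorDescriptorSketch {  // stand-in for the real TensorDescriptor
      std::string state_var;
    };

    class GpuOperationSketch {
     public:
      void AddSrcTensor(const std::string& name, const TensorDescriptorSketch& desc) {
        src_names_.push_back(name);
        src_descs_.push_back(desc);
      }
      void AddDstTensor(const std::string& name, const TensorDescriptorSketch& desc) {
        dst_names_.push_back(name);
        dst_descs_.push_back(desc);
      }

     private:
      // Bound generically at launch time instead of per-kernel.
      std::vector<std::string> src_names_, dst_names_;
      std::vector<TensorDescriptorSketch> src_descs_, dst_descs_;
    };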
PiperOrigin-RevId: 323901337 Change-Id: I3eb55efab890dc9d177e5dfce7cd67c853541570 --- .../lite/delegates/gpu/cl/kernels/BUILD | 1 + .../lite/delegates/gpu/cl/kernels/add.cc | 36 +-- .../lite/delegates/gpu/cl/kernels/add.h | 9 - .../delegates/gpu/cl/kernels/concat_xy.cc | 49 +-- .../lite/delegates/gpu/cl/kernels/concat_xy.h | 4 +- .../lite/delegates/gpu/cl/kernels/concat_z.cc | 53 ++-- .../lite/delegates/gpu/cl/kernels/concat_z.h | 4 +- .../lite/delegates/gpu/cl/kernels/conv_3d.cc | 287 +++++++++--------- .../lite/delegates/gpu/cl/kernels/conv_3d.h | 3 + .../gpu/cl/kernels/conv_buffer_1x1.cc | 217 +++++++------ .../gpu/cl/kernels/conv_buffer_1x1.h | 5 +- .../gpu/cl/kernels/conv_constants.cc | 130 ++++---- .../delegates/gpu/cl/kernels/conv_constants.h | 4 + .../delegates/gpu/cl/kernels/conv_powervr.cc | 74 +++-- .../delegates/gpu/cl/kernels/conv_powervr.h | 4 + .../delegates/gpu/cl/kernels/conv_texture.cc | 170 +++++------ .../delegates/gpu/cl/kernels/conv_texture.h | 7 + .../gpu/cl/kernels/conv_weights_converter.cc | 56 ++-- .../gpu/cl/kernels/conv_weights_converter.h | 4 + .../gpu/cl/kernels/convolution_transposed.cc | 135 ++++---- .../gpu/cl/kernels/convolution_transposed.h | 5 + .../cl/kernels/convolution_transposed_3d.cc | 115 ++++--- .../cl/kernels/convolution_transposed_3d.h | 5 + .../cl/kernels/convolution_transposed_3x3.cc | 103 +++---- .../cl/kernels/convolution_transposed_3x3.h | 5 + .../convolution_transposed_3x3_thin.cc | 70 ++--- .../kernels/convolution_transposed_3x3_thin.h | 5 +- .../cl/kernels/convolution_transposed_4x4.cc | 89 +++--- .../cl/kernels/convolution_transposed_4x4.h | 4 + .../cl/kernels/convolution_transposed_thin.cc | 76 ++--- .../cl/kernels/convolution_transposed_thin.h | 4 +- .../gpu/cl/kernels/depthwise_conv.cc | 154 +++++----- .../delegates/gpu/cl/kernels/depthwise_conv.h | 6 + .../gpu/cl/kernels/depthwise_conv_3x3.cc | 73 +++-- .../gpu/cl/kernels/depthwise_conv_3x3.h | 5 + .../delegates/gpu/cl/kernels/elementwise.cc | 36 +-- .../delegates/gpu/cl/kernels/elementwise.h | 12 - .../gpu/cl/kernels/fully_connected.cc | 48 ++- .../gpu/cl/kernels/fully_connected.h | 4 +- .../delegates/gpu/cl/kernels/gpu_operation.cc | 110 +++++-- .../delegates/gpu/cl/kernels/gpu_operation.h | 33 +- .../lite/delegates/gpu/cl/kernels/lstm.cc | 54 ++-- .../lite/delegates/gpu/cl/kernels/lstm.h | 4 +- .../delegates/gpu/cl/kernels/max_unpooling.cc | 116 ++++--- .../delegates/gpu/cl/kernels/max_unpooling.h | 3 + .../lite/delegates/gpu/cl/kernels/mean.cc | 40 +-- .../lite/delegates/gpu/cl/kernels/mean.h | 4 + .../lite/delegates/gpu/cl/kernels/padding.cc | 54 ++-- .../lite/delegates/gpu/cl/kernels/padding.h | 3 + .../lite/delegates/gpu/cl/kernels/pooling.cc | 174 +++++------ .../lite/delegates/gpu/cl/kernels/pooling.h | 7 + .../lite/delegates/gpu/cl/kernels/reshape.cc | 38 +-- .../lite/delegates/gpu/cl/kernels/reshape.h | 4 +- .../delegates/gpu/cl/kernels/reshapex4.cc | 40 +-- .../lite/delegates/gpu/cl/kernels/reshapex4.h | 4 +- .../lite/delegates/gpu/cl/kernels/resize.cc | 187 ++++++------ .../lite/delegates/gpu/cl/kernels/resize.h | 7 + .../lite/delegates/gpu/cl/kernels/softmax.cc | 46 ++- .../lite/delegates/gpu/cl/kernels/softmax.h | 4 +- .../delegates/gpu/cl/kernels/softmax1x1.cc | 44 ++- .../delegates/gpu/cl/kernels/softmax1x1.h | 3 + .../gpu/cl/kernels/space_to_depth.cc | 41 +-- .../delegates/gpu/cl/kernels/space_to_depth.h | 2 + .../special/depthwise_conv_plus_1x1_conv.cc | 208 ++++++------- .../special/depthwise_conv_plus_1x1_conv.h | 5 +- 
.../delegates/gpu/cl/kernels/strided_slice.cc | 132 ++++---- .../delegates/gpu/cl/kernels/strided_slice.h | 2 + .../delegates/gpu/cl/kernels/transpose.cc | 45 +-- .../lite/delegates/gpu/cl/kernels/transpose.h | 3 +- .../lite/delegates/gpu/cl/kernels/winograd.cc | 279 ++++++++--------- .../lite/delegates/gpu/cl/kernels/winograd.h | 4 + 71 files changed, 1803 insertions(+), 1968 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/BUILD b/tensorflow/lite/delegates/gpu/cl/kernels/BUILD index 1f81a34604a..727214b1faa 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/BUILD +++ b/tensorflow/lite/delegates/gpu/cl/kernels/BUILD @@ -731,6 +731,7 @@ cc_library( ":util", ":work_group_picking", "//tensorflow/lite/delegates/gpu/cl:arguments", + "//tensorflow/lite/delegates/gpu/cl:buffer", "//tensorflow/lite/delegates/gpu/cl:cl_context", "//tensorflow/lite/delegates/gpu/cl:cl_device", "//tensorflow/lite/delegates/gpu/cl:precision", diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/add.cc b/tensorflow/lite/delegates/gpu/cl/kernels/add.cc index 858d188945f..1d09e39b83b 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/add.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/add.cc @@ -27,23 +27,20 @@ namespace cl { Add::Add(const OperationDef& definition, const std::vector& channels, int dst_channels) - : ElementwiseOperation(definition), - dst_depth_(DivideRoundUp(dst_channels, 4)) { - src_depthes_.resize(channels.size()); - for (int i = 0; i < channels.size(); ++i) { - src_depthes_[i] = DivideRoundUp(channels[i], 4); - } - if (src_depthes_[0] < dst_depth_) { + : ElementwiseOperation(definition) { + int dst_depth = DivideRoundUp(dst_channels, 4); + int src0_depth = DivideRoundUp(channels[0], 4); + linkable_ = dst_depth == src0_depth; + if (src0_depth < dst_depth) { check_src_channels_size_ = true; } for (int i = 1; i < definition_.src_tensors.size(); ++i) { const std::string tensor_name = absl::StrCat("src_data_", i); - auto src_desc = - absl::make_unique(definition_.src_tensors[i]); + auto src_desc = definition_.src_tensors[i]; if (definition_.IsBatchSupported()) { - src_desc->SetStateVar("BatchedWidth", "true"); + src_desc.SetStateVar("BatchedWidth", "true"); } - args_.AddObjectRef(tensor_name, AccessType::READ, std::move(src_desc)); + AddSrcTensor(tensor_name, src_desc); code_ += "if (S_COORD < args." + tensor_name + ".Slices()) {\n"; code_ += " in_out_value += args." 
+ tensor_name + ".Read(X_COORD, Y_COORD, S_COORD);\n"; @@ -51,30 +48,15 @@ Add::Add(const OperationDef& definition, const std::vector& channels, } } -Add::Add(Add&& operation) - : ElementwiseOperation(std::move(operation)), - link_index_(operation.link_index_), - src_depthes_(std::move(operation.src_depthes_)), - dst_depth_(operation.dst_depth_) {} +Add::Add(Add&& operation) : ElementwiseOperation(std::move(operation)) {} Add& Add::operator=(Add&& operation) { if (this != &operation) { - link_index_ = operation.link_index_; - src_depthes_ = std::move(operation.src_depthes_); - dst_depth_ = operation.dst_depth_; ElementwiseOperation::operator=(std::move(operation)); } return *this; } -absl::Status Add::SetArgs(const std::string& unique_postfix, Arguments* args) { - for (int i = 1; i < definition_.src_tensors.size(); ++i) { - std::string tensor_name = absl::StrCat("src_data_", i, unique_postfix); - RETURN_IF_ERROR(args->SetObjectRef(tensor_name, src_[i])); - } - return absl::OkStatus(); -} - Add CreateAdd(const OperationDef& definition, const std::vector& channels, int dst_channels) { Add operation(definition, channels, dst_channels); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/add.h b/tensorflow/lite/delegates/gpu/cl/kernels/add.h index f20425c48dd..81b2fed116f 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/add.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/add.h @@ -41,15 +41,6 @@ class Add : public ElementwiseOperation { Add& operator=(Add&& operation); Add(const Add&) = delete; Add& operator=(const Add&) = delete; - - absl::Status SetArgs(const std::string& unique_postfix, - Arguments* args) override; - bool IsLinkable() const override { return dst_depth_ == src_depthes_[0]; } - - private: - int link_index_; - std::vector src_depthes_; - int dst_depth_; }; Add CreateAdd(const OperationDef& definition, const std::vector& channels, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/concat_xy.cc b/tensorflow/lite/delegates/gpu/cl/kernels/concat_xy.cc index 9feb3ace50e..a37ca76a7fe 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/concat_xy.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/concat_xy.cc @@ -27,20 +27,25 @@ limitations under the License. 
namespace tflite { namespace gpu { namespace cl { -namespace { +ConcatXY::ConcatXY(ConcatXY&& operation) + : GPUOperation(std::move(operation)), attr_(operation.attr_) {} -std::string GetConcatKernelCode(const OperationDef& op_def, - const ConcatAttributes& attr, Arguments* args) { +ConcatXY& ConcatXY::operator=(ConcatXY&& operation) { + if (this != &operation) { + attr_ = operation.attr_; + GPUOperation::operator=(std::move(operation)); + } + return *this; +} + +std::string ConcatXY::GetConcatKernelCode(const OperationDef& op_def, + const ConcatAttributes& attr) { std::vector tensor_names(op_def.src_tensors.size()); for (int i = 0; i < op_def.src_tensors.size(); ++i) { tensor_names[i] = "src_tensor_" + std::to_string(i); - args->AddObjectRef( - tensor_names[i], AccessType::READ, - absl::make_unique(op_def.src_tensors[0])); + AddSrcTensor(tensor_names[i], op_def.src_tensors[i]); } - args->AddObjectRef( - "dst_tensor", AccessType::WRITE, - absl::make_unique(op_def.dst_tensors[0])); + AddDstTensor("dst_tensor", op_def.dst_tensors[0]); std::map axis_to_selector = { {Axis::WIDTH, "Width"}, {Axis::HEIGHT, "Height"}, @@ -120,24 +125,8 @@ std::string GetConcatKernelCode(const OperationDef& op_def, return c; } -} // namespace - -ConcatXY::ConcatXY(ConcatXY&& operation) - : GPUOperation(std::move(operation)), - attr_(operation.attr_), - tensors_count_(operation.tensors_count_) {} - -ConcatXY& ConcatXY::operator=(ConcatXY&& operation) { - if (this != &operation) { - attr_ = operation.attr_; - tensors_count_ = operation.tensors_count_; - GPUOperation::operator=(std::move(operation)); - } - return *this; -} - absl::Status ConcatXY::Compile(const CreationContext& creation_context) { - std::string code = GetConcatKernelCode(definition_, attr_, &args_); + std::string code = GetConcatKernelCode(definition_, attr_); std::string element_wise_code; RETURN_IF_ERROR( MergeOperations(linked_operations_, &args_, &element_wise_code)); @@ -149,14 +138,6 @@ absl::Status ConcatXY::Compile(const CreationContext& creation_context) { *creation_context.device, &kernel_); } -absl::Status ConcatXY::BindArguments() { - for (int i = 0; i < definition_.src_tensors.size(); ++i) { - RETURN_IF_ERROR( - args_.SetObjectRef("src_tensor_" + std::to_string(i), src_[i])); - } - return args_.SetObjectRef("dst_tensor", dst_[0]); -} - int3 ConcatXY::GetGridSize() const { const int grid_x = dst_[0]->Width() * dst_[0]->Batch(); const int grid_y = dst_[0]->Height() * dst_[0]->Depth(); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/concat_xy.h b/tensorflow/lite/delegates/gpu/cl/kernels/concat_xy.h index 011d8fb191f..42e4c1552a5 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/concat_xy.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/concat_xy.h @@ -32,7 +32,6 @@ class ConcatXY : public GPUOperation { int tensors_count) : GPUOperation(definition), attr_(attr), tensors_count_(tensors_count) {} absl::Status Compile(const CreationContext& creation_context) override; - absl::Status BindArguments() override; int3 GetGridSize() const override; // Move only @@ -42,6 +41,9 @@ class ConcatXY : public GPUOperation { ConcatXY& operator=(const ConcatXY&) = delete; private: + std::string GetConcatKernelCode(const OperationDef& op_def, + const ConcatAttributes& attr); + ConcatAttributes attr_; int tensors_count_; }; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.cc b/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.cc index 7878919d6a0..00d46325d17 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.cc +++ 
b/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.cc @@ -36,23 +36,35 @@ bool IsAllChannelsX4(const std::vector& channels) { return true; } -std::string GetConcatKernelCode(const OperationDef& op_def, - const std::vector& channels, - Arguments* args) { +} // namespace + +ConcatZ::ConcatZ(ConcatZ&& kernel) + : GPUOperation(std::move(kernel)), channels_(std::move(kernel.channels_)) {} + +ConcatZ& ConcatZ::operator=(ConcatZ&& kernel) { + if (this != &kernel) { + channels_ = std::move(kernel.channels_); + GPUOperation::operator=(std::move(kernel)); + } + return *this; +} + +std::string ConcatZ::GetConcatKernelCode(const OperationDef& op_def, + const std::vector& channels) { std::vector tensor_names(op_def.src_tensors.size()); for (int i = 0; i < op_def.src_tensors.size(); ++i) { tensor_names[i] = "src_tensor_" + std::to_string(i); - auto src_desc = absl::make_unique(op_def.src_tensors[i]); + auto src_desc = op_def.src_tensors[i]; if (op_def.IsBatchSupported()) { - src_desc->SetStateVar("BatchedWidth", "true"); + src_desc.SetStateVar("BatchedWidth", "true"); } - args->AddObjectRef(tensor_names[i], AccessType::READ, std::move(src_desc)); + AddSrcTensor(tensor_names[i], src_desc); } - auto dst_desc = absl::make_unique(op_def.dst_tensors[0]); + auto dst_desc = op_def.dst_tensors[0]; if (op_def.IsBatchSupported()) { - dst_desc->SetStateVar("BatchedWidth", "true"); + dst_desc.SetStateVar("BatchedWidth", "true"); } - args->AddObjectRef("dst_tensor", AccessType::WRITE, std::move(dst_desc)); + AddDstTensor("dst_tensor", dst_desc); std::string c = GetCommonDefines(op_def.precision); c += "__kernel void main_function(\n"; @@ -131,21 +143,8 @@ std::string GetConcatKernelCode(const OperationDef& op_def, return c; } -} // namespace - -ConcatZ::ConcatZ(ConcatZ&& kernel) - : GPUOperation(std::move(kernel)), channels_(std::move(kernel.channels_)) {} - -ConcatZ& ConcatZ::operator=(ConcatZ&& kernel) { - if (this != &kernel) { - channels_ = std::move(kernel.channels_); - GPUOperation::operator=(std::move(kernel)); - } - return *this; -} - absl::Status ConcatZ::Compile(const CreationContext& creation_context) { - std::string code = GetConcatKernelCode(definition_, channels_, &args_); + std::string code = GetConcatKernelCode(definition_, channels_); std::vector options; if (creation_context.device->IsPowerVR() && definition_.precision == CalculationsPrecision::F32 && @@ -171,14 +170,6 @@ absl::Status ConcatZ::Compile(const CreationContext& creation_context) { *creation_context.device, &kernel_); } -absl::Status ConcatZ::BindArguments() { - for (int i = 0; i < definition_.src_tensors.size(); ++i) { - RETURN_IF_ERROR( - args_.SetObjectRef("src_tensor_" + std::to_string(i), src_[i])); - } - return args_.SetObjectRef("dst_tensor", dst_[0]); -} - int3 ConcatZ::GetGridSize() const { const int grid_x = dst_[0]->Width() * dst_[0]->Batch(); const int grid_y = dst_[0]->Height(); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.h b/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.h index 496b9437706..a813cd5236b 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.h @@ -33,7 +33,6 @@ class ConcatZ : public GPUOperation { ConcatZ(const OperationDef& definition, const std::vector& channels) : GPUOperation(definition), channels_(channels) {} absl::Status Compile(const CreationContext& creation_context) override; - absl::Status BindArguments() override; int3 GetGridSize() const override; // Move only @@ -43,6 +42,9 @@ class ConcatZ : public 
GPUOperation { ConcatZ& operator=(const ConcatZ&) = delete; private: + std::string GetConcatKernelCode(const OperationDef& op_def, + const std::vector& channels); + std::vector channels_; }; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.cc index 788b56c73cc..8032c41ed1b 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.cc @@ -30,124 +30,6 @@ limitations under the License. namespace tflite { namespace gpu { namespace cl { - -Conv3D::Conv3D(const OperationDef& definition, - const Convolution3DAttributes& attr, const CLDevice& device) - : GPUOperation(definition), - stride_(attr.strides.w, attr.strides.h, attr.strides.d), - padding_(-attr.padding.prepended.w, -attr.padding.prepended.h, - -attr.padding.prepended.d), - kernel_size_(attr.weights.shape.w, attr.weights.shape.h, - attr.weights.shape.d), - dilation_(attr.dilations.w, attr.dilations.h, attr.dilations.d), - conv_params_(GuessBestParams(device, definition, attr)) {} - -Conv3D::Conv3D(Conv3D&& operation) - : GPUOperation(std::move(operation)), - stride_(operation.stride_), - padding_(operation.padding_), - kernel_size_(operation.kernel_size_), - dilation_(operation.dilation_), - conv_params_(operation.conv_params_) {} - -Conv3D& Conv3D::operator=(Conv3D&& operation) { - if (this != &operation) { - std::swap(stride_, operation.stride_); - std::swap(padding_, operation.padding_); - std::swap(kernel_size_, operation.kernel_size_); - std::swap(dilation_, operation.dilation_); - std::swap(conv_params_, operation.conv_params_); - GPUOperation::operator=(std::move(operation)); - } - return *this; -} - -absl::Status Conv3D::Compile(const CreationContext& creation_context) { - const bool stride_correction = - definition_.IsBatchSupported() && stride_.x != 1; - std::string code = - GenerateConv3D(definition_, stride_correction, conv_params_, &args_); - work_group_size_ = conv_params_.work_group_size; - std::string element_wise_code; - RETURN_IF_ERROR( - MergeOperations(linked_operations_, &args_, &element_wise_code)); - RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), - {{"dst_tensor", element_wise_code}}, - &code)); - - std::vector options; - if (definition_.precision == CalculationsPrecision::F16 && - creation_context.device->IsPowerVR()) { - options.push_back(CompilerOptions::POWERVR_FP16); - } - return creation_context.cache->GetOrCreateCLKernel( - code, "main_function", options, *creation_context.context, - *creation_context.device, &kernel_); -} - -absl::Status Conv3D::BindArguments() { - RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); - RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); - if (!conv_params_.x_kernel_is_1) { - RETURN_IF_ERROR(args_.SetInt("stride_x", stride_.x)); - RETURN_IF_ERROR(args_.SetInt("padding_x", padding_.x * src_[0]->Batch())); - RETURN_IF_ERROR(args_.SetInt("kernel_size_x", kernel_size_.x)); - RETURN_IF_ERROR(args_.SetInt("dilation_x", dilation_.x * src_[0]->Batch())); - } - if (!conv_params_.y_kernel_is_1) { - RETURN_IF_ERROR(args_.SetInt("stride_y", stride_.y)); - RETURN_IF_ERROR(args_.SetInt("padding_y", padding_.y)); - RETURN_IF_ERROR(args_.SetInt("kernel_size_y", kernel_size_.y)); - RETURN_IF_ERROR(args_.SetInt("dilation_y", dilation_.y)); - } - if (!conv_params_.z_kernel_is_1) { - RETURN_IF_ERROR(args_.SetInt("stride_z", stride_.z)); - RETURN_IF_ERROR(args_.SetInt("padding_z", padding_.z)); - 
RETURN_IF_ERROR(args_.SetInt("kernel_size_z", kernel_size_.z)); - RETURN_IF_ERROR(args_.SetInt("dilation_z", dilation_.z)); - } - return args_.SetInt("grid_size_s", DivideRoundUp(dst_[0]->Slices(), - conv_params_.block_size.w)); -} - -int3 Conv3D::GetGridSize() const { - const int grid_x = DivideRoundUp(dst_[0]->Width() * dst_[0]->Batch(), - conv_params_.block_size.x); - const int grid_y = - DivideRoundUp(dst_[0]->Height(), conv_params_.block_size.y); - const int grid_z = - DivideRoundUp(dst_[0]->Slices(), conv_params_.block_size.w) * - DivideRoundUp(dst_[0]->Depth(), conv_params_.block_size.z); - int3 wg; - wg.x = DivideRoundUp(grid_x, conv_params_.work_group_size.x); - wg.y = DivideRoundUp(grid_y, conv_params_.work_group_size.y); - wg.z = DivideRoundUp(grid_z, conv_params_.work_group_size.z); - return int3(wg[conv_params_.work_group_launch_order[0]] * - conv_params_.work_group_size.x, - wg[conv_params_.work_group_launch_order[1]] * - conv_params_.work_group_size.y, - wg[conv_params_.work_group_launch_order[2]] * - conv_params_.work_group_size.z); -} - -absl::Status Conv3D::Tune(const TuningParameters& params) { - if (conv_params_.weights_upload_type == - WeightsUploadType::LOCAL_MEM_ASYNC_SUBGROUP || - conv_params_.weights_upload_type == - WeightsUploadType::LOCAL_MEM_BY_THREADS) { - return absl::OkStatus(); - } - if (conv_params_.work_group_launch_order[0] == 0 && - conv_params_.work_group_launch_order[1] == 1 && - conv_params_.work_group_launch_order[2] == 2) { - RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); - RETURN_IF_ERROR(GetBestWorkGroupConv(params, kernel_, grid_size_, - &conv_params_.work_group_size)); - work_group_size_ = conv_params_.work_group_size; - } - return absl::OkStatus(); -} - namespace { std::string GenerateUploadByThreads(const std::string& local_ptr_name, const std::string& global_ptr_name, @@ -284,39 +166,156 @@ std::string GenerateConv(CalculationsPrecision precision, } } // namespace -std::string GenerateConv3D(const OperationDef& op_def, bool stride_correction, - const Conv3D::ConvParams& conv_params, - Arguments* args) { - auto src_desc = absl::make_unique(op_def.src_tensors[0]); - src_desc->SetTextureAddressMode(TextureAddressMode::ZERO); +Conv3D::Conv3D(const OperationDef& definition, + const Convolution3DAttributes& attr, const CLDevice& device) + : GPUOperation(definition), + stride_(attr.strides.w, attr.strides.h, attr.strides.d), + padding_(-attr.padding.prepended.w, -attr.padding.prepended.h, + -attr.padding.prepended.d), + kernel_size_(attr.weights.shape.w, attr.weights.shape.h, + attr.weights.shape.d), + dilation_(attr.dilations.w, attr.dilations.h, attr.dilations.d), + conv_params_(GuessBestParams(device, definition, attr)) {} + +Conv3D::Conv3D(Conv3D&& operation) + : GPUOperation(std::move(operation)), + stride_(operation.stride_), + padding_(operation.padding_), + kernel_size_(operation.kernel_size_), + dilation_(operation.dilation_), + conv_params_(operation.conv_params_) {} + +Conv3D& Conv3D::operator=(Conv3D&& operation) { + if (this != &operation) { + std::swap(stride_, operation.stride_); + std::swap(padding_, operation.padding_); + std::swap(kernel_size_, operation.kernel_size_); + std::swap(dilation_, operation.dilation_); + std::swap(conv_params_, operation.conv_params_); + GPUOperation::operator=(std::move(operation)); + } + return *this; +} + +absl::Status Conv3D::Compile(const CreationContext& creation_context) { + const bool stride_correction = + definition_.IsBatchSupported() && stride_.x != 1; + std::string code = + 
GenerateConv3D(definition_, stride_correction, conv_params_); + work_group_size_ = conv_params_.work_group_size; + std::string element_wise_code; + RETURN_IF_ERROR( + MergeOperations(linked_operations_, &args_, &element_wise_code)); + RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), + {{"dst_tensor", element_wise_code}}, + &code)); + + std::vector options; + if (definition_.precision == CalculationsPrecision::F16 && + creation_context.device->IsPowerVR()) { + options.push_back(CompilerOptions::POWERVR_FP16); + } + return creation_context.cache->GetOrCreateCLKernel( + code, "main_function", options, *creation_context.context, + *creation_context.device, &kernel_); +} + +absl::Status Conv3D::BindArguments() { + if (!conv_params_.x_kernel_is_1) { + RETURN_IF_ERROR(args_.SetInt("stride_x", stride_.x)); + RETURN_IF_ERROR(args_.SetInt("padding_x", padding_.x * src_[0]->Batch())); + RETURN_IF_ERROR(args_.SetInt("kernel_size_x", kernel_size_.x)); + RETURN_IF_ERROR(args_.SetInt("dilation_x", dilation_.x * src_[0]->Batch())); + } + if (!conv_params_.y_kernel_is_1) { + RETURN_IF_ERROR(args_.SetInt("stride_y", stride_.y)); + RETURN_IF_ERROR(args_.SetInt("padding_y", padding_.y)); + RETURN_IF_ERROR(args_.SetInt("kernel_size_y", kernel_size_.y)); + RETURN_IF_ERROR(args_.SetInt("dilation_y", dilation_.y)); + } + if (!conv_params_.z_kernel_is_1) { + RETURN_IF_ERROR(args_.SetInt("stride_z", stride_.z)); + RETURN_IF_ERROR(args_.SetInt("padding_z", padding_.z)); + RETURN_IF_ERROR(args_.SetInt("kernel_size_z", kernel_size_.z)); + RETURN_IF_ERROR(args_.SetInt("dilation_z", dilation_.z)); + } + return args_.SetInt("grid_size_s", DivideRoundUp(dst_[0]->Slices(), + conv_params_.block_size.w)); +} + +int3 Conv3D::GetGridSize() const { + const int grid_x = DivideRoundUp(dst_[0]->Width() * dst_[0]->Batch(), + conv_params_.block_size.x); + const int grid_y = + DivideRoundUp(dst_[0]->Height(), conv_params_.block_size.y); + const int grid_z = + DivideRoundUp(dst_[0]->Slices(), conv_params_.block_size.w) * + DivideRoundUp(dst_[0]->Depth(), conv_params_.block_size.z); + int3 wg; + wg.x = DivideRoundUp(grid_x, conv_params_.work_group_size.x); + wg.y = DivideRoundUp(grid_y, conv_params_.work_group_size.y); + wg.z = DivideRoundUp(grid_z, conv_params_.work_group_size.z); + return int3(wg[conv_params_.work_group_launch_order[0]] * + conv_params_.work_group_size.x, + wg[conv_params_.work_group_launch_order[1]] * + conv_params_.work_group_size.y, + wg[conv_params_.work_group_launch_order[2]] * + conv_params_.work_group_size.z); +} + +absl::Status Conv3D::Tune(const TuningParameters& params) { + if (conv_params_.weights_upload_type == + WeightsUploadType::LOCAL_MEM_ASYNC_SUBGROUP || + conv_params_.weights_upload_type == + WeightsUploadType::LOCAL_MEM_BY_THREADS) { + return absl::OkStatus(); + } + if (conv_params_.work_group_launch_order[0] == 0 && + conv_params_.work_group_launch_order[1] == 1 && + conv_params_.work_group_launch_order[2] == 2) { + RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); + RETURN_IF_ERROR(GetBestWorkGroupConv(params, kernel_, grid_size_, + &conv_params_.work_group_size)); + work_group_size_ = conv_params_.work_group_size; + } + return absl::OkStatus(); +} + +std::string Conv3D::GenerateConv3D(const OperationDef& op_def, + bool stride_correction, + const Conv3D::ConvParams& conv_params) { + auto src_desc = op_def.src_tensors[0]; + src_desc.SetTextureAddressMode(TextureAddressMode::ZERO); if (op_def.IsBatchSupported()) { - src_desc->SetStateVar("BatchedWidth", "true"); + 
src_desc.SetStateVar("BatchedWidth", "true"); } - args->AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc)); - auto dst_desc = absl::make_unique(op_def.dst_tensors[0]); + AddSrcTensor("src_tensor", src_desc); + + auto dst_desc = op_def.dst_tensors[0]; if (op_def.IsBatchSupported()) { - dst_desc->SetStateVar("BatchedWidth", "true"); + dst_desc.SetStateVar("BatchedWidth", "true"); } - args->AddObjectRef("dst_tensor", AccessType::WRITE, std::move(dst_desc)); - if (!conv_params.x_kernel_is_1) { - args->AddInt("stride_x"); - args->AddInt("padding_x"); - args->AddInt("kernel_size_x"); - args->AddInt("dilation_x"); + AddDstTensor("dst_tensor", dst_desc); + + if (!conv_params_.x_kernel_is_1) { + args_.AddInt("stride_x"); + args_.AddInt("padding_x"); + args_.AddInt("kernel_size_x"); + args_.AddInt("dilation_x"); } - if (!conv_params.y_kernel_is_1) { - args->AddInt("stride_y"); - args->AddInt("padding_y"); - args->AddInt("kernel_size_y"); - args->AddInt("dilation_y"); + if (!conv_params_.y_kernel_is_1) { + args_.AddInt("stride_y"); + args_.AddInt("padding_y"); + args_.AddInt("kernel_size_y"); + args_.AddInt("dilation_y"); } - if (!conv_params.z_kernel_is_1) { - args->AddInt("stride_z"); - args->AddInt("padding_z"); - args->AddInt("kernel_size_z"); - args->AddInt("dilation_z"); + if (!conv_params_.z_kernel_is_1) { + args_.AddInt("stride_z"); + args_.AddInt("padding_z"); + args_.AddInt("kernel_size_z"); + args_.AddInt("dilation_z"); } - args->AddInt("grid_size_s"); + args_.AddInt("grid_size_s"); const auto src_tensor_type = op_def.src_tensors[0].storage_type; const bool buffer_type = src_tensor_type == TensorStorageType::BUFFER || diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.h index 720f1edb22e..569d78c4808 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.h @@ -106,6 +106,9 @@ class Conv3D : public GPUOperation { int dst_slices, bool x_kernel_is_1, bool y_kernel_is_1, bool z_kernel_is_1) const; + std::string GenerateConv3D(const OperationDef& op_def, bool stride_correction, + const Conv3D::ConvParams& conv_params); + int3 stride_; int3 padding_; int3 kernel_size_; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc index 90071554d0f..797e7c9ad90 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc @@ -81,24 +81,117 @@ std::string GetComputationPart(const int3& block_size, int element_size, return c; } -std::string GenerateConvBuffer1x1(const OperationDef& op_def, - const ConvBuffer1x1::ConvParams& conv_params, - Arguments* args) { - auto src_desc = absl::make_unique(op_def.src_tensors[0]); +ConvBuffer1x1::ConvParams GetBestParams(const CLDevice& device, + const OperationDef& definition, + const BHWC& shape, int src_depth, + int dst_depth) { + ConvBuffer1x1::ConvParams conv_params; + conv_params.element_size = 4; + conv_params.block_size = int3(1, 1, 1); + if (!device.IsMali()) { + return conv_params; + } + bool can_use_flt8 = (shape.w * shape.b) % 2 == 0 && + definition.precision != CalculationsPrecision::F32; + bool is_midgard = device.IsMali() && device.GetInfo().mali_info.IsMidgard(); + if (is_midgard) { + if (can_use_flt8) { + conv_params.element_size = 8; + } + if (definition.precision == CalculationsPrecision::F16 || !can_use_flt8) { + conv_params.block_size.x = 2; + } + return 
conv_params; + } + + int task_size = shape.w * shape.b * shape.h * dst_depth; + int block_size = + GetRecommendedBlockSizeForConv(device, definition.precision, task_size); + + if (!can_use_flt8 && block_size > 4) { + block_size = 4; + } + + if (can_use_flt8 && block_size >= 2) { + conv_params.element_size = 8; + block_size /= 2; + } + if (block_size == 4) { + conv_params.block_size.x = 2; + if (definition.precision == CalculationsPrecision::F32 && dst_depth < 32) { + conv_params.block_size.y = 2; + } else { + conv_params.block_size.z = 2; + } + } else if (block_size == 2) { + if (dst_depth >= 32) { + conv_params.block_size.z = 2; + } else { + conv_params.block_size.x = 2; + } + } + + return conv_params; +} + +ConvBuffer1x1::ConvParams GetBestParams(const CLDevice& device, + const OperationDef& definition, + int src_depth, int dst_depth) { + ConvBuffer1x1::ConvParams conv_params; + conv_params.element_size = 4; + conv_params.block_size = int3(1, 1, 1); + if (device.IsMali() && definition.precision == CalculationsPrecision::F16 && + device.GetInfo().compute_units_count <= 4) { + conv_params.block_size.x *= 2; + } + return conv_params; +} + +} // namespace + +ConvBuffer1x1::ConvBuffer1x1(const OperationDef& definition, + const ConvParams& conv_params) + : GPUOperation(definition), conv_params_(conv_params) {} + +ConvBuffer1x1::ConvBuffer1x1(ConvBuffer1x1&& operation) + : GPUOperation(std::move(operation)), + conv_params_(std::move(operation.conv_params_)) {} + +ConvBuffer1x1& ConvBuffer1x1::operator=(ConvBuffer1x1&& operation) { + if (this != &operation) { + std::swap(conv_params_, operation.conv_params_); + GPUOperation::operator=(std::move(operation)); + } + return *this; +} + +std::string ConvBuffer1x1::GenerateConvBuffer1x1( + const OperationDef& op_def, const ConvBuffer1x1::ConvParams& conv_params, + Arguments* args) { + auto src_desc = op_def.src_tensors[0]; if (op_def.IsBatchSupported()) { - src_desc->SetStateVar("BatchedWidth", "true"); + src_desc.SetStateVar("BatchedWidth", "true"); } - if (conv_params.element_size == 8) { - src_desc->SetStateVar("ElementsX2", "true"); - } else if (conv_params.element_size == 16) { - src_desc->SetStateVar("ElementsX4", "true"); + if (conv_params_.element_size == 8) { + src_desc.SetStateVar("ElementsX2", "true"); + } else if (conv_params_.element_size == 16) { + src_desc.SetStateVar("ElementsX4", "true"); } - args->AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc)); - auto dst_desc = absl::make_unique(op_def.dst_tensors[0]); + AddSrcTensor("src_tensor", src_desc); + if (op_def.src_tensors.size() == 2) { + // dynamic weights + BufferDescriptor desc; + desc.element_type = op_def.src_tensors[1].data_type; + desc.element_size = 16; + desc.memory_type = MemoryType::GLOBAL; + AddSrcBuffer("weights", desc); + } + + auto dst_desc = op_def.dst_tensors[0]; if (op_def.IsBatchSupported()) { - dst_desc->SetStateVar("BatchedWidth", "true"); + dst_desc.SetStateVar("BatchedWidth", "true"); } - args->AddObjectRef("dst_tensor", AccessType::WRITE, std::move(dst_desc)); + AddDstTensor("dst_tensor", dst_desc); std::string c = GetCommonDefines(op_def.precision); switch (op_def.precision) { @@ -207,90 +300,6 @@ std::string GenerateConvBuffer1x1(const OperationDef& op_def, return c; } -ConvBuffer1x1::ConvParams GetBestParams(const CLDevice& device, - const OperationDef& definition, - const BHWC& shape, int src_depth, - int dst_depth) { - ConvBuffer1x1::ConvParams conv_params; - conv_params.element_size = 4; - conv_params.block_size = int3(1, 1, 1); - if 
(!device.IsMali()) { - return conv_params; - } - bool can_use_flt8 = (shape.w * shape.b) % 2 == 0 && - definition.precision != CalculationsPrecision::F32; - bool is_midgard = device.IsMali() && device.GetInfo().mali_info.IsMidgard(); - if (is_midgard) { - if (can_use_flt8) { - conv_params.element_size = 8; - } - if (definition.precision == CalculationsPrecision::F16 || !can_use_flt8) { - conv_params.block_size.x = 2; - } - return conv_params; - } - - int task_size = shape.w * shape.b * shape.h * dst_depth; - int block_size = - GetRecommendedBlockSizeForConv(device, definition.precision, task_size); - - if (!can_use_flt8 && block_size > 4) { - block_size = 4; - } - - if (can_use_flt8 && block_size >= 2) { - conv_params.element_size = 8; - block_size /= 2; - } - if (block_size == 4) { - conv_params.block_size.x = 2; - if (definition.precision == CalculationsPrecision::F32 && dst_depth < 32) { - conv_params.block_size.y = 2; - } else { - conv_params.block_size.z = 2; - } - } else if (block_size == 2) { - if (dst_depth >= 32) { - conv_params.block_size.z = 2; - } else { - conv_params.block_size.x = 2; - } - } - - return conv_params; -} - -ConvBuffer1x1::ConvParams GetBestParams(const CLDevice& device, - const OperationDef& definition, - int src_depth, int dst_depth) { - ConvBuffer1x1::ConvParams conv_params; - conv_params.element_size = 4; - conv_params.block_size = int3(1, 1, 1); - if (device.IsMali() && definition.precision == CalculationsPrecision::F16 && - device.GetInfo().compute_units_count <= 4) { - conv_params.block_size.x *= 2; - } - return conv_params; -} - -} // namespace - -ConvBuffer1x1::ConvBuffer1x1(const OperationDef& definition, - const ConvParams& conv_params) - : GPUOperation(definition), conv_params_(conv_params) {} - -ConvBuffer1x1::ConvBuffer1x1(ConvBuffer1x1&& operation) - : GPUOperation(std::move(operation)), - conv_params_(std::move(operation.conv_params_)) {} - -ConvBuffer1x1& ConvBuffer1x1::operator=(ConvBuffer1x1&& operation) { - if (this != &operation) { - std::swap(conv_params_, operation.conv_params_); - GPUOperation::operator=(std::move(operation)); - } - return *this; -} - absl::Status ConvBuffer1x1::Compile(const CreationContext& creation_context) { std::string code = GenerateConvBuffer1x1(definition_, conv_params_, &args_); work_group_size_ = conv_params_.work_group_size; @@ -306,14 +315,6 @@ absl::Status ConvBuffer1x1::Compile(const CreationContext& creation_context) { return absl::OkStatus(); } -absl::Status ConvBuffer1x1::BindArguments() { - if (definition_.src_tensors.size() == 2) { - RETURN_IF_ERROR(args_.SetObjectRef("weights", src_[1])); - } - RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); - return args_.SetObjectRef("dst_tensor", dst_[0]); -} - int3 ConvBuffer1x1::GetGridSize() const { const int dst_width_elements = DivideRoundUp( dst_[0]->Width() * dst_[0]->Batch(), (conv_params_.element_size / 4)); @@ -435,12 +436,6 @@ absl::Status CreateConvBuffer1x1DynamicWeights( dst_depth); } *result = ConvBuffer1x1(definition, conv_params); - BufferDescriptor desc; - desc.element_type = definition.src_tensors[1].data_type; - desc.element_size = 16; - desc.memory_type = MemoryType::GLOBAL; - result->args_.AddObjectRef("weights", AccessType::READ, - absl::make_unique(desc)); return result->UploadBiases(attr.bias, creation_context.context); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.h index 9f549d33e71..e1527a7486d 100644 --- 
a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.h @@ -49,7 +49,6 @@ class ConvBuffer1x1 : public GPUOperation { absl::Status Tune(const TuningParameters& params) override; absl::Status Compile(const CreationContext& creation_context) override; - absl::Status BindArguments() override; int3 GetGridSize() const override; ConvWeightsDescription GetConvWeightsDescription() const { @@ -107,6 +106,10 @@ class ConvBuffer1x1 : public GPUOperation { absl::Status UploadBiases(const tflite::gpu::Tensor& biases, CLContext* context); + std::string GenerateConvBuffer1x1( + const OperationDef& op_def, const ConvBuffer1x1::ConvParams& conv_params, + Arguments* args); + ConvParams conv_params_; }; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc index 83c4300c219..1c4d033a006 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc @@ -26,30 +26,71 @@ namespace tflite { namespace gpu { namespace cl { namespace { +// Adreno can provide up to ~3-4KB of constant memory, but in some cases even +// 3KB can have very bad performance. +int GetAdrenoOptimalMaxConstantSize(int gpu_version) { + if (gpu_version < 600) { + return 256 * 10; // 2.5KB + } else { + return 256 * 14; // 3.5KB + } +} -std::string GenerateConvolutionConstantCode(const OperationDef& op_def, - const int2& kernel_size, - int src_channels, int dst_channels, - bool stride_correction, - const CLDevice& device, - Arguments* args) { - auto src_desc = absl::make_unique(op_def.src_tensors[0]); - src_desc->SetTextureAddressMode(GetFastestZeroMode(device)); - if (op_def.IsBatchSupported()) { - src_desc->SetStateVar("BatchedWidth", "true"); +int GetOptimalMaxConstantSize(const DeviceInfo& info) { + if (info.vendor != Vendor::QUALCOMM) { + // In general we do not expect that this kernel will be used with non Adreno + // so as it tuned for __constant memory that have big profit on Adreno + return 1024; // 1KB + } else { + return GetAdrenoOptimalMaxConstantSize(info.adreno_info.gpu_version); } - args->AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc)); - auto dst_desc = absl::make_unique(op_def.dst_tensors[0]); - if (op_def.IsBatchSupported()) { - dst_desc->SetStateVar("BatchedWidth", "true"); +} +} // namespace + +ConvConstants::ConvConstants(ConvConstants&& kernel) + : GPUOperation(std::move(kernel)), + kernel_size_(kernel.kernel_size_), + stride_(kernel.stride_), + padding_(kernel.padding_), + dilation_(kernel.dilation_), + src_channels_(kernel.src_channels_), + dst_channels_(kernel.dst_channels_) {} + +ConvConstants& ConvConstants::operator=(ConvConstants&& kernel) { + if (this != &kernel) { + std::swap(kernel_size_, kernel.kernel_size_); + std::swap(stride_, kernel.stride_); + std::swap(padding_, kernel.padding_); + std::swap(dilation_, kernel.dilation_); + std::swap(src_channels_, kernel.src_channels_); + std::swap(dst_channels_, kernel.dst_channels_); + GPUOperation::operator=(std::move(kernel)); } - args->AddObjectRef("dst_tensor", AccessType::WRITE, std::move(dst_desc)); - args->AddInt("stride_x"); - args->AddInt("stride_y"); - args->AddInt("padding_x"); - args->AddInt("padding_y"); - args->AddInt("dilation_x"); - args->AddInt("dilation_y"); + return *this; +} + +std::string ConvConstants::GenerateConvolutionConstantCode( + const OperationDef& op_def, const int2& kernel_size, int src_channels, + int 
dst_channels, bool stride_correction, const CLDevice& device) { + auto src_desc = op_def.src_tensors[0]; + src_desc.SetTextureAddressMode(TextureAddressMode::ZERO); + if (op_def.IsBatchSupported()) { + src_desc.SetStateVar("BatchedWidth", "true"); + } + AddSrcTensor("src_tensor", src_desc); + + auto dst_desc = op_def.dst_tensors[0]; + if (op_def.IsBatchSupported()) { + dst_desc.SetStateVar("BatchedWidth", "true"); + } + AddDstTensor("dst_tensor", dst_desc); + + args_.AddInt("stride_x"); + args_.AddInt("stride_y"); + args_.AddInt("padding_x"); + args_.AddInt("padding_y"); + args_.AddInt("dilation_x"); + args_.AddInt("dilation_y"); std::string c = GetCommonDefines(op_def.precision); @@ -173,55 +214,12 @@ std::string GenerateConvolutionConstantCode(const OperationDef& op_def, return c; } -// Adreno can provide up to ~3-4KB of constant memory, but in some cases even -// 3KB can have very bad performance. -int GetAdrenoOptimalMaxConstantSize(int gpu_version) { - if (gpu_version < 600) { - return 256 * 10; // 2.5KB - } else { - return 256 * 14; // 3.5KB - } -} - -int GetOptimalMaxConstantSize(const DeviceInfo& info) { - if (info.vendor != Vendor::QUALCOMM) { - // In general we do not expect that this kernel will be used with non Adreno - // so as it tuned for __constant memory that have big profit on Adreno - return 1024; // 1KB - } else { - return GetAdrenoOptimalMaxConstantSize(info.adreno_info.gpu_version); - } -} -} // namespace - -ConvConstants::ConvConstants(ConvConstants&& kernel) - : GPUOperation(std::move(kernel)), - kernel_size_(kernel.kernel_size_), - stride_(kernel.stride_), - padding_(kernel.padding_), - dilation_(kernel.dilation_), - src_channels_(kernel.src_channels_), - dst_channels_(kernel.dst_channels_) {} - -ConvConstants& ConvConstants::operator=(ConvConstants&& kernel) { - if (this != &kernel) { - std::swap(kernel_size_, kernel.kernel_size_); - std::swap(stride_, kernel.stride_); - std::swap(padding_, kernel.padding_); - std::swap(dilation_, kernel.dilation_); - std::swap(src_channels_, kernel.src_channels_); - std::swap(dst_channels_, kernel.dst_channels_); - GPUOperation::operator=(std::move(kernel)); - } - return *this; -} - absl::Status ConvConstants::Compile(const CreationContext& creation_context) { const bool stride_correction = definition_.IsBatchSupported() && stride_.x != 1; std::string code = GenerateConvolutionConstantCode( definition_, kernel_size_, src_channels_, dst_channels_, - stride_correction, *creation_context.device, &args_); + stride_correction, *creation_context.device); std::string element_wise_code; RETURN_IF_ERROR( MergeOperations(linked_operations_, &args_, &element_wise_code)); @@ -244,8 +242,6 @@ absl::Status ConvConstants::Compile(const CreationContext& creation_context) { } absl::Status ConvConstants::BindArguments() { - RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); - RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); RETURN_IF_ERROR(args_.SetInt("stride_x", stride_.x)); RETURN_IF_ERROR(args_.SetInt("stride_y", stride_.y)); RETURN_IF_ERROR(args_.SetInt("padding_x", padding_.x * src_[0]->Batch())); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.h index d434af0a337..0864243c6f2 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.h @@ -68,6 +68,10 @@ class ConvConstants : public GPUOperation { void RearrangeWeightsData(const tflite::gpu::Tensor& weights, absl::Span 
dst); + std::string GenerateConvolutionConstantCode( + const OperationDef& op_def, const int2& kernel_size, int src_channels, + int dst_channels, bool stride_correction, const CLDevice& device); + int2 kernel_size_; int2 stride_; int2 padding_; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc index a3ad9a4eb39..34b15850ebe 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc @@ -183,7 +183,7 @@ absl::Status ConvPowerVR::Compile(const CreationContext& creation_context) { const bool stride_correction = definition_.IsBatchSupported() && stride_padding_.x != 1; std::string code = GenerateConv(*creation_context.device, definition_, - stride_correction, conv_params_, &args_); + stride_correction, conv_params_); work_group_size_ = conv_params_.work_group_size; std::string element_wise_code; RETURN_IF_ERROR( @@ -205,9 +205,6 @@ absl::Status ConvPowerVR::Compile(const CreationContext& creation_context) { } absl::Status ConvPowerVR::BindArguments() { - if (definition_.src_tensors.size() == 2) { - RETURN_IF_ERROR(args_.SetObjectRef("weights", src_[1])); - } if (!conv_params_.x_kernel_is_1 || !conv_params_.y_kernel_is_1) { RETURN_IF_ERROR(args_.SetInt("stride_x", stride_padding_.x)); RETURN_IF_ERROR(args_.SetInt("stride_y", stride_padding_.y)); @@ -220,8 +217,6 @@ absl::Status ConvPowerVR::BindArguments() { args_.SetInt("dilation_x", kernel_dilation_.z * src_[0]->Batch())); RETURN_IF_ERROR(args_.SetInt("dilation_y", kernel_dilation_.w)); } - RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); - RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); if (conv_params_.linear_hw) { const int grid_x = DivideRoundUp(dst_[0]->Width() * dst_[0]->Batch(), conv_params_.block_size.x); @@ -279,34 +274,47 @@ absl::Status ConvPowerVR::Tune(const TuningParameters& params) { return absl::OkStatus(); } -std::string GenerateConv(const CLDevice& device, const OperationDef& op_def, - bool stride_correction, - const ConvPowerVR::ConvParams& conv_params, - Arguments* args) { - auto src_desc = absl::make_unique(op_def.src_tensors[0]); - src_desc->SetTextureAddressMode(TextureAddressMode::ZERO); +std::string ConvPowerVR::GenerateConv( + const CLDevice& device, const OperationDef& op_def, bool stride_correction, + const ConvPowerVR::ConvParams& conv_params) { + auto src_desc = op_def.src_tensors[0]; + src_desc.SetTextureAddressMode(GetFastestZeroMode(device)); if (op_def.IsBatchSupported()) { - src_desc->SetStateVar("BatchedWidth", "true"); + src_desc.SetStateVar("BatchedWidth", "true"); } - args->AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc)); - auto dst_desc = absl::make_unique(op_def.dst_tensors[0]); + AddSrcTensor("src_tensor", src_desc); + if (op_def.src_tensors.size() == 2) { + // dynamic weights + BufferDescriptor desc; + desc.element_type = op_def.src_tensors[1].data_type; + desc.element_size = 4; + desc.memory_type = conv_params.weights_upload_type == + ConvPowerVR::WeightsUploadType::CONSTANT_MEM + ? 
MemoryType::CONSTANT + : MemoryType::GLOBAL; + + AddSrcBuffer("weights", desc); + } + + auto dst_desc = op_def.dst_tensors[0]; if (op_def.IsBatchSupported()) { - dst_desc->SetStateVar("BatchedWidth", "true"); + dst_desc.SetStateVar("BatchedWidth", "true"); } - args->AddObjectRef("dst_tensor", AccessType::WRITE, std::move(dst_desc)); - const bool is1x1 = conv_params.x_kernel_is_1 && conv_params.y_kernel_is_1; + AddDstTensor("dst_tensor", dst_desc); + + const bool is1x1 = conv_params_.x_kernel_is_1 && conv_params_.y_kernel_is_1; if (!is1x1) { - args->AddInt("stride_x"); - args->AddInt("stride_y"); - args->AddInt("padding_x"); - args->AddInt("padding_y"); - args->AddInt("kernel_size_x"); - args->AddInt("kernel_size_y"); - args->AddInt("dilation_x"); - args->AddInt("dilation_y"); + args_.AddInt("stride_x"); + args_.AddInt("stride_y"); + args_.AddInt("padding_x"); + args_.AddInt("padding_y"); + args_.AddInt("kernel_size_x"); + args_.AddInt("kernel_size_y"); + args_.AddInt("dilation_x"); + args_.AddInt("dilation_y"); } - if (conv_params.linear_hw) { - args->AddInt("task_size_x"); + if (conv_params_.linear_hw) { + args_.AddInt("task_size_x"); } const auto src_tensor_type = op_def.src_tensors[0].storage_type; @@ -1013,16 +1021,6 @@ absl::Status CreateConvPowerVRDynamicWeights( ConvPowerVR* result, const BHWC* dst_shape) { *result = ConvPowerVR(definition, attr, weights_shape, *creation_context.device, dst_shape); - BufferDescriptor desc; - desc.element_type = definition.src_tensors[1].data_type; - desc.element_size = 4; - desc.memory_type = result->conv_params_.weights_upload_type == - ConvPowerVR::WeightsUploadType::CONSTANT_MEM - ? MemoryType::CONSTANT - : MemoryType::GLOBAL; - - result->args_.AddObjectRef("weights", AccessType::READ, - absl::make_unique(desc)); return result->UploadBias(attr.bias, creation_context.context); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h index 8ef8bc6fbde..9dceeffc7a7 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h @@ -206,6 +206,10 @@ class ConvPowerVR : public GPUOperation { bool different_weights_for_height, const BHWC* dst_shape = nullptr) const; + std::string GenerateConv(const CLDevice& device, const OperationDef& op_def, + bool stride_correction, + const ConvPowerVR::ConvParams& conv_params); + int4 stride_padding_; int4 kernel_dilation_; ConvParams conv_params_; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc index a31674de2fd..59cfa6c012e 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc @@ -30,33 +30,96 @@ namespace tflite { namespace gpu { namespace cl { namespace { +bool UseFP16SIMD(const CLDevice& device, CalculationsPrecision precision, + bool kernel1x1) { + if (!device.IsAdreno()) { + return false; + } + switch (precision) { + case CalculationsPrecision::F32: + case CalculationsPrecision::F32_F16: + return false; + case CalculationsPrecision::F16: + return device.IsAdreno3xx() && kernel1x1; + } +} +} // namespace -std::string GenerateConvCode(const OperationDef& op_def, const int3& block_size, - bool is1x1, bool adreno4xx_optimization, - bool stride_correction, - bool different_weights_for_height, - const CLDevice& device, Arguments* args) { - auto src_desc = absl::make_unique(op_def.src_tensors[0]); - 
src_desc->SetTextureAddressMode(GetFastestZeroMode(device)); - if (op_def.IsBatchSupported()) { - src_desc->SetStateVar("BatchedWidth", "true"); +ConvTexture::ConvTexture(const OperationDef& definition, + const Convolution2DAttributes& attr) + : GPUOperation(definition), + kernel_size_(attr.weights.shape.w, attr.weights.shape.h), + stride_(attr.strides.w, attr.strides.h), + padding_(-attr.padding.prepended.w, -attr.padding.prepended.h), + dilation_(attr.dilations.w, attr.dilations.h), + different_weights_for_height_(false), + block_size_(2, 2, 2) { + work_group_size_ = int3(4, 4, 2); +} + +ConvTexture::ConvTexture(const OperationDef& definition) + : GPUOperation(definition), + kernel_size_(1, 1), + stride_(1, 1), + padding_(0, 0), + dilation_(1, 1), + different_weights_for_height_(false), + block_size_(4, 1, 2) { + work_group_size_ = int3(16, 1, 2); +} + +ConvTexture::ConvTexture(ConvTexture&& operation) + : GPUOperation(std::move(operation)), + kernel_size_(operation.kernel_size_), + stride_(operation.stride_), + padding_(operation.padding_), + dilation_(operation.dilation_), + different_weights_for_height_(operation.different_weights_for_height_), + block_size_(operation.block_size_) {} + +ConvTexture& ConvTexture::operator=(ConvTexture&& operation) { + if (this != &operation) { + std::swap(kernel_size_, operation.kernel_size_); + std::swap(stride_, operation.stride_); + std::swap(padding_, operation.padding_); + std::swap(dilation_, operation.dilation_); + std::swap(different_weights_for_height_, + operation.different_weights_for_height_); + std::swap(block_size_, operation.block_size_); + GPUOperation::operator=(std::move(operation)); } - args->AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc)); - auto dst_desc = absl::make_unique(op_def.dst_tensors[0]); + return *this; +} + +std::string ConvTexture::GenerateConvCode(const OperationDef& op_def, + const int3& block_size, bool is1x1, + bool adreno4xx_optimization, + bool stride_correction, + bool different_weights_for_height, + const CLDevice& device) { + auto src_desc = op_def.src_tensors[0]; + src_desc.SetTextureAddressMode(GetFastestZeroMode(device)); if (op_def.IsBatchSupported()) { - dst_desc->SetStateVar("BatchedWidth", "true"); + src_desc.SetStateVar("BatchedWidth", "true"); } - args->AddObjectRef("dst_tensor", AccessType::WRITE, std::move(dst_desc)); + AddSrcTensor("src_tensor", src_desc); + + auto dst_desc = op_def.dst_tensors[0]; + if (op_def.IsBatchSupported()) { + dst_desc.SetStateVar("BatchedWidth", "true"); + } + AddDstTensor("dst_tensor", dst_desc); + if (!is1x1) { - args->AddInt("kernel_size_x"); - args->AddInt("kernel_size_y"); - args->AddInt("dilation_x"); - args->AddInt("dilation_y"); + args_.AddInt("kernel_size_x"); + args_.AddInt("kernel_size_y"); + args_.AddInt("dilation_x"); + args_.AddInt("dilation_y"); } - args->AddInt("stride_x"); - args->AddInt("stride_y"); - args->AddInt("padding_x"); - args->AddInt("padding_y"); + args_.AddInt("stride_x"); + args_.AddInt("stride_y"); + args_.AddInt("padding_x"); + args_.AddInt("padding_y"); const auto src_tensor_type = op_def.src_tensors[0].storage_type; const bool is_buffer = src_tensor_type == TensorStorageType::IMAGE_BUFFER || @@ -317,67 +380,6 @@ std::string GenerateConvCode(const OperationDef& op_def, const int3& block_size, return c; } -bool UseFP16SIMD(const CLDevice& device, CalculationsPrecision precision, - bool kernel1x1) { - if (!device.IsAdreno()) { - return false; - } - switch (precision) { - case CalculationsPrecision::F32: - case 
CalculationsPrecision::F32_F16: - return false; - case CalculationsPrecision::F16: - return device.IsAdreno3xx() && kernel1x1; - } -} -} // namespace - -ConvTexture::ConvTexture(const OperationDef& definition, - const Convolution2DAttributes& attr) - : GPUOperation(definition), - kernel_size_(attr.weights.shape.w, attr.weights.shape.h), - stride_(attr.strides.w, attr.strides.h), - padding_(-attr.padding.prepended.w, -attr.padding.prepended.h), - dilation_(attr.dilations.w, attr.dilations.h), - different_weights_for_height_(false), - block_size_(2, 2, 2) { - work_group_size_ = int3(4, 4, 2); -} - -ConvTexture::ConvTexture(const OperationDef& definition) - : GPUOperation(definition), - kernel_size_(1, 1), - stride_(1, 1), - padding_(0, 0), - dilation_(1, 1), - different_weights_for_height_(false), - block_size_(4, 1, 2) { - work_group_size_ = int3(16, 1, 2); -} - -ConvTexture::ConvTexture(ConvTexture&& operation) - : GPUOperation(std::move(operation)), - kernel_size_(operation.kernel_size_), - stride_(operation.stride_), - padding_(operation.padding_), - dilation_(operation.dilation_), - different_weights_for_height_(operation.different_weights_for_height_), - block_size_(operation.block_size_) {} - -ConvTexture& ConvTexture::operator=(ConvTexture&& operation) { - if (this != &operation) { - std::swap(kernel_size_, operation.kernel_size_); - std::swap(stride_, operation.stride_); - std::swap(padding_, operation.padding_); - std::swap(dilation_, operation.dilation_); - std::swap(different_weights_for_height_, - operation.different_weights_for_height_); - std::swap(block_size_, operation.block_size_); - GPUOperation::operator=(std::move(operation)); - } - return *this; -} - absl::Status ConvTexture::Compile(const CreationContext& creation_context) { auto storage_type = definition_.GetPrimaryStorageType(); bool is1x1 = kernel_size_.x == 1 && kernel_size_.y == 1; @@ -391,7 +393,7 @@ absl::Status ConvTexture::Compile(const CreationContext& creation_context) { std::string code = GenerateConvCode(definition_, block_size_, is1x1, adreno4xx_optimization, stride_correction, different_weights_for_height_, - *creation_context.device, &args_); + *creation_context.device); std::string element_wise_code; RETURN_IF_ERROR( MergeOperations(linked_operations_, &args_, &element_wise_code)); @@ -408,8 +410,6 @@ absl::Status ConvTexture::Compile(const CreationContext& creation_context) { } absl::Status ConvTexture::BindArguments() { - RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); - RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); if (!(kernel_size_.x == 1 && kernel_size_.y == 1)) { RETURN_IF_ERROR(args_.SetInt("kernel_size_x", kernel_size_.x)); RETURN_IF_ERROR(args_.SetInt("kernel_size_y", kernel_size_.y)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.h index 80a328e4eef..6ed9f8a6ee6 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.h @@ -89,6 +89,13 @@ class ConvTexture : public GPUOperation { absl::Span dst_0, absl::Span dst_1, absl::Span dst_2, absl::Span dst_3); + std::string GenerateConvCode(const OperationDef& op_def, + const int3& block_size, bool is1x1, + bool adreno4xx_optimization, + bool stride_correction, + bool different_weights_for_height, + const CLDevice& device); + int2 kernel_size_; int2 stride_; int2 padding_; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.cc 
b/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.cc index ce973115266..8314871ab00 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.cc @@ -23,21 +23,30 @@ limitations under the License. namespace tflite { namespace gpu { namespace cl { -namespace { -std::string GetConverterToConvWeightsCode( - const OperationDef& op_def, const ConvWeightsDescription& conv_weights_desc, - Arguments* args) { - args->AddObjectRef( - "src_tensor", AccessType::READ, - absl::make_unique(op_def.src_tensors[0])); - args->AddObjectRef( - "dst_tensor", AccessType::WRITE, - absl::make_unique(op_def.dst_tensors[0])); - args->AddFloat("mask_x"); - args->AddFloat("mask_y"); - args->AddFloat("mask_z"); - args->AddFloat("mask_w"); +ConverterToConvWeights::ConverterToConvWeights( + ConverterToConvWeights&& operation) + : GPUOperation(std::move(operation)), + conv_weights_desc_(operation.conv_weights_desc_) {} + +ConverterToConvWeights& ConverterToConvWeights::operator=( + ConverterToConvWeights&& operation) { + if (this != &operation) { + conv_weights_desc_ = operation.conv_weights_desc_; + GPUOperation::operator=(std::move(operation)); + } + return *this; +} + +std::string ConverterToConvWeights::GetConverterToConvWeightsCode( + const OperationDef& op_def, + const ConvWeightsDescription& conv_weights_desc) { + AddSrcTensor("src_tensor", op_def.src_tensors[0]); + AddDstTensor("dst_tensor", op_def.dst_tensors[0]); + args_.AddFloat("mask_x"); + args_.AddFloat("mask_y"); + args_.AddFloat("mask_z"); + args_.AddFloat("mask_w"); std::string c = GetCommonDefines(op_def.precision); c += "__kernel void main_function(\n"; @@ -93,26 +102,11 @@ std::string GetConverterToConvWeightsCode( c += "}\n"; return c; } -} // namespace - -ConverterToConvWeights::ConverterToConvWeights( - ConverterToConvWeights&& operation) - : GPUOperation(std::move(operation)), - conv_weights_desc_(operation.conv_weights_desc_) {} - -ConverterToConvWeights& ConverterToConvWeights::operator=( - ConverterToConvWeights&& operation) { - if (this != &operation) { - conv_weights_desc_ = operation.conv_weights_desc_; - GPUOperation::operator=(std::move(operation)); - } - return *this; -} absl::Status ConverterToConvWeights::Compile( const CreationContext& creation_context) { std::string code = - GetConverterToConvWeightsCode(definition_, conv_weights_desc_, &args_); + GetConverterToConvWeightsCode(definition_, conv_weights_desc_); RETURN_IF_ERROR( args_.TransformToCLCode(creation_context.device->GetInfo(), {}, &code)); return creation_context.cache->GetOrCreateCLKernel( @@ -121,8 +115,6 @@ absl::Status ConverterToConvWeights::Compile( } absl::Status ConverterToConvWeights::BindArguments() { - RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); - RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); float4 mask = GetMaskForLastPlane(src_[0]->Channels()); RETURN_IF_ERROR(args_.SetFloat("mask_x", mask.x)); RETURN_IF_ERROR(args_.SetFloat("mask_y", mask.y)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.h index d8d84b8f5b5..015e6b3a0dd 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.h @@ -43,6 +43,10 @@ class ConverterToConvWeights : public GPUOperation { ConverterToConvWeights& operator=(const ConverterToConvWeights&) = delete; private: + 
std::string GetConverterToConvWeightsCode( + const OperationDef& op_def, + const ConvWeightsDescription& conv_weights_desc); + ConvWeightsDescription conv_weights_desc_; }; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc index dc146c48895..7e7847282a6 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc @@ -28,25 +28,70 @@ limitations under the License. namespace tflite { namespace gpu { namespace cl { -namespace { -std::string GenerateConvolutionTransposedCode(const OperationDef& op_def, - const CLDevice& device, - bool weights_are_buffer, - const int3& block_size, - Arguments* args) { - auto src_desc = absl::make_unique(op_def.src_tensors[0]); - src_desc->SetTextureAddressMode(GetFastestZeroMode(device)); - args->AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc)); - args->AddObjectRef( - "dst_tensor", AccessType::WRITE, - absl::make_unique(op_def.dst_tensors[0])); - args->AddInt("stride_x"); - args->AddInt("stride_y"); - args->AddInt("padding_x"); - args->AddInt("padding_y"); - args->AddInt("kernel_size_x"); - args->AddInt("kernel_size_y"); +ConvolutionTransposed::ConvolutionTransposed( + const OperationDef& definition, const ConvolutionTransposedAttributes& attr, + const CLDevice& device) + : GPUOperation(definition), + weights_are_buffer_(device.IsMali()), + kernel_size_(attr.weights.shape.w, attr.weights.shape.h), + stride_(attr.stride.w, attr.stride.h), + padding_(attr.padding.prepended.w, attr.padding.prepended.h), + block_size_(2, 2, 2) { + const bool is_f16 = definition.precision == CalculationsPrecision::F16; + if (device.IsMali()) { + MaliInfo mali_info = device.GetInfo().mali_info; + if (mali_info.IsMidgard()) { + block_size_ = is_f16 ? int3(2, 1, 2) : int3(2, 1, 1); + } else { + block_size_ = is_f16 ? 
int3(2, 2, 2) : int3(2, 2, 1); + } + } + const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4); + if (dst_depth == 1 || dst_depth == 3) { + if (!device.IsMali()) { + block_size_.y *= block_size_.z; + } + block_size_.z = 1; + } +} + +ConvolutionTransposed::ConvolutionTransposed(ConvolutionTransposed&& operation) + : GPUOperation(std::move(operation)), + weights_are_buffer_(operation.weights_are_buffer_), + kernel_size_(operation.kernel_size_), + stride_(operation.stride_), + padding_(operation.padding_), + block_size_(operation.block_size_) {} + +ConvolutionTransposed& ConvolutionTransposed::operator=( + ConvolutionTransposed&& operation) { + if (this != &operation) { + std::swap(weights_are_buffer_, operation.weights_are_buffer_); + std::swap(kernel_size_, operation.kernel_size_); + std::swap(stride_, operation.stride_); + std::swap(padding_, operation.padding_); + std::swap(block_size_, operation.block_size_); + GPUOperation::operator=(std::move(operation)); + } + return *this; +} + +std::string ConvolutionTransposed::GenerateConvolutionTransposedCode( + const OperationDef& op_def, const CLDevice& device, bool weights_are_buffer, + const int3& block_size) { + auto src_desc = op_def.src_tensors[0]; + src_desc.SetTextureAddressMode(GetFastestZeroMode(device)); + AddSrcTensor("src_tensor", src_desc); + + AddDstTensor("dst_tensor", op_def.dst_tensors[0]); + + args_.AddInt("stride_x"); + args_.AddInt("stride_y"); + args_.AddInt("padding_x"); + args_.AddInt("padding_y"); + args_.AddInt("kernel_size_x"); + args_.AddInt("kernel_size_y"); const auto src_tensor_type = op_def.src_tensors[0].storage_type; bool image_buffer = src_tensor_type == TensorStorageType::IMAGE_BUFFER; @@ -285,61 +330,11 @@ std::string GenerateConvolutionTransposedCode(const OperationDef& op_def, c += "}\n"; return c; } -} // namespace - -ConvolutionTransposed::ConvolutionTransposed( - const OperationDef& definition, const ConvolutionTransposedAttributes& attr, - const CLDevice& device) - : GPUOperation(definition), - weights_are_buffer_(device.IsMali()), - kernel_size_(attr.weights.shape.w, attr.weights.shape.h), - stride_(attr.stride.w, attr.stride.h), - padding_(attr.padding.prepended.w, attr.padding.prepended.h), - block_size_(2, 2, 2) { - const bool is_f16 = definition.precision == CalculationsPrecision::F16; - if (device.IsMali()) { - MaliInfo mali_info = device.GetInfo().mali_info; - if (mali_info.IsMidgard()) { - block_size_ = is_f16 ? int3(2, 1, 2) : int3(2, 1, 1); - } else { - block_size_ = is_f16 ? 
int3(2, 2, 2) : int3(2, 2, 1); - } - } - const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4); - if (dst_depth == 1 || dst_depth == 3) { - if (!device.IsMali()) { - block_size_.y *= block_size_.z; - } - block_size_.z = 1; - } -} - -ConvolutionTransposed::ConvolutionTransposed(ConvolutionTransposed&& operation) - : GPUOperation(std::move(operation)), - weights_are_buffer_(operation.weights_are_buffer_), - kernel_size_(operation.kernel_size_), - stride_(operation.stride_), - padding_(operation.padding_), - block_size_(operation.block_size_) {} - -ConvolutionTransposed& ConvolutionTransposed::operator=( - ConvolutionTransposed&& operation) { - if (this != &operation) { - std::swap(weights_are_buffer_, operation.weights_are_buffer_); - std::swap(kernel_size_, operation.kernel_size_); - std::swap(stride_, operation.stride_); - std::swap(padding_, operation.padding_); - std::swap(block_size_, operation.block_size_); - GPUOperation::operator=(std::move(operation)); - } - return *this; -} absl::Status ConvolutionTransposed::Compile( const CreationContext& creation_context) { std::string code = GenerateConvolutionTransposedCode( - definition_, *creation_context.device, weights_are_buffer_, block_size_, - &args_); + definition_, *creation_context.device, weights_are_buffer_, block_size_); std::string element_wise_code; RETURN_IF_ERROR( MergeOperations(linked_operations_, &args_, &element_wise_code)); @@ -355,8 +350,6 @@ absl::Status ConvolutionTransposed::Compile( } absl::Status ConvolutionTransposed::BindArguments() { - RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); - RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); RETURN_IF_ERROR(args_.SetInt("stride_x", stride_.x)); RETURN_IF_ERROR(args_.SetInt("stride_y", stride_.y)); RETURN_IF_ERROR(args_.SetInt("padding_x", padding_.x)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.h index fc53884bcc7..929444b3915 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.h @@ -65,6 +65,11 @@ class ConvolutionTransposed : public GPUOperation { void RearrangeWeightsData(const tflite::gpu::Tensor& weights, absl::Span dst); + std::string GenerateConvolutionTransposedCode(const OperationDef& op_def, + const CLDevice& device, + bool weights_are_buffer, + const int3& block_size); + bool weights_are_buffer_; int2 kernel_size_; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.cc index 409f7e3716b..cbd4d436dcd 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.cc @@ -27,29 +27,60 @@ limitations under the License. 
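The constant-memory budget logic that this patch moves to the top of conv_constants.cc (GetAdrenoOptimalMaxConstantSize / GetOptimalMaxConstantSize, shown in an earlier hunk) reduces to a small vendor check. The stand-alone sketch below restates it; only the byte budgets and the Qualcomm/Adreno branching are taken from the hunk, while Vendor and DeviceInfo here are simplified stand-ins, not the real TFLite GPU types.

#include <iostream>

// Simplified stand-ins for the real DeviceInfo/Vendor types (assumption,
// for illustration only).
enum class Vendor { QUALCOMM, OTHER };

struct DeviceInfo {
  Vendor vendor;
  int adreno_gpu_version;  // e.g. 540 (Adreno 5xx) or 630 (Adreno 6xx)
};

int GetAdrenoOptimalMaxConstantSize(int gpu_version) {
  // Adreno exposes roughly 3-4KB of __constant memory, but filling it
  // completely can hurt performance, so the budget stays below that.
  return gpu_version < 600 ? 256 * 10   // 2.5KB for Adreno 5xx and older
                           : 256 * 14;  // 3.5KB for Adreno 6xx
}

int GetOptimalMaxConstantSize(const DeviceInfo& info) {
  // The kernel is tuned for Adreno's __constant memory; other vendors get a
  // conservative 1KB budget.
  return info.vendor == Vendor::QUALCOMM
             ? GetAdrenoOptimalMaxConstantSize(info.adreno_gpu_version)
             : 1024;
}

int main() {
  DeviceInfo adreno630{Vendor::QUALCOMM, 630};
  DeviceInfo other{Vendor::OTHER, 0};
  std::cout << GetOptimalMaxConstantSize(adreno630) << " bytes\n";  // 3584
  std::cout << GetOptimalMaxConstantSize(other) << " bytes\n";      // 1024
  return 0;
}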
namespace tflite { namespace gpu { namespace cl { -namespace { -std::string GenerateConvolutionTransposed3DCode(const OperationDef& op_def, - const CLDevice& device, - bool weights_are_buffer, - const int4& block_size, - Arguments* args) { - auto src_desc = absl::make_unique(op_def.src_tensors[0]); - src_desc->SetTextureAddressMode(GetFastestZeroMode(device)); - args->AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc)); - args->AddObjectRef( - "dst_tensor", AccessType::WRITE, - absl::make_unique(op_def.dst_tensors[0])); - args->AddInt("stride_x"); - args->AddInt("stride_y"); - args->AddInt("stride_z"); - args->AddInt("padding_x"); - args->AddInt("padding_y"); - args->AddInt("padding_z"); - args->AddInt("kernel_size_x"); - args->AddInt("kernel_size_y"); - args->AddInt("kernel_size_z"); - args->AddInt("grid_size_s"); +ConvolutionTransposed3D::ConvolutionTransposed3D( + const OperationDef& definition, + const ConvolutionTransposed3DAttributes& attr, const CLDevice& device) + : GPUOperation(definition), + weights_are_buffer_(device.IsMali()), + kernel_size_(attr.weights.shape.w, attr.weights.shape.h, + attr.weights.shape.d), + stride_(attr.stride.w, attr.stride.h, attr.stride.d), + padding_(attr.padding.prepended.w, attr.padding.prepended.h, + attr.padding.prepended.d), + block_size_(2, 2, 1, 2) {} + +ConvolutionTransposed3D::ConvolutionTransposed3D( + ConvolutionTransposed3D&& operation) + : GPUOperation(std::move(operation)), + weights_are_buffer_(operation.weights_are_buffer_), + kernel_size_(operation.kernel_size_), + stride_(operation.stride_), + padding_(operation.padding_), + block_size_(operation.block_size_) {} + +ConvolutionTransposed3D& ConvolutionTransposed3D::operator=( + ConvolutionTransposed3D&& operation) { + if (this != &operation) { + std::swap(weights_are_buffer_, operation.weights_are_buffer_); + std::swap(kernel_size_, operation.kernel_size_); + std::swap(stride_, operation.stride_); + std::swap(padding_, operation.padding_); + std::swap(block_size_, operation.block_size_); + GPUOperation::operator=(std::move(operation)); + } + return *this; +} + +std::string ConvolutionTransposed3D::GenerateConvolutionTransposed3DCode( + const OperationDef& op_def, const CLDevice& device, bool weights_are_buffer, + const int4& block_size) { + auto src_desc = op_def.src_tensors[0]; + src_desc.SetTextureAddressMode(GetFastestZeroMode(device)); + AddSrcTensor("src_tensor", src_desc); + + AddDstTensor("dst_tensor", op_def.dst_tensors[0]); + + args_.AddInt("stride_x"); + args_.AddInt("stride_y"); + args_.AddInt("stride_z"); + args_.AddInt("padding_x"); + args_.AddInt("padding_y"); + args_.AddInt("padding_z"); + args_.AddInt("kernel_size_x"); + args_.AddInt("kernel_size_y"); + args_.AddInt("kernel_size_z"); + args_.AddInt("grid_size_s"); const auto src_tensor_type = op_def.src_tensors[0].storage_type; bool image_buffer = src_tensor_type == TensorStorageType::IMAGE_BUFFER; @@ -324,47 +355,11 @@ std::string GenerateConvolutionTransposed3DCode(const OperationDef& op_def, c += "}\n"; return c; } -} // namespace - -ConvolutionTransposed3D::ConvolutionTransposed3D( - const OperationDef& definition, - const ConvolutionTransposed3DAttributes& attr, const CLDevice& device) - : GPUOperation(definition), - weights_are_buffer_(device.IsMali()), - kernel_size_(attr.weights.shape.w, attr.weights.shape.h, - attr.weights.shape.d), - stride_(attr.stride.w, attr.stride.h, attr.stride.d), - padding_(attr.padding.prepended.w, attr.padding.prepended.h, - attr.padding.prepended.d), - block_size_(2, 2, 
1, 2) {} - -ConvolutionTransposed3D::ConvolutionTransposed3D( - ConvolutionTransposed3D&& operation) - : GPUOperation(std::move(operation)), - weights_are_buffer_(operation.weights_are_buffer_), - kernel_size_(operation.kernel_size_), - stride_(operation.stride_), - padding_(operation.padding_), - block_size_(operation.block_size_) {} - -ConvolutionTransposed3D& ConvolutionTransposed3D::operator=( - ConvolutionTransposed3D&& operation) { - if (this != &operation) { - std::swap(weights_are_buffer_, operation.weights_are_buffer_); - std::swap(kernel_size_, operation.kernel_size_); - std::swap(stride_, operation.stride_); - std::swap(padding_, operation.padding_); - std::swap(block_size_, operation.block_size_); - GPUOperation::operator=(std::move(operation)); - } - return *this; -} absl::Status ConvolutionTransposed3D::Compile( const CreationContext& creation_context) { std::string code = GenerateConvolutionTransposed3DCode( - definition_, *creation_context.device, weights_are_buffer_, block_size_, - &args_); + definition_, *creation_context.device, weights_are_buffer_, block_size_); std::string element_wise_code; RETURN_IF_ERROR( MergeOperations(linked_operations_, &args_, &element_wise_code)); @@ -388,8 +383,6 @@ absl::Status ConvolutionTransposed3D::Compile( } absl::Status ConvolutionTransposed3D::BindArguments() { - RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); - RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); RETURN_IF_ERROR(args_.SetInt("stride_x", stride_.x)); RETURN_IF_ERROR(args_.SetInt("stride_y", stride_.y)); RETURN_IF_ERROR(args_.SetInt("stride_z", stride_.z)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.h index 09f7e700967..9a9f0d55199 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.h @@ -65,6 +65,11 @@ class ConvolutionTransposed3D : public GPUOperation { void RearrangeWeightsData(const tflite::gpu::Tensor& weights, absl::Span dst); + std::string GenerateConvolutionTransposed3DCode(const OperationDef& op_def, + const CLDevice& device, + bool weights_are_buffer, + const int4& block_size); + bool weights_are_buffer_; int3 kernel_size_; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.cc index 9446f0f7e3d..d60282d8662 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.cc @@ -27,26 +27,61 @@ limitations under the License. 
namespace tflite { namespace gpu { namespace cl { -namespace { +ConvolutionTransposed3x3::ConvolutionTransposed3x3( + const OperationDef& definition, const CLDevice& device, int2 padding) + : GPUOperation(definition), + padding_(padding), + work_group_launch_order_(2, 0, 1) { + work_group_size_ = int3(8, 4, 1); + if (device.IsPowerVR()) { + weights_upload_type_ = WeightsUploadType::LOCAL_MEM_ASYNC; + } else if (device.IsNvidia() || device.IsIntel()) { + weights_upload_type_ = WeightsUploadType::LOCAL_MEM_BY_THREADS; + } else if (device.IsAMD()) { + weights_upload_type_ = WeightsUploadType::CONSTANT_MEM; + } else { + weights_upload_type_ = WeightsUploadType::GLOBAL_MEM; + } +} -std::string GenerateConvolutionTransposedCode( +ConvolutionTransposed3x3::ConvolutionTransposed3x3( + ConvolutionTransposed3x3&& operation) + : GPUOperation(std::move(operation)), + padding_(operation.padding_), + work_group_launch_order_(operation.work_group_launch_order_), + weights_upload_type_(operation.weights_upload_type_) {} + +ConvolutionTransposed3x3& ConvolutionTransposed3x3::operator=( + ConvolutionTransposed3x3&& operation) { + if (this != &operation) { + std::swap(padding_, operation.padding_); + std::swap(work_group_launch_order_, operation.work_group_launch_order_); + std::swap(weights_upload_type_, operation.weights_upload_type_); + GPUOperation::operator=(std::move(operation)); + } + return *this; +} + +std::string ConvolutionTransposed3x3::GenerateConvolutionTransposedCode( const OperationDef& op_def, ConvolutionTransposed3x3::WeightsUploadType weights_upload_type, - int2 padding, int3 work_group_launch_order, Arguments* args) { - auto src_desc = absl::make_unique(op_def.src_tensors[0]); - src_desc->SetTextureAddressMode(TextureAddressMode::ZERO); + int2 padding, int3 work_group_launch_order) { + auto src_desc = op_def.src_tensors[0]; + src_desc.SetTextureAddressMode(TextureAddressMode::ZERO); if (op_def.IsBatchSupported()) { - src_desc->SetStateVar("BatchedWidth", "true"); + src_desc.SetStateVar("BatchedWidth", "true"); } - args->AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc)); - auto dst_desc = absl::make_unique(op_def.dst_tensors[0]); + AddSrcTensor("src_tensor", src_desc); + + auto dst_desc = op_def.dst_tensors[0]; if (op_def.IsBatchSupported()) { - dst_desc->SetStateVar("BatchedWidth", "true"); + dst_desc.SetStateVar("BatchedWidth", "true"); } - args->AddObjectRef("dst_tensor", AccessType::WRITE, std::move(dst_desc)); - args->AddInt("filter_offset"); - args->AddInt("padding_x"); - args->AddInt("padding_y"); + AddDstTensor("dst_tensor", dst_desc); + + args_.AddInt("filter_offset"); + args_.AddInt("padding_x"); + args_.AddInt("padding_y"); const auto src_tensor_type = op_def.src_tensors[0].storage_type; const bool manual_clamp = src_tensor_type == TensorStorageType::BUFFER || @@ -264,48 +299,10 @@ std::string GenerateConvolutionTransposedCode( return c; } -} // namespace - -ConvolutionTransposed3x3::ConvolutionTransposed3x3( - const OperationDef& definition, const CLDevice& device, int2 padding) - : GPUOperation(definition), - padding_(padding), - work_group_launch_order_(2, 0, 1) { - work_group_size_ = int3(8, 4, 1); - if (device.IsPowerVR()) { - weights_upload_type_ = WeightsUploadType::LOCAL_MEM_ASYNC; - } else if (device.IsNvidia() || device.IsIntel()) { - weights_upload_type_ = WeightsUploadType::LOCAL_MEM_BY_THREADS; - } else if (device.IsAMD()) { - weights_upload_type_ = WeightsUploadType::CONSTANT_MEM; - } else { - weights_upload_type_ = WeightsUploadType::GLOBAL_MEM; - 
} -} - -ConvolutionTransposed3x3::ConvolutionTransposed3x3( - ConvolutionTransposed3x3&& operation) - : GPUOperation(std::move(operation)), - padding_(operation.padding_), - work_group_launch_order_(operation.work_group_launch_order_), - weights_upload_type_(operation.weights_upload_type_) {} - -ConvolutionTransposed3x3& ConvolutionTransposed3x3::operator=( - ConvolutionTransposed3x3&& operation) { - if (this != &operation) { - std::swap(padding_, operation.padding_); - std::swap(work_group_launch_order_, operation.work_group_launch_order_); - std::swap(weights_upload_type_, operation.weights_upload_type_); - GPUOperation::operator=(std::move(operation)); - } - return *this; -} - absl::Status ConvolutionTransposed3x3::Compile( const CreationContext& creation_context) { std::string code = GenerateConvolutionTransposedCode( - definition_, weights_upload_type_, padding_, work_group_launch_order_, - &args_); + definition_, weights_upload_type_, padding_, work_group_launch_order_); std::string element_wise_code; RETURN_IF_ERROR( MergeOperations(linked_operations_, &args_, &element_wise_code)); @@ -325,8 +322,6 @@ absl::Status ConvolutionTransposed3x3::Compile( } absl::Status ConvolutionTransposed3x3::BindArguments() { - RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); - RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); RETURN_IF_ERROR(args_.SetInt("filter_offset", 4 * 9 * src_[0]->Slices())); const int padding_x = padding_.x >= 1 ? (padding_.x - 1) / 2 : (padding_.x - 2) / 2; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.h index 0dc42a7563d..d68957ffb54 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.h @@ -72,6 +72,11 @@ class ConvolutionTransposed3x3 : public GPUOperation { void RearrangeWeightsData(const tflite::gpu::Tensor& weights, absl::Span dst); + std::string GenerateConvolutionTransposedCode( + const OperationDef& op_def, + ConvolutionTransposed3x3::WeightsUploadType weights_upload_type, + int2 padding, int3 work_group_launch_order); + int2 padding_; int3 work_group_launch_order_; WeightsUploadType weights_upload_type_; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.cc index 56a21cb7239..ac25b75db6d 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.cc @@ -25,18 +25,36 @@ limitations under the License. 
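The ConvolutionTransposed3x3 constructor above (and the ConvolutionTransposed4x4 constructor later in this patch) chooses how convolution weights are uploaded based on the GPU vendor. A minimal sketch of that selection follows; the GpuVendor enum is a simplified stand-in for the CLDevice IsPowerVR()/IsNvidia()/IsIntel()/IsAMD() queries, and only the mapping itself is taken from the constructors.

#include <iostream>

// Stand-in for the vendor checks exposed by CLDevice (assumption, for
// illustration only).
enum class GpuVendor { POWERVR, NVIDIA, INTEL, AMD, OTHER };

enum class WeightsUploadType {
  LOCAL_MEM_ASYNC,
  LOCAL_MEM_BY_THREADS,
  CONSTANT_MEM,
  GLOBAL_MEM,
};

WeightsUploadType ChooseWeightsUploadType(GpuVendor vendor) {
  switch (vendor) {
    case GpuVendor::POWERVR:
      return WeightsUploadType::LOCAL_MEM_ASYNC;       // async copy to local memory
    case GpuVendor::NVIDIA:
    case GpuVendor::INTEL:
      return WeightsUploadType::LOCAL_MEM_BY_THREADS;  // threads cooperatively fill local memory
    case GpuVendor::AMD:
      return WeightsUploadType::CONSTANT_MEM;          // __constant buffer
    default:
      return WeightsUploadType::GLOBAL_MEM;            // plain global memory reads
  }
}

int main() {
  std::cout << static_cast<int>(ChooseWeightsUploadType(GpuVendor::POWERVR)) << "\n";  // 0
  std::cout << static_cast<int>(ChooseWeightsUploadType(GpuVendor::OTHER)) << "\n";    // 3
  return 0;
}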
namespace tflite { namespace gpu { namespace cl { -namespace { -std::string GenerateConvolutionTransposedCode(const OperationDef& op_def, - int src_depth, int dst_depth, - const CLDevice& device, - Arguments* args) { - auto src_desc = absl::make_unique(op_def.src_tensors[0]); - src_desc->SetTextureAddressMode(GetFastestZeroMode(device)); - args->AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc)); - args->AddObjectRef( - "dst_tensor", AccessType::WRITE, - absl::make_unique(op_def.dst_tensors[0])); +ConvolutionTransposed3x3Thin::ConvolutionTransposed3x3Thin( + const OperationDef& definition, const ConvolutionTransposedAttributes& attr) + : GPUOperation(definition), + src_channels_(attr.weights.shape.i), + dst_channels_(attr.weights.shape.o) {} + +ConvolutionTransposed3x3Thin::ConvolutionTransposed3x3Thin( + ConvolutionTransposed3x3Thin&& operation) + : GPUOperation(std::move(operation)), + src_channels_(operation.src_channels_), + dst_channels_(operation.dst_channels_) {} + +ConvolutionTransposed3x3Thin& ConvolutionTransposed3x3Thin::operator=( + ConvolutionTransposed3x3Thin&& operation) { + if (this != &operation) { + std::swap(src_channels_, operation.src_channels_); + std::swap(dst_channels_, operation.dst_channels_); + GPUOperation::operator=(std::move(operation)); + } + return *this; +} + +std::string ConvolutionTransposed3x3Thin::GenerateConvolutionTransposedCode( + const OperationDef& op_def, int src_depth, int dst_depth, + const CLDevice& device) { + auto src_desc = op_def.src_tensors[0]; + src_desc.SetTextureAddressMode(GetFastestZeroMode(device)); + AddSrcTensor("src_tensor", src_desc); + AddDstTensor("dst_tensor", op_def.dst_tensors[0]); const auto src_tensor_type = op_def.src_tensors[0].storage_type; @@ -165,35 +183,12 @@ std::string GenerateConvolutionTransposedCode(const OperationDef& op_def, return c; } -} // namespace - -ConvolutionTransposed3x3Thin::ConvolutionTransposed3x3Thin( - const OperationDef& definition, const ConvolutionTransposedAttributes& attr) - : GPUOperation(definition), - src_channels_(attr.weights.shape.i), - dst_channels_(attr.weights.shape.o) {} - -ConvolutionTransposed3x3Thin::ConvolutionTransposed3x3Thin( - ConvolutionTransposed3x3Thin&& operation) - : GPUOperation(std::move(operation)), - src_channels_(operation.src_channels_), - dst_channels_(operation.dst_channels_) {} - -ConvolutionTransposed3x3Thin& ConvolutionTransposed3x3Thin::operator=( - ConvolutionTransposed3x3Thin&& operation) { - if (this != &operation) { - std::swap(src_channels_, operation.src_channels_); - std::swap(dst_channels_, operation.dst_channels_); - GPUOperation::operator=(std::move(operation)); - } - return *this; -} absl::Status ConvolutionTransposed3x3Thin::Compile( const CreationContext& creation_context) { std::string code = GenerateConvolutionTransposedCode( definition_, DivideRoundUp(src_channels_, 4), - DivideRoundUp(dst_channels_, 4), *creation_context.device, &args_); + DivideRoundUp(dst_channels_, 4), *creation_context.device); std::string element_wise_code; RETURN_IF_ERROR( MergeOperations(linked_operations_, &args_, &element_wise_code)); @@ -205,11 +200,6 @@ absl::Status ConvolutionTransposed3x3Thin::Compile( *creation_context.device, &kernel_); } -absl::Status ConvolutionTransposed3x3Thin::BindArguments() { - RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); - return args_.SetObjectRef("dst_tensor", dst_[0]); -} - int3 ConvolutionTransposed3x3Thin::GetGridSize() const { const int grid_x = src_[0]->Width() * dst_[0]->Batch(); const int grid_y 
= src_[0]->Height(); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.h index 282f1b3b476..67feef13a7f 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.h @@ -38,7 +38,6 @@ class ConvolutionTransposed3x3Thin : public GPUOperation { public: ConvolutionTransposed3x3Thin() = default; absl::Status Compile(const CreationContext& creation_context) override; - absl::Status BindArguments() override; int3 GetGridSize() const override; // Move only @@ -66,6 +65,10 @@ class ConvolutionTransposed3x3Thin : public GPUOperation { void RearrangeWeightsData(const tflite::gpu::Tensor& weights, absl::Span dst); + std::string GenerateConvolutionTransposedCode(const OperationDef& op_def, + int src_depth, int dst_depth, + const CLDevice& device); + int src_channels_; int dst_channels_; }; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.cc index d7660fca097..866246460b2 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.cc @@ -27,24 +27,52 @@ limitations under the License. namespace tflite { namespace gpu { namespace cl { -namespace { +ConvolutionTransposed4x4::ConvolutionTransposed4x4( + const OperationDef& definition, const CLDevice& device) + : GPUOperation(definition) { + work_group_size_ = int3(8, 4, 1); + if (device.IsPowerVR()) { + weights_upload_type_ = WeightsUploadType::LOCAL_MEM_ASYNC; + } else if (device.IsNvidia() || device.IsIntel()) { + weights_upload_type_ = WeightsUploadType::LOCAL_MEM_BY_THREADS; + } else if (device.IsAMD()) { + weights_upload_type_ = WeightsUploadType::CONSTANT_MEM; + } else { + weights_upload_type_ = WeightsUploadType::GLOBAL_MEM; + } +} -std::string GenerateConvolutionTransposedCode( +ConvolutionTransposed4x4::ConvolutionTransposed4x4( + ConvolutionTransposed4x4&& operation) + : GPUOperation(std::move(operation)), + weights_upload_type_(operation.weights_upload_type_) {} + +ConvolutionTransposed4x4& ConvolutionTransposed4x4::operator=( + ConvolutionTransposed4x4&& operation) { + if (this != &operation) { + std::swap(weights_upload_type_, operation.weights_upload_type_); + GPUOperation::operator=(std::move(operation)); + } + return *this; +} + +std::string ConvolutionTransposed4x4::GenerateConvolutionTransposedCode( const OperationDef& op_def, - ConvolutionTransposed4x4::WeightsUploadType weights_upload_type, - Arguments* args) { - auto src_desc = absl::make_unique(op_def.src_tensors[0]); - src_desc->SetTextureAddressMode(TextureAddressMode::ZERO); + ConvolutionTransposed4x4::WeightsUploadType weights_upload_type) { + auto src_desc = op_def.src_tensors[0]; + src_desc.SetTextureAddressMode(TextureAddressMode::ZERO); if (op_def.IsBatchSupported()) { - src_desc->SetStateVar("BatchedWidth", "true"); + src_desc.SetStateVar("BatchedWidth", "true"); } - args->AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc)); - auto dst_desc = absl::make_unique(op_def.dst_tensors[0]); + AddSrcTensor("src_tensor", src_desc); + + auto dst_desc = op_def.dst_tensors[0]; if (op_def.IsBatchSupported()) { - dst_desc->SetStateVar("BatchedWidth", "true"); + dst_desc.SetStateVar("BatchedWidth", "true"); } - args->AddObjectRef("dst_tensor", AccessType::WRITE, 
std::move(dst_desc)); - args->AddInt("filter_offset"); + AddDstTensor("dst_tensor", dst_desc); + + args_.AddInt("filter_offset"); const auto src_tensor_type = op_def.src_tensors[0].storage_type; const bool manual_clamp = src_tensor_type == TensorStorageType::BUFFER || @@ -262,41 +290,10 @@ std::string GenerateConvolutionTransposedCode( return c; } -} // namespace - -ConvolutionTransposed4x4::ConvolutionTransposed4x4( - const OperationDef& definition, const CLDevice& device) - : GPUOperation(definition) { - work_group_size_ = int3(8, 4, 1); - if (device.IsPowerVR()) { - weights_upload_type_ = WeightsUploadType::LOCAL_MEM_ASYNC; - } else if (device.IsNvidia() || device.IsIntel()) { - weights_upload_type_ = WeightsUploadType::LOCAL_MEM_BY_THREADS; - } else if (device.IsAMD()) { - weights_upload_type_ = WeightsUploadType::CONSTANT_MEM; - } else { - weights_upload_type_ = WeightsUploadType::GLOBAL_MEM; - } -} - -ConvolutionTransposed4x4::ConvolutionTransposed4x4( - ConvolutionTransposed4x4&& operation) - : GPUOperation(std::move(operation)), - weights_upload_type_(operation.weights_upload_type_) {} - -ConvolutionTransposed4x4& ConvolutionTransposed4x4::operator=( - ConvolutionTransposed4x4&& operation) { - if (this != &operation) { - std::swap(weights_upload_type_, operation.weights_upload_type_); - GPUOperation::operator=(std::move(operation)); - } - return *this; -} - absl::Status ConvolutionTransposed4x4::Compile( const CreationContext& creation_context) { - std::string code = GenerateConvolutionTransposedCode( - definition_, weights_upload_type_, &args_); + std::string code = + GenerateConvolutionTransposedCode(definition_, weights_upload_type_); std::string element_wise_code; RETURN_IF_ERROR( MergeOperations(linked_operations_, &args_, &element_wise_code)); @@ -316,8 +313,6 @@ absl::Status ConvolutionTransposed4x4::Compile( } absl::Status ConvolutionTransposed4x4::BindArguments() { - RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); - RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); return args_.SetInt("filter_offset", 4 * 16 * src_[0]->Slices()); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.h index 982937486dc..73f2da51eb3 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.h @@ -72,6 +72,10 @@ class ConvolutionTransposed4x4 : public GPUOperation { void RearrangeWeightsData(const tflite::gpu::Tensor& weights, absl::Span dst); + std::string GenerateConvolutionTransposedCode( + const OperationDef& op_def, + ConvolutionTransposed4x4::WeightsUploadType weights_upload_type); + WeightsUploadType weights_upload_type_; }; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.cc index 5b31c98cb02..12b90874706 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.cc @@ -26,18 +26,37 @@ limitations under the License. 
namespace tflite { namespace gpu { namespace cl { -namespace { -std::string GenerateConvolutionTransposedCode(const OperationDef& op_def, - int src_depth, int dst_channels, - const int2& kernel_size, - Arguments* args) { - args->AddObjectRef( - "src_tensor", AccessType::READ, - absl::make_unique(op_def.src_tensors[0])); - args->AddObjectRef( - "dst_tensor", AccessType::WRITE, - absl::make_unique(op_def.dst_tensors[0])); +ConvolutionTransposedThin::ConvolutionTransposedThin( + const OperationDef& definition, const ConvolutionTransposedAttributes& attr) + : GPUOperation(definition), + kernel_size_(attr.weights.shape.w, attr.weights.shape.h), + src_channels_(attr.weights.shape.i), + dst_channels_(attr.weights.shape.o) {} + +ConvolutionTransposedThin::ConvolutionTransposedThin( + ConvolutionTransposedThin&& operation) + : GPUOperation(std::move(operation)), + kernel_size_(operation.kernel_size_), + src_channels_(operation.src_channels_), + dst_channels_(operation.dst_channels_) {} + +ConvolutionTransposedThin& ConvolutionTransposedThin::operator=( + ConvolutionTransposedThin&& operation) { + if (this != &operation) { + std::swap(kernel_size_, operation.kernel_size_); + std::swap(src_channels_, operation.src_channels_); + std::swap(dst_channels_, operation.dst_channels_); + GPUOperation::operator=(std::move(operation)); + } + return *this; +} + +std::string ConvolutionTransposedThin::GenerateConvolutionTransposedCode( + const OperationDef& op_def, int src_depth, int dst_channels, + const int2& kernel_size) { + AddSrcTensor("src_tensor", op_def.src_tensors[0]); + AddDstTensor("dst_tensor", op_def.dst_tensors[0]); const std::string channel_x = dst_channels == 1 ? "" : ".x"; const std::vector postfix = {channel_x, ".y", ".z", ".w"}; @@ -131,38 +150,12 @@ std::string GenerateConvolutionTransposedCode(const OperationDef& op_def, return c; } -} // namespace - -ConvolutionTransposedThin::ConvolutionTransposedThin( - const OperationDef& definition, const ConvolutionTransposedAttributes& attr) - : GPUOperation(definition), - kernel_size_(attr.weights.shape.w, attr.weights.shape.h), - src_channels_(attr.weights.shape.i), - dst_channels_(attr.weights.shape.o) {} - -ConvolutionTransposedThin::ConvolutionTransposedThin( - ConvolutionTransposedThin&& operation) - : GPUOperation(std::move(operation)), - kernel_size_(operation.kernel_size_), - src_channels_(operation.src_channels_), - dst_channels_(operation.dst_channels_) {} - -ConvolutionTransposedThin& ConvolutionTransposedThin::operator=( - ConvolutionTransposedThin&& operation) { - if (this != &operation) { - std::swap(kernel_size_, operation.kernel_size_); - std::swap(src_channels_, operation.src_channels_); - std::swap(dst_channels_, operation.dst_channels_); - GPUOperation::operator=(std::move(operation)); - } - return *this; -} absl::Status ConvolutionTransposedThin::Compile( const CreationContext& creation_context) { std::string code = GenerateConvolutionTransposedCode( - definition_, DivideRoundUp(src_channels_, 4), dst_channels_, kernel_size_, - &args_); + definition_, DivideRoundUp(src_channels_, 4), dst_channels_, + kernel_size_); std::string element_wise_code; RETURN_IF_ERROR( MergeOperations(linked_operations_, &args_, &element_wise_code)); @@ -181,11 +174,6 @@ absl::Status ConvolutionTransposedThin::Compile( *creation_context.device, &kernel_); } -absl::Status ConvolutionTransposedThin::BindArguments() { - RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); - return args_.SetObjectRef("dst_tensor", dst_[0]); -} - int3 
ConvolutionTransposedThin::GetGridSize() const { const int grid_x = src_[0]->Width() * dst_[0]->Batch(); const int grid_y = src_[0]->Height(); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.h index 90a1b026369..306114959e0 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.h @@ -38,7 +38,6 @@ class ConvolutionTransposedThin : public GPUOperation { public: ConvolutionTransposedThin() = default; absl::Status Compile(const CreationContext& creation_context) override; - absl::Status BindArguments() override; int3 GetGridSize() const override; // Move only @@ -63,6 +62,9 @@ class ConvolutionTransposedThin : public GPUOperation { template void RearrangeWeightsData(const tflite::gpu::Tensor& weights, absl::Span dst); + std::string GenerateConvolutionTransposedCode(const OperationDef& op_def, + int src_depth, int dst_channels, + const int2& kernel_size); int2 kernel_size_; int src_channels_; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.cc b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.cc index 7d6bee6877b..1854ca9001a 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.cc @@ -66,37 +66,91 @@ std::string GetSrcValue(int channel_multiplier, const std::string coords) { return c; } +} // namespace -std::string GenerateDepthwiseConvolutionCode( +DepthwiseConvolution::DepthwiseConvolution( + const OperationDef& definition, + const DepthwiseConvolution2DAttributes& attr, bool weights_are_buffer) + : GPUOperation(definition), + weights_are_buffer_(weights_are_buffer), + kernel_size_(attr.weights.shape.w, attr.weights.shape.h, 0, 0), + stride_(attr.strides.w, attr.strides.h, 0, 0), + padding_(-attr.padding.prepended.w, -attr.padding.prepended.h, 0, 0), + dilation_(attr.dilations.w, attr.dilations.h, 0, 0), + channel_multiplier_(attr.weights.shape.o) { + work_group_size_ = int3(8, 8, 1); +} + +DepthwiseConvolution::DepthwiseConvolution( + const OperationDef& definition, + const DepthwiseConvolution3DAttributes& attr, bool weights_are_buffer) + : GPUOperation(definition), + weights_are_buffer_(weights_are_buffer), + kernel_size_(attr.weights.shape.w, attr.weights.shape.h, + attr.weights.shape.d, 0), + stride_(attr.strides.w, attr.strides.h, attr.strides.d, 0), + padding_(-attr.padding.prepended.w, -attr.padding.prepended.h, + -attr.padding.prepended.d, 0), + dilation_(attr.dilations.w, attr.dilations.h, attr.dilations.d, 0), + channel_multiplier_(attr.weights.shape.o) { + work_group_size_ = int3(8, 8, 1); +} + +DepthwiseConvolution::DepthwiseConvolution(DepthwiseConvolution&& operation) + : GPUOperation(std::move(operation)), + weights_are_buffer_(operation.weights_are_buffer_), + kernel_size_(operation.kernel_size_), + stride_(operation.stride_), + padding_(operation.padding_), + dilation_(operation.dilation_), + channel_multiplier_(operation.channel_multiplier_) {} + +DepthwiseConvolution& DepthwiseConvolution::operator=( + DepthwiseConvolution&& operation) { + if (this != &operation) { + std::swap(weights_are_buffer_, operation.weights_are_buffer_); + std::swap(kernel_size_, operation.kernel_size_); + std::swap(stride_, operation.stride_); + std::swap(padding_, operation.padding_); + std::swap(dilation_, operation.dilation_); + std::swap(channel_multiplier_, 
operation.channel_multiplier_); + GPUOperation::operator=(std::move(operation)); + } + return *this; +} + +std::string DepthwiseConvolution::GenerateDepthwiseConvolutionCode( const OperationDef& op_def, bool stride_correction, int channel_multiplier, - bool weights_are_buffer, const CLDevice& device, Arguments* args) { - auto src_desc = absl::make_unique(op_def.src_tensors[0]); - src_desc->SetTextureAddressMode(GetFastestZeroMode(device)); + bool weights_are_buffer, const CLDevice& device) { + auto src_desc = op_def.src_tensors[0]; + src_desc.SetTextureAddressMode(GetFastestZeroMode(device)); if (op_def.IsBatchSupported()) { - src_desc->SetStateVar("BatchedWidth", "true"); + src_desc.SetStateVar("BatchedWidth", "true"); } - args->AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc)); - auto dst_desc = absl::make_unique(op_def.dst_tensors[0]); + AddSrcTensor("src_tensor", src_desc); + + auto dst_desc = op_def.dst_tensors[0]; if (op_def.IsBatchSupported()) { - dst_desc->SetStateVar("BatchedWidth", "true"); + dst_desc.SetStateVar("BatchedWidth", "true"); } - args->AddObjectRef("dst_tensor", AccessType::WRITE, std::move(dst_desc)); - args->AddInt("kernel_size_x"); - args->AddInt("stride_x"); - args->AddInt("padding_x"); - args->AddInt("dilation_x"); - args->AddInt("kernel_size_y"); - args->AddInt("stride_y"); - args->AddInt("padding_y"); - args->AddInt("dilation_y"); + AddDstTensor("dst_tensor", dst_desc); + + args_.AddInt("kernel_size_x"); + args_.AddInt("stride_x"); + args_.AddInt("padding_x"); + args_.AddInt("dilation_x"); + args_.AddInt("kernel_size_y"); + args_.AddInt("stride_y"); + args_.AddInt("padding_y"); + args_.AddInt("dilation_y"); if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) { - args->AddInt("kernel_size_z"); - args->AddInt("stride_z"); - args->AddInt("padding_z"); - args->AddInt("dilation_z"); + args_.AddInt("kernel_size_z"); + args_.AddInt("stride_z"); + args_.AddInt("padding_z"); + args_.AddInt("dilation_z"); } if (!IsSpecializedCase(channel_multiplier)) { - args->AddInt("ch_multiplier"); + args_.AddInt("ch_multiplier"); } const auto src_tensor_type = op_def.src_tensors[0].storage_type; @@ -215,58 +269,6 @@ std::string GenerateDepthwiseConvolutionCode( return c; } -} // namespace - -DepthwiseConvolution::DepthwiseConvolution( - const OperationDef& definition, - const DepthwiseConvolution2DAttributes& attr, bool weights_are_buffer) - : GPUOperation(definition), - weights_are_buffer_(weights_are_buffer), - kernel_size_(attr.weights.shape.w, attr.weights.shape.h, 0, 0), - stride_(attr.strides.w, attr.strides.h, 0, 0), - padding_(-attr.padding.prepended.w, -attr.padding.prepended.h, 0, 0), - dilation_(attr.dilations.w, attr.dilations.h, 0, 0), - channel_multiplier_(attr.weights.shape.o) { - work_group_size_ = int3(8, 8, 1); -} - -DepthwiseConvolution::DepthwiseConvolution( - const OperationDef& definition, - const DepthwiseConvolution3DAttributes& attr, bool weights_are_buffer) - : GPUOperation(definition), - weights_are_buffer_(weights_are_buffer), - kernel_size_(attr.weights.shape.w, attr.weights.shape.h, - attr.weights.shape.d, 0), - stride_(attr.strides.w, attr.strides.h, attr.strides.d, 0), - padding_(-attr.padding.prepended.w, -attr.padding.prepended.h, - -attr.padding.prepended.d, 0), - dilation_(attr.dilations.w, attr.dilations.h, attr.dilations.d, 0), - channel_multiplier_(attr.weights.shape.o) { - work_group_size_ = int3(8, 8, 1); -} - -DepthwiseConvolution::DepthwiseConvolution(DepthwiseConvolution&& operation) - : GPUOperation(std::move(operation)), - 
weights_are_buffer_(operation.weights_are_buffer_), - kernel_size_(operation.kernel_size_), - stride_(operation.stride_), - padding_(operation.padding_), - dilation_(operation.dilation_), - channel_multiplier_(operation.channel_multiplier_) {} - -DepthwiseConvolution& DepthwiseConvolution::operator=( - DepthwiseConvolution&& operation) { - if (this != &operation) { - std::swap(weights_are_buffer_, operation.weights_are_buffer_); - std::swap(kernel_size_, operation.kernel_size_); - std::swap(stride_, operation.stride_); - std::swap(padding_, operation.padding_); - std::swap(dilation_, operation.dilation_); - std::swap(channel_multiplier_, operation.channel_multiplier_); - GPUOperation::operator=(std::move(operation)); - } - return *this; -} absl::Status DepthwiseConvolution::Compile( const CreationContext& creation_context) { @@ -274,7 +276,7 @@ absl::Status DepthwiseConvolution::Compile( definition_.IsBatchSupported() && stride_.x != 1; std::string code = GenerateDepthwiseConvolutionCode( definition_, stride_correction, channel_multiplier_, weights_are_buffer_, - *creation_context.device, &args_); + *creation_context.device); std::string element_wise_code; RETURN_IF_ERROR( MergeOperations(linked_operations_, &args_, &element_wise_code)); @@ -287,8 +289,6 @@ absl::Status DepthwiseConvolution::Compile( } absl::Status DepthwiseConvolution::BindArguments() { - RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); - RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); RETURN_IF_ERROR(args_.SetInt("kernel_size_x", kernel_size_.x)); RETURN_IF_ERROR(args_.SetInt("stride_x", stride_.x)); RETURN_IF_ERROR(args_.SetInt("padding_x", padding_.x * src_[0]->Batch())); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.h b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.h index 51cf68aaf9e..73782bbfaa1 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.h @@ -80,6 +80,12 @@ class DepthwiseConvolution : public GPUOperation { void RearrangeWeightsData(const tflite::gpu::Tensor& weights, absl::Span dst); + std::string GenerateDepthwiseConvolutionCode(const OperationDef& op_def, + bool stride_correction, + int channel_multiplier, + bool weights_are_buffer, + const CLDevice& device); + bool weights_are_buffer_; int4 kernel_size_; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc index 9e58ce78cf0..db5c920bcd4 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc @@ -26,18 +26,38 @@ limitations under the License. 
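Editor's note: the hunks above for ConvolutionTransposedThin and DepthwiseConvolution follow one recurring pattern in this patch: the kernel-code generator stops being an anonymous-namespace free function with an Arguments* out-parameter and becomes a member function that registers what it needs on the operation itself (AddSrcTensor/AddDstTensor for tensors, args_.AddInt for scalar uniforms). The sketch below is a minimal, self-contained illustration of that shape only; ScalarArgs and MiniDepthwise are invented stand-ins, not the real tflite::gpu::cl types.

    // Minimal sketch: a member generator declares its own scalar arguments
    // as a side effect, so no Arguments* needs to be threaded through.
    #include <iostream>
    #include <string>
    #include <vector>

    class ScalarArgs {  // stand-in for the integer part of cl::Arguments
     public:
      void AddInt(const std::string& name) { names_.push_back(name); }
      std::string List() const {
        std::string out;
        for (const auto& n : names_) out += "  int " + n + ";\n";
        return out;
      }
     private:
      std::vector<std::string> names_;
    };

    class MiniDepthwise {
     public:
      // Member generator: records the arguments the emitted kernel will use.
      std::string GenerateCode(bool has_depth) {
        args_.AddInt("stride_x");
        args_.AddInt("stride_y");
        if (has_depth) args_.AddInt("stride_z");  // 3D variant adds one more
        return "// kernel body uses the declared stride_* args (elided)\n";
      }
      const ScalarArgs& args() const { return args_; }

     private:
      ScalarArgs args_;
    };

    int main() {
      MiniDepthwise op;
      std::string code = op.GenerateCode(/*has_depth=*/false);
      std::cout << "declared args:\n" << op.args().List() << code;
    }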
namespace tflite { namespace gpu { namespace cl { -namespace { -std::string GenerateDepthwiseConvCode(const OperationDef& op_def, - const CLDevice& device, - bool weights_are_buffer, - bool local_mem_uploads, Arguments* args) { - auto src_desc = absl::make_unique(op_def.src_tensors[0]); - src_desc->SetTextureAddressMode(GetFastestZeroMode(device)); - args->AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc)); - args->AddObjectRef( - "dst_tensor", AccessType::WRITE, - absl::make_unique(op_def.dst_tensors[0])); +DepthwiseConv3x3::DepthwiseConv3x3(const OperationDef& definition, + bool weights_are_buffer, + bool local_mem_uploads) + : GPUOperation(definition), + weights_are_buffer_(weights_are_buffer), + local_mem_uploads_(local_mem_uploads) { + work_group_size_ = int3(8, 4, 1); +} + +DepthwiseConv3x3::DepthwiseConv3x3(DepthwiseConv3x3&& operation) + : GPUOperation(std::move(operation)), + weights_are_buffer_(operation.weights_are_buffer_), + local_mem_uploads_(operation.local_mem_uploads_) {} + +DepthwiseConv3x3& DepthwiseConv3x3::operator=(DepthwiseConv3x3&& operation) { + if (this != &operation) { + std::swap(weights_are_buffer_, operation.weights_are_buffer_); + std::swap(local_mem_uploads_, operation.local_mem_uploads_); + GPUOperation::operator=(std::move(operation)); + } + return *this; +} + +std::string DepthwiseConv3x3::GenerateDepthwiseConvCode( + const OperationDef& op_def, const CLDevice& device, bool weights_are_buffer, + bool local_mem_uploads) { + auto src_desc = op_def.src_tensors[0]; + src_desc.SetTextureAddressMode(GetFastestZeroMode(device)); + AddSrcTensor("src_tensor", src_desc); + AddDstTensor("dst_tensor", op_def.dst_tensors[0]); + const auto src_tensor_type = op_def.src_tensors[0].storage_type; const bool manual_clamp = src_tensor_type == TensorStorageType::BUFFER || @@ -261,36 +281,11 @@ std::string GenerateDepthwiseConvCode(const OperationDef& op_def, return c; } -} // namespace - -DepthwiseConv3x3::DepthwiseConv3x3(const OperationDef& definition, - bool weights_are_buffer, - bool local_mem_uploads) - : GPUOperation(definition), - weights_are_buffer_(weights_are_buffer), - local_mem_uploads_(local_mem_uploads) { - work_group_size_ = int3(8, 4, 1); -} - -DepthwiseConv3x3::DepthwiseConv3x3(DepthwiseConv3x3&& operation) - : GPUOperation(std::move(operation)), - weights_are_buffer_(operation.weights_are_buffer_), - local_mem_uploads_(operation.local_mem_uploads_) {} - -DepthwiseConv3x3& DepthwiseConv3x3::operator=(DepthwiseConv3x3&& operation) { - if (this != &operation) { - std::swap(weights_are_buffer_, operation.weights_are_buffer_); - std::swap(local_mem_uploads_, operation.local_mem_uploads_); - GPUOperation::operator=(std::move(operation)); - } - return *this; -} - absl::Status DepthwiseConv3x3::Compile( const CreationContext& creation_context) { - std::string code = GenerateDepthwiseConvCode( - definition_, *creation_context.device, weights_are_buffer_, - local_mem_uploads_, &args_); + std::string code = + GenerateDepthwiseConvCode(definition_, *creation_context.device, + weights_are_buffer_, local_mem_uploads_); std::string element_wise_code; RETURN_IF_ERROR( MergeOperations(linked_operations_, &args_, &element_wise_code)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.h b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.h index ce5b2d82981..b2a2a1b9463 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.h @@ -66,6 +66,11 @@ 
class DepthwiseConv3x3 : public GPUOperation { const tflite::gpu::Tensor& weights, const tflite::gpu::Tensor& biases, absl::Span dst); + std::string GenerateDepthwiseConvCode(const OperationDef& op_def, + const CLDevice& device, + bool weights_are_buffer, + bool local_mem_uploads); + bool weights_are_buffer_; bool local_mem_uploads_; }; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc index 4d2afc5bcd7..21866021e91 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc @@ -136,18 +136,16 @@ std::string GetTwoInputCode(const OperationType& op_type, ElementwiseOneInput::ElementwiseOneInput(const OperationDef& definition, const OperationType& op_type) - : ElementwiseOperation(definition), op_type_(op_type) { + : ElementwiseOperation(definition) { code_ = GetOneInputCode(op_type, definition.precision, "in_out_value"); } ElementwiseOneInput::ElementwiseOneInput(ElementwiseOneInput&& operation) - : ElementwiseOperation(std::move(operation)), - op_type_(operation.op_type_) {} + : ElementwiseOperation(std::move(operation)) {} ElementwiseOneInput& ElementwiseOneInput::operator=( ElementwiseOneInput&& operation) { if (this != &operation) { - std::swap(op_type_, operation.op_type_); ElementwiseOperation::operator=(std::move(operation)); } return *this; @@ -162,7 +160,7 @@ ElementwiseOneInput CreateElementwiseOneInput(const OperationDef& definition, ElementwiseOneRuntimeOneScalar::ElementwiseOneRuntimeOneScalar( const OperationDef& definition, const OperationType& op_type, float scalar_parameter, CalculationsPrecision scalar_precision) - : ElementwiseOperation(definition), op_type_(op_type) { + : ElementwiseOperation(definition) { if (definition.precision == CalculationsPrecision::F32) { args_.AddFloat("scalar", scalar_parameter); } else { @@ -173,15 +171,11 @@ ElementwiseOneRuntimeOneScalar::ElementwiseOneRuntimeOneScalar( ElementwiseOneRuntimeOneScalar::ElementwiseOneRuntimeOneScalar( ElementwiseOneRuntimeOneScalar&& operation) - : ElementwiseOperation(std::move(operation)), - link_index_(operation.link_index_), - op_type_(operation.op_type_) {} + : ElementwiseOperation(std::move(operation)) {} ElementwiseOneRuntimeOneScalar& ElementwiseOneRuntimeOneScalar::operator=( ElementwiseOneRuntimeOneScalar&& operation) { if (this != &operation) { - link_index_ = operation.link_index_; - op_type_ = operation.op_type_; ElementwiseOperation::operator=(std::move(operation)); } return *this; @@ -202,14 +196,12 @@ ElementwiseTwoInput::ElementwiseTwoInput(const OperationDef& definition, const OperationType& op_type, const BroadcastSettings& broadcast) : ElementwiseOperation(definition), - op_type_(op_type), broadcast_(broadcast) { - auto src_desc = - absl::make_unique(definition.src_tensors[1]); + auto src_desc = definition.src_tensors[1]; if (definition.IsBatchSupported()) { - src_desc->SetStateVar("BatchedWidth", "true"); + src_desc.SetStateVar("BatchedWidth", "true"); } - args_.AddObjectRef("second_tensor", AccessType::READ, std::move(src_desc)); + AddSrcTensor("second_tensor", src_desc); const std::string x_coord = broadcast.width ? "0" : "X_COORD"; const std::string y_coord = broadcast.height ? "0" : "Y_COORD"; const std::string s_coord = broadcast.channels ? 
"0" : "S_COORD"; @@ -228,7 +220,6 @@ ElementwiseTwoInput::ElementwiseTwoInput(const OperationDef& definition, const BroadcastSettings& broadcast, Tensor&& constant_tensor) : ElementwiseOperation(definition), - op_type_(op_type), broadcast_(broadcast) { auto descriptor = constant_tensor.GetDescriptor(); args_.AddObject("second_tensor", AccessType::READ, @@ -249,30 +240,17 @@ ElementwiseTwoInput::ElementwiseTwoInput(const OperationDef& definition, ElementwiseTwoInput::ElementwiseTwoInput(ElementwiseTwoInput&& operation) : ElementwiseOperation(std::move(operation)), - link_index_(operation.link_index_), - op_type_(operation.op_type_), broadcast_(operation.broadcast_) {} ElementwiseTwoInput& ElementwiseTwoInput::operator=( ElementwiseTwoInput&& operation) { if (this != &operation) { - link_index_ = operation.link_index_; - op_type_ = operation.op_type_; broadcast_ = operation.broadcast_; ElementwiseOperation::operator=(std::move(operation)); } return *this; } -absl::Status ElementwiseTwoInput::SetArgs(const std::string& unique_postfix, - Arguments* args) { - std::string tensor_name = absl::StrCat("second_tensor", unique_postfix); - if (src_.size() == 2) { - RETURN_IF_ERROR(args->SetObjectRef(tensor_name, src_[1])); - } - return absl::OkStatus(); -} - absl::Status CreateElementwiseTwoInput( const CreationContext& creation_context, const OperationDef& definition, const OperationType& op_type, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.h b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.h index be037802dbc..9712ee96b90 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.h @@ -38,9 +38,6 @@ class ElementwiseOneInput : public ElementwiseOperation { ElementwiseOneInput& operator=(ElementwiseOneInput&& operation); ElementwiseOneInput(const ElementwiseOneInput&) = delete; ElementwiseOneInput& operator=(const ElementwiseOneInput&) = delete; - - private: - OperationType op_type_; }; ElementwiseOneInput CreateElementwiseOneInput(const OperationDef& definition, @@ -64,10 +61,6 @@ class ElementwiseOneRuntimeOneScalar : public ElementwiseOperation { delete; ElementwiseOneRuntimeOneScalar& operator=( const ElementwiseOneRuntimeOneScalar&) = delete; - - private: - int link_index_; - OperationType op_type_; }; ElementwiseOneRuntimeOneScalar CreateElementwiseOneRuntimeOneScalar( @@ -101,12 +94,7 @@ class ElementwiseTwoInput : public ElementwiseOperation { ElementwiseTwoInput(const ElementwiseTwoInput&) = delete; ElementwiseTwoInput& operator=(const ElementwiseTwoInput&) = delete; - absl::Status SetArgs(const std::string& unique_postfix, - Arguments* args) override; - private: - int link_index_; - OperationType op_type_; BroadcastSettings broadcast_; }; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc index 944af0a2280..eb4dcec0de4 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc @@ -23,7 +23,19 @@ limitations under the License. 
namespace tflite { namespace gpu { namespace cl { -namespace { + +FullyConnected::FullyConnected(const OperationDef& definition) + : GPUOperation(definition) {} + +FullyConnected::FullyConnected(FullyConnected&& kernel) + : GPUOperation(std::move(kernel)) {} + +FullyConnected& FullyConnected::operator=(FullyConnected&& kernel) { + if (this != &kernel) { + GPUOperation::operator=(std::move(kernel)); + } + return *this; +} // We split vec vec dot (every thread do vec vec dot product in basic // vec mat mult) on 4 parts to create more threads @@ -31,15 +43,10 @@ namespace { // Good results for ~1024 x 1024 sizes, for other can be written more // optimized shaders -std::string GetFullyConnectedKernelCode(const OperationDef& op_def, - const int3& work_group_size, - Arguments* args) { - args->AddObjectRef( - "src_tensor", AccessType::READ, - absl::make_unique(op_def.src_tensors[0])); - args->AddObjectRef( - "dst_tensor", AccessType::WRITE, - absl::make_unique(op_def.dst_tensors[0])); +std::string FullyConnected::GetFullyConnectedKernelCode( + const OperationDef& op_def, const int3& work_group_size) { + AddSrcTensor("src_tensor", op_def.src_tensors[0]); + AddDstTensor("dst_tensor", op_def.dst_tensors[0]); std::string c = GetCommonDefines(op_def.precision); switch (op_def.precision) { @@ -84,20 +91,6 @@ std::string GetFullyConnectedKernelCode(const OperationDef& op_def, return c; } -} // namespace - -FullyConnected::FullyConnected(const OperationDef& definition) - : GPUOperation(definition) {} - -FullyConnected::FullyConnected(FullyConnected&& kernel) - : GPUOperation(std::move(kernel)) {} - -FullyConnected& FullyConnected::operator=(FullyConnected&& kernel) { - if (this != &kernel) { - GPUOperation::operator=(std::move(kernel)); - } - return *this; -} absl::Status FullyConnected::Compile(const CreationContext& creation_context) { int wg_width = 32; @@ -107,7 +100,7 @@ absl::Status FullyConnected::Compile(const CreationContext& creation_context) { work_group_size_ = {wg_width, wg_height, 1}; wg_width /= 2; std::string code = - GetFullyConnectedKernelCode(definition_, work_group_size_, &args_); + GetFullyConnectedKernelCode(definition_, work_group_size_); std::string element_wise_code; RETURN_IF_ERROR( MergeOperations(linked_operations_, &args_, &element_wise_code)); @@ -129,11 +122,6 @@ absl::Status FullyConnected::Compile(const CreationContext& creation_context) { return absl::OkStatus(); } -absl::Status FullyConnected::BindArguments() { - RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); - return args_.SetObjectRef("dst_tensor", dst_[0]); -} - int3 FullyConnected::GetGridSize() const { return int3(dst_[0]->Slices(), 1, 1); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h index 138db001332..6a969eda21f 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h @@ -40,7 +40,6 @@ class FullyConnected : public GPUOperation { absl::Status Tune(const TuningParameters& params) override { return absl::OkStatus(); } - absl::Status BindArguments() override; int3 GetGridSize() const override; absl::Status Compile(const CreationContext& creation_context) override; @@ -63,6 +62,9 @@ class FullyConnected : public GPUOperation { template void RearrangeWeights(const tflite::gpu::Tensor& weights, absl::Span dst); + + std::string GetFullyConnectedKernelCode(const OperationDef& op_def, + const int3& work_group_size); }; template diff --git 
a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc index d0d1f88c9e6..e9de63e1211 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc @@ -26,18 +26,7 @@ namespace cl { namespace { std::string GetElementWiseCode(const OperationDef& op_def, - bool check_src_slices, Arguments* args) { - auto src_desc = absl::make_unique(op_def.src_tensors[0]); - if (op_def.IsBatchSupported()) { - src_desc->SetStateVar("BatchedWidth", "true"); - } - args->AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc)); - auto dst_desc = absl::make_unique(op_def.dst_tensors[0]); - if (op_def.IsBatchSupported()) { - dst_desc->SetStateVar("BatchedWidth", "true"); - } - args->AddObjectRef("dst_tensor", AccessType::WRITE, std::move(dst_desc)); - + bool check_src_slices) { std::string c = GetCommonDefines(op_def.precision); c += "__kernel void main_function(\n"; @@ -126,6 +115,8 @@ GPUOperation::GPUOperation(GPUOperation&& operation) kernel_(std::move(operation.kernel_)), work_group_size_(operation.work_group_size_), grid_size_(operation.grid_size_), + src_tensors_names_(std::move(operation.src_tensors_names_)), + dst_tensors_names_(std::move(operation.dst_tensors_names_)), linked_operations_(std::move(operation.linked_operations_)) {} GPUOperation& GPUOperation::operator=(GPUOperation&& operation) { @@ -137,6 +128,8 @@ GPUOperation& GPUOperation::operator=(GPUOperation&& operation) { kernel_ = std::move(operation.kernel_); std::swap(work_group_size_, operation.work_group_size_); std::swap(grid_size_, operation.grid_size_); + src_tensors_names_ = std::move(operation.src_tensors_names_); + dst_tensors_names_ = std::move(operation.dst_tensors_names_); linked_operations_ = std::move(operation.linked_operations_); } return *this; @@ -146,27 +139,62 @@ void GPUOperation::AddOperation(ElementwiseOperation* operation) { linked_operations_.push_back(operation); } +void GPUOperation::AddSrcTensor(const std::string& tensor_name, + const TensorDescriptor& desc) { + src_tensors_names_.push_back(tensor_name); + auto desc_new = absl::make_unique(desc); + args_.AddObjectRef(tensor_name, AccessType::READ, std::move(desc_new)); +} + +void GPUOperation::AddSrcBuffer(const std::string& buffer_name, + const BufferDescriptor& desc) { + src_tensors_names_.push_back(buffer_name); + auto desc_new = absl::make_unique(desc); + args_.AddObjectRef(buffer_name, AccessType::READ, std::move(desc_new)); +} + +void GPUOperation::AddDstTensor(const std::string& tensor_name, + const TensorDescriptor& desc) { + dst_tensors_names_.push_back(tensor_name); + auto desc_new = absl::make_unique(desc); + args_.AddObjectRef(tensor_name, AccessType::WRITE, std::move(desc_new)); +} + +absl::Status GPUOperation::UpdateParams() { + for (int i = 0; i < src_tensors_names_.size(); ++i) { + RETURN_IF_ERROR(args_.SetObjectRef(src_tensors_names_[i], src_[i])); + } + for (int i = 0; i < dst_tensors_names_.size(); ++i) { + RETURN_IF_ERROR(args_.SetObjectRef(dst_tensors_names_[i], dst_[i])); + } + for (const auto linked_op : linked_operations_) { + for (int i = 0; i < linked_op->src_tensors_names_.size(); ++i) { + RETURN_IF_ERROR(args_.SetObjectRef(linked_op->src_tensors_names_[i], + linked_op->src_[i + 1])); + } + } + RETURN_IF_ERROR(BindArguments()); + grid_size_ = GetGridSize(); + return absl::OkStatus(); +} + ElementwiseOperation::ElementwiseOperation(ElementwiseOperation&& operation) : 
GPUOperation(std::move(operation)), check_src_channels_size_(operation.check_src_channels_size_), - code_(std::move(operation.code_)) {} + code_(std::move(operation.code_)), + linkable_(operation.linkable_) {} ElementwiseOperation& ElementwiseOperation::operator=( ElementwiseOperation&& operation) { if (this != &operation) { check_src_channels_size_ = operation.check_src_channels_size_; code_ = std::move(operation.code_); + linkable_ = operation.linkable_; GPUOperation::operator=(std::move(operation)); } return *this; } -absl::Status ElementwiseOperation::BindArguments() { - RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); - RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); - return SetArgs("", &args_); -} - int3 ElementwiseOperation::GetGridSize() const { const int grid_x = dst_[0]->Width() * dst_[0]->Batch(); const int grid_y = dst_[0]->Height(); @@ -176,21 +204,45 @@ int3 ElementwiseOperation::GetGridSize() const { absl::Status ElementwiseOperation::Compile( const CreationContext& creation_context) { - std::string code = - GetElementWiseCode(definition_, check_src_channels_size_, &args_); + auto src_desc = + absl::make_unique(definition_.src_tensors[0]); + if (definition_.IsBatchSupported()) { + src_desc->SetStateVar("BatchedWidth", "true"); + } + src_tensors_names_.insert(src_tensors_names_.begin(), "src_tensor"); + args_.AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc)); + + auto dst_desc = + absl::make_unique(definition_.dst_tensors[0]); + if (definition_.IsBatchSupported()) { + dst_desc->SetStateVar("BatchedWidth", "true"); + } + dst_tensors_names_.insert(dst_tensors_names_.begin(), "dst_tensor"); + args_.AddObjectRef("dst_tensor", AccessType::WRITE, std::move(dst_desc)); + + std::string code = GetElementWiseCode(definition_, check_src_channels_size_); std::string element_wise_code; element_wise_code += "{\n" + code_ + "\n}\n"; RETURN_IF_ERROR( MergeOperations(linked_operations_, &args_, &element_wise_code)); - RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), - {{"dst_tensor", element_wise_code}}, - &code)); + RETURN_IF_ERROR(args_.TransformToCLCode( + creation_context.device->GetInfo(), + {{dst_tensors_names_[0], element_wise_code}}, &code)); code = absl::Substitute(code, args_.GetListOfArgs()); return creation_context.cache->GetOrCreateCLKernel( code, "main_function", *creation_context.context, *creation_context.device, &kernel_); } +void ElementwiseOperation::AddUniquePostfix(const std::string& unique_postfix) { + for (int i = 0; i < src_tensors_names_.size(); ++i) { + src_tensors_names_[i] += unique_postfix; + } + for (int i = 0; i < dst_tensors_names_.size(); ++i) { + dst_tensors_names_[i] += unique_postfix; + } +} + absl::Status MergeOperations( const std::vector& linked_ops, Arguments* merged_args, std::string* merged_code) { @@ -201,15 +253,7 @@ absl::Status MergeOperations( link_args.RenameArgs(unique_postfix, &code); *merged_code += "{\n" + code + "\n}\n"; RETURN_IF_ERROR(merged_args->Merge(std::move(link_args), unique_postfix)); - } - return absl::OkStatus(); -} - -absl::Status SetArguments(const std::vector& linked_ops, - Arguments* args) { - for (int i = 0; i < linked_ops.size(); ++i) { - std::string unique_postfix = absl::StrCat("_link", i + 1); - RETURN_IF_ERROR(linked_ops[i]->SetArgs(unique_postfix, args)); + linked_ops[i]->AddUniquePostfix(unique_postfix); } return absl::OkStatus(); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h 
b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h index 88d0ff0b46f..c2c1fbaa146 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h @@ -21,6 +21,7 @@ limitations under the License. #include #include "tensorflow/lite/delegates/gpu/cl/arguments.h" +#include "tensorflow/lite/delegates/gpu/cl/buffer.h" #include "tensorflow/lite/delegates/gpu/cl/cl_context.h" #include "tensorflow/lite/delegates/gpu/cl/cl_device.h" #include "tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h" @@ -60,9 +61,6 @@ struct OperationDef { class ElementwiseOperation; -absl::Status SetArguments(const std::vector& linked_ops, - Arguments* args); - // GPUOperation represents some implementation of neural network operation on // GPU. GPUOperation can contain ElementwiseOperation operations, in this case, // ElementwiseOperation still hold necessary data and should be alive. @@ -91,12 +89,7 @@ class GPUOperation { void SetDst(Tensor* ptr, int index = 0); // should be called after changes of inputs/outputs. - absl::Status UpdateParams() { - RETURN_IF_ERROR(BindArguments()); - RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); - grid_size_ = GetGridSize(); - return absl::OkStatus(); - } + absl::Status UpdateParams(); absl::Status AddToQueue(CLCommandQueue* queue) { RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); @@ -114,8 +107,15 @@ class GPUOperation { const OperationDef& GetDefinition() const { return definition_; } + void AddSrcTensor(const std::string& tensor_name, + const TensorDescriptor& desc); + void AddSrcBuffer(const std::string& buffer_name, + const BufferDescriptor& desc); + void AddDstTensor(const std::string& tensor_name, + const TensorDescriptor& desc); + protected: - virtual absl::Status BindArguments() = 0; + virtual absl::Status BindArguments() { return absl::OkStatus(); } virtual int3 GetGridSize() const = 0; // Defines operation calculation precision and format of src/dst tensors. @@ -126,6 +126,8 @@ class GPUOperation { CLKernel kernel_; int3 work_group_size_ = int3(8, 4, 1); int3 grid_size_ = int3(0, 0, 0); + std::vector src_tensors_names_; + std::vector dst_tensors_names_; std::vector linked_operations_; }; @@ -145,7 +147,6 @@ class ElementwiseOperation : public GPUOperation { virtual ~ElementwiseOperation() {} absl::Status Compile(const CreationContext& creation_context) override; - absl::Status BindArguments() override; int3 GetGridSize() const override; // Move only @@ -154,20 +155,16 @@ class ElementwiseOperation : public GPUOperation { ElementwiseOperation(const ElementwiseOperation&) = delete; ElementwiseOperation& operator=(const ElementwiseOperation&) = delete; - virtual absl::Status SetArgs(const std::string& unique_postfix, - Arguments* args) { - return absl::OkStatus(); - } - Arguments&& MoveArgs() { return std::move(args_); } std::string GetCode() const { return code_; } + void AddUniquePostfix(const std::string& unique_postfix); - // ovveride to return false if for any reason operation can not be linked. 
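Editor's note: the gpu_operation.cc/.h hunks above are the core of this patch. AddSrcTensor, AddSrcBuffer and AddDstTensor record tensor names in src_tensors_names_/dst_tensors_names_, and UpdateParams binds the i-th recorded name to src_[i]/dst_[i] (plus the tensors of linked elementwise operations, whose names get a unique postfix via AddUniquePostfix). That is what lets most per-operation BindArguments overrides and the old SetArgs/SetArguments chain disappear. The sketch below mirrors only that flow, assuming invented stand-ins (MiniArgs, MiniTensor, MiniOperation, MiniReshape); it is not the real tflite::gpu::cl implementation.

    // Minimal sketch of name-based tensor binding, driven from the base class.
    #include <cassert>
    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>

    struct MiniTensor {              // stand-in for cl::Tensor
      std::string debug_name;
    };

    class MiniArgs {                 // stand-in for cl::Arguments
     public:
      void AddObjectRef(const std::string& name) { refs_[name] = nullptr; }
      bool SetObjectRef(const std::string& name, MiniTensor* t) {
        auto it = refs_.find(name);
        if (it == refs_.end()) return false;  // unknown argument name
        it->second = t;
        return true;
      }
      void Dump() const {
        for (const auto& kv : refs_)
          std::cout << kv.first << " -> "
                    << (kv.second ? kv.second->debug_name : "<unbound>") << "\n";
      }
     private:
      std::map<std::string, MiniTensor*> refs_;
    };

    class MiniOperation {            // stand-in for GPUOperation
     public:
      void SetSrc(MiniTensor* t) { src_.push_back(t); }
      void SetDst(MiniTensor* t) { dst_.push_back(t); }

      // Like the patched GPUOperation::UpdateParams: bind the i-th recorded
      // name to the i-th runtime tensor, with no per-op override needed.
      bool UpdateParams() {
        assert(src_.size() == src_names_.size());
        assert(dst_.size() == dst_names_.size());
        for (size_t i = 0; i < src_names_.size(); ++i)
          if (!args_.SetObjectRef(src_names_[i], src_[i])) return false;
        for (size_t i = 0; i < dst_names_.size(); ++i)
          if (!args_.SetObjectRef(dst_names_[i], dst_[i])) return false;
        return true;
      }
      MiniArgs& args() { return args_; }

     protected:
      // Like AddSrcTensor / AddDstTensor: remember the name for UpdateParams
      // and declare the argument up front.
      void AddSrcTensor(const std::string& name) {
        src_names_.push_back(name);
        args_.AddObjectRef(name);
      }
      void AddDstTensor(const std::string& name) {
        dst_names_.push_back(name);
        args_.AddObjectRef(name);
      }

     private:
      MiniArgs args_;
      std::vector<std::string> src_names_, dst_names_;
      std::vector<MiniTensor*> src_, dst_;
    };

    // An op only registers its tensors while emitting code; it no longer needs
    // a BindArguments override for them.
    class MiniReshape : public MiniOperation {
     public:
      std::string GenerateCode() {
        AddSrcTensor("src_tensor");
        AddDstTensor("dst_tensor");
        return "__kernel void main_function(...) { /* elided */ }";
      }
    };

    int main() {
      MiniReshape op;
      op.GenerateCode();
      MiniTensor in{"input"}, out{"output"};
      op.SetSrc(&in);
      op.SetDst(&out);
      if (!op.UpdateParams()) return 1;
      op.args().Dump();  // prints dst_tensor -> output and src_tensor -> input
    }

The design choice this mirrors: binding moves from hand-written per-operation code into data (the recorded name lists), so adding a tensor to a kernel only requires registering it in the generator.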
- virtual bool IsLinkable() const { return true; } + bool IsLinkable() const { return linkable_; } protected: bool check_src_channels_size_ = false; std::string code_; + bool linkable_ = true; }; absl::Status MergeOperations( diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/lstm.cc b/tensorflow/lite/delegates/gpu/cl/kernels/lstm.cc index ab61fcb0b62..f894a5cc45e 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/lstm.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/lstm.cc @@ -24,22 +24,24 @@ limitations under the License. namespace tflite { namespace gpu { namespace cl { -namespace { -std::string GetLSTMCode(const OperationDef& op_def, const CLDevice& device, - Arguments* args) { - args->AddObjectRef( - "intermediate", AccessType::READ, - absl::make_unique(op_def.src_tensors[0])); - args->AddObjectRef( - "prev_state", AccessType::READ, - absl::make_unique(op_def.src_tensors[1])); - args->AddObjectRef( - "new_state", AccessType::WRITE, - absl::make_unique(op_def.dst_tensors[0])); - args->AddObjectRef( - "activation", AccessType::WRITE, - absl::make_unique(op_def.dst_tensors[1])); +LSTM::LSTM(const OperationDef& definition) : GPUOperation(definition) {} + +LSTM::LSTM(LSTM&& kernel) : GPUOperation(std::move(kernel)) {} + +LSTM& LSTM::operator=(LSTM&& kernel) { + if (this != &kernel) { + GPUOperation::operator=(std::move(kernel)); + } + return *this; +} + +std::string LSTM::GetLSTMCode(const OperationDef& op_def, + const CLDevice& device) { + AddSrcTensor("intermediate", op_def.src_tensors[0]); + AddSrcTensor("prev_state", op_def.src_tensors[1]); + AddDstTensor("new_state", op_def.dst_tensors[0]); + AddDstTensor("activation", op_def.dst_tensors[1]); std::string c = GetCommonDefines(op_def.precision); c += "__kernel void main_function(\n"; @@ -98,21 +100,9 @@ std::string GetLSTMCode(const OperationDef& op_def, const CLDevice& device, c += "}\n"; return c; } -} // namespace - -LSTM::LSTM(const OperationDef& definition) : GPUOperation(definition) {} - -LSTM::LSTM(LSTM&& kernel) : GPUOperation(std::move(kernel)) {} - -LSTM& LSTM::operator=(LSTM&& kernel) { - if (this != &kernel) { - GPUOperation::operator=(std::move(kernel)); - } - return *this; -} absl::Status LSTM::Compile(const CreationContext& creation_context) { - std::string code = GetLSTMCode(definition_, *creation_context.device, &args_); + std::string code = GetLSTMCode(definition_, *creation_context.device); RETURN_IF_ERROR( args_.TransformToCLCode(creation_context.device->GetInfo(), {}, &code)); return creation_context.cache->GetOrCreateCLKernel( @@ -120,14 +110,6 @@ absl::Status LSTM::Compile(const CreationContext& creation_context) { *creation_context.device, &kernel_); } -absl::Status LSTM::BindArguments() { - RETURN_IF_ERROR(args_.SetObjectRef("intermediate", src_[0])); - RETURN_IF_ERROR(args_.SetObjectRef("prev_state", src_[1])); - RETURN_IF_ERROR(args_.SetObjectRef("new_state", dst_[0])); - RETURN_IF_ERROR(args_.SetObjectRef("activation", dst_[1])); - return absl::OkStatus(); -} - int3 LSTM::GetGridSize() const { const int grid_x = dst_[0]->Batch(); const int grid_y = dst_[0]->Slices(); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/lstm.h b/tensorflow/lite/delegates/gpu/cl/kernels/lstm.h index 6490f396709..31b3c0f876b 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/lstm.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/lstm.h @@ -28,7 +28,6 @@ namespace cl { class LSTM : public GPUOperation { public: explicit LSTM(const OperationDef& definition); - absl::Status BindArguments() override; int3 GetGridSize() 
const override; absl::Status Compile(const CreationContext& creation_context) override; @@ -37,6 +36,9 @@ class LSTM : public GPUOperation { LSTM& operator=(LSTM&& kernel); LSTM(const LSTM&) = delete; LSTM& operator=(const LSTM&) = delete; + + private: + std::string GetLSTMCode(const OperationDef& op_def, const CLDevice& device); }; LSTM CreateLSTM(const OperationDef& definition); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.cc b/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.cc index bef4c26b177..e0e49e82a09 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.cc @@ -23,42 +23,71 @@ limitations under the License. namespace tflite { namespace gpu { namespace cl { -namespace { -std::string GetMaxUnpoolingKernelCode(const OperationDef& op_def, - const CLDevice& device, Arguments* args) { - auto src_desc = absl::make_unique(op_def.src_tensors[0]); - src_desc->SetTextureAddressMode(GetFastestZeroMode(device)); - if (op_def.IsBatchSupported()) { - src_desc->SetStateVar("BatchedWidth", "true"); +MaxUnpooling::MaxUnpooling(const OperationDef& definition, + const MaxUnpooling2DAttributes& attr) + : GPUOperation(definition), + stride_(attr.strides.w, attr.strides.h, 0, 0), + padding_(attr.padding.appended.w, attr.padding.appended.h, 0, 0), + kernel_size_(attr.kernel.w, attr.kernel.h, 0, 0) {} + +MaxUnpooling::MaxUnpooling(const OperationDef& definition, + const MaxUnpooling3DAttributes& attr) + : GPUOperation(definition), + stride_(attr.strides.w, attr.strides.h, attr.strides.d, 0), + padding_(attr.padding.appended.w, attr.padding.appended.h, + attr.padding.appended.d, 0), + kernel_size_(attr.kernel.w, attr.kernel.h, attr.kernel.d, 0) {} + +MaxUnpooling::MaxUnpooling(MaxUnpooling&& kernel) + : GPUOperation(std::move(kernel)), + stride_(kernel.stride_), + padding_(kernel.padding_), + kernel_size_(kernel.kernel_size_) {} + +MaxUnpooling& MaxUnpooling::operator=(MaxUnpooling&& kernel) { + if (this != &kernel) { + std::swap(stride_, kernel.stride_); + std::swap(padding_, kernel.padding_); + std::swap(kernel_size_, kernel.kernel_size_); + GPUOperation::operator=(std::move(kernel)); } - args->AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc)); - auto src_ind_desc = - absl::make_unique(op_def.src_tensors[1]); - src_ind_desc->SetTextureAddressMode(GetFastestZeroMode(device)); + return *this; +} + +std::string MaxUnpooling::GetMaxUnpoolingKernelCode(const OperationDef& op_def, + const CLDevice& device) { + auto src_desc = op_def.src_tensors[0]; + src_desc.SetTextureAddressMode(GetFastestZeroMode(device)); if (op_def.IsBatchSupported()) { - src_ind_desc->SetStateVar("BatchedWidth", "true"); + src_desc.SetStateVar("BatchedWidth", "true"); } - args->AddObjectRef("src_indices", AccessType::READ, std::move(src_ind_desc)); - auto dst_desc = absl::make_unique(op_def.dst_tensors[0]); + AddSrcTensor("src_tensor", src_desc); + auto src_ind_desc = op_def.src_tensors[1]; + src_ind_desc.SetTextureAddressMode(GetFastestZeroMode(device)); if (op_def.IsBatchSupported()) { - dst_desc->SetStateVar("BatchedWidth", "true"); + src_ind_desc.SetStateVar("BatchedWidth", "true"); } - args->AddObjectRef("dst_tensor", AccessType::WRITE, std::move(dst_desc)); + AddSrcTensor("src_indices", src_ind_desc); + auto dst_desc = op_def.dst_tensors[0]; + if (op_def.IsBatchSupported()) { + dst_desc.SetStateVar("BatchedWidth", "true"); + } + AddDstTensor("dst_tensor", dst_desc); if 
(op_def.dst_tensors[0].HasAxis(Axis::WIDTH)) { - args->AddInt("kernel_size_x"); - args->AddInt("padding_x"); - args->AddInt("stride_x"); + args_.AddInt("kernel_size_x"); + args_.AddInt("padding_x"); + args_.AddInt("stride_x"); } if (op_def.dst_tensors[0].HasAxis(Axis::HEIGHT)) { - args->AddInt("kernel_size_y"); - args->AddInt("padding_y"); - args->AddInt("stride_y"); + args_.AddInt("kernel_size_y"); + args_.AddInt("padding_y"); + args_.AddInt("stride_y"); } if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) { - args->AddInt("kernel_size_z"); - args->AddInt("padding_z"); - args->AddInt("stride_z"); + args_.AddInt("kernel_size_z"); + args_.AddInt("padding_z"); + args_.AddInt("stride_z"); } std::string c = GetCommonDefines(op_def.precision); @@ -139,42 +168,10 @@ std::string GetMaxUnpoolingKernelCode(const OperationDef& op_def, return c; } -} // namespace - -MaxUnpooling::MaxUnpooling(const OperationDef& definition, - const MaxUnpooling2DAttributes& attr) - : GPUOperation(definition), - stride_(attr.strides.w, attr.strides.h, 0, 0), - padding_(attr.padding.appended.w, attr.padding.appended.h, 0, 0), - kernel_size_(attr.kernel.w, attr.kernel.h, 0, 0) {} - -MaxUnpooling::MaxUnpooling(const OperationDef& definition, - const MaxUnpooling3DAttributes& attr) - : GPUOperation(definition), - stride_(attr.strides.w, attr.strides.h, attr.strides.d, 0), - padding_(attr.padding.appended.w, attr.padding.appended.h, - attr.padding.appended.d, 0), - kernel_size_(attr.kernel.w, attr.kernel.h, attr.kernel.d, 0) {} - -MaxUnpooling::MaxUnpooling(MaxUnpooling&& kernel) - : GPUOperation(std::move(kernel)), - stride_(kernel.stride_), - padding_(kernel.padding_), - kernel_size_(kernel.kernel_size_) {} - -MaxUnpooling& MaxUnpooling::operator=(MaxUnpooling&& kernel) { - if (this != &kernel) { - std::swap(stride_, kernel.stride_); - std::swap(padding_, kernel.padding_); - std::swap(kernel_size_, kernel.kernel_size_); - GPUOperation::operator=(std::move(kernel)); - } - return *this; -} absl::Status MaxUnpooling::Compile(const CreationContext& creation_context) { std::string code = - GetMaxUnpoolingKernelCode(definition_, *creation_context.device, &args_); + GetMaxUnpoolingKernelCode(definition_, *creation_context.device); std::string element_wise_code; RETURN_IF_ERROR( MergeOperations(linked_operations_, &args_, &element_wise_code)); @@ -187,9 +184,6 @@ absl::Status MaxUnpooling::Compile(const CreationContext& creation_context) { } absl::Status MaxUnpooling::BindArguments() { - RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); - RETURN_IF_ERROR(args_.SetObjectRef("src_indices", src_[1])); - RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); if (definition_.dst_tensors[0].HasAxis(Axis::WIDTH)) { RETURN_IF_ERROR(args_.SetInt("stride_x", stride_.x)); RETURN_IF_ERROR(args_.SetInt("padding_x", padding_.x * src_[0]->Batch())); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.h b/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.h index 38f47df1527..d406dc2aee1 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.h @@ -43,6 +43,9 @@ class MaxUnpooling : public GPUOperation { MaxUnpooling& operator=(const MaxUnpooling&) = delete; private: + std::string GetMaxUnpoolingKernelCode(const OperationDef& op_def, + const CLDevice& device); + int4 stride_; int4 padding_; int4 kernel_size_; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/mean.cc b/tensorflow/lite/delegates/gpu/cl/kernels/mean.cc index 
e3fa023633f..9378dafd049 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/mean.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/mean.cc @@ -25,18 +25,22 @@ limitations under the License. namespace tflite { namespace gpu { namespace cl { -namespace { -std::string GetMeanKernelCode(const OperationDef& op_def, - const int3& work_group_size, Arguments* args) { - args->AddObjectRef( - "src_tensor", AccessType::READ, - absl::make_unique(op_def.src_tensors[0])); - args->AddObjectRef( - "dst_tensor", AccessType::WRITE, - absl::make_unique(op_def.dst_tensors[0])); - args->AddFloat("inv_multiplier_1"); - args->AddFloat("inv_multiplier_2"); +Mean::Mean(Mean&& operation) : GPUOperation(std::move(operation)) {} + +Mean& Mean::operator=(Mean&& operation) { + if (this != &operation) { + GPUOperation::operator=(std::move(operation)); + } + return *this; +} + +std::string Mean::GetMeanKernelCode(const OperationDef& op_def, + const int3& work_group_size) { + AddSrcTensor("src_tensor", op_def.src_tensors[0]); + AddDstTensor("dst_tensor", op_def.dst_tensors[0]); + args_.AddFloat("inv_multiplier_1"); + args_.AddFloat("inv_multiplier_2"); std::string c = GetCommonDefines(op_def.precision); const std::string wg_x = std::to_string(work_group_size.x); @@ -91,16 +95,6 @@ std::string GetMeanKernelCode(const OperationDef& op_def, c += "}\n"; return c; } -} // namespace - -Mean::Mean(Mean&& operation) : GPUOperation(std::move(operation)) {} - -Mean& Mean::operator=(Mean&& operation) { - if (this != &operation) { - GPUOperation::operator=(std::move(operation)); - } - return *this; -} absl::Status Mean::Compile(const CreationContext& creation_context) { // must be: (x * y) % 4 = 0; @@ -109,7 +103,7 @@ absl::Status Mean::Compile(const CreationContext& creation_context) { if (creation_context.device->IsAdreno3xx()) { work_group_size_ = int3(16, 8, 1); } - std::string code = GetMeanKernelCode(definition_, work_group_size_, &args_); + std::string code = GetMeanKernelCode(definition_, work_group_size_); std::string element_wise_code; RETURN_IF_ERROR( MergeOperations(linked_operations_, &args_, &element_wise_code)); @@ -122,8 +116,6 @@ absl::Status Mean::Compile(const CreationContext& creation_context) { } absl::Status Mean::BindArguments() { - RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); - RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); const double total_size = src_[0]->Width() * src_[0]->Height(); const double size_0 = work_group_size_.x * work_group_size_.y; const double size_1 = total_size / size_0; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/mean.h b/tensorflow/lite/delegates/gpu/cl/kernels/mean.h index 0552f167d92..938b82d3a6f 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/mean.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/mean.h @@ -43,6 +43,10 @@ class Mean : public GPUOperation { Mean& operator=(Mean&& operation); Mean(const Mean&) = delete; Mean& operator=(const Mean&) = delete; + + private: + std::string GetMeanKernelCode(const OperationDef& op_def, + const int3& work_group_size); }; Mean CreateMean(const OperationDef& definition); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/padding.cc b/tensorflow/lite/delegates/gpu/cl/kernels/padding.cc index ebd2809b97c..57b52deeb8b 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/padding.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/padding.cc @@ -24,20 +24,29 @@ limitations under the License. 
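Editor's note: one consequence of binding by recorded name order, visible in the MaxUnpooling hunks above (which register src_tensor and then src_indices, the tensors the old code bound as src_[0] and src_[1]): the i-th AddSrcTensor call has to line up with the tensor supplied at index i. A tiny stand-alone illustration of that positional contract, using invented data rather than the real API:

    // Positional binding: registration order must match SetSrc index order.
    #include <cassert>
    #include <iostream>
    #include <string>
    #include <vector>

    struct Binding { std::string name; int tensor_id; };

    int main() {
      // Names in the order the generator registered them.
      std::vector<std::string> src_names = {"src_tensor", "src_indices"};
      // Runtime tensors in the order they were attached to the operation.
      std::vector<int> src_ids = {/*values*/ 7, /*indices*/ 8};

      assert(src_names.size() == src_ids.size());
      std::vector<Binding> bound;
      for (size_t i = 0; i < src_names.size(); ++i)
        bound.push_back({src_names[i], src_ids[i]});  // bind by position

      for (const auto& b : bound)
        std::cout << b.name << " <- tensor #" << b.tensor_id << "\n";
    }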
namespace tflite { namespace gpu { namespace cl { -namespace { -std::string GetPaddingCode(const OperationDef& op_def, - const PadAttributes& attr, Arguments* args) { - args->AddObjectRef( - "src_tensor", AccessType::READ, - absl::make_unique(op_def.src_tensors[0])); - args->AddObjectRef( - "dst_tensor", AccessType::WRITE, - absl::make_unique(op_def.dst_tensors[0])); - args->AddInt("prepended_x"); - args->AddInt("prepended_y"); - args->AddInt("prepended_z"); - args->AddInt("prepended_w"); +Padding::Padding(const OperationDef& definition, const PadAttributes& attr) + : GPUOperation(definition), attributes_(attr) {} + +Padding::Padding(Padding&& kernel) + : GPUOperation(std::move(kernel)), attributes_(kernel.attributes_) {} + +Padding& Padding::operator=(Padding&& kernel) { + if (this != &kernel) { + std::swap(attributes_, kernel.attributes_); + GPUOperation::operator=(std::move(kernel)); + } + return *this; +} + +std::string Padding::GetPaddingCode(const OperationDef& op_def, + const PadAttributes& attr) { + AddSrcTensor("src_tensor", op_def.src_tensors[0]); + AddDstTensor("dst_tensor", op_def.dst_tensors[0]); + args_.AddInt("prepended_x"); + args_.AddInt("prepended_y"); + args_.AddInt("prepended_z"); + args_.AddInt("prepended_w"); const std::string dst_batch = op_def.dst_tensors[0].HasAxis(Axis::BATCH) ? "B" : "0"; @@ -139,24 +148,9 @@ std::string GetPaddingCode(const OperationDef& op_def, return c; } -} // namespace - -Padding::Padding(const OperationDef& definition, const PadAttributes& attr) - : GPUOperation(definition), attributes_(attr) {} - -Padding::Padding(Padding&& kernel) - : GPUOperation(std::move(kernel)), attributes_(kernel.attributes_) {} - -Padding& Padding::operator=(Padding&& kernel) { - if (this != &kernel) { - std::swap(attributes_, kernel.attributes_); - GPUOperation::operator=(std::move(kernel)); - } - return *this; -} absl::Status Padding::Compile(const CreationContext& creation_context) { - std::string code = GetPaddingCode(definition_, attributes_, &args_); + std::string code = GetPaddingCode(definition_, attributes_); std::string element_wise_code; RETURN_IF_ERROR( MergeOperations(linked_operations_, &args_, &element_wise_code)); @@ -169,8 +163,6 @@ absl::Status Padding::Compile(const CreationContext& creation_context) { } absl::Status Padding::BindArguments() { - RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); - RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); RETURN_IF_ERROR(args_.SetInt("prepended_x", attributes_.prepended.w)); RETURN_IF_ERROR(args_.SetInt("prepended_y", attributes_.prepended.h)); RETURN_IF_ERROR(args_.SetInt("prepended_z", attributes_.prepended.c)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/padding.h b/tensorflow/lite/delegates/gpu/cl/kernels/padding.h index 12a83a4f360..d7fc5c58fe3 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/padding.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/padding.h @@ -40,6 +40,9 @@ class Padding : public GPUOperation { Padding& operator=(const Padding&) = delete; private: + std::string GetPaddingCode(const OperationDef& op_def, + const PadAttributes& attr); + PadAttributes attributes_; }; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/pooling.cc b/tensorflow/lite/delegates/gpu/cl/kernels/pooling.cc index 6ba49e335e0..d264061004a 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/pooling.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/pooling.cc @@ -23,37 +23,74 @@ limitations under the License. 
namespace tflite { namespace gpu { namespace cl { -namespace { -std::string GetAveragePoolingKernelCode(const OperationDef& op_def, - bool stride_correction, - const CLDevice& device, - Arguments* args) { - auto src_desc = absl::make_unique(op_def.src_tensors[0]); - src_desc->SetTextureAddressMode(GetFastestZeroMode(device)); - if (op_def.IsBatchSupported()) { - src_desc->SetStateVar("BatchedWidth", "true"); +Pooling::Pooling(const OperationDef& definition, + const Pooling2DAttributes& attr) + : GPUOperation(definition), + stride_(attr.strides.w, attr.strides.h, 0, 0), + padding_(-attr.padding.prepended.w, -attr.padding.prepended.h, 0, 0), + kernel_size_(attr.kernel.w, attr.kernel.h, 0, 0), + type_(attr.type), + output_indices_(attr.output_indices) {} + +Pooling::Pooling(const OperationDef& definition, + const Pooling3DAttributes& attr) + : GPUOperation(definition), + stride_(attr.strides.w, attr.strides.h, attr.strides.d, 0), + padding_(-attr.padding.prepended.w, -attr.padding.prepended.h, + -attr.padding.prepended.d, 0), + kernel_size_(attr.kernel.w, attr.kernel.h, attr.kernel.d, 0), + type_(attr.type), + output_indices_(attr.output_indices) {} + +Pooling::Pooling(Pooling&& kernel) + : GPUOperation(std::move(kernel)), + stride_(kernel.stride_), + padding_(kernel.padding_), + kernel_size_(kernel.kernel_size_), + type_(kernel.type_), + output_indices_(kernel.output_indices_) {} + +Pooling& Pooling::operator=(Pooling&& kernel) { + if (this != &kernel) { + std::swap(stride_, kernel.stride_); + std::swap(padding_, kernel.padding_); + std::swap(kernel_size_, kernel.kernel_size_); + std::swap(type_, kernel.type_); + std::swap(output_indices_, kernel.output_indices_); + GPUOperation::operator=(std::move(kernel)); } - args->AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc)); - auto dst_desc = absl::make_unique(op_def.dst_tensors[0]); + return *this; +} + +std::string Pooling::GetAveragePoolingKernelCode(const OperationDef& op_def, + bool stride_correction, + const CLDevice& device) { + auto src_desc = op_def.src_tensors[0]; + src_desc.SetTextureAddressMode(GetFastestZeroMode(device)); if (op_def.IsBatchSupported()) { - dst_desc->SetStateVar("BatchedWidth", "true"); + src_desc.SetStateVar("BatchedWidth", "true"); } - args->AddObjectRef("dst_tensor", AccessType::WRITE, std::move(dst_desc)); + AddSrcTensor("src_tensor", src_desc); + auto dst_desc = op_def.dst_tensors[0]; + if (op_def.IsBatchSupported()) { + dst_desc.SetStateVar("BatchedWidth", "true"); + } + AddDstTensor("dst_tensor", dst_desc); if (op_def.dst_tensors[0].HasAxis(Axis::WIDTH)) { - args->AddInt("kernel_size_x"); - args->AddInt("padding_x"); - args->AddInt("stride_x"); + args_.AddInt("kernel_size_x"); + args_.AddInt("padding_x"); + args_.AddInt("stride_x"); } if (op_def.dst_tensors[0].HasAxis(Axis::HEIGHT)) { - args->AddInt("kernel_size_y"); - args->AddInt("padding_y"); - args->AddInt("stride_y"); + args_.AddInt("kernel_size_y"); + args_.AddInt("padding_y"); + args_.AddInt("stride_y"); } if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) { - args->AddInt("kernel_size_z"); - args->AddInt("padding_z"); - args->AddInt("stride_z"); + args_.AddInt("kernel_size_z"); + args_.AddInt("padding_z"); + args_.AddInt("stride_z"); } std::map axis_to_src_coord = { @@ -155,42 +192,40 @@ std::string GetAveragePoolingKernelCode(const OperationDef& op_def, return c; } -std::string GetMaxPoolingKernelCode(const OperationDef& op_def, - bool stride_correction, bool output_indices, - Arguments* args) { - auto src_desc = 
absl::make_unique(op_def.src_tensors[0]); +std::string Pooling::GetMaxPoolingKernelCode(const OperationDef& op_def, + bool stride_correction, + bool output_indices) { + auto src_desc = op_def.src_tensors[0]; if (op_def.IsBatchSupported()) { - src_desc->SetStateVar("BatchedWidth", "true"); + src_desc.SetStateVar("BatchedWidth", "true"); } - args->AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc)); - auto dst_desc = absl::make_unique(op_def.dst_tensors[0]); + AddSrcTensor("src_tensor", src_desc); + auto dst_desc = op_def.dst_tensors[0]; if (op_def.IsBatchSupported()) { - dst_desc->SetStateVar("BatchedWidth", "true"); + dst_desc.SetStateVar("BatchedWidth", "true"); } - args->AddObjectRef("dst_tensor", AccessType::WRITE, std::move(dst_desc)); + AddDstTensor("dst_tensor", dst_desc); if (output_indices) { - auto dst_ind_desc = - absl::make_unique(op_def.dst_tensors[1]); + auto dst_ind_desc = op_def.dst_tensors[1]; if (op_def.IsBatchSupported()) { - dst_ind_desc->SetStateVar("BatchedWidth", "true"); + dst_ind_desc.SetStateVar("BatchedWidth", "true"); } - args->AddObjectRef("dst_indices", AccessType::WRITE, - std::move(dst_ind_desc)); + AddDstTensor("dst_indices", dst_ind_desc); } if (op_def.dst_tensors[0].HasAxis(Axis::WIDTH)) { - args->AddInt("kernel_size_x"); - args->AddInt("padding_x"); - args->AddInt("stride_x"); + args_.AddInt("kernel_size_x"); + args_.AddInt("padding_x"); + args_.AddInt("stride_x"); } if (op_def.dst_tensors[0].HasAxis(Axis::HEIGHT)) { - args->AddInt("kernel_size_y"); - args->AddInt("padding_y"); - args->AddInt("stride_y"); + args_.AddInt("kernel_size_y"); + args_.AddInt("padding_y"); + args_.AddInt("stride_y"); } if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) { - args->AddInt("kernel_size_z"); - args->AddInt("padding_z"); - args->AddInt("stride_z"); + args_.AddInt("kernel_size_z"); + args_.AddInt("padding_z"); + args_.AddInt("stride_z"); } std::map axis_to_src_coord = { @@ -308,46 +343,6 @@ std::string GetMaxPoolingKernelCode(const OperationDef& op_def, return c; } -} // namespace - -Pooling::Pooling(const OperationDef& definition, - const Pooling2DAttributes& attr) - : GPUOperation(definition), - stride_(attr.strides.w, attr.strides.h, 0, 0), - padding_(-attr.padding.prepended.w, -attr.padding.prepended.h, 0, 0), - kernel_size_(attr.kernel.w, attr.kernel.h, 0, 0), - type_(attr.type), - output_indices_(attr.output_indices) {} - -Pooling::Pooling(const OperationDef& definition, - const Pooling3DAttributes& attr) - : GPUOperation(definition), - stride_(attr.strides.w, attr.strides.h, attr.strides.d, 0), - padding_(-attr.padding.prepended.w, -attr.padding.prepended.h, - -attr.padding.prepended.d, 0), - kernel_size_(attr.kernel.w, attr.kernel.h, attr.kernel.d, 0), - type_(attr.type), - output_indices_(attr.output_indices) {} - -Pooling::Pooling(Pooling&& kernel) - : GPUOperation(std::move(kernel)), - stride_(kernel.stride_), - padding_(kernel.padding_), - kernel_size_(kernel.kernel_size_), - type_(kernel.type_), - output_indices_(kernel.output_indices_) {} - -Pooling& Pooling::operator=(Pooling&& kernel) { - if (this != &kernel) { - std::swap(stride_, kernel.stride_); - std::swap(padding_, kernel.padding_); - std::swap(kernel_size_, kernel.kernel_size_); - std::swap(type_, kernel.type_); - std::swap(output_indices_, kernel.output_indices_); - GPUOperation::operator=(std::move(kernel)); - } - return *this; -} absl::Status Pooling::Compile(const CreationContext& creation_context) { std::string code; @@ -356,11 +351,11 @@ absl::Status Pooling::Compile(const 
CreationContext& creation_context) { switch (type_) { case PoolingType::AVERAGE: code = GetAveragePoolingKernelCode(definition_, stride_correction, - *creation_context.device, &args_); + *creation_context.device); break; case PoolingType::MAX: code = GetMaxPoolingKernelCode(definition_, stride_correction, - output_indices_, &args_); + output_indices_); break; default: return absl::InvalidArgumentError( @@ -379,8 +374,6 @@ absl::Status Pooling::Compile(const CreationContext& creation_context) { } absl::Status Pooling::BindArguments() { - RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); - RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); if (definition_.dst_tensors[0].HasAxis(Axis::WIDTH)) { RETURN_IF_ERROR(args_.SetInt("stride_x", stride_.x)); RETURN_IF_ERROR(args_.SetInt("padding_x", padding_.x * src_[0]->Batch())); @@ -396,9 +389,6 @@ absl::Status Pooling::BindArguments() { RETURN_IF_ERROR(args_.SetInt("padding_z", padding_.z)); RETURN_IF_ERROR(args_.SetInt("kernel_size_z", kernel_size_.z)); } - if (output_indices_) { - RETURN_IF_ERROR(args_.SetObjectRef("dst_indices", dst_[1])); - } return absl::OkStatus(); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/pooling.h b/tensorflow/lite/delegates/gpu/cl/kernels/pooling.h index c0199d6de71..712335d68a1 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/pooling.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/pooling.h @@ -43,6 +43,13 @@ class Pooling : public GPUOperation { Pooling& operator=(const Pooling&) = delete; private: + std::string GetAveragePoolingKernelCode(const OperationDef& op_def, + bool stride_correction, + const CLDevice& device); + std::string GetMaxPoolingKernelCode(const OperationDef& op_def, + bool stride_correction, + bool output_indices); + int4 stride_; int4 padding_; int4 kernel_size_; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/reshape.cc b/tensorflow/lite/delegates/gpu/cl/kernels/reshape.cc index a2e1092b387..cf9b3893896 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/reshape.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/reshape.cc @@ -23,15 +23,19 @@ limitations under the License. 
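Editor's note: after this refactor, a BindArguments override like Pooling's above is left with scalar uniforms only (strides, paddings, kernel sizes); note that the x padding is still multiplied by the batch size before being set, matching the batched-width addressing used by these kernels. A minimal sketch of that split, under invented stand-ins (IntArgs, MiniPooling) rather than the real classes:

    // Scalar-only BindArguments: tensors are handled generically elsewhere.
    #include <iostream>
    #include <map>
    #include <string>

    class IntArgs {  // stand-in for the integer part of cl::Arguments
     public:
      void AddInt(const std::string& name) { values_[name] = 0; }
      bool SetInt(const std::string& name, int v) {
        auto it = values_.find(name);
        if (it == values_.end()) return false;
        it->second = v;
        return true;
      }
      int Get(const std::string& name) const { return values_.at(name); }
     private:
      std::map<std::string, int> values_;
    };

    struct MiniPooling {
      int stride_x = 2, padding_x = -1, batch = 1;
      IntArgs args;

      void DeclareArgs() {  // done while generating the kernel source
        args.AddInt("stride_x");
        args.AddInt("padding_x");
      }
      // Only scalars remain here; padding is pre-scaled by the batch size.
      bool BindArguments() {
        return args.SetInt("stride_x", stride_x) &&
               args.SetInt("padding_x", padding_x * batch);
      }
    };

    int main() {
      MiniPooling op;
      op.DeclareArgs();
      if (!op.BindArguments()) return 1;
      std::cout << "stride_x=" << op.args.Get("stride_x")
                << " padding_x=" << op.args.Get("padding_x") << "\n";
    }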
namespace tflite { namespace gpu { namespace cl { -namespace { -std::string GetReshapeCode(const OperationDef& op_def, Arguments* args) { - args->AddObjectRef( - "src_tensor", AccessType::READ, - absl::make_unique(op_def.src_tensors[0])); - args->AddObjectRef( - "dst_tensor", AccessType::WRITE, - absl::make_unique(op_def.dst_tensors[0])); +Reshape::Reshape(Reshape&& operation) : GPUOperation(std::move(operation)) {} + +Reshape& Reshape::operator=(Reshape&& operation) { + if (this != &operation) { + GPUOperation::operator=(std::move(operation)); + } + return *this; +} + +std::string Reshape::GetReshapeCode(const OperationDef& op_def) { + AddSrcTensor("src_tensor", op_def.src_tensors[0]); + AddDstTensor("dst_tensor", op_def.dst_tensors[0]); std::string c = GetCommonDefines(op_def.precision); c += "__kernel void main_function(\n"; @@ -87,19 +91,9 @@ std::string GetReshapeCode(const OperationDef& op_def, Arguments* args) { c += "}\n"; return c; } -} // namespace - -Reshape::Reshape(Reshape&& operation) : GPUOperation(std::move(operation)) {} - -Reshape& Reshape::operator=(Reshape&& operation) { - if (this != &operation) { - GPUOperation::operator=(std::move(operation)); - } - return *this; -} absl::Status Reshape::Compile(const CreationContext& creation_context) { - std::string code = GetReshapeCode(definition_, &args_); + std::string code = GetReshapeCode(definition_); std::string element_wise_code; RETURN_IF_ERROR( MergeOperations(linked_operations_, &args_, &element_wise_code)); @@ -111,12 +105,6 @@ absl::Status Reshape::Compile(const CreationContext& creation_context) { *creation_context.device, &kernel_); } -absl::Status Reshape::BindArguments() { - RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); - RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); - return absl::OkStatus(); -} - int3 Reshape::GetGridSize() const { const int grid_x = dst_[0]->Width() * dst_[0]->Batch(); const int grid_y = dst_[0]->Height(); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/reshape.h b/tensorflow/lite/delegates/gpu/cl/kernels/reshape.h index 571a225d02d..1783bb7b2c7 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/reshape.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/reshape.h @@ -29,7 +29,6 @@ class Reshape : public GPUOperation { public: explicit Reshape(const OperationDef& definition) : GPUOperation(definition) {} - absl::Status BindArguments() override; int3 GetGridSize() const override; absl::Status Compile(const CreationContext& creation_context) override; @@ -38,6 +37,9 @@ class Reshape : public GPUOperation { Reshape& operator=(Reshape&& operation); Reshape(const Reshape&) = delete; Reshape& operator=(const Reshape&) = delete; + + private: + std::string GetReshapeCode(const OperationDef& op_def); }; Reshape CreateReshape(const OperationDef& definition); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.cc b/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.cc index 1036dd8ef4e..7043469202b 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.cc @@ -23,15 +23,20 @@ limitations under the License. 
namespace tflite { namespace gpu { namespace cl { -namespace { -std::string GetReshapeCode(const OperationDef& op_def, Arguments* args) { - args->AddObjectRef( - "src_tensor", AccessType::READ, - absl::make_unique(op_def.src_tensors[0])); - args->AddObjectRef( - "dst_tensor", AccessType::WRITE, - absl::make_unique(op_def.dst_tensors[0])); +Reshapex4::Reshapex4(Reshapex4&& operation) + : GPUOperation(std::move(operation)) {} + +Reshapex4& Reshapex4::operator=(Reshapex4&& operation) { + if (this != &operation) { + GPUOperation::operator=(std::move(operation)); + } + return *this; +} + +std::string Reshapex4::GetReshapeCode(const OperationDef& op_def) { + AddSrcTensor("src_tensor", op_def.src_tensors[0]); + AddDstTensor("dst_tensor", op_def.dst_tensors[0]); std::string c = GetCommonDefines(op_def.precision); c += "__kernel void main_function(\n"; @@ -71,20 +76,9 @@ std::string GetReshapeCode(const OperationDef& op_def, Arguments* args) { c += "}\n"; return c; } -} // namespace - -Reshapex4::Reshapex4(Reshapex4&& operation) - : GPUOperation(std::move(operation)) {} - -Reshapex4& Reshapex4::operator=(Reshapex4&& operation) { - if (this != &operation) { - GPUOperation::operator=(std::move(operation)); - } - return *this; -} absl::Status Reshapex4::Compile(const CreationContext& creation_context) { - std::string code = GetReshapeCode(definition_, &args_); + std::string code = GetReshapeCode(definition_); std::string element_wise_code; RETURN_IF_ERROR( MergeOperations(linked_operations_, &args_, &element_wise_code)); @@ -96,12 +90,6 @@ absl::Status Reshapex4::Compile(const CreationContext& creation_context) { *creation_context.device, &kernel_); } -absl::Status Reshapex4::BindArguments() { - RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); - RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); - return absl::OkStatus(); -} - int3 Reshapex4::GetGridSize() const { const int grid_x = dst_[0]->Width() * dst_[0]->Batch(); const int grid_y = dst_[0]->Height(); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.h b/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.h index 040b5b82e70..f278f52652d 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.h @@ -31,7 +31,6 @@ class Reshapex4 : public GPUOperation { explicit Reshapex4(const OperationDef& definition) : GPUOperation(definition) {} - absl::Status BindArguments() override; int3 GetGridSize() const override; absl::Status Compile(const CreationContext& creation_context) override; @@ -40,6 +39,9 @@ class Reshapex4 : public GPUOperation { Reshapex4& operator=(Reshapex4&& operation); Reshapex4(const Reshapex4&) = delete; Reshapex4& operator=(const Reshapex4&) = delete; + + private: + std::string GetReshapeCode(const OperationDef& op_def); }; // More optimized, but require src_channels % 4 == 0 and dst_channels % 4 == 0 diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/resize.cc b/tensorflow/lite/delegates/gpu/cl/kernels/resize.cc index 33bb3b8f4cb..297f1ee5fb7 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/resize.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/resize.cc @@ -23,25 +23,35 @@ limitations under the License. 
namespace tflite { namespace gpu { namespace cl { -namespace { -std::string GetResizeCode(const OperationDef& op_def, - SamplingType sampling_type, bool half_pixel_centers, - Arguments* args) { - auto src_desc = absl::make_unique(op_def.src_tensors[0]); - if (op_def.IsBatchSupported()) { - src_desc->SetStateVar("BatchedWidth", "true"); +Resize::Resize(Resize&& operation) + : GPUOperation(std::move(operation)), attr_(operation.attr_) {} + +Resize& Resize::operator=(Resize&& operation) { + if (this != &operation) { + attr_ = operation.attr_; + GPUOperation::operator=(std::move(operation)); } - args->AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc)); - auto dst_desc = absl::make_unique(op_def.dst_tensors[0]); + return *this; +} + +std::string Resize::GetResizeCode(const OperationDef& op_def, + SamplingType sampling_type, + bool half_pixel_centers) { + auto src_desc = op_def.src_tensors[0]; if (op_def.IsBatchSupported()) { - dst_desc->SetStateVar("BatchedWidth", "true"); + src_desc.SetStateVar("BatchedWidth", "true"); } - args->AddObjectRef("dst_tensor", AccessType::WRITE, std::move(dst_desc)); - args->AddInt("border_x"); - args->AddInt("border_y"); - args->AddFloat("scale_factor_x"); - args->AddFloat("scale_factor_y"); + AddSrcTensor("src_tensor", src_desc); + auto dst_desc = op_def.dst_tensors[0]; + if (op_def.IsBatchSupported()) { + dst_desc.SetStateVar("BatchedWidth", "true"); + } + AddDstTensor("dst_tensor", dst_desc); + args_.AddInt("border_x"); + args_.AddInt("border_y"); + args_.AddFloat("scale_factor_x"); + args_.AddFloat("scale_factor_y"); std::string c = GetCommonDefines(op_def.precision); c += "__kernel void main_function(\n"; @@ -100,24 +110,73 @@ std::string GetResizeCode(const OperationDef& op_def, return c; } -std::string GetResize3DCode(const OperationDef& op_def, - SamplingType sampling_type, Arguments* args) { - auto src_desc = absl::make_unique(op_def.src_tensors[0]); - if (op_def.IsBatchSupported()) { - src_desc->SetStateVar("BatchedWidth", "true"); +absl::Status Resize::Compile(const CreationContext& creation_context) { + std::string code = + GetResizeCode(definition_, attr_.type, attr_.half_pixel_centers); + std::string element_wise_code; + RETURN_IF_ERROR( + MergeOperations(linked_operations_, &args_, &element_wise_code)); + RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), + {{"dst_tensor", element_wise_code}}, + &code)); + return creation_context.cache->GetOrCreateCLKernel( + code, "main_function", *creation_context.context, + *creation_context.device, &kernel_); +} + +absl::Status Resize::BindArguments() { + RETURN_IF_ERROR(args_.SetInt("border_x", src_[0]->Width() - 1)); + RETURN_IF_ERROR(args_.SetInt("border_y", src_[0]->Height() - 1)); + RETURN_IF_ERROR(args_.SetFloat( + "scale_factor_x", + CalculateResizeScale(src_[0]->Width(), dst_[0]->Width(), attr_))); + RETURN_IF_ERROR(args_.SetFloat( + "scale_factor_y", + CalculateResizeScale(src_[0]->Height(), dst_[0]->Height(), attr_))); + return absl::OkStatus(); +} + +int3 Resize::GetGridSize() const { + const int grid_x = dst_[0]->Width() * dst_[0]->Batch(); + const int grid_y = dst_[0]->Height(); + const int grid_z = dst_[0]->Slices(); + return int3(grid_x, grid_y, grid_z); +} + +Resize CreateResize(const OperationDef& definition, + const Resize2DAttributes& attr) { + return Resize(definition, attr); +} + +Resize3D::Resize3D(Resize3D&& operation) + : GPUOperation(std::move(operation)), attr_(operation.attr_) {} + +Resize3D& Resize3D::operator=(Resize3D&& operation) { + if (this 
!= &operation) { + attr_ = operation.attr_; + GPUOperation::operator=(std::move(operation)); } - args->AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc)); - auto dst_desc = absl::make_unique(op_def.dst_tensors[0]); + return *this; +} + +std::string Resize3D::GetResize3DCode(const OperationDef& op_def, + SamplingType sampling_type) { + auto src_desc = op_def.src_tensors[0]; if (op_def.IsBatchSupported()) { - dst_desc->SetStateVar("BatchedWidth", "true"); + src_desc.SetStateVar("BatchedWidth", "true"); } - args->AddObjectRef("dst_tensor", AccessType::WRITE, std::move(dst_desc)); - args->AddInt("border_x"); - args->AddInt("border_y"); - args->AddInt("border_z"); - args->AddFloat("scale_factor_x"); - args->AddFloat("scale_factor_y"); - args->AddFloat("scale_factor_z"); + AddSrcTensor("src_tensor", src_desc); + auto dst_desc = op_def.dst_tensors[0]; + if (op_def.IsBatchSupported()) { + dst_desc.SetStateVar("BatchedWidth", "true"); + } + AddDstTensor("dst_tensor", dst_desc); + args_.AddInt("border_x"); + args_.AddInt("border_y"); + args_.AddInt("border_z"); + args_.AddFloat("scale_factor_x"); + args_.AddFloat("scale_factor_y"); + args_.AddFloat("scale_factor_z"); std::string c = GetCommonDefines(op_def.precision); c += "__kernel void main_function(\n"; @@ -189,72 +248,8 @@ std::string GetResize3DCode(const OperationDef& op_def, return c; } -} // namespace - -Resize::Resize(Resize&& operation) - : GPUOperation(std::move(operation)), attr_(operation.attr_) {} - -Resize& Resize::operator=(Resize&& operation) { - if (this != &operation) { - attr_ = operation.attr_; - GPUOperation::operator=(std::move(operation)); - } - return *this; -} - -absl::Status Resize::Compile(const CreationContext& creation_context) { - std::string code = - GetResizeCode(definition_, attr_.type, attr_.half_pixel_centers, &args_); - std::string element_wise_code; - RETURN_IF_ERROR( - MergeOperations(linked_operations_, &args_, &element_wise_code)); - RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), - {{"dst_tensor", element_wise_code}}, - &code)); - return creation_context.cache->GetOrCreateCLKernel( - code, "main_function", *creation_context.context, - *creation_context.device, &kernel_); -} - -absl::Status Resize::BindArguments() { - RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); - RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); - RETURN_IF_ERROR(args_.SetInt("border_x", src_[0]->Width() - 1)); - RETURN_IF_ERROR(args_.SetInt("border_y", src_[0]->Height() - 1)); - RETURN_IF_ERROR(args_.SetFloat( - "scale_factor_x", - CalculateResizeScale(src_[0]->Width(), dst_[0]->Width(), attr_))); - RETURN_IF_ERROR(args_.SetFloat( - "scale_factor_y", - CalculateResizeScale(src_[0]->Height(), dst_[0]->Height(), attr_))); - return absl::OkStatus(); -} - -int3 Resize::GetGridSize() const { - const int grid_x = dst_[0]->Width() * dst_[0]->Batch(); - const int grid_y = dst_[0]->Height(); - const int grid_z = dst_[0]->Slices(); - return int3(grid_x, grid_y, grid_z); -} - -Resize CreateResize(const OperationDef& definition, - const Resize2DAttributes& attr) { - return Resize(definition, attr); -} - -Resize3D::Resize3D(Resize3D&& operation) - : GPUOperation(std::move(operation)), attr_(operation.attr_) {} - -Resize3D& Resize3D::operator=(Resize3D&& operation) { - if (this != &operation) { - attr_ = operation.attr_; - GPUOperation::operator=(std::move(operation)); - } - return *this; -} - absl::Status Resize3D::Compile(const CreationContext& creation_context) { - std::string code = 
GetResize3DCode(definition_, attr_.type, &args_);
+  std::string code = GetResize3DCode(definition_, attr_.type);
   std::string element_wise_code;
   RETURN_IF_ERROR(
       MergeOperations(linked_operations_, &args_, &element_wise_code));
@@ -267,8 +262,6 @@ absl::Status Resize3D::Compile(const CreationContext& creation_context) {
 }

 absl::Status Resize3D::BindArguments() {
-  RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0]));
-  RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0]));
   RETURN_IF_ERROR(args_.SetInt("border_x", src_[0]->Width() - 1));
   RETURN_IF_ERROR(args_.SetInt("border_y", src_[0]->Height() - 1));
   RETURN_IF_ERROR(args_.SetInt("border_z", src_[0]->Depth() - 1));
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/resize.h b/tensorflow/lite/delegates/gpu/cl/kernels/resize.h
index 899c85b7758..5286a72aceb 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/resize.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/resize.h
@@ -44,6 +44,10 @@ class Resize : public GPUOperation {
   Resize(const OperationDef& definition, const Resize2DAttributes& attr)
       : GPUOperation(definition), attr_(attr) {}

+  std::string GetResizeCode(const OperationDef& op_def,
+                            SamplingType sampling_type,
+                            bool half_pixel_centers);
+
   Resize2DAttributes attr_;
 };

@@ -69,6 +73,9 @@ class Resize3D : public GPUOperation {
   Resize3D(const OperationDef& definition, const Resize3DAttributes& attr)
       : GPUOperation(definition), attr_(attr) {}

+  std::string GetResize3DCode(const OperationDef& op_def,
+                              SamplingType sampling_type);
+
   Resize3DAttributes attr_;
 };

diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/softmax.cc b/tensorflow/lite/delegates/gpu/cl/kernels/softmax.cc
index edc720dbdb8..fc75be3c46f 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/softmax.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/softmax.cc
@@ -24,21 +24,27 @@ limitations under the License.
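For the resize kernels, `BindArguments` now carries only scalar work: the clamp borders (`Width() - 1`, `Height() - 1`, and `Depth() - 1` for 3D) and the per-axis factors from `CalculateResizeScale`. That helper is not part of this patch, so the sketch below reimplements the conventional definition (align-corners maps the first and last samples exactly, otherwise the plain size ratio is used) purely to show how the bound values would be consumed; it may differ from the actual TFLite GPU helper, and the real per-pixel math lives in the generated `GetResizeCode`/`GetResize3DCode` kernels above.

```
// Sketch under assumptions: CalculateResizeScaleAssumed is a guess at the
// helper's behavior, not the real implementation.
#include <algorithm>
#include <cstdio>

float CalculateResizeScaleAssumed(int src, int dst, bool align_corners) {
  return align_corners && src > 1 && dst > 1
             ? static_cast<float>(src - 1) / static_cast<float>(dst - 1)
             : static_cast<float>(src) / static_cast<float>(dst);
}

int main() {
  const int src_w = 10, dst_w = 4;
  const int border_x = src_w - 1;  // same clamp bound BindArguments sets
  const float scale_x = CalculateResizeScaleAssumed(src_w, dst_w, false);
  for (int x = 0; x < dst_w; ++x) {
    // Nearest-neighbor style source coordinate, clamped to the border.
    const int src_x = std::min(static_cast<int>(x * scale_x), border_x);
    std::printf("dst x=%d -> src x=%d (scale=%.2f)\n", x, src_x, scale_x);
  }
  return 0;
}
```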
namespace tflite { namespace gpu { namespace cl { -namespace { -std::string GetSoftmaxKernelCode( - const OperationDef& op_def, - Arguments* args) { - auto src_desc = absl::make_unique(op_def.src_tensors[0]); - if (op_def.IsBatchSupported()) { - src_desc->SetStateVar("BatchedWidth", "true"); +Softmax::Softmax(Softmax&& kernel) : GPUOperation(std::move(kernel)) {} + +Softmax& Softmax::operator=(Softmax&& kernel) { + if (this != &kernel) { + GPUOperation::operator=(std::move(kernel)); } - args->AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc)); - auto dst_desc = absl::make_unique(op_def.dst_tensors[0]); + return *this; +} + +std::string Softmax::GetSoftmaxKernelCode(const OperationDef& op_def) { + auto src_desc = op_def.src_tensors[0]; if (op_def.IsBatchSupported()) { - dst_desc->SetStateVar("BatchedWidth", "true"); + src_desc.SetStateVar("BatchedWidth", "true"); } - args->AddObjectRef("dst_tensor", AccessType::WRITE, std::move(dst_desc)); + AddSrcTensor("src_tensor", src_desc); + auto dst_desc = op_def.dst_tensors[0]; + if (op_def.IsBatchSupported()) { + dst_desc.SetStateVar("BatchedWidth", "true"); + } + AddDstTensor("dst_tensor", dst_desc); std::string c = GetCommonDefines(op_def.precision); c += "__kernel void main_function(\n"; @@ -64,19 +70,9 @@ std::string GetSoftmaxKernelCode( c += "}\n"; return c; } -} // namespace - -Softmax::Softmax(Softmax&& kernel) : GPUOperation(std::move(kernel)) {} - -Softmax& Softmax::operator=(Softmax&& kernel) { - if (this != &kernel) { - GPUOperation::operator=(std::move(kernel)); - } - return *this; -} absl::Status Softmax::Compile(const CreationContext& creation_context) { - std::string code = GetSoftmaxKernelCode(definition_, &args_); + std::string code = GetSoftmaxKernelCode(definition_); std::string element_wise_code; RETURN_IF_ERROR( MergeOperations(linked_operations_, &args_, &element_wise_code)); @@ -88,12 +84,6 @@ absl::Status Softmax::Compile(const CreationContext& creation_context) { *creation_context.device, &kernel_); } -absl::Status Softmax::BindArguments() { - RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); - RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); - return absl::OkStatus(); -} - int3 Softmax::GetGridSize() const { const int grid_x = dst_[0]->Width() * dst_[0]->Batch(); const int grid_y = dst_[0]->Height(); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/softmax.h b/tensorflow/lite/delegates/gpu/cl/kernels/softmax.h index eac06caa767..62925a6c67a 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/softmax.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/softmax.h @@ -31,7 +31,6 @@ class Softmax : public GPUOperation { Softmax() = default; explicit Softmax(const OperationDef& definition) : GPUOperation(definition) {} - absl::Status BindArguments() override; int3 GetGridSize() const override; absl::Status Compile(const CreationContext& creation_context) override; @@ -42,6 +41,9 @@ class Softmax : public GPUOperation { Softmax& operator=(const Softmax&) = delete; friend Softmax CreateSoftmax(); + + private: + std::string GetSoftmaxKernelCode(const OperationDef& op_def); }; Softmax CreateSoftmax(const OperationDef& definition); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.cc b/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.cc index 33dd2857262..8f2cd8d4c23 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.cc @@ -23,20 +23,24 @@ limitations under the License. 
namespace tflite { namespace gpu { namespace cl { -namespace { -std::string GetSoftmaxKernelCode(const OperationDef& op_def, Arguments* args) { - args->AddObjectRef( - "src_tensor", AccessType::READ, - absl::make_unique(op_def.src_tensors[0])); - args->AddObjectRef( - "dst_tensor", AccessType::WRITE, - absl::make_unique(op_def.dst_tensors[0])); - args->AddFloat("mask_x"); - args->AddFloat("mask_y"); - args->AddFloat("mask_z"); - args->AddFloat("mask_w"); - args->AddInt("slices_x32"); +Softmax1x1::Softmax1x1(Softmax1x1&& kernel) : GPUOperation(std::move(kernel)) {} + +Softmax1x1& Softmax1x1::operator=(Softmax1x1&& kernel) { + if (this != &kernel) { + GPUOperation::operator=(std::move(kernel)); + } + return *this; +} + +std::string Softmax1x1::GetSoftmaxKernelCode(const OperationDef& op_def) { + AddSrcTensor("src_tensor", op_def.src_tensors[0]); + AddDstTensor("dst_tensor", op_def.dst_tensors[0]); + args_.AddFloat("mask_x"); + args_.AddFloat("mask_y"); + args_.AddFloat("mask_z"); + args_.AddFloat("mask_w"); + args_.AddInt("slices_x32"); std::string c = GetCommonDefines(op_def.precision); c += "__kernel void main_function(\n"; @@ -98,19 +102,9 @@ std::string GetSoftmaxKernelCode(const OperationDef& op_def, Arguments* args) { c += "}\n"; return c; } -} // namespace - -Softmax1x1::Softmax1x1(Softmax1x1&& kernel) : GPUOperation(std::move(kernel)) {} - -Softmax1x1& Softmax1x1::operator=(Softmax1x1&& kernel) { - if (this != &kernel) { - GPUOperation::operator=(std::move(kernel)); - } - return *this; -} absl::Status Softmax1x1::Compile(const CreationContext& creation_context) { - std::string code = GetSoftmaxKernelCode(definition_, &args_); + std::string code = GetSoftmaxKernelCode(definition_); std::string element_wise_code; work_group_size_ = int3(32, 1, 1); RETURN_IF_ERROR( @@ -124,8 +118,6 @@ absl::Status Softmax1x1::Compile(const CreationContext& creation_context) { } absl::Status Softmax1x1::BindArguments() { - RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); - RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); float4 mask = GetMaskForLastPlane(src_[0]->Channels()); RETURN_IF_ERROR(args_.SetFloat("mask_x", mask.x)); RETURN_IF_ERROR(args_.SetFloat("mask_y", mask.y)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.h b/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.h index f749a7b3db6..2f6ff94df1a 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.h @@ -44,6 +44,9 @@ class Softmax1x1 : public GPUOperation { Softmax1x1& operator=(const Softmax1x1&) = delete; friend Softmax1x1 CreateSoftmax1x1(); + + private: + std::string GetSoftmaxKernelCode(const OperationDef& op_def); }; Softmax1x1 CreateSoftmax1x1(const OperationDef& definition); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.cc b/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.cc index 37c3e092995..0df91dbec60 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.cc @@ -25,16 +25,22 @@ limitations under the License. 
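`Softmax1x1::BindArguments` above feeds the kernel a per-component mask obtained from `GetMaskForLastPlane(src_[0]->Channels())` plus a `slices_x32` count. The helper itself is outside this diff; the sketch below assumes the usual meaning of such a mask, namely that channels are packed four per slice and the padding lanes of the last slice must be zeroed so they do not contribute to the softmax reduction. The `MaskForLastPlaneAssumed` name and the channel count are illustrative, not taken from the patch.

```
// Sketch under assumptions about what the "mask for the last plane" encodes;
// the real GetMaskForLastPlane may differ in detail.
#include <cstdio>

struct float4 { float x, y, z, w; };

float4 MaskForLastPlaneAssumed(int channels) {
  const int rem = channels % 4;
  const int valid = rem == 0 ? 4 : rem;  // valid lanes in the last FLT4 slice
  float vals[4] = {0.0f, 0.0f, 0.0f, 0.0f};
  for (int i = 0; i < valid; ++i) vals[i] = 1.0f;
  return float4{vals[0], vals[1], vals[2], vals[3]};
}

int main() {
  const int channels = 6;  // two slices of 4, the last one only half full
  const float4 mask = MaskForLastPlaneAssumed(channels);
  std::printf("mask = (%g, %g, %g, %g)\n", mask.x, mask.y, mask.z, mask.w);
  // Expected: (1, 1, 0, 0), so the two padding lanes drop out of the sum.
  return 0;
}
```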
namespace tflite { namespace gpu { namespace cl { -namespace { -std::string GetSpaceToDepthCode(const OperationDef& op_def, Arguments* args) { - args->AddObjectRef( - "src_tensor", AccessType::READ, - absl::make_unique(op_def.src_tensors[0])); - args->AddObjectRef( - "dst_tensor", AccessType::WRITE, - absl::make_unique(op_def.dst_tensors[0])); - args->AddInt("block_size"); +SpaceToDepth::SpaceToDepth(SpaceToDepth&& operation) + : GPUOperation(std::move(operation)), attr_(operation.attr_) {} + +SpaceToDepth& SpaceToDepth::operator=(SpaceToDepth&& operation) { + if (this != &operation) { + attr_ = operation.attr_; + GPUOperation::operator=(std::move(operation)); + } + return *this; +} + +std::string SpaceToDepth::GetSpaceToDepthCode(const OperationDef& op_def) { + AddSrcTensor("src_tensor", op_def.src_tensors[0]); + AddDstTensor("dst_tensor", op_def.dst_tensors[0]); + args_.AddInt("block_size"); std::string c = GetCommonDefines(op_def.precision); c += "__kernel void main_function(\n"; @@ -76,21 +82,8 @@ std::string GetSpaceToDepthCode(const OperationDef& op_def, Arguments* args) { return c; } -} // namespace - -SpaceToDepth::SpaceToDepth(SpaceToDepth&& operation) - : GPUOperation(std::move(operation)), attr_(operation.attr_) {} - -SpaceToDepth& SpaceToDepth::operator=(SpaceToDepth&& operation) { - if (this != &operation) { - attr_ = operation.attr_; - GPUOperation::operator=(std::move(operation)); - } - return *this; -} - absl::Status SpaceToDepth::Compile(const CreationContext& creation_context) { - std::string code = GetSpaceToDepthCode(definition_, &args_); + std::string code = GetSpaceToDepthCode(definition_); std::string element_wise_code; RETURN_IF_ERROR( MergeOperations(linked_operations_, &args_, &element_wise_code)); @@ -103,8 +96,6 @@ absl::Status SpaceToDepth::Compile(const CreationContext& creation_context) { } absl::Status SpaceToDepth::BindArguments() { - RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); - RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); RETURN_IF_ERROR(args_.SetInt("block_size", attr_.block_size)); return absl::OkStatus(); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.h b/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.h index 99a0ca0c55c..e44d4eb781b 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.h @@ -40,6 +40,8 @@ class SpaceToDepth : public GPUOperation { SpaceToDepth& operator=(const SpaceToDepth&) = delete; private: + std::string GetSpaceToDepthCode(const OperationDef& op_def); + SpaceToDepthAttributes attr_; }; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/special/depthwise_conv_plus_1x1_conv.cc b/tensorflow/lite/delegates/gpu/cl/kernels/special/depthwise_conv_plus_1x1_conv.cc index 8f5d94fdc69..abb56b5f41f 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/special/depthwise_conv_plus_1x1_conv.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/special/depthwise_conv_plus_1x1_conv.cc @@ -26,108 +26,6 @@ limitations under the License. 
namespace tflite { namespace gpu { namespace cl { -namespace { -std::string GenerateCode(const OperationDef& op_def, - const DepthwiseConvolution2DAttributes& dw_attr, - int result_depth, const CLDevice& device, - Arguments* args) { - auto src_desc = absl::make_unique(op_def.src_tensors[0]); - src_desc->SetTextureAddressMode(GetFastestZeroMode(device)); - args->AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc)); - args->AddObjectRef( - "dst_tensor", AccessType::WRITE, - absl::make_unique(op_def.dst_tensors[0])); - - args->AddInt("stride_x", dw_attr.strides.w); - args->AddInt("padding_x", -dw_attr.padding.prepended.w); - args->AddInt("dilation_x", dw_attr.dilations.w); - args->AddInt("stride_y", dw_attr.strides.h); - args->AddInt("padding_y", -dw_attr.padding.prepended.h); - args->AddInt("dilation_y", dw_attr.dilations.h); - - const auto src_tensor_type = op_def.src_tensors[0].storage_type; - - const bool manual_clamp = src_tensor_type == TensorStorageType::BUFFER || - src_tensor_type == TensorStorageType::IMAGE_BUFFER; - - std::string c = GetCommonDefines(op_def.precision); - c += "__kernel void main_function(\n"; - c += "$0) {\n"; - if (op_def.dst_tensors[0].HasAxis(Axis::BATCH)) { - c += " int linear_id = get_global_id(0);\n"; - c += " int X = linear_id / args.dst_tensor.Batch();\n"; - c += " int B = linear_id % args.dst_tensor.Batch();\n"; - c += " args.dst_tensor.SetBatchRef(B);\n"; - c += " args.src_tensor.SetBatchRef(B);\n"; - } else { - c += " int X = get_global_id(0);\n"; - } - c += " int Y = get_global_id(1);\n"; - c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height()) { " - "\n"; - c += " return; \n"; - c += " } \n"; - c += " __constant FLT4* constants = args.constants.GetPtr();\n"; - int intermediate_depth = DivideRoundUp(dw_attr.weights.shape.i, 4); - int weights_counter = 0; - for (int d = 0; d < intermediate_depth; ++d) { - c += " FLT4 dw_res_" + std::to_string(d) + " = constants[" + - std::to_string(weights_counter++) + "];\n"; - } - c += " int x_offseted = X * args.stride_x + args.padding_x;\n"; - c += " int y_offseted = Y * args.stride_y + args.padding_y;\n"; - c += " int x_c, y_c;\n"; - if (manual_clamp) { - c += " bool x_in, y_in;\n"; - } - c += " FLT4 src;\n"; - for (int ky = 0; ky < dw_attr.weights.shape.h; ++ky) { - c += " y_c = y_offseted + " + std::to_string(ky) + " * args.dilation_y;\n"; - if (manual_clamp) { - c += " y_in = y_c >= 0 && y_c < args.src_tensor.Height();\n"; - c += " y_c = clamp(y_c, 0, args.src_tensor.Height() - 1);\n"; - } - for (int kx = 0; kx < dw_attr.weights.shape.w; ++kx) { - c += " x_c = x_offseted + " + std::to_string(kx) + - " * args.dilation_x;\n"; - if (manual_clamp) { - c += " x_in = x_c >= 0 && x_c < args.src_tensor.Width();\n"; - c += " x_c = clamp(x_c, 0, args.src_tensor.Width() - 1);\n"; - } - for (int d = 0; d < intermediate_depth; ++d) { - std::string multiplier = manual_clamp ? 
"* (FLT)(x_in && y_in)" : ""; - c += " src = args.src_tensor.Read(x_c, y_c, " + std::to_string(d) + - ")" + multiplier + ";\n"; - c += " dw_res_" + std::to_string(d) + " += src * constants[" + - std::to_string(weights_counter++) + "];\n"; - } - } - } - for (int d = 0; d < result_depth; ++d) { - c += " FLT4 conv_res_" + std::to_string(d) + " = constants[" + - std::to_string(weights_counter++) + "];\n"; - } - for (int d = 0; d < result_depth; ++d) { - for (int s = 0; s < intermediate_depth; ++s) { - std::string src = "dw_res_" + std::to_string(s); - std::string dst = "conv_res_" + std::to_string(d); - c += " " + dst + " += " + src + ".x * constants[" + - std::to_string(weights_counter++) + "];\n"; - c += " " + dst + " += " + src + ".y * constants[" + - std::to_string(weights_counter++) + "];\n"; - c += " " + dst + " += " + src + ".z * constants[" + - std::to_string(weights_counter++) + "];\n"; - c += " " + dst + " += " + src + ".w * constants[" + - std::to_string(weights_counter++) + "];\n"; - } - c += " args.dst_tensor.Write(conv_res_" + std::to_string(d) + ", X, Y, " + - std::to_string(d) + ");\n"; - } - c += "}\n"; - - return c; -} -} // namespace DepthwiseConvPlus1x1Conv::DepthwiseConvPlus1x1Conv( const OperationDef& definition, @@ -247,10 +145,108 @@ absl::Status DepthwiseConvPlus1x1Conv::UploadWeights( return absl::OkStatus(); } +std::string DepthwiseConvPlus1x1Conv::GenerateCode( + const OperationDef& op_def, const DepthwiseConvolution2DAttributes& dw_attr, + int result_depth, const CLDevice& device) { + auto src_desc = op_def.src_tensors[0]; + src_desc.SetTextureAddressMode(GetFastestZeroMode(device)); + AddSrcTensor("src_tensor", src_desc); + AddDstTensor("dst_tensor", op_def.dst_tensors[0]); + + args_.AddInt("stride_x", dw_attr.strides.w); + args_.AddInt("padding_x", -dw_attr.padding.prepended.w); + args_.AddInt("dilation_x", dw_attr.dilations.w); + args_.AddInt("stride_y", dw_attr.strides.h); + args_.AddInt("padding_y", -dw_attr.padding.prepended.h); + args_.AddInt("dilation_y", dw_attr.dilations.h); + + const auto src_tensor_type = op_def.src_tensors[0].storage_type; + + const bool manual_clamp = src_tensor_type == TensorStorageType::BUFFER || + src_tensor_type == TensorStorageType::IMAGE_BUFFER; + + std::string c = GetCommonDefines(op_def.precision); + c += "__kernel void main_function(\n"; + c += "$0) {\n"; + if (op_def.dst_tensors[0].HasAxis(Axis::BATCH)) { + c += " int linear_id = get_global_id(0);\n"; + c += " int X = linear_id / args.dst_tensor.Batch();\n"; + c += " int B = linear_id % args.dst_tensor.Batch();\n"; + c += " args.dst_tensor.SetBatchRef(B);\n"; + c += " args.src_tensor.SetBatchRef(B);\n"; + } else { + c += " int X = get_global_id(0);\n"; + } + c += " int Y = get_global_id(1);\n"; + c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height()) { " + "\n"; + c += " return; \n"; + c += " } \n"; + c += " __constant FLT4* constants = args.constants.GetPtr();\n"; + int intermediate_depth = DivideRoundUp(dw_attr.weights.shape.i, 4); + int weights_counter = 0; + for (int d = 0; d < intermediate_depth; ++d) { + c += " FLT4 dw_res_" + std::to_string(d) + " = constants[" + + std::to_string(weights_counter++) + "];\n"; + } + c += " int x_offseted = X * args.stride_x + args.padding_x;\n"; + c += " int y_offseted = Y * args.stride_y + args.padding_y;\n"; + c += " int x_c, y_c;\n"; + if (manual_clamp) { + c += " bool x_in, y_in;\n"; + } + c += " FLT4 src;\n"; + for (int ky = 0; ky < dw_attr.weights.shape.h; ++ky) { + c += " y_c = y_offseted + " + 
std::to_string(ky) + " * args.dilation_y;\n"; + if (manual_clamp) { + c += " y_in = y_c >= 0 && y_c < args.src_tensor.Height();\n"; + c += " y_c = clamp(y_c, 0, args.src_tensor.Height() - 1);\n"; + } + for (int kx = 0; kx < dw_attr.weights.shape.w; ++kx) { + c += " x_c = x_offseted + " + std::to_string(kx) + + " * args.dilation_x;\n"; + if (manual_clamp) { + c += " x_in = x_c >= 0 && x_c < args.src_tensor.Width();\n"; + c += " x_c = clamp(x_c, 0, args.src_tensor.Width() - 1);\n"; + } + for (int d = 0; d < intermediate_depth; ++d) { + std::string multiplier = manual_clamp ? "* (FLT)(x_in && y_in)" : ""; + c += " src = args.src_tensor.Read(x_c, y_c, " + std::to_string(d) + + ")" + multiplier + ";\n"; + c += " dw_res_" + std::to_string(d) + " += src * constants[" + + std::to_string(weights_counter++) + "];\n"; + } + } + } + for (int d = 0; d < result_depth; ++d) { + c += " FLT4 conv_res_" + std::to_string(d) + " = constants[" + + std::to_string(weights_counter++) + "];\n"; + } + for (int d = 0; d < result_depth; ++d) { + for (int s = 0; s < intermediate_depth; ++s) { + std::string src = "dw_res_" + std::to_string(s); + std::string dst = "conv_res_" + std::to_string(d); + c += " " + dst + " += " + src + ".x * constants[" + + std::to_string(weights_counter++) + "];\n"; + c += " " + dst + " += " + src + ".y * constants[" + + std::to_string(weights_counter++) + "];\n"; + c += " " + dst + " += " + src + ".z * constants[" + + std::to_string(weights_counter++) + "];\n"; + c += " " + dst + " += " + src + ".w * constants[" + + std::to_string(weights_counter++) + "];\n"; + } + c += " args.dst_tensor.Write(conv_res_" + std::to_string(d) + ", X, Y, " + + std::to_string(d) + ");\n"; + } + c += "}\n"; + + return c; +} + absl::Status DepthwiseConvPlus1x1Conv::Compile( const CreationContext& creation_context) { std::string code = GenerateCode(definition_, dw_attr_, result_depth_, - *creation_context.device, &args_); + *creation_context.device); std::string element_wise_code; RETURN_IF_ERROR( MergeOperations(linked_operations_, &args_, &element_wise_code)); @@ -262,12 +258,6 @@ absl::Status DepthwiseConvPlus1x1Conv::Compile( *creation_context.device, &kernel_); } -absl::Status DepthwiseConvPlus1x1Conv::BindArguments() { - RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); - RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); - return absl::OkStatus(); -} - int3 DepthwiseConvPlus1x1Conv::GetGridSize() const { const int grid_x = dst_[0]->Width() * dst_[0]->Batch(); const int grid_y = dst_[0]->Height(); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/special/depthwise_conv_plus_1x1_conv.h b/tensorflow/lite/delegates/gpu/cl/kernels/special/depthwise_conv_plus_1x1_conv.h index 5d8da6ac973..90a7ea751f7 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/special/depthwise_conv_plus_1x1_conv.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/special/depthwise_conv_plus_1x1_conv.h @@ -36,7 +36,6 @@ namespace cl { class DepthwiseConvPlus1x1Conv : public GPUOperation { public: DepthwiseConvPlus1x1Conv() = default; - absl::Status BindArguments() override; int3 GetGridSize() const override; absl::Status Compile(const CreationContext& creation_context) override; @@ -60,6 +59,10 @@ class DepthwiseConvPlus1x1Conv : public GPUOperation { const Convolution2DAttributes& conv_attr, CLContext* context); + std::string GenerateCode(const OperationDef& op_def, + const DepthwiseConvolution2DAttributes& dw_attr, + int result_depth, const CLDevice& device); + DepthwiseConvolution2DAttributes dw_attr_; int 
result_depth_; }; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.cc b/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.cc index 443c4a403c1..081488e4fe8 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.cc @@ -24,72 +24,6 @@ namespace tflite { namespace gpu { namespace cl { namespace { - -std::string GetStridedSliceCode(const OperationDef& op_def, bool alignedx4, - Arguments* args) { - args->AddObjectRef( - "src_tensor", AccessType::READ, - absl::make_unique(op_def.src_tensors[0])); - args->AddObjectRef( - "dst_tensor", AccessType::WRITE, - absl::make_unique(op_def.dst_tensors[0])); - args->AddInt("offset_x"); - args->AddInt("offset_y"); - args->AddInt("offset_z"); - args->AddInt("offset_b"); - args->AddInt("stride_x"); - args->AddInt("stride_y"); - args->AddInt("stride_z"); - args->AddInt("stride_b"); - - const std::string batch_id = - op_def.dst_tensors[0].HasAxis(Axis::BATCH) ? "B" : "0"; - std::string c = GetCommonDefines(op_def.precision); - c += "__kernel void main_function(\n"; - c += "$0) {\n"; - if (op_def.dst_tensors[0].HasAxis(Axis::BATCH)) { - c += " int linear_id = get_global_id(0);\n"; - c += " int X = linear_id / args.dst_tensor.Batch();\n"; - c += " int B = linear_id % args.dst_tensor.Batch();\n"; - c += " args.dst_tensor.SetBatchRef(B);\n"; - } else { - c += " int X = get_global_id(0);\n"; - } - c += " int Y = get_global_id(1);\n"; - c += " int Z = get_global_id(2);\n"; - c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || " - "Z >= args.dst_tensor.Slices()) { \n"; - c += " return; \n"; - c += " } \n"; - c += " int s_x = X * args.stride_x + args.offset_x;\n"; - c += " int s_y = Y * args.stride_y + args.offset_y;\n"; - if (op_def.src_tensors[0].HasAxis(Axis::BATCH)) { - c += " int s_b = " + batch_id + " * args.stride_b + args.offset_b;\n"; - c += " args.src_tensor.SetBatchRef(s_b);\n"; - } - if (alignedx4) { - c += " int s_z = Z + args.offset_z;\n"; - c += " FLT4 result = args.src_tensor.Read(s_x, s_y, s_z);\n"; - } else { - c += " FLT4 result;\n"; - const std::string postfixes[] = {"x", "y", "z", "w"}; - for (int i = 0; i < 4; ++i) { - c += " {\n"; - const std::string channel = "(Z * 4 + " + std::to_string(i) + ")"; - c += " int s_ch = " + channel + " * args.stride_z + args.offset_z;\n"; - c += " int s_z = min(s_ch >> 2, args.src_tensor.Slices() - 1);\n"; - c += " int s_z_rem = s_ch & 3;\n"; - c += " FLT4 t = args.src_tensor.Read(s_x, s_y, s_z);\n"; - c += " FLT t_ar[4] = {t.x, t.y, t.z, t.w};\n"; - c += " result." + postfixes[i] + " = t_ar[s_z_rem];\n"; - c += " }\n"; - } - } - c += " args.dst_tensor.Write(result, X, Y, Z);\n"; - c += "}\n"; - return c; -} - bool Is4Aligned(const SliceAttributes& attr) { return attr.strides.c == 1 && attr.starts.c % 4 == 0; } @@ -158,9 +92,69 @@ StridedSlice& StridedSlice::operator=(StridedSlice&& operation) { return *this; } +std::string StridedSlice::GetStridedSliceCode(const OperationDef& op_def, + bool alignedx4) { + AddSrcTensor("src_tensor", op_def.src_tensors[0]); + AddDstTensor("dst_tensor", op_def.dst_tensors[0]); + args_.AddInt("offset_x"); + args_.AddInt("offset_y"); + args_.AddInt("offset_z"); + args_.AddInt("offset_b"); + args_.AddInt("stride_x"); + args_.AddInt("stride_y"); + args_.AddInt("stride_z"); + args_.AddInt("stride_b"); + + const std::string batch_id = + op_def.dst_tensors[0].HasAxis(Axis::BATCH) ? 
"B" : "0"; + std::string c = GetCommonDefines(op_def.precision); + c += "__kernel void main_function(\n"; + c += "$0) {\n"; + if (op_def.dst_tensors[0].HasAxis(Axis::BATCH)) { + c += " int linear_id = get_global_id(0);\n"; + c += " int X = linear_id / args.dst_tensor.Batch();\n"; + c += " int B = linear_id % args.dst_tensor.Batch();\n"; + c += " args.dst_tensor.SetBatchRef(B);\n"; + } else { + c += " int X = get_global_id(0);\n"; + } + c += " int Y = get_global_id(1);\n"; + c += " int Z = get_global_id(2);\n"; + c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || " + "Z >= args.dst_tensor.Slices()) { \n"; + c += " return; \n"; + c += " } \n"; + c += " int s_x = X * args.stride_x + args.offset_x;\n"; + c += " int s_y = Y * args.stride_y + args.offset_y;\n"; + if (op_def.src_tensors[0].HasAxis(Axis::BATCH)) { + c += " int s_b = " + batch_id + " * args.stride_b + args.offset_b;\n"; + c += " args.src_tensor.SetBatchRef(s_b);\n"; + } + if (alignedx4) { + c += " int s_z = Z + args.offset_z;\n"; + c += " FLT4 result = args.src_tensor.Read(s_x, s_y, s_z);\n"; + } else { + c += " FLT4 result;\n"; + const std::string postfixes[] = {"x", "y", "z", "w"}; + for (int i = 0; i < 4; ++i) { + c += " {\n"; + const std::string channel = "(Z * 4 + " + std::to_string(i) + ")"; + c += " int s_ch = " + channel + " * args.stride_z + args.offset_z;\n"; + c += " int s_z = min(s_ch >> 2, args.src_tensor.Slices() - 1);\n"; + c += " int s_z_rem = s_ch & 3;\n"; + c += " FLT4 t = args.src_tensor.Read(s_x, s_y, s_z);\n"; + c += " FLT t_ar[4] = {t.x, t.y, t.z, t.w};\n"; + c += " result." + postfixes[i] + " = t_ar[s_z_rem];\n"; + c += " }\n"; + } + } + c += " args.dst_tensor.Write(result, X, Y, Z);\n"; + c += "}\n"; + return c; +} + absl::Status StridedSlice::Compile(const CreationContext& creation_context) { - std::string code = - GetStridedSliceCode(definition_, Is4Aligned(attributes_), &args_); + std::string code = GetStridedSliceCode(definition_, Is4Aligned(attributes_)); std::string element_wise_code; RETURN_IF_ERROR( MergeOperations(linked_operations_, &args_, &element_wise_code)); @@ -173,8 +167,6 @@ absl::Status StridedSlice::Compile(const CreationContext& creation_context) { } absl::Status StridedSlice::BindArguments() { - RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); - RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); int4 offset = GetOffset(attributes_, src_[0]->Width(), src_[0]->Height(), src_[0]->Channels(), src_[0]->Batch()); RETURN_IF_ERROR(args_.SetInt("offset_x", offset.x)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.h b/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.h index 40005db7b21..fb8acd308f0 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.h @@ -38,6 +38,8 @@ class StridedSlice : public GPUOperation { StridedSlice& operator=(const StridedSlice&) = delete; private: + std::string GetStridedSliceCode(const OperationDef& op_def, bool alignedx4); + SliceAttributes attributes_; }; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/transpose.cc b/tensorflow/lite/delegates/gpu/cl/kernels/transpose.cc index eb62e1e35f7..93e6241fc55 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/transpose.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/transpose.cc @@ -24,17 +24,22 @@ limitations under the License. 
namespace tflite { namespace gpu { namespace cl { -namespace { -std::string GetTransposeCode( - const OperationDef& op_def, const TransposeAttributes& attr, - Arguments* args) { - args->AddObjectRef( - "src_tensor", AccessType::READ, - absl::make_unique(op_def.src_tensors[0])); - args->AddObjectRef( - "dst_tensor", AccessType::WRITE, - absl::make_unique(op_def.dst_tensors[0])); +Transpose::Transpose(Transpose&& operation) + : GPUOperation(std::move(operation)), attr_(operation.attr_) {} + +Transpose& Transpose::operator=(Transpose&& operation) { + if (this != &operation) { + attr_ = operation.attr_; + GPUOperation::operator=(std::move(operation)); + } + return *this; +} + +std::string Transpose::GetTransposeCode(const OperationDef& op_def, + const TransposeAttributes& attr) { + AddSrcTensor("src_tensor", op_def.src_tensors[0]); + AddDstTensor("dst_tensor", op_def.dst_tensors[0]); const std::string batch_id = op_def.dst_tensors[0].HasAxis(Axis::BATCH) ? "B" : "0"; @@ -101,21 +106,9 @@ std::string GetTransposeCode( c += "}\n"; return c; } -} // namespace - -Transpose::Transpose(Transpose&& operation) - : GPUOperation(std::move(operation)), attr_(operation.attr_) {} - -Transpose& Transpose::operator=(Transpose&& operation) { - if (this != &operation) { - attr_ = operation.attr_; - GPUOperation::operator=(std::move(operation)); - } - return *this; -} absl::Status Transpose::Compile(const CreationContext& creation_context) { - std::string code = GetTransposeCode(definition_, attr_, &args_); + std::string code = GetTransposeCode(definition_, attr_); std::string element_wise_code; RETURN_IF_ERROR( MergeOperations(linked_operations_, &args_, &element_wise_code)); @@ -127,12 +120,6 @@ absl::Status Transpose::Compile(const CreationContext& creation_context) { *creation_context.device, &kernel_); } -absl::Status Transpose::BindArguments() { - RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); - RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); - return absl::OkStatus(); -} - int3 Transpose::GetGridSize() const { const int grid_x = dst_[0]->Width() * dst_[0]->Batch(); const int grid_y = dst_[0]->Height(); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/transpose.h b/tensorflow/lite/delegates/gpu/cl/kernels/transpose.h index 36976d57ea6..10cc9720ad4 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/transpose.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/transpose.h @@ -28,7 +28,6 @@ class Transpose : public GPUOperation { public: Transpose(const OperationDef& definition, const TransposeAttributes& attr) : GPUOperation(definition), attr_(attr) {} - absl::Status BindArguments() override; int3 GetGridSize() const override; absl::Status Compile(const CreationContext& creation_context) override; @@ -39,6 +38,8 @@ class Transpose : public GPUOperation { Transpose& operator=(const Transpose&) = delete; private: + std::string GetTransposeCode(const OperationDef& op_def, + const TransposeAttributes& attr); TransposeAttributes attr_; }; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc index d64b61a6a8e..b266bd832d8 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc @@ -31,11 +31,20 @@ limitations under the License. 
namespace tflite { namespace gpu { namespace cl { -namespace { -std::string GetWinograd4x4To36Code( - const OperationDef& op_def, - Arguments* args) { +Winograd4x4To36::Winograd4x4To36(Winograd4x4To36&& operation) + : GPUOperation(std::move(operation)), padding_(operation.padding_) {} + +Winograd4x4To36& Winograd4x4To36::operator=(Winograd4x4To36&& operation) { + if (this != &operation) { + std::swap(padding_, operation.padding_); + GPUOperation::operator=(std::move(operation)); + } + return *this; +} + +std::string Winograd4x4To36::GetWinograd4x4To36Code( + const OperationDef& op_def) { std::string c = GetCommonDefines(op_def.precision); const auto src_tensor_type = op_def.src_tensors[0].storage_type; @@ -69,16 +78,14 @@ std::string GetWinograd4x4To36Code( c += "};\n"; std::string cl_type = accum_type == DataType::FLOAT16 ? "half" : "float"; - auto src_desc = absl::make_unique(op_def.src_tensors[0]); - src_desc->SetStateVar("ACCUM_FLT", cl_type); - args->AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc)); - args->AddObjectRef( - "dst_tensor", AccessType::WRITE, - absl::make_unique(op_def.dst_tensors[0])); - args->AddInt("padding_x"); - args->AddInt("padding_y"); - args->AddInt("tiles_total"); - args->AddInt("tiles_x"); + auto src_desc = op_def.src_tensors[0]; + src_desc.SetStateVar("ACCUM_FLT", cl_type); + AddSrcTensor("src_tensor", src_desc); + AddDstTensor("dst_tensor", op_def.dst_tensors[0]); + args_.AddInt("padding_x"); + args_.AddInt("padding_y"); + args_.AddInt("tiles_total"); + args_.AddInt("tiles_x"); c += "__kernel void main_function(\n"; c += "$0) {\n"; @@ -212,8 +219,114 @@ std::string GetWinograd4x4To36Code( return c; } -std::string GetWinograd36To4x4Code(const OperationDef& op_def, - Arguments* args) { +absl::Status Winograd4x4To36::Compile(const CreationContext& creation_context) { + std::vector options; + if (creation_context.device->IsAdreno()) { + options.push_back(CompilerOptions::ADRENO_MORE_WAVES); + } + if (definition_.precision == CalculationsPrecision::F16 && + creation_context.device->IsPowerVR()) { + options.push_back(CompilerOptions::POWERVR_FP16); + } + RETURN_IF_ERROR(UploadBt(creation_context.context)); + std::string code = GetWinograd4x4To36Code(definition_); + std::string element_wise_code; + RETURN_IF_ERROR( + MergeOperations(linked_operations_, &args_, &element_wise_code)); + RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), + {{"dst_tensor", element_wise_code}}, + &code)); + RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel( + code, "main_function", options, *creation_context.context, + *creation_context.device, &kernel_)); + work_group_size_ = SelectBestWorkGroup(); + return absl::OkStatus(); +} + +absl::Status Winograd4x4To36::UploadBt(CLContext* context) { + tflite::gpu::Tensor bt_aligned; + bt_aligned.shape = Linear(6 * 8); + bt_aligned.data.resize(6 * 8); + auto bt_mat = BtMatrixForWinograd4x4To6x6(); + for (int y = 0; y < 6; ++y) { + for (int x = 0; x < 6; ++x) { + bt_aligned.data[y * 8 + x] = bt_mat[y * 6 + x]; + } + bt_aligned.data[y * 8 + 6] = 0.0f; + bt_aligned.data[y * 8 + 7] = 0.0f; + } + + TensorLinearDescriptor desc; + desc.storage_type = LinearStorageType::TEXTURE_2D; + desc.element_type = definition_.GetDataType(); + + LinearStorage lt; + RETURN_IF_ERROR(CreateLinearStorage(desc, bt_aligned, context, <)); + args_.AddObject("bt", AccessType::READ, + absl::make_unique(std::move(lt)), + absl::make_unique(desc)); + return absl::OkStatus(); +} + +int3 Winograd4x4To36::SelectBestWorkGroup() { + 
const std::vector wgs = {{8, 6, 4}, {8, 6, 2}, {4, 6, 2}, + {4, 6, 2}, {2, 6, 2}, {2, 6, 1}, + {1, 6, 1}, {1, 3, 1}, {1, 1, 1}}; + return GetFirstSuitableWorkGroup(wgs, kernel_.GetMaxWorkGroupSize()); +} + +absl::Status Winograd4x4To36::BindArguments() { + const int tiles_x = DivideRoundUp( + src_[0]->Width() + padding_.prepended.w + padding_.appended.w - 2, 4); + const int tiles_y = DivideRoundUp( + src_[0]->Height() + padding_.prepended.h + padding_.appended.h - 2, 4); + const int tiles_total = tiles_x * tiles_y; + RETURN_IF_ERROR(args_.SetInt("padding_x", -padding_.prepended.w)); + RETURN_IF_ERROR(args_.SetInt("padding_y", -padding_.prepended.h)); + RETURN_IF_ERROR(args_.SetInt("tiles_total", tiles_total)); + RETURN_IF_ERROR(args_.SetInt("tiles_x", tiles_x)); + return absl::OkStatus(); +} + +int3 Winograd4x4To36::GetGridSize() const { + const int grid_x = dst_[0]->Width() * dst_[0]->Batch(); + const int grid_y = 6; + const int grid_z = dst_[0]->Slices(); + return int3(grid_x, grid_y, grid_z); +} + +absl::Status Winograd4x4To36::Tune(const TuningParameters& params) { + switch (params.tuning_type) { + case TuningType::EXHAUSTIVE: + RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); + return GetBestWorkGroup(params, kernel_, grid_size_, &work_group_size_); + case TuningType::FAST: + default: + work_group_size_ = SelectBestWorkGroup(); + return absl::OkStatus(); + } +} + +absl::Status CreateWinograd4x4To36(const CreationContext& creation_context, + const OperationDef& definition, + const Padding2D& padding, + Winograd4x4To36* result) { + *result = Winograd4x4To36(definition, padding); + return result->UploadBt(creation_context.context); +} + +Winograd36To4x4::Winograd36To4x4(Winograd36To4x4&& operation) + : GPUOperation(std::move(operation)) {} + +Winograd36To4x4& Winograd36To4x4::operator=(Winograd36To4x4&& operation) { + if (this != &operation) { + GPUOperation::operator=(std::move(operation)); + } + return *this; +} + +std::string Winograd36To4x4::GetWinograd36To4x4Code( + const OperationDef& op_def) { std::string c = GetCommonDefines(op_def.precision); switch (op_def.precision) { @@ -231,13 +344,11 @@ std::string GetWinograd36To4x4Code(const OperationDef& op_def, : DataType::FLOAT32; std::string cl_type = accum_type == DataType::FLOAT16 ? 
"half" : "float"; - auto src_desc = absl::make_unique(op_def.src_tensors[0]); - src_desc->SetStateVar("ACCUM_FLT", cl_type); - args->AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc)); - args->AddObjectRef( - "dst_tensor", AccessType::WRITE, - absl::make_unique(op_def.dst_tensors[0])); - args->AddInt("tiles_x"); + auto src_desc = op_def.src_tensors[0]; + src_desc.SetStateVar("ACCUM_FLT", cl_type); + AddSrcTensor("src_tensor", src_desc); + AddDstTensor("dst_tensor", op_def.dst_tensors[0]); + args_.AddInt("tiles_x"); auto at_mat = AtMatrixForWinograd4x4To6x6(); c += "constant ACCUM_FLT At[24] = {\n"; @@ -322,126 +433,6 @@ std::string GetWinograd36To4x4Code(const OperationDef& op_def, c += "}\n"; return c; } -} // namespace - -Winograd4x4To36::Winograd4x4To36(Winograd4x4To36&& operation) - : GPUOperation(std::move(operation)), padding_(operation.padding_) {} - -Winograd4x4To36& Winograd4x4To36::operator=(Winograd4x4To36&& operation) { - if (this != &operation) { - std::swap(padding_, operation.padding_); - GPUOperation::operator=(std::move(operation)); - } - return *this; -} - -absl::Status Winograd4x4To36::Compile(const CreationContext& creation_context) { - std::vector options; - if (creation_context.device->IsAdreno()) { - options.push_back(CompilerOptions::ADRENO_MORE_WAVES); - } - if (definition_.precision == CalculationsPrecision::F16 && - creation_context.device->IsPowerVR()) { - options.push_back(CompilerOptions::POWERVR_FP16); - } - RETURN_IF_ERROR(UploadBt(creation_context.context)); - std::string code = GetWinograd4x4To36Code(definition_, &args_); - std::string element_wise_code; - RETURN_IF_ERROR( - MergeOperations(linked_operations_, &args_, &element_wise_code)); - RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), - {{"dst_tensor", element_wise_code}}, - &code)); - RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel( - code, "main_function", options, *creation_context.context, - *creation_context.device, &kernel_)); - work_group_size_ = SelectBestWorkGroup(); - return absl::OkStatus(); -} - -absl::Status Winograd4x4To36::UploadBt(CLContext* context) { - tflite::gpu::Tensor bt_aligned; - bt_aligned.shape = Linear(6 * 8); - bt_aligned.data.resize(6 * 8); - auto bt_mat = BtMatrixForWinograd4x4To6x6(); - for (int y = 0; y < 6; ++y) { - for (int x = 0; x < 6; ++x) { - bt_aligned.data[y * 8 + x] = bt_mat[y * 6 + x]; - } - bt_aligned.data[y * 8 + 6] = 0.0f; - bt_aligned.data[y * 8 + 7] = 0.0f; - } - - TensorLinearDescriptor desc; - desc.storage_type = LinearStorageType::TEXTURE_2D; - desc.element_type = definition_.GetDataType(); - - LinearStorage lt; - RETURN_IF_ERROR(CreateLinearStorage(desc, bt_aligned, context, <)); - args_.AddObject("bt", AccessType::READ, - absl::make_unique(std::move(lt)), - absl::make_unique(desc)); - return absl::OkStatus(); -} - -int3 Winograd4x4To36::SelectBestWorkGroup() { - const std::vector wgs = {{8, 6, 4}, {8, 6, 2}, {4, 6, 2}, - {4, 6, 2}, {2, 6, 2}, {2, 6, 1}, - {1, 6, 1}, {1, 3, 1}, {1, 1, 1}}; - return GetFirstSuitableWorkGroup(wgs, kernel_.GetMaxWorkGroupSize()); -} - -absl::Status Winograd4x4To36::BindArguments() { - const int tiles_x = DivideRoundUp( - src_[0]->Width() + padding_.prepended.w + padding_.appended.w - 2, 4); - const int tiles_y = DivideRoundUp( - src_[0]->Height() + padding_.prepended.h + padding_.appended.h - 2, 4); - const int tiles_total = tiles_x * tiles_y; - RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); - RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); - 
RETURN_IF_ERROR(args_.SetInt("padding_x", -padding_.prepended.w)); - RETURN_IF_ERROR(args_.SetInt("padding_y", -padding_.prepended.h)); - RETURN_IF_ERROR(args_.SetInt("tiles_total", tiles_total)); - RETURN_IF_ERROR(args_.SetInt("tiles_x", tiles_x)); - return absl::OkStatus(); -} - -int3 Winograd4x4To36::GetGridSize() const { - const int grid_x = dst_[0]->Width() * dst_[0]->Batch(); - const int grid_y = 6; - const int grid_z = dst_[0]->Slices(); - return int3(grid_x, grid_y, grid_z); -} - -absl::Status Winograd4x4To36::Tune(const TuningParameters& params) { - switch (params.tuning_type) { - case TuningType::EXHAUSTIVE: - RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); - return GetBestWorkGroup(params, kernel_, grid_size_, &work_group_size_); - case TuningType::FAST: - default: - work_group_size_ = SelectBestWorkGroup(); - return absl::OkStatus(); - } -} - -absl::Status CreateWinograd4x4To36(const CreationContext& creation_context, - const OperationDef& definition, - const Padding2D& padding, - Winograd4x4To36* result) { - *result = Winograd4x4To36(definition, padding); - return result->UploadBt(creation_context.context); -} - -Winograd36To4x4::Winograd36To4x4(Winograd36To4x4&& operation) - : GPUOperation(std::move(operation)) {} - -Winograd36To4x4& Winograd36To4x4::operator=(Winograd36To4x4&& operation) { - if (this != &operation) { - GPUOperation::operator=(std::move(operation)); - } - return *this; -} absl::Status Winograd36To4x4::Compile(const CreationContext& creation_context) { std::vector options; @@ -449,7 +440,7 @@ absl::Status Winograd36To4x4::Compile(const CreationContext& creation_context) { creation_context.device->IsPowerVR()) { options.push_back(CompilerOptions::POWERVR_FP16); } - std::string code = GetWinograd36To4x4Code(definition_, &args_); + std::string code = GetWinograd36To4x4Code(definition_); std::string element_wise_code; RETURN_IF_ERROR( MergeOperations(linked_operations_, &args_, &element_wise_code)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.h b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.h index 7fe0fc071ca..3a4e1e97188 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.h @@ -56,6 +56,8 @@ class Winograd4x4To36 : public GPUOperation { absl::Status UploadBt(CLContext* context); + std::string GetWinograd4x4To36Code(const OperationDef& op_def); + // Must be called after kernel compilation int3 SelectBestWorkGroup(); @@ -93,6 +95,8 @@ class Winograd36To4x4 : public GPUOperation { absl::Status UploadAt(CLContext* context); + std::string GetWinograd36To4x4Code(const OperationDef& op_def); + // Must be called after kernel compilation int3 SelectBestWorkGroup(); }; From c8480da012933e054fe50ffae8ac814e3eff296c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 29 Jul 2020 17:17:59 -0700 Subject: [PATCH 1679/2522] Legalize matrix-diag-part to xla. 
PiperOrigin-RevId: 323901789 Change-Id: Ibc67a0811d2b9fb16c9496f121528df27ad5fec1 --- .../mlir/tensorflow/ir/tf_generated_ops.td | 120 ++++++++ .../compiler/mlir/xla/tests/legalize-tf.mlir | 136 +++++++++ .../mlir/xla/transforms/legalize_tf.cc | 267 +++++++++++++++++- 3 files changed, 521 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td index 54b85525346..9b86f7e3b49 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td @@ -5068,6 +5068,126 @@ which has shape (2, 4, 4) TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } +def TF_MatrixDiagPartV3Op : TF_Op<"MatrixDiagPartV3", [NoSideEffect]> { + let summary = "Returns the batched diagonal part of a batched tensor."; + + let description = [{ +Returns a tensor with the `k[0]`-th to `k[1]`-th diagonals of the batched +`input`. + +Assume `input` has `r` dimensions `[I, J, ..., L, M, N]`. +Let `max_diag_len` be the maximum length among all diagonals to be extracted, +`max_diag_len = min(M + min(k[1], 0), N + min(-k[0], 0))` +Let `num_diags` be the number of diagonals to extract, +`num_diags = k[1] - k[0] + 1`. + +If `num_diags == 1`, the output tensor is of rank `r - 1` with shape +`[I, J, ..., L, max_diag_len]` and values: + +``` +diagonal[i, j, ..., l, n] + = input[i, j, ..., l, n+y, n+x] ; if 0 <= n+y < M and 0 <= n+x < N, + padding_value ; otherwise. +``` +where `y = max(-k[1], 0)`, `x = max(k[1], 0)`. + +Otherwise, the output tensor has rank `r` with dimensions +`[I, J, ..., L, num_diags, max_diag_len]` with values: + +``` +diagonal[i, j, ..., l, m, n] + = input[i, j, ..., l, n+y, n+x] ; if 0 <= n+y < M and 0 <= n+x < N, + padding_value ; otherwise. +``` +where `d = k[1] - m`, `y = max(-d, 0) - offset`, and `x = max(d, 0) - offset`. + +`offset` is zero except when the alignment of the diagonal is to the right. +``` +offset = max_diag_len - diag_len(d) ; if (`align` in {RIGHT_LEFT, RIGHT_RIGHT} + and `d >= 0`) or + (`align` in {LEFT_RIGHT, RIGHT_RIGHT} + and `d <= 0`) + 0 ; otherwise +``` +where `diag_len(d) = min(cols - max(d, 0), rows + min(d, 0))`. + +The input must be at least a matrix. + +For example: + +``` +input = np.array([[[1, 2, 3, 4], # Input shape: (2, 3, 4) + [5, 6, 7, 8], + [9, 8, 7, 6]], + [[5, 4, 3, 2], + [1, 2, 3, 4], + [5, 6, 7, 8]]]) + +# A main diagonal from each batch. +tf.matrix_diag_part(input) ==> [[1, 6, 7], # Output shape: (2, 3) + [5, 2, 7]] + +# A superdiagonal from each batch. +tf.matrix_diag_part(input, k = 1) + ==> [[2, 7, 6], # Output shape: (2, 3) + [4, 3, 8]] + +# A band from each batch. +tf.matrix_diag_part(input, k = (-1, 2)) + ==> [[[0, 3, 8], # Output shape: (2, 4, 3) + [2, 7, 6], + [1, 6, 7], + [5, 8, 0]], + [[0, 3, 4], + [4, 3, 8], + [5, 2, 7], + [1, 6, 0]]] + +# LEFT_RIGHT alignment. +tf.matrix_diag_part(input, k = (-1, 2), align="LEFT_RIGHT") + ==> [[[3, 8, 0], # Output shape: (2, 4, 3) + [2, 7, 6], + [1, 6, 7], + [0, 5, 8]], + [[3, 4, 0], + [4, 3, 8], + [5, 2, 7], + [0, 1, 6]]] + +# max_diag_len can be shorter than the main diagonal. 
+tf.matrix_diag_part(input, k = (-2, -1)) + ==> [[[5, 8], + [9, 0]], + [[1, 6], + [5, 0]]] + +# padding_value = 9 +tf.matrix_diag_part(input, k = (1, 3), padding_value = 9) + ==> [[[9, 9, 4], # Output shape: (2, 3, 3) + [9, 3, 8], + [2, 7, 6]], + [[9, 9, 2], + [9, 3, 4], + [4, 3, 8]]] + +``` + }]; + + let arguments = (ins + TF_Tensor:$input, + I32Tensor:$k, + TF_Tensor:$padding_value, + + DefaultValuedAttr, "RIGHT_LEFT">:$align + ); + + let results = (outs + TF_Tensor:$diagonal + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; +} + def TF_MatrixDiagV2Op : TF_Op<"MatrixDiagV2", [NoSideEffect]> { let summary = [{ Returns a batched diagonal tensor with given batched diagonal values. diff --git a/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir b/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir index a2c4159db1a..7c2c374a68f 100644 --- a/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir +++ b/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir @@ -495,6 +495,142 @@ func @diag_part(%arg0: tensor<4x3x4x3xf32>) -> tensor<4x3xf32> { return %0: tensor<4x3xf32> } +//===----------------------------------------------------------------------===// +// MatrixDiagPart +//===----------------------------------------------------------------------===// + +// CHECK-LABEL: func @matrix_diag_part +// CHECK-SAME: %[[ARG:.*]]: tensor<7x140x128xi32> +func @matrix_diag_part(%arg0: tensor<7x140x128xi32>) -> tensor<7x22x128xi32> { + // CHECK-DAG: %[[V0:.*]] = mhlo.constant dense<42> : tensor + // CHECK-DAG: %[[V1:.*]] = mhlo.constant dense<[-10, 11]> : tensor<2xi32> + // CHECK-DAG: %[[V2:.*]] = "mhlo.iota"() {iota_dimension = 1 : i64} : () -> tensor<1x22x128xi32> + // CHECK-DAG: %[[V3:.*]] = "mhlo.iota"() {iota_dimension = 2 : i64} : () -> tensor<1x22x128xi32> + // CHECK-DAG: %[[V4:.*]] = mhlo.constant dense<0> : tensor + // CHECK-DAG: %[[V5:.*]] = "mhlo.broadcast"(%[[V4]]) {broadcast_sizes = dense<[1, 22, 128]> : tensor<3xi64>} : (tensor) -> tensor<1x22x128xi32> + // CHECK-DAG: %[[V6:.*]] = mhlo.constant dense : tensor + // CHECK-DAG: %[[V7:.*]] = "mhlo.broadcast"(%[[V6]]) {broadcast_sizes = dense<[1, 22, 128]> : tensor<3xi64>} : (tensor) -> tensor<1x22x128xi1> + // CHECK-DAG: %[[V8:.*]] = mhlo.constant dense : tensor + // CHECK-DAG: %[[V9:.*]] = "mhlo.broadcast"(%[[V8]]) {broadcast_sizes = dense<[1, 22, 128]> : tensor<3xi64>} : (tensor) -> tensor<1x22x128xi1> + // CHECK-DAG: %[[V10:.*]] = mhlo.constant dense<11> : tensor + // CHECK-DAG: %[[V11:.*]] = "mhlo.broadcast"(%[[V10]]) {broadcast_sizes = dense<[1, 22, 128]> : tensor<3xi64>} : (tensor) -> tensor<1x22x128xi32> + // CHECK-DAG: %[[V12:.*]] = mhlo.constant dense<140> : tensor + // CHECK-DAG: %[[V13:.*]] = "mhlo.broadcast"(%[[V12]]) {broadcast_sizes = dense<[1, 22, 128]> : tensor<3xi64>} : (tensor) -> tensor<1x22x128xi32> + // CHECK-DAG: %[[V14:.*]] = mhlo.constant dense<128> : tensor + // CHECK-DAG: %[[V15:.*]] = "mhlo.broadcast"(%[[V14]]) {broadcast_sizes = dense<[1, 22, 128]> : tensor<3xi64>} : (tensor) -> tensor<1x22x128xi32> + // CHECK-DAG: %[[V16:.*]] = mhlo.constant dense<128> : tensor + // CHECK-DAG: %[[V17:.*]] = "mhlo.broadcast"(%[[V16]]) {broadcast_sizes = dense<[1, 22, 128]> : tensor<3xi64>} : (tensor) -> tensor<1x22x128xi32> + // CHECK-DAG: %[[V18:.*]] = mhlo.subtract %[[V11]], %[[V2]] : tensor<1x22x128xi32> + // CHECK-DAG: %[[V19:.*]] = "mhlo.negate"(%[[V18]]) : (tensor<1x22x128xi32>) -> tensor<1x22x128xi32> + // CHECK-DAG: %[[V20:.*]] = mhlo.minimum %[[V18]], %[[V5]] : tensor<1x22x128xi32> + // CHECK-DAG: %[[V21:.*]] = 
mhlo.add %[[V13]], %[[V20]] : tensor<1x22x128xi32> + // CHECK-DAG: %[[V22:.*]] = mhlo.maximum %[[V18]], %[[V5]] : tensor<1x22x128xi32> + // CHECK-DAG: %[[V23:.*]] = mhlo.subtract %[[V15]], %[[V22]] : tensor<1x22x128xi32> + // CHECK-DAG: %[[V24:.*]] = mhlo.minimum %[[V21]], %[[V23]] : tensor<1x22x128xi32> + // CHECK-DAG: %[[V25:.*]] = chlo.broadcast_compare %[[V18]], %[[V5]] {comparison_direction = "GE"} : (tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<1x22x128xi1> + // CHECK-DAG: %[[V26:.*]] = mhlo.subtract %[[V17]], %[[V24]] : tensor<1x22x128xi32> + // CHECK-DAG: %[[V27:.*]] = "mhlo.select"(%[[V25]], %[[V26]], %[[V5]]) : (tensor<1x22x128xi1>, tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<1x22x128xi32> + // CHECK-DAG: %[[V28:.*]] = mhlo.maximum %[[V18]], %[[V5]] : tensor<1x22x128xi32> + // CHECK-DAG: %[[V29:.*]] = mhlo.subtract %[[V28]], %[[V27]] : tensor<1x22x128xi32> + // CHECK-DAG: %[[V30:.*]] = mhlo.maximum %[[V19]], %[[V5]] : tensor<1x22x128xi32> + // CHECK-DAG: %[[V31:.*]] = mhlo.subtract %[[V30]], %[[V27]] : tensor<1x22x128xi32> + // CHECK-DAG: %[[V32:.*]] = mhlo.add %[[V3]], %[[V29]] : tensor<1x22x128xi32> + // CHECK-DAG: %[[V33:.*]] = mhlo.add %[[V3]], %[[V31]] : tensor<1x22x128xi32> + // CHECK-DAG: %[[V34:.*]] = chlo.broadcast_compare %[[V32]], %[[V5]] {comparison_direction = "GE"} : (tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<1x22x128xi1> + // CHECK-DAG: %[[V35:.*]] = chlo.broadcast_compare %[[V32]], %[[V15]] {comparison_direction = "LT"} : (tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<1x22x128xi1> + // CHECK-DAG: %[[V36:.*]] = mhlo.and %[[V34]], %[[V35]] : tensor<1x22x128xi1> + // CHECK-DAG: %[[V37:.*]] = chlo.broadcast_compare %[[V33]], %[[V5]] {comparison_direction = "GE"} : (tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<1x22x128xi1> + // CHECK-DAG: %[[V38:.*]] = chlo.broadcast_compare %[[V33]], %[[V13]] {comparison_direction = "LT"} : (tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<1x22x128xi1> + // CHECK-DAG: %[[V39:.*]] = mhlo.and %[[V37]], %[[V38]] : tensor<1x22x128xi1> + // CHECK-DAG: %[[V40:.*]] = mhlo.and %[[V36]], %[[V39]] : tensor<1x22x128xi1> + // CHECK-DAG: %[[V41:.*]] = "mhlo.reshape"(%[[V40]]) : (tensor<1x22x128xi1>) -> tensor<22x128xi1> + // CHECK-DAG: %[[V42:.*]] = "mhlo.concatenate"(%[[V33]], %[[V32]]) {dimension = 0 : i64} : (tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<2x22x128xi32> + // CHECK-DAG: %[[V43:.*]] = "mhlo.gather"(%[[ARG]], %[[V42]]) {dimension_numbers = {collapsed_slice_dims = dense<[1, 2]> : tensor<2xi64>, index_vector_dim = 0 : i64, offset_dims = dense<0> : tensor<1xi64>, start_index_map = dense<[1, 2]> : tensor<2xi64>}, indices_are_sorted = false, slice_sizes = dense<[7, 1, 1]> : tensor<3xi64>} : (tensor<7x140x128xi32>, tensor<2x22x128xi32>) -> tensor<7x22x128xi32> + // CHECK-DAG: %[[V44:.*]] = "mhlo.broadcast"(%[[V41]]) {broadcast_sizes = dense<7> : tensor<1xi64>} : (tensor<22x128xi1>) -> tensor<7x22x128xi1> + // CHECK-DAG: %[[V45:.*]] = "mhlo.broadcast"(%[[V0]]) {broadcast_sizes = dense<[7, 22, 128]> : tensor<3xi64>} : (tensor) -> tensor<7x22x128xi32> + // CHECK: %[[V46:.*]] = "mhlo.select"(%[[V44]], %[[V43]], %[[V45]]) : (tensor<7x22x128xi1>, tensor<7x22x128xi32>, tensor<7x22x128xi32>) -> tensor<7x22x128xi32> + // CHECK: return %[[V46]] : tensor<7x22x128xi32> + %0 = mhlo.constant dense<42> : tensor // padding value + %1 = mhlo.constant dense<[-10, 11]> : tensor<2xi32> // k + %2 = "tf.MatrixDiagPartV3"(%arg0, %1, %0) { + T = i32, align = "RIGHT_LEFT" + } : (tensor<7x140x128xi32>, 
tensor<2xi32>, tensor) -> tensor<7x22x128xi32> + return %2: tensor<7x22x128xi32> +} + +// CHECK-LABEL: func @matrix_diag_part_single_diagonal +func @matrix_diag_part_single_diagonal(%arg0: tensor<7x140x128xi32>) -> tensor<7x128xi32> { + %0 = mhlo.constant dense<42> : tensor // padding value + %1 = mhlo.constant dense<0> : tensor<2xi32> // k + %2 = "tf.MatrixDiagPartV3"(%arg0, %1, %0) { + T = i32, align = "RIGHT_LEFT" + } : (tensor<7x140x128xi32>, tensor<2xi32>, tensor) -> tensor<7x128xi32> + // CHECK: %[[result:.*]] = "mhlo.reshape"({{.*}}) : (tensor<7x1x128xi32>) -> tensor<7x128xi32> + // CHECK: return %[[result]] : tensor<7x128xi32> + return %2: tensor<7x128xi32> +} + +// CHECK-LABEL: func @matrix_diag_part_align_ll +func @matrix_diag_part_align_ll(%arg0: tensor<7x140x128xi32>) -> tensor<7x22x128xi32> { + %0 = mhlo.constant dense<42> : tensor // padding value + %1 = mhlo.constant dense<[-10, 11]> : tensor<2xi32> // k + %2 = "tf.MatrixDiagPartV3"(%arg0, %1, %0) { + T = i32, align = "LEFT_LEFT" + } : (tensor<7x140x128xi32>, tensor<2xi32>, tensor) -> tensor<7x22x128xi32> + // CHECK: %[[false:.*]] = mhlo.constant dense : tensor + // CHECK: %[[b_false:.*]] = "mhlo.broadcast"(%[[false]]) {broadcast_sizes = dense<[1, 22, 128]> : tensor<3xi64>} : (tensor) -> tensor<1x22x128xi1> + // CHECK: %{{[0-9]*}} = "mhlo.select"(%[[b_false]], %{{[0-9]*}}, %{{[0-9]*}}) : (tensor<1x22x128xi1>, tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<1x22x128xi32> + return %2: tensor<7x22x128xi32> +} + +// CHECK-LABEL: func @matrix_diag_part_align_lr +func @matrix_diag_part_align_lr(%arg0: tensor<7x140x128xi32>) -> tensor<7x22x128xi32> { + %0 = mhlo.constant dense<42> : tensor // padding value + %1 = mhlo.constant dense<[-10, 11]> : tensor<2xi32> // k + %2 = "tf.MatrixDiagPartV3"(%arg0, %1, %0) { + T = i32, align = "LEFT_RIGHT" + } : (tensor<7x140x128xi32>, tensor<2xi32>, tensor) -> tensor<7x22x128xi32> + // CHECK: %[[le:.*]] = chlo.broadcast_compare %{{[0-9]*}}, %{{[0-9]*}} {comparison_direction = "LE"} : (tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<1x22x128xi1> + // CHECK: %{{[0-9]*}} = "mhlo.select"(%[[le]], %{{[0-9]*}}, %{{[0-9]*}}) : (tensor<1x22x128xi1>, tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<1x22x128xi32> + return %2: tensor<7x22x128xi32> +} + +// CHECK-LABEL: func @matrix_diag_part_align_rl +func @matrix_diag_part_align_rl(%arg0: tensor<7x140x128xi32>) -> tensor<7x22x128xi32> { + %0 = mhlo.constant dense<42> : tensor // padding value + %1 = mhlo.constant dense<[-10, 11]> : tensor<2xi32> // k + %2 = "tf.MatrixDiagPartV3"(%arg0, %1, %0) { + T = i32, align = "RIGHT_LEFT" + } : (tensor<7x140x128xi32>, tensor<2xi32>, tensor) -> tensor<7x22x128xi32> + // CHECK: %[[ge:.*]] = chlo.broadcast_compare %{{[0-9]*}}, %{{[0-9]*}} {comparison_direction = "GE"} : (tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<1x22x128xi1> + // CHECK: %{{[0-9]*}} = "mhlo.select"(%[[ge]], %{{[0-9]*}}, %{{[0-9]*}}) : (tensor<1x22x128xi1>, tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<1x22x128xi32> + return %2: tensor<7x22x128xi32> +} + +// CHECK-LABEL: func @matrix_diag_part_align_rr +func @matrix_diag_part_align_rr(%arg0: tensor<7x140x128xi32>) -> tensor<7x22x128xi32> { + %0 = mhlo.constant dense<42> : tensor // padding value + %1 = mhlo.constant dense<[-10, 11]> : tensor<2xi32> // k + %2 = "tf.MatrixDiagPartV3"(%arg0, %1, %0) { + T = i32, align = "RIGHT_RIGHT" + } : (tensor<7x140x128xi32>, tensor<2xi32>, tensor) -> tensor<7x22x128xi32> + // CHECK: %[[true:.*]] = mhlo.constant dense : tensor + // 
CHECK: %[[b_true:.*]] = "mhlo.broadcast"(%[[true]]) {broadcast_sizes = dense<[1, 22, 128]> : tensor<3xi64>} : (tensor) -> tensor<1x22x128xi1> + // CHECK: %{{[0-9]*}} = "mhlo.select"(%[[b_true]], %{{[0-9]*}}, %{{[0-9]*}}) : (tensor<1x22x128xi1>, tensor<1x22x128xi32>, tensor<1x22x128xi32>) -> tensor<1x22x128xi32> + return %2: tensor<7x22x128xi32> +} + +// CHECK-LABEL: func @matrix_diag_part_align_7d +// CHECK: (%arg0: tensor<3x5x7x9x11x13x17xf32>) -> tensor<3x5x7x9x11x4x10xf32> +func @matrix_diag_part_align_7d(%arg0: tensor<3x5x7x9x11x13x17xf32>) -> tensor<3x5x7x9x11x4x10xf32> { + %0 = mhlo.constant dense<-1.> : tensor // padding value + %1 = mhlo.constant dense<[-6, -3]> : tensor<2xi32> // k + %2 = "tf.MatrixDiagPartV3"(%arg0, %1, %0) { + T = f32, align = "LEFT_RIGHT" + } : (tensor<3x5x7x9x11x13x17xf32>, tensor<2xi32>, tensor) -> tensor<3x5x7x9x11x4x10xf32> + return %2: tensor<3x5x7x9x11x4x10xf32> +} + //===----------------------------------------------------------------------===// // Einsum. //===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc index b1e74e354fe..78247fd481a 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc @@ -1387,6 +1387,269 @@ class ConvertDiagPartOp : public OpRewritePattern { } }; +// Converts TensorFlow MatrixDiagPartV3Op to HLO ops. +class ConvertMatrixDiagPartV3Op + : public OpRewritePattern { + using Shape = llvm::SmallVector; + + // Parse the "k" parameter. MatrixDiagPartV3 allows specifying the diagonal(s) + // with k. This can be either a single value (for a single diagonal) or a + // tuple of two values (starting and ending diagonal, for a band). + LogicalResult ExtractK(TF::MatrixDiagPartV3Op op, int64_t (*k)[2]) const { + DenseIntElementsAttr kattr; + if (!matchPattern(op.k(), m_Constant(&kattr))) { + return failure(); + } + DenseIntElementsAttr::iterator it = kattr.begin(); + (*k)[0] = (*it).getSExtValue(); + it++; + if (it == kattr.end()) { + // Handle input like e.g. "k = 5", in which case we extract a single + // diagonal. + (*k)[1] = (*k)[0]; + } else { + // Handle input like e.g. "k = [-1, 1]", in which case we extract a + // band (multiple diagonals). + (*k)[1] = (*it).getSExtValue(); + } + return success(); + } + + // Utility method for broadcasting integer constants to a given shape. + BroadcastOp BroadcastConstant(Location loc, Shape shape, int32_t constant, + int int_size, PatternRewriter &rewriter) const { + return rewriter.create( + loc, RankedTensorType::get(shape, rewriter.getIntegerType(int_size)), + GetScalarConstOfType(rewriter.getIntegerType(int_size), loc, constant, + &rewriter), + GetI64ElementsAttr(shape, &rewriter)); + } + + public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(TF::MatrixDiagPartV3Op op, + PatternRewriter &rewriter) const override { + Location loc = op.getLoc(); + ShapedType input_type = op.input().getType().dyn_cast(); + auto element_type = input_type.getElementType(); + + // Align is a string specifying how superdiagonals and subdiagonals should + // be aligned/padded for diagonals that are shorter than max_diag_len. The + // format is "{super}_{sub}", with {super} the superdiagonal alignment and + // {sub} the subdiagonal alignment. "LEFT" means rows will be padded to the + // left, "RIGHT" means rows will be padded to the right. 
The default is + // "RIGHT_LEFT". + StringRef align = op.getAttrOfType("align").getValue(); + enum Alignment { kLeft, kRight }; + + // default is RIGHT_LEFT + Alignment superdiagonal_align = kRight; + Alignment subdiagonal_align = kLeft; + + if (align == "RIGHT_LEFT") { + superdiagonal_align = kRight; + subdiagonal_align = kLeft; + } else if (align == "RIGHT_RIGHT") { + superdiagonal_align = kRight; + subdiagonal_align = kRight; + } else if (align == "LEFT_RIGHT") { + superdiagonal_align = kLeft; + subdiagonal_align = kRight; + } else if (align == "LEFT_LEFT") { + superdiagonal_align = kLeft; + subdiagonal_align = kLeft; + } else { + return failure(); // unsupported alignment + } + + // MatrixDiagPart operates on a matrix of shape [I, J, ..., L, M, N], and + // will extract the diagonal(s) out of [M, N], for all [I, J, ..., L]. + if (!input_type || !input_type.hasStaticShape()) return failure(); + int64_t num_dims = input_type.getRank(); + if (num_dims < 2) return failure(); + int64_t rows = input_type.getDimSize(num_dims - 2); // rows + int64_t cols = input_type.getDimSize(num_dims - 1); // cols + + // We extract the diagonals from k[0] up to and including k[1]. + // Addressing is 0 for the main diagonal. (So k = [0, 0] would just extract + // the main diagonal). It's negative for subdiagonals (under and to the left + // of the main diagonal) and positive for superdiagonals (above and to the + // right of the main diagonal). + int64_t k[2]; + if (failed(ExtractK(op, &k))) return failure(); + int num_diags = k[1] - k[0] + 1; + + // Shifting diagonals away from the main diagonal might shorten them. This + // is the longest diagonal we will see. We make this the last dimension of + // the output shape. + int64_t max_diag_len = + std::min(rows + std::min(k[1], static_cast(0)), + cols + std::min(-k[0], static_cast(0))); + + // The first dimension is the index vector dimension we'll use for gather. + // It's 1 here, but will be 2 once we glue x and y together. + Shape indices_shape({1, num_diags, max_diag_len}); + + RankedTensorType iota_type = + RankedTensorType::get(indices_shape, rewriter.getIntegerType(32)); + Value iotaM = + rewriter.create(loc, iota_type, rewriter.getI64IntegerAttr(1)); + Value iotaN = + rewriter.create(loc, iota_type, rewriter.getI64IntegerAttr(2)); + + // Broadcasted constants, of the same shape as iotaM and iotaN. + Value b_zero = BroadcastConstant(loc, indices_shape, 0, 32, rewriter); + Value b_false = BroadcastConstant(loc, indices_shape, 0, 1, rewriter); + Value b_true = BroadcastConstant(loc, indices_shape, 1, 1, rewriter); + Value b_k1 = BroadcastConstant(loc, indices_shape, k[1], 32, rewriter); + Value b_rows = BroadcastConstant(loc, indices_shape, rows, 32, rewriter); + Value b_cols = BroadcastConstant(loc, indices_shape, cols, 32, rewriter); + Value b_max_diag_len = + BroadcastConstant(loc, indices_shape, max_diag_len, 32, rewriter); + + // d = k[1] - m + // (A.k.a. the number of the diagonal, depending on m. Note that we + // subtract m here. This means we start with the superdiagonals and + // move downwards towards the subdiagonals. So the start indices will + // be decreasing.) + Value d = rewriter.create(loc, b_k1, iotaM); + Value neg_d = rewriter.create(loc, d); + + // diag_len_d = min(rows + min(d, 0), cols - max(d, 0)) + // (Length of a diagonal for a given d. Same as max_diag_len for m = 0.) 
+ Value diag_len_d = rewriter.create( + loc, + rewriter.create(loc, b_rows, + rewriter.create(loc, d, b_zero)), + rewriter.create(loc, b_cols, + rewriter.create(loc, d, b_zero))); + + // offset is max_diag_len - diag_len_d if we're padding, 0 otherwise. + Value cmp; + if (subdiagonal_align == kRight && superdiagonal_align == kRight) { + cmp = b_true; + } else if (superdiagonal_align == kRight) { + // offset = d>=0 ? max_diag_len - diag_len_d : 0 + cmp = rewriter.create(loc, d, b_zero); + } else if (subdiagonal_align == kRight) { + // offset = d<=0 ? max_diag_len - diag_len_d : 0 + cmp = rewriter.create(loc, d, b_zero); + } else { + // offset = 0 + cmp = b_false; + } + + // This offset shifts the diagonals to the "left" or "right", depending + // on alignment. + Value offset = rewriter.create( + loc, b_zero.getType(), cmp, + rewriter.create(loc, b_max_diag_len, diag_len_d), b_zero); + + // x = max(d, 0) - offset + // y = max(-d, 0) - offset + Value x = rewriter.create( + loc, rewriter.create(loc, d, b_zero), offset); + Value y = rewriter.create( + loc, rewriter.create(loc, neg_d, b_zero), offset); + + Value n_plus_x = rewriter.create(loc, iotaN, x); + Value n_plus_y = rewriter.create(loc, iotaN, y); + + // GatherOp is happy about letting us index out of bounds values, but those + // values will be undefined. So we mask them later. Set up the boolean + // expression that tells us which entries, in the output shape, are out of + // bounds and thus become the padding_value. + Value x_in_bounds = rewriter.create( + loc, + rewriter.create(loc, b_false.getType(), n_plus_x, + b_zero), + rewriter.create(loc, b_false.getType(), n_plus_x, b_cols)); + Value y_in_bounds = rewriter.create( + loc, + rewriter.create(loc, b_false.getType(), n_plus_y, + b_zero), + rewriter.create(loc, b_false.getType(), n_plus_y, b_rows)); + Value in_bounds = rewriter.create( + loc, + RankedTensorType::get(Shape({num_diags, max_diag_len}), + rewriter.getIntegerType(1)), + rewriter.create(loc, x_in_bounds, y_in_bounds)); + + // Now combine x and y into the index data structure needed for gather. + Shape concat_shape({2, num_diags, max_diag_len}); + Value start_indices = rewriter.create( + loc, RankedTensorType::get(concat_shape, rewriter.getIntegerType(32)), + mlir::ValueRange({n_plus_y, n_plus_x}), + mlir::IntegerAttr::get(rewriter.getIntegerType(64), 0)); + + // Shape of the final output. (Except for dimension folding in the + // single diagonal case.) + Shape output_shape; + for (int i = 0; i < num_dims - 2; i++) { + output_shape.push_back(input_type.getDimSize(i)); + } + output_shape.push_back(num_diags); + output_shape.push_back(max_diag_len); + auto output_type = RankedTensorType::get(output_shape, element_type); + + // A slice is the shape of what GatherOp copies per lookup. So the last + // two dimensions (M, N in the matrix-diag-part docs) are where we go + // through entry by entry. + ArrayRef input_shape = input_type.getShape(); + Shape slice_sizes(input_shape.begin(), input_shape.end()); + int slice_dimensions = slice_sizes.size(); + slice_sizes[slice_dimensions - 2] = 1; + slice_sizes[slice_dimensions - 1] = 1; + + // Dimensions of the input we won't see in the output (M and N). + SmallVector collapsed_dims( + {slice_dimensions - 2, slice_dimensions - 1}); + + // Which dimensions (in the input) the two offset "columns" map to. + SmallVector start_index_map({num_dims - 2, num_dims - 1}); + + // Gather the diagonal entries. 
+ // TODO(kramm): For a single diagonal, this might be slower than the + // mask + sum approach. Special-case num_diags==1? + auto dims_attr = GatherDimensionNumbers::get( + /*offset_dims=*/GetI64ElementsAttrForSeq(0, num_dims - 2, &rewriter), + /*collapsed_slice_dims=*/GetI64ElementsAttr(collapsed_dims, &rewriter), + /*start_index_map=*/GetI64ElementsAttr(start_index_map, &rewriter), + /*index_vector_dim=*/rewriter.getI64IntegerAttr(0), + rewriter.getContext()); + Value gather = rewriter.create( + loc, output_type, op.input(), start_indices, dims_attr, + GetI64ElementsAttr(slice_sizes, &rewriter)); + + // We now need to broadcast the "in_bounds" boolean expression, as well as + // the padding value, to do the final select. + Shape broadcast_bounds; + for (int i = 0; i < output_shape.size() - 2; i++) { + broadcast_bounds.push_back(output_shape[i]); + } + Value b_in_bounds = rewriter.create( + loc, RankedTensorType::get(output_shape, rewriter.getIntegerType(1)), + in_bounds, GetI64ElementsAttr(broadcast_bounds, &rewriter)); + Value b_padding = rewriter.create( + loc, output_type, op.padding_value(), + GetI64ElementsAttr(output_shape, &rewriter)); + + // Replace all out-of-bounds values in the result with padding_value. + Value result = rewriter.create(loc, output_type, b_in_bounds, + gather, b_padding); + + if (num_diags == 1) { + // matrix_diag_part folds away the 1-sized band dimension if we only + // extract a single diagonal. + result = rewriter.create(loc, op.getType(), result); + } + + rewriter.replaceOp(op, result); + return success(); + } +}; + // Converts TensorFlow EinsumOp to either HLO EinsumOp or UnaryEinsumOp // depending on arity of the op. class ConvertEinsumOp : public OpRewritePattern { @@ -5486,8 +5749,8 @@ LogicalResult legalizeTF(Operation *op, bool allow_partial_conversion, ConvertAvgPool3DGradOp, ConvertMaxPool2DOp, ConvertMaxPool3DOp, ConvertMaxPool2DGradOp, ConvertMaxPool3DGradOp, ConvertMeanOp, ConvertOneHotOp, ConvertOutfeedEnqueueTupleOp, ConvertProdOp, ConvertQrOp, - ConvertDynamicRangeOp, ConvertRangeOp, ConvertSelectV2Op, - ConvertSigmoidOp, ConvertShapeOp, ConvertSizeOp, + ConvertMatrixDiagPartV3Op, ConvertDynamicRangeOp, ConvertRangeOp, + ConvertSelectV2Op, ConvertSigmoidOp, ConvertShapeOp, ConvertSizeOp, ConvertSoftmaxOp, ConvertSoftmaxOp, ConvertSplitOp, ConvertSplitVOp, ConvertStridedSliceOp, ConvertStridedSliceGradOp, ConvertSumOp, From 30b567764a5e7b8c2e02dd81807441cc238d3872 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 29 Jul 2020 17:19:44 -0700 Subject: [PATCH 1680/2522] Update ops-related pbtxt files. 
PiperOrigin-RevId: 323902095 Change-Id: I591d47c8e977b8d4a49399004929d01de9e0edae --- .../ops_history_v2/CollectiveReduceV2.pbtxt | 66 +++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 66 +++++++++++++++++++ 2 files changed, 132 insertions(+) create mode 100644 tensorflow/core/ops/compat/ops_history_v2/CollectiveReduceV2.pbtxt diff --git a/tensorflow/core/ops/compat/ops_history_v2/CollectiveReduceV2.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/CollectiveReduceV2.pbtxt new file mode 100644 index 00000000000..dd39ac27f93 --- /dev/null +++ b/tensorflow/core/ops/compat/ops_history_v2/CollectiveReduceV2.pbtxt @@ -0,0 +1,66 @@ +op { + name: "CollectiveReduceV2" + input_arg { + name: "input" + type_attr: "T" + } + input_arg { + name: "group_size" + type: DT_INT32 + } + input_arg { + name: "group_key" + type: DT_INT32 + } + input_arg { + name: "instance_key" + type: DT_INT32 + } + output_arg { + name: "data" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_HALF + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "merge_op" + type: "string" + allowed_values { + list { + s: "Min" + s: "Max" + s: "Mul" + s: "Add" + } + } + } + attr { + name: "final_op" + type: "string" + allowed_values { + list { + s: "Id" + s: "Div" + } + } + } + attr { + name: "communication_hint" + type: "string" + default_value { + s: "auto" + } + } + is_stateful: true +} diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 91e0a3b74a3..ccaad23b6f7 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -7668,6 +7668,72 @@ op { } is_stateful: true } +op { + name: "CollectiveReduceV2" + input_arg { + name: "input" + type_attr: "T" + } + input_arg { + name: "group_size" + type: DT_INT32 + } + input_arg { + name: "group_key" + type: DT_INT32 + } + input_arg { + name: "instance_key" + type: DT_INT32 + } + output_arg { + name: "data" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_HALF + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "merge_op" + type: "string" + allowed_values { + list { + s: "Min" + s: "Max" + s: "Mul" + s: "Add" + } + } + } + attr { + name: "final_op" + type: "string" + allowed_values { + list { + s: "Id" + s: "Div" + } + } + } + attr { + name: "communication_hint" + type: "string" + default_value { + s: "auto" + } + } + is_stateful: true +} op { name: "CombinedNonMaxSuppression" input_arg { From 26264bf486f9f3ddf1db7a194bc6706cdb143e2a Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Wed, 29 Jul 2020 17:21:23 -0700 Subject: [PATCH 1681/2522] [ci_build] Axe unused `pip.sh` PiperOrigin-RevId: 323902378 Change-Id: I87fa1470c8045b3cd71f0f8d91e6ce881e429ebe --- tensorflow/tools/ci_build/builds/pip.sh | 581 ------------------------ 1 file changed, 581 deletions(-) delete mode 100755 tensorflow/tools/ci_build/builds/pip.sh diff --git a/tensorflow/tools/ci_build/builds/pip.sh b/tensorflow/tools/ci_build/builds/pip.sh deleted file mode 100755 index d9f2a4df61a..00000000000 --- a/tensorflow/tools/ci_build/builds/pip.sh +++ /dev/null @@ -1,581 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -# -# Build the Python PIP installation package for TensorFlow and install -# the package. -# The PIP installation is done using the --user flag. -# -# Usage: -# pip.sh CONTAINER_TYPE [--test_tutorials] [--integration_tests] [bazel flags] -# -# When executing the Python unit tests, the script obeys the shell -# variables: TF_BUILD_BAZEL_CLEAN, TF_BUILD_INSTALL_EXTRA_PIP_PACKAGES, -# NO_TEST_ON_INSTALL, PIP_TEST_ROOT, TF_NIGHTLY -# -# TF_BUILD_BAZEL_CLEAN, if set to any non-empty and non-0 value, directs the -# script to perform bazel clean prior to main build and test steps. -# -# TF_BUILD_INSTALL_EXTRA_PIP_PACKAGES overrides the default extra pip packages -# to be installed in virtualenv before run_pip_tests.sh is called. Multiple -# package names are separated with spaces. -# -# If NO_TEST_ON_INSTALL has any non-empty and non-0 value, the test-on-install -# part will be skipped. -# -# If NO_TEST_USER_OPS has any non-empty and non-0 value, the testing of user- -# defined ops against the installation will be skipped. -# -# If NO_TEST_TFDBG_BINARIES has any non-empty and non-0 value, the testing of -# TensorFlow Debugger (tfdbg) binaries and examples will be skipped. -# -# If PIP_TEST_ROOT has a non-empty and a non-0 value, the whl files will be -# placed in that directory. -# -# If TF_NIGHTLY has a non-empty and a non-0 value, the name of the project will -# be changed to tf_nightly or tf_nightly_gpu. -# -# Any flags not listed in the usage above will be passed directly to Bazel. -# -# If the --test_tutorials flag is set, it will cause the script to run the -# tutorial tests (see test_tutorials.sh) after the PIP -# installation and the Python unit tests-on-install step. Likewise, -# --integration_tests will cause the integration tests (integration_tests.sh) -# to run. 
-# - -# Helper function: Strip leading and trailing whitespaces -str_strip () { - echo -e "$1" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' -} - -# Fixed naming patterns for wheel (.whl) files given different python versions -if [[ $(uname) == "Linux" ]]; then - declare -A WHL_TAGS - WHL_TAGS=(["2.7"]="cp27-none" ["3.4"]="cp34-cp34m" ["3.5"]="cp35-cp35m") -fi - - -INSTALL_EXTRA_PIP_PACKAGES=${TF_BUILD_INSTALL_EXTRA_PIP_PACKAGES} - - -# Script directory -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -source "${SCRIPT_DIR}/builds_common.sh" - - -SKIP_RETURN_CODE=112 - - -# Get the command line arguments -CONTAINER_TYPE=$( echo "$1" | tr '[:upper:]' '[:lower:]' ) -shift - -if [[ -n "${TF_BUILD_BAZEL_CLEAN}" ]] && \ - [[ "${TF_BUILD_BAZEL_CLEAN}" != "0" ]]; then - echo "TF_BUILD_BAZEL_CLEAN=${TF_BUILD_BAZEL_CLEAN}: Performing 'bazel clean'" - bazel clean -fi - -DO_TEST_USER_OPS=1 -if [[ -n "${NO_TEST_USER_OPS}" ]] && \ - [[ "${NO_TEST_USER_OPS}" != "0" ]]; then - echo "NO_TEST_USER_OPS=${NO_TEST_USER_OPS}: Will skip testing of user ops" - DO_TEST_USER_OPS=0 -fi - -DO_TEST_TFDBG_BINARIES=1 -if [[ -n "${NO_TEST_TFDBG_BINARIES}" ]] && \ - [[ "${NO_TEST_TFDBG_BINARIES}" != "0" ]]; then - echo "NO_TEST_TFDBG_BINARIES=${NO_TEST_TFDBG_BINARIES}: Will skip testing of tfdbg binaries" - DO_TEST_TFDBG_BINARIES=0 -fi - -DO_TEST_TUTORIALS=0 -DO_INTEGRATION_TESTS=0 -BAZEL_FLAGS="" -while true; do - if [[ "${1}" == "--test_tutorials" ]]; then - DO_TEST_TUTORIALS=1 - elif [[ "${1}" == "--integration_tests" ]]; then - DO_INTEGRATION_TESTS=1 - else - BAZEL_FLAGS="${BAZEL_FLAGS} ${1}" - fi - - shift - if [[ -z "${1}" ]]; then - break - fi -done - -BAZEL_FLAGS=$(str_strip "${BAZEL_FLAGS}") - -if [[ -z "$GIT_TAG_OVERRIDE" ]]; then - BAZEL_FLAGS+=" --action_env=GIT_TAG_OVERRIDE" -fi - -echo "Using Bazel flags: ${BAZEL_FLAGS}" - -PIP_BUILD_TARGET="//tensorflow/tools/pip_package:build_pip_package" -GPU_FLAG="" -ROCM_FLAG="" -if [[ ${CONTAINER_TYPE} == "cpu" ]] || \ - [[ ${CONTAINER_TYPE} == "debian.jessie.cpu" ]]; then - bazel build ${BAZEL_FLAGS} ${PIP_BUILD_TARGET} || \ - die "Build failed." -elif [[ ${CONTAINER_TYPE} == "gpu" ]]; then - bazel build ${BAZEL_FLAGS} ${PIP_BUILD_TARGET} || \ - die "Build failed." - GPU_FLAG="--gpu" -elif [[ ${CONTAINER_TYPE} == "rocm" ]]; then - bazel build ${BAZEL_FLAGS} ${PIP_BUILD_TARGET} || \ - die "Build failed." - ROCM_FLAG="--rocm" -else - die "Unrecognized container type: \"${CONTAINER_TYPE}\"" -fi - -MAC_FLAG="" -if [[ $(uname) == "Darwin" ]]; then - MAC_FLAG="--mac" -fi - - -# Check if in a virtualenv -IN_VENV=$(python -c 'import sys; print("1" if hasattr(sys, "real_prefix") else "0")') -# If still in a virtualenv, deactivate it first -if [[ "$IN_VENV" == "1" ]]; then - echo "It appears that we are already in a virtualenv. Deactivating..." - deactivate || die "FAILED: Unable to deactivate from existing virtualenv" -fi - -# Obtain the path to Python binary -source tools/python_bin_path.sh - -# Assume: PYTHON_BIN_PATH is exported by the script above -if [[ -z "$PYTHON_BIN_PATH" ]]; then - die "PYTHON_BIN_PATH was not provided. Did you run configure?" -fi - -# Determine the major and minor versions of Python being used (e.g., 2.7) -# This info will be useful for determining the directory of the local pip -# installation of Python -PY_MAJOR_MINOR_VER=$(${PYTHON_BIN_PATH} -V 2>&1 | awk '{print $NF}' | cut -d. 
-f-2) -if [[ -z "${PY_MAJOR_MINOR_VER}" ]]; then - die "ERROR: Unable to determine the major.minor version of Python" -fi - -echo "Python binary path to be used in PIP install: ${PYTHON_BIN_PATH} "\ -"(Major.Minor version: ${PY_MAJOR_MINOR_VER})" - -# Create a TF_NIGHTLY argument if this is a nightly build -PROJECT_NAME="tensorflow" -NIGHTLY_FLAG="" -if [ -n "$TF_NIGHTLY" ]; then - PROJECT_NAME="tf_nightly" - NIGHTLY_FLAG="--nightly_flag" -fi - -# Build PIP Wheel file -# Set default pip file folder unless specified by env variable -if [ -z "$PIP_TEST_ROOT" ]; then - PIP_TEST_ROOT="pip_test" -fi -PIP_WHL_DIR="${PIP_TEST_ROOT}/whl" -PIP_WHL_DIR=$(realpath ${PIP_WHL_DIR}) # Get absolute path -rm -rf ${PIP_WHL_DIR} && mkdir -p ${PIP_WHL_DIR} -bazel-bin/tensorflow/tools/pip_package/build_pip_package ${PIP_WHL_DIR} ${GPU_FLAG} ${ROCM_FLAG} ${NIGHTLY_FLAG} || \ - die "build_pip_package FAILED" - -WHL_PATH=$(ls ${PIP_WHL_DIR}/${PROJECT_NAME}*.whl) -if [[ $(echo ${WHL_PATH} | wc -w) -ne 1 ]]; then - die "ERROR: Failed to find exactly one built TensorFlow .whl file in "\ -"directory: ${PIP_WHL_DIR}" -fi - -# Print the size of the PIP wheel file. -echo -echo "Size of the PIP wheel file built: $(ls -l ${WHL_PATH} | awk '{print $5}')" -echo - -# Rename the whl file properly so it will have the python -# version tags and platform tags that won't cause pip install issues. -if [[ $(uname) == "Linux" ]]; then - PY_TAGS=${WHL_TAGS[${PY_MAJOR_MINOR_VER}]} - PLATFORM_TAG=$(to_lower "$(uname)_$(uname -m)") -# MAC has bash v3, which does not have associative array -elif [[ $(uname) == "Darwin" ]]; then - if [[ ${PY_MAJOR_MINOR_VER} == "2.7" ]]; then - PY_TAGS="py2-none" - elif [[ ${PY_MAJOR_MINOR_VER} == "3.5" ]]; then - PY_TAGS="py3-none" - elif [[ ${PY_MAJOR_MINOR_VER} == "3.6" ]]; then - PY_TAGS="py3-none" - fi - PLATFORM_TAG="any" -fi - -WHL_DIR=$(dirname "${WHL_PATH}") -WHL_BASE_NAME=$(basename "${WHL_PATH}") - -if [[ -n "${PY_TAGS}" ]]; then - NEW_WHL_BASE_NAME=$(echo ${WHL_BASE_NAME} | cut -d \- -f 1)-\ -$(echo ${WHL_BASE_NAME} | cut -d \- -f 2)-${PY_TAGS}-${PLATFORM_TAG}.whl - - if [[ ! -f "${WHL_DIR}/${NEW_WHL_BASE_NAME}" ]]; then - if cp "${WHL_DIR}/${WHL_BASE_NAME}" "${WHL_DIR}/${NEW_WHL_BASE_NAME}" - then - echo "Copied wheel file: ${WHL_BASE_NAME} --> ${NEW_WHL_BASE_NAME}" - else - die "ERROR: Failed to copy wheel file to ${NEW_WHL_BASE_NAME}" - fi - fi -fi - -if [[ $(uname) == "Linux" ]]; then - AUDITED_WHL_NAME="${WHL_DIR}/$(echo ${WHL_BASE_NAME//linux/manylinux1})" - - # Repair the wheels for cpu manylinux1 - if [[ ${CONTAINER_TYPE} == "cpu" ]]; then - echo "auditwheel repairing ${WHL_PATH}" - auditwheel repair -w ${WHL_DIR} ${WHL_PATH} - - if [[ -f ${AUDITED_WHL_NAME} ]]; then - WHL_PATH=${AUDITED_WHL_NAME} - echo "Repaired manylinx1 wheel file at: ${WHL_PATH}" - else - die "ERROR: Cannot find repaired wheel." - fi - # Copy and rename for gpu manylinux as we do not want auditwheel to package in libcudart.so - elif [[ ${CONTAINER_TYPE} == "gpu" ]] || \ - [[ ${CONTAINER_TYPE} == "rocm" ]]; then - WHL_PATH=${AUDITED_WHL_NAME} - cp ${WHL_DIR}/${WHL_BASE_NAME} ${WHL_PATH} - echo "Copied manylinx1 wheel file at ${WHL_PATH}" - fi -fi - - -create_activate_virtualenv_and_install_tensorflow() { - # Create and activate a virtualenv; then install tensorflow pip package in it. - # - # Usage: - # create_activate_virtualenv_and_install_tensorflow [--clean] \ - # - # - # Arguments: - # --clean: Create a clean virtualenv, i.e., without --system-site-packages. 
- # VIRTUALENV_DIR: virtualenv directory to be created. - # TF_WHEEL_PATH: Path to the tensorflow wheel file to be installed in the - # virtualenv. - - VIRTUALENV_FLAGS="--system-site-packages" - if [[ "$1" == "--clean" ]]; then - VIRTUALENV_FLAGS="" - shift - fi - - VIRTUALENV_DIR="$1" - TF_WHEEL_PATH="$2" - if [[ -d "${VIRTUALENV_DIR}" ]]; then - if rm -rf "${VIRTUALENV_DIR}" - then - echo "Removed existing virtualenv directory: ${VIRTUALENV_DIR}" - else - die "Failed to remove existing virtualenv directory: ${VIRTUALENV_DIR}" - fi - fi - - if mkdir -p "${VIRTUALENV_DIR}" - then - echo "Created virtualenv directory: ${VIRTUALENV_DIR}" - else - die "FAILED to create virtualenv directory: ${VIRTUALENV_DIR}" - fi - - # Use the virtualenv from the default python version (i.e., python-virtualenv) - # to create the virtualenv directory for testing. Use the -p flag to specify - # the python version inside the to-be-created virtualenv directory. - ${PYTHON_BIN_PATH} -m virtualenv -p "${PYTHON_BIN_PATH}" ${VIRTUALENV_FLAGS} \ - "${VIRTUALENV_DIR}" || \ - die "FAILED: Unable to create virtualenv" - - source "${VIRTUALENV_DIR}/bin/activate" || \ - die "FAILED: Unable to activate virtualenv in ${VIRTUALENV_DIR}" - - # Install the pip file in virtual env. - - # Upgrade pip so it supports tags such as cp27mu, manylinux1 etc. - echo "Upgrade pip in virtualenv" - - # NOTE: pip install --upgrade pip leads to a documented TLS issue for - # some versions in python - curl https://bootstrap.pypa.io/get-pip.py | python - - # Force upgrade of setuptools. This must happen before the pip install of the - # WHL_PATH, which pulls in absl-py, which uses install_requires notation - # introduced in setuptools >=20.5. The default version of setuptools is 5.5.1, - # which is too old for absl-py. - pip install --upgrade setuptools==39.1.0 - - # Force tensorflow reinstallation. Otherwise it may not get installed from - # last build if it had the same version number as previous build. - PIP_FLAGS="--upgrade --force-reinstall" - pip install -v ${PIP_FLAGS} ${WHL_PATH} || \ - die "pip install (forcing to reinstall tensorflow) FAILED" - echo "Successfully installed pip package ${TF_WHEEL_PATH}" - - # Force downgrade of setuptools. This must happen after the pip install of the - # WHL_PATH, which ends up upgrading to the latest version of setuptools. - # Versions of setuptools >= 39.1.0 will cause tests to fail like this: - # ImportError: cannot import name py31compat - pip install --upgrade setuptools==39.1.0 -} - -################################################################################ -# Smoke test of tensorflow install in clean virtualenv -################################################################################ -do_clean_virtualenv_smoke_test() { - if [[ -n "${NO_TEST_ON_INSTALL}" ]] && - [[ "${NO_TEST_ON_INSTALL}" != "0" ]]; then - echo "NO_TEST_ON_INSTALL=${NO_TEST_ON_INSTALL}:" - echo " Skipping smoke test of tensorflow install in clean virtualenv" - return ${SKIP_RETURN_CODE} - fi - - CLEAN_VENV_DIR="${PIP_TEST_ROOT}/venv_clean" - create_activate_virtualenv_and_install_tensorflow --clean \ - "${CLEAN_VENV_DIR}" "${WHL_PATH}" - - # cd to a temporary directory to avoid picking up Python files in the source - # tree. - TMP_DIR=$(mktemp -d) - pushd "${TMP_DIR}" - if [[ $(python -c "import tensorflow as tf; print(tf.Session().run(tf.constant(42)))") == 42 ]]; - then - echo "Smoke test of tensorflow install in clean virtualenv PASSED." 
- else - echo "Smoke test of tensorflow install in clean virtualenv FAILED." - return 1 - fi - - deactivate - if [[ $? != 0 ]]; then - echo "FAILED: Unable to deactivate virtualenv from ${CLEAN_VENV_DIR}" - return 1 - fi - - popd - rm -rf "${TMP_DIR}" "${CLEAN_VENV_DIR}" -} - -################################################################################ -# Perform installation of tensorflow in "non-clean" virtualenv and tests against -# the install. -################################################################################ -do_virtualenv_pip_test() { - # Create virtualenv directory for install test - VENV_DIR="${PIP_TEST_ROOT}/venv" - create_activate_virtualenv_and_install_tensorflow \ - "${VENV_DIR}" "${WHL_PATH}" - - # Install extra pip packages required by the test-on-install - for PACKAGE in ${INSTALL_EXTRA_PIP_PACKAGES}; do - echo "Installing extra pip package required by test-on-install: ${PACKAGE}" - - pip install ${PACKAGE} - if [[ $? != 0 ]]; then - echo "pip install ${PACKAGE} FAILED" - return 1 - fi - done - - if [[ -n "${NO_TEST_ON_INSTALL}" ]] && - [[ "${NO_TEST_ON_INSTALL}" != "0" ]]; then - echo "NO_TEST_ON_INSTALL=${NO_TEST_ON_INSTALL}:" - echo " Skipping ALL Python unit tests on install" - return ${SKIP_RETURN_CODE} - else - # Call run_pip_tests.sh to perform test-on-install - "${SCRIPT_DIR}/run_pip_tests.sh" --virtualenv ${GPU_FLAG} ${ROCM_FLAG} ${MAC_FLAG} - if [[ $? != 0 ]]; then - echo "PIP tests-on-install FAILED" - return 1 - fi - fi -} - -################################################################################ -# Run tests tagged with oss_serial against the virtualenv install. -################################################################################ -do_virtualenv_oss_serial_pip_test() { - if [[ -n "${NO_TEST_ON_INSTALL}" ]] && - [[ "${NO_TEST_ON_INSTALL}" != "0" ]]; then - echo "NO_TEST_ON_INSTALL=${NO_TEST_ON_INSTALL}:" - echo " Skipping Python unit tests on install tagged with oss_serial" - return ${SKIP_RETURN_CODE} - else - # Call run_pip_tests.sh to perform test-on-install - "${SCRIPT_DIR}/run_pip_tests.sh" \ - --virtualenv ${GPU_FLAG} ${ROCM_FLAG} ${MAC_FLAG} --oss_serial - if [[ $? != 0 ]]; then - echo "PIP tests-on-install (oss_serial) FAILED" - return 1 - fi - fi -} - -################################################################################ -# Test user ops (optional). -################################################################################ -do_test_user_ops() { - if [[ "${DO_TEST_USER_OPS}" == "1" ]]; then - "${SCRIPT_DIR}/test_user_ops.sh" --virtualenv ${GPU_FLAG} ${ROCM_FLAG} - if [[ $? != 0 ]]; then - echo "PIP user-op tests-on-install FAILED" - return 1 - fi - else - echo "Skipping user-op test-on-install due to DO_TEST_USER_OPS = ${DO_TEST_USER_OPS}" - return ${SKIP_RETURN_CODE} - fi -} - -################################################################################ -# Test TensorFlow Debugger (tfdbg) binaries (optional). -################################################################################ -do_test_tfdbg_binaries() { - if [[ "${DO_TEST_TFDBG_BINARIES}" == "1" ]]; then - # cd to a temporary directory to avoid picking up Python files in the source - # tree. - TMP_DIR=$(mktemp -d) - pushd "${TMP_DIR}" - - "${SCRIPT_DIR}/../../../python/debug/examples/examples_test.sh" \ - --virtualenv - if [[ $? 
!= 0 ]]; then - echo "PIP tests-on-install of tfdbg binaries FAILED" - return 1 - fi - popd - else - echo "Skipping test of tfdbg binaries due to DO_TEST_TFDBG_BINARIES = ${DO_TEST_TFDBG_BINARIES}" - return ${SKIP_RETURN_CODE} - fi -} - -################################################################################ -# Test tutorials (optional). -################################################################################ -do_test_tutorials() { - if [[ "${DO_TEST_TUTORIALS}" == "1" ]]; then - "${SCRIPT_DIR}/test_tutorials.sh" --virtualenv - if [[ $? != 0 ]]; then - echo "PIP tutorial tests-on-install FAILED" - return 1 - fi - else - echo "Skipping tutorial tests-on-install due to DO_TEST_TUTORIALS = ${DO_TEST_TUTORIALS}" - return ${SKIP_RETURN_CODE} - fi -} - -################################################################################ -# Integration test for ffmpeg (optional). -################################################################################ -do_ffmpeg_integration_test() { - # Optional: Run integration tests - if [[ "${DO_INTEGRATION_TESTS}" == "1" ]]; then - "${SCRIPT_DIR}/integration_tests.sh" --virtualenv - if [[ $? != 0 ]]; then - echo "Integration tests on install FAILED" - return 1 - fi - else - echo "Skipping ffmpeg integration due to DO_INTEGRATION_TESTS = ${DO_INTEGRATION_TESTS}" - return ${SKIP_RETURN_CODE} - fi -} - - -# List of all PIP test tasks and their descriptions. -PIP_TASKS=("do_clean_virtualenv_smoke_test" "do_virtualenv_pip_test" "do_virtualenv_oss_serial_pip_test" "do_test_user_ops" "do_test_tfdbg_binaries" "do_test_tutorials" "do_ffmpeg_integration_test") -PIP_TASKS_DESC=("Smoke test of pip install in clean virtualenv" "PIP tests in virtualenv" "PIP test in virtualenv (tag: oss_serial)" "User ops test" "TensorFlow Debugger (tfdbg) binaries test" "Tutorials test" "ffmpeg integration test") - - -# Execute all the PIP test steps. -COUNTER=0 -FAIL_COUNTER=0 -PASS_COUNTER=0 -SKIP_COUNTER=0 -while [[ ${COUNTER} -lt "${#PIP_TASKS[@]}" ]]; do - INDEX=COUNTER - ((INDEX++)) - - echo - printf "${COLOR_BOLD}=== PIP test step ${INDEX} of ${#PIP_TASKS[@]}: "\ -"${PIP_TASKS[COUNTER]} (${PIP_TASKS_DESC[COUNTER]}) ===${COLOR_NC}" - echo - - ${PIP_TASKS[COUNTER]} - RESULT=$? - - if [[ ${RESULT} == ${SKIP_RETURN_CODE} ]]; then - ((SKIP_COUNTER++)) - elif [[ ${RESULT} != "0" ]]; then - ((FAIL_COUNTER++)) - else - ((PASS_COUNTER++)) - fi - - STEP_EXIT_CODES+=(${RESULT}) - - echo "" - ((COUNTER++)) -done - -deactivate || die "FAILED: Unable to deactivate virtualenv from ${VENV_DIR}" - - -# Print summary of build results -COUNTER=0 -echo "==== Summary of PIP test results ====" -while [[ ${COUNTER} -lt "${#PIP_TASKS[@]}" ]]; do - INDEX=COUNTER - ((INDEX++)) - - echo "${INDEX}. ${PIP_TASKS[COUNTER]}: ${PIP_TASKS_DESC[COUNTER]}" - if [[ ${STEP_EXIT_CODES[COUNTER]} == ${SKIP_RETURN_CODE} ]]; then - printf " ${COLOR_LIGHT_GRAY}SKIP${COLOR_NC}\n" - elif [[ ${STEP_EXIT_CODES[COUNTER]} == "0" ]]; then - printf " ${COLOR_GREEN}PASS${COLOR_NC}\n" - else - printf " ${COLOR_RED}FAIL${COLOR_NC}\n" - fi - - ((COUNTER++)) -done - -echo -echo "${SKIP_COUNTER} skipped; ${FAIL_COUNTER} failed; ${PASS_COUNTER} passed." 
- -echo -if [[ ${FAIL_COUNTER} == "0" ]]; then - printf "PIP test ${COLOR_GREEN}PASSED${COLOR_NC}\n" -else - printf "PIP test ${COLOR_RED}FAILED${COLOR_NC}\n" - exit 1 -fi From 14ea32321eaffa33a506de3417659fdcfd62b7de Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Wed, 29 Jul 2020 17:29:22 -0700 Subject: [PATCH 1682/2522] [tflite] Fix null pointer dereference introduced in #41757 PiperOrigin-RevId: 323903626 Change-Id: Ibd227eab4ce0b1352536594ab5e43450ca5c4932 --- tensorflow/lite/tools/verifier.cc | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tensorflow/lite/tools/verifier.cc b/tensorflow/lite/tools/verifier.cc index 7a4a795e459..bdd4a35dac7 100644 --- a/tensorflow/lite/tools/verifier.cc +++ b/tensorflow/lite/tools/verifier.cc @@ -261,10 +261,14 @@ absl::optional VerifyAndCountSparseElements(const Tensor& tensor) { } const int block_rank = total_dims - original_rank; - const int sparsity_block_map_size = sparsity->block_map()->size(); - if (block_rank > 0 && (sparsity->block_map() == nullptr || - sparsity_block_map_size != block_rank)) { - return absl::nullopt; + if (block_rank > 0) { + if (sparsity->block_map() == nullptr) { + return absl::nullopt; + } + const int sparse_rank = sparsity->block_map()->size(); + if (sparse_rank != block_rank) { + return absl::nullopt; + } } // For a n-dimensional tensor (d0, ..., dn-1) with k-dimensional block (dn, From bb8d761d2ed00aed92fab27c285d345f9f82f992 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 29 Jul 2020 17:47:04 -0700 Subject: [PATCH 1683/2522] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 323906299 Change-Id: I92cb913fee27809dd3693ea3a47f45462a7e4794 --- tensorflow/go/op/wrappers.go | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 470d8fe43ed..1bff193830a 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -18416,6 +18416,37 @@ func SparseSegmentSum(scope *Scope, data tf.Output, indices tf.Output, segment_i return op.Output(0) } +// CollectiveReduceV2Attr is an optional argument to CollectiveReduceV2. +type CollectiveReduceV2Attr func(optionalAttr) + +// CollectiveReduceV2CommunicationHint sets the optional communication_hint attribute to value. +// If not specified, defaults to "auto" +func CollectiveReduceV2CommunicationHint(value string) CollectiveReduceV2Attr { + return func(m optionalAttr) { + m["communication_hint"] = value + } +} + +// Mutually reduces multiple tensors of identical type and shape. +func CollectiveReduceV2(scope *Scope, input tf.Output, group_size tf.Output, group_key tf.Output, instance_key tf.Output, merge_op string, final_op string, optional ...CollectiveReduceV2Attr) (data tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"merge_op": merge_op, "final_op": final_op} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "CollectiveReduceV2", + Input: []tf.Input{ + input, group_size, group_key, instance_key, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Computes the sum along segments of a tensor. 
// // Read From 4aa666bff449a10add5f4181a7d680d08c51253a Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Wed, 29 Jul 2020 18:35:01 -0700 Subject: [PATCH 1684/2522] Rollback of rollback of enabling MLIR bridge for tf.function PiperOrigin-RevId: 323912950 Change-Id: I596ed1e1e015bf36c07a11dbac083503b70f24e7 --- .../compiler/jit/xla_kernel_creator_util.cc | 48 ++++++++++--------- tensorflow/compiler/tests/BUILD | 8 ---- tensorflow/compiler/tf2xla/xla_compiler.cc | 16 ++++++- 3 files changed, 40 insertions(+), 32 deletions(-) diff --git a/tensorflow/compiler/jit/xla_kernel_creator_util.cc b/tensorflow/compiler/jit/xla_kernel_creator_util.cc index 3cc68f2a1a4..61c89d8a67a 100644 --- a/tensorflow/compiler/jit/xla_kernel_creator_util.cc +++ b/tensorflow/compiler/jit/xla_kernel_creator_util.cc @@ -80,31 +80,35 @@ Status CreateXlaKernel(FunctionLibraryRuntime* flr, const NodeDef& node_def, // Make sure that kernels have been registered on the JIT device. XlaOpRegistry::RegisterCompilationKernels(); - RecursiveCompilabilityChecker::UncompilableNodesMap uncompilable_nodes_map; - if (!IsCompilable(flr, node_def, &uncompilable_nodes_map)) { - std::vector - uncompilable_node_info; - for (const auto& it : uncompilable_nodes_map) { - for (const auto& info : it.second.second) { - uncompilable_node_info.emplace_back(info); + + // Only check for compilability if the MLIR bridge is not enabled. + if (!GetMlirCommonFlags()->tf_mlir_enable_mlir_bridge) { + RecursiveCompilabilityChecker::UncompilableNodesMap uncompilable_nodes_map; + if (!IsCompilable(flr, node_def, &uncompilable_nodes_map)) { + std::vector + uncompilable_node_info; + for (const auto& it : uncompilable_nodes_map) { + for (const auto& info : it.second.second) { + uncompilable_node_info.emplace_back(info); + } } - } - string message = absl::StrCat( - "Function invoked by the following node is not compilable: ", - SummarizeNodeDef(node_def, /*max_inputs_in_summary=*/10), ".\n"); - absl::StrAppend(&message, "Uncompilable nodes:"); - for (const auto& node_info : uncompilable_node_info) { - string node_message = - absl::StrCat("\n", node_info.name, ": ", - node_info.uncompilable_reason, "\n", "\tStacktrace:\n"); - for (const auto& stack_frame : node_info.stack_trace) { - absl::StrAppendFormat(&node_message, "\t\tNode: %s, function: %s\n", - stack_frame.name, stack_frame.function_name); + string message = absl::StrCat( + "Function invoked by the following node is not compilable: ", + SummarizeNodeDef(node_def, /*max_inputs_in_summary=*/10), ".\n"); + absl::StrAppend(&message, "Uncompilable nodes:"); + for (const auto& node_info : uncompilable_node_info) { + string node_message = absl::StrCat("\n", node_info.name, ": ", + node_info.uncompilable_reason, "\n", + "\tStacktrace:\n"); + for (const auto& stack_frame : node_info.stack_trace) { + absl::StrAppendFormat(&node_message, "\t\tNode: %s, function: %s\n", + stack_frame.name, stack_frame.function_name); + } + absl::StrAppend(&message, node_message); } - absl::StrAppend(&message, node_message); + VLOG(1) << message; + return errors::InvalidArgument(message); } - VLOG(1) << message; - return errors::InvalidArgument(message); } // Get function body, constant args, and resource args. 
diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index d9450cb6364..c2b5000647d 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -123,7 +123,6 @@ tf_xla_py_test( name = "adagrad_da_test", size = "small", srcs = ["adagrad_da_test.py"], - enable_mlir_bridge = True, python_version = "PY3", tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip @@ -161,7 +160,6 @@ tf_xla_py_test( srcs = ["add_n_test.py"], # TensorList ops are not implemented in the on-demand compilation model yet. disabled_backends = ["cpu_ondemand"], - enable_mlir_bridge = True, python_version = "PY3", tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip @@ -689,7 +687,6 @@ tf_xla_py_test( name = "fft_test", size = "medium", srcs = ["fft_test.py"], - enable_mlir_bridge = True, python_version = "PY3", shard_count = 6, tags = [ @@ -929,7 +926,6 @@ tf_xla_py_test( name = "pooling_ops_test", size = "medium", srcs = ["pooling_ops_test.py"], - enable_mlir_bridge = True, python_version = "PY3", shard_count = 20, tags = [ @@ -1243,7 +1239,6 @@ tf_xla_py_test( name = "stack_ops_test", size = "small", srcs = ["stack_ops_test.py"], - enable_mlir_bridge = True, python_version = "PY3", tags = [ "config-cuda-only", @@ -1304,7 +1299,6 @@ tf_xla_py_test( srcs = ["tensor_array_ops_test.py"], # TensorArray ops are not implemented in the on-demand compilation model yet. disabled_backends = ["cpu_ondemand"], - enable_mlir_bridge = True, python_version = "PY3", tags = [ "config-cuda-only", @@ -1333,7 +1327,6 @@ tf_xla_py_test( srcs = ["tensor_list_ops_test.py"], # TensorList ops are not implemented in the on-demand compilation model yet. disabled_backends = ["cpu_ondemand"], - enable_mlir_bridge = True, python_version = "PY3", tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip @@ -1889,7 +1882,6 @@ tf_xla_py_test( name = "special_math_test", size = "medium", srcs = ["special_math_test.py"], - enable_mlir_bridge = True, shard_count = 5, tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip diff --git a/tensorflow/compiler/tf2xla/xla_compiler.cc b/tensorflow/compiler/tf2xla/xla_compiler.cc index db54f2f6563..0045a7958b4 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.cc +++ b/tensorflow/compiler/tf2xla/xla_compiler.cc @@ -23,6 +23,7 @@ limitations under the License. #include "tensorflow/compiler/jit/defs.h" #include "tensorflow/compiler/jit/flags.h" #include "tensorflow/compiler/jit/shape_inference.h" +#include "tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.h" #include "tensorflow/compiler/tf2xla/graph_compiler.h" #include "tensorflow/compiler/tf2xla/rearrange_function_argument.h" #include "tensorflow/compiler/tf2xla/shape_util.h" @@ -52,6 +53,7 @@ limitations under the License. 
#include "tensorflow/core/lib/hash/hash.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/protobuf/error_codes.pb.h" +#include "tensorflow/core/protobuf/graph_debug_info.pb.h" #include "tensorflow/core/util/dump_graph.h" namespace tensorflow { @@ -726,8 +728,18 @@ Status XlaCompiler::CompileFunction( } VLOG(1) << "===================================================="; - TF_RETURN_IF_ERROR( - CompileGraph(options, function_id, std::move(graph), args, result)); + if (GetMlirCommonFlags()->tf_mlir_enable_mlir_bridge) { + VLOG(1) << "Using MLIR bridge"; + GraphDebugInfo debug_info; + TF_RETURN_IF_ERROR(CompileGraphToXlaHlo( + std::move(*graph), {args.data(), args.size()}, + options_.device_type.type_string(), options.use_tuple_arg, + *options_.flib_def, debug_info, options_.shape_representation_fn, + result)); + } else { + TF_RETURN_IF_ERROR( + CompileGraph(options, function_id, std::move(graph), args, result)); + } VLOG(1) << "===================================================="; cache_[{function_id, arg_vector}] = *result; From b6f0c84c910d3166f687eeb97c31eb5464e0a01a Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Wed, 29 Jul 2020 19:15:22 -0700 Subject: [PATCH 1685/2522] Rollback of rollback of testing tf.function(compile=True) with MLIR bridge PiperOrigin-RevId: 323917806 Change-Id: I04fd56932748ab624786f80522fced4acba2a3ee --- tensorflow/python/eager/BUILD | 12 +- .../python/eager/def_function_xla_jit_test.py | 724 ++++++++++-------- 2 files changed, 397 insertions(+), 339 deletions(-) diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD index b9c43c2e4e9..7336e85b2de 100644 --- a/tensorflow/python/eager/BUILD +++ b/tensorflow/python/eager/BUILD @@ -846,18 +846,26 @@ tf_py_test( ], ) -cuda_py_test( +tf_xla_py_test( name = "def_function_xla_jit_test", srcs = ["def_function_xla_jit_test.py"], + enable_mlir_bridge = True, + enabled_backends = [ + # TODO(b/162438052): Enable the test on TPU. 
+ "cpu", + "gpu", + ], python_version = "PY3", tags = [ "no_mac", + "no_pip", "no_windows", ], - xla_enabled = True, + use_xla_device = False, deps = [ ":backprop", ":def_function", + "//tensorflow/compiler/tests:xla_test", "//tensorflow/python:client_testlib", "//tensorflow/python:constant_op", "//tensorflow/python:framework_ops", diff --git a/tensorflow/python/eager/def_function_xla_jit_test.py b/tensorflow/python/eager/def_function_xla_jit_test.py index bd7a6ec2279..3e324c90fbc 100644 --- a/tensorflow/python/eager/def_function_xla_jit_test.py +++ b/tensorflow/python/eager/def_function_xla_jit_test.py @@ -17,6 +17,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.compiler.tests import xla_test from tensorflow.python.eager import backprop from tensorflow.python.eager import context from tensorflow.python.eager import def_function @@ -36,43 +37,45 @@ from tensorflow.python.ops import variables from tensorflow.python.platform import test -class DefFunctionTest(test.TestCase): +class DefFunctionTest(xla_test.XLATestCase): def testAutoclusteringWithTfFunction(self): + with ops.device('device:{}:0'.format(self.device)): - @def_function.function(experimental_compile=False) - def outer(a, b, c): - return a * inner(b, c) + c + @def_function.function(experimental_compile=False) + def outer(a, b, c): + return a * inner(b, c) + c - @def_function.function(experimental_compile=True) - def inner(b, c): - return b + c * b + @def_function.function(experimental_compile=True) + def inner(b, c): + return b + c * b - i1 = constant_op.constant([1.0, 2.0, 3.0, 4.0, 5.0]) - i2 = constant_op.constant([1.0, 2.0, 3.0, 4.0, 5.0]) - i3 = constant_op.constant([1.0, 2.0, 3.0, 4.0, 5.0]) + i1 = constant_op.constant([1.0, 2.0, 3.0, 4.0, 5.0]) + i2 = constant_op.constant([1.0, 2.0, 3.0, 4.0, 5.0]) + i3 = constant_op.constant([1.0, 2.0, 3.0, 4.0, 5.0]) - with context.collect_graphs(optimized=True) as graphs: - outer(i1, i2, i3) + with context.collect_graphs(optimized=True) as graphs: + outer(i1, i2, i3) - if test_util.is_xla_enabled(): - self.assertIn('_XlaRun', [n.op for n in graphs[0].node]) - else: - self.assertNotIn('_XlaRun', [n.op for n in graphs[0].node]) + if test_util.is_xla_enabled(): + self.assertIn('_XlaRun', [n.op for n in graphs[0].node]) + else: + self.assertNotIn('_XlaRun', [n.op for n in graphs[0].node]) def testBasic(self): + with ops.device('device:{}:0'.format(self.device)): - def fn(x, a): - return x + a + def fn(x, a): + return x + a - func = def_function.function(fn, experimental_compile=False) - xla_func = def_function.function(fn, experimental_compile=True) + func = def_function.function(fn, experimental_compile=False) + xla_func = def_function.function(fn, experimental_compile=True) - inputs = constant_op.constant([1, 2, 2, 3, 3]) - self.assertAllClose([2, 3, 3, 4, 4], func(inputs, 1)) - if not test.is_built_with_rocm(): - # XLA support is not yet enabled for TF ROCm - self.assertAllClose([2, 3, 3, 4, 4], xla_func(inputs, 1)) + inputs = constant_op.constant([1, 2, 2, 3, 3]) + self.assertAllClose([2, 3, 3, 4, 4], func(inputs, 1)) + if not test.is_built_with_rocm(): + # XLA support is not yet enabled for TF ROCm + self.assertAllClose([2, 3, 3, 4, 4], xla_func(inputs, 1)) def testBasicInt32(self): @@ -87,385 +90,432 @@ class DefFunctionTest(test.TestCase): self.assertAllClose([2, 3, 3, 4, 4], xla_func(inputs, 1)) def testDerivative(self): - if test.is_built_with_rocm(): - return + with 
ops.device('device:{}:0'.format(self.device)): + if test.is_built_with_rocm(): + return - def fn(x, a): - return 2 * x + a + def fn(x, a): + return 2 * x + a - xla_func = def_function.function(fn, experimental_compile=True) + xla_func = def_function.function(fn, experimental_compile=True) - with backprop.GradientTape() as tape: - inputs = constant_op.constant([1., 2., 2., 3., 3.]) - tape.watch(inputs) - outputs = xla_func(inputs, 1) + with backprop.GradientTape() as tape: + inputs = constant_op.constant([1., 2., 2., 3., 3.]) + tape.watch(inputs) + outputs = xla_func(inputs, 1) - self.assertAllClose([2, 2, 2, 2, 2], tape.gradient(outputs, inputs)) + self.assertAllClose([2, 2, 2, 2, 2], tape.gradient(outputs, inputs)) - # pylint: disable=protected-access - (forward, backward) = xla_func.get_concrete_function( - inputs, 1)._delayed_rewrite_functions.forward_backward() + # pylint: disable=protected-access + (forward, backward) = xla_func.get_concrete_function( + inputs, 1)._delayed_rewrite_functions.forward_backward() - # Check that the must-compile attribute gets correctly propagated to the - # created derivatives. - self.assertTrue(backward.function_def.attr['_XlaMustCompile']) - self.assertTrue(forward.definition.attr['_XlaMustCompile']) + # Check that the must-compile attribute gets correctly propagated to the + # created derivatives. + self.assertTrue(backward.function_def.attr['_XlaMustCompile']) + self.assertTrue(forward.definition.attr['_XlaMustCompile']) # Calling function with experimental_compile=True from # experimental_compile=False should compile the inner func. def testNestedCall(self): + with ops.device('device:{}:0'.format(self.device)): - def fn(x, a): - return x + a + def fn(x, a): + return x + a - xla_func = def_function.function(fn, experimental_compile=True) + xla_func = def_function.function(fn, experimental_compile=True) - def fn2(x, a): - return xla_func(x, a) + def fn2(x, a): + return xla_func(x, a) - func = def_function.function(fn2, experimental_compile=False) + func = def_function.function(fn2, experimental_compile=False) - inputs = constant_op.constant([1, 2, 2, 3, 3]) - if not test.is_built_with_rocm(): - # XLA support is not yet enabled for TF ROCm - self.assertAllClose([2, 3, 3, 4, 4], func(inputs, 1)) + inputs = constant_op.constant([1, 2, 2, 3, 3]) + if not test.is_built_with_rocm(): + # XLA support is not yet enabled for TF ROCm + self.assertAllClose([2, 3, 3, 4, 4], func(inputs, 1)) + @test_util.disable_mlir_bridge('TODO(b/162272821): MLIR bridge returns' + ' wrong status type') def testNestedCallUnsupportedOps(self): + with ops.device('device:{}:0'.format(self.device)): - def fn(x): - return array_ops.unique(x).y + def fn(x): + return array_ops.unique(x).y - xla_func = def_function.function(fn, experimental_compile=True) + xla_func = def_function.function(fn, experimental_compile=True) - def fn2(x): - return xla_func(x) + def fn2(x): + return xla_func(x) - func = def_function.function(fn2, experimental_compile=False) - inputs = constant_op.constant([1, 2, 2, 3, 3]) - if not test.is_built_with_rocm(): + func = def_function.function(fn2, experimental_compile=False) + inputs = constant_op.constant([1, 2, 2, 3, 3]) + if not test.is_built_with_rocm(): + with self.assertRaisesRegex(errors.InvalidArgumentError, + 'not compilable'): + func(inputs) + + @test_util.disable_mlir_bridge('TODO(b/162272821): MLIR bridge returns' + ' wrong status type') + def testUnsupportedOps(self): + with ops.device('device:{}:0'.format(self.device)): + + def fn(x): + return 
array_ops.unique(x).y # Unique is not supported by XLA + + func = def_function.function(fn, experimental_compile=False) + xla_func = def_function.function(fn, experimental_compile=True) + + inputs = constant_op.constant([1, 2, 2, 3, 3]) + self.assertAllClose([1, 2, 3], func(inputs)) with self.assertRaisesRegex(errors.InvalidArgumentError, 'not compilable'): - func(inputs) - - def testUnsupportedOps(self): - - def fn(x): - return array_ops.unique(x).y # Unique is not supported by XLA - - func = def_function.function(fn, experimental_compile=False) - xla_func = def_function.function(fn, experimental_compile=True) - - inputs = constant_op.constant([1, 2, 2, 3, 3]) - self.assertAllClose([1, 2, 3], func(inputs)) - with self.assertRaisesRegex(errors.InvalidArgumentError, 'not compilable'): - xla_func(inputs) + xla_func(inputs) def testFunctionGradient(self): - v = resource_variable_ops.ResourceVariable(2.0) + with ops.device('device:{}:0'.format(self.device)): + v = resource_variable_ops.ResourceVariable(2.0) - def fn(x): - return v * x + def fn(x): + return v * x - func = def_function.function(fn, experimental_compile=False) - xla_func = def_function.function(fn, experimental_compile=True) + func = def_function.function(fn, experimental_compile=False) + xla_func = def_function.function(fn, experimental_compile=True) - def run_and_check(test_func): - x = constant_op.constant(3.0) - with backprop.GradientTape() as tape: - y = test_func(x) - dy = tape.gradient(y, v) + def run_and_check(test_func): + x = constant_op.constant(3.0) + with backprop.GradientTape() as tape: + y = test_func(x) + dy = tape.gradient(y, v) - self.assertAllClose(6.0, y) - self.assertAllClose(3.0, dy) + self.assertAllClose(6.0, y) + self.assertAllClose(3.0, dy) - run_and_check(func) - if not test.is_built_with_rocm(): - # XLA support is not yet enabled for TF ROCm - run_and_check(xla_func) + run_and_check(func) + if not test.is_built_with_rocm(): + # XLA support is not yet enabled for TF ROCm + run_and_check(xla_func) def testControlFlow(self): - @def_function.function(experimental_compile=True) - def f(x): - assert control_flow_util.GraphOrParentsInXlaContext( - ops.get_default_graph()) - x = ops.convert_to_tensor(x) + with ops.device('device:{}:0'.format(self.device)): - def body(i, a): - return i + 1, control_flow_ops.cond(i > 2, lambda: a + (x**2), - lambda: a + 3) + @def_function.function(experimental_compile=True) + def f(x): + assert control_flow_util.GraphOrParentsInXlaContext( + ops.get_default_graph()) + x = ops.convert_to_tensor(x) - return control_flow_ops.while_loop( - lambda i, *_: i < 10, - body, (constant_op.constant(0), constant_op.constant(3.)), - maximum_iterations=10)[1] + def body(i, a): + return i + 1, control_flow_ops.cond(i > 2, lambda: a + (x**2), + lambda: a + 3) - @def_function.function(experimental_compile=True) - def g(x): - x = ops.convert_to_tensor(x) - with backprop.GradientTape() as tape: - tape.watch(x) - y = f(x) - return y, tape.gradient(y, x) + return control_flow_ops.while_loop( + lambda i, *_: i < 10, + body, (constant_op.constant(0), constant_op.constant(3.)), + maximum_iterations=10)[1] - self.assertAllClose(40.0, f(2.0)) - self.assertAllClose([40.0, 28.0], g(2.0)) + @def_function.function(experimental_compile=True) + def g(x): + x = ops.convert_to_tensor(x) + with backprop.GradientTape() as tape: + tape.watch(x) + y = f(x) + return y, tape.gradient(y, x) + + self.assertAllClose(40.0, f(2.0)) + self.assertAllClose([40.0, 28.0], g(2.0)) def testMethodCompilation(self): if 
test.is_built_with_rocm(): return - class C(object): + with ops.device('device:{}:0'.format(self.device)): - @def_function.function(experimental_compile=True) - def f1(self, x, a): - return x + a + class C(object): - inputs = constant_op.constant([1, 2, 2, 3, 3]) - c = C() - self.assertAllClose([2, 3, 3, 4, 4], c.f1(inputs, 1)) + @def_function.function(experimental_compile=True) + def f1(self, x, a): + return x + a + inputs = constant_op.constant([1, 2, 2, 3, 3]) + c = C() + self.assertAllClose([2, 3, 3, 4, 4], c.f1(inputs, 1)) + + @test_util.disable_mlir_bridge('TODO(b/162272821): MLIR bridge returns ' + ' wrong status type') def testMethodCompilationUnsupportedFunc(self): if test.is_built_with_rocm(): return - class C(object): + with ops.device('device:{}:0'.format(self.device)): - @def_function.function(experimental_compile=True) - def f1(self, x): - return array_ops.unique(x).y + class C(object): - inputs = constant_op.constant([1, 2, 2, 3, 3]) - c = C() - with self.assertRaisesRegex(errors.InvalidArgumentError, 'not compilable'): - c.f1(inputs) + @def_function.function(experimental_compile=True) + def f1(self, x): + return array_ops.unique(x).y + + inputs = constant_op.constant([1, 2, 2, 3, 3]) + c = C() + with self.assertRaisesRegex(errors.InvalidArgumentError, + 'not compilable'): + c.f1(inputs) def testMustBeConstantPropagation(self): - if test.is_built_with_rocm(): - return - - @def_function.function(experimental_compile=True) - def f(): - return constant_op.constant([0, 2, 1], dtype=dtypes.int32) - - @def_function.function(experimental_compile=True) - def g(a, b): - return array_ops.transpose(a, b) - - @def_function.function - def z(): - return g(array_ops.ones([3, 4, 3], dtype=dtypes.float32), f()) - - z() - - def testArgMinMax(self): - - @def_function.function(experimental_compile=True) - def argmax(x): - return math_ops.argmax(x) - - @def_function.function(experimental_compile=True) - def argmin(x): - return math_ops.argmin(x) - - self.assertAllClose(0, argmax(array_ops.ones([10], dtype=dtypes.float32))) - self.assertAllClose(0, argmax(array_ops.ones([10]))) - self.assertAllClose(0, argmin(array_ops.ones([10], dtype=dtypes.float32))) - self.assertAllClose(0, argmin(array_ops.ones([10]))) - - def testErrorMessagePassingTensorArray(self): - - @def_function.function(experimental_compile=True) - def f(x): - ta = tensor_array_ops.TensorArray( - dtype=dtypes.float32, size=1, element_shape=[]) - ta = ta.write(0, 2 * x) - y = ta.read(0) - return y - - x = constant_op.constant(3.14) - with backprop.GradientTape() as tape: - tape.watch(x) - with self.assertRaisesRegex(errors.UnimplementedError, - 'TensorList crossing the XLA/TF boundary'): - y = f(x) - tape.gradient(y, x) - - def testTensorListConcatV2(self): - - def f(x): - ta = tensor_array_ops.TensorArray( - dtype=dtypes.float32, size=2, element_shape=[3]) - ta = ta.write(0, 2 * x) - ta = ta.write(1, 3 * x) - return ta.concat() - - compiled_f = def_function.function(experimental_compile=True)(f) - - inputs = constant_op.constant([3.14, 2.68, 7.69]) - - self.assertAllClose([6.28, 5.36, 15.38, 9.42, 8.04, 23.07], f(inputs)) - - self.assertAllClose(compiled_f(inputs), f(inputs)) - - def testTensorListConcatV2Multidim(self): - - def f(x): - ta = tensor_array_ops.TensorArray( - dtype=dtypes.float32, size=2, element_shape=[3, 2]) - ta = ta.write(0, 2 * x) - ta = ta.write(1, 3 * x) - return ta.concat() - - compiled_f = def_function.function(experimental_compile=True)(f) - - inputs = constant_op.constant([[3.14, 21.1], [2.68, 22.2], [7.69, 
23.3]]) - self.assertAllClose(f(inputs), compiled_f(inputs)) - - def testTensorListConcatV2Scalars(self): - - def f(x): - ta = tensor_array_ops.TensorArray( - dtype=dtypes.float32, size=2, element_shape=[1]) - ta = ta.write(0, 2 * x) - ta = ta.write(1, 3 * x) - return ta.concat() - - compiled_f = def_function.function(experimental_compile=True)(f) - inputs = constant_op.constant([3.14]) - self.assertAllClose(f(inputs), compiled_f(inputs)) - - def testTensorListConcatGrad(self): - - def f(x): - ta = tensor_array_ops.TensorArray( - dtype=dtypes.float32, size=2, element_shape=[3]) - ta = ta.write(0, 2 * x) - ta = ta.write(1, 3 * x) - return ta.concat() - - def g(): - x = constant_op.constant([3.14, 2.68, 7.69]) - with backprop.GradientTape() as tape: - tape.watch(x) - y = f(x) - return tape.gradient(y, x) - - compiled_g = def_function.function(experimental_compile=True)(g) - - self.assertAllClose([5.0, 5.0, 5.0], g()) - self.assertAllClose(compiled_g(), g()) - - def testTensorListConcatGradNestedCompile(self): - - @def_function.function(experimental_compile=True) - def f(x): - ta = tensor_array_ops.TensorArray( - dtype=dtypes.float32, size=2, element_shape=[3]) - ta = ta.write(0, 2 * x) - ta = ta.write(1, 3 * x) - return ta.concat() - - @def_function.function(experimental_compile=True) - def g(): - x = constant_op.constant([3.14, 2.68, 7.69]) - with backprop.GradientTape() as tape: - tape.watch(x) - y = f(x) - out = tape.gradient(y, x) - return out - - self.assertAllClose([5.0, 5.0, 5.0], g()) - - def testCumsum(self): - - @def_function.function(experimental_compile=True) - def f(x): - return math_ops.cumsum(x) - - f64_input = constant_op.constant([1.1, 2.2, 3.3], dtype=dtypes.float64) - self.assertAllClose([1.1, 3.3, 6.6], f(f64_input)) - - def testNoExcessiveRetracing(self): - inner_retracings = 0 - - @def_function.function(experimental_compile=True) - def inner(a, b): - nonlocal inner_retracings - inner_retracings += 1 - return a * b + a - - def outer(a, b): - return inner(a, b) - - func_input = random_ops.random_normal([10, 10]) - for _ in range(2): - def_function.function(outer)(func_input, func_input) - - self.assertEqual(inner_retracings, 1) - - def testUpdateVariable(self): - v = variables.Variable(3.1) - - @def_function.function(experimental_compile=True) - def update_var(a, b): - v.assign_add(a * b) - - update_var(constant_op.constant(0.7), constant_op.constant(0.6)) - self.assertAllClose(v, 3.52) - - def testUpdateVariableVector(self): - v = variables.Variable([3.1, 3.1]) - - @def_function.function(experimental_compile=True) - def update_var(a, b): - v.assign_add(a * b) - - update_var( - constant_op.constant([0.7, 0.7]), constant_op.constant([0.6, 0.6])) - self.assertAllClose(v, [3.52, 3.52]) - - def testUpdateVariableInClass(self): - - class C(object): + with ops.device('device:{}:0'.format(self.device)): + if test.is_built_with_rocm(): + return @def_function.function(experimental_compile=True) - def update_var(self, a, b): - if not hasattr(self, 'v'): - self.v = variables.Variable(3.1) - self.v.assign_add(a * b) + def f(): + return constant_op.constant([0, 2, 1], dtype=dtypes.int32) - c = C() + @def_function.function(experimental_compile=True) + def g(a, b): + return array_ops.transpose(a, b) - @def_function.function - def outer(): - c.update_var(constant_op.constant(0.7), constant_op.constant(0.6)) + @def_function.function + def z(): + return g(array_ops.ones([3, 4, 3], dtype=dtypes.float32), f()) - outer() - self.assertAllClose(c.v, 3.52) + z() + + 
@test_util.disable_mlir_bridge('TODO(b/162271237): argmax gives different' + ' results in MLIR-based bridge') + def testArgMinMax(self): + with ops.device('device:{}:0'.format(self.device)): + + @def_function.function(experimental_compile=True) + def argmax(x): + return math_ops.argmax(x) + + @def_function.function(experimental_compile=True) + def argmin(x): + return math_ops.argmin(x) + + self.assertAllClose(0, argmax(array_ops.ones([10], dtype=dtypes.float32))) + self.assertAllClose(0, argmax(array_ops.ones([10]))) + self.assertAllClose(0, argmin(array_ops.ones([10], dtype=dtypes.float32))) + self.assertAllClose(0, argmin(array_ops.ones([10]))) + + @test_util.disable_mlir_bridge('TensorArray support not implemented') + def testErrorMessagePassingTensorArray(self): + with ops.device('device:{}:0'.format(self.device)): + + @def_function.function(experimental_compile=True) + def f(x): + ta = tensor_array_ops.TensorArray( + dtype=dtypes.float32, size=1, element_shape=[]) + ta = ta.write(0, 2 * x) + y = ta.read(0) + return y + + x = constant_op.constant(3.14) + with backprop.GradientTape() as tape: + tape.watch(x) + with self.assertRaisesRegex(errors.UnimplementedError, + 'TensorList crossing the XLA/TF boundary'): + y = f(x) + tape.gradient(y, x) + + @test_util.disable_mlir_bridge('TODO(b/162281863): MLIR bridge errors out' + ' lowering TensorListConcatV2') + def testTensorListConcatV2(self): + with ops.device('device:{}:0'.format(self.device)): + + def f(x): + ta = tensor_array_ops.TensorArray( + dtype=dtypes.float32, size=2, element_shape=[3]) + ta = ta.write(0, 2 * x) + ta = ta.write(1, 3 * x) + return ta.concat() + + compiled_f = def_function.function(experimental_compile=True)(f) + + inputs = constant_op.constant([3.14, 2.68, 7.69]) + + self.assertAllClose([6.28, 5.36, 15.38, 9.42, 8.04, 23.07], f(inputs)) + + self.assertAllClose(compiled_f(inputs), f(inputs)) + + @test_util.disable_mlir_bridge('TODO(b/162281863): MLIR bridge errors out' + ' lowering TensorListConcatV2') + def testTensorListConcatV2Multidim(self): + with ops.device('device:{}:0'.format(self.device)): + + def f(x): + ta = tensor_array_ops.TensorArray( + dtype=dtypes.float32, size=2, element_shape=[3, 2]) + ta = ta.write(0, 2 * x) + ta = ta.write(1, 3 * x) + return ta.concat() + + compiled_f = def_function.function(experimental_compile=True)(f) + + inputs = constant_op.constant([[3.14, 21.1], [2.68, 22.2], [7.69, 23.3]]) + self.assertAllClose(f(inputs), compiled_f(inputs)) + + @test_util.disable_mlir_bridge('TODO(b/162281863): MLIR bridge errors out' + ' lowering TensorListConcatV2') + def testTensorListConcatV2Scalars(self): + with ops.device('device:{}:0'.format(self.device)): + + def f(x): + ta = tensor_array_ops.TensorArray( + dtype=dtypes.float32, size=2, element_shape=[1]) + ta = ta.write(0, 2 * x) + ta = ta.write(1, 3 * x) + return ta.concat() + + compiled_f = def_function.function(experimental_compile=True)(f) + inputs = constant_op.constant([3.14]) + self.assertAllClose(f(inputs), compiled_f(inputs)) + + @test_util.disable_mlir_bridge('TODO(b/162281863): MLIR bridge errors out' + ' lowering TensorListConcatV2') + def testTensorListConcatGrad(self): + with ops.device('device:{}:0'.format(self.device)): + + def f(x): + ta = tensor_array_ops.TensorArray( + dtype=dtypes.float32, size=2, element_shape=[3]) + ta = ta.write(0, 2 * x) + ta = ta.write(1, 3 * x) + return ta.concat() + + def g(): + x = constant_op.constant([3.14, 2.68, 7.69]) + with backprop.GradientTape() as tape: + tape.watch(x) + y = f(x) + return 
tape.gradient(y, x) + + compiled_g = def_function.function(experimental_compile=True)(g) + + self.assertAllClose([5.0, 5.0, 5.0], g()) + self.assertAllClose(compiled_g(), g()) + + @test_util.disable_mlir_bridge('TODO(b/162281863): MLIR bridge errors out' + ' lowering TensorListConcatV2') + def testTensorListConcatGradNestedCompile(self): + with ops.device('device:{}:0'.format(self.device)): + + @def_function.function(experimental_compile=True) + def f(x): + ta = tensor_array_ops.TensorArray( + dtype=dtypes.float32, size=2, element_shape=[3]) + ta = ta.write(0, 2 * x) + ta = ta.write(1, 3 * x) + return ta.concat() + + @def_function.function(experimental_compile=True) + def g(): + x = constant_op.constant([3.14, 2.68, 7.69]) + with backprop.GradientTape() as tape: + tape.watch(x) + y = f(x) + out = tape.gradient(y, x) + return out + + self.assertAllClose([5.0, 5.0, 5.0], g()) + + def testCumsum(self): + with ops.device('device:{}:0'.format(self.device)): + + @def_function.function(experimental_compile=True) + def f(x): + return math_ops.cumsum(x) + + f64_input = constant_op.constant([1.1, 2.2, 3.3], dtype=dtypes.float64) + self.assertAllClose([1.1, 3.3, 6.6], f(f64_input)) + + def testNoExcessiveRetracing(self): + with ops.device('device:{}:0'.format(self.device)): + inner_retracings = 0 + + @def_function.function(experimental_compile=True) + def inner(a, b): + nonlocal inner_retracings + inner_retracings += 1 + return a * b + a + + def outer(a, b): + return inner(a, b) + + func_input = random_ops.random_normal([10, 10]) + for _ in range(2): + def_function.function(outer)(func_input, func_input) + + self.assertEqual(inner_retracings, 1) + + def testUpdateVariable(self): + with ops.device('device:{}:0'.format(self.device)): + v = variables.Variable(3.1) + + @def_function.function(experimental_compile=True) + def update_var(a, b): + v.assign_add(a * b) + + update_var(constant_op.constant(0.7), constant_op.constant(0.6)) + self.assertAllClose(v, 3.52) + + def testUpdateVariableVector(self): + with ops.device('device:{}:0'.format(self.device)): + v = variables.Variable([3.1, 3.1]) + + @def_function.function(experimental_compile=True) + def update_var(a, b): + v.assign_add(a * b) + + update_var( + constant_op.constant([0.7, 0.7]), constant_op.constant([0.6, 0.6])) + self.assertAllClose(v, [3.52, 3.52]) + + def testUpdateVariableInClass(self): + with ops.device('device:{}:0'.format(self.device)): + + class C(object): + + @def_function.function(experimental_compile=True) + def update_var(self, a, b): + if not hasattr(self, 'v'): + self.v = variables.Variable(3.1) + self.v.assign_add(a * b) + + c = C() + + @def_function.function + def outer(): + c.update_var(constant_op.constant(0.7), constant_op.constant(0.6)) + + outer() + self.assertAllClose(c.v, 3.52) def testUpdateVariableMultipleOutputs(self): - v = variables.Variable(3.1) + with ops.device('device:{}:0'.format(self.device)): + v = variables.Variable(3.1) - @def_function.function(experimental_compile=True) - def update_var(a, b): - v.assign_add(a * b) - return a * b + v + @def_function.function(experimental_compile=True) + def update_var(a, b): + v.assign_add(a * b) + return a * b + v - out = update_var(constant_op.constant(0.7), constant_op.constant(0.6)) - self.assertAllClose(v, 3.52) - self.assertAllClose(out, 3.94) + out = update_var(constant_op.constant(0.7), constant_op.constant(0.6)) + self.assertAllClose(v, 3.52) + self.assertAllClose(out, 3.94) def testReturnIdentity(self): + with ops.device('device:{}:0'.format(self.device)): - 
@def_function.function(experimental_compile=True) - def f(a, b): - return (a, b) + @def_function.function(experimental_compile=True) + def f(a, b): + return (a, b) - a = constant_op.constant([0.7]) - b = constant_op.constant([0.6]) + a = constant_op.constant([0.7]) + b = constant_op.constant([0.6]) - f(a, b) + f(a, b) if __name__ == '__main__': From 040d0354124adf1f47e616bda0858a39014c934a Mon Sep 17 00:00:00 2001 From: Ran Chen Date: Wed, 29 Jul 2020 19:34:10 -0700 Subject: [PATCH 1686/2522] Delete mirrored_function_strategy and its test PiperOrigin-RevId: 323919899 Change-Id: I007ba0c224d81b33ccaf6f6d5d38b53c5617cef8 --- tensorflow/python/distribute/BUILD | 38 ---- .../distribute/mirrored_function_strategy.py | 199 ------------------ .../mirrored_function_strategy_test.py | 102 --------- 3 files changed, 339 deletions(-) delete mode 100644 tensorflow/python/distribute/mirrored_function_strategy.py delete mode 100644 tensorflow/python/distribute/mirrored_function_strategy_test.py diff --git a/tensorflow/python/distribute/BUILD b/tensorflow/python/distribute/BUILD index c802a5b47e6..11d1be1bdc3 100644 --- a/tensorflow/python/distribute/BUILD +++ b/tensorflow/python/distribute/BUILD @@ -434,44 +434,6 @@ py_library( ], ) -py_library( - name = "mirrored_function_strategy", - srcs = ["mirrored_function_strategy.py"], - deps = [ - ":device_util", - ":distribute_lib", - ":mirrored_strategy", - ":numpy_dataset", - ":values", - "//tensorflow/python:constant_op", - "//tensorflow/python:dtypes", - "//tensorflow/python:framework_ops", - "//tensorflow/python:tensor_spec", - "//tensorflow/python:type_spec", - "//tensorflow/python:util", - "//tensorflow/python/eager:context", - "//tensorflow/python/eager:def_function", - ], -) - -tf_py_test( - name = "mirrored_function_strategy_test", - srcs = ["mirrored_function_strategy_test.py"], - python_version = "PY3", - tags = ["no_pip"], - deps = [ - ":distribute_lib", - ":mirrored_function_strategy", - ":strategy_combinations", - ":values", - "//tensorflow/python:constant_op", - "//tensorflow/python:framework_ops", - "//tensorflow/python:tensor_util", - "//tensorflow/python/eager:def_function", - "//tensorflow/python/eager:test", - ], -) - py_library( name = "multi_worker_util", srcs = [ diff --git a/tensorflow/python/distribute/mirrored_function_strategy.py b/tensorflow/python/distribute/mirrored_function_strategy.py deleted file mode 100644 index 57e2f2e5c69..00000000000 --- a/tensorflow/python/distribute/mirrored_function_strategy.py +++ /dev/null @@ -1,199 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Class MirroredFunctionStrategy implementing tf.distribute.Strategy.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import threading - -from tensorflow.python.distribute import device_util -from tensorflow.python.distribute import distribute_lib -from tensorflow.python.distribute import distribute_utils -from tensorflow.python.distribute import distribution_strategy_context -from tensorflow.python.distribute import mirrored_strategy -from tensorflow.python.distribute import values -from tensorflow.python.eager import context -from tensorflow.python.eager import def_function -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_spec -from tensorflow.python.util import nest - - -_replica_index = threading.local() -_replica_id_key = object() - - -def _replica_id_tensor(): - return ops.get_default_graph().capture_call_time_value( - closure=lambda: constant_op.constant(_replica_index.current), - spec=tensor_spec.TensorSpec((), dtypes.int32), - key=_replica_id_key) - - -def _in_run(): - return (hasattr(_replica_index, "current") and - _replica_index.current is not None) - - -def _outside_run_graph(): - if hasattr(_replica_index, "graph_outside_run"): - return _replica_index.graph_outside_run - else: - return None - - -class MirroredFunctionStrategy(distribute_lib.Strategy): - """Mirrors vars to distribute across multiple devices and machines. - - This strategy uses one replica per device and sync replication for its - multi-GPU version. Unlike `tf.distribute.MirroredStrategy`, it creates a - function for a single replica, and calls that function repeatedly instead of - recording the operations for each replica separately. - """ - - def __init__(self, devices=None): - """Create an instance of `MirroredFunctionStrategy`. - - Args: - devices: a list of device strings. If `None`, all available GPUs are - used. If no GPUs are found, CPU is used. - """ - extended = MirroredFunctionExtended(self, devices) - super(MirroredFunctionStrategy, self).__init__(extended) - - -# TODO(josh11b): Switch to V2 when we no longer need to support tf.compat.v1. -class MirroredFunctionExtended(distribute_lib.StrategyExtendedV1): - """Implementation of MirroredFunctionStrategy.""" - - def __init__(self, container_strategy, devices): - super(MirroredFunctionExtended, self).__init__(container_strategy) - if devices is None: - devices = mirrored_strategy.all_devices() - if not devices: - raise ValueError("Got an empty `devices` list. Please make sure the " - "`devices` you pass in is not empty.") - device_tuple = tuple(device_util.resolve(d) for d in devices) - assert len(set(device_tuple)) == len(device_tuple), ( - "No duplicates allowed in `devices` argument: %s" % (devices,)) - self._devices = device_tuple - self._retrace_functions_for_each_device = False - - def _call_for_each_replica(self, fn, args, kwargs): - # For now, `fn` must be an @tf.function. - # TODO(josh11b): Relax this restriction? Main problem is if - # (a) executing eagerly, (b) `fn` not @tf.function, and - # (c) executed frequently. - assert isinstance(fn, def_function.Function) - - if _outside_run_graph() is not None: - # Nested case, should just use outer function's context for things like - # the current replica index. - # TODO(josh11b): Test this case! 
- with MirroredFunctionReplicaContext(self._container_strategy()): - results = fn(*nest.map_structure(_unwrap_tensors, args), - **nest.map_structure(_unwrap_tensors, kwargs)) - return nest.map_structure(_wrap_tensors, results) - - _replica_index.graph_outside_run = ops.get_default_graph() - return_values = [] - - try: - with MirroredFunctionReplicaContext(self._container_strategy()): - for index, device in enumerate(self._devices): - _replica_index.current = index - with ops.device(device): - if context.executing_eagerly(): - # NOTE: These functions need to execute concurrently if they - # use a collective op. This is a particular concern with eager - # execution. - with context.execution_mode(context.ASYNC): - return_values.append( - fn(*distribute_utils.select_replica(index, args), - **distribute_utils.select_replica(index, kwargs))) - else: - return_values.append( - fn(*distribute_utils.select_replica(index, args), - **distribute_utils.select_replica(index, kwargs))) - finally: - _replica_index.graph_outside_run = None - _replica_index.current = None - - return distribute_utils.regroup(return_values) - - def _local_results(self, val): - if isinstance(val, values.DistributedValues): - return val.values - return (val,) - - -class FnMergedValue(object): - - def __init__(self, value): - self._value = value - - -def _wrap_tensors(maybe_tensor): - if isinstance(maybe_tensor, ops.Tensor): # TODO(josh11b): or composite tensor? - return FnMergedValue(maybe_tensor) - return maybe_tensor - - -def _unwrap_tensors(maybe_wrapped): - if isinstance(maybe_wrapped, FnMergedValue): - return maybe_wrapped._value # pylint: disable=protected-access - return maybe_wrapped - - -class MirroredFunctionReplicaContext(distribute_lib.ReplicaContext): - """ReplicaContext used in MirroredFunctionStrategy.""" - - def __init__(self, strategy): - distribute_lib.ReplicaContext.__init__(self, strategy, None) - - @property - def replica_id_in_sync_group(self): - return _replica_id_tensor() - - @replica_id_in_sync_group.setter - def replica_id_in_sync_group(self, value): - assert value is None - - def _merge_call(self, merge_fn, args, kwargs): - # We wrap all args/kwargs with tensor values in a class that prevents them - # for being used by anything other than MirroredFunctionStrategy APIs that - # have been specifically written to recognize the wrapper and unwrap the - # values (such as extended.reduce_to/update). - - # TODO(josh11b): Should these set expand_composites=True? - args = nest.map_structure(_wrap_tensors, args) - kwargs = nest.map_structure(_wrap_tensors, kwargs) - # pylint: disable=protected-access - distribution_strategy_context._push_per_thread_mode( - distribution_strategy_context._CrossReplicaThreadMode(self._strategy)) - try: - results = merge_fn(self._strategy, *args, **kwargs) - finally: - distribution_strategy_context._pop_per_thread_mode() - # pylint: enable=protected-access - return nest.map_structure(_unwrap_tensors, results) - - @property - def devices(self): - raise RuntimeError("Can't get the devices for the current replica.") diff --git a/tensorflow/python/distribute/mirrored_function_strategy_test.py b/tensorflow/python/distribute/mirrored_function_strategy_test.py deleted file mode 100644 index c883241114e..00000000000 --- a/tensorflow/python/distribute/mirrored_function_strategy_test.py +++ /dev/null @@ -1,102 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for MirroredFunctionStrategy.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.distribute import distribution_strategy_context -from tensorflow.python.distribute import mirrored_function_strategy -from tensorflow.python.distribute import strategy_combinations -from tensorflow.python.distribute import values -from tensorflow.python.eager import def_function -from tensorflow.python.eager import test -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_util - - -class MirroredFunctionStrategyTest(test.TestCase): - - def setUp(self): - super(MirroredFunctionStrategyTest, self).setUp() - strategy_combinations.set_virtual_cpus_to_at_least(3) - self._strategy = mirrored_function_strategy.MirroredFunctionStrategy( - devices=("/cpu:1", "/cpu:2")) - - def testReplicaId(self): - f_traces = [] - - @def_function.function - def f(x): - f_traces.append(None) # Only happens on trace. - replica_context = distribution_strategy_context.get_replica_context() - # This is a non-constant tensor. - replica_id = replica_context.replica_id_in_sync_group - self.assertIsInstance(replica_id, ops.Tensor) - self.assertIsNone(tensor_util.constant_value(replica_id)) - return x + replica_id - - one = constant_op.constant(1) - self.assertLen(f_traces, 0) - result1 = self._strategy.run(f, args=(one,)) - self.assertLen(f_traces, 1) # Function traced once, not for each replica. - # Returns a per-replica value. - self.assertIsInstance(result1, values.PerReplica) - self.assertAllEqual([1, 2], - self._strategy.experimental_local_results(result1)) - - # Try passing a per-replica value as an argument. - result2 = self._strategy.run(f, args=(result1,)) - self.assertLen(f_traces, 1) - self.assertIsInstance(result2, values.PerReplica) - self.assertAllEqual([1, 3], - self._strategy.experimental_local_results(result2)) - - def testMergeCall(self): - f_traces = [] - g_traces = [] - - def g(strategy, z): - g_traces.append(None) # Only happens on trace. - self.assertIs(strategy, self._strategy) - self.assertTrue(distribution_strategy_context.in_cross_replica_context()) - self.assertIsInstance(z, mirrored_function_strategy.FnMergedValue) - return z - - @def_function.function - def f(x): - f_traces.append(None) # Only happens on trace. - replica_context = distribution_strategy_context.get_replica_context() - y = replica_context.merge_call(g, args=(x,)) - self.assertIsInstance(y, ops.Tensor) - return y - - one = constant_op.constant(1) - self.assertLen(f_traces, 0) - self.assertLen(g_traces, 0) - result = self._strategy.run(f, args=(one,)) - # Functions traced once, not for each replica. - self.assertLen(f_traces, 1) - self.assertLen(g_traces, 1) - # Returns a per-replica value. 
- self.assertIsInstance(result, values.PerReplica) - self.assertAllEqual([1, 1], - self._strategy.experimental_local_results(result)) - - -if __name__ == "__main__": - test.main() From 3ee58683132495864102fb08cd2b1caf56377a93 Mon Sep 17 00:00:00 2001 From: Chao Mei Date: Wed, 29 Jul 2020 19:40:02 -0700 Subject: [PATCH 1687/2522] 1. Create an external delegate adaptor to illustrate the use of external delegate as an alternative way for testing, benchmarking and evaluation. 2. Fixed a memory bug in parsing and using external delegate options in external delegate provider. PiperOrigin-RevId: 323920482 Change-Id: Id258ccd48c924dc4b438293d2dd6776285958d81 --- .../lite/delegates/utils/dummy_delegate/BUILD | 15 +++ .../delegates/utils/dummy_delegate/README.md | 76 +++++++++++-- .../external_delegate_adaptor.cc | 106 ++++++++++++++++++ .../delegates/external_delegate_provider.cc | 16 ++- 4 files changed, 200 insertions(+), 13 deletions(-) create mode 100644 tensorflow/lite/delegates/utils/dummy_delegate/external_delegate_adaptor.cc diff --git a/tensorflow/lite/delegates/utils/dummy_delegate/BUILD b/tensorflow/lite/delegates/utils/dummy_delegate/BUILD index a451b51e47f..fd4e6e9eedb 100644 --- a/tensorflow/lite/delegates/utils/dummy_delegate/BUILD +++ b/tensorflow/lite/delegates/utils/dummy_delegate/BUILD @@ -22,6 +22,21 @@ cc_library( ], ) +cc_binary( + name = "dummy_external_delegate.so", + srcs = [ + "external_delegate_adaptor.cc", + ], + linkshared = 1, + linkstatic = 1, + deps = [ + ":dummy_delegate", + "//tensorflow/lite/c:common", + "//tensorflow/lite/tools:command_line_flags", + "//tensorflow/lite/tools:logging", + ], +) + #### The following are for using the dummy test delegate in TFLite tooling #### cc_library( name = "dummy_delegate_provider", diff --git a/tensorflow/lite/delegates/utils/dummy_delegate/README.md b/tensorflow/lite/delegates/utils/dummy_delegate/README.md index e77d92b9d8a..ae17f1b67d3 100644 --- a/tensorflow/lite/delegates/utils/dummy_delegate/README.md +++ b/tensorflow/lite/delegates/utils/dummy_delegate/README.md @@ -20,18 +20,32 @@ the ideas above. For more sophisticated examples, refer to [Flex delegate](https ## Testing & Tooling -We recommend levaraging the -[delegate registrar](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/tools/delegates) -to plug in the newly created TFLite delegate to reuse existing TFLite kernel -tests and utility tools including the model benchmark tool and the task -evaluation tools. In short, create a delegate provider like the -[`dummy_delegate_provider`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/utils/dummy_delegate/dummy_delegate_provider.cc) +There are currently **two options** to plug in a newly created TFLite delegate +to reuse existing TFLite kernel tests and tooling: + +- Utilize the **[delegate registrar](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/tools/delegates)** +mechanism +- Utilize the +**[external delegate](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/delegates/external)** +mechanism. + +The former approach requires few changes as detailed below. The latter one +requires even fewer changes and works with pre-built Tensorflow Lite tooling +binaries. However, it is less explicit and it might be more complicated to set +up in automated integration tests. Therefore, for better clarity, the +delegate-registrar approach is slightly preferred here.
+ +We now describe each option above in more details in the following sections. + +### Option 1: Utilize Delegate Registrar +In this approach, create a delegate provider like the +[`dummy_delegate_provider.cc`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/utils/dummy_delegate/dummy_delegate_provider.cc) here, and then add it as an extra dependency when building the binary. Refer [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/tools/delegates) -for more delegate provider examples. The following details the above in the -context of this dummy delegate. +for more delegate provider examples. Now we look at using this provider for +testing and evaluation. -###Kernel Tests +#### Kernel Tests Tests referred here are defined in [tensorflow/lite/kernels](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/kernels). They are based on the [test_util library](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/kernels/test_util.h) @@ -64,12 +78,12 @@ bazel build -c opt tensorflow/lite/kernels:add_test bazel-bin/tensorflow/lite/kernels/add_test --use_dummy_delegate=true ``` -### Benchmark and Task Evaluation Tools +#### Benchmark and Task Evaluation Tools In TFLite, we have developed [model benchmark tool](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/tools/benchmark) and -[task evaluation tools](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/tools/evaluation/tasks) +[evaluation tools](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/tools/evaluation/tasks) that already have integrated existing various TFLite delegates. To reuse these tools for the new delegate, similar to the kernel testing above, we simply add the created delegate provider as an additional dependency when building the @@ -107,4 +121,44 @@ bazel-bin/tensorflow/lite/delegates/utils/dummy_delegate/benchmark_model_plus_du ``` +### Option 2: Utilize Tensorflow Lite External Delegate +In this **alternative approach to reuse existing Tensorflow Lite kernel testing +and tooling**, we first create an external delegate adaptor like the [`external_delegate_adaptor.cc`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/utils/dummy_delegate/external_delegate_adaptor.cc) here, and create the corresponding BUILD target +to build a dynamic library. + +Afterwards, one could build binaries or use pre-built ones that are linked with +the +[`external_delegate_provider`](https://github.com/tensorflow/tensorflow/blob/8c6f2d55762f3fc94f98fdd8b3c5d59ee1276dba/tensorflow/lite/tools/delegates/BUILD#L145-L159) +library which supports command-line flags as described +[here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/tools/delegates#external-delegate-provider). +Note this delegate provider has already been linked to existing testing and +tooling binaries. + +For example, the following illustrates how to benchmark the dummy delegate here +via this external-delegate approach. We could use similar commands for testing +and evaluation tools. + +``` +bazel build -c opt tensorflow/lite/delegates/utils/dummy_delegate:dummy_external_delegate.so + +# Copy the .so file to the directory that the external delegate will be loaded +# from at your choice. 
+cp bazel-bin/tensorflow/lite/delegates/utils/dummy_delegate/dummy_external_delegate.so /tmp + +bazel build -c opt tensorflow/lite/tools/benchmark:benchmark_model + +# Setting a non-empty --external_delegate_path value will trigger applying +# the external delegate during runtime. +bazel-bin/tensorflow/lite/tools/benchmark/benchmark_model \ + --graph=/tmp/mobilenet-v2.tflite \ + --external_delegate_path=/tmp/dummy_external_delegate.so \ + --external_delegate_options='error_during_init:true;error_during_prepare:true' +``` + +It is worth noting the *external delegate* is the corresponding C++ +implementation of the *delegate* in Tensorflow Lite Python binding as shown +[here](https://github.com/tensorflow/tensorflow/blob/7145fc0e49be01ef6943f4df386ce38567e37797/tensorflow/lite/python/interpreter.py#L42). +Therefore, the dynamic external delegate adaptor library created here could be +directly used with Tensorflow Lite Python APIs. + More detailed guide on TFLite delegate is coming soon. diff --git a/tensorflow/lite/delegates/utils/dummy_delegate/external_delegate_adaptor.cc b/tensorflow/lite/delegates/utils/dummy_delegate/external_delegate_adaptor.cc new file mode 100644 index 00000000000..7ae6539e9ba --- /dev/null +++ b/tensorflow/lite/delegates/utils/dummy_delegate/external_delegate_adaptor.cc @@ -0,0 +1,106 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include + +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/delegates/utils/dummy_delegate/dummy_delegate.h" +#include "tensorflow/lite/tools/command_line_flags.h" +#include "tensorflow/lite/tools/logging.h" + +namespace tflite { +namespace tools { + +TfLiteDelegate* CreateDummyDelegateFromOptions(char** options_keys, + char** options_values, + size_t num_options) { + DummyDelegateOptions options = TfLiteDummyDelegateOptionsDefault(); + + // Parse key-values options to DummyDelegateOptions by mimicking them as + // command-line flags. 
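+ // For example, an option pair ("error_during_init", "true") received through
+ // the external delegate C API is rewritten below as the pseudo flag
+ // "--error_during_init=true", so the key/value options can be parsed with the
+ // same tflite::Flags::Parse machinery used for real command-line flags.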
+ std::unique_ptr argv = + std::unique_ptr(new const char*[num_options + 1]); + constexpr char kDummyDelegateParsing[] = "dummy_delegate_parsing"; + argv.get()[0] = kDummyDelegateParsing; + + std::vector option_args; + option_args.reserve(num_options); + for (int i = 0; i < num_options; ++i) { + option_args.emplace_back("--"); + option_args.rbegin()->append(options_keys[i]); + option_args.rbegin()->push_back('='); + option_args.rbegin()->append(options_values[i]); + argv.get()[i + 1] = option_args.rbegin()->c_str(); + } + + constexpr char kAllowedBuiltinOp[] = "allowed_builtin_code"; + constexpr char kReportErrorDuingInit[] = "error_during_init"; + constexpr char kReportErrorDuingPrepare[] = "error_during_prepare"; + constexpr char kReportErrorDuingInvoke[] = "error_during_invoke"; + + std::vector flag_list = { + tflite::Flag::CreateFlag(kAllowedBuiltinOp, &options.allowed_builtin_code, + "Allowed builtin code."), + tflite::Flag::CreateFlag(kReportErrorDuingInit, + &options.error_during_init, + "Report error during init."), + tflite::Flag::CreateFlag(kReportErrorDuingPrepare, + &options.error_during_prepare, + "Report error during prepare."), + tflite::Flag::CreateFlag(kReportErrorDuingInvoke, + &options.error_during_invoke, + "Report error during invoke."), + }; + + int argc = num_options + 1; + if (!tflite::Flags::Parse(&argc, argv.get(), flag_list)) { + return nullptr; + } + + TFLITE_LOG(INFO) << "Dummy delegate: allowed_builtin_code set to " + << options.allowed_builtin_code << "."; + TFLITE_LOG(INFO) << "Dummy delegate: error_during_init set to " + << options.error_during_init << "."; + TFLITE_LOG(INFO) << "Dummy delegate: error_during_prepare set to " + << options.error_during_prepare << "."; + TFLITE_LOG(INFO) << "Dummy delegate: error_during_invoke set to " + << options.error_during_invoke << "."; + + return TfLiteDummyDelegateCreate(&options); +} + +} // namespace tools +} // namespace tflite + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +// Defines two symbols that need to be exported to use the TFLite external +// delegate. See tensorflow/lite/delegates/external for details. +TFL_CAPI_EXPORT TfLiteDelegate* tflite_plugin_create_delegate( + char** options_keys, char** options_values, size_t num_options, + void (*report_error)(const char*)) { + return tflite::tools::CreateDummyDelegateFromOptions( + options_keys, options_values, num_options); +} + +TFL_CAPI_EXPORT void tflite_plugin_destroy_delegate(TfLiteDelegate* delegate) { + TfLiteDummyDelegateDelete(delegate); +} + +#ifdef __cplusplus +} +#endif // __cplusplus diff --git a/tensorflow/lite/tools/delegates/external_delegate_provider.cc b/tensorflow/lite/tools/delegates/external_delegate_provider.cc index f61f90127a9..f3ae9a02ca9 100644 --- a/tensorflow/lite/tools/delegates/external_delegate_provider.cc +++ b/tensorflow/lite/tools/delegates/external_delegate_provider.cc @@ -88,11 +88,23 @@ TfLiteDelegatePtr ExternalDelegateProvider::CreateTfLiteDelegate( const std::vector options = SplitString(params.Get("external_delegate_options"), ';'); std::vector keys, values; + // We reserve the memory here to avoid memory pointer change during + // insertion to vectors above. 
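+ // If the vectors reallocated while growing, the c_str() pointers already
+ // handed to delegate_options.insert() below could be left dangling (short
+ // strings live inside the std::string object itself), so the final size is
+ // reserved before any insertion happens.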
+ keys.reserve(options.size()); + values.reserve(options.size()); for (const auto& option : options) { auto key_value = SplitString(option, ':'); if (key_value.size() == 2) { - delegate_options.insert(&delegate_options, key_value[0].c_str(), - key_value[1].c_str()); + // The inserted (key,value) pair has to outlive the + // TfLiteExternalDelegateCreate call, therefore, we use two vectors + // 'keys' and 'values' to achieve this. + // Also, we will insert the memory pointer of key and value to + // delegate_options later, we have to ensure the pointer won't change by + // reserving the memory earlier. + keys.emplace_back(key_value[0]); + values.emplace_back(key_value[1]); + delegate_options.insert(&delegate_options, keys.back().c_str(), + values.back().c_str()); } } From 74a0b00ea2006ea972baeea81467d577b5e3057c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 29 Jul 2020 19:42:46 -0700 Subject: [PATCH 1688/2522] [XLA] Add support for moving instructions inside conditional branches. The existing implementation for supporting moving instructions outside of branches has been extended to allow movement in the opposite direction. A new transformation has been added to modify the HLO accordingly. PiperOrigin-RevId: 323920746 Change-Id: Icfa95fe4a53ae2e6b7bc7a003b3318d41ff21b41 --- .../xla/service/conditional_code_motion.cc | 448 +++++++++++++----- .../xla/service/conditional_code_motion.h | 11 +- .../service/conditional_code_motion_test.cc | 62 ++- 3 files changed, 393 insertions(+), 128 deletions(-) diff --git a/tensorflow/compiler/xla/service/conditional_code_motion.cc b/tensorflow/compiler/xla/service/conditional_code_motion.cc index 5d858096679..7609eafbc97 100644 --- a/tensorflow/compiler/xla/service/conditional_code_motion.cc +++ b/tensorflow/compiler/xla/service/conditional_code_motion.cc @@ -18,7 +18,6 @@ limitations under the License. #include #include #include -#include #include #include @@ -48,47 +47,64 @@ namespace xla { namespace conditional_opt { -// Visit the root instructions to its operands follow BFS. -// Will visit an instructions after all its users have been visited. Parameters -// are not visited. class BoundaryVisitor { public: // start with an existing conditional computation. explicit BoundaryVisitor(HloInstruction* conditional) { Boundary b(Boundary::Position::kInsideBranch); - b.Operands().push_back(conditional); + b.mutable_operands().push_back(conditional); worklist_.push_back(b); } // Start with an empty work list. BoundaryVisitor() {} - // Get next intruction to visit. + // Get next boundary to visit. Boundary PopNextBoundary() { CHECK(!worklist_.empty()); - Boundary inst = worklist_.front(); + Boundary b = worklist_.front(); worklist_.pop_front(); - return inst; + // if b is already visited, it must have multiple users and is already in + // new boundaries. Skip it. Only checking the first operand of b because b + // is expected to have at least one operand, and all the operands in b + // must be identical instructions from different branches for b to be moved. 
+ while (!worklist_.empty() && ContainsKey(visited_, b.operands()[0])) { + b = worklist_.front(); + worklist_.pop_front(); + } + visited_.insert(b.operands()[0]); + return b; } void AddToWorkList(const Boundary& b) { - CHECK(!b.Operands().empty()); + CHECK(!b.operands().empty()); worklist_.push_back(b); } - bool HasNextBoundary() const { return !worklist_.empty(); } + bool HasNextBoundary() { + while (!worklist_.empty()) { + Boundary b = worklist_.front(); + if (!ContainsKey(visited_, b.operands()[0])) { + break; + } + worklist_.pop_front(); + } + return !worklist_.empty(); + } private: // worklist is the deque that contains instructions to be visited. std::deque worklist_; + absl::flat_hash_set visited_; }; -// Returns estimation of potential reuses carried by a given instruction. -// Use different integers to classify different levels of reuses -// This is used as a placeholder only, assuming all instructions can be -// fused to enable data reuses +// Returns estimation of potential reuses carried by a given pair of +// instructions. Use different integers to classify different levels +// of reuses This is used as a placeholder only, assuming all +// instructions can be fused to enable data reuses int64 ReusesCarriedBy(HloInstruction* op, HloInstruction* user) { VLOG(1) << "ConditionalCodeMotion: Add reuses carried by instr: " << op->ToString() << "=>" << user->ToString() << "\n"; switch (user->opcode()) { case HloOpcode::kGetTupleElement: + case HloOpcode::kTuple: return 0; default: break; @@ -96,6 +112,7 @@ int64 ReusesCarriedBy(HloInstruction* op, HloInstruction* user) { switch (op->opcode()) { // These instructions are lightweight and easy to fuse. case HloOpcode::kConstant: + case HloOpcode::kGetTupleElement: return 0; default: // Assume fusion will not happen anyway if user count > 1) @@ -150,27 +167,42 @@ bool InstructionWithinBranchIdentical( }); } -// Copy identical instructions within conditional outside of conditional. -Status CopyOutOfConditional( - Boundary& boundary, HloComputation* conditional_parent, - absl::flat_hash_map& - hoisted_instructions) { - // Insert GetTupleElement before the instructions whose operands might still - // be within the conditional. - HloInstruction* op = boundary.Operands()[0]; +// Copy the ith instruction in boundary to outside of conditional, or do the +// opposite (for moving in). 
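+// dest_index selects which entry of a previously copied boundary is used as
+// the operand of the new copy: it is always 0 when hoisting out (the single
+// copy made in the conditional's parent computation), and it is the branch
+// number when sinking in, where the caller invokes this once per branch with
+// `parent` set to that branch's computation.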
+Status CopyInOrOutOfConditional( + Boundary& boundary, int64 dest_index, HloComputation* parent, + absl::flat_hash_map& hoisted_instructions) { + CHECK(dest_index == 0 || boundary.IsOutsideBranch()); + HloInstruction* op = boundary.operands()[0]; absl::InlinedVector new_operands; for (int i = 0; i < op->operands().size(); ++i) { auto op_i = op->operands()[i]; VLOG(2) << "Looking for operand:" << op_i->ToString() << "\n"; - CHECK(ContainsKey(hoisted_instructions, op_i)); - new_operands.push_back(FindOrDie(hoisted_instructions, op_i)); + if (ContainsKey(hoisted_instructions, op_i)) { + auto new_op_i = + FindOrDie(hoisted_instructions, op_i).operands()[dest_index]; + VLOG(2) << "new operand:" << new_op_i->ToString() << "\n"; + new_operands.push_back(new_op_i); + } else { + CHECK(op_i->opcode() == HloOpcode::kConstant); + auto new_op_i = parent->AddInstruction(op_i->Clone()); + VLOG(2) << "new operand:" << new_op_i->ToString() << "\n"; + new_operands.push_back(new_op_i); + } } - HloInstruction* new_instruction = conditional_parent->AddInstruction( + HloInstruction* new_instruction = parent->AddInstruction( op->CloneWithNewOperands(op->shape(), new_operands)); + VLOG(2) << "new instruction:" << new_instruction->ToString() << "\n"; // Maps the instruction outside of conditional to the instruction // inside of the conditional. - for (HloInstruction* op : boundary.Operands()) { - hoisted_instructions[op] = new_instruction; + for (HloInstruction* op : boundary.operands()) { + Boundary b2 = ContainsKey(hoisted_instructions, op) + ? hoisted_instructions[op] + : Boundary(boundary.IsOutsideBranch() + ? Boundary::Position::kInsideBranch + : Boundary::Position::kOutsideBranch); + b2.mutable_operands().push_back(new_instruction); + hoisted_instructions[op] = b2; } return Status::OK(); } @@ -302,7 +334,7 @@ StatusOr ConvertSpecialMove(HloInstruction* conditional, old_root = conditional->branch_computation(branch)->root_instruction(); absl::flat_hash_map map_inst_to_tuple_index; std::vector new_operands(old_root->operand_count()); - std::unordered_set to_hoist_set; + absl::flat_hash_set to_hoist_set; for (int64 operand_num = 0; operand_num < old_root->operand_count(); ++operand_num) { @@ -404,28 +436,32 @@ StatusOr ConditionalCodeMotion::MoveInstructionOut( HloComputation* conditional_parent = conditional->parent(); // save the old users before add new conditional user instructions std::vector old_conditional_users = conditional->users(); - absl::flat_hash_map hoisted_instructions; // Maps instructions in the conditional body to instructions hoisted outside // the conditional that compute the same value. + absl::flat_hash_map hoisted_instructions; + // Insert GetTupleElement before the instructions whose operands might still + // be within the conditional. VLOG(2) << "before opt:" << conditional_parent->ToString(HloPrintOptions::Fingerprint()) << "\n"; int64 op_index = 0; for (Boundary b : new_boundaries) { - HloInstruction* op = b.Operands()[0]; + HloInstruction* op = b.operands()[0]; CHECK(op != nullptr); VLOG(2) << "Mapping new boundary instr: " << op->ToString() << "\n"; HloInstruction* gtr = conditional_parent->AddInstruction( HloInstruction::CreateGetTupleElement(op->shape(), conditional, op_index++)); - hoisted_instructions[op] = gtr; + Boundary b2(Boundary::Position::kOutsideBranch); + b2.mutable_operands().push_back(gtr); + hoisted_instructions[op] = b2; } // Copy boundary instructions out of the conditional. 
// Visit the operands before its users and copy it, so that the copied // user will point to the correct operand. for (int64 i = to_move_out.size() - 1; i >= 0; i--) { - TF_RETURN_IF_ERROR(CopyOutOfConditional(to_move_out[i], conditional_parent, - hoisted_instructions)); + TF_RETURN_IF_ERROR(CopyInOrOutOfConditional( + to_move_out[i], 0, conditional_parent, hoisted_instructions)); } VLOG(2) << "Done copy branch instructions out\n" << conditional_parent->ToString(HloPrintOptions::Fingerprint()) @@ -438,7 +474,7 @@ StatusOr ConditionalCodeMotion::MoveInstructionOut( auto tuple_opd = static_cast(user_instr); int64 index = tuple_opd->tuple_index(); HloInstruction* old_opd = old_root->operands()[index]; - HloInstruction* new_opd = hoisted_instructions[old_opd]; + HloInstruction* new_opd = hoisted_instructions[old_opd].operands()[0]; CHECK(old_opd != nullptr); CHECK(new_opd != nullptr); TF_RETURN_IF_ERROR(user_instr->ReplaceAllUsesWith(new_opd)); @@ -450,7 +486,7 @@ StatusOr ConditionalCodeMotion::MoveInstructionOut( auto computation = conditional->branch_computation(i); std::vector elements; for (auto b1 : new_boundaries) { - HloInstruction* op = b1.Operands()[i]; + HloInstruction* op = b1.operands()[i]; VLOG(1) << "branch count=" << i << "\n"; CHECK(op != nullptr); VLOG(1) << "Adding to root " << i << " with " << op->ToString() << "\n"; @@ -463,7 +499,7 @@ StatusOr ConditionalCodeMotion::MoveInstructionOut( // Remove hoisted instructions from the branches. for (auto b2 : to_move_out) { VLOG(2) << "Removing boundary:" << b2.ToString() << "\n"; - TF_RETURN_IF_ERROR(computation->RemoveInstruction(b2.Operands()[i])); + TF_RETURN_IF_ERROR(computation->RemoveInstruction(b2.operands()[i])); } } // Change conditional instruction shape to the shape of the new root. @@ -477,29 +513,151 @@ StatusOr ConditionalCodeMotion::MoveInstructionOut( return true; } +// Hoist ops from outside of the conditional to inside the branches. +StatusOr ConditionalCodeMotion::MoveInstructionIn( + HloInstruction* conditional, std::vector& to_move_in, + std::vector& new_boundaries) { + if (to_move_in.empty()) { + return false; + } + VLOG(1) << "number of boundaries to move in:" << to_move_in.size() << "\n"; + HloComputation* conditional_parent = conditional->parent(); + VLOG(2) << "before opt:" + << conditional_parent->ToString(HloPrintOptions::Fingerprint()) + << "\n"; + // Mapping instructions to be moved to their new representations. + absl::flat_hash_map hoisted_instructions; + int64 to_move_in_size = to_move_in.size(); + int64 branch_count = conditional->branch_count(); + int64 op_index = conditional->shape().tuple_shapes_size(); + // Map conditional to its old root, then create a new root instruction in each + // branch. 
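+  // The three cases below: a kTuple root is cloned as-is; a non-tuple-shaped
+  // root r gets a new root tuple(r); a tuple-shaped root that is not a kTuple
+  // op gets a new root built from per-element get-tuple-elements.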
+ Boundary b(Boundary::Position::kInsideBranch); + for (int i = 0; i < branch_count; i++) { + auto computation = conditional->branch_computation(i); + auto old_root = computation->root_instruction(); + b.mutable_operands().push_back(old_root); + HloInstruction* new_root = nullptr; + if (old_root->opcode() == HloOpcode::kTuple) { + new_root = computation->AddInstruction(old_root->Clone()); + } else { + std::vector operands; + if (!old_root->shape().IsTuple()) { + operands.push_back(old_root); + } else { + const Shape& old_shape = old_root->shape(); + for (int64 i = 0; i < old_shape.tuple_shapes_size(); ++i) { + auto element = + computation->AddInstruction(HloInstruction::CreateGetTupleElement( + old_shape.tuple_shapes(i), old_root, i)); + operands.push_back(element); + } + } + new_root = + computation->AddInstruction(HloInstruction::CreateTuple(operands)); + } + VLOG(2) << "setting new root: " << new_root->ToString() << "\n"; + computation->set_root_instruction(new_root); + VLOG(2) << "new branch computation: " << computation->ToString() << "\n"; + } + hoisted_instructions[conditional] = b; + for (int64 i = 0; i < to_move_in_size; i++) { + Boundary b_to_move = to_move_in[i]; + HloInstruction* op = b_to_move.operands()[0]; + CHECK(op != nullptr); + bool to_be_used_outside = true; + VLOG(2) << "Mapping new boundary instr: " << op->ToString() << "\n"; + if (i < to_move_in_size - 1 && op->user_count() == 1 && + op->users()[0] == to_move_in[i + 1].operands()[0]) { + to_be_used_outside = false; + VLOG(2) << "Instruction is not to be used outside the branch\n"; + } + Boundary b(Boundary::Position::kInsideBranch); + for (int i = 0; i < branch_count; i++) { + auto computation = conditional->branch_computation(i); + TF_RETURN_IF_ERROR(CopyInOrOutOfConditional(b_to_move, i, computation, + hoisted_instructions)); + VLOG(2) << "After Copying to branch: " << computation->ToString() << "\n"; + if (to_be_used_outside) { + auto new_op = hoisted_instructions[op].operands()[i]; + auto new_root = computation->root_instruction(); + new_root->AppendOperand(new_op); + *new_root->mutable_shape()->add_tuple_shapes() = new_op->shape(); + VLOG(2) << "Extending conditional root " << i << " : " + << new_root->ToString() << "\n"; + } + VLOG(2) << "After extending branch root: " << computation->ToString() + << "\n"; + } + if (to_be_used_outside) { + // Modify uses of instructions outside of the conditionals + HloInstruction* gtr = conditional_parent->AddInstruction( + HloInstruction::CreateGetTupleElement(op->shape(), conditional, + op_index++)); + TF_RETURN_IF_ERROR(op->ReplaceAllUsesWith(gtr)); + if (conditional_parent->root_instruction() == op) { + conditional_parent->set_root_instruction(gtr); + } + } + } + VLOG(2) << "Done copying instructions inside branch: " + << conditional->ToString(HloPrintOptions::Fingerprint()) << "\n"; + // Change conditional instruction shape to the shape of the new root. + HloInstruction* new_root = + conditional->branch_computation(0)->root_instruction(); + *conditional->mutable_shape() = new_root->shape(); + VLOG(2) << "Before removing instructions:" << conditional_parent->ToString() + << "\n"; + // Remove hoisted instructions from the branches. 
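+  // (The original copies are deleted from the parent computation in reverse
+  // order, so that users are removed before the instructions they consume.)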
+ for (int64 i = to_move_in_size - 1; i >= 0; i--) { + Boundary boundary_to_move_in = to_move_in[i]; + VLOG(2) << "Removing boundary:" << boundary_to_move_in.ToString() << "\n"; + HloInstruction* op = boundary_to_move_in.operands()[0]; + for (auto user : op->users()) { + VLOG(2) << "Has User: " << user->ToString() << "\n"; + } + TF_RETURN_IF_ERROR(conditional_parent->RemoveInstruction(op)); + } + VLOG(2) << "Done moving instructions inside branches\n" + << conditional_parent->ToString(HloPrintOptions::Fingerprint()) + << "\n"; + return true; +} + // Group single chains of operands or uses of boundaries into new boundaries class GroupConnectedBoundaries { private: - std::unordered_set visited_; std::vector connected_boundaries_, new_boundaries_; HloInstruction* conditional_; + HloComputation* conditional_parent_; bool is_layout_sensitive_; + absl::flat_hash_set visited_; public: explicit GroupConnectedBoundaries(HloInstruction* conditional, bool is_layout_sensitive) - : conditional_(conditional), is_layout_sensitive_(is_layout_sensitive) {} + : conditional_(conditional), + conditional_parent_(conditional->parent()), + is_layout_sensitive_(is_layout_sensitive) {} // Returns true if `instruction` is worth hoisting out. bool WorthHoisting(HloInstruction* instruction) { + // This is needed for the "moving-in" transformation, to prevent the root + // of the parent computation (which contains the conditional) to be moved + // inside the conditional. + if (instruction->opcode() == HloOpcode::kTuple && + instruction == conditional_parent_->root_instruction()) { + return false; + } switch (instruction->opcode()) { case HloOpcode::kConvert: - // If Convert is after AllReduce, it is worth moving out AllReduce out - // of conditional for AR/CRS combine. If Convert is after other ops such - // as Dot or Convolutional, it is better to keep convert within - // conditional so that convert can be fused with Dot or Convolutional. + // If Convert is after AllReduce, it is worth moving out AllReduce + // out of conditional for AR/CRS combine. If Convert is after other + // ops such as Dot or Convolutional, it is better to keep convert + // within conditional so that convert can be fused with Dot or + // Convolutional. // - // TODO(b/154283721): figure out the scenario when convert can be fused - // with AllReduce out of conditional. + // TODO(b/154283721): figure out the scenario when convert can be + // fused with AllReduce out of conditional. switch (instruction->operand(0)->opcode()) { case HloOpcode::kAllReduce: case HloOpcode::kReshape: @@ -511,6 +669,7 @@ class GroupConnectedBoundaries { } case HloOpcode::kAllReduce: case HloOpcode::kAdd: + case HloOpcode::kPower: case HloOpcode::kConstant: case HloOpcode::kSubtract: case HloOpcode::kMultiply: @@ -525,27 +684,32 @@ class GroupConnectedBoundaries { return false; } } - // Calculates the degree of reuses carried by a pair of conditional - // boundaries, if b1 is inside a conditional and b2 is outside. int64 ReusesBeforeBoundary(HloInstruction* user) { int64 reuses = 0; for (auto op : user->operands()) { // Only consider single-user cases as reuseable. 
if (ContainsKey(visited_, op) && op->user_count() == 1) { reuses += ReusesCarriedBy(op, user); + } else if (op->opcode() == HloOpcode::kConditional && + user->opcode() == HloOpcode::kGetTupleElement) { + if (user->user_count() == 1) { + reuses += ReusesCarriedBy(op, user->users()[0]); + } } } - VLOG(1) << "cost to be paied after moving out" << user->ToString() << ":" - << reuses << "\n"; + VLOG(1) << "Reuses before instruction " << user->ToString() << ":" << reuses + << "\n"; return reuses; } int64 ReusesAfterBoundary(HloInstruction* user) { CHECK(user != nullptr); auto all_users = user->users(); - // For now, assume that if an instruction has multiple-consumers, it will - // not be reused (the reuse currently requires duplication in fusion and so - // is expensive). + // For now, assume that if an instruction has multiple-consumers, it + // will not be reused, as the reuse may require duplication in + // fusion and so is expensive. If the situation changes in the future, + // some aspects of the overall algorithm need to be redesigned to + // accommandate the change. if (all_users.size() > 1) { return 0; } @@ -566,8 +730,10 @@ class GroupConnectedBoundaries { } } } + } else if (ContainsKey(visited_, op)) { + reuses += ReusesCarriedBy(user, op); } - VLOG(1) << "reuses to be gained after moving " << user->ToString() << ":" + VLOG(1) << "reuses after instruction " << user->ToString() << ":" << reuses << "\n"; return reuses; } @@ -576,15 +742,19 @@ class GroupConnectedBoundaries { int64 BenefitForMovingBoundaries(const std::vector& boundaries) { int64 reuses_before = 0, reuses_after = 0; + if (boundaries.size() == 1 && boundaries[0].IsOutsideBranch()) { + // The only boundary of moving-in is the get_tuple_element op. + return -1; + } for (Boundary b : boundaries) { - auto op = b.Operands()[0]; + auto op = b.operands()[0]; if (op == conditional_->branch_computation(0)->root_instruction()) { continue; } reuses_before += ReusesBeforeBoundary(op); - VLOG(1) << "Cost of moving so far: " << reuses_before << "\n"; + VLOG(1) << "Reuses before boundary so far: " << reuses_before << "\n"; reuses_after += ReusesAfterBoundary(op); - VLOG(1) << "Benefit from moving so far : " << reuses_after << "\n"; + VLOG(1) << "Reuese after boundary so far : " << reuses_after << "\n"; } if (reuses_after == 0 && reuses_before == 0) { return -1; @@ -597,102 +767,128 @@ class GroupConnectedBoundaries { Boundary GetNextBoundary(const Boundary& b, int64 op_index) { Boundary b2(b.GetPosition()); - CHECK(b.Operands().size() == conditional_->branch_count()); - for (int j = 0; j < b.Operands().size(); ++j) { - HloInstruction* inst = b.Operands()[j]; + for (int j = 0; j < b.operands().size(); ++j) { + HloInstruction* inst = b.operands()[j]; CHECK(inst != nullptr); HloInstruction* op = (b.IsInsideBranch()) ? inst->operands()[op_index] : inst->users()[op_index]; CHECK(op != nullptr); - b2.Operands().push_back(op); + b2.mutable_operands().push_back(op); } return b2; } + int64 CountNonLeafOps(const xla::HloInstruction::InstructionVector& ops) { + int64 count = 0; + absl::flat_hash_set op_set; + for (auto op : ops) { + if (!op_set.contains(op) && op->opcode() != HloOpcode::kConstant) { + count++; + op_set.insert(op); + } + } + return count; + } + // This function is reused both for moving the boundary outside or into a + // conditional. As the result, the readability is somewhat compromised. 
+ // It might be nice to refactor this function to factor the outside-inside + // considerations into separate function pointer parameters to improve + // readability. void AddBoundaries(const Boundary& boundary) { BoundaryVisitor visitor; visitor.AddToWorkList(boundary); while (visitor.HasNextBoundary()) { Boundary b = visitor.PopNextBoundary(); - // if b is already visited, it must have multiple users and is already in - // new boundaries. Skip it. - if (ContainsKey(visited_, b.Operands()[0])) { - continue; - } VLOG(1) << "visiting boundary " << b.ToString() << "\n"; - if ((b.Operands().size() == 1 || - InstructionWithinBranchIdentical(b.Operands(), - is_layout_sensitive_)) && - WorthHoisting(b.Operands()[0])) { + if ((b.IsOutsideBranch() || InstructionWithinBranchIdentical( + b.operands(), is_layout_sensitive_)) && + WorthHoisting(b.operands()[0])) { connected_boundaries_.push_back(b); VLOG(1) << "boundary can be moved\n"; int64 operand_count = (b.IsInsideBranch()) - ? b.Operands()[0]->operand_count() - : b.Operands()[0]->users().size(); + ? b.operands()[0]->operand_count() + : b.operands()[0]->users().size(); for (int i = 0; i < operand_count; i++) { - Boundary b2 = GetNextBoundary(b, i); - int64 b2_count = (b2.IsInsideBranch()) - ? b2.Operands()[0]->user_count() - : b2.Operands()[0]->operand_count(); + Boundary next_boundary = GetNextBoundary(b, i); + int64 next_boundary_count = + (next_boundary.IsInsideBranch()) + ? next_boundary.operands()[0]->user_count() + : CountNonLeafOps(next_boundary.operands()[0]->operands()); // only consider adding an exclusive producor into the same group. - if (b2_count == 1) { + if (next_boundary_count == 1) { VLOG(2) << "Add operand " << i << " to visit later\n"; - visitor.AddToWorkList(b2); + visitor.AddToWorkList(next_boundary); } else { - VLOG(2) << "Operand " << i << " has multiple uses\n"; - if (!ContainsKey(visited_, b2.Operands()[0])) { - visited_.insert(b2.Operands()[0]); - new_boundaries_.push_back(b2); + VLOG(2) << "Next boundary " << i + << " has multiple uses: " << next_boundary_count << "\n"; + if (!ContainsKey(visited_, next_boundary.operands()[0])) { + visited_.insert(next_boundary.operands()[0]); + new_boundaries_.push_back(next_boundary); } } } } else { VLOG(1) << "boundary cannot be moved\n"; - visited_.insert(b.Operands()[0]); + visited_.insert(b.operands()[0]); new_boundaries_.push_back(b); } } } - std::vector BoundariesToMoveOut(const Boundary& b) { - HloInstruction* inst = b.Operands()[0]; + std::vector BoundariesToMoveInOrOut(const Boundary& b) { + // At the beginning of optimization, a conditional itself is added to a + // worklist. Here the conditional is expanded into two sets of boundaries: + // the first set contains the boundary that is inside branches and + // contains the root of all branches; the second set of boundaries + // contains all the users of the conditional. + HloInstruction* inst = b.operands()[0]; if (inst->opcode() == HloOpcode::kConditional) { int branch_count = inst->branch_count(); - // Visit instructions from the root instruction to the operands using BFS. + // Add conditional roots as a new boundary to visit. 
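+      // (This boundary holds one root per branch, i.e. branch_count operands.)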
Boundary boundary_in(Boundary::Position::kInsideBranch); for (int i = 0; i < branch_count; i++) { HloComputation* branch_computation = inst->branch_computation(i); HloInstruction* root_inst = branch_computation->root_instruction(); CHECK(root_inst != nullptr); - boundary_in.Operands().push_back(root_inst); + boundary_in.mutable_operands().push_back(root_inst); } - AddBoundaries(boundary_in); + new_boundaries_.push_back(boundary_in); + // Add conditional users as new boundaries to visit. + for (auto u : inst->users()) { + Boundary boundary_in(Boundary::Position::kOutsideBranch); + boundary_in.mutable_operands().push_back(u); + new_boundaries_.push_back(boundary_in); + } + } else { + AddBoundaries(b); } return connected_boundaries_; } - std::vector BoundariesToMoveIn(const Boundary& b) { - if (b.IsInsideBranch()) { - return std::vector(); - } - AddBoundaries(b); - return connected_boundaries_; + void AddNewBoundaries(std::vector& b) { + b.insert(b.end(), new_boundaries_.begin(), new_boundaries_.end()); } - std::vector GetNewBoundaries() { return new_boundaries_; } }; ConditionalCodeMotion::Decision ConditionalCodeMotion::ConsiderCodeMotion( HloInstruction* conditional, const Boundary& cur_boundary, std::vector& to_move, std::vector& new_boundaries) { GroupConnectedBoundaries connect(conditional, is_layout_sensitive_); - auto move_out = connect.BoundariesToMoveOut(cur_boundary); - if (!move_out.empty()) { - std::vector next_boundaries = connect.GetNewBoundaries(); - auto benefit = connect.BenefitForMovingBoundaries(move_out); - VLOG(1) << "benefit of moving " << cur_boundary.Operands()[0]->ToString() - << ":" << benefit << "\n"; + auto move_in_or_out = connect.BoundariesToMoveInOrOut(cur_boundary); + if (!move_in_or_out.empty()) { + auto benefit = connect.BenefitForMovingBoundaries(move_in_or_out); + VLOG(1) << "benefit of moving in or out " + << cur_boundary.operands()[0]->ToString() << ":" << benefit << "\n"; if (benefit >= 0) { - new_boundaries = next_boundaries; - to_move = move_out; - return Decision::kMoveOutOfBranch; + new_boundaries.clear(); + connect.AddNewBoundaries(new_boundaries); + // The whole sequence in move_in_or_out is either all moving into a + // conditional, or all moving out of a conditional. So looking only + // at the first entry of the sequence is sufficient to know which + // direction the move is intended. + to_move = move_in_or_out; + return to_move[0].IsInsideBranch() ? Decision::kMoveOutOfBranch + : Decision::kMoveIntoBranch; } + } else { + connect.AddNewBoundaries(new_boundaries); } return ConditionalCodeMotion::Decision::kNoChange; } @@ -710,40 +906,60 @@ StatusOr ConditionalCodeMotion::Run(HloModule* module) { } bool changed = false; - std::vector to_move_out, to_move_in, new_boundaries; for (HloInstruction* conditional : conditional_ops) { + // Boundaries to move out or to move into the branches. + std::vector to_move_out, to_move_in, new_boundaries; + // The conditional is moved into a worklist as the seed (starting point). + // The conditional will be expanded into multiple seeds (starting points), + // its roots and its users, when it is visited by GroupConnectedBoundaries. + // A NO_CHANGE decision will always be returned for the conditional itself, + // so that the other seeding boundaries can be visited in turn. BoundaryVisitor visitor(conditional); VLOG(2) << "Analyzing conditional:" << conditional->ToString() << "\n"; - // Boundariess to move out of and to move into the branches. 
- while (visitor.HasNextBoundary()) { + ConditionalCodeMotion::Decision d = Decision::kNoChange; + // The following loop breaks out as soon as a decision to modify the + // conditional is reached --- irrespective of whether visitor is empty. + while (d == Decision::kNoChange && visitor.HasNextBoundary()) { std::vector to_move, next_boundary; Boundary boundary = visitor.PopNextBoundary(); VLOG(2) << "Analyzing boundary:" << boundary.ToString() << "\n"; - ConditionalCodeMotion::Decision d = - ConsiderCodeMotion(conditional, boundary, to_move, next_boundary); + d = ConsiderCodeMotion(conditional, boundary, to_move, next_boundary); switch (d) { case Decision::kMoveOutOfBranch: VLOG(2) << "Decision is move out of branch\n"; to_move_out.insert(to_move_out.end(), to_move.begin(), to_move.end()); + new_boundaries.insert(new_boundaries.end(), next_boundary.begin(), + next_boundary.end()); break; case Decision::kMoveIntoBranch: VLOG(2) << "Decision is move into branch\n"; to_move_in.insert(to_move_in.end(), to_move.begin(), to_move.end()); + new_boundaries.insert(new_boundaries.end(), next_boundary.begin(), + next_boundary.end()); break; case Decision::kNoChange: VLOG(2) << "Decision is no change\n"; - new_boundaries.push_back(boundary); + for (const Boundary& b : next_boundary) { + visitor.AddToWorkList(b); + } break; } - for (const Boundary& b : next_boundary) { - visitor.AddToWorkList(b); - } } - TF_ASSIGN_OR_RETURN( - bool result, - MoveInstructionOut(conditional, to_move_out, new_boundaries)); - VLOG(2) << "moving out result:" << result << "\n"; - changed |= result; + // At most one of to_move_out or to_move_in can be non-empty, since there is + // only one optimization decision. + if (!to_move_out.empty()) { + TF_ASSIGN_OR_RETURN( + bool result, + MoveInstructionOut(conditional, to_move_out, new_boundaries)); + VLOG(2) << "moving out result:" << result << "\n"; + changed |= result; + } else if (!to_move_in.empty()) { + TF_ASSIGN_OR_RETURN( + bool result, + MoveInstructionIn(conditional, to_move_in, new_boundaries)); + VLOG(2) << "moving in result:" << result << "\n"; + changed |= result; + } } // handling convert rematerialization/hoisting if (!changed && pursue_full_conditional_code_motion_) { diff --git a/tensorflow/compiler/xla/service/conditional_code_motion.h b/tensorflow/compiler/xla/service/conditional_code_motion.h index d7295058467..68a2aa58235 100644 --- a/tensorflow/compiler/xla/service/conditional_code_motion.h +++ b/tensorflow/compiler/xla/service/conditional_code_motion.h @@ -24,7 +24,7 @@ limitations under the License. namespace xla { namespace conditional_opt { -// At the conceptural level, a boundary can be thought of as representing a +// At the conceptual level, a boundary can be thought of as representing a // single virtual operation, except this virtual operation is conditionally // instantiated into different concrete operations at each conditional branch. // So a boundary is mapped to a single concrete operation if it is outside of @@ -36,10 +36,11 @@ namespace conditional_opt { // inside branches. 
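// For example: for a conditional with two branches whose roots are r0 and r1,
// an inside-branch boundary may hold {r0, r1} (one operand per branch), while
// an outside-branch boundary holds a single user of the conditional.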
class Boundary { public: - enum class Position { kInsideBranch, kOutsideBranch }; + enum class Position { kInsideBranch, kOutsideBranch, kUndefined }; + Boundary() : position_(Position::kUndefined) {} explicit Boundary(Position p) : position_(p) {} - std::vector& Operands() { return operands_; } - const std::vector& Operands() const { return operands_; } + std::vector& mutable_operands() { return operands_; } + const std::vector& operands() const { return operands_; } bool IsInsideBranch() const { return position_ == Position::kInsideBranch; } bool IsOutsideBranch() const { return position_ == Position::kOutsideBranch; } Position GetPosition() const { return position_; } @@ -54,7 +55,7 @@ class Boundary { private: // Boundary instructions in the conditional branches, one from each branch - // of the conditional. + // of the conditional; or a single operand from outside the conditional. std::vector operands_; Position position_; }; diff --git a/tensorflow/compiler/xla/service/conditional_code_motion_test.cc b/tensorflow/compiler/xla/service/conditional_code_motion_test.cc index b3c5e17094a..b0a6ba92f48 100644 --- a/tensorflow/compiler/xla/service/conditional_code_motion_test.cc +++ b/tensorflow/compiler/xla/service/conditional_code_motion_test.cc @@ -255,7 +255,7 @@ ENTRY main { false_computation=on_false get-first-index = f32[] get-tuple-element(conditional), index=0 get-second-index = f32[] get-tuple-element(conditional), index=1 - ROOT result = (f32[], f32[]) tuple(get-first-index, get-second-index) + ROOT result = f32[] add(get-first-index, get-second-index) } )"; auto module = ParseAndReturnVerifiedModule(hlo_string).ValueOrDie(); @@ -270,8 +270,8 @@ ENTRY main { ASSERT_EQ(on_false->instruction_count(), 9); HloInstruction* root = module->entry_computation()->root_instruction(); - EXPECT_THAT( - root, AllOf(op::Tuple(op::Multiply(op::GetTupleElement(op::Conditional()), + EXPECT_THAT(root, + AllOf(op::Add(op::Multiply(op::GetTupleElement(op::Conditional()), op::Constant()), op::GetTupleElement(op::Conditional())))); } @@ -311,7 +311,7 @@ ENTRY main { conditional(pred.1, tuple.1, tuple.2), true_computation=on_true, false_computation=on_false get-first-index = f32[] get-tuple-element(conditional), index=0 - ROOT result = (f32[]) tuple(get-first-index) + ROOT result = f32[] add(get-first-index, get-first-index) } )"; auto module = ParseAndReturnVerifiedModule(hlo_string).ValueOrDie(); @@ -327,9 +327,14 @@ ENTRY main { HloInstruction* root = module->entry_computation()->root_instruction(); EXPECT_THAT( root, - AllOf(op::Tuple(op::Add( - op::Add(op::GetTupleElement(op::Conditional()), op::Constant()), - op::Add(op::GetTupleElement(op::Conditional()), op::Constant()))))); + AllOf(op::Add( + op::Add( + op::Add(op::GetTupleElement(op::Conditional()), op::Constant()), + op::Add(op::GetTupleElement(op::Conditional()), op::Constant())), + op::Add( + op::Add(op::GetTupleElement(op::Conditional()), op::Constant()), + op::Add(op::GetTupleElement(op::Conditional()), + op::Constant()))))); } TEST_F(ConditionalCodeMotionTest, ConditionalIsRootInstruction) { @@ -532,6 +537,49 @@ ENTRY main { op::AllReduce(op::GetTupleElement(op::Conditional()))))))); } +TEST_F(ConditionalCodeMotionTest, MovePowOpIn) { + absl::string_view hlo_string = + R"( +HloModule RemoveIdenticalInstruction + +on_true { + arg_tuple.1 = (f32[10]) parameter(0) + get-tuple-element.1 = f32[10] get-tuple-element(arg_tuple.1), index=0 + add.1 = f32[10] add(get-tuple-element.1, get-tuple-element.1) + ROOT tuple.3 = (f32[10]) tuple(add.1) +} + 
+on_false { + arg_tuple.2 = (f32[10]) parameter(0) + get-tuple-element.2 = f32[10] get-tuple-element(arg_tuple.2), index=0 + mul.1 = f32[10] multiply(get-tuple-element.2, get-tuple-element.2) + ROOT tuple.4 = (f32[10]) tuple(mul.1) +} + +ENTRY main { + pred.1 = pred[] parameter(0) + tuple.1 = (f32[10]) parameter(1) + tuple.2 = (f32[10]) parameter(2) + conditional = (f32[10]) + conditional(pred.1, tuple.1, tuple.2), true_computation=on_true, + false_computation=on_false + get-first-index = f32[10] get-tuple-element(conditional), index=0 + ROOT pow.1 = f32[10] power(get-first-index, get-first-index) +} +)"; + auto module = ParseAndReturnVerifiedModule(hlo_string).ValueOrDie(); + ConditionalCodeMotion pass(true, true); + ASSERT_TRUE(pass.Run(&*module).ValueOrDie()); + const HloInstruction* conditional = + FindInstruction(module.get(), "conditional"); + const HloComputation* on_true = conditional->branch_computation(0); + ASSERT_EQ(on_true->instruction_count(), 5); + const HloComputation* on_false = conditional->branch_computation(1); + ASSERT_EQ(on_false->instruction_count(), 5); + + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, AllOf(op::GetTupleElement(op::Conditional()))); +} } // namespace conditional_opt } // namespace xla From a08026db88ececbdf00e355a6102251cc88d88a3 Mon Sep 17 00:00:00 2001 From: Ran Chen Date: Wed, 29 Jul 2020 19:45:54 -0700 Subject: [PATCH 1689/2522] Correct the comment about the purpose of the lock in CollectiveAllReduce The previous statement is incorrect. There can be issues even when instance keys are not re-used. PiperOrigin-RevId: 323921048 Change-Id: Ia51a111dadb61b95cab0c0bd70517eff99b7bda7 --- tensorflow/python/distribute/cross_device_ops.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/distribute/cross_device_ops.py b/tensorflow/python/distribute/cross_device_ops.py index 81a401fb4a9..c57491cff70 100644 --- a/tensorflow/python/distribute/cross_device_ops.py +++ b/tensorflow/python/distribute/cross_device_ops.py @@ -964,13 +964,14 @@ class CollectiveAllReduce(CrossDeviceOps): # # In a multi threaded eager program we need to ensure different groups of # collectives don't interleave each other, otherwise there couuld be - # deadlocks. E.g. + # deadlocks. E.g. if two user threads both are launching collectives: # user-thread-0 device0 device1 # user-thread-1 device0 device1 - # Note that thanks to protection in the runtime, this is only an issue when - # the instance key is re-used. The instance key is reused if the user builds - # a tf.function and runs it in multiple threads, since the instance key is - # an attribute of the collective ops. + # In eager mode, we use one executor per device. Executors use single FIFO + # queues, so the above launch sequences end up with the following queues: + # device-0 collective-0 collective-1 + # device-1 collective-1 collective-0 + # This deadlocks since neither collective is able to finish. self._lock = threading.Lock() # Collective ops requires all devices to participate and is blocking. In From 2e1a2a7e357fd13f512bf3f0e3af4e486476d411 Mon Sep 17 00:00:00 2001 From: Robert David Date: Wed, 29 Jul 2020 20:28:20 -0700 Subject: [PATCH 1690/2522] LSTM test cleanup: separate num_steps and num_inputs/num_outputs in the input and golden vectors. Because each LSTM step is compared separately in the test, this makes is clearer where an error might be. 
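For illustration, the data below (taken from one of the tests in this change)
is now laid out one entry per step, each holding one vector per batch:

  // num_steps x num_batch x num_inputs
  lstm_input_ = {{{2., 3.}}, {{3., 4.}}, {{1., 1.}}};
  // num_steps x num_batch x num_outputs
  lstm_golden_output_ = {{{-0.02973187, 0.1229473, 0.20885126, -0.15358765}},
                         {{-0.03716109, 0.12507336, 0.41193449, -0.20860538}},
                         {{-0.15053082, 0.09120187, 0.24278517, -0.12222792}}};

instead of a single flattened vector per batch that spans all steps.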
PiperOrigin-RevId: 323925293 Change-Id: Iab4caec547c9961431fd260fb55e3900bf4df869 --- tensorflow/lite/kernels/lstm_test.cc | 211 +++++++++++++-------------- 1 file changed, 99 insertions(+), 112 deletions(-) diff --git a/tensorflow/lite/kernels/lstm_test.cc b/tensorflow/lite/kernels/lstm_test.cc index 7d51cb39ae9..16e28619daf 100644 --- a/tensorflow/lite/kernels/lstm_test.cc +++ b/tensorflow/lite/kernels/lstm_test.cc @@ -41,7 +41,10 @@ class LSTMOpModel : public SingleOpModel { bool use_projection_bias, const TensorType weight_type, bool model_has_legacy_20_inputs, bool is_layer_norm, bool asymmetric_quantize_inputs) - : n_input_(n_input), n_output_(n_output), weight_type_(weight_type) { + : n_input_(n_input), + n_output_(n_output), + n_batch_(n_batch), + weight_type_(weight_type) { input_ = AddInput({TensorType_FLOAT32, {n_batch, n_input}}); if (use_cifg) { @@ -234,6 +237,7 @@ class LSTMOpModel : public SingleOpModel { int num_inputs() { return n_input_; } int num_outputs() { return n_output_; } + int num_batches() { return n_batch_; } protected: int input_; @@ -268,6 +272,7 @@ class LSTMOpModel : public SingleOpModel { int n_input_; int n_output_; + int n_batch_; private: void PopulateTensor(int index, const std::vector& data) { @@ -327,10 +332,10 @@ class LstmOpTest std::vector cell_layer_norm_coefficients_; std::vector output_layer_norm_coefficients_; - // LSTM input is stored as num_batch x num_inputs vector. - std::vector> lstm_input_; - // LSTM output is stored as num_batch x num_outputs vector. - std::vector> lstm_golden_output_; + // LSTM input is stored as num_steps * num_batch * num_inputs vector. + std::vector>> lstm_input_; + // LSTM output is stored as num_steps * num_batch * num_outputs vector. + std::vector>> lstm_golden_output_; // Compares output up to tolerance to the result of the lstm given the input. 
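  // For each step, the per-batch inputs are fed to the model, Invoke() is run
  // once, and the flattened per-batch outputs are compared against that step's
  // goldens.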
void VerifyGoldens(LSTMOpModel* lstm, float tolerance) { @@ -340,30 +345,33 @@ class LstmOpTest SetAllWeightsAndBiases(lstm); lstm->ApplyDelegate(); - const int num_batches = lstm_input_.size(); - EXPECT_GT(num_batches, 0); const int num_inputs = lstm->num_inputs(); - EXPECT_GT(num_inputs, 0); - const int input_sequence_size = lstm_input_[0].size() / num_inputs; - EXPECT_GT(input_sequence_size, 0); - for (int i = 0; i < input_sequence_size; ++i) { - for (int b = 0; b < num_batches; ++b) { - const float* batch_start = lstm_input_[b].data() + i * num_inputs; - const float* batch_end = batch_start + num_inputs; + const int num_outputs = lstm->num_outputs(); + const int num_batches = lstm->num_batches(); - lstm->SetInput(b * lstm->num_inputs(), batch_start, batch_end); + ASSERT_EQ(lstm_input_.size(), lstm_golden_output_.size()); + const int num_steps = lstm_input_.size(); + + for (int i = 0; i < num_steps; ++i) { + ASSERT_EQ(num_batches, lstm_input_[i].size()); + for (int b = 0; b < num_batches; ++b) { + ASSERT_EQ(num_inputs, lstm_input_[i][b].size()); + const float* batch_start = lstm_input_[i][b].data(); + const float* batch_end = batch_start + num_inputs; + lstm->SetInput(b * num_inputs, batch_start, batch_end); } lstm->Invoke(); - const int num_outputs = lstm->num_outputs(); std::vector expected; + ASSERT_EQ(num_batches, lstm_golden_output_[i].size()); for (int b = 0; b < num_batches; ++b) { - const float* golden_start_batch = - lstm_golden_output_[b].data() + i * num_outputs; - const float* golden_end_batch = golden_start_batch + num_outputs; - expected.insert(expected.end(), golden_start_batch, golden_end_batch); + ASSERT_EQ(num_outputs, lstm_golden_output_[i][b].size()); + const float* batch_start = lstm_golden_output_[i][b].data(); + const float* batch_end = batch_start + num_outputs; + expected.insert(expected.end(), batch_start, batch_end); } + EXPECT_THAT(lstm->GetOutput(), ElementsAreArray(ArrayFloatNear(expected, tolerance))); } @@ -457,10 +465,12 @@ TEST_P(LstmOpTest, NoCifg_NoPeephole_NoProjection_NoLayerNorm) { 0.48572797, -0.50656658, 0.20047462, -0.20607421, -0.51818722, -0.15390486, 0.0468148, 0.39922136}; - lstm_input_ = {{2., 3., 3., 4., 1., 1.}}; - lstm_golden_output_ = {{-0.02973187, 0.1229473, 0.20885126, -0.15358765, - -0.03716109, 0.12507336, 0.41193449, -0.20860538, - -0.15053082, 0.09120187, 0.24278517, -0.12222792}}; + // num_steps * num_batch * num_inputs + lstm_input_ = {{{2., 3.}}, {{3., 4.}}, {{1., 1.}}}; + // num_steps * num_batch * num_outputs + lstm_golden_output_ = {{{-0.02973187, 0.1229473, 0.20885126, -0.15358765}}, + {{-0.03716109, 0.12507336, 0.41193449, -0.20860538}}, + {{-0.15053082, 0.09120187, 0.24278517, -0.12222792}}}; LSTMOpModel lstm(n_batch, n_input, n_cell, n_output, /*use_cifg=*/false, /*use_peephole=*/false, @@ -528,10 +538,10 @@ TEST_P(LstmOpTest, Cifg_Peephole_NoProjection_NoLayerNorm) { cell_to_forget_weights_ = {0.47485286, -0.51955009, -0.24458408, 0.31544167}; cell_to_output_weights_ = {-0.17135078, 0.82760304, 0.85573703, -0.77109635}; - lstm_input_ = {{2., 3., 3., 4., 1., 1.}}; - lstm_golden_output_ = {{-0.36444446, -0.00352185, 0.12886585, -0.05163646, - -0.42312205, -0.01218222, 0.24201041, -0.08124574, - -0.358325, -0.04621704, 0.21641694, -0.06471302}}; + lstm_input_ = {{{2., 3.}}, {{3., 4.}}, {{1., 1.}}}; + lstm_golden_output_ = {{{-0.36444446, -0.00352185, 0.12886585, -0.05163646}}, + {{-0.42312205, -0.01218222, 0.24201041, -0.08124574}}, + {{-0.358325, -0.04621704, 0.21641694, -0.06471302}}}; LSTMOpModel lstm(n_batch, n_input, 
n_cell, n_output, /*use_cifg=*/true, /*use_peephole=*/true, @@ -1073,49 +1083,47 @@ TEST_P(LstmOpTest, NoCifg_Peephole_Projection_NoLayerNorm) { 0.01891392, -0.046839405, -0.040167913, 0.017953383, -0.09700955, 0.0061885654, -0.07000971, 0.026893595, -0.038844477, 0.14543656}; - lstm_input_ = { - {// Batch0: 4 (input_sequence_size) * 5 (n_input) - 0.787926, 0.151646, 0.071352, 0.118426, 0.458058, // step 0 - 0.596268, 0.998386, 0.568695, 0.864524, 0.571277, // step 1 - 0.073204, 0.296072, 0.743333, 0.069199, 0.045348, // step 2 - 0.867394, 0.291279, 0.013714, 0.482521, 0.626339}, // step 3 - - {// Batch1: 4 (input_sequence_size) * 5 (n_input) - 0.295743, 0.544053, 0.690064, 0.858138, 0.497181, // step 0 - 0.642421, 0.524260, 0.134799, 0.003639, 0.162482, // step 1 - 0.640394, 0.930399, 0.050782, 0.432485, 0.988078, // step 2 - 0.082922, 0.563329, 0.865614, 0.333232, 0.259916} // step 3 - }; + lstm_input_ = {// Step 1 + {{0.787926, 0.151646, 0.071352, 0.118426, 0.458058}, + {0.295743, 0.544053, 0.690064, 0.858138, 0.497181}}, + // Step 2 + {{0.596268, 0.998386, 0.568695, 0.864524, 0.571277}, + {0.642421, 0.524260, 0.134799, 0.003639, 0.162482}}, + // Step 3 + {{0.073204, 0.296072, 0.743333, 0.069199, 0.045348}, + {0.640394, 0.930399, 0.050782, 0.432485, 0.988078}}, + // Step 4 + {{0.867394, 0.291279, 0.013714, 0.482521, 0.626339}, + {0.082922, 0.563329, 0.865614, 0.333232, 0.259916}}}; lstm_golden_output_ = { - {// Batch0: 4 (input_sequence_size) * 16 (n_output) - -0.00396806, 0.029352, -0.00279226, 0.0159977, -0.00835576, - -0.0211779, 0.0283512, -0.0114597, 0.00907307, -0.0244004, - -0.0152191, -0.0259063, 0.00914318, 0.00415118, 0.017147, - 0.0134203, -0.0166936, 0.0381209, 0.000889694, 0.0143363, - -0.0328911, -0.0234288, 0.0333051, -0.012229, 0.0110322, - -0.0457725, -0.000832209, -0.0202817, 0.0327257, 0.0121308, - 0.0155969, 0.0312091, -0.0213783, 0.0350169, 0.000324794, - 0.0276012, -0.0263374, -0.0371449, 0.0446149, -0.0205474, - 0.0103729, -0.0576349, -0.0150052, -0.0292043, 0.0376827, - 0.0136115, 0.0243435, 0.0354492, -0.0189322, 0.0464512, - -0.00251373, 0.0225745, -0.0308346, -0.0317124, 0.0460407, - -0.0189395, 0.0149363, -0.0530162, -0.0150767, -0.0340193, - 0.0286833, 0.00824207, 0.0264887, 0.0305169}, - {// Batch1: 4 (input_sequence_size) * 16 (n_output) - -0.013869, 0.0287268, -0.00334693, 0.00733398, -0.0287926, - -0.0186926, 0.0193662, -0.0115437, 0.00422612, -0.0345232, - 0.00223253, -0.00957321, 0.0210624, 0.013331, 0.0150954, - 0.02168, -0.0141913, 0.0322082, 0.00227024, 0.0260507, - -0.0188721, -0.0296489, 0.0399134, -0.0160509, 0.0116039, - -0.0447318, -0.0150515, -0.0277406, 0.0316596, 0.0118233, - 0.0214762, 0.0293641, -0.0204549, 0.0450315, -0.00117378, - 0.0167673, -0.0375007, -0.0238314, 0.038784, -0.0174034, - 0.0131743, -0.0506589, -0.0048447, -0.0240239, 0.0325789, - 0.00790065, 0.0220157, 0.0333314, -0.0264787, 0.0387855, - -0.000764675, 0.0217599, -0.037537, -0.0335206, 0.0431679, - -0.0211424, 0.010203, -0.062785, -0.00832363, -0.025181, - 0.0412031, 0.0118723, 0.0239643, 0.0394009}}; + {{-0.00396806, 0.029352, -0.00279226, 0.0159977, -0.00835576, -0.0211779, + 0.0283512, -0.0114597, 0.00907307, -0.0244004, -0.0152191, -0.0259063, + 0.00914318, 0.00415118, 0.017147, 0.0134203}, + {-0.013869, 0.0287268, -0.00334693, 0.00733398, -0.0287926, -0.0186926, + 0.0193662, -0.0115437, 0.00422612, -0.0345232, 0.00223253, -0.00957321, + 0.0210624, 0.013331, 0.0150954, 0.02168}}, + + {{-0.0166936, 0.0381209, 0.000889694, 0.0143363, -0.0328911, -0.0234288, + 
0.0333051, -0.012229, 0.0110322, -0.0457725, -0.000832209, -0.0202817, + 0.0327257, 0.0121308, 0.0155969, 0.0312091}, + {-0.0141913, 0.0322082, 0.00227024, 0.0260507, -0.0188721, -0.0296489, + 0.0399134, -0.0160509, 0.0116039, -0.0447318, -0.0150515, -0.0277406, + 0.0316596, 0.0118233, 0.0214762, 0.0293641}}, + + {{-0.0213783, 0.0350169, 0.000324794, 0.0276012, -0.0263374, -0.0371449, + 0.0446149, -0.0205474, 0.0103729, -0.0576349, -0.0150052, -0.0292043, + 0.0376827, 0.0136115, 0.0243435, 0.0354492}, + {-0.0204549, 0.0450315, -0.00117378, 0.0167673, -0.0375007, -0.0238314, + 0.038784, -0.0174034, 0.0131743, -0.0506589, -0.0048447, -0.0240239, + 0.0325789, 0.00790065, 0.0220157, 0.0333314}}, + + {{-0.0189322, 0.0464512, -0.00251373, 0.0225745, -0.0308346, -0.0317124, + 0.0460407, -0.0189395, 0.0149363, -0.0530162, -0.0150767, -0.0340193, + 0.0286833, 0.00824207, 0.0264887, 0.0305169}, + {-0.0264787, 0.0387855, -0.000764675, 0.0217599, -0.037537, -0.0335206, + 0.0431679, -0.0211424, 0.010203, -0.062785, -0.00832363, -0.025181, + 0.0412031, 0.0118723, 0.0239643, 0.0394009}}}; LSTMOpModel lstm(n_batch, n_input, n_cell, n_output, /*use_cifg=*/false, /*use_peephole=*/true, @@ -1199,29 +1207,19 @@ TEST_P(LstmOpTest, NoCifg_Peephole_Projection_LayerNorm) { 0.3, 0.08, 0.07, 0.2, -0.4, 0.2}; lstm_input_ = { - {// Batch0: 3 (input_sequence_size) * 5 (n_input) - 0.7, 0.8, 0.1, 0.2, 0.3, // seq 0 - 0.8, 0.1, 0.2, 0.4, 0.5, // seq 1 - 0.2, 0.7, 0.7, 0.1, 0.7}, // seq 2 + {{0.7, 0.8, 0.1, 0.2, 0.3}, {0.3, 0.2, 0.9, 0.8, 0.1}}, - {// Batch1: 3 (input_sequence_size) * 5 (n_input) - 0.3, 0.2, 0.9, 0.8, 0.1, // seq 0 - 0.1, 0.5, 0.2, 0.4, 0.2, // seq 1 - 0.6, 0.9, 0.2, 0.5, 0.7}, // seq 2 + {{0.8, 0.1, 0.2, 0.4, 0.5}, {0.1, 0.5, 0.2, 0.4, 0.2}}, + + {{0.2, 0.7, 0.7, 0.1, 0.7}, {0.6, 0.9, 0.2, 0.5, 0.7}}, }; - lstm_golden_output_ = {{ - // Batch0: 3 (input_sequence_size) * 3 (n_output) - 0.0244077, 0.128027, -0.00170918, // seq 0 - 0.0137642, 0.140751, 0.0395835, // seq 1 - -0.00459231, 0.155278, 0.0837377, // seq 2 - }, - { - // Batch1: 3 (input_sequence_size) * 3 (n_output) - -0.00692428, 0.0848741, 0.063445, // seq 0 - -0.00403912, 0.139963, 0.072681, // seq 1 - 0.00752706, 0.161903, 0.0561371, // seq 2 - }}; + lstm_golden_output_ = { + {{0.0244077, 0.128027, -0.00170918}, {-0.00692428, 0.0848741, 0.063445}}, + + {{0.0137642, 0.140751, 0.0395835}, {-0.00403912, 0.139963, 0.072681}}, + + {{-0.00459231, 0.155278, 0.0837377}, {0.00752706, 0.161903, 0.0561371}}}; LSTMOpModel lstm(n_batch, n_input, n_cell, n_output, /*use_cifg=*/false, /*use_peephole=*/true, @@ -1283,30 +1281,19 @@ TEST_P(LstmOpTest, Cifg_Peephole_Projection_LayerNorm) { projection_weights_ = {-0.1, 0.2, 0.01, -0.2, 0.1, 0.5, 0.3, 0.08, 0.07, 0.2, -0.4, 0.2}; - lstm_input_ = { - {// Batch0: 3 (input_sequence_size) * 5 (n_input) - 0.7, 0.8, 0.1, 0.2, 0.3, // seq 0 - 0.8, 0.1, 0.2, 0.4, 0.5, // seq 1 - 0.2, 0.7, 0.7, 0.1, 0.7}, // seq 2 + lstm_input_ = {{{0.7, 0.8, 0.1, 0.2, 0.3}, {0.3, 0.2, 0.9, 0.8, 0.1}}, - {// Batch1: 3 (input_sequence_size) * 5 (n_input) - 0.3, 0.2, 0.9, 0.8, 0.1, // seq 0 - 0.1, 0.5, 0.2, 0.4, 0.2, // seq 1 - 0.6, 0.9, 0.2, 0.5, 0.7}, // seq 2 - }; - lstm_golden_output_ = { - { - // Batch0: 3 (input_sequence_size) * 3 (n_output) - 0.02129706, 0.140816242, 0.0112733059, // seq 0 - 0.0132302344, 0.152308047, 0.0346313119, // seq 1 - -0.0123688057, 0.165790111, 0.0893077999, // seq 2 - }, - { - // Batch1: 3 (input_sequence_size) * 3 (n_output) - -0.0226350538, 0.0916948169, 0.0769175813, // seq 0 - -0.0269966982, 0.149707705, 
0.094149217, // seq 1 - -0.0103429332, 0.173016444, 0.0720508844, // seq 2 - }}; + {{0.8, 0.1, 0.2, 0.4, 0.5}, {0.1, 0.5, 0.2, 0.4, 0.2}}, + + {{0.2, 0.7, 0.7, 0.1, 0.7}, {0.6, 0.9, 0.2, 0.5, 0.7}}}; + lstm_golden_output_ = {{{0.02129706, 0.140816242, 0.0112733059}, + {-0.0226350538, 0.0916948169, 0.0769175813}}, + + {{0.0132302344, 0.152308047, 0.0346313119}, + {-0.0269966982, 0.149707705, 0.094149217}}, + + {{-0.0123688057, 0.165790111, 0.0893077999}, + {-0.0103429332, 0.173016444, 0.0720508844}}}; LSTMOpModel lstm(n_batch, n_input, n_cell, n_output, /*use_cifg=*/true, /*use_peephole=*/true, From 69cd0c1a38dad691aa715bb68066d4507aee9bec Mon Sep 17 00:00:00 2001 From: Ran Chen Date: Wed, 29 Jul 2020 20:43:10 -0700 Subject: [PATCH 1691/2522] Resolve to handle of the primary variable when saving a loaded function If a model is loaded under strategy, we wrap the loaded function so that when used in strategy.run(), the captured variables can resolve the ones in the current replica. We placed the guard that such loaded functions cannot be called outside of replica context, because the function may mutate the variables. However this breaks saving which happens in the cross replica context. This change checks if we're saving a non distributed version of the model, and if so, resolve the primary variable. PiperOrigin-RevId: 323926989 Change-Id: I5c24395b82e9601214e969f5889655b93839ba20 --- .../integration_test/saved_model_test.py | 39 ++++++------------- tensorflow/python/saved_model/BUILD | 1 + tensorflow/python/saved_model/load.py | 21 +++++++--- 3 files changed, 27 insertions(+), 34 deletions(-) diff --git a/tensorflow/python/distribute/integration_test/saved_model_test.py b/tensorflow/python/distribute/integration_test/saved_model_test.py index 4455e1f79f5..33d94435ff7 100644 --- a/tensorflow/python/distribute/integration_test/saved_model_test.py +++ b/tensorflow/python/distribute/integration_test/saved_model_test.py @@ -205,16 +205,14 @@ class SaveAndLoadForServingTest(test.TestCase, parameterized.TestCase): self.assertEqual(op.device, "") self.assertEqual(loaded().numpy(), 1.) - def test_model_with_loaded_layer_broken(self, strategy): - # If a model contains a layer loaded from SavedModel, including tf.hub - # layers, and if the model is created under tf.distribute.Strategy, it - # cannot be saved again. The saving won't error but the saved model cannot - # be used. + def test_model_with_loaded_layer(self, strategy): + # When a model is loaded under strategy, we wrap it so that when it's passed + # to strategy.run(), the captured variables resolve to the ones of the + # current replica. Since the saved tf.function may contain updates to the + # variables, we don't allow using the model outside of strategy.run(). # - # The reason is that if a saved model is loaded under - # tf.distribute.Strategy, the tf.functions are wrapped by - # saved_model._WrapperFunction, which generates an assertion node in the - # cross-replica context. + # That is to say, a loaded model is different from the original Python one. + # We need to test save-load-save-load to make sure things work correctly. 
class Layer(tf.Module): @@ -240,28 +238,13 @@ class SaveAndLoadForServingTest(test.TestCase, parameterized.TestCase): with strategy.scope(): m = Model(tf.saved_model.load(layer_export_dir)) export_dir = self.get_temp_dir() - # It happens to work if we save the model outside of strategy.scope(), - # because DistributedVariable.handle and _WrapperFunction behaved - # differently under the cross-replica context and the default strategy's - # replica context. tf.saved_model.save(m, export_dir) loaded = tf.saved_model.load(export_dir) - # got error, want [1., 1.] - if isinstance(strategy, tf.distribute.MirroredStrategy): - with self.assertRaisesRegex( - tf.errors.InvalidArgumentError, - "from the cross-replica context in an in-replica context"): - strategy.run(loaded) - else: - with self.assertRaisesRegex(tf.errors.InvalidArgumentError, - "No registered 'Placeholder'"): - strategy.run(loaded) - # TODO(b/160646235): Uncomment after fix. - # self.assertAllEqual( - # self.evaluate( - # strategy.experimental_local_results(strategy.run(loaded)), - # [1., 1.])) + self.assertAllEqual( + self.evaluate( + strategy.experimental_local_results(strategy.run(loaded))), + [1., 1.]) @combinations.generate( diff --git a/tensorflow/python/saved_model/BUILD b/tensorflow/python/saved_model/BUILD index 5e96ea596bf..27e0e984f5f 100644 --- a/tensorflow/python/saved_model/BUILD +++ b/tensorflow/python/saved_model/BUILD @@ -419,6 +419,7 @@ py_library( "//tensorflow/python:variables", "//tensorflow/python/distribute:distribute_lib", "//tensorflow/python/distribute:distribute_utils", + "//tensorflow/python/distribute:values_util", "//tensorflow/python/eager:context", "//tensorflow/python/eager:function", "//tensorflow/python/training/saving:checkpoint_options", diff --git a/tensorflow/python/saved_model/load.py b/tensorflow/python/saved_model/load.py index 0835481ab69..0c64275ce01 100644 --- a/tensorflow/python/saved_model/load.py +++ b/tensorflow/python/saved_model/load.py @@ -24,6 +24,7 @@ import os from tensorflow.core.protobuf import graph_debug_info_pb2 from tensorflow.python.distribute import distribute_utils from tensorflow.python.distribute import distribution_strategy_context as ds_context +from tensorflow.python.distribute import values_util from tensorflow.python.eager import context from tensorflow.python.eager import function from tensorflow.python.framework import constant_op @@ -90,18 +91,26 @@ class _WrapperFunction(function.ConcreteFunction): def _call_flat(self, args, captured_inputs, cancellation_manager=None): - def get_in_replica_handle(x): + def get_handle(x): return x.handle if distribute_utils.is_distributed_variable(x) else x - def get_cross_replica_handle(x): + def get_unused_handle(x): return _unused_handle() if distribute_utils.is_distributed_variable(x) \ else x - if ds_context.get_replica_context() is not None: # in-replica context - captured_inputs = list(map(get_in_replica_handle, captured_inputs)) + if (ds_context.get_replica_context() is not None or + values_util.is_saving_non_distributed()): + # If we're in the replica context or are saving a non-distributed version + # of the model, we resolve the captured variables to the corresponding + # resource handle. In both situation we call var.handle, but it has + # different behavior. In the replica context, var.handle resolves the + # replica local variable handle if the variable is replicated. 
When saving + # a non-distributed version of the model, var.handle resolves to the + # primary variable handle, since we only save one copy of a replicated + # variable. + captured_inputs = list(map(get_handle, captured_inputs)) else: # cross-replica context - captured_inputs = list( - map(get_cross_replica_handle, captured_inputs)) + captured_inputs = list(map(get_unused_handle, captured_inputs)) return super(_WrapperFunction, self)._call_flat(args, captured_inputs, cancellation_manager) From 640cdad89f7d3d2ab392526f2859e4f3e3f78721 Mon Sep 17 00:00:00 2001 From: Priya Gupta Date: Wed, 29 Jul 2020 20:49:34 -0700 Subject: [PATCH 1692/2522] Remove unused helper in values_test.py PiperOrigin-RevId: 323927733 Change-Id: I4ff7e83d9f7419430259e488456a61f440a061ac --- tensorflow/python/distribute/values_test.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/tensorflow/python/distribute/values_test.py b/tensorflow/python/distribute/values_test.py index 48b6b973a6b..447a8b427eb 100644 --- a/tensorflow/python/distribute/values_test.py +++ b/tensorflow/python/distribute/values_test.py @@ -1019,25 +1019,6 @@ def _make_replica_local(method, strategy=None): return v, replica_local -# TODO(b/144432582): Add variable aggregation type to combinations to simplify -# tests. -def strategy_and_run_tf_function_combinations(): - # Test the combination of different strategies and whether a tf.function - # is passed into strategy.run.""" - return combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - ], - mode=["graph", "eager"], - experimental_run_tf_function=[True, False]) + combinations.combine( - distribution=[ - strategy_combinations.tpu_strategy, - strategy_combinations.tpu_strategy_packed_var, - ], - mode=["graph", "eager"], - experimental_run_tf_function=[True]) - - class SyncOnReadVariableTest(test.TestCase, parameterized.TestCase): def _assign_replica_local(self, v, new): From 561b8292a2b718c80dfabb2fd2b2f24d849cb696 Mon Sep 17 00:00:00 2001 From: Henry Tan Date: Wed, 29 Jul 2020 21:38:00 -0700 Subject: [PATCH 1693/2522] Support multi-core execution. PiperOrigin-RevId: 323932743 Change-Id: I3235ddec06018eca323c974fabfcb094cceda10a --- tensorflow/core/tpu/kernels/tpu_execute_op.cc | 6 +- .../core/tpu/kernels/tpu_program_group.cc | 153 ++++++++---------- .../core/tpu/kernels/tpu_program_group.h | 22 +-- 3 files changed, 82 insertions(+), 99 deletions(-) diff --git a/tensorflow/core/tpu/kernels/tpu_execute_op.cc b/tensorflow/core/tpu/kernels/tpu_execute_op.cc index 0f451e52242..3522ace379a 100644 --- a/tensorflow/core/tpu/kernels/tpu_execute_op.cc +++ b/tensorflow/core/tpu/kernels/tpu_execute_op.cc @@ -649,8 +649,9 @@ Status TPUExecuteOp::DoWork(OpKernelContext* context) { tensorflow::down_cast( entry.tpu_program_group()); CHECK_NE(tpu_program_group, nullptr); + const int core_index = entry.core_index(); const TPUExecutableInfoProto& executable = - tpu_program_group->executable_info(); + tpu_program_group->executable_info(core_index); xla::Backend* const backend = node_context->backend(); xla::TransferManager* const transfer_manager = backend->transfer_manager(); @@ -749,8 +750,7 @@ Status TPUExecuteOp::DoWork(OpKernelContext* context) { // all subsequent writes to the program that could possibly clobber the memory // will depend on the program to finish. 
const TPUHostTransferInfoProto& host_transfer_info = - tpu_program_group->host_transfer_info(); - const int core_index = entry.core_index(); + tpu_program_group->host_transfer_info(core_index); TF_ASSIGN_OR_RETURN( xla::ExecutionOutput output, TPUExecute(executable, host_transfer_info, diff --git a/tensorflow/core/tpu/kernels/tpu_program_group.cc b/tensorflow/core/tpu/kernels/tpu_program_group.cc index 27b699e1acd..2ee926f9d2b 100644 --- a/tensorflow/core/tpu/kernels/tpu_program_group.cc +++ b/tensorflow/core/tpu/kernels/tpu_program_group.cc @@ -98,55 +98,62 @@ StatusOr> CompileAheadOfTime( compilation_result, metadata, per_core_arg_shapes, per_core_output_shapes, per_core_variable_indices, device_assignment); } +} // namespace -Status CreateTpuProgramGroup( - absl::Span xla_tpu_programs, - TpuProgramGroupInterface* tpu_program_group_interface) { +void TpuProgramGroup::Initialize( + absl::Span xla_tpu_programs) { CHECK_GT(xla_tpu_programs.size(), 0); - TpuProgramGroup* tpu_program_group = - tensorflow::down_cast(tpu_program_group_interface); - CHECK_NE(tpu_program_group, nullptr); - tpu_program_group->set_tpu_programs(xla_tpu_programs); + set_tpu_programs(xla_tpu_programs); - // TODO(jiawenhao): Handle the case of xla_tpu_programs.size() > 1. - bool may_modify_variables; - TpuProgramApiFn()->TpuProgram_GetMayModifyVariablesFn(xla_tpu_programs[0], - &may_modify_variables); - tpu_program_group->set_may_modify_variables( - std::vector(1, may_modify_variables)); + std::vector may_modify_variables_array(xla_tpu_programs.size(), false); + std::vector executable_infos(xla_tpu_programs.size()); + std::vector host_transfer_infos( + xla_tpu_programs.size()); + std::vector hlo_metadatas(xla_tpu_programs.size()); + for (size_t i = 0; i < xla_tpu_programs.size(); ++i) { + const XLA_TpuProgram* xla_tpu_program = xla_tpu_programs[i]; + bool may_modify_variables; + TpuProgramApiFn()->TpuProgram_GetMayModifyVariablesFn( + xla_tpu_program, &may_modify_variables); + may_modify_variables_array[i] = may_modify_variables; - TpuSerializedProto serialized_executable_info; - TpuProgramApiFn()->TpuProgram_GetExecutableInfoFn( - xla_tpu_programs[0], &serialized_executable_info); - TPUExecutableInfoProto executable_info = - se_tpu::DeserializeProto( - serialized_executable_info); - tpu_program_group->set_executable_info(executable_info); - StreamExecutor_Tpu_FreeSerializedProto(&serialized_executable_info); + TpuSerializedProto serialized_executable_info; + TpuProgramApiFn()->TpuProgram_GetExecutableInfoFn( + xla_tpu_program, &serialized_executable_info); + TPUExecutableInfoProto executable_info = + se_tpu::DeserializeProto( + serialized_executable_info); + executable_infos[i] = executable_info; + StreamExecutor_Tpu_FreeSerializedProto(&serialized_executable_info); - TPUHostTransferInfoProto host_transfer_info; - TpuSerializedProto serialized_host_transfer_info; - TpuProgramApiFn()->TpuProgram_GetHostTransferInfoFn( - xla_tpu_programs[0], &serialized_host_transfer_info); - if (serialized_host_transfer_info.size > 0) { - host_transfer_info = se_tpu::DeserializeProto( - serialized_host_transfer_info); - StreamExecutor_Tpu_FreeSerializedProto(&serialized_host_transfer_info); + TPUHostTransferInfoProto host_transfer_info; + TpuSerializedProto serialized_host_transfer_info; + TpuProgramApiFn()->TpuProgram_GetHostTransferInfoFn( + xla_tpu_program, &serialized_host_transfer_info); + if (serialized_host_transfer_info.size > 0) { + host_transfer_info = se_tpu::DeserializeProto( + serialized_host_transfer_info); + 
StreamExecutor_Tpu_FreeSerializedProto(&serialized_host_transfer_info); + } + host_transfer_infos[i] = host_transfer_info; + + TpuSerializedProto serialized_hlo_metadata; + TpuProgramApiFn()->TpuProgram_GetHloMetadataFn(xla_tpu_program, + &serialized_hlo_metadata); + xla::HloProto hlo_metadata = + se_tpu::DeserializeProto(serialized_hlo_metadata); + hlo_metadatas[i] = hlo_metadata; + StreamExecutor_Tpu_FreeSerializedProto(&serialized_hlo_metadata); } - tpu_program_group->set_host_transfer_info(host_transfer_info); - TpuSerializedProto serialized_hlo_metadata; - TpuProgramApiFn()->TpuProgram_GetHloMetadataFn(xla_tpu_programs[0], - &serialized_hlo_metadata); - xla::HloProto hlo_metadata = - se_tpu::DeserializeProto(serialized_hlo_metadata); - tpu_program_group->set_hlo_metadata(hlo_metadata); - StreamExecutor_Tpu_FreeSerializedProto(&serialized_hlo_metadata); - - return Status::OK(); + may_modify_variables_ = may_modify_variables_array; + executable_infos_ = executable_infos; + host_transfer_infos_ = host_transfer_infos; + hlo_metadatas_ = hlo_metadatas; + RefreshHloMetadatasPtrs(); } -} // namespace +size_t TpuProgramGroup::program_count() const { return tpu_programs_.size(); } int64_t TpuProgramGroup::program_size() const { int64_t total_size = 0; @@ -201,12 +208,6 @@ void TpuProgramGroup::UnloadAndDestroyPrograms() { TF_RET_CHECK(per_core_output_shapes.size() == per_core_variable_indices.size()); - // TODO(henrytan): add an interface to TpuProgramGroupInterface to set - // may_modify_variables. - TpuProgramGroup* tpu_program_group = - tensorflow::down_cast(tpu_program_group_interface); - tpu_program_group->may_modify_variables_ = may_modify_variables; - // With shardable input/output pairs, XLA could generate separate // sharding/unsharding programs along with the main program. The // sharding/unsharding programs will be in nested entries of the AOT @@ -221,17 +222,20 @@ void TpuProgramGroup::UnloadAndDestroyPrograms() { TF_RET_CHECK(xla_tpu_programs.size() == 1 || xla_tpu_programs.size() == metadata.num_cores_per_replica()); - TF_RETURN_IF_ERROR( - CreateTpuProgramGroup(xla_tpu_programs, tpu_program_group)); + // TODO(henrytan): add an interface to TpuProgramGroupInterface to set + // may_modify_variables. 
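+  // Until such an interface exists, the group is down_cast to the concrete
+  // TpuProgramGroup so that Initialize() and may_modify_variables_ can be set
+  // directly.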
+ TpuProgramGroup* tpu_program_group = + tensorflow::down_cast(tpu_program_group_interface); + tpu_program_group->Initialize(xla_tpu_programs); + tpu_program_group->may_modify_variables_ = may_modify_variables; return Status::OK(); } TpuProgramGroup::TpuProgramGroup(TpuProgramGroup&& other) : may_modify_variables_(std::move(other.may_modify_variables_)), - host_compute_metadata_(std::move(other.host_compute_metadata_)), tpu_programs_(std::move(other.tpu_programs_)), - executable_info_(std::move(other.executable_info_)), - host_transfer_info_(std::move(other.host_transfer_info_)), + executable_infos_(std::move(other.executable_infos_)), + host_transfer_infos_(std::move(other.host_transfer_infos_)), hlo_metadatas_(std::move(other.hlo_metadatas_)) { RefreshHloMetadatasPtrs(); } @@ -277,16 +281,6 @@ void TpuProgramGroup::set_may_modify_variables( may_modify_variables_ = may_modify_variables; } -const tf2xla::HostComputeMetadata& TpuProgramGroup::host_compute_metadata() - const { - return host_compute_metadata_; -} - -void TpuProgramGroup::set_host_compute_metadata( - const tf2xla::HostComputeMetadata& host_compute_metadata) { - host_compute_metadata_ = host_compute_metadata; -} - const std::vector& TpuProgramGroup::tpu_programs() const { return tpu_programs_; } @@ -305,22 +299,18 @@ void TpuProgramGroup::set_tpu_programs( } } -const TPUExecutableInfoProto& TpuProgramGroup::executable_info() const { - return executable_info_; +const TPUExecutableInfoProto& TpuProgramGroup::executable_info( + int index) const { + CHECK_GE(index, 0); + CHECK_LT(index, executable_infos_.size()); + return executable_infos_[index]; } -void TpuProgramGroup::set_executable_info( - const TPUExecutableInfoProto& executable_info) { - executable_info_ = executable_info; -} - -const TPUHostTransferInfoProto& TpuProgramGroup::host_transfer_info() const { - return host_transfer_info_; -} - -void TpuProgramGroup::set_host_transfer_info( - const TPUHostTransferInfoProto& host_transfer_info) { - host_transfer_info_ = host_transfer_info; +const TPUHostTransferInfoProto& TpuProgramGroup::host_transfer_info( + int index) const { + CHECK_GE(index, 0); + CHECK_LT(index, host_transfer_infos_.size()); + return host_transfer_infos_[index]; } /*static*/ @@ -348,14 +338,13 @@ Status TpuProgramGroup::CompileAndBuild( TF_RET_CHECK(count == 1 || count == compilation_request.metadata().num_cores_per_replica()); - VLOG(1) << "CreateTpuProgramGroup"; - Status serialize_status = - CreateTpuProgramGroup(absl::MakeConstSpan(&xla_tpu_programs[0], count), - tpu_program_group_interface); - VLOG(1) << absl::StrCat("Run CreateTpuProgramGroup completed. StatusCode: ", - serialize_status.code()); + VLOG(1) << "Initialize TpuProgramGroup."; + TpuProgramGroup* tpu_program_group = + tensorflow::down_cast(tpu_program_group_interface); + tpu_program_group->Initialize( + absl::MakeConstSpan(&xla_tpu_programs[0], count)); TpuProgramApiFn()->TpuProgram_FreeArrayFn(xla_tpu_programs); - return serialize_status; + return status.status(); } } // namespace tpu diff --git a/tensorflow/core/tpu/kernels/tpu_program_group.h b/tensorflow/core/tpu/kernels/tpu_program_group.h index 5a36fa4e78d..bceede5ac07 100644 --- a/tensorflow/core/tpu/kernels/tpu_program_group.h +++ b/tensorflow/core/tpu/kernels/tpu_program_group.h @@ -102,11 +102,14 @@ class TpuProgramGroup : public TpuProgramGroupInterface { const absl::optional& xla_device_assignment, TpuProgramGroupInterface* tpu_program_group_interface); + // Initializes `TpuProgramGroup` object with `xla_tpu_programs`. 
+ void Initialize(absl::Span xla_tpu_programs); + TpuProgramGroup() = default; TpuProgramGroup(TpuProgramGroup&& other); TpuProgramGroup& operator=(TpuProgramGroup&&) = delete; - size_t program_count() const override { return tpu_programs_.size(); } + size_t program_count() const override; int64_t program_size() const override; @@ -120,21 +123,13 @@ class TpuProgramGroup : public TpuProgramGroupInterface { const std::vector& may_modify_variables() const override; void set_may_modify_variables(const std::vector& may_modify_variables); - const tf2xla::HostComputeMetadata& host_compute_metadata() const; - void set_host_compute_metadata( - const tf2xla::HostComputeMetadata& host_compute_metadata); - const std::vector& tpu_programs() const; const XLA_TpuProgram* tpu_program(int index) const; void set_tpu_programs(absl::Span tpu_programs); - const TPUExecutableInfoProto& executable_info() const; - void set_executable_info(const TPUExecutableInfoProto& executable_info); - - const TPUHostTransferInfoProto& host_transfer_info() const; - void set_host_transfer_info( - const TPUHostTransferInfoProto& host_transfer_info); + const TPUExecutableInfoProto& executable_info(int index) const; + const TPUHostTransferInfoProto& host_transfer_info(int index) const; void set_hlo_metadata(const xla::HloProto& hlo_metadata); const xla::HloProto* hlo_metadata(int index) const; absl::Span hlo_metadatas() const override; @@ -143,11 +138,10 @@ class TpuProgramGroup : public TpuProgramGroupInterface { void RefreshHloMetadatasPtrs(); std::vector may_modify_variables_; - tf2xla::HostComputeMetadata host_compute_metadata_; std::vector tpu_programs_; // Not owned. - TPUExecutableInfoProto executable_info_; - TPUHostTransferInfoProto host_transfer_info_; + std::vector executable_infos_; + std::vector host_transfer_infos_; // To be consistent with the TpuProgramGroupInterface::hlo_metadatas() // signature, we store HloProto values in hlo_metadatas_ when From cb2cf6f56fc538c935c422a32eb1fab530e170b3 Mon Sep 17 00:00:00 2001 From: Ran Chen Date: Wed, 29 Jul 2020 22:23:50 -0700 Subject: [PATCH 1694/2522] Add back the lock in CollectiveKeys MWMS is using CollectiveKeys directly when broadcasting variable initial values, so the lock in CollectiveAllReduce is not enough. This change also acquires in all methods in CollectiveKeys, instead of get_group_key(). PiperOrigin-RevId: 323938375 Change-Id: I15ea98ff62952d0c3bd4d33f74067b4bad03d7cb --- .../python/distribute/cross_device_utils.py | 40 ++++++++++++++----- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/tensorflow/python/distribute/cross_device_utils.py b/tensorflow/python/distribute/cross_device_utils.py index 1d5c2c8f452..dc6dc4071bd 100644 --- a/tensorflow/python/distribute/cross_device_utils.py +++ b/tensorflow/python/distribute/cross_device_utils.py @@ -19,6 +19,8 @@ from __future__ import division from __future__ import print_function import collections as pycoll +import copy +import threading from tensorflow.python.distribute import all_reduce from tensorflow.python.distribute import values as value_lib @@ -244,6 +246,8 @@ class CollectiveKeys(object): "Graph key": an integer key that is unique key graph. This is used to support multiple graphs per client session. It must be non-zero and set in the `config` argument of each call to `session.run`. + + This class is thread safe. 
""" def __init__(self, @@ -264,6 +268,7 @@ class CollectiveKeys(object): assert op_instance_key_start != variable_instance_key_start self._op_instance_key = op_instance_key_start self._variable_instance_key = variable_instance_key_start + self._lock = threading.Lock() def get_group_key(self, devices): """Returns a group key for the set of devices. @@ -282,23 +287,36 @@ class CollectiveKeys(object): # task_type and task_id. names = sorted(['%s:%d' % (d.device_type, d.device_index) for d in parsed]) key_id = ','.join(names) - if key_id not in self._group_key_table: - new_key = self._group_key - self._group_key += 1 - self._group_key_table[key_id] = new_key - return self._group_key_table[key_id] + with self._lock: + if key_id not in self._group_key_table: + new_key = self._group_key + self._group_key += 1 + self._group_key_table[key_id] = new_key + return self._group_key_table[key_id] def get_op_instance_key(self): """Returns a new instance key for use in defining a collective op.""" - v = self._op_instance_key - self._op_instance_key += 1 - return v + with self._lock: + v = self._op_instance_key + self._op_instance_key += 1 + return v def get_variable_instance_key(self): """Returns a new instance key for use in creating a Variable.""" - v = self._variable_instance_key - self._variable_instance_key += 1 - return v + with self._lock: + v = self._variable_instance_key + self._variable_instance_key += 1 + return v + + def __deepcopy__(self, memo): + # distribute_coordinator deep-copies the strategy object, so + # CollectiveKeys needs to support deep copy as well. + copied = CollectiveKeys() + copied._group_key = self._group_key + copied._group_key_table = copy.deepcopy(self._group_key_table, memo) + copied._op_instance_key = self._op_instance_key + copied._variable_instance_key = self._variable_instance_key + return copied def build_collective_reduce(input_tensors, From 1d19b8f0601471250ae4b6e5630e6df9d0ae1320 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Wed, 29 Jul 2020 23:05:16 -0700 Subject: [PATCH 1695/2522] Export utility `inside_function` to the public API. PiperOrigin-RevId: 323943686 Change-Id: Ie50ea6880731feb99b0d4d243aab8e468bb7b86b --- tensorflow/python/framework/ops.py | 17 +++++++++++++++++ tensorflow/tools/api/golden/v2/tensorflow.pbtxt | 4 ++++ 2 files changed, 21 insertions(+) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 2d47618a62b..75a36f83fc5 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -5820,7 +5820,24 @@ def executing_eagerly_outside_functions(): return context.executing_eagerly() +@tf_export("inside_function", v1=[]) def inside_function(): + """Indicates whether the caller code is executing inside a `tf.function`. + + Returns: + Boolean, True if the caller code is executing inside a `tf.function` + rather than eagerly. + + Example: + + >>> tf.inside_function() + False + >>> @tf.function + ... def f(): + ... 
print(tf.inside_function()) + >>> f() + True + """ return get_default_graph().building_function diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt index 909cc8b4b91..7cf617ddf8b 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt @@ -744,6 +744,10 @@ tf_module { name: "init_scope" argspec: "args=[], varargs=None, keywords=None, defaults=None" } + member_method { + name: "inside_function" + argspec: "args=[], varargs=None, keywords=None, defaults=None" + } member_method { name: "is_tensor" argspec: "args=[\'x\'], varargs=None, keywords=None, defaults=None" From 9e0cfddcdb292bc0bf6456e96a7bce0f7c4e6199 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 29 Jul 2020 23:15:10 -0700 Subject: [PATCH 1696/2522] [XLA] Add hash function to HloUse/HloPosition. PiperOrigin-RevId: 323944629 Change-Id: Ib66d374aea015fad3fbed063ed580034599afb18 --- tensorflow/compiler/xla/service/hlo_value.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/hlo_value.h b/tensorflow/compiler/xla/service/hlo_value.h index a1150ae299d..a721aabef76 100644 --- a/tensorflow/compiler/xla/service/hlo_value.h +++ b/tensorflow/compiler/xla/service/hlo_value.h @@ -57,6 +57,11 @@ struct HloPosition { (instruction->unique_id() == other.instruction->unique_id() && index < other.index); } + + template + friend H AbslHashValue(H h, const HloPosition& pos) { + return H::combine(std::move(h), pos.instruction->Hash(), pos.index); + } }; std::ostream& operator<<(std::ostream& out, const HloPosition& position); @@ -81,6 +86,12 @@ struct HloUse { } bool operator!=(const HloUse& other) const { return !(*this == other); } + + template + friend H AbslHashValue(H h, const HloUse& use) { + return H::combine(std::move(h), use.instruction, use.operand_index, + use.operand_number); + } }; std::ostream& operator<<(std::ostream& out, const HloUse& use); @@ -240,7 +251,8 @@ std::ostream& operator<<(std::ostream& out, const HloValueSet& hlo_value); // hold multiple HloValueSets. class InstructionValueSet : public ShapeTree { public: - InstructionValueSet(const Shape& shape) : ShapeTree(shape) {} + explicit InstructionValueSet(const Shape& shape) + : ShapeTree(shape) {} // Sets this value set to the union of the given value sets. Returns whether // this value set changed. From f636ca42bc7f8240ac6f29319e2a5f66b3d77b72 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 29 Jul 2020 23:34:08 -0700 Subject: [PATCH 1697/2522] [XLA] Add equality operator to MemorySpaceAssignment::Allocation. 
PiperOrigin-RevId: 323946338 Change-Id: I43f08843cfec038b1c0fd678e516c7bed9aba1cf --- .../xla/service/memory_space_assignment.cc | 18 ++++++++++++++++++ .../xla/service/memory_space_assignment.h | 2 ++ 2 files changed, 20 insertions(+) diff --git a/tensorflow/compiler/xla/service/memory_space_assignment.cc b/tensorflow/compiler/xla/service/memory_space_assignment.cc index 7d3101c907f..84c24862d60 100644 --- a/tensorflow/compiler/xla/service/memory_space_assignment.cc +++ b/tensorflow/compiler/xla/service/memory_space_assignment.cc @@ -574,6 +574,24 @@ CostAnalysisPrefetchIntervalPicker::BufferIntervalAlternateMemoryBenefit( return cost_analysis_.GetMemoryBoundedness(interval); } +bool MemorySpaceAssignment::Allocation::operator==( + const MemorySpaceAssignment::Allocation& other) const { + return defining_position() == other.defining_position() && + uses() == other.uses() && memory_space() == other.memory_space() && + chunk() == other.chunk() && start_time() == other.start_time() && + end_time() == other.end_time() && + is_copy_allocation() == other.is_copy_allocation(); +} + +bool MemorySpaceAssignment::CopyAllocation::operator==( + const MemorySpaceAssignment::CopyAllocation& other) const { + return static_cast(*this) == + static_cast(other) && + copy_done_schedule_before() == other.copy_done_schedule_before() && + copy_start_schedule_after() == other.copy_start_schedule_after() && + copy_start() == other.copy_start() && copy_done() == other.copy_done(); +} + std::string MemorySpaceAssignment::AllocationValue::ToString() const { std::string out = absl::StrCat("computation = ", computation()->name()); absl::StrAppend(&out, "\n position:\n"); diff --git a/tensorflow/compiler/xla/service/memory_space_assignment.h b/tensorflow/compiler/xla/service/memory_space_assignment.h index 4bb1632e5c9..87f7dd2ddae 100644 --- a/tensorflow/compiler/xla/service/memory_space_assignment.h +++ b/tensorflow/compiler/xla/service/memory_space_assignment.h @@ -515,6 +515,7 @@ class MemorySpaceAssignment { int64 start_time() const { return start_time_; } int64 end_time() const { return end_time_; } + bool operator==(const Allocation& other) const; virtual std::string ToString() const; protected: @@ -589,6 +590,7 @@ class MemorySpaceAssignment { copy_start_schedule_after_ = copy_start_schedule_after; } + bool operator==(const CopyAllocation& other) const; std::string ToString() const override; private: From 55c87a582d427de5778666333a0a188ba34cf5a0 Mon Sep 17 00:00:00 2001 From: Ran Chen Date: Wed, 29 Jul 2020 23:58:39 -0700 Subject: [PATCH 1698/2522] Add a module level multi_process_runner.manager() It's more convienent since we often need to create objects then passing them to create MultiProcessRunner. 
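For example, with the module-level manager the parent-to-subprocess communication pattern looks like this (a usage sketch based on the docstring added in this change; `proc_func` and `cluster_spec` stand in for the user's worker function and cluster spec):

  from tensorflow.python.distribute import multi_process_runner

  manager = multi_process_runner.manager()
  some_event_happening_in_subprocess = manager.Event()
  mpr = multi_process_runner.MultiProcessRunner(
      proc_func, cluster_spec,
      args=(some_event_happening_in_subprocess,))
  mpr.start()
  some_event_happening_in_subprocess.wait()
  # Only proceed after the event has fired in the subprocess.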
PiperOrigin-RevId: 323948350 Change-Id: Iaa22f03e7564e5e53b4a58e119f2e81e05f21851 --- .../python/distribute/multi_process_runner.py | 60 +++++++++++-------- 1 file changed, 36 insertions(+), 24 deletions(-) diff --git a/tensorflow/python/distribute/multi_process_runner.py b/tensorflow/python/distribute/multi_process_runner.py index 7f653c0e2de..e5be4fa4a14 100644 --- a/tensorflow/python/distribute/multi_process_runner.py +++ b/tensorflow/python/distribute/multi_process_runner.py @@ -205,7 +205,7 @@ class MultiProcessRunner(object): self._outstanding_subprocess_count = 0 self._reading_threads = [] - self._manager = multiprocessing.Manager() + self._manager = manager() self._process_status_queue = self._manager.Queue() self._parent_to_sub_queue = self._manager.Queue() parties = sum(len(addresses) for addresses in self._cluster_spec.values()) @@ -568,29 +568,6 @@ class MultiProcessRunner(object): task_type, task_id) self._all_forced_terminated = True - def get_manager(self): - """Returns the multiprocessing manager object for concurrency tools. - - The manager object is useful as it controls a server process that holds - the python objects that can be shared across processes. This can be used - for parent-subprocess communication: - - ```python - mpr = multi_process_runner.MultiProcessRunner(...) - manager = mpr.get_manager() - some_event_happening_in_subprocess = manager.Event() - mpr.set_args(args=(some_event_happening_in_subprocess,)) - mpr.start() - some_event_happening_in_subprocess.wait() - # Do something that only should after some event happens in subprocess. - ``` - - Note that the user of multi_process_runner should not create additional - `multiprocessing.Manager()` objects; doing so can result in segfault in - some cases. - """ - return self._manager - class _Process(multi_process_lib.Process): """A modified `multiprocessing.Process` that can set up environment variables.""" @@ -991,6 +968,41 @@ def barrier(): return _barrier +_manager = None +_manager_lock = threading.Lock() + + +def manager(): + """Returns the multiprocessing manager object for concurrency tools. + + The manager object is useful as it controls a server process that holds + the python objects that can be shared across processes. This can be used + for parent-subprocess communication: + + ```python + manager = multi_process_runner.manager() + some_event_happening_in_subprocess = manager.Event() + mpr = multi_process_runner.MultiProcessRunner(proc_func, cluster_spec, + args=(some_event_happening_in_subprocess,)) + mpr.start() + some_event_happening_in_subprocess.wait() + # Do something that only should after some event happens in subprocess. + ``` + + Note that the user of multi_process_runner should not create additional + `multiprocessing.Manager()` objects; doing so can result in segfault in + some cases. + + This method should only be called after multi_process_runner.test_main() is + called. + """ + global _manager + with _manager_lock: + if _manager is None: + _manager = multiprocessing.Manager() + return _manager + + def test_main(): """Main function to be called within `__main__` of a test file.""" multi_process_lib.test_main() From 26dc0e3ee280a825d701a0a02f570d34ea9e6d78 Mon Sep 17 00:00:00 2001 From: Priya Gupta Date: Thu, 30 Jul 2020 00:04:00 -0700 Subject: [PATCH 1699/2522] Add multi worker mirrored strategy to DistributedVariable test. Some cases that are broken are currently skipped and being fixed in separate changes. 
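The known-broken combinations are skipped inside the test body rather than dropped from the test matrix, for example (sketch mirroring the updated test):

  if (isinstance(distribution,
                 collective_all_reduce_strategy.CollectiveAllReduceStrategy)
      and mode == "graph"):
    self.skipTest("MWMS combinations tests do not work well in graph mode.")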
PiperOrigin-RevId: 323948947 Change-Id: I3bd22d5309fb7be491b6036134b328883957e15c --- tensorflow/python/distribute/values_test.py | 24 ++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/distribute/values_test.py b/tensorflow/python/distribute/values_test.py index 447a8b427eb..a9d7dd2125e 100644 --- a/tensorflow/python/distribute/values_test.py +++ b/tensorflow/python/distribute/values_test.py @@ -26,6 +26,7 @@ import numpy as np from tensorflow.core.protobuf import config_pb2 from tensorflow.python import tf2 +from tensorflow.python.distribute import collective_all_reduce_strategy from tensorflow.python.distribute import combinations from tensorflow.python.distribute import distribute_lib from tensorflow.python.distribute import distribute_utils @@ -408,6 +409,9 @@ class DistributedDelegateTest(test.TestCase): strategy_combinations.tpu_strategy, strategy_combinations.tpu_strategy_packed_var, strategy_combinations.central_storage_strategy_with_two_gpus, + strategy_combinations.multi_worker_mirrored_2x1_cpu, + strategy_combinations.multi_worker_mirrored_2x1_gpu, + strategy_combinations.multi_worker_mirrored_2x2_gpu ], synchronization=[ variables_lib.VariableSynchronization.ON_READ, @@ -427,7 +431,19 @@ class DistributedVariableTest(test.TestCase, parameterized.TestCase): 1., synchronization=synchronization, aggregation=aggregation) self.assertIsInstance(v, variables_lib.Variable) - def testCheckpointing(self, distribution, synchronization, aggregation): + def testCheckpointing(self, distribution, synchronization, aggregation, mode): + # TODO(anjs): Remove this when b/162147051 is fixed. + if (isinstance(distribution, + collective_all_reduce_strategy.CollectiveAllReduceStrategy) + and aggregation == variables_lib.VariableAggregation.SUM + and synchronization == variables_lib.VariableSynchronization.ON_READ): + self.skipTest("b/162147051") + + if (isinstance(distribution, + collective_all_reduce_strategy.CollectiveAllReduceStrategy) + and mode == "graph"): + self.skipTest("MWMS combinations tests do not work well in graph mode.") + with distribution.scope(): v = variables_lib.Variable( constant_op.constant([1., 2., 3., 4]), @@ -594,6 +610,12 @@ class DistributedVariableTest(test.TestCase, parameterized.TestCase): self.skipTest("b/148689177: AggregatingVariable doesn't " "conform to Variable interface well") + # TODO(crccw): Unskip this in cl/323875091. + if (isinstance( + distribution, + collective_all_reduce_strategy.CollectiveAllReduceStrategy)): + self.skipTest("Being fixed in cl/323875091") + # tf.function requires the return value to be Tensors, which is not always # case for properties and methods of Variable, so we simply discard the # return values. From a1e78629fa0b461273d0ff4c5b45e01ee4b8836d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 30 Jul 2020 00:19:26 -0700 Subject: [PATCH 1700/2522] Fix to handle Reshape Layer in experimental TFLite writer library. Changes: 1. Updated handling of ReshapeParams. 2. Added write_lib tests to check different scenarios. 
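Background: a TFLite RESHAPE node can carry its target shape either in the builtin `new_shape` parameter or in an extra shape input tensor, and in the latter case the parameter may be absent. A rough TensorFlow-level sketch of the two flavors (hypothetical illustration, not part of this change):

  import tensorflow as tf

  def reshape_with_static_shape(x):
    # The target shape is a constant, so it can live in the builtin parameter.
    return tf.reshape(x, [1, 2, 3])

  def reshape_with_shape_tensor(x, shape):
    # The target shape arrives as a second input tensor, so the op may carry
    # no new_shape parameter at all.
    return tf.reshape(x, shape)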
PiperOrigin-RevId: 323950640 Change-Id: I20c4a5dcd3d80c591366edb7341634c0b13ffd45 --- .../writer/option_writer_generator.cc | 30 +++++++ .../lite/experimental/writer/writer_lib.cc | 4 +- .../experimental/writer/writer_lib_test.cc | 80 +++++++++++++++++++ 3 files changed, 112 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/experimental/writer/option_writer_generator.cc b/tensorflow/lite/experimental/writer/option_writer_generator.cc index a565422457c..898f4a95ef6 100644 --- a/tensorflow/lite/experimental/writer/option_writer_generator.cc +++ b/tensorflow/lite/experimental/writer/option_writer_generator.cc @@ -265,6 +265,29 @@ void GenerateImportForResizeBilinearOp(FILE* fp) { " }\n break;\n"); } +// Reshape Op infers output shape either from Parameter or from shape tensor +// that's is an additional input. When we have this additional shape tensor as +// input we don't have the parameter present in this layer. In case of more than +// one input we import an empty vector for the parameters. +void GenerateImportForReshapeOp(FILE* fp) { + fprintf(fp, + " case BuiltinOperator_RESHAPE: {\n" + " const auto* params = reinterpret_cast(builtin_op_data);\n" + " flatbuffers::Offset union_type;\n" + " if (node.inputs->size > 1) {\n" + " union_type = CreateReshapeOptions(*fbb).Union();\n" + " } else {\n" + " auto val0 = fbb->CreateVector(std::vector(params->shape, " + "params->shape + params->num_dimensions));\n" + " union_type = CreateReshapeOptions(*fbb, " + "val0).Union();\n" + " }\n" + " return std::make_pair(BuiltinOptions_ReshapeOptions, " + "union_type);\n" + " }\n break;\n"); +} + void GenerateImportForOp(FILE* fp, const std::string& op_name, const std::string& option_name, const std::string& option_type, @@ -276,6 +299,13 @@ void GenerateImportForOp(FILE* fp, const std::string& op_name, return; } + // Special case Reshape that may have 'new_shape' field missing from the + // parameters. + if (struct_name == "TfLiteReshapeParams") { + GenerateImportForReshapeOp(fp); + return; + } + fprintf(fp, " case BuiltinOperator_%s: {\n", op_name.c_str()); if (options->num_elems != 0) { fprintf(fp, diff --git a/tensorflow/lite/experimental/writer/writer_lib.cc b/tensorflow/lite/experimental/writer/writer_lib.cc index ed26c7f9038..2f509daa9cb 100644 --- a/tensorflow/lite/experimental/writer/writer_lib.cc +++ b/tensorflow/lite/experimental/writer/writer_lib.cc @@ -31,7 +31,7 @@ namespace tflite { std::pair> CreateBuiltinUnion( flatbuffers::FlatBufferBuilder* fbb, enum BuiltinOperator op, - void* builtin_op_data) { + void* builtin_op_data, const TfLiteNode& node) { switch (op) { #include "tensorflow/lite/experimental/writer/option_writer_generated.h" } @@ -82,7 +82,7 @@ SubgraphWriter::ExportOperators(flatbuffers::FlatBufferBuilder* fbb) { // builtin auto builtin_options_and_type = CreateBuiltinUnion( fbb, static_cast(registration.builtin_code), - node.builtin_data); + node.builtin_data, node); builtin_options = builtin_options_and_type.second; builtin_options_type = builtin_options_and_type.first; } else { diff --git a/tensorflow/lite/experimental/writer/writer_lib_test.cc b/tensorflow/lite/experimental/writer/writer_lib_test.cc index 41cca88ead7..fb59482f705 100644 --- a/tensorflow/lite/experimental/writer/writer_lib_test.cc +++ b/tensorflow/lite/experimental/writer/writer_lib_test.cc @@ -15,6 +15,9 @@ limitations under the License. 
#include "tensorflow/lite/experimental/writer/writer_lib.h" +#include +#include + #include #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/interpreter.h" @@ -184,6 +187,83 @@ TEST(Writer, PerTensorQuantizedModelTest) { CHECK_EQ(new_interpreter->AllocateTensors(), kTfLiteOk); } +struct ReshapeTestPattern { + int num_inputs; + bool is_param_valid; +}; + +class ReshapeLayerTest : public ::testing::TestWithParam {}; + +TEST_P(ReshapeLayerTest, ReshapeLayerTest) { + const auto param = GetParam(); + Interpreter interpreter; + const int total_tensors = param.num_inputs + 1; + interpreter.AddTensors(total_tensors); + int output_shape[] = {1, 2, 3}; + interpreter.SetTensorParametersReadWrite(/*tensor_index=*/0, kTfLiteFloat32, + /*name=*/"a", /*dims=*/{6}, + TfLiteQuantization()); + ASSERT_LE(param.num_inputs, 2); + if (param.num_inputs == 2) { + interpreter.SetTensorParametersReadOnly( + /*tensor_index=*/1, kTfLiteInt32, /*name=*/"b", /*dims=*/{3}, + TfLiteQuantization(), reinterpret_cast(output_shape), + sizeof(output_shape)); + } + interpreter.SetTensorParametersReadWrite(/*tensor_index=*/total_tensors - 1, + kTfLiteFloat32, /*name=*/"c", + /*dims=*/{3}, TfLiteQuantization()); + + std::vector input_tensors(param.num_inputs); + std::iota(input_tensors.begin(), input_tensors.end(), 0); + + interpreter.SetInputs(input_tensors); + interpreter.SetOutputs({total_tensors - 1}); + const char* initial_data = ""; + tflite::ops::builtin::BuiltinOpResolver resolver; + TfLiteReshapeParams* builtin_data = reinterpret_cast( + malloc(sizeof(TfLiteReshapeParams))); + if (param.is_param_valid) { + builtin_data->num_dimensions = 3; + for (int dim = 0; dim < builtin_data->num_dimensions; ++dim) { + builtin_data->shape[dim] = output_shape[dim]; + } + } + const TfLiteRegistration* reg = resolver.FindOp(BuiltinOperator_RESHAPE, 1); + interpreter.AddNodeWithParameters(input_tensors, + /*outputs=*/{total_tensors - 1}, + initial_data, /*init_data_size=*/0, + reinterpret_cast(builtin_data), reg); + + SubgraphWriter writer(&interpreter.primary_subgraph()); + std::stringstream ss; + ss << "/tmp/test_reshape_" << param.num_inputs << param.is_param_valid + << ".tflite"; + std::string filename = ss.str(); + writer.Write(filename); + std::unique_ptr model = + FlatBufferModel::BuildFromFile(filename.c_str()); + InterpreterBuilder builder(*model, resolver); + std::unique_ptr new_interpreter; + builder(&new_interpreter); + ASSERT_EQ(new_interpreter->AllocateTensors(), kTfLiteOk); +} + +INSTANTIATE_TEST_SUITE_P( + Writer, ReshapeLayerTest, + ::testing::Values(ReshapeTestPattern{/*num_inputs=*/2, + /*is_param_valid=*/true}, + ReshapeTestPattern{/*num_inputs=*/2, + /*is_param_valid=*/false}, + ReshapeTestPattern{/*num_inputs=*/1, + /*is_param_valid=*/true}), + [](const ::testing::TestParamInfo& info) { + std::stringstream ss; + ss << "num_inputs_" << info.param.num_inputs << "_valid_param_" + << info.param.is_param_valid; + std::string name = ss.str(); + return name; + }); } // namespace tflite int main(int argc, char** argv) { From e4fe5dc57ba7a408c0d5c36cfb0682fbeb9e6357 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Wed, 29 Jul 2020 16:57:39 +0200 Subject: [PATCH 1701/2522] Fix environment for protobuf compilation Patch using upstream https://github.com/grpc/grpc/pull/23664 --- tensorflow/workspace.bzl | 1 + third_party/grpc/generate_cc_env_fix.patch | 10 ++++++++++ 2 files changed, 11 insertions(+) create mode 100644 third_party/grpc/generate_cc_env_fix.patch diff --git a/tensorflow/workspace.bzl 
b/tensorflow/workspace.bzl index 76d23dd81ab..b9c187733f6 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -688,6 +688,7 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): sha256 = "b956598d8cbe168b5ee717b5dafa56563eb5201a947856a6688bbeac9cac4e1f", strip_prefix = "grpc-b54a5b338637f92bfcf4b0bc05e0f57a5fd8fadd", system_build_file = clean_dep("//third_party/systemlibs:grpc.BUILD"), + patch_file = clean_dep("//third_party/grpc:generate_cc_env_fix.patch"), system_link_files = { "//third_party/systemlibs:BUILD": "bazel/BUILD", "//third_party/systemlibs:grpc.BUILD": "src/compiler/BUILD", diff --git a/third_party/grpc/generate_cc_env_fix.patch b/third_party/grpc/generate_cc_env_fix.patch new file mode 100644 index 00000000000..51832fe9628 --- /dev/null +++ b/third_party/grpc/generate_cc_env_fix.patch @@ -0,0 +1,10 @@ +--- a/bazel/generate_cc.bzl ++++ b/bazel/generate_cc.bzl +@@ -141,6 +141,7 @@ def generate_cc_impl(ctx): + outputs = out_files, + executable = ctx.executable._protoc, + arguments = arguments, ++ use_default_shell_env = True, + ) + + return struct(files = depset(out_files)) From c6023a81d4976f6ff79f957925364d21d7884004 Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Thu, 30 Jul 2020 00:50:52 -0700 Subject: [PATCH 1702/2522] Enable mlir generated GPU kernels by default for cuda builds. PiperOrigin-RevId: 323954095 Change-Id: I93fbc5c77d6e91ef8e2bf198614f1dda7c9a9380 --- .bazelrc | 5 +++++ tensorflow/core/kernels/mlir_generated/BUILD | 4 ++-- tensorflow/core/kernels/mlir_generated/build_defs.bzl | 4 ++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/.bazelrc b/.bazelrc index 6a448b267e0..da988e4c928 100644 --- a/.bazelrc +++ b/.bazelrc @@ -173,6 +173,11 @@ build:using_cuda --define=using_cuda=true build:using_cuda --action_env TF_NEED_CUDA=1 build:using_cuda --crosstool_top=@local_config_cuda//crosstool:toolchain +# Enable the mlir generated GPU kernels only for cuda builds. +build --define=tensorflow_enable_mlir_generated_gpu_kernels=0 +# This is a more specific option, so it takes precedence over the line above for cuda builds. +build:using_cuda --define=tensorflow_enable_mlir_generated_gpu_kernels=1 + # This config refers to building CUDA op kernels with nvcc. 
build:cuda --config=using_cuda build:cuda --define=using_cuda_nvcc=true diff --git a/tensorflow/core/kernels/mlir_generated/BUILD b/tensorflow/core/kernels/mlir_generated/BUILD index 79ccda50c87..9f3efe9d972 100644 --- a/tensorflow/core/kernels/mlir_generated/BUILD +++ b/tensorflow/core/kernels/mlir_generated/BUILD @@ -18,9 +18,9 @@ package( ) config_setting( - name = "mlir_generated_gpu_kernels_enabled", + name = "mlir_generated_gpu_kernels_disabled", define_values = { - "tensorflow_enable_mlir_generated_gpu_kernels": "1", + "tensorflow_enable_mlir_generated_gpu_kernels": "0", }, ) diff --git a/tensorflow/core/kernels/mlir_generated/build_defs.bzl b/tensorflow/core/kernels/mlir_generated/build_defs.bzl index 3426aba94a4..2bf6e8fa3bb 100644 --- a/tensorflow/core/kernels/mlir_generated/build_defs.bzl +++ b/tensorflow/core/kernels/mlir_generated/build_defs.bzl @@ -4,8 +4,8 @@ load("@local_config_cuda//cuda:build_defs.bzl", "cuda_gpu_architectures", "if_cu def if_mlir_generated_gpu_kernels_enabled(if_true, if_false = []): return select({ - "//tensorflow/core/kernels/mlir_generated:mlir_generated_gpu_kernels_enabled": if_true, - "//conditions:default": if_false, + "//tensorflow/core/kernels/mlir_generated:mlir_generated_gpu_kernels_disabled": if_false, + "//conditions:default": if_true, }) def _lookup_file(filegroup, path): From 306aa1d7632492eb2d909239227696362fba531b Mon Sep 17 00:00:00 2001 From: Juho Ha Date: Thu, 30 Jul 2020 01:50:40 -0700 Subject: [PATCH 1703/2522] Add Hexagon library to Android benchmark apps libhexagon_interface.so is added to the app, but libhexagon_nn_skel*.so files are not added by licence issue. Users who want to build this app should create a build target for the library by themselves. --hexagon_lib_path is automatically appended when --use_hexagon option is used. PiperOrigin-RevId: 323960185 Change-Id: I80a7b96e93a53b37e2c790c0498e5878dc8ecf63 --- tensorflow/lite/special_rules.bzl | 17 ++++++++++++++ tensorflow/lite/tools/benchmark/android/BUILD | 20 ++++++++++++++++- .../lite/tools/benchmark/android/README.md | 20 +++++++++++++++++ .../benchmark/BenchmarkModelActivity.java | 4 ++++ .../firebase/android/AndroidManifest.xml | 2 +- .../experimental/firebase/android/BUILD | 22 +++++++++++++++++-- .../android/jni/benchmark_model_jni.cc | 21 +++++++++++++----- .../benchmark/firebase/BenchmarkModel.java | 11 ++++++---- .../firebase/BenchmarkModelActivity.java | 2 +- 9 files changed, 105 insertions(+), 14 deletions(-) diff --git a/tensorflow/lite/special_rules.bzl b/tensorflow/lite/special_rules.bzl index 5053eb2a16b..cc5fd15e5d5 100644 --- a/tensorflow/lite/special_rules.bzl +++ b/tensorflow/lite/special_rules.bzl @@ -51,3 +51,20 @@ def if_nnapi(supported, not_supported = [], supported_android = None): def tflite_hexagon_mobile_test(name): """This is a no-op outside of Google.""" pass + +def tflite_hexagon_nn_skel_libraries(): + """This is a no-op outside of Google due to license agreement process. + + Developers who want to use hexagon nn skel libraries can download + and install the libraries as the guided in + https://www.tensorflow.org/lite/performance/hexagon_delegate#step_2_add_hexagon_libraries_to_your_android_app. + For example, if you installed the libraries at third_party/hexagon_nn_skel + and created third_party/hexagon_nn_skel/BUILD with a build target, + filegroup( + name = "libhexagon_nn_skel", + srcs = glob(["*.so"]), + ) + you need to modify this macro to specifiy the build target. 
+ return ["//third_party/hexagon_nn_skel:libhexagon_nn_skel"] + """ + return [] diff --git a/tensorflow/lite/tools/benchmark/android/BUILD b/tensorflow/lite/tools/benchmark/android/BUILD index b9b9331fcb8..6645b730bac 100644 --- a/tensorflow/lite/tools/benchmark/android/BUILD +++ b/tensorflow/lite/tools/benchmark/android/BUILD @@ -2,6 +2,7 @@ # BenchmarkModel Android harness for TensorFlow Lite benchmarks. load("//tensorflow/lite:build_def.bzl", "tflite_jni_binary") +load("//tensorflow/lite:special_rules.bzl", "tflite_hexagon_nn_skel_libraries") load("@build_bazel_rules_android//android:rules.bzl", "android_binary") package( @@ -23,7 +24,10 @@ android_binary( # can't be built. We need to prevent the build system from trying to # use the target in that case. tags = ["manual"], - deps = [":tensorflowlite_benchmark_native"], + deps = [ + ":hexagon_libs", + ":tensorflowlite_benchmark_native", + ], ) tflite_jni_binary( @@ -43,3 +47,17 @@ cc_library( srcs = ["libtensorflowlite_benchmark.so"], visibility = ["//visibility:private"], ) + +cc_library( + name = "hexagon_libs", + srcs = select({ + "//tensorflow:android_arm64": [ + "//tensorflow/lite/delegates/hexagon/hexagon_nn:libhexagon_interface.so", + ] + tflite_hexagon_nn_skel_libraries(), + "//tensorflow:android_arm": [ + "//tensorflow/lite/delegates/hexagon/hexagon_nn:libhexagon_interface.so", + ] + tflite_hexagon_nn_skel_libraries(), + "//conditions:default": [], + }), + visibility = ["//visibility:private"], +) diff --git a/tensorflow/lite/tools/benchmark/android/README.md b/tensorflow/lite/tools/benchmark/android/README.md index 00092c4a44f..f73939c96bf 100644 --- a/tensorflow/lite/tools/benchmark/android/README.md +++ b/tensorflow/lite/tools/benchmark/android/README.md @@ -31,6 +31,26 @@ bazel build -c opt \ tensorflow/lite/tools/benchmark/android:benchmark_model ``` +(Optional) To enable Hexagon delegate with `--use_hexagon=true` option, you can +download and install the libraries as the guided in [hexagon delegate] +(https://www.tensorflow.org/lite/performance/hexagon_delegate#step_2_add_hexagon_libraries_to_your_android_app) +page. For example, if you installed the libraries at third_party/hexagon_nn_skel +and created third_party/hexagon_nn_skel/BUILD with a build target, + +``` +filegroup( + name = "libhexagon_nn_skel", + srcs = glob(["*.so"]), +) +``` + +you need to modify tflite_hexagon_nn_skel_libraries macro in +tensorflow/lite/special_rules.bzl to specifiy the build target. + +``` +return ["//third_party/hexagon_nn_skel:libhexagon_nn_skel"] +``` + (2) Connect your phone. Install the benchmark APK to your phone with adb: ``` diff --git a/tensorflow/lite/tools/benchmark/android/src/org/tensorflow/lite/benchmark/BenchmarkModelActivity.java b/tensorflow/lite/tools/benchmark/android/src/org/tensorflow/lite/benchmark/BenchmarkModelActivity.java index 6833d70931b..baf981f6680 100644 --- a/tensorflow/lite/tools/benchmark/android/src/org/tensorflow/lite/benchmark/BenchmarkModelActivity.java +++ b/tensorflow/lite/tools/benchmark/android/src/org/tensorflow/lite/benchmark/BenchmarkModelActivity.java @@ -36,6 +36,10 @@ public class BenchmarkModelActivity extends Activity { Intent intent = getIntent(); Bundle bundle = intent.getExtras(); String args = bundle.getString(ARGS_INTENT_KEY_0, bundle.getString(ARGS_INTENT_KEY_1)); + if (args.contains("--use_hexagon=true") || args.contains("--use_hexagon=1")) { + // Users should not specify this argument. 
+ args = args + " --hexagon_lib_path=" + getApplicationInfo().nativeLibraryDir; + } Log.i(TAG, "Running TensorFlow Lite benchmark with args: " + args); Trace.beginSection("TFLite Benchmark Model"); diff --git a/tensorflow/lite/tools/benchmark/experimental/firebase/android/AndroidManifest.xml b/tensorflow/lite/tools/benchmark/experimental/firebase/android/AndroidManifest.xml index 563e90eb93f..647204e2f04 100644 --- a/tensorflow/lite/tools/benchmark/experimental/firebase/android/AndroidManifest.xml +++ b/tensorflow/lite/tools/benchmark/experimental/firebase/android/AndroidManifest.xml @@ -1,6 +1,6 @@ | g0 g1 g2 g3 h0 h1 h2 h3 | + // | e0 e1 e2 e3 e4 e5 e6 e7 | | a4 a5 a6 a7 b4 b5 b6 b7 | + // | f0 f1 f2 f3 f4 f5 f6 f7 | | c4 c5 c6 c7 d4 d5 d6 d7 | + // | g0 g1 g2 g3 g4 g5 g6 g7 | | e4 e5 e6 e7 f4 f5 f6 f7 | + // | h0 h1 h2 h3 h4 h5 h6 h7 | | g4 g5 g6 g7 h4 h5 h6 h7 | + + for (int y = 0; y < dst_channels; y++) { + int x = 0; + for (; x + 4 <= src_channels; x += 4) { + const int idx_data_0 = src_channels * y + x; + S filter = S(weights.data[idx_data_0], weights.data[idx_data_0 + 1], + weights.data[idx_data_0 + 2], weights.data[idx_data_0 + 3]); + dst[y + padded_dst_channels * x / 4] = filter; + } + + // If the width is not a multiple of 4, padding is required and the padded + // region is filled with zeros. + if (src_channels != padded_src_channels) { + const int idx_data_0 = src_channels * y + x; + + S filter = S(x < src_channels ? weights.data[idx_data_0] : 0.0, + x + 1 < src_channels ? weights.data[idx_data_0 + 1] : 0.0, + x + 2 < src_channels ? weights.data[idx_data_0 + 2] : 0.0, + x + 3 < src_channels ? weights.data[idx_data_0 + 3] : 0.0); + dst[y + padded_dst_channels * x / 4] = filter; + } + } + + // Fill the padded columns with zeros. + for (int y = dst_channels; y < padded_dst_channels; y++) { + for (int x = 0; x < padded_src_channels; x += 4) { + dst[y + padded_dst_channels * x / 4] = S(0.0); + } + } +} + class FullyConnected : public GPUOperation { public: FullyConnected() = default; @@ -59,10 +111,6 @@ class FullyConnected : public GPUOperation { absl::Status UploadWeights(const tflite::gpu::Tensor& weights, CLContext* context); - template - void RearrangeWeights(const tflite::gpu::Tensor& weights, - absl::Span dst); - std::string GetFullyConnectedKernelCode(const OperationDef& op_def, const int3& work_group_size); }; @@ -85,13 +133,13 @@ absl::Status FullyConnected::UploadWeights( Buffer weights_buffer; if (f32_weights) { std::vector gpu_data(dst_depth * src_depth * 4); - RearrangeWeights(weights, absl::MakeSpan(gpu_data)); + RearrangeFCWeightsToIOO4I4(weights, absl::MakeSpan(gpu_data)); RETURN_IF_ERROR(CreateReadOnlyBuffer(float4_size * elements_count, gpu_data.data(), context, &weights_buffer)); } else { std::vector gpu_data(dst_depth * src_depth * 4); - RearrangeWeights(weights, absl::MakeSpan(gpu_data)); + RearrangeFCWeightsToIOO4I4(weights, absl::MakeSpan(gpu_data)); RETURN_IF_ERROR(CreateReadOnlyBuffer(float4_size * elements_count, gpu_data.data(), context, &weights_buffer)); @@ -104,37 +152,6 @@ absl::Status FullyConnected::UploadWeights( return absl::OkStatus(); } -template -void FullyConnected::RearrangeWeights( - const tflite::gpu::Tensor& weights, absl::Span dst) { - const int src_depth = DivideRoundUp(weights.shape.i, 4); - const int dst_depth = DivideRoundUp(weights.shape.o, 4); - int counter = 0; - - for (int s = 0; s < src_depth; ++s) { - for (int d = 0; d < dst_depth; ++d) { - S filters[4]; - for (int i = 0; i < 4; ++i) { - for (int j = 0; j < 4; ++j) { - const 
int dst_ch = d * 4 + i; - const int src_ch = s * 4 + j; - if (dst_ch < weights.shape.o && src_ch < weights.shape.i) { - const int f_index = - weights.shape.LinearIndex({dst_ch, 0, 0, src_ch}); - filters[i][j] = weights.data[f_index]; - } else { - filters[i][j] = 0.0; - } - } - } - dst[counter++] = filters[0]; - dst[counter++] = filters[1]; - dst[counter++] = filters[2]; - dst[counter++] = filters[3]; - } - } -} - absl::Status CreateFullyConnected(const CreationContext& creation_context, const OperationDef& definition, const FullyConnectedAttributes& attr, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected_test.cc index 4525d49e783..900b244ceb2 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected_test.cc @@ -23,6 +23,7 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/common/operations.h" #include "tensorflow/lite/delegates/gpu/common/status.h" +using ::testing::ElementsAreArray; using ::testing::FloatNear; using ::testing::Pointwise; @@ -61,6 +62,60 @@ TEST_F(OpenCLOperationTest, FullyConnected) { } } +TEST_F(OpenCLOperationTest, RearrageWeights) { + tflite::gpu::Tensor weights; + weights.shape = OHWI(8, 1, 1, 8); + weights.data = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 10.0, 11.0, + 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 20.0, 21.0, 22.0, 23.0, + 24.0, 25.0, 26.0, 27.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, + 36.0, 37.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, + 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 60.0, 61.0, + 62.0, 63.0, 64.0, 65.0, 66.0, 67.0, 70.0, 71.0, 72.0, 73.0, + 74.0, 75.0, 76.0, 77.0}; + + std::vector expected_rearranged_data = { + 0.0, 1.0, 2.0, 3.0, 10.0, 11.0, 12.0, 13.0, 20.0, 21.0, 22.0, + 23.0, 30.0, 31.0, 32.0, 33.0, 40.0, 41.0, 42.0, 43.0, 50.0, 51.0, + 52.0, 53.0, 60.0, 61.0, 62.0, 63.0, 70.0, 71.0, 72.0, 73.0, 4.0, + 5.0, 6.0, 7.0, 14.0, 15.0, 16.0, 17.0, 24.0, 25.0, 26.0, 27.0, + 34.0, 35.0, 36.0, 37.0, 44.0, 45.0, 46.0, 47.0, 54.0, 55.0, 56.0, + 57.0, 64.0, 65.0, 66.0, 67.0, 74.0, 75.0, 76.0, 77.0, + }; + + std::vector data(8 * 8); + float4* data_ptr = static_cast(static_cast(data.data())); + RearrangeFCWeightsToIOO4I4(weights, absl::MakeSpan(data_ptr, 8 * 8 / 4)); + + EXPECT_THAT(data, ElementsAreArray(expected_rearranged_data)); +} + +TEST_F(OpenCLOperationTest, RearrageWeightsWhenPaddingIsRequired) { + tflite::gpu::Tensor weights; + weights.shape = OHWI(7, 1, 1, 7); + weights.data = { + 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 10.0, 11.0, 12.0, + 13.0, 14.0, 15.0, 16.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, + 26.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 40.0, 41.0, + 42.0, 43.0, 44.0, 45.0, 46.0, 50.0, 51.0, 52.0, 53.0, 54.0, + 55.0, 56.0, 60.0, 61.0, 62.0, 63.0, 64.0, 65.0, 66.0, + }; + + std::vector expected_rearranged_data = { + 0.0, 1.0, 2.0, 3.0, 10.0, 11.0, 12.0, 13.0, 20.0, 21.0, 22.0, + 23.0, 30.0, 31.0, 32.0, 33.0, 40.0, 41.0, 42.0, 43.0, 50.0, 51.0, + 52.0, 53.0, 60.0, 61.0, 62.0, 63.0, 0.0, 0.0, 0.0, 0.0, 4.0, + 5.0, 6.0, 0.0, 14.0, 15.0, 16.0, 0.0, 24.0, 25.0, 26.0, 0.0, + 34.0, 35.0, 36.0, 0.0, 44.0, 45.0, 46.0, 0.0, 54.0, 55.0, 56.0, + 0.0, 64.0, 65.0, 66.0, 0.0, 0.0, 0.0, 0.0, 0.0, + }; + + std::vector data(8 * 8); + float4* data_ptr = static_cast(static_cast(data.data())); + RearrangeFCWeightsToIOO4I4(weights, absl::MakeSpan(data_ptr, 8 * 8 / 4)); + + EXPECT_THAT(data, ElementsAreArray(expected_rearranged_data)); +} + } // namespace } // 
namespace cl } // namespace gpu From dd2b59ae182b731b1f0a913b0cfb9ed673f778c6 Mon Sep 17 00:00:00 2001 From: Kibeom Kim Date: Fri, 31 Jul 2020 09:01:10 -0700 Subject: [PATCH 1847/2522] Move session_ops_test.py under ./v1_compat_tests/ Session ops are V1 API. PiperOrigin-RevId: 324217477 Change-Id: I11e03dc00dc0de23d327970715b6a387372500ee --- tensorflow/python/kernel_tests/BUILD | 12 ------------ .../python/kernel_tests/v1_compat_tests/BUILD | 15 +++++++++++++++ .../{ => v1_compat_tests}/session_ops_test.py | 2 +- 3 files changed, 16 insertions(+), 13 deletions(-) rename tensorflow/python/kernel_tests/{ => v1_compat_tests}/session_ops_test.py (99%) diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 48d88a2b61c..1e093af1019 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -2455,18 +2455,6 @@ cuda_py_test( ], ) -cuda_py_test( - name = "session_ops_test", - size = "small", - srcs = ["session_ops_test.py"], - deps = [ - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:math_ops", - "//tensorflow/python:session_ops", - ], -) - cuda_py_test( name = "shape_ops_test", size = "medium", diff --git a/tensorflow/python/kernel_tests/v1_compat_tests/BUILD b/tensorflow/python/kernel_tests/v1_compat_tests/BUILD index 4529dd19645..9cd0f4df101 100644 --- a/tensorflow/python/kernel_tests/v1_compat_tests/BUILD +++ b/tensorflow/python/kernel_tests/v1_compat_tests/BUILD @@ -1,3 +1,6 @@ +load("//tensorflow:tensorflow.bzl", "cuda_py_test") + +# buildifier: disable=same-origin-load load("//tensorflow:tensorflow.bzl", "tf_py_test") package( @@ -15,3 +18,15 @@ tf_py_test( "//tensorflow/python:variables", ], ) + +cuda_py_test( + name = "session_ops_test", + size = "small", + srcs = ["session_ops_test.py"], + deps = [ + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:math_ops", + "//tensorflow/python:session_ops", + ], +) diff --git a/tensorflow/python/kernel_tests/session_ops_test.py b/tensorflow/python/kernel_tests/v1_compat_tests/session_ops_test.py similarity index 99% rename from tensorflow/python/kernel_tests/session_ops_test.py rename to tensorflow/python/kernel_tests/v1_compat_tests/session_ops_test.py index 7d422278408..15f6afc6eaa 100644 --- a/tensorflow/python/kernel_tests/session_ops_test.py +++ b/tensorflow/python/kernel_tests/v1_compat_tests/session_ops_test.py @@ -1,4 +1,4 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
From 79c810942f3ea77595996feb3452979955b9b5bc Mon Sep 17 00:00:00 2001 From: Kibeom Kim Date: Fri, 31 Jul 2020 09:01:13 -0700 Subject: [PATCH 1848/2522] Remove @test_util.run_deprecated_v1 in spacetodepth_op_test.py PiperOrigin-RevId: 324217490 Change-Id: Idcf1c371afcb11c0b7291f590c5ea8e71aa0c48d --- .../kernel_tests/spacetodepth_op_test.py | 59 ++++++++----------- 1 file changed, 23 insertions(+), 36 deletions(-) diff --git a/tensorflow/python/kernel_tests/spacetodepth_op_test.py b/tensorflow/python/kernel_tests/spacetodepth_op_test.py index 976880c10ee..762a644b065 100644 --- a/tensorflow/python/kernel_tests/spacetodepth_op_test.py +++ b/tensorflow/python/kernel_tests/spacetodepth_op_test.py @@ -22,11 +22,12 @@ import numpy as np from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_array_ops -from tensorflow.python.ops import gradient_checker +from tensorflow.python.ops import gradient_checker_v2 from tensorflow.python.ops import math_ops from tensorflow.python.platform import test from tensorflow.python.platform import tf_logging @@ -159,79 +160,72 @@ class SpaceToDepthTest(test.TestCase): # Error handling: - @test_util.run_deprecated_v1 def testInputWrongDimMissingDepth(self): # The input is missing the last dimension ("depth") x_np = [[[1, 2], [3, 4]]] block_size = 2 - with self.assertRaises(ValueError): + with self.assertRaises((ValueError, errors.InvalidArgumentError)): out_tf = array_ops.space_to_depth(x_np, block_size) self.evaluate(out_tf) - @test_util.run_deprecated_v1 def testInputWrongDimMissingBatch(self): # The input is missing the first dimension ("batch") x_np = [[[1], [2]], [[3], [4]]] block_size = 2 - with self.assertRaises(ValueError): + with self.assertRaises((ValueError, errors.InvalidArgumentError)): _ = array_ops.space_to_depth(x_np, block_size) - @test_util.run_deprecated_v1 def testBlockSize0(self): # The block size is 0. x_np = [[[[1], [2]], [[3], [4]]]] block_size = 0 - with self.assertRaises(ValueError): + with self.assertRaises((ValueError, errors.InvalidArgumentError)): out_tf = array_ops.space_to_depth(x_np, block_size) self.evaluate(out_tf) - @test_util.run_deprecated_v1 def testBlockSizeOne(self): # The block size is 1. The block size needs to be > 1. x_np = [[[[1], [2]], [[3], [4]]]] block_size = 1 - with self.assertRaises(ValueError): + with self.assertRaises((ValueError, errors.InvalidArgumentError)): out_tf = array_ops.space_to_depth(x_np, block_size) self.evaluate(out_tf) - @test_util.run_deprecated_v1 def testBlockSizeLarger(self): # The block size is too large for this input. x_np = [[[[1], [2]], [[3], [4]]]] block_size = 10 - with self.assertRaises(ValueError): + with self.assertRaises((ValueError, errors.InvalidArgumentError)): out_tf = array_ops.space_to_depth(x_np, block_size) self.evaluate(out_tf) - @test_util.run_deprecated_v1 def testBlockSizeNotDivisibleWidth(self): # The block size divides width but not height. x_np = [[[[1], [2], [3]], [[3], [4], [7]]]] block_size = 3 - with self.assertRaises(ValueError): + with self.assertRaises((ValueError, errors.InvalidArgumentError)): _ = array_ops.space_to_depth(x_np, block_size) - @test_util.run_deprecated_v1 def testBlockSizeNotDivisibleHeight(self): # The block size divides height but not width. 
x_np = [[[[1], [2]], [[3], [4]], [[5], [6]]]] block_size = 3 - with self.assertRaises(ValueError): + with self.assertRaises((ValueError, errors.InvalidArgumentError)): _ = array_ops.space_to_depth(x_np, block_size) - @test_util.run_deprecated_v1 def testBlockSizeNotDivisibleBoth(self): # The block size does not divide neither width or height. x_np = [[[[1], [2]], [[3], [4]]]] block_size = 3 - with self.assertRaises(ValueError): + with self.assertRaises((ValueError, errors.InvalidArgumentError)): _ = array_ops.space_to_depth(x_np, block_size) - @test_util.run_deprecated_v1 def testUnknownShape(self): - t = array_ops.space_to_depth( - array_ops.placeholder(dtypes.float32), block_size=4) - self.assertEqual(4, t.get_shape().ndims) + # Testing an unkown shape in graph. + with ops.Graph().as_default(): + t = array_ops.space_to_depth( + array_ops.placeholder(dtypes.float32), block_size=4) + self.assertEqual(4, t.get_shape().ndims) def spaceToDepthUsingTranspose(self, tensor, block_size, data_format): block_size_sq = block_size * block_size @@ -315,8 +309,6 @@ class SpaceToDepthTest(test.TestCase): actual_vals, expected_vals = self.evaluate([actual, expected]) self.assertTrue(np.array_equal(actual_vals, expected_vals)) - # TODO(jingyue): figure out why this test failed in eager mode. - @test_util.run_deprecated_v1 def testAgainstTranspose(self): self.compareToTranspose(3, 2, 3, 1, 2, "NHWC", dtypes.float32, False) self.compareToTranspose(1, 2, 3, 2, 2, "NHWC", dtypes.float32, False) @@ -350,19 +342,15 @@ class SpaceToDepthGradientTest(test.TestCase): return assert 4 == x.ndim - with self.cached_session(use_gpu=True): - tf_x = ops.convert_to_tensor(x) - tf_y = array_ops.space_to_depth(tf_x, block_size, data_format=data_format) - epsilon = 1e-2 - ((x_jacob_t, x_jacob_n)) = gradient_checker.compute_gradient( - tf_x, - x.shape, - tf_y, - tf_y.get_shape().as_list(), - x_init_value=x, - delta=epsilon) - self.assertAllClose(x_jacob_t, x_jacob_n, rtol=1e-2, atol=epsilon) + def func(x): + return array_ops.space_to_depth(x, block_size, data_format=data_format) + + with test_util.use_gpu(): + with self.cached_session(): + theoretical, numerical = gradient_checker_v2.compute_gradient( + func, [ops.convert_to_tensor(x)]) + self.assertAllClose(theoretical, numerical, rtol=1e-2, atol=1e-2) # Tests a gradient for space_to_depth of x which is a four dimensional # tensor of shape [b, h * block_size, w * block_size, d]. @@ -379,7 +367,6 @@ class SpaceToDepthGradientTest(test.TestCase): # Don't use very large numbers as dimensions here as the result is tensor # with cartesian product of the dimensions. - @test_util.run_deprecated_v1 def testSmall(self): block_size = 2 self._compare(1, 2, 3, 5, block_size, "NHWC") From 74408f404a1c22ece5f17e94dc588c8a8337b9bb Mon Sep 17 00:00:00 2001 From: Mingxing Tan Date: Fri, 31 Jul 2020 09:01:49 -0700 Subject: [PATCH 1849/2522] Output meaningful logs for tf profiler. 
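The logger now records which ops are missing flop stats because of incomplete shapes and names them in the final message, instead of only reporting a count. A sketch of the intended aggregation (identifiers as used in tfprof_logger.py):

  missing_shape_ops = []
  for op in graph.get_operations():
    try:
      stats = ops.get_stats_for_node_def(
          graph, op.node_def, REGISTERED_FLOP_STATS)
    except ValueError:
      # Shape is incomplete; skip flop stats for this op.
      stats = None
      missing_shape_ops.append(op.name)
  if missing_shape_ops and not run_meta:
    sys.stderr.write(
        '%d ops have no flops stats due to incomplete shapes: %s\n'
        % (len(missing_shape_ops), missing_shape_ops))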
PiperOrigin-RevId: 324217592 Change-Id: Iaea30dde65d0acc51a6915e39a58a18983c6e2ef --- tensorflow/core/profiler/internal/tfprof_node.h | 4 ---- tensorflow/python/profiler/tfprof_logger.py | 11 ++++++----- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/tensorflow/core/profiler/internal/tfprof_node.h b/tensorflow/core/profiler/internal/tfprof_node.h index 5b2cd5fc309..4ce8f20ec00 100644 --- a/tensorflow/core/profiler/internal/tfprof_node.h +++ b/tensorflow/core/profiler/internal/tfprof_node.h @@ -325,13 +325,11 @@ class TFGraphNode { (*node_.mutable_attrs())[attr.first].MergeFrom(attr.second); if (attr.first == "shape" && attr.second.has_shape()) { if (!shape_.empty()) { - absl::FPrintF(stderr, "Found duplicated shapes!\n"); continue; } shape_ = ShapeProtoToVec(attr.second.shape()); } else if (attr.first == "_output_shapes" && attr.second.has_list()) { if (!output_shapes_.empty()) { - absl::FPrintF(stderr, "Found duplicated output shapes!\n"); continue; } for (int i = 0; i < attr.second.list().shape_size(); ++i) { @@ -667,8 +665,6 @@ class TFGraphNode { } if (complete_shape) { return params; - } else { - absl::FPrintF(stderr, "Incomplete shape.\n"); } } return 0; diff --git a/tensorflow/python/profiler/tfprof_logger.py b/tensorflow/python/profiler/tfprof_logger.py index 8aff8cec085..27a1d360c43 100644 --- a/tensorflow/python/profiler/tfprof_logger.py +++ b/tensorflow/python/profiler/tfprof_logger.py @@ -91,7 +91,7 @@ def _get_logged_ops(graph, run_meta=None, add_trace=True, if run_meta: graph = _fill_missing_graph_shape(graph, run_meta) - op_missing_shape = 0 + missing_shape_ops = [] logged_ops = {} string_to_id = {} string_to_id['none'] = len(string_to_id) @@ -102,7 +102,7 @@ def _get_logged_ops(graph, run_meta=None, add_trace=True, graph, op.node_def, REGISTERED_FLOP_STATS) except ValueError: # Catch Exception When shape is incomplete. Skip it. - op_missing_shape += 1 + missing_shape_ops.append(op.name) stats = None entry = tfprof_log_pb2.OpLogEntry() @@ -136,9 +136,10 @@ def _get_logged_ops(graph, run_meta=None, add_trace=True, else: logged_ops[v.op.name].types.append(TRAINABLE_VARIABLES) - if op_missing_shape > 0 and not run_meta: - sys.stderr.write('%d ops no flops stats due to incomplete shapes.\n' % - op_missing_shape) + if missing_shape_ops and not run_meta: + sys.stderr.write( + '%d ops have no flops stats due to incomplete shapes: [%s] \n' % + len(missing_shape_ops), missing_shape_ops) return logged_ops, string_to_id From a3c993eab7dc891bc4ccf51d0661c5ff1f0c9abf Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Fri, 31 Jul 2020 09:23:00 -0700 Subject: [PATCH 1850/2522] PSv2: Move TF2 parameter server training main library code into OSS. 
PiperOrigin-RevId: 324221520 Change-Id: I19922552dc9e804067f2aa077b5a08af68020eb5 --- tensorflow/python/distribute/BUILD | 18 - tensorflow/python/distribute/client/BUILD | 102 -- tensorflow/python/distribute/client/client.py | 1221 ----------------- .../python/distribute/client/client_test.py | 388 ------ .../python/distribute/client/metric_utils.py | 79 -- .../distribute/client/metric_utils_test.py | 69 - .../client/parameter_server_client.py | 55 - .../client/parameter_server_client_test.py | 405 ------ .../parameter_server_strategy_v2.py | 202 --- tensorflow/tools/pip_package/BUILD | 3 - 10 files changed, 2542 deletions(-) delete mode 100644 tensorflow/python/distribute/client/BUILD delete mode 100644 tensorflow/python/distribute/client/client.py delete mode 100644 tensorflow/python/distribute/client/client_test.py delete mode 100644 tensorflow/python/distribute/client/metric_utils.py delete mode 100644 tensorflow/python/distribute/client/metric_utils_test.py delete mode 100644 tensorflow/python/distribute/client/parameter_server_client.py delete mode 100644 tensorflow/python/distribute/client/parameter_server_client_test.py delete mode 100644 tensorflow/python/distribute/parameter_server_strategy_v2.py diff --git a/tensorflow/python/distribute/BUILD b/tensorflow/python/distribute/BUILD index c1b0ee6ce23..f7ec692a01f 100644 --- a/tensorflow/python/distribute/BUILD +++ b/tensorflow/python/distribute/BUILD @@ -1760,21 +1760,3 @@ distribute_py_test( "@absl_py//absl/testing:parameterized", ], ) - -py_library( - name = "parameter_server_strategy_v2", - srcs = ["parameter_server_strategy_v2.py"], - srcs_version = "PY3", - deps = [ - ":parameter_server_strategy", - "//tensorflow/python:constant_op", - "//tensorflow/python:dtypes", - "//tensorflow/python:framework_ops", - "//tensorflow/python:util", - "//tensorflow/python:variables", - "//tensorflow/python/distribute:distribute_lib", - "//tensorflow/python/distribute:input_lib", - "//tensorflow/python/distribute:sharded_variable", - "//tensorflow/python/distribute:values", - ], -) diff --git a/tensorflow/python/distribute/client/BUILD b/tensorflow/python/distribute/client/BUILD deleted file mode 100644 index 907d8e95ee5..00000000000 --- a/tensorflow/python/distribute/client/BUILD +++ /dev/null @@ -1,102 +0,0 @@ -package( - default_visibility = ["//tensorflow:internal"], - licenses = ["notice"], # Apache 2.0 -) - -exports_files(["LICENSE"]) - -py_library( - name = "parameter_server_client", - srcs = ["parameter_server_client.py"], - srcs_version = "PY3", - deps = [ - ":client", - "//tensorflow/python/distribute:parameter_server_strategy_v2", - ], -) - -py_library( - name = "client", - srcs = ["client.py"], - srcs_version = "PY3", - deps = [ - ":metric_utils", - "//tensorflow/python:errors", - "//tensorflow/python:framework_ops", - "//tensorflow/python:func_graph", - "//tensorflow/python:resource_variable_ops", - "//tensorflow/python:training_server_lib", - "//tensorflow/python:util", - "//tensorflow/python/distribute:input_lib", - "//tensorflow/python/distribute:parameter_server_strategy_v2", - "//tensorflow/python/distribute:values", - "//tensorflow/python/eager:context", - "//tensorflow/python/eager:def_function", - "//tensorflow/python/eager:executor", - "//tensorflow/python/eager:function", - "//tensorflow/python/eager:remote", - "@absl_py//absl/logging", - "@six_archive//:six", - ], -) - -py_test( - name = "client_test", - size = "small", - srcs = ["client_test.py"], - python_version = "PY3", - shard_count = 12, - deps = [ - ":client", - 
"//tensorflow/python:client_testlib", - "//tensorflow/python:training_lib", - "//tensorflow/python:util", - "//tensorflow/python/eager:def_function", - "@absl_py//absl/logging", - ], -) - -py_test( - name = "parameter_server_client_test", - srcs = ["parameter_server_client_test.py"], - python_version = "PY3", - shard_count = 14, - tags = ["no_oss"], # TODO(b/162119374) - deps = [ - ":parameter_server_client", - "//tensorflow/python:constant_op", - "//tensorflow/python:dtypes", - "//tensorflow/python:init_ops_v2", - "//tensorflow/python:training_server_lib", - "//tensorflow/python:variables", - "//tensorflow/python/data/ops:dataset_ops", - "//tensorflow/python/distribute:multi_worker_test_base", - "//tensorflow/python/distribute:sharded_variable", - "//tensorflow/python/distribute/cluster_resolver:cluster_resolver_lib", - "//tensorflow/python/eager:def_function", - "//tensorflow/python/eager:test", - ], -) - -py_library( - name = "metric_utils", - srcs = ["metric_utils.py"], - srcs_version = "PY3", - deps = [ - "//tensorflow/python/eager:monitoring", - ], -) - -py_test( - name = "metric_utils_test", - srcs = ["metric_utils_test.py"], - python_version = "PY3", - deps = [ - ":client", - ":metric_utils", - "//tensorflow/python:training_server_lib", - "//tensorflow/python/distribute:multi_worker_test_base", - "//tensorflow/python/distribute/cluster_resolver:cluster_resolver_lib", - "//tensorflow/python/eager:test", - ], -) diff --git a/tensorflow/python/distribute/client/client.py b/tensorflow/python/distribute/client/client.py deleted file mode 100644 index 533d5f19042..00000000000 --- a/tensorflow/python/distribute/client/client.py +++ /dev/null @@ -1,1221 +0,0 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Module for `Client` and relevant cluster-worker related library. - -This is currently under development and the API is subject to change. 
-""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import contextlib -import enum -import functools -import os -import sys -import threading -import weakref -from absl import logging -from six.moves import queue -from tensorflow.python.distribute import distribute_lib -from tensorflow.python.distribute import input_lib -from tensorflow.python.distribute import parameter_server_strategy_v2 -from tensorflow.python.distribute.client import metric_utils -from tensorflow.python.eager import context -from tensorflow.python.eager import def_function -from tensorflow.python.eager import executor -from tensorflow.python.eager import function as tf_function -from tensorflow.python.eager import remote -from tensorflow.python.framework import errors -from tensorflow.python.framework import func_graph -from tensorflow.python.framework import ops -from tensorflow.python.ops import resource_variable_ops -from tensorflow.python.ops import variable_scope -from tensorflow.python.training import server_lib -from tensorflow.python.util import nest - -# Maximum time for failed worker to come back is 1 hour -_WORKER_MAXIMUM_RECOVERY_SEC = 3600 - -# Maximum size for queued closures, "infinite" if set to 0. -# When the maximum queue size is reached, further schedule calls will become -# blocking until some previously queued closures are executed on workers. -# Note that using an "infinite" queue size can take a non-trivial portion of -# memory, and even lead to client OOM. Modify the size to a smaller value for -# client with constrained memory resource (only recommended for advanced users). -# Also used in unit tests to ensure the correctness when the queue is full. -_CLOSURE_QUEUE_MAX_SIZE = 256 * 1024 - -# RPC error message from PS -_RPC_ERROR_FROM_PS = "GRPC error information from remote target /job:ps" - -# InvalidArgumentError (unknown device) will not have "GRPC error..." string. -_JOB_WORKER_STRING_IDENTIFIER = "/job:worker" - - -class _RemoteValueStatus(enum.Enum): - """The status of a `RemoteValue` object. - - A `RemoteValue` object can have three states: - 1) not ready: no value, no non-retryable error and not aborted; - 2) aborted: i.e. the execution of function was aborted because of task - failure, but can be retried; - 3) ready: i.e. has value or has non-tryable error; - - The initial state of a `RemoteValue` is "not ready". When its corresponding - closure has - been executed at least once, it will become aborted or ready. The state - transitions are: - 1) not ready -> 2) aborted: - when the corresponding closure is aborted due to worker failure, and the - worker failure is not immediately handled. - 1) not ready -> 3) ready: - when the corresponding closure has been executed successfully. - 2) aborted -> 3) ready: - when the `RemoteValue` is rebuilt by rerunning the corresponding closure - and the closure has been executed successfully. - 3) ready -> 2) aborted: - when the corresponding closure had been executed successfully but later - the corresponding remote worker failed. This is currently only implemented - for resource `RemoteValue` like iterators. - """ - NOT_READY = "NOT_READY" - ABORTED = "ABORTED" - READY = "READY" - - -class RemoteValue(object): - """An asynchronously available value of a remotely executed function. - - `RemoteValue` class is used as the return value of `Client.schedule()` where - the underlying concrete value comes at a later time once the function has been - remotely executed. 
`RemoteValue` can be used as an input to a subsequent - function scheduled with `Client.schedule()`. - - Note: this class is not thread-safe. - """ - - def __init__(self, closure, type_spec): - self._closure = closure - # The type spec for this `RemoteValue` which is used to trace functions that - # take this `RemoteValue` as input. - self._type_spec = func_graph.convert_structure_to_signature(type_spec) - self._value = None - self._error = None - self._status_available_event = threading.Event() - self._status = _RemoteValueStatus.NOT_READY - - def _set_aborted(self): - self._status = _RemoteValueStatus.ABORTED - self._value = None - self._error = None - - # Wake up any waiting thread and clear the event. - self._status_available_event.set() - - def _rebuild_on(self, worker): - self._status_available_event.clear() - # TODO(yuefengz): we may need to rebuild its inputs as well. - self._closure.execute_on(worker) - - def _set_value(self, value): - self._status = _RemoteValueStatus.READY - self._value = value - self._error = None - self._status_available_event.set() - - def _set_error(self, exception): - self._status = _RemoteValueStatus.READY - self._value = None - self._error = exception - self._status_available_event.set() - - def _get_value(self): - self._status_available_event.wait() - return self._value - - def _get_error(self): - self._status_available_event.wait() - return self._error - - def _set_type_spec(self, type_spec): - self._type_spec = func_graph.convert_structure_to_signature(type_spec) - - def fetch(self): - """Wait for the result of RemoteValue to be ready and return the result. - - Returns: - The remote value, as a numpy data type (if scalar) or ndarray. - - Raises: - FunctionRetryableError: If the function that produces this `RemoteValue` - is aborted or cancelled due to failure, and the user should handle and - reschedule. - """ - self._status_available_event.wait() - if self._status is _RemoteValueStatus.ABORTED: - raise FunctionRetryableError( - "The corresponding function is aborted. Please reschedule the " - "function.") - if self._error is not None: - raise self._error # pylint: disable=raising-bad-type - else: - if isinstance(self._value, - (ops.Tensor, resource_variable_ops.BaseResourceVariable)): - return self._value.numpy() - else: - return self._value - - -class InputError(Exception): - - def __init__(self, original_exception): - message = ("Input has an error, the original exception is %r, " - "error message is %s." % - (original_exception, str(original_exception))) - super().__init__(message) - - -class FunctionRetryableError(Exception): - """An error that represents the closure was aborted and should be retried.""" - pass - - -def _maybe_get_error_and_rebuild_remote_values(worker, structure): - """Attempts to return errors from `RemoteValue`s. 
Rebuilds them if needed.""" - errors_in_structure = [] - - def _get_error(val): - if isinstance(val, RemoteValue): - if val._status is _RemoteValueStatus.ABORTED: # pylint: disable=protected-access - with worker.failure_handler.wait_on_failure( - on_recovery_fn=functools.partial(val._rebuild_on, worker), # pylint: disable=protected-access - worker_device_name=worker.device_name): - val._rebuild_on(worker) # pylint: disable=protected-access - error = val._get_error() # pylint: disable=protected-access - if error: - errors_in_structure.append(error) - - nest.map_structure(_get_error, structure) - if errors_in_structure: - return errors_in_structure[0] - else: - return None - - -def _maybe_get_remote_value(val): - """Gets the value of `val` if it is a `RemoteValue`.""" - if isinstance(val, RemoteValue): - error = val._get_error() # pylint: disable=protected-access - if error: - raise AssertionError( - "RemoteValue doesn't have a value because it has errors.") - else: - return val._get_value() # pylint: disable=protected-access - else: - return val - - -def _maybe_as_type_spec(val): - if isinstance(val, RemoteValue): - if val._type_spec is None: # pylint: disable=protected-access - raise ValueError("Output of a scheduled function that is not " - "tf.function cannot be the input of another function.") - return val._type_spec # pylint: disable=protected-access - else: - return val - - -class PerWorkerValues(object): - """Holds a list of per worker values.""" - - def __init__(self, values): - self._values = tuple(values) - - -class Closure(object): - """Hold a function to be scheduled and its arguments.""" - - def __init__(self, function, args=None, kwargs=None): - if not callable(function): - raise ValueError("Function passed to `Client.schedule` must be a " - "callable object.") - self._args = args or () - self._kwargs = kwargs or {} - self._function = function - - if isinstance(function, def_function.Function): - replica_args = self._select_worker_slice(0, self._args) - replica_kwargs = self._select_worker_slice(0, self._kwargs) - - # Note: no need to handle function registration failure since this kind of - # failure will not raise exceptions as designed in the runtime. The client - # has to rely on subsequent operations that raise to catch function - # registration failure. - - # Record the function tracing overhead. Note that we pass in the tracing - # count of the def_function.Function as a state tracker, so that metrics - # will only record the time for actual function tracing (i.e., excluding - # function cache lookups). - with metric_utils.monitored_timer( - "function_tracing", state_tracker=function._get_tracing_count): # pylint: disable=protected-access - concrete_function = function.get_concrete_function( - *nest.map_structure(_maybe_as_type_spec, replica_args), - **nest.map_structure(_maybe_as_type_spec, replica_kwargs)) - self._output_remote_values = nest.map_structure( - lambda x: RemoteValue(self, x), concrete_function.structured_outputs) - elif isinstance(function, tf_function.ConcreteFunction): - self._output_remote_values = nest.map_structure( - lambda x: RemoteValue(self, x), function.structured_outputs) - else: - # Regular python functions. - # TODO(yuefengz): maybe we should trace python functions if their inputs - # are Python primitives, tensors and composite tensors. 
- self._output_remote_values = RemoteValue(self, None) - - def _select_worker_slice(self, worker_id, structured): - """Selects the worker slice of each of the items in `structured`.""" - - def _get(x): - return x._values[worker_id] if isinstance(x, PerWorkerValues) else x # pylint: disable=protected-access - - return nest.map_structure(_get, structured) - - def _fetch_output_remote_values(self): - """Temporary method used to sync the scheduler.""" - # It will do nothing if there is no return value. - nest.map_structure(lambda x: x.fetch(), self._output_remote_values) # pylint: disable=protected-access - - def _set_output_remote_values_aborted(self): - """Set output remote_value aborted.""" - # It will do nothing if there is no return value. - nest.map_structure(lambda x: x._set_aborted(), self._output_remote_values) # pylint: disable=protected-access - - def _set_output_remote_values_cancelled(self): - nest.map_structure( - lambda x: x._set_error( # pylint: disable=protected-access,g-long-lambda - FunctionRetryableError("The corresponding function is " - "cancelled. Please reschedule the " - "function.")), - self._output_remote_values) # pylint: disable=protected-access - - def execute_on(self, worker): - """Executes the closure on the given worker. - - Args: - worker: a `Worker` object. - """ - replica_args = self._select_worker_slice(worker.worker_index, self._args) - replica_kwargs = self._select_worker_slice(worker.worker_index, - self._kwargs) - - e = ( - _maybe_get_error_and_rebuild_remote_values(worker, replica_args) or - _maybe_get_error_and_rebuild_remote_values(worker, replica_kwargs)) - if e: - if not isinstance(e, InputError): - e = InputError(e) - for remote_value in nest.flatten(self._output_remote_values): - remote_value._set_error(e) # pylint: disable=protected-access - return - - with ops.device(worker.device_name): - with context.executor_scope(worker.executor): - with metric_utils.monitored_timer("closure_execution"): - output_value = self._function( - *nest.map_structure(_maybe_get_remote_value, replica_args), - **nest.map_structure(_maybe_get_remote_value, replica_kwargs)) - for remote_value, value in zip( - nest.flatten(self._output_remote_values), nest.flatten(output_value)): - remote_value._set_value(value) # pylint: disable=protected-access - - -class _CoordinatedClosureQueue(object): - """Manage a queue of closures, inflight count and errors from execution. - - This class is thread-safe. - """ - - def __init__(self): - - # `self._inflight_closure_count` only tracks the number of inflight closures - # that are "in generation". Once an error occurs, error generation is - # incremented and all subsequent arriving closures (from inflight) are - # considered "out of generation". - self._inflight_closure_count = 0 - - self._queue_lock = threading.Lock() - # Condition indicating that all pending closures (either queued or inflight) - # have been processed, failed, or cancelled. - self._stop_waiting_condition = threading.Condition(self._queue_lock) - # Condition indicating that an item becomes available in queue (not empty). - self._closures_queued_condition = threading.Condition(self._queue_lock) - # Condition indicating that a queue slot becomes available (not full). - # Note that even with "infinite" queue size, there is still a "practical" - # size limit for the queue depending on host memory capacity, and thus the - # queue will eventually become full with a lot of enqueued closures. 
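
# Illustrative sketch (not part of the deleted file): the "not empty" /
# "not full" conditions described above form a classic bounded-queue
# handshake. A minimal self-contained version of that handshake, with
# hypothetical names and only the standard library:
import queue
import threading

q = queue.Queue(maxsize=2)
lock = threading.Lock()
free_slot = threading.Condition(lock)

def put_blocking(item):
  with lock:
    # Block until the bounded queue has room, then enqueue without blocking.
    free_slot.wait_for(lambda: not q.full())
    q.put(item, block=False)

def drain(n):
  for _ in range(n):
    item = q.get()  # Blocks until an item is available.
    with lock:
      free_slot.notify()  # Wake one producer waiting for a free slot.

consumer = threading.Thread(target=drain, args=(3,))
consumer.start()
for i in range(3):
  put_blocking(i)  # The third put waits until the consumer frees a slot.
consumer.join()
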
- self._queue_free_slot_condition = threading.Condition(self._queue_lock) - - if _CLOSURE_QUEUE_MAX_SIZE <= 0: - logging.warning( - "In ParameterServerClient, creating an infinite closure queue can " - "consume a significant amount of memory and even lead to OOM.") - self._queue = queue.Queue(maxsize=_CLOSURE_QUEUE_MAX_SIZE) - self._error = None - - # Error generation is a counter that helps us track whether a closure - # should be cancelled when it is being put back to `self._queue`. It works - # in the following way: - # 1) Error generation starts off at 0. - # 2) When a worker thread calls `get()`, the closure's error generation - # is copied from this queue's error generation. - # 3) If any worker thread experiences an error that's categorized as a - # non-retryable error, the queue's error will be set, error generation - # increments by 1, and the queue is cleared (with the closures marked - # with cancelled error), so other worker threads stop getting closures - # from the queue. Worker preemption is categorized as a retryable error. - # 4) At this point, if `put()` or `wait()` is called (usually by the main - # thread via `schedule` and `join`), the error is raised through that - # call. - # 5) The closures that are inflight, i.e. that are being executed remotely, - # will not be aware of such error event. If the worker that's executing - # the closure happens to be interrupted, the closure should not be put - # back to the queue, and be cancelled with error instead. Checking the - # generation id of the closure and queue is how the worker thread tells - # whether the closure should be put back. Likewise for `mark_finished` - # and `mark_failed`: if the arriving closure is considered out of - # generation in those two methods, it is simply discarded (the inflight - # closure count still decrements). - self._error_generation = 0 - - # The following is a lock to make sure when `wait` is called and before it - # returns no `put` can be executed during this period. It is because `wait` - # won't know what to do with newly put closures. This lock adds an cutoff - # for `wait` so that closures put into the queue while waiting would not be - # taken responsible by this `wait`. - # - # We cannot reuse the `self._queue_lock` since when `wait` waits for a - # condition, the `self._queue_lock` will be released. - # - # We don't use a reader/writer's lock on purpose to reduce the complexity - # of the code. - self._put_wait_lock = threading.Lock() - - def _cancel_closures_in_queue(self): - """Clears the queue and sets remaining closures cancelled error. - - This method expects self._queue_lock to be held prior to entry. - """ - while True: - try: - closure = self._queue.get(block=False) - self._queue_free_slot_condition.notify() - closure._set_output_remote_values_cancelled() # pylint: disable=protected-access - except queue.Empty: - break - - def _raise_if_error(self): - """Raises the error if one exists. - - If an error exists, cancel the closures in queue, raises it, and clear - the error. - - This method expects self._queue_lock to be held prior to entry. - """ - if self._error: - try: - self._cancel_closures_in_queue() - raise self._error # pylint: disable=raising-bad-type - finally: - self._error = None - - def put(self, closure): - """Put a closure into the queue for later execution. - - If `mark_failed` was called before `put`, the error from the first - invocation of `mark_failed` will be raised. - - Args: - closure: The `Closure` to put into the queue. 
- """ - with self._put_wait_lock, self._queue_lock: - self._queue_free_slot_condition.wait_for(lambda: not self._queue.full()) - self._queue.put(closure, block=False) - self._raise_if_error() - self._closures_queued_condition.notify() - - def get(self, timeout=None): - """Return a closure from the queue to be executed.""" - with self._queue_lock: - while self._queue.empty(): - if not self._closures_queued_condition.wait(timeout=timeout): - return None - closure = self._queue.get(block=False) - self._queue_free_slot_condition.notify() - closure._error_generation = self._error_generation # pylint: disable=protected-access - self._inflight_closure_count += 1 - return closure - - def mark_finished(self, closure): - """Let the queue know that a closure has been successfully executed.""" - with self._queue_lock: - if self._inflight_closure_count < 1: - raise AssertionError("There is no inflight closures to mark_finished.") - self._inflight_closure_count -= 1 - if self._queue.empty() and self._inflight_closure_count == 0: - self._stop_waiting_condition.notifyAll() - - def put_back(self, closure): - """Put the closure back into the queue as it was not properly executed.""" - with self._queue_lock: - if self._inflight_closure_count < 1: - raise AssertionError("There is no inflight closures to put_back.") - self._inflight_closure_count -= 1 - if closure._error_generation < self._error_generation: # pylint: disable=protected-access - # If the closure to put back is out of generation, cancel the closure - # and ignore it. - logging.info("Function %r should no longer be dispatched; marking " - "as cancelled.") - closure._set_output_remote_values_cancelled() # pylint: disable=protected-access - return - self._queue_free_slot_condition.wait_for(lambda: not self._queue.full()) - self._queue.put(closure, block=False) - self._closures_queued_condition.notify() - - def wait(self, timeout=None): - """Wait for all closures to be finished before returning. - - If `mark_failed` was called before or during `wait`, the error from the - first invocation of `mark_failed` will be raised. - - Args: - timeout: A float specifying a timeout for the wait in seconds. - - Returns: - True unless the given timeout expired, in which case it returns False. - """ - with self._put_wait_lock, self._queue_lock: - while (not self._error and - (not self._queue.empty() or self._inflight_closure_count > 0)): - if not self._stop_waiting_condition.wait(timeout=timeout): - return False - self._raise_if_error() - return True - - def mark_failed(self, e, closure): - """Sets error and unblocks any wait() call.""" - with self._queue_lock: - # TODO(yuefengz): maybe record all failure and give users more - # information? - if self._inflight_closure_count < 1: - raise AssertionError("There is no inflight closures to mark_failed.") - self._inflight_closure_count -= 1 - if closure._error_generation < self._error_generation: # pylint: disable=protected-access - # If the closure to mark fail is out of generation, simply ignore it - # (with the actual error associated with the closure preserved). - return - assert self._error is None - self._error = e - self._error_generation += 1 - self._cancel_closures_in_queue() - self._stop_waiting_condition.notifyAll() - - def done(self): - """Returns true if the queue is empty and there is no inflight closure. - - If `mark_failed` was called before `done`, the error from the first - invocation of `mark_failed` will be raised. 
- """ - with self._queue_lock: - self._raise_if_error() - return self._queue.empty() and self._inflight_closure_count == 0 - - -class WorkerPreemptionHandler(object): - """Handles worker preemptions.""" - - def __init__(self, server_def): - self._server_def = server_def - self._cluster_update_lock = threading.Lock() - self._cluster_due_for_update = threading.Event() - self._worker_up_cond = threading.Condition(self._cluster_update_lock) - threading.Thread(target=self._preemption_handler, - name="WorkerPreemptionHandler", - daemon=True).start() - - def _validate_preemption_failure(self, e): - """Validates that the given exception represents worker preemption.""" - if _is_worker_failure(e): - return - raise e - - @contextlib.contextmanager - def wait_on_failure(self, - on_failure_fn=None, - on_recovery_fn=None, - worker_device_name="(unknown)"): - """Catches worker preemption error and wait until failed workers are back. - - Args: - on_failure_fn: an optional function to run if preemption happens. - on_recovery_fn: an optional function to run when a worker is recovered - from preemption. - worker_device_name: the device name of the worker instance that is passing - through the failure. - - Yields: - None. - """ - try: - yield - except errors.OpError as e: - self._validate_preemption_failure(e) - logging.error("Worker %s failed with error: %s", worker_device_name, e) - if on_failure_fn: - on_failure_fn() - - with self._cluster_update_lock: - self._cluster_due_for_update.set() - self._worker_up_cond.wait(_WORKER_MAXIMUM_RECOVERY_SEC) - logging.info("Worker %s has been recovered.", worker_device_name) - - if on_recovery_fn: - with self.wait_on_failure( - on_recovery_fn=on_recovery_fn, - worker_device_name=worker_device_name): - on_recovery_fn() - - def _preemption_handler(self): - """A loop that handles preemption. - - This loop waits for signal of worker preemption and upon worker preemption, - it waits until all workers are back and updates the cluster about the - restarted workers. - """ - while True: - self._cluster_due_for_update.wait() - with self._cluster_update_lock: - try: - # TODO(haoyuzhang): support partial cluster recovery - logging.info("Cluster now being recovered.") - context.context().update_server_def(self._server_def) - - # Cluster updated successfully, clear the update signal, and notify - # all workers that they are recovered from failure. - logging.info("Cluster successfully recovered.") - self._worker_up_cond.notify_all() - self._cluster_due_for_update.clear() - except Exception as e: # pylint: disable=broad-except - self._validate_preemption_failure(e) - # NOTE: Since the first RPC (GetStatus) of update_server_def is - # currently blocking by default, error should only happen if: - # (1) More workers failed while waiting for the previous workers to - # come back; - # (2) Worker failed when exchanging subsequent RPCs after the first - # RPC returns. - # Consider adding backoff retry logic if we see the error logged - # too frequently. - logging.error("Cluster update failed with error: %s. Retrying...", e) - - -class Worker(object): - """A worker in a cluster. - - Attributes: - worker_index: The index of the worker in the cluster. - device_name: The device string of the worker, e.g. "/job:worker/task:1". - executor: The worker's executor for remote function execution. - failure_handler: The failure handler used to handler worker preemption - failure. 
- """ - - def __init__(self, worker_index, device_name, cluster): - self.worker_index = worker_index - self.device_name = device_name - self.executor = executor.new_executor(enable_async=False) - self.failure_handler = cluster.failure_handler - self._cluster = cluster - self._resource_remote_value_refs = [] - - # Worker threads need to start after `Worker`'s initialization. - threading.Thread(target=self._process_queue, - name="WorkerClosureProcessingLoop-%d" % self.worker_index, - daemon=True).start() - - def _set_resources_aborted(self): - # TODO(yuefengz): maybe we can query whether a tensor is valid or not - # instead of marking a tensor aborted? - for weakref_resource in self._resource_remote_value_refs: - resource = weakref_resource() - if resource: - resource._set_aborted() # pylint: disable=protected-access - - def _set_dead(self): - raise NotImplementedError("_set_dead is not implemented.") - - def _process_closure(self, closure): - """Runs a closure with preemption handling.""" - try: - with self._cluster.failure_handler.wait_on_failure( - on_failure_fn=lambda: self._cluster._closure_queue.put_back(closure), # pylint: disable=protected-access - on_recovery_fn=self._set_resources_aborted, - worker_device_name=self.device_name): - closure.execute_on(self) - # TODO(yuefengz): we don't have to materialize results every step. - with metric_utils.monitored_timer("remote_value_fetch"): - closure._fetch_output_remote_values() # pylint: disable=protected-access - self._cluster._closure_queue.mark_finished(closure) # pylint: disable=protected-access - except Exception as e: # pylint: disable=broad-except - logging.error( - "/job:worker/task:%d encountered the following error when processing " - "closure: %r:%s", self.worker_index, e, e) - nest.map_structure( - lambda x: x._set_error(e), # pylint: disable=protected-access - closure._output_remote_values) # pylint: disable=protected-access - self._cluster._closure_queue.mark_failed(e, closure) # pylint: disable=protected-access - - def _process_queue(self): - while True: - closure = self._cluster._closure_queue.get() # pylint: disable=protected-access - self._process_closure(closure) - - def _create_resource(self, function, args=None, kwargs=None): - """Synchronously creates a per-worker resource represented by a `RemoteValue`. - - Args: - function: the resource function to be run remotely. It should be a - `tf.function`, a concrete function or a Python function. - args: positional arguments to be passed to the function. - kwargs: keyword arguments to be passed to the function. - - Returns: - one or several RemoteValue objects depending on the function return - values. - """ - # Some notes about the concurrency: currently all the activities related to - # the same worker such as creating resources, setting resources' aborted - # status, and executing closures happen on the same thread. This allows us - # to have simpler logic of concurrency. - closure = Closure(function=function, args=args, kwargs=kwargs) - resource_remote_value = closure._output_remote_values # pylint: disable=protected-access - self._register_resource(resource_remote_value) - - # The following is a short-term solution to lazily create resources in - # parallel. - # TODO(b/160343165): we should create resources eagerly, i.e. schedule the - # resource creation function as soon as users call this method. 
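
# Illustrative sketch (not part of the deleted file): `_process_closure` above
# wraps execution in `wait_on_failure`, a catch-and-recover context manager.
# A much-simplified standalone version of that pattern, with hypothetical
# names (the real handler also blocks until the cluster has been updated):
import contextlib

@contextlib.contextmanager
def on_transient_failure(on_failure=None, transient=(ConnectionError,)):
  try:
    yield
  except transient as e:
    print('transient failure: %s' % e)
    if on_failure is not None:
      on_failure()  # e.g. put the closure back on the queue.

with on_transient_failure(on_failure=lambda: print('requeueing closure')):
  raise ConnectionError('worker preempted')
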
- resource_remote_value._set_aborted() # pylint: disable=protected-access - return resource_remote_value - - def _register_resource(self, resource_remote_value): - if not isinstance(resource_remote_value, RemoteValue): - raise ValueError( - "Resource being registered is not of type `RemoteValue`.") - self._resource_remote_value_refs.append(weakref.ref(resource_remote_value)) - - -class Cluster(object): - """A cluster with workers. - - We assume all function errors are fatal and based on this assumption our - error reporting logic is: - 1) Both `schedule` and `join` can raise a non-retryable error which is the - first error seen by the client from any previously scheduled functions. - 2) When an error is raised, there is no guarantee on how many previously - scheduled functions have been executed; functions that have not been executed - will be thrown away and marked as cancelled. - 3) After an error is raised, the internal state of error will be cleared. - I.e. functions can continue to be scheduled and subsequent calls of `schedule` - or `join` will not raise the same error again. - - Attributes: - failure_handler: The failure handler used to handler worker preemption - failure. - workers: a list of `Worker` objects in the cluster. - """ - - def __init__(self, cluster_resolver, client_name="chief"): - """Initializes the cluster instance and connect to the remote cluster.""" - if client_name in ["worker", "ps"]: - raise ValueError("Client name should not be 'worker' or 'ps'.") - cluster_spec = cluster_resolver.cluster_spec() - - self._num_workers = len(cluster_spec.as_dict().get("worker", ())) - self._num_ps = len(cluster_spec.as_dict().get("ps", ())) - device_filters = server_lib.ClusterDeviceFilters() - # For any worker, only the devices on PS and chief nodes are visible - for i in range(self._num_workers): - device_filters.set_device_filters( - "worker", i, ["/job:ps", "/job:%s" % client_name]) - # Similarly for any ps, only the devices on workers and chief are visible - for i in range(self._num_ps): - device_filters.set_device_filters( - "ps", i, ["/job:worker", "/job:%s" % client_name]) - - context.context().mirroring_policy = context.MIRRORING_ALL - # Allow at most one outstanding RPC for each worker at a certain time. This - # is to simplify worker failure handling in the runtime - os.environ["TF_ENABLE_EAGER_CLIENT_STREAMING_ENQUEUE"] = "False" - remote.connect_to_cluster(cluster_spec, - job_name=client_name, - protocol=cluster_resolver.rpc_layer, - cluster_device_filters=device_filters) - - self._closure_queue = _CoordinatedClosureQueue() - self.failure_handler = WorkerPreemptionHandler(context.get_server_def()) - worker_device_strings = [ - "/job:worker/replica:0/task:%d" % i for i in range(self._num_workers) - ] - self.workers = [ - Worker(i, w, self) for i, w in enumerate(worker_device_strings) - ] - - def schedule(self, function, args, kwargs): - """Schedules `function` to be dispatched to a worker for execution. - - Args: - function: The function to be dispatched to a worker for execution - asynchronously. - args: Positional arguments for `fn`. - kwargs: Keyword arguments for `fn`. - - Returns: - A structure of `RemoteValue` object. 
- """ - closure = Closure(function=function, args=args, kwargs=kwargs) - self._closure_queue.put(closure) - return closure._output_remote_values # pylint: disable=protected-access - - def join(self): - """Blocks until all scheduled functions are executed.""" - self._closure_queue.wait() - - def done(self): - """Returns true if all scheduled functions are executed.""" - return self._closure_queue.done() - - -class ParameterServerFailureError(Exception): - """An error representing at least one parameter server is interrupted.""" - pass - - -class Client(object): - """An object to schedule and orchestrate remote function execution. - - A `Client` object represents a program used to create dataset, schedule - functions to be executed, and fetch the results of the functions. Operations - that will involve other tasks in the cluster, such as variable creation, - reading variables etc., should be performed within `client.context()`. - - Currently, `Client` is not supported to be used in a standalone manner. - It should be used in conjunction with `ParameterServerStrategyV2`. The - recommended way of using the combination is through a `ParameterServerClient` - object. Please see `ParameterServerClient` for more information. - - This is currently under development, and the API as well as implementation - is subject to changes. - """ - - def __init__(self, strategy): - """Initialization of a `Client` instance. - - This connects the client to remote workers and parameter servers, through - a `tf.config.experimental_connect_to_cluster` call. - - Args: - strategy: a `tf.distribute.Strategy` object. Currently, only - `ParameterServerStrategyV2` is supported. - - Raises: - ValueError: if the strategy being used is not supported. - """ - if not isinstance(strategy, - parameter_server_strategy_v2.ParameterServerStrategyV2): - raise ValueError("Only `ParameterServerStrategyV2` is supported in " - "`Client` currently.") - self._strategy = strategy - self.cluster = Cluster(strategy._cluster_resolver) - - @contextlib.contextmanager - def context(self): - """Context manager under which client distribution is in effect. - - All distribution related methods using this `Client`, including those that - create and update variables, should be used within this context. This - context manager handles cluster fault tolerance in remote function - execution. - - The context manager calls `join` automatically when exiting successfully. - - Entering `Client.context` also enters the underlying strategy's scope, and - this means that `tf.distribute.get_strategy()` will return the strategy - object being used. - - Yields: - Nothing. - """ - with self._strategy.scope(), self._handle_parameter_server_failure(): - yield - self.join() - - @contextlib.contextmanager - def experimental_variable_partitioning_scope(self): - with self._strategy.experimental_variable_partitioning_scope(): - yield - - (experimental_variable_partitioning_scope.__doc__) = ( - parameter_server_strategy_v2.ParameterServerStrategyV2 - .experimental_variable_partitioning_scope.__doc__) - - def schedule(self, fn, args=None, kwargs=None): - """Schedules `fn` to be dispatched to a worker for execution asynchronously. - - When calling `schedule` with a function `fn`, `fn` will be executed on a - remote worker at some later time. The process is asynchronous, meaning - `schedule` returns immediately, possibly without having the result ready - yet. `schedule` returns a structure of `RemoteValue` object, which wraps the - output of the function. 
Call `fetch()` on `RemoteValue` to wait for the - function execution to finish and retrieve its output from the remote worker. - - `schedule` guarantees that `fn` will be executed on a worker at least once; - it could be more than once if a worker fails and restarts in the middle of - function scheduling. Note that since worker can fail at any point when - executing the function, it is possible that the function is partially - executed, but `Client` guarantees that in those events, the function will - eventually be fully executed, possibly on a different worker that is - available. - - If any previously scheduled function raises an error, `schedule` will fail - by raising any one of those errors, and clear the errors collected so far. - There are two implications when this happens: 1) user should call `schedule` - with `fn` again to re-schedule, and 2) some of the previously scheduled - functions may no longer execute. User can call `fetch` on the returned - `RemoteValue` to inspect if they have executed, failed, or cancelled, and - reschedule the corresponding function if needed. - - When `schedule` raises, it is possible that there are still functions being - executed on workers, at the time `schedule` raises. When this happens, users - can call `join` again to wait for all pending async function execution to - finish, and bring the cluster into a consistent state. - - At this time, there is no support of worker assignment for function - execution, or priority of the workers. - - `args` and `kwargs` are the arguments passed into `fn`, when `fn` is - executed on a worker. They can be `PerWorkerValues`, which is a collection - of values, each of which represents a component specific to a worker; in - this case, the argument will be substituted with the corresponding component - on the target worker. Arguments that are not `PerWorkerValues` will be - passed into `fn` as-is. - - Args: - fn: A `tf.function`; the function to be dispatched to a worker for - execution asynchronously. - args: Positional arguments for `fn`. - kwargs: Keyword arguments for `fn`. - - Returns: - A structure of `RemoteValue` object. - - Raises: - Exception: one of the exceptions caught by the client by any previously - scheduled function since the last time an error was thrown or since - the beginning of the program. - """ - # TODO(b/160702436): Invoke `strategy.run` for user's function so it enters - # a `ReplicaContext` in a logically correct way. - with distribute_lib.ReplicaContext( - self._strategy, replica_id_in_sync_group=0): - with self._translate_parameter_server_failure(): - return self.cluster.schedule(fn, args=args, kwargs=kwargs) - - def join(self): - """Blocks until all the scheduled functions have finished execution. - - If any previously scheduled function raises an error, `join` will fail by - raising any one of those errors, and clear the errors collected so far. If - this happens, some of the previously scheduled functions may no longer - execute. Users can call `fetch` on the returned `RemoteValue` to inspect if - they have executed, failed, or cancelled. If some that have been cancelled - need to be rescheduled, users should call `schedule` with the function - again. - - Note: `join` raises an exception as soon as the client detects one, and this - means it is possible that there are still functions being executed on - workers, at the time `join` raises. 
When this happens, users can call `join` - again to wait for all pending async function execution to finish, and bring - the cluster into a consistent state. - - Raises: - Exception: one of the exceptions caught by the client by any previously - scheduled function since the last time an error was thrown or since - the beginning of the program. - """ - # TODO(b/159486639): Update the docs once we can cancel the functions being - # executed on workers, that when `join` returns, the system is stabilized. - with self._translate_parameter_server_failure(): - self.cluster.join() - - def done(self): - """Returns whether all the scheduled functions have finished execution. - - If any previously scheduled function raises an error, `done` will fail by - raising any one of those errors. - """ - return self.cluster.done() - - def create_per_worker_dataset(self, dataset_fn): - """Create dataset on workers by calling `dataset_fn` on worker devices. - - This creates the given dataset generated by dataset_fn on the workers - and returns an object that represents the collection of those individual - datasets. Calling `iter` on such collection of dataset returns a - `PerWorkerValues`, which is a collection of iterators, where the iterators - have been placed on respective workers. - - Calling `next` on this `PerWorkerValues` of iterators is currently - unsupported; it is meant to be passed as an argument into `Client.schedule`. - When the scheduled function is picked up and being executed by a worker, the - function will receive the individual iterator that corresponds to the - worker, and now `next` can be called on iterator to get the next (batch or - example) of data. - - Dataset shuffling and repeating are usually needed in `dataset_fn`; however, - sharding is not recommended: some worker may not be available and those - examples may be skipped and not covered by other workers, if the dataset is - sharded. - - Args: - dataset_fn: The dataset function that returns a dataset. This is to be - executed on the workers. - - Returns: - An object that represents the collection of those individual - datasets. `iter` is expected to be called on this object that returns - a `PerWorkerValues` of the iterators (that are on the workers). - """ - input_workers = input_lib.InputWorkers([ - (w.device_name, [w.device_name]) for w in self.cluster.workers - ]) - - return _PerWorkerDistributedDataset(dataset_fn, input_workers, self) - - def _create_per_worker_resources(self, fn, args=None, kwargs=None): - """Synchronously create resources on the workers. - - The resources are represented by `RemoteValue`s. - - Args: - fn: The function to be dispatched to all workers for execution - asynchronously. - args: Positional arguments for `fn`. - kwargs: Keyword arguments for `fn`. - - Returns: - A `PerWorkerValues` object, which wraps a tuple of `RemoteValue` objects. - """ - results = [] - for w in self.cluster.workers: - results.append(w._create_resource(fn, args=args, kwargs=kwargs)) # pylint: disable=protected-access - return PerWorkerValues(tuple(results)) - - def fetch(self, val): - """Blocking call to fetch results from `RemoteValue`s. - - This returns the execution result of `RemoteValue`s; if not ready, - waiting for it while blocking the caller. - - Args: - val: The value to fetch the results from. If this is structure of - `RemoteValue`, `fetch()` will be called on the individual `RemoteValue` - to get the result. 
- - Returns: - If `val` is a `RemoteValue` or a structure of `RemoteValue`s, returns - the fetched `RemoteValue` value immediately if it's available, or blocks - the call until it's available, and returns the fetched `RemoteValue` - values with the same structure. If `val` is other types, return (`val`,). - """ - - def _maybe_fetch(val): - if isinstance(val, RemoteValue): - return val.fetch() - else: - return val - - # TODO(yuefengz): we should fetch values in a batch. - result = nest.map_structure(_maybe_fetch, val) - if not isinstance(result, tuple): - return (result,) - return result - - # pylint: disable=missing-function-docstring - @contextlib.contextmanager - def _translate_parameter_server_failure(self): - try: - yield - except Exception as e: # pylint: disable=broad-except - if _is_ps_failure(e): - logging.exception("Encountered parameter server failures!") - raise ParameterServerFailureError(e) - else: - raise - - # pylint: disable=missing-function-docstring - @contextlib.contextmanager - def _handle_parameter_server_failure(self): - try: - with self._translate_parameter_server_failure(): - yield - except ParameterServerFailureError as e: # pylint: disable=broad-except - restart_exit_code = os.environ.get( - "TF_CLIENT_NON_FATAL_RESTART_EXIT_CODE", None) - if restart_exit_code is not None: - sys.exit(int(restart_exit_code)) - else: - raise - - -class _PerWorkerDistributedDataset(object): # pylint: disable=protected-access - """Represents worker-distributed datasets created from dataset function.""" - - def __init__(self, dataset_fn, input_workers, client): - """Makes an iterable from datasets created by the given function. - - Args: - dataset_fn: A function that returns a `Dataset`. - input_workers: an `InputWorkers` object. - client: a `Client` object, used to create dataset resources. - """ - def disallow_variable_creation(next_creator, **kwargs): - raise ValueError("Creating variables in `dataset_fn` is not allowed.") - - if isinstance(dataset_fn, def_function.Function): - with variable_scope.variable_creator_scope(disallow_variable_creation): - self._dataset_fn = dataset_fn.get_concrete_function() - elif isinstance(dataset_fn, tf_function.ConcreteFunction): - self._dataset_fn = dataset_fn - else: - with variable_scope.variable_creator_scope(disallow_variable_creation): - self._dataset_fn = def_function.function( - dataset_fn).get_concrete_function() - self._input_workers = input_workers - self._client = client - self._element_spec = None - - def __iter__(self): - # We would like users to create iterators outside `tf.function`s so that we - # can track them. - if (not context.executing_eagerly() or - ops.get_default_graph().building_function): - raise RuntimeError( - "__iter__() is not supported inside of tf.function or in graph mode.") - - def _create_per_worker_iterator(): - dataset = self._dataset_fn() - return iter(dataset) - - # If _PerWorkerDistributedDataset.__iter__ is called multiple - # times, for the same object it should only create and register resource - # once. Using object id to distinguish different iterator resources. - per_worker_iterator = self._client._create_per_worker_resources( - _create_per_worker_iterator) - - # Create an iterator, so the consumer function of this iterator can start - # tracing using this iterator without needing to wait for the completion of - # the iterater creation. Note: the iterator shouldn't use memory until it is - # consumed. - # TODO(b/154675763): get rid of this workaround once we can make input_fn a - # tf.function. 
- iterator = _create_per_worker_iterator() - for iterator_remote_value in per_worker_iterator._values: - iterator_remote_value._set_type_spec(iterator._type_spec) - return _PerWorkerDistributedIterator(per_worker_iterator._values) - - @property - def element_spec(self): - """The type specification of an element of this dataset.""" - raise NotImplementedError("Passing `AsyncDistributedDataset` to a " - "tf.function is not supported.") - - -class _PerWorkerDistributedIterator(PerWorkerValues): - """Distributed iterator for `Client`.""" - - def __next__(self): - return self.get_next() - - def get_next(self, name=None): - """Returns the next input from the iterator for all replicas.""" - raise NotImplementedError("Iterating over an `AsyncDistributedIterator` " - "is not supported right now.") - - -def _is_ps_failure(error): - """Whether the error is considered a parameter server failure.""" - if (_RPC_ERROR_FROM_PS in str(error) or - (isinstance(error, errors.InvalidArgumentError) and - "/job:ps" in str(error))): - return True - - -def _is_worker_failure(error): - """Whether the error is considered a worker failure.""" - if _JOB_WORKER_STRING_IDENTIFIER not in str(error): - return False - if _RPC_ERROR_FROM_PS in str(error): - return False - - # TODO(haoyuzhang): Consider using special status code if error from a - # remote is derived from RPC errors originated from other hosts. - if isinstance(error, (errors.UnavailableError, errors.AbortedError)): - return True - - # The following error could happen when the remote task fails and restarts - # in a very short interval during which no RPCs were exchanged to detect the - # failure. In that case, gRPC allows channel (which is different from a - # connection) to be reused for a replaced server listening to same address. - if isinstance(error, errors.InvalidArgumentError): - if ("Unable to find a context_id" in str(error) or - "unknown device" in str(error) or - "Unable to find the relevant tensor remote_handle" in str(error)): - # TODO(b/159961667): Fix "Unable to find the relevant tensor - # remote_handle" part. - return True - - # TODO(b/162541228): The following 3 types of errors are very rare and only - # observed in large-scale testing. The types of errors should be reduced. - # This error could show up when copying function inputs from remote tasks. - if isinstance(error, errors.InternalError): - if ("Failed copying input tensor" in str(error) or - "Unable to find a context_id" in str(error)): - return True - - # This could happen when the function registration fails. In the observed - # cases this only happens to the dataset related functions. - if isinstance(error, errors.NotFoundError): - if ("is neither a type of a primitive operation nor a name of a function " - "registered" in str(error)): - return True - - # This could happen when the iterator is no longer valid on the remote worker - # "Resource input tensor contains an invalid device" - if isinstance(error, errors.CancelledError): - return True - - return False diff --git a/tensorflow/python/distribute/client/client_test.py b/tensorflow/python/distribute/client/client_test.py deleted file mode 100644 index 12152407c5d..00000000000 --- a/tensorflow/python/distribute/client/client_test.py +++ /dev/null @@ -1,388 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for client.py.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections -import threading -import time -from absl import logging - -from tensorflow.python.distribute.client import client -from tensorflow.python.eager import def_function -from tensorflow.python.platform import test -from tensorflow.python.training import coordinator -from tensorflow.python.util import nest - - -class CoordinatedClosureQueueTest(test.TestCase): - - def testBasic(self): - queue = client._CoordinatedClosureQueue() - closure1 = self._create_closure() - queue.put(closure1) - self.assertIs(closure1, queue.get()) - self.assertFalse(queue.done()) - queue.put_back(closure1) - self.assertEqual(closure1, queue.get()) - queue.mark_finished(closure1) - self.assertTrue(queue.done()) - queue.wait() - - def testProcessAtLeaseOnce(self): - closure_queue = client._CoordinatedClosureQueue() - labels = ['A', 'B', 'C', 'D', 'E'] - processed_count = collections.defaultdict(int) - - coord = coordinator.Coordinator(clean_stop_exception_types=[]) - - def process_queue(): - with coord.stop_on_exception(): - has_been_put_back = False - while True: - closure = closure_queue.get(timeout=30) - if closure is None: - break - if not has_been_put_back: - has_been_put_back = True - closure_queue.put_back(closure) - continue - closure._function() - closure_queue.mark_finished(closure) - - def get_func(label): - - def func(): - logging.info('Label: %s, before waiting 3 sec', label) - time.sleep(3) - processed_count[label] += 1 - logging.info('Label: %s, after waiting 3 sec', label) - - return func - - for label in labels: - closure_queue.put(client.Closure(get_func(label))) - t1 = threading.Thread(target=process_queue, daemon=True) - t1.start() - t2 = threading.Thread(target=process_queue, daemon=True) - t2.start() - - # Make sure multiple wait() calls are fine. - closure_queue.wait() - closure_queue.wait() - closure_queue.wait() - closure_queue.wait() - - self.assertEqual(processed_count, collections.Counter(labels)) - - coord.join([t1, t2]) - - def testNotifyBeforeWait(self): - closure_queue = client._CoordinatedClosureQueue() - - def func(): - logging.info('func running') - - coord = coordinator.Coordinator(clean_stop_exception_types=[]) - - def process_queue(): - with coord.stop_on_exception(): - closure = closure_queue.get() - closure_queue.mark_finished(closure) - - closure_queue.put(client.Closure(func)) - t = threading.Thread(target=process_queue) - t.start() - coord.join([t]) - - # This test asserts that waiting at the time the function has been processed - # doesn't time out. 
- closure_queue.wait() - - def testWaitRaiseErrorAfterMarkFailure(self): - closure_queue = client._CoordinatedClosureQueue() - closure_queue.put(self._create_closure()) - closure = closure_queue.get() - - wait_finish_event = threading.Event() - coord = coordinator.Coordinator(clean_stop_exception_types=[]) - - # Using a thread to verify that closure_queue.wait() will not return until - # all inflight closures are finished. - - def mark_finished_fn(): - with coord.stop_on_exception(): - self.assertFalse(wait_finish_event.is_set()) - try: - raise ValueError('Some error.') - except ValueError as e: - closure_queue.mark_failed(e, closure) - wait_finish_event.wait() - - t = threading.Thread(target=mark_finished_fn) - t.start() - - with self.assertRaises(ValueError): - closure_queue.wait() - wait_finish_event.set() - - coord.join([t]) - self.assertTrue(closure_queue.done()) - - def _create_closure(self): - - @def_function.function() - def some_function(): - return 1.0 - - return client.Closure(some_function) - - def _put_two_closures_and_get_one(self): - closure_queue = client._CoordinatedClosureQueue() - closure1 = self._create_closure() - closure_queue.put(closure1) - - closure2 = self._create_closure() - closure_queue.put(closure2) - - closure_got = closure_queue.get() # returns closure1 - self.assertIs(closure_got, closure1) - self.assertIsNot(closure_got, closure2) - return closure_queue, closure1, closure2 - - def testPutRaiseError(self): - closure_queue, closure1, closure2 = self._put_two_closures_and_get_one() - - closure_queue.mark_failed(ValueError(), closure1) - - with self.assertRaises(ValueError): - closure_queue.put(self._create_closure()) - - self.assertTrue(closure_queue.done()) - - with self.assertRaisesRegex( - client.FunctionRetryableError, - 'The corresponding function is cancelled. Please reschedule the ' - 'function.'): - closure2._fetch_output_remote_values() - - # The error is cleared. - closure_queue.put(self._create_closure()) - - def testWaitRaiseError(self): - closure_queue, closure1, closure2 = self._put_two_closures_and_get_one() - - closure_queue.mark_failed(ValueError(), closure1) - - with self.assertRaises(ValueError): - closure_queue.wait() - self.assertTrue(closure_queue.done()) - - with self.assertRaisesRegex( - client.FunctionRetryableError, - 'The corresponding function is cancelled. Please reschedule the ' - 'function.'): - closure2._fetch_output_remote_values() - - # The error is cleared. - closure_queue.wait() - - def testDoneRaiseError(self): - closure_queue, closure1, _ = self._put_two_closures_and_get_one() - closure_queue.get() - - self.assertFalse(closure_queue.done()) - closure_queue.mark_failed(ValueError(), closure1) - with self.assertRaises(ValueError): - closure_queue.done() - - def _test_error_reporting_and_cancel_flow(self, call_wait): - closure_queue, closure1, closure2 = self._put_two_closures_and_get_one() - closure_queue.put(self._create_closure()) - closure_queue.get() - # At this moment, there are two inflight, one in queue. - self.assertEqual(closure_queue._inflight_closure_count, 2) - - # Simulating closure1 fails. 
- try: - raise ValueError('Some error.') - except ValueError as e: - nest.map_structure(lambda x: x._set_error(e), - closure1._output_remote_values) - self.assertEqual(closure_queue._error_generation, 0) # pylint: disable=g-assert-in-except - closure_queue.mark_failed(e, closure1) - self.assertEqual(closure_queue._error_generation, 1) - # At this moment, there are one inflight, nothing - # in queue (because the ones in queue should have been removed and - # cancelled). - self.assertTrue(closure_queue._queue.empty()) - # Doesn't include out of generation closures. - self.assertEqual(closure_queue._inflight_closure_count, 1) - - coord = coordinator.Coordinator(clean_stop_exception_types=[]) - closure3 = self._create_closure() - - with self.assertRaises(ValueError): - # Verifying `wait()` or `put()` raises even if one closure is in - # flight. - if call_wait: - closure_queue.wait() - else: - closure_queue.put(closure3) - # At this moment, there is one inflight, nothing in queue. - self.assertTrue(closure_queue._queue.empty()) - self.assertEqual(closure_queue._inflight_closure_count, 1) - - # This asserts that closure1 has errored. - with self.assertRaisesRegex(ValueError, 'Some error.'): - closure1._fetch_output_remote_values() - - # The following asserts that closure3 should have been cancelled. - if not call_wait: - with self.assertRaisesRegex( - client.FunctionRetryableError, - 'The corresponding function is cancelled. Please reschedule the ' - 'function.'): - closure3._fetch_output_remote_values() - - # Closure2 is inflight, so it shouldn't be ready. - self.assertEqual(closure2._output_remote_values._status, - client._RemoteValueStatus.NOT_READY) - - # And `wait` should block because closure2 is not back yet. - self.assertFalse(closure_queue.wait(timeout=20)) - - # Now let's assume that closure2 isn't successful due to worker preemption, - # and now it's attempted to be put back, but ends up getting cancelled. - self.assertEqual(closure2._error_generation, 0) - self.assertEqual(closure_queue._error_generation, 1) - closure_queue.put_back(closure2) - - with self.assertRaisesRegex( - client.FunctionRetryableError, - 'The corresponding function is cancelled. Please reschedule the ' - 'function.'): - closure2._fetch_output_remote_values() - - # At this moment, there is nothing inflight, and the queue is also empty - # (because closure2 should not be added back to the queue). - self.assertTrue(closure_queue._queue.empty()) - self.assertEqual(closure_queue._inflight_closure_count, 0) - - closure4 = self._create_closure() - - e = threading.Event() - - def get_fn(): - with coord.stop_on_exception(): - # This should end up getting closure4, not closure2, because closure2 - # has been cancelled and should not be got. - closure_got = closure_queue.get() - e.set() - self.assertEqual(closure_got._error_generation, 1) - self.assertEqual(closure_queue._error_generation, 1) - self.assertIs(closure4, closure_got) - self.assertIsNot(closure2, closure_got) - - t = threading.Thread(target=get_fn) - t.start() - - time.sleep(10) - - # Make sure `closure_got = closure_queue.get()` is unblocked as a result of - # `closure_queue.put(closure4)`. - self.assertFalse(e.is_set()) - closure_queue.put(closure4) - self.assertTrue(e.wait()) - coord.join([t]) - - self.assertEqual(closure_queue._inflight_closure_count, 1) - closure_queue.mark_finished(closure4) - # The queue is now cleared and nothing inflight. 
- self.assertEqual(closure_queue._inflight_closure_count, 0) - closure_queue.wait() - - def testWaitRaiseErrorAfterAnErrorIsReported(self): - self._test_error_reporting_and_cancel_flow(call_wait=True) - - def testPutRaiseErrorAfterAnErrorIsReported(self): - self._test_error_reporting_and_cancel_flow(call_wait=False) - - def testStateIsRestoredAfterJoinIsCalled(self): - closure_queue, closure1, closure2 = self._put_two_closures_and_get_one() - closure_queue.get() - self.assertEqual(closure_queue._inflight_closure_count, 2) - closure_queue.mark_failed(ValueError('test error'), closure1) - with self.assertRaises(ValueError): - closure_queue.put(self._create_closure()) - closure_queue.mark_failed(ValueError('test error'), closure2) - - # closure2's error is previous generation so should not raise at this - # following put, and _error should have been cleared. - self.assertIsNone(closure_queue._error) - closure_queue.put(self._create_closure()) - self.assertIsNone(closure_queue._error) - - def testStateIsRestoredAfterJoinIsCalled_WaitShouldReturn(self): - closure_queue, closure1, closure2 = self._put_two_closures_and_get_one() - closure_queue.put(self._create_closure()) - closure_queue.get() # got closure2 - self.assertFalse(closure_queue._queue.empty()) # still has closure3 - self.assertEqual(closure_queue._inflight_closure_count, 2) # closure1,2 - closure_queue.mark_failed(ValueError('test error'), closure1) - self.assertTrue(closure_queue._queue.empty()) # closure3 cancelled - self.assertEqual(closure_queue._inflight_closure_count, 1) - with self.assertRaises(ValueError): - closure_queue.wait() # reports error from closure1 - - # `wait` should block because closure2 is not back yet, even if closure2 - # was sent inflight before the error. - self.assertFalse(closure_queue.wait(timeout=20)) - self.assertEqual(closure_queue._inflight_closure_count, 1) - closure_queue.mark_finished(closure2) - closure_queue.wait() # wait should pass immediately - self.assertEqual(closure_queue._inflight_closure_count, 0) - - def testThreadSafey(self): - thread_count = 10 - queue = client._CoordinatedClosureQueue() - - # Each thread performs 20 queue actions: 10 are `put_back` and 10 are - # `mark_finished`. - action_count = 20 - - def func(): - for i in range(action_count): - closure = queue.get() - if i % 2 == 0: - queue.put_back(closure) - else: - queue.mark_finished(closure) - - threads = [threading.Thread(target=func) for i in range(thread_count)] - for t in threads: - t.start() - - for _ in range(thread_count * action_count // 2): - queue.put(self._create_closure()) - queue.wait() - self.assertTrue(queue.done()) - - -if __name__ == '__main__': - test.main() diff --git a/tensorflow/python/distribute/client/metric_utils.py b/tensorflow/python/distribute/client/metric_utils.py deleted file mode 100644 index f0a6628a333..00000000000 --- a/tensorflow/python/distribute/client/metric_utils.py +++ /dev/null @@ -1,79 +0,0 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Metrics collecting utilities for single client training.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import time - -from tensorflow.python.eager import monitoring -from tensorflow.python.util import tf_contextlib - -enable_metrics = False - -# Time in seconds to bucket the distribution of execution time. Range from -# 0.001s (i.e., 1ms) to 1000s. -_time_buckets = monitoring.ExponentialBuckets(0.001, 10, 6) - -_function_tracing_sampler = monitoring.Sampler( - '/tensorflow/api/ps_strategy/client/function_tracing', _time_buckets, - 'Sampler to track the time (in seconds) for tracing functions.') - -_closure_execution_sampler = monitoring.Sampler( - '/tensorflow/api/ps_strategy/client/closure_execution', _time_buckets, - 'Sampler to track the time (in seconds) for executing closures.') - -_remote_value_fetch_sampler = monitoring.Sampler( - '/tensorflow/api/ps_strategy/client/remote_value_fetch', _time_buckets, - 'Sampler to track the time (in seconds) for fetching remote_value.') - -_METRICS_MAPPING = { - 'function_tracing': _function_tracing_sampler, - 'closure_execution': _closure_execution_sampler, - 'remote_value_fetch': _remote_value_fetch_sampler -} - - -@tf_contextlib.contextmanager -def monitored_timer(metric_name, state_tracker=None): - """Monitor the execution time and collect it into the specified metric.""" - if not enable_metrics: - yield - else: - start_time = time.time() - start_state = state_tracker() if state_tracker else None - yield - duration_sec = time.time() - start_time - # If a state_checker is provided, record the metric only if the end state is - # different from the start state. - if state_tracker is None or state_tracker() != start_state: - metric = _METRICS_MAPPING[metric_name] - metric.get_cell().add(duration_sec) - - -def get_metric_summary(metric_name): - """Get summary for the specified metric.""" - metric = _METRICS_MAPPING[metric_name] - histogram_proto = metric.get_cell().value() - ret = dict() - ret['min'] = histogram_proto.min - ret['max'] = histogram_proto.max - ret['num'] = histogram_proto.num - ret['sum'] = histogram_proto.sum - # TODO(haoyuzhang): consider reporting the distribution in buckets. - return ret diff --git a/tensorflow/python/distribute/client/metric_utils_test.py b/tensorflow/python/distribute/client/metric_utils_test.py deleted file mode 100644 index 79827e5e9f6..00000000000 --- a/tensorflow/python/distribute/client/metric_utils_test.py +++ /dev/null @@ -1,69 +0,0 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
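`monitored_timer` above records only when the module-level `enable_metrics` flag is switched on, and it skips recording when an optional `state_tracker` reports no state change across the timed block; `get_metric_summary` then reads back the aggregated histogram. A minimal usage sketch follows, assuming a pre-deletion TensorFlow tree so the module is still importable, and using the `'closure_execution'` metric name defined above.

    # Minimal sketch, assuming tensorflow.python.distribute.client.metric_utils
    # still exists (it is removed by this change).
    import time
    from tensorflow.python.distribute.client import metric_utils

    metric_utils.enable_metrics = True  # collection is off by default

    for _ in range(3):
      # Each pass through the context manager adds one sample to the sampler.
      with metric_utils.monitored_timer('closure_execution'):
        time.sleep(0.01)  # stand-in for executing a scheduled closure

    summary = metric_utils.get_metric_summary('closure_execution')
    print(summary['num'])  # 3 samples recorded
    print(summary['sum'])  # total measured time in seconds, roughly 0.03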
-# ============================================================================== -"""Tests for metrics collecting in client.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import time -from tensorflow.python.distribute import multi_worker_test_base -from tensorflow.python.distribute.client import client -from tensorflow.python.distribute.client import metric_utils -from tensorflow.python.distribute.cluster_resolver import SimpleClusterResolver -from tensorflow.python.eager import def_function -from tensorflow.python.eager import test -from tensorflow.python.training.server_lib import ClusterSpec - - -class MetricUtilsTest(test.TestCase): - - def testClientMetrics(self): - metric_utils.enable_metrics = True - - cluster_def = multi_worker_test_base.create_in_process_cluster( - num_workers=1, num_ps=1, rpc_layer='grpc') - cluster_def['chief'] = [ - 'localhost:%d' % multi_worker_test_base.pick_unused_port() - ] - cluster_resolver = SimpleClusterResolver( - ClusterSpec(cluster_def), rpc_layer='grpc') - cluster = client.Cluster(cluster_resolver) - - @def_function.function - def func(): - time.sleep(0.5) - return 3 - - result = cluster.schedule(func, args=None, kwargs=None) - result = cluster.schedule(func, args=None, kwargs=None) - cluster.join() - self.assertEqual(result._get_value().numpy(), 3) - - # Tracing, closure execution, and remote_value fetching should be executed - # exactly once for running this function. - metric_tracing = metric_utils.get_metric_summary('function_tracing') - self.assertEqual(metric_tracing['num'], 1) - # Tracing time should be longer than the sleep time in Python function. - self.assertGreater(metric_tracing['sum'], 0.5) - metric_closure = metric_utils.get_metric_summary('closure_execution') - self.assertEqual(metric_closure['num'], 2) - metric_remote_value = metric_utils.get_metric_summary('remote_value_fetch') - self.assertEqual(metric_remote_value['num'], 2) - - -if __name__ == '__main__': - test.main() diff --git a/tensorflow/python/distribute/client/parameter_server_client.py b/tensorflow/python/distribute/client/parameter_server_client.py deleted file mode 100644 index 8236c2410d8..00000000000 --- a/tensorflow/python/distribute/client/parameter_server_client.py +++ /dev/null @@ -1,55 +0,0 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Parameter server client module. - -This is currently under development and the API is subject to change. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.distribute import parameter_server_strategy_v2 -from tensorflow.python.distribute.client import client - - -class ParameterServerClient(client.Client): - """A client that uses `ParameterServerStrategy` to distribute tasks. 
- - Parameter server training refers to the distributed training architecture - that requires two jobs in the cluster: workers and parameter servers. The - variables and updates to those variables are assigned on the parameter - servers' tasks, and the actual computation intensive operations are assigned - on worker tasks. In TF2, parameter server training only starts up one - client process, to drive and coordinate the workers and parameter servers. - This is referred to as single-client architecture, as opposed to multi-client - approach which is seen more often in traditional TensorFlow distributed - training, including `tf.estimator.Estimator` and `tf.keras` with - `tf.distribute.experimental.MultiWorkerMirroredStrategy`. - - `ParameterServerClient` is a `Client` that uses `ParameterServerStrategy` as - the underlying strategy to distribute, and is the starting point of parameter - server training/evaluation. - - If 'TF_CONFIG' environment variable is used, provide a - `TFConfigClusterResolver` to detect configurations for multi-worker training. - - """ - - def __init__(self, cluster_resolver): - super(ParameterServerClient, self).__init__( - parameter_server_strategy_v2.ParameterServerStrategyV2( - cluster_resolver)) diff --git a/tensorflow/python/distribute/client/parameter_server_client_test.py b/tensorflow/python/distribute/client/parameter_server_client_test.py deleted file mode 100644 index db22a476b4a..00000000000 --- a/tensorflow/python/distribute/client/parameter_server_client_test.py +++ /dev/null @@ -1,405 +0,0 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for parameter_server_client.py.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl import logging -from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.distribute import multi_worker_test_base -from tensorflow.python.distribute import sharded_variable -from tensorflow.python.distribute.client import client -from tensorflow.python.distribute.client import parameter_server_client -from tensorflow.python.distribute.cluster_resolver import SimpleClusterResolver -from tensorflow.python.eager import def_function -from tensorflow.python.eager import test -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import errors -from tensorflow.python.framework import tensor_spec -from tensorflow.python.ops import check_ops -from tensorflow.python.ops import init_ops_v2 -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import random_ops -from tensorflow.python.ops import variables -from tensorflow.python.training.server_lib import ClusterSpec - - -def make_client(num_workers, num_ps): - # TODO(rchao): Test the internal rpc_layer version. 
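The docstring above describes the single-client architecture; the deleted test's `make_client` helper, which continues just below, shows the concrete wiring. For orientation, here is a hedged end-to-end sketch of that flow. It assumes a pre-deletion TensorFlow tree, reuses the in-process test-cluster helpers used by the tests, and the cluster sizes and the variable are purely illustrative, not a production setup.

    # Sketch of the single-client flow, mirroring the deleted tests.
    # Assumes a pre-deletion TensorFlow tree; not a production cluster setup.
    from tensorflow.python.distribute import multi_worker_test_base
    from tensorflow.python.distribute.client import parameter_server_client
    from tensorflow.python.distribute.cluster_resolver import SimpleClusterResolver
    from tensorflow.python.eager import def_function
    from tensorflow.python.ops import variables
    from tensorflow.python.training.server_lib import ClusterSpec

    cluster_def = multi_worker_test_base.create_in_process_cluster(
        num_workers=2, num_ps=1, rpc_layer='grpc')
    cluster_def['chief'] = [
        'localhost:%d' % multi_worker_test_base.pick_unused_port()]
    resolver = SimpleClusterResolver(ClusterSpec(cluster_def), rpc_layer='grpc')
    ps_client = parameter_server_client.ParameterServerClient(resolver)

    with ps_client.context():
      v = variables.Variable(initial_value=0.0)  # placed on a parameter server

    @def_function.function
    def worker_fn():
      v.assign_add(1.0)
      return v.read_value()

    result = ps_client.schedule(worker_fn)  # executes asynchronously on a worker
    ps_client.join()                        # block until scheduled closures finish
    print(ps_client.fetch(result))          # fetched remote value, 1.0 after one run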
- cluster_def = multi_worker_test_base.create_in_process_cluster( - num_workers=num_workers, num_ps=num_ps, rpc_layer="grpc") - cluster_def["chief"] = [ - "localhost:%d" % multi_worker_test_base.pick_unused_port() - ] - cluster_resolver = SimpleClusterResolver( - ClusterSpec(cluster_def), rpc_layer="grpc") - return parameter_server_client.ParameterServerClient(cluster_resolver) - - -class ParameterServerClientTest(test.TestCase): - - @classmethod - def setUpClass(cls): - super(ParameterServerClientTest, cls).setUpClass() - cls.client = make_client(num_workers=3, num_ps=2) - - def testBasic(self): - self.client._strategy.extended._variable_count = 0 - with self.client.context(): - v1 = variables.Variable(initial_value=0.0) - v2 = variables.Variable(initial_value=1.0) - self.assertEqual(self.client._strategy.extended._variable_count, 2) - - @def_function.function - def worker_fn(): - v1.assign_add(0.1) - v2.assign_sub(0.2) - return v1.read_value() / v2.read_value() - - results = self.client.schedule(worker_fn) - logging.info("Results of experimental_run_v2: %f", - self.client.fetch(results)) - - self.assertAlmostEqual(v1.read_value().numpy(), 0.1, delta=1e-6) - self.assertAlmostEqual(v2.read_value().numpy(), 0.8, delta=1e-6) - - def testFnReturnNestedValues(self): - x = constant_op.constant(1) - - @def_function.function - def f(): - return x + 1, (x + 2, x + 3), [x + 4], {"v": x} - - got = self.client.schedule(f) - want = 2, (3, 4), [5], {"v": 1} - self.assertEqual(self.client.fetch(got), want) - - def testInputFunction(self): - - def input_fn(): - return dataset_ops.DatasetV2.range(1, 2) - - with self.client.context(): - v = variables.Variable(initial_value=0, dtype=dtypes.int64) - - @def_function.function - def worker_fn(iterator): - x = next(iterator) - v.assign_add(x) - return x - - distributed_dataset = self.client.create_per_worker_dataset(input_fn) - result = self.client.schedule(worker_fn, args=(iter(distributed_dataset),)) - result = self.client.fetch(result) - self.assertEqual(result, (1,)) - result = self.client.schedule(worker_fn, args=(iter(distributed_dataset),)) - result = self.client.fetch(result) - self.assertEqual(result, (1,)) - - self.assertAlmostEqual(v.read_value().numpy(), 2, delta=1e-6) - - def testAsyncScheduleAndJoin(self): - - def input_fn(): - return dataset_ops.DatasetV2.from_tensor_slices([2] * 10) - - with self.client.context(): - v = variables.Variable(initial_value=0, dtype=dtypes.int32) - - # TODO(yuefengz): the following tf.function has a return value which is None - # in its structured_outputs. - @def_function.function - def worker_fn(iterator): - x = next(iterator) - v.assign_add(x) - - distributed_dataset = self.client.create_per_worker_dataset(input_fn) - - iterator = iter(distributed_dataset) - - # Verifying joining without any scheduling doesn't hang. - self.client.join() - self.assertEqual(v.read_value().numpy(), 0) - - for _ in range(5): - self.client.schedule(worker_fn, args=(iterator,)) - self.client.join() - - # With 5 addition it should be 2*5 = 10. - self.assertEqual(v.read_value().numpy(), 10) - - for _ in range(5): - self.client.schedule(worker_fn, args=(iterator,)) - - # Verifying multiple join is fine. - self.client.join() - self.client.join() - self.client.join() - - self.assertTrue(self.client.done()) - - # Likewise, it's now 20. 
- self.assertEqual(v.read_value().numpy(), 20) - - def testInputFunctionWithMap(self): - self._map_fn_tracing_count = 0 - - def input_fn(): - def map_fn(x): - self._map_fn_tracing_count += 1 - return x + 10 - return dataset_ops.DatasetV2.range(0, 10).map(map_fn) - - @def_function.function - def worker_fn(iterator): - return next(iterator) - - distributed_dataset = ( - self.client.create_per_worker_dataset(input_fn)) - result = self.client.schedule( - worker_fn, args=(iter(distributed_dataset),)) - self.assertEqual(result.fetch(), (10,)) - self.assertEqual(self._map_fn_tracing_count, 1) - - def testInputFunctionCreateVariables(self): - - def input_fn(): - v = variables.Variable(initial_value=0.0) - return v.read_value() - - with self.assertRaises(ValueError): - self.client.create_per_worker_dataset(input_fn) - - -class LimitedClosureQueueSizeBasicTest(ParameterServerClientTest): - """Test basic functionality works with explicit maximum closure queue size. - - Execute the same set of test cases as in ParameterServerClientTest, with an - explicit size limit for the closure queue. Note that even when the queue size - is set to infinite, there is still a maximum practical size (depends on host - memory limit) that might cause the queue.put operations to be blocking when - scheduling a large number of closures on a big cluster. These tests make sure - that the client does not run into deadlocks in such scenario. - """ - - @classmethod - def setUpClass(cls): - super(LimitedClosureQueueSizeBasicTest, cls).setUpClass() - client._CLOSURE_QUEUE_MAX_SIZE = 2 - cls.client = make_client(num_workers=3, num_ps=2) - - -class VariablePartitioningScopeTest(test.TestCase): - - @classmethod - def setUpClass(cls): - super(VariablePartitioningScopeTest, cls).setUpClass() - cls.client = make_client(num_workers=3, num_ps=2) - - def testBasic(self): - with self.client.context(): - with self.client.experimental_variable_partitioning_scope(): - init1 = init_ops_v2.Constant([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) - v1 = variables.Variable( - initial_value=lambda: init1(shape=(5, 2), dtype=dtypes.int64), - shape=(5, 2), - dtype=dtypes.int64) - - init2 = init_ops_v2.Constant([0, 1, 2, 3, 4, 5]) - v2 = variables.Variable( - initial_value=lambda: init2(shape=(6, 1), dtype=dtypes.int64), - shape=(6, 1), - dtype=dtypes.int64) - - self.assertIsInstance(v1, sharded_variable.ShardedVariable) - self.assertLen(v1.variables, 2) - self.assertRegex(v1.variables[0].device, "/job:ps/replica:0/task:0") - self.assertRegex(v1.variables[1].device, "/job:ps/replica:0/task:1") - self.assertAllEqual(v1.variables[0].read_value().numpy(), - [[0, 1], [2, 3], [4, 5]]) - self.assertAllEqual(v1.variables[1].read_value().numpy(), [[6, 7], [8, 9]]) - - self.assertIsInstance(v2, sharded_variable.ShardedVariable) - self.assertLen(v2.variables, 2) - self.assertRegex(v2.variables[0].device, "/job:ps/replica:0/task:0") - self.assertRegex(v2.variables[1].device, "/job:ps/replica:0/task:1") - self.assertAllEqual(v2.variables[0].read_value().numpy(), [[0], [1], [2]]) - self.assertAllEqual(v2.variables[1].read_value().numpy(), [[3], [4], [5]]) - - def testSurplusPS(self): - with self.client.context(): - with self.client.experimental_variable_partitioning_scope(): - initializer = init_ops_v2.Constant([0]) - - v = variables.Variable( - initial_value=lambda: initializer(shape=(1,), dtype=dtypes.int64), - shape=(1,), - dtype=dtypes.int64) - - self.assertIsInstance(v, sharded_variable.ShardedVariable) - self.assertLen(v.variables, 1) - 
self.assertRegex(v.variables[0].device, "/job:ps/replica:0/task:0") - self.assertAllEqual(v.variables[0].read_value().numpy(), [0]) - - def testInvalidArgument(self): - with self.assertRaisesRegex(ValueError, "initial_value"): - with self.client.experimental_variable_partitioning_scope(): - variables.Variable(initial_value=[0, 1, 2], shape=(3,)) - - with self.assertRaisesRegex(ValueError, "shape"): - with self.client.experimental_variable_partitioning_scope(): - initializer = init_ops_v2.Constant([0, 1, 2]) - variables.Variable( - initial_value=lambda: initializer(shape=(3,), dtype=dtypes.int64), - dtype=dtypes.int64) - - def testPerWorkerValue(self): - var_shape = tuple() - var_dtype = dtypes.float32 - var_name = "var" - - def create_var(): - var = variables.Variable( - initial_value=0.0, dtype=var_dtype, name=var_name) - self.assertIn("worker", var.device) - return var - - worker_local_var = self.client._create_per_worker_resources(create_var) - - # The following is a workaround to allow `worker_local_var` to be passed in - # as args to the `client.schedule` method which requires tensor specs to - # trace tf.function but _create_worker_resources' return values don't have - # tensor specs. We can get rid of this workaround once - # _create_worker_resources is able to infer the tensor spec of the return - # value of the function passed in. See b/154675763. - for var in worker_local_var._values: - var._set_type_spec(tensor_spec.TensorSpec(var_shape, var_dtype, var_name)) - - def worker_fn(var): - var.assign_add(1.0) - - for _ in range(10): - # Which slice of `worker_local_var` will be used will depend on which - # worker the `worker_fn` gets scheduled on. - self.client.schedule(worker_fn, args=(worker_local_var,)) - self.client.join() - - var_sum = sum(self.client.fetch(worker_local_var._values)) - self.assertEqual(var_sum, 10.0) - - -class ErrorReportingTest(test.TestCase): - - @classmethod - def setUpClass(cls): - super(ErrorReportingTest, cls).setUpClass() - cls.client = make_client(num_workers=3, num_ps=2) - - with cls.client.context(): - cls.iteration = variables.Variable(initial_value=0.0) - - @def_function.function - def _normal_function(self): - x = random_ops.random_uniform((2, 10)) - y = random_ops.random_uniform((10, 2)) - self.iteration.assign_add(1.0) - return math_ops.reduce_mean(math_ops.matmul(x, y)) - - @def_function.function - def _error_function(self): - x = random_ops.random_uniform((2, 10)) - y = random_ops.random_uniform((10, 2)) - check_ops.assert_non_positive_v2(math_ops.reduce_sum(math_ops.matmul(x, y))) - self.iteration.assign_add(1.0) - return self.iteration - - def testJoinRaiseError(self): - for _ in range(3): - self.client.schedule(self._normal_function) - self.client.schedule(self._error_function) - with self.assertRaises(errors.InvalidArgumentError): - self.client.join() - - def testScheduleRaiseError(self): - for _ in range(3): - self.client.schedule(self._normal_function) - self.client.schedule(self._error_function) - with self.assertRaises(errors.InvalidArgumentError): - while True: - self.client.schedule(self._normal_function) - - def testErrorWillbeCleared(self): - self.skipTest("b/157597579") - self.client.schedule(self._error_function) - with self.assertRaises(errors.InvalidArgumentError): - self.client.join() - - for _ in range(3): - self.client.schedule(self._normal_function) - self.client.schedule(self._error_function) - with self.assertRaises(errors.InvalidArgumentError): - self.client.join() - - def testFutureReturnError(self): - result = 
self.client.schedule(self._error_function) - - with self.assertRaises(errors.InvalidArgumentError): - result.fetch() - - # Clear the error. - with self.assertRaises(errors.InvalidArgumentError): - self.client.join() - - def testInputError(self): - aborted = self.client.schedule(self._error_function) - - @def_function.function - def func(x): - return x + 1.0 - - with self.assertRaises(errors.InvalidArgumentError): - self.client.join() - - result = self.client.schedule(func, args=(aborted,)) - with self.assertRaises(client.InputError): - result.fetch() - - with self.assertRaises(client.InputError): - self.client.join() - - -class LimitedClosureQueueErrorTest(ErrorReportingTest): - """Test error reporting works with explicit maximum closure queue size. - - Execute the same set of test cases as in ErrorReportingTest, with an explicit - size limit for the closure queue. - """ - - @classmethod - def setUpClass(cls): - super(LimitedClosureQueueErrorTest, cls).setUpClass() - client._CLOSURE_QUEUE_MAX_SIZE = 2 - cls.client = make_client(num_workers=3, num_ps=2) - - with cls.client.context(): - cls.iteration = variables.Variable(initial_value=0.0) - - -if __name__ == "__main__": - test.main() diff --git a/tensorflow/python/distribute/parameter_server_strategy_v2.py b/tensorflow/python/distribute/parameter_server_strategy_v2.py deleted file mode 100644 index 02f3c35a716..00000000000 --- a/tensorflow/python/distribute/parameter_server_strategy_v2.py +++ /dev/null @@ -1,202 +0,0 @@ -# Lint as: python3 -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Parameter server strategy V2 class. - -This is currently under development and the API is subject to change. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl import logging -from tensorflow.python.distribute import distribute_lib -from tensorflow.python.distribute import parameter_server_strategy -from tensorflow.python.distribute import sharded_variable -from tensorflow.python.framework import ops -from tensorflow.python.ops import variable_scope -from tensorflow.python.util import tf_contextlib - - -# pylint: disable=protected-access -class ParameterServerStrategyV2(distribute_lib.Strategy): - """An asynchronous multi-worker parameter server tf.distribute strategy. - - Currently, `ParameterServerStrategyV2` is not supported to be used as a - standalone tf.distribute strategy. It must be used in conjunction with - `Client`. The recommended way of using the combination is through a - `ParameterServerClient` object. Please see `Client` and - `ParameterServerClient` for more information. - - This is currently under development, and the API as well as implementation - is subject to changes. - """ - - def __init__(self, cluster_resolver): - """Initializes the V2 parameter server strategy. 
- - Args: - cluster_resolver: a `tf.distribute.cluster_resolver.ClusterResolver` - object. - """ - self._extended = ParameterServerStrategyV2Extended(self, cluster_resolver) - self._cluster_resolver = cluster_resolver - self._verify_args_and_config(cluster_resolver) - logging.info( - "ParameterServerStrategyV2 is initialized with cluster_spec: " - "%s", cluster_resolver.cluster_spec()) - super(ParameterServerStrategyV2, self).__init__(self._extended) - - @tf_contextlib.contextmanager - def experimental_variable_partitioning_scope(self): - """A context manager for creating `ShardedVariable`. - - Variables created inside a `with experimental_variable_partitioning_scope()` - code block will be of type `ShardedVariable` and their values are - partitioned among parameter servers along the first / outermost axis. The - number of shards are equal to the number of parameter servers. - - Variables created within this scope must be initialized using a callable as - `initial_value` and a known shape. - - Div partition strategy is used to partition the variable. Assuming we - assign consective integer ids along the first axis of the variable, then ids - are assigned to shards in a contiguous manner, while attempting to keep each - shard size identical. If the ids do not evenly divide the number of shards, - each of the first several shards will be assigned one more id. For instance, - a variable whose first dimension is 13 has 13 ids, and they are split across - 5 shards as: `[[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10], [11, 12]]`. - - Yields: - A context manager for creating `ShardedVariable`. - """ - with variable_scope.variable_creator_scope( - self._extended._make_sharded_variable_creator()): - yield - - def _verify_args_and_config(self, cluster_resolver): - if not cluster_resolver.cluster_spec(): - raise ValueError("Cluster spec must be non-empty in `cluster_resolver`.") - if self.extended._num_gpus_per_worker > 1: - raise NotImplementedError("Multi-gpu is not supported yet.") - - -class ParameterServerStrategyV2Extended( - parameter_server_strategy.ParameterServerStrategyExtended): - """Extended class for ParameterServerStrategyV2. - - Please see `tf.distribute.StrategyExtended` doc for more information. - """ - - def __init__(self, container_strategy, cluster_resolver): - """Initialization of ParameterServerStrategyV2Extended.""" - super(ParameterServerStrategyV2Extended, self).__init__(container_strategy) - self._num_ps = len(cluster_resolver.cluster_spec().as_dict().get("ps", [])) - self._variable_count = 0 - - def _create_variable(self, next_creator, **kwargs): - - if "colocate_with" in kwargs: - colocate_with = kwargs["colocate_with"] - # Clear the variable scope to avoid possible conflicts between device - # scope and colocation scope. - with ops.device(None): - with ops.colocate_with(colocate_with): - var = next_creator(**kwargs) - logging.debug( - "Creating variable (name:%s, shape:%r) that colocates with %s", - var.name, var.shape, kwargs["colocate_with"].name) - return var - - # Clear the colocation scope to avoid possible conflicts between device - # scope and colocation scope. 
- with ops.colocate_with(None, ignore_existing=True): - with ops.device("/job:ps/task:%d" % - (self._variable_count % self._num_ps)): - var = next_creator(**kwargs) - logging.debug( - "Creating variable (name:%s, shape:%r) on /job:ps/task:%d", - var.name, var.shape, (self._variable_count % self._num_ps)) - self._variable_count += 1 - return var - - def _make_sharded_variable_creator(self): - """Returns a function conforming to the `variable_creator` signature. - - The returned function creates `ShardedVariable` when called. - """ - - def sharded_variable_creator(next_creator, **kwargs): - if "shape" not in kwargs or kwargs["shape"] is None: - raise ValueError("shape must be explicitly specified when creating " - "sharded variables") - init_fn = kwargs.get("initial_value", None) - # We intentionally don't allow non-callable initial_value to ensure the - # value is created on PS but not client. If the value is created on - # client, it will needed to be sent to PS for variable initialization, - # which is inefficient and can potentially hit the 2GB limit on protobuf - # serialization. - if init_fn is None or not callable(init_fn): - raise ValueError("initial_value must be specified as a callable when " - "creating sharded variables") - - # Use "div" partition strategy to partition the variable. - full_shape = kwargs["shape"] - if self._num_ps < full_shape[0]: - num_shards = self._num_ps - else: - num_shards = full_shape[0] - offsets = [] - base = full_shape[0] // num_shards - extra = full_shape[0] % num_shards - for i in range(num_shards): - if i == 0: - offsets.append(0) - else: - prev_shard_size = base + (1 if i - 1 < extra else 0) - offsets.append(offsets[i - 1] + prev_shard_size) - - # Note: The way we initialize sharded variables is suboptimal, as it - # needs to create the full value tensor separately on each PS which the - # variable is going to be placed on. The full value could be very large - # and consume a lot of memory. The ideal way is to only create what's - # needed on the shard, however that's not practical because: - # 1. Initializers don't have sharded behavior support, even though some - # initializers (e.g, uniform) can be used directly. - # 2. tf.Variable signature requires "initial_value" to be either a value - # or a callable without arguments, meaning it is not straightforward - # to make the sharded component from it. - def init_shard_fn(shard_index): - full_value = init_fn() - if shard_index < num_shards - 1: - return full_value[offsets[shard_index]:offsets[shard_index + 1]] - else: - return full_value[offsets[shard_index]:] - - var_list = [] - for i in range(num_shards): - kwargs["shape"] = None - kwargs["initial_value"] = lambda: init_shard_fn(i) - var_list.append(next_creator(**kwargs)) - - result = sharded_variable.ShardedVariable(var_list) - return result - - return sharded_variable_creator - - def _call_for_each_replica(self, fn, args, kwargs): - # TODO(rchao): Consider implementing sync PS training. 
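The offsets computed in `sharded_variable_creator` above implement the div partition strategy documented in `experimental_variable_partitioning_scope`: each shard gets `dim0 // num_shards` ids, and the first `dim0 % num_shards` shards get one extra. The standalone sketch below (plain Python, no TensorFlow needed) reproduces that offset computation and checks it against the 13-ids-across-5-shards example from the docstring.

    # Standalone reproduction of the offset computation above, checked against
    # the docstring's example of 13 ids split across 5 parameter servers.
    def div_partition_offsets(dim0, num_ps):
      num_shards = min(num_ps, dim0)
      base = dim0 // num_shards
      extra = dim0 % num_shards
      offsets = []
      for i in range(num_shards):
        if i == 0:
          offsets.append(0)
        else:
          prev_shard_size = base + (1 if i - 1 < extra else 0)
          offsets.append(offsets[i - 1] + prev_shard_size)
      return offsets

    offsets = div_partition_offsets(13, 5)
    print(offsets)  # [0, 3, 6, 9, 11]

    ids = list(range(13))
    shards = [ids[offsets[i]:offsets[i + 1]] for i in range(len(offsets) - 1)]
    shards.append(ids[offsets[-1]:])
    print(shards)   # [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10], [11, 12]]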
- raise NotImplementedError("Sync PS training is not implemented yet.") diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index 9cf6e10702f..bbd4f49d95b 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -150,9 +150,6 @@ COMMON_PIP_DEPS = [ "//tensorflow/tools/docs:generate_lib", "//tensorflow/tools/docs:parser", "//tensorflow/tools/docs:py_guide_parser", - "//tensorflow/python/distribute/client:client", - "//tensorflow/python/distribute/client:parameter_server_client", - "//tensorflow/python/distribute/client:metric_utils", ] # On Windows, python binary is a zip file of runfiles tree. From 0a8177bbe007d2311b2c7b18445b4514cab8d6f3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 31 Jul 2020 09:34:24 -0700 Subject: [PATCH 1851/2522] Supports return_attention_scores option in tf.keras.layers.Attention. PiperOrigin-RevId: 324223537 Change-Id: I93c9220401c57ac343b5faa5ab52c0443e8c88ce --- .../python/keras/layers/dense_attention.py | 31 ++++- .../keras/layers/dense_attention_test.py | 126 +++++++++++++----- 2 files changed, 114 insertions(+), 43 deletions(-) diff --git a/tensorflow/python/keras/layers/dense_attention.py b/tensorflow/python/keras/layers/dense_attention.py index d3f204d661b..cd277a1a6a9 100644 --- a/tensorflow/python/keras/layers/dense_attention.py +++ b/tensorflow/python/keras/layers/dense_attention.py @@ -49,6 +49,8 @@ class BaseDenseAttention(Layer): flow of information from the future towards the past. dropout: Float between 0 and 1. Fraction of the units to drop for the attention scores. + return_attention_scores: bool, it `True`, returns the attention scores + (after masking and softmax) as an additional output argument. Call Arguments: @@ -68,15 +70,19 @@ class BaseDenseAttention(Layer): training: Python boolean indicating whether the layer should behave in training mode (adding dropout) or in inference mode (no dropout). - Output shape: + Output: Attention outputs of shape `[batch_size, Tq, dim]`. + [Optional] Attention scores after masking and softmax with shape + `[batch_size, Tq, Tv]`. """ - def __init__(self, causal=False, dropout=0.0, **kwargs): + def __init__(self, causal=False, dropout=0.0, return_attention_scores=False, + **kwargs): super(BaseDenseAttention, self).__init__(**kwargs) self.causal = causal self.dropout = dropout + self.return_attention_scores = return_attention_scores self.supports_masking = True def _calculate_scores(self, query, key): @@ -115,6 +121,8 @@ class BaseDenseAttention(Layer): Returns: Tensor of shape `[batch_size, Tq, dim]`. + Attention scores after masking and softmax with shape + `[batch_size, Tq, Tv]`. """ if scores_mask is not None: padding_mask = math_ops.logical_not(scores_mask) @@ -129,7 +137,7 @@ class BaseDenseAttention(Layer): weights = control_flow_util.smart_cond(training, dropped_weights, lambda: array_ops.identity(weights)) - return math_ops.matmul(weights, value) + return math_ops.matmul(weights, value), weights # TODO(b/125916026): Consider exposing a __call__ method with named args. def call(self, inputs, mask=None, training=None): @@ -156,12 +164,14 @@ class BaseDenseAttention(Layer): else: causal_mask = None scores_mask = _merge_masks(v_mask, causal_mask) - result = self._apply_scores( + result, attention_scores = self._apply_scores( scores=scores, value=v, scores_mask=scores_mask, training=training) if q_mask is not None: # Mask of shape [batch_size, Tq, 1]. 
q_mask = array_ops.expand_dims(q_mask, axis=-1) result *= math_ops.cast(q_mask, dtype=result.dtype) + if self.return_attention_scores: + return result, attention_scores return result def compute_mask(self, inputs, mask=None): @@ -199,6 +209,7 @@ class BaseDenseAttention(Layer): config = { 'causal': self.causal, 'dropout': self.dropout, + 'return_attention_scores': self.return_attention_scores, } base_config = super(BaseDenseAttention, self).get_config() return dict(list(base_config.items()) + list(config.items())) @@ -228,6 +239,8 @@ class Attention(BaseDenseAttention): flow of information from the future towards the past. dropout: Float between 0 and 1. Fraction of the units to drop for the attention scores. + return_attention_scores: bool, it `True`, returns the attention scores + (after masking and softmax) as an additional output argument. Call Arguments: @@ -247,9 +260,11 @@ class Attention(BaseDenseAttention): training: Python boolean indicating whether the layer should behave in training mode (adding dropout) or in inference mode (no dropout). - Output shape: + Output: Attention outputs of shape `[batch_size, Tq, dim]`. + [Optional] Attention scores after masking and softmax with shape + `[batch_size, Tq, Tv]`. The meaning of `query`, `value` and `key` depend on the application. In the case of text similarity, for example, `query` is the sequence embeddings of @@ -363,6 +378,8 @@ class AdditiveAttention(BaseDenseAttention): flow of information from the future towards the past. dropout: Float between 0 and 1. Fraction of the units to drop for the attention scores. + return_attention_scores: bool, it `True`, returns the attention scores + (after masking and softmax) as an additional output argument. Call Arguments: @@ -382,9 +399,11 @@ class AdditiveAttention(BaseDenseAttention): training: Python boolean indicating whether the layer should behave in training mode (adding dropout) or in inference mode (no dropout). - Output shape: + Output: Attention outputs of shape `[batch_size, Tq, dim]`. + [Optional] Attention scores after masking and softmax with shape + `[batch_size, Tq, Tv]`. The meaning of `query`, `value` and `key` depend on the application. In the case of text similarity, for example, `query` is the sequence embeddings of diff --git a/tensorflow/python/keras/layers/dense_attention_test.py b/tensorflow/python/keras/layers/dense_attention_test.py index 85780900593..942304e4316 100644 --- a/tensorflow/python/keras/layers/dense_attention_test.py +++ b/tensorflow/python/keras/layers/dense_attention_test.py @@ -40,11 +40,14 @@ class BaseDenseAttentionTest(test.TestCase, parameterized.TestCase): v = np.array([[[1.6]]], dtype=np.float32) # Scores mask tensor of shape [1, 1, 1] scores_mask = np.array([[[True]]], dtype=np.bool_) - actual = dense_attention.BaseDenseAttention()._apply_scores( + actual, actual_scores = dense_attention.BaseDenseAttention()._apply_scores( scores=scores, value=v, scores_mask=scores_mask) + # Expected softmax_scores = [[[1]]] + expected_scores = np.array([[[1.]]], dtype=np.float32) + self.assertAllClose(expected_scores, actual_scores) # Expected tensor of shape [1, 1, 1]. 
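With the flag added above, `call` hands back the attention scores computed in `_apply_scores` alongside the attention outputs. A short usage sketch of the patched API follows; note that in this change `return_attention_scores` is a constructor argument, and the tensor shapes below are illustrative only.

    # Sketch of the API as patched above: the flag is passed to the constructor
    # and the layer then returns an (outputs, scores) pair when called.
    import numpy as np
    from tensorflow.python.keras.layers import dense_attention

    batch, tq, tv, dim = 2, 4, 6, 8
    query = np.random.rand(batch, tq, dim).astype('float32')
    value = np.random.rand(batch, tv, dim).astype('float32')

    layer = dense_attention.Attention(return_attention_scores=True)
    outputs, scores = layer([query, value])

    print(outputs.shape)  # (2, 4, 8) -> [batch_size, Tq, dim]
    print(scores.shape)   # (2, 4, 6) -> [batch_size, Tq, Tv]

Per the commit description, the same option applies to the public `tf.keras.layers.Attention`, and `AdditiveAttention` inherits it as well, since both derive from `BaseDenseAttention`.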
- # expected000 = softmax(scores)[0, 0] * 1.6 = 1.6 + # expected000 = softmax_scores[0, 0] * 1.6 = 1.6 expected = np.array([[[1.6]]], dtype=np.float32) self.assertAllClose(expected, actual) @@ -53,11 +56,14 @@ class BaseDenseAttentionTest(test.TestCase, parameterized.TestCase): scores = np.array([[[1.1]]], dtype=np.float32) # Value tensor of shape [1, 1, 1] v = np.array([[[1.6]]], dtype=np.float32) - actual = dense_attention.BaseDenseAttention()._apply_scores( + actual, actual_scores = dense_attention.BaseDenseAttention()._apply_scores( scores=scores, value=v) + # Expected softmax_scores = [[[1]]] + expected_scores = np.array([[[1.]]], dtype=np.float32) + self.assertAllClose(expected_scores, actual_scores) # Expected tensor of shape [1, 1, 1]. - # expected000 = softmax(scores)[0, 0] * 1.6 = 1.6 + # expected000 = softmax_scores[0, 0] * 1.6 = 1.6 expected = np.array([[[1.6]]], dtype=np.float32) self.assertAllClose(expected, actual) @@ -68,15 +74,17 @@ class BaseDenseAttentionTest(test.TestCase, parameterized.TestCase): v = np.array([[[1.6], [0.7], [-0.8]]], dtype=np.float32) # Scores mask tensor of shape [1, 1, 3] scores_mask = np.array([[[True, True, False]]], dtype=np.bool_) - actual = dense_attention.BaseDenseAttention()._apply_scores( + actual, actual_scores = dense_attention.BaseDenseAttention()._apply_scores( scores=scores, value=v, scores_mask=scores_mask) - # Expected attention distribution = softmax(scores) with zeros in - # positions where v_mask == False. - # => attention_distribution000 = exp(1)/(exp(1) + exp(0)) = 0.73105857863 - # attention_distribution001 = exp(0)/(exp(1) + exp(0)) = 0.26894142137 - # attention_distribution002 = 0 - # + # Expected softmax scores = softmax(scores) with zeros in positions where + # v_mask == False. + # => softmax_scores000 = exp(1)/(exp(1) + exp(0)) = 0.73105857863 + # softmax_scores001 = exp(0)/(exp(1) + exp(0)) = 0.26894142137 + # softmax_scores002 = 0 + expected_scores = np.array( + [[[0.73105857863, 0.26894142137, 0.]]], dtype=np.float32) + self.assertAllClose(expected_scores, actual_scores) # Expected tensor of shape [1, 1, 1]. # expected000 = 0.73105857863 * 1.6 + 0.26894142137 * 0.7 - 0 * 0.8 # = 1.35795272077 @@ -88,17 +96,19 @@ class BaseDenseAttentionTest(test.TestCase, parameterized.TestCase): scores = np.array([[[1., 0., 1.]]], dtype=np.float32) # Value tensor of shape [1, 3, 1] v = np.array([[[1.6], [0.7], [-0.8]]], dtype=np.float32) - actual = dense_attention.BaseDenseAttention()._apply_scores( + actual, actual_scores = dense_attention.BaseDenseAttention()._apply_scores( scores=scores, value=v) - # Expected attention distribution = softmax(scores). - # => attention_distribution000 = exp(1)/(exp(1) + exp(0) + exp(1)) - # = 0.42231879825 - # attention_distribution001 = exp(0)/(exp(1) + exp(0) + exp(1)) - # = 0.15536240349 - # attention_distribution002 = exp(1)/(exp(1) + exp(0) + exp(1)) - # = 0.42231879825 - # + # Expected softmax_scores = softmax(scores). + # => softmax_scores000 = exp(1)/(exp(1) + exp(0) + exp(1)) + # = 0.42231879825 + # softmax_scores001 = exp(0)/(exp(1) + exp(0) + exp(1)) + # = 0.15536240349 + # softmax_scores002 = exp(1)/(exp(1) + exp(0) + exp(1)) + # = 0.42231879825 + expected_scores = np.array( + [[[0.42231879825, 0.15536240349, 0.42231879825]]], dtype=np.float32) + self.assertAllClose(expected_scores, actual_scores) # Expected tensor of shape [1, 1, 1]. 
# expected000 = 0.42231879825 * 1.6 + 0.15536240349 * 0.7 # - 0.42231879825 * 0.8 @@ -113,12 +123,15 @@ class BaseDenseAttentionTest(test.TestCase, parameterized.TestCase): v = np.array([[[1.6]], [[2.6]]], dtype=np.float32) # Scpres mask tensor of shape [2, 1, 1] scores_mask = np.array([[[True]], [[True]]], dtype=np.bool_) - actual = dense_attention.BaseDenseAttention()._apply_scores( + actual, actual_scores = dense_attention.BaseDenseAttention()._apply_scores( scores=scores, value=v, scores_mask=scores_mask) + # Expected softmax_scores = [[[1]], [[1]]] + expected_scores = np.array([[[1.]], [[1.]]], dtype=np.float32) + self.assertAllClose(expected_scores, actual_scores) # Expected tensor of shape [2, 1, 1]. - # expected000 = softmax(scores)[0, 0] * 1.6 = 1.6 - # expected100 = softmax(scores)[1, 0] * 2.6 = 2.6 + # expected000 = softmax_scores[0, 0] * 1.6 = 1.6 + # expected100 = softmax_scores[1, 0] * 2.6 = 2.6 expected = np.array([[[1.6]], [[2.6]]], dtype=np.float32) self.assertAllClose(expected, actual) @@ -131,9 +144,13 @@ class BaseDenseAttentionTest(test.TestCase, parameterized.TestCase): dim = 7 scores = np.ones((batch_size, tq, tv)) value = np.ones((batch_size, tv, dim)) - actual = dense_attention.BaseDenseAttention(dropout=0.1)._apply_scores( - scores=scores, value=value, training=False) + actual, actual_scores = dense_attention.BaseDenseAttention( + dropout=0.1)._apply_scores( + scores=scores, value=value, training=False) + # Expected Tensor of shape `[batch_size, tq, tv]`. + expected_scores_shape = [batch_size, tq, tv] + self.assertAllEqual(expected_scores_shape, array_ops.shape(actual_scores)) # Expected Tensor of shape `[batch_size, tq, dim]`. expected_shape = [batch_size, tq, dim] self.assertAllEqual(expected_shape, array_ops.shape(actual)) @@ -312,7 +329,11 @@ class AttentionTest(test.TestCase, parameterized.TestCase): expected = np.array([[[0.58127362329]]], dtype=np.float32) self.assertAllClose(expected, actual) - def test_multi_dim_with_query_mask(self): + @parameterized.named_parameters( + ('', False), + ('return_attention_scores', True), + ) + def test_multi_dim_with_query_mask(self, return_attention_scores): # Query tensor of shape [1, 2, 1] q = np.array([[[1.1], [-0.5]]], dtype=np.float32) # Value tensor of shape [1, 3, 1] @@ -321,8 +342,12 @@ class AttentionTest(test.TestCase, parameterized.TestCase): q_mask = np.array([[True, False]], dtype=np.bool_) # Value mask tensor of shape [1, 3] v_mask = np.array([[True, True, False]], dtype=np.bool_) - attention_layer = dense_attention.Attention() - actual = attention_layer([q, v], mask=[q_mask, v_mask]) + attention_layer = dense_attention.Attention( + return_attention_scores=return_attention_scores) + if return_attention_scores: + actual, actual_scores = attention_layer([q, v], mask=[q_mask, v_mask]) + else: + actual = attention_layer([q, v], mask=[q_mask, v_mask]) # Expected scores of shape [1, 2, 3] # scores = [[[1.1*1.6, 1.1*0.7, -1.1*0.8], [-0.5*1.6, -0.5*0.7, 0.5*0.8]]] @@ -339,7 +364,12 @@ class AttentionTest(test.TestCase, parameterized.TestCase): # attention_distribution011 = exp(-0.35)/(exp(-0.8) + exp(-0.35)) # = 0.61063923394 # attention_distribution012 = 0 - # + if return_attention_scores: + expected_scores = np.array( + [[[0.72908792234, 0.27091207765, 0.], + [0.38936076605, 0.61063923394, 0.]]], + dtype=np.float32) + self.assertAllClose(expected_scores, actual_scores) # Expected tensor of shape [1, 2, 1] with zeros where q_mask == False. 
# expected000 = 0.72908792234 * 1.6 + 0.27091207765 * 0.7 - 0 * 0.8 # = 1.3561791301 @@ -368,11 +398,19 @@ class AttentionTest(test.TestCase, parameterized.TestCase): sess.run(attention_layer.scale.initializer) self.assertAllClose(1., attention_layer.scale.value()) - def test_self_attention_causal(self): + @parameterized.named_parameters( + ('', False), + ('return_attention_scores', True), + ) + def test_self_attention_causal(self, return_attention_scores): # Query-value tensor of shape [1, 3, 1] q = np.array([[[0.5], [0.8], [-0.3]]], dtype=np.float32) - attention_layer = dense_attention.Attention(causal=True) - actual = attention_layer([q, q]) + attention_layer = dense_attention.Attention( + causal=True, return_attention_scores=return_attention_scores) + if return_attention_scores: + actual, actual_scores = attention_layer([q, q]) + else: + actual = attention_layer([q, q]) # Expected scores of shape [1, 3, 3] # scores = [[0.25, 0.4, -0.15], [0.4, 0.64, -0.24], [-0.15, -0.24, 0.09]] @@ -385,7 +423,13 @@ class AttentionTest(test.TestCase, parameterized.TestCase): # = [exp(-0.15), exp(-0.24), exp(0.09)] # / (exp(-0.15) + exp(-0.24) + exp(0.09)) # = [0.31395396638, 0.28693232061, 0.399113713] - # + if return_attention_scores: + expected_scores = np.array( + [[[1., 0., 0.], + [0.44028635073, 0.55971364926, 0.], + [0.31395396638, 0.28693232061, 0.399113713]]], + dtype=np.float32) + self.assertAllClose(expected_scores, actual_scores) # Expected tensor of shape [1, 3, 1]. # expected000 = 0.5 # expected010 = 0.44028635073 * 0.5 + 0.55971364926 * 0.8 @@ -455,17 +499,25 @@ class AttentionTest(test.TestCase, parameterized.TestCase): actual = attention_layer([q, v]) self.assertAllClose([[[0], [1]]], actual) - def test_serialization(self): + @parameterized.named_parameters( + ('', False, False), + ('use_scale', True, False), + ('return_attention_scores', False, True), + ) + def test_serialization(self, use_scale, return_attention_scores): # Test serialization with use_scale - layer = dense_attention.Attention(use_scale=True) + layer = dense_attention.Attention( + use_scale=use_scale, return_attention_scores=return_attention_scores) config = keras.layers.serialize(layer) new_layer = keras.layers.deserialize(config) - self.assertEqual(new_layer.use_scale, True) + self.assertEqual(new_layer.use_scale, use_scale) + self.assertEqual(new_layer.return_attention_scores, return_attention_scores) config = layer.get_config() new_layer = dense_attention.Attention.from_config(config) - self.assertEqual(new_layer.use_scale, True) + self.assertEqual(new_layer.use_scale, use_scale) + self.assertEqual(new_layer.return_attention_scores, return_attention_scores) @combinations.generate(combinations.combine(mode=['graph', 'eager'])) From d8d6160034b6c98c96798b03c6c31d8cfca3935c Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 31 Jul 2020 09:49:54 -0700 Subject: [PATCH 1852/2522] Drop log level for a few log lines in the HLO parser The goal is to make it consistent with other logs in the same file PiperOrigin-RevId: 324226412 Change-Id: I69fa71a20e1cfd2ba1b844749164df4cd7d8ffbb --- tensorflow/compiler/xla/service/hlo_parser.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc index d47be84e7fc..0530062c43b 100644 --- a/tensorflow/compiler/xla/service/hlo_parser.cc +++ b/tensorflow/compiler/xla/service/hlo_parser.cc @@ -3599,7 +3599,7 @@ bool HloParserImpl::ParseHloComputationList( if (!ParseHloComputation(&computation)) { return false; } - LOG(INFO) << "parsed computation " << computation->name(); + VLOG(3) << "parsed computation " << computation->name(); result->push_back(computation); return true; }; @@ -4117,7 +4117,7 @@ bool HloParserImpl::ParseFftType(FftType* result) { } bool HloParserImpl::ParseComparisonDirection(ComparisonDirection* result) { - VLOG(1) << "ParseComparisonDirection"; + VLOG(3) << "ParseComparisonDirection"; if (lexer_.GetKind() != TokKind::kIdent) { return TokenError("expects comparison direction"); } From b511ad39baa4724a48efa371b68c1a4f92f9d6b8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 31 Jul 2020 10:03:28 -0700 Subject: [PATCH 1853/2522] Output meaningful logs for tf profiler. PiperOrigin-RevId: 324229311 Change-Id: I95fbd8c90905e3c2dc78167aadb2e6c2db774bca --- tensorflow/core/profiler/internal/tfprof_node.h | 4 ++++ tensorflow/python/profiler/tfprof_logger.py | 11 +++++------ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/profiler/internal/tfprof_node.h b/tensorflow/core/profiler/internal/tfprof_node.h index 4ce8f20ec00..5b2cd5fc309 100644 --- a/tensorflow/core/profiler/internal/tfprof_node.h +++ b/tensorflow/core/profiler/internal/tfprof_node.h @@ -325,11 +325,13 @@ class TFGraphNode { (*node_.mutable_attrs())[attr.first].MergeFrom(attr.second); if (attr.first == "shape" && attr.second.has_shape()) { if (!shape_.empty()) { + absl::FPrintF(stderr, "Found duplicated shapes!\n"); continue; } shape_ = ShapeProtoToVec(attr.second.shape()); } else if (attr.first == "_output_shapes" && attr.second.has_list()) { if (!output_shapes_.empty()) { + absl::FPrintF(stderr, "Found duplicated output shapes!\n"); continue; } for (int i = 0; i < attr.second.list().shape_size(); ++i) { @@ -665,6 +667,8 @@ class TFGraphNode { } if (complete_shape) { return params; + } else { + absl::FPrintF(stderr, "Incomplete shape.\n"); } } return 0; diff --git a/tensorflow/python/profiler/tfprof_logger.py b/tensorflow/python/profiler/tfprof_logger.py index 27a1d360c43..8aff8cec085 100644 --- a/tensorflow/python/profiler/tfprof_logger.py +++ b/tensorflow/python/profiler/tfprof_logger.py @@ -91,7 +91,7 @@ def _get_logged_ops(graph, run_meta=None, add_trace=True, if run_meta: graph = _fill_missing_graph_shape(graph, run_meta) - missing_shape_ops = [] + op_missing_shape = 0 logged_ops = {} string_to_id = {} string_to_id['none'] = len(string_to_id) @@ -102,7 +102,7 @@ def _get_logged_ops(graph, run_meta=None, add_trace=True, graph, op.node_def, REGISTERED_FLOP_STATS) except ValueError: # Catch Exception When shape is incomplete. Skip it. 
- missing_shape_ops.append(op.name) + op_missing_shape += 1 stats = None entry = tfprof_log_pb2.OpLogEntry() @@ -136,10 +136,9 @@ def _get_logged_ops(graph, run_meta=None, add_trace=True, else: logged_ops[v.op.name].types.append(TRAINABLE_VARIABLES) - if missing_shape_ops and not run_meta: - sys.stderr.write( - '%d ops have no flops stats due to incomplete shapes: [%s] \n' % - len(missing_shape_ops), missing_shape_ops) + if op_missing_shape > 0 and not run_meta: + sys.stderr.write('%d ops no flops stats due to incomplete shapes.\n' % + op_missing_shape) return logged_ops, string_to_id From b51bba348602ef9e9d4e6269d019a40ec3d74c30 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 31 Jul 2020 10:22:40 -0700 Subject: [PATCH 1854/2522] Qualify uses of std::string PiperOrigin-RevId: 324233556 Change-Id: I5537b9bd231c0ea1f267406a9b19ec943928e8cd --- tensorflow/core/util/device_name_utils.h | 25 ++++++++++++------------ tensorflow/core/util/padding.h | 6 +++--- tensorflow/core/util/tensor_format.h | 18 ++++++++--------- tensorflow/core/util/util.h | 4 ++-- 4 files changed, 27 insertions(+), 26 deletions(-) diff --git a/tensorflow/core/util/device_name_utils.h b/tensorflow/core/util/device_name_utils.h index 93fab444da4..a1515ba8508 100644 --- a/tensorflow/core/util/device_name_utils.h +++ b/tensorflow/core/util/device_name_utils.h @@ -46,8 +46,8 @@ namespace tensorflow { class DeviceNameUtils { public: // Returns a fully qualified device name given the parameters. - static string FullName(const string& job, int replica, int task, - const string& type, int id); + static std::string FullName(const std::string& job, int replica, int task, + const std::string& type, int id); struct ParsedName { void Clear() { @@ -79,13 +79,13 @@ class DeviceNameUtils { } bool has_job = false; - string job; + std::string job; bool has_replica = false; int replica = 0; bool has_task = false; int task = 0; bool has_type = false; - string type; + std::string type; bool has_id = false; int id = 0; }; @@ -107,7 +107,7 @@ class DeviceNameUtils { // an error and *canonical_name is set to "". static Status CanonicalizeDeviceName(StringPiece fullname, StringPiece basename, - string* canonical_name); + std::string* canonical_name); // Returns true if "name" specifies any non-trivial constraint on the device. static bool HasSomeDetails(const ParsedName& name) { @@ -163,11 +163,11 @@ class DeviceNameUtils { static const ParsedName AddressSpace(const ParsedName& name); // Returns the local device given its "type" and "id". - static string LocalName(StringPiece type, int id); + static std::string LocalName(StringPiece type, int id); // Returns a short local device name (cpu:0, gpu:1, etc) based on // the given fullname. - static string LocalName(StringPiece fullname); + static std::string LocalName(StringPiece fullname); // If "name" is a valid local device name (cpu:0, gpu:1, etc.), // fills in parsed.type and parsed.id accordingly. Returns true iff @@ -181,13 +181,14 @@ class DeviceNameUtils { // component into *device. This function will still return true if // the task component is empty, but it requires the relative device // component to be fully specified. - static bool SplitDeviceName(StringPiece name, string* task, string* device); + static bool SplitDeviceName(StringPiece name, std::string* task, + std::string* device); // Get the task name from ParsedName. Return false if the task component is // not fully specified. 
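The pattern behind these string-to-std::string changes is easier to see in isolation. The sketch below is illustrative rather than TensorFlow code: it assumes the enclosing namespace provides an alias along the lines TensorFlow's platform headers historically did, which is why the unqualified spelling compiled in the first place and why the qualified form is a pure spelling change.

#include <string>

namespace demo {                  // hypothetical namespace standing in for tensorflow
using string = std::string;       // assumed alias, similar to the platform-provided one

// Before: the declaration leans on the namespace-level alias being visible.
string LocalNameBefore(int id) { return "cpu:" + std::to_string(id); }

// After: the standard type is spelled out, so the header no longer depends on
// the alias, while the signature and behavior are unchanged.
std::string LocalNameAfter(int id) { return "cpu:" + std::to_string(id); }
}  // namespace demo

int main() { return demo::LocalNameBefore(0) == demo::LocalNameAfter(0) ? 0 : 1; }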
- static bool GetTaskName(const ParsedName& pn, string* task); + static bool GetTaskName(const ParsedName& pn, std::string* task); - static string ParsedNameToString(const ParsedName& pn); + static std::string ParsedNameToString(const ParsedName& pn); // Returns canonical and legacy full names for the given parsed // device name 'pn'. The returned string names are often useful to @@ -202,8 +203,8 @@ class DeviceNameUtils { // Returns name of the CPU:0 device on the same host as the device // `device_name`. - static Status DeviceNameToCpuDeviceName(const string& device_name, - string* host_device_name); + static Status DeviceNameToCpuDeviceName(const std::string& device_name, + std::string* host_device_name); }; std::ostream& operator<<(std::ostream& os, diff --git a/tensorflow/core/util/padding.h b/tensorflow/core/util/padding.h index 90e353d23fa..b96c47753b8 100644 --- a/tensorflow/core/util/padding.h +++ b/tensorflow/core/util/padding.h @@ -53,12 +53,12 @@ Status CheckValidPadding(Padding padding_type, // Return the string containing the list of valid padding types, that can be // used as an Attr() in REGISTER_OP. -string GetPaddingAttrString(); +std::string GetPaddingAttrString(); // Like GetPaddingAttrString(), but also includes EXPLICIT. -string GetPaddingAttrStringWithExplicit(); +std::string GetPaddingAttrStringWithExplicit(); -string GetExplicitPaddingsAttrString(); +std::string GetExplicitPaddingsAttrString(); // Sets padding value based on the given string padding value. Status GetPaddingFromString(StringPiece str_value, Padding* value); diff --git a/tensorflow/core/util/tensor_format.h b/tensorflow/core/util/tensor_format.h index aea7021d0bd..d2d7b9e58de 100644 --- a/tensorflow/core/util/tensor_format.h +++ b/tensorflow/core/util/tensor_format.h @@ -97,18 +97,18 @@ enum FilterTensorFormat { // Parse tensor format from the given string. // Return true if the parsing succeeds, and false if it fails. -bool FormatFromString(const string& format_str, TensorFormat* format); +bool FormatFromString(const std::string& format_str, TensorFormat* format); // Parse tensor format from the given string. // Return true if the parsing succeeds, and false if it fails. -bool FilterFormatFromString(const string& format_str, +bool FilterFormatFromString(const std::string& format_str, FilterTensorFormat* format); // Convert a tensor format into string. -string ToString(TensorFormat format); +std::string ToString(TensorFormat format); // Convert a filter tensor format into string. -string ToString(FilterTensorFormat format); +std::string ToString(FilterTensorFormat format); // Returns the number of spatial dims of a tensor of rank 'num_dims' and tensor // format 'format'. @@ -504,13 +504,13 @@ inline void GetExplicitPaddingForDim( } // Return the string that specifies the data format for convnet operations. -string GetConvnetDataFormatAttrString(); -string GetConvnet3dDataFormatAttrString(); +std::string GetConvnetDataFormatAttrString(); +std::string GetConvnet3dDataFormatAttrString(); // Return the string that specifies the filter format for convnet operations. -string GetConvnetFilterFormatAttrString(); -string GetConvnet3dFilterFormatAttrString(); -string GetConvnetDataFormat2D3DAttrString(); +std::string GetConvnetFilterFormatAttrString(); +std::string GetConvnet3dFilterFormatAttrString(); +std::string GetConvnetDataFormat2D3DAttrString(); // Returns a tensor shape for the specified format and dimension sizes. // Works for both 2D and 3D operations. 
The output shapes are as follows: diff --git a/tensorflow/core/util/util.h b/tensorflow/core/util/util.h index 4aa47aa48a2..74b3ec79eb0 100644 --- a/tensorflow/core/util/util.h +++ b/tensorflow/core/util/util.h @@ -49,12 +49,12 @@ class MovingAverage { // Returns a string printing bytes in ptr[0..n). The output looks // like "00 01 ef cd cd ef". -string PrintMemory(const char* ptr, size_t n); +std::string PrintMemory(const char* ptr, size_t n); // Given a flattened index into a tensor, computes a string s so that // StrAppend("tensor", s) is a Python indexing expression. E.g., // "tensor", "tensor[i]", "tensor[i, j]", etc. -string SliceDebugString(const TensorShape& shape, const int64 flat); +std::string SliceDebugString(const TensorShape& shape, const int64 flat); // disable MKL in runtime #ifdef INTEL_MKL From 0939a55e78256c428974e42474f188dda165e8a4 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Fri, 31 Jul 2020 10:29:00 -0700 Subject: [PATCH 1855/2522] Fold Identity op in TF to CoreRT dialect lowering pass. PiperOrigin-RevId: 324234936 Change-Id: I157cf292431eb693fec34b2c136e82bb1df3f859 --- tensorflow/compiler/mlir/tensorflow/BUILD | 1 - .../mlir/tensorflow/tests/device_copy.mlir | 16 ---- .../mlir/tensorflow/transforms/passes.h | 5 -- .../tensor_device_copy_conversion.cc | 81 ------------------- .../benchmarks/resnet50/resnet50_test.py | 1 + 5 files changed, 1 insertion(+), 103 deletions(-) delete mode 100644 tensorflow/compiler/mlir/tensorflow/tests/device_copy.mlir delete mode 100644 tensorflow/compiler/mlir/tensorflow/transforms/tensor_device_copy_conversion.cc diff --git a/tensorflow/compiler/mlir/tensorflow/BUILD b/tensorflow/compiler/mlir/tensorflow/BUILD index 07db8b15297..f4fdbff2df1 100644 --- a/tensorflow/compiler/mlir/tensorflow/BUILD +++ b/tensorflow/compiler/mlir/tensorflow/BUILD @@ -748,7 +748,6 @@ cc_library( "transforms/sink_constant.cc", "transforms/stack_ops_decomposition.cc", "transforms/tensor_array_ops_decomposition.cc", - "transforms/tensor_device_copy_conversion.cc", "transforms/tensor_list_ops_decomposition.cc", "transforms/test_side_effect_analysis.cc", "transforms/tf_data_optimization_pass.cc", diff --git a/tensorflow/compiler/mlir/tensorflow/tests/device_copy.mlir b/tensorflow/compiler/mlir/tensorflow/tests/device_copy.mlir deleted file mode 100644 index 8250bcf7101..00000000000 --- a/tensorflow/compiler/mlir/tensorflow/tests/device_copy.mlir +++ /dev/null @@ -1,16 +0,0 @@ -// RUN: tf-opt -tf-tensor-device-copy %s | FileCheck %s --dump-input=fail - -// CHECK-LABEL: func @fold_identity -// CHECK-SAME: ([[arg0:%.*]]: tensor<2x2xf32>, [[arg1:%.*]]: tensor<2x2xf32> -module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32}} { - func @fold_identity(%arg0: tensor<2x2xf32>, %arg1: tensor<2x2xf32>) -> tensor<2x2xf32> { - %0 = tf_executor.graph { - // CHECK: tf.MatMul - %outputs, %control = tf_executor.island wraps "tf.MatMul"(%arg0, %arg1) {device = "", transpose_a = false, transpose_b = false} : (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> - // CHECK-NOT: tf.Identity - %outputs_0, %control_1 = tf_executor.island wraps "tf.Identity"(%outputs) {device = ""} : (tensor<2x2xf32>) -> tensor<2x2xf32> - tf_executor.fetch %outputs_0 : tensor<2x2xf32> - } - return %0 : tensor<2x2xf32> - } -} diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/passes.h b/tensorflow/compiler/mlir/tensorflow/transforms/passes.h index 1876ffde7d9..3aec322308d 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/passes.h +++ 
b/tensorflow/compiler/mlir/tensorflow/transforms/passes.h @@ -76,11 +76,6 @@ std::unique_ptr> CreateRewriteTPUEmbeddingOpsPass(); // Performs specific fusion for GPU targets. std::unique_ptr> CreateGpuOpFusionPass(); -// Create a pass that convert ops that copy tensors between devices, e.g. -// tf.Identity. -std::unique_ptr> -CreateTensorDeviceCopyConversionPass(); - struct LayoutOptimizationPipelineOptions : public PassPipelineOptions { Option force_data_format{ diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tensor_device_copy_conversion.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tensor_device_copy_conversion.cc deleted file mode 100644 index f14efeb91ce..00000000000 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tensor_device_copy_conversion.cc +++ /dev/null @@ -1,81 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "mlir/Dialect/StandardOps/IR/Ops.h" -#include "mlir/Pass/PassManager.h" -#include "mlir/Transforms/DialectConversion.h" -#include "mlir/Transforms/Passes.h" -#include "mlir/IR/OperationSupport.h" // from @llvm-project -#include "mlir/IR/StandardTypes.h" // from @llvm-project -#include "mlir/IR/Types.h" // from @llvm-project -#include "mlir/Pass/PassOptions.h" // from @llvm-project -#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" -#include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" -#include "tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.h" - -namespace mlir { -namespace TF { -namespace { - -// Deletes the op and forwards the arguments. -template -class PassThroughConversion : public mlir::OpConversionPattern { - public: - explicit PassThroughConversion(MLIRContext *context) - : mlir::OpConversionPattern(context) {} - - LogicalResult matchAndRewrite( - TF_Op op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override { // NOLINT - // Just forward the arguments to results. - rewriter.replaceOp(op, operands); - return success(); - } -}; - -class TensorDeviceCopyConversionPass - : public PassWrapper { - public: - void runOnFunction() override { - mlir::OwningRewritePatternList patterns; - mlir::ConversionTarget target(getContext()); - - // TODO(tfrt-devs): when device placer is introduced in the lowering pass, - // we need to check if Identity op and it's previous op are placed on the - // same device. If not, we don't fold Identity op since it's used for tensor - // copying between devices. - patterns.insert, - PassThroughConversion>(&getContext()); - - if (failed(applyPartialConversion(getFunction(), target, patterns))) { - signalPassFailure(); - } - } -}; - -} // namespace - -std::unique_ptr> -CreateTensorDeviceCopyConversionPass() { - return std::make_unique(); -} - -static mlir::PassRegistration - tensor_device_copy_pass( - "tf-tensor-device-copy", - "Handle ops that copy tensors between devices. 
E.g., tf.Identity."); - -} // namespace TF -} // namespace mlir diff --git a/tensorflow/python/eager/benchmarks/resnet50/resnet50_test.py b/tensorflow/python/eager/benchmarks/resnet50/resnet50_test.py index 8b45b1a645d..e034cf0e296 100644 --- a/tensorflow/python/eager/benchmarks/resnet50/resnet50_test.py +++ b/tensorflow/python/eager/benchmarks/resnet50/resnet50_test.py @@ -113,6 +113,7 @@ class ResNet50Test(tf.test.TestCase): def test_apply_async(self): self._apply(defun=False, execution_mode=context.ASYNC) + @test_util.disable_tfrt('Graph is not supported yet. b/156187905') def test_apply_with_defun(self): self._apply(defun=True) From 04112a1910a75b09ae6f46092afb36719c385bc0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 31 Jul 2020 10:29:36 -0700 Subject: [PATCH 1856/2522] Qualify uses of std::string PiperOrigin-RevId: 324235054 Change-Id: Ia0f0279b70bac000ff67334a5ca871bd4ec6ef5e --- .../core/common_runtime/gpu/gpu_device.h | 15 +++---- tensorflow/core/common_runtime/gpu/gpu_init.h | 2 +- tensorflow/core/graph/graph.h | 42 +++++++++---------- 3 files changed, 30 insertions(+), 29 deletions(-) diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.h b/tensorflow/core/common_runtime/gpu/gpu_device.h index 5609334ce9c..5d4a8abad25 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device.h +++ b/tensorflow/core/common_runtime/gpu/gpu_device.h @@ -51,9 +51,9 @@ class GPUKernelTracker; class BaseGPUDevice : public LocalDevice { public: - BaseGPUDevice(const SessionOptions& options, const string& name, + BaseGPUDevice(const SessionOptions& options, const std::string& name, Bytes memory_limit, const DeviceLocality& locality, - TfGpuId tf_gpu_id, const string& physical_device_desc, + TfGpuId tf_gpu_id, const std::string& physical_device_desc, Allocator* gpu_allocator, Allocator* cpu_allocator, bool sync_every_op); @@ -160,8 +160,8 @@ class BaseGPUDevice : public LocalDevice { void ReinitializeDevice(OpKernelContext* context, PerOpGpuDevice* device, int stream_id, Allocator* allocator); - string ComputeOpKernelDebugString(const OpKernel& op_kernel, - const int& stream_id); + std::string ComputeOpKernelDebugString(const OpKernel& op_kernel, + const int& stream_id); // This method returns an initialization status, in addition to // calling the "done" StatusCallback, if there is a failure to @@ -309,14 +309,15 @@ class GPUKernelTracker { class BaseGPUDeviceFactory : public DeviceFactory { public: Status ListPhysicalDevices(std::vector* devices) override; - Status CreateDevices(const SessionOptions& options, const string& name_prefix, + Status CreateDevices(const SessionOptions& options, + const std::string& name_prefix, std::vector>* devices) override; Status GetDeviceDetails(int device_index, std::unordered_map* details) override; struct InterconnectMap { // Name of interconnect technology, if known. - string name; + std::string name; // If possible, strength should approximate Gb/sec bandwidth rate. // Where architecture-specific subclassing is not done that won't // always be possible. The minimum expectation is that @@ -351,7 +352,7 @@ class BaseGPUDeviceFactory : public DeviceFactory { // 'memory_limit' bytes of GPU memory to it, and adds it to the 'devices' // vector. 
Status CreateGPUDevice(const SessionOptions& options, - const string& name_prefix, TfGpuId tf_gpu_id, + const std::string& name_prefix, TfGpuId tf_gpu_id, int64 memory_limit, const DeviceLocality& dev_locality, std::vector>* devices); diff --git a/tensorflow/core/common_runtime/gpu/gpu_init.h b/tensorflow/core/common_runtime/gpu/gpu_init.h index 4c8f0868df0..b1a82390147 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_init.h +++ b/tensorflow/core/common_runtime/gpu/gpu_init.h @@ -38,7 +38,7 @@ stream_executor::Platform* GPUMachineManager(); // Returns the string describing the name of the GPU platform in use. // This value is "CUDA" by default, and // "ROCM" when TF is built with `--config==rocm` -string GpuPlatformName(); +std::string GpuPlatformName(); } // namespace tensorflow diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h index 7174c5a20a3..3d3921d68c0 100644 --- a/tensorflow/core/graph/graph.h +++ b/tensorflow/core/graph/graph.h @@ -72,12 +72,12 @@ class NodeIter; // Declared below class Node { public: - string DebugString() const; + std::string DebugString() const; int id() const { return id_; } int cost_id() const { return cost_id_; } - const string& name() const; - void set_name(string name); - const string& type_string() const; + const std::string& name() const; + void set_name(std::string name); + const std::string& type_string() const; // def() provides the NodeDef the user supplied, but the specifics // of this Node may have changed due to placement, optimization, etc. @@ -103,11 +103,11 @@ class Node { // The device requested by the user. For the actual assigned device, // use assigned_device_name() below. - const string& requested_device() const; + const std::string& requested_device() const; // This changes the user requested device but not necessarily the device that // on which the operation will run. - void set_requested_device(const string& device); + void set_requested_device(const std::string& device); // This gives the device the runtime has assigned this node to. If // you want the device the user requested, use def().device() instead. @@ -115,8 +115,8 @@ class Node { // fully specifies a device, and satisfies def().device(). // TODO(josh11b): Move assigned_device_name outside of Node into a // NodeId->DeviceName map. - const string& assigned_device_name() const; - void set_assigned_device_name(const string& device_name); + const std::string& assigned_device_name() const; + void set_assigned_device_name(const std::string& device_name); bool has_assigned_device_name() const { return assigned_device_name_index_ > 0; } @@ -196,17 +196,17 @@ class Node { bool IsRetval() const { return class_ == NC_RETVAL; } template - void AddAttr(const string& name, const T& val) { + void AddAttr(const std::string& name, const T& val) { SetAttrValue(val, AddAttrHelper(name)); UpdateProperties(); } - void AddAttr(const string& name, std::vector&& val) { + void AddAttr(const std::string& name, std::vector&& val) { MoveAttrValue(std::move(val), AddAttrHelper(name)); UpdateProperties(); } - void ClearAttr(const string& name); + void ClearAttr(const std::string& name); // Returns into '*e' the edge connecting to the 'idx' input of this Node. Status input_edge(int idx, const Edge** e) const; @@ -250,7 +250,7 @@ class Node { // property of the node (stored in props_). 
void UpdateProperties(); - AttrValue* AddAttrHelper(const string& name); + AttrValue* AddAttrHelper(const std::string& name); // A set of mutually exclusive classes for different kinds of nodes, // class_ is initialized in the Node::Initialize routine based on the @@ -292,7 +292,7 @@ class Node { void Initialize(int id, int cost_id, std::shared_ptr props, NodeClass node_class); - static NodeClass GetNodeClassForOp(const string& ts); + static NodeClass GetNodeClassForOp(const std::string& ts); int id_; // -1 until Initialize() is called int cost_id_; // -1 if there is no corresponding cost accounting node @@ -329,7 +329,7 @@ class Node { // Stores debug information associated with the Node. struct NodeDebugInfo { - const string name; + const std::string name; std::vector original_node_names; NodeDebugInfo(const Node& n); @@ -398,7 +398,7 @@ class Edge { // (as opposed to a data-flow) dependency. bool IsControlEdge() const; - string DebugString() const; + std::string DebugString() const; private: Edge() {} @@ -595,7 +595,7 @@ class Graph { // Generate new node name with the specified prefix that is unique // across this graph. - string NewName(StringPiece prefix); + std::string NewName(StringPiece prefix); // Access to the list of all nodes. Example usage: // for (Node* node : graph.nodes()) { ... } @@ -639,9 +639,9 @@ class Graph { DCHECK_LT(index, static_cast(device_names_.size())); } - int InternDeviceName(const string& device_name); + int InternDeviceName(const std::string& device_name); - const string& get_assigned_device_name(const Node& node) const { + const std::string& get_assigned_device_name(const Node& node) const { return device_names_[node.assigned_device_name_index()]; } @@ -650,7 +650,7 @@ class Graph { node->assigned_device_name_index_ = device_name_index; } - void set_assigned_device_name(Node* node, const string& device_name) { + void set_assigned_device_name(Node* node, const std::string& device_name) { node->assigned_device_name_index_ = InternDeviceName(device_name); } @@ -925,11 +925,11 @@ inline void Node::set_assigned_device_name_index(int index) { assigned_device_name_index_ = index; } -inline void Node::set_assigned_device_name(const string& device_name) { +inline void Node::set_assigned_device_name(const std::string& device_name) { graph_->set_assigned_device_name(this, device_name); } -inline const string& Node::assigned_device_name() const { +inline const std::string& Node::assigned_device_name() const { return graph_->get_assigned_device_name(*this); } From 906a2f68a9248f03e0386cba2c9757adbdcb5ded Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Fri, 31 Jul 2020 10:44:47 -0700 Subject: [PATCH 1857/2522] Add more test coverage for TPUStrategy with distribute.Strategy tests. 
PiperOrigin-RevId: 324238458 Change-Id: I47caae58d871138d54152aae47cb95fb151e8e53 --- .../python/distribute/strategy_test_lib.py | 5 ++-- .../python/distribute/tpu_strategy_test.py | 25 +++++++++++++++---- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/distribute/strategy_test_lib.py b/tensorflow/python/distribute/strategy_test_lib.py index 9ffbe9424b5..0bc4c6fca68 100644 --- a/tensorflow/python/distribute/strategy_test_lib.py +++ b/tensorflow/python/distribute/strategy_test_lib.py @@ -428,7 +428,7 @@ class DistributionTestBase(test.TestCase): global_step_values = self.evaluate(global_step_tensors) self.assertEqual((1,) * len(global_step_tensors), global_step_values) - def _test_numpy_dataset(self, strategy, session=None): + def _test_numpy_dataset(self, strategy, session=None, run_in_function=False): cached_session = session or self.cached_session() with strategy.scope(), cached_session as sess: x = np.asarray([[1, 2], [6, 12], [2, 4], [5, 10], [3, 6], [4, 8]]) @@ -449,7 +449,8 @@ class DistributionTestBase(test.TestCase): self.evaluate(i.initializer) def run_and_concatenate(strategy, i): - x, y = strategy.experimental_run(lambda z: z, i) + x, y = strategy.experimental_run( + _maybe_run_in_function(lambda z: z, run_in_function), i) x, y = self.evaluate((strategy.experimental_local_results(x), strategy.experimental_local_results(y))) return np.concatenate(x), np.concatenate(y) diff --git a/tensorflow/python/distribute/tpu_strategy_test.py b/tensorflow/python/distribute/tpu_strategy_test.py index 2f5be9c211d..c1318927ca8 100644 --- a/tensorflow/python/distribute/tpu_strategy_test.py +++ b/tensorflow/python/distribute/tpu_strategy_test.py @@ -772,11 +772,6 @@ class TPUStrategyDistributionTest( # Verify isolate_session_state self.assertTrue(new_config.isolate_session_state) - # TODO(b/158110684): enable this test. - def disable_test_numpy_dataset(self): - strategy = get_tpu_strategy() - self._test_numpy_dataset(strategy) - def test_make_input_fn_iterable(self): dataset_fn = lambda: dataset_ops.Dataset.range(10) expected_values = [[i, i+1] for i in range(0, 10, 2)] @@ -803,10 +798,30 @@ class TPUStrategyDistributionTest( distribution.extended.worker_devices, expected_values) + def test_num_replicas_in_sync(self): + strategy = get_tpu_strategy() + self.assertEqual(2, strategy.num_replicas_in_sync) + + def test_call_and_merge_exceptions(self): + strategy = get_tpu_strategy() + self._test_call_and_merge_exceptions(strategy) + + def test_numpy_dataset(self): + strategy = get_tpu_strategy() + self._test_numpy_dataset(strategy, run_in_function=True) + + def test_global_step_update(self): + strategy = get_tpu_strategy() + self._test_global_step_update(strategy) + def test_run(self): strategy = get_tpu_strategy() self._test_run(strategy, run_in_function=True) + def test_summary_for_replica_zero_only(self): + strategy = get_tpu_strategy() + self._test_summary_for_replica_zero_only(strategy) + def test_all_reduce_sum(self): strategy = get_tpu_strategy() self._test_all_reduce_sum(strategy, run_in_function=True) From c6123e88bfa8934e83e1357555a622f00b778681 Mon Sep 17 00:00:00 2001 From: Andy Ly Date: Fri, 31 Jul 2020 10:59:57 -0700 Subject: [PATCH 1858/2522] Add pass for legalizating TF/XLA communication ops. This module pass walks functions and legalizes TF/XLA communication ops (tf._XlaHostComputeMlir, tf.XlaSendToHost, tf.XlaRecvFromHost`) into `mhlo.send` and `mhlo.recv` ops with special handling specific to TF/XLA (op sharding and frontend attributes). 
Functions are rewritten to create or pass around tokens, to properly sequence these communication ops. PiperOrigin-RevId: 324241811 Change-Id: Ia35374a66b50c788dd096bdd5defb39b8b168ff1 --- tensorflow/compiler/mlir/xla/BUILD | 3 + .../xla/tests/legalize-tf-communication.mlir | 419 +++++++++++++++++ .../transforms/legalize_tf_communication.cc | 434 ++++++++++++++++++ .../compiler/mlir/xla/transforms/passes.h | 4 + 4 files changed, 860 insertions(+) create mode 100644 tensorflow/compiler/mlir/xla/tests/legalize-tf-communication.mlir create mode 100644 tensorflow/compiler/mlir/xla/transforms/legalize_tf_communication.cc diff --git a/tensorflow/compiler/mlir/xla/BUILD b/tensorflow/compiler/mlir/xla/BUILD index 729371cf747..ada81634567 100644 --- a/tensorflow/compiler/mlir/xla/BUILD +++ b/tensorflow/compiler/mlir/xla/BUILD @@ -48,17 +48,20 @@ cc_library( srcs = [ "transforms/generated_legalize_tf.inc", "transforms/legalize_tf.cc", + "transforms/legalize_tf_communication.cc", "transforms/legalize_tf_control_flow.cc", ], hdrs = [ "transforms/passes.h", ], deps = [ + ":type_to_shape", "//tensorflow/compiler/mlir/hlo", "//tensorflow/compiler/mlir/hlo:chlo_legalize_to_hlo", "//tensorflow/compiler/mlir/hlo:convert_op_folder", "//tensorflow/compiler/mlir/tensorflow", "//tensorflow/compiler/mlir/tensorflow:lower_tf_lib", + "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:xla_data_proto_cc", "//tensorflow/compiler/xla/client:padding", "//tensorflow/compiler/xla/client:sharding_builder", diff --git a/tensorflow/compiler/mlir/xla/tests/legalize-tf-communication.mlir b/tensorflow/compiler/mlir/xla/tests/legalize-tf-communication.mlir new file mode 100644 index 00000000000..f84a2f28a23 --- /dev/null +++ b/tensorflow/compiler/mlir/xla/tests/legalize-tf-communication.mlir @@ -0,0 +1,419 @@ +// RUN: tf-opt -split-input-file -verify-diagnostics -xla-legalize-tf-communication %s | FileCheck %s + +// Test legalization of `tf._XlaHostComputeMlir` expands into individual +// `mhlo.send` per operand and `mhlo.recv` per result. Channel Id's are uniquely +// assigned per mhlo communcation op, and frontend attributes (modified keys) +// and op shardings (based on `tpu_core`) are added. Sink tokens are created +// if there are more than one operand or more than one result. 
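The opaque sharding strings that the checks below match against can be decoded by hand. This sketch assumes the standard protobuf wire encoding of xla.OpSharding, with field 1 = type, field 3 = tile_assignment_dimensions, and field 4 = tile_assignment_devices (the field numbers are an assumption here, not taken from this patch); the last byte is the assigned device, so 0x00 for tpu_core 0 and 0x01 for tpu_core 1.

#include <cassert>
#include <string>

int main() {
  // "\08\01\1A\01\01\22\01\00" from the tests, written with C++ hex escapes.
  const std::string device0("\x08\x01\x1A\x01\x01\x22\x01\x00", 8);

  std::string built;
  built += "\x08\x01";              // field 1 (type): varint 1 -> MAXIMAL
  built += "\x1A\x01\x01";          // field 3 (tile_assignment_dimensions): packed [1]
  built.append("\x22\x01\x00", 3);  // field 4 (tile_assignment_devices): packed [0]
  assert(built == device0);
  return 0;
}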
+// +// The following op sharding is used: +// Proto debug string: +// type: MAXIMAL +// tile_assignment_dimensions: 1 +// tile_assignment_devices: 0 +// Serialized string: +// "\08\01\1A\01\01\22\01\00" + +// CHECK-LABEL: func @host_compute +// CHECK-SAME: ([[ARG0:%.*]]: tensor, [[ARG1:%.*]]: tensor) +func @host_compute(%arg0: tensor, %arg1: tensor) -> (tensor, tensor) { + // CHECK: [[INIT_TOKEN:%.*]] = "mhlo.create_token" + + // CHECK: [[SEND_ARG0_TOKEN:%.*]] = "mhlo.send"([[ARG0]], [[INIT_TOKEN]]) + // CHECK-SAME: channel_id = {handle = 1 : i64, type = 2 : i64} + // CHECK-SAME: is_host_transfer = true + // CHECK-SAME: mhlo.frontend_attributes = {_xla_host_transfer_original_type = "s32", _xla_host_transfer_rendezvous = "host_compute_channel_send_dtoh_0"} + // CHECK-SAME: mhlo.sharding = "\08\01\1A\01\01\22\01\00" + // CHECK-SAME: (tensor, !mhlo.token) -> !mhlo.token + + // CHECK: [[SEND_ARG1_TOKEN:%.*]] = "mhlo.send"([[ARG1]], [[INIT_TOKEN]]) + // CHECK-SAME: channel_id = {handle = 2 : i64, type = 2 : i64} + // CHECK-SAME: is_host_transfer = true + // CHECK-SAME: mhlo.frontend_attributes = {_xla_host_transfer_original_type = "s64", _xla_host_transfer_rendezvous = "host_compute_channel_send_dtoh_1"} + // CHECK-SAME: mhlo.sharding = "\08\01\1A\01\01\22\01\00" + // CHECK-SAME: (tensor, !mhlo.token) -> !mhlo.token + + // CHECK: [[SEND_SINK_TOKEN:%.*]] = "mhlo.after_all"([[SEND_ARG0_TOKEN]], [[SEND_ARG1_TOKEN]]) + + // CHECK: [[RECV_RETVAL0_TUPLE:%.*]] = "mhlo.recv"([[SEND_SINK_TOKEN]]) + // CHECK-SAME: channel_id = {handle = 3 : i64, type = 3 : i64} + // CHECK-SAME: is_host_transfer = true + // CHECK-SAME: mhlo.frontend_attributes = {_xla_host_transfer_original_type = "f32", _xla_host_transfer_rendezvous = "host_compute_channel_recv_htod_0"} + // CHECK-SAME: mhlo.sharding = "\08\01\1A\01\01\22\01\00" + // CHECK-SAME: (!mhlo.token) -> tuple, !mhlo.token> + + // CHECK: [[RECV_RETVAL0_VAL:%.*]] = "mhlo.get_tuple_element"([[RECV_RETVAL0_TUPLE]]) + // CHECK-SAME: index = 0 + // CHECK-SAME: mhlo.sharding = "\08\01\1A\01\01\22\01\00" + // CHECK-SAME: (tuple, !mhlo.token>) -> tensor + + // CHECK: [[RECV_RETVAL0_TOKEN:%.*]] = "mhlo.get_tuple_element"([[RECV_RETVAL0_TUPLE]]) + // CHECK-SAME: index = 1 + // CHECK-SAME: mhlo.sharding = "\08\01\1A\01\01\22\01\00" + // CHECK-SAME: (tuple, !mhlo.token>) -> !mhlo.token + + // CHECK: [[RECV_RETVAL1_TUPLE:%.*]] = "mhlo.recv"([[SEND_SINK_TOKEN]]) + // CHECK-SAME: channel_id = {handle = 4 : i64, type = 3 : i64} + // CHECK-SAME: is_host_transfer = true + // CHECK-SAME: mhlo.frontend_attributes = {_xla_host_transfer_original_type = "f64", _xla_host_transfer_rendezvous = "host_compute_channel_recv_htod_1"} + // CHECK-SAME: mhlo.sharding = "\08\01\1A\01\01\22\01\00" + // CHECK-SAME: (!mhlo.token) -> tuple, !mhlo.token> + + // CHECK: [[RECV_RETVAL1_VAL:%.*]] = "mhlo.get_tuple_element"([[RECV_RETVAL1_TUPLE]]) + // CHECK-SAME: index = 0 + // CHECK-SAME: mhlo.sharding = "\08\01\1A\01\01\22\01\00" + // CHECK-SAME: (tuple, !mhlo.token>) -> tensor + + // CHECK: [[RECV_RETVAL1_TOKEN:%.*]] = "mhlo.get_tuple_element"([[RECV_RETVAL1_TUPLE]]) + // CHECK-SAME: index = 1 + // CHECK-SAME: mhlo.sharding = "\08\01\1A\01\01\22\01\00" + // CHECK-SAME: (tuple, !mhlo.token>) -> !mhlo.token + + // CHECK: [[RECV_SINK_TOKEN:%.*]] = "mhlo.after_all"([[RECV_RETVAL0_TOKEN]], [[RECV_RETVAL1_TOKEN]]) + %0:2 = "tf._XlaHostComputeMlir"(%arg0, %arg1) {recv_key = "host_compute_channel_recv", send_key = "host_compute_channel_send", tpu_core = 0 : i64} : (tensor, tensor) -> (tensor, tensor) + + // 
CHECK: return [[RECV_RETVAL0_VAL]], [[RECV_RETVAL1_VAL]] : tensor, tensor + return %0#0, %0#1 : tensor, tensor +} + +// ----- + +// Tests `tf._XlaHostComputeMlir` with `tpu_core` assigns the correct op +// sharding. +// +// The following op sharding is used: +// Proto debug string: +// type: MAXIMAL +// tile_assignment_dimensions: 1 +// tile_assignment_devices: 1 +// Serialized string: +// "\08\01\1A\01\01\22\01\01" + +// CHECK-LABEL: func @host_compute_sharding +// CHECK-SAME: ([[ARG0:%.*]]: tensor) +func @host_compute_sharding(%arg0: tensor) -> tensor { + // CHECK: "mhlo.send" + // CHECK-SAME: mhlo.sharding = "\08\01\1A\01\01\22\01\01" + // CHECK: "mhlo.recv" + // CHECK-SAME: mhlo.sharding = "\08\01\1A\01\01\22\01\01" + // CHECK: "mhlo.get_tuple_element" + // CHECK-SAME: mhlo.sharding = "\08\01\1A\01\01\22\01\01" + // CHECK: "mhlo.get_tuple_element" + // CHECK-SAME: mhlo.sharding = "\08\01\1A\01\01\22\01\01" + %0 = "tf._XlaHostComputeMlir"(%arg0) {recv_key = "host_compute_channel_recv", send_key = "host_compute_channel_send", tpu_core = 1 : i64} : (tensor) -> tensor + return %0 : tensor +} + +// ----- + +// Tests `tf._XlaHostComputeMlir` with no operands simply forwards the input +// token to its generated `mhlo.recv`. + +// CHECK-LABEL: func @host_compute_no_operands_one_result +func @host_compute_no_operands_one_result() { + // CHECK: [[INIT_TOKEN:%.*]] = "mhlo.create_token" + + // CHECK-NOT: "mhlo.send" + // CHECK-NOT: "mhlo.after_all" + // CHECK: "mhlo.recv"([[INIT_TOKEN]]) + %0 = "tf._XlaHostComputeMlir"() {recv_key = "host_compute_channel_recv", send_key = "host_compute_channel_send", tpu_core = 0 : i64} : () -> tensor + return +} + +// ----- + +// Tests `tf._XlaHostComputeMlir` with no results simply forwards its token from +// the generated `mhlo.send`. + +// CHECK-LABEL: func @host_compute_one_operand_no_results +// CHECK-SAME: ([[ARG0:%.*]]: tensor) +func @host_compute_one_operand_no_results(%arg0: tensor) { + // CHECK: [[INIT_TOKEN:%.*]] = "mhlo.create_token" + + // CHECK: [[SEND_TOKEN:%.*]] = "mhlo.send"([[ARG0]], [[INIT_TOKEN]]) + // CHECK-NOT: "mhlo.after_all" + "tf._XlaHostComputeMlir"(%arg0) {recv_key = "host_compute_channel_recv", send_key = "host_compute_channel_send", tpu_core = 0 : i64} : (tensor) -> () + + // CHECK: "mhlo.recv"([[SEND_TOKEN]]) + %0 = "tf.XlaRecvFromHost"() {key = "recv_key", shape = #tf.shape<>} : () -> tensor + return +} + +// ----- + +// Tests `tf._XlaHostComputeMlir` with one operand and one result does not +// create any `mhlo.after_all` ops. + +// CHECK-LABEL: func @host_compute_single_operand_result +// CHECK-SAME: ([[ARG0:%.*]]: tensor) +func @host_compute_single_operand_result(%arg0: tensor) { + // CHECK: [[INIT_TOKEN:%.*]] = "mhlo.create_token" + + // CHECK: [[SEND_TOKEN:%.*]] = "mhlo.send"([[ARG0]], [[INIT_TOKEN]]) + // CHECK-NOT: "mhlo.after_all" + // CHECK: "mhlo.recv"([[SEND_TOKEN]]) + // CHECK-NOT: "mhlo.after_all" + %0 = "tf._XlaHostComputeMlir"(%arg0) {recv_key = "host_compute_channel_recv", send_key = "host_compute_channel_send", tpu_core = 0 : i64} : (tensor) -> tensor + return +} + +// ----- + +// Test legalization of `tf.XlaSendToHost` expands into a `mhlo.send` op. 
+ +// CHECK-LABEL: func @send_to_host +// CHECK-SAME: ([[ARG0:%.*]]: tensor) +func @send_to_host(%arg0: tensor) { + // CHECK: [[INIT_TOKEN:%.*]] = "mhlo.create_token" + + // CHECK: "mhlo.send"([[ARG0]], [[INIT_TOKEN]]) + // CHECK-SAME: channel_id = {handle = 1 : i64, type = 2 : i64} + // CHECK-SAME: is_host_transfer = true + // CHECK-SAME: mhlo.frontend_attributes = {_xla_host_transfer_original_type = "s32", _xla_host_transfer_rendezvous = "send_key"} + // CHECK-SAME: (tensor, !mhlo.token) -> !mhlo.token + "tf.XlaSendToHost"(%arg0) {key = "send_key"} : (tensor) -> () + return +} + +// ----- + +// Test legalization of `tf.XlaRecvFromHost` expands into a `mhlo.recv` op. + +// CHECK-LABEL: func @recv_from_host +func @recv_from_host() -> tensor { + // CHECK: [[INIT_TOKEN:%.*]] = "mhlo.create_token" + + // CHECK: [[RECV_TUPLE:%.*]] = "mhlo.recv"([[INIT_TOKEN]]) + // CHECK-SAME: channel_id = {handle = 1 : i64, type = 3 : i64} + // CHECK-SAME: is_host_transfer = true + // CHECK-SAME: mhlo.frontend_attributes = {_xla_host_transfer_original_type = "s32", _xla_host_transfer_rendezvous = "recv_key"} + // CHECK-SAME: (!mhlo.token) -> tuple, !mhlo.token> + + + // CHECK: [[RECV_VAL:%.*]] = "mhlo.get_tuple_element"([[RECV_TUPLE]]) + // CHECK-SAME: index = 0 + // CHECK-SAME: (tuple, !mhlo.token>) -> tensor + + // CHECK: [[RECV_TOKEN:%.*]] = "mhlo.get_tuple_element"([[RECV_TUPLE]]) + // CHECK-SAME: index = 1 + // CHECK-SAME: (tuple, !mhlo.token>) -> !mhlo.token + %0 = "tf.XlaRecvFromHost"() {key = "recv_key", shape = #tf.shape<>} : () -> tensor + + // CHECK: return [[RECV_VAL]] : tensor + return %0 : tensor +} + +// ----- + +// Test legalization of multiple TF/XLA communication ops are sequenced with +// their generated tokens. Channel Id's are also uniquely assigned. 
+ +// CHECK-LABEL: func @multiple_consecutive_ops +// CHECK-SAME: ([[ARG0:%.*]]: tensor) +func @multiple_consecutive_ops(%arg0: tensor) -> tensor { + // CHECK: [[INIT_TOKEN:%.*]] = "mhlo.create_token" + + // CHECK: [[SEND0_ARG0_TOKEN:%.*]] = "mhlo.send"([[ARG0]], [[INIT_TOKEN]]) + // CHECK-SAME: channel_id = {handle = 1 : i64, type = 2 : i64} + // CHECK-SAME: mhlo.frontend_attributes = {_xla_host_transfer_original_type = "s32", _xla_host_transfer_rendezvous = "send0_dtoh_0"} + + // CHECK: [[RECV0_RETVAL0_TUPLE:%.*]] = "mhlo.recv"([[SEND0_ARG0_TOKEN]]) + // CHECK-SAME: channel_id = {handle = 2 : i64, type = 3 : i64} + // CHECK-SAME: mhlo.frontend_attributes = {_xla_host_transfer_original_type = "s32", _xla_host_transfer_rendezvous = "recv0_htod_0"} + + // CHECK: [[RECV0_RETVAL0_VAL:%.*]] = "mhlo.get_tuple_element"([[RECV0_RETVAL0_TUPLE]]) + // CHECK-SAME: index = 0 + + // CHECK: [[RECV0_RETVAL0_TOKEN:%.*]] = "mhlo.get_tuple_element"([[RECV0_RETVAL0_TUPLE]]) + // CHECK-SAME: index = 1 + %0 = "tf._XlaHostComputeMlir"(%arg0) {recv_key = "recv0", send_key = "send0", tpu_core = 0 : i64} : (tensor) -> tensor + + // CHECK: [[SEND1_ARG0_TOKEN:%.*]] = "mhlo.send"([[RECV0_RETVAL0_VAL]], [[RECV0_RETVAL0_TOKEN]]) + // CHECK-SAME: channel_id = {handle = 3 : i64, type = 2 : i64} + // CHECK-SAME: mhlo.frontend_attributes = {_xla_host_transfer_original_type = "s32", _xla_host_transfer_rendezvous = "send1_dtoh_0"} + + // CHECK: [[RECV1_RETVAL0_TUPLE:%.*]] = "mhlo.recv"([[SEND1_ARG0_TOKEN]]) + // CHECK-SAME: channel_id = {handle = 4 : i64, type = 3 : i64} + // CHECK-SAME: mhlo.frontend_attributes = {_xla_host_transfer_original_type = "s32", _xla_host_transfer_rendezvous = "recv1_htod_0"} + + // CHECK: [[RECV1_RETVAL0_VAL:%.*]] = "mhlo.get_tuple_element"([[RECV1_RETVAL0_TUPLE]]) + // CHECK-SAME: index = 0 + + // CHECK: [[RECV1_RETVAL0_TOKEN:%.*]] = "mhlo.get_tuple_element"([[RECV1_RETVAL0_TUPLE]]) + // CHECK-SAME: index = 1 + %1 = "tf._XlaHostComputeMlir"(%0) {recv_key = "recv1", send_key = "send1", tpu_core = 0 : i64} : (tensor) -> tensor + + // CHECK: return [[RECV1_RETVAL0_VAL]] : tensor + return %1 : tensor +} + +// ----- + +// Test private function with TF/XLA communication op being called by another +// function gets rewritten with an extra token argument and an extra token +// result, and the caller passes in a token. The top level function not called +// (or public) will be updated to create a token. 
+ +// CHECK: func @main([[MAIN_ARG0:%.*]]: tensor) -> tensor +func @main(%arg0: tensor) -> tensor { + // CHECK: [[MAIN_TOKEN:%.*]] = "mhlo.create_token" + + // CHECK: [[CALL:%.*]]:2 = call @callee([[MAIN_ARG0]], [[MAIN_TOKEN]]) + // CHECK-SAME: (tensor, !mhlo.token) -> (tensor, !mhlo.token) + %0 = call @callee(%arg0) : (tensor) -> tensor + + // CHECK: return [[CALL]]#0 + return %0 : tensor +} + +// CHECK: func @callee([[CALLEE_ARG0:%.*]]: tensor, [[CALLEE_ARG1:%.*]]: !mhlo.token) -> (tensor, !mhlo.token) +func @callee(%arg0: tensor) -> tensor attributes {sym_visibility = "private"} { + // CHECK-NOT: "mhlo.create_token" + + // CHECK: [[SEND_ARG0_TOKEN:%.*]] = "mhlo.send"([[CALLEE_ARG0]], [[CALLEE_ARG1]]) + // CHECK: [[RECV_RETVAL0_TUPLE:%.*]] = "mhlo.recv"([[SEND_ARG0_TOKEN]]) + // CHECK: [[RECV_RETVAL0_VAL:%.*]] = "mhlo.get_tuple_element"([[RECV_RETVAL0_TUPLE]]) + // CHECK-SAME: index = 0 + // CHECK: [[RECV_RETVAL0_TOKEN:%.*]] = "mhlo.get_tuple_element"([[RECV_RETVAL0_TUPLE]]) + // CHECK-SAME: index = 1 + %0 = "tf._XlaHostComputeMlir"(%arg0) {recv_key = "recv", send_key = "send", tpu_core = 0 : i64} : (tensor) -> tensor + + // CHECK: return [[RECV_RETVAL0_VAL]], [[RECV_RETVAL0_TOKEN]] + return %0 : tensor +} + +// ----- + +// Test public function with TF/XLA communication op being called by another +// function. The original public function will be modified to create a token, +// while the function is cloned and rewritten with an extra token argument and +// an extra token result. All callers to the original function are updated to +// point to the cloned function and the function the caller is in is updated to +// pass a token or create a token. + +// CHECK: func @main([[MAIN_ARG0:%.*]]: tensor) -> tensor +func @main(%arg0: tensor) -> tensor { + // CHECK: [[MAIN_TOKEN:%.*]] = "mhlo.create_token" + + // CHECK: [[CALL:%.*]]:2 = call [[CALLEE_CLONE:@.*]]([[MAIN_ARG0]], [[MAIN_TOKEN]]) + // CHECK-SAME: (tensor, !mhlo.token) -> (tensor, !mhlo.token) + %0 = call @callee(%arg0) : (tensor) -> tensor + + // CHECK: return [[CALL]]#0 : tensor + return %0 : tensor +} + +// CHECK: func @callee([[CALLEE_ARG0:%.*]]: tensor) -> tensor +func @callee(%arg0: tensor) -> tensor { + // CHECK: [[CALLEE_TOKEN:%.*]] = "mhlo.create_token" + + // CHECK: [[SEND_ARG0_TOKEN:%.*]] = "mhlo.send"([[CALLEE_ARG0]], [[CALLEE_TOKEN]]) + // CHECK: [[RECV_RETVAL0_TUPLE:%.*]] = "mhlo.recv"([[SEND_ARG0_TOKEN]]) + // CHECK: [[RECV_RETVAL0_VAL:%.*]] = "mhlo.get_tuple_element"([[RECV_RETVAL0_TUPLE]]) + // CHECK-SAME: index = 0 + // CHECK: [[RECV_RETVAL0_TOKEN:%.*]] = "mhlo.get_tuple_element"([[RECV_RETVAL0_TUPLE]]) + // CHECK-SAME: index = 1 + %0 = "tf._XlaHostComputeMlir"(%arg0) {recv_key = "recv", send_key = "send", tpu_core = 0 : i64} : (tensor) -> tensor + + // CHECK: return [[RECV_RETVAL0_VAL]] + return %0 : tensor +} + +// CHECK: func [[CALLEE_CLONE]]([[CALLEE_CLONE_ARG0:%.*]]: tensor, [[CALLEE_CLONE_ARG1:%.*]]: !mhlo.token) -> (tensor, !mhlo.token) +// CHECK-NOT: "mhlo.create_token" + +// CHECK: [[CLONE_SEND_ARG0_TOKEN:%.*]] = "mhlo.send"([[CALLEE_CLONE_ARG0]], [[CALLEE_CLONE_ARG1]]) +// CHECK: [[CLONE_RECV_RETVAL0_TUPLE:%.*]] = "mhlo.recv"([[CLONE_SEND_ARG0_TOKEN]]) +// CHECK: [[CLONE_RECV_RETVAL0_VAL:%.*]] = "mhlo.get_tuple_element"([[CLONE_RECV_RETVAL0_TUPLE]]) +// CHECK-SAME: index = 0 +// CHECK: [[CLONE_RECV_RETVAL0_TOKEN:%.*]] = "mhlo.get_tuple_element"([[CLONE_RECV_RETVAL0_TUPLE]]) +// CHECK-SAME: index = 1 + +// CHECK: return [[CLONE_RECV_RETVAL0_VAL]], [[CLONE_RECV_RETVAL0_TOKEN]] + +// ----- + +// Tests generated 
tokens are passed into a function call that also has TF/XLA +// communication ops. + +// CHECK: func @main([[MAIN_ARG0:%.*]]: tensor) +func @main(%arg0: tensor) { + // CHECK: [[MAIN_TOKEN:%.*]] = "mhlo.create_token" + + // CHECK: [[MAIN_SEND0_TOKEN:%.*]] = "mhlo.send"([[MAIN_ARG0]], [[MAIN_TOKEN]]) + "tf.XlaSendToHost"(%arg0) {key = "send0"} : (tensor) -> () + + // CHECK: [[CALL_TOKEN:%.*]] = call @callee([[MAIN_SEND0_TOKEN]]) + // CHECK-SAME: (!mhlo.token) -> !mhlo.token + call @callee() : () -> () + + // CHECK: [[MAIN_SEND2_TOKEN:%.*]] = "mhlo.send"([[MAIN_ARG0]], [[CALL_TOKEN]]) + "tf.XlaSendToHost"(%arg0) {key = "send2"} : (tensor) -> () + return +} + +// CHECK: func @callee([[CALLEE_ARG0:%.*]]: !mhlo.token) -> !mhlo.token +func @callee() attributes {sym_visibility = "private"} { + // CHECK-NOT: "mhlo.create_token" + + // CHECK: [[ZERO:%.*]] = mhlo.constant dense<0> + %0 = mhlo.constant dense<0> : tensor + + // CHECK: [[CALLEE_SEND_TOKEN:%.*]] = "mhlo.send"([[ZERO]], [[CALLEE_ARG0]]) + "tf.XlaSendToHost"(%0) {key = "send1"} : (tensor) -> () + + // CHECK: return [[CALLEE_SEND_TOKEN]] + return +} + +// ----- + +// Test only the top level function generates a token. + +// CHECK: func @callee0() +func @callee0() attributes {sym_visibility = "private"} { + // CHECK: [[INIT_TOKEN:%.*]] = "mhlo.create_token" + + // CHECK: call @callee1([[INIT_TOKEN]]) + call @callee1() : () -> () + return +} + +// CHECK: func @callee1([[CALLEE1_ARG0:%.*]]: !mhlo.token) -> !mhlo.token +func @callee1() attributes {sym_visibility = "private"} { + // CHECK-NOT: "mhlo.create_token" + + // CHECK: [[CALL_2:%.*]] = call @callee2([[CALLEE1_ARG0]]) + call @callee2() : () -> () + + // CHECK: return [[CALL_2]] + return +} + +// CHECK: func @callee2([[CALLEE2_ARG0:%.*]]: !mhlo.token) -> !mhlo.token +func @callee2() attributes {sym_visibility = "private"} { + // CHECK-NOT: "mhlo.create_token" + + // CHECK: [[RECV_TUPLE:%.*]] = "mhlo.recv"([[CALLEE2_ARG0]]) + // CHECK: [[RECV_VAL:%.*]] = "mhlo.get_tuple_element"([[RECV_TUPLE]]) + // CHECK-SAME: index = 0 + // CHECK: [[RECV_TOKEN:%.*]] = "mhlo.get_tuple_element"([[RECV_TUPLE]]) + // CHECK-SAME: index = 1 + %0 = "tf.XlaRecvFromHost"() {key = "recv_key", shape = #tf.shape<>} : () -> tensor + + // CHECK: return [[RECV_TOKEN]] + return +} + +// ----- + +// Tests function with more than one block that is to be rewritten emits an +// error instead. + +// expected-error@+1 {{'func' ops with more than one block are not supported}} +func @multi_block_func() { + br ^bb1 +^bb1: + %0 = "tf.XlaRecvFromHost"() {key = "recv_key", shape = #tf.shape<>} : () -> tensor + return +} diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_communication.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_communication.cc new file mode 100644 index 00000000000..42c719da266 --- /dev/null +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_communication.cc @@ -0,0 +1,434 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +// This file implements logic for lowering TensorFlow dialect's communication +// ops (TF/XLA) to the HLO dialect. + +#include +#include + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/FormatVariadic.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project +#include "mlir/IR/Attributes.h" // from @llvm-project +#include "mlir/IR/Builders.h" // from @llvm-project +#include "mlir/IR/Module.h" // from @llvm-project +#include "mlir/IR/TypeUtilities.h" // from @llvm-project +#include "mlir/IR/Visitors.h" // from @llvm-project +#include "mlir/Pass/Pass.h" // from @llvm-project +#include "mlir/Support/LLVM.h" // from @llvm-project +#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" +#include "tensorflow/compiler/mlir/xla/type_to_shape.h" +#include "tensorflow/compiler/xla/client/sharding_builder.h" +#include "tensorflow/compiler/xla/primitive_util.h" + +namespace mlir { +namespace mhlo { + +namespace { +constexpr char kShardingAttr[] = "mhlo.sharding"; +constexpr char kFrontendAttributesAttr[] = "mhlo.frontend_attributes"; +const char kXlaHostTransferRendezvousNameAttr[] = + "_xla_host_transfer_rendezvous"; +const char kXlaHostTransferOriginalTypeAttr[] = + "_xla_host_transfer_original_type"; + +// A pass that legalizes TF/XLA communication ops, propagate their respective +// tokens (for ordering), and rewrite their respective functions when necessary. +// Note, this currently does not handle nested modules/functions or region based +// ops (e.g. control flow). +class LegalizeTFCommunication + : public PassWrapper> { + public: + void runOnOperation() override; +}; + +// Checks if a function has any communication ops. +bool HasCommunicationOps(FuncOp func) { + auto result = func.walk([](Operation* op) { + if (isa(op)) + return WalkResult::interrupt(); + return WalkResult::advance(); + }); + return result.wasInterrupted(); +} + +// Helper struct holding a function and optional cloned version. If `clone` is +// set, function calls to `original` will be replaced with `clone`. +struct FuncAndClone { + FuncOp original; + FuncOp clone; +}; + +// Finds all functions that need to be rewritten with communication ops and +// and associated tokens. +llvm::SmallDenseMap GetFunctionsToRewrite( + ModuleOp module) { + // Find functions containing communication ops. + llvm::SmallDenseMap funcs; + SmallVector funcs_to_visit; + for (FuncOp func : module.getOps()) { + if (HasCommunicationOps(func)) { + funcs.insert({func.getName(), {func, /*clone=*/nullptr}}); + funcs_to_visit.push_back(func); + } + } + + // Find functions that call functions with communication ops, transitively. + while (!funcs_to_visit.empty()) { + SmallVector new_funcs_to_visit; + for (FuncOp& func : funcs_to_visit) { + auto uses = func.getSymbolUses(module); + if (!uses) continue; + for (auto& use : uses.getValue()) { + // Only `mlir::CallOp` is supported as this requires knowing how to + // rewrite arguments and results to a function. 
+ if (!isa(use.getUser())) continue; + auto caller_func = use.getUser()->getParentOfType(); + if (!caller_func) continue; + if (funcs + .insert( + {caller_func.getName(), {caller_func, /*clone=*/nullptr}}) + .second) + new_funcs_to_visit.push_back(caller_func); + } + } + + funcs_to_visit.swap(new_funcs_to_visit); + } + + // Clone public functions that need to be rewritten. Function calls to this + // function will be replaced with the cloned function. + SymbolTable symbol_table(module); + for (auto& func : funcs) { + if (func.getSecond().original.isPublic()) { + auto clone = func.getSecond().original.clone(); + clone.setVisibility(SymbolTable::Visibility::Private); + symbol_table.insert(clone); + func.getSecond().clone = clone; + } + } + + return funcs; +} + +// Assigns op sharding to an op for a given device core. +void SetOpSharding(Operation* op, int64_t tpu_core) { + std::string sharding_serialized = + ::xla::sharding_builder::AssignDevice(tpu_core).SerializeAsString(); + op->setAttr(kShardingAttr, + StringAttr::get(sharding_serialized, op->getContext())); +} + +// Assigns frontend attributes holding information about data type and +// TensorFlow rendezvous channel name. +void SetFrontendAttributes(Operation* op, StringRef key, Type type) { + MLIRContext* context = op->getContext(); + + auto rendezvous_name = StringAttr::get(key, context); + auto rendezvous_name_attr = NamedAttribute( + Identifier::get(kXlaHostTransferRendezvousNameAttr, context), + rendezvous_name); + + auto element_type = getElementTypeOrSelf(type); + auto xla_element_type = ::xla::TypeToPrimitiveType(element_type); + const std::string& xla_element_type_str = + ::xla::primitive_util::LowercasePrimitiveTypeName(xla_element_type); + auto original_type = StringAttr::get(xla_element_type_str, context); + auto original_type_attr = + NamedAttribute(Identifier::get(kXlaHostTransferOriginalTypeAttr, context), + original_type); + + auto frontend_attributes = DictionaryAttr::get( + ArrayRef{rendezvous_name_attr, original_type_attr}, + context); + op->setAttr(kFrontendAttributesAttr, frontend_attributes); +} + +// Assigns frontend attributes holding information about data type and +// TensorFlow rendezvous channel name specific to `tf._XlaHostComputeMlir`. +// TensorFlow rendezvous channel name is handled differently as individual names +// are used per data send and receive. +void SetFrontendAttributes(Operation* op, int32_t index, StringRef key, + Type type, bool device_to_host) { + std::string formatted_key = + device_to_host ? llvm::formatv("{0}_dtoh_{1}", key, index).str() + : llvm::formatv("{0}_htod_{1}", key, index).str(); + + return SetFrontendAttributes(op, formatted_key, type); +} + +// Creates a `mhlo.send` op for sending value `operand`. If `index` is set, +// `key` will be rewritten with a suffix and index. If `tpu_core` is set, op +// sharding for the respective device will be set. 
+Value CreateSendOp(OpBuilder& builder, int64_t& channel_id, Location loc, + Value operand, StringRef key, const Optional& index, + const Optional& tpu_core, Value token) { + // type 2 == DEVICE_TO_HOST + auto channel_handle = ChannelHandle::get( + /*handle=*/builder.getI64IntegerAttr(channel_id++), + /*type=*/builder.getI64IntegerAttr(2), builder.getContext()); + auto send = builder.create( + loc, token.getType(), operand, token, channel_handle, + /*is_host_transfer=*/builder.getBoolAttr(true)); + + if (index) { + SetFrontendAttributes(send, index.getValue(), key, operand.getType(), + /*device_to_host=*/true); + } else { + SetFrontendAttributes(send, key, operand.getType()); + } + + if (tpu_core) SetOpSharding(send, tpu_core.getValue()); + + return send.getResult(); +} + +// Creates a `mhlo.recv` op for receiving a value. If `index` is set, `key` will +// be rewritten with a suffix and index. If `tpu_core` is set, op sharding for +// the respective device will be set. +Value CreateRecvOp(OpBuilder& builder, int64_t& channel_id, Location loc, + Value result, StringRef key, const Optional& index, + const Optional& tpu_core, Value token) { + // type 3 == HOST_TO_DEVICE + auto channel_handle = ChannelHandle::get( + /*handle=*/builder.getI64IntegerAttr(channel_id++), + /*type=*/builder.getI64IntegerAttr(3), builder.getContext()); + auto result_type = result.getType(); + auto recv_result_type = + TupleType::get({result_type, token.getType()}, builder.getContext()); + auto recv = + builder.create(loc, recv_result_type, token, channel_handle, + /*is_host_transfer=*/builder.getBoolAttr(true)); + if (index) { + SetFrontendAttributes(recv, index.getValue(), key, result_type, + /*device_to_host=*/false); + } else { + SetFrontendAttributes(recv, key, result.getType()); + } + if (tpu_core) SetOpSharding(recv, tpu_core.getValue()); + + auto get_tuple_element = + builder.create(loc, recv.getResult(), /*index=*/0); + if (tpu_core) SetOpSharding(get_tuple_element, tpu_core.getValue()); + + result.replaceAllUsesWith(get_tuple_element); + + auto new_token = builder.create(loc, recv.getResult(), + /*index=*/1); + if (tpu_core) SetOpSharding(new_token, tpu_core.getValue()); + + return new_token.getResult(); +} + +// Creates a new token if necessary, acting as a sink to previous tokens. If +// there is only one token in `tokens`, the only token is returned. If `tokens` +// is empty, `original_token` is returned instead. +Value CreateSinkToken(OpBuilder& builder, Location loc, ArrayRef tokens, + Value original_token) { + if (tokens.empty()) { + return original_token; + } else if (llvm::hasSingleElement(tokens)) { + return tokens[0]; + } else { + return builder.create(loc, original_token.getType(), tokens) + .getResult(); + } +} + +// Replaces `tf._XlaHostComputeMlir` with individual `mhlo.send` and `mhlo.recv` +// ops per operand and result. Unique Channel Id's are assigned per transfer. +// Sink tokens are created across all `mhlo.send` ops first and then by +// all `mhlo.recv` ops. 
+Value RewriteHostComputeOp(OpBuilder& builder, int64_t& channel_id, + TF::_XlaHostComputeMlirOp host_compute, + Value token) { + builder.setInsertionPoint(host_compute); + Location loc = host_compute.getLoc(); + int64_t tpu_core = host_compute.tpu_coreAttr().getInt(); + + SmallVector send_tokens; + for (auto operand : llvm::enumerate(host_compute.inputs())) { + auto send_token = + CreateSendOp(builder, channel_id, loc, operand.value(), + host_compute.send_key(), operand.index(), tpu_core, token); + send_tokens.push_back(send_token); + } + token = CreateSinkToken(builder, loc, send_tokens, token); + + SmallVector recv_tokens; + for (auto result : llvm::enumerate(host_compute.outputs())) { + auto recv_token = + CreateRecvOp(builder, channel_id, loc, result.value(), + host_compute.recv_key(), result.index(), tpu_core, token); + recv_tokens.push_back(recv_token); + } + token = CreateSinkToken(builder, loc, recv_tokens, token); + + host_compute.erase(); + return token; +} + +// Replaces `tf.XlaSendToHost` with a `mhlo.send`. +Value RewriteSendToHostOp(OpBuilder& builder, int64_t& channel_id, + TF::XlaSendToHostOp send_to_host, Value token) { + builder.setInsertionPoint(send_to_host); + token = CreateSendOp(builder, channel_id, send_to_host.getLoc(), + send_to_host.input(), send_to_host.key(), + /*index=*/llvm::None, /*tpu_core=*/llvm::None, token); + + send_to_host.erase(); + return token; +} + +// Replaces `tf.XlaRecvFromHost` with a `mhlo.recv`. +Value RewriteRecvFromHostOp(OpBuilder& builder, int64_t& channel_id, + TF::XlaRecvFromHostOp recv_from_host, Value token) { + builder.setInsertionPoint(recv_from_host); + token = CreateRecvOp(builder, channel_id, recv_from_host.getLoc(), + recv_from_host.output(), recv_from_host.key(), + /*index=*/llvm::None, /*tpu_core=*/llvm::None, token); + + recv_from_host.erase(); + return token; +} + +// Replaces a `mlir::CallOp` with one that has an extra `!mhlo.token` operand +// and `!mhlo.token` result. If `new_symbol` is set, the new call will be +// updated to call the `new_symbol` instead. +Value RewriteCallOp(OpBuilder& builder, CallOp call, + const Optional& new_symbol, Value token) { + builder.setInsertionPoint(call); + auto new_operands = llvm::to_vector<4>(call.getArgOperands()); + new_operands.push_back(token); + auto new_result_types = llvm::to_vector<4>(call.getResultTypes()); + new_result_types.push_back(token.getType()); + auto new_call = builder.create( + call.getLoc(), new_result_types, + new_symbol ? new_symbol.getValue() : call.callee(), new_operands); + + for (auto results : llvm::zip(call.getResults(), new_call.getResults())) + std::get<0>(results).replaceAllUsesWith(std::get<1>(results)); + call.erase(); + return new_call.getResults().back(); +} + +// Updates function terminator and type if a token is to be emitted by the +// function. +void RewriteFunctionTerminatorAndUpdateType(OpBuilder& builder, FuncOp func, + Block& func_body, Value token) { + // If the function signature is changed, update to emit a token and update + // the function type. 
+ Operation* terminator = func_body.getTerminator(); + auto new_results = llvm::to_vector<4>(terminator->getOperands()); + new_results.push_back(token); + builder.setInsertionPoint(terminator); + auto new_return = + builder.create(terminator->getLoc(), new_results); + terminator->erase(); + + auto new_argument_types = llvm::to_vector<4>(func_body.getArgumentTypes()); + auto new_result_types = llvm::to_vector<4>(new_return.getOperandTypes()); + func.setType(FunctionType::get(new_argument_types, new_result_types, + builder.getContext())); +} + +// Rewrites a function body and communication ops inside. The function may +// either be rewritten to create a token or take in and return a token, +// depending on its visibility and if there are any callers. +LogicalResult RewriteFunction( + OpBuilder& builder, int64_t& channel_id, ModuleOp module, FuncOp func, + const llvm::SmallDenseMap& funcs) { + MLIRContext* context = module.getContext(); + if (!llvm::hasSingleElement(func.getBody())) + return func.emitError() + << "'" << FuncOp::getOperationName() + << "' ops with more than one block are not supported"; + + bool rewrite_block = !func.isPublic() && !func.symbolKnownUseEmpty(module); + Block& func_body = func.front(); + + builder.setInsertionPointToStart(&func_body); + auto token_type = mlir::mhlo::TokenType::get(context); + // If a function is public, it's signature should not be modified, and instead + // a token will be created. Otherwise a token block argument is inserted. + Value token = rewrite_block + ? func_body.addArgument(token_type) + : builder.create(func.getLoc(), token_type) + .getResult(); + + for (Operation& op : llvm::make_early_inc_range(func_body)) { + if (auto host_compute = dyn_cast(op)) { + token = RewriteHostComputeOp(builder, channel_id, host_compute, token); + } else if (auto send_to_host = dyn_cast(op)) { + token = RewriteSendToHostOp(builder, channel_id, send_to_host, token); + } else if (auto recv_from_host = dyn_cast(op)) { + token = RewriteRecvFromHostOp(builder, channel_id, recv_from_host, token); + } else if (auto call = dyn_cast(op)) { + // Only `mlir::CallOp` is supported as this requires knowing how to + // rewrite arguments and results to a function. + auto it = funcs.find(call.getCallee()); + if (it == funcs.end()) continue; + FuncOp clone = it->getSecond().clone; + Optional symbol_name = + clone ? Optional(clone.getName()) : llvm::None; + // If the function being called is to be cloned, update the call to also + // point to the cloned function. + token = RewriteCallOp(builder, call, symbol_name, token); + } + } + + if (rewrite_block) + RewriteFunctionTerminatorAndUpdateType(builder, func, func_body, token); + + return success(); +} + +void LegalizeTFCommunication::runOnOperation() { + auto module = getOperation(); + llvm::SmallDenseMap funcs = + GetFunctionsToRewrite(module); + + // Module level counter to make sure Channel Id's are unique. 
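+  // The counter is passed by reference through RewriteFunction into
+  // CreateSendOp/CreateRecvOp, each of which increments it, so channel ids
+  // stay unique across the original functions and their clones.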
+ int64_t channel_id = 1; + OpBuilder builder(&getContext()); + for (const auto& func_and_name : funcs) { + FuncOp func = func_and_name.getSecond().original; + if (failed(RewriteFunction(builder, channel_id, module, func, funcs))) + return signalPassFailure(); + + FuncOp clone = func_and_name.getSecond().clone; + if (!clone) continue; + if (failed(RewriteFunction(builder, channel_id, module, clone, funcs))) + return signalPassFailure(); + } +} + +static PassRegistration pass( + "xla-legalize-tf-communication", + "Legalize TF/XLA communication ops (TensorFlow dialect) to the HLO " + "dialect"); +} // namespace + +std::unique_ptr> CreateLegalizeTFCommunicationPass() { + return std::make_unique(); +} + +} // namespace mhlo +} // namespace mlir diff --git a/tensorflow/compiler/mlir/xla/transforms/passes.h b/tensorflow/compiler/mlir/xla/transforms/passes.h index d566135b0c1..8850581f0bd 100644 --- a/tensorflow/compiler/mlir/xla/transforms/passes.h +++ b/tensorflow/compiler/mlir/xla/transforms/passes.h @@ -63,6 +63,10 @@ std::unique_ptr> createLegalizeTFControlFlowPass(); LogicalResult legalizeTF(Operation* op, bool allow_partial_conversion = false, bool legalize_chlo = true); +// Legalizes TF/XLA communication ops (TF dialect) to HLO dialect communication +// ops. +std::unique_ptr> CreateLegalizeTFCommunicationPass(); + } // namespace mhlo } // namespace mlir From 75ae31d069a763af0e196a7458a2d72dff8cc666 Mon Sep 17 00:00:00 2001 From: Robert David Date: Fri, 31 Jul 2020 11:04:34 -0700 Subject: [PATCH 1859/2522] Implement MeanStdDevNormalization in OpenCL; similar to the function in tensor_utils.h. Implementing the serial version for now, to ensure correctness. Implementation using multiple threads (apart from just one thread per batch) to follow. PiperOrigin-RevId: 324242893 Change-Id: I29d4edcbb5ff2035e3a8af15e7e613f0a8ce6c71 --- .../lite/delegates/gpu/cl/kernels/BUILD | 33 +++++ .../cl/kernels/mean_stddev_normalization.cc | 97 ++++++++++++ .../cl/kernels/mean_stddev_normalization.h | 54 +++++++ .../kernels/mean_stddev_normalization_test.cc | 139 ++++++++++++++++++ 4 files changed, 323 insertions(+) create mode 100644 tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc create mode 100644 tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.h create mode 100644 tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization_test.cc diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/BUILD b/tensorflow/lite/delegates/gpu/cl/kernels/BUILD index 39842508273..35ed09633a0 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/BUILD +++ b/tensorflow/lite/delegates/gpu/cl/kernels/BUILD @@ -779,6 +779,39 @@ cc_test( ], ) +cc_library( + name = "mean_stddev_normalization", + srcs = ["mean_stddev_normalization.cc"], + hdrs = ["mean_stddev_normalization.h"], + deps = [ + ":gpu_operation", + ":util", + ":work_group_picking", + "//tensorflow/lite/delegates/gpu/cl:precision", + "//tensorflow/lite/delegates/gpu/common:operations", + "//tensorflow/lite/delegates/gpu/common:status", + "//tensorflow/lite/delegates/gpu/common:types", + ], +) + +cc_test( + name = "mean_stddev_normalization_test", + srcs = ["mean_stddev_normalization_test.cc"], + linkstatic = True, + tags = tf_gpu_tests_tags() + [ + "linux", + "local", + ], + deps = [ + ":cl_test", + ":mean_stddev_normalization", + "//tensorflow/lite/delegates/gpu/cl:tensor", + "//tensorflow/lite/delegates/gpu/common:operations", + "//tensorflow/lite/delegates/gpu/common:status", + "@com_google_googletest//:gtest_main", + ], +) + 
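For reference, the kernel introduced in mean_stddev_normalization.cc below follows the same per-row computation as tensor_utils::MeanStddevNormalization described in the commit message: subtract the per-batch mean over channels, then scale by rsqrt(variance + 1e-8). A minimal C++ sketch of that reference computation follows; the function name and signature are illustrative placeholders, not a TensorFlow API.

    #include <cmath>
    #include <cstddef>

    // CPU reference for per-batch mean/stddev normalization (illustrative).
    // `input` and `output` hold `batches * channels` floats, row-major.
    void MeanStddevNormalizationRef(const float* input, float* output,
                                    std::size_t batches, std::size_t channels) {
      constexpr float kEpsilon = 1.0e-8f;  // same epsilon as the OpenCL kernel
      for (std::size_t b = 0; b < batches; ++b) {
        const float* in = input + b * channels;
        float* out = output + b * channels;
        float sum = 0.0f;
        for (std::size_t c = 0; c < channels; ++c) sum += in[c];
        const float mean = sum / static_cast<float>(channels);
        float sum_diff_sq = 0.0f;
        for (std::size_t c = 0; c < channels; ++c) {
          const float diff = in[c] - mean;
          sum_diff_sq += diff * diff;
        }
        const float variance = sum_diff_sq / static_cast<float>(channels);
        const float stddev_inv = 1.0f / std::sqrt(variance + kEpsilon);
        for (std::size_t c = 0; c < channels; ++c) {
          out[c] = (in[c] - mean) * stddev_inv;
        }
      }
    }

The OpenCL version below performs the same arithmetic but reads four channels at a time (one float4 per slice) and masks the tail lanes of the last slice.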
cc_library( name = "max_unpooling", srcs = ["max_unpooling.cc"], diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc b/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc new file mode 100644 index 00000000000..2b4fc457c71 --- /dev/null +++ b/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc @@ -0,0 +1,97 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.h" + +#include + +#include "tensorflow/lite/delegates/gpu/cl/kernels/util.h" +#include "tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h" +#include "tensorflow/lite/delegates/gpu/cl/precision.h" + +namespace tflite { +namespace gpu { +namespace cl { + +MeanStdDevNormalization::MeanStdDevNormalization(const OperationDef& definition) + : GPUOperation(definition) {} + +std::string MeanStdDevNormalization::GetNormalizationCode( + const OperationDef& op_def) { + AddSrcTensor("src_tensor", op_def.src_tensors[0]); + AddDstTensor("dst_tensor", op_def.dst_tensors[0]); + + std::string c = GetCommonDefines(op_def.precision); + c += "__kernel void main_function(\n"; + c += "$0) {\n"; + c += " size_t B = get_global_id(0);\n"; + c += " if (B >= args.src_tensor.Batch()) { return; }\n"; + c += " if (get_global_id(1) > 0) { return; }\n"; // ?!? 
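+  // The grid is launched as (Batch, 1, 1) (see GetGridSize below); the guard
+  // above presumably covers global sizes that get rounded up to the
+  // work-group size, so extra work-items in dimension 1 exit early.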
+ c += " float sum = 0.0f;\n"; + c += " for (int S = 0; S < args.src_tensor.Slices(); ++S) {\n"; + c += " const float4 t = args.src_tensor.Read(0, 0, S, B);\n"; + c += " sum += t.x;\n"; + c += " if (S * 4 + 1 < args.src_tensor.Channels()) sum += t.y;\n"; + c += " if (S * 4 + 2 < args.src_tensor.Channels()) sum += t.z;\n"; + c += " if (S * 4 + 3 < args.src_tensor.Channels()) sum += t.w;\n"; + c += " }\n"; + c += " float mean = sum / args.src_tensor.Channels();\n"; + c += " float sum_diff_sq = 0.0f;\n"; + c += " for (int S = 0; S < args.src_tensor.Slices(); ++S) {\n"; + c += " const float4 t = args.src_tensor.Read(0, 0, S, B);\n"; + c += " float4 diff = t - (float4)(mean, mean, mean, mean);"; + c += " if (S * 4 + 1 >= args.src_tensor.Channels()) diff.y = 0.0f;\n"; + c += " if (S * 4 + 2 >= args.src_tensor.Channels()) diff.z = 0.0f;\n"; + c += " if (S * 4 + 3 >= args.src_tensor.Channels()) diff.w = 0.0f;\n"; + c += " float dotprod = dot(diff, diff);\n"; + c += " sum_diff_sq += dotprod;\n"; + c += " }\n"; + c += " const float variance = sum_diff_sq / args.src_tensor.Channels();\n"; + c += " const float stddev_inv = rsqrt(variance + 1.0e-8f);\n"; + c += " for (int S = 0; S < args.src_tensor.Slices(); ++S) {\n"; + c += " float4 t = args.src_tensor.Read(0, 0, S, B);\n"; + c += " t = (t - mean) * stddev_inv;\n"; + c += " FLT4 result = TO_FLT4(t);\n"; + c += " args.dst_tensor.Write(result, 0, 0, S, B);\n"; + c += " }\n"; + c += "}\n"; + return c; +} + +absl::Status MeanStdDevNormalization::Compile( + const CreationContext& creation_context) { + std::string code = GetNormalizationCode(definition_); + RETURN_IF_ERROR( + args_.TransformToCLCode(creation_context.device->GetInfo(), {}, &code)); + return creation_context.cache->GetOrCreateCLKernel( + code, "main_function", *creation_context.context, + *creation_context.device, &kernel_); +} + +int3 MeanStdDevNormalization::GetGridSize() const { + const int grid_x = dst_[0]->Batch(); + const int grid_y = 1; + const int grid_z = 1; + return int3(grid_x, grid_y, grid_z); +} + +MeanStdDevNormalization CreateMeanStdDevNormalization( + const OperationDef& definition) { + return MeanStdDevNormalization(definition); +} + +} // namespace cl +} // namespace gpu +} // namespace tflite diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.h b/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.h new file mode 100644 index 00000000000..6d2b00c07ff --- /dev/null +++ b/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.h @@ -0,0 +1,54 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_LSTM_NORMALIZATION_H_ +#define TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_LSTM_NORMALIZATION_H_ + +#include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h" +#include "tensorflow/lite/delegates/gpu/common/operations.h" +#include "tensorflow/lite/delegates/gpu/common/status.h" +#include "tensorflow/lite/delegates/gpu/common/types.h" + +namespace tflite { +namespace gpu { +namespace cl { + +// Implements tensor_utils::MeanStddevNormalization +class MeanStdDevNormalization : public GPUOperation { + public: + explicit MeanStdDevNormalization(const OperationDef& definition); + + int3 GetGridSize() const override; + absl::Status Compile(const CreationContext& creation_context) override; + + // Move only + MeanStdDevNormalization(MeanStdDevNormalization&& kernel) = default; + MeanStdDevNormalization& operator=(MeanStdDevNormalization&& kernel) = + default; + MeanStdDevNormalization(const MeanStdDevNormalization&) = delete; + MeanStdDevNormalization& operator=(const MeanStdDevNormalization&) = delete; + + private: + std::string GetNormalizationCode(const OperationDef& op_def); +}; + +MeanStdDevNormalization CreateMeanStdDevNormalization( + const OperationDef& definition); + +} // namespace cl +} // namespace gpu +} // namespace tflite + +#endif // TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_LSTM_NORMALIZATION_H_ diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization_test.cc new file mode 100644 index 00000000000..57f052557d4 --- /dev/null +++ b/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization_test.cc @@ -0,0 +1,139 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.h" + +#include + +#include +#include +#include "tensorflow/lite/delegates/gpu/cl/kernels/cl_test.h" +#include "tensorflow/lite/delegates/gpu/common/operations.h" +#include "tensorflow/lite/delegates/gpu/common/status.h" + +using ::testing::FloatNear; +using ::testing::Pointwise; + +namespace tflite { +namespace gpu { +namespace cl { +namespace { + +// Parameterized test: mean, difference, tolerance. 
+// Input is constructed as [mean-2*diff, mean-diff, mean+diff, mean+2*diff] +class MeanStddevNormalizationTest + : public OpenCLOperationTest, + public testing::WithParamInterface> {}; + +TEST_P(MeanStddevNormalizationTest, SeparateBatches) { + const float mean = std::get<0>(GetParam()); + const float diff = std::get<1>(GetParam()); + const float tolerance = std::get<2>(GetParam()); + + TensorFloat32 src_tensor; + src_tensor.shape = BHWC(1, 1, 1, 4); + src_tensor.data = {mean - 2 * diff, mean - diff, mean + diff, + mean + 2 * diff}; + for (auto storage : env_.GetSupportedStorages()) { + for (auto precision : env_.GetSupportedPrecisions()) { + OperationDef op_def; + op_def.precision = precision; + auto data_type = DeduceDataTypeFromPrecision(precision); + op_def.src_tensors.push_back({data_type, storage, Layout::BHWC}); + op_def.dst_tensors.push_back({data_type, storage, Layout::BHWC}); + TensorFloat32 dst_tensor; + auto operation = CreateMeanStdDevNormalization(op_def); + ASSERT_OK(ExecuteGPUOperation({src_tensor}, creation_context_, &operation, + BHWC(1, 1, 1, 4), &dst_tensor)); + + std::vector expected_output; + if (diff == 0.0f) { + expected_output.assign({0.0f, 0.0f, 0.0f, 0.0f}); + } else { + const float ksqrt16 = std::sqrt(1.6f); + const float ksqrt04 = std::sqrt(0.4f); + expected_output.assign({-ksqrt16, -ksqrt04, ksqrt04, ksqrt16}); + } + EXPECT_THAT(dst_tensor.data, + Pointwise(FloatNear(tolerance), expected_output)); + } + } +} + +INSTANTIATE_TEST_SUITE_P( + uKernels, MeanStddevNormalizationTest, + testing::Values( + std::make_tuple(0.0f, 0.0f, 0.0f), // zero mean, zero variance + std::make_tuple(0.0f, 0.01f, 2.53e-5f), // zero mean, small variance + std::make_tuple(0.0f, 100.0f, 1.20e-7f), // zero mean, large variance + std::make_tuple(0.01f, 0.0f, 0.0f), // small mean, zero variance + std::make_tuple(0.01f, 0.01f, 2.53e-5f), // small mean, small variance + std::make_tuple(0.01f, 100.0f, 1.20e-7f), // small mean, large variance + std::make_tuple(100.0f, 0.0f, 0.0f), // large mean, zero variance + std::make_tuple(100.0f, 0.01f, 1.81e-4f), // large mean, small variance + std::make_tuple(100.0f, 100.0f, 1.20e-7f) // large mean, large variance + )); + +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(MeanStddevNormalizationTest); + +TEST_F(OpenCLOperationTest, MeanStddevNormalizationAllBatches) { + TensorFloat32 src_tensor; + src_tensor.shape = BHWC(9, 1, 1, 4); + src_tensor.data = { + 0.0f, 0.0f, 0.0f, 0.0f, // zero mean, zero variance + -0.02f, -0.01f, 0.01f, 0.02f, // zero mean, small variance + -200.0f, -100.0f, 100.0f, 200.0f, // zero mean, large variance + 0.01f, 0.01f, 0.01f, 0.01f, // small mean, zero variance + -0.01f, 0.0f, 0.02f, 0.03f, // small mean, small variance + -199.99f, -99.99f, 100.01f, 200.01f, // small mean, large variance + 100.0f, 100.0f, 100.0f, 100.0f, // large mean, zero variance + 99.98f, 99.99f, 100.01f, 100.02f, // large mean, small variance + -100.0f, 0.0f, 200.0f, 300.0f, // large mean, large variance + }; + for (auto storage : env_.GetSupportedStorages()) { + for (auto precision : env_.GetSupportedPrecisions()) { + OperationDef op_def; + op_def.precision = precision; + auto data_type = DeduceDataTypeFromPrecision(precision); + op_def.src_tensors.push_back({data_type, storage, Layout::BHWC}); + op_def.dst_tensors.push_back({data_type, storage, Layout::BHWC}); + TensorFloat32 dst_tensor; + auto operation = CreateMeanStdDevNormalization(op_def); + ASSERT_OK(ExecuteGPUOperation({src_tensor}, creation_context_, &operation, + BHWC(9, 1, 1, 4), &dst_tensor)); 
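+      // Each input row above has the form {m-2d, m-d, m+d, m+2d}: its mean is
+      // m and its variance is (4+1+1+4)*d*d/4 = 2.5*d*d, so every non-constant
+      // row normalizes to {-2,-1,1,2}/sqrt(2.5) = {-sqrt(1.6), -sqrt(0.4),
+      // sqrt(0.4), sqrt(1.6)}, and constant rows (d == 0) normalize to zeros.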
+ + const float ksqrt16 = std::sqrt(1.6f); + const float ksqrt04 = std::sqrt(0.4f); + const std::vector expected_output = { + 0.0f, 0.0f, 0.0f, 0.0f, // zero mean, zero variance + -ksqrt16, -ksqrt04, ksqrt04, ksqrt16, // zero mean, small variance + -ksqrt16, -ksqrt04, ksqrt04, ksqrt16, // zero mean, large variance + 0.0f, 0.0f, 0.0f, 0.0f, // small mean, zero variance + -ksqrt16, -ksqrt04, ksqrt04, ksqrt16, // small mean, small variance + -ksqrt16, -ksqrt04, ksqrt04, ksqrt16, // small mean, large variance + 0.0f, 0.0f, 0.0f, 0.0f, // large mean, zero variance + -ksqrt16, -ksqrt04, ksqrt04, ksqrt16, // large mean, small variance + -ksqrt16, -ksqrt04, ksqrt04, ksqrt16, // large mean, large variance + }; + EXPECT_THAT(dst_tensor.data, + Pointwise(FloatNear(1.81e-4f), expected_output)); + } + } +} + +} // namespace +} // namespace cl +} // namespace gpu +} // namespace tflite From a452e69984de98cc260c1368898b2f33ee7f8670 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 31 Jul 2020 11:24:39 -0700 Subject: [PATCH 1860/2522] Qualify uses of std::string PiperOrigin-RevId: 324247205 Change-Id: Ie5e2164261078620060ebccb74eab2297b639a23 --- tensorflow/core/platform/abi.h | 2 +- tensorflow/core/platform/env.h | 63 ++++++++-------- tensorflow/core/platform/file_system.h | 95 +++++++++++++------------ tensorflow/core/platform/hash.h | 2 +- tensorflow/core/platform/stringprintf.h | 6 +- tensorflow/core/platform/test.h | 4 +- tensorflow/core/platform/threadpool.h | 10 +-- 7 files changed, 95 insertions(+), 87 deletions(-) diff --git a/tensorflow/core/platform/abi.h b/tensorflow/core/platform/abi.h index d1498a6a649..33881690749 100644 --- a/tensorflow/core/platform/abi.h +++ b/tensorflow/core/platform/abi.h @@ -22,7 +22,7 @@ limitations under the License. namespace tensorflow { namespace port { -string MaybeAbiDemangle(const char* name); +std::string MaybeAbiDemangle(const char* name); } // namespace port } // namespace tensorflow diff --git a/tensorflow/core/platform/env.h b/tensorflow/core/platform/env.h index 99924ec1143..25544e87702 100644 --- a/tensorflow/core/platform/env.h +++ b/tensorflow/core/platform/env.h @@ -108,7 +108,7 @@ class Env { /// The ownership of the returned RandomAccessFile is passed to the caller /// and the object should be deleted when is not used. The file object /// shouldn't live longer than the Env object. - Status NewRandomAccessFile(const string& fname, + Status NewRandomAccessFile(const std::string& fname, std::unique_ptr* result); /// \brief Creates an object that writes to a new file with the specified @@ -124,7 +124,7 @@ class Env { /// The ownership of the returned WritableFile is passed to the caller /// and the object should be deleted when is not used. The file object /// shouldn't live longer than the Env object. - Status NewWritableFile(const string& fname, + Status NewWritableFile(const std::string& fname, std::unique_ptr* result); /// \brief Creates an object that either appends to an existing file, or @@ -139,7 +139,7 @@ class Env { /// The ownership of the returned WritableFile is passed to the caller /// and the object should be deleted when is not used. The file object /// shouldn't live longer than the Env object. - Status NewAppendableFile(const string& fname, + Status NewAppendableFile(const std::string& fname, std::unique_ptr* result); /// \brief Creates a readonly region of memory with the file context. @@ -154,10 +154,10 @@ class Env { /// and the object should be deleted when is not used. 
The memory region /// object shouldn't live longer than the Env object. Status NewReadOnlyMemoryRegionFromFile( - const string& fname, std::unique_ptr* result); + const std::string& fname, std::unique_ptr* result); /// Returns OK if the named path exists and NOT_FOUND otherwise. - Status FileExists(const string& fname); + Status FileExists(const std::string& fname); /// Returns true if all the listed files exist, false otherwise. /// if status is not null, populate the vector with a detailed status @@ -169,7 +169,7 @@ class Env { /// directory. The names are relative to "dir". /// /// Original contents of *results are dropped. - Status GetChildren(const string& dir, std::vector* result); + Status GetChildren(const std::string& dir, std::vector* result); /// \brief Returns true if the path matches the given pattern. The wildcards /// allowed in pattern are described in FileSystem::GetMatchingPaths. @@ -180,11 +180,11 @@ class Env { /// that pattern. *results is cleared. /// /// More details about `pattern` in FileSystem::GetMatchingPaths. - virtual Status GetMatchingPaths(const string& pattern, + virtual Status GetMatchingPaths(const std::string& pattern, std::vector* results); /// Deletes the named file. - Status DeleteFile(const string& fname); + Status DeleteFile(const std::string& fname); /// \brief Deletes the specified directory and all subdirectories and files /// underneath it. This is accomplished by traversing the directory tree @@ -210,7 +210,7 @@ class Env { /// * PERMISSION_DENIED - dirname or some descendant is not writable /// * UNIMPLEMENTED - Some underlying functions (like Delete) are not /// implemented - Status DeleteRecursively(const string& dirname, int64* undeleted_files, + Status DeleteRecursively(const std::string& dirname, int64* undeleted_files, int64* undeleted_dirs); /// \brief Creates the specified directory and all the necessary @@ -218,19 +218,19 @@ class Env { /// * OK - successfully created the directory and sub directories, even if /// they were already created. /// * PERMISSION_DENIED - dirname or some subdirectory is not writable. - Status RecursivelyCreateDir(const string& dirname); + Status RecursivelyCreateDir(const std::string& dirname); /// \brief Creates the specified directory. Typical return codes /// * OK - successfully created the directory. /// * ALREADY_EXISTS - directory already exists. /// * PERMISSION_DENIED - dirname is not writable. - Status CreateDir(const string& dirname); + Status CreateDir(const std::string& dirname); /// Deletes the specified directory. - Status DeleteDir(const string& dirname); + Status DeleteDir(const std::string& dirname); /// Obtains statistics for the given path. - Status Stat(const string& fname, FileStatistics* stat); + Status Stat(const std::string& fname, FileStatistics* stat); /// \brief Returns whether the given path is a directory or not. /// Typical return codes (not guaranteed exhaustive): @@ -239,7 +239,7 @@ class Env { /// * NOT_FOUND - The path entry does not exist. /// * PERMISSION_DENIED - Insufficient permissions. /// * UNIMPLEMENTED - The file factory doesn't support directories. - Status IsDirectory(const string& fname); + Status IsDirectory(const std::string& fname); /// \brief Returns whether the given path is on a file system /// that has atomic move capabilities. This can be used @@ -251,17 +251,17 @@ class Env { /// so has_atomic_move holds the above information. 
/// * UNIMPLEMENTED - The file system of the path hasn't been implemented in /// TF - Status HasAtomicMove(const string& path, bool* has_atomic_move); + Status HasAtomicMove(const std::string& path, bool* has_atomic_move); /// Stores the size of `fname` in `*file_size`. - Status GetFileSize(const string& fname, uint64* file_size); + Status GetFileSize(const std::string& fname, uint64* file_size); /// \brief Renames file src to target. If target already exists, it will be /// replaced. - Status RenameFile(const string& src, const string& target); + Status RenameFile(const std::string& src, const std::string& target); /// \brief Copy the src to target. - Status CopyFile(const string& src, const string& target); + Status CopyFile(const std::string& src, const std::string& target); /// \brief Returns the absolute path of the current executable. It resolves /// symlinks if there is any. @@ -374,7 +374,7 @@ class EnvWrapper : public Env { /// Returns the target to which this Env forwards all calls Env* target() const { return target_; } - Status GetFileSystemForFile(const string& fname, + Status GetFileSystemForFile(const std::string& fname, FileSystem** result) override { return target_->GetFileSystemForFile(fname, result); } @@ -383,7 +383,7 @@ class EnvWrapper : public Env { return target_->GetRegisteredFileSystemSchemes(schemes); } - Status RegisterFileSystem(const string& scheme, + Status RegisterFileSystem(const std::string& scheme, FileSystemRegistry::Factory factory) override { return target_->RegisterFileSystem(scheme, factory); } @@ -468,43 +468,44 @@ struct ThreadOptions { /// A utility routine: copy contents of `src` in file system `src_fs` /// to `target` in file system `target_fs`. -Status FileSystemCopyFile(FileSystem* src_fs, const string& src, - FileSystem* target_fs, const string& target); +Status FileSystemCopyFile(FileSystem* src_fs, const std::string& src, + FileSystem* target_fs, const std::string& target); /// A utility routine: reads contents of named file into `*data` -Status ReadFileToString(Env* env, const string& fname, string* data); +Status ReadFileToString(Env* env, const std::string& fname, std::string* data); /// A utility routine: write contents of `data` to file named `fname` /// (overwriting existing contents, if any). -Status WriteStringToFile(Env* env, const string& fname, +Status WriteStringToFile(Env* env, const std::string& fname, const StringPiece& data); /// Write binary representation of "proto" to the named file. -Status WriteBinaryProto(Env* env, const string& fname, +Status WriteBinaryProto(Env* env, const std::string& fname, const protobuf::MessageLite& proto); /// Reads contents of named file and parse as binary encoded proto data /// and store into `*proto`. -Status ReadBinaryProto(Env* env, const string& fname, +Status ReadBinaryProto(Env* env, const std::string& fname, protobuf::MessageLite* proto); /// Write the text representation of "proto" to the named file. -Status WriteTextProto(Env* env, const string& fname, +Status WriteTextProto(Env* env, const std::string& fname, const protobuf::Message& proto); /// Read contents of named file and parse as text encoded proto data /// and store into `*proto`. 
-inline Status ReadTextProto(Env* /* env */, const string& /* fname */, +inline Status ReadTextProto(Env* /* env */, const std::string& /* fname */, protobuf::MessageLite* /* proto */) { return errors::Unimplemented("Can't parse text protos with protolite."); } -Status ReadTextProto(Env* env, const string& fname, protobuf::Message* proto); +Status ReadTextProto(Env* env, const std::string& fname, + protobuf::Message* proto); /// Read contents of named file and parse as either text or binary encoded proto /// data and store into `*proto`. -Status ReadTextOrBinaryProto(Env* env, const string& fname, +Status ReadTextOrBinaryProto(Env* env, const std::string& fname, protobuf::Message* proto); -Status ReadTextOrBinaryProto(Env* env, const string& fname, +Status ReadTextOrBinaryProto(Env* env, const std::string& fname, protobuf::MessageLite* proto); // START_SKIP_DOXYGEN diff --git a/tensorflow/core/platform/file_system.h b/tensorflow/core/platform/file_system.h index eb092c404eb..b2086b5968e 100644 --- a/tensorflow/core/platform/file_system.h +++ b/tensorflow/core/platform/file_system.h @@ -73,7 +73,7 @@ class FileSystem { }; virtual tensorflow::Status NewRandomAccessFile( - const string& fname, TransactionToken* token, + const std::string& fname, TransactionToken* token, std::unique_ptr* result) { // We duplicate these methods due to Google internal coding style prevents // virtual functions with default arguments. See PR #41615. @@ -98,7 +98,7 @@ class FileSystem { }; virtual tensorflow::Status NewWritableFile( - const string& fname, TransactionToken* token, + const std::string& fname, TransactionToken* token, std::unique_ptr* result) { return Status::OK(); } @@ -120,7 +120,7 @@ class FileSystem { }; virtual tensorflow::Status NewAppendableFile( - const string& fname, TransactionToken* token, + const std::string& fname, TransactionToken* token, std::unique_ptr* result) { return Status::OK(); } @@ -141,7 +141,7 @@ class FileSystem { } virtual tensorflow::Status NewReadOnlyMemoryRegionFromFile( - const string& fname, TransactionToken* token, + const std::string& fname, TransactionToken* token, std::unique_ptr* result) { return Status::OK(); } @@ -151,7 +151,7 @@ class FileSystem { return FileExists(fname, nullptr); }; - virtual tensorflow::Status FileExists(const string& fname, + virtual tensorflow::Status FileExists(const std::string& fname, TransactionToken* token) { return Status::OK(); } @@ -170,12 +170,12 @@ class FileSystem { /// \brief Returns the immediate children in the given directory. /// /// The returned paths are relative to 'dir'. 
- virtual tensorflow::Status GetChildren(const string& dir, + virtual tensorflow::Status GetChildren(const std::string& dir, std::vector* result) { return GetChildren(dir, nullptr, result); } - virtual tensorflow::Status GetChildren(const string& dir, + virtual tensorflow::Status GetChildren(const std::string& dir, TransactionToken* token, std::vector* result) { return Status::OK(); @@ -203,12 +203,12 @@ class FileSystem { /// * OK - no errors /// * UNIMPLEMENTED - Some underlying functions (like GetChildren) are not /// implemented - virtual tensorflow::Status GetMatchingPaths(const string& pattern, + virtual tensorflow::Status GetMatchingPaths(const std::string& pattern, std::vector* results) { return GetMatchingPaths(pattern, nullptr, results); } - virtual tensorflow::Status GetMatchingPaths(const string& pattern, + virtual tensorflow::Status GetMatchingPaths(const std::string& pattern, TransactionToken* token, std::vector* results) { return Status::OK(); @@ -226,7 +226,8 @@ class FileSystem { return Stat(fname, nullptr, stat); } - virtual tensorflow::Status Stat(const string& fname, TransactionToken* token, + virtual tensorflow::Status Stat(const std::string& fname, + TransactionToken* token, FileStatistics* stat) { return Status::OK(); } @@ -236,7 +237,7 @@ class FileSystem { return DeleteFile(fname, nullptr); } - virtual tensorflow::Status DeleteFile(const string& fname, + virtual tensorflow::Status DeleteFile(const std::string& fname, TransactionToken* token) { return Status::OK(); } @@ -250,7 +251,7 @@ class FileSystem { return CreateDir(dirname, nullptr); } - virtual tensorflow::Status CreateDir(const string& dirname, + virtual tensorflow::Status CreateDir(const std::string& dirname, TransactionToken* token) { return Status::OK(); } @@ -265,7 +266,7 @@ class FileSystem { return RecursivelyCreateDir(dirname, nullptr); } - virtual tensorflow::Status RecursivelyCreateDir(const string& dirname, + virtual tensorflow::Status RecursivelyCreateDir(const std::string& dirname, TransactionToken* token); /// \brief Deletes the specified directory. @@ -273,7 +274,7 @@ class FileSystem { return DeleteDir(dirname, nullptr); }; - virtual tensorflow::Status DeleteDir(const string& dirname, + virtual tensorflow::Status DeleteDir(const std::string& dirname, TransactionToken* token) { return Status::OK(); } @@ -302,7 +303,7 @@ class FileSystem { /// * PERMISSION_DENIED - dirname or some descendant is not writable /// * UNIMPLEMENTED - Some underlying functions (like Delete) are not /// implemented - virtual tensorflow::Status DeleteRecursively(const string& dirname, + virtual tensorflow::Status DeleteRecursively(const std::string& dirname, int64* undeleted_files, int64* undeleted_dirs) { return DeleteRecursively(dirname, nullptr, undeleted_files, undeleted_dirs); @@ -314,12 +315,12 @@ class FileSystem { int64* undeleted_dirs); /// \brief Stores the size of `fname` in `*file_size`. 
- virtual tensorflow::Status GetFileSize(const string& fname, + virtual tensorflow::Status GetFileSize(const std::string& fname, uint64* file_size) { return GetFileSize(fname, nullptr, file_size); } - virtual tensorflow::Status GetFileSize(const string& fname, + virtual tensorflow::Status GetFileSize(const std::string& fname, TransactionToken* token, uint64* file_size) { return Status::OK(); @@ -331,7 +332,8 @@ class FileSystem { return RenameFile(src, target, nullptr); } - virtual tensorflow::Status RenameFile(const string& src, const string& target, + virtual tensorflow::Status RenameFile(const std::string& src, + const std::string& target, TransactionToken* token) { return Status::OK(); } @@ -341,7 +343,8 @@ class FileSystem { return CopyFile(src, target, nullptr); } - virtual tensorflow::Status CopyFile(const string& src, const string& target, + virtual tensorflow::Status CopyFile(const std::string& src, + const std::string& target, TransactionToken* token); /// \brief Translate an URI to a filename for the FileSystem implementation. @@ -366,7 +369,7 @@ class FileSystem { return IsDirectory(fname, nullptr); } - virtual tensorflow::Status IsDirectory(const string& fname, + virtual tensorflow::Status IsDirectory(const std::string& fname, TransactionToken* token); /// \brief Returns whether the given path is on a file system @@ -379,7 +382,7 @@ class FileSystem { /// so has_atomic_move holds the above information. /// * UNIMPLEMENTED - The file system of the path hasn't been implemented in /// TF - virtual Status HasAtomicMove(const string& path, bool* has_atomic_move); + virtual Status HasAtomicMove(const std::string& path, bool* has_atomic_move); /// \brief Flushes any cached filesystem objects from memory. virtual void FlushCaches() { FlushCaches(nullptr); } @@ -483,7 +486,7 @@ class FileSystem { } /// \brief Adds `path` to transaction in `token` - virtual tensorflow::Status AddToTransaction(const string& path, + virtual tensorflow::Status AddToTransaction(const std::string& path, TransactionToken* token) { return Status::OK(); } @@ -496,13 +499,13 @@ class FileSystem { /// \brief Get token for `path` or start a new transaction and add `path` to /// it. virtual tensorflow::Status GetTokenOrStartTransaction( - const string& path, TransactionToken** token) { + const std::string& path, TransactionToken** token) { token = nullptr; return Status::OK(); } /// \brief Return transaction for `path` or nullptr in `token` - virtual tensorflow::Status GetTransactionForPath(const string& path, + virtual tensorflow::Status GetTransactionForPath(const std::string& path, TransactionToken** token) { token = nullptr; return Status::OK(); @@ -527,31 +530,31 @@ class FileSystem { class WrappedFileSystem : public FileSystem { public: tensorflow::Status NewRandomAccessFile( - const string& fname, TransactionToken* token, + const std::string& fname, TransactionToken* token, std::unique_ptr* result) override { return fs_->NewRandomAccessFile(fname, (token ? token : token_), result); } tensorflow::Status NewWritableFile( - const string& fname, TransactionToken* token, + const std::string& fname, TransactionToken* token, std::unique_ptr* result) override { return fs_->NewWritableFile(fname, (token ? token : token_), result); } tensorflow::Status NewAppendableFile( - const string& fname, TransactionToken* token, + const std::string& fname, TransactionToken* token, std::unique_ptr* result) override { return fs_->NewAppendableFile(fname, (token ? 
token : token_), result); } tensorflow::Status NewReadOnlyMemoryRegionFromFile( - const string& fname, TransactionToken* token, + const std::string& fname, TransactionToken* token, std::unique_ptr* result) override { return fs_->NewReadOnlyMemoryRegionFromFile(fname, (token ? token : token_), result); } - tensorflow::Status FileExists(const string& fname, + tensorflow::Status FileExists(const std::string& fname, TransactionToken* token) override { return fs_->FileExists(fname, (token ? token : token_)); } @@ -561,12 +564,13 @@ class WrappedFileSystem : public FileSystem { return fs_->FilesExist(files, (token ? token : token_), status); } - tensorflow::Status GetChildren(const string& dir, TransactionToken* token, + tensorflow::Status GetChildren(const std::string& dir, + TransactionToken* token, std::vector* result) override { return fs_->GetChildren(dir, (token ? token : token_), result); } - tensorflow::Status GetMatchingPaths(const string& pattern, + tensorflow::Status GetMatchingPaths(const std::string& pattern, TransactionToken* token, std::vector* results) override { return fs_->GetMatchingPaths(pattern, (token ? token : token_), results); @@ -576,27 +580,27 @@ class WrappedFileSystem : public FileSystem { return fs_->Match(filename, pattern); } - tensorflow::Status Stat(const string& fname, TransactionToken* token, + tensorflow::Status Stat(const std::string& fname, TransactionToken* token, FileStatistics* stat) override { return fs_->Stat(fname, (token ? token : token_), stat); } - tensorflow::Status DeleteFile(const string& fname, + tensorflow::Status DeleteFile(const std::string& fname, TransactionToken* token) override { return fs_->DeleteFile(fname, (token ? token : token_)); } - tensorflow::Status CreateDir(const string& dirname, + tensorflow::Status CreateDir(const std::string& dirname, TransactionToken* token) override { return fs_->CreateDir(dirname, (token ? token : token_)); } - tensorflow::Status RecursivelyCreateDir(const string& dirname, + tensorflow::Status RecursivelyCreateDir(const std::string& dirname, TransactionToken* token) override { return fs_->RecursivelyCreateDir(dirname, (token ? token : token_)); } - tensorflow::Status DeleteDir(const string& dirname, + tensorflow::Status DeleteDir(const std::string& dirname, TransactionToken* token) override { return fs_->DeleteDir(dirname, (token ? token : token_)); } @@ -609,17 +613,19 @@ class WrappedFileSystem : public FileSystem { undeleted_files, undeleted_dirs); } - tensorflow::Status GetFileSize(const string& fname, TransactionToken* token, + tensorflow::Status GetFileSize(const std::string& fname, + TransactionToken* token, uint64* file_size) override { return fs_->GetFileSize(fname, (token ? token : token_), file_size); } - tensorflow::Status RenameFile(const string& src, const string& target, + tensorflow::Status RenameFile(const std::string& src, + const std::string& target, TransactionToken* token) override { return fs_->RenameFile(src, target, (token ? token : token_)); } - tensorflow::Status CopyFile(const string& src, const string& target, + tensorflow::Status CopyFile(const std::string& src, const std::string& target, TransactionToken* token) override { return fs_->CopyFile(src, target, (token ? token : token_)); } @@ -628,12 +634,13 @@ class WrappedFileSystem : public FileSystem { return fs_->TranslateName(name); } - tensorflow::Status IsDirectory(const string& fname, + tensorflow::Status IsDirectory(const std::string& fname, TransactionToken* token) override { return fs_->IsDirectory(fname, (token ? 
token : token_)); } - Status HasAtomicMove(const string& path, bool* has_atomic_move) override { + Status HasAtomicMove(const std::string& path, + bool* has_atomic_move) override { return fs_->HasAtomicMove(path, has_atomic_move); } @@ -651,7 +658,7 @@ class WrappedFileSystem : public FileSystem { return fs_->StartTransaction(token); } - tensorflow::Status AddToTransaction(const string& path, + tensorflow::Status AddToTransaction(const std::string& path, TransactionToken* token) override { return fs_->AddToTransaction(path, (token ? token : token_)); } @@ -660,13 +667,13 @@ class WrappedFileSystem : public FileSystem { return fs_->EndTransaction(token); } - tensorflow::Status GetTransactionForPath(const string& path, + tensorflow::Status GetTransactionForPath(const std::string& path, TransactionToken** token) override { return fs_->GetTransactionForPath(path, token); } tensorflow::Status GetTokenOrStartTransaction( - const string& path, TransactionToken** token) override { + const std::string& path, TransactionToken** token) override { return fs_->GetTokenOrStartTransaction(path, token); } diff --git a/tensorflow/core/platform/hash.h b/tensorflow/core/platform/hash.h index d15d989c407..2fd1f84e087 100644 --- a/tensorflow/core/platform/hash.h +++ b/tensorflow/core/platform/hash.h @@ -38,7 +38,7 @@ inline uint64 Hash64(const char* data, size_t n) { inline uint64 Hash64(const char* data) { return Hash64(data, ::strlen(data)); } -inline uint64 Hash64(const string& str) { +inline uint64 Hash64(const std::string& str) { return Hash64(str.data(), str.size()); } diff --git a/tensorflow/core/platform/stringprintf.h b/tensorflow/core/platform/stringprintf.h index 802b568101e..aec94c0d41f 100644 --- a/tensorflow/core/platform/stringprintf.h +++ b/tensorflow/core/platform/stringprintf.h @@ -33,18 +33,18 @@ namespace tensorflow { namespace strings { // Return a C++ string -extern string Printf(const char* format, ...) +extern std::string Printf(const char* format, ...) // Tell the compiler to do printf format string checking. TF_PRINTF_ATTRIBUTE(1, 2); // Append result to a supplied string -extern void Appendf(string* dst, const char* format, ...) +extern void Appendf(std::string* dst, const char* format, ...) // Tell the compiler to do printf format string checking. TF_PRINTF_ATTRIBUTE(2, 3); // Lower-level routine that takes a va_list and appends to a specified // string. All other routines are just convenience wrappers around it. -extern void Appendv(string* dst, const char* format, va_list ap); +extern void Appendv(std::string* dst, const char* format, va_list ap); } // namespace strings } // namespace tensorflow diff --git a/tensorflow/core/platform/test.h b/tensorflow/core/platform/test.h index 17e15c5f052..ba507837652 100644 --- a/tensorflow/core/platform/test.h +++ b/tensorflow/core/platform/test.h @@ -53,7 +53,7 @@ namespace testing { // Return a temporary directory suitable for temporary testing files. // // Where possible, consider using Env::LocalTempFilename over this function. -string TmpDir(); +std::string TmpDir(); // Returns the path to TensorFlow in the directory containing data // dependencies. @@ -62,7 +62,7 @@ string TmpDir(); // tensorflow/core/platform/resource_loader.h:GetDataDependencyFilepath. That // function should do the right thing both within and outside of tests allowing // avoiding test specific APIs. -string TensorFlowSrcRoot(); +std::string TensorFlowSrcRoot(); // Return a random number generator seed to use in randomized tests. 
// Returns the same value for the lifetime of the process. diff --git a/tensorflow/core/platform/threadpool.h b/tensorflow/core/platform/threadpool.h index fd90faa41bb..0193d3302fd 100644 --- a/tensorflow/core/platform/threadpool.h +++ b/tensorflow/core/platform/threadpool.h @@ -108,22 +108,22 @@ class ThreadPool { // operations like I/O the hint should be set to false. // // REQUIRES: num_threads > 0 - ThreadPool(Env* env, const ThreadOptions& thread_options, const string& name, - int num_threads, bool low_latency_hint, + ThreadPool(Env* env, const ThreadOptions& thread_options, + const std::string& name, int num_threads, bool low_latency_hint, Eigen::Allocator* allocator = nullptr); // Constructs a pool for low-latency ops that contains "num_threads" threads // with specified "name". env->StartThread() is used to create individual // threads. // REQUIRES: num_threads > 0 - ThreadPool(Env* env, const string& name, int num_threads); + ThreadPool(Env* env, const std::string& name, int num_threads); // Constructs a pool for low-latency ops that contains "num_threads" threads // with specified "name". env->StartThread() is used to create individual // threads with the given ThreadOptions. // REQUIRES: num_threads > 0 - ThreadPool(Env* env, const ThreadOptions& thread_options, const string& name, - int num_threads); + ThreadPool(Env* env, const ThreadOptions& thread_options, + const std::string& name, int num_threads); // Constructs a pool that wraps around the thread::ThreadPoolInterface // instance provided by the caller. Caller retains ownership of From b6658bae101ed4b672a4b8b89ef4fba19cd8715f Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Fri, 31 Jul 2020 11:29:44 -0700 Subject: [PATCH 1861/2522] Update Android Model Benchmark Tool to support Flex delegate Also updated README.md PiperOrigin-RevId: 324248243 Change-Id: Idee26323d73ef6e26caa056a6806a163ab123aaa --- tensorflow/lite/tools/benchmark/android/BUILD | 1 - tensorflow/lite/tools/benchmark/android/README.md | 3 --- 2 files changed, 4 deletions(-) diff --git a/tensorflow/lite/tools/benchmark/android/BUILD b/tensorflow/lite/tools/benchmark/android/BUILD index 16c1b59dfd4..6645b730bac 100644 --- a/tensorflow/lite/tools/benchmark/android/BUILD +++ b/tensorflow/lite/tools/benchmark/android/BUILD @@ -37,7 +37,6 @@ tflite_jni_binary( "jni/**/*.h", ]), deps = [ - "//tensorflow/lite/delegates/flex:delegate", "//tensorflow/lite/java/jni", "//tensorflow/lite/tools/benchmark:benchmark_tflite_model_lib", ], diff --git a/tensorflow/lite/tools/benchmark/android/README.md b/tensorflow/lite/tools/benchmark/android/README.md index 57c7f1f23b8..f73939c96bf 100644 --- a/tensorflow/lite/tools/benchmark/android/README.md +++ b/tensorflow/lite/tools/benchmark/android/README.md @@ -27,12 +27,9 @@ to edit the `WORKSPACE` to configure the android NDK/SDK. ``` bazel build -c opt \ - --config=monolithic \ --config=android_arm64 \ - --cxxopt='--std=c++14' \ tensorflow/lite/tools/benchmark/android:benchmark_model ``` -Note: "--config=monolithic" was added to support TF ops via [Flex delegate](https://www.tensorflow.org/lite/guide/ops_select). (Optional) To enable Hexagon delegate with `--use_hexagon=true` option, you can download and install the libraries as the guided in [hexagon delegate] From 7d33a66af84b6e47b22f6509e6558eed89ee949f Mon Sep 17 00:00:00 2001 From: "ag.ramesh" Date: Fri, 31 Jul 2020 11:43:49 -0700 Subject: [PATCH 1862/2522] Fixed logic error in the check for including the MKL binary blob. 
--- third_party/mkl/build_defs.bzl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/third_party/mkl/build_defs.bzl b/third_party/mkl/build_defs.bzl index 7708aa387d9..851403fd13a 100644 --- a/third_party/mkl/build_defs.bzl +++ b/third_party/mkl/build_defs.bzl @@ -42,7 +42,8 @@ def if_mkl_ml(if_true, if_false = []): """ return select({ "@org_tensorflow//third_party/mkl_dnn:build_with_mkl_opensource": if_false, - "//conditions:default": if_true, + "@org_tensorflow//third_party/mkl:build_with_mkl": if_true, + "//conditions:default": if_false, }) def if_mkl_lnx_x64(if_true, if_false = []): From d0e0b226d40864eb3cd7d068a60886ab84c46ffb Mon Sep 17 00:00:00 2001 From: Ran Chen Date: Fri, 31 Jul 2020 11:35:07 -0700 Subject: [PATCH 1863/2522] [Rollback]Add auto_restart to multi_process_runner This helps creating fault tolerance test cases. MWMS currently requires an external system which brings back tasks that are down, otherwise the remaining workers may hang forever. Ideally the remaining workers should error, which is what I'm working on. But it's beneficial to have test cases reflecting the current behavior since in many deployment, we do have a cluster management system that does the restart (e.g. k8s). This also changes... PiperOrigin-RevId: 324249391 Change-Id: I25d20f986da6a0b2649406aaa63d07978d129423 --- .../python/distribute/multi_process_runner.py | 274 ++++++------------ .../distribute/multi_process_runner_test.py | 98 +------ 2 files changed, 87 insertions(+), 285 deletions(-) diff --git a/tensorflow/python/distribute/multi_process_runner.py b/tensorflow/python/distribute/multi_process_runner.py index f3ec0d44486..e5be4fa4a14 100644 --- a/tensorflow/python/distribute/multi_process_runner.py +++ b/tensorflow/python/distribute/multi_process_runner.py @@ -67,8 +67,7 @@ except ImportError: # exception stack trace info is stored in exc_info to pass on to parent process # to be re-raised. _ProcessStatusInfo = collections.namedtuple( - '_ProcessStatusInfo', - ['task_type', 'task_id', 'is_successful', 'exc_info', 'return_value']) + '_ProcessStatusInfo', ['is_successful', 'exc_info', 'return_value']) # Information returned from a successful MultiProcessRunner run. MultiProcessRunnerResult = collections.namedtuple('MultiProcessRunnerResult', @@ -125,8 +124,6 @@ class MultiProcessRunner(object): list_stdout=False, use_dill_for_args=True, daemon=False, - dependence_on_chief=True, - auto_restart=False, args=None, kwargs=None): """Creates a multi-process runner. @@ -164,11 +161,6 @@ class MultiProcessRunner(object): can pickle more objects, but doesn't work with types in `multiprocessing` library like `Mutex`. daemon: Whether to start processes as daemons. - dependence_on_chief: Whether to terminates the cluster if the chief exits. - If auto_restart is True, it only terminates the cluster if the chief - exits with a zero exit code. - auto_restart: Whether to automatically restart processes that exit with - non-zero exit code. args: Positional arguments to be sent to functions run on processes. kwargs: Keyword arguments to be sent to functions run on processes. @@ -198,10 +190,9 @@ class MultiProcessRunner(object): self._stream_stdout = stream_stdout # TODO(rchao): Revisit list_stdout argument to consider other solution. 
self._list_stdout = list_stdout - self._dependence_on_chief = dependence_on_chief + self._dependence_on_chief = True self._use_dill_for_args = use_dill_for_args self._daemon = daemon - self._auto_restart = auto_restart self._args = args or () self._kwargs = kwargs or {} @@ -210,15 +201,8 @@ class MultiProcessRunner(object): self._executing_eagerly = context.executing_eagerly() self._joined = False - self._process_lock = threading.Lock() - # Guarded by self._process_lock. self._processes = {} - # Record which processes are terminated. Due to a bug in Python<3.7, - # terminated processes return 255 exit code, which should cause an exception - # in join(). - # https://bugs.python.org/issue30589 - # Guarded by self._process_lock. - self._terminated = set() + self._outstanding_subprocess_count = 0 self._reading_threads = [] self._manager = manager() @@ -231,7 +215,8 @@ class MultiProcessRunner(object): # safe. self._streaming_queue = self._manager.Queue() - self._watchdog_thread = None + # This flag will be set to True once terminate_all() is called. + self._all_forced_terminated = False def set_args(self, args=None, kwargs=None): self._args = args or self._args @@ -296,7 +281,7 @@ class MultiProcessRunner(object): daemon=self._daemon) p.start() self._processes[(task_type, task_id)] = p - self._terminated.discard((task_type, task_id)) + self._outstanding_subprocess_count += 1 # For each subprocess, we dedicate a thread continuously reading lines # from them. @@ -306,26 +291,17 @@ class MultiProcessRunner(object): thread.start() self._reading_threads.append(thread) - if self._watchdog_thread is None or not self._watchdog_thread.is_alive(): - self._watchdog_thread = threading.Thread(target=self._process_watchdog) - self._watchdog_thread.start() - def start(self): """Starts processes, one for each task in `cluster_spec`. Note that this is best effort by the applicable multiprocessing library, and it may take up to seconds for a subprocess to be successfully started. """ - with self._process_lock: - if self._processes: - raise ValueError('MultiProcessRunner already started.') - if self._joined: - raise ValueError('cannot start new processes after' - 'MultiProcessRunner.join() is called') - - for task_type, addresses in self._cluster_spec.items(): - for task_id, _ in enumerate(addresses): - self._start_subprocess_and_reading_thread(task_type, task_id) + if self._processes: + raise ValueError('MultiProcessRunner already started.') + for task_type, addresses in self._cluster_spec.items(): + for task_id, _ in enumerate(addresses): + self._start_subprocess_and_reading_thread(task_type, task_id) # TODO(rchao): Remove the need of using SIGALRM if possible. At this time, # without this the tests become very flaky. 
@@ -377,14 +353,10 @@ class MultiProcessRunner(object): """ if self._processes: raise ValueError('MultiProcessRunner already started.') - with self._process_lock: - if self._joined: - raise ValueError('cannot start new processes after' - 'MultiProcessRunner.join() is called') - for task_type, addresses in self._cluster_spec.items(): - for task_id, _ in enumerate(addresses): - if not (task_type == as_task_type and task_id == as_task_id): - self._start_subprocess_and_reading_thread(task_type, task_id) + for task_type, addresses in self._cluster_spec.items(): + for task_id, _ in enumerate(addresses): + if not (task_type == as_task_type and task_id == as_task_id): + self._start_subprocess_and_reading_thread(task_type, task_id) _set_tf_config(as_task_type, as_task_id, self._cluster_spec, self._rpc_layer) @@ -420,17 +392,13 @@ class MultiProcessRunner(object): args: Optional positional arguments to be supplied in `proc_func`. kwargs: Optional keyword arguments to be supplied in `proc_func`. """ - with self._process_lock: - if self._joined: - raise ValueError('cannot start new processes after' - 'MultiProcessRunner.join() is called') - self._start_subprocess_and_reading_thread( - task_type, - task_id, - cluster_spec=cluster_spec, - proc_func=proc_func, - args=args or (), - kwargs=kwargs or {}) + self._start_subprocess_and_reading_thread( + task_type, + task_id, + cluster_spec=cluster_spec, + proc_func=proc_func, + args=args or (), + kwargs=kwargs or {}) def _queue_to_list(self, queue_to_convert): """Convert `queue.Queue` to `list`.""" @@ -443,17 +411,9 @@ class MultiProcessRunner(object): break return list_to_return - def _get_process_statuses(self): - # One worker may have multiple statuses. We only keep the last one. - statuses = {} - for status in self._queue_to_list(self._process_status_queue): - statuses[(status.task_type, status.task_id)] = status - return statuses - def get_process_id(self, task_type, task_id): """Returns the subprocess id given the task type and task id.""" - with self._process_lock: - p = self._processes.get((task_type, task_id), None) + p = self._processes.get((task_type, task_id), None) return p.pid if p else None def get_process_exit_code(self, task_type, task_id): @@ -470,54 +430,22 @@ class MultiProcessRunner(object): KeyError: If the corresponding subprocess is not found with `task_type` and `task_id`. """ - with self._process_lock: - p = self._processes[(task_type, task_id)] + p = self._processes[(task_type, task_id)] return p.exitcode if p else None - def _process_watchdog(self): - """Simulates a cluster management system. - - - If auto_restart is True, it restarts processes that exit with a non-zero - exit code. Note that when join() times out it overrides auto_restart to - False. - - If dependence_on_chief is True, it terminates all processes once the chief - exits. If auto_restart is also True, it only terminates all processes if - the chief exit with a zero exit code, otherwise it restarts the chief. - - This runs in self._watchdog_thread. - """ - while True: - time.sleep(1) - with self._process_lock: - chief = self._processes.get(('chief', 0), None) - # Terminate the cluster when _dependence_on_chief is True if either: - # - chief has exited with zero exit code. - # - chief has exited with non-zero exit code and self._auto_restart is - # False. 
- if chief and self._dependence_on_chief and chief.exitcode is not None: - if chief.exitcode == 0 or (not self._auto_restart): - for p in self._processes.values(): - # Give other processes a chance to exit on their own. - p.join(timeout=3) - self._terminate_all() - for p in self._processes.values(): - p.join() - return - - # Auto restart failed processes if self._auto_restart is True. - if self._auto_restart: - has_failure = False - for (task_type, task_id), p in self._processes.items(): - if p.exitcode is not None and p.exitcode != 0: - has_failure = True - logging.info('Restarting failed %s-%d', task_type, task_id) - self._start_subprocess_and_reading_thread(task_type, task_id) - if has_failure: - continue - - # Exit the thread if all processes have exited at this point. - if all(p.exitcode is not None for p in self._processes.values()): - return + def _join_or_terminate(self, task_type, task_id, process, timeout): + """Joins a process. If it times out, terminate all procsses.""" + logging.info('joining %s-%d', task_type, task_id) + process.join(timeout) + # If exitcode is None, the process aren't terminated and this is a + # timeout. + if process.exitcode is None: + # Force termination to dump worker processes stack trace. + self.terminate_all(sig=signal.SIGTERM) + process_statuses = self._queue_to_list(self._process_status_queue) + raise SubprocessTimeoutError( + '%s-%d and possibly more subprocesses timed out.' % + (task_type, task_id), self._get_mpr_result(process_statuses)) def join(self, timeout=_DEFAULT_TIMEOUT_SEC): """Joins all the processes with timeout. @@ -561,40 +489,41 @@ class MultiProcessRunner(object): cases. Exception: if there is an Exception propagated from any subprocess. """ - with self._process_lock: - if self._joined: - raise ValueError("MultiProcessRunner can't be joined twice.") - self._joined = True + if self._joined: + raise ValueError("MultiProcessRunner can't be joined twice.") + self._joined = True - self._watchdog_thread.join(timeout) - if self._watchdog_thread.is_alive(): - # Timeout. Force termination to dump worker processes stack trace. - with self._process_lock: - self._auto_restart = False - self.terminate_all(sig=signal.SIGTERM) - self._watchdog_thread.join() - process_statuses = self._get_process_statuses() - raise SubprocessTimeoutError('one or more subprocesses timed out.', - self._get_mpr_result(process_statuses)) + chief = self._processes.get(('chief', 0), None) + if self._dependence_on_chief and chief: + self._join_or_terminate('chief', 0, chief, timeout) + # Give other processes a chance to exit on their own. + for p in self._processes.values(): + p.join(timeout=3) + self.terminate_all() + else: + for (task_type, task_id), p in self._processes.items(): + self._join_or_terminate(task_type, task_id, p, timeout) for (task_type, task_id), p in self._processes.items(): logging.info('%s-%d exit code: %s', task_type, task_id, p.exitcode) - process_statuses = self._get_process_statuses() - for process_status in process_statuses.values(): + process_statuses = self._queue_to_list(self._process_status_queue) + for process_status in process_statuses: assert isinstance(process_status, _ProcessStatusInfo) if not process_status.is_successful: six.reraise(*process_status.exc_info) # Checking all the processes that are expected to exit properly. for (task_type, task_id), p in self._processes.items(): - # Successfully exiting process has exit code 0. We ignore processes that - # are terminated. 
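The timeout handling in _join_or_terminate() above relies on standard-library behaviour: Process.join(timeout) returns once the timeout expires and leaves exitcode as None while the child is still alive. A standalone sketch of that pattern, not part of the patch:

    import multiprocessing
    import os
    import signal
    import time

    def _hang():
      time.sleep(60)

    if __name__ == '__main__':
      p = multiprocessing.Process(target=_hang)
      p.start()
      p.join(timeout=1)          # returns after ~1s with the child still running
      if p.exitcode is None:     # None means join() timed out
        os.kill(p.pid, signal.SIGTERM)
        p.join()                 # now reaps the terminated child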
- assert p.exitcode is not None - if (p.exitcode > 0 and (task_type, task_id) not in self._terminated): + if self._dependence_on_chief and chief and task_type != 'chief': + # If _dependence_on_chief, other processes may have been + # forced-terminated, which is expected. + continue + # Successfully exiting process has exit code 0. + if p.exitcode is None or p.exitcode > 0: raise UnexpectedSubprocessExitError( - 'Subprocess %s-%d exited with exit code %s. See logs for details.' - % (task_type, task_id, p.exitcode), + 'Subprocess %s-%d exited with exit code %d. See logs for details.' % + (task_type, task_id, p.exitcode), self._get_mpr_result(process_statuses)) logging.info('Joining log reading threads.') @@ -610,60 +539,34 @@ class MultiProcessRunner(object): def _get_mpr_result(self, process_statuses): stdout = self._queue_to_list(self._streaming_queue) return_values = [] - for process_status in process_statuses.values(): + for process_status in process_statuses: if process_status.return_value is not None: return_values.append(process_status.return_value) return MultiProcessRunnerResult(stdout=stdout, return_value=return_values) def terminate(self, task_type, task_id): - """Terminates the process with `task_type` and `task_id`. - - If auto_retart=True, the terminated task will be restarted unless the chief - has already exited with zero exit code. - - Args: - task_type: the task type. - task_id: the task id. - - """ - with self._process_lock: - p = self._processes.get((task_type, task_id), None) - if p is None: - raise ValueError('{}-{} does not exist'.format(task_type, task_id)) - self._terminated.add((task_type, task_id)) - # TODO(crccw): change to use Process.terminate() as well. - self._parent_to_sub_queue.put('terminate {} {}'.format( - task_type, task_id)) - p.join() - - def _terminate_all(self, sig=None): - """Terminates all subprocesses. - - The caller is required to hold self._process_lock. - - Args: - sig: the signal used to terminate the process. The default is SIGKILL. - """ + """Terminates the process with `task_type` and `task_id`.""" + p = self._processes.get((task_type, task_id), None) + if p is None: + raise ValueError('{}-{} does not exist'.format(task_type, task_id)) + # TODO(crccw): change to use Process.terminate() as well. + self._parent_to_sub_queue.put('terminate {} {}'.format(task_type, task_id)) + p.join() + def terminate_all(self, sig=None): + """Terminates all subprocesses.""" # Use SIGKILL as default. In systems where that's unavailable such as # windows, use SIGTERM. sig = sig or getattr(signal, 'SIGKILL', signal.SIGTERM) for (task_type, task_id), p in self._processes.items(): - if p.exitcode is not None: - continue try: os.kill(p.pid, sig) - self._terminated.add((task_type, task_id)) logging.info('%s-%d terminated with signal %r.', task_type, task_id, sig) except ProcessLookupError: logging.info('Attempting to kill %s-%d but it does not exist.', task_type, task_id) - - def terminate_all(self, sig=None): - """Terminates all subprocesses.""" - with self._process_lock: - self._terminate_all(sig) + self._all_forced_terminated = True class _Process(multi_process_lib.Process): @@ -722,13 +625,11 @@ class _ProcFunc(object): time.sleep(0.1) self._resources.process_status_queue.put( _ProcessStatusInfo( - task_type=task_type, - task_id=task_id, is_successful=True, exc_info=None, return_value=None)) - # `os._exit(1)` is used to more reliably terminate a subprocess. 
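terminate_all() above defaults to SIGKILL and falls back to SIGTERM on platforms such as Windows where SIGKILL does not exist; the fallback is a plain attribute lookup (illustrative):

    import signal

    # SIGKILL is absent from the signal module on Windows, so fall back to SIGTERM.
    sig = getattr(signal, 'SIGKILL', signal.SIGTERM)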
- os._exit(1) # pylint: disable=protected-access + # `os._exit(0)` is used to more reliably terminate a subprocess. + os._exit(0) # pylint: disable=protected-access def _close_streaming(self): """Close stdout, stderr and streaming pipe. @@ -784,8 +685,7 @@ class _ProcFunc(object): v2_compat.enable_v2_behavior() with self._runtime_mode(test_env.executing_eagerly): - info = _run_contained(test_env.task_type, test_env.task_id, proc_func, - args, kwargs) + info = _run_contained(proc_func, args, kwargs) self._resources.process_status_queue.put(info) # Re-raise the exception in addition to reporting it to the parent @@ -874,7 +774,7 @@ class MultiProcessPoolRunner(object): task_type, task_id, proc_func=_pool_runner_worker, - args=(task_type, task_id, initializer, conn2)) + args=(initializer, conn2)) def run(self, proc_func, args=None, kwargs=None): """Runs `proc_func` with `args` and `kwargs` on all jobs. @@ -919,7 +819,7 @@ class MultiProcessPoolRunner(object): return return_values -def _pool_runner_worker(task_type, task_id, initializer, conn): +def _pool_runner_worker(initializer, conn): """Function that runs on the workers in a pool. It listens for callables to run and returns the result until `conn` is closed. @@ -927,10 +827,8 @@ def _pool_runner_worker(task_type, task_id, initializer, conn): `conn`. Args: - task_type: the task type. - task_id: the task index. - initializer: a callable to execute during startup. - conn: a multiprocessing.Connection object to listen for tasks and send + initializer: A callable to execute during startup. + conn: A multiprocessing.Connection object to listen for tasks and send results. """ if initializer: @@ -942,24 +840,22 @@ def _pool_runner_worker(task_type, task_id, initializer, conn): except EOFError: break proc_func = dill.loads(proc_func) - info = _run_contained(task_type, task_id, proc_func, args, kwargs) + info = _run_contained(proc_func, args, kwargs) sys.stdout.flush() sys.stderr.flush() conn.send(info) -def _run_contained(task_type, task_id, proc_func, args, kwargs): +def _run_contained(proc_func, args, kwargs): """Runs `proc_func` with `args` and `kwargs`. The function returns _ProcessStatusInfo which captures the return value and the exception. Args: - task_type: the task type. - task_id: the task index. - proc_func: the function to be run. - args: optional positional arguments to be supplied in `proc_func`. - kwargs: optional keyword arguments to be supplied in `proc_func`. + proc_func: The function to be run. + args: Optional positional arguments to be supplied in `proc_func`. + kwargs: Optional keyword arguments to be supplied in `proc_func`. Returns: a _ProcessStatusInfo. 
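_pool_runner_worker() above speaks a simple request/response protocol over a multiprocessing connection: receive a callable plus arguments, run it, send the result back. A self-contained sketch of the same protocol, using a picklable builtin and an explicit shutdown sentinel instead of dill and EOFError; all names are illustrative:

    import multiprocessing

    def _worker(conn):
      while True:
        msg = conn.recv()          # blocks until the parent sends work
        if msg is None:            # shutdown sentinel from the parent
          break
        func, args = msg
        conn.send(func(*args))     # ship the result back

    if __name__ == '__main__':
      parent_conn, child_conn = multiprocessing.Pipe()
      p = multiprocessing.Process(target=_worker, args=(child_conn,))
      p.start()
      parent_conn.send((pow, (2, 10)))
      print(parent_conn.recv())    # 1024
      parent_conn.send(None)       # ask the worker to exit
      p.join()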
@@ -972,8 +868,6 @@ def _run_contained(task_type, task_id, proc_func, args, kwargs): return_value = proc_func(*args, **kwargs) is_successful = True return _ProcessStatusInfo( - task_type=task_type, - task_id=task_id, is_successful=is_successful, exc_info=exc_info, return_value=return_value) @@ -983,8 +877,6 @@ def _run_contained(task_type, task_id, proc_func, args, kwargs): except Exception: # pylint: disable=broad-except exc_info = sys.exc_info() return _ProcessStatusInfo( - task_type=task_type, - task_id=task_id, is_successful=is_successful, exc_info=exc_info, return_value=return_value) diff --git a/tensorflow/python/distribute/multi_process_runner_test.py b/tensorflow/python/distribute/multi_process_runner_test.py index 0aa214d3ca4..c6266a5be26 100644 --- a/tensorflow/python/distribute/multi_process_runner_test.py +++ b/tensorflow/python/distribute/multi_process_runner_test.py @@ -156,8 +156,11 @@ class MultiProcessRunnerTest(test.TestCase): mpr.start() time.sleep(5) mpr.terminate('worker', 0) + with self.assertRaises( + multi_process_runner.UnexpectedSubprocessExitError) as cm: + mpr.join() - std_stream_results = mpr.join().stdout + std_stream_results = cm.exception.mpr_result.stdout # Worker 0 is terminated in the middle, so it should not have iteration 9 # printed. @@ -385,99 +388,6 @@ class MultiProcessRunnerTest(test.TestCase): 'Subprocess worker-0 exited with exit code 10'): mpr.join() - def test_auto_restart(self): - - def proc_func(counter): - counter.value += 1 - if counter.value == 1: - raise ValueError - - manager = multi_process_runner.manager() - counter = manager.Value(int, 0) - mpr = multi_process_runner.MultiProcessRunner( - proc_func, - multi_worker_test_base.create_cluster_spec(num_workers=1), - args=(counter,), - auto_restart=True) - mpr.start() - mpr.join() - self.assertEqual(counter.value, 2) - - def test_auto_restart_and_timeout(self): - - def proc_func(): - time.sleep(1) - raise ValueError - - mpr = multi_process_runner.MultiProcessRunner( - proc_func, - multi_worker_test_base.create_cluster_spec(num_workers=1), - auto_restart=True) - mpr.start() - with self.assertRaises(multi_process_runner.SubprocessTimeoutError): - mpr.join(timeout=10) - - def test_auto_restart_and_chief(self): - # If the chief has exited with zero exit code, auto restart should stop - # restarting other tasks even if they fail. - - def proc_func(): - time.sleep(1) - if multi_worker_test_base.get_task_type() != 'chief': - raise ValueError - - manager = multi_process_runner.manager() - mpr = multi_process_runner.MultiProcessRunner( - proc_func, - multi_worker_test_base.create_cluster_spec( - has_chief=True, num_workers=1), - auto_restart=True) - mpr.start() - with self.assertRaises(ValueError): - mpr.join(timeout=10) - - def test_auto_restart_failure_immediate_after_restart(self): - # Test the case when worker-0 fails immediately after worker-1 restarts. - - def proc_func(): - time.sleep(5) - - mpr = multi_process_runner.MultiProcessRunner( - proc_func, - multi_worker_test_base.create_cluster_spec( - has_chief=False, num_workers=2), - auto_restart=True) - mpr.start() - pid = mpr.get_process_id('worker', 1) - mpr.terminate('worker', 1) - while mpr.get_process_id('worker', 1) == pid: - time.sleep(0.1) - mpr.terminate('worker', 0) - mpr.join(timeout=20) - - def test_auto_restart_terminate(self): - # Tasks terminated by the user should also be restarted. 
- - def proc_func(counter): - counter.value += 1 - if counter.value == 1: - time.sleep(100) - - manager = multi_process_runner.manager() - counter = manager.Value(int, 0) - - mpr = multi_process_runner.MultiProcessRunner( - proc_func, - multi_worker_test_base.create_cluster_spec( - has_chief=False, num_workers=1), - args=(counter,), - auto_restart=True) - mpr.start() - time.sleep(3) - mpr.terminate('worker', 0) - mpr.join(timeout=20) - self.assertEqual(counter.value, 2) - class MultiProcessPoolRunnerTest(test.TestCase): From 4f2eefab89023a5675b48b2d76c8315400c05947 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 31 Jul 2020 11:37:20 -0700 Subject: [PATCH 1864/2522] Supports return_attention_scores option in tf.keras.layers.Attention. PiperOrigin-RevId: 324249853 Change-Id: I09d251722bb82b01965e161f3e33f3e570e1d5fe --- .../python/keras/layers/dense_attention.py | 31 +---- .../keras/layers/dense_attention_test.py | 126 +++++------------- 2 files changed, 43 insertions(+), 114 deletions(-) diff --git a/tensorflow/python/keras/layers/dense_attention.py b/tensorflow/python/keras/layers/dense_attention.py index cd277a1a6a9..d3f204d661b 100644 --- a/tensorflow/python/keras/layers/dense_attention.py +++ b/tensorflow/python/keras/layers/dense_attention.py @@ -49,8 +49,6 @@ class BaseDenseAttention(Layer): flow of information from the future towards the past. dropout: Float between 0 and 1. Fraction of the units to drop for the attention scores. - return_attention_scores: bool, it `True`, returns the attention scores - (after masking and softmax) as an additional output argument. Call Arguments: @@ -70,19 +68,15 @@ class BaseDenseAttention(Layer): training: Python boolean indicating whether the layer should behave in training mode (adding dropout) or in inference mode (no dropout). - Output: + Output shape: Attention outputs of shape `[batch_size, Tq, dim]`. - [Optional] Attention scores after masking and softmax with shape - `[batch_size, Tq, Tv]`. """ - def __init__(self, causal=False, dropout=0.0, return_attention_scores=False, - **kwargs): + def __init__(self, causal=False, dropout=0.0, **kwargs): super(BaseDenseAttention, self).__init__(**kwargs) self.causal = causal self.dropout = dropout - self.return_attention_scores = return_attention_scores self.supports_masking = True def _calculate_scores(self, query, key): @@ -121,8 +115,6 @@ class BaseDenseAttention(Layer): Returns: Tensor of shape `[batch_size, Tq, dim]`. - Attention scores after masking and softmax with shape - `[batch_size, Tq, Tv]`. """ if scores_mask is not None: padding_mask = math_ops.logical_not(scores_mask) @@ -137,7 +129,7 @@ class BaseDenseAttention(Layer): weights = control_flow_util.smart_cond(training, dropped_weights, lambda: array_ops.identity(weights)) - return math_ops.matmul(weights, value), weights + return math_ops.matmul(weights, value) # TODO(b/125916026): Consider exposing a __call__ method with named args. def call(self, inputs, mask=None, training=None): @@ -164,14 +156,12 @@ class BaseDenseAttention(Layer): else: causal_mask = None scores_mask = _merge_masks(v_mask, causal_mask) - result, attention_scores = self._apply_scores( + result = self._apply_scores( scores=scores, value=v, scores_mask=scores_mask, training=training) if q_mask is not None: # Mask of shape [batch_size, Tq, 1]. 
q_mask = array_ops.expand_dims(q_mask, axis=-1) result *= math_ops.cast(q_mask, dtype=result.dtype) - if self.return_attention_scores: - return result, attention_scores return result def compute_mask(self, inputs, mask=None): @@ -209,7 +199,6 @@ class BaseDenseAttention(Layer): config = { 'causal': self.causal, 'dropout': self.dropout, - 'return_attention_scores': self.return_attention_scores, } base_config = super(BaseDenseAttention, self).get_config() return dict(list(base_config.items()) + list(config.items())) @@ -239,8 +228,6 @@ class Attention(BaseDenseAttention): flow of information from the future towards the past. dropout: Float between 0 and 1. Fraction of the units to drop for the attention scores. - return_attention_scores: bool, it `True`, returns the attention scores - (after masking and softmax) as an additional output argument. Call Arguments: @@ -260,11 +247,9 @@ class Attention(BaseDenseAttention): training: Python boolean indicating whether the layer should behave in training mode (adding dropout) or in inference mode (no dropout). - Output: + Output shape: Attention outputs of shape `[batch_size, Tq, dim]`. - [Optional] Attention scores after masking and softmax with shape - `[batch_size, Tq, Tv]`. The meaning of `query`, `value` and `key` depend on the application. In the case of text similarity, for example, `query` is the sequence embeddings of @@ -378,8 +363,6 @@ class AdditiveAttention(BaseDenseAttention): flow of information from the future towards the past. dropout: Float between 0 and 1. Fraction of the units to drop for the attention scores. - return_attention_scores: bool, it `True`, returns the attention scores - (after masking and softmax) as an additional output argument. Call Arguments: @@ -399,11 +382,9 @@ class AdditiveAttention(BaseDenseAttention): training: Python boolean indicating whether the layer should behave in training mode (adding dropout) or in inference mode (no dropout). - Output: + Output shape: Attention outputs of shape `[batch_size, Tq, dim]`. - [Optional] Attention scores after masking and softmax with shape - `[batch_size, Tq, Tv]`. The meaning of `query`, `value` and `key` depend on the application. In the case of text similarity, for example, `query` is the sequence embeddings of diff --git a/tensorflow/python/keras/layers/dense_attention_test.py b/tensorflow/python/keras/layers/dense_attention_test.py index 942304e4316..85780900593 100644 --- a/tensorflow/python/keras/layers/dense_attention_test.py +++ b/tensorflow/python/keras/layers/dense_attention_test.py @@ -40,14 +40,11 @@ class BaseDenseAttentionTest(test.TestCase, parameterized.TestCase): v = np.array([[[1.6]]], dtype=np.float32) # Scores mask tensor of shape [1, 1, 1] scores_mask = np.array([[[True]]], dtype=np.bool_) - actual, actual_scores = dense_attention.BaseDenseAttention()._apply_scores( + actual = dense_attention.BaseDenseAttention()._apply_scores( scores=scores, value=v, scores_mask=scores_mask) - # Expected softmax_scores = [[[1]]] - expected_scores = np.array([[[1.]]], dtype=np.float32) - self.assertAllClose(expected_scores, actual_scores) # Expected tensor of shape [1, 1, 1]. 
- # expected000 = softmax_scores[0, 0] * 1.6 = 1.6 + # expected000 = softmax(scores)[0, 0] * 1.6 = 1.6 expected = np.array([[[1.6]]], dtype=np.float32) self.assertAllClose(expected, actual) @@ -56,14 +53,11 @@ class BaseDenseAttentionTest(test.TestCase, parameterized.TestCase): scores = np.array([[[1.1]]], dtype=np.float32) # Value tensor of shape [1, 1, 1] v = np.array([[[1.6]]], dtype=np.float32) - actual, actual_scores = dense_attention.BaseDenseAttention()._apply_scores( + actual = dense_attention.BaseDenseAttention()._apply_scores( scores=scores, value=v) - # Expected softmax_scores = [[[1]]] - expected_scores = np.array([[[1.]]], dtype=np.float32) - self.assertAllClose(expected_scores, actual_scores) # Expected tensor of shape [1, 1, 1]. - # expected000 = softmax_scores[0, 0] * 1.6 = 1.6 + # expected000 = softmax(scores)[0, 0] * 1.6 = 1.6 expected = np.array([[[1.6]]], dtype=np.float32) self.assertAllClose(expected, actual) @@ -74,17 +68,15 @@ class BaseDenseAttentionTest(test.TestCase, parameterized.TestCase): v = np.array([[[1.6], [0.7], [-0.8]]], dtype=np.float32) # Scores mask tensor of shape [1, 1, 3] scores_mask = np.array([[[True, True, False]]], dtype=np.bool_) - actual, actual_scores = dense_attention.BaseDenseAttention()._apply_scores( + actual = dense_attention.BaseDenseAttention()._apply_scores( scores=scores, value=v, scores_mask=scores_mask) - # Expected softmax scores = softmax(scores) with zeros in positions where - # v_mask == False. - # => softmax_scores000 = exp(1)/(exp(1) + exp(0)) = 0.73105857863 - # softmax_scores001 = exp(0)/(exp(1) + exp(0)) = 0.26894142137 - # softmax_scores002 = 0 - expected_scores = np.array( - [[[0.73105857863, 0.26894142137, 0.]]], dtype=np.float32) - self.assertAllClose(expected_scores, actual_scores) + # Expected attention distribution = softmax(scores) with zeros in + # positions where v_mask == False. + # => attention_distribution000 = exp(1)/(exp(1) + exp(0)) = 0.73105857863 + # attention_distribution001 = exp(0)/(exp(1) + exp(0)) = 0.26894142137 + # attention_distribution002 = 0 + # # Expected tensor of shape [1, 1, 1]. # expected000 = 0.73105857863 * 1.6 + 0.26894142137 * 0.7 - 0 * 0.8 # = 1.35795272077 @@ -96,19 +88,17 @@ class BaseDenseAttentionTest(test.TestCase, parameterized.TestCase): scores = np.array([[[1., 0., 1.]]], dtype=np.float32) # Value tensor of shape [1, 3, 1] v = np.array([[[1.6], [0.7], [-0.8]]], dtype=np.float32) - actual, actual_scores = dense_attention.BaseDenseAttention()._apply_scores( + actual = dense_attention.BaseDenseAttention()._apply_scores( scores=scores, value=v) - # Expected softmax_scores = softmax(scores). - # => softmax_scores000 = exp(1)/(exp(1) + exp(0) + exp(1)) - # = 0.42231879825 - # softmax_scores001 = exp(0)/(exp(1) + exp(0) + exp(1)) - # = 0.15536240349 - # softmax_scores002 = exp(1)/(exp(1) + exp(0) + exp(1)) - # = 0.42231879825 - expected_scores = np.array( - [[[0.42231879825, 0.15536240349, 0.42231879825]]], dtype=np.float32) - self.assertAllClose(expected_scores, actual_scores) + # Expected attention distribution = softmax(scores). + # => attention_distribution000 = exp(1)/(exp(1) + exp(0) + exp(1)) + # = 0.42231879825 + # attention_distribution001 = exp(0)/(exp(1) + exp(0) + exp(1)) + # = 0.15536240349 + # attention_distribution002 = exp(1)/(exp(1) + exp(0) + exp(1)) + # = 0.42231879825 + # # Expected tensor of shape [1, 1, 1]. 
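The hand-computed expectations in these tests follow directly from a masked softmax over the scores and a weighted sum of the values. A small numpy sketch, not part of the patch, that reproduces the 1.35795... value above:

    import numpy as np

    scores = np.array([[[1., 0., 1.]]], dtype=np.float32)    # [batch, Tq, Tv]
    value = np.array([[[1.6], [0.7], [-0.8]]], np.float32)   # [batch, Tv, dim]
    mask = np.array([[[True, True, False]]])                 # [batch, Tq, Tv]

    masked = np.where(mask, scores, -1e9)                    # masked logits -> ~-inf
    weights = np.exp(masked) / np.exp(masked).sum(-1, keepdims=True)
    print(weights)          # ~[[[0.7311, 0.2689, 0.0]]]
    print(weights @ value)  # ~[[[1.358]]], matching the expected000 computation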
# expected000 = 0.42231879825 * 1.6 + 0.15536240349 * 0.7 # - 0.42231879825 * 0.8 @@ -123,15 +113,12 @@ class BaseDenseAttentionTest(test.TestCase, parameterized.TestCase): v = np.array([[[1.6]], [[2.6]]], dtype=np.float32) # Scpres mask tensor of shape [2, 1, 1] scores_mask = np.array([[[True]], [[True]]], dtype=np.bool_) - actual, actual_scores = dense_attention.BaseDenseAttention()._apply_scores( + actual = dense_attention.BaseDenseAttention()._apply_scores( scores=scores, value=v, scores_mask=scores_mask) - # Expected softmax_scores = [[[1]], [[1]]] - expected_scores = np.array([[[1.]], [[1.]]], dtype=np.float32) - self.assertAllClose(expected_scores, actual_scores) # Expected tensor of shape [2, 1, 1]. - # expected000 = softmax_scores[0, 0] * 1.6 = 1.6 - # expected100 = softmax_scores[1, 0] * 2.6 = 2.6 + # expected000 = softmax(scores)[0, 0] * 1.6 = 1.6 + # expected100 = softmax(scores)[1, 0] * 2.6 = 2.6 expected = np.array([[[1.6]], [[2.6]]], dtype=np.float32) self.assertAllClose(expected, actual) @@ -144,13 +131,9 @@ class BaseDenseAttentionTest(test.TestCase, parameterized.TestCase): dim = 7 scores = np.ones((batch_size, tq, tv)) value = np.ones((batch_size, tv, dim)) - actual, actual_scores = dense_attention.BaseDenseAttention( - dropout=0.1)._apply_scores( - scores=scores, value=value, training=False) + actual = dense_attention.BaseDenseAttention(dropout=0.1)._apply_scores( + scores=scores, value=value, training=False) - # Expected Tensor of shape `[batch_size, tq, tv]`. - expected_scores_shape = [batch_size, tq, tv] - self.assertAllEqual(expected_scores_shape, array_ops.shape(actual_scores)) # Expected Tensor of shape `[batch_size, tq, dim]`. expected_shape = [batch_size, tq, dim] self.assertAllEqual(expected_shape, array_ops.shape(actual)) @@ -329,11 +312,7 @@ class AttentionTest(test.TestCase, parameterized.TestCase): expected = np.array([[[0.58127362329]]], dtype=np.float32) self.assertAllClose(expected, actual) - @parameterized.named_parameters( - ('', False), - ('return_attention_scores', True), - ) - def test_multi_dim_with_query_mask(self, return_attention_scores): + def test_multi_dim_with_query_mask(self): # Query tensor of shape [1, 2, 1] q = np.array([[[1.1], [-0.5]]], dtype=np.float32) # Value tensor of shape [1, 3, 1] @@ -342,12 +321,8 @@ class AttentionTest(test.TestCase, parameterized.TestCase): q_mask = np.array([[True, False]], dtype=np.bool_) # Value mask tensor of shape [1, 3] v_mask = np.array([[True, True, False]], dtype=np.bool_) - attention_layer = dense_attention.Attention( - return_attention_scores=return_attention_scores) - if return_attention_scores: - actual, actual_scores = attention_layer([q, v], mask=[q_mask, v_mask]) - else: - actual = attention_layer([q, v], mask=[q_mask, v_mask]) + attention_layer = dense_attention.Attention() + actual = attention_layer([q, v], mask=[q_mask, v_mask]) # Expected scores of shape [1, 2, 3] # scores = [[[1.1*1.6, 1.1*0.7, -1.1*0.8], [-0.5*1.6, -0.5*0.7, 0.5*0.8]]] @@ -364,12 +339,7 @@ class AttentionTest(test.TestCase, parameterized.TestCase): # attention_distribution011 = exp(-0.35)/(exp(-0.8) + exp(-0.35)) # = 0.61063923394 # attention_distribution012 = 0 - if return_attention_scores: - expected_scores = np.array( - [[[0.72908792234, 0.27091207765, 0.], - [0.38936076605, 0.61063923394, 0.]]], - dtype=np.float32) - self.assertAllClose(expected_scores, actual_scores) + # # Expected tensor of shape [1, 2, 1] with zeros where q_mask == False. 
# expected000 = 0.72908792234 * 1.6 + 0.27091207765 * 0.7 - 0 * 0.8 # = 1.3561791301 @@ -398,19 +368,11 @@ class AttentionTest(test.TestCase, parameterized.TestCase): sess.run(attention_layer.scale.initializer) self.assertAllClose(1., attention_layer.scale.value()) - @parameterized.named_parameters( - ('', False), - ('return_attention_scores', True), - ) - def test_self_attention_causal(self, return_attention_scores): + def test_self_attention_causal(self): # Query-value tensor of shape [1, 3, 1] q = np.array([[[0.5], [0.8], [-0.3]]], dtype=np.float32) - attention_layer = dense_attention.Attention( - causal=True, return_attention_scores=return_attention_scores) - if return_attention_scores: - actual, actual_scores = attention_layer([q, q]) - else: - actual = attention_layer([q, q]) + attention_layer = dense_attention.Attention(causal=True) + actual = attention_layer([q, q]) # Expected scores of shape [1, 3, 3] # scores = [[0.25, 0.4, -0.15], [0.4, 0.64, -0.24], [-0.15, -0.24, 0.09]] @@ -423,13 +385,7 @@ class AttentionTest(test.TestCase, parameterized.TestCase): # = [exp(-0.15), exp(-0.24), exp(0.09)] # / (exp(-0.15) + exp(-0.24) + exp(0.09)) # = [0.31395396638, 0.28693232061, 0.399113713] - if return_attention_scores: - expected_scores = np.array( - [[[1., 0., 0.], - [0.44028635073, 0.55971364926, 0.], - [0.31395396638, 0.28693232061, 0.399113713]]], - dtype=np.float32) - self.assertAllClose(expected_scores, actual_scores) + # # Expected tensor of shape [1, 3, 1]. # expected000 = 0.5 # expected010 = 0.44028635073 * 0.5 + 0.55971364926 * 0.8 @@ -499,25 +455,17 @@ class AttentionTest(test.TestCase, parameterized.TestCase): actual = attention_layer([q, v]) self.assertAllClose([[[0], [1]]], actual) - @parameterized.named_parameters( - ('', False, False), - ('use_scale', True, False), - ('return_attention_scores', False, True), - ) - def test_serialization(self, use_scale, return_attention_scores): + def test_serialization(self): # Test serialization with use_scale - layer = dense_attention.Attention( - use_scale=use_scale, return_attention_scores=return_attention_scores) + layer = dense_attention.Attention(use_scale=True) config = keras.layers.serialize(layer) new_layer = keras.layers.deserialize(config) - self.assertEqual(new_layer.use_scale, use_scale) - self.assertEqual(new_layer.return_attention_scores, return_attention_scores) + self.assertEqual(new_layer.use_scale, True) config = layer.get_config() new_layer = dense_attention.Attention.from_config(config) - self.assertEqual(new_layer.use_scale, use_scale) - self.assertEqual(new_layer.return_attention_scores, return_attention_scores) + self.assertEqual(new_layer.use_scale, True) @combinations.generate(combinations.combine(mode=['graph', 'eager'])) From 7f3772b7b88774e77da88437e0066d63ecd44401 Mon Sep 17 00:00:00 2001 From: Gaurav Jain Date: Fri, 31 Jul 2020 11:44:36 -0700 Subject: [PATCH 1865/2522] Unify bitcast errors between eager and graph mode PiperOrigin-RevId: 324251337 Change-Id: I5945713530d5ed00e647db98be281e545bc73d09 --- tensorflow/c/kernels/ops/bitcast.cc | 38 +++++++++---------- .../python/kernel_tests/bitcast_op_test.py | 17 +++++---- 2 files changed, 27 insertions(+), 28 deletions(-) diff --git a/tensorflow/c/kernels/ops/bitcast.cc b/tensorflow/c/kernels/ops/bitcast.cc index 3ba56411c38..0bc9fe86f10 100644 --- a/tensorflow/c/kernels/ops/bitcast.cc +++ b/tensorflow/c/kernels/ops/bitcast.cc @@ -22,8 +22,19 @@ limitations under the License. 
#include "tensorflow/core/platform/macros.h" static void ComputeNewShape(TF_ShapeInferenceContext* ctx, - TF_ShapeHandle* shape, size_t input_type_size, - size_t output_type_size, TF_Status* status) { + TF_ShapeHandle* shape, TF_DataType input_type, + TF_DataType output_type, TF_Status* status) { + size_t input_type_size = TF_DataTypeSize(input_type); + size_t output_type_size = TF_DataTypeSize(output_type); + + if (input_type_size == 0 || output_type_size == 0) { + std::ostringstream err; + err << "Cannot bitcast type " << input_type << " to " << output_type + << " because one of the type sizes is zero"; + TF_SetStatus(status, TF_INVALID_ARGUMENT, err.str().c_str()); + return; + } + TF_SetStatus(status, TF_OK, ""); if (input_type_size < output_type_size) { TF_ShapeInferenceContextWithRankAtLeast(ctx, shape, 1, shape, status); @@ -37,9 +48,9 @@ static void ComputeNewShape(TF_ShapeInferenceContext* ctx, TF_ShapeInferenceContextSubshape(ctx, shape, 0, -1, shape, status); } else { std::ostringstream err; - err << "Cannot bitcast due to shape. " - << TF_DimensionHandleValue(last_dim) << " does not match " - << divisor_val; + err << "Cannot bitcast from " << input_type << " to " << output_type + << " due to shape. " << TF_DimensionHandleValue(last_dim) + << " does not match " << divisor_val; TF_SetStatus(status, TF_INVALID_ARGUMENT, err.str().c_str()); } TF_DeleteDimensionHandle(last_dim); @@ -78,23 +89,8 @@ static void bitcast_shape_inference_fn(TF_ShapeInferenceContext* ctx, TF_ShapeInferenceContext_GetAttrType(ctx, "type", &output_type, status); } - size_t input_type_size; - size_t output_type_size; - if (TF_GetCode(status) == TF_OK) { - input_type_size = TF_DataTypeSize(input_type); - output_type_size = TF_DataTypeSize(output_type); - - if (input_type_size == 0 || output_type_size == 0) { - std::ostringstream err; - err << "Cannot bitcast type " << input_type << " to " << output_type - << " because one of the type sizes is zero"; - TF_SetStatus(status, TF_INVALID_ARGUMENT, err.str().c_str()); - } - } - - if (TF_GetCode(status) == TF_OK) { - ComputeNewShape(ctx, result, input_type_size, output_type_size, status); + ComputeNewShape(ctx, result, input_type, output_type, status); } if (TF_GetCode(status) == TF_OK) { diff --git a/tensorflow/python/kernel_tests/bitcast_op_test.py b/tensorflow/python/kernel_tests/bitcast_op_test.py index 60ed92d2173..ed6d7799c7e 100644 --- a/tensorflow/python/kernel_tests/bitcast_op_test.py +++ b/tensorflow/python/kernel_tests/bitcast_op_test.py @@ -21,6 +21,8 @@ from __future__ import print_function import numpy as np from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors +from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.platform import test @@ -60,11 +62,11 @@ class BitcastTest(test.TestCase): shape = [3, 4] self._testBitcast(x, dtypes.int64, shape) - @test_util.run_deprecated_v1 def testErrors(self): x = np.zeros([1, 1], np.int8) datatype = dtypes.int32 - with self.assertRaisesRegex(ValueError, "Cannot bitcast due to shape"): + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "Cannot bitcast from 6 to 3"): array_ops.bitcast(x, datatype, None) def testEmpty(self): @@ -73,11 +75,12 @@ class BitcastTest(test.TestCase): shape = [4] self._testBitcast(x, datatype, shape) - @test_util.run_deprecated_v1 - def testUnknown(self): - x = array_ops.placeholder(dtypes.float32) - datatype = dtypes.int8 
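The shape rule that ComputeNewShape() implements can be summarized in a few lines of Python; this is a sketch of the rule only, not the actual shape-inference code. Casting to a narrower type appends a trailing dimension equal to the size ratio, and casting to a wider type consumes a trailing dimension that must equal that ratio:

    def bitcast_shape(shape, in_size, out_size):
      """Sketch of the bitcast shape rule for element sizes given in bytes."""
      if in_size == out_size:
        return list(shape)
      if in_size > out_size:                     # e.g. float32 -> uint8
        return list(shape) + [in_size // out_size]
      ratio = out_size // in_size                # e.g. uint8 -> int32
      if not shape or shape[-1] != ratio:
        raise ValueError('Cannot bitcast due to shape: last dimension does not '
                         'match %d' % ratio)
      return list(shape[:-1])

    print(bitcast_shape([3, 4], 4, 1))   # [3, 4, 4]
    print(bitcast_shape([3, 4], 2, 8))   # [3]  (last dimension 4 == 8 // 2)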
- array_ops.bitcast(x, datatype, None) + def testUnknownShape(self): + # Need to use placeholder for unknown shape + with ops.Graph().as_default(): + x = array_ops.placeholder(dtypes.float32) + datatype = dtypes.int8 + array_ops.bitcast(x, datatype, None) def testQuantizedType(self): shape = [3, 4] From a697dbc60433fc8378680716cef9ab363bf70568 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 31 Jul 2020 11:49:28 -0700 Subject: [PATCH 1866/2522] Qualify uses of std::string PiperOrigin-RevId: 324252304 Change-Id: I82a93ab00c2cba05ea0b7fe4fc3709f0bec30d93 --- tensorflow/core/framework/attr_value_util.h | 4 +- tensorflow/core/framework/device_base.h | 2 +- tensorflow/core/framework/function.h | 96 ++++++------- tensorflow/core/framework/log_memory.h | 16 +-- tensorflow/core/framework/node_def_util.h | 29 ++-- tensorflow/core/framework/op.h | 27 ++-- tensorflow/core/framework/op_def_builder.h | 16 +-- tensorflow/core/framework/op_def_util.h | 2 +- tensorflow/core/framework/op_kernel.h | 29 ++-- tensorflow/core/framework/op_segment.h | 11 +- tensorflow/core/framework/ops_util.h | 2 +- tensorflow/core/framework/rendezvous.h | 10 +- tensorflow/core/framework/resource_mgr.h | 126 +++++++++--------- tensorflow/core/framework/resource_var.h | 2 +- tensorflow/core/framework/session_state.h | 12 +- tensorflow/core/framework/shape_inference.h | 10 +- .../core/framework/tracking_allocator.h | 2 +- tensorflow/core/framework/variant.h | 32 ++--- .../core/framework/variant_encode_decode.h | 32 ++--- .../core/framework/variant_op_registry.h | 28 ++-- .../core/framework/variant_tensor_data.h | 28 ++-- 21 files changed, 270 insertions(+), 246 deletions(-) diff --git a/tensorflow/core/framework/attr_value_util.h b/tensorflow/core/framework/attr_value_util.h index 966e716e39a..094c007d20b 100644 --- a/tensorflow/core/framework/attr_value_util.h +++ b/tensorflow/core/framework/attr_value_util.h @@ -36,7 +36,7 @@ class NameAttrList; // A human-readable rendering of attr_value, that is more concise than a // text-format proto. -string SummarizeAttrValue(const AttrValue& attr_value); +std::string SummarizeAttrValue(const AttrValue& attr_value); // Generates an error if attr_value doesn't have the indicated attr type. Status AttrValueHasType(const AttrValue& attr_value, StringPiece type); @@ -51,7 +51,7 @@ Status AttrValueHasType(const AttrValue& attr_value, StringPiece type); bool ParseAttrValue(StringPiece type, StringPiece text, AttrValue* out); // Sets *out based on the type of value. -void SetAttrValue(const string& value, AttrValue* out); +void SetAttrValue(const std::string& value, AttrValue* out); void SetAttrValue(const tstring& value, AttrValue* out); void SetAttrValue(const char* value, AttrValue* out); void SetAttrValue(StringPiece value, AttrValue* out); diff --git a/tensorflow/core/framework/device_base.h b/tensorflow/core/framework/device_base.h index 3415c7f23fc..fabb0b24a93 100644 --- a/tensorflow/core/framework/device_base.h +++ b/tensorflow/core/framework/device_base.h @@ -237,7 +237,7 @@ class DeviceBase { // Unimplemented by default virtual const DeviceAttributes& attributes() const; virtual int NumaNode() const { return attributes().locality().numa_node(); } - virtual const string& name() const; + virtual const std::string& name() const; // Materializes the given TensorProto into 'tensor' stored in Device // memory. Most devices will want to override this. 
diff --git a/tensorflow/core/framework/function.h b/tensorflow/core/framework/function.h index 03da4dffa7f..95f733d23a6 100644 --- a/tensorflow/core/framework/function.h +++ b/tensorflow/core/framework/function.h @@ -114,9 +114,9 @@ class FunctionDefHelper { // Constructs an AttrValue.func given the "name" and "attrs". static AttrValueWrapper FunctionRef( - const string& name, + const std::string& name, gtl::ArraySlice> attrs); - static AttrValueWrapper FunctionRef(const string& name) { + static AttrValueWrapper FunctionRef(const std::string& name) { return FunctionRef(name, {}); } @@ -127,11 +127,11 @@ class FunctionDefHelper { // When constructing a NodeDef, the first entry in ret is used as // the node name, the remaining values are ignored. std::vector ret; - string op; + std::string op; std::vector arg; std::vector> attr; std::vector dep; - string device; + std::string device; NodeDef ToNodeDef() const; }; @@ -143,7 +143,7 @@ class FunctionDefHelper { // - `control_ret_def` holds a mapping from the function control // output names to the nodes from `node_def`. static FunctionDef Create( - const string& function_name, gtl::ArraySlice in_def, + const std::string& function_name, gtl::ArraySlice in_def, gtl::ArraySlice out_def, gtl::ArraySlice attr_def, gtl::ArraySlice node_def, gtl::ArraySlice> ret_def, @@ -153,7 +153,7 @@ class FunctionDefHelper { // function encoding (node_name:output_name[:output_index]). // - `ret_def` holds a mapping from the function output names from `out_def` // to the node outputs from `node_def`. - static FunctionDef Create(const string& function_name, + static FunctionDef Create(const std::string& function_name, gtl::ArraySlice in_def, gtl::ArraySlice out_def, gtl::ArraySlice attr_def, @@ -161,7 +161,7 @@ class FunctionDefHelper { gtl::ArraySlice> ret_def); // TODO(josh11b): Get rid of these and transition to the one above. - static FunctionDef Define(const string& function_name, + static FunctionDef Define(const std::string& function_name, gtl::ArraySlice arg_def, gtl::ArraySlice ret_def, gtl::ArraySlice attr_def, @@ -175,7 +175,7 @@ class FunctionDefHelper { // Helpers to construct a constant scalar. template - static Node Const(const string& name, const T& val) { + static Node Const(const std::string& name, const T& val) { Node n = {{name}, "Const"}; const DataType dtype = DataTypeToEnum::value; n.attr.push_back({"dtype", dtype}); @@ -186,7 +186,7 @@ class FunctionDefHelper { } template - static Node Const(const string& name, gtl::ArraySlice vals) { + static Node Const(const std::string& name, gtl::ArraySlice vals) { Node n = {{name}, "Const"}; const DataType dtype = DataTypeToEnum::value; n.attr.push_back({"dtype", dtype}); @@ -207,7 +207,7 @@ inline FunctionDefHelper::AttrValueWrapper::AttrValueWrapper(const char* val) { template <> inline FunctionDefHelper::AttrValueWrapper::AttrValueWrapper( - const string& val) { + const std::string& val) { InitFromString(val); } @@ -251,13 +251,13 @@ Status InstantiateFunction(const FunctionDef& fdef, AttrSlice attr_values, // Particularly, it may not include all information presented in // "func_def" (e.g., comments, description of the function arguments, // etc.) 
-string DebugString(const FunctionDef& func_def); -string DebugString(const GraphDef& instantiated_func_def); -string DebugString(gtl::ArraySlice instantiated_func_nodes); +std::string DebugString(const FunctionDef& func_def); +std::string DebugString(const GraphDef& instantiated_func_def); +std::string DebugString(gtl::ArraySlice instantiated_func_nodes); // Returns a debug string for a top level graph (the main program and // its supporting functions defined in its library). -string DebugStringWhole(const GraphDef& gdef); +std::string DebugStringWhole(const GraphDef& gdef); // Returns true if f1 == f2. Compares all fields, including descriptions. Order // of NodeDefs doesn't matter. @@ -360,14 +360,14 @@ class FunctionLibraryDefinition : public OpRegistryInterface { delete; // Returns True if the library contains `func`, False otherwise. - bool Contains(const string& func) const; + bool Contains(const std::string& func) const; // Returns nullptr if "func" is not defined in "lib_def". Otherwise, // returns its definition proto. // // NB: This function returns a borrowed pointer, which can be invalidated by a // subsequent call to `ReplaceFunction()` with the given name. - const FunctionDef* Find(const string& func) const TF_LOCKS_EXCLUDED(mu_); + const FunctionDef* Find(const std::string& func) const TF_LOCKS_EXCLUDED(mu_); // Adds function definition 'fdef' to this function library. // Returns status 'ok' on success, or error otherwise. This is a no-op if @@ -388,7 +388,7 @@ class FunctionLibraryDefinition : public OpRegistryInterface { // a non-OK status if "func" was not found in the library, OK otherwise. // Please be careful when replacing function: make sure all previous pointers // returned by `Find()` are no longer in use. - Status ReplaceFunction(const string& func, const FunctionDef& fdef) + Status ReplaceFunction(const std::string& func, const FunctionDef& fdef) TF_LOCKS_EXCLUDED(mu_); // Replaces the gradient corresponding to `grad.function_name()`. Returns @@ -401,7 +401,7 @@ class FunctionLibraryDefinition : public OpRegistryInterface { // Please be careful when removing function: make sure there are no other // nodes using the function, and all previous pointers returned by `Find()` // are no longer in use. - Status RemoveFunction(const string& func) TF_LOCKS_EXCLUDED(mu_); + Status RemoveFunction(const std::string& func) TF_LOCKS_EXCLUDED(mu_); // Adds the functions and gradients in 'other' to this function library. // Duplicate functions and gradients are ignored. @@ -417,7 +417,8 @@ class FunctionLibraryDefinition : public OpRegistryInterface { // If the gradient function for 'func' is specified explicitly in // the library, returns the gradient function name. Otherwise, // returns an empty string. - string FindGradient(const string& func) const TF_LOCKS_EXCLUDED(mu_); + std::string FindGradient(const std::string& func) const + TF_LOCKS_EXCLUDED(mu_); // OpRegistryInterface method. Useful for constructing a Graph. // @@ -427,26 +428,27 @@ class FunctionLibraryDefinition : public OpRegistryInterface { // // NB: This function outputs a borrowed pointer, which can be invalidated by a // subsequent call to `ReplaceFunction()` with the given name. - Status LookUp(const string& op_type_name, + Status LookUp(const std::string& op_type_name, const OpRegistrationData** op_reg_data) const override TF_LOCKS_EXCLUDED(mu_); // Generates new function name with the specified prefix that is unique // across this library. 
- string UniqueFunctionName(StringPiece prefix) const TF_LOCKS_EXCLUDED(mu_); + std::string UniqueFunctionName(StringPiece prefix) const + TF_LOCKS_EXCLUDED(mu_); // Given a node def 'ndef', inspects attributes of the callee // function to derive the attribute 'value' for 'attr'. Returns OK // iff the attribute is given by the function's definition. // TODO(irving): Remove; keep only the const Node& version. template - Status GetAttr(const NodeDef& ndef, const string& attr, T* value) const; + Status GetAttr(const NodeDef& ndef, const std::string& attr, T* value) const; // Given a node, inspects attributes of the callee function to derive the // attribute 'value' for 'attr'. Returns OK iff the attribute is given by the // function's definition. template - Status GetAttr(const Node& node, const string& attr, T* value) const; + Status GetAttr(const Node& node, const std::string& attr, T* value) const; // Returns a proto representation of the state of this function library. FunctionDefLibrary ToProto() const TF_LOCKS_EXCLUDED(mu_); @@ -475,7 +477,7 @@ class FunctionLibraryDefinition : public OpRegistryInterface { // name `func` already exists in this function library, and has the same // implementation as in `other`. If the implementations conflict, an invalid // argument error is returned. - Status CopyFunctionDefFrom(const string& func, + Status CopyFunctionDefFrom(const std::string& func, const FunctionLibraryDefinition& other) TF_LOCKS_EXCLUDED(mu_); @@ -491,7 +493,7 @@ class FunctionLibraryDefinition : public OpRegistryInterface { std::shared_ptr FindHelper( const string& func) const TF_SHARED_LOCKS_REQUIRED(mu_); - string FindGradientHelper(const string& func) const + std::string FindGradientHelper(const std::string& func) const TF_SHARED_LOCKS_REQUIRED(mu_); Status AddHelper(std::shared_ptr registration, @@ -518,12 +520,13 @@ class FunctionLibraryDefinition : public OpRegistryInterface { // Remove `func` from the library. Returns non-OK Status unless `func` is in // the library. This should only be called when there is a guarantee that the // function being removed hasn't been retrieved with `Find`. - Status RemoveFunctionHelper(const string& func) + Status RemoveFunctionHelper(const std::string& func) TF_EXCLUSIVE_LOCKS_REQUIRED(mu_); // Remove gradient of function `func` from the library. Returns non-OK Status // unless `func` has a gradient. - Status RemoveGradient(const string& func) TF_EXCLUSIVE_LOCKS_REQUIRED(mu_); + Status RemoveGradient(const std::string& func) + TF_EXCLUSIVE_LOCKS_REQUIRED(mu_); mutable mutex mu_; const OpRegistryInterface* const default_registry_; @@ -566,7 +569,7 @@ class FunctionLibraryRuntime { // The canonical device name of the device on which the function // should be instantiated. If empty, the function will be // instantiated on the local device. - string target; + std::string target; // Should the function be instantiated as a multi-device function? bool is_multi_device_function = false; @@ -640,13 +643,13 @@ class FunctionLibraryRuntime { // `state_handle` will have the same handle and share the same // state (in stateful kernels); and two functions with different // values for `state_handle` will have independent state. - string state_handle; + std::string state_handle; // This interface is EXPERIMENTAL and subject to change. // // Instantiates the function using an executor of the given type. If empty, // the default TensorFlow executor will be used. 
- string executor_type; + std::string executor_type; // If true, the runtime will attempt to create kernels for the function at // instantiation time, rather than on the first run. This can be used to @@ -680,10 +683,10 @@ class FunctionLibraryRuntime { bool include_optimized_graph_in_debug_string = false; }; typedef uint64 Handle; - virtual Status Instantiate(const string& function_name, AttrSlice attrs, + virtual Status Instantiate(const std::string& function_name, AttrSlice attrs, const InstantiateOptions& options, Handle* handle) = 0; - Status Instantiate(const string& function_name, AttrSlice attrs, + Status Instantiate(const std::string& function_name, AttrSlice attrs, Handle* handle) { auto opts = absl::make_unique(); return Instantiate(function_name, attrs, *opts, handle); @@ -738,7 +741,7 @@ class FunctionLibraryRuntime { // Parameters for remote function execution. bool remote_execution = false; - string source_device = ""; // Fully specified device name. + std::string source_device = ""; // Fully specified device name. // Allocator attributes specifying where the args are / rets should be put. // These should either be {} or match the length of args / retvals. If {}, @@ -758,7 +761,7 @@ class FunctionLibraryRuntime { bool run_all_kernels_inline = false; // Returns a human readable representation of this. - string DebugString() const; + std::string DebugString() const; }; typedef std::function DoneCallback; virtual void Run(const Options& opts, Handle handle, @@ -786,7 +789,7 @@ class FunctionLibraryRuntime { // NOTE(mrry): This method assumes that the runtime is associated with a // default function library, and looks up `function_name` in that library. // It does not support overriding the function library. - virtual bool IsStateful(const string& function_name) const = 0; + virtual bool IsStateful(const std::string& function_name) const = 0; // Returns the device on which the function executes. virtual Device* device() = 0; @@ -817,7 +820,7 @@ class FunctionLibraryRuntime { // Returns a debug string showing the definition of the function of // 'handle'. - virtual string DebugString(Handle handle) = 0; + virtual std::string DebugString(Handle handle) = 0; // Returns the graph version number. virtual int graph_def_version() const = 0; @@ -847,13 +850,13 @@ class FunctionLibraryRuntime { // `ExecutorFactory::GetFactory()`) that will be used based on the given // dynamic `options` and static `attrs`. If none is specified, this method // will return an empty string, which leaves the decision up to the runtime. - static string ExecutorType(const InstantiateOptions& options, - AttrSlice attrs); + static std::string ExecutorType(const InstantiateOptions& options, + AttrSlice attrs); }; // Returns the device of the `arg_index`-th function input. Update // `composite_devices` if the input device is a composite device. -string GetFunctionResourceInputDevice( +std::string GetFunctionResourceInputDevice( const Tensor& input, const int arg_index, const FunctionDef& function_def, absl::flat_hash_map>* composite_devices); @@ -864,9 +867,10 @@ string GetFunctionResourceInputDevice( // space. But it may be change as the implementation // evolves. Therefore, it should not be persisted or compared across // address spaces. 
-string Canonicalize(const string& funcname, AttrSlice attrs, - const FunctionLibraryRuntime::InstantiateOptions& options); -string Canonicalize(const string& funcname, AttrSlice attrs); +std::string Canonicalize( + const std::string& funcname, AttrSlice attrs, + const FunctionLibraryRuntime::InstantiateOptions& options); +std::string Canonicalize(const std::string& funcname, AttrSlice attrs); const FunctionLibraryRuntime::Handle kInvalidHandle = -1; const FunctionLibraryRuntime::LocalHandle kInvalidLocalHandle = -1; @@ -907,8 +911,8 @@ class DistributedFunctionLibraryRuntime { // local `handle` is filled for the instantiated function data and can be used // for subsequent run function calls on the remote target. virtual void Instantiate( - const string& function_name, const FunctionLibraryDefinition& lib_def, - AttrSlice attrs, + const std::string& function_name, + const FunctionLibraryDefinition& lib_def, AttrSlice attrs, const FunctionLibraryRuntime::InstantiateOptions& options, FunctionLibraryRuntime::LocalHandle* handle, FunctionLibraryRuntime::DoneCallback done) = 0; @@ -1022,11 +1026,11 @@ Status ArgNumType(AttrSlice attrs, const OpDef::ArgDef& arg_def, namespace gradient { // Register a gradient creator for the "op". typedef std::function Creator; -bool RegisterOp(const string& op, Creator func); +bool RegisterOp(const std::string& op, Creator func); // Returns OK the gradient creator for the "op" is found (may be // nullptr if REGISTER_OP_NO_GRADIENT is used. -Status GetOpGradientCreator(const string& op, Creator* creator); +Status GetOpGradientCreator(const std::string& op, Creator* creator); }; // namespace gradient // Declare explicit instantiations of GetAttr diff --git a/tensorflow/core/framework/log_memory.h b/tensorflow/core/framework/log_memory.h index 1b926ddaa3f..e714c742b43 100644 --- a/tensorflow/core/framework/log_memory.h +++ b/tensorflow/core/framework/log_memory.h @@ -52,14 +52,14 @@ class LogMemory { UNKNOWN_STEP_ID = -6, }; - static const string kLogMemoryLabel; + static const std::string kLogMemoryLabel; // Test to see if memory logging is enabled. For now, logging is // enabled whenever VLOG_IS_ON(1) for the log_memory module. static bool IsEnabled(); // Log the beginning of a step. - static void RecordStep(int64 step_id, const string& handle); + static void RecordStep(int64 step_id, const std::string& handle); // Log a tensor buffer allocation. The name indicates which kernel // made the allocation. If the allocation is made through an @@ -67,8 +67,8 @@ class LogMemory { // otherwise step_id is one of the SpecialStepIds defined in // op_kernel.h, e.g. Op Kernel construction or an optimization pass // such as constant folding. - static void RecordTensorAllocation(const string& kernel_name, int64 step_id, - const Tensor& tensor); + static void RecordTensorAllocation(const std::string& kernel_name, + int64 step_id, const Tensor& tensor); // Log a tensor buffer deallocation. The deallocation is triggered // when the buffer's refcount falls to zero, and the tracking @@ -77,10 +77,10 @@ class LogMemory { // corresponding tensor previously passed in to // RecordTensorAllocation. static void RecordTensorDeallocation(int64 allocation_id, - const string& allocator_name); + const std::string& allocator_name); // Log the use of a tensor as an output from a kernel. 
- static void RecordTensorOutput(const string& kernel_name, int64 step_id, + static void RecordTensorOutput(const std::string& kernel_name, int64 step_id, int index, const Tensor& tensor); // Log a "raw" allocation, which is just a buffer sized in @@ -92,7 +92,7 @@ class LogMemory { // is executing, otherwise step_id is one of the SpecialStepIds // defined in op_kernel.h, e.g. Op Kernel construction or an // optimization pass such as constant folding. - static void RecordRawAllocation(const string& operation, int64 step_id, + static void RecordRawAllocation(const std::string& operation, int64 step_id, size_t num_bytes, void* ptr, Allocator* allocator); @@ -101,7 +101,7 @@ class LogMemory { // enqueued using the buffer. A deferred deallocation should always // be followed by a matching non-deferred deallocation when the // buffer is actually returned and can be reused. - static void RecordRawDeallocation(const string& operation, int64 step_id, + static void RecordRawDeallocation(const std::string& operation, int64 step_id, void* ptr, Allocator* allocator, bool deferred); }; diff --git a/tensorflow/core/framework/node_def_util.h b/tensorflow/core/framework/node_def_util.h index d937a8e51e1..d1a7c9aebba 100644 --- a/tensorflow/core/framework/node_def_util.h +++ b/tensorflow/core/framework/node_def_util.h @@ -62,16 +62,16 @@ extern const char* const kColocationGroupPrefix; // The parameter `max_inputs_in_summary` specifies how many inputs at most to // serialize in the output (in order not to get a string which is overly large). // The value `-1` specifies that all inputs will be shown. -string SummarizeNodeDef(const NodeDef& node_def, - int max_inputs_in_summary = -1); -string SummarizeAttrs(const NodeDef& node_def); -string SummarizeAttrsHelper(AttrSlice attrs, StringPiece device); +std::string SummarizeNodeDef(const NodeDef& node_def, + int max_inputs_in_summary = -1); +std::string SummarizeAttrs(const NodeDef& node_def); +std::string SummarizeAttrsHelper(AttrSlice attrs, StringPiece device); // Produces a formatted string pattern from the node which can uniquely identify // this node upstream to produce an informative error message. The pattern // followed is: {{node }} -string FormatNodeDefForError(const NodeDef& node_def); -string FormatNodeDefForError( +std::string FormatNodeDefForError(const NodeDef& node_def); +std::string FormatNodeDefForError( StringPiece node_name, bool has_experimental_debug_info, const NodeDef_ExperimentalDebugInfo& experimental_debug_info); @@ -148,7 +148,7 @@ class AttrSlice { // Returns the attr with attr_name if found. Otherwise, returns // nullptr. const AttrValue* Find(StringPiece attr_name) const; - const AttrValue* FindByString(const string& attr_name) const; + const AttrValue* FindByString(const std::string& attr_name) const; // Returns the attr_value for attr_name if found. Otherwise, returns a // NotFound status. @@ -157,8 +157,8 @@ class AttrSlice { // Helper class to avoid allocations in EqualAttrs. // TODO(irving): Will go away once NodeInfo is used. struct Scratch { - string a; - string b; + std::string a; + std::string b; }; // Check if all attrs and attr values match. Does not take defaults into @@ -175,13 +175,13 @@ class AttrSlice { // If this AttrSlice has an attached NodeDef, summarize it. This is for // error messages only: we intentionally do not provide direct access to the // NodeDef, since it is not always there. 
- string SummarizeNode() const; + std::string SummarizeNode() const; // Iteration over all attrs AttrValueMap::const_iterator begin() const { return attrs_->begin(); } AttrValueMap::const_iterator end() const { return attrs_->end(); } - string DebugString() const; + std::string DebugString() const; private: const NodeDef* ndef_; @@ -195,7 +195,7 @@ bool HasNodeAttr(const NodeDef& node_def, StringPiece attr_name); // attr with attr_name is found in node_def, or the attr does not have // a matching type, a non-ok status will be returned. Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name, - string* value); // type: "string" + std::string* value); // type: "string" Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name, tstring* value); // type: "tstring" Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name, @@ -266,7 +266,7 @@ Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name, // attr with attr_name is found in node_def, or the attr does not have // a matching type, false is returned. bool TryGetNodeAttr(const AttrSlice& attrs, StringPiece attr_name, - string* value); // type: "string" + std::string* value); // type: "string" bool TryGetNodeAttr(const AttrSlice& attrs, StringPiece attr_name, int64* value); // type: "int" bool TryGetNodeAttr(const AttrSlice& attrs, StringPiece attr_name, @@ -309,7 +309,8 @@ bool TryGetNodeAttr( // If no attr with attr_name is found in node_def, or the attr does not have // a matching type, a reference to an empty string is returned. // REQUIRES: Must not use the returned value beyond the lifetime of node_def. -const string& GetNodeAttrString(const AttrSlice& attrs, StringPiece attr_name); +const std::string& GetNodeAttrString(const AttrSlice& attrs, + StringPiece attr_name); // Specialization to parse an attribute directly into a Padding enum. Status GetNodeAttr(const AttrSlice& attrs, StringPiece attr_name, diff --git a/tensorflow/core/framework/op.h b/tensorflow/core/framework/op.h index 86bc70448d2..adc52d963c9 100644 --- a/tensorflow/core/framework/op.h +++ b/tensorflow/core/framework/op.h @@ -45,11 +45,12 @@ class OpRegistryInterface { // Returns an error status and sets *op_reg_data to nullptr if no OpDef is // registered under that name, otherwise returns the registered OpDef. // Caller must not delete the returned pointer. - virtual Status LookUp(const string& op_type_name, + virtual Status LookUp(const std::string& op_type_name, const OpRegistrationData** op_reg_data) const = 0; // Shorthand for calling LookUp to get the OpDef. - Status LookUpOpDef(const string& op_type_name, const OpDef** op_def) const; + Status LookUpOpDef(const std::string& op_type_name, + const OpDef** op_def) const; }; // The standard implementation of OpRegistryInterface, along with a @@ -71,11 +72,11 @@ class OpRegistry : public OpRegistryInterface { void Register(const OpRegistrationDataFactory& op_data_factory); - Status LookUp(const string& op_type_name, + Status LookUp(const std::string& op_type_name, const OpRegistrationData** op_reg_data) const override; // Returns OpRegistrationData* of registered op type, else returns nullptr. 
- const OpRegistrationData* LookUp(const string& op_type_name) const; + const OpRegistrationData* LookUp(const std::string& op_type_name) const; // Fills *ops with all registered OpDefs (except those with names // starting with '_' if include_internal == false) sorted in @@ -84,7 +85,7 @@ class OpRegistry : public OpRegistryInterface { // Returns ASCII-format OpList for all registered OpDefs (except // those with names starting with '_' if include_internal == false). - string DebugString(bool include_internal) const; + std::string DebugString(bool include_internal) const; // A singleton available at startup. static OpRegistry* Global(); @@ -153,7 +154,7 @@ class OpRegistry : public OpRegistryInterface { Status RegisterAlreadyLocked(const OpRegistrationDataFactory& op_data_factory) const TF_EXCLUSIVE_LOCKS_REQUIRED(mu_); - const OpRegistrationData* LookUpSlow(const string& op_type_name) const; + const OpRegistrationData* LookUpSlow(const std::string& op_type_name) const; mutable mutex mu_; // Functions in deferred_ may only be called with mu_ held. @@ -179,11 +180,11 @@ class OpListOpRegistry : public OpRegistryInterface { // Does not take ownership of op_list, *op_list must outlive *this. explicit OpListOpRegistry(const OpList* op_list); ~OpListOpRegistry() override; - Status LookUp(const string& op_type_name, + Status LookUp(const std::string& op_type_name, const OpRegistrationData** op_reg_data) const override; // Returns OpRegistrationData* of op type in list, else returns nullptr. - const OpRegistrationData* LookUp(const string& op_type_name) const; + const OpRegistrationData* LookUp(const std::string& op_type_name) const; private: // Values are owned. @@ -225,15 +226,15 @@ template <> class OpDefBuilderWrapper { public: explicit OpDefBuilderWrapper(const char name[]) : builder_(name) {} - OpDefBuilderWrapper& Attr(string spec) { + OpDefBuilderWrapper& Attr(std::string spec) { builder_.Attr(std::move(spec)); return *this; } - OpDefBuilderWrapper& Input(string spec) { + OpDefBuilderWrapper& Input(std::string spec) { builder_.Input(std::move(spec)); return *this; } - OpDefBuilderWrapper& Output(string spec) { + OpDefBuilderWrapper& Output(std::string spec) { builder_.Output(std::move(spec)); return *this; } @@ -259,11 +260,11 @@ class OpDefBuilderWrapper { builder_.SetAllowsUninitializedInput(); return *this; } - OpDefBuilderWrapper& Deprecated(int version, string explanation) { + OpDefBuilderWrapper& Deprecated(int version, std::string explanation) { builder_.Deprecated(version, std::move(explanation)); return *this; } - OpDefBuilderWrapper& Doc(string text) { + OpDefBuilderWrapper& Doc(std::string text) { builder_.Doc(std::move(text)); return *this; } diff --git a/tensorflow/core/framework/op_def_builder.h b/tensorflow/core/framework/op_def_builder.h index aab0c63636e..b69ee46cd59 100644 --- a/tensorflow/core/framework/op_def_builder.h +++ b/tensorflow/core/framework/op_def_builder.h @@ -53,7 +53,7 @@ struct OpRegistrationData { class OpDefBuilder { public: // Constructs an OpDef with just the name field set. - explicit OpDefBuilder(string op_name); + explicit OpDefBuilder(std::string op_name); // Adds an attr to this OpDefBuilder (and returns *this). The spec has // format ":" or ":=" @@ -86,7 +86,7 @@ class OpDefBuilder { // * Ability to restrict the type of the tensor like the existing // restrictions for type attrs. // Perhaps by linking the type of the tensor to a type attr? 
- OpDefBuilder& Attr(string spec); + OpDefBuilder& Attr(std::string spec); // Adds an input or output to this OpDefBuilder (and returns *this). // The spec has form ":" or ":Ref()" @@ -103,8 +103,8 @@ class OpDefBuilder { // in the spec? // TODO(josh11b): SparseInput() and SparseOutput() matching the Python // handling? - OpDefBuilder& Input(string spec); - OpDefBuilder& Output(string spec); + OpDefBuilder& Input(std::string spec); + OpDefBuilder& Output(std::string spec); // Turns on the indicated boolean flag in this OpDefBuilder (and // returns *this). @@ -114,7 +114,7 @@ class OpDefBuilder { OpDefBuilder& SetAllowsUninitializedInput(); // Deprecate the op at a certain GraphDef version. - OpDefBuilder& Deprecated(int version, string explanation); + OpDefBuilder& Deprecated(int version, std::string explanation); // Adds docs to this OpDefBuilder (and returns *this). // Docs have the format: @@ -130,7 +130,7 @@ class OpDefBuilder { // to suppress the automatically-generated type documentation in // generated output. #ifndef TF_LEAN_BINARY - OpDefBuilder& Doc(string text); + OpDefBuilder& Doc(std::string text); #else OpDefBuilder& Doc(string text) { return *this; } #endif @@ -157,7 +157,7 @@ class OpDefBuilder { // Adds control output to this OpDefBuilder (and returns *this). // The must be a valid node name (matches regexp // [a-zA-Z][a-zA-Z0-9_]*). Named control output can only exist for functions. - OpDefBuilder& ControlOutput(string name); + OpDefBuilder& ControlOutput(std::string name); OpDef* op_def() { return &op_reg_data_.op_def; } @@ -166,7 +166,7 @@ class OpDefBuilder { std::vector inputs_; std::vector outputs_; std::vector control_outputs_; - string doc_; + std::string doc_; std::vector errors_; }; diff --git a/tensorflow/core/framework/op_def_util.h b/tensorflow/core/framework/op_def_util.h index 311e40afeea..4a4a2e8e897 100644 --- a/tensorflow/core/framework/op_def_util.h +++ b/tensorflow/core/framework/op_def_util.h @@ -54,7 +54,7 @@ const ApiDef::Arg* FindInputArg(StringPiece name, const ApiDef& api_def); // Produce a human-readable version of an op_def that is more concise // than a text-format proto. Excludes descriptions. -string SummarizeOpDef(const OpDef& op_def); +std::string SummarizeOpDef(const OpDef& op_def); // Returns an error if new_op is not backwards-compatible with (more // accepting than) old_op. diff --git a/tensorflow/core/framework/op_kernel.h b/tensorflow/core/framework/op_kernel.h index b4302999511..3bfcedaee82 100644 --- a/tensorflow/core/framework/op_kernel.h +++ b/tensorflow/core/framework/op_kernel.h @@ -145,14 +145,16 @@ class OpKernel { // Accessors. 
const NodeDef& def() const { return props_->node_def; } - const string& name() const { return props_->node_def.name(); } + const std::string& name() const { return props_->node_def.name(); } absl::string_view name_view() const { return name_view_; } - const string& type_string() const { return props_->node_def.op(); } + const std::string& type_string() const { return props_->node_def.op(); } absl::string_view type_string_view() const { return type_string_view_; } - const string& requested_input(int i) const { + const std::string& requested_input(int i) const { return props_->node_def.input(i); } - const string& requested_device() const { return props_->node_def.device(); } + const std::string& requested_device() const { + return props_->node_def.device(); + } int num_inputs() const { return props_->input_types.size(); } DataType input_type(int i) const { return props_->input_types[i]; } @@ -177,10 +179,11 @@ class OpKernel { // Returns a trace string for current computation, op name/type and input // tensor shape/dtype are encoded for profiler cost analysis. Most OpKernel // should use the default implementation. - virtual string TraceString(const OpKernelContext& ctx, bool verbose) const; + virtual std::string TraceString(const OpKernelContext& ctx, + bool verbose) const; protected: - string ShapeTraceString(const OpKernelContext& ctx) const; + std::string ShapeTraceString(const OpKernelContext& ctx) const; private: const std::shared_ptr props_; @@ -652,7 +655,7 @@ class OpKernelContext { SessionState* session_state = nullptr; // Unique session identifier. Can be empty. - string session_handle; + std::string session_handle; // Metadata about the session. Can be nullptr. const SessionMetadata* session_metadata = nullptr; @@ -684,7 +687,7 @@ class OpKernelContext { StepStatsCollectorInterface* stats_collector = nullptr; GraphCollector* graph_collector = nullptr; bool run_all_kernels_inline = false; - const string* executor_type = nullptr; + const std::string* executor_type = nullptr; // TensorSliceReaderCache support. checkpoint::TensorSliceReaderCacheWrapper* slice_reader_cache = nullptr; @@ -826,7 +829,7 @@ class OpKernelContext { // Returns the registered name for the executor type that is executing the // current kernel. If empty, the default executor is used. - const string& executor_type() const; + const std::string& executor_type() const; // Input to output forwarding. @@ -1100,7 +1103,7 @@ class OpKernelContext { SessionState* session_state() const { return params_->session_state; } // Unique identifier of the session it belongs to. Can be empty. - string session_handle() const { return params_->session_handle; } + std::string session_handle() const { return params_->session_handle; } // Metadata about the session. Can be nullptr. const SessionMetadata* session_metadata() const { @@ -1405,7 +1408,7 @@ Status SupportedDeviceTypesForNode( // Returns a message with a description of the kernels registered for op // `op_name`. -string KernelsRegisteredForOp(StringPiece op_name); +std::string KernelsRegisteredForOp(StringPiece op_name); // Call once after Op registration has completed. 
Status ValidateKernelRegistrations(const OpRegistryInterface& op_registry); @@ -1497,13 +1500,13 @@ Status FindKernelDef( bool has_experimental_debug_info, const NodeDef_ExperimentalDebugInfo& experimental_debug_info, StringPiece node_op, StringPiece node_device, AttrSlice node_attrs, - const KernelDef** def, string* kernel_class_name); + const KernelDef** def, std::string* kernel_class_name); // If node_def has a corresponding kernel registered on device_type, // returns OK and fill in the kernel def and kernel_class_name. and // may be null. Status FindKernelDef(const DeviceType& device_type, const NodeDef& node_def, - const KernelDef** def, string* kernel_class_name); + const KernelDef** def, std::string* kernel_class_name); // Writes a list of all registered kernels to LOG(INFO), to help users debug // missing kernel errors. diff --git a/tensorflow/core/framework/op_segment.h b/tensorflow/core/framework/op_segment.h index ab3ef6009b3..9a6f6e9664b 100644 --- a/tensorflow/core/framework/op_segment.h +++ b/tensorflow/core/framework/op_segment.h @@ -46,8 +46,8 @@ class OpSegment { // A hold can be placed on a session, preventing all its kernels // from being deleted. - void AddHold(const string& session_handle); - void RemoveHold(const string& session_handle); + void AddHold(const std::string& session_handle); + void RemoveHold(const std::string& session_handle); // If the kernel for "node_name" has been created in the // "session_handle", returns the existing op kernel in "*kernel". @@ -57,12 +57,13 @@ class OpSegment { // // OpSegment keeps the ownership of the returned "*kernel". typedef std::function CreateKernelFn; - Status FindOrCreate(const string& session_handle, const string& node_name, - OpKernel** kernel, CreateKernelFn create_fn); + Status FindOrCreate(const std::string& session_handle, + const std::string& node_name, OpKernel** kernel, + CreateKernelFn create_fn); // Returns true if OpSegment should own the kernel. static bool ShouldOwnKernel(FunctionLibraryRuntime* lib, - const string& node_op); + const std::string& node_op); private: // op name -> OpKernel diff --git a/tensorflow/core/framework/ops_util.h b/tensorflow/core/framework/ops_util.h index b323109abfc..aaf2361cc9d 100644 --- a/tensorflow/core/framework/ops_util.h +++ b/tensorflow/core/framework/ops_util.h @@ -81,7 +81,7 @@ bool IsDim0SliceAligned(const TensorShape& s, int64 start, int64 end_or_size) { } // Returns sanitized to have only [a-zA-Z0-9-_]. -string SanitizeThreadSuffix(string suffix); +std::string SanitizeThreadSuffix(std::string suffix); // Helper to compute 'strides' given a tensor 'shape'. I.e., // strides[i] = prod(shape.dim_size[(i+1):]) diff --git a/tensorflow/core/framework/rendezvous.h b/tensorflow/core/framework/rendezvous.h index ccd6d102b5e..d59bbb2809e 100644 --- a/tensorflow/core/framework/rendezvous.h +++ b/tensorflow/core/framework/rendezvous.h @@ -74,7 +74,7 @@ class RendezvousInterface { friend class Rendezvous; friend class SendOp; friend class RecvOp; - string buf_; + std::string buf_; }; // The caller is a tensor producer and it sends a message (a tensor @@ -169,9 +169,11 @@ class Rendezvous : public RendezvousInterface, public core::RefCounted { // Constructs a rendezvous key for the tensor of "name" sent from // "src_device" to "dst_device". The tensor is generated in the frame // and iteration specified by "frame_iter". 
- static string CreateKey(const string& src_device, uint64 src_incarnation, - const string& dst_device, const string& name, - const FrameAndIter& frame_iter); + static std::string CreateKey(const std::string& src_device, + uint64 src_incarnation, + const std::string& dst_device, + const std::string& name, + const FrameAndIter& frame_iter); static Status ParseKey(StringPiece key, ParsedKey* out); }; diff --git a/tensorflow/core/framework/resource_mgr.h b/tensorflow/core/framework/resource_mgr.h index 3af8d81b0dc..758837e017a 100644 --- a/tensorflow/core/framework/resource_mgr.h +++ b/tensorflow/core/framework/resource_mgr.h @@ -79,7 +79,7 @@ namespace tensorflow { class ResourceBase : public core::RefCounted { public: // Returns a debug string for *this. - virtual string DebugString() const = 0; + virtual std::string DebugString() const = 0; // Returns memory used by this resource. virtual int64 MemoryUsed() const { return 0; } @@ -100,7 +100,7 @@ class ScopedStepContainer { ScopedStepContainer(const int64 step_id, std::function cleanup, - const string& prefix) + const std::string& prefix) : container_(strings::StrCat("__", prefix, "_per_step_", step_id)), cleanup_(cleanup), dirty_(false) {} @@ -125,25 +125,25 @@ class ScopedStepContainer { // Pass through to MakeResourceHandle with the container name template ResourceHandle MakeResourceHandle( - const string& name, const DeviceBase& device) TF_MUST_USE_RESULT; + const std::string& name, const DeviceBase& device) TF_MUST_USE_RESULT; // Pass through to ResourceMgr::Create with the container name template - Status Create(ResourceMgr* rm, const string& name, + Status Create(ResourceMgr* rm, const std::string& name, T* resource) TF_MUST_USE_RESULT; // Pass through to ResourceMgr::Delete with the container name template - Status Delete(ResourceMgr* rm, const string& name) TF_MUST_USE_RESULT; + Status Delete(ResourceMgr* rm, const std::string& name) TF_MUST_USE_RESULT; // Pass through to ResourceMgr::Lookup with the container name template - Status Lookup(ResourceMgr* rm, const string& name, + Status Lookup(ResourceMgr* rm, const std::string& name, T** resource) const TF_MUST_USE_RESULT; // Pass through to ResourceMgr::LookupOrCreate with the container name template - Status LookupOrCreate(ResourceMgr* rm, const string& name, T** resource, + Status LookupOrCreate(ResourceMgr* rm, const std::string& name, T** resource, std::function creator) TF_MUST_USE_RESULT; private: - const string container_; + const std::string container_; const std::function cleanup_; mutex mu_; mutable std::atomic dirty_ TF_GUARDED_BY(mu_); @@ -152,11 +152,11 @@ class ScopedStepContainer { class ResourceMgr { public: ResourceMgr(); - explicit ResourceMgr(const string& default_container); + explicit ResourceMgr(const std::string& default_container); ~ResourceMgr(); // Returns the default container name for *this. - const string& default_container() const { return default_container_; } + const std::string& default_container() const { return default_container_; } // Creates a resource "name" in the "container". The caller transfers // the ownership of one ref on "resource" to *this, regardless of whether this @@ -165,7 +165,7 @@ class ResourceMgr { // REQUIRES: std::is_base_of // REQUIRES: resource != nullptr. 
template - Status Create(const string& container, const string& name, + Status Create(const std::string& container, const std::string& name, T* resource) TF_MUST_USE_RESULT; // If "container" has a resource "name", returns it in "*resource" and @@ -174,7 +174,7 @@ class ResourceMgr { // REQUIRES: std::is_base_of // REQUIRES: resource != nullptr template - Status Lookup(const string& container, const string& name, + Status Lookup(const std::string& container, const std::string& name, T** resource) const TF_MUST_USE_RESULT; // Similar to Lookup, but looks up multiple resources at once, with only a @@ -197,7 +197,7 @@ class ResourceMgr { // REQUIRES: std::is_base_of // REQUIRES: resource != nullptr template - Status LookupOrCreate(const string& container, const string& name, + Status LookupOrCreate(const std::string& container, const std::string& name, T** resource, std::function creator) TF_MUST_USE_RESULT; @@ -205,19 +205,20 @@ class ResourceMgr { // // REQUIRES: std::is_base_of template - Status Delete(const string& container, const string& name) TF_MUST_USE_RESULT; + Status Delete(const std::string& container, + const std::string& name) TF_MUST_USE_RESULT; // Deletes the resource pointed by "handle". Status Delete(const ResourceHandle& handle) TF_MUST_USE_RESULT; // Deletes all resources from the "container" and removes the container. - Status Cleanup(const string& container) TF_MUST_USE_RESULT; + Status Cleanup(const std::string& container) TF_MUST_USE_RESULT; // Deletes all resources in all containers. void Clear(); // Returns a text description for all resources. - string DebugString() const; + std::string DebugString() const; private: typedef std::pair Key; @@ -236,7 +237,7 @@ class ResourceMgr { std::unique_ptr name; ResourceAndName(); - ResourceAndName(ResourceBase* resource, string name); + ResourceAndName(ResourceBase* resource, std::string name); ResourceAndName(ResourceAndName&& other) noexcept; ~ResourceAndName(); @@ -247,31 +248,31 @@ class ResourceMgr { }; typedef std::unordered_map Container; - const string default_container_; + const std::string default_container_; mutable mutex mu_; std::unordered_map containers_ TF_GUARDED_BY(mu_); template - Status LookupInternal(const string& container, const string& name, + Status LookupInternal(const std::string& container, const std::string& name, T** resource) const TF_SHARED_LOCKS_REQUIRED(mu_) TF_MUST_USE_RESULT; - Status DoCreate(const string& container, TypeIndex type, const string& name, - ResourceBase* resource) + Status DoCreate(const std::string& container, TypeIndex type, + const std::string& name, ResourceBase* resource) TF_EXCLUSIVE_LOCKS_REQUIRED(mu_) TF_MUST_USE_RESULT; - Status DoLookup(const string& container, TypeIndex type, const string& name, - ResourceBase** resource) const + Status DoLookup(const std::string& container, TypeIndex type, + const std::string& name, ResourceBase** resource) const TF_SHARED_LOCKS_REQUIRED(mu_) TF_MUST_USE_RESULT; - Status DoDelete(const string& container, uint64 type_hash_code, - const string& resource_name, - const string& type_name) TF_MUST_USE_RESULT; - Status DoDelete(const string& container, TypeIndex type, - const string& resource_name) TF_MUST_USE_RESULT; + Status DoDelete(const std::string& container, uint64 type_hash_code, + const std::string& resource_name, + const std::string& type_name) TF_MUST_USE_RESULT; + Status DoDelete(const std::string& container, TypeIndex type, + const std::string& resource_name) TF_MUST_USE_RESULT; // Inserts the type name for 'hash_code' into the 
hash_code to type name map. - Status InsertDebugTypeName(uint64 hash_code, const string& type_name) + Status InsertDebugTypeName(uint64 hash_code, const std::string& type_name) TF_EXCLUSIVE_LOCKS_REQUIRED(mu_) TF_MUST_USE_RESULT; // Returns the type name for the 'hash_code'. @@ -289,14 +290,14 @@ class ResourceMgr { // Makes a resource handle with the specified type for a given container / // name. ResourceHandle MakeResourceHandle( - const string& container, const string& name, const DeviceBase& device, - const TypeIndex& type_index, + const std::string& container, const std::string& name, + const DeviceBase& device, const TypeIndex& type_index, const std::vector& dtypes_and_shapes = {}) TF_MUST_USE_RESULT; template ResourceHandle MakeResourceHandle( - OpKernelContext* ctx, const string& container, const string& name, + OpKernelContext* ctx, const std::string& container, const std::string& name, const std::vector& dtypes_and_shapes = {}) { return MakeResourceHandle( container.empty() ? ctx->resource_manager()->default_container() @@ -306,7 +307,8 @@ ResourceHandle MakeResourceHandle( template ResourceHandle MakeResourceHandle( - OpKernelConstruction* ctx, const string& container, const string& name, + OpKernelConstruction* ctx, const std::string& container, + const std::string& name, const std::vector& dtypes_and_shapes = {}) { return MakeResourceHandle( container.empty() ? ctx->resource_manager()->default_container() @@ -315,7 +317,8 @@ ResourceHandle MakeResourceHandle( } Status MakeResourceHandleToOutput(OpKernelContext* context, int output_index, - const string& container, const string& name, + const std::string& container, + const std::string& name, const TypeIndex& type_index); // Returns a resource handle from a numbered op input. @@ -409,19 +412,19 @@ class ContainerInfo { // name is name(). If resource_is_private_to_kernel() is true, the // kernel should delete the resource when the kernel is deleted. ResourceMgr* resource_manager() const { return rmgr_; } - const string& container() const { return container_; } - const string& name() const { return name_; } + const std::string& container() const { return container_; } + const std::string& name() const { return name_; } bool resource_is_private_to_kernel() const { return resource_is_private_to_kernel_; } // Returns a readable string for *this. - string DebugString() const; + std::string DebugString() const; private: ResourceMgr* rmgr_ = nullptr; - string container_; - string name_; + std::string container_; + std::string name_; bool resource_is_private_to_kernel_ = false; }; @@ -435,8 +438,8 @@ class ContainerInfo { // Returns OK if the resource is found and transfers one ref of // *resource to the caller. Otherwise, returns an error. template -Status GetResourceFromContext(OpKernelContext* ctx, const string& input_name, - T** resource); +Status GetResourceFromContext(OpKernelContext* ctx, + const std::string& input_name, T** resource); // Utility op kernel to check if a handle to resource type T is initialized. 
template @@ -470,8 +473,8 @@ class ResourceHandleOp : public OpKernel { bool IsExpensive() override { return false; } private: - string container_; - string name_; + std::string container_; + std::string name_; mutex mutex_; Tensor resource_; std::atomic initialized_{false}; @@ -584,8 +587,8 @@ void CheckDeriveFromResourceBase() { } template -Status ResourceMgr::Create(const string& container, const string& name, - T* resource) { +Status ResourceMgr::Create(const std::string& container, + const std::string& name, T* resource) { CheckDeriveFromResourceBase(); CHECK(resource != nullptr); mutex_lock l(mu_); @@ -593,8 +596,8 @@ Status ResourceMgr::Create(const string& container, const string& name, } template -Status ResourceMgr::Lookup(const string& container, const string& name, - T** resource) const { +Status ResourceMgr::Lookup(const std::string& container, + const std::string& name, T** resource) const { CheckDeriveFromResourceBase(); tf_shared_lock l(mu_); return LookupInternal(container, name, resource); @@ -632,7 +635,8 @@ struct TypeCastFunctor { }; template -Status ResourceMgr::LookupInternal(const string& container, const string& name, +Status ResourceMgr::LookupInternal(const std::string& container, + const std::string& name, T** resource) const { ResourceBase* found = nullptr; Status s = DoLookup(container, TypeIndex::Make(), name, &found); @@ -645,8 +649,8 @@ Status ResourceMgr::LookupInternal(const string& container, const string& name, } template -Status ResourceMgr::LookupOrCreate(const string& container, const string& name, - T** resource, +Status ResourceMgr::LookupOrCreate(const std::string& container, + const std::string& name, T** resource, std::function creator) { CheckDeriveFromResourceBase(); *resource = nullptr; @@ -669,14 +673,15 @@ Status ResourceMgr::LookupOrCreate(const string& container, const string& name, } template -Status ResourceMgr::Delete(const string& container, const string& name) { +Status ResourceMgr::Delete(const std::string& container, + const std::string& name) { CheckDeriveFromResourceBase(); return DoDelete(container, TypeIndex::Make(), name); } template -Status GetResourceFromContext(OpKernelContext* ctx, const string& input_name, - T** resource) { +Status GetResourceFromContext(OpKernelContext* ctx, + const std::string& input_name, T** resource) { DataType dtype; TF_RETURN_IF_ERROR(ctx->input_dtype(input_name, &dtype)); if (dtype == DT_RESOURCE) { @@ -684,8 +689,8 @@ Status GetResourceFromContext(OpKernelContext* ctx, const string& input_name, TF_RETURN_IF_ERROR(ctx->input(input_name, &handle)); return LookupResource(ctx, handle->scalar()(), resource); } - string container; - string shared_name; + std::string container; + std::string shared_name; { mutex* mu; TF_RETURN_IF_ERROR(ctx->input_ref_mutex(input_name, &mu)); @@ -879,7 +884,7 @@ void ResourceHandlesOp::Compute(OpKernelContext* ctx) { template ResourceHandle ScopedStepContainer::MakeResourceHandle( - const string& name, const DeviceBase& device) { + const std::string& name, const DeviceBase& device) { mutex_lock ml(mu_); dirty_ = true; return tensorflow::MakeResourceHandle(container_, name, device, @@ -887,13 +892,14 @@ ResourceHandle ScopedStepContainer::MakeResourceHandle( } template -Status ScopedStepContainer::Lookup(ResourceMgr* rm, const string& name, +Status ScopedStepContainer::Lookup(ResourceMgr* rm, const std::string& name, T** resource) const { return rm->Lookup(container_, name, resource); } template -Status ScopedStepContainer::LookupOrCreate(ResourceMgr* rm, const string& 
name, +Status ScopedStepContainer::LookupOrCreate(ResourceMgr* rm, + const std::string& name, T** resource, std::function creator) { mutex_lock ml(mu_); @@ -902,7 +908,7 @@ Status ScopedStepContainer::LookupOrCreate(ResourceMgr* rm, const string& name, } template -Status ScopedStepContainer::Create(ResourceMgr* rm, const string& name, +Status ScopedStepContainer::Create(ResourceMgr* rm, const std::string& name, T* resource) { mutex_lock ml(mu_); dirty_ = true; @@ -910,7 +916,7 @@ Status ScopedStepContainer::Create(ResourceMgr* rm, const string& name, } template -Status ScopedStepContainer::Delete(ResourceMgr* rm, const string& name) { +Status ScopedStepContainer::Delete(ResourceMgr* rm, const std::string& name) { return rm->Delete(container_, name); } diff --git a/tensorflow/core/framework/resource_var.h b/tensorflow/core/framework/resource_var.h index 39fe5bbff91..f4ae7d5de61 100644 --- a/tensorflow/core/framework/resource_var.h +++ b/tensorflow/core/framework/resource_var.h @@ -67,7 +67,7 @@ class Var : public ResourceBase { mutex* mu() { return &mu_; } Tensor* tensor() { return &tensor_; } - string DebugString() const override { + std::string DebugString() const override { return strings::StrCat(DataTypeString(tensor_.dtype()), "/", tensor_.shape().DebugString()); } diff --git a/tensorflow/core/framework/session_state.h b/tensorflow/core/framework/session_state.h index 877c9970de4..ca0abd5b9d2 100644 --- a/tensorflow/core/framework/session_state.h +++ b/tensorflow/core/framework/session_state.h @@ -31,13 +31,13 @@ namespace tensorflow { class SessionState { public: // Get a tensor from the session state. - Status GetTensor(const string& handle, Tensor* tensor); + Status GetTensor(const std::string& handle, Tensor* tensor); // Store a tensor in the session state. - Status AddTensor(const string& handle, const Tensor& tensor); + Status AddTensor(const std::string& handle, const Tensor& tensor); // Delete a tensor from the session state. - Status DeleteTensor(const string& handle); + Status DeleteTensor(const std::string& handle); int64 GetNewId(); @@ -60,15 +60,15 @@ class TensorStore { struct TensorAndKey { Tensor tensor; int64 id; - string device_name; + std::string device_name; - string GetHandle(const string& tensor_name) { + std::string GetHandle(const std::string& tensor_name) { return strings::StrCat(tensor_name, ";", id, ";", device_name); } }; // Add the named tensor to the tensor store for this run. - Status AddTensor(const string& name, const TensorAndKey& tk); + Status AddTensor(const std::string& name, const TensorAndKey& tk); // Save the tensors in the tensor store of this run to the session. Status SaveTensors(const std::vector& output_names, diff --git a/tensorflow/core/framework/shape_inference.h b/tensorflow/core/framework/shape_inference.h index 1ccaa8216ec..bb79b278cb1 100644 --- a/tensorflow/core/framework/shape_inference.h +++ b/tensorflow/core/framework/shape_inference.h @@ -344,13 +344,13 @@ class InferenceContext { // incomplete shape. DimensionHandle NumElements(ShapeHandle s); - string DebugString(ShapeHandle s); - string DebugString(DimensionHandle d); - string DebugString(const ShapeAndType& shape_and_type); - string DebugString(gtl::ArraySlice shape_and_types); + std::string DebugString(ShapeHandle s); + std::string DebugString(DimensionHandle d); + std::string DebugString(const ShapeAndType& shape_and_type); + std::string DebugString(gtl::ArraySlice shape_and_types); // Describes the whole context, for debugging purposes. 
- string DebugString() const; + std::string DebugString() const; // If has rank , or its rank is unknown, return OK and return // the shape with asserted rank in <*out>. Otherwise return an error. diff --git a/tensorflow/core/framework/tracking_allocator.h b/tensorflow/core/framework/tracking_allocator.h index ca18dc9a050..7b5b3914917 100644 --- a/tensorflow/core/framework/tracking_allocator.h +++ b/tensorflow/core/framework/tracking_allocator.h @@ -54,7 +54,7 @@ struct AllocRecord { class TrackingAllocator : public Allocator { public: explicit TrackingAllocator(Allocator* allocator, bool track_ids); - string Name() override { return allocator_->Name(); } + std::string Name() override { return allocator_->Name(); } void* AllocateRaw(size_t alignment, size_t num_bytes) override { return AllocateRaw(alignment, num_bytes, AllocationAttributes()); } diff --git a/tensorflow/core/framework/variant.h b/tensorflow/core/framework/variant.h index e8a0c332968..f67d94b48e2 100644 --- a/tensorflow/core/framework/variant.h +++ b/tensorflow/core/framework/variant.h @@ -32,10 +32,10 @@ limitations under the License. namespace tensorflow { template -string TypeNameVariant(const T& value); +std::string TypeNameVariant(const T& value); template -string DebugStringVariant(const T& value); +std::string DebugStringVariant(const T& value); // Allows for specializations of Variant Decoding. `data` may be modified in // the process of decoding to `value`. @@ -43,13 +43,13 @@ template bool DecodeVariant(VariantTensorData* data, T* value); template -bool DecodeVariant(string* buf, T* value); +bool DecodeVariant(std::string* buf, T* value); template void EncodeVariant(const T& value, VariantTensorData* data); template -void EncodeVariant(const T& value, string* buf); +void EncodeVariant(const T& value, std::string* buf); // This is an implementation of a type-erased container that can store an // object of any type. The implementation is very similar to std::any, but has @@ -234,7 +234,7 @@ class Variant { return GetValue()->TypeId(); } - string DebugString() const { + std::string DebugString() const { return strings::StrCat( "VariantDebugString(), ">"); @@ -264,7 +264,7 @@ class Variant { // In the special case that a serialized Variant is stored (value // is a VariantTensorDataProto), returns value.TypeName(), the // TypeName field stored in the VariantTensorDataProto buffer. - string TypeName() const { + std::string TypeName() const { if (is_empty()) { return ""; } @@ -282,12 +282,12 @@ class Variant { bool Decode(VariantTensorData data); // Helper methods to directly serialize/deserialize from strings. 
- void Encode(string* buf) const { + void Encode(std::string* buf) const { if (!is_empty()) { GetValue()->Encode(buf); } } - bool Decode(string buf) { + bool Decode(std::string buf) { if (!is_empty()) { return GetValue()->Decode(std::move(buf)); } @@ -313,12 +313,12 @@ class Variant { virtual void CloneInto(ValueInterface* memory) const = 0; virtual void MoveAssign(ValueInterface* memory) = 0; virtual void MoveInto(ValueInterface* memory) = 0; - virtual string TypeName() const = 0; - virtual string DebugString() const = 0; + virtual std::string TypeName() const = 0; + virtual std::string DebugString() const = 0; virtual void Encode(VariantTensorData* data) const = 0; virtual bool Decode(VariantTensorData data) = 0; - virtual void Encode(string* buf) const = 0; - virtual bool Decode(string data) = 0; + virtual void Encode(std::string* buf) const = 0; + virtual bool Decode(std::string data) = 0; }; template @@ -359,9 +359,9 @@ class Variant { new (memory) Value(InPlace(), std::move(value)); } - string TypeName() const final { return TypeNameVariant(value); } + std::string TypeName() const final { return TypeNameVariant(value); } - string DebugString() const final { return DebugStringVariant(value); } + std::string DebugString() const final { return DebugStringVariant(value); } void Encode(VariantTensorData* data) const final { EncodeVariant(value, data); @@ -371,9 +371,9 @@ class Variant { return DecodeVariant(&data, &value); } - void Encode(string* buf) const final { EncodeVariant(value, buf); } + void Encode(std::string* buf) const final { EncodeVariant(value, buf); } - bool Decode(string buf) final { return DecodeVariant(&buf, &value); } + bool Decode(std::string buf) final { return DecodeVariant(&buf, &value); } T value; }; diff --git a/tensorflow/core/framework/variant_encode_decode.h b/tensorflow/core/framework/variant_encode_decode.h index 502bbd57422..340d01d5f5d 100644 --- a/tensorflow/core/framework/variant_encode_decode.h +++ b/tensorflow/core/framework/variant_encode_decode.h @@ -105,7 +105,7 @@ bool DecodeVariantImpl(VariantTensorData data, TypeResolver, T* value) { - string metadata; + std::string metadata; data.get_metadata(&metadata); return value->ParseFromString(std::move(metadata)); } @@ -136,27 +136,27 @@ template ::type>::value, struct TypeNameResolver {}; template -string TypeNameVariantImpl(const T& value, - TypeNameResolver) { +std::string TypeNameVariantImpl(const T& value, + TypeNameResolver) { return value.TypeName(); } template -string TypeNameVariantImpl( +std::string TypeNameVariantImpl( const T& value, TypeNameResolver) { return "tensorflow::Tensor"; } template -string TypeNameVariantImpl( +std::string TypeNameVariantImpl( const T& value, TypeNameResolver) { return value.GetTypeName(); } template -string TypeNameVariantImpl( +std::string TypeNameVariantImpl( const T& value, TypeNameResolver) { @@ -164,7 +164,7 @@ string TypeNameVariantImpl( } template -string TypeNameVariant(const T& value) { +std::string TypeNameVariant(const T& value) { return TypeNameVariantImpl(value, TypeNameResolver()); } @@ -194,27 +194,27 @@ struct DebugStringResolver {}; // TODO(ebrevdo): Expand DebugStringResolver to return TypeString if // there is no StrCat() constructor. 
template -string DebugStringVariantImpl( +std::string DebugStringVariantImpl( const T& value, DebugStringResolver) { return value.DebugString(); } template -string DebugStringVariantImpl( +std::string DebugStringVariantImpl( const T& value, DebugStringResolver) { return strings::StrCat(value); } template -string DebugStringVariantImpl( +std::string DebugStringVariantImpl( const T& value, DebugStringResolver) { return "?"; } template -string DebugStringVariant(const T& value) { +std::string DebugStringVariant(const T& value) { return DebugStringVariantImpl(value, DebugStringResolver()); } @@ -230,7 +230,7 @@ bool DecodeVariant(VariantTensorData* data, T* value) { } template -void EncodeVariant(const T& value, string* buf) { +void EncodeVariant(const T& value, std::string* buf) { VariantTensorData data; EncodeVariantImpl(value, TypeResolver(), &data); data.set_type_name(TypeNameVariant(value)); @@ -239,7 +239,7 @@ void EncodeVariant(const T& value, string* buf) { } template -bool DecodeVariant(string* buf, T* value) { +bool DecodeVariant(std::string* buf, T* value) { VariantTensorData data; if (!data.ParseFromString(*buf)) return false; if (!DecodeVariantImpl(std::move(data), TypeResolver(), value)) { @@ -250,7 +250,7 @@ bool DecodeVariant(string* buf, T* value) { // Specializations for VariantTensorDataProto template <> -string TypeNameVariant(const VariantTensorDataProto& value); +std::string TypeNameVariant(const VariantTensorDataProto& value); template <> void EncodeVariant(const VariantTensorDataProto& value, @@ -260,10 +260,10 @@ template <> bool DecodeVariant(VariantTensorData* data, VariantTensorDataProto* value); template <> -void EncodeVariant(const VariantTensorDataProto& value, string* buf); +void EncodeVariant(const VariantTensorDataProto& value, std::string* buf); template <> -bool DecodeVariant(string* buf, VariantTensorDataProto* value); +bool DecodeVariant(std::string* buf, VariantTensorDataProto* value); // Encodes an array of Variant objects in to the given StringListEncoder. // `variant_array` is assumed to point to an array of `n` Variant objects. diff --git a/tensorflow/core/framework/variant_op_registry.h b/tensorflow/core/framework/variant_op_registry.h index 5879597e5eb..edfb9c544c0 100644 --- a/tensorflow/core/framework/variant_op_registry.h +++ b/tensorflow/core/framework/variant_op_registry.h @@ -93,7 +93,7 @@ class UnaryVariantOpRegistry { AsyncVariantDeviceCopyFn; // Add a decode function to the registry. - void RegisterDecodeFn(const string& type_name, + void RegisterDecodeFn(const std::string& type_name, const VariantDecodeFn& decode_fn); // Returns nullptr if no decode function was found for the given TypeName. @@ -124,7 +124,7 @@ class UnaryVariantOpRegistry { } // Add a unary op function to the registry. - void RegisterUnaryOpFn(VariantUnaryOp op, const string& device, + void RegisterUnaryOpFn(VariantUnaryOp op, const std::string& device, const TypeIndex& type_index, const VariantUnaryOpFn& unary_op_fn) { VariantUnaryOpFn* existing = GetUnaryOpFn(op, device, type_index); @@ -146,7 +146,7 @@ class UnaryVariantOpRegistry { } // Add a binary op function to the registry. 
- void RegisterBinaryOpFn(VariantBinaryOp op, const string& device, + void RegisterBinaryOpFn(VariantBinaryOp op, const std::string& device, const TypeIndex& type_index, const VariantBinaryOpFn& add_fn) { VariantBinaryOpFn* existing = GetBinaryOpFn(op, device, type_index); @@ -252,7 +252,7 @@ class UnaryVariantOpRegistry { // Find or insert a string into a persistent string storage // container; return the StringPiece pointing to the permanent string // location. - static StringPiece GetPersistentStringPiece(const string& str) { + static StringPiece GetPersistentStringPiece(const std::string& str) { const auto string_storage = PersistentStringStorage(); auto found = string_storage->find(str); if (found == string_storage->end()) { @@ -307,7 +307,7 @@ Status VariantDeviceCopy( template Status UnaryOpVariant(OpKernelContext* ctx, VariantUnaryOp op, const Variant& v, Variant* v_out) { - const string& device = DeviceName::value; + const std::string& device = DeviceName::value; UnaryVariantOpRegistry::VariantUnaryOpFn* unary_op_fn = UnaryVariantOpRegistry::Global()->GetUnaryOpFn(op, device, v.TypeId()); if (unary_op_fn == nullptr) { @@ -336,7 +336,7 @@ Status BinaryOpVariants(OpKernelContext* ctx, VariantBinaryOp op, "type ids. Type names: '", a.TypeName(), "' vs. '", b.TypeName(), "'"); } - const string& device = DeviceName::value; + const std::string& device = DeviceName::value; UnaryVariantOpRegistry::VariantBinaryOpFn* binary_op_fn = UnaryVariantOpRegistry::Global()->GetBinaryOpFn(op, device, a.TypeId()); if (binary_op_fn == nullptr) { @@ -354,7 +354,7 @@ namespace variant_op_registry_fn_registration { template class UnaryVariantDecodeRegistration { public: - UnaryVariantDecodeRegistration(const string& type_name) { + UnaryVariantDecodeRegistration(const std::string& type_name) { // The Variant is passed by pointer because it should be // mutable: get below may Decode the variant, which // is a self-mutating behavior. 
The variant is not modified in @@ -386,7 +386,8 @@ class UnaryVariantDeviceCopyRegistration { UnaryVariantDeviceCopyRegistration( const VariantDeviceCopyDirection direction, const TypeIndex& type_index, const LocalVariantDeviceCopyFn& device_copy_fn) { - const string type_index_name = port::MaybeAbiDemangle(type_index.name()); + const std::string type_index_name = + port::MaybeAbiDemangle(type_index.name()); UnaryVariantOpRegistry::Global()->RegisterDeviceCopyFn( direction, type_index, [type_index_name, device_copy_fn]( @@ -413,10 +414,11 @@ class UnaryVariantUnaryOpRegistration { LocalVariantUnaryOpFn; public: - UnaryVariantUnaryOpRegistration(VariantUnaryOp op, const string& device, + UnaryVariantUnaryOpRegistration(VariantUnaryOp op, const std::string& device, const TypeIndex& type_index, const LocalVariantUnaryOpFn& unary_op_fn) { - const string type_index_name = port::MaybeAbiDemangle(type_index.name()); + const std::string type_index_name = + port::MaybeAbiDemangle(type_index.name()); UnaryVariantOpRegistry::Global()->RegisterUnaryOpFn( op, device, type_index, [type_index_name, unary_op_fn](OpKernelContext* ctx, const Variant& v, @@ -442,10 +444,12 @@ class UnaryVariantBinaryOpRegistration { LocalVariantBinaryOpFn; public: - UnaryVariantBinaryOpRegistration(VariantBinaryOp op, const string& device, + UnaryVariantBinaryOpRegistration(VariantBinaryOp op, + const std::string& device, const TypeIndex& type_index, const LocalVariantBinaryOpFn& binary_op_fn) { - const string type_index_name = port::MaybeAbiDemangle(type_index.name()); + const std::string type_index_name = + port::MaybeAbiDemangle(type_index.name()); UnaryVariantOpRegistry::Global()->RegisterBinaryOpFn( op, device, type_index, [type_index_name, binary_op_fn](OpKernelContext* ctx, const Variant& a, diff --git a/tensorflow/core/framework/variant_tensor_data.h b/tensorflow/core/framework/variant_tensor_data.h index 8c654ccec82..59246f2bb15 100644 --- a/tensorflow/core/framework/variant_tensor_data.h +++ b/tensorflow/core/framework/variant_tensor_data.h @@ -44,8 +44,8 @@ class VariantTensorData { VariantTensorData(VariantTensorDataProto proto); // Name of the type of objects being serialized. - const string& type_name() const { return type_name_; } - void set_type_name(const string& type_name) { type_name_ = type_name; } + const std::string& type_name() const { return type_name_; } + void set_type_name(const std::string& type_name) { type_name_ = type_name; } template ::type>::value> struct PODResolver {}; @@ -62,9 +62,9 @@ class VariantTensorData { return GetMetadata(value, PODResolver()); } - string& metadata_string() { return metadata_; } + std::string& metadata_string() { return metadata_; } - const string& metadata_string() const { return metadata_; } + const std::string& metadata_string() const { return metadata_; } // Tensors contained within objects being serialized. 
int tensors_size() const; @@ -84,25 +84,27 @@ class VariantTensorData { bool FromConstProto(const VariantTensorDataProto& proto); // Serialization via VariantTensorDataProto - string SerializeAsString() const; - bool SerializeToString(string* buf); - bool ParseFromString(string s); + std::string SerializeAsString() const; + bool SerializeToString(std::string* buf); + bool ParseFromString(std::string s); - string DebugString() const; + std::string DebugString() const; public: - string type_name_; - string metadata_; + std::string type_name_; + std::string metadata_; std::vector tensors_; private: template - void SetMetadata(const string& value, PODResolver) { + void SetMetadata(const std::string& value, + PODResolver) { metadata_ = value; } template - bool GetMetadata(string* value, PODResolver) const { + bool GetMetadata(std::string* value, + PODResolver) const { *value = metadata_; return true; } @@ -121,7 +123,7 @@ class VariantTensorData { }; // For backwards compatibility for when this was a proto -string ProtoDebugString(const VariantTensorData& object); +std::string ProtoDebugString(const VariantTensorData& object); } // namespace tensorflow From a01e88a079264365d5e42ddbb152fe82b93fa410 Mon Sep 17 00:00:00 2001 From: Stephan Herhut Date: Fri, 31 Jul 2020 11:50:22 -0700 Subject: [PATCH 1867/2522] Constrain mhlo.const to static shaped tensors. Constants of unknown shape cannot be materialized. In most cases, one likely wants to use a scalar constant and rely on broadcasting instead. PiperOrigin-RevId: 324252475 Change-Id: Ic8ba4785dbdd865d2c7f720ce45e4ee3f7b21c18 --- .../mlir-hlo/Dialect/mhlo/IR/hlo_ops.td | 2 +- tensorflow/compiler/mlir/hlo/tests/ops.mlir | 18 +++++++++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td index 93c5388ad5d..3d7b8273d67 100644 --- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td @@ -52,7 +52,7 @@ def HLO_ConstOp : HLO_Op<"constant", ); let results = (outs - HLO_Tensor:$output + HLO_StaticShapeTensor:$output ); let builders = [OpBuilder< diff --git a/tensorflow/compiler/mlir/hlo/tests/ops.mlir b/tensorflow/compiler/mlir/hlo/tests/ops.mlir index b46827b88a5..920e62e57b4 100644 --- a/tensorflow/compiler/mlir/hlo/tests/ops.mlir +++ b/tensorflow/compiler/mlir/hlo/tests/ops.mlir @@ -939,7 +939,23 @@ func @constants() -> () { func @constant_invalid() -> () { // expected-error@+1 {{op failed to verify that all of {value, output} have same type}} - %0 = "mhlo.constant"() {value = dense<0> : tensor} : () -> (tensor<*xi32>) + %0 = "mhlo.constant"() {value = dense<0> : tensor} : () -> (tensor<3xi32>) + return +} + +// ----- + +func @constant_invalid() -> () { + // expected-error@+1 {{op result #0 must be statically shaped tensor}} + %0 = "mhlo.constant"() {value = dense<1> : tensor} : () -> tensor + return +} + +// ----- + +func @constant_invalid() -> () { + // expected-error@+1 {{elements literal type must have static shape}} + %0 = "mhlo.constant"() {value = dense<1> : tensor} : () -> tensor return } From 82c0a2194adb0b3cabc99d61f2576bf003aa789d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 31 Jul 2020 11:50:36 -0700 Subject: [PATCH 1868/2522] Speed up LogSoftmax by using Eigen's vectorized math. 
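For reference, the change relies on the max-subtraction identity already noted in the code
comments. A minimal standalone sketch of the same row-wise computation with Eigen is shown
below; the `LogSoftmaxRow` helper name and the Eigen include path are illustrative
assumptions for this sketch only and are not part of this patch, which instead operates on
`VectorMap` views over the existing buffers.

  #include <cmath>

  #include "Eigen/Core"

  // Sketch: log-softmax over one row, using
  // log(exp(x[i]) / sum_j exp(x[j])) == (x[i] - max) - log(sum_j exp(x[j] - max)),
  // where subtracting the row max keeps exp() from overflowing without changing the result.
  Eigen::VectorXf LogSoftmaxRow(const Eigen::VectorXf& x) {
    const float max = x.maxCoeff();
    const float log_sum = std::log((x.array() - max).exp().sum());
    return (x.array() - max - log_sum).matrix();
  }

The vectorized expressions let Eigen fuse the elementwise exp/subtract loops instead of the
previous three hand-written scalar loops per row.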
PiperOrigin-RevId: 324252517 Change-Id: Ib6e93fa8d27b244404db1c28742bd55266c6524e --- .../internal/optimized/optimized_ops.h | 27 ++++--------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h index 6ca8b781805..c505ee81767 100644 --- a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h @@ -2635,8 +2635,6 @@ inline void BroadcastMulFivefold(const ArithmeticParams& params, output_shape, output_data); } - - // TODO(jiawen): We can implement BroadcastDiv on buffers of arbitrary // dimensionality if the runtime code does a single loop over one dimension // that handles broadcasting as the base case. The code generator would then @@ -4264,8 +4262,6 @@ inline void SoftmaxInt8LUT(const SoftmaxParams& params, } } -// TODO(myenik): This is the same as the reference implementation, not actually -// optimized yet. inline void LogSoftmax(const SoftmaxParams& params, const RuntimeShape& input_shape, const float* input_data, const RuntimeShape& output_shape, float* output_data) { @@ -4277,27 +4273,14 @@ inline void LogSoftmax(const SoftmaxParams& params, MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); for (int i = 0; i < outer_size; ++i) { - const float* block_input_data = input_data + i * depth; - float* block_output_data = output_data + i * depth; + VectorMap block_input(input_data + i * depth, depth, 1); + VectorMap block_output(output_data + i * depth, depth, 1); // Find max element value which we'll use to ensure numerical stability // taking advantage of the following equality: // log(exp(x[i])/sum(exp(x[i]))) == log(exp(x[i]+C)/sum(exp(x[i]+C))) - float max = std::numeric_limits::lowest(); - for (int c = 0; c < depth; ++c) { - max = std::max(max, block_input_data[c]); - } - - // Compute sum. - float sum = 0.f; - for (int c = 0; c < depth; ++c) { - sum += std::exp(block_input_data[c] - max); - } - - // Compute result. 
- const float log_sum = std::log(sum); - for (int c = 0; c < depth; ++c) { - block_output_data[c] = block_input_data[c] - max - log_sum; - } + const float max = block_input.maxCoeff(); + const float log_sum = std::log((block_input.array() - max).exp().sum()); + block_output = block_input.array() - max - log_sum; } } From f3e7bc6a0b347d7db21fad8280b687f53c34e8d7 Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Fri, 31 Jul 2020 12:01:11 -0700 Subject: [PATCH 1869/2522] [MLIR][NFC] Split RegionAliasAnalysis and SideEffectAnalysis into separate files - Also changed the name of the build target to tensorflow_analysis PiperOrigin-RevId: 324254818 Change-Id: Ie661873942c3ccf7b406f6be796b6d5fe1b79cea --- tensorflow/compiler/mlir/tensorflow/BUILD | 43 +- .../per_function_aggregate_analysis.h | 76 ++++ .../analysis/resource_alias_analysis.cc | 406 ++++++++++++++++++ .../analysis/resource_alias_analysis.h | 97 +++++ .../analysis/side_effect_analysis.cc | 362 +--------------- .../analysis/side_effect_analysis.h | 103 +---- .../transforms/resource_device_inference.cc | 2 +- 7 files changed, 608 insertions(+), 481 deletions(-) create mode 100644 tensorflow/compiler/mlir/tensorflow/analysis/per_function_aggregate_analysis.h create mode 100644 tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.cc create mode 100644 tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.h diff --git a/tensorflow/compiler/mlir/tensorflow/BUILD b/tensorflow/compiler/mlir/tensorflow/BUILD index f4fdbff2df1..093b1277a61 100644 --- a/tensorflow/compiler/mlir/tensorflow/BUILD +++ b/tensorflow/compiler/mlir/tensorflow/BUILD @@ -702,6 +702,30 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "tensorflow_analysis", + srcs = [ + "analysis/per_function_aggregate_analysis.h", + "analysis/resource_alias_analysis.cc", + "analysis/side_effect_analysis.cc", + ], + hdrs = [ + "analysis/resource_alias_analysis.h", + "analysis/side_effect_analysis.h", + ], + deps = [ + ":tensorflow", + ":tensorflow_types", + "//tensorflow/compiler/tf2xla:resource_operation_table", + "//tensorflow/core:framework", + "@com_google_absl//absl/strings", + "@llvm-project//llvm:Support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:Support", + ], +) + cc_library( name = "tensorflow_passes", srcs = [ @@ -789,8 +813,8 @@ cc_library( ":error_util", ":export_tf_dialect_op", ":mangling_util", - ":side_effect_analysis", ":tensorflow", + ":tensorflow_analysis", ":tensorflow_optimize_inc_gen", ":tensorflow_types", ":tf_data_optimization", @@ -1754,23 +1778,6 @@ cc_library( ], ) -cc_library( - name = "side_effect_analysis", - srcs = ["analysis/side_effect_analysis.cc"], - hdrs = ["analysis/side_effect_analysis.h"], - deps = [ - ":tensorflow", - ":tensorflow_types", - "//tensorflow/compiler/tf2xla:resource_operation_table", - "//tensorflow/core:framework", - "@com_google_absl//absl/strings", - "@llvm-project//llvm:Support", - "@llvm-project//mlir:IR", - "@llvm-project//mlir:Pass", - "@llvm-project//mlir:Support", - ], -) - cc_library( name = "xla_sharding_util", srcs = [ diff --git a/tensorflow/compiler/mlir/tensorflow/analysis/per_function_aggregate_analysis.h b/tensorflow/compiler/mlir/tensorflow/analysis/per_function_aggregate_analysis.h new file mode 100644 index 00000000000..da7a2bd9b5c --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/analysis/per_function_aggregate_analysis.h @@ -0,0 +1,76 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_MLIR_TENSORFLOW_ANALYSIS_PER_FUNCTION_AGGREGATE_ANALYSIS_H_ +#define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_ANALYSIS_PER_FUNCTION_AGGREGATE_ANALYSIS_H_ + +#include +#include +#include + +#include "llvm/ADT/DenseMap.h" +#include "mlir/IR/Function.h" // from @llvm-project +#include "mlir/IR/Module.h" // from @llvm-project +#include "mlir/Pass/Pass.h" // from @llvm-project + +namespace mlir { +namespace TF { +namespace detail { + +// This template defines an aggregate analysis base class, which analyzes a +// module but the analysis info is stored per function. +template +class PerFunctionAggregateAnalysis { + public: + using Info = InfoT; + + // Returns the analysis info for the given function. + const Info& GetAnalysisForFunc(FuncOp func) const { + auto it = info_map_.find(func); + assert(it != info_map_.end()); + return it->second; + } + + protected: + llvm::SmallDenseMap info_map_; +}; + +} // namespace detail + +// Base CRTP class to help write passes that are consumes a per-function +// aggregate analysis and operate on all non-extern functions (similar to a +// FunctionPass, but with no concurrency between functions). The derived classes +// need to provide a runOnFunction() method that accepts the function and the +// analysis information for that function. +template +class PerFunctionAggregateAnalysisConsumerPass + : public PassWrapper< + PerFunctionAggregateAnalysisConsumerPass, + OperationPass> { + void runOnOperation() override { + ModuleOp op = this->getOperation(); + DerivedT& derived = *static_cast(this); + auto& analysis = this->template getAnalysis(); + + for (auto func : op.getOps()) + if (!func.isExternal()) + derived.runOnFunction(func, analysis.GetAnalysisForFunc(func)); + } +}; + +} // namespace TF +} // namespace mlir + +#endif // TENSORFLOW_COMPILER_MLIR_TENSORFLOW_ANALYSIS_PER_FUNCTION_AGGREGATE_ANALYSIS_H_ diff --git a/tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.cc b/tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.cc new file mode 100644 index 00000000000..3278c06fabe --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.cc @@ -0,0 +1,406 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.h" + +#include +#include + +#include "absl/strings/str_cat.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "mlir/IR/Attributes.h" // from @llvm-project +#include "mlir/IR/Block.h" // from @llvm-project +#include "mlir/IR/Builders.h" // from @llvm-project +#include "mlir/IR/Location.h" // from @llvm-project +#include "mlir/IR/Module.h" // from @llvm-project +#include "mlir/IR/Operation.h" // from @llvm-project +#include "mlir/IR/StandardTypes.h" // from @llvm-project +#include "mlir/IR/Value.h" // from @llvm-project +#include "mlir/Support/LLVM.h" // from @llvm-project +#include "mlir/Support/LogicalResult.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_device.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" +#include "tensorflow/compiler/tf2xla/resource_operation_table.h" +#include "tensorflow/core/framework/resource_mgr.h" + +namespace mlir { +namespace TF { + +namespace { +//===----------------------------------------------------------------------===// +// BacktrackAnalysisInfo +//===----------------------------------------------------------------------===// +// Class to hold backtrack analysis for a results of a region. Backtrack +// analysis will trace back the definition of return values of regions through +// pass-through operations, so that the return value of the region will have the +// same value as the backtracked value. +class BacktrackAnalysisInfo { + public: + // Initializes the backtrack analysis for the given region. + explicit BacktrackAnalysisInfo(Region& region, + detail::BacktrackAnalysis& backtrack_analysis); + + BacktrackAnalysisInfo(BacktrackAnalysisInfo&&) = default; + + // Returns the value to which the given result number of the region can be + // backtracked to. + Value GetValue(int result_index) const { + return backtracked_values_[result_index]; + } + + // Returns the argument index of the region to which the given result number + // can backtracked to. Such results will be called "function passthrough". If + // the result cannot be backtracked to a region argument, returns llvm::None. + llvm::Optional GetArg(int result_index) const { + if (auto arg = GetValue(result_index).dyn_cast()) + if (arg.getParentBlock() == ®ion_->front()) return arg.getArgNumber(); + return llvm::None; + } + + private: + friend class detail::BacktrackAnalysis; + + // Region for which this object holds the analysis info. + Region* region_; + + // Backtracked values indexed by the result number. + llvm::SmallVector backtracked_values_; +}; +} // namespace + +namespace detail { + +//===----------------------------------------------------------------------===// +// BacktrackAnalysis +//===----------------------------------------------------------------------===// +// Holds backtrack analysis for all functions and regions within a module. +class BacktrackAnalysis { + public: + using InfoT = BacktrackAnalysisInfo; + + // Constructs the analysis by analyzing the given module. 
+ explicit BacktrackAnalysis(ModuleOp module); + + // Returns backtracking analysis for the given region. + const InfoT& GetAnalysisForRegion(Region& region) const { + auto it = info_map_.find(®ion); + assert(it != info_map_.end()); + return it->second; + } + + // Returns backtracking analysis for the given function. + const InfoT& GetAnalysisForFunc(FuncOp func) const { + return GetAnalysisForRegion(func.getBody()); + } + + // Backtracks the given value. + Value BacktrackValue(Value value); + + private: + // Returns the analysis for the given region (analyzing the region if it has + // not yet been analyzed). + const InfoT& GetOrCreateAnalysis(Region& region) { + auto it = info_map_.find(®ion); + if (it == info_map_.end()) { + // Note: Keep object construction and insertion separate. If we use + // emplace() to construct and insert in a single shot, when analyzing + // this region, calls to BacktrackValue() may end up inserting additional + // entries in the map, causing the underlying storage to be moved. This + // would also include this pertially constructed object that we have just + // inserted into the map and are constructing it. To avoid this issue, + // construct the analysis object separately and then insert it into the + // map. + InfoT info(region, *this); + info_map_.insert({®ion, std::move(info)}); + } + + return GetAnalysisForRegion(region); + } + + private: + llvm::SmallDenseMap info_map_; +}; + +// Analyzes all regions attached to all operations in the module. +BacktrackAnalysis::BacktrackAnalysis(ModuleOp module) { + module.walk([this](Operation* op) { + for (Region& region : op->getRegions()) GetOrCreateAnalysis(region); + }); +} + +// Backtracks the definition of `value` looking through passthrough ops. +// Returns a non-null value and can return `value` if backtracking is not +// possible. +Value BacktrackAnalysis::BacktrackValue(Value value) { + while (Operation* op = value.getDefiningOp()) { + int res_index = value.cast().getResultNumber(); + if (auto graph = dyn_cast(op)) { + value = graph.GetFetch().getOperand(res_index); + } else if (auto island = dyn_cast(op)) { + // Control output is generated by the IslandOp, not the yield in + // in the Island body. + if (value == island.control()) break; + value = island.GetYield().getOperand(res_index); + } else if (isa(op)) { + value = op->getOperand(res_index); + } else { + break; + } + } + return value; +} +} // namespace detail + +namespace { + +// Analyze the region. +BacktrackAnalysisInfo::BacktrackAnalysisInfo( + Region& region, detail::BacktrackAnalysis& backtrack_analysis) + : region_(®ion) { + if (region.empty()) return; + + assert(llvm::hasSingleElement(region.getBlocks())); + auto results = region.front().getTerminator()->getOperands(); + if (results.empty()) return; + + backtracked_values_.reserve(results.size()); + for (auto result : results) + backtracked_values_.push_back(backtrack_analysis.BacktrackValue(result)); +} + +//===----------------------------------------------------------------------===// +// ResourceAliasAnalysisInfo helper functions. +//===----------------------------------------------------------------------===// + +constexpr char kResourceArgUniqueIdAttr[] = "tf._resource_arg_unique_id"; + +// Returns if a VarHandleOp is anonymous, which means it always creates a new +// variable. +bool IsResourceHandleAnonymous(TF::VarHandleOp handle) { + return handle.shared_name() == tensorflow::ResourceHandle::ANONYMOUS_NAME; +} + +// Returns a string unique identifier for a non-anonymous VarHandleOp. 
+std::string GetVarHandleStringId(TF::VarHandleOp handle) { + auto device = handle.getAttrOfType("device"); + return absl::StrCat(handle.container().str(), "/", handle.shared_name().str(), + "/", device ? device.getValue().str() : std::string("")); +} + +// Finds a unique ID for a VarHandleOp's output. If it is anonymous, always +// creates a new ID; otherwise, tries to reuse the existing ID for the +// referenced variable if it exists, or creates a new one if not. +int64_t GetOrCreateIdForVarHandle(TF::VarHandleOp handle, int64_t* next_id, + llvm::StringMap* name_id_map) { + // Always create a new ID for anonymous handle. + if (IsResourceHandleAnonymous(handle)) return (*next_id)++; + + auto name = GetVarHandleStringId(handle); + auto emplace_res = name_id_map->try_emplace(name, *next_id); + // New ID created, increment next_id. + if (emplace_res.second) ++(*next_id); + return emplace_res.first->second; +} + +} // namespace + +namespace detail { +//===----------------------------------------------------------------------===// +// ResourceAliasAnalysisInfo +//===----------------------------------------------------------------------===// + +// Constructs the analysis info by analyzing the given function. +ResourceAliasAnalysisInfo::ResourceAliasAnalysisInfo( + FuncOp func_op, const detail::BacktrackAnalysis& backtrack_analysis) { + // This function populates resource_value_to_ids_ and id_to_resource_values_. + + // If the "tf.resource_arg_unique_id" argument attributes are present for + // resource-type arguments, respect them when choosing IDs; otherwise, they + // must not alias. + int64_t next_unique_id = 0; + const bool has_arg_unique_id_attrs = + llvm::any_of(func_op.getArguments(), [&](const BlockArgument& arg) { + return func_op.getArgAttr(arg.getArgNumber(), kResourceArgUniqueIdAttr); + }); + // Maps the kResourceArgUniqueIdAttr attribute value to the internal integer + // ID used by this pass. 
+ llvm::SmallDenseMap attr_id_to_internal_id; + for (auto arg : func_op.getArguments()) { + if (!mlir::getElementTypeOrSelf(arg.getType()).isa()) + continue; + if (has_arg_unique_id_attrs) { + auto id_attr = func_op.getArgAttrOfType( + arg.getArgNumber(), kResourceArgUniqueIdAttr); + assert(id_attr && + "tf.resource_arg_unique_id attribute should exist on either none " + "or all arguments."); + auto emplace_res = attr_id_to_internal_id.try_emplace(id_attr.getInt(), + next_unique_id++); + AddValueUniqueIDMapping(arg, emplace_res.first->getSecond()); + } else { + AddValueUniqueIDMapping(arg, next_unique_id++); + } + } + llvm::StringMap var_handle_name_id_map; + auto forward_input_to_output = [&](const Value& operand, + const Value& result) { + if (!mlir::getElementTypeOrSelf(result.getType()).isa()) + return; + auto& result_ids = resource_value_to_ids_[result]; + auto operand_it = resource_value_to_ids_.find(operand); + assert(operand_it != resource_value_to_ids_.end() && + "A resource-type output does not have the corresponding " + "resource-type input."); + result_ids.insert(operand_it->getSecond().begin(), + operand_it->getSecond().end()); + }; + + func_op.walk([&](Operation* op) { + if (auto var_handle = llvm::dyn_cast(op)) { + AddValueUniqueIDMapping( + var_handle.resource(), + GetOrCreateIdForVarHandle(var_handle, &next_unique_id, + &var_handle_name_id_map)); + } else if (llvm::isa(op)) { + for (auto operand_and_result : + llvm::zip(op->getOperands(), op->getResults())) { + forward_input_to_output(std::get<0>(operand_and_result), + std::get<1>(operand_and_result)); + } + } else if (auto replicate = llvm::dyn_cast(op)) { + // The nested block for ReplicateOp is handled separately in side-effect + // analysis. Inside that block, we can still treat its block arguments as + // different resources. + for (auto arg : replicate.GetBody().getArguments()) { + if (mlir::getElementTypeOrSelf(arg.getType()).isa()) { + AddValueUniqueIDMapping(arg, next_unique_id++); + } + } + } else if (auto while_op = llvm::dyn_cast(op)) { + const auto& body_info = + backtrack_analysis.GetAnalysisForFunc(while_op.body_func()); + // If a result is a passthrough of the body input, use the corresponding + // operand's resource IDs. + for (auto result : llvm::enumerate(while_op.getResults())) { + if (!mlir::getElementTypeOrSelf(result.value().getType()) + .isa()) { + continue; + } + auto passthrough_arg = body_info.GetArg(result.index()); + if (passthrough_arg) { + forward_input_to_output( + while_op.getOperand(passthrough_arg.getValue()), result.value()); + } else { + AddValueUniqueIDMapping(result.value(), kUnknownResourceId); + } + } + } else if (auto if_op = llvm::dyn_cast(op)) { + const auto& then_info = + backtrack_analysis.GetAnalysisForFunc(if_op.then_func()); + const auto& else_info = + backtrack_analysis.GetAnalysisForFunc(if_op.else_func()); + // If a result is a passthrough of both branches' inputs, merge the + // resource IDs of corresponding operands for the two inputs. 
+ for (auto result : llvm::enumerate(if_op.getResults())) { + if (!mlir::getElementTypeOrSelf(result.value().getType()) + .isa()) { + continue; + } + auto passthrough_then_arg = then_info.GetArg(result.index()); + auto passthrough_else_arg = else_info.GetArg(result.index()); + if (passthrough_then_arg && passthrough_else_arg) { + Value then_operand = if_op.input()[passthrough_then_arg.getValue()]; + Value else_operand = if_op.input()[passthrough_else_arg.getValue()]; + forward_input_to_output(then_operand, result.value()); + forward_input_to_output(else_operand, result.value()); + } else { + AddValueUniqueIDMapping(result.value(), kUnknownResourceId); + } + } + } else { + for (auto result : op->getResults()) { + if (!mlir::getElementTypeOrSelf(result.getType()) + .isa()) + continue; + AddValueUniqueIDMapping(result, kUnknownResourceId); + } + } + }); +} + +bool ResourceAliasAnalysisInfo::IsUnknownResource(const Value resource) const { + auto it = resource_value_to_ids_.find(resource); + assert(it != resource_value_to_ids_.end() && !it->getSecond().empty()); + // The set is sorted so we only need to check the first element since + // kUnknownResourceId < 0. + static_assert(kUnknownResourceId < 0, + "kUnknownResourceId should be negative"); + return *it->getSecond().begin() == kUnknownResourceId; +} + +const llvm::SmallSet& +ResourceAliasAnalysisInfo::GetResourceUniqueIds(Value resource) const { + auto it = resource_value_to_ids_.find(resource); + assert(it != resource_value_to_ids_.end() && "Unseen resource was queried"); + return it->getSecond(); +} + +const llvm::SmallSetVector& +ResourceAliasAnalysisInfo::GetUniqueIdResources(const int64_t id) const { + auto it = id_to_resource_values_.find(id); + assert(it != id_to_resource_values_.end() && "Unseen id was queried"); + return it->getSecond(); +} + +llvm::SmallSetVector ResourceAliasAnalysisInfo::GetResourceAliases( + const Value resource) const { + assert(!IsUnknownResource(resource) && "Unseen resource was queried"); + llvm::SmallSetVector aliases; + for (int64_t id : GetResourceUniqueIds(resource)) { + const llvm::SmallSetVector& resources_aliasing_id = + GetUniqueIdResources(id); + aliases.insert(resources_aliasing_id.begin(), resources_aliasing_id.end()); + } + return aliases; +} + +} // namespace detail + +//===----------------------------------------------------------------------===// +// ResourceAliasAnalysis +//===----------------------------------------------------------------------===// + +ResourceAliasAnalysis::ResourceAliasAnalysis(Operation* op) { + auto module = dyn_cast(op); + assert(module); + + // Analyze all regions for backtracking info. + detail::BacktrackAnalysis backtrack_analysis(module); + + // Analyze each function. + for (auto func : module.getOps()) + this->info_map_.try_emplace(func, func, backtrack_analysis); +} + +} // namespace TF +} // namespace mlir diff --git a/tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.h b/tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.h new file mode 100644 index 00000000000..5a514a7fb64 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.h @@ -0,0 +1,97 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_MLIR_TENSORFLOW_ANALYSIS_RESOURCE_ALIAS_ANALYSIS_H_ +#define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_ANALYSIS_RESOURCE_ALIAS_ANALYSIS_H_ + +#include +#include +#include + +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "mlir/IR/Operation.h" // from @llvm-project +#include "mlir/IR/Region.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tensorflow/analysis/per_function_aggregate_analysis.h" + +namespace mlir { +namespace TF { +namespace detail { +class BacktrackAnalysis; + +// Resource alias analysis information for a single function. +class ResourceAliasAnalysisInfo { + public: + // Constructs analysis info by analyzing the given function. + ResourceAliasAnalysisInfo(FuncOp func, + const BacktrackAnalysis& backtrack_analysis); + + ResourceAliasAnalysisInfo(ResourceAliasAnalysisInfo&&) = default; + + // Returns if the analysis fails to resolve a resource-type value. + bool IsUnknownResource(const Value resource) const; + + // Returns the set unique IDs which `resource` could alias. Requires that + // IsUnknownResource(resource) == false. + const llvm::SmallSet& GetResourceUniqueIds(Value resource) const; + + // Returns the set of values that are potentially aliases of `value`. Requires + // that IsUnknownResource(resource) == false. + llvm::SmallSetVector GetResourceAliases(Value resource) const; + + private: + // Maps resource value to unique ID and vice-versa. + void AddValueUniqueIDMapping(Value value, int64_t id) { + resource_value_to_ids_[value].insert(id); + id_to_resource_values_[id].insert(value); + } + + // Returns the set unique Values which map to `id`. + const llvm::SmallSetVector& GetUniqueIdResources(int64_t id) const; + + // Maps each resource-type value to a set of unique IDs that it could alias. + llvm::SmallDenseMap, 8> + resource_value_to_ids_; + + // Maps each unique ID to a set of resource-type values that could alias to + // it. This is inverse of `resource_value_to_ids_` map. + llvm::SmallDenseMap, 8> + id_to_resource_values_; + + public: + static constexpr int64_t kUnknownResourceId = -1; +}; + +} // namespace detail + +// An analysis that runs on a module and maps each resource-type value to a +// set of unique IDs representing the possible resources it could alias. +// +// Note that this is not an inter-procedural or inter-regional analysis, i.e., +// each function and region are handled separately and cross-function or cross- +// region aliasing cannot be checked by this analysis. +class ResourceAliasAnalysis : public detail::PerFunctionAggregateAnalysis< + detail::ResourceAliasAnalysisInfo> { + public: + // Constructs analysis by analyzing the given module operation. 
+ explicit ResourceAliasAnalysis(Operation* op); +}; + +} // namespace TF +} // namespace mlir + +#endif // TENSORFLOW_COMPILER_MLIR_TENSORFLOW_ANALYSIS_RESOURCE_ALIAS_ANALYSIS_H_ diff --git a/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.cc b/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.cc index 89bf95cea51..9e78b90debc 100644 --- a/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.cc +++ b/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.cc @@ -45,368 +45,10 @@ limitations under the License. namespace mlir { namespace TF { - -namespace { -//===----------------------------------------------------------------------===// -// BacktrackAnalysisInfo -//===----------------------------------------------------------------------===// -// Class to hold backtrack analysis for a results of a region. Backtrack -// analysis will trace back the definition of return values of regions through -// pass-through operations, so that the return value of the region will have the -// same value as the backtracked value. -class BacktrackAnalysisInfo { - public: - // Initializes the backtrack analysis for the given region. - explicit BacktrackAnalysisInfo(Region& region, - detail::BacktrackAnalysis& backtrack_analysis); - - BacktrackAnalysisInfo(BacktrackAnalysisInfo&&) = default; - - // Returns the value to which the given result number of the region can be - // backtracked to. - Value GetValue(int result_index) const { - return backtracked_values_[result_index]; - } - - // Returns the argument index of the region to which the given result number - // can backtracked to. Such results will be called "function passthrough". If - // the result cannot be backtracked to a region argument, returns llvm::None. - llvm::Optional GetArg(int result_index) const { - if (auto arg = GetValue(result_index).dyn_cast()) - if (arg.getParentBlock() == ®ion_->front()) return arg.getArgNumber(); - return llvm::None; - } - - private: - friend class detail::BacktrackAnalysis; - - // Region for which this object holds the analysis info. - Region* region_; - - // Backtracked values indexed by the result number. - llvm::SmallVector backtracked_values_; -}; -} // namespace - -namespace detail { - -//===----------------------------------------------------------------------===// -// BacktrackAnalysis -//===----------------------------------------------------------------------===// -// Holds backtrack analysis for all functions and regions within a module. -class BacktrackAnalysis { - public: - using InfoT = BacktrackAnalysisInfo; - - // Constructs the analysis by analyzing the given module. - explicit BacktrackAnalysis(ModuleOp module); - - // Returns backtracking analysis for the given region. - const InfoT& GetAnalysisForRegion(Region& region) const { - auto it = info_map_.find(®ion); - assert(it != info_map_.end()); - return it->second; - } - - // Returns backtracking analysis for the given function. - const InfoT& GetAnalysisForFunc(FuncOp func) const { - return GetAnalysisForRegion(func.getBody()); - } - - // Backtracks the given value. - Value BacktrackValue(Value value); - - private: - // Returns the analysis for the given region (analyzing the region if it has - // not yet been analyzed). - const InfoT& GetOrCreateAnalysis(Region& region) { - auto it = info_map_.find(®ion); - if (it == info_map_.end()) { - // Note: Keep object construction and insertion separate. 
If we use - // emplace() to construct and insert in a single shot, when analyzing - // this region, calls to BacktrackValue() may end up inserting additional - // entries in the map, causing the underlying storage to be moved. This - // would also include this pertially constructed object that we have just - // inserted into the map and are constructing it. To avoid this issue, - // construct the analysis object separately and then insert it into the - // map. - InfoT info(region, *this); - info_map_.insert({®ion, std::move(info)}); - } - - return GetAnalysisForRegion(region); - } - - private: - llvm::SmallDenseMap info_map_; -}; - -// Analyzes all regions attached to all operations in the module. -BacktrackAnalysis::BacktrackAnalysis(ModuleOp module) { - module.walk([this](Operation* op) { - for (Region& region : op->getRegions()) GetOrCreateAnalysis(region); - }); -} - -// Backtracks the definition of `value` looking through passthrough ops. -// Returns a non-null value and can return `value` if backtracking is not -// possible. -Value BacktrackAnalysis::BacktrackValue(Value value) { - while (Operation* op = value.getDefiningOp()) { - int res_index = value.cast().getResultNumber(); - if (auto graph = dyn_cast(op)) { - value = graph.GetFetch().getOperand(res_index); - } else if (auto island = dyn_cast(op)) { - // Control output is generated by the IslandOp, not the yield in - // in the Island body. - if (value == island.control()) break; - value = island.GetYield().getOperand(res_index); - } else if (isa(op)) { - value = op->getOperand(res_index); - } else { - break; - } - } - return value; -} -} // namespace detail - namespace { -// Analyze the region. -BacktrackAnalysisInfo::BacktrackAnalysisInfo( - Region& region, detail::BacktrackAnalysis& backtrack_analysis) - : region_(®ion) { - if (region.empty()) return; - - assert(llvm::hasSingleElement(region.getBlocks())); - auto results = region.front().getTerminator()->getOperands(); - if (results.empty()) return; - - backtracked_values_.reserve(results.size()); - for (auto result : results) - backtracked_values_.push_back(backtrack_analysis.BacktrackValue(result)); -} - -} // namespace - -namespace { -//===----------------------------------------------------------------------===// -// ResourceAliasAnalysisInfo helper functions. -//===----------------------------------------------------------------------===// - -constexpr int64_t kUnknownResourceId = -1; -constexpr char kResourceArgUniqueIdAttr[] = "tf._resource_arg_unique_id"; - -// Returns if a VarHandleOp is anonymous, which means it always creates a new -// variable. -bool IsResourceHandleAnonymous(TF::VarHandleOp handle) { - return handle.shared_name() == tensorflow::ResourceHandle::ANONYMOUS_NAME; -} - -// Returns a string unique identifier for a non-anonymous VarHandleOp. -std::string GetVarHandleStringId(TF::VarHandleOp handle) { - auto device = handle.getAttrOfType("device"); - return absl::StrCat(handle.container().str(), "/", handle.shared_name().str(), - "/", device ? device.getValue().str() : std::string("")); -} - -// Finds a unique ID for a VarHandleOp's output. If it is anonymous, always -// creates a new ID; otherwise, tries to reuse the existing ID for the -// referenced variable if it exists, or creates a new one if not. -int64_t GetOrCreateIdForVarHandle(TF::VarHandleOp handle, int64_t* next_id, - llvm::StringMap* name_id_map) { - // Always create a new ID for anonymous handle. 
- if (IsResourceHandleAnonymous(handle)) return (*next_id)++; - - auto name = GetVarHandleStringId(handle); - auto emplace_res = name_id_map->try_emplace(name, *next_id); - // New ID created, increment next_id. - if (emplace_res.second) ++(*next_id); - return emplace_res.first->second; -} - -} // namespace - -namespace detail { -//===----------------------------------------------------------------------===// -// ResourceAliasAnalysisInfo -//===----------------------------------------------------------------------===// - -// Constructs the analysis info by analyzing the given function. -ResourceAliasAnalysisInfo::ResourceAliasAnalysisInfo( - FuncOp func_op, const detail::BacktrackAnalysis& backtrack_analysis) { - // This function populates resource_value_to_ids_ and id_to_resource_values_. - - // If the "tf.resource_arg_unique_id" argument attributes are present for - // resource-type arguments, respect them when choosing IDs; otherwise, they - // must not alias. - int64_t next_unique_id = 0; - const bool has_arg_unique_id_attrs = - llvm::any_of(func_op.getArguments(), [&](const BlockArgument& arg) { - return func_op.getArgAttr(arg.getArgNumber(), kResourceArgUniqueIdAttr); - }); - // Maps the kResourceArgUniqueIdAttr attribute value to the internal integer - // ID used by this pass. - llvm::SmallDenseMap attr_id_to_internal_id; - for (auto arg : func_op.getArguments()) { - if (!mlir::getElementTypeOrSelf(arg.getType()).isa()) - continue; - if (has_arg_unique_id_attrs) { - auto id_attr = func_op.getArgAttrOfType( - arg.getArgNumber(), kResourceArgUniqueIdAttr); - assert(id_attr && - "tf.resource_arg_unique_id attribute should exist on either none " - "or all arguments."); - auto emplace_res = attr_id_to_internal_id.try_emplace(id_attr.getInt(), - next_unique_id++); - AddValueUniqueIDMapping(arg, emplace_res.first->getSecond()); - } else { - AddValueUniqueIDMapping(arg, next_unique_id++); - } - } - llvm::StringMap var_handle_name_id_map; - auto forward_input_to_output = [&](const Value& operand, - const Value& result) { - if (!mlir::getElementTypeOrSelf(result.getType()).isa()) - return; - auto& result_ids = resource_value_to_ids_[result]; - auto operand_it = resource_value_to_ids_.find(operand); - assert(operand_it != resource_value_to_ids_.end() && - "A resource-type output does not have the corresponding " - "resource-type input."); - result_ids.insert(operand_it->getSecond().begin(), - operand_it->getSecond().end()); - }; - - func_op.walk([&](Operation* op) { - if (auto var_handle = llvm::dyn_cast(op)) { - AddValueUniqueIDMapping( - var_handle.resource(), - GetOrCreateIdForVarHandle(var_handle, &next_unique_id, - &var_handle_name_id_map)); - } else if (llvm::isa(op)) { - for (auto operand_and_result : - llvm::zip(op->getOperands(), op->getResults())) { - forward_input_to_output(std::get<0>(operand_and_result), - std::get<1>(operand_and_result)); - } - } else if (auto replicate = llvm::dyn_cast(op)) { - // The nested block for ReplicateOp is handled separately in side-effect - // analysis. Inside that block, we can still treat its block arguments as - // different resources. - for (auto arg : replicate.GetBody().getArguments()) { - if (mlir::getElementTypeOrSelf(arg.getType()).isa()) { - AddValueUniqueIDMapping(arg, next_unique_id++); - } - } - } else if (auto while_op = llvm::dyn_cast(op)) { - const auto& body_info = - backtrack_analysis.GetAnalysisForFunc(while_op.body_func()); - // If a result is a passthrough of the body input, use the corresponding - // operand's resource IDs. 
- for (auto result : llvm::enumerate(while_op.getResults())) { - if (!mlir::getElementTypeOrSelf(result.value().getType()) - .isa()) { - continue; - } - auto passthrough_arg = body_info.GetArg(result.index()); - if (passthrough_arg) { - forward_input_to_output( - while_op.getOperand(passthrough_arg.getValue()), result.value()); - } else { - AddValueUniqueIDMapping(result.value(), kUnknownResourceId); - } - } - } else if (auto if_op = llvm::dyn_cast(op)) { - const auto& then_info = - backtrack_analysis.GetAnalysisForFunc(if_op.then_func()); - const auto& else_info = - backtrack_analysis.GetAnalysisForFunc(if_op.else_func()); - // If a result is a passthrough of both branches' inputs, merge the - // resource IDs of corresponding operands for the two inputs. - for (auto result : llvm::enumerate(if_op.getResults())) { - if (!mlir::getElementTypeOrSelf(result.value().getType()) - .isa()) { - continue; - } - auto passthrough_then_arg = then_info.GetArg(result.index()); - auto passthrough_else_arg = else_info.GetArg(result.index()); - if (passthrough_then_arg && passthrough_else_arg) { - Value then_operand = if_op.input()[passthrough_then_arg.getValue()]; - Value else_operand = if_op.input()[passthrough_else_arg.getValue()]; - forward_input_to_output(then_operand, result.value()); - forward_input_to_output(else_operand, result.value()); - } else { - AddValueUniqueIDMapping(result.value(), kUnknownResourceId); - } - } - } else { - for (auto result : op->getResults()) { - if (!mlir::getElementTypeOrSelf(result.getType()) - .isa()) - continue; - AddValueUniqueIDMapping(result, kUnknownResourceId); - } - } - }); -} - -bool ResourceAliasAnalysisInfo::IsUnknownResource(const Value resource) const { - auto it = resource_value_to_ids_.find(resource); - assert(it != resource_value_to_ids_.end() && !it->getSecond().empty()); - // The set is sorted so we only need to check the first element since - // kUnknownResourceId < 0. - static_assert(kUnknownResourceId < 0, - "kUnknownResourceId should be negative"); - return *it->getSecond().begin() == kUnknownResourceId; -} - -const llvm::SmallSet& -ResourceAliasAnalysisInfo::GetResourceUniqueIds(Value resource) const { - auto it = resource_value_to_ids_.find(resource); - assert(it != resource_value_to_ids_.end() && "Unseen resource was queried"); - return it->getSecond(); -} - -const llvm::SmallSetVector& -ResourceAliasAnalysisInfo::GetUniqueIdResources(const int64_t id) const { - auto it = id_to_resource_values_.find(id); - assert(it != id_to_resource_values_.end() && "Unseen id was queried"); - return it->getSecond(); -} - -llvm::SmallSetVector ResourceAliasAnalysisInfo::GetResourceAliases( - const Value resource) const { - assert(!IsUnknownResource(resource) && "Unseen resource was queried"); - llvm::SmallSetVector aliases; - for (int64_t id : GetResourceUniqueIds(resource)) { - const llvm::SmallSetVector& resources_aliasing_id = - GetUniqueIdResources(id); - aliases.insert(resources_aliasing_id.begin(), resources_aliasing_id.end()); - } - return aliases; -} - -} // namespace detail - -//===----------------------------------------------------------------------===// -// ResourceAliasAnalysis -//===----------------------------------------------------------------------===// - -ResourceAliasAnalysis::ResourceAliasAnalysis(Operation* op) { - auto module = dyn_cast(op); - assert(module); - - // Analyze all regions for backtracking info. - detail::BacktrackAnalysis backtrack_analysis(module); - - // Analyze each function. 
- for (auto func : module.getOps()) - this->info_map_.try_emplace(func, func, backtrack_analysis); -} - -namespace { +constexpr auto kUnknownResourceId = + ResourceAliasAnalysis::Info::kUnknownResourceId; //===----------------------------------------------------------------------===// // SideEffectAnalysisInfo helper functions. diff --git a/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.h b/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.h index 69334d4ce77..c92c6e1882c 100644 --- a/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.h +++ b/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.h @@ -20,99 +20,19 @@ limitations under the License. #include #include -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" -#include "mlir/IR/Function.h" // from @llvm-project -#include "mlir/IR/Module.h" // from @llvm-project #include "mlir/IR/Operation.h" // from @llvm-project #include "mlir/IR/Region.h" // from @llvm-project -#include "mlir/Pass/Pass.h" // from @llvm-project #include "mlir/Support/LogicalResult.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.h" namespace mlir { namespace TF { - namespace detail { -// This template defines an aggregate analysis base class, which analyzes a -// module but the analysis info is stored per function. -template -class PerFunctionAggregateAnalysis { - public: - using Info = InfoT; - - // Returns the analysis info for the given function. - const Info& GetAnalysisForFunc(FuncOp func) const { - auto it = info_map_.find(func); - assert(it != info_map_.end()); - return it->second; - } - - protected: - llvm::SmallDenseMap info_map_; -}; - -class BacktrackAnalysis; - -// Resource alias analysis information for a single function. -class ResourceAliasAnalysisInfo { - public: - // Constructs analysis info by analyzing the given function. - ResourceAliasAnalysisInfo(FuncOp func, - const BacktrackAnalysis& backtrack_analysis); - - ResourceAliasAnalysisInfo(ResourceAliasAnalysisInfo&&) = default; - - // Returns if the analysis fails to resolve a resource-type value. - bool IsUnknownResource(const Value resource) const; - - // Returns the set unique IDs which `resource` could alias. Requires that - // IsUnknownResource(resource) == false. - const llvm::SmallSet& GetResourceUniqueIds(Value resource) const; - - // Returns the set of values that are potentially aliases of `value`. Requires - // that IsUnknownResource(resource) == false. - llvm::SmallSetVector GetResourceAliases(Value resource) const; - - private: - // Maps resource value to unique ID and vice-versa. - void AddValueUniqueIDMapping(Value value, int64_t id) { - resource_value_to_ids_[value].insert(id); - id_to_resource_values_[id].insert(value); - } - - // Returns the set unique Values which map to `id`. - const llvm::SmallSetVector& GetUniqueIdResources(int64_t id) const; - - // Maps each resource-type value to a set of unique IDs that it could alias. - llvm::SmallDenseMap, 8> - resource_value_to_ids_; - - // Maps each unique ID to a set of resource-type values that could alias to - // it. This is inverse of `resource_value_to_ids_` map. - llvm::SmallDenseMap, 8> - id_to_resource_values_; -}; - -} // namespace detail - -// An analysis that runs on a module and maps each resource-type value to a -// set of unique IDs representing the possible resources it could alias. 
-// -// Note that this is not an inter-procedural or inter-regional analysis, i.e., -// each function and region are handled separately and cross-function or cross- -// region aliasing cannot be checked by this analysis. -class ResourceAliasAnalysis : public detail::PerFunctionAggregateAnalysis< - detail::ResourceAliasAnalysisInfo> { - public: - // Constructs analysis by analyzing the given module operation. - explicit ResourceAliasAnalysis(Operation* op); -}; - -namespace detail { // Side effect analysis info for a single function. class SideEffectAnalysisInfo { public: @@ -213,27 +133,6 @@ class SideEffectAnalysis : public detail::PerFunctionAggregateAnalysis< explicit SideEffectAnalysis(Operation* op); }; -// Base CRTP class to help write passes that are consumes a per-function -// aggregate analysis and operate on all non-extern functions (similar to a -// FunctionPass, but with no concurrency between functions). The derived classes -// need to provide a runOnFunction() method that accepts the function and the -// analysis information for that function. -template -class PerFunctionAggregateAnalysisConsumerPass - : public PassWrapper< - PerFunctionAggregateAnalysisConsumerPass, - OperationPass> { - void runOnOperation() override { - ModuleOp op = this->getOperation(); - DerivedT& derived = *static_cast(this); - auto& analysis = this->template getAnalysis(); - - for (auto func : op.getOps()) - if (!func.isExternal()) - derived.runOnFunction(func, analysis.GetAnalysisForFunc(func)); - } -}; - } // namespace TF } // namespace mlir diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/resource_device_inference.cc b/tensorflow/compiler/mlir/tensorflow/transforms/resource_device_inference.cc index 262c7ec983c..7e8e9ee30c8 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/resource_device_inference.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/resource_device_inference.cc @@ -36,7 +36,7 @@ limitations under the License. #include "mlir/Pass/Pass.h" // from @llvm-project #include "mlir/Pass/PassRegistry.h" // from @llvm-project #include "mlir/Support/LogicalResult.h" // from @llvm-project -#include "tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.h" +#include "tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" From 56bb3cc5e8542de66dabb1de34501bb9abbdfb09 Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Fri, 31 Jul 2020 19:25:28 +0000 Subject: [PATCH 1870/2522] fix tensor_map_test dependencies --- tensorflow/core/kernels/BUILD | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 28f651fb33c..11d3b330600 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -2975,8 +2975,9 @@ tf_cc_tests( ":tensor_map", "//tensorflow/core:test", "//tensorflow/core:test_main", - "//tensorflow/core/framework:tensor_testutil", + "//tensorflow/core:framework", "@com_google_absl//absl/strings", + "@com_google_absl//absl/container:flat_hash_map", ], ) From 7e7d62b73532a72a7136bd2a77bfcef5161ea7ae Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Fri, 31 Jul 2020 12:05:20 -0700 Subject: [PATCH 1871/2522] [XLA:PJRT] Add optional platform-specific fingerprint to PjRtExecutable. If implemented by the client, this fingerprint is used as the executable's launch ID. 
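To make the new hook concrete, here is a minimal sketch (not part of this change) of how a platform-specific client might override the fingerprint query. The subclass name MyPjRtClient and the StrCat-based key are illustrative assumptions only; the patch itself just adds the base-class hook (which returns nullopt by default) and feeds the resulting string through Fingerprint32 into ExecuteOptions::launch_id.

// Hypothetical sketch only: the class name and key derivation below are
// assumptions for illustration, not part of this patch. A real client would
// hash the serialized program so that identical programs produce identical
// launch IDs across hosts.
class MyPjRtClient : public xla::PjRtClient {
 public:
  using xla::PjRtClient::PjRtClient;

  xla::StatusOr<absl::optional<std::string>> ExecutableFingerprint(
      const xla::PjRtExecutable& executable) const override {
    // Derive a stable key for the executable; executable.name() is used here
    // purely as a placeholder for a real program hash.
    return absl::optional<std::string>(
        absl::StrCat("exe:", executable.name()));
  }
};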
PiperOrigin-RevId: 324256013 Change-Id: I2288dad54dc5ba73d3d65cb71d7dd1e54e14b048 --- tensorflow/compiler/xla/pjrt/pjrt_client.cc | 12 ++++++++---- tensorflow/compiler/xla/pjrt/pjrt_client.h | 18 ++++++++++++++++++ tensorflow/compiler/xla/python/BUILD | 1 + tensorflow/compiler/xla/python/py_client.cc | 6 +++++- .../compiler/xla/python/py_executable.cc | 17 +++++++++++++++-- tensorflow/compiler/xla/python/py_executable.h | 8 +++++++- tensorflow/core/platform/fingerprint.h | 9 +++++++++ 7 files changed, 63 insertions(+), 8 deletions(-) diff --git a/tensorflow/compiler/xla/pjrt/pjrt_client.cc b/tensorflow/compiler/xla/pjrt/pjrt_client.cc index 06ec69f44c1..126b74b9b98 100644 --- a/tensorflow/compiler/xla/pjrt/pjrt_client.cc +++ b/tensorflow/compiler/xla/pjrt/pjrt_client.cc @@ -1610,6 +1610,10 @@ StatusOr PjRtExecutable::EnqueueExecution( run_options.set_run_id(run_id); run_options.set_rng_seed(device_state->GetNewPrngSeed()); run_options.set_gpu_executable_run_options(client_->gpu_run_options()); + run_options.set_launch_id(options.launch_id); + if (run_options.launch_id() != 0) { + VLOG(1) << "launch id for " << name() << ": " << run_options.launch_id(); + } // The choice of where we wait is arbitrary; the reason for the wait is // pacing to avoid problems such as memory fragmentation and running ahead @@ -2138,13 +2142,13 @@ StatusOr, Shape>> GetShardedProgramShapes( client->client()->Compile(computation, argument_layout_pointers, build_options)); - auto py_executable = absl::make_unique( + auto executable = absl::make_unique( std::move(local_executables), options.parameter_is_tupled_arguments, std::move(device_assignment), std::move(local_logical_device_ids), std::move(local_devices), client); - TF_RETURN_IF_ERROR(py_executable->SetUpDonation( - client, options.parameter_is_tupled_arguments)); - return py_executable; + TF_RETURN_IF_ERROR( + executable->SetUpDonation(client, options.parameter_is_tupled_arguments)); + return executable; } } // namespace xla diff --git a/tensorflow/compiler/xla/pjrt/pjrt_client.h b/tensorflow/compiler/xla/pjrt/pjrt_client.h index b234027adf3..bb9093a8bf7 100644 --- a/tensorflow/compiler/xla/pjrt/pjrt_client.h +++ b/tensorflow/compiler/xla/pjrt/pjrt_client.h @@ -119,6 +119,8 @@ struct PjRtCrossHostRecvBuffer { using PjRtCrossHostRecvNotifier = std::function>&&)>; +class PjRtExecutable; + // Encapsulates the state of Python session with XLA. // // It is the responsibility of the client of this API to keep the PjRtClient @@ -181,6 +183,13 @@ class PjRtClient { virtual StatusOr> GetParametersThatMustBeDonated( const LocalExecutable& executable, bool tuple_inputs) const; + // Generates a unique fingerprint for `executable`. See + // PjRtExecutable::fingerprint_. + virtual StatusOr> ExecutableFingerprint( + const PjRtExecutable& executable) const { + return absl::optional(); + } + protected: friend class PjRtBuffer; virtual void EnqueueCrossHostReceive( @@ -668,6 +677,11 @@ struct ExecuteOptions { // If true, the computation must return a tuple, which will be destructured // into its elements. bool untuple_result = false; + // If non-zero, identifies this execution as part of a potentially + // multi-device launch. This can be used to detect scheduling errors, e.g. if + // multi-host programs are launched in different orders on different hosts, + // the launch IDs may be used by the runtime to detect the mismatch. 
+ int32 launch_id = 0; }; // Represents a compiled computation that can be executed given handles to @@ -687,6 +701,8 @@ class PjRtExecutable { std::vector> local_logical_device_ids, std::vector local_devices, PjRtClient* client); + virtual ~PjRtExecutable() = default; + PjRtClient* client() const { return client_; } int num_replicas() const { @@ -744,12 +760,14 @@ class PjRtExecutable { // Initializes information about which arguments to which executables must be // donated due to aliases that were specified by the computation. Status SetUpDonation(PjRtClient* client, bool tuple_inputs); + StatusOr EnqueueExecution( absl::Span argument_handles, int replica, int partition, int executable_idx, const RunId& run_id, const ExecuteOptions& options, Device* device, std::vector* device_buffers, std::shared_ptr device_assignment) const; + StatusOr>> ExecuteHelper( absl::Span argument_handles, int replica, int partition, const RunId& run_id, const ExecuteOptions& options, diff --git a/tensorflow/compiler/xla/python/BUILD b/tensorflow/compiler/xla/python/BUILD index b8f9b8e57ca..aa55a39218d 100644 --- a/tensorflow/compiler/xla/python/BUILD +++ b/tensorflow/compiler/xla/python/BUILD @@ -202,6 +202,7 @@ cc_library( "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla/pjrt:pjrt_client", + "//tensorflow/core/platform:fingerprint", "//tensorflow/core/profiler:protos_all_cc", "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/container:flat_hash_map", diff --git a/tensorflow/compiler/xla/python/py_client.cc b/tensorflow/compiler/xla/python/py_client.cc index f0273d5ed4b..1f07c6e2042 100644 --- a/tensorflow/compiler/xla/python/py_client.cc +++ b/tensorflow/compiler/xla/python/py_client.cc @@ -124,15 +124,19 @@ StatusOr> PyClient::BufferFromPyval( StatusOr> PyClient::Compile( const XlaComputation& computation, CompileOptions options) { std::unique_ptr executable; + absl::optional fingerprint; { py::gil_scoped_release gil_release; TF_ASSIGN_OR_RETURN(executable, PjRtExecutable::Compile(computation, pjrt_client_.get(), std::move(options))); + TF_ASSIGN_OR_RETURN(fingerprint, + pjrt_client_->ExecutableFingerprint(*executable)); } auto traceback = Traceback::Get(); return std::make_unique( - shared_from_this(), std::move(executable), std::move(traceback)); + shared_from_this(), std::move(executable), std::move(traceback), + std::move(fingerprint)); } class ProfileBuilder { diff --git a/tensorflow/compiler/xla/python/py_executable.cc b/tensorflow/compiler/xla/python/py_executable.cc index c56fd3a89fc..b2cd2af56ea 100644 --- a/tensorflow/compiler/xla/python/py_executable.cc +++ b/tensorflow/compiler/xla/python/py_executable.cc @@ -16,6 +16,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/python/py_executable.h" #include "absl/algorithm/container.h" +#include "tensorflow/core/platform/fingerprint.h" namespace xla { @@ -23,10 +24,12 @@ namespace py = pybind11; PyExecutable::PyExecutable(std::shared_ptr client, std::unique_ptr executable, - std::shared_ptr traceback) + std::shared_ptr traceback, + absl::optional fingerprint) : client_(std::move(client)), executable_(std::move(executable)), - traceback_(std::move(traceback)) { + traceback_(std::move(traceback)), + fingerprint_(std::move(fingerprint)) { CHECK(PyGILState_Check()); next_ = client_->executables_; client_->executables_ = this; @@ -34,6 +37,10 @@ PyExecutable::PyExecutable(std::shared_ptr client, if (next_) { next_->prev_ = this; } + if (fingerprint_) { + VLOG(1) << "Fingerprint for executable " << executable_->name() << ": " + << *fingerprint_; + } } PyExecutable::~PyExecutable() { @@ -65,6 +72,9 @@ StatusOr>> PyExecutable::Execute( py::gil_scoped_release gil_release; ExecuteOptions options; options.untuple_result = true; + if (fingerprint_) { + options.launch_id = tensorflow::Fingerprint32(*fingerprint_); + } std::vector arg_buffers(args.size()); absl::c_transform(args, arg_buffers.begin(), [](PyBuffer* buf) { return buf->buffer(); }); @@ -89,6 +99,9 @@ PyExecutable::ExecuteOnLocalDevices( py::gil_scoped_release gil_release; ExecuteOptions options; options.untuple_result = true; + if (fingerprint_) { + options.launch_id = tensorflow::Fingerprint32(*fingerprint_); + } std::vector> arg_buffers(args.size()); for (int computation = 0; computation < args.size(); ++computation) { arg_buffers[computation].resize(args[computation].size()); diff --git a/tensorflow/compiler/xla/python/py_executable.h b/tensorflow/compiler/xla/python/py_executable.h index 7f35f97f6e9..1051d065335 100644 --- a/tensorflow/compiler/xla/python/py_executable.h +++ b/tensorflow/compiler/xla/python/py_executable.h @@ -37,7 +37,8 @@ class PyExecutable { public: PyExecutable(std::shared_ptr client, std::unique_ptr executable, - std::shared_ptr traceback); + std::shared_ptr traceback, + absl::optional fingerprint); ~PyExecutable(); std::shared_ptr client() const { return client_; } @@ -71,6 +72,11 @@ class PyExecutable { std::unique_ptr executable_; std::shared_ptr traceback_; + // Identical executables (i.e. representing the same program) will have the + // same fingerprint. nullopt on platforms or executables where fingerprints + // aren't implemented. + absl::optional fingerprint_; + // Doubly-linked list of all executables known to the client. Protected by the // GIL. PyExecutable* next_; diff --git a/tensorflow/core/platform/fingerprint.h b/tensorflow/core/platform/fingerprint.h index b1260615580..cebb0679f0d 100644 --- a/tensorflow/core/platform/fingerprint.h +++ b/tensorflow/core/platform/fingerprint.h @@ -90,6 +90,15 @@ inline uint64 Fingerprint64(const StringPiece s) { #endif } +// 32-bit variant of Fingerprint64 above (same properties and caveats apply). +inline uint32 Fingerprint32(const StringPiece s) { +#ifdef USE_OSS_FARMHASH + return ::util::Fingerprint32(s.data(), s.size()); +#else + return farmhash::Fingerprint32(s.data(), s.size()); +#endif +} + // 128-bit variant of Fingerprint64 above (same properties and caveats apply). 
inline Fprint128 Fingerprint128(const StringPiece s) { #ifdef USE_OSS_FARMHASH From 2dbca54575bd46d221d7c2f6e0f2a57410834a24 Mon Sep 17 00:00:00 2001 From: Jonathan Chu Date: Fri, 31 Jul 2020 19:32:19 +0000 Subject: [PATCH 1872/2522] Most modifications so that nest.flatten() is called once between _convert_numpy_inputs and _filtered_call --- tensorflow/python/eager/def_function.py | 7 +-- tensorflow/python/eager/function.py | 68 ++++++++++++++----------- 2 files changed, 43 insertions(+), 32 deletions(-) diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py index efc648a2f0c..a3bc0516403 100644 --- a/tensorflow/python/eager/def_function.py +++ b/tensorflow/python/eager/def_function.py @@ -855,11 +855,11 @@ class Function(object): # stateless function. return self._stateless_fn(*args, **kwds) else: - canon_args, canon_kwds = \ + _, _, flat_args, flat_kwds = \ self._stateful_fn._function_spec.canonicalize_function_inputs( # pylint: disable=protected-access *args, **kwds) # If we did not create any variables the trace we have is good enough. - return self._concrete_stateful_fn._filtered_call(canon_args, canon_kwds) # pylint: disable=protected-access + return self._concrete_stateful_fn._filtered_call(flat_args, flat_kwds) # pylint: disable=protected-access def fn_with_cond(*inner_args, **inner_kwds): """Conditionally runs initialization if it's needed.""" @@ -914,9 +914,10 @@ class Function(object): # We've created variables and are unable to lift the initialization graphs, # so we fall back to initializing with conds while running the function. - canon_args, canon_kwds = \ + canon_args, canon_kwds, _, _ = \ self._stateful_fn._function_spec.canonicalize_function_inputs( # pylint: disable=protected-access *args, **kwds) + # TODO(jlchu): fix arguments for this, two cases for fn_with_cond return function_lib.defun(fn_with_cond)(*canon_args, **canon_kwds) @property diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 53d4b62b9b5..41b9bbaa13e 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -1747,12 +1747,12 @@ class ConcreteFunction(object): TypeError: if `args` and `kwargs` do not match the structured signature of this `ConcreteFunction`. """ - args, kwargs = self._function_spec.canonicalize_function_inputs( - *args, **kwargs) + args, kwargs, flat_args, flat_kwargs = \ + self._function_spec.canonicalize_function_inputs(*args, **kwargs) self._structured_signature_check_missing_args(args, kwargs) self._structured_signature_check_unexpected_args(args, kwargs) self._structured_signature_check_arg_types(args, kwargs) - return self._filtered_call(args, kwargs, cancellation_manager) + return self._filtered_call(flat_args, flat_kwargs, cancellation_manager) def _structured_signature_check_missing_args(self, args, kwargs): """Raises a TypeError if any args are missing.""" @@ -1834,24 +1834,27 @@ class ConcreteFunction(object): type(spec_piece).__name__, spec_piece, name, type(arg_piece).__name__, arg_piece)) - def _filtered_call(self, args, kwargs, cancellation_manager=None): + def _filtered_call(self, flat_args, flat_kwargs, cancellation_manager=None): """Executes the function, filtering arguments from the Python function. Objects aside from Tensors, CompositeTensors, and Variables are ignored. - CompositeTensors are expanded into their components. + CompositeTensors have been expanded into their components on input. 
Args: - args: Canonicalized positional arguments of the Python function. - kwargs: Canonicalized keyword arguments of the Python function. + flat_args: Flattened canonicalized positional arguments of the Python + function. + flat_kwargs: Flattened canonicalized keyword arguments of the Python + function. cancellation_manager: (Optional.) A `CancellationManager` that can be used to cancel function invocation. Returns: The result of applying the function on the Tensors/Variables contained in - `args` and `kwargs`. + `flat_args` and `flat_kwargs`. """ return self._call_flat( - [t for t in nest.flatten((args, kwargs), expand_composites=True) + [t for t in flat_args + flat_kwargs \ + # TODO(jlchu): delete when final [t for t in nest.flatten((args, kwargs), expand_composites=True) if isinstance(t, (ops.Tensor, resource_variable_ops.BaseResourceVariable))], captured_inputs=self.captured_inputs, @@ -2590,7 +2593,7 @@ class FunctionSpec(object): """Canonicalizes `args` and `kwargs`. Canonicalize the inputs to the Python function using a `FunctionSpec` - instance. In particular, we parse the varags and kwargs that the + instance. In particular, we parse the varargs and kwargs that the original function was called with into a tuple corresponding to the Python function's positional (named) arguments and a dictionary corresponding to its kwargs. Missing default arguments are added. @@ -2607,8 +2610,9 @@ class FunctionSpec(object): Returns: A canonicalized ordering of the inputs representened by a tuple in the - form (args, kwargs). Here: `args` is a full list of bound arguments, and - `kwargs` contains only true keyword arguments, as opposed to named + form (args, kwargs), followed by their flattened versions in the form + (flat_args, flat_kwargs). Here: `args` is a full list of bound arguments, + and `kwargs` contains only true keyword arguments, as opposed to named arguments called in a keyword-like fashion. Raises: @@ -2689,16 +2693,16 @@ class FunctionSpec(object): kwargs.setdefault(kwarg, default) if self._input_signature is None: - inputs = _convert_numpy_inputs(inputs) - kwargs = _convert_numpy_inputs(kwargs) - return inputs, kwargs + inputs, flat_inputs = _convert_numpy_inputs(inputs) + kwargs, flat_kwargs = _convert_numpy_inputs(kwargs) + return inputs, kwargs, flat_inputs, flat_kwargs else: assert not kwargs inputs = _convert_inputs_to_signature( inputs, self._input_signature, self._flat_input_signature) - return inputs, {} + return inputs, {}, flat_inputs, {} ## TODO(jlchu): Check if last should be a dict def _as_ndarray(value): @@ -2723,9 +2727,10 @@ def _is_ndarray(value): def _convert_numpy_inputs(inputs): """Convert numpy array inputs to tensors.""" + ## TODO(jlchu): Modify/delete comment when change is final!!! # We assume that any CompositeTensors have already converted their components # from numpy arrays to Tensors, so we don't need to expand composites here. - flat_inputs = nest.flatten(inputs, expand_composites=False) + flat_inputs = nest.flatten(inputs, expand_composites=True) # Check for NumPy arrays in arguments and convert them to Tensors. 
# TODO(nareshmodi): Skip ndarray conversion to tensor altogether, perhaps @@ -2741,10 +2746,11 @@ def _convert_numpy_inputs(inputs): flat_inputs[index] = constant_op.constant(a) need_packing = True if need_packing: - return nest.pack_sequence_as( - structure=inputs, flat_sequence=flat_inputs, expand_composites=False) + return (nest.pack_sequence_as( + structure=inputs, flat_sequence=flat_inputs, expand_composites=True), + flat_inputs) else: - return inputs + return inputs, flat_inputs def _convert_inputs_to_signature(inputs, input_signature, flat_input_signature): @@ -2921,8 +2927,9 @@ class Function(object): def __call__(self, *args, **kwargs): """Calls a graph function specialized to the inputs.""" with self._lock: - graph_function, args, kwargs = self._maybe_define_function(args, kwargs) - return graph_function._filtered_call(args, kwargs) # pylint: disable=protected-access + graph_function, flat_args, flat_kwargs = \ + self._maybe_define_function(args, kwargs) + return graph_function._filtered_call(flat_args, flat_kwargs) # pylint: disable=protected-access @property def python_function(self): @@ -2998,7 +3005,7 @@ class Function(object): (str(args), str(self.input_signature))) args, kwargs = None, None with self._lock: - graph_function, args, kwargs = self._maybe_define_function(args, kwargs) + graph_function, _, _ = self._maybe_define_function(args, kwargs) seen_names = set() captured = object_identity.ObjectIdentitySet( graph_function.graph.internal_captures) @@ -3267,7 +3274,7 @@ class Function(object): Returns: A graph function corresponding to the input signature implied by args and - kwargs, as well as the inputs that the object should be called with. + kwargs, as well as flattened inputs that the object should be called with. Raises: ValueError: If inputs are incompatible with the input signature. @@ -3276,8 +3283,8 @@ class Function(object): shape relaxation retracing. 
""" if self.input_signature is None or args is not None or kwargs is not None: - args, kwargs = self._function_spec.canonicalize_function_inputs( - *args, **kwargs) + args, kwargs, flat_args, flat_kwargs = \ + self._function_spec.canonicalize_function_inputs(*args, **kwargs) cache_key = self._cache_key(args, kwargs) @@ -3290,7 +3297,7 @@ class Function(object): graph_function = self._function_cache.primary.get(cache_key, None) if graph_function is not None: - return graph_function, args, kwargs + return graph_function, flat_args, flat_kwargs logging.vlog(1, "Creating new FuncGraph for Python function %r (key: %r)", @@ -3316,7 +3323,10 @@ class Function(object): if (self._experimental_relax_shapes and self.input_signature is None and call_context_key in self._function_cache.missed): - return self._define_function_with_shape_relaxation(args, kwargs) + return_function, _, _ = \ + self.define_function_with_shape_relaxation(args, kwargs) + #TODO(jlchu): Investigate modifying above function sig directly + return return_function, flat_args, flat_kwargs self._function_cache.missed.add(call_context_key) graph_function = self._create_graph_function(args, kwargs) @@ -3325,7 +3335,7 @@ class Function(object): if ops.get_default_graph()._distribution_strategy_stack: self._traced_with_distribution_strategy = True - return graph_function, args, kwargs + return graph_function, flat_args, flat_kwargs def register(func, *args, **kwargs): From e4866b3bc1ed0c5bbada17eda90d0c67a61d311b Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Fri, 31 Jul 2020 19:34:06 +0000 Subject: [PATCH 1873/2522] fix tensor_map_test_dependencies --- tensorflow/core/kernels/BUILD | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 11d3b330600..c9a209be999 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -2973,11 +2973,11 @@ tf_cc_tests( ], deps = [ ":tensor_map", + "//tensorflow/core:framework", "//tensorflow/core:test", "//tensorflow/core:test_main", - "//tensorflow/core:framework", - "@com_google_absl//absl/strings", "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/strings", ], ) From 22cad1b6d91ec3478a6d67e1b8698cb439538d5d Mon Sep 17 00:00:00 2001 From: Doe Hyun Yoon Date: Fri, 31 Jul 2020 12:17:55 -0700 Subject: [PATCH 1874/2522] Lower priority of Merge nodes in LIFO scheduler For a subgraph like Switch - Merge, without control dependency annotation, VirtualScheduler schedules both outputs of Switch; unlike FIFO or FirstReady, LIFO choses one output path of Switch, keep executing those nodes, and when it reaches Merge, we schedules the Merge node (as it's Last In); then the other path of the Switch is scheduled after the Merge; and this scheduling is wrong in terms of control dependency. It is only a problem in VirtualScheduler, as in real execution, only one output path of Swtich will run. This CL puts Merge node at the beginning of the node queue in LIFOManager; that way Merge is scheduled with the lowest priority. All the inputs of Merge will be scheduled before the Merge. It's possible that Merge is not for Switch-Merge, more commonly, Merge is for implementing while loop. When there are many NextIteration-Merge pairs, all the Merge nodes will be in the lowest priority; and it makes sense as proceeding to the next iteration better be (loosely) sync'ed among many NextIteration-Merge pairs. 
Note that it is only for LIFOManager; VirtualScheduler by default uses FirstReady, so it doesn't change the behavior unless LIFO or Composite is specifically used. PiperOrigin-RevId: 324258814 Change-Id: Icaab672b349ce4cfd1195cf8fe10cf3b69c9fe56 --- .../core/grappler/costs/virtual_scheduler.cc | 14 +++++++++++++ .../core/grappler/costs/virtual_scheduler.h | 2 +- .../grappler/costs/virtual_scheduler_test.cc | 20 +++++++++++++++++++ 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.cc b/tensorflow/core/grappler/costs/virtual_scheduler.cc index 2a33806719d..392eff98c78 100644 --- a/tensorflow/core/grappler/costs/virtual_scheduler.cc +++ b/tensorflow/core/grappler/costs/virtual_scheduler.cc @@ -111,6 +111,20 @@ void UpdateDeviceAnnotationState(const NodeDef* node, } // namespace +void LIFOManager::AddNode(const NodeDef* node) { + // Merge nodes are scheduled with the lowest priority in LIFO manager; virtual + // scheduler may run multiple input nodes of Merge (when we don't have + // annotation, which is quite common); simply scheduling Merge after one of + // its input may break scheduling constraints; some inputs of Merge may be + // scheduled after the Merge. So, we place Merge at the beginning of the queue + // to guarantee all the inputs of Merge are scheduled before the Merge. + if (IsMerge(*node)) { + nodes_.push_front(node); + } else { + nodes_.push_back(node); + } +} + const NodeDef* LIFOManager::GetCurrNode() { CHECK(!nodes_.empty()) << "GetCurrNode(), but there's no ready node"; if (curr_pos_ == nodes_.end()) { diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.h b/tensorflow/core/grappler/costs/virtual_scheduler.h index 70f00f53927..0e15b9842a1 100644 --- a/tensorflow/core/grappler/costs/virtual_scheduler.h +++ b/tensorflow/core/grappler/costs/virtual_scheduler.h @@ -195,7 +195,7 @@ class LIFOManager : public ReadyNodeManager { public: LIFOManager() : ReadyNodeManager() {} ~LIFOManager() override {} - void AddNode(const NodeDef* node) override { nodes_.push_back(node); } + void AddNode(const NodeDef* node) override; const NodeDef* GetCurrNode() override; void RemoveCurrNode() override; bool Empty() const override { return nodes_.empty(); } diff --git a/tensorflow/core/grappler/costs/virtual_scheduler_test.cc b/tensorflow/core/grappler/costs/virtual_scheduler_test.cc index 3a332ff03db..cca91d8fe77 100644 --- a/tensorflow/core/grappler/costs/virtual_scheduler_test.cc +++ b/tensorflow/core/grappler/costs/virtual_scheduler_test.cc @@ -205,6 +205,26 @@ TEST_F(ReadyNodeManagerTest, AddAndRemoveMultipleLIFOManager) { EXPECT_TRUE(manager.Empty()); } +TEST_F(ReadyNodeManagerTest, MergeOrderInLIFOManager) { + LIFOManager manager = LIFOManager(); + node3_.set_op("Merge"); + manager.AddNode(&node1_); + manager.AddNode(&node2_); + manager.AddNode(&node3_); + manager.AddNode(&node4_); + + // Merge node (node3) will be scheduled at the end (even though it's added + // after nodde2). + EXPECT_EQ(manager.GetCurrNode()->name(), "Node4"); + manager.RemoveCurrNode(); + EXPECT_EQ(manager.GetCurrNode()->name(), "Node2"); + manager.RemoveCurrNode(); + EXPECT_EQ(manager.GetCurrNode()->name(), "Node1"); + manager.RemoveCurrNode(); + EXPECT_EQ(manager.GetCurrNode()->name(), "Node3"); + manager.RemoveCurrNode(); +} + TEST_F(ReadyNodeManagerTest, GetSingleNodeFirstReadyManager) { FirstReadyManager manager; TF_EXPECT_OK(manager.Init(&node_states_)); From de32abb6c4c9a8e735171568f23660be6f7e02ed Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 31 Jul 2020 12:19:51 -0700 Subject: [PATCH 1875/2522] Change the namespace for core boosted trees quantiles. PiperOrigin-RevId: 324259213 Change-Id: I0e9ca90a49dc0520f5642da071a5e1a7bb163db8 --- .../core/kernels/boosted_trees/quantile_ops.cc | 12 ++++++++---- .../quantiles/quantile_stream_resource.h | 2 +- .../quantiles/weighted_quantiles_buffer.h | 4 ++-- .../quantiles/weighted_quantiles_buffer_test.cc | 6 +++--- .../quantiles/weighted_quantiles_stream.h | 4 ++-- .../quantiles/weighted_quantiles_stream_test.cc | 9 ++++----- .../quantiles/weighted_quantiles_summary.h | 4 ++-- .../quantiles/weighted_quantiles_summary_test.cc | 14 +++++++------- 8 files changed, 29 insertions(+), 26 deletions(-) diff --git a/tensorflow/core/kernels/boosted_trees/quantile_ops.cc b/tensorflow/core/kernels/boosted_trees/quantile_ops.cc index 0065bdd66aa..ad1e5a47f00 100644 --- a/tensorflow/core/kernels/boosted_trees/quantile_ops.cc +++ b/tensorflow/core/kernels/boosted_trees/quantile_ops.cc @@ -51,12 +51,12 @@ const char* const kResourceHandleName = "quantile_stream_resource_handle"; using QuantileStreamResource = BoostedTreesQuantileStreamResource; using QuantileStream = - boosted_trees::quantiles::WeightedQuantilesStream; + core_boosted_trees::quantiles::WeightedQuantilesStream; using QuantileSummary = - boosted_trees::quantiles::WeightedQuantilesSummary; + core_boosted_trees::quantiles::WeightedQuantilesSummary; using QuantileSummaryEntry = - boosted_trees::quantiles::WeightedQuantilesSummary::SummaryEntry; + core_boosted_trees::quantiles::WeightedQuantilesSummary< + float, float>::SummaryEntry; // Generates quantiles on a finalized QuantileStream. std::vector GenerateBoundaries(const QuantileStream& stream, @@ -421,6 +421,10 @@ class BoostedTreesQuantileStreamResourceFlushOp : public OpKernel { generate_quantiles_ ? GenerateQuantiles(*stream, num_buckets) : GenerateBoundaries(*stream, num_buckets), stream_idx); + VLOG(1) << "Generated " + << stream_resource->boundaries(stream_idx).size() + << " boundaries. Num buckets: " << num_buckets + << " Generate quantiles: " << generate_quantiles_; } }; diff --git a/tensorflow/core/kernels/boosted_trees/quantiles/quantile_stream_resource.h b/tensorflow/core/kernels/boosted_trees/quantiles/quantile_stream_resource.h index 10afc9ee618..d2e3720aff5 100644 --- a/tensorflow/core/kernels/boosted_trees/quantiles/quantile_stream_resource.h +++ b/tensorflow/core/kernels/boosted_trees/quantiles/quantile_stream_resource.h @@ -24,7 +24,7 @@ namespace tensorflow { using QuantileStream = - boosted_trees::quantiles::WeightedQuantilesStream; + core_boosted_trees::quantiles::WeightedQuantilesStream; // Quantile Stream Resource for a list of streams sharing the same number of // quantiles, maximum elements, and epsilon. 
diff --git a/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_buffer.h b/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_buffer.h index 07aa9831c44..5f7f74ec7bb 100644 --- a/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_buffer.h +++ b/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_buffer.h @@ -23,7 +23,7 @@ #include "tensorflow/core/platform/types.h" namespace tensorflow { -namespace boosted_trees { +namespace core_boosted_trees { namespace quantiles { // Buffering container ideally suited for scenarios where we need @@ -126,7 +126,7 @@ constexpr decltype(CompareFn()) WeightedQuantilesBuffer::kCompFn; } // namespace quantiles -} // namespace boosted_trees +} // namespace core_boosted_trees } // namespace tensorflow #endif // TENSORFLOW_CORE_KERNELS_BOOSTED_TREES_QUANTILES_WEIGHTED_QUANTILES_BUFFER_H_ diff --git a/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_buffer_test.cc b/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_buffer_test.cc index 29e28811225..d99a6816f9b 100644 --- a/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_buffer_test.cc +++ b/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_buffer_test.cc @@ -22,10 +22,10 @@ namespace tensorflow { namespace { using Buffer = - boosted_trees::quantiles::WeightedQuantilesBuffer; + core_boosted_trees::quantiles::WeightedQuantilesBuffer; using BufferEntry = - boosted_trees::quantiles::WeightedQuantilesBuffer::BufferEntry; + core_boosted_trees::quantiles::WeightedQuantilesBuffer::BufferEntry; class WeightedQuantilesBufferTest : public ::testing::Test {}; diff --git a/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_stream.h b/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_stream.h index 1e8d8d5e22d..6f195bc4dd2 100644 --- a/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_stream.h +++ b/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_stream.h @@ -24,7 +24,7 @@ #include "tensorflow/core/platform/types.h" namespace tensorflow { -namespace boosted_trees { +namespace core_boosted_trees { namespace quantiles { // Class to compute approximate quantiles with error bound guarantees for @@ -326,7 +326,7 @@ WeightedQuantilesStream::GetQuantileSpecs( } } // namespace quantiles -} // namespace boosted_trees +} // namespace core_boosted_trees } // namespace tensorflow #endif // TENSORFLOW_CORE_KERNELS_BOOSTED_TREES_QUANTILES_WEIGHTED_QUANTILES_STREAM_H_ diff --git a/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_stream_test.cc b/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_stream_test.cc index 6c5b9fd23bf..59173bfc2c5 100644 --- a/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_stream_test.cc +++ b/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_stream_test.cc @@ -23,12 +23,11 @@ namespace { using Tuple = std::tuple; using Summary = - boosted_trees::quantiles::WeightedQuantilesSummary; -using SummaryEntry = - boosted_trees::quantiles::WeightedQuantilesSummary::SummaryEntry; + core_boosted_trees::quantiles::WeightedQuantilesSummary; +using SummaryEntry = core_boosted_trees::quantiles::WeightedQuantilesSummary< + double, double>::SummaryEntry; using Stream = - boosted_trees::quantiles::WeightedQuantilesStream; + core_boosted_trees::quantiles::WeightedQuantilesStream; TEST(GetQuantileSpecs, InvalidEps) { EXPECT_DEATH({ Stream::GetQuantileSpecs(-0.01, 
0L); }, "eps >= 0"); diff --git a/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_summary.h b/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_summary.h index a22af7ab71e..ca8eb5fd266 100644 --- a/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_summary.h +++ b/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_summary.h @@ -22,7 +22,7 @@ #include "tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_buffer.h" namespace tensorflow { -namespace boosted_trees { +namespace core_boosted_trees { namespace quantiles { // Summary holding a sorted block of entries with upper bound guarantees @@ -366,7 +366,7 @@ constexpr decltype(CompareFn()) WeightedQuantilesSummary::kCompFn; } // namespace quantiles -} // namespace boosted_trees +} // namespace core_boosted_trees } // namespace tensorflow #endif // TENSORFLOW_CORE_KERNELS_BOOSTED_TREES_QUANTILES_WEIGHTED_QUANTILES_SUMMARY_H_ diff --git a/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_summary_test.cc b/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_summary_test.cc index ccd1215cf49..0f8d1a3cbfd 100644 --- a/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_summary_test.cc +++ b/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_summary_test.cc @@ -21,15 +21,15 @@ namespace tensorflow { namespace { -using Buffer = boosted_trees::quantiles::WeightedQuantilesBuffer; +using Buffer = + core_boosted_trees::quantiles::WeightedQuantilesBuffer; using BufferEntry = - boosted_trees::quantiles::WeightedQuantilesBuffer::BufferEntry; + core_boosted_trees::quantiles::WeightedQuantilesBuffer::BufferEntry; using Summary = - boosted_trees::quantiles::WeightedQuantilesSummary; -using SummaryEntry = - boosted_trees::quantiles::WeightedQuantilesSummary::SummaryEntry; + core_boosted_trees::quantiles::WeightedQuantilesSummary; +using SummaryEntry = core_boosted_trees::quantiles::WeightedQuantilesSummary< + float, float>::SummaryEntry; class WeightedQuantilesSummaryTest : public ::testing::Test { protected: From 255747154861b8d9b10341347b5aa2887183422a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 31 Jul 2020 12:21:55 -0700 Subject: [PATCH 1876/2522] Move scalar multiply to the smaller side of convolution. PiperOrigin-RevId: 324259684 Change-Id: I8116f28a5973824f2b28c1b4b5f0292016b5e1f3 --- .../xla/service/algebraic_simplifier.cc | 199 ++++++++++++++++++ .../xla/service/algebraic_simplifier.h | 12 ++ .../xla/service/algebraic_simplifier_test.cc | 53 +++++ 3 files changed, 264 insertions(+) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 1f82c062df9..d77e62a1357 100755 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -428,6 +428,10 @@ class AlgebraicSimplifierVisitor : public DfsHloRewriteVisitor { shape, hlo, zero, dims, AddReduce_computation)); } + // Move scalar multiply to the smallest side of convolution to + // reduce multiply computations. + Status ScalarMultiplyReduction(HloInstruction* dot); + // Convenience method for replacing an instruction with a bitcast. If operand // is not null, then the bitcast will use the specified operand instead of the // operand of the instruction. 
@@ -563,6 +567,197 @@ bool AlgebraicSimplifierVisitor::SameShape(const HloInstruction* lhs, } } +namespace { + +float GetConstantValue(HloInstruction* inst) { + switch (inst->shape().element_type()) { + case BF16: + return static_cast(inst->literal().GetFirstElement()); + case F32: + return inst->literal().GetFirstElement(); + default: + LOG(FATAL) << "Unsupported data type: " << inst->shape().element_type(); + } +} + +bool IsOpCodeMultiplyCommutative(HloOpcode opcode) { + switch (opcode) { + case HloOpcode::kMultiply: + case HloOpcode::kTranspose: + case HloOpcode::kReshape: + case HloOpcode::kSelect: + return true; + default: + return false; + } +} + +std::unique_ptr MakeScalarInstruction(HloInstruction* target, + float multiplier) { + switch (target->shape().element_type()) { + case BF16: + return HloInstruction::CreateConstant(LiteralUtil::ConvertF32ToBF16( + LiteralUtil::CreateR0(multiplier))); + break; + case F32: + return HloInstruction::CreateConstant( + LiteralUtil::CreateR0(multiplier)); + break; + default: + LOG(FATAL) << "Unsupported data type: " << target->shape().element_type(); + } +} + +} // namespace + +Status AlgebraicSimplifierVisitor::ScalarMultiplyReduction( + HloInstruction* dot) { + // We only process bfloat16 and float32 for now. + if (dot->shape().element_type() != BF16 && + dot->shape().element_type() != F32) { + return Status::OK(); + } + + auto lhs = dot->mutable_operand(0); + auto rhs = dot->mutable_operand(1); + + const int64 dot_size = ShapeUtil::ElementsIn(dot->shape()); + const int64 lhs_size = ShapeUtil::ElementsIn(lhs->shape()); + const int64 rhs_size = ShapeUtil::ElementsIn(rhs->shape()); + + HloInstruction* target = nullptr; + // (current node, user, operand_index) + std::vector> operands; + std::vector users; + + // Find which side of dot has the smallest size: + // operand 0, operand 1, or output. + if (dot_size <= std::min(lhs_size, rhs_size)) { + target = dot; + if (dot_size < lhs_size) { + operands.emplace_back(lhs, dot, 0); + } + if (dot_size < rhs_size) { + operands.emplace_back(rhs, dot, 1); + } + } else if (lhs_size <= rhs_size) { + target = lhs; + if (lhs_size < rhs_size) { + operands.emplace_back(rhs, dot, 1); + } + if (lhs_size < dot_size && dot->user_count() == 1) { + users.push_back(dot->users().front()); + } + } else { + target = rhs; + if (rhs_size < lhs_size) { + operands.emplace_back(lhs, dot, 0); + } + if (rhs_size < dot_size && dot->user_count() == 1) { + users.push_back(dot->users().front()); + } + } + + std::vector values; + + // DFS to find scalar multiply ops from the operands. + while (!operands.empty()) { + auto [inst, user, index] = operands.back(); + operands.pop_back(); + + // Skip the op types that are not commutative with multiply. + if (!IsOpCodeMultiplyCommutative(inst->opcode())) { + continue; + } + + HloInstruction* operand; + HloInstruction* multiplier; + // Pattern match a scalar multiply. + if (Match(inst, m::MultiplyAnyOrder( + m::Op(&operand), + m::Broadcast(m::ConstantScalar(&multiplier))))) { + CHECK_LT(index, user->operand_count()); + CHECK_EQ(inst, user->operands()[index]); + + // When found a scalar multiply, save its scalar value. + values.push_back(GetConstantValue(multiplier)); + // And remove the scalar multiply op. + TF_RETURN_IF_ERROR(user->ReplaceOperandWith(index, operand)); + inst = operand; + } + + // Push the operands of inst. + int64 i = 0; + for (auto* operand : inst->operands()) { + operands.emplace_back(operand, inst, i++); + } + } + + // DFS to find scalar multiply ops from the users. 
+ while (!users.empty()) { + auto inst = users.back(); + users.pop_back(); + + if (!IsOpCodeMultiplyCommutative(inst->opcode())) { + continue; + } + + HloInstruction* operand; + HloInstruction* multiplier; + if (Match(inst, m::MultiplyAnyOrder( + m::Op(&operand), + m::Broadcast(m::ConstantScalar(&multiplier))))) { + values.push_back(GetConstantValue(multiplier)); + + TF_RETURN_IF_ERROR(inst->ReplaceAllUsesWith(operand)); + inst = operand; + } + + // Process the instructions with only one user. + // Otherwise moving scalar multiply to the operands changes the values of + // other users. + if (inst->user_count() == 1) { + users.push_back(inst->users().front()); + } + } + + if (values.empty()) { + return Status::OK(); + } + + changed_ = true; + + // Combine all constant multipliers. + float multiplier = 1.0; + for (const float v : values) { + multiplier *= v; + } + + // Create a new const scalar multiply instruction. + HloInstruction* new_const_inst; + new_const_inst = + computation_->AddInstruction(MakeScalarInstruction(target, multiplier)); + + // Broadcast the scalar multiplier. + HloInstruction* new_broadcast = computation_->AddInstruction( + HloInstruction::CreateBroadcast(target->shape(), new_const_inst, {})); + // Create a new scalar multiply instruction. + HloInstruction* new_multiply = + computation_->AddInstruction(HloInstruction::CreateBinary( + target->shape(), HloOpcode::kMultiply, target, new_broadcast)); + CHECK_EQ(new_multiply->shape(), target->shape()); + + // Update the dependency with the rest of the instructions. + if (target == lhs) { + return dot->ReplaceOperandWith(0, new_multiply); + } else if (target == rhs) { + return dot->ReplaceOperandWith(1, new_multiply); + } else { + CHECK_EQ(target, dot); + return dot->ReplaceAllUsesWith(new_multiply); + } +} + void AlgebraicSimplifierVisitor::ReplaceWithBitcast(HloInstruction* instruction, HloInstruction* operand) { CHECK_EQ(1, instruction->operand_count()); @@ -5042,6 +5237,10 @@ StatusOr AlgebraicSimplifierVisitor::SimplifyConvToDot( Status AlgebraicSimplifierVisitor::HandleConvolution( HloInstruction* convolution) { + if (options_.enable_scalar_multiply_reduction()) { + TF_RETURN_IF_ERROR(ScalarMultiplyReduction(convolution)); + } + // Zero-sized input or filter. if (ShapeUtil::IsZeroElementArray(convolution->operand(0)->shape()) || ShapeUtil::IsZeroElementArray(convolution->operand(1)->shape())) { diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.h b/tensorflow/compiler/xla/service/algebraic_simplifier.h index 9f29df3c209..9f2a3404116 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.h +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.h @@ -86,6 +86,17 @@ class AlgebraicSimplifierOptions { } bool enable_conv_operand_swap() const { return enable_conv_operand_swap_; } + // Move constant scalar multiply to one operand or output of convolutions with + // the smallest tensor size, to reduce the number of scalar multiply. + void set_enable_scalar_multiply_reduction( + bool enable_scalar_multiply_reduction) { + enable_scalar_multiply_reduction_ = enable_scalar_multiply_reduction; + } + + bool enable_scalar_multiply_reduction() const { + return enable_scalar_multiply_reduction_; + } + // If enable_window_reduce_replacement is true, the kReduceWindow instruction // can be optimized by replacement with simpler operations. 
void set_enable_window_reduce_to_reduce_replacement( @@ -146,6 +157,7 @@ class AlgebraicSimplifierOptions { bool enable_dot_to_multiply_rewrite_{true}; bool enable_conv_simplification_{true}; bool enable_conv_operand_swap_{true}; + bool enable_scalar_multiply_reduction_{false}; bool enable_window_reduce_to_reduce_replacement_{true}; bool enable_reduce_of_reshape_{true}; bool replace_transpose_with_bitcast_{true}; diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index 034d8ec4361..90ca44714f7 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -5343,6 +5343,59 @@ ENTRY AddBroadcastZeroWithDynamicSlice { EXPECT_THAT(root->operand(1)->opcode(), HloOpcode::kPad); } +TEST_F(AlgebraicSimplifierTest, ScalarMultiplyReduction) { + const char* hlo_string = R"( +HloModule ConstScalarMultiply +ENTRY ConstScalarMultiply { + param0 = f32[16,512,4096]{2,1,0} parameter(0) + constant.0 = f32[] constant(0.5) + broadcast.0 = f32[16,512,4096] broadcast(constant.0), dimensions={} + multiply.0 = f32[16,512,4096]{2,1,0} multiply(param0, broadcast.0) + param1 = f32[16,512,4096]{2,1,0} parameter(1) + multiply.1 = f32[16,512,4096]{2,1,0} multiply(multiply.0, param1) + param2 = f32[16,512,1024]{2,1,0} parameter(2) + constant.1 = f32[] constant(1.109) + broadcast.1 = f32[16,512,1024] broadcast(constant.1), dimensions={} + multiply.2 = f32[16,512,1024]{2,1,0} multiply(param2, broadcast.1) + ROOT convolution = f32[4096,1024,1]{1,0,2} convolution(multiply.1, multiply.2), window={size=16}, dim_labels=0fb_0io->bf0 +} +)"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + AlgebraicSimplifierOptions options; + options.set_enable_scalar_multiply_reduction(true); + AlgebraicSimplifier simplifier(options); + ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + auto root = module->entry_computation()->root_instruction(); + EXPECT_EQ(root->opcode(), HloOpcode::kMultiply); + EXPECT_THAT(root, + GmockMatch(m::MultiplyAnyOrder( + m::Op(), m::Broadcast(m::ConstantScalar(0.5f * 1.109f))))); +} + +TEST_F(AlgebraicSimplifierTest, ScalarMultiplyReductionMultiUser) { + const char* hlo_string = R"( +HloModule ConstScalarMultiply +ENTRY ConstScalarMultiply { + param0 = f32[16,512,1024] parameter(0) + param1 = f32[4096,1024,1] parameter(1) + convolution = f32[16,512,4096] convolution(param0, param1), window={size=1}, dim_labels=0bf_oi0->0bf + constant.1 = f32[] constant(0.5) + broadcast.1 = f32[16,512,4096] broadcast(constant.1), dimensions={} + multiply.1 = f32[16,512,4096] multiply(convolution, broadcast.1) + param2 = f32[16,512,4096] parameter(2) + multiply.2 = f32[16,512,4096] multiply(convolution, param2) + ROOT add.1 = f32[16,512,4096] add(multiply.1, multiply.2) +} +)"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + AlgebraicSimplifierOptions options; + options.set_enable_scalar_multiply_reduction(true); + AlgebraicSimplifier simplifier(options); + ASSERT_FALSE(simplifier.Run(module.get()).ValueOrDie()); +} + INSTANTIATE_TEST_SUITE_P(DotOfConcatSimplificationTestInstantiation, DotOfConcatSimplificationTest, ::testing::ValuesIn(kDotOfConcatTestSpecs)); From f3f617d6723672a231d052effd902c9786f6f122 Mon Sep 17 00:00:00 2001 From: HanBin Yoon Date: Fri, 31 Jul 2020 12:31:37 -0700 Subject: [PATCH 1877/2522] Add compiler pass to remove duplicate 'tf_saved_model.bound_input' bindings. 
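Roughly, the new pass keeps the first function argument bound to each global tensor, redirects uses of any later duplicate to that argument, and then erases the duplicates. A toy Python sketch of that bookkeeping (illustrative only; the real pass is the MLIR C++ pass added below):

  def dedup_bound_inputs(arg_bindings):
    """arg_bindings maps argument index -> bound symbol (or None).
    Returns (remap of duplicate index -> kept index, indices to erase)."""
    first_index = {}
    remap = {}
    erase = []
    for i, sym in enumerate(arg_bindings):
      if sym is None:
        continue
      if sym in first_index:
        remap[i] = first_index[sym]  # redirect uses of the duplicate arg
        erase.append(i)              # then drop the argument itself
      else:
        first_index[sym] = i
    return remap, erase

  # Arguments bound to @v, @w, @v, @x, @v as in the new test case:
  print(dedup_bound_inputs(["v", "w", "v", "x", "v"]))
  # -> ({2: 0, 4: 0}, [2, 4])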
Consolidate identical bound inputs so that resource variables do not alias in modules with tf_saved_model semantics. PiperOrigin-RevId: 324261832 Change-Id: Ia85a159cf7f65bc3e5d2aaf295023ecb09dd2d31 --- tensorflow/compiler/mlir/tensorflow/BUILD | 1 + .../mlir/tensorflow/ir/tf_saved_model.cc | 1 + .../tf_saved_model/hash_table_asset_v1.py | 17 +++-- ...odel_deduplicate_bound_input_bindings.mlir | 33 ++++++++++ .../tensorflow/tests/tf_saved_model_ops.mlir | 13 ++++ .../tests/tf_saved_model_ops_invalid.mlir | 14 ++++ .../deduplicate_bound_input_bindings.cc | 65 +++++++++++++++++++ .../transforms/tf_saved_model_passes.h | 3 + .../mlir/tensorflow/translate/import_model.cc | 4 +- 9 files changed, 145 insertions(+), 6 deletions(-) create mode 100644 tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_deduplicate_bound_input_bindings.mlir create mode 100644 tensorflow/compiler/mlir/tensorflow/transforms/deduplicate_bound_input_bindings.cc diff --git a/tensorflow/compiler/mlir/tensorflow/BUILD b/tensorflow/compiler/mlir/tensorflow/BUILD index 093b1277a61..bbcf08143c6 100644 --- a/tensorflow/compiler/mlir/tensorflow/BUILD +++ b/tensorflow/compiler/mlir/tensorflow/BUILD @@ -675,6 +675,7 @@ cc_library( cc_library( name = "tf_saved_model_passes", srcs = [ + "transforms/deduplicate_bound_input_bindings.cc", "transforms/freeze_global_tensors.cc", "transforms/lift_variables_pass.cc", "transforms/optimize_global_tensors.cc", diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc index edfc7feefd5..94a792ec3db 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc @@ -337,6 +337,7 @@ LogicalResult VerifyExportedFunc(FuncOp func) { if (auto attr = func.getArgAttrOfType( i, "tf_saved_model.bound_input")) { if (!unique_bound_inputs.insert(attr.getValue()).second) { + if (module.getAttr("tf_saved_model.under_construction")) continue; return func.emitError() << "duplicate 'tf_saved_model.bound_input' binding"; } diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/hash_table_asset_v1.py b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/hash_table_asset_v1.py index 7e86953eb8f..4cb931253b3 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/hash_table_asset_v1.py +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/hash_table_asset_v1.py @@ -27,13 +27,15 @@ import tensorflow.compat.v1 as tf from tensorflow.compiler.mlir.tensorflow.tests.tf_saved_model import common_v1 # CHECK: "tf_saved_model.session_initializer"() {initializer = [[init:@.*]]} : () -> () -# CHECK: "tf_saved_model.asset"() {filename = {{.*}}, sym_name = "[[asset:.*]]"} +# CHECK: "tf_saved_model.asset"() {filename = {{.*}}, sym_name = "[[asset1:__tf_saved_model_asset1_.*]]"} +# CHECK: "tf_saved_model.asset"() {filename = {{.*}}, sym_name = "[[asset0:__tf_saved_model_asset0_.*]]"} # CHECK: func [[init]] -# CHECK-SAME: [[ARG:%.*]]: tensor {tf_saved_model.bound_input = @[[asset]]} +# CHECK-SAME: [[ARG0:%.*]]: tensor {tf_saved_model.bound_input = @[[asset0]]} +# CHECK-SAME: [[ARG1:%.*]]: tensor {tf_saved_model.bound_input = @[[asset1]]} # CHECK-NEXT: [[R0:%.*]] = "tf.HashTableV2"() # CHECK-SAME: shared_name = "[[hash_table:.*]]" -# CHECK-NEXT: "tf.InitializeTableFromTextFileV2"([[R0]], [[ARG]]) +# CHECK-NEXT: "tf.InitializeTableFromTextFileV2"([[R0]], [[ARG0]]) def write_vocabulary_file(vocabulary): @@ -48,11 +50,16 @@ def 
write_vocabulary_file(vocabulary): def test(): + vocabulary_file = write_vocabulary_file(['cat', 'is', 'on', 'the', 'mat']) table_initializer = tf.lookup.TextFileInitializer( - write_vocabulary_file(['cat', 'is', 'on', 'the', 'mat']), tf.string, - tf.lookup.TextFileIndex.WHOLE_LINE, tf.int64, + vocabulary_file, tf.string, tf.lookup.TextFileIndex.WHOLE_LINE, tf.int64, tf.lookup.TextFileIndex.LINE_NUMBER) + # Incur another bound_input on the asset, but with a different sym_name, i.e., + # __tf_saved_model_asset1_tokens.txt vs. __tf_saved_model_asset0_tokens.txt. table = tf.lookup.StaticVocabularyTable(table_initializer, num_oov_buckets=10) + vocab_file_tensor = tf.convert_to_tensor(vocabulary_file, tf.string, + name='asset_filepath') + tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, vocab_file_tensor) x = tf.placeholder(tf.string, shape=(), name='input') r = table.lookup(x) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_deduplicate_bound_input_bindings.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_deduplicate_bound_input_bindings.mlir new file mode 100644 index 00000000000..22fd3d86068 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_deduplicate_bound_input_bindings.mlir @@ -0,0 +1,33 @@ +// RUN: tf-opt %s -split-input-file -verify-diagnostics -tf-saved-model-dedup-bound-input-binding-pass | FileCheck %s + +module attributes {tf_saved_model.semantics, tf_saved_model.under_construction} { + // Test case: Remove duplicate bound_input symbols. + "tf_saved_model.global_tensor"() { is_mutable, sym_name = "v", type = tensor, value = dense<42.0> : tensor } : () -> () + "tf_saved_model.global_tensor"() { is_mutable, sym_name = "w", type = tensor, value = dense<43.0> : tensor } : () -> () + "tf_saved_model.global_tensor"() { is_mutable, sym_name = "x", type = tensor, value = dense<44.0> : tensor } : () -> () + // CHECK: func @f + // CHECK: %arg0: tensor>> {tf_saved_model.bound_input = @v} + // CHECK: %arg1: tensor>> {tf_saved_model.bound_input = @w} + // CHECK: %arg2: tensor>> {tf_saved_model.bound_input = @x} + // CHECK-NOT: %arg3 + // CHECK-NOT: %arg4 + func @f( + %arg0: tensor>> {tf_saved_model.bound_input = @v}, + %arg1: tensor>> {tf_saved_model.bound_input = @w}, + %arg2: tensor>> {tf_saved_model.bound_input = @v}, + %arg3: tensor>> {tf_saved_model.bound_input = @x}, + %arg4: tensor>> {tf_saved_model.bound_input = @v} + ) attributes {tf_saved_model.exported_names = ["f"]} { + // CHECK: "tf.ReadVariableOp"(%arg0) : (tensor>>) -> tensor + // CHECK: "tf.ReadVariableOp"(%arg1) : (tensor>>) -> tensor + // CHECK: "tf.ReadVariableOp"(%arg0) : (tensor>>) -> tensor + // CHECK: "tf.ReadVariableOp"(%arg2) : (tensor>>) -> tensor + // CHECK: "tf.ReadVariableOp"(%arg0) : (tensor>>) -> tensor + %val0 = "tf.ReadVariableOp"(%arg0) : (tensor>>) -> tensor + %val1 = "tf.ReadVariableOp"(%arg1) : (tensor>>) -> tensor + %val2 = "tf.ReadVariableOp"(%arg2) : (tensor>>) -> tensor + %val3 = "tf.ReadVariableOp"(%arg3) : (tensor>>) -> tensor + %val4 = "tf.ReadVariableOp"(%arg4) : (tensor>>) -> tensor + return + } +} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops.mlir index 7156a1fab63..d2c5509b52d 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops.mlir @@ -76,3 +76,16 @@ module attributes {tf_saved_model.semantics, tf_saved_model.under_construction} } } + +// ----- + 
+module attributes {tf_saved_model.semantics, tf_saved_model.under_construction} { + "tf_saved_model.global_tensor"() { is_mutable, sym_name = "v", type = tensor, value = dense<42.0> : tensor } : () -> () + // CHECK: func @f + func @f( + %arg0: tensor>> {tf_saved_model.bound_input = @v}, + %arg1: tensor>> {tf_saved_model.bound_input = @v} + ) attributes {tf_saved_model.exported_names = ["f"]} { + return + } +} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops_invalid.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops_invalid.mlir index dcb889ff99e..714c8908825 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops_invalid.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops_invalid.mlir @@ -400,3 +400,17 @@ module attributes {tf_saved_model.semantics} { } } + +// ----- + +module attributes {tf_saved_model.semantics} { + + "tf_saved_model.global_tensor"() { is_mutable, sym_name = "v", type = tensor, value = dense<42.0> : tensor } : () -> () + // expected-error@+1 {{duplicate 'tf_saved_model.bound_input' binding}} + func @f( + %arg0: tensor>> {tf_saved_model.bound_input = @v}, + %arg1: tensor>> {tf_saved_model.bound_input = @v} + ) attributes {tf_saved_model.exported_names = ["f"]} { + return + } +} diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/deduplicate_bound_input_bindings.cc b/tensorflow/compiler/mlir/tensorflow/transforms/deduplicate_bound_input_bindings.cc new file mode 100644 index 00000000000..c1514dfa357 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/transforms/deduplicate_bound_input_bindings.cc @@ -0,0 +1,65 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include + +#include "llvm/ADT/DenseMap.h" +#include "mlir/IR/Function.h" // from @llvm-project +#include "mlir/Pass/Pass.h" // from @llvm-project +#include "mlir/Support/LLVM.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.h" + +namespace mlir { +namespace tf_saved_model { +namespace { + +class DedupBoundInputBindingPass + : public PassWrapper { + public: + void runOnFunction() override; +}; + +void DedupBoundInputBindingPass::runOnFunction() { + FuncOp func = getFunction(); + if (!mlir::tf_saved_model::IsExported(func)) return; + llvm::SmallDenseMap unique_bound_inputs; + llvm::SmallVector arg_indices_to_erase; + for (unsigned i = 0, e = func.getNumArguments(); i < e; i++) { + auto attr = func.getArgAttrOfType( + i, "tf_saved_model.bound_input"); + if (!attr) continue; + auto inserted = unique_bound_inputs.insert(std::make_pair(attr, i)); + if (inserted.second) continue; + auto duplicate_arg = func.getArgument(i); + auto original_arg = func.getArgument(unique_bound_inputs[attr]); + duplicate_arg.replaceAllUsesWith(original_arg); + arg_indices_to_erase.push_back(i); + } + func.eraseArguments(arg_indices_to_erase); +} + +} // namespace + +static PassRegistration pass( + "tf-saved-model-dedup-bound-input-binding-pass", + "Remove duplicate 'tf_saved_model.bound_input' bindings."); + +std::unique_ptr> CreateDedupBoundInputBindingPass() { + return std::make_unique(); +} + +} // namespace tf_saved_model +} // namespace mlir diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tf_saved_model_passes.h b/tensorflow/compiler/mlir/tensorflow/transforms/tf_saved_model_passes.h index f7a73dc1561..59532a2b123 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tf_saved_model_passes.h +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tf_saved_model_passes.h @@ -46,6 +46,9 @@ CreateRemoveVariablesInSessionInitializerPass(); std::unique_ptr> CreateLiftVariablesPass( ::tensorflow::Session* session); +// Creates a pass that removes duplicate 'tf_saved_model.bound_input' bindings. +std::unique_ptr> CreateDedupBoundInputBindingPass(); + } // namespace tf_saved_model } // namespace mlir diff --git a/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc b/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc index 2c44aaa5c42..27385e81262 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc @@ -3368,12 +3368,13 @@ SavedModelSignatureDefImporter::ConvertAssets() { results.reserve(asset_file_defs.size()); mlir::OpBuilder builder(module_->getBodyRegion()); + unsigned i = 0; // Use to generate unique sym_name(s) for duplicate assets. 
for (const auto& asset : asset_file_defs) { auto asset_op = builder.create( module_->getLoc(), /*sym_name=*/ builder.getStringAttr( - absl::StrCat("__tf_saved_model_asset_", asset.filename())), + absl::StrCat("__tf_saved_model_asset", i++, "_", asset.filename())), /*filename=*/ builder.getStringAttr( io::JoinPath(kSavedModelAssetsDirectory, asset.filename()))); @@ -3569,6 +3570,7 @@ Status SavedModelSignatureDefImporter::LiftVariables() { pm.addPass(mlir::TF::CreatePromoteVarHandlesToArgsPass()); pm.addPass( mlir::tf_saved_model::CreateLiftVariablesPass(bundle_.GetSession())); + pm.addPass(mlir::tf_saved_model::CreateDedupBoundInputBindingPass()); if (mlir::failed(pm.run(*module_))) return diag_handler.Combine(errors::Internal("Failed to lift variables.")); From b1da7fd091c96e81c0a0345f56023ad58ffb3d53 Mon Sep 17 00:00:00 2001 From: Haoyu Zhang Date: Fri, 31 Jul 2020 12:36:38 -0700 Subject: [PATCH 1878/2522] Fix order-dependent test cases in c_api_distributed_test. PiperOrigin-RevId: 324262896 Change-Id: Iafc667f18762227ee38ff7ca202a8b05e03930b4 --- tensorflow/c/eager/BUILD | 1 - tensorflow/c/eager/c_api_distributed_test.cc | 141 +++++++++++-------- tensorflow/c/eager/c_api_test_util.cc | 2 +- 3 files changed, 81 insertions(+), 63 deletions(-) diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index 2fc88f4a287..61701bc8b21 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -550,7 +550,6 @@ tf_cuda_cc_test( args = ["--heap_check=local"], extra_copts = tfe_xla_copts(), tags = [ - "no_oss", # b/162361408 "no_windows", "noasan", # leaks gRPC server instances ], diff --git a/tensorflow/c/eager/c_api_distributed_test.cc b/tensorflow/c/eager/c_api_distributed_test.cc index 414adceaeb3..3738768cf02 100644 --- a/tensorflow/c/eager/c_api_distributed_test.cc +++ b/tensorflow/c/eager/c_api_distributed_test.cc @@ -13,6 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ +#include // NOLINT + #include "tensorflow/c/eager/c_api.h" #include "tensorflow/c/eager/c_api_experimental.h" #include "tensorflow/c/eager/c_api_internal.h" @@ -262,61 +264,64 @@ TEST(CAPI, TestRemoteFunctionWithPackedInput) { TestFunctionWithPackedInput(/*remote=*/true); } +string VariableAddFunctionSignature() { + return " signature {" + " name: 'VariableAddFunction'" + " input_arg {" + " name: 'var0'" + " type: DT_RESOURCE" + " }" + " output_arg {" + " name: 'var0_value'" + " type: DT_FLOAT" + " }" + " }" + " node_def {" + " name: 'read0'" + " op: 'ReadVariableOp'" + " input: 'var0'" + " attr {" + " key: 'dtype'" + " value {" + " type: DT_FLOAT" + " }" + " }" + " }" + " node_def {" + " name: 'add'" + " op: 'Add'" + " input: 'read0:value:0'" + " input: 'read0:value:0'" + " device: '/job:localhost/task:1/device:CPU:0'" + " attr {" + " key: 'T'" + " value {" + " type: DT_FLOAT" + " }" + " }" + " }" + " node_def {" + " name: 'identity'" + " op: 'Identity'" + " input: 'add:z:0'" + " device: '/job:localhost/task:0/device:CPU:0'" + " attr {" + " key: 'T'" + " value {" + " type: DT_FLOAT" + " }" + " }" + " }" + " ret {" + " key: 'var0_value'" + " value: 'identity:output:0'" + " }"; +} + string VariableAddFunction() { tensorflow::FunctionDef def; CHECK(tensorflow::protobuf::TextFormat::ParseFromString( - " signature {" - " name: 'VariableAddFunction'" - " input_arg {" - " name: 'var0'" - " type: DT_RESOURCE" - " }" - " output_arg {" - " name: 'var0_value'" - " type: DT_FLOAT" - " }" - " }" - " node_def {" - " name: 'read0'" - " op: 'ReadVariableOp'" - " input: 'var0'" - " attr {" - " key: 'dtype'" - " value {" - " type: DT_FLOAT" - " }" - " }" - " }" - " node_def {" - " name: 'add'" - " op: 'Add'" - " input: 'read0:value:0'" - " input: 'read0:value:0'" - " device: '/job:localhost/task:1/device:CPU:0'" - " attr {" - " key: 'T'" - " value {" - " type: DT_FLOAT" - " }" - " }" - " }" - " node_def {" - " name: 'identity'" - " op: 'Identity'" - " input: 'add:z:0'" - " device: '/job:localhost/task:0/device:CPU:0'" - " attr {" - " key: 'T'" - " value {" - " type: DT_FLOAT" - " }" - " }" - " }" - " ret {" - " key: 'var0_value'" - " value: 'identity:output:0'" - " }", - &def)); + VariableAddFunctionSignature(), &def)); return def.SerializeAsString(); } @@ -428,6 +433,17 @@ TEST(CAPI, DistributedFunctionGraphPassOnlyOnce) { GraphErrorInjectionPass::enabled_ = false; } +string VariableAddFunctionWithGraphError() { + string signature = VariableAddFunctionSignature(); + // Replace the node 'read0' with 'read0_maybe_with_graph_error', so that the + // error injecting pass can identify and introduce graph pass errors. + signature = std::regex_replace(signature, std::regex("read0"), + "read0_maybe_with_graph_error"); + tensorflow::FunctionDef def; + CHECK(tensorflow::protobuf::TextFormat::ParseFromString(signature, &def)); + return def.SerializeAsString(); +} + class FunctionErrorInjectionPass : public tensorflow::FunctionOptimizationPass { public: FunctionErrorInjectionPass(string error_node, string error_device) @@ -474,16 +490,19 @@ void TestDistributedFunctionCancellation(bool inject_error) { const char dev2_name[] = "/job:localhost/replica:0/task:2/device:CPU:0"; if (inject_error) { - // Inject a function optimization pass failure when it sees the 'read0' op - // having a requested device `dev2_name`. 
During execution: - // * task:0 processes the main function `VariableAddFunction` and places - // the read0 op on task:2 - // * task:0 partitions the main function with a subgraph containing read0 - // sent to task:2 - // * task:2 graph pass reports an error when it sees read0 with dev2_name + // Inject a function optimization pass failure when it sees the + // 'read0_maybe_with_graph_error' op having a requested device `dev2_name`. + // During execution: + // * task:0 processes main function `VariableAddFunctionWithGraphError` + // and places the 'read0_maybe_with_graph_error' op on task:2 + // * task:0 partitions the main function with a subgraph containing + // 'read0_maybe_with_graph_error' sent to task:2 + // * task:2 graph pass reports an error when it sees + // 'read0_maybe_with_graph_error' with dev2_name tensorflow::function_optimization_registration:: FunctionOptimizationPassRegistration register_test_pass( - std::make_unique("read0", dev2_name)); + std::make_unique( + "read0_maybe_with_graph_error", dev2_name)); } TF_Status* status = TF_NewStatus(); @@ -499,7 +518,7 @@ void TestDistributedFunctionCancellation(bool inject_error) { TFE_TensorHandle* var_handle = TestVariable(ctx, 2.0, dev2_name); EXPECT_NE(var_handle, nullptr); - const string function_def = VariableAddFunction(); + const string function_def = VariableAddFunctionWithGraphError(); TFE_ContextAddFunctionDef(ctx, function_def.data(), function_def.size(), status); ASSERT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); diff --git a/tensorflow/c/eager/c_api_test_util.cc b/tensorflow/c/eager/c_api_test_util.cc index b18a36c12e8..192f10533a6 100644 --- a/tensorflow/c/eager/c_api_test_util.cc +++ b/tensorflow/c/eager/c_api_test_util.cc @@ -157,7 +157,7 @@ TFE_TensorHandle* TestVariable(TFE_Context* ctx, float value, if (TF_GetCode(status) != TF_OK) return nullptr; TFE_OpSetAttrType(op, "dtype", TF_FLOAT); TFE_OpSetAttrShape(op, "shape", {}, 0, status); - TFE_OpSetAttrString(op, "container", "", 0); + TFE_OpSetAttrString(op, "container", "localhost", 0); TFE_OpSetAttrString(op, "shared_name", "", 0); if (!device_name.empty()) { TFE_OpSetDevice(op, device_name.c_str(), status); From 56aa1b17ed7f5315850574d81082a91d04547efc Mon Sep 17 00:00:00 2001 From: Amy Skerry-Ryan Date: Fri, 31 Jul 2020 12:41:43 -0700 Subject: [PATCH 1879/2522] Change not exposed in public API PiperOrigin-RevId: 324263999 Change-Id: I7f776a5ba2e735f0bf6695dc0ad79801ee15fca7 --- tensorflow/python/tpu/feature_column.py | 30 +++++-- tensorflow/python/tpu/feature_column_v2.py | 29 +++++-- .../python/tpu/feature_column_v2_test.py | 86 +++++++++++++++++++ 3 files changed, 128 insertions(+), 17 deletions(-) diff --git a/tensorflow/python/tpu/feature_column.py b/tensorflow/python/tpu/feature_column.py index 924acc0ee0d..3a481f6ff84 100644 --- a/tensorflow/python/tpu/feature_column.py +++ b/tensorflow/python/tpu/feature_column.py @@ -372,10 +372,12 @@ class _TPUEmbeddingColumn(_TPUBaseEmbeddingColumn, fc._EmbeddingColumn): trainable=True, max_sequence_length=0, learning_rate_fn=None, - use_safe_embedding_lookup=True): + use_safe_embedding_lookup=True, + bypass_scope_validation=False): # Note, args ckpt_to_load_from, tensor_name_in_ckpt, max_norm and trainable # are not supported on TPU. They are solely for matching the signature of # __new__ of parent class fc._EmbeddingColumn. 
+ del bypass_scope_validation return fc._EmbeddingColumn.__new__( cls, categorical_column, @@ -399,13 +401,18 @@ class _TPUEmbeddingColumn(_TPUBaseEmbeddingColumn, fc._EmbeddingColumn): trainable=True, max_sequence_length=0, learning_rate_fn=None, - use_safe_embedding_lookup=True): + use_safe_embedding_lookup=True, + bypass_scope_validation=False): _TPUBaseEmbeddingColumn.__init__( self, categorical_column, max_sequence_length=max_sequence_length, learning_rate_fn=learning_rate_fn) self._key = None + # If true, scope validation is skipped to allow the same column to be used + # in multiple variable scopes. By default, this is False, and we expect a + # 1:1 mapping between feature columns and scopes. + self._bypass_scope_validation = bypass_scope_validation def get_combiner(self): return self.combiner @@ -459,8 +466,10 @@ class _TPUEmbeddingColumn(_TPUBaseEmbeddingColumn, fc._EmbeddingColumn): tensor = inputs.get(self.get_feature_key_name()) # Add to collection for _create_tpu_embedding_variables_and_ops - _record_variable_scope_and_name(self.get_embedding_var_name(), - 'embedding_weights') + _record_variable_scope_and_name( + self.get_embedding_var_name(), + 'embedding_weights', + bypass_scope_validation=self._bypass_scope_validation) return tensor @@ -484,8 +493,10 @@ class _TPUEmbeddingColumn(_TPUBaseEmbeddingColumn, fc._EmbeddingColumn): tensor_lengths = array_ops.squeeze(tensor_lengths, -1) # Add to collection for _create_tpu_embedding_variables_and_ops - _record_variable_scope_and_name(self.get_embedding_var_name(), - 'embedding_weights') + _record_variable_scope_and_name( + self.get_embedding_var_name(), + 'embedding_weights', + bypass_scope_validation=self._bypass_scope_validation) return fc._SequenceDenseColumn.TensorSequenceLengthPair( dense_tensor=tensor, sequence_length=tensor_lengths) @@ -627,7 +638,8 @@ class _TPUSharedEmbeddingColumn(_TPUBaseEmbeddingColumn, def _record_variable_scope_and_name(embedding_var_name, embedding_var_name_in_fc, - is_shared_embedding=False): + is_shared_embedding=False, + bypass_scope_validation=False): """Add embedding variable name and scope to collection.""" g = ops.get_default_graph() collection = g.get_collection_ref(_TPU_FC_TO_SCOPE) @@ -640,8 +652,8 @@ def _record_variable_scope_and_name(embedding_var_name, captured_scope_name = captured_scope.name if embedding_var_name in var_def_dict: - if (var_def_dict[embedding_var_name][0] != captured_scope_name - and not is_shared_embedding): + if (var_def_dict[embedding_var_name][0] != captured_scope_name and + not is_shared_embedding and not bypass_scope_validation): raise ValueError( 'For embedding var name {}, the variable scope name is different, ' 'got {}; expected {}'.format(embedding_var_name, diff --git a/tensorflow/python/tpu/feature_column_v2.py b/tensorflow/python/tpu/feature_column_v2.py index 1012506c48b..32472053791 100644 --- a/tensorflow/python/tpu/feature_column_v2.py +++ b/tensorflow/python/tpu/feature_column_v2.py @@ -427,7 +427,9 @@ class _TPUEmbeddingColumnV2(_TPUBaseEmbeddingColumn, fc_lib.EmbeddingColumn): initializer=None, max_sequence_length=0, learning_rate_fn=None, - use_safe_embedding_lookup=True): + use_safe_embedding_lookup=True, + bypass_scope_validation=False): + del bypass_scope_validation return fc_lib.EmbeddingColumn.__new__( cls, categorical_column, @@ -455,13 +457,18 @@ class _TPUEmbeddingColumnV2(_TPUBaseEmbeddingColumn, fc_lib.EmbeddingColumn): initializer=None, max_sequence_length=0, learning_rate_fn=None, - use_safe_embedding_lookup=True): + 
use_safe_embedding_lookup=True, + bypass_scope_validation=False): _TPUBaseEmbeddingColumn.__init__( self, categorical_column, max_sequence_length=max_sequence_length, learning_rate_fn=learning_rate_fn) self._key = None + # If true, scope validation is skipped to allow the same column to be used + # in multiple variable scopes. By default, this is False, and we expect a + # 1:1 mapping between feature columns and scopes. + self._bypass_scope_validation = bypass_scope_validation def get_combiner(self): return self.combiner @@ -515,8 +522,10 @@ class _TPUEmbeddingColumnV2(_TPUBaseEmbeddingColumn, fc_lib.EmbeddingColumn): tensor = inputs.get(self.get_feature_key_name()) # Add to collection for _create_tpu_embedding_variables_and_ops - _record_variable_scope_and_name(self.get_embedding_var_name(), - 'embedding_weights') + _record_variable_scope_and_name( + self.get_embedding_var_name(), + 'embedding_weights', + bypass_scope_validation=self._bypass_scope_validation) return tensor @@ -528,8 +537,10 @@ class _TPUEmbeddingColumnV2(_TPUBaseEmbeddingColumn, fc_lib.EmbeddingColumn): # Create state is called for the EmbeddingColumn to create its embedding # variables under feature column V2, if we are on TPU so record the scope # here. - _record_variable_scope_and_name(self.get_embedding_var_name(), - 'embedding_weights') + _record_variable_scope_and_name( + self.get_embedding_var_name(), + 'embedding_weights', + bypass_scope_validation=self._bypass_scope_validation) def get_dense_tensor(self, transformation_cache, state_manager): if tpu.under_tpu_inference_context(): @@ -569,8 +580,10 @@ class _TPUEmbeddingColumnV2(_TPUBaseEmbeddingColumn, fc_lib.EmbeddingColumn): tensor_lengths = array_ops.squeeze(tensor_lengths, -1) # Add to collection for _create_tpu_embedding_variables_and_ops - _record_variable_scope_and_name(self.get_embedding_var_name(), - 'embedding_weights') + _record_variable_scope_and_name( + self.get_embedding_var_name(), + 'embedding_weights', + bypass_scope_validation=self._bypass_scope_validation) return fc_lib.SequenceDenseColumn.TensorSequenceLengthPair( dense_tensor=tensor, sequence_length=tensor_lengths) diff --git a/tensorflow/python/tpu/feature_column_v2_test.py b/tensorflow/python/tpu/feature_column_v2_test.py index c1a34fad107..93f65d6e1c4 100644 --- a/tensorflow/python/tpu/feature_column_v2_test.py +++ b/tensorflow/python/tpu/feature_column_v2_test.py @@ -28,8 +28,10 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import test_util +from tensorflow.python.ops import init_ops from tensorflow.python.ops import lookup_ops from tensorflow.python.ops import parsing_ops +from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables as variables_lib from tensorflow.python.platform import test from tensorflow.python.tpu import feature_column_v2 as tpu_fc @@ -44,6 +46,40 @@ def _initialized_session(): return sess +class _TestStateManager(fc_lib.StateManager): + + def __init__(self, trainable=True): + self._all_variables = {} + self._trainable = trainable + + def create_variable(self, + feature_column, + name, + shape, + dtype=None, + trainable=True, + use_resource=True, + initializer=None): + if feature_column not in self._all_variables: + self._all_variables[feature_column] = {} + var_dict = self._all_variables[feature_column] + if name in var_dict: + return var_dict[name] + else: + var = variable_scope.get_variable( + 
name=name, + shape=shape, + dtype=dtype, + trainable=self._trainable and trainable, + use_resource=use_resource, + initializer=initializer) + var_dict[name] = var + return var + + def get_variable(self, feature_column, name): + return self._all_variables[feature_column][name] + + class EmbeddingColumnTestV2(test.TestCase, parameterized.TestCase): def test_defaults(self): @@ -193,6 +229,56 @@ class EmbeddingColumnTestV2(test.TestCase, parameterized.TestCase): self.assertEqual(embedding_column._max_sequence_length, embedding_column_copy._max_sequence_length) + def test_with_scope_validation(self): + categorical_column = fc_lib.categorical_column_with_identity( + key='aaa', num_buckets=3) + embedding_dimension = 2 + initializer = init_ops.truncated_normal_initializer(mean=0.0, stddev=.5) + embedding_column = tpu_fc._TPUEmbeddingColumnV2( + categorical_column=categorical_column, + dimension=embedding_dimension, + combiner='mean', + initializer=initializer, + max_sequence_length=0, + learning_rate_fn=None, + use_safe_embedding_lookup=True, + bypass_scope_validation=False) + self.assertIs(categorical_column, embedding_column.categorical_column) + self.assertEqual(embedding_dimension, embedding_column.dimension) + state_manager = _TestStateManager() + with tpu_function.tpu_shard_context(1): + with variable_scope.variable_scope('tower1/scope1'): + embedding_column.create_state(state_manager) + with variable_scope.variable_scope('tower2/scope2'): + # With default scope validation, the same column cannot be used in a new + # variable scope. + with self.assertRaisesRegex(ValueError, + 'the variable scope name is different'): + embedding_column.create_state(state_manager) + + def test_bypass_scope_validation(self): + categorical_column = fc_lib.categorical_column_with_identity( + key='aaa', num_buckets=3) + embedding_dimension = 2 + initializer = init_ops.truncated_normal_initializer(mean=0.0, stddev=.5) + embedding_column = tpu_fc._TPUEmbeddingColumnV2( + categorical_column=categorical_column, + dimension=embedding_dimension, + combiner='mean', + initializer=initializer, + max_sequence_length=0, + learning_rate_fn=None, + use_safe_embedding_lookup=True, + bypass_scope_validation=True) + self.assertIs(categorical_column, embedding_column.categorical_column) + self.assertEqual(embedding_dimension, embedding_column.dimension) + state_manager = _TestStateManager() + with tpu_function.tpu_shard_context(1): + with variable_scope.variable_scope('tower1/scope1'): + embedding_column.create_state(state_manager) + with variable_scope.variable_scope('tower2/scope2'): + embedding_column.create_state(state_manager) + class SharedEmbeddingColumnTestV2(test.TestCase, parameterized.TestCase): From a5795009f8214a39d42f2e2a9bef414f991cafd3 Mon Sep 17 00:00:00 2001 From: Xiao Yu Date: Fri, 31 Jul 2020 12:56:34 -0700 Subject: [PATCH 1880/2522] Introduce Eager placer for TFRT for op handler placement. This placer reuse placement logic borrowed from TF eager runtime. We need to build NodeDef in order to reuse placement logic from existing runtime. As a result, we update SetAttrs.* methods to set fallback attribute for both fallback ops and native ops. Verify with a few existing benchmark (Resnet50 GPU, Kumamon CPU, tf.add micro benchmark on cpu) that it does not cause noticeable overheads if user specify the device explicitly. If the user does not specify device, it will cause ~1us overhead to build node_def. 
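The change also reworks the helper that pins small integer ops to the CPU so that it takes the CPU device name directly instead of the whole EagerContext (see the placement_utils diff below). Roughly, that heuristic behaves like the following Python sketch; the function, op, and parameter names here are made up, and the real implementation is the C++ in this patch:

  def maybe_pin_small_ops_to_cpu(op_name, inputs, cpu_device_name,
                                 pinnable_ops=("Add", "Sub", "Mul")):
    """Pin an op to the CPU only if it is a pinnable op and every input is a
    small integer tensor that already lives on the CPU. Each input is a
    (device_name, dtype, num_elements) triple."""
    if op_name not in pinnable_ops:
      return False
    if not inputs:  # assumption of this sketch: no-input ops are not pinned
      return False
    for device, dtype, num_elements in inputs:
      if dtype not in ("int32", "int64"):
        return False
      if device != cpu_device_name:
        return False
      if num_elements >= 64:  # "small" means fewer than 64 elements
        return False
    return True

  cpu = "/job:localhost/replica:0/task:0/device:CPU:0"
  print(maybe_pin_small_ops_to_cpu(
      "Add", [(cpu, "int32", 3), (cpu, "int32", 3)], cpu))  # True
  print(maybe_pin_small_ops_to_cpu(
      "Add", [(cpu, "float32", 3)], cpu))                   # False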
PiperOrigin-RevId: 324267375 Change-Id: Iaf7e29495e469de37666bb2df646d1ee44aba13c --- .../c/eager/immediate_execution_operation.h | 4 ++++ tensorflow/core/common_runtime/eager/core.cc | 8 ++------ .../common_runtime/eager/eager_operation.cc | 8 ++++++++ .../core/common_runtime/eager/eager_operation.h | 1 + .../common_runtime/eager/placement_utils.cc | 17 +++++++---------- .../core/common_runtime/eager/placement_utils.h | 7 ++++--- 6 files changed, 26 insertions(+), 19 deletions(-) diff --git a/tensorflow/c/eager/immediate_execution_operation.h b/tensorflow/c/eager/immediate_execution_operation.h index f599da6dadc..ee212b21a96 100644 --- a/tensorflow/c/eager/immediate_execution_operation.h +++ b/tensorflow/c/eager/immediate_execution_operation.h @@ -38,6 +38,10 @@ class ImmediateExecutionOperation : public AbstractOperation { public: virtual void Clear() = 0; + // Returns the inputs of this op. + virtual absl::Span GetInputs() + const = 0; + virtual const tensorflow::OpDef* OpDef() const = 0; virtual Status InputLength(const char* input_name, int* length) = 0; diff --git a/tensorflow/core/common_runtime/eager/core.cc b/tensorflow/core/common_runtime/eager/core.cc index c0fe1b4fe42..43daf37f6b2 100644 --- a/tensorflow/core/common_runtime/eager/core.cc +++ b/tensorflow/core/common_runtime/eager/core.cc @@ -197,14 +197,10 @@ Status EagerOperation::Execute(absl::Span retvals, if (device == kVariantDeviceNull) { TF_RETURN_IF_ERROR(eager::MaybePinToResourceDevice(&device, *this)); } - if (device == kVariantDeviceNull) { + if (device == kVariantDeviceNull && ctx_.PinSmallOpsToCPU()) { bool pin_to_cpu; TF_RETURN_IF_ERROR(eager::MaybePinSmallOpsToCpu( - &pin_to_cpu, Name(), - absl::MakeSpan( - reinterpret_cast(inputs_.data()), - inputs_.size()), - ctx_)); + &pin_to_cpu, Name(), GetInputs(), ctx_.HostCPU()->name())); if (pin_to_cpu) { device = ctx_.HostCPU(); } diff --git a/tensorflow/core/common_runtime/eager/eager_operation.cc b/tensorflow/core/common_runtime/eager/eager_operation.cc index df3b3727b60..947b67a4dab 100644 --- a/tensorflow/core/common_runtime/eager/eager_operation.cc +++ b/tensorflow/core/common_runtime/eager/eager_operation.cc @@ -235,6 +235,14 @@ Status EagerOperation::InputLength(const char* input_name, int* length) { return Status::OK(); } +absl::Span EagerOperation::GetInputs() + const { + // TODO(b/162536003): Remove reinterpret_cast. 
+ return absl::MakeSpan( + reinterpret_cast(inputs_.data()), + inputs_.size()); +} + Status EagerOperation::OutputLength(const char* output_name, int* length) { Status status; const tensorflow::OpDef* op_def = GetOpDef(&status); diff --git a/tensorflow/core/common_runtime/eager/eager_operation.h b/tensorflow/core/common_runtime/eager/eager_operation.h index 9fc35a18a7f..327411e19c9 100644 --- a/tensorflow/core/common_runtime/eager/eager_operation.h +++ b/tensorflow/core/common_runtime/eager/eager_operation.h @@ -82,6 +82,7 @@ class EagerOperation : public ImmediateExecutionOperation { Status AddInput(AbstractTensorHandle* input) override; Status AddInputList(absl::Span inputs) override; + absl::Span GetInputs() const override; Status Execute(absl::Span retvals, int* num_retvals) override; const tensorflow::OpDef* OpDef() const override { return op_def_; }; diff --git a/tensorflow/core/common_runtime/eager/placement_utils.cc b/tensorflow/core/common_runtime/eager/placement_utils.cc index dd99c0fca83..148c6c6ce03 100644 --- a/tensorflow/core/common_runtime/eager/placement_utils.cc +++ b/tensorflow/core/common_runtime/eager/placement_utils.cc @@ -81,11 +81,12 @@ bool IsCustomDevice(StringPiece device_name, const EagerContext& ctx) { return ctx.FindCustomDeviceFromName(string(device_name), &custom_device); } -Status MaybePinSmallOpsToCpu(bool* result, StringPiece op_name, - absl::Span args, - const EagerContext& ctx) { - if (!ctx.PinSmallOpsToCPU() || IsFunction(op_name) || - IsColocationExempt(op_name) || !IsPinnableOp(op_name)) { +Status MaybePinSmallOpsToCpu( + bool* result, StringPiece op_name, + absl::Span args, + StringPiece cpu_device_name) { + if (IsFunction(op_name) || IsColocationExempt(op_name) || + !IsPinnableOp(op_name)) { *result = false; return Status::OK(); } @@ -104,16 +105,12 @@ Status MaybePinSmallOpsToCpu(bool* result, StringPiece op_name, const char* device_name = arg->DeviceName(&s); DataType dtype = arg->DataType(); TF_RETURN_IF_ERROR(s); - if (IsCustomDevice(device_name, ctx)) { - *result = false; - return Status::OK(); - } DVLOG(2) << "for op " << op_name << " input " << i << " " << DataTypeString(dtype) << " input device = " << device_name; // Input is on CPU. - if (device_name != ctx.HostCPU()->name()) { + if (device_name != cpu_device_name) { *result = false; return Status::OK(); } diff --git a/tensorflow/core/common_runtime/eager/placement_utils.h b/tensorflow/core/common_runtime/eager/placement_utils.h index d58bd304b27..b051e13ea08 100644 --- a/tensorflow/core/common_runtime/eager/placement_utils.h +++ b/tensorflow/core/common_runtime/eager/placement_utils.h @@ -35,9 +35,10 @@ bool IsCustomDevice(StringPiece device_name, const EagerContext& ctx); // Pin the op to cpu if all op inputs are on the CPU, small (<64 elements) and // integers (int32/int64). This can be disabled by setting the environment // variable "TF_EAGER_ENABLE_SMALL_TENSOR_CPU_PINNING" to "0" or "false". 
-Status MaybePinSmallOpsToCpu(bool* result, StringPiece op_name, - absl::Span args, - const EagerContext& ctx); +Status MaybePinSmallOpsToCpu( + bool* result, StringPiece op_name, + absl::Span args, + StringPiece cpu_device_name); // If a resource touching input is specified, all resource-touching ops run in // the device the resource is, regardless of anything else that has been From 8eccff2ea17c37a4cbc689a2c15d9923dc91d2ff Mon Sep 17 00:00:00 2001 From: Lei Zhang Date: Fri, 31 Jul 2020 12:58:40 -0700 Subject: [PATCH 1881/2522] Provide definition and use SIZE_MAX for BFCAllocator::kInvalidChunkHandle PiperOrigin-RevId: 324267861 Change-Id: I070baabf95939c60636d365d16ccc826175a76f5 --- tensorflow/core/common_runtime/bfc_allocator.cc | 2 ++ tensorflow/core/common_runtime/bfc_allocator.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/common_runtime/bfc_allocator.cc b/tensorflow/core/common_runtime/bfc_allocator.cc index 6f75d944a12..440ed235455 100644 --- a/tensorflow/core/common_runtime/bfc_allocator.cc +++ b/tensorflow/core/common_runtime/bfc_allocator.cc @@ -36,6 +36,8 @@ limitations under the License. namespace tensorflow { +constexpr BFCAllocator::ChunkHandle BFCAllocator::kInvalidChunkHandle; + BFCAllocator::BFCAllocator(SubAllocator* sub_allocator, size_t total_memory, bool allow_growth, const string& name, bool garbage_collection) diff --git a/tensorflow/core/common_runtime/bfc_allocator.h b/tensorflow/core/common_runtime/bfc_allocator.h index cfe54c23abe..f79a6048bbb 100644 --- a/tensorflow/core/common_runtime/bfc_allocator.h +++ b/tensorflow/core/common_runtime/bfc_allocator.h @@ -133,7 +133,7 @@ class BFCAllocator : public Allocator { // A ChunkHandle is an index into the chunks_ vector in BFCAllocator // kInvalidChunkHandle means an invalid chunk typedef size_t ChunkHandle; - static constexpr ChunkHandle kInvalidChunkHandle = -1; + static constexpr ChunkHandle kInvalidChunkHandle = SIZE_MAX; typedef int BinNum; static constexpr int kInvalidBinNum = -1; From a164043513f31a69420e3d2720ab1673a3dc49e3 Mon Sep 17 00:00:00 2001 From: Lucy Fox Date: Fri, 31 Jul 2020 13:01:04 -0700 Subject: [PATCH 1882/2522] Move IREE GuaranteeAllFuncsOneUse pass into TF. This pass is useful for the TF to XLA bridge as well, so moving it into shared TF transforms. 
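A toy Python sketch of the pass's strategy, for orientation only (the real implementation is the MLIR pass added below; the names and clone limit here are placeholders): repeatedly clone any function that has more than one use until a fixed point, and bail out at a limit so recursive call graphs fail with an error instead of looping forever.

import itertools

# Toy call graph: function name -> list of callees, one entry per call site.
call_graph = {
    "f": ["g", "g"],
    "g": ["h", "h"],
    "h": [],
}

MAX_CLONES = 1000
_ids = itertools.count()

def guarantee_one_use(graph):
    """Fixed-point iteration: clone any function that has more than one use."""
    num_clones = 0
    changed = True
    while changed:
        changed = False
        for func in list(graph):
            # Collect every call site that refers to `func`.
            sites = [(caller, i)
                     for caller, callees in graph.items()
                     for i, callee in enumerate(callees) if callee == func]
            if len(sites) <= 1:
                continue
            changed = True
            # Keep the first use; every additional use gets a private clone.
            for caller, i in sites[1:]:
                num_clones += 1
                if num_clones > MAX_CLONES:
                    raise RuntimeError("reached cloning limit "
                                       "(likely recursive call graph)")
                clone = "{}_clone{}".format(func, next(_ids))
                graph[clone] = list(graph[func])  # copy the body's call sites
                graph[caller][i] = clone
    return graph

guarantee_one_use(call_graph)
# call_graph now holds two copies of g and four copies of h, matching the
# transitive-callee test case added below.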
PiperOrigin-RevId: 324268498 Change-Id: I071b326bc3c8e5d9ea72a425b5828d7d7598d9ac --- tensorflow/compiler/mlir/tensorflow/BUILD | 1 + .../tests/guarantee-all-funcs-one-use.mlir | 54 ++++++++ .../transforms/guarantee_all_funcs_one_use.cc | 120 ++++++++++++++++++ .../mlir/tensorflow/transforms/passes.h | 3 + 4 files changed, 178 insertions(+) create mode 100644 tensorflow/compiler/mlir/tensorflow/tests/guarantee-all-funcs-one-use.mlir create mode 100644 tensorflow/compiler/mlir/tensorflow/transforms/guarantee_all_funcs_one_use.cc diff --git a/tensorflow/compiler/mlir/tensorflow/BUILD b/tensorflow/compiler/mlir/tensorflow/BUILD index bbcf08143c6..ef95b830206 100644 --- a/tensorflow/compiler/mlir/tensorflow/BUILD +++ b/tensorflow/compiler/mlir/tensorflow/BUILD @@ -752,6 +752,7 @@ cc_library( "transforms/generated_optimize.inc", "transforms/gpu_fusion.cc", "transforms/graph_pruning.cc", + "transforms/guarantee_all_funcs_one_use.cc", "transforms/init_text_file_to_import.cc", "transforms/launch_to_device_attribute.cc", "transforms/layout_optimization.cc", diff --git a/tensorflow/compiler/mlir/tensorflow/tests/guarantee-all-funcs-one-use.mlir b/tensorflow/compiler/mlir/tensorflow/tests/guarantee-all-funcs-one-use.mlir new file mode 100644 index 00000000000..d8903846158 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/tests/guarantee-all-funcs-one-use.mlir @@ -0,0 +1,54 @@ +// RUN: tf-opt %s -split-input-file -verify-diagnostics -tf-guarantee-all-funcs-one-use | FileCheck %s + +// ----- +// Basic test. +// CHECK-LABEL: func @f +func @f() { + // CHECK: call @g() : () -> () + // CHECK: call @[[NEWG:.+]]() : () -> () + call @g() : () -> () + call @g() : () -> () + return +} + +// CHECK: func @g() +// CHECK: func @[[NEWG]]() attributes {sym_visibility = "private"} +func @g() { + return +} + +// ----- +// Transitive callees. +// CHECK-LABEL: func @f +// 2 copies of @g +// CHECK-DAG: func @g{{.*}} +// CHECK-DAG: func @g{{.*}} +// 4 copies of @h +// CHECK-DAG: func @h{{.*}} +// CHECK-DAG: func @h{{.*}} +// CHECK-DAG: func @h{{.*}} +// CHECK-DAG: func @h{{.*}} +func @f() { + call @g() : () -> () + call @g() : () -> () + return +} + +func @g() { + call @h() : () -> () + call @h() : () -> () + return +} + +func @h() { + return +} + +// ----- +// Handle error case of infinite recursion. +// expected-error @+1 {{reached cloning limit}} +func @f() attributes {sym_visibility = "private"} { + call @f() : () -> () + call @f() : () -> () + return +} diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/guarantee_all_funcs_one_use.cc b/tensorflow/compiler/mlir/tensorflow/transforms/guarantee_all_funcs_one_use.cc new file mode 100644 index 00000000000..a1aed65bd36 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/transforms/guarantee_all_funcs_one_use.cc @@ -0,0 +1,120 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "llvm/ADT/STLExtras.h" +#include "mlir/IR/SymbolTable.h" // from @llvm-project +#include "mlir/Pass/Pass.h" // from @llvm-project +#include "mlir/Pass/PassRegistry.h" // from @llvm-project +#include "mlir/Support/LLVM.h" // from @llvm-project +#include "mlir/Support/LogicalResult.h" // from @llvm-project +#include "mlir/Transforms/Utils.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" + +namespace mlir { +namespace TF { + +namespace { + +// Clones FuncOp's until they have a single use only (or no users). +// +// The tf-shape-inference pass doesn't support functions that have more than +// a single use. But some real code from frontends does end up creating code +// like that. For example, the same LSTM cell function or loop body function +// will be reused. +// +// This pass clones functions as needed to establish the invariant that all +// functions have a single use. This can in principle cause exponential code +// size bloat, and should in general be guided by a proper cost model. +// +// There are two factors which should be considered by a principled replacement +// to this pass: +// +// 1. TF currently relies on "sufficiently good shape inference" for +// correctness so for now the cost of doing this seems acceptable since +// pathological cases haven't hit us yet. +// +// 2. Cloning functions can help by allowing code to be specialized (much as +// inlining does). In fact, tf-shape-inference attempts to do specialization +// of callees which is difficult if callees have multiple uses. +class GuaranteeAllFuncsOneUse + : public PassWrapper> { + public: + void runOnOperation() override { + if (failed(run())) { + signalPassFailure(); + } + } + + LogicalResult run() { + auto module = getOperation(); + + // Overall strategy: + // Fixed point iteration, iteratively applying a rule that clones + // any FuncOp with more than one use to eliminate its uses. + + SymbolTable symbol_table(module); + bool made_changes = false; + // This value needs to be low enough to actually stop compilation in a + // reasonable time, but not too low that it blocks real programs. + // This number was chosen semi-randomly. + const int k_max_clones = 1000; + int num_clones = 0; + do { + made_changes = false; + for (auto func : llvm::make_early_inc_range(module.getOps())) { + auto uses_optional = symbol_table.getSymbolUses(func, module); + if (!uses_optional.hasValue()) { + return func.emitError() << "could not walk uses of func"; + } + auto &uses = *uses_optional; + if (llvm::size(uses) <= 1) { + continue; + } + // At this point, we know we are going to change the module. 
+ made_changes = true; + for (const SymbolTable::SymbolUse &use : llvm::drop_begin(uses, 1)) { + auto new_func = func.clone(); + if (num_clones++ > k_max_clones) { + return func.emitError() + << "reached cloning limit (likely recursive call graph or " + "repeated diamond-like call structure " + "or just very large program)"; + } + symbol_table.insert(new_func); + new_func.setVisibility(SymbolTable::Visibility::Private); + if (failed(symbol_table.replaceAllSymbolUses(func, new_func.getName(), + use.getUser()))) { + return func.emitError() << "could not replace symbol use"; + } + } + } + } while (made_changes); + + return success(); + } +}; + +} // namespace + +std::unique_ptr> CreateGuaranteeAllFuncsOneUsePass() { + return std::make_unique(); +} + +static PassRegistration pass( + "tf-guarantee-all-funcs-one-use", + "Guarantee all FuncOp's have only a single use."); + +} // namespace TF + +} // namespace mlir diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/passes.h b/tensorflow/compiler/mlir/tensorflow/transforms/passes.h index 3aec322308d..3afadd2b06d 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/passes.h +++ b/tensorflow/compiler/mlir/tensorflow/transforms/passes.h @@ -60,6 +60,9 @@ std::unique_ptr> CreateMaterializePassthroughOpPass(); // Performs Shape Inference on the TensorFlow dialect using the global registry. std::unique_ptr> CreateTFShapeInferencePass(); +// Guarantee that all FuncOp's have a single use. +std::unique_ptr> CreateGuaranteeAllFuncsOneUsePass(); + // Optional pass which will unroll BatchMatMul and use only MatMul std::unique_ptr> CreateUnrollBatchMatMulPassPass(); From a56d66eb68e4049a5cfefaae01dec9ff6294d786 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 31 Jul 2020 13:05:17 -0700 Subject: [PATCH 1883/2522] Qualify uses of std::string PiperOrigin-RevId: 324269479 Change-Id: I40d1373268bfacf72428bd17fef0638950becabb --- tensorflow/core/kernels/sparse/sparse_matrix.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/sparse/sparse_matrix.h b/tensorflow/core/kernels/sparse/sparse_matrix.h index 8fec9f42fbd..fc4afd06851 100644 --- a/tensorflow/core/kernels/sparse/sparse_matrix.h +++ b/tensorflow/core/kernels/sparse/sparse_matrix.h @@ -312,10 +312,10 @@ class CSRSparseMatrix { return batch_pointers_; } - string TypeName() const { return kTypeName; } + std::string TypeName() const { return kTypeName; } // TODO(ebrevdo): A better debug string. - string DebugString() const { return dense_shape_.DebugString(); } + std::string DebugString() const { return dense_shape_.DebugString(); } // Returns the number of elements. This is equal to 1 if the // CSRSparseMatrix is a singleton matrix (dense_shape is length 2). From 35ee98e7881e1ddfb7062b20f6ca860a6895e26a Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 31 Jul 2020 13:05:33 -0700 Subject: [PATCH 1884/2522] Qualify uses of std::string PiperOrigin-RevId: 324269536 Change-Id: I41acb8b5c0dfe3e8af8141ee2161e88a0bd855b5 --- tensorflow/core/kernels/crop_and_resize_op.h | 4 +-- .../core/kernels/crop_and_resize_op_gpu.cu.cc | 4 +-- tensorflow/core/kernels/cuda_solvers.h | 29 ++++++++++--------- tensorflow/core/kernels/cuda_sparse.h | 3 +- tensorflow/core/kernels/fused_batch_norm_op.h | 2 +- tensorflow/core/kernels/gpu_utils.h | 8 ++--- 6 files changed, 27 insertions(+), 23 deletions(-) diff --git a/tensorflow/core/kernels/crop_and_resize_op.h b/tensorflow/core/kernels/crop_and_resize_op.h index 8c34e3e71cc..66ff695d9ce 100644 --- a/tensorflow/core/kernels/crop_and_resize_op.h +++ b/tensorflow/core/kernels/crop_and_resize_op.h @@ -31,7 +31,7 @@ struct CropAndResize { typename TTypes::ConstTensor image, typename TTypes::ConstTensor boxes, typename TTypes::ConstTensor box_ind, - const string& method_name, float extrapolation_value, + const std::string& method_name, float extrapolation_value, typename TTypes::Tensor crops); }; @@ -43,7 +43,7 @@ struct CropAndResizeBackpropImage { typename TTypes::ConstTensor boxes, typename TTypes::ConstTensor box_ind, typename TTypes::Tensor grads_image, - const string& method_name); + const std::string& method_name); }; template diff --git a/tensorflow/core/kernels/crop_and_resize_op_gpu.cu.cc b/tensorflow/core/kernels/crop_and_resize_op_gpu.cu.cc index d268eb7b21e..e64a055503f 100644 --- a/tensorflow/core/kernels/crop_and_resize_op_gpu.cu.cc +++ b/tensorflow/core/kernels/crop_and_resize_op_gpu.cu.cc @@ -352,7 +352,7 @@ struct CropAndResize { typename TTypes::ConstTensor image, typename TTypes::ConstTensor boxes, typename TTypes::ConstTensor box_ind, - const string& method_name, float extrapolation_value, + const std::string& method_name, float extrapolation_value, typename TTypes::Tensor crops) { const int batch = image.dimension(0); const int image_height = image.dimension(1); @@ -391,7 +391,7 @@ struct CropAndResizeBackpropImage { typename TTypes::ConstTensor boxes, typename TTypes::ConstTensor box_ind, typename TTypes::Tensor grads_image, - const string& method_name) { + const std::string& method_name) { const int batch = grads_image.dimension(0); const int image_height = grads_image.dimension(1); const int image_width = grads_image.dimension(2); diff --git a/tensorflow/core/kernels/cuda_solvers.h b/tensorflow/core/kernels/cuda_solvers.h index 6833905e379..eb1d5c8a200 100644 --- a/tensorflow/core/kernels/cuda_solvers.h +++ b/tensorflow/core/kernels/cuda_solvers.h @@ -169,14 +169,16 @@ class CudaSolver { // to the underlying Tensor to prevent it from being deallocated prematurely. template ScratchSpace GetScratchSpace(const TensorShape& shape, - const string& debug_info, bool on_host); + const std::string& debug_info, + bool on_host); template - ScratchSpace GetScratchSpace(int64 size, const string& debug_info, + ScratchSpace GetScratchSpace(int64 size, + const std::string& debug_info, bool on_host); // Returns a DeviceLapackInfo that will live for the duration of the // CudaSolver object. inline DeviceLapackInfo GetDeviceLapackInfo(int64 size, - const string& debug_info); + const std::string& debug_info); // Allocates a temporary tensor that will live for the duration of the // CudaSolver object. 
@@ -377,12 +379,12 @@ class ScratchSpace { ScratchSpace(OpKernelContext* context, int64 size, bool on_host) : ScratchSpace(context, TensorShape({size}), "", on_host) {} - ScratchSpace(OpKernelContext* context, int64 size, const string& debug_info, - bool on_host) + ScratchSpace(OpKernelContext* context, int64 size, + const std::string& debug_info, bool on_host) : ScratchSpace(context, TensorShape({size}), debug_info, on_host) {} ScratchSpace(OpKernelContext* context, const TensorShape& shape, - const string& debug_info, bool on_host) + const std::string& debug_info, bool on_host) : context_(context), debug_info_(debug_info), on_host_(on_host) { AllocatorAttributes alloc_attr; if (on_host) { @@ -411,7 +413,7 @@ class ScratchSpace { } int64 bytes() const { return scratch_tensor_.TotalBytes(); } int64 size() const { return scratch_tensor_.NumElements(); } - const string& debug_info() const { return debug_info_; } + const std::string& debug_info() const { return debug_info_; } Tensor& tensor() { return scratch_tensor_; } const Tensor& tensor() const { return scratch_tensor_; } @@ -424,21 +426,22 @@ class ScratchSpace { private: OpKernelContext* context_; // not owned - const string debug_info_; + const std::string debug_info_; const bool on_host_; Tensor scratch_tensor_; }; class HostLapackInfo : public ScratchSpace { public: - HostLapackInfo(OpKernelContext* context, int64 size, const string& debug_info) + HostLapackInfo(OpKernelContext* context, int64 size, + const std::string& debug_info) : ScratchSpace(context, size, debug_info, /* on_host */ true){}; }; class DeviceLapackInfo : public ScratchSpace { public: DeviceLapackInfo(OpKernelContext* context, int64 size, - const string& debug_info) + const std::string& debug_info) : ScratchSpace(context, size, debug_info, /* on_host */ false) {} // Allocates a new scratch space on the host and launches a copy of the @@ -460,7 +463,7 @@ class DeviceLapackInfo : public ScratchSpace { #if GOOGLE_CUDA template ScratchSpace CudaSolver::GetScratchSpace(const TensorShape& shape, - const string& debug_info, + const std::string& debug_info, bool on_host) { ScratchSpace new_scratch_space(context_, shape, debug_info, on_host); scratch_tensor_refs_.emplace_back(new_scratch_space.tensor()); @@ -469,13 +472,13 @@ ScratchSpace CudaSolver::GetScratchSpace(const TensorShape& shape, template ScratchSpace CudaSolver::GetScratchSpace(int64 size, - const string& debug_info, + const std::string& debug_info, bool on_host) { return GetScratchSpace(TensorShape({size}), debug_info, on_host); } inline DeviceLapackInfo CudaSolver::GetDeviceLapackInfo( - int64 size, const string& debug_info) { + int64 size, const std::string& debug_info) { DeviceLapackInfo new_dev_info(context_, size, debug_info); scratch_tensor_refs_.emplace_back(new_dev_info.tensor()); return new_dev_info; diff --git a/tensorflow/core/kernels/cuda_sparse.h b/tensorflow/core/kernels/cuda_sparse.h index 2d41cc72421..978bc9005ed 100644 --- a/tensorflow/core/kernels/cuda_sparse.h +++ b/tensorflow/core/kernels/cuda_sparse.h @@ -75,7 +75,8 @@ using gpuStream_t = hipStream_t; namespace tensorflow { -inline string ConvertGPUSparseErrorToString(const gpusparseStatus_t status) { +inline std::string ConvertGPUSparseErrorToString( + const gpusparseStatus_t status) { switch (status) { #define STRINGIZE(q) #q #define RETURN_IF_STATUS(err) \ diff --git a/tensorflow/core/kernels/fused_batch_norm_op.h b/tensorflow/core/kernels/fused_batch_norm_op.h index 7a64046b335..624f7ecf59a 100644 --- 
a/tensorflow/core/kernels/fused_batch_norm_op.h +++ b/tensorflow/core/kernels/fused_batch_norm_op.h @@ -30,7 +30,7 @@ namespace functor { // (2) batch norm + side input + activation enum class FusedBatchNormActivationMode { kIdentity, kRelu }; -string ToString(FusedBatchNormActivationMode activation_mode); +std::string ToString(FusedBatchNormActivationMode activation_mode); Status ParseActivationMode(OpKernelConstruction* context, FusedBatchNormActivationMode* activation_mode); diff --git a/tensorflow/core/kernels/gpu_utils.h b/tensorflow/core/kernels/gpu_utils.h index c0dd3b6bc77..a1589db3b5b 100644 --- a/tensorflow/core/kernels/gpu_utils.h +++ b/tensorflow/core/kernels/gpu_utils.h @@ -146,7 +146,7 @@ class AutoTuneMap { } private: - AutoTuneMap(const string& name) : name_(name) { + AutoTuneMap(const std::string& name) : name_(name) { min_score_threshold_ = 1; int min_warmup_iterations = 10; const char* threshold_str = getenv("TF_AUTOTUNE_THRESHOLD"); @@ -174,8 +174,8 @@ class AutoTuneMap { } }; - string GetActionSummary(StringPiece action, const Parameters& params, - const Config& config) { + std::string GetActionSummary(StringPiece action, const Parameters& params, + const Config& config) { return strings::Printf("autotune_map %s %s: %s -> (%s)", name_.c_str(), string(action).c_str(), params.ToString().c_str(), config.ToString().c_str()); @@ -189,7 +189,7 @@ class AutoTuneMap { }; std::unordered_map params_config_map_ TF_GUARDED_BY(mu_); - string name_; + std::string name_; int32 min_score_threshold_; int32 max_autotune_count_; int32 max_autotune_global_count_; From cc7e68b611392174c95e13fa8efa9d7bd21832df Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Mon, 22 Jun 2020 23:14:18 +0000 Subject: [PATCH 1885/2522] Update to have a better error message for tf.math.segment_[*] This PR tries to address the issue in 40653 where the error message of `tf.math.segment_[*]` does not match the error. This PR fixes 40653. 
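Roughly what the change means at the API surface (a small sketch; the exact exception type depends on eager vs. graph execution, which is why the test below catches both):

import numpy as np
import tensorflow as tf

# Rank-0 `data` is now rejected with a clear validation error instead of an
# unrelated failure deeper in the kernel.
try:
    tf.math.segment_mean(data=np.uint16(10),
                         segment_ids=np.array([], dtype=np.int64))
except (ValueError, tf.errors.InvalidArgumentError) as e:
    print(e)  # "... input must be at least rank 1 ..."

# Rank-1 `data` is the intended usage.
tf.math.segment_mean(data=[1.0, 2.0, 3.0, 4.0],
                     segment_ids=[0, 0, 1, 1])  # -> [1.5, 3.5]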
Signed-off-by: Yong Tang --- .../core/kernels/segment_reduction_ops_impl_1.cc | 2 ++ .../kernel_tests/segment_reduction_ops_test.py | 12 ++++++++++++ 2 files changed, 14 insertions(+) diff --git a/tensorflow/core/kernels/segment_reduction_ops_impl_1.cc b/tensorflow/core/kernels/segment_reduction_ops_impl_1.cc index ae71ac31f2c..f71a8dac462 100644 --- a/tensorflow/core/kernels/segment_reduction_ops_impl_1.cc +++ b/tensorflow/core/kernels/segment_reduction_ops_impl_1.cc @@ -22,6 +22,8 @@ namespace internal { void SegmentReductionValidationHelper(OpKernelContext* context, const Tensor& input, const Tensor& segment_ids) { + OP_REQUIRES(context, TensorShapeUtils::IsVectorOrHigher(input.shape()), + errors::InvalidArgument("input must be at least rank 1")); OP_REQUIRES(context, TensorShapeUtils::IsVector(segment_ids.shape()), errors::InvalidArgument("segment_ids should be a vector.")); const int64 num_indices = segment_ids.NumElements(); diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py index 9c0e0e38b6a..03d31a59b47 100644 --- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py +++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py @@ -25,6 +25,7 @@ import numpy as np from tensorflow.python.client import session from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes as dtypes_lib +from tensorflow.python.framework import errors_impl from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import gradient_checker @@ -255,6 +256,17 @@ class SegmentReductionOpTest(SegmentReductionHelper): delta=1) self.assertAllClose(jacob_t, jacob_n) + def testDataInvalid(self): + # Test case for GitHub issue 40653. + for use_gpu in [True, False]: + with self.cached_session(use_gpu=use_gpu): + with self.assertRaisesRegex( + (ValueError, errors_impl.InvalidArgumentError), + "must be at least rank 1"): + s = math_ops.segment_mean( + data=np.uint16(10), segment_ids=np.array([]).astype('int64')) + self.evaluate(s) + class UnsortedSegmentTest(SegmentReductionHelper): From 3a2dd11d51e050aa1a018e20aaadd7bde7287808 Mon Sep 17 00:00:00 2001 From: Yanhui Liang Date: Fri, 31 Jul 2020 13:06:02 -0700 Subject: [PATCH 1886/2522] Convert `extras` to `metrics` in benchmark_report for metric monitoring. 
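The shape of the conversion, side by side (placeholder numbers; the real values come from the benchmark timings in the diff below):

total_time, num_iters = 0.42, 1000

# Before: free-form `extras` dict keyed by metric name.
extras = {"examples_per_sec": num_iters / total_time,
          "us_per_example": total_time * 1e6 / num_iters}

# After: a `metrics` list of {"name", "value"} entries that the metric
# monitoring pipeline can ingest directly.
metrics = [{"name": "exp_per_sec", "value": num_iters / total_time},
           {"name": "us_per_exp", "value": total_time * 1e6 / num_iters}]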
PiperOrigin-RevId: 324269617 Change-Id: I1f8f9a26f99552281d978573e1786919eaf71f7b --- .../benchmarks/eager_microbenchmarks_test.py | 16 +++++++++++----- .../model_components_benchmarks_test.py | 16 ++++++++++------ 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/tensorflow/python/keras/benchmarks/eager_microbenchmarks_test.py b/tensorflow/python/keras/benchmarks/eager_microbenchmarks_test.py index 83a4fc27424..82f2a8342c2 100644 --- a/tensorflow/python/keras/benchmarks/eager_microbenchmarks_test.py +++ b/tensorflow/python/keras/benchmarks/eager_microbenchmarks_test.py @@ -51,13 +51,19 @@ class MicroBenchmarksBase(tf.test.Benchmark): """Run and report benchmark results.""" total_time = run_benchmark(func, num_iters, execution_mode) mean_us = total_time * 1e6 / num_iters - extras = { - "examples_per_sec": float("{0:.3f}".format(num_iters / total_time)), - "us_per_example": float("{0:.3f}".format(total_time * 1e6 / num_iters)) - } + metrics = [{ + "name": "exp_per_sec", + "value": float("{0:.3f}".format(num_iters / total_time)) + }, { + "name": "us_per_exp", + "value": float("{0:.3f}".format(total_time * 1e6 / num_iters)) + }] benchmark_name = self._get_benchmark_name() self.report_benchmark( - iters=num_iters, wall_time=mean_us, extras=extras, name=benchmark_name) + iters=num_iters, + wall_time=mean_us, + metrics=metrics, + name=benchmark_name) def _get_benchmark_name(self): """Mostly copied from benchmark.py _get_name().""" diff --git a/tensorflow/python/keras/benchmarks/model_components_benchmarks_test.py b/tensorflow/python/keras/benchmarks/model_components_benchmarks_test.py index 5119d196b6a..624c318bedb 100644 --- a/tensorflow/python/keras/benchmarks/model_components_benchmarks_test.py +++ b/tensorflow/python/keras/benchmarks/model_components_benchmarks_test.py @@ -108,12 +108,16 @@ class KerasComponentsBenchmarks(test.Benchmark): self.report_benchmark( iters=num_iters, wall_time=mean_us, - extras={ - "examples_per_sec": - float("{0:.3f}".format(num_iters / total_time)), - "us_per_example": - float("{0:.3f}".format(total_time * 1e6 / num_iters)) - }) + metrics=[ + { + "name": "exp_per_sec", + "value": float("{0:.3f}".format(num_iters / total_time)) + }, + { + "name": "us_per_exp", + "value": float("{0:.3f}".format(total_time * 1e6 / num_iters)) + }, + ]) def benchmark_keras_model_subclassed(self): model = SubclassedKerasModel() From 841805724e2f6b793886d3e616d0e5657ebb3182 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Fri, 31 Jul 2020 13:06:13 -0700 Subject: [PATCH 1887/2522] Roll back "Enable mlir generated GPU kernels by default for cuda builds." It breaks some internal builds. PiperOrigin-RevId: 324269660 Change-Id: I23bc3d52345d2388c7750cffd1c562a87a342c77 --- .bazelrc | 5 ----- tensorflow/core/kernels/mlir_generated/BUILD | 4 ++-- tensorflow/core/kernels/mlir_generated/build_defs.bzl | 4 ++-- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/.bazelrc b/.bazelrc index da988e4c928..6a448b267e0 100644 --- a/.bazelrc +++ b/.bazelrc @@ -173,11 +173,6 @@ build:using_cuda --define=using_cuda=true build:using_cuda --action_env TF_NEED_CUDA=1 build:using_cuda --crosstool_top=@local_config_cuda//crosstool:toolchain -# Enable the mlir generated GPU kernels only for cuda builds. -build --define=tensorflow_enable_mlir_generated_gpu_kernels=0 -# This is a more specific option, so it takes precedence over the line above for cuda builds. 
-build:using_cuda --define=tensorflow_enable_mlir_generated_gpu_kernels=1 - # This config refers to building CUDA op kernels with nvcc. build:cuda --config=using_cuda build:cuda --define=using_cuda_nvcc=true diff --git a/tensorflow/core/kernels/mlir_generated/BUILD b/tensorflow/core/kernels/mlir_generated/BUILD index 9f3efe9d972..79ccda50c87 100644 --- a/tensorflow/core/kernels/mlir_generated/BUILD +++ b/tensorflow/core/kernels/mlir_generated/BUILD @@ -18,9 +18,9 @@ package( ) config_setting( - name = "mlir_generated_gpu_kernels_disabled", + name = "mlir_generated_gpu_kernels_enabled", define_values = { - "tensorflow_enable_mlir_generated_gpu_kernels": "0", + "tensorflow_enable_mlir_generated_gpu_kernels": "1", }, ) diff --git a/tensorflow/core/kernels/mlir_generated/build_defs.bzl b/tensorflow/core/kernels/mlir_generated/build_defs.bzl index 2bf6e8fa3bb..3426aba94a4 100644 --- a/tensorflow/core/kernels/mlir_generated/build_defs.bzl +++ b/tensorflow/core/kernels/mlir_generated/build_defs.bzl @@ -4,8 +4,8 @@ load("@local_config_cuda//cuda:build_defs.bzl", "cuda_gpu_architectures", "if_cu def if_mlir_generated_gpu_kernels_enabled(if_true, if_false = []): return select({ - "//tensorflow/core/kernels/mlir_generated:mlir_generated_gpu_kernels_disabled": if_false, - "//conditions:default": if_true, + "//tensorflow/core/kernels/mlir_generated:mlir_generated_gpu_kernels_enabled": if_true, + "//conditions:default": if_false, }) def _lookup_file(filegroup, path): From 421f52cc8e1d618dc200fe1f63cd16750968236c Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Fri, 31 Jul 2020 13:42:03 -0700 Subject: [PATCH 1888/2522] [MLIR] Fix TFInlinerInterface to be handle ops that cannot be duplicated - Allow all ops that are in a single use non-public function to be inlined, assuming post inlining, the function will be deleted. - Introduce a new trait `CannotDuplicate` to tag ops that cannot be duplicated. - For ops in multi-use functions, use various checks (CannotDuplicate, NoSideEffects, is_stateless attribute) to check if the op can/cannot be duplicated. By default assume that ops can be duplicated. - Fixed TPUBridgeExecutorIslandOutlining pass to mark the outlined function in the nested module with nested visibility. 
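A small Python analogy for why a dedicated cannot-duplicate check matters (this is only the user-visible intuition; the actual change is in the MLIR inliner interface below): duplicating a stateful random op is not semantics-preserving, so the inliner must not clone such an op merely because its enclosing function has several uses.

import tensorflow as tf

# One stateful random op whose result is used twice: both uses observe the
# same sample.
x = tf.random.uniform([2])
print(x - x)  # always [0, 0]

# Duplicating the op (one copy per use) changes the program: the two uses now
# observe different samples.
print(tf.random.uniform([2]) - tf.random.uniform([2]))  # almost surely nonzero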
PiperOrigin-RevId: 324276074 Change-Id: I2f0a8506390f161766eafe8e28a349779f4a9ef4 --- tensorflow/compiler/mlir/tensorflow/BUILD | 1 + .../mlir/tensorflow/ir/tf_generated_ops.td | 149 +++++++++++++++++- .../compiler/mlir/tensorflow/ir/tf_op_base.td | 4 + .../compiler/mlir/tensorflow/ir/tf_ops.cc | 75 ++++++++- .../compiler/mlir/tensorflow/ir/tf_traits.h | 14 ++ .../executor_tpuv1_inline_tpu_island.mlir | 4 +- .../while_op.mlir | 2 +- .../mlir/tensorflow/tests/inlining.mlir | 13 +- .../executor_tpuv1_outline_tpu_island.cc | 1 + 9 files changed, 243 insertions(+), 20 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/BUILD b/tensorflow/compiler/mlir/tensorflow/BUILD index ef95b830206..c6f0083fc92 100644 --- a/tensorflow/compiler/mlir/tensorflow/BUILD +++ b/tensorflow/compiler/mlir/tensorflow/BUILD @@ -333,6 +333,7 @@ cc_library( deps = [ ":tensorflow_types", "@llvm-project//mlir:IR", + "@llvm-project//mlir:SideEffects", "@llvm-project//mlir:Support", ], ) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td index bc7ea2fece0..e00ea4c342a 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td @@ -6116,7 +6116,7 @@ Returns x * y element-wise. Returns zero if y is zero, even if x if infinite or TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } -def TF_MultinomialOp : TF_Op<"Multinomial", []> { +def TF_MultinomialOp : TF_Op<"Multinomial", [TF_CannotDuplicate]> { let summary = "Draws samples from a multinomial distribution."; let arguments = (ins @@ -6573,6 +6573,35 @@ pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0] TF_DerivedOperandTypeAttr Tpaddings = TF_DerivedOperandTypeAttr<1>; } +def TF_ParameterizedTruncatedNormalOp : TF_Op<"ParameterizedTruncatedNormal", [TF_CannotDuplicate]> { + let summary = [{ +Outputs random values from a normal distribution. The parameters may each be a + }]; + + let description = [{ +scalar which applies to the entire output, or a vector of length shape[0] which +stores the parameters for each batch. + }]; + + let arguments = (ins + TF_I32OrI64Tensor:$shape, + TF_FpTensor:$means, + TF_FpTensor:$stdevs, + TF_FpTensor:$minvals, + TF_FpTensor:$maxvals, + + DefaultValuedAttr:$seed, + DefaultValuedAttr:$seed2 + ); + + let results = (outs + TF_FpTensor:$output + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; + TF_DerivedOperandTypeAttr dtype = TF_DerivedOperandTypeAttr<1>; +} + def TF_PowOp : TF_Op<"Pow", [NoSideEffect, ResultsBroadcastableShape, TF_SameOperandsAndResultElementTypeResolveRef]>, WithBroadcastableBinOpBuilder { let summary = "Computes the power of one value to another."; @@ -6956,6 +6985,33 @@ array([0.6666667, 1. , 1. ], dtype=float32) TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } +def TF_RandomGammaOp : TF_Op<"RandomGamma", [TF_CannotDuplicate]> { + let summary = [{ +Outputs random values from the Gamma distribution(s) described by alpha. + }]; + + let description = [{ +This op uses the algorithm by Marsaglia et al. to acquire samples via +transformation-rejection from pairs of uniform and normal random variables. 
+See http://dl.acm.org/citation.cfm?id=358414 + }]; + + let arguments = (ins + TF_I32OrI64Tensor:$shape, + TensorOf<[F16, F32, F64]>:$alpha, + + DefaultValuedAttr:$seed, + DefaultValuedAttr:$seed2 + ); + + let results = (outs + TensorOf<[F16, F32, F64]>:$output + ); + + TF_DerivedOperandTypeAttr S = TF_DerivedOperandTypeAttr<0>; + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<1>; +} + def TF_RandomGammaGradOp : TF_Op<"RandomGammaGrad", [NoSideEffect, ResultsBroadcastableShape]>, WithBroadcastableBinOpBuilder { let summary = [{ @@ -6974,7 +7030,60 @@ Computes the derivative of a Gamma random sample w.r.t. `alpha`. TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } -def TF_RandomShuffleOp : TF_Op<"RandomShuffle", [SameOperandsAndResultType]> { +def TF_RandomPoissonOp : TF_Op<"RandomPoisson", [TF_CannotDuplicate]> { + let summary = "Use RandomPoissonV2 instead."; + + let arguments = (ins + TF_I32OrI64Tensor:$shape, + TensorOf<[F16, F32, F64]>:$rate, + + DefaultValuedAttr:$seed, + DefaultValuedAttr:$seed2 + ); + + let results = (outs + TensorOf<[F16, F32, F64]>:$output + ); + + TF_DerivedOperandTypeAttr S = TF_DerivedOperandTypeAttr<0>; + TF_DerivedOperandTypeAttr dtype = TF_DerivedOperandTypeAttr<1>; +} + +def TF_RandomPoissonV2Op : TF_Op<"RandomPoissonV2", [TF_CannotDuplicate]> { + let summary = [{ +Outputs random values from the Poisson distribution(s) described by rate. + }]; + + let description = [{ +This op uses two algorithms, depending on rate. If rate >= 10, then +the algorithm by Hormann is used to acquire samples via +transformation-rejection. +See http://www.sciencedirect.com/science/article/pii/0167668793909974. + +Otherwise, Knuth's algorithm is used to acquire samples via multiplying uniform +random variables. +See Donald E. Knuth (1969). Seminumerical Algorithms. The Art of Computer +Programming, Volume 2. Addison Wesley + }]; + + let arguments = (ins + TF_I32OrI64Tensor:$shape, + TensorOf<[F16, F32, F64, I32, I64]>:$rate, + + DefaultValuedAttr:$seed, + DefaultValuedAttr:$seed2 + ); + + let results = (outs + TensorOf<[F16, F32, F64, I32, I64]>:$output + ); + + TF_DerivedOperandTypeAttr R = TF_DerivedOperandTypeAttr<1>; + TF_DerivedOperandTypeAttr S = TF_DerivedOperandTypeAttr<0>; + TF_DerivedResultTypeAttr dtype = TF_DerivedResultTypeAttr<0>; +} + +def TF_RandomShuffleOp : TF_Op<"RandomShuffle", [SameOperandsAndResultType, TF_CannotDuplicate]> { let summary = "Randomly shuffles a tensor along its first dimension."; let description = [{ @@ -7003,7 +7112,7 @@ The tensor is shuffled along dimension 0, such that each `value[j]` is mapped TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } -def TF_RandomStandardNormalOp : TF_Op<"RandomStandardNormal", []> { +def TF_RandomStandardNormalOp : TF_Op<"RandomStandardNormal", [TF_CannotDuplicate]> { let summary = "Outputs random values from a normal distribution."; let description = [{ @@ -7025,7 +7134,7 @@ The generated values will have mean 0 and standard deviation 1. TF_DerivedResultTypeAttr dtype = TF_DerivedResultTypeAttr<0>; } -def TF_RandomUniformOp : TF_Op<"RandomUniform", []> { +def TF_RandomUniformOp : TF_Op<"RandomUniform", [TF_CannotDuplicate]> { let summary = "Outputs random values from a uniform distribution."; let description = [{ @@ -7052,6 +7161,36 @@ lower bound 0 is included in the range, while the upper bound 1 is excluded. 
}]; } +def TF_RandomUniformIntOp : TF_Op<"RandomUniformInt", [TF_CannotDuplicate]> { + let summary = "Outputs random integers from a uniform distribution."; + + let description = [{ +The generated values are uniform integers in the range `[minval, maxval)`. +The lower bound `minval` is included in the range, while the upper bound +`maxval` is excluded. + +The random integers are slightly biased unless `maxval - minval` is an exact +power of two. The bias is small for values of `maxval - minval` significantly +smaller than the range of the output (either `2^32` or `2^64`). + }]; + + let arguments = (ins + TF_I32OrI64Tensor:$shape, + TF_I32OrI64Tensor:$minval, + TF_I32OrI64Tensor:$maxval, + + DefaultValuedAttr:$seed, + DefaultValuedAttr:$seed2 + ); + + let results = (outs + TF_I32OrI64Tensor:$output + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; + TF_DerivedOperandTypeAttr Tout = TF_DerivedOperandTypeAttr<1>; +} + def TF_RangeOp : TF_Op<"Range", [NoSideEffect, TF_SameOperandsAndResultElementTypeResolveRef]> { let summary = "Creates a sequence of numbers."; @@ -10898,7 +11037,7 @@ y + truncate_mod(x, y) = x`. TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } -def TF_TruncatedNormalOp : TF_Op<"TruncatedNormal", []> { +def TF_TruncatedNormalOp : TF_Op<"TruncatedNormal", [TF_CannotDuplicate]> { let summary = "Outputs random values from a truncated normal distribution."; let description = [{ diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td index 7aa4c1b54b5..544cfb8af64 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td @@ -69,6 +69,10 @@ def TF_SameOperandsAndResultElementTypeResolveRef : NativeOpTrait< // format), as an example all element wise operations are layout agnostic. def TF_LayoutAgnostic : NativeOpTrait<"TF::LayoutAgnostic">; +// Trait to indicate operations that cannot be duplicated as they might carry +// certain state around within their implementations. +def TF_CannotDuplicate : NativeOpTrait<"TF::CannotDuplicate">; + // Variant of broadcastable trait that considers TF's subtype behavior. class TF_OpIsBroadcastableToRes : And<[ TCOpResIsShapedTypePred, diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc index 61935153c18..7a791afb24d 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc @@ -74,6 +74,61 @@ namespace TF { //===----------------------------------------------------------------------===// namespace { +// Returns true if the op can be duplicated. +bool CanDuplicate(Operation *op) { + // If the op is marked with the cannot duplicate trait, it cannot be + // duplicated. + if (op->hasTrait()) return false; + + // If the op has no memory side effects, it can be duplicated. + if (MemoryEffectOpInterface::hasNoEffect(op)) return true; + + // If the op is marked stateless using the `is_stateless` attribute, that + // attribute determines if the op can be duplicated. + if (auto is_stateless = op->getAttrOfType("is_stateless")) + return is_stateless.getValue(); + + // Otherwise, assume ops can be duplicated by default. + return true; +} + +// Returns true of the given function has a single uses (within the scope +// of the module containing it and all parent modules). +bool HasSingleUse(FuncOp func) { + // Public function can have any number of external uses. 
+ if (func.isPublic()) return false; + + // Return false if unexpected IR structure seen. + ModuleOp module = func.getParentOfType(); + if (!module) return false; + + // Inspect function uses in the containing module and all parent + // modules. + bool use_seen = false; + for (; module; module = module.getParentOfType()) { + auto func_uses_optional = + SymbolTable::getSymbolUses(func, &module.getBodyRegion()); + // Found an unknown use. + if (!func_uses_optional) return false; + + // If no uses in this scope, continue looking in parent module + SymbolTable::UseRange func_uses = func_uses_optional.getValue(); + if (llvm::empty(func_uses)) continue; + + // Check if multiple uses at this scope or another use already seen. + if (!llvm::hasSingleElement(func_uses) || use_seen) return false; + + // This is the first use seen. + use_seen = true; + + // If the function is private, no need to inspect parent modules. + if (func.isPrivate()) break; + } + + // No multiple uses seen. + return true; +} + struct TFInlinerInterface : public DialectInlinerInterface { using DialectInlinerInterface::DialectInlinerInterface; @@ -81,8 +136,8 @@ struct TFInlinerInterface : public DialectInlinerInterface { // Analysis Hooks //===--------------------------------------------------------------------===// - // Defines the legality of inlinining 'src' region into the 'dest' region - // attached to a TF operation + // Returns if its legal to inline 'src' region into the 'dest' region + // attached to a TF operation. bool isLegalToInline(Region *dest, Region *src, BlockAndValueMapping &valueMapping) const final { // Allow inlining in regions attached to region based control flow @@ -91,13 +146,17 @@ struct TFInlinerInterface : public DialectInlinerInterface { llvm::hasSingleElement(*src); } - // Defines the legality of inlining TF operations. - bool isLegalToInline(Operation *, Region *, + // Returns true if its legal to inline a TF operation `op` into the `dest` + // region. + bool isLegalToInline(Operation *op, Region *dest, BlockAndValueMapping &) const final { - // TODO(riverriddle) For now, enable inlining all operations. This isn't - // correct in the face of operations that cannot be duplicated, but this - // requires more intricate side-effect modeling. - return true; + // An op is legal to inline if either of the following conditions is true: + // (a) Its legal to duplicate the Op. + // (a) The Op is inside a single use function. If that function is inlined, + // post inlining, the function will be dead and eliminated from the IR. + // So there won't be any code duplication. + FuncOp func = op->getParentOfType(); + return !func || CanDuplicate(op) || HasSingleUse(func); } //===--------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_traits.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_traits.h index a94d7dbd219..b9a781b99e7 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_traits.h +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_traits.h @@ -21,6 +21,7 @@ limitations under the License. 
#include "mlir/IR/OpDefinition.h" // from @llvm-project #include "mlir/IR/StandardTypes.h" // from @llvm-project #include "mlir/IR/TypeUtilities.h" // from @llvm-project +#include "mlir/Interfaces/SideEffectInterfaces.h" // from @llvm-project #include "mlir/Support/LogicalResult.h" // from @llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" @@ -110,6 +111,19 @@ class SameOperandsAndResultElementTypeResolveRef template class LayoutAgnostic : public TraitBase {}; +// Trait to indicate operations that cannot be duplicated as they might carry +// certain state around within their implementations. +template +class CannotDuplicate : public TraitBase { + public: + static LogicalResult verifyTrait(Operation* op) { + if (MemoryEffectOpInterface::hasNoEffect(op)) + return op->emitError( + "operations with no side effects cannot have CannotDuplicate trait"); + return success(); + } +}; + } // namespace TF } // namespace OpTrait } // namespace mlir diff --git a/tensorflow/compiler/mlir/tensorflow/tests/executor_tpuv1_island_inlining/executor_tpuv1_inline_tpu_island.mlir b/tensorflow/compiler/mlir/tensorflow/tests/executor_tpuv1_island_inlining/executor_tpuv1_inline_tpu_island.mlir index f45f0a435c3..b7bdf505a85 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/executor_tpuv1_island_inlining/executor_tpuv1_inline_tpu_island.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/executor_tpuv1_island_inlining/executor_tpuv1_inline_tpu_island.mlir @@ -35,11 +35,11 @@ module { } // CHECK-NOT: _tpu_v1_compat_outlined module @_tpu_v1_compat_outlined { - func @_tpu_v1_compat_outlined_func0(%arg0: tensor) -> tensor { + func @_tpu_v1_compat_outlined_func0(%arg0: tensor) -> tensor attributes {sym_visibility = "nested"} { %0 = "tf.opA"(%arg0) : (tensor) -> tensor return %0 : tensor } - func @_tpu_v1_compat_outlined_func1(%arg0: tensor, %arg1: tensor) -> (tensor, tensor) { + func @_tpu_v1_compat_outlined_func1(%arg0: tensor, %arg1: tensor) -> (tensor, tensor) attributes {sym_visibility = "nested"} { %0 = "tf.opA"(%arg0) : (tensor) -> tensor %1 = "tf.opA"(%0) : (tensor) -> tensor %2 = "tf.SomeOp"(%arg0, %arg1) : (tensor, tensor) -> tensor diff --git a/tensorflow/compiler/mlir/tensorflow/tests/executor_tpuv1_island_inlining/while_op.mlir b/tensorflow/compiler/mlir/tensorflow/tests/executor_tpuv1_island_inlining/while_op.mlir index 8c174a7cfaf..6724033d292 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/executor_tpuv1_island_inlining/while_op.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/executor_tpuv1_island_inlining/while_op.mlir @@ -12,7 +12,7 @@ module { return %0#0 : tensor } module @_tpu_v1_compat_outlined { - func @_tpu_v1_compat_outlined_func0(%arg0: tensor) -> (tensor, tensor, tensor, tensor) { + func @_tpu_v1_compat_outlined_func0(%arg0: tensor) -> (tensor, tensor, tensor, tensor) attributes {sym_visibility = "nested"} { "tf.TPUReplicateMetadata"() {_tpu_replicate = "cluster", device = "device", num_replicas = 1 : i64, topology = "topology"} : () -> () %0 = "tf.opA"(%arg0) {_tpu_replicate = "cluster"} : (tensor) -> tensor %1 = "tf.While"(%0) {body = @while_body_with_cluster_attr, cond = @while_cond_with_cluster_attr, is_stateless = false, name = "A", parallel_iterations = 10 : i64} : (tensor) -> tensor diff --git a/tensorflow/compiler/mlir/tensorflow/tests/inlining.mlir b/tensorflow/compiler/mlir/tensorflow/tests/inlining.mlir index 5f4bffcc7c2..7e583d0425a 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/inlining.mlir +++ 
b/tensorflow/compiler/mlir/tensorflow/tests/inlining.mlir @@ -2,7 +2,7 @@ // Test that simple TF operations can be inlined. -func @inline_simple_callee() -> tensor<2xi32> { +func @inline_simple_callee() -> tensor<2xi32> attributes {sym_visibility = "private"} { %cst = "tf.Const"() { value = dense<2> : tensor<2xi32> } : () -> tensor<2xi32> return %cst : tensor<2xi32> } @@ -18,7 +18,7 @@ func @inline_simple() -> tensor<2xi32> { // Check that TF call operations can be inlined, even when the shape of the // argument or result is different than the called function. -func @inline_shape_cast_callee(%arg : tensor<*xi32>) -> tensor<*xi32> { +func @inline_shape_cast_callee(%arg : tensor<*xi32>) -> tensor<*xi32> attributes {sym_visibility = "private"} { return %arg : tensor<*xi32> } @@ -34,7 +34,12 @@ func @inline_shape_cast(%arg: tensor<2xi32>) -> tensor<2xi32> { // Check that functions can be inlined into islands. -func @inline_into_island_multi_block_callee() -> tensor<2xi32> { +func @inline_simple_callee1() -> tensor<2xi32> attributes {sym_visibility = "private"} { + %cst = "tf.Const"() { value = dense<2> : tensor<2xi32> } : () -> tensor<2xi32> + return %cst : tensor<2xi32> +} + +func @inline_into_island_multi_block_callee() -> tensor<2xi32> attributes {sym_visibility = "private"} { br ^bb1 ^bb1: @@ -48,7 +53,7 @@ func @inline_into_island() -> (tensor<2xi32>, tensor<2xi32>) { %1:3 = tf_executor.island { // Single block regions may be inlined. // CHECK: %[[CST:.*]] = "tf.Const" - %result = "tf.StatefulPartitionedCall"() {config = "", config_proto = "", executor_type = "", f = @inline_simple_callee} : () -> tensor<2xi32> + %result = "tf.StatefulPartitionedCall"() {config = "", config_proto = "", executor_type = "", f = @inline_simple_callee1} : () -> tensor<2xi32> // Multi block regions may not. // CHECK-NEXT: %[[CALL:.*]] = "tf.StatefulPartitionedCall" diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/executor_tpuv1_outline_tpu_island.cc b/tensorflow/compiler/mlir/tensorflow/transforms/executor_tpuv1_outline_tpu_island.cc index d8e739ee949..a5177fac647 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/executor_tpuv1_outline_tpu_island.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/executor_tpuv1_outline_tpu_island.cc @@ -108,6 +108,7 @@ void TPUBridgeExecutorIslandOutlining::runOnOperation() { auto outlined_func = OpBuilder(ctx).create(island_op.getLoc(), name, func_type); outlined_symbol_table.insert(outlined_func); + outlined_func.setVisibility(FuncOp::Visibility::Nested); // We will "steal" the body of the island and replace it with a call to the // new function later. From 4030bec833ea4f7a4b212e14bae72c56c579c5b1 Mon Sep 17 00:00:00 2001 From: Kibeom Kim Date: Fri, 31 Jul 2020 13:54:16 -0700 Subject: [PATCH 1889/2522] Remove @test_util.run_deprecated_v1 in gradient_correctness_test.py Also introduce AbstractGradientTape utility class for testing tf.gradients() in tf.GradientTape() test code. 
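For context on the behavioral difference the helper papers over (a short illustrative sketch, not part of the change itself): tape.gradient() hands back a single tensor for a single source, while tf.gradients() is graph-only and always returns a list, which is why the fake tape unpacks single-element results.

import tensorflow as tf

x = tf.constant(3.0)

# GradientTape: a single source yields a single tensor.
with tf.GradientTape() as tape:
    tape.watch(x)
    y = x * x
print(tape.gradient(y, x))  # tf.Tensor(6.0, ...)

# tf.gradients: graph mode only, and the result is always a list.
@tf.function
def graph_grad():
    a = tf.constant(3.0)
    b = a * a
    return tf.gradients(b, a)

print(graph_grad())  # [<tf.Tensor: ... 6.0>]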
PiperOrigin-RevId: 324278332 Change-Id: Icc1c0518926f414e25e80b4ab9a1cbc0a701c086 --- tensorflow/python/framework/test_util.py | 55 +++++++++++++++++++ .../kernel_tests/gradient_correctness_test.py | 15 +++-- 2 files changed, 64 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index 9e49f4b02cc..958c7697c96 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -50,6 +50,7 @@ from tensorflow.python.client import device_lib from tensorflow.python.client import pywrap_tf_session from tensorflow.python.client import session from tensorflow.python.compat.compat import forward_compatibility_horizon +from tensorflow.python.eager import backprop from tensorflow.python.eager import context from tensorflow.python.eager import def_function from tensorflow.python.eager import tape @@ -68,6 +69,7 @@ from tensorflow.python.framework import versions from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_util from tensorflow.python.ops import control_flow_util_v2 +from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import script_ops from tensorflow.python.ops import summary_ops_v2 from tensorflow.python.ops import variables @@ -3276,3 +3278,56 @@ def set_producer_version(graph, producer_version): with graph.as_default(): importer.import_graph_def(graph_def) assert graph.graph_def_versions.producer, producer_version + + +@contextlib.contextmanager +def _fake_gradient_tape_context_manager(): + """tf.gradients(...) implemented as tf.GradientTape context manager interface. + + This is useful to test tf.gradients() in tests that uses tf.GradientTape(). + + Yields: + gradient tape instance that's implemented by tf.gradients() underneath. + """ + try: + class FakeGradientTape: + + def watch(self, x): + pass + + def gradient(self, y, x): + result = gradients_impl.gradients(y, x) + + # Unlike `tape.gradient()`, `tf.gradients()` returns a list for a single + # element. So unpack if needed to match `tape.gradient()` behavior. + if not isinstance(x, (list, tuple)): + assert len(result) == 1 + return result[0] + + return result + + yield FakeGradientTape() + finally: + pass + + +class AbstractGradientTape: + """Abstract GradientTape context manager that has multiple implementations. + + This is useful to test both tf.GradientTape() and tf.gradients() without + duplicating tests. 
+ """ + + def __init__(self, use_tape): + self._use_tape = use_tape + + def __enter__(self): + if self._use_tape: + self._tape_impl = backprop.GradientTape() + else: + self._tape_impl = _fake_gradient_tape_context_manager() + return self._tape_impl.__enter__() + + def __exit__(self, exc_type, exc_val, exc_tb): + self._tape_impl.__exit__(exc_type, exc_val, exc_tb) + diff --git a/tensorflow/python/kernel_tests/gradient_correctness_test.py b/tensorflow/python/kernel_tests/gradient_correctness_test.py index 682566742c2..911ba59bed4 100644 --- a/tensorflow/python/kernel_tests/gradient_correctness_test.py +++ b/tensorflow/python/kernel_tests/gradient_correctness_test.py @@ -18,8 +18,10 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from absl.testing import parameterized import numpy as np +from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import test_util @@ -28,7 +30,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.platform import test -class GradientCorrectnessTest(test.TestCase): +class GradientCorrectnessTest(test.TestCase, parameterized.TestCase): @test_util.run_deprecated_v1 def testMultipleOutputChainedGradients(self): @@ -42,12 +44,13 @@ class GradientCorrectnessTest(test.TestCase): # [dexp(x)/dx + d(log(exp(x)))/dx] @ x=1 == exp(1) + 1 self.assertAllClose(grad_vals[0], exp1_plus_one) - @test_util.run_deprecated_v1 - def testIdentityGradient(self): + @parameterized.parameters(set((True, context.executing_eagerly()))) + def testIdentityGradient(self, use_tape): x = constant_op.constant(3.) - dx_dx, = gradients_impl.gradients(x, x) - with self.cached_session() as sess: - self.assertAllClose(1., self.evaluate(dx_dx)) + with test_util.AbstractGradientTape(use_tape=use_tape) as tape: + tape.watch(x) + dx_dx = tape.gradient(x, x) + self.assertAllClose(1., self.evaluate(dx_dx)) @test_util.run_deprecated_v1 def testIntegerIdentityGradient(self): From ee006f32fb5752dfd0cc7a3f6528726b3c96219f Mon Sep 17 00:00:00 2001 From: Lucy Fox Date: Fri, 31 Jul 2020 13:56:50 -0700 Subject: [PATCH 1890/2522] Add GuaranteeAllFuncsOneUse pass to bridge pass pipeline. This enables shape inference where functions are used multiple times. PiperOrigin-RevId: 324278827 Change-Id: I812f132e6d490dd5e21f7c8a526a426894220988 --- tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc index e27302071fa..f06fe1280f0 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc @@ -304,6 +304,8 @@ Status ConvertMLIRToXlaComputation( tf2xla.addPass(mlir::TFDevice::CreateDecomposeResourceOpsPass()); tf2xla.addPass(mlir::TF::CreatePromoteResourcesToArgsPass()); tf2xla.addPass(mlir::createSymbolDCEPass()); + // Guarantee all functions have one use, which enables shape inference. 
+ tf2xla.addPass(mlir::TF::CreateGuaranteeAllFuncsOneUsePass()); tf2xla.addPass(mlir::TF::CreateTFShapeInferencePass()); // LegalizeTFControlFlow encapsulates arguments for control flow operations // with a tuple argument which break the assumption of resource lifting From 18c370bc42eb7a5189caa869d2d7b9e3d6c1182d Mon Sep 17 00:00:00 2001 From: Tim Shen Date: Fri, 31 Jul 2020 13:58:01 -0700 Subject: [PATCH 1891/2522] [MLIR] Add conversion between XLA Fusion and MHLO FusionOp. PiperOrigin-RevId: 324279065 Change-Id: I8a64abcc7d239cd75a06b1faea596ba4da0ef163 --- .../mlir-hlo/Dialect/mhlo/IR/hlo_ops.td | 11 +++++- .../mlir/xla/hlo_function_importer.cc | 9 +++++ .../compiler/mlir/xla/mlir_hlo_to_hlo.cc | 38 ++++++++++++++++++- .../compiler/mlir/xla/mlir_hlo_to_hlo.h | 5 +++ .../mlir/xla/tests/translate/fusion.hlotxt | 35 +++++++++++++++++ .../mlir/xla/tests/translate/fusion.mlir | 27 +++++++++++++ tensorflow/compiler/xla/client/xla_builder.cc | 21 ++++++++++ tensorflow/compiler/xla/client/xla_builder.h | 14 +++++++ 8 files changed, 157 insertions(+), 3 deletions(-) create mode 100644 tensorflow/compiler/mlir/xla/tests/translate/fusion.hlotxt create mode 100644 tensorflow/compiler/mlir/xla/tests/translate/fusion.mlir diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td index 3d7b8273d67..e2c9a1aac89 100644 --- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td @@ -40,6 +40,14 @@ class HLO_Op traits> : let verifier = [{ return Verify(*this); }]; } +def HLO_LOOP_FUSION : StrEnumAttrCase<"kLoop">; +def HLO_INPUT_FUSION : StrEnumAttrCase<"kInput">; +def HLO_OUTPUT_FUSION : StrEnumAttrCase<"kOutput">; +def HLO_CUSTOM_FUSION : StrEnumAttrCase<"kCustom">; +def HLO_FusionKindAttr : StrEnumAttr<"FusionKind", "fusion kind", [ + HLO_LOOP_FUSION, HLO_INPUT_FUSION, HLO_OUTPUT_FUSION, HLO_CUSTOM_FUSION +]>; + //===----------------------------------------------------------------------===// // MHLO nullary op definitions. //===----------------------------------------------------------------------===// @@ -1375,7 +1383,8 @@ def HLO_FusionOp : HLO_Op<"fusion", []> { let regions = (region SizedRegion<1>:$fused_computation); let arguments = (ins - Variadic:$operands + Variadic:$operands, + OptionalAttr:$fusion_kind ); let results = (outs diff --git a/tensorflow/compiler/mlir/xla/hlo_function_importer.cc b/tensorflow/compiler/mlir/xla/hlo_function_importer.cc index ad177ce1dc5..d366a36c212 100644 --- a/tensorflow/compiler/mlir/xla/hlo_function_importer.cc +++ b/tensorflow/compiler/mlir/xla/hlo_function_importer.cc @@ -708,6 +708,15 @@ StatusOr HloFunctionImporter::ImportInstruction( NoAttributeCase(kCopy, CopyOp); #undef NoAttributeCase #undef MakeAndReturn + case HloOpcode::kFusion: { + auto fusion = func_builder->create( + loc, result_type, operands, + builder_->getStringAttr(xla::ToString(instruction->fusion_kind()))); + TF_RETURN_IF_ERROR( + ImportAsRegion(*instruction->fused_instructions_computation(), + &fusion.fused_computation())); + return fusion.getOperation(); + } case HloOpcode::kAddDependency: // Arbitrary op code that I suspect we will not implement for quite a // while and allows testing handling of unknown ops. 
Selected because it diff --git a/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc b/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc index cad1ce644b6..e6d0b8f8dd8 100644 --- a/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc +++ b/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc @@ -1009,8 +1009,32 @@ LogicalResult ExportXlaOp(WhileOp op, OpLoweringContext ctx) { } LogicalResult ExportXlaOp(FusionOp op, OpLoweringContext ctx) { - // TODO(whoever): currently not supported. - return failure(); + if (!op.fusion_kind()) { + op.emitOpError() << "requires fusion kind for HLO translation"; + return failure(); + } + + xla::XlaComputation fused_computation; + if (failed(ctx.converter->LowerRegionAsComputation(&op.fused_computation(), + &fused_computation))) + return failure(); + + auto& values = *ctx.values; + llvm::SmallVector operands; + for (auto operand : op.operands()) operands.push_back(values[operand]); + + xla::XlaOp fusion = xla::internal::XlaBuilderBuildFusion( + ctx.builder, operands, + absl::string_view(op.fusion_kind()->data(), op.fusion_kind()->size()), + fused_computation); + if (op.getNumResults() == 1) { + values[op.getResult(0)] = fusion; + } else { + for (auto item : llvm::enumerate(op.getResults())) { + values[item.value()] = xla::GetTupleElement(fusion, item.index()); + } + } + return success(); } } // namespace @@ -1582,6 +1606,16 @@ LogicalResult AddDynamicParameterBindings(mlir::ModuleOp module, } // namespace +Status ConvertRegionToComputation(mlir::Region* region, + xla::XlaComputation* func) { + mlir::ModuleOp module; + ConvertToHloModule converter(module, true, true, {}); + if (failed(converter.LowerRegionAsComputation(region, func))) + return tensorflow::errors::Internal( + "failed to convert region to computation"); + return Status::OK(); +} + Status ConvertMlirHloToHlo(mlir::ModuleOp module, xla::HloProto* hlo_proto, bool use_tuple_args, bool return_tuple, const tensorflow::XlaHelpers::ShapeRepresentationFn diff --git a/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.h b/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.h index d84aa92d3e2..6f2b5a6db95 100644 --- a/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.h +++ b/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.h @@ -37,6 +37,11 @@ Status ConvertMlirHloToHlo(mlir::ModuleOp module, ::xla::HloProto* hlo_proto, const tensorflow::XlaHelpers::ShapeRepresentationFn shape_representation_fn = nullptr); +// Converts a region to a computation. It returns a standalone module that +// contains the converted region as the entry computation. +Status ConvertRegionToComputation(mlir::Region* region, + ::xla::XlaComputation* func); + // Creates XlaOp equivalent of a given MLIR operation using the operand info // from `value_lowering` map. 
llvm::Optional<::xla::XlaOp> CreateXlaOperator( diff --git a/tensorflow/compiler/mlir/xla/tests/translate/fusion.hlotxt b/tensorflow/compiler/mlir/xla/tests/translate/fusion.hlotxt new file mode 100644 index 00000000000..dc2ce6d58f8 --- /dev/null +++ b/tensorflow/compiler/mlir/xla/tests/translate/fusion.hlotxt @@ -0,0 +1,35 @@ +// RUN: tf-mlir-translate -hlo-text-to-mlir-hlo %s -o - | FileCheck %s + +HloModule main.17 + +// CHECK: func @main(%[[ARG0:.*]]: tensor, %[[ARG1:.*]]: tensor) -> tensor { +// CHECK: %0 = "mhlo.fusion"(%[[ARG0:.*]], %[[ARG1:.*]]) ( { +// CHECK: ^bb0(%[[ARG2:.*]]: tensor, %[[ARG3:.*]]: tensor): +// CHECK: }) {fusion_kind = "kLoop"} : (tensor, tensor) -> tensor +// CHECK: %1 = "mhlo.fusion"(%[[ARG0:.*]], %[[ARG1:.*]]) ( { +// CHECK: ^bb0(%[[ARG2:.*]]: tensor, %[[ARG3:.*]]: tensor): +// CHECK: }) {fusion_kind = "kLoop"} : (tensor, tensor) -> tuple, tensor> +// CHECK: } + +%region_0.3 (Arg_0.4: f32[], Arg_1.5: f32[]) -> f32[] { + %Arg_0.4 = f32[] parameter(0) + %Arg_1.5 = f32[] parameter(1) + ROOT %add.6 = f32[] add(f32[] %Arg_0.4, f32[] %Arg_1.5) +} + +%region_1.8 (Arg_0.9: f32[], Arg_1.10: f32[]) -> (f32[], f32[]) { + %Arg_0.9 = f32[] parameter(0) + %Arg_1.10 = f32[] parameter(1) + %add.11 = f32[] add(f32[] %Arg_0.9, f32[] %Arg_1.10) + %subtract.12 = f32[] subtract(f32[] %Arg_0.9, f32[] %Arg_1.10) + ROOT %tuple.13 = (f32[], f32[]) tuple(f32[] %add.11, f32[] %subtract.12) +} + +ENTRY %main.17 (Arg_0.1: f32[], Arg_1.2: f32[]) -> f32[] { + %Arg_0.1 = f32[] parameter(0) + %Arg_1.2 = f32[] parameter(1) + %fusion.7 = f32[] fusion(f32[] %Arg_0.1, f32[] %Arg_1.2), kind=kLoop, calls=%region_0.3 + %fusion.14 = (f32[], f32[]) fusion(f32[] %Arg_0.1, f32[] %Arg_1.2), kind=kLoop, calls=%region_1.8 + %get-tuple-element.15 = f32[] get-tuple-element((f32[], f32[]) %fusion.14), index=0 + ROOT %get-tuple-element.16 = f32[] get-tuple-element((f32[], f32[]) %fusion.14), index=1 +} diff --git a/tensorflow/compiler/mlir/xla/tests/translate/fusion.mlir b/tensorflow/compiler/mlir/xla/tests/translate/fusion.mlir new file mode 100644 index 00000000000..7da9b7c5f7b --- /dev/null +++ b/tensorflow/compiler/mlir/xla/tests/translate/fusion.mlir @@ -0,0 +1,27 @@ +// RUN: tf-mlir-translate -mlir-hlo-to-hlo-text %s | FileCheck %s + +// CHECK: %[[REGION0:.*]] ({{.*}}: f32[], {{.*}}: f32[]) -> f32[] +// CHECK: %[[REGION1:.*]] ({{.*}}: f32[], {{.*}}: f32[]) -> (f32[], f32[]) +// +// CHECK: ENTRY +// CHECK: %[[PARAM0:.*]] = f32[] parameter(0) +// CHECK: %[[PARAM1:.*]] = f32[] parameter(1) +// CHECK: %[[FUSION0:.*]] = f32[] fusion(f32[] %[[PARAM0]], f32[] %[[PARAM1]]), kind=kLoop, calls=%[[REGION0]] +// CHECK: %[[FUSION1:.*]] = (f32[], f32[]) fusion(f32[] %[[PARAM0]], f32[] %[[PARAM1]]), kind=kLoop, calls=%[[REGION1]] +// CHECK: f32[] get-tuple-element((f32[], f32[]) %[[FUSION1]]), index=0 +// CHECK: f32[] get-tuple-element((f32[], f32[]) %[[FUSION1]]), index=1 +// CHECK: } +func @main(%arg0: tensor, %arg1: tensor) { + %result = "mhlo.fusion"(%arg0, %arg1) ( { + ^bb0(%arg2: tensor, %arg3: tensor): + %result = "mhlo.add"(%arg2, %arg3): (tensor, tensor) -> tensor + "mhlo.return"(%result) : (tensor) -> () + }) { fusion_kind = "kLoop" } : (tensor, tensor) -> tensor + %result0, %result1 = "mhlo.fusion"(%arg0, %arg1) ( { + ^bb0(%arg2: tensor, %arg3: tensor): + %elem0 = "mhlo.add"(%arg2, %arg3): (tensor, tensor) -> tensor + %elem1 = "mhlo.subtract"(%arg2, %arg3): (tensor, tensor) -> tensor + "mhlo.return"(%elem0, %elem1) : (tensor, tensor) -> () + }) { fusion_kind="kLoop" } : (tensor, tensor) -> (tensor, tensor) + 
return +} diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc index 0d8da6732c7..0fc299f031f 100644 --- a/tensorflow/compiler/xla/client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_builder.cc @@ -73,6 +73,27 @@ void SetProtoIdAndName(T* entry, const string& base_name, char separator, } } // namespace +namespace internal { + +XlaOp XlaBuilderBuildFusion(XlaBuilder* builder, + absl::Span operands, + absl::string_view fusion_kind, + const XlaComputation& fused_computation) { + return builder->ReportErrorOrReturn([&]() -> StatusOr { + HloInstructionProto instr; + instr.set_fusion_kind(std::string(fusion_kind)); + std::vector operand_shape_ptrs; + TF_ASSIGN_OR_RETURN(auto program_shape, + fused_computation.GetProgramShape()); + *instr.mutable_shape() = program_shape.result().ToProto(); + builder->AddCalledComputation(fused_computation, &instr); + return builder->AddInstruction(std::move(instr), HloOpcode::kFusion, + operands); + }); +} + +} // namespace internal + XlaOp operator-(XlaOp x) { return Neg(x); } XlaOp operator+(XlaOp x, XlaOp y) { return Add(x, y); } XlaOp operator-(XlaOp x, XlaOp y) { return Sub(x, y); } diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h index b43a5a2c06e..7b96c6dfed6 100644 --- a/tensorflow/compiler/xla/client/xla_builder.h +++ b/tensorflow/compiler/xla/client/xla_builder.h @@ -45,6 +45,16 @@ limitations under the License. namespace xla { class XlaBuilder; +class XlaOp; + +namespace internal { + +XlaOp XlaBuilderBuildFusion(XlaBuilder* builder, + absl::Span operands, + absl::string_view fusion_kind, + const XlaComputation& fused_computation); + +} // namespace internal // This represents an instruction that has been enqueued using the XlaBuilder. // This is used to pass to subsequent computations that depends upon the @@ -1213,6 +1223,10 @@ class XlaBuilder { TF_RETURN_IF_ERROR(CheckOpBuilder(op)); return LookUpInstructionByHandleInternal(op.handle()); } + + friend XlaOp internal::XlaBuilderBuildFusion( + XlaBuilder* builder, absl::Span operands, + absl::string_view fusion_kind, const XlaComputation& fused_computation); }; // RAII-style object: sets the current sharding assignment in builder on From 7947d9c27367fecaefa559c7529743e8c3e9a4cd Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Fri, 31 Jul 2020 14:10:19 -0700 Subject: [PATCH 1892/2522] Handle NPY_INT for numpy conversion case. This enum is used on Windows and unbreaks int32 ndarray support on Windows. 
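A hedged illustration (not part of the patch) of the behavior this unblocks. It assumes the usual NumPy convention that np.intc arrays carry the NPY_INT type number, which on Windows builds is distinct from the enum value behind NPY_INT32:

    # Sketch only: np.intc is the C `int` type; before this change its NPY_INT
    # type number was not recognized on Windows and conversion failed with
    # "Unsupported numpy type".
    import numpy as np
    import tensorflow as tf

    x = np.arange(3, dtype=np.intc)   # 32-bit ints reported via the NPY_INT enum
    t = tf.constant(x)                # now maps NPY_INT -> TF_INT32
    assert t.dtype == tf.int32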
PiperOrigin-RevId: 324281712 Change-Id: Ibef344d1480b86649e80077627a269160405648f --- tensorflow/python/kernel_tests/array_ops/BUILD | 1 - tensorflow/python/lib/core/ndarray_tensor.cc | 10 ++++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/kernel_tests/array_ops/BUILD b/tensorflow/python/kernel_tests/array_ops/BUILD index 19fe3af425d..df48258f396 100644 --- a/tensorflow/python/kernel_tests/array_ops/BUILD +++ b/tensorflow/python/kernel_tests/array_ops/BUILD @@ -37,7 +37,6 @@ cuda_py_test( name = "slice_op_test", size = "medium", srcs = ["slice_op_test.py"], - tags = ["no_windows"], # b/126916429 deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", diff --git a/tensorflow/python/lib/core/ndarray_tensor.cc b/tensorflow/python/lib/core/ndarray_tensor.cc index 7314fec41ed..7be05c03e36 100644 --- a/tensorflow/python/lib/core/ndarray_tensor.cc +++ b/tensorflow/python/lib/core/ndarray_tensor.cc @@ -181,6 +181,16 @@ Status PyArray_TYPE_to_TF_DataType(PyArrayObject* array, // might be different on certain platforms. *out_tf_datatype = TF_INT64; break; + } else if (pyarray_type == NPY_INT) { + // NPY_INT is equivalent to NPY_INT32, while their enum values might be + // different on certain platforms. + *out_tf_datatype = TF_INT32; + break; + } else if (pyarray_type == NPY_UINT) { + // NPY_UINT is equivalent to NPY_UINT32, while their enum values might + // be different on certain platforms. + *out_tf_datatype = TF_UINT32; + break; } return errors::Internal("Unsupported numpy type: ", numpy_type_name(pyarray_type)); From cf2e284a9a22c7213218ffc73fed7340433c57b4 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Fri, 31 Jul 2020 14:11:09 -0700 Subject: [PATCH 1893/2522] Add host location plumbing to TPU client. This change adds a TpuHostLocationExternal class, which currently only supports getting the host's ID, and a getter method on TpuPlatformIn terface (the host location is fetched from the platform, instead of the topology, since it varies across each host). 
PiperOrigin-RevId: 324281861 Change-Id: I30704a480fa5f801161c12b67b5db6bdf690e858 --- tensorflow/core/tpu/tpu_library_init_fns.inc | 5 ++++- tensorflow/stream_executor/tpu/BUILD | 1 + tensorflow/stream_executor/tpu/tpu_executor_c_api.h | 6 ++++++ tensorflow/stream_executor/tpu/tpu_platform.cc | 6 ++++++ tensorflow/stream_executor/tpu/tpu_platform.h | 3 +++ .../stream_executor/tpu/tpu_platform_interface.h | 3 +++ tensorflow/stream_executor/tpu/tpu_topology.cc | 4 ++++ tensorflow/stream_executor/tpu/tpu_topology.h | 10 ++++++++++ 8 files changed, 37 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/tpu/tpu_library_init_fns.inc b/tensorflow/core/tpu/tpu_library_init_fns.inc index a2c0894d70d..682cc8b1c13 100644 --- a/tensorflow/core/tpu/tpu_library_init_fns.inc +++ b/tensorflow/core/tpu/tpu_library_init_fns.inc @@ -81,6 +81,7 @@ tensorflow::Status SetExecutorStructFn(void* library_handle) { TFTPU_SET_FN(executor_fn, TpuPlatform_TpuMemoryLimit); TFTPU_SET_FN(executor_fn, TpuPlatform_ShouldRegisterTpuDeviceToDeviceCopy); TFTPU_SET_FN(executor_fn, TpuPlatform_GetTopologyPtr); + TFTPU_SET_FN(executor_fn, TpuPlatform_GetHostLocation); TFTPU_SET_FN(executor_fn, TpuExecutor_Init); TFTPU_SET_FN(executor_fn, TpuExecutor_Free); @@ -175,6 +176,8 @@ tensorflow::Status SetExecutorStructFn(void* library_handle) { TFTPU_SET_FN(executor_fn, TpuCoreLocation_Index); TFTPU_SET_FN(executor_fn, TpuCoreLocation_Id); + TFTPU_SET_FN(executor_fn, TpuHostLocation_Id); + TFTPU_SET_FN(executor_fn, TpuCompiler_New); TFTPU_SET_FN(executor_fn, TpuCompiler_Free); @@ -229,4 +232,4 @@ tensorflow::Status InitializeTpuStructFns(void* library_handle) { return tensorflow::Status::OK(); } -} // namespace \ No newline at end of file +} // namespace diff --git a/tensorflow/stream_executor/tpu/BUILD b/tensorflow/stream_executor/tpu/BUILD index 1e5063f31f8..459021043df 100644 --- a/tensorflow/stream_executor/tpu/BUILD +++ b/tensorflow/stream_executor/tpu/BUILD @@ -269,6 +269,7 @@ cc_library( hdrs = ["tpu_platform_interface.h"], visibility = ["//visibility:public"], deps = [ + ":tpu_topology_external", "//tensorflow/core:lib", "//tensorflow/stream_executor", ], diff --git a/tensorflow/stream_executor/tpu/tpu_executor_c_api.h b/tensorflow/stream_executor/tpu/tpu_executor_c_api.h index 46d908ac18a..77806bd338e 100644 --- a/tensorflow/stream_executor/tpu/tpu_executor_c_api.h +++ b/tensorflow/stream_executor/tpu/tpu_executor_c_api.h @@ -40,6 +40,7 @@ int64_t TpuPlatform_VisibleDeviceCount(SE_Platform* platform); int64_t TpuPlatform_TpuMemoryLimit(SE_Platform* platform); bool TpuPlatform_ShouldRegisterTpuDeviceToDeviceCopy(SE_Platform* platform); void* TpuPlatform_GetTopologyPtr(SE_Platform* platform); +void* TpuPlatform_GetHostLocation(SE_Platform* platform); void TpuExecutor_Init(SE_StreamExecutor* executor, int device_ordinal, SE_DeviceOptions* device_options, SE_Status* status); @@ -201,6 +202,8 @@ int TpuCoreLocation_ChipCoordinates_Z(void* tpu_core_location); int TpuCoreLocation_Index(void* tpu_core_location); int TpuCoreLocation_Id(void* tpu_core_location); +int TpuHostLocation_Id(void* tpu_host_location); + // C API for XLA::Compiler interface TFTPU_CAPI_EXPORT Tpu_Compiler* TpuCompiler_New(); @@ -253,6 +256,7 @@ struct TfTpu_ExecutorApiFn { TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_TpuMemoryLimit); TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_ShouldRegisterTpuDeviceToDeviceCopy); TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_GetTopologyPtr); + TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_GetHostLocation); TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_Init); 
TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_Free); @@ -347,6 +351,8 @@ struct TfTpu_ExecutorApiFn { TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_Index); TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_Id); + TFTPU_ADD_FN_IN_STRUCT(TpuHostLocation_Id); + TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_New); TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_Free); TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_RunHloPasses); diff --git a/tensorflow/stream_executor/tpu/tpu_platform.cc b/tensorflow/stream_executor/tpu/tpu_platform.cc index 95472417b4e..90401a3dfb7 100644 --- a/tensorflow/stream_executor/tpu/tpu_platform.cc +++ b/tensorflow/stream_executor/tpu/tpu_platform.cc @@ -122,6 +122,12 @@ const tensorflow::tpu::TpuTopologyPtr TpuPlatform::GetTopologyPtr() { return tpu::ExecutorApiFn()->TpuPlatform_GetTopologyPtrFn(platform_); } +const tensorflow::tpu::TpuHostLocationExternal TpuPlatform::GetTpuHostLocation() + const { + return tpu::TpuHostLocationExternal( + tpu::ExecutorApiFn()->TpuPlatform_GetHostLocationFn(platform_)); +} + void TpuPlatform::InsertEvent(stream_executor::internal::EventInterface* key, SE_Event* val) { tensorflow::mutex_lock lock(event_map_mu_); diff --git a/tensorflow/stream_executor/tpu/tpu_platform.h b/tensorflow/stream_executor/tpu/tpu_platform.h index b01d033feee..a70634f7055 100644 --- a/tensorflow/stream_executor/tpu/tpu_platform.h +++ b/tensorflow/stream_executor/tpu/tpu_platform.h @@ -62,6 +62,9 @@ class TpuPlatform : public ::tensorflow::tpu::TpuPlatformInterface { const tensorflow::tpu::TpuTopologyPtr GetTopologyPtr() override; + const tensorflow::tpu::TpuHostLocationExternal GetTpuHostLocation() + const override; + bool Initialized() const override; Status Initialize( diff --git a/tensorflow/stream_executor/tpu/tpu_platform_interface.h b/tensorflow/stream_executor/tpu/tpu_platform_interface.h index 889375245a8..a0a3b444550 100644 --- a/tensorflow/stream_executor/tpu/tpu_platform_interface.h +++ b/tensorflow/stream_executor/tpu/tpu_platform_interface.h @@ -18,6 +18,7 @@ limitations under the License. 
#include "tensorflow/core/platform/types.h" #include "tensorflow/stream_executor/platform.h" +#include "tensorflow/stream_executor/tpu/tpu_topology.h" namespace tensorflow { namespace tpu { @@ -45,6 +46,8 @@ class TpuPlatformInterface : public stream_executor::Platform { virtual bool ShouldRegisterTpuDeviceToDeviceCopy() = 0; virtual const TpuTopologyPtr GetTopologyPtr() = 0; + + virtual const TpuHostLocationExternal GetTpuHostLocation() const = 0; }; } // namespace tpu diff --git a/tensorflow/stream_executor/tpu/tpu_topology.cc b/tensorflow/stream_executor/tpu/tpu_topology.cc index 749cb291940..4499b2d70eb 100644 --- a/tensorflow/stream_executor/tpu/tpu_topology.cc +++ b/tensorflow/stream_executor/tpu/tpu_topology.cc @@ -36,6 +36,10 @@ int32 TpuCoreLocationExternal::Id() const { return tpu::ExecutorApiFn()->TpuCoreLocation_IdFn(core_location_); } +int32 TpuHostLocationExternal::Id() const { + return tpu::ExecutorApiFn()->TpuHostLocation_IdFn(host_location_); +} + int32 TpuTopologyExternal::LogicalDevicesPerHost( TpuCoreTypeEnum core_type) const { return tpu::ExecutorApiFn()->TpuTopology_LogicalDevicesPerHostFn(topology_, diff --git a/tensorflow/stream_executor/tpu/tpu_topology.h b/tensorflow/stream_executor/tpu/tpu_topology.h index b49b1e24386..d6c169f4fa0 100644 --- a/tensorflow/stream_executor/tpu/tpu_topology.h +++ b/tensorflow/stream_executor/tpu/tpu_topology.h @@ -41,6 +41,16 @@ class TpuCoreLocationExternal { void* core_location_; }; +class TpuHostLocationExternal { + public: + explicit TpuHostLocationExternal(void* host_location) + : host_location_(host_location) {} + int32 Id() const; + + private: + void* host_location_; +}; + struct TpuTopologyChipBoundsExternal { int x; int y; From 1edbacaacc88d39bd96f774f07c141a216c6a52b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 31 Jul 2020 14:11:31 -0700 Subject: [PATCH 1894/2522] Break up core/kernels/BUILD (part 1 of N): Move linear algebra kernels to subdirectory tensorflow/core/kernels/linalg with its own BUILD file. 
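For downstream BUILD files the practical effect is a label change; a hypothetical consumer (sketch only, target and file names invented) would migrate along these lines, with temporary forwarding targets left in the old package during the transition:

    # Hypothetical downstream target; only the dependency labels matter here.
    cc_library(
        name = "my_linalg_user",
        srcs = ["my_linalg_user.cc"],
        deps = [
            # was "//tensorflow/core/kernels:linalg"
            "//tensorflow/core/kernels/linalg:linalg",
            # was "//tensorflow/core/kernels:matrix_inverse_op"
            "//tensorflow/core/kernels/linalg:matrix_inverse_op",
        ],
    )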
PiperOrigin-RevId: 324281944 Change-Id: Ic30d1c2f9be17c32c3968f84caec00d5270dd492 --- tensorflow/core/BUILD | 2 +- tensorflow/core/kernels/BUILD | 411 ++---------------- tensorflow/core/kernels/linalg/BUILD | 353 +++++++++++++++ .../banded_triangular_solve_op.cc | 2 +- .../banded_triangular_solve_op_test.cc | 2 +- .../kernels/{ => linalg}/cholesky_grad.cc | 2 +- .../core/kernels/{ => linalg}/cholesky_op.cc | 6 +- .../kernels/{ => linalg}/determinant_op.cc | 6 +- .../kernels/{ => linalg}/determinant_op.h | 6 +- .../{ => linalg}/determinant_op_gpu.cu.cc | 4 +- .../kernels/{ => linalg}/eig_op_complex128.cc | 2 +- .../kernels/{ => linalg}/eig_op_complex64.cc | 2 +- .../kernels/{ => linalg}/eig_op_double.cc | 2 +- .../core/kernels/{ => linalg}/eig_op_float.cc | 2 +- .../core/kernels/{ => linalg}/eig_op_impl.h | 8 +- .../core/kernels/{ => linalg}/einsum_op.h | 4 +- .../kernels/{ => linalg}/einsum_op_gpu.cu.cc | 2 +- .../kernels/{ => linalg}/einsum_op_impl.h | 8 +- .../{ => linalg}/einsum_op_impl_bfloat16.cc | 2 +- .../{ => linalg}/einsum_op_impl_complex128.cc | 2 +- .../{ => linalg}/einsum_op_impl_complex64.cc | 2 +- .../{ => linalg}/einsum_op_impl_double.cc | 2 +- .../{ => linalg}/einsum_op_impl_float.cc | 2 +- .../{ => linalg}/einsum_op_impl_half.cc | 2 +- .../{ => linalg}/einsum_op_impl_int32.cc | 2 +- .../{ => linalg}/einsum_op_impl_int64.cc | 2 +- .../core/kernels/{ => linalg}/eye_functor.h | 4 +- .../{ => linalg}/eye_functor_gpu.cu.cc | 2 +- .../kernels/{ => linalg}/linalg_ops_common.cc | 2 +- .../core/kernels/linalg/linalg_ops_common.h | 221 ++++++++++ tensorflow/core/kernels/{ => linalg}/lu_op.cc | 0 .../core/kernels/{ => linalg}/lu_op_gpu.cu.cc | 2 +- .../{ => linalg}/matrix_band_part_op.cc | 3 +- .../{ => linalg}/matrix_band_part_op.h | 6 +- .../matrix_band_part_op_gpu.cu.cc | 2 +- .../kernels/{ => linalg}/matrix_diag_op.cc | 2 +- .../kernels/{ => linalg}/matrix_diag_op.h | 6 +- .../{ => linalg}/matrix_diag_op_gpu.cu.cc | 2 +- .../{ => linalg}/matrix_exponential_op.cc | 2 +- .../kernels/{ => linalg}/matrix_inverse_op.cc | 6 +- .../{ => linalg}/matrix_logarithm_op.cc | 2 +- .../{ => linalg}/matrix_set_diag_op.cc | 4 +- .../kernels/{ => linalg}/matrix_set_diag_op.h | 6 +- .../{ => linalg}/matrix_set_diag_op_gpu.cu.cc | 2 +- .../matrix_solve_ls_op_complex128.cc | 2 +- .../matrix_solve_ls_op_complex64.cc | 2 +- .../{ => linalg}/matrix_solve_ls_op_double.cc | 2 +- .../{ => linalg}/matrix_solve_ls_op_float.cc | 2 +- .../{ => linalg}/matrix_solve_ls_op_impl.h | 8 +- .../kernels/{ => linalg}/matrix_solve_op.cc | 4 +- .../{ => linalg}/matrix_square_root_op.cc | 2 +- .../matrix_triangular_solve_op_complex.cc | 2 +- .../matrix_triangular_solve_op_impl.h | 12 +- .../matrix_triangular_solve_op_real.cc | 2 +- .../matrix_triangular_solve_op_test.cc | 0 .../kernels/{ => linalg}/qr_op_complex128.cc | 2 +- .../kernels/{ => linalg}/qr_op_complex64.cc | 2 +- .../core/kernels/{ => linalg}/qr_op_double.cc | 2 +- .../core/kernels/{ => linalg}/qr_op_float.cc | 2 +- .../core/kernels/{ => linalg}/qr_op_impl.h | 14 +- .../{ => linalg}/self_adjoint_eig_op.cc | 2 +- .../self_adjoint_eig_v2_op_complex128.cc | 2 +- .../self_adjoint_eig_v2_op_complex64.cc | 2 +- .../self_adjoint_eig_v2_op_double.cc | 2 +- .../self_adjoint_eig_v2_op_float.cc | 2 +- .../self_adjoint_eig_v2_op_gpu.cc | 2 +- .../self_adjoint_eig_v2_op_impl.h | 8 +- .../kernels/{ => linalg}/svd_op_complex128.cc | 2 +- .../kernels/{ => linalg}/svd_op_complex64.cc | 2 +- .../kernels/{ => linalg}/svd_op_double.cc | 2 +- .../core/kernels/{ => 
linalg}/svd_op_float.cc | 2 +- .../kernels/{ => linalg}/svd_op_gpu.cu.cc | 6 +- .../core/kernels/{ => linalg}/svd_op_impl.h | 8 +- .../{ => linalg}/tridiagonal_matmul_op.cc | 2 +- .../tridiagonal_matmul_op_gpu.cu.cc | 6 +- .../{ => linalg}/tridiagonal_solve_op.cc | 2 +- .../tridiagonal_solve_op_gpu.cu.cc | 6 +- tensorflow/core/kernels/linalg_ops_common.h | 205 +-------- .../core/kernels/segment_reduction_ops_impl.h | 4 +- tensorflow/core/kernels/sparse/BUILD | 4 +- tensorflow/core/kernels/sparse/add_op.cc | 4 +- tensorflow/core/kernels/sparse/conj_op.cc | 4 +- .../sparse/csr_sparse_matrix_to_dense_op.cc | 4 +- .../csr_sparse_matrix_to_sparse_tensor_op.cc | 4 +- .../sparse/dense_to_csr_sparse_matrix_op.cc | 4 +- .../core/kernels/sparse/kernels_gpu.cu.cc | 2 +- tensorflow/core/kernels/sparse/mat_mul_op.cc | 4 +- tensorflow/core/kernels/sparse/mul_op.cc | 2 +- tensorflow/core/kernels/sparse/nnz_op.cc | 4 +- tensorflow/core/kernels/sparse/softmax_op.cc | 2 +- .../core/kernels/sparse/sparse_mat_mul_op.cc | 4 +- .../sparse/sparse_matrix_components_op.cc | 4 +- .../sparse_tensor_to_csr_sparse_matrix_op.cc | 4 +- .../core/kernels/sparse/transpose_op.cc | 2 +- tensorflow/core/kernels/where_op.cc | 2 +- tensorflow/core/util/BUILD | 63 +++ .../core/{kernels => util}/cuda_solvers.cc | 2 +- .../core/{kernels => util}/cuda_solvers.h | 6 +- .../core/{kernels => util}/cuda_sparse.cc | 4 +- .../core/{kernels => util}/cuda_sparse.h | 9 +- .../core/{kernels => util}/rocm_solvers.cc | 2 +- .../core/{kernels => util}/rocm_solvers.h | 6 +- .../core/{kernels => util}/rocm_sparse.cc | 4 +- 103 files changed, 840 insertions(+), 751 deletions(-) create mode 100644 tensorflow/core/kernels/linalg/BUILD rename tensorflow/core/kernels/{ => linalg}/banded_triangular_solve_op.cc (99%) rename tensorflow/core/kernels/{ => linalg}/banded_triangular_solve_op_test.cc (99%) rename tensorflow/core/kernels/{ => linalg}/cholesky_grad.cc (99%) rename tensorflow/core/kernels/{ => linalg}/cholesky_op.cc (98%) rename tensorflow/core/kernels/{ => linalg}/determinant_op.cc (99%) rename tensorflow/core/kernels/{ => linalg}/determinant_op.h (90%) rename tensorflow/core/kernels/{ => linalg}/determinant_op_gpu.cu.cc (98%) rename tensorflow/core/kernels/{ => linalg}/eig_op_complex128.cc (93%) rename tensorflow/core/kernels/{ => linalg}/eig_op_complex64.cc (93%) rename tensorflow/core/kernels/{ => linalg}/eig_op_double.cc (93%) rename tensorflow/core/kernels/{ => linalg}/eig_op_float.cc (93%) rename tensorflow/core/kernels/{ => linalg}/eig_op_impl.h (93%) rename tensorflow/core/kernels/{ => linalg}/einsum_op.h (94%) rename tensorflow/core/kernels/{ => linalg}/einsum_op_gpu.cu.cc (96%) rename tensorflow/core/kernels/{ => linalg}/einsum_op_impl.h (99%) rename tensorflow/core/kernels/{ => linalg}/einsum_op_impl_bfloat16.cc (94%) rename tensorflow/core/kernels/{ => linalg}/einsum_op_impl_complex128.cc (95%) rename tensorflow/core/kernels/{ => linalg}/einsum_op_impl_complex64.cc (95%) rename tensorflow/core/kernels/{ => linalg}/einsum_op_impl_double.cc (95%) rename tensorflow/core/kernels/{ => linalg}/einsum_op_impl_float.cc (95%) rename tensorflow/core/kernels/{ => linalg}/einsum_op_impl_half.cc (95%) rename tensorflow/core/kernels/{ => linalg}/einsum_op_impl_int32.cc (94%) rename tensorflow/core/kernels/{ => linalg}/einsum_op_impl_int64.cc (94%) rename tensorflow/core/kernels/{ => linalg}/eye_functor.h (90%) rename tensorflow/core/kernels/{ => linalg}/eye_functor_gpu.cu.cc (97%) rename tensorflow/core/kernels/{ => linalg}/linalg_ops_common.cc 
(99%) create mode 100644 tensorflow/core/kernels/linalg/linalg_ops_common.h rename tensorflow/core/kernels/{ => linalg}/lu_op.cc (100%) rename tensorflow/core/kernels/{ => linalg}/lu_op_gpu.cu.cc (99%) rename tensorflow/core/kernels/{ => linalg}/matrix_band_part_op.cc (99%) rename tensorflow/core/kernels/{ => linalg}/matrix_band_part_op.h (86%) rename tensorflow/core/kernels/{ => linalg}/matrix_band_part_op_gpu.cu.cc (97%) rename tensorflow/core/kernels/{ => linalg}/matrix_diag_op.cc (99%) rename tensorflow/core/kernels/{ => linalg}/matrix_diag_op.h (94%) rename tensorflow/core/kernels/{ => linalg}/matrix_diag_op_gpu.cu.cc (99%) rename tensorflow/core/kernels/{ => linalg}/matrix_exponential_op.cc (97%) rename tensorflow/core/kernels/{ => linalg}/matrix_inverse_op.cc (98%) rename tensorflow/core/kernels/{ => linalg}/matrix_logarithm_op.cc (97%) rename tensorflow/core/kernels/{ => linalg}/matrix_set_diag_op.cc (99%) rename tensorflow/core/kernels/{ => linalg}/matrix_set_diag_op.h (89%) rename tensorflow/core/kernels/{ => linalg}/matrix_set_diag_op_gpu.cu.cc (99%) rename tensorflow/core/kernels/{ => linalg}/matrix_solve_ls_op_complex128.cc (92%) rename tensorflow/core/kernels/{ => linalg}/matrix_solve_ls_op_complex64.cc (92%) rename tensorflow/core/kernels/{ => linalg}/matrix_solve_ls_op_double.cc (92%) rename tensorflow/core/kernels/{ => linalg}/matrix_solve_ls_op_float.cc (92%) rename tensorflow/core/kernels/{ => linalg}/matrix_solve_ls_op_impl.h (96%) rename tensorflow/core/kernels/{ => linalg}/matrix_solve_op.cc (99%) rename tensorflow/core/kernels/{ => linalg}/matrix_square_root_op.cc (97%) rename tensorflow/core/kernels/{ => linalg}/matrix_triangular_solve_op_complex.cc (92%) rename tensorflow/core/kernels/{ => linalg}/matrix_triangular_solve_op_impl.h (97%) rename tensorflow/core/kernels/{ => linalg}/matrix_triangular_solve_op_real.cc (93%) rename tensorflow/core/kernels/{ => linalg}/matrix_triangular_solve_op_test.cc (100%) rename tensorflow/core/kernels/{ => linalg}/qr_op_complex128.cc (96%) rename tensorflow/core/kernels/{ => linalg}/qr_op_complex64.cc (95%) rename tensorflow/core/kernels/{ => linalg}/qr_op_double.cc (96%) rename tensorflow/core/kernels/{ => linalg}/qr_op_float.cc (96%) rename tensorflow/core/kernels/{ => linalg}/qr_op_impl.h (96%) rename tensorflow/core/kernels/{ => linalg}/self_adjoint_eig_op.cc (98%) rename tensorflow/core/kernels/{ => linalg}/self_adjoint_eig_v2_op_complex128.cc (93%) rename tensorflow/core/kernels/{ => linalg}/self_adjoint_eig_v2_op_complex64.cc (93%) rename tensorflow/core/kernels/{ => linalg}/self_adjoint_eig_v2_op_double.cc (92%) rename tensorflow/core/kernels/{ => linalg}/self_adjoint_eig_v2_op_float.cc (92%) rename tensorflow/core/kernels/{ => linalg}/self_adjoint_eig_v2_op_gpu.cc (99%) rename tensorflow/core/kernels/{ => linalg}/self_adjoint_eig_v2_op_impl.h (91%) rename tensorflow/core/kernels/{ => linalg}/svd_op_complex128.cc (93%) rename tensorflow/core/kernels/{ => linalg}/svd_op_complex64.cc (93%) rename tensorflow/core/kernels/{ => linalg}/svd_op_double.cc (93%) rename tensorflow/core/kernels/{ => linalg}/svd_op_float.cc (93%) rename tensorflow/core/kernels/{ => linalg}/svd_op_gpu.cu.cc (99%) rename tensorflow/core/kernels/{ => linalg}/svd_op_impl.h (95%) rename tensorflow/core/kernels/{ => linalg}/tridiagonal_matmul_op.cc (98%) rename tensorflow/core/kernels/{ => linalg}/tridiagonal_matmul_op_gpu.cu.cc (96%) rename tensorflow/core/kernels/{ => linalg}/tridiagonal_solve_op.cc (99%) rename tensorflow/core/kernels/{ => 
linalg}/tridiagonal_solve_op_gpu.cu.cc (99%) rename tensorflow/core/{kernels => util}/cuda_solvers.cc (99%) rename tensorflow/core/{kernels => util}/cuda_solvers.h (99%) rename tensorflow/core/{kernels => util}/cuda_sparse.cc (99%) rename tensorflow/core/{kernels => util}/cuda_sparse.h (99%) rename tensorflow/core/{kernels => util}/rocm_solvers.cc (99%) rename tensorflow/core/{kernels => util}/rocm_solvers.h (96%) rename tensorflow/core/{kernels => util}/rocm_sparse.cc (99%) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 0279dc84a5c..71d08d8abbe 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -996,7 +996,7 @@ cc_library( "//tensorflow/core/kernels:histogram_op", "//tensorflow/core/kernels:image", "//tensorflow/core/kernels:io", - "//tensorflow/core/kernels:linalg", + "//tensorflow/core/kernels/linalg:linalg", "//tensorflow/core/kernels:lookup", "//tensorflow/core/kernels:logging", "//tensorflow/core/kernels:manip", diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index f98053c7d4f..0f507273258 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -1041,9 +1041,6 @@ cc_library( ":immutable_constant_op", ":inplace_ops", ":listdiff_op", - ":matrix_band_part_op", - ":matrix_diag_op", - ":matrix_set_diag_op", ":mirror_pad_op", ":one_hot_op", ":pack_op", @@ -1176,26 +1173,6 @@ tf_kernel_library( deps = ARRAY_DEPS, ) -tf_kernel_library( - name = "matrix_band_part_op", - prefix = "matrix_band_part_op", - deps = if_cuda([ - ":cuda_solvers", - ]) + ARRAY_DEPS, -) - -tf_kernel_library( - name = "matrix_diag_op", - prefix = "matrix_diag_op", - deps = ARRAY_DEPS, -) - -tf_kernel_library( - name = "matrix_set_diag_op", - prefix = "matrix_set_diag_op", - deps = ARRAY_DEPS + [":matrix_diag_op"], -) - tf_kernel_library( name = "mirror_pad_op", prefix = "mirror_pad_op", @@ -1407,7 +1384,7 @@ tf_kernel_library( "where_op_gpu_impl_8.cu.cc", ], deps = if_cuda_or_rocm([ - ":cuda_solvers", + "//tensorflow/core/util:cuda_solvers", ]) + [":gpu_prim_hdrs"] + ARRAY_DEPS, ) @@ -2787,21 +2764,6 @@ tf_cuda_cc_tests( ], ) -tf_kernel_library( - name = "eye_functor", - hdrs = ["eye_functor.h"], - gpu_srcs = [ - "eye_functor_gpu.cu.cc", - "eye_functor.h", - ], - visibility = [":friends"], - deps = [ - "//tensorflow/core:framework", - "//third_party/eigen3", - ], - alwayslink = 0, -) - cc_library( name = "fifo_queue", srcs = ["fifo_queue.cc"], @@ -3560,289 +3522,6 @@ tf_cc_tests( ], ) -cc_library( - name = "linalg", - deps = [ - ":banded_triangular_solve_op", - ":cholesky_grad", - ":cholesky_op", - ":determinant_op", - ":eig_op", - ":einsum_op", - ":lu_op", - ":matrix_exponential_op", - ":matrix_inverse_op", - ":matrix_logarithm_op", - ":matrix_solve_ls_op", - ":matrix_solve_op", - ":matrix_square_root_op", - ":matrix_triangular_solve_op", - ":qr_op", - ":self_adjoint_eig_op", - ":self_adjoint_eig_v2_op", - ":svd_op", - ":tridiagonal_matmul_op", - ":tridiagonal_solve_op", - ], -) - -tf_kernel_library( - name = "cuda_solvers", - srcs = ["cuda_solvers.cc"], - hdrs = ["cuda_solvers.h"], - # @local_config_cuda//cuda:cusolver_static, //third_party/eigen3:blas, - # and //third_party/libf2c all contain various parts of BLAS, LAPACK, - # and f2c helper functions in global namespace. Tell the compiler to - # allow multiple definitions when linking this. 
- linkopts = select({ - "//tensorflow:macos": [], - "//tensorflow:windows": [], - "//conditions:default": ["-Wl,-z,muldefs"], - }), - visibility = [":friends"], - deps = [ - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core/platform/default/build_config:cublas_plugin", - "//tensorflow/stream_executor/cuda:cublas_lib", - "//tensorflow/stream_executor/cuda:cusolver_lib", - ], -) - -tf_kernel_library( - name = "rocm_solvers", - srcs = ["rocm_solvers.cc"], - hdrs = ["rocm_solvers.h"], - visibility = [":friends"], - deps = [ - "//tensorflow/core:framework", - "//tensorflow/core:framework_internal", - "//tensorflow/core:lib", - "//tensorflow/stream_executor/lib", - "//tensorflow/stream_executor/platform:dso_loader", - "//tensorflow/stream_executor/rocm:rocblas_plugin", - "//tensorflow/stream_executor/rocm:rocm_gpu_executor", - ] + if_rocm([ - "@local_config_rocm//rocm:rocprim", - ]), -) - -tf_kernel_library( - name = "cuda_sparse", - srcs = if_cuda(["cuda_sparse.cc"]) + if_rocm(["rocm_sparse.cc"]), - hdrs = ["cuda_sparse.h"], - deps = [ - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core/kernels:cuda_solvers", - ] + if_cuda([ - "//tensorflow/stream_executor/cuda:cusparse_lib", - "@cub_archive//:cub", - ]) + if_rocm([ - "@local_config_rocm//rocm:hipsparse", - ]), -) - -LINALG_DEPS = [ - ":linalg_ops_common", - "//third_party/eigen3", - "//tensorflow/core:framework", - "//tensorflow/core:lib", -] + if_cuda([ - ":cuda_solvers", - ":transpose_functor", -]) + if_rocm([ - ":rocm_solvers", -]) - -tf_kernel_library( - name = "cholesky_op", - prefix = "cholesky_op", - deps = if_cuda([ - ":matrix_band_part_op", - ]) + LINALG_DEPS, -) - -tf_kernel_library( - name = "cholesky_grad", - prefix = "cholesky_grad", - deps = LINALG_DEPS, -) - -tf_kernel_library( - name = "determinant_op", - prefix = "determinant_op", - deps = if_cuda([ - ":fill_functor", - ]) + LINALG_DEPS, -) - -tf_kernel_library( - name = "matrix_exponential_op", - prefix = "matrix_exponential_op", - deps = LINALG_DEPS, -) - -tf_kernel_library( - name = "matrix_logarithm_op", - prefix = "matrix_logarithm_op", - deps = LINALG_DEPS, -) - -tf_kernel_library( - name = "self_adjoint_eig_op", - prefix = "self_adjoint_eig_op", - deps = LINALG_DEPS + ["//tensorflow/core:lib_internal"], -) - -tf_kernel_library( - name = "self_adjoint_eig_v2_op", - prefix = "self_adjoint_eig_v2_op", - deps = LINALG_DEPS + ["//tensorflow/core:lib_internal"] + if_cuda([ - ":cast_op", - ":cwise_op", - ]), -) - -tf_kernel_library( - name = "eig_op", - prefix = "eig_op", - deps = LINALG_DEPS + ["//tensorflow/core:lib_internal"] + if_cuda([ - ":cast_op", - ":cwise_op", - ]), -) - -tf_kernel_library( - name = "matrix_inverse_op", - prefix = "matrix_inverse_op", - deps = LINALG_DEPS + if_cuda([":eye_functor"]), -) - -tf_kernel_library( - name = "matrix_solve_ls_op", - prefix = "matrix_solve_ls_op", - deps = LINALG_DEPS, -) - -tf_kernel_library( - name = "matrix_solve_op", - prefix = "matrix_solve_op", - deps = LINALG_DEPS, -) - -tf_kernel_library( - name = "matrix_square_root_op", - prefix = "matrix_square_root_op", - deps = LINALG_DEPS, -) - -tf_kernel_library( - name = "banded_triangular_solve_op", - prefix = "banded_triangular_solve_op", - deps = LINALG_DEPS + [":fill_functor"], -) - -tf_kernel_library( - name = "matrix_triangular_solve_op", - hdrs = ["matrix_triangular_solve_op_impl.h"], - prefix = "matrix_triangular_solve_op", - deps = [ - ":linalg_ops_common", - "//third_party/eigen3", - 
"//tensorflow/core:framework", - "//tensorflow/core:lib", - ":fill_functor", - "//tensorflow/core:stream_executor", - ] + if_cuda([ - "//tensorflow/core/platform/default/build_config:cublas_plugin", - ":cuda_solvers", - ]) + if_rocm([ - "@local_config_rocm//rocm:rocprim", - ":rocm_solvers", - ]) + if_cuda_or_rocm([ - ":transpose_functor", - ]), -) - -tf_kernel_library( - name = "tridiagonal_matmul_op", - srcs = ["tridiagonal_matmul_op.cc"], - gpu_srcs = ["tridiagonal_matmul_op_gpu.cu.cc"], - deps = LINALG_DEPS + if_cuda([ - ":cuda_sparse", - ]), -) - -tf_kernel_library( - name = "tridiagonal_solve_op", - srcs = ["tridiagonal_solve_op.cc"], - gpu_srcs = ["tridiagonal_solve_op_gpu.cu.cc"], - deps = LINALG_DEPS + if_cuda([ - ":cuda_sparse", - ]), -) - -tf_kernel_library( - name = "qr_op", - prefix = "qr_op", - deps = LINALG_DEPS + if_cuda([ - ":cwise_op", - ":eye_functor", - ":matrix_band_part_op", - ]), -) - -tf_kernel_library( - name = "svd_op", - prefix = "svd_op", - deps = LINALG_DEPS + if_cuda([ - ":eye_functor", - ]), -) - -tf_kernel_library( - name = "lu_op", - prefix = "lu_op", - deps = if_cuda([ - ":cuda_solvers", - ":transpose_functor", - ]) + [ - "//third_party/eigen3", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - ], -) - -tf_kernel_library( - name = "einsum_op", - prefix = "einsum_op", - deps = [ - ":batch_matmul_op", - ":fill_functor", - ":reduction_ops", - ":transpose_functor", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core/profiler/lib:traceme", - "//third_party/eigen3", - "@com_google_absl//absl/container:flat_hash_map", - "@com_google_absl//absl/strings", - ], -) - -cc_library( - name = "linalg_ops_common", - srcs = ["linalg_ops_common.cc"], - hdrs = ["linalg_ops_common.h"], - visibility = ["//visibility:private"], - deps = [ - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//third_party/eigen3", - ], -) - cc_library( name = "logging", deps = [ @@ -4208,7 +3887,7 @@ tf_kernel_library( name = "segment_reduction_ops", prefix = "segment_reduction_ops", deps = MATH_DEPS + if_cuda_or_rocm([ - ":cuda_solvers", + "//tensorflow/core/util:cuda_solvers", ]), ) @@ -4405,45 +4084,6 @@ tf_cuda_cc_test( ], ) -tf_cuda_cc_test( - name = "banded_triangular_solve_op_test", - size = "small", - srcs = ["banded_triangular_solve_op_test.cc"], - deps = [ - ":banded_triangular_solve_op", - ":matrix_set_diag_op", - ":matrix_triangular_solve_op", - ":ops_testutil", - ":ops_util", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - ], -) - -tf_cuda_cc_test( - name = "matrix_triangular_solve_op_test", - size = "small", - srcs = ["matrix_triangular_solve_op_test.cc"], - deps = [ - ":broadcast_to_op", - ":matrix_triangular_solve_op", - ":ops_testutil", - ":ops_util", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - ], -) - tf_cuda_cc_test( name = "scan_ops_test", size = "small", @@ -6672,10 +6312,7 @@ filegroup( "lookup_table_init_op.h", "lookup_table_op.h", "lookup_util.h", - "linalg_ops_common.h", "list_kernels.h", - "matrix_diag_op.h", - "matrix_set_diag_op.h", "maxpooling_op.h", "mfcc.h", "mfcc_dct.h", @@ -6723,6 +6360,9 @@ filegroup( "xent_op.h", ] + [ 
"//tensorflow/core/kernels/boosted_trees/quantiles:weighted_quantiles_hdrs", + "//tensorflow/core/kernels/linalg:linalg_ops_common.h", + "//tensorflow/core/kernels/linalg:matrix_diag_op.h", + "//tensorflow/core/kernels/linalg:matrix_set_diag_op.h", ], ) @@ -6823,16 +6463,6 @@ filegroup( "encode_wav_op.cc", "eigen_contraction_kernel.cc", "eigen_contraction_kernel.h", - "einsum_op_impl_half.cc", - "einsum_op_impl_bfloat16.cc", - "einsum_op_impl_int32.cc", - "einsum_op_impl_int64.cc", - "einsum_op_impl_float.cc", - "einsum_op_impl_double.cc", - "einsum_op_impl_complex64.cc", - "einsum_op_impl_complex128.cc", - "einsum_op_impl.h", - "einsum_op.h", "fake_quant_ops.cc", "fifo_queue.cc", "fifo_queue_op.cc", @@ -6844,6 +6474,17 @@ filegroup( "population_count_op.h", "winograd_transform.h", ":android_extended_ops_headers", + ] + [ + "//tensorflow/core/kernels/linalg:einsum_op_impl_half.cc", + "//tensorflow/core/kernels/linalg:einsum_op_impl_bfloat16.cc", + "//tensorflow/core/kernels/linalg:einsum_op_impl_int32.cc", + "//tensorflow/core/kernels/linalg:einsum_op_impl_int64.cc", + "//tensorflow/core/kernels/linalg:einsum_op_impl_float.cc", + "//tensorflow/core/kernels/linalg:einsum_op_impl_double.cc", + "//tensorflow/core/kernels/linalg:einsum_op_impl_complex64.cc", + "//tensorflow/core/kernels/linalg:einsum_op_impl_complex128.cc", + "//tensorflow/core/kernels/linalg:einsum_op_impl.h", + "//tensorflow/core/kernels/linalg:einsum_op.h", ] + select({ ":xsmm_convolutions": [ "xsmm_conv2d.h", @@ -6874,7 +6515,6 @@ filegroup( "in_topk_op.cc", "in_topk_op.h", "initializable_lookup_table.cc", - "linalg_ops_common.cc", "list_kernels.cc", "logging_ops.cc", "logging_ops.h", @@ -6882,9 +6522,6 @@ filegroup( "lookup_table_op.cc", "lookup_util.cc", "lrn_op.cc", - "matrix_diag_op.cc", - "matrix_inverse_op.cc", - "matrix_set_diag_op.cc", "maxpooling_op.cc", "mfcc.cc", "mfcc_dct.cc", @@ -7006,6 +6643,10 @@ filegroup( ":android_extended_ops_headers", ] + [ "//tensorflow/core/kernels/boosted_trees:quantile_ops.cc", + "//tensorflow/core/kernels/linalg:linalg_ops_common.cc", + "//tensorflow/core/kernels/linalg:matrix_diag_op.cc", + "//tensorflow/core/kernels/linalg:matrix_inverse_op.cc", + "//tensorflow/core/kernels/linalg:matrix_set_diag_op.cc", ], ) @@ -8827,3 +8468,15 @@ tf_kernel_library( "@sobol_data", ], ) + +# ---- temporary forwarding declaration for libraries in linalg +# TODO(b/160344057): Remove after updating dependencies. +tf_kernel_library( + name = "matrix_inverse_op", + deps = ["//tensorflow/core/kernels/linalg:matrix_inverse_op"], +) + +tf_kernel_library( + name = "einsum_op", + deps = ["//tensorflow/core/kernels/linalg:einsum_op"], +) diff --git a/tensorflow/core/kernels/linalg/BUILD b/tensorflow/core/kernels/linalg/BUILD new file mode 100644 index 00000000000..52bead431ea --- /dev/null +++ b/tensorflow/core/kernels/linalg/BUILD @@ -0,0 +1,353 @@ +load( + "//tensorflow:tensorflow.bzl", + "if_cuda_or_rocm", + "tf_kernel_library", +) +load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") +load( + "@local_config_rocm//rocm:build_defs.bzl", + "if_rocm", +) +load("//tensorflow:tensorflow.bzl", "tf_cuda_cc_test") + +# Description: +# Op kernel implementations for TensorFlow. +# +# Note: Any test that uses GPU support and which we would like to +# benchmark should be linked statically so that it can be executed +# from a py_binary or cuda_py_test test logger. For such a test, +# append "_gpu" to the test name to invoke the GPU benchmarks. 
Example: +# +# # for CPU tests +# $ bazel test --config opt //third_party/tensorflow/core/kernels:my_op_test +# # for GPU benchmarks +# $ bazel run --config opt --config=cuda //third_party/tensorflow/core/kernels:my_op_test_gpu -- --benchmarks=.. +# +package( + default_visibility = [ + "//tensorflow:__subpackages__", + "//tensorflow:internal", + ], + licenses = ["notice"], # Apache 2.0 +) + +# Export a few files for use on Android. +exports_files([ + "einsum_op_impl_half.cc", + "einsum_op_impl_bfloat16.cc", + "einsum_op_impl_int32.cc", + "einsum_op_impl_int64.cc", + "einsum_op_impl_float.cc", + "einsum_op_impl_double.cc", + "einsum_op_impl_complex64.cc", + "einsum_op_impl_complex128.cc", + "einsum_op_impl.h", + "einsum_op.h", + "linalg_ops_common.h", + "linalg_ops_common.cc", + "matrix_diag_op.h", + "matrix_diag_op.cc", + "matrix_inverse_op.cc", + "matrix_set_diag_op.h", + "matrix_set_diag_op.cc", +]) + +# Public support libraries ---------------------------------------------------- + +cc_library( + name = "linalg", + deps = [ + ":banded_triangular_solve_op", + ":cholesky_grad", + ":cholesky_op", + ":determinant_op", + ":eig_op", + ":einsum_op", + ":lu_op", + ":matrix_band_part_op", + ":matrix_diag_op", + ":matrix_exponential_op", + ":matrix_inverse_op", + ":matrix_logarithm_op", + ":matrix_set_diag_op", + ":matrix_solve_ls_op", + ":matrix_solve_op", + ":matrix_square_root_op", + ":matrix_triangular_solve_op", + ":qr_op", + ":self_adjoint_eig_op", + ":self_adjoint_eig_v2_op", + ":svd_op", + ":tridiagonal_matmul_op", + ":tridiagonal_solve_op", + ], +) + +LINALG_DEPS = [ + ":linalg_ops_common", + ":eye_functor", + "//third_party/eigen3", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core/kernels:cast_op", + "//tensorflow/core/kernels:fill_functor", +] + if_cuda([ + "//tensorflow/core/util:cuda_solvers", + "//tensorflow/core/kernels:transpose_functor", +]) + if_rocm([ + "//tensorflow/core/util:rocm_solvers", +]) + +tf_kernel_library( + name = "matrix_band_part_op", + prefix = "matrix_band_part_op", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "matrix_diag_op", + prefix = "matrix_diag_op", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "matrix_set_diag_op", + prefix = "matrix_set_diag_op", + deps = LINALG_DEPS + [":matrix_diag_op"], +) + +tf_kernel_library( + name = "cholesky_op", + prefix = "cholesky_op", + deps = if_cuda([ + ":matrix_band_part_op", + ]) + LINALG_DEPS, +) + +tf_kernel_library( + name = "cholesky_grad", + prefix = "cholesky_grad", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "determinant_op", + prefix = "determinant_op", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "matrix_exponential_op", + prefix = "matrix_exponential_op", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "matrix_logarithm_op", + prefix = "matrix_logarithm_op", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "self_adjoint_eig_op", + prefix = "self_adjoint_eig_op", + deps = LINALG_DEPS + ["//tensorflow/core:lib_internal"], +) + +tf_kernel_library( + name = "self_adjoint_eig_v2_op", + prefix = "self_adjoint_eig_v2_op", + deps = LINALG_DEPS + ["//tensorflow/core:lib_internal"] + if_cuda([ + "//tensorflow/core/kernels:cwise_op", + ]), +) + +tf_kernel_library( + name = "eig_op", + prefix = "eig_op", + deps = LINALG_DEPS + ["//tensorflow/core:lib_internal"] + if_cuda([ + "//tensorflow/core/kernels:cwise_op", + ]), +) + +tf_kernel_library( + name = "matrix_inverse_op", + prefix = "matrix_inverse_op", + deps = LINALG_DEPS, +) + 
+tf_kernel_library( + name = "matrix_solve_ls_op", + prefix = "matrix_solve_ls_op", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "matrix_solve_op", + prefix = "matrix_solve_op", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "matrix_square_root_op", + prefix = "matrix_square_root_op", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "banded_triangular_solve_op", + prefix = "banded_triangular_solve_op", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "matrix_triangular_solve_op", + hdrs = ["matrix_triangular_solve_op_impl.h"], + prefix = "matrix_triangular_solve_op", + deps = [ + ":linalg_ops_common", + "//third_party/eigen3", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core/kernels:fill_functor", + "//tensorflow/core:stream_executor", + ] + if_cuda([ + "//tensorflow/core/platform/default/build_config:cublas_plugin", + "//tensorflow/core/util:cuda_solvers", + ]) + if_rocm([ + "@local_config_rocm//rocm:rocprim", + "//tensorflow/core/util:rocm_solvers", + ]) + if_cuda_or_rocm([ + "//tensorflow/core/kernels:transpose_functor", + ]), +) + +tf_kernel_library( + name = "tridiagonal_matmul_op", + srcs = ["tridiagonal_matmul_op.cc"], + gpu_srcs = ["tridiagonal_matmul_op_gpu.cu.cc"], + deps = LINALG_DEPS + if_cuda([ + "//tensorflow/core/util:cuda_sparse", + ]), +) + +tf_kernel_library( + name = "tridiagonal_solve_op", + srcs = ["tridiagonal_solve_op.cc"], + gpu_srcs = ["tridiagonal_solve_op_gpu.cu.cc"], + deps = LINALG_DEPS + if_cuda([ + "//tensorflow/core/util:cuda_sparse", + ]), +) + +tf_kernel_library( + name = "qr_op", + prefix = "qr_op", + deps = LINALG_DEPS + if_cuda([ + "//tensorflow/core/kernels:cwise_op", + ":matrix_band_part_op", + ]), +) + +tf_kernel_library( + name = "svd_op", + prefix = "svd_op", + deps = LINALG_DEPS + if_cuda([ + ]), +) + +tf_kernel_library( + name = "lu_op", + prefix = "lu_op", + deps = if_cuda([ + "//tensorflow/core/util:cuda_solvers", + "//tensorflow/core/kernels:transpose_functor", + ]) + [ + "//third_party/eigen3", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + ], +) + +tf_kernel_library( + name = "einsum_op", + prefix = "einsum_op", + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core/kernels:batch_matmul_op", + "//tensorflow/core/kernels:fill_functor", + "//tensorflow/core/kernels:reduction_ops", + "//tensorflow/core/kernels:transpose_functor", + "//tensorflow/core/profiler/lib:traceme", + "//third_party/eigen3", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/strings", + ], +) + +cc_library( + name = "linalg_ops_common", + srcs = ["linalg_ops_common.cc"], + hdrs = ["linalg_ops_common.h"], + visibility = ["//visibility:private"], + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//third_party/eigen3", + ], +) + +tf_cuda_cc_test( + name = "banded_triangular_solve_op_test", + size = "small", + srcs = ["banded_triangular_solve_op_test.cc"], + deps = [ + ":banded_triangular_solve_op", + ":matrix_set_diag_op", + ":matrix_triangular_solve_op", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/kernels:ops_testutil", + "//tensorflow/core/kernels:ops_util", + ], +) + +tf_kernel_library( + name = "eye_functor", + hdrs = ["eye_functor.h"], + gpu_srcs = [ + "eye_functor_gpu.cu.cc", + "eye_functor.h", + ], + 
visibility = ["//tensorflow/core/kernels:friends"], + deps = [ + "//tensorflow/core:framework", + "//third_party/eigen3", + ], + alwayslink = 0, +) + +tf_cuda_cc_test( + name = "matrix_triangular_solve_op_test", + size = "small", + srcs = ["matrix_triangular_solve_op_test.cc"], + deps = [ + ":matrix_triangular_solve_op", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/kernels:broadcast_to_op", + "//tensorflow/core/kernels:ops_testutil", + "//tensorflow/core/kernels:ops_util", + ], +) diff --git a/tensorflow/core/kernels/banded_triangular_solve_op.cc b/tensorflow/core/kernels/linalg/banded_triangular_solve_op.cc similarity index 99% rename from tensorflow/core/kernels/banded_triangular_solve_op.cc rename to tensorflow/core/kernels/linalg/banded_triangular_solve_op.cc index d01a015502a..6758dcf5b8b 100644 --- a/tensorflow/core/kernels/banded_triangular_solve_op.cc +++ b/tensorflow/core/kernels/linalg/banded_triangular_solve_op.cc @@ -20,7 +20,7 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/kernels/fill_functor.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" diff --git a/tensorflow/core/kernels/banded_triangular_solve_op_test.cc b/tensorflow/core/kernels/linalg/banded_triangular_solve_op_test.cc similarity index 99% rename from tensorflow/core/kernels/banded_triangular_solve_op_test.cc rename to tensorflow/core/kernels/linalg/banded_triangular_solve_op_test.cc index 37e904a3e0e..7c20b88845f 100644 --- a/tensorflow/core/kernels/banded_triangular_solve_op_test.cc +++ b/tensorflow/core/kernels/linalg/banded_triangular_solve_op_test.cc @@ -21,7 +21,7 @@ limitations under the License. #include "tensorflow/core/graph/graph.h" #include "tensorflow/core/graph/node_builder.h" #include "tensorflow/core/graph/testlib.h" -#include "tensorflow/core/kernels/matrix_set_diag_op.h" +#include "tensorflow/core/kernels/linalg/matrix_set_diag_op.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/test_benchmark.h" diff --git a/tensorflow/core/kernels/cholesky_grad.cc b/tensorflow/core/kernels/linalg/cholesky_grad.cc similarity index 99% rename from tensorflow/core/kernels/cholesky_grad.cc rename to tensorflow/core/kernels/linalg/cholesky_grad.cc index eac66e580dd..31a5570cddf 100644 --- a/tensorflow/core/kernels/cholesky_grad.cc +++ b/tensorflow/core/kernels/linalg/cholesky_grad.cc @@ -18,7 +18,7 @@ limitations under the License. 
#include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/types.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/cholesky_op.cc b/tensorflow/core/kernels/linalg/cholesky_op.cc similarity index 98% rename from tensorflow/core/kernels/cholesky_op.cc rename to tensorflow/core/kernels/linalg/cholesky_op.cc index ff8fd08f228..eae09124b36 100644 --- a/tensorflow/core/kernels/cholesky_op.cc +++ b/tensorflow/core/kernels/linalg/cholesky_op.cc @@ -25,16 +25,16 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" #if GOOGLE_CUDA #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/matrix_band_part_op.h" +#include "tensorflow/core/kernels/linalg/matrix_band_part_op.h" #include "tensorflow/core/platform/stream_executor.h" +#include "tensorflow/core/util/cuda_solvers.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/determinant_op.cc b/tensorflow/core/kernels/linalg/determinant_op.cc similarity index 99% rename from tensorflow/core/kernels/determinant_op.cc rename to tensorflow/core/kernels/linalg/determinant_op.cc index b06f42384eb..8f0b0b618cf 100644 --- a/tensorflow/core/kernels/determinant_op.cc +++ b/tensorflow/core/kernels/linalg/determinant_op.cc @@ -20,7 +20,7 @@ limitations under the License. #if GOOGLE_CUDA #define EIGEN_USE_GPU #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#include "tensorflow/core/kernels/determinant_op.h" +#include "tensorflow/core/kernels/linalg/determinant_op.h" #endif #include "third_party/eigen3/Eigen/LU" @@ -28,14 +28,14 @@ limitations under the License. #include "tensorflow/core/framework/numeric_types.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" #if GOOGLE_CUDA -#include "tensorflow/core/kernels/cuda_solvers.h" #include "tensorflow/core/kernels/fill_functor.h" +#include "tensorflow/core/util/cuda_solvers.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/determinant_op.h b/tensorflow/core/kernels/linalg/determinant_op.h similarity index 90% rename from tensorflow/core/kernels/determinant_op.h rename to tensorflow/core/kernels/linalg/determinant_op.h index eefdfe0ae40..6ace1bef44b 100644 --- a/tensorflow/core/kernels/determinant_op.h +++ b/tensorflow/core/kernels/linalg/determinant_op.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_DETERMINANT_OP_H_ -#define TENSORFLOW_CORE_KERNELS_DETERMINANT_OP_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_DETERMINANT_OP_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_DETERMINANT_OP_H_ #include "tensorflow/core/framework/tensor_types.h" @@ -44,4 +44,4 @@ struct LogDeterminantFromPivotedLUFunctor { } // namespace functor } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_DETERMINANT_OP_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_DETERMINANT_OP_H_ diff --git a/tensorflow/core/kernels/determinant_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/determinant_op_gpu.cu.cc similarity index 98% rename from tensorflow/core/kernels/determinant_op_gpu.cu.cc rename to tensorflow/core/kernels/linalg/determinant_op_gpu.cu.cc index 9aa64b3a7da..f6ab327bce0 100644 --- a/tensorflow/core/kernels/determinant_op_gpu.cu.cc +++ b/tensorflow/core/kernels/linalg/determinant_op_gpu.cu.cc @@ -21,8 +21,8 @@ limitations under the License. #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/tensor_types.h" -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/determinant_op.h" +#include "tensorflow/core/kernels/linalg/determinant_op.h" +#include "tensorflow/core/util/cuda_solvers.h" #include "tensorflow/core/util/gpu_kernel_helper.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/eig_op_complex128.cc b/tensorflow/core/kernels/linalg/eig_op_complex128.cc similarity index 93% rename from tensorflow/core/kernels/eig_op_complex128.cc rename to tensorflow/core/kernels/linalg/eig_op_complex128.cc index 988cc2f98d9..bd4b6fe36d0 100644 --- a/tensorflow/core/kernels/eig_op_complex128.cc +++ b/tensorflow/core/kernels/linalg/eig_op_complex128.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/eig_op_impl.h" +#include "tensorflow/core/kernels/linalg/eig_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/eig_op_complex64.cc b/tensorflow/core/kernels/linalg/eig_op_complex64.cc similarity index 93% rename from tensorflow/core/kernels/eig_op_complex64.cc rename to tensorflow/core/kernels/linalg/eig_op_complex64.cc index 6a3f7928715..b5b4a26ee85 100644 --- a/tensorflow/core/kernels/eig_op_complex64.cc +++ b/tensorflow/core/kernels/linalg/eig_op_complex64.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/eig_op_impl.h" +#include "tensorflow/core/kernels/linalg/eig_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/eig_op_double.cc b/tensorflow/core/kernels/linalg/eig_op_double.cc similarity index 93% rename from tensorflow/core/kernels/eig_op_double.cc rename to tensorflow/core/kernels/linalg/eig_op_double.cc index 2cd931cc135..c360637c84a 100644 --- a/tensorflow/core/kernels/eig_op_double.cc +++ b/tensorflow/core/kernels/linalg/eig_op_double.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/eig_op_impl.h" +#include "tensorflow/core/kernels/linalg/eig_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/eig_op_float.cc b/tensorflow/core/kernels/linalg/eig_op_float.cc similarity index 93% rename from tensorflow/core/kernels/eig_op_float.cc rename to tensorflow/core/kernels/linalg/eig_op_float.cc index a06f76e935f..18f576fcc19 100644 --- a/tensorflow/core/kernels/eig_op_float.cc +++ b/tensorflow/core/kernels/linalg/eig_op_float.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/eig_op_impl.h" +#include "tensorflow/core/kernels/linalg/eig_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/eig_op_impl.h b/tensorflow/core/kernels/linalg/eig_op_impl.h similarity index 93% rename from tensorflow/core/kernels/eig_op_impl.h rename to tensorflow/core/kernels/linalg/eig_op_impl.h index 4ebb6bde08b..a7aff7c2a5d 100644 --- a/tensorflow/core/kernels/eig_op_impl.h +++ b/tensorflow/core/kernels/linalg/eig_op_impl.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_EIG_OP_IMPL_H_ -#define TENSORFLOW_CORE_KERNELS_EIG_OP_IMPL_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_EIG_OP_IMPL_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_EIG_OP_IMPL_H_ // See docs in ../ops/linalg_ops.cc. @@ -23,7 +23,7 @@ limitations under the License. #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/denormal.h" #include "tensorflow/core/platform/logging.h" @@ -95,4 +95,4 @@ class EigOp : public LinearAlgebraOp { } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_EIG_OP_IMPL_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_EIG_OP_IMPL_H_ diff --git a/tensorflow/core/kernels/einsum_op.h b/tensorflow/core/kernels/linalg/einsum_op.h similarity index 94% rename from tensorflow/core/kernels/einsum_op.h rename to tensorflow/core/kernels/linalg/einsum_op.h index 31d1109004c..f22f33c600a 100644 --- a/tensorflow/core/kernels/einsum_op.h +++ b/tensorflow/core/kernels/linalg/einsum_op.h @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_EINSUM_OP_H_ -#define TENSORFLOW_CORE_KERNELS_EINSUM_OP_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_EINSUM_OP_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_EINSUM_OP_H_ #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/tensor_types.h" diff --git a/tensorflow/core/kernels/einsum_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/einsum_op_gpu.cu.cc similarity index 96% rename from tensorflow/core/kernels/einsum_op_gpu.cu.cc rename to tensorflow/core/kernels/linalg/einsum_op_gpu.cu.cc index 2935b7fd02a..5461e43e0ab 100644 --- a/tensorflow/core/kernels/einsum_op_gpu.cu.cc +++ b/tensorflow/core/kernels/linalg/einsum_op_gpu.cu.cc @@ -17,7 +17,7 @@ limitations under the License. #define EIGEN_USE_GPU #include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/kernels/einsum_op.h" +#include "tensorflow/core/kernels/linalg/einsum_op.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/einsum_op_impl.h b/tensorflow/core/kernels/linalg/einsum_op_impl.h similarity index 99% rename from tensorflow/core/kernels/einsum_op_impl.h rename to tensorflow/core/kernels/linalg/einsum_op_impl.h index 312738442b8..b9b2d1f0eae 100644 --- a/tensorflow/core/kernels/einsum_op_impl.h +++ b/tensorflow/core/kernels/linalg/einsum_op_impl.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_EINSUM_OP_IMPL_H_ -#define TENSORFLOW_CORE_KERNELS_EINSUM_OP_IMPL_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_EINSUM_OP_IMPL_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_EINSUM_OP_IMPL_H_ #define EIGEN_USE_THREADS #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM @@ -31,8 +31,8 @@ limitations under the License. #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/kernels/batch_matmul_op_impl.h" -#include "tensorflow/core/kernels/einsum_op.h" #include "tensorflow/core/kernels/fill_functor.h" +#include "tensorflow/core/kernels/linalg/einsum_op.h" #include "tensorflow/core/kernels/reduction_ops_common.h" #include "tensorflow/core/kernels/transpose_functor.h" #include "tensorflow/core/lib/core/errors.h" @@ -780,4 +780,4 @@ DECLARE_GPU_SPECS(complex128); } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_EINSUM_OP_IMPL_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_EINSUM_OP_IMPL_H_ diff --git a/tensorflow/core/kernels/einsum_op_impl_bfloat16.cc b/tensorflow/core/kernels/linalg/einsum_op_impl_bfloat16.cc similarity index 94% rename from tensorflow/core/kernels/einsum_op_impl_bfloat16.cc rename to tensorflow/core/kernels/linalg/einsum_op_impl_bfloat16.cc index 44508f86a5e..e2e13052df5 100644 --- a/tensorflow/core/kernels/einsum_op_impl_bfloat16.cc +++ b/tensorflow/core/kernels/linalg/einsum_op_impl_bfloat16.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/einsum_op_impl.h" +#include "tensorflow/core/kernels/linalg/einsum_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/einsum_op_impl_complex128.cc b/tensorflow/core/kernels/linalg/einsum_op_impl_complex128.cc similarity index 95% rename from tensorflow/core/kernels/einsum_op_impl_complex128.cc rename to tensorflow/core/kernels/linalg/einsum_op_impl_complex128.cc index 8473cbf545d..ff78d460acf 100644 --- a/tensorflow/core/kernels/einsum_op_impl_complex128.cc +++ b/tensorflow/core/kernels/linalg/einsum_op_impl_complex128.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/einsum_op_impl.h" +#include "tensorflow/core/kernels/linalg/einsum_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/einsum_op_impl_complex64.cc b/tensorflow/core/kernels/linalg/einsum_op_impl_complex64.cc similarity index 95% rename from tensorflow/core/kernels/einsum_op_impl_complex64.cc rename to tensorflow/core/kernels/linalg/einsum_op_impl_complex64.cc index bd506a04f5f..cd3788846b2 100644 --- a/tensorflow/core/kernels/einsum_op_impl_complex64.cc +++ b/tensorflow/core/kernels/linalg/einsum_op_impl_complex64.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/einsum_op_impl.h" +#include "tensorflow/core/kernels/linalg/einsum_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/einsum_op_impl_double.cc b/tensorflow/core/kernels/linalg/einsum_op_impl_double.cc similarity index 95% rename from tensorflow/core/kernels/einsum_op_impl_double.cc rename to tensorflow/core/kernels/linalg/einsum_op_impl_double.cc index f994590779b..e0c093fa4a9 100644 --- a/tensorflow/core/kernels/einsum_op_impl_double.cc +++ b/tensorflow/core/kernels/linalg/einsum_op_impl_double.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/einsum_op_impl.h" +#include "tensorflow/core/kernels/linalg/einsum_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/einsum_op_impl_float.cc b/tensorflow/core/kernels/linalg/einsum_op_impl_float.cc similarity index 95% rename from tensorflow/core/kernels/einsum_op_impl_float.cc rename to tensorflow/core/kernels/linalg/einsum_op_impl_float.cc index 1875310b687..ad9135c991c 100644 --- a/tensorflow/core/kernels/einsum_op_impl_float.cc +++ b/tensorflow/core/kernels/linalg/einsum_op_impl_float.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/einsum_op_impl.h" +#include "tensorflow/core/kernels/linalg/einsum_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/einsum_op_impl_half.cc b/tensorflow/core/kernels/linalg/einsum_op_impl_half.cc similarity index 95% rename from tensorflow/core/kernels/einsum_op_impl_half.cc rename to tensorflow/core/kernels/linalg/einsum_op_impl_half.cc index 0486b133e62..72a9f6bec4f 100644 --- a/tensorflow/core/kernels/einsum_op_impl_half.cc +++ b/tensorflow/core/kernels/linalg/einsum_op_impl_half.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/einsum_op_impl.h" +#include "tensorflow/core/kernels/linalg/einsum_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/einsum_op_impl_int32.cc b/tensorflow/core/kernels/linalg/einsum_op_impl_int32.cc similarity index 94% rename from tensorflow/core/kernels/einsum_op_impl_int32.cc rename to tensorflow/core/kernels/linalg/einsum_op_impl_int32.cc index db5169498d9..7569c979c59 100644 --- a/tensorflow/core/kernels/einsum_op_impl_int32.cc +++ b/tensorflow/core/kernels/linalg/einsum_op_impl_int32.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/einsum_op_impl.h" +#include "tensorflow/core/kernels/linalg/einsum_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/einsum_op_impl_int64.cc b/tensorflow/core/kernels/linalg/einsum_op_impl_int64.cc similarity index 94% rename from tensorflow/core/kernels/einsum_op_impl_int64.cc rename to tensorflow/core/kernels/linalg/einsum_op_impl_int64.cc index 7f1a1eac411..6ee0ebc9637 100644 --- a/tensorflow/core/kernels/einsum_op_impl_int64.cc +++ b/tensorflow/core/kernels/linalg/einsum_op_impl_int64.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/einsum_op_impl.h" +#include "tensorflow/core/kernels/linalg/einsum_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/eye_functor.h b/tensorflow/core/kernels/linalg/eye_functor.h similarity index 90% rename from tensorflow/core/kernels/eye_functor.h rename to tensorflow/core/kernels/linalg/eye_functor.h index 3799cfba9ae..c77372f089a 100644 --- a/tensorflow/core/kernels/eye_functor.h +++ b/tensorflow/core/kernels/linalg/eye_functor.h @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_EYE_FUNCTOR_H_ -#define TENSORFLOW_CORE_KERNELS_EYE_FUNCTOR_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_EYE_FUNCTOR_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_EYE_FUNCTOR_H_ #include "tensorflow/core/framework/tensor_types.h" diff --git a/tensorflow/core/kernels/eye_functor_gpu.cu.cc b/tensorflow/core/kernels/linalg/eye_functor_gpu.cu.cc similarity index 97% rename from tensorflow/core/kernels/eye_functor_gpu.cu.cc rename to tensorflow/core/kernels/linalg/eye_functor_gpu.cu.cc index 90df538dd2c..85865588f2c 100644 --- a/tensorflow/core/kernels/eye_functor_gpu.cu.cc +++ b/tensorflow/core/kernels/linalg/eye_functor_gpu.cu.cc @@ -20,7 +20,7 @@ limitations under the License. #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/type_traits.h" -#include "tensorflow/core/kernels/eye_functor.h" +#include "tensorflow/core/kernels/linalg/eye_functor.h" #include "tensorflow/core/util/gpu_kernel_helper.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg_ops_common.cc b/tensorflow/core/kernels/linalg/linalg_ops_common.cc similarity index 99% rename from tensorflow/core/kernels/linalg_ops_common.cc rename to tensorflow/core/kernels/linalg/linalg_ops_common.cc index 56a941fbd1f..c8d33e435c7 100644 --- a/tensorflow/core/kernels/linalg_ops_common.cc +++ b/tensorflow/core/kernels/linalg/linalg_ops_common.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include diff --git a/tensorflow/core/kernels/linalg/linalg_ops_common.h b/tensorflow/core/kernels/linalg/linalg_ops_common.h new file mode 100644 index 00000000000..3ab37480c90 --- /dev/null +++ b/tensorflow/core/kernels/linalg/linalg_ops_common.h @@ -0,0 +1,221 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_LINALG_OPS_COMMON_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_LINALG_OPS_COMMON_H_ + +// Classes to support linear algebra functionality, similar to the numpy.linalg +// module. Supports batch computation on several matrices at once, sharding the +// computations across different threads if necessary. 
+#include
+
+#include "third_party/eigen3/Eigen/Core"
+#include "tensorflow/core/framework/kernel_def_builder.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/gtl/inlined_vector.h"
+#include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/work_sharder.h"
+
+namespace tensorflow {
+
+// Base class for linear algebra operators.
+template <class InputScalar, class OutputScalar = InputScalar>
+class LinearAlgebraOp : public OpKernel {
+ public:
+  explicit LinearAlgebraOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override;
+
+ protected:
+  using TensorShapes = gtl::InlinedVector<TensorShape, 4>;
+  // Returns the number of leading inputs that are to be treated as matrix
+  // inputs. By default this is all the inputs. Derived classes can override
+  // this to tell the base class to ignore one or more trailing inputs.
+  virtual int NumMatrixInputs(const OpKernelContext* context) const {
+    return context->num_inputs();
+  }
+
+  // Returns true if the number of inputs and their shapes are as expected.
+  // Many ops take a single square input matrix, so we provide that as a default
+  // implementation for convenience.
+  virtual void ValidateInputMatrixShapes(
+      OpKernelContext* context, const TensorShapes& input_matrix_shapes) const {
+    ValidateSingleSquareMatrix(context, input_matrix_shapes);
+  }
+
+  // Convenience validators for common cases:
+  //
+  // Validate op taking a single matrix A.
+  static void ValidateSingleMatrix(OpKernelContext* context,
+                                   const TensorShapes& input_matrix_shapes);
+  // Validate op taking a single square matrix A.
+  static void ValidateSingleSquareMatrix(
+      OpKernelContext* context, const TensorShapes& input_matrix_shapes);
+  // Validate op taking two matrices A and B that have the same number of rows.
+  static void ValidateSolver(OpKernelContext* context,
+                             const TensorShapes& input_matrix_shapes);
+  // Validate op taking two matrices A and B that have the same number of rows
+  // and A is square.
+  static void ValidateSquareSolver(OpKernelContext* context,
+                                   const TensorShapes& input_matrix_shapes);
+
+  // Returns the output shapes of each individual matrix operation. Output
+  // matrices shapes must be rank 0, 1, or 2. Scalar outputs are rank 0.
+  //
+  // The derived class may return a number of shapes (N) less than
+  // context->num_outputs() (M) to indicate that a only leading subset of
+  // the outputs will be populated. In this case, a dummy scalar tensor with
+  // value zero will be return for the last M-N outputs.
+  //
+  // For many ops, the output dimensions are the same as the input dimensions,
+  // so we provide that as a default implementation for convenience.
+  virtual TensorShapes GetOutputMatrixShapes(
+      const TensorShapes& input_matrix_shapes) const {
+    return input_matrix_shapes;
+  }
+
+  // Returns the cost per matrix operation. This is used to determine the
+  // number of threads to use for parallelizing calls to ComputeMatrix in
+  // batch mode. Cost per unit is assumed to be roughly 1ns, based on comments
+  // in core/util/work_sharder.cc. Many linear algebra ops take roughly max(m,n)
+  // * min(m,n)^2, where the first input matrix is m-by-n. We provide that as a
+  // default implementation for convenience.
+  virtual int64 GetCostPerUnit(const TensorShapes& input_matrix_shapes) const {
+    double m = static_cast<double>(input_matrix_shapes[0].dim_size(0));
+    double n = static_cast<double>(input_matrix_shapes[0].dim_size(1));
+    double cost = std::max(m, n) * std::min(m, n) * std::min(m, n);
+    return cost >= static_cast<double>(kint64max) ? kint64max
+                                                  : static_cast<int64>(cost);
+  }
+
+  // Returns true if it is safe to forward (alias) input to output buffer
+  // and expect the kernel to perform the computation inplace.
+  virtual bool EnableInputForwarding() const { return true; }
+
+  using InputMatrix = Eigen::Matrix<InputScalar, Eigen::Dynamic, Eigen::Dynamic,
+                                    Eigen::RowMajor>;
+  using InputConstMatrixMap = Eigen::Map<const InputMatrix>;
+  using InputMatrixMap = Eigen::Map<InputMatrix>;
+  using InputConstVectorMap =
+      Eigen::Map<const Eigen::Matrix<InputScalar, 1, Eigen::Dynamic>>;
+  using InputConstMatrixMaps = gtl::InlinedVector<InputConstMatrixMap, 4>;
+  using InputMatrixMaps = gtl::InlinedVector<InputMatrixMap, 4>;
+  using InputRealScalar = typename Eigen::NumTraits<InputScalar>::Real;
+
+  using OutputMatrix = Eigen::Matrix<OutputScalar, Eigen::Dynamic,
+                                     Eigen::Dynamic, Eigen::RowMajor>;
+  using OutputConstMatrixMap = Eigen::Map<const OutputMatrix>;
+  using OutputMatrixMap = Eigen::Map<OutputMatrix>;
+  using OutputConstVectorMap =
+      Eigen::Map<const Eigen::Matrix<OutputScalar, 1, Eigen::Dynamic>>;
+  using OutputConstMatrixMaps = gtl::InlinedVector<OutputConstMatrixMap, 4>;
+  using OutputMatrixMaps = gtl::InlinedVector<OutputMatrixMap, 4>;
+  using OutputRealScalar = typename Eigen::NumTraits<OutputScalar>::Real;
+
+  // backward compatibility
+  using Scalar = OutputScalar;
+  using Matrix =
+      Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
+  using ConstMatrixMap = Eigen::Map<const Matrix>;
+  using MatrixMap = Eigen::Map<Matrix>;
+  using ConstVectorMap =
+      Eigen::Map<const Eigen::Matrix<Scalar, 1, Eigen::Dynamic>>;
+  using ConstMatrixMaps = gtl::InlinedVector<ConstMatrixMap, 4>;
+  using MatrixMaps = gtl::InlinedVector<MatrixMap, 4>;
+  using RealScalar = typename Eigen::NumTraits<Scalar>::Real;
+
+  // Performs a single matrix computation given input matrices, and
+  // stores the result in outputs. For batch operations, this will be called
+  // repeatedly for a single call to Compute() when multiple matrices exist in
+  // input Tensors with rank > 2. In this case the calls to ComputeMatrix are
+  // parallelized. The number of threads used is determined by a cost model from
+  // the value returned by GetCostPerUnit().
+  virtual void ComputeMatrix(OpKernelContext* context,
+                             const InputConstMatrixMaps& inputs,
+                             OutputMatrixMaps* outputs) = 0;
+
+ private:
+  using TensorInputs = gtl::InlinedVector<const Tensor*, 4>;
+  using TensorOutputs = gtl::InlinedVector<Tensor*, 4>;
+  // This function maps 2-d slices (matrices) of the input and output tensors
+  // using Eigen::Map and calls ComputeMatrix implemented in terms of the
+  // Eigen::MatrixBase API by the derived class.
+  //
+  // The 'matrix_index' parameter specifies the index of the matrix to be used
+  // from each input tensor, and the index of the matrix to be written to each
+  // output tensor. The input matrices are in row major order, and located at
+  // the memory addresses
+  //   inputs[i].flat().data() +
+  //   matrix_index * input_matrix_shapes[i].num_elements()
+  // for i in 0...inputs.size()-1.
+  // The output matrices are in row major order, and located at the memory
+  // address
+  //   outputs[i]->flat().data() +
+  //   matrix_index * output_matrix_shapes[i].num_elements().
+  // for i in 0...outputs.size()-1.
+  //
+  void ComputeTensorSlice(OpKernelContext* context, int64 matrix_index,
+                          const TensorInputs& inputs,
+                          const TensorShapes& input_matrix_shapes,
+                          const TensorOutputs& outputs,
+                          const TensorShapes& output_matrix_shapes);
+
+  void AnalyzeInputs(OpKernelContext* context, TensorInputs* inputs,
+                     TensorShapes* input_matrix_shapes,
+                     TensorShape* batch_shape);
+
+  void PrepareOutputs(OpKernelContext* context,
+                      const TensorShapes& input_matrix_shapes,
+                      const TensorShape& batch_shape, TensorOutputs* outputs,
+                      TensorShapes* output_matrix_shapes);
+};
+
+// Declare LinearAlgebraOp, which is explicitly instantiated in
+// linalg_ops_common.cc for float, double, complex64, and complex128.
+extern template class LinearAlgebraOp<float>;
+extern template class LinearAlgebraOp<double>;
+extern template class LinearAlgebraOp<complex64>;
+extern template class LinearAlgebraOp<complex128>;
+
+}  // namespace tensorflow
+
+#define INHERIT_LINALG_TYPEDEFS(Scalar)                       \
+  typedef LinearAlgebraOp<Scalar> Base;                       \
+  using RealScalar = typename Eigen::NumTraits<Scalar>::Real; \
+  using Matrix = typename Base::Matrix;                       \
+  using MatrixMap = typename Base::MatrixMap;                 \
+  using MatrixMaps = typename Base::MatrixMaps;               \
+  using ConstMatrixMap = typename Base::ConstMatrixMap;       \
+  using ConstMatrixMaps = typename Base::ConstMatrixMaps;     \
+  using ConstVectorMap = typename Base::ConstVectorMap;       \
+  using TensorShapes = typename Base::TensorShapes;
+
+#define REGISTER_LINALG_OP_CPU(OpName, OpClass, Scalar)                 \
+  REGISTER_KERNEL_BUILDER(                                              \
+      Name(OpName).Device(DEVICE_CPU).TypeConstraint<Scalar>("T"), OpClass)
+
+#define REGISTER_LINALG_OP_GPU(OpName, OpClass, Scalar)                 \
+  REGISTER_KERNEL_BUILDER(                                              \
+      Name(OpName).Device(DEVICE_GPU).TypeConstraint<Scalar>("T"), OpClass)
+
+// Deprecated, use one of the device-specific macros above.
+#define REGISTER_LINALG_OP(OpName, OpClass, Scalar) \
+  REGISTER_LINALG_OP_CPU(OpName, OpClass, Scalar)
+
+#endif  // TENSORFLOW_CORE_KERNELS_LINALG_LINALG_OPS_COMMON_H_
diff --git a/tensorflow/core/kernels/lu_op.cc b/tensorflow/core/kernels/linalg/lu_op.cc
similarity index 100%
rename from tensorflow/core/kernels/lu_op.cc
rename to tensorflow/core/kernels/linalg/lu_op.cc
diff --git a/tensorflow/core/kernels/lu_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/lu_op_gpu.cu.cc
similarity index 99%
rename from tensorflow/core/kernels/lu_op_gpu.cu.cc
rename to tensorflow/core/kernels/linalg/lu_op_gpu.cu.cc
index 47b37ed7f7a..9d23a35057d 100644
--- a/tensorflow/core/kernels/lu_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/linalg/lu_op_gpu.cu.cc
@@ -25,9 +25,9 @@ limitations under the License.
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/types.h"
-#include "tensorflow/core/kernels/cuda_solvers.h"
 #include "tensorflow/core/kernels/transpose_functor.h"
 #include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/cuda_solvers.h"
 #include "tensorflow/core/util/gpu_kernel_helper.h"
 
 namespace tensorflow {
diff --git a/tensorflow/core/kernels/matrix_band_part_op.cc b/tensorflow/core/kernels/linalg/matrix_band_part_op.cc
similarity index 99%
rename from tensorflow/core/kernels/matrix_band_part_op.cc
rename to tensorflow/core/kernels/linalg/matrix_band_part_op.cc
index 4dcce5a8f58..23619bacc33 100644
--- a/tensorflow/core/kernels/matrix_band_part_op.cc
+++ b/tensorflow/core/kernels/linalg/matrix_band_part_op.cc
@@ -21,11 +21,12 @@ limitations under the License.
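To make the relocated API concrete, here is a minimal sketch (not part of the patch) of how a kernel subclasses LinearAlgebraOp from its new include path. The MatrixTraceOp class, the "MatrixTrace" op name, and the trace computation are hypothetical illustrations; only the header path and the INHERIT_LINALG_TYPEDEFS / REGISTER_LINALG_OP_CPU macros come from the file above.

// Sketch only: hypothetical op built on tensorflow/core/kernels/linalg/linalg_ops_common.h.
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/kernels/linalg/linalg_ops_common.h"

namespace tensorflow {

template <class Scalar>
class MatrixTraceOp : public LinearAlgebraOp<Scalar> {
 public:
  INHERIT_LINALG_TYPEDEFS(Scalar);

  explicit MatrixTraceOp(OpKernelConstruction* context) : Base(context) {}

  // Each batched input matrix produces a rank-0 (scalar) output; the default
  // ValidateInputMatrixShapes already enforces a single square input.
  TensorShapes GetOutputMatrixShapes(
      const TensorShapes& input_matrix_shapes) const final {
    return TensorShapes({TensorShape({})});
  }

  // Called once per 2-d slice; the base class shards these calls across
  // threads using the cost returned by GetCostPerUnit().
  void ComputeMatrix(OpKernelContext* context, const ConstMatrixMaps& inputs,
                     MatrixMaps* outputs) final {
    outputs->at(0)(0, 0) = inputs[0].trace();
  }
};

// Hypothetical registration; "MatrixTrace" is not an op defined by this patch.
REGISTER_LINALG_OP_CPU("MatrixTrace", (MatrixTraceOp<float>), float);

}  // namespace tensorflow

The base class handles batching, validation, and sharding, so a derived kernel only overrides the per-matrix pieces it needs; with the default cost model, a batch of 1000x100 matrices is costed at roughly 1000 * 100 * 100 = 10^7 units (about 10 ms) per matrix when deciding how to parallelize.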
#define EIGEN_USE_GPU #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/matrix_band_part_op.h" +#include "tensorflow/core/kernels/linalg/matrix_band_part_op.h" #include #include #include + #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" diff --git a/tensorflow/core/kernels/matrix_band_part_op.h b/tensorflow/core/kernels/linalg/matrix_band_part_op.h similarity index 86% rename from tensorflow/core/kernels/matrix_band_part_op.h rename to tensorflow/core/kernels/linalg/matrix_band_part_op.h index b04e36db8ed..2f68eba6dcd 100644 --- a/tensorflow/core/kernels/matrix_band_part_op.h +++ b/tensorflow/core/kernels/linalg/matrix_band_part_op.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_MATRIX_BAND_PART_OP_H_ -#define TENSORFLOW_CORE_KERNELS_MATRIX_BAND_PART_OP_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_BAND_PART_OP_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_BAND_PART_OP_H_ #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_types.h" @@ -34,4 +34,4 @@ struct MatrixBandPartFunctor { } // namespace functor } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_MATRIX_BAND_PART_OP_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_BAND_PART_OP_H_ diff --git a/tensorflow/core/kernels/matrix_band_part_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/matrix_band_part_op_gpu.cu.cc similarity index 97% rename from tensorflow/core/kernels/matrix_band_part_op_gpu.cu.cc rename to tensorflow/core/kernels/linalg/matrix_band_part_op_gpu.cu.cc index 9eb3e4f72a2..9c734b7fd6e 100644 --- a/tensorflow/core/kernels/matrix_band_part_op_gpu.cu.cc +++ b/tensorflow/core/kernels/linalg/matrix_band_part_op_gpu.cu.cc @@ -21,7 +21,7 @@ limitations under the License. #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/kernels/matrix_band_part_op.h" +#include "tensorflow/core/kernels/linalg/matrix_band_part_op.h" #include "tensorflow/core/util/gpu_kernel_helper.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/matrix_diag_op.cc b/tensorflow/core/kernels/linalg/matrix_diag_op.cc similarity index 99% rename from tensorflow/core/kernels/matrix_diag_op.cc rename to tensorflow/core/kernels/linalg/matrix_diag_op.cc index 05d7e4e6f86..69cc8170793 100644 --- a/tensorflow/core/kernels/matrix_diag_op.cc +++ b/tensorflow/core/kernels/linalg/matrix_diag_op.cc @@ -20,7 +20,7 @@ limitations under the License. #define EIGEN_USE_GPU #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/matrix_diag_op.h" +#include "tensorflow/core/kernels/linalg/matrix_diag_op.h" #include #include diff --git a/tensorflow/core/kernels/matrix_diag_op.h b/tensorflow/core/kernels/linalg/matrix_diag_op.h similarity index 94% rename from tensorflow/core/kernels/matrix_diag_op.h rename to tensorflow/core/kernels/linalg/matrix_diag_op.h index 707fd9b6c14..5758ba664cc 100644 --- a/tensorflow/core/kernels/matrix_diag_op.h +++ b/tensorflow/core/kernels/linalg/matrix_diag_op.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_MATRIX_DIAG_OP_H_ -#define TENSORFLOW_CORE_KERNELS_MATRIX_DIAG_OP_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_DIAG_OP_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_DIAG_OP_H_ // Generator definition for MatrixDiagOp, must be compilable by nvcc. @@ -69,4 +69,4 @@ struct MatrixDiag { } // namespace functor } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_MATRIX_DIAG_OP_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_DIAG_OP_H_ diff --git a/tensorflow/core/kernels/matrix_diag_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/matrix_diag_op_gpu.cu.cc similarity index 99% rename from tensorflow/core/kernels/matrix_diag_op_gpu.cu.cc rename to tensorflow/core/kernels/linalg/matrix_diag_op_gpu.cu.cc index 76271798d5f..6b52e70716d 100644 --- a/tensorflow/core/kernels/matrix_diag_op_gpu.cu.cc +++ b/tensorflow/core/kernels/linalg/matrix_diag_op_gpu.cu.cc @@ -18,7 +18,7 @@ limitations under the License. #define EIGEN_USE_GPU #include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/kernels/matrix_diag_op.h" +#include "tensorflow/core/kernels/linalg/matrix_diag_op.h" #include "tensorflow/core/util/gpu_kernel_helper.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/matrix_exponential_op.cc b/tensorflow/core/kernels/linalg/matrix_exponential_op.cc similarity index 97% rename from tensorflow/core/kernels/matrix_exponential_op.cc rename to tensorflow/core/kernels/linalg/matrix_exponential_op.cc index 01d4894438c..73407614955 100644 --- a/tensorflow/core/kernels/matrix_exponential_op.cc +++ b/tensorflow/core/kernels/linalg/matrix_exponential_op.cc @@ -20,7 +20,7 @@ limitations under the License. #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" diff --git a/tensorflow/core/kernels/matrix_inverse_op.cc b/tensorflow/core/kernels/linalg/matrix_inverse_op.cc similarity index 98% rename from tensorflow/core/kernels/matrix_inverse_op.cc rename to tensorflow/core/kernels/linalg/matrix_inverse_op.cc index 52afdd15ba6..dc51776f2fe 100644 --- a/tensorflow/core/kernels/matrix_inverse_op.cc +++ b/tensorflow/core/kernels/linalg/matrix_inverse_op.cc @@ -24,7 +24,7 @@ limitations under the License. #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" @@ -32,9 +32,9 @@ limitations under the License. 
#if GOOGLE_CUDA #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/eye_functor.h" +#include "tensorflow/core/kernels/linalg/eye_functor.h" #include "tensorflow/core/kernels/transpose_functor.h" +#include "tensorflow/core/util/cuda_solvers.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/matrix_logarithm_op.cc b/tensorflow/core/kernels/linalg/matrix_logarithm_op.cc similarity index 97% rename from tensorflow/core/kernels/matrix_logarithm_op.cc rename to tensorflow/core/kernels/linalg/matrix_logarithm_op.cc index 22ca094e243..79d5472f140 100644 --- a/tensorflow/core/kernels/matrix_logarithm_op.cc +++ b/tensorflow/core/kernels/linalg/matrix_logarithm_op.cc @@ -20,7 +20,7 @@ limitations under the License. #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" diff --git a/tensorflow/core/kernels/matrix_set_diag_op.cc b/tensorflow/core/kernels/linalg/matrix_set_diag_op.cc similarity index 99% rename from tensorflow/core/kernels/matrix_set_diag_op.cc rename to tensorflow/core/kernels/linalg/matrix_set_diag_op.cc index bf98fd0d47d..df32228d0f2 100644 --- a/tensorflow/core/kernels/matrix_set_diag_op.cc +++ b/tensorflow/core/kernels/linalg/matrix_set_diag_op.cc @@ -21,7 +21,7 @@ limitations under the License. #define EIGEN_USE_GPU #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/matrix_set_diag_op.h" +#include "tensorflow/core/kernels/linalg/matrix_set_diag_op.h" #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/op_kernel.h" @@ -30,7 +30,7 @@ limitations under the License. #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/types.h" -#include "tensorflow/core/kernels/matrix_diag_op.h" +#include "tensorflow/core/kernels/linalg/matrix_diag_op.h" #include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" diff --git a/tensorflow/core/kernels/matrix_set_diag_op.h b/tensorflow/core/kernels/linalg/matrix_set_diag_op.h similarity index 89% rename from tensorflow/core/kernels/matrix_set_diag_op.h rename to tensorflow/core/kernels/linalg/matrix_set_diag_op.h index 04877cd34ca..449a3607ede 100644 --- a/tensorflow/core/kernels/matrix_set_diag_op.h +++ b/tensorflow/core/kernels/linalg/matrix_set_diag_op.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_MATRIX_SET_DIAG_OP_H_ -#define TENSORFLOW_CORE_KERNELS_MATRIX_SET_DIAG_OP_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_SET_DIAG_OP_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_SET_DIAG_OP_H_ #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_types.h" @@ -39,4 +39,4 @@ struct MatrixSetDiag { } // namespace functor } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_MATRIX_SET_DIAG_OP_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_SET_DIAG_OP_H_ diff --git a/tensorflow/core/kernels/matrix_set_diag_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/matrix_set_diag_op_gpu.cu.cc similarity index 99% rename from tensorflow/core/kernels/matrix_set_diag_op_gpu.cu.cc rename to tensorflow/core/kernels/linalg/matrix_set_diag_op_gpu.cu.cc index 4e32f8a52e8..0cdb457db03 100644 --- a/tensorflow/core/kernels/matrix_set_diag_op_gpu.cu.cc +++ b/tensorflow/core/kernels/linalg/matrix_set_diag_op_gpu.cu.cc @@ -18,7 +18,7 @@ limitations under the License. #define EIGEN_USE_GPU #include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/kernels/matrix_set_diag_op.h" +#include "tensorflow/core/kernels/linalg/matrix_set_diag_op.h" #include "tensorflow/core/util/gpu_kernel_helper.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/matrix_solve_ls_op_complex128.cc b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_complex128.cc similarity index 92% rename from tensorflow/core/kernels/matrix_solve_ls_op_complex128.cc rename to tensorflow/core/kernels/linalg/matrix_solve_ls_op_complex128.cc index 22274cc3daf..4e64eb42371 100644 --- a/tensorflow/core/kernels/matrix_solve_ls_op_complex128.cc +++ b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_complex128.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/matrix_solve_ls_op_impl.h" +#include "tensorflow/core/kernels/linalg/matrix_solve_ls_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/matrix_solve_ls_op_complex64.cc b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_complex64.cc similarity index 92% rename from tensorflow/core/kernels/matrix_solve_ls_op_complex64.cc rename to tensorflow/core/kernels/linalg/matrix_solve_ls_op_complex64.cc index c8421a3efba..719201f3f9e 100644 --- a/tensorflow/core/kernels/matrix_solve_ls_op_complex64.cc +++ b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_complex64.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/matrix_solve_ls_op_impl.h" +#include "tensorflow/core/kernels/linalg/matrix_solve_ls_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/matrix_solve_ls_op_double.cc b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_double.cc similarity index 92% rename from tensorflow/core/kernels/matrix_solve_ls_op_double.cc rename to tensorflow/core/kernels/linalg/matrix_solve_ls_op_double.cc index c7d03cb1052..614ecee4e23 100644 --- a/tensorflow/core/kernels/matrix_solve_ls_op_double.cc +++ b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_double.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/matrix_solve_ls_op_impl.h" +#include "tensorflow/core/kernels/linalg/matrix_solve_ls_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/matrix_solve_ls_op_float.cc b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_float.cc similarity index 92% rename from tensorflow/core/kernels/matrix_solve_ls_op_float.cc rename to tensorflow/core/kernels/linalg/matrix_solve_ls_op_float.cc index c98a84beded..809cff8148c 100644 --- a/tensorflow/core/kernels/matrix_solve_ls_op_float.cc +++ b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_float.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/matrix_solve_ls_op_impl.h" +#include "tensorflow/core/kernels/linalg/matrix_solve_ls_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/matrix_solve_ls_op_impl.h b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_impl.h similarity index 96% rename from tensorflow/core/kernels/matrix_solve_ls_op_impl.h rename to tensorflow/core/kernels/linalg/matrix_solve_ls_op_impl.h index 00a05a87a3a..1c8101a05b4 100644 --- a/tensorflow/core/kernels/matrix_solve_ls_op_impl.h +++ b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_impl.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_MATRIX_SOLVE_LS_OP_IMPL_H_ -#define TENSORFLOW_CORE_KERNELS_MATRIX_SOLVE_LS_OP_IMPL_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_SOLVE_LS_OP_IMPL_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_SOLVE_LS_OP_IMPL_H_ // See docs in ../ops/linalg_ops.cc. @@ -24,7 +24,7 @@ limitations under the License. 
#include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" @@ -163,4 +163,4 @@ class MatrixSolveLsOp : public LinearAlgebraOp { } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_MATRIX_SOLVE_LS_OP_IMPL_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_SOLVE_LS_OP_IMPL_H_ diff --git a/tensorflow/core/kernels/matrix_solve_op.cc b/tensorflow/core/kernels/linalg/matrix_solve_op.cc similarity index 99% rename from tensorflow/core/kernels/matrix_solve_op.cc rename to tensorflow/core/kernels/linalg/matrix_solve_op.cc index 3a75054f4ea..70f02bddf9b 100644 --- a/tensorflow/core/kernels/matrix_solve_op.cc +++ b/tensorflow/core/kernels/linalg/matrix_solve_op.cc @@ -25,7 +25,7 @@ limitations under the License. #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" @@ -33,8 +33,8 @@ limitations under the License. #if GOOGLE_CUDA #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#include "tensorflow/core/kernels/cuda_solvers.h" #include "tensorflow/core/kernels/transpose_functor.h" +#include "tensorflow/core/util/cuda_solvers.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/matrix_square_root_op.cc b/tensorflow/core/kernels/linalg/matrix_square_root_op.cc similarity index 97% rename from tensorflow/core/kernels/matrix_square_root_op.cc rename to tensorflow/core/kernels/linalg/matrix_square_root_op.cc index fe3d3043c26..ce43e358350 100644 --- a/tensorflow/core/kernels/matrix_square_root_op.cc +++ b/tensorflow/core/kernels/linalg/matrix_square_root_op.cc @@ -20,7 +20,7 @@ limitations under the License. #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" diff --git a/tensorflow/core/kernels/matrix_triangular_solve_op_complex.cc b/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_complex.cc similarity index 92% rename from tensorflow/core/kernels/matrix_triangular_solve_op_complex.cc rename to tensorflow/core/kernels/linalg/matrix_triangular_solve_op_complex.cc index ae3702078a0..27f3e77e29c 100644 --- a/tensorflow/core/kernels/matrix_triangular_solve_op_complex.cc +++ b/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_complex.cc @@ -14,7 +14,7 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/kernels/matrix_triangular_solve_op_impl.h" +#include "tensorflow/core/kernels/linalg/matrix_triangular_solve_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/matrix_triangular_solve_op_impl.h b/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_impl.h similarity index 97% rename from tensorflow/core/kernels/matrix_triangular_solve_op_impl.h rename to tensorflow/core/kernels/linalg/matrix_triangular_solve_op_impl.h index fb7e6f0f5ff..99249f792b6 100644 --- a/tensorflow/core/kernels/matrix_triangular_solve_op_impl.h +++ b/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_impl.h @@ -15,8 +15,8 @@ limitations under the License. // See docs in ../ops/linalg_ops.cc. // -#ifndef TENSORFLOW_CORE_KERNELS_MATRIX_TRIANGULAR_SOLVE_OP_IMPL_H_ -#define TENSORFLOW_CORE_KERNELS_MATRIX_TRIANGULAR_SOLVE_OP_IMPL_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_TRIANGULAR_SOLVE_OP_IMPL_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_TRIANGULAR_SOLVE_OP_IMPL_H_ #include "third_party/eigen3/Eigen/Core" #include "tensorflow/core/framework/kernel_def_builder.h" @@ -24,7 +24,7 @@ limitations under the License. #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/kernels/fill_functor.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" @@ -38,9 +38,9 @@ limitations under the License. #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM #if GOOGLE_CUDA -#include "tensorflow/core/kernels/cuda_solvers.h" +#include "tensorflow/core/util/cuda_solvers.h" #elif TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/rocm_solvers.h" +#include "tensorflow/core/util/rocm_solvers.h" #endif namespace tensorflow { @@ -434,4 +434,4 @@ struct LaunchBatchMatrixTriangularSolve { } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_MATRIX_TRIANGULAR_SOLVE_OP_IMPL_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_TRIANGULAR_SOLVE_OP_IMPL_H_ diff --git a/tensorflow/core/kernels/matrix_triangular_solve_op_real.cc b/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_real.cc similarity index 93% rename from tensorflow/core/kernels/matrix_triangular_solve_op_real.cc rename to tensorflow/core/kernels/linalg/matrix_triangular_solve_op_real.cc index 0f92964dd72..71a62441dc4 100644 --- a/tensorflow/core/kernels/matrix_triangular_solve_op_real.cc +++ b/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_real.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/matrix_triangular_solve_op_impl.h" +#include "tensorflow/core/kernels/linalg/matrix_triangular_solve_op_impl.h" #if GOOGLE_CUDA #include "third_party/gpus/cuda/include/cuda.h" diff --git a/tensorflow/core/kernels/matrix_triangular_solve_op_test.cc b/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_test.cc similarity index 100% rename from tensorflow/core/kernels/matrix_triangular_solve_op_test.cc rename to tensorflow/core/kernels/linalg/matrix_triangular_solve_op_test.cc diff --git a/tensorflow/core/kernels/qr_op_complex128.cc b/tensorflow/core/kernels/linalg/qr_op_complex128.cc similarity index 96% rename from tensorflow/core/kernels/qr_op_complex128.cc rename to tensorflow/core/kernels/linalg/qr_op_complex128.cc index 8a3e3dc0a92..0c14c6d2818 100644 --- a/tensorflow/core/kernels/qr_op_complex128.cc +++ b/tensorflow/core/kernels/linalg/qr_op_complex128.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/qr_op_impl.h" +#include "tensorflow/core/kernels/linalg/qr_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/qr_op_complex64.cc b/tensorflow/core/kernels/linalg/qr_op_complex64.cc similarity index 95% rename from tensorflow/core/kernels/qr_op_complex64.cc rename to tensorflow/core/kernels/linalg/qr_op_complex64.cc index 467fa6c2d6a..fc0227ef7f9 100644 --- a/tensorflow/core/kernels/qr_op_complex64.cc +++ b/tensorflow/core/kernels/linalg/qr_op_complex64.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/qr_op_impl.h" +#include "tensorflow/core/kernels/linalg/qr_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/qr_op_double.cc b/tensorflow/core/kernels/linalg/qr_op_double.cc similarity index 96% rename from tensorflow/core/kernels/qr_op_double.cc rename to tensorflow/core/kernels/linalg/qr_op_double.cc index 05537a0eaa3..ae00b3e7921 100644 --- a/tensorflow/core/kernels/qr_op_double.cc +++ b/tensorflow/core/kernels/linalg/qr_op_double.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/qr_op_impl.h" +#include "tensorflow/core/kernels/linalg/qr_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/qr_op_float.cc b/tensorflow/core/kernels/linalg/qr_op_float.cc similarity index 96% rename from tensorflow/core/kernels/qr_op_float.cc rename to tensorflow/core/kernels/linalg/qr_op_float.cc index 6aebd981865..77b8eeb0286 100644 --- a/tensorflow/core/kernels/qr_op_float.cc +++ b/tensorflow/core/kernels/linalg/qr_op_float.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/qr_op_impl.h" +#include "tensorflow/core/kernels/linalg/qr_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/qr_op_impl.h b/tensorflow/core/kernels/linalg/qr_op_impl.h similarity index 96% rename from tensorflow/core/kernels/qr_op_impl.h rename to tensorflow/core/kernels/linalg/qr_op_impl.h index 535df9d160d..876594bc511 100644 --- a/tensorflow/core/kernels/qr_op_impl.h +++ b/tensorflow/core/kernels/linalg/qr_op_impl.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_QR_OP_IMPL_H_ -#define TENSORFLOW_CORE_KERNELS_QR_OP_IMPL_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_QR_OP_IMPL_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_QR_OP_IMPL_H_ // See docs in ../ops/linalg_ops.cc. // @@ -33,7 +33,7 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" @@ -41,11 +41,11 @@ limitations under the License. #if GOOGLE_CUDA #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#include "tensorflow/core/kernels/cuda_solvers.h" #include "tensorflow/core/kernels/cwise_ops.h" -#include "tensorflow/core/kernels/eye_functor.h" -#include "tensorflow/core/kernels/matrix_band_part_op.h" +#include "tensorflow/core/kernels/linalg/eye_functor.h" +#include "tensorflow/core/kernels/linalg/matrix_band_part_op.h" #include "tensorflow/core/kernels/transpose_functor.h" +#include "tensorflow/core/util/cuda_solvers.h" #endif namespace tensorflow { @@ -299,4 +299,4 @@ class QrOpGpu : public AsyncOpKernel { } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_QR_OP_IMPL_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_QR_OP_IMPL_H_ diff --git a/tensorflow/core/kernels/self_adjoint_eig_op.cc b/tensorflow/core/kernels/linalg/self_adjoint_eig_op.cc similarity index 98% rename from tensorflow/core/kernels/self_adjoint_eig_op.cc rename to tensorflow/core/kernels/linalg/self_adjoint_eig_op.cc index cea5883db7b..ebf1955b8ff 100644 --- a/tensorflow/core/kernels/self_adjoint_eig_op.cc +++ b/tensorflow/core/kernels/linalg/self_adjoint_eig_op.cc @@ -20,7 +20,7 @@ limitations under the License. 
#include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/denormal.h" #include "tensorflow/core/platform/logging.h" diff --git a/tensorflow/core/kernels/self_adjoint_eig_v2_op_complex128.cc b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_complex128.cc similarity index 93% rename from tensorflow/core/kernels/self_adjoint_eig_v2_op_complex128.cc rename to tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_complex128.cc index 4c7a391d56c..424c33a7ac1 100644 --- a/tensorflow/core/kernels/self_adjoint_eig_v2_op_complex128.cc +++ b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_complex128.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h" +#include "tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/self_adjoint_eig_v2_op_complex64.cc b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_complex64.cc similarity index 93% rename from tensorflow/core/kernels/self_adjoint_eig_v2_op_complex64.cc rename to tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_complex64.cc index 0ec5ec24dd1..bdd20998e3c 100644 --- a/tensorflow/core/kernels/self_adjoint_eig_v2_op_complex64.cc +++ b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_complex64.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h" +#include "tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/self_adjoint_eig_v2_op_double.cc b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_double.cc similarity index 92% rename from tensorflow/core/kernels/self_adjoint_eig_v2_op_double.cc rename to tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_double.cc index 7f81bb69021..afc50500d40 100644 --- a/tensorflow/core/kernels/self_adjoint_eig_v2_op_double.cc +++ b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_double.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h" +#include "tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/self_adjoint_eig_v2_op_float.cc b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_float.cc similarity index 92% rename from tensorflow/core/kernels/self_adjoint_eig_v2_op_float.cc rename to tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_float.cc index bf30952d1e7..1f795777a2e 100644 --- a/tensorflow/core/kernels/self_adjoint_eig_v2_op_float.cc +++ b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_float.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h" +#include "tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/self_adjoint_eig_v2_op_gpu.cc b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_gpu.cc similarity index 99% rename from tensorflow/core/kernels/self_adjoint_eig_v2_op_gpu.cc rename to tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_gpu.cc index 3a84df07a9a..778c50ff408 100644 --- a/tensorflow/core/kernels/self_adjoint_eig_v2_op_gpu.cc +++ b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_gpu.cc @@ -26,12 +26,12 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/kernels/cast_op.h" -#include "tensorflow/core/kernels/cuda_solvers.h" #include "tensorflow/core/kernels/cwise_ops.h" #include "tensorflow/core/kernels/transpose_functor.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/cuda_solvers.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_impl.h similarity index 91% rename from tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h rename to tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_impl.h index b5274f8788b..56f2936a66e 100644 --- a/tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h +++ b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_impl.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_SELF_ADJOINT_EIG_V2_OP_IMPL_H_ -#define TENSORFLOW_CORE_KERNELS_SELF_ADJOINT_EIG_V2_OP_IMPL_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_SELF_ADJOINT_EIG_V2_OP_IMPL_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_SELF_ADJOINT_EIG_V2_OP_IMPL_H_ // See docs in ../ops/linalg_ops.cc. @@ -23,7 +23,7 @@ limitations under the License. #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/denormal.h" #include "tensorflow/core/platform/logging.h" @@ -89,4 +89,4 @@ class SelfAdjointEigV2Op : public LinearAlgebraOp { } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_SELF_ADJOINT_EIG_V2_OP_IMPL_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_SELF_ADJOINT_EIG_V2_OP_IMPL_H_ diff --git a/tensorflow/core/kernels/svd_op_complex128.cc b/tensorflow/core/kernels/linalg/svd_op_complex128.cc similarity index 93% rename from tensorflow/core/kernels/svd_op_complex128.cc rename to tensorflow/core/kernels/linalg/svd_op_complex128.cc index a0f39418aca..36ac629e38a 100644 --- a/tensorflow/core/kernels/svd_op_complex128.cc +++ b/tensorflow/core/kernels/linalg/svd_op_complex128.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/svd_op_impl.h" +#include "tensorflow/core/kernels/linalg/svd_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/svd_op_complex64.cc b/tensorflow/core/kernels/linalg/svd_op_complex64.cc similarity index 93% rename from tensorflow/core/kernels/svd_op_complex64.cc rename to tensorflow/core/kernels/linalg/svd_op_complex64.cc index a8fd50c67d1..50d940b534a 100644 --- a/tensorflow/core/kernels/svd_op_complex64.cc +++ b/tensorflow/core/kernels/linalg/svd_op_complex64.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/svd_op_impl.h" +#include "tensorflow/core/kernels/linalg/svd_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/svd_op_double.cc b/tensorflow/core/kernels/linalg/svd_op_double.cc similarity index 93% rename from tensorflow/core/kernels/svd_op_double.cc rename to tensorflow/core/kernels/linalg/svd_op_double.cc index 539dae3a081..85bbe08d8c9 100644 --- a/tensorflow/core/kernels/svd_op_double.cc +++ b/tensorflow/core/kernels/linalg/svd_op_double.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/svd_op_impl.h" +#include "tensorflow/core/kernels/linalg/svd_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/svd_op_float.cc b/tensorflow/core/kernels/linalg/svd_op_float.cc similarity index 93% rename from tensorflow/core/kernels/svd_op_float.cc rename to tensorflow/core/kernels/linalg/svd_op_float.cc index 03839aa49c3..961d131293b 100644 --- a/tensorflow/core/kernels/svd_op_float.cc +++ b/tensorflow/core/kernels/linalg/svd_op_float.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/svd_op_impl.h" +#include "tensorflow/core/kernels/linalg/svd_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/svd_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/svd_op_gpu.cu.cc similarity index 99% rename from tensorflow/core/kernels/svd_op_gpu.cu.cc rename to tensorflow/core/kernels/linalg/svd_op_gpu.cu.cc index 482fd057e4e..06d1efe6dd5 100644 --- a/tensorflow/core/kernels/svd_op_gpu.cu.cc +++ b/tensorflow/core/kernels/linalg/svd_op_gpu.cu.cc @@ -36,14 +36,14 @@ limitations under the License. 
#include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/types.h" -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/eye_functor.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/eye_functor.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/kernels/transpose_functor.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/stream_executor.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/cuda_solvers.h" #include "tensorflow/core/util/gpu_kernel_helper.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/svd_op_impl.h b/tensorflow/core/kernels/linalg/svd_op_impl.h similarity index 95% rename from tensorflow/core/kernels/svd_op_impl.h rename to tensorflow/core/kernels/linalg/svd_op_impl.h index 675826a057c..c43aaaa4b7b 100644 --- a/tensorflow/core/kernels/svd_op_impl.h +++ b/tensorflow/core/kernels/linalg/svd_op_impl.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_SVD_OP_IMPL_H_ -#define TENSORFLOW_CORE_KERNELS_SVD_OP_IMPL_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_SVD_OP_IMPL_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_SVD_OP_IMPL_H_ // See docs in ../ops/linalg_ops.cc. // @@ -27,7 +27,7 @@ limitations under the License. #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" @@ -118,4 +118,4 @@ class SvdOp : public LinearAlgebraOp { } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_SVD_OP_IMPL_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_SVD_OP_IMPL_H_ diff --git a/tensorflow/core/kernels/tridiagonal_matmul_op.cc b/tensorflow/core/kernels/linalg/tridiagonal_matmul_op.cc similarity index 98% rename from tensorflow/core/kernels/tridiagonal_matmul_op.cc rename to tensorflow/core/kernels/linalg/tridiagonal_matmul_op.cc index 3ddf22012de..9d17c574148 100644 --- a/tensorflow/core/kernels/tridiagonal_matmul_op.cc +++ b/tensorflow/core/kernels/linalg/tridiagonal_matmul_op.cc @@ -19,7 +19,7 @@ limitations under the License. 
#include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/types.h" diff --git a/tensorflow/core/kernels/tridiagonal_matmul_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/tridiagonal_matmul_op_gpu.cu.cc similarity index 96% rename from tensorflow/core/kernels/tridiagonal_matmul_op_gpu.cu.cc rename to tensorflow/core/kernels/linalg/tridiagonal_matmul_op_gpu.cu.cc index 1c82cc18e32..a65db40d822 100644 --- a/tensorflow/core/kernels/tridiagonal_matmul_op_gpu.cu.cc +++ b/tensorflow/core/kernels/linalg/tridiagonal_matmul_op_gpu.cu.cc @@ -22,11 +22,11 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/kernels/transpose_functor.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" #include "tensorflow/core/util/gpu_device_functions.h" #include "tensorflow/core/util/gpu_kernel_helper.h" #include "tensorflow/core/util/gpu_launch_config.h" diff --git a/tensorflow/core/kernels/tridiagonal_solve_op.cc b/tensorflow/core/kernels/linalg/tridiagonal_solve_op.cc similarity index 99% rename from tensorflow/core/kernels/tridiagonal_solve_op.cc rename to tensorflow/core/kernels/linalg/tridiagonal_solve_op.cc index 88931ff3e66..8fe04125f9a 100644 --- a/tensorflow/core/kernels/tridiagonal_solve_op.cc +++ b/tensorflow/core/kernels/linalg/tridiagonal_solve_op.cc @@ -19,7 +19,7 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/types.h" diff --git a/tensorflow/core/kernels/tridiagonal_solve_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/tridiagonal_solve_op_gpu.cu.cc similarity index 99% rename from tensorflow/core/kernels/tridiagonal_solve_op_gpu.cu.cc rename to tensorflow/core/kernels/linalg/tridiagonal_solve_op_gpu.cu.cc index 089fa8c040f..86514cfb033 100644 --- a/tensorflow/core/kernels/tridiagonal_solve_op_gpu.cu.cc +++ b/tensorflow/core/kernels/linalg/tridiagonal_solve_op_gpu.cu.cc @@ -23,11 +23,11 @@ limitations under the License. 
#include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/types.h" -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/kernels/transpose_functor.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" #include "tensorflow/core/util/gpu_device_functions.h" #include "tensorflow/core/util/gpu_kernel_helper.h" #include "tensorflow/core/util/gpu_launch_config.h" diff --git a/tensorflow/core/kernels/linalg_ops_common.h b/tensorflow/core/kernels/linalg_ops_common.h index 65c2fb90f0e..0aa69801f19 100644 --- a/tensorflow/core/kernels/linalg_ops_common.h +++ b/tensorflow/core/kernels/linalg_ops_common.h @@ -12,211 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ - #ifndef TENSORFLOW_CORE_KERNELS_LINALG_OPS_COMMON_H_ #define TENSORFLOW_CORE_KERNELS_LINALG_OPS_COMMON_H_ -// Classes to support linear algebra functionality, similar to the numpy.linalg -// module. Supports batch computation on several matrices at once, sharding the -// computations across different threads if necessary. -#include - -#include "third_party/eigen3/Eigen/Core" -#include "tensorflow/core/framework/kernel_def_builder.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/framework/tensor_types.h" -#include "tensorflow/core/framework/types.h" -#include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/gtl/inlined_vector.h" -#include "tensorflow/core/platform/types.h" -#include "tensorflow/core/util/work_sharder.h" - -namespace tensorflow { - -// Base class for linear algebra operators. -template -class LinearAlgebraOp : public OpKernel { - public: - explicit LinearAlgebraOp(OpKernelConstruction* context) : OpKernel(context) {} - - void Compute(OpKernelContext* context) override; - - protected: - using TensorShapes = gtl::InlinedVector; - // Returns the number of leading inputs that are to be treated as matrix - // inputs. By default this is all the inputs. Derived classes can override - // this to tell the base class to ignore one or more trailing inputs. - virtual int NumMatrixInputs(const OpKernelContext* context) const { - return context->num_inputs(); - } - - // Returns true if the number of inputs and their shapes are as expected. - // Many ops take a single square input matrix, so we provide that as a default - // implementation for convenience. - virtual void ValidateInputMatrixShapes( - OpKernelContext* context, const TensorShapes& input_matrix_shapes) const { - ValidateSingleSquareMatrix(context, input_matrix_shapes); - } - - // Convenience validators for common cases: - // - // Validate op taking a single matrix A. - static void ValidateSingleMatrix(OpKernelContext* context, - const TensorShapes& input_matrix_shapes); - // Validate op taking a single square matrix A. 
- static void ValidateSingleSquareMatrix( - OpKernelContext* context, const TensorShapes& input_matrix_shapes); - // Validate op taking two matrices A and B that have the same number of rows. - static void ValidateSolver(OpKernelContext* context, - const TensorShapes& input_matrix_shapes); - // Validate op taking two matrices A and B that have the same number of rows - // and A is square. - static void ValidateSquareSolver(OpKernelContext* context, - const TensorShapes& input_matrix_shapes); - - // Returns the output shapes of each individual matrix operation. Output - // matrices shapes must be rank 0, 1, or 2. Scalar outputs are rank 0. - // - // The derived class may return a number of shapes (N) less than - // context->num_outputs() (M) to indicate that a only leading subset of - // the outputs will be populated. In this case, a dummy scalar tensor with - // value zero will be return for the last M-N outputs. - // - // For many ops, the output dimensions are the same as the input dimensions, - // so we provide that as a default implementation for convenience. - virtual TensorShapes GetOutputMatrixShapes( - const TensorShapes& input_matrix_shapes) const { - return input_matrix_shapes; - } - - // Returns the cost per matrix operation. This is used to determine the - // number of threads to use for parallelizing calls to ComputeMatrix in - // batch mode. Cost per unit is assumed to be roughly 1ns, based on comments - // in core/util/work_sharder.cc. Many linear algebra ops take roughly max(m,n) - // * min(m,n)^2, where the first input matrix is m-by-n. We provide that as a - // default implementation for convenience. - virtual int64 GetCostPerUnit(const TensorShapes& input_matrix_shapes) const { - double m = static_cast(input_matrix_shapes[0].dim_size(0)); - double n = static_cast(input_matrix_shapes[0].dim_size(1)); - double cost = std::max(m, n) * std::min(m, n) * std::min(m, n); - return cost >= static_cast(kint64max) ? kint64max - : static_cast(cost); - } - - // Returns true if it is safe to forward (alias) input to output buffer - // and expect the kernel to perform the computation inplace. - virtual bool EnableInputForwarding() const { return true; } - - using InputMatrix = Eigen::Matrix; - using InputConstMatrixMap = Eigen::Map; - using InputMatrixMap = Eigen::Map; - using InputConstVectorMap = - Eigen::Map>; - using InputConstMatrixMaps = gtl::InlinedVector; - using InputMatrixMaps = gtl::InlinedVector; - using InputRealScalar = typename Eigen::NumTraits::Real; - - using OutputMatrix = Eigen::Matrix; - using OutputConstMatrixMap = Eigen::Map; - using OutputMatrixMap = Eigen::Map; - using OutputConstVectorMap = - Eigen::Map>; - using OutputConstMatrixMaps = gtl::InlinedVector; - using OutputMatrixMaps = gtl::InlinedVector; - using OutputRealScalar = typename Eigen::NumTraits::Real; - - // backward compatibility - using Scalar = OutputScalar; - using Matrix = - Eigen::Matrix; - using ConstMatrixMap = Eigen::Map; - using MatrixMap = Eigen::Map; - using ConstVectorMap = - Eigen::Map>; - using ConstMatrixMaps = gtl::InlinedVector; - using MatrixMaps = gtl::InlinedVector; - using RealScalar = typename Eigen::NumTraits::Real; - - // Performs a single matrix computation given input matrices, and - // stores the result in outputs. For batch operations, this will be called - // repeatedly for a single call to Compute() when multiple matrices exist in - // input Tensors with rank > 2. In this case the calls to ComputeMatrix are - // parallelized. 
The number of threads used is determined by a cost model from - // the value returned by GetCostPerUnit(). - virtual void ComputeMatrix(OpKernelContext* context, - const InputConstMatrixMaps& inputs, - OutputMatrixMaps* outputs) = 0; - - private: - using TensorInputs = gtl::InlinedVector; - using TensorOutputs = gtl::InlinedVector; - // This function maps 2-d slices (matrices) of the input and output tensors - // using Eigen::Map and calls ComputeMatrix implemented in terms of the - // Eigen::MatrixBase API by the derived class. - // - // The 'matrix_index' parameter specifies the index of the matrix to be used - // from each input tensor, and the index of the matrix to be written to each - // output tensor. The input matrices are in row major order, and located at - // the memory addresses - // inputs[i].flat().data() + - // matrix_index * input_matrix_shapes[i].num_elements() - // for i in 0...inputs.size()-1. - // The output matrices are in row major order, and located at the memory - // address - // outputs[i]->flat().data() + - // matrix_index * output_matrix_shapes[i].num_elements(). - // for i in 0...outputs.size()-1. - // - void ComputeTensorSlice(OpKernelContext* context, int64 matrix_index, - const TensorInputs& inputs, - const TensorShapes& input_matrix_shapes, - const TensorOutputs& outputs, - const TensorShapes& output_matrix_shapes); - - void AnalyzeInputs(OpKernelContext* context, TensorInputs* inputs, - TensorShapes* input_matrix_shapes, - TensorShape* batch_shape); - - void PrepareOutputs(OpKernelContext* context, - const TensorShapes& input_matrix_shapes, - const TensorShape& batch_shape, TensorOutputs* outputs, - TensorShapes* output_matrix_shapes); -}; - -// Declare LinearAlgebraOp, which is explicitly instantiated in -// linalg_ops_common.cc for float, double, complex64, and complex128. -extern template class LinearAlgebraOp; -extern template class LinearAlgebraOp; -extern template class LinearAlgebraOp; -extern template class LinearAlgebraOp; - -} // namespace tensorflow - -#define INHERIT_LINALG_TYPEDEFS(Scalar) \ - typedef LinearAlgebraOp Base; \ - using RealScalar = typename Eigen::NumTraits::Real; \ - using Matrix = typename Base::Matrix; \ - using MatrixMap = typename Base::MatrixMap; \ - using MatrixMaps = typename Base::MatrixMaps; \ - using ConstMatrixMap = typename Base::ConstMatrixMap; \ - using ConstMatrixMaps = typename Base::ConstMatrixMaps; \ - using ConstVectorMap = typename Base::ConstVectorMap; \ - using TensorShapes = typename Base::TensorShapes; - -#define REGISTER_LINALG_OP_CPU(OpName, OpClass, Scalar) \ - REGISTER_KERNEL_BUILDER( \ - Name(OpName).Device(DEVICE_CPU).TypeConstraint("T"), OpClass) - -#define REGISTER_LINALG_OP_GPU(OpName, OpClass, Scalar) \ - REGISTER_KERNEL_BUILDER( \ - Name(OpName).Device(DEVICE_GPU).TypeConstraint("T"), OpClass) - -// Deprecated, use one of the device-specific macros above. -#define REGISTER_LINALG_OP(OpName, OpClass, Scalar) \ - REGISTER_LINALG_OP_CPU(OpName, OpClass, Scalar) +// Temporary forwarding header. +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #endif // TENSORFLOW_CORE_KERNELS_LINALG_OPS_COMMON_H_ diff --git a/tensorflow/core/kernels/segment_reduction_ops_impl.h b/tensorflow/core/kernels/segment_reduction_ops_impl.h index 6c3fad668ae..7cf15ef5b72 100644 --- a/tensorflow/core/kernels/segment_reduction_ops_impl.h +++ b/tensorflow/core/kernels/segment_reduction_ops_impl.h @@ -45,13 +45,13 @@ limitations under the License. 
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM #if GOOGLE_CUDA -#include "tensorflow/core/kernels/cuda_solvers.h" +#include "tensorflow/core/util/cuda_solvers.h" #include "tensorflow/stream_executor/cuda/cuda_activation.h" using stream_executor::cuda::ScopedActivateExecutorContext; #elif TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/cuda_solvers.h" #include "tensorflow/core/platform/rocm.h" +#include "tensorflow/core/util/cuda_solvers.h" using stream_executor::rocm::ScopedActivateExecutorContext; #endif // GOOGLE_CUDA diff --git a/tensorflow/core/kernels/sparse/BUILD b/tensorflow/core/kernels/sparse/BUILD index 1d281bc1d61..bfb6c4934bb 100644 --- a/tensorflow/core/kernels/sparse/BUILD +++ b/tensorflow/core/kernels/sparse/BUILD @@ -80,8 +80,8 @@ tf_kernel_library( "//tensorflow/core/kernels:transpose_functor", "//tensorflow/core/kernels:gpu_prim_hdrs", ] + if_cuda_or_rocm([ - "//tensorflow/core/kernels:cuda_solvers", - "//tensorflow/core/kernels:cuda_sparse", + "//tensorflow/core/util:cuda_solvers", + "//tensorflow/core/util:cuda_sparse", ]), alwayslink = 1, ) diff --git a/tensorflow/core/kernels/sparse/add_op.cc b/tensorflow/core/kernels/sparse/add_op.cc index b6265a1412c..06fe1cd042e 100644 --- a/tensorflow/core/kernels/sparse/add_op.cc +++ b/tensorflow/core/kernels/sparse/add_op.cc @@ -32,8 +32,8 @@ limitations under the License. #include "tensorflow/core/kernels/fill_functor.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/conj_op.cc b/tensorflow/core/kernels/sparse/conj_op.cc index 7275262c1f0..147160fbe6c 100644 --- a/tensorflow/core/kernels/sparse/conj_op.cc +++ b/tensorflow/core/kernels/sparse/conj_op.cc @@ -32,8 +32,8 @@ limitations under the License. #include "tensorflow/core/kernels/sparse/sparse_matrix.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_dense_op.cc b/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_dense_op.cc index 364c2c07bd8..2e5afbdcad7 100644 --- a/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_dense_op.cc +++ b/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_dense_op.cc @@ -34,8 +34,8 @@ limitations under the License. #include "tensorflow/core/util/work_sharder.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_sparse_tensor_op.cc b/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_sparse_tensor_op.cc index 55ebfa4fc10..a81ccfa562e 100644 --- a/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_sparse_tensor_op.cc +++ b/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_sparse_tensor_op.cc @@ -32,8 +32,8 @@ limitations under the License. 
#include "tensorflow/core/util/work_sharder.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/dense_to_csr_sparse_matrix_op.cc b/tensorflow/core/kernels/sparse/dense_to_csr_sparse_matrix_op.cc index 459bb219343..5c62a44f9ba 100644 --- a/tensorflow/core/kernels/sparse/dense_to_csr_sparse_matrix_op.cc +++ b/tensorflow/core/kernels/sparse/dense_to_csr_sparse_matrix_op.cc @@ -35,8 +35,8 @@ limitations under the License. #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h" -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" #endif #if GOOGLE_CUDA diff --git a/tensorflow/core/kernels/sparse/kernels_gpu.cu.cc b/tensorflow/core/kernels/sparse/kernels_gpu.cu.cc index 1c014db3d0a..6b11e64307a 100644 --- a/tensorflow/core/kernels/sparse/kernels_gpu.cu.cc +++ b/tensorflow/core/kernels/sparse/kernels_gpu.cu.cc @@ -20,13 +20,13 @@ limitations under the License. #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_types.h" -#include "tensorflow/core/kernels/cuda_sparse.h" #include "tensorflow/core/kernels/gpu_device_array.h" #include "tensorflow/core/kernels/gpu_device_array_gpu.h" #include "tensorflow/core/kernels/gpu_prim.h" #include "tensorflow/core/kernels/sparse/kernels.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/cuda_sparse.h" #include "tensorflow/core/util/gpu_kernel_helper.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/mat_mul_op.cc b/tensorflow/core/kernels/sparse/mat_mul_op.cc index 50fa0ec88ea..bf9de570fbf 100644 --- a/tensorflow/core/kernels/sparse/mat_mul_op.cc +++ b/tensorflow/core/kernels/sparse/mat_mul_op.cc @@ -37,8 +37,8 @@ limitations under the License. #include "tensorflow/core/platform/threadpool.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/mul_op.cc b/tensorflow/core/kernels/sparse/mul_op.cc index 33c3756ce58..d08f1568db1 100644 --- a/tensorflow/core/kernels/sparse/mul_op.cc +++ b/tensorflow/core/kernels/sparse/mul_op.cc @@ -29,7 +29,7 @@ limitations under the License. #include "tensorflow/core/kernels/sparse/sparse_matrix.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_sparse.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/nnz_op.cc b/tensorflow/core/kernels/sparse/nnz_op.cc index ebc48c3e9a4..d67620443f0 100644 --- a/tensorflow/core/kernels/sparse/nnz_op.cc +++ b/tensorflow/core/kernels/sparse/nnz_op.cc @@ -29,8 +29,8 @@ limitations under the License. 
#include "tensorflow/core/kernels/sparse/sparse_matrix.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/softmax_op.cc b/tensorflow/core/kernels/sparse/softmax_op.cc index 25025bfe2a6..f1a5db8d0f0 100644 --- a/tensorflow/core/kernels/sparse/softmax_op.cc +++ b/tensorflow/core/kernels/sparse/softmax_op.cc @@ -20,7 +20,7 @@ limitations under the License. #define EIGEN_USE_THREADS #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_sparse.h" #define EIGEN_USE_GPU #endif diff --git a/tensorflow/core/kernels/sparse/sparse_mat_mul_op.cc b/tensorflow/core/kernels/sparse/sparse_mat_mul_op.cc index fb652e13d15..fecee9e4555 100644 --- a/tensorflow/core/kernels/sparse/sparse_mat_mul_op.cc +++ b/tensorflow/core/kernels/sparse/sparse_mat_mul_op.cc @@ -36,8 +36,8 @@ limitations under the License. #include "tensorflow/core/util/work_sharder.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/sparse_matrix_components_op.cc b/tensorflow/core/kernels/sparse/sparse_matrix_components_op.cc index 59540f63846..2eaf9bd5310 100644 --- a/tensorflow/core/kernels/sparse/sparse_matrix_components_op.cc +++ b/tensorflow/core/kernels/sparse/sparse_matrix_components_op.cc @@ -30,8 +30,8 @@ limitations under the License. #include "tensorflow/core/kernels/sparse/sparse_matrix.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/sparse_tensor_to_csr_sparse_matrix_op.cc b/tensorflow/core/kernels/sparse/sparse_tensor_to_csr_sparse_matrix_op.cc index e1a4b4194d2..2548ceaa57c 100644 --- a/tensorflow/core/kernels/sparse/sparse_tensor_to_csr_sparse_matrix_op.cc +++ b/tensorflow/core/kernels/sparse/sparse_tensor_to_csr_sparse_matrix_op.cc @@ -33,8 +33,8 @@ limitations under the License. #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h" -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" #endif #if GOOGLE_CUDA diff --git a/tensorflow/core/kernels/sparse/transpose_op.cc b/tensorflow/core/kernels/sparse/transpose_op.cc index 3158eb5016d..08d37fa1692 100644 --- a/tensorflow/core/kernels/sparse/transpose_op.cc +++ b/tensorflow/core/kernels/sparse/transpose_op.cc @@ -20,7 +20,7 @@ limitations under the License. 
#define EIGEN_USE_THREADS #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_sparse.h" #define EIGEN_USE_GPU #endif diff --git a/tensorflow/core/kernels/where_op.cc b/tensorflow/core/kernels/where_op.cc index 598cb526d77..d504ec9b2ed 100644 --- a/tensorflow/core/kernels/where_op.cc +++ b/tensorflow/core/kernels/where_op.cc @@ -39,7 +39,7 @@ limitations under the License. #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h" -#include "tensorflow/core/kernels/cuda_solvers.h" +#include "tensorflow/core/util/cuda_solvers.h" #if GOOGLE_CUDA #include "tensorflow/stream_executor/cuda/cuda_activation.h" using stream_executor::cuda::ScopedActivateExecutorContext; diff --git a/tensorflow/core/util/BUILD b/tensorflow/core/util/BUILD index bb2b9ff429e..d33d239e928 100644 --- a/tensorflow/core/util/BUILD +++ b/tensorflow/core/util/BUILD @@ -14,6 +14,7 @@ load( "tf_copts", "tf_cuda_library", "tf_cuda_only_cc_test", + "tf_kernel_library", ) load("//tensorflow:tensorflow.bzl", "tf_version_info_genrule") load( @@ -24,6 +25,11 @@ load( "//tensorflow/core/platform:build_config_root.bzl", "if_static", ) +load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") +load( + "@local_config_rocm//rocm:build_defs.bzl", + "if_rocm", +) default_package_visibility = [ "//tensorflow/core:__subpackages__", @@ -567,6 +573,63 @@ cc_library( ], ) +tf_kernel_library( + name = "cuda_solvers", + srcs = ["cuda_solvers.cc"], + hdrs = ["cuda_solvers.h"], + # @local_config_cuda//cuda:cusolver_static, //third_party/eigen3:blas, + # and //third_party/libf2c all contain various parts of BLAS, LAPACK, + # and f2c helper functions in global namespace. Tell the compiler to + # allow multiple definitions when linking this. + linkopts = select({ + "//tensorflow:macos": [], + "//tensorflow:windows": [], + "//conditions:default": ["-Wl,-z,muldefs"], + }), + visibility = ["//tensorflow/core/kernels:friends"], + deps = [ + # "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core/platform/default/build_config:cublas_plugin", + "//tensorflow/stream_executor/cuda:cublas_lib", + "//tensorflow/stream_executor/cuda:cusolver_lib", + ], +) + +tf_kernel_library( + name = "rocm_solvers", + srcs = ["rocm_solvers.cc"], + hdrs = ["rocm_solvers.h"], + visibility = ["//tensorflow/core/kernels:friends"], + deps = [ + # "//tensorflow/core:framework", + # "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "//tensorflow/stream_executor/lib", + "//tensorflow/stream_executor/platform:dso_loader", + "//tensorflow/stream_executor/rocm:rocblas_plugin", + "//tensorflow/stream_executor/rocm:rocm_gpu_executor", + ] + if_rocm([ + "@local_config_rocm//rocm:rocprim", + ]), +) + +tf_kernel_library( + name = "cuda_sparse", + srcs = if_cuda(["cuda_sparse.cc"]) + if_rocm(["rocm_sparse.cc"]), + hdrs = ["cuda_sparse.h"], + deps = [ + ":cuda_solvers", + # "//tensorflow/core:framework", + "//tensorflow/core:lib", + ] + if_cuda([ + "//tensorflow/stream_executor/cuda:cusparse_lib", + "@cub_archive//:cub", + ]) + if_rocm([ + "@local_config_rocm//rocm:hipsparse", + ]), +) + # Tests. 
tf_cc_test( diff --git a/tensorflow/core/kernels/cuda_solvers.cc b/tensorflow/core/util/cuda_solvers.cc similarity index 99% rename from tensorflow/core/kernels/cuda_solvers.cc rename to tensorflow/core/util/cuda_solvers.cc index f41ce2a5d27..3e4d2a05ac6 100644 --- a/tensorflow/core/kernels/cuda_solvers.cc +++ b/tensorflow/core/util/cuda_solvers.cc @@ -14,7 +14,7 @@ ============================================================================== */ #ifdef GOOGLE_CUDA -#include "tensorflow/core/kernels/cuda_solvers.h" +#include "tensorflow/core/util/cuda_solvers.h" #include #include diff --git a/tensorflow/core/kernels/cuda_solvers.h b/tensorflow/core/util/cuda_solvers.h similarity index 99% rename from tensorflow/core/kernels/cuda_solvers.h rename to tensorflow/core/util/cuda_solvers.h index eb1d5c8a200..46bd7b42dc6 100644 --- a/tensorflow/core/kernels/cuda_solvers.h +++ b/tensorflow/core/util/cuda_solvers.h @@ -14,8 +14,8 @@ limitations under the License. ============================================================================== */ -#ifndef TENSORFLOW_CORE_KERNELS_CUDA_SOLVERS_H_ -#define TENSORFLOW_CORE_KERNELS_CUDA_SOLVERS_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_CUDA_SOLVERS_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_CUDA_SOLVERS_H_ // This header declares the class CudaSolver, which contains wrappers of linear // algebra solvers in the cuBlas and cuSolverDN libraries for use in TensorFlow @@ -489,4 +489,4 @@ inline DeviceLapackInfo CudaSolver::GetDeviceLapackInfo( #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#endif // TENSORFLOW_CORE_KERNELS_CUDA_SOLVERS_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_CUDA_SOLVERS_H_ diff --git a/tensorflow/core/kernels/cuda_sparse.cc b/tensorflow/core/util/cuda_sparse.cc similarity index 99% rename from tensorflow/core/kernels/cuda_sparse.cc rename to tensorflow/core/util/cuda_sparse.cc index 141aae61571..47e018560e1 100644 --- a/tensorflow/core/kernels/cuda_sparse.cc +++ b/tensorflow/core/util/cuda_sparse.cc @@ -15,7 +15,7 @@ limitations under the License. #ifdef GOOGLE_CUDA -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_sparse.h" #include #include @@ -28,7 +28,6 @@ limitations under the License. #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/types.h" -#include "tensorflow/core/kernels/cuda_solvers.h" #include "tensorflow/core/lib/core/blocking_counter.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/stringpiece.h" @@ -38,6 +37,7 @@ limitations under the License. #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/stream_executor.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/cuda_solvers.h" // TODO(rmlarsen,penporn): Investigate using newer kernels in CUDA 10.1+. diff --git a/tensorflow/core/kernels/cuda_sparse.h b/tensorflow/core/util/cuda_sparse.h similarity index 99% rename from tensorflow/core/kernels/cuda_sparse.h rename to tensorflow/core/util/cuda_sparse.h index 978bc9005ed..813e87610dd 100644 --- a/tensorflow/core/kernels/cuda_sparse.h +++ b/tensorflow/core/util/cuda_sparse.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_CUDA_SPARSE_H_ -#define TENSORFLOW_CORE_KERNELS_CUDA_SPARSE_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_CUDA_SPARSE_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_CUDA_SPARSE_H_ // This header declares the class GpuSparse, which contains wrappers of // cuSparse libraries for use in TensorFlow kernels. @@ -75,8 +75,7 @@ using gpuStream_t = hipStream_t; namespace tensorflow { -inline std::string ConvertGPUSparseErrorToString( - const gpusparseStatus_t status) { +inline string ConvertGPUSparseErrorToString(const gpusparseStatus_t status) { switch (status) { #define STRINGIZE(q) #q #define RETURN_IF_STATUS(err) \ @@ -585,4 +584,4 @@ class GpuSparseCsrSortingConversionInfo { #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#endif // TENSORFLOW_CORE_KERNELS_CUDA_SPARSE_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_CUDA_SPARSE_H_ diff --git a/tensorflow/core/kernels/rocm_solvers.cc b/tensorflow/core/util/rocm_solvers.cc similarity index 99% rename from tensorflow/core/kernels/rocm_solvers.cc rename to tensorflow/core/util/rocm_solvers.cc index 5faf718332e..13dadf602a7 100644 --- a/tensorflow/core/kernels/rocm_solvers.cc +++ b/tensorflow/core/util/rocm_solvers.cc @@ -14,7 +14,7 @@ ============================================================================== */ #if TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/rocm_solvers.h" +#include "tensorflow/core/util/rocm_solvers.h" #include #include diff --git a/tensorflow/core/kernels/rocm_solvers.h b/tensorflow/core/util/rocm_solvers.h similarity index 96% rename from tensorflow/core/kernels/rocm_solvers.h rename to tensorflow/core/util/rocm_solvers.h index 94d3c82a497..afc8b936d05 100644 --- a/tensorflow/core/kernels/rocm_solvers.h +++ b/tensorflow/core/util/rocm_solvers.h @@ -14,8 +14,8 @@ limitations under the License. ============================================================================== */ -#ifndef TENSORFLOW_CORE_KERNELS_ROCM_SOLVERS_H_ -#define TENSORFLOW_CORE_KERNELS_ROCM_SOLVERS_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_ROCM_SOLVERS_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_ROCM_SOLVERS_H_ // This header declares the class ROCmSolver, which contains wrappers of linear // algebra solvers in the cuBlas and cuSolverDN libraries for use in TensorFlow @@ -158,4 +158,4 @@ class ScratchSpace { #endif // TENSORFLOW_USE_ROCM -#endif // TENSORFLOW_CORE_KERNELS_ROCM_SOLVERS_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_ROCM_SOLVERS_H_ diff --git a/tensorflow/core/kernels/rocm_sparse.cc b/tensorflow/core/util/rocm_sparse.cc similarity index 99% rename from tensorflow/core/kernels/rocm_sparse.cc rename to tensorflow/core/util/rocm_sparse.cc index 97488692bc1..cc7b56fdc01 100644 --- a/tensorflow/core/kernels/rocm_sparse.cc +++ b/tensorflow/core/util/rocm_sparse.cc @@ -24,8 +24,6 @@ limitations under the License. #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/types.h" -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" #include "tensorflow/core/lib/core/blocking_counter.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/stringpiece.h" @@ -35,6 +33,8 @@ limitations under the License. 
#include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/stream_executor.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" namespace tensorflow { namespace { From 4e118127252147ab755b1f92c8127f93564205f8 Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Fri, 31 Jul 2020 14:12:22 -0700 Subject: [PATCH 1895/2522] Introduce functional<->region conversion passes around extract outside compilation - Follow functional->region transformation with a inlining pass to make sure calls generated by the transform get inlined. PiperOrigin-RevId: 324282111 Change-Id: Ifaacec3d8919f390fdeda8ca9af129d8e7dce086 --- .../mlir/tensorflow/transforms/bridge.cc | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc b/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc index cb1dd2332a8..ed0528ae054 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc @@ -82,15 +82,23 @@ void CreateTPUBridgePipeline(OpPassManager &pm) { // Run shape inference so that tf_executor/tf_device ops created later will // likely to inherit more concrete types. pm.addPass(TF::CreateTFShapeInferencePass()); - OpPassManager &func_pm = pm.nest(); - func_pm.addPass(CreateTPUClusterFormationPass()); - // Place DecomposeResourceOpsPass before TFExecutorConstantSinking pass - // because DecomposeResourceOpsPass uses pattern rewriter which hoists - // changed constants out of tf_device.Launch. - func_pm.addPass(TFDevice::CreateDecomposeResourceOpsPass()); - func_pm.addPass(CreateTPUHostComputationExpansionPass()); - pm.addNestedPass(CreateTPUUpdateEmbeddingEnqueueOpInputsPass()); + // Encode this in its own scope so that func_pm is not mistakenly used + // later on. + { + OpPassManager &func_pm = pm.nest(); + func_pm.addPass(CreateTPUClusterFormationPass()); + // Place DecomposeResourceOpsPass before TFExecutorConstantSinking pass + // because DecomposeResourceOpsPass uses pattern rewriter which hoists + // changed constants out of tf_device.Launch. + func_pm.addPass(TFDevice::CreateDecomposeResourceOpsPass()); + func_pm.addPass(CreateTPUHostComputationExpansionPass()); + func_pm.addPass(CreateTPUUpdateEmbeddingEnqueueOpInputsPass()); + } + pm.addPass(TF::CreateTFFunctionalControlFlowToRegions()); + pm.addPass(mlir::createInlinerPass()); pm.addPass(CreateTPUExtractHeadTailOutsideCompilationPass()); + pm.addPass(TF::CreateTFRegionControlFlowToFunctional()); + // Run another shape inference pass because resource decomposition might have // created new partial types. 
pm.addPass(TF::CreateTFShapeInferencePass()); From cbc7ac0e8c1ef93a3a09650fb77a06774911dcea Mon Sep 17 00:00:00 2001 From: Jonathan Chu Date: Fri, 31 Jul 2020 21:58:53 +0000 Subject: [PATCH 1896/2522] Modify _convert_inputs_to_signature, return empty list for flat_kwargs --- tensorflow/python/eager/function.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 41b9bbaa13e..8c5815e21b3 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -2698,11 +2698,11 @@ class FunctionSpec(object): return inputs, kwargs, flat_inputs, flat_kwargs else: assert not kwargs - inputs = _convert_inputs_to_signature( + inputs, flat_inputs = _convert_inputs_to_signature( inputs, self._input_signature, self._flat_input_signature) - return inputs, {}, flat_inputs, {} ## TODO(jlchu): Check if last should be a dict + return inputs, {}, flat_inputs, [] def _as_ndarray(value): @@ -2802,7 +2802,7 @@ def _convert_inputs_to_signature(inputs, input_signature, flat_input_signature): flat_sequence=flatten_inputs, expand_composites=True) - return inputs + return inputs, flatten_inputs class FunctionCache(object): From 781ff0196ce5a877ec17655157fa681c3cb1e027 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 31 Jul 2020 14:15:03 -0700 Subject: [PATCH 1897/2522] Change the namespace for core boosted trees quantiles. PiperOrigin-RevId: 324282593 Change-Id: I047b5e2ea221a5d9eaf920b5b46317959c6b4207 --- .../core/kernels/boosted_trees/quantile_ops.cc | 12 ++++-------- .../quantiles/quantile_stream_resource.h | 2 +- .../quantiles/weighted_quantiles_buffer.h | 4 ++-- .../quantiles/weighted_quantiles_buffer_test.cc | 6 +++--- .../quantiles/weighted_quantiles_stream.h | 4 ++-- .../quantiles/weighted_quantiles_stream_test.cc | 9 +++++---- .../quantiles/weighted_quantiles_summary.h | 4 ++-- .../quantiles/weighted_quantiles_summary_test.cc | 14 +++++++------- 8 files changed, 26 insertions(+), 29 deletions(-) diff --git a/tensorflow/core/kernels/boosted_trees/quantile_ops.cc b/tensorflow/core/kernels/boosted_trees/quantile_ops.cc index ad1e5a47f00..0065bdd66aa 100644 --- a/tensorflow/core/kernels/boosted_trees/quantile_ops.cc +++ b/tensorflow/core/kernels/boosted_trees/quantile_ops.cc @@ -51,12 +51,12 @@ const char* const kResourceHandleName = "quantile_stream_resource_handle"; using QuantileStreamResource = BoostedTreesQuantileStreamResource; using QuantileStream = - core_boosted_trees::quantiles::WeightedQuantilesStream; + boosted_trees::quantiles::WeightedQuantilesStream; using QuantileSummary = - core_boosted_trees::quantiles::WeightedQuantilesSummary; + boosted_trees::quantiles::WeightedQuantilesSummary; using QuantileSummaryEntry = - core_boosted_trees::quantiles::WeightedQuantilesSummary< - float, float>::SummaryEntry; + boosted_trees::quantiles::WeightedQuantilesSummary::SummaryEntry; // Generates quantiles on a finalized QuantileStream. std::vector GenerateBoundaries(const QuantileStream& stream, @@ -421,10 +421,6 @@ class BoostedTreesQuantileStreamResourceFlushOp : public OpKernel { generate_quantiles_ ? GenerateQuantiles(*stream, num_buckets) : GenerateBoundaries(*stream, num_buckets), stream_idx); - VLOG(1) << "Generated " - << stream_resource->boundaries(stream_idx).size() - << " boundaries. 
Num buckets: " << num_buckets - << " Generate quantiles: " << generate_quantiles_; } }; diff --git a/tensorflow/core/kernels/boosted_trees/quantiles/quantile_stream_resource.h b/tensorflow/core/kernels/boosted_trees/quantiles/quantile_stream_resource.h index d2e3720aff5..10afc9ee618 100644 --- a/tensorflow/core/kernels/boosted_trees/quantiles/quantile_stream_resource.h +++ b/tensorflow/core/kernels/boosted_trees/quantiles/quantile_stream_resource.h @@ -24,7 +24,7 @@ namespace tensorflow { using QuantileStream = - core_boosted_trees::quantiles::WeightedQuantilesStream; + boosted_trees::quantiles::WeightedQuantilesStream; // Quantile Stream Resource for a list of streams sharing the same number of // quantiles, maximum elements, and epsilon. diff --git a/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_buffer.h b/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_buffer.h index 5f7f74ec7bb..07aa9831c44 100644 --- a/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_buffer.h +++ b/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_buffer.h @@ -23,7 +23,7 @@ #include "tensorflow/core/platform/types.h" namespace tensorflow { -namespace core_boosted_trees { +namespace boosted_trees { namespace quantiles { // Buffering container ideally suited for scenarios where we need @@ -126,7 +126,7 @@ constexpr decltype(CompareFn()) WeightedQuantilesBuffer::kCompFn; } // namespace quantiles -} // namespace core_boosted_trees +} // namespace boosted_trees } // namespace tensorflow #endif // TENSORFLOW_CORE_KERNELS_BOOSTED_TREES_QUANTILES_WEIGHTED_QUANTILES_BUFFER_H_ diff --git a/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_buffer_test.cc b/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_buffer_test.cc index d99a6816f9b..29e28811225 100644 --- a/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_buffer_test.cc +++ b/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_buffer_test.cc @@ -22,10 +22,10 @@ namespace tensorflow { namespace { using Buffer = - core_boosted_trees::quantiles::WeightedQuantilesBuffer; + boosted_trees::quantiles::WeightedQuantilesBuffer; using BufferEntry = - core_boosted_trees::quantiles::WeightedQuantilesBuffer::BufferEntry; + boosted_trees::quantiles::WeightedQuantilesBuffer::BufferEntry; class WeightedQuantilesBufferTest : public ::testing::Test {}; diff --git a/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_stream.h b/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_stream.h index 6f195bc4dd2..1e8d8d5e22d 100644 --- a/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_stream.h +++ b/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_stream.h @@ -24,7 +24,7 @@ #include "tensorflow/core/platform/types.h" namespace tensorflow { -namespace core_boosted_trees { +namespace boosted_trees { namespace quantiles { // Class to compute approximate quantiles with error bound guarantees for @@ -326,7 +326,7 @@ WeightedQuantilesStream::GetQuantileSpecs( } } // namespace quantiles -} // namespace core_boosted_trees +} // namespace boosted_trees } // namespace tensorflow #endif // TENSORFLOW_CORE_KERNELS_BOOSTED_TREES_QUANTILES_WEIGHTED_QUANTILES_STREAM_H_ diff --git a/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_stream_test.cc b/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_stream_test.cc index 59173bfc2c5..6c5b9fd23bf 100644 --- 
a/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_stream_test.cc +++ b/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_stream_test.cc @@ -23,11 +23,12 @@ namespace { using Tuple = std::tuple; using Summary = - core_boosted_trees::quantiles::WeightedQuantilesSummary; -using SummaryEntry = core_boosted_trees::quantiles::WeightedQuantilesSummary< - double, double>::SummaryEntry; + boosted_trees::quantiles::WeightedQuantilesSummary; +using SummaryEntry = + boosted_trees::quantiles::WeightedQuantilesSummary::SummaryEntry; using Stream = - core_boosted_trees::quantiles::WeightedQuantilesStream; + boosted_trees::quantiles::WeightedQuantilesStream; TEST(GetQuantileSpecs, InvalidEps) { EXPECT_DEATH({ Stream::GetQuantileSpecs(-0.01, 0L); }, "eps >= 0"); diff --git a/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_summary.h b/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_summary.h index ca8eb5fd266..a22af7ab71e 100644 --- a/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_summary.h +++ b/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_summary.h @@ -22,7 +22,7 @@ #include "tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_buffer.h" namespace tensorflow { -namespace core_boosted_trees { +namespace boosted_trees { namespace quantiles { // Summary holding a sorted block of entries with upper bound guarantees @@ -366,7 +366,7 @@ constexpr decltype(CompareFn()) WeightedQuantilesSummary::kCompFn; } // namespace quantiles -} // namespace core_boosted_trees +} // namespace boosted_trees } // namespace tensorflow #endif // TENSORFLOW_CORE_KERNELS_BOOSTED_TREES_QUANTILES_WEIGHTED_QUANTILES_SUMMARY_H_ diff --git a/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_summary_test.cc b/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_summary_test.cc index 0f8d1a3cbfd..ccd1215cf49 100644 --- a/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_summary_test.cc +++ b/tensorflow/core/kernels/boosted_trees/quantiles/weighted_quantiles_summary_test.cc @@ -21,15 +21,15 @@ namespace tensorflow { namespace { -using Buffer = - core_boosted_trees::quantiles::WeightedQuantilesBuffer; +using Buffer = boosted_trees::quantiles::WeightedQuantilesBuffer; using BufferEntry = - core_boosted_trees::quantiles::WeightedQuantilesBuffer::BufferEntry; + boosted_trees::quantiles::WeightedQuantilesBuffer::BufferEntry; using Summary = - core_boosted_trees::quantiles::WeightedQuantilesSummary; -using SummaryEntry = core_boosted_trees::quantiles::WeightedQuantilesSummary< - float, float>::SummaryEntry; + boosted_trees::quantiles::WeightedQuantilesSummary; +using SummaryEntry = + boosted_trees::quantiles::WeightedQuantilesSummary::SummaryEntry; class WeightedQuantilesSummaryTest : public ::testing::Test { protected: From 934b4b6a35a22ec203be4dac3a3bdf4e64497fff Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Fri, 31 Jul 2020 14:21:44 -0700 Subject: [PATCH 1898/2522] Move scalar multiply to the smaller side of convolution. 
PiperOrigin-RevId: 324283914 Change-Id: I40f5f8cbf47e4c60997ed03bbff114f5f17519b4 --- .../xla/service/algebraic_simplifier.cc | 199 ------------------ .../xla/service/algebraic_simplifier.h | 12 -- .../xla/service/algebraic_simplifier_test.cc | 53 ----- 3 files changed, 264 deletions(-) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index d77e62a1357..1f82c062df9 100755 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -428,10 +428,6 @@ class AlgebraicSimplifierVisitor : public DfsHloRewriteVisitor { shape, hlo, zero, dims, AddReduce_computation)); } - // Move scalar multiply to the smallest side of convolution to - // reduce multiply computations. - Status ScalarMultiplyReduction(HloInstruction* dot); - // Convenience method for replacing an instruction with a bitcast. If operand // is not null, then the bitcast will use the specified operand instead of the // operand of the instruction. @@ -567,197 +563,6 @@ bool AlgebraicSimplifierVisitor::SameShape(const HloInstruction* lhs, } } -namespace { - -float GetConstantValue(HloInstruction* inst) { - switch (inst->shape().element_type()) { - case BF16: - return static_cast(inst->literal().GetFirstElement()); - case F32: - return inst->literal().GetFirstElement(); - default: - LOG(FATAL) << "Unsupported data type: " << inst->shape().element_type(); - } -} - -bool IsOpCodeMultiplyCommutative(HloOpcode opcode) { - switch (opcode) { - case HloOpcode::kMultiply: - case HloOpcode::kTranspose: - case HloOpcode::kReshape: - case HloOpcode::kSelect: - return true; - default: - return false; - } -} - -std::unique_ptr MakeScalarInstruction(HloInstruction* target, - float multiplier) { - switch (target->shape().element_type()) { - case BF16: - return HloInstruction::CreateConstant(LiteralUtil::ConvertF32ToBF16( - LiteralUtil::CreateR0(multiplier))); - break; - case F32: - return HloInstruction::CreateConstant( - LiteralUtil::CreateR0(multiplier)); - break; - default: - LOG(FATAL) << "Unsupported data type: " << target->shape().element_type(); - } -} - -} // namespace - -Status AlgebraicSimplifierVisitor::ScalarMultiplyReduction( - HloInstruction* dot) { - // We only process bfloat16 and float32 for now. - if (dot->shape().element_type() != BF16 && - dot->shape().element_type() != F32) { - return Status::OK(); - } - - auto lhs = dot->mutable_operand(0); - auto rhs = dot->mutable_operand(1); - - const int64 dot_size = ShapeUtil::ElementsIn(dot->shape()); - const int64 lhs_size = ShapeUtil::ElementsIn(lhs->shape()); - const int64 rhs_size = ShapeUtil::ElementsIn(rhs->shape()); - - HloInstruction* target = nullptr; - // (current node, user, operand_index) - std::vector> operands; - std::vector users; - - // Find which side of dot has the smallest size: - // operand 0, operand 1, or output. 
- if (dot_size <= std::min(lhs_size, rhs_size)) { - target = dot; - if (dot_size < lhs_size) { - operands.emplace_back(lhs, dot, 0); - } - if (dot_size < rhs_size) { - operands.emplace_back(rhs, dot, 1); - } - } else if (lhs_size <= rhs_size) { - target = lhs; - if (lhs_size < rhs_size) { - operands.emplace_back(rhs, dot, 1); - } - if (lhs_size < dot_size && dot->user_count() == 1) { - users.push_back(dot->users().front()); - } - } else { - target = rhs; - if (rhs_size < lhs_size) { - operands.emplace_back(lhs, dot, 0); - } - if (rhs_size < dot_size && dot->user_count() == 1) { - users.push_back(dot->users().front()); - } - } - - std::vector values; - - // DFS to find scalar multiply ops from the operands. - while (!operands.empty()) { - auto [inst, user, index] = operands.back(); - operands.pop_back(); - - // Skip the op types that are not commutative with multiply. - if (!IsOpCodeMultiplyCommutative(inst->opcode())) { - continue; - } - - HloInstruction* operand; - HloInstruction* multiplier; - // Pattern match a scalar multiply. - if (Match(inst, m::MultiplyAnyOrder( - m::Op(&operand), - m::Broadcast(m::ConstantScalar(&multiplier))))) { - CHECK_LT(index, user->operand_count()); - CHECK_EQ(inst, user->operands()[index]); - - // When found a scalar multiply, save its scalar value. - values.push_back(GetConstantValue(multiplier)); - // And remove the scalar multiply op. - TF_RETURN_IF_ERROR(user->ReplaceOperandWith(index, operand)); - inst = operand; - } - - // Push the operands of inst. - int64 i = 0; - for (auto* operand : inst->operands()) { - operands.emplace_back(operand, inst, i++); - } - } - - // DFS to find scalar multiply ops from the users. - while (!users.empty()) { - auto inst = users.back(); - users.pop_back(); - - if (!IsOpCodeMultiplyCommutative(inst->opcode())) { - continue; - } - - HloInstruction* operand; - HloInstruction* multiplier; - if (Match(inst, m::MultiplyAnyOrder( - m::Op(&operand), - m::Broadcast(m::ConstantScalar(&multiplier))))) { - values.push_back(GetConstantValue(multiplier)); - - TF_RETURN_IF_ERROR(inst->ReplaceAllUsesWith(operand)); - inst = operand; - } - - // Process the instructions with only one user. - // Otherwise moving scalar multiply to the operands changes the values of - // other users. - if (inst->user_count() == 1) { - users.push_back(inst->users().front()); - } - } - - if (values.empty()) { - return Status::OK(); - } - - changed_ = true; - - // Combine all constant multipliers. - float multiplier = 1.0; - for (const float v : values) { - multiplier *= v; - } - - // Create a new const scalar multiply instruction. - HloInstruction* new_const_inst; - new_const_inst = - computation_->AddInstruction(MakeScalarInstruction(target, multiplier)); - - // Broadcast the scalar multiplier. - HloInstruction* new_broadcast = computation_->AddInstruction( - HloInstruction::CreateBroadcast(target->shape(), new_const_inst, {})); - // Create a new scalar multiply instruction. - HloInstruction* new_multiply = - computation_->AddInstruction(HloInstruction::CreateBinary( - target->shape(), HloOpcode::kMultiply, target, new_broadcast)); - CHECK_EQ(new_multiply->shape(), target->shape()); - - // Update the dependency with the rest of the instructions. 
- if (target == lhs) { - return dot->ReplaceOperandWith(0, new_multiply); - } else if (target == rhs) { - return dot->ReplaceOperandWith(1, new_multiply); - } else { - CHECK_EQ(target, dot); - return dot->ReplaceAllUsesWith(new_multiply); - } -} - void AlgebraicSimplifierVisitor::ReplaceWithBitcast(HloInstruction* instruction, HloInstruction* operand) { CHECK_EQ(1, instruction->operand_count()); @@ -5237,10 +5042,6 @@ StatusOr AlgebraicSimplifierVisitor::SimplifyConvToDot( Status AlgebraicSimplifierVisitor::HandleConvolution( HloInstruction* convolution) { - if (options_.enable_scalar_multiply_reduction()) { - TF_RETURN_IF_ERROR(ScalarMultiplyReduction(convolution)); - } - // Zero-sized input or filter. if (ShapeUtil::IsZeroElementArray(convolution->operand(0)->shape()) || ShapeUtil::IsZeroElementArray(convolution->operand(1)->shape())) { diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.h b/tensorflow/compiler/xla/service/algebraic_simplifier.h index 9f2a3404116..9f29df3c209 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.h +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.h @@ -86,17 +86,6 @@ class AlgebraicSimplifierOptions { } bool enable_conv_operand_swap() const { return enable_conv_operand_swap_; } - // Move constant scalar multiply to one operand or output of convolutions with - // the smallest tensor size, to reduce the number of scalar multiply. - void set_enable_scalar_multiply_reduction( - bool enable_scalar_multiply_reduction) { - enable_scalar_multiply_reduction_ = enable_scalar_multiply_reduction; - } - - bool enable_scalar_multiply_reduction() const { - return enable_scalar_multiply_reduction_; - } - // If enable_window_reduce_replacement is true, the kReduceWindow instruction // can be optimized by replacement with simpler operations. 
void set_enable_window_reduce_to_reduce_replacement( @@ -157,7 +146,6 @@ class AlgebraicSimplifierOptions { bool enable_dot_to_multiply_rewrite_{true}; bool enable_conv_simplification_{true}; bool enable_conv_operand_swap_{true}; - bool enable_scalar_multiply_reduction_{false}; bool enable_window_reduce_to_reduce_replacement_{true}; bool enable_reduce_of_reshape_{true}; bool replace_transpose_with_bitcast_{true}; diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index 90ca44714f7..034d8ec4361 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -5343,59 +5343,6 @@ ENTRY AddBroadcastZeroWithDynamicSlice { EXPECT_THAT(root->operand(1)->opcode(), HloOpcode::kPad); } -TEST_F(AlgebraicSimplifierTest, ScalarMultiplyReduction) { - const char* hlo_string = R"( -HloModule ConstScalarMultiply -ENTRY ConstScalarMultiply { - param0 = f32[16,512,4096]{2,1,0} parameter(0) - constant.0 = f32[] constant(0.5) - broadcast.0 = f32[16,512,4096] broadcast(constant.0), dimensions={} - multiply.0 = f32[16,512,4096]{2,1,0} multiply(param0, broadcast.0) - param1 = f32[16,512,4096]{2,1,0} parameter(1) - multiply.1 = f32[16,512,4096]{2,1,0} multiply(multiply.0, param1) - param2 = f32[16,512,1024]{2,1,0} parameter(2) - constant.1 = f32[] constant(1.109) - broadcast.1 = f32[16,512,1024] broadcast(constant.1), dimensions={} - multiply.2 = f32[16,512,1024]{2,1,0} multiply(param2, broadcast.1) - ROOT convolution = f32[4096,1024,1]{1,0,2} convolution(multiply.1, multiply.2), window={size=16}, dim_labels=0fb_0io->bf0 -} -)"; - TF_ASSERT_OK_AND_ASSIGN(auto module, - ParseAndReturnVerifiedModule(hlo_string)); - AlgebraicSimplifierOptions options; - options.set_enable_scalar_multiply_reduction(true); - AlgebraicSimplifier simplifier(options); - ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); - auto root = module->entry_computation()->root_instruction(); - EXPECT_EQ(root->opcode(), HloOpcode::kMultiply); - EXPECT_THAT(root, - GmockMatch(m::MultiplyAnyOrder( - m::Op(), m::Broadcast(m::ConstantScalar(0.5f * 1.109f))))); -} - -TEST_F(AlgebraicSimplifierTest, ScalarMultiplyReductionMultiUser) { - const char* hlo_string = R"( -HloModule ConstScalarMultiply -ENTRY ConstScalarMultiply { - param0 = f32[16,512,1024] parameter(0) - param1 = f32[4096,1024,1] parameter(1) - convolution = f32[16,512,4096] convolution(param0, param1), window={size=1}, dim_labels=0bf_oi0->0bf - constant.1 = f32[] constant(0.5) - broadcast.1 = f32[16,512,4096] broadcast(constant.1), dimensions={} - multiply.1 = f32[16,512,4096] multiply(convolution, broadcast.1) - param2 = f32[16,512,4096] parameter(2) - multiply.2 = f32[16,512,4096] multiply(convolution, param2) - ROOT add.1 = f32[16,512,4096] add(multiply.1, multiply.2) -} -)"; - TF_ASSERT_OK_AND_ASSIGN(auto module, - ParseAndReturnVerifiedModule(hlo_string)); - AlgebraicSimplifierOptions options; - options.set_enable_scalar_multiply_reduction(true); - AlgebraicSimplifier simplifier(options); - ASSERT_FALSE(simplifier.Run(module.get()).ValueOrDie()); -} - INSTANTIATE_TEST_SUITE_P(DotOfConcatSimplificationTestInstantiation, DotOfConcatSimplificationTest, ::testing::ValuesIn(kDotOfConcatTestSpecs)); From 4a02cef1bfaaa96f6796e601e6e12ef009f61835 Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Fri, 31 Jul 2020 18:03:16 +0000 Subject: [PATCH 1899/2522] updated include for integral_types --- tensorflow/c/kernels/summary_op.cc | 
2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/c/kernels/summary_op.cc b/tensorflow/c/kernels/summary_op.cc index 105b0312e11..a631567c9fb 100644 --- a/tensorflow/c/kernels/summary_op.cc +++ b/tensorflow/c/kernels/summary_op.cc @@ -26,7 +26,7 @@ limitations under the License. #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/tstring.h" #include "tensorflow/core/platform/strcat.h" -#include "tensorflow/core/platform/default/integral_types.h" +#include "tensorflow/core/platform/types.h" #include "tensorflow/core/framework/selective_registration.h" #include "tensorflow/core/framework/summary.pb.h" #include "tensorflow/core/framework/types.h" From 8a2c608cf7d18b5eea38d1956b076ee83e8e91f9 Mon Sep 17 00:00:00 2001 From: Smit Hinsu Date: Fri, 31 Jul 2020 14:37:20 -0700 Subject: [PATCH 1900/2522] Restrict GetDimensionSize HLO op result type to 32 bit integer XLA implementation has this limitation and always uses 32 bit result for this instruction. This will cause mismatch between the result type in MLIR and XLA at the time of export. This should be resolved once we have a special dialect mapping directly to HLOInstructionProto. Another option until then could be to introduce a pass to legalize mhlo itself to match XLA semantics. PiperOrigin-RevId: 324286936 Change-Id: Ice7893f9920bbbc96936b90c8063248b1627e3e9 --- .../mlir-hlo/Dialect/mhlo/IR/hlo_ops.td | 5 ++- .../compiler/mlir/xla/tests/legalize-tf.mlir | 35 +++++++++++++++---- .../mlir/xla/transforms/legalize_tf.cc | 20 ++++++----- 3 files changed, 43 insertions(+), 17 deletions(-) diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td index e2c9a1aac89..db98bd16f76 100644 --- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td @@ -1075,7 +1075,10 @@ def HLO_GetDimensionSizeOp: HLO_Op<"get_dimension_size", [NoSideEffect]>, HLO_Tensor:$operand, I32Attr:$dimension ); - let results = (outs HLO_IntTensor); + // TODO(hinsu): Allow 64-bit result types once XLA HLO dialect based on the + // XLA semantics is available. This limitation is because of the current XLA + // implementation. 
+ let results = (outs I32Tensor); } def HLO_MapOp: HLO_Op<"map", diff --git a/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir b/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir index a25da49e472..c406022be5c 100644 --- a/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir +++ b/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir @@ -3482,8 +3482,8 @@ func @cross_replica_sum(%input: tensor<10xf32>) -> tensor<10xf32> { // tf.Size legalization //===----------------------------------------------------------------------===// -// CHECK-LABEL: @size_rank_one_i32 -func @size_rank_one_i32(%input: tensor) -> (tensor) { +// CHECK-LABEL: @size_scalar_i32 +func @size_scalar_i32(%input: tensor) -> (tensor) { // CHECK: %[[CONST:.*]] = mhlo.constant dense<1> // CHECK-SAME: tensor %size = "tf.Size"(%input) {T = "tfdtype$DT_FLOAT", out_type = "tfdtype$DT_INT32"} : (tensor) -> tensor @@ -3491,8 +3491,8 @@ func @size_rank_one_i32(%input: tensor) -> (tensor) { return %size : tensor } -// CHECK-LABEL: @size_rank_one_i64 -func @size_rank_one_i64(%input: tensor) -> (tensor) { +// CHECK-LABEL: @size_scalar_i64 +func @size_scalar_i64(%input: tensor) -> (tensor) { // CHECK: %[[CONST:.*]] = mhlo.constant dense<1> // CHECK-SAME: tensor %size = "tf.Size"(%input) {T = "tfdtype$DT_FLOAT", out_type = "tfdtype$DT_INT64"} : (tensor) -> tensor @@ -3500,19 +3500,40 @@ func @size_rank_one_i64(%input: tensor) -> (tensor) { return %size : tensor } +// CHECK-LABEL: @size_rank_one_i64 +// CHECK-SAME: (%[[INPUT:.*]]: tensor) +func @size_rank_one_i64(%input: tensor) -> (tensor) { + // CHECK: %[[INIT:.*]] = mhlo.constant dense<1> + // CHECK-SAME: tensor + + // CHECK: %[[DIM_0:.*]] = "mhlo.get_dimension_size"(%[[INPUT]]) + // CHECK-SAME: dimension = 0 + // CHECK-SAME: tensor + + // CHECK: %[[CAST_DIM_0:.*]] = "mhlo.convert"(%[[DIM_0]]) : (tensor) -> tensor + // CHECK: %[[RESULT:.*]] = chlo.broadcast_multiply %[[INIT]], %[[CAST_DIM_0]] + + %size = "tf.Size"(%input) : (tensor) -> tensor + // CHECK: return %[[RESULT]] + return %size : tensor +} + // CHECK-LABEL: @size_ranked // CHECK-SAME: (%[[INPUT:.*]]: tensor<2x?x8xf32>) func @size_ranked(%input: tensor<2x?x8xf32>) -> (tensor) { // CHECK: %[[CONST:.*]] = mhlo.constant dense<1> // CHECK: %[[DIM_0:.*]] = "mhlo.get_dimension_size"(%[[INPUT]]) // CHECK-SAME: dimension = 0 - // CHECK: %[[MUL_0:.*]] = chlo.broadcast_multiply %[[CONST]], %[[DIM_0]] + // CHECK: %[[CAST_DIM_0:.*]] = "mhlo.convert"(%[[DIM_0]]) : (tensor) -> tensor + // CHECK: %[[MUL_0:.*]] = chlo.broadcast_multiply %[[CONST]], %[[CAST_DIM_0]] // CHECK: %[[DIM_1:.*]] = "mhlo.get_dimension_size"(%[[INPUT]]) // CHECK-SAME: dimension = 1 - // CHECK: %[[MUL_1:.*]] = chlo.broadcast_multiply %[[MUL_0]], %[[DIM_1]] + // CHECK: %[[CAST_DIM_1:.*]] = "mhlo.convert"(%[[DIM_1]]) : (tensor) -> tensor + // CHECK: %[[MUL_1:.*]] = chlo.broadcast_multiply %[[MUL_0]], %[[CAST_DIM_1]] // CHECK: %[[DIM_2:.*]] = "mhlo.get_dimension_size"(%[[INPUT]]) // CHECK-SAME: dimension = 2 - // CHECK: %[[MUL_2:.*]] = chlo.broadcast_multiply %[[MUL_1]], %[[DIM_2]] + // CHECK: %[[CAST_DIM_2:.*]] = "mhlo.convert"(%[[DIM_2]]) : (tensor) -> tensor + // CHECK: %[[MUL_2:.*]] = chlo.broadcast_multiply %[[MUL_1]], %[[CAST_DIM_2]] %size = "tf.Size"(%input) {T = "tfdtype$DT_FLOAT", out_type = "tfdtype$DT_INT32"} : (tensor<2x?x8xf32>) -> tensor // CHECK: return %[[MUL_2]] return %size : tensor diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc index bd0871d08aa..0b420fff785 100644 --- 
a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc @@ -2630,19 +2630,21 @@ class ConvertSizeOp : public OpRewritePattern { if (!input_ty) return failure(); const int64_t rank = input_ty.getRank(); - auto result_type = op.getResult().getType(); - Operation *size = - GetScalarConstOfType(result_type.cast().getElementType(), - op.getLoc(), 1, &rewriter); + auto result_ty = op.getResult().getType(); + auto element_ty = result_ty.cast().getElementType(); + Value size = GetScalarConstOfType(element_ty, op.getLoc(), 1, &rewriter); for (int64_t i = 0; i < rank; ++i) { - auto dim = rewriter.create( - op.getLoc(), result_type, input, - rewriter.getIntegerAttr(rewriter.getIntegerType(32), i)); + auto i32_ty = rewriter.getIntegerType(32); + auto size_ty = RankedTensorType::get({}, i32_ty); + auto dim_index = rewriter.getIntegerAttr(i32_ty, i); + Value dim = rewriter.create(op.getLoc(), size_ty, + input, dim_index); + dim = rewriter.create(op.getLoc(), result_ty, dim); size = rewriter.create( - op.getLoc(), size->getResult(0), dim.getResult(), + op.getLoc(), size, dim, /*DenseIntElementsAttr=*/DenseIntElementsAttr()); } - rewriter.replaceOp(op, size->getResult(0)); + rewriter.replaceOp(op, size); return success(); } From 2240aad0a2d82d64ad41bb0298ec4bbba1f9778d Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Fri, 31 Jul 2020 22:15:13 +0000 Subject: [PATCH 1901/2522] fix dependencies --- tensorflow/core/kernels/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index c9a209be999..6ed4248e2d1 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -2976,6 +2976,7 @@ tf_cc_tests( "//tensorflow/core:framework", "//tensorflow/core:test", "//tensorflow/core:test_main", + "//tensorflow/core:tensor_testutil.h", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/strings", ], From e3ac0822d81d486cde16d6cd32d560d056a42d4e Mon Sep 17 00:00:00 2001 From: Rick Chao Date: Fri, 31 Jul 2020 14:45:42 -0700 Subject: [PATCH 1902/2522] Add compiler pass to remove duplicate 'tf_saved_model.bound_input' bindings. Consolidate identical bound inputs so that resource variables do not alias in modules with tf_saved_model semantics. 
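
The consolidation can be pictured as keeping the first function argument bound to each symbol and
rewiring later duplicate bindings to it. The following is a hypothetical Python sketch of that idea
only; it is not the MLIR pass, and the argument and symbol names simply mirror the test case in the
diff below.

    # Illustrative sketch of the deduplication idea, not the MLIR pass itself:
    # keep the first argument bound to each 'tf_saved_model.bound_input' symbol
    # and redirect later duplicates to it.
    def dedup_bound_inputs(args):
        """args: list of (arg_name, bound_symbol or None) pairs."""
        first_for_symbol = {}
        kept, remap = [], {}
        for name, symbol in args:
            if symbol is not None and symbol in first_for_symbol:
                remap[name] = first_for_symbol[symbol]  # duplicate binding: reuse earlier arg
                continue
            if symbol is not None:
                first_for_symbol[symbol] = name
            kept.append((name, symbol))
        return kept, remap

    kept, remap = dedup_bound_inputs(
        [("arg0", "@v"), ("arg1", "@w"), ("arg2", "@v"), ("arg3", "@x"), ("arg4", "@v")])
    print(kept)   # [('arg0', '@v'), ('arg1', '@w'), ('arg3', '@x')]
    print(remap)  # {'arg2': 'arg0', 'arg4': 'arg0'}
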
PiperOrigin-RevId: 324288443 Change-Id: I4ccf9c19f3e2df123667b71560c3d3ae3c751913 --- tensorflow/compiler/mlir/tensorflow/BUILD | 1 - .../mlir/tensorflow/ir/tf_saved_model.cc | 1 - .../tf_saved_model/hash_table_asset_v1.py | 17 ++--- ...odel_deduplicate_bound_input_bindings.mlir | 33 ---------- .../tensorflow/tests/tf_saved_model_ops.mlir | 13 ---- .../tests/tf_saved_model_ops_invalid.mlir | 14 ---- .../deduplicate_bound_input_bindings.cc | 65 ------------------- .../transforms/tf_saved_model_passes.h | 3 - .../mlir/tensorflow/translate/import_model.cc | 4 +- 9 files changed, 6 insertions(+), 145 deletions(-) delete mode 100644 tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_deduplicate_bound_input_bindings.mlir delete mode 100644 tensorflow/compiler/mlir/tensorflow/transforms/deduplicate_bound_input_bindings.cc diff --git a/tensorflow/compiler/mlir/tensorflow/BUILD b/tensorflow/compiler/mlir/tensorflow/BUILD index c6f0083fc92..518992d03db 100644 --- a/tensorflow/compiler/mlir/tensorflow/BUILD +++ b/tensorflow/compiler/mlir/tensorflow/BUILD @@ -676,7 +676,6 @@ cc_library( cc_library( name = "tf_saved_model_passes", srcs = [ - "transforms/deduplicate_bound_input_bindings.cc", "transforms/freeze_global_tensors.cc", "transforms/lift_variables_pass.cc", "transforms/optimize_global_tensors.cc", diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc index 94a792ec3db..edfc7feefd5 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc @@ -337,7 +337,6 @@ LogicalResult VerifyExportedFunc(FuncOp func) { if (auto attr = func.getArgAttrOfType( i, "tf_saved_model.bound_input")) { if (!unique_bound_inputs.insert(attr.getValue()).second) { - if (module.getAttr("tf_saved_model.under_construction")) continue; return func.emitError() << "duplicate 'tf_saved_model.bound_input' binding"; } diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/hash_table_asset_v1.py b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/hash_table_asset_v1.py index 4cb931253b3..7e86953eb8f 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/hash_table_asset_v1.py +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/hash_table_asset_v1.py @@ -27,15 +27,13 @@ import tensorflow.compat.v1 as tf from tensorflow.compiler.mlir.tensorflow.tests.tf_saved_model import common_v1 # CHECK: "tf_saved_model.session_initializer"() {initializer = [[init:@.*]]} : () -> () -# CHECK: "tf_saved_model.asset"() {filename = {{.*}}, sym_name = "[[asset1:__tf_saved_model_asset1_.*]]"} -# CHECK: "tf_saved_model.asset"() {filename = {{.*}}, sym_name = "[[asset0:__tf_saved_model_asset0_.*]]"} +# CHECK: "tf_saved_model.asset"() {filename = {{.*}}, sym_name = "[[asset:.*]]"} # CHECK: func [[init]] -# CHECK-SAME: [[ARG0:%.*]]: tensor {tf_saved_model.bound_input = @[[asset0]]} -# CHECK-SAME: [[ARG1:%.*]]: tensor {tf_saved_model.bound_input = @[[asset1]]} +# CHECK-SAME: [[ARG:%.*]]: tensor {tf_saved_model.bound_input = @[[asset]]} # CHECK-NEXT: [[R0:%.*]] = "tf.HashTableV2"() # CHECK-SAME: shared_name = "[[hash_table:.*]]" -# CHECK-NEXT: "tf.InitializeTableFromTextFileV2"([[R0]], [[ARG0]]) +# CHECK-NEXT: "tf.InitializeTableFromTextFileV2"([[R0]], [[ARG]]) def write_vocabulary_file(vocabulary): @@ -50,16 +48,11 @@ def write_vocabulary_file(vocabulary): def test(): - vocabulary_file = write_vocabulary_file(['cat', 'is', 'on', 'the', 'mat']) 
table_initializer = tf.lookup.TextFileInitializer( - vocabulary_file, tf.string, tf.lookup.TextFileIndex.WHOLE_LINE, tf.int64, + write_vocabulary_file(['cat', 'is', 'on', 'the', 'mat']), tf.string, + tf.lookup.TextFileIndex.WHOLE_LINE, tf.int64, tf.lookup.TextFileIndex.LINE_NUMBER) - # Incur another bound_input on the asset, but with a different sym_name, i.e., - # __tf_saved_model_asset1_tokens.txt vs. __tf_saved_model_asset0_tokens.txt. table = tf.lookup.StaticVocabularyTable(table_initializer, num_oov_buckets=10) - vocab_file_tensor = tf.convert_to_tensor(vocabulary_file, tf.string, - name='asset_filepath') - tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, vocab_file_tensor) x = tf.placeholder(tf.string, shape=(), name='input') r = table.lookup(x) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_deduplicate_bound_input_bindings.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_deduplicate_bound_input_bindings.mlir deleted file mode 100644 index 22fd3d86068..00000000000 --- a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_deduplicate_bound_input_bindings.mlir +++ /dev/null @@ -1,33 +0,0 @@ -// RUN: tf-opt %s -split-input-file -verify-diagnostics -tf-saved-model-dedup-bound-input-binding-pass | FileCheck %s - -module attributes {tf_saved_model.semantics, tf_saved_model.under_construction} { - // Test case: Remove duplicate bound_input symbols. - "tf_saved_model.global_tensor"() { is_mutable, sym_name = "v", type = tensor, value = dense<42.0> : tensor } : () -> () - "tf_saved_model.global_tensor"() { is_mutable, sym_name = "w", type = tensor, value = dense<43.0> : tensor } : () -> () - "tf_saved_model.global_tensor"() { is_mutable, sym_name = "x", type = tensor, value = dense<44.0> : tensor } : () -> () - // CHECK: func @f - // CHECK: %arg0: tensor>> {tf_saved_model.bound_input = @v} - // CHECK: %arg1: tensor>> {tf_saved_model.bound_input = @w} - // CHECK: %arg2: tensor>> {tf_saved_model.bound_input = @x} - // CHECK-NOT: %arg3 - // CHECK-NOT: %arg4 - func @f( - %arg0: tensor>> {tf_saved_model.bound_input = @v}, - %arg1: tensor>> {tf_saved_model.bound_input = @w}, - %arg2: tensor>> {tf_saved_model.bound_input = @v}, - %arg3: tensor>> {tf_saved_model.bound_input = @x}, - %arg4: tensor>> {tf_saved_model.bound_input = @v} - ) attributes {tf_saved_model.exported_names = ["f"]} { - // CHECK: "tf.ReadVariableOp"(%arg0) : (tensor>>) -> tensor - // CHECK: "tf.ReadVariableOp"(%arg1) : (tensor>>) -> tensor - // CHECK: "tf.ReadVariableOp"(%arg0) : (tensor>>) -> tensor - // CHECK: "tf.ReadVariableOp"(%arg2) : (tensor>>) -> tensor - // CHECK: "tf.ReadVariableOp"(%arg0) : (tensor>>) -> tensor - %val0 = "tf.ReadVariableOp"(%arg0) : (tensor>>) -> tensor - %val1 = "tf.ReadVariableOp"(%arg1) : (tensor>>) -> tensor - %val2 = "tf.ReadVariableOp"(%arg2) : (tensor>>) -> tensor - %val3 = "tf.ReadVariableOp"(%arg3) : (tensor>>) -> tensor - %val4 = "tf.ReadVariableOp"(%arg4) : (tensor>>) -> tensor - return - } -} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops.mlir index d2c5509b52d..7156a1fab63 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops.mlir @@ -76,16 +76,3 @@ module attributes {tf_saved_model.semantics, tf_saved_model.under_construction} } } - -// ----- - -module attributes {tf_saved_model.semantics, tf_saved_model.under_construction} { - "tf_saved_model.global_tensor"() { 
is_mutable, sym_name = "v", type = tensor, value = dense<42.0> : tensor } : () -> () - // CHECK: func @f - func @f( - %arg0: tensor>> {tf_saved_model.bound_input = @v}, - %arg1: tensor>> {tf_saved_model.bound_input = @v} - ) attributes {tf_saved_model.exported_names = ["f"]} { - return - } -} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops_invalid.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops_invalid.mlir index 714c8908825..dcb889ff99e 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops_invalid.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops_invalid.mlir @@ -400,17 +400,3 @@ module attributes {tf_saved_model.semantics} { } } - -// ----- - -module attributes {tf_saved_model.semantics} { - - "tf_saved_model.global_tensor"() { is_mutable, sym_name = "v", type = tensor, value = dense<42.0> : tensor } : () -> () - // expected-error@+1 {{duplicate 'tf_saved_model.bound_input' binding}} - func @f( - %arg0: tensor>> {tf_saved_model.bound_input = @v}, - %arg1: tensor>> {tf_saved_model.bound_input = @v} - ) attributes {tf_saved_model.exported_names = ["f"]} { - return - } -} diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/deduplicate_bound_input_bindings.cc b/tensorflow/compiler/mlir/tensorflow/transforms/deduplicate_bound_input_bindings.cc deleted file mode 100644 index c1514dfa357..00000000000 --- a/tensorflow/compiler/mlir/tensorflow/transforms/deduplicate_bound_input_bindings.cc +++ /dev/null @@ -1,65 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include - -#include "llvm/ADT/DenseMap.h" -#include "mlir/IR/Function.h" // from @llvm-project -#include "mlir/Pass/Pass.h" // from @llvm-project -#include "mlir/Support/LLVM.h" // from @llvm-project -#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" -#include "tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.h" - -namespace mlir { -namespace tf_saved_model { -namespace { - -class DedupBoundInputBindingPass - : public PassWrapper { - public: - void runOnFunction() override; -}; - -void DedupBoundInputBindingPass::runOnFunction() { - FuncOp func = getFunction(); - if (!mlir::tf_saved_model::IsExported(func)) return; - llvm::SmallDenseMap unique_bound_inputs; - llvm::SmallVector arg_indices_to_erase; - for (unsigned i = 0, e = func.getNumArguments(); i < e; i++) { - auto attr = func.getArgAttrOfType( - i, "tf_saved_model.bound_input"); - if (!attr) continue; - auto inserted = unique_bound_inputs.insert(std::make_pair(attr, i)); - if (inserted.second) continue; - auto duplicate_arg = func.getArgument(i); - auto original_arg = func.getArgument(unique_bound_inputs[attr]); - duplicate_arg.replaceAllUsesWith(original_arg); - arg_indices_to_erase.push_back(i); - } - func.eraseArguments(arg_indices_to_erase); -} - -} // namespace - -static PassRegistration pass( - "tf-saved-model-dedup-bound-input-binding-pass", - "Remove duplicate 'tf_saved_model.bound_input' bindings."); - -std::unique_ptr> CreateDedupBoundInputBindingPass() { - return std::make_unique(); -} - -} // namespace tf_saved_model -} // namespace mlir diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tf_saved_model_passes.h b/tensorflow/compiler/mlir/tensorflow/transforms/tf_saved_model_passes.h index 59532a2b123..f7a73dc1561 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tf_saved_model_passes.h +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tf_saved_model_passes.h @@ -46,9 +46,6 @@ CreateRemoveVariablesInSessionInitializerPass(); std::unique_ptr> CreateLiftVariablesPass( ::tensorflow::Session* session); -// Creates a pass that removes duplicate 'tf_saved_model.bound_input' bindings. -std::unique_ptr> CreateDedupBoundInputBindingPass(); - } // namespace tf_saved_model } // namespace mlir diff --git a/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc b/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc index 27385e81262..2c44aaa5c42 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc @@ -3368,13 +3368,12 @@ SavedModelSignatureDefImporter::ConvertAssets() { results.reserve(asset_file_defs.size()); mlir::OpBuilder builder(module_->getBodyRegion()); - unsigned i = 0; // Use to generate unique sym_name(s) for duplicate assets. 
for (const auto& asset : asset_file_defs) { auto asset_op = builder.create( module_->getLoc(), /*sym_name=*/ builder.getStringAttr( - absl::StrCat("__tf_saved_model_asset", i++, "_", asset.filename())), + absl::StrCat("__tf_saved_model_asset_", asset.filename())), /*filename=*/ builder.getStringAttr( io::JoinPath(kSavedModelAssetsDirectory, asset.filename()))); @@ -3570,7 +3569,6 @@ Status SavedModelSignatureDefImporter::LiftVariables() { pm.addPass(mlir::TF::CreatePromoteVarHandlesToArgsPass()); pm.addPass( mlir::tf_saved_model::CreateLiftVariablesPass(bundle_.GetSession())); - pm.addPass(mlir::tf_saved_model::CreateDedupBoundInputBindingPass()); if (mlir::failed(pm.run(*module_))) return diag_handler.Combine(errors::Internal("Failed to lift variables.")); From 0bf6bb64ce7639c9513073313fe96492a0b6f6fa Mon Sep 17 00:00:00 2001 From: Cesar Crusius Date: Fri, 31 Jul 2020 14:52:21 -0700 Subject: [PATCH 1903/2522] Remove v1 decorators from saved_model:saved_model_test. Saved model tests care about building the right graphs, so they were forced into graph mode (instead of being ported to tf.function infrastructure). There are differences in the graphs produced between v1 and v2, but those can be handled easily - for testing purposes - by carefully choosing operation names based on the mode. There were unneeded collection operations in a few tests, which were removed. Other small changes were made but mostly tests were forced to run in graph mode. PiperOrigin-RevId: 324289722 Change-Id: Iad60aac85cc3954755a9c6a35cf5a4ddb42640ed --- .../python/saved_model/saved_model_test.py | 1632 +++++++++-------- 1 file changed, 834 insertions(+), 798 deletions(-) diff --git a/tensorflow/python/saved_model/saved_model_test.py b/tensorflow/python/saved_model/saved_model_test.py index f998bbfce38..6e662d7d83c 100644 --- a/tensorflow/python/saved_model/saved_model_test.py +++ b/tensorflow/python/saved_model/saved_model_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import os +import six from tensorflow.core.framework import types_pb2 from tensorflow.core.protobuf import config_pb2 @@ -34,6 +35,7 @@ from tensorflow.python.lib.io import file_io from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import state_ops +from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.ops.ragged import ragged_factory_ops from tensorflow.python.platform import test @@ -80,6 +82,26 @@ class SavedModelTestBase(test.TestCase): asset_collection = ops.get_collection(ops.GraphKeys.ASSET_FILEPATHS) return asset_collection + def _eval(self, tensor): + """Evaluate a tensor. + + Takes care of the variations between graphs produced with and without + resource variables when determining the name of the operation to run. + + Args: + tensor: The tensor to evaluate, or a string with the tensor name. + + Returns: + The evaluated tensor as a numpy array. 
+ """ + name = tensor if isinstance(tensor, six.string_types) else tensor.name + index = "0" + if ":" in name: + name, index = name.split(":") + if variable_scope.resource_variables_enabled(): + name = name + "/Read/ReadVariableOp" + return self.evaluate(name + ":" + index) + class SavedModelTest(SavedModelTestBase): @@ -119,12 +141,10 @@ class SavedModelTest(SavedModelTestBase): with self.session(graph=ops.Graph()) as sess: self._init_and_validate_variable(sess, "v", 42) - foo_signature = signature_def_utils.build_signature_def({ - "foo_inputs": tensor_info - }, dict(), "foo") + foo_signature = signature_def_utils.build_signature_def( + {"foo_inputs": tensor_info}, dict(), "foo") builder.add_meta_graph_and_variables( - sess, ["foo"], - signature_def_map={"foo_key": foo_signature}) + sess, ["foo"], signature_def_map={"foo_key": foo_signature}) def _validate_outputs_tensor_info_fail(self, builder, tensor_info): with self.session(graph=ops.Graph()) as sess: @@ -145,8 +165,7 @@ class SavedModelTest(SavedModelTestBase): foo_signature = signature_def_utils.build_signature_def( dict(), {"foo_outputs": tensor_info}, "foo") builder.add_meta_graph_and_variables( - sess, ["foo"], - signature_def_map={"foo_key": foo_signature}) + sess, ["foo"], signature_def_map={"foo_key": foo_signature}) def _validate_sig_def_keys(self, builder, valid_tensor_info, invalid_key): with self.session(graph=ops.Graph()) as sess: @@ -201,392 +220,404 @@ class SavedModelTest(SavedModelTestBase): "Cannot parse file.*%s" % constants.SAVED_MODEL_FILENAME_PBTXT): loader.load(sess, ["foo"], export_dir) - @test_util.run_deprecated_v1 def testVerifySessionGraphUsage(self): export_dir = self._get_export_dir("test_verify_session_graph_usage") builder = saved_model_builder._SavedModelBuilder(export_dir) - with self.session(graph=ops.Graph()) as sess: - self._init_and_validate_variable(sess, "v", 42) - builder.add_meta_graph_and_variables(sess, [tag_constants.TRAINING]) + with ops.Graph().as_default(): + with self.session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 42) + builder.add_meta_graph_and_variables(sess, [tag_constants.TRAINING]) - # Save the SavedModel to disk. - builder.save() + # Save the SavedModel to disk. + builder.save() - # Build a session and supply it to the load operation. - sess = session.Session(graph=ops.Graph()) - loader.load(sess, [tag_constants.TRAINING], export_dir) + # Build a session and supply it to the load operation. + sess = session.Session(graph=ops.Graph()) + loader.load(sess, [tag_constants.TRAINING], export_dir) - # Check the variable within the scope of the session and its graph. - with sess: - self.assertEqual( - 42, ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)[0].eval()) + # Check the variable within the scope of the session and its graph. + with sess: + self.assertEqual( + 42, + self._eval(ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)[0])) - @test_util.run_deprecated_v1 def testSequence(self): export_dir = self._get_export_dir("test_sequence") builder = saved_model_builder._SavedModelBuilder(export_dir) - # Expect an assertion error since add_meta_graph_and_variables() should be - # invoked before any add_meta_graph() calls. - with self.session(graph=ops.Graph()) as sess: - self.assertRaises(AssertionError, builder.add_meta_graph, ["foo"]) + with ops.Graph().as_default(): + # Expect an assertion error since add_meta_graph_and_variables() should be + # invoked before any add_meta_graph() calls. 
+ with self.session(graph=ops.Graph()) as sess: + self.assertRaises(AssertionError, builder.add_meta_graph, ["foo"]) - # Expect an assertion error for multiple calls of - # add_meta_graph_and_variables() since weights should be saved exactly once. - with self.session(graph=ops.Graph()) as sess: - self._init_and_validate_variable(sess, "v", 42) - builder.add_meta_graph_and_variables(sess, ["bar"]) - self.assertRaises(AssertionError, builder.add_meta_graph_and_variables, - sess, ["baz"]) + # Expect an assertion error for multiple calls of + # add_meta_graph_and_variables() since weights should be saved exactly + # once. + with self.session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 42) + builder.add_meta_graph_and_variables(sess, ["bar"]) + self.assertRaises(AssertionError, builder.add_meta_graph_and_variables, + sess, ["baz"]) - @test_util.run_deprecated_v1 def testTags(self): export_dir = self._get_export_dir("test_tags") builder = saved_model_builder._SavedModelBuilder(export_dir) - # Graph with a single variable. SavedModel invoked to: - # - add with weights. - # - a single tag (from predefined constants). - with self.session(graph=ops.Graph()) as sess: - self._init_and_validate_variable(sess, "v", 42) - builder.add_meta_graph_and_variables(sess, [tag_constants.TRAINING]) + with ops.Graph().as_default(): + # Graph with a single variable. SavedModel invoked to: + # - add with weights. + # - a single tag (from predefined constants). + with self.session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 42) + builder.add_meta_graph_and_variables(sess, [tag_constants.TRAINING]) - # Graph that updates the single variable. SavedModel invoked to: - # - simply add the model (weights are not updated). - # - a single tag (from predefined constants). - with self.session(graph=ops.Graph()) as sess: - self._init_and_validate_variable(sess, "v", 43) - builder.add_meta_graph([tag_constants.SERVING]) + # Graph that updates the single variable. SavedModel invoked to: + # - simply add the model (weights are not updated). + # - a single tag (from predefined constants). + with self.session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 43) + builder.add_meta_graph([tag_constants.SERVING]) - # Graph that updates the single variable. SavedModel invoked to: - # - simply add the model (weights are not updated). - # - multiple tags (from predefined constants). - with self.session(graph=ops.Graph()) as sess: - self._init_and_validate_variable(sess, "v", 45) - builder.add_meta_graph([tag_constants.SERVING, tag_constants.GPU]) + # Graph that updates the single variable. SavedModel invoked to: + # - simply add the model (weights are not updated). + # - multiple tags (from predefined constants). + with self.session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 45) + builder.add_meta_graph([tag_constants.SERVING, tag_constants.GPU]) - # Graph that updates the single variable. SavedModel invoked to: - # - simply add the model (weights are not updated). - # - multiple tags (from predefined constants for serving on TPU). - with self.session(graph=ops.Graph()) as sess: - self._init_and_validate_variable(sess, "v", 45) - builder.add_meta_graph([tag_constants.SERVING, tag_constants.TPU]) + # Graph that updates the single variable. SavedModel invoked to: + # - simply add the model (weights are not updated). + # - multiple tags (from predefined constants for serving on TPU). 
+ with self.session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 45) + builder.add_meta_graph([tag_constants.SERVING, tag_constants.TPU]) - # Graph that updates the single variable. SavedModel is invoked: - # - to add the model (weights are not updated). - # - multiple custom tags. - with self.session(graph=ops.Graph()) as sess: - self._init_and_validate_variable(sess, "v", 44) - builder.add_meta_graph(["foo", "bar"]) + # Graph that updates the single variable. SavedModel is invoked: + # - to add the model (weights are not updated). + # - multiple custom tags. + with self.session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 44) + builder.add_meta_graph(["foo", "bar"]) - # Save the SavedModel to disk. - builder.save() + # Save the SavedModel to disk. + builder.save() - # Restore the graph with a single predefined tag whose variables were saved. - with self.session(graph=ops.Graph()) as sess: - loader.load(sess, [tag_constants.TRAINING], export_dir) - self.assertEqual( - 42, ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)[0].eval()) + # Restore the graph with a single predefined tag whose variables were + # saved. + with self.session(graph=ops.Graph()) as sess: + loader.load(sess, [tag_constants.TRAINING], export_dir) + self.assertEqual( + 42, + self._eval(ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)[0])) - # Restore the graph with a single predefined tag whose variables were not - # saved. - with self.session(graph=ops.Graph()) as sess: - loader.load(sess, [tag_constants.SERVING], export_dir) - self.assertEqual( - 42, ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)[0].eval()) + # Restore the graph with a single predefined tag whose variables were not + # saved. + with self.session(graph=ops.Graph()) as sess: + loader.load(sess, [tag_constants.SERVING], export_dir) + self.assertEqual( + 42, + self._eval(ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)[0])) - # Restore the graph with multiple predefined tags whose variables were not - # saved. - with self.session(graph=ops.Graph()) as sess: - loader.load(sess, [tag_constants.SERVING, tag_constants.GPU], export_dir) - self.assertEqual( - 42, ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)[0].eval()) + # Restore the graph with multiple predefined tags whose variables were not + # saved. + with self.session(graph=ops.Graph()) as sess: + loader.load(sess, [tag_constants.SERVING, tag_constants.GPU], + export_dir) + self.assertEqual( + 42, + self._eval(ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)[0])) - # Restore the graph with multiple predefined tags (for serving on TPU) - # whose variables were not saved. - with self.session(graph=ops.Graph()) as sess: - loader.load(sess, [tag_constants.SERVING, tag_constants.TPU], export_dir) - self.assertEqual( - 42, ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)[0].eval()) + # Restore the graph with multiple predefined tags (for serving on TPU) + # whose variables were not saved. + with self.session(graph=ops.Graph()) as sess: + loader.load(sess, [tag_constants.SERVING, tag_constants.TPU], + export_dir) + self.assertEqual( + 42, + self._eval(ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)[0])) - # Restore the graph with multiple tags. Provide duplicate tags to test set - # semantics. - with self.session(graph=ops.Graph()) as sess: - loader.load(sess, ["foo", "bar", "foo"], export_dir) - self.assertEqual( - 42, ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)[0].eval()) + # Restore the graph with multiple tags. 
Provide duplicate tags to test set + # semantics. + with self.session(graph=ops.Graph()) as sess: + loader.load(sess, ["foo", "bar", "foo"], export_dir) + self.assertEqual( + 42, + self._eval(ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)[0])) - # Try restoring a graph with a non-existent tag. This should yield a runtime - # error. - with self.session(graph=ops.Graph()) as sess: - self.assertRaises(RuntimeError, loader.load, sess, ["INVALID"], - export_dir) + # Try restoring a graph with a non-existent tag. This should yield a + # runtime error. + with self.session(graph=ops.Graph()) as sess: + self.assertRaises(RuntimeError, loader.load, sess, ["INVALID"], + export_dir) - # Try restoring a graph where a subset of the tags match. Since tag matching - # for meta graph defs follows "all" semantics, this should yield a runtime - # error. - with self.session(graph=ops.Graph()) as sess: - self.assertRaises(RuntimeError, loader.load, sess, ["foo", "baz"], - export_dir) + # Try restoring a graph where a subset of the tags match. Since tag + # matching for meta graph defs follows "all" semantics, this should yield + # a runtime error. + with self.session(graph=ops.Graph()) as sess: + self.assertRaises(RuntimeError, loader.load, sess, ["foo", "baz"], + export_dir) - @test_util.run_v1_only("b/120545219") def testVariables(self): export_dir = self._get_export_dir("test_variables") builder = saved_model_builder._SavedModelBuilder(export_dir) - # Graph with two variables. SavedModel invoked to: - # - add with weights. - with self.session(graph=ops.Graph()) as sess: - self._init_and_validate_variable(sess, "v1", 1) - self._init_and_validate_variable(sess, "v2", 2) - builder.add_meta_graph_and_variables(sess, ["foo"]) + with ops.Graph().as_default(): + # Graph with two variables. SavedModel invoked to: + # - add with weights. + with self.session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, "v1", 1) + self._init_and_validate_variable(sess, "v2", 2) + builder.add_meta_graph_and_variables(sess, ["foo"]) - # Graph with a single variable (subset of the variables from the previous - # graph whose weights were saved). SavedModel invoked to: - # - simply add the model (weights are not updated). - with self.session(graph=ops.Graph()) as sess: - self._init_and_validate_variable(sess, "v2", 3) - builder.add_meta_graph(["bar"]) + # Graph with a single variable (subset of the variables from the previous + # graph whose weights were saved). SavedModel invoked to: + # - simply add the model (weights are not updated). + with self.session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, "v2", 3) + builder.add_meta_graph(["bar"]) - # Graph with a single variable (disjoint set of variables from the previous - # graph whose weights were saved). SavedModel invoked to: - # - simply add the model (weights are not updated). - with self.session(graph=ops.Graph()) as sess: - self._init_and_validate_variable(sess, "v3", 4) - builder.add_meta_graph(["baz"]) + # Graph with a single variable (disjoint set of variables from the + # previous graph whose weights were saved). SavedModel invoked to: + # - simply add the model (weights are not updated). + with self.session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, "v3", 4) + builder.add_meta_graph(["baz"]) - # Save the SavedModel to disk. - builder.save() + # Save the SavedModel to disk. + builder.save() - # Restore the graph with tag "foo", whose variables were saved. 
- with self.session(graph=ops.Graph()) as sess: - loader.load(sess, ["foo"], export_dir) - collection_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) - self.assertEqual(len(collection_vars), 2) - self.assertEqual(1, collection_vars[0].eval()) - self.assertEqual(2, collection_vars[1].eval()) + # Restore the graph with tag "foo", whose variables were saved. + with self.session(graph=ops.Graph()) as sess: + loader.load(sess, ["foo"], export_dir) + collection_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + self.assertEqual(len(collection_vars), 2) + self.assertEqual(1, self._eval(collection_vars[0])) + self.assertEqual(2, self._eval(collection_vars[1])) - # Restore the graph with tag "bar", whose variables were not saved. Only the - # subset of the variables added to the graph will be restored with the - # checkpointed value. - with self.session(graph=ops.Graph()) as sess: - loader.load(sess, ["bar"], export_dir) - collection_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) - self.assertEqual(len(collection_vars), 1) - self.assertEqual(2, collection_vars[0].eval()) + # Restore the graph with tag "bar", whose variables were not saved. Only + # the subset of the variables added to the graph will be restored with the + # checkpointed value. + with self.session(graph=ops.Graph()) as sess: + loader.load(sess, ["bar"], export_dir) + collection_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + self.assertEqual(len(collection_vars), 1) + self.assertEqual(2, self._eval(collection_vars[0])) - # Try restoring the graph with tag "baz", whose variables were not saved. - # Since this graph has a disjoint set of variables from the set that was - # saved, this should raise an error. - with self.session(graph=ops.Graph()) as sess: - self.assertRaises(errors.NotFoundError, loader.load, sess, ["baz"], - export_dir) + # Try restoring the graph with tag "baz", whose variables were not saved. + # Since this graph has a disjoint set of variables from the set that was + # saved, this should raise an error. + with self.session(graph=ops.Graph()) as sess: + self.assertRaises(errors.NotFoundError, loader.load, sess, ["baz"], + export_dir) - @test_util.run_deprecated_v1 def testGraphWithoutVariables(self): export_dir = self._get_export_dir("test_graph_has_variables") builder = saved_model_builder._SavedModelBuilder(export_dir) - # Graph with no variables. - with self.session(graph=ops.Graph()) as sess: - constant_5_name = constant_op.constant(5.0).name - builder.add_meta_graph_and_variables(sess, ["foo"]) + with ops.Graph().as_default(): + # Graph with no variables. + with self.session(graph=ops.Graph()) as sess: + constant_5_name = constant_op.constant(5.0).name + builder.add_meta_graph_and_variables(sess, ["foo"]) - # Second graph with no variables - with self.session(graph=ops.Graph()) as sess: - constant_6_name = constant_op.constant(6.0).name - builder.add_meta_graph(["bar"]) + # Second graph with no variables + with self.session(graph=ops.Graph()) as sess: + constant_6_name = constant_op.constant(6.0).name + builder.add_meta_graph(["bar"]) - # Save the SavedModel to disk. - builder.save() + # Save the SavedModel to disk. + builder.save() - # Restore the graph with tag "foo". - with self.session(graph=ops.Graph()) as sess: - loader.load(sess, ["foo"], export_dir) - # Read the constant a from the graph. 
- a = ops.get_default_graph().get_tensor_by_name(constant_5_name) - b = constant_op.constant(6.0) - c = a * b - self.assertEqual(30.0, self.evaluate(c)) + # Restore the graph with tag "foo". + with self.session(graph=ops.Graph()) as sess: + loader.load(sess, ["foo"], export_dir) + # Read the constant a from the graph. + a = ops.get_default_graph().get_tensor_by_name(constant_5_name) + b = constant_op.constant(6.0) + c = a * b + self.assertEqual(30.0, self.evaluate(c)) - # Restore the graph with tag "bar". - with self.session(graph=ops.Graph()) as sess: - loader.load(sess, ["bar"], export_dir) - # Read the constant a from the graph. - a = ops.get_default_graph().get_tensor_by_name(constant_6_name) - b = constant_op.constant(5.0) - c = a * b - self.assertEqual(30.0, self.evaluate(c)) + # Restore the graph with tag "bar". + with self.session(graph=ops.Graph()) as sess: + loader.load(sess, ["bar"], export_dir) + # Read the constant a from the graph. + a = ops.get_default_graph().get_tensor_by_name(constant_6_name) + b = constant_op.constant(5.0) + c = a * b + self.assertEqual(30.0, self.evaluate(c)) - @test_util.run_deprecated_v1 def testNoOverwrite(self): export_dir = self._get_export_dir("test_no_overwrite") builder = saved_model_builder._SavedModelBuilder(export_dir) - # Graph with a single variable. SavedModel invoked to: - # - add with weights. - with self.session(graph=ops.Graph()) as sess: - self._init_and_validate_variable(sess, "v", 42) - builder.add_meta_graph_and_variables(sess, ["foo"]) + with ops.Graph().as_default(): + # Graph with a single variable. SavedModel invoked to: + # - add with weights. + with self.session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 42) + builder.add_meta_graph_and_variables(sess, ["foo"]) - # Save the SavedModel to disk in text format. - builder.save(as_text=True) + # Save the SavedModel to disk in text format. + builder.save(as_text=True) - # Restore the graph with tag "foo", whose variables were saved. - with self.session(graph=ops.Graph()) as sess: - loader.load(sess, ["foo"], export_dir) - self.assertEqual( - 42, ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)[0].eval()) + # Restore the graph with tag "foo", whose variables were saved. + with self.session(graph=ops.Graph()) as sess: + loader.load(sess, ["foo"], export_dir) + self.assertEqual(42, self._eval("v")) - # An attempt to create another builder with the same export directory should - # result in an assertion error. - self.assertRaises(AssertionError, saved_model_builder._SavedModelBuilder, - export_dir) + # An attempt to create another builder with the same export directory + # should result in an assertion error. + self.assertRaises(AssertionError, saved_model_builder._SavedModelBuilder, + export_dir) - @test_util.run_deprecated_v1 def testSaveAsText(self): export_dir = self._get_export_dir("test_astext") builder = saved_model_builder._SavedModelBuilder(export_dir) - # Graph with a single variable. SavedModel invoked to: - # - add with weights. - with self.session(graph=ops.Graph()) as sess: - self._init_and_validate_variable(sess, "v", 42) - builder.add_meta_graph_and_variables(sess, ["foo"]) + with ops.Graph().as_default(): + # Graph with a single variable. SavedModel invoked to: + # - add with weights. + with self.session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 42) + builder.add_meta_graph_and_variables(sess, ["foo"]) - # Graph with the same single variable. 
SavedModel invoked to: - # - simply add the model (weights are not updated). - with self.session(graph=ops.Graph()) as sess: - self._init_and_validate_variable(sess, "v", 43) - builder.add_meta_graph(["bar"]) + # Graph with the same single variable. SavedModel invoked to: + # - simply add the model (weights are not updated). + with self.session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 43) + builder.add_meta_graph(["bar"]) - # Save the SavedModel to disk in text format. - builder.save(as_text=True) + # Save the SavedModel to disk in text format. + builder.save(as_text=True) - # Restore the graph with tag "foo", whose variables were saved. - with self.session(graph=ops.Graph()) as sess: - loader.load(sess, ["foo"], export_dir) - self.assertEqual( - 42, ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)[0].eval()) + # Restore the graph with tag "foo", whose variables were saved. + with self.session(graph=ops.Graph()) as sess: + loader.load(sess, ["foo"], export_dir) + self.assertEqual( + 42, + self._eval(ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)[0])) - # Restore the graph with tag "bar", whose variables were not saved. - with self.session(graph=ops.Graph()) as sess: - loader.load(sess, ["bar"], export_dir) - self.assertEqual( - 42, ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)[0].eval()) + # Restore the graph with tag "bar", whose variables were not saved. + with self.session(graph=ops.Graph()) as sess: + loader.load(sess, ["bar"], export_dir) + self.assertEqual( + 42, + self._eval(ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)[0])) - @test_util.run_v1_only("b/120545219") def testCollections(self): export_dir = self._get_export_dir("test_collections") builder = saved_model_builder._SavedModelBuilder(export_dir) - # Graph with a single variable added to a collection. SavedModel invoked to: - # - add with weights. - with self.session(graph=ops.Graph()) as sess: - v = variables.VariableV1(42, name="v") - ops.add_to_collection("foo_vars", v) - self.evaluate(variables.global_variables_initializer()) - self.assertEqual(42, self.evaluate(v)) - builder.add_meta_graph_and_variables(sess, ["foo"]) + with ops.Graph().as_default(): + # Graph with a single variable added to a collection. SavedModel invoked + # to: + # - add with weights. + with self.session(graph=ops.Graph()) as sess: + v = variables.VariableV1(42, name="v") + ops.add_to_collection("foo_vars", v) + self.evaluate(variables.global_variables_initializer()) + self.assertEqual(42, self.evaluate(v)) + builder.add_meta_graph_and_variables(sess, ["foo"]) - # Graph with the same single variable added to a different collection. - # SavedModel invoked to: - # - simply add the model (weights are not updated). - with self.session(graph=ops.Graph()) as sess: - v = variables.VariableV1(43, name="v") - ops.add_to_collection("bar_vars", v) - self.evaluate(variables.global_variables_initializer()) - self.assertEqual(43, self.evaluate(v)) - builder.add_meta_graph(["bar"]) + # Graph with the same single variable added to a different collection. + # SavedModel invoked to: + # - simply add the model (weights are not updated). + with self.session(graph=ops.Graph()) as sess: + v = variables.VariableV1(43, name="v") + ops.add_to_collection("bar_vars", v) + self.evaluate(variables.global_variables_initializer()) + self.assertEqual(43, self.evaluate(v)) + builder.add_meta_graph(["bar"]) - # Save the SavedModel to disk. - builder.save() + # Save the SavedModel to disk. 
+ builder.save() - # Restore the graph with tag "foo", whose variables were saved. The - # collection 'foo_vars' should contain a single element. The collection - # 'bar_vars' should not be found. - with self.session(graph=ops.Graph()) as sess: - loader.load(sess, ["foo"], export_dir) - collection_foo_vars = ops.get_collection("foo_vars") - self.assertEqual(len(collection_foo_vars), 1) - self.assertEqual(42, collection_foo_vars[0].eval()) + # Restore the graph with tag "foo", whose variables were saved. The + # collection 'foo_vars' should contain a single element. The collection + # 'bar_vars' should not be found. + with self.session(graph=ops.Graph()) as sess: + loader.load(sess, ["foo"], export_dir) + collection_foo_vars = ops.get_collection("foo_vars") + self.assertEqual(len(collection_foo_vars), 1) + self.assertEqual(42, self._eval(collection_foo_vars[0])) - self.assertEqual(len(ops.get_collection("bar_vars")), 0) + self.assertEqual(len(ops.get_collection("bar_vars")), 0) - # Restore the graph with tag "bar", whose variables were not saved. The - # collection-def exported as part of the meta graph def is updated to - # reflect the new collection. The value of the variable in the - # collection-def corresponds to the saved value (from the previous graph - # with tag "foo"). - with self.session(graph=ops.Graph()) as sess: - loader.load(sess, ["bar"], export_dir) - collection_bar_vars = ops.get_collection("bar_vars") - self.assertEqual(len(collection_bar_vars), 1) - self.assertEqual(42, collection_bar_vars[0].eval()) + # Restore the graph with tag "bar", whose variables were not saved. The + # collection-def exported as part of the meta graph def is updated to + # reflect the new collection. The value of the variable in the + # collection-def corresponds to the saved value (from the previous graph + # with tag "foo"). + with self.session(graph=ops.Graph()) as sess: + loader.load(sess, ["bar"], export_dir) + collection_bar_vars = ops.get_collection("bar_vars") + self.assertEqual(len(collection_bar_vars), 1) + self.assertEqual(42, self._eval(collection_bar_vars[0])) - self.assertEqual(len(ops.get_collection("foo_vars")), 0) + self.assertEqual(len(ops.get_collection("foo_vars")), 0) - @test_util.run_deprecated_v1 def testSignatureDefs(self): export_dir = self._get_export_dir("test_signature_defs") builder = saved_model_builder._SavedModelBuilder(export_dir) - # Graph with a single variable and a single entry in the signature def map. - # SavedModel is invoked to add with weights. - with self.session(graph=ops.Graph()) as sess: - self._init_and_validate_variable(sess, "v", 42) - # Build and populate an empty SignatureDef for testing. - foo_signature = signature_def_utils.build_signature_def(dict(), - dict(), "foo") - builder.add_meta_graph_and_variables( - sess, ["foo"], signature_def_map={"foo_key": foo_signature}) + with ops.Graph().as_default(): + # Graph with a single variable and a single entry in the signature def + # map. SavedModel is invoked to add with weights. + with self.session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 42) + # Build and populate an empty SignatureDef for testing. + foo_signature = signature_def_utils.build_signature_def( + dict(), dict(), "foo") + builder.add_meta_graph_and_variables( + sess, ["foo"], signature_def_map={"foo_key": foo_signature}) - # Graph with the same single variable and multiple entries in the signature - # def map. No weights are saved by SavedModel. 
- with self.session(graph=ops.Graph()) as sess: - self._init_and_validate_variable(sess, "v", 43) - # Build and populate a different SignatureDef for testing. - bar_signature = signature_def_utils.build_signature_def(dict(), - dict(), "bar") - # Also, build a different SignatureDef corresponding to "foo_key" defined - # in the previous graph. - foo_new_signature = signature_def_utils.build_signature_def(dict(), - dict(), - "foo_new") - builder.add_meta_graph( - ["bar"], - signature_def_map={ - "bar_key": bar_signature, - "foo_key": foo_new_signature - }) + # Graph with the same single variable and multiple entries in the + # signature def map. No weights are saved by SavedModel. + with self.session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 43) + # Build and populate a different SignatureDef for testing. + bar_signature = signature_def_utils.build_signature_def( + dict(), dict(), "bar") + # Also, build a different SignatureDef corresponding to "foo_key" + # defined in the previous graph. + foo_new_signature = signature_def_utils.build_signature_def( + dict(), dict(), "foo_new") + builder.add_meta_graph(["bar"], + signature_def_map={ + "bar_key": bar_signature, + "foo_key": foo_new_signature + }) - # Save the SavedModel to disk. - builder.save() + # Save the SavedModel to disk. + builder.save() - # Restore the graph with tag "foo". The single entry in the SignatureDef map - # corresponding to "foo_key" should exist. - with self.session(graph=ops.Graph()) as sess: - foo_graph = loader.load(sess, ["foo"], export_dir) - self.assertEqual( - 42, ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)[0].eval()) + # Restore the graph with tag "foo". The single entry in the SignatureDef + # map corresponding to "foo_key" should exist. + with self.session(graph=ops.Graph()) as sess: + foo_graph = loader.load(sess, ["foo"], export_dir) + self.assertEqual( + 42, + self._eval(ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)[0])) - foo_signature = foo_graph.signature_def - self.assertEqual(len(foo_signature), 1) - self.assertEqual("foo", foo_signature["foo_key"].method_name) + foo_signature = foo_graph.signature_def + self.assertEqual(len(foo_signature), 1) + self.assertEqual("foo", foo_signature["foo_key"].method_name) - # Restore the graph with tag "bar". The SignatureDef map should have two - # entries. One corresponding to "bar_key" and another corresponding to the - # new value of "foo_key". - with self.session(graph=ops.Graph()) as sess: - bar_graph = loader.load(sess, ["bar"], export_dir) - self.assertEqual( - 42, ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)[0].eval()) + # Restore the graph with tag "bar". The SignatureDef map should have two + # entries. One corresponding to "bar_key" and another corresponding to the + # new value of "foo_key". 
+ with self.session(graph=ops.Graph()) as sess: + bar_graph = loader.load(sess, ["bar"], export_dir) + self.assertEqual( + 42, + self._eval(ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)[0])) - bar_signature = bar_graph.signature_def - self.assertEqual(len(bar_signature), 2) - self.assertEqual("bar", bar_signature["bar_key"].method_name) - self.assertEqual("foo_new", bar_signature["foo_key"].method_name) + bar_signature = bar_graph.signature_def + self.assertEqual(len(bar_signature), 2) + self.assertEqual("bar", bar_signature["bar_key"].method_name) + self.assertEqual("foo_new", bar_signature["foo_key"].method_name) def testSignatureDefValidationFails(self): export_dir = self._get_export_dir("test_signature_def_validation_fail") @@ -615,479 +646,491 @@ class SavedModelTest(SavedModelTestBase): self._validate_sig_def_keys(builder, valid_tensor_info, constants.TRAIN_OP_SIGNATURE_KEY) - @test_util.run_deprecated_v1 def testSignatureDefValidationSucceedsWithName(self): tensor_with_name = meta_graph_pb2.TensorInfo() tensor_with_name.name = "foo" tensor_with_name.dtype = types_pb2.DT_FLOAT - export_dir = self._get_export_dir("test_signature_def_validation_name_1") - builder = saved_model_builder._SavedModelBuilder(export_dir) - self._validate_inputs_tensor_info_accept(builder, tensor_with_name) + with ops.Graph().as_default(): + export_dir = self._get_export_dir("test_signature_def_validation_name_1") + builder = saved_model_builder._SavedModelBuilder(export_dir) + self._validate_inputs_tensor_info_accept(builder, tensor_with_name) - export_dir = self._get_export_dir("test_signature_def_validation_name_2") - builder = saved_model_builder._SavedModelBuilder(export_dir) - self._validate_outputs_tensor_info_accept(builder, tensor_with_name) + export_dir = self._get_export_dir("test_signature_def_validation_name_2") + builder = saved_model_builder._SavedModelBuilder(export_dir) + self._validate_outputs_tensor_info_accept(builder, tensor_with_name) - @test_util.run_deprecated_v1 def testSignatureDefValidationSucceedsWithCoo(self): - tensor_with_coo = meta_graph_pb2.TensorInfo() - # TODO(soergel) test validation of each of the fields of coo_sparse - tensor_with_coo.coo_sparse.values_tensor_name = "foo" - tensor_with_coo.dtype = types_pb2.DT_FLOAT + with ops.Graph().as_default(): + tensor_with_coo = meta_graph_pb2.TensorInfo() + # TODO(soergel) test validation of each of the fields of coo_sparse + tensor_with_coo.coo_sparse.values_tensor_name = "foo" + tensor_with_coo.dtype = types_pb2.DT_FLOAT - export_dir = self._get_export_dir("test_signature_def_validation_coo_1") - builder = saved_model_builder._SavedModelBuilder(export_dir) - self._validate_inputs_tensor_info_accept(builder, tensor_with_coo) + export_dir = self._get_export_dir("test_signature_def_validation_coo_1") + builder = saved_model_builder._SavedModelBuilder(export_dir) + self._validate_inputs_tensor_info_accept(builder, tensor_with_coo) - export_dir = self._get_export_dir("test_signature_def_validation_coo_2") - builder = saved_model_builder._SavedModelBuilder(export_dir) - self._validate_outputs_tensor_info_accept(builder, tensor_with_coo) + export_dir = self._get_export_dir("test_signature_def_validation_coo_2") + builder = saved_model_builder._SavedModelBuilder(export_dir) + self._validate_outputs_tensor_info_accept(builder, tensor_with_coo) - @test_util.run_deprecated_v1 def testSignatureDefValidationSucceedsWithRagged(self): - ragged_tensor = ragged_factory_ops.constant([[1, 2], [3]]) - tensor_with_ragged = 
utils.build_tensor_info(ragged_tensor) + with ops.Graph().as_default(): + ragged_tensor = ragged_factory_ops.constant([[1, 2], [3]]) + tensor_with_ragged = utils.build_tensor_info(ragged_tensor) - export_dir = self._get_export_dir("test_signature_def_validation_ragged_1") - builder = saved_model_builder._SavedModelBuilder(export_dir) - self._validate_inputs_tensor_info_accept(builder, tensor_with_ragged) + export_dir = self._get_export_dir( + "test_signature_def_validation_ragged_1") + builder = saved_model_builder._SavedModelBuilder(export_dir) + self._validate_inputs_tensor_info_accept(builder, tensor_with_ragged) - export_dir = self._get_export_dir("test_signature_def_validation_ragged_2") - builder = saved_model_builder._SavedModelBuilder(export_dir) - self._validate_outputs_tensor_info_accept(builder, tensor_with_ragged) + export_dir = self._get_export_dir( + "test_signature_def_validation_ragged_2") + builder = saved_model_builder._SavedModelBuilder(export_dir) + self._validate_outputs_tensor_info_accept(builder, tensor_with_ragged) - @test_util.run_deprecated_v1 def testAssets(self): export_dir = self._get_export_dir("test_assets") builder = saved_model_builder._SavedModelBuilder(export_dir) - with self.session(graph=ops.Graph()) as sess: - self._init_and_validate_variable(sess, "v", 42) + with ops.Graph().as_default(): + with self.session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 42) - # Build an asset collection. - ignored_filepath = os.path.join( - compat.as_bytes(test.get_temp_dir()), compat.as_bytes("ignored.txt")) - file_io.write_string_to_file(ignored_filepath, "will be ignored") + # Build an asset collection. + ignored_filepath = os.path.join( + compat.as_bytes(test.get_temp_dir()), + compat.as_bytes("ignored.txt")) + file_io.write_string_to_file(ignored_filepath, "will be ignored") - asset_list = self._build_asset_collection("hello42.txt", "foo bar baz", - "asset_file_tensor") + asset_list = self._build_asset_collection("hello42.txt", "foo bar baz", + "asset_file_tensor") - builder.add_meta_graph_and_variables( - sess, ["foo"], assets_list=asset_list) + builder.add_meta_graph_and_variables( + sess, ["foo"], assets_list=asset_list) - # Save the SavedModel to disk. - builder.save() + # Save the SavedModel to disk. 
+ builder.save() - with self.session(graph=ops.Graph()) as sess: - foo_graph = loader.load(sess, ["foo"], export_dir) - self._validate_assets(export_dir, foo_graph.asset_file_def, "hello42.txt", - "foo bar baz", "asset_file_tensor:0") - ignored_asset_path = os.path.join( - compat.as_bytes(export_dir), - compat.as_bytes(constants.ASSETS_DIRECTORY), - compat.as_bytes("ignored.txt")) - self.assertFalse(file_io.file_exists(ignored_asset_path)) + with self.session(graph=ops.Graph()) as sess: + foo_graph = loader.load(sess, ["foo"], export_dir) + self._validate_assets(export_dir, foo_graph.asset_file_def, + "hello42.txt", "foo bar baz", + "asset_file_tensor:0") + ignored_asset_path = os.path.join( + compat.as_bytes(export_dir), + compat.as_bytes(constants.ASSETS_DIRECTORY), + compat.as_bytes("ignored.txt")) + self.assertFalse(file_io.file_exists(ignored_asset_path)) - @test_util.run_deprecated_v1 def testAssetsNameCollisionDiffFile(self): export_dir = self._get_export_dir("test_assets_name_collision_diff_file") builder = saved_model_builder._SavedModelBuilder(export_dir) - with self.session(graph=ops.Graph()) as sess: - self._init_and_validate_variable(sess, "v", 42) + with ops.Graph().as_default(): + with self.session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 42) - asset_list = self._build_asset_collection( - "hello42.txt", "foo bar bak", "asset_file_tensor", asset_subdir="1") + asset_list = self._build_asset_collection( + "hello42.txt", "foo bar bak", "asset_file_tensor", asset_subdir="1") - asset_list = self._build_asset_collection( - "hello42.txt", "foo bar baz", "asset_file_tensor_1", asset_subdir="2") + asset_list = self._build_asset_collection( + "hello42.txt", + "foo bar baz", + "asset_file_tensor_1", + asset_subdir="2") - builder.add_meta_graph_and_variables( - sess, ["foo"], assets_list=asset_list) + builder.add_meta_graph_and_variables( + sess, ["foo"], assets_list=asset_list) - # Save the SavedModel to disk. - builder.save() + # Save the SavedModel to disk. 
+ builder.save() - with self.session(graph=ops.Graph()) as sess: - foo_graph = loader.load(sess, ["foo"], export_dir) - self._validate_assets(export_dir, foo_graph.asset_file_def, "hello42.txt", - "foo bar bak", "asset_file_tensor:0") - self._validate_assets( - export_dir, - foo_graph.asset_file_def, - "hello42.txt_1", - "foo bar baz", - "asset_file_tensor_1:0", - asset_id=1) + with self.session(graph=ops.Graph()) as sess: + foo_graph = loader.load(sess, ["foo"], export_dir) + self._validate_assets(export_dir, foo_graph.asset_file_def, + "hello42.txt", "foo bar bak", + "asset_file_tensor:0") + self._validate_assets( + export_dir, + foo_graph.asset_file_def, + "hello42.txt_1", + "foo bar baz", + "asset_file_tensor_1:0", + asset_id=1) - @test_util.run_deprecated_v1 def testAssetsNameCollisionSameFilepath(self): export_dir = self._get_export_dir("test_assets_name_collision_same_path") builder = saved_model_builder._SavedModelBuilder(export_dir) - with self.session(graph=ops.Graph()) as sess: - self._init_and_validate_variable(sess, "v", 42) + with ops.Graph().as_default(): + with self.session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 42) - asset_list = self._build_asset_collection("hello42.txt", "foo bar baz", - "asset_file_tensor") + asset_list = self._build_asset_collection("hello42.txt", "foo bar baz", + "asset_file_tensor") - asset_list = self._build_asset_collection("hello42.txt", "foo bar baz", - "asset_file_tensor_1") + asset_list = self._build_asset_collection("hello42.txt", "foo bar baz", + "asset_file_tensor_1") - builder.add_meta_graph_and_variables( - sess, ["foo"], assets_list=asset_list) + builder.add_meta_graph_and_variables( + sess, ["foo"], assets_list=asset_list) - # Save the SavedModel to disk. - builder.save() + # Save the SavedModel to disk. + builder.save() - with self.session(graph=ops.Graph()) as sess: - foo_graph = loader.load(sess, ["foo"], export_dir) - self._validate_assets(export_dir, foo_graph.asset_file_def, "hello42.txt", - "foo bar baz", "asset_file_tensor:0") - # The second tensor should be recorded, but the same. - self._validate_assets( - export_dir, - foo_graph.asset_file_def, - "hello42.txt", - "foo bar baz", - "asset_file_tensor_1:0", - asset_id=1) - ignored_asset_path = os.path.join( - compat.as_bytes(export_dir), - compat.as_bytes(constants.ASSETS_DIRECTORY), - compat.as_bytes("hello42.txt_1")) - self.assertFalse(file_io.file_exists(ignored_asset_path)) + with self.session(graph=ops.Graph()) as sess: + foo_graph = loader.load(sess, ["foo"], export_dir) + self._validate_assets(export_dir, foo_graph.asset_file_def, + "hello42.txt", "foo bar baz", + "asset_file_tensor:0") + # The second tensor should be recorded, but the same. 
+ self._validate_assets( + export_dir, + foo_graph.asset_file_def, + "hello42.txt", + "foo bar baz", + "asset_file_tensor_1:0", + asset_id=1) + ignored_asset_path = os.path.join( + compat.as_bytes(export_dir), + compat.as_bytes(constants.ASSETS_DIRECTORY), + compat.as_bytes("hello42.txt_1")) + self.assertFalse(file_io.file_exists(ignored_asset_path)) - @test_util.run_deprecated_v1 def testAssetsNameCollisionSameFile(self): export_dir = self._get_export_dir("test_assets_name_collision_same_file") builder = saved_model_builder._SavedModelBuilder(export_dir) - with self.session(graph=ops.Graph()) as sess: - self._init_and_validate_variable(sess, "v", 42) + with ops.Graph().as_default(): + with self.session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 42) - asset_list = self._build_asset_collection( - "hello42.txt", "foo bar baz", "asset_file_tensor", asset_subdir="1") + asset_list = self._build_asset_collection( + "hello42.txt", "foo bar baz", "asset_file_tensor", asset_subdir="1") - asset_list = self._build_asset_collection( - "hello42.txt", "foo bar baz", "asset_file_tensor_1", asset_subdir="2") + asset_list = self._build_asset_collection( + "hello42.txt", + "foo bar baz", + "asset_file_tensor_1", + asset_subdir="2") - builder.add_meta_graph_and_variables( - sess, ["foo"], assets_list=asset_list) + builder.add_meta_graph_and_variables( + sess, ["foo"], assets_list=asset_list) - # Save the SavedModel to disk. - builder.save() + # Save the SavedModel to disk. + builder.save() - with self.session(graph=ops.Graph()) as sess: - foo_graph = loader.load(sess, ["foo"], export_dir) - self._validate_assets(export_dir, foo_graph.asset_file_def, "hello42.txt", - "foo bar baz", "asset_file_tensor:0") - # The second tensor should be recorded, but the same. - self._validate_assets( - export_dir, - foo_graph.asset_file_def, - "hello42.txt", - "foo bar baz", - "asset_file_tensor_1:0", - asset_id=1) - ignored_asset_path = os.path.join( - compat.as_bytes(export_dir), - compat.as_bytes(constants.ASSETS_DIRECTORY), - compat.as_bytes("hello42.txt_1")) - self.assertFalse(file_io.file_exists(ignored_asset_path)) + with self.session(graph=ops.Graph()) as sess: + foo_graph = loader.load(sess, ["foo"], export_dir) + self._validate_assets(export_dir, foo_graph.asset_file_def, + "hello42.txt", "foo bar baz", + "asset_file_tensor:0") + # The second tensor should be recorded, but the same. 
+ self._validate_assets( + export_dir, + foo_graph.asset_file_def, + "hello42.txt", + "foo bar baz", + "asset_file_tensor_1:0", + asset_id=1) + ignored_asset_path = os.path.join( + compat.as_bytes(export_dir), + compat.as_bytes(constants.ASSETS_DIRECTORY), + compat.as_bytes("hello42.txt_1")) + self.assertFalse(file_io.file_exists(ignored_asset_path)) - @test_util.run_deprecated_v1 def testAssetsNameCollisionManyFiles(self): export_dir = self._get_export_dir("test_assets_name_collision_many_files") builder = saved_model_builder._SavedModelBuilder(export_dir) - with self.session(graph=ops.Graph()) as sess: - self._init_and_validate_variable(sess, "v", 42) + with ops.Graph().as_default(): + with self.session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 42) - for i in range(5): - idx = str(i) - asset_list = self._build_asset_collection( - "hello42.txt", - "foo bar baz " + idx, - "asset_file_tensor_" + idx, - asset_subdir=idx) + for i in range(5): + idx = str(i) + asset_list = self._build_asset_collection( + "hello42.txt", + "foo bar baz " + idx, + "asset_file_tensor_" + idx, + asset_subdir=idx) - builder.add_meta_graph_and_variables( - sess, ["foo"], assets_list=asset_list) + builder.add_meta_graph_and_variables( + sess, ["foo"], assets_list=asset_list) - # Save the SavedModel to disk. - builder.save() + # Save the SavedModel to disk. + builder.save() - with self.session(graph=ops.Graph()) as sess: - foo_graph = loader.load(sess, ["foo"], export_dir) - for i in range(1, 5): - idx = str(i) - self._validate_assets( - export_dir, - foo_graph.asset_file_def, - "hello42.txt_" + idx, - "foo bar baz " + idx, - "asset_file_tensor_{}:0".format(idx), - asset_id=i) + with self.session(graph=ops.Graph()) as sess: + foo_graph = loader.load(sess, ["foo"], export_dir) + for i in range(1, 5): + idx = str(i) + self._validate_assets( + export_dir, + foo_graph.asset_file_def, + "hello42.txt_" + idx, + "foo bar baz " + idx, + "asset_file_tensor_{}:0".format(idx), + asset_id=i) - self._validate_assets(export_dir, foo_graph.asset_file_def, "hello42.txt", - "foo bar baz 0", "asset_file_tensor_0:0") + self._validate_assets(export_dir, foo_graph.asset_file_def, + "hello42.txt", "foo bar baz 0", + "asset_file_tensor_0:0") - @test_util.run_v1_only("b/120545219") def testCustomInitOp(self): export_dir = self._get_export_dir("test_main_op") builder = saved_model_builder._SavedModelBuilder(export_dir) - with self.session(graph=ops.Graph()) as sess: - # Add `v1` and `v2` variables to the graph. - v1 = variables.VariableV1(1, name="v1") - ops.add_to_collection("v", v1) - v2 = variables.VariableV1(2, name="v2") - ops.add_to_collection("v", v2) + with ops.Graph().as_default(): + with self.session(graph=ops.Graph()) as sess: + # Add `v1` and `v2` variables to the graph. + v1 = variables.VariableV1(1, name="v1") + v2 = variables.VariableV1(2, name="v2") - # Initialize another variable `v3` to 42. - v3 = variables.VariableV1(42, name="v3") - ops.add_to_collection("v", v3) + # Initialize another variable `v3` to 42. + v3 = variables.VariableV1(42, name="v3") - # Set up an assignment op to be run as part of the main_op. - with ops.control_dependencies([main_op.main_op()]): - add_v1_v2 = math_ops.add(v1._ref(), v2._ref()) - custom_init_op = control_flow_ops.group(state_ops.assign(v3, add_v1_v2)) + # Set up an assignment op to be run as part of the main_op. 
+ with ops.control_dependencies([main_op.main_op()]): + add_v1_v2 = math_ops.add(v1, v2) + custom_init_op = control_flow_ops.group( + state_ops.assign(v3, add_v1_v2)) - self.evaluate(custom_init_op) - builder.add_meta_graph_and_variables( - sess, ["foo"], init_op=custom_init_op) + self.evaluate(variables.global_variables_initializer()) + self.evaluate(custom_init_op) + builder.add_meta_graph_and_variables( + sess, ["foo"], init_op=custom_init_op) - # Save the SavedModel to disk. - builder.save() + # Save the SavedModel to disk. + builder.save() - with self.session(graph=ops.Graph()) as sess: - loader.load(sess, ["foo"], export_dir) - self.assertEqual(1, ops.get_collection("v")[0].eval()) - self.assertEqual(2, ops.get_collection("v")[1].eval()) - # Evaluates to the sum of the first two variables and assigned as part of - # the main_op, following a restore. - self.assertEqual(3, ops.get_collection("v")[2].eval()) + with self.session(graph=ops.Graph()) as sess: + loader.load(sess, ["foo"], export_dir) + self.assertEqual(1, self._eval("v1")) + self.assertEqual(2, self._eval("v2")) + # Evaluates to the sum of the first two variables and assigned as part + # of the main_op, following a restore. + self.assertEqual(3, self._eval("v3")) - @test_util.run_v1_only("b/120545219") def testTrainOp(self): export_dir = self._get_export_dir("test_train_op") builder = saved_model_builder._SavedModelBuilder(export_dir) - with self.session(graph=ops.Graph()) as sess: - # Add `v1` and `v2` variables to the graph. - v1 = variables.VariableV1(1, name="v1") - ops.add_to_collection("v", v1) - v2 = variables.VariableV1(2, name="v2") - ops.add_to_collection("v", v2) + with ops.Graph().as_default(): + with self.session(graph=ops.Graph()) as sess: + # Add `v1` and `v2` variables to the graph. + v1 = variables.VariableV1(1, name="v1") + v2 = variables.VariableV1(2, name="v2") - self.evaluate(variables.global_variables_initializer()) - train_op = state_ops.assign_add(v1, v2) + self.evaluate(variables.global_variables_initializer()) + train_op = state_ops.assign_add(v1, v2) - self.evaluate(train_op) - builder.add_meta_graph_and_variables(sess, ["foo"], train_op=train_op) + self.evaluate(train_op) + builder.add_meta_graph_and_variables(sess, ["foo"], train_op=train_op) - # Save the SavedModel to disk. - builder.save() + # Save the SavedModel to disk. + builder.save() - with self.session(graph=ops.Graph()) as sess: - meta_graph_def = loader.load(sess, ["foo"], export_dir) - self.assertEqual(3, ops.get_collection("v")[0].eval()) - self.assertEqual(2, ops.get_collection("v")[1].eval()) - self.assertIsInstance( - loader_impl.get_train_op(meta_graph_def), ops.Tensor) + with self.session(graph=ops.Graph()) as sess: + meta_graph_def = loader.load(sess, ["foo"], export_dir) + self.assertEqual(3, self._eval("v1")) + self.assertEqual(2, self._eval("v2")) + if variable_scope.resource_variables_enabled(): + self.assertEqual( + loader_impl.get_train_op(meta_graph_def).type, + "AssignAddVariableOp") + else: + self.assertIsInstance( + loader_impl.get_train_op(meta_graph_def), ops.Tensor) - @test_util.run_v1_only("b/120545219") def testTrainOpGroup(self): export_dir = self._get_export_dir("test_train_op_group") builder = saved_model_builder._SavedModelBuilder(export_dir) - with self.session(graph=ops.Graph()) as sess: - # Add `v1` and `v2` variables to the graph. 
- v1 = variables.VariableV1(1, name="v1") - ops.add_to_collection("v", v1) - v2 = variables.VariableV1(2, name="v2") - ops.add_to_collection("v", v2) + with ops.Graph().as_default(): + with self.session(graph=ops.Graph()) as sess: + # Add `v1` and `v2` variables to the graph. + variables.VariableV1(1, name="v1") + variables.VariableV1(2, name="v2") - self.evaluate(variables.global_variables_initializer()) - train_op = control_flow_ops.group() + self.evaluate(variables.global_variables_initializer()) + train_op = control_flow_ops.group() - self.evaluate(train_op) - builder.add_meta_graph_and_variables(sess, ["foo"], train_op=train_op) + self.evaluate(train_op) + builder.add_meta_graph_and_variables(sess, ["foo"], train_op=train_op) - # Save the SavedModel to disk. - builder.save() + # Save the SavedModel to disk. + builder.save() - with self.session(graph=ops.Graph()) as sess: - meta_graph_def = loader.load(sess, ["foo"], export_dir) - self.assertEqual(1, ops.get_collection("v")[0].eval()) - self.assertEqual(2, ops.get_collection("v")[1].eval()) - self.assertIsInstance( - loader_impl.get_train_op(meta_graph_def), ops.Operation) + with self.session(graph=ops.Graph()) as sess: + meta_graph_def = loader.load(sess, ["foo"], export_dir) + self.assertEqual(1, self._eval("v1")) + self.assertEqual(2, self._eval("v2")) + self.assertIsInstance( + loader_impl.get_train_op(meta_graph_def), ops.Operation) - @test_util.run_v1_only("b/120545219") def testTrainOpAfterVariables(self): export_dir = self._get_export_dir("test_train_op_after_variables") builder = saved_model_builder._SavedModelBuilder(export_dir) - with self.session(graph=ops.Graph()) as sess: - # Add `v1` and `v2` variables to the graph. - v1 = variables.VariableV1(1, name="v1") - ops.add_to_collection("v", v1) - v2 = variables.VariableV1(2, name="v2") - ops.add_to_collection("v", v2) + with ops.Graph().as_default(): + with self.session(graph=ops.Graph()) as sess: + # Add `v1` and `v2` variables to the graph. + v1 = variables.VariableV1(1, name="v1") + v2 = variables.VariableV1(2, name="v2") - self.evaluate(variables.global_variables_initializer()) - builder.add_meta_graph_and_variables(sess, ["pre_foo"]) + self.evaluate(variables.global_variables_initializer()) + builder.add_meta_graph_and_variables(sess, ["pre_foo"]) - train_op = state_ops.assign_add(v1, v2) - self.evaluate(train_op) - builder.add_meta_graph(["foo"], train_op=train_op) + train_op = state_ops.assign_add(v1, v2) + self.evaluate(train_op) + builder.add_meta_graph(["foo"], train_op=train_op) - # Save the SavedModel to disk. - builder.save() + # Save the SavedModel to disk. 
+ builder.save() - with self.session(graph=ops.Graph()) as sess: - meta_graph_def = loader.load(sess, ["foo"], export_dir) - self.assertIsInstance( - loader_impl.get_train_op(meta_graph_def), ops.Tensor) + with self.session(graph=ops.Graph()) as sess: + meta_graph_def = loader.load(sess, ["foo"], export_dir) + if variable_scope.resource_variables_enabled(): + self.assertEqual( + loader_impl.get_train_op(meta_graph_def).type, + "AssignAddVariableOp") + else: + self.assertIsInstance( + loader_impl.get_train_op(meta_graph_def), ops.Tensor) - with self.session(graph=ops.Graph()) as sess: - loader.load(sess, ["pre_foo"], export_dir) - self.assertFalse(ops.get_collection(constants.TRAIN_OP_KEY)) + with self.session(graph=ops.Graph()) as sess: + loader.load(sess, ["pre_foo"], export_dir) + self.assertFalse(ops.get_collection(constants.TRAIN_OP_KEY)) - @test_util.run_deprecated_v1 def testMultipleAssets(self): export_dir = self._get_export_dir("test_multiple_assets") builder = saved_model_builder._SavedModelBuilder(export_dir) - with self.session(graph=ops.Graph()) as sess: - self._init_and_validate_variable(sess, "v", 42) + with ops.Graph().as_default(): + with self.session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 42) - # Build an asset collection specific to `foo` graph. - asset_list = self._build_asset_collection("foo.txt", "content_foo", - "asset_file_tensor") + # Build an asset collection specific to `foo` graph. + asset_list = self._build_asset_collection("foo.txt", "content_foo", + "asset_file_tensor") - # Add the asset collection as part of the graph with tag "foo". - builder.add_meta_graph_and_variables( - sess, ["foo"], assets_list=asset_list) + # Add the asset collection as part of the graph with tag "foo". + builder.add_meta_graph_and_variables( + sess, ["foo"], assets_list=asset_list) - with self.session(graph=ops.Graph()) as sess: - self._init_and_validate_variable(sess, "v", 42) + with self.session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 42) - # Build an asset collection specific to `bar` graph. - asset_list = self._build_asset_collection("bar.txt", "content_bar", - "asset_file_tensor") + # Build an asset collection specific to `bar` graph. + asset_list = self._build_asset_collection("bar.txt", "content_bar", + "asset_file_tensor") - # Add the asset collection as part of the graph with tag "bar". - builder.add_meta_graph(["bar"], assets_list=asset_list) + # Add the asset collection as part of the graph with tag "bar". + builder.add_meta_graph(["bar"], assets_list=asset_list) - # Save the SavedModel to disk. - builder.save() + # Save the SavedModel to disk. + builder.save() - # Check assets restored for graph with tag "foo". - with self.session(graph=ops.Graph()) as sess: - foo_graph = loader.load(sess, ["foo"], export_dir) - self._validate_assets(export_dir, foo_graph.asset_file_def, "foo.txt", - "content_foo", "asset_file_tensor:0") + # Check assets restored for graph with tag "foo". + with self.session(graph=ops.Graph()) as sess: + foo_graph = loader.load(sess, ["foo"], export_dir) + self._validate_assets(export_dir, foo_graph.asset_file_def, "foo.txt", + "content_foo", "asset_file_tensor:0") - # Check assets restored for graph with tag "bar". - with self.session(graph=ops.Graph()) as sess: - bar_graph = loader.load(sess, ["bar"], export_dir) - self._validate_assets(export_dir, bar_graph.asset_file_def, "bar.txt", - "content_bar", "asset_file_tensor:0") + # Check assets restored for graph with tag "bar". 
+ with self.session(graph=ops.Graph()) as sess: + bar_graph = loader.load(sess, ["bar"], export_dir) + self._validate_assets(export_dir, bar_graph.asset_file_def, "bar.txt", + "content_bar", "asset_file_tensor:0") - @test_util.run_deprecated_v1 def testDuplicateAssets(self): export_dir = self._get_export_dir("test_duplicate_assets") builder = saved_model_builder._SavedModelBuilder(export_dir) - with self.session(graph=ops.Graph()) as sess: - self._init_and_validate_variable(sess, "v", 42) + with ops.Graph().as_default(): + with self.session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 42) - # Build an asset collection with `foo.txt` that has `foo` specific - # content. - asset_list = self._build_asset_collection("foo.txt", "content_foo", - "asset_file_tensor") + # Build an asset collection with `foo.txt` that has `foo` specific + # content. + asset_list = self._build_asset_collection("foo.txt", "content_foo", + "asset_file_tensor") - # Add the asset collection as part of the graph with tag "foo". - builder.add_meta_graph_and_variables( - sess, ["foo"], assets_list=asset_list) + # Add the asset collection as part of the graph with tag "foo". + builder.add_meta_graph_and_variables( + sess, ["foo"], assets_list=asset_list) - with self.session(graph=ops.Graph()) as sess: - self._init_and_validate_variable(sess, "v", 42) + with self.session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 42) - # Build an asset collection with `foo.txt` that has `bar` specific - # content. - asset_list = self._build_asset_collection("foo.txt", "content_bar", - "asset_file_tensor") + # Build an asset collection with `foo.txt` that has `bar` specific + # content. + asset_list = self._build_asset_collection("foo.txt", "content_bar", + "asset_file_tensor") - # Add the asset collection as part of the graph with tag "bar". - builder.add_meta_graph(["bar"], assets_list=asset_list) + # Add the asset collection as part of the graph with tag "bar". + builder.add_meta_graph(["bar"], assets_list=asset_list) - # Save the SavedModel to disk. - builder.save() + # Save the SavedModel to disk. + builder.save() - # Check assets restored for graph with tag "foo". - with self.session(graph=ops.Graph()) as sess: - foo_graph = loader.load(sess, ["foo"], export_dir) - self._validate_assets(export_dir, foo_graph.asset_file_def, "foo.txt", - "content_foo", "asset_file_tensor:0") + # Check assets restored for graph with tag "foo". + with self.session(graph=ops.Graph()) as sess: + foo_graph = loader.load(sess, ["foo"], export_dir) + self._validate_assets(export_dir, foo_graph.asset_file_def, "foo.txt", + "content_foo", "asset_file_tensor:0") - # Check assets restored for graph with tag "bar". - with self.session(graph=ops.Graph()) as sess: - bar_graph = loader.load(sess, ["bar"], export_dir) + # Check assets restored for graph with tag "bar". + with self.session(graph=ops.Graph()) as sess: + bar_graph = loader.load(sess, ["bar"], export_dir) - # Validate the assets for `bar` graph. `foo.txt` should contain the - # original contents corresponding to `foo` graph since an asset with the - # same name across multiple graphs is only stored the first time - self._validate_assets(export_dir, bar_graph.asset_file_def, "foo.txt", - "content_foo", "asset_file_tensor:0") + # Validate the assets for `bar` graph. 
`foo.txt` should contain the + # original contents corresponding to `foo` graph since an asset with the + # same name across multiple graphs is only stored the first time + self._validate_assets(export_dir, bar_graph.asset_file_def, "foo.txt", + "content_foo", "asset_file_tensor:0") - @test_util.run_v1_only("b/120545219") def testOp(self): export_dir = self._get_export_dir("test_op") builder = saved_model_builder._SavedModelBuilder(export_dir) - with session.Session( - graph=ops.Graph(), - config=config_pb2.ConfigProto(device_count={"CPU": 2})) as sess: - with sess.graph.device("/cpu:0"): - v1 = variables.VariableV1(1, name="v1") - with sess.graph.device("/cpu:1"): - v2 = variables.VariableV1(2, name="v2") + with ops.Graph().as_default(): + with session.Session( + graph=ops.Graph(), + config=config_pb2.ConfigProto(device_count={"CPU": 2})) as sess: + with sess.graph.device("/cpu:0"): + v1 = variables.VariableV1(1, name="v1") + with sess.graph.device("/cpu:1"): + v2 = variables.VariableV1(2, name="v2") - # v3 is an unsaved variable derived from v1 and v2. It is used to - # exercise the ability to run an init op when restoring a graph. - v3 = variables.VariableV1(1, name="v3", trainable=False, collections=[]) - assign_v3 = state_ops.assign(v3, math_ops.add(v1, v2)) - init_op = control_flow_ops.group(assign_v3, name="init_op") + # v3 is an unsaved variable derived from v1 and v2. It is used to + # exercise the ability to run an init op when restoring a graph. + v3 = variables.VariableV1(1, name="v3", trainable=False, collections=[]) + assign_v3 = state_ops.assign(v3, math_ops.add(v1, v2)) + control_flow_ops.group(assign_v3, name="init_op") - ops.add_to_collection("v", v1) - ops.add_to_collection("v", v2) - ops.add_to_collection("v", v3) - ops.add_to_collection("init_op", init_op) + self.evaluate(variables.global_variables_initializer()) + self.assertEqual(1, self._eval("v1")) + self.assertEqual(2, self._eval("v2")) - self.evaluate(variables.global_variables_initializer()) - self.assertEqual(1, ops.get_collection("v")[0].eval()) - self.assertEqual(2, ops.get_collection("v")[1].eval()) + builder.add_meta_graph_and_variables(sess, ["foo"]) - builder.add_meta_graph_and_variables(sess, ["foo"]) + # Save the SavedModel to disk. + builder.save() - # Save the SavedModel to disk. - builder.save() + with session.Session( + graph=ops.Graph(), + config=config_pb2.ConfigProto(device_count={"CPU": 2})) as sess: + loader.load(sess, ["foo"], export_dir) - with session.Session( - graph=ops.Graph(), - config=config_pb2.ConfigProto(device_count={"CPU": 2})) as sess: - loader.load(sess, ["foo"], export_dir) - - # Validate variables, run the init op and verify result. - self.assertEqual(1, ops.get_collection("v")[0].eval()) - self.assertEqual(2, ops.get_collection("v")[1].eval()) - ops.get_collection("init_op")[0].run() - self.assertEqual(3, ops.get_collection("v")[2].eval()) + # Validate variables, run the init op and verify result. 
+ self.assertEqual(1, self._eval("v1")) + self.assertEqual(2, self._eval("v2")) + sess.run("init_op") + self.assertEqual(3, self._eval("v3")) def testCustomSaveable(self): export_dir = self._get_export_dir("custom_saveable") @@ -1118,21 +1161,20 @@ class SavedModelTest(SavedModelTestBase): self.assertEqual(b"k1", v1.keys().eval()) self.assertEqual(3.0, v1.values().eval()) - @test_util.run_deprecated_v1 def testCustomSaver(self): export_dir = self._get_export_dir("test_custom_saver") builder = saved_model_builder._SavedModelBuilder(export_dir) - with self.session(graph=ops.Graph()) as sess: - variables.VariableV1(1, name="v1") - self.evaluate(variables.global_variables_initializer()) - custom_saver = training.Saver(name="my_saver") - builder.add_meta_graph_and_variables(sess, ["tag"], saver=custom_saver) - - # Save the SavedModel to disk. - builder.save() - with ops.Graph().as_default() as graph: + with self.session(graph=ops.Graph()) as sess: + variables.VariableV1(1, name="v1") + self.evaluate(variables.global_variables_initializer()) + custom_saver = training.Saver(name="my_saver") + builder.add_meta_graph_and_variables(sess, ["tag"], saver=custom_saver) + + # Save the SavedModel to disk. + builder.save() + with self.session(graph=graph) as sess: saved_graph = loader.load(sess, ["tag"], export_dir) graph_ops = [x.name for x in graph.get_operations()] @@ -1141,21 +1183,20 @@ class SavedModelTest(SavedModelTestBase): self.assertEqual( saved_graph.saver_def.restore_op_name, "my_saver/restore_all") - @test_util.run_deprecated_v1 def testNoCustomSaver(self): export_dir = self._get_export_dir("test_no_custom_saver") builder = saved_model_builder._SavedModelBuilder(export_dir) - with self.session(graph=ops.Graph()) as sess: - variables.VariableV1(1, name="v1") - self.evaluate(variables.global_variables_initializer()) - training.Saver(name="my_saver") - builder.add_meta_graph_and_variables(sess, ["tag"]) - - # Save the SavedModel to disk. - builder.save() - with ops.Graph().as_default() as graph: + with self.session(graph=ops.Graph()) as sess: + variables.VariableV1(1, name="v1") + self.evaluate(variables.global_variables_initializer()) + training.Saver(name="my_saver") + builder.add_meta_graph_and_variables(sess, ["tag"]) + + # Save the SavedModel to disk. 
+ builder.save() + with self.session(graph=graph) as sess: saved_graph = loader.load(sess, ["tag"], export_dir) graph_ops = [x.name for x in graph.get_operations()] @@ -1164,24 +1205,24 @@ class SavedModelTest(SavedModelTestBase): self.assertEqual( saved_graph.saver_def.restore_op_name, "save/restore_all") - @test_util.run_deprecated_v1 def testMultipleCustomSavers(self): export_dir = self._get_export_dir("test_multiple_custom_savers") builder = saved_model_builder._SavedModelBuilder(export_dir) - with self.session(graph=ops.Graph()) as sess: - variables.VariableV1(1, name="v1") - self.evaluate(variables.global_variables_initializer()) - builder.add_meta_graph_and_variables(sess, ["tag_0"]) + with ops.Graph().as_default(): + with self.session(graph=ops.Graph()) as sess: + variables.VariableV1(1, name="v1") + self.evaluate(variables.global_variables_initializer()) + builder.add_meta_graph_and_variables(sess, ["tag_0"]) - saver_1 = training.Saver() - builder.add_meta_graph(["tag_1"], saver=saver_1) + saver_1 = training.Saver() + builder.add_meta_graph(["tag_1"], saver=saver_1) - saver_2 = training.Saver() - builder.add_meta_graph(["tag_2"], saver=saver_2) + saver_2 = training.Saver() + builder.add_meta_graph(["tag_2"], saver=saver_2) - # Save the SavedModel to disk. - builder.save() + # Save the SavedModel to disk. + builder.save() def _validate_custom_saver(tag_name, saver_name): with ops.Graph().as_default() as graph: @@ -1195,82 +1236,78 @@ class SavedModelTest(SavedModelTestBase): _validate_custom_saver("tag_1", "save_1/restore_all") _validate_custom_saver("tag_2", "save_2/restore_all") - @test_util.run_deprecated_v1 def testImportScope(self): export_dir = self._get_export_dir("test_scoped_assets") builder = saved_model_builder._SavedModelBuilder(export_dir) - # Build a SavedModel with a variable, an asset, and a constant tensor. - with self.session(graph=ops.Graph()) as sess: - self._init_and_validate_variable(sess, "v", 42) - asset_list = self._build_asset_collection("foo.txt", "content_foo", - "asset_file_tensor") - constant_op.constant("constant value", name="constant_tensor_name") - builder.add_meta_graph_and_variables( - sess, ["tag_name"], assets_list=asset_list) + with ops.Graph().as_default(): + # Build a SavedModel with a variable, an asset, and a constant tensor. + with self.session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 42) + asset_list = self._build_asset_collection("foo.txt", "content_foo", + "asset_file_tensor") + constant_op.constant("constant value", name="constant_tensor_name") + builder.add_meta_graph_and_variables( + sess, ["tag_name"], assets_list=asset_list) - # Save the asset file path for later comparison. - asset_file_path = asset_list[0].eval() + # Save the asset file path for later comparison. + asset_file_path = asset_list[0].eval() - # Save the SavedModel to disk. - builder.save() + # Save the SavedModel to disk. + builder.save() - with self.session(graph=ops.Graph()) as sess: - # Restore the SavedModel under an import_scope in a new graph/session. - graph_proto = loader.load( - sess, ["tag_name"], export_dir, import_scope="scope_name") + with self.session(graph=ops.Graph()) as sess: + # Restore the SavedModel under an import_scope in a new graph/session. + graph_proto = loader.load( + sess, ["tag_name"], export_dir, import_scope="scope_name") - # The loaded variable tensor should be scoped, but its contents should be - # unchanged. 
- self.assertEqual( - "scope_name/v:0", - ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)[0].name) - self.assertEqual( - 42, - ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)[0].eval()) + # The loaded variable tensor should be scoped, but its contents should + # be unchanged. + self.assertEqual( + "scope_name/v:0", + ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)[0].name) + self.assertEqual(42, self._eval("scope_name/v")) - # The loaded asset tensor should be scoped, but the asset file path and - # contents should be unchanged. - asset_list = ops.get_collection(ops.GraphKeys.ASSET_FILEPATHS) - self.assertEqual(1, len(asset_list)) - self.assertEqual(asset_file_path, asset_list[0].eval()) - self.assertEqual("scope_name/asset_file_tensor:0", asset_list[0].name) - # The static asset data inside graph_proto.collection_def should not be - # scoped. - self._validate_assets(export_dir, graph_proto.asset_file_def, "foo.txt", - "content_foo", "asset_file_tensor:0") + # The loaded asset tensor should be scoped, but the asset file path and + # contents should be unchanged. + asset_list = ops.get_collection(ops.GraphKeys.ASSET_FILEPATHS) + self.assertEqual(1, len(asset_list)) + self.assertEqual(asset_file_path, asset_list[0].eval()) + self.assertEqual("scope_name/asset_file_tensor:0", asset_list[0].name) + # The static asset data inside graph_proto.collection_def should not be + # scoped. + self._validate_assets(export_dir, graph_proto.asset_file_def, "foo.txt", + "content_foo", "asset_file_tensor:0") - # The constant tensor should be scoped, but its contents should be - # unchanged. - self.assertEqual( - compat.as_bytes("constant value"), - ops.get_default_graph().get_tensor_by_name( - "scope_name/constant_tensor_name:0").eval()) + # The constant tensor should be scoped, but its contents should be + # unchanged. + self.assertEqual( + compat.as_bytes("constant value"), + ops.get_default_graph().get_tensor_by_name( + "scope_name/constant_tensor_name:0").eval()) - @test_util.run_deprecated_v1 def testClearDevices(self): export_dir = self._get_export_dir("test_clear_devices") builder = saved_model_builder._SavedModelBuilder(export_dir) - # Specify a device and save a variable. - ops.reset_default_graph() - with session.Session( - target="", - config=config_pb2.ConfigProto(device_count={"CPU": 2})) as sess: - with sess.graph.device("/cpu:0"): - self._init_and_validate_variable(sess, "v", 42) - builder.add_meta_graph_and_variables( - sess, [tag_constants.TRAINING], clear_devices=True) + with ops.Graph().as_default(): + # Specify a device and save a variable. + with session.Session( + target="", + config=config_pb2.ConfigProto(device_count={"CPU": 2})) as sess: + with sess.graph.device("/cpu:0"): + self._init_and_validate_variable(sess, "v", 42) + builder.add_meta_graph_and_variables( + sess, [tag_constants.TRAINING], clear_devices=True) - # Save the SavedModel to disk. - builder.save() + # Save the SavedModel to disk. + builder.save() - # Restore the graph with a single predefined tag whose variables were saved - # without any device information. - with self.session(graph=ops.Graph()) as sess: - loader.load(sess, [tag_constants.TRAINING], export_dir) - self.assertEqual( - 42, ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)[0].eval()) + # Restore the graph with a single predefined tag whose variables were + # saved without any device information. 
+ with self.session(graph=ops.Graph()) as sess: + loader.load(sess, [tag_constants.TRAINING], export_dir) + self.assertEqual(42, self._eval("v")) # Tests the behavior of loading SavedModels that having missing attrs or attrs # with incorrect types. @@ -1361,47 +1398,47 @@ class SavedModelV1Test(SavedModelTestBase): self.assertEqual(expected_asset_file_name, asset.filename) self.assertEqual(expected_asset_tensor_name, asset.tensor_info.name) - @test_util.run_deprecated_v1 def testWritingAssetsToCollection(self): export_dir = self._get_export_dir("test_writing_assets_to_collection") builder = saved_model_builder.SavedModelBuilder(export_dir) - with self.session(graph=ops.Graph()) as sess: - self._init_and_validate_variable(sess, "v", 42) + with ops.Graph().as_default(): + with self.session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 42) - # Build an asset list. - ignored_filepath = os.path.join( - compat.as_bytes(test.get_temp_dir()), compat.as_bytes("ignored.txt")) - file_io.write_string_to_file(ignored_filepath, "will be ignored") + # Build an asset list. + ignored_filepath = os.path.join( + compat.as_bytes(test.get_temp_dir()), + compat.as_bytes("ignored.txt")) + file_io.write_string_to_file(ignored_filepath, "will be ignored") - asset_collection = self._build_asset_collection( - "hello42.txt", "foo bar baz", "asset_file_tensor") + asset_collection = self._build_asset_collection("hello42.txt", + "foo bar baz", + "asset_file_tensor") - builder.add_meta_graph_and_variables( - sess, ["foo"], assets_collection=asset_collection) + builder.add_meta_graph_and_variables( + sess, ["foo"], assets_collection=asset_collection) - # Save the SavedModel to disk. - builder.save() + # Save the SavedModel to disk. + builder.save() - with self.session(graph=ops.Graph()) as sess: - foo_graph = loader.load(sess, ["foo"], export_dir) - self._validate_asset_collection(export_dir, foo_graph.collection_def, - "hello42.txt", "foo bar baz", - "asset_file_tensor:0") - ignored_asset_path = os.path.join( - compat.as_bytes(export_dir), - compat.as_bytes(constants.ASSETS_DIRECTORY), - compat.as_bytes("ignored.txt")) - self.assertFalse(file_io.file_exists(ignored_asset_path)) + with self.session(graph=ops.Graph()) as sess: + foo_graph = loader.load(sess, ["foo"], export_dir) + self._validate_asset_collection(export_dir, foo_graph.collection_def, + "hello42.txt", "foo bar baz", + "asset_file_tensor:0") + ignored_asset_path = os.path.join( + compat.as_bytes(export_dir), + compat.as_bytes(constants.ASSETS_DIRECTORY), + compat.as_bytes("ignored.txt")) + self.assertFalse(file_io.file_exists(ignored_asset_path)) - @test_util.run_deprecated_v1 def testLegacyInitOpWithNonEmptyCollection(self): export_dir = self._get_export_dir( "test_legacy_init_op_with_non_empty_collection") self._testInitOpsWithNonEmptyCollection(export_dir, constants.LEGACY_INIT_OP_KEY) - @test_util.run_deprecated_v1 def testMainOpWithNonEmptyCollection(self): export_dir = self._get_export_dir("test_main_op_with_non_empty_collection") self._testInitOpsWithNonEmptyCollection(export_dir, constants.MAIN_OP_KEY) @@ -1409,31 +1446,32 @@ class SavedModelV1Test(SavedModelTestBase): def _testInitOpsWithNonEmptyCollection(self, export_dir, key): builder = saved_model_builder.SavedModelBuilder(export_dir) - g = ops.Graph() - with self.session(graph=g) as sess: - # Initialize variable `v1` to 1. 
- v1 = variables.VariableV1(1, name="v1") - ops.add_to_collection("v", v1) + with ops.Graph().as_default(): + with self.session() as sess: + # Initialize variable `v1` to 1. + v1 = variables.VariableV1(1, name="v1") + ops.add_to_collection("v", v1) - # Initialize another variable `v2` to 42. - v2 = variables.VariableV1(42, name="v2", trainable=False, collections=[]) - ops.add_to_collection("v", v2) + # Initialize another variable `v2` to 42. + v2 = variables.VariableV1( + 42, name="v2", trainable=False, collections=[]) + ops.add_to_collection("v", v2) - # Set up an assignment op to be run as part of the init op. - assign_v2 = state_ops.assign(v2, v1) - init_op = control_flow_ops.group(assign_v2, name="init_op") + # Set up an assignment op to be run as part of the init op. + assign_v2 = state_ops.assign(v2, v1) + init_op = control_flow_ops.group(assign_v2, name="init_op") - self.evaluate(variables.global_variables_initializer()) + self.evaluate(variables.global_variables_initializer()) - ops.add_to_collection(key, control_flow_ops.no_op()) - # ValueError should be raised since the LEGACY_INIT_OP_KEY collection - # is not empty and we don't support multiple init ops. - with self.assertRaisesRegex(ValueError, "Graph already contains"): - builder.add_meta_graph_and_variables( - sess, ["foo"], legacy_init_op=init_op) - # We shouldn't be able to add as MAIN_OP, either. - with self.assertRaisesRegex(ValueError, "Graph already contains"): - builder.add_meta_graph_and_variables(sess, ["foo"], main_op=init_op) + ops.add_to_collection(key, control_flow_ops.no_op()) + # ValueError should be raised since the LEGACY_INIT_OP_KEY collection + # is not empty and we don't support multiple init ops. + with self.assertRaisesRegex(ValueError, "Graph already contains"): + builder.add_meta_graph_and_variables( + sess, ["foo"], legacy_init_op=init_op) + # We shouldn't be able to add as MAIN_OP, either. + with self.assertRaisesRegex(ValueError, "Graph already contains"): + builder.add_meta_graph_and_variables(sess, ["foo"], main_op=init_op) def testStripDefaultAttrs(self): export_dir = self._get_export_dir("test_strip_default_attrs") @@ -1503,40 +1541,38 @@ class SavedModelV1Test(SavedModelTestBase): self.assertIn("T", node_def.attr) self.assertIn("Tout", node_def.attr) - @test_util.run_v1_only("b/120545219") def testLegacyInitOp(self): export_dir = self._get_export_dir("test_legacy_init_op") builder = saved_model_builder.SavedModelBuilder(export_dir) - with self.session(graph=ops.Graph()) as sess: - # Add `v1` and `v2` variables to the graph. - v1 = variables.VariableV1(1, name="v1") - ops.add_to_collection("v", v1) - v2 = variables.VariableV1(2, name="v2") - ops.add_to_collection("v", v2) + with ops.Graph().as_default(): + with self.session(graph=ops.Graph()) as sess: + # Add `v1` and `v2` variables to the graph. + v1 = variables.VariableV1(1, name="v1") + v2 = variables.VariableV1(2, name="v2") - # Initialize another variable `v3` to 42. - v3 = variables.VariableV1(42, name="v3", trainable=False, collections=[]) - ops.add_to_collection("v", v3) + # Initialize another variable `v3` to 42. + v3 = variables.VariableV1(42, name="v3", trainable=False) - # Set up an assignment op to be run as part of the init_op. - assign_v3 = state_ops.assign(v3, math_ops.add(v1, v2)) - legacy_init_op = control_flow_ops.group(assign_v3, name="legacy_init_op") + # Set up an assignment op to be run as part of the init_op. 
+ assign_v3 = state_ops.assign(v3, math_ops.add(v1, v2)) + legacy_init_op = control_flow_ops.group( + assign_v3, name="legacy_init_op") - self.evaluate(variables.global_variables_initializer()) - builder.add_meta_graph_and_variables( - sess, ["foo"], legacy_init_op=legacy_init_op) + self.evaluate(variables.global_variables_initializer()) + builder.add_meta_graph_and_variables( + sess, ["foo"], legacy_init_op=legacy_init_op) - # Save the SavedModel to disk. - builder.save() + # Save the SavedModel to disk. + builder.save() - with self.session(graph=ops.Graph()) as sess: - loader.load(sess, ["foo"], export_dir) - self.assertEqual(1, ops.get_collection("v")[0].eval()) - self.assertEqual(2, ops.get_collection("v")[1].eval()) - # Evaluates to the sum of the first two variables and assigned as part of - # the legacy_init_op, following a restore. - self.assertEqual(3, ops.get_collection("v")[2].eval()) + with self.session(graph=ops.Graph()) as sess: + loader.load(sess, ["foo"], export_dir) + self.assertEqual(1, self._eval("v1")) + self.assertEqual(2, self._eval("v2")) + # Evaluates to the sum of the first two variables and assigned as part + # of the legacy_init_op, following a restore. + self.assertEqual(3, self._eval("v3")) if __name__ == "__main__": From c3bddbddaa19aa0d82d57b4e6e9ad99b4b3d9876 Mon Sep 17 00:00:00 2001 From: Jared Duke Date: Fri, 31 Jul 2020 15:13:58 -0700 Subject: [PATCH 1904/2522] Reland "Fix issues with 32-bit ARM builds" PiperOrigin-RevId: 324293928 Change-Id: Ifc28dc29cd33af42f7d80c9f87f27692cc505854 --- tensorflow/BUILD | 30 +++++++++++++++++++++++++++++ tensorflow/core/kernels/BUILD | 14 ++++++-------- tensorflow/core/platform/platform.h | 14 +++++++++----- tensorflow/tensorflow.bzl | 2 +- 4 files changed, 46 insertions(+), 14 deletions(-) diff --git a/tensorflow/BUILD b/tensorflow/BUILD index c4963a8f106..d1c1d7dcdef 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -260,6 +260,36 @@ config_setting( visibility = ["//visibility:public"], ) +config_setting( + name = "armeabi", + values = {"cpu": "armeabi"}, + visibility = ["//visibility:public"], +) + +config_setting( + name = "armeabi-v7a", + values = {"cpu": "armeabi-v7a"}, + visibility = ["//visibility:public"], +) + +config_setting( + name = "arm64-v8a", + values = {"cpu": "arm64-v8a"}, + visibility = ["//visibility:public"], +) + +selects.config_setting_group( + name = "arm_any", + match_any = [ + ":arm", + ":armeabi", + ":armeabi-v7a", + ":arm64-v8a", + ":linux_aarch64", + ":linux_armhf", + ], +) + config_setting( name = "freebsd", values = {"cpu": "freebsd"}, diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 0f507273258..a9884eea52b 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -810,10 +810,9 @@ cc_library( srcs = ["eigen_contraction_kernel.cc"], hdrs = ["eigen_contraction_kernel.h"], defines = select({ - "//tensorflow:android": [], - "//tensorflow:arm": [], + "//tensorflow:android_x86": [], + "//tensorflow:arm_any": [], "//tensorflow:ios": [], - "//tensorflow:linux_aarch64": [], "//tensorflow:linux_ppc64le": [], "//conditions:default": [ "TENSORFLOW_USE_CUSTOM_CONTRACTION_KERNEL", @@ -825,10 +824,9 @@ cc_library( "//third_party/eigen3", "//tensorflow/core/platform:dynamic_annotations", ] + select({ - "//tensorflow:android": [], - "//tensorflow:arm": [], + "//tensorflow:android_x86": [], + "//tensorflow:arm_any": [], "//tensorflow:ios": [], - "//tensorflow:linux_aarch64": [], "//tensorflow:linux_ppc64le": [], "//conditions:default": 
["@mkl_dnn//:mkldnn_single_threaded"], }), @@ -3179,8 +3177,8 @@ tf_cc_test( name = "eigen_mkldnn_contraction_kernel_test", size = "small", srcs = select({ - "//tensorflow:android": [], - "//tensorflow:arm": [], + "//tensorflow:android_x86": [], + "//tensorflow:arm_any": [], "//tensorflow:ios": [], "//tensorflow:linux_ppc64le": [], ":no_mkldnn_contraction_kernel": [], diff --git a/tensorflow/core/platform/platform.h b/tensorflow/core/platform/platform.h index a840d7b06e3..3375a6e50eb 100644 --- a/tensorflow/core/platform/platform.h +++ b/tensorflow/core/platform/platform.h @@ -41,18 +41,22 @@ limitations under the License. #elif defined(_WIN32) #define PLATFORM_WINDOWS -#elif defined(__arm__) -#define PLATFORM_POSIX - #elif defined(__EMSCRIPTEN__) #define PLATFORM_PORTABLE_GOOGLE #define PLATFORM_POSIX +// EMSCRIPTEN builds are considered "mobile" for the sake of portability. +#define IS_MOBILE_PLATFORM + +#elif defined(__arm__) || defined(__aarch64__) +// If no platform specified, use: +#define PLATFORM_POSIX // Require an outside macro to tell us if we're building for Raspberry Pi or // another ARM device that's not a mobile platform. -#if !defined(RASPBERRY_PI) && !defined(ARM_NON_MOBILE) +#if !defined(RASPBERRY_PI) && !defined(ARM_NON_MOBILE) && \ + !defined(PLATFORM_GOOGLE) #define IS_MOBILE_PLATFORM -#endif // !defined(RASPBERRY_PI) && !defined(ARM_NON_MOBILE) +#endif #else // If no platform specified, use: diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 3edf2a417ba..86369a6372e 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -220,7 +220,7 @@ def if_not_mobile(a): # Config setting selector used when building for products # which requires restricted licenses to be avoided. -def if_not_lgpl_restricted(a): +def if_not_mobile_or_arm_or_lgpl_restricted(a): _ = (a,) return select({ "//conditions:default": [], From 00426cb40333774e31e0ed67a6dcc141dba6ce1f Mon Sep 17 00:00:00 2001 From: Peng Wang Date: Fri, 31 Jul 2020 15:18:40 -0700 Subject: [PATCH 1905/2522] [TF-numpy] Adds a validator for numpy doc links, and fixes broken links. 
PiperOrigin-RevId: 324294777 Change-Id: If8fbf6a73dc05c09f0aa3240817f4b554d0cfc9e --- tensorflow/python/ops/numpy_ops/__init__.py | 6 +- .../python/ops/numpy_ops/np_array_ops.py | 5 +- .../python/ops/numpy_ops/np_math_ops.py | 6 +- tensorflow/python/ops/numpy_ops/np_utils.py | 104 ++++++++++++++---- 4 files changed, 91 insertions(+), 30 deletions(-) diff --git a/tensorflow/python/ops/numpy_ops/__init__.py b/tensorflow/python/ops/numpy_ops/__init__.py index d2b1264b752..5cc5cf5ac85 100644 --- a/tensorflow/python/ops/numpy_ops/__init__.py +++ b/tensorflow/python/ops/numpy_ops/__init__.py @@ -181,17 +181,17 @@ from tensorflow.python.ops.numpy_ops.np_utils import result_type # pylint: disable=redefined-builtin,undefined-variable -@np_utils.np_doc("max") +@np_utils.np_doc("max", link=np_utils.AliasOf("maximum")) def max(a, axis=None, keepdims=None): return amax(a, axis=axis, keepdims=keepdims) -@np_utils.np_doc("min") +@np_utils.np_doc("min", link=np_utils.AliasOf("minimum")) def min(a, axis=None, keepdims=None): return amin(a, axis=axis, keepdims=keepdims) -@np_utils.np_doc("round") +@np_utils.np_doc("round", link=np_utils.AliasOf("around")) def round(a, decimals=0): return around(a, decimals=decimals) # pylint: enable=redefined-builtin,undefined-variable diff --git a/tensorflow/python/ops/numpy_ops/np_array_ops.py b/tensorflow/python/ops/numpy_ops/np_array_ops.py index d5b537e5a9f..5f82bca0061 100644 --- a/tensorflow/python/ops/numpy_ops/np_array_ops.py +++ b/tensorflow/python/ops/numpy_ops/np_array_ops.py @@ -958,13 +958,14 @@ def select(condlist, choicelist, default=0): # pylint: disable=missing-docstrin return output -@np_utils.np_doc('shape') +@np_utils.np_doc('shape', link=np_utils.Link( + 'https://numpy.org/doc/1.18/reference/generated/numpy.shape.html')) def shape(a): a = asarray(a) return a.shape -@np_utils.np_doc('ndim') +@np_utils.np_doc('ndim', link=np_utils.NoLink()) def ndim(a): a = asarray(a) return a.ndim diff --git a/tensorflow/python/ops/numpy_ops/np_math_ops.py b/tensorflow/python/ops/numpy_ops/np_math_ops.py index 690de58ea38..c1505e6fb65 100644 --- a/tensorflow/python/ops/numpy_ops/np_math_ops.py +++ b/tensorflow/python/ops/numpy_ops/np_math_ops.py @@ -565,7 +565,7 @@ def bitwise_xor(x1, x2): return _bitwise_binary_op(bitwise_ops.bitwise_xor, x1, x2) -@np_utils.np_doc('bitwise_not') +@np_utils.np_doc('bitwise_not', link=np_utils.AliasOf('invert')) def bitwise_not(x): def f(x): @@ -612,7 +612,7 @@ def sqrt(x): return _scalar(math_ops.sqrt, x, True) -@np_utils.np_doc('abs') +@np_utils.np_doc('abs', link=np_utils.AliasOf('absolute')) def abs(x): # pylint: disable=redefined-builtin return _scalar(math_ops.abs, x) @@ -769,7 +769,7 @@ def cbrt(x): return _scalar(f, x, True) -@np_utils.np_doc('conjugate') +@np_utils.np_doc('conjugate', link=np_utils.AliasOf('conj')) def conjugate(x): return _scalar(math_ops.conj, x) diff --git a/tensorflow/python/ops/numpy_ops/np_utils.py b/tensorflow/python/ops/numpy_ops/np_utils.py index 2abf7f9a795..ca09624de76 100644 --- a/tensorflow/python/ops/numpy_ops/np_utils.py +++ b/tensorflow/python/ops/numpy_ops/np_utils.py @@ -207,7 +207,8 @@ def _prepare_np_fun_name_and_fun(np_fun_name, np_fun): return np_fun_name, np_fun -def _np_doc_helper(f, np_f, np_fun_name=None, unsupported_params=None): +def _np_doc_helper(f, np_f, np_fun_name=None, unsupported_params=None, + link=None): """Helper to get docs.""" assert np_f or np_fun_name if not np_fun_name: @@ -221,7 +222,7 @@ def _np_doc_helper(f, np_f, np_fun_name=None, unsupported_params=None): doc = 
_add_blank_line(doc) # TODO(wangpeng): Re-enable the following and choose inlined vs. link to numpy # doc according to some global switch. - doc = _add_np_doc(doc, np_fun_name, np_f) + doc = _add_np_doc(doc, np_fun_name, np_f, link=link) return doc @@ -257,7 +258,61 @@ def set_np_doc_form(value): _np_doc_form = value -def _add_np_doc(doc, np_fun_name, np_f): +class Link: + + def __init__(self, v): + self.value = v + + +class AliasOf: + + def __init__(self, v): + self.value = v + + +class NoLink: + pass + + +def generate_link(flag, np_fun_name): + """Generates link from numpy function name. + + Args: + flag: the flag to control link form. See `set_np_doc_form`. + np_fun_name: the numpy function name. + + Returns: + A string. + """ + # Only adds link in this case + if flag == 'dev': + template = 'https://numpy.org/devdocs/reference/generated/numpy.%s.html' + elif flag == 'stable': + template = ( + 'https://numpy.org/doc/stable/reference/generated/numpy.%s.html') + elif re.match(r'\d+(\.\d+(\.\d+)?)?$', flag): + # `flag` is the version number + template = ('https://numpy.org/doc/' + flag + + '/reference/generated/numpy.%s.html') + else: + return None + return template % np_fun_name + + +_is_check_link = (os.getenv('TF_NP_CHECK_LINK', 'False') in + ('True', 'true', '1')) + + +def is_check_link(): + return _is_check_link + + +def set_check_link(value): + global _is_check_link + _is_check_link = value + + +def _add_np_doc(doc, np_fun_name, np_f, link): """Appends the numpy docstring to `doc`, according to `set_np_doc_form`. See `set_np_doc_form` for how it controls the form of the numpy docstring. @@ -266,6 +321,7 @@ def _add_np_doc(doc, np_fun_name, np_f): doc: the docstring to be appended to. np_fun_name: the name of the numpy function. np_f: (optional) the numpy function. + link: (optional) which link to use. See `np_doc` for details. Returns: `doc` with numpy docstring appended. @@ -279,22 +335,23 @@ def _add_np_doc(doc, np_fun_name, np_f): # comment. doc += np_f.__doc__.replace('>>>', '>') elif isinstance(flag, str): - # Only adds link in this case - if flag == 'dev': - template = 'https://numpy.org/devdocs/reference/generated/numpy.%s.html' - elif flag == 'stable': - template = ( - 'https://numpy.org/doc/stable/reference/generated/numpy.%s.html') - elif re.match(r'\d+(\.\d+(\.\d+)?)?$', flag): - # `flag` is the version number - template = ('https://numpy.org/doc/' + flag + - '/reference/generated/numpy.%s.html') + if link is None: + url = generate_link(flag, np_fun_name) + elif isinstance(link, AliasOf): + url = generate_link(flag, link.value) + elif isinstance(link, Link): + url = link.value else: - template = None - if template is not None: - link = template % np_fun_name + url = None + if url is not None: + if is_check_link(): + # Imports locally because some builds may not have `requests` + import requests # pylint: disable=g-import-not-at-top + r = requests.head(url) + if r.status_code != 200: + raise ValueError("Can't open link for %s: %s" % (np_fun_name, url)) doc += 'See the NumPy documentation for [`numpy.%s`](%s).' % ( - np_fun_name, link) + np_fun_name, url) return doc @@ -311,7 +368,7 @@ def set_is_sig_mismatch_an_error(value): _is_sig_mismatch_an_error = value -def np_doc(np_fun_name, np_fun=None, export=True): +def np_doc(np_fun_name, np_fun=None, export=True, link=None): """Attachs numpy docstring to a function. Args: @@ -322,6 +379,11 @@ def np_doc(np_fun_name, np_fun=None, export=True): `tf.experimental.numpy`. 
Note that if `export` is `True`, `np_fun` must be a function directly under the `numpy` module, not under any submodule of `numpy` (e.g. `numpy.random`). + link: (optional) which link to use. If `None`, a default link generated from + `np_fun_name` will be used. If an instance of `AliasOf`, `link.value` will + be used in place of `np_fun_name` for the link generation. If an instance + of `Link`, `link.value` will be used as the whole link. If an instance of + `NoLink`, no link will be added. Returns: A function decorator that attaches the docstring from `np_fun` to the @@ -363,10 +425,8 @@ def np_doc(np_fun_name, np_fun=None, export=True): if name not in sig.parameters: unsupported_params.append(name) f.__doc__ = _np_doc_helper( - f, - np_fun, - np_fun_name=np_fun_name, - unsupported_params=unsupported_params) + f, np_fun, np_fun_name=np_fun_name, + unsupported_params=unsupported_params, link=link) if export: return np_export.np_export(np_fun_name)(f) else: From 111caf888de00cfed7b092d07a7a18034934f7a6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 31 Jul 2020 15:34:16 -0700 Subject: [PATCH 1906/2522] Supports return_attention_scores option in tf.keras.layers.Attention. PiperOrigin-RevId: 324297401 Change-Id: I4185c206a44237e3289a8cb18d6c2086761165b0 --- .../python/keras/layers/dense_attention.py | 31 ++++- .../keras/layers/dense_attention_test.py | 126 +++++++++++++----- 2 files changed, 114 insertions(+), 43 deletions(-) diff --git a/tensorflow/python/keras/layers/dense_attention.py b/tensorflow/python/keras/layers/dense_attention.py index d3f204d661b..cd277a1a6a9 100644 --- a/tensorflow/python/keras/layers/dense_attention.py +++ b/tensorflow/python/keras/layers/dense_attention.py @@ -49,6 +49,8 @@ class BaseDenseAttention(Layer): flow of information from the future towards the past. dropout: Float between 0 and 1. Fraction of the units to drop for the attention scores. + return_attention_scores: bool, it `True`, returns the attention scores + (after masking and softmax) as an additional output argument. Call Arguments: @@ -68,15 +70,19 @@ class BaseDenseAttention(Layer): training: Python boolean indicating whether the layer should behave in training mode (adding dropout) or in inference mode (no dropout). - Output shape: + Output: Attention outputs of shape `[batch_size, Tq, dim]`. + [Optional] Attention scores after masking and softmax with shape + `[batch_size, Tq, Tv]`. """ - def __init__(self, causal=False, dropout=0.0, **kwargs): + def __init__(self, causal=False, dropout=0.0, return_attention_scores=False, + **kwargs): super(BaseDenseAttention, self).__init__(**kwargs) self.causal = causal self.dropout = dropout + self.return_attention_scores = return_attention_scores self.supports_masking = True def _calculate_scores(self, query, key): @@ -115,6 +121,8 @@ class BaseDenseAttention(Layer): Returns: Tensor of shape `[batch_size, Tq, dim]`. + Attention scores after masking and softmax with shape + `[batch_size, Tq, Tv]`. """ if scores_mask is not None: padding_mask = math_ops.logical_not(scores_mask) @@ -129,7 +137,7 @@ class BaseDenseAttention(Layer): weights = control_flow_util.smart_cond(training, dropped_weights, lambda: array_ops.identity(weights)) - return math_ops.matmul(weights, value) + return math_ops.matmul(weights, value), weights # TODO(b/125916026): Consider exposing a __call__ method with named args. 
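As a usage sketch of the new option (illustrative only; it assumes a build that includes this change, where `return_attention_scores` is a constructor argument of `tf.keras.layers.Attention`, and the shapes are made up for the example):

    import numpy as np
    import tensorflow as tf

    query = np.random.rand(2, 4, 8).astype(np.float32)  # [batch_size, Tq, dim]
    value = np.random.rand(2, 6, 8).astype(np.float32)  # [batch_size, Tv, dim]

    layer = tf.keras.layers.Attention(return_attention_scores=True)
    output, scores = layer([query, value])
    # output.shape == (2, 4, 8)  -> [batch_size, Tq, dim]
    # scores.shape == (2, 4, 6)  -> [batch_size, Tq, Tv], post-masking softmax weights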
def call(self, inputs, mask=None, training=None): @@ -156,12 +164,14 @@ class BaseDenseAttention(Layer): else: causal_mask = None scores_mask = _merge_masks(v_mask, causal_mask) - result = self._apply_scores( + result, attention_scores = self._apply_scores( scores=scores, value=v, scores_mask=scores_mask, training=training) if q_mask is not None: # Mask of shape [batch_size, Tq, 1]. q_mask = array_ops.expand_dims(q_mask, axis=-1) result *= math_ops.cast(q_mask, dtype=result.dtype) + if self.return_attention_scores: + return result, attention_scores return result def compute_mask(self, inputs, mask=None): @@ -199,6 +209,7 @@ class BaseDenseAttention(Layer): config = { 'causal': self.causal, 'dropout': self.dropout, + 'return_attention_scores': self.return_attention_scores, } base_config = super(BaseDenseAttention, self).get_config() return dict(list(base_config.items()) + list(config.items())) @@ -228,6 +239,8 @@ class Attention(BaseDenseAttention): flow of information from the future towards the past. dropout: Float between 0 and 1. Fraction of the units to drop for the attention scores. + return_attention_scores: bool, it `True`, returns the attention scores + (after masking and softmax) as an additional output argument. Call Arguments: @@ -247,9 +260,11 @@ class Attention(BaseDenseAttention): training: Python boolean indicating whether the layer should behave in training mode (adding dropout) or in inference mode (no dropout). - Output shape: + Output: Attention outputs of shape `[batch_size, Tq, dim]`. + [Optional] Attention scores after masking and softmax with shape + `[batch_size, Tq, Tv]`. The meaning of `query`, `value` and `key` depend on the application. In the case of text similarity, for example, `query` is the sequence embeddings of @@ -363,6 +378,8 @@ class AdditiveAttention(BaseDenseAttention): flow of information from the future towards the past. dropout: Float between 0 and 1. Fraction of the units to drop for the attention scores. + return_attention_scores: bool, it `True`, returns the attention scores + (after masking and softmax) as an additional output argument. Call Arguments: @@ -382,9 +399,11 @@ class AdditiveAttention(BaseDenseAttention): training: Python boolean indicating whether the layer should behave in training mode (adding dropout) or in inference mode (no dropout). - Output shape: + Output: Attention outputs of shape `[batch_size, Tq, dim]`. + [Optional] Attention scores after masking and softmax with shape + `[batch_size, Tq, Tv]`. The meaning of `query`, `value` and `key` depend on the application. In the case of text similarity, for example, `query` is the sequence embeddings of diff --git a/tensorflow/python/keras/layers/dense_attention_test.py b/tensorflow/python/keras/layers/dense_attention_test.py index 85780900593..942304e4316 100644 --- a/tensorflow/python/keras/layers/dense_attention_test.py +++ b/tensorflow/python/keras/layers/dense_attention_test.py @@ -40,11 +40,14 @@ class BaseDenseAttentionTest(test.TestCase, parameterized.TestCase): v = np.array([[[1.6]]], dtype=np.float32) # Scores mask tensor of shape [1, 1, 1] scores_mask = np.array([[[True]]], dtype=np.bool_) - actual = dense_attention.BaseDenseAttention()._apply_scores( + actual, actual_scores = dense_attention.BaseDenseAttention()._apply_scores( scores=scores, value=v, scores_mask=scores_mask) + # Expected softmax_scores = [[[1]]] + expected_scores = np.array([[[1.]]], dtype=np.float32) + self.assertAllClose(expected_scores, actual_scores) # Expected tensor of shape [1, 1, 1]. 
- # expected000 = softmax(scores)[0, 0] * 1.6 = 1.6 + # expected000 = softmax_scores[0, 0] * 1.6 = 1.6 expected = np.array([[[1.6]]], dtype=np.float32) self.assertAllClose(expected, actual) @@ -53,11 +56,14 @@ class BaseDenseAttentionTest(test.TestCase, parameterized.TestCase): scores = np.array([[[1.1]]], dtype=np.float32) # Value tensor of shape [1, 1, 1] v = np.array([[[1.6]]], dtype=np.float32) - actual = dense_attention.BaseDenseAttention()._apply_scores( + actual, actual_scores = dense_attention.BaseDenseAttention()._apply_scores( scores=scores, value=v) + # Expected softmax_scores = [[[1]]] + expected_scores = np.array([[[1.]]], dtype=np.float32) + self.assertAllClose(expected_scores, actual_scores) # Expected tensor of shape [1, 1, 1]. - # expected000 = softmax(scores)[0, 0] * 1.6 = 1.6 + # expected000 = softmax_scores[0, 0] * 1.6 = 1.6 expected = np.array([[[1.6]]], dtype=np.float32) self.assertAllClose(expected, actual) @@ -68,15 +74,17 @@ class BaseDenseAttentionTest(test.TestCase, parameterized.TestCase): v = np.array([[[1.6], [0.7], [-0.8]]], dtype=np.float32) # Scores mask tensor of shape [1, 1, 3] scores_mask = np.array([[[True, True, False]]], dtype=np.bool_) - actual = dense_attention.BaseDenseAttention()._apply_scores( + actual, actual_scores = dense_attention.BaseDenseAttention()._apply_scores( scores=scores, value=v, scores_mask=scores_mask) - # Expected attention distribution = softmax(scores) with zeros in - # positions where v_mask == False. - # => attention_distribution000 = exp(1)/(exp(1) + exp(0)) = 0.73105857863 - # attention_distribution001 = exp(0)/(exp(1) + exp(0)) = 0.26894142137 - # attention_distribution002 = 0 - # + # Expected softmax scores = softmax(scores) with zeros in positions where + # v_mask == False. + # => softmax_scores000 = exp(1)/(exp(1) + exp(0)) = 0.73105857863 + # softmax_scores001 = exp(0)/(exp(1) + exp(0)) = 0.26894142137 + # softmax_scores002 = 0 + expected_scores = np.array( + [[[0.73105857863, 0.26894142137, 0.]]], dtype=np.float32) + self.assertAllClose(expected_scores, actual_scores) # Expected tensor of shape [1, 1, 1]. # expected000 = 0.73105857863 * 1.6 + 0.26894142137 * 0.7 - 0 * 0.8 # = 1.35795272077 @@ -88,17 +96,19 @@ class BaseDenseAttentionTest(test.TestCase, parameterized.TestCase): scores = np.array([[[1., 0., 1.]]], dtype=np.float32) # Value tensor of shape [1, 3, 1] v = np.array([[[1.6], [0.7], [-0.8]]], dtype=np.float32) - actual = dense_attention.BaseDenseAttention()._apply_scores( + actual, actual_scores = dense_attention.BaseDenseAttention()._apply_scores( scores=scores, value=v) - # Expected attention distribution = softmax(scores). - # => attention_distribution000 = exp(1)/(exp(1) + exp(0) + exp(1)) - # = 0.42231879825 - # attention_distribution001 = exp(0)/(exp(1) + exp(0) + exp(1)) - # = 0.15536240349 - # attention_distribution002 = exp(1)/(exp(1) + exp(0) + exp(1)) - # = 0.42231879825 - # + # Expected softmax_scores = softmax(scores). + # => softmax_scores000 = exp(1)/(exp(1) + exp(0) + exp(1)) + # = 0.42231879825 + # softmax_scores001 = exp(0)/(exp(1) + exp(0) + exp(1)) + # = 0.15536240349 + # softmax_scores002 = exp(1)/(exp(1) + exp(0) + exp(1)) + # = 0.42231879825 + expected_scores = np.array( + [[[0.42231879825, 0.15536240349, 0.42231879825]]], dtype=np.float32) + self.assertAllClose(expected_scores, actual_scores) # Expected tensor of shape [1, 1, 1]. 
# expected000 = 0.42231879825 * 1.6 + 0.15536240349 * 0.7 # - 0.42231879825 * 0.8 @@ -113,12 +123,15 @@ class BaseDenseAttentionTest(test.TestCase, parameterized.TestCase): v = np.array([[[1.6]], [[2.6]]], dtype=np.float32) # Scpres mask tensor of shape [2, 1, 1] scores_mask = np.array([[[True]], [[True]]], dtype=np.bool_) - actual = dense_attention.BaseDenseAttention()._apply_scores( + actual, actual_scores = dense_attention.BaseDenseAttention()._apply_scores( scores=scores, value=v, scores_mask=scores_mask) + # Expected softmax_scores = [[[1]], [[1]]] + expected_scores = np.array([[[1.]], [[1.]]], dtype=np.float32) + self.assertAllClose(expected_scores, actual_scores) # Expected tensor of shape [2, 1, 1]. - # expected000 = softmax(scores)[0, 0] * 1.6 = 1.6 - # expected100 = softmax(scores)[1, 0] * 2.6 = 2.6 + # expected000 = softmax_scores[0, 0] * 1.6 = 1.6 + # expected100 = softmax_scores[1, 0] * 2.6 = 2.6 expected = np.array([[[1.6]], [[2.6]]], dtype=np.float32) self.assertAllClose(expected, actual) @@ -131,9 +144,13 @@ class BaseDenseAttentionTest(test.TestCase, parameterized.TestCase): dim = 7 scores = np.ones((batch_size, tq, tv)) value = np.ones((batch_size, tv, dim)) - actual = dense_attention.BaseDenseAttention(dropout=0.1)._apply_scores( - scores=scores, value=value, training=False) + actual, actual_scores = dense_attention.BaseDenseAttention( + dropout=0.1)._apply_scores( + scores=scores, value=value, training=False) + # Expected Tensor of shape `[batch_size, tq, tv]`. + expected_scores_shape = [batch_size, tq, tv] + self.assertAllEqual(expected_scores_shape, array_ops.shape(actual_scores)) # Expected Tensor of shape `[batch_size, tq, dim]`. expected_shape = [batch_size, tq, dim] self.assertAllEqual(expected_shape, array_ops.shape(actual)) @@ -312,7 +329,11 @@ class AttentionTest(test.TestCase, parameterized.TestCase): expected = np.array([[[0.58127362329]]], dtype=np.float32) self.assertAllClose(expected, actual) - def test_multi_dim_with_query_mask(self): + @parameterized.named_parameters( + ('', False), + ('return_attention_scores', True), + ) + def test_multi_dim_with_query_mask(self, return_attention_scores): # Query tensor of shape [1, 2, 1] q = np.array([[[1.1], [-0.5]]], dtype=np.float32) # Value tensor of shape [1, 3, 1] @@ -321,8 +342,12 @@ class AttentionTest(test.TestCase, parameterized.TestCase): q_mask = np.array([[True, False]], dtype=np.bool_) # Value mask tensor of shape [1, 3] v_mask = np.array([[True, True, False]], dtype=np.bool_) - attention_layer = dense_attention.Attention() - actual = attention_layer([q, v], mask=[q_mask, v_mask]) + attention_layer = dense_attention.Attention( + return_attention_scores=return_attention_scores) + if return_attention_scores: + actual, actual_scores = attention_layer([q, v], mask=[q_mask, v_mask]) + else: + actual = attention_layer([q, v], mask=[q_mask, v_mask]) # Expected scores of shape [1, 2, 3] # scores = [[[1.1*1.6, 1.1*0.7, -1.1*0.8], [-0.5*1.6, -0.5*0.7, 0.5*0.8]]] @@ -339,7 +364,12 @@ class AttentionTest(test.TestCase, parameterized.TestCase): # attention_distribution011 = exp(-0.35)/(exp(-0.8) + exp(-0.35)) # = 0.61063923394 # attention_distribution012 = 0 - # + if return_attention_scores: + expected_scores = np.array( + [[[0.72908792234, 0.27091207765, 0.], + [0.38936076605, 0.61063923394, 0.]]], + dtype=np.float32) + self.assertAllClose(expected_scores, actual_scores) # Expected tensor of shape [1, 2, 1] with zeros where q_mask == False. 
# expected000 = 0.72908792234 * 1.6 + 0.27091207765 * 0.7 - 0 * 0.8 # = 1.3561791301 @@ -368,11 +398,19 @@ class AttentionTest(test.TestCase, parameterized.TestCase): sess.run(attention_layer.scale.initializer) self.assertAllClose(1., attention_layer.scale.value()) - def test_self_attention_causal(self): + @parameterized.named_parameters( + ('', False), + ('return_attention_scores', True), + ) + def test_self_attention_causal(self, return_attention_scores): # Query-value tensor of shape [1, 3, 1] q = np.array([[[0.5], [0.8], [-0.3]]], dtype=np.float32) - attention_layer = dense_attention.Attention(causal=True) - actual = attention_layer([q, q]) + attention_layer = dense_attention.Attention( + causal=True, return_attention_scores=return_attention_scores) + if return_attention_scores: + actual, actual_scores = attention_layer([q, q]) + else: + actual = attention_layer([q, q]) # Expected scores of shape [1, 3, 3] # scores = [[0.25, 0.4, -0.15], [0.4, 0.64, -0.24], [-0.15, -0.24, 0.09]] @@ -385,7 +423,13 @@ class AttentionTest(test.TestCase, parameterized.TestCase): # = [exp(-0.15), exp(-0.24), exp(0.09)] # / (exp(-0.15) + exp(-0.24) + exp(0.09)) # = [0.31395396638, 0.28693232061, 0.399113713] - # + if return_attention_scores: + expected_scores = np.array( + [[[1., 0., 0.], + [0.44028635073, 0.55971364926, 0.], + [0.31395396638, 0.28693232061, 0.399113713]]], + dtype=np.float32) + self.assertAllClose(expected_scores, actual_scores) # Expected tensor of shape [1, 3, 1]. # expected000 = 0.5 # expected010 = 0.44028635073 * 0.5 + 0.55971364926 * 0.8 @@ -455,17 +499,25 @@ class AttentionTest(test.TestCase, parameterized.TestCase): actual = attention_layer([q, v]) self.assertAllClose([[[0], [1]]], actual) - def test_serialization(self): + @parameterized.named_parameters( + ('', False, False), + ('use_scale', True, False), + ('return_attention_scores', False, True), + ) + def test_serialization(self, use_scale, return_attention_scores): # Test serialization with use_scale - layer = dense_attention.Attention(use_scale=True) + layer = dense_attention.Attention( + use_scale=use_scale, return_attention_scores=return_attention_scores) config = keras.layers.serialize(layer) new_layer = keras.layers.deserialize(config) - self.assertEqual(new_layer.use_scale, True) + self.assertEqual(new_layer.use_scale, use_scale) + self.assertEqual(new_layer.return_attention_scores, return_attention_scores) config = layer.get_config() new_layer = dense_attention.Attention.from_config(config) - self.assertEqual(new_layer.use_scale, True) + self.assertEqual(new_layer.use_scale, use_scale) + self.assertEqual(new_layer.return_attention_scores, return_attention_scores) @combinations.generate(combinations.combine(mode=['graph', 'eager'])) From 1749ef5aef4a2171117b5dbcbe477bdaef8e8cac Mon Sep 17 00:00:00 2001 From: Tom Hennigan Date: Fri, 31 Jul 2020 15:47:27 -0700 Subject: [PATCH 1907/2522] Make distributed client RPC timeout configurable. PiperOrigin-RevId: 324299630 Change-Id: I47cdd1d8f769dd014f4a45172c9f0797a17b8c98 --- tensorflow/compiler/xla/pjrt/distributed/client.cc | 5 +++-- tensorflow/compiler/xla/pjrt/distributed/client.h | 7 +++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/xla/pjrt/distributed/client.cc b/tensorflow/compiler/xla/pjrt/distributed/client.cc index 830e512b156..55b02c6a09e 100644 --- a/tensorflow/compiler/xla/pjrt/distributed/client.cc +++ b/tensorflow/compiler/xla/pjrt/distributed/client.cc @@ -23,8 +23,9 @@ limitations under the License. 
namespace xla { DistributedRuntimeClient::DistributedRuntimeClient( - std::shared_ptr<::grpc::Channel> channel) - : stub_(grpc::DistributedRuntimeService::NewStub(std::move(channel))) {} + std::shared_ptr<::grpc::Channel> channel, absl::Duration rpc_timeout) + : stub_(grpc::DistributedRuntimeService::NewStub(std::move(channel))), + rpc_timeout_(rpc_timeout) {} DistributedRuntimeClient::~DistributedRuntimeClient() = default; xla::Status DistributedRuntimeClient::Connect( diff --git a/tensorflow/compiler/xla/pjrt/distributed/client.h b/tensorflow/compiler/xla/pjrt/distributed/client.h index 865a752849e..049d76af4d6 100644 --- a/tensorflow/compiler/xla/pjrt/distributed/client.h +++ b/tensorflow/compiler/xla/pjrt/distributed/client.h @@ -29,7 +29,10 @@ namespace xla { class DistributedRuntimeClient { public: - explicit DistributedRuntimeClient(std::shared_ptr<::grpc::Channel> channel); + DistributedRuntimeClient(std::shared_ptr<::grpc::Channel> channel, + absl::Duration rpc_timeout); + explicit DistributedRuntimeClient(std::shared_ptr<::grpc::Channel> channel) + : DistributedRuntimeClient(channel, absl::Seconds(120)) {} ~DistributedRuntimeClient(); xla::Status Connect(const LocalTopologyProto& local_topology, @@ -42,7 +45,7 @@ class DistributedRuntimeClient { private: const std::unique_ptr stub_; - const absl::Duration rpc_timeout_ = absl::Seconds(120); + const absl::Duration rpc_timeout_; }; } // namespace xla From 496d3166b689c67a19f0f67773d03d8df5f6614e Mon Sep 17 00:00:00 2001 From: Jiho Choi Date: Fri, 31 Jul 2020 15:47:52 -0700 Subject: [PATCH 1908/2522] Add IteratorName helper. PiperOrigin-RevId: 324299687 Change-Id: Idb44852d52e7b3e1dbd91591597add0073d083a7 --- tensorflow/core/profiler/utils/tf_op_utils.cc | 6 ++++++ tensorflow/core/profiler/utils/tf_op_utils.h | 3 +++ 2 files changed, 9 insertions(+) diff --git a/tensorflow/core/profiler/utils/tf_op_utils.cc b/tensorflow/core/profiler/utils/tf_op_utils.cc index e981b1f7a70..e58ccba445b 100644 --- a/tensorflow/core/profiler/utils/tf_op_utils.cc +++ b/tensorflow/core/profiler/utils/tf_op_utils.cc @@ -114,6 +114,12 @@ std::string DatasetOpEventName(absl::string_view full_name) { return absl::StrCat(kIterator, kSeparator, split_result.back()); } +std::string IteratorName(absl::string_view full_name) { + std::vector split_result = + absl::StrSplit(full_name, kSeparator); + return std::string(split_result.back()); +} + std::vector ParseTensorShapes( absl::string_view tensor_shapes) { absl::ConsumePrefix(&tensor_shapes, "("); diff --git a/tensorflow/core/profiler/utils/tf_op_utils.h b/tensorflow/core/profiler/utils/tf_op_utils.h index 8656ecdb319..f0668190a07 100644 --- a/tensorflow/core/profiler/utils/tf_op_utils.h +++ b/tensorflow/core/profiler/utils/tf_op_utils.h @@ -59,6 +59,9 @@ std::string TfOpEventName(absl::string_view tf_op_fullname); // Trace event name for dataset ops. std::string DatasetOpEventName(absl::string_view full_name); +// Returns the iterator name without prefix and parent iterator names. +std::string IteratorName(absl::string_view full_name); + // Returns true if the given name is a TensorFlow Dataset Op. inline bool IsDatasetOp(absl::string_view tf_op_type) { return tf_op_type == kDatasetOp; From cea9f19ebf1ff74177d91c9d18926af0f3e2ce13 Mon Sep 17 00:00:00 2001 From: Smit Hinsu Date: Fri, 31 Jul 2020 16:01:37 -0700 Subject: [PATCH 1909/2522] Sink standard dialect constants in sink_constants_to_control_flow pass This is required before exporting HLO dialect ops with standard dialect constant to XLA. 
Also, sink constants for sort op as well. Added a TODO to generalize this pass to handle more ops and non-const values defined outside. PiperOrigin-RevId: 324301911 Change-Id: I2a67a2cc5d1f58dc5fad11a319a2f4ca63a8f434 --- tensorflow/compiler/mlir/hlo/BUILD | 1 + .../sink_constants_to_control_flow.cc | 26 +++++++++++++------ .../tests/sink-constants-to-control-flow.mlir | 14 ++++++++++ 3 files changed, 33 insertions(+), 8 deletions(-) diff --git a/tensorflow/compiler/mlir/hlo/BUILD b/tensorflow/compiler/mlir/hlo/BUILD index 49d0cb318db..e003e9ba279 100644 --- a/tensorflow/compiler/mlir/hlo/BUILD +++ b/tensorflow/compiler/mlir/hlo/BUILD @@ -341,6 +341,7 @@ cc_library( "@llvm-project//llvm:Support", "@llvm-project//mlir:IR", "@llvm-project//mlir:Pass", + "@llvm-project//mlir:StandardOps", "@llvm-project//mlir:Support", "@llvm-project//mlir:Transforms", ], diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/sink_constants_to_control_flow.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/sink_constants_to_control_flow.cc index 0f31e613afe..059fdc3edbe 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/sink_constants_to_control_flow.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/sink_constants_to_control_flow.cc @@ -21,6 +21,7 @@ limitations under the License. #include "mlir/Pass/PassManager.h" #include "mlir/Support/LLVM.h" #include "mlir/Transforms/RegionUtils.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project namespace mlir { namespace mhlo { @@ -29,6 +30,13 @@ namespace { // A pass that sinks constants implicitly captured in control flow regions. This // is necessary to export to XLA. +// TODO(hinsu): Generalize this pass to handle all the ops with regions. Any +// value used within the region that is defined outside of op's region should be +// sank to the regions and not just the constants. Ops such as If and While +// whose computations doesn't require fixed signature like Sort or Reduce have +// an option to pass outside values as operands of the op to avoid recomputing +// those within internally. Note that doing so is the only option in case of +// BlockArguments. class SinkConstantsToControlFlowPass : public mlir::PassWrapper { void runOnFunction() override { @@ -39,6 +47,8 @@ class SinkConstantsToControlFlowPass } else if (auto if_op = llvm::dyn_cast(op)) { SinkToRegion(&if_op.true_branch()); SinkToRegion(&if_op.false_branch()); + } else if (auto sort_op = llvm::dyn_cast(op)) { + SinkToRegion(&sort_op.comparator()); } }); } @@ -46,26 +56,26 @@ class SinkConstantsToControlFlowPass private: // Performs constant sinking into a region. static void SinkToRegion(Region* region) { - llvm::DenseMap sunk_constant; + llvm::DenseMap sunk_constant; visitUsedValuesDefinedAbove({*region}, [&](OpOperand* use) { Value constant = use->get(); - auto const_op = dyn_cast_or_null(constant.getDefiningOp()); - if (!const_op) return; + auto op = constant.getDefiningOp(); + if (!op || !isa(op)) return; auto map_entry = sunk_constant.try_emplace(constant, nullptr); if (!map_entry.second) { // This constant has already been cloned into the region, reuse it. 
- use->set(map_entry.first->getSecond().getResult()); - if (constant.use_empty()) const_op.erase(); + use->set(map_entry.first->getSecond()->getResult(0)); + if (op->use_empty()) op->erase(); return; } if (constant.hasOneUse()) { - const_op.getOperation()->moveBefore(®ion->front().front()); + op->moveBefore(®ion->front().front()); return; } - map_entry.first->getSecond() = const_op.clone(); + map_entry.first->getSecond() = op->clone(); region->front().getOperations().insert(region->front().begin(), map_entry.first->getSecond()); - use->set(map_entry.first->getSecond().getResult()); + use->set(map_entry.first->getSecond()->getResult(0)); }); } }; diff --git a/tensorflow/compiler/mlir/hlo/tests/sink-constants-to-control-flow.mlir b/tensorflow/compiler/mlir/hlo/tests/sink-constants-to-control-flow.mlir index f8b6b629c9e..9e18ad8a2d8 100644 --- a/tensorflow/compiler/mlir/hlo/tests/sink-constants-to-control-flow.mlir +++ b/tensorflow/compiler/mlir/hlo/tests/sink-constants-to-control-flow.mlir @@ -58,3 +58,17 @@ func @sink_const_to_conditional(%arg0: tensor) -> tensor { %9 = "mhlo.get_tuple_element"(%2) {index = 0 : i32} : (tuple>) -> tensor return %9 : tensor } + +func @sink_const_to_sort(%arg0: tensor<16xf32>) { + %c0 = constant dense<1.0> : tensor + // CHECK: "mhlo.sort" + %0 = "mhlo.sort"(%arg0) ( { + ^bb0(%arg1: tensor, %arg2: tensor): + // CHECK: constant dense<1.000000e+00> + %1 = "mhlo.divide"(%arg1, %c0) : (tensor, tensor) -> tensor + %2 = "mhlo.divide"(%arg2, %c0) : (tensor, tensor) -> tensor + %3 = "mhlo.compare"(%1, %2) {comparison_direction = "GT"} : (tensor, tensor) -> tensor + "mhlo.return"(%3) : (tensor) -> () + }) {is_stable = true} : (tensor<16xf32>) -> tensor<16xi32> + return +} From 5594fde93b356efd07f412d6e517c44305692c6d Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Fri, 31 Jul 2020 16:05:20 -0700 Subject: [PATCH 1910/2522] Fixing run_v1_decorator for gather_op_test.py. Also moving it to an array_ops folder. PiperOrigin-RevId: 324302642 Change-Id: I85d54f334537a25a4c7f6d6eaeb17721cce39e25 --- tensorflow/core/kernels/gather_op.cc | 7 +- tensorflow/python/kernel_tests/BUILD | 14 -- .../python/kernel_tests/array_ops/BUILD | 14 ++ .../{ => array_ops}/gather_op_test.py | 224 +++++++++++------- 4 files changed, 156 insertions(+), 103 deletions(-) rename tensorflow/python/kernel_tests/{ => array_ops}/gather_op_test.py (75%) diff --git a/tensorflow/core/kernels/gather_op.cc b/tensorflow/core/kernels/gather_op.cc index 948567e019a..e9e6a93ef70 100644 --- a/tensorflow/core/kernels/gather_op.cc +++ b/tensorflow/core/kernels/gather_op.cc @@ -78,10 +78,11 @@ class GatherOp : public OpKernel { } } + int64 min_params_dim = axis < 0 ? 
-axis : axis + 1; OP_REQUIRES( - c, axis >= -params.dims() && axis < params.dims(), - errors::InvalidArgument("Expected axis in the range [", -params.dims(), - ", ", params.dims(), "), but got ", axis)); + c, params.dims() >= min_params_dim, + errors::InvalidArgument("Shape must be at least rank ", min_params_dim, + " but is rank ", params.dims())); if (axis < 0) { axis = params.dims() + axis; diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 1e093af1019..8504052c16a 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -2035,20 +2035,6 @@ cuda_py_test( ], ) -cuda_py_test( - name = "gather_op_test", - size = "medium", - srcs = ["gather_op_test.py"], - deps = [ - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:gradients", - "//third_party/py/numpy", - "@absl_py//absl/testing:parameterized", - ], -) - cuda_py_test( name = "gradient_correctness_test", size = "small", diff --git a/tensorflow/python/kernel_tests/array_ops/BUILD b/tensorflow/python/kernel_tests/array_ops/BUILD index df48258f396..bc448f3da05 100644 --- a/tensorflow/python/kernel_tests/array_ops/BUILD +++ b/tensorflow/python/kernel_tests/array_ops/BUILD @@ -46,3 +46,17 @@ cuda_py_test( "//third_party/py/numpy", ], ) + +cuda_py_test( + name = "gather_op_test", + size = "medium", + srcs = ["gather_op_test.py"], + deps = [ + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:gradients", + "//third_party/py/numpy", + "@absl_py//absl/testing:parameterized", + ], +) diff --git a/tensorflow/python/kernel_tests/gather_op_test.py b/tensorflow/python/kernel_tests/array_ops/gather_op_test.py similarity index 75% rename from tensorflow/python/kernel_tests/gather_op_test.py rename to tensorflow/python/kernel_tests/array_ops/gather_op_test.py index 0f59d10c720..d553b2912ef 100644 --- a/tensorflow/python/kernel_tests/gather_op_test.py +++ b/tensorflow/python/kernel_tests/array_ops/gather_op_test.py @@ -107,18 +107,20 @@ class GatherTest(test.TestCase, parameterized.TestCase): expected_shape = data.shape[:axis] + (4,) + data.shape[axis + 1:] self.assertEqual(expected_shape, gather_t.get_shape()) - @test_util.run_deprecated_v1 def testHigherRank(self): - # We check that scalar and empty indices shapes work as well - shape = (2, 1, 3, 2) - for indices_shape in (), (0,), (2, 0), (2, 3): - for dtype in _TEST_TYPES: - for axis in range(len(shape)): - params = self._buildParams(np.random.randn(*shape), dtype) - indices = np.random.randint(shape[axis], size=indices_shape) - with self.subTest(indices_shape=indices_shape, dtype=dtype, axis=axis, - indices=indices): - with self.cached_session(use_gpu=True) as sess: + with ops.Graph().as_default(): + # We check that scalar and empty indices shapes work as well + shape = (2, 1, 3, 2) + for indices_shape in (), (0,), (2, 0), (2, 3): + for dtype in _TEST_TYPES: + for axis in range(len(shape)): + params = self._buildParams(np.random.randn(*shape), dtype) + indices = np.random.randint(shape[axis], size=indices_shape) + with self.subTest( + indices_shape=indices_shape, + dtype=dtype, + axis=axis, + indices=indices): tf_params = constant_op.constant(params) tf_indices = constant_op.constant(indices) # Check that both positive and negative indices for axis work. 
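The kernel change above reports an out-of-range `axis` as a rank requirement on `params`, matching what shape inference already says. A hedged sketch of the resulting behaviour (the exception type depends on graph vs. eager execution; the message text follows the new OP_REQUIRES string and may vary slightly by build):

    import tensorflow as tf

    params = tf.constant([0, 1, 2])      # rank-1 params
    try:
        tf.gather(params, 0, axis=2)     # axis=2 needs params of rank >= 3
    except (ValueError, tf.errors.InvalidArgumentError) as e:
        print(e)  # e.g. "Shape must be at least rank 3 but is rank 1"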
@@ -127,7 +129,7 @@ class GatherTest(test.TestCase, parameterized.TestCase): gather = array_ops.gather(tf_params, tf_indices, axis=tf_axis) gather_negative_axis = array_ops.gather( tf_params, tf_indices, axis=tf_negative_axis) - gather_value, gather_negative_axis_value = sess.run( + gather_value, gather_negative_axis_value = self.evaluate( [gather, gather_negative_axis]) gather_np = np.take(params, indices, axis) self.assertAllEqual(gather_np, gather_value) @@ -144,10 +146,10 @@ class GatherTest(test.TestCase, parameterized.TestCase): gather_grad -= 1j * gather_grad params_grad, indices_grad, axis_grad = gradients_impl.gradients( gather, [tf_params, tf_indices, tf_axis], gather_grad) - self.assertEqual(indices_grad, None) - self.assertEqual(axis_grad, None) + self.assertIsNone(indices_grad) + self.assertIsNone(axis_grad) if dtype.is_integer: - self.assertEqual(params_grad, None) + self.assertIsNone(params_grad) continue # For axis 0, we are able to create an efficient IndexedSlices for # the gradient. @@ -171,47 +173,113 @@ class GatherTest(test.TestCase, parameterized.TestCase): atol=2e-6, rtol=2e-6) - @test_util.run_deprecated_v1 + def testHigherRankGradientTape(self): + # We check that scalar and empty indices shapes work as well + shape = (2, 1, 3, 2) + for indices_shape in (), (0,), (2, 0), (2, 3): + for dtype in _TEST_TYPES: + for axis in range(len(shape)): + params = self._buildParams(np.random.randn(*shape), dtype) + indices = np.random.randint(shape[axis], size=indices_shape) + with self.subTest( + indices_shape=indices_shape, + dtype=dtype, + axis=axis, + indices=indices): + with backprop.GradientTape() as tape: + tf_params = constant_op.constant(params) + tf_indices = constant_op.constant(indices) + # Check that both positive and negative indices for axis work. + tf_axis = constant_op.constant(axis) + tape.watch(tf_params) + tape.watch(tf_indices) + tape.watch(tf_axis) + tf_negative_axis = constant_op.constant(-len(shape) + axis) + gather = array_ops.gather(tf_params, tf_indices, axis=tf_axis) + gather_negative_axis = array_ops.gather( + tf_params, tf_indices, axis=tf_negative_axis) + gather_value, gather_negative_axis_value = self.evaluate( + [gather, gather_negative_axis]) + gather_np = np.take(params, indices, axis) + self.assertAllEqual(gather_np, gather_value) + self.assertAllEqual(gather_np, gather_negative_axis_value) + expected_shape = ( + params.shape[:axis] + indices.shape + params.shape[axis + 1:]) + self.assertEqual(expected_shape, gather.shape) + self.assertEqual(expected_shape, gather_negative_axis.shape) + + # Test gradients + gather_grad = np.random.randn( + *gather.get_shape().as_list()).astype(dtype.as_numpy_dtype) + if dtype.is_complex: + gather_grad -= 1j * gather_grad + params_grad, indices_grad, axis_grad = tape.gradient( + gather, [tf_params, tf_indices, tf_axis], gather_grad) + self.assertIsNone(indices_grad) + self.assertIsNone(axis_grad) + if dtype.is_integer: + self.assertIsNone(params_grad) + continue + # For axis 0, we are able to create an efficient IndexedSlices for + # the gradient. 
+ if axis == 0: + self.assertEqual(type(params_grad), ops.IndexedSlices) + params_grad = ops.convert_to_tensor(params_grad) + correct_params_grad = np.zeros(shape).astype(dtype.as_numpy_dtype) + outer_dims = axis + inner_dims = len(shape) - axis - 1 + gather_grad = gather_grad.reshape(shape[:axis] + (indices.size,) + + shape[axis + 1:]) + for source_index, dest_index in enumerate(indices.flat): + dest_slice = ((slice(None),) * outer_dims + (dest_index,) + + (slice(None),) * inner_dims) + source_slice = ((slice(None),) * outer_dims + (source_index,) + + (slice(None),) * inner_dims) + correct_params_grad[dest_slice] += gather_grad[source_slice] + self.assertAllClose( + correct_params_grad, + self.evaluate(params_grad), + atol=2e-6, + rtol=2e-6) + def testString(self): params = np.array([[b"asdf", b"zxcv"], [b"qwer", b"uiop"]]) - with self.cached_session(): - self.assertAllEqual([b"qwer", b"uiop"], - array_ops.gather(params, 1, axis=0).eval()) - self.assertAllEqual([b"asdf", b"qwer"], - array_ops.gather(params, 0, axis=1).eval()) + self.assertAllEqual([b"qwer", b"uiop"], array_ops.gather(params, 1, axis=0)) + self.assertAllEqual([b"asdf", b"qwer"], array_ops.gather(params, 0, axis=1)) - @test_util.run_deprecated_v1 def testUInt32AndUInt64(self): for unsigned_type in (dtypes.uint32, dtypes.uint64): with self.subTest(unsigned_type=unsigned_type): params = self._buildParams( np.array([[1, 2, 3], [7, 8, 9]]), unsigned_type) with self.cached_session(): - self.assertAllEqual([7, 8, 9], - array_ops.gather(params, 1, axis=0).eval()) - self.assertAllEqual([1, 7], - array_ops.gather(params, 0, axis=1).eval()) + self.assertAllEqual([7, 8, 9], array_ops.gather(params, 1, axis=0)) + self.assertAllEqual([1, 7], array_ops.gather(params, 0, axis=1)) - @test_util.run_deprecated_v1 def testUnknownIndices(self): - params = constant_op.constant([[0, 1, 2]]) - indices = array_ops.placeholder(dtypes.int32) - gather_t = array_ops.gather(params, indices) - self.assertEqual(None, gather_t.get_shape()) + # This test is purely a test for placeholder inputs which is only applicable + # in graph mode. + with ops.Graph().as_default(): + params = constant_op.constant([[0, 1, 2]]) + indices = array_ops.placeholder(dtypes.int32) + gather_t = array_ops.gather(params, indices) + self.assertEqual(None, gather_t.get_shape()) - @test_util.run_deprecated_v1 def testUnknownAxis(self): - params = constant_op.constant([[0, 1, 2]]) - indices = constant_op.constant([[0, 0], [0, 0]]) - axis = array_ops.placeholder(dtypes.int32) - gather_t = array_ops.gather(params, indices, axis=axis) - # Rank 2 params with rank 2 indices results in a rank 3 shape. - self.assertEqual([None, None, None], gather_t.shape.as_list()) + # This test is purely a test for placeholder inputs which is only applicable + # in graph mode. + with ops.Graph().as_default(): + params = constant_op.constant([[0, 1, 2]]) + indices = constant_op.constant([[0, 0], [0, 0]]) + axis = array_ops.placeholder(dtypes.int32) + gather_t = array_ops.gather(params, indices, axis=axis) + # Rank 2 params with rank 2 indices results in a rank 3 shape. + self.assertEqual([None, None, None], gather_t.shape.as_list()) - # If indices is also unknown the result rank is unknown. - indices = array_ops.placeholder(dtypes.int32) - gather_t = array_ops.gather(params, indices, axis=axis) - self.assertEqual(None, gather_t.shape) + # If indices is also unknown the result rank is unknown. 
+ indices = array_ops.placeholder(dtypes.int32) + gather_t = array_ops.gather(params, indices, axis=axis) + self.assertEqual(None, gather_t.shape) def testBadIndicesType(self): with self.assertRaisesRegex( @@ -243,45 +311,36 @@ class GatherTest(test.TestCase, parameterized.TestCase): with self.assertRaisesOpError(r"indices\[0,0\] = 7 is not in \[0, 3\)"): array_ops.gather(params, [[7]], axis=1).eval() - @test_util.run_deprecated_v1 def testBadAxis(self): - with self.session(use_gpu=True): - params = [0, 1, 2] - params_ph = array_ops.placeholder(dtypes.int32) - indices = 0 - for bad_axis in (1, 2, -2): - # Shape inference can validate axis for known params rank. - with self.subTest(bad_axis=bad_axis): - with self.assertRaisesWithPredicateMatch( - ValueError, "Shape must be at least rank . but is rank 1"): - array_ops.gather(params, indices, axis=bad_axis) - # If params rank is unknown, an op error occurs. - with self.assertRaisesOpError( - r"Expected axis in the range \[-1, 1\), but got %s" % bad_axis): - array_ops.gather(params_ph, indices, axis=bad_axis).eval( - feed_dict={params_ph: params}) + params = [0, 1, 2] + indices = 0 + for bad_axis in (1, 2, -2): + # Shape inference can validate axis for known params rank. + with self.subTest(bad_axis=bad_axis): + with self.assertRaisesRegex( + (ValueError, errors.InvalidArgumentError), + "Shape must be at least rank .* but is rank 1"): + array_ops.gather(params, indices, axis=bad_axis) - @test_util.run_deprecated_v1 def testEmptySlices(self): - with self.session(use_gpu=True): - for dtype in _TEST_TYPES: - for itype in np.int32, np.int64: - # Leading axis gather. - with self.subTest(dtype=dtype, itype=itype): - params = np.zeros((7, 0, 0), dtype=dtype.as_numpy_dtype) - indices = np.array([3, 4], dtype=itype) - gather = array_ops.gather(params, indices, axis=0) - self.assertAllEqual(gather, np.zeros((2, 0, 0))) + for dtype in _TEST_TYPES: + for itype in np.int32, np.int64: + # Leading axis gather. + with self.subTest(dtype=dtype, itype=itype): + params = np.zeros((7, 0, 0), dtype=dtype.as_numpy_dtype) + indices = np.array([3, 4], dtype=itype) + gather = array_ops.gather(params, indices, axis=0) + self.assertAllEqual(gather, np.zeros((2, 0, 0))) - # Middle axis gather. - params = np.zeros((0, 7, 0), dtype=dtype.as_numpy_dtype) - gather = array_ops.gather(params, indices, axis=1) - self.assertAllEqual(gather, np.zeros((0, 2, 0))) + # Middle axis gather. + params = np.zeros((0, 7, 0), dtype=dtype.as_numpy_dtype) + gather = array_ops.gather(params, indices, axis=1) + self.assertAllEqual(gather, np.zeros((0, 2, 0))) - # Trailing axis gather. - params = np.zeros((0, 0, 7), dtype=dtype.as_numpy_dtype) - gather = array_ops.gather(params, indices, axis=2) - self.assertAllEqual(gather, np.zeros((0, 0, 2))) + # Trailing axis gather. + params = np.zeros((0, 0, 7), dtype=dtype.as_numpy_dtype) + gather = array_ops.gather(params, indices, axis=2) + self.assertAllEqual(gather, np.zeros((0, 0, 2))) @parameterized.parameters([ # batch_dims=0 (equivalent to tf.gather) @@ -385,20 +444,13 @@ class GatherTest(test.TestCase, parameterized.TestCase): self.assertAllEqual(expected, result) # Test the gradients shape. 
- if context.executing_eagerly(): - with backprop.GradientTape() as tape: - zeros = array_ops.zeros_like(params, dtype=dtypes.float32) - tape.watch(zeros) - values = zeros * 2 + zeros - result = array_ops.gather( - values, indices, axis=axis, batch_dims=batch_dims) - gradients = tape.gradient(result, zeros) - else: + with backprop.GradientTape() as tape: zeros = array_ops.zeros_like(params, dtype=dtypes.float32) + tape.watch(zeros) values = zeros * 2 + zeros result = array_ops.gather( values, indices, axis=axis, batch_dims=batch_dims) - gradients = gradients_impl.gradients(result, [zeros])[0] + gradients = tape.gradient(result, zeros) self.assertAllEqual(array_ops.shape(params), array_ops.shape(gradients)) From 29b576a718d11264695276e624ad95f15a337cb7 Mon Sep 17 00:00:00 2001 From: Kibeom Kim Date: Fri, 31 Jul 2020 16:24:03 -0700 Subject: [PATCH 1911/2522] Remove @test_util.run_deprecated_v1 in gradient_correctness_test.py Also add `persistent` arg to test_util.AbstractGradientTape PiperOrigin-RevId: 324306126 Change-Id: I4f30cff141d725b9c0dd9d2c71e353df4923ca2e --- tensorflow/python/framework/test_util.py | 6 +- .../kernel_tests/gradient_correctness_test.py | 147 ++++++++++-------- 2 files changed, 88 insertions(+), 65 deletions(-) diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index 958c7697c96..15f4507b5e2 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -3318,16 +3318,16 @@ class AbstractGradientTape: duplicating tests. """ - def __init__(self, use_tape): + def __init__(self, use_tape, persistent=False): self._use_tape = use_tape + self._persistent = persistent def __enter__(self): if self._use_tape: - self._tape_impl = backprop.GradientTape() + self._tape_impl = backprop.GradientTape(persistent=self._persistent) else: self._tape_impl = _fake_gradient_tape_context_manager() return self._tape_impl.__enter__() def __exit__(self, exc_type, exc_val, exc_tb): self._tape_impl.__exit__(exc_type, exc_val, exc_tb) - diff --git a/tensorflow/python/kernel_tests/gradient_correctness_test.py b/tensorflow/python/kernel_tests/gradient_correctness_test.py index 911ba59bed4..ddbe514fa9d 100644 --- a/tensorflow/python/kernel_tests/gradient_correctness_test.py +++ b/tensorflow/python/kernel_tests/gradient_correctness_test.py @@ -25,20 +25,21 @@ from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import test_util -from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import math_ops from tensorflow.python.platform import test class GradientCorrectnessTest(test.TestCase, parameterized.TestCase): - @test_util.run_deprecated_v1 - def testMultipleOutputChainedGradients(self): - with self.cached_session() as sess: + @parameterized.parameters(set((True, context.executing_eagerly()))) + def testMultipleOutputChainedGradients(self, use_tape): + with test_util.AbstractGradientTape(use_tape=use_tape) as tape: x = constant_op.constant(1.0, dtype=dtypes.float32) + tape.watch(x) + yexp = math_ops.exp(x) yexplog = math_ops.log(yexp) - grads = gradients_impl.gradients([yexp, yexplog], [x]) + grads = tape.gradient([yexp, yexplog], [x]) grad_vals = self.evaluate(grads) exp1_plus_one = (1.0 + np.exp(1.0)).astype(np.float32) # [dexp(x)/dx + d(log(exp(x)))/dx] @ x=1 == exp(1) + 1 @@ -52,72 +53,94 @@ class GradientCorrectnessTest(test.TestCase, parameterized.TestCase): dx_dx = 
tape.gradient(x, x) self.assertAllClose(1., self.evaluate(dx_dx)) - @test_util.run_deprecated_v1 - def testIntegerIdentityGradient(self): + @parameterized.parameters(set((True, context.executing_eagerly()))) + def testIntegerIdentityGradient(self, use_tape): x = constant_op.constant(3) - dx_dx, = gradients_impl.gradients(x, x) - with self.cached_session() as sess: - self.assertAllClose(1, self.evaluate(dx_dx)) + with test_util.AbstractGradientTape(use_tape=use_tape) as tape: + tape.watch(x) + dx_dx = tape.gradient(x, x) + self.assertAllClose(1, self.evaluate(dx_dx)) - @test_util.run_deprecated_v1 - def testGradientWithIntegerPath(self): - x = constant_op.constant([3.9, 4.1]) - k = math_ops.cast(math_ops.cast(x, dtypes.int32), dtypes.float32) - y = x * k - dy_dx, = gradients_impl.gradients(y, x) - with self.cached_session() as sess: + @parameterized.parameters(set((True, context.executing_eagerly()))) + def testGradientWithIntegerPath(self, use_tape): + with test_util.AbstractGradientTape(use_tape=use_tape) as tape: + x = constant_op.constant([3.9, 4.1]) + tape.watch(x) + + k = math_ops.cast(math_ops.cast(x, dtypes.int32), dtypes.float32) + y = x * k + dy_dx = tape.gradient(y, x) self.assertAllClose([3., 4.], self.evaluate(dy_dx)) - @test_util.run_deprecated_v1 - def testNoIntegerGradient1(self): - x = constant_op.constant([3.9, 4.1]) - k = math_ops.cast(math_ops.cast(x, dtypes.int32), dtypes.float32) - y = k * k - dy_dx, = gradients_impl.gradients(y, x) - self.assertIsNone(dy_dx) + @parameterized.parameters(set((True, context.executing_eagerly()))) + def testNoIntegerGradient1(self, use_tape): + with test_util.AbstractGradientTape(use_tape=use_tape) as tape: + x = constant_op.constant([3.9, 4.1]) + tape.watch(x) - @test_util.run_deprecated_v1 - def testNoIntegerGradient2(self): - k = constant_op.constant([3, 4]) - x = math_ops.cast(k, dtypes.float32) - y = x * x - dy_dk, = gradients_impl.gradients(y, k) - self.assertIsNone(dy_dk) + k = math_ops.cast(math_ops.cast(x, dtypes.int32), dtypes.float32) + y = k * k + dy_dx = tape.gradient(y, x) + self.assertIsNone(dy_dx) - @test_util.run_deprecated_v1 - def testNoIntegerGradient3(self): - k = constant_op.constant([3, 4]) - m = k * k - dm_dk, = gradients_impl.gradients(m, k) - self.assertIsNone(dm_dk) + @parameterized.parameters(set((True, context.executing_eagerly()))) + def testNoIntegerGradient2(self, use_tape): + with test_util.AbstractGradientTape(use_tape=use_tape) as tape: + k = constant_op.constant([3, 4]) + x = math_ops.cast(k, dtypes.float32) + tape.watch([k, x]) - @test_util.run_deprecated_v1 - def testNoIntegerGradient4(self): - k = constant_op.constant([3, 4]) - m = k * k * k - dm_dk, = gradients_impl.gradients(m, k) - self.assertIsNone(dm_dk) + y = x * x + dy_dk = tape.gradient(y, k) + self.assertIsNone(dy_dk) - @test_util.run_deprecated_v1 - def testNoIntegerGradient5(self): - k = constant_op.constant([3, 4]) - m = k * k - n = m * m - dn_dk, = gradients_impl.gradients(n, k) - self.assertIsNone(dn_dk) + @parameterized.parameters(set((True, context.executing_eagerly()))) + def testNoIntegerGradient3(self, use_tape): + with test_util.AbstractGradientTape(use_tape=use_tape) as tape: + k = constant_op.constant([3, 4]) + tape.watch(k) - @test_util.run_deprecated_v1 - def testNoIntegerGradient6(self): - k = constant_op.constant(3) - x = math_ops.cast(k, dtypes.float32) - grad_1, = gradients_impl.gradients(k * k, k) - grad_2, = gradients_impl.gradients(x * x, k) - grad_3, = gradients_impl.gradients(math_ops.square(k), k) - grad_4, = 
gradients_impl.gradients(math_ops.square(x), k) - self.assertIsNone(grad_1) - self.assertIsNone(grad_2) - self.assertIsNone(grad_3) - self.assertIsNone(grad_4) + m = k * k + dm_dk = tape.gradient(m, k) + self.assertIsNone(dm_dk) + + @parameterized.parameters(set((True, context.executing_eagerly()))) + def testNoIntegerGradient4(self, use_tape): + with test_util.AbstractGradientTape(use_tape=use_tape) as tape: + k = constant_op.constant([3, 4]) + tape.watch(k) + + m = k * k * k + dm_dk = tape.gradient(m, k) + self.assertIsNone(dm_dk) + + @parameterized.parameters(set((True, context.executing_eagerly()))) + def testNoIntegerGradient5(self, use_tape): + with test_util.AbstractGradientTape(use_tape=use_tape) as tape: + k = constant_op.constant([3, 4]) + tape.watch(k) + + m = k * k + n = m * m + dn_dk = tape.gradient(n, k) + self.assertIsNone(dn_dk) + + @parameterized.parameters(set((True, context.executing_eagerly()))) + def testNoIntegerGradient6(self, use_tape): + with test_util.AbstractGradientTape( + use_tape=use_tape, persistent=True) as tape: + k = constant_op.constant(3) + tape.watch(k) + + x = math_ops.cast(k, dtypes.float32) + grad_1 = tape.gradient(k * k, k) + grad_2 = tape.gradient(x * x, k) + grad_3 = tape.gradient(math_ops.square(k), k) + grad_4 = tape.gradient(math_ops.square(x), k) + self.assertIsNone(grad_1) + self.assertIsNone(grad_2) + self.assertIsNone(grad_3) + self.assertIsNone(grad_4) if __name__ == '__main__': From 40626d2c54d0769d868570f0c1eeae6ae87c3d0d Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Fri, 31 Jul 2020 16:41:56 -0700 Subject: [PATCH 1912/2522] Fix the BUILD rule for platform_port, after changing the NominalCPUFrequency implementation. We were missing the implementation of one dependency. PiperOrigin-RevId: 324309158 Change-Id: Ic7ad8ede507f0dc8896544bcdb52ba2a40b50d5b --- tensorflow/core/BUILD | 1 + tensorflow/core/platform/BUILD | 23 ++++++++++++++++++++++- tensorflow/core/platform/default/BUILD | 2 +- 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 71d08d8abbe..bf382105369 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1899,6 +1899,7 @@ cc_library( "//tensorflow/core/platform:platform_port", "//tensorflow/core/platform:platform_strings", "//tensorflow/core/platform:prefetch", + "//tensorflow/core/platform:profile_utils_cpu_utils", "//tensorflow/core/platform:protobuf_internal", "//tensorflow/core/platform:regexp", "//tensorflow/core/platform:resource", diff --git a/tensorflow/core/platform/BUILD b/tensorflow/core/platform/BUILD index 198a431ac17..4fe09498e93 100644 --- a/tensorflow/core/platform/BUILD +++ b/tensorflow/core/platform/BUILD @@ -85,6 +85,8 @@ exports_files( "mutex.h", "net.h", "numa.h", + "profile_utils/android_armv7a_cpu_utils_helper.h", + "profile_utils/cpu_utils.cc", "profile_utils/cpu_utils.h", "profile_utils/i_cpu_utils_helper.h", "ram_file_system.h", @@ -960,6 +962,26 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "profile_utils_cpu_utils", + srcs = [ + "profile_utils/android_armv7a_cpu_utils_helper.h", + "profile_utils/cpu_utils.cc", + "profile_utils/i_cpu_utils_helper.h", + ], + hdrs = [ + "profile_utils/cpu_utils.h", + ], + copts = tf_copts(), + deps = [ + ":logging", + ":macros", + ":types", + "@com_google_absl//absl/base", + ], + alwayslink = 1, +) + filegroup( name = "tf32_hdr", srcs = ["tf32_utils.h"], @@ -1642,7 +1664,6 @@ filegroup( srcs = [ "profile_utils/android_armv7a_cpu_utils_helper.cc", 
"profile_utils/clock_cycle_profiler.cc", - "profile_utils/cpu_utils.cc", ], visibility = ["//tensorflow/core:__pkg__"], ) diff --git a/tensorflow/core/platform/default/BUILD b/tensorflow/core/platform/default/BUILD index a6a33384659..40883cb69d4 100644 --- a/tensorflow/core/platform/default/BUILD +++ b/tensorflow/core/platform/default/BUILD @@ -269,7 +269,6 @@ cc_library( "//tensorflow/core/platform:mem.h", "//tensorflow/core/platform:numa.h", "//tensorflow/core/platform:profile_utils/cpu_utils.h", - "//tensorflow/core/platform:profile_utils/i_cpu_utils_helper.h", "//tensorflow/core/platform:snappy.h", ], copts = tf_copts(), @@ -289,6 +288,7 @@ cc_library( "//tensorflow/core/platform:dynamic_annotations", "//tensorflow/core/platform:logging", "//tensorflow/core/platform:macros", + "//tensorflow/core/platform:profile_utils_cpu_utils", "//tensorflow/core/platform:types", "//tensorflow/core/platform", "@snappy", From 0d78d544c3102fd6820941fbcde2b8ab7a1d56ec Mon Sep 17 00:00:00 2001 From: Kibeom Kim Date: Fri, 31 Jul 2020 16:48:37 -0700 Subject: [PATCH 1913/2522] Remove @test_util.deprecated_graph_mode_only in gradients_test.py PiperOrigin-RevId: 324310254 Change-Id: I47728b12de273d1fa50eac71ef06f6209ba4e6f6 --- tensorflow/python/ops/gradients_test.py | 45 ++++--------------------- 1 file changed, 7 insertions(+), 38 deletions(-) diff --git a/tensorflow/python/ops/gradients_test.py b/tensorflow/python/ops/gradients_test.py index 7425e1e1522..5bd31aa8c73 100644 --- a/tensorflow/python/ops/gradients_test.py +++ b/tensorflow/python/ops/gradients_test.py @@ -1444,7 +1444,8 @@ class TensorListGradientsTest(test_util.TensorFlowTestCase): self.assertEqual(self.evaluate(grad), 5.) -class VariablesGradientTest(test_util.TensorFlowTestCase): +class VariablesGradientTest(test_util.TensorFlowTestCase, + parameterized.TestCase): def _TestFnVariablesGradient(self, inputs, test_fn, vars_to_grad): """Returns gradients of `test_model` with respect to `vars_to_grad`.""" @@ -1550,8 +1551,8 @@ class VariablesGradientTest(test_util.TensorFlowTestCase): for g, g_re in zip(grads, grads_re): self.assertAllClose(g, g_re) - @test_util.deprecated_graph_mode_only - def testFnRecomputeWithScopeGradientTape(self): + @parameterized.parameters(set((True, context.executing_eagerly()))) + def testFnRecomputeWithScopeGradient(self, use_tape): """Checks that recompute_grad works with var scope and GradientTape.""" def TestFn(input_t): @@ -1561,7 +1562,6 @@ class VariablesGradientTest(test_util.TensorFlowTestCase): shape=10, trainable=True, ) - self.evaluate(test_var.assign(np.ones([10]))) return input_t * test_var test_input_t = constant(np.zeros((10, 10), dtype=np.float32)) @@ -1570,10 +1570,12 @@ class VariablesGradientTest(test_util.TensorFlowTestCase): "output_scope", reuse=variable_scope.AUTO_REUSE, use_resource=True): test_fn_re = custom_gradient.recompute_grad(TestFn) - with backprop.GradientTape(persistent=True) as tape: + with test_util.AbstractGradientTape( + use_tape=use_tape, persistent=True) as tape: out_re = test_fn_re(test_input_t) out = TestFn(test_input_t) + self.evaluate(variables.global_variables_initializer()) grads_re = tape.gradient(out_re, variables.trainable_variables()) grads = tape.gradient(out, variables.trainable_variables()) @@ -1581,39 +1583,6 @@ class VariablesGradientTest(test_util.TensorFlowTestCase): grads = self.evaluate(grads) for g, g_re in zip(grads, grads_re): self.assertAllClose(g, g_re) - self.assertAllClose(g, g_re) - - @test_util.deprecated_graph_mode_only - def 
testFnRecomputeWithScopeGradients(self): - """Checks that recompute_grad works with var scope and gradients(..).""" - - def TestFn(input_t): - with variable_scope.variable_scope("inner_scope"): - test_var = variable_scope.get_variable( - name="test_var", - shape=10, - trainable=True, - ) - return input_t * test_var - - test_input_t = constant(np.zeros((10, 10), dtype=np.float32)) - - with variable_scope.variable_scope( - "output_scope", reuse=variable_scope.AUTO_REUSE, use_resource=True): - test_fn_re = custom_gradient.recompute_grad(TestFn) - out_re = test_fn_re(test_input_t) - out = TestFn(test_input_t) - - init = variables.global_variables_initializer() - self.evaluate(init) - grads_re = gradients.gradients(out_re, variables.trainable_variables()) - grads = gradients.gradients(out, variables.trainable_variables()) - - grads_re = self.evaluate(grads_re) - grads = self.evaluate(grads) - for g, g_re in zip(grads, grads_re): - self.assertAllClose(g, g_re) - self.assertAllClose(g, g_re) @test_util.run_in_graph_and_eager_modes def testFnRecomputeSameTensor(self): From 68bedc248dddae432d32ae257afc9fa4edf84386 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 31 Jul 2020 16:48:51 -0700 Subject: [PATCH 1914/2522] Use V2 image summary implementation for Keras automatic outside compilation test. PiperOrigin-RevId: 324310298 Change-Id: I3819864d3420a880bdcb99725396e9951b6ba211 --- .../automatic_outside_compilation_test.py | 87 ++++++++++--------- 1 file changed, 44 insertions(+), 43 deletions(-) diff --git a/tensorflow/python/keras/tests/automatic_outside_compilation_test.py b/tensorflow/python/keras/tests/automatic_outside_compilation_test.py index 76e5ca98af9..a770b7fa6aa 100644 --- a/tensorflow/python/keras/tests/automatic_outside_compilation_test.py +++ b/tensorflow/python/keras/tests/automatic_outside_compilation_test.py @@ -22,6 +22,9 @@ import os import numpy as np +from tensorboard.plugins.histogram import summary_v2 as histogram_summary_v2 +from tensorboard.plugins.image import summary_v2 as image_summary_v2 +from tensorboard.plugins.scalar import summary_v2 as scalar_summary_v2 from tensorflow.python.compat import v2_compat from tensorflow.python.data.ops import dataset_ops from tensorflow.python.distribute import tpu_strategy as tpu_strategy_lib @@ -74,7 +77,8 @@ class LayerForScalarSummary(base_layer.Layer): """A pass-through layer that only records scalar values to summary.""" def call(self, x): - summary_ops_v2.scalar('custom_scalar_summary', math_ops.reduce_sum(x)) + # Add summary scalar using compat v2 implementation. + scalar_summary_v2.scalar('custom_scalar_summary_v2', math_ops.reduce_sum(x)) return x @@ -82,7 +86,9 @@ class LayerForImageSummary(base_layer.Layer): """A pass-through layer that only records image values to summary.""" def call(self, x): - summary_ops_v2.image('custom_image_summary', x) + # Add summary image using compat v2 implementation. + image_summary_v2.image('custom_image_summary_v2', x) + return x @@ -90,7 +96,9 @@ class LayerForHistogramSummary(base_layer.Layer): """A pass-through layer that records histogram values to summary.""" def call(self, x): - summary_ops_v2.histogram('custom_histogram_summary', x) + # Add summary histogram using compat v2 implementation. 
+ histogram_summary_v2.histogram('custom_histogram_summary_v2', x) + return x @@ -161,7 +169,18 @@ class AutoOutsideCompilationWithKerasTest(test.TestCase): context.context().soft_device_placement = True self.summary_dir = self.get_temp_dir() - def testV2SummaryWithImageModel(self): + def validate_recorded_sumary_file(self, event_files, summary_dict, + expected_count): + for event_file in event_files: + for e in summary_iterator.summary_iterator(event_file): + for v in e.summary.value: + if v.tag in summary_dict: + summary_dict[v.tag] += 1 + + for key in summary_dict: + self.assertEqual(summary_dict[key], expected_count) + + def testV2SummaryWithKerasSequentialModel(self): strategy = get_tpu_strategy() with strategy.scope(): @@ -177,34 +196,21 @@ class AutoOutsideCompilationWithKerasTest(test.TestCase): epochs=1, callbacks=[tensorboard_callback]) - event_files = file_io.get_matching_files_v2( - os.path.join(self.summary_dir, 'train', 'event*')) events_count_dictionary = { - ('sequential/layer_for_histogram_summary' - '/custom_histogram_summary'): 0, - 'sequential/layer_for_image_summary/custom_image_summary/image/0': 0 + 'sequential/layer_for_histogram_summary/custom_histogram_summary_v2': + 0, + 'sequential/layer_for_image_summary/custom_image_summary_v2': + 0, } - for event_file in event_files: - for e in summary_iterator.summary_iterator(event_file): - for v in e.summary.value: - if v.tag in events_count_dictionary: - events_count_dictionary[v.tag] += 1 - + event_files = file_io.get_matching_files_v2( + os.path.join(self.summary_dir, 'train', 'event*')) # Since total of 10 steps are ran and summary ops should be invoked # every 2 batches, we should see total of 5 event logs. - self.assertEqual( - events_count_dictionary[ - ('sequential/layer_for_histogram_summary/' - 'custom_histogram_summary')], - 5) - self.assertEqual( - events_count_dictionary[ - ('sequential/layer_for_image_summary/' - 'custom_image_summary/image/0')], - 5) + self.validate_recorded_sumary_file(event_files, events_count_dictionary, + 5) - def testV2SummaryWithKerasFit(self): + def testV2SummaryWithKerasSubclassedModel(self): strategy = get_tpu_strategy() with strategy.scope(): @@ -223,25 +229,18 @@ class AutoOutsideCompilationWithKerasTest(test.TestCase): event_files = file_io.get_matching_files_v2( os.path.join(self.summary_dir, 'train', 'event*')) events_count_dictionary = { - 'custom_model/layer_for_scalar_summary/custom_scalar_summary': 0, - 'custom_model/layer_for_histogram_summary/custom_histogram_summary': 0 + ('custom_model/layer_for_scalar_summary/' + 'custom_scalar_summary_v2'): + 0, + ('custom_model/layer_for_histogram_summary/' + 'custom_histogram_summary_v2'): + 0 } - for event_file in event_files: - for e in summary_iterator.summary_iterator(event_file): - for v in e.summary.value: - if v.tag in events_count_dictionary: - events_count_dictionary[v.tag] += 1 - # Since total of 10 steps are ran and summary ops should be invoked # every 2 batches, we should see total of 5 event logs. 
- self.assertEqual( - events_count_dictionary[('custom_model/layer_for_histogram_summary/' - 'custom_histogram_summary')], - 5) - self.assertEqual( - events_count_dictionary[ - 'custom_model/layer_for_scalar_summary/custom_scalar_summary'], 5) + self.validate_recorded_sumary_file(event_files, events_count_dictionary, + 5) def testSummaryWithCustomTrainingLoop(self): strategy = get_tpu_strategy() @@ -258,8 +257,10 @@ class AutoOutsideCompilationWithKerasTest(test.TestCase): del labels logits = model(features) with summary_ops_v2.always_record_summaries(), writer.as_default(): - summary_ops_v2.scalar( - 'logits', logits, step=model.optimizer.iterations) + scalar_summary_v2.scalar( + 'logits', + math_ops.reduce_sum(logits), + step=model.optimizer.iterations) return logits iterator = iter(dataset) From 1b2ac91b25aefeb445d1baa20ebcd9ba5c42f0e2 Mon Sep 17 00:00:00 2001 From: Kibeom Kim Date: Fri, 31 Jul 2020 16:55:05 -0700 Subject: [PATCH 1915/2522] Remove @test_util.deprecated_graph_mode_only in dense_update_ops_test.py PiperOrigin-RevId: 324311282 Change-Id: I7f8c4fc89e4982dc885a8a07c155010d3f6097fd --- .../python/kernel_tests/dense_update_ops_test.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/tensorflow/python/kernel_tests/dense_update_ops_test.py b/tensorflow/python/kernel_tests/dense_update_ops_test.py index 2d7eac10a12..b73f04b25d0 100644 --- a/tensorflow/python/kernel_tests/dense_update_ops_test.py +++ b/tensorflow/python/kernel_tests/dense_update_ops_test.py @@ -30,28 +30,28 @@ from tensorflow.python.platform import test class AssignOpTest(test.TestCase): - def _initAssignFetch(self, x, y, use_gpu=False): + def _initAssignFetch(self, x, y, use_gpu): """Initialize a param to init and update it with y.""" super(AssignOpTest, self).setUp() - with self.cached_session(use_gpu=use_gpu): + with test_util.device(use_gpu=use_gpu): p = variables.Variable(x) assign = state_ops.assign(p, y) self.evaluate(p.initializer) new_value = self.evaluate(assign) return self.evaluate(p), new_value - def _initAssignAddFetch(self, x, y, use_gpu=False): + def _initAssignAddFetch(self, x, y, use_gpu): """Initialize a param to init, and compute param += y.""" - with self.cached_session(use_gpu=use_gpu): + with test_util.device(use_gpu=use_gpu): p = variables.Variable(x) add = state_ops.assign_add(p, y) self.evaluate(p.initializer) new_value = self.evaluate(add) return self.evaluate(p), new_value - def _initAssignSubFetch(self, x, y, use_gpu=False): + def _initAssignSubFetch(self, x, y, use_gpu): """Initialize a param to init, and compute param -= y.""" - with self.cached_session(use_gpu=use_gpu): + with test_util.device(use_gpu=use_gpu): p = variables.Variable(x) sub = state_ops.assign_sub(p, y) self.evaluate(p.initializer) @@ -78,11 +78,10 @@ class AssignOpTest(test.TestCase): var_value, op_value = self._initAssignAddFetch(x, y, use_gpu=True) self.assertAllEqual(x + y, var_value) self.assertAllEqual(x + y, op_value) - var_value, op_value = self._initAssignSubFetch(x, y, use_gpu=False) + var_value, op_value = self._initAssignSubFetch(x, y, use_gpu=True) self.assertAllEqual(x - y, var_value) self.assertAllEqual(x - y, op_value) - @test_util.run_deprecated_v1 def testBasic(self): self._testTypes(np.arange(0, 20).reshape([4, 5])) From d5baa96fed1d40edd3e62c21c7a3ba80d2ba9d63 Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: Fri, 31 Jul 2020 17:16:30 -0700 Subject: [PATCH 1916/2522] Shard from_generator_test. 
The test occasionally times out because many of the individual tests take around 20 seconds, and there are many tests. PiperOrigin-RevId: 324314603 Change-Id: Id990ecabf7e1932d6eba585649553554ea32ca1e --- tensorflow/python/data/kernel_tests/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD index 109a978514e..210b6f59681 100644 --- a/tensorflow/python/data/kernel_tests/BUILD +++ b/tensorflow/python/data/kernel_tests/BUILD @@ -224,6 +224,7 @@ tf_py_test( name = "from_generator_test", size = "medium", srcs = ["from_generator_test.py"], + shard_count = 10, deps = [ ":test_base", "//tensorflow/python:client_testlib", From 5313d56b1e21e50170de136732d53abf70075604 Mon Sep 17 00:00:00 2001 From: Haitang Hu Date: Fri, 31 Jul 2020 17:16:41 -0700 Subject: [PATCH 1917/2522] Improve error message format when reporting batch dimension mismatch in conv2d. Before: input and out_backprop must have the same batch sizeinput batch: 2outbackprop batch: 1 batch_dim: 0 After: input and out_backprop must have the same batch size. Input batch: 2, outbackprop batch: 1 , batch_dim: 0 PiperOrigin-RevId: 324314629 Change-Id: I5e65341d545203209629933202d463374596b8e2 --- tensorflow/core/kernels/conv_grad_shape_utils.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/kernels/conv_grad_shape_utils.cc b/tensorflow/core/kernels/conv_grad_shape_utils.cc index acb052968e1..bba989b4f92 100644 --- a/tensorflow/core/kernels/conv_grad_shape_utils.cc +++ b/tensorflow/core/kernels/conv_grad_shape_utils.cc @@ -115,10 +115,10 @@ Status ConvBackpropComputeDimensionsV2( dims->batch_size = input_shape.dim_size(batch_dim); if (dims->batch_size != out_backprop_shape.dim_size(batch_dim)) { return errors::InvalidArgument( - label, ": input and out_backprop must have the same batch size", - "input batch: ", dims->batch_size, - "outbackprop batch: ", out_backprop_shape.dim_size(batch_dim), - " batch_dim: ", batch_dim); + label, ": input and out_backprop must have the same batch size.", + " Input batch: ", dims->batch_size, + ", outbackprop batch: ", out_backprop_shape.dim_size(batch_dim), + ", batch_dim: ", batch_dim); } int feature_dim = GetTensorFeatureDimIndex(num_dims, data_format); From a49117d4b5dd68c3cbecd2f67c7add9e5a8cab55 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Fri, 31 Jul 2020 17:30:36 -0700 Subject: [PATCH 1918/2522] Using default Compile for all ops. 
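Each op's OpenCL code generation and device-specific compiler options move out of its Compile() override and into its constructor, so the shared default Compile path in GPUOperation can build every kernel from code_ and compiler_options_. A minimal sketch of the resulting pattern, using placeholder names (ConvFoo, GenerateConvFooCode) and assuming the base class consumes code_ / compiler_options_ as the diff below suggests:

    // Sketch only: the constructor now prepares everything the default
    // Compile needs; ConvFoo / GenerateConvFooCode are placeholder names.
    ConvFoo::ConvFoo(const OperationDef& definition,
                     const DeviceInfo& device_info)
        : GPUOperation(definition) {
      // Kernel source is generated up front instead of inside Compile().
      code_ = GenerateConvFooCode(definition_, device_info);
      if (definition_.precision == CalculationsPrecision::F16 &&
          device_info.IsPowerVR()) {
        // Device-specific build flags are collected here as well.
        compiler_options_.push_back(CompilerOptions::POWERVR_FP16);
      }
    }
    // The per-op `absl::Status Compile(const CreationContext&) override;`
    // declaration is removed; the inherited default is used instead.
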
PiperOrigin-RevId: 324316456 Change-Id: I1a9c31a6893174798420de07bc57811db013c2fe --- .../lite/delegates/gpu/cl/kernels/conv_3d.cc | 33 +++----- .../lite/delegates/gpu/cl/kernels/conv_3d.h | 1 - .../gpu/cl/kernels/conv_buffer_1x1.cc | 20 +---- .../gpu/cl/kernels/conv_buffer_1x1.h | 1 - .../gpu/cl/kernels/conv_constants.cc | 59 +++++++-------- .../delegates/gpu/cl/kernels/conv_constants.h | 16 +--- .../delegates/gpu/cl/kernels/conv_powervr.cc | 41 +++++----- .../delegates/gpu/cl/kernels/conv_powervr.h | 15 ++-- .../delegates/gpu/cl/kernels/conv_texture.cc | 39 ++++------ .../delegates/gpu/cl/kernels/conv_texture.h | 5 +- .../gpu/cl/kernels/conv_weights_converter.cc | 18 ++--- .../gpu/cl/kernels/conv_weights_converter.h | 4 +- .../gpu/cl/kernels/convolution_transposed.cc | 21 +----- .../gpu/cl/kernels/convolution_transposed.h | 1 - .../cl/kernels/convolution_transposed_3d.cc | 40 ++++------ .../cl/kernels/convolution_transposed_3d.h | 1 - .../cl/kernels/convolution_transposed_3x3.cc | 28 ++----- .../cl/kernels/convolution_transposed_3x3.h | 1 - .../convolution_transposed_3x3_thin.cc | 40 +++------- .../kernels/convolution_transposed_3x3_thin.h | 19 ++--- .../cl/kernels/convolution_transposed_4x4.cc | 31 ++------ .../cl/kernels/convolution_transposed_4x4.h | 4 +- .../cl/kernels/convolution_transposed_thin.cc | 50 ++++--------- .../cl/kernels/convolution_transposed_thin.h | 26 +++---- .../gpu/cl/kernels/depthwise_conv.cc | 44 +++++------ .../delegates/gpu/cl/kernels/depthwise_conv.h | 7 +- .../gpu/cl/kernels/depthwise_conv_3x3.cc | 41 ++++------ .../gpu/cl/kernels/depthwise_conv_3x3.h | 6 +- .../gpu/cl/kernels/fully_connected.cc | 50 +++++-------- .../gpu/cl/kernels/fully_connected.h | 3 +- .../delegates/gpu/cl/kernels/gpu_operation.cc | 4 +- .../delegates/gpu/cl/kernels/gpu_operation.h | 4 +- .../lite/delegates/gpu/cl/kernels/lstm.cc | 23 +++--- .../lite/delegates/gpu/cl/kernels/lstm.h | 8 +- .../delegates/gpu/cl/kernels/lstm_test.cc | 2 +- .../delegates/gpu/cl/kernels/max_unpooling.cc | 46 +++++------- .../delegates/gpu/cl/kernels/max_unpooling.h | 15 ++-- .../gpu/cl/kernels/max_unpooling_test.cc | 3 +- .../lite/delegates/gpu/cl/kernels/mean.cc | 35 ++++----- .../lite/delegates/gpu/cl/kernels/mean.h | 5 +- .../cl/kernels/mean_stddev_normalization.cc | 14 +--- .../cl/kernels/mean_stddev_normalization.h | 1 - .../delegates/gpu/cl/kernels/mean_test.cc | 2 +- .../lite/delegates/gpu/cl/kernels/padding.cc | 37 ++------- .../lite/delegates/gpu/cl/kernels/padding.h | 5 -- .../lite/delegates/gpu/cl/kernels/pooling.cc | 61 +++++++-------- .../lite/delegates/gpu/cl/kernels/pooling.h | 17 +++-- .../delegates/gpu/cl/kernels/pooling_test.cc | 12 ++- .../lite/delegates/gpu/cl/kernels/reshape.cc | 17 +---- .../lite/delegates/gpu/cl/kernels/reshape.h | 3 +- .../delegates/gpu/cl/kernels/reshapex4.cc | 18 ++--- .../lite/delegates/gpu/cl/kernels/reshapex4.h | 4 +- .../lite/delegates/gpu/cl/kernels/resize.cc | 37 +++------ .../lite/delegates/gpu/cl/kernels/resize.h | 8 +- .../lite/delegates/gpu/cl/kernels/softmax.cc | 17 +---- .../lite/delegates/gpu/cl/kernels/softmax.h | 3 +- .../delegates/gpu/cl/kernels/softmax1x1.cc | 20 ++--- .../delegates/gpu/cl/kernels/softmax1x1.h | 4 +- .../gpu/cl/kernels/space_to_depth.cc | 19 ++--- .../delegates/gpu/cl/kernels/space_to_depth.h | 4 +- .../special/depthwise_conv_plus_1x1_conv.cc | 35 +++------ .../special/depthwise_conv_plus_1x1_conv.h | 7 +- .../delegates/gpu/cl/kernels/strided_slice.cc | 14 +--- .../delegates/gpu/cl/kernels/strided_slice.h | 1 - 
.../delegates/gpu/cl/kernels/transpose.cc | 19 ++--- .../lite/delegates/gpu/cl/kernels/transpose.h | 4 +- .../lite/delegates/gpu/cl/kernels/util.cc | 5 ++ .../lite/delegates/gpu/cl/kernels/util.h | 1 + .../lite/delegates/gpu/cl/kernels/winograd.cc | 75 +++++++------------ .../lite/delegates/gpu/cl/kernels/winograd.h | 14 +--- .../gpu/cl/selectors/operation_selector.cc | 10 ++- .../gpu/cl/selectors/simple_selectors.cc | 13 ++-- .../gpu/cl/selectors/simple_selectors.h | 6 +- 73 files changed, 475 insertions(+), 842 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.cc index 8032c41ed1b..b1e1e39327c 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.cc @@ -175,7 +175,15 @@ Conv3D::Conv3D(const OperationDef& definition, kernel_size_(attr.weights.shape.w, attr.weights.shape.h, attr.weights.shape.d), dilation_(attr.dilations.w, attr.dilations.h, attr.dilations.d), - conv_params_(GuessBestParams(device, definition, attr)) {} + conv_params_(GuessBestParams(device, definition, attr)) { + const bool stride_correction = + definition_.IsBatchSupported() && stride_.x != 1; + code_ = GenerateConv3D(definition_, stride_correction, conv_params_); + if (definition_.precision == CalculationsPrecision::F16 && + device.IsPowerVR()) { + compiler_options_.push_back(CompilerOptions::POWERVR_FP16); + } +} Conv3D::Conv3D(Conv3D&& operation) : GPUOperation(std::move(operation)), @@ -197,29 +205,6 @@ Conv3D& Conv3D::operator=(Conv3D&& operation) { return *this; } -absl::Status Conv3D::Compile(const CreationContext& creation_context) { - const bool stride_correction = - definition_.IsBatchSupported() && stride_.x != 1; - std::string code = - GenerateConv3D(definition_, stride_correction, conv_params_); - work_group_size_ = conv_params_.work_group_size; - std::string element_wise_code; - RETURN_IF_ERROR( - MergeOperations(linked_operations_, &args_, &element_wise_code)); - RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), - {{"dst_tensor", element_wise_code}}, - &code)); - - std::vector options; - if (definition_.precision == CalculationsPrecision::F16 && - creation_context.device->IsPowerVR()) { - options.push_back(CompilerOptions::POWERVR_FP16); - } - return creation_context.cache->GetOrCreateCLKernel( - code, "main_function", options, *creation_context.context, - *creation_context.device, &kernel_); -} - absl::Status Conv3D::BindArguments() { if (!conv_params_.x_kernel_is_1) { RETURN_IF_ERROR(args_.SetInt("stride_x", stride_.x)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.h index 569d78c4808..ce2d7794411 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.h @@ -40,7 +40,6 @@ class Conv3D : public GPUOperation { public: Conv3D() = default; absl::Status Tune(const TuningParameters& params) override; - absl::Status Compile(const CreationContext& creation_context) override; absl::Status BindArguments() override; int3 GetGridSize() const override; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc index 797e7c9ad90..949651c1f87 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc @@ -151,7 +151,10 @@ ConvBuffer1x1::ConvParams GetBestParams(const 
CLDevice& device, ConvBuffer1x1::ConvBuffer1x1(const OperationDef& definition, const ConvParams& conv_params) - : GPUOperation(definition), conv_params_(conv_params) {} + : GPUOperation(definition), conv_params_(conv_params) { + code_ = GenerateConvBuffer1x1(definition_, conv_params_, &args_); + work_group_size_ = conv_params_.work_group_size; +} ConvBuffer1x1::ConvBuffer1x1(ConvBuffer1x1&& operation) : GPUOperation(std::move(operation)), @@ -300,21 +303,6 @@ std::string ConvBuffer1x1::GenerateConvBuffer1x1( return c; } -absl::Status ConvBuffer1x1::Compile(const CreationContext& creation_context) { - std::string code = GenerateConvBuffer1x1(definition_, conv_params_, &args_); - work_group_size_ = conv_params_.work_group_size; - std::string element_wise_code; - RETURN_IF_ERROR( - MergeOperations(linked_operations_, &args_, &element_wise_code)); - RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), - {{"dst_tensor", element_wise_code}}, - &code)); - RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel( - code, "main_function", *creation_context.context, - *creation_context.device, &kernel_)); - return absl::OkStatus(); -} - int3 ConvBuffer1x1::GetGridSize() const { const int dst_width_elements = DivideRoundUp( dst_[0]->Width() * dst_[0]->Batch(), (conv_params_.element_size / 4)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.h index e1527a7486d..90df8f2f9ad 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.h @@ -48,7 +48,6 @@ class ConvBuffer1x1 : public GPUOperation { ConvBuffer1x1& operator=(const ConvBuffer1x1&) = delete; absl::Status Tune(const TuningParameters& params) override; - absl::Status Compile(const CreationContext& creation_context) override; int3 GetGridSize() const override; ConvWeightsDescription GetConvWeightsDescription() const { diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc index 1c4d033a006..b93fe113d89 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc @@ -47,6 +47,32 @@ int GetOptimalMaxConstantSize(const DeviceInfo& info) { } } // namespace +ConvConstants::ConvConstants(const OperationDef& definition, + const Convolution2DAttributes& attr, + const DeviceInfo& device_info) + : GPUOperation(definition), + kernel_size_(attr.weights.shape.w, attr.weights.shape.h), + stride_(attr.strides.w, attr.strides.h), + padding_(-attr.padding.prepended.w, -attr.padding.prepended.h), + dilation_(attr.dilations.w, attr.dilations.h), + src_channels_(attr.weights.shape.i), + dst_channels_(attr.weights.shape.o) { + const bool stride_correction = + definition_.IsBatchSupported() && stride_.x != 1; + code_ = GenerateConvolutionConstantCode(definition_, kernel_size_, + src_channels_, dst_channels_, + stride_correction, device_info); + if (definition_.precision == CalculationsPrecision::F16 && + device_info.IsAdreno3xx()) { + compiler_options_.push_back(CompilerOptions::ADRENO_FULL_SIMD_LINE); + } + if (definition_.precision != CalculationsPrecision::F32 && + device_info.IsPowerVR()) { + // BUG, some PowerVRs (GE8320) produce incorrect result without it + compiler_options_.push_back(CompilerOptions::CL_OPT_DISABLE); + } +} + ConvConstants::ConvConstants(ConvConstants&& kernel) : GPUOperation(std::move(kernel)), 
kernel_size_(kernel.kernel_size_), @@ -71,9 +97,9 @@ ConvConstants& ConvConstants::operator=(ConvConstants&& kernel) { std::string ConvConstants::GenerateConvolutionConstantCode( const OperationDef& op_def, const int2& kernel_size, int src_channels, - int dst_channels, bool stride_correction, const CLDevice& device) { + int dst_channels, bool stride_correction, const DeviceInfo& device_info) { auto src_desc = op_def.src_tensors[0]; - src_desc.SetTextureAddressMode(TextureAddressMode::ZERO); + src_desc.SetTextureAddressMode(GetFastestZeroMode(device_info)); if (op_def.IsBatchSupported()) { src_desc.SetStateVar("BatchedWidth", "true"); } @@ -214,33 +240,6 @@ std::string ConvConstants::GenerateConvolutionConstantCode( return c; } -absl::Status ConvConstants::Compile(const CreationContext& creation_context) { - const bool stride_correction = - definition_.IsBatchSupported() && stride_.x != 1; - std::string code = GenerateConvolutionConstantCode( - definition_, kernel_size_, src_channels_, dst_channels_, - stride_correction, *creation_context.device); - std::string element_wise_code; - RETURN_IF_ERROR( - MergeOperations(linked_operations_, &args_, &element_wise_code)); - RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), - {{"dst_tensor", element_wise_code}}, - &code)); - std::vector options; - if (definition_.precision == CalculationsPrecision::F16 && - creation_context.device->IsAdreno3xx()) { - options.push_back(CompilerOptions::ADRENO_FULL_SIMD_LINE); - } - if (definition_.precision != CalculationsPrecision::F32 && - creation_context.device->IsPowerVR()) { - // BUG, some PowerVRs (GE8320) produce incorrect result without it - options.push_back(CompilerOptions::CL_OPT_DISABLE); - } - return creation_context.cache->GetOrCreateCLKernel( - code, "main_function", options, *creation_context.context, - *creation_context.device, &kernel_); -} - absl::Status ConvConstants::BindArguments() { RETURN_IF_ERROR(args_.SetInt("stride_x", stride_.x)); RETURN_IF_ERROR(args_.SetInt("stride_y", stride_.y)); @@ -284,7 +283,7 @@ absl::Status CreateConvConstants(const CreationContext& creation_context, if (!IsConvConstantsSupported(*creation_context.device, definition, attr)) { return absl::InvalidArgumentError("ConvConstants doesn't supported"); } - *result = ConvConstants(definition, attr); + *result = ConvConstants(definition, attr, creation_context.device->GetInfo()); RETURN_IF_ERROR( result->UploadWeights(attr.weights, creation_context.context)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.h index 0864243c6f2..877f32bdf4c 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.h @@ -35,8 +35,6 @@ namespace cl { class ConvConstants : public GPUOperation { public: ConvConstants() = default; - - absl::Status Compile(const CreationContext& creation_context) override; absl::Status BindArguments() override; int3 GetGridSize() const override; @@ -50,15 +48,9 @@ class ConvConstants : public GPUOperation { friend absl::Status CreateConvConstants( const CreationContext& creation_context, const OperationDef& definition, const Convolution2DAttributes& attr, ConvConstants* result); - explicit ConvConstants(const OperationDef& definition, - const Convolution2DAttributes& attr) - : GPUOperation(definition), - kernel_size_(attr.weights.shape.w, attr.weights.shape.h), - stride_(attr.strides.w, attr.strides.h), - 
padding_(-attr.padding.prepended.w, -attr.padding.prepended.h), - dilation_(attr.dilations.w, attr.dilations.h), - src_channels_(attr.weights.shape.i), - dst_channels_(attr.weights.shape.o) {} + ConvConstants(const OperationDef& definition, + const Convolution2DAttributes& attr, + const DeviceInfo& device_info); template absl::Status UploadWeights(const tflite::gpu::Tensor& weights, @@ -70,7 +62,7 @@ class ConvConstants : public GPUOperation { std::string GenerateConvolutionConstantCode( const OperationDef& op_def, const int2& kernel_size, int src_channels, - int dst_channels, bool stride_correction, const CLDevice& device); + int dst_channels, bool stride_correction, const DeviceInfo& device_info); int2 kernel_size_; int2 stride_; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc index 34b15850ebe..40060007b4e 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc @@ -179,29 +179,19 @@ ConvPowerVR& ConvPowerVR::operator=(ConvPowerVR&& operation) { return *this; } -absl::Status ConvPowerVR::Compile(const CreationContext& creation_context) { +void ConvPowerVR::GenerateCode(const DeviceInfo& device_info) { const bool stride_correction = definition_.IsBatchSupported() && stride_padding_.x != 1; - std::string code = GenerateConv(*creation_context.device, definition_, - stride_correction, conv_params_); + code_ = + GenerateConv(device_info, definition_, stride_correction, conv_params_); work_group_size_ = conv_params_.work_group_size; - std::string element_wise_code; - RETURN_IF_ERROR( - MergeOperations(linked_operations_, &args_, &element_wise_code)); - RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), - {{"dst_tensor", element_wise_code}}, - &code)); - std::vector options; if (definition_.precision == CalculationsPrecision::F16 && - creation_context.device->IsPowerVR()) { - options.push_back(CompilerOptions::POWERVR_FP16); + device_info.IsPowerVR()) { + compiler_options_.push_back(CompilerOptions::POWERVR_FP16); } if (conv_params_.IsPrivateMemBroadcast()) { - options.push_back(CompilerOptions::CL_2_0); + compiler_options_.push_back(CompilerOptions::CL_2_0); } - return creation_context.cache->GetOrCreateCLKernel( - code, "main_function", options, *creation_context.context, - *creation_context.device, &kernel_); } absl::Status ConvPowerVR::BindArguments() { @@ -274,11 +264,12 @@ absl::Status ConvPowerVR::Tune(const TuningParameters& params) { return absl::OkStatus(); } -std::string ConvPowerVR::GenerateConv( - const CLDevice& device, const OperationDef& op_def, bool stride_correction, - const ConvPowerVR::ConvParams& conv_params) { +std::string ConvPowerVR::GenerateConv(const DeviceInfo& device_info, + const OperationDef& op_def, + bool stride_correction, + const ConvParams& conv_params) { auto src_desc = op_def.src_tensors[0]; - src_desc.SetTextureAddressMode(GetFastestZeroMode(device)); + src_desc.SetTextureAddressMode(GetFastestZeroMode(device_info)); if (op_def.IsBatchSupported()) { src_desc.SetStateVar("BatchedWidth", "true"); } @@ -350,7 +341,7 @@ std::string ConvPowerVR::GenerateConv( std::string c = GetCommonDefines(op_def.precision); if (use_simd_broadcast) { - if (device.cl_version() == OpenCLVersion::CL_2_0) { + if (device_info.cl_version == OpenCLVersion::CL_2_0) { c += "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n"; } } @@ -363,7 +354,7 @@ std::string ConvPowerVR::GenerateConv( 
std::to_string(work_group_size.y) + ", " + std::to_string(work_group_size.z) + ")))\n"; } - if (use_simd_broadcast && device.IsIntel()) { + if (use_simd_broadcast && device_info.IsIntel()) { c += "__attribute__((intel_reqd_sub_group_size(" + std::to_string(simd_size) + ")))\n"; } @@ -498,7 +489,7 @@ std::string ConvPowerVR::GenerateConv( } } }; - const bool conditional_read = device.IsMali(); + const bool conditional_read = device_info.IsMali(); auto read_src = [&]() { const std::string cl_type = ToCLDataType(conv_params.weights_data_type); for (int y = 0; y < block_size.y; ++y) { @@ -1004,6 +995,7 @@ absl::Status CreateConvPowerVR(const CreationContext& creation_context, const Convolution2DAttributes& attr, ConvPowerVR* result, const BHWC* dst_shape) { *result = ConvPowerVR(definition, attr, *creation_context.device, dst_shape); + result->GenerateCode(creation_context.device->GetInfo()); return result->UploadData(attr.weights, attr.bias, creation_context.context); } @@ -1012,6 +1004,7 @@ absl::Status CreateConvPowerVR(const CreationContext& creation_context, const FullyConnectedAttributes& attr, ConvPowerVR* result, const BHWC* dst_shape) { *result = ConvPowerVR(definition, attr, *creation_context.device, dst_shape); + result->GenerateCode(creation_context.device->GetInfo()); return result->UploadData(attr.weights, attr.bias, creation_context.context); } @@ -1021,6 +1014,7 @@ absl::Status CreateConvPowerVRDynamicWeights( ConvPowerVR* result, const BHWC* dst_shape) { *result = ConvPowerVR(definition, attr, weights_shape, *creation_context.device, dst_shape); + result->GenerateCode(creation_context.device->GetInfo()); return result->UploadBias(attr.bias, creation_context.context); } @@ -1031,6 +1025,7 @@ absl::Status CreateConvPowerVRWino4x4To6x6( *result = ConvPowerVR(definition); result->conv_params_ = result->GuessBestParamsWinograd( *creation_context.device, definition, attr, dst_shape); + result->GenerateCode(creation_context.device->GetInfo()); return result->UploadDataForWinograd4x4To6x6( attr.weights, *creation_context.device, creation_context.context); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h index 9dceeffc7a7..148dad38708 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h @@ -42,7 +42,6 @@ class ConvPowerVR : public GPUOperation { public: ConvPowerVR() = default; absl::Status Tune(const TuningParameters& params) override; - absl::Status Compile(const CreationContext& creation_context) override; absl::Status BindArguments() override; int3 GetGridSize() const override; @@ -137,6 +136,8 @@ class ConvPowerVR : public GPUOperation { const BHWC* dst_shape = nullptr); explicit ConvPowerVR(const OperationDef& definition); + void GenerateCode(const DeviceInfo& device_info); + template absl::Status UploadData(const tflite::gpu::Tensor& weights, const tflite::gpu::Tensor& biases, @@ -176,12 +177,6 @@ class ConvPowerVR : public GPUOperation { const Convolution2DAttributes& attr, ConvPowerVR* result, const BHWC* dst_shape); - friend std::string GenerateConv(const CLDevice& device, - const OperationDef& op_def, - bool stride_correction, - const ConvParams& conv_params, - Arguments* args); - ConvParams GuessBestParams(const CLDevice& device, const OperationDef& definition, const Convolution2DAttributes& attr, @@ -206,9 +201,9 @@ class ConvPowerVR : public GPUOperation { bool different_weights_for_height, const BHWC* dst_shape = 
nullptr) const; - std::string GenerateConv(const CLDevice& device, const OperationDef& op_def, - bool stride_correction, - const ConvPowerVR::ConvParams& conv_params); + std::string GenerateConv(const DeviceInfo& device_info, + const OperationDef& op_def, bool stride_correction, + const ConvParams& conv_params); int4 stride_padding_; int4 kernel_dilation_; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc index 59cfa6c012e..6f7ebf2b64b 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc @@ -30,9 +30,9 @@ namespace tflite { namespace gpu { namespace cl { namespace { -bool UseFP16SIMD(const CLDevice& device, CalculationsPrecision precision, +bool UseFP16SIMD(const DeviceInfo& device_info, CalculationsPrecision precision, bool kernel1x1) { - if (!device.IsAdreno()) { + if (!device_info.IsAdreno()) { return false; } switch (precision) { @@ -40,7 +40,7 @@ bool UseFP16SIMD(const CLDevice& device, CalculationsPrecision precision, case CalculationsPrecision::F32_F16: return false; case CalculationsPrecision::F16: - return device.IsAdreno3xx() && kernel1x1; + return device_info.IsAdreno3xx() && kernel1x1; } } } // namespace @@ -96,9 +96,9 @@ std::string ConvTexture::GenerateConvCode(const OperationDef& op_def, bool adreno4xx_optimization, bool stride_correction, bool different_weights_for_height, - const CLDevice& device) { + const DeviceInfo& device_info) { auto src_desc = op_def.src_tensors[0]; - src_desc.SetTextureAddressMode(GetFastestZeroMode(device)); + src_desc.SetTextureAddressMode(GetFastestZeroMode(device_info)); if (op_def.IsBatchSupported()) { src_desc.SetStateVar("BatchedWidth", "true"); } @@ -380,33 +380,23 @@ std::string ConvTexture::GenerateConvCode(const OperationDef& op_def, return c; } -absl::Status ConvTexture::Compile(const CreationContext& creation_context) { +void ConvTexture::GenerateCode(const DeviceInfo& device_info) { auto storage_type = definition_.GetPrimaryStorageType(); bool is1x1 = kernel_size_.x == 1 && kernel_size_.y == 1; bool adreno4xx_optimization = stride_.x == 1 && stride_.y == 1 && padding_.x == 0 && padding_.y == 0 && - creation_context.device->IsAdreno4xx() && + device_info.IsAdreno4xx() && storage_type == TensorStorageType::TEXTURE_ARRAY && definition_.precision == CalculationsPrecision::F16; const bool stride_correction = definition_.IsBatchSupported() && stride_.x != 1; - std::string code = - GenerateConvCode(definition_, block_size_, is1x1, adreno4xx_optimization, - stride_correction, different_weights_for_height_, - *creation_context.device); - std::string element_wise_code; - RETURN_IF_ERROR( - MergeOperations(linked_operations_, &args_, &element_wise_code)); - RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), - {{"dst_tensor", element_wise_code}}, - &code)); - std::vector options; - if (UseFP16SIMD(*creation_context.device, definition_.precision, is1x1)) { - options.push_back(CompilerOptions::ADRENO_FULL_SIMD_LINE); + code_ = GenerateConvCode(definition_, block_size_, is1x1, + adreno4xx_optimization, stride_correction, + different_weights_for_height_, device_info); + + if (UseFP16SIMD(device_info, definition_.precision, is1x1)) { + compiler_options_.push_back(CompilerOptions::ADRENO_FULL_SIMD_LINE); } - return creation_context.cache->GetOrCreateCLKernel( - code, "main_function", options, *creation_context.context, - *creation_context.device, &kernel_); } absl::Status 
ConvTexture::BindArguments() { @@ -441,6 +431,7 @@ absl::Status CreateConvTexture(const CreationContext& creation_context, const Convolution2DAttributes& attr, ConvTexture* result) { *result = ConvTexture(definition, attr); + result->GenerateCode(creation_context.device->GetInfo()); return result->UploadData(attr.weights, attr.bias, creation_context.context); } @@ -449,6 +440,7 @@ absl::Status CreateConvTexture(const CreationContext& creation_context, const FullyConnectedAttributes& attr, ConvTexture* result) { *result = ConvTexture(definition); + result->GenerateCode(creation_context.device->GetInfo()); return result->UploadData(attr.weights, attr.bias, creation_context.context); } @@ -458,6 +450,7 @@ absl::Status CreateConvTextureWino4x4To6x6( *result = ConvTexture(definition); result->different_weights_for_height_ = true; result->block_size_ = {4, 1, 2}; + result->GenerateCode(creation_context.device->GetInfo()); return result->UploadDataForWinograd4x4To6x6( attr.weights, *creation_context.device, creation_context.context); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.h index 6ed9f8a6ee6..9d50f0291da 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.h @@ -43,7 +43,6 @@ class ConvTexture : public GPUOperation { public: ConvTexture() = default; absl::Status Tune(const TuningParameters& params) override; - absl::Status Compile(const CreationContext& creation_context) override; absl::Status BindArguments() override; int3 GetGridSize() const override; @@ -89,12 +88,14 @@ class ConvTexture : public GPUOperation { absl::Span dst_0, absl::Span dst_1, absl::Span dst_2, absl::Span dst_3); + void GenerateCode(const DeviceInfo& device_info); + std::string GenerateConvCode(const OperationDef& op_def, const int3& block_size, bool is1x1, bool adreno4xx_optimization, bool stride_correction, bool different_weights_for_height, - const CLDevice& device); + const DeviceInfo& device_info); int2 kernel_size_; int2 stride_; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.cc index 8314871ab00..d6e17ce2a86 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.cc @@ -24,6 +24,13 @@ namespace tflite { namespace gpu { namespace cl { +ConverterToConvWeights::ConverterToConvWeights( + const OperationDef& definition, + const ConvWeightsDescription& conv_weights_desc) + : GPUOperation(definition), conv_weights_desc_(conv_weights_desc) { + code_ = GetConverterToConvWeightsCode(definition_, conv_weights_desc_); +} + ConverterToConvWeights::ConverterToConvWeights( ConverterToConvWeights&& operation) : GPUOperation(std::move(operation)), @@ -103,17 +110,6 @@ std::string ConverterToConvWeights::GetConverterToConvWeightsCode( return c; } -absl::Status ConverterToConvWeights::Compile( - const CreationContext& creation_context) { - std::string code = - GetConverterToConvWeightsCode(definition_, conv_weights_desc_); - RETURN_IF_ERROR( - args_.TransformToCLCode(creation_context.device->GetInfo(), {}, &code)); - return creation_context.cache->GetOrCreateCLKernel( - code, "main_function", *creation_context.context, - *creation_context.device, &kernel_); -} - absl::Status ConverterToConvWeights::BindArguments() { float4 mask = GetMaskForLastPlane(src_[0]->Channels()); 
RETURN_IF_ERROR(args_.SetFloat("mask_x", mask.x)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.h index 015e6b3a0dd..fe814d296fa 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.h @@ -30,9 +30,7 @@ namespace cl { class ConverterToConvWeights : public GPUOperation { public: ConverterToConvWeights(const OperationDef& definition, - const ConvWeightsDescription& conv_weights_desc) - : GPUOperation(definition), conv_weights_desc_(conv_weights_desc) {} - absl::Status Compile(const CreationContext& creation_context) override; + const ConvWeightsDescription& conv_weights_desc); absl::Status BindArguments() override; int3 GetGridSize() const override; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc index 7e7847282a6..ecd2fcbc6e1 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc @@ -54,6 +54,9 @@ ConvolutionTransposed::ConvolutionTransposed( } block_size_.z = 1; } + + code_ = GenerateConvolutionTransposedCode(definition_, device, + weights_are_buffer_, block_size_); } ConvolutionTransposed::ConvolutionTransposed(ConvolutionTransposed&& operation) @@ -331,24 +334,6 @@ std::string ConvolutionTransposed::GenerateConvolutionTransposedCode( return c; } -absl::Status ConvolutionTransposed::Compile( - const CreationContext& creation_context) { - std::string code = GenerateConvolutionTransposedCode( - definition_, *creation_context.device, weights_are_buffer_, block_size_); - std::string element_wise_code; - RETURN_IF_ERROR( - MergeOperations(linked_operations_, &args_, &element_wise_code)); - RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), - {{"dst_tensor", element_wise_code}}, - &code)); - - std::vector options; - // options.push_back(CompilerOptions::POWERVR_FP16); - return creation_context.cache->GetOrCreateCLKernel( - code, "main_function", options, *creation_context.context, - *creation_context.device, &kernel_); -} - absl::Status ConvolutionTransposed::BindArguments() { RETURN_IF_ERROR(args_.SetInt("stride_x", stride_.x)); RETURN_IF_ERROR(args_.SetInt("stride_y", stride_.y)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.h index 929444b3915..2263e7d2e4f 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.h @@ -39,7 +39,6 @@ class ConvolutionTransposed : public GPUOperation { public: ConvolutionTransposed() = default; absl::Status Tune(const TuningParameters& params) override; - absl::Status Compile(const CreationContext& creation_context) override; absl::Status BindArguments() override; int3 GetGridSize() const override; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.cc index cbd4d436dcd..5385c09eb0f 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.cc @@ -38,7 +38,19 @@ ConvolutionTransposed3D::ConvolutionTransposed3D( stride_(attr.stride.w, attr.stride.h, attr.stride.d), 
padding_(attr.padding.prepended.w, attr.padding.prepended.h, attr.padding.prepended.d), - block_size_(2, 2, 1, 2) {} + block_size_(2, 2, 1, 2) { + code_ = GenerateConvolutionTransposed3DCode(definition_, device, + weights_are_buffer_, block_size_); + if (device.IsPowerVR() && block_size_.y != 1) { + bool is_texture3d = definition_.src_tensors[0].storage_type == + TensorStorageType::TEXTURE_3D; + bool is_texture_array = definition_.src_tensors[0].storage_type == + TensorStorageType::TEXTURE_ARRAY; + if (is_texture3d || is_texture_array) { + compiler_options_.push_back(CompilerOptions::CL_OPT_DISABLE); + } + } +} ConvolutionTransposed3D::ConvolutionTransposed3D( ConvolutionTransposed3D&& operation) @@ -356,32 +368,6 @@ std::string ConvolutionTransposed3D::GenerateConvolutionTransposed3DCode( return c; } -absl::Status ConvolutionTransposed3D::Compile( - const CreationContext& creation_context) { - std::string code = GenerateConvolutionTransposed3DCode( - definition_, *creation_context.device, weights_are_buffer_, block_size_); - std::string element_wise_code; - RETURN_IF_ERROR( - MergeOperations(linked_operations_, &args_, &element_wise_code)); - RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), - {{"dst_tensor", element_wise_code}}, - &code)); - - std::vector options; - if (creation_context.device->IsPowerVR() && block_size_.y != 1) { - bool is_texture3d = definition_.src_tensors[0].storage_type == - TensorStorageType::TEXTURE_3D; - bool is_texture_array = definition_.src_tensors[0].storage_type == - TensorStorageType::TEXTURE_ARRAY; - if (is_texture3d || is_texture_array) { - options.push_back(CompilerOptions::CL_OPT_DISABLE); - } - } - return creation_context.cache->GetOrCreateCLKernel( - code, "main_function", options, *creation_context.context, - *creation_context.device, &kernel_); -} - absl::Status ConvolutionTransposed3D::BindArguments() { RETURN_IF_ERROR(args_.SetInt("stride_x", stride_.x)); RETURN_IF_ERROR(args_.SetInt("stride_y", stride_.y)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.h index 9a9f0d55199..b8b4aa75df2 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.h @@ -39,7 +39,6 @@ class ConvolutionTransposed3D : public GPUOperation { public: ConvolutionTransposed3D() = default; absl::Status Tune(const TuningParameters& params) override; - absl::Status Compile(const CreationContext& creation_context) override; absl::Status BindArguments() override; int3 GetGridSize() const override; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.cc index d60282d8662..3e3a5a1f7f4 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.cc @@ -42,6 +42,12 @@ ConvolutionTransposed3x3::ConvolutionTransposed3x3( } else { weights_upload_type_ = WeightsUploadType::GLOBAL_MEM; } + code_ = GenerateConvolutionTransposedCode(definition_, weights_upload_type_, + padding_, work_group_launch_order_); + if (definition_.precision == CalculationsPrecision::F16 && + device.IsPowerVR()) { + compiler_options_.push_back(CompilerOptions::POWERVR_FP16); + } } ConvolutionTransposed3x3::ConvolutionTransposed3x3( @@ -299,28 +305,6 @@ std::string 
ConvolutionTransposed3x3::GenerateConvolutionTransposedCode( return c; } -absl::Status ConvolutionTransposed3x3::Compile( - const CreationContext& creation_context) { - std::string code = GenerateConvolutionTransposedCode( - definition_, weights_upload_type_, padding_, work_group_launch_order_); - std::string element_wise_code; - RETURN_IF_ERROR( - MergeOperations(linked_operations_, &args_, &element_wise_code)); - RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), - {{"dst_tensor", element_wise_code}}, - &code)); - - std::vector options; - if (definition_.precision == CalculationsPrecision::F16 && - creation_context.device->IsPowerVR()) { - options.push_back(CompilerOptions::POWERVR_FP16); - } - RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel( - code, "main_function", options, *creation_context.context, - *creation_context.device, &kernel_)); - return absl::OkStatus(); -} - absl::Status ConvolutionTransposed3x3::BindArguments() { RETURN_IF_ERROR(args_.SetInt("filter_offset", 4 * 9 * src_[0]->Slices())); const int padding_x = diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.h index d68957ffb54..9addfe11984 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.h @@ -40,7 +40,6 @@ class ConvolutionTransposed3x3 : public GPUOperation { absl::Status Tune(const TuningParameters& params) override { return absl::OkStatus(); } - absl::Status Compile(const CreationContext& creation_context) override; absl::Status BindArguments() override; int3 GetGridSize() const override; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.cc index ac25b75db6d..8f8282781df 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.cc @@ -27,22 +27,21 @@ namespace gpu { namespace cl { ConvolutionTransposed3x3Thin::ConvolutionTransposed3x3Thin( - const OperationDef& definition, const ConvolutionTransposedAttributes& attr) - : GPUOperation(definition), - src_channels_(attr.weights.shape.i), - dst_channels_(attr.weights.shape.o) {} + const OperationDef& definition, const ConvolutionTransposedAttributes& attr, + const DeviceInfo& device_info) + : GPUOperation(definition) { + code_ = GenerateConvolutionTransposedCode( + definition_, DivideRoundUp(attr.weights.shape.i, 4), + DivideRoundUp(attr.weights.shape.o, 4), device_info); +} ConvolutionTransposed3x3Thin::ConvolutionTransposed3x3Thin( ConvolutionTransposed3x3Thin&& operation) - : GPUOperation(std::move(operation)), - src_channels_(operation.src_channels_), - dst_channels_(operation.dst_channels_) {} + : GPUOperation(std::move(operation)) {} ConvolutionTransposed3x3Thin& ConvolutionTransposed3x3Thin::operator=( ConvolutionTransposed3x3Thin&& operation) { if (this != &operation) { - std::swap(src_channels_, operation.src_channels_); - std::swap(dst_channels_, operation.dst_channels_); GPUOperation::operator=(std::move(operation)); } return *this; @@ -50,9 +49,9 @@ ConvolutionTransposed3x3Thin& ConvolutionTransposed3x3Thin::operator=( std::string ConvolutionTransposed3x3Thin::GenerateConvolutionTransposedCode( const OperationDef& op_def, int src_depth, int dst_depth, - const CLDevice& device) { + const 
DeviceInfo& device_info) { auto src_desc = op_def.src_tensors[0]; - src_desc.SetTextureAddressMode(GetFastestZeroMode(device)); + src_desc.SetTextureAddressMode(GetFastestZeroMode(device_info)); AddSrcTensor("src_tensor", src_desc); AddDstTensor("dst_tensor", op_def.dst_tensors[0]); @@ -184,22 +183,6 @@ std::string ConvolutionTransposed3x3Thin::GenerateConvolutionTransposedCode( return c; } -absl::Status ConvolutionTransposed3x3Thin::Compile( - const CreationContext& creation_context) { - std::string code = GenerateConvolutionTransposedCode( - definition_, DivideRoundUp(src_channels_, 4), - DivideRoundUp(dst_channels_, 4), *creation_context.device); - std::string element_wise_code; - RETURN_IF_ERROR( - MergeOperations(linked_operations_, &args_, &element_wise_code)); - RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), - {{"dst_tensor", element_wise_code}}, - &code)); - return creation_context.cache->GetOrCreateCLKernel( - code, "main_function", *creation_context.context, - *creation_context.device, &kernel_); -} - int3 ConvolutionTransposed3x3Thin::GetGridSize() const { const int grid_x = src_[0]->Width() * dst_[0]->Batch(); const int grid_y = src_[0]->Height(); @@ -225,7 +208,8 @@ absl::Status CreateConvolutionTransposed3x3Thin( return absl::InvalidArgumentError( "ConvolutionTransposed3x3Thin doesn't support this attributes"); } - *result = ConvolutionTransposed3x3Thin(definition, attr); + *result = ConvolutionTransposed3x3Thin(definition, attr, + creation_context.device->GetInfo()); RETURN_IF_ERROR( result->UploadData(attr.weights, attr.bias, creation_context.context)); return absl::OkStatus(); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.h index 67feef13a7f..274d75cb167 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.h @@ -37,7 +37,6 @@ namespace cl { class ConvolutionTransposed3x3Thin : public GPUOperation { public: ConvolutionTransposed3x3Thin() = default; - absl::Status Compile(const CreationContext& creation_context) override; int3 GetGridSize() const override; // Move only @@ -55,7 +54,8 @@ class ConvolutionTransposed3x3Thin : public GPUOperation { ConvolutionTransposed3x3Thin* result); explicit ConvolutionTransposed3x3Thin( const OperationDef& definition, - const ConvolutionTransposedAttributes& attr); + const ConvolutionTransposedAttributes& attr, + const DeviceInfo& device_info); template absl::Status UploadData(const tflite::gpu::Tensor& weights, const tflite::gpu::Tensor& biases, @@ -67,18 +67,15 @@ class ConvolutionTransposed3x3Thin : public GPUOperation { std::string GenerateConvolutionTransposedCode(const OperationDef& op_def, int src_depth, int dst_depth, - const CLDevice& device); - - int src_channels_; - int dst_channels_; + const DeviceInfo& device_info); }; template absl::Status ConvolutionTransposed3x3Thin::UploadData( const tflite::gpu::Tensor& weights, const tflite::gpu::Tensor& biases, CLContext* context) { - const int src_depth = DivideRoundUp(src_channels_, 4); - const int dst_depth = DivideRoundUp(dst_channels_, 4); + const int src_depth = DivideRoundUp(weights.shape.i, 4); + const int dst_depth = DivideRoundUp(weights.shape.o, 4); const int kernel_x = 3; // This operation support only 3x3 kernel const int kernel_y = 3; const int flt4_count = kernel_x * kernel_y * src_depth * dst_depth * 4; @@ -131,8 
+128,8 @@ absl::Status ConvolutionTransposed3x3Thin::UploadData( template void ConvolutionTransposed3x3Thin::RearrangeWeightsData( const tflite::gpu::Tensor& weights, absl::Span dst) { - const int src_depth = DivideRoundUp(src_channels_, 4); - const int dst_depth = DivideRoundUp(dst_channels_, 4); + const int src_depth = DivideRoundUp(weights.shape.i, 4); + const int dst_depth = DivideRoundUp(weights.shape.o, 4); const int kernel_x = 3; const int kernel_y = 3; @@ -151,7 +148,7 @@ void ConvolutionTransposed3x3Thin::RearrangeWeightsData( for (int i = 0; i < 4; ++i) { const int s_ch = s * 4 + i; const int d_ch = d * 4 + j; - if (s_ch < src_channels_ && d_ch < dst_channels_) { + if (s_ch < weights.shape.i && d_ch < weights.shape.o) { const int f_index = weights.shape.LinearIndex( {d_ch, kernel_index_y, kernel_index_x, s_ch}); filters[i][j] = weights.data[f_index]; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.cc index 866246460b2..4ecb23c318c 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.cc @@ -40,6 +40,12 @@ ConvolutionTransposed4x4::ConvolutionTransposed4x4( } else { weights_upload_type_ = WeightsUploadType::GLOBAL_MEM; } + + code_ = GenerateConvolutionTransposedCode(definition_, weights_upload_type_); + if (definition_.precision == CalculationsPrecision::F16 && + device.IsPowerVR()) { + compiler_options_.push_back(CompilerOptions::POWERVR_FP16); + } } ConvolutionTransposed4x4::ConvolutionTransposed4x4( @@ -57,8 +63,7 @@ ConvolutionTransposed4x4& ConvolutionTransposed4x4::operator=( } std::string ConvolutionTransposed4x4::GenerateConvolutionTransposedCode( - const OperationDef& op_def, - ConvolutionTransposed4x4::WeightsUploadType weights_upload_type) { + const OperationDef& op_def, WeightsUploadType weights_upload_type) { auto src_desc = op_def.src_tensors[0]; src_desc.SetTextureAddressMode(TextureAddressMode::ZERO); if (op_def.IsBatchSupported()) { @@ -290,28 +295,6 @@ std::string ConvolutionTransposed4x4::GenerateConvolutionTransposedCode( return c; } -absl::Status ConvolutionTransposed4x4::Compile( - const CreationContext& creation_context) { - std::string code = - GenerateConvolutionTransposedCode(definition_, weights_upload_type_); - std::string element_wise_code; - RETURN_IF_ERROR( - MergeOperations(linked_operations_, &args_, &element_wise_code)); - RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), - {{"dst_tensor", element_wise_code}}, - &code)); - - std::vector options; - if (definition_.precision == CalculationsPrecision::F16 && - creation_context.device->IsPowerVR()) { - options.push_back(CompilerOptions::POWERVR_FP16); - } - RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel( - code, "main_function", options, *creation_context.context, - *creation_context.device, &kernel_)); - return absl::OkStatus(); -} - absl::Status ConvolutionTransposed4x4::BindArguments() { return args_.SetInt("filter_offset", 4 * 16 * src_[0]->Slices()); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.h index 73f2da51eb3..21ec8c3e293 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.h @@ -40,7 +40,6 @@ class ConvolutionTransposed4x4 : public 
GPUOperation { absl::Status Tune(const TuningParameters& params) override { return absl::OkStatus(); } - absl::Status Compile(const CreationContext& creation_context) override; absl::Status BindArguments() override; int3 GetGridSize() const override; @@ -73,8 +72,7 @@ class ConvolutionTransposed4x4 : public GPUOperation { absl::Span dst); std::string GenerateConvolutionTransposedCode( - const OperationDef& op_def, - ConvolutionTransposed4x4::WeightsUploadType weights_upload_type); + const OperationDef& op_def, WeightsUploadType weights_upload_type); WeightsUploadType weights_upload_type_; }; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.cc index 12b90874706..2268313a867 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.cc @@ -28,25 +28,25 @@ namespace gpu { namespace cl { ConvolutionTransposedThin::ConvolutionTransposedThin( - const OperationDef& definition, const ConvolutionTransposedAttributes& attr) - : GPUOperation(definition), - kernel_size_(attr.weights.shape.w, attr.weights.shape.h), - src_channels_(attr.weights.shape.i), - dst_channels_(attr.weights.shape.o) {} + const OperationDef& definition, const ConvolutionTransposedAttributes& attr, + const DeviceInfo& device_info) + : GPUOperation(definition) { + code_ = GenerateConvolutionTransposedCode( + definition_, DivideRoundUp(attr.weights.shape.i, 4), attr.weights.shape.o, + int2(attr.weights.shape.w, attr.weights.shape.h)); + if (definition_.precision == CalculationsPrecision::F16 && + device_info.IsAdreno3xx()) { + compiler_options_.push_back(CompilerOptions::ADRENO_FULL_SIMD_LINE); + } +} ConvolutionTransposedThin::ConvolutionTransposedThin( ConvolutionTransposedThin&& operation) - : GPUOperation(std::move(operation)), - kernel_size_(operation.kernel_size_), - src_channels_(operation.src_channels_), - dst_channels_(operation.dst_channels_) {} + : GPUOperation(std::move(operation)) {} ConvolutionTransposedThin& ConvolutionTransposedThin::operator=( ConvolutionTransposedThin&& operation) { if (this != &operation) { - std::swap(kernel_size_, operation.kernel_size_); - std::swap(src_channels_, operation.src_channels_); - std::swap(dst_channels_, operation.dst_channels_); GPUOperation::operator=(std::move(operation)); } return *this; @@ -151,29 +151,6 @@ std::string ConvolutionTransposedThin::GenerateConvolutionTransposedCode( return c; } -absl::Status ConvolutionTransposedThin::Compile( - const CreationContext& creation_context) { - std::string code = GenerateConvolutionTransposedCode( - definition_, DivideRoundUp(src_channels_, 4), dst_channels_, - kernel_size_); - std::string element_wise_code; - RETURN_IF_ERROR( - MergeOperations(linked_operations_, &args_, &element_wise_code)); - RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), - {{"dst_tensor", element_wise_code}}, - &code)); - - std::vector options; - if (definition_.precision == CalculationsPrecision::F16 && - creation_context.device->IsAdreno3xx()) { - options.push_back(CompilerOptions::ADRENO_FULL_SIMD_LINE); - } - - return creation_context.cache->GetOrCreateCLKernel( - code, "main_function", *creation_context.context, - *creation_context.device, &kernel_); -} - int3 ConvolutionTransposedThin::GetGridSize() const { const int grid_x = src_[0]->Width() * dst_[0]->Batch(); const int grid_y = src_[0]->Height(); @@ -197,7 +174,8 @@ 
absl::Status CreateConvolutionTransposedThin( return absl::InvalidArgumentError( "ConvolutionTransposedThin doesn't support this attributes"); } - *result = ConvolutionTransposedThin(definition, attr); + *result = ConvolutionTransposedThin(definition, attr, + creation_context.device->GetInfo()); RETURN_IF_ERROR( result->UploadData(attr.weights, attr.bias, creation_context.context)); return absl::OkStatus(); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.h index 306114959e0..817887ab7af 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.h @@ -37,7 +37,6 @@ namespace cl { class ConvolutionTransposedThin : public GPUOperation { public: ConvolutionTransposedThin() = default; - absl::Status Compile(const CreationContext& creation_context) override; int3 GetGridSize() const override; // Move only @@ -53,7 +52,8 @@ class ConvolutionTransposedThin : public GPUOperation { const ConvolutionTransposedAttributes& attr, ConvolutionTransposedThin* result); ConvolutionTransposedThin(const OperationDef& definition, - const ConvolutionTransposedAttributes& attr); + const ConvolutionTransposedAttributes& attr, + const DeviceInfo& device_info); template absl::Status UploadData(const tflite::gpu::Tensor& weights, const tflite::gpu::Tensor& biases, @@ -65,19 +65,15 @@ class ConvolutionTransposedThin : public GPUOperation { std::string GenerateConvolutionTransposedCode(const OperationDef& op_def, int src_depth, int dst_channels, const int2& kernel_size); - - int2 kernel_size_; - int src_channels_; - int dst_channels_; }; template absl::Status ConvolutionTransposedThin::UploadData( const tflite::gpu::Tensor& weights, const tflite::gpu::Tensor& biases, CLContext* context) { - const int src_depth = DivideRoundUp(src_channels_, 4); + const int src_depth = DivideRoundUp(weights.shape.i, 4); const int flt4_count = - kernel_size_.x * kernel_size_.y * src_depth * dst_channels_; + weights.shape.w * weights.shape.h * src_depth * weights.shape.o; const bool f32_weights = definition_.precision == CalculationsPrecision::F32; @@ -121,20 +117,20 @@ absl::Status ConvolutionTransposedThin::UploadData( template void ConvolutionTransposedThin::RearrangeWeightsData( const tflite::gpu::Tensor& weights, absl::Span dst) { - const int src_depth = DivideRoundUp(src_channels_, 4); - const int kernel_x = kernel_size_.x; - const int kernel_y = kernel_size_.y; + const int src_depth = DivideRoundUp(weights.shape.i, 4); + const int kernel_x = weights.shape.w; + const int kernel_y = weights.shape.h; int counter = 0; for (int s = 0; s < src_depth; ++s) { for (int y = 0; y < kernel_y; ++y) { for (int x = 0; x < kernel_x; ++x) { - std::vector filters(dst_channels_); - for (int j = 0; j < dst_channels_; ++j) { + std::vector filters(weights.shape.o); + for (int j = 0; j < weights.shape.o; ++j) { for (int i = 0; i < 4; ++i) { const int s_ch = s * 4 + i; const int d_ch = j; - if (s_ch < src_channels_ && d_ch < dst_channels_) { + if (s_ch < weights.shape.i && d_ch < weights.shape.o) { const int f_index = weights.shape.LinearIndex({d_ch, y, x, s_ch}); filters[j][i] = weights.data[f_index]; } else { @@ -142,7 +138,7 @@ void ConvolutionTransposedThin::RearrangeWeightsData( } } } - for (int j = 0; j < dst_channels_; ++j) { + for (int j = 0; j < weights.shape.o; ++j) { dst[counter++] = filters[j]; } } diff --git 
a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.cc b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.cc index 1854ca9001a..3ab05134bd6 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.cc @@ -70,7 +70,8 @@ std::string GetSrcValue(int channel_multiplier, const std::string coords) { DepthwiseConvolution::DepthwiseConvolution( const OperationDef& definition, - const DepthwiseConvolution2DAttributes& attr, bool weights_are_buffer) + const DepthwiseConvolution2DAttributes& attr, bool weights_are_buffer, + const DeviceInfo& device_info) : GPUOperation(definition), weights_are_buffer_(weights_are_buffer), kernel_size_(attr.weights.shape.w, attr.weights.shape.h, 0, 0), @@ -79,11 +80,17 @@ DepthwiseConvolution::DepthwiseConvolution( dilation_(attr.dilations.w, attr.dilations.h, 0, 0), channel_multiplier_(attr.weights.shape.o) { work_group_size_ = int3(8, 8, 1); + const bool stride_correction = + definition_.IsBatchSupported() && stride_.x != 1; + code_ = GenerateDepthwiseConvolutionCode(definition_, stride_correction, + channel_multiplier_, + weights_are_buffer_, device_info); } DepthwiseConvolution::DepthwiseConvolution( const OperationDef& definition, - const DepthwiseConvolution3DAttributes& attr, bool weights_are_buffer) + const DepthwiseConvolution3DAttributes& attr, bool weights_are_buffer, + const DeviceInfo& device_info) : GPUOperation(definition), weights_are_buffer_(weights_are_buffer), kernel_size_(attr.weights.shape.w, attr.weights.shape.h, @@ -94,6 +101,11 @@ DepthwiseConvolution::DepthwiseConvolution( dilation_(attr.dilations.w, attr.dilations.h, attr.dilations.d, 0), channel_multiplier_(attr.weights.shape.o) { work_group_size_ = int3(8, 8, 1); + const bool stride_correction = + definition_.IsBatchSupported() && stride_.x != 1; + code_ = GenerateDepthwiseConvolutionCode(definition_, stride_correction, + channel_multiplier_, + weights_are_buffer_, device_info); } DepthwiseConvolution::DepthwiseConvolution(DepthwiseConvolution&& operation) @@ -121,9 +133,9 @@ DepthwiseConvolution& DepthwiseConvolution::operator=( std::string DepthwiseConvolution::GenerateDepthwiseConvolutionCode( const OperationDef& op_def, bool stride_correction, int channel_multiplier, - bool weights_are_buffer, const CLDevice& device) { + bool weights_are_buffer, const DeviceInfo& device_info) { auto src_desc = op_def.src_tensors[0]; - src_desc.SetTextureAddressMode(GetFastestZeroMode(device)); + src_desc.SetTextureAddressMode(GetFastestZeroMode(device_info)); if (op_def.IsBatchSupported()) { src_desc.SetStateVar("BatchedWidth", "true"); } @@ -270,24 +282,6 @@ std::string DepthwiseConvolution::GenerateDepthwiseConvolutionCode( return c; } -absl::Status DepthwiseConvolution::Compile( - const CreationContext& creation_context) { - const bool stride_correction = - definition_.IsBatchSupported() && stride_.x != 1; - std::string code = GenerateDepthwiseConvolutionCode( - definition_, stride_correction, channel_multiplier_, weights_are_buffer_, - *creation_context.device); - std::string element_wise_code; - RETURN_IF_ERROR( - MergeOperations(linked_operations_, &args_, &element_wise_code)); - RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), - {{"dst_tensor", element_wise_code}}, - &code)); - return creation_context.cache->GetOrCreateCLKernel( - code, "main_function", *creation_context.context, - *creation_context.device, &kernel_); -} - absl::Status DepthwiseConvolution::BindArguments() { 
RETURN_IF_ERROR(args_.SetInt("kernel_size_x", kernel_size_.x)); RETURN_IF_ERROR(args_.SetInt("stride_x", stride_.x)); @@ -321,7 +315,8 @@ absl::Status CreateDepthwiseConvolution( const DepthwiseConvolution2DAttributes& attr, DepthwiseConvolution* result) { bool weights_are_buffer = creation_context.device->IsMali(); - *result = DepthwiseConvolution(definition, attr, weights_are_buffer); + *result = DepthwiseConvolution(definition, attr, weights_are_buffer, + creation_context.device->GetInfo()); RETURN_IF_ERROR( result->UploadWeights(attr.weights, creation_context.context)); @@ -344,7 +339,8 @@ absl::Status CreateDepthwiseConvolution( const DepthwiseConvolution3DAttributes& attr, DepthwiseConvolution* result) { bool weights_are_buffer = creation_context.device->IsMali(); - *result = DepthwiseConvolution(definition, attr, weights_are_buffer); + *result = DepthwiseConvolution(definition, attr, weights_are_buffer, + creation_context.device->GetInfo()); RETURN_IF_ERROR( result->UploadWeights(attr.weights, creation_context.context)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.h b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.h index 73782bbfaa1..be87c182880 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.h @@ -38,7 +38,6 @@ namespace cl { class DepthwiseConvolution : public GPUOperation { public: DepthwiseConvolution() = default; - absl::Status Compile(const CreationContext& creation_context) override; absl::Status BindArguments() override; int3 GetGridSize() const override; @@ -59,10 +58,10 @@ class DepthwiseConvolution : public GPUOperation { DepthwiseConvolution* result); DepthwiseConvolution(const OperationDef& definition, const DepthwiseConvolution2DAttributes& attr, - bool weights_are_buffer); + bool weights_are_buffer, const DeviceInfo& device_info); DepthwiseConvolution(const OperationDef& definition, const DepthwiseConvolution3DAttributes& attr, - bool weights_are_buffer); + bool weights_are_buffer, const DeviceInfo& device_info); template absl::Status UploadWeights(const tflite::gpu::Tensor& weights, @@ -84,7 +83,7 @@ class DepthwiseConvolution : public GPUOperation { bool stride_correction, int channel_multiplier, bool weights_are_buffer, - const CLDevice& device); + const DeviceInfo& device_info); bool weights_are_buffer_; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc index db5c920bcd4..0bd84c3b116 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc @@ -29,11 +29,19 @@ namespace cl { DepthwiseConv3x3::DepthwiseConv3x3(const OperationDef& definition, bool weights_are_buffer, - bool local_mem_uploads) + bool local_mem_uploads, + const DeviceInfo& device_info) : GPUOperation(definition), weights_are_buffer_(weights_are_buffer), local_mem_uploads_(local_mem_uploads) { work_group_size_ = int3(8, 4, 1); + code_ = GenerateDepthwiseConvCode(definition_, device_info, + weights_are_buffer_, local_mem_uploads_); + + if (definition_.precision == CalculationsPrecision::F16 && + device_info.IsPowerVR()) { + compiler_options_.push_back(CompilerOptions::POWERVR_FP16); + } } DepthwiseConv3x3::DepthwiseConv3x3(DepthwiseConv3x3&& operation) @@ -51,10 +59,10 @@ DepthwiseConv3x3& DepthwiseConv3x3::operator=(DepthwiseConv3x3&& operation) { } std::string DepthwiseConv3x3::GenerateDepthwiseConvCode( - 
const OperationDef& op_def, const CLDevice& device, bool weights_are_buffer, - bool local_mem_uploads) { + const OperationDef& op_def, const DeviceInfo& device_info, + bool weights_are_buffer, bool local_mem_uploads) { auto src_desc = op_def.src_tensors[0]; - src_desc.SetTextureAddressMode(GetFastestZeroMode(device)); + src_desc.SetTextureAddressMode(GetFastestZeroMode(device_info)); AddSrcTensor("src_tensor", src_desc); AddDstTensor("dst_tensor", op_def.dst_tensors[0]); @@ -281,28 +289,6 @@ std::string DepthwiseConv3x3::GenerateDepthwiseConvCode( return c; } -absl::Status DepthwiseConv3x3::Compile( - const CreationContext& creation_context) { - std::string code = - GenerateDepthwiseConvCode(definition_, *creation_context.device, - weights_are_buffer_, local_mem_uploads_); - std::string element_wise_code; - RETURN_IF_ERROR( - MergeOperations(linked_operations_, &args_, &element_wise_code)); - RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), - {{"dst_tensor", element_wise_code}}, - &code)); - - std::vector options; - if (definition_.precision == CalculationsPrecision::F16 && - creation_context.device->IsPowerVR()) { - options.push_back(CompilerOptions::POWERVR_FP16); - } - return creation_context.cache->GetOrCreateCLKernel( - code, "main_function", options, *creation_context.context, - *creation_context.device, &kernel_); -} - absl::Status DepthwiseConv3x3::BindArguments() { RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); return args_.SetObjectRef("dst_tensor", dst_[0]); @@ -343,7 +329,8 @@ absl::Status CreateDepthwiseConv3x3( creation_context.device->IsPowerVR() || creation_context.device->IsMali(); bool local_mem_uploads = weights_are_buffer && creation_context.device->IsPowerVR(); - *result = DepthwiseConv3x3(definition, weights_are_buffer, local_mem_uploads); + *result = DepthwiseConv3x3(definition, weights_are_buffer, local_mem_uploads, + creation_context.device->GetInfo()); return result->UploadWeightsAndBiases(attr.weights, attr.bias, creation_context.context); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.h b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.h index b2a2a1b9463..d02d65b4e38 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.h @@ -39,7 +39,6 @@ class DepthwiseConv3x3 : public GPUOperation { public: DepthwiseConv3x3() = default; absl::Status Tune(const TuningParameters& params) override; - absl::Status Compile(const CreationContext& creation_context) override; absl::Status BindArguments() override; int3 GetGridSize() const override; @@ -51,7 +50,8 @@ class DepthwiseConv3x3 : public GPUOperation { private: explicit DepthwiseConv3x3(const OperationDef& definition, - bool weights_are_buffer, bool local_mem_uploads); + bool weights_are_buffer, bool local_mem_uploads, + const DeviceInfo& device_info); template absl::Status UploadWeightsAndBiases( const tflite::gpu::Tensor& weights, @@ -67,7 +67,7 @@ class DepthwiseConv3x3 : public GPUOperation { const tflite::gpu::Tensor& biases, absl::Span dst); std::string GenerateDepthwiseConvCode(const OperationDef& op_def, - const CLDevice& device, + const DeviceInfo& device_info, bool weights_are_buffer, bool local_mem_uploads); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc index eb4dcec0de4..2ab0284febe 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc +++ 
b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc @@ -24,8 +24,22 @@ namespace tflite { namespace gpu { namespace cl { -FullyConnected::FullyConnected(const OperationDef& definition) - : GPUOperation(definition) {} +FullyConnected::FullyConnected(const OperationDef& definition, + const DeviceInfo& device_info) + : GPUOperation(definition) { + if (device_info.IsAdreno()) { + if (device_info.IsAdreno3xx()) { + work_group_size_ = int3(8, 4, 1); + } else if (device_info.IsAdreno4xx()) { + work_group_size_ = int3(16, 4, 1); + } else { + work_group_size_ = int3(32, 4, 1); + } + } else { + work_group_size_ = int3(16, 4, 1); + } + code_ = GetFullyConnectedKernelCode(definition_, work_group_size_); +} FullyConnected::FullyConnected(FullyConnected&& kernel) : GPUOperation(std::move(kernel)) {} @@ -92,36 +106,6 @@ std::string FullyConnected::GetFullyConnectedKernelCode( return c; } -absl::Status FullyConnected::Compile(const CreationContext& creation_context) { - int wg_width = 32; - int wg_height = 4; - int work_items; - do { - work_group_size_ = {wg_width, wg_height, 1}; - wg_width /= 2; - std::string code = - GetFullyConnectedKernelCode(definition_, work_group_size_); - std::string element_wise_code; - RETURN_IF_ERROR( - MergeOperations(linked_operations_, &args_, &element_wise_code)); - RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), - {{"dst_tensor", element_wise_code}}, - &code)); - auto status = creation_context.cache->GetOrCreateCLKernel( - code, "main_function", *creation_context.context, - *creation_context.device, &kernel_); - if (!status.ok()) { - if (work_group_size_.x == 1) { - return status; - } else { - continue; - } - } - work_items = work_group_size_.x * work_group_size_.y * work_group_size_.z; - } while (work_items > kernel_.GetMaxWorkGroupSize()); - return absl::OkStatus(); -} - int3 FullyConnected::GetGridSize() const { return int3(dst_[0]->Slices(), 1, 1); } @@ -130,7 +114,7 @@ absl::Status CreateFullyConnected(const CreationContext& creation_context, const OperationDef& definition, const FullyConnectedAttributes& attr, FullyConnected* result) { - *result = FullyConnected(definition); + *result = FullyConnected(definition, creation_context.device->GetInfo()); RETURN_IF_ERROR( result->UploadWeights(attr.weights, creation_context.context)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h index 344e4be82c3..ced3913ead7 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h @@ -93,7 +93,6 @@ class FullyConnected : public GPUOperation { return absl::OkStatus(); } int3 GetGridSize() const override; - absl::Status Compile(const CreationContext& creation_context) override; // Move only FullyConnected(FullyConnected&& kernel); @@ -102,7 +101,7 @@ class FullyConnected : public GPUOperation { FullyConnected& operator=(const FullyConnected&) = delete; private: - explicit FullyConnected(const OperationDef& definition); + FullyConnected(const OperationDef& definition, const DeviceInfo& device_info); friend absl::Status CreateFullyConnected( const CreationContext& creation_context, const OperationDef& definition, const FullyConnectedAttributes& attr, FullyConnected* result); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc index e0a47381c8c..beb62632099 100644 --- 
a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc
@@ -190,9 +190,9 @@ absl::Status GPUOperation::Compile(const CreationContext& creation_context) {
       creation_context.device->GetInfo(),
       {{dst_tensors_names_[0], element_wise_code}}, &code_));
   RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel(
-      code_, "main_function", *creation_context.context,
+      code_, "main_function", compiler_options_, *creation_context.context,
       *creation_context.device, &kernel_));
-  return PostCompileCheck();
+  return PostCompileCheck(creation_context.device->GetInfo());
 }
 
 ElementwiseOperation::ElementwiseOperation(ElementwiseOperation&& operation)
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h
index eb3ca8f7e3e..01e11f3ea64 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h
@@ -103,7 +103,9 @@ class GPUOperation {
 
   virtual absl::Status Compile(const CreationContext& creation_context);
 
-  virtual absl::Status PostCompileCheck() { return absl::OkStatus(); }
+  virtual absl::Status PostCompileCheck(const DeviceInfo& device_info) {
+    return absl::OkStatus();
+  }
 
   const OperationDef& GetDefinition() const { return definition_; }
 
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/lstm.cc b/tensorflow/lite/delegates/gpu/cl/kernels/lstm.cc
index f894a5cc45e..0fc5e498de4 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/lstm.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/lstm.cc
@@ -25,7 +25,10 @@ namespace tflite {
 namespace gpu {
 namespace cl {
 
-LSTM::LSTM(const OperationDef& definition) : GPUOperation(definition) {}
+LSTM::LSTM(const OperationDef& definition, const DeviceInfo& device_info)
+    : GPUOperation(definition) {
+  code_ = GetLSTMCode(definition_, device_info);
+}
 
 LSTM::LSTM(LSTM&& kernel) : GPUOperation(std::move(kernel)) {}
 
@@ -37,7 +40,7 @@ LSTM& LSTM::operator=(LSTM&& kernel) {
   return *this;
 }
 
 std::string LSTM::GetLSTMCode(const OperationDef& op_def,
-                              const CLDevice& device) {
+                              const DeviceInfo& device_info) {
   AddSrcTensor("intermediate", op_def.src_tensors[0]);
   AddSrcTensor("prev_state", op_def.src_tensors[1]);
   AddDstTensor("new_state", op_def.dst_tensors[0]);
@@ -56,7 +59,8 @@ std::string LSTM::GetLSTMCode(const OperationDef& op_def,
   c += " FLT4 r1 = args.intermediate.Read(0, 0, Z + state_stride, B);\n";
   c += " FLT4 r2 = args.intermediate.Read(0, 0, Z + state_stride * 2, B);\n";
   c += " FLT4 r3 = args.intermediate.Read(0, 0, Z + state_stride * 3, B);\n";
-  if (op_def.precision != CalculationsPrecision::F32 && device.IsAdreno()) {
+  if (op_def.precision != CalculationsPrecision::F32 &&
+      device_info.IsAdreno()) {
     c += " FLT4 input_gate;\n";
     c += " FLT4 new_input;\n";
     c += " FLT4 forget_gate;\n";
@@ -101,15 +105,6 @@ std::string LSTM::GetLSTMCode(const OperationDef& op_def,
   return c;
 }
 
-absl::Status LSTM::Compile(const CreationContext& creation_context) {
-  std::string code = GetLSTMCode(definition_, *creation_context.device);
-  RETURN_IF_ERROR(
-      args_.TransformToCLCode(creation_context.device->GetInfo(), {}, &code));
-  return creation_context.cache->GetOrCreateCLKernel(
-      code, "main_function", *creation_context.context,
-      *creation_context.device, &kernel_);
-}
-
 int3 LSTM::GetGridSize() const {
   const int grid_x = dst_[0]->Batch();
   const int grid_y = dst_[0]->Slices();
@@ -117,7 +112,9 @@ int3 LSTM::GetGridSize() const {
   return int3(grid_x, grid_y, grid_z);
 }
 
-LSTM CreateLSTM(const OperationDef& definition) { return LSTM(definition); }
+LSTM CreateLSTM(const OperationDef& definition, const DeviceInfo& device_info) {
+  return LSTM(definition, device_info);
+}
 
 } // namespace cl
 } // namespace gpu
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/lstm.h b/tensorflow/lite/delegates/gpu/cl/kernels/lstm.h
index 31b3c0f876b..91bfd22a0e6 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/lstm.h
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/lstm.h
@@ -27,9 +27,8 @@ namespace cl {
 
 class LSTM : public GPUOperation {
  public:
-  explicit LSTM(const OperationDef& definition);
+  LSTM(const OperationDef& definition, const DeviceInfo& device_info);
   int3 GetGridSize() const override;
-  absl::Status Compile(const CreationContext& creation_context) override;
 
   // Move only
   LSTM(LSTM&& kernel);
@@ -38,10 +37,11 @@ class LSTM : public GPUOperation {
   LSTM& operator=(const LSTM&) = delete;
 
 private:
-  std::string GetLSTMCode(const OperationDef& op_def, const CLDevice& device);
+  std::string GetLSTMCode(const OperationDef& op_def,
+                          const DeviceInfo& device_info);
 };
 
-LSTM CreateLSTM(const OperationDef& definition);
+LSTM CreateLSTM(const OperationDef& definition, const DeviceInfo& device_info);
 
 } // namespace cl
 } // namespace gpu
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/lstm_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/lstm_test.cc
index 6e1b858711a..d7ea3ee6474 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/lstm_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/lstm_test.cc
@@ -67,7 +67,7 @@ TEST_F(OpenCLOperationTest, LSTM) {
       op_def.dst_tensors.push_back({data_type, storage, Layout::BHWC});
       TensorFloat32 new_state;
       TensorFloat32 new_activ;
-      LSTM operation = CreateLSTM(op_def);
+      LSTM operation = CreateLSTM(op_def, env_.GetDevicePtr()->GetInfo());
       ASSERT_OK(ExecuteGPUOperation(
           {src_tensor, prev_state}, creation_context_, &operation,
           {BHWC(1, 1, 1, 4), BHWC(1, 1, 1, 4)}, {&new_state, &new_activ}));
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.cc b/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.cc
index e0e49e82a09..7be6cc0b9b4 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.cc
@@ -25,19 +25,25 @@ namespace gpu {
 namespace cl {
 
 MaxUnpooling::MaxUnpooling(const OperationDef& definition,
-                           const MaxUnpooling2DAttributes& attr)
+                           const MaxUnpooling2DAttributes& attr,
+                           const DeviceInfo& device_info)
     : GPUOperation(definition),
       stride_(attr.strides.w, attr.strides.h, 0, 0),
       padding_(attr.padding.appended.w, attr.padding.appended.h, 0, 0),
-      kernel_size_(attr.kernel.w, attr.kernel.h, 0, 0) {}
+      kernel_size_(attr.kernel.w, attr.kernel.h, 0, 0) {
+  code_ = GetMaxUnpoolingKernelCode(definition_, device_info);
+}
 
 MaxUnpooling::MaxUnpooling(const OperationDef& definition,
-                           const MaxUnpooling3DAttributes& attr)
+                           const MaxUnpooling3DAttributes& attr,
+                           const DeviceInfo& device_info)
     : GPUOperation(definition),
       stride_(attr.strides.w, attr.strides.h, attr.strides.d, 0),
       padding_(attr.padding.appended.w, attr.padding.appended.h,
               attr.padding.appended.d, 0),
-      kernel_size_(attr.kernel.w, attr.kernel.h, attr.kernel.d, 0) {}
+      kernel_size_(attr.kernel.w, attr.kernel.h, attr.kernel.d, 0) {
+  code_ = GetMaxUnpoolingKernelCode(definition_, device_info);
+}
 
 MaxUnpooling::MaxUnpooling(MaxUnpooling&& kernel)
     : GPUOperation(std::move(kernel)),
@@ -55,16 +61,16 @@ MaxUnpooling& MaxUnpooling::operator=(MaxUnpooling&& kernel) {
   return *this;
 }
-std::string MaxUnpooling::GetMaxUnpoolingKernelCode(const OperationDef& op_def, - const CLDevice& device) { +std::string MaxUnpooling::GetMaxUnpoolingKernelCode( + const OperationDef& op_def, const DeviceInfo& device_info) { auto src_desc = op_def.src_tensors[0]; - src_desc.SetTextureAddressMode(GetFastestZeroMode(device)); + src_desc.SetTextureAddressMode(GetFastestZeroMode(device_info)); if (op_def.IsBatchSupported()) { src_desc.SetStateVar("BatchedWidth", "true"); } AddSrcTensor("src_tensor", src_desc); auto src_ind_desc = op_def.src_tensors[1]; - src_ind_desc.SetTextureAddressMode(GetFastestZeroMode(device)); + src_ind_desc.SetTextureAddressMode(GetFastestZeroMode(device_info)); if (op_def.IsBatchSupported()) { src_ind_desc.SetStateVar("BatchedWidth", "true"); } @@ -169,20 +175,6 @@ std::string MaxUnpooling::GetMaxUnpoolingKernelCode(const OperationDef& op_def, return c; } -absl::Status MaxUnpooling::Compile(const CreationContext& creation_context) { - std::string code = - GetMaxUnpoolingKernelCode(definition_, *creation_context.device); - std::string element_wise_code; - RETURN_IF_ERROR( - MergeOperations(linked_operations_, &args_, &element_wise_code)); - RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), - {{"dst_tensor", element_wise_code}}, - &code)); - return creation_context.cache->GetOrCreateCLKernel( - code, "main_function", *creation_context.context, - *creation_context.device, &kernel_); -} - absl::Status MaxUnpooling::BindArguments() { if (definition_.dst_tensors[0].HasAxis(Axis::WIDTH)) { RETURN_IF_ERROR(args_.SetInt("stride_x", stride_.x)); @@ -210,13 +202,15 @@ int3 MaxUnpooling::GetGridSize() const { } MaxUnpooling CreateMaxUnpooling(const OperationDef& definition, - const MaxUnpooling2DAttributes& attr) { - return MaxUnpooling(definition, attr); + const MaxUnpooling2DAttributes& attr, + const DeviceInfo& device_info) { + return MaxUnpooling(definition, attr, device_info); } MaxUnpooling CreateMaxUnpooling(const OperationDef& definition, - const MaxUnpooling3DAttributes& attr) { - return MaxUnpooling(definition, attr); + const MaxUnpooling3DAttributes& attr, + const DeviceInfo& device_info) { + return MaxUnpooling(definition, attr, device_info); } } // namespace cl diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.h b/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.h index d406dc2aee1..da4b0e28cec 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.h @@ -28,13 +28,14 @@ namespace cl { class MaxUnpooling : public GPUOperation { public: MaxUnpooling(const OperationDef& definition, - const MaxUnpooling2DAttributes& attr); + const MaxUnpooling2DAttributes& attr, + const DeviceInfo& device_info); MaxUnpooling(const OperationDef& definition, - const MaxUnpooling3DAttributes& attr); + const MaxUnpooling3DAttributes& attr, + const DeviceInfo& device_info); absl::Status BindArguments() override; int3 GetGridSize() const override; - absl::Status Compile(const CreationContext& creation_context) override; // Move only MaxUnpooling(MaxUnpooling&& kernel); @@ -44,7 +45,7 @@ class MaxUnpooling : public GPUOperation { private: std::string GetMaxUnpoolingKernelCode(const OperationDef& op_def, - const CLDevice& device); + const DeviceInfo& device_info); int4 stride_; int4 padding_; @@ -52,10 +53,12 @@ class MaxUnpooling : public GPUOperation { }; MaxUnpooling CreateMaxUnpooling(const OperationDef& definition, - const MaxUnpooling2DAttributes& attr); + const 
MaxUnpooling2DAttributes& attr, + const DeviceInfo& device_info); MaxUnpooling CreateMaxUnpooling(const OperationDef& definition, - const MaxUnpooling3DAttributes& attr); + const MaxUnpooling3DAttributes& attr, + const DeviceInfo& device_info); } // namespace cl } // namespace gpu diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling_test.cc index c03cb4f89d7..77e92c8950b 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling_test.cc @@ -55,7 +55,8 @@ TEST_F(OpenCLOperationTest, MaxUnpooling) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - MaxUnpooling operation = CreateMaxUnpooling(op_def, attr); + MaxUnpooling operation = + CreateMaxUnpooling(op_def, attr, env_.GetDevicePtr()->GetInfo()); ASSERT_OK(ExecuteGPUOperation({src_tensor, src_ind_tensor}, creation_context_, &operation, BHWC(1, 4, 4, 1), &dst_tensor)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/mean.cc b/tensorflow/lite/delegates/gpu/cl/kernels/mean.cc index 9378dafd049..e1628a7e9a7 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/mean.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/mean.cc @@ -26,6 +26,18 @@ namespace tflite { namespace gpu { namespace cl { +Mean::Mean(const OperationDef& definition, const DeviceInfo& device_info) + : GPUOperation(definition) { + // for workgroup size: + // must be: (x * y) % 4 = 0; + // must be: z = 1; + work_group_size_ = int3(16, 16, 1); + if (device_info.IsAdreno3xx()) { + work_group_size_ = int3(16, 8, 1); + } + code_ = GetMeanKernelCode(definition_, work_group_size_); +} + Mean::Mean(Mean&& operation) : GPUOperation(std::move(operation)) {} Mean& Mean::operator=(Mean&& operation) { @@ -96,25 +108,6 @@ std::string Mean::GetMeanKernelCode(const OperationDef& op_def, return c; } -absl::Status Mean::Compile(const CreationContext& creation_context) { - // must be: (x * y) % 4 = 0; - // must be: z = 1; - work_group_size_ = int3(16, 16, 1); - if (creation_context.device->IsAdreno3xx()) { - work_group_size_ = int3(16, 8, 1); - } - std::string code = GetMeanKernelCode(definition_, work_group_size_); - std::string element_wise_code; - RETURN_IF_ERROR( - MergeOperations(linked_operations_, &args_, &element_wise_code)); - RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), - {{"dst_tensor", element_wise_code}}, - &code)); - return creation_context.cache->GetOrCreateCLKernel( - code, "main_function", *creation_context.context, - *creation_context.device, &kernel_); -} - absl::Status Mean::BindArguments() { const double total_size = src_[0]->Width() * src_[0]->Height(); const double size_0 = work_group_size_.x * work_group_size_.y; @@ -131,7 +124,9 @@ int3 Mean::GetGridSize() const { return int3(grid_x, grid_y, grid_z); } -Mean CreateMean(const OperationDef& definition) { return Mean(definition); } +Mean CreateMean(const OperationDef& definition, const DeviceInfo& device_info) { + return Mean(definition, device_info); +} } // namespace cl } // namespace gpu diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/mean.h b/tensorflow/lite/delegates/gpu/cl/kernels/mean.h index 938b82d3a6f..cfdd7be53d3 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/mean.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/mean.h @@ -29,14 +29,13 @@ namespace cl { class Mean : public GPUOperation { public: Mean() = 
default; - explicit Mean(const OperationDef& definition) : GPUOperation(definition) {} + Mean(const OperationDef& definition, const DeviceInfo& device_info); absl::Status Tune(const TuningParameters& params) override { return absl::OkStatus(); } absl::Status BindArguments() override; int3 GetGridSize() const override; - absl::Status Compile(const CreationContext& creation_context) override; // Move only Mean(Mean&& operation); @@ -49,7 +48,7 @@ class Mean : public GPUOperation { const int3& work_group_size); }; -Mean CreateMean(const OperationDef& definition); +Mean CreateMean(const OperationDef& definition, const DeviceInfo& device_info); } // namespace cl } // namespace gpu diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc b/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc index 2b4fc457c71..3e8e4d952de 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc @@ -26,7 +26,9 @@ namespace gpu { namespace cl { MeanStdDevNormalization::MeanStdDevNormalization(const OperationDef& definition) - : GPUOperation(definition) {} + : GPUOperation(definition) { + code_ = GetNormalizationCode(definition_); +} std::string MeanStdDevNormalization::GetNormalizationCode( const OperationDef& op_def) { @@ -70,16 +72,6 @@ std::string MeanStdDevNormalization::GetNormalizationCode( return c; } -absl::Status MeanStdDevNormalization::Compile( - const CreationContext& creation_context) { - std::string code = GetNormalizationCode(definition_); - RETURN_IF_ERROR( - args_.TransformToCLCode(creation_context.device->GetInfo(), {}, &code)); - return creation_context.cache->GetOrCreateCLKernel( - code, "main_function", *creation_context.context, - *creation_context.device, &kernel_); -} - int3 MeanStdDevNormalization::GetGridSize() const { const int grid_x = dst_[0]->Batch(); const int grid_y = 1; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.h b/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.h index 6d2b00c07ff..7c8627df88c 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.h @@ -31,7 +31,6 @@ class MeanStdDevNormalization : public GPUOperation { explicit MeanStdDevNormalization(const OperationDef& definition); int3 GetGridSize() const override; - absl::Status Compile(const CreationContext& creation_context) override; // Move only MeanStdDevNormalization(MeanStdDevNormalization&& kernel) = default; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/mean_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/mean_test.cc index 0379c59dd45..dbb70127317 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/mean_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/mean_test.cc @@ -47,7 +47,7 @@ TEST_F(OpenCLOperationTest, Mean) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - Mean operation = CreateMean(op_def); + Mean operation = CreateMean(op_def, env_.GetDevicePtr()->GetInfo()); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 1, 1, 1), &dst_tensor)); EXPECT_THAT(dst_tensor.data, Pointwise(FloatNear(eps), {2.5f})); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/padding.cc b/tensorflow/lite/delegates/gpu/cl/kernels/padding.cc index 57b52deeb8b..4e2a6fb2bce 100644 
--- a/tensorflow/lite/delegates/gpu/cl/kernels/padding.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/padding.cc @@ -26,14 +26,14 @@ namespace gpu { namespace cl { Padding::Padding(const OperationDef& definition, const PadAttributes& attr) - : GPUOperation(definition), attributes_(attr) {} + : GPUOperation(definition) { + code_ = GetPaddingCode(definition_, attr); +} -Padding::Padding(Padding&& kernel) - : GPUOperation(std::move(kernel)), attributes_(kernel.attributes_) {} +Padding::Padding(Padding&& kernel) : GPUOperation(std::move(kernel)) {} Padding& Padding::operator=(Padding&& kernel) { if (this != &kernel) { - std::swap(attributes_, kernel.attributes_); GPUOperation::operator=(std::move(kernel)); } return *this; @@ -43,10 +43,10 @@ std::string Padding::GetPaddingCode(const OperationDef& op_def, const PadAttributes& attr) { AddSrcTensor("src_tensor", op_def.src_tensors[0]); AddDstTensor("dst_tensor", op_def.dst_tensors[0]); - args_.AddInt("prepended_x"); - args_.AddInt("prepended_y"); - args_.AddInt("prepended_z"); - args_.AddInt("prepended_w"); + args_.AddInt("prepended_x", attr.prepended.w); + args_.AddInt("prepended_y", attr.prepended.h); + args_.AddInt("prepended_z", attr.prepended.c); + args_.AddInt("prepended_w", attr.prepended.b); const std::string dst_batch = op_def.dst_tensors[0].HasAxis(Axis::BATCH) ? "B" : "0"; @@ -149,27 +149,6 @@ std::string Padding::GetPaddingCode(const OperationDef& op_def, return c; } -absl::Status Padding::Compile(const CreationContext& creation_context) { - std::string code = GetPaddingCode(definition_, attributes_); - std::string element_wise_code; - RETURN_IF_ERROR( - MergeOperations(linked_operations_, &args_, &element_wise_code)); - RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), - {{"dst_tensor", element_wise_code}}, - &code)); - return creation_context.cache->GetOrCreateCLKernel( - code, "main_function", *creation_context.context, - *creation_context.device, &kernel_); -} - -absl::Status Padding::BindArguments() { - RETURN_IF_ERROR(args_.SetInt("prepended_x", attributes_.prepended.w)); - RETURN_IF_ERROR(args_.SetInt("prepended_y", attributes_.prepended.h)); - RETURN_IF_ERROR(args_.SetInt("prepended_z", attributes_.prepended.c)); - RETURN_IF_ERROR(args_.SetInt("prepended_w", attributes_.prepended.b)); - return absl::OkStatus(); -} - int3 Padding::GetGridSize() const { const int grid_x = dst_[0]->Width() * dst_[0]->Batch(); const int grid_y = dst_[0]->Height(); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/padding.h b/tensorflow/lite/delegates/gpu/cl/kernels/padding.h index d7fc5c58fe3..44d53204e16 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/padding.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/padding.h @@ -28,10 +28,7 @@ namespace cl { class Padding : public GPUOperation { public: Padding(const OperationDef& definition, const PadAttributes& attr); - - absl::Status BindArguments() override; int3 GetGridSize() const override; - absl::Status Compile(const CreationContext& creation_context) override; // Move only Padding(Padding&& kernel); @@ -42,8 +39,6 @@ class Padding : public GPUOperation { private: std::string GetPaddingCode(const OperationDef& op_def, const PadAttributes& attr); - - PadAttributes attributes_; }; Padding CreatePadding(const OperationDef& definition, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/pooling.cc b/tensorflow/lite/delegates/gpu/cl/kernels/pooling.cc index d264061004a..0c5a7a64d15 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/pooling.cc +++ 
b/tensorflow/lite/delegates/gpu/cl/kernels/pooling.cc @@ -25,23 +25,27 @@ namespace gpu { namespace cl { Pooling::Pooling(const OperationDef& definition, - const Pooling2DAttributes& attr) + const Pooling2DAttributes& attr, const DeviceInfo& device_info) : GPUOperation(definition), stride_(attr.strides.w, attr.strides.h, 0, 0), padding_(-attr.padding.prepended.w, -attr.padding.prepended.h, 0, 0), kernel_size_(attr.kernel.w, attr.kernel.h, 0, 0), type_(attr.type), - output_indices_(attr.output_indices) {} + output_indices_(attr.output_indices) { + GenerateCode(device_info); +} Pooling::Pooling(const OperationDef& definition, - const Pooling3DAttributes& attr) + const Pooling3DAttributes& attr, const DeviceInfo& device_info) : GPUOperation(definition), stride_(attr.strides.w, attr.strides.h, attr.strides.d, 0), padding_(-attr.padding.prepended.w, -attr.padding.prepended.h, -attr.padding.prepended.d, 0), kernel_size_(attr.kernel.w, attr.kernel.h, attr.kernel.d, 0), type_(attr.type), - output_indices_(attr.output_indices) {} + output_indices_(attr.output_indices) { + GenerateCode(device_info); +} Pooling::Pooling(Pooling&& kernel) : GPUOperation(std::move(kernel)), @@ -63,11 +67,11 @@ Pooling& Pooling::operator=(Pooling&& kernel) { return *this; } -std::string Pooling::GetAveragePoolingKernelCode(const OperationDef& op_def, - bool stride_correction, - const CLDevice& device) { +std::string Pooling::GetAveragePoolingKernelCode( + const OperationDef& op_def, bool stride_correction, + const DeviceInfo& device_info) { auto src_desc = op_def.src_tensors[0]; - src_desc.SetTextureAddressMode(GetFastestZeroMode(device)); + src_desc.SetTextureAddressMode(GetFastestZeroMode(device_info)); if (op_def.IsBatchSupported()) { src_desc.SetStateVar("BatchedWidth", "true"); } @@ -344,33 +348,16 @@ std::string Pooling::GetMaxPoolingKernelCode(const OperationDef& op_def, return c; } -absl::Status Pooling::Compile(const CreationContext& creation_context) { - std::string code; +void Pooling::GenerateCode(const DeviceInfo& device_info) { const bool stride_correction = definition_.IsBatchSupported() && stride_.x != 1; - switch (type_) { - case PoolingType::AVERAGE: - code = GetAveragePoolingKernelCode(definition_, stride_correction, - *creation_context.device); - break; - case PoolingType::MAX: - code = GetMaxPoolingKernelCode(definition_, stride_correction, - output_indices_); - break; - default: - return absl::InvalidArgumentError( - "You should create another kernel with this params"); - break; + if (type_ == PoolingType::AVERAGE) { + code_ = GetAveragePoolingKernelCode(definition_, stride_correction, + device_info); + } else if (type_ == PoolingType::MAX) { + code_ = GetMaxPoolingKernelCode(definition_, stride_correction, + output_indices_); } - std::string element_wise_code; - RETURN_IF_ERROR( - MergeOperations(linked_operations_, &args_, &element_wise_code)); - RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), - {{"dst_tensor", element_wise_code}}, - &code)); - return creation_context.cache->GetOrCreateCLKernel( - code, "main_function", *creation_context.context, - *creation_context.device, &kernel_); } absl::Status Pooling::BindArguments() { @@ -400,13 +387,15 @@ int3 Pooling::GetGridSize() const { } Pooling CreatePooling(const OperationDef& definition, - const Pooling2DAttributes& attr) { - return Pooling(definition, attr); + const Pooling2DAttributes& attr, + const DeviceInfo& device_info) { + return Pooling(definition, attr, device_info); } Pooling CreatePooling(const OperationDef& 
definition, - const Pooling3DAttributes& attr) { - return Pooling(definition, attr); + const Pooling3DAttributes& attr, + const DeviceInfo& device_info) { + return Pooling(definition, attr, device_info); } } // namespace cl diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/pooling.h b/tensorflow/lite/delegates/gpu/cl/kernels/pooling.h index 712335d68a1..07c3c6d85da 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/pooling.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/pooling.h @@ -29,12 +29,13 @@ namespace cl { class Pooling : public GPUOperation { public: - Pooling(const OperationDef& definition, const Pooling2DAttributes& attr); - Pooling(const OperationDef& definition, const Pooling3DAttributes& attr); + Pooling(const OperationDef& definition, const Pooling2DAttributes& attr, + const DeviceInfo& device_info); + Pooling(const OperationDef& definition, const Pooling3DAttributes& attr, + const DeviceInfo& device_info); absl::Status BindArguments() override; int3 GetGridSize() const override; - absl::Status Compile(const CreationContext& creation_context) override; // Move only Pooling(Pooling&& kernel); @@ -45,11 +46,13 @@ class Pooling : public GPUOperation { private: std::string GetAveragePoolingKernelCode(const OperationDef& op_def, bool stride_correction, - const CLDevice& device); + const DeviceInfo& device_info); std::string GetMaxPoolingKernelCode(const OperationDef& op_def, bool stride_correction, bool output_indices); + void GenerateCode(const DeviceInfo& device_info); + int4 stride_; int4 padding_; int4 kernel_size_; @@ -59,10 +62,12 @@ class Pooling : public GPUOperation { }; Pooling CreatePooling(const OperationDef& definition, - const Pooling2DAttributes& attr); + const Pooling2DAttributes& attr, + const DeviceInfo& device_info); Pooling CreatePooling(const OperationDef& definition, - const Pooling3DAttributes& attr); + const Pooling3DAttributes& attr, + const DeviceInfo& device_info); } // namespace cl } // namespace gpu diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/pooling_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/pooling_test.cc index 12efd56f5d2..7ebcc4871c5 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/pooling_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/pooling_test.cc @@ -52,7 +52,8 @@ TEST_F(OpenCLOperationTest, AveragePooling) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - Pooling operation = CreatePooling(op_def, attr); + Pooling operation = + CreatePooling(op_def, attr, env_.GetDevicePtr()->GetInfo()); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 1, 1, 2), &dst_tensor)); EXPECT_THAT(dst_tensor.data, Pointwise(FloatNear(eps), {3.0f, 4.0f})); @@ -81,7 +82,8 @@ TEST_F(OpenCLOperationTest, AveragePoolingNonEmptyPadding) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - Pooling operation = CreatePooling(op_def, attr); + Pooling operation = + CreatePooling(op_def, attr, env_.GetDevicePtr()->GetInfo()); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 2, 2, 1), &dst_tensor)); EXPECT_THAT(dst_tensor.data, @@ -111,7 +113,8 @@ TEST_F(OpenCLOperationTest, MaxPooling) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - Pooling operation = 
CreatePooling(op_def, attr); + Pooling operation = + CreatePooling(op_def, attr, env_.GetDevicePtr()->GetInfo()); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 1, 1, 2), &dst_tensor)); EXPECT_THAT(dst_tensor.data, Pointwise(FloatNear(eps), {8.0f, 7.0f})); @@ -143,7 +146,8 @@ TEST_F(OpenCLOperationTest, MaxPoolingIndices) { op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; TensorFloat32 dst_tensor_ind; - Pooling operation = CreatePooling(op_def, attr); + Pooling operation = + CreatePooling(op_def, attr, env_.GetDevicePtr()->GetInfo()); ASSERT_OK(ExecuteGPUOperation({src_tensor}, creation_context_, &operation, {BHWC(1, 1, 1, 2), BHWC(1, 1, 1, 2)}, {&dst_tensor, &dst_tensor_ind})); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/reshape.cc b/tensorflow/lite/delegates/gpu/cl/kernels/reshape.cc index cf9b3893896..4e2ab1307a5 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/reshape.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/reshape.cc @@ -24,6 +24,10 @@ namespace tflite { namespace gpu { namespace cl { +Reshape::Reshape(const OperationDef& definition) : GPUOperation(definition) { + code_ = GetReshapeCode(definition_); +} + Reshape::Reshape(Reshape&& operation) : GPUOperation(std::move(operation)) {} Reshape& Reshape::operator=(Reshape&& operation) { @@ -92,19 +96,6 @@ std::string Reshape::GetReshapeCode(const OperationDef& op_def) { return c; } -absl::Status Reshape::Compile(const CreationContext& creation_context) { - std::string code = GetReshapeCode(definition_); - std::string element_wise_code; - RETURN_IF_ERROR( - MergeOperations(linked_operations_, &args_, &element_wise_code)); - RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), - {{"dst_tensor", element_wise_code}}, - &code)); - return creation_context.cache->GetOrCreateCLKernel( - code, "main_function", *creation_context.context, - *creation_context.device, &kernel_); -} - int3 Reshape::GetGridSize() const { const int grid_x = dst_[0]->Width() * dst_[0]->Batch(); const int grid_y = dst_[0]->Height(); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/reshape.h b/tensorflow/lite/delegates/gpu/cl/kernels/reshape.h index 1783bb7b2c7..a5da616c451 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/reshape.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/reshape.h @@ -27,10 +27,9 @@ namespace cl { class Reshape : public GPUOperation { public: - explicit Reshape(const OperationDef& definition) : GPUOperation(definition) {} + explicit Reshape(const OperationDef& definition); int3 GetGridSize() const override; - absl::Status Compile(const CreationContext& creation_context) override; // Move only Reshape(Reshape&& operation); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.cc b/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.cc index 7043469202b..e5692cbc736 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.cc @@ -24,6 +24,11 @@ namespace tflite { namespace gpu { namespace cl { +Reshapex4::Reshapex4(const OperationDef& definition) + : GPUOperation(definition) { + code_ = GetReshapeCode(definition_); +} + Reshapex4::Reshapex4(Reshapex4&& operation) : GPUOperation(std::move(operation)) {} @@ -77,19 +82,6 @@ std::string Reshapex4::GetReshapeCode(const OperationDef& op_def) { return c; } -absl::Status Reshapex4::Compile(const CreationContext& creation_context) { - std::string code = GetReshapeCode(definition_); - std::string 
element_wise_code; - RETURN_IF_ERROR( - MergeOperations(linked_operations_, &args_, &element_wise_code)); - RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), - {{"dst_tensor", element_wise_code}}, - &code)); - return creation_context.cache->GetOrCreateCLKernel( - code, "main_function", *creation_context.context, - *creation_context.device, &kernel_); -} - int3 Reshapex4::GetGridSize() const { const int grid_x = dst_[0]->Width() * dst_[0]->Batch(); const int grid_y = dst_[0]->Height(); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.h b/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.h index f278f52652d..654e37e93be 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.h @@ -28,11 +28,9 @@ namespace cl { class Reshapex4 : public GPUOperation { public: - explicit Reshapex4(const OperationDef& definition) - : GPUOperation(definition) {} + explicit Reshapex4(const OperationDef& definition); int3 GetGridSize() const override; - absl::Status Compile(const CreationContext& creation_context) override; // Move only Reshapex4(Reshapex4&& operation); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/resize.cc b/tensorflow/lite/delegates/gpu/cl/kernels/resize.cc index c119f37dade..a0fd699062c 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/resize.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/resize.cc @@ -24,6 +24,11 @@ namespace tflite { namespace gpu { namespace cl { +Resize::Resize(const OperationDef& definition, const Resize2DAttributes& attr) + : GPUOperation(definition), attr_(attr) { + code_ = GetResizeCode(definition_, attr_); +} + Resize::Resize(Resize&& operation) : GPUOperation(std::move(operation)), attr_(operation.attr_) {} @@ -127,19 +132,6 @@ std::string Resize::GetResizeCode(const OperationDef& op_def, return c; } -absl::Status Resize::Compile(const CreationContext& creation_context) { - std::string code = GetResizeCode(definition_, attr_); - std::string element_wise_code; - RETURN_IF_ERROR( - MergeOperations(linked_operations_, &args_, &element_wise_code)); - RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), - {{"dst_tensor", element_wise_code}}, - &code)); - return creation_context.cache->GetOrCreateCLKernel( - code, "main_function", *creation_context.context, - *creation_context.device, &kernel_); -} - absl::Status Resize::BindArguments() { RETURN_IF_ERROR(args_.SetInt("border_x", src_[0]->Width() - 1)); RETURN_IF_ERROR(args_.SetInt("border_y", src_[0]->Height() - 1)); @@ -164,6 +156,12 @@ Resize CreateResize(const OperationDef& definition, return Resize(definition, attr); } +Resize3D::Resize3D(const OperationDef& definition, + const Resize3DAttributes& attr) + : GPUOperation(definition), attr_(attr) { + code_ = GetResize3DCode(definition_, attr_); +} + Resize3D::Resize3D(Resize3D&& operation) : GPUOperation(std::move(operation)), attr_(operation.attr_) {} @@ -288,19 +286,6 @@ std::string Resize3D::GetResize3DCode(const OperationDef& op_def, return c; } -absl::Status Resize3D::Compile(const CreationContext& creation_context) { - std::string code = GetResize3DCode(definition_, attr_); - std::string element_wise_code; - RETURN_IF_ERROR( - MergeOperations(linked_operations_, &args_, &element_wise_code)); - RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), - {{"dst_tensor", element_wise_code}}, - &code)); - return creation_context.cache->GetOrCreateCLKernel( - code, "main_function", *creation_context.context, 
- *creation_context.device, &kernel_); -} - absl::Status Resize3D::BindArguments() { RETURN_IF_ERROR(args_.SetInt("border_x", src_[0]->Width() - 1)); RETURN_IF_ERROR(args_.SetInt("border_y", src_[0]->Height() - 1)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/resize.h b/tensorflow/lite/delegates/gpu/cl/kernels/resize.h index a5e7e86fb4f..0349afe5664 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/resize.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/resize.h @@ -29,7 +29,6 @@ class Resize : public GPUOperation { public: absl::Status BindArguments() override; int3 GetGridSize() const override; - absl::Status Compile(const CreationContext& creation_context) override; // Move only Resize(Resize&& operation); @@ -41,8 +40,7 @@ class Resize : public GPUOperation { const Resize2DAttributes& attr); private: - Resize(const OperationDef& definition, const Resize2DAttributes& attr) - : GPUOperation(definition), attr_(attr) {} + Resize(const OperationDef& definition, const Resize2DAttributes& attr); std::string GetResizeCode(const OperationDef& op_def, const Resize2DAttributes& attr); @@ -57,7 +55,6 @@ class Resize3D : public GPUOperation { public: absl::Status BindArguments() override; int3 GetGridSize() const override; - absl::Status Compile(const CreationContext& creation_context) override; // Move only Resize3D(Resize3D&& operation); @@ -69,8 +66,7 @@ class Resize3D : public GPUOperation { const Resize3DAttributes& attr); private: - Resize3D(const OperationDef& definition, const Resize3DAttributes& attr) - : GPUOperation(definition), attr_(attr) {} + Resize3D(const OperationDef& definition, const Resize3DAttributes& attr); std::string GetResize3DCode(const OperationDef& op_def, const Resize3DAttributes& attr); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/softmax.cc b/tensorflow/lite/delegates/gpu/cl/kernels/softmax.cc index fc75be3c46f..be8e979305b 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/softmax.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/softmax.cc @@ -25,6 +25,10 @@ namespace tflite { namespace gpu { namespace cl { +Softmax::Softmax(const OperationDef& definition) : GPUOperation(definition) { + code_ = GetSoftmaxKernelCode(definition_); +} + Softmax::Softmax(Softmax&& kernel) : GPUOperation(std::move(kernel)) {} Softmax& Softmax::operator=(Softmax&& kernel) { @@ -71,19 +75,6 @@ std::string Softmax::GetSoftmaxKernelCode(const OperationDef& op_def) { return c; } -absl::Status Softmax::Compile(const CreationContext& creation_context) { - std::string code = GetSoftmaxKernelCode(definition_); - std::string element_wise_code; - RETURN_IF_ERROR( - MergeOperations(linked_operations_, &args_, &element_wise_code)); - RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), - {{"dst_tensor", element_wise_code}}, - &code)); - return creation_context.cache->GetOrCreateCLKernel( - code, "main_function", *creation_context.context, - *creation_context.device, &kernel_); -} - int3 Softmax::GetGridSize() const { const int grid_x = dst_[0]->Width() * dst_[0]->Batch(); const int grid_y = dst_[0]->Height(); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/softmax.h b/tensorflow/lite/delegates/gpu/cl/kernels/softmax.h index 62925a6c67a..0fa10721df9 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/softmax.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/softmax.h @@ -29,10 +29,9 @@ namespace cl { class Softmax : public GPUOperation { public: Softmax() = default; - explicit Softmax(const OperationDef& definition) : GPUOperation(definition) {} 
+ explicit Softmax(const OperationDef& definition); int3 GetGridSize() const override; - absl::Status Compile(const CreationContext& creation_context) override; // Move only Softmax(Softmax&& kernel); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.cc b/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.cc index 8f2cd8d4c23..e7cf72aa72a 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.cc @@ -24,6 +24,12 @@ namespace tflite { namespace gpu { namespace cl { +Softmax1x1::Softmax1x1(const OperationDef& definition) + : GPUOperation(definition) { + work_group_size_ = int3(32, 1, 1); + code_ = GetSoftmaxKernelCode(definition_); +} + Softmax1x1::Softmax1x1(Softmax1x1&& kernel) : GPUOperation(std::move(kernel)) {} Softmax1x1& Softmax1x1::operator=(Softmax1x1&& kernel) { @@ -103,20 +109,6 @@ std::string Softmax1x1::GetSoftmaxKernelCode(const OperationDef& op_def) { return c; } -absl::Status Softmax1x1::Compile(const CreationContext& creation_context) { - std::string code = GetSoftmaxKernelCode(definition_); - std::string element_wise_code; - work_group_size_ = int3(32, 1, 1); - RETURN_IF_ERROR( - MergeOperations(linked_operations_, &args_, &element_wise_code)); - RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), - {{"dst_tensor", element_wise_code}}, - &code)); - return creation_context.cache->GetOrCreateCLKernel( - code, "main_function", *creation_context.context, - *creation_context.device, &kernel_); -} - absl::Status Softmax1x1::BindArguments() { float4 mask = GetMaskForLastPlane(src_[0]->Channels()); RETURN_IF_ERROR(args_.SetFloat("mask_x", mask.x)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.h b/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.h index 2f6ff94df1a..42cbbabe799 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.h @@ -28,14 +28,12 @@ namespace cl { class Softmax1x1 : public GPUOperation { public: Softmax1x1() = default; - explicit Softmax1x1(const OperationDef& definition) - : GPUOperation(definition) {} + explicit Softmax1x1(const OperationDef& definition); absl::Status Tune(const TuningParameters& params) override { return absl::OkStatus(); } absl::Status BindArguments() override; int3 GetGridSize() const override; - absl::Status Compile(const CreationContext& creation_context) override; // Move only Softmax1x1(Softmax1x1&& kernel); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.cc b/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.cc index 0df91dbec60..0fa266aa8e7 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.cc @@ -26,6 +26,12 @@ namespace tflite { namespace gpu { namespace cl { +SpaceToDepth::SpaceToDepth(const OperationDef& op_def, + const SpaceToDepthAttributes& attr) + : GPUOperation(op_def), attr_(attr) { + code_ = GetSpaceToDepthCode(definition_); +} + SpaceToDepth::SpaceToDepth(SpaceToDepth&& operation) : GPUOperation(std::move(operation)), attr_(operation.attr_) {} @@ -82,19 +88,6 @@ std::string SpaceToDepth::GetSpaceToDepthCode(const OperationDef& op_def) { return c; } -absl::Status SpaceToDepth::Compile(const CreationContext& creation_context) { - std::string code = GetSpaceToDepthCode(definition_); - std::string element_wise_code; - RETURN_IF_ERROR( - MergeOperations(linked_operations_, &args_, &element_wise_code)); - 
RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), - {{"dst_tensor", element_wise_code}}, - &code)); - return creation_context.cache->GetOrCreateCLKernel( - code, "main_function", *creation_context.context, - *creation_context.device, &kernel_); -} - absl::Status SpaceToDepth::BindArguments() { RETURN_IF_ERROR(args_.SetInt("block_size", attr_.block_size)); return absl::OkStatus(); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.h b/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.h index e44d4eb781b..65ade000836 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.h @@ -28,11 +28,9 @@ namespace cl { class SpaceToDepth : public GPUOperation { public: - SpaceToDepth(const OperationDef& op_def, const SpaceToDepthAttributes& attr) - : GPUOperation(op_def), attr_(attr) {} + SpaceToDepth(const OperationDef& op_def, const SpaceToDepthAttributes& attr); absl::Status BindArguments() override; int3 GetGridSize() const override; - absl::Status Compile(const CreationContext& creation_context) override; SpaceToDepth(SpaceToDepth&& operation); SpaceToDepth& operator=(SpaceToDepth&& operation); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/special/depthwise_conv_plus_1x1_conv.cc b/tensorflow/lite/delegates/gpu/cl/kernels/special/depthwise_conv_plus_1x1_conv.cc index abb56b5f41f..88417ce6f1e 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/special/depthwise_conv_plus_1x1_conv.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/special/depthwise_conv_plus_1x1_conv.cc @@ -30,24 +30,23 @@ namespace cl { DepthwiseConvPlus1x1Conv::DepthwiseConvPlus1x1Conv( const OperationDef& definition, const DepthwiseConvolution2DAttributes& dw_attr, - const Convolution2DAttributes& conv_attr) - : GPUOperation(definition), - dw_attr_(dw_attr), - result_depth_(DivideRoundUp(conv_attr.weights.shape.o, 4)) { + const Convolution2DAttributes& conv_attr, const DeviceInfo& device_info) + : GPUOperation(definition), dw_attr_(dw_attr) { work_group_size_ = int3(8, 8, 1); + code_ = + GenerateCode(definition_, dw_attr_, + DivideRoundUp(conv_attr.weights.shape.o, 4), device_info); } DepthwiseConvPlus1x1Conv::DepthwiseConvPlus1x1Conv( DepthwiseConvPlus1x1Conv&& operation) : GPUOperation(std::move(operation)), - dw_attr_(std::move(operation.dw_attr_)), - result_depth_(operation.result_depth_) {} + dw_attr_(std::move(operation.dw_attr_)) {} DepthwiseConvPlus1x1Conv& DepthwiseConvPlus1x1Conv::operator=( DepthwiseConvPlus1x1Conv&& operation) { if (this != &operation) { dw_attr_ = std::move(operation.dw_attr_); - std::swap(result_depth_, operation.result_depth_); GPUOperation::operator=(std::move(operation)); } return *this; @@ -147,9 +146,9 @@ absl::Status DepthwiseConvPlus1x1Conv::UploadWeights( std::string DepthwiseConvPlus1x1Conv::GenerateCode( const OperationDef& op_def, const DepthwiseConvolution2DAttributes& dw_attr, - int result_depth, const CLDevice& device) { + int result_depth, const DeviceInfo& device_info) { auto src_desc = op_def.src_tensors[0]; - src_desc.SetTextureAddressMode(GetFastestZeroMode(device)); + src_desc.SetTextureAddressMode(GetFastestZeroMode(device_info)); AddSrcTensor("src_tensor", src_desc); AddDstTensor("dst_tensor", op_def.dst_tensors[0]); @@ -243,21 +242,6 @@ std::string DepthwiseConvPlus1x1Conv::GenerateCode( return c; } -absl::Status DepthwiseConvPlus1x1Conv::Compile( - const CreationContext& creation_context) { - std::string code = GenerateCode(definition_, 
dw_attr_, result_depth_, - *creation_context.device); - std::string element_wise_code; - RETURN_IF_ERROR( - MergeOperations(linked_operations_, &args_, &element_wise_code)); - RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), - {{"dst_tensor", element_wise_code}}, - &code)); - return creation_context.cache->GetOrCreateCLKernel( - code, "main_function", *creation_context.context, - *creation_context.device, &kernel_); -} - int3 DepthwiseConvPlus1x1Conv::GetGridSize() const { const int grid_x = dst_[0]->Width() * dst_[0]->Batch(); const int grid_y = dst_[0]->Height(); @@ -289,7 +273,8 @@ absl::Status CreateDepthwiseConvPlus1x1Conv( const DepthwiseConvolution2DAttributes& dw_attr, const Convolution2DAttributes& conv_attr, DepthwiseConvPlus1x1Conv* result) { - *result = DepthwiseConvPlus1x1Conv(definition, dw_attr, conv_attr); + *result = DepthwiseConvPlus1x1Conv(definition, dw_attr, conv_attr, + creation_context.device->GetInfo()); RETURN_IF_ERROR( result->UploadWeights(dw_attr, conv_attr, creation_context.context)); return absl::OkStatus(); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/special/depthwise_conv_plus_1x1_conv.h b/tensorflow/lite/delegates/gpu/cl/kernels/special/depthwise_conv_plus_1x1_conv.h index 90a7ea751f7..d4037c83b30 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/special/depthwise_conv_plus_1x1_conv.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/special/depthwise_conv_plus_1x1_conv.h @@ -37,7 +37,6 @@ class DepthwiseConvPlus1x1Conv : public GPUOperation { public: DepthwiseConvPlus1x1Conv() = default; int3 GetGridSize() const override; - absl::Status Compile(const CreationContext& creation_context) override; // Move only DepthwiseConvPlus1x1Conv(DepthwiseConvPlus1x1Conv&& operation); @@ -53,7 +52,8 @@ class DepthwiseConvPlus1x1Conv : public GPUOperation { DepthwiseConvPlus1x1Conv* result); DepthwiseConvPlus1x1Conv(const OperationDef& definition, const DepthwiseConvolution2DAttributes& dw_attr, - const Convolution2DAttributes& conv_attr); + const Convolution2DAttributes& conv_attr, + const DeviceInfo& device_info); absl::Status UploadWeights(const DepthwiseConvolution2DAttributes& dw_attr, const Convolution2DAttributes& conv_attr, @@ -61,10 +61,9 @@ class DepthwiseConvPlus1x1Conv : public GPUOperation { std::string GenerateCode(const OperationDef& op_def, const DepthwiseConvolution2DAttributes& dw_attr, - int result_depth, const CLDevice& device); + int result_depth, const DeviceInfo& device_info); DepthwiseConvolution2DAttributes dw_attr_; - int result_depth_; }; bool IsDepthwiseConvPlus1x1ConvSupported( diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.cc b/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.cc index 081488e4fe8..b2ce0690a9c 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.cc @@ -79,6 +79,7 @@ StridedSlice::StridedSlice(const OperationDef& definition, const SliceAttributes& attr) : GPUOperation(definition), attributes_(attr) { work_group_size_ = int3(8, 4, 1); + code_ = GetStridedSliceCode(definition_, Is4Aligned(attributes_)); } StridedSlice::StridedSlice(StridedSlice&& operation) @@ -153,19 +154,6 @@ std::string StridedSlice::GetStridedSliceCode(const OperationDef& op_def, return c; } -absl::Status StridedSlice::Compile(const CreationContext& creation_context) { - std::string code = GetStridedSliceCode(definition_, Is4Aligned(attributes_)); - std::string element_wise_code; - RETURN_IF_ERROR( - 
MergeOperations(linked_operations_, &args_, &element_wise_code)); - RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), - {{"dst_tensor", element_wise_code}}, - &code)); - return creation_context.cache->GetOrCreateCLKernel( - code, "main_function", *creation_context.context, - *creation_context.device, &kernel_); -} - absl::Status StridedSlice::BindArguments() { int4 offset = GetOffset(attributes_, src_[0]->Width(), src_[0]->Height(), src_[0]->Channels(), src_[0]->Batch()); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.h b/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.h index fb8acd308f0..5a6d8ad6047 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.h @@ -29,7 +29,6 @@ class StridedSlice : public GPUOperation { StridedSlice(const OperationDef& definition, const SliceAttributes& attr); absl::Status BindArguments() override; int3 GetGridSize() const override; - absl::Status Compile(const CreationContext& creation_context) override; // Move only StridedSlice(StridedSlice&& operation); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/transpose.cc b/tensorflow/lite/delegates/gpu/cl/kernels/transpose.cc index 93e6241fc55..259f66e0f38 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/transpose.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/transpose.cc @@ -25,6 +25,12 @@ namespace tflite { namespace gpu { namespace cl { +Transpose::Transpose(const OperationDef& definition, + const TransposeAttributes& attr) + : GPUOperation(definition), attr_(attr) { + code_ = GetTransposeCode(definition_, attr_); +} + Transpose::Transpose(Transpose&& operation) : GPUOperation(std::move(operation)), attr_(operation.attr_) {} @@ -107,19 +113,6 @@ std::string Transpose::GetTransposeCode(const OperationDef& op_def, return c; } -absl::Status Transpose::Compile(const CreationContext& creation_context) { - std::string code = GetTransposeCode(definition_, attr_); - std::string element_wise_code; - RETURN_IF_ERROR( - MergeOperations(linked_operations_, &args_, &element_wise_code)); - RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), - {{"dst_tensor", element_wise_code}}, - &code)); - return creation_context.cache->GetOrCreateCLKernel( - code, "main_function", *creation_context.context, - *creation_context.device, &kernel_); -} - int3 Transpose::GetGridSize() const { const int grid_x = dst_[0]->Width() * dst_[0]->Batch(); const int grid_y = dst_[0]->Height(); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/transpose.h b/tensorflow/lite/delegates/gpu/cl/kernels/transpose.h index 10cc9720ad4..950f838923e 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/transpose.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/transpose.h @@ -26,10 +26,8 @@ namespace cl { class Transpose : public GPUOperation { public: - Transpose(const OperationDef& definition, const TransposeAttributes& attr) - : GPUOperation(definition), attr_(attr) {} + Transpose(const OperationDef& definition, const TransposeAttributes& attr); int3 GetGridSize() const override; - absl::Status Compile(const CreationContext& creation_context) override; // Move only Transpose(Transpose&& operation); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/util.cc b/tensorflow/lite/delegates/gpu/cl/kernels/util.cc index 26fbc33f17c..e3599eb5044 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/util.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/util.cc @@ -100,6 +100,11 @@ 
TextureAddressMode GetFastestZeroMode(const CLDevice& device) { : TextureAddressMode::ZERO; } +TextureAddressMode GetFastestZeroMode(const DeviceInfo& device_info) { + return device_info.IsAdreno3xx() ? TextureAddressMode::DONT_CARE + : TextureAddressMode::ZERO; +} + float4 GetMaskForLastPlane(int channels) { float4 mask = float4(0.0f); const int reminder = channels % 4 == 0 ? 4 : channels % 4; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/util.h b/tensorflow/lite/delegates/gpu/cl/kernels/util.h index 42be865e3a3..b41d0efb91e 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/util.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/util.h @@ -95,6 +95,7 @@ void RearrangeWeightsToOHWIOGroupI4O4( // textures on Adreno3xx devices. Using CLK_ADDRESS_NONE is significantly faster // than CLK_ADDRESS_CLAMP on Adreno 3xx. TextureAddressMode GetFastestZeroMode(const CLDevice& device); +TextureAddressMode GetFastestZeroMode(const DeviceInfo& device_info); // Returns float4 mask for last plane(batch of 4 channels) // assumes that plane size is 4; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc index b266bd832d8..4c3e8ddba05 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc @@ -32,6 +32,21 @@ namespace tflite { namespace gpu { namespace cl { +Winograd4x4To36::Winograd4x4To36(const OperationDef& definition, + const Padding2D& padding, + const DeviceInfo& device_info) + : GPUOperation(definition), padding_(padding) { + work_group_size_ = int3(32, 1, 1); + code_ = GetWinograd4x4To36Code(definition_); + if (device_info.IsAdreno()) { + compiler_options_.push_back(CompilerOptions::ADRENO_MORE_WAVES); + } + if (definition_.precision == CalculationsPrecision::F16 && + device_info.IsPowerVR()) { + compiler_options_.push_back(CompilerOptions::POWERVR_FP16); + } +} + Winograd4x4To36::Winograd4x4To36(Winograd4x4To36&& operation) : GPUOperation(std::move(operation)), padding_(operation.padding_) {} @@ -219,30 +234,6 @@ std::string Winograd4x4To36::GetWinograd4x4To36Code( return c; } -absl::Status Winograd4x4To36::Compile(const CreationContext& creation_context) { - std::vector options; - if (creation_context.device->IsAdreno()) { - options.push_back(CompilerOptions::ADRENO_MORE_WAVES); - } - if (definition_.precision == CalculationsPrecision::F16 && - creation_context.device->IsPowerVR()) { - options.push_back(CompilerOptions::POWERVR_FP16); - } - RETURN_IF_ERROR(UploadBt(creation_context.context)); - std::string code = GetWinograd4x4To36Code(definition_); - std::string element_wise_code; - RETURN_IF_ERROR( - MergeOperations(linked_operations_, &args_, &element_wise_code)); - RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), - {{"dst_tensor", element_wise_code}}, - &code)); - RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel( - code, "main_function", options, *creation_context.context, - *creation_context.device, &kernel_)); - work_group_size_ = SelectBestWorkGroup(); - return absl::OkStatus(); -} - absl::Status Winograd4x4To36::UploadBt(CLContext* context) { tflite::gpu::Tensor bt_aligned; bt_aligned.shape = Linear(6 * 8); @@ -311,10 +302,22 @@ absl::Status CreateWinograd4x4To36(const CreationContext& creation_context, const OperationDef& definition, const Padding2D& padding, Winograd4x4To36* result) { - *result = Winograd4x4To36(definition, padding); + *result = + Winograd4x4To36(definition, padding, 
creation_context.device->GetInfo()); return result->UploadBt(creation_context.context); } +Winograd36To4x4::Winograd36To4x4(const OperationDef& definition, + const DeviceInfo& device_info) + : GPUOperation(definition) { + work_group_size_ = int3(32, 1, 1); + if (definition_.precision == CalculationsPrecision::F16 && + device_info.IsPowerVR()) { + compiler_options_.push_back(CompilerOptions::POWERVR_FP16); + } + code_ = GetWinograd36To4x4Code(definition_); +} + Winograd36To4x4::Winograd36To4x4(Winograd36To4x4&& operation) : GPUOperation(std::move(operation)) {} @@ -434,26 +437,6 @@ std::string Winograd36To4x4::GetWinograd36To4x4Code( return c; } -absl::Status Winograd36To4x4::Compile(const CreationContext& creation_context) { - std::vector options; - if (definition_.precision == CalculationsPrecision::F16 && - creation_context.device->IsPowerVR()) { - options.push_back(CompilerOptions::POWERVR_FP16); - } - std::string code = GetWinograd36To4x4Code(definition_); - std::string element_wise_code; - RETURN_IF_ERROR( - MergeOperations(linked_operations_, &args_, &element_wise_code)); - RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), - {{"dst_tensor", element_wise_code}}, - &code)); - RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel( - code, "main_function", options, *creation_context.context, - *creation_context.device, &kernel_)); - work_group_size_ = SelectBestWorkGroup(); - return absl::OkStatus(); -} - absl::Status Winograd36To4x4::UploadAt(CLContext* context) { tflite::gpu::Tensor at_aligned; at_aligned.shape = Linear(4 * 8); @@ -519,7 +502,7 @@ absl::Status CreateWinograd36To4x4( const CreationContext& creation_context, const OperationDef& definition, const tflite::gpu::Tensor& biases, Winograd36To4x4* result) { - *result = Winograd36To4x4(definition); + *result = Winograd36To4x4(definition, creation_context.device->GetInfo()); TensorLinearDescriptor desc; desc.storage_type = LinearStorageType::TEXTURE_2D; desc.element_type = definition.GetDataType(); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.h b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.h index 3a4e1e97188..ddc1155e0b5 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.h @@ -34,14 +34,11 @@ namespace cl { class Winograd4x4To36 : public GPUOperation { public: Winograd4x4To36() = default; - Winograd4x4To36(const OperationDef& definition, const Padding2D& padding) - : GPUOperation(definition), padding_(padding) { - work_group_size_ = int3(128, 1, 1); - } + Winograd4x4To36(const OperationDef& definition, const Padding2D& padding, + const DeviceInfo& device_info); absl::Status BindArguments() override; int3 GetGridSize() const override; absl::Status Tune(const TuningParameters& params) override; - absl::Status Compile(const CreationContext& creation_context) override; // Move only Winograd4x4To36(Winograd4x4To36&& operation); @@ -72,14 +69,11 @@ absl::Status CreateWinograd4x4To36(const CreationContext& creation_context, class Winograd36To4x4 : public GPUOperation { public: Winograd36To4x4() = default; - explicit Winograd36To4x4(const OperationDef& definition) - : GPUOperation(definition) { - work_group_size_ = int3(128, 1, 1); - } + Winograd36To4x4(const OperationDef& definition, + const DeviceInfo& device_info); absl::Status BindArguments() override; int3 GetGridSize() const override; absl::Status Tune(const TuningParameters& params) override; - absl::Status Compile(const CreationContext& creation_context) 
override; // Move only Winograd36To4x4(Winograd36To4x4&& operation); diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc index 994659179e7..6134cd48bd7 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc @@ -270,18 +270,20 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context, inputs[0]->tensor.shape.b, gpu_op); } case OperationType::LSTM: { - SelectLSTM(op_def, gpu_op); + SelectLSTM(op_def, creation_context.device->GetInfo(), gpu_op); return absl::OkStatus(); } case OperationType::MAX_UNPOOLING_2D: { auto attr = absl::any_cast(node.operation.attributes); - SelectMaxUnpooling(attr, op_def, gpu_op); + SelectMaxUnpooling(attr, op_def, creation_context.device->GetInfo(), + gpu_op); return absl::OkStatus(); } case OperationType::MEAN: { auto attr = absl::any_cast(node.operation.attributes); - return SelectMean(attr, op_def, gpu_op); + return SelectMean(attr, op_def, creation_context.device->GetInfo(), + gpu_op); } case OperationType::MUL: { if (inputs.size() == 2) { @@ -333,7 +335,7 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context, case OperationType::POOLING_2D: { auto attr = absl::any_cast(node.operation.attributes); - SelectPooling(attr, op_def, gpu_op); + SelectPooling(attr, op_def, creation_context.device->GetInfo(), gpu_op); return absl::OkStatus(); } case OperationType::PRELU: { diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.cc b/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.cc index eb0ed7cd264..a32efd5dd2c 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.cc @@ -45,9 +45,9 @@ namespace tflite { namespace gpu { namespace cl { -void SelectLSTM(const OperationDef& op_def, +void SelectLSTM(const OperationDef& op_def, const DeviceInfo& device_info, std::unique_ptr* ptr) { - LSTM operation = CreateLSTM(op_def); + LSTM operation = CreateLSTM(op_def, device_info); *ptr = absl::make_unique(std::move(operation)); } @@ -69,15 +69,17 @@ absl::Status SelectPReLU(const PReLUAttributes& attr, } void SelectPooling(const Pooling2DAttributes& attr, const OperationDef& op_def, + const DeviceInfo& device_info, std::unique_ptr* ptr) { - Pooling pooling = CreatePooling(op_def, attr); + Pooling pooling = CreatePooling(op_def, attr, device_info); *ptr = absl::make_unique(std::move(pooling)); } void SelectMaxUnpooling(const MaxUnpooling2DAttributes& attr, const OperationDef& op_def, + const DeviceInfo& device_info, std::unique_ptr* ptr) { - MaxUnpooling operation = CreateMaxUnpooling(op_def, attr); + MaxUnpooling operation = CreateMaxUnpooling(op_def, attr, device_info); *ptr = absl::make_unique(std::move(operation)); } @@ -151,11 +153,12 @@ void SelectStridedSlice(const SliceAttributes& attr, const OperationDef& op_def, } absl::Status SelectMean(const MeanAttributes& attr, const OperationDef& op_def, + const DeviceInfo& device_info, std::unique_ptr* ptr) { if (attr.dims != std::set({Axis::HEIGHT, Axis::WIDTH})) { return absl::UnimplementedError("Mean operation supports only HW plane"); } - Mean operation = CreateMean(op_def); + Mean operation = CreateMean(op_def, device_info); *ptr = absl::make_unique(std::move(operation)); return absl::OkStatus(); } diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.h 
b/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.h index ec8ecfb192d..f266882a458 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.h +++ b/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.h @@ -28,7 +28,8 @@ namespace tflite { namespace gpu { namespace cl { -void SelectLSTM(const OperationDef& op_def, std::unique_ptr* ptr); +void SelectLSTM(const OperationDef& op_def, const DeviceInfo& device_info, + std::unique_ptr* ptr); void SelectReLU(const CreationContext& creation_context, const ReLUAttributes& attr, const OperationDef& op_def, @@ -40,10 +41,12 @@ absl::Status SelectPReLU(const PReLUAttributes& attr, std::unique_ptr* ptr); void SelectPooling(const Pooling2DAttributes& attr, const OperationDef& op_def, + const DeviceInfo& device_info, std::unique_ptr* ptr); void SelectMaxUnpooling(const MaxUnpooling2DAttributes& attr, const OperationDef& op_def, + const DeviceInfo& device_info, std::unique_ptr* ptr); void SelectAdd(const OperationDef& op_def, const std::vector& channels, @@ -70,6 +73,7 @@ void SelectStridedSlice(const SliceAttributes& attr, const OperationDef& op_def, std::unique_ptr* ptr); absl::Status SelectMean(const MeanAttributes& attr, const OperationDef& op_def, + const DeviceInfo& device_info, std::unique_ptr* ptr); void SelectSoftmax(const BHWC& shape, const OperationDef& op_def, From 095a20b660514150b91f0ba222632ed8b97ba8cf Mon Sep 17 00:00:00 2001 From: Ayush Dubey Date: Fri, 31 Jul 2020 17:41:57 -0700 Subject: [PATCH 1919/2522] Scope instance keys under group keys for collective ops. Before this change, we would store a global mapping from instance key -> resolved instance runtime parameters. This prevented reusing the same key across device groups. After this change, instance keys are scoped under group key. It is legal to execute 2 collectives with the same instance key, as long as they have different device groups. This enables the user to assign the same instance key to a logical collective which is sharded across device groups. 
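In code terms, the resolver's cache goes from a single map keyed by instance key to a map of maps keyed first by group key. A minimal sketch of a lookup under that scheme follows; the real change uses gtl::FlatMap and the resolver's InstanceRec, so the stand-in struct and the FindOrCreate helper below are illustrative only:

    #include <cstdint>
    #include <memory>
    #include <unordered_map>

    struct InstanceRec {};  // stand-in for the resolver's real record type

    // Per-group instance tables: group_key -> (instance_key -> InstanceRec).
    std::unordered_map<int32_t,
                       std::unordered_map<int32_t, std::unique_ptr<InstanceRec>>>
        instance_table;

    InstanceRec* FindOrCreate(int32_t group_key, int32_t instance_key) {
      // The same instance_key may now appear under two different group_keys
      // without colliding, because the group lookup happens first.
      auto& slot = instance_table[group_key][instance_key];
      if (slot == nullptr) slot = std::make_unique<InstanceRec>();
      return slot.get();
    }

The RecvBuf keys are widened in the same spirit: the group key is prepended to the instance key (see CollectiveKey in collective_ops.cc below), so two groups that reuse an instance key cannot alias each other's buffers.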
PiperOrigin-RevId: 324324902 Change-Id: Ib994b68f96c8f6cf1cc634d5a7c4998d9f3fb96c --- .../collective_param_resolver_local.cc | 41 +++++++++++------- .../collective_param_resolver_local.h | 4 +- .../collective_param_resolver_distributed.cc | 13 ++++-- .../collective_param_resolver_distributed.h | 3 +- tensorflow/core/kernels/collective_ops.cc | 12 ++++-- .../kernel_tests/collective_ops_test.py | 43 +++++++++++++++++++ 6 files changed, 89 insertions(+), 27 deletions(-) diff --git a/tensorflow/core/common_runtime/collective_param_resolver_local.cc b/tensorflow/core/common_runtime/collective_param_resolver_local.cc index a0153a5fff0..ba21abcbaa8 100644 --- a/tensorflow/core/common_runtime/collective_param_resolver_local.cc +++ b/tensorflow/core/common_runtime/collective_param_resolver_local.cc @@ -586,25 +586,32 @@ void CollectiveParamResolverLocal::FindInstanceRec( InstanceRec* irec = nullptr; bool exit_outside_locks = false; { + bool found_instance = false; mutex_lock l(instance_mu_); - auto it = instance_table_.find(cp->instance.instance_key); - if (it != instance_table_.end()) { - irec = it->second.get(); - { - mutex_lock l(irec->in_mu); - if (irec->is_init) { - exit_outside_locks = true; - } else { - irec->init_waiters.push_back([this, done](InstanceRec* irec) { - CallbackWithStatus(done, irec); - }); - return; + auto group_it = instance_table_.find(gr->group.group_key); + if (group_it != instance_table_.end()) { + auto instance_it = group_it->second.find(cp->instance.instance_key); + if (instance_it != group_it->second.end()) { + irec = instance_it->second.get(); + { + mutex_lock l(irec->in_mu); + if (irec->is_init) { + exit_outside_locks = true; + } else { + irec->init_waiters.push_back([this, done](InstanceRec* irec) { + CallbackWithStatus(done, irec); + }); + return; + } } + found_instance = true; } - } else { + } + if (!found_instance) { // Create new InstanceRec. 
irec = new InstanceRec; - instance_table_[cp->instance.instance_key].reset(irec); + instance_table_[gr->group.group_key][cp->instance.instance_key].reset( + irec); } } Status status; @@ -890,8 +897,10 @@ void CollectiveParamResolverLocal::StartAbortLocal(const Status& s) { std::vector instances; { mutex_lock l(instance_mu_); - for (const auto& item : instance_table_) { - instances.push_back(item.second.get()); + for (const auto& group_entry : instance_table_) { + for (const auto& item : group_entry.second) { + instances.push_back(item.second.get()); + } } } for (InstanceRec* ir : instances) { diff --git a/tensorflow/core/common_runtime/collective_param_resolver_local.h b/tensorflow/core/common_runtime/collective_param_resolver_local.h index 2b7528d6377..40f0f00affc 100644 --- a/tensorflow/core/common_runtime/collective_param_resolver_local.h +++ b/tensorflow/core/common_runtime/collective_param_resolver_local.h @@ -241,8 +241,8 @@ class CollectiveParamResolverLocal : public ParamResolverInterface { gtl::FlatMap> group_table_ TF_GUARDED_BY(group_mu_); mutex instance_mu_; - gtl::FlatMap> instance_table_ - TF_GUARDED_BY(instance_mu_); + gtl::FlatMap>> + instance_table_ TF_GUARDED_BY(instance_mu_); mutex status_mu_; Status status_ TF_GUARDED_BY(status_mu_); }; diff --git a/tensorflow/core/distributed_runtime/collective_param_resolver_distributed.cc b/tensorflow/core/distributed_runtime/collective_param_resolver_distributed.cc index bfcd5b85ea4..650c52cd8da 100644 --- a/tensorflow/core/distributed_runtime/collective_param_resolver_distributed.cc +++ b/tensorflow/core/distributed_runtime/collective_param_resolver_distributed.cc @@ -304,10 +304,15 @@ void CollectiveParamResolverDistributed::CompleteGroupDistributed( } } -bool CollectiveParamResolverDistributed::InstanceIsCached(int32 instance_key) { +bool CollectiveParamResolverDistributed::InstanceIsCached(int32 group_key, + int32 instance_key) { mutex_lock l(instance_mu_); - const auto& it = instance_table_.find(instance_key); - return it != instance_table_.end(); + auto group_it = instance_table_.find(group_key); + if (group_it == instance_table_.end()) { + return false; + } + auto instance_it = group_it->second.find(instance_key); + return instance_it != group_it->second.end(); } void CollectiveParamResolverDistributed::UpdateInstanceCache( @@ -374,7 +379,7 @@ void CollectiveParamResolverDistributed::CompleteInstanceDistributed( if (group_leader_.empty()) { // This is the group leader so resolution is local. return CompleteInstanceLocal(device, gr, cp, cp->is_source, done); - } else if (InstanceIsCached(cp->instance.instance_key)) { + } else if (InstanceIsCached(gr->group.group_key, cp->instance.instance_key)) { return CompleteInstanceLocal(device, gr, cp, cp->is_source, done); } else { CompleteInstanceCall* call = new CompleteInstanceCall( diff --git a/tensorflow/core/distributed_runtime/collective_param_resolver_distributed.h b/tensorflow/core/distributed_runtime/collective_param_resolver_distributed.h index 7d30c3d5e55..684887430c3 100644 --- a/tensorflow/core/distributed_runtime/collective_param_resolver_distributed.h +++ b/tensorflow/core/distributed_runtime/collective_param_resolver_distributed.h @@ -65,7 +65,8 @@ class CollectiveParamResolverDistributed : public CollectiveParamResolverLocal { // Returns true iff there's an entry for this instance_key in the // local instance_table_. 
- bool InstanceIsCached(int32 instance_key) TF_LOCKS_EXCLUDED(instance_mu_); + bool InstanceIsCached(int32 group_key, int32 instance_key) + TF_LOCKS_EXCLUDED(instance_mu_); // Updates instance_table_ with contents of resp. void UpdateInstanceCache(const GroupRec* gr, CollectiveParams* cp, diff --git a/tensorflow/core/kernels/collective_ops.cc b/tensorflow/core/kernels/collective_ops.cc index 51a5219b24d..0230852d082 100644 --- a/tensorflow/core/kernels/collective_ops.cc +++ b/tensorflow/core/kernels/collective_ops.cc @@ -22,8 +22,10 @@ namespace tensorflow { namespace { -static string CollectiveKey(OpKernelContext* ctx, int32 instance_key) { - return strings::StrCat(instance_key, ":", ctx->frame_iter().frame_id, ":", +static string CollectiveKey(OpKernelContext* ctx, int32 group_key, + int32 instance_key) { + return strings::StrCat(group_key, ":", instance_key, ":", + ctx->frame_iter().frame_id, ":", ctx->frame_iter().iter_id); } @@ -52,7 +54,8 @@ class CollectiveOpKernel : public AsyncOpKernel { // A string encoding instance, frame and iter to be handed off to // the implementation for use in generating RecvBuf keys. string GetCollectiveKey(OpKernelContext* c) { - return CollectiveKey(c, col_params_.instance.instance_key); + return CollectiveKey(c, col_params_.group.group_key, + col_params_.instance.instance_key); } // Returns false if calling invocation of ComputeAsync should return @@ -557,7 +560,8 @@ class CollectiveReduceV2OpKernel : public AsyncOpKernel { << " instance " << col_params->instance.instance_key; col_exec->ExecuteAsync( c, *col_params, - CollectiveKey(c, col_params->instance.instance_key), + CollectiveKey(c, col_params->group.group_key, + col_params->instance.instance_key), actual_done); } else { c->SetStatus(s); diff --git a/tensorflow/python/kernel_tests/collective_ops_test.py b/tensorflow/python/kernel_tests/collective_ops_test.py index 25d93672c7b..4225df7537a 100644 --- a/tensorflow/python/kernel_tests/collective_ops_test.py +++ b/tensorflow/python/kernel_tests/collective_ops_test.py @@ -35,6 +35,8 @@ class CollectiveOpsTest(test.TestCase): cpus = config.list_physical_devices('CPU') self.assertEqual(len(cpus), 1) config.set_logical_device_configuration(cpus[0], [ + context.LogicalDeviceConfiguration(), + context.LogicalDeviceConfiguration(), context.LogicalDeviceConfiguration(), context.LogicalDeviceConfiguration() ]) @@ -78,6 +80,47 @@ class CollectiveOpsTest(test.TestCase): for result in run_all_reduce_2cpus(): self.assertAllClose(result, [2.], rtol=1e-5, atol=1e-5) + @test_util.run_v2_only + def testInstanceKeyScopedUnderGroupKey(self): + self._setup_context() + + @def_function.function + def single_all_reduce(in_value, group_size, group_key, instance_key): + return gen_collective_ops.collective_reduce_v2( + in_value, group_size, group_key, instance_key, merge_op='Add', + final_op='Id', communication_hint='auto') + + @def_function.function + def run_all_reduce_4cpus_same_instance_key(): + # Use a common instance key for both groups. + instance_key = constant_op.constant(0) + # We will create 2 groups each with 2 devices. + group_size = constant_op.constant(2) + # Group 0 comprises cpu:0 and cpu:1. + group0_key = constant_op.constant(0) + # Group 1 comprises cpu:2 and cpu:3. 
+ group1_key = constant_op.constant(1) + collectives = [] + with ops.device('/device:CPU:0'): + collectives.append(single_all_reduce( + constant_op.constant(1.), group_size, group0_key, instance_key)) + with ops.device('/device:CPU:1'): + collectives.append(single_all_reduce( + constant_op.constant(2.), group_size, group0_key, instance_key)) + with ops.device('/device:CPU:2'): + collectives.append(single_all_reduce( + constant_op.constant(3.), group_size, group1_key, instance_key)) + with ops.device('/device:CPU:3'): + collectives.append(single_all_reduce( + constant_op.constant(4.), group_size, group1_key, instance_key)) + return collectives + + results = run_all_reduce_4cpus_same_instance_key() + self.assertAllClose(results[0], 3., rtol=1e-5, atol=1e-5) + self.assertAllClose(results[1], 3., rtol=1e-5, atol=1e-5) + self.assertAllClose(results[2], 7., rtol=1e-5, atol=1e-5) + self.assertAllClose(results[3], 7., rtol=1e-5, atol=1e-5) + if __name__ == '__main__': test.main() From c504bd3899a8d6908132f046be0b49ef06a9aad8 Mon Sep 17 00:00:00 2001 From: Robert David Date: Fri, 31 Jul 2020 17:57:43 -0700 Subject: [PATCH 1920/2522] Cleanup MeanStdDevNormalization: - Use raw string literals instead of concatenating strings per line. - Add const to variables in the kernel. - Add comments to the kernel. - Change the batch to be the 2nd dimension of the grid in preparing for vectorized implementation. PiperOrigin-RevId: 324327708 Change-Id: I7244e49ffe29f6d6153080ac408e6955793df9fc --- .../cl/kernels/mean_stddev_normalization.cc | 89 ++++++++++--------- .../cl/kernels/mean_stddev_normalization.h | 2 +- 2 files changed, 49 insertions(+), 42 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc b/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc index 3e8e4d952de..9e7e0c3283e 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc @@ -27,54 +27,61 @@ namespace cl { MeanStdDevNormalization::MeanStdDevNormalization(const OperationDef& definition) : GPUOperation(definition) { - code_ = GetNormalizationCode(definition_); + code_ = GetNormalizationCode(); } -std::string MeanStdDevNormalization::GetNormalizationCode( - const OperationDef& op_def) { - AddSrcTensor("src_tensor", op_def.src_tensors[0]); - AddDstTensor("dst_tensor", op_def.dst_tensors[0]); +std::string MeanStdDevNormalization::GetNormalizationCode() { + AddSrcTensor("src_tensor", definition_.src_tensors[0]); + AddDstTensor("dst_tensor", definition_.dst_tensors[0]); - std::string c = GetCommonDefines(op_def.precision); - c += "__kernel void main_function(\n"; - c += "$0) {\n"; - c += " size_t B = get_global_id(0);\n"; - c += " if (B >= args.src_tensor.Batch()) { return; }\n"; - c += " if (get_global_id(1) > 0) { return; }\n"; // ?!? 
- c += " float sum = 0.0f;\n"; - c += " for (int S = 0; S < args.src_tensor.Slices(); ++S) {\n"; - c += " const float4 t = args.src_tensor.Read(0, 0, S, B);\n"; - c += " sum += t.x;\n"; - c += " if (S * 4 + 1 < args.src_tensor.Channels()) sum += t.y;\n"; - c += " if (S * 4 + 2 < args.src_tensor.Channels()) sum += t.z;\n"; - c += " if (S * 4 + 3 < args.src_tensor.Channels()) sum += t.w;\n"; - c += " }\n"; - c += " float mean = sum / args.src_tensor.Channels();\n"; - c += " float sum_diff_sq = 0.0f;\n"; - c += " for (int S = 0; S < args.src_tensor.Slices(); ++S) {\n"; - c += " const float4 t = args.src_tensor.Read(0, 0, S, B);\n"; - c += " float4 diff = t - (float4)(mean, mean, mean, mean);"; - c += " if (S * 4 + 1 >= args.src_tensor.Channels()) diff.y = 0.0f;\n"; - c += " if (S * 4 + 2 >= args.src_tensor.Channels()) diff.z = 0.0f;\n"; - c += " if (S * 4 + 3 >= args.src_tensor.Channels()) diff.w = 0.0f;\n"; - c += " float dotprod = dot(diff, diff);\n"; - c += " sum_diff_sq += dotprod;\n"; - c += " }\n"; - c += " const float variance = sum_diff_sq / args.src_tensor.Channels();\n"; - c += " const float stddev_inv = rsqrt(variance + 1.0e-8f);\n"; - c += " for (int S = 0; S < args.src_tensor.Slices(); ++S) {\n"; - c += " float4 t = args.src_tensor.Read(0, 0, S, B);\n"; - c += " t = (t - mean) * stddev_inv;\n"; - c += " FLT4 result = TO_FLT4(t);\n"; - c += " args.dst_tensor.Write(result, 0, 0, S, B);\n"; - c += " }\n"; - c += "}\n"; + std::string c = GetCommonDefines(definition_.precision); + c += R"(__kernel void main_function( +$0) { + if (get_global_id(0) > 0) { return; } + size_t B = get_global_id(1); + if (get_global_id(2) > 0) { return; } + if (B >= args.src_tensor.Batch()) { return; } + // Calculate the total sum of the input tensor. + // First, get a local sum of input[local_id_x + N*local_size_x] for all N. + float sum = 0.0f; + for (int S = 0; S < args.src_tensor.Slices(); ++S) { + const float4 t = args.src_tensor.Read(0, 0, S, B); + sum += t.x; + // Filter out out-of-bounds reads + if (S * 4 + 1 < args.src_tensor.Channels()) sum += t.y; + if (S * 4 + 2 < args.src_tensor.Channels()) sum += t.z; + if (S * 4 + 3 < args.src_tensor.Channels()) sum += t.w; + } + // Calculate the mean + const float mean = sum / args.src_tensor.Channels(); + // Calculate the squared sum of the difference from the mean. 
+ float sum_diff_sq = 0.0f; + for (int S = 0; S < args.src_tensor.Slices(); ++S) { + const float4 t = args.src_tensor.Read(0, 0, S, B); + float4 diff = t - mean; + // Filter out out-of-bounds reads + if (S * 4 + 1 >= args.src_tensor.Channels()) diff.y = 0.0f; + if (S * 4 + 2 >= args.src_tensor.Channels()) diff.z = 0.0f; + if (S * 4 + 3 >= args.src_tensor.Channels()) diff.w = 0.0f; + float dotprod = dot(diff, diff); + sum_diff_sq += dotprod; + } + // Calculate 1/stddev (with the 'regulazing constant' as in tensor_utils.cc) + const float variance = sum_diff_sq / args.src_tensor.Channels(); + const float stddev_inv = rsqrt(variance + 1.0e-8f); + // Calculate (t-mean)/stddev for each element + for (int S = 0; S < args.src_tensor.Slices(); ++S) { + const float4 t = args.src_tensor.Read(0, 0, S, B); + FLT4 result = TO_FLT4((t - mean) * stddev_inv); + args.dst_tensor.Write(result, 0, 0, S, B); + } +})"; return c; } int3 MeanStdDevNormalization::GetGridSize() const { - const int grid_x = dst_[0]->Batch(); - const int grid_y = 1; + const int grid_x = 1; + const int grid_y = src_[0]->Batch(); const int grid_z = 1; return int3(grid_x, grid_y, grid_z); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.h b/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.h index 7c8627df88c..5724d72bcd1 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.h @@ -40,7 +40,7 @@ class MeanStdDevNormalization : public GPUOperation { MeanStdDevNormalization& operator=(const MeanStdDevNormalization&) = delete; private: - std::string GetNormalizationCode(const OperationDef& op_def); + std::string GetNormalizationCode(); }; MeanStdDevNormalization CreateMeanStdDevNormalization( From 1c1a93b62bf775d85f8f07ed6868b44c8cb7c00b Mon Sep 17 00:00:00 2001 From: Henry Tan Date: Fri, 31 Jul 2020 18:01:38 -0700 Subject: [PATCH 1921/2522] Support Sharding/Unsharding program for sharding weight update. 
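When a compiled TPU program carries sharding/unsharding variants, the compilation cache now hangs a child entry for each variant off the main entry, and the variants are exposed through new C API entry points. A minimal usage sketch of those entry points (declared in tpu_program_c_api.h below) is shown here; the GetShardingVariants wrapper is illustrative only and not part of the change, and loading or executing the returned programs is elided:

    #include "tensorflow/core/tpu/kernels/tpu_program_c_api.h"

    // Returns the sharding/unsharding variants of `program`, or nullptrs if
    // the program was compiled without a sharding program.
    void GetShardingVariants(XLA_TpuProgram* program,
                             XLA_TpuProgram** sharding,
                             XLA_TpuProgram** unsharding) {
      *sharding = nullptr;
      *unsharding = nullptr;
      if (!TpuProgram_HasSharding(program)) return;
      *sharding = TpuProgram_GetTpuProgram(program, kSharding);
      *unsharding = TpuProgram_GetTpuProgram(program, kUnsharding);
    }

The sharding program is intended to run before the sharded weight update and the unsharding program after it, with the main program unchanged.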
PiperOrigin-RevId: 324328186 Change-Id: I6db88da3e13d325f202b6f6c025b96c623398f99 --- tensorflow/core/tpu/kernels/BUILD | 3 +- .../kernels/tpu_compilation_cache_external.cc | 38 ++++++++++++++++--- .../core/tpu/kernels/tpu_program_c_api.h | 14 +++++++ .../core/tpu/kernels/tpu_program_group.cc | 23 +++++++++++ .../core/tpu/kernels/tpu_program_group.h | 3 ++ .../tpu/kernels/tpu_program_group_interface.h | 5 ++- tensorflow/core/tpu/tpu_library_init_fns.inc | 2 + 7 files changed, 81 insertions(+), 7 deletions(-) diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index ed3227065ca..6f74123131f 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -371,6 +371,7 @@ cc_library( ":tpu_compile_op_support", ":tpu_mesh_state_interface", ":tpu_op_consts", + ":tpu_program_c_api_hdrs", ":tpu_program_group", ":tpu_util", ":trace_util_hdrs", @@ -380,10 +381,10 @@ cc_library( "//tensorflow/core:framework_internal", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", - "//tensorflow/core:protos_all_cc", "//tensorflow/core/profiler/lib:traceme", "//tensorflow/core/protobuf/tpu:compile_metadata_proto_cc", "@com_google_absl//absl/container:node_hash_map", + "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@com_google_absl//absl/synchronization", "@com_google_absl//absl/types:span", diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.cc b/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.cc index a58eb21f81d..80010d70cd4 100644 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.cc +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.cc @@ -16,15 +16,18 @@ limitations under the License. #include +#include "absl/memory/memory.h" #include "absl/strings/str_cat.h" #include "tensorflow/compiler/xla/service/hlo.pb.h" #include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/platform/random.h" #include "tensorflow/core/profiler/lib/traceme.h" +#include "tensorflow/core/tpu/kernels/compiled_subgraph.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_metrics.h" #include "tensorflow/core/tpu/kernels/tpu_compile_c_api.h" #include "tensorflow/core/tpu/kernels/tpu_compile_op_support.h" +#include "tensorflow/core/tpu/kernels/tpu_program_c_api.h" #include "tensorflow/core/tpu/kernels/tpu_util.h" #include "tensorflow/core/tpu/kernels/trace_util.h" @@ -48,6 +51,19 @@ void PopulateEntry(const std::string& key, CompiledSubgraph* entry, entry->tpu_program_group = absl::make_unique(std::move(tpu_program_group)); entry->initialized = true; + + if (entry->initialization_status.ok()) { + // Compute the entries total size once all members are initialized. + entry->total_size = entry->ComputeTotalSize(); + } +} + +std::unique_ptr CreateAndInitializeCompiledSubgraph( + CompiledSubgraph* main_entry) { + auto entry = absl::make_unique(); + entry->main_entry = main_entry; + entry->tpu_program_group = absl::make_unique(); + return entry; } } // namespace @@ -97,17 +113,29 @@ CompiledSubgraph* TpuCompilationCacheExternal::InitializeEntry( std::pair(main_entry->uid, main_entry)); CHECK(uid_inserted.second); - if (initialization_status.ok()) { - // Compute the entries total size once all members are initialized. 
- main_entry->total_size = tpu_program_group.program_size(); + if (tpu_program_group.has_sharding_program()) { + main_entry->sharding_entry = + CreateAndInitializeCompiledSubgraph(main_entry); + TpuProgramGroup sharding_programs; + sharding_programs.Initialize( + tpu_program_group.tpu_programs(TpuProgramShardingType::kSharding)); + PopulateEntry(key, main_entry->sharding_entry.get(), + std::move(sharding_programs)); + + main_entry->unsharding_entry = + CreateAndInitializeCompiledSubgraph(main_entry); + TpuProgramGroup unsharding_programs; + unsharding_programs.Initialize( + tpu_program_group.tpu_programs(TpuProgramShardingType::kUnsharding)); + PopulateEntry(key, main_entry->unsharding_entry.get(), + std::move(unsharding_programs)); } - // TODO(henrytan): handle sharding/unsharding. PopulateEntry(key, main_entry, std::move(tpu_program_group)); for (int64 i = 0; i < main_entry->proto_key.size(); ++i) { auto entry_inserted = entries_by_proto_key_.insert( - std::pair>( + std::pair>( main_entry->proto_key[i], std::make_pair(main_entry, i))); CHECK(entry_inserted.second); } diff --git a/tensorflow/core/tpu/kernels/tpu_program_c_api.h b/tensorflow/core/tpu/kernels/tpu_program_c_api.h index c9951e4d5ce..41c7d47cf97 100644 --- a/tensorflow/core/tpu/kernels/tpu_program_c_api.h +++ b/tensorflow/core/tpu/kernels/tpu_program_c_api.h @@ -21,6 +21,9 @@ limitations under the License. typedef struct XLA_TpuProgram XLA_TpuProgram; +// Enum for choosing sharding/unsharding program from a `XLA_TpuProgram` obj. +enum TpuProgramShardingType { kInvalid = 0, kMain, kSharding, kUnsharding }; + extern "C" { // Creates a new TPU program. @@ -64,6 +67,15 @@ TFTPU_CAPI_EXPORT void TpuProgram_GetHloMetadata( TFTPU_CAPI_EXPORT void TpuProgram_GetMayModifyVariables( const XLA_TpuProgram* tpu_program, bool* may_modify_variables); +// Check if TPU program has sharding. +TFTPU_CAPI_EXPORT bool TpuProgram_HasSharding( + const XLA_TpuProgram* tpu_program); + +// Gets TPU program by sharding type. Return value is valid only when the +// `status.status()` returns `OK`. +TFTPU_CAPI_EXPORT XLA_TpuProgram* TpuProgram_GetTpuProgram( + XLA_TpuProgram* tpu_program, TpuProgramShardingType type); + struct TfTpu_TpuProgramApiFn { TFTPU_ADD_FN_IN_STRUCT(TpuProgram_New); TFTPU_ADD_FN_IN_STRUCT(TpuProgram_Free); @@ -76,6 +88,8 @@ struct TfTpu_TpuProgramApiFn { TFTPU_ADD_FN_IN_STRUCT(TpuProgram_GetHostTransferInfo); TFTPU_ADD_FN_IN_STRUCT(TpuProgram_GetHloMetadata); TFTPU_ADD_FN_IN_STRUCT(TpuProgram_GetMayModifyVariables); + TFTPU_ADD_FN_IN_STRUCT(TpuProgram_HasSharding); + TFTPU_ADD_FN_IN_STRUCT(TpuProgram_GetTpuProgram); }; } // extern "C" diff --git a/tensorflow/core/tpu/kernels/tpu_program_group.cc b/tensorflow/core/tpu/kernels/tpu_program_group.cc index 2ee926f9d2b..39d1f38b104 100644 --- a/tensorflow/core/tpu/kernels/tpu_program_group.cc +++ b/tensorflow/core/tpu/kernels/tpu_program_group.cc @@ -22,6 +22,7 @@ limitations under the License. 
#include "tensorflow/core/tpu/kernels/tpu_compile.pb.h" #include "tensorflow/core/tpu/kernels/tpu_compile_c_api.h" #include "tensorflow/core/tpu/kernels/tpu_compile_op_support.h" +#include "tensorflow/core/tpu/kernels/tpu_program_c_api.h" #include "tensorflow/core/tpu/tpu_api.h" #include "tensorflow/stream_executor/tpu/proto_helper.h" #include "tensorflow/stream_executor/tpu/status_helper.h" @@ -153,6 +154,15 @@ void TpuProgramGroup::Initialize( RefreshHloMetadatasPtrs(); } +bool TpuProgramGroup::has_sharding_program() const { + for (const XLA_TpuProgram* tpu_program : tpu_programs_) { + if (!TpuProgramApiFn()->TpuProgram_HasShardingFn(tpu_program)) { + return false; + } + } + return true; +} + size_t TpuProgramGroup::program_count() const { return tpu_programs_.size(); } int64_t TpuProgramGroup::program_size() const { @@ -347,5 +357,18 @@ Status TpuProgramGroup::CompileAndBuild( return status.status(); } +std::vector TpuProgramGroup::tpu_programs( + TpuProgramShardingType sharding_type) const { + std::vector tpu_programs; + tpu_programs.reserve(tpu_programs_.size()); + for (size_t i = 0; i < tpu_programs_.size(); ++i) { + if (TpuProgramApiFn()->TpuProgram_HasShardingFn(tpu_programs_[i])) { + tpu_programs.push_back(TpuProgramApiFn()->TpuProgram_GetTpuProgramFn( + tpu_programs_[i], sharding_type)); + CHECK_NE(tpu_programs[i], nullptr); + } + } + return tpu_programs; +} } // namespace tpu } // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_program_group.h b/tensorflow/core/tpu/kernels/tpu_program_group.h index bceede5ac07..b76ef3d507a 100644 --- a/tensorflow/core/tpu/kernels/tpu_program_group.h +++ b/tensorflow/core/tpu/kernels/tpu_program_group.h @@ -109,6 +109,8 @@ class TpuProgramGroup : public TpuProgramGroupInterface { TpuProgramGroup(TpuProgramGroup&& other); TpuProgramGroup& operator=(TpuProgramGroup&&) = delete; + bool has_sharding_program() const override; + size_t program_count() const override; int64_t program_size() const override; @@ -124,6 +126,7 @@ class TpuProgramGroup : public TpuProgramGroupInterface { void set_may_modify_variables(const std::vector& may_modify_variables); const std::vector& tpu_programs() const; + std::vector tpu_programs(TpuProgramShardingType type) const; const XLA_TpuProgram* tpu_program(int index) const; void set_tpu_programs(absl::Span tpu_programs); diff --git a/tensorflow/core/tpu/kernels/tpu_program_group_interface.h b/tensorflow/core/tpu/kernels/tpu_program_group_interface.h index 9a2bf8d9f02..4af94f8e1ad 100644 --- a/tensorflow/core/tpu/kernels/tpu_program_group_interface.h +++ b/tensorflow/core/tpu/kernels/tpu_program_group_interface.h @@ -36,13 +36,16 @@ class TpuProgramGroupInterface { public: virtual ~TpuProgramGroupInterface() = default; + // Check if whether sharding/unsharding program exists. + virtual bool has_sharding_program() const = 0; + // Computes program count. virtual size_t program_count() const = 0; // Computes total program size. virtual int64_t program_size() const = 0; - // Unloads and destroys safely Tpu programs. + // Unloads and destroys safely TPU programs. virtual void UnloadAndDestroyPrograms() = 0; // Logs program memory summary. 
diff --git a/tensorflow/core/tpu/tpu_library_init_fns.inc b/tensorflow/core/tpu/tpu_library_init_fns.inc index 682cc8b1c13..6914a8cd102 100644 --- a/tensorflow/core/tpu/tpu_library_init_fns.inc +++ b/tensorflow/core/tpu/tpu_library_init_fns.inc @@ -64,6 +64,8 @@ tensorflow::Status SetTpuProgramStructFn(void* library_handle) { TFTPU_SET_FN(tpu_program_fn, TpuProgram_GetHostTransferInfo); TFTPU_SET_FN(tpu_program_fn, TpuProgram_GetHloMetadata); TFTPU_SET_FN(tpu_program_fn, TpuProgram_GetMayModifyVariables); + TFTPU_SET_FN(tpu_program_fn, TpuProgram_HasSharding); + TFTPU_SET_FN(tpu_program_fn, TpuProgram_GetTpuProgram); return tensorflow::Status::OK(); } From 6453d1cf2412769ce07a471fcf927e24709d00e7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 31 Jul 2020 18:10:40 -0700 Subject: [PATCH 1922/2522] Break up core/kernels/BUILD (part 1 of N): Move linear algebra kernels to subdirectory tensorflow/core/kernels/linalg with its own BUILD file. PiperOrigin-RevId: 324329298 Change-Id: Id7ca60219a3a7c83ef37f0e62a11a48cff2b7679 --- tensorflow/core/BUILD | 2 +- tensorflow/core/kernels/BUILD | 411 ++++++++++++++++-- .../banded_triangular_solve_op.cc | 2 +- .../banded_triangular_solve_op_test.cc | 2 +- .../kernels/{linalg => }/cholesky_grad.cc | 2 +- .../core/kernels/{linalg => }/cholesky_op.cc | 6 +- .../core/{util => kernels}/cuda_solvers.cc | 2 +- .../core/{util => kernels}/cuda_solvers.h | 6 +- .../core/{util => kernels}/cuda_sparse.cc | 4 +- .../core/{util => kernels}/cuda_sparse.h | 9 +- .../kernels/{linalg => }/determinant_op.cc | 6 +- .../kernels/{linalg => }/determinant_op.h | 6 +- .../{linalg => }/determinant_op_gpu.cu.cc | 4 +- .../kernels/{linalg => }/eig_op_complex128.cc | 2 +- .../kernels/{linalg => }/eig_op_complex64.cc | 2 +- .../kernels/{linalg => }/eig_op_double.cc | 2 +- .../core/kernels/{linalg => }/eig_op_float.cc | 2 +- .../core/kernels/{linalg => }/eig_op_impl.h | 8 +- .../core/kernels/{linalg => }/einsum_op.h | 4 +- .../kernels/{linalg => }/einsum_op_gpu.cu.cc | 2 +- .../kernels/{linalg => }/einsum_op_impl.h | 8 +- .../{linalg => }/einsum_op_impl_bfloat16.cc | 2 +- .../{linalg => }/einsum_op_impl_complex128.cc | 2 +- .../{linalg => }/einsum_op_impl_complex64.cc | 2 +- .../{linalg => }/einsum_op_impl_double.cc | 2 +- .../{linalg => }/einsum_op_impl_float.cc | 2 +- .../{linalg => }/einsum_op_impl_half.cc | 2 +- .../{linalg => }/einsum_op_impl_int32.cc | 2 +- .../{linalg => }/einsum_op_impl_int64.cc | 2 +- .../core/kernels/{linalg => }/eye_functor.h | 4 +- .../{linalg => }/eye_functor_gpu.cu.cc | 2 +- tensorflow/core/kernels/linalg/BUILD | 353 --------------- .../core/kernels/linalg/linalg_ops_common.h | 221 ---------- .../kernels/{linalg => }/linalg_ops_common.cc | 2 +- tensorflow/core/kernels/linalg_ops_common.h | 205 ++++++++- tensorflow/core/kernels/{linalg => }/lu_op.cc | 0 .../core/kernels/{linalg => }/lu_op_gpu.cu.cc | 2 +- .../{linalg => }/matrix_band_part_op.cc | 3 +- .../{linalg => }/matrix_band_part_op.h | 6 +- .../matrix_band_part_op_gpu.cu.cc | 2 +- .../kernels/{linalg => }/matrix_diag_op.cc | 2 +- .../kernels/{linalg => }/matrix_diag_op.h | 6 +- .../{linalg => }/matrix_diag_op_gpu.cu.cc | 2 +- .../{linalg => }/matrix_exponential_op.cc | 2 +- .../kernels/{linalg => }/matrix_inverse_op.cc | 6 +- .../{linalg => }/matrix_logarithm_op.cc | 2 +- .../{linalg => }/matrix_set_diag_op.cc | 4 +- .../kernels/{linalg => }/matrix_set_diag_op.h | 6 +- .../{linalg => }/matrix_set_diag_op_gpu.cu.cc | 2 +- .../matrix_solve_ls_op_complex128.cc | 2 +- 
.../matrix_solve_ls_op_complex64.cc | 2 +- .../{linalg => }/matrix_solve_ls_op_double.cc | 2 +- .../{linalg => }/matrix_solve_ls_op_float.cc | 2 +- .../{linalg => }/matrix_solve_ls_op_impl.h | 8 +- .../kernels/{linalg => }/matrix_solve_op.cc | 4 +- .../{linalg => }/matrix_square_root_op.cc | 2 +- .../matrix_triangular_solve_op_complex.cc | 2 +- .../matrix_triangular_solve_op_impl.h | 12 +- .../matrix_triangular_solve_op_real.cc | 2 +- .../matrix_triangular_solve_op_test.cc | 0 .../kernels/{linalg => }/qr_op_complex128.cc | 2 +- .../kernels/{linalg => }/qr_op_complex64.cc | 2 +- .../core/kernels/{linalg => }/qr_op_double.cc | 2 +- .../core/kernels/{linalg => }/qr_op_float.cc | 2 +- .../core/kernels/{linalg => }/qr_op_impl.h | 14 +- .../core/{util => kernels}/rocm_solvers.cc | 2 +- .../core/{util => kernels}/rocm_solvers.h | 6 +- .../core/{util => kernels}/rocm_sparse.cc | 4 +- .../core/kernels/segment_reduction_ops_impl.h | 4 +- .../{linalg => }/self_adjoint_eig_op.cc | 2 +- .../self_adjoint_eig_v2_op_complex128.cc | 2 +- .../self_adjoint_eig_v2_op_complex64.cc | 2 +- .../self_adjoint_eig_v2_op_double.cc | 2 +- .../self_adjoint_eig_v2_op_float.cc | 2 +- .../self_adjoint_eig_v2_op_gpu.cc | 2 +- .../self_adjoint_eig_v2_op_impl.h | 8 +- tensorflow/core/kernels/sparse/BUILD | 4 +- tensorflow/core/kernels/sparse/add_op.cc | 4 +- tensorflow/core/kernels/sparse/conj_op.cc | 4 +- .../sparse/csr_sparse_matrix_to_dense_op.cc | 4 +- .../csr_sparse_matrix_to_sparse_tensor_op.cc | 4 +- .../sparse/dense_to_csr_sparse_matrix_op.cc | 4 +- .../core/kernels/sparse/kernels_gpu.cu.cc | 2 +- tensorflow/core/kernels/sparse/mat_mul_op.cc | 4 +- tensorflow/core/kernels/sparse/mul_op.cc | 2 +- tensorflow/core/kernels/sparse/nnz_op.cc | 4 +- tensorflow/core/kernels/sparse/softmax_op.cc | 2 +- .../core/kernels/sparse/sparse_mat_mul_op.cc | 4 +- .../sparse/sparse_matrix_components_op.cc | 4 +- .../sparse_tensor_to_csr_sparse_matrix_op.cc | 4 +- .../core/kernels/sparse/transpose_op.cc | 2 +- .../kernels/{linalg => }/svd_op_complex128.cc | 2 +- .../kernels/{linalg => }/svd_op_complex64.cc | 2 +- .../kernels/{linalg => }/svd_op_double.cc | 2 +- .../core/kernels/{linalg => }/svd_op_float.cc | 2 +- .../kernels/{linalg => }/svd_op_gpu.cu.cc | 6 +- .../core/kernels/{linalg => }/svd_op_impl.h | 8 +- .../{linalg => }/tridiagonal_matmul_op.cc | 2 +- .../tridiagonal_matmul_op_gpu.cu.cc | 6 +- .../{linalg => }/tridiagonal_solve_op.cc | 2 +- .../tridiagonal_solve_op_gpu.cu.cc | 6 +- tensorflow/core/kernels/where_op.cc | 2 +- tensorflow/core/util/BUILD | 63 --- 103 files changed, 751 insertions(+), 840 deletions(-) rename tensorflow/core/kernels/{linalg => }/banded_triangular_solve_op.cc (99%) rename tensorflow/core/kernels/{linalg => }/banded_triangular_solve_op_test.cc (99%) rename tensorflow/core/kernels/{linalg => }/cholesky_grad.cc (99%) rename tensorflow/core/kernels/{linalg => }/cholesky_op.cc (98%) rename tensorflow/core/{util => kernels}/cuda_solvers.cc (99%) rename tensorflow/core/{util => kernels}/cuda_solvers.h (99%) rename tensorflow/core/{util => kernels}/cuda_sparse.cc (99%) rename tensorflow/core/{util => kernels}/cuda_sparse.h (99%) rename tensorflow/core/kernels/{linalg => }/determinant_op.cc (99%) rename tensorflow/core/kernels/{linalg => }/determinant_op.h (90%) rename tensorflow/core/kernels/{linalg => }/determinant_op_gpu.cu.cc (98%) rename tensorflow/core/kernels/{linalg => }/eig_op_complex128.cc (93%) rename tensorflow/core/kernels/{linalg => }/eig_op_complex64.cc (93%) rename tensorflow/core/kernels/{linalg 
=> }/eig_op_double.cc (93%) rename tensorflow/core/kernels/{linalg => }/eig_op_float.cc (93%) rename tensorflow/core/kernels/{linalg => }/eig_op_impl.h (93%) rename tensorflow/core/kernels/{linalg => }/einsum_op.h (94%) rename tensorflow/core/kernels/{linalg => }/einsum_op_gpu.cu.cc (96%) rename tensorflow/core/kernels/{linalg => }/einsum_op_impl.h (99%) rename tensorflow/core/kernels/{linalg => }/einsum_op_impl_bfloat16.cc (94%) rename tensorflow/core/kernels/{linalg => }/einsum_op_impl_complex128.cc (95%) rename tensorflow/core/kernels/{linalg => }/einsum_op_impl_complex64.cc (95%) rename tensorflow/core/kernels/{linalg => }/einsum_op_impl_double.cc (95%) rename tensorflow/core/kernels/{linalg => }/einsum_op_impl_float.cc (95%) rename tensorflow/core/kernels/{linalg => }/einsum_op_impl_half.cc (95%) rename tensorflow/core/kernels/{linalg => }/einsum_op_impl_int32.cc (94%) rename tensorflow/core/kernels/{linalg => }/einsum_op_impl_int64.cc (94%) rename tensorflow/core/kernels/{linalg => }/eye_functor.h (90%) rename tensorflow/core/kernels/{linalg => }/eye_functor_gpu.cu.cc (97%) delete mode 100644 tensorflow/core/kernels/linalg/BUILD delete mode 100644 tensorflow/core/kernels/linalg/linalg_ops_common.h rename tensorflow/core/kernels/{linalg => }/linalg_ops_common.cc (99%) rename tensorflow/core/kernels/{linalg => }/lu_op.cc (100%) rename tensorflow/core/kernels/{linalg => }/lu_op_gpu.cu.cc (99%) rename tensorflow/core/kernels/{linalg => }/matrix_band_part_op.cc (99%) rename tensorflow/core/kernels/{linalg => }/matrix_band_part_op.h (86%) rename tensorflow/core/kernels/{linalg => }/matrix_band_part_op_gpu.cu.cc (97%) rename tensorflow/core/kernels/{linalg => }/matrix_diag_op.cc (99%) rename tensorflow/core/kernels/{linalg => }/matrix_diag_op.h (94%) rename tensorflow/core/kernels/{linalg => }/matrix_diag_op_gpu.cu.cc (99%) rename tensorflow/core/kernels/{linalg => }/matrix_exponential_op.cc (97%) rename tensorflow/core/kernels/{linalg => }/matrix_inverse_op.cc (98%) rename tensorflow/core/kernels/{linalg => }/matrix_logarithm_op.cc (97%) rename tensorflow/core/kernels/{linalg => }/matrix_set_diag_op.cc (99%) rename tensorflow/core/kernels/{linalg => }/matrix_set_diag_op.h (89%) rename tensorflow/core/kernels/{linalg => }/matrix_set_diag_op_gpu.cu.cc (99%) rename tensorflow/core/kernels/{linalg => }/matrix_solve_ls_op_complex128.cc (92%) rename tensorflow/core/kernels/{linalg => }/matrix_solve_ls_op_complex64.cc (92%) rename tensorflow/core/kernels/{linalg => }/matrix_solve_ls_op_double.cc (92%) rename tensorflow/core/kernels/{linalg => }/matrix_solve_ls_op_float.cc (92%) rename tensorflow/core/kernels/{linalg => }/matrix_solve_ls_op_impl.h (96%) rename tensorflow/core/kernels/{linalg => }/matrix_solve_op.cc (99%) rename tensorflow/core/kernels/{linalg => }/matrix_square_root_op.cc (97%) rename tensorflow/core/kernels/{linalg => }/matrix_triangular_solve_op_complex.cc (92%) rename tensorflow/core/kernels/{linalg => }/matrix_triangular_solve_op_impl.h (97%) rename tensorflow/core/kernels/{linalg => }/matrix_triangular_solve_op_real.cc (93%) rename tensorflow/core/kernels/{linalg => }/matrix_triangular_solve_op_test.cc (100%) rename tensorflow/core/kernels/{linalg => }/qr_op_complex128.cc (96%) rename tensorflow/core/kernels/{linalg => }/qr_op_complex64.cc (95%) rename tensorflow/core/kernels/{linalg => }/qr_op_double.cc (96%) rename tensorflow/core/kernels/{linalg => }/qr_op_float.cc (96%) rename tensorflow/core/kernels/{linalg => }/qr_op_impl.h (96%) rename tensorflow/core/{util => 
kernels}/rocm_solvers.cc (99%) rename tensorflow/core/{util => kernels}/rocm_solvers.h (96%) rename tensorflow/core/{util => kernels}/rocm_sparse.cc (99%) rename tensorflow/core/kernels/{linalg => }/self_adjoint_eig_op.cc (98%) rename tensorflow/core/kernels/{linalg => }/self_adjoint_eig_v2_op_complex128.cc (93%) rename tensorflow/core/kernels/{linalg => }/self_adjoint_eig_v2_op_complex64.cc (93%) rename tensorflow/core/kernels/{linalg => }/self_adjoint_eig_v2_op_double.cc (92%) rename tensorflow/core/kernels/{linalg => }/self_adjoint_eig_v2_op_float.cc (92%) rename tensorflow/core/kernels/{linalg => }/self_adjoint_eig_v2_op_gpu.cc (99%) rename tensorflow/core/kernels/{linalg => }/self_adjoint_eig_v2_op_impl.h (91%) rename tensorflow/core/kernels/{linalg => }/svd_op_complex128.cc (93%) rename tensorflow/core/kernels/{linalg => }/svd_op_complex64.cc (93%) rename tensorflow/core/kernels/{linalg => }/svd_op_double.cc (93%) rename tensorflow/core/kernels/{linalg => }/svd_op_float.cc (93%) rename tensorflow/core/kernels/{linalg => }/svd_op_gpu.cu.cc (99%) rename tensorflow/core/kernels/{linalg => }/svd_op_impl.h (95%) rename tensorflow/core/kernels/{linalg => }/tridiagonal_matmul_op.cc (98%) rename tensorflow/core/kernels/{linalg => }/tridiagonal_matmul_op_gpu.cu.cc (96%) rename tensorflow/core/kernels/{linalg => }/tridiagonal_solve_op.cc (99%) rename tensorflow/core/kernels/{linalg => }/tridiagonal_solve_op_gpu.cu.cc (99%) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index bf382105369..1fdf316a036 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -996,7 +996,7 @@ cc_library( "//tensorflow/core/kernels:histogram_op", "//tensorflow/core/kernels:image", "//tensorflow/core/kernels:io", - "//tensorflow/core/kernels/linalg:linalg", + "//tensorflow/core/kernels:linalg", "//tensorflow/core/kernels:lookup", "//tensorflow/core/kernels:logging", "//tensorflow/core/kernels:manip", diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index a9884eea52b..9f84ba5d135 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -1039,6 +1039,9 @@ cc_library( ":immutable_constant_op", ":inplace_ops", ":listdiff_op", + ":matrix_band_part_op", + ":matrix_diag_op", + ":matrix_set_diag_op", ":mirror_pad_op", ":one_hot_op", ":pack_op", @@ -1171,6 +1174,26 @@ tf_kernel_library( deps = ARRAY_DEPS, ) +tf_kernel_library( + name = "matrix_band_part_op", + prefix = "matrix_band_part_op", + deps = if_cuda([ + ":cuda_solvers", + ]) + ARRAY_DEPS, +) + +tf_kernel_library( + name = "matrix_diag_op", + prefix = "matrix_diag_op", + deps = ARRAY_DEPS, +) + +tf_kernel_library( + name = "matrix_set_diag_op", + prefix = "matrix_set_diag_op", + deps = ARRAY_DEPS + [":matrix_diag_op"], +) + tf_kernel_library( name = "mirror_pad_op", prefix = "mirror_pad_op", @@ -1382,7 +1405,7 @@ tf_kernel_library( "where_op_gpu_impl_8.cu.cc", ], deps = if_cuda_or_rocm([ - "//tensorflow/core/util:cuda_solvers", + ":cuda_solvers", ]) + [":gpu_prim_hdrs"] + ARRAY_DEPS, ) @@ -2762,6 +2785,21 @@ tf_cuda_cc_tests( ], ) +tf_kernel_library( + name = "eye_functor", + hdrs = ["eye_functor.h"], + gpu_srcs = [ + "eye_functor_gpu.cu.cc", + "eye_functor.h", + ], + visibility = [":friends"], + deps = [ + "//tensorflow/core:framework", + "//third_party/eigen3", + ], + alwayslink = 0, +) + cc_library( name = "fifo_queue", srcs = ["fifo_queue.cc"], @@ -3520,6 +3558,289 @@ tf_cc_tests( ], ) +cc_library( + name = "linalg", + deps = [ + ":banded_triangular_solve_op", + ":cholesky_grad", + 
":cholesky_op", + ":determinant_op", + ":eig_op", + ":einsum_op", + ":lu_op", + ":matrix_exponential_op", + ":matrix_inverse_op", + ":matrix_logarithm_op", + ":matrix_solve_ls_op", + ":matrix_solve_op", + ":matrix_square_root_op", + ":matrix_triangular_solve_op", + ":qr_op", + ":self_adjoint_eig_op", + ":self_adjoint_eig_v2_op", + ":svd_op", + ":tridiagonal_matmul_op", + ":tridiagonal_solve_op", + ], +) + +tf_kernel_library( + name = "cuda_solvers", + srcs = ["cuda_solvers.cc"], + hdrs = ["cuda_solvers.h"], + # @local_config_cuda//cuda:cusolver_static, //third_party/eigen3:blas, + # and //third_party/libf2c all contain various parts of BLAS, LAPACK, + # and f2c helper functions in global namespace. Tell the compiler to + # allow multiple definitions when linking this. + linkopts = select({ + "//tensorflow:macos": [], + "//tensorflow:windows": [], + "//conditions:default": ["-Wl,-z,muldefs"], + }), + visibility = [":friends"], + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core/platform/default/build_config:cublas_plugin", + "//tensorflow/stream_executor/cuda:cublas_lib", + "//tensorflow/stream_executor/cuda:cusolver_lib", + ], +) + +tf_kernel_library( + name = "rocm_solvers", + srcs = ["rocm_solvers.cc"], + hdrs = ["rocm_solvers.h"], + visibility = [":friends"], + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "//tensorflow/stream_executor/lib", + "//tensorflow/stream_executor/platform:dso_loader", + "//tensorflow/stream_executor/rocm:rocblas_plugin", + "//tensorflow/stream_executor/rocm:rocm_gpu_executor", + ] + if_rocm([ + "@local_config_rocm//rocm:rocprim", + ]), +) + +tf_kernel_library( + name = "cuda_sparse", + srcs = if_cuda(["cuda_sparse.cc"]) + if_rocm(["rocm_sparse.cc"]), + hdrs = ["cuda_sparse.h"], + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core/kernels:cuda_solvers", + ] + if_cuda([ + "//tensorflow/stream_executor/cuda:cusparse_lib", + "@cub_archive//:cub", + ]) + if_rocm([ + "@local_config_rocm//rocm:hipsparse", + ]), +) + +LINALG_DEPS = [ + ":linalg_ops_common", + "//third_party/eigen3", + "//tensorflow/core:framework", + "//tensorflow/core:lib", +] + if_cuda([ + ":cuda_solvers", + ":transpose_functor", +]) + if_rocm([ + ":rocm_solvers", +]) + +tf_kernel_library( + name = "cholesky_op", + prefix = "cholesky_op", + deps = if_cuda([ + ":matrix_band_part_op", + ]) + LINALG_DEPS, +) + +tf_kernel_library( + name = "cholesky_grad", + prefix = "cholesky_grad", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "determinant_op", + prefix = "determinant_op", + deps = if_cuda([ + ":fill_functor", + ]) + LINALG_DEPS, +) + +tf_kernel_library( + name = "matrix_exponential_op", + prefix = "matrix_exponential_op", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "matrix_logarithm_op", + prefix = "matrix_logarithm_op", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "self_adjoint_eig_op", + prefix = "self_adjoint_eig_op", + deps = LINALG_DEPS + ["//tensorflow/core:lib_internal"], +) + +tf_kernel_library( + name = "self_adjoint_eig_v2_op", + prefix = "self_adjoint_eig_v2_op", + deps = LINALG_DEPS + ["//tensorflow/core:lib_internal"] + if_cuda([ + ":cast_op", + ":cwise_op", + ]), +) + +tf_kernel_library( + name = "eig_op", + prefix = "eig_op", + deps = LINALG_DEPS + ["//tensorflow/core:lib_internal"] + if_cuda([ + ":cast_op", + ":cwise_op", + ]), +) + +tf_kernel_library( + name = "matrix_inverse_op", + prefix = 
"matrix_inverse_op", + deps = LINALG_DEPS + if_cuda([":eye_functor"]), +) + +tf_kernel_library( + name = "matrix_solve_ls_op", + prefix = "matrix_solve_ls_op", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "matrix_solve_op", + prefix = "matrix_solve_op", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "matrix_square_root_op", + prefix = "matrix_square_root_op", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "banded_triangular_solve_op", + prefix = "banded_triangular_solve_op", + deps = LINALG_DEPS + [":fill_functor"], +) + +tf_kernel_library( + name = "matrix_triangular_solve_op", + hdrs = ["matrix_triangular_solve_op_impl.h"], + prefix = "matrix_triangular_solve_op", + deps = [ + ":linalg_ops_common", + "//third_party/eigen3", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + ":fill_functor", + "//tensorflow/core:stream_executor", + ] + if_cuda([ + "//tensorflow/core/platform/default/build_config:cublas_plugin", + ":cuda_solvers", + ]) + if_rocm([ + "@local_config_rocm//rocm:rocprim", + ":rocm_solvers", + ]) + if_cuda_or_rocm([ + ":transpose_functor", + ]), +) + +tf_kernel_library( + name = "tridiagonal_matmul_op", + srcs = ["tridiagonal_matmul_op.cc"], + gpu_srcs = ["tridiagonal_matmul_op_gpu.cu.cc"], + deps = LINALG_DEPS + if_cuda([ + ":cuda_sparse", + ]), +) + +tf_kernel_library( + name = "tridiagonal_solve_op", + srcs = ["tridiagonal_solve_op.cc"], + gpu_srcs = ["tridiagonal_solve_op_gpu.cu.cc"], + deps = LINALG_DEPS + if_cuda([ + ":cuda_sparse", + ]), +) + +tf_kernel_library( + name = "qr_op", + prefix = "qr_op", + deps = LINALG_DEPS + if_cuda([ + ":cwise_op", + ":eye_functor", + ":matrix_band_part_op", + ]), +) + +tf_kernel_library( + name = "svd_op", + prefix = "svd_op", + deps = LINALG_DEPS + if_cuda([ + ":eye_functor", + ]), +) + +tf_kernel_library( + name = "lu_op", + prefix = "lu_op", + deps = if_cuda([ + ":cuda_solvers", + ":transpose_functor", + ]) + [ + "//third_party/eigen3", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + ], +) + +tf_kernel_library( + name = "einsum_op", + prefix = "einsum_op", + deps = [ + ":batch_matmul_op", + ":fill_functor", + ":reduction_ops", + ":transpose_functor", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core/profiler/lib:traceme", + "//third_party/eigen3", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/strings", + ], +) + +cc_library( + name = "linalg_ops_common", + srcs = ["linalg_ops_common.cc"], + hdrs = ["linalg_ops_common.h"], + visibility = ["//visibility:private"], + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//third_party/eigen3", + ], +) + cc_library( name = "logging", deps = [ @@ -3885,7 +4206,7 @@ tf_kernel_library( name = "segment_reduction_ops", prefix = "segment_reduction_ops", deps = MATH_DEPS + if_cuda_or_rocm([ - "//tensorflow/core/util:cuda_solvers", + ":cuda_solvers", ]), ) @@ -4082,6 +4403,45 @@ tf_cuda_cc_test( ], ) +tf_cuda_cc_test( + name = "banded_triangular_solve_op_test", + size = "small", + srcs = ["banded_triangular_solve_op_test.cc"], + deps = [ + ":banded_triangular_solve_op", + ":matrix_set_diag_op", + ":matrix_triangular_solve_op", + ":ops_testutil", + ":ops_util", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + ], +) + +tf_cuda_cc_test( + name = "matrix_triangular_solve_op_test", + size = "small", 
+ srcs = ["matrix_triangular_solve_op_test.cc"], + deps = [ + ":broadcast_to_op", + ":matrix_triangular_solve_op", + ":ops_testutil", + ":ops_util", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + ], +) + tf_cuda_cc_test( name = "scan_ops_test", size = "small", @@ -6310,7 +6670,10 @@ filegroup( "lookup_table_init_op.h", "lookup_table_op.h", "lookup_util.h", + "linalg_ops_common.h", "list_kernels.h", + "matrix_diag_op.h", + "matrix_set_diag_op.h", "maxpooling_op.h", "mfcc.h", "mfcc_dct.h", @@ -6358,9 +6721,6 @@ filegroup( "xent_op.h", ] + [ "//tensorflow/core/kernels/boosted_trees/quantiles:weighted_quantiles_hdrs", - "//tensorflow/core/kernels/linalg:linalg_ops_common.h", - "//tensorflow/core/kernels/linalg:matrix_diag_op.h", - "//tensorflow/core/kernels/linalg:matrix_set_diag_op.h", ], ) @@ -6461,6 +6821,16 @@ filegroup( "encode_wav_op.cc", "eigen_contraction_kernel.cc", "eigen_contraction_kernel.h", + "einsum_op_impl_half.cc", + "einsum_op_impl_bfloat16.cc", + "einsum_op_impl_int32.cc", + "einsum_op_impl_int64.cc", + "einsum_op_impl_float.cc", + "einsum_op_impl_double.cc", + "einsum_op_impl_complex64.cc", + "einsum_op_impl_complex128.cc", + "einsum_op_impl.h", + "einsum_op.h", "fake_quant_ops.cc", "fifo_queue.cc", "fifo_queue_op.cc", @@ -6472,17 +6842,6 @@ filegroup( "population_count_op.h", "winograd_transform.h", ":android_extended_ops_headers", - ] + [ - "//tensorflow/core/kernels/linalg:einsum_op_impl_half.cc", - "//tensorflow/core/kernels/linalg:einsum_op_impl_bfloat16.cc", - "//tensorflow/core/kernels/linalg:einsum_op_impl_int32.cc", - "//tensorflow/core/kernels/linalg:einsum_op_impl_int64.cc", - "//tensorflow/core/kernels/linalg:einsum_op_impl_float.cc", - "//tensorflow/core/kernels/linalg:einsum_op_impl_double.cc", - "//tensorflow/core/kernels/linalg:einsum_op_impl_complex64.cc", - "//tensorflow/core/kernels/linalg:einsum_op_impl_complex128.cc", - "//tensorflow/core/kernels/linalg:einsum_op_impl.h", - "//tensorflow/core/kernels/linalg:einsum_op.h", ] + select({ ":xsmm_convolutions": [ "xsmm_conv2d.h", @@ -6513,6 +6872,7 @@ filegroup( "in_topk_op.cc", "in_topk_op.h", "initializable_lookup_table.cc", + "linalg_ops_common.cc", "list_kernels.cc", "logging_ops.cc", "logging_ops.h", @@ -6520,6 +6880,9 @@ filegroup( "lookup_table_op.cc", "lookup_util.cc", "lrn_op.cc", + "matrix_diag_op.cc", + "matrix_inverse_op.cc", + "matrix_set_diag_op.cc", "maxpooling_op.cc", "mfcc.cc", "mfcc_dct.cc", @@ -6641,10 +7004,6 @@ filegroup( ":android_extended_ops_headers", ] + [ "//tensorflow/core/kernels/boosted_trees:quantile_ops.cc", - "//tensorflow/core/kernels/linalg:linalg_ops_common.cc", - "//tensorflow/core/kernels/linalg:matrix_diag_op.cc", - "//tensorflow/core/kernels/linalg:matrix_inverse_op.cc", - "//tensorflow/core/kernels/linalg:matrix_set_diag_op.cc", ], ) @@ -8466,15 +8825,3 @@ tf_kernel_library( "@sobol_data", ], ) - -# ---- temporary forwarding declaration for libraries in linalg -# TODO(b/160344057): Remove after updating dependencies. 
-tf_kernel_library( - name = "matrix_inverse_op", - deps = ["//tensorflow/core/kernels/linalg:matrix_inverse_op"], -) - -tf_kernel_library( - name = "einsum_op", - deps = ["//tensorflow/core/kernels/linalg:einsum_op"], -) diff --git a/tensorflow/core/kernels/linalg/banded_triangular_solve_op.cc b/tensorflow/core/kernels/banded_triangular_solve_op.cc similarity index 99% rename from tensorflow/core/kernels/linalg/banded_triangular_solve_op.cc rename to tensorflow/core/kernels/banded_triangular_solve_op.cc index 6758dcf5b8b..d01a015502a 100644 --- a/tensorflow/core/kernels/linalg/banded_triangular_solve_op.cc +++ b/tensorflow/core/kernels/banded_triangular_solve_op.cc @@ -20,7 +20,7 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/kernels/fill_functor.h" -#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" diff --git a/tensorflow/core/kernels/linalg/banded_triangular_solve_op_test.cc b/tensorflow/core/kernels/banded_triangular_solve_op_test.cc similarity index 99% rename from tensorflow/core/kernels/linalg/banded_triangular_solve_op_test.cc rename to tensorflow/core/kernels/banded_triangular_solve_op_test.cc index 7c20b88845f..37e904a3e0e 100644 --- a/tensorflow/core/kernels/linalg/banded_triangular_solve_op_test.cc +++ b/tensorflow/core/kernels/banded_triangular_solve_op_test.cc @@ -21,7 +21,7 @@ limitations under the License. #include "tensorflow/core/graph/graph.h" #include "tensorflow/core/graph/node_builder.h" #include "tensorflow/core/graph/testlib.h" -#include "tensorflow/core/kernels/linalg/matrix_set_diag_op.h" +#include "tensorflow/core/kernels/matrix_set_diag_op.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/test_benchmark.h" diff --git a/tensorflow/core/kernels/linalg/cholesky_grad.cc b/tensorflow/core/kernels/cholesky_grad.cc similarity index 99% rename from tensorflow/core/kernels/linalg/cholesky_grad.cc rename to tensorflow/core/kernels/cholesky_grad.cc index 31a5570cddf..eac66e580dd 100644 --- a/tensorflow/core/kernels/linalg/cholesky_grad.cc +++ b/tensorflow/core/kernels/cholesky_grad.cc @@ -18,7 +18,7 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/types.h" -#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg_ops_common.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/cholesky_op.cc b/tensorflow/core/kernels/cholesky_op.cc similarity index 98% rename from tensorflow/core/kernels/linalg/cholesky_op.cc rename to tensorflow/core/kernels/cholesky_op.cc index eae09124b36..ff8fd08f228 100644 --- a/tensorflow/core/kernels/linalg/cholesky_op.cc +++ b/tensorflow/core/kernels/cholesky_op.cc @@ -25,16 +25,16 @@ limitations under the License. 
#include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" #if GOOGLE_CUDA #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#include "tensorflow/core/kernels/linalg/matrix_band_part_op.h" +#include "tensorflow/core/kernels/cuda_solvers.h" +#include "tensorflow/core/kernels/matrix_band_part_op.h" #include "tensorflow/core/platform/stream_executor.h" -#include "tensorflow/core/util/cuda_solvers.h" #endif namespace tensorflow { diff --git a/tensorflow/core/util/cuda_solvers.cc b/tensorflow/core/kernels/cuda_solvers.cc similarity index 99% rename from tensorflow/core/util/cuda_solvers.cc rename to tensorflow/core/kernels/cuda_solvers.cc index 3e4d2a05ac6..f41ce2a5d27 100644 --- a/tensorflow/core/util/cuda_solvers.cc +++ b/tensorflow/core/kernels/cuda_solvers.cc @@ -14,7 +14,7 @@ ============================================================================== */ #ifdef GOOGLE_CUDA -#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/kernels/cuda_solvers.h" #include #include diff --git a/tensorflow/core/util/cuda_solvers.h b/tensorflow/core/kernels/cuda_solvers.h similarity index 99% rename from tensorflow/core/util/cuda_solvers.h rename to tensorflow/core/kernels/cuda_solvers.h index 46bd7b42dc6..eb1d5c8a200 100644 --- a/tensorflow/core/util/cuda_solvers.h +++ b/tensorflow/core/kernels/cuda_solvers.h @@ -14,8 +14,8 @@ limitations under the License. ============================================================================== */ -#ifndef TENSORFLOW_CORE_KERNELS_LINALG_CUDA_SOLVERS_H_ -#define TENSORFLOW_CORE_KERNELS_LINALG_CUDA_SOLVERS_H_ +#ifndef TENSORFLOW_CORE_KERNELS_CUDA_SOLVERS_H_ +#define TENSORFLOW_CORE_KERNELS_CUDA_SOLVERS_H_ // This header declares the class CudaSolver, which contains wrappers of linear // algebra solvers in the cuBlas and cuSolverDN libraries for use in TensorFlow @@ -489,4 +489,4 @@ inline DeviceLapackInfo CudaSolver::GetDeviceLapackInfo( #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#endif // TENSORFLOW_CORE_KERNELS_LINALG_CUDA_SOLVERS_H_ +#endif // TENSORFLOW_CORE_KERNELS_CUDA_SOLVERS_H_ diff --git a/tensorflow/core/util/cuda_sparse.cc b/tensorflow/core/kernels/cuda_sparse.cc similarity index 99% rename from tensorflow/core/util/cuda_sparse.cc rename to tensorflow/core/kernels/cuda_sparse.cc index 47e018560e1..141aae61571 100644 --- a/tensorflow/core/util/cuda_sparse.cc +++ b/tensorflow/core/kernels/cuda_sparse.cc @@ -15,7 +15,7 @@ limitations under the License. #ifdef GOOGLE_CUDA -#include "tensorflow/core/util/cuda_sparse.h" +#include "tensorflow/core/kernels/cuda_sparse.h" #include #include @@ -28,6 +28,7 @@ limitations under the License. #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/cuda_solvers.h" #include "tensorflow/core/lib/core/blocking_counter.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/stringpiece.h" @@ -37,7 +38,6 @@ limitations under the License. 
#include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/stream_executor.h" #include "tensorflow/core/platform/types.h" -#include "tensorflow/core/util/cuda_solvers.h" // TODO(rmlarsen,penporn): Investigate using newer kernels in CUDA 10.1+. diff --git a/tensorflow/core/util/cuda_sparse.h b/tensorflow/core/kernels/cuda_sparse.h similarity index 99% rename from tensorflow/core/util/cuda_sparse.h rename to tensorflow/core/kernels/cuda_sparse.h index 813e87610dd..978bc9005ed 100644 --- a/tensorflow/core/util/cuda_sparse.h +++ b/tensorflow/core/kernels/cuda_sparse.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_LINALG_CUDA_SPARSE_H_ -#define TENSORFLOW_CORE_KERNELS_LINALG_CUDA_SPARSE_H_ +#ifndef TENSORFLOW_CORE_KERNELS_CUDA_SPARSE_H_ +#define TENSORFLOW_CORE_KERNELS_CUDA_SPARSE_H_ // This header declares the class GpuSparse, which contains wrappers of // cuSparse libraries for use in TensorFlow kernels. @@ -75,7 +75,8 @@ using gpuStream_t = hipStream_t; namespace tensorflow { -inline string ConvertGPUSparseErrorToString(const gpusparseStatus_t status) { +inline std::string ConvertGPUSparseErrorToString( + const gpusparseStatus_t status) { switch (status) { #define STRINGIZE(q) #q #define RETURN_IF_STATUS(err) \ @@ -584,4 +585,4 @@ class GpuSparseCsrSortingConversionInfo { #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#endif // TENSORFLOW_CORE_KERNELS_LINALG_CUDA_SPARSE_H_ +#endif // TENSORFLOW_CORE_KERNELS_CUDA_SPARSE_H_ diff --git a/tensorflow/core/kernels/linalg/determinant_op.cc b/tensorflow/core/kernels/determinant_op.cc similarity index 99% rename from tensorflow/core/kernels/linalg/determinant_op.cc rename to tensorflow/core/kernels/determinant_op.cc index 8f0b0b618cf..b06f42384eb 100644 --- a/tensorflow/core/kernels/linalg/determinant_op.cc +++ b/tensorflow/core/kernels/determinant_op.cc @@ -20,7 +20,7 @@ limitations under the License. #if GOOGLE_CUDA #define EIGEN_USE_GPU #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#include "tensorflow/core/kernels/linalg/determinant_op.h" +#include "tensorflow/core/kernels/determinant_op.h" #endif #include "third_party/eigen3/Eigen/LU" @@ -28,14 +28,14 @@ limitations under the License. #include "tensorflow/core/framework/numeric_types.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" #if GOOGLE_CUDA +#include "tensorflow/core/kernels/cuda_solvers.h" #include "tensorflow/core/kernels/fill_functor.h" -#include "tensorflow/core/util/cuda_solvers.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/determinant_op.h b/tensorflow/core/kernels/determinant_op.h similarity index 90% rename from tensorflow/core/kernels/linalg/determinant_op.h rename to tensorflow/core/kernels/determinant_op.h index 6ace1bef44b..eefdfe0ae40 100644 --- a/tensorflow/core/kernels/linalg/determinant_op.h +++ b/tensorflow/core/kernels/determinant_op.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_LINALG_DETERMINANT_OP_H_ -#define TENSORFLOW_CORE_KERNELS_LINALG_DETERMINANT_OP_H_ +#ifndef TENSORFLOW_CORE_KERNELS_DETERMINANT_OP_H_ +#define TENSORFLOW_CORE_KERNELS_DETERMINANT_OP_H_ #include "tensorflow/core/framework/tensor_types.h" @@ -44,4 +44,4 @@ struct LogDeterminantFromPivotedLUFunctor { } // namespace functor } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_LINALG_DETERMINANT_OP_H_ +#endif // TENSORFLOW_CORE_KERNELS_DETERMINANT_OP_H_ diff --git a/tensorflow/core/kernels/linalg/determinant_op_gpu.cu.cc b/tensorflow/core/kernels/determinant_op_gpu.cu.cc similarity index 98% rename from tensorflow/core/kernels/linalg/determinant_op_gpu.cu.cc rename to tensorflow/core/kernels/determinant_op_gpu.cu.cc index f6ab327bce0..9aa64b3a7da 100644 --- a/tensorflow/core/kernels/linalg/determinant_op_gpu.cu.cc +++ b/tensorflow/core/kernels/determinant_op_gpu.cu.cc @@ -21,8 +21,8 @@ limitations under the License. #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/tensor_types.h" -#include "tensorflow/core/kernels/linalg/determinant_op.h" -#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/kernels/cuda_solvers.h" +#include "tensorflow/core/kernels/determinant_op.h" #include "tensorflow/core/util/gpu_kernel_helper.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/eig_op_complex128.cc b/tensorflow/core/kernels/eig_op_complex128.cc similarity index 93% rename from tensorflow/core/kernels/linalg/eig_op_complex128.cc rename to tensorflow/core/kernels/eig_op_complex128.cc index bd4b6fe36d0..988cc2f98d9 100644 --- a/tensorflow/core/kernels/linalg/eig_op_complex128.cc +++ b/tensorflow/core/kernels/eig_op_complex128.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/linalg/eig_op_impl.h" +#include "tensorflow/core/kernels/eig_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/eig_op_complex64.cc b/tensorflow/core/kernels/eig_op_complex64.cc similarity index 93% rename from tensorflow/core/kernels/linalg/eig_op_complex64.cc rename to tensorflow/core/kernels/eig_op_complex64.cc index b5b4a26ee85..6a3f7928715 100644 --- a/tensorflow/core/kernels/linalg/eig_op_complex64.cc +++ b/tensorflow/core/kernels/eig_op_complex64.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/linalg/eig_op_impl.h" +#include "tensorflow/core/kernels/eig_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/eig_op_double.cc b/tensorflow/core/kernels/eig_op_double.cc similarity index 93% rename from tensorflow/core/kernels/linalg/eig_op_double.cc rename to tensorflow/core/kernels/eig_op_double.cc index c360637c84a..2cd931cc135 100644 --- a/tensorflow/core/kernels/linalg/eig_op_double.cc +++ b/tensorflow/core/kernels/eig_op_double.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/linalg/eig_op_impl.h" +#include "tensorflow/core/kernels/eig_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/eig_op_float.cc b/tensorflow/core/kernels/eig_op_float.cc similarity index 93% rename from tensorflow/core/kernels/linalg/eig_op_float.cc rename to tensorflow/core/kernels/eig_op_float.cc index 18f576fcc19..a06f76e935f 100644 --- a/tensorflow/core/kernels/linalg/eig_op_float.cc +++ b/tensorflow/core/kernels/eig_op_float.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/linalg/eig_op_impl.h" +#include "tensorflow/core/kernels/eig_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/eig_op_impl.h b/tensorflow/core/kernels/eig_op_impl.h similarity index 93% rename from tensorflow/core/kernels/linalg/eig_op_impl.h rename to tensorflow/core/kernels/eig_op_impl.h index a7aff7c2a5d..4ebb6bde08b 100644 --- a/tensorflow/core/kernels/linalg/eig_op_impl.h +++ b/tensorflow/core/kernels/eig_op_impl.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_LINALG_EIG_OP_IMPL_H_ -#define TENSORFLOW_CORE_KERNELS_LINALG_EIG_OP_IMPL_H_ +#ifndef TENSORFLOW_CORE_KERNELS_EIG_OP_IMPL_H_ +#define TENSORFLOW_CORE_KERNELS_EIG_OP_IMPL_H_ // See docs in ../ops/linalg_ops.cc. @@ -23,7 +23,7 @@ limitations under the License. #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/denormal.h" #include "tensorflow/core/platform/logging.h" @@ -95,4 +95,4 @@ class EigOp : public LinearAlgebraOp { } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_LINALG_EIG_OP_IMPL_H_ +#endif // TENSORFLOW_CORE_KERNELS_EIG_OP_IMPL_H_ diff --git a/tensorflow/core/kernels/linalg/einsum_op.h b/tensorflow/core/kernels/einsum_op.h similarity index 94% rename from tensorflow/core/kernels/linalg/einsum_op.h rename to tensorflow/core/kernels/einsum_op.h index f22f33c600a..31d1109004c 100644 --- a/tensorflow/core/kernels/linalg/einsum_op.h +++ b/tensorflow/core/kernels/einsum_op.h @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_LINALG_EINSUM_OP_H_ -#define TENSORFLOW_CORE_KERNELS_LINALG_EINSUM_OP_H_ +#ifndef TENSORFLOW_CORE_KERNELS_EINSUM_OP_H_ +#define TENSORFLOW_CORE_KERNELS_EINSUM_OP_H_ #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/tensor_types.h" diff --git a/tensorflow/core/kernels/linalg/einsum_op_gpu.cu.cc b/tensorflow/core/kernels/einsum_op_gpu.cu.cc similarity index 96% rename from tensorflow/core/kernels/linalg/einsum_op_gpu.cu.cc rename to tensorflow/core/kernels/einsum_op_gpu.cu.cc index 5461e43e0ab..2935b7fd02a 100644 --- a/tensorflow/core/kernels/linalg/einsum_op_gpu.cu.cc +++ b/tensorflow/core/kernels/einsum_op_gpu.cu.cc @@ -17,7 +17,7 @@ limitations under the License. #define EIGEN_USE_GPU #include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/kernels/linalg/einsum_op.h" +#include "tensorflow/core/kernels/einsum_op.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/einsum_op_impl.h b/tensorflow/core/kernels/einsum_op_impl.h similarity index 99% rename from tensorflow/core/kernels/linalg/einsum_op_impl.h rename to tensorflow/core/kernels/einsum_op_impl.h index b9b2d1f0eae..312738442b8 100644 --- a/tensorflow/core/kernels/linalg/einsum_op_impl.h +++ b/tensorflow/core/kernels/einsum_op_impl.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_LINALG_EINSUM_OP_IMPL_H_ -#define TENSORFLOW_CORE_KERNELS_LINALG_EINSUM_OP_IMPL_H_ +#ifndef TENSORFLOW_CORE_KERNELS_EINSUM_OP_IMPL_H_ +#define TENSORFLOW_CORE_KERNELS_EINSUM_OP_IMPL_H_ #define EIGEN_USE_THREADS #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM @@ -31,8 +31,8 @@ limitations under the License. #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/kernels/batch_matmul_op_impl.h" +#include "tensorflow/core/kernels/einsum_op.h" #include "tensorflow/core/kernels/fill_functor.h" -#include "tensorflow/core/kernels/linalg/einsum_op.h" #include "tensorflow/core/kernels/reduction_ops_common.h" #include "tensorflow/core/kernels/transpose_functor.h" #include "tensorflow/core/lib/core/errors.h" @@ -780,4 +780,4 @@ DECLARE_GPU_SPECS(complex128); } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_LINALG_EINSUM_OP_IMPL_H_ +#endif // TENSORFLOW_CORE_KERNELS_EINSUM_OP_IMPL_H_ diff --git a/tensorflow/core/kernels/linalg/einsum_op_impl_bfloat16.cc b/tensorflow/core/kernels/einsum_op_impl_bfloat16.cc similarity index 94% rename from tensorflow/core/kernels/linalg/einsum_op_impl_bfloat16.cc rename to tensorflow/core/kernels/einsum_op_impl_bfloat16.cc index e2e13052df5..44508f86a5e 100644 --- a/tensorflow/core/kernels/linalg/einsum_op_impl_bfloat16.cc +++ b/tensorflow/core/kernels/einsum_op_impl_bfloat16.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/linalg/einsum_op_impl.h" +#include "tensorflow/core/kernels/einsum_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/einsum_op_impl_complex128.cc b/tensorflow/core/kernels/einsum_op_impl_complex128.cc similarity index 95% rename from tensorflow/core/kernels/linalg/einsum_op_impl_complex128.cc rename to tensorflow/core/kernels/einsum_op_impl_complex128.cc index ff78d460acf..8473cbf545d 100644 --- a/tensorflow/core/kernels/linalg/einsum_op_impl_complex128.cc +++ b/tensorflow/core/kernels/einsum_op_impl_complex128.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/linalg/einsum_op_impl.h" +#include "tensorflow/core/kernels/einsum_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/einsum_op_impl_complex64.cc b/tensorflow/core/kernels/einsum_op_impl_complex64.cc similarity index 95% rename from tensorflow/core/kernels/linalg/einsum_op_impl_complex64.cc rename to tensorflow/core/kernels/einsum_op_impl_complex64.cc index cd3788846b2..bd506a04f5f 100644 --- a/tensorflow/core/kernels/linalg/einsum_op_impl_complex64.cc +++ b/tensorflow/core/kernels/einsum_op_impl_complex64.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/linalg/einsum_op_impl.h" +#include "tensorflow/core/kernels/einsum_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/einsum_op_impl_double.cc b/tensorflow/core/kernels/einsum_op_impl_double.cc similarity index 95% rename from tensorflow/core/kernels/linalg/einsum_op_impl_double.cc rename to tensorflow/core/kernels/einsum_op_impl_double.cc index e0c093fa4a9..f994590779b 100644 --- a/tensorflow/core/kernels/linalg/einsum_op_impl_double.cc +++ b/tensorflow/core/kernels/einsum_op_impl_double.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/linalg/einsum_op_impl.h" +#include "tensorflow/core/kernels/einsum_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/einsum_op_impl_float.cc b/tensorflow/core/kernels/einsum_op_impl_float.cc similarity index 95% rename from tensorflow/core/kernels/linalg/einsum_op_impl_float.cc rename to tensorflow/core/kernels/einsum_op_impl_float.cc index ad9135c991c..1875310b687 100644 --- a/tensorflow/core/kernels/linalg/einsum_op_impl_float.cc +++ b/tensorflow/core/kernels/einsum_op_impl_float.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/linalg/einsum_op_impl.h" +#include "tensorflow/core/kernels/einsum_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/einsum_op_impl_half.cc b/tensorflow/core/kernels/einsum_op_impl_half.cc similarity index 95% rename from tensorflow/core/kernels/linalg/einsum_op_impl_half.cc rename to tensorflow/core/kernels/einsum_op_impl_half.cc index 72a9f6bec4f..0486b133e62 100644 --- a/tensorflow/core/kernels/linalg/einsum_op_impl_half.cc +++ b/tensorflow/core/kernels/einsum_op_impl_half.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/linalg/einsum_op_impl.h" +#include "tensorflow/core/kernels/einsum_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/einsum_op_impl_int32.cc b/tensorflow/core/kernels/einsum_op_impl_int32.cc similarity index 94% rename from tensorflow/core/kernels/linalg/einsum_op_impl_int32.cc rename to tensorflow/core/kernels/einsum_op_impl_int32.cc index 7569c979c59..db5169498d9 100644 --- a/tensorflow/core/kernels/linalg/einsum_op_impl_int32.cc +++ b/tensorflow/core/kernels/einsum_op_impl_int32.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/linalg/einsum_op_impl.h" +#include "tensorflow/core/kernels/einsum_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/einsum_op_impl_int64.cc b/tensorflow/core/kernels/einsum_op_impl_int64.cc similarity index 94% rename from tensorflow/core/kernels/linalg/einsum_op_impl_int64.cc rename to tensorflow/core/kernels/einsum_op_impl_int64.cc index 6ee0ebc9637..7f1a1eac411 100644 --- a/tensorflow/core/kernels/linalg/einsum_op_impl_int64.cc +++ b/tensorflow/core/kernels/einsum_op_impl_int64.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/linalg/einsum_op_impl.h" +#include "tensorflow/core/kernels/einsum_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/eye_functor.h b/tensorflow/core/kernels/eye_functor.h similarity index 90% rename from tensorflow/core/kernels/linalg/eye_functor.h rename to tensorflow/core/kernels/eye_functor.h index c77372f089a..3799cfba9ae 100644 --- a/tensorflow/core/kernels/linalg/eye_functor.h +++ b/tensorflow/core/kernels/eye_functor.h @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_LINALG_EYE_FUNCTOR_H_ -#define TENSORFLOW_CORE_KERNELS_LINALG_EYE_FUNCTOR_H_ +#ifndef TENSORFLOW_CORE_KERNELS_EYE_FUNCTOR_H_ +#define TENSORFLOW_CORE_KERNELS_EYE_FUNCTOR_H_ #include "tensorflow/core/framework/tensor_types.h" diff --git a/tensorflow/core/kernels/linalg/eye_functor_gpu.cu.cc b/tensorflow/core/kernels/eye_functor_gpu.cu.cc similarity index 97% rename from tensorflow/core/kernels/linalg/eye_functor_gpu.cu.cc rename to tensorflow/core/kernels/eye_functor_gpu.cu.cc index 85865588f2c..90df538dd2c 100644 --- a/tensorflow/core/kernels/linalg/eye_functor_gpu.cu.cc +++ b/tensorflow/core/kernels/eye_functor_gpu.cu.cc @@ -20,7 +20,7 @@ limitations under the License. #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/type_traits.h" -#include "tensorflow/core/kernels/linalg/eye_functor.h" +#include "tensorflow/core/kernels/eye_functor.h" #include "tensorflow/core/util/gpu_kernel_helper.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/BUILD b/tensorflow/core/kernels/linalg/BUILD deleted file mode 100644 index 52bead431ea..00000000000 --- a/tensorflow/core/kernels/linalg/BUILD +++ /dev/null @@ -1,353 +0,0 @@ -load( - "//tensorflow:tensorflow.bzl", - "if_cuda_or_rocm", - "tf_kernel_library", -) -load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") -load( - "@local_config_rocm//rocm:build_defs.bzl", - "if_rocm", -) -load("//tensorflow:tensorflow.bzl", "tf_cuda_cc_test") - -# Description: -# Op kernel implementations for TensorFlow. -# -# Note: Any test that uses GPU support and which we would like to -# benchmark should be linked statically so that it can be executed -# from a py_binary or cuda_py_test test logger. For such a test, -# append "_gpu" to the test name to invoke the GPU benchmarks. Example: -# -# # for CPU tests -# $ bazel test --config opt //third_party/tensorflow/core/kernels:my_op_test -# # for GPU benchmarks -# $ bazel run --config opt --config=cuda //third_party/tensorflow/core/kernels:my_op_test_gpu -- --benchmarks=.. -# -package( - default_visibility = [ - "//tensorflow:__subpackages__", - "//tensorflow:internal", - ], - licenses = ["notice"], # Apache 2.0 -) - -# Export a few files for use on Android. 
-exports_files([ - "einsum_op_impl_half.cc", - "einsum_op_impl_bfloat16.cc", - "einsum_op_impl_int32.cc", - "einsum_op_impl_int64.cc", - "einsum_op_impl_float.cc", - "einsum_op_impl_double.cc", - "einsum_op_impl_complex64.cc", - "einsum_op_impl_complex128.cc", - "einsum_op_impl.h", - "einsum_op.h", - "linalg_ops_common.h", - "linalg_ops_common.cc", - "matrix_diag_op.h", - "matrix_diag_op.cc", - "matrix_inverse_op.cc", - "matrix_set_diag_op.h", - "matrix_set_diag_op.cc", -]) - -# Public support libraries ---------------------------------------------------- - -cc_library( - name = "linalg", - deps = [ - ":banded_triangular_solve_op", - ":cholesky_grad", - ":cholesky_op", - ":determinant_op", - ":eig_op", - ":einsum_op", - ":lu_op", - ":matrix_band_part_op", - ":matrix_diag_op", - ":matrix_exponential_op", - ":matrix_inverse_op", - ":matrix_logarithm_op", - ":matrix_set_diag_op", - ":matrix_solve_ls_op", - ":matrix_solve_op", - ":matrix_square_root_op", - ":matrix_triangular_solve_op", - ":qr_op", - ":self_adjoint_eig_op", - ":self_adjoint_eig_v2_op", - ":svd_op", - ":tridiagonal_matmul_op", - ":tridiagonal_solve_op", - ], -) - -LINALG_DEPS = [ - ":linalg_ops_common", - ":eye_functor", - "//third_party/eigen3", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core/kernels:cast_op", - "//tensorflow/core/kernels:fill_functor", -] + if_cuda([ - "//tensorflow/core/util:cuda_solvers", - "//tensorflow/core/kernels:transpose_functor", -]) + if_rocm([ - "//tensorflow/core/util:rocm_solvers", -]) - -tf_kernel_library( - name = "matrix_band_part_op", - prefix = "matrix_band_part_op", - deps = LINALG_DEPS, -) - -tf_kernel_library( - name = "matrix_diag_op", - prefix = "matrix_diag_op", - deps = LINALG_DEPS, -) - -tf_kernel_library( - name = "matrix_set_diag_op", - prefix = "matrix_set_diag_op", - deps = LINALG_DEPS + [":matrix_diag_op"], -) - -tf_kernel_library( - name = "cholesky_op", - prefix = "cholesky_op", - deps = if_cuda([ - ":matrix_band_part_op", - ]) + LINALG_DEPS, -) - -tf_kernel_library( - name = "cholesky_grad", - prefix = "cholesky_grad", - deps = LINALG_DEPS, -) - -tf_kernel_library( - name = "determinant_op", - prefix = "determinant_op", - deps = LINALG_DEPS, -) - -tf_kernel_library( - name = "matrix_exponential_op", - prefix = "matrix_exponential_op", - deps = LINALG_DEPS, -) - -tf_kernel_library( - name = "matrix_logarithm_op", - prefix = "matrix_logarithm_op", - deps = LINALG_DEPS, -) - -tf_kernel_library( - name = "self_adjoint_eig_op", - prefix = "self_adjoint_eig_op", - deps = LINALG_DEPS + ["//tensorflow/core:lib_internal"], -) - -tf_kernel_library( - name = "self_adjoint_eig_v2_op", - prefix = "self_adjoint_eig_v2_op", - deps = LINALG_DEPS + ["//tensorflow/core:lib_internal"] + if_cuda([ - "//tensorflow/core/kernels:cwise_op", - ]), -) - -tf_kernel_library( - name = "eig_op", - prefix = "eig_op", - deps = LINALG_DEPS + ["//tensorflow/core:lib_internal"] + if_cuda([ - "//tensorflow/core/kernels:cwise_op", - ]), -) - -tf_kernel_library( - name = "matrix_inverse_op", - prefix = "matrix_inverse_op", - deps = LINALG_DEPS, -) - -tf_kernel_library( - name = "matrix_solve_ls_op", - prefix = "matrix_solve_ls_op", - deps = LINALG_DEPS, -) - -tf_kernel_library( - name = "matrix_solve_op", - prefix = "matrix_solve_op", - deps = LINALG_DEPS, -) - -tf_kernel_library( - name = "matrix_square_root_op", - prefix = "matrix_square_root_op", - deps = LINALG_DEPS, -) - -tf_kernel_library( - name = "banded_triangular_solve_op", - prefix = "banded_triangular_solve_op", - deps 
= LINALG_DEPS, -) - -tf_kernel_library( - name = "matrix_triangular_solve_op", - hdrs = ["matrix_triangular_solve_op_impl.h"], - prefix = "matrix_triangular_solve_op", - deps = [ - ":linalg_ops_common", - "//third_party/eigen3", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core/kernels:fill_functor", - "//tensorflow/core:stream_executor", - ] + if_cuda([ - "//tensorflow/core/platform/default/build_config:cublas_plugin", - "//tensorflow/core/util:cuda_solvers", - ]) + if_rocm([ - "@local_config_rocm//rocm:rocprim", - "//tensorflow/core/util:rocm_solvers", - ]) + if_cuda_or_rocm([ - "//tensorflow/core/kernels:transpose_functor", - ]), -) - -tf_kernel_library( - name = "tridiagonal_matmul_op", - srcs = ["tridiagonal_matmul_op.cc"], - gpu_srcs = ["tridiagonal_matmul_op_gpu.cu.cc"], - deps = LINALG_DEPS + if_cuda([ - "//tensorflow/core/util:cuda_sparse", - ]), -) - -tf_kernel_library( - name = "tridiagonal_solve_op", - srcs = ["tridiagonal_solve_op.cc"], - gpu_srcs = ["tridiagonal_solve_op_gpu.cu.cc"], - deps = LINALG_DEPS + if_cuda([ - "//tensorflow/core/util:cuda_sparse", - ]), -) - -tf_kernel_library( - name = "qr_op", - prefix = "qr_op", - deps = LINALG_DEPS + if_cuda([ - "//tensorflow/core/kernels:cwise_op", - ":matrix_band_part_op", - ]), -) - -tf_kernel_library( - name = "svd_op", - prefix = "svd_op", - deps = LINALG_DEPS + if_cuda([ - ]), -) - -tf_kernel_library( - name = "lu_op", - prefix = "lu_op", - deps = if_cuda([ - "//tensorflow/core/util:cuda_solvers", - "//tensorflow/core/kernels:transpose_functor", - ]) + [ - "//third_party/eigen3", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - ], -) - -tf_kernel_library( - name = "einsum_op", - prefix = "einsum_op", - deps = [ - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core/kernels:batch_matmul_op", - "//tensorflow/core/kernels:fill_functor", - "//tensorflow/core/kernels:reduction_ops", - "//tensorflow/core/kernels:transpose_functor", - "//tensorflow/core/profiler/lib:traceme", - "//third_party/eigen3", - "@com_google_absl//absl/container:flat_hash_map", - "@com_google_absl//absl/strings", - ], -) - -cc_library( - name = "linalg_ops_common", - srcs = ["linalg_ops_common.cc"], - hdrs = ["linalg_ops_common.h"], - visibility = ["//visibility:private"], - deps = [ - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//third_party/eigen3", - ], -) - -tf_cuda_cc_test( - name = "banded_triangular_solve_op_test", - size = "small", - srcs = ["banded_triangular_solve_op_test.cc"], - deps = [ - ":banded_triangular_solve_op", - ":matrix_set_diag_op", - ":matrix_triangular_solve_op", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "//tensorflow/core/kernels:ops_testutil", - "//tensorflow/core/kernels:ops_util", - ], -) - -tf_kernel_library( - name = "eye_functor", - hdrs = ["eye_functor.h"], - gpu_srcs = [ - "eye_functor_gpu.cu.cc", - "eye_functor.h", - ], - visibility = ["//tensorflow/core/kernels:friends"], - deps = [ - "//tensorflow/core:framework", - "//third_party/eigen3", - ], - alwayslink = 0, -) - -tf_cuda_cc_test( - name = "matrix_triangular_solve_op_test", - size = "small", - srcs = ["matrix_triangular_solve_op_test.cc"], - deps = [ - ":matrix_triangular_solve_op", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - 
"//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "//tensorflow/core/kernels:broadcast_to_op", - "//tensorflow/core/kernels:ops_testutil", - "//tensorflow/core/kernels:ops_util", - ], -) diff --git a/tensorflow/core/kernels/linalg/linalg_ops_common.h b/tensorflow/core/kernels/linalg/linalg_ops_common.h deleted file mode 100644 index 3ab37480c90..00000000000 --- a/tensorflow/core/kernels/linalg/linalg_ops_common.h +++ /dev/null @@ -1,221 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_LINALG_LINALG_OPS_COMMON_H_ -#define TENSORFLOW_CORE_KERNELS_LINALG_LINALG_OPS_COMMON_H_ - -// Classes to support linear algebra functionality, similar to the numpy.linalg -// module. Supports batch computation on several matrices at once, sharding the -// computations across different threads if necessary. -#include - -#include "third_party/eigen3/Eigen/Core" -#include "tensorflow/core/framework/kernel_def_builder.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/framework/tensor_types.h" -#include "tensorflow/core/framework/types.h" -#include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/gtl/inlined_vector.h" -#include "tensorflow/core/platform/types.h" -#include "tensorflow/core/util/work_sharder.h" - -namespace tensorflow { - -// Base class for linear algebra operators. -template -class LinearAlgebraOp : public OpKernel { - public: - explicit LinearAlgebraOp(OpKernelConstruction* context) : OpKernel(context) {} - - void Compute(OpKernelContext* context) override; - - protected: - using TensorShapes = gtl::InlinedVector; - // Returns the number of leading inputs that are to be treated as matrix - // inputs. By default this is all the inputs. Derived classes can override - // this to tell the base class to ignore one or more trailing inputs. - virtual int NumMatrixInputs(const OpKernelContext* context) const { - return context->num_inputs(); - } - - // Returns true if the number of inputs and their shapes are as expected. - // Many ops take a single square input matrix, so we provide that as a default - // implementation for convenience. - virtual void ValidateInputMatrixShapes( - OpKernelContext* context, const TensorShapes& input_matrix_shapes) const { - ValidateSingleSquareMatrix(context, input_matrix_shapes); - } - - // Convenience validators for common cases: - // - // Validate op taking a single matrix A. - static void ValidateSingleMatrix(OpKernelContext* context, - const TensorShapes& input_matrix_shapes); - // Validate op taking a single square matrix A. 
-  static void ValidateSingleSquareMatrix(
-      OpKernelContext* context, const TensorShapes& input_matrix_shapes);
-  // Validate op taking two matrices A and B that have the same number of rows.
-  static void ValidateSolver(OpKernelContext* context,
-                             const TensorShapes& input_matrix_shapes);
-  // Validate op taking two matrices A and B that have the same number of rows
-  // and A is square.
-  static void ValidateSquareSolver(OpKernelContext* context,
-                                   const TensorShapes& input_matrix_shapes);
-
-  // Returns the output shapes of each individual matrix operation. Output
-  // matrix shapes must be rank 0, 1, or 2. Scalar outputs are rank 0.
-  //
-  // The derived class may return a number of shapes (N) less than
-  // context->num_outputs() (M) to indicate that only a leading subset of
-  // the outputs will be populated. In this case, a dummy scalar tensor with
-  // value zero will be returned for the last M-N outputs.
-  //
-  // For many ops, the output dimensions are the same as the input dimensions,
-  // so we provide that as a default implementation for convenience.
-  virtual TensorShapes GetOutputMatrixShapes(
-      const TensorShapes& input_matrix_shapes) const {
-    return input_matrix_shapes;
-  }
-
-  // Returns the cost per matrix operation. This is used to determine the
-  // number of threads to use for parallelizing calls to ComputeMatrix in
-  // batch mode. Cost per unit is assumed to be roughly 1ns, based on comments
-  // in core/util/work_sharder.cc. Many linear algebra ops take roughly max(m,n)
-  // * min(m,n)^2, where the first input matrix is m-by-n. We provide that as a
-  // default implementation for convenience.
-  virtual int64 GetCostPerUnit(const TensorShapes& input_matrix_shapes) const {
-    double m = static_cast(input_matrix_shapes[0].dim_size(0));
-    double n = static_cast(input_matrix_shapes[0].dim_size(1));
-    double cost = std::max(m, n) * std::min(m, n) * std::min(m, n);
-    return cost >= static_cast(kint64max) ? kint64max
-                                          : static_cast(cost);
-  }
-
-  // Returns true if it is safe to forward (alias) input to output buffer
-  // and expect the kernel to perform the computation inplace.
-  virtual bool EnableInputForwarding() const { return true; }
-
-  using InputMatrix = Eigen::Matrix;
-  using InputConstMatrixMap = Eigen::Map;
-  using InputMatrixMap = Eigen::Map;
-  using InputConstVectorMap =
-      Eigen::Map>;
-  using InputConstMatrixMaps = gtl::InlinedVector;
-  using InputMatrixMaps = gtl::InlinedVector;
-  using InputRealScalar = typename Eigen::NumTraits::Real;
-
-  using OutputMatrix = Eigen::Matrix;
-  using OutputConstMatrixMap = Eigen::Map;
-  using OutputMatrixMap = Eigen::Map;
-  using OutputConstVectorMap =
-      Eigen::Map>;
-  using OutputConstMatrixMaps = gtl::InlinedVector;
-  using OutputMatrixMaps = gtl::InlinedVector;
-  using OutputRealScalar = typename Eigen::NumTraits::Real;
-
-  // backward compatibility
-  using Scalar = OutputScalar;
-  using Matrix =
-      Eigen::Matrix;
-  using ConstMatrixMap = Eigen::Map;
-  using MatrixMap = Eigen::Map;
-  using ConstVectorMap =
-      Eigen::Map>;
-  using ConstMatrixMaps = gtl::InlinedVector;
-  using MatrixMaps = gtl::InlinedVector;
-  using RealScalar = typename Eigen::NumTraits::Real;
-
-  // Performs a single matrix computation given input matrices, and
-  // stores the result in outputs. For batch operations, this will be called
-  // repeatedly for a single call to Compute() when multiple matrices exist in
-  // input Tensors with rank > 2. In this case the calls to ComputeMatrix are
-  // parallelized. The number of threads used is determined by a cost model from
-  // the value returned by GetCostPerUnit().
-  virtual void ComputeMatrix(OpKernelContext* context,
-                             const InputConstMatrixMaps& inputs,
-                             OutputMatrixMaps* outputs) = 0;
-
- private:
-  using TensorInputs = gtl::InlinedVector;
-  using TensorOutputs = gtl::InlinedVector;
-  // This function maps 2-d slices (matrices) of the input and output tensors
-  // using Eigen::Map and calls ComputeMatrix implemented in terms of the
-  // Eigen::MatrixBase API by the derived class.
-  //
-  // The 'matrix_index' parameter specifies the index of the matrix to be used
-  // from each input tensor, and the index of the matrix to be written to each
-  // output tensor. The input matrices are in row major order, and located at
-  // the memory addresses
-  //   inputs[i].flat().data() +
-  //   matrix_index * input_matrix_shapes[i].num_elements()
-  // for i in 0...inputs.size()-1.
-  // The output matrices are in row major order, and located at the memory
-  // address
-  //   outputs[i]->flat().data() +
-  //   matrix_index * output_matrix_shapes[i].num_elements().
-  // for i in 0...outputs.size()-1.
-  //
-  void ComputeTensorSlice(OpKernelContext* context, int64 matrix_index,
-                          const TensorInputs& inputs,
-                          const TensorShapes& input_matrix_shapes,
-                          const TensorOutputs& outputs,
-                          const TensorShapes& output_matrix_shapes);
-
-  void AnalyzeInputs(OpKernelContext* context, TensorInputs* inputs,
-                     TensorShapes* input_matrix_shapes,
-                     TensorShape* batch_shape);
-
-  void PrepareOutputs(OpKernelContext* context,
-                      const TensorShapes& input_matrix_shapes,
-                      const TensorShape& batch_shape, TensorOutputs* outputs,
-                      TensorShapes* output_matrix_shapes);
-};
-
-// Declare LinearAlgebraOp, which is explicitly instantiated in
-// linalg_ops_common.cc for float, double, complex64, and complex128.
-extern template class LinearAlgebraOp;
-extern template class LinearAlgebraOp;
-extern template class LinearAlgebraOp;
-extern template class LinearAlgebraOp;
-
-}  // namespace tensorflow
-
-#define INHERIT_LINALG_TYPEDEFS(Scalar)                        \
-  typedef LinearAlgebraOp Base;                                \
-  using RealScalar = typename Eigen::NumTraits::Real;          \
-  using Matrix = typename Base::Matrix;                        \
-  using MatrixMap = typename Base::MatrixMap;                  \
-  using MatrixMaps = typename Base::MatrixMaps;                \
-  using ConstMatrixMap = typename Base::ConstMatrixMap;        \
-  using ConstMatrixMaps = typename Base::ConstMatrixMaps;      \
-  using ConstVectorMap = typename Base::ConstVectorMap;        \
-  using TensorShapes = typename Base::TensorShapes;
-
-#define REGISTER_LINALG_OP_CPU(OpName, OpClass, Scalar)              \
-  REGISTER_KERNEL_BUILDER(                                           \
-      Name(OpName).Device(DEVICE_CPU).TypeConstraint("T"), OpClass)
-
-#define REGISTER_LINALG_OP_GPU(OpName, OpClass, Scalar)              \
-  REGISTER_KERNEL_BUILDER(                                           \
-      Name(OpName).Device(DEVICE_GPU).TypeConstraint("T"), OpClass)
-
-// Deprecated, use one of the device-specific macros above.
-#define REGISTER_LINALG_OP(OpName, OpClass, Scalar) \
-  REGISTER_LINALG_OP_CPU(OpName, OpClass, Scalar)
-
-#endif  // TENSORFLOW_CORE_KERNELS_LINALG_LINALG_OPS_COMMON_H_
diff --git a/tensorflow/core/kernels/linalg/linalg_ops_common.cc b/tensorflow/core/kernels/linalg_ops_common.cc
similarity index 99%
rename from tensorflow/core/kernels/linalg/linalg_ops_common.cc
rename to tensorflow/core/kernels/linalg_ops_common.cc
index c8d33e435c7..56a941fbd1f 100644
--- a/tensorflow/core/kernels/linalg/linalg_ops_common.cc
+++ b/tensorflow/core/kernels/linalg_ops_common.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
-#include "tensorflow/core/kernels/linalg/linalg_ops_common.h"
+#include "tensorflow/core/kernels/linalg_ops_common.h"

 #include
diff --git a/tensorflow/core/kernels/linalg_ops_common.h b/tensorflow/core/kernels/linalg_ops_common.h
index 0aa69801f19..65c2fb90f0e 100644
--- a/tensorflow/core/kernels/linalg_ops_common.h
+++ b/tensorflow/core/kernels/linalg_ops_common.h
@@ -12,10 +12,211 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
+
 #ifndef TENSORFLOW_CORE_KERNELS_LINALG_OPS_COMMON_H_
 #define TENSORFLOW_CORE_KERNELS_LINALG_OPS_COMMON_H_

-// Temporary forwarding header.
-#include "tensorflow/core/kernels/linalg/linalg_ops_common.h"
+// Classes to support linear algebra functionality, similar to the numpy.linalg
+// module. Supports batch computation on several matrices at once, sharding the
+// computations across different threads if necessary.
+#include
+
+#include "third_party/eigen3/Eigen/Core"
+#include "tensorflow/core/framework/kernel_def_builder.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/gtl/inlined_vector.h"
+#include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/work_sharder.h"
+
+namespace tensorflow {
+
+// Base class for linear algebra operators.
+template
+class LinearAlgebraOp : public OpKernel {
+ public:
+  explicit LinearAlgebraOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override;
+
+ protected:
+  using TensorShapes = gtl::InlinedVector;
+  // Returns the number of leading inputs that are to be treated as matrix
+  // inputs. By default this is all the inputs. Derived classes can override
+  // this to tell the base class to ignore one or more trailing inputs.
+  virtual int NumMatrixInputs(const OpKernelContext* context) const {
+    return context->num_inputs();
+  }
+
+  // Returns true if the number of inputs and their shapes are as expected.
+  // Many ops take a single square input matrix, so we provide that as a default
+  // implementation for convenience.
+  virtual void ValidateInputMatrixShapes(
+      OpKernelContext* context, const TensorShapes& input_matrix_shapes) const {
+    ValidateSingleSquareMatrix(context, input_matrix_shapes);
+  }
+
+  // Convenience validators for common cases:
+  //
+  // Validate op taking a single matrix A.
+  static void ValidateSingleMatrix(OpKernelContext* context,
+                                   const TensorShapes& input_matrix_shapes);
+  // Validate op taking a single square matrix A.
+  static void ValidateSingleSquareMatrix(
+      OpKernelContext* context, const TensorShapes& input_matrix_shapes);
+  // Validate op taking two matrices A and B that have the same number of rows.
+  static void ValidateSolver(OpKernelContext* context,
+                             const TensorShapes& input_matrix_shapes);
+  // Validate op taking two matrices A and B that have the same number of rows
+  // and A is square.
+  static void ValidateSquareSolver(OpKernelContext* context,
+                                   const TensorShapes& input_matrix_shapes);
+
+  // Returns the output shapes of each individual matrix operation. Output
+  // matrix shapes must be rank 0, 1, or 2. Scalar outputs are rank 0.
+  //
+  // The derived class may return a number of shapes (N) less than
+  // context->num_outputs() (M) to indicate that only a leading subset of
+  // the outputs will be populated. In this case, a dummy scalar tensor with
+  // value zero will be returned for the last M-N outputs.
+  //
+  // For many ops, the output dimensions are the same as the input dimensions,
+  // so we provide that as a default implementation for convenience.
+  virtual TensorShapes GetOutputMatrixShapes(
+      const TensorShapes& input_matrix_shapes) const {
+    return input_matrix_shapes;
+  }
+
+  // Returns the cost per matrix operation. This is used to determine the
+  // number of threads to use for parallelizing calls to ComputeMatrix in
+  // batch mode. Cost per unit is assumed to be roughly 1ns, based on comments
+  // in core/util/work_sharder.cc. Many linear algebra ops take roughly max(m,n)
+  // * min(m,n)^2, where the first input matrix is m-by-n. We provide that as a
+  // default implementation for convenience.
+  virtual int64 GetCostPerUnit(const TensorShapes& input_matrix_shapes) const {
+    double m = static_cast(input_matrix_shapes[0].dim_size(0));
+    double n = static_cast(input_matrix_shapes[0].dim_size(1));
+    double cost = std::max(m, n) * std::min(m, n) * std::min(m, n);
+    return cost >= static_cast(kint64max) ? kint64max
+                                          : static_cast(cost);
+  }
+
+  // Returns true if it is safe to forward (alias) input to output buffer
+  // and expect the kernel to perform the computation inplace.
+  virtual bool EnableInputForwarding() const { return true; }
+
+  using InputMatrix = Eigen::Matrix;
+  using InputConstMatrixMap = Eigen::Map;
+  using InputMatrixMap = Eigen::Map;
+  using InputConstVectorMap =
+      Eigen::Map>;
+  using InputConstMatrixMaps = gtl::InlinedVector;
+  using InputMatrixMaps = gtl::InlinedVector;
+  using InputRealScalar = typename Eigen::NumTraits::Real;
+
+  using OutputMatrix = Eigen::Matrix;
+  using OutputConstMatrixMap = Eigen::Map;
+  using OutputMatrixMap = Eigen::Map;
+  using OutputConstVectorMap =
+      Eigen::Map>;
+  using OutputConstMatrixMaps = gtl::InlinedVector;
+  using OutputMatrixMaps = gtl::InlinedVector;
+  using OutputRealScalar = typename Eigen::NumTraits::Real;
+
+  // backward compatibility
+  using Scalar = OutputScalar;
+  using Matrix =
+      Eigen::Matrix;
+  using ConstMatrixMap = Eigen::Map;
+  using MatrixMap = Eigen::Map;
+  using ConstVectorMap =
+      Eigen::Map>;
+  using ConstMatrixMaps = gtl::InlinedVector;
+  using MatrixMaps = gtl::InlinedVector;
+  using RealScalar = typename Eigen::NumTraits::Real;
+
+  // Performs a single matrix computation given input matrices, and
+  // stores the result in outputs. For batch operations, this will be called
+  // repeatedly for a single call to Compute() when multiple matrices exist in
+  // input Tensors with rank > 2. In this case the calls to ComputeMatrix are
+  // parallelized. The number of threads used is determined by a cost model from
+  // the value returned by GetCostPerUnit().
+  virtual void ComputeMatrix(OpKernelContext* context,
+                             const InputConstMatrixMaps& inputs,
+                             OutputMatrixMaps* outputs) = 0;
+
+ private:
+  using TensorInputs = gtl::InlinedVector;
+  using TensorOutputs = gtl::InlinedVector;
+  // This function maps 2-d slices (matrices) of the input and output tensors
+  // using Eigen::Map and calls ComputeMatrix implemented in terms of the
+  // Eigen::MatrixBase API by the derived class.
+  //
+  // The 'matrix_index' parameter specifies the index of the matrix to be used
+  // from each input tensor, and the index of the matrix to be written to each
+  // output tensor. The input matrices are in row major order, and located at
+  // the memory addresses
+  //   inputs[i].flat().data() +
+  //   matrix_index * input_matrix_shapes[i].num_elements()
+  // for i in 0...inputs.size()-1.
+  // The output matrices are in row major order, and located at the memory
+  // address
+  //   outputs[i]->flat().data() +
+  //   matrix_index * output_matrix_shapes[i].num_elements().
+  // for i in 0...outputs.size()-1.
+  //
+  void ComputeTensorSlice(OpKernelContext* context, int64 matrix_index,
+                          const TensorInputs& inputs,
+                          const TensorShapes& input_matrix_shapes,
+                          const TensorOutputs& outputs,
+                          const TensorShapes& output_matrix_shapes);
+
+  void AnalyzeInputs(OpKernelContext* context, TensorInputs* inputs,
+                     TensorShapes* input_matrix_shapes,
+                     TensorShape* batch_shape);
+
+  void PrepareOutputs(OpKernelContext* context,
+                      const TensorShapes& input_matrix_shapes,
+                      const TensorShape& batch_shape, TensorOutputs* outputs,
+                      TensorShapes* output_matrix_shapes);
+};
+
+// Declare LinearAlgebraOp, which is explicitly instantiated in
+// linalg_ops_common.cc for float, double, complex64, and complex128.
+extern template class LinearAlgebraOp;
+extern template class LinearAlgebraOp;
+extern template class LinearAlgebraOp;
+extern template class LinearAlgebraOp;
+
+}  // namespace tensorflow
+
+#define INHERIT_LINALG_TYPEDEFS(Scalar)                        \
+  typedef LinearAlgebraOp Base;                                \
+  using RealScalar = typename Eigen::NumTraits::Real;          \
+  using Matrix = typename Base::Matrix;                        \
+  using MatrixMap = typename Base::MatrixMap;                  \
+  using MatrixMaps = typename Base::MatrixMaps;                \
+  using ConstMatrixMap = typename Base::ConstMatrixMap;        \
+  using ConstMatrixMaps = typename Base::ConstMatrixMaps;      \
+  using ConstVectorMap = typename Base::ConstVectorMap;        \
+  using TensorShapes = typename Base::TensorShapes;
+
+#define REGISTER_LINALG_OP_CPU(OpName, OpClass, Scalar)              \
+  REGISTER_KERNEL_BUILDER(                                           \
+      Name(OpName).Device(DEVICE_CPU).TypeConstraint("T"), OpClass)
+
+#define REGISTER_LINALG_OP_GPU(OpName, OpClass, Scalar)              \
+  REGISTER_KERNEL_BUILDER(                                           \
+      Name(OpName).Device(DEVICE_GPU).TypeConstraint("T"), OpClass)
+
+// Deprecated, use one of the device-specific macros above.
+#define REGISTER_LINALG_OP(OpName, OpClass, Scalar) \
+  REGISTER_LINALG_OP_CPU(OpName, OpClass, Scalar)

 #endif  // TENSORFLOW_CORE_KERNELS_LINALG_OPS_COMMON_H_
diff --git a/tensorflow/core/kernels/linalg/lu_op.cc b/tensorflow/core/kernels/lu_op.cc
similarity index 100%
rename from tensorflow/core/kernels/linalg/lu_op.cc
rename to tensorflow/core/kernels/lu_op.cc
diff --git a/tensorflow/core/kernels/linalg/lu_op_gpu.cu.cc b/tensorflow/core/kernels/lu_op_gpu.cu.cc
similarity index 99%
rename from tensorflow/core/kernels/linalg/lu_op_gpu.cu.cc
rename to tensorflow/core/kernels/lu_op_gpu.cu.cc
index 9d23a35057d..47b37ed7f7a 100644
--- a/tensorflow/core/kernels/linalg/lu_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/lu_op_gpu.cu.cc
@@ -25,9 +25,9 @@ limitations under the License.
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/cuda_solvers.h"
 #include "tensorflow/core/kernels/transpose_functor.h"
 #include "tensorflow/core/platform/types.h"
-#include "tensorflow/core/util/cuda_solvers.h"
 #include "tensorflow/core/util/gpu_kernel_helper.h"

 namespace tensorflow {
diff --git a/tensorflow/core/kernels/linalg/matrix_band_part_op.cc b/tensorflow/core/kernels/matrix_band_part_op.cc
similarity index 99%
rename from tensorflow/core/kernels/linalg/matrix_band_part_op.cc
rename to tensorflow/core/kernels/matrix_band_part_op.cc
index 23619bacc33..4dcce5a8f58 100644
--- a/tensorflow/core/kernels/linalg/matrix_band_part_op.cc
+++ b/tensorflow/core/kernels/matrix_band_part_op.cc
@@ -21,12 +21,11 @@ limitations under the License.
 #define EIGEN_USE_GPU
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

-#include "tensorflow/core/kernels/linalg/matrix_band_part_op.h"
+#include "tensorflow/core/kernels/matrix_band_part_op.h"

 #include
 #include
 #include
-
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
diff --git a/tensorflow/core/kernels/linalg/matrix_band_part_op.h b/tensorflow/core/kernels/matrix_band_part_op.h
similarity index 86%
rename from tensorflow/core/kernels/linalg/matrix_band_part_op.h
rename to tensorflow/core/kernels/matrix_band_part_op.h
index 2f68eba6dcd..b04e36db8ed 100644
--- a/tensorflow/core/kernels/linalg/matrix_band_part_op.h
+++ b/tensorflow/core/kernels/matrix_band_part_op.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
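[Editor's aside, not part of this patch.] The header quoted above documents the LinearAlgebraOp extension points (ValidateInputMatrixShapes, GetOutputMatrixShapes, GetCostPerUnit, ComputeMatrix) and the REGISTER_LINALG_OP_CPU registration macro. Below is a minimal sketch of how a derived kernel could use that interface, assuming the post-move include path tensorflow/core/kernels/linalg_ops_common.h and the template parameters that the quoted text has lost; the op name "MatrixScale" and the class MatrixScaleOp are invented for illustration and are not part of TensorFlow.

#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/kernels/linalg_ops_common.h"

namespace tensorflow {

// Hypothetical kernel that multiplies each input matrix by 2 (illustration only).
template <class Scalar>
class MatrixScaleOp : public LinearAlgebraOp<Scalar> {
 public:
  INHERIT_LINALG_TYPEDEFS(Scalar);

  explicit MatrixScaleOp(OpKernelConstruction* context) : Base(context) {}

  // Accept any single matrix; the base-class default would require a square one.
  void ValidateInputMatrixShapes(
      OpKernelContext* context,
      const TensorShapes& input_matrix_shapes) const final {
    Base::ValidateSingleMatrix(context, input_matrix_shapes);
  }

  // Cost (in ~ns) per matrix; the base class uses this to shard the batched
  // calls to ComputeMatrix across threads. An elementwise op touches each of
  // the m * n entries once.
  int64 GetCostPerUnit(const TensorShapes& input_matrix_shapes) const final {
    return input_matrix_shapes[0].num_elements();
  }

  // Called once per 2-d slice; batching over inputs of rank > 2 is handled by
  // the base class Compute().
  void ComputeMatrix(OpKernelContext* context, const ConstMatrixMaps& inputs,
                     MatrixMaps* outputs) final {
    outputs->at(0) = inputs[0] * Scalar(2);
  }
};

// Hypothetical registration; "MatrixScale" is not a registered TensorFlow op.
REGISTER_LINALG_OP_CPU("MatrixScale", (MatrixScaleOp<float>), float);

}  // namespace tensorflow

(End of aside; the patch continues below.)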
==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_BAND_PART_OP_H_ -#define TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_BAND_PART_OP_H_ +#ifndef TENSORFLOW_CORE_KERNELS_MATRIX_BAND_PART_OP_H_ +#define TENSORFLOW_CORE_KERNELS_MATRIX_BAND_PART_OP_H_ #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_types.h" @@ -34,4 +34,4 @@ struct MatrixBandPartFunctor { } // namespace functor } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_BAND_PART_OP_H_ +#endif // TENSORFLOW_CORE_KERNELS_MATRIX_BAND_PART_OP_H_ diff --git a/tensorflow/core/kernels/linalg/matrix_band_part_op_gpu.cu.cc b/tensorflow/core/kernels/matrix_band_part_op_gpu.cu.cc similarity index 97% rename from tensorflow/core/kernels/linalg/matrix_band_part_op_gpu.cu.cc rename to tensorflow/core/kernels/matrix_band_part_op_gpu.cu.cc index 9c734b7fd6e..9eb3e4f72a2 100644 --- a/tensorflow/core/kernels/linalg/matrix_band_part_op_gpu.cu.cc +++ b/tensorflow/core/kernels/matrix_band_part_op_gpu.cu.cc @@ -21,7 +21,7 @@ limitations under the License. #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/kernels/linalg/matrix_band_part_op.h" +#include "tensorflow/core/kernels/matrix_band_part_op.h" #include "tensorflow/core/util/gpu_kernel_helper.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/matrix_diag_op.cc b/tensorflow/core/kernels/matrix_diag_op.cc similarity index 99% rename from tensorflow/core/kernels/linalg/matrix_diag_op.cc rename to tensorflow/core/kernels/matrix_diag_op.cc index 69cc8170793..05d7e4e6f86 100644 --- a/tensorflow/core/kernels/linalg/matrix_diag_op.cc +++ b/tensorflow/core/kernels/matrix_diag_op.cc @@ -20,7 +20,7 @@ limitations under the License. #define EIGEN_USE_GPU #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/linalg/matrix_diag_op.h" +#include "tensorflow/core/kernels/matrix_diag_op.h" #include #include diff --git a/tensorflow/core/kernels/linalg/matrix_diag_op.h b/tensorflow/core/kernels/matrix_diag_op.h similarity index 94% rename from tensorflow/core/kernels/linalg/matrix_diag_op.h rename to tensorflow/core/kernels/matrix_diag_op.h index 5758ba664cc..707fd9b6c14 100644 --- a/tensorflow/core/kernels/linalg/matrix_diag_op.h +++ b/tensorflow/core/kernels/matrix_diag_op.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_DIAG_OP_H_ -#define TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_DIAG_OP_H_ +#ifndef TENSORFLOW_CORE_KERNELS_MATRIX_DIAG_OP_H_ +#define TENSORFLOW_CORE_KERNELS_MATRIX_DIAG_OP_H_ // Generator definition for MatrixDiagOp, must be compilable by nvcc. 
@@ -69,4 +69,4 @@ struct MatrixDiag { } // namespace functor } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_DIAG_OP_H_ +#endif // TENSORFLOW_CORE_KERNELS_MATRIX_DIAG_OP_H_ diff --git a/tensorflow/core/kernels/linalg/matrix_diag_op_gpu.cu.cc b/tensorflow/core/kernels/matrix_diag_op_gpu.cu.cc similarity index 99% rename from tensorflow/core/kernels/linalg/matrix_diag_op_gpu.cu.cc rename to tensorflow/core/kernels/matrix_diag_op_gpu.cu.cc index 6b52e70716d..76271798d5f 100644 --- a/tensorflow/core/kernels/linalg/matrix_diag_op_gpu.cu.cc +++ b/tensorflow/core/kernels/matrix_diag_op_gpu.cu.cc @@ -18,7 +18,7 @@ limitations under the License. #define EIGEN_USE_GPU #include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/kernels/linalg/matrix_diag_op.h" +#include "tensorflow/core/kernels/matrix_diag_op.h" #include "tensorflow/core/util/gpu_kernel_helper.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/matrix_exponential_op.cc b/tensorflow/core/kernels/matrix_exponential_op.cc similarity index 97% rename from tensorflow/core/kernels/linalg/matrix_exponential_op.cc rename to tensorflow/core/kernels/matrix_exponential_op.cc index 73407614955..01d4894438c 100644 --- a/tensorflow/core/kernels/linalg/matrix_exponential_op.cc +++ b/tensorflow/core/kernels/matrix_exponential_op.cc @@ -20,7 +20,7 @@ limitations under the License. #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" diff --git a/tensorflow/core/kernels/linalg/matrix_inverse_op.cc b/tensorflow/core/kernels/matrix_inverse_op.cc similarity index 98% rename from tensorflow/core/kernels/linalg/matrix_inverse_op.cc rename to tensorflow/core/kernels/matrix_inverse_op.cc index dc51776f2fe..52afdd15ba6 100644 --- a/tensorflow/core/kernels/linalg/matrix_inverse_op.cc +++ b/tensorflow/core/kernels/matrix_inverse_op.cc @@ -24,7 +24,7 @@ limitations under the License. #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" @@ -32,9 +32,9 @@ limitations under the License. 
#if GOOGLE_CUDA #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#include "tensorflow/core/kernels/linalg/eye_functor.h" +#include "tensorflow/core/kernels/cuda_solvers.h" +#include "tensorflow/core/kernels/eye_functor.h" #include "tensorflow/core/kernels/transpose_functor.h" -#include "tensorflow/core/util/cuda_solvers.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/matrix_logarithm_op.cc b/tensorflow/core/kernels/matrix_logarithm_op.cc similarity index 97% rename from tensorflow/core/kernels/linalg/matrix_logarithm_op.cc rename to tensorflow/core/kernels/matrix_logarithm_op.cc index 79d5472f140..22ca094e243 100644 --- a/tensorflow/core/kernels/linalg/matrix_logarithm_op.cc +++ b/tensorflow/core/kernels/matrix_logarithm_op.cc @@ -20,7 +20,7 @@ limitations under the License. #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" diff --git a/tensorflow/core/kernels/linalg/matrix_set_diag_op.cc b/tensorflow/core/kernels/matrix_set_diag_op.cc similarity index 99% rename from tensorflow/core/kernels/linalg/matrix_set_diag_op.cc rename to tensorflow/core/kernels/matrix_set_diag_op.cc index df32228d0f2..bf98fd0d47d 100644 --- a/tensorflow/core/kernels/linalg/matrix_set_diag_op.cc +++ b/tensorflow/core/kernels/matrix_set_diag_op.cc @@ -21,7 +21,7 @@ limitations under the License. #define EIGEN_USE_GPU #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/linalg/matrix_set_diag_op.h" +#include "tensorflow/core/kernels/matrix_set_diag_op.h" #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/op_kernel.h" @@ -30,7 +30,7 @@ limitations under the License. #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/types.h" -#include "tensorflow/core/kernels/linalg/matrix_diag_op.h" +#include "tensorflow/core/kernels/matrix_diag_op.h" #include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" diff --git a/tensorflow/core/kernels/linalg/matrix_set_diag_op.h b/tensorflow/core/kernels/matrix_set_diag_op.h similarity index 89% rename from tensorflow/core/kernels/linalg/matrix_set_diag_op.h rename to tensorflow/core/kernels/matrix_set_diag_op.h index 449a3607ede..04877cd34ca 100644 --- a/tensorflow/core/kernels/linalg/matrix_set_diag_op.h +++ b/tensorflow/core/kernels/matrix_set_diag_op.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_SET_DIAG_OP_H_ -#define TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_SET_DIAG_OP_H_ +#ifndef TENSORFLOW_CORE_KERNELS_MATRIX_SET_DIAG_OP_H_ +#define TENSORFLOW_CORE_KERNELS_MATRIX_SET_DIAG_OP_H_ #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_types.h" @@ -39,4 +39,4 @@ struct MatrixSetDiag { } // namespace functor } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_SET_DIAG_OP_H_ +#endif // TENSORFLOW_CORE_KERNELS_MATRIX_SET_DIAG_OP_H_ diff --git a/tensorflow/core/kernels/linalg/matrix_set_diag_op_gpu.cu.cc b/tensorflow/core/kernels/matrix_set_diag_op_gpu.cu.cc similarity index 99% rename from tensorflow/core/kernels/linalg/matrix_set_diag_op_gpu.cu.cc rename to tensorflow/core/kernels/matrix_set_diag_op_gpu.cu.cc index 0cdb457db03..4e32f8a52e8 100644 --- a/tensorflow/core/kernels/linalg/matrix_set_diag_op_gpu.cu.cc +++ b/tensorflow/core/kernels/matrix_set_diag_op_gpu.cu.cc @@ -18,7 +18,7 @@ limitations under the License. #define EIGEN_USE_GPU #include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/kernels/linalg/matrix_set_diag_op.h" +#include "tensorflow/core/kernels/matrix_set_diag_op.h" #include "tensorflow/core/util/gpu_kernel_helper.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/matrix_solve_ls_op_complex128.cc b/tensorflow/core/kernels/matrix_solve_ls_op_complex128.cc similarity index 92% rename from tensorflow/core/kernels/linalg/matrix_solve_ls_op_complex128.cc rename to tensorflow/core/kernels/matrix_solve_ls_op_complex128.cc index 4e64eb42371..22274cc3daf 100644 --- a/tensorflow/core/kernels/linalg/matrix_solve_ls_op_complex128.cc +++ b/tensorflow/core/kernels/matrix_solve_ls_op_complex128.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/linalg/matrix_solve_ls_op_impl.h" +#include "tensorflow/core/kernels/matrix_solve_ls_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/matrix_solve_ls_op_complex64.cc b/tensorflow/core/kernels/matrix_solve_ls_op_complex64.cc similarity index 92% rename from tensorflow/core/kernels/linalg/matrix_solve_ls_op_complex64.cc rename to tensorflow/core/kernels/matrix_solve_ls_op_complex64.cc index 719201f3f9e..c8421a3efba 100644 --- a/tensorflow/core/kernels/linalg/matrix_solve_ls_op_complex64.cc +++ b/tensorflow/core/kernels/matrix_solve_ls_op_complex64.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/linalg/matrix_solve_ls_op_impl.h" +#include "tensorflow/core/kernels/matrix_solve_ls_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/matrix_solve_ls_op_double.cc b/tensorflow/core/kernels/matrix_solve_ls_op_double.cc similarity index 92% rename from tensorflow/core/kernels/linalg/matrix_solve_ls_op_double.cc rename to tensorflow/core/kernels/matrix_solve_ls_op_double.cc index 614ecee4e23..c7d03cb1052 100644 --- a/tensorflow/core/kernels/linalg/matrix_solve_ls_op_double.cc +++ b/tensorflow/core/kernels/matrix_solve_ls_op_double.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/linalg/matrix_solve_ls_op_impl.h" +#include "tensorflow/core/kernels/matrix_solve_ls_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/matrix_solve_ls_op_float.cc b/tensorflow/core/kernels/matrix_solve_ls_op_float.cc similarity index 92% rename from tensorflow/core/kernels/linalg/matrix_solve_ls_op_float.cc rename to tensorflow/core/kernels/matrix_solve_ls_op_float.cc index 809cff8148c..c98a84beded 100644 --- a/tensorflow/core/kernels/linalg/matrix_solve_ls_op_float.cc +++ b/tensorflow/core/kernels/matrix_solve_ls_op_float.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/linalg/matrix_solve_ls_op_impl.h" +#include "tensorflow/core/kernels/matrix_solve_ls_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/matrix_solve_ls_op_impl.h b/tensorflow/core/kernels/matrix_solve_ls_op_impl.h similarity index 96% rename from tensorflow/core/kernels/linalg/matrix_solve_ls_op_impl.h rename to tensorflow/core/kernels/matrix_solve_ls_op_impl.h index 1c8101a05b4..00a05a87a3a 100644 --- a/tensorflow/core/kernels/linalg/matrix_solve_ls_op_impl.h +++ b/tensorflow/core/kernels/matrix_solve_ls_op_impl.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_SOLVE_LS_OP_IMPL_H_ -#define TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_SOLVE_LS_OP_IMPL_H_ +#ifndef TENSORFLOW_CORE_KERNELS_MATRIX_SOLVE_LS_OP_IMPL_H_ +#define TENSORFLOW_CORE_KERNELS_MATRIX_SOLVE_LS_OP_IMPL_H_ // See docs in ../ops/linalg_ops.cc. @@ -24,7 +24,7 @@ limitations under the License. 
#include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" @@ -163,4 +163,4 @@ class MatrixSolveLsOp : public LinearAlgebraOp { } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_SOLVE_LS_OP_IMPL_H_ +#endif // TENSORFLOW_CORE_KERNELS_MATRIX_SOLVE_LS_OP_IMPL_H_ diff --git a/tensorflow/core/kernels/linalg/matrix_solve_op.cc b/tensorflow/core/kernels/matrix_solve_op.cc similarity index 99% rename from tensorflow/core/kernels/linalg/matrix_solve_op.cc rename to tensorflow/core/kernels/matrix_solve_op.cc index 70f02bddf9b..3a75054f4ea 100644 --- a/tensorflow/core/kernels/linalg/matrix_solve_op.cc +++ b/tensorflow/core/kernels/matrix_solve_op.cc @@ -25,7 +25,7 @@ limitations under the License. #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" @@ -33,8 +33,8 @@ limitations under the License. #if GOOGLE_CUDA #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/kernels/cuda_solvers.h" #include "tensorflow/core/kernels/transpose_functor.h" -#include "tensorflow/core/util/cuda_solvers.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/matrix_square_root_op.cc b/tensorflow/core/kernels/matrix_square_root_op.cc similarity index 97% rename from tensorflow/core/kernels/linalg/matrix_square_root_op.cc rename to tensorflow/core/kernels/matrix_square_root_op.cc index ce43e358350..fe3d3043c26 100644 --- a/tensorflow/core/kernels/linalg/matrix_square_root_op.cc +++ b/tensorflow/core/kernels/matrix_square_root_op.cc @@ -20,7 +20,7 @@ limitations under the License. #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" diff --git a/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_complex.cc b/tensorflow/core/kernels/matrix_triangular_solve_op_complex.cc similarity index 92% rename from tensorflow/core/kernels/linalg/matrix_triangular_solve_op_complex.cc rename to tensorflow/core/kernels/matrix_triangular_solve_op_complex.cc index 27f3e77e29c..ae3702078a0 100644 --- a/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_complex.cc +++ b/tensorflow/core/kernels/matrix_triangular_solve_op_complex.cc @@ -14,7 +14,7 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/kernels/linalg/matrix_triangular_solve_op_impl.h" +#include "tensorflow/core/kernels/matrix_triangular_solve_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_impl.h b/tensorflow/core/kernels/matrix_triangular_solve_op_impl.h similarity index 97% rename from tensorflow/core/kernels/linalg/matrix_triangular_solve_op_impl.h rename to tensorflow/core/kernels/matrix_triangular_solve_op_impl.h index 99249f792b6..fb7e6f0f5ff 100644 --- a/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_impl.h +++ b/tensorflow/core/kernels/matrix_triangular_solve_op_impl.h @@ -15,8 +15,8 @@ limitations under the License. // See docs in ../ops/linalg_ops.cc. // -#ifndef TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_TRIANGULAR_SOLVE_OP_IMPL_H_ -#define TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_TRIANGULAR_SOLVE_OP_IMPL_H_ +#ifndef TENSORFLOW_CORE_KERNELS_MATRIX_TRIANGULAR_SOLVE_OP_IMPL_H_ +#define TENSORFLOW_CORE_KERNELS_MATRIX_TRIANGULAR_SOLVE_OP_IMPL_H_ #include "third_party/eigen3/Eigen/Core" #include "tensorflow/core/framework/kernel_def_builder.h" @@ -24,7 +24,7 @@ limitations under the License. #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/kernels/fill_functor.h" -#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" @@ -38,9 +38,9 @@ limitations under the License. #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM #if GOOGLE_CUDA -#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/kernels/cuda_solvers.h" #elif TENSORFLOW_USE_ROCM -#include "tensorflow/core/util/rocm_solvers.h" +#include "tensorflow/core/kernels/rocm_solvers.h" #endif namespace tensorflow { @@ -434,4 +434,4 @@ struct LaunchBatchMatrixTriangularSolve { } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_TRIANGULAR_SOLVE_OP_IMPL_H_ +#endif // TENSORFLOW_CORE_KERNELS_MATRIX_TRIANGULAR_SOLVE_OP_IMPL_H_ diff --git a/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_real.cc b/tensorflow/core/kernels/matrix_triangular_solve_op_real.cc similarity index 93% rename from tensorflow/core/kernels/linalg/matrix_triangular_solve_op_real.cc rename to tensorflow/core/kernels/matrix_triangular_solve_op_real.cc index 71a62441dc4..0f92964dd72 100644 --- a/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_real.cc +++ b/tensorflow/core/kernels/matrix_triangular_solve_op_real.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/linalg/matrix_triangular_solve_op_impl.h" +#include "tensorflow/core/kernels/matrix_triangular_solve_op_impl.h" #if GOOGLE_CUDA #include "third_party/gpus/cuda/include/cuda.h" diff --git a/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_test.cc b/tensorflow/core/kernels/matrix_triangular_solve_op_test.cc similarity index 100% rename from tensorflow/core/kernels/linalg/matrix_triangular_solve_op_test.cc rename to tensorflow/core/kernels/matrix_triangular_solve_op_test.cc diff --git a/tensorflow/core/kernels/linalg/qr_op_complex128.cc b/tensorflow/core/kernels/qr_op_complex128.cc similarity index 96% rename from tensorflow/core/kernels/linalg/qr_op_complex128.cc rename to tensorflow/core/kernels/qr_op_complex128.cc index 0c14c6d2818..8a3e3dc0a92 100644 --- a/tensorflow/core/kernels/linalg/qr_op_complex128.cc +++ b/tensorflow/core/kernels/qr_op_complex128.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/linalg/qr_op_impl.h" +#include "tensorflow/core/kernels/qr_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/qr_op_complex64.cc b/tensorflow/core/kernels/qr_op_complex64.cc similarity index 95% rename from tensorflow/core/kernels/linalg/qr_op_complex64.cc rename to tensorflow/core/kernels/qr_op_complex64.cc index fc0227ef7f9..467fa6c2d6a 100644 --- a/tensorflow/core/kernels/linalg/qr_op_complex64.cc +++ b/tensorflow/core/kernels/qr_op_complex64.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/linalg/qr_op_impl.h" +#include "tensorflow/core/kernels/qr_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/qr_op_double.cc b/tensorflow/core/kernels/qr_op_double.cc similarity index 96% rename from tensorflow/core/kernels/linalg/qr_op_double.cc rename to tensorflow/core/kernels/qr_op_double.cc index ae00b3e7921..05537a0eaa3 100644 --- a/tensorflow/core/kernels/linalg/qr_op_double.cc +++ b/tensorflow/core/kernels/qr_op_double.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/linalg/qr_op_impl.h" +#include "tensorflow/core/kernels/qr_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/qr_op_float.cc b/tensorflow/core/kernels/qr_op_float.cc similarity index 96% rename from tensorflow/core/kernels/linalg/qr_op_float.cc rename to tensorflow/core/kernels/qr_op_float.cc index 77b8eeb0286..6aebd981865 100644 --- a/tensorflow/core/kernels/linalg/qr_op_float.cc +++ b/tensorflow/core/kernels/qr_op_float.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/linalg/qr_op_impl.h" +#include "tensorflow/core/kernels/qr_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/qr_op_impl.h b/tensorflow/core/kernels/qr_op_impl.h similarity index 96% rename from tensorflow/core/kernels/linalg/qr_op_impl.h rename to tensorflow/core/kernels/qr_op_impl.h index 876594bc511..535df9d160d 100644 --- a/tensorflow/core/kernels/linalg/qr_op_impl.h +++ b/tensorflow/core/kernels/qr_op_impl.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_LINALG_QR_OP_IMPL_H_ -#define TENSORFLOW_CORE_KERNELS_LINALG_QR_OP_IMPL_H_ +#ifndef TENSORFLOW_CORE_KERNELS_QR_OP_IMPL_H_ +#define TENSORFLOW_CORE_KERNELS_QR_OP_IMPL_H_ // See docs in ../ops/linalg_ops.cc. // @@ -33,7 +33,7 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" @@ -41,11 +41,11 @@ limitations under the License. #if GOOGLE_CUDA #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/kernels/cuda_solvers.h" #include "tensorflow/core/kernels/cwise_ops.h" -#include "tensorflow/core/kernels/linalg/eye_functor.h" -#include "tensorflow/core/kernels/linalg/matrix_band_part_op.h" +#include "tensorflow/core/kernels/eye_functor.h" +#include "tensorflow/core/kernels/matrix_band_part_op.h" #include "tensorflow/core/kernels/transpose_functor.h" -#include "tensorflow/core/util/cuda_solvers.h" #endif namespace tensorflow { @@ -299,4 +299,4 @@ class QrOpGpu : public AsyncOpKernel { } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_LINALG_QR_OP_IMPL_H_ +#endif // TENSORFLOW_CORE_KERNELS_QR_OP_IMPL_H_ diff --git a/tensorflow/core/util/rocm_solvers.cc b/tensorflow/core/kernels/rocm_solvers.cc similarity index 99% rename from tensorflow/core/util/rocm_solvers.cc rename to tensorflow/core/kernels/rocm_solvers.cc index 13dadf602a7..5faf718332e 100644 --- a/tensorflow/core/util/rocm_solvers.cc +++ b/tensorflow/core/kernels/rocm_solvers.cc @@ -14,7 +14,7 @@ ============================================================================== */ #if TENSORFLOW_USE_ROCM -#include "tensorflow/core/util/rocm_solvers.h" +#include "tensorflow/core/kernels/rocm_solvers.h" #include #include diff --git a/tensorflow/core/util/rocm_solvers.h b/tensorflow/core/kernels/rocm_solvers.h similarity index 96% rename from tensorflow/core/util/rocm_solvers.h rename to tensorflow/core/kernels/rocm_solvers.h index afc8b936d05..94d3c82a497 100644 --- a/tensorflow/core/util/rocm_solvers.h +++ b/tensorflow/core/kernels/rocm_solvers.h @@ -14,8 +14,8 @@ limitations under the License. 
============================================================================== */ -#ifndef TENSORFLOW_CORE_KERNELS_LINALG_ROCM_SOLVERS_H_ -#define TENSORFLOW_CORE_KERNELS_LINALG_ROCM_SOLVERS_H_ +#ifndef TENSORFLOW_CORE_KERNELS_ROCM_SOLVERS_H_ +#define TENSORFLOW_CORE_KERNELS_ROCM_SOLVERS_H_ // This header declares the class ROCmSolver, which contains wrappers of linear // algebra solvers in the cuBlas and cuSolverDN libraries for use in TensorFlow @@ -158,4 +158,4 @@ class ScratchSpace { #endif // TENSORFLOW_USE_ROCM -#endif // TENSORFLOW_CORE_KERNELS_LINALG_ROCM_SOLVERS_H_ +#endif // TENSORFLOW_CORE_KERNELS_ROCM_SOLVERS_H_ diff --git a/tensorflow/core/util/rocm_sparse.cc b/tensorflow/core/kernels/rocm_sparse.cc similarity index 99% rename from tensorflow/core/util/rocm_sparse.cc rename to tensorflow/core/kernels/rocm_sparse.cc index cc7b56fdc01..97488692bc1 100644 --- a/tensorflow/core/util/rocm_sparse.cc +++ b/tensorflow/core/kernels/rocm_sparse.cc @@ -24,6 +24,8 @@ limitations under the License. #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/cuda_solvers.h" +#include "tensorflow/core/kernels/cuda_sparse.h" #include "tensorflow/core/lib/core/blocking_counter.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/stringpiece.h" @@ -33,8 +35,6 @@ limitations under the License. #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/stream_executor.h" #include "tensorflow/core/platform/types.h" -#include "tensorflow/core/util/cuda_solvers.h" -#include "tensorflow/core/util/cuda_sparse.h" namespace tensorflow { namespace { diff --git a/tensorflow/core/kernels/segment_reduction_ops_impl.h b/tensorflow/core/kernels/segment_reduction_ops_impl.h index 7cf15ef5b72..6c3fad668ae 100644 --- a/tensorflow/core/kernels/segment_reduction_ops_impl.h +++ b/tensorflow/core/kernels/segment_reduction_ops_impl.h @@ -45,13 +45,13 @@ limitations under the License. #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM #if GOOGLE_CUDA -#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/kernels/cuda_solvers.h" #include "tensorflow/stream_executor/cuda/cuda_activation.h" using stream_executor::cuda::ScopedActivateExecutorContext; #elif TENSORFLOW_USE_ROCM +#include "tensorflow/core/kernels/cuda_solvers.h" #include "tensorflow/core/platform/rocm.h" -#include "tensorflow/core/util/cuda_solvers.h" using stream_executor::rocm::ScopedActivateExecutorContext; #endif // GOOGLE_CUDA diff --git a/tensorflow/core/kernels/linalg/self_adjoint_eig_op.cc b/tensorflow/core/kernels/self_adjoint_eig_op.cc similarity index 98% rename from tensorflow/core/kernels/linalg/self_adjoint_eig_op.cc rename to tensorflow/core/kernels/self_adjoint_eig_op.cc index ebf1955b8ff..cea5883db7b 100644 --- a/tensorflow/core/kernels/linalg/self_adjoint_eig_op.cc +++ b/tensorflow/core/kernels/self_adjoint_eig_op.cc @@ -20,7 +20,7 @@ limitations under the License. 
#include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/denormal.h" #include "tensorflow/core/platform/logging.h" diff --git a/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_complex128.cc b/tensorflow/core/kernels/self_adjoint_eig_v2_op_complex128.cc similarity index 93% rename from tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_complex128.cc rename to tensorflow/core/kernels/self_adjoint_eig_v2_op_complex128.cc index 424c33a7ac1..4c7a391d56c 100644 --- a/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_complex128.cc +++ b/tensorflow/core/kernels/self_adjoint_eig_v2_op_complex128.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_impl.h" +#include "tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_complex64.cc b/tensorflow/core/kernels/self_adjoint_eig_v2_op_complex64.cc similarity index 93% rename from tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_complex64.cc rename to tensorflow/core/kernels/self_adjoint_eig_v2_op_complex64.cc index bdd20998e3c..0ec5ec24dd1 100644 --- a/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_complex64.cc +++ b/tensorflow/core/kernels/self_adjoint_eig_v2_op_complex64.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_impl.h" +#include "tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_double.cc b/tensorflow/core/kernels/self_adjoint_eig_v2_op_double.cc similarity index 92% rename from tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_double.cc rename to tensorflow/core/kernels/self_adjoint_eig_v2_op_double.cc index afc50500d40..7f81bb69021 100644 --- a/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_double.cc +++ b/tensorflow/core/kernels/self_adjoint_eig_v2_op_double.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_impl.h" +#include "tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_float.cc b/tensorflow/core/kernels/self_adjoint_eig_v2_op_float.cc similarity index 92% rename from tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_float.cc rename to tensorflow/core/kernels/self_adjoint_eig_v2_op_float.cc index 1f795777a2e..bf30952d1e7 100644 --- a/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_float.cc +++ b/tensorflow/core/kernels/self_adjoint_eig_v2_op_float.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_impl.h" +#include "tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_gpu.cc b/tensorflow/core/kernels/self_adjoint_eig_v2_op_gpu.cc similarity index 99% rename from tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_gpu.cc rename to tensorflow/core/kernels/self_adjoint_eig_v2_op_gpu.cc index 778c50ff408..3a84df07a9a 100644 --- a/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_gpu.cc +++ b/tensorflow/core/kernels/self_adjoint_eig_v2_op_gpu.cc @@ -26,12 +26,12 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/kernels/cast_op.h" +#include "tensorflow/core/kernels/cuda_solvers.h" #include "tensorflow/core/kernels/cwise_ops.h" #include "tensorflow/core/kernels/transpose_functor.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" -#include "tensorflow/core/util/cuda_solvers.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_impl.h b/tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h similarity index 91% rename from tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_impl.h rename to tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h index 56f2936a66e..b5274f8788b 100644 --- a/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_impl.h +++ b/tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_LINALG_SELF_ADJOINT_EIG_V2_OP_IMPL_H_ -#define TENSORFLOW_CORE_KERNELS_LINALG_SELF_ADJOINT_EIG_V2_OP_IMPL_H_ +#ifndef TENSORFLOW_CORE_KERNELS_SELF_ADJOINT_EIG_V2_OP_IMPL_H_ +#define TENSORFLOW_CORE_KERNELS_SELF_ADJOINT_EIG_V2_OP_IMPL_H_ // See docs in ../ops/linalg_ops.cc. @@ -23,7 +23,7 @@ limitations under the License. 
#include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/denormal.h" #include "tensorflow/core/platform/logging.h" @@ -89,4 +89,4 @@ class SelfAdjointEigV2Op : public LinearAlgebraOp { } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_LINALG_SELF_ADJOINT_EIG_V2_OP_IMPL_H_ +#endif // TENSORFLOW_CORE_KERNELS_SELF_ADJOINT_EIG_V2_OP_IMPL_H_ diff --git a/tensorflow/core/kernels/sparse/BUILD b/tensorflow/core/kernels/sparse/BUILD index bfb6c4934bb..1d281bc1d61 100644 --- a/tensorflow/core/kernels/sparse/BUILD +++ b/tensorflow/core/kernels/sparse/BUILD @@ -80,8 +80,8 @@ tf_kernel_library( "//tensorflow/core/kernels:transpose_functor", "//tensorflow/core/kernels:gpu_prim_hdrs", ] + if_cuda_or_rocm([ - "//tensorflow/core/util:cuda_solvers", - "//tensorflow/core/util:cuda_sparse", + "//tensorflow/core/kernels:cuda_solvers", + "//tensorflow/core/kernels:cuda_sparse", ]), alwayslink = 1, ) diff --git a/tensorflow/core/kernels/sparse/add_op.cc b/tensorflow/core/kernels/sparse/add_op.cc index 06fe1cd042e..b6265a1412c 100644 --- a/tensorflow/core/kernels/sparse/add_op.cc +++ b/tensorflow/core/kernels/sparse/add_op.cc @@ -32,8 +32,8 @@ limitations under the License. #include "tensorflow/core/kernels/fill_functor.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/util/cuda_solvers.h" -#include "tensorflow/core/util/cuda_sparse.h" +#include "tensorflow/core/kernels/cuda_solvers.h" +#include "tensorflow/core/kernels/cuda_sparse.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/conj_op.cc b/tensorflow/core/kernels/sparse/conj_op.cc index 147160fbe6c..7275262c1f0 100644 --- a/tensorflow/core/kernels/sparse/conj_op.cc +++ b/tensorflow/core/kernels/sparse/conj_op.cc @@ -32,8 +32,8 @@ limitations under the License. #include "tensorflow/core/kernels/sparse/sparse_matrix.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/util/cuda_solvers.h" -#include "tensorflow/core/util/cuda_sparse.h" +#include "tensorflow/core/kernels/cuda_solvers.h" +#include "tensorflow/core/kernels/cuda_sparse.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_dense_op.cc b/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_dense_op.cc index 2e5afbdcad7..364c2c07bd8 100644 --- a/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_dense_op.cc +++ b/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_dense_op.cc @@ -34,8 +34,8 @@ limitations under the License. #include "tensorflow/core/util/work_sharder.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/util/cuda_solvers.h" -#include "tensorflow/core/util/cuda_sparse.h" +#include "tensorflow/core/kernels/cuda_solvers.h" +#include "tensorflow/core/kernels/cuda_sparse.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_sparse_tensor_op.cc b/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_sparse_tensor_op.cc index a81ccfa562e..55ebfa4fc10 100644 --- a/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_sparse_tensor_op.cc +++ b/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_sparse_tensor_op.cc @@ -32,8 +32,8 @@ limitations under the License. 
#include "tensorflow/core/util/work_sharder.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/util/cuda_solvers.h" -#include "tensorflow/core/util/cuda_sparse.h" +#include "tensorflow/core/kernels/cuda_solvers.h" +#include "tensorflow/core/kernels/cuda_sparse.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/dense_to_csr_sparse_matrix_op.cc b/tensorflow/core/kernels/sparse/dense_to_csr_sparse_matrix_op.cc index 5c62a44f9ba..459bb219343 100644 --- a/tensorflow/core/kernels/sparse/dense_to_csr_sparse_matrix_op.cc +++ b/tensorflow/core/kernels/sparse/dense_to_csr_sparse_matrix_op.cc @@ -35,8 +35,8 @@ limitations under the License. #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h" -#include "tensorflow/core/util/cuda_solvers.h" -#include "tensorflow/core/util/cuda_sparse.h" +#include "tensorflow/core/kernels/cuda_solvers.h" +#include "tensorflow/core/kernels/cuda_sparse.h" #endif #if GOOGLE_CUDA diff --git a/tensorflow/core/kernels/sparse/kernels_gpu.cu.cc b/tensorflow/core/kernels/sparse/kernels_gpu.cu.cc index 6b11e64307a..1c014db3d0a 100644 --- a/tensorflow/core/kernels/sparse/kernels_gpu.cu.cc +++ b/tensorflow/core/kernels/sparse/kernels_gpu.cu.cc @@ -20,13 +20,13 @@ limitations under the License. #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/kernels/cuda_sparse.h" #include "tensorflow/core/kernels/gpu_device_array.h" #include "tensorflow/core/kernels/gpu_device_array_gpu.h" #include "tensorflow/core/kernels/gpu_prim.h" #include "tensorflow/core/kernels/sparse/kernels.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/types.h" -#include "tensorflow/core/util/cuda_sparse.h" #include "tensorflow/core/util/gpu_kernel_helper.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/mat_mul_op.cc b/tensorflow/core/kernels/sparse/mat_mul_op.cc index bf9de570fbf..50fa0ec88ea 100644 --- a/tensorflow/core/kernels/sparse/mat_mul_op.cc +++ b/tensorflow/core/kernels/sparse/mat_mul_op.cc @@ -37,8 +37,8 @@ limitations under the License. #include "tensorflow/core/platform/threadpool.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/util/cuda_solvers.h" -#include "tensorflow/core/util/cuda_sparse.h" +#include "tensorflow/core/kernels/cuda_solvers.h" +#include "tensorflow/core/kernels/cuda_sparse.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/mul_op.cc b/tensorflow/core/kernels/sparse/mul_op.cc index d08f1568db1..33c3756ce58 100644 --- a/tensorflow/core/kernels/sparse/mul_op.cc +++ b/tensorflow/core/kernels/sparse/mul_op.cc @@ -29,7 +29,7 @@ limitations under the License. #include "tensorflow/core/kernels/sparse/sparse_matrix.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/util/cuda_sparse.h" +#include "tensorflow/core/kernels/cuda_sparse.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/nnz_op.cc b/tensorflow/core/kernels/sparse/nnz_op.cc index d67620443f0..ebc48c3e9a4 100644 --- a/tensorflow/core/kernels/sparse/nnz_op.cc +++ b/tensorflow/core/kernels/sparse/nnz_op.cc @@ -29,8 +29,8 @@ limitations under the License. 
#include "tensorflow/core/kernels/sparse/sparse_matrix.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/util/cuda_solvers.h" -#include "tensorflow/core/util/cuda_sparse.h" +#include "tensorflow/core/kernels/cuda_solvers.h" +#include "tensorflow/core/kernels/cuda_sparse.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/softmax_op.cc b/tensorflow/core/kernels/sparse/softmax_op.cc index f1a5db8d0f0..25025bfe2a6 100644 --- a/tensorflow/core/kernels/sparse/softmax_op.cc +++ b/tensorflow/core/kernels/sparse/softmax_op.cc @@ -20,7 +20,7 @@ limitations under the License. #define EIGEN_USE_THREADS #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/util/cuda_sparse.h" +#include "tensorflow/core/kernels/cuda_sparse.h" #define EIGEN_USE_GPU #endif diff --git a/tensorflow/core/kernels/sparse/sparse_mat_mul_op.cc b/tensorflow/core/kernels/sparse/sparse_mat_mul_op.cc index fecee9e4555..fb652e13d15 100644 --- a/tensorflow/core/kernels/sparse/sparse_mat_mul_op.cc +++ b/tensorflow/core/kernels/sparse/sparse_mat_mul_op.cc @@ -36,8 +36,8 @@ limitations under the License. #include "tensorflow/core/util/work_sharder.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/util/cuda_solvers.h" -#include "tensorflow/core/util/cuda_sparse.h" +#include "tensorflow/core/kernels/cuda_solvers.h" +#include "tensorflow/core/kernels/cuda_sparse.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/sparse_matrix_components_op.cc b/tensorflow/core/kernels/sparse/sparse_matrix_components_op.cc index 2eaf9bd5310..59540f63846 100644 --- a/tensorflow/core/kernels/sparse/sparse_matrix_components_op.cc +++ b/tensorflow/core/kernels/sparse/sparse_matrix_components_op.cc @@ -30,8 +30,8 @@ limitations under the License. #include "tensorflow/core/kernels/sparse/sparse_matrix.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/util/cuda_solvers.h" -#include "tensorflow/core/util/cuda_sparse.h" +#include "tensorflow/core/kernels/cuda_solvers.h" +#include "tensorflow/core/kernels/cuda_sparse.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/sparse_tensor_to_csr_sparse_matrix_op.cc b/tensorflow/core/kernels/sparse/sparse_tensor_to_csr_sparse_matrix_op.cc index 2548ceaa57c..e1a4b4194d2 100644 --- a/tensorflow/core/kernels/sparse/sparse_tensor_to_csr_sparse_matrix_op.cc +++ b/tensorflow/core/kernels/sparse/sparse_tensor_to_csr_sparse_matrix_op.cc @@ -33,8 +33,8 @@ limitations under the License. #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h" -#include "tensorflow/core/util/cuda_solvers.h" -#include "tensorflow/core/util/cuda_sparse.h" +#include "tensorflow/core/kernels/cuda_solvers.h" +#include "tensorflow/core/kernels/cuda_sparse.h" #endif #if GOOGLE_CUDA diff --git a/tensorflow/core/kernels/sparse/transpose_op.cc b/tensorflow/core/kernels/sparse/transpose_op.cc index 08d37fa1692..3158eb5016d 100644 --- a/tensorflow/core/kernels/sparse/transpose_op.cc +++ b/tensorflow/core/kernels/sparse/transpose_op.cc @@ -20,7 +20,7 @@ limitations under the License. 
#define EIGEN_USE_THREADS #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/util/cuda_sparse.h" +#include "tensorflow/core/kernels/cuda_sparse.h" #define EIGEN_USE_GPU #endif diff --git a/tensorflow/core/kernels/linalg/svd_op_complex128.cc b/tensorflow/core/kernels/svd_op_complex128.cc similarity index 93% rename from tensorflow/core/kernels/linalg/svd_op_complex128.cc rename to tensorflow/core/kernels/svd_op_complex128.cc index 36ac629e38a..a0f39418aca 100644 --- a/tensorflow/core/kernels/linalg/svd_op_complex128.cc +++ b/tensorflow/core/kernels/svd_op_complex128.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/linalg/svd_op_impl.h" +#include "tensorflow/core/kernels/svd_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/svd_op_complex64.cc b/tensorflow/core/kernels/svd_op_complex64.cc similarity index 93% rename from tensorflow/core/kernels/linalg/svd_op_complex64.cc rename to tensorflow/core/kernels/svd_op_complex64.cc index 50d940b534a..a8fd50c67d1 100644 --- a/tensorflow/core/kernels/linalg/svd_op_complex64.cc +++ b/tensorflow/core/kernels/svd_op_complex64.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/linalg/svd_op_impl.h" +#include "tensorflow/core/kernels/svd_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/svd_op_double.cc b/tensorflow/core/kernels/svd_op_double.cc similarity index 93% rename from tensorflow/core/kernels/linalg/svd_op_double.cc rename to tensorflow/core/kernels/svd_op_double.cc index 85bbe08d8c9..539dae3a081 100644 --- a/tensorflow/core/kernels/linalg/svd_op_double.cc +++ b/tensorflow/core/kernels/svd_op_double.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/linalg/svd_op_impl.h" +#include "tensorflow/core/kernels/svd_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/svd_op_float.cc b/tensorflow/core/kernels/svd_op_float.cc similarity index 93% rename from tensorflow/core/kernels/linalg/svd_op_float.cc rename to tensorflow/core/kernels/svd_op_float.cc index 961d131293b..03839aa49c3 100644 --- a/tensorflow/core/kernels/linalg/svd_op_float.cc +++ b/tensorflow/core/kernels/svd_op_float.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/linalg/svd_op_impl.h" +#include "tensorflow/core/kernels/svd_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/svd_op_gpu.cu.cc b/tensorflow/core/kernels/svd_op_gpu.cu.cc similarity index 99% rename from tensorflow/core/kernels/linalg/svd_op_gpu.cu.cc rename to tensorflow/core/kernels/svd_op_gpu.cu.cc index 06d1efe6dd5..482fd057e4e 100644 --- a/tensorflow/core/kernels/linalg/svd_op_gpu.cu.cc +++ b/tensorflow/core/kernels/svd_op_gpu.cu.cc @@ -36,14 +36,14 @@ limitations under the License. 
#include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/types.h" -#include "tensorflow/core/kernels/linalg/eye_functor.h" -#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" +#include "tensorflow/core/kernels/cuda_solvers.h" +#include "tensorflow/core/kernels/eye_functor.h" +#include "tensorflow/core/kernels/linalg_ops_common.h" #include "tensorflow/core/kernels/transpose_functor.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/stream_executor.h" #include "tensorflow/core/platform/types.h" -#include "tensorflow/core/util/cuda_solvers.h" #include "tensorflow/core/util/gpu_kernel_helper.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg/svd_op_impl.h b/tensorflow/core/kernels/svd_op_impl.h similarity index 95% rename from tensorflow/core/kernels/linalg/svd_op_impl.h rename to tensorflow/core/kernels/svd_op_impl.h index c43aaaa4b7b..675826a057c 100644 --- a/tensorflow/core/kernels/linalg/svd_op_impl.h +++ b/tensorflow/core/kernels/svd_op_impl.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_LINALG_SVD_OP_IMPL_H_ -#define TENSORFLOW_CORE_KERNELS_LINALG_SVD_OP_IMPL_H_ +#ifndef TENSORFLOW_CORE_KERNELS_SVD_OP_IMPL_H_ +#define TENSORFLOW_CORE_KERNELS_SVD_OP_IMPL_H_ // See docs in ../ops/linalg_ops.cc. // @@ -27,7 +27,7 @@ limitations under the License. #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" @@ -118,4 +118,4 @@ class SvdOp : public LinearAlgebraOp { } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_LINALG_SVD_OP_IMPL_H_ +#endif // TENSORFLOW_CORE_KERNELS_SVD_OP_IMPL_H_ diff --git a/tensorflow/core/kernels/linalg/tridiagonal_matmul_op.cc b/tensorflow/core/kernels/tridiagonal_matmul_op.cc similarity index 98% rename from tensorflow/core/kernels/linalg/tridiagonal_matmul_op.cc rename to tensorflow/core/kernels/tridiagonal_matmul_op.cc index 9d17c574148..3ddf22012de 100644 --- a/tensorflow/core/kernels/linalg/tridiagonal_matmul_op.cc +++ b/tensorflow/core/kernels/tridiagonal_matmul_op.cc @@ -19,7 +19,7 @@ limitations under the License. 
#include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/types.h" diff --git a/tensorflow/core/kernels/linalg/tridiagonal_matmul_op_gpu.cu.cc b/tensorflow/core/kernels/tridiagonal_matmul_op_gpu.cu.cc similarity index 96% rename from tensorflow/core/kernels/linalg/tridiagonal_matmul_op_gpu.cu.cc rename to tensorflow/core/kernels/tridiagonal_matmul_op_gpu.cu.cc index a65db40d822..1c82cc18e32 100644 --- a/tensorflow/core/kernels/linalg/tridiagonal_matmul_op_gpu.cu.cc +++ b/tensorflow/core/kernels/tridiagonal_matmul_op_gpu.cu.cc @@ -22,11 +22,11 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" +#include "tensorflow/core/kernels/cuda_solvers.h" +#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/kernels/linalg_ops_common.h" #include "tensorflow/core/kernels/transpose_functor.h" #include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/util/cuda_solvers.h" -#include "tensorflow/core/util/cuda_sparse.h" #include "tensorflow/core/util/gpu_device_functions.h" #include "tensorflow/core/util/gpu_kernel_helper.h" #include "tensorflow/core/util/gpu_launch_config.h" diff --git a/tensorflow/core/kernels/linalg/tridiagonal_solve_op.cc b/tensorflow/core/kernels/tridiagonal_solve_op.cc similarity index 99% rename from tensorflow/core/kernels/linalg/tridiagonal_solve_op.cc rename to tensorflow/core/kernels/tridiagonal_solve_op.cc index 8fe04125f9a..88931ff3e66 100644 --- a/tensorflow/core/kernels/linalg/tridiagonal_solve_op.cc +++ b/tensorflow/core/kernels/tridiagonal_solve_op.cc @@ -19,7 +19,7 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/types.h" diff --git a/tensorflow/core/kernels/linalg/tridiagonal_solve_op_gpu.cu.cc b/tensorflow/core/kernels/tridiagonal_solve_op_gpu.cu.cc similarity index 99% rename from tensorflow/core/kernels/linalg/tridiagonal_solve_op_gpu.cu.cc rename to tensorflow/core/kernels/tridiagonal_solve_op_gpu.cu.cc index 86514cfb033..089fa8c040f 100644 --- a/tensorflow/core/kernels/linalg/tridiagonal_solve_op_gpu.cu.cc +++ b/tensorflow/core/kernels/tridiagonal_solve_op_gpu.cu.cc @@ -23,11 +23,11 @@ limitations under the License. 
#include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/types.h" -#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" +#include "tensorflow/core/kernels/cuda_solvers.h" +#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/kernels/linalg_ops_common.h" #include "tensorflow/core/kernels/transpose_functor.h" #include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/util/cuda_solvers.h" -#include "tensorflow/core/util/cuda_sparse.h" #include "tensorflow/core/util/gpu_device_functions.h" #include "tensorflow/core/util/gpu_kernel_helper.h" #include "tensorflow/core/util/gpu_launch_config.h" diff --git a/tensorflow/core/kernels/where_op.cc b/tensorflow/core/kernels/where_op.cc index d504ec9b2ed..598cb526d77 100644 --- a/tensorflow/core/kernels/where_op.cc +++ b/tensorflow/core/kernels/where_op.cc @@ -39,7 +39,7 @@ limitations under the License. #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h" -#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/kernels/cuda_solvers.h" #if GOOGLE_CUDA #include "tensorflow/stream_executor/cuda/cuda_activation.h" using stream_executor::cuda::ScopedActivateExecutorContext; diff --git a/tensorflow/core/util/BUILD b/tensorflow/core/util/BUILD index d33d239e928..bb2b9ff429e 100644 --- a/tensorflow/core/util/BUILD +++ b/tensorflow/core/util/BUILD @@ -14,7 +14,6 @@ load( "tf_copts", "tf_cuda_library", "tf_cuda_only_cc_test", - "tf_kernel_library", ) load("//tensorflow:tensorflow.bzl", "tf_version_info_genrule") load( @@ -25,11 +24,6 @@ load( "//tensorflow/core/platform:build_config_root.bzl", "if_static", ) -load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") -load( - "@local_config_rocm//rocm:build_defs.bzl", - "if_rocm", -) default_package_visibility = [ "//tensorflow/core:__subpackages__", @@ -573,63 +567,6 @@ cc_library( ], ) -tf_kernel_library( - name = "cuda_solvers", - srcs = ["cuda_solvers.cc"], - hdrs = ["cuda_solvers.h"], - # @local_config_cuda//cuda:cusolver_static, //third_party/eigen3:blas, - # and //third_party/libf2c all contain various parts of BLAS, LAPACK, - # and f2c helper functions in global namespace. Tell the compiler to - # allow multiple definitions when linking this. 
- linkopts = select({ - "//tensorflow:macos": [], - "//tensorflow:windows": [], - "//conditions:default": ["-Wl,-z,muldefs"], - }), - visibility = ["//tensorflow/core/kernels:friends"], - deps = [ - # "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core/platform/default/build_config:cublas_plugin", - "//tensorflow/stream_executor/cuda:cublas_lib", - "//tensorflow/stream_executor/cuda:cusolver_lib", - ], -) - -tf_kernel_library( - name = "rocm_solvers", - srcs = ["rocm_solvers.cc"], - hdrs = ["rocm_solvers.h"], - visibility = ["//tensorflow/core/kernels:friends"], - deps = [ - # "//tensorflow/core:framework", - # "//tensorflow/core:framework_internal", - "//tensorflow/core:lib", - "//tensorflow/stream_executor/lib", - "//tensorflow/stream_executor/platform:dso_loader", - "//tensorflow/stream_executor/rocm:rocblas_plugin", - "//tensorflow/stream_executor/rocm:rocm_gpu_executor", - ] + if_rocm([ - "@local_config_rocm//rocm:rocprim", - ]), -) - -tf_kernel_library( - name = "cuda_sparse", - srcs = if_cuda(["cuda_sparse.cc"]) + if_rocm(["rocm_sparse.cc"]), - hdrs = ["cuda_sparse.h"], - deps = [ - ":cuda_solvers", - # "//tensorflow/core:framework", - "//tensorflow/core:lib", - ] + if_cuda([ - "//tensorflow/stream_executor/cuda:cusparse_lib", - "@cub_archive//:cub", - ]) + if_rocm([ - "@local_config_rocm//rocm:hipsparse", - ]), -) - # Tests. tf_cc_test( From eb846979a5485a5b5d823d6bb85439180e2d8cdf Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 31 Jul 2020 18:23:00 -0700 Subject: [PATCH 1923/2522] Integrate LLVM at llvm/llvm-project@b7cfa6ca9283 Updates LLVM usage to match [b7cfa6ca9283](https://github.com/llvm/llvm-project/commit/b7cfa6ca9283) PiperOrigin-RevId: 324331764 Change-Id: Iec14eaef1ce4e385bf2aebe2d99ecf2c5934b4b6 --- .../mlir-hlo/Dialect/mhlo/transforms/rewriters.h | 3 +-- .../Dialect/mhlo/transforms/lhlo_legalize_to_llvm.cc | 5 ++--- .../mhlo/transforms/lhlo_legalize_to_llvm_pass.cc | 10 ++++------ tensorflow/workspace.bzl | 4 ++-- 4 files changed, 9 insertions(+), 13 deletions(-) diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h index e5ca4f727a3..725155e9403 100644 --- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h @@ -90,8 +90,7 @@ void PopulateTanhToApproximationPatterns(MLIRContext *context, namespace lmhlo { /// Collect a set of patterns to convert from the LHLO dialect to LLVM. 
-void PopulateLhloToLLVMConversionPatterns(const LowerToLLVMOptions &options, - LLVMTypeConverter *converter, +void PopulateLhloToLLVMConversionPatterns(LLVMTypeConverter *converter, OwningRewritePatternList *patterns); } // namespace lmhlo diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_llvm.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_llvm.cc index 35bbea7ccd4..af64c448ad9 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_llvm.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_llvm.cc @@ -361,11 +361,10 @@ struct ReshapeMemRefCastOpConverter } // namespace -void PopulateLhloToLLVMConversionPatterns(const LowerToLLVMOptions &options, - LLVMTypeConverter *converter, +void PopulateLhloToLLVMConversionPatterns(LLVMTypeConverter *converter, OwningRewritePatternList *patterns) { patterns->insert(*converter, options); + StaticMemRefCastOpConverter>(*converter); } } // namespace lmhlo diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_llvm_pass.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_llvm_pass.cc index 2ed0182319b..00252735023 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_llvm_pass.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_llvm_pass.cc @@ -36,13 +36,11 @@ class TestLhloToLLVMPass ModuleOp m = getOperation(); OwningRewritePatternList patterns; - LLVMTypeConverter converter(m.getContext()); + LLVMTypeConverter converter(&getContext()); populateStdToLLVMConversionPatterns(converter, patterns); - PopulateLhloToLLVMConversionPatterns( - LowerToLLVMOptions::getDefaultOptions(), &converter, &patterns); - mlir::populateLoopToStdConversionPatterns(patterns, &getContext()); - - mlir::populateAffineToStdConversionPatterns(patterns, m.getContext()); + PopulateLhloToLLVMConversionPatterns(&converter, &patterns); + populateLoopToStdConversionPatterns(patterns, &getContext()); + populateAffineToStdConversionPatterns(patterns, &getContext()); ConversionTarget target(getContext()); target.addLegalDialect(); diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index c43d1ea5734..47cc5951579 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -712,8 +712,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. 
- LLVM_COMMIT = "cd4e8d7f6f5ef108919f9f53db35ac73d1edea3d" - LLVM_SHA256 = "671ec1ffb82ad1533ce2c24ef8f4cf73291f3247dc332a70a46623443fb9afde" + LLVM_COMMIT = "b7cfa6ca92830b3c331cb44706bb279996663439" + LLVM_SHA256 = "bad1849f86e5b83571d8a83c849e07dd66c5ddbc01a73432d4fef4da2db21543" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From c4014988b53bc9a027e1e4bee5644921d48f5043 Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Fri, 31 Jul 2020 18:33:03 -0700 Subject: [PATCH 1924/2522] Prepare for enabling XLA SPMD by default PiperOrigin-RevId: 324333256 Change-Id: I512917c91d9d8b51cbc1413743904d2ac93de443 --- .../distributed_tpu_rewrite_pass.cc | 43 ++++++++++++++++--- .../distributed_tpu_rewrite_pass.h | 4 +- 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.cc b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.cc index e7c0c2e04b3..075a1ec9069 100644 --- a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.cc +++ b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.cc @@ -1693,6 +1693,7 @@ Status DistributedTPURewritePass::AssignArgsAndRetvalsToCores( const DataTypeVector& retval_types, const std::vector& retval_shapes, const Graph& graph, const Node* replicate_node, FunctionLibraryRuntime* flr, + bool allow_parameter_replication_for_spmd, std::vector* arg_sharding, std::vector* arg_fast_mem, std::vector* retval_sharding, std::vector* arg_names) { @@ -1748,8 +1749,9 @@ Status DistributedTPURewritePass::AssignArgsAndRetvalsToCores( arg_names->resize(args.size()); arg_fast_mem->resize(args.size()); CachedFunctionHandles cached_function_handles(flr); - const bool use_spmd = UseSpmdForXlaPartitioning(replicate_node) || - replicate_inputs_outputs_by_default_for_xla_spmd_; + const bool use_spmd = (UseSpmdForXlaPartitioning(replicate_node) || + replicate_inputs_outputs_by_default_for_xla_spmd_) && + allow_parameter_replication_for_spmd; for (int i = 0; i < args.size(); ++i) { const Node* n = args[i]; absl::optional assigned_core; @@ -1918,6 +1920,23 @@ Status DistributedTPURewritePass::AssignArgsAndRetvalsToCores( retvals[i]->AddAttr(kShardingAttribute, sharding->SerializeAsString()); (*retval_sharding)[i] = *sharding; } + if (use_spmd && + (absl::c_any_of(*arg_sharding, + [](const xla::OpSharding& s) { + return s.type() == xla::OpSharding::MAXIMAL; + }) || + absl::c_any_of(*retval_sharding, [](const xla::OpSharding& s) { + return s.type() == xla::OpSharding::MAXIMAL; + }))) { + LOG(WARNING) << "XLA SPMD only supports cases where all inputs/outputs " + "exist on every partition (sharded or replicated). 
Fall " + "back to MPMD."; + return AssignArgsAndRetvalsToCores( + num_cores_per_replica, params_info, arg_types, arg_shapes, retval_types, + retval_shapes, graph, replicate_node, flr, + /*allow_parameter_replication_for_spmd=*/false, arg_sharding, + arg_fast_mem, retval_sharding, arg_names); + } return Status::OK(); } @@ -2017,7 +2036,15 @@ Status DistributedTPURewritePass::BuildCompileNode( proto.set_function_library_fingerprint(library_fingerprint); proto.set_enable_automatic_model_parallelism( enable_cross_replica_sharding_mirrored_variables_); - const bool use_spmd = UseSpmdForXlaPartitioning(replicate_node); + const bool use_spmd = + UseSpmdForXlaPartitioning(replicate_node) && allow_xla_spmd_partition_ && + !absl::c_any_of(arg_sharding, + [](const xla::OpSharding& s) { + return s.type() == xla::OpSharding::MAXIMAL; + }) && + !absl::c_any_of(retval_sharding, [](const xla::OpSharding& s) { + return s.type() == xla::OpSharding::MAXIMAL; + }); proto.set_use_spmd_for_xla_partitioning(use_spmd); // Get and fill padding map. @@ -3821,8 +3848,9 @@ Status DistributedTPURewritePass::FingerprintFunctionLibrary( std::vector retval_sharding; TF_RETURN_IF_ERROR(AssignArgsAndRetvalsToCores( num_cores_per_replica, params_info, arg_types, arg_shapes, retval_types, - retval_shapes, *computation, replicate_node, flr, &arg_sharding, - &arg_fast_mem, &retval_sharding, &arg_names)); + retval_shapes, *computation, replicate_node, flr, + allow_xla_spmd_partition_, &arg_sharding, &arg_fast_mem, &retval_sharding, + &arg_names)); VLOG(1) << DumpGraphToFile("distributed_tpu_graph_to_replicate", *computation, flib_def); @@ -4090,6 +4118,7 @@ Status DistributedTPURewritePass::Run( } bool DistributedTPURewritePass::distribute_vars_ = false; +bool DistributedTPURewritePass::allow_xla_spmd_partition_ = true; bool DistributedTPURewritePass:: replicate_inputs_outputs_by_default_for_xla_spmd_ = false; bool DistributedTPURewritePass:: @@ -4097,10 +4126,12 @@ bool DistributedTPURewritePass:: bool DistributedTPURewritePass::enable_automatic_model_parallelism_ = false; /*static*/ void DistributedTPURewritePass::SetDistributedTpuRewritePassOptions( - bool distribute_vars, bool replicate_inputs_outputs_by_default_for_xla_spmd, + bool distribute_vars, bool allow_xla_spmd_partition, + bool replicate_inputs_outputs_by_default_for_xla_spmd, bool enable_cross_replica_sharding_mirrored_variables, bool enable_automatic_model_parallelism) { distribute_vars_ = distribute_vars; + allow_xla_spmd_partition_ = allow_xla_spmd_partition; replicate_inputs_outputs_by_default_for_xla_spmd_ = replicate_inputs_outputs_by_default_for_xla_spmd; enable_cross_replica_sharding_mirrored_variables_ = diff --git a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.h b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.h index ac1a3c38690..1931b4ac80f 100644 --- a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.h +++ b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.h @@ -129,7 +129,7 @@ namespace tensorflow { class DistributedTPURewritePass : public GraphOptimizationPass { public: static void SetDistributedTpuRewritePassOptions( - bool distribute_vars, + bool distribute_vars, bool allow_xla_spmd_partition, bool replicate_inputs_outputs_by_default_for_xla_spmd, bool enable_cross_replica_sharding_mirrored_variables, bool enable_automatic_model_parallelism); @@ -308,6 +308,7 @@ class DistributedTPURewritePass : public GraphOptimizationPass { const DataTypeVector& retval_types, const std::vector& 
retval_shapes, const Graph& graph, const Node* replicate_node, FunctionLibraryRuntime* flr, + bool allow_parameter_replication_for_spmd, std::vector<::xla::OpSharding>* arg_sharding, std::vector* arg_fast_mem, std::vector<::xla::OpSharding>* retval_sharding, @@ -581,6 +582,7 @@ class DistributedTPURewritePass : public GraphOptimizationPass { private: static bool distribute_vars_; + static bool allow_xla_spmd_partition_; static bool replicate_inputs_outputs_by_default_for_xla_spmd_; static bool enable_cross_replica_sharding_mirrored_variables_; static bool enable_automatic_model_parallelism_; From d93ce2d383f188aeae2e603f23c6d65894a9dc69 Mon Sep 17 00:00:00 2001 From: Kibeom Kim Date: Fri, 31 Jul 2020 18:44:35 -0700 Subject: [PATCH 1925/2522] Remove @test_util.deprecated_graph_mode_only in lookup_ops_test.py PiperOrigin-RevId: 324335058 Change-Id: Idad283d371fbdb6e0018c5e90ec97bbb1e91a6bb --- .../python/kernel_tests/lookup_ops_test.py | 394 +++++++++--------- 1 file changed, 186 insertions(+), 208 deletions(-) diff --git a/tensorflow/python/kernel_tests/lookup_ops_test.py b/tensorflow/python/kernel_tests/lookup_ops_test.py index a5446d8977a..a1ded4a9e3b 100644 --- a/tensorflow/python/kernel_tests/lookup_ops_test.py +++ b/tensorflow/python/kernel_tests/lookup_ops_test.py @@ -2430,170 +2430,156 @@ class IdTableWithHashBucketsTest(test.TestCase): f.write("\n".join(values) + "\n") return vocabulary_file - @test_util.run_deprecated_v1 def testStringIdTableWithHashBuckets(self): vocab_file = self._createVocabFile("feat_to_id_1.txt") - with self.cached_session(): - default_value = -1 - vocab_size = 3 - oov_buckets = 1 - table = lookup_ops.IdTableWithHashBuckets( - lookup_ops.StaticHashTable( - lookup_ops.TextFileIdTableInitializer( - vocab_file, vocab_size=vocab_size), default_value), - oov_buckets) + default_value = -1 + vocab_size = 3 + oov_buckets = 1 + table = lookup_ops.IdTableWithHashBuckets( + lookup_ops.StaticHashTable( + lookup_ops.TextFileIdTableInitializer( + vocab_file, vocab_size=vocab_size), default_value), + oov_buckets) - self.evaluate(table.initializer) + self.evaluate(table.initializer) - input_string = constant_op.constant(["brain", "salad", "surgery", "UNK"]) + input_string = constant_op.constant(["brain", "salad", "surgery", "UNK"]) - out = table.lookup(input_string) - self.assertAllEqual([0, 1, 2, 3], self.evaluate(out)) - self.assertEqual(vocab_size + oov_buckets, table.size().eval()) + out = table.lookup(input_string) + self.assertAllEqual([0, 1, 2, 3], self.evaluate(out)) + self.assertEqual(vocab_size + oov_buckets, self.evaluate(table.size())) - @test_util.run_deprecated_v1 def testInt32IdTableWithHashBuckets(self): vocab_file = self._createVocabFile("feat_to_id_2.txt", ("42", "1", "-1000")) - with self.cached_session(): - default_value = -1 - vocab_size = 3 - oov_buckets = 1 - table = lookup_ops.IdTableWithHashBuckets( - lookup_ops.StaticHashTable( - lookup_ops.TextFileIdTableInitializer( - vocab_file, vocab_size=vocab_size, key_dtype=dtypes.int64), - default_value), - oov_buckets, - key_dtype=dtypes.int32) + default_value = -1 + vocab_size = 3 + oov_buckets = 1 + table = lookup_ops.IdTableWithHashBuckets( + lookup_ops.StaticHashTable( + lookup_ops.TextFileIdTableInitializer( + vocab_file, vocab_size=vocab_size, key_dtype=dtypes.int64), + default_value), + oov_buckets, + key_dtype=dtypes.int32) - self.evaluate(table.initializer) + self.evaluate(table.initializer) - values = constant_op.constant((42, 1, -1000, 11), dtype=dtypes.int32) + values = constant_op.constant((42, 
1, -1000, 11), dtype=dtypes.int32) - out = table.lookup(values) - self.assertAllEqual([0, 1, 2, 3], self.evaluate(out)) - self.assertEqual(vocab_size + oov_buckets, table.size().eval()) + out = table.lookup(values) + self.assertAllEqual([0, 1, 2, 3], self.evaluate(out)) + self.assertEqual(vocab_size + oov_buckets, self.evaluate(table.size())) - @test_util.run_deprecated_v1 def testInt64IdTableWithHashBuckets(self): vocab_file = self._createVocabFile("feat_to_id_3.txt", ("42", "1", "-1000")) - with self.cached_session(): - default_value = -1 - vocab_size = 3 - oov_buckets = 1 - table = lookup_ops.IdTableWithHashBuckets( - lookup_ops.StaticHashTable( - lookup_ops.TextFileIdTableInitializer( - vocab_file, vocab_size=vocab_size, key_dtype=dtypes.int64), - default_value), oov_buckets) + default_value = -1 + vocab_size = 3 + oov_buckets = 1 + table = lookup_ops.IdTableWithHashBuckets( + lookup_ops.StaticHashTable( + lookup_ops.TextFileIdTableInitializer( + vocab_file, vocab_size=vocab_size, key_dtype=dtypes.int64), + default_value), oov_buckets) - self.evaluate(table.initializer) + self.evaluate(table.initializer) - values = constant_op.constant((42, 1, -1000, 11), dtype=dtypes.int64) + values = constant_op.constant((42, 1, -1000, 11), dtype=dtypes.int64) - out = table.lookup(values) - self.assertAllEqual([0, 1, 2, 3], self.evaluate(out)) - self.assertEqual(vocab_size + oov_buckets, table.size().eval()) + out = table.lookup(values) + self.assertAllEqual([0, 1, 2, 3], self.evaluate(out)) + self.assertEqual(vocab_size + oov_buckets, self.evaluate(table.size())) - @test_util.run_deprecated_v1 def testStringIdTableWithOnlyHashBucket(self): - with self.cached_session(): - oov_buckets = 5 + oov_buckets = 5 - # Set a table that only uses hash buckets, for each input value returns - # an id calculated by fingerprint("input") mod oov_buckets. - table = lookup_ops.IdTableWithHashBuckets(None, oov_buckets) - self.evaluate(table.initializer) + # Set a table that only uses hash buckets, for each input value returns + # an id calculated by fingerprint("input") mod oov_buckets. + table = lookup_ops.IdTableWithHashBuckets(None, oov_buckets) + self.evaluate(table.initializer) - values = constant_op.constant(("brain", "salad", "surgery")) + values = constant_op.constant(("brain", "salad", "surgery")) - out = table.lookup(values) - self.assertAllEqual( - [ - 3, # fingerprint("brain") mod 5. - 1, # fingerprint("salad") mod 5. - 4 # fingerprint("surgery") mod 5 - ], - self.evaluate(out)) - self.assertEqual(oov_buckets, table.size().eval()) + out = table.lookup(values) + self.assertAllEqual( + [ + 3, # fingerprint("brain") mod 5. + 1, # fingerprint("salad") mod 5. + 4 # fingerprint("surgery") mod 5 + ], + self.evaluate(out)) + self.assertEqual(oov_buckets, self.evaluate(table.size())) - @test_util.run_deprecated_v1 def testInt32IdTableWithOnlyHashBucket(self): - with self.cached_session(): - oov_buckets = 5 + oov_buckets = 5 - # Set a table that only uses hash buckets, for each input value returns - # an id calculated by fingerprint("input") mod oov_buckets. - table = lookup_ops.IdTableWithHashBuckets( - None, oov_buckets, key_dtype=dtypes.int32) - self.evaluate(table.initializer) + # Set a table that only uses hash buckets, for each input value returns + # an id calculated by fingerprint("input") mod oov_buckets. 
+ table = lookup_ops.IdTableWithHashBuckets( + None, oov_buckets, key_dtype=dtypes.int32) + self.evaluate(table.initializer) - input_string = constant_op.constant([42, 1, -1000], dtype=dtypes.int32) + input_string = constant_op.constant([42, 1, -1000], dtype=dtypes.int32) - out = table.lookup(input_string) - self.assertAllEqual( - [ - 1, # fingerprint("42") mod 5. - 4, # fingerprint("1") mod 5. - 2 # fingerprint("-1000") mod 5 - ], - self.evaluate(out)) - self.assertEqual(oov_buckets, table.size().eval()) + out = table.lookup(input_string) + self.assertAllEqual( + [ + 1, # fingerprint("42") mod 5. + 4, # fingerprint("1") mod 5. + 2 # fingerprint("-1000") mod 5 + ], + self.evaluate(out)) + self.assertEqual(oov_buckets, self.evaluate(table.size())) def testFloat64IdTableWithOnlyHashBucket(self): - with self.cached_session(): - with self.assertRaisesRegex(TypeError, "Invalid key_dtype"): - lookup_ops.IdTableWithHashBuckets( - None, num_oov_buckets=5, key_dtype=dtypes.float64) + with self.assertRaisesRegex(TypeError, "Invalid key_dtype"): + lookup_ops.IdTableWithHashBuckets( + None, num_oov_buckets=5, key_dtype=dtypes.float64) def testBoolIdTableWithOnlyHashBucket(self): - with self.cached_session(): - with self.assertRaisesRegex(TypeError, "Invalid key_dtype"): - lookup_ops.IdTableWithHashBuckets( - None, num_oov_buckets=5, key_dtype=dtypes.bool) + with self.assertRaisesRegex(TypeError, "Invalid key_dtype"): + lookup_ops.IdTableWithHashBuckets( + None, num_oov_buckets=5, key_dtype=dtypes.bool) - @test_util.run_deprecated_v1 def testIdTableWithHashBucketsWithMultipleInitializers(self): vocab_file = self._createVocabFile("feat_to_id_4.txt") - with self.cached_session() as sess: - default_value = -1 - vocab_size = 3 - oov_buckets = 3 + default_value = -1 + vocab_size = 3 + oov_buckets = 3 - vocab_table = lookup_ops.StaticHashTable( - lookup_ops.TextFileIdTableInitializer( - vocab_file, vocab_size=vocab_size), default_value) - table1 = lookup_ops.IdTableWithHashBuckets( - vocab_table, - oov_buckets, - hasher_spec=lookup_ops.FastHashSpec, - name="table1") + vocab_table = lookup_ops.StaticHashTable( + lookup_ops.TextFileIdTableInitializer( + vocab_file, vocab_size=vocab_size), default_value) + table1 = lookup_ops.IdTableWithHashBuckets( + vocab_table, + oov_buckets, + hasher_spec=lookup_ops.FastHashSpec, + name="table1") - table2 = lookup_ops.IdTableWithHashBuckets( - vocab_table, - oov_buckets, - hasher_spec=lookup_ops.StrongHashSpec((1, 2)), - name="table2") + table2 = lookup_ops.IdTableWithHashBuckets( + vocab_table, + oov_buckets, + hasher_spec=lookup_ops.StrongHashSpec((1, 2)), + name="table2") - lookup_ops.tables_initializer().run() + self.evaluate(lookup_ops.tables_initializer()) - input_string = constant_op.constant( - ["fruit", "brain", "salad", "surgery", "UNK"]) + input_string = constant_op.constant( + ["fruit", "brain", "salad", "surgery", "UNK"]) - out1 = table1.lookup(input_string) - out2 = table2.lookup(input_string) + out1 = table1.lookup(input_string) + out2 = table2.lookup(input_string) - out1, out2 = self.evaluate([out1, out2]) - self.assertAllEqual([5, 0, 1, 2, 5], out1) - self.assertAllEqual([5, 0, 1, 2, 3], out2) - self.assertEqual(vocab_size + oov_buckets, table1.size().eval()) - self.assertEqual(vocab_size + oov_buckets, table2.size().eval()) + out1, out2 = self.evaluate([out1, out2]) + self.assertAllEqual([5, 0, 1, 2, 5], out1) + self.assertAllEqual([5, 0, 1, 2, 3], out2) + self.assertEqual(vocab_size + oov_buckets, self.evaluate(table1.size())) + 
self.assertEqual(vocab_size + oov_buckets, self.evaluate(table2.size())) + if not context.executing_eagerly(): test_util.assert_ops_in_graph({ "table1_Lookup/hash_bucket": "StringToHashBucketFast", "table2_Lookup/hash_bucket": "StringToHashBucketStrong", - }, sess.graph) + }, ops.get_default_graph()) - @test_util.run_deprecated_v1 def testIdTableWithHashBucketsInitializationAcrossSessions(self): vocab_file = self._createVocabFile("feat_to_id_5.txt") with self.cached_session(): @@ -2614,7 +2600,7 @@ class IdTableWithHashBucketsTest(test.TestCase): out1 = table1.lookup(input_string_1) self.assertAllEqual([0, 1, 2, 3], self.evaluate(out1)) - self.assertEqual(vocab_size + oov_buckets, table1.size().eval()) + self.assertEqual(vocab_size + oov_buckets, self.evaluate(table1.size())) with self.cached_session(): default_value = -1 @@ -2634,129 +2620,121 @@ class IdTableWithHashBucketsTest(test.TestCase): out2 = table2.lookup(input_string_2) self.assertAllEqual([3, 1, 3], self.evaluate(out2)) - self.assertEqual(vocab_size + oov_buckets, table2.size().eval()) + self.assertEqual(vocab_size + oov_buckets, self.evaluate(table2.size())) - @test_util.run_deprecated_v1 def testIdTableWithHashBucketsWithMultipleInitializersDifferentDefault(self): vocab_file = self._createVocabFile("feat_to_id_6.txt") - with self.cached_session() as sess: - default_value1 = -1 - vocab_size = 3 - oov_buckets = 0 - table1 = lookup_ops.IdTableWithHashBuckets( - lookup_ops.StaticHashTable( - lookup_ops.TextFileIdTableInitializer( - vocab_file, vocab_size=vocab_size), default_value1), - oov_buckets) + default_value1 = -1 + vocab_size = 3 + oov_buckets = 0 + table1 = lookup_ops.IdTableWithHashBuckets( + lookup_ops.StaticHashTable( + lookup_ops.TextFileIdTableInitializer( + vocab_file, vocab_size=vocab_size), default_value1), + oov_buckets) - default_value2 = -2 - table2 = lookup_ops.IdTableWithHashBuckets( - lookup_ops.StaticHashTable( - lookup_ops.TextFileIdTableInitializer( - vocab_file, vocab_size=vocab_size), default_value2), - oov_buckets) + default_value2 = -2 + table2 = lookup_ops.IdTableWithHashBuckets( + lookup_ops.StaticHashTable( + lookup_ops.TextFileIdTableInitializer( + vocab_file, vocab_size=vocab_size), default_value2), + oov_buckets) - lookup_ops.tables_initializer().run() + self.evaluate(lookup_ops.tables_initializer()) - input_string_1 = constant_op.constant( - ["brain", "salad", "surgery", "UNK"]) - input_string_2 = constant_op.constant(["fruit", "salad", "UNK"]) + input_string_1 = constant_op.constant( + ["brain", "salad", "surgery", "UNK"]) + input_string_2 = constant_op.constant(["fruit", "salad", "UNK"]) - out1 = table1.lookup(input_string_1) - out2 = table2.lookup(input_string_2) + out1 = table1.lookup(input_string_1) + out2 = table2.lookup(input_string_2) - out1, out2 = self.evaluate([out1, out2]) - self.assertAllEqual([0, 1, 2, -1], out1) - self.assertAllEqual([-2, 1, -2], out2) - self.assertEqual(vocab_size + oov_buckets, table1.size().eval()) - self.assertEqual(vocab_size + oov_buckets, table2.size().eval()) + out1, out2 = self.evaluate([out1, out2]) + self.assertAllEqual([0, 1, 2, -1], out1) + self.assertAllEqual([-2, 1, -2], out2) + self.assertEqual(vocab_size + oov_buckets, self.evaluate(table1.size())) + self.assertEqual(vocab_size + oov_buckets, self.evaluate(table2.size())) - @test_util.run_deprecated_v1 def testSparseTensor(self): vocab_file = self._createVocabFile("feat_to_id_7.txt") input_indices = [[0, 0], [0, 1], [2, 0], [2, 2], [3, 0]] input_shape = [4, 4] - with self.cached_session() as 
sess: - sp_features = sparse_tensor.SparseTensor( - constant_op.constant(input_indices, dtypes.int64), - constant_op.constant(["brain", "salad", "brain", "surgery", "tarkus"], - dtypes.string), - constant_op.constant(input_shape, dtypes.int64)) + sp_features = sparse_tensor.SparseTensor( + constant_op.constant(input_indices, dtypes.int64), + constant_op.constant(["brain", "salad", "brain", "surgery", "tarkus"], + dtypes.string), + constant_op.constant(input_shape, dtypes.int64)) - table = lookup_ops.IdTableWithHashBuckets( - lookup_ops.StaticHashTable( - lookup_ops.TextFileIdTableInitializer(vocab_file, vocab_size=3), - -1), 1) - self.evaluate(table.initializer) + table = lookup_ops.IdTableWithHashBuckets( + lookup_ops.StaticHashTable( + lookup_ops.TextFileIdTableInitializer(vocab_file, vocab_size=3), + -1), 1) + self.evaluate(table.initializer) - sp_ids = table.lookup(sp_features) + sp_ids = table.lookup(sp_features) - self.assertAllEqual([5], sp_ids.values._shape_as_list()) + self.assertAllEqual([5], sp_ids.values._shape_as_list()) - sp_ids_ind, sp_ids_val, sp_ids_shape = sess.run( - [sp_ids.indices, sp_ids.values, sp_ids.dense_shape]) + sp_ids_ind, sp_ids_val, sp_ids_shape = self.evaluate( + [sp_ids.indices, sp_ids.values, sp_ids.dense_shape]) - self.assertAllEqual(input_indices, sp_ids_ind) - self.assertAllEqual([0, 1, 0, 2, 3], sp_ids_val) - self.assertAllEqual(input_shape, sp_ids_shape) + self.assertAllEqual(input_indices, sp_ids_ind) + self.assertAllEqual([0, 1, 0, 2, 3], sp_ids_val) + self.assertAllEqual(input_shape, sp_ids_shape) - @test_util.run_deprecated_v1 def testInt32SparseTensor(self): input_indices = [[0, 0], [0, 1], [2, 0], [2, 2], [3, 0]] input_shape = [4, 4] - with self.cached_session() as sess: - sp_features = sparse_tensor.SparseTensor( - constant_op.constant(input_indices, dtypes.int64), - constant_op.constant([42, 1, 42, -1000, 11], dtypes.int32), - constant_op.constant(input_shape, dtypes.int64)) + sp_features = sparse_tensor.SparseTensor( + constant_op.constant(input_indices, dtypes.int64), + constant_op.constant([42, 1, 42, -1000, 11], dtypes.int32), + constant_op.constant(input_shape, dtypes.int64)) - table = lookup_ops.IdTableWithHashBuckets( - lookup_ops.StaticHashTable( - lookup_ops.KeyValueTensorInitializer( - (42, 1, -1000), (0, 1, 2), dtypes.int64, dtypes.int64), -1), - 1, - key_dtype=dtypes.int32) - self.evaluate(table.initializer) + table = lookup_ops.IdTableWithHashBuckets( + lookup_ops.StaticHashTable( + lookup_ops.KeyValueTensorInitializer( + (42, 1, -1000), (0, 1, 2), dtypes.int64, dtypes.int64), -1), + 1, + key_dtype=dtypes.int32) + self.evaluate(table.initializer) - sp_ids = table.lookup(sp_features) + sp_ids = table.lookup(sp_features) - self.assertAllEqual([5], sp_ids.values._shape_as_list()) + self.assertAllEqual([5], sp_ids.values._shape_as_list()) - sp_ids_ind, sp_ids_val, sp_ids_shape = sess.run( - [sp_ids.indices, sp_ids.values, sp_ids.dense_shape]) + sp_ids_ind, sp_ids_val, sp_ids_shape = self.evaluate( + [sp_ids.indices, sp_ids.values, sp_ids.dense_shape]) - self.assertAllEqual(input_indices, sp_ids_ind) - self.assertAllEqual([0, 1, 0, 2, 3], sp_ids_val) - self.assertAllEqual(input_shape, sp_ids_shape) + self.assertAllEqual(input_indices, sp_ids_ind) + self.assertAllEqual([0, 1, 0, 2, 3], sp_ids_val) + self.assertAllEqual(input_shape, sp_ids_shape) - @test_util.run_deprecated_v1 def testInt64SparseTensor(self): input_indices = [[0, 0], [0, 1], [2, 0], [2, 2], [3, 0]] input_shape = [4, 4] - with self.cached_session() as sess: - sp_features 
= sparse_tensor.SparseTensor( - constant_op.constant(input_indices, dtypes.int64), - constant_op.constant([42, 1, 42, -1000, 11], dtypes.int64), - constant_op.constant(input_shape, dtypes.int64)) + sp_features = sparse_tensor.SparseTensor( + constant_op.constant(input_indices, dtypes.int64), + constant_op.constant([42, 1, 42, -1000, 11], dtypes.int64), + constant_op.constant(input_shape, dtypes.int64)) - table = lookup_ops.IdTableWithHashBuckets( - lookup_ops.StaticHashTable( - lookup_ops.KeyValueTensorInitializer( - (42, 1, -1000), (0, 1, 2), dtypes.int64, dtypes.int64), -1), - 1, - key_dtype=dtypes.int64) - self.evaluate(table.initializer) + table = lookup_ops.IdTableWithHashBuckets( + lookup_ops.StaticHashTable( + lookup_ops.KeyValueTensorInitializer( + (42, 1, -1000), (0, 1, 2), dtypes.int64, dtypes.int64), -1), + 1, + key_dtype=dtypes.int64) + self.evaluate(table.initializer) - sp_ids = table.lookup(sp_features) + sp_ids = table.lookup(sp_features) - self.assertAllEqual([5], sp_ids.values._shape_as_list()) + self.assertAllEqual([5], sp_ids.values._shape_as_list()) - sp_ids_ind, sp_ids_val, sp_ids_shape = sess.run( - [sp_ids.indices, sp_ids.values, sp_ids.dense_shape]) + sp_ids_ind, sp_ids_val, sp_ids_shape = self.evaluate( + [sp_ids.indices, sp_ids.values, sp_ids.dense_shape]) - self.assertAllEqual(input_indices, sp_ids_ind) - self.assertAllEqual([0, 1, 0, 2, 3], sp_ids_val) - self.assertAllEqual(input_shape, sp_ids_shape) + self.assertAllEqual(input_indices, sp_ids_ind) + self.assertAllEqual([0, 1, 0, 2, 3], sp_ids_val) + self.assertAllEqual(input_shape, sp_ids_shape) def testIdTableWithHashBucketsWithInvalidHashers(self): vocab_file = self._createVocabFile("feat_to_id_4.txt") From d5f7397a1accf4f090181f7284a641e6c56ccb31 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Fri, 31 Jul 2020 18:56:51 -0700 Subject: [PATCH 1926/2522] Added possibility to use BHWC tensor method Batch with HWC tensor. PiperOrigin-RevId: 324336963 Change-Id: Ic4333069ac41c1a61c77525200453f0053628128 --- tensorflow/lite/delegates/gpu/cl/tensor_type.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/lite/delegates/gpu/cl/tensor_type.cc b/tensorflow/lite/delegates/gpu/cl/tensor_type.cc index d8455648907..e19de02d59d 100644 --- a/tensorflow/lite/delegates/gpu/cl/tensor_type.cc +++ b/tensorflow/lite/delegates/gpu/cl/tensor_type.cc @@ -162,7 +162,11 @@ absl::Status TensorDescriptor::PerformSelector( *result = "channels"; return absl::OkStatus(); } else if (selector == "Batch") { - *result = "batch"; + if (HasAxis(Axis::BATCH)) { + *result = "batch"; + } else { + *result = "1"; + } return absl::OkStatus(); } else if (selector == "Depth") { *result = "depth"; From 024a8354e5257f01a9a5a7b30e3cd96be0a16e54 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Fri, 31 Jul 2020 18:58:18 -0700 Subject: [PATCH 1927/2522] AddAttributes and MultiplyAttributes replaced with ElementwiseAttributes. 
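For reference, a minimal sketch of how the unified attribute is used on both
sides of the graph (a fragment, assuming a Node* obtained from a GraphFloat32
and the types declared in tensorflow/lite/delegates/gpu/common/operations.h;
the variable names and the scalar value below are illustrative only):

    // Producer side: the ADD and MUL parsers now build the same attribute type.
    ElementwiseAttributes elementwise_attr;
    elementwise_attr.param = 0.5f;  // TensorOrScalar: a scalar or a per-channel tensor.
    node->operation.attributes = std::move(elementwise_attr);

    // Consumer side: backends and transformations read it back uniformly,
    // regardless of whether the node is an ADD or a MUL.
    auto attr =
        absl::any_cast<ElementwiseAttributes>(node.operation.attributes);
    const float* scalar = absl::get_if<float>(&attr.param);
    const auto* linear_tensor =
        absl::get_if<Tensor<Linear, DataType::FLOAT32>>(&attr.param);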
PiperOrigin-RevId: 324337337 Change-Id: I57ad0a5c4cff387f24b4375751a805ae939ddbb6 --- .../gpu/cl/selectors/operation_selector.cc | 5 ++-- .../delegates/gpu/common/model_builder.cc | 4 +-- .../lite/delegates/gpu/common/operations.h | 8 ------ .../add_quant_adjustments_test.cc | 4 +-- .../transformations/fuse_add_to_conv.cc | 14 +++++----- .../common/transformations/fuse_add_to_conv.h | 8 +++--- .../transformations/fuse_add_to_conv_test.cc | 10 +++---- .../transformations/fuse_mul_to_conv.cc | 26 ++++++++++--------- .../common/transformations/fuse_mul_to_conv.h | 20 ++++++++------ .../transformations/fuse_mul_to_conv_test.cc | 20 +++++++------- .../transformations/merge_padding_with.cc | 4 +-- .../merge_padding_with_test.cc | 4 +-- .../gpu/common/transformations/remove_noop.cc | 4 +-- .../transformations/remove_noop_test.cc | 8 +++--- .../lite/delegates/gpu/gl/kernels/add.cc | 3 ++- .../lite/delegates/gpu/gl/kernels/add_test.cc | 8 +++--- .../lite/delegates/gpu/gl/kernels/mul.cc | 2 +- .../lite/delegates/gpu/gl/kernels/mul_test.cc | 4 +-- tensorflow/lite/delegates/gpu/metal/api.cc | 5 ++-- .../delegates/gpu/metal/kernels/add_test.mm | 8 +++--- 20 files changed, 85 insertions(+), 84 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc index 6134cd48bd7..ffe9acb8299 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc @@ -156,7 +156,8 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context, SelectAdd(op_def, channels, output->tensor.shape.c, gpu_op); return absl::OkStatus(); } else if (inputs.size() == 1 && node.operation.attributes.has_value()) { - auto attr = absl::any_cast(node.operation.attributes); + auto attr = + absl::any_cast(node.operation.attributes); const float* scalar = absl::get_if(&attr.param); const auto* linear_tensor = absl::get_if>( @@ -293,7 +294,7 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context, return absl::OkStatus(); } else if (inputs.size() == 1 && node.operation.attributes.has_value()) { auto attr = - absl::any_cast(node.operation.attributes); + absl::any_cast(node.operation.attributes); const float* scalar = absl::get_if(&attr.param); const auto* linear_tensor = absl::get_if>( diff --git a/tensorflow/lite/delegates/gpu/common/model_builder.cc b/tensorflow/lite/delegates/gpu/common/model_builder.cc index 7385a68c0d6..bf24e0d9eff 100644 --- a/tensorflow/lite/delegates/gpu/common/model_builder.cc +++ b/tensorflow/lite/delegates/gpu/common/model_builder.cc @@ -376,7 +376,7 @@ class AddOperationParser : public TFLiteOperationParser { Node* node = graph->NewNode(); node->operation.type = ToString(OperationType::ADD); RETURN_IF_ERROR(reader->AddOutputs(node)); - AddAttributes attr; + ElementwiseAttributes attr; RETURN_IF_ERROR(ParseInputsWithConstTensor(node, reader, &attr.param)); node->operation.attributes = std::move(attr); const TfLiteAddParams* tf_options; @@ -1255,7 +1255,7 @@ class MulOperationParser : public TFLiteOperationParser { const TfLiteIntArray* constant_dims, GraphFloat32* graph, ObjectReader* reader) { RETURN_IF_ERROR(reader->AddInput(node, runtime_tensor)); - MultiplyAttributes attr; + ElementwiseAttributes attr; if (constant_dims->size <= 0 || NumElements(constant_dims) == 1) { Tensor tensor; RETURN_IF_ERROR(reader->ReadTensor(constant_tensor, &tensor)); diff --git 
a/tensorflow/lite/delegates/gpu/common/operations.h b/tensorflow/lite/delegates/gpu/common/operations.h index 5e0f08c143d..fcce6532c1d 100644 --- a/tensorflow/lite/delegates/gpu/common/operations.h +++ b/tensorflow/lite/delegates/gpu/common/operations.h @@ -370,10 +370,6 @@ struct LstmAttributes { LstmKernelType kernel_type = LstmKernelType::BASIC; }; -struct MultiplyAttributes { - TensorOrScalar param; -}; - enum class SamplingType { UNKNOWN = 0, NEAREST = 1, @@ -478,10 +474,6 @@ struct Slice3DAttributes { // input. BHWDC CalculateOutputShape(const BHWDC& input, const Slice3DAttributes& attr); -struct AddAttributes { - TensorOrScalar param; -}; - struct FullyConnectedAttributes { Tensor weights; Tensor bias; diff --git a/tensorflow/lite/delegates/gpu/common/transformations/add_quant_adjustments_test.cc b/tensorflow/lite/delegates/gpu/common/transformations/add_quant_adjustments_test.cc index ef75b5bb23b..2ff84981f9d 100644 --- a/tensorflow/lite/delegates/gpu/common/transformations/add_quant_adjustments_test.cc +++ b/tensorflow/lite/delegates/gpu/common/transformations/add_quant_adjustments_test.cc @@ -51,7 +51,7 @@ TEST(AddQuantAdjustments, OneNode) { Tensor add_tensor; add_tensor.shape = Linear(8); add_tensor.data.resize(8); - AddAttributes add_attr; + ElementwiseAttributes add_attr; add_attr.param = add_tensor; auto add_node = graph.NewNode(); add_node->operation.type = ToString(OperationType::ADD); @@ -95,7 +95,7 @@ TEST(AddQuantAdjustments, GeneralCase) { Tensor add_tensor; add_tensor.shape = Linear(8); add_tensor.data.resize(8); - AddAttributes add_attr; + ElementwiseAttributes add_attr; add_attr.param = add_tensor; auto add1_node = graph.NewNode(); add1_node->operation.type = ToString(OperationType::ADD); diff --git a/tensorflow/lite/delegates/gpu/common/transformations/fuse_add_to_conv.cc b/tensorflow/lite/delegates/gpu/common/transformations/fuse_add_to_conv.cc index adee86e4a64..fdbd6e03755 100644 --- a/tensorflow/lite/delegates/gpu/common/transformations/fuse_add_to_conv.cc +++ b/tensorflow/lite/delegates/gpu/common/transformations/fuse_add_to_conv.cc @@ -22,7 +22,7 @@ namespace tflite { namespace gpu { namespace { -void FuseBiasWithAddAttributes(const AddAttributes& add_attr, +void FuseBiasWithAddAttributes(const ElementwiseAttributes& add_attr, const int channels, Tensor* bias) { auto add = absl::get_if>(&add_attr.param); @@ -46,8 +46,8 @@ class MergeConvolutionWithAdd : public SequenceTransformation { if (add_node.operation.type != ToString(OperationType::ADD)) { return {TransformStatus::SKIPPED, ""}; } - AddAttributes add_attr = - absl::any_cast(add_node.operation.attributes); + ElementwiseAttributes add_attr = + absl::any_cast(add_node.operation.attributes); if (!absl::holds_alternative>( add_attr.param) && !absl::holds_alternative(add_attr.param)) { @@ -98,23 +98,23 @@ std::unique_ptr NewMergeConvolutionWithAdd() { return absl::make_unique(); } -void FuseConvolution2DWithAdd(const AddAttributes& add_attr, +void FuseConvolution2DWithAdd(const ElementwiseAttributes& add_attr, Convolution2DAttributes* attr) { FuseBiasWithAddAttributes(add_attr, attr->weights.shape.o, &attr->bias); } -void FuseDepthwiseConvolution2DWithAdd(const AddAttributes& add_attr, +void FuseDepthwiseConvolution2DWithAdd(const ElementwiseAttributes& add_attr, DepthwiseConvolution2DAttributes* attr) { FuseBiasWithAddAttributes( add_attr, attr->weights.shape.o * attr->weights.shape.i, &attr->bias); } -void FuseConvolutionTransposedWithAdd(const AddAttributes& add_attr, +void 
FuseConvolutionTransposedWithAdd(const ElementwiseAttributes& add_attr, ConvolutionTransposedAttributes* attr) { FuseBiasWithAddAttributes(add_attr, attr->weights.shape.o, &attr->bias); } -void FuseFullyConnectedWithAdd(const AddAttributes& add_attr, +void FuseFullyConnectedWithAdd(const ElementwiseAttributes& add_attr, FullyConnectedAttributes* attr) { FuseBiasWithAddAttributes(add_attr, attr->weights.shape.o, &attr->bias); } diff --git a/tensorflow/lite/delegates/gpu/common/transformations/fuse_add_to_conv.h b/tensorflow/lite/delegates/gpu/common/transformations/fuse_add_to_conv.h index 85014ec177e..53a0cef63c8 100644 --- a/tensorflow/lite/delegates/gpu/common/transformations/fuse_add_to_conv.h +++ b/tensorflow/lite/delegates/gpu/common/transformations/fuse_add_to_conv.h @@ -33,25 +33,25 @@ std::unique_ptr NewMergeConvolutionWithAdd(); // Modify Convolution2DAttributes so that after making convolution with // modified attributes we will have the same result as convolution // with old attributes and following add operation. -void FuseConvolution2DWithAdd(const AddAttributes& add_attr, +void FuseConvolution2DWithAdd(const ElementwiseAttributes& add_attr, Convolution2DAttributes* attr); // Modify DepthwiseConvolution2DAttributes so that after making depth wise // convolution with modified attributes we will have the same result as depth // wise convolution with old attributes and following add operation. -void FuseDepthwiseConvolution2DWithAdd(const AddAttributes& add_attr, +void FuseDepthwiseConvolution2DWithAdd(const ElementwiseAttributes& add_attr, DepthwiseConvolution2DAttributes* attr); // Modify ConvolutionTransposedAttributes so that after making convolution // transposed with modified attributes we will have the same result as // convolution transposed with old attributes and following add operation. -void FuseConvolutionTransposedWithAdd(const AddAttributes& add_attr, +void FuseConvolutionTransposedWithAdd(const ElementwiseAttributes& add_attr, ConvolutionTransposedAttributes* attr); // Modify FullyConnectedAttributes so that after making fully connected with // modified attributes we will have the same result as fully connected // with old attributes and following add operation. 
-void FuseFullyConnectedWithAdd(const AddAttributes& add_attr, +void FuseFullyConnectedWithAdd(const ElementwiseAttributes& add_attr, FullyConnectedAttributes* attr); } // namespace gpu diff --git a/tensorflow/lite/delegates/gpu/common/transformations/fuse_add_to_conv_test.cc b/tensorflow/lite/delegates/gpu/common/transformations/fuse_add_to_conv_test.cc index 53dba56ffb8..4a48c7c0b28 100644 --- a/tensorflow/lite/delegates/gpu/common/transformations/fuse_add_to_conv_test.cc +++ b/tensorflow/lite/delegates/gpu/common/transformations/fuse_add_to_conv_test.cc @@ -45,7 +45,7 @@ TEST(MergeConvolutionWithAddTest, Smoke) { Tensor add_tensor; add_tensor.shape = Linear(16); add_tensor.data.resize(16); - AddAttributes add_attr; + ElementwiseAttributes add_attr; add_attr.param = add_tensor; auto conv_node = graph.NewNode(); @@ -88,7 +88,7 @@ TEST(FuseAddAfterConvolution2DTest, Smoke) { Tensor add_tensor; add_tensor.shape = Linear(2); add_tensor.data = {0.3f, 0.7f}; - AddAttributes add_attr; + ElementwiseAttributes add_attr; add_attr.param = add_tensor; FuseConvolution2DWithAdd(add_attr, &attr); @@ -109,7 +109,7 @@ TEST(FuseAddAfterDepthwiseConvolution2DTest, Smoke) { Tensor add_tensor; add_tensor.shape = Linear(4); add_tensor.data = {0.3f, 0.7f, 0.5f, 0.1f}; - AddAttributes add_attr; + ElementwiseAttributes add_attr; add_attr.param = add_tensor; FuseDepthwiseConvolution2DWithAdd(add_attr, &attr); @@ -131,7 +131,7 @@ TEST(FuseAddAfterConvolutionTransposedTest, Smoke) { Tensor add_tensor; add_tensor.shape = Linear(2); add_tensor.data = {0.3f, 0.7f}; - AddAttributes add_attr; + ElementwiseAttributes add_attr; add_attr.param = add_tensor; FuseConvolutionTransposedWithAdd(add_attr, &attr); @@ -152,7 +152,7 @@ TEST(FuseAddAfterFullyConnectedTest, Smoke) { Tensor add_tensor; add_tensor.shape = Linear(2); add_tensor.data = {0.3f, 0.7f}; - AddAttributes add_attr; + ElementwiseAttributes add_attr; add_attr.param = add_tensor; FuseFullyConnectedWithAdd(add_attr, &attr); diff --git a/tensorflow/lite/delegates/gpu/common/transformations/fuse_mul_to_conv.cc b/tensorflow/lite/delegates/gpu/common/transformations/fuse_mul_to_conv.cc index f4ace3c0d41..25ec6299f11 100644 --- a/tensorflow/lite/delegates/gpu/common/transformations/fuse_mul_to_conv.cc +++ b/tensorflow/lite/delegates/gpu/common/transformations/fuse_mul_to_conv.cc @@ -43,8 +43,8 @@ class MergeConvolutionWithMul : public SequenceTransformation { return {TransformStatus::SKIPPED, ""}; } - MultiplyAttributes mul_attr = - absl::any_cast(mul_node.operation.attributes); + ElementwiseAttributes mul_attr = + absl::any_cast(mul_node.operation.attributes); if (!absl::holds_alternative>( mul_attr.param) && !absl::holds_alternative(mul_attr.param)) { @@ -107,8 +107,8 @@ class MergeMulWithConvolution : public SequenceTransformation { return {TransformStatus::SKIPPED, ""}; } - MultiplyAttributes mul_attr = - absl::any_cast(mul_node.operation.attributes); + ElementwiseAttributes mul_attr = + absl::any_cast(mul_node.operation.attributes); if (!absl::holds_alternative>( mul_attr.param) && !absl::holds_alternative(mul_attr.param)) { @@ -164,7 +164,7 @@ std::unique_ptr NewMergeMulWithConvolution() { return absl::make_unique(); } -void FuseConvolution2DWithMultiply(const MultiplyAttributes& mul_attr, +void FuseConvolution2DWithMultiply(const ElementwiseAttributes& mul_attr, Convolution2DAttributes* attr) { auto mul = absl::get_if>(&mul_attr.param); auto mul_scalar = absl::get_if(&mul_attr.param); @@ -185,7 +185,7 @@ void FuseConvolution2DWithMultiply(const 
MultiplyAttributes& mul_attr, } void FuseDepthwiseConvolution2DWithMultiply( - const MultiplyAttributes& mul_attr, + const ElementwiseAttributes& mul_attr, DepthwiseConvolution2DAttributes* attr) { auto mul = absl::get_if>(&mul_attr.param); auto mul_scalar = absl::get_if(&mul_attr.param); @@ -207,7 +207,8 @@ void FuseDepthwiseConvolution2DWithMultiply( } void FuseConvolutionTransposedWithMultiply( - const MultiplyAttributes& mul_attr, ConvolutionTransposedAttributes* attr) { + const ElementwiseAttributes& mul_attr, + ConvolutionTransposedAttributes* attr) { auto mul = absl::get_if>(&mul_attr.param); auto mul_scalar = absl::get_if(&mul_attr.param); for (int d = 0; d < attr->weights.shape.o; ++d) { @@ -226,7 +227,7 @@ void FuseConvolutionTransposedWithMultiply( } } -void FuseFullyConnectedWithMultiply(const MultiplyAttributes& mul_attr, +void FuseFullyConnectedWithMultiply(const ElementwiseAttributes& mul_attr, FullyConnectedAttributes* attr) { auto mul = absl::get_if>(&mul_attr.param); auto mul_scalar = absl::get_if(&mul_attr.param); @@ -242,7 +243,7 @@ void FuseFullyConnectedWithMultiply(const MultiplyAttributes& mul_attr, } } -void FuseMultiplyWithConvolution2D(const MultiplyAttributes& mul_attr, +void FuseMultiplyWithConvolution2D(const ElementwiseAttributes& mul_attr, Convolution2DAttributes* attr) { auto mul = absl::get_if>(&mul_attr.param); auto mul_scalar = absl::get_if(&mul_attr.param); @@ -260,7 +261,7 @@ void FuseMultiplyWithConvolution2D(const MultiplyAttributes& mul_attr, } void FuseMultiplyWithDepthwiseConvolution2D( - const MultiplyAttributes& mul_attr, + const ElementwiseAttributes& mul_attr, DepthwiseConvolution2DAttributes* attr) { auto mul = absl::get_if>(&mul_attr.param); auto mul_scalar = absl::get_if(&mul_attr.param); @@ -278,7 +279,8 @@ void FuseMultiplyWithDepthwiseConvolution2D( } void FuseMultiplyWithConvolutionTransposed( - const MultiplyAttributes& mul_attr, ConvolutionTransposedAttributes* attr) { + const ElementwiseAttributes& mul_attr, + ConvolutionTransposedAttributes* attr) { auto mul = absl::get_if>(&mul_attr.param); auto mul_scalar = absl::get_if(&mul_attr.param); for (int s = 0; s < attr->weights.shape.i; ++s) { @@ -294,7 +296,7 @@ void FuseMultiplyWithConvolutionTransposed( } } -void FuseMultiplyWithFullyConnected(const MultiplyAttributes& mul_attr, +void FuseMultiplyWithFullyConnected(const ElementwiseAttributes& mul_attr, FullyConnectedAttributes* attr) { auto mul = absl::get_if>(&mul_attr.param); auto mul_scalar = absl::get_if(&mul_attr.param); diff --git a/tensorflow/lite/delegates/gpu/common/transformations/fuse_mul_to_conv.h b/tensorflow/lite/delegates/gpu/common/transformations/fuse_mul_to_conv.h index 2f19f7d93c4..8d64ae50488 100644 --- a/tensorflow/lite/delegates/gpu/common/transformations/fuse_mul_to_conv.h +++ b/tensorflow/lite/delegates/gpu/common/transformations/fuse_mul_to_conv.h @@ -38,49 +38,53 @@ std::unique_ptr NewMergeMulWithConvolution(); // Modify Convolution2DAttributes so that after making convolution with // modified attributes we will have the same result as convolution // with old attributes and following multiply operation. -void FuseConvolution2DWithMultiply(const MultiplyAttributes& mul_attr, +void FuseConvolution2DWithMultiply(const ElementwiseAttributes& mul_attr, Convolution2DAttributes* attr); // Modify DepthwiseConvolution2DAttributes so that after making depth wise // convolution with modified attributes we will have the same result as depth // wise convolution with old attributes and following multiply operation. 
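The multiply counterparts fold a per-channel scale into both the weights and the bias, because multiplying the output of a convolution by m is the same as convolving with weights and bias pre-scaled by m along the output channel. A minimal sketch under that assumption follows; the function and parameter names are illustrative, not the actual fuse_mul_to_conv code.

```cpp
#include <vector>

// Folds a following elementwise multiply into conv weights and bias:
//   conv(x; w, b) * m == conv(x; w', b') with w'[o, ...] = w[o, ...] * m[o]
//                                             b'[o]      = b[o]      * m[o]
// `weights` is laid out with the output channel as the slowest dimension, and
// `mul` holds either one scalar or one value per output channel. Sketch only.
void FoldMulIntoConv(const std::vector<float>& mul, int output_channels,
                     std::vector<float>* weights, std::vector<float>* bias) {
  const int per_channel = static_cast<int>(weights->size()) / output_channels;
  for (int o = 0; o < output_channels; ++o) {
    const float m = mul.size() == 1 ? mul[0] : mul[o];
    for (int k = 0; k < per_channel; ++k) {
      (*weights)[o * per_channel + k] *= m;
    }
    if (o < static_cast<int>(bias->size())) (*bias)[o] *= m;
  }
}
```

The FuseMultiplyWith* variants in this patch (multiply applied before the convolution) instead scale the weights along the input channel and leave the bias untouched, which matches the loops over attr->weights.shape.i in the .cc changes above.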
void FuseDepthwiseConvolution2DWithMultiply( - const MultiplyAttributes& mul_attr, DepthwiseConvolution2DAttributes* attr); + const ElementwiseAttributes& mul_attr, + DepthwiseConvolution2DAttributes* attr); // Modify ConvolutionTransposedAttributes so that after making convolution // transposed with modified attributes we will have the same result as // convolution transposed with old attributes and following multiply operation. void FuseConvolutionTransposedWithMultiply( - const MultiplyAttributes& mul_attr, ConvolutionTransposedAttributes* attr); + const ElementwiseAttributes& mul_attr, + ConvolutionTransposedAttributes* attr); // Modify FullyConnectedAttributes so that after making fully connected with // modified attributes we will have the same result as fully connected // with old attributes and following multiply operation. -void FuseFullyConnectedWithMultiply(const MultiplyAttributes& mul_attr, +void FuseFullyConnectedWithMultiply(const ElementwiseAttributes& mul_attr, FullyConnectedAttributes* attr); // Modify Convolution2DAttributes so that after making convolution with // modified attributes we will have the same result as multiply operation and // convolution with old attributes -void FuseMultiplyWithConvolution2D(const MultiplyAttributes& mul_attr, +void FuseMultiplyWithConvolution2D(const ElementwiseAttributes& mul_attr, Convolution2DAttributes* attr); // Modify DepthwiseConvolution2DAttributes so that after making depth wise // convolution with modified attributes we will have the same result as multiply // operation and depth wise convolution with old attributes void FuseMultiplyWithDepthwiseConvolution2D( - const MultiplyAttributes& mul_attr, DepthwiseConvolution2DAttributes* attr); + const ElementwiseAttributes& mul_attr, + DepthwiseConvolution2DAttributes* attr); // Modify ConvolutionTransposedAttributes so that after making convolution // transposed with modified attributes we will have the same result as multiply // operation and convolution transposed with old attributes void FuseMultiplyWithConvolutionTransposed( - const MultiplyAttributes& mul_attr, ConvolutionTransposedAttributes* attr); + const ElementwiseAttributes& mul_attr, + ConvolutionTransposedAttributes* attr); // Modify FullyConnectedAttributes so that after making fully connected // with modified attributes we will have the same result as multiply // operation and fully connected with old attributes -void FuseMultiplyWithFullyConnected(const MultiplyAttributes& mul_attr, +void FuseMultiplyWithFullyConnected(const ElementwiseAttributes& mul_attr, FullyConnectedAttributes* attr); } // namespace gpu diff --git a/tensorflow/lite/delegates/gpu/common/transformations/fuse_mul_to_conv_test.cc b/tensorflow/lite/delegates/gpu/common/transformations/fuse_mul_to_conv_test.cc index 593a18b8731..ea990dd8267 100644 --- a/tensorflow/lite/delegates/gpu/common/transformations/fuse_mul_to_conv_test.cc +++ b/tensorflow/lite/delegates/gpu/common/transformations/fuse_mul_to_conv_test.cc @@ -46,7 +46,7 @@ TEST(MergeConvolutionWithMulTest, Smoke) { Tensor mul_tensor; mul_tensor.shape = Linear(16); mul_tensor.data.resize(16); - MultiplyAttributes mul_attr; + ElementwiseAttributes mul_attr; mul_attr.param = mul_tensor; auto conv_node = graph.NewNode(); @@ -87,7 +87,7 @@ TEST(MergeMulWithConvolutionTest, Smoke) { Tensor mul_tensor; mul_tensor.shape = Linear(8); mul_tensor.data.resize(8); - MultiplyAttributes mul_attr; + ElementwiseAttributes mul_attr; mul_attr.param = mul_tensor; Convolution2DAttributes conv_attr; @@ -140,7 
+140,7 @@ TEST(FuseMulAfterConvolution2DTest, Smoke) { Tensor mul_tensor; mul_tensor.shape = Linear(2); mul_tensor.data = {0.5f, 2.0f}; - MultiplyAttributes mul_attr; + ElementwiseAttributes mul_attr; mul_attr.param = mul_tensor; FuseConvolution2DWithMultiply(mul_attr, &attr); @@ -161,7 +161,7 @@ TEST(FuseMulAfterDepthwiseConvolution2DTest, Smoke) { Tensor mul_tensor; mul_tensor.shape = Linear(4); mul_tensor.data = {0.5f, 2.0f, 4.0f, 0.25f}; - MultiplyAttributes mul_attr; + ElementwiseAttributes mul_attr; mul_attr.param = mul_tensor; FuseDepthwiseConvolution2DWithMultiply(mul_attr, &attr); @@ -183,7 +183,7 @@ TEST(FuseMulAfterConvolutionTransposedTest, Smoke) { Tensor mul_tensor; mul_tensor.shape = Linear(2); mul_tensor.data = {0.5f, 2.0f}; - MultiplyAttributes mul_attr; + ElementwiseAttributes mul_attr; mul_attr.param = mul_tensor; FuseConvolutionTransposedWithMultiply(mul_attr, &attr); @@ -204,7 +204,7 @@ TEST(FuseMulAfterFullyConnectedTest, Smoke) { Tensor mul_tensor; mul_tensor.shape = Linear(2); mul_tensor.data = {0.5f, 2.0f}; - MultiplyAttributes mul_attr; + ElementwiseAttributes mul_attr; mul_attr.param = mul_tensor; FuseFullyConnectedWithMultiply(mul_attr, &attr); @@ -224,7 +224,7 @@ TEST(FuseMulBeforeConvolution2DTest, Smoke) { Tensor mul_tensor; mul_tensor.shape = Linear(2); mul_tensor.data = {0.5f, 2.0f}; - MultiplyAttributes mul_attr; + ElementwiseAttributes mul_attr; mul_attr.param = mul_tensor; FuseMultiplyWithConvolution2D(mul_attr, &attr); @@ -245,7 +245,7 @@ TEST(FuseMulBeforeDepthwiseConvolution2DTest, Smoke) { Tensor mul_tensor; mul_tensor.shape = Linear(4); mul_tensor.data = {0.5f, 2.0f, 4.0f, 0.25f}; - MultiplyAttributes mul_attr; + ElementwiseAttributes mul_attr; mul_attr.param = mul_tensor; FuseMultiplyWithDepthwiseConvolution2D(mul_attr, &attr); @@ -267,7 +267,7 @@ TEST(FuseMulBeforeConvolutionTransposedTest, Smoke) { Tensor mul_tensor; mul_tensor.shape = Linear(2); mul_tensor.data = {0.5f, 2.0f}; - MultiplyAttributes mul_attr; + ElementwiseAttributes mul_attr; mul_attr.param = mul_tensor; FuseMultiplyWithConvolutionTransposed(mul_attr, &attr); @@ -288,7 +288,7 @@ TEST(FuseMulBeforeFullyConnectedTest, Smoke) { Tensor mul_tensor; mul_tensor.shape = Linear(2); mul_tensor.data = {0.5f, 2.0f}; - MultiplyAttributes mul_attr; + ElementwiseAttributes mul_attr; mul_attr.param = mul_tensor; FuseMultiplyWithFullyConnected(mul_attr, &attr); diff --git a/tensorflow/lite/delegates/gpu/common/transformations/merge_padding_with.cc b/tensorflow/lite/delegates/gpu/common/transformations/merge_padding_with.cc index 5d328cac803..6a4e24b5042 100644 --- a/tensorflow/lite/delegates/gpu/common/transformations/merge_padding_with.cc +++ b/tensorflow/lite/delegates/gpu/common/transformations/merge_padding_with.cc @@ -144,8 +144,8 @@ class MergePaddingWithAddOperation : public NodeTransformation { return {TransformStatus::SKIPPED, ""}; } - AddAttributes add_attr = - absl::any_cast(add_node->operation.attributes); + ElementwiseAttributes add_attr = + absl::any_cast(add_node->operation.attributes); const bool is_add_hwc = absl::holds_alternative>(add_attr.param); const bool is_add_linear = diff --git a/tensorflow/lite/delegates/gpu/common/transformations/merge_padding_with_test.cc b/tensorflow/lite/delegates/gpu/common/transformations/merge_padding_with_test.cc index 6952187364e..40029efbc65 100644 --- a/tensorflow/lite/delegates/gpu/common/transformations/merge_padding_with_test.cc +++ b/tensorflow/lite/delegates/gpu/common/transformations/merge_padding_with_test.cc @@ -127,7 +127,7 @@ 
TEST(MergePaddingWithAdd, MergeAlignedPadding) { ASSERT_TRUE(graph.SetProducer(pad_node->id, padded->id).ok()); auto add_node = graph.NewNode(); - AddAttributes add_attr; + ElementwiseAttributes add_attr; ASSERT_TRUE(graph.AddConsumer(add_node->id, padded->id).ok()); ASSERT_TRUE(graph.AddConsumer(add_node->id, input1->id).ok()); ASSERT_TRUE(graph.SetProducer(add_node->id, output->id).ok()); @@ -165,7 +165,7 @@ TEST(MergePaddingWithAdd, DoNotTrigger_AddWithAttributes) { ASSERT_TRUE(graph.SetProducer(pad_node->id, padded->id).ok()); auto add_node = graph.NewNode(); - AddAttributes add_attr; + ElementwiseAttributes add_attr; add_attr.param = Tensor(); ASSERT_TRUE(graph.AddConsumer(add_node->id, padded->id).ok()); ASSERT_TRUE(graph.AddConsumer(add_node->id, input1->id).ok()); diff --git a/tensorflow/lite/delegates/gpu/common/transformations/remove_noop.cc b/tensorflow/lite/delegates/gpu/common/transformations/remove_noop.cc index 2e3cdb0332e..6cc370899e4 100644 --- a/tensorflow/lite/delegates/gpu/common/transformations/remove_noop.cc +++ b/tensorflow/lite/delegates/gpu/common/transformations/remove_noop.cc @@ -75,8 +75,8 @@ std::unique_ptr NewRemoveSingleInputAdd() { if (node->operation.type != type) { return false; } - auto& attr = - absl::any_cast(node->operation.attributes); + auto& attr = absl::any_cast( + node->operation.attributes); return !absl::holds_alternative>( attr.param) && !absl::holds_alternative>( diff --git a/tensorflow/lite/delegates/gpu/common/transformations/remove_noop_test.cc b/tensorflow/lite/delegates/gpu/common/transformations/remove_noop_test.cc index 559140d2852..a6aafee4f06 100644 --- a/tensorflow/lite/delegates/gpu/common/transformations/remove_noop_test.cc +++ b/tensorflow/lite/delegates/gpu/common/transformations/remove_noop_test.cc @@ -38,7 +38,7 @@ TEST(RemoveSingleInputAdd, Smoke) { Value* output; ASSERT_TRUE(AddOutput(&graph, add_node, &output).ok()); add_node->operation.type = ToString(OperationType::ADD); - add_node->operation.attributes = AddAttributes(); + add_node->operation.attributes = ElementwiseAttributes(); Value* temp; ASSERT_TRUE(ConnectTwoNodes(&graph, first_node, add_node, &temp).ok()); @@ -66,7 +66,7 @@ TEST(RemoveSingleInputAdd, DoNotTrigger_TensorHWC) { Value* output; ASSERT_TRUE(AddOutput(&graph, add_node, &output).ok()); add_node->operation.type = ToString(OperationType::ADD); - AddAttributes attr; + ElementwiseAttributes attr; attr.param = Tensor(); add_node->operation.attributes = attr; @@ -93,7 +93,7 @@ TEST(RemoveSingleInputAdd, DoNotTrigger_LinearTensor) { Value* output; ASSERT_TRUE(AddOutput(&graph, add_node, &output).ok()); add_node->operation.type = ToString(OperationType::ADD); - AddAttributes attr; + ElementwiseAttributes attr; attr.param = Tensor(); add_node->operation.attributes = attr; @@ -120,7 +120,7 @@ TEST(RemoveSingleInputAdd, DoNotTrigger_Scalar) { Value* output; ASSERT_TRUE(AddOutput(&graph, add_node, &output).ok()); add_node->operation.type = ToString(OperationType::ADD); - AddAttributes attr; + ElementwiseAttributes attr; attr.param = 0.5f; add_node->operation.attributes = attr; diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/add.cc b/tensorflow/lite/delegates/gpu/gl/kernels/add.cc index 0c0aaaab4fb..a7e056239bf 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/add.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/add.cc @@ -36,7 +36,8 @@ class Add : public NodeShader { public: absl::Status GenerateCode(const GenerationContext& ctx, GeneratedCode* generated_code) const final { - const auto& attr = 
absl::any_cast(ctx.op_attr); + const auto& attr = + absl::any_cast(ctx.op_attr); auto adds = absl::get_if>(&attr.param); auto scalar = absl::get_if(&attr.param); diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/add_test.cc b/tensorflow/lite/delegates/gpu/gl/kernels/add_test.cc index f4c81841b9f..98eeb1718b0 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/add_test.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/add_test.cc @@ -44,7 +44,7 @@ TEST(AddTest, TwoInputTensorsOfTheSameShape) { output.ref = 2; output.shape = BHWC(1, 2, 2, 1); - AddAttributes attr; + ElementwiseAttributes attr; SingleOpModel model({ToString(OperationType::ADD), std::move(attr)}, {augend, addend}, {output}); ASSERT_TRUE(model.PopulateTensor(0, {-2.0, 0.2, 0.7, 0.8})); @@ -55,7 +55,7 @@ TEST(AddTest, TwoInputTensorsOfTheSameShape) { } TEST(AddTest, InputTensorAndScalar) { - AddAttributes attr; + ElementwiseAttributes attr; attr.param = 0.1f; TensorRef input, output; input.type = DataType::FLOAT32; @@ -80,7 +80,7 @@ TEST(AddTest, InputTensorWithConstantBroadcast) { input.ref = 0; input.shape = BHWC(1, 2, 2, 2); - AddAttributes attr; + ElementwiseAttributes attr; Tensor tensor; tensor.shape.v = 2; tensor.id = 1; @@ -114,7 +114,7 @@ TEST(AddTest, InputTensorWithRuntimeBroadcast) { input2.ref = 1; input2.shape = BHWC(1, 1, 1, 2); - AddAttributes attr; + ElementwiseAttributes attr; TensorRef output; output.type = DataType::FLOAT32; diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/mul.cc b/tensorflow/lite/delegates/gpu/gl/kernels/mul.cc index 9cf96255176..b66decc3ca3 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/mul.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/mul.cc @@ -80,7 +80,7 @@ absl::Status GenerateApplyMaskCode(const NodeShader::GenerationContext& ctx, absl::Status GenerateMultiplyScalarCode( const NodeShader::GenerationContext& ctx, GeneratedCode* generated_code) { - const auto& attr = absl::any_cast(ctx.op_attr); + const auto& attr = absl::any_cast(ctx.op_attr); auto muls = absl::get_if>(&attr.param); auto scalar = absl::get_if(&attr.param); diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/mul_test.cc b/tensorflow/lite/delegates/gpu/gl/kernels/mul_test.cc index 6bd5e85df01..e19f00f763e 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/mul_test.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/mul_test.cc @@ -41,7 +41,7 @@ TEST(MulTest, Scalar) { output.ref = 1; output.shape = BHWC(1, 2, 2, 1); - MultiplyAttributes attr; + ElementwiseAttributes attr; attr.param = 2.f; SingleOpModel model({ToString(OperationType::MUL), attr}, {input}, {output}); @@ -61,7 +61,7 @@ TEST(MulTest, Linear) { output.ref = 1; output.shape = BHWC(1, 1, 2, 2); - MultiplyAttributes attr; + ElementwiseAttributes attr; Tensor tensor; tensor.shape.v = 2; tensor.id = 1; diff --git a/tensorflow/lite/delegates/gpu/metal/api.cc b/tensorflow/lite/delegates/gpu/metal/api.cc index 648fa166bc0..7b086a6d130 100644 --- a/tensorflow/lite/delegates/gpu/metal/api.cc +++ b/tensorflow/lite/delegates/gpu/metal/api.cc @@ -190,7 +190,8 @@ absl::Status RegisterPrimaryOps(const GraphFloat32& graph, const Node* node, case OperationType::ADD: { if (inputs.size() == 1) { if (node->operation.attributes.has_value()) { - auto attr = absl::any_cast(node->operation.attributes); + auto attr = + absl::any_cast(node->operation.attributes); *tasks = ElementwiseWithOneInputAndConstantArguent( node_id, inputs[0], outputs[0], options, op_type, attr.param); } else { @@ -291,7 +292,7 @@ absl::Status RegisterPrimaryOps(const GraphFloat32& 
graph, const Node* node, if (inputs.size() == 1) { if (node->operation.attributes.has_value()) { auto attr = - absl::any_cast(node->operation.attributes); + absl::any_cast(node->operation.attributes); *tasks = ElementwiseWithOneInputAndConstantArguent( node_id, inputs[0], outputs[0], options, op_type, attr.param); } else { diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/add_test.mm b/tensorflow/lite/delegates/gpu/metal/kernels/add_test.mm index 540308f23b4..22a798c59cc 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/add_test.mm +++ b/tensorflow/lite/delegates/gpu/metal/kernels/add_test.mm @@ -29,7 +29,7 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/metal/kernels/test_util.h" #include "tensorflow/lite/delegates/gpu/metal/runtime_options.h" -using ::tflite::gpu::AddAttributes; +using ::tflite::gpu::ElementwiseAttributes; using ::tflite::gpu::BHWC; using ::tflite::gpu::DataType; using ::tflite::gpu::Linear; @@ -61,7 +61,7 @@ using ::tflite::gpu::metal::SingleOpModel; output.ref = 2; output.shape = BHWC(1, 2, 2, 1); - AddAttributes attr; + ElementwiseAttributes attr; SingleOpModel model({ToString(OperationType::ADD), std::move(attr)}, {augend, addend}, {output}); XCTAssertTrue(model.PopulateTensor(0, {-2.0, 0.2, 0.7, 0.8})); XCTAssertTrue(model.PopulateTensor(1, {0.1, 0.2, 0.3, 0.5})); @@ -72,7 +72,7 @@ using ::tflite::gpu::metal::SingleOpModel; } - (void)testInputTensorAndScalar { - AddAttributes attr; + ElementwiseAttributes attr; attr.param = 0.1f; TensorRef input, output; input.type = DataType::FLOAT32; @@ -97,7 +97,7 @@ using ::tflite::gpu::metal::SingleOpModel; input.ref = 0; input.shape = BHWC(1, 2, 2, 2); - AddAttributes attr; + ElementwiseAttributes attr; Tensor tensor; tensor.shape.v = 2; tensor.id = 1; From d033ed5ee8f3bff3209a1f8b341c48bc2ab955af Mon Sep 17 00:00:00 2001 From: Robert David Date: Fri, 31 Jul 2020 19:06:17 -0700 Subject: [PATCH 1928/2522] Use branchless code to detect out-of-bound reads. PiperOrigin-RevId: 324338479 Change-Id: I031c4c29304220253c2b53ceef9563b1c8f51f01 --- .../cl/kernels/mean_stddev_normalization.cc | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc b/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc index 9e7e0c3283e..fb206cc0692 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc @@ -46,11 +46,10 @@ $0) { float sum = 0.0f; for (int S = 0; S < args.src_tensor.Slices(); ++S) { const float4 t = args.src_tensor.Read(0, 0, S, B); - sum += t.x; - // Filter out out-of-bounds reads - if (S * 4 + 1 < args.src_tensor.Channels()) sum += t.y; - if (S * 4 + 2 < args.src_tensor.Channels()) sum += t.z; - if (S * 4 + 3 < args.src_tensor.Channels()) sum += t.w; + // Filter out reads beyond the end of the tensor. 
+ const int4 is_after_end_of_tensor = (int4)(0, 1, 2, 3) >= (args.src_tensor.Channels() - S * 4); + const float4 filtered_t = select(t, (float4)(0.0f), is_after_end_of_tensor); + sum += filtered_t.x + filtered_t.y + filtered_t.z + filtered_t.w; } // Calculate the mean const float mean = sum / args.src_tensor.Channels(); @@ -58,12 +57,11 @@ $0) { float sum_diff_sq = 0.0f; for (int S = 0; S < args.src_tensor.Slices(); ++S) { const float4 t = args.src_tensor.Read(0, 0, S, B); - float4 diff = t - mean; - // Filter out out-of-bounds reads - if (S * 4 + 1 >= args.src_tensor.Channels()) diff.y = 0.0f; - if (S * 4 + 2 >= args.src_tensor.Channels()) diff.z = 0.0f; - if (S * 4 + 3 >= args.src_tensor.Channels()) diff.w = 0.0f; - float dotprod = dot(diff, diff); + const float4 diff = t - mean; + // Filter out reads beyond the end of the tensor. + const int4 is_after_end_of_tensor = (int4)(0, 1, 2, 3) >= (args.src_tensor.Channels() - S * 4); + const float4 filtered_diff = select(diff, (float4)(0.0f), is_after_end_of_tensor); + float dotprod = dot(filtered_diff, filtered_diff); sum_diff_sq += dotprod; } // Calculate 1/stddev (with the 'regulazing constant' as in tensor_utils.cc) From 71bb46a15f6dffa16b11f597d2059028c9fd4440 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Fri, 31 Jul 2020 19:12:08 -0700 Subject: [PATCH 1929/2522] Clean up error message differences between XLA and TF for convolutions 1. The Conv2DTest.testOpEdgeCases test was enabled, and the two places where the XLA and TF error messages were different now check for different error messages. 2. Change TF to return an Unimplemented error instead of an InvalidArgument error for unsupported convolution configurations. This is what XLA does and seems more logical. 3. Change the tests to use placeholders correctly as otherwise the XLA version throws an exception because of the missing feeds. 
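The distinction this commit draws is between inputs that are malformed (still InvalidArgument) and configurations that are legal but simply not handled by the current kernels (now Unimplemented, matching what the XLA bridge reports). A condensed sketch of the resulting convention is below; the error helpers are the ones used in the diff that follows, but the standalone function, its signature, and the include path are illustrative assumptions rather than TensorFlow API.

```cpp
#include <cstdint>

#include "tensorflow/core/platform/errors.h"

// Sketch of the error-category convention: reject malformed attribute values
// with InvalidArgument, and report well-formed but unsupported configurations
// with Unimplemented. Function name/signature are hypothetical.
tensorflow::Status ValidateConv2DStrides(int64_t stride_n, int64_t stride_c,
                                         int64_t stride_h, int64_t stride_w) {
  if (stride_n != 1 || stride_c != 1) {
    // Legal request, but the CPU kernels do not support it yet.
    return tensorflow::errors::Unimplemented(
        "Current implementation does not yet support "
        "strides in the batch and depth dimensions.");
  }
  if (stride_h <= 0 || stride_w <= 0) {
    // A non-positive stride can never be valid: caller error.
    return tensorflow::errors::InvalidArgument(
        "Row and column strides should be larger than 0.");
  }
  return tensorflow::Status::OK();
}
```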
PiperOrigin-RevId: 324338945 Change-Id: Ib6999c31918987cc1db9c0ba220e229097157747 --- .../tf2xla/kernels/conv_op_helpers.cc | 4 + .../core/kernels/conv_grad_input_ops.cc | 24 ++-- tensorflow/core/kernels/conv_ops.cc | 8 +- .../python/kernel_tests/conv_ops_test.py | 133 +++++++++++++----- 4 files changed, 116 insertions(+), 53 deletions(-) diff --git a/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc b/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc index a7a8b8bcb52..d29644dd0de 100644 --- a/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc +++ b/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc @@ -203,6 +203,10 @@ xla::StatusOr ConvOpAttrs::Create(int num_spatial_dims, return errors::InvalidArgument("Invalid data format: ", data_format); } + TF_RETURN_IF_ERROR(CheckValidPadding(attrs.padding, attrs.explicit_paddings, + /*num_dims=*/num_spatial_dims + 2, + attrs.data_format)); + return attrs; } diff --git a/tensorflow/core/kernels/conv_grad_input_ops.cc b/tensorflow/core/kernels/conv_grad_input_ops.cc index d9743a1dc57..86090864ddb 100644 --- a/tensorflow/core/kernels/conv_grad_input_ops.cc +++ b/tensorflow/core/kernels/conv_grad_input_ops.cc @@ -397,8 +397,8 @@ class Conv2DBackpropInputOp : public OpKernel { int stride_w = GetTensorDim(strides_, data_format_, 'W'); OP_REQUIRES( context, (stride_n == 1 && stride_c == 1), - errors::InvalidArgument("Current implementation does not yet support " - "strides in the batch and depth dimensions.")); + errors::Unimplemented("Current implementation does not yet support " + "strides in the batch and depth dimensions.")); OP_REQUIRES(context, stride_h > 0 && stride_w > 0, errors::InvalidArgument( "Row and column strides should be larger than 0.")); @@ -411,10 +411,10 @@ class Conv2DBackpropInputOp : public OpKernel { int dilation_c = GetTensorDim(dilations_, data_format_, 'C'); int dilation_h = GetTensorDim(dilations_, data_format_, 'H'); int dilation_w = GetTensorDim(dilations_, data_format_, 'W'); - OP_REQUIRES(context, (dilation_n == 1 && dilation_c == 1), - errors::InvalidArgument( - "Current implementation does not yet support " - "dilations in the batch and depth dimensions.")); + OP_REQUIRES( + context, (dilation_n == 1 && dilation_c == 1), + errors::Unimplemented("Current implementation does not yet support " + "dilations in the batch and depth dimensions.")); OP_REQUIRES( context, dilation_h > 0 && dilation_w > 0, errors::InvalidArgument("Dilated rates should be larger than 0.")); @@ -517,8 +517,8 @@ class Conv2DCustomBackpropInputOp : public OpKernel { "specify 4 dimensions")); OP_REQUIRES( context, (strides_[0] == 1 && strides_[3] == 1), - errors::InvalidArgument("Current implementation does not yet support " - "strides in the batch and depth dimensions.")); + errors::Unimplemented("Current implementation does not yet support " + "strides in the batch and depth dimensions.")); OP_REQUIRES(context, strides_[1] > 0 && strides_[2] > 0, errors::InvalidArgument( "Row and column strides should be larger than 0.")); @@ -527,10 +527,10 @@ class Conv2DCustomBackpropInputOp : public OpKernel { OP_REQUIRES(context, dilations_.size() == 4, errors::InvalidArgument("Sliding window dilations field must " "specify 4 dimensions")); - OP_REQUIRES(context, (dilations_[0] == 1 && dilations_[3] == 1), - errors::InvalidArgument( - "Current implementation does not yet support " - "dilations in the batch and depth dimensions.")); + OP_REQUIRES( + context, (dilations_[0] == 1 && dilations_[3] == 1), + errors::Unimplemented("Current 
implementation does not yet support " + "dilations in the batch and depth dimensions.")); // TODO(yangzihao): Add a CPU implementation for dilated convolution. OP_REQUIRES(context, (dilations_[1] == 1 && dilations_[2] == 1), errors::InvalidArgument( diff --git a/tensorflow/core/kernels/conv_ops.cc b/tensorflow/core/kernels/conv_ops.cc index ab8e24a311f..8db796c216b 100644 --- a/tensorflow/core/kernels/conv_ops.cc +++ b/tensorflow/core/kernels/conv_ops.cc @@ -374,8 +374,8 @@ Status InitConv2DParameters(const OpKernelConstruction* context, const int64 stride_w = GetTensorDim(strides, data_format, 'W'); TF_REQUIRES( stride_n == 1 && stride_c == 1, - errors::InvalidArgument("Current implementation does not yet support " - "strides in the batch and depth dimensions.")); + errors::Unimplemented("Current implementation does not yet support " + "strides in the batch and depth dimensions.")); TF_REQUIRES(stride_h > 0 && stride_w > 0, errors::InvalidArgument( "Row and column strides should be larger than 0.")); @@ -386,8 +386,8 @@ Status InitConv2DParameters(const OpKernelConstruction* context, const int64 dilation_w = GetTensorDim(dilations, data_format, 'W'); TF_REQUIRES( dilation_n == 1 && dilation_c == 1, - errors::InvalidArgument("Current implementation does not yet support " - "dilations in the batch and depth dimensions.")); + errors::Unimplemented("Current implementation does not yet support " + "dilations in the batch and depth dimensions.")); TF_REQUIRES( dilation_h > 0 && dilation_w > 0, errors::InvalidArgument("Dilated rates should be larger than 0.")); diff --git a/tensorflow/python/kernel_tests/conv_ops_test.py b/tensorflow/python/kernel_tests/conv_ops_test.py index 5c7ef34cad8..f480f4319da 100644 --- a/tensorflow/python/kernel_tests/conv_ops_test.py +++ b/tensorflow/python/kernel_tests/conv_ops_test.py @@ -2522,79 +2522,138 @@ class Conv2DTest(test.TestCase): padding=[0, 0, 0, 0]) @test_util.deprecated_graph_mode_only - @test_util.disable_xla("b/123337890") # Error messages differ def testOpEdgeCases(self): with self.cached_session() as sess: # Illegal strides. - with self.assertRaisesRegex(errors_impl.InvalidArgumentError, + with self.assertRaisesRegex(errors_impl.UnimplementedError, "strides in the batch and depth"): + input_placeholder = array_ops.placeholder(dtypes.float32) + input_val = np.ones([10, 10]) + filter_placeholder = array_ops.placeholder(dtypes.float32) + filter_val = np.ones([10, 10]) sess.run( nn_ops.conv2d( - array_ops.placeholder(dtypes.float32), - array_ops.placeholder(dtypes.float32), + input_placeholder, + filter_placeholder, strides=[2, 1, 1, 1], - padding="SAME")) - with self.assertRaisesRegex(errors_impl.InvalidArgumentError, + padding="SAME"), + feed_dict={ + input_placeholder: input_val, + filter_placeholder: filter_val + }) + with self.assertRaisesRegex(errors_impl.UnimplementedError, "strides in the batch and depth"): + input_placeholder = array_ops.placeholder(dtypes.float32) + filter_placeholder = array_ops.placeholder(dtypes.float32) + input_val = np.ones([10, 10]) + filter_val = np.ones([10, 10]) sess.run( nn_ops.conv2d( - array_ops.placeholder(dtypes.float32), - array_ops.placeholder(dtypes.float32), + input_placeholder, + filter_placeholder, strides=[1, 1, 1, 2], - padding="SAME")) + padding="SAME"), + feed_dict={ + input_placeholder: input_val, + filter_placeholder: filter_val + }) # Filter larger than input. 
with self.assertRaisesRegex(ValueError, "Negative dimension size"): + input_placeholder = array_ops.placeholder( + dtypes.float32, shape=[32, 20, 20, 3]) + input_val = np.ones([32, 20, 20, 3]) + filter_placeholder = array_ops.placeholder( + dtypes.float32, shape=[20, 21, 3, 2]) + filter_val = np.ones([20, 21, 3, 2]) + sess.run( nn_ops.conv2d( - array_ops.placeholder( - dtypes.float32, shape=[32, 20, 20, 3]), - array_ops.placeholder( - dtypes.float32, shape=[20, 21, 3, 2]), + input_placeholder, + filter_placeholder, strides=[1, 1, 1, 1], - padding="VALID")) + padding="VALID"), + feed_dict={ + input_placeholder: input_val, + filter_placeholder: filter_val + }) with self.assertRaisesRegex(ValueError, "Negative dimension size"): + input_placeholder = array_ops.placeholder( + dtypes.float32, shape=[32, 20, 20, 3]) + input_val = np.ones([32, 20, 20, 3]) + filter_placeholder = array_ops.placeholder( + dtypes.float32, shape=[21, 20, 3, 2]) + filter_val = np.ones([21, 20, 3, 2]) sess.run( nn_ops.conv2d( - array_ops.placeholder( - dtypes.float32, shape=[32, 20, 20, 3]), - array_ops.placeholder( - dtypes.float32, shape=[21, 20, 3, 2]), + input_placeholder, + filter_placeholder, strides=[1, 1, 1, 1], - padding="VALID")) + padding="VALID"), + feed_dict={ + input_placeholder: input_val, + filter_placeholder: filter_val + }) # Filter larger than input + padding. with self.assertRaisesRegex(ValueError, "Negative dimension size"): + input_placeholder = array_ops.placeholder( + dtypes.float32, shape=[32, 20, 20, 3]) + input_val = np.ones([32, 20, 20, 3]) + filter_placeholder = array_ops.placeholder( + dtypes.float32, shape=[24, 25, 3, 2]) + filter_val = np.ones([24, 25, 3, 2]) sess.run( nn_ops.conv2d( - array_ops.placeholder(dtypes.float32, shape=[32, 20, 20, 3]), - array_ops.placeholder(dtypes.float32, shape=[24, 25, 3, 2]), + input_placeholder, + filter_placeholder, strides=[1, 1, 1, 1], - padding=[[0, 0], [2, 2], [2, 2], [0, 0]])) + padding=[[0, 0], [2, 2], [2, 2], [0, 0]]), + feed_dict={ + input_placeholder: input_val, + filter_placeholder: filter_val + }) # Negative padding during backprop. 
- with self.assertRaisesRegex(errors_impl.InvalidArgumentError, - "nonnegative"): + with self.assertRaisesRegex( + errors_impl.InvalidArgumentError, + "All elements of explicit_paddings must be nonnegative"): + filter_placeholder = array_ops.placeholder( + dtypes.float32, shape=[18, 18, 3, 2]) + filter_val = np.ones([18, 18, 3, 2]) + out_backprop = array_ops.placeholder( + dtypes.float32, shape=[32, 3, 2, 2]) + out_backprop_val = np.ones([32, 3, 2, 2]) sess.run( nn_ops.conv2d_backprop_input([32, 20, 20, 3], - array_ops.placeholder( - dtypes.float32, - shape=[18, 18, 3, 2]), - array_ops.placeholder( - dtypes.float32, - shape=[32, 3, 2, 2]), + filter_placeholder, + out_backprop, strides=[1, 1, 1, 1], padding=[[0, 0], [-1, 0], [0, 0], - [0, 0]])) - with self.assertRaisesRegex(errors_impl.InvalidArgumentError, - "nonnegative"): + [0, 0]]), + feed_dict={ + filter_placeholder: filter_val, + out_backprop: out_backprop_val + }) + with self.assertRaisesRegex( + errors_impl.InvalidArgumentError, + "All elements of explicit_paddings must be nonnegative"): + input_placeholder = array_ops.placeholder( + dtypes.float32, shape=[32, 20, 20, 3]) + input_val = np.ones([32, 20, 20, 3]) + out_backprop = array_ops.placeholder( + dtypes.float32, shape=[32, 3, 2, 2]) + out_backprop_val = np.ones([32, 3, 2, 2]) sess.run( nn_ops.conv2d_backprop_filter( - array_ops.placeholder(dtypes.float32, shape=[32, 20, 20, 3]), - [18, 18, 3, 2], - array_ops.placeholder(dtypes.float32, shape=[32, 3, 2, 2]), + input_placeholder, [18, 18, 3, 2], + out_backprop, strides=[1, 1, 1, 1], - padding=[[0, 0], [-1, 0], [0, 0], [0, 0]])) + padding=[[0, 0], [-1, 0], [0, 0], [0, 0]]), + feed_dict={ + input_placeholder: input_val, + out_backprop: out_backprop_val + }) class DepthwiseConv2DTest(test.TestCase): From 3c3f0aa1d7ca537e3f16635f234fa51ee951efaa Mon Sep 17 00:00:00 2001 From: Souradeep Nanda Date: Sat, 1 Aug 2020 08:02:52 +0530 Subject: [PATCH 1930/2522] Added testable docstring --- tensorflow/python/ops/custom_gradient.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/tensorflow/python/ops/custom_gradient.py b/tensorflow/python/ops/custom_gradient.py index 5a6b3cc7130..e67f19099fc 100644 --- a/tensorflow/python/ops/custom_gradient.py +++ b/tensorflow/python/ops/custom_gradient.py @@ -139,7 +139,8 @@ def custom_gradient(f=None): the same number of variables. We take the function `z = x * y` as an example. ```python - @tf.custom_gradient + >>> import tensorflow as tf + >>> @tf.custom_gradient def bar(x, y): def grad(upstream): dz_dx = y @@ -150,18 +151,22 @@ def custom_gradient(f=None): return z, grad - x = tf.constant(2.0, dtype=tf.float32) - y = tf.constant(3.0, dtype=tf.float32) + >>> x = tf.constant(2.0, dtype=tf.float32) + >>> y = tf.constant(3.0, dtype=tf.float32) - with tf.GradientTape(persistent=True) as tape: + >>> with tf.GradientTape(persistent=True) as tape: tape.watch(x) tape.watch(y) z = bar(x, y) - tf.print(z) # Output: 6 - tf.print(tape.gradient(z, x)) # Output: 3 - tf.print(tape.gradient(z, y)) # Output: 2 - tf.print(tape.gradient(x, y)) # Output: None + >>> z + 6 + >>> tape.gradient(z, x) + 3 + >>> tape.gradient(z, y) + 2 + >>> tape.gradient(x, y) + None ``` Nesting custom gradients can lead to unintuitive results. The default From 803e198f8358afde11bc97a44a37eba877e60746 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 31 Jul 2020 20:07:59 -0700 Subject: [PATCH 1931/2522] [XLA:SPMD] Support first simple recursive partiton on both parallel dim and spatial non parallel dim. PiperOrigin-RevId: 324343345 Change-Id: Ibd84eac1080c864f62589368611035c9f50584a9 --- .../xla/service/spmd/convolution_handler.cc | 357 ++++++++++++++---- .../xla/service/spmd/spmd_partitioner.h | 16 + .../xla/service/spmd/spmd_partitioner_test.cc | 53 +++ 3 files changed, 351 insertions(+), 75 deletions(-) diff --git a/tensorflow/compiler/xla/service/spmd/convolution_handler.cc b/tensorflow/compiler/xla/service/spmd/convolution_handler.cc index 06aae9347ee..78dc7d94798 100644 --- a/tensorflow/compiler/xla/service/spmd/convolution_handler.cc +++ b/tensorflow/compiler/xla/service/spmd/convolution_handler.cc @@ -34,15 +34,23 @@ namespace xla { namespace spmd { namespace { +// Partition convolution. +StatusOr PartitionConvolution( + PartitionedHlo lhs, PartitionedHlo rhs, const Shape& output_base_shape, + const HloSharding& output_sharding, const Window& conv_window, + HloInstruction* original_hlo, int64 num_partitions, + const SpmdPartitionerOptions& options, HloInstruction* partition_id, + HloModule* module, SpmdBuilder* b); + // Partition convolution with only paralell dims are tiled StatusOr PartitionConvolutionWithParallelDimension( - PartitionedHlo lhs, PartitionedHlo rhs, HloInstruction* original_hlo, - int64 num_partitions, const SpmdPartitionerOptions& options, - HloInstruction* partition_id, HloModule* module, SpmdBuilder* b) { + PartitionedHlo lhs, PartitionedHlo rhs, const Shape& output_base_shape, + const HloSharding& output_sharding, const Window& conv_window, + HloInstruction* original_hlo, int64 num_partitions, SpmdBuilder* b) { TF_RET_CHECK(original_hlo->opcode() == HloOpcode::kConvolution); const auto& dnums = original_hlo->convolution_dimension_numbers(); - std::vector rhs_to_lhs_indices(original_hlo->shape().rank()); + std::vector rhs_to_lhs_indices(output_base_shape.rank()); rhs_to_lhs_indices[dnums.kernel_output_feature_dimension()] = dnums.input_batch_dimension(); rhs_to_lhs_indices[dnums.kernel_input_feature_dimension()] = @@ -51,7 +59,7 @@ StatusOr PartitionConvolutionWithParallelDimension( rhs_to_lhs_indices[dnums.kernel_spatial_dimensions(i)] = dnums.input_spatial_dimensions(i); } - std::vector lhs_to_rhs_indices(original_hlo->shape().rank()); + std::vector lhs_to_rhs_indices(output_base_shape.rank()); for (int64 i = 0; i < rhs_to_lhs_indices.size(); ++i) { lhs_to_rhs_indices[rhs_to_lhs_indices[i]] = i; } @@ -68,11 +76,9 @@ StatusOr PartitionConvolutionWithParallelDimension( for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { int64 lhs_dim = dnums.input_spatial_dimensions(i); int64 lhs_size = lhs.base_shape().dimensions(lhs_dim); - const auto& wd = original_hlo->window().dimensions(i); + const auto& wd = conv_window.dimensions(i); int64 rhs_dim = dnums.kernel_spatial_dimensions(i); - // Only non reversal window is supported right now. 
- if (!wd.window_reversal() && - dot_as_convolution_util::ConvSpatialDimensionIsParallel(wd, lhs_size)) { + if (dot_as_convolution_util::ConvSpatialDimensionIsParallel(wd, lhs_size)) { parallel_spatial_dims.emplace_back(i); lhs_parallel_dim_partitions *= ShardCountAtDim(lhs.sharding(), lhs_dim); rhs_parallel_dim_partitions *= ShardCountAtDim(rhs.sharding(), rhs_dim); @@ -102,7 +108,7 @@ StatusOr PartitionConvolutionWithParallelDimension( auto rhs_shard_shape = MakePartitionedShape(rhs.base_shape(), rhs.sharding()); // Update convolution window. - auto new_window = original_hlo->window(); + auto new_window = conv_window; for (const auto& spatial_dim : parallel_spatial_dims) { auto wd = new_window.mutable_dimensions(spatial_dim); wd->set_size(lhs_shard_shape.dimensions( @@ -115,14 +121,13 @@ StatusOr PartitionConvolutionWithParallelDimension( ShapeInference::InferConvolveShape( lhs_shard_shape, rhs_shard_shape, original_hlo->feature_group_count(), original_hlo->batch_group_count(), new_window, dnums)); - *sharded_conv_shape.mutable_layout() = original_hlo->shape().layout(); auto sharded_conv = b->AddInstruction(HloInstruction::CreateConvolve( sharded_conv_shape, lhs.hlo(), rhs.hlo(), original_hlo->feature_group_count(), original_hlo->batch_group_count(), new_window, dnums, original_hlo->precision_config())); sharded_conv->set_sharding(original_hlo->sharding()); - return PartitionedHlo(sharded_conv, original_hlo->shape(), lhs.state()) - .Reshard(original_hlo->sharding()) + return PartitionedHlo(sharded_conv, output_base_shape, lhs.state()) + .Reshard(output_sharding) .hlo(); } @@ -130,15 +135,16 @@ StatusOr PartitionConvolutionWithParallelDimension( // dimensions. Halo exchange will happen on RHS only. StatusOr PartitionConvolutionWithSpatialDimensionHaloExchangeOnRHS( - PartitionedHlo lhs, PartitionedHlo rhs, HloInstruction* original_hlo, - int64 num_partitions, const SpmdPartitionerOptions& options, - HloInstruction* partition_id, HloModule* module, SpmdBuilder* b) { + PartitionedHlo lhs, PartitionedHlo rhs, const Shape& output_base_shape, + const HloSharding& output_sharding, const Window& conv_window, + HloInstruction* original_hlo, HloInstruction* partition_id, + HloModule* module, SpmdBuilder* b) { TF_RET_CHECK(original_hlo->opcode() == HloOpcode::kConvolution); TF_RET_CHECK(!lhs.sharding().IsTileMaximal() && !rhs.sharding().IsTileMaximal()); const auto& dnums = original_hlo->convolution_dimension_numbers(); - std::vector rhs_to_lhs_indices(original_hlo->shape().rank()); + std::vector rhs_to_lhs_indices(output_base_shape.rank()); rhs_to_lhs_indices[dnums.kernel_output_feature_dimension()] = dnums.input_batch_dimension(); rhs_to_lhs_indices[dnums.kernel_input_feature_dimension()] = @@ -147,7 +153,7 @@ PartitionConvolutionWithSpatialDimensionHaloExchangeOnRHS( rhs_to_lhs_indices[dnums.kernel_spatial_dimensions(i)] = dnums.input_spatial_dimensions(i); } - std::vector lhs_to_rhs_indices(original_hlo->shape().rank()); + std::vector lhs_to_rhs_indices(output_base_shape.rank()); for (int64 i = 0; i < rhs_to_lhs_indices.size(); ++i) { lhs_to_rhs_indices[rhs_to_lhs_indices[i]] = i; } @@ -167,7 +173,7 @@ PartitionConvolutionWithSpatialDimensionHaloExchangeOnRHS( }; auto zero = b->AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::Zero(original_hlo->shape().element_type()))); + LiteralUtil::Zero(output_base_shape.element_type()))); if (ShapeSizeInBytes(lhs.base_shape()) < ShapeSizeInBytes(rhs.base_shape())) { if (unsupported_sharding(aligned_lhs_sharding, rhs.sharding())) { return 
nullptr; @@ -199,7 +205,6 @@ PartitionConvolutionWithSpatialDimensionHaloExchangeOnRHS( // = i * (RHS * D - LHS) + (WC - 1) * stride - low_padding // * right-halo: limit(i) - (i + 1) * RHS // = (i + 1) * (LHS - RHS * D) + low_pading - Window window = original_hlo->window(); const auto& collective_ops_creator = lhs.state().collective_ops_creator; std::vector shard_counts(dnums.input_spatial_dimensions_size()); std::vector lhs_shard_sizes(dnums.input_spatial_dimensions_size()); @@ -209,7 +214,7 @@ PartitionConvolutionWithSpatialDimensionHaloExchangeOnRHS( int64 lhs_dimension = dnums.input_spatial_dimensions(i); int64 rhs_dimension = dnums.kernel_spatial_dimensions(i); int64 shard_count = rhs.sharding().tile_assignment().dim(rhs_dimension); - auto wd = window.dimensions(i); + auto wd = conv_window.dimensions(i); if (wd.base_dilation() != 1 || wd.window_reversal()) { return nullptr; } @@ -224,21 +229,21 @@ PartitionConvolutionWithSpatialDimensionHaloExchangeOnRHS( } std::vector left_halo_size_functions( - original_hlo->shape().rank()); + output_base_shape.rank()); std::vector right_halo_size_functions( - original_hlo->shape().rank()); - Window new_window = window; + output_base_shape.rank()); + Window new_window = conv_window; // Data structures needed for Pad and DynamicSlice on LHS if needed. bool need_dynamic_slice_lhs = false; auto partition_ordinals = MakeTiledPartitionOrdinals(lhs.sharding(), partition_id, b); - std::vector zero_padding(original_hlo->shape().rank()); + std::vector zero_padding(output_base_shape.rank()); PaddingConfig pad_config = window_util::MakeSymmetricPadding(zero_padding); auto zero_s32 = b->AddInstruction(HloInstruction::CreateConstant(LiteralUtil::Zero(S32))); std::vector dynamic_slice_start_indices( - original_hlo->shape().rank(), zero_s32); + output_base_shape.rank(), zero_s32); Shape dynamic_slice_shape = lhs.hlo()->shape(); Shape pad_shape = lhs.hlo()->shape(); @@ -255,7 +260,7 @@ PartitionConvolutionWithSpatialDimensionHaloExchangeOnRHS( // Calculate the left and right halo sizes as described in the comments // above. It calculcates the halo sizes with dilation, so we apply // CeilOfRatio({left,right}_halo_size, window_dilation). - auto wd = window.dimensions(i); + auto wd = conv_window.dimensions(i); int64 padding_low = wd.padding_low(); int64 padding_high = wd.padding_high(); int64 base = lhs.base_shape().dimensions(lhs_dimension); @@ -383,15 +388,15 @@ PartitionConvolutionWithSpatialDimensionHaloExchangeOnRHS( } auto conv = b->AddInstruction(HloInstruction::CreateConvolve( - original_hlo->shape(), conv_lhs, rhs_with_halo, + output_base_shape, conv_lhs, rhs_with_halo, original_hlo->feature_group_count(), original_hlo->batch_group_count(), new_window, dnums, original_hlo->precision_config())); auto ar = collective_ops_creator.create_cross_partition_all_reduce( b, conv, MakeBinaryAdd(original_hlo->shape().element_type(), module), {}, (*lhs.state().next_channel_id)++); ar->set_sharding(HloSharding::Replicate()); - return PartitionedHlo(ar, original_hlo->shape(), lhs.state()) - .Reshard(original_hlo->sharding()) + return PartitionedHlo(ar, output_base_shape, lhs.state()) + .Reshard(output_sharding) .hlo(); } @@ -399,9 +404,10 @@ PartitionConvolutionWithSpatialDimensionHaloExchangeOnRHS( // dimensions. Halo exchange will happen on LHS only. 
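Halo exchange sizes are the crux of these spatially partitioned cases: each shard owns a contiguous slice of a spatial dimension, but the convolution window also reads a few elements owned by the neighbouring shards. For the simplest configuration the amount is easy to derive, and a back-of-the-envelope sketch is given below; it assumes stride 1 and no dilation and is only an illustration, whereas the real code builds per-partition OffsetCalculation objects so the halo can vary with the partition ordinal.

```cpp
#include <algorithm>
#include <cstdint>

// Halo sizes for a 1-D convolution whose input and output are evenly sharded
// along the spatial dimension, restricted to stride == 1 and no dilation.
// Output index o reads inputs [o - pad_low, o - pad_low + window - 1], so a
// shard that owns outputs/inputs [i*shard, (i+1)*shard) needs `pad_low` extra
// elements from its left neighbour and `window - 1 - pad_low` from its right
// neighbour (clamped at the tensor edges, where real zero padding takes over).
struct Halo {
  int64_t left;
  int64_t right;
};

Halo SimpleHaloSizes(int64_t window, int64_t pad_low) {
  Halo h;
  h.left = std::max<int64_t>(0, pad_low);
  h.right = std::max<int64_t>(0, window - 1 - pad_low);
  return h;  // left + right == window - 1 when 0 <= pad_low <= window - 1.
}
```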
StatusOr PartitionConvolutionWithSpatialDimensionHaloExchangeOnLHS( - PartitionedHlo lhs, PartitionedHlo rhs, HloInstruction* original_hlo, - int64 num_partitions, const SpmdPartitionerOptions& options, - HloInstruction* partition_id, HloModule* module, SpmdBuilder* b) { + PartitionedHlo lhs, PartitionedHlo rhs, const Shape& output_base_shape, + const HloSharding& output_sharding, const Window& conv_window, + HloInstruction* original_hlo, HloInstruction* partition_id, + HloModule* module, SpmdBuilder* b) { TF_RET_CHECK(original_hlo->opcode() == HloOpcode::kConvolution); TF_RET_CHECK(!lhs.sharding().IsTileMaximal() && !rhs.sharding().IsTileMaximal()); @@ -410,7 +416,7 @@ PartitionConvolutionWithSpatialDimensionHaloExchangeOnLHS( // Check if the operand shardings are aligned. Also we currently don't // support partitioning non-spatial dimensions. - std::vector rhs_to_lhs_indices(original_hlo->shape().rank()); + std::vector rhs_to_lhs_indices(output_base_shape.rank()); rhs_to_lhs_indices[dnums.kernel_output_feature_dimension()] = dnums.input_batch_dimension(); rhs_to_lhs_indices[dnums.kernel_input_feature_dimension()] = @@ -419,12 +425,12 @@ PartitionConvolutionWithSpatialDimensionHaloExchangeOnLHS( rhs_to_lhs_indices[dnums.kernel_spatial_dimensions(i)] = dnums.input_spatial_dimensions(i); } - std::vector lhs_to_rhs_indices(original_hlo->shape().rank()); + std::vector lhs_to_rhs_indices(output_base_shape.rank()); for (int64 i = 0; i < rhs_to_lhs_indices.size(); ++i) { lhs_to_rhs_indices[rhs_to_lhs_indices[i]] = i; } - Window window = original_hlo->window(); + Window window = conv_window; std::vector reversed_rhs_dims; for (int64 i = 0; i < window.dimensions_size(); ++i) { if (window.dimensions(i).window_reversal()) { @@ -458,7 +464,7 @@ PartitionConvolutionWithSpatialDimensionHaloExchangeOnLHS( }; auto zero = b->AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::Zero(original_hlo->shape().element_type()))); + LiteralUtil::Zero(output_base_shape.element_type()))); if (ShapeSizeInBytes(lhs.base_shape()) < ShapeSizeInBytes(rhs.base_shape())) { if (unsupported_sharding(aligned_lhs_sharding, rhs.sharding())) { return nullptr; @@ -499,6 +505,7 @@ PartitionConvolutionWithSpatialDimensionHaloExchangeOnLHS( int64 shard_count = lhs.sharding().tile_assignment().dim(lhs_dimension); auto wd = window.dimensions(i); if (wd.base_dilation() != 1) { + // TODO(wangtao): support parallel dim if it is replicate here. 
return nullptr; } @@ -512,9 +519,9 @@ PartitionConvolutionWithSpatialDimensionHaloExchangeOnLHS( } std::vector left_halo_size_functions( - original_hlo->shape().rank()); + output_base_shape.rank()); std::vector right_halo_size_functions( - original_hlo->shape().rank()); + output_base_shape.rank()); Window new_window = window; auto partition_ordinals = @@ -588,37 +595,37 @@ PartitionConvolutionWithSpatialDimensionHaloExchangeOnLHS( } auto conv = b->AddInstruction(HloInstruction::CreateConvolve( - original_hlo->shape(), lhs_with_halo, rhs.hlo(), + output_base_shape, lhs_with_halo, rhs.hlo(), original_hlo->feature_group_count(), original_hlo->batch_group_count(), new_window, original_hlo->convolution_dimension_numbers(), original_hlo->precision_config())); auto ar = lhs.state().collective_ops_creator.create_cross_partition_all_reduce( - b, conv, MakeBinaryAdd(original_hlo->shape().element_type(), module), - {}, (*lhs.state().next_channel_id)++); + b, conv, MakeBinaryAdd(output_base_shape.element_type(), module), {}, + (*lhs.state().next_channel_id)++); ar->set_sharding(HloSharding::Replicate()); - return PartitionedHlo(ar, original_hlo->shape(), lhs.state()) - .Reshard(original_hlo->sharding()) + return PartitionedHlo(ar, output_base_shape, lhs.state()) + .Reshard(output_sharding) .hlo(); } // Partition convolution when output is sharded. Will shard LHS with replicated // RHS. -StatusOr PartitionConvolutionBaseCase( - PartitionedHlo lhs, PartitionedHlo rhs, HloInstruction* original_hlo, - int64 num_partitions, const SpmdPartitionerOptions& options, - HloInstruction* partition_id, HloModule* module, SpmdBuilder* b) { +StatusOr PartitionConvolutionTiledOutput( + PartitionedHlo lhs, PartitionedHlo rhs, const Shape& output_base_shape, + const HloSharding& output_sharding, const Window& conv_window, + HloInstruction* original_hlo, SpmdBuilder* b) { TF_RET_CHECK(original_hlo->opcode() == HloOpcode::kConvolution); const auto& dnums = original_hlo->convolution_dimension_numbers(); - const auto& sharding = original_hlo->sharding(); - TF_RET_CHECK(!sharding.IsTileMaximal()); + TF_RET_CHECK(!output_sharding.IsTileMaximal()); // We don't currently support sharding on output feature dimension. - if (sharding.tile_assignment().dim(dnums.output_feature_dimension()) > 1) { + if (output_sharding.tile_assignment().dim(dnums.output_feature_dimension()) > + 1) { return nullptr; } // Check if the operand and the output sharding are aligned. - std::vector input_to_output_indices(original_hlo->shape().rank()); + std::vector input_to_output_indices(output_base_shape.rank()); input_to_output_indices[dnums.input_batch_dimension()] = dnums.output_batch_dimension(); input_to_output_indices[dnums.input_feature_dimension()] = @@ -627,8 +634,8 @@ StatusOr PartitionConvolutionBaseCase( input_to_output_indices[dnums.input_spatial_dimensions(i)] = dnums.output_spatial_dimensions(i); } - auto target_operand_sharding = - hlo_sharding_util::TransposeSharding(sharding, input_to_output_indices); + auto target_operand_sharding = hlo_sharding_util::TransposeSharding( + output_sharding, input_to_output_indices); lhs = lhs.Reshard(target_operand_sharding); // Replicate the RHS. @@ -638,15 +645,15 @@ StatusOr PartitionConvolutionBaseCase( // whereas ReshardAsWindowedInput() expects the same number of window // dimensions as the rank of the operand. So add two more trivial // dimensions. 
- std::vector ones(original_hlo->shape().rank(), 1); + std::vector ones(output_base_shape.rank(), 1); auto operand_window = window_util::MakeWindow(ones); for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { *operand_window.mutable_dimensions(dnums.input_spatial_dimensions(i)) = - original_hlo->window().dimensions(i); + conv_window.dimensions(i); } auto zero = b->AddInstruction(HloInstruction::CreateConstant( - LiteralUtil::Zero(original_hlo->shape().element_type()))); + LiteralUtil::Zero(output_base_shape.element_type()))); auto resharded_operand_and_window = lhs.ReshardAsWindowedInput(operand_window, target_operand_sharding, zero); if (!resharded_operand_and_window.has_value()) { @@ -664,8 +671,7 @@ StatusOr PartitionConvolutionBaseCase( resharded_operand_and_window->sharded_input->shape(), rhs.hlo()->shape(), original_hlo->feature_group_count(), original_hlo->batch_group_count(), new_window, dnums)); - auto shard_shape = - MakePartitionedShape(original_hlo->shape(), original_hlo->sharding()); + auto shard_shape = MakePartitionedShape(output_base_shape, output_sharding); *sharded_conv_shape.mutable_layout() = shard_shape.layout(); auto sharded_conv = b->AddInstruction(HloInstruction::CreateConvolve( sharded_conv_shape, resharded_operand_and_window->sharded_input, @@ -683,18 +689,130 @@ StatusOr PartitionConvolutionBaseCase( shard_shape.dimensions())); } -// Partition convolution. -StatusOr PartitionConvolution( - PartitionedHlo lhs, PartitionedHlo rhs, HloInstruction* original_hlo, +StatusOr PartitionConvolutionGroupOnParallelDim( + PartitionedHlo lhs, PartitionedHlo rhs, const Shape& output_base_shape, + const HloSharding& output_sharding, const Window& conv_window, + HloInstruction* original_hlo, const ConvolutionDimsMapping& dims_mapping, int64 num_partitions, const SpmdPartitionerOptions& options, HloInstruction* partition_id, HloModule* module, SpmdBuilder* b) { + std::vector lhs_dims; + std::vector rhs_dims; + std::vector output_dims; + auto lhs_sharding_dims_adjusted_to_output = + lhs.sharding().IsReplicated() + ? std::vector(lhs.base_shape().rank(), 1) + : lhs.sharding().tile_assignment().dimensions(); + auto rhs_sharding_dims_adjusted_to_output = + rhs.sharding().IsReplicated() + ? 
std::vector(rhs.base_shape().rank(), 1) + : rhs.sharding().tile_assignment().dimensions(); + auto output_sharding_dims_adjusted_to_lhs = + output_sharding.tile_assignment().dimensions(); + bool lhs_rhs_dims_matching = true; + for (const auto& dim : dims_mapping.parallel_spatial_dims) { + lhs_dims.push_back(dim.lhs); + rhs_dims.push_back(dim.rhs); + output_dims.push_back(dim.output); + if (lhs_sharding_dims_adjusted_to_output[dim.lhs] != + rhs_sharding_dims_adjusted_to_output[dim.rhs]) { + lhs_rhs_dims_matching = false; + } + lhs_sharding_dims_adjusted_to_output[dim.lhs] = + output_sharding.tile_assignment().dim(dim.output); + rhs_sharding_dims_adjusted_to_output[dim.rhs] = + output_sharding.tile_assignment().dim(dim.output); + output_sharding_dims_adjusted_to_lhs[dim.output] = + lhs.sharding().tile_assignment().dim(dim.lhs); + } + auto lhs_grouped = GroupShardingOnDims(lhs.sharding(), lhs_dims); + auto rhs_grouped = GroupShardingOnDims(rhs.sharding(), rhs_dims); + auto output_grouped = GroupShardingOnDims(output_sharding, output_dims); + if (lhs_rhs_dims_matching) { + if (ShapeUtil::ByteSizeOf(lhs.base_shape()) > + ShapeUtil::ByteSizeOf(rhs.base_shape())) { + rhs_grouped = AlignGroupsWith(std::move(rhs_grouped), lhs_grouped); + rhs = rhs.Reshard(UngroupSharding(rhs_grouped)); + } else { + lhs_grouped = AlignGroupsWith(std::move(lhs_grouped), rhs_grouped); + lhs = lhs.Reshard(UngroupSharding(lhs_grouped)); + } + auto reshaped_output_tiling = output_sharding.tile_assignment(); + reshaped_output_tiling.Reshape(output_sharding_dims_adjusted_to_lhs); + output_grouped = AlignGroupsWith( + GroupShardingOnDims(HloSharding::Tile(reshaped_output_tiling), + output_dims), + lhs_grouped); + } else { + auto reshaped_lhs_tiling = lhs.sharding().tile_assignment(); + reshaped_lhs_tiling.Reshape(lhs_sharding_dims_adjusted_to_output); + lhs_grouped = AlignGroupsWith( + GroupShardingOnDims(HloSharding::Tile(reshaped_lhs_tiling), lhs_dims), + output_grouped); + lhs = lhs.Reshard(UngroupSharding(lhs_grouped)); + auto reshaped_rhs_tiling = rhs.sharding().tile_assignment(); + reshaped_rhs_tiling.Reshape(rhs_sharding_dims_adjusted_to_output); + rhs_grouped = AlignGroupsWith( + GroupShardingOnDims(HloSharding::Tile(reshaped_rhs_tiling), rhs_dims), + output_grouped); + rhs = rhs.Reshard(UngroupSharding(rhs_grouped)); + } + + // Update LHS and RHS sharding and shape. 
+ lhs.hlo()->set_sharding(lhs_grouped.sharding); + rhs.hlo()->set_sharding(rhs_grouped.sharding); + CHECK(lhs.hlo() != rhs.hlo() || lhs_grouped.sharding == rhs_grouped.sharding); + auto per_group_partitioner_state = CreatePerGroupPartitioningState( + lhs.state(), lhs_grouped.device_groups, b); + auto grouped_lhs_base_shape = + GetPerGroupBaseShape(lhs_grouped, lhs.base_shape()); + auto grouped_lhs_shard_shape = + MakePartitionedShape(grouped_lhs_base_shape, lhs.sharding()); + // Update convolution window with the new shape + auto new_window = conv_window; + for (const auto& dim : dims_mapping.parallel_spatial_dims) { + auto wd = new_window.mutable_dimensions(dim.spatial); + wd->set_size(grouped_lhs_shard_shape.dimensions(dim.lhs)); + wd->set_stride(std::max(1, wd->size() - 1)); + wd->set_base_dilation(wd->size()); + } + + auto new_partition_id = + lhs.state().collective_ops_creator.create_partition_id(b); + TF_ASSIGN_OR_RETURN( + auto conv, + PartitionConvolution( + PartitionedHlo(lhs.hlo(), grouped_lhs_base_shape, + per_group_partitioner_state), + PartitionedHlo(rhs.hlo(), + GetPerGroupBaseShape(rhs_grouped, rhs.base_shape()), + per_group_partitioner_state), + GetPerGroupBaseShape(output_grouped, output_base_shape), + output_grouped.sharding, new_window, original_hlo, + num_partitions / output_grouped.device_groups.size(), options, + new_partition_id, module, b)); + // Reset the LHS sharding to the ungrouped one. + lhs.hlo()->set_sharding(UngroupSharding(lhs_grouped)); + rhs.hlo()->set_sharding(UngroupSharding(rhs_grouped)); + conv->set_sharding(UngroupSharding(output_grouped)); + return PartitionedHlo(conv, output_base_shape, lhs.state()) + .Reshard(output_sharding) + .hlo(); +} + +// Partition convolution with only one kind of dims partitioned. +StatusOr PartitionConvolutionBaseCase( + PartitionedHlo lhs, PartitionedHlo rhs, const Shape& output_base_shape, + const HloSharding& output_sharding, const Window& conv_window, + HloInstruction* original_hlo, int64 num_partitions, + const SpmdPartitionerOptions& options, HloInstruction* partition_id, + HloModule* module, SpmdBuilder* b) { TF_RET_CHECK(original_hlo->opcode() == HloOpcode::kConvolution); // Case 1: Either RHS or LHS is only partitioned at parallel dimensions. TF_ASSIGN_OR_RETURN(auto parallel_partitioned_conv, PartitionConvolutionWithParallelDimension( - lhs, rhs, original_hlo, num_partitions, options, - partition_id, module, b)); + lhs, rhs, output_base_shape, output_sharding, + conv_window, original_hlo, num_partitions, b)); if (parallel_partitioned_conv) { return parallel_partitioned_conv; } @@ -709,8 +827,8 @@ StatusOr PartitionConvolution( TF_ASSIGN_OR_RETURN( auto partitioned_conv, PartitionConvolutionWithSpatialDimensionHaloExchangeOnLHS( - lhs, rhs, original_hlo, num_partitions, options, partition_id, - module, b)); + lhs, rhs, output_base_shape, output_sharding, conv_window, + original_hlo, partition_id, module, b)); if (partitioned_conv) { return partitioned_conv; } @@ -718,8 +836,8 @@ StatusOr PartitionConvolution( TF_ASSIGN_OR_RETURN( auto partitioned_conv, PartitionConvolutionWithSpatialDimensionHaloExchangeOnRHS( - lhs, rhs, original_hlo, num_partitions, options, partition_id, - module, b)); + lhs, rhs, output_base_shape, output_sharding, conv_window, + original_hlo, partition_id, module, b)); if (partitioned_conv) { return partitioned_conv; @@ -728,12 +846,11 @@ StatusOr PartitionConvolution( } // Case 3: output is tiled. 
- const HloSharding& sharding = original_hlo->sharding(); - if (!sharding.IsTileMaximal()) { - TF_ASSIGN_OR_RETURN( - auto partitioned_conv, - PartitionConvolutionBaseCase(lhs, rhs, original_hlo, num_partitions, - options, partition_id, module, b)); + if (!output_sharding.IsTileMaximal()) { + TF_ASSIGN_OR_RETURN(auto partitioned_conv, + PartitionConvolutionTiledOutput( + lhs, rhs, output_base_shape, output_sharding, + conv_window, original_hlo, b)); if (partitioned_conv) { return partitioned_conv; @@ -742,6 +859,95 @@ StatusOr PartitionConvolution( return nullptr; } +// Partition convolution. +StatusOr PartitionConvolution( + PartitionedHlo lhs, PartitionedHlo rhs, const Shape& output_base_shape, + const HloSharding& output_sharding, const Window& conv_window, + HloInstruction* original_hlo, int64 num_partitions, + const SpmdPartitionerOptions& options, HloInstruction* partition_id, + HloModule* module, SpmdBuilder* b) { + TF_RET_CHECK(original_hlo->opcode() == HloOpcode::kConvolution); + + TF_ASSIGN_OR_RETURN( + auto try_partitioned_conv, + PartitionConvolutionBaseCase(lhs, rhs, output_base_shape, output_sharding, + conv_window, original_hlo, num_partitions, + options, partition_id, module, b)); + if (try_partitioned_conv) { + return try_partitioned_conv; + } + + const auto& dnums = original_hlo->convolution_dimension_numbers(); + spmd::ConvolutionDimsMapping mapping; + for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { + int64 lhs_dim = dnums.input_spatial_dimensions(i); + int64 lhs_size = lhs.base_shape().dimensions(lhs_dim); + const auto& wd = original_hlo->window().dimensions(i); + int64 rhs_dim = dnums.kernel_spatial_dimensions(i); + int64 output_dim = dnums.output_spatial_dimensions(i); + if (dot_as_convolution_util::ConvSpatialDimensionIsParallel(wd, lhs_size)) { + mapping.parallel_spatial_dims.emplace_back(); + mapping.parallel_spatial_dims.back().lhs = lhs_dim; + mapping.parallel_spatial_dims.back().rhs = rhs_dim; + mapping.parallel_spatial_dims.back().output = output_dim; + mapping.parallel_spatial_dims.back().spatial = i; + } else { + mapping.non_parallel_spatial_dims.emplace_back(); + mapping.non_parallel_spatial_dims.back().lhs = lhs_dim; + mapping.non_parallel_spatial_dims.back().rhs = rhs_dim; + mapping.non_parallel_spatial_dims.back().output = output_dim; + mapping.non_parallel_spatial_dims.back().spatial = i; + } + } + + // lhs_rhs_or_output: 0 lhs, 1 rhs, 2 output. + auto get_partitions_for_dims = + [&](const HloSharding& sharding, + absl::Span dims, + int lhs_rhs_or_output) { + int64 partitions = 1; + if (sharding.IsTileMaximal()) { + return partitions; + } + for (const auto& dim : dims) { + if (lhs_rhs_or_output == 0) { + partitions *= sharding.tile_assignment().dim(dim.lhs); + } else if (lhs_rhs_or_output == 1) { + partitions *= sharding.tile_assignment().dim(dim.rhs); + } else { + CHECK_EQ(lhs_rhs_or_output, 2); + partitions *= sharding.tile_assignment().dim(dim.output); + } + } + return partitions; + }; + + const int64 lhs_parallel_spatial_partitions = + get_partitions_for_dims(lhs.sharding(), mapping.parallel_spatial_dims, 0); + const int64 rhs_parallel_spatial_partitions = + get_partitions_for_dims(rhs.sharding(), mapping.parallel_spatial_dims, 1); + const int64 output_parallel_spatial_partitions = get_partitions_for_dims( + original_hlo->sharding(), mapping.parallel_spatial_dims, 2); + + // Recursively partition on different types of dimensions. + // + // Case 1: Group partitions by parallel spatial dims. 
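Case 1 below only fires when the LHS, RHS and output all split the parallel spatial dimensions into the same number of partitions. A sketch of that partition count, checked against the sharding used by the ConvWithParallelDimAndNonParallelSpatialDimPartitioned test further down:

def partitions_on_dims(tile_dims, dims):
    """Product of the tile-assignment sizes along the listed dimensions."""
    count = 1
    for d in dims:
        count *= tile_dims[d]
    return count

# devices=[2,2,1,1,1] with the parallel spatial dim at tensor dim 0: LHS, RHS
# and output each contribute 2 partitions there, so the grouped path is taken.
assert partitions_on_dims([2, 2, 1, 1, 1], [0]) == 2
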
+ if (lhs_parallel_spatial_partitions == rhs_parallel_spatial_partitions && + lhs_parallel_spatial_partitions == output_parallel_spatial_partitions && + lhs_parallel_spatial_partitions > 1) { + TF_ASSIGN_OR_RETURN(auto try_partitioned_conv, + PartitionConvolutionGroupOnParallelDim( + lhs, rhs, output_base_shape, output_sharding, + conv_window, original_hlo, mapping, num_partitions, + options, partition_id, module, b)); + if (try_partitioned_conv) { + return try_partitioned_conv; + } + } + + return nullptr; +} + } // namespace Status SpmdPartitioningVisitor::HandleConvolution(HloInstruction* hlo) { @@ -789,7 +995,8 @@ Status SpmdPartitioningVisitor::HandleConvolution(HloInstruction* hlo) { auto rhs = GetPartitionedHlo(hlo->operand(1)); TF_ASSIGN_OR_RETURN( auto partitioned_conv, - PartitionConvolution(lhs, rhs, hlo, num_partitions_, options_, + PartitionConvolution(lhs, rhs, hlo->shape(), hlo->sharding(), + hlo->window(), hlo, num_partitions_, options_, partition_id_, module_, &b_)); if (partitioned_conv) { diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h index d6e6818608b..a612c16bdae 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.h @@ -330,6 +330,22 @@ struct DotGeneralDimsMapping { std::vector rhs_non_contracting_dims; }; +struct ConvolutionDimsMapping { + // The dimension numbers for the operands and output corresponding to a + // logical dimension (e.g., batch, parallel, non-parallel). If an + // operand or the output doesn't have the logical dimension, it is set to + // -1. + struct DimsMapping { + int64 lhs; + int64 rhs; + int64 output; + // input mapped to index in input_spatial_dimensions(). 
+ int64 spatial; + }; + std::vector parallel_spatial_dims; + std::vector non_parallel_spatial_dims; +}; + class SpmdPartitioningVisitor : public DfsHloVisitorWithDefault { public: SpmdPartitioningVisitor( diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc index 40148d9bfcd..b052567d9c6 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc @@ -564,6 +564,59 @@ ENTRY entry { op::Constant()))))); } +TEST_F(SpmdPartitioningTest, + ConvWithParallelDimAndNonParallelSpatialDimPartitioned) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[32,12,12,24,32] parameter(0) + %lhs.copy = f32[32,12,12,24,32] copy(%lhs), + sharding={devices=[2,2,1,1,1]0,1,2,3} + %rhs = f32[32,6,6,16,32] parameter(1) + %rhs.copy = f32[32,6,6,16,32] copy(%rhs), + sharding={devices=[2,2,1,1,1]0,1,2,3} + ROOT %conv = f32[32,7,7,24,16] convolution(%lhs.copy, %rhs.copy), + dim_labels=012bf_012oi->012bf, + window={size=32x6x6 stride=31x1x1 lhs_dilate=32x1x1}, + sharding={devices=[2,2,1,1,1]0,1,2,3} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/4)); + VLOG(1) << module->ToString(); + auto root = module->entry_computation()->root_instruction(); + auto lhs = AllOf(op::Copy(op::DynamicSlice(op::Parameter(), op::Reshape(), + op::Reshape(), op::Constant(), + op::Constant(), op::Constant())), + op::Shape("f32[16,6,12,24,32]")); + auto rhs = AllOf(op::Copy(op::DynamicSlice(op::Parameter(), op::Reshape(), + op::Reshape(), op::Constant(), + op::Constant(), op::Constant())), + op::Shape("f32[16,3,6,16,32]")); + auto resharded_rhs = + AllOf(op::Shape("f32[16,6,6,16,32]"), + op::AllReduce(op::DynamicUpdateSlice( + op::Broadcast(), rhs, op::Constant(), op::Reshape(), + op::Constant(), op::Constant(), op::Constant()))); + + auto left_halo = AllOf(op::CollectivePermute(op::Slice(lhs)), + op::Shape("f32[16,2,12,24,32]")); + auto right_halo = AllOf(op::CollectivePermute(op::Slice(lhs)), + op::Shape("f32[16,3,12,24,32]")); + EXPECT_THAT( + root, + AllOf(op::Convolution( + op::Select(op::Compare(), + op::DynamicSlice( + op::Concatenate(left_halo, lhs, right_halo), + op::Constant(), op::Add(), op::Constant(), + op::Constant(), op::Constant()), + op::Broadcast()), + resharded_rhs), + op::Shape("f32[16,4,7,24,16]"))); +} + TEST_F(SpmdPartitioningTest, BroadcastPropagateTiledSharding) { const char* const hlo_string = R"( HloModule module From 8a449bdb65252abbe5b5b4c9e430e5620e6d01cd Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Fri, 31 Jul 2020 20:59:29 -0700 Subject: [PATCH 1932/2522] [XLA:SPMD] Improve sharding propagation for scatter/gather Try to pass through between the operands and outputs. 
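The pass-through rule added in this change maps partitions on operand dimensions to the corresponding offset dimensions of the gather output (or scatter update), and gives up when a partitioned dimension is collapsed, indexed, or sliced. A simplified sketch of that rule, checked against the GatherFromDataOperand test below (transposed offset dimensions are ignored here):

def passthrough_operand_to_output(operand_tile, operand_shape, slice_sizes,
                                  collapsed_dims, index_map, offset_dims,
                                  output_rank):
    """Sketch of the operand -> output pass-through; None means no pass-through."""
    out_tile = [1] * output_rank
    collapsed = 0
    for i, parts in enumerate(operand_tile):
        if i in collapsed_dims or i in index_map:
            if parts > 1:
                return None      # partitioned on a collapsed/indexed dim
            collapsed += 1
            continue
        if slice_sizes[i] != operand_shape[i] and parts > 1:
            return None          # partitioned on a dim that the gather slices
        out_tile[offset_dims[i - collapsed]] = parts
    return out_tile

# GatherFromDataOperand test: operand f32[2,9] sharded [1,2], slice_sizes={1,9},
# collapsed_slice_dims={0}, start_index_map={0}, offset_dims={1} -> output [1,2].
assert passthrough_operand_to_output([1, 2], [2, 9], [1, 9],
                                     {0}, {0}, [1], output_rank=2) == [1, 2]
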
PiperOrigin-RevId: 324347628 Change-Id: I1ded92984c87c3d269316f90c6952102f3ec3c76 --- tensorflow/compiler/xla/service/BUILD | 1 + .../compiler/xla/service/hlo_sharding_util.cc | 180 ++++++++++++ .../compiler/xla/service/hlo_sharding_util.h | 20 ++ .../xla/service/sharding_propagation.cc | 76 ++++- .../xla/service/sharding_propagation_test.cc | 270 ++++++++++++++++++ .../xla/service/spmd/spmd_partitioner.cc | 67 +---- 6 files changed, 550 insertions(+), 64 deletions(-) diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 8d267affdd9..1491b9070ac 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -473,6 +473,7 @@ cc_library( "//tensorflow/compiler/xla:array", "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto_cc", "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/types:optional", diff --git a/tensorflow/compiler/xla/service/hlo_sharding_util.cc b/tensorflow/compiler/xla/service/hlo_sharding_util.cc index 11a24b30ac9..94c348cdeaa 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding_util.cc +++ b/tensorflow/compiler/xla/service/hlo_sharding_util.cc @@ -23,6 +23,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_instructions.h" #include "tensorflow/compiler/xla/service/hlo_sharding.h" #include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/util.h" #include "tensorflow/compiler/xla/xla_data.pb.h" namespace xla { @@ -331,6 +332,10 @@ HloSharding GatherOutputSharding(const HloSharding& index_sharding, } } Array new_tile_assignment = index_sharding.tile_assignment(); + if (new_tile_assignment.num_elements() != + Product(output_tile_assignment_dims)) { + return HloSharding::Replicate(); + } new_tile_assignment.Reshape(output_tile_assignment_dims); return HloSharding::Tile(new_tile_assignment); } @@ -350,6 +355,10 @@ HloSharding GatherIndexSharding(const HloSharding& output_sharding, } } Array new_tile_assignment = output_sharding.tile_assignment(); + if (new_tile_assignment.num_elements() != + Product(index_tile_assignment_dims)) { + return HloSharding::Replicate(); + } new_tile_assignment.Reshape(index_tile_assignment_dims); return HloSharding::Tile(new_tile_assignment); } @@ -422,6 +431,10 @@ HloSharding ScatterIndexSharding(const HloSharding& data_sharding, index_tile_assignment_dims.push_back(1); } Array new_tile_assignment = data_sharding.tile_assignment(); + if (new_tile_assignment.num_elements() != + Product(index_tile_assignment_dims)) { + return HloSharding::Replicate(); + } new_tile_assignment.Reshape(index_tile_assignment_dims); return HloSharding::Tile(new_tile_assignment); } @@ -444,6 +457,10 @@ HloSharding ScatterDataSharding(const HloSharding& index_sharding, } } Array new_tile_assignment = index_sharding.tile_assignment(); + if (new_tile_assignment.num_elements() != + Product(data_tile_assignment_dims)) { + return HloSharding::Replicate(); + } new_tile_assignment.Reshape(data_tile_assignment_dims); return HloSharding::Tile(new_tile_assignment); } @@ -533,6 +550,169 @@ HloSharding ScatterEffectiveDataSharding(const HloSharding& data_sharding, return HloSharding::Tile(tile_assignment); } +namespace { + +// If partitioning in the operand only happens in dimensions in passthrough +// dimensions (offset dimensions in the gather output (or scatter update) that +// have the same size as the operand), returns the 
corresponding output (or +// update) sharding by passing through the input sharding. +absl::optional PassthroughOperandToGatherOutputOrScatterUpdate( + const Shape& operand_shape, const HloSharding& operand_sharding, + const Shape& update_or_gather_shape, + absl::Span collapsed_or_inserted_dims, + absl::Span index_map, + absl::Span offset_or_window_dims, + absl::Span slice_size) { + if (operand_sharding.IsTileMaximal()) { + return operand_sharding; + } + std::vector passthrough_tile(update_or_gather_shape.rank(), 1); + int64 collapsed = 0; + for (int64 i = 0; i < operand_shape.rank(); ++i) { + int64 dim_partitions = operand_sharding.tile_assignment().dim(i); + if (absl::c_linear_search(collapsed_or_inserted_dims, i) || + absl::c_linear_search(index_map, i)) { + if (dim_partitions > 1) { + return absl::nullopt; + } + collapsed++; + continue; + } + if (slice_size[i] != operand_shape.dimensions(i) && dim_partitions > 1) { + return absl::nullopt; + } + int64 offset_dim = offset_or_window_dims[i - collapsed]; + if (i - collapsed > 0 && + offset_dim < offset_or_window_dims[i - collapsed - 1]) { + // Output offsets are transposed, we do not support this case. + return absl::nullopt; + } + passthrough_tile[offset_dim] = dim_partitions; + } + Array tile_assignment = operand_sharding.tile_assignment(); + tile_assignment.Reshape(passthrough_tile); + return HloSharding::Tile(tile_assignment); +} + +// Inverse of PassthroughOperandToGatherOutputOrScatterUpdate. +absl::optional PassthroughGatherOutputOrScatterUpdateToOperand( + const Shape& operand_shape, const HloSharding& update_or_gather_sharding, + absl::Span collapsed_or_inserted_dims, + absl::Span index_map, + absl::Span offset_or_window_dims, + absl::Span slice_size) { + if (update_or_gather_sharding.IsTileMaximal()) { + return update_or_gather_sharding; + } + std::vector passthrough_tile(operand_shape.rank(), 1); + int64 collapsed = 0; + for (int64 i = 0; i < operand_shape.rank(); ++i) { + if (absl::c_linear_search(collapsed_or_inserted_dims, i) || + absl::c_linear_search(index_map, i)) { + collapsed++; + continue; + } + int64 offset_dim = offset_or_window_dims[i - collapsed]; + int64 dim_partitions = + update_or_gather_sharding.tile_assignment().dim(offset_dim); + if (slice_size[i] != operand_shape.dimensions(i) && dim_partitions > 1) { + return absl::nullopt; + } + if (i - collapsed > 0 && + offset_dim < offset_or_window_dims[i - collapsed - 1]) { + // Output offsets are transposed, we do not support this case. 
+ return absl::nullopt; + } + passthrough_tile[i] = dim_partitions; + } + Array tile_assignment = update_or_gather_sharding.tile_assignment(); + if (tile_assignment.num_elements() != Product(passthrough_tile)) { + return absl::nullopt; + } + tile_assignment.Reshape(passthrough_tile); + return HloSharding::Tile(tile_assignment); +} + +} // namespace + +absl::optional GatherOutputShardingFromDataOperand( + const HloSharding& data_operand_sharding, const HloInstruction& hlo) { + const auto& dnums = hlo.gather_dimension_numbers(); + std::vector collapsed_slice_dims(dnums.collapsed_slice_dims().begin(), + dnums.collapsed_slice_dims().end()); + std::vector start_index_map(dnums.start_index_map().begin(), + dnums.start_index_map().end()); + std::vector offset_dims(dnums.offset_dims().begin(), + dnums.offset_dims().end()); + return PassthroughOperandToGatherOutputOrScatterUpdate( + hlo.operand(0)->shape(), data_operand_sharding, hlo.shape(), + collapsed_slice_dims, start_index_map, offset_dims, + hlo.gather_slice_sizes()); +} + +absl::optional GatherDataOperandShardingFromOutput( + const HloSharding& output_sharding, const HloInstruction& hlo) { + const auto& dnums = hlo.gather_dimension_numbers(); + std::vector collapsed_slice_dims(dnums.collapsed_slice_dims().begin(), + dnums.collapsed_slice_dims().end()); + std::vector start_index_map(dnums.start_index_map().begin(), + dnums.start_index_map().end()); + std::vector offset_dims(dnums.offset_dims().begin(), + dnums.offset_dims().end()); + return PassthroughGatherOutputOrScatterUpdateToOperand( + hlo.operand(0)->shape(), output_sharding, collapsed_slice_dims, + start_index_map, offset_dims, hlo.gather_slice_sizes()); +} + +absl::optional ScatterOutputShardingFromUpdate( + const HloSharding& update_sharding, const HloInstruction& hlo) { + const auto& dnums = hlo.scatter_dimension_numbers(); + std::vector inserted_window_dims(dnums.inserted_window_dims().begin(), + dnums.inserted_window_dims().end()); + std::vector scatter_dims_to_operand_dims( + dnums.scatter_dims_to_operand_dims().begin(), + dnums.scatter_dims_to_operand_dims().end()); + std::vector update_window_dims(dnums.update_window_dims().begin(), + dnums.update_window_dims().end()); + std::vector slice_size(hlo.shape().rank(), 1); + int64 num_update_window_dims = 0; + for (int64 i = 0; i < hlo.shape().rank(); ++i) { + if (absl::c_linear_search(dnums.inserted_window_dims(), i)) { + continue; + } + slice_size[i] = hlo.operand(2)->shape().dimensions( + dnums.update_window_dims(num_update_window_dims++)); + } + return PassthroughGatherOutputOrScatterUpdateToOperand( + hlo.shape(), update_sharding, inserted_window_dims, + scatter_dims_to_operand_dims, update_window_dims, slice_size); +} + +absl::optional ScatterUpdateShardingFromOutput( + const HloSharding& output_sharding, const HloInstruction& hlo) { + const auto& dnums = hlo.scatter_dimension_numbers(); + std::vector inserted_window_dims(dnums.inserted_window_dims().begin(), + dnums.inserted_window_dims().end()); + std::vector scatter_dims_to_operand_dims( + dnums.scatter_dims_to_operand_dims().begin(), + dnums.scatter_dims_to_operand_dims().end()); + std::vector update_window_dims(dnums.update_window_dims().begin(), + dnums.update_window_dims().end()); + std::vector slice_size(hlo.shape().rank(), 1); + int64 num_update_window_dims = 0; + for (int64 i = 0; i < hlo.shape().rank(); ++i) { + if (absl::c_linear_search(dnums.inserted_window_dims(), i)) { + continue; + } + slice_size[i] = hlo.operand(2)->shape().dimensions( + 
dnums.update_window_dims(num_update_window_dims++)); + } + return PassthroughOperandToGatherOutputOrScatterUpdate( + hlo.shape(), output_sharding, hlo.operand(2)->shape(), + inserted_window_dims, scatter_dims_to_operand_dims, update_window_dims, + slice_size); +} + StatusOr, HloOpcode>> IdentityValueAndHloOpcodeForScatterReduceComputation( const HloScatterInstruction& scatter) { diff --git a/tensorflow/compiler/xla/service/hlo_sharding_util.h b/tensorflow/compiler/xla/service/hlo_sharding_util.h index 562f6d1420d..cc4068121ae 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding_util.h +++ b/tensorflow/compiler/xla/service/hlo_sharding_util.h @@ -127,6 +127,26 @@ HloSharding ScatterEffectiveIndexSharding(const HloSharding& index_sharding, HloSharding ScatterEffectiveDataSharding(const HloSharding& data_sharding, const HloInstruction& hlo); +// Returns an output sharding of gather by passing through the data operand's +// sharding. +absl::optional GatherOutputShardingFromDataOperand( + const HloSharding& data_operand_sharding, const HloInstruction& hlo); + +// Returns a data operand sharding of gather by passing through the output's +// sharding. +absl::optional GatherDataOperandShardingFromOutput( + const HloSharding& output_sharding, const HloInstruction& hlo); + +// Returns an output sharding of scatter by passing through the update operand's +// sharding. +absl::optional ScatterOutputShardingFromUpdate( + const HloSharding& update_sharding, const HloInstruction& hlo); + +// Returns an update operand sharding of scatter by passing through the output's +// sharding. +absl::optional ScatterUpdateShardingFromOutput( + const HloSharding& output_sharding, const HloInstruction& hlo); + // Returns an identity value and an HloOpcode for reduce computation of scatter // instruction. 
// - If computation is add/or, return 0/false with corresponding op code; diff --git a/tensorflow/compiler/xla/service/sharding_propagation.cc b/tensorflow/compiler/xla/service/sharding_propagation.cc index 6c4cf2d7866..5d85fb5189c 100644 --- a/tensorflow/compiler/xla/service/sharding_propagation.cc +++ b/tensorflow/compiler/xla/service/sharding_propagation.cc @@ -899,20 +899,45 @@ bool InferShardingFromOperands(HloInstruction* instruction, return propagate_slicing() || propagate_base(); } case HloOpcode::kGather: { - if (!IsSpatiallyPartitioned(instruction->operand(1))) { - return false; + bool changed = false; + if (IsSpatiallyPartitioned(instruction->operand(1))) { + HloSharding new_sharding = hlo_sharding_util::GatherOutputSharding( + instruction->operand(1)->sharding(), instruction); + changed |= MaybeImproveInstructionSharding(new_sharding, instruction); } - HloSharding new_sharding = hlo_sharding_util::GatherOutputSharding( - instruction->operand(1)->sharding(), instruction); - return MaybeImproveInstructionSharding(new_sharding, instruction); + if (is_spmd && IsSpatiallyPartitioned(instruction->operand(0))) { + auto maybe_from_data = + hlo_sharding_util::GatherOutputShardingFromDataOperand( + instruction->operand(0)->sharding(), *instruction); + if (maybe_from_data) { + changed |= + MaybeImproveInstructionSharding(*maybe_from_data, instruction); + } + } + return changed; } case HloOpcode::kScatter: { + bool changed = false; + if (is_spmd && IsSpatiallyPartitioned(instruction->operand(0))) { + changed |= MaybeImproveInstructionSharding( + instruction->operand(0)->sharding(), instruction); + } if (!IsSpatiallyPartitioned(instruction->operand(1)) && !IsSpatiallyPartitioned(instruction->operand(2))) { return false; } - return MaybeImproveInstructionSharding(HloSharding::Replicate(), - instruction); + if (is_spmd && IsSpatiallyPartitioned(instruction->operand(2))) { + auto maybe_from_update = + hlo_sharding_util::ScatterOutputShardingFromUpdate( + instruction->operand(2)->sharding(), *instruction); + if (maybe_from_update) { + changed |= + MaybeImproveInstructionSharding(*maybe_from_update, instruction); + } + } + changed |= MaybeImproveInstructionSharding(HloSharding::Replicate(), + instruction); + return changed; } case HloOpcode::kWhile: { if (!instruction->operand(0)->has_sharding()) { @@ -1218,6 +1243,43 @@ absl::optional GetShardingFromUser( return hlo_sharding_util::ReverseSharding(user.sharding(), user.dimensions()); } + case HloOpcode::kGather: { + if (&instruction == user.operand(1)) { + return hlo_sharding_util::GatherIndexSharding(user.sharding(), &user); + } + if (is_spmd) { + return hlo_sharding_util::GatherDataOperandShardingFromOutput( + user.sharding(), user); + } + return absl::nullopt; + } + case HloOpcode::kScatter: { + if (&instruction == user.operand(0)) { + return user.sharding(); + } + if (&instruction == user.operand(1)) { + auto update = user.operand(2); + if (!IsSpatiallyPartitioned(update)) { + return absl::nullopt; + } + return hlo_sharding_util::ScatterIndexSharding(update->sharding(), + &user); + } + CHECK_EQ(&instruction, user.operand(2)); + auto indices = user.operand(1); + if (IsSpatiallyPartitioned(indices)) { + auto from_indices = + hlo_sharding_util::ScatterDataSharding(indices->sharding(), &user); + if (!from_indices.IsTileMaximal()) { + return from_indices; + } + } + if (is_spmd) { + return hlo_sharding_util::ScatterUpdateShardingFromOutput( + user.sharding(), user); + } + return absl::nullopt; + } default: { // If the user output shape is 
compatible with the current instruction // shape excluding element type and the current instruction is supported diff --git a/tensorflow/compiler/xla/service/sharding_propagation_test.cc b/tensorflow/compiler/xla/service/sharding_propagation_test.cc index d62328aa9ad..594130daf0b 100644 --- a/tensorflow/compiler/xla/service/sharding_propagation_test.cc +++ b/tensorflow/compiler/xla/service/sharding_propagation_test.cc @@ -1494,5 +1494,275 @@ ENTRY entry { op::Sharding("{devices=[2,1,1,1]0,1}")); } +TEST_F(ShardingPropagationTest, GatherFromIndex) { + const char* hlo_string = R"( +HloModule module + +ENTRY entry { + %input = f32[2,9] parameter(0), sharding={replicated} + %indices = s32[3] parameter(1), sharding={devices=[2]0,1} + %gather = f32[3,9] gather(%input, %indices), offset_dims={1}, + collapsed_slice_dims={0}, start_index_map={0}, index_vector_dim=1, + slice_sizes={1,9} + ROOT %copy = f32[3,9] copy(%gather) +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + TF_ASSERT_OK_AND_ASSIGN(bool changed, + ShardingPropagation().Run(module.get())); + EXPECT_TRUE(changed); + EXPECT_THAT(FindInstruction(module.get(), "gather"), + op::Sharding("{devices=[2,1]0,1}")); +} + +TEST_F(ShardingPropagationTest, GatherFromDataOperand) { + const char* hlo_string = R"( +HloModule module + +ENTRY entry { + %input = f32[2,9] parameter(0), sharding={devices=[1,2]0,1} + %indices = s32[3] parameter(1), sharding={replicated} + %gather = f32[3,9] gather(%input, %indices), offset_dims={1}, + collapsed_slice_dims={0}, start_index_map={0}, index_vector_dim=1, + slice_sizes={1,9} + ROOT %copy = f32[3,9] copy(%gather) +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + TF_ASSERT_OK_AND_ASSIGN( + bool changed, ShardingPropagation(/*is_spmd=*/true).Run(module.get())); + EXPECT_TRUE(changed); + EXPECT_THAT(FindInstruction(module.get(), "gather"), + op::Sharding("{devices=[1,2]0,1}")); +} + +TEST_F(ShardingPropagationTest, GatherToIndex) { + const char* hlo_string = R"( +HloModule module + +ENTRY entry { + %input = f32[2,9] parameter(0), sharding={replicated} + %p1 = s32[3] parameter(1) + %indices = s32[3] copy(%p1) + ROOT %gather = f32[3,9] gather(%input, %indices), offset_dims={1}, + collapsed_slice_dims={0}, start_index_map={0}, index_vector_dim=1, + slice_sizes={1,9}, sharding={devices=[2,1]0,1} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + TF_ASSERT_OK_AND_ASSIGN(bool changed, + ShardingPropagation().Run(module.get())); + EXPECT_TRUE(changed); + EXPECT_THAT(FindInstruction(module.get(), "indices"), + op::Sharding("{devices=[2]0,1}")); +} + +TEST_F(ShardingPropagationTest, GatherToDataOperand) { + const char* hlo_string = R"( +HloModule module + +ENTRY entry { + %p0 = f32[2,9] parameter(0) + %input = f32[2,9] copy(%p0) + %indices = s32[3] parameter(1), sharding={replicated} + ROOT %gather = f32[3,9] gather(%input, %indices), offset_dims={1}, + collapsed_slice_dims={0}, start_index_map={0}, index_vector_dim=1, + slice_sizes={1,9}, sharding={devices=[1,2]0,1} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + TF_ASSERT_OK_AND_ASSIGN( + bool changed, ShardingPropagation(/*is_spmd=*/true).Run(module.get())); + EXPECT_TRUE(changed); + EXPECT_THAT(FindInstruction(module.get(), "input"), + op::Sharding("{devices=[1,2]0,1}")); +} + +TEST_F(ShardingPropagationTest, DataOperandToScatter) { + const char* const hlo_string = R"( +HloModule module + +add (lhs: f32[], 
rhs: f32[]) -> f32[] { + lhs = f32[] parameter(0) + rhs = f32[] parameter(1) + ROOT sum = f32[] add(lhs, rhs) +} + +ENTRY entry { + %input = f32[2,9] parameter(0), sharding={devices=[1,2]0,1} + %indices = s32[3] parameter(1), sharding={replicated} + %updates = f32[3,9] parameter(2), sharding={replicated} + %scatter = f32[2,9] scatter(%input, %indices, %updates), + to_apply=add, + update_window_dims={1}, + inserted_window_dims={0}, + scatter_dims_to_operand_dims={0}, + index_vector_dim=1 + ROOT %copy = f32[2,9] copy(%scatter) +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + TF_ASSERT_OK_AND_ASSIGN( + bool changed, ShardingPropagation(/*is_spmd=*/true).Run(module.get())); + EXPECT_TRUE(changed); + EXPECT_THAT(FindInstruction(module.get(), "scatter"), + op::Sharding("{devices=[1,2]0,1}")); +} + +TEST_F(ShardingPropagationTest, UpdateOperandToScatter) { + const char* const hlo_string = R"( +HloModule module + +add (lhs: f32[], rhs: f32[]) -> f32[] { + lhs = f32[] parameter(0) + rhs = f32[] parameter(1) + ROOT sum = f32[] add(lhs, rhs) +} + +ENTRY entry { + %input = f32[2,9] parameter(0), sharding={replicated} + %indices = s32[3] parameter(1), sharding={replicated} + %updates = f32[3,9] parameter(2), sharding={devices=[1,2]0,1} + %scatter = f32[2,9] scatter(%input, %indices, %updates), + to_apply=add, + update_window_dims={1}, + inserted_window_dims={0}, + scatter_dims_to_operand_dims={0}, + index_vector_dim=1 + ROOT %copy = f32[2,9] copy(%scatter) +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + TF_ASSERT_OK_AND_ASSIGN( + bool changed, ShardingPropagation(/*is_spmd=*/true).Run(module.get())); + EXPECT_TRUE(changed); + EXPECT_THAT(FindInstruction(module.get(), "scatter"), + op::Sharding("{devices=[1,2]0,1}")); +} + +TEST_F(ShardingPropagationTest, ScatterToDataOperand) { + const char* const hlo_string = R"( +HloModule module + +add (lhs: f32[], rhs: f32[]) -> f32[] { + lhs = f32[] parameter(0) + rhs = f32[] parameter(1) + ROOT sum = f32[] add(lhs, rhs) +} + +ENTRY entry { + %p0 = f32[2,9] parameter(0) + %input = f32[2,9] copy(%p0) + %indices = s32[3] parameter(1), sharding={replicated} + %updates = f32[3,9] parameter(2), sharding={replicated} + ROOT %scatter = f32[2,9] scatter(%input, %indices, %updates), + to_apply=add, + update_window_dims={1}, + inserted_window_dims={0}, + scatter_dims_to_operand_dims={0}, + index_vector_dim=1, sharding={devices=[1,2]0,1} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + TF_ASSERT_OK_AND_ASSIGN(bool changed, + ShardingPropagation().Run(module.get())); + EXPECT_TRUE(changed); + EXPECT_THAT(FindInstruction(module.get(), "input"), + op::Sharding("{devices=[1,2]0,1}")); +} + +TEST_F(ShardingPropagationTest, ScatterToUpdateOperand) { + const char* const hlo_string = R"( +HloModule module + +add (lhs: f32[], rhs: f32[]) -> f32[] { + lhs = f32[] parameter(0) + rhs = f32[] parameter(1) + ROOT sum = f32[] add(lhs, rhs) +} + +ENTRY entry { + %input = f32[2,9] parameter(0) + %indices = s32[3] parameter(1), sharding={replicated} + %p2 = f32[3,9] parameter(2) + %updates = f32[3,9] copy(%p2) + ROOT %scatter = f32[2,9] scatter(%input, %indices, %updates), + to_apply=add, + update_window_dims={1}, + inserted_window_dims={0}, + scatter_dims_to_operand_dims={0}, + index_vector_dim=1, sharding={devices=[1,2]0,1} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + TF_ASSERT_OK_AND_ASSIGN( + bool changed, 
ShardingPropagation(/*is_spmd=*/true).Run(module.get())); + EXPECT_TRUE(changed); + EXPECT_THAT(FindInstruction(module.get(), "updates"), + op::Sharding("{devices=[1,2]0,1}")); +} + +TEST_F(ShardingPropagationTest, ScatterUpdateToIndex) { + const char* const hlo_string = R"( +HloModule module + +add (lhs: f32[], rhs: f32[]) -> f32[] { + lhs = f32[] parameter(0) + rhs = f32[] parameter(1) + ROOT sum = f32[] add(lhs, rhs) +} + +ENTRY entry { + %input = f32[2,9] parameter(0), sharding={replicated} + %p1 = s32[3] parameter(1), sharding={replicated} + %indices = s32[3] copy(%p1) + %updates = f32[3,9] parameter(2), sharding={devices=[2,1]0,1} + ROOT %scatter = f32[2,9] scatter(%input, %indices, %updates), + to_apply=add, + update_window_dims={1}, + inserted_window_dims={0}, + scatter_dims_to_operand_dims={0}, + index_vector_dim=1, sharding={replicated} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + TF_ASSERT_OK_AND_ASSIGN(bool changed, + ShardingPropagation().Run(module.get())); + EXPECT_TRUE(changed); + EXPECT_THAT(FindInstruction(module.get(), "indices"), + op::Sharding("{devices=[2]0,1}")); +} + +TEST_F(ShardingPropagationTest, ScatterIndexToUpdate) { + const char* const hlo_string = R"( +HloModule module + +add (lhs: f32[], rhs: f32[]) -> f32[] { + lhs = f32[] parameter(0) + rhs = f32[] parameter(1) + ROOT sum = f32[] add(lhs, rhs) +} + +ENTRY entry { + %input = f32[2,9] parameter(0), sharding={replicated} + %indices = s32[3] parameter(1), sharding={devices=[2]0,1} + %p2 = f32[3,9] parameter(2), sharding={replicated} + %updates = f32[3,9] copy(%p2) + ROOT %scatter = f32[2,9] scatter(%input, %indices, %updates), + to_apply=add, + update_window_dims={1}, + inserted_window_dims={0}, + scatter_dims_to_operand_dims={0}, + index_vector_dim=1, sharding={replicated} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + TF_ASSERT_OK_AND_ASSIGN(bool changed, + ShardingPropagation().Run(module.get())); + EXPECT_TRUE(changed); + EXPECT_THAT(FindInstruction(module.get(), "updates"), + op::Sharding("{devices=[2,1]0,1}")); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc index 66534ed681f..2d76966a494 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc @@ -1069,47 +1069,7 @@ Status SpmdPartitioningVisitor::HandleConcatenate(HloInstruction* hlo) { return Status::OK(); } -// If partitioning in the operand only happens in dimensions in passthrough -// dimensions (offset dimensions in the gather output (or scatter update) that -// have the same size as the operand), returns the corresponding output (or -// update) sharding by passing through the input sharding. 
-absl::optional PassthroughOperandToGatherOutputOrScatterUpdate( - const PartitionedHlo& operand, const Shape& update_or_gather_shape, - absl::Span collapsed_or_inserted_dims, - absl::Span index_map, - absl::Span offset_or_window_dims, - absl::Span slice_size) { - if (operand.sharding().IsTileMaximal()) { - return operand.sharding(); - } - std::vector passthrough_tile(update_or_gather_shape.rank(), 1); - int64 collapsed = 0; - for (int64 i = 0; i < operand.base_shape().rank(); ++i) { - int64 dim_partitions = operand.sharding().tile_assignment().dim(i); - if (absl::c_linear_search(collapsed_or_inserted_dims, i) || - absl::c_linear_search(index_map, i)) { - if (dim_partitions > 1) { - return absl::nullopt; - } - collapsed++; - continue; - } - if (slice_size[i] != operand.base_shape().dimensions(i) && - dim_partitions > 1) { - return absl::nullopt; - } - int64 offset_dim = offset_or_window_dims[i - collapsed]; - if (i - collapsed > 0 && - offset_dim < offset_or_window_dims[i - collapsed - 1]) { - // Output offsets are transposed, we do not support this case. - return absl::nullopt; - } - passthrough_tile[offset_dim] = dim_partitions; - } - Array tile_assignment = operand.sharding().tile_assignment(); - tile_assignment.Reshape(passthrough_tile); - return HloSharding::Tile(tile_assignment); -} +namespace { // Returns whether partitioning in the operand only happens in dimensions with // gather/scatter slice size 1. @@ -1204,6 +1164,8 @@ IndexBoundsForGatherScatterOperandPartitionedOnTrivialSliceDims( return {broadcast_min, broadcast_max}; } +} // namespace + Status SpmdPartitioningVisitor::HandleScatter(HloInstruction* hlo) { auto scatter = Cast(hlo); auto dnums = scatter->scatter_dimension_numbers(); @@ -1219,16 +1181,12 @@ Status SpmdPartitioningVisitor::HandleScatter(HloInstruction* hlo) { slice_size[i] = updates.base_shape().dimensions( dnums.update_window_dims(num_update_window_dims++)); } - std::vector inserted_window_dims(dnums.inserted_window_dims().begin(), - dnums.inserted_window_dims().end()); std::vector scatter_dims_to_operand_dims( dnums.scatter_dims_to_operand_dims().begin(), dnums.scatter_dims_to_operand_dims().end()); - std::vector update_window_dims(dnums.update_window_dims().begin(), - dnums.update_window_dims().end()); std::vector update_scatter_dims; for (int64 i = 0; i < updates.base_shape().rank(); ++i) { - if (!absl::c_linear_search(update_window_dims, i)) { + if (!absl::c_linear_search(dnums.update_window_dims(), i)) { update_scatter_dims.push_back(i); } } @@ -1292,9 +1250,8 @@ Status SpmdPartitioningVisitor::HandleScatter(HloInstruction* hlo) { return Status::OK(); } } else { - auto maybe_passthrough = PassthroughOperandToGatherOutputOrScatterUpdate( - operand, updates.base_shape(), inserted_window_dims, - scatter_dims_to_operand_dims, update_window_dims, slice_size); + auto maybe_passthrough = hlo_sharding_util::ScatterUpdateShardingFromOutput( + operand.sharding(), *hlo); // Handle pass through cases if we can use compatible sharding for update. 
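HandleScatter now delegates the pass-through computation to hlo_sharding_util::ScatterUpdateShardingFromOutput, which derives a per-operand-dimension slice size from the update shape. A small sketch of that derivation, using the shapes from the scatter tests above (the helper name is illustrative):

def scatter_slice_sizes(operand_rank, update_shape,
                        inserted_window_dims, update_window_dims):
    """Window size per operand dimension implied by a scatter (sketch)."""
    sizes, next_window = [], 0
    for i in range(operand_rank):
        if i in inserted_window_dims:
            sizes.append(1)
        else:
            sizes.append(update_shape[update_window_dims[next_window]])
            next_window += 1
    return sizes

# operand f32[2,9], updates f32[3,9], inserted_window_dims={0},
# update_window_dims={1}: dim 0 is an inserted window (size 1), dim 1 spans all 9.
assert scatter_slice_sizes(2, [3, 9], {0}, [1]) == [1, 9]
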
if (maybe_passthrough.has_value()) { indices = indices.Reshard(HloSharding::Replicate()); @@ -2148,15 +2105,11 @@ Status SpmdPartitioningVisitor::HandleGather(HloInstruction* hlo) { const auto& dnums = gather->gather_dimension_numbers(); auto operand = GetPartitionedHlo(gather->operand(0)); auto indices = GetPartitionedHlo(gather->operand(1)); - std::vector collapsed_slice_dims(dnums.collapsed_slice_dims().begin(), - dnums.collapsed_slice_dims().end()); std::vector start_index_map(dnums.start_index_map().begin(), dnums.start_index_map().end()); - std::vector offset_dims(dnums.offset_dims().begin(), - dnums.offset_dims().end()); std::vector batch_dims; for (int64 i = 0; i < gather->shape().rank(); ++i) { - if (!absl::c_linear_search(offset_dims, i)) { + if (!absl::c_linear_search(dnums.offset_dims(), i)) { batch_dims.push_back(i); } } @@ -2193,9 +2146,9 @@ Status SpmdPartitioningVisitor::HandleGather(HloInstruction* hlo) { return Status::OK(); } } else { - auto maybe_passthrough = PassthroughOperandToGatherOutputOrScatterUpdate( - operand, gather->shape(), collapsed_slice_dims, start_index_map, - offset_dims, gather->gather_slice_sizes()); + auto maybe_passthrough = + hlo_sharding_util::GatherOutputShardingFromDataOperand( + operand.sharding(), *hlo); if (maybe_passthrough.has_value()) { indices = indices.Reshard(HloSharding::Replicate()); auto pshape = MakePartitionedShape(gather->shape(), *maybe_passthrough); From df89afd14876a764cef50aa618cf6347e41c966a Mon Sep 17 00:00:00 2001 From: Tzu-Wei Sung Date: Fri, 31 Jul 2020 22:51:52 -0700 Subject: [PATCH 1933/2522] Fix conj doc --- tensorflow/python/ops/math_ops.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 6a3440cdae7..a01638d0602 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -3848,20 +3848,27 @@ def cumulative_logsumexp(x, axis=0, exclusive=False, reverse=False, name=None): def conj(x, name=None): r"""Returns the complex conjugate of a complex number. - Given a tensor `input` of complex numbers, this operation returns a tensor of - complex numbers that are the complex conjugate of each element in `input`. The - complex numbers in `input` must be of the form \\(a + bj\\), where *a* is the + Given a tensor `x` of complex numbers, this operation returns a tensor of + complex numbers that are the complex conjugate of each element in `x`. The + complex numbers in `x` must be of the form \\(a + bj\\), where *a* is the real part and *b* is the imaginary part. The complex conjugate returned by this operation is of the form \\(a - bj\\). For example: - # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] - tf.math.conj(input) ==> [-2.25 - 4.75j, 3.25 - 5.75j] + >>> x = tf.constant([-2.25 + 4.75j, 3.25 + 5.75j]) + >>> tf.math.conj(x) + If `x` is real, it is returned unchanged. + For example: + + >>> x = tf.constant([-2.25, 3.25]) + >>> tf.math.conj(x) + + Args: x: `Tensor` to conjugate. Must have numeric or variant type. name: A name for the operation (optional). @@ -3871,6 +3878,10 @@ def conj(x, name=None): Raises: TypeError: If `x` is not a numeric tensor. + + @compatibility(numpy) + Equivalent to numpy.conj. 
+ @end_compatibility """ if isinstance(x, ops.Tensor): dt = x.dtype From d3323e54e2db9db14592d9a784d2ba6464871307 Mon Sep 17 00:00:00 2001 From: Smit Hinsu Date: Fri, 31 Jul 2020 23:10:33 -0700 Subject: [PATCH 1934/2522] Legalize TensorFlow NonMaxSuppressionV4 and SelfAdjointEigV2Op ops to HLO Added support for HLO ops bitcast-convert, sort and while in MlirHloBuilder and enabled tests for NonMaxSuppressionV4 and SelfAdjointEigV2Op using these ops. PiperOrigin-RevId: 324360651 Change-Id: I300b67cfea37a1a4362cd543e8ba7c82b00273a7 --- .../sink_constants_to_control_flow.cc | 7 ++- .../compiler/mlir/xla/ir/mlir_hlo_builder.cc | 34 +++++++++++ .../compiler/mlir/xla/ir/mlir_hlo_builder.h | 13 ++++ .../xla/tests/legalize-tf-with-tf2xla.mlir | 8 +++ .../xla/transforms/legalize_tf_with_tf2xla.cc | 2 + tensorflow/compiler/tests/BUILD | 1 + tensorflow/compiler/tests/image_ops_test.py | 16 ----- tensorflow/compiler/xla/client/xla_builder.cc | 60 ++++++++++++------- tensorflow/compiler/xla/client/xla_builder.h | 9 +++ 9 files changed, 111 insertions(+), 39 deletions(-) diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/sink_constants_to_control_flow.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/sink_constants_to_control_flow.cc index 059fdc3edbe..14d89a7e196 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/sink_constants_to_control_flow.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/sink_constants_to_control_flow.cc @@ -30,13 +30,14 @@ namespace { // A pass that sinks constants implicitly captured in control flow regions. This // is necessary to export to XLA. +// // TODO(hinsu): Generalize this pass to handle all the ops with regions. Any // value used within the region that is defined outside of op's region should be // sank to the regions and not just the constants. Ops such as If and While // whose computations doesn't require fixed signature like Sort or Reduce have // an option to pass outside values as operands of the op to avoid recomputing // those within internally. Note that doing so is the only option in case of -// BlockArguments. +// values defined outside that are BlockArguments of any of the parent region. class SinkConstantsToControlFlowPass : public mlir::PassWrapper { void runOnFunction() override { @@ -60,7 +61,7 @@ class SinkConstantsToControlFlowPass visitUsedValuesDefinedAbove({*region}, [&](OpOperand* use) { Value constant = use->get(); auto op = constant.getDefiningOp(); - if (!op || !isa(op)) return; + if (!op || !op->hasTrait()) return; auto map_entry = sunk_constant.try_emplace(constant, nullptr); if (!map_entry.second) { // This constant has already been cloned into the region, reuse it. @@ -82,6 +83,8 @@ class SinkConstantsToControlFlowPass } // anonymous namespace +// TODO(hinsu): Rename this pass and move to a different file along with the +// generalization to make all ops isolated from above. 
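The XlaBuilder changes below follow the existing pattern for MlirHloBuilder support: the public method keeps the shared shape inference, and a new virtual *Internal hook does the backend-specific emission (HLO protos in XlaBuilder, MLIR ops in MlirHloBuilder). A toy sketch of that split; the class and method names here are only illustrative:

class Builder:
    def sort(self, operands, dimension=-1):
        shape = self._infer_shape(operands)          # shared validation/inference
        if dimension == -1:
            dimension = len(operands[0]) - 1
        return self._sort_internal(shape, operands, dimension)

    def _infer_shape(self, operands):
        return [len(op) for op in operands]          # stand-in for shape inference

    def _sort_internal(self, shape, operands, dimension):
        raise NotImplementedError                    # overridden per backend

class ProtoBuilder(Builder):
    def _sort_internal(self, shape, operands, dimension):
        return {"opcode": "sort", "shape": shape, "dimension": dimension}

class MlirBuilder(Builder):
    def _sort_internal(self, shape, operands, dimension):
        return ("mhlo.sort", shape, dimension)       # stands in for creating an MLIR op

print(MlirBuilder().sort([[3, 1, 2]]))
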
std::unique_ptr> createSinkConstantsToControlFlowPass() { return std::make_unique(); } diff --git a/tensorflow/compiler/mlir/xla/ir/mlir_hlo_builder.cc b/tensorflow/compiler/mlir/xla/ir/mlir_hlo_builder.cc index 31512c90f09..c94110d9102 100644 --- a/tensorflow/compiler/mlir/xla/ir/mlir_hlo_builder.cc +++ b/tensorflow/compiler/mlir/xla/ir/mlir_hlo_builder.cc @@ -206,6 +206,15 @@ XlaOp MlirHloBuilder::Iota(const Shape& shape, int64 iota_dimension) { }); } +StatusOr MlirHloBuilder::BitcastConvertTypeInternal(const Shape& shape, + XlaOp operand) { + TF_ASSIGN_OR_RETURN(mlir::Type ty, ConvertShapeToType( + shape, builder_)); + auto op = builder_.create(loc_, ty, + GetValue(operand)); + return MakeXlaOp(op); +} + StatusOr MlirHloBuilder::TransposeInternal( const Shape& shape, XlaOp operand, absl::Span permutation) { TF_ASSIGN_OR_RETURN(mlir::Type ty, ConvertShapeToType( @@ -224,6 +233,31 @@ StatusOr MlirHloBuilder::RevInternal( return MakeXlaOp(op); } +StatusOr MlirHloBuilder::SortInternal(const Shape& shape, + absl::Span operands, + const XlaComputation& comparator, + int64 dimension, bool is_stable) { + TF_ASSIGN_OR_RETURN(mlir::Type ty, ConvertShapeToType( + shape, builder_)); + auto op = builder_.create( + loc_, ty, GetValues(operands), builder_.getI64IntegerAttr(dimension), + builder_.getBoolAttr(is_stable)); + TF_RETURN_IF_ERROR(ImportComputation(comparator.proto(), &op.comparator())); + return MakeXlaOp(op); +} + +StatusOr MlirHloBuilder::WhileInternal(const Shape& shape, + const XlaComputation& condition, + const XlaComputation& body, + XlaOp init) { + TF_ASSIGN_OR_RETURN(mlir::Type ty, ConvertShapeToType( + shape, builder_)); + auto op = builder_.create(loc_, ty, GetValue(init)); + TF_RETURN_IF_ERROR(ImportComputation(condition.proto(), &op.cond())); + TF_RETURN_IF_ERROR(ImportComputation(body.proto(), &op.body())); + return MakeXlaOp(op); +} + StatusOr MlirHloBuilder::GatherInternal( const Shape& shape, XlaOp input, XlaOp start_indices, const GatherDimensionNumbers& dimension_numbers, diff --git a/tensorflow/compiler/mlir/xla/ir/mlir_hlo_builder.h b/tensorflow/compiler/mlir/xla/ir/mlir_hlo_builder.h index ab1a0d2c9b3..a12eb723465 100644 --- a/tensorflow/compiler/mlir/xla/ir/mlir_hlo_builder.h +++ b/tensorflow/compiler/mlir/xla/ir/mlir_hlo_builder.h @@ -142,6 +142,9 @@ class MlirHloBuilder : public XlaBuilder { XlaOp Iota(const Shape& shape, int64 iota_dimension) override; + StatusOr BitcastConvertTypeInternal(const Shape& shape, + XlaOp operand) override; + StatusOr TransposeInternal( const Shape& shape, XlaOp operand, absl::Span permutation) override; @@ -149,6 +152,16 @@ class MlirHloBuilder : public XlaBuilder { StatusOr RevInternal(const Shape& shape, XlaOp operand, absl::Span dimensions) override; + StatusOr SortInternal(const Shape& shape, + absl::Span operands, + const XlaComputation& comparator, + int64 dimension, bool is_stable) override; + + StatusOr WhileInternal(const Shape& shape, + const XlaComputation& condition, + const XlaComputation& body, + XlaOp init) override; + StatusOr GatherInternal( const Shape& shape, XlaOp input, XlaOp start_indices, const GatherDimensionNumbers& dimension_numbers, diff --git a/tensorflow/compiler/mlir/xla/tests/legalize-tf-with-tf2xla.mlir b/tensorflow/compiler/mlir/xla/tests/legalize-tf-with-tf2xla.mlir index 5a1edc0d933..cd351447303 100644 --- a/tensorflow/compiler/mlir/xla/tests/legalize-tf-with-tf2xla.mlir +++ b/tensorflow/compiler/mlir/xla/tests/legalize-tf-with-tf2xla.mlir @@ -257,6 +257,14 @@ func @arg_min(%arg0: tensor<6xf64>) -> 
tensor { return %1 : tensor } +// CHECK-LABEL: non_max_suppression_v4 +func @non_max_suppression_v4(%arg0: tensor<3x4xf32>, %arg1: tensor<3xf32>, %arg2: tensor, %arg3: tensor) -> tensor<2xi32> { + %max_size = mhlo.constant dense<2> : tensor + // CHECK-NOT: tf.NonMaxSuppressionV4 + %0:2 = "tf.NonMaxSuppressionV4"(%arg0, %arg1, %max_size, %arg2, %arg3) {pad_to_max_output_size = true}: (tensor<3x4xf32>, tensor<3xf32>, tensor, tensor, tensor) -> (tensor<2xi32>, tensor) + return %0#0 : tensor<2xi32> +} + // TODO(hinsu): Add a test with a valid TF op for which tf2xla kernel is // available but doesn't support this instance. } diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc index 1743ae7be17..bb50fc198c8 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc @@ -159,6 +159,7 @@ static bool IsOpAllowlisted(Operation* op) { TypeID::get(), TypeID::get(), TypeID::get(), + TypeID::get(), TypeID::get(), TypeID::get(), TypeID::get(), @@ -178,6 +179,7 @@ static bool IsOpAllowlisted(Operation* op) { TypeID::get(), TypeID::get(), TypeID::get(), + TypeID::get(), TypeID::get(), TypeID::get(), TypeID::get(), diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index c2b5000647d..a3134fc1c94 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -324,6 +324,7 @@ tf_xla_py_test( name = "self_adjoint_eig_op_test", size = "medium", srcs = ["self_adjoint_eig_op_test.py"], + enable_mlir_bridge = True, python_version = "PY3", tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip diff --git a/tensorflow/compiler/tests/image_ops_test.py b/tensorflow/compiler/tests/image_ops_test.py index 326c3ec4929..9590688fda7 100644 --- a/tensorflow/compiler/tests/image_ops_test.py +++ b/tensorflow/compiler/tests/image_ops_test.py @@ -30,7 +30,6 @@ from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.compiler.tests import xla_test from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops -from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_image_ops from tensorflow.python.ops import image_ops @@ -775,7 +774,6 @@ class ResizeBilinearNonAlignCornersTest(xla_test.XLATestCase): class NonMaxSuppressionTest(xla_test.XLATestCase): - @test_util.disable_mlir_bridge("%1") def testNMS128From1024(self): num_boxes = 1024 boxes_np = np.random.normal(50, 10, (num_boxes, 4)).astype("f4") @@ -810,7 +808,6 @@ class NonMaxSuppressionTest(xla_test.XLATestCase): self.assertEqual(indices_tf.size, max_output_size) - @test_util.disable_mlir_bridge("%1") def testNMS3From6Boxes(self): # Three boxes are selected based on IOU. boxes_data = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9], @@ -852,7 +849,6 @@ class NonMaxSuppressionTest(xla_test.XLATestCase): self.assertEqual(num_valid, 3) self.assertAllClose(indices_tf[:num_valid], [3, 0, 5]) - @test_util.disable_mlir_bridge("%1") def testNMS3Then2WithScoreThresh(self): # Three boxes are selected based on IOU. # One is filtered out by score threshold. 
@@ -895,7 +891,6 @@ class NonMaxSuppressionTest(xla_test.XLATestCase): self.assertEqual(num_valid, 2) self.assertAllClose(indices_tf[:num_valid], [3, 0]) - @test_util.disable_mlir_bridge("%1") def testNMS3Then1WithScoreMaxThresh(self): # Three boxes are selected based on IOU. # One is filtered out by score threshold. @@ -939,7 +934,6 @@ class NonMaxSuppressionTest(xla_test.XLATestCase): self.assertEqual(num_valid, 1) self.assertAllClose(indices_tf[:num_valid], [3]) - @test_util.disable_mlir_bridge("%1") def testSelectFromContinuousOverLap(self): # Tests that a suppressed box does not itself suppress other boxes. @@ -984,7 +978,6 @@ class NonMaxSuppressionTest(xla_test.XLATestCase): class BatchedNonMaxSuppressionCorrectnessTest(xla_test.XLATestCase): - @test_util.disable_mlir_bridge("%1") def testBatchedNMSFrom6(self): boxes_data = [[[0, 0, 1, 1], [3, 3, 4, 4], [0, 0.4, 1, 1.4], [0, 0.6, 1, 1.6], [0, 0.8, 1, 1.8], [0, 2, 1, 2]], @@ -1022,7 +1015,6 @@ class BatchedNonMaxSuppressionCorrectnessTest(xla_test.XLATestCase): indices_output) self.assertAllEqual([5, 4], num_valid_output) - @test_util.disable_mlir_bridge("%1") def testBatchedNMSFrom6Max3(self): boxes_data = [[[0, 0, 1, 1], [3, 3, 4, 4], [0, 0.4, 1, 1.4], [0, 0.6, 1, 1.6], [0, 0.8, 1, 1.8], [0, 2, 1, 2]], @@ -1056,7 +1048,6 @@ class BatchedNonMaxSuppressionCorrectnessTest(xla_test.XLATestCase): self.assertAllEqual([[0, 1, 2], [0, 1, 3]], indices_output) self.assertAllEqual([3, 3], num_valid_output) - @test_util.disable_mlir_bridge("%1") def testBatchedNMSSingleFrom6Max3(self): boxes_data = [[0, 0, 1, 1], [3, 3, 4, 4], [0, 0.4, 1, 1.4], [0, 0.6, 1, 1.6], [0, 0.8, 1, 1.8], [0, 2, 1, 2]] @@ -1087,7 +1078,6 @@ class BatchedNonMaxSuppressionCorrectnessTest(xla_test.XLATestCase): self.assertAllEqual([0, 1, 2], indices_output) self.assertAllEqual(3, num_valid_output) - @test_util.disable_mlir_bridge("%1") def testBatchedNMSSingleFrom6NoPad(self): boxes_data = [[0, 0, 1, 1], [3, 3, 4, 4], [0, 0.4, 1, 1.4], [0, 0.6, 1, 1.6], [0, 0.8, 1, 1.8], [0, 2, 1, 2]] @@ -1117,7 +1107,6 @@ class BatchedNonMaxSuppressionCorrectnessTest(xla_test.XLATestCase): self.assertAllEqual([0, 1, 2, 4, 5], indices_output) self.assertAllEqual(5, num_valid_output) - @test_util.disable_mlir_bridge("%1") def testBatchedNMSBatchDimsFrom6Max3(self): boxes_data = [[[[0, 0, 1, 1], [3, 3, 4, 4], [0, 0.4, 1, 1.4], [0, 0.6, 1, 1.6], [0, 0.8, 1, 1.8], [0, 2, 1, 2]], @@ -1151,7 +1140,6 @@ class BatchedNonMaxSuppressionCorrectnessTest(xla_test.XLATestCase): self.assertAllEqual([[[0, 1, 2], [0, 1, 3]]], indices_output) self.assertAllEqual([[3, 3]], num_valid_output) - @test_util.disable_mlir_bridge("%1") def testBatchedNMSScoreThresholdFrom6Max3(self): boxes_data = [[[0, 0, 1, 1], [3, 3, 4, 4], [0, 0.4, 1, 1.4], [0, 0.6, 1, 1.6], [0, 0.8, 1, 1.8], [0, 2, 1, 2]], @@ -1187,7 +1175,6 @@ class BatchedNonMaxSuppressionCorrectnessTest(xla_test.XLATestCase): self.assertAllEqual([3, 2], num_valid_output) self.assertAllEqual([[0, 1, 2], [0, 1, invalid_index]], indices_output) - @test_util.disable_mlir_bridge("%1") def testBatchedNMSUnsortedInputFrom6(self): boxes_data = [[[0, 2, 1, 2], [3, 3, 4, 4], [0, 0, 1, 1], [0, 0.4, 1, 1.4], [0, 0.6, 1, 1.6], [0, 0.8, 1, 1.8]], @@ -1224,7 +1211,6 @@ class BatchedNonMaxSuppressionCorrectnessTest(xla_test.XLATestCase): indices_output) self.assertAllEqual([5, 4], num_valid_output) - @test_util.disable_mlir_bridge("%1") def testBatchedNMSNoncanonicalizedInputFrom6(self): boxes_data = [[[1, 0, 0, 1], [4, 3, 3, 4], [1, 0.4, 0, 1.4], [1, 0.6, 0, 1.6], [1, 
0.8, 0, 1.8], [1, 2, 0, 2]], @@ -1262,7 +1248,6 @@ class BatchedNonMaxSuppressionCorrectnessTest(xla_test.XLATestCase): indices_output) self.assertAllEqual([5, 4], num_valid_output) - @test_util.disable_mlir_bridge("%1") def testBatchedNMSScoreThresholdCanInputsFrom6Max3(self): boxes_data = [[[0, 0, 1, 1], [3, 3, 4, 4], [0, 0.4, 1, 1.4], [0, 0.6, 1, 1.6], [0, 0.8, 1, 1.8], [0, 2, 1, 2]], @@ -1298,7 +1283,6 @@ class BatchedNonMaxSuppressionCorrectnessTest(xla_test.XLATestCase): self.assertAllEqual([3, 2], num_valid_output) self.assertAllEqual([[0, 1, 2], [0, 1, invalid_index]], indices_output) - @test_util.disable_mlir_bridge("%1") def testBatchedNMSFrom6DynamicInput(self): boxes_data = [[[0, 0, 1, 1], [3, 3, 4, 4], [0, 0.4, 1, 1.4], [0, 0.6, 1, 1.6], [0, 0.8, 1, 1.8], [0, 2, 1, 2]], diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc index 0fc299f031f..52f61408cbb 100644 --- a/tensorflow/compiler/xla/client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_builder.cc @@ -1728,8 +1728,6 @@ XlaOp XlaBuilder::Sort(absl::Span operands, const XlaComputation& comparator, int64 dimension, bool is_stable) { return ReportErrorOrReturn([&]() -> StatusOr { - HloInstructionProto instr; - instr.set_is_stable(is_stable); std::vector operand_shape_ptrs; TF_ASSIGN_OR_RETURN(std::vector operand_shapes, GetOperandShapes(operands)); @@ -1737,17 +1735,26 @@ XlaOp XlaBuilder::Sort(absl::Span operands, [](const Shape& shape) { return &shape; }); TF_ASSIGN_OR_RETURN(Shape shape, ShapeInference::InferVariadicOpShape( HloOpcode::kSort, operand_shape_ptrs)); - *instr.mutable_shape() = shape.ToProto(); - if (dimension == -1) { - TF_ASSIGN_OR_RETURN(const Shape* keys_shape, GetShapePtr(operands[0])); - dimension = keys_shape->rank() - 1; - } - instr.add_dimensions(dimension); - AddCalledComputation(comparator, &instr); - return AddInstruction(std::move(instr), HloOpcode::kSort, operands); + return SortInternal(shape, operands, comparator, dimension, is_stable); }); } +StatusOr XlaBuilder::SortInternal(const Shape& shape, + absl::Span operands, + const XlaComputation& comparator, + int64 dimension, bool is_stable) { + HloInstructionProto instr; + *instr.mutable_shape() = shape.ToProto(); + instr.set_is_stable(is_stable); + if (dimension == -1) { + TF_ASSIGN_OR_RETURN(const Shape* keys_shape, GetShapePtr(operands[0])); + dimension = keys_shape->rank() - 1; + } + instr.add_dimensions(dimension); + AddCalledComputation(comparator, &instr); + return AddInstruction(std::move(instr), HloOpcode::kSort, operands); +} + XlaOp XlaBuilder::ConvertElementType(XlaOp operand, PrimitiveType new_element_type) { return ReportErrorOrReturn([&]() -> StatusOr { @@ -1761,16 +1768,21 @@ XlaOp XlaBuilder::ConvertElementType(XlaOp operand, XlaOp XlaBuilder::BitcastConvertType(XlaOp operand, PrimitiveType new_element_type) { return ReportErrorOrReturn([&]() -> StatusOr { - HloInstructionProto instr; TF_ASSIGN_OR_RETURN(const Shape* operand_shape, GetShapePtr(operand)); TF_ASSIGN_OR_RETURN(Shape shape, ShapeInference::InferConvertShape( *operand_shape, new_element_type)); - *instr.mutable_shape() = shape.ToProto(); - return AddInstruction(std::move(instr), HloOpcode::kBitcastConvert, - {operand}); + return BitcastConvertTypeInternal(shape, operand); }); } +StatusOr XlaBuilder::BitcastConvertTypeInternal(const Shape& shape, + XlaOp operand) { + HloInstructionProto instr; + *instr.mutable_shape() = shape.ToProto(); + return AddInstruction(std::move(instr), HloOpcode::kBitcastConvert, + 
{operand}); +} + XlaOp XlaBuilder::Clamp(XlaOp min, XlaOp operand, XlaOp max) { return TernaryOp(HloOpcode::kClamp, min, operand, max); } @@ -1892,8 +1904,6 @@ XlaOp XlaBuilder::RngBitGenerator(RandomAlgorithm algorithm, XlaOp XlaBuilder::While(const XlaComputation& condition, const XlaComputation& body, XlaOp init) { return ReportErrorOrReturn([&]() -> StatusOr { - HloInstructionProto instr; - // Infer shape. TF_ASSIGN_OR_RETURN(const auto& body_program_shape, body.GetProgramShape()); TF_ASSIGN_OR_RETURN(const auto& condition_program_shape, @@ -1902,14 +1912,22 @@ XlaOp XlaBuilder::While(const XlaComputation& condition, TF_ASSIGN_OR_RETURN(Shape shape, ShapeInference::InferWhileShape( condition_program_shape, body_program_shape, *init_shape)); - *instr.mutable_shape() = shape.ToProto(); - // Body comes before condition computation in the vector. - AddCalledComputation(body, &instr); - AddCalledComputation(condition, &instr); - return AddInstruction(std::move(instr), HloOpcode::kWhile, {init}); + return WhileInternal(shape, condition, body, init); }); } +StatusOr XlaBuilder::WhileInternal(const Shape& shape, + const XlaComputation& condition, + const XlaComputation& body, + XlaOp init) { + HloInstructionProto instr; + *instr.mutable_shape() = shape.ToProto(); + // Body comes before condition computation in the vector. + AddCalledComputation(body, &instr); + AddCalledComputation(condition, &instr); + return AddInstruction(std::move(instr), HloOpcode::kWhile, {init}); +} + XlaOp XlaBuilder::Gather(XlaOp input, XlaOp start_indices, const GatherDimensionNumbers& dimension_numbers, absl::Span slice_sizes, diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h index 7b96c6dfed6..1960d0c4632 100644 --- a/tensorflow/compiler/xla/client/xla_builder.h +++ b/tensorflow/compiler/xla/client/xla_builder.h @@ -639,6 +639,8 @@ class XlaBuilder { XlaOp ConvertElementType(XlaOp operand, PrimitiveType new_element_type); XlaOp BitcastConvertType(XlaOp operand, PrimitiveType new_element_type); + virtual StatusOr BitcastConvertTypeInternal(const Shape& shape, + XlaOp operand); XlaOp Transpose(XlaOp operand, absl::Span permutation); virtual StatusOr TransposeInternal( @@ -650,6 +652,10 @@ class XlaBuilder { XlaOp Sort(absl::Span operands, const XlaComputation& comparator, int64 dimension = -1, bool is_stable = false); + virtual StatusOr SortInternal(const Shape& shape, + absl::Span operands, + const XlaComputation& comparator, + int64 dimension, bool is_stable); XlaOp Clamp(XlaOp min, XlaOp operand, XlaOp max); @@ -666,6 +672,9 @@ class XlaBuilder { XlaOp While(const XlaComputation& condition, const XlaComputation& body, XlaOp init); + virtual StatusOr WhileInternal(const Shape& shape, + const XlaComputation& condition, + const XlaComputation& body, XlaOp init); XlaOp Conditional(XlaOp predicate, XlaOp true_operand, const XlaComputation& true_computation, XlaOp false_operand, From 270f4ce57cf758744b07a43761ac72d64f673b3a Mon Sep 17 00:00:00 2001 From: Jose Baiocchi Date: Fri, 31 Jul 2020 23:34:41 -0700 Subject: [PATCH 1935/2522] Cleanup TraceMe idioms PiperOrigin-RevId: 324362137 Change-Id: Iaee461adaa5b3372531b73439323505cfb765caf --- .../xla/python/tpu_driver/grpc_tpu_driver.cc | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/tensorflow/compiler/xla/python/tpu_driver/grpc_tpu_driver.cc b/tensorflow/compiler/xla/python/tpu_driver/grpc_tpu_driver.cc index 7632f21d5b2..c6aff604aee 100644 --- 
a/tensorflow/compiler/xla/python/tpu_driver/grpc_tpu_driver.cc +++ b/tensorflow/compiler/xla/python/tpu_driver/grpc_tpu_driver.cc @@ -657,7 +657,7 @@ void GrpcTpuStream::StreamWriterFn() { request_lock_.Unlock(); for (const auto& r : reqs) { - TraceMe activity(absl::StrCat("GrpcTpuStream::Send ")); + TraceMe activity("GrpcTpuStream::Send "); ::grpc::WriteOptions opts; opts.set_no_compression().clear_buffer_hint(); stream_->Write(r, opts); @@ -721,7 +721,7 @@ std::unique_ptr GrpcTpuStream::Allocate( absl::Span wait_for) { auto req = absl::make_unique(); InitializeRequest(req.get(), wait_for); - TraceMe activity(absl::StrCat("GrpcTpuStream::Allocate(num_bytes)")); + TraceMe activity("GrpcTpuStream::Allocate(num_bytes)"); req->mutable_alloc()->set_core_id(core_id); req->mutable_alloc()->set_region(region); req->mutable_alloc()->set_num_bytes(num_bytes); @@ -737,7 +737,7 @@ std::unique_ptr GrpcTpuStream::Allocate( absl::Span wait_for) { auto req = absl::make_unique(); InitializeRequest(req.get(), wait_for); - TraceMe activity(absl::StrCat("GrpcTpuStream::Allocate(shape)")); + TraceMe activity("GrpcTpuStream::Allocate(shape)"); req->mutable_alloc()->set_core_id(core_id); req->mutable_alloc()->set_region(region); *req->mutable_alloc()->mutable_shape() = shape; @@ -754,7 +754,7 @@ std::unique_ptr GrpcTpuStream::AllocateTuple( absl::Span wait_for) { auto req = absl::make_unique(); InitializeRequest(req.get(), wait_for); - TraceMe activity(absl::StrCat("GrpcTpuStream::AllocateTuple")); + TraceMe activity("GrpcTpuStream::AllocateTuple"); req->mutable_alloc_tuple()->set_core_id(core_id); req->mutable_alloc_tuple()->set_region(region); for (auto child : children) { @@ -771,7 +771,7 @@ std::shared_ptr GrpcTpuStream::Deallocate( std::unique_ptr handle, absl::Span wait_for) { auto req = absl::make_unique(); InitializeRequest(req.get(), wait_for); - TraceMe activity(absl::StrCat("GrpcTpuStream::Deallocate")); + TraceMe activity("GrpcTpuStream::Deallocate"); auto grpc_handle = static_cast(handle.get()); req->mutable_dealloc()->set_handle(grpc_handle->id().AsInt()); auto event = @@ -784,7 +784,7 @@ std::shared_ptr GrpcTpuStream::TransferToDevice( const void* src, BufferHandle* dst, absl::Span wait_for) { auto req = absl::make_unique(); InitializeRequest(req.get(), wait_for); - TraceMe activity(absl::StrCat("GrpcTpuStream::TransferToDevice")); + TraceMe activity("GrpcTpuStream::TransferToDevice"); req->mutable_transfer_to()->mutable_data()->assign( static_cast(src), dst->size_in_bytes()); req->mutable_transfer_to()->set_target_handle( @@ -799,7 +799,7 @@ std::shared_ptr GrpcTpuStream::TransferFromDevice( const BufferHandle* src, void* dst, absl::Span wait_for) { auto req = absl::make_unique(); InitializeRequest(req.get(), wait_for); - TraceMe activity(absl::StrCat("GrpcTpuStream::TransferFromDevice")); + TraceMe activity("GrpcTpuStream::TransferFromDevice"); req->mutable_transfer_from()->set_source_handle( static_cast(src)->id().AsInt()); EventId event_id = EventId::FromInt(req->operation_id()); @@ -818,8 +818,10 @@ std::shared_ptr GrpcTpuStream::TransferFromDeviceToDevice( absl::Span wait_for) { auto req = absl::make_unique(); InitializeRequest(req.get(), wait_for); - TraceMe activity(absl::StrCat("GrpcTpuStream::TransferFromDeviceToDevice", - req->operation_id())); + TraceMe activity([&req] { + return absl::StrCat("GrpcTpuStream::TransferFromDeviceToDevice", + req->operation_id()); + }); req->mutable_transfer_from_to()->set_source_handle( static_cast(src)->id().AsInt()); @@ -836,7 +838,7 @@ std::unique_ptr 
GrpcTpuStream::CompileProgram( absl::Span wait_for) { auto req = absl::make_unique(); InitializeRequest(req.get(), wait_for); - TraceMe activity(absl::StrCat("GrpcTpuStream::CompileProgram")); + TraceMe activity("GrpcTpuStream::CompileProgram"); *req->mutable_compile()->mutable_hlo_program() = source; req->mutable_compile()->set_num_replicas(num_replicas); EventId event_id = EventId::FromInt(req->operation_id()); @@ -861,7 +863,7 @@ std::unique_ptr GrpcTpuStream::LoadProgram( absl::Span wait_for) { auto req = absl::make_unique(); InitializeRequest(req.get(), wait_for); - TraceMe activity(absl::StrCat("GrpcTpuStream::LoadProgram")); + TraceMe activity("GrpcTpuStream::LoadProgram"); req->mutable_load()->set_core_id(core_id); auto grpc_handle = static_cast(handle); if (grpc_handle->id().client_id != driver_->client_id()) { @@ -884,7 +886,7 @@ std::shared_ptr GrpcTpuStream::UnloadProgram( absl::Span wait_for) { auto req = absl::make_unique(); InitializeRequest(req.get(), wait_for); - TraceMe activity(absl::StrCat("GrpcTpuStream::UnloadProgram")); + TraceMe activity("GrpcTpuStream::UnloadProgram"); req->mutable_unload()->set_loaded_program_handle( static_cast(handle.get())->id().AsInt()); auto event = From 6043a03c1d4225a3d903c87fdb9c0bb2fbf7e830 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sat, 1 Aug 2020 02:01:51 -0700 Subject: [PATCH 1936/2522] compat: Update forward compatibility horizon to 2020-08-01 PiperOrigin-RevId: 324373053 Change-Id: I5380d867404da5f020cdbbf11f0380d70f442a8c --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 7131601a40c..8f29de673fe 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 31) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 8, 1) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From 8f000581820530473f99715f6d98b31dede6cf03 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sat, 1 Aug 2020 02:01:52 -0700 Subject: [PATCH 1937/2522] Update GraphDef version to 480. PiperOrigin-RevId: 324373055 Change-Id: I79bfa00be3e56c7d18763d581e505b0706b868df --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 065cb501e1b..f935f47d210 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 479 // Updated: 2020/7/31 +#define TF_GRAPH_DEF_VERSION 480 // Updated: 2020/8/1 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From 44597e39fb9e6eeb614d2119d516c0d4ede084cc Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Sat, 1 Aug 2020 02:40:22 -0700 Subject: [PATCH 1938/2522] Enforce xla in xla_control_flow_ops_test. I actually don't understand how one can avoid using xla in an xla test. 
But the test seems to pass now. PiperOrigin-RevId: 324379840 Change-Id: I62c8638183148a95c9c5ab930fa64cfb4c66926c --- .../python/ops/parallel_for/xla_control_flow_ops_test.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/python/ops/parallel_for/xla_control_flow_ops_test.py b/tensorflow/python/ops/parallel_for/xla_control_flow_ops_test.py index b1762e2f55f..33f0d7b76ae 100644 --- a/tensorflow/python/ops/parallel_for/xla_control_flow_ops_test.py +++ b/tensorflow/python/ops/parallel_for/xla_control_flow_ops_test.py @@ -233,8 +233,7 @@ class WhileV2Test(PForTestCase): body, [True, 0, 0.]) - # b/155430349: Enabling forrce_xla=True triggers a CHECK in debug mode. - self._test_loop_fn(loop_fn, 3, force_xla=False) + self._test_loop_fn(loop_fn, 3, force_xla=True) if __name__ == "__main__": From af197c251c1c4251dee82f8dd9a2f10fc7b28b24 Mon Sep 17 00:00:00 2001 From: Tiezhen WANG Date: Sat, 1 Aug 2020 07:04:25 -0700 Subject: [PATCH 1939/2522] TFL java/BUILD: remove no_mac tag from a few targets. The bug has been resolved. PiperOrigin-RevId: 324395469 Change-Id: I776c5ca54864dc94a9669bdf110408a23883ef82 --- tensorflow/lite/java/BUILD | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/tensorflow/lite/java/BUILD b/tensorflow/lite/java/BUILD index 9bceb939c02..1b89451a888 100644 --- a/tensorflow/lite/java/BUILD +++ b/tensorflow/lite/java/BUILD @@ -152,7 +152,6 @@ java_test( srcs = ["src/test/java/org/tensorflow/lite/TensorFlowLiteTest.java"], javacopts = JAVACOPTS, tags = [ - "no_mac", # TODO(b/122888913): libtensorflowlite_test_jni broke on mac. "v1only", ], test_class = "org.tensorflow.lite.TensorFlowLiteTest", @@ -198,9 +197,6 @@ java_test( size = "small", srcs = ["src/test/java/org/tensorflow/lite/DataTypeTest.java"], javacopts = JAVACOPTS, - tags = [ - "no_mac", # TODO(b/122888913): libtensorflowlite_test_jni broke on mac. - ], test_class = "org.tensorflow.lite.DataTypeTest", visibility = ["//visibility:private"], deps = [ @@ -227,9 +223,6 @@ java_test( "src/testdata/with_custom_op.lite", ], javacopts = JAVACOPTS, - tags = [ - "no_mac", # TODO(b/122888913): libtensorflowlite_test_jni broke on mac. - ], test_class = "org.tensorflow.lite.NativeInterpreterWrapperTest", visibility = ["//visibility:private"], deps = [ @@ -256,9 +249,6 @@ java_test( "//tensorflow/lite:testdata/multi_add_flex.bin", ], javacopts = JAVACOPTS, - tags = [ - "no_mac", # TODO(b/122888913): libtensorflowlite_test_jni broke on mac. - ], test_class = "org.tensorflow.lite.InterpreterTest", visibility = ["//visibility:private"], deps = [ @@ -331,9 +321,6 @@ java_test( "src/testdata/string.bin", ], javacopts = JAVACOPTS, - tags = [ - "no_mac", # TODO(b/122888913): libtensorflowlite_test_jni broke on mac. - ], test_class = "org.tensorflow.lite.TensorTest", visibility = ["//visibility:private"], deps = [ @@ -358,7 +345,6 @@ java_test( # Add customized libtensorflowlite_jni.so to java_path jvm_flags = ["-Djava.library.path=third_party/tensorflow/lite/testing"], tags = [ - "no_mac", # TODO(b/122888913): libtensorflowlite_test_jni broke on mac. "v1only", ], test_class = "org.tensorflow.lite.InterpreterCustomizedAndroidBuildTest", @@ -415,9 +401,6 @@ filegroup( srcs = select({ "//conditions:default": [":libtensorflowlite_jni.so"], }), - tags = [ - "no_mac", # TODO(b/122888913): libtensorflowlite_test_jni broke on mac. 
- ], ) cc_library( From f343727e6c347872fd955adab8e0340e59352447 Mon Sep 17 00:00:00 2001 From: Tzu-Wei Sung Date: Sat, 1 Aug 2020 11:24:16 -0700 Subject: [PATCH 1940/2522] Verify TransposeOp Revert clang-format --- .../compiler/mlir/tensorflow/ir/tf_ops_n_z.cc | 62 +++++++++++++++++-- .../mlir/tensorflow/tests/tf-ops.mlir | 54 ++++++++++++++++ 2 files changed, 111 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc index 6662b0fed8f..0b9b757da55 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc @@ -1759,11 +1759,63 @@ void ToBoolOp::getCanonicalizationPatterns(OwningRewritePatternList &results, //===----------------------------------------------------------------------===// static LogicalResult Verify(TransposeOp op) { - // TODO(hinsu): Verify using a custom verifier that, - // * Transpose permutation is 1-D of size equal to the rank of the first - // input, if the shapes are partially known. Requires use of a more - // restrictive type than TF_Tensor. - // * Result shape dimensions are possible based on the input shape. + auto perm_type = op.perm().getType().dyn_cast(); + if (!perm_type) { + return success(); + } + + if (perm_type.getRank() != 1) { + return op.emitOpError() + << "expected perm to be a 1-D Tensor, got perm of rank " + << perm_type.getRank(); + } + + if (!perm_type.hasStaticShape()) { + return success(); + } + + auto x_type = op.x().getType().dyn_cast(); + if (!x_type) { + return success(); + } + + const int64_t x_rank = x_type.getRank(); + if (x_rank != perm_type.getNumElements()) { + return op.emitOpError() + << "expected perm to be a 1-D Tensor of size " + << "equal to the rank of x, got perm of size " + << perm_type.getNumElements() << ", and x of rank " << x_rank; + } + + auto y_type = op.y().getType().dyn_cast(); + if (!y_type) { + return success(); + } + + const int64_t y_rank = y_type.getRank(); + if (x_rank != y_rank) { + return op.emitOpError() + << "x should be of the same rank with y, got " + << "x of rank " << x_rank << ", and y of rank " << y_rank; + } + + DenseIntElementsAttr attr_perm; + if (matchPattern(op.perm(), m_Constant(&attr_perm))) { + // y.shape[i] should be equal to x.shape[perm[i]] + // for i = [0, 1, ..., rank(x) - 1] + for (auto e : llvm::enumerate(attr_perm)) { + const int64_t y_idx = e.index(); + const int64_t y_dim = y_type.getDimSize(y_idx); + const int64_t x_idx = e.value().getSExtValue(); + const int64_t x_dim = x_type.getDimSize(x_idx); + if (y_dim != x_dim) { + return op.emitOpError() + << "y.shape[" << y_idx << "] = " << y_dim + << " != x.shape[perm[" << x_idx << "]] = " << x_dim; + } + } + } + return success(); } diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf-ops.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tf-ops.mlir index ec28b32b485..4fd691cc104 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tf-ops.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf-ops.mlir @@ -2044,6 +2044,60 @@ func @testTranspose(tensor<2x3xf32>) -> tensor<3x2xf32> { // ----- +// Test tf.Transpose with partial unknown shape +// CHECK-LABEL: testTranspose +func @testTranspose(tensor<2x?xf32>) -> tensor { +^bb0(%arg0: tensor<2x?xf32>): + %cst = constant dense<[1, 0]> : tensor<2xi32> + %0 = "tf.Transpose"(%arg0, %cst) {T = "tfdtype$DT_FLOAT", Tperm = "tfdtype$DT_INT32"} : (tensor<2x?xf32>, tensor<2xi32>) -> tensor + return %0 : tensor +} + +// ----- + +// Test 
tf.Transpose with invalid rank of perm +func @testTranspose(tensor<2x3xf32>, tensor<1x2xi32>) -> tensor<3x2xf32> { +^bb0(%arg0: tensor<2x3xf32>, %arg1: tensor<1x2xi32>): + // expected-error @+1 {{expected perm to be a 1-D Tensor, got perm of rank 2}} + %0 = "tf.Transpose"(%arg0, %arg1) {T = "tfdtype$DT_FLOAT", Tperm = "tfdtype$DT_INT32"} : (tensor<2x3xf32>, tensor<1x2xi32>) -> tensor<3x2xf32> + return %0 : tensor<3x2xf32> +} + +// ----- + +// Test tf.Transpose with invalid size of perm +func @testTranspose(tensor<2x3xf32>) -> tensor<3x2xf32> { +^bb0(%arg0: tensor<2x3xf32>): + %cst = constant dense<[1, 0, 2]> : tensor<3xi32> + // expected-error @+1 {{expected perm to be a 1-D Tensor of size equal to the rank of x, got perm of size 3, and x of rank 2}} + %0 = "tf.Transpose"(%arg0, %cst) {T = "tfdtype$DT_FLOAT", Tperm = "tfdtype$DT_INT32"} : (tensor<2x3xf32>, tensor<3xi32>) -> tensor<3x2xf32> + return %0 : tensor<3x2xf32> +} + +// ----- + +// Test tf.Transpose with invalid rank of y +func @testTranspose(tensor<2x3xf32>) -> tensor<3x2x1xf32> { +^bb0(%arg0: tensor<2x3xf32>): + %cst = constant dense<[1, 0]> : tensor<2xi32> + // expected-error @+1 {{x should be of the same rank with y, got x of rank 2, and y of rank 3}} + %0 = "tf.Transpose"(%arg0, %cst) {T = "tfdtype$DT_FLOAT", Tperm = "tfdtype$DT_INT32"} : (tensor<2x3xf32>, tensor<2xi32>) -> tensor<3x2x1xf32> + return %0 : tensor<3x2x1xf32> +} + +// ----- + +// Test tf.Transpose with invalid shape of y +func @testTranspose(tensor<2x3x4xf32>) -> tensor<3x2x4xf32> { +^bb0(%arg0: tensor<2x3x4xf32>): + %cst = constant dense<[2, 0, 1]> : tensor<3xi32> + // expected-error @+1 {{y.shape[0] = 3 != x.shape[perm[2]] = 4}} + %0 = "tf.Transpose"(%arg0, %cst) {T = "tfdtype$DT_FLOAT", Tperm = "tfdtype$DT_INT32"} : (tensor<2x3x4xf32>, tensor<3xi32>) -> tensor<3x2x4xf32> + return %0 : tensor<3x2x4xf32> +} + +// ----- + // Test invalid tf.Less func @testLess(tensor<4xi32>, tensor<4xi32>) -> tensor<4xi32> { ^bb0(%arg0: tensor<4xi32>, %arg1: tensor<4xi32>): From d0b99d9dde59b7ede020a99922e08c7b0f9a1428 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sat, 1 Aug 2020 12:17:58 -0700 Subject: [PATCH 1941/2522] Qualify uses of std::string PiperOrigin-RevId: 324413189 Change-Id: Ibb31fed1b4162936f180be5d7e8a6f4f39291396 --- tensorflow/core/common_runtime/device.h | 14 ++++++++------ tensorflow/core/common_runtime/device_factory.h | 12 ++++++------ tensorflow/core/common_runtime/scoped_allocator.h | 8 ++++---- .../core/common_runtime/scoped_allocator_mgr.h | 11 ++++++----- 4 files changed, 24 insertions(+), 21 deletions(-) diff --git a/tensorflow/core/common_runtime/device.h b/tensorflow/core/common_runtime/device.h index 13877933ce6..9e2db9faaf1 100644 --- a/tensorflow/core/common_runtime/device.h +++ b/tensorflow/core/common_runtime/device.h @@ -60,7 +60,7 @@ class Device : public DeviceBase { ~Device() override; // Full name of this device (see top comment). - const string& name() const override { return device_attributes_.name(); } + const std::string& name() const override { return device_attributes_.name(); } // Parsed name of this device const DeviceNameUtils::ParsedName& parsed_name() const { @@ -71,7 +71,9 @@ class Device : public DeviceBase { // human-readable and not computer-parsed, except that two devices // with the same device_type() are expected to perform similarly // (both from a computation and communication perspective). 
- const string& device_type() const { return device_attributes_.device_type(); } + const std::string& device_type() const { + return device_attributes_.device_type(); + } // Returns an aggregation of device attributes. const DeviceAttributes& attributes() const override { @@ -157,15 +159,15 @@ class Device : public DeviceBase { virtual ResourceMgr* resource_manager() { return rmgr_; } // Summarizes the status of this Device, for debugging. - string DebugString() const { return device_attributes_.DebugString(); } + std::string DebugString() const { return device_attributes_.DebugString(); } // Assembles the parameter components into a complete DeviceAttributes value. static DeviceAttributes BuildDeviceAttributes( - const string& name, DeviceType device, Bytes memory_limit, - const DeviceLocality& locality, const string& physical_device_desc); + const std::string& name, DeviceType device, Bytes memory_limit, + const DeviceLocality& locality, const std::string& physical_device_desc); static DeviceAttributes BuildDeviceAttributes( - const string& name, DeviceType device, Bytes memory_limit, + const std::string& name, DeviceType device, Bytes memory_limit, const DeviceLocality& locality) { // Pass in an empty string as physical device name. return BuildDeviceAttributes(name, device, memory_limit, locality, ""); diff --git a/tensorflow/core/common_runtime/device_factory.h b/tensorflow/core/common_runtime/device_factory.h index c026a188f5e..9d911c20e25 100644 --- a/tensorflow/core/common_runtime/device_factory.h +++ b/tensorflow/core/common_runtime/device_factory.h @@ -30,16 +30,16 @@ struct SessionOptions; class DeviceFactory { public: virtual ~DeviceFactory() {} - static void Register(const string& device_type, DeviceFactory* factory, + static void Register(const std::string& device_type, DeviceFactory* factory, int priority); - static DeviceFactory* GetFactory(const string& device_type); + static DeviceFactory* GetFactory(const std::string& device_type); // Append to "*devices" all suitable devices, respecting // any device type specific properties/counts listed in "options". // // CPU devices are added first. static Status AddDevices(const SessionOptions& options, - const string& name_prefix, + const std::string& name_prefix, std::vector>* devices); // Helper for tests. Create a single device of type "type". The @@ -73,7 +73,7 @@ class DeviceFactory { // Most clients should call AddDevices() instead. virtual Status CreateDevices( - const SessionOptions& options, const string& name_prefix, + const SessionOptions& options, const std::string& name_prefix, std::vector>* devices) = 0; // Return the device priority number for a "device_type" string. @@ -88,7 +88,7 @@ class DeviceFactory { // higher than the packaged devices. See calls to // REGISTER_LOCAL_DEVICE_FACTORY to see the existing priorities used // for built-in devices. 
- static int32 DevicePriority(const string& device_type); + static int32 DevicePriority(const std::string& device_type); }; namespace dfactory { @@ -127,7 +127,7 @@ class Registrar { // GPUCompatibleCPU: 70 // ThreadPoolDevice: 60 // Default: 50 - explicit Registrar(const string& device_type, int priority = 50) { + explicit Registrar(const std::string& device_type, int priority = 50) { DeviceFactory::Register(device_type, new Factory(), priority); } }; diff --git a/tensorflow/core/common_runtime/scoped_allocator.h b/tensorflow/core/common_runtime/scoped_allocator.h index f25bee45b01..a8bb71d2f4b 100644 --- a/tensorflow/core/common_runtime/scoped_allocator.h +++ b/tensorflow/core/common_runtime/scoped_allocator.h @@ -45,7 +45,7 @@ class ScopedAllocator { // instance. It must be large enough to back all of the specified // (offset, byte) ranges of the fields. ScopedAllocator(const Tensor& backing_tensor, int32 scope_id, - const string& name, const gtl::ArraySlice fields, + const std::string& name, const gtl::ArraySlice fields, int32 expected_call_count, ScopedAllocatorContainer* container); @@ -60,7 +60,7 @@ class ScopedAllocator { const Tensor& tensor() const { return backing_tensor_; } - const string& name() const { return name_; } + const std::string& name() const { return name_; } private: friend class ScopedAllocatorInstance; @@ -71,7 +71,7 @@ class ScopedAllocator { Tensor backing_tensor_; TensorBuffer* tbuf_; int32 id_; - string name_; + std::string name_; ScopedAllocatorContainer* container_; std::vector fields_; mutex mu_; @@ -111,7 +111,7 @@ class ScopedAllocatorInstance : public Allocator { size_t AllocatedSize(const void* ptr) const override { return 0; } int64 AllocationId(const void* ptr) const override { return 0; } size_t AllocatedSizeSlow(const void* ptr) const override { return 0; } - string Name() override; + std::string Name() override; private: mutex mu_; diff --git a/tensorflow/core/common_runtime/scoped_allocator_mgr.h b/tensorflow/core/common_runtime/scoped_allocator_mgr.h index d10679a5488..268f7b3dc78 100644 --- a/tensorflow/core/common_runtime/scoped_allocator_mgr.h +++ b/tensorflow/core/common_runtime/scoped_allocator_mgr.h @@ -32,7 +32,8 @@ class ScopedAllocatorContainer : public core::RefCounted { public: // Establishes a reachable ScopedAllocator. Status AddScopedAllocator( - const Tensor& backing_tensor, int32 scope_id, const string& scope_name, + const Tensor& backing_tensor, int32 scope_id, + const std::string& scope_name, const gtl::ArraySlice& fields, int32 expected_call_count); @@ -72,7 +73,7 @@ class ScopedAllocatorContainer : public core::RefCounted { // At most one of these exists per device. class ScopedAllocatorMgr { public: - explicit ScopedAllocatorMgr(const string& device_name) + explicit ScopedAllocatorMgr(const std::string& device_name) : device_name_(device_name) {} ~ScopedAllocatorMgr(); @@ -81,7 +82,7 @@ class ScopedAllocatorMgr { // Establishes a reachable ScopedAllocator. 
Status AddScopedAllocator( const Tensor& backing_tensor, int64 step_id, int32 scope_id, - const string& scope_name, + const std::string& scope_name, const gtl::ArraySlice& fields, int32 expected_call_count); @@ -97,10 +98,10 @@ class ScopedAllocatorMgr { const DataType dtype, std::vector* fields); - const string& device_name() const { return device_name_; } + const std::string& device_name() const { return device_name_; } private: - string device_name_; + std::string device_name_; mutex mu_; std::unordered_map per_step_map_ TF_GUARDED_BY(mu_); From 04b5af681660be58f30cde75bdcec97934f20134 Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Sat, 1 Aug 2020 20:06:19 +0000 Subject: [PATCH 1942/2522] build fix --- tensorflow/core/kernels/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 6ed4248e2d1..290773845db 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -2976,7 +2976,7 @@ tf_cc_tests( "//tensorflow/core:framework", "//tensorflow/core:test", "//tensorflow/core:test_main", - "//tensorflow/core:tensor_testutil.h", + "//tensorflow/core/framework:tensor_testutil.h", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/strings", ], From e1a47eb5daf82a2ffc03d977896605557c6a1012 Mon Sep 17 00:00:00 2001 From: Tzu-Wei Sung Date: Sat, 1 Aug 2020 15:07:47 -0700 Subject: [PATCH 1943/2522] Run pylint --- tensorflow/python/ops/math_ops.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index a01638d0602..2eae0ade4dd 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -3859,7 +3859,8 @@ def conj(x, name=None): >>> x = tf.constant([-2.25 + 4.75j, 3.25 + 5.75j]) >>> tf.math.conj(x) - + If `x` is real, it is returned unchanged. @@ -3867,7 +3868,8 @@ def conj(x, name=None): >>> x = tf.constant([-2.25, 3.25]) >>> tf.math.conj(x) - + Args: x: `Tensor` to conjugate. Must have numeric or variant type. From d5ed5f9895cc10c1ac7be0a589312414af84f4e1 Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Sat, 1 Aug 2020 15:39:17 -0700 Subject: [PATCH 1944/2522] Fix and cleanup head adjustment allocation with offsets. Currently, some platforms that have offsets during allocation (e.g. something on the Sparkfun @ 32bits) will fail to allocate. This is due to how the head was adjusted and the allocation size request in MicroAllocator.cc during memory planning phase (the part that gets committed to head). First, this change fixes the actual bytes available call by taking in account the offset requested. This is a bug that is exposed with the new adjust head API. All head space was requested as a temp buffer to plan memory usage. This allocation did not account for offsets properly. Secondly, I've simplified the API for head adjustment. The head is a value that can be set with a given requested size + offset. The watermark logic has been removed in favor of simplicity - callers (e.g. MicroAllocator) should check if they need to increase the head size before adjusting. 
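For illustration only, the head/tail contract described above can be reduced to a small two-ended arena. The following is an editorial sketch under assumed names (TwoEndedArena, EnsureHead, AllocTail, Available), not the actual SimpleMemoryAllocator implementation: the head is grown to at least a requested size measured from the aligned start of the buffer, the tail is a downward bump allocator, and the space reported to the planner is measured between the aligned head and the aligned tail so that the alignment offset is not double-counted.

// Editorial sketch of a two-ended arena with a resizable head and a
// downward-growing tail. All names here are hypothetical illustrations.
#include <cstddef>
#include <cstdint>
#include <cstdio>

// Alignment helpers; `alignment` is assumed to be a power of two.
inline uint8_t* AlignUp(uint8_t* p, size_t alignment) {
  uintptr_t v = reinterpret_cast<uintptr_t>(p);
  return reinterpret_cast<uint8_t*>((v + alignment - 1) & ~(alignment - 1));
}
inline uint8_t* AlignDown(uint8_t* p, size_t alignment) {
  uintptr_t v = reinterpret_cast<uintptr_t>(p);
  return reinterpret_cast<uint8_t*>(v & ~(alignment - 1));
}

class TwoEndedArena {
 public:
  TwoEndedArena(uint8_t* buffer, size_t size)
      : begin_(buffer), head_(buffer), tail_(buffer + size) {}

  // Grow the head to at least `size` bytes, measured from the aligned start
  // of the arena. Never shrinks below previous use; fails if the head would
  // collide with tail allocations. No watermark is tracked: a caller that
  // needs a larger head simply asks again with a bigger size.
  bool EnsureHead(size_t size, size_t alignment) {
    uint8_t* new_head = AlignUp(begin_, alignment) + size;
    if (new_head > tail_) return false;      // would overlap the tail
    if (new_head > head_) head_ = new_head;  // only ever grows
    return true;
  }

  // Bump-allocate persistent data from the tail, moving toward the head.
  uint8_t* AllocTail(size_t size, size_t alignment) {
    if (size > static_cast<size_t>(tail_ - begin_)) return nullptr;
    uint8_t* candidate = AlignDown(tail_ - size, alignment);
    if (candidate < head_) return nullptr;   // would overlap the head
    tail_ = candidate;
    return candidate;
  }

  // Bytes usable between head and tail once both ends are aligned. This is
  // the figure a memory planner can safely assume, because the alignment
  // offset has already been subtracted.
  size_t Available(size_t alignment) const {
    uint8_t* h = AlignUp(head_, alignment);
    uint8_t* t = AlignDown(tail_, alignment);
    return (t > h) ? static_cast<size_t>(t - h) : 0;
  }

 private:
  uint8_t* begin_;
  uint8_t* head_;
  uint8_t* tail_;
};

int main() {
  alignas(16) static uint8_t arena[1024];
  TwoEndedArena a(arena, sizeof(arena));
  a.AllocTail(/*size=*/64, /*alignment=*/16);               // persistent tail data
  const size_t plan_space = a.Available(/*alignment=*/16);  // offered to the planner
  const bool ok = a.EnsureHead(plan_space / 2, /*alignment=*/16);
  std::printf("plannable bytes: %zu, head committed: %s\n", plan_space,
              ok ? "yes" : "no");
  return 0;
}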
PiperOrigin-RevId: 324426138 Change-Id: Ifc683450ba32b9dd9fc5ba587855608a0bc6e311 --- tensorflow/lite/micro/micro_allocator.cc | 15 +-- .../recording_simple_memory_allocator.cc | 20 +-- .../micro/recording_simple_memory_allocator.h | 5 +- .../recording_simple_memory_allocator_test.cc | 26 ++-- .../lite/micro/simple_memory_allocator.cc | 66 +++++----- .../lite/micro/simple_memory_allocator.h | 22 ++-- .../micro/simple_memory_allocator_test.cc | 116 +++++++++++++----- 7 files changed, 169 insertions(+), 101 deletions(-) diff --git a/tensorflow/lite/micro/micro_allocator.cc b/tensorflow/lite/micro/micro_allocator.cc index 29a0c002cab..881b9b9abb0 100644 --- a/tensorflow/lite/micro/micro_allocator.cc +++ b/tensorflow/lite/micro/micro_allocator.cc @@ -1020,6 +1020,8 @@ TfLiteStatus MicroAllocator::CommitStaticMemoryPlan( // Note that AllocationInfo is only needed for creating the plan. It will be // thrown away when the child allocator (tmp_allocator) goes out of scope. { + // TODO(b/162595810): Use temp allocation buffer instead of a stack + // instance: SimpleMemoryAllocator tmp_allocator(error_reporter_, memory_allocator_->GetBufferHead(), memory_allocator_->GetTail()); @@ -1038,16 +1040,17 @@ TfLiteStatus MicroAllocator::CommitStaticMemoryPlan( const AllocationInfo* allocation_info = builder.Finish(); // Remaining arena size that memory planner can use for calculating offsets. - size_t remaining_arena_size = tmp_allocator.GetAvailableMemory(); + size_t remaining_arena_size = + tmp_allocator.GetAvailableMemory(kBufferAlignment); uint8_t* planner_arena = - tmp_allocator.AdjustHead(remaining_arena_size, kBufferAlignment); + tmp_allocator.AllocateTemp(remaining_arena_size, kBufferAlignment); TF_LITE_ENSURE(error_reporter_, planner_arena != nullptr); GreedyMemoryPlanner planner(planner_arena, remaining_arena_size); TF_LITE_ENSURE_STATUS( CreatePlan(error_reporter_, &planner, allocation_info, builder.Size())); size_t actual_available_arena_size = - memory_allocator_->GetAvailableMemory(); + memory_allocator_->GetAvailableMemory(kBufferAlignment); // Make sure we have enough arena size. if (planner.GetMaximumMemorySize() > actual_available_arena_size) { TF_LITE_REPORT_ERROR( @@ -1064,11 +1067,9 @@ TfLiteStatus MicroAllocator::CommitStaticMemoryPlan( allocation_info, builder.Size())); head_usage = planner.GetMaximumMemorySize(); } - // Allocate the planned area, so the allocator knows it's used. 
- uint8_t* allocated_tensor_memory = - memory_allocator_->AdjustHead(head_usage, kBufferAlignment); - TF_LITE_ENSURE(error_reporter_, allocated_tensor_memory != nullptr); + TF_LITE_ENSURE_STATUS( + memory_allocator_->EnsureHeadSize(head_usage, kBufferAlignment)); return kTfLiteOk; } diff --git a/tensorflow/lite/micro/recording_simple_memory_allocator.cc b/tensorflow/lite/micro/recording_simple_memory_allocator.cc index f44afad5eb2..ef2e9f31664 100644 --- a/tensorflow/lite/micro/recording_simple_memory_allocator.cc +++ b/tensorflow/lite/micro/recording_simple_memory_allocator.cc @@ -24,7 +24,8 @@ namespace tflite { RecordingSimpleMemoryAllocator::RecordingSimpleMemoryAllocator( ErrorReporter* error_reporter, uint8_t* buffer_head, size_t buffer_size) : SimpleMemoryAllocator(error_reporter, buffer_head, buffer_size), - requested_bytes_(0), + requested_head_bytes_(0), + requested_tail_bytes_(0), used_bytes_(0), alloc_count_(0) {} @@ -45,7 +46,7 @@ RecordingSimpleMemoryAllocator* RecordingSimpleMemoryAllocator::Create( } size_t RecordingSimpleMemoryAllocator::GetRequestedBytes() const { - return requested_bytes_; + return requested_head_bytes_ + requested_tail_bytes_; } size_t RecordingSimpleMemoryAllocator::GetUsedBytes() const { @@ -56,16 +57,15 @@ size_t RecordingSimpleMemoryAllocator::GetAllocatedCount() const { return alloc_count_; } -uint8_t* RecordingSimpleMemoryAllocator::AdjustHead(size_t size, - size_t alignment) { +TfLiteStatus RecordingSimpleMemoryAllocator::EnsureHeadSize(size_t size, + size_t alignment) { const uint8_t* previous_head = GetHead(); - uint8_t* result = SimpleMemoryAllocator::AdjustHead(size, alignment); - if (result != nullptr) { + TfLiteStatus status = SimpleMemoryAllocator::EnsureHeadSize(size, alignment); + if (status == kTfLiteOk) { used_bytes_ += GetHead() - previous_head; - requested_bytes_ += size; - alloc_count_++; + requested_head_bytes_ = size; } - return result; + return status; } uint8_t* RecordingSimpleMemoryAllocator::AllocateFromTail(size_t size, @@ -74,7 +74,7 @@ uint8_t* RecordingSimpleMemoryAllocator::AllocateFromTail(size_t size, uint8_t* result = SimpleMemoryAllocator::AllocateFromTail(size, alignment); if (result != nullptr) { used_bytes_ += previous_tail - GetTail(); - requested_bytes_ += size; + requested_tail_bytes_ += size; alloc_count_++; } return result; diff --git a/tensorflow/lite/micro/recording_simple_memory_allocator.h b/tensorflow/lite/micro/recording_simple_memory_allocator.h index e1ac0ebfd20..8d3e9fb49d4 100644 --- a/tensorflow/lite/micro/recording_simple_memory_allocator.h +++ b/tensorflow/lite/micro/recording_simple_memory_allocator.h @@ -47,11 +47,12 @@ class RecordingSimpleMemoryAllocator : public SimpleMemoryAllocator { // Returns the number of alloc calls from the head or tail. 
size_t GetAllocatedCount() const; - uint8_t* AdjustHead(size_t size, size_t alignment) override; + TfLiteStatus EnsureHeadSize(size_t size, size_t alignment) override; uint8_t* AllocateFromTail(size_t size, size_t alignment) override; private: - size_t requested_bytes_; + size_t requested_head_bytes_; + size_t requested_tail_bytes_; size_t used_bytes_; size_t alloc_count_; diff --git a/tensorflow/lite/micro/recording_simple_memory_allocator_test.cc b/tensorflow/lite/micro/recording_simple_memory_allocator_test.cc index ec37f399ed8..6450cb53cac 100644 --- a/tensorflow/lite/micro/recording_simple_memory_allocator_test.cc +++ b/tensorflow/lite/micro/recording_simple_memory_allocator_test.cc @@ -77,44 +77,46 @@ TF_LITE_MICRO_TEST(TestDoesNotRecordFailedTailAllocations) { static_cast(0)); } -TF_LITE_MICRO_TEST(TestRecordsHeadAllocations) { +TF_LITE_MICRO_TEST(TestRecordsHeadSizeAdjustment) { constexpr size_t arena_size = 1024; uint8_t arena[arena_size]; tflite::RecordingSimpleMemoryAllocator allocator(micro_test::reporter, arena, arena_size); - uint8_t* result = allocator.AdjustHead(/*size=*/5, /*alignment=*/1); - TF_LITE_MICRO_EXPECT_NE(result, nullptr); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, allocator.EnsureHeadSize(/*size=*/5, /*alignment=*/1)); TF_LITE_MICRO_EXPECT_EQ(allocator.GetUsedBytes(), static_cast(5)); TF_LITE_MICRO_EXPECT_EQ(allocator.GetRequestedBytes(), static_cast(5)); + // Head adjustments do not count as an allocation: TF_LITE_MICRO_EXPECT_EQ(allocator.GetAllocatedCount(), - static_cast(1)); + static_cast(0)); - result = allocator.AllocateFromTail(/*size=*/15, /*alignment=*/1); + uint8_t* result = allocator.AllocateFromTail(/*size=*/15, /*alignment=*/1); TF_LITE_MICRO_EXPECT_NE(result, nullptr); TF_LITE_MICRO_EXPECT_EQ(allocator.GetUsedBytes(), static_cast(20)); TF_LITE_MICRO_EXPECT_EQ(allocator.GetRequestedBytes(), static_cast(20)); TF_LITE_MICRO_EXPECT_EQ(allocator.GetAllocatedCount(), - static_cast(2)); + static_cast(1)); } -TF_LITE_MICRO_TEST(TestRecordsMisalignedHeadAllocations) { +TF_LITE_MICRO_TEST(TestRecordsMisalignedHeadSizeAdjustments) { constexpr size_t arena_size = 1024; uint8_t arena[arena_size]; tflite::RecordingSimpleMemoryAllocator allocator(micro_test::reporter, arena, arena_size); - uint8_t* result = allocator.AdjustHead(/*size=*/10, /*alignment=*/12); - TF_LITE_MICRO_EXPECT_NE(result, nullptr); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, allocator.EnsureHeadSize(/*size=*/10, /*alignment=*/12)); // Validate used bytes in 8 byte range that can included alignment of 12: TF_LITE_MICRO_EXPECT_GE(allocator.GetUsedBytes(), static_cast(10)); TF_LITE_MICRO_EXPECT_LE(allocator.GetUsedBytes(), static_cast(20)); TF_LITE_MICRO_EXPECT_EQ(allocator.GetRequestedBytes(), static_cast(10)); + // Head adjustments do not count as an allocation: TF_LITE_MICRO_EXPECT_EQ(allocator.GetAllocatedCount(), - static_cast(1)); + static_cast(0)); } TF_LITE_MICRO_TEST(TestDoesNotRecordFailedTailAllocations) { @@ -123,8 +125,8 @@ TF_LITE_MICRO_TEST(TestDoesNotRecordFailedTailAllocations) { tflite::RecordingSimpleMemoryAllocator allocator(micro_test::reporter, arena, arena_size); - uint8_t* result = allocator.AdjustHead(/*size=*/2048, /*alignment=*/1); - TF_LITE_MICRO_EXPECT(result == nullptr); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteError, allocator.EnsureHeadSize(/*size=*/2048, /*alignment=*/1)); TF_LITE_MICRO_EXPECT_EQ(allocator.GetUsedBytes(), static_cast(0)); TF_LITE_MICRO_EXPECT_EQ(allocator.GetRequestedBytes(), static_cast(0)); diff --git a/tensorflow/lite/micro/simple_memory_allocator.cc 
b/tensorflow/lite/micro/simple_memory_allocator.cc index 37c5acd37d3..bea1a9d7175 100644 --- a/tensorflow/lite/micro/simple_memory_allocator.cc +++ b/tensorflow/lite/micro/simple_memory_allocator.cc @@ -19,6 +19,7 @@ limitations under the License. #include #include +#include "tensorflow/lite/c/common.h" #include "tensorflow/lite/core/api/error_reporter.h" #include "tensorflow/lite/kernels/internal/compatibility.h" #include "tensorflow/lite/micro/memory_helpers.h" @@ -31,7 +32,7 @@ SimpleMemoryAllocator::SimpleMemoryAllocator(ErrorReporter* error_reporter, : error_reporter_(error_reporter), buffer_head_(buffer_head), buffer_tail_(buffer_tail), - head_watermark_(buffer_head), + head_(buffer_head), tail_(buffer_tail), temp_(buffer_head_) {} @@ -59,43 +60,46 @@ SimpleMemoryAllocator* SimpleMemoryAllocator::Create( SimpleMemoryAllocator::~SimpleMemoryAllocator() {} -uint8_t* SimpleMemoryAllocator::AdjustHead(size_t size, size_t alignment) { - if (head_watermark_ != temp_) { - TF_LITE_REPORT_ERROR(error_reporter_, - "Internal error: AdjustHead() needs to be called after" - "ResetTempAllocations()."); - return nullptr; +TfLiteStatus SimpleMemoryAllocator::EnsureHeadSize(size_t size, + size_t alignment) { + if (head_ != temp_) { + TF_LITE_REPORT_ERROR( + error_reporter_, + "Internal error: EnsureHeadSize() needs to be called after" + "ResetTempAllocations()."); + return kTfLiteError; } uint8_t* const aligned_result = AlignPointerUp(buffer_head_, alignment); - if (aligned_result + size < head_watermark_) { - return aligned_result; + if (aligned_result + size < head_) { + // Size is below the current head size, just return. + return kTfLiteOk; } const size_t available_memory = tail_ - aligned_result; if (available_memory < size) { TF_LITE_REPORT_ERROR( error_reporter_, - "Failed to allocate memory. Requested: %u, available %u, missing: %u", + "Failed to adjust head size. Requested: %u, available %u, missing: %u", size, available_memory, size - available_memory); - return nullptr; + return kTfLiteError; } - head_watermark_ = aligned_result + size; - temp_ = head_watermark_; + head_ = aligned_result + size; + temp_ = head_; - return aligned_result; + return kTfLiteOk; } uint8_t* SimpleMemoryAllocator::AllocateFromTail(size_t size, size_t alignment) { uint8_t* const aligned_result = AlignPointerDown(tail_ - size, alignment); - if (aligned_result < head_watermark_) { + if (aligned_result < head_) { #ifndef TF_LITE_STRIP_ERROR_STRINGS - const size_t missing_memory = head_watermark_ - aligned_result; - TF_LITE_REPORT_ERROR( - error_reporter_, - "Failed to allocate memory. Requested: %u, available %u, missing: %u", - size, size - missing_memory, missing_memory); + const size_t missing_memory = head_ - aligned_result; + TF_LITE_REPORT_ERROR(error_reporter_, + "Failed to allocate tail memory. Requested: %u, " + "available %u, missing: %u", + size, size - missing_memory, missing_memory); #endif return nullptr; } @@ -107,38 +111,40 @@ uint8_t* SimpleMemoryAllocator::AllocateTemp(size_t size, size_t alignment) { uint8_t* const aligned_result = AlignPointerUp(temp_, alignment); const size_t available_memory = tail_ - aligned_result; if (available_memory < size) { - TF_LITE_REPORT_ERROR( - error_reporter_, - "Failed to allocate memory. Requested: %u, available %u, missing: %u", - size, available_memory, size - available_memory); + TF_LITE_REPORT_ERROR(error_reporter_, + "Failed to allocate temp memory. 
Requested: %u, " + "available %u, missing: %u", + size, available_memory, size - available_memory); return nullptr; } temp_ = aligned_result + size; return aligned_result; } -void SimpleMemoryAllocator::ResetTempAllocations() { temp_ = head_watermark_; } +void SimpleMemoryAllocator::ResetTempAllocations() { temp_ = head_; } -uint8_t* SimpleMemoryAllocator::GetHead() const { return head_watermark_; } +uint8_t* SimpleMemoryAllocator::GetHead() const { return head_; } uint8_t* SimpleMemoryAllocator::GetBufferHead() const { return buffer_head_; } uint8_t* SimpleMemoryAllocator::GetTail() const { return tail_; } size_t SimpleMemoryAllocator::GetHeadUsedBytes() const { - return head_watermark_ - buffer_head_; + return head_ - buffer_head_; } size_t SimpleMemoryAllocator::GetTailUsedBytes() const { return buffer_tail_ - tail_; } -size_t SimpleMemoryAllocator::GetAvailableMemory() const { - return tail_ - buffer_head_; +size_t SimpleMemoryAllocator::GetAvailableMemory(size_t alignment) const { + uint8_t* const aligned_head = AlignPointerUp(head_, alignment); + uint8_t* const aligned_tail = AlignPointerDown(tail_, alignment); + return aligned_tail - aligned_head; } size_t SimpleMemoryAllocator::GetUsedBytes() const { - return GetBufferSize() - GetAvailableMemory(); + return GetBufferSize() - (tail_ - head_); } size_t SimpleMemoryAllocator::GetBufferSize() const { diff --git a/tensorflow/lite/micro/simple_memory_allocator.h b/tensorflow/lite/micro/simple_memory_allocator.h index 26e217096e5..8c216f47848 100644 --- a/tensorflow/lite/micro/simple_memory_allocator.h +++ b/tensorflow/lite/micro/simple_memory_allocator.h @@ -19,6 +19,7 @@ limitations under the License. #include #include +#include "tensorflow/lite/c/common.h" #include "tensorflow/lite/core/api/error_reporter.h" #include "tensorflow/lite/micro/compatibility.h" @@ -42,13 +43,14 @@ class SimpleMemoryAllocator { uint8_t* buffer_head, size_t buffer_size); - // Adjust memory allocations starting at the head of the arena (lowest address - // and moving upwards). It only tracks the maximum head usage and make sure - // that memory used at head will not overlap with memory reserved at tail. - // Calls to this method will also invalidate all temporary allocation values. - // This call will fail if a chain allocation calls through AllocateTemp() have - // not been cleaned up with a call to ResetTempAllocations(). - virtual uint8_t* AdjustHead(size_t size, size_t alignment); + // Ensure that the head (lowest address and moving upwards) memory allocation + // is at least a given size. This function will only increase the head size if + // the passed in value is larger than the current head size. Calls to this + // method will also invalidate all temporary allocation values. This call will + // fail if a chain of allocations through AllocateTemp() have not been cleaned + // up with a call to ResetTempAllocations(). + virtual TfLiteStatus EnsureHeadSize(size_t size, size_t alignment); + // Allocates memory starting at the tail of the arena (highest address and // moving downwards). virtual uint8_t* AllocateFromTail(size_t size, size_t alignment); @@ -74,7 +76,9 @@ class SimpleMemoryAllocator { size_t GetHeadUsedBytes() const; size_t GetTailUsedBytes() const; - size_t GetAvailableMemory() const; + // Returns the number of bytes available with a given alignment. 
+ size_t GetAvailableMemory(size_t alignment) const; + size_t GetUsedBytes() const; private: @@ -83,7 +87,7 @@ class SimpleMemoryAllocator { ErrorReporter* error_reporter_; uint8_t* buffer_head_; uint8_t* buffer_tail_; - uint8_t* head_watermark_; + uint8_t* head_; uint8_t* tail_; uint8_t* temp_; diff --git a/tensorflow/lite/micro/simple_memory_allocator_test.cc b/tensorflow/lite/micro/simple_memory_allocator_test.cc index 0829c7766d4..adffc9566da 100644 --- a/tensorflow/lite/micro/simple_memory_allocator_test.cc +++ b/tensorflow/lite/micro/simple_memory_allocator_test.cc @@ -22,31 +22,89 @@ limitations under the License. TF_LITE_MICRO_TESTS_BEGIN -TF_LITE_MICRO_TEST(TestAdjustHead) { +TF_LITE_MICRO_TEST(TestEnsureHeadSizeSimpleAlignment) { constexpr size_t arena_size = 1024; uint8_t arena[arena_size]; tflite::SimpleMemoryAllocator allocator(micro_test::reporter, arena, arena_size); - // First allocation from head. - { - uint8_t* result = allocator.AdjustHead(100, 1); - TF_LITE_MICRO_EXPECT(arena == result); - TF_LITE_MICRO_EXPECT(arena + 100 == allocator.GetHead()); - } - // Second allocation doesn't require as much space so head pointer didn't - // move. - { - uint8_t* result = allocator.AdjustHead(10, 1); - TF_LITE_MICRO_EXPECT(arena == result); - TF_LITE_MICRO_EXPECT(arena + 100 == allocator.GetHead()); - } - // Third allocation increase head memory usage. - { - uint8_t* result = allocator.AdjustHead(1000, 1); - TF_LITE_MICRO_EXPECT(arena == result); - TF_LITE_MICRO_EXPECT(arena + 1000 == allocator.GetHead()); - } + // First head adjustment + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, allocator.EnsureHeadSize(/*size=*/100, /*alignment=*/1)); + TF_LITE_MICRO_EXPECT(arena + 100 == allocator.GetHead()); + + // Second head adjusment is smaller, head size should still be 100. 
+ TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, allocator.EnsureHeadSize(/*size=*/10, /*alignment=*/1)); + TF_LITE_MICRO_EXPECT(arena + 100 == allocator.GetHead()); + + // Third head adjustment re-increases the head size: + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, allocator.EnsureHeadSize(/*size=*/1000, /*alignment=*/1)); + TF_LITE_MICRO_EXPECT(arena + 1000 == allocator.GetHead()); +} + +TF_LITE_MICRO_TEST(TestAdjustHeadSizeMisalignment) { + constexpr size_t arena_size = 1024; + uint8_t arena[arena_size]; + tflite::SimpleMemoryAllocator allocator(micro_test::reporter, arena, + arena_size); + + // First head adjustment of 100 bytes (aligned 12): + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, allocator.EnsureHeadSize(/*size=*/100, /*alignment=*/12)); + + // Offset alignment of 12 can lead to allocation within 8 byte range of + // requested bytes based to arena alignment at runtime: + TF_LITE_MICRO_EXPECT_GE(allocator.GetHead(), arena + 100); + TF_LITE_MICRO_EXPECT_LE(allocator.GetHead(), arena + 100 + 11); + + // Second head adjusment shrinks the head size (aligned at 12), head size + // should still be 100: + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, allocator.EnsureHeadSize(/*size=*/10, /*alignment=*/12)); + TF_LITE_MICRO_EXPECT_GE(allocator.GetHead(), arena + 100); + TF_LITE_MICRO_EXPECT_LE(allocator.GetHead(), arena + 100 + 11); + + // Third head adjustment re-increases the head size (aligned at 12): + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, allocator.EnsureHeadSize(/*size=*/1000, /*alignment=*/12)); + TF_LITE_MICRO_EXPECT_GE(allocator.GetHead(), arena + 1000); + TF_LITE_MICRO_EXPECT_LE(allocator.GetHead(), arena + 1000 + 11); +} + +TF_LITE_MICRO_TEST(TestAdjustHeadSizeMisalignedHandlesCorrectBytesAvailable) { + constexpr size_t arena_size = 1024; + uint8_t arena[arena_size]; + tflite::SimpleMemoryAllocator allocator(micro_test::reporter, arena, + arena_size); + + // First head adjustment of 100 bytes (aligned 12): + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, allocator.EnsureHeadSize(/*size=*/100, /*alignment=*/12)); + + // allocator.GetAvailableMemory() should also report the actual amount of + // memory available based on a requested offset (12): + size_t aligned_available_bytes = + allocator.GetAvailableMemory(/*alignment=*/12); + TF_LITE_MICRO_EXPECT_LE(aligned_available_bytes, arena_size - 100); + TF_LITE_MICRO_EXPECT_GE(aligned_available_bytes, arena_size - 100 - 24); + + // Second head adjusment shrinks the head size (aligned at 12), head size + // should still be 100: + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, allocator.EnsureHeadSize(/*size=*/10, /*alignment=*/12)); + aligned_available_bytes = allocator.GetAvailableMemory(/*alignment=*/12); + + TF_LITE_MICRO_EXPECT_LE(aligned_available_bytes, arena_size - 100); + TF_LITE_MICRO_EXPECT_GE(aligned_available_bytes, arena_size - 100 - 24); + + // Third head adjustment re-increases the head size (aligned at 12): + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, allocator.EnsureHeadSize(/*size=*/1000, /*alignment=*/12)); + aligned_available_bytes = allocator.GetAvailableMemory(/*alignment=*/12); + TF_LITE_MICRO_EXPECT_LE(aligned_available_bytes, arena_size - 1000); + TF_LITE_MICRO_EXPECT_GE(aligned_available_bytes, arena_size - 1000 - 24); } TF_LITE_MICRO_TEST(TestJustFits) { @@ -121,7 +179,7 @@ TF_LITE_MICRO_TEST(TestResetTempAllocations) { TF_LITE_MICRO_EXPECT_EQ(temp2 - temp1, 0); } -TF_LITE_MICRO_TEST(TestAllocateHeadWithoutResettingTemp) { +TF_LITE_MICRO_TEST(TestEnsureHeadSizeWithoutResettingTemp) { constexpr size_t arena_size = 1024; uint8_t arena[arena_size]; 
tflite::SimpleMemoryAllocator allocator(micro_test::reporter, arena, @@ -130,22 +188,18 @@ TF_LITE_MICRO_TEST(TestAllocateHeadWithoutResettingTemp) { uint8_t* temp = allocator.AllocateTemp(100, 1); TF_LITE_MICRO_EXPECT(nullptr != temp); - // Allocation should be null since temp allocation was not followed by a call - // to ResetTempAllocations(). - uint8_t* head = allocator.AdjustHead(100, 1); - TF_LITE_MICRO_EXPECT(nullptr == head); + // Adjustment to head should fail since temp allocation was not followed by a + // call to ResetTempAllocations(). + TF_LITE_MICRO_EXPECT_EQ(kTfLiteError, allocator.EnsureHeadSize(100, 1)); allocator.ResetTempAllocations(); - head = allocator.AdjustHead(100, 1); - TF_LITE_MICRO_EXPECT(nullptr != head); + // Reduce head size back to zero. + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, allocator.EnsureHeadSize(0, 1)); // The most recent head allocation should be in the same location as the // original temp allocation pointer. - TF_LITE_MICRO_EXPECT(temp == head); + TF_LITE_MICRO_EXPECT(temp == allocator.GetHead()); } -// TODO(b/161171251): Add more coverage to this test - specifically around -1 -// alignments and other odd allocation requests. - TF_LITE_MICRO_TESTS_END From 628954a351df5f286b9339b6b04eacd3e5d23230 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Sat, 1 Aug 2020 18:00:39 -0700 Subject: [PATCH 1945/2522] Fix a bug in the new legalization of sign. tf.sign(NaN) should be NaN. PiperOrigin-RevId: 324434550 Change-Id: I91d57d943971d709396fb520980ec20bee743e09 --- tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir | 5 +---- .../mlir/xla/transforms/legalize_tf_patterns.td | 13 ++----------- tensorflow/compiler/tests/unary_ops_test.py | 8 ++++++-- 3 files changed, 9 insertions(+), 17 deletions(-) diff --git a/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir b/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir index c406022be5c..3b4efc388eb 100644 --- a/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir +++ b/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir @@ -2303,11 +2303,8 @@ func @expand_dims(%arg0: tensor<2xf32>, %axis: tensor) -> tensor<1x2xf32> { // CHECK-LABEL: func @sign // CHECK-SAME: [[ARG:%arg.*]]: tensor<1x2x3x4xf32> func @sign(%arg0: tensor<1x2x3x4xf32>) -> tensor<1x2x3x4xf32> { - // CHECK: [[PRED:%.*]] = "mhlo.compare"([[ARG]], [[ARG]]) - // CHECK: [[ZEROS:%.*]] = mhlo.constant dense<0.000000e+00> : tensor<1x2x3x4xf32> // CHECK: [[SIGN:%.*]] = "mhlo.sign"([[ARG]]) - // CHECK: [[SELECT:%.*]] = "mhlo.select"([[PRED]], [[ZEROS]], [[SIGN]]) - // CHECK: return [[SELECT]] : tensor<1x2x3x4xf32> + // CHECK: return [[SIGN]] : tensor<1x2x3x4xf32> %0 = "tf.Sign"(%arg0) : (tensor<1x2x3x4xf32>) -> (tensor<1x2x3x4xf32>) return %0 : tensor<1x2x3x4xf32> } diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_patterns.td b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_patterns.td index 6369897f56f..0ef62deed7d 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_patterns.td +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_patterns.td @@ -577,17 +577,8 @@ foreach TfOp = [TF_ExpandDimsOp, TF_ReshapeOp, TF_SqueezeOp, ] in { (HLO_ReshapeOp $arg), [(AnyStaticShapeTensor $res)]>; } -// Returns 0 if x is NaN, 0 if x is 0, -1 if x < 0 and 1 if x > 0. -def : Pat<(TF_SignOp $x), - (HLO_SelectOp - (HLO_CompareOp - $x, - $x, - HLO_COMPARISON_DIRECTION_NE - ), - (HLO_ConstOp (ConstantSplat<"0"> $x)), - (HLO_SignOp $x) - )>; +// Returns NaN if x is NaN, 0 if x is 0, -1 if x < 0 and 1 if x > 0. 
+def : Pat<(TF_SignOp $x), (HLO_SignOp $x)>; def BothElementTypesSameWidthIntOrFloat : Constraint Date: Sat, 1 Aug 2020 21:52:12 -0700 Subject: [PATCH 1946/2522] TFL java/BUILD: remove no_mac tag from a few targets. The bug has been resolved. PiperOrigin-RevId: 324449997 Change-Id: Id849e7ce3cbb1ea610c1a748abc985d40ee1bc52 --- tensorflow/lite/java/BUILD | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tensorflow/lite/java/BUILD b/tensorflow/lite/java/BUILD index 1b89451a888..9bceb939c02 100644 --- a/tensorflow/lite/java/BUILD +++ b/tensorflow/lite/java/BUILD @@ -152,6 +152,7 @@ java_test( srcs = ["src/test/java/org/tensorflow/lite/TensorFlowLiteTest.java"], javacopts = JAVACOPTS, tags = [ + "no_mac", # TODO(b/122888913): libtensorflowlite_test_jni broke on mac. "v1only", ], test_class = "org.tensorflow.lite.TensorFlowLiteTest", @@ -197,6 +198,9 @@ java_test( size = "small", srcs = ["src/test/java/org/tensorflow/lite/DataTypeTest.java"], javacopts = JAVACOPTS, + tags = [ + "no_mac", # TODO(b/122888913): libtensorflowlite_test_jni broke on mac. + ], test_class = "org.tensorflow.lite.DataTypeTest", visibility = ["//visibility:private"], deps = [ @@ -223,6 +227,9 @@ java_test( "src/testdata/with_custom_op.lite", ], javacopts = JAVACOPTS, + tags = [ + "no_mac", # TODO(b/122888913): libtensorflowlite_test_jni broke on mac. + ], test_class = "org.tensorflow.lite.NativeInterpreterWrapperTest", visibility = ["//visibility:private"], deps = [ @@ -249,6 +256,9 @@ java_test( "//tensorflow/lite:testdata/multi_add_flex.bin", ], javacopts = JAVACOPTS, + tags = [ + "no_mac", # TODO(b/122888913): libtensorflowlite_test_jni broke on mac. + ], test_class = "org.tensorflow.lite.InterpreterTest", visibility = ["//visibility:private"], deps = [ @@ -321,6 +331,9 @@ java_test( "src/testdata/string.bin", ], javacopts = JAVACOPTS, + tags = [ + "no_mac", # TODO(b/122888913): libtensorflowlite_test_jni broke on mac. + ], test_class = "org.tensorflow.lite.TensorTest", visibility = ["//visibility:private"], deps = [ @@ -345,6 +358,7 @@ java_test( # Add customized libtensorflowlite_jni.so to java_path jvm_flags = ["-Djava.library.path=third_party/tensorflow/lite/testing"], tags = [ + "no_mac", # TODO(b/122888913): libtensorflowlite_test_jni broke on mac. "v1only", ], test_class = "org.tensorflow.lite.InterpreterCustomizedAndroidBuildTest", @@ -401,6 +415,9 @@ filegroup( srcs = select({ "//conditions:default": [":libtensorflowlite_jni.so"], }), + tags = [ + "no_mac", # TODO(b/122888913): libtensorflowlite_test_jni broke on mac. + ], ) cc_library( From 7045927bac31c4e1e02e46958f71ce7bbed254b7 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Sat, 1 Aug 2020 21:55:23 -0700 Subject: [PATCH 1947/2522] [XLA] Add support for int8/uint8 CLZ/PopCount to the evaluator PiperOrigin-RevId: 324450178 Change-Id: I136c01a44972f027e6010dbc7c0a0b3b007100fc --- .../xla/service/hlo_evaluator_typed_visitor.h | 45 +++++-------------- 1 file changed, 12 insertions(+), 33 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h index 250e2cf1f08..9226cd556ff 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h @@ -2409,39 +2409,23 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { } // Enable CLZ only for int32, uint32, int64 and uint64. 
- template < - typename NativeT, - typename std::enable_if< - (std::is_floating_point::value || - std::is_integral::value || is_complex_t::value) && - !(std::is_same::value || - std::is_same::value || - std::is_same::value || - std::is_same::value)>::type* = nullptr> + template ::value || + std::is_same::value)>::type* = nullptr> Status HandleClz(HloInstruction* clz) { return UnsupportedTypeError(clz); } template ::value || - std::is_same::value>::type* = nullptr> + std::is_integral::value && + !std::is_same::value>::type* = nullptr> Status HandleClz(HloInstruction* clz) { TF_ASSIGN_OR_RETURN(parent_->evaluated_[clz], ElementWiseUnaryOp(clz, [](ElementwiseT elem_operand) { - return 31 - tensorflow::Log2Floor(elem_operand); - })); - return Status::OK(); - } - - template ::value || - std::is_same::value>::type* = nullptr> - Status HandleClz(HloInstruction* clz) { - TF_ASSIGN_OR_RETURN(parent_->evaluated_[clz], - ElementWiseUnaryOp(clz, [](ElementwiseT elem_operand) { - return 63 - tensorflow::Log2Floor64(elem_operand); + return (sizeof(elem_operand) * CHAR_BIT - 1) - + tensorflow::Log2Floor64(elem_operand); })); return Status::OK(); } @@ -2450,23 +2434,18 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { return HandleClz(clz); } - // Enable Popcnt only for int32, uint32, int64 and uint64. template ::value || - std::is_same::value || - std::is_same::value || - std::is_same::value)>::type* = nullptr> + (!std::is_integral::value || + std::is_same::value)>::type* = nullptr> Status HandlePopulationCount(HloInstruction* popcnt) { return UnsupportedTypeError(popcnt); } template ::value || - std::is_same::value || - std::is_same::value || - std::is_same::value>::type* = nullptr> + std::is_integral::value && + !std::is_same::value>::type* = nullptr> Status HandlePopulationCount(HloInstruction* popcnt) { TF_ASSIGN_OR_RETURN( parent_->evaluated_[popcnt], From 43b5e2cbc173df1b0623d6e6b2a83ea8066af215 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 2 Aug 2020 02:01:35 -0700 Subject: [PATCH 1948/2522] Update GraphDef version to 481. PiperOrigin-RevId: 324465560 Change-Id: I02d8329d8c03fee93d323388751f60c6eb61863e --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index f935f47d210..3784f2b212d 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 480 // Updated: 2020/8/1 +#define TF_GRAPH_DEF_VERSION 481 // Updated: 2020/8/2 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From 4910e8e8ed56af3779eaa88449631a7855d4815e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 2 Aug 2020 02:01:35 -0700 Subject: [PATCH 1949/2522] compat: Update forward compatibility horizon to 2020-08-02 PiperOrigin-RevId: 324465561 Change-Id: I56ee13f60a2628cdc1b4f8b5c690feca74bdc9bc --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 8f29de673fe..e36055c6a93 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. 
It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 8, 1) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 8, 2) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From 5df70e988252901d072cfd32cd5f69ab261895a7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 2 Aug 2020 12:44:04 -0700 Subject: [PATCH 1950/2522] [XLA:SPMD] Fix right_halo size in convolution handler when RHS is dilated and uneven partitioned. PiperOrigin-RevId: 324502815 Change-Id: I3343d02eebf5775b7c30e4754cff7e3a26c17996 --- .../xla/service/spmd/convolution_handler.cc | 15 ++++--- .../xla/service/spmd/spmd_partitioner_test.cc | 44 +++++++++++++++++++ 2 files changed, 53 insertions(+), 6 deletions(-) diff --git a/tensorflow/compiler/xla/service/spmd/convolution_handler.cc b/tensorflow/compiler/xla/service/spmd/convolution_handler.cc index 78dc7d94798..01d7ea2ff14 100644 --- a/tensorflow/compiler/xla/service/spmd/convolution_handler.cc +++ b/tensorflow/compiler/xla/service/spmd/convolution_handler.cc @@ -489,13 +489,15 @@ PartitionConvolutionWithSpatialDimensionHaloExchangeOnLHS( // window dilation. // // * offset(i): RHS * D * i - low_padding - // * limit(i): {(RHS - 1) * D + 1} * (i + 1) + (WC - 1) * stride - low_padding + // * limit(i): {RHS * (i + 1) * D - (D - 1)} + (WC - 1) * stride - low_padding // // Since shard i has LHS of range [i * LHS, (i + 1) * LHS) // * left-halo: i * LHS - offset(i) - // = (LHS - RHS) * i + low_padding + // = (LHS - RHS * D) * i + low_padding // * right-halo: limit(i) - (i + 1) * LHS - // = [{(RHS - 1) * D + 1} - LHS] * (i + 1) + (WC - 1) * stride - low_padding + // = (RHS * D - LHS) * (i + 1) + (1 - D) + (WC - 1) * stride - low_padding + // = (RHS * D - LHS) * i + (RHS * D - LHS) + (1-D) + // + (WC - 1) * stride - low_padding std::vector shard_counts(dnums.input_spatial_dimensions_size()); std::vector lhs_shard_sizes(dnums.input_spatial_dimensions_size()); std::vector rhs_shard_sizes(dnums.input_spatial_dimensions_size()); @@ -554,9 +556,10 @@ PartitionConvolutionWithSpatialDimensionHaloExchangeOnLHS( 1)); right_halo_size_functions[lhs_dimension] = OffsetCalculation(MultiplyAddDivideOffsetCalculation( - rhs_shard_size_dilated - lhs_shard_size, - rhs_shard_size_dilated - lhs_shard_size + - wd.stride() * (window_count - 1) - padding_low, + rhs_shard_size * wd.window_dilation() - lhs_shard_size, + rhs_shard_size * wd.window_dilation() - lhs_shard_size + 1 - + wd.window_dilation() + wd.stride() * (window_count - 1) - + padding_low, 1)); // Exchange halo and concatenate. 
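[Editor's note] Before the partitioner test below, a quick numeric reading of the halo fix above may help. The old right-halo expression used the dilated shard size (RHS - 1) * D + 1, which undercounts when the RHS is padded to an uneven partition; the new one uses RHS * D. The Python sketch here plugs in the shard sizes implied by the new test (LHS dim 28 split four ways -> 7, RHS dim 14 padded to 16 and split four ways -> 4, window dilation 2, one output window, stride 1, no low padding). These concrete numbers, and the reading of MultiplyAddDivideOffsetCalculation as (m * i + a) // d, are the editor's interpretation, not stated in the patch.

# Editor's sketch (not part of the patch): evaluate the old and new
# right-halo expressions for the shapes in the new SPMD test, assuming
# MultiplyAddDivideOffsetCalculation(m, a, d) evaluates to (m * i + a) // d.
lhs_shard, rhs_shard, dilation = 7, 4, 2      # per-shard sizes from the test
window_count, stride, padding_low = 1, 1, 0

def right_halo_new(i):
    # multiplier = RHS * D - LHS
    # offset     = RHS * D - LHS + 1 - D + (WC - 1) * stride - low_padding
    m = rhs_shard * dilation - lhs_shard
    a = m + 1 - dilation + (window_count - 1) * stride - padding_low
    return m * i + a

def right_halo_old(i):
    # multiplier = RHS_dilated - LHS, with RHS_dilated = (RHS - 1) * D + 1
    # offset     = RHS_dilated - LHS + (WC - 1) * stride - low_padding
    rhs_dilated = (rhs_shard - 1) * dilation + 1
    m = rhs_dilated - lhs_shard
    a = m + (window_count - 1) * stride - padding_low
    return m * i + a

for i in range(3):                            # shards that read from the right
    print(i, right_halo_old(i), right_halo_new(i))
# old expression: 0, 0, 0 -> no halo is exchanged for the uneven dilated case
# new expression: 0, 1, 2 -> the maximum of 2 matches the f32[8,2,28,8] halo
#                            slice the test expects to be collective-permuted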
diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc index b052567d9c6..d5342e3e1f4 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc @@ -1489,6 +1489,50 @@ ENTRY entry { op::Shape("f32[1,1,512,64]"))); } +TEST_F(SpmdPartitioningTest, + ConvolutionLhsTiledRhsTiled_UnevenDilatedRHSPartitioned) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %lhs = f32[8,28,28,8] parameter(0) + %lhs.copy = f32[8,28,28,8] copy(%lhs), sharding={devices=[1,4,1,1]0,1,2,3} + %rhs = f32[8,14,14,64] parameter(1) + %rhs.copy = f32[8,14,14,64] copy(%rhs), sharding={devices=[1,4,1,1]0,1,2,3} + ROOT %conv = f32[1,1,8,64] convolution(%lhs.copy, %rhs.copy), + window={size=14x14 pad=0_-1x0_-1 rhs_dilate=2x2}, + dim_labels=f01b_i01o->01bf, sharding={replicated} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/4)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + auto lhs = AllOf( + op::Copy(op::DynamicSlice(op::Parameter(), op::Constant(), op::Reshape(), + op::Constant(), op::Constant())), + op::Shape("f32[8,7,28,8]")); + auto rhs = AllOf(op::Pad(op::Parameter(), op::Constant()), + op::Shape("f32[8,16,14,64]")); + auto selected_rhs = AllOf( + op::Select(op::Compare(), + op::Copy(op::DynamicSlice(rhs, op::Constant(), op::Reshape(), + op::Constant(), op::Constant())), + op::Broadcast()), + op::Shape("f32[8,4,14,64]")); + auto right_halo = + AllOf(op::CollectivePermute(op::Slice(lhs)), op::Shape("f32[8,2,28,8]")); + auto selected_lhs = + AllOf(op::DynamicSlice( + op::Pad(op::Concatenate(lhs, right_halo), op::Constant()), + op::Constant(), op::Reshape(), op::Constant(), op::Constant()), + op::Shape("f32[8,7,28,8]")); + EXPECT_THAT(root, + AllOf(op::AllReduce(op::Convolution(selected_lhs, selected_rhs)), + op::Shape("f32[1,1,8,64]"))); +} + TEST_F(SpmdPartitioningTest, ConvolutionLhsTiledRhsTiledWithPadding) { const char* const hlo_string = R"( HloModule module From 2ea7d77bb08c1c989170528012b281c9ce1111ca Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Sun, 2 Aug 2020 13:58:08 -0700 Subject: [PATCH 1951/2522] Fix macOS build PiperOrigin-RevId: 324506921 Change-Id: I55f2354a7232949dd90de52ddb531041647455a0 --- tensorflow/compiler/xla/service/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 1491b9070ac..540cd7fecd2 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -296,6 +296,7 @@ cc_library( "//tensorflow/compiler/xla:xla_data_proto_cc", "//tensorflow/compiler/xla/service/cpu:runtime_single_threaded_matmul", "//tensorflow/core:lib", + "//third_party/eigen3", "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/base", "@com_google_absl//absl/container:inlined_vector", From 62800dd0f150eacfdcc4128d58f98de4e7ce493e Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Sun, 2 Aug 2020 22:33:05 +0000 Subject: [PATCH 1952/2522] build fix --- tensorflow/core/kernels/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 290773845db..589165fcd2f 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -2976,7 +2976,7 @@ tf_cc_tests( "//tensorflow/core:framework", 
"//tensorflow/core:test", "//tensorflow/core:test_main", - "//tensorflow/core/framework:tensor_testutil.h", + "//tensorflow/core/framework:tensor_testutil", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/strings", ], From 8148680637a37f3d0f9cea56111c760ad6f238c2 Mon Sep 17 00:00:00 2001 From: Rick Chao Date: Sun, 2 Aug 2020 18:02:39 -0700 Subject: [PATCH 1953/2522] PSv2: Move TF2 parameter server training main library code into OSS. PiperOrigin-RevId: 324524057 Change-Id: Ifa033dd03b0f1ca830b6b5dcdc71a8ea34b67d08 --- tensorflow/python/distribute/BUILD | 18 + tensorflow/python/distribute/client/BUILD | 104 ++ tensorflow/python/distribute/client/client.py | 1221 +++++++++++++++++ .../python/distribute/client/client_test.py | 388 ++++++ .../python/distribute/client/metric_utils.py | 79 ++ .../distribute/client/metric_utils_test.py | 69 + .../client/parameter_server_client.py | 55 + .../client/parameter_server_client_test.py | 405 ++++++ .../parameter_server_strategy_v2.py | 202 +++ tensorflow/tools/pip_package/BUILD | 3 + 10 files changed, 2544 insertions(+) create mode 100644 tensorflow/python/distribute/client/BUILD create mode 100644 tensorflow/python/distribute/client/client.py create mode 100644 tensorflow/python/distribute/client/client_test.py create mode 100644 tensorflow/python/distribute/client/metric_utils.py create mode 100644 tensorflow/python/distribute/client/metric_utils_test.py create mode 100644 tensorflow/python/distribute/client/parameter_server_client.py create mode 100644 tensorflow/python/distribute/client/parameter_server_client_test.py create mode 100644 tensorflow/python/distribute/parameter_server_strategy_v2.py diff --git a/tensorflow/python/distribute/BUILD b/tensorflow/python/distribute/BUILD index f7ec692a01f..c1b0ee6ce23 100644 --- a/tensorflow/python/distribute/BUILD +++ b/tensorflow/python/distribute/BUILD @@ -1760,3 +1760,21 @@ distribute_py_test( "@absl_py//absl/testing:parameterized", ], ) + +py_library( + name = "parameter_server_strategy_v2", + srcs = ["parameter_server_strategy_v2.py"], + srcs_version = "PY3", + deps = [ + ":parameter_server_strategy", + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python:util", + "//tensorflow/python:variables", + "//tensorflow/python/distribute:distribute_lib", + "//tensorflow/python/distribute:input_lib", + "//tensorflow/python/distribute:sharded_variable", + "//tensorflow/python/distribute:values", + ], +) diff --git a/tensorflow/python/distribute/client/BUILD b/tensorflow/python/distribute/client/BUILD new file mode 100644 index 00000000000..0f7b7df145f --- /dev/null +++ b/tensorflow/python/distribute/client/BUILD @@ -0,0 +1,104 @@ +load("//tensorflow:tensorflow.bzl", "tf_py_test") + +package( + default_visibility = ["//tensorflow:internal"], + licenses = ["notice"], # Apache 2.0 +) + +exports_files(["LICENSE"]) + +py_library( + name = "parameter_server_client", + srcs = ["parameter_server_client.py"], + srcs_version = "PY3", + deps = [ + ":client", + "//tensorflow/python/distribute:parameter_server_strategy_v2", + ], +) + +py_library( + name = "client", + srcs = ["client.py"], + srcs_version = "PY3", + deps = [ + ":metric_utils", + "//tensorflow/python:errors", + "//tensorflow/python:framework_ops", + "//tensorflow/python:func_graph", + "//tensorflow/python:resource_variable_ops", + "//tensorflow/python:training_server_lib", + "//tensorflow/python:util", + "//tensorflow/python/distribute:input_lib", + 
"//tensorflow/python/distribute:parameter_server_strategy_v2", + "//tensorflow/python/distribute:values", + "//tensorflow/python/eager:context", + "//tensorflow/python/eager:def_function", + "//tensorflow/python/eager:executor", + "//tensorflow/python/eager:function", + "//tensorflow/python/eager:remote", + "@absl_py//absl/logging", + "@six_archive//:six", + ], +) + +tf_py_test( + name = "client_test", + size = "small", + srcs = ["client_test.py"], + python_version = "PY3", + shard_count = 12, + deps = [ + ":client", + "//tensorflow/python:client_testlib", + "//tensorflow/python:training_lib", + "//tensorflow/python:util", + "//tensorflow/python/eager:def_function", + "@absl_py//absl/logging", + ], +) + +tf_py_test( + name = "parameter_server_client_test", + srcs = ["parameter_server_client_test.py"], + python_version = "PY3", + shard_count = 14, + tags = ["no_oss"], # TODO(b/162119374) + deps = [ + ":parameter_server_client", + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "//tensorflow/python:init_ops_v2", + "//tensorflow/python:training_server_lib", + "//tensorflow/python:variables", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/distribute:multi_worker_test_base", + "//tensorflow/python/distribute:sharded_variable", + "//tensorflow/python/distribute/cluster_resolver:cluster_resolver_lib", + "//tensorflow/python/eager:def_function", + "//tensorflow/python/eager:test", + ], +) + +py_library( + name = "metric_utils", + srcs = ["metric_utils.py"], + srcs_version = "PY3", + deps = [ + "//tensorflow/python/eager:monitoring", + ], +) + +tf_py_test( + name = "metric_utils_test", + srcs = ["metric_utils_test.py"], + python_version = "PY3", + deps = [ + ":client", + ":metric_utils", + "//tensorflow/python:training_server_lib", + "//tensorflow/python/distribute:multi_worker_test_base", + "//tensorflow/python/distribute/cluster_resolver:cluster_resolver_lib", + "//tensorflow/python/eager:test", + ], +) diff --git a/tensorflow/python/distribute/client/client.py b/tensorflow/python/distribute/client/client.py new file mode 100644 index 00000000000..533d5f19042 --- /dev/null +++ b/tensorflow/python/distribute/client/client.py @@ -0,0 +1,1221 @@ +# Lint as: python3 +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Module for `Client` and relevant cluster-worker related library. + +This is currently under development and the API is subject to change. 
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import contextlib +import enum +import functools +import os +import sys +import threading +import weakref +from absl import logging +from six.moves import queue +from tensorflow.python.distribute import distribute_lib +from tensorflow.python.distribute import input_lib +from tensorflow.python.distribute import parameter_server_strategy_v2 +from tensorflow.python.distribute.client import metric_utils +from tensorflow.python.eager import context +from tensorflow.python.eager import def_function +from tensorflow.python.eager import executor +from tensorflow.python.eager import function as tf_function +from tensorflow.python.eager import remote +from tensorflow.python.framework import errors +from tensorflow.python.framework import func_graph +from tensorflow.python.framework import ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.training import server_lib +from tensorflow.python.util import nest + +# Maximum time for failed worker to come back is 1 hour +_WORKER_MAXIMUM_RECOVERY_SEC = 3600 + +# Maximum size for queued closures, "infinite" if set to 0. +# When the maximum queue size is reached, further schedule calls will become +# blocking until some previously queued closures are executed on workers. +# Note that using an "infinite" queue size can take a non-trivial portion of +# memory, and even lead to client OOM. Modify the size to a smaller value for +# client with constrained memory resource (only recommended for advanced users). +# Also used in unit tests to ensure the correctness when the queue is full. +_CLOSURE_QUEUE_MAX_SIZE = 256 * 1024 + +# RPC error message from PS +_RPC_ERROR_FROM_PS = "GRPC error information from remote target /job:ps" + +# InvalidArgumentError (unknown device) will not have "GRPC error..." string. +_JOB_WORKER_STRING_IDENTIFIER = "/job:worker" + + +class _RemoteValueStatus(enum.Enum): + """The status of a `RemoteValue` object. + + A `RemoteValue` object can have three states: + 1) not ready: no value, no non-retryable error and not aborted; + 2) aborted: i.e. the execution of function was aborted because of task + failure, but can be retried; + 3) ready: i.e. has value or has non-tryable error; + + The initial state of a `RemoteValue` is "not ready". When its corresponding + closure has + been executed at least once, it will become aborted or ready. The state + transitions are: + 1) not ready -> 2) aborted: + when the corresponding closure is aborted due to worker failure, and the + worker failure is not immediately handled. + 1) not ready -> 3) ready: + when the corresponding closure has been executed successfully. + 2) aborted -> 3) ready: + when the `RemoteValue` is rebuilt by rerunning the corresponding closure + and the closure has been executed successfully. + 3) ready -> 2) aborted: + when the corresponding closure had been executed successfully but later + the corresponding remote worker failed. This is currently only implemented + for resource `RemoteValue` like iterators. + """ + NOT_READY = "NOT_READY" + ABORTED = "ABORTED" + READY = "READY" + + +class RemoteValue(object): + """An asynchronously available value of a remotely executed function. + + `RemoteValue` class is used as the return value of `Client.schedule()` where + the underlying concrete value comes at a later time once the function has been + remotely executed. 
`RemoteValue` can be used as an input to a subsequent + function scheduled with `Client.schedule()`. + + Note: this class is not thread-safe. + """ + + def __init__(self, closure, type_spec): + self._closure = closure + # The type spec for this `RemoteValue` which is used to trace functions that + # take this `RemoteValue` as input. + self._type_spec = func_graph.convert_structure_to_signature(type_spec) + self._value = None + self._error = None + self._status_available_event = threading.Event() + self._status = _RemoteValueStatus.NOT_READY + + def _set_aborted(self): + self._status = _RemoteValueStatus.ABORTED + self._value = None + self._error = None + + # Wake up any waiting thread and clear the event. + self._status_available_event.set() + + def _rebuild_on(self, worker): + self._status_available_event.clear() + # TODO(yuefengz): we may need to rebuild its inputs as well. + self._closure.execute_on(worker) + + def _set_value(self, value): + self._status = _RemoteValueStatus.READY + self._value = value + self._error = None + self._status_available_event.set() + + def _set_error(self, exception): + self._status = _RemoteValueStatus.READY + self._value = None + self._error = exception + self._status_available_event.set() + + def _get_value(self): + self._status_available_event.wait() + return self._value + + def _get_error(self): + self._status_available_event.wait() + return self._error + + def _set_type_spec(self, type_spec): + self._type_spec = func_graph.convert_structure_to_signature(type_spec) + + def fetch(self): + """Wait for the result of RemoteValue to be ready and return the result. + + Returns: + The remote value, as a numpy data type (if scalar) or ndarray. + + Raises: + FunctionRetryableError: If the function that produces this `RemoteValue` + is aborted or cancelled due to failure, and the user should handle and + reschedule. + """ + self._status_available_event.wait() + if self._status is _RemoteValueStatus.ABORTED: + raise FunctionRetryableError( + "The corresponding function is aborted. Please reschedule the " + "function.") + if self._error is not None: + raise self._error # pylint: disable=raising-bad-type + else: + if isinstance(self._value, + (ops.Tensor, resource_variable_ops.BaseResourceVariable)): + return self._value.numpy() + else: + return self._value + + +class InputError(Exception): + + def __init__(self, original_exception): + message = ("Input has an error, the original exception is %r, " + "error message is %s." % + (original_exception, str(original_exception))) + super().__init__(message) + + +class FunctionRetryableError(Exception): + """An error that represents the closure was aborted and should be retried.""" + pass + + +def _maybe_get_error_and_rebuild_remote_values(worker, structure): + """Attempts to return errors from `RemoteValue`s. 
Rebuilds them if needed.""" + errors_in_structure = [] + + def _get_error(val): + if isinstance(val, RemoteValue): + if val._status is _RemoteValueStatus.ABORTED: # pylint: disable=protected-access + with worker.failure_handler.wait_on_failure( + on_recovery_fn=functools.partial(val._rebuild_on, worker), # pylint: disable=protected-access + worker_device_name=worker.device_name): + val._rebuild_on(worker) # pylint: disable=protected-access + error = val._get_error() # pylint: disable=protected-access + if error: + errors_in_structure.append(error) + + nest.map_structure(_get_error, structure) + if errors_in_structure: + return errors_in_structure[0] + else: + return None + + +def _maybe_get_remote_value(val): + """Gets the value of `val` if it is a `RemoteValue`.""" + if isinstance(val, RemoteValue): + error = val._get_error() # pylint: disable=protected-access + if error: + raise AssertionError( + "RemoteValue doesn't have a value because it has errors.") + else: + return val._get_value() # pylint: disable=protected-access + else: + return val + + +def _maybe_as_type_spec(val): + if isinstance(val, RemoteValue): + if val._type_spec is None: # pylint: disable=protected-access + raise ValueError("Output of a scheduled function that is not " + "tf.function cannot be the input of another function.") + return val._type_spec # pylint: disable=protected-access + else: + return val + + +class PerWorkerValues(object): + """Holds a list of per worker values.""" + + def __init__(self, values): + self._values = tuple(values) + + +class Closure(object): + """Hold a function to be scheduled and its arguments.""" + + def __init__(self, function, args=None, kwargs=None): + if not callable(function): + raise ValueError("Function passed to `Client.schedule` must be a " + "callable object.") + self._args = args or () + self._kwargs = kwargs or {} + self._function = function + + if isinstance(function, def_function.Function): + replica_args = self._select_worker_slice(0, self._args) + replica_kwargs = self._select_worker_slice(0, self._kwargs) + + # Note: no need to handle function registration failure since this kind of + # failure will not raise exceptions as designed in the runtime. The client + # has to rely on subsequent operations that raise to catch function + # registration failure. + + # Record the function tracing overhead. Note that we pass in the tracing + # count of the def_function.Function as a state tracker, so that metrics + # will only record the time for actual function tracing (i.e., excluding + # function cache lookups). + with metric_utils.monitored_timer( + "function_tracing", state_tracker=function._get_tracing_count): # pylint: disable=protected-access + concrete_function = function.get_concrete_function( + *nest.map_structure(_maybe_as_type_spec, replica_args), + **nest.map_structure(_maybe_as_type_spec, replica_kwargs)) + self._output_remote_values = nest.map_structure( + lambda x: RemoteValue(self, x), concrete_function.structured_outputs) + elif isinstance(function, tf_function.ConcreteFunction): + self._output_remote_values = nest.map_structure( + lambda x: RemoteValue(self, x), function.structured_outputs) + else: + # Regular python functions. + # TODO(yuefengz): maybe we should trace python functions if their inputs + # are Python primitives, tensors and composite tensors. 
+ self._output_remote_values = RemoteValue(self, None) + + def _select_worker_slice(self, worker_id, structured): + """Selects the worker slice of each of the items in `structured`.""" + + def _get(x): + return x._values[worker_id] if isinstance(x, PerWorkerValues) else x # pylint: disable=protected-access + + return nest.map_structure(_get, structured) + + def _fetch_output_remote_values(self): + """Temporary method used to sync the scheduler.""" + # It will do nothing if there is no return value. + nest.map_structure(lambda x: x.fetch(), self._output_remote_values) # pylint: disable=protected-access + + def _set_output_remote_values_aborted(self): + """Set output remote_value aborted.""" + # It will do nothing if there is no return value. + nest.map_structure(lambda x: x._set_aborted(), self._output_remote_values) # pylint: disable=protected-access + + def _set_output_remote_values_cancelled(self): + nest.map_structure( + lambda x: x._set_error( # pylint: disable=protected-access,g-long-lambda + FunctionRetryableError("The corresponding function is " + "cancelled. Please reschedule the " + "function.")), + self._output_remote_values) # pylint: disable=protected-access + + def execute_on(self, worker): + """Executes the closure on the given worker. + + Args: + worker: a `Worker` object. + """ + replica_args = self._select_worker_slice(worker.worker_index, self._args) + replica_kwargs = self._select_worker_slice(worker.worker_index, + self._kwargs) + + e = ( + _maybe_get_error_and_rebuild_remote_values(worker, replica_args) or + _maybe_get_error_and_rebuild_remote_values(worker, replica_kwargs)) + if e: + if not isinstance(e, InputError): + e = InputError(e) + for remote_value in nest.flatten(self._output_remote_values): + remote_value._set_error(e) # pylint: disable=protected-access + return + + with ops.device(worker.device_name): + with context.executor_scope(worker.executor): + with metric_utils.monitored_timer("closure_execution"): + output_value = self._function( + *nest.map_structure(_maybe_get_remote_value, replica_args), + **nest.map_structure(_maybe_get_remote_value, replica_kwargs)) + for remote_value, value in zip( + nest.flatten(self._output_remote_values), nest.flatten(output_value)): + remote_value._set_value(value) # pylint: disable=protected-access + + +class _CoordinatedClosureQueue(object): + """Manage a queue of closures, inflight count and errors from execution. + + This class is thread-safe. + """ + + def __init__(self): + + # `self._inflight_closure_count` only tracks the number of inflight closures + # that are "in generation". Once an error occurs, error generation is + # incremented and all subsequent arriving closures (from inflight) are + # considered "out of generation". + self._inflight_closure_count = 0 + + self._queue_lock = threading.Lock() + # Condition indicating that all pending closures (either queued or inflight) + # have been processed, failed, or cancelled. + self._stop_waiting_condition = threading.Condition(self._queue_lock) + # Condition indicating that an item becomes available in queue (not empty). + self._closures_queued_condition = threading.Condition(self._queue_lock) + # Condition indicating that a queue slot becomes available (not full). + # Note that even with "infinite" queue size, there is still a "practical" + # size limit for the queue depending on host memory capacity, and thus the + # queue will eventually become full with a lot of enqueued closures. 
+ self._queue_free_slot_condition = threading.Condition(self._queue_lock) + + if _CLOSURE_QUEUE_MAX_SIZE <= 0: + logging.warning( + "In ParameterServerClient, creating an infinite closure queue can " + "consume a significant amount of memory and even lead to OOM.") + self._queue = queue.Queue(maxsize=_CLOSURE_QUEUE_MAX_SIZE) + self._error = None + + # Error generation is a counter that helps us track whether a closure + # should be cancelled when it is being put back to `self._queue`. It works + # in the following way: + # 1) Error generation starts off at 0. + # 2) When a worker thread calls `get()`, the closure's error generation + # is copied from this queue's error generation. + # 3) If any worker thread experiences an error that's categorized as a + # non-retryable error, the queue's error will be set, error generation + # increments by 1, and the queue is cleared (with the closures marked + # with cancelled error), so other worker threads stop getting closures + # from the queue. Worker preemption is categorized as a retryable error. + # 4) At this point, if `put()` or `wait()` is called (usually by the main + # thread via `schedule` and `join`), the error is raised through that + # call. + # 5) The closures that are inflight, i.e. that are being executed remotely, + # will not be aware of such error event. If the worker that's executing + # the closure happens to be interrupted, the closure should not be put + # back to the queue, and be cancelled with error instead. Checking the + # generation id of the closure and queue is how the worker thread tells + # whether the closure should be put back. Likewise for `mark_finished` + # and `mark_failed`: if the arriving closure is considered out of + # generation in those two methods, it is simply discarded (the inflight + # closure count still decrements). + self._error_generation = 0 + + # The following is a lock to make sure when `wait` is called and before it + # returns no `put` can be executed during this period. It is because `wait` + # won't know what to do with newly put closures. This lock adds an cutoff + # for `wait` so that closures put into the queue while waiting would not be + # taken responsible by this `wait`. + # + # We cannot reuse the `self._queue_lock` since when `wait` waits for a + # condition, the `self._queue_lock` will be released. + # + # We don't use a reader/writer's lock on purpose to reduce the complexity + # of the code. + self._put_wait_lock = threading.Lock() + + def _cancel_closures_in_queue(self): + """Clears the queue and sets remaining closures cancelled error. + + This method expects self._queue_lock to be held prior to entry. + """ + while True: + try: + closure = self._queue.get(block=False) + self._queue_free_slot_condition.notify() + closure._set_output_remote_values_cancelled() # pylint: disable=protected-access + except queue.Empty: + break + + def _raise_if_error(self): + """Raises the error if one exists. + + If an error exists, cancel the closures in queue, raises it, and clear + the error. + + This method expects self._queue_lock to be held prior to entry. + """ + if self._error: + try: + self._cancel_closures_in_queue() + raise self._error # pylint: disable=raising-bad-type + finally: + self._error = None + + def put(self, closure): + """Put a closure into the queue for later execution. + + If `mark_failed` was called before `put`, the error from the first + invocation of `mark_failed` will be raised. + + Args: + closure: The `Closure` to put into the queue. 
+ """ + with self._put_wait_lock, self._queue_lock: + self._queue_free_slot_condition.wait_for(lambda: not self._queue.full()) + self._queue.put(closure, block=False) + self._raise_if_error() + self._closures_queued_condition.notify() + + def get(self, timeout=None): + """Return a closure from the queue to be executed.""" + with self._queue_lock: + while self._queue.empty(): + if not self._closures_queued_condition.wait(timeout=timeout): + return None + closure = self._queue.get(block=False) + self._queue_free_slot_condition.notify() + closure._error_generation = self._error_generation # pylint: disable=protected-access + self._inflight_closure_count += 1 + return closure + + def mark_finished(self, closure): + """Let the queue know that a closure has been successfully executed.""" + with self._queue_lock: + if self._inflight_closure_count < 1: + raise AssertionError("There is no inflight closures to mark_finished.") + self._inflight_closure_count -= 1 + if self._queue.empty() and self._inflight_closure_count == 0: + self._stop_waiting_condition.notifyAll() + + def put_back(self, closure): + """Put the closure back into the queue as it was not properly executed.""" + with self._queue_lock: + if self._inflight_closure_count < 1: + raise AssertionError("There is no inflight closures to put_back.") + self._inflight_closure_count -= 1 + if closure._error_generation < self._error_generation: # pylint: disable=protected-access + # If the closure to put back is out of generation, cancel the closure + # and ignore it. + logging.info("Function %r should no longer be dispatched; marking " + "as cancelled.") + closure._set_output_remote_values_cancelled() # pylint: disable=protected-access + return + self._queue_free_slot_condition.wait_for(lambda: not self._queue.full()) + self._queue.put(closure, block=False) + self._closures_queued_condition.notify() + + def wait(self, timeout=None): + """Wait for all closures to be finished before returning. + + If `mark_failed` was called before or during `wait`, the error from the + first invocation of `mark_failed` will be raised. + + Args: + timeout: A float specifying a timeout for the wait in seconds. + + Returns: + True unless the given timeout expired, in which case it returns False. + """ + with self._put_wait_lock, self._queue_lock: + while (not self._error and + (not self._queue.empty() or self._inflight_closure_count > 0)): + if not self._stop_waiting_condition.wait(timeout=timeout): + return False + self._raise_if_error() + return True + + def mark_failed(self, e, closure): + """Sets error and unblocks any wait() call.""" + with self._queue_lock: + # TODO(yuefengz): maybe record all failure and give users more + # information? + if self._inflight_closure_count < 1: + raise AssertionError("There is no inflight closures to mark_failed.") + self._inflight_closure_count -= 1 + if closure._error_generation < self._error_generation: # pylint: disable=protected-access + # If the closure to mark fail is out of generation, simply ignore it + # (with the actual error associated with the closure preserved). + return + assert self._error is None + self._error = e + self._error_generation += 1 + self._cancel_closures_in_queue() + self._stop_waiting_condition.notifyAll() + + def done(self): + """Returns true if the queue is empty and there is no inflight closure. + + If `mark_failed` was called before `done`, the error from the first + invocation of `mark_failed` will be raised. 
+ """ + with self._queue_lock: + self._raise_if_error() + return self._queue.empty() and self._inflight_closure_count == 0 + + +class WorkerPreemptionHandler(object): + """Handles worker preemptions.""" + + def __init__(self, server_def): + self._server_def = server_def + self._cluster_update_lock = threading.Lock() + self._cluster_due_for_update = threading.Event() + self._worker_up_cond = threading.Condition(self._cluster_update_lock) + threading.Thread(target=self._preemption_handler, + name="WorkerPreemptionHandler", + daemon=True).start() + + def _validate_preemption_failure(self, e): + """Validates that the given exception represents worker preemption.""" + if _is_worker_failure(e): + return + raise e + + @contextlib.contextmanager + def wait_on_failure(self, + on_failure_fn=None, + on_recovery_fn=None, + worker_device_name="(unknown)"): + """Catches worker preemption error and wait until failed workers are back. + + Args: + on_failure_fn: an optional function to run if preemption happens. + on_recovery_fn: an optional function to run when a worker is recovered + from preemption. + worker_device_name: the device name of the worker instance that is passing + through the failure. + + Yields: + None. + """ + try: + yield + except errors.OpError as e: + self._validate_preemption_failure(e) + logging.error("Worker %s failed with error: %s", worker_device_name, e) + if on_failure_fn: + on_failure_fn() + + with self._cluster_update_lock: + self._cluster_due_for_update.set() + self._worker_up_cond.wait(_WORKER_MAXIMUM_RECOVERY_SEC) + logging.info("Worker %s has been recovered.", worker_device_name) + + if on_recovery_fn: + with self.wait_on_failure( + on_recovery_fn=on_recovery_fn, + worker_device_name=worker_device_name): + on_recovery_fn() + + def _preemption_handler(self): + """A loop that handles preemption. + + This loop waits for signal of worker preemption and upon worker preemption, + it waits until all workers are back and updates the cluster about the + restarted workers. + """ + while True: + self._cluster_due_for_update.wait() + with self._cluster_update_lock: + try: + # TODO(haoyuzhang): support partial cluster recovery + logging.info("Cluster now being recovered.") + context.context().update_server_def(self._server_def) + + # Cluster updated successfully, clear the update signal, and notify + # all workers that they are recovered from failure. + logging.info("Cluster successfully recovered.") + self._worker_up_cond.notify_all() + self._cluster_due_for_update.clear() + except Exception as e: # pylint: disable=broad-except + self._validate_preemption_failure(e) + # NOTE: Since the first RPC (GetStatus) of update_server_def is + # currently blocking by default, error should only happen if: + # (1) More workers failed while waiting for the previous workers to + # come back; + # (2) Worker failed when exchanging subsequent RPCs after the first + # RPC returns. + # Consider adding backoff retry logic if we see the error logged + # too frequently. + logging.error("Cluster update failed with error: %s. Retrying...", e) + + +class Worker(object): + """A worker in a cluster. + + Attributes: + worker_index: The index of the worker in the cluster. + device_name: The device string of the worker, e.g. "/job:worker/task:1". + executor: The worker's executor for remote function execution. + failure_handler: The failure handler used to handler worker preemption + failure. 
+ """ + + def __init__(self, worker_index, device_name, cluster): + self.worker_index = worker_index + self.device_name = device_name + self.executor = executor.new_executor(enable_async=False) + self.failure_handler = cluster.failure_handler + self._cluster = cluster + self._resource_remote_value_refs = [] + + # Worker threads need to start after `Worker`'s initialization. + threading.Thread(target=self._process_queue, + name="WorkerClosureProcessingLoop-%d" % self.worker_index, + daemon=True).start() + + def _set_resources_aborted(self): + # TODO(yuefengz): maybe we can query whether a tensor is valid or not + # instead of marking a tensor aborted? + for weakref_resource in self._resource_remote_value_refs: + resource = weakref_resource() + if resource: + resource._set_aborted() # pylint: disable=protected-access + + def _set_dead(self): + raise NotImplementedError("_set_dead is not implemented.") + + def _process_closure(self, closure): + """Runs a closure with preemption handling.""" + try: + with self._cluster.failure_handler.wait_on_failure( + on_failure_fn=lambda: self._cluster._closure_queue.put_back(closure), # pylint: disable=protected-access + on_recovery_fn=self._set_resources_aborted, + worker_device_name=self.device_name): + closure.execute_on(self) + # TODO(yuefengz): we don't have to materialize results every step. + with metric_utils.monitored_timer("remote_value_fetch"): + closure._fetch_output_remote_values() # pylint: disable=protected-access + self._cluster._closure_queue.mark_finished(closure) # pylint: disable=protected-access + except Exception as e: # pylint: disable=broad-except + logging.error( + "/job:worker/task:%d encountered the following error when processing " + "closure: %r:%s", self.worker_index, e, e) + nest.map_structure( + lambda x: x._set_error(e), # pylint: disable=protected-access + closure._output_remote_values) # pylint: disable=protected-access + self._cluster._closure_queue.mark_failed(e, closure) # pylint: disable=protected-access + + def _process_queue(self): + while True: + closure = self._cluster._closure_queue.get() # pylint: disable=protected-access + self._process_closure(closure) + + def _create_resource(self, function, args=None, kwargs=None): + """Synchronously creates a per-worker resource represented by a `RemoteValue`. + + Args: + function: the resource function to be run remotely. It should be a + `tf.function`, a concrete function or a Python function. + args: positional arguments to be passed to the function. + kwargs: keyword arguments to be passed to the function. + + Returns: + one or several RemoteValue objects depending on the function return + values. + """ + # Some notes about the concurrency: currently all the activities related to + # the same worker such as creating resources, setting resources' aborted + # status, and executing closures happen on the same thread. This allows us + # to have simpler logic of concurrency. + closure = Closure(function=function, args=args, kwargs=kwargs) + resource_remote_value = closure._output_remote_values # pylint: disable=protected-access + self._register_resource(resource_remote_value) + + # The following is a short-term solution to lazily create resources in + # parallel. + # TODO(b/160343165): we should create resources eagerly, i.e. schedule the + # resource creation function as soon as users call this method. 
+ resource_remote_value._set_aborted() # pylint: disable=protected-access + return resource_remote_value + + def _register_resource(self, resource_remote_value): + if not isinstance(resource_remote_value, RemoteValue): + raise ValueError( + "Resource being registered is not of type `RemoteValue`.") + self._resource_remote_value_refs.append(weakref.ref(resource_remote_value)) + + +class Cluster(object): + """A cluster with workers. + + We assume all function errors are fatal and based on this assumption our + error reporting logic is: + 1) Both `schedule` and `join` can raise a non-retryable error which is the + first error seen by the client from any previously scheduled functions. + 2) When an error is raised, there is no guarantee on how many previously + scheduled functions have been executed; functions that have not been executed + will be thrown away and marked as cancelled. + 3) After an error is raised, the internal state of error will be cleared. + I.e. functions can continue to be scheduled and subsequent calls of `schedule` + or `join` will not raise the same error again. + + Attributes: + failure_handler: The failure handler used to handler worker preemption + failure. + workers: a list of `Worker` objects in the cluster. + """ + + def __init__(self, cluster_resolver, client_name="chief"): + """Initializes the cluster instance and connect to the remote cluster.""" + if client_name in ["worker", "ps"]: + raise ValueError("Client name should not be 'worker' or 'ps'.") + cluster_spec = cluster_resolver.cluster_spec() + + self._num_workers = len(cluster_spec.as_dict().get("worker", ())) + self._num_ps = len(cluster_spec.as_dict().get("ps", ())) + device_filters = server_lib.ClusterDeviceFilters() + # For any worker, only the devices on PS and chief nodes are visible + for i in range(self._num_workers): + device_filters.set_device_filters( + "worker", i, ["/job:ps", "/job:%s" % client_name]) + # Similarly for any ps, only the devices on workers and chief are visible + for i in range(self._num_ps): + device_filters.set_device_filters( + "ps", i, ["/job:worker", "/job:%s" % client_name]) + + context.context().mirroring_policy = context.MIRRORING_ALL + # Allow at most one outstanding RPC for each worker at a certain time. This + # is to simplify worker failure handling in the runtime + os.environ["TF_ENABLE_EAGER_CLIENT_STREAMING_ENQUEUE"] = "False" + remote.connect_to_cluster(cluster_spec, + job_name=client_name, + protocol=cluster_resolver.rpc_layer, + cluster_device_filters=device_filters) + + self._closure_queue = _CoordinatedClosureQueue() + self.failure_handler = WorkerPreemptionHandler(context.get_server_def()) + worker_device_strings = [ + "/job:worker/replica:0/task:%d" % i for i in range(self._num_workers) + ] + self.workers = [ + Worker(i, w, self) for i, w in enumerate(worker_device_strings) + ] + + def schedule(self, function, args, kwargs): + """Schedules `function` to be dispatched to a worker for execution. + + Args: + function: The function to be dispatched to a worker for execution + asynchronously. + args: Positional arguments for `fn`. + kwargs: Keyword arguments for `fn`. + + Returns: + A structure of `RemoteValue` object. 
+ """ + closure = Closure(function=function, args=args, kwargs=kwargs) + self._closure_queue.put(closure) + return closure._output_remote_values # pylint: disable=protected-access + + def join(self): + """Blocks until all scheduled functions are executed.""" + self._closure_queue.wait() + + def done(self): + """Returns true if all scheduled functions are executed.""" + return self._closure_queue.done() + + +class ParameterServerFailureError(Exception): + """An error representing at least one parameter server is interrupted.""" + pass + + +class Client(object): + """An object to schedule and orchestrate remote function execution. + + A `Client` object represents a program used to create dataset, schedule + functions to be executed, and fetch the results of the functions. Operations + that will involve other tasks in the cluster, such as variable creation, + reading variables etc., should be performed within `client.context()`. + + Currently, `Client` is not supported to be used in a standalone manner. + It should be used in conjunction with `ParameterServerStrategyV2`. The + recommended way of using the combination is through a `ParameterServerClient` + object. Please see `ParameterServerClient` for more information. + + This is currently under development, and the API as well as implementation + is subject to changes. + """ + + def __init__(self, strategy): + """Initialization of a `Client` instance. + + This connects the client to remote workers and parameter servers, through + a `tf.config.experimental_connect_to_cluster` call. + + Args: + strategy: a `tf.distribute.Strategy` object. Currently, only + `ParameterServerStrategyV2` is supported. + + Raises: + ValueError: if the strategy being used is not supported. + """ + if not isinstance(strategy, + parameter_server_strategy_v2.ParameterServerStrategyV2): + raise ValueError("Only `ParameterServerStrategyV2` is supported in " + "`Client` currently.") + self._strategy = strategy + self.cluster = Cluster(strategy._cluster_resolver) + + @contextlib.contextmanager + def context(self): + """Context manager under which client distribution is in effect. + + All distribution related methods using this `Client`, including those that + create and update variables, should be used within this context. This + context manager handles cluster fault tolerance in remote function + execution. + + The context manager calls `join` automatically when exiting successfully. + + Entering `Client.context` also enters the underlying strategy's scope, and + this means that `tf.distribute.get_strategy()` will return the strategy + object being used. + + Yields: + Nothing. + """ + with self._strategy.scope(), self._handle_parameter_server_failure(): + yield + self.join() + + @contextlib.contextmanager + def experimental_variable_partitioning_scope(self): + with self._strategy.experimental_variable_partitioning_scope(): + yield + + (experimental_variable_partitioning_scope.__doc__) = ( + parameter_server_strategy_v2.ParameterServerStrategyV2 + .experimental_variable_partitioning_scope.__doc__) + + def schedule(self, fn, args=None, kwargs=None): + """Schedules `fn` to be dispatched to a worker for execution asynchronously. + + When calling `schedule` with a function `fn`, `fn` will be executed on a + remote worker at some later time. The process is asynchronous, meaning + `schedule` returns immediately, possibly without having the result ready + yet. `schedule` returns a structure of `RemoteValue` object, which wraps the + output of the function. 
Call `fetch()` on `RemoteValue` to wait for the + function execution to finish and retrieve its output from the remote worker. + + `schedule` guarantees that `fn` will be executed on a worker at least once; + it could be more than once if a worker fails and restarts in the middle of + function scheduling. Note that since worker can fail at any point when + executing the function, it is possible that the function is partially + executed, but `Client` guarantees that in those events, the function will + eventually be fully executed, possibly on a different worker that is + available. + + If any previously scheduled function raises an error, `schedule` will fail + by raising any one of those errors, and clear the errors collected so far. + There are two implications when this happens: 1) user should call `schedule` + with `fn` again to re-schedule, and 2) some of the previously scheduled + functions may no longer execute. User can call `fetch` on the returned + `RemoteValue` to inspect if they have executed, failed, or cancelled, and + reschedule the corresponding function if needed. + + When `schedule` raises, it is possible that there are still functions being + executed on workers, at the time `schedule` raises. When this happens, users + can call `join` again to wait for all pending async function execution to + finish, and bring the cluster into a consistent state. + + At this time, there is no support of worker assignment for function + execution, or priority of the workers. + + `args` and `kwargs` are the arguments passed into `fn`, when `fn` is + executed on a worker. They can be `PerWorkerValues`, which is a collection + of values, each of which represents a component specific to a worker; in + this case, the argument will be substituted with the corresponding component + on the target worker. Arguments that are not `PerWorkerValues` will be + passed into `fn` as-is. + + Args: + fn: A `tf.function`; the function to be dispatched to a worker for + execution asynchronously. + args: Positional arguments for `fn`. + kwargs: Keyword arguments for `fn`. + + Returns: + A structure of `RemoteValue` object. + + Raises: + Exception: one of the exceptions caught by the client by any previously + scheduled function since the last time an error was thrown or since + the beginning of the program. + """ + # TODO(b/160702436): Invoke `strategy.run` for user's function so it enters + # a `ReplicaContext` in a logically correct way. + with distribute_lib.ReplicaContext( + self._strategy, replica_id_in_sync_group=0): + with self._translate_parameter_server_failure(): + return self.cluster.schedule(fn, args=args, kwargs=kwargs) + + def join(self): + """Blocks until all the scheduled functions have finished execution. + + If any previously scheduled function raises an error, `join` will fail by + raising any one of those errors, and clear the errors collected so far. If + this happens, some of the previously scheduled functions may no longer + execute. Users can call `fetch` on the returned `RemoteValue` to inspect if + they have executed, failed, or cancelled. If some that have been cancelled + need to be rescheduled, users should call `schedule` with the function + again. + + Note: `join` raises an exception as soon as the client detects one, and this + means it is possible that there are still functions being executed on + workers, at the time `join` raises. 
When this happens, users can call `join` + again to wait for all pending async function execution to finish, and bring + the cluster into a consistent state. + + Raises: + Exception: one of the exceptions caught by the client by any previously + scheduled function since the last time an error was thrown or since + the beginning of the program. + """ + # TODO(b/159486639): Update the docs once we can cancel the functions being + # executed on workers, that when `join` returns, the system is stabilized. + with self._translate_parameter_server_failure(): + self.cluster.join() + + def done(self): + """Returns whether all the scheduled functions have finished execution. + + If any previously scheduled function raises an error, `done` will fail by + raising any one of those errors. + """ + return self.cluster.done() + + def create_per_worker_dataset(self, dataset_fn): + """Create dataset on workers by calling `dataset_fn` on worker devices. + + This creates the given dataset generated by dataset_fn on the workers + and returns an object that represents the collection of those individual + datasets. Calling `iter` on such collection of dataset returns a + `PerWorkerValues`, which is a collection of iterators, where the iterators + have been placed on respective workers. + + Calling `next` on this `PerWorkerValues` of iterators is currently + unsupported; it is meant to be passed as an argument into `Client.schedule`. + When the scheduled function is picked up and being executed by a worker, the + function will receive the individual iterator that corresponds to the + worker, and now `next` can be called on iterator to get the next (batch or + example) of data. + + Dataset shuffling and repeating are usually needed in `dataset_fn`; however, + sharding is not recommended: some worker may not be available and those + examples may be skipped and not covered by other workers, if the dataset is + sharded. + + Args: + dataset_fn: The dataset function that returns a dataset. This is to be + executed on the workers. + + Returns: + An object that represents the collection of those individual + datasets. `iter` is expected to be called on this object that returns + a `PerWorkerValues` of the iterators (that are on the workers). + """ + input_workers = input_lib.InputWorkers([ + (w.device_name, [w.device_name]) for w in self.cluster.workers + ]) + + return _PerWorkerDistributedDataset(dataset_fn, input_workers, self) + + def _create_per_worker_resources(self, fn, args=None, kwargs=None): + """Synchronously create resources on the workers. + + The resources are represented by `RemoteValue`s. + + Args: + fn: The function to be dispatched to all workers for execution + asynchronously. + args: Positional arguments for `fn`. + kwargs: Keyword arguments for `fn`. + + Returns: + A `PerWorkerValues` object, which wraps a tuple of `RemoteValue` objects. + """ + results = [] + for w in self.cluster.workers: + results.append(w._create_resource(fn, args=args, kwargs=kwargs)) # pylint: disable=protected-access + return PerWorkerValues(tuple(results)) + + def fetch(self, val): + """Blocking call to fetch results from `RemoteValue`s. + + This returns the execution result of `RemoteValue`s; if not ready, + waiting for it while blocking the caller. + + Args: + val: The value to fetch the results from. If this is structure of + `RemoteValue`, `fetch()` will be called on the individual `RemoteValue` + to get the result. 
+ + Returns: + If `val` is a `RemoteValue` or a structure of `RemoteValue`s, returns + the fetched `RemoteValue` value immediately if it's available, or blocks + the call until it's available, and returns the fetched `RemoteValue` + values with the same structure. If `val` is other types, return (`val`,). + """ + + def _maybe_fetch(val): + if isinstance(val, RemoteValue): + return val.fetch() + else: + return val + + # TODO(yuefengz): we should fetch values in a batch. + result = nest.map_structure(_maybe_fetch, val) + if not isinstance(result, tuple): + return (result,) + return result + + # pylint: disable=missing-function-docstring + @contextlib.contextmanager + def _translate_parameter_server_failure(self): + try: + yield + except Exception as e: # pylint: disable=broad-except + if _is_ps_failure(e): + logging.exception("Encountered parameter server failures!") + raise ParameterServerFailureError(e) + else: + raise + + # pylint: disable=missing-function-docstring + @contextlib.contextmanager + def _handle_parameter_server_failure(self): + try: + with self._translate_parameter_server_failure(): + yield + except ParameterServerFailureError as e: # pylint: disable=broad-except + restart_exit_code = os.environ.get( + "TF_CLIENT_NON_FATAL_RESTART_EXIT_CODE", None) + if restart_exit_code is not None: + sys.exit(int(restart_exit_code)) + else: + raise + + +class _PerWorkerDistributedDataset(object): # pylint: disable=protected-access + """Represents worker-distributed datasets created from dataset function.""" + + def __init__(self, dataset_fn, input_workers, client): + """Makes an iterable from datasets created by the given function. + + Args: + dataset_fn: A function that returns a `Dataset`. + input_workers: an `InputWorkers` object. + client: a `Client` object, used to create dataset resources. + """ + def disallow_variable_creation(next_creator, **kwargs): + raise ValueError("Creating variables in `dataset_fn` is not allowed.") + + if isinstance(dataset_fn, def_function.Function): + with variable_scope.variable_creator_scope(disallow_variable_creation): + self._dataset_fn = dataset_fn.get_concrete_function() + elif isinstance(dataset_fn, tf_function.ConcreteFunction): + self._dataset_fn = dataset_fn + else: + with variable_scope.variable_creator_scope(disallow_variable_creation): + self._dataset_fn = def_function.function( + dataset_fn).get_concrete_function() + self._input_workers = input_workers + self._client = client + self._element_spec = None + + def __iter__(self): + # We would like users to create iterators outside `tf.function`s so that we + # can track them. + if (not context.executing_eagerly() or + ops.get_default_graph().building_function): + raise RuntimeError( + "__iter__() is not supported inside of tf.function or in graph mode.") + + def _create_per_worker_iterator(): + dataset = self._dataset_fn() + return iter(dataset) + + # If _PerWorkerDistributedDataset.__iter__ is called multiple + # times, for the same object it should only create and register resource + # once. Using object id to distinguish different iterator resources. + per_worker_iterator = self._client._create_per_worker_resources( + _create_per_worker_iterator) + + # Create an iterator, so the consumer function of this iterator can start + # tracing using this iterator without needing to wait for the completion of + # the iterater creation. Note: the iterator shouldn't use memory until it is + # consumed. + # TODO(b/154675763): get rid of this workaround once we can make input_fn a + # tf.function. 
+ iterator = _create_per_worker_iterator() + for iterator_remote_value in per_worker_iterator._values: + iterator_remote_value._set_type_spec(iterator._type_spec) + return _PerWorkerDistributedIterator(per_worker_iterator._values) + + @property + def element_spec(self): + """The type specification of an element of this dataset.""" + raise NotImplementedError("Passing `AsyncDistributedDataset` to a " + "tf.function is not supported.") + + +class _PerWorkerDistributedIterator(PerWorkerValues): + """Distributed iterator for `Client`.""" + + def __next__(self): + return self.get_next() + + def get_next(self, name=None): + """Returns the next input from the iterator for all replicas.""" + raise NotImplementedError("Iterating over an `AsyncDistributedIterator` " + "is not supported right now.") + + +def _is_ps_failure(error): + """Whether the error is considered a parameter server failure.""" + if (_RPC_ERROR_FROM_PS in str(error) or + (isinstance(error, errors.InvalidArgumentError) and + "/job:ps" in str(error))): + return True + + +def _is_worker_failure(error): + """Whether the error is considered a worker failure.""" + if _JOB_WORKER_STRING_IDENTIFIER not in str(error): + return False + if _RPC_ERROR_FROM_PS in str(error): + return False + + # TODO(haoyuzhang): Consider using special status code if error from a + # remote is derived from RPC errors originated from other hosts. + if isinstance(error, (errors.UnavailableError, errors.AbortedError)): + return True + + # The following error could happen when the remote task fails and restarts + # in a very short interval during which no RPCs were exchanged to detect the + # failure. In that case, gRPC allows channel (which is different from a + # connection) to be reused for a replaced server listening to same address. + if isinstance(error, errors.InvalidArgumentError): + if ("Unable to find a context_id" in str(error) or + "unknown device" in str(error) or + "Unable to find the relevant tensor remote_handle" in str(error)): + # TODO(b/159961667): Fix "Unable to find the relevant tensor + # remote_handle" part. + return True + + # TODO(b/162541228): The following 3 types of errors are very rare and only + # observed in large-scale testing. The types of errors should be reduced. + # This error could show up when copying function inputs from remote tasks. + if isinstance(error, errors.InternalError): + if ("Failed copying input tensor" in str(error) or + "Unable to find a context_id" in str(error)): + return True + + # This could happen when the function registration fails. In the observed + # cases this only happens to the dataset related functions. + if isinstance(error, errors.NotFoundError): + if ("is neither a type of a primitive operation nor a name of a function " + "registered" in str(error)): + return True + + # This could happen when the iterator is no longer valid on the remote worker + # "Resource input tensor contains an invalid device" + if isinstance(error, errors.CancelledError): + return True + + return False diff --git a/tensorflow/python/distribute/client/client_test.py b/tensorflow/python/distribute/client/client_test.py new file mode 100644 index 00000000000..12152407c5d --- /dev/null +++ b/tensorflow/python/distribute/client/client_test.py @@ -0,0 +1,388 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for client.py."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import threading
+import time
+from absl import logging
+
+from tensorflow.python.distribute.client import client
+from tensorflow.python.eager import def_function
+from tensorflow.python.platform import test
+from tensorflow.python.training import coordinator
+from tensorflow.python.util import nest
+
+
+class CoordinatedClosureQueueTest(test.TestCase):
+
+  def testBasic(self):
+    queue = client._CoordinatedClosureQueue()
+    closure1 = self._create_closure()
+    queue.put(closure1)
+    self.assertIs(closure1, queue.get())
+    self.assertFalse(queue.done())
+    queue.put_back(closure1)
+    self.assertEqual(closure1, queue.get())
+    queue.mark_finished(closure1)
+    self.assertTrue(queue.done())
+    queue.wait()
+
+  def testProcessAtLeastOnce(self):
+    closure_queue = client._CoordinatedClosureQueue()
+    labels = ['A', 'B', 'C', 'D', 'E']
+    processed_count = collections.defaultdict(int)
+
+    coord = coordinator.Coordinator(clean_stop_exception_types=[])
+
+    def process_queue():
+      with coord.stop_on_exception():
+        has_been_put_back = False
+        while True:
+          closure = closure_queue.get(timeout=30)
+          if closure is None:
+            break
+          if not has_been_put_back:
+            has_been_put_back = True
+            closure_queue.put_back(closure)
+            continue
+          closure._function()
+          closure_queue.mark_finished(closure)
+
+    def get_func(label):
+
+      def func():
+        logging.info('Label: %s, before waiting 3 sec', label)
+        time.sleep(3)
+        processed_count[label] += 1
+        logging.info('Label: %s, after waiting 3 sec', label)
+
+      return func
+
+    for label in labels:
+      closure_queue.put(client.Closure(get_func(label)))
+    t1 = threading.Thread(target=process_queue, daemon=True)
+    t1.start()
+    t2 = threading.Thread(target=process_queue, daemon=True)
+    t2.start()
+
+    # Make sure multiple wait() calls are fine.
+    closure_queue.wait()
+    closure_queue.wait()
+    closure_queue.wait()
+    closure_queue.wait()
+
+    self.assertEqual(processed_count, collections.Counter(labels))
+
+    coord.join([t1, t2])
+
+  def testNotifyBeforeWait(self):
+    closure_queue = client._CoordinatedClosureQueue()
+
+    def func():
+      logging.info('func running')
+
+    coord = coordinator.Coordinator(clean_stop_exception_types=[])
+
+    def process_queue():
+      with coord.stop_on_exception():
+        closure = closure_queue.get()
+        closure_queue.mark_finished(closure)
+
+    closure_queue.put(client.Closure(func))
+    t = threading.Thread(target=process_queue)
+    t.start()
+    coord.join([t])
+
+    # This test asserts that a `wait()` issued after the function has already
+    # been processed doesn't time out.
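+    # For reference, the queue protocol exercised by these tests is roughly:
+    #
+    #   queue.put(closure)            # enqueue work
+    #   closure = queue.get()         # a worker thread picks it up (inflight)
+    #   queue.mark_finished(closure)  # or queue.mark_failed(e, closure),
+    #                                 # or queue.put_back(closure)
+    #   queue.wait()                  # blocks until nothing queued or inflight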
+ closure_queue.wait() + + def testWaitRaiseErrorAfterMarkFailure(self): + closure_queue = client._CoordinatedClosureQueue() + closure_queue.put(self._create_closure()) + closure = closure_queue.get() + + wait_finish_event = threading.Event() + coord = coordinator.Coordinator(clean_stop_exception_types=[]) + + # Using a thread to verify that closure_queue.wait() will not return until + # all inflight closures are finished. + + def mark_finished_fn(): + with coord.stop_on_exception(): + self.assertFalse(wait_finish_event.is_set()) + try: + raise ValueError('Some error.') + except ValueError as e: + closure_queue.mark_failed(e, closure) + wait_finish_event.wait() + + t = threading.Thread(target=mark_finished_fn) + t.start() + + with self.assertRaises(ValueError): + closure_queue.wait() + wait_finish_event.set() + + coord.join([t]) + self.assertTrue(closure_queue.done()) + + def _create_closure(self): + + @def_function.function() + def some_function(): + return 1.0 + + return client.Closure(some_function) + + def _put_two_closures_and_get_one(self): + closure_queue = client._CoordinatedClosureQueue() + closure1 = self._create_closure() + closure_queue.put(closure1) + + closure2 = self._create_closure() + closure_queue.put(closure2) + + closure_got = closure_queue.get() # returns closure1 + self.assertIs(closure_got, closure1) + self.assertIsNot(closure_got, closure2) + return closure_queue, closure1, closure2 + + def testPutRaiseError(self): + closure_queue, closure1, closure2 = self._put_two_closures_and_get_one() + + closure_queue.mark_failed(ValueError(), closure1) + + with self.assertRaises(ValueError): + closure_queue.put(self._create_closure()) + + self.assertTrue(closure_queue.done()) + + with self.assertRaisesRegex( + client.FunctionRetryableError, + 'The corresponding function is cancelled. Please reschedule the ' + 'function.'): + closure2._fetch_output_remote_values() + + # The error is cleared. + closure_queue.put(self._create_closure()) + + def testWaitRaiseError(self): + closure_queue, closure1, closure2 = self._put_two_closures_and_get_one() + + closure_queue.mark_failed(ValueError(), closure1) + + with self.assertRaises(ValueError): + closure_queue.wait() + self.assertTrue(closure_queue.done()) + + with self.assertRaisesRegex( + client.FunctionRetryableError, + 'The corresponding function is cancelled. Please reschedule the ' + 'function.'): + closure2._fetch_output_remote_values() + + # The error is cleared. + closure_queue.wait() + + def testDoneRaiseError(self): + closure_queue, closure1, _ = self._put_two_closures_and_get_one() + closure_queue.get() + + self.assertFalse(closure_queue.done()) + closure_queue.mark_failed(ValueError(), closure1) + with self.assertRaises(ValueError): + closure_queue.done() + + def _test_error_reporting_and_cancel_flow(self, call_wait): + closure_queue, closure1, closure2 = self._put_two_closures_and_get_one() + closure_queue.put(self._create_closure()) + closure_queue.get() + # At this moment, there are two inflight, one in queue. + self.assertEqual(closure_queue._inflight_closure_count, 2) + + # Simulating closure1 fails. 
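+    # Rough sketch of the state transitions asserted below:
+    #   before mark_failed:  2 inflight, 1 queued, _error_generation == 0
+    #   after mark_failed:   1 inflight, 0 queued, _error_generation == 1,
+    #   and the next put()/wait()/done() call re-raises the recorded error.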
+ try: + raise ValueError('Some error.') + except ValueError as e: + nest.map_structure(lambda x: x._set_error(e), + closure1._output_remote_values) + self.assertEqual(closure_queue._error_generation, 0) # pylint: disable=g-assert-in-except + closure_queue.mark_failed(e, closure1) + self.assertEqual(closure_queue._error_generation, 1) + # At this moment, there are one inflight, nothing + # in queue (because the ones in queue should have been removed and + # cancelled). + self.assertTrue(closure_queue._queue.empty()) + # Doesn't include out of generation closures. + self.assertEqual(closure_queue._inflight_closure_count, 1) + + coord = coordinator.Coordinator(clean_stop_exception_types=[]) + closure3 = self._create_closure() + + with self.assertRaises(ValueError): + # Verifying `wait()` or `put()` raises even if one closure is in + # flight. + if call_wait: + closure_queue.wait() + else: + closure_queue.put(closure3) + # At this moment, there is one inflight, nothing in queue. + self.assertTrue(closure_queue._queue.empty()) + self.assertEqual(closure_queue._inflight_closure_count, 1) + + # This asserts that closure1 has errored. + with self.assertRaisesRegex(ValueError, 'Some error.'): + closure1._fetch_output_remote_values() + + # The following asserts that closure3 should have been cancelled. + if not call_wait: + with self.assertRaisesRegex( + client.FunctionRetryableError, + 'The corresponding function is cancelled. Please reschedule the ' + 'function.'): + closure3._fetch_output_remote_values() + + # Closure2 is inflight, so it shouldn't be ready. + self.assertEqual(closure2._output_remote_values._status, + client._RemoteValueStatus.NOT_READY) + + # And `wait` should block because closure2 is not back yet. + self.assertFalse(closure_queue.wait(timeout=20)) + + # Now let's assume that closure2 isn't successful due to worker preemption, + # and now it's attempted to be put back, but ends up getting cancelled. + self.assertEqual(closure2._error_generation, 0) + self.assertEqual(closure_queue._error_generation, 1) + closure_queue.put_back(closure2) + + with self.assertRaisesRegex( + client.FunctionRetryableError, + 'The corresponding function is cancelled. Please reschedule the ' + 'function.'): + closure2._fetch_output_remote_values() + + # At this moment, there is nothing inflight, and the queue is also empty + # (because closure2 should not be added back to the queue). + self.assertTrue(closure_queue._queue.empty()) + self.assertEqual(closure_queue._inflight_closure_count, 0) + + closure4 = self._create_closure() + + e = threading.Event() + + def get_fn(): + with coord.stop_on_exception(): + # This should end up getting closure4, not closure2, because closure2 + # has been cancelled and should not be got. + closure_got = closure_queue.get() + e.set() + self.assertEqual(closure_got._error_generation, 1) + self.assertEqual(closure_queue._error_generation, 1) + self.assertIs(closure4, closure_got) + self.assertIsNot(closure2, closure_got) + + t = threading.Thread(target=get_fn) + t.start() + + time.sleep(10) + + # Make sure `closure_got = closure_queue.get()` is unblocked as a result of + # `closure_queue.put(closure4)`. + self.assertFalse(e.is_set()) + closure_queue.put(closure4) + self.assertTrue(e.wait()) + coord.join([t]) + + self.assertEqual(closure_queue._inflight_closure_count, 1) + closure_queue.mark_finished(closure4) + # The queue is now cleared and nothing inflight. 
+    self.assertEqual(closure_queue._inflight_closure_count, 0)
+    closure_queue.wait()
+
+  def testWaitRaiseErrorAfterAnErrorIsReported(self):
+    self._test_error_reporting_and_cancel_flow(call_wait=True)
+
+  def testPutRaiseErrorAfterAnErrorIsReported(self):
+    self._test_error_reporting_and_cancel_flow(call_wait=False)
+
+  def testStateIsRestoredAfterJoinIsCalled(self):
+    closure_queue, closure1, closure2 = self._put_two_closures_and_get_one()
+    closure_queue.get()
+    self.assertEqual(closure_queue._inflight_closure_count, 2)
+    closure_queue.mark_failed(ValueError('test error'), closure1)
+    with self.assertRaises(ValueError):
+      closure_queue.put(self._create_closure())
+    closure_queue.mark_failed(ValueError('test error'), closure2)
+
+    # closure2's error is from a previous generation, so the following put
+    # should not raise, and _error should have been cleared.
+    self.assertIsNone(closure_queue._error)
+    closure_queue.put(self._create_closure())
+    self.assertIsNone(closure_queue._error)
+
+  def testStateIsRestoredAfterJoinIsCalled_WaitShouldReturn(self):
+    closure_queue, closure1, closure2 = self._put_two_closures_and_get_one()
+    closure_queue.put(self._create_closure())
+    closure_queue.get()  # got closure2
+    self.assertFalse(closure_queue._queue.empty())  # still has closure3
+    self.assertEqual(closure_queue._inflight_closure_count, 2)  # closure1,2
+    closure_queue.mark_failed(ValueError('test error'), closure1)
+    self.assertTrue(closure_queue._queue.empty())  # closure3 cancelled
+    self.assertEqual(closure_queue._inflight_closure_count, 1)
+    with self.assertRaises(ValueError):
+      closure_queue.wait()  # reports error from closure1
+
+    # `wait` should block because closure2 is not back yet, even if closure2
+    # was sent inflight before the error.
+    self.assertFalse(closure_queue.wait(timeout=20))
+    self.assertEqual(closure_queue._inflight_closure_count, 1)
+    closure_queue.mark_finished(closure2)
+    closure_queue.wait()  # wait should pass immediately
+    self.assertEqual(closure_queue._inflight_closure_count, 0)
+
+  def testThreadSafety(self):
+    thread_count = 10
+    queue = client._CoordinatedClosureQueue()
+
+    # Each thread performs 20 queue actions: 10 are `put_back` and 10 are
+    # `mark_finished`.
+    action_count = 20
+
+    def func():
+      for i in range(action_count):
+        closure = queue.get()
+        if i % 2 == 0:
+          queue.put_back(closure)
+        else:
+          queue.mark_finished(closure)
+
+    threads = [threading.Thread(target=func) for i in range(thread_count)]
+    for t in threads:
+      t.start()
+
+    for _ in range(thread_count * action_count // 2):
+      queue.put(self._create_closure())
+    queue.wait()
+    self.assertTrue(queue.done())
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/distribute/client/metric_utils.py b/tensorflow/python/distribute/client/metric_utils.py
new file mode 100644
index 00000000000..f0a6628a333
--- /dev/null
+++ b/tensorflow/python/distribute/client/metric_utils.py
@@ -0,0 +1,79 @@
+# Lint as: python3
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Metrics collecting utilities for single client training.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import time + +from tensorflow.python.eager import monitoring +from tensorflow.python.util import tf_contextlib + +enable_metrics = False + +# Time in seconds to bucket the distribution of execution time. Range from +# 0.001s (i.e., 1ms) to 1000s. +_time_buckets = monitoring.ExponentialBuckets(0.001, 10, 6) + +_function_tracing_sampler = monitoring.Sampler( + '/tensorflow/api/ps_strategy/client/function_tracing', _time_buckets, + 'Sampler to track the time (in seconds) for tracing functions.') + +_closure_execution_sampler = monitoring.Sampler( + '/tensorflow/api/ps_strategy/client/closure_execution', _time_buckets, + 'Sampler to track the time (in seconds) for executing closures.') + +_remote_value_fetch_sampler = monitoring.Sampler( + '/tensorflow/api/ps_strategy/client/remote_value_fetch', _time_buckets, + 'Sampler to track the time (in seconds) for fetching remote_value.') + +_METRICS_MAPPING = { + 'function_tracing': _function_tracing_sampler, + 'closure_execution': _closure_execution_sampler, + 'remote_value_fetch': _remote_value_fetch_sampler +} + + +@tf_contextlib.contextmanager +def monitored_timer(metric_name, state_tracker=None): + """Monitor the execution time and collect it into the specified metric.""" + if not enable_metrics: + yield + else: + start_time = time.time() + start_state = state_tracker() if state_tracker else None + yield + duration_sec = time.time() - start_time + # If a state_checker is provided, record the metric only if the end state is + # different from the start state. + if state_tracker is None or state_tracker() != start_state: + metric = _METRICS_MAPPING[metric_name] + metric.get_cell().add(duration_sec) + + +def get_metric_summary(metric_name): + """Get summary for the specified metric.""" + metric = _METRICS_MAPPING[metric_name] + histogram_proto = metric.get_cell().value() + ret = dict() + ret['min'] = histogram_proto.min + ret['max'] = histogram_proto.max + ret['num'] = histogram_proto.num + ret['sum'] = histogram_proto.sum + # TODO(haoyuzhang): consider reporting the distribution in buckets. + return ret diff --git a/tensorflow/python/distribute/client/metric_utils_test.py b/tensorflow/python/distribute/client/metric_utils_test.py new file mode 100644 index 00000000000..79827e5e9f6 --- /dev/null +++ b/tensorflow/python/distribute/client/metric_utils_test.py @@ -0,0 +1,69 @@ +# Lint as: python3 +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for metrics collecting in client.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import time +from tensorflow.python.distribute import multi_worker_test_base +from tensorflow.python.distribute.client import client +from tensorflow.python.distribute.client import metric_utils +from tensorflow.python.distribute.cluster_resolver import SimpleClusterResolver +from tensorflow.python.eager import def_function +from tensorflow.python.eager import test +from tensorflow.python.training.server_lib import ClusterSpec + + +class MetricUtilsTest(test.TestCase): + + def testClientMetrics(self): + metric_utils.enable_metrics = True + + cluster_def = multi_worker_test_base.create_in_process_cluster( + num_workers=1, num_ps=1, rpc_layer='grpc') + cluster_def['chief'] = [ + 'localhost:%d' % multi_worker_test_base.pick_unused_port() + ] + cluster_resolver = SimpleClusterResolver( + ClusterSpec(cluster_def), rpc_layer='grpc') + cluster = client.Cluster(cluster_resolver) + + @def_function.function + def func(): + time.sleep(0.5) + return 3 + + result = cluster.schedule(func, args=None, kwargs=None) + result = cluster.schedule(func, args=None, kwargs=None) + cluster.join() + self.assertEqual(result._get_value().numpy(), 3) + + # Tracing, closure execution, and remote_value fetching should be executed + # exactly once for running this function. + metric_tracing = metric_utils.get_metric_summary('function_tracing') + self.assertEqual(metric_tracing['num'], 1) + # Tracing time should be longer than the sleep time in Python function. + self.assertGreater(metric_tracing['sum'], 0.5) + metric_closure = metric_utils.get_metric_summary('closure_execution') + self.assertEqual(metric_closure['num'], 2) + metric_remote_value = metric_utils.get_metric_summary('remote_value_fetch') + self.assertEqual(metric_remote_value['num'], 2) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/python/distribute/client/parameter_server_client.py b/tensorflow/python/distribute/client/parameter_server_client.py new file mode 100644 index 00000000000..8236c2410d8 --- /dev/null +++ b/tensorflow/python/distribute/client/parameter_server_client.py @@ -0,0 +1,55 @@ +# Lint as: python3 +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Parameter server client module. + +This is currently under development and the API is subject to change. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.distribute import parameter_server_strategy_v2 +from tensorflow.python.distribute.client import client + + +class ParameterServerClient(client.Client): + """A client that uses `ParameterServerStrategy` to distribute tasks. 
+ + Parameter server training refers to the distributed training architecture + that requires two jobs in the cluster: workers and parameter servers. The + variables and updates to those variables are assigned on the parameter + servers' tasks, and the actual computation intensive operations are assigned + on worker tasks. In TF2, parameter server training only starts up one + client process, to drive and coordinate the workers and parameter servers. + This is referred to as single-client architecture, as opposed to multi-client + approach which is seen more often in traditional TensorFlow distributed + training, including `tf.estimator.Estimator` and `tf.keras` with + `tf.distribute.experimental.MultiWorkerMirroredStrategy`. + + `ParameterServerClient` is a `Client` that uses `ParameterServerStrategy` as + the underlying strategy to distribute, and is the starting point of parameter + server training/evaluation. + + If 'TF_CONFIG' environment variable is used, provide a + `TFConfigClusterResolver` to detect configurations for multi-worker training. + + """ + + def __init__(self, cluster_resolver): + super(ParameterServerClient, self).__init__( + parameter_server_strategy_v2.ParameterServerStrategyV2( + cluster_resolver)) diff --git a/tensorflow/python/distribute/client/parameter_server_client_test.py b/tensorflow/python/distribute/client/parameter_server_client_test.py new file mode 100644 index 00000000000..db22a476b4a --- /dev/null +++ b/tensorflow/python/distribute/client/parameter_server_client_test.py @@ -0,0 +1,405 @@ +# Lint as: python3 +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for parameter_server_client.py.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl import logging +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.distribute import multi_worker_test_base +from tensorflow.python.distribute import sharded_variable +from tensorflow.python.distribute.client import client +from tensorflow.python.distribute.client import parameter_server_client +from tensorflow.python.distribute.cluster_resolver import SimpleClusterResolver +from tensorflow.python.eager import def_function +from tensorflow.python.eager import test +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors +from tensorflow.python.framework import tensor_spec +from tensorflow.python.ops import check_ops +from tensorflow.python.ops import init_ops_v2 +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import random_ops +from tensorflow.python.ops import variables +from tensorflow.python.training.server_lib import ClusterSpec + + +def make_client(num_workers, num_ps): + # TODO(rchao): Test the internal rpc_layer version. 
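+  # This helper stands up an in-process test cluster. Against a real cluster
+  # described by the 'TF_CONFIG' environment variable, an equivalent setup
+  # would presumably look like:
+  #
+  #   from tensorflow.python.distribute.cluster_resolver import TFConfigClusterResolver
+  #   client = parameter_server_client.ParameterServerClient(
+  #       TFConfigClusterResolver())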
+ cluster_def = multi_worker_test_base.create_in_process_cluster( + num_workers=num_workers, num_ps=num_ps, rpc_layer="grpc") + cluster_def["chief"] = [ + "localhost:%d" % multi_worker_test_base.pick_unused_port() + ] + cluster_resolver = SimpleClusterResolver( + ClusterSpec(cluster_def), rpc_layer="grpc") + return parameter_server_client.ParameterServerClient(cluster_resolver) + + +class ParameterServerClientTest(test.TestCase): + + @classmethod + def setUpClass(cls): + super(ParameterServerClientTest, cls).setUpClass() + cls.client = make_client(num_workers=3, num_ps=2) + + def testBasic(self): + self.client._strategy.extended._variable_count = 0 + with self.client.context(): + v1 = variables.Variable(initial_value=0.0) + v2 = variables.Variable(initial_value=1.0) + self.assertEqual(self.client._strategy.extended._variable_count, 2) + + @def_function.function + def worker_fn(): + v1.assign_add(0.1) + v2.assign_sub(0.2) + return v1.read_value() / v2.read_value() + + results = self.client.schedule(worker_fn) + logging.info("Results of experimental_run_v2: %f", + self.client.fetch(results)) + + self.assertAlmostEqual(v1.read_value().numpy(), 0.1, delta=1e-6) + self.assertAlmostEqual(v2.read_value().numpy(), 0.8, delta=1e-6) + + def testFnReturnNestedValues(self): + x = constant_op.constant(1) + + @def_function.function + def f(): + return x + 1, (x + 2, x + 3), [x + 4], {"v": x} + + got = self.client.schedule(f) + want = 2, (3, 4), [5], {"v": 1} + self.assertEqual(self.client.fetch(got), want) + + def testInputFunction(self): + + def input_fn(): + return dataset_ops.DatasetV2.range(1, 2) + + with self.client.context(): + v = variables.Variable(initial_value=0, dtype=dtypes.int64) + + @def_function.function + def worker_fn(iterator): + x = next(iterator) + v.assign_add(x) + return x + + distributed_dataset = self.client.create_per_worker_dataset(input_fn) + result = self.client.schedule(worker_fn, args=(iter(distributed_dataset),)) + result = self.client.fetch(result) + self.assertEqual(result, (1,)) + result = self.client.schedule(worker_fn, args=(iter(distributed_dataset),)) + result = self.client.fetch(result) + self.assertEqual(result, (1,)) + + self.assertAlmostEqual(v.read_value().numpy(), 2, delta=1e-6) + + def testAsyncScheduleAndJoin(self): + + def input_fn(): + return dataset_ops.DatasetV2.from_tensor_slices([2] * 10) + + with self.client.context(): + v = variables.Variable(initial_value=0, dtype=dtypes.int32) + + # TODO(yuefengz): the following tf.function has a return value which is None + # in its structured_outputs. + @def_function.function + def worker_fn(iterator): + x = next(iterator) + v.assign_add(x) + + distributed_dataset = self.client.create_per_worker_dataset(input_fn) + + iterator = iter(distributed_dataset) + + # Verifying joining without any scheduling doesn't hang. + self.client.join() + self.assertEqual(v.read_value().numpy(), 0) + + for _ in range(5): + self.client.schedule(worker_fn, args=(iterator,)) + self.client.join() + + # With 5 addition it should be 2*5 = 10. + self.assertEqual(v.read_value().numpy(), 10) + + for _ in range(5): + self.client.schedule(worker_fn, args=(iterator,)) + + # Verifying multiple join is fine. + self.client.join() + self.client.join() + self.client.join() + + self.assertTrue(self.client.done()) + + # Likewise, it's now 20. 
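+    # (10 from the first batch of schedules, plus 5 more additions of 2 each:
+    # 10 + 5 * 2 = 20.)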
+ self.assertEqual(v.read_value().numpy(), 20) + + def testInputFunctionWithMap(self): + self._map_fn_tracing_count = 0 + + def input_fn(): + def map_fn(x): + self._map_fn_tracing_count += 1 + return x + 10 + return dataset_ops.DatasetV2.range(0, 10).map(map_fn) + + @def_function.function + def worker_fn(iterator): + return next(iterator) + + distributed_dataset = ( + self.client.create_per_worker_dataset(input_fn)) + result = self.client.schedule( + worker_fn, args=(iter(distributed_dataset),)) + self.assertEqual(result.fetch(), (10,)) + self.assertEqual(self._map_fn_tracing_count, 1) + + def testInputFunctionCreateVariables(self): + + def input_fn(): + v = variables.Variable(initial_value=0.0) + return v.read_value() + + with self.assertRaises(ValueError): + self.client.create_per_worker_dataset(input_fn) + + +class LimitedClosureQueueSizeBasicTest(ParameterServerClientTest): + """Test basic functionality works with explicit maximum closure queue size. + + Execute the same set of test cases as in ParameterServerClientTest, with an + explicit size limit for the closure queue. Note that even when the queue size + is set to infinite, there is still a maximum practical size (depends on host + memory limit) that might cause the queue.put operations to be blocking when + scheduling a large number of closures on a big cluster. These tests make sure + that the client does not run into deadlocks in such scenario. + """ + + @classmethod + def setUpClass(cls): + super(LimitedClosureQueueSizeBasicTest, cls).setUpClass() + client._CLOSURE_QUEUE_MAX_SIZE = 2 + cls.client = make_client(num_workers=3, num_ps=2) + + +class VariablePartitioningScopeTest(test.TestCase): + + @classmethod + def setUpClass(cls): + super(VariablePartitioningScopeTest, cls).setUpClass() + cls.client = make_client(num_workers=3, num_ps=2) + + def testBasic(self): + with self.client.context(): + with self.client.experimental_variable_partitioning_scope(): + init1 = init_ops_v2.Constant([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + v1 = variables.Variable( + initial_value=lambda: init1(shape=(5, 2), dtype=dtypes.int64), + shape=(5, 2), + dtype=dtypes.int64) + + init2 = init_ops_v2.Constant([0, 1, 2, 3, 4, 5]) + v2 = variables.Variable( + initial_value=lambda: init2(shape=(6, 1), dtype=dtypes.int64), + shape=(6, 1), + dtype=dtypes.int64) + + self.assertIsInstance(v1, sharded_variable.ShardedVariable) + self.assertLen(v1.variables, 2) + self.assertRegex(v1.variables[0].device, "/job:ps/replica:0/task:0") + self.assertRegex(v1.variables[1].device, "/job:ps/replica:0/task:1") + self.assertAllEqual(v1.variables[0].read_value().numpy(), + [[0, 1], [2, 3], [4, 5]]) + self.assertAllEqual(v1.variables[1].read_value().numpy(), [[6, 7], [8, 9]]) + + self.assertIsInstance(v2, sharded_variable.ShardedVariable) + self.assertLen(v2.variables, 2) + self.assertRegex(v2.variables[0].device, "/job:ps/replica:0/task:0") + self.assertRegex(v2.variables[1].device, "/job:ps/replica:0/task:1") + self.assertAllEqual(v2.variables[0].read_value().numpy(), [[0], [1], [2]]) + self.assertAllEqual(v2.variables[1].read_value().numpy(), [[3], [4], [5]]) + + def testSurplusPS(self): + with self.client.context(): + with self.client.experimental_variable_partitioning_scope(): + initializer = init_ops_v2.Constant([0]) + + v = variables.Variable( + initial_value=lambda: initializer(shape=(1,), dtype=dtypes.int64), + shape=(1,), + dtype=dtypes.int64) + + self.assertIsInstance(v, sharded_variable.ShardedVariable) + self.assertLen(v.variables, 1) + 
self.assertRegex(v.variables[0].device, "/job:ps/replica:0/task:0") + self.assertAllEqual(v.variables[0].read_value().numpy(), [0]) + + def testInvalidArgument(self): + with self.assertRaisesRegex(ValueError, "initial_value"): + with self.client.experimental_variable_partitioning_scope(): + variables.Variable(initial_value=[0, 1, 2], shape=(3,)) + + with self.assertRaisesRegex(ValueError, "shape"): + with self.client.experimental_variable_partitioning_scope(): + initializer = init_ops_v2.Constant([0, 1, 2]) + variables.Variable( + initial_value=lambda: initializer(shape=(3,), dtype=dtypes.int64), + dtype=dtypes.int64) + + def testPerWorkerValue(self): + var_shape = tuple() + var_dtype = dtypes.float32 + var_name = "var" + + def create_var(): + var = variables.Variable( + initial_value=0.0, dtype=var_dtype, name=var_name) + self.assertIn("worker", var.device) + return var + + worker_local_var = self.client._create_per_worker_resources(create_var) + + # The following is a workaround to allow `worker_local_var` to be passed in + # as args to the `client.schedule` method which requires tensor specs to + # trace tf.function but _create_worker_resources' return values don't have + # tensor specs. We can get rid of this workaround once + # _create_worker_resources is able to infer the tensor spec of the return + # value of the function passed in. See b/154675763. + for var in worker_local_var._values: + var._set_type_spec(tensor_spec.TensorSpec(var_shape, var_dtype, var_name)) + + def worker_fn(var): + var.assign_add(1.0) + + for _ in range(10): + # Which slice of `worker_local_var` will be used will depend on which + # worker the `worker_fn` gets scheduled on. + self.client.schedule(worker_fn, args=(worker_local_var,)) + self.client.join() + + var_sum = sum(self.client.fetch(worker_local_var._values)) + self.assertEqual(var_sum, 10.0) + + +class ErrorReportingTest(test.TestCase): + + @classmethod + def setUpClass(cls): + super(ErrorReportingTest, cls).setUpClass() + cls.client = make_client(num_workers=3, num_ps=2) + + with cls.client.context(): + cls.iteration = variables.Variable(initial_value=0.0) + + @def_function.function + def _normal_function(self): + x = random_ops.random_uniform((2, 10)) + y = random_ops.random_uniform((10, 2)) + self.iteration.assign_add(1.0) + return math_ops.reduce_mean(math_ops.matmul(x, y)) + + @def_function.function + def _error_function(self): + x = random_ops.random_uniform((2, 10)) + y = random_ops.random_uniform((10, 2)) + check_ops.assert_non_positive_v2(math_ops.reduce_sum(math_ops.matmul(x, y))) + self.iteration.assign_add(1.0) + return self.iteration + + def testJoinRaiseError(self): + for _ in range(3): + self.client.schedule(self._normal_function) + self.client.schedule(self._error_function) + with self.assertRaises(errors.InvalidArgumentError): + self.client.join() + + def testScheduleRaiseError(self): + for _ in range(3): + self.client.schedule(self._normal_function) + self.client.schedule(self._error_function) + with self.assertRaises(errors.InvalidArgumentError): + while True: + self.client.schedule(self._normal_function) + + def testErrorWillbeCleared(self): + self.skipTest("b/157597579") + self.client.schedule(self._error_function) + with self.assertRaises(errors.InvalidArgumentError): + self.client.join() + + for _ in range(3): + self.client.schedule(self._normal_function) + self.client.schedule(self._error_function) + with self.assertRaises(errors.InvalidArgumentError): + self.client.join() + + def testFutureReturnError(self): + result = 
self.client.schedule(self._error_function) + + with self.assertRaises(errors.InvalidArgumentError): + result.fetch() + + # Clear the error. + with self.assertRaises(errors.InvalidArgumentError): + self.client.join() + + def testInputError(self): + aborted = self.client.schedule(self._error_function) + + @def_function.function + def func(x): + return x + 1.0 + + with self.assertRaises(errors.InvalidArgumentError): + self.client.join() + + result = self.client.schedule(func, args=(aborted,)) + with self.assertRaises(client.InputError): + result.fetch() + + with self.assertRaises(client.InputError): + self.client.join() + + +class LimitedClosureQueueErrorTest(ErrorReportingTest): + """Test error reporting works with explicit maximum closure queue size. + + Execute the same set of test cases as in ErrorReportingTest, with an explicit + size limit for the closure queue. + """ + + @classmethod + def setUpClass(cls): + super(LimitedClosureQueueErrorTest, cls).setUpClass() + client._CLOSURE_QUEUE_MAX_SIZE = 2 + cls.client = make_client(num_workers=3, num_ps=2) + + with cls.client.context(): + cls.iteration = variables.Variable(initial_value=0.0) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/python/distribute/parameter_server_strategy_v2.py b/tensorflow/python/distribute/parameter_server_strategy_v2.py new file mode 100644 index 00000000000..02f3c35a716 --- /dev/null +++ b/tensorflow/python/distribute/parameter_server_strategy_v2.py @@ -0,0 +1,202 @@ +# Lint as: python3 +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Parameter server strategy V2 class. + +This is currently under development and the API is subject to change. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl import logging +from tensorflow.python.distribute import distribute_lib +from tensorflow.python.distribute import parameter_server_strategy +from tensorflow.python.distribute import sharded_variable +from tensorflow.python.framework import ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.util import tf_contextlib + + +# pylint: disable=protected-access +class ParameterServerStrategyV2(distribute_lib.Strategy): + """An asynchronous multi-worker parameter server tf.distribute strategy. + + Currently, `ParameterServerStrategyV2` is not supported to be used as a + standalone tf.distribute strategy. It must be used in conjunction with + `Client`. The recommended way of using the combination is through a + `ParameterServerClient` object. Please see `Client` and + `ParameterServerClient` for more information. + + This is currently under development, and the API as well as implementation + is subject to changes. + """ + + def __init__(self, cluster_resolver): + """Initializes the V2 parameter server strategy. 
+
+    Args:
+      cluster_resolver: a `tf.distribute.cluster_resolver.ClusterResolver`
+        object.
+    """
+    self._extended = ParameterServerStrategyV2Extended(self, cluster_resolver)
+    self._cluster_resolver = cluster_resolver
+    self._verify_args_and_config(cluster_resolver)
+    logging.info(
+        "ParameterServerStrategyV2 is initialized with cluster_spec: "
+        "%s", cluster_resolver.cluster_spec())
+    super(ParameterServerStrategyV2, self).__init__(self._extended)
+
+  @tf_contextlib.contextmanager
+  def experimental_variable_partitioning_scope(self):
+    """A context manager for creating `ShardedVariable`.
+
+    Variables created inside a `with experimental_variable_partitioning_scope()`
+    code block will be of type `ShardedVariable` and their values are
+    partitioned among parameter servers along the first / outermost axis. The
+    number of shards is equal to the number of parameter servers (but no more
+    than the size of the first dimension).
+
+    Variables created within this scope must be initialized using a callable as
+    `initial_value` and a known shape.
+
+    A "div" partition strategy is used to partition the variable. Assuming we
+    assign consecutive integer ids along the first axis of the variable, ids
+    are assigned to shards in a contiguous manner, while attempting to keep
+    each shard size identical. If the ids do not divide evenly across the
+    shards, each of the first several shards is assigned one more id. For
+    instance, a variable whose first dimension is 13 has 13 ids, and they are
+    split across 5 shards as:
+    `[[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10], [11, 12]]`.
+
+    Yields:
+      A context manager for creating `ShardedVariable`.
+    """
+    with variable_scope.variable_creator_scope(
+        self._extended._make_sharded_variable_creator()):
+      yield
+
+  def _verify_args_and_config(self, cluster_resolver):
+    if not cluster_resolver.cluster_spec():
+      raise ValueError("Cluster spec must be non-empty in `cluster_resolver`.")
+    if self.extended._num_gpus_per_worker > 1:
+      raise NotImplementedError("Multi-gpu is not supported yet.")
+
+
+class ParameterServerStrategyV2Extended(
+    parameter_server_strategy.ParameterServerStrategyExtended):
+  """Extended class for ParameterServerStrategyV2.
+
+  Please see `tf.distribute.StrategyExtended` doc for more information.
+  """
+
+  def __init__(self, container_strategy, cluster_resolver):
+    """Initialization of ParameterServerStrategyV2Extended."""
+    super(ParameterServerStrategyV2Extended, self).__init__(container_strategy)
+    self._num_ps = len(cluster_resolver.cluster_spec().as_dict().get("ps", []))
+    self._variable_count = 0
+
+  def _create_variable(self, next_creator, **kwargs):
+
+    if "colocate_with" in kwargs:
+      colocate_with = kwargs["colocate_with"]
+      # Clear the variable scope to avoid possible conflicts between device
+      # scope and colocation scope.
+      with ops.device(None):
+        with ops.colocate_with(colocate_with):
+          var = next_creator(**kwargs)
+          logging.debug(
+              "Creating variable (name:%s, shape:%r) that colocates with %s",
+              var.name, var.shape, kwargs["colocate_with"].name)
+          return var
+
+    # Clear the colocation scope to avoid possible conflicts between device
+    # scope and colocation scope.
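+    # Placement below is a simple round-robin over the ps tasks: the i-th
+    # variable created under this strategy lands on /job:ps/task:(i % num_ps).
+    # For example, with 2 ps tasks variables go to task 0, task 1, task 0, ...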
+ with ops.colocate_with(None, ignore_existing=True): + with ops.device("/job:ps/task:%d" % + (self._variable_count % self._num_ps)): + var = next_creator(**kwargs) + logging.debug( + "Creating variable (name:%s, shape:%r) on /job:ps/task:%d", + var.name, var.shape, (self._variable_count % self._num_ps)) + self._variable_count += 1 + return var + + def _make_sharded_variable_creator(self): + """Returns a function conforming to the `variable_creator` signature. + + The returned function creates `ShardedVariable` when called. + """ + + def sharded_variable_creator(next_creator, **kwargs): + if "shape" not in kwargs or kwargs["shape"] is None: + raise ValueError("shape must be explicitly specified when creating " + "sharded variables") + init_fn = kwargs.get("initial_value", None) + # We intentionally don't allow non-callable initial_value to ensure the + # value is created on PS but not client. If the value is created on + # client, it will needed to be sent to PS for variable initialization, + # which is inefficient and can potentially hit the 2GB limit on protobuf + # serialization. + if init_fn is None or not callable(init_fn): + raise ValueError("initial_value must be specified as a callable when " + "creating sharded variables") + + # Use "div" partition strategy to partition the variable. + full_shape = kwargs["shape"] + if self._num_ps < full_shape[0]: + num_shards = self._num_ps + else: + num_shards = full_shape[0] + offsets = [] + base = full_shape[0] // num_shards + extra = full_shape[0] % num_shards + for i in range(num_shards): + if i == 0: + offsets.append(0) + else: + prev_shard_size = base + (1 if i - 1 < extra else 0) + offsets.append(offsets[i - 1] + prev_shard_size) + + # Note: The way we initialize sharded variables is suboptimal, as it + # needs to create the full value tensor separately on each PS which the + # variable is going to be placed on. The full value could be very large + # and consume a lot of memory. The ideal way is to only create what's + # needed on the shard, however that's not practical because: + # 1. Initializers don't have sharded behavior support, even though some + # initializers (e.g, uniform) can be used directly. + # 2. tf.Variable signature requires "initial_value" to be either a value + # or a callable without arguments, meaning it is not straightforward + # to make the sharded component from it. + def init_shard_fn(shard_index): + full_value = init_fn() + if shard_index < num_shards - 1: + return full_value[offsets[shard_index]:offsets[shard_index + 1]] + else: + return full_value[offsets[shard_index]:] + + var_list = [] + for i in range(num_shards): + kwargs["shape"] = None + kwargs["initial_value"] = lambda: init_shard_fn(i) + var_list.append(next_creator(**kwargs)) + + result = sharded_variable.ShardedVariable(var_list) + return result + + return sharded_variable_creator + + def _call_for_each_replica(self, fn, args, kwargs): + # TODO(rchao): Consider implementing sync PS training. 
+ raise NotImplementedError("Sync PS training is not implemented yet.") diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index bbd4f49d95b..9cf6e10702f 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -150,6 +150,9 @@ COMMON_PIP_DEPS = [ "//tensorflow/tools/docs:generate_lib", "//tensorflow/tools/docs:parser", "//tensorflow/tools/docs:py_guide_parser", + "//tensorflow/python/distribute/client:client", + "//tensorflow/python/distribute/client:parameter_server_client", + "//tensorflow/python/distribute/client:metric_utils", ] # On Windows, python binary is a zip file of runfiles tree. From 336ba1f2f381a09f2b6e8231a101a0e2dd18164e Mon Sep 17 00:00:00 2001 From: Rick Chao Date: Sun, 2 Aug 2020 21:11:18 -0700 Subject: [PATCH 1954/2522] Disable guarantee-all-funcs-one-use.mlir.test for it failing asan test. PiperOrigin-RevId: 324537924 Change-Id: Ie7f7bfb863408ff9a77ccf080310c65b35a4ad52 --- tensorflow/compiler/mlir/tensorflow/tests/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/BUILD b/tensorflow/compiler/mlir/tensorflow/tests/BUILD index daa583bed0e..1fc35f37058 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/BUILD +++ b/tensorflow/compiler/mlir/tensorflow/tests/BUILD @@ -5,6 +5,7 @@ package(licenses = ["notice"]) glob_lit_tests( data = [":test_utilities"], driver = "@llvm-project//mlir:run_lit.sh", + exclude = ["guarantee-all-funcs-one-use.mlir"], # TODO(b/162700124): Re-enable. tags_override = { "optimize.mlir": ["no_rocm"], "tf_optimize.mlir": ["no_rocm"], From f496d8d9bfac3f36d7689fb01da93224d1c1c15b Mon Sep 17 00:00:00 2001 From: Tian Lin Date: Sun, 2 Aug 2020 21:13:21 -0700 Subject: [PATCH 1955/2522] Create g3doc for recommendation models. PiperOrigin-RevId: 324538098 Change-Id: If246a268110168272dc5d4869606271e203ef1c6 --- tensorflow/lite/g3doc/_book.yaml | 3 + .../recommendation/images/screenshot.gif | Bin 0 -> 302564 bytes .../g3doc/models/recommendation/overview.md | 122 ++++++++++++++++++ 3 files changed, 125 insertions(+) create mode 100644 tensorflow/lite/g3doc/models/recommendation/images/screenshot.gif create mode 100644 tensorflow/lite/g3doc/models/recommendation/overview.md diff --git a/tensorflow/lite/g3doc/_book.yaml b/tensorflow/lite/g3doc/_book.yaml index 715e0c8431b..e28fee87316 100644 --- a/tensorflow/lite/g3doc/_book.yaml +++ b/tensorflow/lite/g3doc/_book.yaml @@ -190,6 +190,9 @@ upper_tabs: path: /lite/models/smart_reply/overview - title: "Text classification" path: /lite/models/text_classification/overview + - heading: "Others" + - title: "Recommendation" + path: /lite/models/recommendation/overview - name: "API" skip_translation: true diff --git a/tensorflow/lite/g3doc/models/recommendation/images/screenshot.gif b/tensorflow/lite/g3doc/models/recommendation/images/screenshot.gif new file mode 100644 index 0000000000000000000000000000000000000000..0a43707c5a1567062738458267b0d8baf71bea95 GIT binary patch literal 302564 zcma%?X*d+#8~0~l3>u7GHP#Ta?=qGcLStXcShA}bQWQ0eeI5I*v9HyTon+q$Wmj2B zw2EX&KTrSX<@4$}*SW6q>V9*U`?|lM?|lr73{=%z7(fL0Hvj;EK>ic~5)%`T-$ag$ zjs57ja`3(TaM1PFQ^A9G#b3XEH8C+c?zcRx0vs=;|DFu``(yTZv;4uA=7Ym^DJiMC zx;j5Uzx_U|>5%^>heVRz<(iv zM-YhSzg6mg;r}}(0Q{7~5QN=~7rrln_OW@YE(=H(X@7CojHmz0*3 zS5#J2*VNWMdHRe|-|)P#skx=Kt-Yi3Mc2#jp5DIxfx)5SkWFuHx=I^0&SA-nx$nrB!`WXzF*K5t@Jt;85UnRtNs`OB;B`og9K ziVh{oUd1zD4LbTXT0R0@i4v5yQdV!uE;SZVj24J2^g`4v7~I@Q#Ab-xbkq2 z1x;cX0

    2rx%$5xNfB?F?Da18V3! z0oW}TU47o0lpZLI9gl|kzKiX)0foPfjJ?$@u^WAxE*E7WV6BfuAmuZ02(&y=6FpeCUGuA|pSfGe>N(7grR@)5VPW_!etlyn}3qH5AbPKrjkt`nOTGOv*Y{9HV zG2x*=FjqA-1*4628c< z`A;a^a-i6W-SY+s$R!zt#oNTIk#X5BcDz1(f=-kV_Dw?79^k;i9x9xhv1G%3Nj#{` zm{nvA=xC+MhU0Mwqjf{g8_`@m74^nYR@FO4c3_Xe@F=Fk>R)HrxTi#+taEXxX|RZ| zabKVsl48TqX){NEyW|^ptCWNep>Be86^*|$y?uJIY7{K;u*UWjjFS)? z4)?|q9bh?pXaJ9d3sICQeXy7ehBsq-u@;f#+$?@ZSo||HX88!?QI}8~R+cI{duN1x z@r<5JRoTFZ!BFa65*aUt06iSw#ogx#769=3SuX+dE~;Rk6|;OD?;9w-3~?9WKnZ=! zK6Mq=H3oBlpx6BJn|Fj14+l6Fc?o$vt1O44Iey;5CWxRJy|`~yvgbw308oJE6BcKb z6hh#I$X+sFunx2~cj8R$fVy~qjvi1CGwQGrjuyK8V&=z_L!Y0aKlWDd7+itmp+u zKGOyOT3=q~A3+3{dj$_IcTe8eDl_;K(JDE+R&ko{T5T?55go3BM;wlqz5wN;h*9!6;y)cKbxc>2!>yQb1CyJhDj!yJStw_b9+ieH zH)g0c`U$&uEIryK!QyF4CSeN4%yiwsbn5q zK!ayPdXPR-8gQ9qag|$g1!j67BO6v1Suw^r;j|55`I@{t>Fxdse|OATOB)9g;@PjN z)hY4FYwkiD(!U-X~{E@lPo# z%3gu`9oblX50+mD)5*{ZisoqYjCCDwQHhe3zIK8~)C?47Wc9uwiGYWme~t8GL0cYI z^}=4`=WI3DMx8k&W0MofXm@eHXImo%vm+j|5tKs1q72v@BZU-R45YF_v0^f6Ojs&a zcl%$86H5GMM|LuDxF4n$q4OS9Fpk0{UVYM{RUBmRzMa%fGIMV0dC7!17Gk1pn|9B0KpWvY!1kavICSr zSFmVqfn2O`Q5!!IeKrBT7!C1QeiL)S2!amx$_FaMm?-WXOXSp~z|P^9*%dzy)R#^?!{*)&5r=Xa16bl4PRN|El6q3hLm0x#{lMEtrZ8<(EX_-K4yuI%lDmZtSWH$F1&?%4mQT%pQ817+;L+iPl!QIg>56 zuArtzjk%sU6XhkEYdi7TrsF>EcTK35v*{Oy<>R^5$)>0GCcfPGb3EV8+RTVC{p!m9 zXJOc^xgq;>;;Vr`cZK z@(EmZ=!mPCwpaA$bu7Jp3V_!A{pNN1&+@X&z1HQhZ$T*rQ zFQnW+YfR~QiI$r!pp6}R%|2P;^5Zt95gl_Ep zchI45?pM~2&W)?iM+2wVcOGBRT(|sjw9s+(x3>!Qd(eB(Tjz%d9t6P7#MFbeGunrJ zKeTpZZigyZxg1OwYWyfqIoVWsbolD!e?N<_{pBKP|5-kG@jLVUe;X}YM;|>O9wlA- z@7sJ4a+^bYXXxe0hPLzHO~aSJW3Gj5b-X{?&wg}j7&<)vsO!_C?)it?7ak$)9>VOx zIzEM&tWr3(9(Yw!P{E;X+!V-gIPYONmM20;HA2KLLM%8!JST#;^^xR4gv?=tEDu$F zA)*l&s-zmKQg!-3EtcBI22o$2;)4IpP+gwLD_)U$tzq~rss}oSi_Nr)Nso=o zU5LwVjS4=DOH+-u-ipIl#aQRW4=Kdo2#&5ejA!s9k~j*d@&cCoSY8HMJ%zEhN1=Oj_kh zUROO${$Q898JxV8ll-|g`RhXR_rv5Jo|N6z$Q9L;-@z%*@Ck>lDJKgl{~e|PcxfOt z8pNIk3!y#FiKWNVmh4iPk7#d(lf0#JgKDFJ|+(Agb{Ky6|Y7ZxnDm>8m(r9%L6VF7qttjxdV ze^dc%DUc*pup1iAPXzL>=4qN|DL2!kz1n!4NN80t;X@ z${knBOYqL(CqePK1*6EgS!7=MNX|vJg0=08W$*kY?=1Rak!MxLxI|%VTuQrrwxb%Bn82&?NGdUF|y`ybqIulglh#ugK~>0 zi(s$e#|k@iCRsYfIKOckEWu!hkRU_CbivUmiJjsGiPJ155`@Hn83H2N??ZiQprJi5 zUptJGkx3!|tMO$K_L-Di5Ly!KHU&O+zl3QDtNZuaS+>`{5dtS zh6;U1p-P?rb|XRck&zH|meLg1laS3M3zpacXMZmDKo=I?&x4SlH`=Q-V(3hi3P)^? 
zqx_hR7Mh@m9+X=4<4hq9~uKO}(H7u7OwDP?Smh-zMha;-FhBiZ%zk9qQXSpLbUtcOC z-S>sIbotmVuCf?;xq*BlLvtk-Pq6U|aY0_?fH?5`)P0~{wr<+dckcFq`c*o??KspZ ze_4aK>{^Avitk~iFx{59A%)|(MD@3iC?y%r?wM!`Z54jt>0{S?er?J4$s*wUsp z7R@p781wL+D|rz1ni=^8lf<=4*{rH{1D@XPU}&)xilN>#xcZS!qEm^tFEWF2HI*XP z)Dt=nA@Tn>^vUd3SC= zdd?8Y`DAt~XiB)~52Z<1lTE)d@%IKfj;@88S%=-P(H?fyn-e5C^D}8akKR=H%j>;K zu@zD{dii_tqiapv4jp4v(<>$pdY!uTD=j&$gc;dmd}$v} z`{>H6TSX2(xV@dwr&l#G~xsDQLpq_>Q8Fv9i!>vn=#KI>uYbrLF ztB6NQocw&~z3TW7N1x?}Px8Yq^F33*1yz2>`!@dF3U(=$$Okynm80F$?Nm_l&o9mO_ z57jysl5C<@RT&l|E^UzC^hZUMJcKjmdu}p}`%kswO>&Kie>yeD`{+7(3PL7fpOa0eMsYK};2?MACfOjIzpwPctOe6D8Y=HP_OTjI= z+4ej}3>kv55BR9$vV%nYi^=Er=VC8)wZyL7$jEvY3EGR@6JP0N@kz~4(QyRRt%KKIMP~4%8QvFR z4YV8+xiZfM>J!RXx_v^eQ$O$vy{~)S`E#x2RtON86A3dy|3*uQTN-X}6`JE16tOP- z;vw>N$Kw24pakWsc{2mkLpmEPm#GU9u9j@@&yz4?UeJObApeam(pU!Xo9&}1vUL2{ zFVe!|!K@OZ!h6*xWHK*)qSxk5$W|p-%<))W#jn3_i845313p;JG|_3)D;c%||BImg z6G7@TMR_vt75%g>sR#@O29qL-FmVaucQniv5y=wRU0c(FZ*)I%q~X0dh`%(q2mzl>7fffZ z>hOYz$=~z4w*GrWF&y@vf01UG%SX=TYXGPYtCxI^4h8ll~1boobJ9HTUe)GSQ?=AD5hJJD2v$e4=pWrFwsQSbb-b7EqK(y5>Jg&(+ zrvF`gGHHUSFIlC!oYV~UEbp+cfI7)vZ#1$lao_{RJ$z~X@ORjPI3gQj)4j?#6GU@W{p*+u;9?|M1{IQIPd3Ah_iP3xP=rS1!J(qBj0}j zepR1WlZD#S+SaS{G#98RIV<`sVupsF@@?z=u0fe_UICCBg6IC_pYvYr-jSK;u0-q! zCAn50FhW>H<33p^9nKqrh9j-gE@P0$0q^>tq<~t(#04hPaK2Hh@Z@DVMPl{-5>B!; z0%a?LSd&2_Hg2A7fE<{Fw+Q3rpDbx}?f&*XQVL)6y6VS~0q zNrx5!L#`Y7jr6=>$Z9_vDs3OX3L2@;ZM>xf)u2^3)CPl$zdWbdmdfxbb2&7OXv;hD z$o!oJ#ulmmvkJB&q9H?Mwyoz_QmPABy%1uW4X9laO^R76(s!u<%&947#?#_-69zVj z_UbAl4A&MyrlYh9J`linbb7mace3&pc7ss`4Fw8In&C+v>uBDH;Y?NT89PU#t1IrL z;B!#&e*h`^>7R|-SF+<0=#E>=65;;SZY|e_T;w4e=@MT6Lk7olz|BH=CVz!8DmeXv zcH**BhGoujU3V$%^-k|8MU9hH*)P1fhqWTk}`Y0yvE zUx}&3lkbd6y<+Yu_5G?4X8BhJZr|EG`Y{|D9?QG;GS{oKJERS9uxSDNE4-m`{^#g) zNabanjStRqr@4L4Ul9h%XO=d9fqh3$Z?WLNzh4|e<5HdTW$6(Gg12>LUm%gQBCvt zRjW2g_8iuWO!JI0;GB6PAI2JpuNRGV7CrMOV!DDn=)YAIE<7)xI zX(TR6fwY#ID%q16g2O)*_Q$b$#EM8LVq?J`MgESRlgZ4fk1vgWd-&>-qM0lkOUXB; zFb?%4`Bx5Max@}#ubz_Ny8N)>KF+WAAyU}kW5djnViefg*UpUawL5}CpQk9*26-9L z{7J;(uUgt_H3lvqK19~vGzn$GjIBy8|35tXNHKGjw@Of#YNbQknjaOX{E?!#my6e? 
z2f1jQJaaG}mH_#wx&%Lif!8~~w814=F{7+m z>QP$h&##~glA^CPp{l~ex7PE>$B3LI5)Zbv-hG3ZJXc4PR*f9FgOd$a}9Z~Ec!_pK4Oj>HB9mGiJ% zEKo$ZhRh0)gut~{MVSSm35c+tC6S}*UyiG$<%(Jp(Mf1d0ELJRKgh4sGpLK%MpgO+ z6e@9B=Y#V3y-(IW+gs{oh$lD<5A5U}T z!3?n_;<`*+Ac^_kIUIh~GjWB9@O6ux9chVUOa)bbD(57v1Hw`SgJ$siP2SZC;U1-J zfxHt5W0|%IR%e6MZ#s1?8HKCpb`pVe-#z~@j5<6o{C>Xu*2S`Kk_24QE97p+{aZ)e z;-~w8x3$Qp**>KV#e9u1qfxoEo>E@XsPuua(k2dAK@7HBJr?J8z=I|@qJv0ga=8|YPngOB}x~%q+ZHL=*&UH(+ zqQr?jCuUKEPO}eX*J+f7j+r4<$Cv_qZFIwD72kXoKtBa$a=0DCJ(=acK?tfP&)h49 z7_A;TGMYPN&Z11~oi=OA38|2@&yj~HmNS(LX<)A1$5T43?Ni-8$LgM&wrX2iW0Q;jM8oNQ8tX`321 zhnw#8&%@jGO0U*3m|deYoX{z}L-&uUPJ@Zro1#dRB_`ZJ- z@f5K&a?1G~>h)Z(NxE0@vNkho+-1-+`J^?jn|N_6G)4T61oZBF_SxW=pLgF_+P_T~ z`|$FX%&T;k{N3Qfu-Ol~N3{GK%}?eho$DVRM?|2_Ild8smu0QBx45MH$cTGOs%k_^ zP)=HKZ<>dn7C02os&F9~KBC0N;}mY?x~yYE&*SQhVg!zms$6he9riQ8x53e|&a&2} zqw~cp4%&lw^;j!0&YFx``)|owvnPgq*42KoCJ!ifxT5o)ETur2ci(KYA6U>KhPpSj z=?Qx~Y+GCU(|Pwxd8Xidn9vycYe9G;b^1yJzZJPM$v zR;ytmnzbpN<^6(ro0q;Ao}Jrv z#}DvsmHxHoO7Q^WSc*)CDunWa-nE6Gg01ZHYfVq3=A8lv1hny(|=W$uOf`z2Gqc z!j;fG8b+rZlLqv{(ZOv=A{NZ*zjR}LC4_h2-o&e``` zchXErSP|&)ziiTC2+ZS_`J$c-JS{V(BgN7ioXYbFK1T1ZT`HZ(O#=j(O4kCh*y_x>W{QR{}-DVgVX&3pH%EQ2pQjdBPOf}R;s@rB9 z&c;KA^@YyDdL%J(@{*FbQ3lHsFPs}}iH!f$glE>kl%gu2sP$=xoFQBcBh(~kZ1-Jv zLuHaYNf@GpyD5;N@1sSfWYxcV0A6svRvhcHb8mY*3@u@nTa7fW%62ek>iC_QS~Z!4 z(2**J{TB0y+xxhFq4#Fkz5l>K0crvPc4}8(yD$1MIn4$FVashJ&z6~z#4xIAZ8d8!CucLo@sh@8m|477JZEfiIJdkriIrtOw*@ zt(OBJb5>!7#+U34QcW{*!~=Nv*wO1d-&MI{JTp@K{{DXeJ3z$0XRWq}R>_95qigj> zuEqEzgKz}^zyTZ}0bbx@;A)9bLNEu^3-nsHhp;1)a2>2-2}U3TLSz+_@F$0$36T2_ zL696b@*;;@nN|=4s`@KfRI#|3704Pcfm;ZL@(*1L3p-&dKkBoDFt(c@FA+NkH?R(z z&Te2f*S)iBvT?a4EqXAF|;)L zv^S*e$Igw;iArD6y~PzV`32pMc4 z8|)0sI|yhnhOGpjGwK^>l3u}qz2pER5vyRLFa>@(C!c{Ev`e<6(7lns2`S*y>P!pL7Wi7RlK@H5Wejnn%UK~BI57eYaJrGO zxjK9llaM68f)OlmKKMCgi;w^{`wCrPK-{Y|XH(9wu(XHJ5vCv^Dq^J^O3p-_D!AbU z37`N1d^G*by2v$9mSmpyjG;BVA2OPPT9P?&jJBxfNoPJ zq#w}<<2w?Cq6|J8A1my-LlLhS5(yD&4Qt}QoDc?&%dWS(6`)WQoS_VpAO>Mu#nL0M zmRW?>xCkk*4#Y7EKJ5XLZN31o(}o}&i(S#!;Q|0~$s7_1Mv?^DlO1bN2JIZXFv8ct zIta$G1`eUhkd&=rX0D(&XsJg`b}Yzn+yH?K0TAE<%%`Je~&g2-aZLWnHk$$$u#MW%;*?i#8i~?AC$X$ALRV z#aM!&&BcN83J%nvc0Vz;ftwt|_{1s{&gAnpuZZYIx5q@PKkRK%Exd6{LA zPNnDNs(r8SM+JIV_LDMv3+S+3J1GDZzbY}>w5ry4qBqKyJoY@GckDGLk;xs8BN=^_ zzRYHV0-3()(p+2H$aK&HJ|t>AcDal(N*PCGQ9^SR=(2==Sxw3$>VS^tX$5MRWQ~#; zNb7!PvNRXbD_L?FK6wOCz)34rHGgu3g|YZ}lA3WGHUgJEA=JLJkF7_k8T@&c%1&C!jnCFS0svo}ahLHCm{49?np_Cll8=**ikFK3Mw^w1VgO;2HxZPUl&O=Q3;-jam8pb+gN=%VxQ@EBpozSQ zgowGoz_h)@z5vL`zQKmf#=E+Q(ZRyNw$#pyv%-$W#L(W?zRBF6YvElZ4)*fb{0^W9Erc+Z(D*Y$3%pN?#OZfJBE*ke>XJ6*a> z9aGDPl!E(QX=>eY=B7&fEHf)lspA$7RM=G6!cZ5!oB#j;uG}u1yV<1dk`}=Pg;%B; zr3!_zbe@q@RZc(6))HP85;fW*a{AaijXD zs+4`6`d>}Ny~hnZml-MNGFOJ#-kGI=hNV-aKDe7P7-iJT7bG<+i4rlkmfC$#MYh-} znLfs7K`Bz?=c3n!1PK5VWh4e-ilh)pbwM#6n(hh?!%@6y_A8()PK)H9@Aa5GSZ;evFt75-39UiXO*QjWL8HlYbhPVCI9Huu0Le(63^8B_H# z0Nk+jGbm!>Bp?aR!LC?B$F&hD#$sZ1z0EO>eOoN#;Y^9fUml2bkW?CI*vLfZg{@SsMC1ao$TQPjv5p!FBfStL zuSMBMd2Z9Azp8dg9125M<-FYFh6O4uiBORve5ds~c}#c=t~FI$CLh)Ly)=Rnn*7vK z2Xp4ejQIp7KWi8;2kFdV9rJQotB$@(M>%)?kA`nzh~eA=y{tISG(HO*LJ#_>v7Vcg&oGKE6P zpAyxD%o`mg4?0Mc25y(>Q(R3S8c&*SG?2}#k2JBizQyU&p_-wkGq0IeLY5Vvz(V6& z6Pb;t^=opvM<(qsm%p;_#$3oCh7j!{#&dfznE|iYHZLJ&sxLm@3r?kGp zRh5)U#ZzH#ox$R-)0!t?P&9G@J=wUOn#?`S>gK;T3erB1|e2q$#e z&Mm2*6bzv*$qBecrtp0UTPfrM%d2eiW?+l-CcqK|$XiLzCU71VEHp;qt2nZq39e@t zZ@O1xVi~)AWTZ&9*SYCM7skq?S4~w&=E=Ua%f+Oue&q{b=cci)>4RKjTgZS5+n>i}ls7pb=6K>m z%6-X_pO_e(CT9x{OS{v&-aRxOJt1^=G=Rq!S@B0zi}X2mCRy|Gd@7`T{z8B25L?}Y zdP9~-+7x)W=2?$+R8w_Q{$)PbXa7qDc6vv`dfb;$;>S}X6+Ow9S)+v)+{O&zQ4C23 zfq(~nT4sSJ^kx=iXez(Kx1Mp$Z&z3t~iX2~k%` 
z$af^806yRYgF%Bi(h}cSDl#;N3gHP>Kpx@NQ6<%U@W)ExTF2+Mr?R7`0Ffs+RDSyL5W)alOP&nD}tOB|;)NG^W&R<3(GJ7BH$nC>@iF zv1nk5U+YIu?G*csm8tGBl8XQFKcrRCrH!Pq;XRQx$rZR(pE} zEZ{*Xlu!h16LdUO7|D4qtk&!{ye{|C|GU;v3aX*=`HEttqB_lvGnGr3Ma~a51?A3nh zmRHcwKVs=~ge7_l*Z)*~mo<#x9p_RgozOaz5R)3wh>`FyG@=0s;2qA9Df-5Q&cYoR zgO)`pDe_~GRQZeFLYb1_k~)VAlcJR4k_w%$HvqsansAt^@DQCjhOr?#h`}AGAedDt zmB%y@H#re4(-_nEeGddthUY0LHa<*2mz23RG8ZtL#(2G^XK0rRLJ}^N01`2I0YMM} z7r_XGISQc=lUC3e9+53ZK$|ytW1cAp9uWYOusWQn3PCUh1vC(&aO4~CKNglXu&lHnhGuIqIF13_k>(I;R!AxJ8MY^Xwxkj8ZsA? z5h?=_+ku8Xn}I_%WHz5sjGnqN=b44tis$Aee+H2~u#D3D}BxDOAi!QXYdAf@%VU zN~j{y1&BHliJGWV(5O;?s4>EpW(0n{v4xkp3E`5bfAdudW7_unZGCr&HW8meutSBJ307bbd748eu^0&eprh$6sxY7$K$Mx_n2^zwh;c39_9y*{ zhTamc1c4akYICwTR@o+b87O`pW2=&&0Bm5I06P%&`duw|Z%oE>0h6Ir< zwdozAsR|Hh9?4(@D$_9?0kKB836p`D5YZWj!723vmOpZ&wfQ@;I2>Z2u)!0C;0Y0% zdH<Vur7dyMr-xwnKxE3|AX36|0_AaS&+umJ_ob{4g0&rr0^ zA*U#kjH=vVJYp(7@Bf>KfxfwhIagd&h z5{}8Kt+NI(`E3=NwiD5@^tp}zy9SynyG0jfmW!8fXFHhdu4}mxNBg*pdnp-fXeLG) z*ifgt2(Z7PE$c|MYhbH&8!`i-q`eR_?D+~oI<84tBhGOVGxDE8#u&8tC`3aceQ^u& z3A_XGuR=PuCljlGp%7OZ6oWaXZwmkg>li7T0uQk+GV3D93vQODP%`CqR`!Dg@&5`y zOBRzLv`ApD7>YNLtGz2M2`{io?==;qc&$`Ryp%8(f61x<9FV6OyRLwTnc%DKcmv*r zwdyNB3)`#$R2~;GOFbN;=VY=m6H3@HHbf$%sX(2P;R7QOl!ui7cwxI$_wauIP@C{GzR*k>ON^=m139_9$oBt=7sCYj=Z=oOZI4 zi)o9FDS9YFQkzBgbV_J~b+)t3qoO}7s5&I5BS1fskizy_%-Wlya5$o1b^o~q28}W& zf$^1w<1}Rmf=RxhlxRc9!N?i?BZ1#mJ}1R41V~(UH_gh~eZn|&&o*+?R&xz`%QZL6 z<>O|Iw{b>gNzR!ivGK02kjH0RDVDMU*PF$YV9bFmyDwNs6Xu0KhK}c|&d<$?S2IcvwQa4}XYJ8@bc|4BU3@m3WwzYYFx*vVMgMAu7}Wjj96(#p z!%Wb8eJKJ>uDf+=2!uTdO;jb|AvU#R}D5NP7%awtccNutR=mOR1c&aJ@O&GJ(C8_zKr9rXdk&7=_q~i&&Wi5j#DlJ!kY{8+J5i zIC8XSekNsdGUb3~*ISl*ahdZ&Nu{*&1kJ+8NW&*Ji-%}m$Nzyc(FN`*!p<=Q-0Igy z{nyny(|je8Cr7eyn#4V-5mRccY+P&>`P`9KZ4&N5R|1NZi&Mlq$)$dhFGQpGkggn4H}n5+vN-slWz5P|(I4*Z|Gx0X+%OvD0`p!{ghN&vL0a8omP53X?e*dB%X%c8()0 z&x3nBun>QsT}R*m(_4CS4`+&W%{_f4O7-k!SEfKSeg9}OYHcM4gc@MtQlRB@qXbB> zH)e6|cnu(~bL1lqnPEW&nxK*)*gB2oa7a=wvl ztGBtCV^!$fevnOV4ki`gs&FSaYzx@#R$fE~l?Ws71tW3o{;U=m0EfmAHA!b2#339} z#!#_-?~34=!VR$BYQskfsBC@HAsiZbHXx zMvt{&BF#*8skFp&XY1v3B$@VD2UvTTVkYXnZFl@Axp;jQLu9#BCq}s&tlP3?hj|ei zk_?-Ykij0K$nZXDz9&f4MEW1h^3SW}@z_9aH&k*32pWPKVuOW(Vu^%@iin1Uh=-7q zj+Bgrjf;-CD@*;6Q(!enE_fZ<|GL0FW>n#!=ZYLDfKx2w82U24QSC zqLaAEkS>brLW!FB&!Nj&-r`LRx09W{o0cNB`}FdXEr#4Oy(|ZmmBgI&U>;3ca1+3X z{`O7X$F(N7tw*a0oCmTQqGM>S4HJVBEnAMqPMSSgj~u*iukdn(Y7pPToivNev&zra zRi!|A_G-y;FHWtGyW&MmGiJ=MjQ@519UQQ7;jaCRHq6&=FyhGpTM@HX>f@`m$Ue&Y z$Z-k)w#e!>4Xsy~UvM>b^JTcwan@degK}Os6*F(Kwt$0<&XYRjt&1U79JqNYDW#Qx zgKtSS@9w-p2@+Kf37RU|l4yr(cT!21WA-SXPP|nAcm4kT6&+0EL^hy0qohYzMF=(6 zR4WQvwq9!Tc$Uy{s#z#qc?d1{(}Bz}Mb=@zosv#Huz?fTPq&?g+BdI_Z1}Ck(f>K4eQBO}8+x~$k#`X#U7dABX`^^9<%!;$za?~&n5TWU zCw)}~M&f))6}g{kk+LLaZ_e#l>5hxC#}sUiqHMP-H2pqG}HV}vt>C7hpe-esas zAO?qOjz<2YrD9_eO4guoQd**jrvBR5cCwPTuy-P*EQdZ`Rp5jDjSHS{yAfC-v+O~8&pYGl%5P!DI?1I%O8%Rm zReUnJEiEmAgv=ogXVUPBX8tHucvy~ltxmE=nqin8D@U(n7}L6`wC82!o3XB%%AL5X zruWmcy_(kCSbC}ImH(I7HD;)uC$U5=NMLOODw@T)^VG`%H?|m$#BPb1&Q|tz=%_w- zoalr6rR-^@o!Ysz%Q;(JENA*aH=oiJYW!+qJ@-qUZsdB|aIO%>J!#qoPK~p(`Q<95 zRv<%tbIoG!`rVR||Rb)vxCF;;x-Vv)*bR^MPB} zdb-3@Zz<2FvI&q5RlQ2-?s6ilV4-w|Jih#oZX=Q)rs5Pm zQiW|(yrUDpE>c2zp^ip*Gaxo10jil%=wPV}R^&8OrDNfVWQqgZo_+C*RY$HFK59Dh zqmlp#r^D&#Z$f88*6aX=P};+ z?tGOYATQuKpdCJh71P7cgpRHPf%mIprxjY&(S`-}=1M>+aDFnq7`qe8%y zz^+I#PyeB0->}f;p>y%fGkeim(acsLFY1bi<;c|T_{Xd~4)Q$WQqaFolWLi^^|E&5yAfePW}?GY5@Ckfm4?`w)4(PXb~TFPtU5=fuXeF)K?EU;cto~E zp5Y(L_ZFtg%fbq=KXa07NhoA+?AyG&{t= zu1hgq?`$t_^*3Aa=yI6J{7G@B$;yy6+EEPsw zcB{G7mQ~ z=G-{0@JUc|wx@F4>3pKnS?d&PZ1Go#V5B6~!5{)##trUYm-}vfVs>#U)c>L|JDH%M z=(4eKQ!N(nr`_{z4Sd&)Wup0;grbmU~^OLO{ 
zW%)s-5UHKfUQ^}r#BLG1;2mp!4a?X)1Gj|0sxNb!q!)}wLq-7Dzy^e|b*`tn>%l-m zz(wOOI?|(K0wwykkgAZD=LOh|2s+O7jh{2woysk;)^y>b;r&(#je0$H?I?GUo-6vL!Z@VcNGVu1BJi>kegyBBS zH$S?SnLf#yS^30g9U!7$7)DnrHfGi*B!1;#Ms{KvfOWz!dC(w%6PR`9rggzU4FEt5 zU8jMdr$#P?Th)?!-9{VuMswQ7cPHU~*>Vfe;1SFq7v~pc0HA(6CwtN00kfA3skRZ& zFcLCw13ut`%;!cc#w&ysSHqGdm{mliBs{hBNywx(%(O8n6#p|skY~~NgVkUK99Rt) z$aR=E0ua_oHD^!&Fam3E0{~D3dWRckAq-(AgK`0MCYXP6!Gd%V3_kdM@)rztxE2wh zegMD*Y!E8TPz}bw5o{0vR_B4s@D{<)1>Z++m*!L}HGFszF9)Y!GS(p6^;wlhP~qYr zPPA$Fg$yG>fs)6CA_s07h5OkVx|wm-~#|41ptr%&L<33P=1aG4Niaok_Zj% zSAO#6e$haPWq}gR7!4S361><9H3$uq*d88HdToJ>;)fCt6AjVGhjF1)Nn?deWoOz# zL^pOmQ`J<%Ton1L3!fgZR4&=)gnGjRXV z0uC7s8V8Bca38`ze}71hg$NBBF#;Z-i{K`ECQxn``GQt(5+iwgt%rx=7<>7b7R~S$ zHh6fqq=5bfR8nV?c;#NOBVDrud=ynr`h!8y15lV?brLvvY@h%NPSY-i6{(WcWx57g@woledvcEp@CBHidHw3bypV7sFLHjh}D1r8R>y3@r;ya zJqM&#Lgh7!qc!lzH9(VFRzy!d^(~CIOMP`R5|fb?C=6KFk4edlT^W$n;Dt{~asy{f zh$e7#f^lJ)kz#O?&tL=^SOsoand3N@K?jKu5dVJ}IgVjxg4Z~gHP;q&2@)QOkfU5|~qht=qoA8Dd_2$sP>ktrdYbjcdUP;MZ4542pOuW>G%_O-IIkXR~4tXa6f#LImd7e3e&uH-LFv`Jo5NbzQ)KKNe{; zAq?}`eB+668ej!MV3vls5w;Nx?q&rW!7x?_5+@0pd}j?sNvCCDrz2Tza0x8f!#2;M zHy0CV{&avcIg?$pfR96JwzOEm5g~(6jaoVq1Da}(D2o#5L&$|a9I*zANQmdAspV#> zJg5N)zzoV54K0wB!-x!+$DeFEo;cVV34nhyniAnSh-<-&cS>@_%0UnoRTr8#=@lTv zH=&#)Afb3un4@1L6FgN}2+y~vBao#~$(3FQ4N5>H)&?z|fO>-(40Cz`et33#s} zlJRy#xS?# zXdOX%-lAvuhzler9Cox-aBGuc3tRisbZUokLKs)cb>#NhWbc#QyqI7YOh6O=j5#>0DZyW zi-dPXdnL0^=%@xkY(mMqhSY8PI%_divsqSb*%Gt5M#1=!bG_iea|lhD6h_VJj>AN_ z9MyeG>S8v7OE@M#TKibZ*QoI!vX-d?U0@Pj>xu4VO#Js_w1tFdr;hN#8Tm#_%Y$RK z#$vugWFqE5#59lF*8g~HCO1}GuB*j5e}!;x^tj{XLC$KR`eufolU=6-#oEKanJ87k zL6t)BZfzA2fSXZHswObZ6M5@dJzPqNxwjSCs0`J$&3cJ+7QaLHyH}V?Ojt!{qakDz zUpQI1Qgvtf^|??gO;&W7^jou%OgLSpO7s}M!PcB*Wq8~}t!$@QP$k6&e7gJQdzdzg zZmUE+2|xjLm=SDkrWTYz5pYHqLq{@{-2q$IRAi2mF71XTiQvl=bY$oBq~JSRgVk`Q zOiRdHaEoe>(Nv`mYP)?bO4@WKCMC^_*C)$lsP6E|d%0G(OTqj*LdvE|S+Qi1T0mxM z3Ik?GO8h@?b^kh9RYUP}KY z&kh%L!Ilt1Q-Wn-bH&GL;bpzMch2v_whrc9hUse2`O4AuN3eEwX)}Bo1Hb1eLQ%`M z5z2YzH8Iz`e?XDXu9ukv=WCDlLU(v~3|23=C%)UeoB-3jnpU^~7Nt=tI1{a1#j8l! 
zQ>J~EP*HSYgS5zMwoPxv$D=^hlV-@Qd&HOYZZ+0(b_X>DRV3tFN`@0p92PZ*YQ^hS z#V^d!3YSYgO(Kiz)(-fwvZY~w&8$Y6%?K2HRD-PHD}`YF!z4YFF4NSbquESWeKuyK z19g{|TK~UK?4*7DIHxUR)~e4b6gFYpphhUZRej8MQ8W;`a9%@joTR-#)xx)J*NDtc zmkd0(Qi_KwdRm=2SiO9jmeuj~*lV_18SBN-b+H42(ks2!6$VcKrAq|ZoGoq5=*W|+ z^v6 zZFLp70WN5a?9)yN$-F(?vfbeH4T3qAz~9Z;k7_9=1eCVSB*>eD&$?{83|73{q<@Lw zWM_)^bx7u{Y{Mf&GmeiFk;=8z-X|`?awT~DV^@2POMd0t@R)KZoJK5DUNOv6?pVA; zF8}1(@=*!1XZ?%DP;S3&;EUv=c)xqxHdLM4wnKGatWjKz(p8PUgdXU<<(+gq z%;ikns10Ti3!H%6W-H9v!;$1_vpu5fNd;EM*2HMLjb#5d)sT8yqxBQA|umE+LwI!Z_5~cwS2trtH&pj!>9m`~=TK*2tW0SX=Dc)${D> zCG6&vN^=%gX?01jtL=mvXj!|-@XKD6zRe2FUwWRLHgTb(c!{}Paa~M+Q<#p{E&t}0 z?(P1*<@bwKa2xPgFwz z2FmC#?ZxE|Fu^TQAzwkb3xZI6k9g_Lz;xqEKg$|K>OHbDSbHJrbk0S7Rl~vvS3}I4_(s0c#W6 zY$`p@VD+@0>) z3r*=UhNLou;2Gv-G>O7e=;P$NPnsmUP~G*j&+RpaP@wHN*PGg^&A?4p*2h2og|qRb zM%rxT_tjpe*qYgnp?7H(_`uH?>Wrvb1_)vrgBoIngNKCxgoTWPf{28NiH(Slm6w&3 zi;<6znv|H4mZYPfp_r$VlAf!quBn})g_oJ2sX$)}Kp z#mB+I0D_#3&d$r&u8G8@&yv>NwyVsgv$u$g=9bG(ULTg z-yT}CwvnI3UHvpdnu+ZtOj9*qJ-m6&=1Feb0_H0iG}p<9v_^Vt2(1^qZUdKM#EP!f zzN|6X8C$i88ck7SCniNpm7d9W+cXlrnvyO{ukv!C%3IDJ-IeY_ah;U(@2+$JdExt~ z46ZM?g@^8SY*=qdqf;lc)rZzwMW8W1iYBab)V;|0o{ACI5f^Ke!=O3lcx=|a%v4>S zhS+bWzo|k!_vPjYw z$T(G-eONqpgoc&JbvTwKbgZ{WM=HCTDBP=J=~R_S^nK+LZvfJEoig;y2VYLx?IRyB zjA;PCg=}0X-dMQ>*P&<({?(N&JBcV@a66Ty4L;wTRaJ$8CCH(L2i8TRUqYp2nn{J7 z#a>r42HBuM3EJckSJ5RFAB#4ACRlbP7V{({7glLyOAK*HpF{_JbRt*~Ay?F6WMwqh zXMUm99Z6#vw4QNNeL|B$Cg;r0UDMx1LZ!NFy6)EH-#oP9S}Km4yf&|RrGjj0qbtscUPh4-ic))2AJ*|r zZh2TIu}4Qc2pbuiZ0!gotL=8AV7hthh^AdguK&G2=K7DGs485gB4?@RplynjS=rw^ z(Ws(}jUzlRs0F>eEff14BB$!^$h2Eq$#MsP z4d||0sgM%?S^^HWRjXa$yHZ&+XQ#iBs#+Ji0RUKFG7QjeJ(pXb>@I+*%Oy^2?z%_w z-n6vtnamz#;Gnf4a3KQ@fLm**5*yZ#GXKf2Cu|@B8nb=~riBb>AplX!vxw%phtX#p zl{#RgMv#HGZNPwUn-rWRCZ?6JAp(%AU9B>ALGtA(Z2*YkO01P3$XSj7S_Ih$*2o6T zF~Cj+w1VPf*DEK4v2tq&BONhxB{6W0d)9Je7=SQAD=|Q1H(0|V0T2WgD)uvxr_+4vk&F<}taY0qj|D zKfkCe0>QQ7Z{^cCc*SLH)EkMqvzI zIpJqI=0;0e7Sv}>%vy@x#>z=ql4TZ!WggX}ekS;(%eqO<%_mZ}4)3uWMjleHEG)RxjN(9lX~ zTp0$dw9A%8aGq=kCtV5qwwamiTgSCh8OZlk0Nikmm*r;jtOZj5Y!jQ0EUY4g8LPf{ zg*LggK%h0cmx2eL6z@-&$dMhvpu>~)?rD_#LSir!K=VYn8O;a?F4xLlrCf)^@4 z6-5@}LC83@Oj6LJh$ug*6|p-*vkv_}^r6%($4r74K^I)Bwh>Giu(d@iX1Bss8T|#r zHm+Uq_(V7odZ|27GnwkHL*f5M?d?mkyb*h0Vz)^fNZ03wk|qh1Y>jr6fu-f zLS#s56=9{$wNMHGFkDNLRC8`i>2jAwOtSzWyhpXtw-Bs0Mw-N%9A>B^d0AsVx^uk8 zoCXFwV8E5cmRmF^?-!Paof{o$T0_=ae~m2PbYapdO5}z97-!mcWDCH$LxKhcYb67c z@F`GYm}$Cj)Z%&63NSVFg^F-*dMjGPkFeytF6(Ry6Vj<`Fy~pk3Bxg)H@GY)12zL$ z7K%&Og)5Q48vj^O?Qse^*c}%1jUtxG&X!K9l{VMb4E3;0*5o>rrW<%LF#;vPTWwRm zc7ZE9QCMF1LS2ARxQ%r`ET^+dk6cYseT|_Egv{X*+%Wa5OS81KfSj?!B1ba7??&3A zxhlCLs=uIc0HoK9Yar?LBDV+zNYjhkKDlFeS2HgskkjuKo3MzZnPJ(IE%<&Jn^-QW zmusL4w+l6ZIrqOSb8B1N>Vlp8i+CU}i|;0n;2N2h@(vgENqQHrIKV#Mb2iO+^Gqq7 z=OwurpOCXpJIycuimKLA@5x^c*2)hC{DZdVP@alkdlO2!m5cT#w)6RgCNC+ZyAyxa zDOL-S-~SA;R_zenmfq(UR+N`kdwQ!<96aWi^fuZ_IP{^ z^XXK3LS%hnBscd~TZ0+i)=x5La}iS^nq&w)cOfrOV$&x_T|fm&KqdyI4M$QVWrHM} z;5Uw7AGUBo_hKddCNIVVeoY}i3KJ3t6F0u0a)5y|BUlRA;seRE4S@rKx}sdxBNtti zUtm*s3UoE&)^0JkAYVfvWwRnp_%XLbb`Hfh7jOX?Kz6usWJ&gagvW&eMI8IbDjK6H zGq!!T;(kx_A2>2MJRvWlVLgR2e=68NsU|0vGaLkyhW-#2G#DzK#Xk!;gf#~m3Zfc} z2LC8WqkxK%Bc{bQofcb4FohYAfrA%6@1qpW@hU)6Byo5`P}3UIP$JGlFb)=k8>JX& z!YT2BC1#i(_E$J6AtR?TeEAnB{#AJ9wi&9$8(h{t?`4Ex@e|VFNecvJ`c{CD5Ji)y z1SWtsY_KOCs5K3yUsE-Rtq6yWqch((H7uAkaCi+Q*f@L$hdz=g(X)p8v1;BJJUX>{ zzf=383gRweIMvR{nBy1Q&<)&L*buP!TRWuhn8{$^d!a(_Tj9zv```03{ zVv3jmi#IrnuYfPhpdLOVi;_c%*cTX^=MMIBaC<+sFGqh5Rd{cuXxg#cM zDnUpmPU9S9v`58=Zp-I|m&S=Sw;WX!H)UCn-vJy#**g!ZigQzwT;*9$*fr+yd`#k# 
zXQGfg*-wdgAATu|H`!%+Sd;5WRW1mZ2lIu=rxS`fPylF*(IFfI)f1Dx5#*|y=I;lgRh!~cfF=#q#=S)GW`qsh!72L_zvlUL~YGNuu{dCsE>{7jub11}bg< zRY0o0vXez=o~SB>YqE2X$(H8uthJh=?&G9QG$lKln77(qfwFw5nxBJLY2aC=u4-i7 z>NW97uevIVM;aiWTBn`}SG;O~O<`3uS*QB+uR_vQ&Z>k@#&|Y~T8wC~=4Pn#`mjVw zI+FUJ%t~%v!>6?BnLpvK=oy>fX(CA!6yCw4>vogBxT{h@joddGY&!o+pGu)=Ii!Lr zAxM~zud|C-12g}Zq7Eo?)G;<(N|25=lbjc=@UfmTo2A~jg}wt{2s>nuHl3{VX!_WB z^_s3fLbUhgWcmp$i28x2^G|Kjg3Jl7?xVF2OKu$^pCM9=V@r)j13!}Ho|ifrk(!fw zyO`7oqyc5ONtmD#8Ze;KxA~cpV<3- zEt2jneCd2MDks+hyJsR)(v*!3(b%YN|*}lofi7$w-z1IX~#Sn1!OD-y64% za+bk2IxN?g0J*S?t1QjBwWsw@{%ece$Cv`jt+o3p^gEv0dckXXt{^)Cro@JD~hM|wd3lDFgd{IwyeEKtU+3| z-CMQh+kyI8nf%Ja%-U$rOTLTBi_5vb+qrX$njGVsw|=L>gc`kuXTDf$HGx{HJzGYDs>RvAK-7=i$sJH3RUk3Bii^Oc>xvg#sEp>Npe6r!0ZM>ig}zwIfsG2jfJ&G9 zBgL&5k`c1Mm`c0h!L~C>#86_K*r~u1{J;g}!YiDR)M`ZhlD;4+kix6G{g{jL`o{n0 zf?w;p%=gF3n#Qo~Su;B&jHjAGmVE&$%m-+Ha~rFhDlFR384OmU7UM7#%b>^{%&siR zkoLb{Jh+*W!DX8>W(%C(T)%5ud^Xau8=T1l>&Wffenxwy2OG1SI~`OCW$v4VXiJw# ztGvyuv&6}>3M`cDsIPJ;$O7fg$BWDSiOZZcuZ&!}?TgSCn-s3Xz%h5j;#RQpGrSPJ zq~PnE{bG_5JF1|Wu}`YIPhzNn$gy9O2<*~-44D7PEFCWtYiTo`B;S0z1ew3;JjN<( zN{s2MnMke{T9`}utyOw>3;8Ap8-_#{%jhPB$<>dm(r!JtYc-sdoy zo6f7+sPW0q)A};hywy#eD5$_q4rt96daU-tIs%wKvdPa>SuVTVr$(Hk@;thF{aO5? z*a7*coa!+l+$wjvv)4O|Q)#?GD?}+x$LE}-fN7KBm##Buy*zu^{!F~92HY5nmX}%} z_sBx5F_u)!$;{ipl}gZE4VxJg-MU3eQat~fkn+4=4a0}KsgS{B@&&T3z%FNFIa1Rz zc1@V?YPfiD*GOidwAz^iBFZzn-n}zAfZNVd3yjp=!J8{0)R3SC({8ZAIx2MF5jx7y z(#538yk;uR^k)x;^gnfkm9Vpo^xc?2w#7(lUr%W(l`6Q3Vmh%cHr)ocNhBNM1u7bA zUpuNodcqeq)3rX-DCNT4sR+xHe9b?}p}|_+=N)R(*Eatnen_6kzKd07?1`!!+_})w zcG{}c+o$tfra4=?Tgim=rO(5B<@FQU#*L}A;hS9jmqL=+;s(ZuCljVj;UyC?KUa3& zl4EsVB?Ul0d7fh!;+KiS)hL~(C$0a;uPoTpiZ3F9w|rTp;=CUJLVi=5EP(jPKa*ck zbDBO)65&{=oyzDw9>%>&k1!094>m<|BzYmzRDp$2s$NkAjL6UTLZ~Z-BM9jyDYprX zO0sjtXffa;LJ1V4m#>01A*z65#cgqN;NVwcoy1vV#TKMeMQEv24d~yO7?(J_%*wZZ z4ka?E4l5;4*3}um2HTWH_2P~>kIaYuK=pzm2 zI0%|2m?2SU&VH1AQ8R*uetB}0C02G66g;fR5v)=r4o`RUl2csASs0QwvQt6(LARFu ztA@EC6pY4+JqZ8vbJh2ASXBS+HFR{aL_h)0>F3+R76Jrjwsi#C5`PA0b)$bFj(!Cw zTQfcl^H-9za8_gV(PeE|Oj8s5hLzT`Z6?_>=!9c4m_zwAPkF{i$YwhLK=rRk)IB_- zX6)G=?8qgI@+sdo6>lwfZgjSlWpaOX1pvG--H8n!?qRlE;}mFGS4U=4R1^d^vsYuO z=L7a2PL~uFp*IsH1ZL|HDS>82J-_gVUr6@AQ?>PRPxDwV#75O%1VON4bZ`0Ql|wz| zY=Lu)p>4^p{IO8%z$QLz}vXJ<~g1^^^Q1dJj18N%_*4)&5SI~kyL563N)-$<(w{fDMp zqYrlVv_2MOR*jVN3*TL_M_Fv3gFXca003)902+n>Vj7EzjE#zo8j+Kal8cvOm5!K< zmYkcJou84Gq@DmHf~u>mf+MM`R;n8SvH-NVv{tgXt4fBFkCU9F!lIS{3PFO4f)T0; z07`3vh+(Q_f-=;CU#;KIt=w#@VcY;l=ZJz`>p={xo6W2qtLopX*Ja@VEP~u&uHjS1 zB84h=37nj}gbn~Xa)Nr@6U0W0szdV-9a^<<#4?8fFbFDi0pvwN3b<_qAP^%)U>Ju5 zn<+|WC{v+m=9K>@b5zcsJA*-L_6i@qS0hUHvej!<(_5?HT{@O&XtAN4u&$)2$`*hG z0Hmm5L#_+_vc4EU=>)NeqyRxmwXoFjAd}+kV zOL8I$d3DS1B-x7?op6;aR@PQUs4#ELHnkbDN?J!@N-eafCz?7-{VWZ8^=o9%R0%%% zstYf`PJV6E_uE8rN+}5_<*d|UiLM*M6H(K!p8;|xsW*1Fu z^!wH=S6L`E&{_ta-EgdAz>s@-gw6~GX)}5oL9>;_>toSW)31_ zgg?hA=Slxi8qStgF|vVDiceEPV%1e4%5w-}?y-o^Z?&+(R4z`5H6m3d*0iBa&jApD zK!XTo%SzX|hgUwVtT7=+M>Z!(g5^=gj76{j0FQ3`p!ZFECy{rgK-b8nC0kfE^q5<1 zi3yB+z<_qwA>{alAxC_v)J$59z@+DGv#Gb?pCD#+Ol-F4XcM0`A%%~NsvwmCKDcnF-{fSqTfZe$c5Fi1m7)Ogv)C_qi<+)K16OuCwN^2_fO&9mnhv8XbbvdSk z#0fTED-fQ7K?D!s(*+v?nDksmZ61PwV{5LG01Y6qVGD-Mq=eq4JAOzERj8>LqE;x5 zxD)?Yo*0AxfOv%ajNVH>P)u^sFfn} zOOH^hX%rjhtijiA35YuCSO8G#RvBP5>5`|?LfeYT2Xad!e~N9jMk++vIdD%X+N8;i zgj%X1YdWci=_j=1`Yx;^JUnzOPXTQrPMO}MqAKBShBY*5hd1ZhLPc zvYRSm!Ly\>kw0ty(d3I^n`Q0rG(UL!^}%95G@-b*$IS~~Wyazht4P{Zt=CspQ{ zn}Ptojv$~$J&#I^gS75UKIiPT&X{*=E-^Qc>+{l}h*3oqU3`7?)JK1V3%;dYE_?se zj7$FRWm=|1jtr{YbBM=o1-Oc4s??>CARqB64V2#;G8ypz6k;`{?fIS`tgR#vvb}^= 
z4ly;n52jMI7mR<%f-WgPc^LuY#-xLp+m^4Uo`-%Y=bC!BoNI$#qJ*OEAVt1JiuA^-fVvsN^`WB`Ch8G&GIU>qx5DFbLswYj^^$F=LWC4*WFA4YGW;|n6NBWFXws+1 zM3734QNp91^w>y5;wD?^0b1-fx5_HwC^@vD8R$x<$}P@P14{TG>xkBl5g4p>mqLxr zV%fR@wh4FFpA2Ar>YodMPsDqUaQ<$EQLV{D%Mh8`s7?BPWOvmt|Aq$C}=h>l}u@V zizfg@muePR&LQ4qn$>&_;uvzjNgR)(N;~2v+C);l%!Hg4J;h2UV;=w0)C^~3GO6cO zip~{<=Qz!5rzUeq$Jv$BA+iXUpp0p#73nGN2g}PD6B669A`-nC$B;^C zsAvW2QOl)2Ol7g3IU{Oeo2r3!NG}70P(nLs11`pjvvUhfS7PCMOF%`_saL%#iEgu0 zuKHD=h$Sj%U+GVXipaF2JrO+p$(sY8lq zRmoSN#7~YXLhSrlnboZ}keZ>Drfo6DR@y2JrVIVn5W}^_Dn|cnpXfDLT-Qoj*fv?K}$ct9{9AIOK?$} zJ7L1U^17_qu4WU+GdLDDnI4j5Ar&}ddFD9Cx5c7usmod#6L_)3EN*{ed)4Vu(XOMF zZk6qM)Q92|sn<0sm2Vtehl=>H`1EnUN}Oi+%7(5e)}MI2HO$!XjIy>I83QFM;uS-e zyE_X7O66GL)mHbz!c5?Q?U!EavhvE7z3`BcJX{I)Ph9_uuB<*~H`X?v_*|)7>S|0o zyHKN6o@!1iW8bS=Gp8E6s8+SN8k)!1t~$})m2mu$YUtK_*~Y+)B72#}*{bN6pNAD) zr*oXv?^au>xsCE`p9)wd>vg~puCrNRiV0FOxU62??Y7CfV^(R$tTw^_Vf1ZTIa z7}hMfH~r?b_8CzLCUm#U%(QuPb-%y%b6Mxi>WTVQ-7}SOsbLLPe`EHIHI^^755?Fc z{};dM#&&WD3|?z*cG(!0<&_6}%Vit5m@f}>uGL&{ZfBRpohCMiVq8yRyH+T`MzW80 ztyMVEml6#xZ)sy4^qB{`wXeqNt_^GIj$ii3`fdMZ&xIb+L$}gTB5(Aen{3M~lDff3 z&ZdJ+o>g$~d0$1IZ^GTYb9aZDkT{3pJ@rk|fde|u)5foCy)O1j-&tRFpRm?>J=}`# zGrprcJB6o=@0$0WxhX2}6Tv*kZdo4Gx?XGLr)_VCW@^WZ?J>Qdt=@FInAbY~F``4f z^%B#&poG|8e!omwzH(@m>97Ul^}@&sR4!pXm>D^@CaO ztMx{f6Ikv)6M;%9LnmU?3t zhFm5;Kv-95b!|@=UuPI^7YB8oCu1AuhMwkduXl1q7jn^8ahnk0i z##UP#*MrwbY5v!9~w>Rw5XHB}jMD#c%kwTn2bwx#(|K z7>oTUf?&sgD3*)F_==S_Y-dMac6M}=w{6wcjpQebcDRf%7kam4iA|PhIrMNeC0@lk|%*4_k_bYe-wyy zss@pcsD}+Eci8uX`=e#^c#9braBH`Tgau{Q=xDnLcW=ji4|RI$sFDOokugVnM&@D6 zc929^fG(zGkSKduwu{e4lAlL>=Qm@a!+ze@c3qcmeAjLM*j>j+wJW_Z2` zfDyP^jMs~e{>7VqsjcoU6H}O%})Q-vqf#KJBV|SO-8H_NOZ20(@ z?8TSf=Z(I2ht3CRIVYB136s>hc+O=gJsDj?`D?=Td4pG>Q`c9QhmWnchgI2mv)5=F zX>wZGq4hSBnV5I1Ra|{%bz+EAGf9>esCUn1lfy`W#_5q5*O**%l-wnzI2oi2m}Tkc zZURY({`pr8J@Ar`!mn@wb|Gnw)+Lpv#wy_6emG8I98isK2Okf$_JX$8JF`) zuM385&RK|xXomdBgbpX2>4~h4IiOfdm@ws_iukFTYOdtD2=THCL_h?Xr$c{sbja9+ zAf*~PwXqtPErA#9mZ{Xxi&XhzdrBWg5rflVc-x`tnTB_hFhr20?T6lF2 z7;c&8u)}wszsWNAK(zl76H_u?=Bcsb7f$b0Zac(cCo2!|*|JVlB6~u$t`T6av|&e- zQWfdbUd%oTvz2CJLdcnSu)Xcb@=8j8YUxp(uX~ z3xNwj`!T0@sc)*5A;*(W8~aC4s3UVqoEVn1JA_ltQC|sSdt!@3q!eJ~Mo#o4Ww^+b z{1>32CbN)+Ym)|g^>(Ko_jvV$sV~`&A-hWll?r3C0klv+xuCn$0{~r+m(D_X=2CabZwm!~O-hm-ny2Szv&Gty%_)48cY6uA zr@R`ns1UIeGc=RI3chOv8Gw5nd6v*SFdFe0kmp03v|}hm87Xu#nWSi{kVu^5PDG@* zCz1a+{q(d*%4S@;p$|EsJ&SiSTV-LIx7Vnph*qvuX{9^zr$P#`e#;xl5E=#~zpW6z z@j#`q26vK6v}bcTvz0higEP?7P65-vpz9{(WVG!RExLhIi7^jWFd!`>Q!Tfx=_;!8 z35*E1rA!$|mN~_E+G0TZbaXg)>^OIHAQkIJzf(uDU?<3P4dWv*z>omcum&{6 z8*RWAR+=z0?ot#C&=hJs%Isj011p+GG7n)eC#}FGt#DIQKp)LiIa09>Y~)D?R1zs= z%Pheag8&f}12NiB%XM^}gqE9D$Z3@L%mk-~LwcTXIC2^Hq%qrU)_0=ZDum_?FVeuf zZc#u|L7%l*V;iFm@SH5{0Waf|4P}x@eAGLtfXM&wC@iDINCFb*V@M`3H|($q-f*fl zYM9x`Gm4&pnQ=LXDPl~!LZj zT#S~gj-0$gyR9d>kYX^g?rYYe@wXDx(cfSkWcp-0ixz^w0!9Es%HjlPQ6JI)&j3s+ zg&_zoz!E8N7B54@_aWODp)ubO1|D$6Nm57HqO#1#atE5GH6xM>%bQf%QOT>m$2O%) zEwIDwYajQcQxUP0-3nkd3#e=qlVUGTsE^0G6>?z(6Cp8>q1FZM5Ej8A>9gJkaXdIJ z2w~s^PTtfLIEHEuB(5aA#C8_;$QN-WdEFyRF64FH-!4kf`v z8E!{7+$iRg82E8Vn4yIKx0M9ihZ#I*FgTY`jdc15w*(86f0xiuIetmn2rr;IM&Tkz z0n(WLs;PL6w^1fUFNr}^c1U=T`@g!_u9T@>CLOdpHKs-Es%$Y=pZyV(5%$T#N znzH$k%o?}88rWRSflMvJFa;4+Qr%<021;;K?^4dqvxczTs;q$FFCqxgvn3pp>0^#l zyle=2JjCh{GB&&>y}jdbt}JEn%Llw3HsZ!9`ivXdaU1xY-;MvtS?6WM8@{d%u;;0| zi#S$NAO%+)Ml!PDMj;%nte*OMZw!$vyCN2p{?1_YI3bh|8sGyaUO_BjBllV{wVjH-vTc?hq5riAx8)C399_%mO$KCeq7!lc;@Qne^-!Qx0` zK)YKnR-gpC4z3Aj!1cW{yTTWZw7~5IGK!lKZ%o^@JTd@41W;k)`KvNJuIkS5=6NM+ zhZu?|Po(NQm%&YS3$NrW*zf)dUfQ~!Zq}flGXh@}FYfENf{{8vAM`*U>_X2Dz6748 
z3y6Ij#8Yb*B6~j$kpS?ZXe--al|aV~e>PxOK{p9d9drM++DP-hX^tzZj5b=9d>;07 z>VGgB;pFDMPJEUd*$%yQ-R=AB;xXHw1o13sthstpox83?wP7j?_}G+Fa+NP-TgO#P z^(IA0*n0}J01=GslvO%|-in#eJ*GOT(T8_zZij>i{*JNUuI|W_j6eilq)V3F8&cJY z72B5&Cy!3`CevA*;;W%YDGVRAzZP48dJ)WXrNbIuNRi7Q$Kw06Bl?|>g!fB=C^&3B zDXEcGt)E49PgoI*+f{BWQh=u=;gNKHUkcNnsk&~B^mz$lKl9q{{ zkBgj(m6ME_kfW`cp`@m&pR1&esG+dCr?Io6l)$#4oW;0`0D{So$&DJ##ih=Qv(U(t z%BacKs?%c4-`lFowBX#|*~Z=0o1nzHt*+F^lDVkD_wtzS-KMwwxuOMZN6;55TdoGq zI;0O9t%vjOIkWd{A;DLgunD78NtQZc=H@Mf2CQKr041T3Q#J@?xn~jG`D@gSWFmGI zThcS-l3_1U0U@Reni1&ChVX`UO!)C#ET9lq=`spcYR6tzk8P~ybmly%1|<$%s&UxY zuNVhL4LY{zO0h+)(p#$&oHwd8@5X$3l_>vQX>$R0%UDuVNw#O{%ABTDsXcjrwW6(B z*rYd$5+`3)m)LMqte$;>CAQJg*^^v3A|(yWw631F+KQe;jvvRJNQtHn4G*)xz@?{p zjamBNK7ALdsr{MQc5cRdYcWoK{MSU>*UffHH4(DK(eb7tY%Pl9`0Jgx8McnhFD{In z?6LwZz2*1V3zOTr)w zc4?e+nBZ~R~xKwq0Y;&o83a(7{?p0 zsI$%CvI%sEN7;Jpw!-ct*?Dj|+w`#Hj%zX0QxXOrto~W$WSsVP5 zk%&?%vC{&kt!mKi8eMIsOH+z9tY4E#cW0-rIc3ZK&gbaUaN?PkxFhRY9dsKWcsP&w z;B_|QL1pEvt(*dLEyxGWB2s2oPT+yS!0N zWW^pnZ;j`^H@td^=%W9I(+<@=tJyx^EcO?JEiKx~vs*de@yOEdViiBq8A?}Q^9DGs z6(wR-$$JX{0F1=eA?{snEiR#-*TRyj>{RG|9_$nCu;;qg(a&TUlM%&gr6dO7V}#Z+ z6+fkBi~KO;z#R&&9~}$cWZWe! z;-K&&7V;5?Fjf{I{;X?piypx6_c!OgY>;`>ql}cN6P!J#IfO)H>7tku8HL1DfKlQe zGg6b5n8z3k43PgyqH(O3e2+3@tWvK+Xu%25Ohhq*-<2}COOSPGKY65G0{;iV2oWMA z+LPWaHRdgj5zK|9Tt!b%LM#cwml^ectt_hOoZkd=B?U>$I=n2i&xv`MKIEneUQgNwPDS(Xmd?()f>EEN?O*3I5be(OkqiW-5|vZj(esE1;V2^GBc92#o%lqbv>ju#?de zcY)Ma_CEi_xRK$*s77__`N~+gBVCMjSQ?O0af(w3Dq)V;NGmW)!Hr~mA+6T4plgIW zmx>z4hGkmN@p_3%?*xxdS^aAFEQy{sS+=ApGZY*>deCPW)d*=NtcCno#|H6Hp_uy? ztwv%>mn1ZU%IJa-x*#8`?!vViFaiw#pa3HB%8XEJM?$d%vcG|AQ&uBwWkq$^s&%Vv z0p#QspBo)f2+XqB1tr+f%3X2PR-@na#3@n8r1mJIl#fW|A-?;J8x&L%0ANFRf0~oq zvSf(7Eoe7+!ZQWEWm-4QXk_nZQp~=xzubD7Z0yHM7uhjHWh|Q7zS$cnL3oHp0Bv{A zYtaAwj-s2M9nm)F%UzoI^rz-{=tCtSaZc8 zMoEj?2;?DGMU**2*~iM+u}da!t$Yle@3uxFdo8(tA!FlpN!G4hl=vA8FzSqr`q%lM zn7c66u}=p803#@&yBY?-2+T}pqZYuZbLMWGt>6O*0s#X*<}RC09B1yzwzm>A0ua#p zj5{+L8EDupt7o+Kxz2PRbfS@MR3-Ep)bkB?caZ>qF$4c# z0MD2LngLK|yK-03NZ4&MJ}tonyqP8Q-K`SdjMiX(Vc5x_fqMb)>^5r{(3K7}J1)7# zVxhC9xnpvIw?|G^at(>@JaB^k4bJV|a$r&ctTN>5Xnj+f!~g)o#UIUV?uuK^zBV%r zU_9d&lN;I3DD<_HVeOEkm9{EJIi%4#@A=|85^x4U0g^xjw6+|rx@K=VLCC0N{c_fa zDq#F%h{(#EuYm@CD|1Z(tIWACqYW~ud@GV>9BZ88#6Gi86CedC=vBT-paE?!o`m_< zIMg#%M#WKHZIq-r%^TPHeDNM%Zu>gUH?aZ`x~}3!U&5#+kl>gBecnRfc&Gmc9*$e% znpmiFYm`*OAy)$)aQ}r@chn8vV_tXV>aGqM)ZTjDg{*J+0NtqC=57*(y>Uu+UB1tz z^r9b6X?COa&h8DznDZ=lNq4;OKPLQ{8By)8hokj?O|hl>jseDd*6~K=ujUmC-6ogF zp6crIIa?KB;|g!98>P^h+A4mfUspn8Rx$DWKI7-_m(re9Hd@7`zs7Hadi4Qw!Gp2Y?QKzq?wX;Sb7$An_rSZ!SwTO&b%xR(EYcNbsipm&Avgh6M9 z-W6)0l?lFfV;k^hCy|ZMaBe7LOI%iEma{NX79;x9LWgueS%pO`h$t^JBrWBOXmng* zM_=|QYW_7rl%`bq_>rbmkDEvi)>w`j$a+HNSs)pXU-o(_32CMjQ6-p&C|L_0h<`9K zX@Ip)ic?`Xb3{DY6i_5LOk++5#xw<~N&nDQcKt@Wnb3tUnk{O zNh3;za7xv{AWqd_PofeLQd@^GV@AMFJNZ&Dp(Tq$luDyF0(4GgGe)CwBe+w34&+$m zqCQwzVE@85I22EEBo3AEU+L8_afw@RSu)xX48CO>@{#{!(!^maL1Nc&nap=D_Z61? 
z@KCC!7fI!r2eUcE)j{jymUswBTgG|(!zQqaBup`7h4VnSs9476khB2|6I4({)lkhf zoRqK)A10Adxe3!$Yw{3H^~F%_2aNG}5~x9u8d#G=RZ-}MO`=C!$)uZhIDU9hhtV@N zNQ6fVsf+WnQN8(n6ckBM)*9Z$Bhjf7chpnyc$|fTTrxG8kmsAzF3`gnNC zR9@*NEb7r*qf#*1LqP5KOj+cfxA{R%S9N-0bg4#=#Pfc5NMX)FW5>0hY}0TIf}BlN zbu5H)%<)AY36j{fMH|GN*9oI(lv7(7QjOxFba?-m4R$T9;vDnjLR(`u5G16;*~# zSw&k0EQ2I51IAQ^34Zl*OS$nwwAo7|Br?#$keE3<C(7bCjPaVAB$nnU{GB1~*L^OXKpT0VSnK3P8)nQ%~8N zF0`Q^DxgS*IDAN<_Vk9N8lmAOAhPi&{W$6)mP|S}X;Vz)ikSlUhmnIf zo8?Q91*>L7NWgPho~LF07Om6ymzt42;0H`K^MWf_k?a{ym6EI)vw1$leY|uuSk_<6 zw5g*dLl9G&tgZvK#`rs_>a4EPuIK8i<5d4dvgwv{x`%SLuxzNXi_|;ru_O>l9Q#%>icy0O zds=H0k+?vmV>MGw#;20Gn-;^WFw-<81+y-&p@B%A zOm=kB({xU>Ba7>@(E_Qxt6HIIf=%MAk%B(Mic(TLVOZ2iilQR6E3>E;RsS}Wlk+yY ziLi`IxWEd#;6Z*=#aNEOngy1NorhPN1sd7AC{fpf52QlCNH>&cpKlwv)^$Op+F9S5 zq5=FkPiwloJEFJ4HSSvI<`?##? zP9l3RRQtIX>3w2V#PX8DlhVaAtG;CsvQo2FLF*V#B3OG0q}UlmOWDP2g}8>wCva@4 z6FYgf*~VHGuuV)^k_E)+L!I(uYfGD}zFVOL=dLkRylYD;&MCO>S#W?%kl%~Ledxg; zyhNpPPyaNkJdBaF>XQ$uH64t&9&EL?#${a#VU{Z@L)<<_OtwK>r+erFH&>&qsiae+%34QO3{MjBx={>2$}FjNh%^c%!i4EB zlx(&!t101(jHVV-feZh|jkTxco4)(=r=Z!aOZTT3+*YP)uoi2+3Eapb%y5?4klHMn z0aIMOIBJ;JFFPeob+Vbm`M%D8UZ6Ul*I>>s0aHZ8WJvZgUV0&DJXQH@x&v9LJ+q+e z1A5%+bVZAG@NCenG*&BkyFXK&kd&m5r2KGM?1f0fsQ;5-=Ot^Spn<($y?`!4S_dY-*AIYwSNsu{>+Me{t#lU2m*X;Fo{ zM{=0McI_PP1yx?!bv0#WpQpPVd_~Q) zIBt>67$)#4Mj4aJK*d9Y4#`2f#l71vUj$qV*07x?n$BQt_D7?>ZB*`MA6^Dx9gYY{ z?kH}Gb926(p01r@n$Q7uOkw7MM2DaNdlTu{+%Y<$D%7e`+|$y`;@@qp+3K`c=i%#z zSeq?H1-wkwagR>$V+w$1#70{xc3xrNTe|RK;ZT-aSwf=eQ>$l>JN680paeuceV=7y z!X^K1lF03$AlBXh#4M>*R0Th07^v++5XOHOpcF6L_FTy03IM+d)N(ovXd12RAc@r2AY~fG_fd=x+E_QX+TYps{+mHaD z^$aWk0Mkx;Zkyp*$&aH>v1P2Y7pt3=my{Q*+~v3A`J=R#a%e)ZjUgdB$&Z--+Pc!n+~g2c(G z`8g<=Rl8V~p0-IP)9RE>OKKoY&SqIZbT98i|AL3Bpeea`d-rB4e_~|`-*xs^39$c& z@)d0IMSKcRiVDAzMxgL)UuRm_lXk9mmWYOJCSMAe^)w)D#Fh=|$Cu(dFvyL|`o~E6Qgb9R~zKEod$byi!%FDON&e713(agzW%h~^t*4)#| z-`dpI-{i^9;MC&H>&NNt%Q>jVL zG|lSeX#{`^r3^VS^^irYQ-vJ00+3==uZatNAqAD8)vKooK8=h3Q%~ zeD(g3YiG_JyngnCo;#{;XufjlirQoMlxaMp`{V_^x|Q8Lf8)G{^U1GjI)K~00YEs4 z5T{aXA{Z*uHs&aMFAbFn1B?*LLP*%KCB%!+2p_`k4nnldSD{iE^5S%)NLX5si8K8j zG}0B$PDlVuk)cV_LViRqKiy|Es=jdLUx0G2_5hzxHuTAw^ZbD)66LQHR_*+r^JounL7?Ne2Q@yDMTXPbg zm6|(*X(S1E7uBUAfXbXkNDwJ_a-S!_XwbzP#AxE3d*vlE$N=fRbU|W&X|mINY7t3M z1d7N-$Ql;SNRgB&DTWtvNjat4SzBhsjd8U_B^!hHfD@2#L!ovXK2))}npIad^(AX# zjR;k5KmF4f05=grQ$&Oq!PX=rIZ{9v#|`4h6m1bo2twRBLK**xhE)jw3#Ms+NKY|M zG8lR{h|&os9G1t(MC4K>P{TUr2o5noCj3mo%hr_~d1rj5{bb885ba`PZ5&rApg zps);2z~u&QDm0|D*MjUa~P|pVaJy=OTJ=?2i zb+E`*3p-(jHQ(l2L>oeFu5w((k7ee$Up|n)+342pFqbPb1uknV>!0dE7Pk5s=z;3X zRdGP4Hv4t1Dc>U-#JI*AtWoTGAM29VT$ezpg$w^$47`}^$W=b=1?Vos`52Z^NR4h0 zY=DWPkG0GKAnd6RN!TJ-mJ$@KviPus8!VXrzCyAEfs8`g(^$jQRwiP_&STi4;clEo zM6>-zW<>FguP)>wfTIlQ0*jkufr>CucK1WqGU*Q*vzZ97Vn7zYn#EwzM*iE3Qp{K&{Ovds{8 zqN^CLqUba~O3syu44x(1Xfd#D&teHQ+r}1TOH*!%eB_&;Pt>?VXmlxl#Q{1HYIS&QsiQTvu1rbQYVy>=gT!|2b zV28+XhAnNrl%zai*+fm|^H;*MCsa7)p;y`lSfm@L80n{&I4%pH_JgGkOJyf@X3<#3 zQYW$4$ukUgWsnE0rE-Xgz>XGDLKfQPDLtx4J4v#28gxxP8#%VaNt1Y0OldkY**#!F z?s_KtVfxgxLTCaLDzwuYQYSe>WJ1e&IBlQ&L<&=xYH4&+^x*k0rd4LGkAt{0szBP; zQI=kik_A+X+de5&)UEWY)QTwmbo#8L7LS-wRjQjPy0o@>ZH=k)s@LwSxv4HSm3ma9 zRE`Nkvr2VCVnwS!(^x^mel4mLMBD!d6|~H1Io6F%l$t7y+SiR~Gl-tk-Upop)tQ~G zHX}43XI0xle@-%EUu&WWG3&(AGI1)2J*mT}_`s)Hk&-ezCpzJ}Sc|%ri1=&jTWRXY zxU$Zp6Qt=a7gkBwQS_zh3n|CaNnNq6jX~LUr-qO#N8(27ie-&ub;%m6!X9XocnT!` zLP^p!K8?BmG;af|=t6pmX|JsVt7qvOx;1rjh_(BscZ(`E*t(QX^5yO2dO6^x*437M zm1_C0iOACRbG+C*YcCWya8y9{w`-I{h@gPK*wR5p}Fqc59kl4dYYB+IZI*vaT6DJ3&ux*}HkkofpgO zp{6jc%>1Y*+dHzARy)^4^|oS_ZRH})@|Il2*iwxP&pjR$rPub=Va2^_`X0B{M9ypy 
zJuPNH4;)RoJ}bqnh+Y4zcG#XL9w|}h{6~Z1ceODE9<^H=&Ur)I(gQ};U)LETgcj6l z2Td=<#cWI+XV<^iWg%bTTlG=);^TPi@yM zK(PL7?|hJS`N zR#6l?^_769LwSLQep@APU$h+`_h9{{Ys_;dHbiTCmwa%ifYPN_VK;P|*LB*&Z=I)Z z07zGl)m0LhZ<{lL0fba~WH_dgSmfj&)>nf=Bt2%t3^@2kV~0jRD1ACx%w2 zJR#T^tJeP*SD0i_NIPDbgy14Tzte@QL_>YEGablFbeJW;2Yk(BgsT)G@bWX5b~Jw2 zg%GhyJJVwI1Z={DQUbPTv-MgGmoZv}JmW+TsdcdHSFt=MCy$cW4YKo~?v zTM>mhC{MXDPB+s--Sloiq&ukSb7+Kxz9Ky?LVjDrS36}|D${v@_g@@ShxX@n8>EY7 z7EdxVaSR7MtfJS!L!I8+w*hq6mU)L|e0TSR0X0<$!w+oG89F&Uap)Yy7%x~jAGlH%I+Tug;ulH-DD`q4P+1F; zm<*EfC-YK|H9SXTAu7H=B5ZJ(ndwWU^KAtpdxZ&o5ruJ8R%ph_EWY zPgbjtCLux595v65d8Cd!fkGU<7Nv1F}pq1jJvLGItA)3EHo6e*E zZx@xAuXBO?gKDfuLO-Nid%_>rVF>UckN}`FfucVZsV|-|IFivB_+kX@!Kj8{o_E;^ zov{iNQAe54GUs9uV%iC8u_>(3qm#2DY+<49ai~Q)sy(SFmFfwO04YNntG<(}n4q8F z=_(wm3GLyiRLOp3SX?XGetgCpTmyr$6++7hns8$(sS2Q9I#Gkbp08RRg#ZK;!2*w2 z28`0Bl-d>(fg-UGBVPKa*kK46FbbUDs1YD2vp^jH;3S0bE?lWChA{$#F*r`iAAQjm z(?k({aUYHl8k0enDM20d5eAI11`0tXu+RyzVhB6Z7Ltc_SafrCre}aar7DIiZi(5a+4^yBZ(#h^_ztqJ}`PmRblz5CQ4xn`*Hl;TjSd(H@vG2uZM|7vP~I zJFlBy2$j$ava$gi@ez&SBoJ|{h7bna0XvM zBC%BJ8U2ABS{e!y>IgUS7c3B;tzZSIAR>C<9!c=Cuj?0VFt{`_vL;*qalW`?zm;Jl zw|ClAO|0j5QiPCuE2Rb-yoL|~`U!~TAtl5MsEwec8CgS*SO_Kyr;gyM=SmC5Ik~?1 z9EA`BqFNsPePSlY8O`Px38NKnc%AuS{Ic1u&^4U=z*}lnE;hb7_hLb zh2W^b5Uz+Qo0?{<>Xn!chEEntd}EY}qth6J;h}@DA0^AL>3F1mL9GA)uK+`-EZibg z=^3e#l1N$?r&2lSm>7ei2?`-3Xxj-eJP4zkuD)|5MZyMC>m5?!z^~g)`&$Ss0K>0a z7%c3iSm~QQY_8NeH0mP%udmv_S8JD#(F;&) zo;%{I<(sCo@VZQEruYKC0OA(e0fGisHa@50D#1o?5bz! zGmTQfP=B9{hsCHS8xN!Jlwr5%W8-d6AhAcWf>?Z=*DL zdIf>0V{3uAAOtbV5!wlWVuy&cyF95lS}MTO%uO(4C1bq*oHLxL((tN9qkeALF4elb zg)Gs1%n07HJAJ$-NBX|+>o|F92rXb9bjcln^DV3TFZh`_SQ@Say{C&5feVCMUNkZN z!-D*$CWL@8#T<+OQ-$TRDu2jMXqii;gr>!;JYQHoFAPIDbXP8qjK0Wy$AqQy zM);P_!+3U>mNX16SYjoU&D5pK)HaKRLPR(LO_oJ*n~Sx69=UkovOJ&#j(oF#C)9}S z(3KtNP#D|{Dfvv!J7idsfz>v*1f5fGxnVuGlt`lgg=h(hwEQ`4$aZPDj}#eaVJ3gt z6~aBoy~16O`elO>m)W-6h0<+8EA`aiHIUtOhKpEqdY6y*Sa3l&jQ|$CHdTAQH8H=( z)s-km&r^sJ<%cJxj+c~sjWmf&gB0qhTYa;J?g*93)wZ4F*j33{IK6PrFbnase#fuetueyZD&}g^*Gph*jTiS7FcPD zsfaDAkASnNKDx<=LV=4Qbv~yhZK37=gF;l0+)YW3NKGW7;L+fQI%$iCX;IPW zZtaH9R<6q%_g-kcRl#=XtrltbSl;#n+Vol8pi?{)$VFxIe#MPd%qVtIN9JkvRr-d6 zF&@x@wS_e%SxF~nLdI^h226@>ZZXqY09SEQ4%@s&tWjFwvF(Wy=M{X8UTF-C6J~GO zNn7P4aFAtb6^Vwrb3lGpIN%3Pv<>W|pi%NNErWS-CrIpa>~SYK&tQKnqp=Z%P7 zc*A{xv)AXYCX>0uWr!Bo-IFAZC=jnCVJlg?pL_%5wz5A+hYKBjM6k&OJR&Ec!varV`vW0 zS*L;$58akWrx=TRLs`^;}*hwIjeOHckQt5iiwg-aj&5DgzS46$_P=TKSsE%gPH(TGPKM<*hJJjJc_j$rv zVJD~FiI$T#9%GJph#vcYbB`S64oFFV@z7g$EY*NWZ^nfk)GQX0J2;C~eKj^;muSY2wS0Q^HF%Y@GIH2CMjZ_t`P^u4a4)=lZCPL98%r!jbxJsve} z#b~7eyqHCQ19)f|h5d!! 
z*0R`}-{zv_tF+X+#nqk0>hs^o?C;^Xw)nccu+74d*-9tv5x7j#>|IL73}7sAk#NDY zcJCZFSe3{H^W|)urdj0wvbl@LjN~(omqL0|xsKpFeIWx9i`On$F90+B$=fKbp+}4Z zwS{}ubDO_w7vTjhiu7nIj7m8j>o_r`&z&Tj6{DK;B|e%qAtvN0QysrjXJty$S_*2f zssrb3HH4FCs#yOv0xWs=y*4VrvgRbEF~`phjX46-d_XN#%vHG0nGP^@hEMtu7c zExL~*Vdf0Wv8PUPz#xx|Hj%Z|vM2!qm7F(kYpy#ZUtV0BJGj-Lu2p|do+@nLxP>y@8+W~J&~Ep>kKZ-DVy>q% zQmlP{GCPEXwO8H$U-*6Z-*rEQhm~X5O*hv;3Pz$HVoE`#9E1q2r=VULR)~~oun}e9 zN*jhpn?t`rb5VP7$!DQ!zpQxDiK5NK5Qs1qryOY}B^Fp)c-6$=Zw?6+lV#ex7}bjt zw&awPNVYVjO?CaqUV@!j2S6b>p_gBe1iCkkltdl(VwE6T1t4B!8W>iRD$bSTQ~VSO z6^j}nC*@ZGmbPGv5bjdfmiry}6QO8o7vq71);J7ySz$Lxm#BpH9c}s<*PT1!9ac$Q zVt{(%ZOY(RsUno_>FFmF33!=jWg=vvW|E4eUyWqBR8Tnr5vG=dpY11KrtnlbY>Fbv z8Yp0SstAn#W65pjYm>hU7FbFO#rNtp01OIhXr?t=lx;Pw6c9==E>)pKI~oa~V9)(m zUuIjaDwU6%Y$<0`n+`(ga`r-G-a%+WDx$trAUWM(R#2&V4W(lu07`&eS z8+6CIx!F{maasqhbi2Jtq?jw7<>|%BuDKz6I}$BXm)UL>-M`Nts^)&6Vh~X?FHFh3iTmCh^b0?r8To27maU+1r>f z^IU_^<8{X~)+Wo8bldH?mT&Iw{{9MLf1~>1AC>s$n@}O7X8wwps5DYG-&}5D`Gc7K z=)#IYv1}31UNFBKN?e*asAx87(!F507v67wGtivxfzbgPOe+?M1- zq_j>o$vip?QK4L=C*at_i~p;l0{eJB0|G#jMZCZT{%C|X7(s!SIbyd$Wj`u@Q6FF2 z#Gr_ffJm~Tf-I{dT}I$RGg48Km84$)**L*GmL-TINZ|YGH$*z_F@eF=l8+#d%g#Q*@TA(vrd+CyAtl?XLwWj7>CL868< zHzEp>0FVI?t60u%B_M$;h-0_HP=+$tua$ap#Tr=_hK0HXe~wV#Cn3o)E8uhgqG9+# z#H#3oiNgXQ~0^pMgr07R*NmA`O%V|TA8Lg&gi+m4|(i zVShq4RnP&{nxK_rSWDY2B7Aft_A3JrPy4@CvP=m+uqXif>Ca0Vv0EPhH6?E`U{Eja zwunxU<@G`(Qd~|nsR&hWB;i`xl#0@e3P=DgyD)%OR&l6g!>2|`2s(UvmbdnE0vucE z$H(riTeRG5SrK|zfxZhy9ctvGXoASP9e8?;VG}oex4CXH_l*(kR#3l--z<95el(y# zf%(f@pR$Yw77YUmO=>?W=+wCv1?}D}npBC2lw~qtu@^_u6MbFrRHK^L|OfKA$O4Nz7ho(0=WHf*|EeGM%sJxkYu0f5eI zV8I)mZ~=L5WUezS1zo5*@wV(O*99(nkRRUwfO}i!UXu zFuX-pC(s$<5gWl>|7TFTnEIZ*TrO|LJ91Tj`o>;-^(zhk-Rb{%^b2e7u`VIG2FF73 zmb?vT5?LMeXAT;)KJ6CImC9zAFBVp_Qp|i@W{faLPB>^r_IrADo*H}brMR5$(?M;& zDu>U03Lr8*Z$8+sBGl>K(&$WOTEtHt{C{%y;^B)JDx;EGzx4*K;LRCV(!$hR_ig$H za?aN8*4UI>5(LlzpzCM8dfjVKK$K0XSblr`gv)UdrWh2p>Jh2IQ#lvoG}kM;F{s~8 z*u36hj?`PQ8Ri7 zBNH+#Yf*6=cmtj%KwonewgpovWe&z-Le+5pJz6+gW&=`G(PgmYaFv8bpE!!BA`x9s zL0t4uji*uuWK`&LBdUZ8wx&g_Rg3y_Ty};)G6NmoLn{|@Jeg=_reb>umK-kQDRE?c z0v9*}6iABq2;UTr!*(Q;f{JChVtH~#dm}-j1T(5;L637mTC$G!R23ekj**2%17{{u z=T{?AOS-f_+bD2((?kVXV7B!>^I{QMQgL!|Fc$}jC9^yV85_J|7sZ1!tHmiu4P(zo7(i;s?G6EQYjhQUM1bfZ{ zRUiX2ZPFeHgeV0hno#*PO?V`hl9NwjnfoF+vUzqTG9qay7i-BGx#NU$>6U#%l)HhP zxksBLH=`b;sV=(VIFaIU-MD*P$tluViB9M)q$xE) zVm!(=Im~98o%v9sfs+B)_APzX(;s17@?OI3}}TQ^Fkj38Q3}F31vQ z4e6nHsi9OloWQd#>4=^{(>$%kkGE5uO5!~(36nHQrfV9JIyxp5<0W9KW`3ziU8<85 zmzvcnld+*8(0LwK6QlGXHadclie#q5=9@2rJsvZdg4CqR;xKahsOW-me(I1*;-+od zo|=L+l5f0cuv(^?U7X%Ioee}hB}s+BX4(|f=|q_X;2qM;~DIv@7Ao-eaQUK*Qc z)-IqapSXH6embZrVo21}n~l>pD3c%HxjUj7nhWxoKmuE2!m2y7gjh)~I25Y4H#9i( zB$$YQMvPh2&84^my@cto+GVbAwLo3e;=YV_sTmE zx}#XLF(R6TfBUx#D^K7dv+7ewVyio7XP39>oVC&_Pw}-tVr*plFsCb%!5N(cYaF84 zn#!3ziu$_x*PAT)tr?q0PwBZQs<)V`rcg34N71KBsiK#NBlb5QOu?(FOS?m=9CIX( z3Cp#b%B|=cj55MLU`jnDD?>pkaS-K*1&NzeIkKu0ddRCW5y`0;yQdDBmAjdnG@!v6oYOwQ4E4fjYdZ$u&*|v$xx}4e7kl5IVWhJ_Kv1 zQ_{3e$Pi4#kLyv8Hmj%LTd4sVd*`dX!m^{a^ToqKFF`tmRXhBi=h|TH@07QFktcmS88wE2fsESs{Dvw!= z7MPr78ZvR|lCOyyBBTNx#d)y-Be`OBw10JM0$if(L`Kn;M0`d_wC;TZr}Uu-XqZ4x8&dozTgCo;KTjj`hDTA zJ>cE_;S(O=96sONjo}+E;lK@)7q>JyG|ZHkkh}ObY#?6fa7LzJYp&e|4sPNW&f^bG z;sL(n_+8&4(B8al;wX;a9bV+v?c@0UOd`(U7+&Gi-(6KH$Lp0xy8zQUK^7?%{-Hgl>Xn_ zP2?h8;0lmgRF2?8UgZsr>S5l`th1m99MOMee7s~oT`&dmS62Q)1R~Yi8-N6Np5rCL9XnvZtaLJ?Um*1&CUi&0OkSy?97x|_U-IMZtkeg>O{ck zus-egP43v9?RXyT(GJ|q-tDm7+U%a=+`jHz(C*xR?Ch@XtqlNDK<2DI@Y!D9^X}=T zuI+)|BeW~KZA-~MA~DEERxC47Smt(ZmH~#|1^O=XKrZVSQ0f()+*AbVU9R63fZMK) zS;3RO}Y>=Y1J0vxUz^|i) 
zDd$IFz;n8VYfm>>RHXE+?e`eoYrw5ZgwF<(MeRDCS^v)XP(A{XuH%~Y+KgY{Nd7wa3V-rJZ|@|p;M6YqM6dZSMg&UW+Db3_oj>rc&IX5H^&?RF!Oi%sZ34Rg z>LIWBT`*Y}Kv^y?{EOZNJ74*jW#m2&+x3q0r(gTR&-lMTSws;0#ee!FKjuCk`hVW` ziO%HB|KDwHAJka2GyUR$LrUtlhaPxZBVheBkMjuL`p*yl?>4~bcMklT^!_J50!jb~ zBO_8$BV9^tY=nkNf`Lsic`~qKTWSl0-zC zvr>t(jhme&CaJ8fO0&7NU6_Z1g~^GZj+>vkjf23i!o$N-3JSHGxU&}u*TtQ)n1ZK? zN=n+`L<&lr0H@u%hMe_;QpK!-)c(|+Mq~=8p`e(82n~ukcrc+th8h|!bQn?MM1&F@ zE`+E7KtTW?L4FLGkfay@G(s8yFh~KW3$yC|W7eq~%~04dT}q@(N-BLZySN*MZRwOFj;Wes1upmPLE6WGeJ*>10uCS%#G#`1TF*sNGm^A*3>sogBS zwn%0|IPSnCo;_Ghk`p3z zq*{ruRq`4Q@CX%OItlv26nR!@qYWFY(6|@>CejdxR9guS$6kA3Wx`%MKzgE0K$wv7 z-3zsh)!$Fo2t&vw`5ZPAj&=0{Pjc$XWtCjZ!S_{}qg+!9RKsW!%W%N>M}(5LyjNY7 z2=2qpRRN(Ro`sVUO3gJKipY>@p=pHKh>c=ck%p+XxKJ1%vC(2k3;|R3 zhR(GRKf;PKI#I_H!WEmmo^q;4^^dw2zZ|VGCA$T2djKd5OL1{^vtze>0qOHNBLrJv zTbN`i?sct5{F8#Id;&3#(8(OxyOnYM5561bTkoUwf;hH^8I{=4YZcwlMWzOStD|IL zB}vWD$Bm)@Q+g-D_gdf5-Gz>4S~XW$^O++V-XT9$xZmW&gWxMw~slC4MpAhhhDqD%fz=j(bK?wwtltv_^qtr#{bGeIP_L|c=cipaZ z(1T1^9(Y0V#Sc}4`$q=9jq2I%FYCI-{lI6V^bx^cVWXF%LUgbH zWu!Fn`ba}eQj&ut5FsxF#s}}mwZB1WhESBC?NZUi3N__}6ik{7qc;-{&Txxe)1o-S zh{7^T@pP)|9_?=DMG>kogB^<04?8rr-swvjL0~4dk@XyB4ZE08jeZj{ zVo=9B;}uVmB2jk%DZnk^mrreKD3AW!7c5gr%~T5XcRQS?N4Yi(C0GFo7{LG#1o4H@ zVL}W(paHx*iisru+<<^?Orrem6jfbjvH%G%sz0QV037FyH||{mnKJN`Vu^;e;f3st8t)Dsq7l zh5J42-}8WpIK5#E<|P%%T9MV#7jqI{+uZQv=sni0;Hdga~+- z8YR^*d1tYa2yOUwFGfg+H8WS^));@s>6d}$D z2*X0ZKpiAZ!3T5ga3_#}gkfZ03O9h929$v65jf$*FKhA$Zcu=N%z6q8ATJxNfPw60 zq5!D>j%NW5(P}X;zz3=3iB~r;1)yrc3R7@0CZO%>wWt)>1t?PiFpz;aLj~B?PQ(|c zUhIT^_+^DaQf{z<}z7R7F5qap7h~00eH~1PRa)x_1EpfLa#-TaX9;b$v!a zGN5U$CIb_ed`^HsNg?x;J5;rD0Ye zhD{b*JN8>ECrjf;SkDK6Mwo>?$b_inT+0;!4kUiy$8mV}aT>M(X{Kv3Kv--LWGRPd z1$So{5CKbML6O5)HK$7b(rEtIScnE$jY3LQl|e1{TDC)BvC#z$CycGmOF$)pk0~)XfAn0aBpaCWZTVk;Pg|!v|q$Pgj z#}WV#25z`}cG!Lq@deOljd6BoG!PqO=MrwVaheEnl1LsAu#RHTj;A$pup(rIwT~dz ziE}jtYcLh(7K}cydnJ}%;CF<&XK`Y9XO-9xx&wa;5p^1NfNw^8_Xu+OICg-gecTst z9>xmW=ZI42M9i5 zfO{IC072k-#~5W(=K&>|2&czjVSrvEAdr}FjhP^ce#Tr>83va&0zr^$b9Dh@2~}ON zlAp?l93dXk4`kVp*|AdTuEiSadl&G&qEX?KL_1Up6^ z>=$o~$qH|#W{Ssm3E%{P;#u)0ei@)yEFgHz$A~;eUg88kF}FjjsAzfwL_6tN&lF7| z@>pb~TEGBw2*?JgCW{Q1Uw)}y24Msr_?t;)f*Vkt<*5N!$s<}hS7Rp(A-4t_a0BuA zS7~`=;)jG!#S>@Qky2THV(^x4$d?82d>N3I17MyTP-|=^1qIQFV$hv{xd}lq09J4V zR$v7Xm;gbS2$%?lKv0s){Pceask76DSg0?@esT&eYD)+8UUvoVTT^j zZUw=g`xclD>H#gdr)KTOVGRM+U)89aWV`Z~QTlt}+Ik9r1QD=-Be0%rd1sYMr~*ot z4+ySBFa^b$0!k>EgW7I)Xar4`cTYD3E&H|22X_qBuKPdSegYPTHJY>E330E%K$0hUF{hWcc^C9uzh!R zRBuKFr~+*fPzFy?tIfdx8$biCr>*U{UIU>2RoR1J(J3~K=2DDuM~Wp)IVmE{)CEc4 zUMb+3Nl+C6R>ncyajy5s+(ixdv#JyX}<%zva4@S^yD{sRVgu9~zg+ z>jabApqkaJZUu&)i@Pi^qKcRZG$6aTn+PcPi{a`P<$HQGFkvZ4qo>8K*Px;Q35aW8 zx+1r{N7!|@kfKM*yGb`!R#3bPC7j(AWxKdVN6B>IcBmLo5K?%g93)BF^QmS-Da@Hi zin~vahI7?Ka}mKN$V5wGAO)NTLYyQ2ej|{=lff>^FbjI6VVRHtM~FAaP$yq7VVY1> z|EK_8kYRWO!$G47*MJnV^aV$l!?A`c4%VhvwrNY;q?BeU#o3ZiE!={~;3m2=0AWxjN5Lk_$O=y)Q7Kly*+NV{>1g{WN{rT6d)z~N>>)Y>=f?zd88k6;@g4{!dmVZK7M~{5TRb^PobzBWKPF(CptA$+q0g`(;(& zOiZ;^S^kiXYBt6*bVWp9UkYYFOfgdh1)aZxe_!gQ2Eif{p*G4n$n=ED-JH(D+|D_M z(Dk=Tn>?MnWXuYU&lT-+;PlYpyjmE|Uj@Ccz%z3kd^Y)`M`{xqC;da`ywWVqMD-NR z-V8p%jLr^K!5CFT^gPZ0cgtr)&t^K)WBSj^SztX0V24&L>zLrW4KC z5uH8vY|*P6O``SPZ_Mu)x_42ZPG0bQIiddbWKm7 zz01fg$+?Z%;WX2iEZF0H*;9pz|Mh=!1h39%)B?7Xuf(_~(a11o&e_e4Z(%Khb>Y7I8qtjC&Bp{tQ1Lu5PyRag{K69ni^ zUKlooNP=l{YDYx2FJLMe_7Z-57jQBcy-o|Z6ZAu~K{svJ+fD2c{q(fu%k9J_KzZZ; z$I6R^e=^RlUr-}-p~vVf*mhG`9d!jgg(j%{@tHw z+l@=%rx>Q#EXdRzKrDeGXKv=M6z;AO5=syOAaMiw#w#JT3*&McSTQSIyux>E^Q3+i zZUG7(&lJt5#yar=9bpqnV-GFA`$NwZU11x=&+>7#E`we!LvzvD`BS?`7gRWh+Y=}hv 
zyhH^zU%|Zq8%m}O03#z-3Yi&5wo1~yL`WH#pn_jkL>C3Qtx^mOBT^g6=%Seg3>w*j zyt#wa@wxT#@b~y^`uqF(z5LM|7;qjnL758r!$&PsCNW|hMwIwa!^DXdEk=Y$u_DHg z5_^J9m=L0S|jJkAz$Z)Y!mLw-(fJ^RSUP~)Tk@pj_mEO<|CXh85Di#oass> z(^(pk@w$)}xr<+UgU zOOE*E4n^G^w<1gVk%xg3MkK&Oeb#vwV+4Y*#6nF!l1COc(_!OWjKldw6IlR+;fRL5 zu@n>l;w4vQdMOQ7MQ;Fn)y0&yiBuYi8|GHoWvH1Lp@|^MDdC%JVhG#+n+4*g8*K!( z*5Qc};s#NL5~lXppAyLhBLM9IAl!{)QaPoP1I{#aAu=%^`+UI3x0NzXmKLCCY_>Xcx+{P zqByOZrcIXXv3o)|Xh8=h)+d}PAL^NC0Nh}N66vBlTW=gXfa!3=Z%&nGeWa&vi&Xr4(u z3+)aYDHNde(er{VTaatYG;4`Hd;PSZLeICGhkQEC>~7Saji76}o{Z<9arZ^V;(zUaohExBQXcq)6dnu4aiv4U)St2fPUj+=OGa&Ei!w-y?>vYX@1 z9_aB}Kdv~&b?SDtp?YuDxwMqAK6;|rGma7KefO!TM`A~7u4NxfFSde!w-3DVH*32%N5>KL~M7{*l=k&G15Q-WIA zE%VinjH*1Mn})Yb2yu{-u)JWfFl4hoeh+;HOP%BY7_~?;NzH^+q~i(Ih`n1b2Xf-V z(hQ+BHl8d&8NX*O0(t(!5WFBDyG^AzBWT4cd-niJ!mvT~#{F2#E zmzu$BifEKG#asJocGL*kj)6!;Vl)XEN}ZMus_s;22)UWci#1iAGWF%h=2uN4itLJO zt;qH=lT!ntw5)uL>$7|&J_A0|mUum@hLT7BsKJKtex{UXLw&d=aUw97*b}5X_nKIt zGLNko4d9&c`oiEfmV4$?WgQE7I|MrRvw!{UT~kKcMFlf>Ike?zn>tI?f|IpZEne>~ zJ6Hw=_LTNJtRu_XQMa+Oon&>X8MS#lt!6Z(n_TR2;b+a3X4AM`MIl&~Th#;Vk$&o| z;_f&a)u3K?tFR^SRCnu8W6H2lJk;7r5gI*fWfih}Y*`QkI!D^@H*Y_LsZw`3!vzA^ zGXIU`eLH$yT<+JT1QlZ-51g8U($k_W4Vr@^D$#V_mz(+hn?HYwN_5_Bj372EJqyb= z8*0;K7M&@B&FWZa#;k=EEGimnOk>FZ64<`pGI2Dm3FF=>n3qQ`XYO3w&+!Lk+I&BdvP7O+s|d zuxm4yMeI>h%58A;4Wxz_yv`qU^uY+$X>iGT)0&Odgm3lmQnmZtdLpmTUOjX~;de(o z3z^cov~*32?8ql?`fZAyu6gqq=|1+Csdg4zZe0!7{_-x)g48f*Kbq+-f-KDu_G_nG zj6>hK)rGa|bFt63(?K3E$O1%kj)w@u>z}yYsFq5(oex z`2+<#0000i00031umJ}Hh5!Hn1Ox>8o&bb|g!JOq^W&wow6r87B>$)Y(a_KN%WI94p*KZaYjuR&+}!-{()IQA z`r4Dn%Fy@6VfnW*Nmgq2s}z@=r$SC--QVJUg_Fa@%B-xc?Ck9N<+8lK!Zb8A{QdM4 z6cqaH%42PP_q8oON@09_d{A6%{;(ILqoenv1o_sB`NmxLu_Tk1pR%^S%FWXC(S18e zUgY50`}N%T_xSnYoc79P^{yP4pQ+#A&sDt+MUz@%!z*LqkJ!e2EPW4U3D5 z$j8U~?Zi`4Q~JbHrK_`RYisA|>e$%VV`F3d{OkJUtSl@n{HhE0&UTTPpgBfe#lh0h)tAhS*p!{x*52RX;o{@u<>u#!-P`Ew?e6dJ@$&QX z)b#iG`TEHF{r>-+>;W7|u%N+%kOU%J$grU!b*cm?bm(d##fum-va_hMqeoF3KZ za#hKZC=uOAi3lRgm%TW^9JF%gBbPQ;iGj&XO3t6&h*V+{AshiFj#OBvX^bUKK~I4W ziHeATEfA;H0Nu)!5g9>R7WGk7rt7|wW(CQ7##E7ru!R479{PGI2Y|L|BT>z32yP?1 zvU~v8J7^A}WF7d1LaVq^i${hv1u;aYlp8^S8%tuWSy2pS9O`&NAW3VXE`|h{>CD=b z4gfl8ID;UX6jI2o2NBMddlZDvnbuOu4B+wCIdC*`?u|+?Ap)p(3CFah5DrIjDaa+{ z7k~xk;9ISIF_>2l0nr7OEi_%w#0_4<%YP+X&=5#%v4$d@j7p)$gxqF`7xy({ReJM{ zvRs0!6oH^14@%O2Bt!EFvck3 zNF~n5;fW35XycAN_UPk}KrT{akVF<~MkT2f^q zSZ*?9GF^(IVz6`xcnMPk~Nz#HaZHUu} zBt50nk4WqY)Sh5%&Clz04apIsgrP|oWRv~%C1!&owj*GR(sm_!piM~Gl9WyNBX0w6 zwb z0SCSVa6kT*ueCq__Y0r?_Gi4^L2hyT+uZ=~N4f3IkAS=jU;ushKmH9*Eo(`h0WJ4F z?e&3y4QyQnaThrdHqL_|lnL+-*uO!TaCi_*p$hdkzzEima0ztW3&%2sF>FwS7@VQ} zQi!+-(r|~dBVr0|*S;VMv55b}-Q;|jKtcr2hc4tm53MMOEn;vGSo9(mqiDq$Ix&C? zte|kzNX95$ag1)PVjR&pz%jN%^Kvp_%s zvWj-3;~CM2NSToFh$F0|9Vbc1JbIGDjAbEX z2}>^mK$f?Zr7dYWOIQ|ymWIrwFAWJyVh&T5#WW@_eMyMR2ossW6lO1b3C(3zGn&UV zW-bST%V#RHn_zrpF})egaYmDy!1N+BshQ1ff)kzA4CXD_8Bb+SbDZQPCoXx(&U_{! 
znbPcH5BW*Yg8Flo;>6}R&ACv3j=>FXNTx*P`OSG=W}+C)rYs>EQD1J6p&;ERL>0rSJ|QLkckul(d_DYe?xyYls^ZXG9Gu_;Wt z>b0vt6@pL&TUE~Hw5Qh`Do!seQ$ZYdt|onEVXcbUx>mEOjddwZMQhf~685pN1*2%^ z+0@SZQnB02C07IMRpAQuwB1zdV4qsq!%{P<8(r#PYx`Tz0yLjtjjL%9YFxScwx+=q z>s1jeS)1}!o5URLZ6908!ltyR`ZVoihuK|u7L>Z_#cNXoDqryuHMNCRE`F^U-?a|+ zw}EA@Q3-lfzsk3`4pyxHe(UPqmpYZU)kP^~CtB8`!tJJRezmz=59Xo645 zS`g#5x*r9tL=OzrU@8`>9VV`HRmxcl-`K`I_A8GC+u!#F^`R*yDNZl5T3T+_ocX-2 zc6n@67F!pe0==#_3k+G<&a|#Y{_j~63{L-cHLN=(GeEKWU=lkQuGy`pMd#a3<}SI# zQO)UPAubJ)tnq`&3R;DdYt5ALTU1jIhqPP_BpDRjW4nNk^?tJb(0o`DP8rqqm zPOor}8PK85m%8EQCR!W)?n^fqsS7TrS(Q4|)s8u-7-PerfLddE)vesy#i- za3i17*zrAQl!BL1U zuG?+W7{}A%&AmComy29gU%b@vO>?p9iewNs^`W+n`A&DuOg>Zl-tsMFT;)D)UQZX) zTdwX=KYUyN;`+SY{@(jOeLZ)z+x_k`Pwc~is(6fgdZ8BYDi(*m*%lA_!Je0Gd&zs| ziJv!~y>xevAD>I1`Z(xHUT+wY4sfCe8QCgkw6;5*T#{~jy>f0e$hU3q(4ze0;=LzQ zb-mctdKuI99jAcNsr;Hoyz~l>vE6OXQ_ZJ2p^=w+*ku~*MyomNq^_{vXY1;Mv)k+z z*7AziX?KT@8vTDqZ;@^9((FF{wf1CR6ZdYu_Da2FeKr?f_f}2A6>OH4Xl;jj4d+a* zwPBqVTy!^Jm&arAb!9P!f8JMjxD{NF7IHO4cm^m>71v`2htHX zdw3@QW9p`Ylh$X!^icCQeEWuEPKR-n6?%M^fy>8Ri3fGDC3xLLV-^K}dDVc&MP%kD zbt=Ym@ML1d_H9h2UuouOkrse`M_M>$RrrT><7RpbwM{@Ma?ti&v=(>zq-ao>WEt38 zeKvliW_2uAb} zc6#OYa&IVTmS=<&n263de4Qv-HRoT{r+|Ytf%fKIjCNhyr-<>_bR{Qb;0J?d*Kc6h zPL!C74#jadc4u3(d-z3n;+opK%)J*log(k&%?A3i~$8iG4Z3*{m?k9Qlb!g~DeG}Gms}yObj3a=(JisxsDrGbmo|6 znb(3~=8cKSQ(M_-HpzbL1x1eJ552Ye32eUgY*KuCwPCSrLdJ%SMb23hcm zwaAk&$AcsXjQk`M1(0hLXl3{Kh}UO)CS?Ow00j&%e;y}(-82As<58_9P={%TL^W+A z1^`g-jnpP|l;x8ZJOJNAV|1KO4<2z`cxM%&p-xI30vzr=;1Vdd8%*DF|jso#g2VHfjiZ>Z4)c0I!odf{>Yy+Nk2Wrk7Xj^~!g&rz>Ag5*|my9o& zqJ+?@f?%q8V3wsBH#Tqt46vt&&>nU`t&3U+k2yDca5*tL2w~9wt%H!CdIPSs$pB;F zOBIj>gbD?k*2hR~|DITjL-1$J-;g0QWzTA%o8uA_Mf zdzv?ePykl2N`^2U*b03j`fDE=WZT$>W>`uzxr^p#X$`r6`{!joDSUl4T>V6YaR+e} z_?L%cHd!F9001^nU;~rTJpeERG;4s0pscq;oN(h|tTq}3R)7WT`H7`fc$o!}4F!{3CU)Ugk&L*E z^)`4{*_>N>v4UAkc{XJDTB$pG2pq7dk22a2;@1h>sbg;u&0dV2tNp?08pJ6$N*N*2X?Tp-`J;`notu1s%EW2&!Hq>t)MI4ctj0Iy@=wSr&+Hju7_V5)=1u@?$!)%21E zxm>74vUmu1y}EM~NM`-Cdt_yh@aLDjB&t$c2!iVW2#ll$akI3Fpu@cbzU+Z+?aGAj z$*6iKyZwp+{7QWSNWND)w9e!J)j7V1FaSs_PI{&XGoZlKDg+Zun>f2gK72UTYPgQG zqN=x75y^?fM}J>AOJ{6+d!=o!xW=Psa_0$%_C$#V7J6^`jxX$sJ1ed*o1gA`M0TpR zg|Ie-K(&dWtcuJy;Hv=y+oyKA#6&EhmYlGq`F?~T0lBL`l`NhEu)a?zoxodms|Bn zP@hR#@Cjgq7H-1DqtFNdDPXXfSg?#Uw4yrysfkmf*Ln!GiJb#Fz|`6Y+B^u@8mZXY zvongbU4*DJz{nJguiMI=SpYU8`MT?Dq`|bU`1`0^qzC!@N|@8m=sQp9Ot6Ds1J^3W z0AQAZx|e$hot8C`l6ISliC|>fyy7>^BCAmDXwl}DY~U4ma13JwkjjkXOq{AY!Q`xc za0hE4ye5S?nJhp6zyK%!t1T^{Yhz!tJEH9QN}PJYgCGH;Gea>aIka0ljPu_0 z$p$DnVsp>ac{`U8)b5L?5&)!wAiN1%2v|)BjJ&(oh+#PimLUy}b|`tC$9h|+gEE$d zuo>6H+P3Frmw)_$-}p{8roV;|hfY=h$jIumU6g{34Sr5pqbLR0|1_eC>Xa{AX3u-s zOI&}1uyss!*^IWGJAKF8^`C0>m+E)Xl_qC}#-d=RjQGctC7it?$X=`{Xa|>|OqX%x z^}OgdbHQy0Be|0H#fnn8Z10zJ%-73-%bh+uh$@&p}DcxI4II>vgSXfwVzTLO$i*5CY?fW!%0 z`?qE072K5NsbT4{))!phNo+Uudmsp`t;LlIsm+s}_^YCWy@`bnU7Cn|>{9P-dkbXu{XX#?95FE0$|~D8~|q*bIKmp!I@# zsBbrFj>m`JENP9@t*GHRtj#9GQl{NUnNlk5n+g?fk*8_FEQ*DA+b~YwRT*$yuH-%X zcx9%6h6R`fWq&7_+0KY;6N!d%Xiq=pX(nvPr)j+V&5DihN2BFS&WN7|% z+e^ncxmGRihu1cK8~0Z#yMO=~ZG6S$7ki=8-FIdN&48|u`@Py9o{Gz-<<1?6(Cm;2 zzG3YJa4zV=*0|dHHk_eoY#N%NMmX6F^~P6@fdN-m%N)0krO}j`Zc(G=7V8vcq__itTb2`T1>22<}c~CB^ilV z0q;y}7jkKNZRl8gvR-z{j9UM$=wNE_C$4{$_l4J3@CeSAQ%TLnJ#_t;UwBsWU1(Le zn6WE&kGz_(?Y(;u3GWepV!o3%x`60IDYj(esj6T zeGs~HwN~ywDT`E3e%Du=7T@E>WvqACm!&vnEb5i~?wZ?(?;UJ?${F$1w&MXe>tg4Z z&Q71*$CC`Y^W>fXe{xrT>K>Qqn6V;N>cy2-C%*JoX^Nod^)Kashlp{{PJG!4XwJuK z9FBr{nU6LWy}>^36;AEDb@jchpo5=Vq;*g1IAq{RVvAN|gfFI?ulcR7`xy>w9vE{$ z_k1kQl5~Dd#pdN`yzEo@f`4y=qJI2K$NUq{rUG}2UC#W=H=Mr=hO6WB@I 
zxYo&3BuHZv0f+xONPoqwity5@SjIy`4r&vrmlU81eZx6mG399-RZfik2;0EsP=8-fyq>_f)?y~=`>)9a!n<|LPj(sMVFPZNKmSAXph zO-S@gQSs%tM4eG(K9OWo(&jGUU9Yb{Z4v}+^TjwS?m?sb+gkS%S8ataL=d&1s&-=8 z&;Uq8;_Qxpw!*_>8VyMZx{);FIj@C|m6Nxf%$Tk^Hsp*9xVZ_84EdHN;vRQ);qz7D zTyv%4Cbhb-#0uuX($AK$3$f#ZBVau##0)0#N5`9`kFXTH#hS2NoiobgWQ)Ph`sY7- zR37hwHYWemMb)bMDGialGs|(Rk44=nCo6Acnbq=>F*=3JX|!e;pkXt5M_JMz`>4C6 zWhF=Ra?1q~Td(6S_);&S`d6T>P+xggUf9-AMCuQQH@k#L>-H7fDBRQ0unma)b_>R)epyjbSXU-X7Ge1nj(>ffYKWLSNpGF?l;5->cWEkC z4&KR=X_vZp%1NGc^2-x(?BjnI5@zA9avniuX-WfN4ThFZwr_Uo`=B5SQ5ZnT0bp)` zj>Oy_y03UGerZ)5+IW|ab007JBVhilRjG2q%$F8Y9>Nfv{ z-g%WRAa$NU7D5cMfCH6d1t=5%R*eq)XHd&z06>5NRH(8O?=}eH}}vVZ0Nlg z#R}Y6B>=1d0j4bB@Sev50ECe3l1YgI06+j!JYRC7w-wuK*aAcF?P(J>9s*VP0RS4G zb8U;7@{oU*8I?)7w|fWP@a#{+4|C4uLtAR^L*#F%97lPxOyZ=+whds;6hHLd3V!fI z0~pc-KbYL{E5|k#hwk6(SDD-HuX6AQfA6_#xiMk73e?<5qj z{Qw{UGA{72EdfW%kEm`!TqrcwiSW|HmLhQMGDeGT2<~v{C;&kSF5m(zVEX@JCNLT# zzs3U)mZ0$f0sRio5Bg8b6b}Fxz;v{1^{UL|AfWdy0|LyA0z9t@Y|uaUjP?o;{X8!U zVvy28?dFUE0(?;Z7y#?Etd9JG1_3|-wX6t#jL4jjA*PV^5TFSb!U)>{0QQc^>>v!8 zBK5X~DF6Wu009RRkSTu9^#A}5iR=rB38cF$s?6>hpcBIl0&H#z6;BOqZUFXh3z-4|8&KkGZs!bd?z+$m zmmn3laOZ5U55w(fif+<~Z1mKSArJrxlTbNCk;q!F3QE8M0D$}?ZukFk@BPFO8Rswx zCC=Q|VCQ--<3dP57RzPgM37|4jZ$hY>}!wuOW6SH{Z3$AXz7tc%D<>!6*&(8jPNpk zp!GtLNX{=ClW^_|AQ1U*_C(+cd9WU(>sR8Hua{%uN7WZwu^>DKanhn4mZOwv@G=ik10xz4=6 zAoCnRBwsNv4-W^K@&zDNx`J#P241Fft#KDJnqe{?Rafk10x^ z)7r*!_{)BF#@|lqPl#(#!iEirYzU-a2pD1$0pJ97lOb*s@c7^=&y3neZPQLDIEifU z4wFTM@iIb=IS2AG@bZiF@V`)KCx31!`mX{s5)gK=4`M?(W6QY+b#3$e{S@u2k*(4I=^ zh{&lcKx$@u%9@r>)NZpkv5hwwLJbP=(E{$78Y&4*&OHCIt^WXTBv7(H?M^03GBT;a z=1^2XAP@$d5&)LKG6B;G2U0%K4CmpN+M-c+V&nw5W{5+3R zZ$vD^P$Xdy0{rSr+YkK$u?s(7=-ST@2A~Pr!1I&|4Y`m#u_L99vjo8MKm`;_$<+Ul z000gWGR=(|mn{@$@GTiaB=M8>;6O~P%nwA;G^rD>C`hyT24j7;>DPZmx7Vyw-NiQSg z5HIuca6zk#VL=7|qN%LBs0pSuKW*^o5HA9m!cfC)=zbOeN;TZN^Y9vvOB5FVc+YIl z7FIq$6d8a4S+Dy512qe#5fM!$*oWGFgiim?6ab_kWEsL8%b)|~Gzo-2m=>xwB+es8 z!mbAF7aP~q{?9nEtqU8)aj~sTvTvpg?I#du*7j>q4DXQ;x6{HcDdP{DkVN=~H0ww= z`xFe~CQ4gQ=f)O^KK!SLghB}DG-N@xPK}7AqKt@ObLy52*SJaJIIGDF4gHYG_JmN& z~sGshe%qlO%-_Fs8!om#xcSRpq6WE1}xR;$w5iW zO`Mi_mj`zDtbgUy1^V`K2nopwt$-XCu>R)p_~v`LgSeE<#)=V#T=DT##M{W%Q*UH} zQgnKQN;^8MqJ;Cg2JQC}M_!P4l;%i@r|L9vHNc*TOsvrgTh>!^qGQ2v+)Ob=+c!un4TO#A9IZ zkQ+zRQm2V;2B3yh+@i_AU}v08mS{xeWH3?40B5k~&5$UJyJ+luyl#Zx*rFz=#sv8| zk#+BetEa3&d^HXy_H2C_`PKhojA`b@ahD~LFAk3mh+B;9I%Fkd!fbg#r~ohU z=HEDi3P=D>(+@G!m>?*Ur<7LZXfowmR|@#{GBEmhZn>2A*|9L`jVepbKB{JjEgkVB zoAAuH%(-GSZ7MTkr>w6%@Xnr$inVeO!eBGP(9DB&%-1ONz{la_i`=6VU2ElUC(M`gC1$|XbW%)vbAWOk6He!$8Oht$>cfa1a;VVM|7x_ZJ4Yw z(UtK_C^2TY&IFvZMv4Cd48}ZGhdLN#9|t;+>u`^ZOzb&PdcZaTfLM!K;)eNeLL+xN z2?DLa3YKwuvkwoOh{#ajTN{=Y8Da^(lgLtWOuMiLm2N7PXgKRfQ*>w?gQ+y~)^LWmwyiX)KBoJUnuU;nrtOrbdjYLZk)5 zR>&q-q@}+oj&c7P!UIN}AwU}+HBtcrlq9e6sA)hruurJbr)F6(Qn^q_F9QMQZXs;W zAcL@klXnZp6ejP_2yM3SyNkj{FW2+pX0t={GH#Y3$TG>CusAzKZH8-%_jh4%55^z2yAr2Y+B7g-oor$>pz!)KP^^yP9TpHH*EesG=M_0voL^ zC8GZ!taS`ZwAO-lPC7F*sk=-|z+MVFXx(IOCg>|-#4SMaadS3nn2}b?Z6m+RW-mwY za%&|nZsOG{{op$#xb+I)vk$(;rPtP=Ig(6kjSU=sFKYFV8+nFfh5QY|2S0u-sx8Dg z>_FqY@Cmm}iYWpd%Qzj;%UMH z7=8}7;7>h|9tl+b(6>k0Am1H99R&J#e)DOR_sV|Vn-1VOPb_ZF=GG8 zgK$Wev{Q_TBMm#Cd-i=8_Hbia-ha47X9432*4tiFeG0YUQKuiDY6QTb z0)zrKEkdwZ;i!e9R-RHhigM$~ktP>L#Aq_ET&XUl!i>vSuG@nJBi03pQz2ZI8fV%> zIrk+)zHtfDg`0SL5i71Msm+NQ(4 zuvLTC=~mHw2ZCjlp8Rw^a)I#{}mC#y>;WuA<@R|R&8fqWS z_m_Y?VK3CVNUVY^pRgp_9P`vuhsRJO(84+O^k2J>B#bZmI$1CQw4vc@|fxWf`ZFc%j%?VkOeHP)MllWTC2jYPRT} zfxbtRlhn0VA8)w1*AsmEp-JaXdh$skfn+iXlx@MZ`j(W5DiY2ik41$b2fPE}iKnYW^A8W*Dah!$q7ZQBr^`1<Qw zcS|N2Ws%l|d1#-b`L2_6?;MzH=uQhEmnF6=IzW36aO5J6Pkv;Q<14G^- 
zplqr%V3Xh5(6he(xs7HD6U~{>Q?3hvOf?Sj&9F3w8w;u`NtqE`1ObFNA1R7vsv}!V z6b3Xc>8fF11K0_JltMU_jyJW7(&p$lLYft9g$v0{t)lcBH?;j3az-GD4yqbC-h&r`gFJ) zp6gQyK_k})_#LJBByYfi+ssa5wl@Jtdv_$@el%uC0cLMK_i3YO)`%e6x3|8vG%|>}9HrVMMxXT6W|064oGOzyIKwQl zaZLhT-(oh&M5^#?c0A{4-WbV2Zqc1A%cKnZ=QMNbQ)4yL$p2_2#nhFmfA&1pj6k%R zbg3;vRNLE~5c14LK@3U~s}z`i#IkiI%3!wo62AW)C7u-7?^BcMp-4Yynjq!vd9;EthqTSD=uVF*;t8|$#`LIee{*VJ)WRhtmn~_A z)Zrhc%%d6EaBDK;YflhMWFjQ4$VdNt*2+w%s&7#&h;clrJS%t}EH1An9>iJVg0iNa zf;BcI9o40vC`he&l7q+sCRm^Pl;A-!Gt30+fUd{T7-hz8uJc=<>=qhdrVmvwWoHvb zI>Vy<=Bs2fY*X*}xW}najcGMv+(s0iGRB0jXxrPvX3Lqohz+l?d{bq3+Zzv7v_7$Y zA#j;9*WyMBfyUL;>1d0<-};7C<3yWvO}YP;QSB`+t-76W)`ZldG0bEk9Nd(6k|=;( zNoEjRVU_Zk5)#(Up}(}2MHgwz+T@UyC#)m6mi0D>R^=_U(q$YQ$X`PNH82#z+N1Ra7u}8SLSn4eT|l&CqE5vq1YzPI8l?q|v5FFa35HpXEbj z%#im_+m5uqtzDn2;JQ~I!woP8#t*_E6P^8W3w6vgaWtRlME`nAy|qNxl!Z zm?^0BGO|2sqoQf;3a_iovAit;)?6#RN{tCOjUX{;;tI4N78cKcf0HIlW2&?>{cY6k z5;h6lCP`tdZZ6>&RH-=1qCfqsCcBn1@5ECc{T1+wZ3`p5icMncJ*?MXjTkpA)s?EK z+f)qQ^0G@QK*7SaKs6RO5G_q$A0upIGjS+vSgT5)w~cdjh+3PXB1n}HAHPH zZ@KO3k}0S~IWPDl2?o@l2b=$7J8E}QUE{=Xd)rL*dl#83PC=)l%+JPpA+lfU^ob>^ zajc7;y)J?*#X0a`6LqK`J!3xK((+ebajdB`2tva8D$7?Qx0d>wV-az2J=pq{tP8o5 z^dP*ojNa?d-cF(bUNYXTwbJcW>tJ&c3O~?(Go%pC(5bIyd9V|yLr8n!c;78gQv8`? zTOK(z@l|E|rK_HMw8shcm#=w5`G_v5^I7|}E;pRu>?Kp?Z{uNliy1 z(V-^!wj*i-6_T@okVbjx)+Pl|79BQvCz52GvL_=*H%-@d*uz*DrAOw~6;#j!O^_pw z#ARr+OP-{InB!MXkys_-d`lES3sPhuf?f*qd|qOGumgNN6lGxqfsm7UXrfH_qayKg z6HzcPGKed6vld*DBT!L>cJ^uG2LM079Z&&MNLY8TB38QL8Bu^E?(r^7Kor>(6{{wH zPvL`gr#;-VdWN%24m1-(CxhA1f7nrc-_|cNQg=tu2LQl_lQU%_RBLgxL&^7dTm=^& zNN2;BXu1`55aR!PF2*H!7Eotr5hcI?VDS}^h#e9L0HP9#y0|@s7)G5m5fy*{oydu0 zC~w;_FYq9P6nt0(MzI_+7>MnGEy#6;leUbtW)#=~ z00=OKNb!sIa$f+Tjgpv&H?agaF^)9lXvs#7sG%@pVr-KYcjE+Vjin;#B}-IwCwHS} zTNQqKbQC2pjmh(iD*_09&;%C273(;HWhf+NI2?MjIMS$#ZTOKLI4^>NBUdqv$~X}y zi4`%BgcyV?75Nl0>5jPKdH&ZN_Q;OVk&FJf22fBIIY}))2}T0>k@FIgy*Pr!(m;Sj zI9Br+Iuid`@D_ze1|vA)6fns?rPVjlGb_?}GKDvP!I+U3F)sx$07-Ea6yT1WXc0m% zhq)*f5O9lL@f1Uui&Zc$41kl7I9W0Y05mz20004od5<#4hlA-95Cs5%sFW5#nM-vt z9kB(?;S>n4msXIA4gdgs00EoX2HRmB3}6Rdv5~pBktVq!Oqq*-u$MFm6&|UJN5KU` zRUk2NnHDjZBe;%Hu$QdpfWbyRjWHz-q)3+~92VgL(K#HTRx;Q`SPcebpLREy5r}Pg zlut336@Zg(If5Edkp-}hDA|b@P@KlYM}63rTydIHk((npl*L(wz9@(Qum<>e07XE$ zzZ{3zC0IuPCARsKo#+Sc$(LL)2Do_>M;QeLzz|FDm{7?}4C)mz;EP=LjsSoM=n0H( z$CNkmnhd%IC;*pc=yWQFU9>`F|21Exp-LhUozXcJ3SfuzS)Yj1d&&1%_0*%IXB1p9 zHWZ2}+A$>g=mAFYo%tAvAsUG!au97`p}7eNHsFy)GNl}O6Mn#lO@N#0_>7UbnOqZ; zBWNU4`V=vMlS2v>4G@V{FqA#3aj2Ru!xda7p zBSPZ;2Zk8~mtv}TG$uEKqkf>MNkJ5CI2I^b0e~MTp2n9kaD58lY8rg?cK$8G7 zh(!?t^EoeVFrR$Lqzo{ZY)XeWnv@^`l16$Hc1oWjDvePvq07>k0I&pAFqnNv0i)Th zcQPi$W^EicIPPSs7GWfonwuv26hZJ51z-W;w}9^EPELxWM?tF^YMFj|6ESd%ugRx5 zsx;)Oi$mF}N3od@yPF)j1VXTyN`{QLIESZt6iaZc0KgOzLInj-0PPxM@7RV6Fo%J- zm)Po%(12Z8F_a1#iVzT-eEFm~Ij1uC5nJ$<^D-}4+Nyxiwg6xO zOzITv>J$vRu1Q-Fq-vRC+JYq|0&v?L^~kf8d9y)#qW7w>xoM#q`d2scpGR?vWvH*C z@+n9%DTo*&&bD{>G#`wMqk!P1R|^0XkhOT=x-ILn{%VGUDrfV^1x#uYV}Xo!`xL3_ zlUiCN_=>Kg5(O6v2ng`10Rk1cs~-_7gHWK0-`NI3X`58QrKh{CN#O@BYZY{NUhoM3 z8w(XSyP>`-gKxek6%fT#tH!QdA~qQwrY`>eNX|W zxdrQ(o<;E$n~9GaxVWZk6ctexZJ?}~8p0;ovtnDAS$cIy1c?SEad{IPYl`YtZBo^OjI+2}l7Up2fLP3u3FvZ>=m%$NjyTG!II2Vmm^ASvJZFQ6 z)RVpL+<|C8e=gWJ13&nJ$pxRGs=VcC zfu3t|bR;ntW)o%jp6kphlDADP<}g+0!dynw;^D$?7KALBe1oO`J)UNN%(q5}g;1Zw z90V+bj5;q59L=z+p9h@LR1IfoGFI9Gx`y?O^#>_81YYbRBrTPAPLqimQ5HXF88y^( zz_g9K)IMKhgwA4hROHlx$Q&BlE}zp(63J8@}T& z)+Wv2)en8Eu-mS_$h~iD5oRqGlA>S5qrVb)fsL6%$=SUkW+?ibhrfe>N(N9Ch+dip zb;@SUg!H-JYjX^vE=XM)BIY27xOPw$ISkD?8n(ZIb(t2g4u7O{zMB-dpoc$4V=f+aarLm12U>||~iM)t*U zTh(x4LVQdH-Hua^ZDUMf>>-rBJ1qrO{A)yj4MRC7LiD#J3;jUFRxju#Bq0PtELw0H 
z_b<@}Ynp=0Xw0*>dfE$|wG`dJtZmKBkx#R&XwMVa;rzl+VF4y8TQgFQ>C%OH!a?ql zSB}JU+h^4=sJ|R?gWJ-h7=z(fM{2Z#v|`d9{ex+$=Ml~QeL(|wMn*kZBS=kKYVCA6 zbM;%EvC>q~%3p03twkv@?;q)PDQ`2~n9Of|28i6M~ zOT^?qW-%6E0Z#E8XKEyxnza{w6K_nle*C~dC3DTqY}ROqOTe%)$ifx@y#T1jlJO>GfJDY!DIhADZQy^UJH250A2 zNQifDq%KDc1bRRSBqE)AbfZ^rBZ4G!05{ne~ILxFx{!Ng=oT97#9dX~hE6v;^*w z!9gVRL>k3HNH$y4RaqP)Tx6FX+P6WL(n7&}kGccvgyxL0Zb_Y^eAUHlY~64r_;OTW zr!+tV(0Q^FeF2)oWdLbB57%g9HC_Fka`3|t1iEfn51b=wlJNV&MrJi9Rw%6Zxob1) zJ=R?36N8_NXh1zxWCl&F)lOv$*)tFS_>T6905L}3KoKI~02n9`K*54=1j2Ec5aB=o z6&XTI_%LF|F%vsBTzE002#X{;hJ+&`BNYG_gaDvH17=Jb7D}q@sPJS)j{+HHgjkRx z&yOSxE<{+dW6Goz0RU2=6O%?{uMx9k&Vng21#a2+ zQpa;ClY}TT0qIifBH|LWPrmnf8cDm*+PhAvjsEkitl>bLZ@mjD8lsOrX!z*2{NNkv zzr9L)kgLZcI|(%PoSKTEiiD7fE3cj!i@A!R3d=O>#s~_q!U$BXq5K5vPa%^~+H5WH z$OEv!{SXtbEbNw4Ev^hr%rGjR^kR`d(f$f?zcPtLugdyF(ksBqUi-_lqP_!fH}S@+ z3rMB_;S)n8Ba#!v`L+xYG`T#A?7xGOT5qhWbaQHeEp!xXtLU79Vu1%6=SGt-G#^v&}=(8VOL4 zdM(tjlZIq$K1Ol9ufP*gBhx(1EL;`Zs9=oFC;$o=0D>Ev!{e(B0a&BCv7SxJ*8?x} zFQUuHTWuq~GO8>z(@ZqzF*xJh%ULQlm8hXM!-JMqZxudBw}2%Q@=gWeDymRf>zix1 zxd!H|HHi;vvAgx^()6suf{K_aU60BbLAtu@cCgV-^9#zhoTE; zU!A=9%_mz6v(+$Z?Yb$7!`sYsVgX!ybg`^tn61HQPO`N%vCMC2Q*&=iR&DP@R?7cO zYsX!ku0ykPub_+-b^3YPPWPtD~z>|$VFBAlHUlO z#k;SR5HA-0S(IJ2Co{qr(1B0Np7wZSv%#3Jg0*6cv@rIWFa4!!II|d12uD9oS%_6_ zl31Vu6UEE5&u|cPAY)P%LA1GNZVL<25Zm|{>ceJ#qEWwG2?P-c;-Xbzp9Vm(@%U^1kc9TV2q+x6k5|J3S zD&Y{2JXyP-2p##P1m2A@Oz{j#oTfZeHW7eO^WiN^0<<7H43(h6!JK}IZ-8srHX~(-$9P^-niJ%s{BDlK(6t#l(!tc3x8>f8xod zo(!*Ff0E0fb|{`;B5;CJ1K$io+9Zi8aizKZnj4iUG0dI z>ItD_am;WbR9iP?7r}vg1X-4;X296v8ycc7R1~ue!78dkwd}BgW18NkTBb~kbyZ)F zIbJ+F2*@4UMW}d^omyXrubj#*UZm-&Q*n0HfJU*c6%yGH2N=eJQEil~gv=Bx$Gf|! z=Y1l|=zXx4EtQ?@Zi%heJaMQZfa>Wy##Cjd0K}8Yk`0WC0;Ex?r$wRtu#o{$EKXPd zxWAM32CAe{-9IV2lmH$^kDx58KwnC@%X*fVrQDUQhIyBO#qEAi9OE?q%A$)J&tHkc z<+I!JW zbOgH#z&L}j{?=@X1S+gwvFWugb#zvla$sbZ$0WaTM_I*8Q#|!|ozg}%um>C!8^O9% z%O*=mrDdzKq*}&jU6;OxRM-j;kQjnp{s^U3)OvQ5b82dh zTPddjH4}`olP{8|>Rw_^*2d@+dFY|5Jd6BCx5 zkE<+rh-YCf8@a*_1+eq1TE)UT4wYr4>QVwpB{#1xxV8)=nnXujp@mh3X=AcJkL5J` z?rfo5Bo9?)^~$YHC!j3MIWdZ70eE&J>qwIy^kx1jtsj z<|3=v-X{arjaa%D!o&@_bUL$a@U0@3sgljX#<*jQ`;O*5D{G1|=vG_PcEa}7?cfdc zL9v=(o=mC6D>;aWSB>tK>ni98Hjy~uEX+G6&Z;95&L|Hq&8j|3!N{)vlSGR-5;qrh z<@}isuXAz@Y(0slU(2jb^V_rg0=MZ@F0w$av7z9Wm$+Vo9DF8b-+^xG>{_B{pkMy^ z%e9p=685ciD}5e&$Qj!n1L@kggc$?(8uiI0ao#!mtZ#qFYfni~=F`*F)56VY{r%Nd zRg%)`hOL@FBkz2R>E`QwfiIYVCMc3Q^)D0cHroO>udrZQ^ zM)()eyXDvmH`W%0%tf^>c8>AS_(7tOD(z^tlpRb^pS2zPixbOYnIzebAgQ=+0WZkF z4yPNY(-5(CinQ25xW4fvI>8yf;|?(KBD>?F)44eTWF4==H+gcPWUID41GuJBCaK$$ z0eP$uvpvyBwxFxCGl8FodMHQ}m>r|9h|;M@%DI`No=d7R!#XTh>AAW)m5HIaR%*gV zO0C@sm86@J)JvkjlRE~Br4F)~meV3O^Av*t8;!_vLzd^HY=gA!8vMbmGlsoLOiF>GZ7cG7)9Y9 z(EuQI!qkFf!i)3kKsu)qr^ffAWg z>J}zRMDVi5Q*z2}JVdctFF(9QaC*p#i_5bbsT=e+UD2y^8$F#QOQIY<5QE1_B090` zFOvGmO%pe}o3G{?nFIO`^3lWJGbzOT!&J+gOsc_eggd}oD1nkSC$h|GT*qobNs>{> z$qORZ+OX6tKh~5f;_=L}xf)m_OMX(#KWQIm$wv2svzPNh%gejTuqvQ>IJE<(^4dkr zORLYqvsGdpoOHm_$;>M1F{LS^d8@bP6B{P~VM@>fM1-WXz*D;HSuMenK*U)OlN=|r zSc{odOqVG`gvlk6RJP32w)iZK%&?=1^c=IGnV94&xNF2o;>lQy729e#Dg!{iY&x%0 z!U!zEFd-nQ#I5-&!6BP9>Y}V@^3Yt&CXi~B3Je)3Q!;`8~fnCarupjsn5^>aDGU(B_N~nV^F& z_=-`QPUK`jiqR!zle#d(N!L8j6~WG>5HF|8ufk&@MKl;b8#m%{y5wv^73C(QY9{Vv zvmWi3ze+oY1k~&!i}^fKyTYpP`ZL)7GBy6>sgg0u_QS)mqaq7rrs_bw3_F&vxhl2^ zPVAe`S23+9>BLmAHpYw(OL|0Z>z z(rdPj(Nf%$MZ%%Ac_gIvh`7VL6iVtL?*ITE0Dv$602W|@YW<^DD-=rQNL0w)3$Oh=71lJlr>;=YZHy3=C5KV&m`?bcn5H$-ik_NTCz~i}R#E%svn;sfM%iADoVUe4Iv`o7k zG($kHutm~o49HB`L;O}Ae95N&tVwi*(2yh<21!gh4Z?E*pnFOlF!UmzDzdbyB{Ef_ zL$o*ju|h!gEekk-sI7>orP>pS4ypwJ7qD2a=zt43pSml%-Ag)J8_cz&x8`aLBqhsi 
z#ZMX=og`Wst?SiG1J!VG*-W&SoV_c)FwXFLM`1}fuX;r_(yG)%sAl<&O$(4e?bC&M z56Yt|#9GOJB(3jSAF1V#+SuCKu#F)AfF#&lY()v+N~()HA7(|9F#Ww07Pi=H9bamt!pF13>9vvY(_iKQ zgsgiC<~WIljgFB20{}nhncGd8ViJodcI2*Tp{nQ+K&Swx=$VR;f{G}B61d$buAv2X z%=hDNv7rk9^~kVf{x&Vuz-Lu zsDLMi0P6S)1g@N^fQog&5eTr9Ko*PJHRN+qG>YVp{zEux+?ERyN#ZbCn*`o?JH~3- z*wl?oUp*maR3K?H$}t+#j*!?q_+{G&=3)Md6S!9>YZ^$g0UH1SKjMl4NM^1G003|R z07#c8xZNfH*pV%0i2@+vr=aPsC}OT)WFEd06aWC2Cg6&QjVM6rpbna(=n+iXX#-Xc z@Kf2LISEKMK!yphKpNUKvsv1iDD?wAj;!k9QAeyy;QQ1@kZz^ute@XKT{d1VF=WU_ z1ppz)gSwt$U>*ytomP&{<3$A6)Bpe~xDoGh3X>L#qaK>E@E%_V4M33Tu@GSQIBKkD z7Xa`c#C8{p6`?0?k!v1{o|X>F0f0-cqo>Z}$!aXDxFSn#Wo zAFJ2YC^eu>uRl{aF5(m=003bg3yr?oXyxnOtIbU;Y^MP2MCKVBL5`mRmjOWRG0^65 z*=Di-fZvm#fQs0HOWyA3=8DG#7wFK=%F)iu7K_XR3Ti70 zDk0<=IF?*f%KA3_Dm9UrHyZ|zpE{+)?7=asRWg;MlbKqp1z#6#;pA`uz2@r+AV1aI z;XVd{=Z<2m_==(SX3U<7EjVgF7;5Np2=I3?_ z#D-nJ8zKjLCbJe5DrLIoTVh}#6b_6|4&x5ys4atv zu;cN?jRo!{CW7&MUJm!Jh#x58oo0?qPGmlDZ*Ja@%IV!Zi1Q#f3-NJc3;`ECr*j+s z(GhOm?jC{8?fsU*VZiQ`F9x+XI0_z6M3(LqKcYjNjs&3P@uIUE8+PRwiY~qR10mtK zaK5I45IBMuZuOCcOxmdk6L;>w1^`;8Vv7ZGPfnUB&g66}0Q1J(vG4=TZt2${i6ZXo z^5Kq!2zF2|bo#c#Hmjg`*^aI|)xM4OcXhpM*-Bk3LJm!G$wWZZ$<^d?T|;F_0B52n z3Z3wq>*D?j@um07J%M!L=mH4Oxuc=;7K(Pz56*H{I%f_mKwz6-4vQ}z zsHoT=M!BTUP%9V!Y`*brb&BuCl8H~3_P}grLIHM$T_iqPj9c#0p|L#NnJR1F`+z}X z6Ghd+dD7yP)WR=r*&Nk;q>kd8%X<~6A|mz?f*z1}u8?uf1%MzB-OzoBpe$n)(sM)> zABvA^t>1i8^Vo*}TZ`p5N`e`wF0;@-s)B@cgy`o^=d-wChq21^MrwD`d<%%+g{sOQ78rZfvWSeyVLt2A-R@?`B zNXb)X(lcF5GEJi~V+9*#f)1hipNc6~3920G?$uhJFpP0exnH>^Bq7%3S5w+pfB-ON zpa>BI4~_v)5CFr14&fYRh_E0+f(7A-fM~JdLx}@DR*abN<3)@YGin?|F{DEg5(Nq* z=@I~(ln84sqzOkM%$gc~ViW>^4iAP#8M0ughM|R+30qLUAQLjI(y(ntvlCI~YUzvLo4I1bWf0Lck{--;MlOapSpZ)jqWB7$?l9HW>o{b^Nk0XoMs#7!9#E z$xx$!FlHU>w>I&Jd_Uq$E}A$@nifrF#QHV3NRGwfJA*(f2Yu=577<4GTmEJ*-xhEW2>;bluU(11) z6M&^1Wkwp|HIQCOD_tcRXp9jiVSbNwCQ@-FS_s%`z}Z)tajd!597r%xMxuMbS#+Ig zSPiEVUdip$A9r{;#8Hw#4mO#3%_YegP&__0-Gn{a#9ELaqP9|vi@EiqQ%=HlTziet zMqo@kM)(tz&WT83Rp=eH#!*ywbY+ha0;gon`;@D zqL4oEG-IOCrS%<8!KJBHNui}gshlx3wi$8|K6sX%kOt?`s2}DT8JFz66(eo40!LM+ zB|gX1TeO+v7j#|u2N`n(BKv8nm;&H|P)#*O1t45K=pSCyHFVc>*y6aHY+eS|r?t^( zyP%wF$*Qb{)_FJRTHy}ZReu{DrebONk@%rxX&yJ%gS4L3&|%amE99l}p>*I&3yF48 z!do$ns=Xy@l-HOCLWE1Bn2oGyicpd4hL)s7}4t$!ZtsCFmb#TIg9 zt(+B!u#r{oerEafo^hnUE3|CyW~Lg7qa6t-O-7&p*W;5*PSxmrn+Y52fG*Z{=+Rqq z2bP1Er8JjnN*<~xVH{nF<9QduY%X|k5}dAjE9TVT&_$|kE}0RgIH;?l@!KMc_(d#Y z$uj|I)_-LN%@~u4UuG1k4%<5(NRu(C_j!}SC^6D9<>wwstOic9)kl{pxu< z6Mf#gvhBL8R=Q$LD5|Mx&AUm7xu!K~P^u1EynMoC*-^-rOnihTl|6KnljnXo$KGQ2 zGu{bf)^5ZPCp_o5Lc&cFe@GS#rrI%`e?GrTQVDeKmM?iRRKELcSFf~+tWQbH%9PY+ zs;E7wYvdu$k~}pR*epjg(HR)&!qv0pG*4;&0IFT{%yk-1?J7-zOPBPR6|Q6n zXDf5u36~QiB^l9tp$QXIB6qrdQ4U)W;@OH&XBpsa#d1diS4+OgoSAegj>9Vt)NH1w z36+gsDB4T%Ce@k=mC`SxTJxoVW|4s-zF3$Ek3f06=EMROXk4au`qHH z5?09A!Wk(AD@m?YUKNLzxJM!}QJEwE9QxpwFxAM;LydF^YhK2y4Em3ZbTk?1(zh(o zeUV|roZt)xcujR$lTkSI;C6gPIl5^@gA?nUy4*OJBtpkc+=LE2`FEdzJ?@|T`zF^u z=NTGxglYbiBWWPBrd%nqlMAd(PT*-f(0TETN~#%JUI#lE7AI{XgI@b$qtGE*(Vqo! 
z7Zn}DMhfMxf#hM5nM}x+mPT=tIb@(LQ)tm#WqL6Lqf@YX5s-SEUl6y+MqCa`kHvn;>EkA z_K&KWG2g-#2`TsSMYYHk>se2kphI72+=xRZ z6U{h9vS#g#6@ZsDNEav?1WNJTh@F|d|K{;a9%Hp zp!3$_qi&JQFjx^)U~H-XTNTgs!7dB2XS)P25pqyoH1?+M)(Rid^--&ZOI{&^3$!`e z@nQk?k6KSPJVeR|P|QoQQK8gOn~JVNp*pWEJ*YeY?q_Rq^P64pn8}K63OQs6&iQ6z zqIcRVSi)UxD4)11Kw4=lFPa~OytvLEtS@Dl#Z=;WDl#zw?Zsl zEXj1PseEv8rNM1lJXzhl0XVe2>*yF`4JO$DcxN50pJ(IqP==j$z*+w5eJZrAT|<|( zB!(srFI}`~-W9U!jaB=cYTG^Wc7dxD>5KwO?r`6?n%`-odQajj<$@O2BAOeJx>@S6 zj(KEwtV~1=of`809?z294lItQm}z&fM{LP?Ylt*l)qn~*PBI;07)iQerSba6orEz& zxR}^)uJn>kG@Fhsc2mg#Foek@FixfVYgRY>z}NHPnN5a8%CstL`rY93`qXkyGZW!F z^_x|UxXblt55p(3K{u^DiEs!-A$n>F( zoOSUN4QytYZvT#UaS4KclIQtgDP}pHC(41sA11X#MO2~?Cq;OIq=@= zm^N`3(whdPjy-16ImN&B~mf1m7m;N2f5h%Jwy!EyY+s6`cjm*b)s-BDG$grC2RtpZmxZ(Z$!*6c)!N zl>_}z2_jW^onDPx;8P9GwP={iDBq3Q67Tt3XH^?L)m3-6jEoS|b4V3<{L9MK8uzS+ z&jeBbx5WqBF^`HE4r1&M=ylPHr3c<|Q`bz+yv$vO9Z{?-l7ZbE{$L065mm~WUMTGh zbxa*XEgYt~#yE+WePN4Qkqp}G6^1|>WZWPjdZ8UPis%8LesH1*_RLPn%O&+ix2O#E z=$f<19G7eaD9szMEnsd1QiZtU?sUnGkqP-V8CHeX9f4wHt>6Y49?JO|r-Tb9RY}QI z-_&52?U+y?J=3?;7_Z@9nI#_a9aG_1o$aJm``KBA!5TZY2og=4x1EZ>Bu%e~og{|F ze~FFc(bHQUj6WUFQE1l$y-|ye&PfH3pYhdtFrXsV)5t7D;E|o@kP7nchl~|ZCpnt` zt~I1Q?Ozktivv1Q^a)6CUDTa*6mTs|**(V@)y|Eqlqi-YA#vL?mQ$F)82x!m&()yQ z*&sq?2Lz#;v7pEz${6*4UDRnC5@m)whTQit&V-?&k&O=z5oI1yS*5Ufgpu#}Y3Ojv^^4)7X)kul1d_ zeGX-c3L9cp@1ftuFrWgV4beo|y1iOETANNi)QveG?S!34WzSGuo+=5XRwQ5lEMcMQ zG>Wet$bqgBbJ z-`JjffzQo+7Rs&AD$&||E{4FI)n}PhcWtI?-pm>?XHk-!AbQtr17w zkY7&V;8`YFUX@l|A^Ff$B?bu(j@?G_;Oe}R7h#a8QP56m6x}hV{aIut0;156A0IN$ z9Ht}<^@~6-QFP#9>?j8Sbg8s(sFse%xRBVx$^4=!ni`3t-?l)*`m zM{21ZUK6XR6N_QRr`}1q_CdMsMFH%E6aWCqqAV9oOP)>RXTA-C#vk9fWk~(!ndRuV z%;l<(>W{6EF(uVrSZTjn$;er(Bc)bt3qU9sq#bjx5fXir7wP^gL!^w94GoS;9&H`k1PY#qYU*zs$i$73b@oI85v-A1ZBGIyLqvheZi=RQ zr*Ev{UZ4O31mVFs>+eZZves3`E?AVrmWs`vj81Dcso$909FpG4l8&tT8bwpQ?aCUj z@CNVPGKAbRgz)Na=3MIY!I#N$&X-1tKE_!20IgB9K@1qMQQ(GnTvl^P%k`xm0~oN6 z@|&C)Ub(Vx0f$Ek2d(W-q!ILAqp{$VAM3<4xP~%Y zgmNC=u22ljwBX4g)9|#=^0ZjNQM~L&jItG60G+I?6g%fZYymB6svpqul6)#LTMiZM z#04NQO*BUv8%6m}OCd9}0s8^C9YKmkK6ffb-EFq>~W?}Z=C@LF*6^%?~g$TUow>=MkdzyJUQ z$i-g7z(-qj`7*#5e`+E#gddcNOankVlZhPJWlyooR8Q*)Qngz|j)Zy~;5;(O3~PzD zOwL4!aiRpu@bm}+LPS5XL&NRLMsV;Zw5kLt0$z^foUh43bzJ1MA8av0h%f+@Z!xPc z0|bB?h7^Sbp*NOLiR^H4KycqsK2g!I{BE()0JO!&dH zxWr96aXM3h2!pFM4{|!ucK;0A!mcF$tMIEI5n$^~9nz(sU+tXCMxltMVqc7lTcmBd zf-5jzb`{gD@S-dpFmwUnf!xxyT?-do9M`Ls^QWrpQG9e%8+HYpZBy_rAf$5?$aKj9 z^--916AX5EkaSS*#ac7PI)`h4v;hJ%HUtQB6B}?e(=i{FbaQ?UcXb?_DdNmo_s z`BK(?_fh=ucmsd~0Kg9nH3PH(0I0a!CPYXBz#S+xOV{^N#6W+0wluT!10S+Z?*#;m zu^%9C37D*wn{bfpcX;%*m8%N>lq@E{i@Eo0@^Jes3! 
zO_t#iWNqRvri!(?4`BY5uo8p?He@x9!9eHkVqf-wPw`vNHAyEl7FRKzeC(o;B+#kK zQTMe`__xXOE|vHA9|J%)Z}35>89WOh>_z_c0&ok`Ho5^Mx9Xy4zzJA^}rP)#-=^Ng($b{ ztSU$F?k?~tw>aP@pysK?(3W06+oC+IrN5tG@$)6?D4Wj{u$81V&<3hhOG)#Fil{42T{a$c+@d`Nk zc6uy6P>w72dMEC(3Tl zSgDR3Vaem3p`v6AvK$;JfQmOu?&du1s<>%8a22<0xCXYRE0$Nw4_CI<2;;_1073%* z5c6KZWnTb(M;#!n!7=YOKmZUp(1t)ELIjQ(Gth!SDgr$WlsFKM2oVa$aN#0Q1OO^h z012!z5kLVH0@s9;xR9bmKLc3;K!|bT!i6eHN(|W%0EC(f;Z(ecQK3beGyxbbI+SQo zr9_ExWQy@A0H8vX^0aDoVMRCzV_NJO^eX_eL1~ILD|F~urB2a8jSCg*(=iq-c!-#D zAPpJ-C8yNL$A&3UYs`Ut{2=CQ`=@8KJn7E%D)1|0*aB6uj67Lvan)Sil9oJ~Zqw7}e7jL^F5cyets-ppbzsGKx}NwpGW15UK9()({B z=~8=6z3c}qY(j(6<2FVMqzU+=t4!ZNqBT%l2up^5{*P`pNx$7?4%1QB# z{B17(i1L$2FAs#sgNq+`#PZ3n$Qum&sdads?ZwSFU8Ou2faC_S9`wUFxzUYd!hugD${N2_#fWab2y73LHsVqahyMeoH{6|Yr89~N>&U+G5BRG~%`aKOx#R8UmQ;;M>f6;bB7NFxdNdp*i#UKnH4 z)V8{2OTTV2-Ongp%4ZV~-kiZJXD`lQcw?oEw7?af*RuJx=JB`6C}YpKUbj?`=b<{Iv2n%~w+wphGwt8A|ovSGHwK3{GB_lB0+ zYHFz$7&8^u=MF2HUk1R}Ei7&1n@;>*Q#_MJrGU|^mczEyowU)cf2>Oxs)+QjLZNO+ zNs}O*I)x;`iH}oJlaR9PGNI0_&s1aE8rn?wHjeGAS8UtfyHF#rx3CLHLqn3mxMD7# zS&d|z%a&t2R2|XLErA^2Psa@S!B|boSx=f$%p91Z{B1}n;%Zsfy2h06hz)xQE7XIc zG@YrHZH(e8T-|`z7PWAQSrkOdT+G9k>6q$wmm=B$|7bL#y~=?>>|>M4(gWszH!n^yz@2Sv(QuX@m;SOr~$ zLhB{+UT=Dh@~o(p&M4@afUKtH6c(VzX|Hq7%GS0f$S4|uj%99ym>!pDMxnKnn-z=V zqulgGSuQe;8T{w2Ue>ZAR*<0iliM3NHy(vDlU81_X7H%DE^9^doChT(g7Rn2^UaE& zj^g9KuxZ7Hf(BDaTUt@J<44xe5|o$ITNALKYQ9aL?8Vn06 zYZEy#K4&{Qn@ZCt=a{oZ#Z^~28^y+Rsp(Phl^A=?^^A5&72yh$Z@e5vVF|CR6!0mX zY2ZtzSxUrq>#|lo4*Em(qNN!MT8Z7k+KWZLnd+%eK z8UI?%*A;l120Ep=qGd4w8_r0n+SlvQ-1>6F~A zp0WvTT@>MmfA*v*hBQbuy_H3YjASs6$EA+r=?iUqrX7k8xl|)pxulsqCF_!TK>c!> z11p~8Hnz%pM9*y#*J7U5)IC33R1G!S*lsa-M?H&}#+K_=i6&Z9`#E%^dwVrBa*(d- zOI4{R>&CHK$5}6i5OMPRPJCuDOOkb;XU;omFnr%8n9Z zTyQc+o2eNR#h-yo0d;!Ogl_yP4<=`u_bEL=0g!&No$N!MnA#kH4=Jgg6vK?!Uxa4~ zgyCh|96NjF;gXJETx0BZIxZ-_CbwQu``eYVjOpIm#jJc?w40@up4PFcQ8cL|&7@Vm zcgMJRhh_S=JcY*gUb&~}$t|DR)v3(_@3d3bQ52&iL-$5@b>IAVo>9fJO<|OkDsJ7} z;A^mGDtOUqN0`Hed*+k_-j&qNVTw%-+*88;jJ)n7a9d5O)r#l|97}4 z_bA5M?yw=hF}7K#&kZeea03Uu?$BeCP$}~aWH#nVUVi2Oqy^(#?!c&OoiJ~JiZ6#G zrH$51^L}r+oaS|kjkWe?bQp}esIJ`FY}S5np@@U{NKMjs?2JY(ok&XmT<3B&sc03jeFkz&B{U#t+>aevP+tV2gtZCn=sFsye_`Ds)&w9 zIWp)sny6jyr(hll&y?_Oh6$+xuUycu%kaxlbY^#`=HQACscOl^3S_COsa!Zq3^^>K z>W{FNZwdjXMzBVR2l@(kAKN7lMx1=|GRup^GtFTesZYr4s4`Y4z( zsYL2zTy77d)}~XiMoSv3m4r*F$cV}Uh!C+bl134A4lI>Qt9LNT0kO{nXD>p=%|vie zn98J@Wa*6l&#!)t{=Np6TG7lr4A({|X_||0Ze{6SI$o#E-R*4iJI=vDvsR8=bD&yer;xtZ{N`y!_7HIw|Pb3T(WW;gCus(hY;o`?70SfSB=p0W_ zlfV%kq2?sTHX=VP>-*}G*C2_0n zOV;qn&_uC0jK_0qCtpnejo4H$Y50xyoF=C9B(};9^fqNHuM$x9Qm%?}5`9j@a3?c2 zrGavXxPtQU;t-<*W>1PSFE>To2Ig%x2PfG`+khmII1ek7tj_9-h)B~Xjb=0^X0!&v96dwwwcx%H1OYa)(oKUi*2*))E`aP(MVtmY?6=6 zY=q&?rM(al085P*sfM~}i33S2m=w}X7A6);DXnTr1uO2v5|ONSNf&LyJVPP0r4EHsv_4(NJQC8};ENjl%vU~+iT}K?9@!E9kr?Rsc8O&gW@kCK{EPg8u z-OKVqwG(qm9@7GLa*I3OF0*>arcjeBziVzhZdFHW0{_%F7bbSJWz@bX`}Rs`UUGBB zwN>T_O#%};H%SzY|L1iUWZRsMOj?a~vM?@6MSWu9^TfpvPpBMh_bfs z>jz^ihT`p2`)~ypY}=G`A<++WDvHgtOSSrBB0;6Yti9$5`SLH?3${-I+Rujrf8C-yj|>n_VhOps@l5B+!tH*KQ`!**`&>wjEPSh)r( z^-&I`syW?JyF$h8tT#Z@O+z2;{Mf3zE;l;VQvHZ@b9ZH0dFruLV`d{_Zasn|xPUMQ zLooEWF#NYLD4>5Y;54|8<}!3X-zN~uF;$QEQPs9$vIj(NZ7NkRN(V7?JY`X-E=V~H z0{!#;G=@#0v>-2a$7VD{*mN~mO(>O8Ga1+}`)6F`s%z8@6J?}J@Ix=G05Adr5Ac8j zfEb8*|ComZBQO91R9Gm5vy;nK5;kg&rpnUFQuKDDZ`aVrw(w^+ptm0BZi!YzO0}`m z)Tysttv)Z)-7@2J{KIx(#!jwywrb>VkcDWKuUZ?*Jd5P(JhvOkr)ns)18)L{kHaHy z0s!!UBo0|1gm@r~_=u4OGM7?D49PzPq7N+jAd(n-)=q6Y#R^Wt3-u)QO!#>dG$sH+ z3Zy_+GGjVypp_AVh_a0e7ywx?11>~F#1`jSVFMZmf&^%zF3k@0L{B2vcp!GTm`h{j 
z0HBb4V^k+qw=6O1ObrzAmU>JrKd0n74GsWkwpfapjpvqF7+H}UIguF|G4^*RdVuuGpB|_Q&kEVNB#gL`jrcJ`idm|5zB~d)4C7WifN>y}KvM?!MJQRBy#-OyMIfR808m;moVcuq znJ;i6t|dY@{22h2KtI5mCwyWyN_K^Qr5eG?e`s&p+%;xvS!ce?+8`Gr^22^@{}+e9 zd70@th#9$@k64ilpoj@Wox`f>knNjr1`zy!4kn@@#4aT8Km@3oLR>o};J|JTLLi!; zn3-81z}j~EU?OlrHQHd9iA9Kb!UHY@w{rreh2XD4qX{m$092w60O2Gun=}BSk=Ocu zReCf5K(AqAu8o;Bm>I13LcCpKBH}tEJQ+ZI`$g>Y`0#6l*gv+6CDn)Ub1#* zqPNqUn_WAR4;iS3Ixs?ekr~3Cb!25H(ZV`{qoV-5dt#*rd$0i@q|q8SG(5f;;JgQ- z!asZ<#91Qp1f`GaAx?t4sldWZTp(iL2SR!aEMy>xTOuwX2I8A9KxV^{|2PUN0Jk?H z2nK?~1!Bm%TenMkA{>C2QyKuCyu1bc#Rs`$3Q)iK@jbbWf|?c_QFT;=3rCgZJ~-MT zCc*=J8K^tkhl84^H6?JEtSS?vV}K0+#`<C4vDE06<`E@8WGqx>JUj*Z2y(jKKvjG6squSajWYz)TqATQfePX8_VhK(nC)&WzX`R}l|J+|%nI^1Uu8G;g z*?6#VLJ4}p&sD-FdfRq!g5JN~w|9a?^tvkmVGG(juW{VZ$$Kjx;0JP|)4%)D1AWpd z9lgbSiHpw9GK{pmaV$tqUZQ52`Q)zjXn&9dcXc|aa{>^^``E|%oP*k1_(w!b*vgEK zG!y`=U!o0)dkKp9E1H~=3waGtq$gG*#wTLhqa58SqOY9zVl&s{KDKzD6YGKYnl|e<(&L zYC_)7B@dO5+-24jKmkB~BEVTCn0>%KyN4sfeB3h)&lV`5|9rqpqU&+|1Md3YtG**3 zVBuf9+~d9@nEucv;v{T)h45SfSR&8ieZ9XO)NukKz8(OyUbAKW(gV05{5|eBoPb%P zy|vvmgugM69_imY)|sB42SU{s;>Fo~B2@JSg$H5V8EbyV)9B1GCyRRXu3|J>)076PauaZE&s79?r}AOa4;C=$7BeE85K03iz*Z0sWtp$LQn6-q2YWu(S| z0PNU|vSp)5jax24z&VoWMvE71O7w%!VnU`a7ZMdg{}m1#i&R~{tO|!JQm0v~Qhmu$ z1X2JIzmBnL*6Bj8YrD3MSXJ#syLQpqtf>?sAdwpJjHEyTK;eag7kc1`*R9pYXwP1C z+YtcYjUE zB5dOafYcI|liiqAciEp&wj(^;q-jJd z>RTp6_!zI&j+5h#)mz!BUbbC%)Dcz#+Eo{1jnx-XSMXJG zO^Y=~*IapVWKvpjedXOkN%b^RQWAlr5K-|V|A(Pev@Ilsa7{_n-clGT)y8HO%&1Xo zevq&cMe|jG(`70G@Iz@D0b&%7M@_VZd=>ifnOnYf2EdM;IpD{Xx=Dv001Au*l}Tcm zI2K!P=66w8`_ZLVWF@L)6`TaZsg;~}I%Oh>ddW0nW&qSEWM2y5mmf!X>ZhQ7F9rCO zRW@V_h)g3EBvDuiDP+;6^pP6sriPlDkQIwOy7C>c1&TU22L9ySsQCbFI$1ei#51#!I|@Zw7PZCK{P0( z>d%X6o%YkJ0+L8wDP%n*RRJlsqB=B z-ngJBx{MNSi)Dt=1$~4~G15^Z|4*ria!NfXh;7?5oBoD=3mg4x>->kmsJmMet2r zV#@+sD7tiIjZid0N}TG$uGL-8PhZs=+BjzY+k*P|gtRpg774ceeLYbq+LV_XK(IB!ZRuG4{ zM|DLEWl$?zAlEgQBJz=L|CO5+B9Z4t{)mui)jQ#wKGv1S#BMT``ys7B)I+X3$UQye zBPaXVM!10pQVH~s{j6A?zHOy-q>G;J9u&9(GSYxilp_x_Nyeklr9RBN57l^=!|Sll zl!@YkGN~Xc-pNpJedHK}T(>@2!mesbyC(EtXTm8`Q)+|UPgKSixZ3Nx{~m~*DMF7x%t0R$XAM7sj{|^m&!S=NY31%pkf1f=wk*Inmc3p<2?d^qi{5Bw0-JrObw4g`w@z zWzV}hF??y2q(VhBFU8tUjeDI}DzzE67|O{%kWJTQ^Rz~;jL?6y4Cp-Vr=S+5vtl5v zWC~mA#@M-bir)L!btZW?B7*8>Z8{!m5y)F3Mv{nkqhv;NJE)I2a%+)N;}lyN%g0Gj zwH`95h#Uyrc*TxBbP}q9;M!Pf##5*wJnKjFKJ=*g3)E?L*ai&?Ap+UFN7D9swmYf!|0aNLxuINyr72OH+t4; zeQJvv|2Lr+G$tFwhZds;@7-Te)D`xK5TgL z>(v{F@xE^}GQWO$P4C@z$?KCaVk;BHPA5;fTV=F%_eMEGw@uVoWLs>ns;{kT*Kl@j zC5T;@|LS;xdT*J16dFON;PR=l##UY{N&Cds=Cy5?ZnA5W`MJ>(ACJshdz2v=3Rx15 z$eoW)^n(HV;$N1wsL)m2_L73o$VN>-H~r!wvFjL2Mw86;LT4NQdNihivehN6-h$bk z-{&EfUx4PVz@uEq$PF3Ee9p9)U#d+<3oWL6MR+ZloVvI22h5e;p@jK5+t@^KuD+0 zp>6lHTwYQyE=Vk%UETWH=|ow7$f-B8ZXy$@Y{xnhWh$PYdmrtx?nc{9k;KzS<%)4Q z|Br6z1Ac3qM)+(wkLQ#`f4MD@jN3(fT&wdM@!YX`GNN64r7P89(f8cN`A!?lY~P|S zJ*imb#Zh^Tjq7H+ePVFBj?G=&wo;i|I$^VYsCs)jSey9Wx|4mR9IsocOJ8fy>6CKB zycjug9a3P!etOPp?%<>*`1!`p)fu!VrQkm4##$`(h==^k?{&!%A{ex7rLJS|YHHtN zc9YOv&{Wa-;G;=q?56J3{YKj5yF&L>b9Op$19<84JYK)aZ8OiZ2~Wsvos=pDTpKqy zhSO4mCRU0UB6NXpl9N+-!gTIaYKXUJ+hty3HCa$M7urTqf|XL2^??ouAda+S|M0?N zSk-Y?Bt{cvXL8kWn{ zVoeJsXSBC)1wv|%<1VnYN$yr4yyi!f(QPlNYzPN?G*)y2cw+j*bNkd~5+`6Q22k}x zNC*X5^7DNS=SN)hJZKhfwKIC3^MA&LQ+dHQ6_Z_Hp-OLsL{Oz~4l+kQh=QeNFO#=T zf<#K6Vo)}>Jp9FJeCIA0gj?!IxC1_kqLpTcgBfa^^^U2xp*^FI-qL**IaN)OB?yaaOlptVDNnwn$DFhIK=8 z2{~)$WrI*H8Ft|a7s3K{{^>#-Zp#5BRjoA zhK*H$Y{`(d_l}m=W*-%NF4bR!X=t~_QRMY%+lQ9uW0-qak&|UyqsCEk)lsGOOH`;> zy%vLF^ntuaNb1FSlvYu06@P8^fDwd}l7W{y7<;Fr75cG`uS(sya8 zb8Ry*QJh47QzdO2MoY*UoW2>AlH)w!SZILe zec=dS#%GuH7j@hAp6LaFp0skdMTLY{OZqsF#+83#$Ct?idGnbsJF_1+Lr;L_UPZ}a 
z9H(w1^Nf(eaLcD4^Jj`z6=c>3k_xvL4@O$} zC1pkkQhu6Pa~pPHev~vnsEV}HQ3*Or3YLX?0-Xd}jb%7(K(=7@!FeW17OB{a9V(f` z#Cb9ZbLAI+D@P(@cv^A?h~A_?+2m~8)rn3hh=$ppA@-myR69jeo5$##j_04bLx+|Y zoifIN?bM>QrDS!;WW{j)q0#cJ8I`{_aaR` zYB<+JtAaIw|5kxY#!kYNSlh>CNBVw#N;jqYjkoxf|2jCUGPFJTC1B#yKt0)R0u+tv z=&J1)ev#>AV<%fC*qQ&-h}?&ZTt=tMMW!h?U@v$y80IkPhm4ccSlvgb7fD}qH&Evl zd2DxFgQ+g^3XcaQdQli&e)@uE34OqrPXWtijp(51ieY{leCc(HpM$S)Gmb3iM;j}M zghrhzB@q@@liy@sW=fz)IZ#w)KZ6Q8=0+*ciiZJNiam#u;(2w$x@g|Ug8&+t4|TDZ z12@51fi?LSMB|{Wl|@n~K#ZAqnn;}}fx1ZXIdEjJ!x~G}W_mBX4 zsFQbgTopBpVq$T5%ET%|O(ssvr;Hg^PhgRa)Q)FwhZ_NW-t zeHV0`_r_^)1~%d4hwck11Z6-+t6KTUy`h>)%J)YV+csoGPiAO?LCZ0H#a+Twq=wqA zsx-hIdukLUbQNkj&l_TE=3#ZiZi5ABKNqb zx_l8Vfw7F7H-xymX?I7&TYb!;$s>Cgnrb@xblaG&mwI%l6l|~wQakvB!ZtdE2b=P@Wy}jN zog}>jm{99Qxo-)C#%QuLYb-69n}Vo*5GzvNcuZClH%#fqFtv$WOpw+RB_yLXZ~JyT zH?G`xEy7g8QVBoXTfR7Cl~B33{|Y>A<0fl!#tusp#F zD33ghrwejrWf@n_g+{}B!&*$iRSGdtcAV5VZvyEpf8%K;vMEoq5LAFDsG=IEaw`8! zD*a4)5$cTuhK)GtTr8uczX`W*0X~35OYNvp1VYPtPIbaXj*_aw%Es!IQS88s5r;!*&FQ5b|# z0EnH~htVyuTgk!IRU$Y#BFi&|w9EUme?_#{O2Ij$C2Hl7&-EoF$=4UI>7sWvRo9is zPi>{w^SU+LWOT{WOawZ)wxqY|)}pj|3Oimbq}EKen=X+-Y+PN6(JJP$5zMq68iLr3 zz1Yuv7=rRy)^|kAk~8HIk3w}(rolV zT`dv+lFuiR-ih7V|H0wd8L}Z*TTI?OFE#h>SoRc6j#Eif+=LVaDdN%Kp}=b5(m-d0ASsO zu@KQM=Y|pN{|kW7XAL zMtX{QzIM4(!7|5*e|W?mNu6hY(0|E!!a_m<|9}0sx^ABVXbd z@sYy_Q2^ax8UPRje!wdX;0H>g9wt%WwJ{$Ku-wg|5b#kLDq<)S!Yy-|WSnV~3XeiGOX>@Dxu6`vUEu@DDA zokUwT|G5OT9tl1c2uxg_j~F7Mw`Ypv9jK0dy>AQ6vNaQ3e)R^6{jDEdY=L z00h8Jg{o2qW~I83%95!O2MQbr2jxknXZ@fQGL)>yiDyMxkSn)A-K-*Fq%DilXh(=c z-Ck@M^dUxrW<_)i+ShMGjRYT#6+DjL~V4Z2ml^*qf`LM=}D)6q`;Jk(`Cz+0CI}73G-!D%!mDI>?j%bQITs-t`%88 z{vQEVfK>R=l+(llbjJrAIq;jid> z$g+)kDrhi?K8r9j2n%`*wZ@2w(6zBL;z>cE5=$^N#CqzBs`Sv*{%~-V;nAjuMM4sKh3-F0iQ> z0HA^)MdIqLs+#D^f+YZ`LaqhFbQ1uRAOb|a>Jab`0;Ck$Dyp*p+^#eo9W+p^1zme> z#zRXC(JX>gla5Nj5Z#Eg&dmA?w#;IbETS97=ybLb33H1z6kGGF(bz&g)zd&9?I|J) zH3|u(-%y&>0yCS;X-OwpT24yh8kJ7E5x2Zis<7&!0E%GwLPQOptgI2VvK%ZGv>koSRG_$Wd^feoI_oen!c5Ka z!cGUCP~ZeB`xhm51>Ukmdu=q$#1AE{m@q`+3bNL^Jm`a}<=%9Yw#yu1w9}modl039 zeh}&7x~8g%yDA1aDcyk(;Huxb1RE3p1{@g3PcJ(h$d0mjI7Z zOtnZqjm^|X2?N!}(-ss_RYNCouu7B>w9(4yh}wEXh&Wo9Q;rIytwDkFo(#9f9+uQH z7cC48j;ea&*gqG@9ViHq7+>6Rxe90iUJ5CM>*B$yV>#*d_T%)V|B~5cRH~v?>l|PB zhOD?N6ks)wQqargV*V(;^d?Z4|ylx(t5THDY=yH-`$kIX%bV-MQ0LJ0E9H~)M9&Tp&39aRjBI?yt+&};((=UcP`S%mkk zHmh1m&$~%vFp`POX$E$ZtjKL?VxbbF@I@MwtjRGvikDIH)gVkIYDyU*NU9zKE5x4StFe-F;NuVvjfiUVqD_lhL@%lhFjO=O+y}vkK_yDBRONA41r-ID*onp> zLKuPtNCE&F_=ifw%AN=rskc+DXl!|85&kGs#tynBg)oAR13MTY#T3wj*n?dMM_0nk zyp3Ot5kdmtB^vvU?TRk676m)wlh6pVcnV}t!b12)L-Njf25Zv;0TwAjc~E-xdZ8I_ zw@3^5%r>tw9T&{`k+$>z0CX`V3FGEL{UORVdg3I+LYchHU~Hae^ATK3G)5fi^LEW7 z3cPZtA%a}9m1Pu_!sbUmgoSSB zH(0)kEacpeZHQ&Jx6m*|MpI}!-9|iw@vfCn#ih~=7_Svs^Ey*CnT`vRxe^ao$4bI-Ek*dl6kn-GK#A@Rq;M>xz6g2IN8&4& zS#%U4YiY&mj4nq}wCfVH^e$K}EUGGmDB0*qS5=k_r5ZUQYUF4~zd_Qr)5{`zY#F#J ziVe0I>}`(-D9X3Z6>cU|)v;VBOMxbIZSr#}{|=Ao#`m>JevcerI8){zM?w>hf$=GF zAE~9PwXc<1!X-^BmD)*_4W0_~X+=@#$!_uwVp#nkY)FZ>DaEfvXeFU%MJ3Sj8VZti z{bnh{nZx2n2a!}1E@I(!QVMzVJ}ToIY#&0SE(SG{W7R7YvD!#(UaXqL!`O1UDqZ}l z4aSlJ=_qa5TnLJl#V*QHZ()ou+WIk$YIA99gF0EU&KR65`eKSFYqwnfw7Q#VR0)H6 zKg7y2t9-oFsK6_f(8AM*Uyo7?l34DIdk6(5oF2m%Zj|T#@+C zpX!Z{0j*SlFBQ!mUz3%%6tOVb+P>D3(s~t(wjnhr+eO~f#9=F76SWO)BwF`gCl&9& z5<5JsZMn-=+^uyvJySu^FxX4XDQAd_T*WBJim*1AN{z}pQ_)qi0w8U<@K?&xiibq7yKM7Nk6{+n!QX2^f%Ze>R{6?^`{&eSf~4Wm>DOG9?o) z&P}bMzbgA%Kd!W%4gByF6+~p$T{*V~?U7?6tYz10u%x1Q6gNA2`+C)@J-R!$BH4pa zgzm(N8wMt>Yah~-MmPaxlxMz8`{Y&Qs6Y|DcaM8)!nfy|Ejukv3?WGF4eP>{>pG(F{nTEH8V%M|-GM zgQ`P%H}iTR(Yrn6^1sm_zyuqt_d%PjqtGK5x(52jSM@bq}e_?yQh*W zuI0NBoWm3z<${gue-Hy@|~^nq!uJE#puB&yt(n~HS(*f+u^OMnYkJfr(%J@ 
z4{DI=;X*E4HHO>2<*5*zTA_T|kKbEA6SFl!(;?HUwDoHtH@lC;ilIA%I^W_ZJ(4Q+ zDmHC1Lc$8dlyk1GQ8hv%GZHkv_S3S$D=trCD0#Cqx#KSeBg5I7tkMF(M@+#I`a{#p zy?mlI+zOlCx~%8Q|Fu1XL=gO(D8sLbYYdAbD)|F1?!&i+^P(nXvSMnJ>KPnZiba3> zm&@WUdl5EH6O^D!8@RI=trNnE+7!#lvtwKo4h%OCe7QvwsYT(UekqlG39{&!5ak)R z&T}eJlRz;7oetq7b}OwFf+a^|IgAOuQ5z&i^eB+SqoO;v?4giyn~a!>7@;dcw_!E8 zDjN&hpi8;1v>GbgL8~9zwa3#Zt?{TY3_oqlE5`6ADjYN&Or@@ZIZCUW5FDji%N;-? zD$cO6kn1PSij`YKta&t$IP0f-DM%yCvahqEk20aGyU7+|l-kR=+{vwt+b5?ZvaB*e zts=A{PvpfonzZl#b7&#W0C$OAkW)zV6_a}_x{M;}AG*C-f59J2|0 zrQ%b*71Xyb5)^|pE)e-ULkY%sbTogG$Pa2wqab3h61$w_JDbBSmgJCy%g8dTC^2KWH)1ck;V8n&zJvkEQ^Pp@ z00()Ow#66i&@a!p?C-$kM1of-=uQ z%gxJCrt3|{3^Po8n5Npo&djl?6H)-uMt~enWr~iFy38>XGPN;FAVV?FV8(FV5)k`O zGxWhX{KS7_q!;8br)r=PvB#i0t4h2b(PA>LNk3)M48D^`oFdXe$t9As$LL7LH`2|W zTf=jt!q}R^MuS5OGQ5kkME+AL&GF5FlsLD-K}vD28RDuF-7KFW4d9tj0}W2HT%y$E z|Gs>r(-+mb9?Li|^EM5Ms3C&`8?b@Zbe}C7jS2y^vpNZQV!+$u5WBP<1e2}jOudY} zES368gB-3k^}|JlF?57R3sWxC3UJ)w?%^QznER zxKySj3)3}C&~F$<%v zBeU=x(~%4Qk&DrJov%%ua&gBh^BDlR0EhaSTf$u$Rg>6D*}S7T>rqS2-IVMsCPfuQ zS45(vDL=2dR9D0;U<4f31g*)Wp>_*Rx%>Cm(4j(^*SRo z(brPXmJA){`^Ti|rtdl)784T)t_ue!9fffjB2fw}@RNb4g5dxfT6u}#c;JEPVEF(5 zD{$WLv0#y~gOvzil!yYdkRUVR3KXCVx)_?1kYjZ9h}KD;ljvgA`C=4K3Y&?KG7*;@ zg)3X4vV3F8W0W+^3fFriy}J9#dZfSglo;d%qkT!eu!A1oER~>w|2W)x*PI)h{>!M5 zql_Su3kMJy`52v|{h_LmZ8l|!Y8_*mJC2|g}i(49lJwD__ zCJw?O4ud`k7mlEmtqMS>0yb`774nW)o?%THi1oqdr=VrweZMSq=OF9VTolG6WiN1) zE7n}YWpy#I<4Q1k#3OzXc^V#>;)vha+eJz|}Qi|A1K$2oEToi!NkfCE=na z7YmT;7aWpH0+5i_30{VaFNT9XeiJwd31mizK$eTPmKHH7Yxpy1|9ES>6bUZyW3ot# zs~&(-qtWgIthU5Fh3x4US<3)j%c2~mo*o@IOS!;B$kcexyiKg63c9qTE$G^*0gXSb z)HWhP36>5J?}%RGf}*Qd>2=!XS~=#CpaKRE>!Pg+mKF$X@f=1bjt8;poEU2|0pn!0 z>yU70wJeV+D37$L3S32DP>TY*p5x>#fc~hytC>}tJ8JK%J#ziLND{%>DcWMr{|cc&X}S2JwmxJ7spgNk3#zDX z2|@wFt%-=X-m|#qVLpqrs116H8USA?YPTto#}wRor_bFHnP?E{sN} z!^2T4st#+_$*izpoe|JY8IepT%MmzNLPdDXUU|=WZ*DV!m1vQ(I$&IxOD>x=WfK__ z7wmkr_(CM&&BHdKVy#-|F_6Q>Y^rhm%Lc`puc#mRvHgzrs9-E$ys=& zUqG6`5u#CPW1k$nE6+T2S<_eeNKM_z9|TQ3+VVP}%014y97lA*3FUizGwT{n*X1z{ z#*5&_^5?JqclR^08y`kU2P|^MXn@hF|3N#|F8Jf;83PCa0LBm;GjJe503zTlJSakhK!^ql zDvSt#;X^oZ3UVBiaic+n5(z$R7;>UOhb0+m%qXJZN{}5XiYzIzV@-wyH6|3;@}afVoa$`ePSfoGU~~SQ$yxF$k8Ffh%!@RjhOM^ z!ImIDQsj9S>QIery~44`aqJkYV5I^*%GEAPp+OCf>{t@#-irtUGe-FJgJHF(>_jJ(&fv)Q}@msoY{A9ycuDm zhFg(rWvzE{Qp7qsIN#q(|Be1mJMsI)-w99pZcY-r?wCM(pBI_-IeEatjYB-Vkg418 zQ7ty+?DBkdy4}^AdpDKVnRrbBWgue&iiX`~`;oQSU}Novo@?4Y=+=6{UFBMKsC_6B zL&wQBSz{mBMVCiTRdi8Og<%xqLVQgJB2}cd_~Llq71bAYEdqICk2nUHBaB7iG#Yf4 z9c9;r_pK=9j8?fcWq7v*>ElY_v3R6xHZ4^YmBI~KTw@F#$Q6O6)d=8?3W)|Idn*xU z6m=33d0=OKp=hOsu7T8=K_3d09frjE5>r^MnU$b@ z4doW0Dx$_`U}z=irky@EwI^kFxrbDJh$h!4Slgagt#NdgXj5rpEhT47(1yyRoP6%b zQjz06$+>b8LwlRc8F(BJx25^WCps2VYV$A<>z;(rbb)1@TFGeoD5^PtGnJM z+2FZfekY%%)()(#UKeE;B=%RU-+ZAd1^oVTM{B6)VrZ6XQlxc902rine;s+dEK+8naW z9AfoZO-_;b_oyl7YhB!T8m=gpL#tOaz%47NY+r+Fx$n<@isd2Ztv~vu!CDqNC&C5Z z+-tXYE1h!8?7}r#*qF1|dDC~w*i^_Vg*M~na{lL=yh^EwnSL<(e%|Do3CwuiTlpnh z?kUH%X!sFN1tmniQ{ND%bMG2X1oNe|b*1o4tSgtsj0d^Vk;*WGDirw; zwjHSC|Kvz1qYuMkV;0yvWr!m4jasrc6P>llhlo;Gh}Kg&6v1YGYjN7k7)UGXy)QE) zlF7O{L=)&4s&iPI%5Z)nzWSh%J9t}S)QaUR*C7X8;DM0VHbkEcWs5=Y5tka%R@_t(-;X{MLINR6Md1=$ zm$EjbYT2tqBl93Fb&13d0f{s5SE@aijIgwNyHdzads1z=(P0Bkq|I_^->OP z*69!-_H;H$q~+KS$j}mwa%>{?*2OG0(TEj_Jr1?rS->b7#8~8@uj%6iQMN327POAm zsh`-;!z=_P$3k>mSfT)9w&(3;t9fK-lRAemGzF+)lXM`I_DZ4hp@}uIat&M>l4Y7ZXV?X;2z+|6`)m zG|S(mi*0=2RArMz$PF?Ngm8K#&>R?*uN>?v+8XY|{uU;kI_{n$ysT}GA~a)G#$Tio zTW+D_reF;yah=*_V1ueyPHM-im%Xf?1d}IGqU&qMTA%_an#KnmPe1h~=a2A+trUq< zoMCxgjD{6mhOLu=i~`xvc=VJ!sce0615fCX20Y54=af=PEn*(G8Q*R$uAzM@QkzFo 
zwBB=TNE+pJr*$cfj^>JL8DdMGMAqcUGz}wGh+N}iXVsE^5#dDDh+b2Zid^r%=1@1shghM zM{8+9k(QscF;cDh9_G$an+KZhm8-eV46|s0cR_ZT%YzigIg>QECXZavy z*;?yV`^ITxi)A*@in)}WX)REz4QpCE0*wOK`dA99?8 z%@Fx|)}Sj(cL)R`D52KGY{$&iVki0ww$y$=Y(Y} zavkhy-{VuKt$C|-+d8WM2eP!MM|+|?r@F+B)S;;IY?!4sq$1T^vg!S}rG2aRR8}jY z*H)>zeO0@CS8@v0i%hR{^>mPn+LfbzPs8<{J|b1Y$|cTVr^ z#U^LlnqfYEE7i2hN^5NxJH~g8#v=@_1IF;zVL~)?OAj+$|M}9@sPxK}{M_!a;00f8 zdHa&!>{sy>7+@};JCrMiBDC&F%#7oqdeDq*{@vf~aX#YkEePu%&gfX2I>m(nKAfcZ zPNpbJV$~hm3{ROw#%fU-c_5gPEZSmV;K&e}Z4`}lX%Tqz6!WQ6GT{5Xy0# zcqJg2pqiRQ-v?F|=;7U;C0d$USOf8g01gl6MNj~qRR`e=(WFLVDHzuoS8?P<;>}0+ zfZYMQi2QX)4ptRda2Q;vRVSg~k_^Nu(c3~%7Nik|*CbgHB~f?0AK~d5H<=eksS_QM zS3Ctz7JiqG{S~hG$vu(I!6ntcb=;@LA-J4T*kltW|J9-UU>vX+2xZL2brj3LT-Qcf z2e{l}$e0YNd0aVR2&Q~51db}AA}W3krYX|+ms7;TZsls6~*lY-}Zr)%F$5c zA>T|PpUl`7mN1c1jUcue6CN3c->ji4Vj5HsqsHL{50Ty&<)SEMkz$o1uDoIhA{HPx zhX6_tF%C#d^kX|pV?tN~p$u4T-IVoAo@BgYb@iI=m>w{0iPDry^>|e}wGnYl45)ES z{Yl@e{Envq)zWxg9D)`~W{m9B;bkNT6#xJP|3C#oo`?_#0apMd^a(^1_(2r3#(ofF zPxK=o^y54}Qc(gxQ7V7}z+-8di$GKX4;f`h)T9Ohf)!Au@x-IM%@YFvKoblf?Hy%I z`lC!KR2T*20#$*gkWfW8< zI~tKI+5rL-0BNoxR9FB2Ab=7G#AfalMKHhsKt&V8KtGNUR1l<6o+d<|lR$hXBn8oD zRM;%)9rS766iOByGF5F^$L$~&9DRtY|0SSYrC)zcXZID^TJ20*6w_e_ju6SFKoBPt zKqe3UWGlL2b&*A7`T=bM0vAk3JkrEwmgP^f=V}4~7XW}12!tO{08~h&K)@yd_(4tj z0V?8TeN?9W2`GmCQHH9;?|o4^#hymdS-uF)2F{7=4U{8so5KOzidqY|g-v)w-}ar8 zvfz&c!9-3CieRy)9~32KdgfP_B?J6qKdR;y5M+Q>oj^>cgrXvGLS#zBq*W#<0F-4v z^5&BQKolrvfudqrGN^1OC2h8+O!>`c_Tw6WrD~4mSB^msBxjm7L6@SU6*Pf^LPdb) zPr0So7b%kIJUa zq&lXARc2{dp<-{+={g?eR7NSD)>s-U&h6HU!spD0R5>Ea^`X+OnUr}6qW?$*p%O_Lm72V)lgGM|Cut%xL0k-2B>*7g4^#wJ z;$OnjDSUF}s`l$Tf@&kVB9-zh(<1DPEh}uQCLjdlLC_`%NX*b8YjTdNlP;@3^Z`_C zffZQ6ktRe1TBoz>3KVojZ&Sb{AqQ%-P^Ykm&Dy+rKr@by|N;qgl zC81BSC%y6zPTJ0AuGl^KDrh1A1W2v03Iy7w#04Pi7AOF0LIfcAL9iBN7me&l&XpE2 z)D_~D)-hdQ(pt0l)2>->X8$zXGeQg_!pYmER(7_NM^2JYv>0YBiUJHQRPZhURKXoc zjC`Wv!5-(*P9+yWXuXnYU|A-WRsbqmK*t5F(Yhi5wpUH3fR%l9;0C8?91o$w20xST)?N>@=ScU|Cx6F2}SD`gWPYwD$h*pljeLPc5L@R$Ms3Y_97 zb1|FjRxgg*;QU?QR-@LyEGvPNlh_U+?-IK}+XF4nh!U0y4wC_?U`|BUci!U~X7D^w zfrK`I(efvFFw58~M=sA}FQtSBSm-fI3_R*1vl`)q)K7*cglhIvtG=_Y=HVWuY9+== znxu;R6&;g!nY$HS5m8P^n&$@vQ9G_jb^y+-be>vySKm15pAOuz5XJG4Bsx+=KSlx2 za;Cpo&m-AR@c-f(R)qyI)(0Sva$y~xO4lkNOeVP&uq*PRK)`Y_wjwOUP7>yG!4{c8q*lm zVHhN$ERtMNTB$YQiDh(&uFS^8R+KbWxZ2F5L57tqGpS`$WTaGC_aUcLpOVm3unQQM4G2JHDfE~29|hVz9bPa0qPOD z-we}@UH^OY@tn5*{7jMfHK6?uHL3N)SsbrutrH#N_MGntl{d-xDFszh7ix$#mx$~E zkwXjDDP2^cVOk&A7+#MQEza7sJe3vN(f^>eW)X{qG=~F@SHwNb9g0eAL0jtGRPoW3 zv$SacTvO}OSFRk^3(w#QAJKkm<2}Mlbv-XhcJMy=%iTRt874Pktq&>2onlKix9T>E zr;>|9b^CFnk-2bqf1z#$-r>leh$fvJ#!;ia*+&^RJBJ_R;)qdQE8di$i!1nVRSmPO zPU95$;;|V8o6uZZmDm}iFUo~u?Nx9C23hS%CM6*Y;oJX67-}%K0kRODhZS5VSd{Pw z9RKCX!{mz3YBQtjk6HT`7;hg=u3k0w=m2$g7XS&Jl!jdliJa2pd% zRa}4ciz&sI3tKagLv~s{dp{M~nAu~9e-^w^swL)E<7A{~y&tEXSk;7#v%tBIZgnzo zd(7O=nFP^7`!*rgotXfk2VXdN^D*8CSi2n_l1DT(-d>fEPolt@98%V~^ZAas73~qY zo%f}qt5G&?YP~xV`zVtQ(pk*bj);C$r7|G~ho_>OiReHsx6GE9NMVjdF3Lcel>cQs z$>)g)Syy`*^98NO>=4okul$}^_^%MR-$pkznoz0y`SrADWE={VL6vhV{JU)BtJ`D7 z8xm!6XIr&(Ag6WaMNtb+-t@i~Z6M$F@gNqvtKgcb#<9 zkQ#!y$aHqR8*Y>=z0qvFWffi~)l*p+nDvzqHRVe9U1Cdhdv;XbQV;%vIUC`ep|8ms zF{!8Pgje9`UpBwI2`lb>sl2lu3s8>I;GK!j==`0~PA=(fIYP~I){EN=<7CZwY7PE! 
z|82PW-L(V z5mlPQQb|U~?SC2c_@Yhe+zg_RKgy3lHD*A0Ayp-0c)m))iFUU)#7pQ1n`gPK6QLZ6 zJAVO$2!U_}jxl%;VM2vt05lW`z@S4B2OBbE2!|k?g$e~u+&BhfM*txyHiY=m;+TRF zDT+85a-hPK24Nno8Ij;aj2mO3v^a9-&x9Q_A_U-)Wy7E}bGkHH@~Fp~3Spjvc+_T5 znKLhD#7NL#Ns$_vu3Ts}X~vZguXfD35T*#SMm?f*2q)@Qt3W%-or@5o-o6VT*8LbY zVMT^VbGFpW(4$$qZaEH&T30beg996@WNS1t(ar;1exR*X$CI9=R+gBpuu77{x zy}EenVV5Fhdj_nUr{Rc7hhIL}8ZztD2$7#1Tyin)pc0vLUh9-9?XE^CE(MvBcx9v) zMf*K2w{KIt+J~21y1M7r>WR5lhksrsPT(xt4A-sMJoI`b$Thy$x{$W|Hi9X{%ZN--LC*ratV6sAv@)>bAT!A@v&zHqq2y=; zN;Rwo9P+Ha$aK;>sHQV1F4hEtO+KG^OQ}52f~<(EBd?=S$p02Gqbx7p42ud&&Vmc8 z$DhnYsm>3t0?Vq?JX=vj1MM8KI^kA2%0dWh#PO_oRsMbg%GCi$CCnpD zTjF%nzc`C?wKjjNN=X(qYs*6QdP)i?_)PP1JkUU8s$;|}gDSaUO-nK_9hcPZD^m+|b+t9eGj%j@39L_~Fc-6~RwdDVuu#&BEDNrt*0nUN zaO=gDI~JL=5jjNXk~lDtpmS0`zU|IL-)W~Kh z3zN-@Eq0G($3$K1q}J$tbV2x>dKt*`%CuId-4s)FIREzO8WKQJXW|h?ZMEbXGOxUg z*;kB1Vyx4p(%X?!Ut6mWR42pUPsE`370ua+dlt?+OD)3|s`}oP@X!J$T$ZGycy_zO z=xk-w2g#R5C3V8P#xMz^c)nx3*BW|sBzwc(gYq0 zzNbiPvl{!_ca|DOh%pl@-f1k;I`{F2O98sgi*iV-g2}Bnl4+N{!Z*3Z=tMMD>eS_A z143x=#zOCkl4Q!`5c+9vYNXQ0*(&!e37KhysZ){=b>$*(@#%|y^U4FGG@z|fiDB(p zU&35xKc%pzPqSjw3Q1HctC;FSi^>b7s>Y__y%9lq#MYyNm$YL+X(TMVVe2YpnU}C9 zF6TqzZ|wFqNvZBV3G~&&_T)#g0Sz?;;$Y*hCX{E*t#zoZOxI>&MVtWYBRBIHXdp+o zc&MvtO+h9ca+@=*ZHdX0W6*El${^x89{l1j8kaY-a2{pl;VJpLLvN4r6yJy*I;pM z9}3b!hb0?tl4XeflH?AHW4jg|ag%&(Sf;*JgdvYf>|&P z4Khow%-84+>OE|VrCDA?;H+HwB`T?FgtA(Ydc5a8XlCo3lJpNwC9+HY5#*xiM;i#HP(!wE$V1K&GgrO5ie^v`yVHPwnvp} zHKprZQCuK6vKO7|FD=a=PGTn`$rkpPGlC7xk`uB(<;HMZa?~u{7r;?XPDiIp=i=@; zs^$gGE^U!nDyhk`<3)9R6AKCk_t{2dUPrm=(y2dZi%PTj^PkmulTth+kB{Rx4lQ4OpPa!|KuzHpncgIKe|Ump(70>BBFlRF}8o zptzh#(;d=QCdeSuM4jx}T7 zzC}dtsW*n?g42|q)}J4`mz)~DOKi4hC~t+?aHt$vvxvMzXzj(Vy(?JLhQ`kGT?}R# zUaU$PtEO~X*}-|`PbPua$L>g~PW^QpAOko&Z1D-xDD5!x#;l@r@()YWlGxgGhanS} zx{Mitvc?`%9<$ZZHX1`SN}V{u5w|K*PoB=(>e_3DQ8jp(#A8;28r4W02}6L`F?e^} z*=6@RZ_73>aN0TYS}y83ILQnTBwM7Xwq(X zNu4EaqjoC5+KI86+5ggRDtuTy41RyXn^JN3#-|<%SUfNxf zlJ}NTYJyY5f?S0ix7#UJShfe0Udx7n{EpkyT@y?)@9hLNx#^O1+R_cfNLDSmyyPq~ zLA0Mqb;owL)EJ#ty3H?w7%yDdvBfQ}I|#I4&Flt16w-akhqSxyO5t!r)oUKfbKq^7byTB@c)*!-vHnTZC5>B_Ler>ML&1lbv*D5p?ctv!go-4 z;PY5M`ra|GIR7&bn5Hcs_wH}V+21tV#c_^JpCV7>siMD6&Xp~izc|KW zue$+U+}_t;_K(Ib{ioZT0@CJ!&XYCztH6EiG+O+~n@^IxqaBtpIjR`&?r30KofFZ_4`5-uMpWD5dmhjr`aT%C0P$?CQRH zV$5>voU({yJP_@80BcY5e9#F^0s(}eAz)Da06++$D*JM zcdX+y@BaqvtpK**<`gH7xUdNIZrPMj`jqeoj<6wikWcym4`ZMqe$d_;=un)Fad^tT z;%j--C|X*qEe6QhxT-l`iXo)$?{*9VozUT`un!x84>1wsuFv(hMB3i3A^f1-jE({L zFbei>Xc3CYHr6!QR@J}0UE*r_D>Ce(IA)2;>KVT2Q6Xd z44ks#H7>0?xCD&A4th!?V@$?zCe75qO*N<>C9825Idc1^#3>+h8e37?BvSR7%>(>E z8qKd2dr;bt01K535Gt?p=B^|$kttWwB>qu%{7~8uAPj$i-6}B$-OmAyuI>)d0kJSG zi?J+gao${FBcIXU0`C|>k0niFBX=&dNGB&7rz#{4eh8|0ZbZ=@E{i0NF%f0TJVfM5 zry&#|2DES!+0F3!knVJ63(oN1oDu%?(IBZ30Ps%P>ai6q?jPl@3H1W?n6eMHZvQZy z@D`oWEJg1geenk!Z$)&_4+{?f(AqA5yy08KC&I-a3FH3JCxegQWa< zm)jxT1Zz!(#6Y71>Z>c7}0Fo;qM$MOz!rvQjB7%i?$b!Q`?En8lLrGjr; z1WGVm3&-B+;qYrKdZ)tZLr-dnD5Xybha>z3RoXDraNy8W8=@BZuIad-0F|x!a1gv` zts$_G0!_jfHLu=uNAqBi3cQj&agtARE=-*;2M5(v1#$KY#4g#e7lrg3P@ z~V&02ILU?2}?D5m^8L42Q`} zwe$Y0DI|q3*7D{8iStwIM9|3Vj*ylA{*^;x4>&_^+%7N#TeVC5iT@be1u9_kV1=|^ z30Cy1bx8ch?V>7sibq)-sFs%Nn1t`CR*GO81B!I6OiS#_!Vp<8%IZq5|C}PfXbl2B zQ&Q$;_W;Z#SZ~{Qj<0Tr*-pc>hIMA&#~q`toSx1B$s+?@by)qd`1Zo}F7QgRbm%A% zi41c$i!e7MC&zNKM{xvFRp`X-uSQLy;UtcOZtqsGN52U6d;CM9;!SnjXFU-1Rl2E? zv;y=rkIQi7-ju~5q@dlH1VfMPQe$kfn59XyEpcCC?eum1G=<>`L!W?){McfEKF@DB zEzU5F%n%kpV^-U;X<=x^%K}R&m4n1~gRC$^tH>%c&nbSuMgQLp>|v-zV0g$#kK|jq zOLqC{K~QUWC~+=YqnenRFb#*%=R4aXv@PAN`oO<>R$%&e!&Tm(nIaVb!XZu?7-6r|E1s%{cTxhe^I z`-5qiO=rvLjU3EzPiknJD^?Ha)2a-3Jx-_aO1I35I{^@61`BbETWq? 
zIu5fe49%4+#?UgjQ72X0sz+$0g9IHlJw-T;>0?gB^~v&q zR^~(a<*#Lgj53U~2I!C;NU_Y!g0M-jttg_N@8LiuI7CW=yVEc{T5L(Un0OhblKD<{ zv|fy+%eWY!dmgS(Z( ziO8Yz5}&*S(bh8b}PHK$OcS&R86!ZYR)F1d0VnJy1Fr{ql`N8P7JhNNXM*K zuEgq*0*-@R7`QFfi-@-z?a9WQS^vk8intvr5#t%B$yJAt8@%H8cDosOK`oA|B@va( zcDE(QiTIYSCy8&Ft^pXR?7VKDX~VvYZpRkSddje+WNyma=~896d-_Wx+tHPGJlkro zy2!aXmXo|XmuE|{)|{?bn3DXZq#28N>}RK~e0w=N!!Z}sj%KiH+`~JFv`9Ra44Qtl zdQ8?cdDL-csN|Mv&A3aefI@|9s7krdNMpxWqnZa>ha0XUD{6$cR|<$}9E5^#`r1c4 zq)b_Ps7c$B2Xlo6bbKdq`Nh{s_g4UVbnTW>1^lH`N5b{`eWb{_Ym1~+cbIjXw(wo5 z+DV)s6J?)SY@Iw{TI`-KSN{+-9hP7|8 z90i`e)pvE$CVx_>fSP$UN=Ax>rCn^S5;;_3oNVbzmC*^B-|b)sF)jK$z6xqmYS!ly zOs^ImLqi^hNFG_f#%UID!C;E}J{!iQer8;VdJ1E(zPG%%6Te3|>uTNB+enf(>3T9V z@5E$(jfI)e{>XB@&RDmLIsIVb`hX52FoG5UN zLO2sKX8fpe;TVG=3ySmzlAuF~2tT3(>9V3om^3pgOlfjv&6*o`f;8yzCd`f&Q34%^ z6r@m!A2%NLDR3jol@E1VG$>KT%YrEfE)D7MA;pUnHzG8cP^(yiIxCWNN)l?)k6bBE z9s4q>PN-pbs{icRw60jYg0qUqsM2J?#E13LO>1-S*v4TO7be)4Y15gQzv>m7Q8C%g zPZ?@uixsrbhFV)6%#2iG)w7OcyIy@3Hq5J`vA$fH6Kz_`nJua9DtNwib^I5iiA7WJwQ*`9CeLK^h%UZX(`e@KBtbF~axW zQCJwsgx-L8F;r1)>nVniR6D7L8G8erHX(lT^%fyVJt6kqhH{0c)`O=Jc%O1?wRPZv z9OZu><^RPPX6e!Rp-aL+q?<+Yk(iT#TJaWAbrNA&Scu+Dd7e;W{;1+rNFuhFc`-(4 zl5`m5w3L3U-G|v@k8u>EQ_y856IO3tc95RE`49Na>BG(Fd4p zjh=ZGiHlb0XMW4g7UYR04F%qhvkrEvbk7YXXo+9m#ZXe&W!GCwh4M9;Ul|TKA*HW1 zN2PHbHt5o7>2;YOT)JvWnuQ^u68|am1Y**G3Vl(E`f+zCXkVF_6xXg!H=Dea?g;i*s_qo1 zXtI@M@Xr@fJ*}5)j|B2-%JE3uOkW8`_;?7b_;PPTc^r|zgAuhNpff3bmQc>=3-DSC zc4rvOuu`VqbR3Hu)8|qa9bxC*mUZEWeyXgVwvZB4g4bDaf3y`6TWvkPmC2shOPHa9`H0=EhW0!$W zmZOE_t9a54*IZv=Q-1Xl61~){Zi`;{i>E zDU+fCzeSmo;7%t#GL>7jcOTL5?^KVY++Qw6F@jl1B67+jf(Bz8lQBn&^~2T zOJIO}5K_4U_eV#=@6jEUe5SSz{17wA90`Qv>0mK0O5CzaFlnga{(cWVk|PiOC`+li6e>^?TK_WSNE1*PrBvw% zjc(b_0t2`pAytxt2r^+SCmKow0zd&<$Q}X;ppu|MpbZQ_YYZ~FCI~dNA%$g{n%2;e z7(^BVb;TsEGu zpa8JCbB+LzY(wNr&xi05K~3N#7d~6kPmMLdn3RnLcq!fgo-%3s0vqRyqOa?^M?rOy zQJ$ch-b6W7ppj$EKy0vAAxk6*4SgsCdK+YFUQMVC5keIh;DAn2;kaAoFo*2`fZkH1 zu>d%5MF%iRHWN^bACL$M)@0`)YZ(AMj_iB^fCmc4d6mt{DRTj^*a&B|4QIu}ns6Eg z&gM64i$TC73(d}Pjuo=M6LO~Sd(rz({dE}Y?Els8qU)LC(@rL!DY z5(BV38WjMi6{fmWP0eEGbBq-oD+Mar*EJQo0u5QhV*gDG-xw4C&la#iZzWQI!ix0{ zeyFXs9Nh;KVfHb|*n45`*e~TFVb!69w$sko;!qh|Yvkm#)Q^gqiE7J+coaA@mTjo|d8;9duNs z-QXX-u>cVtYoI47-FEYG3(5{PxR={W2*A6Vb0(mczl~TK6Pm-IcDbVo((NDLcTMlz z?JA zvS{iZeD-V^7@}(|L3Hw8q~(_s_uSDG=(@in-2V_OI3DD>^|Ff>OY>lwxFIN5?gb)V z@tlS|?hS_f(ygeY(-2F0dGu&)?G|W2f1ZYYPX}|ug(Uaod#wdnBhqy7)JZ+3 zYFfd3sF8ehmweWySpCLDTEPH)wrJ{Bfs-_U^9Be7z)^GeT5u+DiFI5ia~k^BT1_W_ zj|KoJKyTty9LIr5aFQEf!!py-9}1Hq694iS`x741#5T3!5`49IA=pWF=3#4+W)cB! 
zQu9rK#Sp621`*a+&jtXH#Y^o*dRm598TC;%(t#Zz1Y&q-Or&V}1%Gw+gmxxX0N{mB z(S{9CQc(y1Taa*3Fop0|1z`sWesFX1mJu2#TsY@|6{u*F#CG;4iLZ55ja3E6CW$#T zHG3gQv7=pnV;RU)Ek!03MR-?9Rv#S_WhtjkNSI(%;9pKRM_46X(p3Q)ws`!`;))0!gbjMYE1V)1@ zabShE1^g#*s?-n+rc^LEfX_I9kpD+#(PePrBXVZ+M{hJp?XgK}^E1p;7Bpi?oU%BG zkz%+fPH@0WdbWaF=7I9`I8Q@BfB2M5Y{h@WM31PEwawt(ii29U*7oz+XxAzg8} zcz703_f}k>WOV+9LK_BHQ6N?Z8E@%Ak@ECk8ixnPR#$n_b9V7y6d?ug)mopmc*k`b z$~aE^WHtmfJT^3ngfbpZ#82Pn8%lCbK6E}dfMryGj9a2kmBn%rSC5~FPa<_UMi+do zMMyU(ROxpmVs>427KiDwQ%6K^fWkz8Wp0ba9rN~IY7#M!);#KDT_{8t!3Ba`ZZ$(Kt9VMrJ{26akS00A+1D$KEF>_wW!D4Y$)JpaNG z#)%w_X;;GYNiQW<>Qz_%B2<-SAorGZ)+9SmrJavLKDI%RX%k8(2Xc@zD)$6VF;p#X zqYu$OHx8tHvf}OLSu!XlRc(XS9Vc6Btm}S2W#BqBaCSy`S_H+Go!d9ID6DA zN%TN{(k%V?IR&Feq*zV<7+!I-L2D(CvXdIEgLfws8en3kurYGLvx7rELN|qoPBo^;?i?6RVj`r}!+{A}(;XpGESWU~wZxH=TKfs#64& zo}-|Q3PD7wm_;Twy|Yrmx*x<^L4w0AymD7`B^(q3A-&R8wlYdA^iDyjHxpt&`_dx} z!lgJtt7vL|UjM;yAciL;ai`JLqT3@kj8c`WdZsJNF78q_r+Jp0L!)Ne71_f%c7rv8 zV_Q+8cf0x~8IqnigGV7FP@2*nx#~j<170xnuYO@E%w#I2Q?X|1H0lv+<+OJ~sx#{+ zuv2*=BU>spsvZ4#D)@RUU(>9j^4pvu#l{RfMoP+dLWcI!HE3=VG`hqbG6XCYtGq z^;18#rOUO{y7G)k@$Lxld&>7_%X%f~`ARMO<2n2&_Ers~5}k8CJ%A+?78M<2OH5 z6c}R~QZ`;Cx@+0u8IJXubwn}~rH_ttKkM>B*r7Nvgs@9FK{jf`-a@ccBQ}Qg!hn-i zKmWX6KrF;VOvFVD#F$~kNvy<6oWv}}#83Rg1p&oVOvPE{#8ez1Of1Dy?8IAK#Ydb~ zUOdD#szTA4!*GE=Pthq7a%+kb8_ zJjj~a!XP`SFm$5VWlm{9wC0x~73&jbp*BcH$d`=AABD-A%*lbQ$(;z63CJ2LBmMtWnfj!a$}GOyw> z21Lxy`3%w)J<}My&j8KK{H)P`+|%KV(=x5ogp3;Pv%({sOydblqGKH@89r+KGM-%1 z_e{_n9n@DH&OANTSdGbVaX4W^F#WS5^UJVR0Za)6seH1EF>1&0Jk`|v(_9_UZ|&1O zoy}Uk)pE_t^2)pq-8m|~lQU#wI%uCnYCRy4RaX7awk*)%tk`*s*K@7cz5h(tG(Fji z4aDC3#fEz$IZLDbQ7RFerKGbjXgrw)dD*G0%~yTXs!hm`joC^(U9qyaL82{6*rPI3 zAJ1w<-_gUa{m*gT(ZEgIsSVqIJfA26EB@1i?r|7u%|1J;cII-}8Nt=UP0@Mn$MP)9 z$NkaR?a$y%)A}sj;tj>An=8GRr_sZ>L!_sA7d2DLHz85n*lgaH?A<~=$oZYit?kxO zeBV{B&EIf6;T4|K{0(^i*&S@% zrz@e*`-_B`2~9JR;lW+m#qHluyy6s&$y&YB8y?*GNL)!4r39|Q*#Dz23ws@z&EEme z;WDn_jcw#fuE%w(sf7BwoCG#e<4b<6rjw%_^K9cq+}$y*pF z%Tfgh0#U?N;ObP|QL)-^_;Kz*q!^FK|-`*}6ihOrvhda1xP#R}26p^}`+=bw_i;yw~p+Pz2!sP00=h*DAv<_+=o%{5VI`oQylC? zYyk{l1(oj0#}32=@Btq%01glZ10eET0L)a-Qw_$%RiJ-UTo48CPw9-Zm4+yEU;%V=)_$FA{N9ockW>pu+l0q@Hf zPy)|Z>vuot-Btk~-~nUr0brl$Zh!ayAohR{2oz8O6#wAOa6il4CITps?+{P|P@nIJ zKg-8X@`sQ1N6*vjwFP#7+H3F4@jma^iS{$!QoTO(lz;e3AMsC&ITW&i=#;Czm4gZ7 z%!{FT5)ofWIbSmGRRP8Yw*OKjAooB_1r-1Rzy}8f00nU70Diz!#oj<(5B5hj2K}WK zSSG}nWcFn@%d%y~1@^;NKM_J;#T)TbLLdZ<@$W%AW0=HLslNY5Jj(z92M`GofD8fx zU||Ub0DbxZ@ZbT!L4=p0~bIS7_@6aH;_W1SIgQh^hkMI6%~cfIac5K;b(J z?63ocHiQ$w4hY(CfV%<Skwjj;}1S%kL2?hY505pXBSi{XE{D|U%B?hR>fdCkA;D-_z z5Wxxs+FPOmHUUB5*CyK7VnquoXv2;VN{9lG2DaG3fhd-+%G6&SV4#HH2B-oG_6k^{ zDhIGdfY&zsus{S0Vu-G;h@y%T2B5(LYT@_*3IGNe;8zDrr~z|%Ef8?P z0ywt)VBKbA3xHHR2($pAxG9p;s2&Vb0xcWu%;_Q#{>-z_i+rj}uRi@6<1NJHGF)(r za@>N^#ztza)70w54D&jp0wCtqynHo!T1UYuOfzg#rKJdB|b2U9ZYr+6a-*=0GOW? z01%vsoNWn3sX!E*5`zK2p;h?V4dJvDk&j?TM}PoRj!IVoE?A@qeuzi~RuvJ__{0Mc zfY(V{NCjFsP*Gw-2-XN!feM09cp_*3|KMO4(m{_)2N8l;C=wBZ@Q#Tz7=sWLASu2X zL@$d=oZ<*K7(*FqFCeLlQvwATGSbCxh*H$x9D|g~8O0%J*$kzOQWMhT%pg`6!)c^5 zx=gjfH+_%-vTPSUrT+lp2V1~HdF%iM3KXCOc%aMze=r8koTULvzyr>pHvkmWk5q7o zK^0a42r5tj5EshX76t$SEfI(ccYpv8GZh1CVG?eBK*1Qe;|DwJ1Oj;Iq^&YzOXSq> zI#i&JLh2WTUp4^+NGJgyPMDS8HOL?W?1&KX;D-V@#E}K?!1fY_Ooe3%0b^K$A1+|Q zM_RxS%0a<%w%~!Wd~R3=!I4B5P=WHGpb60f1g#PU2nhTjM^s<{1gHR#>v-UjQCOb! 
z78(WX8AJ#uaKLFaLV{3Q(|@VJYCA1hUBot7wot{Yi0y zf(!2WbhxM3<^NBO!QJ8f1^~q<>M4s_LmQ0}DNB9MAP%7-AZAHVnmEr6@Uj6PA)wIi z&|onoSi;C=@&iA#p$#!OYXR_JG7fIFI38L^M4(46v#JsR86hQ2cqkr`fC&x=FlbF; z={!x<(v+FOrDPPqOA%}VT4&YRFii;vA^3rNTa8H&84?3OAONxu$W28Jk`YN=bu4`d z!pscfNGbF|VF_6YFX6D(KDfpyj$DI6ws6+7-hrN%aplCeutE{sVF?I8Ygz*ug@h0^ zGA@NBcK}dWAuQo{DQgKuJz#?ZR6t7%xRnTmP^FKE)F6oJ*k>+~H{g1%EM%#tOPAPG zJ6Qxz_Wu=0Uxs%$q3$;+{FO^hyE0%fsxe272?|sshnY#aw>wXzW>kEz1mi>`QwHrA z0Cblr2w3Y)Bp3q)pnwMl0MwA@%nZ&vAcZAJ!NLVC&RNEwK0HJOT?MENO8{^o`2u1M zAQYt#&cYKB02U=%4c2rwr7sFZq_Rzzh--V9&lOFi2MV*OAJWjrm?#qf!wni2{B%Mp zY$QSy5#VXJi3WsxGoVm*h-(=#D&YJMfzeYRlvnf-oOEi65ozoS%?Z4tP$7cNg|Vm- zsuCsVsD|c6h;~I_p9WS*yrG24X(G~qh>qbi4G^+Ol1NXZD4?Env*@U2YLcAhMNn`F z4F913_m{xv1;2UJfwtfWto6Ts;Sndg1(?!96t;ofbbH} zwWW~6wMwrINnu`5>=y$Iv~M9IVBl&CP|44=HgHd;gBBr*H5|Q)4sNQKLV)UC%4gWV1 zzNr1Ce-k5I9_Q+_5B0zcEfg3L0_8P|XLEk1$loc_DmGQY4?pn0*+^JBWgC)*%!9Vh ztsnv}fXpCw@I%^;v~4Kx3j$Dpz!2X!zxov=S-j~P9*9b>rdbzf%yPg0{^ya`AO{ZE zYlsLAi9q^IgbUlFtPo1@yFRobf+Ahj1sI953s@b7AIM+t`+%(|3}Coinbrqf6MzAz zIPpz@LOp{}g3x$C#4fvlu@W{u4~XCb0U!?ETLyr1G6`c4Mc2(}(o{dJ>LA7)00%gd zhya?R*bRwUAOx_mJJ2>Om>R>YiFsQnr>TMuh=I<#yv#!kv~d7>ah+s|m;YyBzp{I& zgFp+xt0FwnuQBqXpCT0eDzGbBoL)O4KbRcK`L(`VHMlu7wOE6M`T-xAttwck4+sy0 zC@9u=83v$&_F;evxTdZ0xPw4}t@0-yL82Uhm2s^X`YEqK>;3T8yLLm?d z_o$}=nY>`Bf`UQ;qfisSX%R88fJyv;N+b|KETl`34Qm1fJNN-MsSJcV!YFV6Hk2R_ z@H7dc0yzwlgP0ICET{q?5zK%kF+qV-`~ZOY4?~6 zL>&I|3z@hGziA6C`iQPbuqZ%`xf2{lsUk_?j+v4R(EuKc&oVv(0xwy3~dW%B&lNI!n8}zmO(y3Sz31+lOwOGde1F4yyFOA%Yu-iYU3=8^t zNj+?%yrHUAkak}*IFwWLdVG)e($ zoWW3>R|AYi0hF|pyT-}8p!^HH1k1g2%*TYx$TZ8fV8*PB%*$lV%hb%xbc)SfwYPgl zKOrNZaK-@(&73ODyVxnmQ9FL5%gpr5*qqB9%u3jl&Ao)pa=cB=oXoK7O}??s$W%() zSsX4pN-~w z%-@8{+BDDgl+3tf#_9|sEFz3;K`JN$U(t zD}X|;EYG%33j|#Y8llYc#7qihPX@ivt|ZV5it=l?4=)#K(OAFWn{75`Fh%n5+15uGjE1-Y{g|8KXgFC1K2<^}f zg-Z<`Pz$|HCzVYqT{S9|3gj%!u0Xrw%r%@k2?H$@z-&#Q=#!ny#`r|fr|d3lselT= z7H$%;l^RexX@efZ5>45{?DB&xP!)M8?WdaJLy2JIF)xvDp-V`N<U4J5kGq6;1%f9s5kp8N{`mQqj=7uWn7d zj+F~%y*&h=2oyLxdU+a)Iu(HdAVq|glia4$fkmFcxmGb72H1kSAPqvR02GJ^oTNDZioEoy5;PvCPKgTB8_^ zbF#(nkt&Q)4D68zJb0u~G>w`xh?i{%7)!iX;WYmXXafeo1MB0_=_(C>l`03QfD$Cu zCk@;wU0RzQRq zHHq11!ZCoE%y=X?kcf6ENwJ|KnAyVbGng^BfHqhHQK6a=XoFvom|O)7<5{}IBUvFm zTEd;L!W~cL9o*|=Uc?n$n;fIToHfwp(*JY|!obu)QOukAFP?I{pxw)_U5erriM6r- zM?x#KT7o;UCgTa%rC6&yHHe-;D+W{yirPXh05cH!o#lyvnXn)~=mCOt*eFffvrN{; zJ>CX(;N3)17WK=wv)E15DJ=z1%AMXU;?n;pd2!O4k67Lcb*?^ajeBejzTBMEM31!^lb<}C) zqO;Rhp^VSNt!jl;;iMKQ7uOzRVU}&Z8u-PMx(I)YvVZPaGUk#I2$* zra}rk;nM-HZz})<@Wf>UIf_6RBNY!kpvewH2wTB}K1jbh+p?+_ibxsGV#2Unp6oe@7+@|>BR?Z1dhR7`@ zMW&pffC``{z-8iocFQInT7YKh16Jw!0-XP%yF5|MD?((1J{+8uVzv8J2AFe8y=yg&iJI@ zpqNI&nBJB?h?-f~t^MhlVu1g~!%DSI-l}$7seV-TBu~A*P(kLKtTxQQx{(_GQssT(pGdvy-*^EHKP9O#94K{6sr& zj7|W})Gg|%Dn)4yRaHfe?9I+>KxWw7cI&C;?IXU%!kJe5;-aBk&bt`Xqe_a@7E>Ag z(pMgdsO=!s6B}&7N!Qu50avZ330S0Il2=?CR^KoWwLxuBNrl$!qD*4D3SEpgPmPs1MZZSBmn{O4p{@fVlwVJ7nFu5TpY%n=Xo|Bky6Wlrj~ zac*Qm?X}(&{qhcJgqw@FTBKGCyFWh}_at%!O`*==_pk!uC{*b&Qr&(JcM7GS_qq z56kP`^n)FAKmK46FYd*T^Ni(U#u4a7Zb2wv6cqJO9o%S7$8Mu#YMQ8R+XiN!Mp*6q zbT9|>zph?pK2HD3ZRlKE@3q@l(oVZZSrnug2}O~LiJ*2n=?;_j+LmzfY~ScY@5;j^ zCHr1tiM9+Ha8l}6wafsnh{$z2kqn`j4$5%Wt3Gso9$!j@K|MKkn+hYLa_zyEyBR#N zMS%cUclJfGj~JM<%n7_7jT4o^9KOK~t~85b1-pkA>XfDlh_9QAFIewsIDZGQYmZ4QIO(zmqK@WNAgfdUOV~lEVkN$CLF z3>%c3rFV=rA@^e|Po~ODDkum-@4-|n_o43PiH~=&bIg^B6SOG#mM{)dYWYfvXmSs} zL@j#)xhMaoS~Uew3^-y63&8d>ch4gCZKYgq)tq99jmFUo@++R+x0}wVz`Gn#j9zn$ zxMq5nI6}a)laVKjn~-SkNR3Ddc^c471t>Nok|Hj6sjbivmFTmwxJa?^deTP;m7siL zqo$W{_&7l|tvCo0kST&ti>p@)(FX(tKzFT>j5s6LqIwWa%>p#V?-`4XCh6ibScxO 
We provide a TensorFlow Lite sample application that demonstrates how to
recommend relevant items to users on Android.

Android example

If you are using a platform other than Android, or you are already familiar with
the TensorFlow Lite APIs, you can download our starter recommendation model.

Download starter model

We also provide a training script on GitHub to train your own model.

Training code

## Understand the model architecture

We leverage a dual-encoder model architecture, with a context encoder to encode
sequential user history and a label encoder to encode the predicted
recommendation candidate. Similarity between the context and label encodings is
used to represent the likelihood that the predicted candidate meets the user's
needs. A minimal sketch of this scoring idea follows the encoder list below.

Three different sequential user history encoding techniques are provided with
this code base:

* Bag-of-words encoder (BOW): averaging user activities' embeddings without
  considering context order.
* Convolutional neural network encoder (CNN): applying multiple layers of
  convolutional neural networks to generate the context encoding.
* Recurrent neural network encoder (RNN): applying a recurrent neural network
  to encode the context sequence.

*Note: The model is trained on the
[MovieLens](https://grouplens.org/datasets/movielens/1m/) dataset for research
purposes.*
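
The sketch below makes the dual-encoder scoring described above concrete. It is
only an illustration, not the trained model or the toolkit code: the random toy
embedding table, the bag-of-words context encoder, and the item IDs are
assumptions used for demonstration.

```python
import numpy as np

rng = np.random.default_rng(0)

# Toy embedding table: one row per item ID. A real model learns these
# embeddings during training; here they are random for illustration only.
vocab_size, embedding_dim = 1000, 16
item_embeddings = rng.normal(size=(vocab_size, embedding_dim))


def encode_context_bow(history_ids):
  """Bag-of-words context encoder: average the embeddings of past items."""
  return item_embeddings[history_ids].mean(axis=0)


def recommend(history_ids, k=5):
  """Scores every candidate by dot product with the context encoding."""
  context_encoding = encode_context_bow(history_ids)
  scores = item_embeddings @ context_encoding  # similarity per candidate
  scores[history_ids] = -np.inf                # do not re-recommend history
  return np.argsort(-scores)[:k]


# Example: score candidates given a short viewing history (IDs as in the
# Examples section below).
print(recommend([260, 2028]))
```

The label encoder works the same way for candidates; ranking then reduces to a
nearest-neighbor search over candidate encodings.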

## Examples

Input IDs:

* Matrix (ID: 260)
* Saving Private Ryan (ID: 2028)
* (and more)

Output IDs:

* Star Wars: Episode VI - Return of the Jedi (ID: 1210)
* (and more)

## Performance benchmarks

Performance benchmark numbers are generated with the tool
[described here](https://www.tensorflow.org/lite/performance/benchmarks).

| Model Name     | Model Size | Device  | CPU     |
| -------------- | ---------- | ------- | ------- |
| recommendation | 0.52 Mb    | Pixel 3 | 0.09ms* |
|                |            | Pixel 4 | 0.05ms* |

\* 4 threads used.

## Use your training data

In addition to the trained model, we provide an open-sourced
[toolkit in GitHub](https://github.com/tensorflow/examples/tree/master/lite/examples/recommendation/ml)
to train models with your own data. You can follow this tutorial to learn how to
use the toolkit and deploy trained models in your own mobile applications.

Please follow this
[tutorial](https://github.com/tensorflow/examples/tree/master/lite/examples/recommendation/ml/ondevice_recommendation.ipynb)
to apply the same technique used here to train a recommendation model using your
own datasets.

## Tips for model customization with your data

The pretrained model integrated in this demo application is trained with the
[MovieLens](https://grouplens.org/datasets/movielens/1m/) dataset. You may want
to modify the model configuration based on your own data, such as vocab size,
embedding dims, and input context length. Here are a few tips:

* Input context length: The best input context length varies with datasets. We
  suggest selecting the input context length based on how strongly label events
  are correlated with long-term interests vs. short-term context.

* Encoder type selection: We suggest selecting the encoder type based on input
  context length. The bag-of-words encoder works well for short input contexts
  (e.g. <10 items), while the CNN and RNN encoders bring more summarization
  ability for long input contexts (see the sketch below).
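
To illustrate the BOW-vs-RNN encoder trade-off mentioned in the tips, here is a
small Keras sketch. It is not the recommendation toolkit's actual API; the
vocabulary size, embedding dimension, context length, and layer choices are
assumptions made purely for demonstration.

```python
import tensorflow as tf

# Illustrative hyperparameters you would tune for your own dataset (assumed).
VOCAB_SIZE = 10000     # number of item IDs, with 0 reserved for padding
EMBEDDING_DIM = 16
CONTEXT_LENGTH = 10    # number of most recent items fed to the context encoder


def make_context_encoder(encoder_type="bow"):
  """Builds a toy context encoder over a fixed-length window of item IDs."""
  ids = tf.keras.Input(shape=(CONTEXT_LENGTH,), dtype=tf.int32)
  embedded = tf.keras.layers.Embedding(
      VOCAB_SIZE, EMBEDDING_DIM, mask_zero=True)(ids)
  if encoder_type == "bow":
    # Bag-of-words: order-insensitive average, usually enough for short contexts.
    encoding = tf.keras.layers.GlobalAveragePooling1D()(embedded)
  else:
    # RNN: order-aware summary, more useful for long contexts.
    encoding = tf.keras.layers.GRU(EMBEDDING_DIM)(embedded)
  return tf.keras.Model(ids, encoding)


encoder = make_context_encoder("bow")
# A single padded history of two items produces one context encoding.
print(encoder(tf.constant([[260, 2028] + [0] * (CONTEXT_LENGTH - 2)])).shape)
```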
made_changes = true; for (const SymbolTable::SymbolUse &use : llvm::drop_begin(uses, 1)) { - auto new_func = func.clone(); if (num_clones++ > k_max_clones) { return func.emitError() << "reached cloning limit (likely recursive call graph or " "repeated diamond-like call structure " "or just very large program)"; } + auto new_func = func.clone(); symbol_table.insert(new_func); new_func.setVisibility(SymbolTable::Visibility::Private); if (failed(symbol_table.replaceAllSymbolUses(func, new_func.getName(), From 151bd5901aad789d309697ea4ea634f430145896 Mon Sep 17 00:00:00 2001 From: Yuefeng Zhou Date: Sun, 2 Aug 2020 22:11:18 -0700 Subject: [PATCH 1957/2522] Cancel in-flight closures when there is an error. PiperOrigin-RevId: 324542620 Change-Id: I1d6cddf8130df74f00ce7b0a3b6b84f553990e78 --- tensorflow/python/distribute/client/BUILD | 1 + tensorflow/python/distribute/client/client.py | 175 ++++++------ .../python/distribute/client/client_test.py | 253 ++++++++---------- .../client/parameter_server_client_test.py | 61 ++++- 4 files changed, 252 insertions(+), 238 deletions(-) diff --git a/tensorflow/python/distribute/client/BUILD b/tensorflow/python/distribute/client/BUILD index 0f7b7df145f..35d8de95276 100644 --- a/tensorflow/python/distribute/client/BUILD +++ b/tensorflow/python/distribute/client/BUILD @@ -32,6 +32,7 @@ py_library( "//tensorflow/python/distribute:input_lib", "//tensorflow/python/distribute:parameter_server_strategy_v2", "//tensorflow/python/distribute:values", + "//tensorflow/python/eager:cancellation", "//tensorflow/python/eager:context", "//tensorflow/python/eager:def_function", "//tensorflow/python/eager:executor", diff --git a/tensorflow/python/distribute/client/client.py b/tensorflow/python/distribute/client/client.py index 533d5f19042..7bef5e2385c 100644 --- a/tensorflow/python/distribute/client/client.py +++ b/tensorflow/python/distribute/client/client.py @@ -31,15 +31,19 @@ import threading import weakref from absl import logging from six.moves import queue + from tensorflow.python.distribute import distribute_lib from tensorflow.python.distribute import input_lib from tensorflow.python.distribute import parameter_server_strategy_v2 from tensorflow.python.distribute.client import metric_utils +from tensorflow.python.eager import cancellation from tensorflow.python.eager import context from tensorflow.python.eager import def_function from tensorflow.python.eager import executor from tensorflow.python.eager import function as tf_function from tensorflow.python.eager import remote +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import func_graph from tensorflow.python.framework import ops @@ -247,20 +251,28 @@ class PerWorkerValues(object): self._values = tuple(values) +def _select_worker_slice(worker_id, structured): + """Selects the worker slice of each of the items in `structured`.""" + + def _get(x): + return x._values[worker_id] if isinstance(x, PerWorkerValues) else x # pylint: disable=protected-access + + return nest.map_structure(_get, structured) + + class Closure(object): """Hold a function to be scheduled and its arguments.""" - def __init__(self, function, args=None, kwargs=None): + def __init__(self, function, cancellation_mgr, args=None, kwargs=None): if not callable(function): raise ValueError("Function passed to `Client.schedule` must be a " "callable object.") self._args = args or () self._kwargs = kwargs or {} - 
self._function = function if isinstance(function, def_function.Function): - replica_args = self._select_worker_slice(0, self._args) - replica_kwargs = self._select_worker_slice(0, self._kwargs) + replica_args = _select_worker_slice(0, self._args) + replica_kwargs = _select_worker_slice(0, self._kwargs) # Note: no need to handle function registration failure since this kind of # failure will not raise exceptions as designed in the runtime. The client @@ -276,25 +288,22 @@ class Closure(object): concrete_function = function.get_concrete_function( *nest.map_structure(_maybe_as_type_spec, replica_args), **nest.map_structure(_maybe_as_type_spec, replica_kwargs)) + self._function = cancellation_mgr.get_cancelable_function( + concrete_function) self._output_remote_values = nest.map_structure( lambda x: RemoteValue(self, x), concrete_function.structured_outputs) elif isinstance(function, tf_function.ConcreteFunction): + self._function = cancellation_mgr.get_cancelable_function( + concrete_function) self._output_remote_values = nest.map_structure( lambda x: RemoteValue(self, x), function.structured_outputs) else: # Regular python functions. + self._function = function # TODO(yuefengz): maybe we should trace python functions if their inputs # are Python primitives, tensors and composite tensors. self._output_remote_values = RemoteValue(self, None) - def _select_worker_slice(self, worker_id, structured): - """Selects the worker slice of each of the items in `structured`.""" - - def _get(x): - return x._values[worker_id] if isinstance(x, PerWorkerValues) else x # pylint: disable=protected-access - - return nest.map_structure(_get, structured) - def _fetch_output_remote_values(self): """Temporary method used to sync the scheduler.""" # It will do nothing if there is no return value. @@ -319,9 +328,8 @@ class Closure(object): Args: worker: a `Worker` object. """ - replica_args = self._select_worker_slice(worker.worker_index, self._args) - replica_kwargs = self._select_worker_slice(worker.worker_index, - self._kwargs) + replica_args = _select_worker_slice(worker.worker_index, self._args) + replica_kwargs = _select_worker_slice(worker.worker_index, self._kwargs) e = ( _maybe_get_error_and_rebuild_remote_values(worker, replica_args) or @@ -350,8 +358,7 @@ class _CoordinatedClosureQueue(object): This class is thread-safe. """ - def __init__(self): - + def __init__(self, cancellation_mgr): # `self._inflight_closure_count` only tracks the number of inflight closures # that are "in generation". Once an error occurs, error generation is # incremented and all subsequent arriving closures (from inflight) are @@ -359,17 +366,26 @@ class _CoordinatedClosureQueue(object): self._inflight_closure_count = 0 self._queue_lock = threading.Lock() + # Condition indicating that all pending closures (either queued or inflight) # have been processed, failed, or cancelled. self._stop_waiting_condition = threading.Condition(self._queue_lock) + # Condition indicating that an item becomes available in queue (not empty). self._closures_queued_condition = threading.Condition(self._queue_lock) + # Condition indicating that a queue slot becomes available (not full). # Note that even with "infinite" queue size, there is still a "practical" # size limit for the queue depending on host memory capacity, and thus the # queue will eventually become full with a lot of enqueued closures. self._queue_free_slot_condition = threading.Condition(self._queue_lock) + # Condition indicating there is no inflight closures. 
+ self._no_inflight_closure_condition = threading.Condition(self._queue_lock) + + # Use to cancel in-flight closures. + self._cancellation_mgr = cancellation_mgr + if _CLOSURE_QUEUE_MAX_SIZE <= 0: logging.warning( "In ParameterServerClient, creating an infinite closure queue can " @@ -377,31 +393,6 @@ class _CoordinatedClosureQueue(object): self._queue = queue.Queue(maxsize=_CLOSURE_QUEUE_MAX_SIZE) self._error = None - # Error generation is a counter that helps us track whether a closure - # should be cancelled when it is being put back to `self._queue`. It works - # in the following way: - # 1) Error generation starts off at 0. - # 2) When a worker thread calls `get()`, the closure's error generation - # is copied from this queue's error generation. - # 3) If any worker thread experiences an error that's categorized as a - # non-retryable error, the queue's error will be set, error generation - # increments by 1, and the queue is cleared (with the closures marked - # with cancelled error), so other worker threads stop getting closures - # from the queue. Worker preemption is categorized as a retryable error. - # 4) At this point, if `put()` or `wait()` is called (usually by the main - # thread via `schedule` and `join`), the error is raised through that - # call. - # 5) The closures that are inflight, i.e. that are being executed remotely, - # will not be aware of such error event. If the worker that's executing - # the closure happens to be interrupted, the closure should not be put - # back to the queue, and be cancelled with error instead. Checking the - # generation id of the closure and queue is how the worker thread tells - # whether the closure should be put back. Likewise for `mark_finished` - # and `mark_failed`: if the arriving closure is considered out of - # generation in those two methods, it is simply discarded (the inflight - # closure count still decrements). - self._error_generation = 0 - # The following is a lock to make sure when `wait` is called and before it # returns no `put` can be executed during this period. It is because `wait` # won't know what to do with newly put closures. This lock adds an cutoff @@ -415,11 +406,14 @@ class _CoordinatedClosureQueue(object): # of the code. self._put_wait_lock = threading.Lock() - def _cancel_closures_in_queue(self): + def _cancel_all_closures(self): """Clears the queue and sets remaining closures cancelled error. This method expects self._queue_lock to be held prior to entry. """ + self._cancellation_mgr.start_cancel() + while self._inflight_closure_count > 0: + self._no_inflight_closure_condition.wait() while True: try: closure = self._queue.get(block=False) @@ -437,8 +431,8 @@ class _CoordinatedClosureQueue(object): This method expects self._queue_lock to be held prior to entry. 
""" if self._error: + self._cancel_all_closures() try: - self._cancel_closures_in_queue() raise self._error # pylint: disable=raising-bad-type finally: self._error = None @@ -466,16 +460,17 @@ class _CoordinatedClosureQueue(object): return None closure = self._queue.get(block=False) self._queue_free_slot_condition.notify() - closure._error_generation = self._error_generation # pylint: disable=protected-access self._inflight_closure_count += 1 return closure - def mark_finished(self, closure): + def mark_finished(self): """Let the queue know that a closure has been successfully executed.""" with self._queue_lock: if self._inflight_closure_count < 1: raise AssertionError("There is no inflight closures to mark_finished.") self._inflight_closure_count -= 1 + if self._inflight_closure_count == 0: + self._no_inflight_closure_condition.notifyAll() if self._queue.empty() and self._inflight_closure_count == 0: self._stop_waiting_condition.notifyAll() @@ -484,17 +479,15 @@ class _CoordinatedClosureQueue(object): with self._queue_lock: if self._inflight_closure_count < 1: raise AssertionError("There is no inflight closures to put_back.") - self._inflight_closure_count -= 1 - if closure._error_generation < self._error_generation: # pylint: disable=protected-access - # If the closure to put back is out of generation, cancel the closure - # and ignore it. - logging.info("Function %r should no longer be dispatched; marking " - "as cancelled.") + if self._error: closure._set_output_remote_values_cancelled() # pylint: disable=protected-access - return - self._queue_free_slot_condition.wait_for(lambda: not self._queue.full()) - self._queue.put(closure, block=False) - self._closures_queued_condition.notify() + else: + self._queue_free_slot_condition.wait_for(lambda: not self._queue.full()) + self._queue.put(closure, block=False) + self._closures_queued_condition.notify() + self._inflight_closure_count -= 1 + if self._inflight_closure_count == 0: + self._no_inflight_closure_condition.notifyAll() def wait(self, timeout=None): """Wait for all closures to be finished before returning. @@ -516,22 +509,18 @@ class _CoordinatedClosureQueue(object): self._raise_if_error() return True - def mark_failed(self, e, closure): + def mark_failed(self, e): """Sets error and unblocks any wait() call.""" with self._queue_lock: # TODO(yuefengz): maybe record all failure and give users more # information? if self._inflight_closure_count < 1: raise AssertionError("There is no inflight closures to mark_failed.") + if self._error is None: + self._error = e self._inflight_closure_count -= 1 - if closure._error_generation < self._error_generation: # pylint: disable=protected-access - # If the closure to mark fail is out of generation, simply ignore it - # (with the actual error associated with the closure preserved). - return - assert self._error is None - self._error = e - self._error_generation += 1 - self._cancel_closures_in_queue() + if self._inflight_closure_count == 0: + self._no_inflight_closure_condition.notifyAll() self._stop_waiting_condition.notifyAll() def done(self): @@ -678,7 +667,7 @@ class Worker(object): # TODO(yuefengz): we don't have to materialize results every step. 
with metric_utils.monitored_timer("remote_value_fetch"): closure._fetch_output_remote_values() # pylint: disable=protected-access - self._cluster._closure_queue.mark_finished(closure) # pylint: disable=protected-access + self._cluster._closure_queue.mark_finished() # pylint: disable=protected-access except Exception as e: # pylint: disable=broad-except logging.error( "/job:worker/task:%d encountered the following error when processing " @@ -686,7 +675,7 @@ class Worker(object): nest.map_structure( lambda x: x._set_error(e), # pylint: disable=protected-access closure._output_remote_values) # pylint: disable=protected-access - self._cluster._closure_queue.mark_failed(e, closure) # pylint: disable=protected-access + self._cluster._closure_queue.mark_failed(e) # pylint: disable=protected-access def _process_queue(self): while True: @@ -710,7 +699,8 @@ class Worker(object): # the same worker such as creating resources, setting resources' aborted # status, and executing closures happen on the same thread. This allows us # to have simpler logic of concurrency. - closure = Closure(function=function, args=args, kwargs=kwargs) + closure = Closure( + function, self._cluster._cancellation_mgr, args=args, kwargs=kwargs) # pylint: disable=protected-access resource_remote_value = closure._output_remote_values # pylint: disable=protected-access self._register_resource(resource_remote_value) @@ -775,7 +765,8 @@ class Cluster(object): protocol=cluster_resolver.rpc_layer, cluster_device_filters=device_filters) - self._closure_queue = _CoordinatedClosureQueue() + self._cancellation_mgr = cancellation.CancellationManager() + self._closure_queue = _CoordinatedClosureQueue(self._cancellation_mgr) self.failure_handler = WorkerPreemptionHandler(context.get_server_def()) worker_device_strings = [ "/job:worker/replica:0/task:%d" % i for i in range(self._num_workers) @@ -796,7 +787,8 @@ class Cluster(object): Returns: A structure of `RemoteValue` object. """ - closure = Closure(function=function, args=args, kwargs=kwargs) + closure = Closure( + function, self._cancellation_mgr, args=args, kwargs=kwargs) self._closure_queue.put(closure) return closure._output_remote_values # pylint: disable=protected-access @@ -893,8 +885,8 @@ class Client(object): function execution to finish and retrieve its output from the remote worker. `schedule` guarantees that `fn` will be executed on a worker at least once; - it could be more than once if a worker fails and restarts in the middle of - function scheduling. Note that since worker can fail at any point when + it could be more than once if its corresponding worker fails in the middle + of its execution. Note that since worker can fail at any point when executing the function, it is possible that the function is partially executed, but `Client` guarantees that in those events, the function will eventually be fully executed, possibly on a different worker that is @@ -904,14 +896,12 @@ class Client(object): by raising any one of those errors, and clear the errors collected so far. There are two implications when this happens: 1) user should call `schedule` with `fn` again to re-schedule, and 2) some of the previously scheduled - functions may no longer execute. User can call `fetch` on the returned + functions may have not been executed. User can call `fetch` on the returned `RemoteValue` to inspect if they have executed, failed, or cancelled, and reschedule the corresponding function if needed. 
- When `schedule` raises, it is possible that there are still functions being - executed on workers, at the time `schedule` raises. When this happens, users - can call `join` again to wait for all pending async function execution to - finish, and bring the cluster into a consistent state. + When `schedule` raises, it guarantees that there is no function that is + still being executed. At this time, there is no support of worker assignment for function execution, or priority of the workers. @@ -940,7 +930,8 @@ class Client(object): # TODO(b/160702436): Invoke `strategy.run` for user's function so it enters # a `ReplicaContext` in a logically correct way. with distribute_lib.ReplicaContext( - self._strategy, replica_id_in_sync_group=0): + self._strategy, + replica_id_in_sync_group=constant_op.constant(0, dtypes.int32)): with self._translate_parameter_server_failure(): return self.cluster.schedule(fn, args=args, kwargs=kwargs) @@ -949,17 +940,14 @@ class Client(object): If any previously scheduled function raises an error, `join` will fail by raising any one of those errors, and clear the errors collected so far. If - this happens, some of the previously scheduled functions may no longer - execute. Users can call `fetch` on the returned `RemoteValue` to inspect if + this happens, some of the previously scheduled functions may have not been + executed. Users can call `fetch` on the returned `RemoteValue` to inspect if they have executed, failed, or cancelled. If some that have been cancelled need to be rescheduled, users should call `schedule` with the function again. - Note: `join` raises an exception as soon as the client detects one, and this - means it is possible that there are still functions being executed on - workers, at the time `join` raises. When this happens, users can call `join` - again to wait for all pending async function execution to finish, and bring - the cluster into a consistent state. + When `join` returns or raises, it guarantees that there is no function that + is still being executed. Raises: Exception: one of the exceptions caught by the client by any previously @@ -976,6 +964,9 @@ class Client(object): If any previously scheduled function raises an error, `done` will fail by raising any one of those errors. + + When `done` returns True or raises, it guarantees that there is no function + that is still being executed. 
""" return self.cluster.done() @@ -1091,7 +1082,7 @@ class Client(object): raise -class _PerWorkerDistributedDataset(object): # pylint: disable=protected-access +class _PerWorkerDistributedDataset(object): """Represents worker-distributed datasets created from dataset function.""" def __init__(self, dataset_fn, input_workers, client): @@ -1107,13 +1098,13 @@ class _PerWorkerDistributedDataset(object): # pylint: disable=protected-access if isinstance(dataset_fn, def_function.Function): with variable_scope.variable_creator_scope(disallow_variable_creation): - self._dataset_fn = dataset_fn.get_concrete_function() - elif isinstance(dataset_fn, tf_function.ConcreteFunction): - self._dataset_fn = dataset_fn - else: + dataset_fn = dataset_fn.get_concrete_function() + elif not isinstance(dataset_fn, tf_function.ConcreteFunction): with variable_scope.variable_creator_scope(disallow_variable_creation): - self._dataset_fn = def_function.function( - dataset_fn).get_concrete_function() + dataset_fn = def_function.function(dataset_fn).get_concrete_function() + self._dataset_fn = ( + client.cluster._cancellation_mgr.get_cancelable_function( # pylint: disable=protected-access + dataset_fn)) self._input_workers = input_workers self._client = client self._element_spec = None diff --git a/tensorflow/python/distribute/client/client_test.py b/tensorflow/python/distribute/client/client_test.py index 12152407c5d..459633aca2b 100644 --- a/tensorflow/python/distribute/client/client_test.py +++ b/tensorflow/python/distribute/client/client_test.py @@ -30,22 +30,34 @@ from tensorflow.python.training import coordinator from tensorflow.python.util import nest +class MockCancellationManager(object): + + def __init__(self): + self.cancelled = False + + def start_cancel(self): + self.cancelled = True + + def get_cancelable_function(self, func): + return func + + class CoordinatedClosureQueueTest(test.TestCase): def testBasic(self): - queue = client._CoordinatedClosureQueue() + queue = client._CoordinatedClosureQueue(MockCancellationManager()) closure1 = self._create_closure() queue.put(closure1) self.assertIs(closure1, queue.get()) self.assertFalse(queue.done()) queue.put_back(closure1) self.assertEqual(closure1, queue.get()) - queue.mark_finished(closure1) + queue.mark_finished() self.assertTrue(queue.done()) queue.wait() def testProcessAtLeaseOnce(self): - closure_queue = client._CoordinatedClosureQueue() + closure_queue = client._CoordinatedClosureQueue(MockCancellationManager()) labels = ['A', 'B', 'C', 'D', 'E'] processed_count = collections.defaultdict(int) @@ -63,7 +75,7 @@ class CoordinatedClosureQueueTest(test.TestCase): closure_queue.put_back(closure) continue closure._function() - closure_queue.mark_finished(closure) + closure_queue.mark_finished() def get_func(label): @@ -76,7 +88,8 @@ class CoordinatedClosureQueueTest(test.TestCase): return func for label in labels: - closure_queue.put(client.Closure(get_func(label))) + closure_queue.put( + client.Closure(get_func(label), MockCancellationManager())) t1 = threading.Thread(target=process_queue, daemon=True) t1.start() t2 = threading.Thread(target=process_queue, daemon=True) @@ -93,7 +106,7 @@ class CoordinatedClosureQueueTest(test.TestCase): coord.join([t1, t2]) def testNotifyBeforeWait(self): - closure_queue = client._CoordinatedClosureQueue() + closure_queue = client._CoordinatedClosureQueue(MockCancellationManager()) def func(): logging.info('func running') @@ -102,10 +115,10 @@ class CoordinatedClosureQueueTest(test.TestCase): def process_queue(): with 
coord.stop_on_exception(): - closure = closure_queue.get() - closure_queue.mark_finished(closure) + closure_queue.get() + closure_queue.mark_finished() - closure_queue.put(client.Closure(func)) + closure_queue.put(client.Closure(func, MockCancellationManager())) t = threading.Thread(target=process_queue) t.start() coord.join([t]) @@ -114,8 +127,30 @@ class CoordinatedClosureQueueTest(test.TestCase): # doesn't time out. closure_queue.wait() + def _assert_one_unblock_the_other(self, first_fn, second_fn): + """Asserts `second_fn` wouldn't return before `first_fn` is finished.""" + first_fn_done = threading.Event() + second_fn_done = threading.Event() + coord = coordinator.Coordinator(clean_stop_exception_types=[]) + + def wrapped_first_fn(): + with coord.stop_on_exception(): + self.assertFalse(second_fn_done.is_set()) + first_fn() + first_fn_done.set() + + self.assertFalse(first_fn_done.is_set()) + t = threading.Thread(target=wrapped_first_fn) + t.start() + + second_fn() + self.assertTrue(first_fn_done.is_set()) + second_fn_done.set() + + coord.join([t]) + def testWaitRaiseErrorAfterMarkFailure(self): - closure_queue = client._CoordinatedClosureQueue() + closure_queue = client._CoordinatedClosureQueue(MockCancellationManager()) closure_queue.put(self._create_closure()) closure = closure_queue.get() @@ -126,22 +161,17 @@ class CoordinatedClosureQueueTest(test.TestCase): # all inflight closures are finished. def mark_finished_fn(): - with coord.stop_on_exception(): - self.assertFalse(wait_finish_event.is_set()) - try: - raise ValueError('Some error.') - except ValueError as e: - closure_queue.mark_failed(e, closure) - wait_finish_event.wait() + try: + raise ValueError('Some error.') + except ValueError as e: + closure_queue.mark_failed(e) - t = threading.Thread(target=mark_finished_fn) - t.start() + def wait_fn(): + with self.assertRaises(ValueError): + closure_queue.wait() - with self.assertRaises(ValueError): - closure_queue.wait() - wait_finish_event.set() + self._assert_one_unblock_the_other(mark_finished_fn, wait_fn) - coord.join([t]) self.assertTrue(closure_queue.done()) def _create_closure(self): @@ -150,10 +180,10 @@ class CoordinatedClosureQueueTest(test.TestCase): def some_function(): return 1.0 - return client.Closure(some_function) + return client.Closure(some_function, MockCancellationManager()) def _put_two_closures_and_get_one(self): - closure_queue = client._CoordinatedClosureQueue() + closure_queue = client._CoordinatedClosureQueue(MockCancellationManager()) closure1 = self._create_closure() closure_queue.put(closure1) @@ -166,9 +196,9 @@ class CoordinatedClosureQueueTest(test.TestCase): return closure_queue, closure1, closure2 def testPutRaiseError(self): - closure_queue, closure1, closure2 = self._put_two_closures_and_get_one() + closure_queue, _, closure2 = self._put_two_closures_and_get_one() - closure_queue.mark_failed(ValueError(), closure1) + closure_queue.mark_failed(ValueError()) with self.assertRaises(ValueError): closure_queue.put(self._create_closure()) @@ -185,9 +215,9 @@ class CoordinatedClosureQueueTest(test.TestCase): closure_queue.put(self._create_closure()) def testWaitRaiseError(self): - closure_queue, closure1, closure2 = self._put_two_closures_and_get_one() + closure_queue, _, closure2 = self._put_two_closures_and_get_one() - closure_queue.mark_failed(ValueError(), closure1) + closure_queue.mark_failed(ValueError()) with self.assertRaises(ValueError): closure_queue.wait() @@ -203,15 +233,22 @@ class CoordinatedClosureQueueTest(test.TestCase): 
closure_queue.wait() def testDoneRaiseError(self): - closure_queue, closure1, _ = self._put_two_closures_and_get_one() - closure_queue.get() + closure_queue, _, _ = self._put_two_closures_and_get_one() self.assertFalse(closure_queue.done()) - closure_queue.mark_failed(ValueError(), closure1) + closure_queue.mark_failed(ValueError()) with self.assertRaises(ValueError): closure_queue.done() - def _test_error_reporting_and_cancel_flow(self, call_wait): + def _set_error(self, closure_queue, closure, error): + try: + raise error + except Exception as e: # pylint: disable=broad-except + nest.map_structure(lambda x: x._set_error(e), + closure._output_remote_values) + closure_queue.mark_failed(e) + + def _test_cancel_closure_when_error(self, call_wait): closure_queue, closure1, closure2 = self._put_two_closures_and_get_one() closure_queue.put(self._create_closure()) closure_queue.get() @@ -219,34 +256,37 @@ class CoordinatedClosureQueueTest(test.TestCase): self.assertEqual(closure_queue._inflight_closure_count, 2) # Simulating closure1 fails. - try: - raise ValueError('Some error.') - except ValueError as e: - nest.map_structure(lambda x: x._set_error(e), - closure1._output_remote_values) - self.assertEqual(closure_queue._error_generation, 0) # pylint: disable=g-assert-in-except - closure_queue.mark_failed(e, closure1) - self.assertEqual(closure_queue._error_generation, 1) - # At this moment, there are one inflight, nothing - # in queue (because the ones in queue should have been removed and - # cancelled). - self.assertTrue(closure_queue._queue.empty()) - # Doesn't include out of generation closures. + self._set_error(closure_queue, closure1, ValueError('Some error.')) + + # At this moment, there are one inflight, one in queue. + self.assertEqual(closure_queue._queue.qsize(), 1) self.assertEqual(closure_queue._inflight_closure_count, 1) - coord = coordinator.Coordinator(clean_stop_exception_types=[]) closure3 = self._create_closure() - with self.assertRaises(ValueError): - # Verifying `wait()` or `put()` raises even if one closure is in - # flight. - if call_wait: - closure_queue.wait() - else: - closure_queue.put(closure3) - # At this moment, there is one inflight, nothing in queue. + def fake_cancellation(): + self._set_error(closure_queue, closure2, + ValueError('Fake cancellation error.')) + + def report_error(): + # It should not report the fake cancellation error. + with self.assertRaisesRegex(ValueError, 'Some error.'): + # Verifying `wait()` or `put()` raises even if one closure is in + # flight. + if call_wait: + closure_queue.wait() + else: + closure_queue.put(closure3) + + self._assert_one_unblock_the_other(fake_cancellation, report_error) + + # Cancellation manager has been called. + self.assertTrue(closure_queue._cancellation_mgr.cancelled) + + # At this moment, there is zero inflight, nothing in queue. self.assertTrue(closure_queue._queue.empty()) - self.assertEqual(closure_queue._inflight_closure_count, 1) + self.assertEqual(closure_queue._inflight_closure_count, 0) + self.assertIsNone(closure_queue._error) # This asserts that closure1 has errored. with self.assertRaisesRegex(ValueError, 'Some error.'): @@ -260,107 +300,36 @@ class CoordinatedClosureQueueTest(test.TestCase): 'function.'): closure3._fetch_output_remote_values() - # Closure2 is inflight, so it shouldn't be ready. + # Closure2 was an inflight closure when it got cancelled. 
self.assertEqual(closure2._output_remote_values._status, - client._RemoteValueStatus.NOT_READY) - - # And `wait` should block because closure2 is not back yet. - self.assertFalse(closure_queue.wait(timeout=20)) - - # Now let's assume that closure2 isn't successful due to worker preemption, - # and now it's attempted to be put back, but ends up getting cancelled. - self.assertEqual(closure2._error_generation, 0) - self.assertEqual(closure_queue._error_generation, 1) - closure_queue.put_back(closure2) - - with self.assertRaisesRegex( - client.FunctionRetryableError, - 'The corresponding function is cancelled. Please reschedule the ' - 'function.'): + client._RemoteValueStatus.READY) + with self.assertRaisesRegex(ValueError, 'Fake cancellation error.'): closure2._fetch_output_remote_values() - # At this moment, there is nothing inflight, and the queue is also empty - # (because closure2 should not be added back to the queue). - self.assertTrue(closure_queue._queue.empty()) - self.assertEqual(closure_queue._inflight_closure_count, 0) + # This asserts that the queue has a clear state. + self.testBasic() - closure4 = self._create_closure() + def testWaitRaiseErrorAfterCancelClosure(self): + self._test_cancel_closure_when_error(call_wait=True) - e = threading.Event() - - def get_fn(): - with coord.stop_on_exception(): - # This should end up getting closure4, not closure2, because closure2 - # has been cancelled and should not be got. - closure_got = closure_queue.get() - e.set() - self.assertEqual(closure_got._error_generation, 1) - self.assertEqual(closure_queue._error_generation, 1) - self.assertIs(closure4, closure_got) - self.assertIsNot(closure2, closure_got) - - t = threading.Thread(target=get_fn) - t.start() - - time.sleep(10) - - # Make sure `closure_got = closure_queue.get()` is unblocked as a result of - # `closure_queue.put(closure4)`. - self.assertFalse(e.is_set()) - closure_queue.put(closure4) - self.assertTrue(e.wait()) - coord.join([t]) - - self.assertEqual(closure_queue._inflight_closure_count, 1) - closure_queue.mark_finished(closure4) - # The queue is now cleared and nothing inflight. - self.assertEqual(closure_queue._inflight_closure_count, 0) - closure_queue.wait() - - def testWaitRaiseErrorAfterAnErrorIsReported(self): - self._test_error_reporting_and_cancel_flow(call_wait=True) - - def testPutRaiseErrorAfterAnErrorIsReported(self): - self._test_error_reporting_and_cancel_flow(call_wait=False) + def testPutRaiseErrorAfterCancelClosure(self): + self._test_cancel_closure_when_error(call_wait=False) def testStateIsRestoredAfterJoinIsCalled(self): - closure_queue, closure1, closure2 = self._put_two_closures_and_get_one() - closure_queue.get() - self.assertEqual(closure_queue._inflight_closure_count, 2) - closure_queue.mark_failed(ValueError('test error'), closure1) + closure_queue, _, _ = self._put_two_closures_and_get_one() + self.assertEqual(closure_queue._inflight_closure_count, 1) + closure_queue.mark_failed(ValueError('test error')) with self.assertRaises(ValueError): closure_queue.put(self._create_closure()) - closure_queue.mark_failed(ValueError('test error'), closure2) - # closure2's error is previous generation so should not raise at this - # following put, and _error should have been cleared. + # Its error should have been cleared. 
self.assertIsNone(closure_queue._error) closure_queue.put(self._create_closure()) self.assertIsNone(closure_queue._error) - def testStateIsRestoredAfterJoinIsCalled_WaitShouldReturn(self): - closure_queue, closure1, closure2 = self._put_two_closures_and_get_one() - closure_queue.put(self._create_closure()) - closure_queue.get() # got closure2 - self.assertFalse(closure_queue._queue.empty()) # still has closure3 - self.assertEqual(closure_queue._inflight_closure_count, 2) # closure1,2 - closure_queue.mark_failed(ValueError('test error'), closure1) - self.assertTrue(closure_queue._queue.empty()) # closure3 cancelled - self.assertEqual(closure_queue._inflight_closure_count, 1) - with self.assertRaises(ValueError): - closure_queue.wait() # reports error from closure1 - - # `wait` should block because closure2 is not back yet, even if closure2 - # was sent inflight before the error. - self.assertFalse(closure_queue.wait(timeout=20)) - self.assertEqual(closure_queue._inflight_closure_count, 1) - closure_queue.mark_finished(closure2) - closure_queue.wait() # wait should pass immediately - self.assertEqual(closure_queue._inflight_closure_count, 0) - def testThreadSafey(self): thread_count = 10 - queue = client._CoordinatedClosureQueue() + queue = client._CoordinatedClosureQueue(MockCancellationManager()) # Each thread performs 20 queue actions: 10 are `put_back` and 10 are # `mark_finished`. @@ -372,7 +341,7 @@ class CoordinatedClosureQueueTest(test.TestCase): if i % 2 == 0: queue.put_back(closure) else: - queue.mark_finished(closure) + queue.mark_finished() threads = [threading.Thread(target=func) for i in range(thread_count)] for t in threads: diff --git a/tensorflow/python/distribute/client/parameter_server_client_test.py b/tensorflow/python/distribute/client/parameter_server_client_test.py index db22a476b4a..32c7ff9c7e9 100644 --- a/tensorflow/python/distribute/client/parameter_server_client_test.py +++ b/tensorflow/python/distribute/client/parameter_server_client_test.py @@ -19,7 +19,10 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import functools +import threading from absl import logging + from tensorflow.python.data.ops import dataset_ops from tensorflow.python.distribute import multi_worker_test_base from tensorflow.python.distribute import sharded_variable @@ -40,6 +43,48 @@ from tensorflow.python.ops import variables from tensorflow.python.training.server_lib import ClusterSpec +class ErrorReportingThread(threading.Thread): + + error = None + + def __init__(self, *args, **kwargs): + assert "target" in kwargs + target = kwargs["target"] + + @functools.wraps(target) + def wrapped_target(*args, **kwargs): + try: + return target(*args, **kwargs) + except Exception as e: # pylint: disable=broad-except + ErrorReportingThread.error = e + + kwargs["target"] = wrapped_target + super(ErrorReportingThread, self).__init__(*args, **kwargs) + + +class TestCaseWithErrorReportingThread(test.TestCase): + + @classmethod + def setUpClass(cls): + cls._threading_thread = threading.Thread + threading.Thread = ErrorReportingThread + super(TestCaseWithErrorReportingThread, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + super(TestCaseWithErrorReportingThread, cls).tearDownClass() + threading.Thread = cls._threading_thread + + def setUp(self): + ErrorReportingThread.error = None + super(TestCaseWithErrorReportingThread, self).setUp() + + def tearDown(self): + super(TestCaseWithErrorReportingThread, self).tearDown() + if 
ErrorReportingThread.error: + raise ErrorReportingThread.error # pylint: disable=raising-bad-type + + def make_client(num_workers, num_ps): # TODO(rchao): Test the internal rpc_layer version. cluster_def = multi_worker_test_base.create_in_process_cluster( @@ -52,7 +97,7 @@ def make_client(num_workers, num_ps): return parameter_server_client.ParameterServerClient(cluster_resolver) -class ParameterServerClientTest(test.TestCase): +class ParameterServerClientTest(TestCaseWithErrorReportingThread): @classmethod def setUpClass(cls): @@ -304,7 +349,7 @@ class VariablePartitioningScopeTest(test.TestCase): self.assertEqual(var_sum, 10.0) -class ErrorReportingTest(test.TestCase): +class ErrorReportingTest(TestCaseWithErrorReportingThread): @classmethod def setUpClass(cls): @@ -344,8 +389,16 @@ class ErrorReportingTest(test.TestCase): while True: self.client.schedule(self._normal_function) + def testScheduleRaiseErrorWithMultipleFailure(self): + for _ in range(3): + self.client.schedule(self._normal_function) + self.client.schedule(self._error_function) + with self.assertRaises(errors.InvalidArgumentError): + while True: + self.client.schedule(self._error_function) + self.client.join() + def testErrorWillbeCleared(self): - self.skipTest("b/157597579") self.client.schedule(self._error_function) with self.assertRaises(errors.InvalidArgumentError): self.client.join() @@ -356,7 +409,7 @@ class ErrorReportingTest(test.TestCase): with self.assertRaises(errors.InvalidArgumentError): self.client.join() - def testFutureReturnError(self): + def testRemoteValueReturnError(self): result = self.client.schedule(self._error_function) with self.assertRaises(errors.InvalidArgumentError): From 1cb7ce30b747bc0afb301a4ee3af3dbd0e23be85 Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Sun, 2 Aug 2020 23:17:55 -0700 Subject: [PATCH 1958/2522] Explicitly disable SPMD in TPU strategy. Mirrored variables are not yet supported for SPMD. This is in preparation of turning SPMD by default. PiperOrigin-RevId: 324548129 Change-Id: Ie7adf563402bd5ef31b7759232b1cd8f441586c7 --- tensorflow/python/distribute/tpu_strategy.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/distribute/tpu_strategy.py b/tensorflow/python/distribute/tpu_strategy.py index 3446f78288d..22aeb37ff7c 100644 --- a/tensorflow/python/distribute/tpu_strategy.py +++ b/tensorflow/python/distribute/tpu_strategy.py @@ -690,7 +690,10 @@ class TPUExtended(distribute_lib.StrategyExtendedV1): select_replica, per_replica_inputs),)) replicate_outputs = tpu.replicate( - run_fn, replicate_inputs, device_assignment=self._device_assignment) + run_fn, + replicate_inputs, + device_assignment=self._device_assignment, + xla_options=tpu.XLAOptions(use_spmd_for_xla_partitioning=False)) # If run_fn has tensor outputs, tpu.replicate returns a list of list. We # will flatten it in this case. If run_fn has no tensor outputs, @@ -1166,7 +1169,8 @@ class TPUExtended(distribute_lib.StrategyExtendedV1): replicate_inputs, device_assignment=self._device_assignment, maximum_shapes=maximum_shapes, - padding_spec=padding_spec) + padding_spec=padding_spec, + xla_options=tpu.XLAOptions(use_spmd_for_xla_partitioning=False)) # Remove all no ops that may have been added during 'tpu.replicate()' if isinstance(result[0], list): From d201be6284693c9ac5b93bdccfeeac2524d05239 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 3 Aug 2020 00:54:27 -0700 Subject: [PATCH 1959/2522] Minor improvement, saying why the object is invalid. 
PiperOrigin-RevId: 324556545 Change-Id: Ieb0728e400ff2c220d07a29fecb40c37070d2f08 --- tensorflow/compiler/xla/pjrt/pjrt_client.cc | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/xla/pjrt/pjrt_client.cc b/tensorflow/compiler/xla/pjrt/pjrt_client.cc index 126b74b9b98..c5dce4a37f7 100644 --- a/tensorflow/compiler/xla/pjrt/pjrt_client.cc +++ b/tensorflow/compiler/xla/pjrt/pjrt_client.cc @@ -1004,7 +1004,7 @@ PjRtBuffer::GetBufferForHoldLocked(ScopedHold::Type type) { // acquiring any other kind of hold. WaitForOutstandingDonationHold(); if (device_buffer_ == nullptr) { - return InvalidArgument("Hold requested on invalid buffer"); + return InvalidArgument("Hold requested on deleted or donated buffer"); } else { ++holds_[type]; } @@ -1084,7 +1084,8 @@ PjRtBuffer::CopyToHostAsyncInternal(bool discard_cached_copy, // We can't perform any other action while a donation hold is in progress. WaitForOutstandingDonationHold(); if (device_buffer_ == nullptr) { - return InvalidArgument("CopyToHostAsync() called on invalid buffer."); + return InvalidArgument( + "CopyToHostAsync() called on deleted or donated buffer"); } if (discard_cached_copy) { auto it = host_values_.find(host_layout); @@ -1154,7 +1155,7 @@ StatusOr> PjRtBuffer::ToLiteral( TF_ASSIGN_OR_RETURN(std::shared_ptr host_value, CopyToHostAsyncInternal(discard_cached_copy, layout)); if (host_value == nullptr) { - return InvalidArgument("ToLiteral called on invalid buffer"); + return InvalidArgument("ToLiteral called on deleted or donated buffer"); } host_value->ready.WaitForNotification(); TF_RETURN_IF_ERROR(host_value->status); @@ -1272,7 +1273,8 @@ StatusOr> PjRtBuffer::CopyToDevice( // We can't perform any other action while a donation hold is in progress. WaitForOutstandingDonationHold(); if (device_buffer_ == nullptr) { - return InvalidArgument("CopyToDevice called on invalid buffer"); + return InvalidArgument( + "CopyToDevice called on deleted or donated buffer"); } AcquireHoldLocked(&src_device_buffer); } @@ -1313,7 +1315,8 @@ Status PjRtBuffer::BlockHostUntilReady() { { absl::MutexLock lock(&mu_); if (device_buffer_ == nullptr) { - return InvalidArgument("BlockHostUntilReady() called on invalid buffer."); + return InvalidArgument( + "BlockHostUntilReady() called on deleted or donated buffer"); } device_buffer = device_buffer_; } From 82126e56ddb0c76330290e09815ec4240bef8bd3 Mon Sep 17 00:00:00 2001 From: Alexander Belyaev Date: Mon, 3 Aug 2020 01:22:43 -0700 Subject: [PATCH 1960/2522] [MLIR][KERNEL_GEN] Legalize TF Framework dialect to LLVM. 
PiperOrigin-RevId: 324559430 Change-Id: I1685be7f2aace9cf9658fe05574cf957ee67bd37 --- .../compiler/mlir/tools/kernel_gen/BUILD | 7 + .../compiler/mlir/tools/kernel_gen/ir/BUILD | 8 +- .../tests/tf_framework_legalize_to_llvm.mlir | 75 +++++++ .../mlir/tools/kernel_gen/transforms/BUILD | 48 +++++ .../mlir/tools/kernel_gen/transforms/passes.h | 38 ++++ .../tools/kernel_gen/transforms/passes.td | 27 +++ .../kernel_gen/transforms/register_passes.cc | 30 +++ .../tools/kernel_gen/transforms/rewriters.h | 36 ++++ .../tf_framework_legalize_to_llvm.cc | 187 ++++++++++++++++++ .../tf_framework_legalize_to_llvm_pass.cc | 71 +++++++ 10 files changed, 520 insertions(+), 7 deletions(-) create mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_framework_legalize_to_llvm.mlir create mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD create mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h create mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.td create mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/transforms/register_passes.cc create mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/transforms/rewriters.h create mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_legalize_to_llvm.cc create mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_legalize_to_llvm_pass.cc diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/BUILD b/tensorflow/compiler/mlir/tools/kernel_gen/BUILD index de5926301dd..32fae8a8305 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/BUILD +++ b/tensorflow/compiler/mlir/tools/kernel_gen/BUILD @@ -3,6 +3,12 @@ load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") licenses(["notice"]) +package_group( + name = "friends", + includes = ["//third_party/mlir:subpackages"], + packages = ["//tensorflow/compiler/mlir/..."], +) + cc_library( name = "cubin_creator", srcs = ["cubin_creator.cc"], @@ -57,6 +63,7 @@ tf_cc_binary( deps = [ "//tensorflow/compiler/mlir/tensorflow:tensorflow_dialect_registration", "//tensorflow/compiler/mlir/tools/kernel_gen/ir:tf_framework_dialect_registration", + "//tensorflow/compiler/mlir/tools/kernel_gen/transforms:passes", "@llvm-project//mlir:AllPassesAndDialects", "@llvm-project//mlir:MlirOptLib", "@llvm-project//mlir:MlirOptMain", diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/ir/BUILD b/tensorflow/compiler/mlir/tools/kernel_gen/ir/BUILD index 0c3db5fa4ab..3a28d4815d2 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/ir/BUILD +++ b/tensorflow/compiler/mlir/tools/kernel_gen/ir/BUILD @@ -1,16 +1,10 @@ load("//third_party/mlir:tblgen.bzl", "gentbl") package( - default_visibility = [":friends"], + default_visibility = ["//tensorflow/compiler/mlir/tools/kernel_gen:friends"], licenses = ["notice"], # Apache 2.0 ) -package_group( - name = "friends", - includes = ["//third_party/mlir:subpackages"], - packages = ["//tensorflow/compiler/mlir/..."], -) - gentbl( name = "tf_framework_ops_inc_gen", tbl_outs = [ diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_framework_legalize_to_llvm.mlir b/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_framework_legalize_to_llvm.mlir new file mode 100644 index 00000000000..77328aa7738 --- /dev/null +++ b/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_framework_legalize_to_llvm.mlir @@ -0,0 +1,75 @@ +// RUN: kernel-gen-opt %s -test-tf-framework-legalize-to-llvm -split-input-file | FileCheck %s + +// CHECK: llvm.func @_mlir_ciface_tf_alloc_raw +// 
CHECK-SAME: (!llvm<"i8*">, !llvm.i64) -> !llvm<"i8*"> + +// CHECK-LABEL: llvm.func @alloc_raw( +// CHECK-SAME: [[TF_CTX:%.*]]: !llvm<"i8*">, +// CHECK-SAME: [[SIZE_0:%.*]]: !llvm.i64, +// CHECK-SAME: [[SIZE_2:%.*]]: !llvm.i64) -> [[DESC_TY:!.*]] { +func @alloc_raw(%ctx: !tf_framework.op_kernel_context, + %size_0 : index , %size_2 : index) -> memref { + %buf = tf_framework.alloc_raw(%ctx, %size_0, %size_2) : memref + std.return %buf : memref +} +// Compute number of elements. +// CHECK: [[SIZE_1:%.*]] = llvm.mlir.constant(10 : index) : !llvm.i64 +// CHECK: [[NUM_ELEM_0:%.*]] = llvm.mul [[SIZE_0]], [[SIZE_1]] : !llvm.i64 +// CHECK: [[NUM_ELEM_1:%.*]] = llvm.mul [[NUM_ELEM_0]], [[SIZE_2]] : !llvm.i64 + +// Compute the size of an individual element. +// CHECK: [[NULL:%.*]] = llvm.mlir.null : !llvm<"float*"> +// CHECK: [[C1:%.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 +// CHECK: [[GEP:%.*]] = llvm.getelementptr [[NULL]]{{\[}}[[C1]]] +// CHECK-SAME: (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> +// CHECK: [[SIZE_OF_FLOAT:%.*]] = llvm.ptrtoint [[GEP]] +// CHECK-SAME: !llvm<"float*"> to !llvm.i64 + +// Allocate memory. +// CHECK: [[NUM_BYTES:%.*]] = llvm.mul [[NUM_ELEM_1]], [[SIZE_OF_FLOAT]] +// CHECK: [[BYTES_PTR:%.*]] = llvm.call @{{.*}}([[TF_CTX]], [[NUM_BYTES]]) +// CHECK-SAME: (!llvm<"i8*">, !llvm.i64) -> !llvm<"i8*"> + +// Build memref descriptor. +// CHECK: [[DESC_0:%.*]] = llvm.mlir.undef : [[DESC_TY]] + +// Set pointers and offset. +// CHECK: [[FLOAT_PTR:%.*]] = llvm.bitcast [[BYTES_PTR]] +// CHECK-SAME: !llvm<"i8*"> to !llvm<"float*"> +// CHECK: [[DESC_1:%.*]] = llvm.insertvalue [[FLOAT_PTR]], [[DESC_0]][0] +// CHECK: [[DESC_2:%.*]] = llvm.insertvalue [[FLOAT_PTR]], [[DESC_1]][1] +// CHECK: [[C0:%.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 +// CHECK: [[DESC_3:%.*]] = llvm.insertvalue [[C0]], [[DESC_2]][2] : [[DESC_TY]] + +// Set sizes and strides. +// CHECK: [[STRIDE_2:%.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 +// CHECK: [[DESC_4:%.*]] = llvm.insertvalue [[SIZE_2]], [[DESC_3]][3, 2] +// CHECK: [[DESC_5:%.*]] = llvm.insertvalue [[STRIDE_2]], [[DESC_4]][4, 2] +// CHECK: [[STRIDE_1:%.*]] = llvm.mul [[STRIDE_2]], [[SIZE_2]] : !llvm.i64 +// CHECK: [[DESC_6:%.*]] = llvm.insertvalue [[SIZE_1]], [[DESC_5]][3, 1] +// CHECK: [[DESC_7:%.*]] = llvm.insertvalue [[STRIDE_1]], [[DESC_6]][4, 1] +// CHECK: [[STRIDE_0:%.*]] = llvm.mul [[STRIDE_1]], [[SIZE_1]] : !llvm.i64 +// CHECK: [[DESC_8:%.*]] = llvm.insertvalue [[SIZE_0]], [[DESC_7]][3, 0] +// CHECK: [[DESC_9:%.*]] = llvm.insertvalue [[STRIDE_0]], [[DESC_8]][4, 0] +// CHECK: llvm.return [[DESC_9]] : [[DESC_TY]] + +// ----- + +// CHECK: llvm.func @_mlir_ciface_tf_dealloc_raw(!llvm<"i8*">) + +// CHECK-LABEL: llvm.func @dealloc_raw( +// CHECK-SAME: [[TF_CTX:%.*]]: !llvm<"i8*">, +func @dealloc_raw(%ctx: !tf_framework.op_kernel_context, + %memref : memref) { + tf_framework.dealloc_raw(%ctx, %memref) : memref + return +} +// Extract allocated ptr from the memref descriptor. +// CHECK: %{{.*}} = llvm.mlir.undef : [[DESC_TY:!.*]] +// CHECK: [[FLOAT_PTR:%.*]] = llvm.extractvalue %{{.*}}[0] : [[DESC_TY]] +// CHECK-NEXT: [[VOID_PTR:%.*]] = llvm.bitcast [[FLOAT_PTR]] +// CHECK-SAME: !llvm<"float*"> to !llvm<"i8*"> + +// Deallocate. 
+// CHECK: llvm.call @_mlir_ciface_tf_dealloc_raw( +// CHECK-SAME: [[TF_CTX]], [[VOID_PTR]]) : (!llvm<"i8*">, !llvm<"i8*">) -> () diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD new file mode 100644 index 00000000000..15c0d571e61 --- /dev/null +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD @@ -0,0 +1,48 @@ +load("//third_party/mlir:tblgen.bzl", "gentbl") + +package( + default_visibility = ["//tensorflow/compiler/mlir/tools/kernel_gen:friends"], + licenses = ["notice"], # Apache 2.0 +) + +cc_library( + name = "tf_framework_legalize_to_llvm", + srcs = ["tf_framework_legalize_to_llvm.cc"], + hdrs = ["rewriters.h"], + deps = [ + "//tensorflow/compiler/mlir/tools/kernel_gen/ir:tf_framework_ops", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:LLVMDialect", + "@llvm-project//mlir:LLVMTransforms", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:Support", + "@llvm-project//mlir:Transforms", + ], +) + +gentbl( + name = "tf_framework_passes_inc_gen", + tbl_outs = [("-gen-pass-decls", "tf_framework_passes.h.inc")], + tblgen = "@llvm-project//mlir:mlir-tblgen", + td_file = "passes.td", + td_srcs = ["@llvm-project//mlir:PassBaseTdFiles"], +) + +cc_library( + name = "passes", + srcs = [ + "register_passes.cc", + "tf_framework_legalize_to_llvm_pass.cc", + ], + hdrs = ["passes.h"], + deps = [ + ":tf_framework_legalize_to_llvm", + ":tf_framework_passes_inc_gen", + "//tensorflow/compiler/mlir/tools/kernel_gen/ir:tf_framework_ops", + "@llvm-project//mlir:LLVMDialect", + "@llvm-project//mlir:LLVMTransforms", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:StandardOps", + ], + alwayslink = 1, +) diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h new file mode 100644 index 00000000000..89871ba3faf --- /dev/null +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h @@ -0,0 +1,38 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_MLIR_TOOLS_KERNEL_GEN_TRANSFORMS_PASSES_H_ +#define TENSORFLOW_COMPILER_MLIR_TOOLS_KERNEL_GEN_TRANSFORMS_PASSES_H_ + +#include + +namespace mlir { + +class ModuleOp; +template +class OperationPass; + +namespace kernel_gen { +namespace tf_framework { + +// Test pass for applying TF Framework -> LLVM patterns. 
+std::unique_ptr > +createTestTFFrameworkLegalizeToLLVMPass(); + +} // namespace tf_framework +} // namespace kernel_gen +} // namespace mlir + +#endif // TENSORFLOW_COMPILER_MLIR_TOOLS_KERNEL_GEN_TRANSFORMS_PASSES_H_ diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.td b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.td new file mode 100644 index 00000000000..71e50379ce7 --- /dev/null +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.td @@ -0,0 +1,27 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TF_FRAMEWORK_PASSES +#define TF_FRAMEWORK_PASSES + +include "mlir/Pass/PassBase.td" + +def TestTFFrameworkLegalizeToLLVMPass + : Pass<"test-tf-framework-legalize-to-llvm", "ModuleOp"> { + let summary = "Test pass for applying TF Framework -> LLVM patterns."; + let constructor = "createTestTFFrameworkLegalizeToLLVMPass()"; +} + +#endif // TF_FRAMEWORK_PASSES diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/register_passes.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/register_passes.cc new file mode 100644 index 00000000000..b9bad8e18d2 --- /dev/null +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/register_passes.cc @@ -0,0 +1,30 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "mlir/Pass/Pass.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h" + +namespace mlir { +namespace kernel_gen { +namespace tf_framework { + +bool register_all_passes = ([] { +#define GEN_PASS_REGISTRATION +#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_passes.h.inc" +}(), true); + +} // namespace tf_framework +} // namespace kernel_gen +} // namespace mlir diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/rewriters.h b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/rewriters.h new file mode 100644 index 00000000000..28dba379738 --- /dev/null +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/rewriters.h @@ -0,0 +1,36 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_MLIR_TOOLS_KERNEL_GEN_TRANSFORMS_REWRITERS_H_ +#define TENSORFLOW_COMPILER_MLIR_TOOLS_KERNEL_GEN_TRANSFORMS_REWRITERS_H_ + +namespace mlir { + +class LLVMTypeConverter; +class LowerToLLVMOptions; +class OwningRewritePatternList; + +namespace kernel_gen { +namespace tf_framework { + +/// Collect a set of patterns to convert from the TF Framework dialect to LLVM. +void PopulateTFFrameworkToLLVMConversionPatterns( + LLVMTypeConverter *converter, OwningRewritePatternList *patterns); + +} // namespace tf_framework +} // namespace kernel_gen +} // namespace mlir + +#endif // TENSORFLOW_COMPILER_MLIR_TOOLS_KERNEL_GEN_TRANSFORMS_REWRITERS_H_ diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_legalize_to_llvm.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_legalize_to_llvm.cc new file mode 100644 index 00000000000..2edcaabd7b4 --- /dev/null +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_legalize_to_llvm.cc @@ -0,0 +1,187 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h" // from @llvm-project +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" // from @llvm-project +#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project +#include "mlir/IR/Module.h" // from @llvm-project +#include "mlir/IR/StandardTypes.h" // from @llvm-project +#include "mlir/Transforms/DialectConversion.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.h" +#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/rewriters.h" + +namespace mlir { +namespace kernel_gen { +namespace tf_framework { +namespace { + +using LLVM::LLVMFuncOp; +using LLVM::LLVMType; + +static constexpr StringRef kCInterfaceAlloc = "_mlir_ciface_tf_alloc_raw"; +static constexpr StringRef kCInterfaceDealloc = "_mlir_ciface_tf_dealloc_raw"; + +/// Base class for patterns converting TF Framework ops to function calls. +template +class ConvertToLLVMCallOpPattern : public ConvertOpToLLVMPattern { + public: + using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; + + // Attempts to find function symbol in the module, adds it if not found. 
+ FlatSymbolRefAttr getOrInsertTFFunction(PatternRewriter &rewriter, + Operation *op) const { + ModuleOp module = op->getParentOfType(); + StringRef tf_func_name = GetFuncName(); + auto tf_func = module.lookupSymbol(tf_func_name); + if (!tf_func) { + OpBuilder::InsertionGuard guard(rewriter); + rewriter.setInsertionPointToStart(module.getBody()); + auto func_type = GetFuncType(); + tf_func = rewriter.create(rewriter.getUnknownLoc(), + tf_func_name, func_type); + } + return SymbolRefAttr::get(tf_func_name, rewriter.getContext()); + } + + protected: + virtual StringRef GetFuncName() const = 0; + virtual LLVMType GetFuncType() const = 0; +}; + +class AllocRawOpConverter : public ConvertToLLVMCallOpPattern { + public: + using ConvertToLLVMCallOpPattern::ConvertToLLVMCallOpPattern; + + LogicalResult matchAndRewrite( + Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override { + Location loc = op->getLoc(); + AllocRawOp alloc_raw_op = cast(op); + AllocRawOp::Adaptor transformed(operands); + + MemRefType memref_type = alloc_raw_op.getType(); + + // Get memref descriptor sizes. + SmallVector sizes; + getMemRefDescriptorSizes(loc, memref_type, + llvm::to_vector<4>(transformed.dyn_sizes()), + rewriter, sizes); + // Get memory block size in bytes. + Value num_bytes = getCumulativeSizeInBytes( + loc, memref_type.getElementType(), sizes, rewriter); + + // Insert function call. + FlatSymbolRefAttr tf_func_ref = getOrInsertTFFunction(rewriter, op); + Value allocated_byte_ptr = + rewriter + .create( + loc, getVoidPtrType(), tf_func_ref, + llvm::makeArrayRef({transformed.ctx(), num_bytes})) + .getResult(0); + + MemRefDescriptor memRefDescriptor = CreateMemRefDescriptor( + loc, rewriter, memref_type, allocated_byte_ptr, sizes); + + // Return the final value of the descriptor. + rewriter.replaceOp(op, {memRefDescriptor}); + return success(); + } + + protected: + StringRef GetFuncName() const override { return kCInterfaceAlloc; } + LLVMType GetFuncType() const override { + LLVMType llvm_void_ptr_type = getVoidPtrType(); + return LLVM::LLVMType::getFunctionTy( + llvm_void_ptr_type, + llvm::makeArrayRef({llvm_void_ptr_type, getIndexType()}), + /*isVarArg=*/false); + } + + private: + MemRefDescriptor CreateMemRefDescriptor(Location loc, + ConversionPatternRewriter &rewriter, + MemRefType memref_type, + Value allocated_byte_ptr, + ArrayRef sizes) const { + auto memref_desc = MemRefDescriptor::undef( + rewriter, loc, typeConverter.convertType(memref_type)); + + // TF AllocateRaw returns aligned pointer => AllocatedPtr == AlignedPtr. + Value allocated_type_ptr = rewriter.create( + loc, getElementPtrType(memref_type), allocated_byte_ptr); + memref_desc.setAllocatedPtr(rewriter, loc, allocated_type_ptr); + memref_desc.setAlignedPtr(rewriter, loc, allocated_type_ptr); + memref_desc.setConstantOffset(rewriter, loc, 0); + + if (memref_type.getRank() == 0) { + return memref_desc; + } + + // Compute strides and populate descriptor `size` and `stride` fields. 
+ Value stride_carried = createIndexConstant(rewriter, loc, 1); + for (int pos = sizes.size() - 1; pos >= 0; --pos) { + Value size = sizes[pos]; + memref_desc.setSize(rewriter, loc, pos, size); + memref_desc.setStride(rewriter, loc, pos, stride_carried); + // Update stride + if (pos > 0) { + stride_carried = + rewriter.create(loc, stride_carried, size); + } + } + return memref_desc; + } +}; + +class DeallocRawOpConverter : public ConvertToLLVMCallOpPattern { + public: + using ConvertToLLVMCallOpPattern::ConvertToLLVMCallOpPattern; + + LogicalResult matchAndRewrite( + Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override { + DeallocRawOp::Adaptor transformed(operands); + MemRefDescriptor memref(transformed.memref()); + + Value allocated_bytes_ptr = rewriter.create( + op->getLoc(), getVoidPtrType(), + memref.allocatedPtr(rewriter, op->getLoc())); + + // Insert function call. + FlatSymbolRefAttr tf_func_ref = getOrInsertTFFunction(rewriter, op); + rewriter.replaceOpWithNewOp( + op, llvm::None, tf_func_ref, + llvm::makeArrayRef({transformed.ctx(), allocated_bytes_ptr})); + return success(); + } + + protected: + StringRef GetFuncName() const override { return kCInterfaceDealloc; } + LLVMType GetFuncType() const override { + return LLVM::LLVMType::getFunctionTy(getVoidType(), getVoidPtrType(), + /*isVarArg=*/false); + } +}; + +} // namespace + +void PopulateTFFrameworkToLLVMConversionPatterns( + LLVMTypeConverter *converter, OwningRewritePatternList *patterns) { + patterns->insert(*converter); +} + +} // namespace tf_framework +} // namespace kernel_gen +} // namespace mlir diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_legalize_to_llvm_pass.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_legalize_to_llvm_pass.cc new file mode 100644 index 00000000000..8439e1617e0 --- /dev/null +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_legalize_to_llvm_pass.cc @@ -0,0 +1,71 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h" // from @llvm-project +#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h" // from @llvm-project +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" // from @llvm-project +#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project +#include "mlir/Pass/Pass.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.h" +#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h" +#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/rewriters.h" + +namespace mlir { +namespace kernel_gen { +namespace tf_framework { +namespace { + +#define GEN_PASS_CLASSES +#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_passes.h.inc" + +class TestTFFrameworkToLLVMPass + : public TestTFFrameworkLegalizeToLLVMPassBase { + public: + void runOnOperation() override { + ModuleOp m = getOperation(); + + // Populate type conversions. + LLVMTypeConverter type_converter(m.getContext()); + type_converter.addConversion([&](tf_framework::OpKernelContextType type) { + return LLVM::LLVMType::getInt8PtrTy(type_converter.getDialect()); + }); + + // Populate patterns. + OwningRewritePatternList patterns; + populateStdToLLVMConversionPatterns(type_converter, patterns); + PopulateTFFrameworkToLLVMConversionPatterns(&type_converter, &patterns); + + // Set target. + ConversionTarget target(getContext()); + target.addLegalDialect(); + target.addIllegalDialect(); + target.addLegalOp(); + + if (failed(applyFullConversion(m, target, patterns))) { + signalPassFailure(); + } + } +}; + +} // namespace + +std::unique_ptr > +createTestTFFrameworkLegalizeToLLVMPass() { + return std::make_unique(); +} + +} // namespace tf_framework +} // namespace kernel_gen +} // namespace mlir From c730b889e20569833c50c5aa224a943fc92027b0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 3 Aug 2020 02:01:42 -0700 Subject: [PATCH 1961/2522] Update GraphDef version to 482. PiperOrigin-RevId: 324562958 Change-Id: Ie80c4ff6b968b2281711d8437fcdde7205c22518 --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 3784f2b212d..dae48097aa8 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 481 // Updated: 2020/8/2 +#define TF_GRAPH_DEF_VERSION 482 // Updated: 2020/8/3 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From 71329959e563a6278a31d45029b8b4a86642aad2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 3 Aug 2020 02:01:42 -0700 Subject: [PATCH 1962/2522] compat: Update forward compatibility horizon to 2020-08-03 PiperOrigin-RevId: 324562959 Change-Id: I688798867a67903d517e9402be2441a1657c24da --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index e36055c6a93..e0f751a4376 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. 
It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 8, 2) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 8, 3) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From 4305e83f8cc65d9bf941b7136859fc9397368da6 Mon Sep 17 00:00:00 2001 From: Alexander Belyaev Date: Mon, 3 Aug 2020 02:41:09 -0700 Subject: [PATCH 1963/2522] [MLIR][KERNEL_GEN] Use TF_FRAMEWORK_TYPE instead of PRIVATE_EXPERIMENTAL_0. This is just a clean up. PiperOrigin-RevId: 324567279 Change-Id: I1a14ff07b60ed24baf94ef8c544f175f88e1dc89 --- .../compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.h b/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.h index ae621cb386a..8d6e433d9b9 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.h +++ b/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.h @@ -32,10 +32,7 @@ namespace tf_framework { namespace TFFrameworkTypes { enum Kind { - // TODO(pifon): Replace enum value with - // OpKernelContextType = Type::FIRST_TF_FRAMEWORK_TYPE, - // after DialectSymbolRegistry.def is updated. - OpKernelContextType = Type::FIRST_PRIVATE_EXPERIMENTAL_0_TYPE, + OpKernelContextType = Type::FIRST_TF_FRAMEWORK_TYPE, }; } // namespace TFFrameworkTypes From 123af9ba8605aedeb11c9f1eca0a3e7c64f324c2 Mon Sep 17 00:00:00 2001 From: Stephan Herhut Date: Mon, 3 Aug 2020 04:00:34 -0700 Subject: [PATCH 1964/2522] Also allow older compute capabilities 32 and 30 when generating ptx. PiperOrigin-RevId: 324575272 Change-Id: I1a9a49edb55b36cb971ad713e070ce1ee58b34fb --- .../xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc index a93810b53f7..1228a1b4823 100644 --- a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc +++ b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc @@ -83,10 +83,10 @@ const int kDefaultInlineThreshold = 1100; static string GetSmName(std::pair compute_capability) { int compute_capability_version = compute_capability.first * 10 + compute_capability.second; - int sm_version = 35; + int sm_version = 30; // If the current compute capability isn't known, fallback to the // most recent version before it. - for (int v : {75, 72, 70, 62, 61, 60, 53, 52, 50, 37, 35}) { + for (int v : {75, 72, 70, 62, 61, 60, 53, 52, 50, 37, 35, 32, 30}) { if (v <= compute_capability_version) { sm_version = v; break; From 8e9f3196fd8841de83bd6a622df696ea191d1d78 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 3 Aug 2020 08:22:33 -0700 Subject: [PATCH 1965/2522] Added a bunch of unary ops to the estimator. 
PiperOrigin-RevId: 324607213 Change-Id: I24369f36cc29f68caac412a5d3076f5ef43859fe --- .../grappler/costs/op_level_cost_estimator.cc | 8 ++++ .../costs/op_level_cost_estimator_test.cc | 39 +++++++++++-------- 2 files changed, 30 insertions(+), 17 deletions(-) diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc index d2e56cd2f1c..62e6e361ef8 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc @@ -522,6 +522,8 @@ OpLevelCostEstimator::OpLevelCostEstimator() { // Unary ops alphabetically sorted elementwise_ops_.emplace("Acos", EIGEN_COST(scalar_acos_op)); + elementwise_ops_.emplace("All", EIGEN_COST(scalar_boolean_and_op)); + elementwise_ops_.emplace("ArgMax", EIGEN_COST(scalar_max_op)); elementwise_ops_.emplace("Asin", EIGEN_COST(scalar_asin_op)); elementwise_ops_.emplace("Atan", EIGEN_COST(scalar_atan_op)); elementwise_ops_.emplace("Atan2", EIGEN_COST(scalar_quotient_op) + @@ -546,7 +548,10 @@ OpLevelCostEstimator::OpLevelCostEstimator() { elementwise_ops_.emplace("Lgamma", 1); elementwise_ops_.emplace("Log", EIGEN_COST(scalar_log_op)); elementwise_ops_.emplace("Log1p", EIGEN_COST(scalar_log1p_op)); + elementwise_ops_.emplace("Max", EIGEN_COST(scalar_max_op)); + elementwise_ops_.emplace("Min", EIGEN_COST(scalar_min_op)); elementwise_ops_.emplace("Neg", EIGEN_COST(scalar_opposite_op)); + elementwise_ops_.emplace("Prod", EIGEN_COST(scalar_product_op)); elementwise_ops_.emplace("QuantizeAndDequantizeV2", quantize_and_dequantize_v2_cost); elementwise_ops_.emplace("QuantizedSigmoid", @@ -554,6 +559,7 @@ OpLevelCostEstimator::OpLevelCostEstimator() { elementwise_ops_.emplace("QuantizeV2", quantize_v2_cost); elementwise_ops_.emplace("Reciprocal", EIGEN_COST(scalar_inverse_op)); elementwise_ops_.emplace("Relu", EIGEN_COST(scalar_max_op)); + elementwise_ops_.emplace("Relu6", EIGEN_COST(scalar_max_op)); elementwise_ops_.emplace("Rint", 1); elementwise_ops_.emplace("Round", EIGEN_COST(scalar_round_op)); elementwise_ops_.emplace("Rsqrt", EIGEN_COST(scalar_rsqrt_op)); @@ -562,8 +568,10 @@ OpLevelCostEstimator::OpLevelCostEstimator() { elementwise_ops_.emplace("Sin", EIGEN_COST(scalar_sin_op)); elementwise_ops_.emplace("Sqrt", EIGEN_COST(scalar_sqrt_op)); elementwise_ops_.emplace("Square", EIGEN_COST(scalar_square_op)); + elementwise_ops_.emplace("Sum", EIGEN_COST(scalar_sum_op)); elementwise_ops_.emplace("Tan", EIGEN_COST(scalar_tan_op)); elementwise_ops_.emplace("Tanh", EIGEN_COST(scalar_tanh_op)); + elementwise_ops_.emplace("TopKV2", EIGEN_COST(scalar_max_op)); // Binary ops alphabetically sorted elementwise_ops_.emplace("Add", EIGEN_COST(scalar_sum_op)); elementwise_ops_.emplace("AddV2", EIGEN_COST(scalar_sum_op)); diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc index d24533cf532..0f19b54feec 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc @@ -939,24 +939,29 @@ TEST_F(OpLevelCostEstimatorTest, SquaredDifferenceExecutionTime) { EXPECT_EQ(cost.num_ops_with_unknown_shapes, 0); } -TEST_F(OpLevelCostEstimatorTest, ReluExecutionTime) { - auto cost = PredictCosts(DescribeUnaryOp("Relu", 1000)); - EXPECT_EQ(Costs::Duration(800), cost.memory_time); - EXPECT_EQ(Costs::Duration(100), cost.compute_time); - EXPECT_EQ(Costs::Duration(900), cost.execution_time); - 
EXPECT_EQ(1, cost.num_ops_total); - EXPECT_FALSE(cost.inaccurate); - EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); -} +TEST_F(OpLevelCostEstimatorTest, UnaryOpExecutionTime) { + std::vector> unary_ops = { + {"All", 1}, {"ArgMax", 1}, {"Cast", 1}, {"Max", 1}, {"Min", 1}, + {"Prod", 1}, {"Relu", 1}, {"Relu6", 1}, {"Sum", 1}, {"TopKV2", 1}}; -TEST_F(OpLevelCostEstimatorTest, CastExecutionTime) { - auto cost = PredictCosts(DescribeUnaryOp("Cast", 1000)); - EXPECT_EQ(Costs::Duration(800), cost.memory_time); - EXPECT_EQ(Costs::Duration(100), cost.compute_time); - EXPECT_EQ(Costs::Duration(900), cost.execution_time); - EXPECT_EQ(1, cost.num_ops_total); - EXPECT_FALSE(cost.inaccurate); - EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); + const int kTensorSize = 1000; + for (auto unary_op : unary_ops) { + OpContext op_context = DescribeUnaryOp(unary_op.first, kTensorSize); + + const int kExpectedMemoryTime = 800; + int expected_compute_time = std::ceil( + unary_op.second * kTensorSize / + estimator_.GetDeviceInfo(op_context.op_info.device()).gigaops); + + auto cost = PredictCosts(op_context); + EXPECT_EQ(cost.memory_time, Costs::Duration(kExpectedMemoryTime)); + EXPECT_EQ(cost.compute_time, Costs::Duration(expected_compute_time)); + EXPECT_EQ(cost.execution_time, + Costs::Duration(expected_compute_time + kExpectedMemoryTime)); + EXPECT_EQ(cost.num_ops_total, 1); + EXPECT_EQ(cost.num_ops_with_unknown_shapes, 0); + EXPECT_FALSE(cost.inaccurate); + } } TEST_F(OpLevelCostEstimatorTest, BroadcastAddExecutionTime) { From fbb9c59ab276664ba1a3c09adbe5f2d397c71ea4 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Mon, 3 Aug 2020 16:01:21 +0000 Subject: [PATCH 1966/2522] Update tensorflow/python/ops/custom_gradient.py --- tensorflow/python/ops/custom_gradient.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/python/ops/custom_gradient.py b/tensorflow/python/ops/custom_gradient.py index e67f19099fc..6715d3bf0a7 100644 --- a/tensorflow/python/ops/custom_gradient.py +++ b/tensorflow/python/ops/custom_gradient.py @@ -139,7 +139,6 @@ def custom_gradient(f=None): the same number of variables. We take the function `z = x * y` as an example. ```python - >>> import tensorflow as tf >>> @tf.custom_gradient def bar(x, y): def grad(upstream): From 86927dd8ddc90ffc5a9bdfb4f8b48ecf76e8aa44 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Mon, 3 Aug 2020 16:02:28 +0000 Subject: [PATCH 1967/2522] Update tensorflow/python/ops/custom_gradient.py --- tensorflow/python/ops/custom_gradient.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/python/ops/custom_gradient.py b/tensorflow/python/ops/custom_gradient.py index 6715d3bf0a7..c0ee970af3e 100644 --- a/tensorflow/python/ops/custom_gradient.py +++ b/tensorflow/python/ops/custom_gradient.py @@ -138,7 +138,6 @@ def custom_gradient(f=None): In case the function takes multiple variables as input, the `grad` function must also return the same number of variables. We take the function `z = x * y` as an example. 
- ```python >>> @tf.custom_gradient def bar(x, y): def grad(upstream): From 84ad9375524def803dc0b1e6470688d893f8cf1b Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Mon, 3 Aug 2020 16:02:35 +0000 Subject: [PATCH 1968/2522] Update tensorflow/python/ops/custom_gradient.py --- tensorflow/python/ops/custom_gradient.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/python/ops/custom_gradient.py b/tensorflow/python/ops/custom_gradient.py index c0ee970af3e..f2675b422ac 100644 --- a/tensorflow/python/ops/custom_gradient.py +++ b/tensorflow/python/ops/custom_gradient.py @@ -165,7 +165,6 @@ def custom_gradient(f=None): 2 >>> tape.gradient(x, y) None - ``` Nesting custom gradients can lead to unintuitive results. The default behavior does not correspond to n-th order derivatives. For example From cad412bfcbaea7952e7758620e3928f64d83be32 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Mon, 3 Aug 2020 16:12:00 +0000 Subject: [PATCH 1969/2522] Update tensorflow/python/ops/math_ops.py --- tensorflow/python/ops/math_ops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 2eae0ade4dd..1d43802ab41 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -3850,8 +3850,8 @@ def conj(x, name=None): Given a tensor `x` of complex numbers, this operation returns a tensor of complex numbers that are the complex conjugate of each element in `x`. The - complex numbers in `x` must be of the form \\(a + bj\\), where *a* is the - real part and *b* is the imaginary part. + complex numbers in `x` must be of the form \\(a + bj\\), where `a` is the + real part and `b` is the imaginary part. The complex conjugate returned by this operation is of the form \\(a - bj\\). From 3ccad31d3963077b9879c9e18921e32213b9c32f Mon Sep 17 00:00:00 2001 From: Andy Ly Date: Mon, 3 Aug 2020 09:48:09 -0700 Subject: [PATCH 1970/2522] Update copyright in GuaranteeAllFuncsOneUse to be of TensorFlow and update function names to match Google C++ Style Guide (NFC). PiperOrigin-RevId: 324622347 Change-Id: Ib1d19560afa26e4fef197b1694328e26f53ade8e --- .../transforms/guarantee_all_funcs_one_use.cc | 31 ++++++++++--------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/guarantee_all_funcs_one_use.cc b/tensorflow/compiler/mlir/tensorflow/transforms/guarantee_all_funcs_one_use.cc index 6112ff500c5..776afd72ad5 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/guarantee_all_funcs_one_use.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/guarantee_all_funcs_one_use.cc @@ -1,16 +1,17 @@ -// Copyright 2020 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ #include "llvm/ADT/STLExtras.h" #include "mlir/IR/SymbolTable.h" // from @llvm-project @@ -51,12 +52,12 @@ class GuaranteeAllFuncsOneUse : public PassWrapper> { public: void runOnOperation() override { - if (failed(run())) { + if (failed(Run())) { signalPassFailure(); } } - LogicalResult run() { + LogicalResult Run() { auto module = getOperation(); // Overall strategy: From 856dc4f7b6b1c4b35142961e432a5fa66c6d1259 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 3 Aug 2020 10:01:28 -0700 Subject: [PATCH 1971/2522] Add MLIR definition for StatelessTruncatedNormalOp. PiperOrigin-RevId: 324625125 Change-Id: Ia17f1179c18c509b60427765134c377be3aef403 --- .../mlir/tensorflow/ir/tf_generated_ops.td | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td index e00ea4c342a..63138489ef7 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td @@ -9596,6 +9596,33 @@ The outputs are a deterministic function of `shape` and `seed`. TF_DerivedResultTypeAttr dtype = TF_DerivedResultTypeAttr<0>; } +def TF_StatelessTruncatedNormalOp : TF_Op<"StatelessTruncatedNormal", [NoSideEffect]> { + let summary = [{ +Outputs deterministic pseudorandom values from a truncated normal distribution. + }]; + + let description = [{ +The generated values follow a normal distribution with mean 0 and standard +deviation 1, except that values whose magnitude is more than 2 standard +deviations from the mean are dropped and re-picked. + +The outputs are a deterministic function of `shape` and `seed`. + }]; + + let arguments = (ins + TF_I32OrI64Tensor:$shape, + TF_I32OrI64Tensor:$seed + ); + + let results = (outs + TF_FpTensor:$output + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; + TF_DerivedOperandTypeAttr Tseed = TF_DerivedOperandTypeAttr<1>; + TF_DerivedResultTypeAttr dtype = TF_DerivedResultTypeAttr<0>; +} + def TF_StopGradientOp : TF_Op<"StopGradient", [NoSideEffect, TF_AllTypesMatch<["input", "output"]>]> { let summary = "Stops gradient computation."; From 59dc165d26caaed925bcfe7b40752b8429d922ea Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Mon, 3 Aug 2020 10:05:11 -0700 Subject: [PATCH 1972/2522] Enable input spec checking for Functional models. 
PiperOrigin-RevId: 324625967 Change-Id: Ide0a8cb4d6d7614f86f22088a5ef95d72636c54e --- RELEASE.md | 9 +- .../distribute/distribute_strategy_test.py | 2 +- tensorflow/python/keras/engine/base_layer.py | 3 +- tensorflow/python/keras/engine/functional.py | 36 +++++ .../python/keras/engine/functional_test.py | 59 +++++++- tensorflow/python/keras/engine/input_spec.py | 140 ++++++++++++------ tensorflow/python/keras/engine/sequential.py | 6 + .../keras/layers/tensorflow_op_layer_test.py | 14 +- .../keras/legacy_tf_layers/base_test.py | 25 +--- .../tensorflow.keras.layers.-input-spec.pbtxt | 2 +- .../v1/tensorflow.layers.-input-spec.pbtxt | 2 +- .../tensorflow.keras.layers.-input-spec.pbtxt | 2 +- 12 files changed, 214 insertions(+), 86 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 13369fd92f7..0c6b0f556e8 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -25,7 +25,14 @@ * Code that requires very tricky shape manipulation via converted op layers in order to work, where the Keras symbolic shape inference proves insufficient. * Code that tries manually walking a `tf.keras.Model` layer by layer and assumes layers only ever have one positional argument. This assumption doesn't hold true before TF 2.4 either, but is more likely to cause issues know. * Code that manually enters `keras.backend.get_graph()` before building a functional model. This is no longer needed. - +* Start enforcing input shape assumptions when calling Functional API Keras + models. This may potentially break some users, in case there is a mismatch + between the shape used when creating `Input` objects in a Functional model, + and the shape of the data passed to that model. You can fix this mismatch by + either calling the model with correctly-shaped data, or by relaxing `Input` + shape assumptions (note that you can pass shapes with `None` entries for axes + that are meant to be dynamic). You can also disable the input checking + entirely by setting `model.input_spec = None`. ## Known Caveats diff --git a/tensorflow/python/keras/distribute/distribute_strategy_test.py b/tensorflow/python/keras/distribute/distribute_strategy_test.py index df8f4e29764..4b6d3a80730 100644 --- a/tensorflow/python/keras/distribute/distribute_strategy_test.py +++ b/tensorflow/python/keras/distribute/distribute_strategy_test.py @@ -1239,7 +1239,7 @@ class TestDistributionStrategyWithDatasets(test.TestCase, dataset = dataset.repeat(100) dataset = dataset.batch(10) - with self.assertRaisesRegex(ValueError, 'incompatible with the layer'): + with self.assertRaisesRegex(ValueError, 'is incompatible with'): model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0) @combinations.generate( diff --git a/tensorflow/python/keras/engine/base_layer.py b/tensorflow/python/keras/engine/base_layer.py index a63d499400a..d7cc3fd38a8 100644 --- a/tensorflow/python/keras/engine/base_layer.py +++ b/tensorflow/python/keras/engine/base_layer.py @@ -970,12 +970,11 @@ class Layer(module.Module, version_utils.LayerVersionSelector): if self._autocast: inputs = self._maybe_cast_inputs(inputs, input_list) + input_spec.assert_input_compatibility(self.input_spec, inputs, self.name) if eager: call_fn = self.call name_scope = self._name else: - input_spec.assert_input_compatibility(self.input_spec, inputs, - self.name) name_scope = self._name_scope() # Avoid autoincrementing. 
call_fn = self._autographed_call() diff --git a/tensorflow/python/keras/engine/functional.py b/tensorflow/python/keras/engine/functional.py index 707dedac028..d2592ac1c42 100644 --- a/tensorflow/python/keras/engine/functional.py +++ b/tensorflow/python/keras/engine/functional.py @@ -33,6 +33,7 @@ from tensorflow.python.keras import backend from tensorflow.python.keras.engine import base_layer from tensorflow.python.keras.engine import base_layer_utils from tensorflow.python.keras.engine import input_layer as input_layer_module +from tensorflow.python.keras.engine import input_spec from tensorflow.python.keras.engine import keras_tensor from tensorflow.python.keras.engine import node as node_module from tensorflow.python.keras.engine import training as training_lib @@ -248,6 +249,32 @@ class Functional(training_lib.Model): """ return nest.map_structure(backend.int_shape, self.input) + @property + def input_spec(self): + if hasattr(self, '_manual_input_spec'): + return self._manual_input_spec + if (isinstance(self._nested_inputs, (dict, list, tuple)) and + len(self._nested_inputs) != len(self.inputs)): + # Case where we have a nested structure. + # In such a case we can't safely run any checks. + return None + if isinstance(self._nested_inputs, dict): + # Case where `_nested_inputs` is a plain dict of Inputs. + names = sorted(self._nested_inputs.keys()) + return [input_spec.InputSpec( + shape=shape_with_no_batch_size(self._nested_inputs[name]), + allow_last_axis_squeeze=True, name=name) for name in names] + else: + # Single input, or list / tuple of inputs. + # The data may be passed as a dict keyed by input name. + return [input_spec.InputSpec( + shape=shape_with_no_batch_size(x), allow_last_axis_squeeze=True, + name=x._keras_history.layer.name) for x in self.inputs] + + @input_spec.setter + def input_spec(self, value): + self._manual_input_spec = value + @property def output(self): """Retrieves the output tensor(s) of a layer. 
@@ -1312,3 +1339,12 @@ def get_network_config(network, serialize_layer_fn=None): model_outputs = tf_utils.convert_inner_node_data(model_outputs) config['output_layers'] = model_outputs return config + + +def shape_with_no_batch_size(x): + if x.shape.rank is None: + return None + shape = x.shape.as_list() + if shape: + shape[0] = None + return shape diff --git a/tensorflow/python/keras/engine/functional_test.py b/tensorflow/python/keras/engine/functional_test.py index b104668c9e1..1b6d15863e6 100644 --- a/tensorflow/python/keras/engine/functional_test.py +++ b/tensorflow/python/keras/engine/functional_test.py @@ -1059,7 +1059,7 @@ class NetworkConstructionTest(keras_parameterized.TestCase): self.assertEqual(history.history['loss'][0], 0.0) # Check the output dtype - self.assertEqual(model(array_ops.ones(3, 3)).dtype, dtypes.float16) + self.assertEqual(model(array_ops.ones((3, 10))).dtype, dtypes.float16) model = training_lib.Model.from_config( model.get_config(), custom_objects={'Double': Double}) @@ -1075,7 +1075,7 @@ class NetworkConstructionTest(keras_parameterized.TestCase): self.assertEqual(history.history['loss'][0], 0.0) # Check the output dtype - self.assertEqual(model(array_ops.ones(3, 3)).dtype, dtypes.float16) + self.assertEqual(model(array_ops.ones((3, 10))).dtype, dtypes.float16) @combinations.generate(combinations.keras_mode_combinations()) def test_call_kwarg_nonserializable(self): @@ -1793,8 +1793,8 @@ class NestedNetworkTest(keras_parameterized.TestCase): network = functional.Functional.from_config(network.get_config()) result_tensor = network({ - 'x': array_ops.ones((1, 1), 'float32'), - 'y': array_ops.ones((1, 1), 'float32') + 'x1': array_ops.ones((1, 1), 'float32'), + 'x2': array_ops.ones((1, 1), 'float32') }) result = self.evaluate(result_tensor) self.assertAllEqual(result, [[2.]]) @@ -2340,6 +2340,57 @@ class InputsOutputsErrorTest(keras_parameterized.TestCase): TypeError, "('Keyword argument not understood:', 'output')"): models.Model(inputs=inputs, output=outputs) + def test_input_spec(self): + if not context.executing_eagerly(): + return + inputs = input_layer_lib.Input((10,)) + outputs = layers.Dense(10)(inputs) + model = models.Model(inputs, outputs) + with self.assertRaisesRegex( + ValueError, r'.*expected shape=.*'): + model(np.zeros((3, 11))) + + def test_input_spec_list_of_inputs(self): + if not context.executing_eagerly(): + return + input_1 = input_layer_lib.Input((10,), name='1') + input_2 = input_layer_lib.Input((5,), name='2') + x = layers.Concatenate()([input_1, input_2]) + outputs = layers.Dense(10)(x) + model = models.Model([input_1, input_2], outputs) + with self.assertRaisesRegex( + ValueError, r'.*expects 2 input.*'): + model(np.zeros((3, 10))) + with self.assertRaisesRegex( + ValueError, r'.*expects 2 input.*'): + model([np.zeros((3, 10)), np.zeros((3, 5)), np.zeros((3, 10))]) + with self.assertRaisesRegex( + ValueError, r'.*expected shape=.*'): + model([np.zeros((3, 10)), np.zeros((3, 6))]) + + # Test passing data via dict keyed by input name + with self.assertRaisesRegex( + ValueError, r'Missing data for input.*'): + model({'1': np.zeros((3, 10))}) + with self.assertRaisesRegex( + ValueError, r'.*expected shape=.*'): + model({'1': np.zeros((3, 10)), '2': np.zeros((3, 6))}) + + def test_input_spec_dict(self): + if not context.executing_eagerly(): + return + input_1 = input_layer_lib.Input((10,)) + input_2 = input_layer_lib.Input((5,)) + x = layers.Concatenate()([input_1, input_2]) + outputs = layers.Dense(10)(x) + model = models.Model({'1': 
input_1, '2': input_2}, outputs) + with self.assertRaisesRegex( + ValueError, r'Missing data for input.*'): + model({'1': np.zeros((3, 10))}) + with self.assertRaisesRegex( + ValueError, r'.*expected shape=.*'): + model({'1': np.zeros((3, 10)), '2': np.zeros((3, 6))}) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/keras/engine/input_spec.py b/tensorflow/python/keras/engine/input_spec.py index b57b2974aae..52a2829ffdb 100644 --- a/tensorflow/python/keras/engine/input_spec.py +++ b/tensorflow/python/keras/engine/input_spec.py @@ -44,14 +44,32 @@ class InputSpec(object): a None shape is compatible with any shape. Arguments: - dtype: Expected DataType of the input. - shape: Shape tuple, expected shape of the input - (may include None for unchecked axes). - ndim: Integer, expected rank of the input. - max_ndim: Integer, maximum rank of the input. - min_ndim: Integer, minimum rank of the input. - axes: Dictionary mapping integer axes to - a specific dimension value. + dtype: Expected DataType of the input. + shape: Shape tuple, expected shape of the input + (may include None for unchecked axes). Includes the batch size. + ndim: Integer, expected rank of the input. + max_ndim: Integer, maximum rank of the input. + min_ndim: Integer, minimum rank of the input. + axes: Dictionary mapping integer axes to + a specific dimension value. + allow_last_axis_squeeze: If True, then allow inputs of rank N+1 as long + as the last axis of the input is 1, as well as inputs of rank N-1 + as long as the last axis of the spec is 1. + name: Expected key corresponding to this input when passing data as + a dictionary. + + Example: + + ```python + class MyLayer(Layer): + def __init__(self): + super(MyLayer, self).__init__() + # The layer will accept inputs with shape (?, 28, 28) & (?, 28, 28, 1) + # and raise an appropriate error message otherwise. + self.input_spec = InputSpec( + shape=(None, 28, 28, 1), + allow_last_axis_squeeze=True) + ``` """ def __init__(self, @@ -60,8 +78,15 @@ class InputSpec(object): ndim=None, max_ndim=None, min_ndim=None, - axes=None): + axes=None, + allow_last_axis_squeeze=False, + name=None): self.dtype = dtypes.as_dtype(dtype).name if dtype is not None else None + shape = tensor_shape.TensorShape(shape) + if shape.rank is None: + shape = None + else: + shape = tuple(shape.as_list()) if shape is not None: self.ndim = len(shape) self.shape = shape @@ -70,6 +95,8 @@ class InputSpec(object): self.shape = None self.max_ndim = max_ndim self.min_ndim = min_ndim + self.name = name + self.allow_last_axis_squeeze = allow_last_axis_squeeze try: axes = axes or {} self.axes = {int(k): axes[k] for k in axes} @@ -149,6 +176,21 @@ def assert_input_compatibility(input_spec, inputs, layer_name): if not input_spec: return + input_spec = nest.flatten(input_spec) + if isinstance(inputs, dict): + # Flatten `inputs` by reference order if input spec names are provided + names = [spec.name for spec in input_spec] + if all(names): + list_inputs = [] + for name in names: + if name not in inputs: + raise ValueError('Missing data for input "%s". ' + 'You passed a data dictionary with keys %s. ' + 'Expected the following keys: %s' % + (name, list(inputs.keys()), names)) + list_inputs.append(inputs[name]) + inputs = list_inputs + inputs = nest.flatten(inputs) for x in inputs: # Having a shape/dtype is the only commonality of the various tensor-like @@ -157,81 +199,83 @@ def assert_input_compatibility(input_spec, inputs, layer_name): # have a `shape` attribute. 
if not hasattr(x, 'shape'): raise TypeError('Inputs to a layer should be tensors. Got: %s' % (x,)) - input_spec = nest.flatten(input_spec) + if len(inputs) != len(input_spec): raise ValueError('Layer ' + layer_name + ' expects ' + - str(len(input_spec)) + ' inputs, ' + str(len(input_spec)) + ' input(s), ' 'but it received ' + str(len(inputs)) + ' input tensors. Inputs received: ' + str(inputs)) for input_index, (x, spec) in enumerate(zip(inputs, input_spec)): if spec is None: continue - if (spec.ndim is not None or - spec.min_ndim is not None or - spec.max_ndim is not None): - if x.shape.ndims is None: - raise ValueError('Input ' + str(input_index) + ' of layer ' + - layer_name + ' is incompatible with the layer: ' - 'its rank is undefined, but the layer requires a ' - 'defined rank.') - + shape = tensor_shape.TensorShape(x.shape) + if shape.rank is None: + return # Check ndim. - if spec.ndim is not None: - ndim = x.shape.ndims + if spec.ndim is not None and not spec.allow_last_axis_squeeze: + ndim = shape.rank if ndim != spec.ndim: raise ValueError('Input ' + str(input_index) + ' of layer ' + layer_name + ' is incompatible with the layer: ' 'expected ndim=' + str(spec.ndim) + ', found ndim=' + str(ndim) + '. Full shape received: ' + - str(x.shape.as_list())) + str(tuple(shape))) if spec.max_ndim is not None: - ndim = x.shape.ndims + ndim = x.shape.rank if ndim is not None and ndim > spec.max_ndim: raise ValueError('Input ' + str(input_index) + ' of layer ' + layer_name + ' is incompatible with the layer: ' 'expected max_ndim=' + str(spec.max_ndim) + ', found ndim=' + str(ndim)) if spec.min_ndim is not None: - ndim = x.shape.ndims + ndim = x.shape.rank if ndim is not None and ndim < spec.min_ndim: raise ValueError('Input ' + str(input_index) + ' of layer ' + layer_name + ' is incompatible with the layer: ' ': expected min_ndim=' + str(spec.min_ndim) + ', found ndim=' + str(ndim) + '. Full shape received: ' + - str(x.shape.as_list())) + str(tuple(shape))) # Check dtype. if spec.dtype is not None: - if x.dtype != spec.dtype: + if x.dtype.name != spec.dtype: raise ValueError('Input ' + str(input_index) + ' of layer ' + layer_name + ' is incompatible with the layer: ' 'expected dtype=' + str(spec.dtype) + ', found dtype=' + str(x.dtype)) + # Check specific shape axes. + shape_as_list = shape.as_list() if spec.axes: - shape = x.shape.as_list() - if shape is not None: - for axis, value in spec.axes.items(): - if hasattr(value, 'value'): - value = value.value - if value is not None and shape[int(axis)] not in {value, None}: - raise ValueError( - 'Input ' + str(input_index) + ' of layer ' + layer_name + ' is' - ' incompatible with the layer: expected axis ' + str(axis) + - ' of input shape to have value ' + str(value) + - ' but received input with shape ' + str(shape)) + for axis, value in spec.axes.items(): + if hasattr(value, 'value'): + value = value.value + if value is not None and shape_as_list[int(axis)] not in {value, None}: + raise ValueError( + 'Input ' + str(input_index) + ' of layer ' + layer_name + ' is' + ' incompatible with the layer: expected axis ' + str(axis) + + ' of input shape to have value ' + str(value) + + ' but received input with shape ' + display_shape(x.shape)) # Check shape. 
- if spec.shape is not None: - shape = x.shape.as_list() - if shape is not None: - for spec_dim, dim in zip(spec.shape, shape): - if spec_dim is not None and dim is not None: - if spec_dim != dim: - raise ValueError('Input ' + str(input_index) + - ' is incompatible with layer ' + layer_name + - ': expected shape=' + str(spec.shape) + - ', found shape=' + str(shape)) + if spec.shape is not None and shape.rank is not None: + spec_shape = spec.shape + if spec.allow_last_axis_squeeze: + if shape_as_list and shape_as_list[-1] == 1: + shape_as_list = shape_as_list[:-1] + if spec_shape and spec_shape[-1] == 1: + spec_shape = spec_shape[:-1] + for spec_dim, dim in zip(spec_shape, shape_as_list): + if spec_dim is not None and dim is not None: + if spec_dim != dim: + raise ValueError('Input ' + str(input_index) + + ' is incompatible with layer ' + layer_name + + ': expected shape=' + str(spec.shape) + + ', found shape=' + display_shape(x.shape)) + + +def display_shape(shape): + return str(tuple(shape.as_list())) def to_tensor_spec(input_spec, default_dtype=None): diff --git a/tensorflow/python/keras/engine/sequential.py b/tensorflow/python/keras/engine/sequential.py index 595757672ce..e22c4921102 100644 --- a/tensorflow/python/keras/engine/sequential.py +++ b/tensorflow/python/keras/engine/sequential.py @@ -495,10 +495,16 @@ class Sequential(functional.Functional): @property def input_spec(self): + if hasattr(self, '_manual_input_spec'): + return self._manual_input_spec if self.layers and hasattr(self.layers[0], 'input_spec'): return self.layers[0].input_spec return None + @input_spec.setter + def input_spec(self, value): + self._manual_input_spec = value + @property def _trackable_saved_model_saver(self): return model_serialization.SequentialSavedModelSaver(self) diff --git a/tensorflow/python/keras/layers/tensorflow_op_layer_test.py b/tensorflow/python/keras/layers/tensorflow_op_layer_test.py index 18eb82624c1..e128323a1a6 100644 --- a/tensorflow/python/keras/layers/tensorflow_op_layer_test.py +++ b/tensorflow/python/keras/layers/tensorflow_op_layer_test.py @@ -324,9 +324,9 @@ class AutoLambdaTest(keras_parameterized.TestCase): run_eagerly=testing_utils.should_run_eagerly()) np_inputs = nest.map_structure( - lambda x: np.ones((10,) + tuple(x.shape[1:]), 'float32'), model.inputs) + lambda x: np.ones((2,) + tuple(x.shape[1:]), 'float32'), model.inputs) np_outputs = nest.map_structure( - lambda x: np.ones((10,) + tuple(x.shape[1:]), 'float32'), model.outputs) + lambda x: np.ones((2,) + tuple(x.shape[1:]), 'float32'), model.outputs) model.fit(np_inputs, np_outputs, batch_size=2) model(np_inputs) # Test calling the model directly on inputs. 
@@ -402,7 +402,7 @@ class AutoLambdaTest(keras_parameterized.TestCase): def test_getitem_slice_with_step_only(self): if not context.executing_eagerly(): self.skipTest('Complex slicing like this fails in v1') - inp = keras.Input(shape=(4, 3, 8)) + inp = keras.Input(shape=(8,)) slice_step = keras.Input(shape=(), dtype='int32') out = inp[..., ::slice_step[0]] @@ -508,7 +508,7 @@ class AutoLambdaTest(keras_parameterized.TestCase): def test_getitem_slice_with_stop_only(self): if not context.executing_eagerly(): self.skipTest('Complex slicing like this fails in v1') - inp = keras.Input(shape=(4, 3, 8)) + inp = keras.Input(shape=(8,)) slice_stop = keras.Input(shape=(), dtype='int32') out = inp[:slice_stop[0]] @@ -544,7 +544,7 @@ class AutoLambdaTest(keras_parameterized.TestCase): def test_getitem_slice_with_stop_and_ellipsis_only(self): if not context.executing_eagerly(): self.skipTest('Complex slicing like this fails in v1') - inp = keras.Input(shape=(4, 3, 8)) + inp = keras.Input(shape=(8,)) slice_stop = keras.Input(shape=(), dtype='int32') out = inp[..., :slice_stop[0]] @@ -646,14 +646,14 @@ class AutoLambdaTest(keras_parameterized.TestCase): def test_numerical_correctness_with_attrs(self): x = ops.convert_to_tensor_v2([[1.5, 1.5], [2.5, 3.5]]) - inputs = keras.Input(shape=(10,)) + inputs = keras.Input(shape=(2,)) outputs = math_ops.reduce_mean(inputs, axis=1) model = keras.Model(inputs, outputs) y = self.evaluate(model(x)) self.assertAllClose(y, [1.5, 3.]) def test_numerical_correctness_serialization(self): - x = ops.convert_to_tensor_v2([-1., 0., -2., 1.]) + x = ops.convert_to_tensor_v2([[-1., 0., -2., 1.]]) inputs = keras.Input(shape=(4,)) outputs = gen_nn_ops.relu(inputs) model1 = keras.Model(inputs, outputs) diff --git a/tensorflow/python/keras/legacy_tf_layers/base_test.py b/tensorflow/python/keras/legacy_tf_layers/base_test.py index 36be60f7657..2c9810c4109 100644 --- a/tensorflow/python/keras/legacy_tf_layers/base_test.py +++ b/tensorflow/python/keras/legacy_tf_layers/base_test.py @@ -277,11 +277,6 @@ class BaseLayerTest(test.TestCase, parameterized.TestCase): def call(self, inputs): return inputs - if not context.executing_eagerly(): - layer = CustomerLayer() - with self.assertRaisesRegex(ValueError, r'requires a defined rank'): - layer.apply(array_ops.placeholder('int32')) - layer = CustomerLayer() with self.assertRaisesRegex(ValueError, r'expected ndim=2'): layer.apply(constant_op.constant([1])) @@ -295,29 +290,24 @@ class BaseLayerTest(test.TestCase, parameterized.TestCase): @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def testInputSpecMinNdimCheck(self): - class CustomerLayer(base_layers.Layer): + class CustomLayer(base_layers.Layer): def __init__(self): - super(CustomerLayer, self).__init__() + super(CustomLayer, self).__init__() self.input_spec = input_spec.InputSpec(min_ndim=2) def call(self, inputs): return inputs - if not context.executing_eagerly(): - layer = CustomerLayer() - with self.assertRaisesRegex(ValueError, r'requires a defined rank'): - layer.apply(array_ops.placeholder('int32')) - - layer = CustomerLayer() + layer = CustomLayer() with self.assertRaisesRegex(ValueError, r'expected min_ndim=2'): layer.apply(constant_op.constant([1])) # Works - layer = CustomerLayer() + layer = CustomLayer() layer.apply(constant_op.constant([[1], [2]])) - layer = CustomerLayer() + layer = CustomLayer() layer.apply(constant_op.constant([[[1], [2]]])) @combinations.generate(combinations.combine(mode=['graph', 'eager'])) @@ -332,11 +322,6 @@ class 
BaseLayerTest(test.TestCase, parameterized.TestCase): def call(self, inputs): return inputs - if not context.executing_eagerly(): - layer = CustomerLayer() - with self.assertRaisesRegex(ValueError, r'requires a defined rank'): - layer.apply(array_ops.placeholder('int32')) - layer = CustomerLayer() with self.assertRaisesRegex(ValueError, r'expected max_ndim=2'): layer.apply(constant_op.constant([[[1], [2]]])) diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-input-spec.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-input-spec.pbtxt index fce381e8a0a..c95e3135df8 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-input-spec.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-input-spec.pbtxt @@ -4,7 +4,7 @@ tf_class { is_instance: "" member_method { name: "__init__" - argspec: "args=[\'self\', \'dtype\', \'shape\', \'ndim\', \'max_ndim\', \'min_ndim\', \'axes\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'dtype\', \'shape\', \'ndim\', \'max_ndim\', \'min_ndim\', \'axes\', \'allow_last_axis_squeeze\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'False\', \'None\'], " } member_method { name: "from_config" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-input-spec.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-input-spec.pbtxt index 17b89c29fb2..75dbd5e386a 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-input-spec.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-input-spec.pbtxt @@ -4,7 +4,7 @@ tf_class { is_instance: "" member_method { name: "__init__" - argspec: "args=[\'self\', \'dtype\', \'shape\', \'ndim\', \'max_ndim\', \'min_ndim\', \'axes\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'dtype\', \'shape\', \'ndim\', \'max_ndim\', \'min_ndim\', \'axes\', \'allow_last_axis_squeeze\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'False\', \'None\'], " } member_method { name: "from_config" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-input-spec.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-input-spec.pbtxt index fce381e8a0a..c95e3135df8 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-input-spec.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-input-spec.pbtxt @@ -4,7 +4,7 @@ tf_class { is_instance: "" member_method { name: "__init__" - argspec: "args=[\'self\', \'dtype\', \'shape\', \'ndim\', \'max_ndim\', \'min_ndim\', \'axes\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'dtype\', \'shape\', \'ndim\', \'max_ndim\', \'min_ndim\', \'axes\', \'allow_last_axis_squeeze\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'False\', \'None\'], " } member_method { name: "from_config" From 60c8033ebc325550cae779c393f9e4c73108a75e Mon Sep 17 00:00:00 2001 From: Andy Ly Date: Mon, 3 Aug 2020 10:17:46 -0700 Subject: [PATCH 1973/2522] Add support for token operands to mhlo.tuple. mhlo.get_tuple_element supports extracting a mhlo.token type from a tuple. This updates the creation of tuples to allow for mhlo.token typed operands. 
PiperOrigin-RevId: 324628663 Change-Id: I18c77aabdfcb2d84ae70d49e85a52d751bc962c2 --- .../mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td | 2 +- tensorflow/compiler/mlir/hlo/tests/ops.mlir | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td index db98bd16f76..e83bf874c62 100644 --- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td @@ -664,7 +664,7 @@ def HLO_GetTupleElementOp: HLO_Op<"get_tuple_element", [NoSideEffect]>, BASE_HLO } def HLO_TupleOp : HLO_Op<"tuple", [NoSideEffect]>, BASE_HLO_TupleOp { - let arguments = (ins Variadic:$val); + let arguments = (ins Variadic:$val); let results = (outs HLO_Tuple); let builders = [OpBuilder< diff --git a/tensorflow/compiler/mlir/hlo/tests/ops.mlir b/tensorflow/compiler/mlir/hlo/tests/ops.mlir index 920e62e57b4..212e79432b1 100644 --- a/tensorflow/compiler/mlir/hlo/tests/ops.mlir +++ b/tensorflow/compiler/mlir/hlo/tests/ops.mlir @@ -847,6 +847,13 @@ func @tuple(%arg0: tensor<1xi32>, %arg1: tensor<1x2xf32>) -> tuple // ----- +func @tuple_token(%arg0: tensor, %arg1: !mhlo.token) -> tuple, !mhlo.token> { + %0 = "mhlo.tuple"(%arg0, %arg1) : (tensor, !mhlo.token) -> tuple, !mhlo.token> + return %0 : tuple, !mhlo.token> +} + +// ----- + func @tuple_arg_size_mismatch(%arg0: tensor, %arg1: tensor) -> tuple, tensor, tensor> { // expected-error@+1 {{has return type tuple, tensor, tensor>, but expected tuple, tensor>}} %0 = "mhlo.tuple"(%arg0, %arg1) : (tensor, tensor) -> tuple, tensor, tensor> From 74d526257013ee74c34e6e48cd52e650b4bde6ec Mon Sep 17 00:00:00 2001 From: Robert David Date: Mon, 3 Aug 2020 10:24:31 -0700 Subject: [PATCH 1974/2522] Use workgroup-local reductions for MeanStdDevNormalization. PiperOrigin-RevId: 324630237 Change-Id: Ie0fe32a072039809b7b1b51bbeda8665e7f1a5ce --- .../cl/kernels/mean_stddev_normalization.cc | 76 ++++++++++++++++--- .../cl/kernels/mean_stddev_normalization.h | 3 + 2 files changed, 69 insertions(+), 10 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc b/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc index fb206cc0692..a6ce7e55253 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc @@ -24,9 +24,55 @@ limitations under the License. namespace tflite { namespace gpu { namespace cl { +namespace { + +std::string GetVectorReduceCode() { + return R"(static inline float reduce_vector(float4 v) { + return dot(v, (float4)(1.0f)); +})"; +} + +std::string GetReduceCode(size_t work_group_size_x, size_t work_group_size_y) { + // If it is supported, use the built-in work_group_reduce_add function. + // Otherwise, implement a reduction using __local memory. Note this only works + // with power-of-two work group sizes. 
+ return R"( +static inline float local_reduce(float input) { +#if (__OPENCL_C_VERSION__ >= 300 && __opencl_c_work_group_collective_functions) || \ + (__OPENCL_C_VERSION__ >= 200) + return work_group_reduce_add(input); +#else + __local float data[)" + + std::to_string(work_group_size_y) + "][" + + std::to_string(work_group_size_x) + R"(]; + const size_t local_id_x = get_local_id(0); + const size_t local_id_y = get_local_id(1); + data[local_id_y][local_id_x] = input; + mem_fence(CLK_LOCAL_MEM_FENCE); + size_t reduction_size = get_local_size(0) / 2; + while (reduction_size > 0) { + if (local_id_x < reduction_size) { + data[local_id_y][local_id_x] += data[local_id_y][local_id_x + reduction_size]; + } + mem_fence(CLK_LOCAL_MEM_FENCE); + reduction_size /= 2; + } + return data[local_id_y][0]; +} +#endif +)"; +} +} // namespace MeanStdDevNormalization::MeanStdDevNormalization(const OperationDef& definition) : GPUOperation(definition) { + // The kernel code does not inherently need a fixed size, but in order to not + // hardcode the __local array's size for the reductions, we would need to pass + // that size to the kernel at runtime, and that is currently not supported. + // For now, fix workgroup size to 128 threads. + work_group_size_.x = 128; + work_group_size_.y = 1; + work_group_size_.z = 1; code_ = GetNormalizationCode(); } @@ -35,35 +81,43 @@ std::string MeanStdDevNormalization::GetNormalizationCode() { AddDstTensor("dst_tensor", definition_.dst_tensors[0]); std::string c = GetCommonDefines(definition_.precision); - c += R"(__kernel void main_function( + c += GetVectorReduceCode(); + c += GetReduceCode(work_group_size_.x, work_group_size_.y); + c += R"(__attribute__((reqd_work_group_size(128, 1, 1))) +__kernel void main_function( $0) { - if (get_global_id(0) > 0) { return; } size_t B = get_global_id(1); if (get_global_id(2) > 0) { return; } if (B >= args.src_tensor.Batch()) { return; } // Calculate the total sum of the input tensor. // First, get a local sum of input[local_id_x + N*local_size_x] for all N. - float sum = 0.0f; - for (int S = 0; S < args.src_tensor.Slices(); ++S) { + float4 private_sum4 = (float4)(0.0f); + for (int S = get_local_id(0); S < args.src_tensor.Slices(); S += get_local_size(0)) { const float4 t = args.src_tensor.Read(0, 0, S, B); // Filter out reads beyond the end of the tensor. const int4 is_after_end_of_tensor = (int4)(0, 1, 2, 3) >= (args.src_tensor.Channels() - S * 4); const float4 filtered_t = select(t, (float4)(0.0f), is_after_end_of_tensor); - sum += filtered_t.x + filtered_t.y + filtered_t.z + filtered_t.w; + private_sum4 += filtered_t; } + // Reduce the vector to a single float and do a workgroup reduce. + const float private_sum = reduce_vector(private_sum4); + const float sum = local_reduce(private_sum); // Calculate the mean const float mean = sum / args.src_tensor.Channels(); // Calculate the squared sum of the difference from the mean. - float sum_diff_sq = 0.0f; - for (int S = 0; S < args.src_tensor.Slices(); ++S) { + float4 private_sum_diff_sq4 = (float4)(0.0f); + for (int S = get_local_id(0); S < args.src_tensor.Slices(); S += get_local_size(0)) { const float4 t = args.src_tensor.Read(0, 0, S, B); const float4 diff = t - mean; // Filter out reads beyond the end of the tensor. 
const int4 is_after_end_of_tensor = (int4)(0, 1, 2, 3) >= (args.src_tensor.Channels() - S * 4); const float4 filtered_diff = select(diff, (float4)(0.0f), is_after_end_of_tensor); - float dotprod = dot(filtered_diff, filtered_diff); - sum_diff_sq += dotprod; + // sum_diff_sq += diff² + private_sum_diff_sq4 = mad(filtered_diff, filtered_diff, private_sum_diff_sq4); } + // Reduce + const float private_sum_diff_sq = reduce_vector(private_sum_diff_sq4); + const float sum_diff_sq = local_reduce(private_sum_diff_sq); // Calculate 1/stddev (with the 'regulazing constant' as in tensor_utils.cc) const float variance = sum_diff_sq / args.src_tensor.Channels(); const float stddev_inv = rsqrt(variance + 1.0e-8f); @@ -78,7 +132,9 @@ $0) { } int3 MeanStdDevNormalization::GetGridSize() const { - const int grid_x = 1; + // To avoid dealing with global reductions, we restrict the grid size to the + // work group size in the first dimension. + const int grid_x = work_group_size_.x; const int grid_y = src_[0]->Batch(); const int grid_z = 1; return int3(grid_x, grid_y, grid_z); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.h b/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.h index 5724d72bcd1..7dd45fcb86a 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.h @@ -30,6 +30,9 @@ class MeanStdDevNormalization : public GPUOperation { public: explicit MeanStdDevNormalization(const OperationDef& definition); + absl::Status Tune(const TuningParameters& params) override { + return absl::OkStatus(); + } int3 GetGridSize() const override; // Move only From b25ed5a8d47d96860e828139b90aec55a085c137 Mon Sep 17 00:00:00 2001 From: Tomer Kaftan Date: Mon, 3 Aug 2020 10:31:42 -0700 Subject: [PATCH 1975/2522] Fixes error when reconstructing functional models from config if layer outputs accessed in call **kwargs come from previously-called layers. Specifically, this may happen when cloning functional models w/ weight sharing in place. PiperOrigin-RevId: 324631738 Change-Id: I8a75b053e16116a55e2a99a8ee011f5ba52c171c --- tensorflow/python/keras/engine/functional.py | 39 ++++++++++---------- tensorflow/python/keras/models_test.py | 33 +++++++++++++---- 2 files changed, 45 insertions(+), 27 deletions(-) diff --git a/tensorflow/python/keras/engine/functional.py b/tensorflow/python/keras/engine/functional.py index d2592ac1c42..71d6faa71b6 100644 --- a/tensorflow/python/keras/engine/functional.py +++ b/tensorflow/python/keras/engine/functional.py @@ -1056,26 +1056,6 @@ def _should_skip_first_node(layer): isinstance(layer._layers[0], input_layer_module.InputLayer)) -def _deserialize_keras_tensors(kwargs, layer_map): - """Deserializes Keras Tensors passed to `call`..""" - - def _deserialize_keras_tensor(t): - """Deserializes a single Keras Tensor passed to `call`.""" - if isinstance(t, tf_utils.ListWrapper): - t = t.as_list() - layer_name = t[0] - node_index = t[1] - tensor_index = t[2] - - layer = layer_map[layer_name] - node = layer._inbound_nodes[node_index] - return nest.flatten(node.outputs)[tensor_index] - return t - - kwargs = tf_utils.convert_inner_node_data(kwargs, wrap=True) - return nest.map_structure(_deserialize_keras_tensor, kwargs) - - def connect_ancillary_layers(model, created_layers): """Adds layers that are not connected to the outputs to the model.""" # Layers not connected to outputs, such as those added in `add_loss`. 
@@ -1135,6 +1115,25 @@ def reconstruct_from_config(config, custom_objects=None, created_layers=None): return 0 return node_index_map.get((layer.name, config_node_index), None) + def _deserialize_keras_tensors(kwargs, layer_map): + """Deserializes Keras Tensors passed to `call`..""" + + def _deserialize_keras_tensor(t): + """Deserializes a single Keras Tensor passed to `call`.""" + if isinstance(t, tf_utils.ListWrapper): + t = t.as_list() + layer_name = t[0] + node_index = t[1] + tensor_index = t[2] + + layer = layer_map[layer_name] + node = layer._inbound_nodes[get_node_index(layer, node_index)] + return nest.flatten(node.outputs)[tensor_index] + return t + + kwargs = tf_utils.convert_inner_node_data(kwargs, wrap=True) + return nest.map_structure(_deserialize_keras_tensor, kwargs) + def process_node(layer, node_data): """Deserialize a node. diff --git a/tensorflow/python/keras/models_test.py b/tensorflow/python/keras/models_test.py index ea0dc148326..8411ed0d3ea 100644 --- a/tensorflow/python/keras/models_test.py +++ b/tensorflow/python/keras/models_test.py @@ -278,9 +278,20 @@ class TestModelCloning(keras_parameterized.TestCase): has_placeholder = _has_placeholder(graph) self.assertFalse(has_placeholder) - def test_functional_cloning_with_tensor_kwarg(self): + @keras_parameterized.run_all_keras_modes + @parameterized.named_parameters([ + {'testcase_name': 'clone_weights', 'share_weights': False}, + {'testcase_name': 'share_weights', 'share_weights': True}, + ]) + def test_functional_cloning_with_tensor_kwarg(self, share_weights): """Test that cloning works with models that use Tensor kwargs.""" + if share_weights: + clone_fn = functools.partial( + keras.models.clone_model, clone_function=models.share_weights) + else: + clone_fn = keras.models.clone_model + class LayerWithTensorKwarg(keras.layers.Layer): def call(self, inputs, tensor=None): @@ -295,13 +306,21 @@ class TestModelCloning(keras_parameterized.TestCase): model.add_loss(math_ops.reduce_sum(model.outputs)) input_arr = np.random.random((1, 3)).astype(np.float32) - with ops.Graph().as_default(): - with self.session() as sess: - clone = keras.models.clone_model(model) - self.assertLen(clone.losses, 1) + clone = clone_fn(model) - loss = sess.run(clone.losses[0], feed_dict={clone.input: input_arr}) - self.assertAllClose(np.sum(input_arr), loss) + if context.executing_eagerly(): + clone(input_arr) + loss = clone.losses[0] + else: + with self.session() as sess: + clone(input_arr) + if share_weights: + self.skipTest('Weight sharing with inputs in call **kwargs does ' + 'not work correctly in v1') + else: + feed_dict = {clone.input: input_arr} + loss = sess.run(clone.losses[0], feed_dict=feed_dict) + self.assertAllClose(np.sum(input_arr), loss) def _has_placeholder(graph): From 88ee42a7e7a8ac0dba3968fdb48c3c79324611e5 Mon Sep 17 00:00:00 2001 From: Kaixi Hou Date: Fri, 26 Jun 2020 15:13:28 -0700 Subject: [PATCH 1976/2522] Layout grappler for NDHWC Conv3D --- tensorflow/core/grappler/op_types.cc | 8 ++ tensorflow/core/grappler/op_types.h | 2 + .../generic_layout_optimizer_transposer.cc | 86 ++++++++++++- .../generic_layout_optimizer_transposer.h | 24 ++++ ...ric_layout_optimizer_transposer_factory.cc | 11 ++ tensorflow/core/kernels/data_format_ops.cc | 5 +- .../python/grappler/layout_optimizer_test.py | 121 ++++++++++++++++++ 7 files changed, 253 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc index 9d30f24e047..6b961c1e18f 100644 --- 
a/tensorflow/core/grappler/op_types.cc +++ b/tensorflow/core/grappler/op_types.cc @@ -186,6 +186,14 @@ bool IsConv2DBackpropInput(const NodeDef& node) { bool IsConv3D(const NodeDef& node) { return node.op() == "Conv3D"; } +bool IsConv3DBackpropFilterV2(const NodeDef& node) { + return node.op() == "Conv3DBackpropFilterV2"; +} + +bool IsConv3DBackpropInputV2(const NodeDef& node) { + return node.op() == "Conv3DBackpropInputV2"; +} + bool IsDepthwiseConv2dNative(const NodeDef& node) { return node.op() == "DepthwiseConv2dNative"; } diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h index 141eda7415a..1bf26721847 100644 --- a/tensorflow/core/grappler/op_types.h +++ b/tensorflow/core/grappler/op_types.h @@ -63,6 +63,8 @@ bool IsConv2D(const NodeDef& node); bool IsConv2DBackpropFilter(const NodeDef& node); bool IsConv2DBackpropInput(const NodeDef& node); bool IsConv3D(const NodeDef& node); +bool IsConv3DBackpropFilterV2(const NodeDef& node); +bool IsConv3DBackpropInputV2(const NodeDef& node); bool IsDepthwiseConv2dNative(const NodeDef& node); bool IsDepthwiseConv2dNativeBackpropFilter(const NodeDef& node); bool IsDepthwiseConv2dNativeBackpropInput(const NodeDef& node); diff --git a/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer.cc b/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer.cc index 9d702971dd7..2ac47ec36a4 100644 --- a/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer.cc +++ b/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer.cc @@ -241,7 +241,7 @@ Status Transposer::CreateConstPermNode(TransposeContext* context, node.mutable_attr()->insert({"dtype", attr_data_type}); AttrValue attr_tensor; - Tensor tensor(DT_INT32, TensorShape({4})); + Tensor tensor(DT_INT32, TensorShape({permutation.size()})); for (int i = 0, end = permutation.size(); i < end; i++) { tensor.flat()(i) = permutation[i]; } @@ -752,6 +752,86 @@ Status Conv2DBackpropInputTransposer::TransposeNode( return context->graph_view->GetMutationBuilder()->Apply(); } +Status Conv3DTransposer::TransposeNode( + TransposeContext* context, utils::MutableNodeView* node) { + DCHECK(IsConv3D(*node->node())); + // Update the format from 4D to 5D layout. + std::string src_format = context->src_format; + std::string dst_format = context->dst_format; + std::string src_format_3d = src_format == "NHWC" ? "NDHWC": "NCDHW"; + std::string dst_format_3d = dst_format == "NHWC" ? "NDHWC": "NCDHW"; + context->AssignDeviceAndDataFormats(context->target_device, src_format_3d, + dst_format_3d); + if (!ShouldProcess(*context, *node) || !IsFanoutPortRankN(*node, 0, 5)) { + return Status::OK(); + } + VLOG(3) << "GenericLayoutOptimizer: transforming node '" << node->GetName() + << "' with op '" << node->GetOp() << "' from data format '" + << context->src_format << "' to '" << context->dst_format << "'"; + TF_RETURN_IF_ERROR(UpdateNode(context, node)); + TF_RETURN_IF_ERROR(UpdateFaninEdgesWithOp(context, {0}, node, kOpTranspose)); + TF_RETURN_IF_ERROR(UpdateFanoutEdgesWithOp(context, {0}, node, kOpTranspose)); + // Change back the format from 5D to 4D layout. + context->AssignDeviceAndDataFormats(context->target_device, src_format, + dst_format); + return context->graph_view->GetMutationBuilder()->Apply(); +} + +Status Conv3DBackpropFilterTransposer::TransposeNode( + TransposeContext* context, utils::MutableNodeView* node) { + DCHECK(IsConv3DBackpropFilterV2(*node->node())); + // Update the format from 4D to 5D layout. 
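+  // e.g. an NHWC/NCHW source/destination pair becomes NDHWC/NCDHW while this
+  // node is processed, so the inserted transposes use 5D permutations.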
+ std::string src_format = context->src_format; + std::string dst_format = context->dst_format; + std::string src_format_3d = src_format == "NHWC" ? "NDHWC": "NCDHW"; + std::string dst_format_3d = dst_format == "NHWC" ? "NDHWC": "NCDHW"; + context->AssignDeviceAndDataFormats(context->target_device, src_format_3d, + dst_format_3d); + if (!ShouldProcess(*context, *node) || !IsFanoutPortRankN(*node, 0, 5)) { + return Status::OK(); + } + VLOG(3) << "GenericLayoutOptimizer: transforming node '" << node->GetName() + << "' with op '" << node->GetOp() << "' from data format '" + << context->src_format << "' to '" << context->dst_format << "'"; + TF_RETURN_IF_ERROR(UpdateNode(context, node)); + TF_RETURN_IF_ERROR( + UpdateFaninEdgesWithOp(context, {0, 2}, node, kOpTranspose)); + // No need to update output shape, as it is always of shape + // [filter_height, filter_width, in_channels, out_channels], regardless of + // whether NCHW or NHWC is used. + // Change back the format from 5D to 4D layout. + context->AssignDeviceAndDataFormats(context->target_device, src_format, + dst_format); + return context->graph_view->GetMutationBuilder()->Apply(); +} + +Status Conv3DBackpropInputTransposer::TransposeNode( + TransposeContext* context, utils::MutableNodeView* node) { + DCHECK(IsConv3DBackpropInputV2(*node->node())); + // Update the format from 4D to 5D layout. + std::string src_format = context->src_format; + std::string dst_format = context->dst_format; + std::string src_format_3d = src_format == "NHWC" ? "NDHWC": "NCDHW"; + std::string dst_format_3d = dst_format == "NHWC" ? "NDHWC": "NCDHW"; + context->AssignDeviceAndDataFormats(context->target_device, src_format_3d, + dst_format_3d); + if (!ShouldProcess(*context, *node) || !IsFanoutPortRankN(*node, 0, 5)) { + return Status::OK(); + } + VLOG(3) << "GenericLayoutOptimizer: transforming node '" << node->GetName() + << "' with op '" << node->GetOp() << "' from data format '" + << context->src_format << "' to '" << context->dst_format << "'"; + TF_RETURN_IF_ERROR(UpdateNode(context, node)); + TF_RETURN_IF_ERROR( + UpdateFaninEdgesWithOp(context, {0}, node, kOpDataFormatVecPermute)); + TF_RETURN_IF_ERROR(UpdateFaninEdgesWithOp(context, {2}, node, kOpTranspose)); + TF_RETURN_IF_ERROR(UpdateFanoutEdgesWithOp(context, {0}, node, kOpTranspose)); + // Change back the format from 5D to 4D layout. 
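+  // Restoring the original 4D formats lets the remaining (4D) nodes in the
+  // graph continue to be processed with the NHWC/NCHW pair.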
+ context->AssignDeviceAndDataFormats(context->target_device, src_format, + dst_format); + return context->graph_view->GetMutationBuilder()->Apply(); +} + Status FusedBatchNormExTransposer::TransposeNode(TransposeContext* context, utils::MutableNodeView* node) { DCHECK(IsFusedBatchNormEx(*node->node())); @@ -1684,7 +1764,9 @@ bool IsLayoutSensitiveOp(const NodeDef& node) { IsDepthwiseConv2dNativeBackpropInput(node) || IsFusedBatchNormEx(node) || IsFusedBatchNormGrad(node) || IsMaxPoolV2(node) || IsMaxPoolGrad(node) || IsMaxPoolGradV2(node) || - IsMaxPoolGradGradV1(node) || IsMaxPoolGradGradV2(node); + IsMaxPoolGradGradV1(node) || IsMaxPoolGradGradV2(node) || + IsConv3D(node) || IsConv3DBackpropInputV2(node) || + IsConv3DBackpropFilterV2(node); } bool IsDefaultLayoutAgnosticOp(const NodeDef& node) { diff --git a/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer.h b/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer.h index 95af7933d10..b1de9561bfc 100644 --- a/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer.h +++ b/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer.h @@ -239,6 +239,30 @@ class Conv2DBackpropInputTransposer : public LayoutSensitiveOpTransposer { utils::MutableNodeView* node) override; }; +class Conv3DTransposer : public LayoutSensitiveOpTransposer { + public: + explicit Conv3DTransposer() : LayoutSensitiveOpTransposer() {} + + Status TransposeNode(TransposeContext* context, + utils::MutableNodeView* node) override; +}; + +class Conv3DBackpropFilterTransposer: public LayoutSensitiveOpTransposer { + public: + explicit Conv3DBackpropFilterTransposer() : LayoutSensitiveOpTransposer() {} + + Status TransposeNode(TransposeContext* context, + utils::MutableNodeView* node) override; +}; + +class Conv3DBackpropInputTransposer: public LayoutSensitiveOpTransposer { + public: + explicit Conv3DBackpropInputTransposer() : LayoutSensitiveOpTransposer() {} + + Status TransposeNode(TransposeContext* context, + utils::MutableNodeView* node) override; +}; + class FusedBatchNormExTransposer : public LayoutSensitiveOpTransposer { public: explicit FusedBatchNormExTransposer() : LayoutSensitiveOpTransposer() {} diff --git a/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer_factory.cc b/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer_factory.cc index 59c06d42441..15bbc08079c 100644 --- a/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer_factory.cc +++ b/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer_factory.cc @@ -43,6 +43,17 @@ std::shared_ptr TransposerFactory::GetTransposer( return GetOrCreateIfNotFound( "Conv2DBackpropInput"); } + if (IsConv3D(node)) { + return GetOrCreateIfNotFound("Conv3D"); + } + if (IsConv3DBackpropInputV2(node)) { + return GetOrCreateIfNotFound( + "Conv3DBackpropInput"); + } + if (IsConv3DBackpropFilterV2(node)) { + return GetOrCreateIfNotFound( + "Conv3DBackpropFilter"); + } if (IsFusedBatchNormEx(node)) { return GetOrCreateIfNotFound( "FusedBatchNormEx"); diff --git a/tensorflow/core/kernels/data_format_ops.cc b/tensorflow/core/kernels/data_format_ops.cc index 181aa1b8a2c..14f3ea472c3 100644 --- a/tensorflow/core/kernels/data_format_ops.cc +++ b/tensorflow/core/kernels/data_format_ops.cc @@ -90,9 +90,10 @@ class DataFormatVecPermuteOp : public OpKernel { "input must be a vector or 2D tensor, but got shape ", input.shape().DebugString())); if (input.dims() == 1) { - OP_REQUIRES(context, 
input.NumElements() == 2 || input.NumElements() == 4, + OP_REQUIRES(context, input.NumElements() == 2 || + input.NumElements() == 4 || input.NumElements() == 5, errors::InvalidArgument( - "1D input must be of size 2 or 4, but got shape ", + "1D input must be of size 2, 4 or 5, but got shape ", input.shape().DebugString())); } else if (input.dims() == 2) { OP_REQUIRES(context, input.dim_size(0) == 2 || input.dim_size(0) == 4, diff --git a/tensorflow/python/grappler/layout_optimizer_test.py b/tensorflow/python/grappler/layout_optimizer_test.py index 10f869805d8..9b37bc9fc1b 100644 --- a/tensorflow/python/grappler/layout_optimizer_test.py +++ b/tensorflow/python/grappler/layout_optimizer_test.py @@ -212,6 +212,12 @@ class LayoutOptimizerTest(test.TestCase): def _assert_trans_nhwc_to_nchw(self, name, nodes): self.assertIn(name + '-TransposeNHWCToNCHW-LayoutOptimizer', nodes) + def _assert_trans_ncdhw_to_ndhwc(self, name, nodes): + self.assertIn(name + '-TransposeNCDHWToNDHWC-LayoutOptimizer', nodes) + + def _assert_trans_ndhwc_to_ncdhw(self, name, nodes): + self.assertIn(name + '-TransposeNDHWCToNCDHW-LayoutOptimizer', nodes) + def _assert_map_nhwc_to_nchw(self, name, nodes): self.assertIn(name + '-DimMapNHWCToNCHW-LayoutOptimizer', nodes) @@ -221,6 +227,14 @@ class LayoutOptimizerTest(test.TestCase): def _assert_vec_nhwc_to_nchw(self, name, nodes): self.assertIn(name + '-VecPermuteNHWCToNCHW-LayoutOptimizer', nodes) + def _assert_vec_ncdhw_to_ndhwc(self, name, nodes): + self.assertIn(name + '-DataFormatVecPermuteNCDHWToNDHWC-LayoutOptimizer', + nodes) + + def _assert_vec_ndhwc_to_ncdhw(self, name, nodes): + self.assertIn(name + '-DataFormatVecPermuteNDHWCToNCDHW-LayoutOptimizer', + nodes) + def _train(self, checkpoint_path, layout_optimizer=False, restore=False): ops.reset_default_graph() graph = ops.get_default_graph() @@ -1121,6 +1135,113 @@ class LayoutOptimizerTest(test.TestCase): self.assertIn('MaxPoolGradV2-3-LayoutOptimizer', nodes) self.assertAllClose(output_val_ref, output_val, atol=1e-3) + @test_util.deprecated_graph_mode_only + def testConv3D(self): + if test.is_gpu_available(cuda_only=True): + random_seed.set_random_seed(0) + x = random_ops.truncated_normal([1, 784], seed=0) + conv = _two_layer_model(x) + filters = random_ops.truncated_normal([2, 2, 2, 1, 2], seed=0) + strides_val = [1, 1, 1, 1, 1] + x_3d = array_ops.reshape(conv, [-1, 4, 14, 14, 1]) + conv3d = gen_nn_ops.conv3d(x_3d, filters, strides_val, 'VALID') + output = array_ops.identity(conv3d) + + with session.Session(config=_get_config(False)) as sess: + output_val_ref = sess.run(output) + + with session.Session(config=_get_config()) as sess: + metadata = config_pb2.RunMetadata() + output_val = sess.run( + output, run_metadata=metadata) + + nodes = [] + num_transposes = 0 + for node in metadata.cost_graph.node: + if _is_transpose(node.name): + num_transposes += 1 + nodes.append(node.name) + + expected_num_transposes = 2 + self.assertEqual(expected_num_transposes, num_transposes) + self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) + self._assert_trans_ndhwc_to_ncdhw('Conv3D-0', nodes) + self._assert_trans_ncdhw_to_ndhwc('Conv3D-0-0', nodes) + self.assertAllClose(output_val_ref, output_val, atol=1e-3) + + @test_util.deprecated_graph_mode_only + def testConv3DBackpropInput(self): + if test.is_gpu_available(cuda_only=True): + random_seed.set_random_seed(0) + x = random_ops.truncated_normal([1, 784], seed=0) + conv = _two_layer_model(x) + x_3d = array_ops.reshape(conv, [-1, 4, 14, 14, 1]) + filters = 
random_ops.truncated_normal([2, 2, 2, 1, 1], seed=0) + strides_val = [1, 1, 1, 1, 1] + shape = array_ops.shape(x_3d) + conv3d_grad = gen_nn_ops.conv3d_backprop_input_v2( + shape, filters, x_3d, strides_val, 'SAME') + output = array_ops.identity(conv3d_grad) + + with session.Session(config=_get_config(False)) as sess: + output_val_ref = sess.run(output) + + with session.Session(config=_get_config()) as sess: + metadata = config_pb2.RunMetadata() + output_val = sess.run( + output, run_metadata=metadata) + + nodes = [] + num_transposes = 0 + for node in metadata.cost_graph.node: + if _is_transpose(node.name): + num_transposes += 1 + nodes.append(node.name) + + expected_num_transposes = 2 + self.assertEqual(expected_num_transposes, num_transposes) + self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) + self._assert_vec_ndhwc_to_ncdhw('Conv3DBackpropInputV2-0', nodes) + self._assert_trans_ndhwc_to_ncdhw('Conv3DBackpropInputV2-2', nodes) + self._assert_trans_ncdhw_to_ndhwc('Conv3DBackpropInputV2-0-0', nodes) + self.assertAllClose(output_val_ref, output_val, atol=1e-3) + + @test_util.deprecated_graph_mode_only + def testConv3DBackpropFilter(self): + if test.is_gpu_available(cuda_only=True): + random_seed.set_random_seed(0) + x = random_ops.truncated_normal([1, 784], seed=0) + conv = _two_layer_model(x) + x_3d = array_ops.reshape(conv, [-1, 4, 14, 14, 1]) + filters = random_ops.truncated_normal([2, 2, 2, 1, 1], seed=0) + strides_val = [1, 1, 1, 1, 1] + shape = constant_op.constant([2, 2, 2, 1, 1], shape=[5]) + conv3d_grad = gen_nn_ops.conv3d_backprop_filter_v2( + x_3d, shape, x_3d, strides_val, 'SAME') + output = array_ops.identity(conv3d_grad) + + with session.Session(config=_get_config(False)) as sess: + output_val_ref = sess.run(output) + + with session.Session(config=_get_config()) as sess: + metadata = config_pb2.RunMetadata() + output_val = sess.run( + output, run_metadata=metadata) + + nodes = [] + num_transposes = 0 + for node in metadata.cost_graph.node: + if _is_transpose(node.name): + num_transposes += 1 + nodes.append(node.name) + + expected_num_transposes = 2 + self.assertEqual(expected_num_transposes, num_transposes) + self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) + self._assert_trans_ndhwc_to_ncdhw('Conv3DBackpropFilterV2-0', nodes) + self._assert_trans_ndhwc_to_ncdhw('Conv3DBackpropFilterV2-2', nodes) + self.assertAllClose(output_val_ref, output_val, atol=1e-3) + @test_util.deprecated_graph_mode_only def testSliceWithNonConstAxis(self): if test.is_gpu_available(cuda_only=True): From d623697b83541087f759f5f5533fde2c4d86059c Mon Sep 17 00:00:00 2001 From: Kaixi Hou Date: Wed, 22 Jul 2020 15:46:32 -0700 Subject: [PATCH 1977/2522] Add Conv3D in layout opt decision --- .../optimizers/generic_layout_optimizer.cc | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/generic_layout_optimizer.cc b/tensorflow/core/grappler/optimizers/generic_layout_optimizer.cc index 969857879af..6053f96ae08 100644 --- a/tensorflow/core/grappler/optimizers/generic_layout_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/generic_layout_optimizer.cc @@ -38,7 +38,7 @@ namespace { constexpr char kNHWC[] = "NHWC"; constexpr char kNCHW[] = "NCHW"; constexpr float kVoltaGPURatioThreshold = 0.5; -constexpr float kConv2DGPUFP16Threshold = 0.5; +constexpr float kConvGPUFP16Threshold = 0.5; struct MutableNodeViewFormatter { void operator()(std::string* out, utils::MutableNodeView* node_view) const { @@ -69,15 +69,15 @@ inline std::pair 
GetNumGPUs(const Cluster& cluster) { return {num_gpus, num_volta}; } -inline bool NumConv2DOnDeviceWithDataTypeOverThreshold( +inline bool NumConvOnDeviceWithDataTypeOverThreshold( const TransposeContext& context, absl::string_view device, const DataType& data_type) { - int num_conv2d_gpu = 0; - int num_conv2d_gpu_fp16 = 0; + int num_conv_gpu = 0; + int num_conv_gpu_fp16 = 0; for (const auto& node : context.graph_view->GetNodes()) { const auto* node_def = node.node(); - if (!IsConv2D(*node_def)) { + if (!IsConv2D(*node_def) or !IsConv3D(*node_def)) { continue; } const string& device_name = @@ -89,20 +89,20 @@ inline bool NumConv2DOnDeviceWithDataTypeOverThreshold( absl::AsciiStrToLower(device))) { continue; } - num_conv2d_gpu++; + num_conv_gpu++; const auto* t_attr = node.GetAttr("T"); if (t_attr == nullptr) { continue; } if (t_attr->type() == data_type) { - num_conv2d_gpu_fp16++; + num_conv_gpu_fp16++; } } - if (num_conv2d_gpu == 0) return false; + if (num_conv_gpu == 0) return false; - return (static_cast(num_conv2d_gpu_fp16) / - static_cast(num_conv2d_gpu)) >= kConv2DGPUFP16Threshold; + return (static_cast(num_conv_gpu_fp16) / + static_cast(num_conv_gpu)) >= kConvGPUFP16Threshold; } inline std::pair GetSrcAndDstDataFormats( @@ -111,7 +111,7 @@ inline std::pair GetSrcAndDstDataFormats( string dst_format = kNCHW; if (((static_cast(num_voltas) / static_cast(num_gpus)) >= kVoltaGPURatioThreshold) && - NumConv2DOnDeviceWithDataTypeOverThreshold(context, kGPU, DT_HALF)) { + NumConvOnDeviceWithDataTypeOverThreshold(context, kGPU, DT_HALF)) { std::swap(src_format, dst_format); } return {src_format, dst_format}; From 0b68f7509ff304bf28289b7da100f4853c45dbfb Mon Sep 17 00:00:00 2001 From: Alexander Belyaev Date: Mon, 3 Aug 2020 10:55:29 -0700 Subject: [PATCH 1978/2522] [MLIR][KERNEL_GEN] Add embed-tf-framework pass. The pass rewrites the function marked with `tf_entry` attribute. 
* adds tf_framework::OpKernelContextType argument to the function * std.alloc becomes tf_framework.alloc_raw * std.dealloc becomes tf_framework.dealloc_raw PiperOrigin-RevId: 324636895 Change-Id: I5d733ec05c69438f4a7677573cf69155f785105e --- .../kernel_gen/tests/embed_tf_framework.mlir | 37 +++++ .../mlir/tools/kernel_gen/transforms/BUILD | 18 +++ .../transforms/embed_tf_framework.cc | 127 ++++++++++++++++++ .../transforms/embed_tf_framework_pass.cc | 77 +++++++++++ .../mlir/tools/kernel_gen/transforms/passes.h | 6 + .../tools/kernel_gen/transforms/passes.td | 5 + .../tools/kernel_gen/transforms/rewriters.h | 11 +- 7 files changed, 279 insertions(+), 2 deletions(-) create mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/tests/embed_tf_framework.mlir create mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/transforms/embed_tf_framework.cc create mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/transforms/embed_tf_framework_pass.cc diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tests/embed_tf_framework.mlir b/tensorflow/compiler/mlir/tools/kernel_gen/tests/embed_tf_framework.mlir new file mode 100644 index 00000000000..bb0f1926cda --- /dev/null +++ b/tensorflow/compiler/mlir/tools/kernel_gen/tests/embed_tf_framework.mlir @@ -0,0 +1,37 @@ +// RUN: kernel-gen-opt %s -embed-tf-framework -split-input-file | FileCheck %s + +// CHECK-LABEL: func @tf_entry( +// CHECK-SAME: [[CTX:%.*]]: !tf_framework.op_kernel_context, +// CHECK-SAME: [[SIZE_0:%.*]]: index, +// CHECK-SAME: [[SIZE_2:%.*]]: index) -> index attributes {tf_entry} { +func @tf_entry(%size_0 : index , %size_2 : index) -> index + attributes {tf_entry} { + %buf = alloc(%size_0, %size_2)[] : memref + dealloc %buf : memref + std.return %size_0 : index +} +// CHECK-NEXT: [[VAL_3:%.*]] = tf_framework.alloc_raw +// CHECK-SAME: ([[CTX]], [[SIZE_0]], [[SIZE_2]]) : memref +// CHECK-NEXT: tf_framework.dealloc_raw([[CTX]], [[VAL_3]]) : memref +// CHECK-NEXT: return [[SIZE_0]] : index + +// ----- + +// CHECK-LABEL: func @non_tf_entry( +// CHECK-SAME: [[SIZE_0:%.*]]: index, [[SIZE_2:%.*]]: index) -> index +func @non_tf_entry(%size_0 : index , %size_2 : index) -> index { + std.return %size_0 : index +} + +// ----- + +// CHECK-LABEL: func @tf_entry( +func @tf_entry(%size : index) attributes {tf_entry} { + %buf = alloc()[%size] : memref<64xf32, affine_map<(d0)[s0] -> (d0 + s0)>> + dealloc %buf : memref<64xf32, affine_map<(d0)[s0] -> (d0 + s0)>> + std.return +} +// CHECK_NOT: alloc_raw +// CHECK: alloc() +// CHECK_NOT: dealloc_raw +// CHECK: dealloc % diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD index 15c0d571e61..0119b2e46ea 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD @@ -20,6 +20,21 @@ cc_library( ], ) +cc_library( + name = "embed_tf_framework", + srcs = ["embed_tf_framework.cc"], + hdrs = ["rewriters.h"], + deps = [ + "//tensorflow/compiler/mlir/tools/kernel_gen/ir:tf_framework_ops", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:LLVMDialect", + "@llvm-project//mlir:LLVMTransforms", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:Support", + "@llvm-project//mlir:Transforms", + ], +) + gentbl( name = "tf_framework_passes_inc_gen", tbl_outs = [("-gen-pass-decls", "tf_framework_passes.h.inc")], @@ -31,11 +46,13 @@ gentbl( cc_library( name = "passes", srcs = [ + "embed_tf_framework_pass.cc", "register_passes.cc", 
"tf_framework_legalize_to_llvm_pass.cc", ], hdrs = ["passes.h"], deps = [ + ":embed_tf_framework", ":tf_framework_legalize_to_llvm", ":tf_framework_passes_inc_gen", "//tensorflow/compiler/mlir/tools/kernel_gen/ir:tf_framework_ops", @@ -43,6 +60,7 @@ cc_library( "@llvm-project//mlir:LLVMTransforms", "@llvm-project//mlir:Pass", "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:Transforms", ], alwayslink = 1, ) diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/embed_tf_framework.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/embed_tf_framework.cc new file mode 100644 index 00000000000..aa02aefa9d2 --- /dev/null +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/embed_tf_framework.cc @@ -0,0 +1,127 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project +#include "mlir/IR/Function.h" // from @llvm-project +#include "mlir/IR/Module.h" // from @llvm-project +#include "mlir/IR/StandardTypes.h" // from @llvm-project +#include "mlir/Transforms/DialectConversion.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.h" + +namespace mlir { +namespace kernel_gen { +namespace tf_framework { +namespace { + +// Prepends argument type list of the function with an OpKernelContextType arg. +class FuncOpConverter : public OpConversionPattern { + public: + using OpConversionPattern::OpConversionPattern; + + LogicalResult matchAndRewrite( + FuncOp func, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override { + // Convert function arguments using the provided TypeConverter. + auto func_type = func.getType(); + TypeConverter::SignatureConversion conversion(func_type.getNumInputs()); + + conversion.addInputs(OpKernelContextType::get(rewriter.getContext())); + for (auto arg_type : llvm::enumerate(func_type.getInputs())) { + conversion.addInputs(arg_type.index(), arg_type.value()); + } + + TypeConverter type_converter; + if (failed(rewriter.convertRegionTypes(&func.getBody(), type_converter, + &conversion))) { + return failure(); + } + + // Update the signature of the function. + rewriter.updateRootInPlace(func, [&] { + func.setType(rewriter.getFunctionType(conversion.getConvertedTypes(), + func_type.getResults())); + }); + return success(); + } +}; + +// Converts std.alloc to tf_framework.alloc_raw using OpKernelContextType arg of +// the parent function. 
+class AllocOpConverter : public OpConversionPattern { + public: + using OpConversionPattern::OpConversionPattern; + + LogicalResult matchAndRewrite( + AllocOp alloc, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override { + auto func = alloc.getParentOfType(); + if (func.getNumArguments() == 0) { + return failure(); + } + Value ctx = func.getArgument(0); + if (!ctx.getType().isa()) { + return failure(); + } + // Symbolic operands that bind to the symbols of the memref's layout map are + // not supported by AllocRawOp. + if (alloc.getNumSymbolicOperands() != 0) { + return failure(); + } + rewriter.replaceOpWithNewOp(alloc, alloc.getType(), ctx, + operands); + return success(); + } +}; + +// Converts std.dealloc to tf_framework.dealloc_raw using OpKernelContextType +// arg of the parent function. +class DeallocOpConverter : public OpConversionPattern { + public: + using OpConversionPattern::OpConversionPattern; + + LogicalResult matchAndRewrite( + DeallocOp dealloc, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override { + FuncOp func = dealloc.getParentOfType(); + if (func.getNumArguments() == 0) { + return failure(); + } + Value ctx = func.getArgument(0); + if (!ctx.getType().isa()) { + return failure(); + } + // Operand with no layout is expected. + auto operand_memref_type = dealloc.memref().getType().cast(); + if (!operand_memref_type.getAffineMaps().empty()) { + return failure(); + } + DeallocOp::Adaptor transformed(operands); + rewriter.replaceOpWithNewOp(dealloc, ctx, + transformed.memref()); + return success(); + } +}; + +} // namespace + +void PopulateEmbedTFFrameworkConversionPatterns( + MLIRContext *context, OwningRewritePatternList *patterns) { + patterns->insert( + context); +} + +} // namespace tf_framework +} // namespace kernel_gen +} // namespace mlir diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/embed_tf_framework_pass.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/embed_tf_framework_pass.cc new file mode 100644 index 00000000000..615c596e353 --- /dev/null +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/embed_tf_framework_pass.cc @@ -0,0 +1,77 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project +#include "mlir/Pass/Pass.h" // from @llvm-project +#include "mlir/Transforms/DialectConversion.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.h" +#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h" +#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/rewriters.h" + +namespace mlir { +namespace kernel_gen { +namespace tf_framework { +namespace { + +#define GEN_PASS_CLASSES +#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_passes.h.inc" + +static constexpr StringRef kTFEntry = "tf_entry"; + +// The pass rewrites the function marked with `tf_entry` attribute. +// * adds tf_framework::OpKernelContextType argument to the function, +// * std.alloc becomes tf_framework.alloc_raw, +// * std.dealloc becomes tf_framework.dealloc_raw. +class EmbedTFFrameworkPass + : public EmbedTFFrameworkPassBase { + public: + void runOnOperation() override { + ModuleOp m = getOperation(); + + // Populate patterns. + OwningRewritePatternList patterns; + PopulateEmbedTFFrameworkConversionPatterns(m.getContext(), &patterns); + + // Set target. + ConversionTarget target(getContext()); + target.addLegalDialect(); + + target.addDynamicallyLegalOp([&](FuncOp op) { + if (!op.getAttrOfType(kTFEntry)) { + return true; + } + FunctionType func_type = op.getType(); + return func_type.getNumInputs() > 0 && + func_type.getInput(0).isa(); + }); + target.addDynamicallyLegalOp([](Operation* op) { + return !op->getParentOfType().getAttrOfType(kTFEntry); + }); + + if (failed(applyPartialConversion(m, target, patterns))) { + signalPassFailure(); + } + } +}; + +} // namespace + +std::unique_ptr > createEmbedTFFrameworkPass() { + return std::make_unique(); +} + +} // namespace tf_framework +} // namespace kernel_gen +} // namespace mlir diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h index 89871ba3faf..c6aaeb92c56 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h @@ -31,6 +31,12 @@ namespace tf_framework { std::unique_ptr > createTestTFFrameworkLegalizeToLLVMPass(); +// Pass to replace some of the Standard ops with TF Framework ops. 
+// * adds tf_framework::OpKernelContextType argument to the function +// * std.alloc becomes tf_framework.alloc_raw +// * std.dealloc becomes tf_framework.dealloc_raw +std::unique_ptr > createEmbedTFFrameworkPass(); + } // namespace tf_framework } // namespace kernel_gen } // namespace mlir diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.td b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.td index 71e50379ce7..8c4d5801f51 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.td +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.td @@ -24,4 +24,9 @@ def TestTFFrameworkLegalizeToLLVMPass let constructor = "createTestTFFrameworkLegalizeToLLVMPass()"; } +def EmbedTFFrameworkPass : Pass<"embed-tf-framework", "ModuleOp"> { + let summary = "Pass to embed TF Framework for allocation and error reporting"; + let constructor = "createEmbedTFFrameworkPass()"; +} + #endif // TF_FRAMEWORK_PASSES diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/rewriters.h b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/rewriters.h index 28dba379738..257e84b4a21 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/rewriters.h +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/rewriters.h @@ -16,19 +16,26 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_MLIR_TOOLS_KERNEL_GEN_TRANSFORMS_REWRITERS_H_ #define TENSORFLOW_COMPILER_MLIR_TOOLS_KERNEL_GEN_TRANSFORMS_REWRITERS_H_ +#include "mlir/IR/MLIRContext.h" // from @llvm-project + namespace mlir { class LLVMTypeConverter; -class LowerToLLVMOptions; +class MLIRContext; class OwningRewritePatternList; +class TypeConverter; namespace kernel_gen { namespace tf_framework { -/// Collect a set of patterns to convert from the TF Framework dialect to LLVM. +/// Collects a set of patterns to convert from the TF Framework dialect to LLVM. void PopulateTFFrameworkToLLVMConversionPatterns( LLVMTypeConverter *converter, OwningRewritePatternList *patterns); +/// Collects a set of patterns to embed TF Framework. +void PopulateEmbedTFFrameworkConversionPatterns( + MLIRContext *context, OwningRewritePatternList *patterns); + } // namespace tf_framework } // namespace kernel_gen } // namespace mlir From 49bb08c055889f90d380d5c28ca7506e02c7e44a Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Sat, 1 Aug 2020 16:04:53 +0700 Subject: [PATCH 1979/2522] Add c interface for `core/platform:logging` --- tensorflow/c/BUILD | 11 ++++++++ tensorflow/c/logging.cc | 59 +++++++++++++++++++++++++++++++++++++++++ tensorflow/c/logging.h | 42 +++++++++++++++++++++++++++++ 3 files changed, 112 insertions(+) create mode 100644 tensorflow/c/logging.cc create mode 100644 tensorflow/c/logging.h diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD index 410fc22069f..5f64c43dfd3 100644 --- a/tensorflow/c/BUILD +++ b/tensorflow/c/BUILD @@ -213,6 +213,17 @@ tf_cuda_library( alwayslink = 1, ) +cc_library( + name = "logging", + srcs = ["logging.cc"], + hdrs = ["logging.h"], + deps = [ + ":c_api", + "//tensorflow/core/platform:logging", + "//tensorflow/core/platform:stringprintf", + ], +) + tf_cuda_library( name = "tf_status_internal", hdrs = [ diff --git a/tensorflow/c/logging.cc b/tensorflow/c/logging.cc new file mode 100644 index 00000000000..bf6bf069fff --- /dev/null +++ b/tensorflow/c/logging.cc @@ -0,0 +1,59 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/c/logging.h" + +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/stringprintf.h" + +static ::tensorflow::string BuildMessage(const char* fmt, va_list args) { + ::tensorflow::string message; + ::tensorflow::strings::Appendv(&message, fmt, args); + return message; +} + +void TF_Log(TF_LogLevel level, const char* fmt, ...) { + if (level < TF_INFO || level > TF_FATAL) return; + va_list args; + va_start(args, fmt); + auto message = BuildMessage(fmt, args); + switch (level) { + case TF_INFO: + LOG(INFO) << message; + break; + case TF_WARNING: + LOG(WARNING) << message; + break; + case TF_ERROR: + LOG(ERROR) << message; + break; + case TF_FATAL: + LOG(FATAL) << message; + break; + } +} + +void TF_VLog(int level, const char* fmt, ...) { + va_list args; + va_start(args, fmt); + auto message = BuildMessage(fmt, args); + VLOG(level) << message; +} + +void TF_DVLog(int level, const char* fmt, ...) { + va_list args; + va_start(args, fmt); + auto message = BuildMessage(fmt, args); + DVLOG(level) << message; +} diff --git a/tensorflow/c/logging.h b/tensorflow/c/logging.h new file mode 100644 index 00000000000..ad97cbf8c8a --- /dev/null +++ b/tensorflow/c/logging.h @@ -0,0 +1,42 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_C_LOGGING_H_ +#define TENSORFLOW_C_LOGGING_H_ + +#include "tensorflow/c/c_api.h" + +// -------------------------------------------------------------------------- +// C API for tensorflow::Logging. + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum TF_LogLevel { + TF_INFO = 0, + TF_WARNING = 1, + TF_ERROR = 2, + TF_FATAL = 3, +} TF_LogLevel; + +TF_CAPI_EXPORT extern void TF_Log(TF_LogLevel level, const char* fmt, ...); +TF_CAPI_EXPORT extern void TF_VLog(int level, const char* fmt, ...); +TF_CAPI_EXPORT extern void TF_DVLog(int level, const char* fmt, ...); + +#ifdef __cplusplus +} +#endif + +#endif // TENSORFLOW_C_LOGGING_H_ From a1b5894af841bc9c267f81bde384427facad074b Mon Sep 17 00:00:00 2001 From: Andy Ly Date: Mon, 3 Aug 2020 11:00:05 -0700 Subject: [PATCH 1980/2522] Update LegalizeTFCommunication to deference optionals instead of calling getValue() explicitly (NFC). 
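For example, the hunks below rewrite calls such as `index.getValue()` and
`tpu_core.getValue()` into the equivalent dereferences `*index` and
`*tpu_core`.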
PiperOrigin-RevId: 324638102 Change-Id: I56de41274619bb31a4a2212762d560710886022c --- .../transforms/legalize_tf_communication.cc | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_communication.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_communication.cc index 42c719da266..588e31ab669 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_communication.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_communication.cc @@ -96,7 +96,7 @@ llvm::SmallDenseMap GetFunctionsToRewrite( for (FuncOp& func : funcs_to_visit) { auto uses = func.getSymbolUses(module); if (!uses) continue; - for (auto& use : uses.getValue()) { + for (auto& use : *uses) { // Only `mlir::CallOp` is supported as this requires knowing how to // rewrite arguments and results to a function. if (!isa(use.getUser())) continue; @@ -189,13 +189,13 @@ Value CreateSendOp(OpBuilder& builder, int64_t& channel_id, Location loc, /*is_host_transfer=*/builder.getBoolAttr(true)); if (index) { - SetFrontendAttributes(send, index.getValue(), key, operand.getType(), + SetFrontendAttributes(send, *index, key, operand.getType(), /*device_to_host=*/true); } else { SetFrontendAttributes(send, key, operand.getType()); } - if (tpu_core) SetOpSharding(send, tpu_core.getValue()); + if (tpu_core) SetOpSharding(send, *tpu_core); return send.getResult(); } @@ -217,22 +217,22 @@ Value CreateRecvOp(OpBuilder& builder, int64_t& channel_id, Location loc, builder.create(loc, recv_result_type, token, channel_handle, /*is_host_transfer=*/builder.getBoolAttr(true)); if (index) { - SetFrontendAttributes(recv, index.getValue(), key, result_type, + SetFrontendAttributes(recv, *index, key, result_type, /*device_to_host=*/false); } else { SetFrontendAttributes(recv, key, result.getType()); } - if (tpu_core) SetOpSharding(recv, tpu_core.getValue()); + if (tpu_core) SetOpSharding(recv, *tpu_core); auto get_tuple_element = builder.create(loc, recv.getResult(), /*index=*/0); - if (tpu_core) SetOpSharding(get_tuple_element, tpu_core.getValue()); + if (tpu_core) SetOpSharding(get_tuple_element, *tpu_core); result.replaceAllUsesWith(get_tuple_element); auto new_token = builder.create(loc, recv.getResult(), /*index=*/1); - if (tpu_core) SetOpSharding(new_token, tpu_core.getValue()); + if (tpu_core) SetOpSharding(new_token, *tpu_core); return new_token.getResult(); } @@ -320,8 +320,8 @@ Value RewriteCallOp(OpBuilder& builder, CallOp call, auto new_result_types = llvm::to_vector<4>(call.getResultTypes()); new_result_types.push_back(token.getType()); auto new_call = builder.create( - call.getLoc(), new_result_types, - new_symbol ? new_symbol.getValue() : call.callee(), new_operands); + call.getLoc(), new_result_types, new_symbol ? 
*new_symbol : call.callee(), + new_operands); for (auto results : llvm::zip(call.getResults(), new_call.getResults())) std::get<0>(results).replaceAllUsesWith(std::get<1>(results)); From 7fafd216eb116a092a85d35d1e56d486c36d8727 Mon Sep 17 00:00:00 2001 From: amturati Date: Mon, 3 Aug 2020 18:19:43 +0000 Subject: [PATCH 1981/2522] changed OnesLike to ZerosLike --- tensorflow/c/eager/gradients.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/c/eager/gradients.cc b/tensorflow/c/eager/gradients.cc index cf62dcea926..406da1291ae 100644 --- a/tensorflow/c/eager/gradients.cc +++ b/tensorflow/c/eager/gradients.cc @@ -101,7 +101,7 @@ AbstractTensorHandle* TapeTensor::ZerosLike() const { } if (isa(op.get())) { s = dyn_cast(op.get())->SetOpName( - absl::StrCat("OnesLike", ToId(handle_)).c_str()); + absl::StrCat("ZerosLike", ToId(handle_)).c_str()); if (!s.ok()) { return nullptr; } From 9bcf2657a228115381f85275711e90501480690d Mon Sep 17 00:00:00 2001 From: Jose Baiocchi Date: Mon, 3 Aug 2020 11:07:06 -0700 Subject: [PATCH 1982/2522] Fix TraceMe in external/compat.h PiperOrigin-RevId: 324640019 Change-Id: I8248b642248ae8b562d85f05a98bd1d2ce3dbab5 --- .../xla/python/tpu_driver/platform/external/compat.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/python/tpu_driver/platform/external/compat.h b/tensorflow/compiler/xla/python/tpu_driver/platform/external/compat.h index 285d59e2304..0c7cc370e2a 100644 --- a/tensorflow/compiler/xla/python/tpu_driver/platform/external/compat.h +++ b/tensorflow/compiler/xla/python/tpu_driver/platform/external/compat.h @@ -35,7 +35,13 @@ class Thread { class TraceMe { public: - explicit TraceMe(absl::string_view tag, int level = 1) {} + explicit TraceMe(absl::string_view name, int level = 1) {} + explicit TraceMe(std::string&& name, int level = 1) = delete; + explicit TraceMe(const std::string& name, int level = 1) = delete; + explicit TraceMe(const char* raw, int level = 1) + : TraceMe(absl::string_view(raw), level) {} + template + explicit TraceMe(NameGeneratorT name_generator, int level = 1) {} ~TraceMe() {} }; From 043c4d515f352eff052b42dfc5a4bf5fe0dc00f6 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Tue, 4 Aug 2020 01:26:23 +0700 Subject: [PATCH 1983/2522] Use c_api_macros instead of c_api --- tensorflow/c/BUILD | 2 +- tensorflow/c/logging.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD index 5f64c43dfd3..e5efe323922 100644 --- a/tensorflow/c/BUILD +++ b/tensorflow/c/BUILD @@ -218,7 +218,7 @@ cc_library( srcs = ["logging.cc"], hdrs = ["logging.h"], deps = [ - ":c_api", + ":c_api_macros", "//tensorflow/core/platform:logging", "//tensorflow/core/platform:stringprintf", ], diff --git a/tensorflow/c/logging.h b/tensorflow/c/logging.h index ad97cbf8c8a..9583777b661 100644 --- a/tensorflow/c/logging.h +++ b/tensorflow/c/logging.h @@ -15,7 +15,7 @@ limitations under the License. #ifndef TENSORFLOW_C_LOGGING_H_ #define TENSORFLOW_C_LOGGING_H_ -#include "tensorflow/c/c_api.h" +#include "tensorflow/c/c_api_macros.h" // -------------------------------------------------------------------------- // C API for tensorflow::Logging. From cf43fd2af62a1d2188462176d6ffd36d9765231a Mon Sep 17 00:00:00 2001 From: Zhenyu Tan Date: Mon, 3 Aug 2020 11:20:03 -0700 Subject: [PATCH 1984/2522] Let linear model support dict inputs. 
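A minimal usage sketch, mirroring the new unit tests (the feature names 'a'
and 'b' and the array shapes are only placeholders; `np` and `linear` are
imported as in the test file):

    input_a = np.random.uniform(low=-5, high=5, size=(64, 1))
    input_b = np.random.uniform(low=-5, high=5, size=(64, 1))
    output = .3 * input_a + .2 * input_b
    model = linear.LinearModel()
    model.compile('sgd', 'mse')
    model.fit({'a': input_a, 'b': input_b}, output, epochs=5)

Passing a dict whose keys do not match the structure the model was built with
now raises a ValueError that lists the missing keys.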
PiperOrigin-RevId: 324642874 Change-Id: I09dd6555671258a15870471649616865972f2be3 --- tensorflow/python/keras/premade/linear.py | 73 ++++++++++++++----- .../python/keras/premade/linear_test.py | 22 +++++- 2 files changed, 76 insertions(+), 19 deletions(-) diff --git a/tensorflow/python/keras/premade/linear.py b/tensorflow/python/keras/premade/linear.py index edb4dc4b442..20f2ce560e2 100644 --- a/tensorflow/python/keras/premade/linear.py +++ b/tensorflow/python/keras/premade/linear.py @@ -18,10 +18,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.framework import tensor_shape from tensorflow.python.keras import activations from tensorflow.python.keras import initializers from tensorflow.python.keras import regularizers from tensorflow.python.keras.engine import base_layer +from tensorflow.python.keras.engine import input_spec from tensorflow.python.keras.engine import training from tensorflow.python.keras.layers import core from tensorflow.python.ops import nn @@ -96,24 +98,42 @@ class LinearModel(training.Model): base_layer._keras_model_gauge.get_cell('Linear').set(True) # pylint: disable=protected-access def build(self, input_shape): - self.dense_layers = [] - if isinstance(input_shape, (tuple, list)): - for shape in input_shape: + if isinstance(input_shape, dict): + names = sorted(list(input_shape.keys())) + self.input_specs = [] + self.dense_layers = [] + for name in names: + shape = input_shape[name] layer = core.Dense( units=self.units, use_bias=False, kernel_initializer=self.kernel_initializer, kernel_regularizer=self.kernel_regularizer, - input_shape=shape) + name=name) + layer.build(shape) + self.input_specs.append( + input_spec.InputSpec(shape=shape, name=name)) + self.dense_layers.append(layer) + elif isinstance(input_shape, (tuple, list)) and all( + isinstance(shape, tensor_shape.TensorShape) for shape in input_shape): + self.dense_layers = [] + for shape in input_shape: + layer = core.Dense( + units=self.units, + use_bias=False, + kernel_initializer=self.kernel_initializer, + kernel_regularizer=self.kernel_regularizer) + layer.build(shape) self.dense_layers.append(layer) else: + # input_shape can be a single TensorShape or a tuple of ints. layer = core.Dense( units=self.units, use_bias=False, kernel_initializer=self.kernel_initializer, - kernel_regularizer=self.kernel_regularizer, - input_shape=input_shape) - self.dense_layers.append(layer) + kernel_regularizer=self.kernel_regularizer) + layer.build(input_shape) + self.dense_layers = [layer] if self.use_bias: self.bias = self.add_weight( @@ -125,20 +145,37 @@ class LinearModel(training.Model): trainable=True) else: self.bias = None + self.built = True def call(self, inputs): - if not isinstance(inputs, (tuple, list)): - inputs = [inputs] - if len(inputs) != len(self.dense_layers): - raise ValueError('Expected {} inputs, but got {} inputs'.format( - len(self.dense_layers), len(inputs))) result = None - for inp, layer in zip(inputs, self.dense_layers): - output = layer(inp) - if result is None: - result = output - else: - result += output + if isinstance(inputs, dict): + names = [layer.name for layer in self.dense_layers] + different_keys = set(names) - set(inputs.keys()) + if different_keys: + raise ValueError( + 'The input dictionary does not match ' + 'the structure expected by the model.' 
+ '\n\tExpected keys: {}' + '\n\tReceived keys: {}' + '\n\tMissing keys: {}'.format(set(names), set(inputs.keys()), + different_keys)) + inputs = [inputs[name] for name in names] + for inp, layer in zip(inputs, self.dense_layers): + output = layer(inp) + if result is None: + result = output + else: + result += output + elif isinstance(inputs, (tuple, list)): + for inp, layer in zip(inputs, self.dense_layers): + output = layer(inp) + if result is None: + result = output + else: + result += output + else: + result = self.dense_layers[0](inputs) if self.use_bias: result = nn.bias_add(result, self.bias) diff --git a/tensorflow/python/keras/premade/linear_test.py b/tensorflow/python/keras/premade/linear_test.py index 676f29bb840..ad57baa7813 100644 --- a/tensorflow/python/keras/premade/linear_test.py +++ b/tensorflow/python/keras/premade/linear_test.py @@ -26,6 +26,7 @@ from tensorflow.python.feature_column import feature_column_v2 as fc from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import sparse_tensor +from tensorflow.python.framework import tensor_shape from tensorflow.python.keras import backend from tensorflow.python.keras import keras_parameterized from tensorflow.python.keras import losses @@ -51,7 +52,7 @@ class LinearModelTest(keras_parameterized.TestCase): model.fit(inp, output, epochs=5) self.assertTrue(model.built) - def test_linear_model_with_multi_input(self): + def test_linear_model_with_list_input(self): model = linear.LinearModel() input_a = np.random.uniform(low=-5, high=5, size=(64, 1)) input_b = np.random.uniform(low=-5, high=5, size=(64, 1)) @@ -59,6 +60,25 @@ class LinearModelTest(keras_parameterized.TestCase): model.compile('sgd', 'mse', []) model.fit([input_a, input_b], output, epochs=5) + def test_linear_model_with_mismatched_dict_inputs(self): + model = linear.LinearModel() + input_a = np.random.uniform(low=-5, high=5, size=(64, 1)) + input_b = np.random.uniform(low=-5, high=5, size=(64, 1)) + output = .3 * input_a + .2 * input_b + model.compile('sgd', 'mse', []) + model.build({'a': tensor_shape.TensorShape([None, 1]), + 'b': tensor_shape.TensorShape([None, 1])}) + with self.assertRaisesRegex(ValueError, 'Missing keys'): + model.fit({'c': input_a, 'b': input_b}, output, epochs=5) + + def test_linear_model_with_dict_input(self): + model = linear.LinearModel() + input_a = np.random.uniform(low=-5, high=5, size=(64, 1)) + input_b = np.random.uniform(low=-5, high=5, size=(64, 1)) + output = .3 * input_a + .2 * input_b + model.compile('sgd', 'mse', []) + model.fit({'a': input_a, 'b': input_b}, output, epochs=5) + def test_linear_model_as_layer(self): input_a = input_layer.Input(shape=(1,), name='a') output_a = linear.LinearModel()(input_a) From dbc843d6ec806def963ed8016aca337595faa10d Mon Sep 17 00:00:00 2001 From: Haoyu Zhang Date: Mon, 3 Aug 2020 11:23:13 -0700 Subject: [PATCH 1985/2522] Garbage collect old WorkerSession when the restarted master task create new one. 
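+  # Builds with keys {'a', 'b'} but fits with {'c', 'b'}; the missing key 'a'
+  # should be reported in the raised ValueError.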
PiperOrigin-RevId: 324643608 Change-Id: I10165604d7ae03b25f15a31676d90f62aa6181be --- .../distributed_runtime/master_session.cc | 10 +++ .../core/distributed_runtime/session_mgr.cc | 39 +++++++++ .../core/distributed_runtime/session_mgr.h | 22 +++++ .../distributed_runtime/session_mgr_test.cc | 84 +++++++++++++++++++ tensorflow/core/distributed_runtime/worker.cc | 3 +- tensorflow/core/protobuf/worker.proto | 11 +++ tensorflow/python/training/server_lib_test.py | 58 +++++++++++++ 7 files changed, 226 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/distributed_runtime/master_session.cc b/tensorflow/core/distributed_runtime/master_session.cc index fb3a6659848..af98a0e4997 100644 --- a/tensorflow/core/distributed_runtime/master_session.cc +++ b/tensorflow/core/distributed_runtime/master_session.cc @@ -57,6 +57,7 @@ limitations under the License. #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/tracing.h" #include "tensorflow/core/public/session_options.h" +#include "tensorflow/core/util/device_name_utils.h" namespace tensorflow { @@ -1314,12 +1315,21 @@ Status MasterSession::CreateWorkerSessions( } }); + string task_name; + string local_device_name; + DeviceNameUtils::SplitDeviceName(devices_->client_device()->name(), + &task_name, &local_device_name); + const int64 client_device_incarnation = + devices_->client_device()->attributes().incarnation(); + Status status = Status::OK(); // Create all the workers & kick off the computations. for (size_t i = 0; i < worker_names.size(); ++i) { workers[i].name = &worker_names[i]; workers[i].worker = worker_cache->GetOrCreateWorker(worker_names[i]); workers[i].request.set_session_handle(handle_); + workers[i].request.set_master_task(task_name); + workers[i].request.set_master_incarnation(client_device_incarnation); if (session_opts_.config.share_cluster_devices_in_session() || session_opts_.config.experimental() .share_cluster_devices_in_session()) { diff --git a/tensorflow/core/distributed_runtime/session_mgr.cc b/tensorflow/core/distributed_runtime/session_mgr.cc index 37f47848f75..0dd657c7fd9 100644 --- a/tensorflow/core/distributed_runtime/session_mgr.cc +++ b/tensorflow/core/distributed_runtime/session_mgr.cc @@ -62,11 +62,46 @@ Status SessionMgr::CreateSession( const protobuf::RepeatedPtrField& cluster_device_attributes, bool isolate_session_state) { + return CreateSession(session, server_def, cluster_device_attributes, + isolate_session_state, /*master_task=*/"", + /*master_incarnation=*/0); +} + +Status SessionMgr::CreateSession( + const string& session, const ServerDef& server_def, + const protobuf::RepeatedPtrField& + cluster_device_attributes, + bool isolate_session_state, string master_task, int64 master_incarnation) { mutex_lock l(mu_); if (session.empty()) { return errors::InvalidArgument("Session must be non-empty."); } + // For given master task name, check if one or more `WorkerSession`s have been + // created previously on this worker, and if so garbage collect the expired + // `WorkerSession`s. This happens when the master fails before sending + // `DeleteSession` requests, which can cause `WorkerSession`s to be leaked. 
+ if (!master_task.empty()) { + auto it_range = master_to_associated_sessions_.equal_range(master_task); + if (it_range.first != it_range.second && + it_range.first->second.master_incarnation != master_incarnation) { + LOG(INFO) << "When creating WorkerSession for master task " << master_task + << ", found old WorkerSessions created by the same master task " + << "with a different incarnation. These sessions will " + << "be garbage collected. Current WorkerSession count: " + << sessions_.size(); + + auto it = it_range.first; + while (it != it_range.second) { + auto session_it = sessions_.find(it->second.session_handle); + if (session_it != sessions_.end()) { + sessions_.erase(session_it); + } + it = master_to_associated_sessions_.erase(it); + } + } + } + WorkerCacheInterface* worker_cache = nullptr; string worker_name; if (server_def.cluster().job().empty()) { @@ -141,6 +176,10 @@ Status SessionMgr::CreateSession( } sessions_.insert(std::make_pair(session, std::move(worker_session))); + if (!master_task.empty()) { + MasterAssociatedSession s{master_incarnation, session}; + master_to_associated_sessions_.emplace(master_task, s); + } return Status::OK(); } diff --git a/tensorflow/core/distributed_runtime/session_mgr.h b/tensorflow/core/distributed_runtime/session_mgr.h index a9467708870..dfcc69463c4 100644 --- a/tensorflow/core/distributed_runtime/session_mgr.h +++ b/tensorflow/core/distributed_runtime/session_mgr.h @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/core/distributed_runtime/worker_session.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/platform/thread_annotations.h" #include "tensorflow/core/protobuf/tensorflow_server.pb.h" #include "tensorflow/core/protobuf/worker.pb.h" @@ -53,6 +54,18 @@ class SessionMgr { const protobuf::RepeatedPtrField& device_attributes, bool isolate_session_state); + // Create WorkerSession from the master with the given `master_task` and + // `master_incarnation`. We first look for existing WorkerSessions associated + // with the specified master task. If there are sessions created by the same + // master but with a different incarnation, it indicates that the remote + // master has restarted before deleting the sessions on worker. When it + // happens, old sessions associated with the master will be automatically + // removed before the new session is created. + Status CreateSession( + const string& session, const ServerDef& server_def, + const protobuf::RepeatedPtrField& device_attributes, + bool isolate_session_state, string master_task, int64 master_incarnation); + void ResetDefaultWorkerCache(WorkerCacheInterface* worker_cache); // Updates state (worker cache, devices) of worker session identified by @@ -107,6 +120,15 @@ class SessionMgr { mutex mu_; // A map from session identifier to internal session structure. std::map> sessions_ TF_GUARDED_BY(mu_); + + // Incarnation and WorkerSession handle associated with a master task. + struct MasterAssociatedSession { + const int64 master_incarnation; + const string session_handle; + }; + // A map from master task name to its associated worker sessions. 
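+  // Populated only for sessions created with a non-empty master task name.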
+ std::unordered_multimap + master_to_associated_sessions_ TF_GUARDED_BY(mu_); }; } // namespace tensorflow diff --git a/tensorflow/core/distributed_runtime/session_mgr_test.cc b/tensorflow/core/distributed_runtime/session_mgr_test.cc index f6e0551ff56..1f5e26b7a0b 100644 --- a/tensorflow/core/distributed_runtime/session_mgr_test.cc +++ b/tensorflow/core/distributed_runtime/session_mgr_test.cc @@ -152,6 +152,90 @@ TEST_F(SessionMgrTest, CreateSessionIsolateSessionState) { EXPECT_NE(devices_3[0]->resource_manager(), devices_4[0]->resource_manager()); } +TEST_F(SessionMgrTest, CreateSessionWithMasterName) { + ServerDef server_def; + server_def.set_job_name("worker"); + server_def.set_task_index(3); + auto job = server_def.mutable_cluster()->add_job(); + job->set_name("worker"); + job->mutable_tasks()->insert({3, "localhost:3333"}); + + protobuf::RepeatedPtrField cluster_device_attributes; + + const string master_name = "/job:master/replica:0/task:1"; + const int64 old_incarnation = random::New64(); + const int64 new_incarnation = random::New64(); + + // Allow multiple worker sessions to be created by the same master + string sess_handle1 = "test_session_handle_1"; + TF_EXPECT_OK(mgr_.CreateSession(sess_handle1, server_def, + cluster_device_attributes, true, master_name, + old_incarnation)); + string sess_handle2 = "test_session_handle_2"; + TF_EXPECT_OK(mgr_.CreateSession(sess_handle2, server_def, + cluster_device_attributes, true, master_name, + old_incarnation)); + + std::shared_ptr session; + TF_EXPECT_OK(mgr_.WorkerSessionForSession(sess_handle1, &session)); + EXPECT_NE(nullptr, session) << "Session for " << sess_handle1 << "was null"; + + TF_EXPECT_OK(mgr_.WorkerSessionForSession(sess_handle2, &session)); + EXPECT_NE(nullptr, session) << "Session for " << sess_handle2 << "was null"; + + // When the master creates a WorkerSession with new incarnation, the old + // WorkerSessions should be garbage collected. + string sess_handle3 = "test_session_handle_3"; + TF_EXPECT_OK(mgr_.CreateSession(sess_handle3, server_def, + cluster_device_attributes, true, master_name, + new_incarnation)); + + EXPECT_NE(mgr_.WorkerSessionForSession(sess_handle1, &session), + tensorflow::Status::OK()) + << "Session for " << sess_handle1 + << " should have been garbage collected."; + + EXPECT_NE(mgr_.WorkerSessionForSession(sess_handle2, &session), + tensorflow::Status::OK()) + << "Session for " << sess_handle2 + << " should have been garbage collected."; + + TF_EXPECT_OK(mgr_.WorkerSessionForSession(sess_handle3, &session)); + EXPECT_NE(nullptr, session) << "Session for " << sess_handle3 << "was null"; + + TF_EXPECT_OK(mgr_.DeleteSession(sess_handle2)); + TF_EXPECT_OK(mgr_.DeleteSession(sess_handle3)); +} + +TEST_F(SessionMgrTest, CreateSessionWithoutMasterName) { + ServerDef server_def; + server_def.set_job_name("worker"); + server_def.set_task_index(3); + auto job = server_def.mutable_cluster()->add_job(); + job->set_name("worker"); + job->mutable_tasks()->insert({3, "localhost:3333"}); + + protobuf::RepeatedPtrField cluster_device_attributes; + + // WorkerSession will NOT be garbage collected for empty master names. 
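+  // Both sessions below are created with an empty master task name and
+  // incarnation 0, so neither can trigger the cleanup path.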
+ string sess_handle1 = "test_session_handle_no_master_1"; + TF_EXPECT_OK(mgr_.CreateSession(sess_handle1, server_def, + cluster_device_attributes, true, "", 0)); + string sess_handle2 = "test_session_handle_no_master_2"; + TF_EXPECT_OK(mgr_.CreateSession(sess_handle2, server_def, + cluster_device_attributes, true, "", 0)); + + std::shared_ptr session; + TF_EXPECT_OK(mgr_.WorkerSessionForSession(sess_handle1, &session)); + EXPECT_NE(nullptr, session) << "Session for " << sess_handle1 << "was null"; + + TF_EXPECT_OK(mgr_.WorkerSessionForSession(sess_handle2, &session)); + EXPECT_NE(nullptr, session) << "Session for " << sess_handle2 << "was null"; + + TF_EXPECT_OK(mgr_.DeleteSession(sess_handle1)); + TF_EXPECT_OK(mgr_.DeleteSession(sess_handle2)); +} + TEST_F(SessionMgrTest, LegacySession) { string session_handle = ""; std::shared_ptr session; diff --git a/tensorflow/core/distributed_runtime/worker.cc b/tensorflow/core/distributed_runtime/worker.cc index f857a63e64d..5212f51d491 100644 --- a/tensorflow/core/distributed_runtime/worker.cc +++ b/tensorflow/core/distributed_runtime/worker.cc @@ -53,7 +53,8 @@ void Worker::CreateWorkerSessionAsync(const CreateWorkerSessionRequest* request, StatusCallback done) { Status s = env_->session_mgr->CreateSession( request->session_handle(), request->server_def(), - request->cluster_device_attributes(), request->isolate_session_state()); + request->cluster_device_attributes(), request->isolate_session_state(), + request->master_task(), request->master_incarnation()); done(s); } diff --git a/tensorflow/core/protobuf/worker.proto b/tensorflow/core/protobuf/worker.proto index f10283531da..739ba8e03e6 100644 --- a/tensorflow/core/protobuf/worker.proto +++ b/tensorflow/core/protobuf/worker.proto @@ -70,6 +70,17 @@ message CreateWorkerSessionRequest { // The device attributes of all the devices in the cluster. repeated DeviceAttributes cluster_device_attributes = 4; + + // The master task name from which the request is sent. + string master_task = 5; + + // The incarnation ID of the master task local CPU device. + // If the target worker already has a WorkerSession created previously with + // the same master task name but a different incarnation, it usually indicates + // that the previous master failed before deleting the WorkerSession on the + // worker. To prevent memory leaks, the worker should garbage collect the old + // WorkerSessions. + int64 master_incarnation = 6; } message CreateWorkerSessionResponse {} diff --git a/tensorflow/python/training/server_lib_test.py b/tensorflow/python/training/server_lib_test.py index 54ede81c9ea..75008985aae 100644 --- a/tensorflow/python/training/server_lib_test.py +++ b/tensorflow/python/training/server_lib_test.py @@ -22,6 +22,7 @@ import time import numpy as np +from tensorflow.core.protobuf import cluster_pb2 from tensorflow.core.protobuf import config_pb2 from tensorflow.core.protobuf import tensorflow_server_pb2 from tensorflow.python.client import session @@ -202,6 +203,63 @@ class GrpcServerTest(test.TestCase): self.assertEqual(0.1, server.server_def.default_session_config.gpu_options. 
per_process_gpu_memory_fraction) + def testRestartedMaster(self): + master_old = server_lib.Server.create_local_server() + master_new = server_lib.Server.create_local_server() + worker = self._cached_server + + def get_cluster_def(master, worker): + cluster_def = cluster_pb2.ClusterDef() + job = cluster_def.job.add() + job.name = "master" + job.tasks[0] = master.target[len("grpc://"):] + job = cluster_def.job.add() + job.name = "worker" + job.tasks[0] = worker.target[len("grpc://"):] + return cluster_def + + def check_session_devices(sess): + # Make sure we have the correct set of cluster devices + devices = sess.list_devices() + device_names = set(d.name for d in devices) + self.assertIn("/job:master/replica:0/task:0/device:CPU:0", device_names) + self.assertIn("/job:worker/replica:0/task:0/device:CPU:0", device_names) + + with ops.Graph().as_default(): + # Construct a simple graph that runs ops on remote worker + with ops.device("/job:worker/replica:0/task:0/device:CPU:0"): + a = constant_op.constant([1.0]) + b = a + a + + config = config_pb2.ConfigProto( + cluster_def=get_cluster_def(master_old, worker)) + sess_old = session.Session(master_old.target, config=config) + check_session_devices(sess_old) + + # Create a session with the new master and the worker. + # The new master has the same task name ('/job:master/replica:0/task:0') + # as the old master, but is initiated from a different server thus has a + # different incarnation. This triggers the WorkerSession on worker with + # the old master incarnation to be garbage collected. + + config = config_pb2.ConfigProto( + cluster_def=get_cluster_def(master_new, worker)) + sess_new = session.Session(master_new.target, config=config) + check_session_devices(sess_new) + + # Running on worker with the new session should work as expected + v = sess_new.run(b) + self.assertAllEqual(v, [2.0]) + + # Running on worker with the old session should raise an exception since + # the WorkerSession of the old session has been garbage collected + with self.assertRaisesRegex(errors_impl.AbortedError, + "Session handle is not found"): + sess_old.run(b) + + sess_old.close() + sess_new.close() + def testInvalidHostname(self): with self.assertRaisesRegex(errors_impl.InvalidArgumentError, "port"): _ = server_lib.Server( From cddca76312f5ae4fb92a101e79eeff6d5ac16932 Mon Sep 17 00:00:00 2001 From: Robert David Date: Mon, 3 Aug 2020 11:35:42 -0700 Subject: [PATCH 1986/2522] Add check for reading input tensors at an index that is out of range. PiperOrigin-RevId: 324646398 Change-Id: I602b23b2f28504c20a6d099874cdba2ddbf5ca83 --- tensorflow/lite/delegates/gpu/common/object_reader.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tensorflow/lite/delegates/gpu/common/object_reader.h b/tensorflow/lite/delegates/gpu/common/object_reader.h index f360bcf9302..be9a89e1b4e 100644 --- a/tensorflow/lite/delegates/gpu/common/object_reader.h +++ b/tensorflow/lite/delegates/gpu/common/object_reader.h @@ -58,6 +58,11 @@ class ObjectReader { template absl::Status ReadTensor(uint32_t idx, TensorT* t) const { + if (idx < 0 || idx >= node_->inputs->size) { + // If larger, this can be an older model with fewer input tensors than the + // current implementation. 
+ return absl::OutOfRangeError("Invalid data index found."); + } const int32_t tensor_idx = node_->inputs->data[idx]; if (tensor_idx < 0) { return absl::InvalidArgumentError( From 7a4b89dbaf78503882dfeefa634c5bbff303d563 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Mon, 3 Aug 2020 11:45:27 -0700 Subject: [PATCH 1987/2522] Adding `doc_controls.decorate_all_class_attributes` to tensorflow_docs. `_hide_layer_and_module` can be removed. PiperOrigin-RevId: 324648586 Change-Id: I30559a92f02d69772fc2719cb69d36470b1f9fd7 --- tensorflow/tools/docs/generate2.py | 28 +++++----------------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/tensorflow/tools/docs/generate2.py b/tensorflow/tools/docs/generate2.py index d61c03548e3..44152ba30ef 100644 --- a/tensorflow/tools/docs/generate2.py +++ b/tensorflow/tools/docs/generate2.py @@ -151,28 +151,6 @@ class TfExportAwareVisitor(doc_generator_visitor.DocGeneratorVisitor): return (canonical_score,) + scores -def _hide_layer_and_module_methods(): - """Hide methods and properties defined in the base classes of keras layers.""" - # __dict__ only sees attributes defined in *this* class, not on parent classes - module_contents = list(tf.Module.__dict__.items()) - layer_contents = list(tf.keras.layers.Layer.__dict__.items()) - - for name, obj in module_contents + layer_contents: - if name == "__init__": - continue - - if isinstance(obj, property): - obj = obj.fget - - if isinstance(obj, (staticmethod, classmethod)): - obj = obj.__func__ - - try: - doc_controls.do_not_doc_in_subclasses(obj) - except AttributeError: - pass - - def build_docs(output_dir, code_url_prefix, search_hints=True): """Build api docs for tensorflow v2. @@ -189,7 +167,11 @@ def build_docs(output_dir, code_url_prefix, search_hints=True): if not name.startswith("_"): doc_controls.hide_from_search(obj) - _hide_layer_and_module_methods() + for cls in [tf.Module, tf.keras.layers.Layer]: + doc_controls.decorate_all_class_attributes( + decorator=doc_controls.do_not_doc_in_subclasses, + cls=cls, + skip=["__init__"]) try: doc_controls.do_not_generate_docs(tf.__operators__) From 3b83a25110d66c12654d74e64b1f7a77b14e00c6 Mon Sep 17 00:00:00 2001 From: Ran Chen Date: Mon, 3 Aug 2020 12:12:20 -0700 Subject: [PATCH 1988/2522] Test with MWMS in custom_training_loop_optimizer_test PiperOrigin-RevId: 324654618 Change-Id: I1a43c1b275bdb9b084432220519044fe4a69e1a8 --- tensorflow/python/keras/distribute/BUILD | 2 +- .../custom_training_loop_optimizer_test.py | 25 ++++++++++++------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/tensorflow/python/keras/distribute/BUILD b/tensorflow/python/keras/distribute/BUILD index 5a5cff01e33..2c8ba97dbfa 100644 --- a/tensorflow/python/keras/distribute/BUILD +++ b/tensorflow/python/keras/distribute/BUILD @@ -275,7 +275,7 @@ distribute_py_test( "//tensorflow/python:variables", "//tensorflow/python/distribute:combinations", "//tensorflow/python/distribute:strategy_combinations", - "//tensorflow/python/distribute:values", + "//tensorflow/python/distribute:test_util", "//tensorflow/python/eager:def_function", "//tensorflow/python/eager:test", "//tensorflow/python/keras/optimizer_v2", diff --git a/tensorflow/python/keras/distribute/custom_training_loop_optimizer_test.py b/tensorflow/python/keras/distribute/custom_training_loop_optimizer_test.py index b9eee26220a..0a12d85bebd 100644 --- a/tensorflow/python/keras/distribute/custom_training_loop_optimizer_test.py +++ b/tensorflow/python/keras/distribute/custom_training_loop_optimizer_test.py @@ 
-22,7 +22,7 @@ from absl.testing import parameterized from tensorflow.python.distribute import combinations from tensorflow.python.distribute import strategy_combinations -from tensorflow.python.distribute import values +from tensorflow.python.distribute import test_util from tensorflow.python.eager import def_function from tensorflow.python.eager import test from tensorflow.python.framework import ops @@ -35,7 +35,14 @@ class OptimizerTest(test.TestCase, parameterized.TestCase): @combinations.generate( combinations.times( combinations.combine( - distribution=strategy_combinations.multidevice_strategies, + distribution=[ + strategy_combinations.mirrored_strategy_with_gpu_and_cpu, + strategy_combinations.mirrored_strategy_with_two_gpus, + strategy_combinations.multi_worker_mirrored_2x1_cpu, + strategy_combinations.multi_worker_mirrored_2x1_gpu, + strategy_combinations.tpu_strategy, + strategy_combinations.tpu_strategy_one_step, + ], mode=["eager"], ), combinations.concat( @@ -55,10 +62,10 @@ class OptimizerTest(test.TestCase, parameterized.TestCase): @def_function.function def optimize(): - grads = values.PerReplica([ - ops.convert_to_tensor([1., 1.]), - ops.convert_to_tensor([2., 2.]), - ]) + grads = ops.convert_to_tensor([[1., 1.], + [2., 2.]]) + grads = distribution.experimental_distribute_values_from_function( + lambda ctx: grads[ctx.replica_id_in_sync_group]) def step_fn(grads): optimizer.apply_gradients( @@ -66,8 +73,8 @@ class OptimizerTest(test.TestCase, parameterized.TestCase): experimental_aggregate_gradients=experimental_aggregate_gradients) return v.read_value() - return distribution.experimental_local_results( - distribution.run(step_fn, args=(grads,))) + return test_util.gather(distribution, + distribution.run(step_fn, args=(grads,))) self.assertAllClose(optimize(), expected) @@ -118,4 +125,4 @@ class OptimizerTest(test.TestCase, parameterized.TestCase): if __name__ == "__main__": - test.main() + combinations.main() From 99fc31e82fc8d5a5506a8f2de4dd7eb5f7f160e2 Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Mon, 3 Aug 2020 12:35:28 -0700 Subject: [PATCH 1989/2522] Add a module config option to enable hlo deduplication. 
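The option defaults to false and is threaded from ExecutableBuildOptions through ExecutionOptions into HloModuleConfig. A client opts in roughly as in the sketch below (only set_deduplicate_hlo is new API; the rest is the existing build-options flow, and per the proto comment the option currently only takes effect on TPU):

    xla::ExecutableBuildOptions build_options;
    build_options.set_deduplicate_hlo(true);  // fold equivalent HLO into calls
    // build_options is then passed to the usual compile entry point,
    // e.g. LocalClient::Compile.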
PiperOrigin-RevId: 324660155 Change-Id: Ic7aac0daf851bb93b4f6c24e56b20234200efdbc --- .../compiler/xla/client/executable_build_options.cc | 6 ++++++ .../compiler/xla/client/executable_build_options.h | 4 ++++ .../compiler/xla/service/compile_only_service.cc | 1 + tensorflow/compiler/xla/service/compiler.h | 1 + tensorflow/compiler/xla/service/hlo_module.cc | 1 + tensorflow/compiler/xla/service/hlo_module_config.h | 11 +++++++++++ tensorflow/compiler/xla/service/local_service.cc | 1 + tensorflow/compiler/xla/service/service.cc | 1 + tensorflow/compiler/xla/xla.proto | 4 ++++ 9 files changed, 30 insertions(+) diff --git a/tensorflow/compiler/xla/client/executable_build_options.cc b/tensorflow/compiler/xla/client/executable_build_options.cc index 404f9eb7519..f39a3e79fe5 100644 --- a/tensorflow/compiler/xla/client/executable_build_options.cc +++ b/tensorflow/compiler/xla/client/executable_build_options.cc @@ -76,6 +76,12 @@ ExecutableBuildOptions& ExecutableBuildOptions::set_use_spmd_partitioning( return *this; } +ExecutableBuildOptions& ExecutableBuildOptions::set_deduplicate_hlo( + bool deduplicate_hlo) { + deduplicate_hlo_ = deduplicate_hlo; + return *this; +} + ExecutableBuildOptions& ExecutableBuildOptions::set_device_assignment( const DeviceAssignment& device_assignment) { device_assignment_ = device_assignment; diff --git a/tensorflow/compiler/xla/client/executable_build_options.h b/tensorflow/compiler/xla/client/executable_build_options.h index 9a7fdd974b1..d034eaa7fd6 100644 --- a/tensorflow/compiler/xla/client/executable_build_options.h +++ b/tensorflow/compiler/xla/client/executable_build_options.h @@ -82,6 +82,9 @@ class ExecutableBuildOptions { bool use_spmd_partitioning() const { return use_spmd_partitioning_; } ExecutableBuildOptions& set_use_spmd_partitioning(bool use_spmd_partitioning); + bool deduplicate_hlo() const { return deduplicate_hlo_; } + ExecutableBuildOptions& set_deduplicate_hlo(bool deduplicate_hlo); + // If set, this specifies a static device assignment for the computation. 
// Otherwise, the computation will be compiled generically and can be run with // any device assignment compatible with the computation's replica and @@ -110,6 +113,7 @@ class ExecutableBuildOptions { int num_replicas_ = 1; int num_partitions_ = 1; bool use_spmd_partitioning_ = false; + bool deduplicate_hlo_ = false; absl::optional device_assignment_; bool alias_passthrough_params_ = false; }; diff --git a/tensorflow/compiler/xla/service/compile_only_service.cc b/tensorflow/compiler/xla/service/compile_only_service.cc index ce9c8a4ea62..f8e4f591a5d 100644 --- a/tensorflow/compiler/xla/service/compile_only_service.cc +++ b/tensorflow/compiler/xla/service/compile_only_service.cc @@ -92,6 +92,7 @@ CompileOnlyService::CompileAheadOfTime( execution_options.mutable_device_assignment())); } execution_options.set_use_spmd_partitioning(options.use_spmd_partitioning()); + execution_options.set_deduplicate_hlo(options.deduplicate_hlo()); for (const AotXlaComputationInstance& instance : computations) { TF_RET_CHECK(instance.computation.has_host_program_shape()); *execution_options.mutable_shape_with_output_layout() = diff --git a/tensorflow/compiler/xla/service/compiler.h b/tensorflow/compiler/xla/service/compiler.h index 57b24e372e6..312a068ba65 100644 --- a/tensorflow/compiler/xla/service/compiler.h +++ b/tensorflow/compiler/xla/service/compiler.h @@ -77,6 +77,7 @@ class AotCompilationOptions { virtual int64 replica_count() const { return 0; } virtual int64 num_cores() const { return 0; } virtual bool use_spmd_partitioning() const { return false; } + virtual bool deduplicate_hlo() const { return false; } // Optional allocator that may be used for allocating temp space on the device // during compilation. diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc index 308b8e8f095..4a67c1d2146 100644 --- a/tensorflow/compiler/xla/service/hlo_module.cc +++ b/tensorflow/compiler/xla/service/hlo_module.cc @@ -443,6 +443,7 @@ StatusOr HloModule::CreateModuleConfigFromShape( } module_config.set_use_spmd_partitioning( execution_options->use_spmd_partitioning()); + module_config.set_deduplicate_hlo(execution_options->deduplicate_hlo()); if (execution_options->has_device_assignment()) { TF_ASSIGN_OR_RETURN(std::unique_ptr device_assignment, DeviceAssignment::Deserialize( diff --git a/tensorflow/compiler/xla/service/hlo_module_config.h b/tensorflow/compiler/xla/service/hlo_module_config.h index 7ab0f24d06e..ae0a8aae838 100644 --- a/tensorflow/compiler/xla/service/hlo_module_config.h +++ b/tensorflow/compiler/xla/service/hlo_module_config.h @@ -138,6 +138,13 @@ class HloModuleConfig { } bool use_spmd_partitioning() const { return use_spmd_partitioning_; } + // If enabled, deduplicate equivalent hlos into function calls to reduce code + // size. + void set_deduplicate_hlo(bool deduplicate_hlo) { + deduplicate_hlo_ = deduplicate_hlo; + } + bool deduplicate_hlo() const { return deduplicate_hlo_; } + // Return a string which unambiguously represents all the fields of this data // structure. Used for generating a cache key for storing the compiled // executable. @@ -246,6 +253,10 @@ class HloModuleConfig { // needs to partition the module. bool use_spmd_partitioning_ = false; + // If enabled, deduplicate equivalent hlos into function calls to reduce code + // size. + bool deduplicate_hlo_ = false; + // The target maximum parallelism at which to partition HLOs for parallel // execution on the CPU backend. 
int64 intra_op_parallelism_threads_ = -1; diff --git a/tensorflow/compiler/xla/service/local_service.cc b/tensorflow/compiler/xla/service/local_service.cc index c80646e0c70..5def5bbe9db 100644 --- a/tensorflow/compiler/xla/service/local_service.cc +++ b/tensorflow/compiler/xla/service/local_service.cc @@ -114,6 +114,7 @@ ExecutionOptions CreateExecutionOptions( execution_options.set_num_partitions(build_options.num_partitions()); execution_options.set_use_spmd_partitioning( build_options.use_spmd_partitioning()); + execution_options.set_deduplicate_hlo(build_options.deduplicate_hlo()); if (build_options.has_device_assignment()) { TF_CHECK_OK(build_options.device_assignment().Serialize( execution_options.mutable_device_assignment())); diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc index 2ed5e709d81..4437ec3d452 100644 --- a/tensorflow/compiler/xla/service/service.cc +++ b/tensorflow/compiler/xla/service/service.cc @@ -315,6 +315,7 @@ StatusOr> Service::CreateModuleConfig( } config->set_use_spmd_partitioning( execution_options->use_spmd_partitioning()); + config->set_deduplicate_hlo(execution_options->deduplicate_hlo()); config->set_seed(execution_options->seed()); config->set_launch_id(execution_options->launch_id()); config->set_debug_options(execution_options->debug_options()); diff --git a/tensorflow/compiler/xla/xla.proto b/tensorflow/compiler/xla/xla.proto index 6b9917eac53..1cf30b10373 100644 --- a/tensorflow/compiler/xla/xla.proto +++ b/tensorflow/compiler/xla/xla.proto @@ -349,6 +349,10 @@ message ExecutionOptions { // Indicates whether to use SPMD (true) or MPMD (false) partitioning when // num_partitions > 1 and XLA is requested to partition the input program. bool use_spmd_partitioning = 11; + + // If set, deduplicate hlo into function calls to reduce binary size. Only + // works on TPU. + bool deduplicate_hlo = 12; } message GetDeviceHandlesRequest { From ec87d847f222e6ad5d53126d37264e02c5772587 Mon Sep 17 00:00:00 2001 From: HanBin Yoon Date: Mon, 3 Aug 2020 13:20:16 -0700 Subject: [PATCH 1990/2522] Add compiler pass to remove duplicate 'tf_saved_model.bound_input' bindings. Consolidate identical bound inputs so that resource variables do not alias in modules with tf_saved_model semantics. 
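The new tf-saved-model-dedup-bound-input-binding-pass runs on exported functions after variable lifting: it keeps the first argument carrying a given tf_saved_model.bound_input symbol, redirects uses of later duplicates to it, and erases the duplicate arguments. Condensed from the new test (types elided), a signature like

    func @f(%arg0 {tf_saved_model.bound_input = @v},
            %arg1 {tf_saved_model.bound_input = @w},
            %arg2 {tf_saved_model.bound_input = @v})

becomes a function with one argument per bound global tensor, with %arg2's uses rewritten to %arg0. The verifier now tolerates duplicate bindings only while tf_saved_model.under_construction is set; fully constructed modules still reject them, as covered in tf_saved_model_ops_invalid.mlir.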
PiperOrigin-RevId: 324669700 Change-Id: I546c212e50c889aa6526c0df88f7b9051ed28fa8 --- tensorflow/compiler/mlir/tensorflow/BUILD | 1 + .../mlir/tensorflow/ir/tf_saved_model.cc | 1 + .../tf_saved_model/hash_table_asset_v1.py | 17 +++-- ...odel_deduplicate_bound_input_bindings.mlir | 33 ++++++++++ .../tensorflow/tests/tf_saved_model_ops.mlir | 13 ++++ .../tests/tf_saved_model_ops_invalid.mlir | 14 ++++ .../deduplicate_bound_input_bindings.cc | 65 +++++++++++++++++++ .../transforms/tf_saved_model_passes.h | 3 + .../mlir/tensorflow/translate/import_model.cc | 4 +- 9 files changed, 145 insertions(+), 6 deletions(-) create mode 100644 tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_deduplicate_bound_input_bindings.mlir create mode 100644 tensorflow/compiler/mlir/tensorflow/transforms/deduplicate_bound_input_bindings.cc diff --git a/tensorflow/compiler/mlir/tensorflow/BUILD b/tensorflow/compiler/mlir/tensorflow/BUILD index 518992d03db..c6f0083fc92 100644 --- a/tensorflow/compiler/mlir/tensorflow/BUILD +++ b/tensorflow/compiler/mlir/tensorflow/BUILD @@ -676,6 +676,7 @@ cc_library( cc_library( name = "tf_saved_model_passes", srcs = [ + "transforms/deduplicate_bound_input_bindings.cc", "transforms/freeze_global_tensors.cc", "transforms/lift_variables_pass.cc", "transforms/optimize_global_tensors.cc", diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc index edfc7feefd5..94a792ec3db 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc @@ -337,6 +337,7 @@ LogicalResult VerifyExportedFunc(FuncOp func) { if (auto attr = func.getArgAttrOfType( i, "tf_saved_model.bound_input")) { if (!unique_bound_inputs.insert(attr.getValue()).second) { + if (module.getAttr("tf_saved_model.under_construction")) continue; return func.emitError() << "duplicate 'tf_saved_model.bound_input' binding"; } diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/hash_table_asset_v1.py b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/hash_table_asset_v1.py index 7e86953eb8f..4cb931253b3 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/hash_table_asset_v1.py +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/hash_table_asset_v1.py @@ -27,13 +27,15 @@ import tensorflow.compat.v1 as tf from tensorflow.compiler.mlir.tensorflow.tests.tf_saved_model import common_v1 # CHECK: "tf_saved_model.session_initializer"() {initializer = [[init:@.*]]} : () -> () -# CHECK: "tf_saved_model.asset"() {filename = {{.*}}, sym_name = "[[asset:.*]]"} +# CHECK: "tf_saved_model.asset"() {filename = {{.*}}, sym_name = "[[asset1:__tf_saved_model_asset1_.*]]"} +# CHECK: "tf_saved_model.asset"() {filename = {{.*}}, sym_name = "[[asset0:__tf_saved_model_asset0_.*]]"} # CHECK: func [[init]] -# CHECK-SAME: [[ARG:%.*]]: tensor {tf_saved_model.bound_input = @[[asset]]} +# CHECK-SAME: [[ARG0:%.*]]: tensor {tf_saved_model.bound_input = @[[asset0]]} +# CHECK-SAME: [[ARG1:%.*]]: tensor {tf_saved_model.bound_input = @[[asset1]]} # CHECK-NEXT: [[R0:%.*]] = "tf.HashTableV2"() # CHECK-SAME: shared_name = "[[hash_table:.*]]" -# CHECK-NEXT: "tf.InitializeTableFromTextFileV2"([[R0]], [[ARG]]) +# CHECK-NEXT: "tf.InitializeTableFromTextFileV2"([[R0]], [[ARG0]]) def write_vocabulary_file(vocabulary): @@ -48,11 +50,16 @@ def write_vocabulary_file(vocabulary): def test(): + vocabulary_file = write_vocabulary_file(['cat', 'is', 'on', 'the', 'mat']) 
table_initializer = tf.lookup.TextFileInitializer( - write_vocabulary_file(['cat', 'is', 'on', 'the', 'mat']), tf.string, - tf.lookup.TextFileIndex.WHOLE_LINE, tf.int64, + vocabulary_file, tf.string, tf.lookup.TextFileIndex.WHOLE_LINE, tf.int64, tf.lookup.TextFileIndex.LINE_NUMBER) + # Incur another bound_input on the asset, but with a different sym_name, i.e., + # __tf_saved_model_asset1_tokens.txt vs. __tf_saved_model_asset0_tokens.txt. table = tf.lookup.StaticVocabularyTable(table_initializer, num_oov_buckets=10) + vocab_file_tensor = tf.convert_to_tensor(vocabulary_file, tf.string, + name='asset_filepath') + tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, vocab_file_tensor) x = tf.placeholder(tf.string, shape=(), name='input') r = table.lookup(x) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_deduplicate_bound_input_bindings.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_deduplicate_bound_input_bindings.mlir new file mode 100644 index 00000000000..22fd3d86068 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_deduplicate_bound_input_bindings.mlir @@ -0,0 +1,33 @@ +// RUN: tf-opt %s -split-input-file -verify-diagnostics -tf-saved-model-dedup-bound-input-binding-pass | FileCheck %s + +module attributes {tf_saved_model.semantics, tf_saved_model.under_construction} { + // Test case: Remove duplicate bound_input symbols. + "tf_saved_model.global_tensor"() { is_mutable, sym_name = "v", type = tensor, value = dense<42.0> : tensor } : () -> () + "tf_saved_model.global_tensor"() { is_mutable, sym_name = "w", type = tensor, value = dense<43.0> : tensor } : () -> () + "tf_saved_model.global_tensor"() { is_mutable, sym_name = "x", type = tensor, value = dense<44.0> : tensor } : () -> () + // CHECK: func @f + // CHECK: %arg0: tensor>> {tf_saved_model.bound_input = @v} + // CHECK: %arg1: tensor>> {tf_saved_model.bound_input = @w} + // CHECK: %arg2: tensor>> {tf_saved_model.bound_input = @x} + // CHECK-NOT: %arg3 + // CHECK-NOT: %arg4 + func @f( + %arg0: tensor>> {tf_saved_model.bound_input = @v}, + %arg1: tensor>> {tf_saved_model.bound_input = @w}, + %arg2: tensor>> {tf_saved_model.bound_input = @v}, + %arg3: tensor>> {tf_saved_model.bound_input = @x}, + %arg4: tensor>> {tf_saved_model.bound_input = @v} + ) attributes {tf_saved_model.exported_names = ["f"]} { + // CHECK: "tf.ReadVariableOp"(%arg0) : (tensor>>) -> tensor + // CHECK: "tf.ReadVariableOp"(%arg1) : (tensor>>) -> tensor + // CHECK: "tf.ReadVariableOp"(%arg0) : (tensor>>) -> tensor + // CHECK: "tf.ReadVariableOp"(%arg2) : (tensor>>) -> tensor + // CHECK: "tf.ReadVariableOp"(%arg0) : (tensor>>) -> tensor + %val0 = "tf.ReadVariableOp"(%arg0) : (tensor>>) -> tensor + %val1 = "tf.ReadVariableOp"(%arg1) : (tensor>>) -> tensor + %val2 = "tf.ReadVariableOp"(%arg2) : (tensor>>) -> tensor + %val3 = "tf.ReadVariableOp"(%arg3) : (tensor>>) -> tensor + %val4 = "tf.ReadVariableOp"(%arg4) : (tensor>>) -> tensor + return + } +} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops.mlir index 7156a1fab63..d2c5509b52d 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops.mlir @@ -76,3 +76,16 @@ module attributes {tf_saved_model.semantics, tf_saved_model.under_construction} } } + +// ----- + +module attributes {tf_saved_model.semantics, tf_saved_model.under_construction} { + "tf_saved_model.global_tensor"() { 
is_mutable, sym_name = "v", type = tensor, value = dense<42.0> : tensor } : () -> () + // CHECK: func @f + func @f( + %arg0: tensor>> {tf_saved_model.bound_input = @v}, + %arg1: tensor>> {tf_saved_model.bound_input = @v} + ) attributes {tf_saved_model.exported_names = ["f"]} { + return + } +} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops_invalid.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops_invalid.mlir index dcb889ff99e..714c8908825 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops_invalid.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops_invalid.mlir @@ -400,3 +400,17 @@ module attributes {tf_saved_model.semantics} { } } + +// ----- + +module attributes {tf_saved_model.semantics} { + + "tf_saved_model.global_tensor"() { is_mutable, sym_name = "v", type = tensor, value = dense<42.0> : tensor } : () -> () + // expected-error@+1 {{duplicate 'tf_saved_model.bound_input' binding}} + func @f( + %arg0: tensor>> {tf_saved_model.bound_input = @v}, + %arg1: tensor>> {tf_saved_model.bound_input = @v} + ) attributes {tf_saved_model.exported_names = ["f"]} { + return + } +} diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/deduplicate_bound_input_bindings.cc b/tensorflow/compiler/mlir/tensorflow/transforms/deduplicate_bound_input_bindings.cc new file mode 100644 index 00000000000..c1514dfa357 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/transforms/deduplicate_bound_input_bindings.cc @@ -0,0 +1,65 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include + +#include "llvm/ADT/DenseMap.h" +#include "mlir/IR/Function.h" // from @llvm-project +#include "mlir/Pass/Pass.h" // from @llvm-project +#include "mlir/Support/LLVM.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.h" + +namespace mlir { +namespace tf_saved_model { +namespace { + +class DedupBoundInputBindingPass + : public PassWrapper { + public: + void runOnFunction() override; +}; + +void DedupBoundInputBindingPass::runOnFunction() { + FuncOp func = getFunction(); + if (!mlir::tf_saved_model::IsExported(func)) return; + llvm::SmallDenseMap unique_bound_inputs; + llvm::SmallVector arg_indices_to_erase; + for (unsigned i = 0, e = func.getNumArguments(); i < e; i++) { + auto attr = func.getArgAttrOfType( + i, "tf_saved_model.bound_input"); + if (!attr) continue; + auto inserted = unique_bound_inputs.insert(std::make_pair(attr, i)); + if (inserted.second) continue; + auto duplicate_arg = func.getArgument(i); + auto original_arg = func.getArgument(unique_bound_inputs[attr]); + duplicate_arg.replaceAllUsesWith(original_arg); + arg_indices_to_erase.push_back(i); + } + func.eraseArguments(arg_indices_to_erase); +} + +} // namespace + +static PassRegistration pass( + "tf-saved-model-dedup-bound-input-binding-pass", + "Remove duplicate 'tf_saved_model.bound_input' bindings."); + +std::unique_ptr> CreateDedupBoundInputBindingPass() { + return std::make_unique(); +} + +} // namespace tf_saved_model +} // namespace mlir diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tf_saved_model_passes.h b/tensorflow/compiler/mlir/tensorflow/transforms/tf_saved_model_passes.h index f7a73dc1561..d46b81156f9 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tf_saved_model_passes.h +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tf_saved_model_passes.h @@ -46,6 +46,9 @@ CreateRemoveVariablesInSessionInitializerPass(); std::unique_ptr> CreateLiftVariablesPass( ::tensorflow::Session* session); +// Creates a pass that removes duplicate 'tf_saved_model.bound_input' bindings. +std::unique_ptr> CreateDedupBoundInputBindingPass(); + } // namespace tf_saved_model } // namespace mlir diff --git a/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc b/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc index 2c44aaa5c42..27385e81262 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc @@ -3368,12 +3368,13 @@ SavedModelSignatureDefImporter::ConvertAssets() { results.reserve(asset_file_defs.size()); mlir::OpBuilder builder(module_->getBodyRegion()); + unsigned i = 0; // Use to generate unique sym_name(s) for duplicate assets. 
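+  // The counter is embedded in the generated symbol, e.g.
+  // __tf_saved_model_asset0_tokens.txt and __tf_saved_model_asset1_tokens.txt
+  // for two bound inputs on the same file.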
for (const auto& asset : asset_file_defs) { auto asset_op = builder.create( module_->getLoc(), /*sym_name=*/ builder.getStringAttr( - absl::StrCat("__tf_saved_model_asset_", asset.filename())), + absl::StrCat("__tf_saved_model_asset", i++, "_", asset.filename())), /*filename=*/ builder.getStringAttr( io::JoinPath(kSavedModelAssetsDirectory, asset.filename()))); @@ -3569,6 +3570,7 @@ Status SavedModelSignatureDefImporter::LiftVariables() { pm.addPass(mlir::TF::CreatePromoteVarHandlesToArgsPass()); pm.addPass( mlir::tf_saved_model::CreateLiftVariablesPass(bundle_.GetSession())); + pm.addPass(mlir::tf_saved_model::CreateDedupBoundInputBindingPass()); if (mlir::failed(pm.run(*module_))) return diag_handler.Combine(errors::Internal("Failed to lift variables.")); From cec4c62fa2545135065003d3ca19f8df3e688efd Mon Sep 17 00:00:00 2001 From: Nat Jeffries Date: Mon, 3 Aug 2020 13:20:41 -0700 Subject: [PATCH 1991/2522] Remove all instances of initializer_list from TF Micro kernel tests and clean up tests. PiperOrigin-RevId: 324669800 Change-Id: Ib47abca412a47f30bfc804f21e3194ded3e72bc5 --- .../micro_speech/recognize_commands_test.cc | 45 +- .../lite/micro/kernels/concatenation_test.cc | 127 +- .../micro/kernels/maximum_minimum_test.cc | 205 ++- tensorflow/lite/micro/kernels/mul_test.cc | 467 ++---- tensorflow/lite/micro/kernels/neg_test.cc | 38 +- tensorflow/lite/micro/kernels/pack_test.cc | 316 ++-- tensorflow/lite/micro/kernels/pooling_test.cc | 1319 +++++++---------- tensorflow/lite/micro/kernels/prelu_test.cc | 240 +-- tensorflow/lite/micro/kernels/reshape_test.cc | 9 +- tensorflow/lite/micro/testing/test_utils.cc | 28 - tensorflow/lite/micro/testing/test_utils.h | 34 - 11 files changed, 1121 insertions(+), 1707 deletions(-) diff --git a/tensorflow/lite/micro/examples/micro_speech/recognize_commands_test.cc b/tensorflow/lite/micro/examples/micro_speech/recognize_commands_test.cc index eff7b4eb37b..089da9173c7 100644 --- a/tensorflow/lite/micro/examples/micro_speech/recognize_commands_test.cc +++ b/tensorflow/lite/micro/examples/micro_speech/recognize_commands_test.cc @@ -75,11 +75,11 @@ TF_LITE_MICRO_TEST(RecognizeCommandsTestBasic) { RecognizeCommands recognize_commands(µ_error_reporter); - std::initializer_list result_data = {127, -128, -128, -128}; - auto result_dims = {2, 1, 4}; + const int8_t result_data[] = {127, -128, -128, -128}; + const int result_dims[] = {2, 1, 4}; TfLiteTensor results = tflite::testing::CreateQuantizedTensor( - result_data, tflite::testing::IntArrayFromInitializer(result_dims), - -128.0f, 127.0f); + result_data, tflite::testing::IntArrayFromInts(result_dims), -128.0f, + 127.0f); const char* found_command; uint8_t score; @@ -94,11 +94,10 @@ TF_LITE_MICRO_TEST(RecognizeCommandsTestFindCommands) { RecognizeCommands recognize_commands(µ_error_reporter, 1000, 51); - std::initializer_list yes_data = {-128, -128, 127, -128}; - auto yes_dims = {2, 1, 4}; + const int8_t yes_data[] = {-128, -128, 127, -128}; + const int yes_dims[] = {2, 1, 4}; TfLiteTensor yes_results = tflite::testing::CreateQuantizedTensor( - yes_data, tflite::testing::IntArrayFromInitializer(yes_dims), -128.0f, - 127.0f); + yes_data, tflite::testing::IntArrayFromInts(yes_dims), -128.0f, 127.0f); bool has_found_new_command = false; const char* new_command; @@ -122,11 +121,10 @@ TF_LITE_MICRO_TEST(RecognizeCommandsTestFindCommands) { TF_LITE_MICRO_EXPECT_EQ(0, tflite::testing::TestStrcmp("yes", new_command)); } - std::initializer_list no_data = {-128, -128, -128, 127}; - auto no_dims = {2, 1, 4}; + const 
int8_t no_data[] = {-128, -128, -128, 127}; + const int no_dims[] = {2, 1, 4}; TfLiteTensor no_results = tflite::testing::CreateQuantizedTensor( - no_data, tflite::testing::IntArrayFromInitializer(no_dims), -128.0f, - 127.0f); + no_data, tflite::testing::IntArrayFromInts(no_dims), -128.0f, 127.0f); has_found_new_command = false; new_command = ""; uint8_t score; @@ -156,11 +154,10 @@ TF_LITE_MICRO_TEST(RecognizeCommandsTestBadInputLength) { RecognizeCommands recognize_commands(µ_error_reporter, 1000, 51); - std::initializer_list bad_data = {-128, -128, 127}; - auto bad_dims = {2, 1, 3}; + const int8_t bad_data[] = {-128, -128, 127}; + const int bad_dims[] = {2, 1, 3}; TfLiteTensor bad_results = tflite::testing::CreateQuantizedTensor( - bad_data, tflite::testing::IntArrayFromInitializer(bad_dims), -128.0f, - 127.0f); + bad_data, tflite::testing::IntArrayFromInts(bad_dims), -128.0f, 127.0f); const char* found_command; uint8_t score; @@ -175,11 +172,11 @@ TF_LITE_MICRO_TEST(RecognizeCommandsTestBadInputTimes) { RecognizeCommands recognize_commands(µ_error_reporter, 1000, 51); - std::initializer_list result_data = {-128, -128, 127, -128}; - auto result_dims = {2, 1, 4}; + const int8_t result_data[] = {-128, -128, 127, -128}; + const int result_dims[] = {2, 1, 4}; TfLiteTensor results = tflite::testing::CreateQuantizedTensor( - result_data, tflite::testing::IntArrayFromInitializer(result_dims), - -128.0f, 127.0f); + result_data, tflite::testing::IntArrayFromInts(result_dims), -128.0f, + 127.0f); const char* found_command; uint8_t score; @@ -197,11 +194,11 @@ TF_LITE_MICRO_TEST(RecognizeCommandsTestTooFewInputs) { RecognizeCommands recognize_commands(µ_error_reporter, 1000, 51); - std::initializer_list result_data = {-128, -128, 127, -128}; - auto result_dims = {2, 1, 4}; + const int8_t result_data[] = {-128, -128, 127, -128}; + const int result_dims[] = {2, 1, 4}; TfLiteTensor results = tflite::testing::CreateQuantizedTensor( - result_data, tflite::testing::IntArrayFromInitializer(result_dims), - -128.0f, 127.0f); + result_data, tflite::testing::IntArrayFromInts(result_dims), -128.0f, + 127.0f); const char* found_command; uint8_t score; diff --git a/tensorflow/lite/micro/kernels/concatenation_test.cc b/tensorflow/lite/micro/kernels/concatenation_test.cc index c3fa395600f..d82a804e659 100644 --- a/tensorflow/lite/micro/kernels/concatenation_test.cc +++ b/tensorflow/lite/micro/kernels/concatenation_test.cc @@ -24,17 +24,16 @@ namespace tflite { namespace testing { namespace { -void TestConcatenateTwoInputs(std::initializer_list input1_dims_data, - std::initializer_list input1_data, - std::initializer_list input2_dims_data, - std::initializer_list input2_data, - int axis, - std::initializer_list output_dims_data, - std::initializer_list expected_output_data, +void TestConcatenateTwoInputs(const int* input1_dims_data, + const float* input1_data, + const int* input2_dims_data, + const float* input2_data, int axis, + const int* output_dims_data, + const float* expected_output_data, float* output_data) { - TfLiteIntArray* input1_dims = IntArrayFromInitializer(input1_dims_data); - TfLiteIntArray* input2_dims = IntArrayFromInitializer(input2_dims_data); - TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); + TfLiteIntArray* input1_dims = IntArrayFromInts(input1_dims_data); + TfLiteIntArray* input2_dims = IntArrayFromInts(input2_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); constexpr int input_size = 2; constexpr int output_size = 1; @@ -65,30 +64,31 
@@ void TestConcatenateTwoInputs(std::initializer_list input1_dims_data, const int output_dims_count = ElementCount(*output_dims); for (int i = 0; i < output_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i], - 1e-5f); + TF_LITE_MICRO_EXPECT_NEAR(expected_output_data[i], output_data[i], 1e-5f); } } void TestConcatenateQuantizedTwoInputs( - std::initializer_list input1_dims_data, - std::initializer_list input1_data, - std::initializer_list input2_dims_data, - std::initializer_list input2_data, float input_min, - float input_max, int axis, std::initializer_list output_dims_data, - std::initializer_list expected_output_data, float output_min, - float output_max, uint8_t* output_data) { - TfLiteIntArray* input1_dims = IntArrayFromInitializer(input1_dims_data); - TfLiteIntArray* input2_dims = IntArrayFromInitializer(input2_dims_data); - TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); + const int* input1_dims_data, const uint8_t* input1_data, + const int* input2_dims_data, const uint8_t* input2_data, + const float input_scale, const int input_zero_point, int axis, + const int* output_dims_data, const uint8_t* expected_output_data, + const float output_scale, const int output_zero_point, + uint8_t* output_data) { + TfLiteIntArray* input1_dims = IntArrayFromInts(input1_dims_data); + TfLiteIntArray* input2_dims = IntArrayFromInts(input2_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); constexpr int input_size = 2; constexpr int output_size = 1; constexpr int tensors_size = input_size + output_size; TfLiteTensor tensors[tensors_size] = { - CreateQuantizedTensor(input1_data, input1_dims, input_min, input_max), - CreateQuantizedTensor(input2_data, input2_dims, input_min, input_max), - CreateQuantizedTensor(output_data, output_dims, output_min, output_max)}; + CreateQuantizedTensor(input1_data, input1_dims, input_scale, + input_zero_point), + CreateQuantizedTensor(input2_data, input2_dims, input_scale, + input_zero_point), + CreateQuantizedTensor(output_data, output_dims, output_scale, + output_zero_point)}; int inputs_array_data[] = {2, 0, 1}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); @@ -111,7 +111,7 @@ void TestConcatenateQuantizedTwoInputs( const int output_dims_count = ElementCount(*output_dims); for (int i = 0; i < output_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_EQ(expected_output_data.begin()[i], output_data[i]); + TF_LITE_MICRO_EXPECT_EQ(expected_output_data[i], output_data[i]); } } @@ -124,19 +124,19 @@ TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(TwoInputsAllAxesCombinations) { // Concatenate the same two input tensors along all possible axes. 
- auto input_shape = {2, 2, 3}; - auto input1_value = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; - auto input2_value = {7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f}; + const int input_shape[] = {2, 2, 3}; + const float input1_value[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; + const float input2_value[] = {7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f}; // expected output when concatenating on axis 0 - auto output_shape_axis0 = {2, 4, 3}; - auto output_value_axis0 = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, - 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f}; + const int output_shape_axis0[] = {2, 4, 3}; + const float output_value_axis0[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, + 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f}; // expected output when concatenating on axis 1 - auto output_shape_axis1 = {2, 2, 6}; - auto output_value_axis1 = {1.0f, 2.0f, 3.0f, 7.0f, 8.0f, 9.0f, - 4.0f, 5.0f, 6.0f, 10.0f, 11.0f, 12.0f}; + const int output_shape_axis1[] = {2, 2, 6}; + const float output_value_axis1[] = {1.0f, 2.0f, 3.0f, 7.0f, 8.0f, 9.0f, + 4.0f, 5.0f, 6.0f, 10.0f, 11.0f, 12.0f}; float output_data[12]; @@ -162,59 +162,48 @@ TF_LITE_MICRO_TEST(TwoInputsAllAxesCombinations) { } TF_LITE_MICRO_TEST(TwoInputsQuantizedUint8) { - using tflite::testing::F2Q; - const int axis = 2; - auto input_shape = {3, 2, 1, 2}; - auto output_shape = {3, 2, 1, 4}; + const int input_shape[] = {3, 2, 1, 2}; + const int output_shape[] = {3, 2, 1, 4}; - const float input_min = -12.7f; - const float input_max = 12.8f; - const float output_min = -12.7f; - const float output_max = 12.8f; + const float input_scale = 0.1f; + const int input_zero_point = 127; + const float output_scale = 0.1f; + const int output_zero_point = 127; - auto input1_value = { - F2Q(1.0, input_min, input_max), - F2Q(3.0, input_min, input_max), - F2Q(4.0, input_min, input_max), - F2Q(7.0, input_min, input_max), - }; + const uint8_t input1_values[] = {137, 157, 167, 197}; - auto input2_value = { - F2Q(1.1, input_min, input_max), - F2Q(3.1, input_min, input_max), - F2Q(4.1, input_min, input_max), - F2Q(7.1, input_min, input_max), - }; + const uint8_t input2_values[] = {138, 158, 168, 198}; - std::initializer_list output_value = { + const uint8_t output_value[] = { 137, 157, 138, 158, 167, 197, 168, 198, }; uint8_t output_data[8]; tflite::testing::TestConcatenateQuantizedTwoInputs( - input_shape, input1_value, input_shape, input2_value, input_min, - input_max, axis, output_shape, output_value, output_min, output_max, - output_data); + input_shape, input1_values, input_shape, input2_values, input_scale, + input_zero_point, axis, output_shape, output_value, output_scale, + output_zero_point, output_data); } TF_LITE_MICRO_TEST(ThreeDimensionalTwoInputsDifferentShapes) { const int axis = 1; - auto input1_shape = {3, 2, 1, 2}; - auto input2_shape = {3, 2, 3, 2}; - auto output_shape = {3, 2, 4, 2}; + const int input1_shape[] = {3, 2, 1, 2}; + const int input2_shape[] = {3, 2, 3, 2}; + const int output_shape[] = {3, 2, 4, 2}; - auto input1_value = {1.0f, 3.0f, 4.0f, 7.0f}; - auto input2_value = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, - 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f}; - auto output_value = {1.0f, 3.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, - 4.0f, 7.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f}; + const float input1_values[] = {1.0f, 3.0f, 4.0f, 7.0f}; + const float input2_values[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, + 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f}; + const float output_values[] = {1.0f, 3.0f, 1.0f, 2.0f, 3.0f, 4.0f, + 5.0f, 6.0f, 4.0f, 7.0f, 7.0f, 8.0f, + 9.0f, 10.0f, 11.0f, 12.0f}; float 
output_data[16]; tflite::testing::TestConcatenateTwoInputs( - input1_shape, input1_value, input2_shape, input2_value, axis, - output_shape, output_value, output_data); + input1_shape, input1_values, input2_shape, input2_values, axis, + output_shape, output_values, output_data); } TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/kernels/maximum_minimum_test.cc b/tensorflow/lite/micro/kernels/maximum_minimum_test.cc index ee84fcba497..7fab5407cdb 100644 --- a/tensorflow/lite/micro/kernels/maximum_minimum_test.cc +++ b/tensorflow/lite/micro/kernels/maximum_minimum_test.cc @@ -25,16 +25,13 @@ namespace testing { namespace { void TestMaxMinFloat(const TfLiteRegistration& registration, - std::initializer_list input1_dims_data, - std::initializer_list input1_data, - std::initializer_list input2_dims_data, - std::initializer_list input2_data, - std::initializer_list expected_output_data, - std::initializer_list output_dims_data, - float* output_data) { - TfLiteIntArray* input1_dims = IntArrayFromInitializer(input1_dims_data); - TfLiteIntArray* input2_dims = IntArrayFromInitializer(input2_dims_data); - TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); + const int* input1_dims_data, const float* input1_data, + const int* input2_dims_data, const float* input2_data, + const float* expected_output_data, + const int* output_dims_data, float* output_data) { + TfLiteIntArray* input1_dims = IntArrayFromInts(input1_dims_data); + TfLiteIntArray* input2_dims = IntArrayFromInts(input2_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); const int output_dims_count = ElementCount(*output_dims); constexpr int inputs_size = 2; @@ -59,34 +56,35 @@ void TestMaxMinFloat(const TfLiteRegistration& registration, TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); for (int i = 0; i < output_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i], - 1e-5f); + TF_LITE_MICRO_EXPECT_NEAR(expected_output_data[i], output_data[i], 1e-5f); } } void TestMaxMinQuantized(const TfLiteRegistration& registration, - std::initializer_list input1_dims_data, - std::initializer_list input1_data, - float input1_min, float input1_max, - std::initializer_list input2_dims_data, - std::initializer_list input2_data, - float input2_min, float input2_max, - std::initializer_list expected_output_data, - float output_min, float output_max, - std::initializer_list output_dims_data, - uint8_t* output_data) { - TfLiteIntArray* input1_dims = IntArrayFromInitializer(input1_dims_data); - TfLiteIntArray* input2_dims = IntArrayFromInitializer(input2_dims_data); - TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); + const int* input1_dims_data, + const uint8_t* input1_data, float const input1_scale, + const int input1_zero_point, + const int* input2_dims_data, + const uint8_t* input2_data, const float input2_scale, + const int input2_zero_point, + const uint8_t* expected_output_data, + const float output_scale, const int output_zero_point, + const int* output_dims_data, uint8_t* output_data) { + TfLiteIntArray* input1_dims = IntArrayFromInts(input1_dims_data); + TfLiteIntArray* input2_dims = IntArrayFromInts(input2_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); const int output_dims_count = ElementCount(*output_dims); constexpr int inputs_size = 2; constexpr int outputs_size = 1; constexpr int tensors_size = inputs_size + outputs_size; TfLiteTensor tensors[tensors_size] = { - 
CreateQuantizedTensor(input1_data, input1_dims, input1_min, input1_max), - CreateQuantizedTensor(input2_data, input2_dims, input2_min, input2_max), - CreateQuantizedTensor(output_data, output_dims, output_min, output_max), + CreateQuantizedTensor(input1_data, input1_dims, input1_scale, + input1_zero_point), + CreateQuantizedTensor(input2_data, input2_dims, input2_scale, + input2_zero_point), + CreateQuantizedTensor(output_data, output_dims, output_scale, + output_zero_point), }; int inputs_array_data[] = {2, 0, 1}; @@ -102,21 +100,21 @@ void TestMaxMinQuantized(const TfLiteRegistration& registration, TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); for (int i = 0; i < output_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_EQ(expected_output_data.begin()[i], output_data[i]); + TF_LITE_MICRO_EXPECT_EQ(expected_output_data[i], output_data[i]); } } -void TestMaxMinQuantizedInt32( - const TfLiteRegistration& registration, - std::initializer_list input1_dims_data, - std::initializer_list input1_data, float input1_scale, - std::initializer_list input2_dims_data, - std::initializer_list input2_data, float input2_scale, - std::initializer_list expected_output_data, float output_scale, - std::initializer_list output_dims_data, int32_t* output_data) { - TfLiteIntArray* input1_dims = IntArrayFromInitializer(input1_dims_data); - TfLiteIntArray* input2_dims = IntArrayFromInitializer(input2_dims_data); - TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); +void TestMaxMinQuantizedInt32(const TfLiteRegistration& registration, + const int* input1_dims_data, + const int32_t* input1_data, float input1_scale, + const int* input2_dims_data, + const int32_t* input2_data, float input2_scale, + const int32_t* expected_output_data, + float output_scale, const int* output_dims_data, + int32_t* output_data) { + TfLiteIntArray* input1_dims = IntArrayFromInts(input1_dims_data); + TfLiteIntArray* input2_dims = IntArrayFromInts(input2_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); const int output_dims_count = ElementCount(*output_dims); constexpr int inputs_size = 2; @@ -141,7 +139,7 @@ void TestMaxMinQuantizedInt32( TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); for (int i = 0; i < output_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_EQ(expected_output_data.begin()[i], output_data[i]); + TF_LITE_MICRO_EXPECT_EQ(expected_output_data[i], output_data[i]); } } @@ -152,109 +150,86 @@ void TestMaxMinQuantizedInt32( TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(FloatTest) { - std::initializer_list data1 = {1.0, 0.0, -1.0, 11.0, -2.0, -1.44}; - std::initializer_list data2 = {-1.0, 0.0, 1.0, 12.0, -3.0, -1.43}; + const int dims[] = {3, 3, 1, 2}; + const float data1[] = {1.0, 0.0, -1.0, 11.0, -2.0, -1.44}; + const float data2[] = {-1.0, 0.0, 1.0, 12.0, -3.0, -1.43}; + const float golden_max[] = {1.0, 0.0, 1.0, 12.0, -2.0, -1.43}; + const float golden_min[] = {-1.0, 0.0, -1.0, 11.0, -3.0, -1.44}; float output_data[6]; - tflite::testing::TestMaxMinFloat( - tflite::ops::micro::Register_MAXIMUM(), {3, 3, 1, 2}, - data1, // input1 shape and data - {3, 3, 1, 2}, data2, // input2 shape and data - {1.0, 0.0, 1.0, 12.0, -2.0, -1.43}, // expected output - {3, 3, 1, 2}, output_data); // output shape and data buffer + tflite::testing::TestMaxMinFloat(tflite::ops::micro::Register_MAXIMUM(), dims, + data1, dims, data2, golden_max, dims, + output_data); - tflite::testing::TestMaxMinFloat( - tflite::ops::micro::Register_MINIMUM(), {3, 3, 1, 2}, - data1, // input1 shape and data - {3, 3, 1, 
2}, data2, // input2 shape and data - {-1.0, 0.0, -1.0, 11.0, -3.0, -1.44}, // expected output - {3, 3, 1, 2}, output_data); // output shape and data buffer + tflite::testing::TestMaxMinFloat(tflite::ops::micro::Register_MINIMUM(), dims, + data1, dims, data2, golden_min, dims, + output_data); } TF_LITE_MICRO_TEST(Uint8Test) { - std::initializer_list data1 = {1, 0, 2, 11, 2, 23}; - std::initializer_list data2 = {0, 0, 1, 12, 255, 1}; - const float input1_min = -63.5; - const float input1_max = 64; - const float input2_min = -63.5; - const float input2_max = 64; - const float output_min = -63.5; - const float output_max = 64; + const int dims[] = {3, 3, 1, 2}; + const uint8_t data1[] = {1, 0, 2, 11, 2, 23}; + const uint8_t data2[] = {0, 0, 1, 12, 255, 1}; + const uint8_t golden_max[] = {1, 0, 2, 12, 255, 23}; + const uint8_t golden_min[] = {0, 0, 1, 11, 2, 1}; + + const float input_scale = 1.0; + const int input_zero_point = 0; + const float output_scale = 1.0; + const int output_zero_point = 0; uint8_t output_data[6]; tflite::testing::TestMaxMinQuantized( - tflite::ops::micro::Register_MAXIMUM(), - // input1 shape, data and bounds - {3, 3, 1, 2}, data1, input1_min, input1_max, - // input2 shape, data and bounds - {3, 3, 1, 2}, data2, input2_min, input2_max, - // expected output - {1, 0, 2, 12, 255, 23}, - // output bounds, shape and data buffer - output_min, output_max, {3, 3, 1, 2}, output_data); + tflite::ops::micro::Register_MAXIMUM(), dims, data1, input_scale, + input_zero_point, dims, data2, input_scale, input_zero_point, golden_max, + output_scale, output_zero_point, dims, output_data); tflite::testing::TestMaxMinQuantized( - tflite::ops::micro::Register_MINIMUM(), - // input1 shape, data and bounds - {3, 3, 1, 2}, data1, input1_min, input1_max, - // input2 shape, data and bounds - {3, 3, 1, 2}, data2, input2_min, input2_max, - // expected output - {0, 0, 1, 11, 2, 1}, - // output bounds, shape and data buffer - output_min, output_max, {3, 3, 1, 2}, output_data); + tflite::ops::micro::Register_MINIMUM(), dims, data1, input_scale, + input_zero_point, dims, data2, input_scale, input_zero_point, golden_min, + output_scale, output_zero_point, dims, output_data); } TF_LITE_MICRO_TEST(FloatWithBroadcastTest) { - std::initializer_list data1 = {1.0, 0.0, -1.0, -2.0, -1.44, 11.0}; - std::initializer_list data2 = {0.5, 2.0}; + const int dims[] = {3, 3, 1, 2}; + const int dims_scalar[] = {1, 2}; + const float data1[] = {1.0, 0.0, -1.0, -2.0, -1.44, 11.0}; + const float data2[] = {0.5, 2.0}; + const float golden_max[] = {1.0, 2.0, 0.5, 2.0, 0.5, 11.0}; + const float golden_min[] = {0.5, 0.0, -1.0, -2.0, -1.44, 2.0}; float output_data[6]; - tflite::testing::TestMaxMinFloat( - tflite::ops::micro::Register_MAXIMUM(), {3, 3, 1, 2}, - data1, // input1 shape and data - {1, 2}, data2, // input2 shape and data - {1.0, 2.0, 0.5, 2.0, 0.5, 11.0}, // expected output - {3, 3, 1, 2}, output_data); // output shape and data buffer + tflite::testing::TestMaxMinFloat(tflite::ops::micro::Register_MAXIMUM(), dims, + data1, dims_scalar, data2, golden_max, dims, + output_data); - tflite::testing::TestMaxMinFloat( - tflite::ops::micro::Register_MINIMUM(), {3, 3, 1, 2}, - data1, // input1 shape and data - {1, 2}, data2, // input2 shape and data - {0.5, 0.0, -1.0, -2.0, -1.44, 2.0}, // expected output - {3, 3, 1, 2}, output_data); // output shape and data buffer + tflite::testing::TestMaxMinFloat(tflite::ops::micro::Register_MINIMUM(), dims, + data1, dims_scalar, data2, golden_min, dims, + output_data); } 
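The refactored tests above pass tensor shapes as plain int arrays such as {3, 3, 1, 2}: the first element is the number of dimensions and the remaining elements are the dimensions themselves, which IntArrayFromInts then exposes as a TfLiteIntArray. A minimal standalone sketch of that convention follows (the loop mirrors what ElementCount is assumed to compute; it is illustrative only, not the TfLite implementation):

#include <cassert>

int main() {
  // Shape convention used by these tests: dims[0] is the rank,
  // dims[1..rank] are the dimensions. {3, 3, 1, 2} therefore
  // describes a 3-D shape of 3 x 1 x 2.
  const int dims[] = {3, 3, 1, 2};
  const int rank = dims[0];
  int element_count = 1;
  for (int i = 1; i <= rank; ++i) {
    element_count *= dims[i];
  }
  // 3 * 1 * 2 == 6, matching the float output_data[6] buffers above.
  assert(element_count == 6);
  return 0;
}
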
TF_LITE_MICRO_TEST(Int32WithBroadcastTest) { const float input1_scale = 0.5; const float input2_scale = 0.5; const float output_scale = 0.5; - std::initializer_list data1 = {1, 0, -1, -2, 3, 11}; - std::initializer_list data2 = {2}; + const int dims[] = {3, 3, 1, 2}; + const int dims_scalar[] = {1, 1}; + const int32_t data1[] = {1, 0, -1, -2, 3, 11}; + const int32_t data2[] = {2}; + const int32_t golden_max[] = {2, 2, 2, 2, 3, 11}; + const int32_t golden_min[] = {1, 0, -1, -2, 2, 2}; int32_t output_data[6]; tflite::testing::TestMaxMinQuantizedInt32( - tflite::ops::micro::Register_MAXIMUM(), - // input1 shape, data and scale - {3, 3, 1, 2}, data1, input1_scale, - // input2 shape, data and scale - {1, 1}, data2, input2_scale, - // expected output - {2, 2, 2, 2, 3, 11}, - // output scale, shape and data buffer - output_scale, {3, 3, 1, 2}, output_data); + tflite::ops::micro::Register_MAXIMUM(), dims, data1, input1_scale, + dims_scalar, data2, input2_scale, golden_max, output_scale, dims, + output_data); tflite::testing::TestMaxMinQuantizedInt32( - tflite::ops::micro::Register_MINIMUM(), - // input1 shape, data and scale - {3, 3, 1, 2}, data1, input1_scale, - // input2 shape, data and scale - {1, 1}, data2, input2_scale, - // expected output - {1, 0, -1, -2, 2, 2}, - // output scale, shape and data buffer - output_scale, {3, 3, 1, 2}, output_data); + tflite::ops::micro::Register_MINIMUM(), dims, data1, input1_scale, + dims_scalar, data2, input2_scale, golden_min, output_scale, dims, + output_data); } TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/kernels/mul_test.cc b/tensorflow/lite/micro/kernels/mul_test.cc index e39f6693fcd..86b4d8be57c 100644 --- a/tensorflow/lite/micro/kernels/mul_test.cc +++ b/tensorflow/lite/micro/kernels/mul_test.cc @@ -23,16 +23,57 @@ namespace tflite { namespace testing { namespace { -void TestMulFloat(std::initializer_list input1_dims_data, - std::initializer_list input1_data, - std::initializer_list input2_dims_data, - std::initializer_list input2_data, - std::initializer_list output_dims_data, - std::initializer_list expected_output_data, +const int flat_size_simple = 4; +const float scale_simple = 0.01; +const int dims_simple[] = {4, 1, 2, 2, 1}; +const float input1_simple[] = {-0.8, 0.2, 0.9, 0.7}; +const float input2_simple[] = {0.6, 0.4, 0.9, 0.8}; +const float golden_simple[] = {-0.48, 0.08, 0.81, 0.56}; +const float golden_simple_relu[] = {0.0, 0.08, 0.81, 0.56}; + +const int flat_size_broadcast = 6; +const float input_scale_broadcast = 0.05f; +const float output_scale_broadcast = 0.01f; +const int dims_broadcast[] = {4, 1, 3, 1, 2}; +const int dims_scalar_broadcast[] = {1, 1}; +const float input1_broadcast[] = {-2.0, 0.2, 0.7, 0.8, 1.1, 2.0}; +const float input2_broadcast[] = {0.1}; +const float golden_broadcast[] = {-0.2, 0.02, 0.07, 0.08, 0.11, 0.2}; +const float golden_broadcast_relu[] = {0, 0.02, 0.07, 0.08, 0.11, 0.2}; + +template +void ValidateMulGoldens(TfLiteTensor* tensors, int tensors_size, + TfLiteFusedActivation activation, const T* golden, + int output_len, float tolerance, T* output) { + TfLiteMulParams builtin_data = { + .activation = activation, + }; + + int inputs_array_data[] = {2, 0, 1}; + TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); + int outputs_array_data[] = {1, 2}; + TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); + + const TfLiteRegistration registration = tflite::ops::micro::Register_MUL(); + micro::KernelRunner runner( + registration, tensors, tensors_size, inputs_array, 
outputs_array, + reinterpret_cast(&builtin_data), micro_test::reporter); + + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); + + for (int i = 0; i < output_len; i++) { + TF_LITE_MICRO_EXPECT_NEAR(golden[i], output[i], tolerance); + } +} + +void TestMulFloat(const int* input1_dims_data, const float* input1_data, + const int* input2_dims_data, const float* input2_data, + const int* output_dims_data, const float* golden, float* output_data, TfLiteFusedActivation activation) { - TfLiteIntArray* input1_dims = IntArrayFromInitializer(input1_dims_data); - TfLiteIntArray* input2_dims = IntArrayFromInitializer(input2_dims_data); - TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); + TfLiteIntArray* input1_dims = IntArrayFromInts(input1_dims_data); + TfLiteIntArray* input2_dims = IntArrayFromInts(input2_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); const int output_dims_count = ElementCount(*output_dims); constexpr int inputs_size = 2; @@ -44,75 +85,40 @@ void TestMulFloat(std::initializer_list input1_dims_data, CreateFloatTensor(output_data, output_dims), }; - TfLiteMulParams builtin_data = { - .activation = activation, - }; - - int inputs_array_data[] = {2, 0, 1}; - TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); - int outputs_array_data[] = {1, 2}; - TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - - const TfLiteRegistration registration = tflite::ops::micro::Register_MUL(); - micro::KernelRunner runner( - registration, tensors, tensors_size, inputs_array, outputs_array, - reinterpret_cast(&builtin_data), micro_test::reporter); - - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); - - for (int i = 0; i < output_dims_count; i++) { - TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i], - 1e-5f); - } + ValidateMulGoldens(tensors, tensors_size, activation, golden, + output_dims_count, 1e-5, output_data); } template -void TestMulQuantized(std::initializer_list input1_dims_data, - std::initializer_list input1_data, - std::initializer_list input2_dims_data, - std::initializer_list input2_data, - const float input_min, const float input_max, - std::initializer_list output_dims_data, - const float output_min, const float output_max, - std::initializer_list expected_output_data, - T* output_data, TfLiteFusedActivation activation, - int error_tolerance) { - TfLiteIntArray* input1_dims = IntArrayFromInitializer(input1_dims_data); - TfLiteIntArray* input2_dims = IntArrayFromInitializer(input2_dims_data); - TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); +void TestMulQuantized(const int* input1_dims_data, const float* input1_data, + T* input1_quantized, const int* input2_dims_data, + const float* input2_data, T* input2_quantized, + const float input_scale, const int input_zero_point, + const int* output_dims_data, const float* golden, + T* golden_quantized, const float output_scale, + const int output_zero_point, T* output_data, + TfLiteFusedActivation activation) { + TfLiteIntArray* input1_dims = IntArrayFromInts(input1_dims_data); + TfLiteIntArray* input2_dims = IntArrayFromInts(input2_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); const int output_dims_count = ElementCount(*output_dims); constexpr int inputs_size = 2; constexpr int outputs_size = 1; constexpr int tensors_size = inputs_size + 
outputs_size; TfLiteTensor tensors[tensors_size] = { - CreateQuantizedTensor(input1_data, input1_dims, input_min, input_max), - CreateQuantizedTensor(input2_data, input2_dims, input_min, input_max), - CreateQuantizedTensor(output_data, output_dims, output_min, output_max), - }; + CreateQuantizedTensor(input1_data, input1_quantized, input1_dims, + input_scale, input_zero_point), + CreateQuantizedTensor(input2_data, input2_quantized, input2_dims, + input_scale, input_zero_point), + CreateQuantizedTensor(output_data, output_dims, output_scale, + output_zero_point)}; - TfLiteMulParams builtin_data = { - .activation = activation, - }; + AsymmetricQuantize(golden, golden_quantized, output_dims_count, output_scale, + output_zero_point); - int inputs_array_data[] = {2, 0, 1}; - TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); - int outputs_array_data[] = {1, 2}; - TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - - const TfLiteRegistration registration = tflite::ops::micro::Register_MUL(); - micro::KernelRunner runner( - registration, tensors, tensors_size, inputs_array, outputs_array, - reinterpret_cast(&builtin_data), micro_test::reporter); - - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); - - for (int i = 0; i < output_dims_count; i++) { - TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i], - error_tolerance); - } + ValidateMulGoldens(tensors, tensors_size, activation, golden_quantized, + output_dims_count, 1.0f, output_data); } } // namespace @@ -122,250 +128,105 @@ void TestMulQuantized(std::initializer_list input1_dims_data, TF_LITE_MICRO_TESTS_BEGIN -TF_LITE_MICRO_TEST(Int8NoActivation) { - using tflite::testing::F2QS; - const float input_min = -1; - const float input_max = 1; - const float output_min = -1; - const float output_max = 1; +TF_LITE_MICRO_TEST(SimpleFloatNoAcativationShouldMatchGolden) { + float output_data[tflite::testing::flat_size_simple]; - int8_t output_data[4]; - tflite::testing::TestMulQuantized({4, 1, 2, 2, 1}, // input1 dims - { - F2QS(-0.8, input_min, input_max), - F2QS(0.2, input_min, input_max), - F2QS(0.9, input_min, input_max), - F2QS(0.7, input_min, input_max), - }, // input1 data - {4, 1, 2, 2, 1}, // input2 dims - { - F2QS(0.6, input_min, input_max), - F2QS(0.4, input_min, input_max), - F2QS(0.9, input_min, input_max), - F2QS(0.8, input_min, input_max), - }, // input2 data - input_min, input_max, - {4, 1, 2, 2, 1}, // output dims - output_min, output_max, - { - F2QS(-0.48, output_min, output_max), - F2QS(0.08, output_min, output_max), - F2QS(0.81, output_min, output_max), - F2QS(0.56, output_min, output_max), - }, // expected output data - output_data, kTfLiteActNone, 1); -} - -TF_LITE_MICRO_TEST(Int8NoActivationLargeMultiplier) { - using tflite::testing::F2QS; - const float input_min = -100; - const float input_max = 100; - const float output_min = -10; - const float output_max = 10; - - int8_t output_data[4]; - tflite::testing::TestMulQuantized( - {4, 1, 2, 2, 1}, - { - F2QS(-4, input_min, input_max), - F2QS(2, input_min, input_max), - F2QS(3, input_min, input_max), - F2QS(1, input_min, input_max), - }, - {4, 1, 2, 2, 1}, - { - /* F2QS(-1, input_min, input_max), F2QS(-3, input_min, input_max), */ - F2QS(-1, input_min, input_max), - F2QS(-3, input_min, input_max), - F2QS(4, input_min, input_max), - F2QS(2, input_min, input_max), - }, - input_min, input_max, {4, 1, 2, 2, 1}, output_min, output_max, - { - F2QS(4, output_min, 
output_max), - F2QS(-6, output_min, output_max), - F2QS(12, output_min, output_max), - F2QS(2, output_min, output_max), - }, - // In Tensorflow Lite, this test have a max allowed error of 1.4f. - // A difference of 1.4 in floating points corresponds to 18 quantized - // for the output min/max [-10, 10]. - output_data, kTfLiteActNone, 18); -} - -TF_LITE_MICRO_TEST(Int8NoActivationBroadcast) { - using tflite::testing::F2QS; - const float input_min = -3.0; - const float input_max = 3.0; - const float output_min = -3.0; - const float output_max = 3.0; - - int8_t output_data[6]; - tflite::testing::TestMulQuantized({4, 1, 3, 1, 2}, // input1 shape - { - F2QS(-2.0, input_min, input_max), - F2QS(0.2, input_min, input_max), - F2QS(0.7, input_min, input_max), - F2QS(0.8, input_min, input_max), - F2QS(1.1, input_min, input_max), - F2QS(2.0, input_min, input_max), - }, // input1 data - {1, 1}, // input2 shape - { - F2QS(0.1, input_min, input_max), - }, // input2 data - input_min, input_max, - {4, 1, 3, 1, 2}, // output shape - output_min, output_max, - { - F2QS(-0.2, output_min, output_max), - F2QS(0.02, output_min, output_max), - F2QS(0.07, output_min, output_max), - F2QS(0.08, output_min, output_max), - F2QS(0.11, output_min, output_max), - F2QS(0.2, output_min, output_max), - }, // expected output data - output_data, kTfLiteActNone, 1); -} - -TF_LITE_MICRO_TEST(UInt8NoActivation) { - using tflite::testing::F2Q; - const float input_min = -1; - const float input_max = 1; - const float output_min = -1; - const float output_max = 1; - - uint8_t output_data[4]; - tflite::testing::TestMulQuantized({4, 1, 2, 2, 1}, // input1 dims - { - F2Q(-0.8, input_min, input_max), - F2Q(0.2, input_min, input_max), - F2Q(0.9, input_min, input_max), - F2Q(0.7, input_min, input_max), - }, // input1 data - {4, 1, 2, 2, 1}, // input2 dims - { - F2Q(0.6, input_min, input_max), - F2Q(0.4, input_min, input_max), - F2Q(0.9, input_min, input_max), - F2Q(0.8, input_min, input_max), - }, // input2 data - input_min, input_max, - {4, 1, 2, 2, 1}, // output dims - output_min, output_max, - { - F2Q(-0.48, output_min, output_max), - F2Q(0.08, output_min, output_max), - F2Q(0.81, output_min, output_max), - F2Q(0.56, output_min, output_max), - }, // expected output data - output_data, kTfLiteActNone, 1); -} - -TF_LITE_MICRO_TEST(UInt8NoActivationLargeMultiplier) { - using tflite::testing::F2Q; - const float input_min = -100; - const float input_max = 100; - const float output_min = -10; - const float output_max = 10; - - uint8_t output_data[4]; - tflite::testing::TestMulQuantized( - {4, 1, 2, 2, 1}, - { - F2Q(-4, input_min, input_max), - F2Q(2, input_min, input_max), - F2Q(3, input_min, input_max), - F2Q(1, input_min, input_max), - }, - {4, 1, 2, 2, 1}, - { - F2Q(-1, input_min, input_max), - F2Q(-3, input_min, input_max), - F2Q(4, input_min, input_max), - F2Q(2, input_min, input_max), - }, - input_min, input_max, {4, 1, 2, 2, 1}, output_min, output_max, - { - F2Q(4, output_min, output_max), - F2Q(-6, output_min, output_max), - F2Q(12, output_min, output_max), - F2Q(2, output_min, output_max), - }, - // In Tensorflow Lite, this test have a max allowed error of 1.4f. - // A difference of 1.4 in floating points corresponds to 18 quantized - // for the output min/max [-10, 10]. 
- output_data, kTfLiteActNone, 18); -} - -TF_LITE_MICRO_TEST(UInt8NoActivationBroadcast) { - using tflite::testing::F2Q; - const float input_min = -3.0; - const float input_max = 3.0; - const float output_min = -3.0; - const float output_max = 3.0; - - uint8_t output_data[6]; - tflite::testing::TestMulQuantized({4, 1, 3, 1, 2}, // input1 shape - { - F2Q(-2.0, input_min, input_max), - F2Q(0.2, input_min, input_max), - F2Q(0.7, input_min, input_max), - F2Q(0.8, input_min, input_max), - F2Q(1.1, input_min, input_max), - F2Q(2.0, input_min, input_max), - }, // input1 data - {1, 1}, // input2 shape - { - F2Q(0.1, input_min, input_max), - }, // input2 data - input_min, input_max, - {4, 1, 3, 1, 2}, // output shape - output_min, output_max, - { - F2Q(-0.2, output_min, output_max), - F2Q(0.02, output_min, output_max), - F2Q(0.07, output_min, output_max), - F2Q(0.08, output_min, output_max), - F2Q(0.11, output_min, output_max), - F2Q(0.2, output_min, output_max), - }, // expected output data - output_data, kTfLiteActNone, 1); -} - -TF_LITE_MICRO_TEST(FloatNoActivation) { - float output_data[4]; tflite::testing::TestMulFloat( - {4, 1, 2, 2, 1}, // input1 shape - {-2.0, 0.2, 0.7, 0.8}, // input1 data - {4, 1, 2, 2, 1}, // input2 shape - {0.1, 0.2, 0.3, 0.5}, // input2 data - {4, 1, 2, 2, 1}, // output shape - {-0.2, 0.04, 0.21, 0.4}, // expected output data + tflite::testing::dims_simple, tflite::testing::input1_simple, + tflite::testing::dims_simple, tflite::testing::input2_simple, + tflite::testing::dims_simple, tflite::testing::golden_simple, output_data, + kTfLiteActNone); +} + +TF_LITE_MICRO_TEST(SimpleFloatReluShouldMatchGolden) { + float output_data[tflite::testing::flat_size_simple]; + + tflite::testing::TestMulFloat( + tflite::testing::dims_simple, tflite::testing::input1_simple, + tflite::testing::dims_simple, tflite::testing::input2_simple, + tflite::testing::dims_simple, tflite::testing::golden_simple_relu, + output_data, kTfLiteActRelu); +} + +TF_LITE_MICRO_TEST(SimpleInt8NoAcativationShouldMatchGolden) { + int8_t input1_quantized[tflite::testing::flat_size_simple]; + int8_t input2_quantized[tflite::testing::flat_size_simple]; + int8_t golden_quantized[tflite::testing::flat_size_simple]; + int8_t output_data[tflite::testing::flat_size_simple]; + + tflite::testing::TestMulQuantized( + tflite::testing::dims_simple, tflite::testing::input1_simple, + input1_quantized, tflite::testing::dims_simple, + tflite::testing::input2_simple, input2_quantized, + tflite::testing::scale_simple, 0, tflite::testing::dims_simple, + tflite::testing::golden_simple, golden_quantized, + tflite::testing::scale_simple, 0, output_data, kTfLiteActNone); +} + +TF_LITE_MICRO_TEST(SimpleUInt8NoAcativationShouldMatchGolden) { + uint8_t input1_quantized[tflite::testing::flat_size_simple]; + uint8_t input2_quantized[tflite::testing::flat_size_simple]; + uint8_t golden_quantized[tflite::testing::flat_size_simple]; + uint8_t output_data[tflite::testing::flat_size_simple]; + + tflite::testing::TestMulQuantized( + tflite::testing::dims_simple, tflite::testing::input1_simple, + input1_quantized, tflite::testing::dims_simple, + tflite::testing::input2_simple, input2_quantized, + tflite::testing::scale_simple, 128, tflite::testing::dims_simple, + tflite::testing::golden_simple, golden_quantized, + tflite::testing::scale_simple, 128, output_data, kTfLiteActNone); +} + +TF_LITE_MICRO_TEST(BroadcastFloatNoActivationShouldMatchGolden) { + float output_data[tflite::testing::flat_size_broadcast]; + + tflite::testing::TestMulFloat( + 
tflite::testing::dims_broadcast, tflite::testing::input1_broadcast, + tflite::testing::dims_scalar_broadcast, tflite::testing::input2_broadcast, + tflite::testing::dims_broadcast, tflite::testing::golden_broadcast, output_data, kTfLiteActNone); } -TF_LITE_MICRO_TEST(FloatRelu) { - float output_data[4]; +TF_LITE_MICRO_TEST(BroadcastFloatReluShouldMatchGolden) { + float output_data[tflite::testing::flat_size_broadcast]; + tflite::testing::TestMulFloat( - {4, 1, 2, 2, 1}, // input1 shape - {-2.0, 0.2, 0.7, 0.8}, // input1 data - {4, 1, 2, 2, 1}, // input2 shape - {0.1, 0.2, 0.3, 0.5}, // input2 data - {4, 1, 2, 2, 1}, // output shape - {-0.2, 0.04, 0.21, 0.4}, // expected output data - output_data, kTfLiteActReluN1To1); + tflite::testing::dims_broadcast, tflite::testing::input1_broadcast, + tflite::testing::dims_scalar_broadcast, tflite::testing::input2_broadcast, + tflite::testing::dims_broadcast, tflite::testing::golden_broadcast_relu, + output_data, kTfLiteActRelu); } -TF_LITE_MICRO_TEST(FloatBroadcast) { - float output_data[6]; - tflite::testing::TestMulFloat( - {4, 1, 3, 1, 2}, // input1 shape - {-2.0, 0.2, 0.7, 0.8, 1.1, 2.0}, // input1 data - {1, 1}, // input2 shape - {0.1}, // input2 data - {4, 1, 3, 1, 2}, // output shape - {-0.2, 0.02, 0.07, 0.08, 0.11, 0.2}, // expected output data +TF_LITE_MICRO_TEST(BroadcastInt8NoAcativationShouldMatchGolden) { + int8_t input1_quantized[tflite::testing::flat_size_broadcast]; + int8_t input2_quantized[tflite::testing::flat_size_broadcast]; + int8_t golden_quantized[tflite::testing::flat_size_broadcast]; + int8_t output_data[tflite::testing::flat_size_broadcast]; + + tflite::testing::TestMulQuantized( + tflite::testing::dims_broadcast, tflite::testing::input1_broadcast, + input1_quantized, tflite::testing::dims_scalar_broadcast, + tflite::testing::input2_broadcast, input2_quantized, + tflite::testing::input_scale_broadcast, 0, + tflite::testing::dims_broadcast, tflite::testing::golden_broadcast, + golden_quantized, tflite::testing::output_scale_broadcast, 0, output_data, + kTfLiteActNone); +} + +TF_LITE_MICRO_TEST(BroadcastUInt8NoAcativationShouldMatchGolden) { + uint8_t input1_quantized[tflite::testing::flat_size_broadcast]; + uint8_t input2_quantized[1]; + uint8_t golden_quantized[tflite::testing::flat_size_broadcast]; + uint8_t output_data[tflite::testing::flat_size_broadcast]; + + tflite::testing::TestMulQuantized( + tflite::testing::dims_broadcast, tflite::testing::input1_broadcast, + input1_quantized, tflite::testing::dims_scalar_broadcast, + tflite::testing::input2_broadcast, input2_quantized, + tflite::testing::input_scale_broadcast, 128, + tflite::testing::dims_broadcast, tflite::testing::golden_broadcast, + golden_quantized, tflite::testing::output_scale_broadcast, 128, output_data, kTfLiteActNone); } diff --git a/tensorflow/lite/micro/kernels/neg_test.cc b/tensorflow/lite/micro/kernels/neg_test.cc index 2d7c449fcef..544a3eddc1c 100644 --- a/tensorflow/lite/micro/kernels/neg_test.cc +++ b/tensorflow/lite/micro/kernels/neg_test.cc @@ -24,13 +24,11 @@ namespace tflite { namespace testing { namespace { -void TestNegFloat(std::initializer_list input_dims_data, - std::initializer_list input_data, - std::initializer_list expected_output_data, - std::initializer_list output_dims_data, - float* output_data) { - TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data); - TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); +void TestNegFloat(const int* input_dims_data, const float* input_data, + const float* 
expected_output_data, + const int* output_dims_data, float* output_data) { + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); const int output_dims_count = ElementCount(*output_dims); constexpr int inputs_size = 1; constexpr int outputs_size = 1; @@ -53,9 +51,9 @@ void TestNegFloat(std::initializer_list input_dims_data, TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); - TF_LITE_MICRO_EXPECT_EQ(expected_output_data.begin()[0], output_data[0]); + TF_LITE_MICRO_EXPECT_EQ(expected_output_data[0], output_data[0]); for (int i = 0; i < output_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_EQ(expected_output_data.begin()[i], output_data[i]); + TF_LITE_MICRO_EXPECT_EQ(expected_output_data[i], output_data[i]); } } @@ -66,23 +64,21 @@ void TestNegFloat(std::initializer_list input_dims_data, TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(NegOpSingleFloat) { + const int dims[] = {1, 2}; + const float input_data[] = {8.5, 0.0}; + const float golden[] = {-8.5, 0.0}; float output_data[2]; - tflite::testing::TestNegFloat(/*input_dims_data=*/{1, 2}, - /*input_data=*/{8.5f, 0.0f}, - /*expected_output_data=*/{-8.5f, 0.0f}, - /*output_dims_data*/ {1, 2}, - /*output_data=*/output_data); + + tflite::testing::TestNegFloat(dims, input_data, golden, dims, output_data); } TF_LITE_MICRO_TEST(NegOpFloat) { + const int dims[] = {2, 2, 3}; + const float input_data[] = {-2.0f, -1.0f, 0.f, 1.0f, 2.0f, 3.0f}; + const float golden[] = {2.0f, 1.0f, -0.f, -1.0f, -2.0f, -3.0f}; float output_data[6]; - tflite::testing::TestNegFloat(/*input_dims_data=*/{2, 2, 3}, - /*input_data=*/ - {-2.0f, -1.0f, 0.f, 1.0f, 2.0f, 3.0f}, - /*expected_output_data=*/ - {2.0f, 1.0f, -0.f, -1.0f, -2.0f, -3.0f}, - /*output_dims_data=*/{2, 2, 3}, - /*output_data=*/output_data); + + tflite::testing::TestNegFloat(dims, input_data, golden, dims, output_data); } TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/kernels/pack_test.cc b/tensorflow/lite/micro/kernels/pack_test.cc index 45d5e32ef48..c05595df146 100644 --- a/tensorflow/lite/micro/kernels/pack_test.cc +++ b/tensorflow/lite/micro/kernels/pack_test.cc @@ -23,16 +23,39 @@ limitations under the License. namespace tflite { namespace testing { -void TestPackTwoInputsFloat(std::initializer_list input1_dims_data, - std::initializer_list input1_data, - std::initializer_list input2_dims_data, - std::initializer_list input2_data, int axis, - std::initializer_list output_dims_data, - std::initializer_list expected_output_data, +template +void ValidatePackGoldens(TfLiteTensor* tensors, int tensors_size, + TfLitePackParams params, TfLiteIntArray* inputs_array, + TfLiteIntArray* outputs_array, const T* golden, + int output_len, float tolerance, T* output) { + // Place a unique value in the uninitialized output buffer. 
+ for (int i = 0; i < output_len; ++i) { + output[i] = 23; + } + + const TfLiteRegistration registration = tflite::ops::micro::Register_PACK(); + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, reinterpret_cast(¶ms), + micro_test::reporter); + + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); + + for (int i = 0; i < output_len; ++i) { + TF_LITE_MICRO_EXPECT_NEAR(golden[i], output[i], tolerance); + } +} + +void TestPackTwoInputsFloat(const int* input1_dims_data, + const float* input1_data, + const int* input2_dims_data, + const float* input2_data, int axis, + const int* output_dims_data, + const float* expected_output_data, float* output_data) { - TfLiteIntArray* input1_dims = IntArrayFromInitializer(input1_dims_data); - TfLiteIntArray* input2_dims = IntArrayFromInitializer(input2_dims_data); - TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); + TfLiteIntArray* input1_dims = IntArrayFromInts(input1_dims_data); + TfLiteIntArray* input2_dims = IntArrayFromInts(input2_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); const int output_dims_count = ElementCount(*output_dims); constexpr int input_size = 2; @@ -43,11 +66,6 @@ void TestPackTwoInputsFloat(std::initializer_list input1_dims_data, CreateFloatTensor(input2_data, input2_dims), CreateFloatTensor(output_data, output_dims)}; - // Place a unique value in the uninitialized output buffer. - for (int i = 0; i < output_dims_count; ++i) { - output_data[i] = 23; - } - TfLitePackParams builtin_data = { .values_count = 2, .axis = axis, @@ -57,34 +75,21 @@ void TestPackTwoInputsFloat(std::initializer_list input1_dims_data, int outputs_array_data[] = {1, 2}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - const TfLiteRegistration registration = tflite::ops::micro::Register_PACK(); - micro::KernelRunner runner( - registration, tensors, tensors_size, inputs_array, outputs_array, - reinterpret_cast(&builtin_data), micro_test::reporter); - - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); - - for (int i = 0; i < output_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i], - 1e-5f); - } + ValidatePackGoldens(tensors, tensors_size, builtin_data, inputs_array, + outputs_array, expected_output_data, output_dims_count, + 1e-5f, output_data); } -void TestPackThreeInputsFloat(std::initializer_list input1_dims_data, - std::initializer_list input1_data, - std::initializer_list input2_dims_data, - std::initializer_list input2_data, - std::initializer_list input3_dims_data, - std::initializer_list input3_data, - int axis, - std::initializer_list output_dims_data, - std::initializer_list expected_output_data, - float* output_data) { - TfLiteIntArray* input1_dims = IntArrayFromInitializer(input1_dims_data); - TfLiteIntArray* input2_dims = IntArrayFromInitializer(input2_dims_data); - TfLiteIntArray* input3_dims = IntArrayFromInitializer(input3_dims_data); - TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); +void TestPackThreeInputsFloat( + const int* input1_dims_data, const float* input1_data, + const int* input2_dims_data, const float* input2_data, + const int* input3_dims_data, const float* input3_data, int axis, + const int* output_dims_data, const float* expected_output_data, + float* output_data) { + TfLiteIntArray* input1_dims = 
IntArrayFromInts(input1_dims_data); + TfLiteIntArray* input2_dims = IntArrayFromInts(input2_dims_data); + TfLiteIntArray* input3_dims = IntArrayFromInts(input3_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); const int output_dims_count = ElementCount(*output_dims); constexpr int input_size = 3; @@ -96,95 +101,67 @@ void TestPackThreeInputsFloat(std::initializer_list input1_dims_data, CreateFloatTensor(input3_data, input3_dims), CreateFloatTensor(output_data, output_dims)}; - // Place a unique value in the uninitialized output buffer. - for (int i = 0; i < output_dims_count; ++i) { - output_data[i] = 23; - } - TfLitePackParams builtin_data = { .values_count = 3, .axis = axis, }; - int inputs_array_data[] = {3, 0, 1, 2}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 3}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - const TfLiteRegistration registration = tflite::ops::micro::Register_PACK(); - micro::KernelRunner runner( - registration, tensors, tensors_size, inputs_array, outputs_array, - reinterpret_cast(&builtin_data), micro_test::reporter); - - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); - - for (int i = 0; i < output_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i], - 1e-5f); - } + ValidatePackGoldens(tensors, tensors_size, builtin_data, inputs_array, + outputs_array, expected_output_data, output_dims_count, + 1e-5f, output_data); } -void TestPackTwoInputsQuantized( - std::initializer_list input1_dims_data, - std::initializer_list input1_data, - std::initializer_list input2_dims_data, - std::initializer_list input2_data, int axis, - std::initializer_list output_dims_data, - std::initializer_list expected_output_data, uint8_t* output_data) { - TfLiteIntArray* input1_dims = IntArrayFromInitializer(input1_dims_data); - TfLiteIntArray* input2_dims = IntArrayFromInitializer(input2_dims_data); - TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); +void TestPackTwoInputsQuantized(const int* input1_dims_data, + const uint8_t* input1_data, + const int* input2_dims_data, + const uint8_t* input2_data, int axis, + const int* output_dims_data, + const uint8_t* expected_output_data, + uint8_t* output_data) { + TfLiteIntArray* input1_dims = IntArrayFromInts(input1_dims_data); + TfLiteIntArray* input2_dims = IntArrayFromInts(input2_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); const int output_dims_count = ElementCount(*output_dims); constexpr int input_size = 2; constexpr int output_size = 1; constexpr int tensors_size = input_size + output_size; TfLiteTensor tensors[tensors_size] = { - // CreateQuantizedTensor needs min/max values as input, but these values - // don't matter as to the functionality of PACK, so just set as 0 and 10. - CreateQuantizedTensor(input1_data, input1_dims, 0, 10), - CreateQuantizedTensor(input2_data, input2_dims, 0, 10), - CreateQuantizedTensor(output_data, output_dims, 0, 10)}; - - // Place a unique value in the uninitialized output buffer. - for (int i = 0; i < output_dims_count; ++i) { - output_data[i] = 23; - } + // CreateQuantizedTensor needs scale/zero_point values as input, but these + // values don't matter as to the functionality of PACK, so just set as 1.0 + // and 128. 
+ CreateQuantizedTensor(input1_data, input1_dims, 1.0, 128), + CreateQuantizedTensor(input2_data, input2_dims, 1.0, 128), + CreateQuantizedTensor(output_data, output_dims, 1.0, 128)}; TfLitePackParams builtin_data = { .values_count = 2, .axis = axis, }; - int inputs_array_data[] = {2, 0, 1}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 2}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - const TfLiteRegistration registration = tflite::ops::micro::Register_PACK(); - micro::KernelRunner runner( - registration, tensors, tensors_size, inputs_array, outputs_array, - reinterpret_cast(&builtin_data), micro_test::reporter); - - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); - - for (int i = 0; i < output_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_EQ(expected_output_data.begin()[i], output_data[i]); - } + ValidatePackGoldens(tensors, tensors_size, builtin_data, inputs_array, + outputs_array, expected_output_data, output_dims_count, + 1e-5f, output_data); } -void TestPackTwoInputsQuantized32( - std::initializer_list input1_dims_data, - std::initializer_list input1_data, - std::initializer_list input2_dims_data, - std::initializer_list input2_data, int axis, - std::initializer_list output_dims_data, - std::initializer_list expected_output_data, int32_t* output_data) { - TfLiteIntArray* input1_dims = IntArrayFromInitializer(input1_dims_data); - TfLiteIntArray* input2_dims = IntArrayFromInitializer(input2_dims_data); - TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); +void TestPackTwoInputsQuantized32(const int* input1_dims_data, + const int32_t* input1_data, + const int* input2_dims_data, + const int32_t* input2_data, int axis, + const int* output_dims_data, + const int32_t* expected_output_data, + int32_t* output_data) { + TfLiteIntArray* input1_dims = IntArrayFromInts(input1_dims_data); + TfLiteIntArray* input2_dims = IntArrayFromInts(input2_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); const int output_dims_count = ElementCount(*output_dims); constexpr int input_size = 2; @@ -195,32 +172,18 @@ void TestPackTwoInputsQuantized32( CreateQuantized32Tensor(input2_data, input2_dims, 1.0), CreateQuantized32Tensor(output_data, output_dims, 1.0)}; - // Place a unique value in the uninitialized output buffer. 
- for (int i = 0; i < output_dims_count; ++i) { - output_data[i] = 23; - } - TfLitePackParams builtin_data = { .values_count = 2, .axis = axis, }; - int inputs_array_data[] = {2, 0, 1}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 2}; TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - const TfLiteRegistration registration = tflite::ops::micro::Register_PACK(); - micro::KernelRunner runner( - registration, tensors, tensors_size, inputs_array, outputs_array, - reinterpret_cast(&builtin_data), micro_test::reporter); - - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); - - for (int i = 0; i < output_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_EQ(expected_output_data.begin()[i], output_data[i]); - } + ValidatePackGoldens(tensors, tensors_size, builtin_data, inputs_array, + outputs_array, expected_output_data, output_dims_count, + 1e-5f, output_data); } } // namespace testing @@ -229,99 +192,96 @@ void TestPackTwoInputsQuantized32( TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(PackFloatThreeInputs) { + const int input_shape[] = {1, 2}; + const int output_shape[] = {2, 3, 2}; + const float input1_values[] = {1, 4}; + const float input2_values[] = {2, 5}; + const float input3_values[] = {3, 6}; + const float golden[] = {1, 4, 2, 5, 3, 6}; + const int axis = 0; constexpr int output_dims_count = 6; float output_data[output_dims_count]; + tflite::testing::TestPackThreeInputsFloat( - {1, 2}, // Input1 shape - {1, 4}, // Input1 values - {1, 2}, // Input2 shape - {2, 5}, // Input2 values - {1, 2}, // Input3 shape - {3, 6}, // Input3 values - 0, {2, 3, 2}, // Output shape - { - 1, 4, 2, 5, 3, 6 // Output values - }, - output_data); + input_shape, input1_values, input_shape, input2_values, input_shape, + input3_values, axis, output_shape, golden, output_data); } TF_LITE_MICRO_TEST(PackFloatThreeInputsDifferentAxis) { + const int input_shape[] = {1, 2}; + const int output_shape[] = {2, 2, 3}; + const float input1_values[] = {1, 4}; + const float input2_values[] = {2, 5}; + const float input3_values[] = {3, 6}; + const float golden[] = {1, 2, 3, 4, 5, 6}; + const int axis = 1; constexpr int output_dims_count = 6; float output_data[output_dims_count]; + tflite::testing::TestPackThreeInputsFloat( - {1, 2}, // Input1 shape - {1, 4}, // Input1 values - {1, 2}, // Input2 shape - {2, 5}, // Input2 values - {1, 2}, // Input3 shape - {3, 6}, // Input3 values - 1, {2, 2, 3}, // Output shape - { - 1, 2, 3, 4, 5, 6 // Output values - }, - output_data); + input_shape, input1_values, input_shape, input2_values, input_shape, + input3_values, axis, output_shape, golden, output_data); } TF_LITE_MICRO_TEST(PackFloatThreeInputsNegativeAxis) { + const int input_shape[] = {1, 2}; + const int output_shape[] = {2, 2, 3}; + const float input1_values[] = {1, 4}; + const float input2_values[] = {2, 5}; + const float input3_values[] = {3, 6}; + const float golden[] = {1, 2, 3, 4, 5, 6}; + const int axis = -1; constexpr int output_dims_count = 6; float output_data[output_dims_count]; + tflite::testing::TestPackThreeInputsFloat( - {1, 2}, // Input1 shape - {1, 4}, // Input1 values - {1, 2}, // Input2 shape - {2, 5}, // Input2 values - {1, 2}, // Input3 shape - {3, 6}, // Input3 values - -1, {2, 2, 3}, // Output shape - { - 1, 2, 3, 4, 5, 6 // Output values - }, - output_data); + input_shape, input1_values, input_shape, input2_values, input_shape, + input3_values, axis, output_shape, golden, 
output_data); } TF_LITE_MICRO_TEST(PackFloatMultilDimensions) { + const int input_shape[] = {2, 2, 3}; + const int output_shape[] = {3, 2, 2, 3}; + const float input1_values[] = {1, 2, 3, 4, 5, 6}; + const float input2_values[] = {7, 8, 9, 10, 11, 12}; + const float golden[] = {1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12}; + const int axis = 1; constexpr int output_dims_count = 12; float output_data[output_dims_count]; - tflite::testing::TestPackTwoInputsFloat( - {2, 2, 3}, // Input1 shape - {1, 2, 3, 4, 5, 6}, // Input1 values - {2, 2, 3}, // Input2 shape - {7, 8, 9, 10, 11, 12}, // Input2 values - 1, {3, 2, 2, 3}, // Output shape - { - 1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12 // Output values - }, - output_data); + + tflite::testing::TestPackTwoInputsFloat(input_shape, input1_values, + input_shape, input2_values, axis, + output_shape, golden, output_data); } TF_LITE_MICRO_TEST(PackQuantizedMultilDimensions) { + const int input_shape[] = {2, 2, 3}; + const int output_shape[] = {3, 2, 2, 3}; + const uint8_t input1_values[] = {1, 2, 3, 4, 5, 6}; + const uint8_t input2_values[] = {7, 8, 9, 10, 11, 12}; + const uint8_t golden[] = {1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12}; + const int axis = 1; constexpr int output_dims_count = 12; uint8_t output_data[output_dims_count]; + tflite::testing::TestPackTwoInputsQuantized( - {2, 2, 3}, // Input1 shape - {1, 2, 3, 4, 5, 6}, // Input1 values - {2, 2, 3}, // Input2 shape - {7, 8, 9, 10, 11, 12}, // Input2 values - 1, {3, 2, 2, 3}, // Output shape - { - 1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12 // Output values - }, - output_data); + input_shape, input1_values, input_shape, input2_values, axis, + output_shape, golden, output_data); } TF_LITE_MICRO_TEST(PackQuantized32MultilDimensions) { + const int input_shape[] = {2, 2, 3}; + const int output_shape[] = {3, 2, 2, 3}; + const int32_t input1_values[] = {1, 2, 3, 4, 5, 6}; + const int32_t input2_values[] = {7, 8, 9, 10, 11, 12}; + const int32_t golden[] = {1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12}; + const int axis = 1; constexpr int output_dims_count = 12; int32_t output_data[output_dims_count]; + tflite::testing::TestPackTwoInputsQuantized32( - {2, 2, 3}, // Input1 shape - {1, 2, 3, 4, 5, 6}, // Input1 values - {2, 2, 3}, // Input2 shape - {7, 8, 9, 10, 11, 12}, // Input2 values - 1, {3, 2, 2, 3}, // Output shape - { - 1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12 // Output values - }, - output_data); + input_shape, input1_values, input_shape, input2_values, axis, + output_shape, golden, output_data); } TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/kernels/pooling_test.cc b/tensorflow/lite/micro/kernels/pooling_test.cc index ec5eb47d0da..a33f5df6fd4 100644 --- a/tensorflow/lite/micro/kernels/pooling_test.cc +++ b/tensorflow/lite/micro/kernels/pooling_test.cc @@ -25,27 +25,14 @@ namespace tflite { namespace testing { namespace { -void TestAveragePoolingFloat(std::initializer_list input_dims_data, - std::initializer_list input_data, - const int filter_height, const int filter_width, - const int stride_height, const int stride_width, - std::initializer_list expected_output_data, - std::initializer_list output_dims_data, - TfLitePadding padding, - TfLiteFusedActivation activation, - float* output_data) { - TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data); - TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); - const int output_dims_count = ElementCount(*output_dims); - - constexpr int inputs_size = 1; - constexpr int outputs_size = 1; - constexpr int tensors_size = inputs_size + 
outputs_size; - TfLiteTensor tensors[tensors_size] = { - CreateFloatTensor(input_data, input_dims), - CreateFloatTensor(output_data, output_dims), - }; - +template +void ValidatePoolingGoldens(TfLiteTensor* tensors, int tensors_size, + const TfLiteRegistration registration, + const int filter_height, const int filter_width, + const int stride_height, const int stride_width, + const T* golden, const int output_length, + TfLitePadding padding, + TfLiteFusedActivation activation, T* output_data) { int inputs_array_data[] = {1, 0}; TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); int outputs_array_data[] = {1, 1}; @@ -59,8 +46,6 @@ void TestAveragePoolingFloat(std::initializer_list input_dims_data, activation, {}}; - const TfLiteRegistration registration = - tflite::ops::micro::Register_AVERAGE_POOL_2D(); micro::KernelRunner runner( registration, tensors, tensors_size, inputs_array, outputs_array, reinterpret_cast(&builtin_data), micro_test::reporter); @@ -68,73 +53,78 @@ void TestAveragePoolingFloat(std::initializer_list input_dims_data, TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); - for (int i = 0; i < output_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i], - 1e-5f); + for (int i = 0; i < output_length; ++i) { + TF_LITE_MICRO_EXPECT_NEAR(golden[i], output_data[i], 1e-5f); } } +void TestAveragePoolFloat(const int* input_dims_data, const float* input_data, + const int filter_height, const int filter_width, + const int stride_height, const int stride_width, + const float* expected_output_data, + const int* output_dims_data, TfLitePadding padding, + TfLiteFusedActivation activation, + float* output_data) { + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); + const int output_dims_count = ElementCount(*output_dims); + + constexpr int inputs_size = 1; + constexpr int outputs_size = 1; + constexpr int tensors_size = inputs_size + outputs_size; + TfLiteTensor tensors[tensors_size] = { + CreateFloatTensor(input_data, input_dims), + CreateFloatTensor(output_data, output_dims), + }; + + const TfLiteRegistration registration = + tflite::ops::micro::Register_AVERAGE_POOL_2D(); + + ValidatePoolingGoldens(tensors, tensors_size, registration, filter_height, + filter_width, stride_height, stride_width, + expected_output_data, output_dims_count, padding, + activation, output_data); +} + template -void TestAveragePoolingQuantized( - std::initializer_list input_dims_data, - std::initializer_list input_data, const float input_min, - const float input_max, const int filter_height, const int filter_width, +void TestAveragePoolQuantized( + const int* input_dims_data, const T* input_data, const float input_scale, + const int input_zero_point, const int filter_height, const int filter_width, const int stride_height, const int stride_width, - std::initializer_list expected_output_data, - std::initializer_list output_dims_data, float output_min, - float output_max, TfLitePadding padding, TfLiteFusedActivation activation, - T* output_data) { + const T* expected_output_data, const int* output_dims_data, + const float output_scale, const int output_zero_point, + TfLitePadding padding, TfLiteFusedActivation activation, T* output_data) { static_assert(sizeof(T) == 1, "Only int8_t/uint8_t data types allowed."); - TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data); - TfLiteIntArray* 
output_dims = IntArrayFromInitializer(output_dims_data); + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); const int output_dims_count = ElementCount(*output_dims); constexpr int inputs_size = 1; constexpr int outputs_size = 1; constexpr int tensors_size = inputs_size + outputs_size; TfLiteTensor tensors[tensors_size] = { - CreateQuantizedTensor(input_data, input_dims, input_min, input_max), - CreateQuantizedTensor(output_data, output_dims, output_min, output_max), + CreateQuantizedTensor(input_data, input_dims, input_scale, + input_zero_point), + CreateQuantizedTensor(output_data, output_dims, output_scale, + output_zero_point), }; - int inputs_array_data[] = {1, 0}; - TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); - int outputs_array_data[] = {1, 1}; - TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - - TfLitePoolParams builtin_data = {padding, - stride_width, - stride_height, - filter_width, - filter_height, - activation, - {}}; - const TfLiteRegistration registration = tflite::ops::micro::Register_AVERAGE_POOL_2D(); - micro::KernelRunner runner( - registration, tensors, tensors_size, inputs_array, outputs_array, - reinterpret_cast(&builtin_data), micro_test::reporter); - - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); - - for (int i = 0; i < output_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i], - 1e-5f); - } + ValidatePoolingGoldens(tensors, tensors_size, registration, filter_height, + filter_width, stride_height, stride_width, + expected_output_data, output_dims_count, padding, + activation, output_data); } -void TestMaxPoolFloat(std::initializer_list input_dims_data, - std::initializer_list input_data, int filter_width, - int filter_height, int stride_width, int stride_height, - std::initializer_list expected_output_data, - std::initializer_list output_dims_data, - TfLitePadding padding, TfLiteFusedActivation activation, - float* output_data) { - TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data); - TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); +void TestMaxPoolFloat(const int* input_dims_data, const float* input_data, + int filter_width, int filter_height, int stride_width, + int stride_height, const float* expected_output_data, + const int* output_dims_data, TfLitePadding padding, + TfLiteFusedActivation activation, float* output_data) { + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); const int output_dims_count = ElementCount(*output_dims); constexpr int inputs_size = 1; @@ -145,83 +135,43 @@ void TestMaxPoolFloat(std::initializer_list input_dims_data, CreateFloatTensor(output_data, output_dims), }; - int inputs_array_data[] = {1, 0}; - TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); - int outputs_array_data[] = {1, 1}; - TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - - TfLitePoolParams builtin_data = {padding, - stride_width, - stride_height, - filter_width, - filter_height, - activation, - {}}; - const TfLiteRegistration registration = tflite::ops::micro::Register_MAX_POOL_2D(); - micro::KernelRunner runner( - registration, tensors, tensors_size, inputs_array, outputs_array, - reinterpret_cast(&builtin_data), micro_test::reporter); - - 
TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); - - for (int i = 0; i < output_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i], - 1e-5f); - } + ValidatePoolingGoldens(tensors, tensors_size, registration, filter_height, + filter_width, stride_height, stride_width, + expected_output_data, output_dims_count, padding, + activation, output_data); } template -void TestMaxPoolQuantized(std::initializer_list input_dims_data, - std::initializer_list input_data, float input_min, - float input_max, int filter_width, int filter_height, - int stride_width, int stride_height, - std::initializer_list expected_output_data, - float output_min, float output_max, - std::initializer_list output_dims_data, - TfLitePadding padding, +void TestMaxPoolQuantized(const int* input_dims_data, const T* input_data, + const float input_scale, const int input_zero_point, + const int filter_height, const int filter_width, + const int stride_height, const int stride_width, + const T* expected_output_data, + const int* output_dims_data, const float output_scale, + const int output_zero_point, TfLitePadding padding, TfLiteFusedActivation activation, T* output_data) { - static_assert(sizeof(T) == 1, "Only int8_t/uint8_t data types allowed."); - - TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data); - TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); const int output_dims_count = ElementCount(*output_dims); constexpr int inputs_size = 1; constexpr int outputs_size = 1; constexpr int tensors_size = inputs_size + outputs_size; TfLiteTensor tensors[tensors_size] = { - CreateQuantizedTensor(input_data, input_dims, input_min, input_max), - CreateQuantizedTensor(output_data, output_dims, output_min, output_max), + CreateQuantizedTensor(input_data, input_dims, input_scale, + input_zero_point), + CreateQuantizedTensor(output_data, output_dims, output_scale, + output_zero_point), }; - int inputs_array_data[] = {1, 0}; - TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); - int outputs_array_data[] = {1, 1}; - TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - - TfLitePoolParams builtin_data = {padding, - stride_width, - stride_height, - filter_width, - filter_height, - activation, - {}}; - const TfLiteRegistration registration = tflite::ops::micro::Register_MAX_POOL_2D(); - micro::KernelRunner runner( - registration, tensors, tensors_size, inputs_array, outputs_array, - reinterpret_cast(&builtin_data), micro_test::reporter); - - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); - - for (int i = 0; i < output_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_EQ(expected_output_data.begin()[i], output_data[i]); - } + ValidatePoolingGoldens(tensors, tensors_size, registration, filter_height, + filter_width, stride_height, stride_width, + expected_output_data, output_dims_count, padding, + activation, output_data); } } // namespace @@ -232,798 +182,535 @@ void TestMaxPoolQuantized(std::initializer_list input_dims_data, TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(SimpleAveragePoolTestFloat) { + const int input_shape[] = {4, 1, 2, 4, 1}; + const float input_values[] = {0, 6, 2, 4, 3, 2, 10, 7}; + const int filter_width = 2; + const int filter_height = 2; + const 
int stride_width = 2; + const int stride_height = 2; + const float golden[] = {2.75, 5.75}; + const int output_shape[] = {4, 1, 1, 2, 1}; float output_data[2]; - tflite::testing::TestAveragePoolingFloat({4, 1, 2, 4, 1}, // Input shape - { // Input values - 0., 6., 2., 4., 3., 2., 10., 7.}, - 2, 2, // filter width, filter height - 2, 2, // stride width, stride height - { - // Output values - 2.75, - 5.75, - }, - {4, 1, 1, 2, 1}, // Output shape - kTfLitePaddingValid, kTfLiteActNone, - output_data); + tflite::testing::TestAveragePoolFloat( + input_shape, input_values, filter_height, filter_width, stride_height, + stride_width, golden, output_shape, kTfLitePaddingValid, kTfLiteActNone, + output_data); } TF_LITE_MICRO_TEST(SimpleAveragePoolTestUint8) { - using tflite::testing::F2Q; - - const float input_min = -15.9375; - const float input_max = 15.9375; - const float output_min = -15.9375; - const float output_max = 15.9375; + const int input_shape[] = {4, 1, 2, 4, 1}; + const uint8_t input_values[] = {0, 24, 8, 16, 12, 8, 40, 28}; + const int filter_width = 2; + const int filter_height = 2; + const int stride_width = 2; + const int stride_height = 2; + const uint8_t golden[] = {11, 23}; + const int output_shape[] = {4, 1, 1, 2, 1}; uint8_t output_data[2]; - tflite::testing::TestAveragePoolingQuantized( - {4, 1, 2, 4, 1}, // Input shape - { - // Input values - F2Q(0., input_min, input_max), - F2Q(-6., input_min, input_max), - F2Q(2., input_min, input_max), - F2Q(4., input_min, input_max), - F2Q(3., input_min, input_max), - F2Q(2., input_min, input_max), - F2Q(-10., input_min, input_max), - F2Q(7., input_min, input_max), - }, - input_min, input_max, // input quantization range - 2, 2, // filter width, filter height - 2, 2, // stride width, stride height - { - // Output values - F2Q(0., output_min, output_max), - F2Q(0.75, output_min, output_max), - }, - {4, 1, 1, 2, 1}, // Output shape - output_min, output_max, // output quantization range - kTfLitePaddingValid, kTfLiteActRelu, output_data); + + const float input_scale = 0.25; + const int input_zero_point = 0; + const float output_scale = .25; + const int output_zero_point = 0; + tflite::testing::TestAveragePoolQuantized( + input_shape, input_values, input_scale, input_zero_point, filter_height, + filter_width, stride_height, stride_width, golden, output_shape, + output_scale, output_zero_point, kTfLitePaddingValid, kTfLiteActNone, + output_data); } TF_LITE_MICRO_TEST(SimpleAveragePoolTestInt8PaddingValidStride2ActNone) { - using tflite::testing::F2QS; - - const float input_min = -15.9375; - const float input_max = 15.8130; - const float output_min = -15.9375; - const float output_max = 15.8130; + const int input_shape[] = {4, 1, 2, 4, 1}; + const int8_t input_values[] = {0, -24, 8, 16, 12, 8, -40, 28}; + const int filter_width = 2; + const int filter_height = 2; + const int stride_width = 2; + const int stride_height = 2; + const int8_t golden[] = {-1, 3}; + const int output_shape[] = {4, 1, 1, 2, 1}; int8_t output_data[2]; - tflite::testing::TestAveragePoolingQuantized( - {4, 1, 2, 4, 1}, // Input shape - { // Input values - F2QS(0., input_min, input_max), F2QS(-6., input_min, input_max), - F2QS(2., input_min, input_max), F2QS(4., input_min, input_max), - F2QS(3., input_min, input_max), F2QS(2., input_min, input_max), - F2QS(-10., input_min, input_max), F2QS(7., input_min, input_max)}, - input_min, input_max, // input quantization range - 2, 2, // filter height, filter width - 2, 2, // stride height, stride width - { // Output values - 
F2QS(-0.25, output_min, output_max), F2QS(0.75, output_min, output_max)}, - {4, 1, 1, 2, 1}, // Output shape - output_min, output_max, // output quantization range - kTfLitePaddingValid, kTfLiteActNone, output_data); + + const float input_scale = .25; + const int input_zero_point = 0; + const float output_scale = .25; + const int output_zero_point = 0; + tflite::testing::TestAveragePoolQuantized( + input_shape, input_values, input_scale, input_zero_point, filter_height, + filter_width, stride_height, stride_width, golden, output_shape, + output_scale, output_zero_point, kTfLitePaddingValid, kTfLiteActNone, + output_data); } TF_LITE_MICRO_TEST(SimpleAveragePoolTestInt8PaddingValidStride1Stride2Relu) { - using tflite::testing::F2QS; - - const float input_min = -15.9375; - const float input_max = 15.8130; - const float output_min = -15.9375; - const float output_max = 15.8130; + const int input_shape[] = {4, 1, 2, 4, 1}; + const int8_t input_values[] = {0, -24, 8, 16, 12, 8, -40, 28}; + const int filter_width = 2; + const int filter_height = 2; + const int stride_width = 1; + const int stride_height = 2; + const int8_t golden[] = {0, 0, 3}; + const int output_shape[] = {4, 1, 1, 3, 1}; int8_t output_data[3]; - tflite::testing::TestAveragePoolingQuantized( - {4, 1, 2, 4, 1}, // Input shape - { // Input values - F2QS(0., input_min, input_max), F2QS(-6., input_min, input_max), - F2QS(2., input_min, input_max), F2QS(4., input_min, input_max), - F2QS(3., input_min, input_max), F2QS(2., input_min, input_max), - F2QS(-10., input_min, input_max), F2QS(7., input_min, input_max)}, - input_min, input_max, // input quantization range - 2, 2, // filter height, filter width - 2, 1, // stride height, stride width - { // Output values - F2QS(0., output_min, output_max), F2QS(0., output_min, output_max), - F2QS(0.75, output_min, output_max)}, - {4, 1, 1, 3, 1}, // Output shape - output_min, output_max, // output quantization range - kTfLitePaddingValid, kTfLiteActRelu, output_data); + + const float input_scale = .25; + const int input_zero_point = 0; + const float output_scale = .25; + const int output_zero_point = 0; + tflite::testing::TestAveragePoolQuantized( + input_shape, input_values, input_scale, input_zero_point, filter_height, + filter_width, stride_height, stride_width, golden, output_shape, + output_scale, output_zero_point, kTfLitePaddingValid, kTfLiteActRelu, + output_data); } TF_LITE_MICRO_TEST( SimpleAveragePoolTestInt8PaddingValidStride2Stride1ReluN1To1) { - using tflite::testing::F2QS; - - const float input_min = -15.9375; - const float input_max = 15.8130; - const float output_min = -15.9375; - const float output_max = 15.8130; + const int input_shape[] = {4, 1, 2, 4, 1}; + const int8_t input_values[] = {0, -24, 8, 16, 12, 8, -40, 28}; + const int filter_width = 2; + const int filter_height = 2; + const int stride_width = 2; + const int stride_height = 1; + const int8_t golden[] = {-1, 3}; + const int output_shape[] = {4, 1, 1, 2, 1}; int8_t output_data[2]; - tflite::testing::TestAveragePoolingQuantized( - {4, 1, 2, 4, 1}, // Input shape - { // Input values - F2QS(0., input_min, input_max), F2QS(-6., input_min, input_max), - F2QS(2., input_min, input_max), F2QS(4., input_min, input_max), - F2QS(3., input_min, input_max), F2QS(2., input_min, input_max), - F2QS(-10., input_min, input_max), F2QS(7., input_min, input_max)}, - input_min, input_max, // input quantization range - 2, 2, // filter height, filter width - 1, 2, // stride height, stride width - { // Output values - F2QS(-0.25, 
output_min, output_max), F2QS(0.75, output_min, output_max)}, - {4, 1, 1, 2, 1}, // Output shape - output_min, output_max, // output quantization range - kTfLitePaddingValid, kTfLiteActReluN1To1, output_data); + + const float input_scale = .25; + const int input_zero_point = 0; + const float output_scale = .25; + const int output_zero_point = 0; + tflite::testing::TestAveragePoolQuantized( + input_shape, input_values, input_scale, input_zero_point, filter_height, + filter_width, stride_height, stride_width, golden, output_shape, + output_scale, output_zero_point, kTfLitePaddingValid, kTfLiteActReluN1To1, + output_data); } TF_LITE_MICRO_TEST(SimpleAveragePoolTestInt8PaddingValidStride2Relu6) { - using tflite::testing::F2QS; - - const float input_min = -15.9375; - const float input_max = 15.8130; - const float output_min = -15.9375; - const float output_max = 15.8130; + const int input_shape[] = {4, 1, 2, 4, 1}; + const int8_t input_values[] = {12, -24, 32, 16, 12, 8, 40, 28}; + const int filter_width = 2; + const int filter_height = 2; + const int stride_width = 2; + const int stride_height = 2; + const int8_t golden[] = {2, 24}; + const int output_shape[] = {4, 1, 1, 2, 1}; int8_t output_data[2]; - tflite::testing::TestAveragePoolingQuantized( - {4, 1, 2, 4, 1}, // Input shape - { // Input values - F2QS(3., input_min, input_max), F2QS(-6., input_min, input_max), - F2QS(8., input_min, input_max), F2QS(4., input_min, input_max), - F2QS(3., input_min, input_max), F2QS(2., input_min, input_max), - F2QS(10., input_min, input_max), F2QS(7., input_min, input_max)}, - input_min, input_max, // input quantization range - 2, 2, // filter height, filter width - 2, 2, // stride height, stride width - { // Output values - F2QS(0.5, output_min, output_max), F2QS(6., output_min, output_max)}, - {4, 1, 1, 2, 1}, // Output shape - output_min, output_max, // output quantization range - kTfLitePaddingValid, kTfLiteActRelu6, output_data); + + const float input_scale = .25; + const int input_zero_point = 0; + const float output_scale = .25; + const int output_zero_point = 0; + tflite::testing::TestAveragePoolQuantized( + input_shape, input_values, input_scale, input_zero_point, filter_height, + filter_width, stride_height, stride_width, golden, output_shape, + output_scale, output_zero_point, kTfLitePaddingValid, kTfLiteActRelu6, + output_data); } TF_LITE_MICRO_TEST(SimpleAveragePoolTestInt8PaddingSameStride1ActNone) { - using tflite::testing::F2QS; - - const float input_min = -15.9375; - const float input_max = 15.8130; - const float output_min = -15.9375; - const float output_max = 15.8130; + const int input_shape[] = {4, 1, 2, 4, 1}; + const int8_t input_values[] = {12, -24, 32, 16, 12, 8, 40, 28}; + const int filter_width = 2; + const int filter_height = 2; + const int stride_width = 1; + const int stride_height = 1; + const int8_t golden[] = {2, 14, 29, 22, 10, 24, 34, 28}; + const int output_shape[] = {4, 1, 2, 4, 1}; int8_t output_data[8]; - tflite::testing::TestAveragePoolingQuantized( - {4, 1, 2, 4, 1}, // Input shape - { // Input values - F2QS(3., input_min, input_max), F2QS(-6., input_min, input_max), - F2QS(8., input_min, input_max), F2QS(4., input_min, input_max), - F2QS(3., input_min, input_max), F2QS(2., input_min, input_max), - F2QS(10., input_min, input_max), F2QS(7., input_min, input_max)}, - input_min, input_max, // input quantization range - 2, 2, // filter height, filter width - 1, 1, // stride height, stride width - { // Output values - F2QS(0.5, output_min, output_max), F2QS(3.5, 
output_min, output_max), - F2QS(7.25, output_min, output_max), F2QS(5.5, output_min, output_max), - F2QS(2.5, output_min, output_max), F2QS(6., output_min, output_max), - F2QS(8.5, output_min, output_max), F2QS(7., output_min, output_max)}, - {4, 1, 2, 4, 1}, // Output shape - output_min, output_max, // output quantization range - kTfLitePaddingSame, kTfLiteActNone, output_data); + + const float input_scale = .25; + const int input_zero_point = 0; + const float output_scale = .25; + const int output_zero_point = 0; + tflite::testing::TestAveragePoolQuantized( + input_shape, input_values, input_scale, input_zero_point, filter_height, + filter_width, stride_height, stride_width, golden, output_shape, + output_scale, output_zero_point, kTfLitePaddingValid, kTfLiteActNone, + output_data); } TF_LITE_MICRO_TEST(SimpleMaxPoolTestFloat) { + const int input_shape[] = {4, 1, 2, 4, 1}; + const float input_values[] = {0, 6, 2, 4, 3, 2, 10, 7}; + const int filter_width = 2; + const int filter_height = 2; + const int stride_width = 2; + const int stride_height = 2; + const float golden[] = {6, 10}; + const int output_shape[] = {4, 1, 1, 2, 1}; float output_data[2]; - tflite::testing::TestMaxPoolFloat({4, 1, 2, 4, 1}, // Input shape - { // Input values - 0, 6, 2, 4, 3, 2, 10, 7}, - 2, 2, // filter width, filter height - 2, 2, // stride width, stride height - { - // Output values - 6, - 10, - }, - {4, 1, 1, 2, 1}, // Output shape - kTfLitePaddingValid, kTfLiteActNone, - output_data); + tflite::testing::TestMaxPoolFloat(input_shape, input_values, filter_height, + filter_width, stride_height, stride_width, + golden, output_shape, kTfLitePaddingValid, + kTfLiteActNone, output_data); } TF_LITE_MICRO_TEST(SimpleMaxPoolTestFloatRelu) { + const int input_shape[] = {4, 1, 2, 4, 1}; + const float input_values[] = {-1, -6, 2, 4, -3, -2, 10.5, 7}; + const int filter_width = 2; + const int filter_height = 2; + const int stride_width = 2; + const int stride_height = 2; + const float golden[] = {0, 10.5}; + const int output_shape[] = {4, 1, 1, 2, 1}; float output_data[2]; - tflite::testing::TestMaxPoolFloat({4, 1, 2, 4, 1}, // Input shape - { - // Input values - -1, -6, 2, 4, // - -3, -2, 10.5, 7, // - }, - 2, 2, // filter width, filter height - 2, 2, // stride width, stride height - { - // Output values - 0.0, - 10.5, - }, - {4, 1, 1, 2, 1}, // Output shape - kTfLitePaddingValid, kTfLiteActRelu, - output_data); + tflite::testing::TestMaxPoolFloat(input_shape, input_values, filter_height, + filter_width, stride_height, stride_width, + golden, output_shape, kTfLitePaddingValid, + kTfLiteActRelu, output_data); } TF_LITE_MICRO_TEST(SimpleMaxPoolTestFloatReluN1To1) { + const int input_shape[] = {4, 1, 2, 4, 1}; + const float input_values1[] = {-2.75, -6, 0.2, 0.4, -3, -2, -0.3, 0.7}; + const int filter_width = 2; + const int filter_height = 2; + const int stride_width = 2; + const int stride_height = 2; + const float golden1[] = {-1.0, 0.7}; + const int output_shape[] = {4, 1, 1, 2, 1}; float output_data[2]; - tflite::testing::TestMaxPoolFloat({4, 1, 2, 4, 1}, // Input shape - { - // Input values - -2.75, -6, 0.2, 0.4, // - -3, -2, -0.3, 0.7, // - }, - 2, 2, // filter width, filter height - 2, 2, // stride width, stride height - { - // Output values - -1.0, - 0.7, - }, - {4, 1, 1, 2, 1}, // Output shape - kTfLitePaddingValid, kTfLiteActReluN1To1, - output_data); + tflite::testing::TestMaxPoolFloat(input_shape, input_values1, filter_height, + filter_width, stride_height, stride_width, + golden1, output_shape, 
kTfLitePaddingValid, + kTfLiteActReluN1To1, output_data); - tflite::testing::TestMaxPoolFloat({4, 1, 2, 4, 1}, // Input shape - { - // Input values - -2.75, -6, -2, -4, // - -3, -2, 10, -7, // - }, - 2, 2, // filter width, filter height - 2, 2, // stride width, stride height - { - // Output values - -1.0, - 1.0, - }, - {4, 1, 1, 2, 1}, // Output shape - kTfLitePaddingValid, kTfLiteActReluN1To1, - output_data); + const float input_values2[] = {-2.75, -6, -2, -4, -3, -2, 10, -7}; + const float golden2[] = {-1.0, 1.0}; + tflite::testing::TestMaxPoolFloat(input_shape, input_values2, filter_height, + filter_width, stride_height, stride_width, + golden2, output_shape, kTfLitePaddingValid, + kTfLiteActReluN1To1, output_data); } TF_LITE_MICRO_TEST(SimpleMaxPoolTestFloatRelu6) { + const int input_shape[] = {4, 1, 2, 4, 1}; + const float input_values1[] = {-1.5, -6, 12, 4, -3, -2, 10, 7}; + const int filter_width = 2; + const int filter_height = 2; + const int stride_width = 2; + const int stride_height = 2; + const float golden1[] = {0, 6}; + const int output_shape[] = {4, 1, 1, 2, 1}; float output_data[2]; - tflite::testing::TestMaxPoolFloat({4, 1, 2, 4, 1}, // Input shape - { - // Input values - -1.5, -6, 12, 4, // - -3, -2, 10, 7, // - }, - 2, 2, // filter width, filter height - 2, 2, // stride width, stride height - { - // Output values - 0.0, - 6.0, - }, - {4, 1, 1, 2, 1}, // Output shape - kTfLitePaddingValid, kTfLiteActRelu6, - output_data); + tflite::testing::TestMaxPoolFloat(input_shape, input_values1, filter_height, + filter_width, stride_height, stride_width, + golden1, output_shape, kTfLitePaddingValid, + kTfLiteActRelu6, output_data); - tflite::testing::TestMaxPoolFloat({4, 1, 2, 4, 1}, // Input shape - { - // Input values - 0, 4.5, 12, 4, // - 3, 2, 10, 7, // - }, - 2, 2, // filter width, filter height - 2, 2, // stride width, stride height - { - // Output values - 4.5, - 6.0, - }, - {4, 1, 1, 2, 1}, // Output shape - kTfLitePaddingValid, kTfLiteActRelu6, - output_data); + const float input_values2[] = {0, 4.5, 12, 4, 3, 2, 10, 7}; + const float golden2[] = {4.5, 6}; + tflite::testing::TestMaxPoolFloat(input_shape, input_values2, filter_height, + filter_width, stride_height, stride_width, + golden2, output_shape, kTfLitePaddingValid, + kTfLiteActRelu6, output_data); } TF_LITE_MICRO_TEST(SimpleMaxPoolTestPaddingSameStride1) { + const int input_shape[] = {4, 1, 2, 4, 1}; + const float input_values[] = {0, 6, 2, 4, 3, 2, 10, 7}; + const int filter_width = 2; + const int filter_height = 2; + const int stride_width = 1; + const int stride_height = 1; + const float golden[] = {6, 10, 10, 7, 3, 10, 10, 7}; + const int output_shape[] = {4, 1, 2, 4, 1}; float output_data[8]; - tflite::testing::TestMaxPoolFloat({4, 1, 2, 4, 1}, // Input shape - { - // Input values - 0, 6, 2, 4, // - 3, 2, 10, 7, // - }, - 2, 2, // filter width, filter height - 1, 1, // stride width, stride height - { - // Output values - 6, 10, 10, 7, // - 3, 10, 10, 7, // - }, - {4, 1, 2, 4, 1}, // Output shape - kTfLitePaddingSame, kTfLiteActNone, - output_data); + tflite::testing::TestMaxPoolFloat(input_shape, input_values, filter_height, + filter_width, stride_height, stride_width, + golden, output_shape, kTfLitePaddingSame, + kTfLiteActNone, output_data); } TF_LITE_MICRO_TEST(SimpleMaxPoolTestPaddingValidStride1) { - float output_data[3]; - tflite::testing::TestMaxPoolFloat({4, 1, 2, 4, 1}, // Input shape - { - // Input values - 0, 6, 2, 4, // - 3, 2, 10, 7, // - }, - 2, 2, // filter width, filter height - 1, 1, // stride 
width, stride height - { - // Output values - 6, - 10, - 10, - }, - {4, 1, 1, 3, 1}, // Output shape - kTfLitePaddingValid, kTfLiteActNone, - output_data); + const int input_shape[] = {4, 1, 2, 4, 1}; + const float input_values[] = {0, 6, 2, 4, 3, 2, 10, 7}; + const int filter_width = 2; + const int filter_height = 2; + const int stride_width = 1; + const int stride_height = 1; + const float golden[] = {6, 10, 10}; + const int output_shape[] = {4, 1, 1, 3, 1}; + float output_data[8]; + tflite::testing::TestMaxPoolFloat(input_shape, input_values, filter_height, + filter_width, stride_height, stride_width, + golden, output_shape, kTfLitePaddingValid, + kTfLiteActNone, output_data); } TF_LITE_MICRO_TEST(SimpleMaxPoolTestUInt8ActNone) { - using tflite::testing::F2Q; - + const int input_shape[] = {4, 1, 2, 4, 1}; + const uint8_t input_values[] = {0, 12, 4, 8, 6, 4, 20, 14}; + const int filter_width = 2; + const int filter_height = 2; + const int stride_width = 2; + const int stride_height = 2; + const uint8_t golden[] = {12, 20}; + const int output_shape[] = {4, 1, 1, 2, 1}; uint8_t output_data[2]; - float input_min = 0; - float input_max = 15.9375; - float output_min = 0; - float output_max = 15.9375; - int filter_width = 2; - int filter_height = 2; - int stride_width = 2; - int stride_height = 2; + + const float input_scale = 1.0; + const int input_zero_point = 0; + const float output_scale = 1.0; + const int output_zero_point = 0; tflite::testing::TestMaxPoolQuantized( - {4, 1, 2, 4, 1}, // Input shape - { - // Input values - F2Q(0, input_min, input_max), - F2Q(6, input_min, input_max), - F2Q(2, input_min, input_max), - F2Q(4, input_min, input_max), - F2Q(3, input_min, input_max), - F2Q(2, input_min, input_max), - F2Q(10, input_min, input_max), - F2Q(7, input_min, input_max), - }, - input_min, input_max, filter_width, filter_height, stride_width, - stride_height, - {// Output values - F2Q(6, output_min, output_max), F2Q(10, output_min, output_max)}, - output_min, output_max, {4, 1, 1, 2, 1}, // Output shape - kTfLitePaddingValid, kTfLiteActNone, output_data); + input_shape, input_values, input_scale, input_zero_point, filter_height, + filter_width, stride_height, stride_width, golden, output_shape, + output_scale, output_zero_point, kTfLitePaddingValid, kTfLiteActNone, + output_data); } TF_LITE_MICRO_TEST(MaxPoolTestUInt8ActRelu) { - using tflite::testing::F2Q; - + const int input_shape[] = {4, 1, 2, 4, 1}; + const uint8_t input_values[] = {0, 4, 2, 4, 3, 2, 14, 7}; + const int filter_width = 2; + const int filter_height = 2; + const int stride_width = 2; + const int stride_height = 2; + const uint8_t golden[] = {4, 14}; + const int output_shape[] = {4, 1, 1, 2, 1}; uint8_t output_data[2]; - float input_min = -15.9375; - float input_max = 15.9375; - float output_min = -15.9375; - float output_max = 15.9375; - int filter_width = 2; - int filter_height = 2; - int stride_width = 2; - int stride_height = 2; + + const float input_scale = 1.0; + const int input_zero_point = 4; + const float output_scale = 1.0; + const int output_zero_point = 4; tflite::testing::TestMaxPoolQuantized( - {4, 1, 2, 4, 1}, // Input shape - { - // Input values - F2Q(-1.5, input_min, input_max), - F2Q(-6, input_min, input_max), - F2Q(2, input_min, input_max), - F2Q(4, input_min, input_max), - F2Q(-3, input_min, input_max), - F2Q(-2, input_min, input_max), - F2Q(10, input_min, input_max), - F2Q(7, input_min, input_max), - }, - input_min, input_max, filter_width, filter_height, stride_width, - stride_height, - {// Output 
values - F2Q(0, output_min, output_max), F2Q(10, output_min, output_max)}, - output_min, output_max, {4, 1, 1, 2, 1}, // Output shape - kTfLitePaddingValid, kTfLiteActRelu, output_data); + input_shape, input_values, input_scale, input_zero_point, filter_height, + filter_width, stride_height, stride_width, golden, output_shape, + output_scale, output_zero_point, kTfLitePaddingValid, kTfLiteActRelu, + output_data); } TF_LITE_MICRO_TEST(MaxPoolTestUInt8ActReluN1To1) { - using tflite::testing::F2Q; - + const int input_shape[] = {4, 1, 2, 4, 1}; + const uint8_t input_values[] = {0, 4, 2, 4, 3, 2, 14, 7}; + const int filter_width = 2; + const int filter_height = 2; + const int stride_width = 2; + const int stride_height = 2; + const uint8_t golden[] = {3, 5}; + const int output_shape[] = {4, 1, 1, 2, 1}; uint8_t output_data[2]; - float input_min = -15.9375; - float input_max = 15.9375; - float output_min = -15.9375; - float output_max = 15.9375; - int filter_width = 2; - int filter_height = 2; - int stride_width = 2; - int stride_height = 2; - tflite::testing::TestMaxPoolQuantized( - {4, 1, 2, 4, 1}, // Input shape - { - // Input values - F2Q(-1.7, input_min, input_max), - F2Q(-6, input_min, input_max), - F2Q(2, input_min, input_max), - F2Q(4, input_min, input_max), - F2Q(-3, input_min, input_max), - F2Q(-2, input_min, input_max), - F2Q(-10, input_min, input_max), - F2Q(7, input_min, input_max), - }, - input_min, input_max, filter_width, filter_height, stride_width, - stride_height, - {// Output values - F2Q(-1.0, output_min, output_max), F2Q(1.0, output_min, output_max)}, - output_min, output_max, {4, 1, 1, 2, 1}, // Output shape - kTfLitePaddingValid, kTfLiteActReluN1To1, output_data); + + const float input_scale = 1.0; + const int input_zero_point = 4; + const float output_scale = 1.0; + const int output_zero_point = 4; + tflite::testing::TestAveragePoolQuantized( + input_shape, input_values, input_scale, input_zero_point, filter_height, + filter_width, stride_height, stride_width, golden, output_shape, + output_scale, output_zero_point, kTfLitePaddingValid, kTfLiteActReluN1To1, + output_data); } TF_LITE_MICRO_TEST(MaxPoolTestUInt8ActRelu6) { - using tflite::testing::F2Q; - + const int input_shape[] = {4, 1, 2, 4, 1}; + const uint8_t input_values1[] = {12, 0, 36, 20, 6, 8, 32, 26}; + const int filter_width = 2; + const int filter_height = 2; + const int stride_width = 2; + const int stride_height = 2; + const uint8_t golden1[] = {12, 24}; + const int output_shape[] = {4, 1, 1, 2, 1}; uint8_t output_data[8]; - float input_min = -15.9375; - float input_max = 15.9375; - float output_min = -15.9375; - float output_max = 15.9375; - int filter_width = 2; - int filter_height = 2; - int stride_width = 2; - int stride_height = 2; - tflite::testing::TestMaxPoolQuantized( - {4, 1, 2, 4, 1}, // Input shape - { - // Input values - F2Q(0, input_min, input_max), - F2Q(-6, input_min, input_max), - F2Q(12, input_min, input_max), - F2Q(4, input_min, input_max), - F2Q(-3, input_min, input_max), - F2Q(-2, input_min, input_max), - F2Q(10, input_min, input_max), - F2Q(7, input_min, input_max), - }, - input_min, input_max, filter_width, filter_height, stride_width, - stride_height, - {// Output values - F2Q(0.0, output_min, output_max), F2Q(6.0, output_min, output_max)}, - output_min, output_max, {4, 1, 1, 2, 1}, // Output shape - kTfLitePaddingValid, kTfLiteActRelu6, output_data); + const float input_scale = 0.5; + const int input_zero_point = 12; + const float output_scale = 0.5; + const int output_zero_point = 
12; tflite::testing::TestMaxPoolQuantized( - {4, 1, 2, 4, 1}, // Input shape - { - // Input values - F2Q(0, input_min, input_max), - F2Q(4.5, input_min, input_max), - F2Q(12, input_min, input_max), - F2Q(4, input_min, input_max), - F2Q(3, input_min, input_max), - F2Q(2, input_min, input_max), - F2Q(10, input_min, input_max), - F2Q(7, input_min, input_max), - }, - input_min, input_max, filter_width, filter_height, stride_width, - stride_height, - {// Output values - F2Q(4.5, output_min, output_max), F2Q(6.0, output_min, output_max)}, - output_min, output_max, {4, 1, 1, 2, 1}, // Output shape - kTfLitePaddingValid, kTfLiteActRelu6, output_data); + input_shape, input_values1, input_scale, input_zero_point, filter_height, + filter_width, stride_height, stride_width, golden1, output_shape, + output_scale, output_zero_point, kTfLitePaddingValid, kTfLiteActRelu6, + output_data); + + const uint8_t input_values2[] = {12, 21, 36, 16, 18, 16, 32, 26}; + + const uint8_t golden2[] = {21, 24}; + tflite::testing::TestMaxPoolQuantized( + input_shape, input_values2, input_scale, input_zero_point, filter_height, + filter_width, stride_height, stride_width, golden2, output_shape, + output_scale, output_zero_point, kTfLitePaddingValid, kTfLiteActRelu6, + output_data); } TF_LITE_MICRO_TEST(MaxPoolTestUInt8PaddingSameStride1) { - using tflite::testing::F2Q; - + const int input_shape[] = {4, 1, 2, 4, 1}; + const uint8_t input_values1[] = {0, 6, 2, 4, 3, 2, 10, 7}; + const int filter_width = 2; + const int filter_height = 2; + const int stride_width = 1; + const int stride_height = 1; + const uint8_t golden1[] = {6, 10, 10, 7, 3, 10, 10, 7}; + const int output_shape[] = {4, 1, 2, 4, 1}; uint8_t output_data[8]; - float input_min = 0; - float input_max = 15.9375; - float output_min = 0; - float output_max = 15.9375; - int filter_width = 2; - int filter_height = 2; - int stride_width = 1; - int stride_height = 1; + + const float input_scale = 1.0; + const int input_zero_point = 0; + const float output_scale = 1.0; + const int output_zero_point = 0; tflite::testing::TestMaxPoolQuantized( - {4, 1, 2, 4, 1}, // Input shape - { - // Input values - F2Q(0, input_min, input_max), - F2Q(6, input_min, input_max), - F2Q(2, input_min, input_max), - F2Q(4, input_min, input_max), - F2Q(3, input_min, input_max), - F2Q(2, input_min, input_max), - F2Q(10, input_min, input_max), - F2Q(7, input_min, input_max), - }, - input_min, input_max, filter_width, filter_height, stride_width, - stride_height, - { - // Output values - F2Q(6, output_min, output_max), - F2Q(10, output_min, output_max), - F2Q(10, output_min, output_max), - F2Q(7, output_min, output_max), - F2Q(3, output_min, output_max), - F2Q(10, output_min, output_max), - F2Q(10, output_min, output_max), - F2Q(7, output_min, output_max), - }, - output_min, output_max, {4, 1, 2, 4, 1}, // Output shape - kTfLitePaddingSame, kTfLiteActNone, output_data); + input_shape, input_values1, input_scale, input_zero_point, filter_height, + filter_width, stride_height, stride_width, golden1, output_shape, + output_scale, output_zero_point, kTfLitePaddingValid, kTfLiteActNone, + output_data); } TF_LITE_MICRO_TEST(MaxPoolTestUInt8PaddingValidStride1) { - using tflite::testing::F2Q; - + const int input_shape[] = {4, 1, 2, 4, 1}; + const uint8_t input_values1[] = {0, 6, 2, 4, 3, 2, 10, 7}; + const int filter_width = 2; + const int filter_height = 2; + const int stride_width = 1; + const int stride_height = 1; + const uint8_t golden1[] = {6, 10, 10}; + const int output_shape[] = {4, 1, 1, 3, 1}; 
uint8_t output_data[3]; - float input_min = 0; - float input_max = 15.9375; - float output_min = 0; - float output_max = 15.9375; - int filter_width = 2; - int filter_height = 2; - int stride_width = 1; - int stride_height = 1; + + const float input_scale = 1.0; + const int input_zero_point = 0; + const float output_scale = 1.0; + const int output_zero_point = 0; tflite::testing::TestMaxPoolQuantized( - {4, 1, 2, 4, 1}, // Input shape - { - // Input values - F2Q(0, input_min, input_max), - F2Q(6, input_min, input_max), - F2Q(2, input_min, input_max), - F2Q(4, input_min, input_max), - F2Q(3, input_min, input_max), - F2Q(2, input_min, input_max), - F2Q(10, input_min, input_max), - F2Q(7, input_min, input_max), - }, - input_min, input_max, filter_width, filter_height, stride_width, - stride_height, - { - // Output values - F2Q(6, output_min, output_max), - F2Q(10, output_min, output_max), - F2Q(10, output_min, output_max), - }, - output_min, output_max, {4, 1, 1, 3, 1}, // Output shape - kTfLitePaddingValid, kTfLiteActNone, output_data); + input_shape, input_values1, input_scale, input_zero_point, filter_height, + filter_width, stride_height, stride_width, golden1, output_shape, + output_scale, output_zero_point, kTfLitePaddingValid, kTfLiteActNone, + output_data); } TF_LITE_MICRO_TEST(SimpleMaxPoolTestInt8ActNone) { - using tflite::testing::F2QS; - + const int input_shape[] = {4, 1, 2, 4, 1}; + const int8_t input_values1[] = {0, 6, 2, 4, 3, 2, 10, 7}; + const int filter_width = 2; + const int filter_height = 2; + const int stride_width = 2; + const int stride_height = 2; + const int8_t golden1[] = {6, 10}; + const int output_shape[] = {4, 1, 1, 2, 1}; int8_t output_data[2]; - float input_min = 0; - float input_max = 15.9375; - float output_min = 0; - float output_max = 15.9375; - int filter_width = 2; - int filter_height = 2; - int stride_width = 2; - int stride_height = 2; + + const float input_scale = 1.0; + const int input_zero_point = 0; + const float output_scale = 1.0; + const int output_zero_point = 0; tflite::testing::TestMaxPoolQuantized( - {4, 1, 2, 4, 1}, // Input shape - { - // Input values - F2QS(0, input_min, input_max), - F2QS(6, input_min, input_max), - F2QS(2, input_min, input_max), - F2QS(4, input_min, input_max), - F2QS(3, input_min, input_max), - F2QS(2, input_min, input_max), - F2QS(10, input_min, input_max), - F2QS(7, input_min, input_max), - }, - input_min, input_max, filter_width, filter_height, stride_width, - stride_height, - {// Output values - F2QS(6, output_min, output_max), F2QS(10, output_min, output_max)}, - output_min, output_max, {4, 1, 1, 2, 1}, // Output shape - kTfLitePaddingValid, kTfLiteActNone, output_data); + input_shape, input_values1, input_scale, input_zero_point, filter_height, + filter_width, stride_height, stride_width, golden1, output_shape, + output_scale, output_zero_point, kTfLitePaddingValid, kTfLiteActNone, + output_data); } -TF_LITE_MICRO_TEST(MaxPoolTestUInt8ActRelu) { - using tflite::testing::F2QS; - +TF_LITE_MICRO_TEST(MaxPoolTestInt8ActRelu) { + const int input_shape[] = {4, 1, 2, 4, 1}; + const int8_t input_values1[] = {-3, -12, 4, 8, -6, -4, 20, 14}; + const int filter_width = 2; + const int filter_height = 2; + const int stride_width = 2; + const int stride_height = 2; + const int8_t golden1[] = {0, 20}; + const int output_shape[] = {4, 1, 1, 2, 1}; int8_t output_data[2]; - float input_min = -15.9375; - float input_max = 15.9375; - float output_min = -15.9375; - float output_max = 15.9375; - int filter_width = 2; - int 
filter_height = 2; - int stride_width = 2; - int stride_height = 2; + + const float input_scale = 0.5; + const int input_zero_point = 0; + const float output_scale = 0.5; + const int output_zero_point = 0; tflite::testing::TestMaxPoolQuantized( - {4, 1, 2, 4, 1}, // Input shape - { - // Input values - F2QS(-1.5, input_min, input_max), - F2QS(-6, input_min, input_max), - F2QS(2, input_min, input_max), - F2QS(4, input_min, input_max), - F2QS(-3, input_min, input_max), - F2QS(-2, input_min, input_max), - F2QS(10, input_min, input_max), - F2QS(7, input_min, input_max), - }, - input_min, input_max, filter_width, filter_height, stride_width, - stride_height, - {// Output values - F2QS(0, output_min, output_max), F2QS(10, output_min, output_max)}, - output_min, output_max, {4, 1, 1, 2, 1}, // Output shape - kTfLitePaddingValid, kTfLiteActRelu, output_data); + input_shape, input_values1, input_scale, input_zero_point, filter_height, + filter_width, stride_height, stride_width, golden1, output_shape, + output_scale, output_zero_point, kTfLitePaddingValid, kTfLiteActRelu, + output_data); } -TF_LITE_MICRO_TEST(MaxPoolTestUInt8ActReluN1To1) { - using tflite::testing::F2QS; - +TF_LITE_MICRO_TEST(MaxPoolTestInt8ActReluN1To1) { + const int input_shape[] = {4, 1, 2, 4, 1}; + const int8_t input_values1[] = {-2, -6, -2, -4, -3, -2, 10, 7}; + const int filter_width = 2; + const int filter_height = 2; + const int stride_width = 2; + const int stride_height = 2; + const int8_t golden1[] = {-1, 1}; + const int output_shape[] = {4, 1, 1, 2, 1}; int8_t output_data[2]; - float input_min = -15.9375; - float input_max = 15.9375; - float output_min = -15.9375; - float output_max = 15.9375; - int filter_width = 2; - int filter_height = 2; - int stride_width = 2; - int stride_height = 2; + + const float input_scale = 1.0; + const int input_zero_point = 0; + const float output_scale = 1.0; + const int output_zero_point = 0; tflite::testing::TestMaxPoolQuantized( - {4, 1, 2, 4, 1}, // Input shape - { - // Input values - F2QS(-1.7, input_min, input_max), - F2QS(-6, input_min, input_max), - F2QS(2, input_min, input_max), - F2QS(4, input_min, input_max), - F2QS(-3, input_min, input_max), - F2QS(-2, input_min, input_max), - F2QS(-10, input_min, input_max), - F2QS(7, input_min, input_max), - }, - input_min, input_max, filter_width, filter_height, stride_width, - stride_height, - {// Output values - F2QS(-1.0, output_min, output_max), F2QS(1.0, output_min, output_max)}, - output_min, output_max, {4, 1, 1, 2, 1}, // Output shape - kTfLitePaddingValid, kTfLiteActReluN1To1, output_data); + input_shape, input_values1, input_scale, input_zero_point, filter_height, + filter_width, stride_height, stride_width, golden1, output_shape, + output_scale, output_zero_point, kTfLitePaddingValid, kTfLiteActReluN1To1, + output_data); } -TF_LITE_MICRO_TEST(MaxPoolTestUInt8ActRelu6) { - using tflite::testing::F2QS; +TF_LITE_MICRO_TEST(MaxPoolTestInt8ActRelu6) { + const int input_shape[] = {4, 1, 2, 4, 1}; + const int8_t input_values1[] = {0, -6, 12, 4, -3, -2, 10, 7}; + const int filter_width = 2; + const int filter_height = 2; + const int stride_width = 2; + const int stride_height = 2; + const int8_t golden1[] = {0, 6}; + const int output_shape[] = {4, 1, 1, 2, 1}; + int8_t output_data[2]; - int8_t output_data[8]; - float input_min = -15.9375; - float input_max = 15.9375; - float output_min = -15.9375; - float output_max = 15.9375; - int filter_width = 2; - int filter_height = 2; - int stride_width = 2; - int stride_height = 2; + const float 
input_scale = 1.0; + const int input_zero_point = 0; + const float output_scale = 1.0; + const int output_zero_point = 0; tflite::testing::TestMaxPoolQuantized( - {4, 1, 2, 4, 1}, // Input shape - { - // Input values - F2QS(0, input_min, input_max), - F2QS(-6, input_min, input_max), - F2QS(12, input_min, input_max), - F2QS(4, input_min, input_max), - F2QS(-3, input_min, input_max), - F2QS(-2, input_min, input_max), - F2QS(10, input_min, input_max), - F2QS(7, input_min, input_max), - }, - input_min, input_max, filter_width, filter_height, stride_width, - stride_height, - {// Output values - F2QS(0.0, output_min, output_max), F2QS(6.0, output_min, output_max)}, - output_min, output_max, {4, 1, 1, 2, 1}, // Output shape - kTfLitePaddingValid, kTfLiteActRelu6, output_data); - - tflite::testing::TestMaxPoolQuantized( - {4, 1, 2, 4, 1}, // Input shape - { - // Input values - F2QS(0, input_min, input_max), - F2QS(4.5, input_min, input_max), - F2QS(12, input_min, input_max), - F2QS(4, input_min, input_max), - F2QS(3, input_min, input_max), - F2QS(2, input_min, input_max), - F2QS(10, input_min, input_max), - F2QS(7, input_min, input_max), - }, - input_min, input_max, filter_width, filter_height, stride_width, - stride_height, - {// Output values - F2QS(4.5, output_min, output_max), F2QS(6.0, output_min, output_max)}, - output_min, output_max, {4, 1, 1, 2, 1}, // Output shape - kTfLitePaddingValid, kTfLiteActRelu6, output_data); + input_shape, input_values1, input_scale, input_zero_point, filter_height, + filter_width, stride_height, stride_width, golden1, output_shape, + output_scale, output_zero_point, kTfLitePaddingValid, kTfLiteActRelu6, + output_data); } TF_LITE_MICRO_TEST(MaxPoolTestUInt8PaddingSameStride1) { - using tflite::testing::F2QS; + const int input_shape[] = {4, 1, 2, 4, 1}; + const uint8_t input_values1[] = {0, 6, 2, 4, 3, 2, 10, 7}; + const int filter_width = 2; + const int filter_height = 2; + const int stride_width = 1; + const int stride_height = 1; + const uint8_t golden1[] = {6, 10, 10, 7, 3, 10, 10, 7}; + const int output_shape[] = {4, 1, 2, 4, 1}; + uint8_t output_data[8]; - int8_t output_data[8]; - float input_min = 0; - float input_max = 15.9375; - float output_min = 0; - float output_max = 15.9375; - int filter_width = 2; - int filter_height = 2; - int stride_width = 1; - int stride_height = 1; + const float input_scale = 1.0; + const int input_zero_point = 0; + const float output_scale = 1.0; + const int output_zero_point = 0; tflite::testing::TestMaxPoolQuantized( - {4, 1, 2, 4, 1}, // Input shape - { - // Input values - F2QS(0, input_min, input_max), - F2QS(6, input_min, input_max), - F2QS(2, input_min, input_max), - F2QS(4, input_min, input_max), - F2QS(3, input_min, input_max), - F2QS(2, input_min, input_max), - F2QS(10, input_min, input_max), - F2QS(7, input_min, input_max), - }, - input_min, input_max, filter_width, filter_height, stride_width, - stride_height, - { - // Output values - F2QS(6, output_min, output_max), - F2QS(10, output_min, output_max), - F2QS(10, output_min, output_max), - F2QS(7, output_min, output_max), - F2QS(3, output_min, output_max), - F2QS(10, output_min, output_max), - F2QS(10, output_min, output_max), - F2QS(7, output_min, output_max), - }, - output_min, output_max, {4, 1, 2, 4, 1}, // Output shape - kTfLitePaddingSame, kTfLiteActNone, output_data); + input_shape, input_values1, input_scale, input_zero_point, filter_height, + filter_width, stride_height, stride_width, golden1, output_shape, + output_scale, output_zero_point, 
kTfLitePaddingSame, kTfLiteActNone, + output_data); } TF_LITE_MICRO_TEST(MaxPoolTestUInt8PaddingValidStride1) { - using tflite::testing::F2QS; + const int input_shape[] = {4, 1, 2, 4, 1}; + const uint8_t input_values1[] = {0, 6, 2, 4, 3, 2, 10, 7}; + const int filter_width = 2; + const int filter_height = 2; + const int stride_width = 1; + const int stride_height = 1; + const uint8_t golden1[] = {6, 10, 10}; + const int output_shape[] = {4, 1, 1, 3, 1}; + uint8_t output_data[3]; - int8_t output_data[3]; - float input_min = 0; - float input_max = 15.9375; - float output_min = 0; - float output_max = 15.9375; - int filter_width = 2; - int filter_height = 2; - int stride_width = 1; - int stride_height = 1; + const float input_scale = 1.0; + const int input_zero_point = 0; + const float output_scale = 1.0; + const int output_zero_point = 0; tflite::testing::TestMaxPoolQuantized( - {4, 1, 2, 4, 1}, // Input shape - { - // Input values - F2QS(0, input_min, input_max), - F2QS(6, input_min, input_max), - F2QS(2, input_min, input_max), - F2QS(4, input_min, input_max), - F2QS(3, input_min, input_max), - F2QS(2, input_min, input_max), - F2QS(10, input_min, input_max), - F2QS(7, input_min, input_max), - }, - input_min, input_max, filter_width, filter_height, stride_width, - stride_height, - { - // Output values - F2QS(6, output_min, output_max), - F2QS(10, output_min, output_max), - F2QS(10, output_min, output_max), - }, - output_min, output_max, {4, 1, 1, 3, 1}, // Output shape - kTfLitePaddingValid, kTfLiteActNone, output_data); + input_shape, input_values1, input_scale, input_zero_point, filter_height, + filter_width, stride_height, stride_width, golden1, output_shape, + output_scale, output_zero_point, kTfLitePaddingValid, kTfLiteActNone, + output_data); } TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/kernels/prelu_test.cc b/tensorflow/lite/micro/kernels/prelu_test.cc index 4e352763cec..f559ddff993 100644 --- a/tensorflow/lite/micro/kernels/prelu_test.cc +++ b/tensorflow/lite/micro/kernels/prelu_test.cc @@ -23,16 +23,35 @@ namespace tflite { namespace testing { namespace { -void TestPreluFloat(std::initializer_list input_dims_data, - std::initializer_list input_data, - std::initializer_list alpha_dims_data, - std::initializer_list alpha_data, - std::initializer_list expected_output_data, - std::initializer_list output_dims_data, - float* output_data) { - TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data); - TfLiteIntArray* alpha_dims = IntArrayFromInitializer(alpha_dims_data); - TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); +template +void ValidatePreluGoldens(TfLiteTensor* tensors, int tensors_size, + const T* golden, const int output_length, + T* output_data) { + int inputs_array_data[] = {2, 0, 1}; + TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); + int outputs_array_data[] = {1, 2}; + TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); + + const TfLiteRegistration registration = tflite::ops::micro::Register_PRELU(); + micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, + outputs_array, + /*builtin_data=*/nullptr, micro_test::reporter); + + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); + + for (int i = 0; i < output_length; ++i) { + TF_LITE_MICRO_EXPECT_NEAR(golden[i], output_data[i], 1e-5f); + } +} + +void TestPreluFloat(const int* input_dims_data, const float* input_data, + const int* alpha_dims_data, 
const float* alpha_data, + const float* expected_output_data, + const int* output_dims_data, float* output_data) { + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); + TfLiteIntArray* alpha_dims = IntArrayFromInts(alpha_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); const int output_dims_count = ElementCount(*output_dims); constexpr int inputs_size = 2; constexpr int outputs_size = 1; @@ -43,66 +62,42 @@ void TestPreluFloat(std::initializer_list input_dims_data, CreateFloatTensor(output_data, output_dims), }; - int inputs_array_data[] = {2, 0, 1}; - TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data); - int outputs_array_data[] = {1, 2}; - TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); - - const TfLiteRegistration registration = tflite::ops::micro::Register_PRELU(); - micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, - outputs_array, - /*builtin_data=*/nullptr, micro_test::reporter); - - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); - - for (int i = 0; i < output_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i], - 1e-5f); - } + ValidatePreluGoldens(tensors, tensors_size, expected_output_data, + output_dims_count, output_data); } // Template argument T can be either uint8_t or int8_t depending on which type // of quantization required to be tested. template -void TestPreluQuantized(std::initializer_list input_dims_data, - std::initializer_list input_data, float input_min, - float input_max, - std::initializer_list alpha_dims_data, - std::initializer_list alpha_data, float alpha_min, - float alpha_max, - std::initializer_list expected_output_data, - std::initializer_list output_dims_data, - float output_min, float output_max, T* output_data) { - TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data); - TfLiteIntArray* alpha_dims = IntArrayFromInitializer(alpha_dims_data); - TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data); +void TestPreluQuantized(const int* input_dims_data, const float* input_data, + T* input_quantized, const float input_scale, + const int input_zero_point, const int* alpha_dims_data, + const float* alpha_data, T* alpha_quantized, + const float alpha_scale, const int alpha_zero_point, + const float* golden, T* golden_quantized, + const float output_scale, const int output_zero_point, + const int* output_dims_data, T* output_data) { + TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); + TfLiteIntArray* alpha_dims = IntArrayFromInts(alpha_dims_data); + TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); const int output_dims_count = ElementCount(*output_dims); constexpr int inputs_size = 2; constexpr int outputs_size = 1; constexpr int tensors_size = inputs_size + outputs_size; TfLiteTensor tensors[tensors_size] = { - CreateQuantizedTensor(input_data, input_dims, input_min, input_max), - CreateQuantizedTensor(alpha_data, alpha_dims, alpha_min, alpha_max), - CreateQuantizedTensor(output_data, output_dims, output_min, output_max), + CreateQuantizedTensor(input_data, input_quantized, input_dims, + input_scale, input_zero_point), + CreateQuantizedTensor(alpha_data, alpha_quantized, alpha_dims, + alpha_scale, alpha_zero_point), + CreateQuantizedTensor(output_data, output_dims, output_scale, + output_zero_point), }; - int inputs_array_data[] = {2, 0, 1}; - TfLiteIntArray* inputs_array = 
IntArrayFromInts(inputs_array_data); - int outputs_array_data[] = {1, 2}; - TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data); + AsymmetricQuantize(golden, golden_quantized, output_dims_count, output_scale, + output_zero_point); - const TfLiteRegistration registration = tflite::ops::micro::Register_PRELU(); - micro::KernelRunner runner(registration, tensors, tensors_size, inputs_array, - outputs_array, - /*builtin_data=*/nullptr, micro_test::reporter); - - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.InitAndPrepare()); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, runner.Invoke()); - - for (int i = 0; i < output_dims_count; ++i) { - TF_LITE_MICRO_EXPECT_EQ(expected_output_data.begin()[i], output_data[i]); - } + ValidatePreluGoldens(tensors, tensors_size, golden_quantized, + output_dims_count, output_data); } } // namespace } // namespace testing @@ -111,74 +106,89 @@ void TestPreluQuantized(std::initializer_list input_dims_data, TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(FloatPreluActivationsOpTest) { + const int input_shape[] = {3, 2, 2, 3}; + const float input_values[] = { + 0.0f, 0.0f, 0.0f, // Row 1, Column 1 + 1.0f, 1.0f, 1.0f, // Row 1, Column 2 + -1.0f, -1.0f, -1.0f, // Row 2, Column 1 + -2.0f, -2.0f, -2.0f, // Row 1, Column 2 + }; + const int alpha_shape[] = {3, 1, 1, 3}; + const float alpha_values[] = {0.0f, 1.0f, 2.0f}; + const int output_shape[] = {3, 2, 2, 3}; + const float golden[] = { + 0.0f, 0.0f, 0.0f, // Row 1, Column 1 + 1.0f, 1.0f, 1.0f, // Row 1, Column 2 + 0.0f, -1.0f, -2.0f, // Row 2, Column 1 + 0.0f, -2.0f, -4.0f, // Row 1, Column 2 + }; const int output_dims_count = 12; float output_data[output_dims_count]; - tflite::testing::TestPreluFloat({3, 2, 2, 3}, // input shape - { - 0.0f, 0.0f, 0.0f, // Row 1, Column 1 - 1.0f, 1.0f, 1.0f, // Row 1, Column 2 - -1.0f, -1.0f, -1.0f, // Row 2, Column 1 - -2.0f, -2.0f, -2.0f, // Row 1, Column 2 - }, - {3, 1, 1, 3}, // alpha shape - {0.0f, 1.0f, 2.0f}, // alpha values - { - 0.0f, 0.0f, 0.0f, // Row 1, Column 1 - 1.0f, 1.0f, 1.0f, // Row 1, Column 2 - 0.0f, -1.0f, -2.0f, // Row 2, Column 1 - 0.0f, -2.0f, -4.0f, // Row 1, Column 2 - }, - {3, 2, 2, 3}, // output shape + tflite::testing::TestPreluFloat(input_shape, input_values, alpha_shape, + alpha_values, golden, output_shape, output_data); } TF_LITE_MICRO_TEST(QuantizedUint8PreluActivationsOpTest) { - using tflite::testing::F2Q; - const float kMin = -4; - const float kMax = 127.f / 32.f; - const int output_dims_count = 12; - uint8_t output_data[output_dims_count]; + const int input_shape[] = {3, 2, 2, 3}; + const float input_values[] = { + 0.0f, 0.0f, 0.0f, // Row 1, Column 1 + 0.5f, 0.5f, 0.5f, // Row 1, Column 2 + -1.0f, -1.0f, -1.0f, // Row 2, Column 1 + -0.25f, -0.25f, -0.25f, // Row 1, Column 2 + }; + const int alpha_shape[] = {3, 1, 1, 3}; + const float alpha_values[] = {0.0f, 0.5f, -0.5f}; + const int output_shape[] = {3, 2, 2, 3}; + const float golden[] = { + 0.0f, 0.0f, 0.0f, // Row 1, Column 1 + 0.5f, 0.5f, 0.5f, // Row 1, Column 2 + 0.0f, -0.5f, 0.5f, // Row 2, Column 1 + 0.0f, -0.125f, 0.125f, // Row 1, Column 2 + }; + + const int dims_count = 12; + + uint8_t input_quantized[dims_count]; + uint8_t alpha_quantized[3]; + uint8_t golden_quantized[dims_count]; + float scale = 0.125; + int zero_point = 127; + uint8_t output_data[dims_count]; + tflite::testing::TestPreluQuantized( - {3, 2, 2, 3}, // input shape - {F2Q(0.0f, kMin, kMax), F2Q(0.0f, kMin, kMax), F2Q(0.0f, kMin, kMax), - F2Q(0.5f, kMin, kMax), F2Q(0.5f, kMin, kMax), F2Q(0.5f, kMin, kMax), - F2Q(-1.0f, 
kMin, kMax), F2Q(-1.0f, kMin, kMax), F2Q(-1.0f, kMin, kMax), - F2Q(-0.25f, kMin, kMax), F2Q(-0.25f, kMin, kMax), - F2Q(-0.25f, kMin, kMax)}, - kMin, kMax, {3, 1, 1, 3}, // alpha shape - {F2Q(0.0f, kMin, kMax), F2Q(0.5f, kMin, kMax), F2Q(-0.5f, kMin, kMax)}, - kMin, kMax, - {F2Q(0.0f, kMin, kMax), F2Q(0.0f, kMin, kMax), F2Q(0.0f, kMin, kMax), - F2Q(0.5f, kMin, kMax), F2Q(0.5f, kMin, kMax), F2Q(0.5f, kMin, kMax), - F2Q(0.0f, kMin, kMax), F2Q(-0.5f, kMin, kMax), F2Q(0.5f, kMin, kMax), - F2Q(0.0f, kMin, kMax), F2Q(-0.125f, kMin, kMax), - F2Q(0.125f, kMin, kMax)}, - {3, 2, 2, 3}, // output shape - kMin, kMax, output_data); + input_shape, input_values, input_quantized, scale, zero_point, + alpha_shape, alpha_values, alpha_quantized, scale, zero_point, golden, + golden_quantized, scale, zero_point, output_shape, output_data); } TF_LITE_MICRO_TEST(QuantizedInt8PreluActivationsOpTest) { - using tflite::testing::F2QS; - const float kMin = -1; - const float kMax = 127.f / 128.f; - const int output_dims_count = 12; - int8_t output_data[output_dims_count]; + const int input_shape[] = {3, 2, 2, 3}; + const float input_values[] = { + 0.0f, 0.0f, 0.0f, // Row 1, Column 1 + 0.5f, 0.5f, 0.5f, // Row 1, Column 2 + -1.0f, -1.0f, -1.0f, // Row 2, Column 1 + -0.25f, -0.25f, -0.25f, // Row 1, Column 2 + }; + const int alpha_shape[] = {3, 1, 1, 3}; + const float alpha_values[] = {0.0f, 0.5f, -0.5f}; + const int output_shape[] = {3, 2, 2, 3}; + const float golden[] = { + 0.0f, 0.0f, 0.0f, // Row 1, Column 1 + 0.5f, 0.5f, 0.5f, // Row 1, Column 2 + 0.0f, -0.5f, 0.5f, // Row 2, Column 1 + 0.0f, -0.125f, 0.125f, // Row 1, Column 2 + }; + const int dims_count = 12; + int8_t input_quantized[dims_count]; + int8_t alpha_quantized[3]; + int8_t golden_quantized[dims_count]; + float scale = 2.0 / 255.0; + int zero_point = 0; + int8_t output_data[dims_count]; tflite::testing::TestPreluQuantized( - {3, 2, 2, 3}, // input shape - {F2QS(0.0f, kMin, kMax), F2QS(0.0f, kMin, kMax), F2QS(0.0f, kMin, kMax), - F2QS(0.5f, kMin, kMax), F2QS(0.5f, kMin, kMax), F2QS(0.5f, kMin, kMax), - F2QS(-1.0f, kMin, kMax), F2QS(-1.0f, kMin, kMax), - F2QS(-1.0f, kMin, kMax), F2QS(-0.25f, kMin, kMax), - F2QS(-0.25f, kMin, kMax), F2QS(-0.25f, kMin, kMax)}, - kMin, kMax, {3, 1, 1, 3}, // alpha shape - {F2QS(0.0f, kMin, kMax), F2QS(0.5f, kMin, kMax), F2QS(-0.5f, kMin, kMax)}, - kMin, kMax, - {F2QS(0.0f, kMin, kMax), F2QS(0.0f, kMin, kMax), F2QS(0.0f, kMin, kMax), - F2QS(0.5f, kMin, kMax), F2QS(0.5f, kMin, kMax), F2QS(0.5f, kMin, kMax), - F2QS(0.0f, kMin, kMax), F2QS(-0.5f, kMin, kMax), F2QS(0.5f, kMin, kMax), - F2QS(0.0f, kMin, kMax), F2QS(-0.125f, kMin, kMax), - F2QS(0.125f, kMin, kMax)}, - {3, 2, 2, 3}, // output shape - kMin, kMax, output_data); + input_shape, input_values, input_quantized, scale, zero_point, + alpha_shape, alpha_values, alpha_quantized, scale, zero_point, golden, + golden_quantized, scale, zero_point, output_shape, output_data); } TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/kernels/reshape_test.cc b/tensorflow/lite/micro/kernels/reshape_test.cc index 59d5577774f..91ecbdc7a49 100644 --- a/tensorflow/lite/micro/kernels/reshape_test.cc +++ b/tensorflow/lite/micro/kernels/reshape_test.cc @@ -194,7 +194,7 @@ TF_LITE_MICRO_TEST(ReshapeWithInvalidShapeShouldFail) { int input_dims_data[] = {3, 1, 2, 2}; TfLiteIntArray* input_dims = tflite::testing::IntArrayFromInts(input_dims_data); - auto input_data = {3.0f}; + const float input_data[] = {3.0f}; auto input_tensor = tflite::testing::CreateFloatTensor(input_data, input_dims); 
float output_data[4]; @@ -315,7 +315,7 @@ TF_LITE_MICRO_TEST(ReshapeWithLegacyScalarOutputShouldSucceed) { int input_dims_data[] = {1, 1}; TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); - auto input_data = {3.0f}; + const float input_data[] = {3.0f}; auto input_tensor = CreateFloatTensor(input_data, input_dims); float output_data[1]; @@ -326,8 +326,9 @@ TF_LITE_MICRO_TEST(ReshapeWithLegacyScalarOutputShouldSucceed) { int shape_dims_data[] = {1, 0}; TfLiteIntArray* shape_dims = IntArrayFromInts(shape_dims_data); - auto shape_tensor = - tflite::testing::CreateTensor({0}, shape_dims); + const int32_t shape_data[] = {0}; + auto shape_tensor = tflite::testing::CreateTensor( + shape_data, shape_dims); const float expected_output_with_shape[] = {}; const int expected_output_with_shape_len = 0; const float expected_output_no_shape[] = {3}; diff --git a/tensorflow/lite/micro/testing/test_utils.cc b/tensorflow/lite/micro/testing/test_utils.cc index 0bb97854a41..4d931bdd33b 100644 --- a/tensorflow/lite/micro/testing/test_utils.cc +++ b/tensorflow/lite/micro/testing/test_utils.cc @@ -150,16 +150,6 @@ void PopulateContext(TfLiteTensor* tensors, int tensors_size, } } -TfLiteTensor CreateFloatTensor(std::initializer_list data, - TfLiteIntArray* dims, bool is_variable) { - return CreateFloatTensor(data.begin(), dims, is_variable); -} - -TfLiteTensor CreateBoolTensor(std::initializer_list data, - TfLiteIntArray* dims, bool is_variable) { - return CreateBoolTensor(data.begin(), dims, is_variable); -} - TfLiteTensor CreateQuantizedTensor(const uint8_t* data, TfLiteIntArray* dims, float min, float max, bool is_variable) { TfLiteTensor result; @@ -174,12 +164,6 @@ TfLiteTensor CreateQuantizedTensor(const uint8_t* data, TfLiteIntArray* dims, return result; } -TfLiteTensor CreateQuantizedTensor(std::initializer_list data, - TfLiteIntArray* dims, float min, float max, - bool is_variable) { - return CreateQuantizedTensor(data.begin(), dims, min, max, is_variable); -} - TfLiteTensor CreateQuantizedTensor(const int8_t* data, TfLiteIntArray* dims, float min, float max, bool is_variable) { TfLiteTensor result; @@ -194,12 +178,6 @@ TfLiteTensor CreateQuantizedTensor(const int8_t* data, TfLiteIntArray* dims, return result; } -TfLiteTensor CreateQuantizedTensor(std::initializer_list data, - TfLiteIntArray* dims, float min, float max, - bool is_variable) { - return CreateQuantizedTensor(data.begin(), dims, min, max, is_variable); -} - TfLiteTensor CreateQuantizedTensor(float* data, uint8_t* quantized_data, TfLiteIntArray* dims, bool is_variable) { TfLiteTensor result; @@ -258,11 +236,5 @@ TfLiteTensor CreateQuantized32Tensor(const int32_t* data, TfLiteIntArray* dims, return result; } -TfLiteTensor CreateQuantized32Tensor(std::initializer_list data, - TfLiteIntArray* dims, float scale, - bool is_variable) { - return CreateQuantized32Tensor(data.begin(), dims, scale, is_variable); -} - } // namespace testing } // namespace tflite diff --git a/tensorflow/lite/micro/testing/test_utils.h b/tensorflow/lite/micro/testing/test_utils.h index 053c4417f52..e83ac806d8a 100644 --- a/tensorflow/lite/micro/testing/test_utils.h +++ b/tensorflow/lite/micro/testing/test_utils.h @@ -17,7 +17,6 @@ limitations under the License. #include #include -#include #include #include "tensorflow/lite/c/common.h" @@ -31,12 +30,6 @@ namespace testing { // Note: These methods are deprecated, do not use. See b/141332970. -// TODO(kreeger): Don't use this anymore in our tests. 
Optimized compiler -// settings can play with pointer placement on the stack (b/140130236). -inline TfLiteIntArray* IntArrayFromInitializer( - std::initializer_list int_initializer) { - return IntArrayFromInts(int_initializer.begin()); -} // Derives the quantization range max from scaling factor and zero point. template @@ -80,28 +73,14 @@ int32_t F2Q32(const float value, const float scale); void PopulateContext(TfLiteTensor* tensors, int tensors_size, ErrorReporter* error_reporter, TfLiteContext* context); -TfLiteTensor CreateFloatTensor(std::initializer_list data, - TfLiteIntArray* dims, bool is_variable = false); - -TfLiteTensor CreateBoolTensor(std::initializer_list data, - TfLiteIntArray* dims, bool is_variable = false); - TfLiteTensor CreateQuantizedTensor(const uint8_t* data, TfLiteIntArray* dims, float min, float max, bool is_variable = false); -TfLiteTensor CreateQuantizedTensor(std::initializer_list data, - TfLiteIntArray* dims, float min, float max, - bool is_variable = false); - TfLiteTensor CreateQuantizedTensor(const int8_t* data, TfLiteIntArray* dims, float min, float max, bool is_variable = false); -TfLiteTensor CreateQuantizedTensor(std::initializer_list data, - TfLiteIntArray* dims, float min, float max, - bool is_variable = false); - TfLiteTensor CreateQuantizedTensor(float* data, uint8_t* quantized_data, TfLiteIntArray* dims, bool is_variable = false); @@ -117,10 +96,6 @@ TfLiteTensor CreateQuantizedTensor(float* data, int16_t* quantized_data, TfLiteTensor CreateQuantized32Tensor(const int32_t* data, TfLiteIntArray* dims, float scale, bool is_variable = false); -TfLiteTensor CreateQuantized32Tensor(std::initializer_list data, - TfLiteIntArray* dims, float scale, - bool is_variable = false); - template inline TfLiteTensor CreateTensor(const input_type* data, TfLiteIntArray* dims, @@ -135,15 +110,6 @@ inline TfLiteTensor CreateTensor(const input_type* data, TfLiteIntArray* dims, return result; } -template -inline TfLiteTensor CreateTensor(std::initializer_list data, - TfLiteIntArray* dims, - bool is_variable = false) { - return CreateTensor(data.begin(), dims, - is_variable); -} - } // namespace testing } // namespace tflite From d9586bfdbec7138be08176c639ec8d5bd36a41a6 Mon Sep 17 00:00:00 2001 From: Meghna Natraj Date: Mon, 3 Aug 2020 13:22:18 -0700 Subject: [PATCH 1992/2522] Fix error message for missing `quantized_input_stats` flag PiperOrigin-RevId: 324670162 Change-Id: I0521c697183b246bad1525b0fe2d313d2d7017bc --- tensorflow/lite/python/lite.py | 7 ++++--- tensorflow/lite/python/lite_test.py | 12 ++++++------ 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/tensorflow/lite/python/lite.py b/tensorflow/lite/python/lite.py index c0a8c33331b..a853cc953fd 100644 --- a/tensorflow/lite/python/lite.py +++ b/tensorflow/lite/python/lite.py @@ -1245,7 +1245,7 @@ class TFLiteConverterBaseV1(TFLiteConverterBase): return object.__getattribute__(self, name) def _validate_quantized_input_stats(self, converter_kwargs, calibrate): - """Ensure quantized_input_stats provided if required.""" + """Ensure the `quantized_input_stats` flag is provided if required.""" quantized_types = frozenset({constants.INT8, constants.QUANTIZED_UINT8}) @@ -1256,8 +1256,9 @@ class TFLiteConverterBaseV1(TFLiteConverterBase): if (requires_quantized_input_stats and not converter_kwargs["quantized_input_stats"]): - raise ValueError("std_dev and mean must be defined when inference_type " - "or inference_input_type is QUANTIZED_UINT8 or INT8.") + raise ValueError("The `quantized_input_stats` flag 
must be defined when " + "either `inference_type` flag or `inference_input_type` " + "flag is set to tf.uint8 or tf.int8.") def convert(self): """Converts a TensorFlow GraphDef based on instance variables. diff --git a/tensorflow/lite/python/lite_test.py b/tensorflow/lite/python/lite_test.py index ede24b2ede5..e9853c7f17c 100644 --- a/tensorflow/lite/python/lite_test.py +++ b/tensorflow/lite/python/lite_test.py @@ -1126,18 +1126,18 @@ class FromSessionTest(TestModels, parameterized.TestCase): quantized_converter.inference_type = quantized_type quantized_converter.convert() self.assertEqual( - 'std_dev and mean must be defined when inference_type or ' - 'inference_input_type is QUANTIZED_UINT8 or INT8.', - str(error.exception)) + 'The `quantized_input_stats` flag must be defined when ' + 'either `inference_type` flag or `inference_input_type` ' + 'flag is set to tf.uint8 or tf.int8.', str(error.exception)) with self.assertRaises(ValueError) as error: quantized_converter.inference_type = lite_constants.FLOAT quantized_converter.inference_input_type = quantized_type quantized_converter.convert() self.assertEqual( - 'std_dev and mean must be defined when inference_type or ' - 'inference_input_type is QUANTIZED_UINT8 or INT8.', - str(error.exception)) + 'The `quantized_input_stats` flag must be defined when ' + 'either `inference_type` flag or `inference_input_type` ' + 'flag is set to tf.uint8 or tf.int8.', str(error.exception)) quantized_converter.inference_type = quantized_type quantized_converter.inference_input_type = quantized_type From fcec95e19189cadc00a33bf65d0b894ed6722a2b Mon Sep 17 00:00:00 2001 From: Mingming Liu Date: Mon, 3 Aug 2020 13:28:30 -0700 Subject: [PATCH 1993/2522] In tf.nondifferentiable_batch_function (https://www.tensorflow.org/api_docs/python/tf/nondifferentiable_batch_function), flip `enable_large_batch_splitting` to True. PiperOrigin-RevId: 324671442 Change-Id: I890d191a778016db9b58ca7638463aba7997e3ad --- tensorflow/python/ops/batch_ops.py | 13 ++++++++++++- tensorflow/tools/api/golden/v1/tensorflow.pbtxt | 2 +- tensorflow/tools/api/golden/v2/tensorflow.pbtxt | 2 +- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/ops/batch_ops.py b/tensorflow/python/ops/batch_ops.py index 4c470270975..8f163d078ff 100644 --- a/tensorflow/python/ops/batch_ops.py +++ b/tensorflow/python/ops/batch_ops.py @@ -34,7 +34,8 @@ def batch_function(num_batch_threads, batch_timeout_micros, allowed_batch_sizes=None, max_enqueued_batches=10, - autograph=True): + autograph=True, + enable_large_batch_splitting=True): """Batches the computation done by the decorated function. So, for example, in the following code @@ -71,6 +72,15 @@ def batch_function(num_batch_threads, max_enqueued_batches: The maximum depth of the batch queue. Defaults to 10. autograph: Whether to use autograph to compile python and eager style code for efficient graph-mode execution. + enable_large_batch_splitting: The value of this option doesn't affect + processing output given the same input; it affects implementation details + as stated below: 1. Improve batching efficiency by eliminating unnecessary + adding. 2.`max_batch_size` specifies the limit of input and + `allowed_batch_sizes` specifies the limit of a task to be processed. API + user can give an input of size 128 when 'max_execution_batch_size' + is 32 -> implementation can split input of 128 into 4 x 32, schedule + concurrent processing, and then return concatenated results corresponding + to 128. 
Returns: The decorated function will return the unbatched computation output Tensors. @@ -101,6 +111,7 @@ def batch_function(num_batch_threads, allowed_batch_sizes=allowed_batch_sizes, max_enqueued_batches=max_enqueued_batches, shared_name=name, + enable_large_batch_splitting=enable_large_batch_splitting, f=computation, in_tensors=list(args), captured_tensors=computation.captured_inputs, diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt index 6adfb231c38..ba64d009908 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt @@ -1734,7 +1734,7 @@ tf_module { } member_method { name: "nondifferentiable_batch_function" - argspec: "args=[\'num_batch_threads\', \'max_batch_size\', \'batch_timeout_micros\', \'allowed_batch_sizes\', \'max_enqueued_batches\', \'autograph\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'True\'], " + argspec: "args=[\'num_batch_threads\', \'max_batch_size\', \'batch_timeout_micros\', \'allowed_batch_sizes\', \'max_enqueued_batches\', \'autograph\', \'enable_large_batch_splitting\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'True\', \'True\'], " } member_method { name: "norm" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt index 7cf617ddf8b..83baba1b1ce 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt @@ -834,7 +834,7 @@ tf_module { } member_method { name: "nondifferentiable_batch_function" - argspec: "args=[\'num_batch_threads\', \'max_batch_size\', \'batch_timeout_micros\', \'allowed_batch_sizes\', \'max_enqueued_batches\', \'autograph\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'True\'], " + argspec: "args=[\'num_batch_threads\', \'max_batch_size\', \'batch_timeout_micros\', \'allowed_batch_sizes\', \'max_enqueued_batches\', \'autograph\', \'enable_large_batch_splitting\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'True\', \'True\'], " } member_method { name: "norm" From af7bad693cdc07e59e866c0f1a914d6d20a06d2a Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Mon, 3 Aug 2020 13:51:06 -0700 Subject: [PATCH 1994/2522] Expose device memory allocator mem usage via context PiperOrigin-RevId: 324676035 Change-Id: Ib6ad38ccb2f0f6c399c53f650d077b8db4e2c8a8 --- tensorflow/python/eager/context.py | 6 ++ tensorflow/python/eager/context_test.py | 23 +++++++ tensorflow/python/lib/core/pybind11_lib.h | 5 ++ tensorflow/python/tfe_wrapper.cc | 78 +++++++++++++++++++++++ 4 files changed, 112 insertions(+) diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py index fbd63f764cf..765c77af7cd 100644 --- a/tensorflow/python/eager/context.py +++ b/tensorflow/python/eager/context.py @@ -1413,6 +1413,12 @@ class Context(object): self._visible_device_list = visible_device_list + def get_total_memory_usage(self, dev): + """Returns total memory usage in bytes for the current device.""" + self._initialize_physical_devices() + self.ensure_initialized() + return pywrap_tfe.TFE_GetTotalMemoryUsage(self._context_handle, dev) + def get_memory_growth(self, dev): """Get if memory growth is enabled for a PhysicalDevice.""" self._initialize_physical_devices() diff --git a/tensorflow/python/eager/context_test.py b/tensorflow/python/eager/context_test.py index fd815fe7433..086f943b3b0 100644 --- a/tensorflow/python/eager/context_test.py +++ 
b/tensorflow/python/eager/context_test.py @@ -26,6 +26,8 @@ from tensorflow.python.eager import def_function from tensorflow.python.framework import constant_op from tensorflow.python.framework import errors from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops from tensorflow.python.platform import test @@ -108,6 +110,27 @@ class ContextTest(test.TestCase): with self.assertRaises(errors.NotFoundError): _ = context.get_function_def('this_should_not_be_found') + @test_util.run_gpu_only + def testGetMemoryUsage(self): + array_ops.zeros([10]) # Allocate some memory on the GPU. + self.assertGreater( + context.context().get_total_memory_usage('GPU:0'), 0) + + def testGetMemoryUsageCPU(self): + with self.assertRaisesRegex(ValueError, 'CPU does not support'): + context.context().get_total_memory_usage('CPU:0') + + def testGetMemoryUsageUnknownDevice(self): + with self.assertRaisesRegex(ValueError, 'Failed parsing device name'): + context.context().get_total_memory_usage('unknown_device') + + @test_util.run_gpu_only + def testGetMemoryUsageAmbiguousDevice(self): + if len(context.context().list_physical_devices('GPU')) < 2: + self.skipTest('Need at least 2 GPUs') + with self.assertRaisesRegex(ValueError, 'Multiple devices'): + context.context().get_total_memory_usage('GPU') + if __name__ == '__main__': ops.enable_eager_execution() diff --git a/tensorflow/python/lib/core/pybind11_lib.h b/tensorflow/python/lib/core/pybind11_lib.h index 6a0471cb4da..a0fb45a5152 100644 --- a/tensorflow/python/lib/core/pybind11_lib.h +++ b/tensorflow/python/lib/core/pybind11_lib.h @@ -60,6 +60,11 @@ void ThrowTypeError(const char* error_message) { throw pybind11::error_already_set(); } +void ThrowValueError(const char* error_message) { + PyErr_SetString(PyExc_ValueError, error_message); + throw pybind11::error_already_set(); +} + } // namespace tensorflow #endif // TENSORFLOW_PYTHON_LIB_CORE_PYBIND11_LIB_H_ diff --git a/tensorflow/python/tfe_wrapper.cc b/tensorflow/python/tfe_wrapper.cc index 9234ecd7102..ec0a1ac1c23 100644 --- a/tensorflow/python/tfe_wrapper.cc +++ b/tensorflow/python/tfe_wrapper.cc @@ -16,6 +16,7 @@ limitations under the License. 
#include #include "Python.h" +#include "absl/strings/str_format.h" #include "pybind11/chrono.h" #include "pybind11/complex.h" #include "pybind11/functional.h" @@ -351,6 +352,83 @@ PYBIND11_MODULE(_pywrap_tfe, m) { TFE_Py_RegisterFallbackExceptionClass(e.ptr())); }); + m.def( + "TFE_GetTotalMemoryUsage", [](py::handle& ctx, const char* device_name) { + tensorflow::EagerContext* context = tensorflow::ContextFromInterface( + reinterpret_cast( + tensorflow::InputTFE_Context(ctx))); + + tensorflow::DeviceNameUtils::ParsedName input_device_name; + if (!tensorflow::DeviceNameUtils::ParseFullName(device_name, + &input_device_name) && + !tensorflow::DeviceNameUtils::ParseLocalName(device_name, + &input_device_name)) { + tensorflow::ThrowValueError( + absl::StrFormat("Failed parsing device name: '%s'", device_name) + .c_str()); + } + + std::vector devices = + context->local_device_mgr()->ListDevices(); + + tensorflow::Device* matched_device = nullptr; + for (int device_idx = 0; device_idx < devices.size(); device_idx++) { + tensorflow::Device* device = devices[device_idx]; + + if (absl::StrContains(device->name(), "XLA") && + !absl::StrContains(device_name, "XLA")) { + continue; + } + + if (tensorflow::DeviceNameUtils::AreCompatibleDevNames( + input_device_name, device->parsed_name())) { + if (device->device_type() == tensorflow::DEVICE_CPU) { + tensorflow::ThrowValueError( + "CPU does not support getting allocator information"); + } + + if (absl::StrContains(device->device_type(), "XLA") && + !absl::StrContains(device_name, "XLA")) { + // TODO(b/140134773): Remove this workaround. + // Do not accidentally match XLA devices. + continue; + } + + if (matched_device != nullptr) { + tensorflow::ThrowValueError( + absl::StrFormat( + "Multiple devices matching the provided string " + "'%s': '%s' and " + "'%s' ", + device_name, matched_device->name(), device->name()) + .c_str()); + } + matched_device = device; + } + } + + if (matched_device == nullptr) { + tensorflow::ThrowValueError( + absl::StrFormat("No matching devices found for '%s'", device_name) + .c_str()); + } + CHECK(matched_device); + + tensorflow::AllocatorAttributes attrs; + tensorflow::Allocator* allocator = matched_device->GetAllocator(attrs); + + if (absl::optional stats = + allocator->GetStats()) { + return stats->bytes_in_use; + } + + tensorflow::ThrowTypeError( + absl::StrFormat("Allocator stats not available for device '%s'", + matched_device->name()) + .c_str()); + LOG(FATAL) << "Unreachable"; + }); + // XLA Eager Logic m.def("TF_SetXlaEnableLazyCompilation", &TF_SetXlaEnableLazyCompilation); m.def("TF_SetTfXlaCpuGlobalJit", &TF_SetTfXlaCpuGlobalJit); From 2ffd0a6fb6a10c93d5cfe9ad621e84efa9346c76 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 3 Aug 2020 13:54:39 -0700 Subject: [PATCH 1995/2522] tracking device memory transfer in steps_db. 
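The hunks below pull the transferred byte count out of a kernel's memcpy detail string and accumulate per-step totals for the three transfer directions (host-to-device, device-to-host, device-to-device). As a reading aid, here is a minimal Python sketch of the key/value lookup done by the new `ParseNumBytesFromMemcpyDetail` helper; it assumes the detail string is a list of `key:value` pairs separated by colons and newlines, as in the C++ hunk, and everything apart from that format is illustrative:

    import re

    def parse_num_bytes(memcpy_detail: str) -> int:
        # Mirror of the C++ helper: split on ':' and '\n', then scan the
        # resulting key/value pairs for "num_bytes".
        params = re.split(r"[:\n]", memcpy_detail)
        for i in range(0, len(params) - 1, 2):
            if params[i] != "num_bytes":
                continue
            try:
                return int(params[i + 1])
            except ValueError:
                break
        return 0

    assert parse_num_bytes("kind:memcpyHtoD\nnum_bytes:4096") == 4096
    assert parse_num_bytes("kind:memcpyHtoD") == 0

The C++ version additionally folds the parsed byte count into per-direction `DeviceMemoryTransfer` entries via the new `AddDeviceMemoryTransferEvent` shown further down.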
PiperOrigin-RevId: 324676713 Change-Id: I7d42f3746f35458ecf2d68a00c65f900ff673449 --- .../convert/step_events_to_steps_db.cc | 6 +++ .../profiler/convert/xplane_to_step_events.cc | 49 ++++++++++++++++--- .../core/profiler/protobuf/steps_db.proto | 15 +++++- tensorflow/core/profiler/utils/event_span.cc | 47 ++++++++++++++++++ tensorflow/core/profiler/utils/event_span.h | 20 ++++++++ 5 files changed, 129 insertions(+), 8 deletions(-) diff --git a/tensorflow/core/profiler/convert/step_events_to_steps_db.cc b/tensorflow/core/profiler/convert/step_events_to_steps_db.cc index 6841929dea7..f37cd6ed103 100644 --- a/tensorflow/core/profiler/convert/step_events_to_steps_db.cc +++ b/tensorflow/core/profiler/convert/step_events_to_steps_db.cc @@ -142,6 +142,12 @@ StepDatabaseResult ConvertStepEventsToStepDb( for (const auto& it : step_details->Collectives()) { collectives[it.first] = it.second; } + // Populates the device transfer stats for this step. + auto& device_memory_transfers = + *per_core_step_info.mutable_device_memory_transfers(); + for (const auto& dma : step_details->DeviceMemoryTransfers()) { + *device_memory_transfers.Add() = dma; + } // The remaining fields in PerCoreStepInfo are not filled. *step_db.add_step_sequence() = per_core_step_info; } diff --git a/tensorflow/core/profiler/convert/xplane_to_step_events.cc b/tensorflow/core/profiler/convert/xplane_to_step_events.cc index 1d80d308193..0af9ecaf4d3 100644 --- a/tensorflow/core/profiler/convert/xplane_to_step_events.cc +++ b/tensorflow/core/profiler/convert/xplane_to_step_events.cc @@ -17,6 +17,7 @@ limitations under the License. #include "absl/container/flat_hash_map.h" #include "absl/strings/match.h" +#include "absl/strings/str_split.h" #include "absl/strings/string_view.h" #include "absl/types/optional.h" #include "tensorflow/core/platform/types.h" @@ -51,6 +52,20 @@ inline bool IsRealCpuCompute(absl::string_view event_name) { return !not_real; } +uint64 ParseNumBytesFromMemcpyDetail(absl::string_view memcpy_detail) { + const std::vector params = + absl::StrSplit(memcpy_detail, absl::ByAnyChar(":\n")); + + // Processes value pairs. + for (uint32 ii = 0; ii < params.size(); ii += 2) { + if (params[ii] != "num_bytes") continue; + uint64 value = 0; + if (absl::SimpleAtoi(params[ii + 1], &value)) return value; + break; + } + return 0ULL; +} + } // namespace StepEvents ConvertHostThreadsXLineToStepEvents( @@ -134,6 +149,7 @@ StepEvents ConvertDeviceTraceXLineToStepEvents(const uint64 device_id, int64 correlation_id = -1; int64 group_id = -1; absl::string_view tensor_shapes; + absl::string_view memcpy_details; event.ForEachStat([&](const XStatVisitor& stat) { if (!stat.Type().has_value()) return; switch (stat.Type().value()) { @@ -146,6 +162,9 @@ StepEvents ConvertDeviceTraceXLineToStepEvents(const uint64 device_id, case StatType::kTensorShapes: tensor_shapes = stat.StrOrRefValue(); break; + case StatType::kMemcpyDetails: + memcpy_details = stat.StrOrRefValue(); + break; } }); @@ -153,13 +172,29 @@ StepEvents ConvertDeviceTraceXLineToStepEvents(const uint64 device_id, EventType event_type = ClassifyGpuEvent(event.Name(), tensor_shapes); EventTypeSpan event_type_span(event_type, event.GetTimespan()); result[group_id].AddEvent(event_type_span); - if (event_type == DEVICE_COLLECTIVES) { - AllReduceInfo collective_ops; - collective_ops.set_name(string(event.Name())); - collective_ops.set_start_time_ps(event.TimestampPs()); - collective_ops.set_end_time_ps(event.EndOffsetPs()); - // TODO(jiesun): figure out how to get size info etc. 
- result[group_id].AddCollectiveOpEvent(device_id, collective_ops); + switch (event_type) { + case DEVICE_COLLECTIVES: { + AllReduceInfo collective_ops; + collective_ops.set_name(string(event.Name())); + collective_ops.set_start_time_ps(event.TimestampPs()); + collective_ops.set_end_time_ps(event.EndOffsetPs()); + // TODO(jiesun): figure out how to get size info etc. + result[group_id].AddCollectiveOpEvent(device_id, collective_ops); + break; + } + case HOST_TO_DEVICE: + case DEVICE_TO_DEVICE: + case DEVICE_TO_HOST: { + // TODO(jiesun): not all memcpy events are grouped, figure out a + // better way to attribute them to steps. + uint64 bytes_transferred = + ParseNumBytesFromMemcpyDetail(memcpy_details); + result[group_id].AddDeviceMemoryTransferEvent( + event_type, event.GetTimespan(), bytes_transferred); + break; + } + default: + return; } } }); diff --git a/tensorflow/core/profiler/protobuf/steps_db.proto b/tensorflow/core/profiler/protobuf/steps_db.proto index 7d5e87fad5a..cf44b817ac8 100644 --- a/tensorflow/core/profiler/protobuf/steps_db.proto +++ b/tensorflow/core/profiler/protobuf/steps_db.proto @@ -15,6 +15,13 @@ message GenericStepBreakdown { map type_ps = 1; } +// Information about memory transfer to/from device memory. +message DeviceMemoryTransfer { + uint64 occurrence = 1; + double time_us = 2; + uint64 bytes_transferred = 3; +} + // Next ID: 5 // Result proto for StepInfo. message StepInfoResult { @@ -90,8 +97,14 @@ message PerCoreStepInfo { // A map from core ID to program replica id. Replica id map could change // during a profile session, but should stay stable within a step. map core_id_to_replica_id_map = 5; - // The result for all-reduce ops.hlo_metrics_db + // A map from core_id to all-reduce ops. map all_reduce_db_per_core = 6; + // Information about deivce memory transfers, categoried by source and + // destination. Ordered by following categories: + // 1. HostToDevice + // 2. DeviceToHost + // 3. DeviceToDevice + repeated DeviceMemoryTransfer device_memory_transfers = 7; } // Result proto for a StepDatabase. diff --git a/tensorflow/core/profiler/utils/event_span.cc b/tensorflow/core/profiler/utils/event_span.cc index acb037420e0..137a798c7f8 100644 --- a/tensorflow/core/profiler/utils/event_span.cc +++ b/tensorflow/core/profiler/utils/event_span.cc @@ -141,6 +141,7 @@ void CombineStepDetails(const StepDetails& src, StepDetails* dst) { dst->AppendMarkers(src.Markers()); dst->AppendEvents(src.Events()); dst->AppendCollectives(src.Collectives()); + dst->AggregateDeviceMemoryTransfers(src.DeviceMemoryTransfers()); } EventType ClassifyDeviceCompute(absl::string_view event_name, @@ -288,6 +289,8 @@ StepEvents ToNonOverlappedStepEvents(const StepEvents& overlapped_step_events) { ToNonOverlappedEvents(step_details.Events()); *non_overlapped_step_events[step_id].MutableCollectives() = step_details.Collectives(); + *non_overlapped_step_events[step_id].MutableDeviceMemoryTransfers() = + step_details.DeviceMemoryTransfers(); } return non_overlapped_step_events; } @@ -311,10 +314,54 @@ void StepDetails::AppendCollectives( } } +void StepDetails::AggregateDeviceMemoryTransfers( + const std::vector device_memory_transfers) { + if (device_memory_transfers.size() != device_memory_transfers_.size()) { + return; // Sanity check. 
+ } + for (size_t i = 0; i < device_memory_transfers.size(); ++i) { + device_memory_transfers_[i].set_occurrence( + device_memory_transfers_[i].occurrence() + + device_memory_transfers[i].occurrence()); + device_memory_transfers_[i].set_bytes_transferred( + device_memory_transfers_[i].bytes_transferred() + + device_memory_transfers[i].bytes_transferred()); + device_memory_transfers_[i].set_time_us( + device_memory_transfers_[i].time_us() + + device_memory_transfers[i].time_us()); + } +} + void StepDetails::AddCollectiveOpEvent(uint64 core_id, const AllReduceInfo& e) { *collectives_[core_id].add_all_reduce_info() = e; } +void StepDetails::AddDeviceMemoryTransferEvent(EventType event_type, + const Timespan& time_span, + uint64 bytes) { + int index = 0; + switch (event_type) { + case HOST_TO_DEVICE: + index = 0; + break; + case DEVICE_TO_HOST: + index = 1; + break; + case DEVICE_TO_DEVICE: + index = 2; + break; + default: + return; + } + device_memory_transfers_[index].set_occurrence( + device_memory_transfers_[index].occurrence() + 1); + device_memory_transfers_[index].set_time_us( + device_memory_transfers_[index].time_us() + + time_span.duration_ps() / 1000000.0); + device_memory_transfers_[index].set_bytes_transferred( + device_memory_transfers_[index].bytes_transferred() + bytes); +} + Timespan StepDetails::StepTime() const { Timespan max_host_step_time; Timespan max_device_step_time; diff --git a/tensorflow/core/profiler/utils/event_span.h b/tensorflow/core/profiler/utils/event_span.h index b1f325b08e2..6ffbd228d5e 100644 --- a/tensorflow/core/profiler/utils/event_span.h +++ b/tensorflow/core/profiler/utils/event_span.h @@ -112,11 +112,16 @@ struct StepMarker { // StepDetails of the same step executed on different cores. class StepDetails { public: + StepDetails() : device_memory_transfers_(3) {} + const std::vector& Markers() const { return markers_; } const std::vector& Events() const { return events_; } const absl::flat_hash_map& Collectives() const { return collectives_; } + const std::vector& DeviceMemoryTransfers() const { + return device_memory_transfers_; + } // Returns the step time. Timespan StepTime() const; std::vector* MutableMarkers() { return &markers_; } @@ -124,12 +129,20 @@ class StepDetails { absl::flat_hash_map* MutableCollectives() { return &collectives_; } + std::vector* MutableDeviceMemoryTransfers() { + return &device_memory_transfers_; + } // Adds a step-marker to this step. void AddMarker(const StepMarker& m); // Adds an EventTypeSpan to this step. void AddEvent(const EventTypeSpan& e); // Adds a collective op to this step. void AddCollectiveOpEvent(uint64 core_id, const AllReduceInfo& e); + // Appends device memory transfer events to this step. + // Only event type of HOST_TO_DEVICE/DEVICE_TO_DEVICE/DEVICE_TO_HOST are + // allowed. + void AddDeviceMemoryTransferEvent(EventType event_type, + const Timespan& time_span, uint64 bytes); // Appends the step-markers from another step to this step. void AppendMarkers(const std::vector& other_markers); // Appends the events from another step to this step. @@ -137,6 +150,9 @@ class StepDetails { // Appends the collectives from another step to this step. void AppendCollectives( const absl::flat_hash_map& collectives); + // Accumulates the device memory transfers from another step to this step. + void AggregateDeviceMemoryTransfers( + const std::vector device_memory_transfers); // Equality test. bool operator==(const StepDetails& other) const; // Inequality test. 
@@ -155,6 +171,10 @@ class StepDetails { std::vector events_; // Collective operation related events such as all-reduce etc. absl::flat_hash_map collectives_; + // Device memory transfers (including time and bytes involved). + // TODO(jiesun): Consider to use IntervalSet instead of just sum up the event + // durations. + std::vector device_memory_transfers_; }; // Map from step_id to the events happened in that step. From a3df6cff1bd7e43329e4dc8600d1e4defed72b90 Mon Sep 17 00:00:00 2001 From: Robert David Date: Mon, 3 Aug 2020 14:05:37 -0700 Subject: [PATCH 1996/2522] Add MeanStddevNormalization to the list of operations and the OpenCL operation selector. PiperOrigin-RevId: 324678979 Change-Id: Ie4ba8df7d57f184f2bdcd790d4ec761ba1681dee --- .../delegates/gpu/cl/selectors/operation_selector.cc | 7 +++++++ tensorflow/lite/delegates/gpu/common/operations.cc | 9 ++++++--- tensorflow/lite/delegates/gpu/common/operations.h | 1 + tensorflow/lite/delegates/gpu/gl/kernels/registry.cc | 1 + tensorflow/lite/delegates/gpu/metal/api.cc | 2 ++ 5 files changed, 17 insertions(+), 3 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc index ffe9acb8299..088677ba7e2 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc @@ -19,6 +19,7 @@ limitations under the License. #include "absl/types/any.h" #include "tensorflow/lite/delegates/gpu/cl/cl_device.h" #include "tensorflow/lite/delegates/gpu/cl/kernels/elementwise.h" +#include "tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.h" #include "tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.h" #include "tensorflow/lite/delegates/gpu/cl/selectors/convolution_transposed_selector.h" #include "tensorflow/lite/delegates/gpu/cl/selectors/default_selector.h" @@ -286,6 +287,12 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context, return SelectMean(attr, op_def, creation_context.device->GetInfo(), gpu_op); } + case OperationType::MEAN_STDDEV_NORMALIZATION: { + MeanStdDevNormalization operation = CreateMeanStdDevNormalization(op_def); + *gpu_op = + absl::make_unique(std::move(operation)); + return absl::OkStatus(); + } case OperationType::MUL: { if (inputs.size() == 2) { ElementwiseTwoInput operation = diff --git a/tensorflow/lite/delegates/gpu/common/operations.cc b/tensorflow/lite/delegates/gpu/common/operations.cc index dd0a91b2705..245a5a80639 100644 --- a/tensorflow/lite/delegates/gpu/common/operations.cc +++ b/tensorflow/lite/delegates/gpu/common/operations.cc @@ -110,6 +110,8 @@ std::string ToString(enum OperationType op) { return "max_unpooling"; case OperationType::MEAN: return "mean"; + case OperationType::MEAN_STDDEV_NORMALIZATION: + return "mean_stddev_normalization"; case OperationType::MINIMUM: return "minimum"; case OperationType::MUL: @@ -156,10 +158,9 @@ std::string ToString(enum OperationType op) { return "tanh"; case OperationType::TRANSPOSE: return "transpose"; - default: - break; + case OperationType::UNKNOWN: + return "unknown_operation"; } - return "unknown_operation"; } OperationType OperationTypeFromString(const std::string& name) { @@ -185,6 +186,8 @@ OperationType OperationTypeFromString(const std::string& name) { {"maximum", OperationType::MAXIMUM}, {"max_unpooling", OperationType::MAX_UNPOOLING_2D}, {"mean", OperationType::MEAN}, + {"mean_stddev_normalization", + 
OperationType::MEAN_STDDEV_NORMALIZATION}, {"minimum", OperationType::MINIMUM}, {"mul", OperationType::MUL}, {"pad", OperationType::PAD}, diff --git a/tensorflow/lite/delegates/gpu/common/operations.h b/tensorflow/lite/delegates/gpu/common/operations.h index fcce6532c1d..225165589ae 100644 --- a/tensorflow/lite/delegates/gpu/common/operations.h +++ b/tensorflow/lite/delegates/gpu/common/operations.h @@ -53,6 +53,7 @@ enum class OperationType { MAXIMUM, MAX_UNPOOLING_2D, MEAN, + MEAN_STDDEV_NORMALIZATION, MINIMUM, MUL, PAD, diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/registry.cc b/tensorflow/lite/delegates/gpu/gl/kernels/registry.cc index b4bfbcd8f56..0d2438aacc6 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/registry.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/registry.cc @@ -82,6 +82,7 @@ class Registry : public NodeShader { insert_op(Type::FULLY_CONNECTED, NewFullyConnectedNodeShader); insert_op(Type::LSTM, NewLstmNodeShader); insert_op(Type::MEAN, NewMeanNodeShader); + // TODO(b/162763635): implement MeanStddevNormalization for OpenGL. insert_op(Type::MUL, NewMultiplyNodeShader); insert_op(Type::PAD, NewPadNodeShader); insert_op(Type::POOLING_2D, NewPoolingNodeShader); diff --git a/tensorflow/lite/delegates/gpu/metal/api.cc b/tensorflow/lite/delegates/gpu/metal/api.cc index 7b086a6d130..fcab962ee61 100644 --- a/tensorflow/lite/delegates/gpu/metal/api.cc +++ b/tensorflow/lite/delegates/gpu/metal/api.cc @@ -406,6 +406,8 @@ absl::Status RegisterPrimaryOps(const GraphFloat32& graph, const Node* node, case OperationType::BATCH_TO_SPACE: case OperationType::CONST: case OperationType::LSTM: + // TODO(b/162763635): implement MeanStddevNormalization for Metal. + case OperationType::MEAN_STDDEV_NORMALIZATION: case OperationType::SPACE_TO_BATCH: case OperationType::TRANSPOSE: case OperationType::UNKNOWN: From 729b23995f7a655194748afe24a8e7a065f4309b Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 3 Aug 2020 14:06:04 -0700 Subject: [PATCH 1997/2522] Test with MWMS in custom_training_loop_optimizer_test PiperOrigin-RevId: 324679085 Change-Id: I3a550fde03380d0327906dcc9d449837a5f6cc8b --- tensorflow/python/keras/distribute/BUILD | 2 +- .../custom_training_loop_optimizer_test.py | 25 +++++++------------ 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/tensorflow/python/keras/distribute/BUILD b/tensorflow/python/keras/distribute/BUILD index 2c8ba97dbfa..5a5cff01e33 100644 --- a/tensorflow/python/keras/distribute/BUILD +++ b/tensorflow/python/keras/distribute/BUILD @@ -275,7 +275,7 @@ distribute_py_test( "//tensorflow/python:variables", "//tensorflow/python/distribute:combinations", "//tensorflow/python/distribute:strategy_combinations", - "//tensorflow/python/distribute:test_util", + "//tensorflow/python/distribute:values", "//tensorflow/python/eager:def_function", "//tensorflow/python/eager:test", "//tensorflow/python/keras/optimizer_v2", diff --git a/tensorflow/python/keras/distribute/custom_training_loop_optimizer_test.py b/tensorflow/python/keras/distribute/custom_training_loop_optimizer_test.py index 0a12d85bebd..b9eee26220a 100644 --- a/tensorflow/python/keras/distribute/custom_training_loop_optimizer_test.py +++ b/tensorflow/python/keras/distribute/custom_training_loop_optimizer_test.py @@ -22,7 +22,7 @@ from absl.testing import parameterized from tensorflow.python.distribute import combinations from tensorflow.python.distribute import strategy_combinations -from tensorflow.python.distribute import test_util +from tensorflow.python.distribute import values from tensorflow.python.eager import def_function from tensorflow.python.eager import test from tensorflow.python.framework import ops @@ -35,14 +35,7 @@ class OptimizerTest(test.TestCase, parameterized.TestCase): @combinations.generate( combinations.times( combinations.combine( - distribution=[ - strategy_combinations.mirrored_strategy_with_gpu_and_cpu, - strategy_combinations.mirrored_strategy_with_two_gpus, - strategy_combinations.multi_worker_mirrored_2x1_cpu, - strategy_combinations.multi_worker_mirrored_2x1_gpu, - strategy_combinations.tpu_strategy, - strategy_combinations.tpu_strategy_one_step, - ], + distribution=strategy_combinations.multidevice_strategies, mode=["eager"], ), combinations.concat( @@ -62,10 +55,10 @@ class OptimizerTest(test.TestCase, parameterized.TestCase): @def_function.function def optimize(): - grads = ops.convert_to_tensor([[1., 1.], - [2., 2.]]) - grads = distribution.experimental_distribute_values_from_function( - lambda ctx: grads[ctx.replica_id_in_sync_group]) + grads = values.PerReplica([ + ops.convert_to_tensor([1., 1.]), + ops.convert_to_tensor([2., 2.]), + ]) def step_fn(grads): optimizer.apply_gradients( @@ -73,8 +66,8 @@ class OptimizerTest(test.TestCase, parameterized.TestCase): experimental_aggregate_gradients=experimental_aggregate_gradients) return v.read_value() - return test_util.gather(distribution, - distribution.run(step_fn, args=(grads,))) + return distribution.experimental_local_results( + distribution.run(step_fn, args=(grads,))) self.assertAllClose(optimize(), expected) @@ -125,4 +118,4 @@ class OptimizerTest(test.TestCase, parameterized.TestCase): if __name__ == "__main__": - combinations.main() + test.main() From 6b3990c84e1740d3a543cb934762a6bd4815c241 Mon Sep 17 00:00:00 2001 From: Andy Lou Date: Mon, 3 Aug 2020 14:10:33 -0700 Subject: [PATCH 1998/2522] Fix asan error, when there are no matching files in matching_files_op. 
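The crash came from forming `&output(0)` for the `std::sort` call even when no file matched, i.e. when the output tensor is empty; the hunk below only builds that pointer and sorts when there is at least one match. A small usage sketch of the case this protects, going through the public `tf.io.matching_files` wrapper around this kernel (the temporary-directory setup is only there to guarantee an existing path with zero matches and is not part of the change):

    import tempfile

    import tensorflow as tf

    # An existing but empty directory: the glob matches zero files, which is
    # exactly the case the kernel now handles without touching &output(0).
    empty_dir = tempfile.mkdtemp()
    matches = tf.io.matching_files(empty_dir + "/*.txt")
    print(matches.shape)  # Expected: (0,), an empty string vector, no error.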
PiperOrigin-RevId: 324680130 Change-Id: Idec9a7826bf85780eb5adcebe6a168f1858144e7 --- tensorflow/core/kernels/matching_files_op.cc | 12 +++++++----- .../python/data/kernel_tests/list_files_test.py | 5 ++--- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/tensorflow/core/kernels/matching_files_op.cc b/tensorflow/core/kernels/matching_files_op.cc index 0ba718c88ec..515e58d518a 100644 --- a/tensorflow/core/kernels/matching_files_op.cc +++ b/tensorflow/core/kernels/matching_files_op.cc @@ -54,13 +54,15 @@ class MatchingFilesOp : public OpKernel { context, context->allocate_output("filenames", TensorShape({num_files}), &output_t)); auto output = output_t->vec(); - int index = 0; - for (int i = 0; i < num_patterns; ++i) { - for (int j = 0; j < all_fnames[i].size(); j++) { - output(index++) = all_fnames[i][j]; + if (output.size() > 0) { + int index = 0; + for (int i = 0; i < num_patterns; ++i) { + for (int j = 0; j < all_fnames[i].size(); j++) { + output(index++) = all_fnames[i][j]; + } } + std::sort(&output(0), &output(0) + num_files); } - std::sort(&output(0), &output(0) + num_files); } }; diff --git a/tensorflow/python/data/kernel_tests/list_files_test.py b/tensorflow/python/data/kernel_tests/list_files_test.py index 40b4b77116c..b57bfb7293b 100644 --- a/tensorflow/python/data/kernel_tests/list_files_test.py +++ b/tensorflow/python/data/kernel_tests/list_files_test.py @@ -113,7 +113,7 @@ class ListFilesTest(test_base.DatasetTestBase, parameterized.TestCase): # Each run should produce the same set of filenames, which may be # different from the order of `expected_filenames`. - self.assertItemsEqual(expected_filenames, all_actual_filenames[0]) + self.assertCountEqual(expected_filenames, all_actual_filenames[0]) # However, the different runs should produce filenames in the same order # as each other. self.assertEqual(all_actual_filenames[0], all_actual_filenames[1]) @@ -199,7 +199,7 @@ class ListFilesTest(test_base.DatasetTestBase, parameterized.TestCase): actual_filenames.append(compat.as_bytes(self.evaluate(next_element()))) with self.assertRaises(errors.OutOfRangeError): self.evaluate(next_element()) - self.assertItemsEqual(expected_filenames, actual_filenames) + self.assertCountEqual(expected_filenames, actual_filenames) self.assertEqual(actual_filenames[:len(filenames)], actual_filenames[len(filenames):]) @@ -234,6 +234,5 @@ class ListFilesTest(test_base.DatasetTestBase, parameterized.TestCase): assert_items_equal=True) - if __name__ == '__main__': test.main() From b58a8717b1aad125f893c520e897e6f56c6345b4 Mon Sep 17 00:00:00 2001 From: Ran Chen Date: Mon, 3 Aug 2020 14:22:52 -0700 Subject: [PATCH 1999/2522] Add auto_restart to multi_process_runner This helps creating fault tolerance test cases. MWMS currently requires an external system which brings back tasks that are down, otherwise the remaining workers may hang forever. Ideally the remaining workers should error, which is what I'm working on. But it's beneficial to have test cases reflecting the current behavior since in many deployment, we do have a cluster management system that does the restart (e.g. k8s). This also changes the behavior of dependence_on_chief. We used to terminate the cluster if chief exits when join() is called. Now with a watchdog thread, that happens immediately after the chief's terminate. 
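The diff below implements this with a watchdog thread: it polls the child processes once a second, restarts any task that exited with a non-zero code when `auto_restart=True`, and tears the whole cluster down once the chief is done (only on a clean chief exit when auto restart is on). A stripped-down sketch of that supervision loop, written against plain `multiprocessing.Process`-style objects rather than the runner's internals; the names and the `start_fn` callback are illustrative, not part of the change:

    import time

    def watchdog(processes, start_fn, dependence_on_chief=True, auto_restart=True):
        # `processes` maps (task_type, task_id) -> a Process-like object with
        # `exitcode` and `terminate()`; `start_fn(task_type, task_id)` starts a
        # replacement task and returns the new process object.
        while True:
            time.sleep(1)
            chief = processes.get(("chief", 0))
            # Terminate the cluster once the chief has exited (cleanly, or at
            # all when auto restart is off).
            if dependence_on_chief and chief and chief.exitcode is not None:
                if chief.exitcode == 0 or not auto_restart:
                    for p in processes.values():
                        p.terminate()
                    return
            # Bring back tasks that died with a non-zero exit code.
            restarted = False
            if auto_restart:
                for key, p in list(processes.items()):
                    if p.exitcode is not None and p.exitcode != 0:
                        processes[key] = start_fn(*key)
                        restarted = True
            if restarted:
                continue
            # Exit once every task has finished.
            if all(p.exitcode is not None for p in processes.values()):
                return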
PiperOrigin-RevId: 324682642 Change-Id: I56ce27658298916d1ddd4507b90b79db0a2d4673 --- .../python/distribute/multi_process_runner.py | 287 +++++++++++++----- .../distribute/multi_process_runner_test.py | 98 +++++- 2 files changed, 298 insertions(+), 87 deletions(-) diff --git a/tensorflow/python/distribute/multi_process_runner.py b/tensorflow/python/distribute/multi_process_runner.py index e5be4fa4a14..4ded663e588 100644 --- a/tensorflow/python/distribute/multi_process_runner.py +++ b/tensorflow/python/distribute/multi_process_runner.py @@ -67,7 +67,8 @@ except ImportError: # exception stack trace info is stored in exc_info to pass on to parent process # to be re-raised. _ProcessStatusInfo = collections.namedtuple( - '_ProcessStatusInfo', ['is_successful', 'exc_info', 'return_value']) + '_ProcessStatusInfo', + ['task_type', 'task_id', 'is_successful', 'exc_info', 'return_value']) # Information returned from a successful MultiProcessRunner run. MultiProcessRunnerResult = collections.namedtuple('MultiProcessRunnerResult', @@ -97,6 +98,11 @@ Resources = collections.namedtuple('Resources', [ # "medium" timeout of the test runs. _DEFAULT_TIMEOUT_SEC = 200 +# The timeout in seconds to wait to force kill a child process. When a child +# process times out we first try to SIGTERM it so that it has a chance to dump +# stacktraces. However dumping stacktrace can take a long time. +_FORCE_KILL_WAIT_SEC = 30 + class MultiProcessRunner(object): """A utility class to start multiple processes to simulate a cluster. @@ -124,6 +130,8 @@ class MultiProcessRunner(object): list_stdout=False, use_dill_for_args=True, daemon=False, + dependence_on_chief=True, + auto_restart=False, args=None, kwargs=None): """Creates a multi-process runner. @@ -161,6 +169,11 @@ class MultiProcessRunner(object): can pickle more objects, but doesn't work with types in `multiprocessing` library like `Mutex`. daemon: Whether to start processes as daemons. + dependence_on_chief: Whether to terminates the cluster if the chief exits. + If auto_restart is True, it only terminates the cluster if the chief + exits with a zero exit code. + auto_restart: Whether to automatically restart processes that exit with + non-zero exit code. args: Positional arguments to be sent to functions run on processes. kwargs: Keyword arguments to be sent to functions run on processes. @@ -190,9 +203,10 @@ class MultiProcessRunner(object): self._stream_stdout = stream_stdout # TODO(rchao): Revisit list_stdout argument to consider other solution. self._list_stdout = list_stdout - self._dependence_on_chief = True + self._dependence_on_chief = dependence_on_chief self._use_dill_for_args = use_dill_for_args self._daemon = daemon + self._auto_restart = auto_restart self._args = args or () self._kwargs = kwargs or {} @@ -201,8 +215,15 @@ class MultiProcessRunner(object): self._executing_eagerly = context.executing_eagerly() self._joined = False + self._process_lock = threading.Lock() + # Guarded by self._process_lock. self._processes = {} - self._outstanding_subprocess_count = 0 + # Record which processes are terminated. Due to a bug in Python<3.7, + # terminated processes return 255 exit code, which should cause an exception + # in join(). + # https://bugs.python.org/issue30589 + # Guarded by self._process_lock. + self._terminated = set() self._reading_threads = [] self._manager = manager() @@ -215,8 +236,7 @@ class MultiProcessRunner(object): # safe. self._streaming_queue = self._manager.Queue() - # This flag will be set to True once terminate_all() is called. 
- self._all_forced_terminated = False + self._watchdog_thread = None def set_args(self, args=None, kwargs=None): self._args = args or self._args @@ -281,7 +301,7 @@ class MultiProcessRunner(object): daemon=self._daemon) p.start() self._processes[(task_type, task_id)] = p - self._outstanding_subprocess_count += 1 + self._terminated.discard((task_type, task_id)) # For each subprocess, we dedicate a thread continuously reading lines # from them. @@ -291,17 +311,26 @@ class MultiProcessRunner(object): thread.start() self._reading_threads.append(thread) + if self._watchdog_thread is None or not self._watchdog_thread.is_alive(): + self._watchdog_thread = threading.Thread(target=self._process_watchdog) + self._watchdog_thread.start() + def start(self): """Starts processes, one for each task in `cluster_spec`. Note that this is best effort by the applicable multiprocessing library, and it may take up to seconds for a subprocess to be successfully started. """ - if self._processes: - raise ValueError('MultiProcessRunner already started.') - for task_type, addresses in self._cluster_spec.items(): - for task_id, _ in enumerate(addresses): - self._start_subprocess_and_reading_thread(task_type, task_id) + with self._process_lock: + if self._processes: + raise ValueError('MultiProcessRunner already started.') + if self._joined: + raise ValueError('cannot start new processes after' + 'MultiProcessRunner.join() is called') + + for task_type, addresses in self._cluster_spec.items(): + for task_id, _ in enumerate(addresses): + self._start_subprocess_and_reading_thread(task_type, task_id) # TODO(rchao): Remove the need of using SIGALRM if possible. At this time, # without this the tests become very flaky. @@ -353,10 +382,14 @@ class MultiProcessRunner(object): """ if self._processes: raise ValueError('MultiProcessRunner already started.') - for task_type, addresses in self._cluster_spec.items(): - for task_id, _ in enumerate(addresses): - if not (task_type == as_task_type and task_id == as_task_id): - self._start_subprocess_and_reading_thread(task_type, task_id) + with self._process_lock: + if self._joined: + raise ValueError('cannot start new processes after' + 'MultiProcessRunner.join() is called') + for task_type, addresses in self._cluster_spec.items(): + for task_id, _ in enumerate(addresses): + if not (task_type == as_task_type and task_id == as_task_id): + self._start_subprocess_and_reading_thread(task_type, task_id) _set_tf_config(as_task_type, as_task_id, self._cluster_spec, self._rpc_layer) @@ -392,13 +425,17 @@ class MultiProcessRunner(object): args: Optional positional arguments to be supplied in `proc_func`. kwargs: Optional keyword arguments to be supplied in `proc_func`. """ - self._start_subprocess_and_reading_thread( - task_type, - task_id, - cluster_spec=cluster_spec, - proc_func=proc_func, - args=args or (), - kwargs=kwargs or {}) + with self._process_lock: + if self._joined: + raise ValueError('cannot start new processes after' + 'MultiProcessRunner.join() is called') + self._start_subprocess_and_reading_thread( + task_type, + task_id, + cluster_spec=cluster_spec, + proc_func=proc_func, + args=args or (), + kwargs=kwargs or {}) def _queue_to_list(self, queue_to_convert): """Convert `queue.Queue` to `list`.""" @@ -411,9 +448,17 @@ class MultiProcessRunner(object): break return list_to_return + def _get_process_statuses(self): + # One worker may have multiple statuses. We only keep the last one. 
+ statuses = {} + for status in self._queue_to_list(self._process_status_queue): + statuses[(status.task_type, status.task_id)] = status + return statuses + def get_process_id(self, task_type, task_id): """Returns the subprocess id given the task type and task id.""" - p = self._processes.get((task_type, task_id), None) + with self._process_lock: + p = self._processes.get((task_type, task_id), None) return p.pid if p else None def get_process_exit_code(self, task_type, task_id): @@ -430,22 +475,54 @@ class MultiProcessRunner(object): KeyError: If the corresponding subprocess is not found with `task_type` and `task_id`. """ - p = self._processes[(task_type, task_id)] + with self._process_lock: + p = self._processes[(task_type, task_id)] return p.exitcode if p else None - def _join_or_terminate(self, task_type, task_id, process, timeout): - """Joins a process. If it times out, terminate all procsses.""" - logging.info('joining %s-%d', task_type, task_id) - process.join(timeout) - # If exitcode is None, the process aren't terminated and this is a - # timeout. - if process.exitcode is None: - # Force termination to dump worker processes stack trace. - self.terminate_all(sig=signal.SIGTERM) - process_statuses = self._queue_to_list(self._process_status_queue) - raise SubprocessTimeoutError( - '%s-%d and possibly more subprocesses timed out.' % - (task_type, task_id), self._get_mpr_result(process_statuses)) + def _process_watchdog(self): + """Simulates a cluster management system. + + - If auto_restart is True, it restarts processes that exit with a non-zero + exit code. Note that when join() times out it overrides auto_restart to + False. + - If dependence_on_chief is True, it terminates all processes once the chief + exits. If auto_restart is also True, it only terminates all processes if + the chief exit with a zero exit code, otherwise it restarts the chief. + + This runs in self._watchdog_thread. + """ + while True: + time.sleep(1) + with self._process_lock: + chief = self._processes.get(('chief', 0), None) + # Terminate the cluster when _dependence_on_chief is True if either: + # - chief has exited with zero exit code. + # - chief has exited with non-zero exit code and self._auto_restart is + # False. + if chief and self._dependence_on_chief and chief.exitcode is not None: + if chief.exitcode == 0 or (not self._auto_restart): + for p in self._processes.values(): + # Give other processes a chance to exit on their own. + p.join(timeout=3) + self._terminate_all() + for p in self._processes.values(): + p.join() + return + + # Auto restart failed processes if self._auto_restart is True. + if self._auto_restart: + has_failure = False + for (task_type, task_id), p in self._processes.items(): + if p.exitcode is not None and p.exitcode != 0: + has_failure = True + logging.info('Restarting failed %s-%d', task_type, task_id) + self._start_subprocess_and_reading_thread(task_type, task_id) + if has_failure: + continue + + # Exit the thread if all processes have exited at this point. + if all(p.exitcode is not None for p in self._processes.values()): + return def join(self, timeout=_DEFAULT_TIMEOUT_SEC): """Joins all the processes with timeout. @@ -489,41 +566,48 @@ class MultiProcessRunner(object): cases. Exception: if there is an Exception propagated from any subprocess. 
""" - if self._joined: - raise ValueError("MultiProcessRunner can't be joined twice.") - self._joined = True + with self._process_lock: + if self._joined: + raise ValueError("MultiProcessRunner can't be joined twice.") + self._joined = True - chief = self._processes.get(('chief', 0), None) - if self._dependence_on_chief and chief: - self._join_or_terminate('chief', 0, chief, timeout) - # Give other processes a chance to exit on their own. - for p in self._processes.values(): - p.join(timeout=3) - self.terminate_all() - else: - for (task_type, task_id), p in self._processes.items(): - self._join_or_terminate(task_type, task_id, p, timeout) + self._watchdog_thread.join(timeout) + if self._watchdog_thread.is_alive(): + # Timeout. Force termination to dump worker processes stack trace. + with self._process_lock: + self._auto_restart = False + logging.error('Timeout when joining for child processes. Terminating...') + self.terminate_all(sig=signal.SIGTERM) + # Wait for the processes to terminate by themselves first, so they have a + # chance to dump stacktraces. After _FORCE_KILL_WAIT_SEC, we SIGKILL them. + self._watchdog_thread.join(_FORCE_KILL_WAIT_SEC) + if self._watchdog_thread.is_alive(): + logging.error('Timeout when waiting for child processes to ' + 'print stacktrace. Sending SIGKILL...') + self.terminate_all() + self._watchdog_thread.join() + process_statuses = self._get_process_statuses() + raise SubprocessTimeoutError('one or more subprocesses timed out.', + self._get_mpr_result(process_statuses)) for (task_type, task_id), p in self._processes.items(): logging.info('%s-%d exit code: %s', task_type, task_id, p.exitcode) - process_statuses = self._queue_to_list(self._process_status_queue) - for process_status in process_statuses: + process_statuses = self._get_process_statuses() + for process_status in process_statuses.values(): assert isinstance(process_status, _ProcessStatusInfo) if not process_status.is_successful: six.reraise(*process_status.exc_info) # Checking all the processes that are expected to exit properly. for (task_type, task_id), p in self._processes.items(): - if self._dependence_on_chief and chief and task_type != 'chief': - # If _dependence_on_chief, other processes may have been - # forced-terminated, which is expected. - continue - # Successfully exiting process has exit code 0. - if p.exitcode is None or p.exitcode > 0: + # Successfully exiting process has exit code 0. We ignore processes that + # are terminated. + assert p.exitcode is not None + if (p.exitcode > 0 and (task_type, task_id) not in self._terminated): raise UnexpectedSubprocessExitError( - 'Subprocess %s-%d exited with exit code %d. See logs for details.' % - (task_type, task_id, p.exitcode), + 'Subprocess %s-%d exited with exit code %s. See logs for details.' 
+ % (task_type, task_id, p.exitcode), self._get_mpr_result(process_statuses)) logging.info('Joining log reading threads.') @@ -539,34 +623,60 @@ class MultiProcessRunner(object): def _get_mpr_result(self, process_statuses): stdout = self._queue_to_list(self._streaming_queue) return_values = [] - for process_status in process_statuses: + for process_status in process_statuses.values(): if process_status.return_value is not None: return_values.append(process_status.return_value) return MultiProcessRunnerResult(stdout=stdout, return_value=return_values) def terminate(self, task_type, task_id): - """Terminates the process with `task_type` and `task_id`.""" - p = self._processes.get((task_type, task_id), None) - if p is None: - raise ValueError('{}-{} does not exist'.format(task_type, task_id)) - # TODO(crccw): change to use Process.terminate() as well. - self._parent_to_sub_queue.put('terminate {} {}'.format(task_type, task_id)) - p.join() + """Terminates the process with `task_type` and `task_id`. + + If auto_retart=True, the terminated task will be restarted unless the chief + has already exited with zero exit code. + + Args: + task_type: the task type. + task_id: the task id. + + """ + with self._process_lock: + p = self._processes.get((task_type, task_id), None) + if p is None: + raise ValueError('{}-{} does not exist'.format(task_type, task_id)) + self._terminated.add((task_type, task_id)) + # TODO(crccw): change to use Process.terminate() as well. + self._parent_to_sub_queue.put('terminate {} {}'.format( + task_type, task_id)) + p.join() + + def _terminate_all(self, sig=None): + """Terminates all subprocesses. + + The caller is required to hold self._process_lock. + + Args: + sig: the signal used to terminate the process. The default is SIGKILL. + """ - def terminate_all(self, sig=None): - """Terminates all subprocesses.""" # Use SIGKILL as default. In systems where that's unavailable such as # windows, use SIGTERM. sig = sig or getattr(signal, 'SIGKILL', signal.SIGTERM) for (task_type, task_id), p in self._processes.items(): + if p.exitcode is not None: + continue try: os.kill(p.pid, sig) + self._terminated.add((task_type, task_id)) logging.info('%s-%d terminated with signal %r.', task_type, task_id, sig) except ProcessLookupError: logging.info('Attempting to kill %s-%d but it does not exist.', task_type, task_id) - self._all_forced_terminated = True + + def terminate_all(self, sig=None): + """Terminates all subprocesses.""" + with self._process_lock: + self._terminate_all(sig) class _Process(multi_process_lib.Process): @@ -625,11 +735,13 @@ class _ProcFunc(object): time.sleep(0.1) self._resources.process_status_queue.put( _ProcessStatusInfo( + task_type=task_type, + task_id=task_id, is_successful=True, exc_info=None, return_value=None)) - # `os._exit(0)` is used to more reliably terminate a subprocess. - os._exit(0) # pylint: disable=protected-access + # `os._exit(1)` is used to more reliably terminate a subprocess. + os._exit(1) # pylint: disable=protected-access def _close_streaming(self): """Close stdout, stderr and streaming pipe. 
@@ -685,7 +797,8 @@ class _ProcFunc(object): v2_compat.enable_v2_behavior() with self._runtime_mode(test_env.executing_eagerly): - info = _run_contained(proc_func, args, kwargs) + info = _run_contained(test_env.task_type, test_env.task_id, proc_func, + args, kwargs) self._resources.process_status_queue.put(info) # Re-raise the exception in addition to reporting it to the parent @@ -774,7 +887,7 @@ class MultiProcessPoolRunner(object): task_type, task_id, proc_func=_pool_runner_worker, - args=(initializer, conn2)) + args=(task_type, task_id, initializer, conn2)) def run(self, proc_func, args=None, kwargs=None): """Runs `proc_func` with `args` and `kwargs` on all jobs. @@ -819,7 +932,7 @@ class MultiProcessPoolRunner(object): return return_values -def _pool_runner_worker(initializer, conn): +def _pool_runner_worker(task_type, task_id, initializer, conn): """Function that runs on the workers in a pool. It listens for callables to run and returns the result until `conn` is closed. @@ -827,8 +940,10 @@ def _pool_runner_worker(initializer, conn): `conn`. Args: - initializer: A callable to execute during startup. - conn: A multiprocessing.Connection object to listen for tasks and send + task_type: the task type. + task_id: the task index. + initializer: a callable to execute during startup. + conn: a multiprocessing.Connection object to listen for tasks and send results. """ if initializer: @@ -840,22 +955,24 @@ def _pool_runner_worker(initializer, conn): except EOFError: break proc_func = dill.loads(proc_func) - info = _run_contained(proc_func, args, kwargs) + info = _run_contained(task_type, task_id, proc_func, args, kwargs) sys.stdout.flush() sys.stderr.flush() conn.send(info) -def _run_contained(proc_func, args, kwargs): +def _run_contained(task_type, task_id, proc_func, args, kwargs): """Runs `proc_func` with `args` and `kwargs`. The function returns _ProcessStatusInfo which captures the return value and the exception. Args: - proc_func: The function to be run. - args: Optional positional arguments to be supplied in `proc_func`. - kwargs: Optional keyword arguments to be supplied in `proc_func`. + task_type: the task type. + task_id: the task index. + proc_func: the function to be run. + args: optional positional arguments to be supplied in `proc_func`. + kwargs: optional keyword arguments to be supplied in `proc_func`. Returns: a _ProcessStatusInfo. 
@@ -868,6 +985,8 @@ def _run_contained(proc_func, args, kwargs): return_value = proc_func(*args, **kwargs) is_successful = True return _ProcessStatusInfo( + task_type=task_type, + task_id=task_id, is_successful=is_successful, exc_info=exc_info, return_value=return_value) @@ -877,6 +996,8 @@ def _run_contained(proc_func, args, kwargs): except Exception: # pylint: disable=broad-except exc_info = sys.exc_info() return _ProcessStatusInfo( + task_type=task_type, + task_id=task_id, is_successful=is_successful, exc_info=exc_info, return_value=return_value) diff --git a/tensorflow/python/distribute/multi_process_runner_test.py b/tensorflow/python/distribute/multi_process_runner_test.py index c6266a5be26..0aa214d3ca4 100644 --- a/tensorflow/python/distribute/multi_process_runner_test.py +++ b/tensorflow/python/distribute/multi_process_runner_test.py @@ -156,11 +156,8 @@ class MultiProcessRunnerTest(test.TestCase): mpr.start() time.sleep(5) mpr.terminate('worker', 0) - with self.assertRaises( - multi_process_runner.UnexpectedSubprocessExitError) as cm: - mpr.join() - std_stream_results = cm.exception.mpr_result.stdout + std_stream_results = mpr.join().stdout # Worker 0 is terminated in the middle, so it should not have iteration 9 # printed. @@ -388,6 +385,99 @@ class MultiProcessRunnerTest(test.TestCase): 'Subprocess worker-0 exited with exit code 10'): mpr.join() + def test_auto_restart(self): + + def proc_func(counter): + counter.value += 1 + if counter.value == 1: + raise ValueError + + manager = multi_process_runner.manager() + counter = manager.Value(int, 0) + mpr = multi_process_runner.MultiProcessRunner( + proc_func, + multi_worker_test_base.create_cluster_spec(num_workers=1), + args=(counter,), + auto_restart=True) + mpr.start() + mpr.join() + self.assertEqual(counter.value, 2) + + def test_auto_restart_and_timeout(self): + + def proc_func(): + time.sleep(1) + raise ValueError + + mpr = multi_process_runner.MultiProcessRunner( + proc_func, + multi_worker_test_base.create_cluster_spec(num_workers=1), + auto_restart=True) + mpr.start() + with self.assertRaises(multi_process_runner.SubprocessTimeoutError): + mpr.join(timeout=10) + + def test_auto_restart_and_chief(self): + # If the chief has exited with zero exit code, auto restart should stop + # restarting other tasks even if they fail. + + def proc_func(): + time.sleep(1) + if multi_worker_test_base.get_task_type() != 'chief': + raise ValueError + + manager = multi_process_runner.manager() + mpr = multi_process_runner.MultiProcessRunner( + proc_func, + multi_worker_test_base.create_cluster_spec( + has_chief=True, num_workers=1), + auto_restart=True) + mpr.start() + with self.assertRaises(ValueError): + mpr.join(timeout=10) + + def test_auto_restart_failure_immediate_after_restart(self): + # Test the case when worker-0 fails immediately after worker-1 restarts. + + def proc_func(): + time.sleep(5) + + mpr = multi_process_runner.MultiProcessRunner( + proc_func, + multi_worker_test_base.create_cluster_spec( + has_chief=False, num_workers=2), + auto_restart=True) + mpr.start() + pid = mpr.get_process_id('worker', 1) + mpr.terminate('worker', 1) + while mpr.get_process_id('worker', 1) == pid: + time.sleep(0.1) + mpr.terminate('worker', 0) + mpr.join(timeout=20) + + def test_auto_restart_terminate(self): + # Tasks terminated by the user should also be restarted. 
+ + def proc_func(counter): + counter.value += 1 + if counter.value == 1: + time.sleep(100) + + manager = multi_process_runner.manager() + counter = manager.Value(int, 0) + + mpr = multi_process_runner.MultiProcessRunner( + proc_func, + multi_worker_test_base.create_cluster_spec( + has_chief=False, num_workers=1), + args=(counter,), + auto_restart=True) + mpr.start() + time.sleep(3) + mpr.terminate('worker', 0) + mpr.join(timeout=20) + self.assertEqual(counter.value, 2) + class MultiProcessPoolRunnerTest(test.TestCase): From 6dbc50195e516d6d1144c6eb29184d75e2a1da1e Mon Sep 17 00:00:00 2001 From: Robert David Date: Mon, 3 Aug 2020 14:22:57 -0700 Subject: [PATCH 2000/2522] Automatic readability finding fixes. PiperOrigin-RevId: 324682656 Change-Id: I009a949fbf08eeb5e45d19d8a1918a988960311b --- .../lite/delegates/gpu/gl/object_manager.cc | 11 +++++------ .../delegates/gpu/metal/kernels/elementwise.cc | 2 +- .../delegates/gpu/metal/kernels/max_unpooling.cc | 16 +++++++--------- .../lite/delegates/gpu/metal/kernels/mean.cc | 3 +-- .../lite/delegates/gpu/metal/kernels/winograd.cc | 3 +-- 5 files changed, 15 insertions(+), 20 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/gl/object_manager.cc b/tensorflow/lite/delegates/gpu/gl/object_manager.cc index c37be507b2b..ba48b7323a9 100644 --- a/tensorflow/lite/delegates/gpu/gl/object_manager.cc +++ b/tensorflow/lite/delegates/gpu/gl/object_manager.cc @@ -40,12 +40,11 @@ absl::Status CreatePHWC4BufferFromTensorRef(const TensorRef& tensor_ref, absl::Status CopyFromPHWC4Buffer(const GlBuffer& buffer, TensorFloat32* tensor) { - return buffer.MappedRead( - [tensor, &buffer](absl::Span data) { - tensor->data.resize(tensor->shape.DimensionsProduct()); - return ConvertFromPHWC4(absl::MakeConstSpan(data), tensor->shape, - absl::MakeSpan(tensor->data)); - }); + return buffer.MappedRead([tensor](absl::Span data) { + tensor->data.resize(tensor->shape.DimensionsProduct()); + return ConvertFromPHWC4(absl::MakeConstSpan(data), tensor->shape, + absl::MakeSpan(tensor->data)); + }); } absl::Status ObjectManager::RegisterBuffer(uint32_t id, GlBuffer buffer) { diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/elementwise.cc b/tensorflow/lite/delegates/gpu/metal/kernels/elementwise.cc index 7bac1402fd2..53c1c5b38dd 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/elementwise.cc +++ b/tensorflow/lite/delegates/gpu/metal/kernels/elementwise.cc @@ -115,7 +115,7 @@ std::vector ElementwiseWithTwoInputs( desc->uniform_buffers = { {"constant int2&", - [input_ids, output_id](const std::map& buffers) { + [input_ids](const std::map& buffers) { const auto& input_dim_1 = buffers.find(input_ids[1])->second; std::vector uniform_params{ input_dim_1.w, diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/max_unpooling.cc b/tensorflow/lite/delegates/gpu/metal/kernels/max_unpooling.cc index d0e326baf2c..39b4c8fde0e 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/max_unpooling.cc +++ b/tensorflow/lite/delegates/gpu/metal/kernels/max_unpooling.cc @@ -99,17 +99,15 @@ std::vector MaxUnpooling( {input_indices_id, "device FLT4* const src_indices_buffer"}, }; - desc->output_buffer = {output_id, "device FLT4* output_buffer", - [input_id, input_indices_id, - params](const std::map& buffers) { - return CalculateOutputShape( - buffers.find(input_id)->second, params); - }}; + desc->output_buffer = { + output_id, "device FLT4* output_buffer", + [input_id, params](const std::map& buffers) { + return CalculateOutputShape(buffers.find(input_id)->second, params); + }}; 
desc->uniform_buffers = { {"constant uniforms& params", - [input_id, input_indices_id, output_id, - params](const std::map& buffers) { + [input_id, output_id, params](const std::map& buffers) { const auto& dimension = buffers.find(input_id)->second; const auto& output_dimension = buffers.find(output_id)->second; std::vector uniform_params{ @@ -126,7 +124,7 @@ std::vector MaxUnpooling( }}, }; - desc->resize_function = [input_id, input_indices_id, + desc->resize_function = [input_id, params](const std::map& buffers) { const auto& src_shape = buffers.find(input_id)->second; BHWC dst_shape = CalculateOutputShape(src_shape, params); diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/mean.cc b/tensorflow/lite/delegates/gpu/metal/kernels/mean.cc index 431b1e5d6db..d67c9e7f275 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/mean.cc +++ b/tensorflow/lite/delegates/gpu/metal/kernels/mean.cc @@ -130,8 +130,7 @@ std::vector Mean(int id, ValueId input_id, }}; desc->uniform_buffers = { {"constant uniforms& params", - [input_id, output_id, - work_group_size](const std::map& buffers) { + [input_id, work_group_size](const std::map& buffers) { const auto& src_shape = buffers.find(input_id)->second; const int src_slices = DivideRoundUp(src_shape.c, 4); struct uniforms { diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/winograd.cc b/tensorflow/lite/delegates/gpu/metal/kernels/winograd.cc index 2098155888d..d62c6a7fcbe 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/winograd.cc +++ b/tensorflow/lite/delegates/gpu/metal/kernels/winograd.cc @@ -613,8 +613,7 @@ std::vector Winograd4x4To36TileX6( }}, }; - desc->resize_function = [output_id, - attr](const std::map& buffers) { + desc->resize_function = [output_id](const std::map& buffers) { const uint3 groups_size{4, 6, 1}; const auto& dst_shape = buffers.find(output_id)->second; int grid_x = dst_shape.w; From a4ff9ed01fa5299f17c560fbe05891e31f5e71e5 Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Mon, 3 Aug 2020 14:34:06 -0700 Subject: [PATCH 2001/2522] Add CI job name to sizetracker table schema PiperOrigin-RevId: 324684951 Change-Id: Iab9f8fdf6c54f9001beeeaeaee87156abcab93f5 --- tensorflow/tools/ci_build/sizetrack_helper.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tensorflow/tools/ci_build/sizetrack_helper.py b/tensorflow/tools/ci_build/sizetrack_helper.py index 675a16d9a97..032dbdf7490 100755 --- a/tensorflow/tools/ci_build/sizetrack_helper.py +++ b/tensorflow/tools/ci_build/sizetrack_helper.py @@ -52,6 +52,7 @@ from __future__ import print_function import argparse import csv import datetime +import os import os.path import platform import subprocess @@ -92,6 +93,10 @@ parser.add_argument( "--dry_run", action="store_true", help="Dry run: do not load to BigQuery or upload to GCS.") +parser.add_argument( + "--job", + type=str, + help="Name of job calling this script. Default: $KOKORO_JOB_NAME.") parser.add_argument( "--print_schema", action="store_true", @@ -140,6 +145,7 @@ SCHEMA = ",".join([ "team:string", "logged_date:timestamp", "uploaded_to:string", + "job:string", ]) # Select the earliest recorded commit in the same table for the same artifact # and team. 
Used to determine the full range of tested commits for each @@ -313,6 +319,7 @@ def build_row(): FLAGS.team, current_time, get_upload_path(), + FLAGS.job, ] @@ -330,6 +337,9 @@ def main(): "\nPass -h or --help for usage.") exit(1) + if not FLAGS.job: + FLAGS.job = os.environ.get("KOKORO_JOB_NAME", "NO_JOB") + # Generate data about this artifact into a Tab Separated Value file next_tsv_row = build_row() From d84acd6e45d5c33743d032885e4f5ee727f57db8 Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Mon, 3 Aug 2020 14:43:36 -0700 Subject: [PATCH 2002/2522] Remove unused symbols in vars_test. PiperOrigin-RevId: 324686959 Change-Id: If5a8d2ccf6d4baa4e1f19d83a2d54d359c6e6514 --- tensorflow/python/distribute/vars_test.py | 28 ----------------------- 1 file changed, 28 deletions(-) diff --git a/tensorflow/python/distribute/vars_test.py b/tensorflow/python/distribute/vars_test.py index 98d0c1bb2d2..efbb6c23aaa 100644 --- a/tensorflow/python/distribute/vars_test.py +++ b/tensorflow/python/distribute/vars_test.py @@ -26,7 +26,6 @@ from tensorflow.python.distribute import combinations from tensorflow.python.distribute import distribution_strategy_context from tensorflow.python.distribute import strategy_combinations from tensorflow.python.distribute import tpu_strategy -from tensorflow.python.distribute import tpu_values from tensorflow.python.distribute import values from tensorflow.python.distribute.cluster_resolver import tpu_cluster_resolver from tensorflow.python.eager import context @@ -664,26 +663,6 @@ class OnWriteVariableSyncScatterTests(test.TestCase, parameterized.TestCase): self.assertAllEqual([1, 1, 1], self.evaluate(v2.read_value())) -def _make_replica_local(method, strategy=None): - if strategy is None: - devices = ("/device:GPU:0", "/device:CPU:0") - else: - devices = strategy.extended.worker_devices - - v = [] - for d, n, init in zip(devices, ["v", "v/replica"], [1., 2.]): - with ops.device(d): - v.append(variable_scope.get_variable( - name=n, initializer=init, use_resource=True)) - - if (strategy is not None) and isinstance(strategy, _TPU_STRATEGIES): - var_cls = tpu_values.TPUSyncOnReadVariable - else: - var_cls = values.SyncOnReadVariable - replica_local = var_cls(strategy, v, method) - return v, replica_local - - class OnReadVariableSyncTest(test.TestCase, parameterized.TestCase): @combinations.generate(strategy_and_run_tf_function_combinations()) @@ -1258,12 +1237,5 @@ class SyncOnReadScatterReplicaTest(test.TestCase, parameterized.TestCase): self.evaluate(distribution.run(v.scatter_min, args=(delta,))) -def _make_index_slices(vals, indices, dense_shape=None): - if dense_shape: - dense_shape = array_ops.identity(dense_shape) - return indexed_slices.IndexedSlices( - array_ops.identity(vals), array_ops.identity(indices), dense_shape) - - if __name__ == "__main__": test.main() From 64c753b9ee94b11493abfceefd234fa6b8497f71 Mon Sep 17 00:00:00 2001 From: Tomer Kaftan Date: Mon, 3 Aug 2020 14:49:34 -0700 Subject: [PATCH 2003/2522] When a non-KerasTensor Tensor is passed to the `tensor` argument of keras.Input or InputLayer, make a KerasTensor directly from that tensor rather than erroring out. 
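As a rough illustration (not part of this change), the sketch below shows the behavior the new tests exercise, using the public `tf.keras` entry points instead of the internal `input_layer_lib`/`functional` modules, and assuming KerasTensors are enabled (the added tests enable them via `testing_utils.use_keras_tensors_scope(True)`):

    # Hedged sketch only; assumes a KerasTensors-enabled scope/build.
    import tensorflow as tf

    x = tf.keras.Input(tensor=tf.zeros((7, 32)))  # plain (non-symbolic) tensor: no longer an error
    model = tf.keras.Model(x, x * 2.0)            # the Input is built from the tensor's TypeSpec
    print(model(tf.ones((7, 32))).shape)          # (7, 32)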
PiperOrigin-RevId: 324688220 Change-Id: I2b06682f8ea706be4e36e0b8807c0f07bec55a4e --- tensorflow/python/keras/engine/BUILD | 17 ++ tensorflow/python/keras/engine/functional.py | 5 +- tensorflow/python/keras/engine/input_layer.py | 24 ++- .../python/keras/engine/input_layer_test.py | 148 ++++++++++++++++++ 4 files changed, 179 insertions(+), 15 deletions(-) create mode 100644 tensorflow/python/keras/engine/input_layer_test.py diff --git a/tensorflow/python/keras/engine/BUILD b/tensorflow/python/keras/engine/BUILD index c71069b3657..0d2ddb46049 100644 --- a/tensorflow/python/keras/engine/BUILD +++ b/tensorflow/python/keras/engine/BUILD @@ -545,6 +545,23 @@ tf_py_test( ], ) +tf_py_test( + name = "input_layer_test", + size = "medium", + srcs = ["input_layer_test.py"], + python_version = "PY3", + shard_count = 3, + tags = [ + "nomac", # TODO(mihaimaruseac): b/127695564 + ], + deps = [ + ":base_layer", + ":engine", + "//tensorflow/python/keras:testing_utils", + "//tensorflow/python/keras/utils:layer_utils", + ], +) + tf_py_test( name = "functional_test", size = "medium", diff --git a/tensorflow/python/keras/engine/functional.py b/tensorflow/python/keras/engine/functional.py index 71d6faa71b6..8422bf923d8 100644 --- a/tensorflow/python/keras/engine/functional.py +++ b/tensorflow/python/keras/engine/functional.py @@ -135,8 +135,9 @@ class Functional(training_lib.Model): (isinstance(self._nested_inputs, (list, tuple, dict)) and not any(nest.is_nested(t) for t in self._nested_inputs))) - if any(not hasattr(tensor, '_keras_history') for tensor in self.outputs): - base_layer_utils.create_keras_history(self._nested_outputs) + if not keras_tensor.keras_tensors_enabled(): + if any(not hasattr(tensor, '_keras_history') for tensor in self.outputs): + base_layer_utils.create_keras_history(self._nested_outputs) self._validate_graph_inputs_and_outputs() diff --git a/tensorflow/python/keras/engine/input_layer.py b/tensorflow/python/keras/engine/input_layer.py index 4818c5c59a7..33f9320e516 100644 --- a/tensorflow/python/keras/engine/input_layer.py +++ b/tensorflow/python/keras/engine/input_layer.py @@ -76,8 +76,9 @@ class InputLayer(base_layer.Layer): batch_size: Optional input batch size (integer or None). dtype: Optional datatype of the input. When not provided, the Keras default float type will be used. - input_tensor: Optional tensor to use as layer input - instead of creating a placeholder. + input_tensor: Optional tensor to use as layer input. If set, the layer + will use the `tf.TypeSpec` of this tensor rather + than creating a new placeholder tensor. sparse: Boolean, whether the placeholder created is meant to be sparse. Default to False. ragged: Boolean, whether the placeholder created is meant to be ragged. @@ -162,19 +163,15 @@ class InputLayer(base_layer.Layer): self.is_placeholder = True self._batch_input_shape = batch_input_shape else: - raise_eager_tensor_error = False if keras_tensor.keras_tensors_enabled(): - if (not isinstance(input_tensor, keras_tensor.KerasTensor) and - not tf_utils.is_symbolic_tensor(input_tensor)): - raise_eager_tensor_error = True + if not isinstance(input_tensor, keras_tensor.KerasTensor): + input_tensor = keras_tensor.keras_tensor_from_tensor(input_tensor) else: if not tf_utils.is_symbolic_tensor(input_tensor): - raise_eager_tensor_error = True - if raise_eager_tensor_error: - raise ValueError('You should not pass an EagerTensor to `Input`. 
' - 'For example, instead of creating an ' - 'InputLayer, you should instantiate your model and ' - 'directly call it on your input.') + raise ValueError('You should not pass an EagerTensor to `Input`. ' + 'For example, instead of creating an ' + 'InputLayer, you should instantiate your model and ' + 'directly call it on your input.') self.is_placeholder = False try: self._batch_input_shape = tuple(input_tensor.shape.as_list()) @@ -245,7 +242,8 @@ def Input( # pylint: disable=invalid-name if `sparse` is False, sparse tensors can still be passed into the input - they will be densified with a default value of 0. tensor: Optional existing tensor to wrap into the `Input` layer. - If set, the layer will not create a placeholder tensor. + If set, the layer will use the `tf.TypeSpec` of this tensor rather + than creating a new placeholder tensor. ragged: A boolean specifying whether the placeholder to be created is ragged. Only one of 'ragged' and 'sparse' can be True. In this case, values of 'None' in the 'shape' argument represent ragged dimensions. diff --git a/tensorflow/python/keras/engine/input_layer_test.py b/tensorflow/python/keras/engine/input_layer_test.py new file mode 100644 index 00000000000..1b15f34458c --- /dev/null +++ b/tensorflow/python/keras/engine/input_layer_test.py @@ -0,0 +1,148 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#,============================================================================ +"""Tests for InputLayer construction.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.eager import def_function +from tensorflow.python.keras import combinations +from tensorflow.python.keras import keras_parameterized +from tensorflow.python.keras import testing_utils +from tensorflow.python.keras.engine import functional +from tensorflow.python.keras.engine import input_layer as input_layer_lib +from tensorflow.python.ops import array_ops +from tensorflow.python.ops.ragged import ragged_tensor +from tensorflow.python.platform import test + + +class InputLayerTest(keras_parameterized.TestCase): + + @combinations.generate(combinations.combine(mode=['graph', 'eager'])) + def testBasicOutputShapeNoBatchSize(self): + # Create a Keras Input + x = input_layer_lib.Input(shape=(32,), name='input_a') + self.assertAllEqual(x.shape.as_list(), [None, 32]) + + # Verify you can construct and use a model w/ this input + model = functional.Functional(x, x * 2.0) + self.assertAllEqual(model(array_ops.ones((3, 32))), + array_ops.ones((3, 32)) * 2.0) + + @combinations.generate(combinations.combine(mode=['graph', 'eager'])) + def testBasicOutputShapeWithBatchSize(self): + # Create a Keras Input + x = input_layer_lib.Input(batch_size=6, shape=(32,), name='input_b') + self.assertAllEqual(x.shape.as_list(), [6, 32]) + + # Verify you can construct and use a model w/ this input + model = functional.Functional(x, x * 2.0) + self.assertAllEqual(model(array_ops.ones(x.shape)), + array_ops.ones(x.shape) * 2.0) + + @combinations.generate(combinations.combine(mode=['eager'])) + def testBasicOutputShapeNoBatchSizeInTFFunction(self): + model = None + @def_function.function + def run_model(inp): + nonlocal model + if not model: + # Create a Keras Input + x = input_layer_lib.Input(shape=(8,), name='input_a') + self.assertAllEqual(x.shape.as_list(), [None, 8]) + + # Verify you can construct and use a model w/ this input + model = functional.Functional(x, x * 2.0) + return model(inp) + + self.assertAllEqual(run_model(array_ops.ones((10, 8))), + array_ops.ones((10, 8)) * 2.0) + + @combinations.generate(combinations.combine(mode=['graph', 'eager'])) + def testInputTensorArg(self): + with testing_utils.use_keras_tensors_scope(True): + # Create a Keras Input + x = input_layer_lib.Input(tensor=array_ops.zeros((7, 32))) + self.assertAllEqual(x.shape.as_list(), [7, 32]) + + # Verify you can construct and use a model w/ this input + model = functional.Functional(x, x * 2.0) + self.assertAllEqual(model(array_ops.ones(x.shape)), + array_ops.ones(x.shape) * 2.0) + + @combinations.generate(combinations.combine(mode=['eager'])) + def testInputTensorArgInTFFunction(self): + with testing_utils.use_keras_tensors_scope(True): + # We use a mutable model container instead of a model python variable, + # because python 2.7 does not have `nonlocal` + model_container = {} + + @def_function.function + def run_model(inp): + if not model_container: + # Create a Keras Input + x = input_layer_lib.Input(tensor=array_ops.zeros((10, 16))) + self.assertAllEqual(x.shape.as_list(), [10, 16]) + + # Verify you can construct and use a model w/ this input + model_container['model'] = functional.Functional(x, x * 3.0) + return model_container['model'](inp) + + self.assertAllEqual(run_model(array_ops.ones((10, 16))), + array_ops.ones((10, 16)) * 3.0) + + 
@combinations.generate(combinations.combine(mode=['eager'])) + def testCompositeInputTensorArg(self): + with testing_utils.use_keras_tensors_scope(True): + # Create a Keras Input + rt = ragged_tensor.RaggedTensor.from_row_splits( + values=[3, 1, 4, 1, 5, 9, 2, 6], row_splits=[0, 4, 4, 7, 8, 8]) + x = input_layer_lib.Input(tensor=rt) + + # Verify you can construct and use a model w/ this input + model = functional.Functional(x, x * 2) + + # And that the model works + rt = ragged_tensor.RaggedTensor.from_row_splits( + values=[3, 21, 4, 1, 53, 9, 2, 6], row_splits=[0, 4, 4, 7, 8, 8]) + self.assertAllEqual(model(rt), rt * 2) + + @combinations.generate(combinations.combine(mode=['eager'])) + def testCompositeInputTensorArgInTFFunction(self): + with testing_utils.use_keras_tensors_scope(True): + # We use a mutable model container instead of a model python variable, + # because python 2.7 does not have `nonlocal` + model_container = {} + + @def_function.function + def run_model(inp): + if not model_container: + # Create a Keras Input + rt = ragged_tensor.RaggedTensor.from_row_splits( + values=[3, 1, 4, 1, 5, 9, 2, 6], row_splits=[0, 4, 4, 7, 8, 8]) + x = input_layer_lib.Input(tensor=rt) + + # Verify you can construct and use a model w/ this input + model_container['model'] = functional.Functional(x, x * 3) + return model_container['model'](inp) + + # And verify the model works + rt = ragged_tensor.RaggedTensor.from_row_splits( + values=[3, 21, 4, 1, 53, 9, 2, 6], row_splits=[0, 4, 4, 7, 8, 8]) + self.assertAllEqual(run_model(rt), rt * 3) + +if __name__ == '__main__': + test.main() From 3d273680d5428cc7426b78f129f4d69ba2dd0bb2 Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Mon, 3 Aug 2020 14:38:10 -0700 Subject: [PATCH 2004/2522] Adding a macro for exposing base class methods to FileSystem derived classes to workaround name-hiding issue. --- tensorflow/core/platform/file_system.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tensorflow/core/platform/file_system.h b/tensorflow/core/platform/file_system.h index b2086b5968e..77ba34bcdc6 100644 --- a/tensorflow/core/platform/file_system.h +++ b/tensorflow/core/platform/file_system.h @@ -691,6 +691,32 @@ class WrappedFileSystem : public FileSystem { TransactionToken* token_; }; +/// This macro adds forwarding methods from FileSystem class to +/// used class since name hiding will prevent these to be accessed from +/// derived classes and would require all use locations to migrate to +/// Transactional API. This is an interim solution until ModularFileSystem class +/// becomes a singleton. +// TODO(sami): Remove this macro when filesystem plugins migration is complete. + +#define TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT \ + using FileSystem::NewRandomAccessFile; \ + using FileSystem::NewWritableFile; \ + using FileSystem::NewAppendableFile; \ + using FileSystem::NewReadOnlyMemoryRegionFromFile; \ + using FileSystem::FileExists; \ + using FileSystem::GetChildren; \ + using FileSystem::GetMatchingPaths; \ + using FileSystem::Stat; \ + using FileSystem::DeleteFile; \ + using FileSystem::RecursivelyCreateDir; \ + using FileSystem::DeleteDir; \ + using FileSystem::DeleteRecursively; \ + using FileSystem::GetFileSize; \ + using FileSystem::RenameFile; \ + using FileSystem::CopyFile; \ + using FileSystem::IsDirectory; \ + using FileSystem::FlushCaches + /// A file abstraction for randomly reading the contents of a file. 
class RandomAccessFile { public: From 2e680a5dde23f242ee1cb45a627898e3746b877d Mon Sep 17 00:00:00 2001 From: Jose Baiocchi Date: Mon, 3 Aug 2020 14:52:03 -0700 Subject: [PATCH 2005/2522] Remove ProfilerSession::Create with no arguments PiperOrigin-RevId: 324688779 Change-Id: Ia285b30de66e04ad3e6286c315a4563420287cf8 --- tensorflow/core/distributed_runtime/worker.cc | 4 +++- tensorflow/core/profiler/lib/BUILD | 1 - .../core/profiler/lib/profiler_session.cc | 22 +++++-------------- .../core/profiler/lib/profiler_session.h | 3 +-- 4 files changed, 9 insertions(+), 21 deletions(-) diff --git a/tensorflow/core/distributed_runtime/worker.cc b/tensorflow/core/distributed_runtime/worker.cc index 5212f51d491..c4dc51ce47d 100644 --- a/tensorflow/core/distributed_runtime/worker.cc +++ b/tensorflow/core/distributed_runtime/worker.cc @@ -198,7 +198,9 @@ void Worker::DoRunGraph(CallOptions* opts, RunGraphRequestWrapper* request, ProfilerSession* profiler_session = nullptr; if (collector && request->exec_opts().record_timeline()) { // If timeline was requested, assume we want hardware level tracing. - profiler_session = ProfilerSession::Create().release(); + ProfileOptions options = ProfilerSession::DefaultOptions(); + options.set_host_tracer_level(0); + profiler_session = ProfilerSession::Create(options).release(); } CancellationManager* cm = new CancellationManager; opts->SetCancelCallback([this, cm, step_id]() { diff --git a/tensorflow/core/profiler/lib/BUILD b/tensorflow/core/profiler/lib/BUILD index 0f92ffd5a70..57a3fa8a586 100644 --- a/tensorflow/core/profiler/lib/BUILD +++ b/tensorflow/core/profiler/lib/BUILD @@ -46,7 +46,6 @@ cc_library( ], deps = [ "//tensorflow/core:lib", - "//tensorflow/core:lib_internal", "//tensorflow/core/platform", "//tensorflow/core/profiler/internal:profiler_interface", "//tensorflow/core/profiler/protobuf:xplane_proto_cc", diff --git a/tensorflow/core/profiler/lib/profiler_session.cc b/tensorflow/core/profiler/lib/profiler_session.cc index 90857ea8b51..ee6eb55300e 100644 --- a/tensorflow/core/profiler/lib/profiler_session.cc +++ b/tensorflow/core/profiler/lib/profiler_session.cc @@ -29,7 +29,6 @@ limitations under the License. #include "tensorflow/core/profiler/protobuf/xplane.pb.h" #include "tensorflow/core/protobuf/config.pb.h" #include "tensorflow/core/protobuf/error_codes.pb.h" -#include "tensorflow/core/util/env_var.h" #if !defined(IS_MOBILE_PLATFORM) #include "tensorflow/core/profiler/internal/profiler_factory.h" @@ -41,31 +40,20 @@ limitations under the License. 
#endif namespace tensorflow { - namespace { + ProfileOptions GetOptions(const ProfileOptions& opts) { if (opts.version()) return opts; ProfileOptions options = ProfilerSession::DefaultOptions(); options.set_include_dataset_ops(opts.include_dataset_ops()); return options; } + }; // namespace /*static*/ std::unique_ptr ProfilerSession::Create( const ProfileOptions& options) { - return absl::WrapUnique(new ProfilerSession(options)); -} - -/*static*/ std::unique_ptr ProfilerSession::Create() { - int64 host_tracer_level = 2; - tensorflow::Status s = ReadInt64FromEnvVar("TF_PROFILER_HOST_TRACER_LEVEL", 2, - &host_tracer_level); - if (!s.ok()) { - LOG(WARNING) << "ProfilerSession: " << s.error_message(); - } - ProfileOptions options = DefaultOptions(); - options.set_host_tracer_level(host_tracer_level); - return Create(options); + return absl::WrapUnique(new ProfilerSession(GetOptions(options))); } tensorflow::Status ProfilerSession::Status() { @@ -141,14 +129,14 @@ Status ProfilerSession::CollectData(RunMetadata* run_metadata) { return Status::OK(); } -ProfilerSession::ProfilerSession(const ProfileOptions& options) +ProfilerSession::ProfilerSession(ProfileOptions options) #if !defined(IS_MOBILE_PLATFORM) : active_(profiler::AcquireProfilerLock()), #else : active_(false), #endif start_time_ns_(EnvTime::NowNanos()), - options_(GetOptions(options)) { + options_(std::move(options)) { if (!active_) { #if !defined(IS_MOBILE_PLATFORM) status_ = tensorflow::Status(error::UNAVAILABLE, diff --git a/tensorflow/core/profiler/lib/profiler_session.h b/tensorflow/core/profiler/lib/profiler_session.h index 6f92b047eb7..93541f501ce 100644 --- a/tensorflow/core/profiler/lib/profiler_session.h +++ b/tensorflow/core/profiler/lib/profiler_session.h @@ -40,7 +40,6 @@ class ProfilerSession { public: // Creates and ProfilerSession and starts profiling. static std::unique_ptr Create(const ProfileOptions& options); - static std::unique_ptr Create(); static ProfileOptions DefaultOptions() { ProfileOptions options; @@ -67,7 +66,7 @@ class ProfilerSession { private: // Constructs an instance of the class and starts profiling - explicit ProfilerSession(const ProfileOptions& options); + explicit ProfilerSession(ProfileOptions options); // ProfilerSession is neither copyable or movable. ProfilerSession(const ProfilerSession&) = delete; From fcb71ce45b544ff8702b8faca84a763b3c2ebb57 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 3 Aug 2020 15:04:48 -0700 Subject: [PATCH 2006/2522] Added beta parameter (as in https://research.google.com/pubs/archive/41159.pdf) to FTRL implementation for TPU embeddings. PiperOrigin-RevId: 324691573 Change-Id: I6d41c7d631e034ad95e12c6b1a3c24d4482e9171 --- RELEASE.md | 3 +++ .../core/protobuf/tpu/optimization_parameters.proto | 12 ++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 0c6b0f556e8..b0c785c7d68 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -111,6 +111,9 @@ * Math and Linear Algebra: * * TPU Enhancements: + * Added support for the `beta` parameter of the FTRL optimizer for TPU + embeddings. Users of other TensorFlow platforms can implement equivalent + behavior by adjusting the `l2` parameter. 
* * XLA Support: * diff --git a/tensorflow/core/protobuf/tpu/optimization_parameters.proto b/tensorflow/core/protobuf/tpu/optimization_parameters.proto index f29beb3bc48..53905a33a3b 100644 --- a/tensorflow/core/protobuf/tpu/optimization_parameters.proto +++ b/tensorflow/core/protobuf/tpu/optimization_parameters.proto @@ -85,10 +85,13 @@ message StochasticGradientDescentParameters {} // https://github.com/tensorflow/tensorflow/blob/6b6471f3ffb7f1fefe42d814aa5fb9ab7a535b58/tensorflow/core/kernels/training_ops.cc#L2646 // // The hyperparameters for FTRL are the same as for the Keras implementation, -// with some additions. When the multiply_linear_by_lr field is set to true, a -// modified formula is used for FTRL that treats the "linear" accumulator as -// being pre-multiplied by the learning rate (i.e., the accumulator named -// "linear" actually stores "linear * learning_rate"). Other than checkpoint +// with some additions. The "beta" parameter matches the behavior described in +// the second link above; "beta" / (2 * learning rate) should be added to "l2" +// to get equivalent behavior in the other TensorFlow implementations of this +// optimizer. When the multiply_linear_by_lr field is set to true, a modified +// formula is used for FTRL that treats the "linear" accumulator as being +// pre-multiplied by the learning rate (i.e., the accumulator named "linear" +// actually stores "linear * learning_rate"). Other than checkpoint // compatibility, this is mathematically equivalent for a static learning rate; // for a dynamic learning rate, it is nearly the same as long as the learning // rate does not change quickly. The benefit of setting multiply_linear_by_lr to @@ -98,6 +101,7 @@ message FtrlParameters { float l1 = 1; float l2 = 2; float lr_power = 3; + float beta = 7; bool multiply_linear_by_lr = 6; // Old initial accumulator parameters. From f292f31b57480d0b33f5c0feb5fb128e43c865dc Mon Sep 17 00:00:00 2001 From: Chuanhao Zhuge Date: Mon, 3 Aug 2020 15:22:17 -0700 Subject: [PATCH 2007/2522] Disabling benchmarkScanDefun for TFRT due to lack of MLIR lowering support. PiperOrigin-RevId: 324694962 Change-Id: I2398161dff9403ac115a031c5942f753daff7871 --- tensorflow/python/eager/benchmarks_test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/python/eager/benchmarks_test.py b/tensorflow/python/eager/benchmarks_test.py index d8d331a8fc7..93766d809f2 100644 --- a/tensorflow/python/eager/benchmarks_test.py +++ b/tensorflow/python/eager/benchmarks_test.py @@ -1260,6 +1260,8 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): self._run(scan, 100) + @test_util.disable_tfrt( + "tf.While not supported in TF to CoreRT lowing. b/162685874") def benchmarkScanDefun(self): elems = math_ops.range(1600) From f18d09553b2f26a07b0b5cd2ee96f68834fd3c10 Mon Sep 17 00:00:00 2001 From: Jiho Choi Date: Mon, 3 Aug 2020 15:31:34 -0700 Subject: [PATCH 2008/2522] Add element tracing for tf.data.experimental.parallel_interleave. 
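As a rough, user-level illustration (not part of this change): a pipeline like the hedged sketch below exercises this code path, and with the tracing added here each buffered element appears in the profiler trace as paired ParallelInterleaveProduce / ParallelInterleaveConsume events correlated by element_id. The tf.data and tf.profiler calls are assumed standard public API usage; the log directory is arbitrary.

    # Hedged sketch only; assumes standard tf.data / tf.profiler.experimental APIs.
    import tensorflow as tf

    def shard(i):
      # Each "file" is faked as a small range dataset.
      return tf.data.Dataset.range(i * 100, i * 100 + 100)

    ds = tf.data.Dataset.range(4).apply(
        tf.data.experimental.parallel_interleave(shard, cycle_length=4, sloppy=True))

    tf.profiler.experimental.start('/tmp/parallel_interleave_trace')  # arbitrary logdir
    for _ in ds.take(200):
      pass
    tf.profiler.experimental.stop()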
PiperOrigin-RevId: 324696858 Change-Id: I099b9b8935a38e263bd24f008e123c0623432e40 --- .../core/kernels/data/experimental/BUILD | 2 ++ .../parallel_interleave_dataset_op.cc | 20 ++++++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/data/experimental/BUILD b/tensorflow/core/kernels/data/experimental/BUILD index d2142ee69d1..bf28d175e6d 100644 --- a/tensorflow/core/kernels/data/experimental/BUILD +++ b/tensorflow/core/kernels/data/experimental/BUILD @@ -394,6 +394,8 @@ tf_kernel_library( "//tensorflow/core/kernels/data:captured_function", "//tensorflow/core/kernels/data:dataset_utils", "//tensorflow/core/kernels/data:name_utils", + "//tensorflow/core/profiler/lib:traceme", + "//tensorflow/core/profiler/lib:traceme_encode", ], ) diff --git a/tensorflow/core/kernels/data/experimental/parallel_interleave_dataset_op.cc b/tensorflow/core/kernels/data/experimental/parallel_interleave_dataset_op.cc index 2167c5d9b98..9c344e01c6a 100644 --- a/tensorflow/core/kernels/data/experimental/parallel_interleave_dataset_op.cc +++ b/tensorflow/core/kernels/data/experimental/parallel_interleave_dataset_op.cc @@ -31,6 +31,8 @@ limitations under the License. #include "tensorflow/core/lib/random/random.h" #include "tensorflow/core/platform/blocking_counter.h" #include "tensorflow/core/platform/stringprintf.h" +#include "tensorflow/core/profiler/lib/traceme.h" +#include "tensorflow/core/profiler/lib/traceme_encode.h" namespace tensorflow { namespace data { @@ -323,6 +325,11 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase { } *end_of_sequence = false; Status s = current_worker->outputs.front().status; + profiler::TraceMe traceme([&] { + return profiler::TraceMeEncode( + "ParallelInterleaveConsume", + {{"element_id", current_worker->outputs.front().id}}); + }); current_worker->outputs.front().output.swap(*out_tensors); current_worker->outputs.pop_front(); current_worker->cond_var.notify_one(); @@ -564,8 +571,10 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase { Status status; // The buffered data element. std::vector output; + int64 id = -1; explicit OutputElem(const Status& s) : status(s) {} + OutputElem(const Status& s, int64 id) : status(s), id(id) {} }; // Worker threads operate on their relevant WorkerState structs. 
@@ -813,6 +822,14 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase { worker_thread_states_[thread_index] .output_elem.output.empty() && !worker_thread_states_[thread_index].end_of_sequence) { + int64& id = worker_thread_states_[thread_index].output_elem.id; + profiler::TraceMe traceme( + [&] { + id = profiler::TraceMe::NewActivityId(); + return profiler::TraceMeEncode( + "ParallelInterleaveProduce", {{"element_id", id}}); + }, + profiler::kInfo); worker_thread_states_[thread_index].output_elem.status = worker_thread_states_[thread_index].iterator->GetNext( ctx.get(), @@ -856,7 +873,8 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase { worker_thread_states_[thread_index].end_of_sequence = false; } else { workers_[thread_index].outputs.emplace_back( - worker_thread_states_[thread_index].output_elem.status); + worker_thread_states_[thread_index].output_elem.status, + worker_thread_states_[thread_index].output_elem.id); workers_[thread_index].outputs.back().output.swap( worker_thread_states_[thread_index].output_elem.output); } From a0f7e214ae4f79f959a96c106b2379c17ea3b60f Mon Sep 17 00:00:00 2001 From: Zhenyu Tan Date: Mon, 3 Aug 2020 15:34:22 -0700 Subject: [PATCH 2009/2522] Fixit for sequence feature column test. PiperOrigin-RevId: 324697411 Change-Id: Idd0568a80b4b3f82c5c676920a592154efdcc604 --- .../sequence_feature_column_test.py | 114 +++++++++--------- 1 file changed, 57 insertions(+), 57 deletions(-) diff --git a/tensorflow/python/feature_column/sequence_feature_column_test.py b/tensorflow/python/feature_column/sequence_feature_column_test.py index da6d1dee4ba..e98a202bc5a 100644 --- a/tensorflow/python/feature_column/sequence_feature_column_test.py +++ b/tensorflow/python/feature_column/sequence_feature_column_test.py @@ -516,7 +516,6 @@ class SequenceEmbeddingColumnTest( class SequenceSharedEmbeddingColumnTest(test.TestCase): - @test_util.run_deprecated_v1 def test_get_sequence_dense_tensor(self): vocabulary_size = 3 embedding_dimension = 2 @@ -532,67 +531,68 @@ class SequenceSharedEmbeddingColumnTest(test.TestCase): self.assertIsNone(partition_info) return embedding_values - sparse_input_a = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - # example 2, ids [] - # example 3, ids [1] - indices=((0, 0), (1, 0), (1, 1), (3, 0)), - values=(2, 0, 1, 1), - dense_shape=(4, 2)) - sparse_input_b = sparse_tensor.SparseTensorValue( - # example 0, ids [1] - # example 1, ids [0, 2] - # example 2, ids [0] - # example 3, ids [] - indices=((0, 0), (1, 0), (1, 1), (2, 0)), - values=(1, 0, 2, 0), - dense_shape=(4, 2)) + with ops.Graph().as_default(): + sparse_input_a = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + # example 2, ids [] + # example 3, ids [1] + indices=((0, 0), (1, 0), (1, 1), (3, 0)), + values=(2, 0, 1, 1), + dense_shape=(4, 2)) + sparse_input_b = sparse_tensor.SparseTensorValue( + # example 0, ids [1] + # example 1, ids [0, 2] + # example 2, ids [0] + # example 3, ids [] + indices=((0, 0), (1, 0), (1, 1), (2, 0)), + values=(1, 0, 2, 0), + dense_shape=(4, 2)) - expected_lookups_a = [ - # example 0, ids [2] - [[7., 11.], [0., 0.]], - # example 1, ids [0, 1] - [[1., 2.], [3., 5.]], - # example 2, ids [] - [[0., 0.], [0., 0.]], - # example 3, ids [1] - [[3., 5.], [0., 0.]], - ] + expected_lookups_a = [ + # example 0, ids [2] + [[7., 11.], [0., 0.]], + # example 1, ids [0, 1] + [[1., 2.], [3., 5.]], + # example 2, ids [] + [[0., 0.], [0., 0.]], + # example 3, ids [1] + [[3., 5.], 
[0., 0.]], + ] - expected_lookups_b = [ - # example 0, ids [1] - [[3., 5.], [0., 0.]], - # example 1, ids [0, 2] - [[1., 2.], [7., 11.]], - # example 2, ids [0] - [[1., 2.], [0., 0.]], - # example 3, ids [] - [[0., 0.], [0., 0.]], - ] + expected_lookups_b = [ + # example 0, ids [1] + [[3., 5.], [0., 0.]], + # example 1, ids [0, 2] + [[1., 2.], [7., 11.]], + # example 2, ids [0] + [[1., 2.], [0., 0.]], + # example 3, ids [] + [[0., 0.], [0., 0.]], + ] - categorical_column_a = sfc.sequence_categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - categorical_column_b = sfc.sequence_categorical_column_with_identity( - key='bbb', num_buckets=vocabulary_size) - shared_embedding_columns = fc.shared_embedding_columns_v2( - [categorical_column_a, categorical_column_b], - dimension=embedding_dimension, - initializer=_initializer) + categorical_column_a = sfc.sequence_categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + categorical_column_b = sfc.sequence_categorical_column_with_identity( + key='bbb', num_buckets=vocabulary_size) + shared_embedding_columns = fc.shared_embedding_columns_v2( + [categorical_column_a, categorical_column_b], + dimension=embedding_dimension, + initializer=_initializer) - embedding_lookup_a = _get_sequence_dense_tensor( - shared_embedding_columns[0], {'aaa': sparse_input_a})[0] - embedding_lookup_b = _get_sequence_dense_tensor( - shared_embedding_columns[1], {'bbb': sparse_input_b})[0] + embedding_lookup_a = _get_sequence_dense_tensor( + shared_embedding_columns[0], {'aaa': sparse_input_a})[0] + embedding_lookup_b = _get_sequence_dense_tensor( + shared_embedding_columns[1], {'bbb': sparse_input_b})[0] - self.evaluate(variables_lib.global_variables_initializer()) - global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) - self.assertItemsEqual(('aaa_bbb_shared_embedding:0',), - tuple([v.name for v in global_vars])) - self.assertAllEqual(embedding_values, self.evaluate(global_vars[0])) - self.assertAllEqual( - expected_lookups_a, self.evaluate(embedding_lookup_a)) - self.assertAllEqual(expected_lookups_b, self.evaluate(embedding_lookup_b)) + self.evaluate(variables_lib.global_variables_initializer()) + global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + self.assertItemsEqual(('aaa_bbb_shared_embedding:0',), + tuple([v.name for v in global_vars])) + self.assertAllEqual(embedding_values, self.evaluate(global_vars[0])) + self.assertAllEqual( + expected_lookups_a, self.evaluate(embedding_lookup_a)) + self.assertAllEqual(expected_lookups_b, self.evaluate(embedding_lookup_b)) def test_sequence_length(self): with ops.Graph().as_default(): From fc9c057b1ac7883551e72c833a5429f4bb6dc47a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 3 Aug 2020 15:36:31 -0700 Subject: [PATCH 2010/2522] Adds ParseShardingFromEdgeSource(). Makes DistributedTPURewritePass::AssignArgsAndRetvalsToCores() support TUPLE sharding for return values. 
PiperOrigin-RevId: 324697874 Change-Id: I3039da1731c9622ebeb0bf9c3b45185e220267af --- tensorflow/compiler/tf2xla/sharding_util.cc | 24 +++++++++++++++++++ tensorflow/compiler/tf2xla/sharding_util.h | 3 +++ .../distributed_tpu_rewrite_pass.cc | 20 ++++++++++------ .../distributed_tpu_rewrite_pass.h | 4 +++- tensorflow/core/tpu/kernels/BUILD | 1 - .../core/tpu/kernels/tpu_compile_op_common.cc | 2 +- 6 files changed, 44 insertions(+), 10 deletions(-) diff --git a/tensorflow/compiler/tf2xla/sharding_util.cc b/tensorflow/compiler/tf2xla/sharding_util.cc index 366e8d49228..90585c9d98a 100644 --- a/tensorflow/compiler/tf2xla/sharding_util.cc +++ b/tensorflow/compiler/tf2xla/sharding_util.cc @@ -80,6 +80,30 @@ xla::StatusOr> ParseShardingFromDevice( return ParseShardingFromDevice(device_name, num_cores_per_replica, sharding); } +xla::StatusOr> ParseShardingFromEdgeSource( + const Edge& edge, int num_cores_per_replica) { + if (edge.src() == nullptr) { + return tensorflow::errors::InvalidArgument( + "Null src for ParseShardingFromEdgeSource edge=", edge.DebugString()); + } + TF_ASSIGN_OR_RETURN( + absl::optional sharding, + ParseShardingFromDevice(*edge.src(), num_cores_per_replica)); + if (sharding.has_value() && + sharding.value().type() == xla::OpSharding::TUPLE) { + if (edge.src_output() < 0 || + edge.src_output() >= sharding.value().tuple_shardings_size()) { + return tensorflow::errors::InvalidArgument( + "Tuple index out of bound: edge=", edge.DebugString(), + " sharding=", sharding->DebugString()); + } + absl::optional subsharding = + sharding.value().tuple_shardings(edge.src_output()); + return subsharding; + } + return sharding; +} + void SetShardingDeviceAssignmentFromNode(const Node& src, Node* dst) { string device_name = src.assigned_device_name(); if (device_name.empty()) { diff --git a/tensorflow/compiler/tf2xla/sharding_util.h b/tensorflow/compiler/tf2xla/sharding_util.h index 196434826f9..07657c656d3 100644 --- a/tensorflow/compiler/tf2xla/sharding_util.h +++ b/tensorflow/compiler/tf2xla/sharding_util.h @@ -43,6 +43,9 @@ xla::StatusOr> ParseShardingFromDevice( xla::StatusOr> ParseShardingFromDevice( const NodeDef& node_def, int num_cores_per_replica); +xla::StatusOr> ParseShardingFromEdgeSource( + const Edge& edge, int num_cores_per_replica); + void SetShardingDeviceAssignmentFromNode(const Node& src, Node* dst); // Get sharding inforamtion from node. 
diff --git a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.cc b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.cc index 075a1ec9069..5fdc74b79fc 100644 --- a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.cc +++ b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.cc @@ -1813,7 +1813,8 @@ Status DistributedTPURewritePass::AssignArgsAndRetvalsToCores( } else if (sharding->type() != xla::OpSharding::REPLICATED && sharding->type() != xla::OpSharding::OTHER) { return tensorflow::errors::InvalidArgument( - "Unsupported argument sharding: ", sharding->DebugString()); + "Unsupported argument sharding (for arg ", n->DebugString(), + "): ", sharding->DebugString()); } if (assigned_core.has_value()) { args_device_selector.ReportDeviceAssigned(*assigned_core, i); @@ -1855,7 +1856,7 @@ Status DistributedTPURewritePass::AssignArgsAndRetvalsToCores( TF_ASSIGN_OR_RETURN( absl::optional sharding, - ParseShardingFromDevice(*edge->src(), num_cores_per_replica)); + ParseShardingFromEdgeSource(*edge, num_cores_per_replica)); if (partitioned_output_nodes.contains(i)) { Node* output_node = partitioned_output_nodes[i]; @@ -1883,7 +1884,9 @@ Status DistributedTPURewritePass::AssignArgsAndRetvalsToCores( } else if (sharding.value().type() != xla::OpSharding::REPLICATED && sharding.value().type() != xla::OpSharding::OTHER) { return tensorflow::errors::InvalidArgument( - "Unsupported argument sharding: ", sharding->DebugString()); + "Unsupported argument sharding for retval ", + retvals[i]->DebugString(), " edge=", edge->DebugString(), ": ", + sharding->DebugString()); } } else { if (use_spmd) { @@ -2472,7 +2475,8 @@ xla::StatusOr CreateOrGetPerHostVariableCopy( Status DistributedTPURewritePass::BuildExecuteNodes( const ParameterInfo& params_info, int num_tasks, int num_cores_per_replica, - const Node& replicate_node, const DataTypeVector& arg_types, + const Node& replicate_node, const std::vector& arg_names, + const DataTypeVector& arg_types, const std::vector& arg_shapes, const DataTypeVector& retval_types, const std::vector& arg_shardings, @@ -2595,7 +2599,9 @@ Status DistributedTPURewritePass::BuildExecuteNodes( } } else { return tensorflow::errors::InvalidArgument( - "Unsupported argument sharding: ", sharding.DebugString()); + "Unsupported argument sharding for arg=", arg_names[i], + " shape=", arg_shapes[i].shape.DebugString(), ": ", + sharding.DebugString()); } } std::vector> core_retval_nums(num_cores_per_replica); @@ -3922,8 +3928,8 @@ Status DistributedTPURewritePass::FingerprintFunctionLibrary( std::vector variable_writes; TF_RETURN_IF_ERROR(BuildExecuteNodes( - params_info, num_tasks, num_cores_per_replica, *replicate_node, arg_types, - arg_shapes, retval_types, arg_sharding, retval_sharding, + params_info, num_tasks, num_cores_per_replica, *replicate_node, arg_names, + arg_types, arg_shapes, retval_types, arg_sharding, retval_sharding, tf_device_assignment, compile_node, variable_reads, control_after_compilation, control_after, &variable_writes, graph)); bool contains_resource_write_op = diff --git a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.h b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.h index 1931b4ac80f..a9692cc0edb 100644 --- a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.h +++ b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.h @@ -413,9 +413,10 @@ class DistributedTPURewritePass : public GraphOptimizationPass { // * `num_cores_per_replica` is the number of cores 
which are dedicated to // each replica. // * `replicate_node` is the original TPUReplicate node. - // * `arg_types` are the types of the arguments to the computation function + // * `arg_names` are the names of the arguments to the computation function // passed as argument to TPUReplicate, including per-replica, // broadcast, and variable arguments. + // * `arg_types` are the corresponding types of the arguments. // * `arg_shapes` are the corresponding shapes (and handle types/shapes, if // applicable). // * `arg_shardings` and `retval_shardings` are mappings from @@ -431,6 +432,7 @@ class DistributedTPURewritePass : public GraphOptimizationPass { static Status BuildExecuteNodes( const ParameterInfo& params_info, int num_tasks, int num_cores_per_replica, const Node& replicate_node, + const std::vector& arg_names, const DataTypeVector& arg_types, const std::vector& arg_shapes, const DataTypeVector& retval_types, diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index 6f74123131f..1336f52ed34 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -71,7 +71,6 @@ cc_library( "//tensorflow/core/tpu:tpu_api", "//tensorflow/core/tpu:tpu_configuration", "//tensorflow/core/tpu:tpu_defs", - "//tensorflow/stream_executor/tpu:status_helper", "//tensorflow/stream_executor/tpu:tpu_platform_interface", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:span", diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc b/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc index 8bd45db2206..ce18e844e66 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc @@ -117,7 +117,7 @@ Status SetPerCoreArgShapes( } else { TF_RET_CHECK(proto_arg.sharding().type() == xla::OpSharding::REPLICATED) << "Unsupported argument sharding: " - << proto_arg.sharding().DebugString(); + << " proto_arg=" << proto_arg.DebugString(); for (int core = 0; core < per_core_arg_shapes->size(); ++core) { (*arg_core_mapping)[arg_index].indices.push_back( (*per_core_arg_shapes)[core].size()); From 0e5562be23660c29f5625e750ea5afcba981dc9f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 3 Aug 2020 15:45:48 -0700 Subject: [PATCH 2011/2522] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 324699433 Change-Id: I980612c967b3d5948fe2cbf614060425a261091c --- tensorflow/go/op/wrappers.go | 1716 +++++++++++++++++----------------- 1 file changed, 858 insertions(+), 858 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 1bff193830a..34ff57636ca 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -27463,524 +27463,6 @@ func DecodePaddedRaw(scope *Scope, input_bytes tf.Output, fixed_length tf.Output return op.Output(0) } -// QuantizeV2Attr is an optional argument to QuantizeV2. -type QuantizeV2Attr func(optionalAttr) - -// QuantizeV2Mode sets the optional mode attribute to value. -// If not specified, defaults to "MIN_COMBINED" -func QuantizeV2Mode(value string) QuantizeV2Attr { - return func(m optionalAttr) { - m["mode"] = value - } -} - -// QuantizeV2RoundMode sets the optional round_mode attribute to value. -// If not specified, defaults to "HALF_AWAY_FROM_ZERO" -func QuantizeV2RoundMode(value string) QuantizeV2Attr { - return func(m optionalAttr) { - m["round_mode"] = value - } -} - -// QuantizeV2NarrowRange sets the optional narrow_range attribute to value. 
-// If not specified, defaults to false -func QuantizeV2NarrowRange(value bool) QuantizeV2Attr { - return func(m optionalAttr) { - m["narrow_range"] = value - } -} - -// QuantizeV2Axis sets the optional axis attribute to value. -// If not specified, defaults to -1 -func QuantizeV2Axis(value int64) QuantizeV2Attr { - return func(m optionalAttr) { - m["axis"] = value - } -} - -// QuantizeV2EnsureMinimumRange sets the optional ensure_minimum_range attribute to value. -// If not specified, defaults to 0.01 -func QuantizeV2EnsureMinimumRange(value float32) QuantizeV2Attr { - return func(m optionalAttr) { - m["ensure_minimum_range"] = value - } -} - -// Quantize the 'input' tensor of type float to 'output' tensor of type 'T'. -// -// [min_range, max_range] are scalar floats that specify the range for -// the 'input' data. The 'mode' attribute controls exactly which calculations are -// used to convert the float values to their quantized equivalents. The -// 'round_mode' attribute controls which rounding tie-breaking algorithm is used -// when rounding float values to their quantized equivalents. -// -// In 'MIN_COMBINED' mode, each value of the tensor will undergo the following: -// -// ``` -// out[i] = (in[i] - min_range) * range(T) / (max_range - min_range) -// if T == qint8: out[i] -= (range(T) + 1) / 2.0 -// ``` -// -// here `range(T) = numeric_limits::max() - numeric_limits::min()` -// -// *MIN_COMBINED Mode Example* -// -// Assume the input is type float and has a possible range of [0.0, 6.0] and the -// output type is quint8 ([0, 255]). The min_range and max_range values should be -// specified as 0.0 and 6.0. Quantizing from float to quint8 will multiply each -// value of the input by 255/6 and cast to quint8. -// -// If the output type was qint8 ([-128, 127]), the operation will additionally -// subtract each value by 128 prior to casting, so that the range of values aligns -// with the range of qint8. -// -// If the mode is 'MIN_FIRST', then this approach is used: -// -// ``` -// num_discrete_values = 1 << (# of bits in T) -// range_adjust = num_discrete_values / (num_discrete_values - 1) -// range = (range_max - range_min) * range_adjust -// range_scale = num_discrete_values / range -// quantized = round(input * range_scale) - round(range_min * range_scale) + -// numeric_limits::min() -// quantized = max(quantized, numeric_limits::min()) -// quantized = min(quantized, numeric_limits::max()) -// ``` -// -// The biggest difference between this and MIN_COMBINED is that the minimum range -// is rounded first, before it's subtracted from the rounded value. With -// MIN_COMBINED, a small bias is introduced where repeated iterations of quantizing -// and dequantizing will introduce a larger and larger error. -// -// *SCALED mode Example* -// -// `SCALED` mode matches the quantization approach used in -// `QuantizeAndDequantize{V2|V3}`. -// -// If the mode is `SCALED`, the quantization is performed by multiplying each -// input value by a scaling_factor. -// The scaling_factor is determined from `min_range` and `max_range` to be as large -// as possible such that the range from `min_range` to `max_range` is representable -// within values of type T. -// -// ```c++ -// -// const int min_T = std::numeric_limits::min(); -// const int max_T = std::numeric_limits::max(); -// const float max_float = std::numeric_limits::max(); -// -// const float scale_factor_from_min_side = -// (min_T * min_range > 0) ? 
min_T / min_range : max_float; -// const float scale_factor_from_max_side = -// (max_T * max_range > 0) ? max_T / max_range : max_float; -// -// const float scale_factor = std::min(scale_factor_from_min_side, -// scale_factor_from_max_side); -// ``` -// -// We next use the scale_factor to adjust min_range and max_range as follows: -// -// ```c++ -// min_range = min_T / scale_factor; -// max_range = max_T / scale_factor; -// ``` -// -// -// e.g. if T = qint8, and initially min_range = -10, and max_range = 9, we would -// compare -128/-10.0 = 12.8 to 127/9.0 = 14.11, and set scaling_factor = 12.8 -// In this case, min_range would remain -10, but max_range would be adjusted to -// 127 / 12.8 = 9.921875 -// -// So we will quantize input values in the range (-10, 9.921875) to (-128, 127). -// -// The input tensor can now be quantized by clipping values to the range -// `min_range` to `max_range`, then multiplying by scale_factor as follows: -// -// ```c++ -// result = round(min(max_range, max(min_range, input)) * scale_factor) -// ``` -// -// The adjusted `min_range` and `max_range` are returned as outputs 2 and 3 of -// this operation. These outputs should be used as the range for any further -// calculations. -// -// -// *narrow_range (bool) attribute* -// -// If true, we do not use the minimum quantized value. -// i.e. for int8 the quantized output, it would be restricted to the range -// -127..127 instead of the full -128..127 range. -// This is provided for compatibility with certain inference backends. -// (Only applies to SCALED mode) -// -// -// *axis (int) attribute* -// -// An optional `axis` attribute can specify a dimension index of the input tensor, -// such that quantization ranges will be calculated and applied separately for each -// slice of the tensor along that dimension. This is useful for per-channel -// quantization. -// -// If axis is specified, min_range and max_range -// -// if `axis`=None, per-tensor quantization is performed as normal. -// -// -// *ensure_minimum_range (float) attribute* -// -// Ensures the minimum quantization range is at least this value. -// The legacy default value for this is 0.01, but it is strongly suggested to -// set it to 0 for new uses. -// -// -// Arguments: -// -// min_range: The minimum value of the quantization range. This value may be adjusted by the -// op depending on other parameters. The adjusted value is written to `output_min`. -// If the `axis` attribute is specified, this must be a 1-D tensor whose size -// matches the `axis` dimension of the input and output tensors. -// max_range: The maximum value of the quantization range. This value may be adjusted by the -// op depending on other parameters. The adjusted value is written to `output_max`. -// If the `axis` attribute is specified, this must be a 1-D tensor whose size -// matches the `axis` dimension of the input and output tensors. -// -// -// Returns: -// output: The quantized data produced from the float input. -// output_min: The final quantization range minimum, used to clip input values before scaling -// and rounding them to quantized values. -// If the `axis` attribute is specified, this will be a 1-D tensor whose size -// matches the `axis` dimension of the input and output tensors. -// output_max: The final quantization range maximum, used to clip input values before scaling -// and rounding them to quantized values. -// If the `axis` attribute is specified, this will be a 1-D tensor whose size -// matches the `axis` dimension of the input and output tensors. 
-func QuantizeV2(scope *Scope, input tf.Output, min_range tf.Output, max_range tf.Output, T tf.DataType, optional ...QuantizeV2Attr) (output tf.Output, output_min tf.Output, output_max tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"T": T} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "QuantizeV2", - Input: []tf.Input{ - input, min_range, max_range, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// Returns the truth value of (x >= y) element-wise. -// -// *NOTE*: `GreaterEqual` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -// -// Example: -// -// ```python -// x = tf.constant([5, 4, 6, 7]) -// y = tf.constant([5, 2, 5, 10]) -// tf.math.greater_equal(x, y) ==> [True, True, True, False] -// -// x = tf.constant([5, 4, 6, 7]) -// y = tf.constant([5]) -// tf.math.greater_equal(x, y) ==> [True, False, True, True] -// ``` -func GreaterEqual(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "GreaterEqual", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// BatchAttr is an optional argument to Batch. -type BatchAttr func(optionalAttr) - -// BatchMaxEnqueuedBatches sets the optional max_enqueued_batches attribute to value. -// If not specified, defaults to 10 -func BatchMaxEnqueuedBatches(value int64) BatchAttr { - return func(m optionalAttr) { - m["max_enqueued_batches"] = value - } -} - -// BatchAllowedBatchSizes sets the optional allowed_batch_sizes attribute to value. -// If not specified, defaults to <> -func BatchAllowedBatchSizes(value []int64) BatchAttr { - return func(m optionalAttr) { - m["allowed_batch_sizes"] = value - } -} - -// BatchContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func BatchContainer(value string) BatchAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// BatchSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func BatchSharedName(value string) BatchAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// BatchBatchingQueue sets the optional batching_queue attribute to value. -// If not specified, defaults to "" -func BatchBatchingQueue(value string) BatchAttr { - return func(m optionalAttr) { - m["batching_queue"] = value - } -} - -// Batches all input tensors nondeterministically. -// -// When many instances of this Op are being run concurrently with the same -// container/shared_name in the same device, some will output zero-shaped Tensors -// and others will output Tensors of size up to max_batch_size. -// -// All Tensors in in_tensors are batched together (so, for example, labels and -// features should be batched with a single instance of this operation. -// -// Each invocation of batch emits an `id` scalar which will be used to identify -// this particular invocation when doing unbatch or its gradient. -// -// Each op which emits a non-empty batch will also emit a non-empty batch_index -// Tensor, which, is a [K, 3] matrix where each row contains the invocation's id, -// start, and length of elements of each set of Tensors present in batched_tensors. 
-// -// Batched tensors are concatenated along the first dimension, and all tensors in -// in_tensors must have the first dimension of the same size. -// -// in_tensors: The tensors to be batched. -// num_batch_threads: Number of scheduling threads for processing batches of work. -// Determines the number of batches processed in parallel. -// max_batch_size: Batch sizes will never be bigger than this. -// batch_timeout_micros: Maximum number of microseconds to wait before outputting -// an incomplete batch. -// allowed_batch_sizes: Optional list of allowed batch sizes. If left empty, does -// nothing. Otherwise, supplies a list of batch sizes, causing the op to pad -// batches up to one of those sizes. The entries must increase monotonically, and -// the final entry must equal max_batch_size. -// grad_timeout_micros: The timeout to use for the gradient. See Unbatch. -// batched_tensors: Either empty tensors or a batch of concatenated Tensors. -// batch_index: If out_tensors is non-empty, has information to invert it. -// container: Controls the scope of sharing of this batch. -// id: always contains a scalar with a unique ID for this invocation of Batch. -// shared_name: Concurrently running instances of batch in the same device with the -// same container and shared_name will batch their elements together. If left -// empty, the op name will be used as the shared name. -// T: the types of tensors to be batched. -func Batch(scope *Scope, in_tensors []tf.Output, num_batch_threads int64, max_batch_size int64, batch_timeout_micros int64, grad_timeout_micros int64, optional ...BatchAttr) (batched_tensors []tf.Output, batch_index tf.Output, id tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_batch_threads": num_batch_threads, "max_batch_size": max_batch_size, "batch_timeout_micros": batch_timeout_micros, "grad_timeout_micros": grad_timeout_micros} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Batch", - Input: []tf.Input{ - tf.OutputList(in_tensors), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if batched_tensors, idx, err = makeOutputList(op, idx, "batched_tensors"); err != nil { - scope.UpdateErr("Batch", err) - return - } - batch_index = op.Output(idx) - id = op.Output(idx) - return batched_tensors, batch_index, id -} - -// UnicodeDecodeAttr is an optional argument to UnicodeDecode. -type UnicodeDecodeAttr func(optionalAttr) - -// UnicodeDecodeErrors sets the optional errors attribute to value. -// -// value: Error handling policy when there is invalid formatting found in the input. -// The value of 'strict' will cause the operation to produce a InvalidArgument -// error on any invalid input formatting. A value of 'replace' (the default) will -// cause the operation to replace any invalid formatting in the input with the -// `replacement_char` codepoint. A value of 'ignore' will cause the operation to -// skip any invalid formatting in the input and produce no corresponding output -// character. -// If not specified, defaults to "replace" -func UnicodeDecodeErrors(value string) UnicodeDecodeAttr { - return func(m optionalAttr) { - m["errors"] = value - } -} - -// UnicodeDecodeReplacementChar sets the optional replacement_char attribute to value. -// -// value: The replacement character codepoint to be used in place of any invalid -// formatting in the input when `errors='replace'`. Any valid unicode codepoint may -// be used. 
The default value is the default unicode replacement character is -// 0xFFFD or U+65533.) -// If not specified, defaults to 65533 -func UnicodeDecodeReplacementChar(value int64) UnicodeDecodeAttr { - return func(m optionalAttr) { - m["replacement_char"] = value - } -} - -// UnicodeDecodeReplaceControlCharacters sets the optional replace_control_characters attribute to value. -// -// value: Whether to replace the C0 control characters (00-1F) with the -// `replacement_char`. Default is false. -// If not specified, defaults to false -func UnicodeDecodeReplaceControlCharacters(value bool) UnicodeDecodeAttr { - return func(m optionalAttr) { - m["replace_control_characters"] = value - } -} - -// UnicodeDecodeTsplits sets the optional Tsplits attribute to value. -// If not specified, defaults to DT_INT64 -func UnicodeDecodeTsplits(value tf.DataType) UnicodeDecodeAttr { - return func(m optionalAttr) { - m["Tsplits"] = value - } -} - -// Decodes each string in `input` into a sequence of Unicode code points. -// -// The character codepoints for all strings are returned using a single vector -// `char_values`, with strings expanded to characters in row-major order. -// -// The `row_splits` tensor indicates where the codepoints for -// each input string begin and end within the `char_values` tensor. -// In particular, the values for the `i`th -// string (in row-major order) are stored in the slice -// `[row_splits[i]:row_splits[i+1]]`. Thus: -// -// * `char_values[row_splits[i]+j]` is the Unicode codepoint for the `j`th -// character in the `i`th string (in row-major order). -// * `row_splits[i+1] - row_splits[i]` is the number of characters in the `i`th -// string (in row-major order). -// -// Arguments: -// input: The text to be decoded. Can have any shape. Note that the output is flattened -// to a vector of char values. -// input_encoding: Text encoding of the input strings. This is any of the encodings supported -// by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`. -// -// Returns: -// row_splits: A 1D int32 tensor containing the row splits. -// char_values: A 1D int32 Tensor containing the decoded codepoints. -func UnicodeDecode(scope *Scope, input tf.Output, input_encoding string, optional ...UnicodeDecodeAttr) (row_splits tf.Output, char_values tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"input_encoding": input_encoding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "UnicodeDecode", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Create a dense tensor from a ragged tensor, possibly altering its shape. -// -// The `ragged_to_dense` op creates a dense tensor from a list of row partition -// tensors, a value vector, and default values. If the shape is unspecified, the -// minimal shape required to contain all the elements in the ragged tensor (the -// natural shape) will be used. If some dimensions are left unspecified, then the -// size of the natural shape is used in that dimension. -// -// The default_value will be broadcast to the output shape. After that, the values -// from the ragged tensor overwrite the default values. Note that the default_value -// must have less dimensions than the value. -// -// The row partition tensors are in the order of the dimensions. -// At present, the types can be: -// * "ROW_SPLITS": the row_splits tensor from the ragged tensor. 
-// * "VALUE_ROWIDS": the value_rowids tensor from the ragged tensor. -// * "FIRST_DIM_SIZE": if value_rowids is used for the first dimension, then it -// is preceded by "FIRST_DIM_SIZE". -// -// Arguments: -// shape: The desired shape of the the output tensor. If left unspecified (empty), -// the minimal shape required to contain all the elements in the ragged tensor -// (the natural shape) will be used. If some dimensions are left unspecified, then -// the size of the natural shape is used in that dimension. -// -// Note that dense dimensions cannot be modified by the shape argument. Trying to -// change the size of a dense dimension will cause the op to fail. -// Examples: -// natural shape: [4, 5, 6] -// shape: -1 -// output shape: [4, 5, 6] -// -// natural shape: [4, 5, 6] -// shape: [3, -1, 2] -// output shape: [3, 5, 2] -// -// natural shape: [4, 5, 6] -// shape: [3, 7, 2] -// output shape: [3, 7, 2] -// -// values: A 1D tensor representing the values of the ragged tensor. -// default_value: The default_value when the shape is larger than the ragged tensor. The -// default_value is broadcast until it is the shape of the output tensor, and -// then overwritten by values in the ragged tensor. The default value must be -// compatible with this broadcast operation, and must have fewer dimensions than -// the value tensor. -// -// row_partition_types: The types of the row partition tensors. At present, these can be: -// * "ROW_SPLITS": the row_splits tensor from the ragged tensor. -// * "VALUE_ROWIDS": the value_rowids tensor from the ragged tensor. -// * "FIRST_DIM_SIZE": if value_rowids is used for the first dimension, then it -// is preceeded by "FIRST_DIM_SIZE". -// The tensors are in the order of the dimensions. -// -// Returns The resulting dense tensor. -func RaggedTensorToTensor(scope *Scope, shape tf.Output, values tf.Output, default_value tf.Output, row_partition_tensors []tf.Output, row_partition_types []string) (result tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"row_partition_types": row_partition_types} - opspec := tf.OpSpec{ - Type: "RaggedTensorToTensor", - Input: []tf.Input{ - shape, values, default_value, tf.OutputList(row_partition_tensors), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // BatchMatMulAttr is an optional argument to BatchMatMul. type BatchMatMulAttr func(optionalAttr) @@ -28653,45 +28135,6 @@ func BlockLSTMV2(scope *Scope, seq_len_max tf.Output, x tf.Output, cs_prev tf.Ou return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4), op.Output(5), op.Output(6) } -// Return a tensor with the same shape and contents as the input tensor or value. -func Identity(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Identity", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Outputs a `Summary` protocol buffer with scalar values. -// -// The input `tags` and `values` must have the same shape. The generated summary -// has a summary value for each tag-value pair in `tags` and `values`. -// -// Arguments: -// tags: Tags for the summary. -// values: Same shape as `tags. Values for the summary. -// -// Returns Scalar. Serialized `Summary` protocol buffer. 
-func ScalarSummary(scope *Scope, tags tf.Output, values tf.Output) (summary tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ScalarSummary", - Input: []tf.Input{ - tags, values, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // ResourceSparseApplyProximalAdagradAttr is an optional argument to ResourceSparseApplyProximalAdagrad. type ResourceSparseApplyProximalAdagradAttr func(optionalAttr) @@ -28759,6 +28202,45 @@ func Neg(scope *Scope, x tf.Output) (y tf.Output) { return op.Output(0) } +// Return a tensor with the same shape and contents as the input tensor or value. +func Identity(scope *Scope, input tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Identity", + Input: []tf.Input{ + input, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Outputs a `Summary` protocol buffer with scalar values. +// +// The input `tags` and `values` must have the same shape. The generated summary +// has a summary value for each tag-value pair in `tags` and `values`. +// +// Arguments: +// tags: Tags for the summary. +// values: Same shape as `tags. Values for the summary. +// +// Returns Scalar. Serialized `Summary` protocol buffer. +func ScalarSummary(scope *Scope, tags tf.Output, values tf.Output) (summary tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ScalarSummary", + Input: []tf.Input{ + tags, values, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Concatenates tensors along one dimension. // // Arguments: @@ -34257,307 +33739,6 @@ func SparseCrossV2(scope *Scope, indices []tf.Output, values []tf.Output, shapes return op.Output(0), op.Output(1), op.Output(2) } -// Pads a tensor with mirrored values. -// -// This operation pads a `input` with mirrored values according to the `paddings` -// you specify. `paddings` is an integer tensor with shape `[n, 2]`, where n is -// the rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates -// how many values to add before the contents of `input` in that dimension, and -// `paddings[D, 1]` indicates how many values to add after the contents of `input` -// in that dimension. Both `paddings[D, 0]` and `paddings[D, 1]` must be no greater -// than `input.dim_size(D)` (or `input.dim_size(D) - 1`) if `copy_border` is true -// (if false, respectively). -// -// The padded size of each dimension D of the output is: -// -// `paddings(D, 0) + input.dim_size(D) + paddings(D, 1)` -// -// For example: -// -// ``` -// # 't' is [[1, 2, 3], [4, 5, 6]]. -// # 'paddings' is [[1, 1]], [2, 2]]. -// # 'mode' is SYMMETRIC. -// # rank of 't' is 2. -// pad(t, paddings) ==> [[2, 1, 1, 2, 3, 3, 2] -// [2, 1, 1, 2, 3, 3, 2] -// [5, 4, 4, 5, 6, 6, 5] -// [5, 4, 4, 5, 6, 6, 5]] -// ``` -// -// Arguments: -// input: The input tensor to be padded. -// paddings: A two-column matrix specifying the padding sizes. The number of -// rows must be the same as the rank of `input`. -// mode: Either `REFLECT` or `SYMMETRIC`. In reflect mode the padded regions -// do not include the borders, while in symmetric mode the padded regions -// do include the borders. For example, if `input` is `[1, 2, 3]` and `paddings` -// is `[0, 2]`, then the output is `[1, 2, 3, 2, 1]` in reflect mode, and -// it is `[1, 2, 3, 3, 2]` in symmetric mode. -// -// Returns The padded tensor. 
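The `Identity` and `ScalarSummary` wrappers re-added above are typically wired together with `op.Const`. The sketch below is illustrative only: the helper name, package name, and the `op.Const`/`SubScope` calls are assumed to come from the standard Go binding (`github.com/tensorflow/tensorflow/tensorflow/go/op`) and are not part of this patch.

```go
package tfexample

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

// addScalarSummaries builds a serialized Summary proto with one value per tag.
// tags and values must have the same shape; one summary value is emitted per pair.
func addScalarSummaries(s *op.Scope) tf.Output {
	tags := op.Const(s.SubScope("tags"), []string{"loss", "accuracy"})
	values := op.Const(s.SubScope("values"), []float32{0.35, 0.91})
	return op.ScalarSummary(s, tags, values)
}
```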
-func MirrorPad(scope *Scope, input tf.Output, paddings tf.Output, mode string) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"mode": mode} - opspec := tf.OpSpec{ - Type: "MirrorPad", - Input: []tf.Input{ - input, paddings, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// TensorArrayV3Attr is an optional argument to TensorArrayV3. -type TensorArrayV3Attr func(optionalAttr) - -// TensorArrayV3ElementShape sets the optional element_shape attribute to value. -// -// value: The expected shape of an element, if known. Used to -// validate the shapes of TensorArray elements. If this shape is not -// fully specified, gathering zero-size TensorArrays is an error. -// If not specified, defaults to -func TensorArrayV3ElementShape(value tf.Shape) TensorArrayV3Attr { - return func(m optionalAttr) { - m["element_shape"] = value - } -} - -// TensorArrayV3DynamicSize sets the optional dynamic_size attribute to value. -// -// value: A boolean that determines whether writes to the TensorArray -// are allowed to grow the size. By default, this is not allowed. -// If not specified, defaults to false -func TensorArrayV3DynamicSize(value bool) TensorArrayV3Attr { - return func(m optionalAttr) { - m["dynamic_size"] = value - } -} - -// TensorArrayV3ClearAfterRead sets the optional clear_after_read attribute to value. -// -// value: If true (default), Tensors in the TensorArray are cleared -// after being read. This disables multiple read semantics but allows early -// release of memory. -// If not specified, defaults to true -func TensorArrayV3ClearAfterRead(value bool) TensorArrayV3Attr { - return func(m optionalAttr) { - m["clear_after_read"] = value - } -} - -// TensorArrayV3IdenticalElementShapes sets the optional identical_element_shapes attribute to value. -// -// value: If true (default is false), then all -// elements in the TensorArray will be expected to have have identical shapes. -// This allows certain behaviors, like dynamically checking for -// consistent shapes on write, and being able to fill in properly -// shaped zero tensors on stack -- even if the element_shape attribute -// is not fully defined. -// If not specified, defaults to false -func TensorArrayV3IdenticalElementShapes(value bool) TensorArrayV3Attr { - return func(m optionalAttr) { - m["identical_element_shapes"] = value - } -} - -// TensorArrayV3TensorArrayName sets the optional tensor_array_name attribute to value. -// -// value: Overrides the name used for the temporary tensor_array -// resource. Default value is the name of the 'TensorArray' op (which -// is guaranteed unique). -// If not specified, defaults to "" -func TensorArrayV3TensorArrayName(value string) TensorArrayV3Attr { - return func(m optionalAttr) { - m["tensor_array_name"] = value - } -} - -// An array of Tensors of given size. -// -// Write data via Write and read via Read or Pack. -// -// Arguments: -// size: The size of the array. -// dtype: The type of the elements on the tensor_array. -// -// Returns: -// handle: The handle to the TensorArray. -// flow: A scalar used to control gradient flow. 
-func TensorArrayV3(scope *Scope, size tf.Output, dtype tf.DataType, optional ...TensorArrayV3Attr) (handle tf.Output, flow tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "TensorArrayV3", - Input: []tf.Input{ - size, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// MatrixSolveLsAttr is an optional argument to MatrixSolveLs. -type MatrixSolveLsAttr func(optionalAttr) - -// MatrixSolveLsFast sets the optional fast attribute to value. -// If not specified, defaults to true -func MatrixSolveLsFast(value bool) MatrixSolveLsAttr { - return func(m optionalAttr) { - m["fast"] = value - } -} - -// Solves one or more linear least-squares problems. -// -// `matrix` is a tensor of shape `[..., M, N]` whose inner-most 2 dimensions -// form real or complex matrices of size `[M, N]`. `Rhs` is a tensor of the same -// type as `matrix` and shape `[..., M, K]`. -// The output is a tensor shape `[..., N, K]` where each output matrix solves -// each of the equations -// `matrix[..., :, :]` * `output[..., :, :]` = `rhs[..., :, :]` -// in the least squares sense. -// -// We use the following notation for (complex) matrix and right-hand sides -// in the batch: -// -// `matrix`=\\(A \in \mathbb{C}^{m \times n}\\), -// `rhs`=\\(B \in \mathbb{C}^{m \times k}\\), -// `output`=\\(X \in \mathbb{C}^{n \times k}\\), -// `l2_regularizer`=\\(\lambda \in \mathbb{R}\\). -// -// If `fast` is `True`, then the solution is computed by solving the normal -// equations using Cholesky decomposition. Specifically, if \\(m \ge n\\) then -// \\(X = (A^H A + \lambda I)^{-1} A^H B\\), which solves the least-squares -// problem \\(X = \mathrm{argmin}_{Z \in \Re^{n \times k} } ||A Z - B||_F^2 + \lambda ||Z||_F^2\\). -// If \\(m \lt n\\) then `output` is computed as -// \\(X = A^H (A A^H + \lambda I)^{-1} B\\), which (for \\(\lambda = 0\\)) is the -// minimum-norm solution to the under-determined linear system, i.e. -// \\(X = \mathrm{argmin}_{Z \in \mathbb{C}^{n \times k} } ||Z||_F^2 \\), -// subject to \\(A Z = B\\). Notice that the fast path is only numerically stable -// when \\(A\\) is numerically full rank and has a condition number -// \\(\mathrm{cond}(A) \lt \frac{1}{\sqrt{\epsilon_{mach} } }\\) or \\(\lambda\\) is -// sufficiently large. -// -// If `fast` is `False` an algorithm based on the numerically robust complete -// orthogonal decomposition is used. This computes the minimum-norm -// least-squares solution, even when \\(A\\) is rank deficient. This path is -// typically 6-7 times slower than the fast path. If `fast` is `False` then -// `l2_regularizer` is ignored. -// -// Arguments: -// matrix: Shape is `[..., M, N]`. -// rhs: Shape is `[..., M, K]`. -// l2_regularizer: Scalar tensor. -// -// @compatibility(numpy) -// Equivalent to np.linalg.lstsq -// @end_compatibility -// -// Returns Shape is `[..., N, K]`. -func MatrixSolveLs(scope *Scope, matrix tf.Output, rhs tf.Output, l2_regularizer tf.Output, optional ...MatrixSolveLsAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MatrixSolveLs", - Input: []tf.Input{ - matrix, rhs, l2_regularizer, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Generates sparse cross from a list of sparse and dense tensors. 
-// -// The op takes two lists, one of 2D `SparseTensor` and one of 2D `Tensor`, each -// representing features of one feature column. It outputs a 2D `SparseTensor` with -// the batchwise crosses of these features. -// -// For example, if the inputs are -// -// inputs[0]: SparseTensor with shape = [2, 2] -// [0, 0]: "a" -// [1, 0]: "b" -// [1, 1]: "c" -// -// inputs[1]: SparseTensor with shape = [2, 1] -// [0, 0]: "d" -// [1, 0]: "e" -// -// inputs[2]: Tensor [["f"], ["g"]] -// -// then the output will be -// -// shape = [2, 2] -// [0, 0]: "a_X_d_X_f" -// [1, 0]: "b_X_e_X_g" -// [1, 1]: "c_X_e_X_g" -// -// if hashed_output=true then the output will be -// -// shape = [2, 2] -// [0, 0]: FingerprintCat64( -// Fingerprint64("f"), FingerprintCat64( -// Fingerprint64("d"), Fingerprint64("a"))) -// [1, 0]: FingerprintCat64( -// Fingerprint64("g"), FingerprintCat64( -// Fingerprint64("e"), Fingerprint64("b"))) -// [1, 1]: FingerprintCat64( -// Fingerprint64("g"), FingerprintCat64( -// Fingerprint64("e"), Fingerprint64("c"))) -// -// Arguments: -// indices: 2-D. Indices of each input `SparseTensor`. -// values: 1-D. values of each `SparseTensor`. -// shapes: 1-D. Shapes of each `SparseTensor`. -// dense_inputs: 2-D. Columns represented by dense `Tensor`. -// hashed_output: If true, returns the hash of the cross instead of the string. -// This will allow us avoiding string manipulations. -// num_buckets: It is used if hashed_output is true. -// output = hashed_value%num_buckets if num_buckets > 0 else hashed_value. -// hash_key: Specify the hash_key that will be used by the `FingerprintCat64` -// function to combine the crosses fingerprints. -// -// -// -// Returns: -// output_indices: 2-D. Indices of the concatenated `SparseTensor`. -// output_values: 1-D. Non-empty values of the concatenated or hashed -// `SparseTensor`. -// output_shape: 1-D. Shape of the concatenated `SparseTensor`. -func SparseCross(scope *Scope, indices []tf.Output, values []tf.Output, shapes []tf.Output, dense_inputs []tf.Output, hashed_output bool, num_buckets int64, hash_key int64, out_type tf.DataType, internal_type tf.DataType) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"hashed_output": hashed_output, "num_buckets": num_buckets, "hash_key": hash_key, "out_type": out_type, "internal_type": internal_type} - opspec := tf.OpSpec{ - Type: "SparseCross", - Input: []tf.Input{ - tf.OutputList(indices), tf.OutputList(values), tf.OutputList(shapes), tf.OutputList(dense_inputs), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - // Generate a glob pattern matching all sharded file names. func ShardedFilespec(scope *Scope, basename tf.Output, num_shards tf.Output) (filename tf.Output) { if scope.Err() != nil { @@ -37294,6 +36475,77 @@ func InfeedEnqueuePrelinearizedBuffer(scope *Scope, input tf.Output, optional .. return scope.AddOperation(opspec) } +// Create a dense tensor from a ragged tensor, possibly altering its shape. +// +// The `ragged_to_dense` op creates a dense tensor from a list of row partition +// tensors, a value vector, and default values. If the shape is unspecified, the +// minimal shape required to contain all the elements in the ragged tensor (the +// natural shape) will be used. If some dimensions are left unspecified, then the +// size of the natural shape is used in that dimension. 
+// +// The default_value will be broadcast to the output shape. After that, the values +// from the ragged tensor overwrite the default values. Note that the default_value +// must have less dimensions than the value. +// +// The row partition tensors are in the order of the dimensions. +// At present, the types can be: +// * "ROW_SPLITS": the row_splits tensor from the ragged tensor. +// * "VALUE_ROWIDS": the value_rowids tensor from the ragged tensor. +// * "FIRST_DIM_SIZE": if value_rowids is used for the first dimension, then it +// is preceded by "FIRST_DIM_SIZE". +// +// Arguments: +// shape: The desired shape of the the output tensor. If left unspecified (empty), +// the minimal shape required to contain all the elements in the ragged tensor +// (the natural shape) will be used. If some dimensions are left unspecified, then +// the size of the natural shape is used in that dimension. +// +// Note that dense dimensions cannot be modified by the shape argument. Trying to +// change the size of a dense dimension will cause the op to fail. +// Examples: +// natural shape: [4, 5, 6] +// shape: -1 +// output shape: [4, 5, 6] +// +// natural shape: [4, 5, 6] +// shape: [3, -1, 2] +// output shape: [3, 5, 2] +// +// natural shape: [4, 5, 6] +// shape: [3, 7, 2] +// output shape: [3, 7, 2] +// +// values: A 1D tensor representing the values of the ragged tensor. +// default_value: The default_value when the shape is larger than the ragged tensor. The +// default_value is broadcast until it is the shape of the output tensor, and +// then overwritten by values in the ragged tensor. The default value must be +// compatible with this broadcast operation, and must have fewer dimensions than +// the value tensor. +// +// row_partition_types: The types of the row partition tensors. At present, these can be: +// * "ROW_SPLITS": the row_splits tensor from the ragged tensor. +// * "VALUE_ROWIDS": the value_rowids tensor from the ragged tensor. +// * "FIRST_DIM_SIZE": if value_rowids is used for the first dimension, then it +// is preceeded by "FIRST_DIM_SIZE". +// The tensors are in the order of the dimensions. +// +// Returns The resulting dense tensor. +func RaggedTensorToTensor(scope *Scope, shape tf.Output, values tf.Output, default_value tf.Output, row_partition_tensors []tf.Output, row_partition_types []string) (result tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"row_partition_types": row_partition_types} + opspec := tf.OpSpec{ + Type: "RaggedTensorToTensor", + Input: []tf.Input{ + shape, values, default_value, tf.OutputList(row_partition_tensors), + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Computes the derivative of a Gamma random sample w.r.t. `alpha`. func RandomGammaGrad(scope *Scope, alpha tf.Output, sample tf.Output) (output tf.Output) { if scope.Err() != nil { @@ -41826,6 +41078,453 @@ func SparseSparseMaximum(scope *Scope, a_indices tf.Output, a_values tf.Output, return op.Output(0), op.Output(1) } +// QuantizeV2Attr is an optional argument to QuantizeV2. +type QuantizeV2Attr func(optionalAttr) + +// QuantizeV2Mode sets the optional mode attribute to value. +// If not specified, defaults to "MIN_COMBINED" +func QuantizeV2Mode(value string) QuantizeV2Attr { + return func(m optionalAttr) { + m["mode"] = value + } +} + +// QuantizeV2RoundMode sets the optional round_mode attribute to value. 
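The `RaggedTensorToTensor` wrapper re-added above can be exercised roughly as follows. This is a hedged sketch: the package and function names are invented for the example, and the `op.Const`/`SubScope` helpers are assumed from the standard Go binding rather than taken from this patch.

```go
package tfexample

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

// raggedToDense densifies the ragged values [[1, 2], [3, 4, 5]].
// With shape [2, 3] (the natural shape here) the result is
// [[1, 2, 0], [3, 4, 5]], with the 0 filled in from default_value.
func raggedToDense(s *op.Scope) tf.Output {
	shape := op.Const(s.SubScope("shape"), []int64{2, 3})
	values := op.Const(s.SubScope("values"), []float32{1, 2, 3, 4, 5})
	defaultValue := op.Const(s.SubScope("default"), float32(0))
	rowSplits := op.Const(s.SubScope("row_splits"), []int64{0, 2, 5})
	return op.RaggedTensorToTensor(s, shape, values, defaultValue,
		[]tf.Output{rowSplits}, []string{"ROW_SPLITS"})
}
```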
+// If not specified, defaults to "HALF_AWAY_FROM_ZERO" +func QuantizeV2RoundMode(value string) QuantizeV2Attr { + return func(m optionalAttr) { + m["round_mode"] = value + } +} + +// QuantizeV2NarrowRange sets the optional narrow_range attribute to value. +// If not specified, defaults to false +func QuantizeV2NarrowRange(value bool) QuantizeV2Attr { + return func(m optionalAttr) { + m["narrow_range"] = value + } +} + +// QuantizeV2Axis sets the optional axis attribute to value. +// If not specified, defaults to -1 +func QuantizeV2Axis(value int64) QuantizeV2Attr { + return func(m optionalAttr) { + m["axis"] = value + } +} + +// QuantizeV2EnsureMinimumRange sets the optional ensure_minimum_range attribute to value. +// If not specified, defaults to 0.01 +func QuantizeV2EnsureMinimumRange(value float32) QuantizeV2Attr { + return func(m optionalAttr) { + m["ensure_minimum_range"] = value + } +} + +// Quantize the 'input' tensor of type float to 'output' tensor of type 'T'. +// +// [min_range, max_range] are scalar floats that specify the range for +// the 'input' data. The 'mode' attribute controls exactly which calculations are +// used to convert the float values to their quantized equivalents. The +// 'round_mode' attribute controls which rounding tie-breaking algorithm is used +// when rounding float values to their quantized equivalents. +// +// In 'MIN_COMBINED' mode, each value of the tensor will undergo the following: +// +// ``` +// out[i] = (in[i] - min_range) * range(T) / (max_range - min_range) +// if T == qint8: out[i] -= (range(T) + 1) / 2.0 +// ``` +// +// here `range(T) = numeric_limits::max() - numeric_limits::min()` +// +// *MIN_COMBINED Mode Example* +// +// Assume the input is type float and has a possible range of [0.0, 6.0] and the +// output type is quint8 ([0, 255]). The min_range and max_range values should be +// specified as 0.0 and 6.0. Quantizing from float to quint8 will multiply each +// value of the input by 255/6 and cast to quint8. +// +// If the output type was qint8 ([-128, 127]), the operation will additionally +// subtract each value by 128 prior to casting, so that the range of values aligns +// with the range of qint8. +// +// If the mode is 'MIN_FIRST', then this approach is used: +// +// ``` +// num_discrete_values = 1 << (# of bits in T) +// range_adjust = num_discrete_values / (num_discrete_values - 1) +// range = (range_max - range_min) * range_adjust +// range_scale = num_discrete_values / range +// quantized = round(input * range_scale) - round(range_min * range_scale) + +// numeric_limits::min() +// quantized = max(quantized, numeric_limits::min()) +// quantized = min(quantized, numeric_limits::max()) +// ``` +// +// The biggest difference between this and MIN_COMBINED is that the minimum range +// is rounded first, before it's subtracted from the rounded value. With +// MIN_COMBINED, a small bias is introduced where repeated iterations of quantizing +// and dequantizing will introduce a larger and larger error. +// +// *SCALED mode Example* +// +// `SCALED` mode matches the quantization approach used in +// `QuantizeAndDequantize{V2|V3}`. +// +// If the mode is `SCALED`, the quantization is performed by multiplying each +// input value by a scaling_factor. +// The scaling_factor is determined from `min_range` and `max_range` to be as large +// as possible such that the range from `min_range` to `max_range` is representable +// within values of type T. 
+// +// ```c++ +// +// const int min_T = std::numeric_limits::min(); +// const int max_T = std::numeric_limits::max(); +// const float max_float = std::numeric_limits::max(); +// +// const float scale_factor_from_min_side = +// (min_T * min_range > 0) ? min_T / min_range : max_float; +// const float scale_factor_from_max_side = +// (max_T * max_range > 0) ? max_T / max_range : max_float; +// +// const float scale_factor = std::min(scale_factor_from_min_side, +// scale_factor_from_max_side); +// ``` +// +// We next use the scale_factor to adjust min_range and max_range as follows: +// +// ```c++ +// min_range = min_T / scale_factor; +// max_range = max_T / scale_factor; +// ``` +// +// +// e.g. if T = qint8, and initially min_range = -10, and max_range = 9, we would +// compare -128/-10.0 = 12.8 to 127/9.0 = 14.11, and set scaling_factor = 12.8 +// In this case, min_range would remain -10, but max_range would be adjusted to +// 127 / 12.8 = 9.921875 +// +// So we will quantize input values in the range (-10, 9.921875) to (-128, 127). +// +// The input tensor can now be quantized by clipping values to the range +// `min_range` to `max_range`, then multiplying by scale_factor as follows: +// +// ```c++ +// result = round(min(max_range, max(min_range, input)) * scale_factor) +// ``` +// +// The adjusted `min_range` and `max_range` are returned as outputs 2 and 3 of +// this operation. These outputs should be used as the range for any further +// calculations. +// +// +// *narrow_range (bool) attribute* +// +// If true, we do not use the minimum quantized value. +// i.e. for int8 the quantized output, it would be restricted to the range +// -127..127 instead of the full -128..127 range. +// This is provided for compatibility with certain inference backends. +// (Only applies to SCALED mode) +// +// +// *axis (int) attribute* +// +// An optional `axis` attribute can specify a dimension index of the input tensor, +// such that quantization ranges will be calculated and applied separately for each +// slice of the tensor along that dimension. This is useful for per-channel +// quantization. +// +// If axis is specified, min_range and max_range +// +// if `axis`=None, per-tensor quantization is performed as normal. +// +// +// *ensure_minimum_range (float) attribute* +// +// Ensures the minimum quantization range is at least this value. +// The legacy default value for this is 0.01, but it is strongly suggested to +// set it to 0 for new uses. +// +// +// Arguments: +// +// min_range: The minimum value of the quantization range. This value may be adjusted by the +// op depending on other parameters. The adjusted value is written to `output_min`. +// If the `axis` attribute is specified, this must be a 1-D tensor whose size +// matches the `axis` dimension of the input and output tensors. +// max_range: The maximum value of the quantization range. This value may be adjusted by the +// op depending on other parameters. The adjusted value is written to `output_max`. +// If the `axis` attribute is specified, this must be a 1-D tensor whose size +// matches the `axis` dimension of the input and output tensors. +// +// +// Returns: +// output: The quantized data produced from the float input. +// output_min: The final quantization range minimum, used to clip input values before scaling +// and rounding them to quantized values. +// If the `axis` attribute is specified, this will be a 1-D tensor whose size +// matches the `axis` dimension of the input and output tensors. 
+// output_max: The final quantization range maximum, used to clip input values before scaling +// and rounding them to quantized values. +// If the `axis` attribute is specified, this will be a 1-D tensor whose size +// matches the `axis` dimension of the input and output tensors. +func QuantizeV2(scope *Scope, input tf.Output, min_range tf.Output, max_range tf.Output, T tf.DataType, optional ...QuantizeV2Attr) (output tf.Output, output_min tf.Output, output_max tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"T": T} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "QuantizeV2", + Input: []tf.Input{ + input, min_range, max_range, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// Returns the truth value of (x >= y) element-wise. +// +// *NOTE*: `GreaterEqual` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +// +// Example: +// +// ```python +// x = tf.constant([5, 4, 6, 7]) +// y = tf.constant([5, 2, 5, 10]) +// tf.math.greater_equal(x, y) ==> [True, True, True, False] +// +// x = tf.constant([5, 4, 6, 7]) +// y = tf.constant([5]) +// tf.math.greater_equal(x, y) ==> [True, False, True, True] +// ``` +func GreaterEqual(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "GreaterEqual", + Input: []tf.Input{ + x, y, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// BatchAttr is an optional argument to Batch. +type BatchAttr func(optionalAttr) + +// BatchMaxEnqueuedBatches sets the optional max_enqueued_batches attribute to value. +// If not specified, defaults to 10 +func BatchMaxEnqueuedBatches(value int64) BatchAttr { + return func(m optionalAttr) { + m["max_enqueued_batches"] = value + } +} + +// BatchAllowedBatchSizes sets the optional allowed_batch_sizes attribute to value. +// If not specified, defaults to <> +func BatchAllowedBatchSizes(value []int64) BatchAttr { + return func(m optionalAttr) { + m["allowed_batch_sizes"] = value + } +} + +// BatchContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func BatchContainer(value string) BatchAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// BatchSharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func BatchSharedName(value string) BatchAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// BatchBatchingQueue sets the optional batching_queue attribute to value. +// If not specified, defaults to "" +func BatchBatchingQueue(value string) BatchAttr { + return func(m optionalAttr) { + m["batching_queue"] = value + } +} + +// Batches all input tensors nondeterministically. +// +// When many instances of this Op are being run concurrently with the same +// container/shared_name in the same device, some will output zero-shaped Tensors +// and others will output Tensors of size up to max_batch_size. +// +// All Tensors in in_tensors are batched together (so, for example, labels and +// features should be batched with a single instance of this operation. +// +// Each invocation of batch emits an `id` scalar which will be used to identify +// this particular invocation when doing unbatch or its gradient. 
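The `QuantizeV2` wrapper added above can be invoked in SCALED mode along the lines of the sketch below, which mirrors the qint8 worked example from the comment (min_range = -10, max_range = 9). The `tf.Qint8` constant, `op.Const`, and the helper name are assumptions about the standard Go binding, not code from this patch.

```go
package tfexample

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

// quantizeScaled quantizes a float tensor to qint8 in SCALED mode.
// For min_range = -10 and max_range = 9 the scale factor is
// min(-128/-10, 127/9) = min(12.8, 14.11...) = 12.8, so the returned
// output_max is adjusted to 127/12.8 = 9.921875.
func quantizeScaled(s *op.Scope) (output, outputMin, outputMax tf.Output) {
	input := op.Const(s.SubScope("input"), []float32{-10, -1, 0, 5, 9})
	minRange := op.Const(s.SubScope("min"), float32(-10))
	maxRange := op.Const(s.SubScope("max"), float32(9))
	return op.QuantizeV2(s, input, minRange, maxRange, tf.Qint8,
		op.QuantizeV2Mode("SCALED"),
		// The op comment suggests 0 rather than the legacy 0.01 default.
		op.QuantizeV2EnsureMinimumRange(0))
}
```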
+// +// Each op which emits a non-empty batch will also emit a non-empty batch_index +// Tensor, which, is a [K, 3] matrix where each row contains the invocation's id, +// start, and length of elements of each set of Tensors present in batched_tensors. +// +// Batched tensors are concatenated along the first dimension, and all tensors in +// in_tensors must have the first dimension of the same size. +// +// in_tensors: The tensors to be batched. +// num_batch_threads: Number of scheduling threads for processing batches of work. +// Determines the number of batches processed in parallel. +// max_batch_size: Batch sizes will never be bigger than this. +// batch_timeout_micros: Maximum number of microseconds to wait before outputting +// an incomplete batch. +// allowed_batch_sizes: Optional list of allowed batch sizes. If left empty, does +// nothing. Otherwise, supplies a list of batch sizes, causing the op to pad +// batches up to one of those sizes. The entries must increase monotonically, and +// the final entry must equal max_batch_size. +// grad_timeout_micros: The timeout to use for the gradient. See Unbatch. +// batched_tensors: Either empty tensors or a batch of concatenated Tensors. +// batch_index: If out_tensors is non-empty, has information to invert it. +// container: Controls the scope of sharing of this batch. +// id: always contains a scalar with a unique ID for this invocation of Batch. +// shared_name: Concurrently running instances of batch in the same device with the +// same container and shared_name will batch their elements together. If left +// empty, the op name will be used as the shared name. +// T: the types of tensors to be batched. +func Batch(scope *Scope, in_tensors []tf.Output, num_batch_threads int64, max_batch_size int64, batch_timeout_micros int64, grad_timeout_micros int64, optional ...BatchAttr) (batched_tensors []tf.Output, batch_index tf.Output, id tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_batch_threads": num_batch_threads, "max_batch_size": max_batch_size, "batch_timeout_micros": batch_timeout_micros, "grad_timeout_micros": grad_timeout_micros} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Batch", + Input: []tf.Input{ + tf.OutputList(in_tensors), + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if batched_tensors, idx, err = makeOutputList(op, idx, "batched_tensors"); err != nil { + scope.UpdateErr("Batch", err) + return + } + batch_index = op.Output(idx) + id = op.Output(idx) + return batched_tensors, batch_index, id +} + +// UnicodeDecodeAttr is an optional argument to UnicodeDecode. +type UnicodeDecodeAttr func(optionalAttr) + +// UnicodeDecodeErrors sets the optional errors attribute to value. +// +// value: Error handling policy when there is invalid formatting found in the input. +// The value of 'strict' will cause the operation to produce a InvalidArgument +// error on any invalid input formatting. A value of 'replace' (the default) will +// cause the operation to replace any invalid formatting in the input with the +// `replacement_char` codepoint. A value of 'ignore' will cause the operation to +// skip any invalid formatting in the input and produce no corresponding output +// character. 
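A hedged usage sketch of the `Batch` wrapper defined above follows; the thread counts, timeouts, and names are illustrative choices, and the standard Go binding packages are assumed.

```go
package tfexample

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

// batchFeatures groups incoming feature tensors into batches of at most 32,
// padding each batch up to 8, 16, or 32 rows, and waits at most 100 ms
// (100000 microseconds) before emitting an incomplete batch.
func batchFeatures(s *op.Scope, features tf.Output) (batched []tf.Output, batchIndex, id tf.Output) {
	return op.Batch(s,
		[]tf.Output{features},
		4,       // num_batch_threads
		32,      // max_batch_size
		100000,  // batch_timeout_micros
		1000000, // grad_timeout_micros
		op.BatchAllowedBatchSizes([]int64{8, 16, 32}),
		op.BatchSharedName("shared_feature_batcher"))
}
```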
+// If not specified, defaults to "replace" +func UnicodeDecodeErrors(value string) UnicodeDecodeAttr { + return func(m optionalAttr) { + m["errors"] = value + } +} + +// UnicodeDecodeReplacementChar sets the optional replacement_char attribute to value. +// +// value: The replacement character codepoint to be used in place of any invalid +// formatting in the input when `errors='replace'`. Any valid unicode codepoint may +// be used. The default value is the default unicode replacement character is +// 0xFFFD or U+65533.) +// If not specified, defaults to 65533 +func UnicodeDecodeReplacementChar(value int64) UnicodeDecodeAttr { + return func(m optionalAttr) { + m["replacement_char"] = value + } +} + +// UnicodeDecodeReplaceControlCharacters sets the optional replace_control_characters attribute to value. +// +// value: Whether to replace the C0 control characters (00-1F) with the +// `replacement_char`. Default is false. +// If not specified, defaults to false +func UnicodeDecodeReplaceControlCharacters(value bool) UnicodeDecodeAttr { + return func(m optionalAttr) { + m["replace_control_characters"] = value + } +} + +// UnicodeDecodeTsplits sets the optional Tsplits attribute to value. +// If not specified, defaults to DT_INT64 +func UnicodeDecodeTsplits(value tf.DataType) UnicodeDecodeAttr { + return func(m optionalAttr) { + m["Tsplits"] = value + } +} + +// Decodes each string in `input` into a sequence of Unicode code points. +// +// The character codepoints for all strings are returned using a single vector +// `char_values`, with strings expanded to characters in row-major order. +// +// The `row_splits` tensor indicates where the codepoints for +// each input string begin and end within the `char_values` tensor. +// In particular, the values for the `i`th +// string (in row-major order) are stored in the slice +// `[row_splits[i]:row_splits[i+1]]`. Thus: +// +// * `char_values[row_splits[i]+j]` is the Unicode codepoint for the `j`th +// character in the `i`th string (in row-major order). +// * `row_splits[i+1] - row_splits[i]` is the number of characters in the `i`th +// string (in row-major order). +// +// Arguments: +// input: The text to be decoded. Can have any shape. Note that the output is flattened +// to a vector of char values. +// input_encoding: Text encoding of the input strings. This is any of the encodings supported +// by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`. +// +// Returns: +// row_splits: A 1D int32 tensor containing the row splits. +// char_values: A 1D int32 Tensor containing the decoded codepoints. +func UnicodeDecode(scope *Scope, input tf.Output, input_encoding string, optional ...UnicodeDecodeAttr) (row_splits tf.Output, char_values tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"input_encoding": input_encoding} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "UnicodeDecode", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + // LSTMBlockCellAttr is an optional argument to LSTMBlockCell. type LSTMBlockCellAttr func(optionalAttr) @@ -43215,6 +42914,307 @@ func RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebug(scope *Scope, num_shar return op.Output(0), op.Output(1), op.Output(2), op.Output(3) } +// Pads a tensor with mirrored values. +// +// This operation pads a `input` with mirrored values according to the `paddings` +// you specify. 
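Putting the relocated `UnicodeDecode` wrapper and its optional attributes together, here is a hedged end-to-end sketch (graph build plus one session run). The `op.NewScope`, `op.Const`, `tf.NewSession`, and `Session.Run` calls are assumed standard parts of the Go binding; error handling is reduced to panics for brevity.

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	input := op.Const(s, []string{"hello", "héllo"})
	// Invalid byte sequences are replaced with the replacement_char
	// codepoint instead of failing the op.
	rowSplits, charValues := op.UnicodeDecode(s, input, "UTF-8",
		op.UnicodeDecodeErrors("replace"))

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, []tf.Output{rowSplits, charValues}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println("row_splits:", out[0].Value())  // e.g. [0 5 10]
	fmt.Println("char_values:", out[1].Value()) // flattened codepoints of both strings
}
```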
`paddings` is an integer tensor with shape `[n, 2]`, where n is +// the rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates +// how many values to add before the contents of `input` in that dimension, and +// `paddings[D, 1]` indicates how many values to add after the contents of `input` +// in that dimension. Both `paddings[D, 0]` and `paddings[D, 1]` must be no greater +// than `input.dim_size(D)` (or `input.dim_size(D) - 1`) if `copy_border` is true +// (if false, respectively). +// +// The padded size of each dimension D of the output is: +// +// `paddings(D, 0) + input.dim_size(D) + paddings(D, 1)` +// +// For example: +// +// ``` +// # 't' is [[1, 2, 3], [4, 5, 6]]. +// # 'paddings' is [[1, 1]], [2, 2]]. +// # 'mode' is SYMMETRIC. +// # rank of 't' is 2. +// pad(t, paddings) ==> [[2, 1, 1, 2, 3, 3, 2] +// [2, 1, 1, 2, 3, 3, 2] +// [5, 4, 4, 5, 6, 6, 5] +// [5, 4, 4, 5, 6, 6, 5]] +// ``` +// +// Arguments: +// input: The input tensor to be padded. +// paddings: A two-column matrix specifying the padding sizes. The number of +// rows must be the same as the rank of `input`. +// mode: Either `REFLECT` or `SYMMETRIC`. In reflect mode the padded regions +// do not include the borders, while in symmetric mode the padded regions +// do include the borders. For example, if `input` is `[1, 2, 3]` and `paddings` +// is `[0, 2]`, then the output is `[1, 2, 3, 2, 1]` in reflect mode, and +// it is `[1, 2, 3, 3, 2]` in symmetric mode. +// +// Returns The padded tensor. +func MirrorPad(scope *Scope, input tf.Output, paddings tf.Output, mode string) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"mode": mode} + opspec := tf.OpSpec{ + Type: "MirrorPad", + Input: []tf.Input{ + input, paddings, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// TensorArrayV3Attr is an optional argument to TensorArrayV3. +type TensorArrayV3Attr func(optionalAttr) + +// TensorArrayV3ElementShape sets the optional element_shape attribute to value. +// +// value: The expected shape of an element, if known. Used to +// validate the shapes of TensorArray elements. If this shape is not +// fully specified, gathering zero-size TensorArrays is an error. +// If not specified, defaults to +func TensorArrayV3ElementShape(value tf.Shape) TensorArrayV3Attr { + return func(m optionalAttr) { + m["element_shape"] = value + } +} + +// TensorArrayV3DynamicSize sets the optional dynamic_size attribute to value. +// +// value: A boolean that determines whether writes to the TensorArray +// are allowed to grow the size. By default, this is not allowed. +// If not specified, defaults to false +func TensorArrayV3DynamicSize(value bool) TensorArrayV3Attr { + return func(m optionalAttr) { + m["dynamic_size"] = value + } +} + +// TensorArrayV3ClearAfterRead sets the optional clear_after_read attribute to value. +// +// value: If true (default), Tensors in the TensorArray are cleared +// after being read. This disables multiple read semantics but allows early +// release of memory. +// If not specified, defaults to true +func TensorArrayV3ClearAfterRead(value bool) TensorArrayV3Attr { + return func(m optionalAttr) { + m["clear_after_read"] = value + } +} + +// TensorArrayV3IdenticalElementShapes sets the optional identical_element_shapes attribute to value. +// +// value: If true (default is false), then all +// elements in the TensorArray will be expected to have have identical shapes. 
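The SYMMETRIC example in the `MirrorPad` comment above can be reproduced from Go roughly as follows; a hedged sketch with made-up names, assuming the standard binding's `op.Const` and `SubScope` helpers.

```go
package tfexample

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

// mirrorPadSymmetric pads [[1 2 3] [4 5 6]] with paddings [[1 1] [2 2]]
// in SYMMETRIC mode, producing the 4x7 result shown in the op comment.
func mirrorPadSymmetric(s *op.Scope) tf.Output {
	t := op.Const(s.SubScope("t"), [][]int32{{1, 2, 3}, {4, 5, 6}})
	paddings := op.Const(s.SubScope("paddings"), [][]int32{{1, 1}, {2, 2}})
	return op.MirrorPad(s, t, paddings, "SYMMETRIC")
}
```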
+// This allows certain behaviors, like dynamically checking for +// consistent shapes on write, and being able to fill in properly +// shaped zero tensors on stack -- even if the element_shape attribute +// is not fully defined. +// If not specified, defaults to false +func TensorArrayV3IdenticalElementShapes(value bool) TensorArrayV3Attr { + return func(m optionalAttr) { + m["identical_element_shapes"] = value + } +} + +// TensorArrayV3TensorArrayName sets the optional tensor_array_name attribute to value. +// +// value: Overrides the name used for the temporary tensor_array +// resource. Default value is the name of the 'TensorArray' op (which +// is guaranteed unique). +// If not specified, defaults to "" +func TensorArrayV3TensorArrayName(value string) TensorArrayV3Attr { + return func(m optionalAttr) { + m["tensor_array_name"] = value + } +} + +// An array of Tensors of given size. +// +// Write data via Write and read via Read or Pack. +// +// Arguments: +// size: The size of the array. +// dtype: The type of the elements on the tensor_array. +// +// Returns: +// handle: The handle to the TensorArray. +// flow: A scalar used to control gradient flow. +func TensorArrayV3(scope *Scope, size tf.Output, dtype tf.DataType, optional ...TensorArrayV3Attr) (handle tf.Output, flow tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dtype": dtype} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "TensorArrayV3", + Input: []tf.Input{ + size, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + +// MatrixSolveLsAttr is an optional argument to MatrixSolveLs. +type MatrixSolveLsAttr func(optionalAttr) + +// MatrixSolveLsFast sets the optional fast attribute to value. +// If not specified, defaults to true +func MatrixSolveLsFast(value bool) MatrixSolveLsAttr { + return func(m optionalAttr) { + m["fast"] = value + } +} + +// Solves one or more linear least-squares problems. +// +// `matrix` is a tensor of shape `[..., M, N]` whose inner-most 2 dimensions +// form real or complex matrices of size `[M, N]`. `Rhs` is a tensor of the same +// type as `matrix` and shape `[..., M, K]`. +// The output is a tensor shape `[..., N, K]` where each output matrix solves +// each of the equations +// `matrix[..., :, :]` * `output[..., :, :]` = `rhs[..., :, :]` +// in the least squares sense. +// +// We use the following notation for (complex) matrix and right-hand sides +// in the batch: +// +// `matrix`=\\(A \in \mathbb{C}^{m \times n}\\), +// `rhs`=\\(B \in \mathbb{C}^{m \times k}\\), +// `output`=\\(X \in \mathbb{C}^{n \times k}\\), +// `l2_regularizer`=\\(\lambda \in \mathbb{R}\\). +// +// If `fast` is `True`, then the solution is computed by solving the normal +// equations using Cholesky decomposition. Specifically, if \\(m \ge n\\) then +// \\(X = (A^H A + \lambda I)^{-1} A^H B\\), which solves the least-squares +// problem \\(X = \mathrm{argmin}_{Z \in \Re^{n \times k} } ||A Z - B||_F^2 + \lambda ||Z||_F^2\\). +// If \\(m \lt n\\) then `output` is computed as +// \\(X = A^H (A A^H + \lambda I)^{-1} B\\), which (for \\(\lambda = 0\\)) is the +// minimum-norm solution to the under-determined linear system, i.e. +// \\(X = \mathrm{argmin}_{Z \in \mathbb{C}^{n \times k} } ||Z||_F^2 \\), +// subject to \\(A Z = B\\). 
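For readers who have not used the `TensorArrayV3` wrapper relocated above (it is normally driven by control-flow code rather than called directly), the sketch below creates a dynamically sized array and writes one element. The `TensorArrayWriteV3` wrapper and its argument order are an assumption about the same generated package, and all names are invented for the example.

```go
package tfexample

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

// makeTensorArray creates a float TensorArray of size 3 that may grow,
// writes a value at index 0, and returns the handle and updated flow.
func makeTensorArray(s *op.Scope) (handle, flow tf.Output) {
	size := op.Const(s.SubScope("size"), int32(3))
	handle, flow = op.TensorArrayV3(s, size, tf.Float,
		op.TensorArrayV3DynamicSize(true),
		op.TensorArrayV3TensorArrayName("scratch_array"))
	index := op.Const(s.SubScope("index"), int32(0))
	value := op.Const(s.SubScope("value"), []float32{1, 2, 3})
	flow = op.TensorArrayWriteV3(s, handle, index, value, flow)
	return handle, flow
}
```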
Notice that the fast path is only numerically stable +// when \\(A\\) is numerically full rank and has a condition number +// \\(\mathrm{cond}(A) \lt \frac{1}{\sqrt{\epsilon_{mach} } }\\) or \\(\lambda\\) is +// sufficiently large. +// +// If `fast` is `False` an algorithm based on the numerically robust complete +// orthogonal decomposition is used. This computes the minimum-norm +// least-squares solution, even when \\(A\\) is rank deficient. This path is +// typically 6-7 times slower than the fast path. If `fast` is `False` then +// `l2_regularizer` is ignored. +// +// Arguments: +// matrix: Shape is `[..., M, N]`. +// rhs: Shape is `[..., M, K]`. +// l2_regularizer: Scalar tensor. +// +// @compatibility(numpy) +// Equivalent to np.linalg.lstsq +// @end_compatibility +// +// Returns Shape is `[..., N, K]`. +func MatrixSolveLs(scope *Scope, matrix tf.Output, rhs tf.Output, l2_regularizer tf.Output, optional ...MatrixSolveLsAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "MatrixSolveLs", + Input: []tf.Input{ + matrix, rhs, l2_regularizer, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Generates sparse cross from a list of sparse and dense tensors. +// +// The op takes two lists, one of 2D `SparseTensor` and one of 2D `Tensor`, each +// representing features of one feature column. It outputs a 2D `SparseTensor` with +// the batchwise crosses of these features. +// +// For example, if the inputs are +// +// inputs[0]: SparseTensor with shape = [2, 2] +// [0, 0]: "a" +// [1, 0]: "b" +// [1, 1]: "c" +// +// inputs[1]: SparseTensor with shape = [2, 1] +// [0, 0]: "d" +// [1, 0]: "e" +// +// inputs[2]: Tensor [["f"], ["g"]] +// +// then the output will be +// +// shape = [2, 2] +// [0, 0]: "a_X_d_X_f" +// [1, 0]: "b_X_e_X_g" +// [1, 1]: "c_X_e_X_g" +// +// if hashed_output=true then the output will be +// +// shape = [2, 2] +// [0, 0]: FingerprintCat64( +// Fingerprint64("f"), FingerprintCat64( +// Fingerprint64("d"), Fingerprint64("a"))) +// [1, 0]: FingerprintCat64( +// Fingerprint64("g"), FingerprintCat64( +// Fingerprint64("e"), Fingerprint64("b"))) +// [1, 1]: FingerprintCat64( +// Fingerprint64("g"), FingerprintCat64( +// Fingerprint64("e"), Fingerprint64("c"))) +// +// Arguments: +// indices: 2-D. Indices of each input `SparseTensor`. +// values: 1-D. values of each `SparseTensor`. +// shapes: 1-D. Shapes of each `SparseTensor`. +// dense_inputs: 2-D. Columns represented by dense `Tensor`. +// hashed_output: If true, returns the hash of the cross instead of the string. +// This will allow us avoiding string manipulations. +// num_buckets: It is used if hashed_output is true. +// output = hashed_value%num_buckets if num_buckets > 0 else hashed_value. +// hash_key: Specify the hash_key that will be used by the `FingerprintCat64` +// function to combine the crosses fingerprints. +// +// +// +// Returns: +// output_indices: 2-D. Indices of the concatenated `SparseTensor`. +// output_values: 1-D. Non-empty values of the concatenated or hashed +// `SparseTensor`. +// output_shape: 1-D. Shape of the concatenated `SparseTensor`. 
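The `MatrixSolveLs` wrapper defined earlier in this hunk can be used for a single overdetermined solve as sketched below. The assumption that `l2_regularizer` is a float64 scalar (independent of the matrix dtype) comes from the op's registered signature rather than from this patch, and the names are illustrative.

```go
package tfexample

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

// solveLeastSquares finds x minimizing ||A*x - b|| for an overdetermined
// 3x2 system, using the fast (Cholesky) path with no regularization.
func solveLeastSquares(s *op.Scope) tf.Output {
	matrix := op.Const(s.SubScope("A"), [][]float32{{1, 0}, {0, 1}, {1, 1}})
	rhs := op.Const(s.SubScope("b"), [][]float32{{1}, {2}, {4}})
	l2 := op.Const(s.SubScope("l2"), float64(0))
	return op.MatrixSolveLs(s, matrix, rhs, l2, op.MatrixSolveLsFast(true))
}
```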
+func SparseCross(scope *Scope, indices []tf.Output, values []tf.Output, shapes []tf.Output, dense_inputs []tf.Output, hashed_output bool, num_buckets int64, hash_key int64, out_type tf.DataType, internal_type tf.DataType) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"hashed_output": hashed_output, "num_buckets": num_buckets, "hash_key": hash_key, "out_type": out_type, "internal_type": internal_type} + opspec := tf.OpSpec{ + Type: "SparseCross", + Input: []tf.Input{ + tf.OutputList(indices), tf.OutputList(values), tf.OutputList(shapes), tf.OutputList(dense_inputs), + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + // Reverses specific dimensions of a tensor. // // Given a `tensor`, and a `bool` tensor `dims` representing the dimensions From 52821ea9359775a16cfd31cc649d4b08d93bb53d Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Mon, 3 Aug 2020 15:50:36 -0700 Subject: [PATCH 2012/2522] Add error if calling `jacobian` or `batch_jacobian` on an exhausted tape. Currently, this is failing silently: returns None. PiperOrigin-RevId: 324700276 Change-Id: If5b4fc76bc3bfd2280ca67395015aca5bcf62f91 --- tensorflow/python/eager/backprop.py | 28 +++++++++++++++++++----- tensorflow/python/eager/backprop_test.py | 26 +++++++++++++++++++++- 2 files changed, 48 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index 3c6ffc99fa4..7cb3abf4e07 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -997,6 +997,9 @@ class GradientTape(object): unconnected_gradients=UnconnectedGradients.NONE): """Computes the gradient using operations recorded in context of this tape. + Note: Unless you set `persistent=True` a GradientTape can only be used to + compute one set of gradients (or jacobians). + Args: target: a list or nested structure of Tensors or Variables to be differentiated. @@ -1015,14 +1018,14 @@ class GradientTape(object): the structure of `sources`. Raises: - RuntimeError: if called inside the context of the tape, or if called more - than once on a non-persistent tape. - ValueError: if the target is a variable or if unconnected gradients is + RuntimeError: If called on a used, non-persistent tape. + RuntimeError: If called inside the context of the tape. + ValueError: If the target is a variable or if unconnected gradients is called with an unknown value. """ if self._tape is None: - raise RuntimeError("GradientTape.gradient can only be called once on " - "non-persistent tapes.") + raise RuntimeError("A non-persistent GradientTape can only be used to" + "compute one set of gradients (or jacobians)") if self._recording: if not self._persistent: self._pop_tape() @@ -1101,6 +1104,9 @@ class GradientTape(object): experimental_use_pfor=True): """Computes the jacobian using operations recorded in context of this tape. + Note: Unless you set `persistent=True` a GradientTape can only be used to + compute one set of gradients (or jacobians). + See[wikipedia article](http://en.wikipedia.org/wiki/jacobian_matrix_and_determinant) for the definition of a Jacobian. @@ -1139,10 +1145,15 @@ class GradientTape(object): Raises: + RuntimeError: If called on a used, non-persistent tape. RuntimeError: If called on a non-persistent tape with eager execution enabled and without enabling experimental_use_pfor. 
ValueError: If vectorization of jacobian computation fails. """ + if self._tape is None: + raise RuntimeError("A non-persistent GradientTape can only be used to" + "compute one set of gradients (or jacobians)") + flat_sources = nest.flatten(sources) rewrap_as_ndarray = False if isinstance(target, np_arrays.ndarray): @@ -1225,6 +1236,9 @@ class GradientTape(object): are lower dimensional and avoid a bunch of redundant zeros which would result in the jacobian computation given the independence assumption. + Note: Unless you set `persistent=True` a GradientTape can only be used to + compute one set of gradients (or jacobians). + Example usage: ```python @@ -1255,11 +1269,15 @@ class GradientTape(object): per-example jacobians. Raises: + RuntimeError: If called on a used, non-persistent tape. RuntimeError: If called on a non-persistent tape with eager execution enabled and without enabling experimental_use_pfor. ValueError: If vectorization of jacobian computation fails or if first dimension of `target` and `source` do not match. """ + if self._tape is None: + raise RuntimeError("A non-persistent GradientTape can only be used to" + "compute one set of gradients (or jacobians)") rewrap_as_ndarray = False if isinstance(target, np_arrays.ndarray): target = target.data diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py index 6ae2a4c9a5e..0adb4698529 100644 --- a/tensorflow/python/eager/backprop_test.py +++ b/tensorflow/python/eager/backprop_test.py @@ -837,9 +837,33 @@ class BackpropTest(test.TestCase, parameterized.TestCase): z = y * y g.gradient(z, [x]) with self.assertRaisesRegex( - RuntimeError, 'GradientTape.gradient can only be called once'): + RuntimeError, 'A non-persistent GradientTape can only'): g.gradient(y, [x]) + @test_util.assert_no_new_tensors + def testGradientTapeJacobianCalledMultipleTimes(self): + with backprop.GradientTape() as g: + x = constant_op.constant(3.0) + g.watch(x) + y = x * x + z = y * y + g.jacobian(z, [x]) + with self.assertRaisesRegex( + RuntimeError, 'A non-persistent GradientTape can only'): + g.jacobian(y, [x]) + + @test_util.assert_no_new_tensors + def testGradientTapeBatchJacobianCalledMultipleTimes(self): + with backprop.GradientTape() as g: + x = constant_op.constant([[3.0]]) + g.watch(x) + y = x * x + z = y * y + g.batch_jacobian(z, x) + with self.assertRaisesRegex( + RuntimeError, 'A non-persistent GradientTape can only'): + g.batch_jacobian(y, [x]) + @test_util.assert_no_new_tensors @test_util.run_in_graph_and_eager_modes @test_util.run_v1_only('b/120545219') From 9474df4a1273601606018ae788a165e703253ed3 Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Mon, 3 Aug 2020 16:04:39 -0700 Subject: [PATCH 2013/2522] [MLIR][NFC] Adopt SymbolTable::UseRange::empty() PiperOrigin-RevId: 324703092 Change-Id: Ieb4303522df0215cc2df2461e56dcfe25b3d834a --- tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc index 7a791afb24d..abff4c21cf1 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc @@ -113,7 +113,7 @@ bool HasSingleUse(FuncOp func) { // If no uses in this scope, continue looking in parent module SymbolTable::UseRange func_uses = func_uses_optional.getValue(); - if (llvm::empty(func_uses)) continue; + if (func_uses.empty()) continue; // Check if multiple uses at this scope or another use 
already seen. if (!llvm::hasSingleElement(func_uses) || use_seen) return false; From 3cf7683cfea4277223b940bfb5563efd541badd4 Mon Sep 17 00:00:00 2001 From: Russell Power Date: Mon, 3 Aug 2020 16:16:10 -0700 Subject: [PATCH 2014/2522] Refactor `TpuCompilationCacheEntry` interface to return `TpuProgramGroupInterface` and `core_index` and makes CacheEntry less transparent and move application specific logics outside of cache. PiperOrigin-RevId: 324705343 Change-Id: I9dc421df069dbe7dc9bb57695f06e8b636fbc945 --- tensorflow/core/tpu/kernels/BUILD | 28 ++- .../kernels/tpu_compilation_cache_entry.cc | 54 +++++ .../tpu/kernels/tpu_compilation_cache_entry.h | 26 ++- .../tpu_compilation_cache_entry_impl.h | 94 +++++++++ .../kernels/tpu_compilation_cache_external.cc | 53 ++--- .../kernels/tpu_compilation_cache_external.h | 12 ++ .../tpu_compilation_cache_interface.cc | 144 ++----------- .../kernels/tpu_compilation_cache_interface.h | 111 ++++++---- .../tpu_compilation_cache_local_lookup.cc | 43 +++- .../tpu_compilation_cache_local_lookup.h | 13 +- .../kernels/tpu_compilation_cache_lookup.h | 18 +- .../core/tpu/kernels/tpu_compile_op_common.cc | 40 ++++ .../core/tpu/kernels/tpu_compile_op_common.h | 9 + .../tpu/kernels/tpu_compile_op_support.cc | 38 ---- .../core/tpu/kernels/tpu_compile_op_support.h | 8 - .../core/tpu/kernels/tpu_configuration_ops.cc | 13 -- tensorflow/core/tpu/kernels/tpu_execute_op.cc | 58 +++--- .../core/tpu/kernels/tpu_program_c_api.h | 14 -- .../core/tpu/kernels/tpu_program_group.cc | 189 ++++++------------ .../core/tpu/kernels/tpu_program_group.h | 58 ++++-- .../tpu/kernels/tpu_program_group_interface.h | 7 +- tensorflow/core/tpu/tpu_library_init_fns.inc | 2 - 22 files changed, 523 insertions(+), 509 deletions(-) create mode 100644 tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.cc create mode 100644 tensorflow/core/tpu/kernels/tpu_compilation_cache_entry_impl.h diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index 1336f52ed34..3b7d0e09c08 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -92,8 +92,6 @@ tf_kernel_library( deps = [ ":tpu_compilation_cache_factory", ":tpu_compilation_cache_interface", - ":tpu_compilation_cache_local_lookup", - ":tpu_compilation_cache_lookup", ":tpu_mesh_state_interface", ":tpu_op_consts", "//tensorflow/c:tf_status", @@ -210,14 +208,30 @@ cc_library( cc_library( name = "tpu_compilation_cache_entry", + srcs = ["tpu_compilation_cache_entry.cc"], hdrs = [ "tpu_compilation_cache_entry.h", ], deps = [ + ":compiled_subgraph", + ":tpu_compilation_cache_proto_cc", ":tpu_executable_info_proto_cc", - ":tpu_program_group_interface", + ":tpu_program_group", "//tensorflow/compiler/xla/service:hlo_proto_cc", + "//tensorflow/core:framework", "//tensorflow/core/lib/core:refcount", + "//tensorflow/core/platform:casts", + ], +) + +cc_library( + name = "tpu_compilation_cache_entry_impl", + srcs = [], + hdrs = ["tpu_compilation_cache_entry_impl.h"], + deps = [ + ":compiled_subgraph", + ":tpu_compilation_cache_interface", + ":tpu_executable_info_proto_cc", ], ) @@ -288,8 +302,6 @@ cc_library( "//tensorflow/compiler/tf2xla:host_compute_metadata_proto_cc", "//tensorflow/compiler/xla/service:hlo_proto_cc", "//tensorflow/core/lib/core:status", - "@com_google_absl//absl/time", - "@com_google_absl//absl/types:span", ], ) @@ -329,7 +341,6 @@ cc_library( hdrs = ["tpu_compilation_cache_interface.h"], deps = [ ":compiled_subgraph", - ":tpu_compilation_cache_entry", ":tpu_compilation_cache_key", 
":tpu_compilation_cache_proto_cc", ":tpu_compilation_metrics_hdrs", @@ -361,6 +372,7 @@ cc_library( deps = [ ":compiled_subgraph", ":tpu_compilation_cache_entry", + ":tpu_compilation_cache_entry_impl", ":tpu_compilation_cache_interface", ":tpu_compilation_cache_key", ":tpu_compilation_cache_proto_cc", @@ -370,7 +382,6 @@ cc_library( ":tpu_compile_op_support", ":tpu_mesh_state_interface", ":tpu_op_consts", - ":tpu_program_c_api_hdrs", ":tpu_program_group", ":tpu_util", ":trace_util_hdrs", @@ -380,10 +391,10 @@ cc_library( "//tensorflow/core:framework_internal", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", + "//tensorflow/core:protos_all_cc", "//tensorflow/core/profiler/lib:traceme", "//tensorflow/core/protobuf/tpu:compile_metadata_proto_cc", "@com_google_absl//absl/container:node_hash_map", - "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@com_google_absl//absl/synchronization", "@com_google_absl//absl/types:span", @@ -604,7 +615,6 @@ cc_library( deps = [ ":tpu_compilation_cache_entry", ":tpu_compilation_cache_external", - ":tpu_compilation_cache_interface", ":tpu_compilation_cache_local_lookup", ":tpu_compilation_cache_lookup", ":tpu_executable_info_proto_cc", diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.cc b/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.cc new file mode 100644 index 00000000000..73f55853306 --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.cc @@ -0,0 +1,54 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h" + +#include "tensorflow/core/platform/casts.h" + +namespace tensorflow { +namespace tpu { + +TpuCompilationCacheEntry::TpuCompilationCacheEntry( + const TpuProgramGroupInterface* tpu_program_group, int core_index) + : tpu_program_group_( + tensorflow::down_cast(tpu_program_group)), + core_index_(core_index) {} + +// Constructor for an empty entry. +TpuCompilationCacheEntry::TpuCompilationCacheEntry() + : tpu_program_group_(nullptr) {} + +const TPUExecutableInfoProto* TpuCompilationCacheEntry::get_executable_info() + const { + return &(tpu_program_group_->executable_info()); +} + +const TPUHostTransferInfoProto* +TpuCompilationCacheEntry::get_host_transfer_info() const { + return &(tpu_program_group_->host_transfer_info()); +} + +const xla::HloProto* TpuCompilationCacheEntry::get_hlo_metadata() const { + return tpu_program_group_->hlo_metadatas()[core_index_]; +} + +// TODO(henrytan,jiawenhao): When should we expect more than one +// XLA_TpuProgram* per TpuProgram? Remove the program_count CHECK below then. 
+const XLA_TpuProgram* TpuCompilationCacheEntry::get_tpu_program() const { + CHECK_EQ(tpu_program_group_->program_count(), 1); + return tpu_program_group_->tpu_programs()[core_index_]; +} + +} // namespace tpu +} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h b/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h index 832d76bfceb..b3766b8b4dd 100644 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h @@ -18,32 +18,30 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo.pb.h" #include "tensorflow/core/lib/core/refcount.h" #include "tensorflow/core/tpu/kernels/tpu_executable_info.pb.h" -#include "tensorflow/core/tpu/kernels/tpu_program_group_interface.h" +#include "tensorflow/core/tpu/kernels/tpu_program_group.h" namespace tensorflow { namespace tpu { -// Cache entry to hold a `TpuProgramGroupInterface` object that can be used to -// fetch a TPU program for a given TPU core index. +// A version of `CompilationCacheEntry` to access Tpu binary program +// `XLA_TpuProgram`. class TpuCompilationCacheEntry { public: explicit TpuCompilationCacheEntry( - const TpuProgramGroupInterface* tpu_program_group, int core_index) - : tpu_program_group_(tpu_program_group), core_index_(core_index) {} - + const TpuProgramGroupInterface* tpu_program_group, int core_index); // Constructor for an empty entry. - TpuCompilationCacheEntry() : tpu_program_group_(nullptr), core_index_(-1) {} - - const TpuProgramGroupInterface* tpu_program_group() const { - return tpu_program_group_; - } - - int core_index() const { return core_index_; } + TpuCompilationCacheEntry(); + const TPUExecutableInfoProto* get_executable_info() const; + const TPUHostTransferInfoProto* get_host_transfer_info() const; + const xla::HloProto* get_hlo_metadata() const; + // TODO(henrytan): maybe nicer to return C++ wrapper of `XLA_TpuProgram` + const XLA_TpuProgram* get_tpu_program() const; private: - const TpuProgramGroupInterface* tpu_program_group_; + const TpuProgramGroup* tpu_program_group_; int core_index_; }; + } // namespace tpu } // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry_impl.h b/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry_impl.h new file mode 100644 index 00000000000..0632d9a163f --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry_impl.h @@ -0,0 +1,94 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILATION_CACHE_ENTRY_IMPL_H_ +#define TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILATION_CACHE_ENTRY_IMPL_H_ +#include "tensorflow/core/tpu/kernels/compiled_subgraph.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h" +#include "tensorflow/core/tpu/kernels/tpu_executable_info.pb.h" +namespace tensorflow { +namespace tpu { +// Wrapper for a cache entry that holds a reference to the entry until the +// wrapper is deleted. This wrapper is the concrete type of +// CompilationCacheEntryRef returned by Lookup. +template +class CompilationCacheEntryRefImpl + : public CompilationCacheEntryRef { + public: + CompilationCacheEntryRefImpl(TpuCompilationCacheInterface* parent, + CompiledSubgraph* entry, int index); + ~CompilationCacheEntryRefImpl() override; + Status ToSubEntryRef(CompilationCacheFetchTarget fetch_target) override; + + protected: + TpuCompilationCacheInterface* parent_; // Not owned. + // A reference to entry_ is acquired in the constructor and released via + // parent->DiscardEntryRefs in the destructor. + CompiledSubgraph* entry_; + // The index of the program in entry_ that is returned by the get method. + int index_; +}; +template +CompilationCacheEntryRefImpl::CompilationCacheEntryRefImpl( + TpuCompilationCacheInterface* parent, CompiledSubgraph* entry, int index) + : parent_(parent), entry_(entry), index_(index) { + if (entry_ == nullptr) { + return; + } + if (entry_->main_entry == nullptr) { + entry_->Ref(); + } else { + // This is a sharding/unsharding entry nested in a main entry. Only + // refcount the main entry. + entry_->main_entry->Ref(); + } +} +template +CompilationCacheEntryRefImpl::~CompilationCacheEntryRefImpl() { + if (entry_ == nullptr) { + return; + } + if (entry_->main_entry == nullptr) { + parent_->DiscardEntryRefs({entry_}); + } else { + parent_->DiscardEntryRefs({entry_->main_entry}); + } +} +template +Status CompilationCacheEntryRefImpl::ToSubEntryRef( + CompilationCacheFetchTarget fetch_target) { + CompiledSubgraph* target = nullptr; + switch (fetch_target) { + case CompilationCacheFetchTarget::MAIN: + target = entry_; + break; + case CompilationCacheFetchTarget::SHARDING: + target = entry_->sharding_entry.get(); + break; + case CompilationCacheFetchTarget::UNSHARDING: + target = entry_->unsharding_entry.get(); + break; + default: + return xla::InvalidArgument("Invalid fetch target: %d", fetch_target); + } + if (target == nullptr) { + // Cache entry does not have an unsharding subentry. Unref and replace + // with nullptr. + parent_->DiscardEntryRefs({entry_}); + } + // Otherwise, since the refcount is always on the main entry, we don't + // need ref/unref. + entry_ = target; + return Status::OK(); +} +} // namespace tpu +} // namespace tensorflow +#endif // TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILATION_CACHE_ENTRY_IMPL_H_ diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.cc b/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.cc index 80010d70cd4..b4b18d1743b 100644 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.cc +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.cc @@ -16,18 +16,15 @@ limitations under the License. 
#include -#include "absl/memory/memory.h" #include "absl/strings/str_cat.h" #include "tensorflow/compiler/xla/service/hlo.pb.h" #include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/platform/random.h" #include "tensorflow/core/profiler/lib/traceme.h" -#include "tensorflow/core/tpu/kernels/compiled_subgraph.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_metrics.h" #include "tensorflow/core/tpu/kernels/tpu_compile_c_api.h" #include "tensorflow/core/tpu/kernels/tpu_compile_op_support.h" -#include "tensorflow/core/tpu/kernels/tpu_program_c_api.h" #include "tensorflow/core/tpu/kernels/tpu_util.h" #include "tensorflow/core/tpu/kernels/trace_util.h" @@ -51,22 +48,23 @@ void PopulateEntry(const std::string& key, CompiledSubgraph* entry, entry->tpu_program_group = absl::make_unique(std::move(tpu_program_group)); entry->initialized = true; - - if (entry->initialization_status.ok()) { - // Compute the entries total size once all members are initialized. - entry->total_size = entry->ComputeTotalSize(); - } -} - -std::unique_ptr CreateAndInitializeCompiledSubgraph( - CompiledSubgraph* main_entry) { - auto entry = absl::make_unique(); - entry->main_entry = main_entry; - entry->tpu_program_group = absl::make_unique(); - return entry; } } // namespace +TpuCompilationCacheExternal::EntryRefImpl::EntryRefImpl( + TpuCompilationCacheInterface* parent, CompiledSubgraph* entry, int index) + : CompilationCacheEntryRefImpl(parent, entry, + index) {} + +TpuCompilationCacheEntry TpuCompilationCacheExternal::EntryRefImpl::get() { + if (entry_ == nullptr) { + // Create an empty entry if the entry is nullptr. This corresponds to + // non-existing sharding/unsharding entries. + return TpuCompilationCacheEntry(); + } + return TpuCompilationCacheEntry(entry_->tpu_program_group.get(), index_); +} + CompiledSubgraph* TpuCompilationCacheExternal::InitializeEntry( const string& key, const std::function& initialize_program, @@ -75,6 +73,7 @@ CompiledSubgraph* TpuCompilationCacheExternal::InitializeEntry( main_entry->parent = this; main_entry->subgraph_key = key; main_entry->uid = get_uid(); + // TODO(henrytan): implement TpuCompilationCacheKey.debug_string. main_entry->cache_entry_debug_string = subgraph_key.prefix; VLOG(1) << "Cache Initializing Entry Session Debug " << main_entry->cache_entry_debug_string; @@ -113,29 +112,17 @@ CompiledSubgraph* TpuCompilationCacheExternal::InitializeEntry( std::pair(main_entry->uid, main_entry)); CHECK(uid_inserted.second); - if (tpu_program_group.has_sharding_program()) { - main_entry->sharding_entry = - CreateAndInitializeCompiledSubgraph(main_entry); - TpuProgramGroup sharding_programs; - sharding_programs.Initialize( - tpu_program_group.tpu_programs(TpuProgramShardingType::kSharding)); - PopulateEntry(key, main_entry->sharding_entry.get(), - std::move(sharding_programs)); - - main_entry->unsharding_entry = - CreateAndInitializeCompiledSubgraph(main_entry); - TpuProgramGroup unsharding_programs; - unsharding_programs.Initialize( - tpu_program_group.tpu_programs(TpuProgramShardingType::kUnsharding)); - PopulateEntry(key, main_entry->unsharding_entry.get(), - std::move(unsharding_programs)); + if (initialization_status.ok()) { + // Compute the entries total size once all members are initialized. + main_entry->total_size = tpu_program_group.program_size(); } + // TODO(henrytan): handle sharding/unsharding. 
PopulateEntry(key, main_entry, std::move(tpu_program_group)); for (int64 i = 0; i < main_entry->proto_key.size(); ++i) { auto entry_inserted = entries_by_proto_key_.insert( - std::pair>( + std::pair>( main_entry->proto_key[i], std::make_pair(main_entry, i))); CHECK(entry_inserted.second); } diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.h b/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.h index 51b5ffbed0d..86615b15d4c 100644 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.h +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.h @@ -32,6 +32,7 @@ limitations under the License. #include "tensorflow/core/tpu/kernels/compiled_subgraph.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache.pb.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_entry_impl.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_key.h" #include "tensorflow/core/tpu/kernels/tpu_compile_c_api.h" @@ -45,6 +46,17 @@ namespace tpu { class TpuCompilationCacheExternal : public TpuCompilationCacheInterface { public: + using Status = ::stream_executor::port::Status; + + class EntryRefImpl + : public CompilationCacheEntryRefImpl { + public: + EntryRefImpl(TpuCompilationCacheInterface* parent, CompiledSubgraph* entry, + int index); + + TpuCompilationCacheEntry get() override; + }; + explicit TpuCompilationCacheExternal(int64 max_cache_size) : TpuCompilationCacheInterface(max_cache_size) {} diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.cc b/tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.cc index 4cd2b864203..9e1aedf92ce 100644 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.cc +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.cc @@ -38,77 +38,10 @@ void TpuCompilationCacheInterface::RefHolder::AddRef(CompiledSubgraph* entry) { entries_.push_back(entry); } -std::string TpuCompilationCacheInterface::RefHolder::DebugString() const { +string TpuCompilationCacheInterface::RefHolder::DebugString() const { return "TpuCompilationCacheRefHolder"; } -CompilationCacheEntryRef::CompilationCacheEntryRef() - : parent_(nullptr), entry_(nullptr), index_(0) {} - -CompilationCacheEntryRef::CompilationCacheEntryRef( - TpuCompilationCacheInterface* parent, CompiledSubgraph* entry, int index) - : parent_(parent), entry_(entry), index_(index) { - if (entry_ == nullptr) { - return; - } - if (entry_->main_entry == nullptr) { - entry_->Ref(); - } else { - // This is a sharding/unsharding entry nested in a main entry. Only - // refcount the main entry. - entry_->main_entry->Ref(); - } -} - -CompilationCacheEntryRef::~CompilationCacheEntryRef() { - if (entry_ == nullptr) { - return; - } - if (entry_->main_entry == nullptr) { - parent_->DiscardEntryRefs({entry_}); - } else { - parent_->DiscardEntryRefs({entry_->main_entry}); - } -} - -TpuCompilationCacheEntry CompilationCacheEntryRef::get() { - if (entry_ == nullptr) { - // Create an empty entry if the entry is nullptr. This corresponds to - // non-existing sharding/unsharding entries. 
- return TpuCompilationCacheEntry(); - } - - return TpuCompilationCacheEntry(entry_->tpu_program_group.get(), index_); -} - -Status CompilationCacheEntryRef::ToSubEntryRef( - CompilationCacheFetchTarget fetch_target) { - CompiledSubgraph* target = nullptr; - switch (fetch_target) { - case CompilationCacheFetchTarget::MAIN: - target = entry_; - break; - case CompilationCacheFetchTarget::SHARDING: - target = entry_->sharding_entry.get(); - break; - case CompilationCacheFetchTarget::UNSHARDING: - target = entry_->unsharding_entry.get(); - break; - default: - return xla::InvalidArgument("Invalid fetch target: %d", fetch_target); - } - - if (target == nullptr) { - // Cache entry does not have an unsharding subentry. Unref and replace - // with nullptr. - parent_->DiscardEntryRefs({entry_}); - } - // Otherwise, since the refcount is always on the main entry, we don't - // need ref/unref. - entry_ = target; - return Status::OK(); -} - TpuCompilationCacheInterface::TpuCompilationCacheInterface(int64 max_cache_size) : max_cache_size_(max_cache_size) { CHECK_GE(max_cache_size_, 0); @@ -223,7 +156,7 @@ void TpuCompilationCacheInterface::UnloadAndDestroy(CompiledSubgraph* entry) { entry->Unref(); } -size_t TpuCompilationCacheInterface::RemoveEntry(const std::string& key) { +size_t TpuCompilationCacheInterface::RemoveEntry(const string& key) { auto erased = cache_.erase(key); TpuCompilationMetrics::SetCacheEntryCount(cache_.size()); @@ -263,7 +196,7 @@ CompiledSubgraph* TpuCompilationCacheInterface::DiscardEntryRef( } erased = entries_by_uid_.erase(entry->uid); CHECK_EQ(erased, 1); - for (const std::string& key : entry->proto_key) { + for (const string& key : entry->proto_key) { erased = entries_by_proto_key_.erase(key); CHECK_EQ(erased, 1); } @@ -336,10 +269,10 @@ void TpuCompilationCacheInterface::LookupEntryMarkedForEviction( } } -void TpuCompilationCacheInterface::InsertEntry(const std::string& key, +void TpuCompilationCacheInterface::InsertEntry(const string& key, CompiledSubgraph* entry) { auto cache_inserted = - cache_.insert(std::pair(key, entry)); + cache_.insert(std::pair(key, entry)); CHECK(cache_inserted.second); TpuCompilationMetrics::SetCacheEntryCount(cache_.size()); @@ -362,8 +295,7 @@ Status TpuCompilationCacheInterface::CompileIfKeyAbsent( const TpuCompilationCacheKey& subgraph_key, const SessionMetadata* session_metadata, CompilationRefHolder* per_step_ref_holder, int64* uid, - std::vector* proto_key, - std::vector* may_modify_variables, + std::vector* proto_key, std::vector* may_modify_variables, absl::Span* hlo_metadatas, const std::function& compile_function) { std::vector removed_entries; @@ -376,7 +308,7 @@ Status TpuCompilationCacheInterface::CompileIfKeyAbsent( return status; } -std::string TpuCompilationCacheInterface::FindCacheKey( +string TpuCompilationCacheInterface::FindCacheKey( const TpuCompilationCacheKey& subgraph_key) { if (!subgraph_key.has_guaranteed_const) { return subgraph_key.prefix; @@ -399,8 +331,7 @@ Status TpuCompilationCacheInterface::CompileIfKeyAbsentHelper( const TpuCompilationCacheKey& subgraph_key, const SessionMetadata* session_metadata, CompilationRefHolder* per_step_ref_holder, int64* uid, - std::vector* proto_key, - std::vector* may_modify_variables, + std::vector* proto_key, std::vector* may_modify_variables, std::vector* removed_entries, absl::Span* hlo_metadatas, const std::function& compile_function) { @@ -414,18 +345,17 @@ Status TpuCompilationCacheInterface::CompileIfKeyAbsentHelper( // for the lifetime of the object, see InitializeEntry() 
call below. absl::MutexLock lock(&mu_); - std::string cache_key = FindCacheKey(subgraph_key); + string cache_key = FindCacheKey(subgraph_key); auto iter = cache_.find(cache_key); bool is_new_key = iter == cache_.end(); - const std::string session_name = - tpu::SessionNameFromMetadata(session_metadata); + const string session_name = tpu::SessionNameFromMetadata(session_metadata); if (is_new_key) { cache_key = subgraph_key.ToString(); TpuCompilationMetrics::IncrementCacheLookupCount( /*is_cache_hit=*/false, session_name); - const std::string msg = + const string msg = strings::StrCat("TPU host compilation cache miss: cache_key(", cache_key, "), session_name(", session_name, ")"); TRACESTRING(msg); @@ -434,7 +364,7 @@ Status TpuCompilationCacheInterface::CompileIfKeyAbsentHelper( // Check if caller has disabled compilation. Set using // internal::ScopedTpuCompileDisabler. if (!UtilApiFn()->TpuCompile_IsTpuCompilationEnabledFn()) { - const std::string error_msg = strings::StrCat( + const string error_msg = strings::StrCat( "[TpuCompilationDisabled]: Compilation cache miss, but compilation " "disabled, session_name(", session_name, ") Debug String: ", subgraph_key.debug_string); @@ -473,7 +403,7 @@ Status TpuCompilationCacheInterface::CompileIfKeyAbsentHelper( } else { TpuCompilationMetrics::IncrementCacheLookupCount( /*is_cache_hit=*/true, session_name); - const std::string msg = + const string msg = strings::StrCat("TPU host compilation cache hit: cache_key(", cache_key, "), session_name(", session_name, ")"); TRACESTRING(msg); @@ -536,8 +466,8 @@ Status TpuCompilationCacheInterface::CompileIfKeyAbsentHelper( return entry->initialization_status; } -Status TpuCompilationCacheInterface::GetKeysFromUid( - int64 uid, std::vector* keys) { +Status TpuCompilationCacheInterface::GetKeysFromUid(int64 uid, + std::vector* keys) { keys->clear(); absl::MutexLock lock(&mu_); @@ -549,49 +479,5 @@ Status TpuCompilationCacheInterface::GetKeysFromUid( return Status::OK(); } -Status TpuCompilationCacheInterface::Lookup( - int64 uid, int proto_index, - std::unique_ptr* entry) { - entry->reset(); - - profiler::TraceMe proto_lookup_traceme( - "TPU compilation cache proto lookup by uid", - /*level=*/2); - - absl::MutexLock lock(&mu_); - const auto iter = entries_by_uid_.find(uid); - if (iter == entries_by_uid_.end()) { - return errors::NotFound("No subgraph found for uid ", uid); - } - CompiledSubgraph* cache_entry = iter->second; - if (proto_index < 0 || - proto_index >= cache_entry->tpu_program_group->program_count()) { - return errors::NotFound("No proto found for core index ", proto_index, - " in subgraph with uid ", uid); - } - *entry = absl::make_unique(this, cache_entry, - proto_index); - return Status::OK(); -} - -Status TpuCompilationCacheInterface::Lookup( - const std::string& proto_key, - std::unique_ptr* entry) { - entry->reset(); - - profiler::TraceMe proto_lookup_traceme("TPU compilation cache proto lookup", - /*level=*/2); - - absl::MutexLock lock(&mu_); - const auto iter = entries_by_proto_key_.find(proto_key); - if (iter == entries_by_proto_key_.end()) { - return errors::NotFound("No proto found for key ", proto_key); - } - CompiledSubgraph* cache_entry = iter->second.first; - int proto_index = iter->second.second; - *entry = absl::make_unique(this, cache_entry, - proto_index); - return Status::OK(); -} } // namespace tpu } // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h b/tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h index 
7b206fb1cf4..cde6467b7af 100644 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h @@ -32,7 +32,6 @@ limitations under the License. #include "tensorflow/core/protobuf/config.pb.h" #include "tensorflow/core/tpu/kernels/compiled_subgraph.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache.pb.h" -#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_key.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_metrics.h" #include "tensorflow/core/tpu/kernels/trace_util.h" @@ -49,20 +48,18 @@ class CompilationRefHolder : public ResourceBase { ~CompilationRefHolder() override = default; }; -// Wrapper for a cache entry returned by all the TpuCompilationCacheInterface -// `Lookup` methods, and ensures the underlying proto is not garbage-collected -// until the client discards the ptr. +// Base class for a reference to a cached tpu program. A unique_ptr to a +// CompilationCacheEntryRef is returned by all the cache Lookup methods below, +// and ensures the underlying proto is not garbage-collected until the client +// discards the ptr. +template class CompilationCacheEntryRef { public: - CompilationCacheEntryRef(); - CompilationCacheEntryRef(TpuCompilationCacheInterface* parent, - CompiledSubgraph* entry, int index); + virtual ~CompilationCacheEntryRef() = default; - virtual ~CompilationCacheEntryRef(); - - // Returns a TpuCompilationCacheEntry that should not be used beyond the - // lifetime of the CompilationCacheEntryRef. - virtual TpuCompilationCacheEntry get(); + // Returns a CompilationCacheEntry that should not be used beyond the lifetime + // of the tpu::CompilationCacheEntryRef. + virtual CacheEntryType get() = 0; // Mutates this ref to point to the entry's subentry (for // sharding/unsharding) or main entry (unchanged) as specified by @@ -72,15 +69,7 @@ class CompilationCacheEntryRef { // // If the requested subentry does not exist, the ref will point to a nullptr // entry, and the original entry will be unref'ed. - virtual Status ToSubEntryRef(CompilationCacheFetchTarget fetch_target); - - protected: - TpuCompilationCacheInterface* parent_; // Not owned. - // A reference to entry_ is acquired in the constructor and released via - // parent->DiscardEntryRefs in the destructor. - CompiledSubgraph* entry_; - // The index of the program in entry_ that is returned by the get method. - int index_; + virtual Status ToSubEntryRef(CompilationCacheFetchTarget fetch_target) = 0; }; class TpuCompilationCacheInterface : public ResourceBase { @@ -108,8 +97,7 @@ class TpuCompilationCacheInterface : public ResourceBase { const TpuCompilationCacheKey& subgraph_key, const SessionMetadata* session_metadata, CompilationRefHolder* per_step_ref_holder, int64* uid, - std::vector* proto_key, - std::vector* may_modify_variables, + std::vector* proto_key, std::vector* may_modify_variables, absl::Span* hlo_metadatas, const std::function& compile_function); @@ -136,18 +124,19 @@ class TpuCompilationCacheInterface : public ResourceBase { // Looks up an executable corresponding to the model-parallel core index of // the subgraph represented by key. On success a pointer to an EntryRef // holding the program is returned in entry. 
- Status Lookup(const std::string& proto_key, - std::unique_ptr* entry); + template + Status Lookup(const string& proto_key, std::unique_ptr* entry); // Looks up an executable corresponding to the model-parallel core index of // the subgraph represented by uid. On success a pointer to an EntryRef // holding the program is returned in entry. + template Status Lookup(int64 uid, int proto_index, - std::unique_ptr* entry); + std::unique_ptr* entry); // Looks up the subgraph represented by uid, and returns the vector of keys, // one per core, corresponding to that subgraph. - Status GetKeysFromUid(int64 uid, std::vector* keys); + Status GetKeysFromUid(int64 uid, std::vector* keys); // Makes a reference holder for this cache, that can be stored in the per-step // resource manager and will ensure that compiled entries persist until the @@ -181,7 +170,7 @@ class TpuCompilationCacheInterface : public ResourceBase { // parent_->DiscardEntryRefs. void AddRef(CompiledSubgraph* entry); - std::string DebugString() const override; + string DebugString() const override; private: TpuCompilationCacheInterface* parent_; // Not owned. @@ -196,8 +185,7 @@ class TpuCompilationCacheInterface : public ResourceBase { const TpuCompilationCacheKey& subgraph_key, const SessionMetadata* session_metadata, CompilationRefHolder* per_step_ref_holder, int64* uid, - std::vector* proto_key, - std::vector* may_modify_variables, + std::vector* proto_key, std::vector* may_modify_variables, std::vector* removed_entries, absl::Span* hlo_metadatas, const std::function& compile_function); @@ -242,14 +230,14 @@ class TpuCompilationCacheInterface : public ResourceBase { ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); // Removes the entry with given key from cache. - size_t RemoveEntry(const std::string& key) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); + size_t RemoveEntry(const string& key) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); // Inserts the given key and entry to cache. - void InsertEntry(const std::string& key, CompiledSubgraph* entry) + void InsertEntry(const string& key, CompiledSubgraph* entry) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); // Returns the cache key matching given subgraph_key. - std::string FindCacheKey(const TpuCompilationCacheKey& subgraph_key) + string FindCacheKey(const TpuCompilationCacheKey& subgraph_key) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); // Creates a new entry by running initialize_programs and places it in the @@ -259,7 +247,7 @@ class TpuCompilationCacheInterface : public ResourceBase { // // **InitializeEntry releases mu_ during the call to initialize_programs.** virtual CompiledSubgraph* InitializeEntry( - const std::string& key, + const string& key, const std::function& initialize_programs, const TpuCompilationCacheKey& subgraph_key) @@ -288,16 +276,13 @@ class TpuCompilationCacheInterface : public ResourceBase { // cache_ key matching a given subgraph key. When doing a lookup, check // session_key_map_ first to avoid unnecessay fingerprint computation. // Map from key prefix + session_handle to a cache_ key. - absl::node_hash_map session_key_map_ - ABSL_GUARDED_BY(mu_); + absl::node_hash_map session_key_map_ ABSL_GUARDED_BY(mu_); // Map from key prefix + fingerprint to a cache_ key. - absl::node_hash_map fingerprint_key_map_ - ABSL_GUARDED_BY(mu_); + absl::node_hash_map fingerprint_key_map_ ABSL_GUARDED_BY(mu_); // All the subgraph entries that can be looked up in the cache. An entry is // marked for eviction iff it is present in cache_ and not in // entries_by_last_use_. 
- std::unordered_map cache_ - ABSL_GUARDED_BY(mu_); + std::unordered_map cache_ ABSL_GUARDED_BY(mu_); // All the subgraph entries that can be looked up in the cache, indexed by // uid. absl::node_hash_map entries_by_uid_ @@ -305,7 +290,7 @@ class TpuCompilationCacheInterface : public ResourceBase { // All the protos that can be looked up in the cache, indexed by proto // key. The value of the map is a subgraph and the index of the proto compiled // for that subgraph. - std::unordered_map> + std::unordered_map> entries_by_proto_key_ ABSL_GUARDED_BY(mu_); // Map from last_use to entry, used to mark entries for eviction in LRU // order. If an entry's last_use counter is not present as a key in @@ -319,6 +304,50 @@ class TpuCompilationCacheInterface : public ResourceBase { TpuCompilationCacheInterface& operator=(const TpuCompilationCacheInterface&) = delete; }; + +template +Status TpuCompilationCacheInterface::Lookup( + int64 uid, int proto_index, std::unique_ptr* entry) { + entry->reset(); + + profiler::TraceMe proto_lookup_traceme( + "TPU compilation cache proto lookup by uid", + /*level=*/2); + + absl::MutexLock lock(&mu_); + const auto iter = entries_by_uid_.find(uid); + if (iter == entries_by_uid_.end()) { + return errors::NotFound("No subgraph found for uid ", uid); + } + CompiledSubgraph* cache_entry = iter->second; + if (proto_index < 0 || + proto_index >= cache_entry->tpu_program_group->program_count()) { + return errors::NotFound("No proto found for core index ", proto_index, + " in subgraph with uid ", uid); + } + *entry = absl::make_unique(this, cache_entry, proto_index); + return Status::OK(); +} + +template +Status TpuCompilationCacheInterface::Lookup( + const string& proto_key, std::unique_ptr* entry) { + entry->reset(); + + profiler::TraceMe proto_lookup_traceme("TPU compilation cache proto lookup", + /*level=*/2); + + absl::MutexLock lock(&mu_); + const auto iter = entries_by_proto_key_.find(proto_key); + if (iter == entries_by_proto_key_.end()) { + return errors::NotFound("No proto found for key ", proto_key); + } + CompiledSubgraph* cache_entry = iter->second.first; + int proto_index = iter->second.second; + *entry = absl::make_unique(this, cache_entry, proto_index); + return Status::OK(); +} + } // namespace tpu } // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_local_lookup.cc b/tensorflow/core/tpu/kernels/tpu_compilation_cache_local_lookup.cc index 29864a310d1..f30a503d2d2 100644 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_local_lookup.cc +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_local_lookup.cc @@ -16,50 +16,70 @@ limitations under the License. 
namespace tensorflow { namespace tpu { +namespace { +class CompilationCacheFetchTargetUtility { + public: + CompilationCacheFetchTargetUtility() + : names_({"Invalid", "Main", "Sharding", "Unsharding"}) {} + + std::string name(CompilationCacheFetchTarget target) const { + return names_[static_cast(target)]; + } + + private: + const std::vector names_; +}; + +std::string GetName(CompilationCacheFetchTarget target) { + static const auto* util = new CompilationCacheFetchTargetUtility(); + return util->name(target); +} + +} // namespace TpuCompilationCacheLocalLookup::TpuCompilationCacheLocalLookup( TpuCompilationCacheInterface* cache) - : cache_(cache) { - cache_->Ref(); -} + : cache_(cache) {} TpuCompilationCacheLocalLookup::~TpuCompilationCacheLocalLookup() { cache_->Unref(); } Status TpuCompilationCacheLocalLookup::Lookup( - const string& proto_key, std::unique_ptr* entry, + const string& proto_key, + std::unique_ptr* entry, CompilationCacheFetchTarget fetch_target) { profiler::TraceMe proto_lookup_traceme("Local TPU proto cache lookup", /*level=*/2); - Status s = cache_->Lookup(proto_key, entry); + Status s = cache_->Lookup( + proto_key, entry); VLOG(1) << "Looked up key " << proto_key << " in local subgraph cache status " << s; if (!s.ok()) { return s; } s = (*entry)->ToSubEntryRef(fetch_target); - VLOG(1) << "Fetched subentry: " - << CompilationCacheFetchTarget_Name(fetch_target) << " with status " + + VLOG(1) << "Fetched subentry: " << GetName(fetch_target) << " with status " << s; return s; } Status TpuCompilationCacheLocalLookup::Lookup( int64 uid, int proto_index, - std::unique_ptr* entry, + std::unique_ptr* entry, CompilationCacheFetchTarget fetch_target) { profiler::TraceMe proto_lookup_traceme("Local TPU proto cache lookup by uid", /*level=*/2); - Status s = cache_->Lookup(uid, proto_index, entry); + Status s = cache_->Lookup( + uid, proto_index, entry); VLOG(1) << "Looked up uid " << uid << ", index " << proto_index << " in local subgraph cache status " << s; if (!s.ok()) { return s; } s = (*entry)->ToSubEntryRef(fetch_target); - VLOG(1) << "Fetched subentry: " - << CompilationCacheFetchTarget_Name(fetch_target) << " with status " + VLOG(1) << "Fetched subentry: " << GetName(fetch_target) << " with status " << s; return s; } @@ -67,5 +87,6 @@ Status TpuCompilationCacheLocalLookup::Lookup( string TpuCompilationCacheLocalLookup::DebugString() const { return "TpuCompilationCacheLocalLookup"; } + } // namespace tpu } // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_local_lookup.h b/tensorflow/core/tpu/kernels/tpu_compilation_cache_local_lookup.h index 8db4c11ebea..eb5aadcd3e2 100644 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_local_lookup.h +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_local_lookup.h @@ -28,17 +28,24 @@ namespace tpu { // Class for looking up TPU programs when the execute and compile Op are in the // same address space. The proto is simply looked up in the compilation cache, // without any serialization taking place. 
-class TpuCompilationCacheLocalLookup : public TpuCompilationCacheLookup { +class TpuCompilationCacheLocalLookup + : public TpuCompilationCacheLookup< + CompilationCacheEntryRef> { public: + using TpuCompilationCacheEntryRef = + ::tensorflow::tpu::CompilationCacheEntryRef; + using EntryRefImpl = + ::tensorflow::tpu::TpuCompilationCacheExternal::EntryRefImpl; + explicit TpuCompilationCacheLocalLookup(TpuCompilationCacheInterface* cache); ~TpuCompilationCacheLocalLookup() override; Status Lookup(const string& proto_key, - std::unique_ptr* entry, + std::unique_ptr* entry, CompilationCacheFetchTarget fetch_target) override; Status Lookup(int64 uid, int proto_index, - std::unique_ptr* entry, + std::unique_ptr* entry, CompilationCacheFetchTarget fetch_target) override; string DebugString() const override; diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.h b/tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.h index ab476322a8a..0d1a53d31d2 100644 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.h +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.h @@ -23,11 +23,10 @@ limitations under the License. namespace tensorflow { namespace tpu { -// TODO(b/162241759): consider merging TpuCompilationCacheLookup and -// TpuCompilationCacheInterface. // Base class allowing Execute Ops to look up TPU programs. Different subclasses // are used when the execute Op is in the same address space as the compile Op, // and when they need to communicate over RPC. +template class TpuCompilationCacheLookup : public ResourceBase { public: ~TpuCompilationCacheLookup() override = default; @@ -44,11 +43,12 @@ class TpuCompilationCacheLookup : public ResourceBase { // fetch_target requests one of them, then after this call // (*entry)->get().get_executable() will return nullptr. virtual Status Lookup(const string& proto_key, - std::unique_ptr* entry, + std::unique_ptr* entry, CompilationCacheFetchTarget fetch_target) = 0; - virtual Status Lookup(const string& proto_key, - std::unique_ptr* entry) { + virtual Status Lookup( + const string& proto_key, + std::unique_ptr* entry) { return Lookup(proto_key, std::move(entry), CompilationCacheFetchTarget::MAIN); } @@ -58,15 +58,17 @@ class TpuCompilationCacheLookup : public ResourceBase { // returned in program. The wrapper is guaranteed to be valid only during the // execution of the Op requesting the proto. 
virtual Status Lookup(int64 uid, int proto_index, - std::unique_ptr* entry, + std::unique_ptr* entry, CompilationCacheFetchTarget fetch_target) = 0; - virtual Status Lookup(int64 uid, int proto_index, - std::unique_ptr* entry) { + virtual Status Lookup( + int64 uid, int proto_index, + std::unique_ptr* entry) { return Lookup(uid, proto_index, std::move(entry), CompilationCacheFetchTarget::MAIN); } }; + } // namespace tpu } // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc b/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc index ce18e844e66..4ed646af302 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc @@ -413,6 +413,46 @@ Status TpuCompileOpKernelCommon::CompileTFFunctionToHlo( return Status::OK(); } +/* static */ +Status TpuCompileOpKernelCommon::ComputeArgumentShapes( + const tpu::TPUCompileMetadataProto& metadata, + const std::vector& dynamic_shapes, + std::vector* arg_shapes) { + arg_shapes->resize(metadata.args_size()); + int dynamic_shape_pos = 0; + for (int i = 0; i < metadata.args_size(); ++i) { + const tpu::TPUCompileMetadataProto::Arg& arg = metadata.args(i); + // The XLA compiler determines the shape of each constant by inspecting the + // value of its corresponding host-memory tensor. As a result, we don't need + // to give the compiler graph-inferred shapes for constant arguments. + if (arg.kind() == tpu::TPUCompileMetadataProto::Arg::GUARANTEED_CONSTANT) { + continue; + } + TF_RETURN_IF_ERROR(PartialTensorShape::IsValidShape(arg.shape())); + PartialTensorShape static_shape(arg.shape()); + + TensorShape& shape = (*arg_shapes)[i]; + if (static_shape.IsFullyDefined()) { + TF_RET_CHECK(static_shape.AsTensorShape(&shape)); + } else { + TF_RET_CHECK(dynamic_shape_pos < dynamic_shapes.size()) + << "Too few dynamic shapes"; + shape = dynamic_shapes[dynamic_shape_pos++]; + if (!static_shape.IsCompatibleWith(shape)) { + return errors::InvalidArgument( + "Mismatch between static and dynamic shape for argument. Static " + "shape: ", + static_shape.DebugString(), + "; dynamic shape: ", shape.DebugString()); + } + } + } + // Checks we consumed all of the dynamic shapes. + TF_RET_CHECK(dynamic_shape_pos == dynamic_shapes.size()) + << "Too many dynamic shapes"; + return Status::OK(); +} + // Function arguments and return values lose their device assignments, so we // must recreate them. /* static */ Status TpuCompileOpKernelCommon::AssignDevicesToArgsAndRetvals( diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_common.h b/tensorflow/core/tpu/kernels/tpu_compile_op_common.h index 327aa460ddd..3d3f0afcdb7 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_common.h +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_common.h @@ -99,6 +99,15 @@ class TpuCompileOpKernelCommon { const std::vector& arg_shapes, TpuProgramGroupInterface* tpu_program_group) = 0; + // Computes shapes for each argument. Uses both the static shape from the + // metadata, and the dynamic shapes where the static shape is not + // defined. There must be one dynamic_shape for each argument with a + // partially defined shape, in index order. + static Status ComputeArgumentShapes( + const tpu::TPUCompileMetadataProto& metadata, + const std::vector& dynamic_shapes, + std::vector* arg_shapes); + // Performs shape inference on `computation`, filling shape_info with operator // shapes. The shapes of the _Arg nodes are taken from `arg_shapes`. 
static Status RunShapeInferenceOnComputation( diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_support.cc b/tensorflow/core/tpu/kernels/tpu_compile_op_support.cc index 3440b6d265a..5cc35a07e66 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_support.cc +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_support.cc @@ -540,43 +540,5 @@ Status CompileOpMetadataFromContext(OpKernelConstruction* ctx, } return Status::OK(); } - -Status ComputeArgumentShapes(const tpu::TPUCompileMetadataProto& metadata, - const std::vector& dynamic_shapes, - std::vector* arg_shapes) { - arg_shapes->resize(metadata.args_size()); - int dynamic_shape_pos = 0; - for (int i = 0; i < metadata.args_size(); ++i) { - const tpu::TPUCompileMetadataProto::Arg& arg = metadata.args(i); - // The XLA compiler determines the shape of each constant by inspecting the - // value of its corresponding host-memory tensor. As a result, we don't need - // to give the compiler graph-inferred shapes for constant arguments. - if (arg.kind() == tpu::TPUCompileMetadataProto::Arg::GUARANTEED_CONSTANT) { - continue; - } - TF_RETURN_IF_ERROR(PartialTensorShape::IsValidShape(arg.shape())); - PartialTensorShape static_shape(arg.shape()); - - TensorShape& shape = (*arg_shapes)[i]; - if (static_shape.IsFullyDefined()) { - TF_RET_CHECK(static_shape.AsTensorShape(&shape)); - } else { - TF_RET_CHECK(dynamic_shape_pos < dynamic_shapes.size()) - << "Too few dynamic shapes"; - shape = dynamic_shapes[dynamic_shape_pos++]; - if (!static_shape.IsCompatibleWith(shape)) { - return errors::InvalidArgument( - "Mismatch between static and dynamic shape for argument. Static " - "shape: ", - static_shape.DebugString(), - "; dynamic shape: ", shape.DebugString()); - } - } - } - // Checks we consumed all of the dynamic shapes. - TF_RET_CHECK(dynamic_shape_pos == dynamic_shapes.size()) - << "Too many dynamic shapes"; - return Status::OK(); -} } // namespace tpu } // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_support.h b/tensorflow/core/tpu/kernels/tpu_compile_op_support.h index ea13d33b521..bc60f64286a 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_support.h +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_support.h @@ -159,14 +159,6 @@ se::port::Status CompileOpMetadataFromContext(OpKernelConstruction* ctx, TPUCompileMetadataProto* metadata, NameAttrList* function_name, std::string* mlir_module); - -// Computes shapes for each argument. Uses both the static shape from the -// metadata, and the dynamic shapes where the static shape is not -// defined. There must be one dynamic_shape for each argument with a -// partially defined shape, in index order. -Status ComputeArgumentShapes(const TPUCompileMetadataProto& metadata, - const std::vector& dynamic_shapes, - std::vector* arg_shapes); } // namespace tpu } // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc b/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc index 5a8c283c7c2..e098dbd682c 100644 --- a/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc +++ b/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc @@ -25,8 +25,6 @@ limitations under the License. 
#include "tensorflow/core/platform/refcount.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_factory.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h" -#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_local_lookup.h" -#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.h" #include "tensorflow/core/tpu/kernels/tpu_mesh_state_interface.h" #include "tensorflow/core/tpu/kernels/tpu_op_consts.h" #include "tensorflow/core/tpu/tpu_api.h" @@ -255,10 +253,6 @@ void InitializeHostForDistributedTpuOp::Compute(OpKernelContext* ctx) { mesh_state_interface)); } - VLOG(1) << "Removing existing proto compilation cache lookup if it exists"; - OP_REQUIRES_OK(ctx, DeleteIfExists( - rmgr, tpu::kCompiledProtoCacheResourceName)); - if (enable_whole_mesh_compilations_) { // If this is a whole mesh compilation mode, create the compilation cache, // if missing. @@ -282,13 +276,6 @@ void InitializeHostForDistributedTpuOp::Compute(OpKernelContext* ctx) { if (local_compilation_cache != nullptr) { local_compilation_cache->Unref(); - - tpu::TpuCompilationCacheLookup* proto_lookup; - proto_lookup = - new tpu::TpuCompilationCacheLocalLookup(local_compilation_cache); - OP_REQUIRES_OK( - ctx, rmgr->Create(rmgr->default_container(), - tpu::kCompiledProtoCacheResourceName, proto_lookup)); } Tensor* ctx_output; diff --git a/tensorflow/core/tpu/kernels/tpu_execute_op.cc b/tensorflow/core/tpu/kernels/tpu_execute_op.cc index 3522ace379a..51c9dd481a3 100644 --- a/tensorflow/core/tpu/kernels/tpu_execute_op.cc +++ b/tensorflow/core/tpu/kernels/tpu_execute_op.cc @@ -40,12 +40,10 @@ limitations under the License. #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/platform/casts.h" #include "tensorflow/core/platform/tracing.h" #include "tensorflow/core/profiler/lib/traceme.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_external.h" -#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_local_lookup.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.h" #include "tensorflow/core/tpu/kernels/tpu_executable_info.pb.h" @@ -58,10 +56,14 @@ limitations under the License. #include "tensorflow/stream_executor/tpu/tpu_node_context.h" namespace tensorflow { + namespace { -using ::tensorflow::tpu::CompilationCacheEntryRef; -using ::tensorflow::tpu::TpuCompilationCacheLookup; + using ::tensorflow::tpu::TpuNodeContext; +using CompilationCacheEntryRef = ::tensorflow::tpu::CompilationCacheEntryRef< + ::tensorflow::tpu::TpuCompilationCacheEntry>; +using TpuCompilationCacheLookup = + ::tensorflow::tpu::TpuCompilationCacheLookup; // Looks up the input `key` in the compilation cache, populating // `*rendezvous_key_base` and `*entry`. @@ -639,35 +641,28 @@ Status TPUExecuteOp::DoWork(OpKernelContext* context) { profiler::TraceMe trace_me_init("TPUExecuteOp::Init", /*level=*/2); string rendezvous_key_base; - std::unique_ptr entry_ref; + std::unique_ptr entry; TF_RETURN_IF_ERROR( - GetComputationCacheEntry(context, &rendezvous_key_base, &entry_ref)); + GetComputationCacheEntry(context, &rendezvous_key_base, &entry)); // Shapes of the inputs and outputs, in xla::Shape form. 
- tpu::TpuCompilationCacheEntry entry = entry_ref->get(); - const tpu::TpuProgramGroup* tpu_program_group = - tensorflow::down_cast( - entry.tpu_program_group()); - CHECK_NE(tpu_program_group, nullptr); - const int core_index = entry.core_index(); - const TPUExecutableInfoProto& executable = - tpu_program_group->executable_info(core_index); + const TPUExecutableInfoProto* proto = entry->get().get_executable_info(); xla::Backend* const backend = node_context->backend(); xla::TransferManager* const transfer_manager = backend->transfer_manager(); TF_RET_CHECK(context->op_device_context()); se::Stream* stream = context->op_device_context()->stream(); - TF_RET_CHECK(executable.input_shapes_size() == 1); + TF_RET_CHECK(proto->input_shapes_size() == 1); - xla::Shape host_shape(executable.input_shapes(0)); + xla::Shape host_shape(proto->input_shapes(0)); TF_ASSIGN_OR_RETURN( auto variable_update_map, - BuildVariableUpdateMap(executable.variable_indices(), + BuildVariableUpdateMap(proto->variable_indices(), fused_device_var_reads_in_computation_inputs_, fused_device_var_updates_in_computation_outputs_, - executable.output_tensor_shapes().size())); + proto->output_tensor_shapes().size())); TF_ASSIGN_OR_RETURN( std::unique_ptr input_buffers, BuildComputationInputs(context, host_shape, variable_update_map, backend, @@ -702,9 +697,8 @@ Status TPUExecuteOp::DoWork(OpKernelContext* context) { // Snapshot the inputs, if a snapshot was requested. std::shared_ptr hlo_snapshot; - if (executable.has_session_module()) { - hlo_snapshot = - std::make_shared(executable.session_module()); + if (proto->has_session_module()) { + hlo_snapshot = std::make_shared(proto->session_module()); auto literal = std::make_shared(shaped_buffer.on_host_shape()); transfer_manager->TransferLiteralFromDevice( @@ -729,9 +723,9 @@ Status TPUExecuteOp::DoWork(OpKernelContext* context) { const uint32 rng_seed = GetXLARandomSeed(); std::unique_ptr device_assignment; - if (executable.has_device_assignment()) { + if (proto->has_device_assignment()) { TF_ASSIGN_OR_RETURN(device_assignment, xla::DeviceAssignment::Deserialize( - executable.device_assignment())); + proto->device_assignment())); } VLOG(4) << "Input buffers after alias resolution: " @@ -749,24 +743,24 @@ Status TPUExecuteOp::DoWork(OpKernelContext* context) { // we free a memory and reassign it to other users while a program is running, // all subsequent writes to the program that could possibly clobber the memory // will depend on the program to finish. 
- const TPUHostTransferInfoProto& host_transfer_info = - tpu_program_group->host_transfer_info(core_index); + const TPUHostTransferInfoProto* host_transfer_info = + entry->get().get_host_transfer_info(); + const xla::HloProto* hlo_metadata = entry->get().get_hlo_metadata(); TF_ASSIGN_OR_RETURN( xla::ExecutionOutput output, - TPUExecute(executable, host_transfer_info, - *tpu_program_group->hlo_metadata(core_index), std::move(input), + TPUExecute(*proto, *host_transfer_info, *hlo_metadata, std::move(input), rendezvous_key_base, rng_seed, node_context.get(), device_assignment.get(), context->cancellation_manager(), context, stream, transfer_stream_ptr.get(), - tpu_program_group->tpu_program(core_index))); + entry->get().get_tpu_program())); stream->ThenRecordEvent(definition_event.get()); TF_ASSIGN_OR_RETURN( std::unique_ptr output_buffers, - AllocateOutputTensors( - context, output.ConsumeResult(), executable.output_tensor_shapes(), - variable_update_map, node_context.get(), stream, device_ordinal, - input_buffers.get(), definition_event)); + AllocateOutputTensors(context, output.ConsumeResult(), + proto->output_tensor_shapes(), variable_update_map, + node_context.get(), stream, device_ordinal, + input_buffers.get(), definition_event)); // Transfer the outputs and save the snapshot to disk. if (hlo_snapshot) { diff --git a/tensorflow/core/tpu/kernels/tpu_program_c_api.h b/tensorflow/core/tpu/kernels/tpu_program_c_api.h index 41c7d47cf97..c9951e4d5ce 100644 --- a/tensorflow/core/tpu/kernels/tpu_program_c_api.h +++ b/tensorflow/core/tpu/kernels/tpu_program_c_api.h @@ -21,9 +21,6 @@ limitations under the License. typedef struct XLA_TpuProgram XLA_TpuProgram; -// Enum for choosing sharding/unsharding program from a `XLA_TpuProgram` obj. -enum TpuProgramShardingType { kInvalid = 0, kMain, kSharding, kUnsharding }; - extern "C" { // Creates a new TPU program. @@ -67,15 +64,6 @@ TFTPU_CAPI_EXPORT void TpuProgram_GetHloMetadata( TFTPU_CAPI_EXPORT void TpuProgram_GetMayModifyVariables( const XLA_TpuProgram* tpu_program, bool* may_modify_variables); -// Check if TPU program has sharding. -TFTPU_CAPI_EXPORT bool TpuProgram_HasSharding( - const XLA_TpuProgram* tpu_program); - -// Gets TPU program by sharding type. Return value is valid only when the -// `status.status()` returns `OK`. -TFTPU_CAPI_EXPORT XLA_TpuProgram* TpuProgram_GetTpuProgram( - XLA_TpuProgram* tpu_program, TpuProgramShardingType type); - struct TfTpu_TpuProgramApiFn { TFTPU_ADD_FN_IN_STRUCT(TpuProgram_New); TFTPU_ADD_FN_IN_STRUCT(TpuProgram_Free); @@ -88,8 +76,6 @@ struct TfTpu_TpuProgramApiFn { TFTPU_ADD_FN_IN_STRUCT(TpuProgram_GetHostTransferInfo); TFTPU_ADD_FN_IN_STRUCT(TpuProgram_GetHloMetadata); TFTPU_ADD_FN_IN_STRUCT(TpuProgram_GetMayModifyVariables); - TFTPU_ADD_FN_IN_STRUCT(TpuProgram_HasSharding); - TFTPU_ADD_FN_IN_STRUCT(TpuProgram_GetTpuProgram); }; } // extern "C" diff --git a/tensorflow/core/tpu/kernels/tpu_program_group.cc b/tensorflow/core/tpu/kernels/tpu_program_group.cc index 39d1f38b104..e22175af270 100644 --- a/tensorflow/core/tpu/kernels/tpu_program_group.cc +++ b/tensorflow/core/tpu/kernels/tpu_program_group.cc @@ -22,7 +22,6 @@ limitations under the License. 
#include "tensorflow/core/tpu/kernels/tpu_compile.pb.h" #include "tensorflow/core/tpu/kernels/tpu_compile_c_api.h" #include "tensorflow/core/tpu/kernels/tpu_compile_op_support.h" -#include "tensorflow/core/tpu/kernels/tpu_program_c_api.h" #include "tensorflow/core/tpu/tpu_api.h" #include "tensorflow/stream_executor/tpu/proto_helper.h" #include "tensorflow/stream_executor/tpu/status_helper.h" @@ -99,71 +98,55 @@ StatusOr> CompileAheadOfTime( compilation_result, metadata, per_core_arg_shapes, per_core_output_shapes, per_core_variable_indices, device_assignment); } -} // namespace -void TpuProgramGroup::Initialize( - absl::Span xla_tpu_programs) { +Status CreateTpuProgramGroup( + absl::Span xla_tpu_programs, + TpuProgramGroupInterface* tpu_program_group_interface) { CHECK_GT(xla_tpu_programs.size(), 0); - set_tpu_programs(xla_tpu_programs); + TpuProgramGroup* tpu_program_group = + tensorflow::down_cast(tpu_program_group_interface); + CHECK_NE(tpu_program_group, nullptr); + tpu_program_group->set_tpu_programs(xla_tpu_programs); - std::vector may_modify_variables_array(xla_tpu_programs.size(), false); - std::vector executable_infos(xla_tpu_programs.size()); - std::vector host_transfer_infos( - xla_tpu_programs.size()); - std::vector hlo_metadatas(xla_tpu_programs.size()); - for (size_t i = 0; i < xla_tpu_programs.size(); ++i) { - const XLA_TpuProgram* xla_tpu_program = xla_tpu_programs[i]; - bool may_modify_variables; - TpuProgramApiFn()->TpuProgram_GetMayModifyVariablesFn( - xla_tpu_program, &may_modify_variables); - may_modify_variables_array[i] = may_modify_variables; + // TODO(jiawenhao): Handle the case of xla_tpu_programs.size() > 1. + bool may_modify_variables; + TpuProgramApiFn()->TpuProgram_GetMayModifyVariablesFn(xla_tpu_programs[0], + &may_modify_variables); + tpu_program_group->set_may_modify_variables( + std::vector(1, may_modify_variables)); - TpuSerializedProto serialized_executable_info; - TpuProgramApiFn()->TpuProgram_GetExecutableInfoFn( - xla_tpu_program, &serialized_executable_info); - TPUExecutableInfoProto executable_info = - se_tpu::DeserializeProto( - serialized_executable_info); - executable_infos[i] = executable_info; - StreamExecutor_Tpu_FreeSerializedProto(&serialized_executable_info); + TpuSerializedProto serialized_executable_info; + TpuProgramApiFn()->TpuProgram_GetExecutableInfoFn( + xla_tpu_programs[0], &serialized_executable_info); + TPUExecutableInfoProto executable_info = + se_tpu::DeserializeProto( + serialized_executable_info); + tpu_program_group->set_executable_info(executable_info); + StreamExecutor_Tpu_FreeSerializedProto(&serialized_executable_info); - TPUHostTransferInfoProto host_transfer_info; - TpuSerializedProto serialized_host_transfer_info; - TpuProgramApiFn()->TpuProgram_GetHostTransferInfoFn( - xla_tpu_program, &serialized_host_transfer_info); - if (serialized_host_transfer_info.size > 0) { - host_transfer_info = se_tpu::DeserializeProto( - serialized_host_transfer_info); - StreamExecutor_Tpu_FreeSerializedProto(&serialized_host_transfer_info); - } - host_transfer_infos[i] = host_transfer_info; - - TpuSerializedProto serialized_hlo_metadata; - TpuProgramApiFn()->TpuProgram_GetHloMetadataFn(xla_tpu_program, - &serialized_hlo_metadata); - xla::HloProto hlo_metadata = - se_tpu::DeserializeProto(serialized_hlo_metadata); - hlo_metadatas[i] = hlo_metadata; - StreamExecutor_Tpu_FreeSerializedProto(&serialized_hlo_metadata); + TPUHostTransferInfoProto host_transfer_info; + TpuSerializedProto serialized_host_transfer_info; + 
TpuProgramApiFn()->TpuProgram_GetHostTransferInfoFn( + xla_tpu_programs[0], &serialized_host_transfer_info); + if (serialized_host_transfer_info.size > 0) { + host_transfer_info = se_tpu::DeserializeProto( + serialized_host_transfer_info); + StreamExecutor_Tpu_FreeSerializedProto(&serialized_host_transfer_info); } + tpu_program_group->set_host_transfer_info(host_transfer_info); - may_modify_variables_ = may_modify_variables_array; - executable_infos_ = executable_infos; - host_transfer_infos_ = host_transfer_infos; - hlo_metadatas_ = hlo_metadatas; - RefreshHloMetadatasPtrs(); + TpuSerializedProto serialized_hlo_metadata; + TpuProgramApiFn()->TpuProgram_GetHloMetadataFn(xla_tpu_programs[0], + &serialized_hlo_metadata); + xla::HloProto hlo_metadata = + se_tpu::DeserializeProto(serialized_hlo_metadata); + tpu_program_group->set_hlo_metadata(hlo_metadata); + StreamExecutor_Tpu_FreeSerializedProto(&serialized_hlo_metadata); + + return Status::OK(); } -bool TpuProgramGroup::has_sharding_program() const { - for (const XLA_TpuProgram* tpu_program : tpu_programs_) { - if (!TpuProgramApiFn()->TpuProgram_HasShardingFn(tpu_program)) { - return false; - } - } - return true; -} - -size_t TpuProgramGroup::program_count() const { return tpu_programs_.size(); } +} // namespace int64_t TpuProgramGroup::program_size() const { int64_t total_size = 0; @@ -218,6 +201,12 @@ void TpuProgramGroup::UnloadAndDestroyPrograms() { TF_RET_CHECK(per_core_output_shapes.size() == per_core_variable_indices.size()); + // TODO(henrytan): add an interface to TpuProgramGroupInterface to set + // may_modify_variables. + TpuProgramGroup* tpu_program_group = + tensorflow::down_cast(tpu_program_group_interface); + tpu_program_group->may_modify_variables_ = may_modify_variables; + // With shardable input/output pairs, XLA could generate separate // sharding/unsharding programs along with the main program. The // sharding/unsharding programs will be in nested entries of the AOT @@ -232,20 +221,17 @@ void TpuProgramGroup::UnloadAndDestroyPrograms() { TF_RET_CHECK(xla_tpu_programs.size() == 1 || xla_tpu_programs.size() == metadata.num_cores_per_replica()); - // TODO(henrytan): add an interface to TpuProgramGroupInterface to set - // may_modify_variables. 
- TpuProgramGroup* tpu_program_group = - tensorflow::down_cast(tpu_program_group_interface); - tpu_program_group->Initialize(xla_tpu_programs); - tpu_program_group->may_modify_variables_ = may_modify_variables; + TF_RETURN_IF_ERROR( + CreateTpuProgramGroup(xla_tpu_programs, tpu_program_group)); return Status::OK(); } TpuProgramGroup::TpuProgramGroup(TpuProgramGroup&& other) : may_modify_variables_(std::move(other.may_modify_variables_)), + host_compute_metadata_(std::move(other.host_compute_metadata_)), tpu_programs_(std::move(other.tpu_programs_)), - executable_infos_(std::move(other.executable_infos_)), - host_transfer_infos_(std::move(other.host_transfer_infos_)), + executable_info_(std::move(other.executable_info_)), + host_transfer_info_(std::move(other.host_transfer_info_)), hlo_metadatas_(std::move(other.hlo_metadatas_)) { RefreshHloMetadatasPtrs(); } @@ -262,12 +248,6 @@ absl::Span TpuProgramGroup::hlo_metadatas() const { return hlo_metadatas_ptrs_; } -const xla::HloProto* TpuProgramGroup::hlo_metadata(int index) const { - CHECK_GE(index, 0); - CHECK_LT(index, hlo_metadatas_ptrs_.size()); - return hlo_metadatas_ptrs_[index]; -} - void TpuProgramGroup::RefreshHloMetadatasPtrs() { hlo_metadatas_ptrs_.reserve(hlo_metadatas_.size()); for (const auto& hlo_metadata_internal_ : hlo_metadatas_) { @@ -282,47 +262,6 @@ Status TpuProgramGroup::LogCompilationStats(const TpuCompilationCacheKey& key, return Status::OK(); } -const std::vector& TpuProgramGroup::may_modify_variables() const { - return may_modify_variables_; -} - -void TpuProgramGroup::set_may_modify_variables( - const std::vector& may_modify_variables) { - may_modify_variables_ = may_modify_variables; -} - -const std::vector& TpuProgramGroup::tpu_programs() const { - return tpu_programs_; -} - -const XLA_TpuProgram* TpuProgramGroup::tpu_program(int index) const { - CHECK_GE(index, 0); - CHECK_LT(index, tpu_programs_.size()); - return tpu_programs_[index]; -} - -void TpuProgramGroup::set_tpu_programs( - absl::Span tpu_programs) { - tpu_programs_.resize(tpu_programs.size()); - for (size_t i = 0; i < tpu_programs.size(); ++i) { - tpu_programs_[i] = tpu_programs[i]; - } -} - -const TPUExecutableInfoProto& TpuProgramGroup::executable_info( - int index) const { - CHECK_GE(index, 0); - CHECK_LT(index, executable_infos_.size()); - return executable_infos_[index]; -} - -const TPUHostTransferInfoProto& TpuProgramGroup::host_transfer_info( - int index) const { - CHECK_GE(index, 0); - CHECK_LT(index, host_transfer_infos_.size()); - return host_transfer_infos_[index]; -} - /*static*/ Status TpuProgramGroup::CompileAndBuild( const TpuCompilationRequestProto& compilation_request, @@ -348,27 +287,15 @@ Status TpuProgramGroup::CompileAndBuild( TF_RET_CHECK(count == 1 || count == compilation_request.metadata().num_cores_per_replica()); - VLOG(1) << "Initialize TpuProgramGroup."; - TpuProgramGroup* tpu_program_group = - tensorflow::down_cast(tpu_program_group_interface); - tpu_program_group->Initialize( - absl::MakeConstSpan(&xla_tpu_programs[0], count)); + VLOG(1) << "CreateTpuProgramGroup"; + Status serialize_status = + CreateTpuProgramGroup(absl::MakeConstSpan(&xla_tpu_programs[0], count), + tpu_program_group_interface); + VLOG(1) << absl::StrCat("Run CreateTpuProgramGroup completed. 
StatusCode: ", + serialize_status.code()); TpuProgramApiFn()->TpuProgram_FreeArrayFn(xla_tpu_programs); - return status.status(); + return serialize_status; } -std::vector TpuProgramGroup::tpu_programs( - TpuProgramShardingType sharding_type) const { - std::vector tpu_programs; - tpu_programs.reserve(tpu_programs_.size()); - for (size_t i = 0; i < tpu_programs_.size(); ++i) { - if (TpuProgramApiFn()->TpuProgram_HasShardingFn(tpu_programs_[i])) { - tpu_programs.push_back(TpuProgramApiFn()->TpuProgram_GetTpuProgramFn( - tpu_programs_[i], sharding_type)); - CHECK_NE(tpu_programs[i], nullptr); - } - } - return tpu_programs; -} } // namespace tpu } // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_program_group.h b/tensorflow/core/tpu/kernels/tpu_program_group.h index b76ef3d507a..4bc8cdd003a 100644 --- a/tensorflow/core/tpu/kernels/tpu_program_group.h +++ b/tensorflow/core/tpu/kernels/tpu_program_group.h @@ -102,16 +102,11 @@ class TpuProgramGroup : public TpuProgramGroupInterface { const absl::optional& xla_device_assignment, TpuProgramGroupInterface* tpu_program_group_interface); - // Initializes `TpuProgramGroup` object with `xla_tpu_programs`. - void Initialize(absl::Span xla_tpu_programs); - TpuProgramGroup() = default; TpuProgramGroup(TpuProgramGroup&& other); TpuProgramGroup& operator=(TpuProgramGroup&&) = delete; - bool has_sharding_program() const override; - - size_t program_count() const override; + size_t program_count() const override { return tpu_programs_.size(); } int64_t program_size() const override; @@ -122,29 +117,58 @@ class TpuProgramGroup : public TpuProgramGroupInterface { Status LogCompilationStats(const TpuCompilationCacheKey& key, absl::Duration duration) override; - const std::vector& may_modify_variables() const override; - void set_may_modify_variables(const std::vector& may_modify_variables); + const std::vector& may_modify_variables() const override { + return may_modify_variables_; + } + void set_may_modify_variables(const std::vector& may_modify_variables) { + may_modify_variables_ = may_modify_variables; + } - const std::vector& tpu_programs() const; - std::vector tpu_programs(TpuProgramShardingType type) const; - const XLA_TpuProgram* tpu_program(int index) const; - void set_tpu_programs(absl::Span tpu_programs); + const tf2xla::HostComputeMetadata& host_compute_metadata() const { + return host_compute_metadata_; + } + void set_host_compute_metadata( + const tf2xla::HostComputeMetadata& host_compute_metadata) { + host_compute_metadata_ = host_compute_metadata; + } - const TPUExecutableInfoProto& executable_info(int index) const; + const std::vector& tpu_programs() const { + return tpu_programs_; + } + void set_tpu_programs(absl::Span tpu_programs) { + tpu_programs_.resize(tpu_programs.size()); + for (size_t i = 0; i < tpu_programs.size(); ++i) { + tpu_programs_[i] = tpu_programs[i]; + } + } + + const TPUExecutableInfoProto& executable_info() const { + return executable_info_; + } + void set_executable_info(const TPUExecutableInfoProto& executable_info) { + executable_info_ = executable_info; + } + + const TPUHostTransferInfoProto& host_transfer_info() const { + return host_transfer_info_; + } + void set_host_transfer_info( + const TPUHostTransferInfoProto& host_transfer_info) { + host_transfer_info_ = host_transfer_info; + } - const TPUHostTransferInfoProto& host_transfer_info(int index) const; void set_hlo_metadata(const xla::HloProto& hlo_metadata); - const xla::HloProto* hlo_metadata(int index) const; absl::Span hlo_metadatas() const 
override; private: void RefreshHloMetadatasPtrs(); std::vector may_modify_variables_; + tf2xla::HostComputeMetadata host_compute_metadata_; std::vector tpu_programs_; // Not owned. - std::vector executable_infos_; - std::vector host_transfer_infos_; + TPUExecutableInfoProto executable_info_; + TPUHostTransferInfoProto host_transfer_info_; // To be consistent with the TpuProgramGroupInterface::hlo_metadatas() // signature, we store HloProto values in hlo_metadatas_ when diff --git a/tensorflow/core/tpu/kernels/tpu_program_group_interface.h b/tensorflow/core/tpu/kernels/tpu_program_group_interface.h index 4af94f8e1ad..cb7347783b1 100644 --- a/tensorflow/core/tpu/kernels/tpu_program_group_interface.h +++ b/tensorflow/core/tpu/kernels/tpu_program_group_interface.h @@ -20,8 +20,6 @@ limitations under the License. #include #include -#include "absl/time/time.h" -#include "absl/types/span.h" #include "tensorflow/compiler/tf2xla/host_compute_metadata.pb.h" #include "tensorflow/compiler/xla/service/hlo.pb.h" #include "tensorflow/core/lib/core/status.h" @@ -36,16 +34,13 @@ class TpuProgramGroupInterface { public: virtual ~TpuProgramGroupInterface() = default; - // Check if whether sharding/unsharding program exists. - virtual bool has_sharding_program() const = 0; - // Computes program count. virtual size_t program_count() const = 0; // Computes total program size. virtual int64_t program_size() const = 0; - // Unloads and destroys safely TPU programs. + // Unloads and destroys safely Tpu programs. virtual void UnloadAndDestroyPrograms() = 0; // Logs program memory summary. diff --git a/tensorflow/core/tpu/tpu_library_init_fns.inc b/tensorflow/core/tpu/tpu_library_init_fns.inc index 6914a8cd102..682cc8b1c13 100644 --- a/tensorflow/core/tpu/tpu_library_init_fns.inc +++ b/tensorflow/core/tpu/tpu_library_init_fns.inc @@ -64,8 +64,6 @@ tensorflow::Status SetTpuProgramStructFn(void* library_handle) { TFTPU_SET_FN(tpu_program_fn, TpuProgram_GetHostTransferInfo); TFTPU_SET_FN(tpu_program_fn, TpuProgram_GetHloMetadata); TFTPU_SET_FN(tpu_program_fn, TpuProgram_GetMayModifyVariables); - TFTPU_SET_FN(tpu_program_fn, TpuProgram_HasSharding); - TFTPU_SET_FN(tpu_program_fn, TpuProgram_GetTpuProgram); return tensorflow::Status::OK(); } From 474d3df724c402048ae2f8773bfc49f1587229ff Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Mon, 3 Aug 2020 16:19:08 -0700 Subject: [PATCH 2015/2522] [TF2XLA] [NFC] Test that aliased updates do not actually increase memory usage PiperOrigin-RevId: 324705992 Change-Id: I1c7c19867c3b7086dab61427da7e7b78547e4c59 --- .../python/eager/def_function_xla_jit_test.py | 37 +++++++++++-------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/tensorflow/python/eager/def_function_xla_jit_test.py b/tensorflow/python/eager/def_function_xla_jit_test.py index 10982070c00..44a4c99f5d6 100644 --- a/tensorflow/python/eager/def_function_xla_jit_test.py +++ b/tensorflow/python/eager/def_function_xla_jit_test.py @@ -461,26 +461,23 @@ class DefFunctionTest(xla_test.XLATestCase): def testUpdateVariable(self): with ops.device('device:{}:0'.format(self.device)): - v = variables.Variable(3.1) + + on_gpu = 'gpu' in self.device.lower() + v = variables.Variable([3.1, 3.2]) @def_function.function(experimental_compile=True) def update_var(a, b): v.assign_add(a * b) - update_var(constant_op.constant(0.7), constant_op.constant(0.6)) - self.assertAllClose(v, 3.52) + arg1 = random_ops.random_normal([2]) + arg2 = random_ops.random_normal([2]) - def testUpdateVariableVector(self): - with 
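
The point of the reworked test above and below is that, with experimental_compile=True, XLA can alias the resource variable's buffer as the kernel's output, so an in-place assign_add should not grow device memory. A minimal sketch of that check, reusing names from the test (v, update_var, arg1, arg2) and the context.context().get_total_memory_usage call it relies on; the bare asserts are illustrative only, not part of the patch:

    # Sketch, assuming a GPU device (on CPU the usage reads as 0 and the check degenerates).
    initial = context.context().get_total_memory_usage(v.device)
    update_var(arg1, arg2)   # compiled function; its output buffer aliases v's storage
    final = context.context().get_total_memory_usage(v.device)
    assert initial == final  # the aliased update allocated no new buffer
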
ops.device('device:{}:0'.format(self.device)): - v = variables.Variable([3.1, 3.1]) - - @def_function.function(experimental_compile=True) - def update_var(a, b): - v.assign_add(a * b) - - update_var( - constant_op.constant([0.7, 0.7]), constant_op.constant([0.6, 0.6])) - self.assertAllClose(v, [3.52, 3.52]) + initial_usage = context.context().get_total_memory_usage( + v.device) if on_gpu else 0 + update_var(arg1, arg2) + final_usage = context.context().get_total_memory_usage( + v.device) if on_gpu else 0 + self.assertEqual(initial_usage, final_usage) @test_util.disable_mlir_bridge('TODO(b/162381930): MLIR bridge renames ' ' functions') @@ -524,11 +521,19 @@ class DefFunctionTest(xla_test.XLATestCase): def f(a, b): return (a, b) - a = constant_op.constant([0.7]) - b = constant_op.constant([0.6]) + a = random_ops.random_normal([10, 10]) + b = random_ops.random_normal([10, 10]) + + on_gpu = 'gpu' in self.device.lower() + initial_usage = context.context().get_total_memory_usage( + b.backing_device) if on_gpu else 0 f(a, b) + final_usage = context.context().get_total_memory_usage( + b.backing_device) if on_gpu else 0 + self.assertEqual(initial_usage, final_usage) + if __name__ == '__main__': ops.enable_eager_execution() From 7e10ef560d3bb0cbe837bfd2b17c0e999f84d9da Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Mon, 3 Aug 2020 16:20:05 -0700 Subject: [PATCH 2016/2522] [tf2xla] Add support for PopulationCount PiperOrigin-RevId: 324706175 Change-Id: I567c1fec90022573acf7a6dbe18a06191a088c90 --- .../compiler/jit/mark_for_compilation_pass.cc | 1 + tensorflow/compiler/tests/unary_ops_test.py | 37 +++++++++++++++++++ .../compiler/tf2xla/kernels/unary_ops.cc | 3 ++ 3 files changed, 41 insertions(+) diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass.cc b/tensorflow/compiler/jit/mark_for_compilation_pass.cc index d1ec66b1559..19eb61b6f72 100644 --- a/tensorflow/compiler/jit/mark_for_compilation_pass.cc +++ b/tensorflow/compiler/jit/mark_for_compilation_pass.cc @@ -1952,6 +1952,7 @@ absl::flat_hash_set GetKnownXLAAllowlistOp() { "ParallelDynamicStitch", "ParameterizedTruncatedNormal", "PartitionedCall", + "PopulationCount", "Qr", "QuantizeAndDequantizeV2", "QuantizeAndDequantizeV3", diff --git a/tensorflow/compiler/tests/unary_ops_test.py b/tensorflow/compiler/tests/unary_ops_test.py index f5fe6986a98..e3a82610027 100644 --- a/tensorflow/compiler/tests/unary_ops_test.py +++ b/tensorflow/compiler/tests/unary_ops_test.py @@ -21,6 +21,7 @@ from __future__ import print_function import unittest import numpy as np +import six from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.compiler.tests import xla_test @@ -90,6 +91,10 @@ class UnaryOpsTest(xla_test.XLATestCase): self.assertAllClose(result, expected, rtol, atol) self.assertAllEqual(np.sort(result), result) + def AssertAllEqual(self, result, expected, rtol, atol): + """Tests that result and expeted are exactly equal.""" + self.assertAllEqual(result, expected) + @test_util.disable_mlir_bridge( "MlirHloBuilder::Iota missing required for xla::Diag") def testAllTypeOps(self): @@ -779,6 +784,10 @@ class UnaryOpsTest(xla_test.XLATestCase): np.array([1 + 3j, -4 + 7j, 2.7, -3j], dtype=dtype), expected=np.array([1, -4, 2.7, 0], dtype=ctypes[dtype])) + @test_util.disable_mlir_bridge( + "TF_PopulationCount is missing and is required to translate to " + "xla::PopulationCount." 
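
For reference, PopulationCount maps every element to the number of set bits in its two's-complement representation, which is exactly what the count_bits helper in the new test computes byte by byte. A self-contained NumPy sketch of the same idea (this reimplementation is illustrative and sidesteps the test's six.iterbytes dependency):

    import numpy as np

    def count_bits(x):
      # Sum the set bits over the raw bytes of the value.
      return sum(bin(b).count("1") for b in bytearray(np.asarray(x).tobytes()))

    assert count_bits(np.int8(-1)) == 8      # 0xFF
    assert count_bits(np.int32(5)) == 2      # 0b101
    assert count_bits(np.uint16(65535)) == 16
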
+ ) def testIntOps(self): for dtype in self.int_types: self._assertOpOutputMatchesExpected( @@ -786,6 +795,34 @@ class UnaryOpsTest(xla_test.XLATestCase): np.array([0, -1, 1, 16, 42], dtype=dtype), expected=np.array([-1, 0, -2, -17, -43], dtype=dtype)) + # Test population_count for array inputs. + raw_inputs = [ + 0, 1, -1, 3, -3, 5, -5, 14, -14, 127, 128, 255, 256, 65535, 65536, + 2**31 - 1, 2**31, 2**32 - 1, 2**32, -2**32 + 1, -2**32, -2**63 + 1, + 2**63 - 1 + ] + inputs = np.array(raw_inputs, dtype=dtype) + + def count_bits(x): + return sum(bin(z).count("1") for z in six.iterbytes(x.tobytes())) + + truth = [count_bits(x) for x in inputs] + self._assertOpOutputMatchesExpected( + bitwise_ops.population_count, + inputs, + expected=np.array(truth, dtype=np.uint8), + equality_test=self.AssertAllEqual) + + # Test population_count for scalar inputs. + for raw_inp in raw_inputs: + inp = dtype(raw_inp) + truth = count_bits(inp) + self._assertOpOutputMatchesExpected( + bitwise_ops.population_count, + inp, + expected=np.uint8(truth), + equality_test=self.AssertAllEqual) + def testNumericOps(self): for dtype in self.numeric_types - {np.int8, np.uint8}: self._assertOpOutputMatchesExpected( diff --git a/tensorflow/compiler/tf2xla/kernels/unary_ops.cc b/tensorflow/compiler/tf2xla/kernels/unary_ops.cc index 6d4393ee006..6fe6b164951 100644 --- a/tensorflow/compiler/tf2xla/kernels/unary_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/unary_ops.cc @@ -25,6 +25,7 @@ limitations under the License. #include "tensorflow/compiler/xla/client/lib/math.h" #include "tensorflow/compiler/xla/client/xla_builder.h" #include "tensorflow/compiler/xla/primitive_util.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/framework/kernel_def_builder.h" namespace tensorflow { @@ -76,6 +77,8 @@ XLAJIT_MAKE_UNARY(Log1p, xla::Log1p(x)); XLAJIT_MAKE_UNARY(Invert, xla::Not(x)); XLAJIT_MAKE_UNARY(LogicalNot, xla::Not(x)); +XLAJIT_MAKE_UNARY(PopulationCount, + xla::ConvertElementType(xla::PopulationCount(x), xla::U8)); XLAJIT_MAKE_UNARY(Neg, -x); XLAJIT_MAKE_UNARY(Rint, xla::RoundToEven(x)); From 9fefb3b50ac69a074faf5829e9c7b4666fe906d0 Mon Sep 17 00:00:00 2001 From: Robert David Date: Mon, 3 Aug 2020 16:22:21 -0700 Subject: [PATCH 2017/2522] Tool generated fixes for unused dependencies. 
PiperOrigin-RevId: 324706583 Change-Id: Ia8512b20d27791d935d207fd50180593ca5e2ccc --- tensorflow/lite/delegates/gpu/cl/BUILD | 1 - .../lite/delegates/gpu/cl/kernels/BUILD | 29 ------------------- tensorflow/lite/delegates/gpu/common/BUILD | 1 - .../testing/feature_parity/generators/BUILD | 1 - .../gpu/common/transformations/BUILD | 1 - .../lite/delegates/gpu/gl/kernels/BUILD | 5 ---- .../lite/delegates/gpu/gl/runtime/BUILD | 1 - tensorflow/lite/delegates/gpu/metal/BUILD | 1 - .../lite/delegates/gpu/metal/kernels/BUILD | 26 ----------------- 9 files changed, 66 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/BUILD b/tensorflow/lite/delegates/gpu/cl/BUILD index 36cafdb4d3b..2344a7c6c40 100644 --- a/tensorflow/lite/delegates/gpu/cl/BUILD +++ b/tensorflow/lite/delegates/gpu/cl/BUILD @@ -259,7 +259,6 @@ cc_library( "EGL_EGLEXT_PROTOTYPES", ], deps = [ - ":cl_device", "//tensorflow/lite/delegates/gpu/common:status", "//tensorflow/lite/delegates/gpu/gl:gl_call", ], diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/BUILD b/tensorflow/lite/delegates/gpu/cl/kernels/BUILD index 35ed09633a0..b89e7d7252a 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/BUILD +++ b/tensorflow/lite/delegates/gpu/cl/kernels/BUILD @@ -172,7 +172,6 @@ cc_test( deps = [ ":cl_test", ":conv_buffer_1x1", - "//tensorflow/lite/delegates/gpu/cl:tensor", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:status", "@com_google_googletest//:gtest_main", @@ -212,7 +211,6 @@ cc_test( deps = [ ":cl_test", ":conv_constants", - "//tensorflow/lite/delegates/gpu/cl:tensor", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:status", "@com_google_googletest//:gtest_main", @@ -302,7 +300,6 @@ cc_test( deps = [ ":cl_test", ":conv_texture", - "//tensorflow/lite/delegates/gpu/cl:tensor", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:status", "@com_google_googletest//:gtest_main", @@ -380,7 +377,6 @@ cc_test( deps = [ ":cl_test", ":convolution_transposed", - "//tensorflow/lite/delegates/gpu/cl:tensor", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:status", "@com_google_googletest//:gtest_main", @@ -445,7 +441,6 @@ cc_test( deps = [ ":cl_test", ":convolution_transposed_3x3", - "//tensorflow/lite/delegates/gpu/cl:tensor", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:status", "@com_google_googletest//:gtest_main", @@ -484,7 +479,6 @@ cc_test( deps = [ ":cl_test", ":convolution_transposed_3x3_thin", - "//tensorflow/lite/delegates/gpu/cl:tensor", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:status", "@com_google_googletest//:gtest_main", @@ -525,7 +519,6 @@ cc_test( deps = [ ":cl_test", ":convolution_transposed_4x4", - "//tensorflow/lite/delegates/gpu/cl:tensor", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:status", "@com_google_googletest//:gtest_main", @@ -565,7 +558,6 @@ cc_test( deps = [ ":cl_test", ":convolution_transposed_thin", - "//tensorflow/lite/delegates/gpu/cl:tensor", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:status", "@com_google_googletest//:gtest_main", @@ -606,7 +598,6 @@ cc_test( deps = [ ":cl_test", ":depthwise_conv", - "//tensorflow/lite/delegates/gpu/cl:tensor", "//tensorflow/lite/delegates/gpu/common:operations", 
"//tensorflow/lite/delegates/gpu/common:status", "@com_google_googletest//:gtest_main", @@ -646,7 +637,6 @@ cc_test( deps = [ ":cl_test", ":depthwise_conv_3x3", - "//tensorflow/lite/delegates/gpu/cl:tensor", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:status", "@com_google_googletest//:gtest_main", @@ -678,7 +668,6 @@ cc_test( deps = [ ":cl_test", ":elementwise", - "//tensorflow/lite/delegates/gpu/cl:tensor", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:status", "@com_google_googletest//:gtest_main", @@ -716,7 +705,6 @@ cc_test( deps = [ ":cl_test", ":fully_connected", - "//tensorflow/lite/delegates/gpu/cl:tensor", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:status", "@com_google_googletest//:gtest_main", @@ -772,7 +760,6 @@ cc_test( deps = [ ":cl_test", ":lstm", - "//tensorflow/lite/delegates/gpu/cl:tensor", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:status", "@com_google_googletest//:gtest_main", @@ -837,7 +824,6 @@ cc_test( deps = [ ":cl_test", ":max_unpooling", - "//tensorflow/lite/delegates/gpu/cl:tensor", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:status", "@com_google_googletest//:gtest_main", @@ -872,7 +858,6 @@ cc_test( deps = [ ":cl_test", ":mean", - "//tensorflow/lite/delegates/gpu/cl:tensor", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:status", "@com_google_googletest//:gtest_main", @@ -904,7 +889,6 @@ cc_test( deps = [ ":cl_test", ":padding", - "//tensorflow/lite/delegates/gpu/cl:tensor", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:status", "@com_google_googletest//:gtest_main", @@ -938,7 +922,6 @@ cc_test( deps = [ ":cl_test", ":pooling", - "//tensorflow/lite/delegates/gpu/cl:tensor", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:status", "@com_google_googletest//:gtest_main", @@ -975,7 +958,6 @@ cc_test( deps = [ ":cl_test", ":prelu", - "//tensorflow/lite/delegates/gpu/cl:tensor", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:status", "@com_google_googletest//:gtest_main", @@ -1012,7 +994,6 @@ cc_test( deps = [ ":cl_test", ":quantize_and_dequantize", - "//tensorflow/lite/delegates/gpu/cl:tensor", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:status", "//tensorflow/lite/kernels/internal:quantization_util", @@ -1043,7 +1024,6 @@ cc_test( deps = [ ":cl_test", ":relu", - "//tensorflow/lite/delegates/gpu/cl:tensor", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:status", "@com_google_googletest//:gtest_main", @@ -1075,7 +1055,6 @@ cc_test( deps = [ ":cl_test", ":reshape", - "//tensorflow/lite/delegates/gpu/cl:tensor", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:status", "@com_google_googletest//:gtest_main", @@ -1108,7 +1087,6 @@ cc_test( deps = [ ":cl_test", ":reshapex4", - "//tensorflow/lite/delegates/gpu/cl:tensor", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:status", "@com_google_googletest//:gtest_main", @@ -1142,7 +1120,6 @@ cc_test( deps = [ ":cl_test", ":softmax", - "//tensorflow/lite/delegates/gpu/cl:tensor", 
"//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:status", "@com_google_googletest//:gtest_main", @@ -1174,7 +1151,6 @@ cc_test( deps = [ ":cl_test", ":softmax1x1", - "//tensorflow/lite/delegates/gpu/cl:tensor", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:status", "@com_google_googletest//:gtest_main", @@ -1207,7 +1183,6 @@ cc_test( deps = [ ":cl_test", ":space_to_depth", - "//tensorflow/lite/delegates/gpu/cl:tensor", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:status", "@com_google_googletest//:gtest_main", @@ -1238,7 +1213,6 @@ cc_test( deps = [ ":cl_test", ":strided_slice", - "//tensorflow/lite/delegates/gpu/cl:tensor", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:status", "@com_google_googletest//:gtest_main", @@ -1270,7 +1244,6 @@ cc_test( deps = [ ":cl_test", ":transpose", - "//tensorflow/lite/delegates/gpu/cl:tensor", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:status", "@com_google_googletest//:gtest_main", @@ -1312,7 +1285,6 @@ cc_test( deps = [ ":cl_test", ":resize", - "//tensorflow/lite/delegates/gpu/cl:tensor", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:status", "@com_google_googletest//:gtest_main", @@ -1357,7 +1329,6 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:shape", "//tensorflow/lite/delegates/gpu/common:status", "//tensorflow/lite/delegates/gpu/common:winograd_util", - "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", ], ) diff --git a/tensorflow/lite/delegates/gpu/common/BUILD b/tensorflow/lite/delegates/gpu/common/BUILD index e9877b63fb3..ab2d5d033f7 100644 --- a/tensorflow/lite/delegates/gpu/common/BUILD +++ b/tensorflow/lite/delegates/gpu/common/BUILD @@ -97,7 +97,6 @@ cc_test( srcs = ["model_test.cc"], deps = [ ":model", - ":status", "@com_google_googletest//:gtest_main", ], ) diff --git a/tensorflow/lite/delegates/gpu/common/testing/feature_parity/generators/BUILD b/tensorflow/lite/delegates/gpu/common/testing/feature_parity/generators/BUILD index ae746cdb08d..4fef0a28525 100644 --- a/tensorflow/lite/delegates/gpu/common/testing/feature_parity/generators/BUILD +++ b/tensorflow/lite/delegates/gpu/common/testing/feature_parity/generators/BUILD @@ -24,7 +24,6 @@ cc_library( "//tensorflow/lite:schema_fbs_version", "//tensorflow/lite/delegates/gpu/common:shape", "//tensorflow/lite/delegates/gpu/common/testing/feature_parity:utils", - "//tensorflow/lite/kernels:builtin_ops", "@flatbuffers", ], ) diff --git a/tensorflow/lite/delegates/gpu/common/transformations/BUILD b/tensorflow/lite/delegates/gpu/common/transformations/BUILD index 4c76e4a81d3..bf26b03f534 100644 --- a/tensorflow/lite/delegates/gpu/common/transformations/BUILD +++ b/tensorflow/lite/delegates/gpu/common/transformations/BUILD @@ -59,7 +59,6 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:model_transformer", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:status", - "@com_google_absl//absl/strings", ], ) diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/BUILD b/tensorflow/lite/delegates/gpu/gl/kernels/BUILD index 700a553a125..a367a60ba41 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/BUILD +++ b/tensorflow/lite/delegates/gpu/gl/kernels/BUILD @@ -317,13 +317,11 @@ cc_library( srcs = ["mean.cc"], hdrs = ["mean.h"], deps = [ - 
"//tensorflow/lite/delegates/gpu/common:data_type", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:status", "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/gl:node_shader", "@com_google_absl//absl/memory", - "@com_google_absl//absl/strings", ], ) @@ -470,7 +468,6 @@ cc_library( srcs = ["quantize_and_dequantize.cc"], hdrs = ["quantize_and_dequantize.h"], deps = [ - "//tensorflow/lite/delegates/gpu/common:convert", "//tensorflow/lite/delegates/gpu/common:data_type", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:shape", @@ -645,7 +642,6 @@ cc_test( ":space_to_depth", ":test_util", "//tensorflow/lite/delegates/gpu/common:operations", - "//tensorflow/lite/delegates/gpu/common:shape", "@com_google_googletest//:gtest", ], ) @@ -720,7 +716,6 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:status", "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/gl:node_shader", - "//tensorflow/lite/delegates/gpu/gl:variable", "@com_google_absl//absl/memory", ], ) diff --git a/tensorflow/lite/delegates/gpu/gl/runtime/BUILD b/tensorflow/lite/delegates/gpu/gl/runtime/BUILD index 20b307359db..c7418810f2d 100644 --- a/tensorflow/lite/delegates/gpu/gl/runtime/BUILD +++ b/tensorflow/lite/delegates/gpu/gl/runtime/BUILD @@ -10,7 +10,6 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:status", "//tensorflow/lite/delegates/gpu/common:util", "//tensorflow/lite/delegates/gpu/gl:gl_buffer", - "//tensorflow/lite/delegates/gpu/gl:gl_call", "//tensorflow/lite/delegates/gpu/gl:object", "//tensorflow/lite/delegates/gpu/gl:portable", ], diff --git a/tensorflow/lite/delegates/gpu/metal/BUILD b/tensorflow/lite/delegates/gpu/metal/BUILD index 4db8f3d071d..c4e7ca7c10d 100644 --- a/tensorflow/lite/delegates/gpu/metal/BUILD +++ b/tensorflow/lite/delegates/gpu/metal/BUILD @@ -100,7 +100,6 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:status", "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/common:util", - "//tensorflow/lite/delegates/gpu/metal:runtime_options", ], ) diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/BUILD b/tensorflow/lite/delegates/gpu/metal/kernels/BUILD index 6385b87c403..f4f4c180976 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/BUILD +++ b/tensorflow/lite/delegates/gpu/metal/kernels/BUILD @@ -91,7 +91,6 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/common:util", "//tensorflow/lite/delegates/gpu/metal:compute_task_descriptor", - "//tensorflow/lite/delegates/gpu/metal:runtime_options", ], ) @@ -176,7 +175,6 @@ cc_library( deps = [ "//tensorflow/lite/delegates/gpu/common:model", "//tensorflow/lite/delegates/gpu/common:status", - "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/metal:compute_task_descriptor", "//tensorflow/lite/delegates/gpu/metal:runtime_options", ], @@ -230,12 +228,9 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:convert", "//tensorflow/lite/delegates/gpu/common:model", "//tensorflow/lite/delegates/gpu/common:operations", - "//tensorflow/lite/delegates/gpu/common:shape", - "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/common:util", "//tensorflow/lite/delegates/gpu/metal:compute_task_descriptor", "//tensorflow/lite/delegates/gpu/metal:environment", - "//tensorflow/lite/delegates/gpu/metal:runtime_options", "@com_google_absl//absl/strings", ], ) @@ -270,7 
+265,6 @@ cc_library( deps = [ "//tensorflow/lite/delegates/gpu/common:model", "//tensorflow/lite/delegates/gpu/common:operations", - "//tensorflow/lite/delegates/gpu/common:shape", "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/common:util", "//tensorflow/lite/delegates/gpu/metal:compute_task_descriptor", @@ -314,7 +308,6 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/common:util", "//tensorflow/lite/delegates/gpu/metal:compute_task_descriptor", - "//tensorflow/lite/delegates/gpu/metal:runtime_options", "@com_google_absl//absl/strings", ], ) @@ -388,11 +381,9 @@ cc_library( deps = [ "//tensorflow/lite/delegates/gpu/common:model", "//tensorflow/lite/delegates/gpu/common:operations", - "//tensorflow/lite/delegates/gpu/common:shape", "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/common:util", "//tensorflow/lite/delegates/gpu/metal:compute_task_descriptor", - "//tensorflow/lite/delegates/gpu/metal:runtime_options", "@com_google_absl//absl/strings", ], ) @@ -429,10 +420,8 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:model", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:shape", - "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/common:util", "//tensorflow/lite/delegates/gpu/metal:compute_task_descriptor", - "//tensorflow/lite/delegates/gpu/metal:runtime_options", "@com_google_absl//absl/strings", ], ) @@ -469,8 +458,6 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:model", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:shape", - "//tensorflow/lite/delegates/gpu/common:types", - "//tensorflow/lite/delegates/gpu/common:util", "//tensorflow/lite/delegates/gpu/metal:compute_task_descriptor", "//tensorflow/lite/delegates/gpu/metal:runtime_options", "@com_google_absl//absl/strings", @@ -509,10 +496,8 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:shape", "//tensorflow/lite/delegates/gpu/common:types", - "//tensorflow/lite/delegates/gpu/common:util", "//tensorflow/lite/delegates/gpu/metal:compute_task_descriptor", "//tensorflow/lite/delegates/gpu/metal:runtime_options", - "@com_google_absl//absl/strings", ], ) @@ -556,9 +541,7 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:shape", "//tensorflow/lite/delegates/gpu/common:types", - "//tensorflow/lite/delegates/gpu/common:util", "//tensorflow/lite/delegates/gpu/metal:compute_task_descriptor", - "//tensorflow/lite/delegates/gpu/metal:runtime_options", "@com_google_absl//absl/strings", ], ) @@ -591,13 +574,10 @@ cc_library( srcs = ["resize.cc"], hdrs = ["resize.h"], deps = [ - ":util", "//tensorflow/lite/delegates/gpu/common:model", "//tensorflow/lite/delegates/gpu/common:operations", - "//tensorflow/lite/delegates/gpu/common:tensor", "//tensorflow/lite/delegates/gpu/common:util", "//tensorflow/lite/delegates/gpu/metal:compute_task_descriptor", - "//tensorflow/lite/delegates/gpu/metal:runtime_options", "@com_google_absl//absl/types:variant", ], ) @@ -637,7 +617,6 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/common:util", "//tensorflow/lite/delegates/gpu/metal:compute_task_descriptor", - "//tensorflow/lite/delegates/gpu/metal:runtime_options", "@com_google_absl//absl/strings", ], ) @@ -676,7 +655,6 @@ cc_library( 
"//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/common:util", "//tensorflow/lite/delegates/gpu/metal:compute_task_descriptor", - "//tensorflow/lite/delegates/gpu/metal:runtime_options", "@com_google_absl//absl/strings", ], ) @@ -717,7 +695,6 @@ cc_library( "//tensorflow/lite/delegates/gpu/metal:compute_task_descriptor", "//tensorflow/lite/delegates/gpu/metal:environment", "//tensorflow/lite/delegates/gpu/metal:runtime_options", - "@com_google_absl//absl/strings", ], ) @@ -753,7 +730,6 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:util", "//tensorflow/lite/delegates/gpu/metal:compute_task_descriptor", - "//tensorflow/lite/delegates/gpu/metal:runtime_options", "//tensorflow/lite/delegates/gpu/metal/kernels:util", ], ) @@ -789,7 +765,6 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:model", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:shape", - "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/common:util", "//tensorflow/lite/delegates/gpu/metal:compute_task_descriptor", "//tensorflow/lite/delegates/gpu/metal:environment", @@ -873,7 +848,6 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:winograd_util", "//tensorflow/lite/delegates/gpu/metal:compute_task_descriptor", "//tensorflow/lite/delegates/gpu/metal:runtime_options", - "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", ], ) From a73b5ce940b3eeb5e322463e42c862e46c49a58e Mon Sep 17 00:00:00 2001 From: Zhenyu Tan Date: Mon, 3 Aug 2020 16:58:43 -0700 Subject: [PATCH 2018/2522] partial fixit for feature_columns_test PiperOrigin-RevId: 324713509 Change-Id: Ie1b69ed70ac787d8782f48fa2f9831c9bd622a17 --- .../feature_column/feature_column_test.py | 51 ++++++------------- 1 file changed, 15 insertions(+), 36 deletions(-) diff --git a/tensorflow/python/feature_column/feature_column_test.py b/tensorflow/python/feature_column/feature_column_test.py index 2ea7face467..d6d4d2eb1a1 100644 --- a/tensorflow/python/feature_column/feature_column_test.py +++ b/tensorflow/python/feature_column/feature_column_test.py @@ -171,7 +171,6 @@ class LazyColumnTest(test.TestCase): TypeError, '"key" must be either a "str" or "_FeatureColumn".'): builder.get(NotAFeatureColumn()) - @test_util.run_deprecated_v1 def test_expand_dim_rank_1_sparse_tensor_empty_batch(self): # empty 1-D sparse tensor: builder = _LazyBuilder(features={'a': sparse_tensor.SparseTensor( @@ -179,7 +178,7 @@ class LazyColumnTest(test.TestCase): dense_shape=[0], values=np.array([]))}) with self.cached_session(): - spv = builder.get('a').eval() + spv = builder.get('a') self.assertAllEqual(np.array([0, 1], dtype=np.int64), spv.dense_shape) self.assertAllEqual( np.reshape(np.array([], dtype=np.int64), (0, 2)), spv.indices) @@ -187,7 +186,6 @@ class LazyColumnTest(test.TestCase): class NumericColumnTest(test.TestCase): - @test_util.run_deprecated_v1 def test_defaults(self): a = fc._numeric_column('aaa') self.assertEqual('aaa', a.key) @@ -266,7 +264,6 @@ class NumericColumnTest(test.TestCase): 'aaa': parsing_ops.FixedLenFeature((2, 3), dtype=dtypes.int32) }, a._parse_example_spec) - @test_util.run_deprecated_v1 def test_parse_example_no_default_value(self): price = fc._numeric_column('price', shape=[2]) data = example_pb2.Example(features=feature_pb2.Features( @@ -309,7 +306,6 @@ class NumericColumnTest(test.TestCase): with self.assertRaisesRegex(TypeError, 'must be a callable'): 
fc._numeric_column('price', normalizer_fn='NotACallable') - @test_util.run_deprecated_v1 def test_normalizer_fn_transform_feature(self): def _increment_two(input_tensor): @@ -328,7 +324,7 @@ class NumericColumnTest(test.TestCase): price = fc._numeric_column('price', shape=[2], normalizer_fn=_increment_two) builder = _LazyBuilder({'price': [[1., 2.], [5., 6.]]}) - self.assertEqual(builder.get(price), price._get_dense_tensor(builder)) + self.assertAllClose(builder.get(price), price._get_dense_tensor(builder)) def test_sparse_tensor_not_supported(self): price = fc._numeric_column('price') @@ -340,7 +336,6 @@ class NumericColumnTest(test.TestCase): with self.assertRaisesRegex(ValueError, 'must be a Tensor'): price._transform_feature(builder) - @test_util.run_deprecated_v1 def test_deep_copy(self): a = fc._numeric_column('aaa', shape=[1, 2], default_value=[[3., 2.]]) a_copy = copy.deepcopy(a) @@ -353,7 +348,6 @@ class NumericColumnTest(test.TestCase): 'aaa', shape=[1, 2], default_value=np.array([[3., 2.]])) self.assertEqual(a.default_value, ((3., 2.),)) - @test_util.run_deprecated_v1 def test_linear_model(self): price = fc._numeric_column('price') with ops.Graph().as_default(): @@ -368,7 +362,6 @@ class NumericColumnTest(test.TestCase): sess.run(price_var.assign([[10.]])) self.assertAllClose([[10.], [50.]], self.evaluate(predictions)) - @test_util.run_deprecated_v1 def test_keras_linear_model(self): price = fc._numeric_column('price') with ops.Graph().as_default(): @@ -465,8 +458,8 @@ class BucketizedColumnTest(test.TestCase): 'price': [[-1., 1.], [5., 6.]] }, [bucketized_price]) with _initialized_session(): - self.assertAllEqual([[0, 1], [3, 4]], - transformed_tensor[bucketized_price].eval()) + self.assertAllClose([[0, 1], [3, 4]], + transformed_tensor[bucketized_price]) def test_get_dense_tensor_one_input_value(self): """Tests _get_dense_tensor() for input with shape=[1].""" @@ -539,7 +532,6 @@ class BucketizedColumnTest(test.TestCase): with self.assertRaisesRegex(ValueError, 'must be a Tensor'): bucketized_price._transform_feature(builder) - @test_util.run_deprecated_v1 def test_deep_copy(self): a = fc._numeric_column('aaa', shape=[2]) a_bucketized = fc._bucketized_column(a, boundaries=[0, 1]) @@ -667,7 +659,6 @@ class BucketizedColumnTest(test.TestCase): class HashedCategoricalColumnTest(test.TestCase): - @test_util.run_deprecated_v1 def test_defaults(self): a = fc._categorical_column_with_hash_bucket('aaa', 10) self.assertEqual('aaa', a.name) @@ -695,7 +686,6 @@ class HashedCategoricalColumnTest(test.TestCase): with self.assertRaisesRegex(ValueError, 'dtype must be string or integer'): fc._categorical_column_with_hash_bucket('aaa', 10, dtype=dtypes.float32) - @test_util.run_deprecated_v1 def test_deep_copy(self): original = fc._categorical_column_with_hash_bucket('aaa', 10) for column in (original, copy.deepcopy(original)): @@ -735,10 +725,8 @@ class HashedCategoricalColumnTest(test.TestCase): sparse_tensor.SparseTensorValue( indices=[[0, 0], [0, 1]], values=np.array([b'omar', b'stringer'], dtype=np.object_), - dense_shape=[1, 2]), - features['aaa'].eval()) + dense_shape=[1, 2]), features['aaa'].eval()) - @test_util.run_deprecated_v1 def test_strings_should_be_hashed(self): hashed_sparse = fc._categorical_column_with_hash_bucket('wire', 10) wire_tensor = sparse_tensor.SparseTensor( @@ -753,7 +741,7 @@ class HashedCategoricalColumnTest(test.TestCase): self.assertEqual(dtypes.int64, output.values.dtype) self.assertAllEqual(expected_values, output.values) 
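
The recurring pattern in this fixit is to drop the graph-only @test_util.run_deprecated_v1 decorator and route every variable/tensor read through self.evaluate(...), so the same test body runs under both graph and eager execution. A small self-contained sketch of the style being migrated to (the test class and values here are hypothetical, not from this file):

    import tensorflow as tf

    class EvaluateStyleTest(tf.test.TestCase):

      def test_assign_add(self):
        # Assumes eager execution (the TF2 default). self.evaluate() also works
        # in graph mode, which is why it replaces tensor.eval()/session.run().
        v = tf.Variable([3.1, 3.2])
        self.evaluate(v.assign_add([0.42, 0.42]))
        self.assertAllClose([3.52, 3.62], self.evaluate(v))

    if __name__ == "__main__":
      tf.test.main()
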
self.assertAllEqual(wire_tensor.indices, output.indices) - self.assertAllEqual(wire_tensor.dense_shape, output.dense_shape.eval()) + self.assertAllEqual(wire_tensor.dense_shape, output.dense_shape) def test_tensor_dtype_should_be_string_or_integer(self): string_fc = fc._categorical_column_with_hash_bucket( @@ -793,7 +781,6 @@ class HashedCategoricalColumnTest(test.TestCase): with self.assertRaisesRegex(ValueError, 'dtype must be compatible'): builder.get(hashed_sparse) - @test_util.run_deprecated_v1 def test_ints_should_be_hashed(self): hashed_sparse = fc._categorical_column_with_hash_bucket( 'wire', 10, dtype=dtypes.int64) @@ -852,7 +839,6 @@ class HashedCategoricalColumnTest(test.TestCase): ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)) self.assertCountEqual([], ops.get_collection('my_weights')) - @test_util.run_deprecated_v1 def test_get_sparse_tensors_dense_input(self): hashed_sparse = fc._categorical_column_with_hash_bucket('wire', 10) builder = _LazyBuilder({'wire': (('omar', ''), ('stringer', 'marlo'))}) @@ -860,7 +846,6 @@ class HashedCategoricalColumnTest(test.TestCase): self.assertIsNone(id_weight_pair.weight_tensor) self.assertEqual(builder.get(hashed_sparse), id_weight_pair.id_tensor) - @test_util.run_deprecated_v1 def test_linear_model(self): wire_column = fc._categorical_column_with_hash_bucket('wire', 4) self.assertEqual(4, wire_column._num_buckets) @@ -878,12 +863,11 @@ class HashedCategoricalColumnTest(test.TestCase): self.assertAllClose(((0.,), (0.,), (0.,), (0.,)), self.evaluate(wire_var)) self.assertAllClose(((0.,), (0.,)), self.evaluate(predictions)) - wire_var.assign(((1.,), (2.,), (3.,), (4.,))).eval() + self.evaluate(wire_var.assign(((1.,), (2.,), (3.,), (4.,)))) # 'marlo' -> 3: wire_var[3] = 4 # 'skywalker' -> 2, 'omar' -> 2: wire_var[2] + wire_var[2] = 3+3 = 6 self.assertAllClose(((4.,), (6.,)), self.evaluate(predictions)) - @test_util.run_deprecated_v1 def test_keras_linear_model(self): wire_column = fc._categorical_column_with_hash_bucket('wire', 4) self.assertEqual(4, wire_column._num_buckets) @@ -902,7 +886,7 @@ class HashedCategoricalColumnTest(test.TestCase): self.assertAllClose(((0.,), (0.,), (0.,), (0.,)), self.evaluate(wire_var)) self.assertAllClose(((0.,), (0.,)), self.evaluate(predictions)) - wire_var.assign(((1.,), (2.,), (3.,), (4.,))).eval() + self.evaluate(wire_var.assign(((1.,), (2.,), (3.,), (4.,)))) # 'marlo' -> 3: wire_var[3] = 4 # 'skywalker' -> 2, 'omar' -> 2: wire_var[2] + wire_var[2] = 3+3 = 6 self.assertAllClose(((4.,), (6.,)), self.evaluate(predictions)) @@ -990,7 +974,6 @@ class CrossedColumnTest(test.TestCase): crossed = fc._crossed_column([b, 'c'], 15) self.assertEqual(15, crossed._num_buckets) - @test_util.run_deprecated_v1 def test_deep_copy(self): a = fc._numeric_column('a', dtype=dtypes.int32) b = fc._bucketized_column(a, boundaries=[0, 1]) @@ -1001,7 +984,6 @@ class CrossedColumnTest(test.TestCase): self.assertEqual(15, crossed2_copy.hash_bucket_size) self.assertEqual(5, crossed2_copy.hash_key) - @test_util.run_deprecated_v1 def test_parse_example(self): price = fc._numeric_column('price', shape=[2]) bucketized_price = fc._bucketized_column(price, boundaries=[0, 50]) @@ -1044,7 +1026,7 @@ class CrossedColumnTest(test.TestCase): } outputs = _transform_features(features, [price_cross_wire]) output = outputs[price_cross_wire] - with self.cached_session() as sess: + with self.cached_session(): output_val = self.evaluate(output) self.assertAllEqual( [[0, 0], [0, 1], [1, 0], [1, 1], [1, 2], [1, 3]], output_val.indices) @@ -1052,7 
+1034,6 @@ class CrossedColumnTest(test.TestCase): self.assertIn(val, list(range(hash_bucket_size))) self.assertAllEqual([2, 4], output_val.dense_shape) - @test_util.run_deprecated_v1 def test_get_sparse_tensors(self): a = fc._numeric_column('a', dtype=dtypes.int32, shape=(2,)) b = fc._bucketized_column(a, boundaries=(0, 1)) @@ -1120,7 +1101,6 @@ class CrossedColumnTest(test.TestCase): self.assertAllEqual(expected_values, id_tensor_eval.values) self.assertAllEqual((2, 4), id_tensor_eval.dense_shape) - @test_util.run_deprecated_v1 def test_linear_model(self): """Tests linear_model. @@ -1139,15 +1119,15 @@ class CrossedColumnTest(test.TestCase): }, (crossed,)) bias = get_linear_model_bias() crossed_var = get_linear_model_column_var(crossed) - with _initialized_session() as sess: + with _initialized_session(): self.assertAllClose((0.,), self.evaluate(bias)) self.assertAllClose(((0.,), (0.,), (0.,), (0.,), (0.,)), self.evaluate(crossed_var)) self.assertAllClose(((0.,), (0.,)), self.evaluate(predictions)) - sess.run(crossed_var.assign(((1.,), (2.,), (3.,), (4.,), (5.,)))) + self.evaluate(crossed_var.assign(((1.,), (2.,), (3.,), (4.,), (5.,)))) # Expected ids after cross = (1, 0, 1, 3, 4, 2) self.assertAllClose(((3.,), (14.,)), self.evaluate(predictions)) - sess.run(bias.assign((.1,))) + self.evaluate(bias.assign((.1,))) self.assertAllClose(((3.1,), (14.1,)), self.evaluate(predictions)) def test_linear_model_with_weights(self): @@ -1202,7 +1182,6 @@ class CrossedColumnTest(test.TestCase): dense_shape=(2, 2)), }, (crossed,)) - @test_util.run_deprecated_v1 def test_keras_linear_model(self): """Tests _LinearModel. @@ -1223,15 +1202,15 @@ class CrossedColumnTest(test.TestCase): }, (crossed,)) bias = get_linear_model_bias() crossed_var = get_linear_model_column_var(crossed) - with _initialized_session() as sess: + with _initialized_session(): self.assertAllClose((0.,), self.evaluate(bias)) self.assertAllClose(((0.,), (0.,), (0.,), (0.,), (0.,)), self.evaluate(crossed_var)) self.assertAllClose(((0.,), (0.,)), self.evaluate(predictions)) - sess.run(crossed_var.assign(((1.,), (2.,), (3.,), (4.,), (5.,)))) + self.evaluate(crossed_var.assign(((1.,), (2.,), (3.,), (4.,), (5.,)))) # Expected ids after cross = (1, 0, 1, 3, 4, 2) self.assertAllClose(((3.,), (14.,)), self.evaluate(predictions)) - sess.run(bias.assign((.1,))) + self.evaluate(bias.assign((.1,))) self.assertAllClose(((3.1,), (14.1,)), self.evaluate(predictions)) def test_keras_linear_model_with_weights(self): From bb55b48537eb8373941cc812fa140e14599c3d17 Mon Sep 17 00:00:00 2001 From: Jonathan Chu Date: Tue, 4 Aug 2020 00:04:45 +0000 Subject: [PATCH 2019/2522] Passing function_test --- tensorflow/python/eager/def_function.py | 11 ++++++----- tensorflow/python/eager/function.py | 6 ++++-- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py index a3bc0516403..03bc48050e0 100644 --- a/tensorflow/python/eager/def_function.py +++ b/tensorflow/python/eager/def_function.py @@ -861,7 +861,7 @@ class Function(object): # If we did not create any variables the trace we have is good enough. 
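
The thread running through this change: canonicalize_function_inputs now also returns the already-flattened tensor lists, and those flat lists are what gets forwarded to _filtered_call and into fn_with_cond, instead of being re-derived later. Schematically (a sketch of the calling convention using names from the hunks below, not a verbatim excerpt):

    # Structured args/kwds drive cache keys and retracing decisions; the flat
    # lists are what the concrete function is ultimately fed.
    canon_args, canon_kwds, flat_args, flat_kwds = (
        self._stateful_fn._function_spec.canonicalize_function_inputs(
            *args, **kwds))
    outputs = self._concrete_stateful_fn._filtered_call(flat_args, flat_kwds)
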
return self._concrete_stateful_fn._filtered_call(flat_args, flat_kwds) # pylint: disable=protected-access - def fn_with_cond(*inner_args, **inner_kwds): + def fn_with_cond(inner_args, inner_kwds, inner_flat_args, inner_flat_kwds): """Conditionally runs initialization if it's needed.""" condition = True for wr in self._created_variables: @@ -910,15 +910,16 @@ class Function(object): condition, lambda: self._stateless_fn(*inner_args, **inner_kwds), functools.partial(self._concrete_stateful_fn._filtered_call, # pylint: disable=protected-access - inner_args, inner_kwds)) + inner_flat_args, inner_flat_kwds)) # We've created variables and are unable to lift the initialization graphs, # so we fall back to initializing with conds while running the function. - canon_args, canon_kwds, _, _ = \ + canon_args, canon_kwds, flat_args, flat_kwds = \ self._stateful_fn._function_spec.canonicalize_function_inputs( # pylint: disable=protected-access *args, **kwds) - # TODO(jlchu): fix arguments for this, two cases for fn_with_cond - return function_lib.defun(fn_with_cond)(*canon_args, **canon_kwds) + # TODO(jlchu): verify that mdofication to fn_with_cond works + return function_lib.defun(fn_with_cond)(canon_args, canon_kwds, + flat_args, flat_kwds) @property def python_function(self): diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 8c5815e21b3..b0a2b996ab8 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -3285,6 +3285,8 @@ class Function(object): if self.input_signature is None or args is not None or kwargs is not None: args, kwargs, flat_args, flat_kwargs = \ self._function_spec.canonicalize_function_inputs(*args, **kwargs) + else: + flat_args, flat_kwargs = [], [] cache_key = self._cache_key(args, kwargs) @@ -3324,7 +3326,7 @@ class Function(object): and self.input_signature is None and call_context_key in self._function_cache.missed): return_function, _, _ = \ - self.define_function_with_shape_relaxation(args, kwargs) + self._define_function_with_shape_relaxation(args, kwargs) #TODO(jlchu): Investigate modifying above function sig directly return return_function, flat_args, flat_kwargs @@ -3334,7 +3336,7 @@ class Function(object): if ops.get_default_graph()._distribution_strategy_stack: self._traced_with_distribution_strategy = True - + return graph_function, flat_args, flat_kwargs From 2e89544a158acf0e0cd9fd4e34d7bb869dea5256 Mon Sep 17 00:00:00 2001 From: Jonathan Chu Date: Tue, 4 Aug 2020 00:13:49 +0000 Subject: [PATCH 2020/2522] Pylint polishing --- tensorflow/python/eager/def_function.py | 2 +- tensorflow/python/eager/function.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py index 03bc48050e0..a1ffd2f9efc 100644 --- a/tensorflow/python/eager/def_function.py +++ b/tensorflow/python/eager/def_function.py @@ -918,7 +918,7 @@ class Function(object): self._stateful_fn._function_spec.canonicalize_function_inputs( # pylint: disable=protected-access *args, **kwds) # TODO(jlchu): verify that mdofication to fn_with_cond works - return function_lib.defun(fn_with_cond)(canon_args, canon_kwds, + return function_lib.defun(fn_with_cond)(canon_args, canon_kwds, flat_args, flat_kwds) @property diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index b0a2b996ab8..d9b141e049a 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -3336,7 +3336,7 @@ class 
Function(object): if ops.get_default_graph()._distribution_strategy_stack: self._traced_with_distribution_strategy = True - + return graph_function, flat_args, flat_kwargs From dab856a93fdecb88880e08bc94928f3e0f141cf9 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Mon, 3 Aug 2020 17:10:40 -0700 Subject: [PATCH 2021/2522] Removed ElementwiseOperation. Simplified ex-ElementwiseOperations, ReLU, PReLU, etc. PiperOrigin-RevId: 324715510 Change-Id: I3d98cdbcc8075bb91f20e065b0aca2ab16a4e8e5 --- .../delegates/gpu/cl/inference_context.cc | 8 +- .../lite/delegates/gpu/cl/kernels/add.cc | 43 ++-- .../lite/delegates/gpu/cl/kernels/add.h | 22 +- .../lite/delegates/gpu/cl/kernels/add_test.cc | 6 +- .../delegates/gpu/cl/kernels/elementwise.cc | 215 ++++++------------ .../delegates/gpu/cl/kernels/elementwise.h | 91 ++------ .../gpu/cl/kernels/elementwise_test.cc | 62 +++-- .../delegates/gpu/cl/kernels/gpu_operation.cc | 158 +++++++------ .../delegates/gpu/cl/kernels/gpu_operation.h | 76 ++----- .../lite/delegates/gpu/cl/kernels/prelu.cc | 46 ++-- .../lite/delegates/gpu/cl/kernels/prelu.h | 41 +--- .../delegates/gpu/cl/kernels/prelu_test.cc | 4 +- .../gpu/cl/kernels/quantize_and_dequantize.cc | 66 ++---- .../gpu/cl/kernels/quantize_and_dequantize.h | 38 +--- .../kernels/quantize_and_dequantize_test.cc | 20 +- .../lite/delegates/gpu/cl/kernels/relu.cc | 42 ++-- .../lite/delegates/gpu/cl/kernels/relu.h | 22 +- .../delegates/gpu/cl/kernels/relu_test.cc | 8 +- .../gpu/cl/selectors/operation_selector.cc | 75 +++--- .../gpu/cl/selectors/simple_selectors.cc | 28 ++- .../gpu/cl/selectors/simple_selectors.h | 8 +- 21 files changed, 368 insertions(+), 711 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/inference_context.cc b/tensorflow/lite/delegates/gpu/cl/inference_context.cc index 3067c81ec94..8e23eb1bcee 100644 --- a/tensorflow/lite/delegates/gpu/cl/inference_context.cc +++ b/tensorflow/lite/delegates/gpu/cl/inference_context.cc @@ -390,9 +390,7 @@ void InferenceContext::Merge() { continue; } auto& linkable_node = nodes_[next_nodes[0]]; - auto* elementwise = - dynamic_cast(linkable_node.operations[0].get()); - if (!elementwise || !elementwise->IsLinkable() || + if (!linkable_node.operations[0]->IsLinkable() || linkable_node.outputs.size() != 1 || !IsReady(ready_tensors, linkable_node)) { continue; @@ -410,9 +408,7 @@ void InferenceContext::Merge() { } for (auto& node : nodes_) { for (int j = 1; j < node.operations.size(); ++j) { - auto* elementwise = - dynamic_cast(node.operations[j].get()); - node.operations[0]->AddOperation(elementwise); + node.operations[0]->AddOperation(node.operations[j].get()); } } } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/add.cc b/tensorflow/lite/delegates/gpu/cl/kernels/add.cc index 1d09e39b83b..1cb41e79d88 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/add.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/add.cc @@ -25,42 +25,29 @@ namespace tflite { namespace gpu { namespace cl { -Add::Add(const OperationDef& definition, const std::vector& channels, - int dst_channels) - : ElementwiseOperation(definition) { +GPUOperation CreateAdd(const OperationDef& definition, + const std::vector& channels, int dst_channels) { + GPUOperation add(definition); int dst_depth = DivideRoundUp(dst_channels, 4); int src0_depth = DivideRoundUp(channels[0], 4); - linkable_ = dst_depth == src0_depth; + add.elementwise_ = true; + add.linkable_ = dst_depth == src0_depth; if (src0_depth < dst_depth) { - check_src_channels_size_ = true; + add.check_src_channels_size_ = true; } 
- for (int i = 1; i < definition_.src_tensors.size(); ++i) { + for (int i = 1; i < definition.src_tensors.size(); ++i) { const std::string tensor_name = absl::StrCat("src_data_", i); - auto src_desc = definition_.src_tensors[i]; - if (definition_.IsBatchSupported()) { + auto src_desc = definition.src_tensors[i]; + if (definition.IsBatchSupported()) { src_desc.SetStateVar("BatchedWidth", "true"); } - AddSrcTensor(tensor_name, src_desc); - code_ += "if (S_COORD < args." + tensor_name + ".Slices()) {\n"; - code_ += " in_out_value += args." + tensor_name + - ".Read(X_COORD, Y_COORD, S_COORD);\n"; - code_ += "}\n"; + add.AddSrcTensor(tensor_name, src_desc); + add.code_ += "if (S_COORD < args." + tensor_name + ".Slices()) {\n"; + add.code_ += " in_out_value += args." + tensor_name + + ".Read(X_COORD, Y_COORD, S_COORD);\n"; + add.code_ += "}\n"; } -} - -Add::Add(Add&& operation) : ElementwiseOperation(std::move(operation)) {} - -Add& Add::operator=(Add&& operation) { - if (this != &operation) { - ElementwiseOperation::operator=(std::move(operation)); - } - return *this; -} - -Add CreateAdd(const OperationDef& definition, const std::vector& channels, - int dst_channels) { - Add operation(definition, channels, dst_channels); - return operation; + return add; } } // namespace cl diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/add.h b/tensorflow/lite/delegates/gpu/cl/kernels/add.h index 81b2fed116f..0e9d7e0d333 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/add.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/add.h @@ -27,24 +27,10 @@ namespace tflite { namespace gpu { namespace cl { -// Add operation inherited from ElementwiseOperation, but it is more -// complicated than usual elementwise, that is why it has own versions for -// Compile. Add operation support not equal tensors on input (for possibility to -// remove Padding operation with zeroes in Z dimension) -class Add : public ElementwiseOperation { - public: - Add(const OperationDef& definition, const std::vector& channels, - int dst_channels); - - // Move only - Add(Add&& operation); - Add& operator=(Add&& operation); - Add(const Add&) = delete; - Add& operator=(const Add&) = delete; -}; - -Add CreateAdd(const OperationDef& definition, const std::vector& channels, - int dst_channels); +// Add operation supports not equal tensors on input (for possibility to +// remove Padding operation with zeroes in channels dimension) +GPUOperation CreateAdd(const OperationDef& definition, + const std::vector& channels, int dst_channels); } // namespace cl } // namespace gpu diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/add_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/add_test.cc index 1eccab87646..2856b37a497 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/add_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/add_test.cc @@ -49,7 +49,7 @@ TEST_F(OpenCLOperationTest, AddTwoEqualTensors) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - Add operation = CreateAdd(op_def, channels, channels[0]); + GPUOperation operation = CreateAdd(op_def, channels, channels[0]); ASSERT_OK(ExecuteGPUOperation({src0, src1}, creation_context_, &operation, BHWC(1, 2, 1, 2), &dst_tensor)); EXPECT_THAT(dst_tensor.data, @@ -77,7 +77,7 @@ TEST_F(OpenCLOperationTest, AddFirstTensorHasMoreChannelsThanSecond) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); 
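With the Add class removed, call sites only change the declared type; a minimal sketch of the migrated usage, assuming an op_def and channel list set up as in the tests around this hunk:

    // Illustrative only: the fused add is now held as a plain GPUOperation.
    std::vector<int> channels = {2, 2};            // one entry per source tensor
    GPUOperation add_op = CreateAdd(op_def, channels, /*dst_channels=*/2);
    // add_op.elementwise_ is true, so it can be compiled standalone or attached
    // to a preceding operation via AddOperation().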
TensorFloat32 dst_tensor; - Add operation = CreateAdd(op_def, channels, channels[0]); + GPUOperation operation = CreateAdd(op_def, channels, channels[0]); ASSERT_OK(ExecuteGPUOperation({src0, src1}, creation_context_, &operation, BHWC(1, 2, 1, 6), &dst_tensor)); EXPECT_THAT(dst_tensor.data, @@ -107,7 +107,7 @@ TEST_F(OpenCLOperationTest, AddFirstTensorHasLessChannelsThanSecond) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - Add operation = CreateAdd(op_def, channels, 6); + GPUOperation operation = CreateAdd(op_def, channels, 6); ASSERT_OK(ExecuteGPUOperation({src0, src1}, creation_context_, &operation, BHWC(1, 2, 1, 6), &dst_tensor)); EXPECT_THAT(dst_tensor.data, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc index 21866021e91..063b15c1b69 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc @@ -134,128 +134,33 @@ std::string GetTwoInputCode(const OperationType& op_type, } } // namespace -ElementwiseOneInput::ElementwiseOneInput(const OperationDef& definition, - const OperationType& op_type) - : ElementwiseOperation(definition) { - code_ = GetOneInputCode(op_type, definition.precision, "in_out_value"); +GPUOperation CreateElementwiseOneInput(const OperationDef& definition, + const OperationType& op_type) { + GPUOperation op(definition); + op.elementwise_ = true; + op.code_ = GetOneInputCode(op_type, definition.precision, "in_out_value"); + return op; } -ElementwiseOneInput::ElementwiseOneInput(ElementwiseOneInput&& operation) - : ElementwiseOperation(std::move(operation)) {} - -ElementwiseOneInput& ElementwiseOneInput::operator=( - ElementwiseOneInput&& operation) { - if (this != &operation) { - ElementwiseOperation::operator=(std::move(operation)); - } - return *this; -} - -ElementwiseOneInput CreateElementwiseOneInput(const OperationDef& definition, - const OperationType& op_type) { - ElementwiseOneInput operation(definition, op_type); - return operation; -} - -ElementwiseOneRuntimeOneScalar::ElementwiseOneRuntimeOneScalar( - const OperationDef& definition, const OperationType& op_type, - float scalar_parameter, CalculationsPrecision scalar_precision) - : ElementwiseOperation(definition) { - if (definition.precision == CalculationsPrecision::F32) { - args_.AddFloat("scalar", scalar_parameter); - } else { - args_.AddHalf("scalar", half(scalar_parameter)); - } - code_ = GetTwoInputCode(op_type, "in_out_value", "args.scalar"); -} - -ElementwiseOneRuntimeOneScalar::ElementwiseOneRuntimeOneScalar( - ElementwiseOneRuntimeOneScalar&& operation) - : ElementwiseOperation(std::move(operation)) {} - -ElementwiseOneRuntimeOneScalar& ElementwiseOneRuntimeOneScalar::operator=( - ElementwiseOneRuntimeOneScalar&& operation) { - if (this != &operation) { - ElementwiseOperation::operator=(std::move(operation)); - } - return *this; -} - -ElementwiseOneRuntimeOneScalar CreateElementwiseOneRuntimeOneScalar( +GPUOperation CreateElementwiseOneRuntimeOneScalar( const CreationContext& creation_context, const OperationDef& definition, const OperationType& op_type, float scalar_parameter) { - const auto scalar_precision = creation_context.device->IsPowerVR() - ? 
CalculationsPrecision::F32 - : definition.precision; - ElementwiseOneRuntimeOneScalar operation(definition, op_type, - scalar_parameter, scalar_precision); - return operation; -} - -ElementwiseTwoInput::ElementwiseTwoInput(const OperationDef& definition, - const OperationType& op_type, - const BroadcastSettings& broadcast) - : ElementwiseOperation(definition), - broadcast_(broadcast) { - auto src_desc = definition.src_tensors[1]; - if (definition.IsBatchSupported()) { - src_desc.SetStateVar("BatchedWidth", "true"); + GPUOperation op(definition); + op.elementwise_ = true; + if (definition.precision == CalculationsPrecision::F32) { + op.args_.AddFloat("scalar", scalar_parameter); + } else { + op.args_.AddHalf("scalar", half(scalar_parameter)); } - AddSrcTensor("second_tensor", src_desc); - const std::string x_coord = broadcast.width ? "0" : "X_COORD"; - const std::string y_coord = broadcast.height ? "0" : "Y_COORD"; - const std::string s_coord = broadcast.channels ? "0" : "S_COORD"; - code_ = absl::StrCat("FLT4 second_val = args.second_tensor.Read(", x_coord, - ", ", y_coord, ", ", s_coord, ");\n"); - if (broadcast.channels) { - code_ += " second_val.y = second_val.x;\n"; - code_ += " second_val.z = second_val.x;\n"; - code_ += " second_val.w = second_val.x;\n"; - } - code_ += GetTwoInputCode(op_type, "in_out_value", "second_val"); -} - -ElementwiseTwoInput::ElementwiseTwoInput(const OperationDef& definition, - const OperationType& op_type, - const BroadcastSettings& broadcast, - Tensor&& constant_tensor) - : ElementwiseOperation(definition), - broadcast_(broadcast) { - auto descriptor = constant_tensor.GetDescriptor(); - args_.AddObject("second_tensor", AccessType::READ, - absl::make_unique(std::move(constant_tensor)), - absl::make_unique(descriptor)); - const std::string x_coord = broadcast.width ? "0" : "X_COORD"; - const std::string y_coord = broadcast.height ? "0" : "Y_COORD"; - const std::string s_coord = broadcast.channels ? 
"0" : "S_COORD"; - code_ = absl::StrCat("FLT4 second_val = args.second_tensor.Read(", x_coord, - ", ", y_coord, ", ", s_coord, ");\n"); - if (broadcast.channels) { - code_ += " second_val.y = second_val.x;\n"; - code_ += " second_val.z = second_val.x;\n"; - code_ += " second_val.w = second_val.x;\n"; - } - code_ += GetTwoInputCode(op_type, "in_out_value", "second_val"); -} - -ElementwiseTwoInput::ElementwiseTwoInput(ElementwiseTwoInput&& operation) - : ElementwiseOperation(std::move(operation)), - broadcast_(operation.broadcast_) {} - -ElementwiseTwoInput& ElementwiseTwoInput::operator=( - ElementwiseTwoInput&& operation) { - if (this != &operation) { - broadcast_ = operation.broadcast_; - ElementwiseOperation::operator=(std::move(operation)); - } - return *this; + op.code_ = GetTwoInputCode(op_type, "in_out_value", "args.scalar"); + return op; } absl::Status CreateElementwiseTwoInput( const CreationContext& creation_context, const OperationDef& definition, const OperationType& op_type, const tflite::gpu::Tensor& constant_tensor, - ElementwiseTwoInput* result) { + GPUOperation* result) { const BHWC shape = BHWC(1, 1, 1, constant_tensor.shape.v); TensorStorageType storage_type = SelectBestStorageType(*creation_context.context, *creation_context.device, @@ -268,12 +173,21 @@ absl::Status CreateElementwiseTwoInput( &gpu_tensor)); RETURN_IF_ERROR( gpu_tensor.WriteData(creation_context.queue, constant_tensor)); - BroadcastSettings broadcast; - broadcast.width = true; - broadcast.height = true; - broadcast.channels = shape.c == 1; - *result = ElementwiseTwoInput(definition, op_type, broadcast, - std::move(gpu_tensor)); + + *result = GPUOperation(definition); + result->elementwise_ = true; + result->args_.AddObject("second_tensor", AccessType::READ, + absl::make_unique(std::move(gpu_tensor)), + absl::make_unique(desc)); + const std::string s_coord = shape.c == 1 ? "0" : "S_COORD"; + result->code_ = absl::StrCat( + "FLT4 second_val = args.second_tensor.Read(0, 0, ", s_coord, ");\n"); + if (shape.c == 1) { + result->code_ += " second_val.y = second_val.x;\n"; + result->code_ += " second_val.z = second_val.x;\n"; + result->code_ += " second_val.w = second_val.x;\n"; + } + result->code_ += GetTwoInputCode(op_type, "in_out_value", "second_val"); return absl::OkStatus(); } @@ -281,7 +195,7 @@ absl::Status CreateElementwiseTwoInput( const CreationContext& creation_context, const OperationDef& definition, const OperationType& op_type, const tflite::gpu::Tensor& constant_tensor, - ElementwiseTwoInput* result) { + GPUOperation* result) { const BHWC shape = BHWC(1, constant_tensor.shape.h, constant_tensor.shape.w, constant_tensor.shape.c); TensorStorageType storage_type = @@ -295,34 +209,49 @@ absl::Status CreateElementwiseTwoInput( &gpu_tensor)); RETURN_IF_ERROR( gpu_tensor.WriteData(creation_context.queue, constant_tensor)); - BroadcastSettings broadcast; - broadcast.width = shape.w == 1; - broadcast.height = shape.h == 1; - broadcast.channels = shape.c == 1; - *result = ElementwiseTwoInput(definition, op_type, broadcast, - std::move(gpu_tensor)); + + *result = GPUOperation(definition); + result->elementwise_ = true; + result->args_.AddObject("second_tensor", AccessType::READ, + absl::make_unique(std::move(gpu_tensor)), + absl::make_unique(desc)); + const std::string x_coord = shape.w == 1 ? "0" : "X_COORD"; + const std::string y_coord = shape.h == 1 ? "0" : "Y_COORD"; + const std::string s_coord = shape.c == 1 ? 
"0" : "S_COORD"; + result->code_ = absl::StrCat("FLT4 second_val = args.second_tensor.Read(", + x_coord, ", ", y_coord, ", ", s_coord, ");\n"); + if (shape.c == 1) { + result->code_ += " second_val.y = second_val.x;\n"; + result->code_ += " second_val.z = second_val.x;\n"; + result->code_ += " second_val.w = second_val.x;\n"; + } + result->code_ += GetTwoInputCode(op_type, "in_out_value", "second_val"); + return absl::OkStatus(); } -ElementwiseTwoInput CreateElementwiseTwoInput(const OperationDef& definition, - const OperationType& op_type, - const BHWC& shape) { - BroadcastSettings broadcast; - broadcast.width = shape.w == 1; - broadcast.height = shape.h == 1; - broadcast.channels = shape.c == 1; - ElementwiseTwoInput operation(definition, op_type, broadcast); - return operation; -} - -ElementwiseTwoInput CreateElementwiseTwoInput(const OperationDef& definition, - const OperationType& op_type) { - BroadcastSettings broadcast; - broadcast.width = false; - broadcast.height = false; - broadcast.channels = false; - ElementwiseTwoInput operation(definition, op_type, broadcast); - return operation; +GPUOperation CreateElementwiseTwoInput(const OperationDef& definition, + const OperationType& op_type, + const BHWC& shape) { + GPUOperation op(definition); + op.elementwise_ = true; + auto src_desc = definition.src_tensors[1]; + if (definition.IsBatchSupported()) { + src_desc.SetStateVar("BatchedWidth", "true"); + } + op.AddSrcTensor("second_tensor", src_desc); + const std::string x_coord = shape.w == 1 ? "0" : "X_COORD"; + const std::string y_coord = shape.h == 1 ? "0" : "Y_COORD"; + const std::string s_coord = shape.c == 1 ? "0" : "S_COORD"; + op.code_ = absl::StrCat("FLT4 second_val = args.second_tensor.Read(", x_coord, + ", ", y_coord, ", ", s_coord, ");\n"); + if (shape.c == 1) { + op.code_ += " second_val.y = second_val.x;\n"; + op.code_ += " second_val.z = second_val.x;\n"; + op.code_ += " second_val.w = second_val.x;\n"; + } + op.code_ += GetTwoInputCode(op_type, "in_out_value", "second_val"); + return op; } } // namespace cl diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.h b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.h index 9712ee96b90..d03d535b39a 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.h @@ -26,93 +26,38 @@ namespace tflite { namespace gpu { namespace cl { -// Class for simple one input operations without any parameters, for example -// log, sin, cos and etc. -class ElementwiseOneInput : public ElementwiseOperation { - public: - ElementwiseOneInput(const OperationDef& definition, - const OperationType& op_type); +// Creates simple one input operation without any parameters, for example +// log, sin, cos, etc. +GPUOperation CreateElementwiseOneInput(const OperationDef& definition, + const OperationType& op_type); - // Move only - ElementwiseOneInput(ElementwiseOneInput&& operation); - ElementwiseOneInput& operator=(ElementwiseOneInput&& operation); - ElementwiseOneInput(const ElementwiseOneInput&) = delete; - ElementwiseOneInput& operator=(const ElementwiseOneInput&) = delete; -}; - -ElementwiseOneInput CreateElementwiseOneInput(const OperationDef& definition, - const OperationType& op_type); - -// Class for simple two input (first input is runtime tensor and second input is -// scalar argument) operations without any parameters, for example sub, div and -// etc. 
-class ElementwiseOneRuntimeOneScalar : public ElementwiseOperation { - public: - ElementwiseOneRuntimeOneScalar(const OperationDef& definition, - const OperationType& op_type, - float scalar_parameter, - CalculationsPrecision scalar_precision); - - // Move only - ElementwiseOneRuntimeOneScalar(ElementwiseOneRuntimeOneScalar&& operation); - ElementwiseOneRuntimeOneScalar& operator=( - ElementwiseOneRuntimeOneScalar&& operation); - ElementwiseOneRuntimeOneScalar(const ElementwiseOneRuntimeOneScalar&) = - delete; - ElementwiseOneRuntimeOneScalar& operator=( - const ElementwiseOneRuntimeOneScalar&) = delete; -}; - -ElementwiseOneRuntimeOneScalar CreateElementwiseOneRuntimeOneScalar( +// Creates simple two input (first input is runtime tensor and second input is +// scalar argument) operation, for example sub, div, pow, etc. +GPUOperation CreateElementwiseOneRuntimeOneScalar( const CreationContext& creation_context, const OperationDef& definition, const OperationType& op_type, float scalar_parameter); -struct BroadcastSettings { - bool width; - bool height; - bool channels; -}; - -// Class for simple two input(first input is runtime tensor and second input is -// runtime or constant tensor) operations without any parameters, for example -// sub, div and etc. -class ElementwiseTwoInput : public ElementwiseOperation { - public: - ElementwiseTwoInput() = default; - ElementwiseTwoInput(const OperationDef& definition, - const OperationType& op_type, - const BroadcastSettings& broadcast); - - ElementwiseTwoInput(const OperationDef& definition, - const OperationType& op_type, - const BroadcastSettings& broadcast, - Tensor&& constant_tensor); - - // Move only - ElementwiseTwoInput(ElementwiseTwoInput&& operation); - ElementwiseTwoInput& operator=(ElementwiseTwoInput&& operation); - ElementwiseTwoInput(const ElementwiseTwoInput&) = delete; - ElementwiseTwoInput& operator=(const ElementwiseTwoInput&) = delete; - - private: - BroadcastSettings broadcast_; -}; - +// Creates simple two input(first input is runtime tensor and second input is +// constant linear tensor) operation, for example sub, div and etc. absl::Status CreateElementwiseTwoInput( const CreationContext& creation_context, const OperationDef& definition, const OperationType& op_type, const tflite::gpu::Tensor& constant_tensor, - ElementwiseTwoInput* result); + GPUOperation* result); +// Creates simple two input(first input is runtime tensor and second input is +// constant HWC tensor) operation, for example sub, div and etc. absl::Status CreateElementwiseTwoInput( const CreationContext& creation_context, const OperationDef& definition, const OperationType& op_type, const tflite::gpu::Tensor& constant_tensor, - ElementwiseTwoInput* result); + GPUOperation* result); -ElementwiseTwoInput CreateElementwiseTwoInput(const OperationDef& definition, - const OperationType& op_type, - const BHWC& shape); +// Creates simple two input(2 runtime tensors) operation, for example +// sub, div and etc. 
+GPUOperation CreateElementwiseTwoInput(const OperationDef& definition, + const OperationType& op_type, + const BHWC& shape); } // namespace cl } // namespace gpu diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise_test.cc index ac825c0cdfc..11a651df901 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise_test.cc @@ -45,7 +45,7 @@ TEST_F(OpenCLOperationTest, Abs) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - ElementwiseOneInput operation = + GPUOperation operation = CreateElementwiseOneInput(op_def, OperationType::ABS); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 2, 1, 2), &dst_tensor)); @@ -70,7 +70,7 @@ TEST_F(OpenCLOperationTest, Cos) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - ElementwiseOneInput operation = + GPUOperation operation = CreateElementwiseOneInput(op_def, OperationType::COS); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 2, 1, 2), &dst_tensor)); @@ -95,7 +95,7 @@ TEST_F(OpenCLOperationTest, Copy) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - ElementwiseOneInput operation = + GPUOperation operation = CreateElementwiseOneInput(op_def, OperationType::COPY); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 2, 1, 2), &dst_tensor)); @@ -118,7 +118,7 @@ TEST_F(OpenCLOperationTest, Elu) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - ElementwiseOneInput operation = + GPUOperation operation = CreateElementwiseOneInput(op_def, OperationType::ELU); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 1, 1, 7), &dst_tensor)); @@ -144,7 +144,7 @@ TEST_F(OpenCLOperationTest, Exp) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - ElementwiseOneInput operation = + GPUOperation operation = CreateElementwiseOneInput(op_def, OperationType::EXP); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 1, 1, 7), &dst_tensor)); @@ -171,7 +171,7 @@ TEST_F(OpenCLOperationTest, HardSwish) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - ElementwiseOneInput operation = + GPUOperation operation = CreateElementwiseOneInput(op_def, OperationType::HARD_SWISH); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, src_tensor.shape, &dst_tensor)); @@ -197,7 +197,7 @@ TEST_F(OpenCLOperationTest, Log) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - ElementwiseOneInput operation = + GPUOperation operation = CreateElementwiseOneInput(op_def, OperationType::LOG); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 2, 1, 2), &dst_tensor)); @@ -222,7 +222,7 @@ 
TEST_F(OpenCLOperationTest, Rsqrt) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - ElementwiseOneInput operation = + GPUOperation operation = CreateElementwiseOneInput(op_def, OperationType::RSQRT); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 2, 1, 2), &dst_tensor)); @@ -249,7 +249,7 @@ TEST_F(OpenCLOperationTest, Sigmoid) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - ElementwiseOneInput operation = + GPUOperation operation = CreateElementwiseOneInput(op_def, OperationType::SIGMOID); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 2, 1, 2), &dst_tensor)); @@ -273,7 +273,7 @@ TEST_F(OpenCLOperationTest, Sin) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - ElementwiseOneInput operation = + GPUOperation operation = CreateElementwiseOneInput(op_def, OperationType::SIN); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 2, 1, 2), &dst_tensor)); @@ -299,7 +299,7 @@ TEST_F(OpenCLOperationTest, Sqrt) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - ElementwiseOneInput operation = + GPUOperation operation = CreateElementwiseOneInput(op_def, OperationType::SQRT); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 2, 1, 2), &dst_tensor)); @@ -325,7 +325,7 @@ TEST_F(OpenCLOperationTest, Square) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - ElementwiseOneInput operation = + GPUOperation operation = CreateElementwiseOneInput(op_def, OperationType::SQUARE); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 2, 1, 2), &dst_tensor)); @@ -349,7 +349,7 @@ TEST_F(OpenCLOperationTest, Tanh) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - ElementwiseOneInput operation = + GPUOperation operation = CreateElementwiseOneInput(op_def, OperationType::TANH); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 2, 1, 2), &dst_tensor)); @@ -378,7 +378,7 @@ TEST_F(OpenCLOperationTest, Sub) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - ElementwiseTwoInput operation = CreateElementwiseTwoInput( + GPUOperation operation = CreateElementwiseTwoInput( op_def, OperationType::SUB, src_tensor_1.shape); ASSERT_OK(ExecuteGPUOperation({src_tensor_0, src_tensor_1}, creation_context_, &operation, @@ -406,7 +406,7 @@ TEST_F(OpenCLOperationTest, SquaredDiff) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - ElementwiseTwoInput operation = CreateElementwiseTwoInput( + GPUOperation operation = CreateElementwiseTwoInput( op_def, OperationType::SQUARED_DIFF, src_tensor_1.shape); ASSERT_OK(ExecuteGPUOperation({src_tensor_0, src_tensor_1}, creation_context_, &operation, @@ 
-434,7 +434,7 @@ TEST_F(OpenCLOperationTest, Div) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - ElementwiseTwoInput operation = CreateElementwiseTwoInput( + GPUOperation operation = CreateElementwiseTwoInput( op_def, OperationType::DIV, src_tensor_1.shape); ASSERT_OK(ExecuteGPUOperation({src_tensor_0, src_tensor_1}, creation_context_, &operation, @@ -462,7 +462,7 @@ TEST_F(OpenCLOperationTest, Pow) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - ElementwiseTwoInput operation = CreateElementwiseTwoInput( + GPUOperation operation = CreateElementwiseTwoInput( op_def, OperationType::POW, src_tensor_1.shape); ASSERT_OK(ExecuteGPUOperation({src_tensor_0, src_tensor_1}, creation_context_, &operation, @@ -490,7 +490,7 @@ TEST_F(OpenCLOperationTest, Add) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - ElementwiseTwoInput operation = CreateElementwiseTwoInput( + GPUOperation operation = CreateElementwiseTwoInput( op_def, OperationType::ADD, src_tensor_1.shape); ASSERT_OK(ExecuteGPUOperation({src_tensor_0, src_tensor_1}, creation_context_, &operation, @@ -518,7 +518,7 @@ TEST_F(OpenCLOperationTest, Maximum) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - ElementwiseTwoInput operation = CreateElementwiseTwoInput( + GPUOperation operation = CreateElementwiseTwoInput( op_def, OperationType::MAXIMUM, src_tensor_1.shape); ASSERT_OK(ExecuteGPUOperation({src_tensor_0, src_tensor_1}, creation_context_, &operation, @@ -547,9 +547,8 @@ TEST_F(OpenCLOperationTest, MaximumWithScalar) { op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; const float* scalar = absl::get_if(&attr.param); - ElementwiseOneRuntimeOneScalar operation = - CreateElementwiseOneRuntimeOneScalar(creation_context_, op_def, - OperationType::MAXIMUM, *scalar); + GPUOperation operation = CreateElementwiseOneRuntimeOneScalar( + creation_context_, op_def, OperationType::MAXIMUM, *scalar); ASSERT_OK(ExecuteGPUOperation(src_tensor_0, creation_context_, &operation, BHWC(1, 4, 1, 1), &dst_tensor)); EXPECT_THAT(dst_tensor.data, @@ -578,7 +577,7 @@ TEST_F(OpenCLOperationTest, MaximumWithConstantLinearTensor) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - ElementwiseTwoInput operation; + GPUOperation operation; ASSERT_OK(CreateElementwiseTwoInput(creation_context_, op_def, OperationType::MAXIMUM, linear_tensor, &operation)); @@ -608,7 +607,7 @@ TEST_F(OpenCLOperationTest, MaximumWithConstantHWCTensor) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - ElementwiseTwoInput operation; + GPUOperation operation; ASSERT_OK(CreateElementwiseTwoInput(creation_context_, op_def, OperationType::MAXIMUM, hwc_tensor, &operation)); @@ -637,7 +636,7 @@ TEST_F(OpenCLOperationTest, MaximumWithConstantHWCTensorBroadcastChannels) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 
dst_tensor; - ElementwiseTwoInput operation; + GPUOperation operation; ASSERT_OK(CreateElementwiseTwoInput(creation_context_, op_def, OperationType::MAXIMUM, hwc_tensor, &operation)); @@ -666,7 +665,7 @@ TEST_F(OpenCLOperationTest, Minimum) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - ElementwiseTwoInput operation = CreateElementwiseTwoInput( + GPUOperation operation = CreateElementwiseTwoInput( op_def, OperationType::MINIMUM, src_tensor_1.shape); ASSERT_OK(ExecuteGPUOperation({src_tensor_0, src_tensor_1}, creation_context_, &operation, @@ -695,9 +694,8 @@ TEST_F(OpenCLOperationTest, MinimumWithScalar) { op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; const float* scalar = absl::get_if(&attr.param); - ElementwiseOneRuntimeOneScalar operation = - CreateElementwiseOneRuntimeOneScalar(creation_context_, op_def, - OperationType::MINIMUM, *scalar); + GPUOperation operation = CreateElementwiseOneRuntimeOneScalar( + creation_context_, op_def, OperationType::MINIMUM, *scalar); ASSERT_OK(ExecuteGPUOperation(src_tensor_0, creation_context_, &operation, BHWC(1, 4, 1, 1), &dst_tensor)); EXPECT_THAT(dst_tensor.data, @@ -723,7 +721,7 @@ TEST_F(OpenCLOperationTest, Mul) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - ElementwiseTwoInput operation = CreateElementwiseTwoInput( + GPUOperation operation = CreateElementwiseTwoInput( op_def, OperationType::MUL, src_tensor_1.shape); ASSERT_OK(ExecuteGPUOperation({src_tensor_0, src_tensor_1}, creation_context_, &operation, @@ -751,7 +749,7 @@ TEST_F(OpenCLOperationTest, MulBroadcastHW) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - ElementwiseTwoInput operation = CreateElementwiseTwoInput( + GPUOperation operation = CreateElementwiseTwoInput( op_def, OperationType::MUL, src_tensor_1.shape); ASSERT_OK(ExecuteGPUOperation({src_tensor_0, src_tensor_1}, creation_context_, &operation, @@ -779,7 +777,7 @@ TEST_F(OpenCLOperationTest, MulBroadcastChannels) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - ElementwiseTwoInput operation = CreateElementwiseTwoInput( + GPUOperation operation = CreateElementwiseTwoInput( op_def, OperationType::MUL, src_tensor_1.shape); ASSERT_OK(ExecuteGPUOperation({src_tensor_0, src_tensor_1}, creation_context_, &operation, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc index beb62632099..7260048c6d3 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc @@ -49,6 +49,20 @@ std::string GetElementWiseCode(const OperationDef& op_def, return c; } +absl::Status MergeOperations(const std::vector& linked_ops, + Arguments* merged_args, std::string* merged_code) { + for (int i = 0; i < linked_ops.size(); ++i) { + std::string code = linked_ops[i]->code_; + std::string unique_postfix = absl::StrCat("_link", i + 1); + linked_ops[i]->args_.RenameArgs(unique_postfix, &code); + *merged_code += "{\n" + code + "\n}\n"; + RETURN_IF_ERROR( + merged_args->Merge(std::move(linked_ops[i]->args_), unique_postfix)); + 
linked_ops[i]->AddUniquePostfix(unique_postfix); + } + return absl::OkStatus(); +} + } // namespace DataType OperationDef::GetDataType() const { @@ -108,14 +122,17 @@ void GPUOperation::SetDst(Tensor* ptr, int index) { } GPUOperation::GPUOperation(GPUOperation&& operation) - : definition_(std::move(operation.definition_)), + : args_(std::move(operation.args_)), + code_(std::move(operation.code_)), + elementwise_(operation.elementwise_), + linkable_(operation.linkable_), + check_src_channels_size_(operation.check_src_channels_size_), + definition_(std::move(operation.definition_)), src_(std::move(operation.src_)), dst_(std::move(operation.dst_)), - args_(std::move(operation.args_)), kernel_(std::move(operation.kernel_)), work_group_size_(operation.work_group_size_), grid_size_(operation.grid_size_), - code_(std::move(operation.code_)), src_tensors_names_(std::move(operation.src_tensors_names_)), dst_tensors_names_(std::move(operation.dst_tensors_names_)), compiler_options_(std::move(operation.compiler_options_)), @@ -123,14 +140,17 @@ GPUOperation::GPUOperation(GPUOperation&& operation) GPUOperation& GPUOperation::operator=(GPUOperation&& operation) { if (this != &operation) { + args_ = std::move(operation.args_); + code_ = std::move(operation.code_); + elementwise_ = operation.elementwise_; + linkable_ = operation.linkable_; + check_src_channels_size_ = operation.check_src_channels_size_; definition_ = std::move(operation.definition_); src_ = std::move(operation.src_); dst_ = std::move(operation.dst_); - args_ = std::move(operation.args_); kernel_ = std::move(operation.kernel_); std::swap(work_group_size_, operation.work_group_size_); std::swap(grid_size_, operation.grid_size_); - code_ = std::move(operation.code_); src_tensors_names_ = std::move(operation.src_tensors_names_); dst_tensors_names_ = std::move(operation.dst_tensors_names_); compiler_options_ = std::move(operation.compiler_options_); @@ -139,7 +159,7 @@ GPUOperation& GPUOperation::operator=(GPUOperation&& operation) { return *this; } -void GPUOperation::AddOperation(ElementwiseOperation* operation) { +void GPUOperation::AddOperation(GPUOperation* operation) { linked_operations_.push_back(operation); } @@ -183,73 +203,62 @@ absl::Status GPUOperation::UpdateParams() { } absl::Status GPUOperation::Compile(const CreationContext& creation_context) { - std::string element_wise_code; - RETURN_IF_ERROR( - MergeOperations(linked_operations_, &args_, &element_wise_code)); - RETURN_IF_ERROR(args_.TransformToCLCode( - creation_context.device->GetInfo(), - {{dst_tensors_names_[0], element_wise_code}}, &code_)); - RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel( - code_, "main_function", compiler_options_, *creation_context.context, - *creation_context.device, &kernel_)); + if (elementwise_) { + auto src_desc = + absl::make_unique(definition_.src_tensors[0]); + if (definition_.IsBatchSupported()) { + src_desc->SetStateVar("BatchedWidth", "true"); + } + src_tensors_names_.insert(src_tensors_names_.begin(), "src_tensor"); + args_.AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc)); + + auto dst_desc = + absl::make_unique(definition_.dst_tensors[0]); + if (definition_.IsBatchSupported()) { + dst_desc->SetStateVar("BatchedWidth", "true"); + } + dst_tensors_names_.insert(dst_tensors_names_.begin(), "dst_tensor"); + args_.AddObjectRef("dst_tensor", AccessType::WRITE, std::move(dst_desc)); + + std::string code = + GetElementWiseCode(definition_, check_src_channels_size_); + std::string element_wise_code; + 
element_wise_code += "{\n" + code_ + "\n}\n"; + RETURN_IF_ERROR( + MergeOperations(linked_operations_, &args_, &element_wise_code)); + RETURN_IF_ERROR(args_.TransformToCLCode( + creation_context.device->GetInfo(), + {{dst_tensors_names_[0], element_wise_code}}, &code)); + code = absl::Substitute(code, args_.GetListOfArgs()); + RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel( + code, "main_function", *creation_context.context, + *creation_context.device, &kernel_)); + } else { + std::string element_wise_code; + RETURN_IF_ERROR( + MergeOperations(linked_operations_, &args_, &element_wise_code)); + RETURN_IF_ERROR(args_.TransformToCLCode( + creation_context.device->GetInfo(), + {{dst_tensors_names_[0], element_wise_code}}, &code_)); + RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel( + code_, "main_function", compiler_options_, *creation_context.context, + *creation_context.device, &kernel_)); + } return PostCompileCheck(creation_context.device->GetInfo()); } -ElementwiseOperation::ElementwiseOperation(ElementwiseOperation&& operation) - : GPUOperation(std::move(operation)), - check_src_channels_size_(operation.check_src_channels_size_), - linkable_(operation.linkable_) {} - -ElementwiseOperation& ElementwiseOperation::operator=( - ElementwiseOperation&& operation) { - if (this != &operation) { - check_src_channels_size_ = operation.check_src_channels_size_; - linkable_ = operation.linkable_; - GPUOperation::operator=(std::move(operation)); +int3 GPUOperation::GetGridSize() const { + if (elementwise_) { + const int grid_x = dst_[0]->Width() * dst_[0]->Batch(); + const int grid_y = dst_[0]->Height(); + const int grid_z = dst_[0]->Slices(); + return int3(grid_x, grid_y, grid_z); + } else { + return int3(0, 0, 0); } - return *this; } -int3 ElementwiseOperation::GetGridSize() const { - const int grid_x = dst_[0]->Width() * dst_[0]->Batch(); - const int grid_y = dst_[0]->Height(); - const int grid_z = dst_[0]->Slices(); - return int3(grid_x, grid_y, grid_z); -} - -absl::Status ElementwiseOperation::Compile( - const CreationContext& creation_context) { - auto src_desc = - absl::make_unique(definition_.src_tensors[0]); - if (definition_.IsBatchSupported()) { - src_desc->SetStateVar("BatchedWidth", "true"); - } - src_tensors_names_.insert(src_tensors_names_.begin(), "src_tensor"); - args_.AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc)); - - auto dst_desc = - absl::make_unique(definition_.dst_tensors[0]); - if (definition_.IsBatchSupported()) { - dst_desc->SetStateVar("BatchedWidth", "true"); - } - dst_tensors_names_.insert(dst_tensors_names_.begin(), "dst_tensor"); - args_.AddObjectRef("dst_tensor", AccessType::WRITE, std::move(dst_desc)); - - std::string code = GetElementWiseCode(definition_, check_src_channels_size_); - std::string element_wise_code; - element_wise_code += "{\n" + code_ + "\n}\n"; - RETURN_IF_ERROR( - MergeOperations(linked_operations_, &args_, &element_wise_code)); - RETURN_IF_ERROR(args_.TransformToCLCode( - creation_context.device->GetInfo(), - {{dst_tensors_names_[0], element_wise_code}}, &code)); - code = absl::Substitute(code, args_.GetListOfArgs()); - return creation_context.cache->GetOrCreateCLKernel( - code, "main_function", *creation_context.context, - *creation_context.device, &kernel_); -} - -void ElementwiseOperation::AddUniquePostfix(const std::string& unique_postfix) { +void GPUOperation::AddUniquePostfix(const std::string& unique_postfix) { for (int i = 0; i < src_tensors_names_.size(); ++i) { src_tensors_names_[i] += 
unique_postfix; } @@ -258,21 +267,6 @@ void ElementwiseOperation::AddUniquePostfix(const std::string& unique_postfix) { } } -absl::Status MergeOperations( - const std::vector& linked_ops, - Arguments* merged_args, std::string* merged_code) { - for (int i = 0; i < linked_ops.size(); ++i) { - std::string code = linked_ops[i]->GetCode(); - std::string unique_postfix = absl::StrCat("_link", i + 1); - auto&& link_args = linked_ops[i]->MoveArgs(); - link_args.RenameArgs(unique_postfix, &code); - *merged_code += "{\n" + code + "\n}\n"; - RETURN_IF_ERROR(merged_args->Merge(std::move(link_args), unique_postfix)); - linked_ops[i]->AddUniquePostfix(unique_postfix); - } - return absl::OkStatus(); -} - } // namespace cl } // namespace gpu } // namespace tflite diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h index 01e11f3ea64..620883f26f4 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h @@ -59,18 +59,15 @@ struct OperationDef { bool IsBatchSupported() const; }; -class ElementwiseOperation; - // GPUOperation represents some implementation of neural network operation on -// GPU. GPUOperation can contain ElementwiseOperation operations, in this case, -// ElementwiseOperation still hold necessary data and should be alive. -// When GPUOperation contains ElementwiseOperations, this GPUoperation replaces -// some sequence of operations Op + el_op0 + el_op1 + ... +// GPU. GPUOperation can contain another GPU operations with flag elementwise_. +// When GPUOperation contains another GPU ops, this GPUoperation replaces +// some sequence of operations Op + op0 + op1 + ... // Because of this abilities of GPUOperation, usage scenario is next: // Create instance of GPUOperation. -// Create all instances of ElementwiseOperations that we will(probably) attach -// to GPUOperation. Attach all ElementwiseOperations to GPUOperation. Call -// GPUOperation.Compile(). Don't call ElementwiseOperation.Compile() if it +// Create all instances of GPUOperations that we will(probably) attach +// to GPUOperation. Attach all GPUOperations to GPUOperation. Call +// GPUOperation.Compile(). Don't call GPUOperations.Compile() if it // attached, it useless(and may be error) class GPUOperation { public: @@ -83,7 +80,7 @@ class GPUOperation { GPUOperation(const GPUOperation&) = delete; GPUOperation& operator=(const GPUOperation&) = delete; - void AddOperation(ElementwiseOperation* operation); + void AddOperation(GPUOperation* operation); void SetSrc(Tensor* ptr, int index = 0); void SetDst(Tensor* ptr, int index = 0); @@ -116,64 +113,37 @@ class GPUOperation { void AddDstTensor(const std::string& tensor_name, const TensorDescriptor& desc); + bool IsLinkable() const { return elementwise_ && linkable_; } + + // for linking + void AddUniquePostfix(const std::string& unique_postfix); + + Arguments args_; + std::string code_; + + bool elementwise_ = false; + // applicable only with elementwise_ = true; + bool linkable_ = true; // by default every elementwise is linkable + // applicable only with elementwise_ = true; + bool check_src_channels_size_ = false; + protected: virtual absl::Status BindArguments() { return absl::OkStatus(); } - virtual int3 GetGridSize() const = 0; + virtual int3 GetGridSize() const; // Defines operation calculation precision and format of src/dst tensors. 
OperationDef definition_; std::vector src_; std::vector dst_; - Arguments args_; CLKernel kernel_; int3 work_group_size_ = int3(8, 4, 1); int3 grid_size_ = int3(0, 0, 0); - std::string code_; std::vector src_tensors_names_; std::vector dst_tensors_names_; std::vector compiler_options_; - std::vector linked_operations_; + std::vector linked_operations_; }; -// ElementwiseOperation can be fused(linked) to another operation. -// field linked_ indicate about this -// link_index_ used mostly for generating of correct names for -// linked code variables -// link_index_ is number of operation in sequence of linked operations -// and should be unique in this sequence -// link_index_ = 0 is equivalent that operation not linked. -class ElementwiseOperation : public GPUOperation { - public: - ElementwiseOperation() {} - explicit ElementwiseOperation(const OperationDef& definition) - : GPUOperation(definition) {} - - virtual ~ElementwiseOperation() {} - - absl::Status Compile(const CreationContext& creation_context) override; - int3 GetGridSize() const override; - - // Move only - ElementwiseOperation(ElementwiseOperation&& operation); - ElementwiseOperation& operator=(ElementwiseOperation&& operation); - ElementwiseOperation(const ElementwiseOperation&) = delete; - ElementwiseOperation& operator=(const ElementwiseOperation&) = delete; - - Arguments&& MoveArgs() { return std::move(args_); } - std::string GetCode() const { return code_; } - void AddUniquePostfix(const std::string& unique_postfix); - - bool IsLinkable() const { return linkable_; } - - protected: - bool check_src_channels_size_ = false; - bool linkable_ = true; -}; - -absl::Status MergeOperations( - const std::vector& linked_ops, - Arguments* merged_args, std::string* merged_code); - } // namespace cl } // namespace gpu } // namespace tflite diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/prelu.cc b/tensorflow/lite/delegates/gpu/cl/kernels/prelu.cc index 85c88f3b51b..1ca2e096a0e 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/prelu.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/prelu.cc @@ -24,47 +24,43 @@ namespace tflite { namespace gpu { namespace cl { -PReLU::PReLU(const OperationDef& definition, const PReLUAttributes& attr, - CalculationsPrecision scalar_precision) - : ElementwiseOperation(definition) { +absl::Status CreatePReLU(const CreationContext& creation_context, + const OperationDef& definition, + const PReLUAttributes& attr, GPUOperation* result) { + *result = GPUOperation(definition); + result->elementwise_ = true; if (attr.clip != 0) { if (definition.precision == CalculationsPrecision::F32) { - args_.AddFloat("clip", attr.clip); + result->args_.AddFloat("clip", attr.clip); } else { - args_.AddHalf("clip", half(attr.clip)); + result->args_.AddHalf("clip", half(attr.clip)); } - code_ = + result->code_ = "in_out_value = clamp(in_out_value, (FLT4)(0.0f), (FLT4)(args.clip)) + " "min((FLT4)(0.0f), in_out_value) * args.alpha.Read(S_COORD);"; } else { - code_ = + result->code_ = "in_out_value = max((FLT4)(0.0f), in_out_value) + min((FLT4)(0.0f), " "in_out_value) * args.alpha.Read(S_COORD);"; } -} -PReLU::PReLU(PReLU&& operation) : ElementwiseOperation(std::move(operation)) {} - -PReLU& PReLU::operator=(PReLU&& operation) { - if (this != &operation) { - ElementwiseOperation::operator=(std::move(operation)); - } - return *this; -} - -absl::Status CreatePReLU(const CreationContext& creation_context, - const OperationDef& definition, - const PReLUAttributes& attr, PReLU* result) { auto alpha = 
absl::get_if>(&attr.alpha); if (!alpha) { return absl::InvalidArgumentError("Alpha is missing"); } - const auto scalar_precision = creation_context.device->IsPowerVR() - ? CalculationsPrecision::F32 - : definition.precision; - *result = PReLU(definition, attr, scalar_precision); - RETURN_IF_ERROR(result->UploadParameters(*alpha, creation_context.context)); + TensorLinearDescriptor desc; + desc.storage_type = + DeduceLinearStorageType(definition.GetPrimaryStorageType()); + desc.element_type = definition.GetPrimaryDataType(); + + LinearStorage lt; + RETURN_IF_ERROR( + CreateLinearStorage(desc, *alpha, creation_context.context, <)); + result->args_.AddObject("alpha", AccessType::READ, + absl::make_unique(std::move(lt)), + absl::make_unique(desc)); + return absl::OkStatus(); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/prelu.h b/tensorflow/lite/delegates/gpu/cl/kernels/prelu.h index e65559cf7c7..b673217c799 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/prelu.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/prelu.h @@ -31,48 +31,9 @@ namespace tflite { namespace gpu { namespace cl { -class PReLU : public ElementwiseOperation { - public: - PReLU() = default; - // Move only - PReLU(PReLU&& operation); - PReLU& operator=(PReLU&& operation); - PReLU(const PReLU&) = delete; - PReLU& operator=(const PReLU&) = delete; - - friend absl::Status CreatePReLU(const CreationContext& creation_context, - const OperationDef& definition, - const PReLUAttributes& attr, PReLU* result); - - private: - PReLU(const OperationDef& definition, const PReLUAttributes& attr, - CalculationsPrecision scalar_precision); - - template - absl::Status UploadParameters( - const tflite::gpu::Tensor& parameters, CLContext* context); -}; - absl::Status CreatePReLU(const CreationContext& creation_context, const OperationDef& definition, - const PReLUAttributes& attr, PReLU* result); - -template -absl::Status PReLU::UploadParameters( - const tflite::gpu::Tensor& parameters, CLContext* context) { - TensorLinearDescriptor desc; - desc.storage_type = - DeduceLinearStorageType(definition_.GetPrimaryStorageType()); - desc.element_type = definition_.GetPrimaryDataType(); - - LinearStorage lt; - RETURN_IF_ERROR(CreateLinearStorage(desc, parameters, context, <)); - args_.AddObject("alpha", AccessType::READ, - absl::make_unique(std::move(lt)), - absl::make_unique(desc)); - - return absl::OkStatus(); -} + const PReLUAttributes& attr, GPUOperation* result); } // namespace cl } // namespace gpu diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/prelu_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/prelu_test.cc index 4b0006c7f32..06ff09ccca7 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/prelu_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/prelu_test.cc @@ -52,7 +52,7 @@ TEST_F(OpenCLOperationTest, PReLUAlpha) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - PReLU operation; + GPUOperation operation; ASSERT_OK(CreatePReLU(creation_context_, op_def, attr, &operation)); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 2, 1, 2), &dst_tensor)); @@ -83,7 +83,7 @@ TEST_F(OpenCLOperationTest, PReLUAlphaClip) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - PReLU operation; + GPUOperation operation; ASSERT_OK(CreatePReLU(creation_context_, op_def, attr, &operation)); 
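The tests run the op standalone; the more common path, per the usage comment in gpu_operation.h, is to link it into its producer. A sketch, assuming conv_op is some previously created GPUOperation:

    // Sketch: fusing the new PReLU into a preceding operation.
    GPUOperation prelu_op;
    RETURN_IF_ERROR(CreatePReLU(creation_context, op_def, attr, &prelu_op));
    conv_op.AddOperation(&prelu_op);   // prelu_op must stay alive until conv_op is compiled
    RETURN_IF_ERROR(conv_op.Compile(creation_context));  // do not Compile prelu_op itself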
ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 2, 1, 2), &dst_tensor)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/quantize_and_dequantize.cc b/tensorflow/lite/delegates/gpu/cl/kernels/quantize_and_dequantize.cc index 957fc9bbb98..e0c44e1cda7 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/quantize_and_dequantize.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/quantize_and_dequantize.cc @@ -25,59 +25,37 @@ limitations under the License. namespace tflite { namespace gpu { namespace cl { - -QuantizeAndDequantize::QuantizeAndDequantize( - const OperationDef& definition, const QuantizeAndDequantizeAttributes& attr, - CalculationsPrecision scalar_precision) - : ElementwiseOperation(definition) { - if (definition.precision == CalculationsPrecision::F32) { - args_.AddFloat("min", attr.min); - args_.AddFloat("max", attr.max); - args_.AddFloat("scale", attr.scale); - } else { - args_.AddHalf("min", half(attr.min)); - args_.AddHalf("max", half(attr.max)); - args_.AddHalf("scale", half(attr.scale)); - } - code_ = R"( -FLT4 clamped_value = min((FLT4)(args.max), max((FLT4)(args.min), in_out_value)); -FLT4 quantized_value = round((clamped_value - (FLT4)(args.min)) / (FLT4)(args.scale)); -FLT4 dequantized_value = quantized_value * (FLT4)(args.scale) + (FLT4)(args.min); -in_out_value = dequantized_value;)"; -} - -QuantizeAndDequantize::QuantizeAndDequantize(QuantizeAndDequantize&& operation) - : ElementwiseOperation(std::move(operation)) {} - -QuantizeAndDequantize& QuantizeAndDequantize::operator=( - QuantizeAndDequantize&& operation) { - if (this != &operation) { - ElementwiseOperation::operator=(std::move(operation)); - } - return *this; -} - -absl::Status CreateQuantizeAndDequantize( +GPUOperation CreateQuantizeAndDequantize( const CreationContext& creation_context, const OperationDef& definition, - const QuantizeAndDequantizeAttributes& attr, - QuantizeAndDequantize* result) { - const auto scalar_precision = creation_context.device->IsPowerVR() - ? CalculationsPrecision::F32 - : definition.precision; + const QuantizeAndDequantizeAttributes& attr) { + QuantizeAndDequantizeAttributes adjusted_attr = attr; const bool is_fp16 = definition.precision == CalculationsPrecision::F16 || definition.precision == CalculationsPrecision::F32_F16; if (is_fp16 && attr.scale < 0.000062f) { // The smallest positive normal number for Half-precision floating-point // format is 2^-14 ~ 0.000062f. Therefore, if the scale is lesser than this // number, we just reset it accordingly. 
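For a concrete sense of when that reset triggers (numbers chosen for illustration): quantizing the range [0, 0.001] with 8 bits gives scale = 0.001 / 255 ≈ 3.9e-6, well below 2^-14 ≈ 6.1e-5, so under F16 or F32_F16 precision the scale is bumped to 0.000062f before the kernel constants are emitted; in full F32 precision the original scale is kept.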
- QuantizeAndDequantizeAttributes adjusted_attr = attr; adjusted_attr.scale = 0.000062f; - *result = - QuantizeAndDequantize(definition, adjusted_attr, scalar_precision); - } else { - *result = QuantizeAndDequantize(definition, attr, scalar_precision); } - return absl::OkStatus(); + + GPUOperation op(definition); + op.elementwise_ = true; + if (definition.precision == CalculationsPrecision::F32) { + op.args_.AddFloat("min", adjusted_attr.min); + op.args_.AddFloat("max", adjusted_attr.max); + op.args_.AddFloat("scale", adjusted_attr.scale); + } else { + op.args_.AddHalf("min", half(adjusted_attr.min)); + op.args_.AddHalf("max", half(adjusted_attr.max)); + op.args_.AddHalf("scale", half(adjusted_attr.scale)); + } + op.code_ = R"( +FLT4 clamped_value = min((FLT4)(args.max), max((FLT4)(args.min), in_out_value)); +FLT4 quantized_value = round((clamped_value - (FLT4)(args.min)) / (FLT4)(args.scale)); +FLT4 dequantized_value = quantized_value * (FLT4)(args.scale) + (FLT4)(args.min); +in_out_value = dequantized_value;)"; + + return op; } } // namespace cl diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/quantize_and_dequantize.h b/tensorflow/lite/delegates/gpu/cl/kernels/quantize_and_dequantize.h index a40aa21d23c..6e028625852 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/quantize_and_dequantize.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/quantize_and_dequantize.h @@ -43,43 +43,9 @@ namespace cl { // // NOTE: We do not need to nudge min/max values in this op, since they would // already be adjusted while generating the quantized model. -class QuantizeAndDequantize : public ElementwiseOperation { - public: - QuantizeAndDequantize() = default; - // Move only - QuantizeAndDequantize(QuantizeAndDequantize&& operation); - QuantizeAndDequantize& operator=(QuantizeAndDequantize&& operation); - QuantizeAndDequantize(const QuantizeAndDequantize&) = delete; - QuantizeAndDequantize& operator=(const QuantizeAndDequantize&) = delete; - - friend absl::Status CreateQuantizeAndDequantize( - const CreationContext& creation_context, const OperationDef& definition, - const QuantizeAndDequantizeAttributes& attr, - QuantizeAndDequantize* result); - - private: - QuantizeAndDequantize(const OperationDef& definition, - const QuantizeAndDequantizeAttributes& attr, - CalculationsPrecision scalar_precision); - - template - absl::Status UploadParameters( - const tflite::gpu::Tensor& parameters, CLContext* context); -}; - -absl::Status CreateQuantizeAndDequantize( +GPUOperation CreateQuantizeAndDequantize( const CreationContext& creation_context, const OperationDef& definition, - const QuantizeAndDequantizeAttributes& attr, QuantizeAndDequantize* result); - -template -absl::Status QuantizeAndDequantize::UploadParameters( - const tflite::gpu::Tensor& parameters, CLContext* context) { - LinearStorageCreateInfo create_info; - create_info.storage_type = - DeduceLinearStorageType(definition_.GetPrimaryStorageType()); - create_info.data_type = definition_.GetPrimaryDataType(); - return absl::OkStatus(); -} + const QuantizeAndDequantizeAttributes& attr); } // namespace cl } // namespace gpu diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/quantize_and_dequantize_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/quantize_and_dequantize_test.cc index 71d6d066b9b..43b5d69323d 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/quantize_and_dequantize_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/quantize_and_dequantize_test.cc @@ -56,9 +56,8 @@ TEST_F(OpenCLOperationTest, QuantAndDequant_Dim2Bits8) { 
op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - QuantizeAndDequantize operation; - ASSERT_OK(CreateQuantizeAndDequantize(creation_context_, op_def, attr, - &operation)); + GPUOperation operation = + CreateQuantizeAndDequantize(creation_context_, op_def, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 3, 2, 1), &dst_tensor)); EXPECT_THAT(dst_tensor.data, @@ -92,9 +91,8 @@ TEST_F(OpenCLOperationTest, QuantAndDequant_Dim3Bits8_NegativeRange) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - QuantizeAndDequantize operation; - ASSERT_OK(CreateQuantizeAndDequantize(creation_context_, op_def, attr, - &operation)); + GPUOperation operation = + CreateQuantizeAndDequantize(creation_context_, op_def, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 3, 1, 2), &dst_tensor)); EXPECT_THAT(dst_tensor.data, @@ -128,9 +126,8 @@ TEST_F(OpenCLOperationTest, QuantAndDequant_Dim3Bits16) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - QuantizeAndDequantize operation; - ASSERT_OK(CreateQuantizeAndDequantize(creation_context_, op_def, attr, - &operation)); + GPUOperation operation = + CreateQuantizeAndDequantize(creation_context_, op_def, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 3, 1, 2), &dst_tensor)); EXPECT_THAT(dst_tensor.data, @@ -164,9 +161,8 @@ TEST_F(OpenCLOperationTest, QuantAndDequant_Dim2Bits16_NegativeRange) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - QuantizeAndDequantize operation; - ASSERT_OK(CreateQuantizeAndDequantize(creation_context_, op_def, attr, - &operation)); + GPUOperation operation = + CreateQuantizeAndDequantize(creation_context_, op_def, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 3, 2, 1), &dst_tensor)); EXPECT_THAT(dst_tensor.data, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/relu.cc b/tensorflow/lite/delegates/gpu/cl/kernels/relu.cc index 774c030545a..a80dccd6259 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/relu.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/relu.cc @@ -21,50 +21,36 @@ limitations under the License. 
namespace tflite { namespace gpu { namespace cl { +GPUOperation CreateReLU(const CreationContext& creation_context, + const OperationDef& definition, + const ReLUAttributes& attr) { + GPUOperation op(definition); + op.elementwise_ = true; -ReLU::ReLU(const OperationDef& definition, const ReLUAttributes& attr, - CalculationsPrecision scalar_precision) - : ElementwiseOperation(definition) { std::string min_func; if (attr.alpha != 0.0f) { min_func = "min(in_out_value * args.alpha, (FLT)(0.0f))"; if (definition.precision == CalculationsPrecision::F32) { - args_.AddFloat("alpha", attr.alpha); + op.args_.AddFloat("alpha", attr.alpha); } else { - args_.AddHalf("alpha", half(attr.alpha)); + op.args_.AddHalf("alpha", half(attr.alpha)); } } else { min_func = "(FLT)(0.0f)"; } if (attr.clip != 0.0f) { if (definition.precision == CalculationsPrecision::F32) { - args_.AddFloat("clip", attr.clip); + op.args_.AddFloat("clip", attr.clip); } else { - args_.AddHalf("clip", half(attr.clip)); + op.args_.AddHalf("clip", half(attr.clip)); } - code_ = absl::StrCat("in_out_value = clamp(in_out_value, " + min_func + - ", args.clip);"); + op.code_ = absl::StrCat("in_out_value = clamp(in_out_value, " + min_func + + ", args.clip);"); } else { - code_ = absl::StrCat("in_out_value = max(in_out_value, ", min_func, ");"); + op.code_ = + absl::StrCat("in_out_value = max(in_out_value, ", min_func, ");"); } -} - -ReLU::ReLU(ReLU&& operation) : ElementwiseOperation(std::move(operation)) {} - -ReLU& ReLU::operator=(ReLU&& operation) { - if (this != &operation) { - ElementwiseOperation::operator=(std::move(operation)); - } - return *this; -} - -ReLU CreateReLU(const CreationContext& creation_context, - const OperationDef& definition, const ReLUAttributes& attr) { - const auto scalar_precision = creation_context.device->IsPowerVR() - ? 
CalculationsPrecision::F32 - : definition.precision; - ReLU operation(definition, attr, scalar_precision); - return operation; + return op; } } // namespace cl diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/relu.h b/tensorflow/lite/delegates/gpu/cl/kernels/relu.h index ccb6f6ca37f..001e23da41c 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/relu.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/relu.h @@ -25,25 +25,9 @@ namespace tflite { namespace gpu { namespace cl { -class ReLU : public ElementwiseOperation { - public: - // Move only - ReLU(ReLU&& operation); - ReLU& operator=(ReLU&& operation); - ReLU(const ReLU&) = delete; - ReLU& operator=(const ReLU&) = delete; - - friend ReLU CreateReLU(const CreationContext& creation_context, - const OperationDef& definition, - const ReLUAttributes& attr); - - private: - ReLU(const OperationDef& definition, const ReLUAttributes& attr, - CalculationsPrecision scalar_precision); -}; - -ReLU CreateReLU(const CreationContext& creation_context, - const OperationDef& definition, const ReLUAttributes& attr); +GPUOperation CreateReLU(const CreationContext& creation_context, + const OperationDef& definition, + const ReLUAttributes& attr); } // namespace cl } // namespace gpu diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/relu_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/relu_test.cc index cebc9886ba5..f741a408661 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/relu_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/relu_test.cc @@ -49,7 +49,7 @@ TEST_F(OpenCLOperationTest, ReLUNoClipNoAlpha) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - ReLU operation = CreateReLU(creation_context_, op_def, attr); + GPUOperation operation = CreateReLU(creation_context_, op_def, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 2, 1, 2), &dst_tensor)); EXPECT_THAT(dst_tensor.data, @@ -76,7 +76,7 @@ TEST_F(OpenCLOperationTest, ReLUClip) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - ReLU operation = CreateReLU(creation_context_, op_def, attr); + GPUOperation operation = CreateReLU(creation_context_, op_def, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 2, 1, 2), &dst_tensor)); EXPECT_THAT(dst_tensor.data, @@ -103,7 +103,7 @@ TEST_F(OpenCLOperationTest, ReLUAlpha) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - ReLU operation = CreateReLU(creation_context_, op_def, attr); + GPUOperation operation = CreateReLU(creation_context_, op_def, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 2, 1, 2), &dst_tensor)); EXPECT_THAT(dst_tensor.data, @@ -130,7 +130,7 @@ TEST_F(OpenCLOperationTest, ReLUAlphaClip) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - ReLU operation = CreateReLU(creation_context_, op_def, attr); + GPUOperation operation = CreateReLU(creation_context_, op_def, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 2, 1, 2), &dst_tensor)); EXPECT_THAT(dst_tensor.data, diff --git 
a/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc index 088677ba7e2..f60af5f730d 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc @@ -144,9 +144,9 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context, if (inputs.size() == 2 && (inputs[0]->tensor.shape.c == inputs[1]->tensor.shape.c || inputs[1]->tensor.shape.c == 1)) { - ElementwiseTwoInput operation = + GPUOperation operation = CreateElementwiseTwoInput(op_def, op_type, inputs[1]->tensor.shape); - *gpu_op = absl::make_unique(std::move(operation)); + *gpu_op = absl::make_unique(std::move(operation)); return absl::OkStatus(); } else if (inputs.size() >= 2) { auto output = outputs[0]; @@ -167,25 +167,21 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context, absl::get_if>( &attr.param); if (scalar) { - ElementwiseOneRuntimeOneScalar operation = - CreateElementwiseOneRuntimeOneScalar(creation_context, op_def, - op_type, *scalar); - *gpu_op = absl::make_unique( - std::move(operation)); + GPUOperation operation = CreateElementwiseOneRuntimeOneScalar( + creation_context, op_def, op_type, *scalar); + *gpu_op = absl::make_unique(std::move(operation)); return absl::OkStatus(); } else if (linear_tensor) { - ElementwiseTwoInput operation; + GPUOperation operation; RETURN_IF_ERROR(CreateElementwiseTwoInput( creation_context, op_def, op_type, *linear_tensor, &operation)); - *gpu_op = - absl::make_unique(std::move(operation)); + *gpu_op = absl::make_unique(std::move(operation)); return absl::OkStatus(); } else if (hwc_tensor) { - ElementwiseTwoInput operation; + GPUOperation operation; RETURN_IF_ERROR(CreateElementwiseTwoInput( creation_context, op_def, op_type, *hwc_tensor, &operation)); - *gpu_op = - absl::make_unique(std::move(operation)); + *gpu_op = absl::make_unique(std::move(operation)); return absl::OkStatus(); } } @@ -295,9 +291,9 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context, } case OperationType::MUL: { if (inputs.size() == 2) { - ElementwiseTwoInput operation = + GPUOperation operation = CreateElementwiseTwoInput(op_def, op_type, inputs[1]->tensor.shape); - *gpu_op = absl::make_unique(std::move(operation)); + *gpu_op = absl::make_unique(std::move(operation)); return absl::OkStatus(); } else if (inputs.size() == 1 && node.operation.attributes.has_value()) { auto attr = @@ -310,25 +306,21 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context, absl::get_if>( &attr.param); if (scalar) { - ElementwiseOneRuntimeOneScalar operation = - CreateElementwiseOneRuntimeOneScalar(creation_context, op_def, - op_type, *scalar); - *gpu_op = absl::make_unique( - std::move(operation)); + GPUOperation operation = CreateElementwiseOneRuntimeOneScalar( + creation_context, op_def, op_type, *scalar); + *gpu_op = absl::make_unique(std::move(operation)); return absl::OkStatus(); } else if (linear_tensor) { - ElementwiseTwoInput operation; + GPUOperation operation; RETURN_IF_ERROR(CreateElementwiseTwoInput( creation_context, op_def, op_type, *linear_tensor, &operation)); - *gpu_op = - absl::make_unique(std::move(operation)); + *gpu_op = absl::make_unique(std::move(operation)); return absl::OkStatus(); } else if (hwc_tensor) { - ElementwiseTwoInput operation; + GPUOperation operation; RETURN_IF_ERROR(CreateElementwiseTwoInput( creation_context, op_def, op_type, *hwc_tensor, &operation)); - 
*gpu_op = - absl::make_unique(std::move(operation)); + *gpu_op = absl::make_unique(std::move(operation)); return absl::OkStatus(); } } @@ -353,8 +345,8 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context, case OperationType::QUANTIZE_AND_DEQUANTIZE: { auto attr = absl::any_cast( node.operation.attributes); - return SelectQuantizeAndDequantize(attr, creation_context, op_def, - gpu_op); + SelectQuantizeAndDequantize(attr, creation_context, op_def, gpu_op); + return absl::OkStatus(); } case OperationType::RELU: { auto attr = absl::any_cast(node.operation.attributes); @@ -405,9 +397,8 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context, case OperationType::SQRT: case OperationType::SQUARE: case OperationType::TANH: { - ElementwiseOneInput operation = - CreateElementwiseOneInput(op_def, op_type); - *gpu_op = absl::make_unique(std::move(operation)); + GPUOperation operation = CreateElementwiseOneInput(op_def, op_type); + *gpu_op = absl::make_unique(std::move(operation)); return absl::OkStatus(); } case OperationType::DIV: @@ -417,9 +408,9 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context, case OperationType::SQUARED_DIFF: case OperationType::SUB: { if (inputs.size() == 2) { - ElementwiseTwoInput operation = + GPUOperation operation = CreateElementwiseTwoInput(op_def, op_type, inputs[1]->tensor.shape); - *gpu_op = absl::make_unique(std::move(operation)); + *gpu_op = absl::make_unique(std::move(operation)); return absl::OkStatus(); } else if (inputs.size() == 1 && node.operation.attributes.has_value()) { auto attr = @@ -432,25 +423,21 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context, absl::get_if>( &attr.param); if (scalar) { - ElementwiseOneRuntimeOneScalar operation = - CreateElementwiseOneRuntimeOneScalar(creation_context, op_def, - op_type, *scalar); - *gpu_op = absl::make_unique( - std::move(operation)); + GPUOperation operation = CreateElementwiseOneRuntimeOneScalar( + creation_context, op_def, op_type, *scalar); + *gpu_op = absl::make_unique(std::move(operation)); return absl::OkStatus(); } else if (linear_tensor) { - ElementwiseTwoInput operation; + GPUOperation operation; RETURN_IF_ERROR(CreateElementwiseTwoInput( creation_context, op_def, op_type, *linear_tensor, &operation)); - *gpu_op = - absl::make_unique(std::move(operation)); + *gpu_op = absl::make_unique(std::move(operation)); return absl::OkStatus(); } else if (hwc_tensor) { - ElementwiseTwoInput operation; + GPUOperation operation; RETURN_IF_ERROR(CreateElementwiseTwoInput( creation_context, op_def, op_type, *hwc_tensor, &operation)); - *gpu_op = - absl::make_unique(std::move(operation)); + *gpu_op = absl::make_unique(std::move(operation)); return absl::OkStatus(); } } diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.cc b/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.cc index a32efd5dd2c..1c0bed74422 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.cc @@ -54,17 +54,17 @@ void SelectLSTM(const OperationDef& op_def, const DeviceInfo& device_info, void SelectReLU(const CreationContext& creation_context, const ReLUAttributes& attr, const OperationDef& op_def, std::unique_ptr* ptr) { - ReLU relu = CreateReLU(creation_context, op_def, attr); - *ptr = absl::make_unique(std::move(relu)); + GPUOperation relu = CreateReLU(creation_context, op_def, attr); + *ptr = absl::make_unique(std::move(relu)); } 
absl::Status SelectPReLU(const PReLUAttributes& attr, const CreationContext& creation_context, const OperationDef& op_def, std::unique_ptr* ptr) { - PReLU operation; + GPUOperation operation; RETURN_IF_ERROR(CreatePReLU(creation_context, op_def, attr, &operation)); - *ptr = absl::make_unique(std::move(operation)); + *ptr = absl::make_unique(std::move(operation)); return absl::OkStatus(); } @@ -85,8 +85,8 @@ void SelectMaxUnpooling(const MaxUnpooling2DAttributes& attr, void SelectAdd(const OperationDef& op_def, const std::vector& channels, int dst_channels, std::unique_ptr* ptr) { - Add operation = CreateAdd(op_def, channels, dst_channels); - *ptr = absl::make_unique(std::move(operation)); + GPUOperation operation = CreateAdd(op_def, channels, dst_channels); + *ptr = absl::make_unique(std::move(operation)); } absl::Status SelectResize(const Resize2DAttributes& attr, @@ -203,15 +203,13 @@ absl::Status SelectWinograd36To4x4( return absl::OkStatus(); } -absl::Status SelectQuantizeAndDequantize( - const QuantizeAndDequantizeAttributes& attr, - const CreationContext& creation_context, const OperationDef& op_def, - std::unique_ptr* ptr) { - QuantizeAndDequantize operation; - RETURN_IF_ERROR( - CreateQuantizeAndDequantize(creation_context, op_def, attr, &operation)); - *ptr = absl::make_unique(std::move(operation)); - return absl::OkStatus(); +void SelectQuantizeAndDequantize(const QuantizeAndDequantizeAttributes& attr, + const CreationContext& creation_context, + const OperationDef& op_def, + std::unique_ptr* ptr) { + GPUOperation operation = + CreateQuantizeAndDequantize(creation_context, op_def, attr); + *ptr = absl::make_unique(std::move(operation)); } } // namespace cl diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.h b/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.h index f266882a458..7133aa94502 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.h +++ b/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.h @@ -97,10 +97,10 @@ absl::Status SelectWinograd36To4x4( const tflite::gpu::Tensor& biases, std::unique_ptr* ptr); -absl::Status SelectQuantizeAndDequantize( - const QuantizeAndDequantizeAttributes& attr, - const CreationContext& creation_context, const OperationDef& op_def, - std::unique_ptr* ptr); +void SelectQuantizeAndDequantize(const QuantizeAndDequantizeAttributes& attr, + const CreationContext& creation_context, + const OperationDef& op_def, + std::unique_ptr* ptr); } // namespace cl } // namespace gpu From 1a8f8965b242e186d1082dca088433823d784a60 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Mon, 3 Aug 2020 17:38:51 -0700 Subject: [PATCH 2022/2522] Device and DeviceInfo separated. DeviceInfo doesn't have OpenCL API calls/elements.
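Illustration (editor's sketch, not part of this patch): after the split, DeviceInfo is a plain struct with vendor/version predicates and no OpenCL handles, so backend code asks capability questions through helpers such as IsAdreno() or IsMali() instead of comparing raw Vendor enums or calling the OpenCL API. The function below is hypothetical; the fields and predicates it uses mirror the ones this patch adds to device_info.h, and the logic loosely follows the work-group tuning change further down in the diff.

#include <algorithm>

#include "tensorflow/lite/delegates/gpu/cl/device_info.h"

namespace tflite {
namespace gpu {
namespace cl {

// Hypothetical helper: picks a work-group Z limit from DeviceInfo alone,
// with no cl_device_id or other OpenCL objects involved.
int PickMaxWorkGroupZ(const DeviceInfo& device_info) {
  int max_z = 16;
  if (device_info.IsAdreno()) {
    // Adreno 4xx and newer tolerate a larger Z dimension than 3xx.
    max_z = device_info.adreno_info.gpu_version < 400 ? 16 : 64;
  }
  // Never exceed what the device reports as its maximum Z work-group size.
  return std::min(max_z, device_info.max_work_group_size_z);
}

}  // namespace cl
}  // namespace gpu
}  // namespace tflite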
PiperOrigin-RevId: 324719724 Change-Id: I0ffb6eaf6cd7c1edc77e14b28528710aced34519 --- tensorflow/lite/delegates/gpu/cl/BUILD | 10 + tensorflow/lite/delegates/gpu/cl/arguments.cc | 2 +- .../lite/delegates/gpu/cl/cl_command_queue.cc | 8 +- tensorflow/lite/delegates/gpu/cl/cl_device.cc | 337 ++++-------------- tensorflow/lite/delegates/gpu/cl/cl_device.h | 134 +------ .../lite/delegates/gpu/cl/device_info.cc | 268 ++++++++++++++ .../lite/delegates/gpu/cl/device_info.h | 168 +++++++++ .../gpu/cl/kernels/conv_constants.cc | 2 +- .../gpu/cl/kernels/work_group_picking.cc | 11 +- .../gpu/cl/selectors/convolution_selector.cc | 77 ++-- .../convolution_transposed_selector.cc | 27 +- .../cl/selectors/dw_convolution_selector.cc | 18 +- .../cl/selectors/fully_connected_selector.cc | 28 +- 13 files changed, 589 insertions(+), 501 deletions(-) create mode 100644 tensorflow/lite/delegates/gpu/cl/device_info.cc create mode 100644 tensorflow/lite/delegates/gpu/cl/device_info.h diff --git a/tensorflow/lite/delegates/gpu/cl/BUILD b/tensorflow/lite/delegates/gpu/cl/BUILD index 2344a7c6c40..ebfb2cff41b 100644 --- a/tensorflow/lite/delegates/gpu/cl/BUILD +++ b/tensorflow/lite/delegates/gpu/cl/BUILD @@ -166,6 +166,7 @@ cc_library( srcs = ["cl_device.cc"], hdrs = ["cl_device.h"], deps = [ + ":device_info", ":opencl_wrapper", ":util", "//tensorflow/lite/delegates/gpu/common:status", @@ -251,6 +252,15 @@ flatbuffer_cc_library( ], ) +cc_library( + name = "device_info", + srcs = ["device_info.cc"], + hdrs = ["device_info.h"], + deps = [ + "@com_google_absl//absl/strings", + ], +) + cc_library( name = "egl_sync", srcs = ["egl_sync.cc"], diff --git a/tensorflow/lite/delegates/gpu/cl/arguments.cc b/tensorflow/lite/delegates/gpu/cl/arguments.cc index 79241091b14..ed72bcc7c97 100644 --- a/tensorflow/lite/delegates/gpu/cl/arguments.cc +++ b/tensorflow/lite/delegates/gpu/cl/arguments.cc @@ -690,7 +690,7 @@ std::string Arguments::AddActiveArgument(const std::string& arg_name, void Arguments::ResolveArgsPass(const DeviceInfo& device_info, std::string* code) { - bool use_f32_for_half_arguments = device_info.vendor == Vendor::POWERVR; + bool use_f32_for_half_arguments = device_info.IsPowerVR(); size_t position = 0; size_t next_position = code->find(kArgsPrefix); while (next_position != std::string::npos) { diff --git a/tensorflow/lite/delegates/gpu/cl/cl_command_queue.cc b/tensorflow/lite/delegates/gpu/cl/cl_command_queue.cc index f7501dab5af..a1795b18b27 100644 --- a/tensorflow/lite/delegates/gpu/cl/cl_command_queue.cc +++ b/tensorflow/lite/delegates/gpu/cl/cl_command_queue.cc @@ -216,16 +216,14 @@ absl::Status ProfilingCommandQueue::GetBestWorkGroupIndex( const CLKernel& kernel, const DeviceInfo& device_info, const int3& grid, const std::vector& work_group_sizes, int* index) { // Some Adreno 3xx can have wrong numbers for some events - const bool possible_bug_with_events = - device_info.vendor == Vendor::QUALCOMM && - device_info.adreno_info.gpu_version < 400; + const bool possible_bug_with_events = device_info.IsAdreno3xx(); events_.resize(work_group_sizes.size()); for (int i = 0; i < work_group_sizes.size(); ++i) { RETURN_IF_ERROR(CLCommandQueue::DispatchImplicit( kernel, grid, work_group_sizes[i], &events_[i])); // reducing the speed of memory leak on Mali for some kernels - if (device_info.vendor == Vendor::MALI && i % 8 == 7) { + if (device_info.IsMali() && i % 8 == 7) { events_[i - 7].Wait(); } if (possible_bug_with_events) { @@ -237,7 +235,7 @@ absl::Status ProfilingCommandQueue::GetBestWorkGroupIndex( 
RETURN_IF_ERROR(WaitForCompletion()); // To release memory of some kernel pool on Mali. - if (device_info.vendor == Vendor::MALI) { + if (device_info.IsMali()) { RETURN_IF_ERROR(kernel.ReInit()); } diff --git a/tensorflow/lite/delegates/gpu/cl/cl_device.cc b/tensorflow/lite/delegates/gpu/cl/cl_device.cc index f4f1f1c923f..b93bfb25ad1 100644 --- a/tensorflow/lite/delegates/gpu/cl/cl_device.cc +++ b/tensorflow/lite/delegates/gpu/cl/cl_device.cc @@ -128,24 +128,24 @@ Vendor ParseVendor(const std::string& device_name, std::transform(v_name.begin(), v_name.end(), v_name.begin(), ::tolower); if (d_name.find("qualcomm") != std::string::npos || v_name.find("qualcomm") != std::string::npos) { - return Vendor::QUALCOMM; + return Vendor::kQualcomm; } else if (d_name.find("mali") != std::string::npos || v_name.find("mali") != std::string::npos) { - return Vendor::MALI; + return Vendor::kMali; } else if (d_name.find("power") != std::string::npos || v_name.find("power") != std::string::npos) { - return Vendor::POWERVR; + return Vendor::kPowerVR; } else if (d_name.find("nvidia") != std::string::npos || v_name.find("nvidia") != std::string::npos) { - return Vendor::NVIDIA; + return Vendor::kNvidia; } else if (d_name.find("advanced micro devices") != std::string::npos || v_name.find("advanced micro devices") != std::string::npos) { - return Vendor::AMD; + return Vendor::kAMD; } else if (d_name.find("intel") != std::string::npos || v_name.find("intel") != std::string::npos) { - return Vendor::INTEL; + return Vendor::kIntel; } else { - return Vendor::UNKNOWN; + return Vendor::kUnknown; } } @@ -156,316 +156,99 @@ bool IsGPUVersionInRange(int gpu_version, int min_version, int max_version) { } } // namespace -// There is no rule for gpu version encoding, but we found these samples: -// Version: OpenCL C 2.0 Adreno(TM) 540 // Pixel 2 -// Version: OpenCL C 2.0 Adreno(TM) 630 // Sony Compact XZ2 -// Version: OpenCL C 2.0 Adreno(TM) 630 // Pixel 3 -// Version: OpenCL C 2.0 Adreno(TM) 540 // Samsung S8 -// Version: OpenCL C 1.2 Adreno(TM) 430 // HTC One M9 -// Version: OpenCL C 2.0 Adreno(TM) 530 // Samsung S7 Edge -// Version: OpenCL C 1.2 Adreno(TM) 405 // Motorola Moto G(4) -// After the number string ends. 
-// It is assumed that the for Adreno GPUs has -// the following format: -// Adreno(TM) -// Returns -1 if vendor-specific information cannot be parsed -int GetAdrenoGPUVersion(const std::string& gpu_version) { - const std::string gpu = absl::AsciiStrToLower(gpu_version); - const std::vector words = absl::StrSplit(gpu, ' '); - int i = 0; - for (; i < words.size(); ++i) { - if (words[i].find("adreno") != words[i].npos) { - break; - } - } - i += 1; - for (; i < words.size(); ++i) { - int number; - bool is_number = absl::SimpleAtoi(words[i], &number); - // Adreno GPUs starts from 2xx, but opencl support should be only from 3xx - if (is_number && number >= 300) { - return number; - } - } - return -1; -} - -MaliGPU GetMaliGPUVersion(const std::string& device_name) { - const std::map kMapping = { - {"T604", MaliGPU::T604}, {"T622", MaliGPU::T622}, {"T624", MaliGPU::T624}, - {"T628", MaliGPU::T628}, {"T658", MaliGPU::T658}, {"T678", MaliGPU::T678}, - {"T720", MaliGPU::T720}, {"T760", MaliGPU::T760}, {"T820", MaliGPU::T820}, - {"T830", MaliGPU::T830}, {"T860", MaliGPU::T860}, {"T880", MaliGPU::T880}, - {"G31", MaliGPU::G31}, {"G51", MaliGPU::G51}, {"G71", MaliGPU::G71}, - {"G52", MaliGPU::G52}, {"G72", MaliGPU::G72}, {"G76", MaliGPU::G76}, - {"G57", MaliGPU::G57}, {"G77", MaliGPU::G77}, - }; - for (const auto& v : kMapping) { - if (device_name.find(v.first) != std::string::npos) { - return v.second; - } - } - return MaliGPU::UNKNOWN; -} - -std::string VendorToString(Vendor v) { - switch (v) { - case Vendor::QUALCOMM: - return "Qualcomm"; - case Vendor::MALI: - return "Mali"; - case Vendor::POWERVR: - return "PowerVR"; - case Vendor::NVIDIA: - return "NVIDIA"; - case Vendor::AMD: - return "AMD"; - case Vendor::INTEL: - return "Intel"; - case Vendor::UNKNOWN: - return "unknown vendor"; - } -} - -std::string OpenCLVersionToString(OpenCLVersion version) { - switch (version) { - case OpenCLVersion::CL_1_0: - return "1.0"; - case OpenCLVersion::CL_1_1: - return "1.1"; - case OpenCLVersion::CL_1_2: - return "1.2"; - case OpenCLVersion::CL_2_0: - return "2.0"; - case OpenCLVersion::CL_2_1: - return "2.1"; - case OpenCLVersion::CL_2_2: - return "2.2"; - case OpenCLVersion::CL_3_0: - return "3.0"; - } -} - -AdrenoInfo::AdrenoInfo(const std::string& device_version) - : gpu_version(GetAdrenoGPUVersion(device_version)) {} - -int AdrenoInfo::GetMaximumWavesCount() const { - if (gpu_version < 400) { - return -1; // Adreno 3xx does not support it currently - } else if (gpu_version >= 400 && gpu_version < 500) { - return -1; // Adreno 4xx does not support it currently - } else if (gpu_version >= 500 && gpu_version < 600) { - return -1; // Adreno 5xx does not support it currently - } else if (gpu_version >= 600 && gpu_version < 700) { - return gpu_version == 640 ? 30 : 16; - } else { - return -1; // Adreno 7xx and higher does not exist yet - } -} - -int AdrenoInfo::GetRegisterMemorySizePerComputeUnit() const { - if (gpu_version < 400) { - return -1; // Adreno 3xx does not support it currently - } else if (gpu_version >= 400 && gpu_version < 500) { - return -1; // Adreno 4xx does not support it currently - } else if (gpu_version >= 500 && gpu_version < 600) { - return -1; // Adreno 5xx does not support it currently - } else if (gpu_version >= 600 && gpu_version < 700) { - return gpu_version == 640 ? 
128 * 144 * 16 : 128 * 96 * 16; - } else { - return -1; // Adreno 7xx and higher does not exist yet - } -} - -int AdrenoInfo::GetMaximumWavesCount(int register_footprint_per_tread, - bool full_wave) const { - const int register_usage_per_wave = - GetWaveSize(full_wave) * register_footprint_per_tread; - const int possible_waves_count = - GetRegisterMemorySizePerComputeUnit() / register_usage_per_wave; - return std::min(possible_waves_count, GetMaximumWavesCount()); -} - -int AdrenoInfo::GetWaveSize(bool full_wave) const { - if (gpu_version < 400) { - return -1; // Adreno 3xx does not support it currently - } else if (gpu_version < 600) { - return full_wave ? 64 : 32; - } else { - return full_wave ? 128 : 64; - } -} - -MaliInfo::MaliInfo(const std::string& device_name) - : gpu_version(GetMaliGPUVersion(device_name)) {} - -bool MaliInfo::IsMaliT6xx() const { - return gpu_version == MaliGPU::T604 || gpu_version == MaliGPU::T622 || - gpu_version == MaliGPU::T624 || gpu_version == MaliGPU::T628 || - gpu_version == MaliGPU::T658 || gpu_version == MaliGPU::T678; -} - -bool MaliInfo::IsMaliT7xx() const { - return gpu_version == MaliGPU::T720 || gpu_version == MaliGPU::T760; -} - -bool MaliInfo::IsMaliT8xx() const { - return gpu_version == MaliGPU::T820 || gpu_version == MaliGPU::T830 || - gpu_version == MaliGPU::T860 || gpu_version == MaliGPU::T880; -} - -bool MaliInfo::IsMidgard() const { - return IsMaliT6xx() || IsMaliT7xx() || IsMaliT8xx(); -} - -bool MaliInfo::IsBifrostGen1() const { - return gpu_version == MaliGPU::G31 || gpu_version == MaliGPU::G51 || - gpu_version == MaliGPU::G71; -} - -bool MaliInfo::IsBifrostGen2() const { - return gpu_version == MaliGPU::G52 || gpu_version == MaliGPU::G72; -} - -bool MaliInfo::IsBifrostGen3() const { return gpu_version == MaliGPU::G76; } - -bool MaliInfo::IsBifrost() const { - return IsBifrostGen1() || IsBifrostGen2() || IsBifrostGen3(); -} - -bool MaliInfo::IsValhall() const { - return gpu_version == MaliGPU::G57 || gpu_version == MaliGPU::G77; -} - -DeviceInfo::DeviceInfo(cl_device_id id) { +DeviceInfo DeviceInfoFromDeviceID(cl_device_id id) { + DeviceInfo info; const auto device_name = GetDeviceInfo(id, CL_DEVICE_NAME); const auto vendor_name = GetDeviceInfo(id, CL_DEVICE_VENDOR); const auto opencl_c_version = GetDeviceInfo(id, CL_DEVICE_OPENCL_C_VERSION); - vendor = ParseVendor(device_name, vendor_name); - if (vendor == Vendor::QUALCOMM) { - adreno_info = AdrenoInfo(opencl_c_version); - } else if (vendor == Vendor::MALI) { - mali_info = MaliInfo(device_name); + info.vendor = ParseVendor(device_name, vendor_name); + if (info.vendor == Vendor::kQualcomm) { + info.adreno_info = AdrenoInfo(opencl_c_version); + } else if (info.vendor == Vendor::kMali) { + info.mali_info = MaliInfo(device_name); } - cl_version = ParseCLVersion(opencl_c_version); - extensions = + info.cl_version = ParseCLVersion(opencl_c_version); + info.extensions = absl::StrSplit(GetDeviceInfo(id, CL_DEVICE_EXTENSIONS), ' '); - supports_fp16 = false; - supports_image3d_writes = false; - for (const auto& ext : extensions) { + info.supports_fp16 = false; + info.supports_image3d_writes = false; + for (const auto& ext : info.extensions) { if (ext == "cl_khr_fp16") { - supports_fp16 = true; + info.supports_fp16 = true; } if (ext == "cl_khr_3d_image_writes") { - supports_image3d_writes = true; + info.supports_image3d_writes = true; } } - f32_config = + cl_device_fp_config f32_config = GetDeviceInfo(id, CL_DEVICE_SINGLE_FP_CONFIG); - supports_fp32_rtn = f32_config & CL_FP_ROUND_TO_NEAREST; + 
info.supports_fp32_rtn = f32_config & CL_FP_ROUND_TO_NEAREST; - if (supports_fp16) { + if (info.supports_fp16) { + cl_device_fp_config f16_config; auto status = GetDeviceInfo( id, CL_DEVICE_HALF_FP_CONFIG, &f16_config); // AMD supports cl_khr_fp16 but CL_DEVICE_HALF_FP_CONFIG is empty. - if (status.ok() && vendor != Vendor::AMD) { - supports_fp16_rtn = f16_config & CL_FP_ROUND_TO_NEAREST; + if (status.ok() && info.vendor != Vendor::kAMD) { + info.supports_fp16_rtn = f16_config & CL_FP_ROUND_TO_NEAREST; } else { // happens on PowerVR f16_config = f32_config; - supports_fp16_rtn = supports_fp32_rtn; + info.supports_fp16_rtn = info.supports_fp32_rtn; } } else { - f16_config = 0; - supports_fp16_rtn = false; + info.supports_fp16_rtn = false; } - if (vendor == Vendor::POWERVR && !supports_fp16) { + if (info.vendor == Vendor::kPowerVR && !info.supports_fp16) { // PowerVR doesn't have full support of fp16 and so doesn't list this // extension. But it can support fp16 in MADs and as buffers/textures types, // so we will use it. - supports_fp16 = true; - f16_config = f32_config; - supports_fp16_rtn = supports_fp32_rtn; + info.supports_fp16 = true; + info.supports_fp16_rtn = info.supports_fp32_rtn; } - if (!supports_image3d_writes && - ((vendor == Vendor::QUALCOMM && - IsGPUVersionInRange(adreno_info.gpu_version, 400, 500)) || - vendor == Vendor::NVIDIA)) { + if (!info.supports_image3d_writes && + ((info.vendor == Vendor::kQualcomm && + IsGPUVersionInRange(info.adreno_info.gpu_version, 400, 500)) || + info.vendor == Vendor::kNvidia)) { // in local tests Adreno 430 can write in image 3d, at least on small sizes, // but it doesn't have cl_khr_3d_image_writes in list of available // extensions // The same for NVidia - supports_image3d_writes = true; + info.supports_image3d_writes = true; } - compute_units_count = GetDeviceInfo(id, CL_DEVICE_MAX_COMPUTE_UNITS); - image2d_max_width = GetDeviceInfo(id, CL_DEVICE_IMAGE2D_MAX_WIDTH); - image2d_max_height = GetDeviceInfo(id, CL_DEVICE_IMAGE2D_MAX_HEIGHT); - buffer_max_size = GetDeviceInfo(id, CL_DEVICE_MAX_MEM_ALLOC_SIZE); - if (cl_version >= OpenCLVersion::CL_1_2) { - image_buffer_max_size = + info.compute_units_count = + GetDeviceInfo(id, CL_DEVICE_MAX_COMPUTE_UNITS); + info.image2d_max_width = + GetDeviceInfo(id, CL_DEVICE_IMAGE2D_MAX_WIDTH); + info.image2d_max_height = + GetDeviceInfo(id, CL_DEVICE_IMAGE2D_MAX_HEIGHT); + info.buffer_max_size = + GetDeviceInfo(id, CL_DEVICE_MAX_MEM_ALLOC_SIZE); + if (info.cl_version >= OpenCLVersion::CL_1_2) { + info.image_buffer_max_size = GetDeviceInfo(id, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE); - image_array_max_layers = + info.image_array_max_layers = GetDeviceInfo(id, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE); } - image3d_max_width = GetDeviceInfo(id, CL_DEVICE_IMAGE3D_MAX_WIDTH); - image3d_max_height = GetDeviceInfo(id, CL_DEVICE_IMAGE2D_MAX_HEIGHT); - image3d_max_depth = GetDeviceInfo(id, CL_DEVICE_IMAGE3D_MAX_DEPTH); + info.image3d_max_width = + GetDeviceInfo(id, CL_DEVICE_IMAGE3D_MAX_WIDTH); + info.image3d_max_height = + GetDeviceInfo(id, CL_DEVICE_IMAGE2D_MAX_HEIGHT); + info.image3d_max_depth = + GetDeviceInfo(id, CL_DEVICE_IMAGE3D_MAX_DEPTH); + int3 max_work_group_sizes; GetDeviceWorkDimsSizes(id, &max_work_group_sizes); + info.max_work_group_size_x = max_work_group_sizes.x; + info.max_work_group_size_y = max_work_group_sizes.y; + info.max_work_group_size_z = max_work_group_sizes.z; + return info; } -bool DeviceInfo::SupportsTextureArray() const { - return cl_version >= OpenCLVersion::CL_1_2; -} - -bool 
DeviceInfo::SupportsImageBuffer() const { - return cl_version >= OpenCLVersion::CL_1_2; -} - -bool DeviceInfo::SupportsImage3D() const { - if (vendor == Vendor::MALI) { - // On Mali T880 read_imageh doesn't compile with image3d_t - return false; - } - return supports_image3d_writes; -} - -bool DeviceInfo::IsAdreno() const { return vendor == Vendor::QUALCOMM; } - -bool DeviceInfo::IsAdreno3xx() const { - return IsAdreno() && IsGPUVersionInRange(adreno_info.gpu_version, 300, 400); -} - -bool DeviceInfo::IsAdreno4xx() const { - return IsAdreno() && IsGPUVersionInRange(adreno_info.gpu_version, 400, 500); -} - -bool DeviceInfo::IsAdreno5xx() const { - return IsAdreno() && IsGPUVersionInRange(adreno_info.gpu_version, 500, 600); -} - -bool DeviceInfo::IsAdreno6xx() const { - return IsAdreno() && IsGPUVersionInRange(adreno_info.gpu_version, 600, 700); -} - -bool DeviceInfo::IsAdreno6xxOrHigher() const { - return IsAdreno() && adreno_info.gpu_version >= 600; -} - -bool DeviceInfo::IsPowerVR() const { return vendor == Vendor::POWERVR; } - -bool DeviceInfo::IsNvidia() const { return vendor == Vendor::NVIDIA; } - -bool DeviceInfo::IsMali() const { return vendor == Vendor::MALI; } - -bool DeviceInfo::IsAMD() const { return vendor == Vendor::AMD; } - -bool DeviceInfo::IsIntel() const { return vendor == Vendor::INTEL; } - CLDevice::CLDevice(cl_device_id id, cl_platform_id platform_id) - : id_(id), platform_id_(platform_id), info_(id) {} + : id_(id), platform_id_(platform_id), info_(DeviceInfoFromDeviceID(id)) {} CLDevice::CLDevice(const CLDevice& device) : id_(device.id_), platform_id_(device.platform_id_), info_(device.info_) {} diff --git a/tensorflow/lite/delegates/gpu/cl/cl_device.h b/tensorflow/lite/delegates/gpu/cl/cl_device.h index 217111c475d..7e4792b0a53 100644 --- a/tensorflow/lite/delegates/gpu/cl/cl_device.h +++ b/tensorflow/lite/delegates/gpu/cl/cl_device.h @@ -19,6 +19,7 @@ limitations under the License. #include #include +#include "tensorflow/lite/delegates/gpu/cl/device_info.h" #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h" #include "tensorflow/lite/delegates/gpu/cl/util.h" #include "tensorflow/lite/delegates/gpu/common/status.h" @@ -28,139 +29,6 @@ namespace tflite { namespace gpu { namespace cl { -enum class Vendor { QUALCOMM, MALI, POWERVR, NVIDIA, AMD, INTEL, UNKNOWN }; -std::string VendorToString(Vendor v); - -enum class OpenCLVersion { - CL_1_0, - CL_1_1, - CL_1_2, - CL_2_0, - CL_2_1, - CL_2_2, - CL_3_0 -}; -std::string OpenCLVersionToString(OpenCLVersion version); - -// for use only in cl_device.cc, but putted here to make tests -int GetAdrenoGPUVersion(const std::string& gpu_version); - -struct AdrenoInfo { - AdrenoInfo() = default; - explicit AdrenoInfo(const std::string& device_version); - int gpu_version = -1; // can be, for example, 405/430/540/530/630 etc. - - // This function returns some not very documented physical parameter of - // Adreno6xx GPU. - // We obtained it using Snapdragon Profiler. - int GetMaximumWavesCount() const; - - // returns amount of register memory per CU(Compute Unit) in bytes. - int GetRegisterMemorySizePerComputeUnit() const; - - // returns maximum possible amount of waves based on register usage. - int GetMaximumWavesCount(int register_footprint_per_tread, - bool full_wave = true) const; - - int GetWaveSize(bool full_wave) const; - - // Not supported on some Adreno devices with specific driver version. 
- // b/131099086 - bool support_one_layer_texture_array = true; -}; - -enum class MaliGPU { - T604, - T622, - T624, - T628, - T658, - T678, - T720, - T760, - T820, - T830, - T860, - T880, - G31, - G51, - G71, - G52, - G72, - G76, - G57, - G77, - UNKNOWN -}; - -struct MaliInfo { - MaliInfo() = default; - explicit MaliInfo(const std::string& device_name); - MaliGPU gpu_version; - - bool IsMaliT6xx() const; - bool IsMaliT7xx() const; - bool IsMaliT8xx() const; - bool IsMidgard() const; - bool IsBifrostGen1() const; - bool IsBifrostGen2() const; - bool IsBifrostGen3() const; - bool IsBifrost() const; - bool IsValhall() const; -}; - -struct DeviceInfo { - DeviceInfo() = default; - explicit DeviceInfo(cl_device_id id); - - bool IsAdreno() const; - bool IsAdreno3xx() const; - bool IsAdreno4xx() const; - bool IsAdreno5xx() const; - bool IsAdreno6xx() const; - bool IsAdreno6xxOrHigher() const; - bool IsPowerVR() const; - bool IsNvidia() const; - bool IsMali() const; - bool IsAMD() const; - bool IsIntel() const; - - bool SupportsTextureArray() const; - bool SupportsImageBuffer() const; - bool SupportsImage3D() const; - - std::vector extensions; - bool supports_fp16; - bool supports_image3d_writes; - Vendor vendor; - OpenCLVersion cl_version; - int compute_units_count; - uint64_t buffer_max_size; - uint64_t image2d_max_width; - uint64_t image2d_max_height; - uint64_t image_buffer_max_size; - uint64_t image_array_max_layers; - uint64_t image3d_max_width; - uint64_t image3d_max_height; - uint64_t image3d_max_depth; - int3 max_work_group_sizes; - - cl_device_fp_config f32_config; - // valid only with cl_khr_fp16 - cl_device_fp_config f16_config; - - // rtn is ROUND_TO_NEAREST - // with rtn precision is much better then with rtz (ROUND_TO_ZERO) - // Adreno 3xx supports only rtz, Adreno 4xx and more support rtn - // Mali from T6xx supports rtn - // PowerVR supports only rtz - bool supports_fp32_rtn; - bool supports_fp16_rtn; - - AdrenoInfo adreno_info; - MaliInfo mali_info; -}; - // A wrapper around opencl device id class CLDevice { public: diff --git a/tensorflow/lite/delegates/gpu/cl/device_info.cc b/tensorflow/lite/delegates/gpu/cl/device_info.cc new file mode 100644 index 00000000000..7e0acb87ab7 --- /dev/null +++ b/tensorflow/lite/delegates/gpu/cl/device_info.cc @@ -0,0 +1,268 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/delegates/gpu/cl/device_info.h" + +#include +#include +#include + +#include "absl/strings/numbers.h" +#include "absl/strings/str_split.h" + +namespace tflite { +namespace gpu { +namespace cl { +namespace { +// check that gpu_version belong to range min_version-max_version +// min_version is included and max_version is excluded. 
+bool IsGPUVersionInRange(int gpu_version, int min_version, int max_version) { + return gpu_version >= min_version && gpu_version < max_version; +} + +MaliGPU GetMaliGPUVersion(const std::string& device_name) { + const std::map kMapping = { + {"T604", MaliGPU::T604}, {"T622", MaliGPU::T622}, {"T624", MaliGPU::T624}, + {"T628", MaliGPU::T628}, {"T658", MaliGPU::T658}, {"T678", MaliGPU::T678}, + {"T720", MaliGPU::T720}, {"T760", MaliGPU::T760}, {"T820", MaliGPU::T820}, + {"T830", MaliGPU::T830}, {"T860", MaliGPU::T860}, {"T880", MaliGPU::T880}, + {"G31", MaliGPU::G31}, {"G51", MaliGPU::G51}, {"G71", MaliGPU::G71}, + {"G52", MaliGPU::G52}, {"G72", MaliGPU::G72}, {"G76", MaliGPU::G76}, + {"G57", MaliGPU::G57}, {"G77", MaliGPU::G77}, + }; + for (const auto& v : kMapping) { + if (device_name.find(v.first) != std::string::npos) { + return v.second; + } + } + return MaliGPU::UNKNOWN; +} + +} // namespace + +// There is no rule for gpu version encoding, but we found these samples: +// Version: OpenCL C 2.0 Adreno(TM) 540 // Pixel 2 +// Version: OpenCL C 2.0 Adreno(TM) 630 // Sony Compact XZ2 +// Version: OpenCL C 2.0 Adreno(TM) 630 // Pixel 3 +// Version: OpenCL C 2.0 Adreno(TM) 540 // Samsung S8 +// Version: OpenCL C 1.2 Adreno(TM) 430 // HTC One M9 +// Version: OpenCL C 2.0 Adreno(TM) 530 // Samsung S7 Edge +// Version: OpenCL C 1.2 Adreno(TM) 405 // Motorola Moto G(4) +// After the number string ends. +// It is assumed that the for Adreno GPUs has +// the following format: +// Adreno(TM) +// Returns -1 if vendor-specific information cannot be parsed +int GetAdrenoGPUVersion(const std::string& gpu_version) { + const std::string gpu = absl::AsciiStrToLower(gpu_version); + const std::vector words = absl::StrSplit(gpu, ' '); + int i = 0; + for (; i < words.size(); ++i) { + if (words[i].find("adreno") != words[i].npos) { + break; + } + } + i += 1; + for (; i < words.size(); ++i) { + int number; + bool is_number = absl::SimpleAtoi(words[i], &number); + // Adreno GPUs starts from 2xx, but opencl support should be only from 3xx + if (is_number && number >= 300) { + return number; + } + } + return -1; +} + +std::string VendorToString(Vendor v) { + switch (v) { + case Vendor::kQualcomm: + return "Qualcomm"; + case Vendor::kMali: + return "Mali"; + case Vendor::kPowerVR: + return "PowerVR"; + case Vendor::kNvidia: + return "NVIDIA"; + case Vendor::kAMD: + return "AMD"; + case Vendor::kIntel: + return "Intel"; + case Vendor::kUnknown: + return "unknown vendor"; + } +} + +std::string OpenCLVersionToString(OpenCLVersion version) { + switch (version) { + case OpenCLVersion::CL_1_0: + return "1.0"; + case OpenCLVersion::CL_1_1: + return "1.1"; + case OpenCLVersion::CL_1_2: + return "1.2"; + case OpenCLVersion::CL_2_0: + return "2.0"; + case OpenCLVersion::CL_2_1: + return "2.1"; + case OpenCLVersion::CL_2_2: + return "2.2"; + case OpenCLVersion::CL_3_0: + return "3.0"; + } +} + +AdrenoInfo::AdrenoInfo(const std::string& device_version) + : gpu_version(GetAdrenoGPUVersion(device_version)) {} + +int AdrenoInfo::GetMaximumWavesCount() const { + if (gpu_version < 400) { + return -1; // Adreno 3xx does not support it currently + } else if (gpu_version >= 400 && gpu_version < 500) { + return -1; // Adreno 4xx does not support it currently + } else if (gpu_version >= 500 && gpu_version < 600) { + return -1; // Adreno 5xx does not support it currently + } else if (gpu_version >= 600 && gpu_version < 700) { + return gpu_version == 640 ? 
30 : 16; + } else { + return -1; // Adreno 7xx and higher does not exist yet + } +} + +int AdrenoInfo::GetRegisterMemorySizePerComputeUnit() const { + if (gpu_version < 400) { + return -1; // Adreno 3xx does not support it currently + } else if (gpu_version >= 400 && gpu_version < 500) { + return -1; // Adreno 4xx does not support it currently + } else if (gpu_version >= 500 && gpu_version < 600) { + return -1; // Adreno 5xx does not support it currently + } else if (gpu_version >= 600 && gpu_version < 700) { + return gpu_version == 640 ? 128 * 144 * 16 : 128 * 96 * 16; + } else { + return -1; // Adreno 7xx and higher does not exist yet + } +} + +int AdrenoInfo::GetMaximumWavesCount(int register_footprint_per_tread, + bool full_wave) const { + const int register_usage_per_wave = + GetWaveSize(full_wave) * register_footprint_per_tread; + const int possible_waves_count = + GetRegisterMemorySizePerComputeUnit() / register_usage_per_wave; + return std::min(possible_waves_count, GetMaximumWavesCount()); +} + +int AdrenoInfo::GetWaveSize(bool full_wave) const { + if (gpu_version < 400) { + return -1; // Adreno 3xx does not support it currently + } else if (gpu_version < 600) { + return full_wave ? 64 : 32; + } else { + return full_wave ? 128 : 64; + } +} + +MaliInfo::MaliInfo(const std::string& device_name) + : gpu_version(GetMaliGPUVersion(device_name)) {} + +bool MaliInfo::IsMaliT6xx() const { + return gpu_version == MaliGPU::T604 || gpu_version == MaliGPU::T622 || + gpu_version == MaliGPU::T624 || gpu_version == MaliGPU::T628 || + gpu_version == MaliGPU::T658 || gpu_version == MaliGPU::T678; +} + +bool MaliInfo::IsMaliT7xx() const { + return gpu_version == MaliGPU::T720 || gpu_version == MaliGPU::T760; +} + +bool MaliInfo::IsMaliT8xx() const { + return gpu_version == MaliGPU::T820 || gpu_version == MaliGPU::T830 || + gpu_version == MaliGPU::T860 || gpu_version == MaliGPU::T880; +} + +bool MaliInfo::IsMidgard() const { + return IsMaliT6xx() || IsMaliT7xx() || IsMaliT8xx(); +} + +bool MaliInfo::IsBifrostGen1() const { + return gpu_version == MaliGPU::G31 || gpu_version == MaliGPU::G51 || + gpu_version == MaliGPU::G71; +} + +bool MaliInfo::IsBifrostGen2() const { + return gpu_version == MaliGPU::G52 || gpu_version == MaliGPU::G72; +} + +bool MaliInfo::IsBifrostGen3() const { return gpu_version == MaliGPU::G76; } + +bool MaliInfo::IsBifrost() const { + return IsBifrostGen1() || IsBifrostGen2() || IsBifrostGen3(); +} + +bool MaliInfo::IsValhall() const { + return gpu_version == MaliGPU::G57 || gpu_version == MaliGPU::G77; +} + +bool DeviceInfo::SupportsTextureArray() const { + return cl_version >= OpenCLVersion::CL_1_2; +} + +bool DeviceInfo::SupportsImageBuffer() const { + return cl_version >= OpenCLVersion::CL_1_2; +} + +bool DeviceInfo::SupportsImage3D() const { + if (vendor == Vendor::kMali) { + // On Mali T880 read_imageh doesn't compile with image3d_t + return false; + } + return supports_image3d_writes; +} + +bool DeviceInfo::IsAdreno() const { return vendor == Vendor::kQualcomm; } + +bool DeviceInfo::IsAdreno3xx() const { + return IsAdreno() && IsGPUVersionInRange(adreno_info.gpu_version, 300, 400); +} + +bool DeviceInfo::IsAdreno4xx() const { + return IsAdreno() && IsGPUVersionInRange(adreno_info.gpu_version, 400, 500); +} + +bool DeviceInfo::IsAdreno5xx() const { + return IsAdreno() && IsGPUVersionInRange(adreno_info.gpu_version, 500, 600); +} + +bool DeviceInfo::IsAdreno6xx() const { + return IsAdreno() && IsGPUVersionInRange(adreno_info.gpu_version, 600, 700); +} + +bool 
DeviceInfo::IsAdreno6xxOrHigher() const { + return IsAdreno() && adreno_info.gpu_version >= 600; +} + +bool DeviceInfo::IsPowerVR() const { return vendor == Vendor::kPowerVR; } + +bool DeviceInfo::IsNvidia() const { return vendor == Vendor::kNvidia; } + +bool DeviceInfo::IsMali() const { return vendor == Vendor::kMali; } + +bool DeviceInfo::IsAMD() const { return vendor == Vendor::kAMD; } + +bool DeviceInfo::IsIntel() const { return vendor == Vendor::kIntel; } + +} // namespace cl +} // namespace gpu +} // namespace tflite diff --git a/tensorflow/lite/delegates/gpu/cl/device_info.h b/tensorflow/lite/delegates/gpu/cl/device_info.h new file mode 100644 index 00000000000..b13fe3df846 --- /dev/null +++ b/tensorflow/lite/delegates/gpu/cl/device_info.h @@ -0,0 +1,168 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_DEVICE_INFO_H_ +#define TENSORFLOW_LITE_DELEGATES_GPU_CL_DEVICE_INFO_H_ + +#include +#include + +// for use only in device_info.cc, but keep here to make tests +int GetAdrenoGPUVersion(const std::string& gpu_version); + +namespace tflite { +namespace gpu { +namespace cl { + +enum class Vendor { + kQualcomm, + kMali, + kPowerVR, + kNvidia, + kAMD, + kIntel, + kUnknown +}; +std::string VendorToString(Vendor v); + +enum class OpenCLVersion { + CL_1_0, + CL_1_1, + CL_1_2, + CL_2_0, + CL_2_1, + CL_2_2, + CL_3_0 +}; +std::string OpenCLVersionToString(OpenCLVersion version); + +struct AdrenoInfo { + AdrenoInfo() = default; + explicit AdrenoInfo(const std::string& device_version); + int gpu_version = -1; // can be, for example, 405/430/540/530/630 etc. + + // This function returns some not very documented physical parameter of + // Adreno6xx GPU. + // We obtained it using Snapdragon Profiler. + int GetMaximumWavesCount() const; + + // returns amount of register memory per CU(Compute Unit) in bytes. + int GetRegisterMemorySizePerComputeUnit() const; + + // returns maximum possible amount of waves based on register usage. + int GetMaximumWavesCount(int register_footprint_per_tread, + bool full_wave = true) const; + + int GetWaveSize(bool full_wave) const; + + // Not supported on some Adreno devices with specific driver version. 
+ // b/131099086 + bool support_one_layer_texture_array = true; +}; + +enum class MaliGPU { + T604, + T622, + T624, + T628, + T658, + T678, + T720, + T760, + T820, + T830, + T860, + T880, + G31, + G51, + G71, + G52, + G72, + G76, + G57, + G77, + UNKNOWN +}; + +struct MaliInfo { + MaliInfo() = default; + explicit MaliInfo(const std::string& device_name); + MaliGPU gpu_version; + + bool IsMaliT6xx() const; + bool IsMaliT7xx() const; + bool IsMaliT8xx() const; + bool IsMidgard() const; + bool IsBifrostGen1() const; + bool IsBifrostGen2() const; + bool IsBifrostGen3() const; + bool IsBifrost() const; + bool IsValhall() const; +}; + +struct DeviceInfo { + DeviceInfo() = default; + + bool IsAdreno() const; + bool IsAdreno3xx() const; + bool IsAdreno4xx() const; + bool IsAdreno5xx() const; + bool IsAdreno6xx() const; + bool IsAdreno6xxOrHigher() const; + bool IsPowerVR() const; + bool IsNvidia() const; + bool IsMali() const; + bool IsAMD() const; + bool IsIntel() const; + + bool SupportsTextureArray() const; + bool SupportsImageBuffer() const; + bool SupportsImage3D() const; + + std::vector extensions; + bool supports_fp16; + bool supports_image3d_writes; + Vendor vendor; + OpenCLVersion cl_version; + int compute_units_count; + uint64_t buffer_max_size; + uint64_t image2d_max_width; + uint64_t image2d_max_height; + uint64_t image_buffer_max_size; + uint64_t image_array_max_layers; + uint64_t image3d_max_width; + uint64_t image3d_max_height; + uint64_t image3d_max_depth; + int max_work_group_size_x; + int max_work_group_size_y; + int max_work_group_size_z; + + // rtn is ROUND_TO_NEAREST + // with rtn precision is much better then with rtz (ROUND_TO_ZERO) + // Adreno 3xx supports only rtz, Adreno 4xx and more support rtn + // Mali from T6xx supports rtn + // PowerVR supports only rtz + bool supports_fp32_rtn; + bool supports_fp16_rtn; + + AdrenoInfo adreno_info; + MaliInfo mali_info; +}; + +} // namespace cl +} // namespace gpu +} // namespace tflite + +#endif // TENSORFLOW_LITE_DELEGATES_GPU_CL_DEVICE_INFO_H_ diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc index b93fe113d89..ed1ec8be7b1 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc @@ -37,7 +37,7 @@ int GetAdrenoOptimalMaxConstantSize(int gpu_version) { } int GetOptimalMaxConstantSize(const DeviceInfo& info) { - if (info.vendor != Vendor::QUALCOMM) { + if (!info.IsAdreno()) { // In general we do not expect that this kernel will be used with non Adreno // so as it tuned for __constant memory that have big profit on Adreno return 1024; // 1KB diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.cc b/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.cc index 5e280d5f98b..3771a5b033a 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.cc @@ -80,9 +80,12 @@ absl::Status GetBestWorkGroupAlignedToGrid(const TuningParameters& params, const int3& grid, int3* best_work_group) { std::vector work_groups; + int3 max_wg_size; + max_wg_size.x = params.info->max_work_group_size_x; + max_wg_size.y = params.info->max_work_group_size_y; + max_wg_size.z = params.info->max_work_group_size_z; RETURN_IF_ERROR(GenerateWorkGroupSizesAlignedToGrid( - grid, params.info->max_work_group_sizes, kernel.GetMaxWorkGroupSize(), - &work_groups)); + grid, max_wg_size, 
kernel.GetMaxWorkGroupSize(), &work_groups)); int best_work_group_index; RETURN_IF_ERROR(params.queue->GetBestWorkGroupIndex( kernel, *params.info, grid, work_groups, &best_work_group_index)); @@ -268,10 +271,10 @@ absl::Status GetBestWorkGroupConv(const TuningParameters& params, switch (params.tuning_type) { case TuningType::FAST: { int max_z_size = 16; - if (params.info->vendor == Vendor::QUALCOMM) { + if (params.info->IsAdreno()) { max_z_size = params.info->adreno_info.gpu_version < 400 ? 16 : 64; } - max_z_size = std::min(max_z_size, params.info->max_work_group_sizes.z); + max_z_size = std::min(max_z_size, params.info->max_work_group_size_z); *best_work_group = GetWorkGroupConv(grid, kernel.GetMaxWorkGroupSize(), max_z_size); return absl::OkStatus(); diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.cc index 3e2531c02b3..b577757057e 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.cc @@ -167,22 +167,21 @@ absl::Status SelectConvolution(const Convolution2DAttributes& attr, const CreationContext& creation_context, const OperationDef& op_def, ModelHints hints, std::unique_ptr* ptr) { - switch (creation_context.device->vendor()) { - case Vendor::QUALCOMM: - return SelectConvolutionAdreno(attr, dst_shape, creation_context, op_def, + const auto& device_info = creation_context.device->GetInfo(); + if (device_info.IsAdreno()) { + return SelectConvolutionAdreno(attr, dst_shape, creation_context, op_def, hints, ptr); - case Vendor::POWERVR: - case Vendor::INTEL: - case Vendor::AMD: - return SelectConvolutionPowerVR(attr, creation_context, op_def, ptr); - case Vendor::NVIDIA: - return SelectConvolutionNVidia(attr, dst_shape, creation_context, op_def, + } else if (device_info.IsPowerVR() || device_info.IsAMD() || + device_info.IsIntel()) { + return SelectConvolutionPowerVR(attr, creation_context, op_def, ptr); + } else if (device_info.IsNvidia()) { + return SelectConvolutionNVidia(attr, dst_shape, creation_context, op_def, ptr); - case Vendor::MALI: - return SelectConvolutionMali(attr, dst_shape, creation_context, op_def, + } else if (device_info.IsMali()) { + return SelectConvolutionMali(attr, dst_shape, creation_context, op_def, ptr); - default: - return SelectConvolutionAdreno(attr, dst_shape, creation_context, op_def, + } else { + return SelectConvolutionAdreno(attr, dst_shape, creation_context, op_def, hints, ptr); } } @@ -191,25 +190,22 @@ absl::Status SelectConvolutionForWinograd( const Convolution2DAttributes& attr, const BHWC& dst_shape, const CreationContext& creation_context, const OperationDef& op_def, ModelHints hints, std::unique_ptr* ptr) { - switch (creation_context.device->vendor()) { - case Vendor::QUALCOMM: - return SelectConvolutionWinogradAdreno(attr, dst_shape, creation_context, + const auto& device_info = creation_context.device->GetInfo(); + if (device_info.IsAdreno()) { + return SelectConvolutionWinogradAdreno(attr, dst_shape, creation_context, op_def, hints, ptr); - case Vendor::POWERVR: - case Vendor::AMD: - case Vendor::INTEL: - case Vendor::NVIDIA: { - ConvPowerVR conv; + } else if (device_info.IsPowerVR() || device_info.IsAMD() || + device_info.IsNvidia() || device_info.IsIntel()) { + ConvPowerVR conv; RETURN_IF_ERROR(CreateConvPowerVRWino4x4To6x6(creation_context, op_def, attr, &conv, &dst_shape)); *ptr = absl::make_unique(std::move(conv)); return absl::OkStatus(); - } - 
case Vendor::MALI: - return SelectConvolutionWinogradMali(attr, dst_shape, creation_context, + } else if (device_info.IsMali()) { + return SelectConvolutionWinogradMali(attr, dst_shape, creation_context, op_def, ptr); - default: - return SelectConvolutionWinogradAdreno(attr, dst_shape, creation_context, + } else { + return SelectConvolutionWinogradAdreno(attr, dst_shape, creation_context, op_def, hints, ptr); } } @@ -219,23 +215,22 @@ absl::Status SelectConvolutionWithDynamicWeights( const BHWC& dst_shape, const CreationContext& creation_context, const OperationDef& op_def, ModelHints hints, std::unique_ptr* ptr, ConvWeightsDescription* weights_desc) { - switch (creation_context.device->vendor()) { - case Vendor::QUALCOMM: - return SelectConvolutionDynamicWeightsAdreno( - attr, weights_shape, dst_shape, creation_context, op_def, hints, ptr, - weights_desc); - case Vendor::MALI: - return SelectConvolutionDynamicWeightsMali(attr, weights_shape, dst_shape, + const auto& device_info = creation_context.device->GetInfo(); + if (device_info.IsAdreno()) { + return SelectConvolutionDynamicWeightsAdreno(attr, weights_shape, dst_shape, creation_context, op_def, hints, ptr, weights_desc); - default: { - ConvPowerVR conv; - RETURN_IF_ERROR(CreateConvPowerVRDynamicWeights( - creation_context, op_def, attr, weights_shape, &conv, &dst_shape)); - *weights_desc = conv.GetConvWeightsDescription(); - *ptr = absl::make_unique(std::move(conv)); - return absl::OkStatus(); - } + } else if (device_info.IsMali()) { + return SelectConvolutionDynamicWeightsMali(attr, weights_shape, dst_shape, + creation_context, op_def, hints, + ptr, weights_desc); + } else { + ConvPowerVR conv; + RETURN_IF_ERROR(CreateConvPowerVRDynamicWeights( + creation_context, op_def, attr, weights_shape, &conv, &dst_shape)); + *weights_desc = conv.GetConvWeightsDescription(); + *ptr = absl::make_unique(std::move(conv)); + return absl::OkStatus(); } } diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/convolution_transposed_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/convolution_transposed_selector.cc index 5fdfdca073e..56864f2c575 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/convolution_transposed_selector.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/convolution_transposed_selector.cc @@ -105,22 +105,19 @@ absl::Status SelectConvolutionTransposed( const ConvolutionTransposedAttributes& attr, const CreationContext& creation_context, const OperationDef& op_def, std::unique_ptr* ptr) { - switch (creation_context.device->vendor()) { - case Vendor::QUALCOMM: - return SelectConvolutionTransposedAdreno(attr, creation_context, op_def, - ptr); - case Vendor::POWERVR: - case Vendor::NVIDIA: - case Vendor::AMD: - case Vendor::INTEL: - return SelectConvolutionTransposedPowerVR(attr, creation_context, op_def, - ptr); - case Vendor::MALI: - return SelectConvolutionTransposedMali(attr, creation_context, op_def, + const auto& device_info = creation_context.device->GetInfo(); + if (device_info.IsAdreno()) { + return SelectConvolutionTransposedAdreno(attr, creation_context, op_def, + ptr); + } else if (device_info.IsPowerVR() || device_info.IsAMD() || + device_info.IsNvidia() || device_info.IsIntel()) { + return SelectConvolutionTransposedPowerVR(attr, creation_context, op_def, + ptr); + } else if (device_info.IsMali()) { + return SelectConvolutionTransposedMali(attr, creation_context, op_def, ptr); + } else { + return SelectConvolutionTransposedAdreno(attr, creation_context, op_def, ptr); - default: - return 
SelectConvolutionTransposedAdreno(attr, creation_context, op_def, - ptr); } } diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/dw_convolution_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/dw_convolution_selector.cc index 54ff45d182a..fafd9078f6f 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/dw_convolution_selector.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/dw_convolution_selector.cc @@ -90,15 +90,15 @@ absl::Status SelectDWConvolution(const DepthwiseConvolution2DAttributes& attr, const CreationContext& creation_context, const OperationDef& op_def, std::unique_ptr* ptr) { - switch (creation_context.device->vendor()) { - case Vendor::QUALCOMM: - return SelectDWConvolutionAdreno(attr, creation_context, op_def, ptr); - case Vendor::POWERVR: - return SelectDWConvolutionPowerVR(attr, creation_context, op_def, ptr); - case Vendor::MALI: - return SelectDWConvolutionMali(attr, creation_context, op_def, ptr); - default: - return SelectDWConvolutionAdreno(attr, creation_context, op_def, ptr); + const auto& device_info = creation_context.device->GetInfo(); + if (device_info.IsAdreno()) { + return SelectDWConvolutionAdreno(attr, creation_context, op_def, ptr); + } else if (device_info.IsPowerVR()) { + return SelectDWConvolutionPowerVR(attr, creation_context, op_def, ptr); + } else if (device_info.IsMali()) { + return SelectDWConvolutionMali(attr, creation_context, op_def, ptr); + } else { + return SelectDWConvolutionAdreno(attr, creation_context, op_def, ptr); } } diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/fully_connected_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/fully_connected_selector.cc index eacbea8b586..cb967e45b52 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/fully_connected_selector.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/fully_connected_selector.cc @@ -104,22 +104,20 @@ absl::Status SelectFullyConnected(const FullyConnectedAttributes& attr, const CreationContext& creation_context, const OperationDef& op_def, int batch_size, std::unique_ptr* ptr) { - switch (creation_context.device->vendor()) { - case Vendor::QUALCOMM: - return SelectFullyConnectedAdreno(attr, creation_context, op_def, - batch_size, ptr); - case Vendor::POWERVR: - case Vendor::AMD: - case Vendor::NVIDIA: - case Vendor::INTEL: - return SelectFullyConnectedPowerVR(attr, creation_context, op_def, - batch_size, ptr); - case Vendor::MALI: - return SelectFullyConnectedMali(attr, creation_context, op_def, + const auto& device_info = creation_context.device->GetInfo(); + if (device_info.IsAdreno()) { + return SelectFullyConnectedAdreno(attr, creation_context, op_def, batch_size, ptr); - default: - return SelectFullyConnectedGeneric(attr, creation_context, op_def, - batch_size, ptr); + } else if (device_info.IsPowerVR() || device_info.IsAMD() || + device_info.IsNvidia() || device_info.IsIntel()) { + return SelectFullyConnectedPowerVR(attr, creation_context, op_def, + batch_size, ptr); + } else if (device_info.IsMali()) { + return SelectFullyConnectedMali(attr, creation_context, op_def, batch_size, + ptr); + } else { + return SelectFullyConnectedGeneric(attr, creation_context, op_def, + batch_size, ptr); } } From a04564d58816e10bcedd1b9911605fb1d878781d Mon Sep 17 00:00:00 2001 From: Jonathan Chu Date: Tue, 4 Aug 2020 00:58:27 +0000 Subject: [PATCH 2023/2522] Modify _define_function_with_shape_relaxation, remove some calls to nest.flatten --- tensorflow/python/eager/function.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 
deletions(-) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index d9b141e049a..6febd72b54f 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -3198,10 +3198,16 @@ class Function(object): shared_func_graph=False) return graph_function - def _define_function_with_shape_relaxation(self, args, kwargs): + def _define_function_with_shape_relaxation(self, + args, + kwargs, + flat_args, + flat_kwargs): """Define a function, relaxing arg shapes to avoid unnecessary retracing.""" + flat_args_all = nest.flatten((args, kwargs), expand_composites=False) + any_composite_args = any(isinstance(x, composite_tensor.CompositeTensor) - for x in nest.flatten((args, kwargs))) + for x in flat_args_all) # Build a cache key where TensorShapes include only rank information (and # not information about the size of each dimension). @@ -3216,7 +3222,7 @@ class Function(object): rank_only_cache_key = self._cache_key( cache_key_args, cache_key_kwargs, include_tensor_ranks_only=True) - arg_specs = [_type_spec_for(x) for x in nest.flatten((args, kwargs))] + arg_specs = [_type_spec_for(x) for x in flat_args_all] relaxed_arg_specs = self._function_cache.arg_relaxed_specs.get( rank_only_cache_key, None) relaxed_arg_function = self._function_cache.arg_relaxed.get( @@ -3225,7 +3231,7 @@ class Function(object): if (relaxed_arg_function is not None and all(_is_type_subset(x, y) for (x, y) in zip(relaxed_arg_specs, arg_specs))): - return relaxed_arg_function, args, kwargs + return relaxed_arg_function, flat_args, flat_kwargs if relaxed_arg_specs is None: relaxed_arg_specs = arg_specs @@ -3251,14 +3257,16 @@ class Function(object): (args, kwargs), relaxed_arg_specs, expand_composites=False) (args, kwargs) = nest.pack_sequence_as( (relaxed_arg_specs, relaxed_kwarg_specs), - nest.flatten((args, kwargs), expand_composites=True), + flat_args + flat_kwargs, expand_composites=True) graph_function = self._create_graph_function( args, kwargs, override_flat_arg_shapes=relaxed_arg_shapes) self._function_cache.arg_relaxed[rank_only_cache_key] = graph_function - return graph_function, args, kwargs + return (graph_function, + nest.flatten(args, expand_composites=True), + nest.flatten(kwargs, expand_composites=True)) def _maybe_define_function(self, args, kwargs): """Gets a function for these inputs, defining it if necessary. @@ -3286,6 +3294,7 @@ class Function(object): args, kwargs, flat_args, flat_kwargs = \ self._function_spec.canonicalize_function_inputs(*args, **kwargs) else: + # TODO(jlchu): Check - empty lists or Nones? 
flat_args, flat_kwargs = [], [] cache_key = self._cache_key(args, kwargs) @@ -3325,10 +3334,8 @@ class Function(object): if (self._experimental_relax_shapes and self.input_signature is None and call_context_key in self._function_cache.missed): - return_function, _, _ = \ - self._define_function_with_shape_relaxation(args, kwargs) - #TODO(jlchu): Investigate modifying above function sig directly - return return_function, flat_args, flat_kwargs + return self._define_function_with_shape_relaxation( + args, kwargs, flat_args, flat_kwargs) self._function_cache.missed.add(call_context_key) graph_function = self._create_graph_function(args, kwargs) From c94f5813e65b4a48901053ef86d48f13b2571b24 Mon Sep 17 00:00:00 2001 From: Tzu-Wei Sung Date: Mon, 3 Aug 2020 17:59:24 -0700 Subject: [PATCH 2024/2522] Address comments --- .../compiler/mlir/tensorflow/ir/tf_ops_n_z.cc | 43 +++++++++---------- .../mlir/tensorflow/tests/tf-ops.mlir | 2 +- 2 files changed, 21 insertions(+), 24 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc index 0b9b757da55..0941345a76c 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc @@ -1760,6 +1760,9 @@ void ToBoolOp::getCanonicalizationPatterns(OwningRewritePatternList &results, static LogicalResult Verify(TransposeOp op) { auto perm_type = op.perm().getType().dyn_cast(); + auto x_type = op.x().getType().dyn_cast(); + auto y_type = op.y().getType().dyn_cast(); + if (!perm_type) { return success(); } @@ -1770,33 +1773,23 @@ static LogicalResult Verify(TransposeOp op) { << perm_type.getRank(); } - if (!perm_type.hasStaticShape()) { + if (x_type && y_type && x_type.getRank() != y_type.getRank()) { + return op.emitOpError() + << "x should be of the same rank with y, got " + << "x of rank " << x_type.getRank() << ", and y of rank " + << y_type.getRank(); + } + + if (!x_type || !y_type || !perm_type.hasStaticShape()) { return success(); } - auto x_type = op.x().getType().dyn_cast(); - if (!x_type) { - return success(); - } - - const int64_t x_rank = x_type.getRank(); - if (x_rank != perm_type.getNumElements()) { + if (x_type.getRank() != perm_type.getNumElements()) { return op.emitOpError() << "expected perm to be a 1-D Tensor of size " << "equal to the rank of x, got perm of size " - << perm_type.getNumElements() << ", and x of rank " << x_rank; - } - - auto y_type = op.y().getType().dyn_cast(); - if (!y_type) { - return success(); - } - - const int64_t y_rank = y_type.getRank(); - if (x_rank != y_rank) { - return op.emitOpError() - << "x should be of the same rank with y, got " - << "x of rank " << x_rank << ", and y of rank " << y_rank; + << perm_type.getNumElements() << ", and x of rank " + << x_type.getRank(); } DenseIntElementsAttr attr_perm; @@ -1808,10 +1801,14 @@ static LogicalResult Verify(TransposeOp op) { const int64_t y_dim = y_type.getDimSize(y_idx); const int64_t x_idx = e.value().getSExtValue(); const int64_t x_dim = x_type.getDimSize(x_idx); + if (y_dim == ShapedType::kDynamicSize || x_dim == ShapedType::kDynamicSize) { + continue; + } if (y_dim != x_dim) { return op.emitOpError() - << "y.shape[" << y_idx << "] = " << y_dim - << " != x.shape[perm[" << x_idx << "]] = " << x_dim; + << "requires y.shape[" << y_idx << "] (" << y_dim << ") " + << "to be equal to x.shape[perm[" << x_idx << "]] " + << "(" << x_dim << ")"; } } } diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf-ops.mlir 
b/tensorflow/compiler/mlir/tensorflow/tests/tf-ops.mlir index 4fd691cc104..04469e69684 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tf-ops.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf-ops.mlir @@ -2091,7 +2091,7 @@ func @testTranspose(tensor<2x3xf32>) -> tensor<3x2x1xf32> { func @testTranspose(tensor<2x3x4xf32>) -> tensor<3x2x4xf32> { ^bb0(%arg0: tensor<2x3x4xf32>): %cst = constant dense<[2, 0, 1]> : tensor<3xi32> - // expected-error @+1 {{y.shape[0] = 3 != x.shape[perm[2]] = 4}} + // expected-error @+1 {{requires y.shape[0] (3) to be equal to x.shape[perm[2]] (4)}} %0 = "tf.Transpose"(%arg0, %cst) {T = "tfdtype$DT_FLOAT", Tperm = "tfdtype$DT_INT32"} : (tensor<2x3x4xf32>, tensor<3xi32>) -> tensor<3x2x4xf32> return %0 : tensor<3x2x4xf32> } From 3410880bd3447efad4a273b9e65948d2a26d03ae Mon Sep 17 00:00:00 2001 From: Jonathan Chu Date: Tue, 4 Aug 2020 01:07:52 +0000 Subject: [PATCH 2025/2522] Remove / modify temp comments --- tensorflow/python/eager/function.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 6febd72b54f..e5dc33b513a 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -1853,8 +1853,7 @@ class ConcreteFunction(object): `flat_args` and `flat_kwargs`. """ return self._call_flat( - [t for t in flat_args + flat_kwargs \ - # TODO(jlchu): delete when final [t for t in nest.flatten((args, kwargs), expand_composites=True) + [t for t in flat_args + flat_kwargs if isinstance(t, (ops.Tensor, resource_variable_ops.BaseResourceVariable))], captured_inputs=self.captured_inputs, @@ -2727,9 +2726,11 @@ def _is_ndarray(value): def _convert_numpy_inputs(inputs): """Convert numpy array inputs to tensors.""" - ## TODO(jlchu): Modify/delete comment when change is final!!! # We assume that any CompositeTensors have already converted their components - # from numpy arrays to Tensors, so we don't need to expand composites here. + # from numpy arrays to Tensors, so we don't need to expand composites here for + # the numpy array conversion. Instead, we do so because the flattened inputs + # are eventually passed to ConcreteFunction()._filtered_call, which requires + # expanded composites. flat_inputs = nest.flatten(inputs, expand_composites=True) # Check for NumPy arrays in arguments and convert them to Tensors. 
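Editorial note (not part of any patch above): the expand_composites distinction referenced in the _convert_numpy_inputs comment can be seen directly with tf.nest.flatten. A minimal sketch, assuming TF 2.x eager mode; the RaggedTensor value is an arbitrary example:

import tensorflow as tf

rt = tf.ragged.constant([[1, 2], [3]])   # a CompositeTensor
dense = tf.constant([4.0, 5.0])
structure = ((rt, dense), {})

# Without expansion, the composite stays as a single leaf: [rt, dense].
print(tf.nest.flatten(structure, expand_composites=False))

# With expansion, the RaggedTensor is decomposed into its component
# tensors (flat values and row splits), which is the form the patch says
# ConcreteFunction._filtered_call expects for its flat inputs.
print(tf.nest.flatten(structure, expand_composites=True))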
From 69e1ca4cdea60cbff7881f83c21b5a3578615bfc Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Mon, 3 Aug 2020 18:03:08 -0700 Subject: [PATCH 2026/2522] Add Compilation Cache local lookup feature to TF-TPU support PiperOrigin-RevId: 324722948 Change-Id: Iab0cadf375fb2a23e2aa6d3c5bcb7aa08328a9c3 --- tensorflow/core/tpu/kernels/BUILD | 6 ++++++ .../core/tpu/kernels/tpu_configuration_ops.cc | 17 +++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index 3b7d0e09c08..75d12f89426 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -89,9 +89,15 @@ tf_kernel_library( name = "tpu_configuration_ops", srcs = ["tpu_configuration_ops.cc"], hdrs = ["tpu_configuration_ops.h"], + copts = select({ + WITH_TPU_SUPPORT: ["-DLIBTFTPU"], + DEFAULT: [], + }), deps = [ ":tpu_compilation_cache_factory", ":tpu_compilation_cache_interface", + ":tpu_compilation_cache_local_lookup", + ":tpu_compilation_cache_lookup", ":tpu_mesh_state_interface", ":tpu_op_consts", "//tensorflow/c:tf_status", diff --git a/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc b/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc index e098dbd682c..4030cf86910 100644 --- a/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc +++ b/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc @@ -25,6 +25,8 @@ limitations under the License. #include "tensorflow/core/platform/refcount.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_factory.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_local_lookup.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.h" #include "tensorflow/core/tpu/kernels/tpu_mesh_state_interface.h" #include "tensorflow/core/tpu/kernels/tpu_op_consts.h" #include "tensorflow/core/tpu/tpu_api.h" @@ -253,6 +255,12 @@ void InitializeHostForDistributedTpuOp::Compute(OpKernelContext* ctx) { mesh_state_interface)); } +#if defined(LIBTFTPU) + VLOG(1) << "Removing existing proto compilation cache lookup if it exists"; + OP_REQUIRES_OK(ctx, DeleteIfExists( + rmgr, tpu::kCompiledProtoCacheResourceName)); +#endif + if (enable_whole_mesh_compilations_) { // If this is a whole mesh compilation mode, create the compilation cache, // if missing. @@ -276,6 +284,15 @@ void InitializeHostForDistributedTpuOp::Compute(OpKernelContext* ctx) { if (local_compilation_cache != nullptr) { local_compilation_cache->Unref(); + +#if defined(LIBTFTPU) + tpu::TpuCompilationCacheLookup* proto_lookup; + proto_lookup = + new tpu::TpuCompilationCacheLocalLookup(local_compilation_cache); + OP_REQUIRES_OK( + ctx, rmgr->Create(rmgr->default_container(), + tpu::kCompiledProtoCacheResourceName, proto_lookup)); +#endif } Tensor* ctx_output; From 4d765c332e6c6046e7ab71f7aea9d5c0bd890913 Mon Sep 17 00:00:00 2001 From: bbbboom Date: Tue, 4 Aug 2020 09:10:04 +0800 Subject: [PATCH 2027/2522] Update generate.sh Add corresponding annotation. 
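For illustration only (not part of the change), the shell substitutions below can be read as the following Python sketch; the example GOPATH value mirrors the annotations added to the script:

def to_unix_gopath(win_gopath):
  # eg: "D:\go-14;D:\go-13" -> "D\go-14;D\go-13"
  p = win_gopath.replace(":\\", "\\")
  # eg: "D\go-14;D\go-13" -> "\D\go-14:\D\go-13"
  p = "\\" + p.replace(";", ":\\")
  # eg: "\D\go-14:\D\go-13" -> "/D/go-14:/D/go-13"
  return p.replace("\\", "/")

assert to_unix_gopath("D:\\go-14;D:\\go-13") == "/D/go-14:/D/go-13"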
--- tensorflow/go/genop/generate.sh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tensorflow/go/genop/generate.sh b/tensorflow/go/genop/generate.sh index 54541106f13..547dd790e05 100644 --- a/tensorflow/go/genop/generate.sh +++ b/tensorflow/go/genop/generate.sh @@ -24,10 +24,14 @@ then GOPATH=$(go env GOPATH) fi -# change GOPATH style +# convert GOPATH's Windows style to UNIX style if [ $1 == "win" ]; then + # eg: convert "D:\go-14;D:\go-13" to "D\go-14;D\go-13" + GOPATH=${GOPATH//:\\/\\} + # eg: convert "D\go-14;D\go-13" to "\D\go-14:\D\go-13" + GOPATH=\\${GOPATH//;/:\\} + # eg: convert "\D\go-14:\D\go-13" to "/D/go-14:/D/go-13" GOPATH=${GOPATH//\\/\/} - GOPATH=/${GOPATH//:/} fi cd $(dirname $0) From 2d968de1433b9afc9752aa29954b013cea7d110b Mon Sep 17 00:00:00 2001 From: Jonathan Chu Date: Tue, 4 Aug 2020 01:23:13 +0000 Subject: [PATCH 2028/2522] Resolve minor TODO in function.py --- tensorflow/python/eager/def_function.py | 2 +- tensorflow/python/eager/function.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py index a1ffd2f9efc..8447245b524 100644 --- a/tensorflow/python/eager/def_function.py +++ b/tensorflow/python/eager/def_function.py @@ -917,7 +917,7 @@ class Function(object): canon_args, canon_kwds, flat_args, flat_kwds = \ self._stateful_fn._function_spec.canonicalize_function_inputs( # pylint: disable=protected-access *args, **kwds) - # TODO(jlchu): verify that mdofication to fn_with_cond works + # TODO(jlchu): verify that modification to fn_with_cond works return function_lib.defun(fn_with_cond)(canon_args, canon_kwds, flat_args, flat_kwds) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index e5dc33b513a..a833d351c84 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -3295,8 +3295,7 @@ class Function(object): args, kwargs, flat_args, flat_kwargs = \ self._function_spec.canonicalize_function_inputs(*args, **kwargs) else: - # TODO(jlchu): Check - empty lists or Nones? - flat_args, flat_kwargs = [], [] + flat_args, flat_kwargs = [None], [None] cache_key = self._cache_key(args, kwargs) From 9e6c87150b098953584acca770a397065ea548e3 Mon Sep 17 00:00:00 2001 From: Feng Liu Date: Mon, 3 Aug 2020 18:21:39 -0700 Subject: [PATCH 2029/2522] run shape inference after the constants are frozen PiperOrigin-RevId: 324725487 Change-Id: I7fa6dd741d502c83817cbd0009b03b07ac761170 --- tensorflow/compiler/mlir/lite/tf_tfl_passes.cc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc b/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc index c49d9a10716..d63eb481376 100644 --- a/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc +++ b/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc @@ -166,6 +166,10 @@ void AddTFToTFLConversionPasses(const mlir::TFL::PassConfig& pass_config, // The below passes only make sense if Builtin TFLite ops are enabled // for emission. if (pass_config.emit_builtin_tflite_ops) { + // Run shape inference after variables are converted to constants. + if (pass_config.shape_inference) { + pass_manager->addPass(mlir::TF::CreateTFShapeInferencePass()); + } // Prepare for TFLite dialect, rerun canonicalization, and then legalize to // the TFLite dialect. 
pass_manager->addPass( @@ -173,6 +177,9 @@ void AddTFToTFLConversionPasses(const mlir::TFL::PassConfig& pass_config, pass_manager->addNestedPass(mlir::createCanonicalizerPass()); if (pass_config.shape_inference) { // Add a shape inference pass to optimize away the unnecessary casts. + // This also fixes the unranked shapes due to TF ops constant folding. + // TODO(fengliuai): remove this pass if TableGen patterns have a better + // to control the shapes for the intermediate results. pass_manager->addPass(mlir::TF::CreateTFShapeInferencePass()); } From 23d2e73f82418f395cab1b4eec9c6f9bde63615b Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Tue, 4 Aug 2020 01:24:52 +0000 Subject: [PATCH 2030/2522] Update tensorflow/core/platform/file_system.h --- tensorflow/core/platform/file_system.h | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/core/platform/file_system.h b/tensorflow/core/platform/file_system.h index 77ba34bcdc6..4a8d9e63023 100644 --- a/tensorflow/core/platform/file_system.h +++ b/tensorflow/core/platform/file_system.h @@ -697,7 +697,6 @@ class WrappedFileSystem : public FileSystem { /// Transactional API. This is an interim solution until ModularFileSystem class /// becomes a singleton. // TODO(sami): Remove this macro when filesystem plugins migration is complete. - #define TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT \ using FileSystem::NewRandomAccessFile; \ using FileSystem::NewWritableFile; \ From e2865bb150fb9371f96dc4f33fc6cea26b1c143b Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Mon, 3 Aug 2020 18:26:43 -0700 Subject: [PATCH 2031/2522] Add MWMS combinations to custom_training_loop_metrics_test. PiperOrigin-RevId: 324726066 Change-Id: I05fcf563c34c216d9a59a1656e3ce4d409e2a8f3 --- .../keras/distribute/custom_training_loop_metrics_test.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/keras/distribute/custom_training_loop_metrics_test.py b/tensorflow/python/keras/distribute/custom_training_loop_metrics_test.py index 8704b8378bf..a41d1f369a4 100644 --- a/tensorflow/python/keras/distribute/custom_training_loop_metrics_test.py +++ b/tensorflow/python/keras/distribute/custom_training_loop_metrics_test.py @@ -34,7 +34,8 @@ class KerasMetricsTest(test.TestCase, parameterized.TestCase): @combinations.generate( combinations.combine( - distribution=strategy_combinations.all_strategies, + distribution=strategy_combinations.all_strategies + + strategy_combinations.multiworker_strategies, mode=["eager"] )) def test_multiple_keras_metrics_experimental_run(self, distribution): @@ -58,7 +59,8 @@ class KerasMetricsTest(test.TestCase, parameterized.TestCase): @combinations.generate( combinations.combine( - distribution=strategy_combinations.all_strategies, + distribution=strategy_combinations.all_strategies+ + strategy_combinations.multiworker_strategies, mode=["eager"] )) def test_update_keras_metric_declared_in_strategy_scope(self, distribution): @@ -98,4 +100,4 @@ class KerasMetricsTest(test.TestCase, parameterized.TestCase): if __name__ == "__main__": - test.main() + combinations.main() From d353f49989edd1e9e8cf41466fb2d46c226eea5d Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Mon, 3 Aug 2020 18:28:29 -0700 Subject: [PATCH 2032/2522] Extended support of SUB (and other elementwise ops). OpenCL delegate supports SUB with runtime tensor as second argument. 
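For illustration only (not part of the change), a minimal TF Python sketch of a graph that exercises this path, a non-commutative elementwise op whose runtime tensor is the second operand, mirroring the new SubWithScalarAtFirstPosition test; shapes and values are arbitrary:

import tensorflow as tf

@tf.function(input_signature=[tf.TensorSpec([1, 4, 1, 1], tf.float32)])
def scalar_minus_input(x):
  # The runtime tensor x is the second operand of the subtraction.
  return 4.0 - x

converter = tf.lite.TFLiteConverter.from_concrete_functions(
    [scalar_minus_input.get_concrete_function()])
tflite_model = converter.convert()  # the GPU delegate can then be applied at inference time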
PiperOrigin-RevId: 324726289 Change-Id: If26a72a5214bffc7b664f1902344ab04038ed3f5 --- .../delegates/gpu/cl/kernels/elementwise.cc | 103 +++++++++++++----- .../delegates/gpu/cl/kernels/elementwise.h | 26 +---- .../gpu/cl/kernels/elementwise_test.cc | 60 +++++++--- .../gpu/cl/selectors/operation_selector.cc | 99 ++--------------- .../delegates/gpu/common/model_builder.cc | 2 + .../lite/delegates/gpu/common/operations.h | 4 + 6 files changed, 143 insertions(+), 151 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc index 063b15c1b69..f735f1aa047 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc @@ -98,53 +98,51 @@ std::string GetOneInputCode(const OperationType& op_type, } std::string GetTwoInputCode(const OperationType& op_type, + const std::string& result_var, const std::string& input0, - const std::string& input1) { + const std::string& input1, + bool swap_inputs = false) { std::string result; switch (op_type) { case OperationType::ADD: - result += "$0 += $1;\n"; + result += "$0 = $1 + $2;\n"; break; case OperationType::DIV: - result += "$0 /= $1;\n"; + result += "$0 = $1 / $2;\n"; break; case OperationType::MAXIMUM: - result += "$0 = max($0, $1);\n"; + result += "$0 = max($1, $2);\n"; break; case OperationType::MINIMUM: - result += "$0 = min($0, $1);\n"; + result += "$0 = min($1, $2);\n"; break; case OperationType::MUL: - result += "$0 *= $1;\n"; + result += "$0 = $1 * $2;\n"; break; case OperationType::POW: - result += "$0 = pow($0, $1);\n"; + result += "$0 = pow($1, $2);\n"; break; case OperationType::SQUARED_DIFF: - result += "$0 -= $1;\n"; - result += "$0 *= $0;\n"; + result += "$0 = ($1 - $2) * ($1 - $2);\n"; break; case OperationType::SUB: - result += "$0 -= $1;\n"; + result += "$0 = $1 - $2;\n"; break; default: return "Unknown operation type;\n"; } - return absl::Substitute(result, input0, input1); -} -} // namespace - -GPUOperation CreateElementwiseOneInput(const OperationDef& definition, - const OperationType& op_type) { - GPUOperation op(definition); - op.elementwise_ = true; - op.code_ = GetOneInputCode(op_type, definition.precision, "in_out_value"); - return op; + if (swap_inputs) { + return absl::Substitute(result, result_var, input1, input0); + } else { + return absl::Substitute(result, result_var, input0, input1); + } } +// Creates simple two input (first input is runtime tensor and second input is +// scalar argument) operation, for example sub, div, pow, etc. 
GPUOperation CreateElementwiseOneRuntimeOneScalar( - const CreationContext& creation_context, const OperationDef& definition, - const OperationType& op_type, float scalar_parameter) { + const OperationDef& definition, const OperationType& op_type, + float scalar_parameter, bool swap_inputs) { GPUOperation op(definition); op.elementwise_ = true; if (definition.precision == CalculationsPrecision::F32) { @@ -152,15 +150,21 @@ GPUOperation CreateElementwiseOneRuntimeOneScalar( } else { op.args_.AddHalf("scalar", half(scalar_parameter)); } - op.code_ = GetTwoInputCode(op_type, "in_out_value", "args.scalar"); + op.code_ = + "FLT4 second_val = (FLT4)(args.scalar, args.scalar, args.scalar, " + "args.scalar);\n"; + op.code_ += GetTwoInputCode(op_type, "in_out_value", "in_out_value", + "second_val", swap_inputs); return op; } +// Creates simple two input(first input is runtime tensor and second input is +// constant linear tensor) operation, for example sub, div and etc. absl::Status CreateElementwiseTwoInput( const CreationContext& creation_context, const OperationDef& definition, const OperationType& op_type, const tflite::gpu::Tensor& constant_tensor, - GPUOperation* result) { + bool swap_inputs, GPUOperation* result) { const BHWC shape = BHWC(1, 1, 1, constant_tensor.shape.v); TensorStorageType storage_type = SelectBestStorageType(*creation_context.context, *creation_context.device, @@ -187,15 +191,18 @@ absl::Status CreateElementwiseTwoInput( result->code_ += " second_val.z = second_val.x;\n"; result->code_ += " second_val.w = second_val.x;\n"; } - result->code_ += GetTwoInputCode(op_type, "in_out_value", "second_val"); + result->code_ += GetTwoInputCode(op_type, "in_out_value", "in_out_value", + "second_val", swap_inputs); return absl::OkStatus(); } +// Creates simple two input(first input is runtime tensor and second input is +// constant HWC tensor) operation, for example sub, div and etc. 
absl::Status CreateElementwiseTwoInput( const CreationContext& creation_context, const OperationDef& definition, const OperationType& op_type, const tflite::gpu::Tensor& constant_tensor, - GPUOperation* result) { + bool swap_inputs, GPUOperation* result) { const BHWC shape = BHWC(1, constant_tensor.shape.h, constant_tensor.shape.w, constant_tensor.shape.c); TensorStorageType storage_type = @@ -225,11 +232,50 @@ absl::Status CreateElementwiseTwoInput( result->code_ += " second_val.z = second_val.x;\n"; result->code_ += " second_val.w = second_val.x;\n"; } - result->code_ += GetTwoInputCode(op_type, "in_out_value", "second_val"); + result->code_ += GetTwoInputCode(op_type, "in_out_value", "in_out_value", + "second_val", swap_inputs); return absl::OkStatus(); } +} // namespace + +GPUOperation CreateElementwiseOneInput(const OperationDef& definition, + const OperationType& op_type) { + GPUOperation op(definition); + op.elementwise_ = true; + op.code_ = GetOneInputCode(op_type, definition.precision, "in_out_value"); + return op; +} + +absl::Status CreateElementwise(const CreationContext& creation_context, + const OperationDef& definition, + const OperationType& op_type, + const ElementwiseAttributes& attr, + GPUOperation* result) { + const float* scalar = absl::get_if(&attr.param); + const auto* linear_tensor = + absl::get_if>(&attr.param); + const auto* hwc_tensor = + absl::get_if>(&attr.param); + + if (scalar) { + *result = CreateElementwiseOneRuntimeOneScalar( + definition, op_type, *scalar, attr.runtime_tensor_is_second); + return absl::OkStatus(); + } else if (linear_tensor) { + return CreateElementwiseTwoInput(creation_context, definition, op_type, + *linear_tensor, + attr.runtime_tensor_is_second, result); + } else if (hwc_tensor) { + return CreateElementwiseTwoInput(creation_context, definition, op_type, + *hwc_tensor, attr.runtime_tensor_is_second, + result); + } + return absl::UnimplementedError( + "No elementwise implementation for this case"); +} + GPUOperation CreateElementwiseTwoInput(const OperationDef& definition, const OperationType& op_type, const BHWC& shape) { @@ -250,7 +296,8 @@ GPUOperation CreateElementwiseTwoInput(const OperationDef& definition, op.code_ += " second_val.z = second_val.x;\n"; op.code_ += " second_val.w = second_val.x;\n"; } - op.code_ += GetTwoInputCode(op_type, "in_out_value", "second_val"); + op.code_ += GetTwoInputCode(op_type, "in_out_value", "in_out_value", + "second_val", false); return op; } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.h b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.h index d03d535b39a..f841cdba9fb 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.h @@ -31,27 +31,13 @@ namespace cl { GPUOperation CreateElementwiseOneInput(const OperationDef& definition, const OperationType& op_type); -// Creates simple two input (first input is runtime tensor and second input is -// scalar argument) operation, for example sub, div, pow, etc. -GPUOperation CreateElementwiseOneRuntimeOneScalar( - const CreationContext& creation_context, const OperationDef& definition, - const OperationType& op_type, float scalar_parameter); - // Creates simple two input(first input is runtime tensor and second input is -// constant linear tensor) operation, for example sub, div and etc. 
-absl::Status CreateElementwiseTwoInput( - const CreationContext& creation_context, const OperationDef& definition, - const OperationType& op_type, - const tflite::gpu::Tensor& constant_tensor, - GPUOperation* result); - -// Creates simple two input(first input is runtime tensor and second input is -// constant HWC tensor) operation, for example sub, div and etc. -absl::Status CreateElementwiseTwoInput( - const CreationContext& creation_context, const OperationDef& definition, - const OperationType& op_type, - const tflite::gpu::Tensor& constant_tensor, - GPUOperation* result); +// constant or linear/hwc tensor) operation, for example sub, div and etc. +absl::Status CreateElementwise(const CreationContext& creation_context, + const OperationDef& definition, + const OperationType& op_type, + const ElementwiseAttributes& attr, + GPUOperation* result); // Creates simple two input(2 runtime tensors) operation, for example // sub, div and etc. diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise_test.cc index 11a651df901..23ee6622e8c 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise_test.cc @@ -546,9 +546,9 @@ TEST_F(OpenCLOperationTest, MaximumWithScalar) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - const float* scalar = absl::get_if(&attr.param); - GPUOperation operation = CreateElementwiseOneRuntimeOneScalar( - creation_context_, op_def, OperationType::MAXIMUM, *scalar); + GPUOperation operation; + ASSERT_OK(CreateElementwise(creation_context_, op_def, + OperationType::MAXIMUM, attr, &operation)); ASSERT_OK(ExecuteGPUOperation(src_tensor_0, creation_context_, &operation, BHWC(1, 4, 1, 1), &dst_tensor)); EXPECT_THAT(dst_tensor.data, @@ -578,9 +578,8 @@ TEST_F(OpenCLOperationTest, MaximumWithConstantLinearTensor) { op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; GPUOperation operation; - ASSERT_OK(CreateElementwiseTwoInput(creation_context_, op_def, - OperationType::MAXIMUM, linear_tensor, - &operation)); + ASSERT_OK(CreateElementwise(creation_context_, op_def, + OperationType::MAXIMUM, attr, &operation)); ASSERT_OK(ExecuteGPUOperation(src_tensor_0, creation_context_, &operation, BHWC(1, 2, 1, 2), &dst_tensor)); EXPECT_THAT(dst_tensor.data, @@ -597,6 +596,8 @@ TEST_F(OpenCLOperationTest, MaximumWithConstantHWCTensor) { ::tflite::gpu::Tensor hwc_tensor; hwc_tensor.shape = HWC(2, 1, 2); hwc_tensor.data = {0.5f, 2.0f, 0.7f, 4.7f}; + ElementwiseAttributes attr; + attr.param = hwc_tensor; for (auto storage : env_.GetSupportedStorages()) { for (auto precision : env_.GetSupportedPrecisions()) { @@ -608,9 +609,8 @@ TEST_F(OpenCLOperationTest, MaximumWithConstantHWCTensor) { op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; GPUOperation operation; - ASSERT_OK(CreateElementwiseTwoInput(creation_context_, op_def, - OperationType::MAXIMUM, hwc_tensor, - &operation)); + ASSERT_OK(CreateElementwise(creation_context_, op_def, + OperationType::MAXIMUM, attr, &operation)); ASSERT_OK(ExecuteGPUOperation(src_tensor_0, creation_context_, &operation, BHWC(1, 2, 1, 2), &dst_tensor)); EXPECT_THAT(dst_tensor.data, @@ -626,6 +626,8 @@ TEST_F(OpenCLOperationTest, MaximumWithConstantHWCTensorBroadcastChannels) { ::tflite::gpu::Tensor hwc_tensor; hwc_tensor.shape = 
HWC(2, 1, 1); hwc_tensor.data = {0.5f, 2.0f}; + ElementwiseAttributes attr; + attr.param = hwc_tensor; for (auto storage : env_.GetSupportedStorages()) { for (auto precision : env_.GetSupportedPrecisions()) { @@ -637,9 +639,8 @@ TEST_F(OpenCLOperationTest, MaximumWithConstantHWCTensorBroadcastChannels) { op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; GPUOperation operation; - ASSERT_OK(CreateElementwiseTwoInput(creation_context_, op_def, - OperationType::MAXIMUM, hwc_tensor, - &operation)); + ASSERT_OK(CreateElementwise(creation_context_, op_def, + OperationType::MAXIMUM, attr, &operation)); ASSERT_OK(ExecuteGPUOperation(src_tensor_0, creation_context_, &operation, BHWC(1, 2, 1, 2), &dst_tensor)); EXPECT_THAT(dst_tensor.data, @@ -693,9 +694,9 @@ TEST_F(OpenCLOperationTest, MinimumWithScalar) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - const float* scalar = absl::get_if(&attr.param); - GPUOperation operation = CreateElementwiseOneRuntimeOneScalar( - creation_context_, op_def, OperationType::MINIMUM, *scalar); + GPUOperation operation; + ASSERT_OK(CreateElementwise(creation_context_, op_def, + OperationType::MINIMUM, attr, &operation)); ASSERT_OK(ExecuteGPUOperation(src_tensor_0, creation_context_, &operation, BHWC(1, 4, 1, 1), &dst_tensor)); EXPECT_THAT(dst_tensor.data, @@ -788,6 +789,35 @@ TEST_F(OpenCLOperationTest, MulBroadcastChannels) { } } +TEST_F(OpenCLOperationTest, SubWithScalarAtFirstPosition) { + TensorFloat32 src_tensor_0; + src_tensor_0.shape = BHWC(1, 4, 1, 1); + src_tensor_0.data = {0.0f, -6.2f, 2.0f, -3.0f}; + + ElementwiseAttributes attr; + attr.param = 4.0f; + attr.runtime_tensor_is_second = true; + + for (auto storage : env_.GetSupportedStorages()) { + for (auto precision : env_.GetSupportedPrecisions()) { + const float eps = precision == CalculationsPrecision::F32 ? 
1e-6f : 1e-2f; + OperationDef op_def; + op_def.precision = precision; + auto data_type = DeduceDataTypeFromPrecision(precision); + op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); + op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); + TensorFloat32 dst_tensor; + GPUOperation operation; + ASSERT_OK(CreateElementwise(creation_context_, op_def, OperationType::SUB, + attr, &operation)); + ASSERT_OK(ExecuteGPUOperation(src_tensor_0, creation_context_, &operation, + BHWC(1, 4, 1, 1), &dst_tensor)); + EXPECT_THAT(dst_tensor.data, + Pointwise(FloatNear(eps), {4.0f, 10.2f, 2.0f, 7.0f})); + } + } +} + } // namespace } // namespace cl } // namespace gpu diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc index f60af5f730d..e1225e83e95 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc @@ -159,31 +159,11 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context, } else if (inputs.size() == 1 && node.operation.attributes.has_value()) { auto attr = absl::any_cast(node.operation.attributes); - const float* scalar = absl::get_if(&attr.param); - const auto* linear_tensor = - absl::get_if>( - &attr.param); - const auto* hwc_tensor = - absl::get_if>( - &attr.param); - if (scalar) { - GPUOperation operation = CreateElementwiseOneRuntimeOneScalar( - creation_context, op_def, op_type, *scalar); - *gpu_op = absl::make_unique(std::move(operation)); - return absl::OkStatus(); - } else if (linear_tensor) { - GPUOperation operation; - RETURN_IF_ERROR(CreateElementwiseTwoInput( - creation_context, op_def, op_type, *linear_tensor, &operation)); - *gpu_op = absl::make_unique(std::move(operation)); - return absl::OkStatus(); - } else if (hwc_tensor) { - GPUOperation operation; - RETURN_IF_ERROR(CreateElementwiseTwoInput( - creation_context, op_def, op_type, *hwc_tensor, &operation)); - *gpu_op = absl::make_unique(std::move(operation)); - return absl::OkStatus(); - } + GPUOperation operation; + RETURN_IF_ERROR(CreateElementwise(creation_context, op_def, op_type, + attr, &operation)); + *gpu_op = absl::make_unique(std::move(operation)); + return absl::OkStatus(); } return absl::UnimplementedError(absl::StrCat( "No support of ", node.operation.type, " with this parameters")); @@ -289,44 +269,6 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context, absl::make_unique(std::move(operation)); return absl::OkStatus(); } - case OperationType::MUL: { - if (inputs.size() == 2) { - GPUOperation operation = - CreateElementwiseTwoInput(op_def, op_type, inputs[1]->tensor.shape); - *gpu_op = absl::make_unique(std::move(operation)); - return absl::OkStatus(); - } else if (inputs.size() == 1 && node.operation.attributes.has_value()) { - auto attr = - absl::any_cast(node.operation.attributes); - const float* scalar = absl::get_if(&attr.param); - const auto* linear_tensor = - absl::get_if>( - &attr.param); - const auto* hwc_tensor = - absl::get_if>( - &attr.param); - if (scalar) { - GPUOperation operation = CreateElementwiseOneRuntimeOneScalar( - creation_context, op_def, op_type, *scalar); - *gpu_op = absl::make_unique(std::move(operation)); - return absl::OkStatus(); - } else if (linear_tensor) { - GPUOperation operation; - RETURN_IF_ERROR(CreateElementwiseTwoInput( - creation_context, op_def, op_type, *linear_tensor, &operation)); - *gpu_op = 
absl::make_unique(std::move(operation)); - return absl::OkStatus(); - } else if (hwc_tensor) { - GPUOperation operation; - RETURN_IF_ERROR(CreateElementwiseTwoInput( - creation_context, op_def, op_type, *hwc_tensor, &operation)); - *gpu_op = absl::make_unique(std::move(operation)); - return absl::OkStatus(); - } - } - return absl::UnimplementedError(absl::StrCat( - "No support of ", node.operation.type, " with this parameters")); - } case OperationType::PAD: { auto attr = absl::any_cast(node.operation.attributes); SelectPadding(attr, op_def, gpu_op); @@ -404,6 +346,7 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context, case OperationType::DIV: case OperationType::MAXIMUM: case OperationType::MINIMUM: + case OperationType::MUL: case OperationType::POW: case OperationType::SQUARED_DIFF: case OperationType::SUB: { @@ -415,31 +358,11 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context, } else if (inputs.size() == 1 && node.operation.attributes.has_value()) { auto attr = absl::any_cast(node.operation.attributes); - const float* scalar = absl::get_if(&attr.param); - const auto* linear_tensor = - absl::get_if>( - &attr.param); - const auto* hwc_tensor = - absl::get_if>( - &attr.param); - if (scalar) { - GPUOperation operation = CreateElementwiseOneRuntimeOneScalar( - creation_context, op_def, op_type, *scalar); - *gpu_op = absl::make_unique(std::move(operation)); - return absl::OkStatus(); - } else if (linear_tensor) { - GPUOperation operation; - RETURN_IF_ERROR(CreateElementwiseTwoInput( - creation_context, op_def, op_type, *linear_tensor, &operation)); - *gpu_op = absl::make_unique(std::move(operation)); - return absl::OkStatus(); - } else if (hwc_tensor) { - GPUOperation operation; - RETURN_IF_ERROR(CreateElementwiseTwoInput( - creation_context, op_def, op_type, *hwc_tensor, &operation)); - *gpu_op = absl::make_unique(std::move(operation)); - return absl::OkStatus(); - } + GPUOperation operation; + RETURN_IF_ERROR(CreateElementwise(creation_context, op_def, op_type, + attr, &operation)); + *gpu_op = absl::make_unique(std::move(operation)); + return absl::OkStatus(); } return absl::UnimplementedError(absl::StrCat( "No support of ", node.operation.type, " with this parameters")); diff --git a/tensorflow/lite/delegates/gpu/common/model_builder.cc b/tensorflow/lite/delegates/gpu/common/model_builder.cc index bf24e0d9eff..4c0fd827834 100644 --- a/tensorflow/lite/delegates/gpu/common/model_builder.cc +++ b/tensorflow/lite/delegates/gpu/common/model_builder.cc @@ -847,6 +847,8 @@ class ElementwiseOperationParser : public TFLiteOperationParser { /*outputs=*/1)); ElementwiseAttributes attr; RETURN_IF_ERROR(ParseInputsWithConstTensor(node, reader, &attr.param)); + attr.runtime_tensor_is_second = + IsConstantTensor(reader->GetInputTensor(0)); node->operation.attributes = std::move(attr); } else { return absl::InvalidArgumentError("Incorrect operation type passed"); diff --git a/tensorflow/lite/delegates/gpu/common/operations.h b/tensorflow/lite/delegates/gpu/common/operations.h index 225165589ae..563dbdec96e 100644 --- a/tensorflow/lite/delegates/gpu/common/operations.h +++ b/tensorflow/lite/delegates/gpu/common/operations.h @@ -490,6 +490,10 @@ BHWC CalculateOutputShape(const BHWC& input, const MeanAttributes& attr); struct ElementwiseAttributes { TensorOrScalar param; + // For elementwise operation with 2 inputs op(A, B), runtime_tensor_is_second + // true when runtime tensor is B(on second position). 
this is important for + // ops that non commutative, for example substract. + bool runtime_tensor_is_second = false; }; struct ReshapeAttributes { From e4592dad255abb37e6ad675d4caa50161aba7e82 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 3 Aug 2020 18:29:31 -0700 Subject: [PATCH 2033/2522] Internal change PiperOrigin-RevId: 324726407 Change-Id: Ie720508eeb1375dfd82d7ac5ef208c5f2e6edb45 --- tensorflow/core/kernels/cwise_op_exp.cc | 4 ++-- tensorflow/python/kernel_tests/cwise_ops_unary_test.py | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/kernels/cwise_op_exp.cc b/tensorflow/core/kernels/cwise_op_exp.cc index 48b6823cbdc..2b157f0e7a9 100644 --- a/tensorflow/core/kernels/cwise_op_exp.cc +++ b/tensorflow/core/kernels/cwise_op_exp.cc @@ -16,8 +16,8 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER6(UnaryOp, CPU, "Exp", functor::exp, float, Eigen::half, double, - bfloat16, complex64, complex128); +REGISTER5(UnaryOp, CPU, "Exp", functor::exp, float, Eigen::half, double, + complex64, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER5(UnaryOp, GPU, "Exp", functor::exp, float, Eigen::half, double, diff --git a/tensorflow/python/kernel_tests/cwise_ops_unary_test.py b/tensorflow/python/kernel_tests/cwise_ops_unary_test.py index 368f3509dc6..df848a653d4 100644 --- a/tensorflow/python/kernel_tests/cwise_ops_unary_test.py +++ b/tensorflow/python/kernel_tests/cwise_ops_unary_test.py @@ -389,7 +389,6 @@ class UnaryOpTest(test.TestCase): 2).reshape(1, 3, 2).astype(dtypes_lib.bfloat16.as_numpy_dtype) self._compareCpu(x, np.abs, math_ops.abs) self._compareCpu(x, np.abs, _ABS) - self._compareCpu(x, np.exp, math_ops.exp) self._compareBoth(x, np.negative, math_ops.negative) self._compareBoth(x, np.negative, _NEG) From 1277f67514466d3d571071d9c6b6f0119677ccc2 Mon Sep 17 00:00:00 2001 From: Meghna Natraj Date: Mon, 3 Aug 2020 18:37:33 -0700 Subject: [PATCH 2034/2522] Refactor and Fix lint errors in util.py and lite*.py files PiperOrigin-RevId: 324727472 Change-Id: I3766b0724564f91216bffcc8b55f70744fd94334 --- tensorflow/lite/python/lite.py | 11 +- tensorflow/lite/python/lite_test.py | 1031 +++++++++---------- tensorflow/lite/python/lite_v2_test.py | 148 +-- tensorflow/lite/python/lite_v2_test_util.py | 1 + tensorflow/lite/python/util.py | 11 +- tensorflow/lite/python/util_test.py | 70 +- 6 files changed, 607 insertions(+), 665 deletions(-) diff --git a/tensorflow/lite/python/lite.py b/tensorflow/lite/python/lite.py index a853cc953fd..56397110e5b 100644 --- a/tensorflow/lite/python/lite.py +++ b/tensorflow/lite/python/lite.py @@ -125,7 +125,7 @@ class Optimize(enum.Enum): OPTIMIZE_FOR_LATENCY = "OPTIMIZE_FOR_LATENCY" def __str__(self): - return self.value + return str(self.value) @_tf_export("lite.RepresentativeDataset") @@ -230,7 +230,7 @@ class QuantizationMode(object): def post_training_int16x8_allow_float(self): """Post training int16x8 quantize, allow float fallback.""" - return (self._is_int16x8_target_required() and self._is_allow_float()) + return self._is_int16x8_target_required() and self._is_allow_float() def post_training_dynamic_range_int8(self): """Post training int8 const, on-the-fly int8 quantize of dynamic tensors.""" @@ -907,7 +907,7 @@ class TFLiteFrozenGraphConverterV2(TFLiteConverterBaseV2): """ # TODO(b/130297984): Add support for converting multiple function. 
- if len(self._funcs) == 0: + if len(self._funcs) == 0: # pylint: disable=g-explicit-length-test raise ValueError("No ConcreteFunction is specified.") if len(self._funcs) > 1: @@ -1127,7 +1127,7 @@ class TFLiteConverterBaseV1(TFLiteConverterBase): parameter is ignored. (default tf.float32) inference_input_type: Target data type of real-number input arrays. Allows for a different type for input arrays. If an integer type is provided and - `optimizations` are not used, `quantized_inputs_stats` must be provided. + `optimizations` are not used, `quantized_input_stats` must be provided. If `inference_type` is tf.uint8, signaling conversion to a fully quantized model from a quantization-aware trained input model, then `inference_input_type` defaults to tf.uint8. In all other cases, @@ -1681,7 +1681,7 @@ class TFLiteConverter(TFLiteFrozenGraphConverter): inference_input_type: Target data type of real-number input arrays. Allows for a different type for input arrays. If an integer type is provided and `optimizations` are not used, - `quantized_inputs_stats` must be provided. + `quantized_input_stats` must be provided. If `inference_type` is tf.uint8, signaling conversion to a fully quantized model from a quantization-aware trained input model, then `inference_input_type` defaults to tf.uint8. @@ -2012,6 +2012,7 @@ class TFLiteConverter(TFLiteFrozenGraphConverter): """ return super(TFLiteConverter, self).convert() + @_tf_export(v1=["lite.TocoConverter"]) class TocoConverter(object): """Convert a TensorFlow model into `output_format` using TOCO. diff --git a/tensorflow/lite/python/lite_test.py b/tensorflow/lite/python/lite_test.py index e9853c7f17c..d17fc94cd20 100644 --- a/tensorflow/lite/python/lite_test.py +++ b/tensorflow/lite/python/lite_test.py @@ -114,7 +114,7 @@ class FromConstructor(TestModels): class FromSessionTest(TestModels, parameterized.TestCase): - def testFloat(self): + def testFloatModel(self): with ops.Graph().as_default(): in_tensor = array_ops.placeholder( shape=[1, 16, 16, 3], dtype=dtypes.float32) @@ -125,130 +125,27 @@ class FromSessionTest(TestModels, parameterized.TestCase): converter = lite.TFLiteConverter.from_session(sess, [in_tensor], [out_tensor]) tflite_model = converter.convert() - self.assertTrue(tflite_model) + self.assertIsNotNone(tflite_model) # Check values from converted model. 
interpreter = Interpreter(model_content=tflite_model) interpreter.allocate_tensors() input_details = interpreter.get_input_details() - self.assertEqual(1, len(input_details)) + self.assertLen(input_details, 1) self.assertEqual('Placeholder', input_details[0]['name']) self.assertEqual(np.float32, input_details[0]['dtype']) - self.assertTrue(([1, 16, 16, 3] == input_details[0]['shape']).all()) + self.assertAllEqual([1, 16, 16, 3], input_details[0]['shape']) self.assertEqual((0., 0.), input_details[0]['quantization']) output_details = interpreter.get_output_details() - self.assertEqual(1, len(output_details)) + self.assertLen(output_details, 1) self.assertEqual('add', output_details[0]['name']) self.assertEqual(np.float32, output_details[0]['dtype']) - self.assertTrue(([1, 16, 16, 3] == output_details[0]['shape']).all()) + self.assertAllEqual([1, 16, 16, 3], output_details[0]['shape']) self.assertEqual((0., 0.), output_details[0]['quantization']) - def testForgottenCallToAllocateTensors(self): - with ops.Graph().as_default(): - in_tensor = array_ops.placeholder( - shape=[1, 16, 16, 3], dtype=dtypes.float32) - out_tensor = in_tensor + in_tensor - sess = session.Session() - # Convert model and ensure model is not None. - converter = lite.TFLiteConverter.from_session(sess, [in_tensor], - [out_tensor]) - tflite_model = converter.convert() - self.assertTrue(tflite_model) - - # Check values from converted model. - interpreter = Interpreter(model_content=tflite_model) - input_index = interpreter.get_input_details()[0]['index'] - dummy_tensor = np.ones(shape=[1, 16, 16, 3], dtype=np.float32) - with self.assertRaises(ValueError): - interpreter.set_tensor(input_index, dummy_tensor) - - @parameterized.named_parameters( - ('EnableMlirConverter', True), # enable mlir - ('DisableMlirConverter', False)) # disable mlir - def testString(self, enable_mlir): - with ops.Graph().as_default(): - in_tensor = array_ops.placeholder(shape=[4], dtype=dtypes.string) - out_tensor = array_ops.reshape(in_tensor, shape=[2, 2]) - sess = session.Session() - - # Convert model and ensure model is not None. - converter = lite.TFLiteConverter.from_session(sess, [in_tensor], - [out_tensor]) - converter.experimental_new_converter = enable_mlir - tflite_model = converter.convert() - self.assertTrue(tflite_model) - - # Check values from converted model. - interpreter = Interpreter(model_content=tflite_model) - interpreter.allocate_tensors() - - input_details = interpreter.get_input_details() - self.assertEqual(1, len(input_details)) - self.assertEqual('Placeholder', input_details[0]['name']) - self.assertEqual(np.string_, input_details[0]['dtype']) - self.assertTrue(([4] == input_details[0]['shape']).all()) - - output_details = interpreter.get_output_details() - self.assertEqual(1, len(output_details)) - self.assertEqual('Reshape', output_details[0]['name']) - self.assertEqual(np.string_, output_details[0]['dtype']) - self.assertTrue(([2, 2] == output_details[0]['shape']).all()) - # TODO(b/122659643): Test setting/getting string data via the python - # interpreter API after support has been added. 
- - @parameterized.named_parameters( - ('EnableMlirConverter', True), # enable mlir - ('DisableMlirConverter', False)) # disable mlir - def testQuantization(self, enable_mlir): - with ops.Graph().as_default(): - in_tensor_1 = array_ops.placeholder( - shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputA') - in_tensor_2 = array_ops.placeholder( - shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputB') - out_tensor = array_ops.fake_quant_with_min_max_args( - in_tensor_1 + in_tensor_2, min=0., max=1., name='output') - sess = session.Session() - - # Convert model and ensure model is not None. - converter = lite.TFLiteConverter.from_session(sess, - [in_tensor_1, in_tensor_2], - [out_tensor]) - converter.inference_type = lite_constants.QUANTIZED_UINT8 - converter.quantized_input_stats = { - 'inputA': (0., 1.), - 'inputB': (0., 1.) - } # mean, std_dev - converter.experimental_new_converter = enable_mlir - tflite_model = converter.convert() - self.assertTrue(tflite_model) - - # Check values from converted model. - interpreter = Interpreter(model_content=tflite_model) - interpreter.allocate_tensors() - - input_details = interpreter.get_input_details() - self.assertEqual(2, len(input_details)) - self.assertEqual('inputA', input_details[0]['name']) - self.assertEqual(np.uint8, input_details[0]['dtype']) - self.assertTrue(([1, 16, 16, 3] == input_details[0]['shape']).all()) - self.assertEqual((1., 0.), - input_details[0]['quantization']) # scale, zero_point - - self.assertEqual('inputB', input_details[1]['name']) - self.assertEqual(np.uint8, input_details[1]['dtype']) - self.assertTrue(([1, 16, 16, 3] == input_details[1]['shape']).all()) - self.assertEqual((1., 0.), - input_details[1]['quantization']) # scale, zero_point - - output_details = interpreter.get_output_details() - self.assertEqual(1, len(output_details)) - self.assertEqual(np.uint8, output_details[0]['dtype']) - self.assertTrue(([1, 16, 16, 3] == output_details[0]['shape']).all()) - self.assertTrue(output_details[0]['quantization'][0] > 0) # scale - - def testQuantizedInput(self): + def testFloatModelQuantizedInput(self): with ops.Graph().as_default(): in_tensor = array_ops.placeholder( shape=[1, 16, 16, 3], dtype=dtypes.float32) @@ -262,7 +159,7 @@ class FromSessionTest(TestModels, parameterized.TestCase): converter.inference_type = lite_constants.FLOAT converter.quantized_input_stats = {'Placeholder': (0., 1.)} # mean, std_dev tflite_model = converter.convert() - self.assertTrue(tflite_model) + self.assertIsNotNone(tflite_model) # Check values from converted model. 
interpreter = Interpreter(model_content=tflite_model) @@ -272,38 +169,68 @@ class FromSessionTest(TestModels, parameterized.TestCase): self.assertLen(input_details, 1) self.assertEqual('Placeholder', input_details[0]['name']) self.assertEqual(np.uint8, input_details[0]['dtype']) - self.assertTrue(([1, 16, 16, 3] == input_details[0]['shape']).all()) - self.assertEqual((1., 0.), - input_details[0]['quantization']) # scale, zero_point + self.assertAllEqual([1, 16, 16, 3], input_details[0]['shape']) + self.assertEqual((1., 0.), input_details[0]['quantization']) output_details = interpreter.get_output_details() self.assertLen(output_details, 1) self.assertEqual('add', output_details[0]['name']) self.assertEqual(np.float32, output_details[0]['dtype']) - self.assertTrue(([1, 16, 16, 3] == output_details[0]['shape']).all()) + self.assertAllEqual([1, 16, 16, 3], output_details[0]['shape']) self.assertEqual((0., 0.), output_details[0]['quantization']) # float - def testQuantizationInvalid(self): + def testForgottenCallToAllocateTensors(self): with ops.Graph().as_default(): - in_tensor_1 = array_ops.placeholder( - shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputA') - in_tensor_2 = array_ops.placeholder( - shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputB') - out_tensor = array_ops.fake_quant_with_min_max_args( - in_tensor_1 + in_tensor_2, min=0., max=1., name='output') + in_tensor = array_ops.placeholder( + shape=[1, 16, 16, 3], dtype=dtypes.float32) + out_tensor = in_tensor + in_tensor + sess = session.Session() + # Convert model and ensure model is not None. + converter = lite.TFLiteConverter.from_session(sess, [in_tensor], + [out_tensor]) + tflite_model = converter.convert() + self.assertIsNotNone(tflite_model) + + # Check values from converted model. + interpreter = Interpreter(model_content=tflite_model) + input_index = interpreter.get_input_details()[0]['index'] + dummy_tensor = np.ones(shape=[1, 16, 16, 3], dtype=np.float32) + with self.assertRaises(ValueError): + interpreter.set_tensor(input_index, dummy_tensor) + + @parameterized.named_parameters( + ('EnableMlirConverter', True), # enable mlir + ('DisableMlirConverter', False)) # disable mlir + def testString(self, enable_mlir_converter): + with ops.Graph().as_default(): + in_tensor = array_ops.placeholder(shape=[4], dtype=dtypes.string) + out_tensor = array_ops.reshape(in_tensor, shape=[2, 2]) sess = session.Session() # Convert model and ensure model is not None. - converter = lite.TFLiteConverter.from_session(sess, - [in_tensor_1, in_tensor_2], + converter = lite.TFLiteConverter.from_session(sess, [in_tensor], [out_tensor]) - converter.inference_type = lite_constants.QUANTIZED_UINT8 - converter.quantized_input_stats = {'inputA': (0., 1.)} # mean, std_dev - with self.assertRaises(ValueError) as error: - converter.convert() - self.assertEqual( - 'Quantization input stats are not available for input tensors ' - '\'inputB\'.', str(error.exception)) + converter.experimental_new_converter = enable_mlir_converter + tflite_model = converter.convert() + self.assertIsNotNone(tflite_model) + + # Check values from converted model. 
+ interpreter = Interpreter(model_content=tflite_model) + interpreter.allocate_tensors() + + input_details = interpreter.get_input_details() + self.assertLen(input_details, 1) + self.assertEqual('Placeholder', input_details[0]['name']) + self.assertEqual(np.string_, input_details[0]['dtype']) + self.assertAllEqual([4], input_details[0]['shape']) + + output_details = interpreter.get_output_details() + self.assertLen(output_details, 1) + self.assertEqual('Reshape', output_details[0]['name']) + self.assertEqual(np.string_, output_details[0]['dtype']) + self.assertAllEqual([2, 2], output_details[0]['shape']) + # TODO(b/122659643): Test setting/getting string data via the python + # interpreter API after support has been added. def testIntermediateInputArray(self): """Convert a model from an intermediate input array.""" @@ -318,24 +245,24 @@ class FromSessionTest(TestModels, parameterized.TestCase): converter = lite.TFLiteConverter.from_session(sess, [in_tensor_final], [out_tensor]) tflite_model = converter.convert() - self.assertTrue(tflite_model) + self.assertIsNotNone(tflite_model) # Check values from converted model. interpreter = Interpreter(model_content=tflite_model) interpreter.allocate_tensors() input_details = interpreter.get_input_details() - self.assertEqual(1, len(input_details)) + self.assertLen(input_details, 1) self.assertEqual('add', input_details[0]['name']) self.assertEqual(np.float32, input_details[0]['dtype']) - self.assertTrue(([1, 16, 16, 3] == input_details[0]['shape']).all()) + self.assertAllEqual([1, 16, 16, 3], input_details[0]['shape']) self.assertEqual((0., 0.), input_details[0]['quantization']) output_details = interpreter.get_output_details() - self.assertEqual(1, len(output_details)) + self.assertLen(output_details, 1) self.assertEqual('add_1', output_details[0]['name']) self.assertEqual(np.float32, output_details[0]['dtype']) - self.assertTrue(([1, 16, 16, 3] == output_details[0]['shape']).all()) + self.assertAllEqual([1, 16, 16, 3], output_details[0]['shape']) self.assertEqual((0., 0.), output_details[0]['quantization']) def testSizeNoneInvalid(self): @@ -357,7 +284,7 @@ class FromSessionTest(TestModels, parameterized.TestCase): @parameterized.named_parameters( ('EnableMlirConverter', True), # enable mlir ('DisableMlirConverter', False)) # disable mlir - def testScalarValid(self, enable_mlir): + def testScalarValid(self, enable_mlir_converter): # Construct a graph using a scalar (empty shape) input. with ops.Graph().as_default(): in_tensor = array_ops.placeholder(dtype=dtypes.float32, shape=[]) @@ -367,25 +294,25 @@ class FromSessionTest(TestModels, parameterized.TestCase): # Test conversion with the scalar input shape. converter = lite.TFLiteConverter.from_session(sess, [in_tensor], [out_tensor]) - converter.experimental_new_converter = enable_mlir + converter.experimental_new_converter = enable_mlir_converter tflite_model = converter.convert() - self.assertTrue(tflite_model) + self.assertIsNotNone(tflite_model) # Check values from converted model. 
     interpreter = Interpreter(model_content=tflite_model)
     interpreter.allocate_tensors()

     input_details = interpreter.get_input_details()
-    self.assertEqual(1, len(input_details))
+    self.assertLen(input_details, 1)
     self.assertEqual('Placeholder', input_details[0]['name'])
     self.assertEqual(np.float32, input_details[0]['dtype'])
-    self.assertTrue(([] == input_details[0]['shape']).all())
+    self.assertEmpty(input_details[0]['shape'])

     output_details = interpreter.get_output_details()
-    self.assertEqual(1, len(output_details))
+    self.assertLen(output_details, 1)
     self.assertEqual('add', output_details[0]['name'])
     self.assertEqual(np.float32, output_details[0]['dtype'])
-    self.assertTrue(([] == input_details[0]['shape']).all())
+    self.assertEmpty(output_details[0]['shape'])

     # Validate inference using the scalar inputs/outputs.
     test_input = np.array(4.0, dtype=np.float32)
@@ -394,7 +321,7 @@ class FromSessionTest(TestModels, parameterized.TestCase):
     interpreter.invoke()

     output_data = interpreter.get_tensor(output_details[0]['index'])
-    self.assertTrue((expected_output == output_data).all())
+    self.assertEqual(expected_output, output_data)

   def testSizeInvalid(self):
     with ops.Graph().as_default():
@@ -433,9 +360,8 @@ class FromSessionTest(TestModels, parameterized.TestCase):
     self.assertLen(input_details, 1)
     self.assertEqual('Placeholder', input_details[0]['name'])
     self.assertEqual(np.float32, input_details[0]['dtype'])
-    self.assertTrue(([1, 1, 16, 3] == input_details[0]['shape']).all())
-    self.assertTrue(([1, -1, 16,
-                      3] == input_details[0]['shape_signature']).all())
+    self.assertAllEqual([1, 1, 16, 3], input_details[0]['shape'])
+    self.assertAllEqual([1, -1, 16, 3], input_details[0]['shape_signature'])
     self.assertEqual((0., 0.), input_details[0]['quantization'])

     # Resize tensor with strict checking.
@@ -452,13 +378,11 @@ class FromSessionTest(TestModels, parameterized.TestCase):

     input_details = interpreter.get_input_details()
     self.assertLen(input_details, 1)
-    self.assertTrue(([1, 16, 16, 3] == input_details[0]['shape']).all())
-    self.assertTrue(([1, -1, 16,
-                      3] == input_details[0]['shape_signature']).all())
+    self.assertAllEqual([1, 16, 16, 3], input_details[0]['shape'])
+    self.assertAllEqual([1, -1, 16, 3], input_details[0]['shape_signature'])

     output_details = interpreter.get_output_details()
-    self.assertTrue(([1, -1, 16,
-                      3] == output_details[0]['shape_signature']).all())
+    self.assertAllEqual([1, -1, 16, 3], output_details[0]['shape_signature'])

   def testResizeTensorInputStrict(self):
     # Ensures that resize_tensor_input(strict=True) works as expected.
@@ -472,7 +396,7 @@ class FromSessionTest(TestModels, parameterized.TestCase):
     converter = lite.TFLiteConverter.from_session(sess, [in_tensor],
                                                   [out_tensor])
     tflite_model = converter.convert()
-    self.assertTrue(tflite_model)
+    self.assertIsNotNone(tflite_model)

     # Check values from converted model.
     interpreter = Interpreter(model_content=tflite_model)
@@ -499,24 +423,24 @@ class FromSessionTest(TestModels, parameterized.TestCase):
     converter = lite.TFLiteConverter.from_session(sess, [in_tensor],
                                                   [out_tensor])
     tflite_model = converter.convert()
-    self.assertTrue(tflite_model)
+    self.assertIsNotNone(tflite_model)

     # Check values from converted model.
interpreter = Interpreter(model_content=tflite_model) interpreter.allocate_tensors() input_details = interpreter.get_input_details() - self.assertEqual(1, len(input_details)) + self.assertLen(input_details, 1) self.assertEqual('Placeholder', input_details[0]['name']) self.assertEqual(np.float32, input_details[0]['dtype']) - self.assertTrue(([1, 16, 16, 3] == input_details[0]['shape']).all()) + self.assertAllEqual([1, 16, 16, 3], input_details[0]['shape']) self.assertEqual((0., 0.), input_details[0]['quantization']) output_details = interpreter.get_output_details() - self.assertEqual(1, len(output_details)) + self.assertLen(output_details, 1) self.assertEqual('add', output_details[0]['name']) self.assertEqual(np.float32, output_details[0]['dtype']) - self.assertTrue(([1, 16, 16, 3] == output_details[0]['shape']).all()) + self.assertAllEqual([1, 16, 16, 3], output_details[0]['shape']) self.assertEqual((0., 0.), output_details[0]['quantization']) def testBatchSizeNonZero(self): @@ -533,7 +457,7 @@ class FromSessionTest(TestModels, parameterized.TestCase): [in_tensor_1, in_tensor_2], [out_tensor]) tflite_model = converter.convert() - self.assertTrue(tflite_model) + self.assertIsNotNone(tflite_model) # Check values from converted model. interpreter = Interpreter(model_content=tflite_model) @@ -542,9 +466,9 @@ class FromSessionTest(TestModels, parameterized.TestCase): input_details = interpreter.get_input_details() self.assertLen(input_details, 2) self.assertEqual('input1', input_details[0]['name']) - self.assertTrue(([1, 4] == input_details[0]['shape']).all()) + self.assertAllEqual([1, 4], input_details[0]['shape']) self.assertEqual('input2', input_details[1]['name']) - self.assertTrue(([4, 10] == input_details[1]['shape']).all()) + self.assertAllEqual([4, 10], input_details[1]['shape']) def testFreezeGraph(self): with ops.Graph().as_default(): @@ -562,24 +486,24 @@ class FromSessionTest(TestModels, parameterized.TestCase): converter = lite.TFLiteConverter.from_session(sess, [in_tensor], [out_tensor]) tflite_model = converter.convert() - self.assertTrue(tflite_model) + self.assertIsNotNone(tflite_model) # Check values from converted model. 
     interpreter = Interpreter(model_content=tflite_model)
     interpreter.allocate_tensors()

     input_details = interpreter.get_input_details()
-    self.assertEqual(1, len(input_details))
+    self.assertLen(input_details, 1)
     self.assertEqual('Placeholder', input_details[0]['name'])
     self.assertEqual(np.float32, input_details[0]['dtype'])
-    self.assertTrue(([1, 16, 16, 3] == input_details[0]['shape']).all())
+    self.assertAllEqual([1, 16, 16, 3], input_details[0]['shape'])
     self.assertEqual((0., 0.), input_details[0]['quantization'])

     output_details = interpreter.get_output_details()
-    self.assertEqual(1, len(output_details))
+    self.assertLen(output_details, 1)
     self.assertEqual('top_k:1', output_details[0]['name'])
     self.assertEqual(np.int32, output_details[0]['dtype'])
-    self.assertTrue(([1, 16, 16, 1] == output_details[0]['shape']).all())
+    self.assertAllEqual([1, 16, 16, 1], output_details[0]['shape'])
     self.assertEqual((0., 0.), output_details[0]['quantization'])

   def testGraphviz(self):
@@ -594,12 +518,12 @@ class FromSessionTest(TestModels, parameterized.TestCase):
                                                   [out_tensor])
     converter.output_format = lite_constants.GRAPHVIZ_DOT
     graphviz_output = converter.convert()
-    self.assertTrue(graphviz_output)
+    self.assertIsNotNone(graphviz_output)

   @parameterized.named_parameters(
       ('EnableMlirConverter', True),  # enable mlir
       ('DisableMlirConverter', False))  # disable mlir
-  def testDumpGraphviz(self, enable_mlir):
+  def testDumpGraphviz(self, enable_mlir_converter):
     with ops.Graph().as_default():
       in_tensor = array_ops.placeholder(
           shape=[1, 16, 16, 3], dtype=dtypes.float32)
       out_tensor = in_tensor + in_tensor
       sess = session.Session()
@@ -609,35 +533,35 @@ class FromSessionTest(TestModels, parameterized.TestCase):
     # Convert model and ensure model is not None.
     converter = lite.TFLiteConverter.from_session(sess, [in_tensor],
                                                   [out_tensor])
-    converter.experimental_new_converter = enable_mlir
+    converter.experimental_new_converter = enable_mlir_converter
     graphviz_dir = self.get_temp_dir()
     converter.dump_graphviz_dir = graphviz_dir
     tflite_model = converter.convert()
-    self.assertTrue(tflite_model)
+    self.assertIsNotNone(tflite_model)

     # Ensure interpreter is able to allocate and check graphviz data.
     interpreter = Interpreter(model_content=tflite_model)
     interpreter.allocate_tensors()

     num_items_graphviz = len(os.listdir(graphviz_dir))
     self.assertTrue(num_items_graphviz)
     self.assertTrue(
         os.path.exists(os.path.join(graphviz_dir, 'toco_AT_IMPORT.dot')))
     self.assertTrue(
         os.path.exists(
             os.path.join(graphviz_dir, 'toco_AFTER_TRANSFORMATIONS.dot')))

     # new converter doesn't support `dump_graphviz_video` flag
-    if not enable_mlir:
+    if not enable_mlir_converter:
       # Convert model and ensure model is not None.
       converter = lite.TFLiteConverter.from_session(sess, [in_tensor],
                                                     [out_tensor])
-      converter.experimental_new_converter = enable_mlir
+      converter.experimental_new_converter = enable_mlir_converter
       graphviz_dir = self.get_temp_dir()
       converter.dump_graphviz_dir = graphviz_dir
       converter.dump_graphviz_video = True
       tflite_model = converter.convert()
-      self.assertTrue(tflite_model)
+      self.assertIsNotNone(tflite_model)

       # Ensure graphviz folder has more data after using video flag.
num_items_graphviz_video = len(os.listdir(graphviz_dir)) @@ -656,10 +580,9 @@ class FromSessionTest(TestModels, parameterized.TestCase): log_dir = self.get_temp_dir() converter.conversion_summary_dir = log_dir tflite_model = converter.convert() - self.assertTrue(tflite_model) + self.assertIsNotNone(tflite_model) - num_items_conversion_summary = len(os.listdir(log_dir)) - self.assertTrue(num_items_conversion_summary) + self.assertNotEmpty(os.listdir(log_dir)) def testDumpConversionSummaryWithOldConverter(self): with ops.Graph().as_default(): @@ -675,7 +598,7 @@ class FromSessionTest(TestModels, parameterized.TestCase): log_dir = self.get_temp_dir() converter.conversion_summary_dir = log_dir tflite_model = converter.convert() - self.assertTrue(tflite_model) + self.assertIsNotNone(tflite_model) # Check nothing is generated under the conversion summary path. num_items_conversion_summary = len(os.listdir(log_dir)) self.assertEqual(num_items_conversion_summary, 0) @@ -683,104 +606,7 @@ class FromSessionTest(TestModels, parameterized.TestCase): @parameterized.named_parameters( ('EnableMlirConverter', True), # enable mlir ('DisableMlirConverter', False)) # disable mlir - def testInferenceInputType(self, enable_mlir): - with ops.Graph().as_default(): - in_tensor = array_ops.placeholder( - shape=[1, 16, 16, 3], dtype=dtypes.float32) - out_tensor = in_tensor + in_tensor - sess = session.Session() - - # Convert model and ensure model is not None. - converter = lite.TFLiteConverter.from_session(sess, [in_tensor], - [out_tensor]) - converter.experimental_new_converter = enable_mlir - converter.inference_input_type = lite_constants.QUANTIZED_UINT8 - converter.quantized_input_stats = {'Placeholder': (0., 1.)} # mean, std_dev - tflite_model = converter.convert() - self.assertTrue(tflite_model) - - # Check values from converted model. - interpreter = Interpreter(model_content=tflite_model) - interpreter.allocate_tensors() - - input_details = interpreter.get_input_details() - self.assertEqual(1, len(input_details)) - self.assertEqual('Placeholder', input_details[0]['name']) - self.assertEqual(np.uint8, input_details[0]['dtype']) - self.assertTrue(([1, 16, 16, 3] == input_details[0]['shape']).all()) - self.assertEqual((1., 0.), input_details[0]['quantization']) - - output_details = interpreter.get_output_details() - self.assertEqual(1, len(output_details)) - self.assertEqual('add', output_details[0]['name']) - self.assertEqual(np.float32, output_details[0]['dtype']) - self.assertTrue(([1, 16, 16, 3] == output_details[0]['shape']).all()) - - def testDefaultRangesStats(self): - with ops.Graph().as_default(): - in_tensor = array_ops.placeholder( - shape=[1, 16, 16, 3], dtype=dtypes.float32) - out_tensor = in_tensor + in_tensor - sess = session.Session() - - # Convert model and ensure model is not None. - converter = lite.TFLiteConverter.from_session(sess, [in_tensor], - [out_tensor]) - converter.inference_type = lite_constants.QUANTIZED_UINT8 - converter.quantized_input_stats = {'Placeholder': (0., 1.)} # mean, std_dev - converter.default_ranges_stats = (0, 6) # min, max - tflite_model = converter.convert() - self.assertTrue(tflite_model) - - # Check values from converted model. 
- interpreter = Interpreter(model_content=tflite_model) - interpreter.allocate_tensors() - - input_details = interpreter.get_input_details() - self.assertEqual(1, len(input_details)) - self.assertEqual('Placeholder', input_details[0]['name']) - self.assertEqual(np.uint8, input_details[0]['dtype']) - self.assertTrue(([1, 16, 16, 3] == input_details[0]['shape']).all()) - self.assertEqual((1., 0.), input_details[0]['quantization']) - - output_details = interpreter.get_output_details() - self.assertEqual(1, len(output_details)) - self.assertEqual('add', output_details[0]['name']) - self.assertEqual(np.uint8, output_details[0]['dtype']) - self.assertTrue(([1, 16, 16, 3] == output_details[0]['shape']).all()) - self.assertTrue(output_details[0]['quantization'][0] > 0) # scale - - @parameterized.named_parameters( - ('EnableMlirConverter', True), # enable mlir - ('DisableMlirConverter', False)) # disable mlir - def testPostTrainingQuantizeDeprecatedAttribute(self, enable_mlir): - with ops.Graph().as_default(): - in_tensor_1 = array_ops.placeholder( - shape=[33, 33], dtype=dtypes.float32, name='inputA') - in_tensor_2 = constant_op.constant( - np.random.uniform(low=-10., high=10., size=(33, 33)), - shape=[33, 33], - dtype=dtypes.float32, - name='inputB') - out_tensor = math_ops.matmul(in_tensor_1, in_tensor_2, name='output') - sess = session.Session() - - quantized_converter = lite.TFLiteConverter.from_session( - sess, [in_tensor_1], [out_tensor]) - self.assertFalse(quantized_converter.post_training_quantize) - quantized_converter.experimental_new_converter = enable_mlir - - quantized_converter.post_training_quantize = True - self.assertTrue(quantized_converter.post_training_quantize) - self.assertEqual(quantized_converter.optimizations, [lite.Optimize.DEFAULT]) - - quantized_tflite = quantized_converter.convert() - self.assertTrue(quantized_tflite) - - @parameterized.named_parameters( - ('EnableMlirConverter', True), # enable mlir - ('DisableMlirConverter', False)) # disable mlir - def testPostTrainingQuantize(self, enable_mlir): + def testQuantizeDynamicRange(self, enable_mlir_converter): np.random.seed(0) with ops.Graph().as_default(): # We need the tensor to have more than 1024 elements for quantize_weights @@ -796,26 +622,53 @@ class FromSessionTest(TestModels, parameterized.TestCase): sess = session.Session() # Convert float model. - float_converter = lite.TFLiteConverter.from_session(sess, [in_tensor_1], - [out_tensor]) - float_converter.experimental_new_converter = enable_mlir - float_tflite = float_converter.convert() - self.assertTrue(float_tflite) + float_converter = lite.TFLiteConverter.from_session( + sess, [in_tensor_1], [out_tensor]) + float_converter.experimental_new_converter = enable_mlir_converter + float_tflite_model = float_converter.convert() + self.assertIsNotNone(float_tflite_model) # Convert quantized weights model. quantized_converter = lite.TFLiteConverter.from_session( sess, [in_tensor_1], [out_tensor]) - quantized_converter.experimental_new_converter = enable_mlir quantized_converter.optimizations = [lite.Optimize.DEFAULT] - quantized_converter.experimental_new_converter = enable_mlir - quantized_tflite = quantized_converter.convert() - self.assertTrue(quantized_tflite) + quantized_converter.experimental_new_converter = enable_mlir_converter + quantized_tflite_model = quantized_converter.convert() + self.assertIsNotNone(quantized_tflite_model) # Ensure that the quantized weights tflite model is smaller. 
- self.assertTrue(len(quantized_tflite) < len(float_tflite)) + self.assertLess(len(quantized_tflite_model), len(float_tflite_model)) - def _getCalibrationQuantizeModel(self): + @parameterized.named_parameters( + ('EnableMlirConverter', True), # enable mlir + ('DisableMlirConverter', False)) # disable mlir + def testQuantizeDynamicRangeDeprecatedPostTrainingQuantizeAttribute( + self, enable_mlir_converter): + with ops.Graph().as_default(): + in_tensor_1 = array_ops.placeholder( + shape=[33, 33], dtype=dtypes.float32, name='inputA') + in_tensor_2 = constant_op.constant( + np.random.uniform(low=-10., high=10., size=(33, 33)), + shape=[33, 33], + dtype=dtypes.float32, + name='inputB') + out_tensor = math_ops.matmul(in_tensor_1, in_tensor_2, name='output') + sess = session.Session() + + quantized_converter = lite.TFLiteConverter.from_session( + sess, [in_tensor_1], [out_tensor]) + self.assertFalse(quantized_converter.post_training_quantize) + quantized_converter.experimental_new_converter = enable_mlir_converter + + quantized_converter.post_training_quantize = True + self.assertTrue(quantized_converter.post_training_quantize) + self.assertEqual(quantized_converter.optimizations, [lite.Optimize.DEFAULT]) + + quantized_tflite_model = quantized_converter.convert() + self.assertIsNotNone(quantized_tflite_model) + + def _getIntegerQuantizeModel(self): np.random.seed(0) inp = array_ops.placeholder( dtype=dtypes.float32, shape=(1, 5, 5, 3), name='input') @@ -835,37 +688,37 @@ class FromSessionTest(TestModels, parameterized.TestCase): @parameterized.named_parameters( ('EnableMlirConverter', True), # enable mlir ('DisableMlirConverter', False)) # disable mlir - def testPostTrainingCalibrateAndQuantize(self, enable_mlir): + def testQuantizeInt8AllowFloat(self, enable_mlir_converter): with ops.Graph().as_default(): - inp, output, calibration_gen = self._getCalibrationQuantizeModel() + inp, output, calibration_gen = self._getIntegerQuantizeModel() sess = session.Session() # Convert float model. float_converter = lite.TFLiteConverter.from_session(sess, [inp], [output]) - float_tflite = float_converter.convert() - self.assertTrue(float_tflite) + float_tflite_model = float_converter.convert() + self.assertIsNotNone(float_tflite_model) # Convert quantized model. quantized_converter = lite.TFLiteConverter.from_session( sess, [inp], [output]) - quantized_converter.experimental_new_converter = enable_mlir + quantized_converter.experimental_new_converter = enable_mlir_converter quantized_converter.optimizations = [lite.Optimize.DEFAULT] quantized_converter.representative_dataset = calibration_gen - quantized_tflite = quantized_converter.convert() - self.assertTrue(quantized_tflite) + quantized_tflite_model = quantized_converter.convert() + self.assertIsNotNone(quantized_tflite_model) # The default input and output types should be float. - interpreter = Interpreter(model_content=quantized_tflite) + interpreter = Interpreter(model_content=quantized_tflite_model) interpreter.allocate_tensors() input_details = interpreter.get_input_details() - self.assertEqual(1, len(input_details)) + self.assertLen(input_details, 1) self.assertEqual(np.float32, input_details[0]['dtype']) output_details = interpreter.get_output_details() - self.assertEqual(1, len(output_details)) + self.assertLen(output_details, 1) self.assertEqual(np.float32, output_details[0]['dtype']) # Ensure that the quantized weights tflite model is smaller. 
- self.assertLess(len(quantized_tflite), len(float_tflite)) + self.assertLess(len(quantized_tflite_model), len(float_tflite_model)) @parameterized.named_parameters( # Quantize model to Int8: with enable mlir @@ -883,138 +736,82 @@ class FromSessionTest(TestModels, parameterized.TestCase): [lite.OpsSet.\ EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8], True)) - def testCalibrateAndQuantizeBuiltinInt(self, supported_ops, enable_mlir): + def testQuantizeInt8And16x8(self, supported_ops, enable_mlir_converter): with ops.Graph().as_default(): - inp, output, calibration_gen = self._getCalibrationQuantizeModel() + inp, output, calibration_gen = self._getIntegerQuantizeModel() sess = session.Session() # Convert float model. float_converter = lite.TFLiteConverter.from_session(sess, [inp], [output]) - float_converter.experimental_new_converter = enable_mlir - float_tflite = float_converter.convert() - self.assertTrue(float_tflite) + float_converter.experimental_new_converter = enable_mlir_converter + float_tflite_model = float_converter.convert() + self.assertIsNotNone(float_tflite_model) # Convert model by specifying target spec (instead of optimizations), since # when targeting an integer only backend, quantization is mandatory. quantized_converter = lite.TFLiteConverter.from_session( sess, [inp], [output]) - quantized_converter.experimental_new_converter = enable_mlir + quantized_converter.experimental_new_converter = enable_mlir_converter quantized_converter.target_spec.supported_ops = supported_ops quantized_converter.representative_dataset = calibration_gen - quantized_tflite = quantized_converter.convert() - self.assertTrue(quantized_tflite) + quantized_tflite_model = quantized_converter.convert() + self.assertIsNotNone(quantized_tflite_model) # The default input and output types should be float. - interpreter = Interpreter(model_content=quantized_tflite) + interpreter = Interpreter(model_content=quantized_tflite_model) interpreter.allocate_tensors() input_details = interpreter.get_input_details() - self.assertEqual(1, len(input_details)) + self.assertLen(input_details, 1) self.assertEqual(np.float32, input_details[0]['dtype']) output_details = interpreter.get_output_details() - self.assertEqual(1, len(output_details)) + self.assertLen(output_details, 1) self.assertEqual(np.float32, output_details[0]['dtype']) # Ensure that the quantized weights tflite model is smaller. - self.assertLess(len(quantized_tflite), len(float_tflite)) + self.assertLess(len(quantized_tflite_model), len(float_tflite_model)) @parameterized.named_parameters( - # Quantize to Float16 even if rep data provided. - ('UseRepresentativeData', True, False, True, False, False, False), - # Quantize to Float16 if no rep data provided. - ('NoRepresentativeData', False, False, True, False, False, False), - # Post training quantization if both rep data and int8 included. - ('UseSampleDataIncludeInt8', True, True, False, False, True, False), - - # Quantize to Float16 even if rep data provided with mlir. - ('UseRepresentativeDataMlir', True, False, True, False, False, True), - # Quantize to Float16 if no rep data provided with mlir. - ('NoRepresentativeDataMlir', False, False, True, False, False, True), - # Post training quantization if both rep data and int8 included with mlir. 
- ('SampleDataIncludeInt8Mlir', True, True, False, False, True, True)) - def testQuantizeFloat16(self, use_rep_data, include_int8, - is_float16_quantized, is_error, - is_post_training_quantized, enable_mlir): + ('EnableMlirConverter', True), # enable mlir + ('DisableMlirConverter', False)) # disable mlir + def testQuantizeInt8InputOutput(self, enable_mlir_converter): with ops.Graph().as_default(): - inp, output, calibration_gen = self._getCalibrationQuantizeModel() + inp, output, calibration_gen = self._getIntegerQuantizeModel() sess = session.Session() - idx = 1 if enable_mlir else 0 - node_name = 'Conv2D' if enable_mlir else 'Conv2D_bias' # Convert float model. float_converter = lite.TFLiteConverter.from_session(sess, [inp], [output]) - float_converter.experimental_new_converter = enable_mlir - float_tflite = float_converter.convert() - self.assertTrue(float_tflite) - interpreter = Interpreter(model_content=float_tflite) + float_converter.experimental_new_converter = enable_mlir_converter + float_tflite_model = float_converter.convert() + self.assertIsNotNone(float_tflite_model) + + # Convert quantized weights model. + quantized_converter = lite.TFLiteConverter.from_session( + sess, [inp], [output]) + quantized_converter.experimental_new_converter = enable_mlir_converter + quantized_converter.inference_input_type = lite_constants.INT8 + quantized_converter.inference_output_type = lite_constants.INT8 + quantized_converter.optimizations = [lite.Optimize.DEFAULT] + quantized_converter.representative_dataset = calibration_gen + quantized_tflite_model = quantized_converter.convert() + self.assertIsNotNone(quantized_tflite_model) + + # The input and output types should be int8. + interpreter = Interpreter(model_content=quantized_tflite_model) interpreter.allocate_tensors() - self.assertEqual(interpreter.get_tensor_details()[idx]['name'], node_name) - self.assertEqual(interpreter.get_tensor_details()[idx]['dtype'], - lite.constants.FLOAT) - # Convert model to quantized version - quantized_converter = lite.TFLiteConverter.from_session( - sess, [inp], [output]) - quantized_converter.experimental_new_converter = enable_mlir - quantized_converter.optimizations = [lite.Optimize.DEFAULT] - quantized_converter.target_spec.supported_types = [lite.constants.FLOAT16] - if include_int8: - quantized_converter.target_spec.supported_types.append( - lite.constants.INT8) - if use_rep_data: - quantized_converter.representative_dataset = calibration_gen + input_details = interpreter.get_input_details() + self.assertLen(input_details, 1) + self.assertEqual(np.int8, input_details[0]['dtype']) + output_details = interpreter.get_output_details() + self.assertLen(output_details, 1) + self.assertEqual(np.int8, output_details[0]['dtype']) - if is_error: - with self.assertRaises(ValueError) as error: - quantized_converter.convert() - self.assertEqual( - 'representative_dataset is required when specifying ' - 'TFLITE_BUILTINS_INT8 or INT8 supported types.', str(error.exception)) - - else: - quantized_tflite = quantized_converter.convert() - self.assertTrue(quantized_tflite) - interpreter = Interpreter(model_content=quantized_tflite) - interpreter.allocate_tensors() - self.assertEqual(interpreter.get_tensor_details()[idx]['name'], node_name) - - if is_float16_quantized: - # Verify that bias constant is float16 type. - self.assertEqual(interpreter.get_tensor_details()[idx]['dtype'], - lite.constants.FLOAT16) - elif is_post_training_quantized: - # Verify that bias constants is int32 type. 
- self.assertEqual(interpreter.get_tensor_details()[idx]['dtype'], - lite.constants.INT32) - else: - raise ValueError('Invalid test options.') + # Ensure that the quantized weights tflite model is smaller. + self.assertLess(len(quantized_tflite_model), len(float_tflite_model)) @parameterized.named_parameters( ('EnableMlirConverter', True), # enable mlir ('DisableMlirConverter', False)) # disable mlir - def testInvalidQuantizeFloat16(self, enable_mlir): - with ops.Graph().as_default(): - inp, output, _ = self._getCalibrationQuantizeModel() - sess = session.Session() - - # Specify float16 quantization - quantized_converter = lite.TFLiteConverter.from_session( - sess, [inp], [output]) - quantized_converter.experimental_new_converter = enable_mlir - quantized_converter.optimizations = [lite.Optimize.DEFAULT] - quantized_converter.target_spec.supported_types = [lite.constants.FLOAT16] - # Specify only int8 builtin ops - quantized_converter.target_spec.supported_ops = [ - lite.OpsSet.TFLITE_BUILTINS_INT8 - ] - with self.assertRaises(ValueError) as error: - quantized_converter.convert() - self.assertEqual( - 'TFLITE_BUILTINS_INT8 requires smallest supported type to be INT8.', - str(error.exception)) - - @parameterized.named_parameters( - ('EnableMlirConverter', True), # enable mlir - ('DisableMlirConverter', False)) # disable mlir - def testInvalidPostTrainingQuantize(self, enable_mlir): + def testInvalidQuantizeInt8(self, enable_mlir_converter): np.random.seed(0) with ops.Graph().as_default(): # We need the tensor to have more than 1024 elements for quantize_weights @@ -1032,7 +829,7 @@ class FromSessionTest(TestModels, parameterized.TestCase): # Attempt to convert to quantized weights model. quantized_converter = lite.TFLiteConverter.from_session( sess, [in_tensor_1], [out_tensor]) - quantized_converter.experimental_new_converter = enable_mlir + quantized_converter.experimental_new_converter = enable_mlir_converter quantized_converter.optimizations = [lite.Optimize.DEFAULT] # Restricting to int8 type only quantized_converter.target_spec.supported_types = [lite.constants.INT8] @@ -1046,72 +843,183 @@ class FromSessionTest(TestModels, parameterized.TestCase): @parameterized.named_parameters( ('EnableMlirConverter', True), # enable mlir ('DisableMlirConverter', False)) # disable mlir - def testPostTrainingCalibrateAndQuantizeFloatNotAllowed(self, enable_mlir): + def testQuantizeUInt8(self, enable_mlir_converter): with ops.Graph().as_default(): - inp, output, calibration_gen = self._getCalibrationQuantizeModel() + in_tensor_1 = array_ops.placeholder( + shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputA') + in_tensor_2 = array_ops.placeholder( + shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputB') + out_tensor = array_ops.fake_quant_with_min_max_args( + in_tensor_1 + in_tensor_2, min=0., max=1., name='output') sess = session.Session() + # Convert model and ensure model is not None. + converter = lite.TFLiteConverter.from_session(sess, + [in_tensor_1, in_tensor_2], + [out_tensor]) + converter.inference_type = lite_constants.QUANTIZED_UINT8 + converter.quantized_input_stats = { + 'inputA': (0., 1.), + 'inputB': (0., 1.) + } # mean, std_dev + converter.experimental_new_converter = enable_mlir_converter + tflite_model = converter.convert() + self.assertIsNotNone(tflite_model) + + # Check values from converted model. 
+ interpreter = Interpreter(model_content=tflite_model) + interpreter.allocate_tensors() + + input_details = interpreter.get_input_details() + self.assertLen(input_details, 2) + self.assertEqual('inputA', input_details[0]['name']) + self.assertEqual(np.uint8, input_details[0]['dtype']) + self.assertAllEqual([1, 16, 16, 3], input_details[0]['shape']) + self.assertEqual((1., 0.), input_details[0]['quantization']) + + self.assertEqual('inputB', input_details[1]['name']) + self.assertEqual(np.uint8, input_details[1]['dtype']) + self.assertAllEqual([1, 16, 16, 3], input_details[1]['shape']) + self.assertEqual((1., 0.), input_details[1]['quantization']) + + output_details = interpreter.get_output_details() + self.assertLen(output_details, 1) + self.assertEqual(np.uint8, output_details[0]['dtype']) + self.assertAllEqual([1, 16, 16, 3], output_details[0]['shape']) + self.assertGreater(output_details[0]['quantization'][0], 0) # scale + + def testQuantizeUInt8UsingDefaultRangeStats(self): + with ops.Graph().as_default(): + in_tensor = array_ops.placeholder( + shape=[1, 16, 16, 3], dtype=dtypes.float32) + out_tensor = in_tensor + in_tensor + sess = session.Session() + + # Convert model and ensure model is not None. + converter = lite.TFLiteConverter.from_session(sess, [in_tensor], + [out_tensor]) + converter.inference_type = lite_constants.QUANTIZED_UINT8 + converter.quantized_input_stats = {'Placeholder': (0., 1.)} # mean, std_dev + converter.default_ranges_stats = (0, 6) # min, max + tflite_model = converter.convert() + self.assertIsNotNone(tflite_model) + + # Check values from converted model. + interpreter = Interpreter(model_content=tflite_model) + interpreter.allocate_tensors() + + input_details = interpreter.get_input_details() + self.assertLen(input_details, 1) + self.assertEqual('Placeholder', input_details[0]['name']) + self.assertEqual(np.uint8, input_details[0]['dtype']) + self.assertAllEqual([1, 16, 16, 3], input_details[0]['shape']) + self.assertEqual((1., 0.), input_details[0]['quantization']) + + output_details = interpreter.get_output_details() + self.assertLen(output_details, 1) + self.assertEqual('add', output_details[0]['name']) + self.assertEqual(np.uint8, output_details[0]['dtype']) + self.assertAllEqual([1, 16, 16, 3], output_details[0]['shape']) + self.assertGreater(output_details[0]['quantization'][0], 0) # scale + + @parameterized.named_parameters( + # Quantize to Float16 even if rep data provided. + ('UseRepresentativeData', True, False, True, False, False, False), + # Quantize to Float16 if no rep data provided. + ('NoRepresentativeData', False, False, True, False, False, False), + # Post training quantization if both rep data and int8 included. + ('UseSampleDataIncludeInt8', True, True, False, False, True, False), + + # Quantize to Float16 even if rep data provided with mlir. + ('UseRepresentativeDataMlir', True, False, True, False, False, True), + # Quantize to Float16 if no rep data provided with mlir. + ('NoRepresentativeDataMlir', False, False, True, False, False, True), + # Post training quantization if both rep data and int8 included with mlir. 
+ ('SampleDataIncludeInt8Mlir', True, True, False, False, True, True)) + def testQuantizeFloat16(self, use_rep_data, include_int8, + is_float16_quantized, is_error, + is_post_training_quantized, enable_mlir_converter): + with ops.Graph().as_default(): + inp, output, calibration_gen = self._getIntegerQuantizeModel() + sess = session.Session() + + idx = 1 if enable_mlir_converter else 0 + node_name = 'Conv2D' if enable_mlir_converter else 'Conv2D_bias' # Convert float model. float_converter = lite.TFLiteConverter.from_session(sess, [inp], [output]) - float_converter.experimental_new_converter = enable_mlir - float_tflite = float_converter.convert() - self.assertTrue(float_tflite) - - # Convert quantized model. + float_converter.experimental_new_converter = enable_mlir_converter + float_tflite_model = float_converter.convert() + self.assertIsNotNone(float_tflite_model) + interpreter = Interpreter(model_content=float_tflite_model) + interpreter.allocate_tensors() + self.assertEqual(interpreter.get_tensor_details()[idx]['name'], node_name) + self.assertEqual(interpreter.get_tensor_details()[idx]['dtype'], + lite.constants.FLOAT) + # Convert model to quantized version quantized_converter = lite.TFLiteConverter.from_session( sess, [inp], [output]) - quantized_converter.experimental_new_converter = enable_mlir + quantized_converter.experimental_new_converter = enable_mlir_converter quantized_converter.optimizations = [lite.Optimize.DEFAULT] - quantized_converter.representative_dataset = calibration_gen - quantized_converter.target_spec.supported_types = [lite.constants.INT8] - quantized_tflite = quantized_converter.convert() - self.assertTrue(quantized_tflite) + quantized_converter.target_spec.supported_types = [lite.constants.FLOAT16] + if include_int8: + quantized_converter.target_spec.supported_types.append( + lite.constants.INT8) + if use_rep_data: + quantized_converter.representative_dataset = calibration_gen - # Ensure that the quantized weights tflite model is smaller. - self.assertLess(len(quantized_tflite), len(float_tflite)) + if is_error: + with self.assertRaises(ValueError) as error: + quantized_converter.convert() + self.assertEqual( + 'representative_dataset is required when specifying ' + 'TFLITE_BUILTINS_INT8 or INT8 supported types.', str(error.exception)) + + else: + quantized_tflite_model = quantized_converter.convert() + self.assertIsNotNone(quantized_tflite_model) + interpreter = Interpreter(model_content=quantized_tflite_model) + interpreter.allocate_tensors() + self.assertEqual(interpreter.get_tensor_details()[idx]['name'], node_name) + + if is_float16_quantized: + # Verify that bias constant is float16 type. + self.assertEqual(interpreter.get_tensor_details()[idx]['dtype'], + lite.constants.FLOAT16) + elif is_post_training_quantized: + # Verify that bias constants is int32 type. + self.assertEqual(interpreter.get_tensor_details()[idx]['dtype'], + lite.constants.INT32) + else: + raise ValueError('Invalid test options.') @parameterized.named_parameters( ('EnableMlirConverter', True), # enable mlir ('DisableMlirConverter', False)) # disable mlir - def testPostTrainingCalibrateAndQuantizeInt8Inputs(self, enable_mlir): + def testInvalidQuantizeFloat16(self, enable_mlir_converter): with ops.Graph().as_default(): - inp, output, calibration_gen = self._getCalibrationQuantizeModel() + inp, output, _ = self._getIntegerQuantizeModel() sess = session.Session() - # Convert float model. 
- float_converter = lite.TFLiteConverter.from_session(sess, [inp], [output]) - float_converter.experimental_new_converter = enable_mlir - float_tflite = float_converter.convert() - self.assertTrue(float_tflite) - - # Convert quantized weights model. + # Specify float16 quantization quantized_converter = lite.TFLiteConverter.from_session( sess, [inp], [output]) - quantized_converter.experimental_new_converter = enable_mlir - quantized_converter.inference_input_type = lite_constants.INT8 - quantized_converter.inference_output_type = lite_constants.INT8 + quantized_converter.experimental_new_converter = enable_mlir_converter quantized_converter.optimizations = [lite.Optimize.DEFAULT] - quantized_converter.representative_dataset = calibration_gen - quantized_tflite = quantized_converter.convert() - self.assertTrue(quantized_tflite) - - # The input and output types should be int8. - interpreter = Interpreter(model_content=quantized_tflite) - interpreter.allocate_tensors() - input_details = interpreter.get_input_details() - self.assertEqual(1, len(input_details)) - self.assertEqual(np.int8, input_details[0]['dtype']) - output_details = interpreter.get_output_details() - self.assertEqual(1, len(output_details)) - self.assertEqual(np.int8, output_details[0]['dtype']) - - # Ensure that the quantized weights tflite model is smaller. - self.assertLess(len(quantized_tflite), len(float_tflite)) + quantized_converter.target_spec.supported_types = [lite.constants.FLOAT16] + # Specify only int8 builtin ops + quantized_converter.target_spec.supported_ops = [ + lite.OpsSet.TFLITE_BUILTINS_INT8 + ] + with self.assertRaises(ValueError) as error: + quantized_converter.convert() + self.assertEqual( + 'TFLITE_BUILTINS_INT8 requires smallest supported type to be INT8.', + str(error.exception)) @parameterized.named_parameters( ('InferenceType_INT8', lite_constants.INT8), - ('InferenceType_QUANTIZED_INT8', lite_constants.QUANTIZED_UINT8)) - def testRequiresInputStatsForTrainingTimeQuantization(self, quantized_type): + ('InferenceType_UINT8', lite_constants.QUANTIZED_UINT8)) + def testInvalidQuantizeQATModelRequiresInputStats(self, quantized_type): with ops.Graph().as_default(): in_tensor = array_ops.placeholder( shape=[1, 16, 16, 3], dtype=dtypes.float32) @@ -1148,15 +1056,37 @@ class FromSessionTest(TestModels, parameterized.TestCase): } quantized_converter.convert() + def testInvalidQuantizeQATModelMissingInputStats(self): + with ops.Graph().as_default(): + in_tensor_1 = array_ops.placeholder( + shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputA') + in_tensor_2 = array_ops.placeholder( + shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputB') + out_tensor = array_ops.fake_quant_with_min_max_args( + in_tensor_1 + in_tensor_2, min=0., max=1., name='output') + sess = session.Session() + + # Convert model and ensure model is not None. 
+ converter = lite.TFLiteConverter.from_session(sess, + [in_tensor_1, in_tensor_2], + [out_tensor]) + converter.inference_type = lite_constants.QUANTIZED_UINT8 + converter.quantized_input_stats = {'inputA': (0., 1.)} # mean, std_dev + with self.assertRaises(ValueError) as error: + converter.convert() + self.assertEqual( + 'Quantization input stats are not available for input tensors ' + '\'inputB\'.', str(error.exception)) + def testTrainingTimeAndPostTrainingCalibrateAndQuantize(self): with ops.Graph().as_default(): - inp, output, calibration_gen = self._getCalibrationQuantizeModel() + inp, output, calibration_gen = self._getIntegerQuantizeModel() sess = session.Session() # Convert float model. float_converter = lite.TFLiteConverter.from_session(sess, [inp], [output]) - float_tflite = float_converter.convert() - self.assertTrue(float_tflite) + float_tflite_model = float_converter.convert() + self.assertIsNotNone(float_tflite_model) converter = lite.TFLiteConverter.from_session(sess, [inp], [output]) @@ -1172,15 +1102,16 @@ class FromSessionTest(TestModels, parameterized.TestCase): converter.optimizations = [lite.Optimize.DEFAULT] converter.representative_dataset = calibration_gen converter._experimental_new_quantizer = True - quantized_tflite = converter.convert() - self.assertTrue(quantized_tflite) - self.assertLess(len(quantized_tflite), len(float_tflite)) + quantized_tflite_model = converter.convert() + self.assertIsNotNone(quantized_tflite_model) + self.assertLess(len(quantized_tflite_model), len(float_tflite_model)) # calibration only api converter._experimental_calibrate_only = True calibrated_tflite = converter.convert() - quantized_tflite = mlir_quantize(calibrated_tflite, fully_quantize=True) - interpreter = Interpreter(model_content=quantized_tflite) + quantized_tflite_model = mlir_quantize( + calibrated_tflite, fully_quantize=True) + interpreter = Interpreter(model_content=quantized_tflite_model) interpreter.allocate_tensors() input_details = interpreter.get_input_details() self.assertEqual(np.int8, input_details[0]['dtype']) @@ -1200,7 +1131,7 @@ class FromSessionTest(TestModels, parameterized.TestCase): # Convert model and ensure model is not None. converter = lite.TocoConverter.from_session(sess, [in_tensor], [out_tensor]) tflite_model = converter.convert() - self.assertTrue(tflite_model) + self.assertIsNotNone(tflite_model) # Ensure the interpreter is able to load. interpreter = Interpreter(model_content=tflite_model) @@ -1218,20 +1149,20 @@ class FromSessionTest(TestModels, parameterized.TestCase): converter = lite.TFLiteConverter.from_session(sess, [input_tensor], [out0, out1, out2, out3]) tflite_model = converter.convert() - self.assertTrue(tflite_model) + self.assertIsNotNone(tflite_model) # Check values from converted model. 
interpreter = Interpreter(model_content=tflite_model) interpreter.allocate_tensors() input_details = interpreter.get_input_details() - self.assertEqual(1, len(input_details)) + self.assertLen(input_details, 1) interpreter.set_tensor(input_details[0]['index'], np.asarray([1.0, 2.0, 3.0, 4.0], dtype=np.float32)) interpreter.invoke() output_details = interpreter.get_output_details() - self.assertEqual(4, len(output_details)) + self.assertLen(output_details, 4) self.assertEqual(1.0, interpreter.get_tensor(output_details[0]['index'])) self.assertEqual(2.0, interpreter.get_tensor(output_details[1]['index'])) self.assertEqual(3.0, interpreter.get_tensor(output_details[2]['index'])) @@ -1241,7 +1172,7 @@ class FromSessionTest(TestModels, parameterized.TestCase): ('EnableMlirConverter', True), # enable mlir ('DisableMlirConverter', False)) # disable mlir @test_util.run_in_graph_and_eager_modes - def testFunctions(self, enable_mlir): + def testFunctions(self, enable_mlir_converter): """Tests tf.function in 1.X.""" @def_function.function @@ -1262,26 +1193,26 @@ class FromSessionTest(TestModels, parameterized.TestCase): # Convert model and ensure model is not None. converter = lite.TFLiteConverter.from_session(sess, [placeholder], [output_node]) - converter.experimental_new_converter = enable_mlir + converter.experimental_new_converter = enable_mlir_converter tflite_model = converter.convert() - self.assertTrue(tflite_model) + self.assertIsNotNone(tflite_model) # Check values from converted model. interpreter = Interpreter(model_content=tflite_model) interpreter.allocate_tensors() input_details = interpreter.get_input_details() - self.assertEqual(1, len(input_details)) + self.assertLen(input_details, 1) self.assertEqual('input', input_details[0]['name']) self.assertEqual(np.float32, input_details[0]['dtype']) - self.assertTrue(([1] == input_details[0]['shape']).all()) + self.assertAllEqual([1], input_details[0]['shape']) self.assertEqual((0., 0.), input_details[0]['quantization']) output_details = interpreter.get_output_details() - self.assertEqual(1, len(output_details)) + self.assertLen(output_details, 1) self.assertEqual('output_node', output_details[0]['name']) self.assertEqual(np.float32, output_details[0]['dtype']) - self.assertTrue(([1] == output_details[0]['shape']).all()) + self.assertAllEqual([1], output_details[0]['shape']) self.assertEqual((0., 0.), output_details[0]['quantization']) def testInferenceInputOutputTypeFloatDefault(self): @@ -1295,23 +1226,23 @@ class FromSessionTest(TestModels, parameterized.TestCase): converter = lite.TFLiteConverter.from_session(sess, [in_tensor], [out_tensor]) tflite_model = converter.convert() - self.assertTrue(tflite_model) + self.assertIsNotNone(tflite_model) # Check values from converted model. 
interpreter = Interpreter(model_content=tflite_model) interpreter.allocate_tensors() input_details = interpreter.get_input_details() - self.assertEqual(1, len(input_details)) + self.assertLen(input_details, 1) self.assertEqual('Placeholder', input_details[0]['name']) self.assertEqual(np.float32, input_details[0]['dtype']) - self.assertTrue(([1, 16, 16, 3] == input_details[0]['shape']).all()) + self.assertAllEqual([1, 16, 16, 3], input_details[0]['shape']) output_details = interpreter.get_output_details() - self.assertEqual(1, len(output_details)) + self.assertLen(output_details, 1) self.assertEqual('add', output_details[0]['name']) self.assertEqual(np.float32, output_details[0]['dtype']) - self.assertTrue(([1, 16, 16, 3] == output_details[0]['shape']).all()) + self.assertAllEqual([1, 16, 16, 3], output_details[0]['shape']) def testInferenceInputOutputTypeQuantizedUint8Default(self): with ops.Graph().as_default(): @@ -1327,23 +1258,23 @@ class FromSessionTest(TestModels, parameterized.TestCase): converter.inference_type = lite_constants.QUANTIZED_UINT8 converter.quantized_input_stats = {'Placeholder': (0., 1.)} # mean, std_dev tflite_model = converter.convert() - self.assertTrue(tflite_model) + self.assertIsNotNone(tflite_model) # Check values from converted model. interpreter = Interpreter(model_content=tflite_model) interpreter.allocate_tensors() input_details = interpreter.get_input_details() - self.assertEqual(1, len(input_details)) + self.assertLen(input_details, 1) self.assertEqual('Placeholder', input_details[0]['name']) self.assertEqual(np.uint8, input_details[0]['dtype']) - self.assertTrue(([1, 16, 16, 3] == input_details[0]['shape']).all()) + self.assertAllEqual([1, 16, 16, 3], input_details[0]['shape']) output_details = interpreter.get_output_details() - self.assertEqual(1, len(output_details)) + self.assertLen(output_details, 1) self.assertEqual('output', output_details[0]['name']) self.assertEqual(np.uint8, output_details[0]['dtype']) - self.assertTrue(([1, 16, 16, 3] == output_details[0]['shape']).all()) + self.assertAllEqual([1, 16, 16, 3], output_details[0]['shape']) def testReusingConverterWithDifferentPostTrainingQuantization(self): with ops.Graph().as_default(): @@ -1359,11 +1290,11 @@ class FromSessionTest(TestModels, parameterized.TestCase): converter.post_training_quantize = True tflite_model = converter.convert() - self.assertTrue(tflite_model) + self.assertIsNotNone(tflite_model) converter.post_training_quantize = False tflite_model = converter.convert() - self.assertTrue(tflite_model) + self.assertIsNotNone(tflite_model) def testResizeWithShape(self): with ops.Graph().as_default(): @@ -1383,8 +1314,8 @@ class FromSessionTest(TestModels, parameterized.TestCase): interpreter = Interpreter(model_content=tflite_model) input_details = interpreter.get_input_details() self.assertLen(input_details, 1) - self.assertTrue(([1, 1] == input_details[0]['shape']).all()) - self.assertTrue(([-1, -1] == input_details[0]['shape_signature']).all()) + self.assertAllEqual([1, 1], input_details[0]['shape']) + self.assertAllEqual([-1, -1], input_details[0]['shape_signature']) # Resize tensor and invoke. 
interpreter.resize_tensor_input(0, [4]) @@ -1395,9 +1326,9 @@ class FromSessionTest(TestModels, parameterized.TestCase): output_details = interpreter.get_output_details() self.assertLen(output_details, 1) self.assertEqual(np.int32, output_details[0]['dtype']) - self.assertTrue(([4] == output_details[0]['shape']).all()) + self.assertAllEqual([4], output_details[0]['shape']) output_data = interpreter.get_tensor(output_details[0]['index']) - self.assertTrue(([1, 2, 3, 4] == output_data).all()) + self.assertAllEqual([1, 2, 3, 4], output_data) def testResizingIntermediateDynamicTensor(self): # This is a regression test for the case where shape of dynamic output @@ -1479,24 +1410,24 @@ class FromFrozenGraphFile(LiteTest): converter = lite.TFLiteConverter.from_frozen_graph(graph_def_file, ['Placeholder'], ['add']) tflite_model = converter.convert() - self.assertTrue(tflite_model) + self.assertIsNotNone(tflite_model) # Check values from converted model. interpreter = Interpreter(model_content=tflite_model) interpreter.allocate_tensors() input_details = interpreter.get_input_details() - self.assertEqual(1, len(input_details)) + self.assertLen(input_details, 1) self.assertEqual('Placeholder', input_details[0]['name']) self.assertEqual(np.float32, input_details[0]['dtype']) - self.assertTrue(([1, 16, 16, 3] == input_details[0]['shape']).all()) + self.assertAllEqual([1, 16, 16, 3], input_details[0]['shape']) self.assertEqual((0., 0.), input_details[0]['quantization']) output_details = interpreter.get_output_details() - self.assertEqual(1, len(output_details)) + self.assertLen(output_details, 1) self.assertEqual('add', output_details[0]['name']) self.assertEqual(np.float32, output_details[0]['dtype']) - self.assertTrue(([1, 16, 16, 3] == output_details[0]['shape']).all()) + self.assertAllEqual([1, 16, 16, 3], output_details[0]['shape']) self.assertEqual((0., 0.), output_details[0]['quantization']) def testFloatWithShapesArray(self): @@ -1516,15 +1447,15 @@ class FromFrozenGraphFile(LiteTest): graph_def_file, ['Placeholder'], ['add'], input_shapes={'Placeholder': [1, 16, 16, 3]}) tflite_model = converter.convert() - self.assertTrue(tflite_model) + self.assertIsNotNone(tflite_model) # Check values from converted model. interpreter = Interpreter(model_content=tflite_model) interpreter.allocate_tensors() input_details = interpreter.get_input_details() - self.assertEqual(1, len(input_details)) - self.assertTrue(([1, 16, 16, 3] == input_details[0]['shape']).all()) + self.assertLen(input_details, 1) + self.assertAllEqual([1, 16, 16, 3], input_details[0]['shape']) def testFreezeGraph(self): with ops.Graph().as_default(): @@ -1563,24 +1494,24 @@ class FromFrozenGraphFile(LiteTest): converter = lite.TFLiteConverter.from_frozen_graph(graph_def_file, ['Placeholder'], ['add']) tflite_model = converter.convert() - self.assertTrue(tflite_model) + self.assertIsNotNone(tflite_model) # Check values from converted model. 
interpreter = Interpreter(model_content=tflite_model) interpreter.allocate_tensors() input_details = interpreter.get_input_details() - self.assertEqual(1, len(input_details)) + self.assertLen(input_details, 1) self.assertEqual('Placeholder', input_details[0]['name']) self.assertEqual(np.float32, input_details[0]['dtype']) - self.assertTrue(([1, 16, 16, 3] == input_details[0]['shape']).all()) + self.assertAllEqual([1, 16, 16, 3], input_details[0]['shape']) self.assertEqual((0., 0.), input_details[0]['quantization']) output_details = interpreter.get_output_details() - self.assertEqual(1, len(output_details)) + self.assertLen(output_details, 1) self.assertEqual('add', output_details[0]['name']) self.assertEqual(np.float32, output_details[0]['dtype']) - self.assertTrue(([1, 16, 16, 3] == output_details[0]['shape']).all()) + self.assertAllEqual([1, 16, 16, 3], output_details[0]['shape']) self.assertEqual((0., 0.), output_details[0]['quantization']) def testInvalidFileNotFound(self): @@ -1620,7 +1551,7 @@ class FromFrozenGraphFile(LiteTest): converter = lite.TocoConverter.from_frozen_graph(graph_def_file, ['Placeholder'], ['add']) tflite_model = converter.convert() - self.assertTrue(tflite_model) + self.assertIsNotNone(tflite_model) # Ensure the model is able to load. interpreter = Interpreter(model_content=tflite_model) @@ -1644,7 +1575,7 @@ class FromFrozenGraphFile(LiteTest): ['Placeholder'], ['add']) converter.convert() # GraphDebugInfo should be none for frozen graph. - self.assertTrue(not converter._debug_info) + self.assertFalse(converter._debug_info) class FromFrozenGraphObjectDetection(LiteTest): @@ -1679,35 +1610,35 @@ class FromFrozenGraphObjectDetection(LiteTest): self._input_shapes) converter.allow_custom_ops = True tflite_model = converter.convert() - self.assertTrue(tflite_model) + self.assertIsNotNone(tflite_model) # Check values from converted model. 
interpreter = Interpreter(model_content=tflite_model) interpreter.allocate_tensors() input_details = interpreter.get_input_details() - self.assertEqual(1, len(input_details)) + self.assertLen(input_details, 1) self.assertEqual('normalized_input_image_tensor', input_details[0]['name']) self.assertEqual(np.float32, input_details[0]['dtype']) - self.assertTrue(([1, 300, 300, 3] == input_details[0]['shape']).all()) + self.assertAllEqual([1, 300, 300, 3], input_details[0]['shape']) self.assertEqual((0., 0.), input_details[0]['quantization']) output_details = interpreter.get_output_details() - self.assertEqual(4, len(output_details)) + self.assertLen(output_details, 4) self.assertEqual('TFLite_Detection_PostProcess', output_details[0]['name']) self.assertEqual(np.float32, output_details[0]['dtype']) - self.assertTrue(([1, 10, 4] == output_details[0]['shape']).all()) + self.assertAllEqual([1, 10, 4], output_details[0]['shape']) self.assertEqual((0., 0.), output_details[0]['quantization']) self.assertEqual('TFLite_Detection_PostProcess:1', output_details[1]['name']) - self.assertTrue(([1, 10] == output_details[1]['shape']).all()) + self.assertAllEqual([1, 10], output_details[1]['shape']) self.assertEqual('TFLite_Detection_PostProcess:2', output_details[2]['name']) - self.assertTrue(([1, 10] == output_details[2]['shape']).all()) + self.assertAllEqual([1, 10], output_details[2]['shape']) self.assertEqual('TFLite_Detection_PostProcess:3', output_details[3]['name']) - self.assertTrue(([1] == output_details[3]['shape']).all()) + self.assertAllEqual([1], output_details[3]['shape']) class FromSavedModelTest(TestModels): @@ -1734,28 +1665,28 @@ class FromSavedModelTest(TestModels): # Convert model and ensure model is not None. converter = lite.TFLiteConverter.from_saved_model(saved_model_dir) tflite_model = converter.convert() - self.assertTrue(tflite_model) + self.assertIsNotNone(tflite_model) interpreter = Interpreter(model_content=tflite_model) interpreter.allocate_tensors() input_details = interpreter.get_input_details() - self.assertEqual(2, len(input_details)) + self.assertLen(input_details, 2) self.assertStartsWith(input_details[0]['name'], 'inputA') self.assertEqual(np.float32, input_details[0]['dtype']) - self.assertTrue(([1, 16, 16, 3] == input_details[0]['shape']).all()) + self.assertAllEqual([1, 16, 16, 3], input_details[0]['shape']) self.assertEqual((0., 0.), input_details[0]['quantization']) self.assertStartsWith(input_details[1]['name'], 'inputB') self.assertEqual(np.float32, input_details[1]['dtype']) - self.assertTrue(([1, 16, 16, 3] == input_details[1]['shape']).all()) + self.assertAllEqual([1, 16, 16, 3], input_details[1]['shape']) self.assertEqual((0., 0.), input_details[1]['quantization']) output_details = interpreter.get_output_details() - self.assertEqual(1, len(output_details)) + self.assertLen(output_details, 1) self.assertStartsWith(output_details[0]['name'], 'add') self.assertEqual(np.float32, output_details[0]['dtype']) - self.assertTrue(([1, 16, 16, 3] == output_details[0]['shape']).all()) + self.assertAllEqual([1, 16, 16, 3], output_details[0]['shape']) self.assertEqual((0., 0.), output_details[0]['quantization']) def testOldConverterWarning(self): @@ -1769,7 +1700,7 @@ class FromSavedModelTest(TestModels): converter = lite.TFLiteConverter.from_saved_model(saved_model_dir) converter.experimental_new_converter = False tflite_model = converter.convert() - self.assertTrue(tflite_model) + self.assertIsNotNone(tflite_model) self.assertIn(warning_message, log.getvalue()) 
logging.root.removeHandler(handler) @@ -1784,7 +1715,7 @@ class FromSavedModelTest(TestModels): # Convert model and ensure model is not None. converter = lite.TFLiteConverter.from_saved_model(saved_model_dir) tflite_model = converter.convert() - self.assertTrue(tflite_model) + self.assertIsNotNone(tflite_model) self.assertIn(optout_message, log.getvalue()) logging.root.removeHandler(handler) @@ -1794,29 +1725,29 @@ class FromSavedModelTest(TestModels): converter = lite.TFLiteConverter.from_saved_model(saved_model_dir) tflite_model = converter.convert() - self.assertTrue(tflite_model) + self.assertIsNotNone(tflite_model) # Check values from converted model. interpreter = Interpreter(model_content=tflite_model) interpreter.allocate_tensors() input_details = interpreter.get_input_details() - self.assertEqual(2, len(input_details)) + self.assertLen(input_details, 2) self.assertStartsWith(input_details[0]['name'], 'inputA') self.assertEqual(np.float32, input_details[0]['dtype']) - self.assertTrue(([1, 16, 16, 3] == input_details[0]['shape']).all()) + self.assertAllEqual([1, 16, 16, 3], input_details[0]['shape']) self.assertEqual((0., 0.), input_details[0]['quantization']) self.assertStartsWith(input_details[1]['name'], 'inputB') self.assertEqual(np.float32, input_details[1]['dtype']) - self.assertTrue(([1, 16, 16, 3] == input_details[1]['shape']).all()) + self.assertAllEqual([1, 16, 16, 3], input_details[1]['shape']) self.assertEqual((0., 0.), input_details[1]['quantization']) output_details = interpreter.get_output_details() - self.assertEqual(1, len(output_details)) + self.assertLen(output_details, 1) self.assertStartsWith(output_details[0]['name'], 'add') self.assertEqual(np.float32, output_details[0]['dtype']) - self.assertTrue(([1, 16, 16, 3] == output_details[0]['shape']).all()) + self.assertAllEqual([1, 16, 16, 3], output_details[0]['shape']) self.assertEqual((0., 0.), output_details[0]['quantization']) def testOrderInputArrays(self): @@ -1826,29 +1757,29 @@ class FromSavedModelTest(TestModels): converter = lite.TFLiteConverter.from_saved_model( saved_model_dir, input_arrays=['inputB', 'inputA']) tflite_model = converter.convert() - self.assertTrue(tflite_model) + self.assertIsNotNone(tflite_model) # Check values from converted model. 
interpreter = Interpreter(model_content=tflite_model) interpreter.allocate_tensors() input_details = interpreter.get_input_details() - self.assertEqual(2, len(input_details)) + self.assertLen(input_details, 2) self.assertStartsWith(input_details[0]['name'], 'inputA') self.assertEqual(np.float32, input_details[0]['dtype']) - self.assertTrue(([1, 16, 16, 3] == input_details[0]['shape']).all()) + self.assertAllEqual([1, 16, 16, 3], input_details[0]['shape']) self.assertEqual((0., 0.), input_details[0]['quantization']) self.assertStartsWith(input_details[1]['name'], 'inputB') self.assertEqual(np.float32, input_details[1]['dtype']) - self.assertTrue(([1, 16, 16, 3] == input_details[1]['shape']).all()) + self.assertAllEqual([1, 16, 16, 3], input_details[1]['shape']) self.assertEqual((0., 0.), input_details[1]['quantization']) output_details = interpreter.get_output_details() - self.assertEqual(1, len(output_details)) + self.assertLen(output_details, 1) self.assertStartsWith(output_details[0]['name'], 'add') self.assertEqual(np.float32, output_details[0]['dtype']) - self.assertTrue(([1, 16, 16, 3] == output_details[0]['shape']).all()) + self.assertAllEqual([1, 16, 16, 3], output_details[0]['shape']) self.assertEqual((0., 0.), output_details[0]['quantization']) def testSubsetInputArrays(self): @@ -1880,7 +1811,7 @@ class FromSavedModelTest(TestModels): # Convert model and ensure model is not None. converter = lite.TocoConverter.from_saved_model(saved_model_dir) tflite_model = converter.convert() - self.assertTrue(tflite_model) + self.assertIsNotNone(tflite_model) # Ensure the model is able to load. interpreter = Interpreter(model_content=tflite_model) @@ -1958,7 +1889,7 @@ class FromKerasFile(TestModels, parameterized.TestCase): converter = lite.TFLiteConverter.from_keras_model_file(self._keras_file) tflite_model = converter.convert() - self.assertTrue(tflite_model) + self.assertIsNotNone(tflite_model) # Check tensor details of converted model. interpreter = Interpreter(model_content=tflite_model) @@ -1968,13 +1899,13 @@ class FromKerasFile(TestModels, parameterized.TestCase): self.assertLen(input_details, 1) self.assertEndsWith(input_details[0]['name'], 'dense_input') self.assertEqual(np.float32, input_details[0]['dtype']) - self.assertTrue(([1, 3] == input_details[0]['shape']).all()) + self.assertAllEqual([1, 3], input_details[0]['shape']) self.assertEqual((0., 0.), input_details[0]['quantization']) output_details = interpreter.get_output_details() self.assertLen(output_details, 1) self.assertEqual(np.float32, output_details[0]['dtype']) - self.assertTrue(([1, 3, 3] == output_details[0]['shape']).all()) + self.assertAllEqual([1, 3, 3], output_details[0]['shape']) self.assertEqual((0., 0.), output_details[0]['quantization']) # Check inference of converted model. @@ -1998,7 +1929,7 @@ class FromKerasFile(TestModels, parameterized.TestCase): converter = lite.TFLiteConverter.from_keras_model_file( self._keras_file, custom_objects=self._custom_objects) tflite_model = converter.convert() - self.assertTrue(tflite_model) + self.assertIsNotNone(tflite_model) # Check tensor details of converted model. 
interpreter = Interpreter(model_content=tflite_model) @@ -2035,7 +1966,7 @@ class FromKerasFile(TestModels, parameterized.TestCase): converter = lite.TFLiteConverter.from_keras_model_file( self._keras_file, input_arrays=['dense_input']) tflite_model = converter.convert() - self.assertTrue(tflite_model) + self.assertIsNotNone(tflite_model) def testSequentialModelInputShape(self): """Test a Sequential tf.keras model testing input shapes argument.""" @@ -2053,7 +1984,7 @@ class FromKerasFile(TestModels, parameterized.TestCase): converter = lite.TFLiteConverter.from_keras_model_file( self._keras_file, input_shapes={'dense_input': [2, 3]}) tflite_model = converter.convert() - self.assertTrue(tflite_model) + self.assertIsNotNone(tflite_model) # Check input shape from converted model. interpreter = Interpreter(model_content=tflite_model) @@ -2062,7 +1993,7 @@ class FromKerasFile(TestModels, parameterized.TestCase): input_details = interpreter.get_input_details() self.assertLen(input_details, 1) self.assertEndsWith(input_details[0]['name'], 'dense_input') - self.assertTrue(([2, 3] == input_details[0]['shape']).all()) + self.assertAllEqual([2, 3], input_details[0]['shape']) def testSequentialModelOutputArray(self): """Test a Sequential tf.keras model testing output arrays argument.""" @@ -2080,7 +2011,7 @@ class FromKerasFile(TestModels, parameterized.TestCase): converter = lite.TFLiteConverter.from_keras_model_file( self._keras_file, output_arrays=['time_distributed/Reshape_1']) tflite_model = converter.convert() - self.assertTrue(tflite_model) + self.assertIsNotNone(tflite_model) @parameterized.named_parameters(('_graph', context.graph_mode), ('_eager', context.eager_mode)) @@ -2110,7 +2041,7 @@ class FromKerasFile(TestModels, parameterized.TestCase): # Convert to TFLite model. converter = lite.TFLiteConverter.from_keras_model_file(self._keras_file) tflite_model = converter.convert() - self.assertTrue(tflite_model) + self.assertIsNotNone(tflite_model) # Check tensor details of converted model. interpreter = Interpreter(model_content=tflite_model) @@ -2120,13 +2051,13 @@ class FromKerasFile(TestModels, parameterized.TestCase): self.assertLen(input_details, 1) self.assertEqual('input', input_details[0]['name']) self.assertEqual(np.float32, input_details[0]['dtype']) - self.assertTrue(([1, 3] == input_details[0]['shape']).all()) + self.assertAllEqual([1, 3], input_details[0]['shape']) self.assertEqual((0., 0.), input_details[0]['quantization']) output_details = interpreter.get_output_details() self.assertLen(output_details, 1) self.assertEqual(np.float32, output_details[0]['dtype']) - self.assertTrue(([1, 3] == output_details[0]['shape']).all()) + self.assertAllEqual([1, 3], output_details[0]['shape']) self.assertEqual((0., 0.), output_details[0]['quantization']) # Check inference of converted model. @@ -2172,7 +2103,7 @@ class FromKerasFile(TestModels, parameterized.TestCase): # Convert to TFLite model. converter = lite.TFLiteConverter.from_keras_model_file(self._keras_file) tflite_model = converter.convert() - self.assertTrue(tflite_model) + self.assertIsNotNone(tflite_model) # Check values from converted model. 
interpreter = Interpreter(model_content=tflite_model) @@ -2182,22 +2113,22 @@ class FromKerasFile(TestModels, parameterized.TestCase): self.assertLen(input_details, 2) self.assertEndsWith(input_details[0]['name'], 'input_a') self.assertEqual(np.float32, input_details[0]['dtype']) - self.assertTrue(([1, 3] == input_details[0]['shape']).all()) + self.assertAllEqual([1, 3], input_details[0]['shape']) self.assertEqual((0., 0.), input_details[0]['quantization']) self.assertEndsWith(input_details[1]['name'], 'input_b') self.assertEqual(np.float32, input_details[1]['dtype']) - self.assertTrue(([1, 3] == input_details[1]['shape']).all()) + self.assertAllEqual([1, 3], input_details[1]['shape']) self.assertEqual((0., 0.), input_details[1]['quantization']) output_details = interpreter.get_output_details() self.assertLen(output_details, 2) self.assertEqual(np.float32, output_details[0]['dtype']) - self.assertTrue(([1, 4] == output_details[0]['shape']).all()) + self.assertAllEqual([1, 4], output_details[0]['shape']) self.assertEqual((0., 0.), output_details[0]['quantization']) self.assertEqual(np.float32, output_details[1]['dtype']) - self.assertTrue(([1, 4] == output_details[1]['shape']).all()) + self.assertAllEqual([1, 4], output_details[1]['shape']) self.assertEqual((0., 0.), output_details[1]['quantization']) def testFunctionalSequentialModel(self): @@ -2228,7 +2159,7 @@ class FromKerasFile(TestModels, parameterized.TestCase): # Convert to TFLite model. converter = lite.TFLiteConverter.from_keras_model_file(self._keras_file) tflite_model = converter.convert() - self.assertTrue(tflite_model) + self.assertIsNotNone(tflite_model) # Check tensor details of converted model. interpreter = Interpreter(model_content=tflite_model) @@ -2238,13 +2169,13 @@ class FromKerasFile(TestModels, parameterized.TestCase): self.assertLen(input_details, 1) self.assertEndsWith(input_details[0]['name'], 'dense_input') self.assertEqual(np.float32, input_details[0]['dtype']) - self.assertTrue(([1, 3] == input_details[0]['shape']).all()) + self.assertAllEqual([1, 3], input_details[0]['shape']) self.assertEqual((0., 0.), input_details[0]['quantization']) output_details = interpreter.get_output_details() self.assertLen(output_details, 1) self.assertEqual(np.float32, output_details[0]['dtype']) - self.assertTrue(([1, 3, 3] == output_details[0]['shape']).all()) + self.assertAllEqual([1, 3, 3], output_details[0]['shape']) self.assertEqual((0., 0.), output_details[0]['quantization']) # Check inference of converted model. @@ -2264,7 +2195,7 @@ class FromKerasFile(TestModels, parameterized.TestCase): converter = lite.TocoConverter.from_keras_model_file(self._keras_file) tflite_model = converter.convert() - self.assertTrue(tflite_model) + self.assertIsNotNone(tflite_model) # Ensure the model is able to load. 
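Several of these tests stop at `Interpreter(model_content=tflite_model)` plus `allocate_tensors()`, which is enough to prove the flatbuffer loads. A short sketch of taking that one step further and running a single inference pass; the helper name and the float32 input assumption are mine, not from the patch:

    import numpy as np
    from tensorflow.lite.python.interpreter import Interpreter


    def run_once(tflite_model, input_value):
      """Loads a converted model and runs one inference pass on its first input."""
      interpreter = Interpreter(model_content=tflite_model)
      interpreter.allocate_tensors()

      input_details = interpreter.get_input_details()
      output_details = interpreter.get_output_details()

      # Feed one input tensor, invoke, and read back the first output.
      interpreter.set_tensor(input_details[0]['index'],
                             np.asarray(input_value, dtype=np.float32))
      interpreter.invoke()
      return interpreter.get_tensor(output_details[0]['index'])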
interpreter = Interpreter(model_content=tflite_model) @@ -2286,7 +2217,7 @@ class FromKerasFile(TestModels, parameterized.TestCase): converter = lite.TocoConverter.from_keras_model_file(self._keras_file) converter._experimental_sparsify_model = True tflite_model = converter.convert() - self.assertTrue(tflite_model) + self.assertIsNotNone(tflite_model) class GrapplerTest(TestModels, parameterized.TestCase): @@ -2312,21 +2243,21 @@ class GrapplerTest(TestModels, parameterized.TestCase): interpreter.allocate_tensors() input_details = interpreter.get_input_details() - self.assertEqual(1, len(input_details)) + self.assertLen(input_details, 1) self.assertEqual('Placeholder', input_details[0]['name']) self.assertEqual(np.float32, input_details[0]['dtype']) - self.assertTrue(([3, 3] == input_details[0]['shape']).all()) + self.assertAllEqual([3, 3], input_details[0]['shape']) output_details = interpreter.get_output_details() - self.assertEqual(1, len(output_details)) + self.assertLen(output_details, 1) self.assertEqual('output', output_details[0]['name']) self.assertEqual(np.float32, output_details[0]['dtype']) - self.assertTrue(([3, 3] == output_details[0]['shape']).all()) + self.assertAllEqual([3, 3], output_details[0]['shape']) @parameterized.named_parameters( ('EnableMlirConverter', True), # enable mlir ('DisableMlirConverter', False)) # disable mlir - def testInputNodeIsNotFolded(self, enable_mlir): + def testInputNodeIsNotFolded(self, enable_mlir_converter): ops.disable_eager_execution() # Constant folding handles the tf.broadcast_to operation which was not # supported by the TFLite at the time this test was added. @@ -2340,7 +2271,7 @@ class GrapplerTest(TestModels, parameterized.TestCase): # Convert model. converter = lite.TFLiteConverter.from_session(sess, [in_tensor, y_const], [out_tensor]) - converter.experimental_new_converter = enable_mlir + converter.experimental_new_converter = enable_mlir_converter tflite_model = converter.convert() # Check values from converted model. @@ -2416,7 +2347,7 @@ class DefaultConverterAttrsTest(LiteTest): self.assertFalse(converter.change_concat_input_ranges) # Assert dropping control dependency is enabled by default. - self.assertTrue(converter.drop_control_dependency) + self.assertIsNotNone(converter.drop_control_dependency) # Assert dumping extra information is disabled by default. self.assertIsNone(converter.dump_graphviz_dir) diff --git a/tensorflow/lite/python/lite_v2_test.py b/tensorflow/lite/python/lite_v2_test.py index c1b566ff8ad..714eb249ec9 100644 --- a/tensorflow/lite/python/lite_v2_test.py +++ b/tensorflow/lite/python/lite_v2_test.py @@ -58,14 +58,14 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): ('EnableMlirConverter', True), # enable mlir ('DisableMlirConverter', False)) # disable mlir @test_util.run_v2_only - def testFloat(self, enable_mlir): + def testFloat(self, enable_mlir_converter): root = self._getSimpleVariableModel() input_data = tf.constant(1., shape=[1]) concrete_func = root.f.get_concrete_function(input_data) # Convert model. converter = lite.TFLiteConverterV2.from_concrete_functions([concrete_func]) - converter.experimental_new_converter = enable_mlir + converter.experimental_new_converter = enable_mlir_converter tflite_model = converter.convert() # Check values from converted model. 
@@ -142,7 +142,7 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): self.assertIn('can only convert a single ConcreteFunction', str(error.exception)) - def _getCalibrationQuantizeModel(self): + def _getIntegerQuantizeModel(self): np.random.seed(0) root = tracking.AutoTrackable() @@ -167,23 +167,23 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): ('EnableMlirQuantizer', True), # enable mlir quantizer ('DisableMlirQuantizer', False)) # disable mlir quantizer def testPostTrainingCalibrateAndQuantize(self, mlir_quantizer): - func, calibration_gen = self._getCalibrationQuantizeModel() + func, calibration_gen = self._getIntegerQuantizeModel() # Convert float model. float_converter = lite.TFLiteConverterV2.from_concrete_functions([func]) - float_tflite = float_converter.convert() - self.assertTrue(float_tflite) + float_tflite_model = float_converter.convert() + self.assertIsNotNone(float_tflite_model) # Convert quantized model. quantized_converter = lite.TFLiteConverterV2.from_concrete_functions([func]) quantized_converter.optimizations = [lite.Optimize.DEFAULT] quantized_converter.representative_dataset = calibration_gen quantized_converter._experimental_new_quantizer = mlir_quantizer - quantized_tflite = quantized_converter.convert() - self.assertTrue(quantized_tflite) + quantized_tflite_model = quantized_converter.convert() + self.assertIsNotNone(quantized_tflite_model) # The default input and output types should be float. - interpreter = Interpreter(model_content=quantized_tflite) + interpreter = Interpreter(model_content=quantized_tflite_model) interpreter.allocate_tensors() input_details = interpreter.get_input_details() self.assertLen(input_details, 1) @@ -193,7 +193,7 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): self.assertEqual(np.float32, output_details[0]['dtype']) # Ensure that the quantized weights tflite model is smaller. - self.assertLess(len(quantized_tflite), len(float_tflite)) + self.assertLess(len(quantized_tflite_model), len(float_tflite_model)) @parameterized.named_parameters( ('_INT8InputOutput', lite.constants.INT8), @@ -202,7 +202,7 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): @test_util.run_v2_only def testInvalidPostTrainingDynamicRangeQuantization( self, inference_input_output_type): - func, _ = self._getCalibrationQuantizeModel() + func, _ = self._getIntegerQuantizeModel() # Convert float model. converter = lite.TFLiteConverterV2.from_concrete_functions([func]) @@ -228,7 +228,7 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): ('_UINT8InputOutput', lite.constants.QUANTIZED_UINT8)) def testPostTrainingIntegerAllowFloatQuantization( self, inference_input_output_type): - func, calibration_gen = self._getCalibrationQuantizeModel() + func, calibration_gen = self._getIntegerQuantizeModel() # Convert float model. 
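The renamed `_getIntegerQuantizeModel` helper feeds the post-training quantization path exercised by these tests. A minimal sketch of that flow, assuming a concrete function `func` and a `calibration_gen` generator shaped like the ones above; the helper name is hypothetical:

    from tensorflow.lite.python import lite


    def convert_float_and_quantized(func, calibration_gen):
      """Converts a concrete function twice: once as float, once calibrated and quantized."""
      float_converter = lite.TFLiteConverterV2.from_concrete_functions([func])
      float_tflite_model = float_converter.convert()

      quantized_converter = lite.TFLiteConverterV2.from_concrete_functions([func])
      quantized_converter.optimizations = [lite.Optimize.DEFAULT]
      quantized_converter.representative_dataset = calibration_gen
      quantized_tflite_model = quantized_converter.convert()

      # The calibrated/quantized flatbuffer should be smaller than the float one.
      assert len(quantized_tflite_model) < len(float_tflite_model)
      return float_tflite_model, quantized_tflite_model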
converter = lite.TFLiteConverterV2.from_concrete_functions([func]) @@ -242,7 +242,7 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): quantized_converter.inference_input_type = inference_input_output_type quantized_converter.inference_output_type = inference_input_output_type quantized_tflite_model = quantized_converter.convert() - self.assertTrue(quantized_tflite_model) + self.assertIsNotNone(quantized_tflite_model) interpreter = Interpreter(model_content=quantized_tflite_model) interpreter.allocate_tensors() @@ -259,7 +259,7 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): self.assertLess(len(quantized_tflite_model), len(tflite_model)) def testPostTrainingIntegerAllowFloatQuantizationINT16InputOutput(self): - func, calibration_gen = self._getCalibrationQuantizeModel() + func, calibration_gen = self._getIntegerQuantizeModel() # Convert float model. converter = lite.TFLiteConverterV2.from_concrete_functions([func]) @@ -279,7 +279,7 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): quantized_converter.inference_input_type = inference_input_output_type quantized_converter.inference_output_type = inference_input_output_type quantized_tflite_model = quantized_converter.convert() - self.assertTrue(quantized_tflite_model) + self.assertIsNotNone(quantized_tflite_model) interpreter = Interpreter(model_content=quantized_tflite_model) interpreter.allocate_tensors() @@ -299,7 +299,7 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): # In this test we check that when we do 16x8 post-training # quantization and set inference_input(output)_type to # constants.INT8, we have an error. - func, calibration_gen = self._getCalibrationQuantizeModel() + func, calibration_gen = self._getIntegerQuantizeModel() # Convert quantized model. quantized_converter = lite.TFLiteConverterV2.from_concrete_functions([func]) @@ -330,7 +330,7 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): inference_input_output_type, use_target_ops_flag, quantization_16x8): - func, calibration_gen = self._getCalibrationQuantizeModel() + func, calibration_gen = self._getIntegerQuantizeModel() # Convert float model. converter = lite.TFLiteConverterV2.from_concrete_functions([func]) @@ -357,7 +357,7 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): quantized_converter.inference_input_type = inference_input_output_type quantized_converter.inference_output_type = inference_input_output_type quantized_tflite_model = quantized_converter.convert() - self.assertTrue(quantized_tflite_model) + self.assertIsNotNone(quantized_tflite_model) interpreter = Interpreter(model_content=quantized_tflite_model) interpreter.allocate_tensors() @@ -374,12 +374,12 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): self.assertLess(len(quantized_tflite_model), len(tflite_model)) def testCalibrateAndQuantizeBuiltinInt16(self): - func, calibration_gen = self._getCalibrationQuantizeModel() + func, calibration_gen = self._getIntegerQuantizeModel() # Convert float model. float_converter = lite.TFLiteConverterV2.from_concrete_functions([func]) - float_tflite = float_converter.convert() - self.assertTrue(float_tflite) + float_tflite_model = float_converter.convert() + self.assertIsNotNone(float_tflite_model) converter = lite.TFLiteConverterV2.from_concrete_functions([func]) # TODO(b/156309549): We should add INT16 to the builtin types. 
@@ -389,13 +389,13 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): converter.representative_dataset = calibration_gen converter._experimental_calibrate_only = True calibrated_tflite = converter.convert() - quantized_tflite = mlir_quantize(calibrated_tflite, - inference_type=_types_pb2.QUANTIZED_INT16) + quantized_tflite_model = mlir_quantize( + calibrated_tflite, inference_type=_types_pb2.QUANTIZED_INT16) - self.assertTrue(quantized_tflite) + self.assertIsNotNone(quantized_tflite_model) # The default input and output types should be float. - interpreter = Interpreter(model_content=quantized_tflite) + interpreter = Interpreter(model_content=quantized_tflite_model) interpreter.allocate_tensors() input_details = interpreter.get_input_details() self.assertLen(input_details, 1) @@ -405,7 +405,7 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): self.assertEqual(np.float32, output_details[0]['dtype']) # Ensure that the quantized weights tflite model is smaller. - self.assertLess(len(quantized_tflite), len(float_tflite)) + self.assertLess(len(quantized_tflite_model), len(float_tflite_model)) def _getTrainingTimeQuantizedModel(self): @@ -454,17 +454,17 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): model = self._getTrainingTimeQuantizedModel() float_converter = lite.TFLiteConverterV2.from_keras_model(model) - float_tflite = float_converter.convert() - self.assertTrue(float_tflite) + float_tflite_model = float_converter.convert() + self.assertIsNotNone(float_tflite_model) quantized_converter = lite.TFLiteConverterV2.from_keras_model(model) quantized_converter.optimizations = [lite.Optimize.DEFAULT] quantized_converter.inference_input_type = inference_input_output_type quantized_converter.inference_output_type = inference_input_output_type - quantized_tflite = quantized_converter.convert() - self.assertTrue(quantized_tflite) + quantized_tflite_model = quantized_converter.convert() + self.assertIsNotNone(quantized_tflite_model) - interpreter = Interpreter(model_content=quantized_tflite) + interpreter = Interpreter(model_content=quantized_tflite_model) interpreter.allocate_tensors() input_details = interpreter.get_input_details() self.assertLen(input_details, 1) @@ -476,12 +476,12 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): output_details[0]['dtype']) # Ensure that the quantized tflite model is smaller. 
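The training-time (quantization-aware) tests above additionally pin the interpreter-facing tensor types via `inference_input_type` / `inference_output_type`. A hedged sketch of that step, assuming a Keras `model` already built with quantize-aware layers as in `_getTrainingTimeQuantizedModel`; the function name is illustrative:

    from tensorflow.lite.python import lite


    def convert_qat_model_int8_io(model):
      """Converts a quantization-aware Keras model, pinning int8 input/output tensors."""
      converter = lite.TFLiteConverterV2.from_keras_model(model)
      converter.optimizations = [lite.Optimize.DEFAULT]
      # Without these two assignments the converted model keeps float32 I/O by default,
      # as the "default input and output types should be float" checks above verify.
      converter.inference_input_type = lite.constants.INT8
      converter.inference_output_type = lite.constants.INT8
      return converter.convert()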
- self.assertLess(len(quantized_tflite), len(float_tflite)) + self.assertLess(len(quantized_tflite_model), len(float_tflite_model)) @test_util.run_v2_only def testNewQuantizer(self): """Test the model quantized by the new converter.""" - func, calibration_gen = self._getCalibrationQuantizeModel() + func, calibration_gen = self._getIntegerQuantizeModel() quantized_converter = lite.TFLiteConverterV2.from_concrete_functions([func]) quantized_converter.target_spec.supported_ops = [ @@ -502,13 +502,13 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): np.random.uniform(-1, 1, size=(1, 5, 5, 3)).astype(np.float32)) old_value = self._evaluateTFLiteModel(old_tflite, [input_data]) new_value = self._evaluateTFLiteModel(new_tflite, [input_data]) - np.testing.assert_almost_equal(old_value, new_value, 1) + self.assertAllClose(old_value, new_value, atol=1e-01) @parameterized.named_parameters( ('EnableMlirConverter', True), # enable mlir ('DisableMlirConverter', False)) # disable mlir @test_util.run_v2_only - def testEmbeddings(self, enable_mlir): + def testEmbeddings(self, enable_mlir_converter): """Test model with embeddings.""" input_data = tf.constant( np.array(np.random.random_sample((20)), dtype=np.int32)) @@ -534,13 +534,13 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): # Convert model. converter = lite.TFLiteConverterV2.from_concrete_functions([concrete_func]) - converter.experimental_new_converter = enable_mlir + converter.experimental_new_converter = enable_mlir_converter tflite_model = converter.convert() # Check values from converted model. expected_value = root.func(input_data) actual_value = self._evaluateTFLiteModel(tflite_model, [input_data]) - np.testing.assert_almost_equal(expected_value.numpy(), actual_value[0], 5) + self.assertAllClose(expected_value.numpy(), actual_value[0], atol=1e-05) @test_util.run_v2_only def testGraphDebugInfo(self): @@ -594,7 +594,7 @@ class FromSavedModelTest(lite_v2_test_util.ModelTest): self.assertLen(input_details, 2) self.assertStartsWith(input_details[0]['name'], 'inputA') self.assertEqual(np.float32, input_details[0]['dtype']) - self.assertTrue(([1, 16, 16, 3] == input_details[0]['shape']).all()) + self.assertAllEqual([1, 16, 16, 3], input_details[0]['shape']) self.assertEqual((0., 0.), input_details[0]['quantization']) self.assertStartsWith( @@ -602,14 +602,14 @@ class FromSavedModelTest(lite_v2_test_util.ModelTest): 'inputB', ) self.assertEqual(np.float32, input_details[1]['dtype']) - self.assertTrue(([1, 16, 16, 3] == input_details[1]['shape']).all()) + self.assertTrue([1, 16, 16, 3], input_details[1]['shape']) self.assertEqual((0., 0.), input_details[1]['quantization']) output_details = interpreter.get_output_details() self.assertLen(output_details, 1) self.assertStartsWith(output_details[0]['name'], 'add') self.assertEqual(np.float32, output_details[0]['dtype']) - self.assertTrue(([1, 16, 16, 3] == output_details[0]['shape']).all()) + self.assertTrue([1, 16, 16, 3], output_details[0]['shape']) self.assertEqual((0., 0.), output_details[0]['quantization']) @test_util.run_v2_only @@ -715,7 +715,6 @@ class FromSavedModelTest(lite_v2_test_util.ModelTest): @test_util.run_v2_only def testNoConcreteFunctionModel(self): root = self._getMultiFunctionModel() - input_data = tf.constant(1., shape=[1]) save_dir = os.path.join(self.get_temp_dir(), 'saved_model') save(root, save_dir) @@ -836,7 +835,7 @@ class FromKerasModelTest(lite_v2_test_util.ModelTest): expected_value = model.predict(input_data) actual_value = 
self._evaluateTFLiteModel(tflite_model, input_data) for tf_result, tflite_result in zip(expected_value, actual_value): - np.testing.assert_almost_equal(tf_result, tflite_result, 5) + self.assertAllClose(tf_result, tflite_result, atol=1e-05) @test_util.run_v2_only def testGraphDebugInfo(self): @@ -919,7 +918,7 @@ class ControlFlowTest(lite_v2_test_util.ModelTest): expected_value = concrete_func(**input_data) actual_value = self._evaluateTFLiteModel( tflite_model, [input_data['x'], input_data['b']])[0] - np.testing.assert_almost_equal(expected_value.numpy(), actual_value) + self.assertAllClose(expected_value, actual_value) @test_util.run_v2_only def testStaticRnn(self): @@ -945,7 +944,7 @@ class ControlFlowTest(lite_v2_test_util.ModelTest): expected_value = concrete_func(input_data)[0] actual_value = self._evaluateTFLiteModel(tflite_model, [input_data]) for expected, actual in zip(expected_value, actual_value): - np.testing.assert_almost_equal(expected.numpy(), actual) + self.assertAllClose(expected, actual) @test_util.run_v2_only def testWhileLoop(self): @@ -973,7 +972,7 @@ class ControlFlowTest(lite_v2_test_util.ModelTest): # Check values from converted model. expected_value = concrete_func(input_data)[0] actual_value = self._evaluateTFLiteModel(tflite_model, [input_data])[0] - np.testing.assert_almost_equal(expected_value.numpy(), actual_value) + self.assertAllClose(expected_value, actual_value) @test_util.run_v2_only def testDynamicRnn(self): @@ -997,11 +996,9 @@ class ControlFlowTest(lite_v2_test_util.ModelTest): expected_value = concrete_func(input_data) actual_value = self._evaluateTFLiteModel(tflite_model, [input_data]) for expected, actual in zip(expected_value, actual_value): - if isinstance(expected, ops.EagerTensor): - expected = expected.numpy() - else: - expected = expected.c.numpy() - np.testing.assert_almost_equal(expected, actual) + if not isinstance(expected, ops.EagerTensor): + expected = expected.c + self.assertAllClose(expected, actual) @parameterized.named_parameters(('LSTM', recurrent_v2.LSTM), ('SimpleRNN', recurrent.SimpleRNN), @@ -1025,7 +1022,7 @@ class ControlFlowTest(lite_v2_test_util.ModelTest): # Check values from converted model. expected_value = model.predict(input_data) - np.testing.assert_almost_equal(expected_value, actual_value, decimal=5) + self.assertAllClose(expected_value, actual_value, atol=1e-05) @parameterized.named_parameters(('LSTM', recurrent_v2.LSTM), ('SimpleRNN', recurrent.SimpleRNN), @@ -1046,7 +1043,7 @@ class ControlFlowTest(lite_v2_test_util.ModelTest): # Check values from converted model. expected_value = model.predict(input_data) - np.testing.assert_almost_equal(expected_value, actual_value, decimal=5) + self.assertAllClose(expected_value, actual_value, atol=1e-05) @test_util.run_v2_only def testKerasBidirectionalRNN(self): @@ -1069,7 +1066,7 @@ class ControlFlowTest(lite_v2_test_util.ModelTest): # Check values from converted model. expected_value = model.predict(input_data) - np.testing.assert_almost_equal(expected_value, actual_value, decimal=5) + self.assertAllClose(expected_value, actual_value, atol=1e-05) class GrapplerTest(lite_v2_test_util.ModelTest): @@ -1096,14 +1093,14 @@ class GrapplerTest(lite_v2_test_util.ModelTest): # Check values from converted model. 
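The repeated swap from `np.testing.assert_almost_equal(..., decimal=N)` to `self.assertAllClose(..., atol=1e-0N)` keeps roughly the same tolerance: `decimal=N` accepts |actual - desired| < 1.5 * 10**-N, while `atol=1e-N` applies an absolute tolerance of 10**-N (on top of assertAllClose's small default relative tolerance). A tiny sketch of the equivalence, with illustrative values:

    import numpy as np
    import tensorflow as tf


    class ToleranceEquivalenceSketchTest(tf.test.TestCase):

      def test_both_tolerances_accept_small_error(self):
        expected = np.array([1.0, 2.0], dtype=np.float32)
        actual = expected + 5e-6  # well inside both tolerances for N = 5

        np.testing.assert_almost_equal(actual, expected, decimal=5)
        self.assertAllClose(expected, actual, atol=1e-05)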
expected_value = root.f(input_data) - actual_value = self._evaluateTFLiteModel(tflite_model, [input_data]) - np.testing.assert_almost_equal(expected_value.numpy(), actual_value[0]) + actual_value = self._evaluateTFLiteModel(tflite_model, [input_data])[0] + self.assertAllClose(expected_value, actual_value) # Enable hybrid quantization, same result converter.optimizations = [lite.Optimize.DEFAULT] - hybrid_tflite_model = converter.convert() - actual_value = self._evaluateTFLiteModel(hybrid_tflite_model, [input_data]) - np.testing.assert_almost_equal(expected_value.numpy(), actual_value[0]) + tflite_model = converter.convert() + actual_value = self._evaluateTFLiteModel(tflite_model, [input_data])[0] + self.assertAllClose(expected_value, actual_value) class UnknownShapes(lite_v2_test_util.ModelTest): @@ -1128,15 +1125,16 @@ class UnknownShapes(lite_v2_test_util.ModelTest): # Check values from converted model. expected_value = concrete_func(input_data) actual_value = self._evaluateTFLiteModel( - tflite_model, [input_data], input_shapes=[([-1, 4], [10, 4])]) - np.testing.assert_almost_equal( - expected_value.numpy(), actual_value[0], decimal=6) + tflite_model, [input_data], input_shapes=[([-1, 4], [10, 4])])[0] + self.assertAllClose(expected_value, actual_value, atol=1e-06) + + def _getIntegerQuantizeModelWithUnknownShapes(self): + np.random.seed(0) - def _getQuantizedModel(self): - # Returns a model with tf.MatMul and unknown dimensions. @tf.function( input_signature=[tf.TensorSpec(shape=[None, 33], dtype=tf.float32)]) - def model(in_tensor): + def model(input_tensor): + """Define a model with tf.MatMul and unknown shapes.""" # We need the tensor to have more than 1024 elements for quantize_weights # to kick in. Thus, the [33, 33] shape. const_tensor = tf.constant( @@ -1145,12 +1143,14 @@ class UnknownShapes(lite_v2_test_util.ModelTest): dtype=tf.float32, name='inputB') - shape = tf.shape(in_tensor) + shape = tf.shape(input_tensor) fill = tf.transpose(tf.fill(shape, 1.)) - mult = tf.matmul(fill, in_tensor) + mult = tf.matmul(fill, input_tensor) return tf.matmul(mult, const_tensor) - concrete_func = model.get_concrete_function() + root = tracking.AutoTrackable() + root.f = model + concrete_func = root.f.get_concrete_function() def calibration_gen(): for batch in range(5, 20, 5): @@ -1161,7 +1161,7 @@ class UnknownShapes(lite_v2_test_util.ModelTest): @test_util.run_v2_only def testMatMulQuantize(self): - concrete_func, _ = self._getQuantizedModel() + concrete_func, _ = self._getIntegerQuantizeModelWithUnknownShapes() float_converter = lite.TFLiteConverterV2.from_concrete_functions( [concrete_func]) float_tflite_model = float_converter.convert() @@ -1177,14 +1177,15 @@ class UnknownShapes(lite_v2_test_util.ModelTest): input_details = quantized_interpreter.get_input_details() self.assertLen(input_details, 1) self.assertEqual(np.float32, input_details[0]['dtype']) - self.assertTrue((input_details[0]['shape_signature'] == [-1, 33]).all()) + self.assertAllEqual([-1, 33], input_details[0]['shape_signature']) # Ensure that the quantized weights tflite model is smaller. 
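The `shape_signature` assertions above ([-1, 33]) record the dynamic dimension that the converter preserves for unknown-shape models; at runtime the input still has to be resized to a concrete shape before allocation. A hedged sketch of that resize step, assuming a model whose first input has signature [-1, 33]; the helper name and batch size are illustrative:

    import numpy as np
    from tensorflow.lite.python.interpreter import Interpreter


    def run_with_dynamic_batch(tflite_model, batch_size=10):
      """Resizes the -1 batch dimension to a concrete size before running."""
      interpreter = Interpreter(model_content=tflite_model)
      input_index = interpreter.get_input_details()[0]['index']

      # The converted model keeps shape_signature == [-1, 33]; pick a batch size now.
      interpreter.resize_tensor_input(input_index, [batch_size, 33])
      interpreter.allocate_tensors()

      interpreter.set_tensor(
          input_index,
          np.random.uniform(-1, 1, size=(batch_size, 33)).astype(np.float32))
      interpreter.invoke()
      return interpreter.get_tensor(interpreter.get_output_details()[0]['index'])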
self.assertLess(len(quantized_tflite_model), len(float_tflite_model)) @test_util.run_v2_only def testMatMulCalibrateAndQuantize(self): - concrete_func, calibration_gen = self._getQuantizedModel() + concrete_func, calibration_gen = \ + self._getIntegerQuantizeModelWithUnknownShapes() float_converter = lite.TFLiteConverterV2.from_concrete_functions( [concrete_func]) float_tflite_model = float_converter.convert() @@ -1201,7 +1202,7 @@ class UnknownShapes(lite_v2_test_util.ModelTest): input_details = quantized_interpreter.get_input_details() self.assertLen(input_details, 1) self.assertEqual(np.float32, input_details[0]['dtype']) - self.assertTrue((input_details[0]['shape_signature'] == [-1, 33]).all()) + self.assertAllEqual([-1, 33], input_details[0]['shape_signature']) # Ensure that the quantized weights tflite model is smaller. self.assertLess(len(quantized_tflite_model), len(float_tflite_model)) @@ -1228,9 +1229,8 @@ class UnknownShapes(lite_v2_test_util.ModelTest): expected_value = concrete_func(input_data_1, input_data_2) actual_value = self._evaluateTFLiteModel( tflite_model, [input_data_1, input_data_2], - input_shapes=[([-1, 256, 256], [1, 256, 256])]) - np.testing.assert_almost_equal( - expected_value.numpy(), actual_value[0], decimal=4) + input_shapes=[([-1, 256, 256], [1, 256, 256])])[0] + self.assertAllClose(expected_value, actual_value, atol=4) def testSizeInvalid(self): diff --git a/tensorflow/lite/python/lite_v2_test_util.py b/tensorflow/lite/python/lite_v2_test_util.py index d8f764711cd..1493b240913 100644 --- a/tensorflow/lite/python/lite_v2_test_util.py +++ b/tensorflow/lite/python/lite_v2_test_util.py @@ -77,6 +77,7 @@ class ModelTest(test_util.TensorFlowTestCase, parameterized.TestCase): def _getMultiFunctionModel(self): class BasicModel(tracking.AutoTrackable): + """Basic model with multiple functions.""" def __init__(self): self.y = None diff --git a/tensorflow/lite/python/util.py b/tensorflow/lite/python/util.py index 720e53de509..79d2775d1dc 100644 --- a/tensorflow/lite/python/util.py +++ b/tensorflow/lite/python/util.py @@ -48,16 +48,16 @@ from tensorflow.python.training.saver import export_meta_graph as _export_meta_g _MAP_TF_TO_TFLITE_TYPES = { dtypes.float32: _types_pb2.FLOAT, dtypes.float16: _types_pb2.FLOAT16, - dtypes.float64: _types_pb2.FLOAT64, dtypes.int32: _types_pb2.INT32, + dtypes.uint8: _types_pb2.QUANTIZED_UINT8, dtypes.int64: _types_pb2.INT64, dtypes.string: _types_pb2.STRING, - dtypes.uint8: _types_pb2.QUANTIZED_UINT8, - dtypes.int8: _types_pb2.INT8, + dtypes.bool: _types_pb2.BOOL, dtypes.int16: _types_pb2.QUANTIZED_INT16, dtypes.complex64: _types_pb2.COMPLEX64, + dtypes.int8: _types_pb2.INT8, + dtypes.float64: _types_pb2.FLOAT64, dtypes.complex128: _types_pb2.COMPLEX128, - dtypes.bool: _types_pb2.BOOL, } _MAP_TFLITE_ENUM_TO_TF_TYPES = { @@ -72,6 +72,7 @@ _MAP_TFLITE_ENUM_TO_TF_TYPES = { 8: dtypes.complex64, 9: dtypes.int8, 10: dtypes.float64, + 11: dtypes.complex128, } _TFLITE_FILE_IDENTIFIER = b"TFL3" @@ -113,7 +114,7 @@ def _convert_tflite_enum_type_to_tf_type(tflite_enum_type): tf_type = _MAP_TFLITE_ENUM_TO_TF_TYPES.get(tflite_enum_type) if tf_type is None: raise ValueError( - "Unsupported enum {}. The valid map of enum to tf.dtypes is : {}" + "Unsupported enum {}. 
The valid map of enum to tf types is : {}" .format(tflite_enum_type, _MAP_TFLITE_ENUM_TO_TF_TYPES)) return tf_type diff --git a/tensorflow/lite/python/util_test.py b/tensorflow/lite/python/util_test.py index 0e9cbc1e58a..820cda4c7d6 100644 --- a/tensorflow/lite/python/util_test.py +++ b/tensorflow/lite/python/util_test.py @@ -42,27 +42,34 @@ from tensorflow.python.platform import test class UtilTest(test_util.TensorFlowTestCase): def testConvertDtype(self): - self.assertEqual( - util.convert_dtype_to_tflite_type(lite_constants.FLOAT), - _types_pb2.FLOAT) self.assertEqual( util.convert_dtype_to_tflite_type(dtypes.float32), _types_pb2.FLOAT) + self.assertEqual( + util.convert_dtype_to_tflite_type(dtypes.float16), _types_pb2.FLOAT16) self.assertEqual( util.convert_dtype_to_tflite_type(dtypes.int32), _types_pb2.INT32) + self.assertEqual( + util.convert_dtype_to_tflite_type(dtypes.uint8), + _types_pb2.QUANTIZED_UINT8) self.assertEqual( util.convert_dtype_to_tflite_type(dtypes.int64), _types_pb2.INT64) self.assertEqual( util.convert_dtype_to_tflite_type(dtypes.string), _types_pb2.STRING) self.assertEqual( - util.convert_dtype_to_tflite_type(dtypes.uint8), - _types_pb2.QUANTIZED_UINT8) + util.convert_dtype_to_tflite_type(dtypes.bool), _types_pb2.BOOL) + self.assertEqual( + util.convert_dtype_to_tflite_type(dtypes.int16), + _types_pb2.QUANTIZED_INT16) self.assertEqual( util.convert_dtype_to_tflite_type(dtypes.complex64), _types_pb2.COMPLEX64) self.assertEqual( - util.convert_dtype_to_tflite_type(dtypes.half), _types_pb2.FLOAT16) + util.convert_dtype_to_tflite_type(dtypes.int8), _types_pb2.INT8) self.assertEqual( - util.convert_dtype_to_tflite_type(dtypes.bool), _types_pb2.BOOL) + util.convert_dtype_to_tflite_type(dtypes.float64), _types_pb2.FLOAT64) + self.assertEqual( + util.convert_dtype_to_tflite_type(dtypes.complex128), + _types_pb2.COMPLEX128) def testConvertEnumToDtype(self): self.assertEqual( @@ -81,17 +88,19 @@ class UtilTest(test_util.TensorFlowTestCase): self.assertEqual(util._convert_tflite_enum_type_to_tf_type(9), dtypes.int8) self.assertEqual( util._convert_tflite_enum_type_to_tf_type(10), dtypes.float64) - with self.assertRaises(ValueError) as error: - util._convert_tflite_enum_type_to_tf_type(11) self.assertEqual( - "Unsupported enum 11. The valid map of enum to tf.dtypes is : " + util._convert_tflite_enum_type_to_tf_type(11), dtypes.complex128) + with self.assertRaises(ValueError) as error: + util._convert_tflite_enum_type_to_tf_type(20) + self.assertEqual( + "Unsupported enum 20. 
The valid map of enum to tf types is : " "{0: tf.float32, 1: tf.float16, 2: tf.int32, 3: tf.uint8, 4: tf.int64, " "5: tf.string, 6: tf.bool, 7: tf.int16, 8: tf.complex64, 9: tf.int8, " - "10: tf.float64}", str(error.exception)) + "10: tf.float64, 11: tf.complex128}", str(error.exception)) def testTensorName(self): with ops.Graph().as_default(): - in_tensor = array_ops.placeholder(shape=[4], dtype=dtypes.float32) + in_tensor = array_ops.placeholder(dtype=dtypes.float32, shape=[4]) out_tensors = array_ops.split( value=in_tensor, num_or_size_splits=[1, 1, 1, 1], axis=0) @@ -103,7 +112,7 @@ class UtilTest(test_util.TensorFlowTestCase): @test_util.enable_control_flow_v2 def testRemoveLowerUsingSwitchMerge(self): with ops.Graph().as_default(): - i = array_ops.placeholder(shape=(), dtype=dtypes.int32) + i = array_ops.placeholder(dtype=dtypes.int32, shape=()) c = lambda i: math_ops.less(i, 10) b = lambda i: math_ops.add(i, 1) control_flow_ops.while_loop(c, b, [i]) @@ -116,7 +125,7 @@ class UtilTest(test_util.TensorFlowTestCase): if node.op == "While" or node.op == "StatelessWhile": if not node.attr["_lower_using_switch_merge"].b: lower_using_switch_merge_is_removed = True - self.assertEqual(lower_using_switch_merge_is_removed, True) + self.assertTrue(lower_using_switch_merge_is_removed) def testConvertBytes(self): source, header = util.convert_bytes_to_c_source( @@ -154,7 +163,7 @@ class TensorFunctionsTest(test_util.TensorFlowTestCase): def testGetTensorsValid(self): with ops.Graph().as_default(): in_tensor = array_ops.placeholder( - shape=[1, 16, 16, 3], dtype=dtypes.float32) + dtype=dtypes.float32, shape=[1, 16, 16, 3]) _ = in_tensor + in_tensor sess = session.Session() @@ -164,7 +173,7 @@ class TensorFunctionsTest(test_util.TensorFlowTestCase): def testGetTensorsInvalid(self): with ops.Graph().as_default(): in_tensor = array_ops.placeholder( - shape=[1, 16, 16, 3], dtype=dtypes.float32) + dtype=dtypes.float32, shape=[1, 16, 16, 3]) _ = in_tensor + in_tensor sess = session.Session() @@ -175,52 +184,51 @@ class TensorFunctionsTest(test_util.TensorFlowTestCase): def testSetTensorShapeValid(self): with ops.Graph().as_default(): - tensor = array_ops.placeholder(shape=[None, 3, 5], dtype=dtypes.float32) - self.assertEqual([None, 3, 5], tensor.shape.as_list()) + tensor = array_ops.placeholder(dtype=dtypes.float32, shape=[None, 3, 5]) + self.assertAllEqual([None, 3, 5], tensor.shape) util.set_tensor_shapes([tensor], {"Placeholder": [5, 3, 5]}) - self.assertEqual([5, 3, 5], tensor.shape.as_list()) + self.assertAllEqual([5, 3, 5], tensor.shape) def testSetTensorShapeNoneValid(self): with ops.Graph().as_default(): tensor = array_ops.placeholder(dtype=dtypes.float32) - self.assertEqual(None, tensor.shape) util.set_tensor_shapes([tensor], {"Placeholder": [1, 3, 5]}) - self.assertEqual([1, 3, 5], tensor.shape.as_list()) + self.assertAllEqual([1, 3, 5], tensor.shape) def testSetTensorShapeArrayInvalid(self): # Tests set_tensor_shape where the tensor name passed in doesn't exist. 
with ops.Graph().as_default(): - tensor = array_ops.placeholder(shape=[None, 3, 5], dtype=dtypes.float32) - self.assertEqual([None, 3, 5], tensor.shape.as_list()) + tensor = array_ops.placeholder(dtype=dtypes.float32, shape=[None, 3, 5]) + self.assertAllEqual([None, 3, 5], tensor.shape) with self.assertRaises(ValueError) as error: util.set_tensor_shapes([tensor], {"invalid-input": [5, 3, 5]}) self.assertEqual( "Invalid tensor 'invalid-input' found in tensor shapes map.", str(error.exception)) - self.assertEqual([None, 3, 5], tensor.shape.as_list()) + self.assertAllEqual([None, 3, 5], tensor.shape) def testSetTensorShapeDimensionInvalid(self): # Tests set_tensor_shape where the shape passed in is incompatible. with ops.Graph().as_default(): - tensor = array_ops.placeholder(shape=[None, 3, 5], dtype=dtypes.float32) - self.assertEqual([None, 3, 5], tensor.shape.as_list()) + tensor = array_ops.placeholder(dtype=dtypes.float32, shape=[None, 3, 5]) + self.assertAllEqual([None, 3, 5], tensor.shape) with self.assertRaises(ValueError) as error: util.set_tensor_shapes([tensor], {"Placeholder": [1, 5, 5]}) self.assertIn("The shape of tensor 'Placeholder' cannot be changed", str(error.exception)) - self.assertEqual([None, 3, 5], tensor.shape.as_list()) + self.assertAllEqual([None, 3, 5], tensor.shape) def testSetTensorShapeEmpty(self): with ops.Graph().as_default(): - tensor = array_ops.placeholder(shape=[None, 3, 5], dtype=dtypes.float32) - self.assertEqual([None, 3, 5], tensor.shape.as_list()) + tensor = array_ops.placeholder(dtype=dtypes.float32, shape=[None, 3, 5]) + self.assertAllEqual([None, 3, 5], tensor.shape) util.set_tensor_shapes([tensor], {}) - self.assertEqual([None, 3, 5], tensor.shape.as_list()) + self.assertAllEqual([None, 3, 5], tensor.shape) def _generate_integer_tflite_model(): @@ -355,7 +363,7 @@ class UtilModifyIntegerQuantizedModelIOTypeTest( output_io_data = _run_tflite_inference(model_io, in_tftype, out_tftype) # Validate that both the outputs are the same - self.assertTrue(np.allclose(output_data, output_io_data, atol=1.0)) + self.assertAllClose(output_data, output_io_data, atol=1.0) if __name__ == "__main__": From fc424cb9c535c0bca6f69437f7a9ec6f0c084b0f Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 3 Aug 2020 19:01:27 -0700 Subject: [PATCH 2035/2522] Internal change PiperOrigin-RevId: 324730398 Change-Id: I9c45c92bac9702ee8ef18599e2f85402bd2f0cac --- .../optimizer_v2/gradient_descent_test.py | 658 +++++++++--------- 1 file changed, 333 insertions(+), 325 deletions(-) diff --git a/tensorflow/python/keras/optimizer_v2/gradient_descent_test.py b/tensorflow/python/keras/optimizer_v2/gradient_descent_test.py index 56f0b217578..0f25beacc9a 100644 --- a/tensorflow/python/keras/optimizer_v2/gradient_descent_test.py +++ b/tensorflow/python/keras/optimizer_v2/gradient_descent_test.py @@ -37,9 +37,9 @@ from tensorflow.python.ops import variables from tensorflow.python.platform import test -@combinations.generate(combinations.combine(mode=["graph", "eager"])) class GradientDescentOptimizerTest(test.TestCase, parameterized.TestCase): + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testBasic(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: var0 = variables.Variable([1.0, 2.0], dtype=dtype) @@ -88,6 +88,7 @@ class GradientDescentOptimizerTest(test.TestCase, parameterized.TestCase): [3.0 - 3.0 * 0.01 - 2.0 * 0.01, 4.0 - 3.0 * 0.01 - 2.0 * 0.01], self.evaluate(var1)) + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testBasicWithLearningRateDecay(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: learning_rate = 3.0 @@ -95,6 +96,7 @@ class GradientDescentOptimizerTest(test.TestCase, parameterized.TestCase): sgd = gradient_descent.SGD(learning_rate=learning_rate, decay=decay) self._test_basic_sgd_with_learning_rate_decay(sgd, dtype) + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testBasicWithLearningRateInverseTimeDecay(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: learning_rate = learning_rate_schedule.InverseTimeDecay( @@ -102,6 +104,7 @@ class GradientDescentOptimizerTest(test.TestCase, parameterized.TestCase): sgd = gradient_descent.SGD(learning_rate=learning_rate) self._test_basic_sgd_with_learning_rate_decay(sgd, dtype) + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testBasicWithLearningRateInverseTimeDecaySerializeAndDeserialize(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: learning_rate = learning_rate_schedule.InverseTimeDecay( @@ -110,6 +113,7 @@ class GradientDescentOptimizerTest(test.TestCase, parameterized.TestCase): sgd = gradient_descent.SGD.from_config(sgd.get_config()) self._test_basic_sgd_with_learning_rate_decay(sgd, dtype) + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testBasicCallableParams(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: var0 = variables.Variable([1.0, 2.0], dtype=dtype) @@ -128,6 +132,7 @@ class GradientDescentOptimizerTest(test.TestCase, parameterized.TestCase): self.assertAllCloseAccordingToType([3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01], self.evaluate(var1)) + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testMinimizeResourceVariable(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: var0 = variables.Variable([[1.0, 2.0]], dtype=dtype) @@ -145,26 +150,28 @@ class GradientDescentOptimizerTest(test.TestCase, parameterized.TestCase): self.assertAllCloseAccordingToType([3.0 - 1.0], self.evaluate(var1)) def testMinimizeSparseResourceVariable(self): - for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - var0 = variables.Variable([[1.0, 2.0]], dtype=dtype) 
- var1 = variables.Variable([3.0], dtype=dtype) - x = constant_op.constant([[4.0], [5.0]], dtype=dtype) + # TODO(tanzheny, omalleyt): Fix test in eager mode. + with ops.Graph().as_default(): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + var0 = variables.Variable([[1.0, 2.0]], dtype=dtype) + var1 = variables.Variable([3.0], dtype=dtype) + x = constant_op.constant([[4.0], [5.0]], dtype=dtype) - def loss(): - pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x) # pylint: disable=cell-var-from-loop - pred += var1 # pylint: disable=cell-var-from-loop - return pred * pred + def loss(): + pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x) # pylint: disable=cell-var-from-loop + pred += var1 # pylint: disable=cell-var-from-loop + return pred * pred - sgd_op = gradient_descent.SGD(1.0).minimize(loss, [var0, var1]) - self.evaluate(variables.global_variables_initializer()) - # Run 1 step of sgd - self.evaluate(sgd_op) - # Validate updated params - np_pred = 1.0 * 4.0 + 2.0 * 5.0 + 3.0 - np_grad = 2 * np_pred - self.assertAllCloseAccordingToType( - [[1.0 - np_grad * 4.0, 2.0 - np_grad * 5.0]], self.evaluate(var0)) - self.assertAllCloseAccordingToType([3.0 - np_grad], self.evaluate(var1)) + sgd_op = gradient_descent.SGD(1.0).minimize(loss, [var0, var1]) + self.evaluate(variables.global_variables_initializer()) + # Run 1 step of sgd + self.evaluate(sgd_op) + # Validate updated params + np_pred = 1.0 * 4.0 + 2.0 * 5.0 + 3.0 + np_grad = 2 * np_pred + self.assertAllCloseAccordingToType( + [[1.0 - np_grad * 4.0, 2.0 - np_grad * 5.0]], self.evaluate(var0)) + self.assertAllCloseAccordingToType([3.0 - np_grad], self.evaluate(var1)) def testTensorLearningRate(self): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: @@ -185,68 +192,71 @@ class GradientDescentOptimizerTest(test.TestCase, parameterized.TestCase): self.evaluate(var1)) def testGradWrtRef(self): - for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - opt = gradient_descent.SGD(3.0) - values = [1.0, 3.0] - vars_ = [variables.Variable([v], dtype=dtype) for v in values] - loss = lambda: vars_[0] + vars_[1] # pylint: disable=cell-var-from-loop - grads_and_vars = opt._compute_gradients(loss, vars_) - self.evaluate(variables.global_variables_initializer()) - for grad, _ in grads_and_vars: - self.assertAllCloseAccordingToType([1.0], self.evaluate(grad)) + # TODO(tanzheny, omalleyt): Fix test in eager mode. 
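The recurring `# TODO(tanzheny, omalleyt): Fix test in eager mode.` plus `with ops.Graph().as_default():` wrapper is how these hunks keep a test on the graph path while eager execution is the TF2 default: ops created under an explicit Graph are not executed eagerly, so `self.evaluate(...)` runs them through a session. A stripped-down sketch of the pattern; the variable, loss, and expected values below are illustrative, not copied from any one test:

    from tensorflow.python.framework import ops
    from tensorflow.python.keras.optimizer_v2 import gradient_descent
    from tensorflow.python.ops import variables
    from tensorflow.python.platform import test


    class GraphOnlySGDSketchTest(test.TestCase):

      def testMinimizeInGraphMode(self):
        # Pin this test to graph mode until the eager-mode issue is resolved.
        with ops.Graph().as_default():
          var0 = variables.Variable([1.0, 2.0])
          loss = lambda: 5.0 * var0 * var0  # gradient is 10 * var0 = [10, 20]
          sgd_op = gradient_descent.SGD(1.0).minimize(loss, [var0])
          self.evaluate(variables.global_variables_initializer())
          self.evaluate(sgd_op)  # one update step, run through the test session
          self.assertAllClose([-9.0, -18.0], self.evaluate(var0))


    if __name__ == '__main__':
      test.main()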
+ with ops.Graph().as_default(): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + opt = gradient_descent.SGD(3.0) + values = [1.0, 3.0] + vars_ = [variables.Variable([v], dtype=dtype) for v in values] + loss = lambda: vars_[0] + vars_[1] # pylint: disable=cell-var-from-loop + grads_and_vars = opt._compute_gradients(loss, vars_) + self.evaluate(variables.global_variables_initializer()) + for grad, _ in grads_and_vars: + self.assertAllCloseAccordingToType([1.0], self.evaluate(grad)) def testSparseBasic(self): - for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - var0 = variables.Variable([[1.0], [2.0]], dtype=dtype) - var1 = variables.Variable([[3.0], [4.0]], dtype=dtype) - grads0 = ops.IndexedSlices( - constant_op.constant([0.1], shape=[1, 1], dtype=dtype), - constant_op.constant([0]), constant_op.constant([2, 1])) - grads1 = ops.IndexedSlices( - constant_op.constant([0.01], shape=[1, 1], dtype=dtype), - constant_op.constant([1]), constant_op.constant([2, 1])) - sgd_op = gradient_descent.SGD(3.0).apply_gradients( - zip([grads0, grads1], [var0, var1])) - self.evaluate(variables.global_variables_initializer()) - # Run 1 step of sgd - self.evaluate(sgd_op) - # Validate updated params - self.assertAllCloseAccordingToType([[1.0 - 3.0 * 0.1], [2.0]], - self.evaluate(var0)) - self.assertAllCloseAccordingToType([[3.0], [4.0 - 3.0 * 0.01]], - self.evaluate(var1)) + # TODO(tanzheny, omalleyt): Fix test in eager mode. + with ops.Graph().as_default(): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + var0 = variables.Variable([[1.0], [2.0]], dtype=dtype) + var1 = variables.Variable([[3.0], [4.0]], dtype=dtype) + grads0 = ops.IndexedSlices( + constant_op.constant([0.1], shape=[1, 1], dtype=dtype), + constant_op.constant([0]), constant_op.constant([2, 1])) + grads1 = ops.IndexedSlices( + constant_op.constant([0.01], shape=[1, 1], dtype=dtype), + constant_op.constant([1]), constant_op.constant([2, 1])) + sgd_op = gradient_descent.SGD(3.0).apply_gradients( + zip([grads0, grads1], [var0, var1])) + self.evaluate(variables.global_variables_initializer()) + # Run 1 step of sgd + self.evaluate(sgd_op) + # Validate updated params + self.assertAllCloseAccordingToType([[1.0 - 3.0 * 0.1], [2.0]], + self.evaluate(var0)) + self.assertAllCloseAccordingToType([[3.0], [4.0 - 3.0 * 0.01]], + self.evaluate(var1)) def testSparseBasicWithLearningRateDecay(self): - for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - var0 = variables.Variable([[1.0], [2.0]], dtype=dtype) - var1 = variables.Variable([[3.0], [4.0]], dtype=dtype) - grads0 = ops.IndexedSlices( - constant_op.constant([0.1], shape=[1, 1], dtype=dtype), - constant_op.constant([0]), constant_op.constant([2, 1])) - grads1 = ops.IndexedSlices( - constant_op.constant([0.01], shape=[1, 1], dtype=dtype), - constant_op.constant([1]), constant_op.constant([2, 1])) + # TODO(tanzheny, omalleyt): Fix test in eager mode. 
+ with ops.Graph().as_default(): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + var0 = variables.Variable([[1.0], [2.0]], dtype=dtype) + var1 = variables.Variable([[3.0], [4.0]], dtype=dtype) + grads0 = ops.IndexedSlices( + constant_op.constant([0.1], shape=[1, 1], dtype=dtype), + constant_op.constant([0]), constant_op.constant([2, 1])) + grads1 = ops.IndexedSlices( + constant_op.constant([0.01], shape=[1, 1], dtype=dtype), + constant_op.constant([1]), constant_op.constant([2, 1])) + sgd_op = gradient_descent.SGD( + 3.0, decay=0.5).apply_gradients( + zip([grads0, grads1], [var0, var1])) + self.evaluate(variables.global_variables_initializer()) + # Run 2 steps of sgd + self.evaluate(sgd_op) + # Validate updated params + self.assertAllCloseAccordingToType([[1.0 - 3.0 * 0.1], [2.0]], + self.evaluate(var0)) + self.assertAllCloseAccordingToType([[3.0], [4.0 - 3.0 * 0.01]], + self.evaluate(var1)) - opt = gradient_descent.SGD(3.0, decay=0.5) - update_op = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - self.evaluate(variables.global_variables_initializer()) - # Run 2 steps of sgd - self.evaluate(update_op) - # Validate updated params - self.assertAllCloseAccordingToType([[1.0 - 3.0 * 0.1], [2.0]], - self.evaluate(var0)) - self.assertAllCloseAccordingToType([[3.0], [4.0 - 3.0 * 0.01]], - self.evaluate(var1)) - - if context.executing_eagerly(): - opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - else: - self.evaluate(update_op) - # Validate updated params - self.assertAllCloseAccordingToType([[1.0 - 3.0 * 0.1 - 2.0 * 0.1], [2.0]], - self.evaluate(var0)) - self.assertAllCloseAccordingToType( - [[3.0], [4.0 - 3.0 * 0.01 - 2.0 * 0.01]], self.evaluate(var1)) + self.evaluate(sgd_op) + # Validate updated params + self.assertAllCloseAccordingToType( + [[1.0 - 3.0 * 0.1 - 2.0 * 0.1], [2.0]], self.evaluate(var0)) + self.assertAllCloseAccordingToType( + [[3.0], [4.0 - 3.0 * 0.01 - 2.0 * 0.01]], self.evaluate(var1)) def testCapturingInDefunWhileExecutingEagerly(self): with context.eager_mode(): @@ -282,7 +292,6 @@ class GradientDescentOptimizerTest(test.TestCase, parameterized.TestCase): self.assertAllClose(self.evaluate(opt_3.lr), (0.1)) -@combinations.generate(combinations.combine(mode=["graph", "eager"])) class MomentumOptimizerTest(test.TestCase, parameterized.TestCase): def _update_nesterov_momentum_numpy(self, var, accum, g, lr, momentum): @@ -290,6 +299,7 @@ class MomentumOptimizerTest(test.TestCase, parameterized.TestCase): var += (accum * momentum - g * lr) return var, accum + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testBasic(self): for _, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]): var0 = variables.Variable([1.0, 2.0], dtype=dtype, name="var0") @@ -350,97 +360,91 @@ class MomentumOptimizerTest(test.TestCase, parameterized.TestCase): ]), self.evaluate(var1)) def testNesterovMomentum(self): - for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - var0 = variables.Variable([1.0, 2.0], dtype=dtype, name="var0") - var1 = variables.Variable([3.0, 4.0], dtype=dtype, name="var1") - var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) - accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) - accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) - loss = lambda: 5 * var0 * var0 + 3 * var1 # pylint: disable=cell-var-from-loop - mom_op = gradient_descent.SGD( - learning_rate=2.0, momentum=0.9, nesterov=True) - opt_op = 
mom_op.minimize(loss, [var0, var1]) - self.evaluate(variables.global_variables_initializer()) - for i in range(1, 5): - # already updated once in eager mode - if i != 1 and context.executing_eagerly(): - mom_op.minimize(loss, [var0, var1]) - else: + # TODO(tanzheny, omalleyt): Fix test in eager mode. + with ops.Graph().as_default(): + for dtype in [dtypes.float32, dtypes.float64]: + var0 = variables.Variable([1.0, 2.0], dtype=dtype, name="var0") + var1 = variables.Variable([3.0, 4.0], dtype=dtype, name="var1") + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + loss = lambda: 5 * var0 * var0 + 3 * var1 # pylint: disable=cell-var-from-loop + mom_op = gradient_descent.SGD( + learning_rate=2.0, momentum=0.9, nesterov=True) + opt_op = mom_op.minimize(loss, [var0, var1]) + self.evaluate(variables.global_variables_initializer()) + for _ in range(1, 5): self.evaluate(opt_op) - var0_np, accum0_np = self._update_nesterov_momentum_numpy( - var0_np, accum0_np, var0_np * 10, 2.0, 0.9) - var1_np, accum1_np = self._update_nesterov_momentum_numpy( - var1_np, accum1_np, 3, 2.0, 0.9) - self.assertAllClose(var0_np, self.evaluate(var0)) - self.assertAllClose(var1_np, self.evaluate(var1)) + var0_np, accum0_np = self._update_nesterov_momentum_numpy( + var0_np, accum0_np, var0_np * 10, 2.0, 0.9) + var1_np, accum1_np = self._update_nesterov_momentum_numpy( + var1_np, accum1_np, 3, 2.0, 0.9) + self.assertAllClose(var0_np, self.evaluate(var0)) + self.assertAllClose(var1_np, self.evaluate(var1)) def testSparseNesterovMomentum(self): + # TODO(tanzheny, omalleyt): Fix test in eager mode. 
for dtype in [dtypes.float32, dtypes.float64]: - var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) - accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) - accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) - grads = [] - for t in range(1, 5): - grads.append(var0_np * 10) - var0_np, accum0_np = self._update_nesterov_momentum_numpy( - var0_np, accum0_np, var0_np * 10, 2.0, 0.9) - var1_np, accum1_np = self._update_nesterov_momentum_numpy( - var1_np, accum1_np, 3, 2.0, 0.9) - var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) - var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) - accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) - accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) - var0 = variables.Variable(var0_np, dtype=dtype, name="var0") - var1 = variables.Variable(var1_np, dtype=dtype, name="var1") - mom_op = gradient_descent.SGD( - learning_rate=2.0, momentum=0.9, nesterov=True) - grads_and_vars = [] - for t in range(1, 5): - y = ops.IndexedSlices( - constant_op.constant(grads[t - 1], dtype=dtype), - constant_op.constant([0, 1]), constant_op.constant([2])) - grads_and_vars.append([(y, var0), - (constant_op.constant([3.0, 3.0], - dtype=dtype), var1)]) - if not context.executing_eagerly(): - opt_update = [] + with ops.Graph().as_default(), self.cached_session() as sess: + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + grads = [] for t in range(1, 5): - opt_update.append(mom_op.apply_gradients(grads_and_vars[t - 1])) - self.evaluate(variables.global_variables_initializer()) - for t in range(1, 5): - if context.executing_eagerly(): - mom_op.apply_gradients(grads_and_vars[t - 1]) - else: - self.evaluate(opt_update[t - 1]) - var0_np, accum0_np = self._update_nesterov_momentum_numpy( - var0_np, accum0_np, var0_np * 10, 2.0, 0.9) - var1_np, accum1_np = self._update_nesterov_momentum_numpy( - var1_np, accum1_np, 3, 2.0, 0.9) - self.assertAllClose(var0_np, self.evaluate(var0)) - self.assertAllClose(var1_np, self.evaluate(var1)) + grads.append(var0_np * 10) + var0_np, accum0_np = self._update_nesterov_momentum_numpy( + var0_np, accum0_np, var0_np * 10, 2.0, 0.9) + var1_np, accum1_np = self._update_nesterov_momentum_numpy( + var1_np, accum1_np, 3, 2.0, 0.9) + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype) + var0 = variables.Variable(var0_np, dtype=dtype, name="var0") + var1 = variables.Variable(var1_np, dtype=dtype, name="var1") + mom_op = gradient_descent.SGD( + learning_rate=2.0, momentum=0.9, nesterov=True) + x_feed = array_ops.placeholder(dtype) + y_feed = ops.IndexedSlices(x_feed, constant_op.constant([0, 1]), + constant_op.constant([2])) + grads_and_vars = [(y_feed, var0), + (constant_op.constant([3.0, 3.0], dtype=dtype), var1)] + opt_update = mom_op.apply_gradients(grads_and_vars) + self.evaluate(variables.global_variables_initializer()) + for t in range(1, 5): + sess.run(opt_update, feed_dict={x_feed: grads[t - 1]}) + var0_np, accum0_np = self._update_nesterov_momentum_numpy( + var0_np, accum0_np, var0_np * 10, 2.0, 0.9) + var1_np, accum1_np = 
self._update_nesterov_momentum_numpy( + var1_np, accum1_np, 3, 2.0, 0.9) + self.assertAllClose(var0_np, self.evaluate(var0)) + self.assertAllClose(var1_np, self.evaluate(var1)) def testMinimizeSparseResourceVariable(self): - for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - var0 = variables.Variable([[1.0, 2.0]], dtype=dtype) + # TODO(tanzheny, omalleyt): Fix test in eager mode. + with ops.Graph().as_default(): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + var0 = variables.Variable([[1.0, 2.0]], dtype=dtype) - # pylint: disable=cell-var-from-loop - def loss(): - x = constant_op.constant([[4.0], [5.0]], dtype=dtype) - pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x) - return pred * pred + # pylint: disable=cell-var-from-loop + def loss(): + x = constant_op.constant([[4.0], [5.0]], dtype=dtype) + pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x) + return pred * pred - # pylint: enable=cell-var-from-loop + # pylint: enable=cell-var-from-loop - opt = gradient_descent.SGD(learning_rate=1.0, momentum=0.9) - sgd_op = opt.minimize(loss, [var0]) - self.evaluate(variables.global_variables_initializer()) - # Run 1 step of sgd - self.evaluate(sgd_op) - # Validate updated params - self.assertAllCloseAccordingToType([[-111, -138]], self.evaluate(var0)) + opt = gradient_descent.SGD(learning_rate=1.0, momentum=0.9) + sgd_op = opt.minimize(loss, [var0]) + self.evaluate(variables.global_variables_initializer()) + # Run 1 step of sgd + self.evaluate(sgd_op) + # Validate updated params + self.assertAllCloseAccordingToType([[-111, -138]], self.evaluate(var0)) + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testMinimizeWith2DIndicesForEmbeddingLookup(self): var0 = variables.Variable(array_ops.ones([2, 2])) @@ -454,140 +458,150 @@ class MomentumOptimizerTest(test.TestCase, parameterized.TestCase): self.assertAllCloseAccordingToType([[1, 1], [0, 0]], self.evaluate(var0)) def testTensorLearningRateAndMomentum(self): - for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - var0 = variables.Variable([1.0, 2.0], dtype=dtype) - var1 = variables.Variable([3.0, 4.0], dtype=dtype) - grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) - grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) - mom_opt = gradient_descent.SGD( - learning_rate=constant_op.constant(2.0), - momentum=constant_op.constant(0.9)) - mom_update = mom_opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - self.evaluate(variables.global_variables_initializer()) - # Check we have slots - slot0 = mom_opt.get_slot(var0, "momentum") - self.assertEqual(slot0.shape, var0.shape) - slot1 = mom_opt.get_slot(var1, "momentum") - self.assertEqual(slot1.shape, var1.shape) + # TODO(tanzheny, omalleyt): Fix test in eager mode. 
+ with ops.Graph().as_default(): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + var0 = variables.Variable([1.0, 2.0], dtype=dtype) + var1 = variables.Variable([3.0, 4.0], dtype=dtype) + grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) + grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) + mom_opt = gradient_descent.SGD( + learning_rate=constant_op.constant(2.0), + momentum=constant_op.constant(0.9)) + mom_update = mom_opt.apply_gradients( + zip([grads0, grads1], [var0, var1])) + self.evaluate(variables.global_variables_initializer()) + # Check we have slots + slot0 = mom_opt.get_slot(var0, "momentum") + self.assertEqual(slot0.shape, var0.shape) + slot1 = mom_opt.get_slot(var1, "momentum") + self.assertEqual(slot1.shape, var1.shape) - # Step 1: the momentum accumulators where 0. So we should see a normal - # update: v -= grad * learning_rate - self.evaluate(mom_update) - # Check that the momentum accumulators have been updated. - self.assertAllCloseAccordingToType( - np.array([-0.2, -0.2]), self.evaluate(slot0)) - self.assertAllCloseAccordingToType( - np.array([-0.02, -0.02]), self.evaluate(slot1)) - # Check that the parameters have been updated. - self.assertAllCloseAccordingToType( - np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), self.evaluate(var0)) - self.assertAllCloseAccordingToType( - np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]), - self.evaluate(var1)) - # Step 2: the momentum accumulators contain the previous update. - if context.executing_eagerly(): - mom_opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - else: + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + # Step 1: the momentum accumulators where 0. So we should see a normal + # update: v -= grad * learning_rate self.evaluate(mom_update) - # Check that the momentum accumulators have been updated. - self.assertAllCloseAccordingToType( - np.array([(0.9 * (-0.2) - 2.0 * 0.1), (0.9 * (-0.2) - 2.0 * 0.1)]), - self.evaluate(slot0)) - self.assertAllCloseAccordingToType( - np.array([(0.9 * (-0.02) - 2.0 * 0.01), - (0.9 * (-0.02) - 2.0 * 0.01)]), self.evaluate(slot1)) - # Check that the parameters have been updated. - self.assertAllCloseAccordingToType( - np.array([ - 1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0), - 2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0) - ]), self.evaluate(var0)) - self.assertAllCloseAccordingToType( - np.array([ - 2.98 - ((0.9 * 0.01 + 0.01) * 2.0), - 3.98 - ((0.9 * 0.01 + 0.01) * 2.0) - ]), self.evaluate(var1)) + # Check that the momentum accumulators have been updated. + self.assertAllCloseAccordingToType( + np.array([-0.2, -0.2]), self.evaluate(slot0)) + self.assertAllCloseAccordingToType( + np.array([-0.02, -0.02]), self.evaluate(slot1)) + # Check that the parameters have been updated. + self.assertAllCloseAccordingToType( + np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), + self.evaluate(var0)) + self.assertAllCloseAccordingToType( + np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]), + self.evaluate(var1)) + # Step 2: the momentum accumulators contain the previous update. + self.evaluate(mom_update) + # Check that the momentum accumulators have been updated. 
+ self.assertAllCloseAccordingToType( + np.array([(0.9 * (-0.2) - 2.0 * 0.1), (0.9 * (-0.2) - 2.0 * 0.1)]), + self.evaluate(slot0)) + self.assertAllCloseAccordingToType( + np.array([(0.9 * (-0.02) - 2.0 * 0.01), + (0.9 * (-0.02) - 2.0 * 0.01)]), self.evaluate(slot1)) + # Check that the parameters have been updated. + self.assertAllCloseAccordingToType( + np.array([ + 1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0), + 2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0) + ]), self.evaluate(var0)) + self.assertAllCloseAccordingToType( + np.array([ + 2.98 - ((0.9 * 0.01 + 0.01) * 2.0), + 3.98 - ((0.9 * 0.01 + 0.01) * 2.0) + ]), self.evaluate(var1)) def testSparse(self): - for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - var0 = variables.Variable(array_ops.zeros([4, 2], dtype=dtype)) - var1 = variables.Variable(constant_op.constant(1.0, dtype, [4, 2])) - grads0 = ops.IndexedSlices( - constant_op.constant([[.1, .1]], dtype=dtype), - constant_op.constant([1]), constant_op.constant([4, 2])) - grads1 = ops.IndexedSlices( - constant_op.constant([[.01, .01], [.01, .01]], dtype=dtype), - constant_op.constant([2, 3]), constant_op.constant([4, 2])) - mom_opt = gradient_descent.SGD(learning_rate=2.0, momentum=0.9) - mom_update = mom_opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - self.evaluate(variables.global_variables_initializer()) + # TODO(tanzheny, omalleyt): Fix test in eager mode. + with ops.Graph().as_default(): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + var0 = variables.Variable(array_ops.zeros([4, 2], dtype=dtype)) + var1 = variables.Variable(constant_op.constant(1.0, dtype, [4, 2])) + grads0 = ops.IndexedSlices( + constant_op.constant([[.1, .1]], dtype=dtype), + constant_op.constant([1]), constant_op.constant([4, 2])) + grads1 = ops.IndexedSlices( + constant_op.constant([[.01, .01], [.01, .01]], dtype=dtype), + constant_op.constant([2, 3]), constant_op.constant([4, 2])) + mom_opt = gradient_descent.SGD(learning_rate=2.0, momentum=0.9) + mom_update = mom_opt.apply_gradients( + zip([grads0, grads1], [var0, var1])) + self.evaluate(variables.global_variables_initializer()) - # Check we have slots - slot0 = mom_opt.get_slot(var0, "momentum") - self.assertEqual(slot0.shape, var0.shape) - slot1 = mom_opt.get_slot(var1, "momentum") - self.assertEqual(slot1.shape, var1.shape) + # Check we have slots + slot0 = mom_opt.get_slot(var0, "momentum") + self.assertEqual(slot0.shape, var0.shape) + slot1 = mom_opt.get_slot(var1, "momentum") + self.assertEqual(slot1.shape, var1.shape) - # Step 1: the momentum accumulators are 0. So we should see a normal - # update: v -= grad * learning_rate - self.evaluate(mom_update) - # Check that the momentum accumulators have been updated. - self.assertAllCloseAccordingToType( - np.array([0, 0]), - self.evaluate(slot0)[0]) - self.assertAllCloseAccordingToType( - np.array([-2.0 * .1, -2.0 * .1]), - self.evaluate(slot0)[1]) - self.assertAllCloseAccordingToType( - np.array([-2.0 * .01, -2.0 * .01]), - self.evaluate(slot1)[2]) - # Check that the parameters have been updated. - self.assertAllCloseAccordingToType( - np.array([0, 0]), - self.evaluate(var0)[0]) - self.assertAllCloseAccordingToType( - np.array([-(0.1 * 2.0), -(0.1 * 2.0)]), - self.evaluate(var0)[1]) - self.assertAllCloseAccordingToType( - np.array([1.0 - (0.01 * 2.0), 1.0 - (0.01 * 2.0)]), - self.evaluate(var1)[2]) - # Step 2: the momentum accumulators contain the previous update. 
- if context.executing_eagerly(): - mom_opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - else: + # Fetch params to validate initial values + self.assertAllClose([0, 0], self.evaluate(var0)[0]) + self.assertAllClose([0, 0], self.evaluate(var0)[1]) + self.assertAllClose([1, 1], self.evaluate(var1)[2]) + + # Step 1: the momentum accumulators are 0. So we should see a normal + # update: v -= grad * learning_rate self.evaluate(mom_update) - # Check that the momentum accumulators have been updated. - self.assertAllClose(np.array([0, 0]), self.evaluate(slot0)[0]) - self.assertAllCloseAccordingToType( - np.array([(0.9 * (-0.2) - 2.0 * 0.1), (0.9 * (-0.2) - 2.0 * 0.1)]), - self.evaluate(slot0)[1]) - self.assertAllCloseAccordingToType( - np.array([(0.9 * (-0.02) - 2.0 * 0.01), - (0.9 * (-0.02) - 2.0 * 0.01)]), - self.evaluate(slot1)[2]) - # Check that the parameters have been updated. - self.assertAllClose(np.array([0, 0]), self.evaluate(var0)[0]) - self.assertAllCloseAccordingToType( - np.array([ - -(0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0), - -(0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0) - ]), - self.evaluate(var0)[1]) - self.assertAllCloseAccordingToType( - np.array([ - 0.98 - ((0.9 * 0.01 + 0.01) * 2.0), - 0.98 - ((0.9 * 0.01 + 0.01) * 2.0) - ]), - self.evaluate(var1)[2]) + # Check that the momentum accumulators have been updated. + self.assertAllCloseAccordingToType( + np.array([0, 0]), + self.evaluate(slot0)[0]) + self.assertAllCloseAccordingToType( + np.array([-2.0 * .1, -2.0 * .1]), + self.evaluate(slot0)[1]) + self.assertAllCloseAccordingToType( + np.array([-2.0 * .01, -2.0 * .01]), + self.evaluate(slot1)[2]) + # Check that the parameters have been updated. + self.assertAllCloseAccordingToType( + np.array([0, 0]), + self.evaluate(var0)[0]) + self.assertAllCloseAccordingToType( + np.array([-(0.1 * 2.0), -(0.1 * 2.0)]), + self.evaluate(var0)[1]) + self.assertAllCloseAccordingToType( + np.array([1.0 - (0.01 * 2.0), 1.0 - (0.01 * 2.0)]), + self.evaluate(var1)[2]) + # Step 2: the momentum accumulators contain the previous update. + self.evaluate(mom_update) + # Check that the momentum accumulators have been updated. + self.assertAllClose(np.array([0, 0]), self.evaluate(slot0)[0]) + self.assertAllCloseAccordingToType( + np.array([(0.9 * (-0.2) - 2.0 * 0.1), (0.9 * (-0.2) - 2.0 * 0.1)]), + self.evaluate(slot0)[1]) + self.assertAllCloseAccordingToType( + np.array([(0.9 * (-0.02) - 2.0 * 0.01), + (0.9 * (-0.02) - 2.0 * 0.01)]), + self.evaluate(slot1)[2]) + # Check that the parameters have been updated. + self.assertAllClose(np.array([0, 0]), self.evaluate(var0)[0]) + self.assertAllCloseAccordingToType( + np.array([ + -(0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0), + -(0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0) + ]), + self.evaluate(var0)[1]) + self.assertAllCloseAccordingToType( + np.array([ + 0.98 - ((0.9 * 0.01 + 0.01) * 2.0), + 0.98 - ((0.9 * 0.01 + 0.01) * 2.0) + ]), + self.evaluate(var1)[2]) def testSharing(self): - for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - var0 = variables.Variable([1.0, 2.0], dtype=dtype) - var1 = variables.Variable([3.0, 4.0], dtype=dtype) - grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) - grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) - mom_opt = gradient_descent.SGD(learning_rate=2.0, momentum=0.9) - if not context.executing_eagerly(): + # TODO(tanzheny, omalleyt): Fix test in eager mode. 
+ with ops.Graph().as_default(): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + var0 = variables.Variable([1.0, 2.0], dtype=dtype) + var1 = variables.Variable([3.0, 4.0], dtype=dtype) + grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) + grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) + mom_opt = gradient_descent.SGD(learning_rate=2.0, momentum=0.9) mom_update1 = mom_opt.apply_gradients( zip([grads0, grads1], [var0, var1])) mom_update2 = mom_opt.apply_gradients( @@ -599,52 +613,46 @@ class MomentumOptimizerTest(test.TestCase, parameterized.TestCase): slot1 = mom_opt.get_slot(var1, "momentum") self.assertEqual(slot1.shape, var1.shape) - # Step 1: the momentum accumulators where 0. So we should see a normal - # update: v -= grad * learning_rate - if context.executing_eagerly(): - mom_opt.apply_gradients(zip([grads0, grads1], [var0, var1])) - slot0 = mom_opt.get_slot(var0, "momentum") - self.assertEqual(slot0.shape, var0.shape) - slot1 = mom_opt.get_slot(var1, "momentum") - self.assertEqual(slot1.shape, var1.shape) - else: + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + # Step 1: the momentum accumulators where 0. So we should see a normal + # update: v -= grad * learning_rate self.evaluate(mom_update1) - # Check that the momentum accumulators have been updated. - self.assertAllCloseAccordingToType( - np.array([-0.2, -0.2]), self.evaluate(slot0)) - self.assertAllCloseAccordingToType( - np.array([-0.02, -0.02]), self.evaluate(slot1)) - # Check that the parameters have been updated. - self.assertAllCloseAccordingToType( - np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), self.evaluate(var0)) - self.assertAllCloseAccordingToType( - np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]), - self.evaluate(var1)) - # Step 2: the second momentum accumulators contain the previous update. - if context.executing_eagerly(): - mom_update2 = mom_opt.apply_gradients( - zip([grads0, grads1], [var0, var1])) - else: + # Check that the momentum accumulators have been updated. + self.assertAllCloseAccordingToType( + np.array([-0.2, -0.2]), self.evaluate(slot0)) + self.assertAllCloseAccordingToType( + np.array([-0.02, -0.02]), self.evaluate(slot1)) + # Check that the parameters have been updated. + self.assertAllCloseAccordingToType( + np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), + self.evaluate(var0)) + self.assertAllCloseAccordingToType( + np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]), + self.evaluate(var1)) + # Step 2: the second momentum accumulators contain the previous update. self.evaluate(mom_update2) - # Check that the momentum accumulators have been updated. - self.assertAllCloseAccordingToType( - np.array([(0.9 * (-0.2) - 2.0 * 0.1), (0.9 * (-0.2) - 2.0 * 0.1)]), - self.evaluate(slot0)) - self.assertAllCloseAccordingToType( - np.array([(0.9 * (-0.02) - 2.0 * 0.01), - (0.9 * (-0.02) - 2.0 * 0.01)]), self.evaluate(slot1)) - # Check that the parameters have been updated. - self.assertAllCloseAccordingToType( - np.array([ - 1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0), - 2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0) - ]), self.evaluate(var0)) - self.assertAllCloseAccordingToType( - np.array([ - 2.98 - ((0.9 * 0.01 + 0.01) * 2.0), - 3.98 - ((0.9 * 0.01 + 0.01) * 2.0) - ]), self.evaluate(var1)) + # Check that the momentum accumulators have been updated. 
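+        # The expected values below follow the momentum update that the
+        # assertions in this test encode:
+        #   slot <- momentum * slot - learning_rate * grad
+        #   var  <- var + slot
+        # For var0: step 1 gives slot0 = -2.0 * 0.1 = -0.2 and var0 = 1.0 - 0.2 = 0.8;
+        # step 2 gives slot0 = 0.9 * (-0.2) - 2.0 * 0.1 = -0.38 and
+        # var0 = 0.8 - 0.38 = 0.42, i.e. 1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0).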
+ self.assertAllCloseAccordingToType( + np.array([(0.9 * (-0.2) - 2.0 * 0.1), (0.9 * (-0.2) - 2.0 * 0.1)]), + self.evaluate(slot0)) + self.assertAllCloseAccordingToType( + np.array([(0.9 * (-0.02) - 2.0 * 0.01), + (0.9 * (-0.02) - 2.0 * 0.01)]), self.evaluate(slot1)) + # Check that the parameters have been updated. + self.assertAllCloseAccordingToType( + np.array([ + 1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0), + 2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0) + ]), self.evaluate(var0)) + self.assertAllCloseAccordingToType( + np.array([ + 2.98 - ((0.9 * 0.01 + 0.01) * 2.0), + 3.98 - ((0.9 * 0.01 + 0.01) * 2.0) + ]), self.evaluate(var1)) + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testConfig(self): opt = gradient_descent.SGD(learning_rate=1.0, momentum=0.9, nesterov=True) config = opt.get_config() From f6741ff2261e8d8112736d2443f1c580762ceecc Mon Sep 17 00:00:00 2001 From: bigcat-himax Date: Tue, 4 Aug 2020 10:06:05 +0800 Subject: [PATCH 2036/2522] TFLM:update HIMAX_WE1_SDK_URL --- .../tools/make/third_party_downloads.inc | 59 ++++++++++--------- 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/tensorflow/lite/micro/tools/make/third_party_downloads.inc b/tensorflow/lite/micro/tools/make/third_party_downloads.inc index d4d5c1c73be..e2777d9fbb5 100644 --- a/tensorflow/lite/micro/tools/make/third_party_downloads.inc +++ b/tensorflow/lite/micro/tools/make/third_party_downloads.inc @@ -1,65 +1,70 @@ # Add URLs and MD5 checksums for third-party libraries here. +# We use mirror.tensorflow.org to cache copies of third-party files, +# but this is just an optimization applied manually by TensorFlow +# engineers, so add non-mirrored URLs if you need to update this +# in a pull request and we'll periodically copy them and update +# the URL. 
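+# For example, the upstream URL
+#   https://github.com/google/flatbuffers/archive/v1.12.0.tar.gz
+# becomes the mirrored URL
+#   http://mirror.tensorflow.org/github.com/google/flatbuffers/archive/v1.12.0.tar.gz
+# i.e. the scheme changes to http and the host is prefixed with mirror.tensorflow.org.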
GEMMLOWP_URL := "https://github.com/google/gemmlowp/archive/719139ce755a0f31cbf1c37f7f98adcc7fc9f425.zip" GEMMLOWP_MD5 := "7e8191b24853d75de2af87622ad293ba" ifeq ($(HOST_OS),windows) - FLATBUFFERS_URL := "https://github.com/google/flatbuffers/archive/v1.12.0.zip" + FLATBUFFERS_URL := "http://mirror.tensorflow.org/github.com/google/flatbuffers/archive/v1.12.0.zip" FLATBUFFERS_MD5 := "a1afdbf114dec01a861c1b8c917d0fc7" else - FLATBUFFERS_URL := "https://github.com/google/flatbuffers/archive/v1.12.0.tar.gz" + FLATBUFFERS_URL := "http://mirror.tensorflow.org/github.com/google/flatbuffers/archive/v1.12.0.tar.gz" FLATBUFFERS_MD5 := "c62ffefb3d4548b127cca14ce047f16c" endif ifeq ($(HOST_OS),osx) - GCC_EMBEDDED_URL := "https://developer.arm.com/-/media/Files/downloads/gnu-rm/7-2018q2/gcc-arm-none-eabi-7-2018-q2-update-mac.tar.bz2" + GCC_EMBEDDED_URL := "http://mirror.tensorflow.org/developer.arm.com/-/media/Files/downloads/gnu-rm/7-2018q2/gcc-arm-none-eabi-7-2018-q2-update-mac.tar.bz2" GCC_EMBEDDED_MD5 := "a66be9828cf3c57d7d21178e07cd8904" else ifeq ($(HOST_OS),windows) - GCC_EMBEDDED_URL := "https://developer.arm.com/-/media/Files/downloads/gnu-rm/7-2018q2/gcc-arm-none-eabi-7-2018-q2-update-win32.zip" + GCC_EMBEDDED_URL := "http://mirror.tensorflow.org/developer.arm.com/-/media/Files/downloads/gnu-rm/7-2018q2/gcc-arm-none-eabi-7-2018-q2-update-win32.zip" GCC_EMBEDDED_MD5 := "bc8ae26d7c429f30d583a605a4bcf9bc" else - GCC_EMBEDDED_URL := "https://developer.arm.com/-/media/Files/downloads/gnu-rm/7-2018q2/gcc-arm-none-eabi-7-2018-q2-update-linux.tar.bz2" + GCC_EMBEDDED_URL := "http://mirror.tensorflow.org/developer.arm.com/-/media/Files/downloads/gnu-rm/7-2018q2/gcc-arm-none-eabi-7-2018-q2-update-linux.tar.bz2" GCC_EMBEDDED_MD5 := "299ebd3f1c2c90930d28ab82e5d8d6c0" endif -LEON_BCC2_URL := "https://www.gaisler.com/anonftp/bcc2/bin/bcc-2.0.7-gcc-linux64.tar.xz" +LEON_BCC2_URL := "http://mirror.tensorflow.org/www.gaisler.com/anonftp/bcc2/bin/bcc-2.0.7-gcc-linux64.tar.xz" LEON_BCC2_MD5 := "cdf78082be4882da2a92c9baa82fe765" -TSIM_URL := "https://www.gaisler.com/anonftp/tsim/tsim-eval-2.0.63.tar.gz" +TSIM_URL := "http://mirror.tensorflow.org/www.gaisler.com/anonftp/tsim/tsim-eval-2.0.63.tar.gz" TSIM_MD5 := "afa0095d3ed989a949e1467f94e41d2f" -CMSIS_URL := "https://github.com/ARM-software/CMSIS_5/archive/9daaa7a34a5627a24009462b8fa8413a00c4fdb1.zip" +CMSIS_URL := "http://mirror.tensorflow.org/github.com/ARM-software/CMSIS_5/archive/9daaa7a34a5627a24009462b8fa8413a00c4fdb1.zip" CMSIS_MD5 := "b988dacff8925ffffcb7e5079cc713b7" -AM_SDK_URL := "http://s3.asia.ambiqmicro.com/downloads/AmbiqSuite-Rel2.2.0.zip" +AM_SDK_URL := "http://mirror.tensorflow.org/s3.asia.ambiqmicro.com/downloads/AmbiqSuite-Rel2.2.0.zip" AM_SDK_MD5 := "7605fa2d4d97e6bb7a1190c92b66b597" AM_SDK_DEST := AmbiqSuite-Rel2.2.0 -SF_BSPS_URL := "https://github.com/sparkfun/SparkFun_Apollo3_AmbiqSuite_BSPs/archive/v0.0.7.zip" +SF_BSPS_URL := "http://mirror.tensorflow.org/github.com/sparkfun/SparkFun_Apollo3_AmbiqSuite_BSPs/archive/v0.0.7.zip" SF_BSPS_MD5 := "34199f7e754735661d1c8a70a40ca7a3" SF_BSPS_DEST := boards_sfe -STM32_BARE_LIB_URL := "https://github.com/google/stm32_bare_lib/archive/c07d611fb0af58450c5a3e0ab4d52b47f99bc82d.zip" +STM32_BARE_LIB_URL := "http://mirror.tensorflow.org/github.com/google/stm32_bare_lib/archive/c07d611fb0af58450c5a3e0ab4d52b47f99bc82d.zip" STM32_BARE_LIB_MD5 := "282bff40d4d0b92278fd123a3b6e3123" ifeq ($(HOST_OS),osx) - RISCV_TOOLCHAIN_URL := 
"https://static.dev.sifive.com/dev-tools/riscv64-unknown-elf-gcc-8.1.0-2019.01.0-x86_64-apple-darwin.tar.gz" + RISCV_TOOLCHAIN_URL := "http://mirror.tensorflow.org/static.dev.sifive.com/dev-tools/riscv64-unknown-elf-gcc-8.1.0-2019.01.0-x86_64-apple-darwin.tar.gz" RISCV_TOOLCHAIN_MD5 := "2ac2fa00618b9ab7fa0c7d0ec173de94" else - RISCV_TOOLCHAIN_URL := "https://static.dev.sifive.com/dev-tools/riscv64-unknown-elf-gcc-20181030-x86_64-linux-ubuntu14.tar.gz" + RISCV_TOOLCHAIN_URL := "http://mirror.tensorflow.org/static.dev.sifive.com/dev-tools/riscv64-unknown-elf-gcc-20181030-x86_64-linux-ubuntu14.tar.gz" RISCV_TOOLCHAIN_MD5="2366b7afe36a54dc94fb0ff8a0830934" endif -SIFIVE_FE310_LIB_URL := "https://github.com/sifive/freedom-e-sdk/archive/baeeb8fd497a99b3c141d7494309ec2e64f19bdf.zip" +SIFIVE_FE310_LIB_URL := "http://mirror.tensorflow.org/github.com/sifive/freedom-e-sdk/archive/baeeb8fd497a99b3c141d7494309ec2e64f19bdf.zip" SIFIVE_FE310_LIB_MD5 := "06ee24c4956f8e21670ab3395861fe64" -KISSFFT_URL="https://github.com/mborgerding/kissfft/archive/v130.zip" +KISSFFT_URL="http://mirror.tensorflow.org/github.com/mborgerding/kissfft/archive/v130.zip" KISSFFT_MD5="438ba1fef5783cc5f5f201395cc477ca" -RUY_URL="https://github.com/google/ruy/archive/34ea9f4993955fa1ff4eb58e504421806b7f2e8f.zip" -RUY_MD5="18613212e9c01aba85c7d19010b194a9" +RUY_URL="https://github.com/google/ruy/archive/5bb02fbf90824c2eb6cd7418f766c593106a332b.zip" +RUY_MD5="c720b1743360259ac45809a321f8f26c" -CIFAR10_DATASET_URL="https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz" +CIFAR10_DATASET_URL="http://mirror.tensorflow.org/www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz" CIFAR10_DATASET_MD5="c32a1d4ab5d03f1284b67883e8d87530" IMAGE_RECOGNITION_MODEL_URL := "https://storage.googleapis.com/download.tensorflow.org/models/tflite/cifar_image_recognition_model_2020_05_27.zip" @@ -68,25 +73,23 @@ IMAGE_RECOGNITION_MODEL_MD5 := "1f4607b05ac45b8a6146fb883dbc2d7b" PERSON_MODEL_URL := "https://storage.googleapis.com/download.tensorflow.org/data/tf_lite_micro_person_data_grayscale_2020_05_27.zip" PERSON_MODEL_MD5 := "55b85f76e2995153e660391d4a209ef1" -PERSON_MODEL_INT8_URL := "https://storage.googleapis.com/download.tensorflow.org/data/tf_lite_micro_person_data_int8_grayscale_2020_05_27.zip" -PERSON_MODEL_INT8_MD5 := "a0ede2d058aa2a1d413893455dd55352" +PERSON_MODEL_INT8_URL := "https://storage.googleapis.com/download.tensorflow.org/data/tf_lite_micro_person_data_int8_grayscale_2020_06_23.zip" +PERSON_MODEL_INT8_MD5 := "9b5b6d4677dd0a91b1bb992d1c4c0417" -EMBARC_MLI_URL := "https://github.com/foss-for-synopsys-dwc-arc-processors/embarc_mli/archive/58284867ca52d1f43b25045e8601999d7359d986.zip" +EMBARC_MLI_URL := "http://mirror.tensorflow.org/github.com/foss-for-synopsys-dwc-arc-processors/embarc_mli/archive/58284867ca52d1f43b25045e8601999d7359d986.zip" EMBARC_MLI_MD5 := "2bf4982a327fdaa9d475803ce014d1ef" -EMBARC_MLI_PRE_COMPILED_URL := "https://github.com/foss-for-synopsys-dwc-arc-processors/embarc_mli/releases/download/Release_1.1_RC2/embARC_MLI_package.zip" +EMBARC_MLI_PRE_COMPILED_URL := "http://mirror.tensorflow.org/github.com/foss-for-synopsys-dwc-arc-processors/embarc_mli/releases/download/Release_1.1_RC2/embARC_MLI_package.zip" EMBARC_MLI_PRE_COMPILED_MD5 := "a95ff9e0370434484f14e7e4114327f6" -ZEPHYR_URL := "https://github.com/antmicro/zephyr/archive/55e36b9.zip" +ZEPHYR_URL := "http://mirror.tensorflow.org/github.com/antmicro/zephyr/archive/55e36b9.zip" ZEPHYR_MD5 := "755622eb4812fde918a6382b65d50c3b" -XTENSA_HIFI4_URL 
:="https://github.com/foss-xtensa/nnlib-hifi4/raw/master/archive/xa_nnlib_04_07.zip" -XTENSA_HIFI4_MD5 :="f234764928f9a42901df33a27e118c8b" +XTENSA_HIFI4_URL :="http://mirror.tensorflow.org/github.com/foss-xtensa/nnlib-hifi4/raw/master/archive/xa_nnlib_06_27.zip" +XTENSA_HIFI4_MD5 :="45fdc1209a8da62ab568aa6040f7eabf" -ETHOSU_URL := "https://git.mlplatform.org/ml/ethos-u/ethos-u-core-driver.git/snapshot/ethos-u-core-driver-bcb5aaa99756f1b5c1295b079ebdd60996bc75a5.tar.gz" +ETHOSU_URL := "http://mirror.tensorflow.org/git.mlplatform.org/ml/ethos-u/ethos-u-core-driver.git/snapshot/ethos-u-core-driver-bcb5aaa99756f1b5c1295b079ebdd60996bc75a5.tar.gz" ETHOSU_MD5 := "d2073c8d88fc167fd5c46b5dcda58ea1" HIMAX_WE1_SDK_URL ="https://www.himax.com.tw/we-i/himax_we1_sdk_v03.zip" HIMAX_WE1_SDK_MD5 ="1cd9b17f3fdb3e9a1dfd1cc356694325" - - From ebbe7dd6c4f6385633dedae18301cf4ad95b93da Mon Sep 17 00:00:00 2001 From: Souradeep Nanda Date: Tue, 4 Aug 2020 07:47:34 +0530 Subject: [PATCH 2037/2522] Fixed indentation --- tensorflow/python/ops/custom_gradient.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tensorflow/python/ops/custom_gradient.py b/tensorflow/python/ops/custom_gradient.py index f2675b422ac..ebbb0e4aede 100644 --- a/tensorflow/python/ops/custom_gradient.py +++ b/tensorflow/python/ops/custom_gradient.py @@ -139,23 +139,23 @@ def custom_gradient(f=None): the same number of variables. We take the function `z = x * y` as an example. >>> @tf.custom_gradient - def bar(x, y): - def grad(upstream): - dz_dx = y - dz_dy = x - return upstream * dz_dx, upstream * dz_dy - - z = x * y - - return z, grad + def bar(x, y): + def grad(upstream): + dz_dx = y + dz_dy = x + return upstream * dz_dx, upstream * dz_dy + + z = x * y + + return z, grad >>> x = tf.constant(2.0, dtype=tf.float32) >>> y = tf.constant(3.0, dtype=tf.float32) >>> with tf.GradientTape(persistent=True) as tape: - tape.watch(x) - tape.watch(y) - z = bar(x, y) + tape.watch(x) + tape.watch(y) + z = bar(x, y) >>> z 6 From 4457dcc8c57cf970ef9403bf367863e22eaa8d7d Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Mon, 3 Aug 2020 19:23:23 -0700 Subject: [PATCH 2038/2522] Introduce early documentation on mlir-hlo PiperOrigin-RevId: 324732753 Change-Id: I48c355931959b4aa05fc21cb643e48b58f11b92e --- tensorflow/compiler/mlir/hlo/README.md | 200 +++++++++++++++++++++++++ 1 file changed, 200 insertions(+) create mode 100644 tensorflow/compiler/mlir/hlo/README.md diff --git a/tensorflow/compiler/mlir/hlo/README.md b/tensorflow/compiler/mlir/hlo/README.md new file mode 100644 index 00000000000..1be6fb29d13 --- /dev/null +++ b/tensorflow/compiler/mlir/hlo/README.md @@ -0,0 +1,200 @@ +# MLIR-HLO + +The code here exists in two places: + +* https://github.com/tensorflow/tensorflow/tree/master/tensorflow/compiler/mlir/hlo; + this is the canonical location and where contributions should be made using + GitHub pull-requests. +* https://github.com/tensorflow/mlir-hlo; this is a standalone repository with + a view to the same code to allow other projects to use this without + depending on the entire TF monorepo. + +This implements a self-contained compiler for a linear algebra set of operations +inspired by XLA +[HLO IR](https://www.tensorflow.org/xla/architecture#how_does_xla_work) using +MLIR components. It is designed to provide an end-to-end flow independent of +TensorFlow and XLA, but usable inside of these projects. 
+
+Coding practice and conventions in this repository follow the
+[MLIR Developer Guide](https://mlir.llvm.org/getting_started/DeveloperGuide/) in
+this repo as part of the intent to act as an incubator for technology to
+upstream.
+
+## QuickStart: building and testing
+
+TODO
+
+## Overview
+
+[XLA](https://www.tensorflow.org/xla/) (Accelerated Linear Algebra) is a
+domain-specific compiler framework and execution environment for linear algebra,
+which powers code-generation for ML frameworks like TensorFlow, JAX, and others.
+
+A cornerstone of XLA is the HLO (High Level Optimizer) IR, which offers a
+carefully selected, fixed list of operations, mostly orthogonal to each other. It
+provides an efficient optimizer for computations expressed with this set of
+operations and generates code for hardware platforms like CPU, GPU, and TPUs.
+Its goal is to provide a uniform interface to compile and execute these
+optimized HLO programs independently of the targeted device. It is not a
+front-end ML system like TensorFlow or JAX; rather, it is a backend framework
+that optimizes HLO and lowers to machine code.
+
+The HLO set of operations is closed and has well-defined semantics. HLO
+operations operate on immutable Tensors with static shapes (actually bounded
+shapes, to be exact) and explicit broadcasts.
+
+[MLIR](https://mlir.llvm.org/) is a compiler infrastructure which intends to
+come with "batteries included"; as such, it intends to provide all the blocks
+required to assemble graph optimization and codegen pipelines. The longer-term
+roadmap for MLIR is to provide a
+[Tensor Compute Primitive](https://llvm.discourse.group/c/mlir/MLIR-TCP-WG/36)
+(TCP) dialect, which should hopefully be general enough to model what HLO
+represents today (see
+[slides](https://drive.google.com/open?id=1iljcpTQ5NPaMfGpoPDFml1XkYxjK_6A4) and
+[recording](https://drive.google.com/open?id=1jSPa8TwPKUt0WuLquGc8OgSUVYJHMvWZ)
+for a technical discussion on this topic).
+
+The work on MLIR-HLO can be seen as a stepping stone towards building TCP, while
+integrating intermediate components into XLA itself by relying on the
+well-proven HLO IR and introducing more pieces from upstream MLIR
+([Linalg](https://mlir.llvm.org/docs/Dialects/Linalg/),
+[Vector](https://mlir.llvm.org/docs/Dialects/Vector/),
+[GPU](https://mlir.llvm.org/docs/Dialects/GPU/) dialect, ...).
+[This document](https://www.tensorflow.org/mlir/xla_gpu_codegen) provides more
+information on the current migration of the XLA GPU codegen.
+
+## MLIR Dialects for XLA-style compilation
+
+This repository defines three dialects to support an HLO-like compilation
+pipeline using MLIR:
+
+* `chlo`: the "client" HLO dialect, intended to be closer to the frontend
+  (including implicit broadcast semantics).
+* `mhlo`: "meta"-HLO dialect; similar to `xla_hlo`, but with extensions for
+  dynamic shape support.
+* `lmhlo`: "late"-"meta"-HLO, it is the IR after buffer allocation is
+  performed. In XLA the buffer allocation is a side data structure which keeps
+  track of this information, while this separate dialect materializes it in
+  the IR.
+
+We describe these in more detail below.
+
+### HLO Client Dialect: `chlo`.
+
+* It was originally designed to map the
+  [XLA client APIs](https://www.tensorflow.org/xla/operation_semantics) (e.g.,
+  ops support implicit broadcast and are roughly modeled on the XlaBuilder API)
+  modulo support for dynamic shapes and additional ops required to support
+  dynamic client-side HLOs.
+* Ops can be from either the XlaBuilder or XLA helper functions can be + converted into ops (e.g., given ambiguity in what constitutes these ops, + there is some freedom to decide), the goal of this dialect is to correspond + close to client level and enable a thin layer between client use and op + construction (making it cheap to construct and optimizations on the dialect + close to optimizations on the client ops). + +Entry: + +* The vast majority of old "client" interactions are via the XlaBuilder APIs. + These APIs are used by TF2XLA kernels, JAX, PyTorch bridge and directly. The + legalization path (described below) can also reuse the XlaBuilder's APIs to + construct XLA Client HLO ops directly (this uses MlirXlaBuilder which is a + subclass of XlaBuilder). +* The other entry point is during legalization from TensorFlow ops in the TF + Graph Compiler and other tools (e.g., SavedModel lowering and TFCompile). + +Exit: + +* MHLO +* May be exported to xla::HloInstructionProto by invoking the XlaBuilder APIs + (with regular XlaBuilder) + +The `chlo` dialect started originally as mapping to the XLA client Builder APIs. +It enables it to both be constructed and converted back to existing XLA +interfaces using the XlaBuilder API. Due to the way that translation into and +out of the dialect works, there is no expectation that this dialect roundtrips +to XLA (e.g., it is only intended to be translated to MLIR and then legalized to +another dialect or translated to HloInstructionProto). + +The export approach of reusing the XlaBuilders enables reusing a lot of logic +that was already implemented in terms of computing shapes, inserting broadcasts +etc. + +An important topic here is that XLA Client HLO ops are not a well defined set. +And in particular what some would consider helper functions, others would +consider ops. It should be easy to move between these and so define a new op +along with the helper function or autogenerate the helper functions from the +descriptions of the ops. For the former, a simple approach would be to simply +consider the context in which the op is being constructed and if an MLIR one, +construct a op in the client dialect instead of further calls into XlaBuilder. +The latter could be implemented by adding the op and a legalization of the op to +other known ops, from which a helper function can get generated that could be +used as regular. + +Status: Exists but need to be cleaned up. + +### Meta HLO Dialect `mhlo` + +* Dialect is closer to current HLO server ops (e.g., no implicit broadcast) +* MHLO dialect where we can deviate from the requirements of the client or + server dialect, in particular: + * Control flow ops with implicit capture to enable simpler optimizations + (e.g., generic LICM, unroll & jam, etc.) + * Multiple results ops (e.g., no tuples) + * More ops (for example, unique op or assert op), and ops that don't need + to be added to either client or server dialect. + * Op set not constrained by implementation (e.g., hlo.add operating on say + i79 or !mydialect.weird_type is allowed even though no XLA backend + supports it). Verification on types happening at the boundaries. + * It does not need to preserve some deprecated XLA constructs (e.g. + stateful RNG HLO). + * More dynamic shape support ops without need for updating all + users/backends. +* This dialect enables evolving HLO independently from XLA in order to + experiment with features we'd like to upstream in MLIR TCP. 
In particular it + intends to be user-extensible through + [interfaces](https://mlir.llvm.org/docs/Interfaces/). +* It should have no TensorFlow, or proto, or other Google internal + dependencies. +* It need not be a complete superset of ops compared to XLA HLO dialect. + +Entry: + +* Legalization from `chlo` dialect or conversion from XLA HLO. +* Directly emitted from TF Graph Compiler; +* Builder call (e.g., EDSL); + +Exit: + +* LMHLO, Linalg IREE, directly used in codegen. +* XLA HLO. + +The MHLO dialect has no direct export format, it is only meant as an +intermediate optimization dialect/format. It is also where we can experiment +cheaply with new ops. This format will be where the representation would differ +from existing end points. + +Status: Exists but need to be cleaned up and evolved, in particular with respect +to supporting dynamic shapes. + +### LMHLO + +LMHLO corresponds to late `mhlo` and operates on buffer domain (e.g., memref) +with side-effecting operations. The lowering from `mhlo` dialect proceeds by way +of scheduling, memory and buffer allocation. The current mapping is directly on +XLA Client HLOs but without implicit broadcast and with operation on memrefs. +This dialect will instead be rebased on `mhlo` dialect but operating on buffers +still. + +Entry: + +* Post buffer assignment on `mhlo` dialect, or from XLA after buffer + assignment. + +Exit: + +* Codegen (LLVM IR in the common cases at the moment) + +## End-to-End pipeline + +TODO From 6a5edccf1d0a93a52f6e13a3ab0720f1e8562240 Mon Sep 17 00:00:00 2001 From: Souradeep Nanda Date: Tue, 4 Aug 2020 08:12:23 +0530 Subject: [PATCH 2039/2522] tidy up doctest --- tensorflow/python/ops/custom_gradient.py | 32 +++++++++++------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/tensorflow/python/ops/custom_gradient.py b/tensorflow/python/ops/custom_gradient.py index ebbb0e4aede..db5690f6e80 100644 --- a/tensorflow/python/ops/custom_gradient.py +++ b/tensorflow/python/ops/custom_gradient.py @@ -139,23 +139,21 @@ def custom_gradient(f=None): the same number of variables. We take the function `z = x * y` as an example. >>> @tf.custom_gradient - def bar(x, y): - def grad(upstream): - dz_dx = y - dz_dy = x - return upstream * dz_dx, upstream * dz_dy - - z = x * y - - return z, grad - - >>> x = tf.constant(2.0, dtype=tf.float32) - >>> y = tf.constant(3.0, dtype=tf.float32) - - >>> with tf.GradientTape(persistent=True) as tape: - tape.watch(x) - tape.watch(y) - z = bar(x, y) + ... def bar(x, y): + ... def grad(upstream): + ... dz_dx = y + ... dz_dy = x + ... return upstream * dz_dx, upstream * dz_dy + ... z = x * y + ... return z, grad + ... + ... x = tf.constant(2.0, dtype=tf.float32) + ... y = tf.constant(3.0, dtype=tf.float32) + ... + ... with tf.GradientTape(persistent=True) as tape: + ... tape.watch(x) + ... tape.watch(y) + ... z = bar(x, y) >>> z 6 From f4d60080b0b5f96907fe44af5e607b4cca9391f2 Mon Sep 17 00:00:00 2001 From: Souradeep Nanda Date: Tue, 4 Aug 2020 08:25:36 +0530 Subject: [PATCH 2040/2522] Fixed indentation errors --- tensorflow/python/ops/custom_gradient.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/tensorflow/python/ops/custom_gradient.py b/tensorflow/python/ops/custom_gradient.py index db5690f6e80..fc1f7f6fbc9 100644 --- a/tensorflow/python/ops/custom_gradient.py +++ b/tensorflow/python/ops/custom_gradient.py @@ -146,23 +146,18 @@ def custom_gradient(f=None): ... return upstream * dz_dx, upstream * dz_dy ... z = x * y ... return z, grad - ... 
- ... x = tf.constant(2.0, dtype=tf.float32) - ... y = tf.constant(3.0, dtype=tf.float32) - ... - ... with tf.GradientTape(persistent=True) as tape: + >>> x = tf.constant(2.0, dtype=tf.float32) + >>> y = tf.constant(3.0, dtype=tf.float32) + >>> with tf.GradientTape(persistent=True) as tape: ... tape.watch(x) ... tape.watch(y) ... z = bar(x, y) - >>> z - 6 + >>> tape.gradient(z, x) - 3 + >>> tape.gradient(z, y) - 2 - >>> tape.gradient(x, y) - None + Nesting custom gradients can lead to unintuitive results. The default behavior does not correspond to n-th order derivatives. For example From 90d864cd1d03d554014466495e4dec9164fb6c05 Mon Sep 17 00:00:00 2001 From: Blake Hechtman Date: Mon, 3 Aug 2020 19:50:45 -0700 Subject: [PATCH 2041/2522] [XLA] Allow kBitcast to also change types. PiperOrigin-RevId: 324735064 Change-Id: Iba6ca80b5511feeaa738334e6746a3dd7d53dea3 --- tensorflow/compiler/xla/service/BUILD | 1 + .../compiler/xla/service/hlo_creation_utils.cc | 6 ++++++ .../compiler/xla/service/hlo_instruction.cc | 2 +- .../compiler/xla/service/hlo_verifier.cc | 8 -------- .../compiler/xla/service/hlo_verifier_test.cc | 18 ------------------ 5 files changed, 8 insertions(+), 27 deletions(-) diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 540cd7fecd2..4d15bc432a2 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -1705,6 +1705,7 @@ cc_library( ":hlo", ":hlo_module_config", ":shape_inference", + "//tensorflow/compiler/xla:comparison_util", "//tensorflow/compiler/xla:literal", "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:statusor", diff --git a/tensorflow/compiler/xla/service/hlo_creation_utils.cc b/tensorflow/compiler/xla/service/hlo_creation_utils.cc index 0f5267e9fbc..4ba67888409 100644 --- a/tensorflow/compiler/xla/service/hlo_creation_utils.cc +++ b/tensorflow/compiler/xla/service/hlo_creation_utils.cc @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/compiler/xla/client/lib/comparators.h" #include "tensorflow/compiler/xla/client/xla_builder.h" #include "tensorflow/compiler/xla/client/xla_computation.h" +#include "tensorflow/compiler/xla/comparison_util.h" #include "tensorflow/compiler/xla/literal.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/hlo_clone_context.h" @@ -258,6 +259,11 @@ HloInstruction* MakeBitcastConvertToHlo(HloInstruction* hlo, PrimitiveType type) { CHECK_NE(hlo->shape().element_type(), type); Shape shape = ShapeUtil::ChangeElementType(hlo->shape(), type); + // PRED are stored as one byte, PRED have a BitWidth of 1, avoid this problem + // by using a convert instead of bitcast convert. 
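+  // (Note: a bitcast convert reinterprets the underlying bits, which is
+  // presumably why PRED is special-cased here: its logical width of one bit
+  // does not line up with its one-byte storage, so a regular convert, which
+  // converts values rather than reinterpreting bits, is used for PRED.)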
+ if (type == PRED || hlo->shape().element_type() == PRED) { + return MakeConvertToHlo(hlo, type); + } hlo = hlo->parent()->AddInstruction( HloInstruction::CreateBitcastConvert(shape, hlo)); CHECK_EQ(hlo->shape().element_type(), type); diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 4335ed312c3..94d53ebe0b1 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -2207,7 +2207,7 @@ Status HloInstruction::ReplaceUsesWith(absl::Span users, Status HloInstruction::ReplaceAllUsesWithDifferentShape( absl::Span users, HloInstruction* new_producer) { for (HloInstruction* user : users) { - TF_RETURN_IF_ERROR(ReplaceUseWith(user, new_producer)); + TF_RETURN_IF_ERROR(ReplaceUseWithDifferentShape(user, new_producer)); } if (parent_ && parent_->root_instruction() == this) { diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc index 62b0d98418c..d395fddcc5d 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier.cc @@ -670,14 +670,6 @@ Status ShapeVerifier::HandleReduce(HloInstruction* reduce) { } Status ShapeVerifier::HandleBitcast(HloInstruction* bitcast) { - // Bitcasts are not allowed to change the element type. - if (bitcast->operand(0)->shape().element_type() != - bitcast->shape().element_type()) { - return InternalError( - "Bitcast can not change the element type from %s to %s", - PrimitiveType_Name(bitcast->operand(0)->shape().element_type()), - PrimitiveType_Name(bitcast->shape().element_type())); - } if (layout_sensitive_ && shape_size_function_(bitcast->shape()) != shape_size_function_(bitcast->operand(0)->shape())) { diff --git a/tensorflow/compiler/xla/service/hlo_verifier_test.cc b/tensorflow/compiler/xla/service/hlo_verifier_test.cc index d9709c50df9..1f71c9586d5 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier_test.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier_test.cc @@ -540,24 +540,6 @@ TEST_F(HloVerifierTestLayoutSensitive, ConcatWithLayoutChangeNotAllowed) { HasSubstr("Instruction shouldn't change layouts")); } -TEST_F(HloVerifierTest, BitcastCanNotChangeElementType) { - const char* const hlo_string = R"( - HloModule Module - - ENTRY BitcastCanNotChangeElementType { - constant.0 = f32[2] constant({0.0, 0.0}) - ROOT bitcast = s32[2] bitcast(constant.0) - } - )"; - TF_ASSERT_OK_AND_ASSIGN(auto module, - ParseAndReturnUnverifiedModule(hlo_string)); - - auto status = verifier().Run(module.get()).status(); - ASSERT_FALSE(status.ok()); - EXPECT_THAT(status.error_message(), - HasSubstr("Bitcast can not change the element type")); -} - TEST_F(HloVerifierTestLayoutSensitive, BitcastNeedsSameNumberOfElements) { const char* const hlo_string = R"( HloModule Module From 5297c4ada6c631125908cbbdaa389b85e9ca6f2f Mon Sep 17 00:00:00 2001 From: Souradeep Nanda Date: Tue, 4 Aug 2020 09:03:06 +0530 Subject: [PATCH 2042/2522] Fix pylint --- tensorflow/python/ops/custom_gradient.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/ops/custom_gradient.py b/tensorflow/python/ops/custom_gradient.py index fc1f7f6fbc9..f081f036b58 100644 --- a/tensorflow/python/ops/custom_gradient.py +++ b/tensorflow/python/ops/custom_gradient.py @@ -131,12 +131,14 @@ def custom_gradient(f=None): By chain rule we know that `dy/dx = dy/x_0 * dx_0/dx_1 * ... * dx_i/dx_i+1 * ... 
* dx_n/dx` - In this case the gradient of our current function defined as `dx_i/dx_i+1 = (1 - 1 / (1 + e))`. - The upstream gradient `dy` would be `dx_i+1/dx_i+2 * dx_i+2/dx_i+3 * ... * dx_n/dx`. - The upstream gradient multiplied by the current gradient is then passed downstream. + In this case the gradient of our current function defined as + `dx_i/dx_i+1 = (1 - 1 / (1 + e))`. The upstream gradient `dy` would be + `dx_i+1/dx_i+2 * dx_i+2/dx_i+3 * ... * dx_n/dx`. The upstream gradient + multiplied by the current gradient is then passed downstream. - In case the function takes multiple variables as input, the `grad` function must also return - the same number of variables. We take the function `z = x * y` as an example. + In case the function takes multiple variables as input, the `grad` + function must also return the same number of variables. + We take the function `z = x * y` as an example. >>> @tf.custom_gradient ... def bar(x, y): From 66dc5f61c9f19050133a1026780c9ae6d918af46 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Mon, 3 Aug 2020 20:41:27 -0700 Subject: [PATCH 2043/2522] Fix the Windows build PiperOrigin-RevId: 324740036 Change-Id: I5bb28a33cf85393c41a6792e4238cc66b62944c3 --- tensorflow/compiler/tests/unary_ops_test.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/compiler/tests/unary_ops_test.py b/tensorflow/compiler/tests/unary_ops_test.py index e3a82610027..eb022da6895 100644 --- a/tensorflow/compiler/tests/unary_ops_test.py +++ b/tensorflow/compiler/tests/unary_ops_test.py @@ -801,6 +801,10 @@ class UnaryOpsTest(xla_test.XLATestCase): 2**31 - 1, 2**31, 2**32 - 1, 2**32, -2**32 + 1, -2**32, -2**63 + 1, 2**63 - 1 ] + # Only choose inputs which fit in the int dtype. + raw_inputs = list( + filter(lambda x: np.iinfo(dtype).min <= x <= np.iinfo(dtype).max, + raw_inputs)) inputs = np.array(raw_inputs, dtype=dtype) def count_bits(x): From d46875f207c1e45808b699bc3fb218392e4bf8a5 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 3 Aug 2020 20:46:54 -0700 Subject: [PATCH 2044/2522] Unify TF and XLA error messages for the SplitV op PiperOrigin-RevId: 324740588 Change-Id: I8bb30059cbf2c474087a040d786b632360f1ecb4 --- tensorflow/compiler/tf2xla/kernels/split_op.cc | 4 ++++ tensorflow/python/kernel_tests/split_op_test.py | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/tf2xla/kernels/split_op.cc b/tensorflow/compiler/tf2xla/kernels/split_op.cc index 7a0e240400b..dbaa84c223d 100644 --- a/tensorflow/compiler/tf2xla/kernels/split_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/split_op.cc @@ -105,6 +105,10 @@ class SplitVOp : public XlaOpKernel { const TensorShape input_shape = ctx->InputShape(0); const TensorShape index_shape = ctx->InputShape(2); + OP_REQUIRES(ctx, index_shape.num_elements() == 1, + errors::InvalidArgument( + "split_dim_tensor must have exactly one element.")); + int64 split_dim_orig; OP_REQUIRES_OK(ctx, ctx->ConstantInputAsIntScalar(2, &split_dim_orig)); int64 split_dim = split_dim_orig < 0 ? 
split_dim_orig + input_shape.dims() diff --git a/tensorflow/python/kernel_tests/split_op_test.py b/tensorflow/python/kernel_tests/split_op_test.py index ef66d8dda0b..16f92dbd875 100644 --- a/tensorflow/python/kernel_tests/split_op_test.py +++ b/tensorflow/python/kernel_tests/split_op_test.py @@ -373,7 +373,6 @@ class SplitOpTest(test.TestCase): assert s1.shape.as_list() == [1] @test_util.run_deprecated_v1 - @test_util.disable_xla("b/123337890") # Error messages differ def testNonexistentDimTensor(self): x = array_ops.placeholder(dtypes.int32) values = np.zeros([5, 30]) From 3a4385ca6943e626a82be44acccc37c7743e723a Mon Sep 17 00:00:00 2001 From: Tzu-Wei Sung Date: Mon, 3 Aug 2020 20:50:01 -0700 Subject: [PATCH 2045/2522] Address comment --- .../compiler/mlir/tensorflow/ir/tf_ops_n_z.cc | 13 +++---------- .../compiler/mlir/tensorflow/tests/tf-ops.mlir | 11 +++++++++++ 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc index 0941345a76c..1275591e6ed 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc @@ -1763,11 +1763,7 @@ static LogicalResult Verify(TransposeOp op) { auto x_type = op.x().getType().dyn_cast(); auto y_type = op.y().getType().dyn_cast(); - if (!perm_type) { - return success(); - } - - if (perm_type.getRank() != 1) { + if (perm_type && perm_type.getRank() != 1) { return op.emitOpError() << "expected perm to be a 1-D Tensor, got perm of rank " << perm_type.getRank(); @@ -1780,7 +1776,7 @@ static LogicalResult Verify(TransposeOp op) { << y_type.getRank(); } - if (!x_type || !y_type || !perm_type.hasStaticShape()) { + if (!x_type || !y_type || !perm_type || !perm_type.hasStaticShape()) { return success(); } @@ -1801,10 +1797,7 @@ static LogicalResult Verify(TransposeOp op) { const int64_t y_dim = y_type.getDimSize(y_idx); const int64_t x_idx = e.value().getSExtValue(); const int64_t x_dim = x_type.getDimSize(x_idx); - if (y_dim == ShapedType::kDynamicSize || x_dim == ShapedType::kDynamicSize) { - continue; - } - if (y_dim != x_dim) { + if (y_dim != ShapedType::kDynamicSize && x_dim != ShapedType::kDynamicSize && y_dim != x_dim) { return op.emitOpError() << "requires y.shape[" << y_idx << "] (" << y_dim << ") " << "to be equal to x.shape[perm[" << x_idx << "]] " diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf-ops.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tf-ops.mlir index 04469e69684..20a0e22c48e 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tf-ops.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf-ops.mlir @@ -2055,6 +2055,17 @@ func @testTranspose(tensor<2x?xf32>) -> tensor { // ----- +// Test tf.Transpose with different partial unknown shape +// CHECK-LABEL: testTranspose +func @testTranspose(tensor<2x?x?xf32>) -> tensor<3x?x2xf32> { +^bb0(%arg0: tensor<2x?x?xf32>): + %cst = constant dense<[2, 1, 0]> : tensor<3xi32> + %0 = "tf.Transpose"(%arg0, %cst) {T = "tfdtype$DT_FLOAT", Tperm = "tfdtype$DT_INT32"} : (tensor<2x?x?xf32>, tensor<3xi32>) -> tensor<3x?x2xf32> + return %0 : tensor<3x?x2xf32> +} + +// ----- + // Test tf.Transpose with invalid rank of perm func @testTranspose(tensor<2x3xf32>, tensor<1x2xi32>) -> tensor<3x2xf32> { ^bb0(%arg0: tensor<2x3xf32>, %arg1: tensor<1x2xi32>): From be269ac3d2ca0d283747ba0fef3b7f680b2a87c8 Mon Sep 17 00:00:00 2001 From: Tzu-Wei Sung Date: Mon, 3 Aug 2020 21:15:52 -0700 Subject: [PATCH 2046/2522] Break too long line --- 
tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc index 1275591e6ed..b917be76500 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc @@ -1797,7 +1797,8 @@ static LogicalResult Verify(TransposeOp op) { const int64_t y_dim = y_type.getDimSize(y_idx); const int64_t x_idx = e.value().getSExtValue(); const int64_t x_dim = x_type.getDimSize(x_idx); - if (y_dim != ShapedType::kDynamicSize && x_dim != ShapedType::kDynamicSize && y_dim != x_dim) { + if (y_dim != ShapedType::kDynamicSize && + x_dim != ShapedType::kDynamicSize && y_dim != x_dim) { return op.emitOpError() << "requires y.shape[" << y_idx << "] (" << y_dim << ") " << "to be equal to x.shape[perm[" << x_idx << "]] " From befe7e95c6e4fd9758d506f1b9656ab74cb3c270 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 3 Aug 2020 21:27:05 -0700 Subject: [PATCH 2047/2522] [XLA] Allow kBitcast to also change types. PiperOrigin-RevId: 324744627 Change-Id: Ie672116cf39d4bdef3a5dea788793ee5674a2f45 --- tensorflow/compiler/xla/service/BUILD | 1 - .../compiler/xla/service/hlo_creation_utils.cc | 6 ------ .../compiler/xla/service/hlo_instruction.cc | 2 +- .../compiler/xla/service/hlo_verifier.cc | 8 ++++++++ .../compiler/xla/service/hlo_verifier_test.cc | 18 ++++++++++++++++++ 5 files changed, 27 insertions(+), 8 deletions(-) diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 4d15bc432a2..540cd7fecd2 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -1705,7 +1705,6 @@ cc_library( ":hlo", ":hlo_module_config", ":shape_inference", - "//tensorflow/compiler/xla:comparison_util", "//tensorflow/compiler/xla:literal", "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:statusor", diff --git a/tensorflow/compiler/xla/service/hlo_creation_utils.cc b/tensorflow/compiler/xla/service/hlo_creation_utils.cc index 4ba67888409..0f5267e9fbc 100644 --- a/tensorflow/compiler/xla/service/hlo_creation_utils.cc +++ b/tensorflow/compiler/xla/service/hlo_creation_utils.cc @@ -21,7 +21,6 @@ limitations under the License. #include "tensorflow/compiler/xla/client/lib/comparators.h" #include "tensorflow/compiler/xla/client/xla_builder.h" #include "tensorflow/compiler/xla/client/xla_computation.h" -#include "tensorflow/compiler/xla/comparison_util.h" #include "tensorflow/compiler/xla/literal.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/hlo_clone_context.h" @@ -259,11 +258,6 @@ HloInstruction* MakeBitcastConvertToHlo(HloInstruction* hlo, PrimitiveType type) { CHECK_NE(hlo->shape().element_type(), type); Shape shape = ShapeUtil::ChangeElementType(hlo->shape(), type); - // PRED are stored as one byte, PRED have a BitWidth of 1, avoid this problem - // by using a convert instead of bitcast convert. 
- if (type == PRED || hlo->shape().element_type() == PRED) { - return MakeConvertToHlo(hlo, type); - } hlo = hlo->parent()->AddInstruction( HloInstruction::CreateBitcastConvert(shape, hlo)); CHECK_EQ(hlo->shape().element_type(), type); diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 94d53ebe0b1..4335ed312c3 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -2207,7 +2207,7 @@ Status HloInstruction::ReplaceUsesWith(absl::Span users, Status HloInstruction::ReplaceAllUsesWithDifferentShape( absl::Span users, HloInstruction* new_producer) { for (HloInstruction* user : users) { - TF_RETURN_IF_ERROR(ReplaceUseWithDifferentShape(user, new_producer)); + TF_RETURN_IF_ERROR(ReplaceUseWith(user, new_producer)); } if (parent_ && parent_->root_instruction() == this) { diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc index d395fddcc5d..62b0d98418c 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier.cc @@ -670,6 +670,14 @@ Status ShapeVerifier::HandleReduce(HloInstruction* reduce) { } Status ShapeVerifier::HandleBitcast(HloInstruction* bitcast) { + // Bitcasts are not allowed to change the element type. + if (bitcast->operand(0)->shape().element_type() != + bitcast->shape().element_type()) { + return InternalError( + "Bitcast can not change the element type from %s to %s", + PrimitiveType_Name(bitcast->operand(0)->shape().element_type()), + PrimitiveType_Name(bitcast->shape().element_type())); + } if (layout_sensitive_ && shape_size_function_(bitcast->shape()) != shape_size_function_(bitcast->operand(0)->shape())) { diff --git a/tensorflow/compiler/xla/service/hlo_verifier_test.cc b/tensorflow/compiler/xla/service/hlo_verifier_test.cc index 1f71c9586d5..d9709c50df9 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier_test.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier_test.cc @@ -540,6 +540,24 @@ TEST_F(HloVerifierTestLayoutSensitive, ConcatWithLayoutChangeNotAllowed) { HasSubstr("Instruction shouldn't change layouts")); } +TEST_F(HloVerifierTest, BitcastCanNotChangeElementType) { + const char* const hlo_string = R"( + HloModule Module + + ENTRY BitcastCanNotChangeElementType { + constant.0 = f32[2] constant({0.0, 0.0}) + ROOT bitcast = s32[2] bitcast(constant.0) + } + )"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnUnverifiedModule(hlo_string)); + + auto status = verifier().Run(module.get()).status(); + ASSERT_FALSE(status.ok()); + EXPECT_THAT(status.error_message(), + HasSubstr("Bitcast can not change the element type")); +} + TEST_F(HloVerifierTestLayoutSensitive, BitcastNeedsSameNumberOfElements) { const char* const hlo_string = R"( HloModule Module From 486296671c8302af98edc17effa3edcb28931b58 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 3 Aug 2020 22:03:17 -0700 Subject: [PATCH 2048/2522] Fix tf2xla error message in the ConcatOffset and MatrixDiagPart ops PiperOrigin-RevId: 324748010 Change-Id: I22846e9fe5d30049f2272d7b8c9140c18ea00da8 --- tensorflow/compiler/tf2xla/kernels/concat_op.cc | 8 +++++--- tensorflow/compiler/tf2xla/kernels/matrix_diag_ops.cc | 5 +++-- tensorflow/python/kernel_tests/concat_op_test.py | 1 - tensorflow/python/kernel_tests/diag_op_test.py | 2 -- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tensorflow/compiler/tf2xla/kernels/concat_op.cc 
b/tensorflow/compiler/tf2xla/kernels/concat_op.cc index 09c97de13eb..d0f24b5f561 100644 --- a/tensorflow/compiler/tf2xla/kernels/concat_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/concat_op.cc @@ -186,9 +186,11 @@ class ConcatOffsetOp : public XlaOpKernel { const int32 inp0_element = inp0_dims[j]; const int32 inp_element = inp_dims[j]; OP_REQUIRES(ctx, inp0_element == inp_element, - errors::InvalidArgument("input[", i, ",", j, - "] mismatch: ", inp0_element, - " vs. ", inp_element)); + errors::InvalidArgument( + "All dimensions except ", axis, " must match. Input ", + i, " has shape [", absl::StrJoin(inp_dims, " "), + "] and doesn't match input 0 with shape [", + absl::StrJoin(inp0_dims, " "), "].")); out_vec(j) = 0; } } diff --git a/tensorflow/compiler/tf2xla/kernels/matrix_diag_ops.cc b/tensorflow/compiler/tf2xla/kernels/matrix_diag_ops.cc index 57e961917cc..c8da75157fc 100644 --- a/tensorflow/compiler/tf2xla/kernels/matrix_diag_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/matrix_diag_ops.cc @@ -243,8 +243,9 @@ class MatrixDiagOp : public XlaOpKernel { errors::InvalidArgument("MatrixDiag op must have at least one input")); const TensorShape diag_shape = context->InputShape(0); OP_REQUIRES(context, TensorShapeUtils::IsVectorOrHigher(diag_shape), - errors::InvalidArgument("Expected >= 1 dims, got shape ", - diag_shape.DebugString())); + errors::InvalidArgument( + "diagonal must be at least 1-dim, received shape: ", + diag_shape.DebugString())); const DataType dtype = context->expected_output_dtype(0); const xla::XlaOp zero = XlaHelpers::Zero(context->builder(), dtype); diff --git a/tensorflow/python/kernel_tests/concat_op_test.py b/tensorflow/python/kernel_tests/concat_op_test.py index 8d05b278aa6..334e25cfc4e 100644 --- a/tensorflow/python/kernel_tests/concat_op_test.py +++ b/tensorflow/python/kernel_tests/concat_op_test.py @@ -701,7 +701,6 @@ class ConcatOffsetTest(test.TestCase): self.evaluate(off) @test_util.run_deprecated_v1 - @test_util.disable_xla("b/123337890") # Error messages differ def testSizeMismatch(self): cdim = constant_op.constant(1, dtypes.int32) s0 = constant_op.constant([2, 3, 5], dtypes.int32) diff --git a/tensorflow/python/kernel_tests/diag_op_test.py b/tensorflow/python/kernel_tests/diag_op_test.py index 9c679ff34c9..8e8586b88d1 100644 --- a/tensorflow/python/kernel_tests/diag_op_test.py +++ b/tensorflow/python/kernel_tests/diag_op_test.py @@ -541,7 +541,6 @@ class MatrixDiagTest(test.TestCase): array_ops.matrix_diag(0) @test_util.run_deprecated_v1 - @test_util.disable_xla("b/123337890") # Error messages differ def testInvalidShapeAtEval(self): with self.session(use_gpu=True): v = array_ops.placeholder(dtype=dtypes_lib.float32) @@ -891,7 +890,6 @@ class MatrixDiagPartTest(test.TestCase): array_ops.matrix_diag_part(0) @test_util.run_deprecated_v1 - @test_util.disable_xla("b/123337890") # Error messages differ def testInvalidShapeAtEval(self): with self.session(use_gpu=True): v = array_ops.placeholder(dtype=dtypes_lib.float32) From 6032b6a8888f5a1a38a7a719b4086dd5a7041061 Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: Mon, 3 Aug 2020 22:12:15 -0700 Subject: [PATCH 2049/2522] [tf.data service] Write dispatcher state changes to a journal. When a work directory is configured, the dispatcher will write journal entries for its state changes to a journal within the work directory. If no work directory is configured, the dispatcher uses a NoopJournalWriter, which writes nothing. We don't yet read from the journal on dispatcher start. 
This support will be added in the next CL. PiperOrigin-RevId: 324749061 Change-Id: I0baf10cef05a53e0fa94139d84d5cfd284550acf --- tensorflow/core/data/service/BUILD | 3 ++ .../core/data/service/dispatcher_impl.cc | 26 +++++++-- .../core/data/service/dispatcher_impl.h | 3 ++ .../core/data/service/dispatcher_state.cc | 4 +- .../core/data/service/dispatcher_state.h | 3 +- .../data/service/dispatcher_state_test.cc | 16 ++++-- tensorflow/core/data/service/journal.cc | 16 +++--- tensorflow/core/data/service/journal.h | 54 ++++++++++++++----- tensorflow/core/data/service/journal_test.cc | 12 ++--- .../data/experimental/service_config.proto | 3 ++ 10 files changed, 103 insertions(+), 37 deletions(-) diff --git a/tensorflow/core/data/service/BUILD b/tensorflow/core/data/service/BUILD index 55dccdf080b..19fe0263df2 100644 --- a/tensorflow/core/data/service/BUILD +++ b/tensorflow/core/data/service/BUILD @@ -61,6 +61,7 @@ cc_library( ":dispatcher_proto_cc", ":dispatcher_state", ":grpc_util", + ":journal", ":worker_cc_grpc_proto", ":worker_proto_cc", "//tensorflow/c:c_api_internal", @@ -87,6 +88,7 @@ cc_library( deps = [ ":common_proto_cc", ":data_service", + ":journal", ":journal_proto_cc", "//tensorflow/core:lib", "@com_google_absl//absl/container:flat_hash_map", @@ -100,6 +102,7 @@ tf_cc_test( deps = [ ":common_proto_cc", ":dispatcher_state", + ":journal", ":journal_proto_cc", "//tensorflow/core:lib", "//tensorflow/core:test", diff --git a/tensorflow/core/data/service/dispatcher_impl.cc b/tensorflow/core/data/service/dispatcher_impl.cc index 7a20f553b19..4bc4d409fd7 100644 --- a/tensorflow/core/data/service/dispatcher_impl.cc +++ b/tensorflow/core/data/service/dispatcher_impl.cc @@ -28,11 +28,13 @@ limitations under the License. #include "tensorflow/core/data/service/data_service.h" #include "tensorflow/core/data/service/dispatcher.pb.h" #include "tensorflow/core/data/service/grpc_util.h" +#include "tensorflow/core/data/service/journal.h" #include "tensorflow/core/data/service/worker.grpc.pb.h" #include "tensorflow/core/framework/tensor.pb.h" #include "tensorflow/core/kernels/data/dataset_utils.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/platform/path.h" #include "tensorflow/core/protobuf/data/experimental/service_config.pb.h" #include "tensorflow/core/public/session_options.h" @@ -40,6 +42,9 @@ namespace tensorflow { namespace data { namespace { +// The name of the journal directory inside the dispatcher's working directory. 
+constexpr StringPiece kJournalDir = "journal"; + using Dataset = DispatcherState::Dataset; using NamedJobKey = DispatcherState::NamedJobKey; using Job = DispatcherState::Job; @@ -59,7 +64,15 @@ Status CreateWorkerStub(const std::string& address, const std::string& protocol, DataServiceDispatcherImpl::DataServiceDispatcherImpl( const experimental::DispatcherConfig& config) - : config_(config) {} + : config_(config) { + if (config_.work_dir().empty()) { + journal_writer_ = absl::make_unique(); + } else { + std::string journal_dir = io::JoinPath(config_.work_dir(), kJournalDir); + journal_writer_ = + absl::make_unique(Env::Default(), journal_dir); + } +} Status DataServiceDispatcherImpl::RegisterWorker( const RegisterWorkerRequest* request, RegisterWorkerResponse* response) { @@ -124,7 +137,7 @@ Status DataServiceDispatcherImpl::WorkerUpdate( Update update; FinishJobUpdate* finish_job = update.mutable_finish_job(); finish_job->set_job_id(task->job_id); - TF_RETURN_IF_ERROR(state_.Apply(update)); + TF_RETURN_IF_ERROR(Apply(update)); } VLOG(3) << "Task " << task_id << " from job " << task->job_id << " completed"; @@ -170,7 +183,7 @@ Status DataServiceDispatcherImpl::RegisterDataset(uint64 fingerprint, register_dataset->set_dataset_id(*dataset_id); register_dataset->set_fingerprint(fingerprint); *register_dataset->mutable_dataset_def() = dataset; - return state_.Apply(update); + return Apply(update); } Status DataServiceDispatcherImpl::CreateJob(const CreateJobRequest* request, @@ -278,7 +291,7 @@ Status DataServiceDispatcherImpl::CreateJob( key->set_name(named_job_key->name); key->set_index(named_job_key->index); } - TF_RETURN_IF_ERROR(state_.Apply(update)); + TF_RETURN_IF_ERROR(Apply(update)); TF_RETURN_IF_ERROR(state_.JobFromId(job_id, job)); return Status::OK(); } @@ -394,5 +407,10 @@ Status DataServiceDispatcherImpl::GetWorkers(const GetWorkersRequest* request, return Status::OK(); } +Status DataServiceDispatcherImpl::Apply(const Update& update) + EXCLUSIVE_LOCKS_REQUIRED(mu_) { + return state_.Apply(update, journal_writer_.get()); +} + } // namespace data } // namespace tensorflow diff --git a/tensorflow/core/data/service/dispatcher_impl.h b/tensorflow/core/data/service/dispatcher_impl.h index f44f9e8d807..3e8b8dc6fbe 100644 --- a/tensorflow/core/data/service/dispatcher_impl.h +++ b/tensorflow/core/data/service/dispatcher_impl.h @@ -124,6 +124,8 @@ class DataServiceDispatcherImpl { Status ValidateMatchingJob(std::shared_ptr job, ProcessingMode processing_mode, int64 dataset_id) EXCLUSIVE_LOCKS_REQUIRED(mu_); + // Applies a state update, updating both the journal and the in-memory state. + Status Apply(const Update& update) EXCLUSIVE_LOCKS_REQUIRED(mu_); const experimental::DispatcherConfig& config_; @@ -141,6 +143,7 @@ class DataServiceDispatcherImpl { absl::flat_hash_map>> tasks_by_job_ TF_GUARDED_BY(mu_); + std::unique_ptr journal_writer_ TF_GUARDED_BY(mu_); DispatcherState state_ TF_GUARDED_BY(mu_); TF_DISALLOW_COPY_AND_ASSIGN(DataServiceDispatcherImpl); diff --git a/tensorflow/core/data/service/dispatcher_state.cc b/tensorflow/core/data/service/dispatcher_state.cc index 2e6709b2287..f22672c4363 100644 --- a/tensorflow/core/data/service/dispatcher_state.cc +++ b/tensorflow/core/data/service/dispatcher_state.cc @@ -16,6 +16,7 @@ limitations under the License. 
#include +#include "tensorflow/core/data/service/journal.h" #include "tensorflow/core/data/service/journal.pb.h" #include "tensorflow/core/platform/errors.h" @@ -24,7 +25,8 @@ namespace data { DispatcherState::DispatcherState() {} -Status DispatcherState::Apply(Update update) { +Status DispatcherState::Apply(Update update, JournalWriter* journal_writer) { + TF_RETURN_IF_ERROR(journal_writer->Write(update)); switch (update.update_type_case()) { case Update::kRegisterDataset: RegisterDataset(update.register_dataset()); diff --git a/tensorflow/core/data/service/dispatcher_state.h b/tensorflow/core/data/service/dispatcher_state.h index 936558e55a2..1959afa61eb 100644 --- a/tensorflow/core/data/service/dispatcher_state.h +++ b/tensorflow/core/data/service/dispatcher_state.h @@ -18,6 +18,7 @@ limitations under the License. #include "absl/container/flat_hash_map.h" #include "tensorflow/core/data/service/common.pb.h" #include "tensorflow/core/data/service/data_service.h" +#include "tensorflow/core/data/service/journal.h" #include "tensorflow/core/data/service/journal.pb.h" #include "tensorflow/core/lib/core/status.h" @@ -60,7 +61,7 @@ class DispatcherState { DispatcherState& operator=(const DispatcherState&) = delete; // Applies the given update to the dispatcher's state. - Status Apply(Update update); + Status Apply(Update update, JournalWriter* journal_writer); // A dataset registered with the dispatcher. struct Dataset { diff --git a/tensorflow/core/data/service/dispatcher_state_test.cc b/tensorflow/core/data/service/dispatcher_state_test.cc index 0a943b2dd92..e1fd47805a7 100644 --- a/tensorflow/core/data/service/dispatcher_state_test.cc +++ b/tensorflow/core/data/service/dispatcher_state_test.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/core/data/service/dispatcher_state.h" #include "tensorflow/core/data/service/common.pb.h" +#include "tensorflow/core/data/service/journal.h" #include "tensorflow/core/data/service/journal.pb.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/platform/errors.h" @@ -27,28 +28,31 @@ namespace data { namespace { Status RegisterDatasetWithIdAndFingerprint(int64 id, uint64 fingerprint, DispatcherState* state) { + NoopJournalWriter journal_writer; Update update; RegisterDatasetUpdate* register_dataset = update.mutable_register_dataset(); register_dataset->set_dataset_id(id); register_dataset->set_fingerprint(fingerprint); - TF_RETURN_IF_ERROR(state->Apply(update)); + TF_RETURN_IF_ERROR(state->Apply(update, &journal_writer)); return Status::OK(); } Status CreateAnonymousJob(int64 job_id, int64 dataset_id, DispatcherState* state) { + NoopJournalWriter journal_writer; Update update; CreateJobUpdate* create_job = update.mutable_create_job(); create_job->set_job_id(job_id); create_job->set_dataset_id(dataset_id); create_job->set_processing_mode(ProcessingModeDef::PARALLEL_EPOCHS); - TF_RETURN_IF_ERROR(state->Apply(update)); + TF_RETURN_IF_ERROR(state->Apply(update, &journal_writer)); return Status::OK(); } Status CreateNamedJob(int64 job_id, int64 dataset_id, DispatcherState::NamedJobKey named_job_key, DispatcherState* state) { + NoopJournalWriter journal_writer; Update update; CreateJobUpdate* create_job = update.mutable_create_job(); create_job->set_job_id(job_id); @@ -57,15 +61,16 @@ Status CreateNamedJob(int64 job_id, int64 dataset_id, NamedJobKeyDef* key = create_job->mutable_named_job_key(); key->set_name(named_job_key.name); key->set_index(named_job_key.index); - TF_RETURN_IF_ERROR(state->Apply(update)); + 
TF_RETURN_IF_ERROR(state->Apply(update, &journal_writer)); return Status::OK(); } Status FinishJob(int64 job_id, DispatcherState* state) { + NoopJournalWriter journal_writer; Update update; FinishJobUpdate* finish_job = update.mutable_finish_job(); finish_job->set_job_id(job_id); - TF_RETURN_IF_ERROR(state->Apply(update)); + TF_RETURN_IF_ERROR(state->Apply(update, &journal_writer)); return Status::OK(); } } // namespace @@ -112,9 +117,10 @@ TEST(DispatcherState, NextAvailableDatasetId) { } TEST(DispatcherState, UnknownUpdate) { + NoopJournalWriter journal_writer; DispatcherState state; Update update; - Status s = state.Apply(update); + Status s = state.Apply(update, &journal_writer); EXPECT_EQ(s.code(), error::INTERNAL); } diff --git a/tensorflow/core/data/service/journal.cc b/tensorflow/core/data/service/journal.cc index 9f2d4908f05..6856c69deb3 100644 --- a/tensorflow/core/data/service/journal.cc +++ b/tensorflow/core/data/service/journal.cc @@ -34,10 +34,10 @@ std::string DataServiceJournalFile(StringPiece journal_dir) { return io::JoinPath(journal_dir, kJournal); } -JournalWriter::JournalWriter(Env* env, StringPiece journal_dir) +FileJournalWriter::FileJournalWriter(Env* env, StringPiece journal_dir) : env_(env), journal_dir_(journal_dir) {} -Status JournalWriter::EnsureInitialized() { +Status FileJournalWriter::EnsureInitialized() { if (writer_) { return Status::OK(); } @@ -48,7 +48,7 @@ Status JournalWriter::EnsureInitialized() { return Status::OK(); } -Status JournalWriter::Write(Update update) { +Status FileJournalWriter::Write(Update update) { TF_RETURN_IF_ERROR(EnsureInitialized()); std::string s = update.SerializeAsString(); if (s.empty()) { @@ -61,10 +61,14 @@ Status JournalWriter::Write(Update update) { return Status::OK(); } -JournalReader::JournalReader(Env* env, StringPiece journal_dir) +NoopJournalWriter::NoopJournalWriter() {} + +Status NoopJournalWriter::Write(Update update) { return Status::OK(); } + +FileJournalReader::FileJournalReader(Env* env, StringPiece journal_dir) : env_(env), journal_dir_(journal_dir) {} -Status JournalReader::EnsureInitialized() { +Status FileJournalReader::EnsureInitialized() { if (reader_) { return Status::OK(); } @@ -74,7 +78,7 @@ Status JournalReader::EnsureInitialized() { return Status::OK(); } -Status JournalReader::Read(Update* update, bool* end_of_journal) { +Status FileJournalReader::Read(Update* update, bool* end_of_journal) { TF_RETURN_IF_ERROR(EnsureInitialized()); tstring record; Status s = reader_->ReadRecord(&offset_, &record); diff --git a/tensorflow/core/data/service/journal.h b/tensorflow/core/data/service/journal.h index b2d718ad652..f5b3e26ba18 100644 --- a/tensorflow/core/data/service/journal.h +++ b/tensorflow/core/data/service/journal.h @@ -27,19 +27,26 @@ namespace data { // Returns the location of the journal file within the journal directory. std::string DataServiceJournalFile(StringPiece journal_dir); -// JournalWriter is not thread-safe, requiring external synchronization when -// used by multiple threads. +// Interface for writing to a journal. class JournalWriter { + public: + virtual ~JournalWriter() = default; + // Writes and syncs an update to the journal. + virtual Status Write(Update update) = 0; +}; + +// FileJournalWriter is not thread-safe, requiring external synchronization when +// used by multiple threads. +class FileJournalWriter : public JournalWriter { public: // Creates a journal writer to write to the given journal directory. 
// If there is already journal data there, the journal writer will append to // the existing journal. - explicit JournalWriter(Env* env, StringPiece journal_dir); - JournalWriter(const JournalWriter&) = delete; - JournalWriter& operator=(const JournalWriter&) = delete; + explicit FileJournalWriter(Env* env, StringPiece journal_dir); + FileJournalWriter(const FileJournalWriter&) = delete; + FileJournalWriter& operator=(const FileJournalWriter&) = delete; - // Writes and syncs an update to the journal. - Status Write(Update update); + Status Write(Update update) override; private: // Initializes the writer if it is not yet initialized. @@ -51,17 +58,36 @@ class JournalWriter { std::unique_ptr writer_; }; -// JournalReader is not thread-safe, requiring external synchronization when -// used by multiple threads. +// NoopJournalWriter implements the JournalWriter interface, but doesn't +// actually write journal entries anywhere. +class NoopJournalWriter : public JournalWriter { + public: + // Creates a journal writer which does nothing. + explicit NoopJournalWriter(); + NoopJournalWriter(const NoopJournalWriter&) = delete; + NoopJournalWriter& operator=(const NoopJournalWriter&) = delete; + + Status Write(Update update) override; +}; + +// Interface for reading from a journal. class JournalReader { public: - explicit JournalReader(Env* env, StringPiece journal_dir); - JournalReader(const JournalReader&) = delete; - JournalReader& operator=(const JournalReader&) = delete; - + virtual ~JournalReader() = default; // Reads the next update from the journal. Sets `*end_of_journal=true` if // there are no more updates left in the journal. - Status Read(Update* update, bool* end_of_journal); + virtual Status Read(Update* update, bool* end_of_journal) = 0; +}; + +// JournalReader is not thread-safe, requiring external synchronization when +// used by multiple threads. +class FileJournalReader : public JournalReader { + public: + explicit FileJournalReader(Env* env, StringPiece journal_dir); + FileJournalReader(const FileJournalReader&) = delete; + FileJournalReader& operator=(const FileJournalReader&) = delete; + + Status Read(Update* update, bool* end_of_journal) override; private: // Initializes the reader if it is not yet initialized. 
diff --git a/tensorflow/core/data/service/journal_test.cc b/tensorflow/core/data/service/journal_test.cc index dc1006e280e..3c43cf763e9 100644 --- a/tensorflow/core/data/service/journal_test.cc +++ b/tensorflow/core/data/service/journal_test.cc @@ -63,7 +63,7 @@ Update MakeRegisterDatasetUpdate() { Status CheckJournalContent(StringPiece journal_dir, const std::vector& expected) { - JournalReader reader(Env::Default(), journal_dir); + FileJournalReader reader(Env::Default(), journal_dir); for (const auto& update : expected) { Update result; bool end_of_journal = true; @@ -87,7 +87,7 @@ TEST(Journal, RoundTripMultiple) { std::vector updates = {MakeCreateJobUpdate(), MakeRegisterDatasetUpdate(), MakeFinishJobUpdate()}; - JournalWriter writer(Env::Default(), journal_dir); + FileJournalWriter writer(Env::Default(), journal_dir); for (const auto& update : updates) { TF_EXPECT_OK(writer.Write(update)); } @@ -102,7 +102,7 @@ TEST(Journal, AppendExistingFile) { MakeRegisterDatasetUpdate(), MakeFinishJobUpdate()}; for (const auto& update : updates) { - JournalWriter writer(Env::Default(), journal_dir); + FileJournalWriter writer(Env::Default(), journal_dir); TF_EXPECT_OK(writer.Write(update)); } @@ -112,7 +112,7 @@ TEST(Journal, AppendExistingFile) { TEST(Journal, MissingFile) { std::string journal_dir; EXPECT_TRUE(NewJournalDir(&journal_dir)); - JournalReader reader(Env::Default(), journal_dir); + FileJournalReader reader(Env::Default(), journal_dir); Update result; bool end_of_journal = true; Status s = reader.Read(&result, &end_of_journal); @@ -131,7 +131,7 @@ TEST(Journal, NonRecordData) { TF_ASSERT_OK(file->Append("not record data")); } - JournalReader reader(Env::Default(), journal_dir); + FileJournalReader reader(Env::Default(), journal_dir); Update result; bool end_of_journal = true; Status s = reader.Read(&result, &end_of_journal); @@ -152,7 +152,7 @@ TEST(Journal, InvalidRecordData) { TF_ASSERT_OK(writer->WriteRecord("not serializd proto")); } - JournalReader reader(Env::Default(), journal_dir); + FileJournalReader reader(Env::Default(), journal_dir); Update result; bool end_of_journal = true; Status s = reader.Read(&result, &end_of_journal); diff --git a/tensorflow/core/protobuf/data/experimental/service_config.proto b/tensorflow/core/protobuf/data/experimental/service_config.proto index 8708b923720..872a47013eb 100644 --- a/tensorflow/core/protobuf/data/experimental/service_config.proto +++ b/tensorflow/core/protobuf/data/experimental/service_config.proto @@ -9,6 +9,9 @@ message DispatcherConfig { int64 port = 1; // The protocol for the dispatcher to use when connecting to workers. string protocol = 2; + // An optional work directory to use for storing dispatcher state, and for + // recovering during restarts. + string work_dir = 3; } // Configuration for a tf.data service WorkerServer. 
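The journaling change in the patch above routes every dispatcher state mutation through a JournalWriter before it touches the in-memory DispatcherState. As a rough usage sketch only — the helper name ApplyWithJournal, the work_dir argument, and the dataset id/fingerprint values below are illustrative and not part of the patch, while the class names and the Apply() signature are taken from the diff above — the writer selection and the write-then-apply ordering look roughly like this:

#include <memory>
#include <string>

#include "absl/memory/memory.h"
#include "tensorflow/core/data/service/dispatcher_state.h"
#include "tensorflow/core/data/service/journal.h"
#include "tensorflow/core/data/service/journal.pb.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/path.h"

namespace tensorflow {
namespace data {

// Illustrative helper (not in the patch): pick a journal writer the same way
// DataServiceDispatcherImpl does, then apply one update through it.
Status ApplyWithJournal(const std::string& work_dir) {
  std::unique_ptr<JournalWriter> journal_writer;
  if (work_dir.empty()) {
    // No work directory configured: updates are applied but never persisted.
    journal_writer = absl::make_unique<NoopJournalWriter>();
  } else {
    std::string journal_dir = io::JoinPath(work_dir, "journal");
    journal_writer =
        absl::make_unique<FileJournalWriter>(Env::Default(), journal_dir);
  }

  DispatcherState state;
  Update update;
  RegisterDatasetUpdate* register_dataset = update.mutable_register_dataset();
  register_dataset->set_dataset_id(1);    // placeholder value
  register_dataset->set_fingerprint(42);  // placeholder value

  // Apply() writes the update to the journal first and only then mutates the
  // in-memory state, so the journal never lags behind the dispatcher's view.
  return state.Apply(update, journal_writer.get());
}

}  // namespace data
}  // namespace tensorflow

As the commit message notes, the journal is write-only at this point; reading it back on dispatcher start-up is deferred to the next change.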
From f6c698c9eba6e32550da26f3606101ee348c9cea Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Mon, 3 Aug 2020 22:35:38 -0700 Subject: [PATCH 2050/2522] Fix compilation issue in TensorFlow TPU support PiperOrigin-RevId: 324751447 Change-Id: Icb96e301dc0bf080bc37ee31fbcb0c510a94f1c5 --- tensorflow/core/tpu/kernels/tpu_configuration_ops.cc | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc b/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc index 4030cf86910..71735f0639f 100644 --- a/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc +++ b/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc @@ -257,8 +257,10 @@ void InitializeHostForDistributedTpuOp::Compute(OpKernelContext* ctx) { #if defined(LIBTFTPU) VLOG(1) << "Removing existing proto compilation cache lookup if it exists"; - OP_REQUIRES_OK(ctx, DeleteIfExists( - rmgr, tpu::kCompiledProtoCacheResourceName)); + OP_REQUIRES_OK( + ctx, DeleteIfExists>>( + rmgr, tpu::kCompiledProtoCacheResourceName)); #endif if (enable_whole_mesh_compilations_) { @@ -286,7 +288,9 @@ void InitializeHostForDistributedTpuOp::Compute(OpKernelContext* ctx) { local_compilation_cache->Unref(); #if defined(LIBTFTPU) - tpu::TpuCompilationCacheLookup* proto_lookup; + tpu::TpuCompilationCacheLookup< + tpu::CompilationCacheEntryRef>* + proto_lookup; proto_lookup = new tpu::TpuCompilationCacheLocalLookup(local_compilation_cache); OP_REQUIRES_OK( From 68bfd34e2644178f8af446692e4993c93e1837e6 Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Mon, 3 Aug 2020 22:48:18 -0700 Subject: [PATCH 2051/2522] [TF2XLA] Enable more tests with MLIR bridge and on TPUs PiperOrigin-RevId: 324752612 Change-Id: Ia5dd37ac768461082be514618986a054f13c2aa0 --- tensorflow/python/eager/BUILD | 8 ++- .../python/eager/def_function_xla_jit_test.py | 51 +++++++++++-------- 2 files changed, 32 insertions(+), 27 deletions(-) diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD index 68afa637daf..3c0c3894a64 100644 --- a/tensorflow/python/eager/BUILD +++ b/tensorflow/python/eager/BUILD @@ -853,12 +853,10 @@ tf_py_test( tf_xla_py_test( name = "def_function_xla_jit_test", srcs = ["def_function_xla_jit_test.py"], - enable_mlir_bridge = True, - enabled_backends = [ - # TODO(b/162438052): Enable the test on TPU. 
- "cpu", - "gpu", + disabled_backends = [ + "cpu_ondemand", ], + enable_mlir_bridge = True, python_version = "PY3", tags = [ "no_mac", diff --git a/tensorflow/python/eager/def_function_xla_jit_test.py b/tensorflow/python/eager/def_function_xla_jit_test.py index 44a4c99f5d6..b9457159217 100644 --- a/tensorflow/python/eager/def_function_xla_jit_test.py +++ b/tensorflow/python/eager/def_function_xla_jit_test.py @@ -39,9 +39,10 @@ from tensorflow.python.platform import test class DefFunctionTest(xla_test.XLATestCase): - @test_util.disable_mlir_bridge('TODO(b/162381930): MLIR bridge renames ' - ' functions') def testAutoclusteringWithTfFunction(self): + if 'tpu' in self.device.lower(): + self.skipTest('Autoclustering does not run on TPU') + with ops.device('device:{}:0'.format(self.device)): @def_function.function(experimental_compile=False) @@ -80,16 +81,16 @@ class DefFunctionTest(xla_test.XLATestCase): self.assertAllClose([2, 3, 3, 4, 4], xla_func(inputs, 1)) def testBasicInt32(self): + with ops.device('device:{}:0'.format(self.device)): - def fn(x, a): - return x + a + @def_function.function(experimental_compile=True) + def fn(x, a): + return x + a - xla_func = def_function.function(fn, experimental_compile=True) - - inputs = constant_op.constant([1, 2, 2, 3, 3], dtype=dtypes.int32) - if not test.is_built_with_rocm(): - # XLA support is not yet enabled for TF ROCm - self.assertAllClose([2, 3, 3, 4, 4], xla_func(inputs, 1)) + inputs = constant_op.constant([1, 2, 2, 3, 3], dtype=dtypes.int32) + if not test.is_built_with_rocm(): + # XLA support is not yet enabled for TF ROCm + self.assertAllClose([2, 3, 3, 4, 4], fn(inputs, 1)) def testDerivative(self): with ops.device('device:{}:0'.format(self.device)): @@ -119,25 +120,24 @@ class DefFunctionTest(xla_test.XLATestCase): # Calling function with experimental_compile=True from # experimental_compile=False should compile the inner func. 
- @test_util.disable_mlir_bridge('TODO(b/162381930): MLIR bridge renames ' - ' functions') def testNestedCall(self): + if 'tpu' in self.device.lower(): + self.skipTest('b/162800687: Inner function runs on host') + with ops.device('device:{}:0'.format(self.device)): + @def_function.function(experimental_compile=True) def fn(x, a): return x + a - xla_func = def_function.function(fn, experimental_compile=True) - + @def_function.function(experimental_compile=False) def fn2(x, a): - return xla_func(x, a) - - func = def_function.function(fn2, experimental_compile=False) + return fn(x, a) inputs = constant_op.constant([1, 2, 2, 3, 3]) if not test.is_built_with_rocm(): # XLA support is not yet enabled for TF ROCm - self.assertAllClose([2, 3, 3, 4, 4], func(inputs, 1)) + self.assertAllClose([2, 3, 3, 4, 4], fn2(inputs, 1)) @test_util.disable_mlir_bridge('TODO(b/162272821): MLIR bridge returns' ' wrong status type') @@ -268,9 +268,10 @@ class DefFunctionTest(xla_test.XLATestCase): 'not compilable'): c.f1(inputs) - @test_util.disable_mlir_bridge('TODO(b/162381930): MLIR bridge renames ' - ' functions') def testMustBeConstantPropagation(self): + if 'tpu' in self.device.lower(): + self.skipTest('b/162799319: Cannot resolve constant on TPU') + with ops.device('device:{}:0'.format(self.device)): if test.is_built_with_rocm(): return @@ -292,6 +293,9 @@ class DefFunctionTest(xla_test.XLATestCase): @test_util.disable_mlir_bridge('TODO(b/162271237): argmax gives different' ' results in MLIR-based bridge') def testArgMinMax(self): + if 'tpu' in self.device.lower(): + self.skipTest('b/162800904: Tie resolution is wrong on TPU for tf.func') + with ops.device('device:{}:0'.format(self.device)): @def_function.function(experimental_compile=True) @@ -429,6 +433,9 @@ class DefFunctionTest(xla_test.XLATestCase): self.assertAllClose([5.0, 5.0, 5.0], g()) def testCumsum(self): + if 'tpu' in self.device.lower(): + self.skipTest('b/162771302: 64bit rewrite of cumsum not supported') + with ops.device('device:{}:0'.format(self.device)): @def_function.function(experimental_compile=True) @@ -438,8 +445,6 @@ class DefFunctionTest(xla_test.XLATestCase): f64_input = constant_op.constant([1.1, 2.2, 3.3], dtype=dtypes.float64) self.assertAllClose([1.1, 3.3, 6.6], f(f64_input)) - @test_util.disable_mlir_bridge('TODO(b/162381930): MLIR bridge renames ' - ' functions') def testNoExcessiveRetracing(self): with ops.device('device:{}:0'.format(self.device)): inner_retracings = 0 @@ -501,6 +506,8 @@ class DefFunctionTest(xla_test.XLATestCase): outer() self.assertAllClose(c.v, 3.52) + @test_util.disable_mlir_bridge('TODO(b/162801728): MLIR bridge causes ' + ' invalid free on TPUs') def testUpdateVariableMultipleOutputs(self): with ops.device('device:{}:0'.format(self.device)): v = variables.Variable(3.1) From 19d6fdc42d51ddbc978f9e2bcbf921a1460acc86 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Mon, 3 Aug 2020 22:59:23 -0700 Subject: [PATCH 2052/2522] Internal change PiperOrigin-RevId: 324753700 Change-Id: I8305c64636ef358daa691059d719debb3d1228f9 --- .../Dialect/mhlo/transforms/sink_constants_to_control_flow.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/sink_constants_to_control_flow.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/sink_constants_to_control_flow.cc index 14d89a7e196..8d677f45c19 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/sink_constants_to_control_flow.cc +++ 
b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/sink_constants_to_control_flow.cc @@ -16,12 +16,12 @@ limitations under the License. #include "llvm/ADT/DenseMap.h" #include "llvm/Support/Casting.h" #include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/IR/Operation.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" #include "mlir/Support/LLVM.h" #include "mlir/Transforms/RegionUtils.h" -#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project namespace mlir { namespace mhlo { From 88ab72153e13d925f951e9464a121cda47083e3b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 3 Aug 2020 23:04:41 -0700 Subject: [PATCH 2053/2522] Integrate LLVM at llvm/llvm-project@b5059b714023 Updates LLVM usage to match [b5059b714023](https://github.com/llvm/llvm-project/commit/b5059b714023) PiperOrigin-RevId: 324754243 Change-Id: I54dfd7b80179c7a25699bb41e91868037d0eaf89 --- tensorflow/compiler/mlir/hlo/BUILD | 4 +- .../Dialect/mhlo/transforms/register_passes.h | 8 +-- .../mlir/tools/kernel_gen/transforms/BUILD | 2 +- .../kernel_gen/transforms/register_passes.cc | 5 +- tensorflow/workspace.bzl | 4 +- third_party/mlir/BUILD | 72 ++++++++++++++----- third_party/mlir/test.BUILD | 12 ++++ 7 files changed, 78 insertions(+), 29 deletions(-) diff --git a/tensorflow/compiler/mlir/hlo/BUILD b/tensorflow/compiler/mlir/hlo/BUILD index e003e9ba279..9eee39894e4 100644 --- a/tensorflow/compiler/mlir/hlo/BUILD +++ b/tensorflow/compiler/mlir/hlo/BUILD @@ -60,7 +60,7 @@ gentbl( strip_include_prefix = "include/mlir-hlo/Dialect/mhlo/transforms/", tbl_outs = [ ( - "-gen-pass-decls", + "-gen-pass-decls -name MHLO", "include/mlir-hlo/Dialect/mhlo/transforms/mhlo_passes.h.inc", ), ], @@ -76,7 +76,7 @@ gentbl( strip_include_prefix = "include/mlir-hlo/Dialect/mhlo/transforms/", tbl_outs = [ ( - "-gen-pass-decls", + "-gen-pass-decls -name LMHLO", "include/mlir-hlo/Dialect/mhlo/transforms/lmhlo_passes.h.inc", ), ], diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/register_passes.h b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/register_passes.h index 5c862d83fee..8f70f64359b 100644 --- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/register_passes.h +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/register_passes.h @@ -27,10 +27,10 @@ std::unique_ptr createTestInferShapedTypeMethodsPass(); std::unique_ptr createTestMaterializeBroadcastsPass(); std::unique_ptr createTestUnfuseBatchNormPass(); -inline void registerAllMhloPasses() { #define GEN_PASS_REGISTRATION #include "mlir-hlo/Dialect/mhlo/transforms/mhlo_passes.h.inc" -} + +inline void registerAllMhloPasses() { registerMHLOPasses(); } } // namespace mhlo @@ -38,10 +38,10 @@ namespace lmhlo { std::unique_ptr createTestLhloToLLVMPass(); -inline void registerAllLmhloPasses() { #define GEN_PASS_REGISTRATION #include "mlir-hlo/Dialect/mhlo/transforms/lmhlo_passes.h.inc" -} + +inline void registerAllLmhloPasses() { registerLMHLOPasses(); } } // namespace lmhlo } // namespace mlir diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD index 0119b2e46ea..613422e6128 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD @@ -37,7 +37,7 @@ cc_library( gentbl( name = "tf_framework_passes_inc_gen", - tbl_outs = 
[("-gen-pass-decls", "tf_framework_passes.h.inc")], + tbl_outs = [("-gen-pass-decls -name TFFramework", "tf_framework_passes.h.inc")], tblgen = "@llvm-project//mlir:mlir-tblgen", td_file = "passes.td", td_srcs = ["@llvm-project//mlir:PassBaseTdFiles"], diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/register_passes.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/register_passes.cc index b9bad8e18d2..b9cdb2085a3 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/register_passes.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/register_passes.cc @@ -19,11 +19,10 @@ limitations under the License. namespace mlir { namespace kernel_gen { namespace tf_framework { - -bool register_all_passes = ([] { #define GEN_PASS_REGISTRATION #include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_passes.h.inc" -}(), true); + +bool register_all_passes = ([] { registerTFFrameworkPasses(); }(), true); } // namespace tf_framework } // namespace kernel_gen diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 47cc5951579..29cba080fa1 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -712,8 +712,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "b7cfa6ca92830b3c331cb44706bb279996663439" - LLVM_SHA256 = "bad1849f86e5b83571d8a83c849e07dd66c5ddbc01a73432d4fef4da2db21543" + LLVM_COMMIT = "b5059b7140232559ed123cb94d4e8f75ca9a44dc" + LLVM_SHA256 = "3075583f88b572da4afb1340281b0e170d51ef03ba6eb2965e7dc8288cbff153" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), diff --git a/third_party/mlir/BUILD b/third_party/mlir/BUILD index 94e8f4520a6..04238bae943 100644 --- a/third_party/mlir/BUILD +++ b/third_party/mlir/BUILD @@ -99,7 +99,6 @@ cc_library( "-lpthread", ], deps = [ - ":Analysis", ":IR", ":Support", "@llvm-project//llvm:Support", @@ -344,11 +343,11 @@ gentbl( ) gentbl( - name = "LoopPassIncGen", + name = "SCFPassIncGen", strip_include_prefix = "include", tbl_outs = [ ( - "-gen-pass-decls", + "-gen-pass-decls -name SCF", "include/mlir/Dialect/SCF/Passes.h.inc", ), ], @@ -370,9 +369,9 @@ cc_library( deps = [ ":Affine", ":IR", - ":LoopPassIncGen", ":Pass", ":SCFDialect", + ":SCFPassIncGen", ":StandardOps", ":Transforms", "@llvm-project//llvm:Support", @@ -433,6 +432,7 @@ cc_library( ]), hdrs = glob([ "include/mlir/Dialect/*.h", + "include/mlir/Dialect/*.h", ]), includes = ["include"], deps = [ @@ -510,7 +510,7 @@ gentbl( strip_include_prefix = "include", tbl_outs = [ ( - "-gen-pass-decls", + "-gen-pass-decls -name Affine", "include/mlir/Dialect/Affine/Passes.h.inc", ), ], @@ -552,7 +552,7 @@ gentbl( strip_include_prefix = "include", tbl_outs = [ ( - "-gen-pass-decls", + "-gen-pass-decls -name Conversion", "include/mlir/Conversion/Passes.h.inc", ), ], @@ -563,6 +563,35 @@ gentbl( ], ) +cc_library( + name = "ConversionPasses", + hdrs = ["include/mlir/Conversion/Passes.h"], + includes = ["include"], + deps = [ + ":AVX512ToLLVM", + ":AffineToStandard", + ":ConversionPassIncGen", + ":GPUToGPURuntimeTransforms", + ":GPUToNVVMTransforms", + ":GPUToROCDLTransforms", + ":GPUToSPIRVTransforms", + ":GPUToVulkanTransforms", + ":LinalgToLLVM", + ":LinalgToSPIRV", + ":LinalgToStandard", + ":SCFToGPUPass", + ":SCFToStandard", + ":SPIRVToLLVM", + 
":ShapeToSCF", + ":ShapeToStandard", + ":StandardToLLVM", + ":StandardToSPIRVTransforms", + ":VectorToLLVM", + ":VectorToROCDL", + ":VectorToSCF", + ], +) + cc_library( name = "AffineToStandard", srcs = glob([ @@ -628,7 +657,9 @@ cc_library( ":EDSC", ":IR", ":LoopLikeInterface", + ":Pass", ":SCFIncGen", + ":SCFPassIncGen", ":SideEffectInterfaces", ":StandardOps", ":Support", @@ -788,7 +819,7 @@ gentbl( name = "ShapeTransformsPassIncGen", strip_include_prefix = "include", tbl_outs = [( - "-gen-pass-decls", + "-gen-pass-decls -name Shape", "include/mlir/Dialect/Shape/Transforms/Passes.h.inc", )], tblgen = ":mlir-tblgen", @@ -847,7 +878,7 @@ gentbl( name = "StandardOpsTransformsPassIncGen", strip_include_prefix = "include", tbl_outs = [( - "-gen-pass-decls", + "-gen-pass-decls -name Standard", "include/mlir/Dialect/StandardOps/Transforms/Passes.h.inc", )], tblgen = ":mlir-tblgen", @@ -897,6 +928,7 @@ cc_library( ":DialectUtils", ":EDSC", ":IR", + ":SCFDialect", ":SideEffectInterfaces", ":StandardOps", ":Support", @@ -1087,7 +1119,7 @@ gentbl( strip_include_prefix = "include", tbl_outs = [ ( - "-gen-pass-decls", + "-gen-pass-decls -name LLVM", "include/mlir/Dialect/LLVMIR/Transforms/Passes.h.inc", ), ], @@ -1206,7 +1238,7 @@ gentbl( strip_include_prefix = "include", tbl_outs = [ ( - "-gen-pass-decls", + "-gen-pass-decls -name GPU", "include/mlir/Dialect/GPU/Passes.h.inc", ), ], @@ -1896,6 +1928,7 @@ cc_library( ":SPIRVCanonicalizationIncGen", ":SPIRVOpUtilsIncGen", ":SPIRVOpsIncGen", + ":SPIRVPassIncGen", ":SPIRVSerializationGen", ":SPIRVTargetAndABIStructGen", ":SideEffectInterfaces", @@ -1910,7 +1943,7 @@ gentbl( strip_include_prefix = "include", tbl_outs = [ ( - "-gen-pass-decls", + "-gen-pass-decls -name SPIRV", "include/mlir/Dialect/SPIRV/Passes.h.inc", ), ], @@ -2041,11 +2074,12 @@ cc_library( ":Analysis", ":ControlFlowInterfaces", ":IR", - ":LoopLikeInterface", + ":Pass", ":SCFDialect", ":SideEffectInterfaces", ":StandardOps", ":Support", + ":TransformsPassIncGen", "@llvm-project//llvm:Support", ], ) @@ -2172,7 +2206,7 @@ gentbl( strip_include_prefix = "include", tbl_outs = [ ( - "-gen-pass-decls", + "-gen-pass-decls -name Transforms", "include/mlir/Transforms/Passes.h.inc", ), ], @@ -2709,6 +2743,7 @@ cc_library( includes = ["include"], deps = [ ":Analysis", + ":ConversionPasses", ":GPUToGPURuntimeTransforms", ":GPUToNVVMTransforms", ":GPUToROCDLTransforms", @@ -2741,6 +2776,7 @@ cc_library( "@llvm-project//mlir/test:TestReducer", "@llvm-project//mlir/test:TestSPIRV", "@llvm-project//mlir/test:TestTransforms", + "@llvm-project//mlir/test:TestTypeDialect", ], ) @@ -2788,8 +2824,9 @@ cc_library( ":AVX512ToLLVM", ":Affine", ":AffinePassIncGen", + ":AffineToStandard", ":AffineTransforms", - ":ConversionPassIncGen", + ":ConversionPasses", ":GPUDialect", ":GPUPassIncGen", ":GPUToGPURuntimeTransforms", @@ -2809,13 +2846,13 @@ cc_library( ":LinalgToSPIRV", ":LinalgToStandard", ":LinalgTransforms", - ":LoopPassIncGen", ":NVVMDialect", ":OpenMPDialect", ":QuantOps", ":QuantPassIncGen", ":ROCDLDialect", ":SCFDialect", + ":SCFPassIncGen", ":SCFToGPUPass", ":SCFToStandard", ":SCFTransforms", @@ -2890,6 +2927,7 @@ cc_binary( "@llvm-project//mlir/test:TestReducer", "@llvm-project//mlir/test:TestSPIRV", "@llvm-project//mlir/test:TestTransforms", + "@llvm-project//mlir/test:TestTypeDialect", ], ) @@ -3211,7 +3249,7 @@ gentbl( strip_include_prefix = "include", tbl_outs = [ ( - "-gen-pass-decls", + "-gen-pass-decls -name Quant", "include/mlir/Dialect/Quant/Passes.h.inc", ), ], @@ -3506,7 +3544,7 @@ 
gentbl( strip_include_prefix = "include", tbl_outs = [ ( - "-gen-pass-decls", + "-gen-pass-decls -name Linalg", "include/mlir/Dialect/Linalg/Passes.h.inc", ), ], diff --git a/third_party/mlir/test.BUILD b/third_party/mlir/test.BUILD index a1dd9f0c168..f507842a639 100644 --- a/third_party/mlir/test.BUILD +++ b/third_party/mlir/test.BUILD @@ -229,3 +229,15 @@ cc_library( "@llvm-project//mlir:SPIRVLowering", ], ) + +cc_library( + name = "TestTypeDialect", + srcs = glob([ + "lib/Dialect/LLVMIR/*.cpp", + ]), + deps = [ + ":TestDialect", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:LLVMDialect", + ], +) From 451ca2badeb4851e01ad0780dc42aa66eac30b41 Mon Sep 17 00:00:00 2001 From: Thai Nguyen Date: Tue, 4 Aug 2020 00:56:21 -0700 Subject: [PATCH 2054/2522] Support User's defined library in Flex delegate The user's libraries should be listed in the additional_deps as follows: tflite_flex_cc_library( name = "sample_delegate", models = ["model1.tflite", "model2.tflite"], additional_deps = ["your_custom_ops_lib"], ) Converter support will be done in a separate cl. PiperOrigin-RevId: 324764988 Change-Id: I3c284ee154c5cb17a98b148634c2bcccc4db530d --- tensorflow/lite/delegates/flex/BUILD | 12 +- tensorflow/lite/delegates/flex/build_def.bzl | 126 +++++++++---------- tensorflow/lite/tools/BUILD | 15 ++- 3 files changed, 84 insertions(+), 69 deletions(-) diff --git a/tensorflow/lite/delegates/flex/BUILD b/tensorflow/lite/delegates/flex/BUILD index a6d71881a3d..6210007361a 100644 --- a/tensorflow/lite/delegates/flex/BUILD +++ b/tensorflow/lite/delegates/flex/BUILD @@ -58,12 +58,14 @@ tf_cc_test( # Define the standard flex delegate library, that pulls in the standard set # of TensorFlow ops and kernels, using tflite_flex_cc_library with no -# portable_tensorflow_lib parameter. Custom flex delegate can be defined with -# tflite_flex_cc_library if the parameter portable_tensorflow_lib -# is provided. Ex: +# models parameter. Custom flex delegate can be defined with +# tflite_flex_cc_library if the parameter models is provided. Tensorflow +# user-provided ops could also be supported by passing to additional_deps. +# Ex: # tflite_flex_cc_library( -# name = "sample", -# portable_tensorflow_lib = custom_portable_tensorflow_lib, +# name = "sample_delegate", +# models = ["model1.tflite", "model2.tflite"], +# additional_deps = ["your_custom_ops_lib"], # ) tflite_flex_cc_library( name = "delegate", diff --git a/tensorflow/lite/delegates/flex/build_def.bzl b/tensorflow/lite/delegates/flex/build_def.bzl index 2ff762b658b..9b0771e79e6 100644 --- a/tensorflow/lite/delegates/flex/build_def.bzl +++ b/tensorflow/lite/delegates/flex/build_def.bzl @@ -5,6 +5,7 @@ load( "if_android", "if_ios", "if_mobile", + "tf_cc_binary", "tf_copts", "tf_defines_nortti_if_lite_protos", "tf_features_nomodules_if_mobile", @@ -21,12 +22,14 @@ load("@build_bazel_rules_android//android:rules.bzl", "android_library") def generate_flex_kernel_header( name, - models): + models, + additional_deps = []): """A rule to generate a header file listing only used operators. Args: name: Name of the generated library. models: TFLite models to interpret. + additional_deps: Dependencies for additional TF ops. 
Returns: A struct with 'header' and 'include_path' fields that @@ -44,6 +47,14 @@ def generate_flex_kernel_header( ) list_ops_output = include_path + "/list_flex_ops" list_ops_tool = "//tensorflow/lite/tools:list_flex_ops_main" + if additional_deps: + tf_cc_binary( + name = "%s_list_flex_ops_main" % name, + deps = [ + "//tensorflow/lite/tools:list_flex_ops_main_lib", + ] + additional_deps, + ) + list_ops_tool = ":%s_list_flex_ops_main" % name native.genrule( name = "%s_list_flex_ops" % name, srcs = models, @@ -70,59 +81,18 @@ def generate_flex_kernel_header( return struct(include_path = include_path, header = header) def tflite_flex_cc_library( - name, - portable_tensorflow_lib = "//tensorflow/core:portable_tensorflow_lib", - visibility = ["//visibility:public"]): - """A rule to generate a flex delegate with custom portable tensorflow lib. - - This lib should be a custom version of portable_tensorflow_lib and contains ops - registrations and kernels. If not defined, the default libs will be used. - - Args: - name: Name of the generated rule. - portable_tensorflow_lib: the tensorflow_lib to be added in deps for android and ios, - can be a full or trimmed version. - visibility: visibility of the generated rule. - """ - native.cc_library( - name = name, - hdrs = [ - "//tensorflow/lite/delegates/flex:delegate.h", - ], - visibility = visibility, - deps = [ - "//tensorflow/lite/delegates/flex:delegate_data", - "//tensorflow/lite/delegates/flex:delegate_only_runtime", - "//tensorflow/lite/delegates/utils:simple_delegate", - ] + select({ - "//tensorflow:android": [ - portable_tensorflow_lib, - ], - "//tensorflow:ios": [ - portable_tensorflow_lib, - ], - "//conditions:default": [ - "//tensorflow/core:tensorflow", - "//tensorflow/lite/c:common", - ], - }), - alwayslink = 1, - ) - -def tflite_flex_jni_library( name, models = [], - visibility = ["//visibility:private"]): - """A rule to generate a jni library listing only used operators. - - The libtensorflowlite_flex_jni.so name is fixed due to a limitation in JNI - Java wrapper, so please make sure there is no naming conflicts. + additional_deps = [], + visibility = ["//visibility:public"]): + """A rule to generate a flex delegate with only ops to run listed models. Args: - name: Prefix of the generated libraries. + name: Name of the generated flex delegate. models: TFLite models to interpret. The library will only include ops and kernels to support these models. If empty, the library will include all Tensorflow ops and kernels. + additional_deps: Dependencies for additional TF ops. visibility: visibility of the generated rules. """ portable_tensorflow_lib = "//tensorflow/core:portable_tensorflow_lib" @@ -130,6 +100,7 @@ def tflite_flex_jni_library( CUSTOM_KERNEL_HEADER = generate_flex_kernel_header( name = "%s_tf_op_headers" % name, models = models, + additional_deps = additional_deps, ) # Define a custom tensorflow_lib with selective registration. @@ -172,52 +143,81 @@ def tflite_flex_jni_library( ) portable_tensorflow_lib = ":%s_tensorflow_lib" % name - # Define a custom init_tensorflow that depends on the above tensorflow_lib. - # This will avoid the symbols re-definition errors. + # Define a custom flex delegate with above tensorflow_lib. 
native.cc_library( - name = "%s_init_tensorflow" % name, - srcs = [ - "//tensorflow/lite/testing:init_tensorflow.cc", - ], + name = name, hdrs = [ - "//tensorflow/lite/testing:init_tensorflow.h", + "//tensorflow/lite/delegates/flex:delegate.h", ], visibility = visibility, - deps = select({ - "//conditions:default": [ - "//tensorflow/core:lib", - ], + deps = [ + "//tensorflow/lite/delegates/flex:delegate_data", + "//tensorflow/lite/delegates/flex:delegate_only_runtime", + "//tensorflow/lite/delegates/utils:simple_delegate", + ] + select({ "//tensorflow:android": [ portable_tensorflow_lib, ], "//tensorflow:ios": [ portable_tensorflow_lib, ], - }), + "//conditions:default": [ + "//tensorflow/core:tensorflow", + "//tensorflow/lite/c:common", + ], + }) + additional_deps, + alwayslink = 1, ) +def tflite_flex_jni_library( + name, + models = [], + additional_deps = [], + visibility = ["//visibility:private"]): + """A rule to generate a jni library listing only used operators. + + The libtensorflowlite_flex_jni.so name is fixed due to a limitation in JNI + Java wrapper, so please make sure there is no naming conflicts. + + Args: + name: Prefix of the generated libraries. + models: TFLite models to interpret. The library will only include ops and kernels + to support these models. If empty, the library will include all Tensorflow + ops and kernels. + additional_deps: Dependencies for additional TF ops. + visibility: visibility of the generated rules. + """ + # Define a custom flex_delegate that depends on above tensorflow_lib. # This will reduce the binary size comparing to the original flex delegate. tflite_flex_cc_library( name = "%s_flex_delegate" % name, - portable_tensorflow_lib = portable_tensorflow_lib, + models = models, + additional_deps = additional_deps, visibility = visibility, ) - # Define a custom flex_native that depends on above flex_delegate and init_tensorflow. + # Define a custom flex_native that depends on above flex_delegate. native.cc_library( name = "%s_flex_native" % name, srcs = [ + "//tensorflow/lite/testing:init_tensorflow.h", + "//tensorflow/lite/testing:init_tensorflow.cc", "//tensorflow/lite/delegates/flex/java/src/main/native:flex_delegate_jni.cc", ], copts = tflite_copts(), visibility = visibility, deps = [ ":%s_flex_delegate" % name, - ":%s_init_tensorflow" % name, "//tensorflow/lite/java/jni", "//tensorflow/lite/delegates/utils:simple_delegate", - ], + ] + select({ + "//tensorflow:android": [], + "//tensorflow:ios": [], + "//conditions:default": [ + "//tensorflow/core:lib", + ], + }), alwayslink = 1, ) diff --git a/tensorflow/lite/tools/BUILD b/tensorflow/lite/tools/BUILD index 89d3da1ec6a..1f57cad7f7a 100644 --- a/tensorflow/lite/tools/BUILD +++ b/tensorflow/lite/tools/BUILD @@ -9,7 +9,9 @@ package( licenses = ["notice"], # Apache 2.0 ) -exports_files(["logging.h"]) +exports_files([ + "logging.h", +]) common_copts = ["-Wall"] @@ -283,6 +285,17 @@ tf_cc_binary( ], ) +cc_library( + name = "list_flex_ops_main_lib", + srcs = ["list_flex_ops_main.cc"], + visibility = ["//visibility:public"], + deps = [ + ":list_flex_ops", + "//tensorflow/lite/tools:command_line_flags", + "@com_google_absl//absl/strings", + ], +) + tf_cc_test( name = "list_flex_ops_test", srcs = ["list_flex_ops_test.cc"], From 4187ccbe2536d43b2fb6e794c4ca72ad631da4c3 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 4 Aug 2020 02:02:26 -0700 Subject: [PATCH 2055/2522] compat: Update forward compatibility horizon to 2020-08-04 PiperOrigin-RevId: 324771488 Change-Id: I2c256c3a92b758fde313f9fa27f590e9899513aa --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index e0f751a4376..c40337f00be 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 8, 3) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 8, 4) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From c7fa71b32a3635eb25596ae80d007b41007769c4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 4 Aug 2020 02:02:27 -0700 Subject: [PATCH 2056/2522] Update GraphDef version to 483. PiperOrigin-RevId: 324771489 Change-Id: Idc9972f0e6200d41978a5ec0f6b6f3e5b8edd320 --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index dae48097aa8..7febc640348 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 482 // Updated: 2020/8/3 +#define TF_GRAPH_DEF_VERSION 483 // Updated: 2020/8/4 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From 0f592c3c557267a1055dee892eaa52e9c61b8888 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Tue, 4 Aug 2020 16:28:17 +0700 Subject: [PATCH 2057/2522] Add NewReadOnlyMemoryRegionFromFile --- .../filesystem/plugins/hadoop/hadoop_filesystem.cc | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc index de0a36816e0..20c907affee 100644 --- a/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc @@ -440,6 +440,20 @@ void NewWritableFile(const TF_Filesystem* filesystem, const char* path, TF_SetStatus(status, TF_OK, ""); } +void NewReadOnlyMemoryRegionFromFile(const TF_Filesystem* filesystem, + const char* path, + TF_ReadOnlyMemoryRegion* region, + TF_Status* status) { + // hadoopReadZero() technically supports this call with the following + // caveats: + // - It only works up to 2 GB. We'd have to Stat() the file to ensure that + // it fits. + // - If not on the local filesystem, the entire file will be read, making + // it inefficient for callers that assume typical mmap() behavior. 
+ TF_SetStatus(status, TF_UNIMPLEMENTED, + "HDFS does not support ReadOnlyMemoryRegion"); +} + // TODO(vnvo2409): Implement later } // namespace tf_hadoop_filesystem From cb7fd5197e2e435b9d15c6b21c7659a493563d64 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Tue, 4 Aug 2020 16:29:01 +0700 Subject: [PATCH 2058/2522] Add PathExists --- .../plugins/hadoop/hadoop_filesystem.cc | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc index 20c907affee..1bd72bdf059 100644 --- a/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc @@ -454,6 +454,22 @@ void NewReadOnlyMemoryRegionFromFile(const TF_Filesystem* filesystem, "HDFS does not support ReadOnlyMemoryRegion"); } +void PathExists(const TF_Filesystem* filesystem, const char* path, + TF_Status* status) { + auto libhdfs = static_cast(filesystem->plugin_filesystem); + auto fs = Connect(libhdfs, path, status); + if (TF_GetCode(status) != TF_OK) return; + + std::string scheme, namenode, hdfs_path; + ParseHadoopPath(path, &scheme, &namenode, &hdfs_path); + + if (libhdfs->hdfsExists(fs, hdfs_path.c_str()) == 0) + TF_SetStatus(status, TF_OK, ""); + else + TF_SetStatus(status, TF_NOT_FOUND, + (std::string(path) + " not found").c_str()); +} + // TODO(vnvo2409): Implement later } // namespace tf_hadoop_filesystem From b3c8cc141a5c6c4becc642ea09f34537eba3b1d2 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Tue, 4 Aug 2020 16:29:25 +0700 Subject: [PATCH 2059/2522] Add Stat --- .../plugins/hadoop/hadoop_filesystem.cc | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc index 1bd72bdf059..b3ba79cbc77 100644 --- a/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc @@ -470,6 +470,25 @@ void PathExists(const TF_Filesystem* filesystem, const char* path, (std::string(path) + " not found").c_str()); } +void Stat(const TF_Filesystem* filesystem, const char* path, + TF_FileStatistics* stats, TF_Status* status) { + auto libhdfs = static_cast(filesystem->plugin_filesystem); + auto fs = Connect(libhdfs, path, status); + if (TF_GetCode(status) != TF_OK) return; + + std::string scheme, namenode, hdfs_path; + ParseHadoopPath(path, &scheme, &namenode, &hdfs_path); + + auto info = libhdfs->hdfsGetPathInfo(fs, hdfs_path.c_str()); + if (info == nullptr) return TF_SetStatusFromIOError(status, errno, path); + + stats->length = static_cast(info->mSize); + stats->mtime_nsec = static_cast(info->mLastMod) * 1e9; + stats->is_directory = info->mKind == kObjectKindDirectory; + libhdfs->hdfsFreeFileInfo(info, 1); + TF_SetStatus(status, TF_OK, ""); +} + // TODO(vnvo2409): Implement later } // namespace tf_hadoop_filesystem From 9fee1f54b2ce504cbdc31d5f2958a6277cadb3c0 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Tue, 4 Aug 2020 16:29:39 +0700 Subject: [PATCH 2060/2522] Add GetFileSize --- .../plugins/hadoop/hadoop_filesystem.cc | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc index 
b3ba79cbc77..dce6e27f9ee 100644 --- a/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc @@ -489,6 +489,27 @@ void Stat(const TF_Filesystem* filesystem, const char* path, TF_SetStatus(status, TF_OK, ""); } +int64_t GetFileSize(const TF_Filesystem* filesystem, const char* path, + TF_Status* status) { + auto libhdfs = static_cast(filesystem->plugin_filesystem); + auto fs = Connect(libhdfs, path, status); + if (TF_GetCode(status) != TF_OK) return -1; + + std::string scheme, namenode, hdfs_path; + ParseHadoopPath(path, &scheme, &namenode, &hdfs_path); + + auto info = libhdfs->hdfsGetPathInfo(fs, hdfs_path.c_str()); + if (info == nullptr) { + TF_SetStatusFromIOError(status, errno, path); + return -1; + } + + TF_SetStatus(status, TF_OK, ""); + auto size = static_cast(info->mSize); + libhdfs->hdfsFreeFileInfo(info, 1); + return size; +} + // TODO(vnvo2409): Implement later } // namespace tf_hadoop_filesystem From 557663d0ac0c5e2f000cb8fde679b6ca3649a28d Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Tue, 4 Aug 2020 16:33:24 +0700 Subject: [PATCH 2061/2522] Add DeleteFile --- .../plugins/hadoop/hadoop_filesystem.cc | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc index dce6e27f9ee..7b7a575873a 100644 --- a/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc @@ -510,6 +510,21 @@ int64_t GetFileSize(const TF_Filesystem* filesystem, const char* path, return size; } +void DeleteFile(const TF_Filesystem* filesystem, const char* path, + TF_Status* status) { + auto libhdfs = static_cast(filesystem->plugin_filesystem); + auto fs = Connect(libhdfs, path, status); + if (TF_GetCode(status) != TF_OK) return; + + std::string scheme, namenode, hdfs_path; + ParseHadoopPath(path, &scheme, &namenode, &hdfs_path); + + if (libhdfs->hdfsDelete(fs, hdfs_path.c_str(), /*recursive=*/0) != 0) + TF_SetStatusFromIOError(status, errno, path); + else + TF_SetStatus(status, TF_OK, ""); +} + // TODO(vnvo2409): Implement later } // namespace tf_hadoop_filesystem From 56a86ce36e09fdedeb84b5ebfa8f83f7778edf4a Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Tue, 4 Aug 2020 16:38:02 +0700 Subject: [PATCH 2062/2522] Add CreateDir --- .../plugins/hadoop/hadoop_filesystem.cc | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc index 7b7a575873a..7e12fc9483f 100644 --- a/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc @@ -525,6 +525,21 @@ void DeleteFile(const TF_Filesystem* filesystem, const char* path, TF_SetStatus(status, TF_OK, ""); } +void CreateDir(const TF_Filesystem* filesystem, const char* path, + TF_Status* status) { + auto libhdfs = static_cast(filesystem->plugin_filesystem); + auto fs = Connect(libhdfs, path, status); + if (TF_GetCode(status) != TF_OK) return; + + std::string scheme, namenode, hdfs_path; + ParseHadoopPath(path, &scheme, &namenode, &hdfs_path); + + if (libhdfs->hdfsCreateDirectory(fs, hdfs_path.c_str()) != 0) + TF_SetStatusFromIOError(status, errno, path); + else + 
TF_SetStatus(status, TF_OK, ""); +} + // TODO(vnvo2409): Implement later } // namespace tf_hadoop_filesystem From 209ff596bd429599c8bc7039c15094b169ff605f Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Tue, 4 Aug 2020 16:46:07 +0700 Subject: [PATCH 2063/2522] Add DeleteDir --- .../plugins/hadoop/hadoop_filesystem.cc | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc index 7e12fc9483f..76d2538cd31 100644 --- a/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc @@ -540,6 +540,42 @@ void CreateDir(const TF_Filesystem* filesystem, const char* path, TF_SetStatus(status, TF_OK, ""); } +void DeleteDir(const TF_Filesystem* filesystem, const char* path, + TF_Status* status) { + auto libhdfs = static_cast(filesystem->plugin_filesystem); + auto fs = Connect(libhdfs, path, status); + if (TF_GetCode(status) != TF_OK) return; + + std::string scheme, namenode, hdfs_path; + ParseHadoopPath(path, &scheme, &namenode, &hdfs_path); + + // Count the number of entries in the directory, and only delete if it's + // non-empty. This is consistent with the interface, but note that there's + // a race condition where a file may be added after this check, in which + // case the directory will still be deleted. + int entries = 0; + auto info = libhdfs->hdfsListDirectory(fs, hdfs_path.c_str(), &entries); + if (info != nullptr) libhdfs->hdfsFreeFileInfo(info, entries); + + // Due to HDFS bug HDFS-8407, we can't distinguish between an error and empty + // folder, especially for Kerberos enable setup, EAGAIN is quite common when + // the call is actually successful. Check again by Stat. + if (info == nullptr && errno != 0) { + TF_FileStatistics stat; + Stat(filesystem, path, &stat, status); + if (TF_GetCode(status) != TF_OK) return; + } + + if (entries > 0) + return TF_SetStatus(status, TF_FAILED_PRECONDITION, + "Cannot delete a non-empty directory."); + + if (libhdfs->hdfsDelete(fs, hdfs_path.c_str(), /*recursive=*/1) != 0) + TF_SetStatusFromIOError(status, errno, path); + else + TF_SetStatus(status, TF_OK, ""); +} + // TODO(vnvo2409): Implement later } // namespace tf_hadoop_filesystem From 0579ea25ffc4230ab2d9c327ac79aeeeaddc56ff Mon Sep 17 00:00:00 2001 From: Stephan Herhut Date: Tue, 4 Aug 2020 02:51:28 -0700 Subject: [PATCH 2064/2522] Roll back "Enable mlir generated GPU kernels by default for cuda builds." It breaks some internal builds. PiperOrigin-RevId: 324776590 Change-Id: If5c7cebc54e450a91f13aec7969c86265253e90c --- .bazelrc | 5 +++++ tensorflow/core/kernels/mlir_generated/BUILD | 4 ++-- tensorflow/core/kernels/mlir_generated/build_defs.bzl | 4 ++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/.bazelrc b/.bazelrc index 6a448b267e0..da988e4c928 100644 --- a/.bazelrc +++ b/.bazelrc @@ -173,6 +173,11 @@ build:using_cuda --define=using_cuda=true build:using_cuda --action_env TF_NEED_CUDA=1 build:using_cuda --crosstool_top=@local_config_cuda//crosstool:toolchain +# Enable the mlir generated GPU kernels only for cuda builds. +build --define=tensorflow_enable_mlir_generated_gpu_kernels=0 +# This is a more specific option, so it takes precedence over the line above for cuda builds. +build:using_cuda --define=tensorflow_enable_mlir_generated_gpu_kernels=1 + # This config refers to building CUDA op kernels with nvcc. 
build:cuda --config=using_cuda build:cuda --define=using_cuda_nvcc=true diff --git a/tensorflow/core/kernels/mlir_generated/BUILD b/tensorflow/core/kernels/mlir_generated/BUILD index 79ccda50c87..9f3efe9d972 100644 --- a/tensorflow/core/kernels/mlir_generated/BUILD +++ b/tensorflow/core/kernels/mlir_generated/BUILD @@ -18,9 +18,9 @@ package( ) config_setting( - name = "mlir_generated_gpu_kernels_enabled", + name = "mlir_generated_gpu_kernels_disabled", define_values = { - "tensorflow_enable_mlir_generated_gpu_kernels": "1", + "tensorflow_enable_mlir_generated_gpu_kernels": "0", }, ) diff --git a/tensorflow/core/kernels/mlir_generated/build_defs.bzl b/tensorflow/core/kernels/mlir_generated/build_defs.bzl index 3426aba94a4..2bf6e8fa3bb 100644 --- a/tensorflow/core/kernels/mlir_generated/build_defs.bzl +++ b/tensorflow/core/kernels/mlir_generated/build_defs.bzl @@ -4,8 +4,8 @@ load("@local_config_cuda//cuda:build_defs.bzl", "cuda_gpu_architectures", "if_cu def if_mlir_generated_gpu_kernels_enabled(if_true, if_false = []): return select({ - "//tensorflow/core/kernels/mlir_generated:mlir_generated_gpu_kernels_enabled": if_true, - "//conditions:default": if_false, + "//tensorflow/core/kernels/mlir_generated:mlir_generated_gpu_kernels_disabled": if_false, + "//conditions:default": if_true, }) def _lookup_file(filegroup, path): From 79b1c712df008964833a2d920b5398d58c48e03b Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Tue, 4 Aug 2020 17:00:08 +0700 Subject: [PATCH 2065/2522] Add RenameFile --- .../plugins/hadoop/hadoop_filesystem.cc | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc index 76d2538cd31..37b0fdc55fb 100644 --- a/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc @@ -576,6 +576,27 @@ void DeleteDir(const TF_Filesystem* filesystem, const char* path, TF_SetStatus(status, TF_OK, ""); } +void RenameFile(const TF_Filesystem* filesystem, const char* src, + const char* dst, TF_Status* status) { + auto libhdfs = static_cast(filesystem->plugin_filesystem); + auto fs = Connect(libhdfs, src, status); + if (TF_GetCode(status) != TF_OK) return; + + std::string scheme, namenode, hdfs_path_src, hdfs_path_dst; + ParseHadoopPath(src, &scheme, &namenode, &hdfs_path_src); + ParseHadoopPath(dst, &scheme, &namenode, &hdfs_path_dst); + + if (libhdfs->hdfsExists(fs, hdfs_path_dst.c_str()) == 0 && + libhdfs->hdfsDelete(fs, hdfs_path_dst.c_str(), /*recursive=*/0) != 0) + return TF_SetStatusFromIOError(status, errno, dst); + + if (libhdfs->hdfsRename(fs, hdfs_path_src.c_str(), hdfs_path_dst.c_str()) != + 0) + TF_SetStatusFromIOError(status, errno, src); + else + TF_SetStatus(status, TF_OK, ""); +} + // TODO(vnvo2409): Implement later } // namespace tf_hadoop_filesystem From 2bdbc19821f6b299d5e8c97e4f2d5311c925a5d8 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Tue, 4 Aug 2020 17:20:22 +0700 Subject: [PATCH 2066/2522] Add GetChildren --- .../plugins/hadoop/hadoop_filesystem.cc | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc index 37b0fdc55fb..fd481d38075 100644 --- a/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc +++ 
b/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc @@ -597,6 +597,44 @@ void RenameFile(const TF_Filesystem* filesystem, const char* src, TF_SetStatus(status, TF_OK, ""); } +int GetChildren(const TF_Filesystem* filesystem, const char* path, + char*** entries, TF_Status* status) { + auto libhdfs = static_cast(filesystem->plugin_filesystem); + auto fs = Connect(libhdfs, path, status); + if (TF_GetCode(status) != TF_OK) return; + + std::string scheme, namenode, hdfs_path; + ParseHadoopPath(path, &scheme, &namenode, &hdfs_path); + + // hdfsListDirectory returns nullptr if the directory is empty. Do a separate + // check to verify the directory exists first. + TF_FileStatistics stat; + Stat(filesystem, path, &stat, status); + if (TF_GetCode(status) != TF_OK) return; + + int num_entries = 0; + auto info = libhdfs->hdfsListDirectory(fs, hdfs_path.c_str(), &num_entries); + if (info == nullptr) { + if (stat.is_directory) { + // Assume it's an empty directory. + TF_SetStatus(status, TF_OK, ""); + return 0; + } + TF_SetStatusFromIOError(status, errno, path); + return -1; + } + *entries = static_cast( + plugin_memory_allocate(num_entries * sizeof((*entries)[0]))); + auto BaseName = [](const std::string& name) { + return name.substr(name.find_last_of('/') + 1); + }; + for (int i = 0; i < num_entries; i++) { + (*entries)[i] = strdup(BaseName(info[i].mName).c_str()); + } + libhdfs->hdfsFreeFileInfo(info, num_entries); + TF_SetStatus(status, TF_OK, ""); +} + // TODO(vnvo2409): Implement later } // namespace tf_hadoop_filesystem From 2c652e599091155a4bd26d5b5dbf4f0af08f65dd Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Tue, 4 Aug 2020 17:31:46 +0700 Subject: [PATCH 2067/2522] Fix GetChildrens --- .../filesystem/plugins/hadoop/hadoop_filesystem.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc index fd481d38075..fe5e1992ff2 100644 --- a/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/hadoop/hadoop_filesystem.cc @@ -601,7 +601,7 @@ int GetChildren(const TF_Filesystem* filesystem, const char* path, char*** entries, TF_Status* status) { auto libhdfs = static_cast(filesystem->plugin_filesystem); auto fs = Connect(libhdfs, path, status); - if (TF_GetCode(status) != TF_OK) return; + if (TF_GetCode(status) != TF_OK) return -1; std::string scheme, namenode, hdfs_path; ParseHadoopPath(path, &scheme, &namenode, &hdfs_path); @@ -610,7 +610,7 @@ int GetChildren(const TF_Filesystem* filesystem, const char* path, // check to verify the directory exists first. 
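As context for the empty-directory handling that follows, here is a rough caller-side sketch of how the new GetChildren is meant to be consumed (illustrative only, not part of the patch; ListSomeDir and the sample entry name are hypothetical, and the TF_Filesystem instance is assumed to have been initialized by the plugin already):

    #include "tensorflow/c/tf_status.h"

    void ListSomeDir(const TF_Filesystem* filesystem, const char* dir) {
      TF_Status* status = TF_NewStatus();
      char** entries = nullptr;
      int n = GetChildren(filesystem, dir, &entries, status);
      if (TF_GetCode(status) == TF_OK) {
        // n == 0 means the directory exists but is empty; n > 0 lists the
        // base names of its children, e.g. entries[0] == "part-00000".
        for (int i = 0; i < n; ++i) {
          // use entries[i]
        }
      } else {
        // n is -1; a missing path surfaces through the Stat() call that
        // follows in the implementation.
      }
      TF_DeleteStatus(status);
    }

Ownership of the entries array passes back to the caller, which is why the implementation allocates the array with plugin_memory_allocate and the strings with strdup rather than using a local container.
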
TF_FileStatistics stat; Stat(filesystem, path, &stat, status); - if (TF_GetCode(status) != TF_OK) return; + if (TF_GetCode(status) != TF_OK) return -1; int num_entries = 0; auto info = libhdfs->hdfsListDirectory(fs, hdfs_path.c_str(), &num_entries); @@ -633,6 +633,7 @@ int GetChildren(const TF_Filesystem* filesystem, const char* path, } libhdfs->hdfsFreeFileInfo(info, num_entries); TF_SetStatus(status, TF_OK, ""); + return num_entries; } // TODO(vnvo2409): Implement later From ec4a78f4431203c2c352feaf07b9f8194dfa1052 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Tue, 4 Aug 2020 17:30:19 +0700 Subject: [PATCH 2068/2522] Bump google-cloud-cpp to 1.16.0 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 76d23dd81ab..a74e287a30f 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -336,8 +336,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): tf_http_archive( name = "com_github_googlecloudplatform_google_cloud_cpp", - sha256 = "839b2d4dcb36a671734dac6b30ea8c298bbeaafcf7a45ee4a7d7aa5986b16569", - strip_prefix = "google-cloud-cpp-1.14.0", + sha256 = "d9d1358f464328b8fd6d24a98d4c2876fde0d3fdb06c8b6bd617be7fb9b0fbac", + strip_prefix = "google-cloud-cpp-1.16.0", repo_mapping = { "@com_github_curl_curl": "@curl", }, @@ -346,8 +346,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): "//third_party/systemlibs:google_cloud_cpp.google.cloud.bigtable.BUILD": "google/cloud/bigtable/BUILD", }, urls = [ - "https://storage.googleapis.com/mirror.tensorflow.org/github.com/googleapis/google-cloud-cpp/archive/v1.14.0.tar.gz", - "https://github.com/googleapis/google-cloud-cpp/archive/v1.14.0.tar.gz", + "https://storage.googleapis.com/mirror.tensorflow.org/github.com/googleapis/google-cloud-cpp/archive/v1.16.0.tar.gz", + "https://github.com/googleapis/google-cloud-cpp/archive/v1.16.0.tar.gz", ], ) From 2f00b55557703302d0f4ff43212638574e63ab87 Mon Sep 17 00:00:00 2001 From: Ben Barsdell Date: Tue, 4 Aug 2020 20:55:49 +1000 Subject: [PATCH 2069/2522] Enable depthwise convs in auto_mixed_precision - These are well-supported as of CUDNN v8. - Also adds a Python test. --- .../optimizers/auto_mixed_precision_lists.h | 10 +++--- .../grappler/auto_mixed_precision_test.py | 35 +++++++++++++++++++ 2 files changed, 40 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/auto_mixed_precision_lists.h b/tensorflow/core/grappler/optimizers/auto_mixed_precision_lists.h index 6643149a6e5..ce0af4ac4b1 100644 --- a/tensorflow/core/grappler/optimizers/auto_mixed_precision_lists.h +++ b/tensorflow/core/grappler/optimizers/auto_mixed_precision_lists.h @@ -126,11 +126,6 @@ class AutoMixedPrecisionListsCuda : public AutoMixedPrecisionLists { "GRUBlockCellGrad", "LSTMBlockCell", "LSTMBlockCellGrad", - // TODO(benbarsdell): Enable these when fast and safe fp16 kernels are - // available for depthwise convolutions. 
- // "DepthwiseConv2dNative", - // "DepthwiseConv2dNativeBackpropFilter", - // "DepthwiseConv2dNativeBackpropInput", "MatMul", }; if (cuda_version_ >= 9010) { @@ -146,6 +141,11 @@ class AutoMixedPrecisionListsCuda : public AutoMixedPrecisionLists { list.insert("Conv3DBackpropInput"); list.insert("Conv3DBackpropInputV2"); } + if (cudnn_version_ >= 8000) { + list.insert("DepthwiseConv2dNative"); + list.insert("DepthwiseConv2dNativeBackpropFilter"); + list.insert("DepthwiseConv2dNativeBackpropInput"); + } UpdateList("ALLOWLIST", &list); // For backwards compatibility, keeping the original env variable here. // TODO(reedwm): This should be removed if we don't have active users. diff --git a/tensorflow/python/grappler/auto_mixed_precision_test.py b/tensorflow/python/grappler/auto_mixed_precision_test.py index 539c2bca9f3..f7f3777f7a9 100644 --- a/tensorflow/python/grappler/auto_mixed_precision_test.py +++ b/tensorflow/python/grappler/auto_mixed_precision_test.py @@ -138,6 +138,11 @@ def _conv_pool(x): return h_pool2 +def _depthwise_conv2d(x, w): + """Returns a 2d depthwise convolution layer with full stride.""" + return nn.depthwise_conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME') + + def _simple_loop(x, functor): """Simple loop whose body is provided by the functor.""" init = (constant_op.constant(0), x) @@ -566,6 +571,36 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase): tol = 5e-3 if mode == 'mkl' else 1e-3 self.assertAllClose(output_val_ref, output_val, atol=tol, rtol=tol) + # TODO(benbarsdell): This test has not been tried with MKL. + @parameterized.parameters(['cuda']) + @test_util.run_deprecated_v1 + @test_util.disable_xla('This test does not pass with XLA') + def test_depthwise_conv2d(self, mode): + """Test grad ops with depthwise convolution2d graph.""" + self._maybe_skip(mode) + random_seed.set_random_seed(0) + x = _input([2, 8, 8, 1]) + f = _weight([3, 3, 1, 4]) + y = _depthwise_conv2d(x, f) + y = array_ops.identity(y) + optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=0.01) + g = optimizer.compute_gradients(y, [x, f]) + output = (y, g) + + output_val_ref, output_val, cost_graph = self._run(mode, output) + node_map = _build_node_map(cost_graph.node) + self._assert_output_f16(mode, node_map, 'depthwise') + self._assert_output_f16( + mode, node_map, + 'gradients/depthwise_grad/DepthwiseConv2dNativeBackpropInput') + self._assert_output_f16( + mode, node_map, + 'gradients/depthwise_grad/DepthwiseConv2dNativeBackpropFilter') + + output_val_ref, output_val, cost_graph = self._run(mode, output) + tol = 2e-3 + self.assertAllClose(output_val_ref, output_val, atol=tol, rtol=tol) + @parameterized.parameters(['cuda', 'mkl']) @test_util.run_v1_only('b/138749235') @test_util.disable_xla('This test does not pass with XLA') From 1b64996606a5a17335f1ef52ae902bae359dfea0 Mon Sep 17 00:00:00 2001 From: Thai Nguyen Date: Tue, 4 Aug 2020 05:56:30 -0700 Subject: [PATCH 2070/2522] Add support for additional deps in tflite_flex_android_library PiperOrigin-RevId: 324796318 Change-Id: I8590cf68360f486c6ef4c6014d91969a0fab3954 --- tensorflow/lite/delegates/flex/build_def.bzl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/lite/delegates/flex/build_def.bzl b/tensorflow/lite/delegates/flex/build_def.bzl index 9b0771e79e6..b4965d1076e 100644 --- a/tensorflow/lite/delegates/flex/build_def.bzl +++ b/tensorflow/lite/delegates/flex/build_def.bzl @@ -234,6 +234,7 @@ def tflite_flex_jni_library( def tflite_flex_android_library( name, models = [], + 
additional_deps = [], custom_package = "org.tensorflow.lite.flex", visibility = ["//visibility:private"]): """A rule to generate an android library based on the selective-built jni library. @@ -243,12 +244,14 @@ def tflite_flex_android_library( models: TFLite models used for selective build. The library will only include ops and kernels to support these models. If empty, the library will include all Tensorflow ops and kernels. + additional_deps: Dependencies for additional TF ops. custom_package: Java package for which java sources will be generated. visibility: visibility of the generated rules. """ tflite_flex_jni_library( name = name, models = models, + additional_deps = additional_deps, visibility = visibility, ) From ea6516dcadbaf0f6cc01d2c2168d3ba0b5dab895 Mon Sep 17 00:00:00 2001 From: Stephan Herhut Date: Tue, 4 Aug 2020 06:06:23 -0700 Subject: [PATCH 2071/2522] Add a shape_to_descriptors pass that combines the various shape lowering patterns needed for code generation with descriptors. PiperOrigin-RevId: 324797518 Change-Id: Icb0a836668c273773834249ce6ad7b06c6aa200a --- .../compiler/mlir/tools/kernel_gen/BUILD | 1 - .../mlir/tools/kernel_gen/tests/BUILD | 19 ----- .../kernel_gen/tests/embed_tf_framework.mlir | 37 --------- .../mlir/tools/kernel_gen/tests/invalid.mlir | 7 -- .../mlir/tools/kernel_gen/tests/ops.mlir | 19 ----- .../tests/tf_framework_legalize_to_llvm.mlir | 75 ------------------- .../mlir/tools/kernel_gen/transforms/BUILD | 22 ++++++ .../mlir/tools/kernel_gen/transforms/passes.h | 9 +++ .../tools/kernel_gen/transforms/passes.td | 9 ++- .../kernel_gen/transforms/register_passes.cc | 2 - .../transforms/shape_to_descriptors.cc | 72 ++++++++++++++++++ 11 files changed, 110 insertions(+), 162 deletions(-) delete mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/tests/BUILD delete mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/tests/embed_tf_framework.mlir delete mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/tests/invalid.mlir delete mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/tests/ops.mlir delete mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_framework_legalize_to_llvm.mlir create mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/transforms/shape_to_descriptors.cc diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/BUILD b/tensorflow/compiler/mlir/tools/kernel_gen/BUILD index 32fae8a8305..b40d6cb3abf 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/BUILD +++ b/tensorflow/compiler/mlir/tools/kernel_gen/BUILD @@ -61,7 +61,6 @@ tf_cc_binary( name = "kernel-gen-opt", visibility = ["//tensorflow/compiler/mlir/tools/kernel_gen/tests:__pkg__"], deps = [ - "//tensorflow/compiler/mlir/tensorflow:tensorflow_dialect_registration", "//tensorflow/compiler/mlir/tools/kernel_gen/ir:tf_framework_dialect_registration", "//tensorflow/compiler/mlir/tools/kernel_gen/transforms:passes", "@llvm-project//mlir:AllPassesAndDialects", diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tests/BUILD b/tensorflow/compiler/mlir/tools/kernel_gen/tests/BUILD deleted file mode 100644 index db878df991b..00000000000 --- a/tensorflow/compiler/mlir/tools/kernel_gen/tests/BUILD +++ /dev/null @@ -1,19 +0,0 @@ -load("//tensorflow/compiler/mlir:glob_lit_test.bzl", "glob_lit_tests") - -package(licenses = ["notice"]) - -glob_lit_tests( - data = [":test_utilities"], - driver = "@llvm-project//mlir:run_lit.sh", - test_file_exts = ["mlir"], -) - -# Bundle together all of the test utilities that are used by tests. 
-filegroup( - name = "test_utilities", - testonly = True, - data = [ - "//tensorflow/compiler/mlir/tools/kernel_gen:kernel-gen-opt", - "@llvm-project//llvm:FileCheck", - ], -) diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tests/embed_tf_framework.mlir b/tensorflow/compiler/mlir/tools/kernel_gen/tests/embed_tf_framework.mlir deleted file mode 100644 index bb0f1926cda..00000000000 --- a/tensorflow/compiler/mlir/tools/kernel_gen/tests/embed_tf_framework.mlir +++ /dev/null @@ -1,37 +0,0 @@ -// RUN: kernel-gen-opt %s -embed-tf-framework -split-input-file | FileCheck %s - -// CHECK-LABEL: func @tf_entry( -// CHECK-SAME: [[CTX:%.*]]: !tf_framework.op_kernel_context, -// CHECK-SAME: [[SIZE_0:%.*]]: index, -// CHECK-SAME: [[SIZE_2:%.*]]: index) -> index attributes {tf_entry} { -func @tf_entry(%size_0 : index , %size_2 : index) -> index - attributes {tf_entry} { - %buf = alloc(%size_0, %size_2)[] : memref - dealloc %buf : memref - std.return %size_0 : index -} -// CHECK-NEXT: [[VAL_3:%.*]] = tf_framework.alloc_raw -// CHECK-SAME: ([[CTX]], [[SIZE_0]], [[SIZE_2]]) : memref -// CHECK-NEXT: tf_framework.dealloc_raw([[CTX]], [[VAL_3]]) : memref -// CHECK-NEXT: return [[SIZE_0]] : index - -// ----- - -// CHECK-LABEL: func @non_tf_entry( -// CHECK-SAME: [[SIZE_0:%.*]]: index, [[SIZE_2:%.*]]: index) -> index -func @non_tf_entry(%size_0 : index , %size_2 : index) -> index { - std.return %size_0 : index -} - -// ----- - -// CHECK-LABEL: func @tf_entry( -func @tf_entry(%size : index) attributes {tf_entry} { - %buf = alloc()[%size] : memref<64xf32, affine_map<(d0)[s0] -> (d0 + s0)>> - dealloc %buf : memref<64xf32, affine_map<(d0)[s0] -> (d0 + s0)>> - std.return -} -// CHECK_NOT: alloc_raw -// CHECK: alloc() -// CHECK_NOT: dealloc_raw -// CHECK: dealloc % diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tests/invalid.mlir b/tensorflow/compiler/mlir/tools/kernel_gen/tests/invalid.mlir deleted file mode 100644 index 1d1b3319515..00000000000 --- a/tensorflow/compiler/mlir/tools/kernel_gen/tests/invalid.mlir +++ /dev/null @@ -1,7 +0,0 @@ -// RUN: kernel-gen-opt %s -split-input-file -verify-diagnostics - -func @alloc_raw(%ctx: !tf_framework.op_kernel_context, %size : index) { - // expected-error @+1 {{`dyn_sizes` count 1 does not match dynamic dimensions}} - %buf = tf_framework.alloc_raw(%ctx, %size) : memref - return -} diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tests/ops.mlir b/tensorflow/compiler/mlir/tools/kernel_gen/tests/ops.mlir deleted file mode 100644 index 19974ec9482..00000000000 --- a/tensorflow/compiler/mlir/tools/kernel_gen/tests/ops.mlir +++ /dev/null @@ -1,19 +0,0 @@ -// RUN: kernel-gen-opt %s | FileCheck %s -// Verify the printed output can be parsed. -// RUN: kernel-gen-opt %s | kernel-gen-opt | FileCheck %s -// Verify the generic form can be parsed. 
-// RUN: kernel-gen-opt -mlir-print-op-generic %s | kernel-gen-opt | FileCheck %s - -// CHECK-LABEL: func @alloc_raw -func @alloc_raw(%ctx: !tf_framework.op_kernel_context, - %size_0 : index , %size_2 : index) { - %buf_0 = tf_framework.alloc_raw(%ctx) : memref<10xi8> - %buf_1 = tf_framework.alloc_raw(%ctx, %size_0, %size_2) : memref - return -} - -// CHECK-LABEL: func @dealloc_raw -func @dealloc_raw(%ctx: !tf_framework.op_kernel_context, %memref : memref) { - tf_framework.dealloc_raw(%ctx, %memref) : memref - return -} diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_framework_legalize_to_llvm.mlir b/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_framework_legalize_to_llvm.mlir deleted file mode 100644 index 77328aa7738..00000000000 --- a/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_framework_legalize_to_llvm.mlir +++ /dev/null @@ -1,75 +0,0 @@ -// RUN: kernel-gen-opt %s -test-tf-framework-legalize-to-llvm -split-input-file | FileCheck %s - -// CHECK: llvm.func @_mlir_ciface_tf_alloc_raw -// CHECK-SAME: (!llvm<"i8*">, !llvm.i64) -> !llvm<"i8*"> - -// CHECK-LABEL: llvm.func @alloc_raw( -// CHECK-SAME: [[TF_CTX:%.*]]: !llvm<"i8*">, -// CHECK-SAME: [[SIZE_0:%.*]]: !llvm.i64, -// CHECK-SAME: [[SIZE_2:%.*]]: !llvm.i64) -> [[DESC_TY:!.*]] { -func @alloc_raw(%ctx: !tf_framework.op_kernel_context, - %size_0 : index , %size_2 : index) -> memref { - %buf = tf_framework.alloc_raw(%ctx, %size_0, %size_2) : memref - std.return %buf : memref -} -// Compute number of elements. -// CHECK: [[SIZE_1:%.*]] = llvm.mlir.constant(10 : index) : !llvm.i64 -// CHECK: [[NUM_ELEM_0:%.*]] = llvm.mul [[SIZE_0]], [[SIZE_1]] : !llvm.i64 -// CHECK: [[NUM_ELEM_1:%.*]] = llvm.mul [[NUM_ELEM_0]], [[SIZE_2]] : !llvm.i64 - -// Compute the size of an individual element. -// CHECK: [[NULL:%.*]] = llvm.mlir.null : !llvm<"float*"> -// CHECK: [[C1:%.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 -// CHECK: [[GEP:%.*]] = llvm.getelementptr [[NULL]]{{\[}}[[C1]]] -// CHECK-SAME: (!llvm<"float*">, !llvm.i64) -> !llvm<"float*"> -// CHECK: [[SIZE_OF_FLOAT:%.*]] = llvm.ptrtoint [[GEP]] -// CHECK-SAME: !llvm<"float*"> to !llvm.i64 - -// Allocate memory. -// CHECK: [[NUM_BYTES:%.*]] = llvm.mul [[NUM_ELEM_1]], [[SIZE_OF_FLOAT]] -// CHECK: [[BYTES_PTR:%.*]] = llvm.call @{{.*}}([[TF_CTX]], [[NUM_BYTES]]) -// CHECK-SAME: (!llvm<"i8*">, !llvm.i64) -> !llvm<"i8*"> - -// Build memref descriptor. -// CHECK: [[DESC_0:%.*]] = llvm.mlir.undef : [[DESC_TY]] - -// Set pointers and offset. -// CHECK: [[FLOAT_PTR:%.*]] = llvm.bitcast [[BYTES_PTR]] -// CHECK-SAME: !llvm<"i8*"> to !llvm<"float*"> -// CHECK: [[DESC_1:%.*]] = llvm.insertvalue [[FLOAT_PTR]], [[DESC_0]][0] -// CHECK: [[DESC_2:%.*]] = llvm.insertvalue [[FLOAT_PTR]], [[DESC_1]][1] -// CHECK: [[C0:%.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: [[DESC_3:%.*]] = llvm.insertvalue [[C0]], [[DESC_2]][2] : [[DESC_TY]] - -// Set sizes and strides. 
-// CHECK: [[STRIDE_2:%.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 -// CHECK: [[DESC_4:%.*]] = llvm.insertvalue [[SIZE_2]], [[DESC_3]][3, 2] -// CHECK: [[DESC_5:%.*]] = llvm.insertvalue [[STRIDE_2]], [[DESC_4]][4, 2] -// CHECK: [[STRIDE_1:%.*]] = llvm.mul [[STRIDE_2]], [[SIZE_2]] : !llvm.i64 -// CHECK: [[DESC_6:%.*]] = llvm.insertvalue [[SIZE_1]], [[DESC_5]][3, 1] -// CHECK: [[DESC_7:%.*]] = llvm.insertvalue [[STRIDE_1]], [[DESC_6]][4, 1] -// CHECK: [[STRIDE_0:%.*]] = llvm.mul [[STRIDE_1]], [[SIZE_1]] : !llvm.i64 -// CHECK: [[DESC_8:%.*]] = llvm.insertvalue [[SIZE_0]], [[DESC_7]][3, 0] -// CHECK: [[DESC_9:%.*]] = llvm.insertvalue [[STRIDE_0]], [[DESC_8]][4, 0] -// CHECK: llvm.return [[DESC_9]] : [[DESC_TY]] - -// ----- - -// CHECK: llvm.func @_mlir_ciface_tf_dealloc_raw(!llvm<"i8*">) - -// CHECK-LABEL: llvm.func @dealloc_raw( -// CHECK-SAME: [[TF_CTX:%.*]]: !llvm<"i8*">, -func @dealloc_raw(%ctx: !tf_framework.op_kernel_context, - %memref : memref) { - tf_framework.dealloc_raw(%ctx, %memref) : memref - return -} -// Extract allocated ptr from the memref descriptor. -// CHECK: %{{.*}} = llvm.mlir.undef : [[DESC_TY:!.*]] -// CHECK: [[FLOAT_PTR:%.*]] = llvm.extractvalue %{{.*}}[0] : [[DESC_TY]] -// CHECK-NEXT: [[VOID_PTR:%.*]] = llvm.bitcast [[FLOAT_PTR]] -// CHECK-SAME: !llvm<"float*"> to !llvm<"i8*"> - -// Deallocate. -// CHECK: llvm.call @_mlir_ciface_tf_dealloc_raw( -// CHECK-SAME: [[TF_CTX]], [[VOID_PTR]]) : (!llvm<"i8*">, !llvm<"i8*">) -> () diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD index 613422e6128..c0808ae08c4 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD @@ -35,6 +35,27 @@ cc_library( ], ) +cc_library( + name = "shape_to_descriptors", + srcs = ["shape_to_descriptors.cc"], + hdrs = [ + "passes.h", + ], + deps = [ + "@llvm-project//llvm:Support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:SCFDialect", + "@llvm-project//mlir:Shape", + "@llvm-project//mlir:ShapeToSCF", + "@llvm-project//mlir:ShapeToStandard", + "@llvm-project//mlir:ShapeTransforms", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:Support", + "@llvm-project//mlir:Transforms", + ], +) + gentbl( name = "tf_framework_passes_inc_gen", tbl_outs = [("-gen-pass-decls -name TFFramework", "tf_framework_passes.h.inc")], @@ -53,6 +74,7 @@ cc_library( hdrs = ["passes.h"], deps = [ ":embed_tf_framework", + ":shape_to_descriptors", ":tf_framework_legalize_to_llvm", ":tf_framework_passes_inc_gen", "//tensorflow/compiler/mlir/tools/kernel_gen/ir:tf_framework_ops", diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h index c6aaeb92c56..5e240b8d01c 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h @@ -23,6 +23,7 @@ namespace mlir { class ModuleOp; template class OperationPass; +class Pass; namespace kernel_gen { namespace tf_framework { @@ -38,6 +39,14 @@ createTestTFFrameworkLegalizeToLLVMPass(); std::unique_ptr > createEmbedTFFrameworkPass(); } // namespace tf_framework + +namespace transforms { + +// Pass to tranform shape computations in shape dialect to standard and scf +// using memref descriptors. 
+std::unique_ptr CreateShapeToDescriptorsPass(); + +} // namespace transforms } // namespace kernel_gen } // namespace mlir diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.td b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.td index 8c4d5801f51..61720674926 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.td +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.td @@ -21,12 +21,17 @@ include "mlir/Pass/PassBase.td" def TestTFFrameworkLegalizeToLLVMPass : Pass<"test-tf-framework-legalize-to-llvm", "ModuleOp"> { let summary = "Test pass for applying TF Framework -> LLVM patterns."; - let constructor = "createTestTFFrameworkLegalizeToLLVMPass()"; + let constructor = "tf_framework::createTestTFFrameworkLegalizeToLLVMPass()"; } def EmbedTFFrameworkPass : Pass<"embed-tf-framework", "ModuleOp"> { let summary = "Pass to embed TF Framework for allocation and error reporting"; - let constructor = "createEmbedTFFrameworkPass()"; + let constructor = "tf_framework::createEmbedTFFrameworkPass()"; +} + +def ShapeToDescriptorsPass : Pass<"test-shape-to-descriptors", "ModuleOp"> { + let summary = "Pass to transform shape computations to descriptors"; + let constructor = "transforms::CreateShapeToDescriptorsPass()"; } #endif // TF_FRAMEWORK_PASSES diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/register_passes.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/register_passes.cc index b9cdb2085a3..3a42d03355c 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/register_passes.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/register_passes.cc @@ -18,12 +18,10 @@ limitations under the License. namespace mlir { namespace kernel_gen { -namespace tf_framework { #define GEN_PASS_REGISTRATION #include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_passes.h.inc" bool register_all_passes = ([] { registerTFFrameworkPasses(); }(), true); -} // namespace tf_framework } // namespace kernel_gen } // namespace mlir diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/shape_to_descriptors.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/shape_to_descriptors.cc new file mode 100644 index 00000000000..32c2f9641b5 --- /dev/null +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/shape_to_descriptors.cc @@ -0,0 +1,72 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// This file combines patterns for lowering shape dialect to standard ops, +// structured control flow and descriptors. 
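Before the implementation, a minimal sketch of how this pass might be plugged into a pipeline, assuming only the CreateShapeToDescriptorsPass() declaration added to passes.h above (AddShapeLoweringPasses is a hypothetical name, not part of the patch):

    #include "mlir/Pass/PassManager.h"
    #include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h"

    void AddShapeLoweringPasses(mlir::PassManager& pm) {
      // Lower shape-dialect computations to standard and SCF ops so that the
      // later descriptor-based code generation can proceed.
      pm.addPass(mlir::kernel_gen::transforms::CreateShapeToDescriptorsPass());
    }

The registration in register_passes.cc should also make the same lowering reachable from kernel-gen-opt through the -test-shape-to-descriptors flag defined in passes.td above.
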
+ +#include "mlir/Conversion/ShapeToSCF/ShapeToSCF.h" // from @llvm-project +#include "mlir/Conversion/ShapeToStandard/ShapeToStandard.h" // from @llvm-project +#include "mlir/Dialect/SCF/SCF.h" // from @llvm-project +#include "mlir/Dialect/Shape/IR/Shape.h" // from @llvm-project +#include "mlir/Dialect/Shape/Transforms/Passes.h" // from @llvm-project +#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project +#include "mlir/IR/MLIRContext.h" // from @llvm-project +#include "mlir/IR/PatternMatch.h" // from @llvm-project +#include "mlir/Transforms/DialectConversion.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h" + +namespace mlir { +namespace kernel_gen { +namespace transforms { + +namespace { + +struct ShapeToDescriptorsPass + : public PassWrapper> { + public: + ShapeToDescriptorsPass() = default; + + void runOnOperation() override { + MLIRContext &ctx = getContext(); + + // Setup target legality. + ConversionTarget target(ctx); + target.addIllegalDialect(); + target.addLegalDialect(); + target.addLegalDialect(); + + // Setup conversion patterns. + OwningRewritePatternList patterns; + populateShapeRewritePatterns(&ctx, patterns); + populateShapeToStandardConversionPatterns(patterns, &ctx); + populateShapeToSCFConversionPatterns(patterns, &ctx); + + // Apply conversion. + auto module = getOperation(); + if (failed(applyPartialConversion(module, target, patterns))) + signalPassFailure(); + } +}; + +} // namespace + +std::unique_ptr CreateShapeToDescriptorsPass() { + return std::make_unique(); +} + +} // namespace transforms +} // namespace kernel_gen +} // namespace mlir From 3b6172f744fca952af09113eee709d1bed0db4de Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Tue, 2 Jun 2020 20:11:18 +0200 Subject: [PATCH 2072/2522] Add TF-TRT op converter tests for reduce ops --- .../tf2tensorrt/convert/convert_nodes.cc | 2 +- .../tf2tensorrt/convert/convert_nodes_test.cc | 142 ++++++++++++++++++ 2 files changed, 143 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc index 369b339d01a..eaff361f09d 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc @@ -4479,7 +4479,7 @@ Status ConvertReduce(OpConverterParams* params) { int trt_axis; TF_RETURN_IF_ERROR( ConvertAxis(tf_axes_list[i], tensor->getDimensions().nbDims, - node_def.name(), /*use_implicit_batch=*/true, &trt_axis)); + node_def.name(), params->use_implicit_batch, &trt_axis)); axes |= (1 << trt_axis); } diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc index 52d05ff8225..0de2916857b 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc @@ -5052,6 +5052,148 @@ TEST_P(OpConverterTest3, ConvertGather) { } } +template +NodeDef CreateReduceOp(DataType tf_type, bool keep_dims) { + Scope s = Scope::NewRootScope(); + auto input = ops::Placeholder(s.WithOpName("input"), tf_type); + auto axis = ops::Placeholder(s.WithOpName("axis"), DT_INT32); + typename OpType::Attrs op_attrs; + op_attrs.keep_dims_ = keep_dims; + auto op = OpType(s.WithOpName("my_reduce"), input, axis, op_attrs); + return op.operation.node()->def(); +} + +// Applies reduction op on sub-sequences of input +// output[i] = reduce(input[m * i : m * (i +1)]) +std::vector 
CalcReduce(string op_name, std::vector input, int m, + float (*op)(float, float), float init) { + std::vector output(input.size() / m); + for (int i = 0; i < output.size(); i++) { + auto begin = input.begin() + i * m; + auto end = input.begin() + (i + 1) * m; + output[i] = std::accumulate(begin, end, init, op); + if (op_name == "Mean") { + output[i] /= m; + } + } + return output; +} +TEST_P(OpConverterTest1, ConvertReduce) { + { + // Input is weights, should fail. + Reset(); + const NodeDef node_def = CreateReduceOp(tf_type, false); + AddTestWeights("input", {1, 2, 3}, {-3, -2, -1, 0, 1, 2}); + AddTestWeights("axis", {1}, {1}); + RunValidationAndConversion( + node_def, error::UNIMPLEMENTED, + "The input \"input\" for Sum must be a tensor, at my_reduce"); + } + { + // Axis is weights, should fail. + Reset(); + const NodeDef node_def = CreateReduceOp(tf_type, false); + AddTestTensor("input", {1, 2, 3}, {-3, -2, -1, 0, 1, 2}); + AddTestTensor("axis", {1}, DT_INT32, {1}); + RunValidationAndConversion( + node_def, error::UNIMPLEMENTED, + "The input \"axis\" for Sum must be a constant, at my_reduce"); + } + using OpFunc = std::function; + using ValFunc = float (*)(float, float); + struct ReduceTestDescriptor { + string name; + OpFunc get_node; + ValFunc val_func; + float init_val; + }; + std::vector op_test_info{ + {"Sum", CreateReduceOp, [](float x, float y) { return x + y; }, + 0}, + {"Prod", CreateReduceOp, + [](float x, float y) { return x * y; }, 1}, + {"Mean", CreateReduceOp, + [](float x, float y) { return x + y; }, 0}, + {"Min", CreateReduceOp, + [](float x, float y) { return y < x ? y : x; }, 1000}, + {"Max", CreateReduceOp, + [](float x, float y) { return x < y ? y : x; }, -1000}}; + + std::vector input_values{1, 2, 3, 4, 5, 6}; + struct TestParams { + std::vector input_dims; + std::vector input_values; + // Helper array contains the same elements as input but permuted in a way + // that the reduction can be calculated over contiguous elements using + // CalcReduce + std::vector helper_array; + std::vector axis; + int stride; // product of input_dims along axis + Status conversion_status; + }; + std::vector params{ + // Out of range tests + TestParams{{2, 3, 1}, input_values, input_values, {3}, 3}, + TestParams{{2, 3, 1}, input_values, input_values, {-4}, 3}, + // Ok tests + TestParams{{2, 3, 1}, input_values, {1, 4, 2, 5, 3, 6}, {0}, 2}, + TestParams{{2, 3, 1}, input_values, input_values, {1}, 3}, + TestParams{{2, 3, 1}, input_values, input_values, {2}, 1}, + TestParams{{2, 3, 1}, input_values, input_values, {0, 1}, 6}, + // Ok tests with negative axis values + TestParams{{2, 3, 1}, input_values, {1, 4, 2, 5, 3, 6}, {-3}, 2}, + TestParams{{2, 3, 1}, input_values, input_values, {-2}, 3}, + TestParams{{2, 3, 1}, input_values, input_values, {-1}, 1}, + TestParams{{2, 3, 1}, input_values, input_values, {-3, 1}, 6}, + }; + + for (bool keep_dims : {false, true}) { + for (auto& op : op_test_info) { + for (auto p : params) { + SCOPED_TRACE(StrCat(op.name, keep_dims ? 
"keep_dims" : "")); + Reset(); + NodeDef node_def = op.get_node(tf_type, keep_dims); + + AddTestTensor("input", p.input_dims, p.input_values); + AddTestWeights("axis", {static_cast(p.axis.size())}, + p.axis); + std::vector expected_output_dims(p.input_dims); + + // Set expected output dim and conversion error messages + for (int ax : p.axis) { + int rank = p.input_dims.size(); + if (ax >= rank || ax < -rank) { + p.conversion_status = + errors::InvalidArgument("Axis value of ", ax, + " is out of bounds, must be in " + "range [", + -rank, ", ", rank, "), at my_reduce"); + } else { + int ax_positive = ax >= 0 ? ax : ax + rank; + // Zero marks elements that we will remove later. + expected_output_dims[ax_positive] = keep_dims ? 1 : 0; + if (trt_mode == TrtTestMode::kImplicitBatch && + (ax == 0 || ax == -rank)) { + p.conversion_status = errors::Unimplemented( + "TensorRT does not allow manipulation of the batch " + "dimension, at my_reduce"); + } + } + } + expected_output_dims.erase(std::remove(expected_output_dims.begin(), + expected_output_dims.end(), 0), + expected_output_dims.end()); + VLOG(2) << "out dims " << expected_output_dims; + std::vector expected_values = CalcReduce( + op.name, p.helper_array, p.stride, op.val_func, op.init_val); + TestOpConverter("my_reduce", node_def, expected_output_dims, + p.conversion_status, Status::OK(), + ArrayFloatNear(expected_values)); + } + } + } +} + NodeDef CreateCastOp(DataType tf_type) { Scope s = Scope::NewRootScope(); auto input = ops::Placeholder(s.WithOpName("input"), DT_HALF); From 447537756f834098176fba4abf44b19cce59434e Mon Sep 17 00:00:00 2001 From: Zhenyu Tan Date: Tue, 4 Aug 2020 08:12:40 -0700 Subject: [PATCH 2073/2522] fix skip test feature column part 2: Embedding Column PiperOrigin-RevId: 324813679 Change-Id: I7849ac67794d759e8b3cbd5fb1279e5bbb775b28 --- .../feature_column/feature_column_test.py | 663 +++++++++--------- 1 file changed, 328 insertions(+), 335 deletions(-) diff --git a/tensorflow/python/feature_column/feature_column_test.py b/tensorflow/python/feature_column/feature_column_test.py index d6d4d2eb1a1..e351c5da572 100644 --- a/tensorflow/python/feature_column/feature_column_test.py +++ b/tensorflow/python/feature_column/feature_column_test.py @@ -4794,7 +4794,6 @@ class IndicatorColumnTest(test.TestCase): class EmbeddingColumnTest(test.TestCase, parameterized.TestCase): - @test_util.run_deprecated_v1 def test_defaults(self): categorical_column = fc._categorical_column_with_identity( key='aaa', num_buckets=3) @@ -4816,7 +4815,6 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase): 'aaa': parsing_ops.VarLenFeature(dtypes.int64) }, embedding_column._parse_example_spec) - @test_util.run_deprecated_v1 def test_all_constructor_args(self): categorical_column = fc._categorical_column_with_identity( key='aaa', num_buckets=3) @@ -4845,7 +4843,6 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase): 'aaa': parsing_ops.VarLenFeature(dtypes.int64) }, embedding_column._parse_example_spec) - @test_util.run_deprecated_v1 def test_deep_copy(self): categorical_column = fc._categorical_column_with_identity( key='aaa', num_buckets=3) @@ -4879,7 +4876,6 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase): 'aaa': parsing_ops.VarLenFeature(dtypes.int64) }, embedding_column._parse_example_spec) - @test_util.run_deprecated_v1 def test_invalid_initializer(self): categorical_column = fc._categorical_column_with_identity( key='aaa', num_buckets=3) @@ -4908,25 +4904,24 @@ class 
EmbeddingColumnTest(test.TestCase, parameterized.TestCase): sparse_tensor.SparseTensorValue( indices=[[0, 0], [0, 1]], values=np.array([b'omar', b'stringer'], dtype=np.object_), - dense_shape=[1, 2]), - features['aaa'].eval()) + dense_shape=[1, 2]), features['aaa'].eval()) - @test_util.run_deprecated_v1 def test_transform_feature(self): - a = fc._categorical_column_with_identity(key='aaa', num_buckets=3) - a_embedded = fc._embedding_column(a, dimension=2) - features = { - 'aaa': sparse_tensor.SparseTensor( - indices=((0, 0), (1, 0), (1, 1)), - values=(0, 1, 0), - dense_shape=(2, 2)) - } - outputs = _transform_features(features, [a, a_embedded]) - output_a = outputs[a] - output_embedded = outputs[a_embedded] - with _initialized_session(): - _assert_sparse_tensor_value(self, self.evaluate(output_a), - self.evaluate(output_embedded)) + with ops.Graph().as_default(): + a = fc._categorical_column_with_identity(key='aaa', num_buckets=3) + a_embedded = fc._embedding_column(a, dimension=2) + features = { + 'aaa': sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 1, 0), + dense_shape=(2, 2)) + } + outputs = _transform_features(features, [a, a_embedded]) + output_a = outputs[a] + output_embedded = outputs[a_embedded] + with _initialized_session(): + _assert_sparse_tensor_value(self, self.evaluate(output_a), + self.evaluate(output_embedded)) @parameterized.named_parameters( { @@ -4946,184 +4941,183 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase): 'use_safe_embedding_lookup': False, 'partition_variables': True, }) - @test_util.run_deprecated_v1 + def test_get_dense_tensor(self, use_safe_embedding_lookup, partition_variables): - # Inputs. - vocabulary_size = 4 - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - # example 2, ids [] - # example 3, ids [1] - indices=((0, 0), (1, 0), (1, 4), (3, 0)), - values=(2, 0, 1, 1), - dense_shape=(4, 5)) + with ops.Graph().as_default(): + # Inputs. + vocabulary_size = 4 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + # example 2, ids [] + # example 3, ids [1] + indices=((0, 0), (1, 0), (1, 4), (3, 0)), + values=(2, 0, 1, 1), + dense_shape=(4, 5)) - # Embedding variable. - embedding_dimension = 2 - embedding_values = ( - (1., 2.), # id 0 - (3., 5.), # id 1 - (7., 11.), # id 2 - (9., 13.) # id 3 - ) + # Embedding variable. + embedding_dimension = 2 + embedding_values = ( + (1., 2.), # id 0 + (3., 5.), # id 1 + (7., 11.), # id 2 + (9., 13.) # id 3 + ) - def _initializer(shape, dtype, partition_info=None): + def _initializer(shape, dtype, partition_info=None): + if partition_variables: + self.assertEqual([vocabulary_size, embedding_dimension], + partition_info.full_shape) + self.assertAllEqual((2, embedding_dimension), shape) + else: + self.assertAllEqual((vocabulary_size, embedding_dimension), shape) + self.assertIsNone(partition_info) + + self.assertEqual(dtypes.float32, dtype) + return embedding_values + + # Expected lookup result, using combiner='mean'. + expected_lookups = ( + # example 0, ids [2], embedding = [7, 11] + (7., 11.), + # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] + (2., 3.5), + # example 2, ids [], embedding = [0, 0] + (0., 0.), + # example 3, ids [1], embedding = [3, 5] + (3., 5.), + ) + + # Build columns. 
+ categorical_column = fc._categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + partitioner = None if partition_variables: - self.assertEqual([vocabulary_size, embedding_dimension], - partition_info.full_shape) - self.assertAllEqual((2, embedding_dimension), shape) + partitioner = partitioned_variables.fixed_size_partitioner(2, axis=0) + with variable_scope.variable_scope('vars', partitioner=partitioner): + embedding_column = fc._embedding_column( + categorical_column, + dimension=embedding_dimension, + initializer=_initializer, + use_safe_embedding_lookup=use_safe_embedding_lookup) + + # Provide sparse input and get dense result. + embedding_lookup = embedding_column._get_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + # Assert expected embedding variable and lookups. + global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + if partition_variables: + self.assertCountEqual(('vars/embedding_weights/part_0:0', + 'vars/embedding_weights/part_1:0'), + tuple([v.name for v in global_vars])) else: + self.assertCountEqual(('vars/embedding_weights:0',), + tuple([v.name for v in global_vars])) + for v in global_vars: + self.assertIsInstance(v, variables_lib.Variable) + with _initialized_session(): + self.assertAllEqual(embedding_values, global_vars[0]) + self.assertAllEqual(expected_lookups, self.evaluate(embedding_lookup)) + + if use_safe_embedding_lookup: + self.assertIn( + 'SparseFillEmptyRows', + [x.type for x in ops.get_default_graph().get_operations()]) + else: + self.assertNotIn( + 'SparseFillEmptyRows', + [x.type for x in ops.get_default_graph().get_operations()]) + + def test_get_dense_tensor_3d(self): + with ops.Graph().as_default(): + # Inputs. + vocabulary_size = 4 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + # example 2, ids [] + # example 3, ids [1] + indices=((0, 0, 0), (1, 1, 0), (1, 1, 4), (3, 0, 0), (3, 1, 2)), + values=(2, 0, 1, 1, 2), + dense_shape=(4, 2, 5)) + + # Embedding variable. + embedding_dimension = 3 + embedding_values = ( + (1., 2., 4.), # id 0 + (3., 5., 1.), # id 1 + (7., 11., 2.), # id 2 + (2., 7., 12.) # id 3 + ) + + def _initializer(shape, dtype, partition_info): self.assertAllEqual((vocabulary_size, embedding_dimension), shape) + self.assertEqual(dtypes.float32, dtype) self.assertIsNone(partition_info) + return embedding_values - self.assertEqual(dtypes.float32, dtype) - return embedding_values + # Expected lookup result, using combiner='mean'. + expected_lookups = ( + # example 0, ids [[2], []], embedding = [[7, 11, 2], [0, 0, 0]] + ((7., 11., 2.), (0., 0., 0.)), + # example 1, ids [[], [0, 1]], embedding + # = mean([[], [1, 2, 4] + [3, 5, 1]]) = [[0, 0, 0], [2, 3.5, 2.5]] + ((0., 0., 0.), (2., 3.5, 2.5)), + # example 2, ids [[], []], embedding = [[0, 0, 0], [0, 0, 0]] + ((0., 0., 0.), (0., 0., 0.)), + # example 3, ids [[1], [2]], embedding = [[3, 5, 1], [7, 11, 2]] + ((3., 5., 1.), (7., 11., 2.)), + ) - # Expected lookup result, using combiner='mean'. - expected_lookups = ( - # example 0, ids [2], embedding = [7, 11] - (7., 11.), - # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] - (2., 3.5), - # example 2, ids [], embedding = [0, 0] - (0., 0.), - # example 3, ids [1], embedding = [3, 5] - (3., 5.), - ) - - # Build columns. 
- categorical_column = fc._categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - partitioner = None - if partition_variables: - partitioner = partitioned_variables.fixed_size_partitioner(2, axis=0) - with variable_scope.variable_scope('vars', partitioner=partitioner): + # Build columns. + categorical_column = fc._categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) embedding_column = fc._embedding_column( categorical_column, dimension=embedding_dimension, - initializer=_initializer, - use_safe_embedding_lookup=use_safe_embedding_lookup) + initializer=_initializer) # Provide sparse input and get dense result. embedding_lookup = embedding_column._get_dense_tensor( _LazyBuilder({'aaa': sparse_input})) - # Assert expected embedding variable and lookups. - global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) - if partition_variables: - self.assertCountEqual(('vars/embedding_weights/part_0:0', - 'vars/embedding_weights/part_1:0'), + # Assert expected embedding variable and lookups. + global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + self.assertCountEqual(('embedding_weights:0',), tuple([v.name for v in global_vars])) - else: - self.assertCountEqual(('vars/embedding_weights:0',), - tuple([v.name for v in global_vars])) - for v in global_vars: - self.assertIsInstance(v, variables_lib.Variable) - with _initialized_session(): - self.assertAllEqual(embedding_values, global_vars[0]) - self.assertAllEqual(expected_lookups, self.evaluate(embedding_lookup)) + with _initialized_session(): + self.assertAllEqual(embedding_values, global_vars[0]) + self.assertAllEqual(expected_lookups, self.evaluate(embedding_lookup)) - if use_safe_embedding_lookup: - self.assertIn('SparseFillEmptyRows', - [x.type for x in ops.get_default_graph().get_operations()]) - else: - self.assertNotIn( - 'SparseFillEmptyRows', - [x.type for x in ops.get_default_graph().get_operations()]) - - @test_util.run_deprecated_v1 - def test_get_dense_tensor_3d(self): - # Inputs. - vocabulary_size = 4 - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - # example 2, ids [] - # example 3, ids [1] - indices=((0, 0, 0), (1, 1, 0), (1, 1, 4), (3, 0, 0), (3, 1, 2)), - values=(2, 0, 1, 1, 2), - dense_shape=(4, 2, 5)) - - # Embedding variable. - embedding_dimension = 3 - embedding_values = ( - (1., 2., 4.), # id 0 - (3., 5., 1.), # id 1 - (7., 11., 2.), # id 2 - (2., 7., 12.) # id 3 - ) - def _initializer(shape, dtype, partition_info): - self.assertAllEqual((vocabulary_size, embedding_dimension), shape) - self.assertEqual(dtypes.float32, dtype) - self.assertIsNone(partition_info) - return embedding_values - - # Expected lookup result, using combiner='mean'. - expected_lookups = ( - # example 0, ids [[2], []], embedding = [[7, 11, 2], [0, 0, 0]] - ((7., 11., 2.), (0., 0., 0.)), - # example 1, ids [[], [0, 1]], embedding - # = mean([[], [1, 2, 4] + [3, 5, 1]]) = [[0, 0, 0], [2, 3.5, 2.5]] - ((0., 0., 0.), (2., 3.5, 2.5)), - # example 2, ids [[], []], embedding = [[0, 0, 0], [0, 0, 0]] - ((0., 0., 0.), (0., 0., 0.)), - # example 3, ids [[1], [2]], embedding = [[3, 5, 1], [7, 11, 2]] - ((3., 5., 1.), (7., 11., 2.)), - ) - - # Build columns. - categorical_column = fc._categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - embedding_column = fc._embedding_column( - categorical_column, - dimension=embedding_dimension, - initializer=_initializer) - - # Provide sparse input and get dense result. 
- embedding_lookup = embedding_column._get_dense_tensor( - _LazyBuilder({ - 'aaa': sparse_input - })) - - # Assert expected embedding variable and lookups. - global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) - self.assertCountEqual(('embedding_weights:0',), - tuple([v.name for v in global_vars])) - with _initialized_session(): - self.assertAllEqual(embedding_values, global_vars[0]) - self.assertAllEqual(expected_lookups, self.evaluate(embedding_lookup)) - - @test_util.run_deprecated_v1 def test_get_dense_tensor_weight_collections(self): - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - # example 2, ids [] - # example 3, ids [1] - indices=((0, 0), (1, 0), (1, 4), (3, 0)), - values=(2, 0, 1, 1), - dense_shape=(4, 5)) + with ops.Graph().as_default(): + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + # example 2, ids [] + # example 3, ids [1] + indices=((0, 0), (1, 0), (1, 4), (3, 0)), + values=(2, 0, 1, 1), + dense_shape=(4, 5)) - # Build columns. - categorical_column = fc._categorical_column_with_identity( - key='aaa', num_buckets=3) - embedding_column = fc._embedding_column(categorical_column, dimension=2) + # Build columns. + categorical_column = fc._categorical_column_with_identity( + key='aaa', num_buckets=3) + embedding_column = fc._embedding_column(categorical_column, dimension=2) - # Provide sparse input and get dense result. - embedding_column._get_dense_tensor( - _LazyBuilder({ - 'aaa': sparse_input - }), weight_collections=('my_vars',)) + # Provide sparse input and get dense result. + embedding_column._get_dense_tensor( + _LazyBuilder({'aaa': sparse_input}), weight_collections=('my_vars',)) - # Assert expected embedding variable and lookups. - global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) - self.assertCountEqual(('embedding_weights:0',), - tuple([v.name for v in global_vars])) - my_vars = ops.get_collection('my_vars') - self.assertCountEqual(('embedding_weights:0',), - tuple([v.name for v in my_vars])) + # Assert expected embedding variable and lookups. + global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + self.assertCountEqual(('embedding_weights:0',), + tuple([v.name for v in global_vars])) + my_vars = ops.get_collection('my_vars') + self.assertCountEqual(('embedding_weights:0',), + tuple([v.name for v in my_vars])) @test_util.run_deprecated_v1 def test_get_dense_tensor_placeholder_inputs(self): @@ -5197,66 +5191,63 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase): input_shape: sparse_input.dense_shape, })) - @test_util.run_deprecated_v1 def test_get_dense_tensor_restore_from_ckpt(self): - # Inputs. - vocabulary_size = 3 - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - # example 2, ids [] - # example 3, ids [1] - indices=((0, 0), (1, 0), (1, 4), (3, 0)), - values=(2, 0, 1, 1), - dense_shape=(4, 5)) + with ops.Graph().as_default(): + # Inputs. + vocabulary_size = 3 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + # example 2, ids [] + # example 3, ids [1] + indices=((0, 0), (1, 0), (1, 4), (3, 0)), + values=(2, 0, 1, 1), + dense_shape=(4, 5)) - # Embedding variable. The checkpoint file contains _embedding_values. - embedding_dimension = 2 - embedding_values = ( - (1., 2.), # id 0 - (3., 5.), # id 1 - (7., 11.) 
# id 2 - ) - ckpt_path = test.test_src_dir_path( - 'python/feature_column/testdata/embedding.ckpt') - ckpt_tensor = 'my_embedding' + # Embedding variable. The checkpoint file contains _embedding_values. + embedding_dimension = 2 + embedding_values = ( + (1., 2.), # id 0 + (3., 5.), # id 1 + (7., 11.) # id 2 + ) + ckpt_path = test.test_src_dir_path( + 'python/feature_column/testdata/embedding.ckpt') + ckpt_tensor = 'my_embedding' - # Expected lookup result, using combiner='mean'. - expected_lookups = ( - # example 0, ids [2], embedding = [7, 11] - (7., 11.), - # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] - (2., 3.5), - # example 2, ids [], embedding = [0, 0] - (0., 0.), - # example 3, ids [1], embedding = [3, 5] - (3., 5.), - ) + # Expected lookup result, using combiner='mean'. + expected_lookups = ( + # example 0, ids [2], embedding = [7, 11] + (7., 11.), + # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] + (2., 3.5), + # example 2, ids [], embedding = [0, 0] + (0., 0.), + # example 3, ids [1], embedding = [3, 5] + (3., 5.), + ) - # Build columns. - categorical_column = fc._categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - embedding_column = fc._embedding_column( - categorical_column, - dimension=embedding_dimension, - ckpt_to_load_from=ckpt_path, - tensor_name_in_ckpt=ckpt_tensor) + # Build columns. + categorical_column = fc._categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column = fc._embedding_column( + categorical_column, + dimension=embedding_dimension, + ckpt_to_load_from=ckpt_path, + tensor_name_in_ckpt=ckpt_tensor) - # Provide sparse input and get dense result. - embedding_lookup = embedding_column._get_dense_tensor( - _LazyBuilder({ - 'aaa': sparse_input - })) + # Provide sparse input and get dense result. + embedding_lookup = embedding_column._get_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) - # Assert expected embedding variable and lookups. - global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) - self.assertCountEqual(('embedding_weights:0',), - tuple([v.name for v in global_vars])) - with _initialized_session(): - self.assertAllEqual(embedding_values, global_vars[0]) - self.assertAllEqual(expected_lookups, self.evaluate(embedding_lookup)) + # Assert expected embedding variable and lookups. + global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + self.assertCountEqual(('embedding_weights:0',), + tuple([v.name for v in global_vars])) + with _initialized_session(): + self.assertAllEqual(embedding_values, global_vars[0]) + self.assertAllEqual(expected_lookups, self.evaluate(embedding_lookup)) - @test_util.run_deprecated_v1 def test_linear_model(self): # Inputs. batch_size = 4 @@ -5336,7 +5327,6 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase): self.assertAllClose(((94.,), (29.,), (0.,), (42.,)), self.evaluate(predictions)) - @test_util.run_deprecated_v1 def test_keras_linear_model(self): # Inputs. batch_size = 4 @@ -5416,125 +5406,128 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase): self.assertAllClose(((94.,), (29.,), (0.,), (42.,)), self.evaluate(predictions)) - @test_util.run_deprecated_v1 def test_input_layer(self): - # Inputs. 
- vocabulary_size = 3 - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - # example 2, ids [] - # example 3, ids [1] - indices=((0, 0), (1, 0), (1, 4), (3, 0)), - values=(2, 0, 1, 1), - dense_shape=(4, 5)) + with ops.Graph().as_default(): + # Inputs. + vocabulary_size = 3 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + # example 2, ids [] + # example 3, ids [1] + indices=((0, 0), (1, 0), (1, 4), (3, 0)), + values=(2, 0, 1, 1), + dense_shape=(4, 5)) - # Embedding variable. - embedding_dimension = 2 - embedding_values = ( - (1., 2.), # id 0 - (3., 5.), # id 1 - (7., 11.) # id 2 - ) - def _initializer(shape, dtype, partition_info): - self.assertAllEqual((vocabulary_size, embedding_dimension), shape) - self.assertEqual(dtypes.float32, dtype) - self.assertIsNone(partition_info) - return embedding_values + # Embedding variable. + embedding_dimension = 2 + embedding_values = ( + (1., 2.), # id 0 + (3., 5.), # id 1 + (7., 11.) # id 2 + ) - # Expected lookup result, using combiner='mean'. - expected_lookups = ( - # example 0, ids [2], embedding = [7, 11] - (7., 11.), - # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] - (2., 3.5), - # example 2, ids [], embedding = [0, 0] - (0., 0.), - # example 3, ids [1], embedding = [3, 5] - (3., 5.), - ) + def _initializer(shape, dtype, partition_info): + self.assertAllEqual((vocabulary_size, embedding_dimension), shape) + self.assertEqual(dtypes.float32, dtype) + self.assertIsNone(partition_info) + return embedding_values - # Build columns. - categorical_column = fc._categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - embedding_column = fc._embedding_column( - categorical_column, - dimension=embedding_dimension, - initializer=_initializer) + # Expected lookup result, using combiner='mean'. + expected_lookups = ( + # example 0, ids [2], embedding = [7, 11] + (7., 11.), + # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] + (2., 3.5), + # example 2, ids [], embedding = [0, 0] + (0., 0.), + # example 3, ids [1], embedding = [3, 5] + (3., 5.), + ) - # Provide sparse input and get dense result. - input_layer = fc.input_layer({'aaa': sparse_input}, (embedding_column,)) + # Build columns. + categorical_column = fc._categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column = fc._embedding_column( + categorical_column, + dimension=embedding_dimension, + initializer=_initializer) - # Assert expected embedding variable and lookups. - global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) - self.assertCountEqual(('input_layer/aaa_embedding/embedding_weights:0',), - tuple([v.name for v in global_vars])) - trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) - self.assertCountEqual(('input_layer/aaa_embedding/embedding_weights:0',), - tuple([v.name for v in trainable_vars])) - with _initialized_session(): - self.assertAllEqual(embedding_values, trainable_vars[0]) - self.assertAllEqual(expected_lookups, self.evaluate(input_layer)) + # Provide sparse input and get dense result. + input_layer = fc.input_layer({'aaa': sparse_input}, (embedding_column,)) + + # Assert expected embedding variable and lookups. 
+ global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + self.assertCountEqual(('input_layer/aaa_embedding/embedding_weights:0',), + tuple([v.name for v in global_vars])) + trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) + self.assertCountEqual(('input_layer/aaa_embedding/embedding_weights:0',), + tuple([v.name for v in trainable_vars])) + with _initialized_session(): + self.assertAllEqual(embedding_values, trainable_vars[0]) + self.assertAllEqual(expected_lookups, self.evaluate(input_layer)) - @test_util.run_deprecated_v1 def test_input_layer_not_trainable(self): - # Inputs. - vocabulary_size = 3 - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - # example 2, ids [] - # example 3, ids [1] - indices=((0, 0), (1, 0), (1, 4), (3, 0)), - values=(2, 0, 1, 1), - dense_shape=(4, 5)) + with ops.Graph().as_default(): + # Inputs. + vocabulary_size = 3 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + # example 2, ids [] + # example 3, ids [1] + indices=((0, 0), (1, 0), (1, 4), (3, 0)), + values=(2, 0, 1, 1), + dense_shape=(4, 5)) - # Embedding variable. - embedding_dimension = 2 - embedding_values = ( - (1., 2.), # id 0 - (3., 5.), # id 1 - (7., 11.) # id 2 - ) - def _initializer(shape, dtype, partition_info): - self.assertAllEqual((vocabulary_size, embedding_dimension), shape) - self.assertEqual(dtypes.float32, dtype) - self.assertIsNone(partition_info) - return embedding_values + # Embedding variable. + embedding_dimension = 2 + embedding_values = ( + (1., 2.), # id 0 + (3., 5.), # id 1 + (7., 11.) # id 2 + ) - # Expected lookup result, using combiner='mean'. - expected_lookups = ( - # example 0, ids [2], embedding = [7, 11] - (7., 11.), - # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] - (2., 3.5), - # example 2, ids [], embedding = [0, 0] - (0., 0.), - # example 3, ids [1], embedding = [3, 5] - (3., 5.), - ) + def _initializer(shape, dtype, partition_info): + self.assertAllEqual((vocabulary_size, embedding_dimension), shape) + self.assertEqual(dtypes.float32, dtype) + self.assertIsNone(partition_info) + return embedding_values - # Build columns. - categorical_column = fc._categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - embedding_column = fc._embedding_column( - categorical_column, - dimension=embedding_dimension, - initializer=_initializer, - trainable=False) + # Expected lookup result, using combiner='mean'. + expected_lookups = ( + # example 0, ids [2], embedding = [7, 11] + (7., 11.), + # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] + (2., 3.5), + # example 2, ids [], embedding = [0, 0] + (0., 0.), + # example 3, ids [1], embedding = [3, 5] + (3., 5.), + ) - # Provide sparse input and get dense result. - input_layer = fc.input_layer({'aaa': sparse_input}, (embedding_column,)) + # Build columns. + categorical_column = fc._categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column = fc._embedding_column( + categorical_column, + dimension=embedding_dimension, + initializer=_initializer, + trainable=False) - # Assert expected embedding variable and lookups. 
- global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) - self.assertCountEqual(('input_layer/aaa_embedding/embedding_weights:0',), - tuple([v.name for v in global_vars])) - self.assertCountEqual([], - ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)) - with _initialized_session(): - self.assertAllEqual(embedding_values, global_vars[0]) - self.assertAllEqual(expected_lookups, self.evaluate(input_layer)) + # Provide sparse input and get dense result. + input_layer = fc.input_layer({'aaa': sparse_input}, (embedding_column,)) + + # Assert expected embedding variable and lookups. + global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + self.assertCountEqual(('input_layer/aaa_embedding/embedding_weights:0',), + tuple([v.name for v in global_vars])) + self.assertCountEqual([], + ops.get_collection( + ops.GraphKeys.TRAINABLE_VARIABLES)) + with _initialized_session(): + self.assertAllEqual(embedding_values, global_vars[0]) + self.assertAllEqual(expected_lookups, self.evaluate(input_layer)) class SharedEmbeddingColumnTest(test.TestCase, parameterized.TestCase): From 14a136df8632233c81d46e331be53e9d3b30f8cd Mon Sep 17 00:00:00 2001 From: Bixia Zheng Date: Tue, 4 Aug 2020 09:01:21 -0700 Subject: [PATCH 2074/2522] [TF:TRT] Disable some python tests for TensorRT 7.1.3. Add IsTensorRTVersionGreaterEqual to check the linked TensorRT version and use the routine to skip the tests that fail with TensorRT 7.1.3. Modify lru_cache_test to not use Conv2D. This is to workaround a bug in TensorRT 7.1.3. PiperOrigin-RevId: 324821921 Change-Id: Icf7f0b239917e0417f090f904192837181380eae --- .../python/compiler/tensorrt/test/base_test.py | 12 ++++++++++++ .../tensorrt/test/combined_nms_test.py | 3 +++ .../tensorrt/test/const_broadcast_test.py | 6 ++++++ .../compiler/tensorrt/test/conv2d_test.py | 18 ++++++++++++++++++ .../tensorrt/test/dynamic_input_shapes_test.py | 3 +++ .../compiler/tensorrt/test/lru_cache_test.py | 14 +++----------- .../tensorrt/test/memory_alignment_test.py | 6 ++++++ .../multi_connection_neighbor_engine_test.py | 6 ++++++ .../tensorrt/test/neighboring_engine_test.py | 6 ++++++ .../tensorrt/test/quantization_mnist_test.py | 8 ++++++-- .../test/tf_trt_integration_test_base.py | 7 +++++++ .../tensorrt/test/vgg_block_nchw_test.py | 6 ++++++ .../compiler/tensorrt/test/vgg_block_test.py | 6 ++++++ 13 files changed, 88 insertions(+), 13 deletions(-) diff --git a/tensorflow/python/compiler/tensorrt/test/base_test.py b/tensorflow/python/compiler/tensorrt/test/base_test.py index 9d2d3abd4fb..195382cd8ed 100644 --- a/tensorflow/python/compiler/tensorrt/test/base_test.py +++ b/tensorflow/python/compiler/tensorrt/test/base_test.py @@ -70,6 +70,12 @@ class SimpleSingleEngineTest(trt_test.TfTrtIntegrationTestBase): ] } + def ShouldRunTest(self, run_params): + # TODO(b/162448349): Enable the test for TRT 7.1.3. + if trt_test.IsTensorRTVersionGreaterEqual(7, 1, 3): + return (False, "Skip test due to b/162448349") + return super().ShouldRunTest(run_params) + class SimpleMultiEnginesTest(trt_test.TfTrtIntegrationTestBase): @@ -130,6 +136,12 @@ class SimpleMultiEnginesTest(trt_test.TfTrtIntegrationTestBase): return conversion_params._replace( rewriter_config_template=rewrite_config_with_trt) + def ShouldRunTest(self, run_params): + # TODO(b/162448349): Enable the test for TRT 7.1.3. 
+ if trt_test.IsTensorRTVersionGreaterEqual(7, 1, 3): + return (False, "Skip test due to b/162448349") + return super().ShouldRunTest(run_params) + class SimpleMultiEnginesTest2(trt_test.TfTrtIntegrationTestBase): diff --git a/tensorflow/python/compiler/tensorrt/test/combined_nms_test.py b/tensorflow/python/compiler/tensorrt/test/combined_nms_test.py index ffb1bf85e87..71f5139d049 100644 --- a/tensorflow/python/compiler/tensorrt/test/combined_nms_test.py +++ b/tensorflow/python/compiler/tensorrt/test/combined_nms_test.py @@ -90,6 +90,9 @@ class CombinedNmsTest(trt_test.TfTrtIntegrationTestBase): } def ShouldRunTest(self, run_params): + # TODO(b/162447069): Enable the test for TRT 7.1.3. + if trt_test.IsTensorRTVersionGreaterEqual(7, 1, 3): + return (False, 'Skip test due to b/162447069') # There is no CombinedNonMaxSuppression op for GPU at the moment, so # calibration will fail. # TODO(laigd): fix this. diff --git a/tensorflow/python/compiler/tensorrt/test/const_broadcast_test.py b/tensorflow/python/compiler/tensorrt/test/const_broadcast_test.py index ccbaf9e52fa..9e71b9e3f75 100644 --- a/tensorflow/python/compiler/tensorrt/test/const_broadcast_test.py +++ b/tensorflow/python/compiler/tensorrt/test/const_broadcast_test.py @@ -60,6 +60,12 @@ class ConstBroadcastTest(trt_test.TfTrtIntegrationTestBase): """The relative tolerance to compare floating point results.""" return 1.e-04 if run_params.precision_mode == 'FP32' else 1.e-02 + def ShouldRunTest(self, run_params): + # TODO(b/162448349): Enable the test for TRT 7.1.3. + if trt_test.IsTensorRTVersionGreaterEqual(7, 1, 3): + return (False, 'Skip test due to b/162448349') + return super().ShouldRunTest(run_params) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/compiler/tensorrt/test/conv2d_test.py b/tensorflow/python/compiler/tensorrt/test/conv2d_test.py index df1adce2178..400c17b343e 100644 --- a/tensorflow/python/compiler/tensorrt/test/conv2d_test.py +++ b/tensorflow/python/compiler/tensorrt/test/conv2d_test.py @@ -114,6 +114,12 @@ class Conv2DNCHWTest(trt_test.TfTrtIntegrationTestBase): return 4e-02 return super(Conv2DNCHWTest, self).ExpectedRelativeTolerance(run_params) + def ShouldRunTest(self, run_params): + # TODO(b/162448349): Enable the test for TRT 7.1.3. + if trt_test.IsTensorRTVersionGreaterEqual(7, 1, 3): + return (False, "Skip test due to b/162448349") + return super().ShouldRunTest(run_params) + class Conv2DNHWCTest(trt_test.TfTrtIntegrationTestBase): """Testing conversion of Conv2D (data_format=NCHW) in TF-TRT conversion.""" @@ -137,6 +143,12 @@ class Conv2DNHWCTest(trt_test.TfTrtIntegrationTestBase): """Return the expected engines to build.""" return ["TRTEngineOp_0"] + def ShouldRunTest(self, run_params): + # TODO(b/162448349): Enable the test for TRT 7.1.3. + if trt_test.IsTensorRTVersionGreaterEqual(7, 1, 3): + return (False, "Skip test due to b/162448349") + return super().ShouldRunTest(run_params) + class Conv2DStridedNCHWTest(trt_test.TfTrtIntegrationTestBase): """Testing conversion of strided Conv2D (data_format=NCHW).""" @@ -168,6 +180,12 @@ class Conv2DStridedNCHWTest(trt_test.TfTrtIntegrationTestBase): """Return the expected engines to build.""" return ["TRTEngineOp_0"] + def ShouldRunTest(self, run_params): + # TODO(b/162448349): Enable the test for TRT 7.1.3. 
+ if trt_test.IsTensorRTVersionGreaterEqual(7, 1, 3): + return (False, "Skip test due to b/162448349") + return super().ShouldRunTest(run_params) + class Conv2DTranposeTest(trt_test.TfTrtIntegrationTestBase): """Testing conversion of conv2d_transpose (AKA Conv2DBackpropInput)""" diff --git a/tensorflow/python/compiler/tensorrt/test/dynamic_input_shapes_test.py b/tensorflow/python/compiler/tensorrt/test/dynamic_input_shapes_test.py index 95dbe727ac3..f02ad08777e 100644 --- a/tensorflow/python/compiler/tensorrt/test/dynamic_input_shapes_test.py +++ b/tensorflow/python/compiler/tensorrt/test/dynamic_input_shapes_test.py @@ -98,6 +98,9 @@ class DynamicInputShapesTest(trt_test.TfTrtIntegrationTestBase): return ["TRTEngineOp_0"] def ShouldRunTest(self, run_params): + # TODO(b/162448349): Enable the test for TRT 7.1.3. + if trt_test.IsTensorRTVersionGreaterEqual(7, 1, 3): + return (False, "Skip test due to b/162448349") return (run_params.dynamic_engine and not trt_test.IsQuantizationMode( run_params.precision_mode)), "test dynamic engine and non-INT8" diff --git a/tensorflow/python/compiler/tensorrt/test/lru_cache_test.py b/tensorflow/python/compiler/tensorrt/test/lru_cache_test.py index 9ebbfd51bc6..a2caa070011 100644 --- a/tensorflow/python/compiler/tensorrt/test/lru_cache_test.py +++ b/tensorflow/python/compiler/tensorrt/test/lru_cache_test.py @@ -33,14 +33,6 @@ from tensorflow.python.platform import test class LRUCacheTest(trt_test.TfTrtIntegrationTestBase): def GraphFn(self, x): - conv_filter = constant_op.constant( - np.random.randn(3, 3, 2, 1), dtype=dtypes.float32) - x = nn.conv2d( - input=x, - filter=conv_filter, - strides=[1, 1, 1, 1], - padding="SAME", - name="conv") bias = constant_op.constant( np.random.randn(1, 10, 10, 1), dtype=dtypes.float32) x = math_ops.add(x, bias) @@ -51,9 +43,9 @@ class LRUCacheTest(trt_test.TfTrtIntegrationTestBase): dtype = dtypes.float32 input_dims = [[[1, 10, 10, 2]], [[2, 10, 10, 2]], [[4, 10, 10, 2]], [[2, 10, 10, 2]]] - expected_output_dims = [[[1, 10, 10, 1]], [[2, 10, 10, 1]], [[4, 10, 10, - 1]], - [[2, 10, 10, 1]]] + expected_output_dims = [[[1, 10, 10, 2]], [[2, 10, 10, 2]], [[4, 10, 10, + 2]], + [[2, 10, 10, 2]]] return trt_test.TfTrtIntegrationTestParams( graph_fn=self.GraphFn, input_specs=[ diff --git a/tensorflow/python/compiler/tensorrt/test/memory_alignment_test.py b/tensorflow/python/compiler/tensorrt/test/memory_alignment_test.py index 056edc3e4d4..c1f0a007bf8 100644 --- a/tensorflow/python/compiler/tensorrt/test/memory_alignment_test.py +++ b/tensorflow/python/compiler/tensorrt/test/memory_alignment_test.py @@ -67,6 +67,12 @@ class MemoryAlignmentTest(trt_test.TfTrtIntegrationTestBase): """The relative tolerance to compare floating point results.""" return 0.1 + def ShouldRunTest(self, run_params): + # TODO(b/162448349): Enable the test for TRT 7.1.3. 
+ if trt_test.IsTensorRTVersionGreaterEqual(7, 1, 3): + return (False, "Skip test due to b/162448349") + return super().ShouldRunTest(run_params) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/compiler/tensorrt/test/multi_connection_neighbor_engine_test.py b/tensorflow/python/compiler/tensorrt/test/multi_connection_neighbor_engine_test.py index b57bee6c5d7..687a12486b7 100644 --- a/tensorflow/python/compiler/tensorrt/test/multi_connection_neighbor_engine_test.py +++ b/tensorflow/python/compiler/tensorrt/test/multi_connection_neighbor_engine_test.py @@ -72,6 +72,12 @@ class MultiConnectionNeighborEngineTest(trt_test.TfTrtIntegrationTestBase): """Return the expected engines to build.""" return ["TRTEngineOp_0", "TRTEngineOp_1"] + def ShouldRunTest(self, run_params): + # TODO(b/162447069): Enable the test for TRT 7.1.3. + if trt_test.IsTensorRTVersionGreaterEqual(7, 1, 3): + return (False, "Skip test due to b/162447069") + return super().ShouldRunTest(run_params) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/compiler/tensorrt/test/neighboring_engine_test.py b/tensorflow/python/compiler/tensorrt/test/neighboring_engine_test.py index f377fe8dceb..39fee5cba5d 100644 --- a/tensorflow/python/compiler/tensorrt/test/neighboring_engine_test.py +++ b/tensorflow/python/compiler/tensorrt/test/neighboring_engine_test.py @@ -61,6 +61,12 @@ class NeighboringEngineTest(trt_test.TfTrtIntegrationTestBase): "TRTEngineOp_1": ["weights", "conv"] } + def ShouldRunTest(self, run_params): + # TODO(b/162447069): Enable the test for TRT 7.1.3. + if trt_test.IsTensorRTVersionGreaterEqual(7, 1, 3): + return (False, "Skip test due to b/162447069") + return super().ShouldRunTest(run_params) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/compiler/tensorrt/test/quantization_mnist_test.py b/tensorflow/python/compiler/tensorrt/test/quantization_mnist_test.py index 2716a933336..6b2b4ba77c2 100644 --- a/tensorflow/python/compiler/tensorrt/test/quantization_mnist_test.py +++ b/tensorflow/python/compiler/tensorrt/test/quantization_mnist_test.py @@ -18,13 +18,13 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function - import tensorflow_datasets as tfds from tensorflow.compiler.tf2tensorrt._pywrap_py_utils import get_linked_tensorrt_version from tensorflow.compiler.tf2tensorrt._pywrap_py_utils import is_tensorrt_enabled from tensorflow.core.protobuf import config_pb2 from tensorflow.python.compiler.tensorrt import trt_convert +from tensorflow.python.compiler.tensorrt.test import tf_trt_integration_test_base as trt_test from tensorflow.python.data.ops import dataset_ops from tensorflow.python.estimator.estimator import Estimator from tensorflow.python.estimator.model_fn import EstimatorSpec @@ -262,6 +262,11 @@ class QuantizationAwareTrainingMNISTTest(test_util.TensorFlowTestCase): def testEval(self): if not is_tensorrt_enabled(): return + + # TODO(b/162447069): Enable the test for TRT 7.1.3. 
+ if trt_test.IsTensorRTVersionGreaterEqual(7, 1, 3): + return + model_dir = test.test_src_dir_path( 'python/compiler/tensorrt/test/testdata/mnist') @@ -286,6 +291,5 @@ class QuantizationAwareTrainingMNISTTest(test_util.TensorFlowTestCase): logging.info('accuracy_tf_trt: %f', accuracy_tf_trt) self.assertAllClose(0.9675, accuracy_tf_trt, rtol=1e-3, atol=1e-3) - if __name__ == '__main__': test.main() diff --git a/tensorflow/python/compiler/tensorrt/test/tf_trt_integration_test_base.py b/tensorflow/python/compiler/tensorrt/test/tf_trt_integration_test_base.py index 87fa55a32bd..27133a14203 100644 --- a/tensorflow/python/compiler/tensorrt/test/tf_trt_integration_test_base.py +++ b/tensorflow/python/compiler/tensorrt/test/tf_trt_integration_test_base.py @@ -31,6 +31,7 @@ import warnings import numpy as np import six +from tensorflow.compiler.tf2tensorrt._pywrap_py_utils import get_linked_tensorrt_version from tensorflow.compiler.tf2tensorrt._pywrap_py_utils import is_tensorrt_enabled from tensorflow.core.framework import graph_pb2 from tensorflow.core.protobuf import config_pb2 @@ -100,6 +101,12 @@ def IsQuantizationWithCalibration(params): return IsQuantizationMode(params.precision_mode) and params.use_calibration +def IsTensorRTVersionGreaterEqual(major, minor=0, patch=0): + ver = get_linked_tensorrt_version() + return ver[0] > major or (ver[0] == major and ver[1] > minor) or ( + ver[0] == major and ver[1] == minor and ver[2] >= patch) + + class GraphState(object): ORIGINAL = 0 CALIBRATE = 1 diff --git a/tensorflow/python/compiler/tensorrt/test/vgg_block_nchw_test.py b/tensorflow/python/compiler/tensorrt/test/vgg_block_nchw_test.py index 8fd9606812d..43034e8b31e 100644 --- a/tensorflow/python/compiler/tensorrt/test/vgg_block_nchw_test.py +++ b/tensorflow/python/compiler/tensorrt/test/vgg_block_nchw_test.py @@ -76,6 +76,12 @@ class VGGBlockNCHWTest(trt_test.TfTrtIntegrationTestBase): super(trt_test.TfTrtIntegrationTestBase, self).setUp() os.environ["TF_TRT_ALLOW_ENGINE_NATIVE_SEGMENT_EXECUTION"] = "True" + def ShouldRunTest(self, run_params): + # TODO(b/162448349): Enable the test for TRT 7.1.3. + if trt_test.IsTensorRTVersionGreaterEqual(7, 1, 3): + return (False, "Skip test due to b/162448349") + return super().ShouldRunTest(run_params) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/compiler/tensorrt/test/vgg_block_test.py b/tensorflow/python/compiler/tensorrt/test/vgg_block_test.py index 9d81cd6dcc3..7b1f7e062d7 100644 --- a/tensorflow/python/compiler/tensorrt/test/vgg_block_test.py +++ b/tensorflow/python/compiler/tensorrt/test/vgg_block_test.py @@ -67,6 +67,12 @@ class VGGBlockTest(trt_test.TfTrtIntegrationTestBase): super(trt_test.TfTrtIntegrationTestBase, self).setUp() os.environ["TF_TRT_ALLOW_ENGINE_NATIVE_SEGMENT_EXECUTION"] = "True" + def ShouldRunTest(self, run_params): + # TODO(b/162448349): Enable the test for TRT 7.1.3. + if trt_test.IsTensorRTVersionGreaterEqual(7, 1, 3): + return (False, "Skip test due to b/162448349") + return super().ShouldRunTest(run_params) + if __name__ == "__main__": test.main() From 465b9f4258dfe922c6d385551233b9ab7319af37 Mon Sep 17 00:00:00 2001 From: Bixia Zheng Date: Tue, 4 Aug 2020 09:27:04 -0700 Subject: [PATCH 2075/2522] [TF:TRT] Use IsTensorRTVersionGreaterEqual to check TensorRT version. Rewrite a few places that check TensorRT version to use IsTensorRTVersionGreaterEqual. 
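For reference, the routine introduced in the previous patch is an ordinary lexicographic
comparison of the linked (major, minor, patch) triple. A minimal standalone sketch of the
same logic follows; the helper name and the hard-coded version tuples below are purely
illustrative and are not part of the TF-TRT API:

    def _version_greater_equal(linked_version, major, minor=0, patch=0):
      # Python tuple comparison is lexicographic, so this is equivalent to the
      # explicit major/minor/patch checks in IsTensorRTVersionGreaterEqual.
      return tuple(linked_version[:3]) >= (major, minor, patch)

    assert _version_greater_equal((7, 1, 3), 7, 1, 3)      # equal version -> True
    assert _version_greater_equal((7, 2, 0), 7, 1, 3)      # newer version -> True
    assert not _version_greater_equal((6, 0, 1), 7)        # older version -> False

Call sites then only pass the threshold, e.g. IsTensorRTVersionGreaterEqual(5, 1) replaces
the hand-rolled "ver[0] > 5 or (ver[0] == 5 and ver[1] >= 1)" comparison removed below.
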
PiperOrigin-RevId: 324826641 Change-Id: I667d317574f025422cedbed9b14cb8df647a5a70 --- .../python/compiler/tensorrt/test/combined_nms_test.py | 8 +++----- .../compiler/tensorrt/test/quantization_mnist_test.py | 4 +--- .../python/compiler/tensorrt/test/quantization_test.py | 5 ++--- tensorflow/python/compiler/tensorrt/test/trt_mode_test.py | 7 ++----- 4 files changed, 8 insertions(+), 16 deletions(-) diff --git a/tensorflow/python/compiler/tensorrt/test/combined_nms_test.py b/tensorflow/python/compiler/tensorrt/test/combined_nms_test.py index 71f5139d049..26e911e3b0b 100644 --- a/tensorflow/python/compiler/tensorrt/test/combined_nms_test.py +++ b/tensorflow/python/compiler/tensorrt/test/combined_nms_test.py @@ -18,7 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.compiler.tf2tensorrt._pywrap_py_utils import get_linked_tensorrt_version from tensorflow.python.compiler.tensorrt.test import tf_trt_integration_test_base as trt_test from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -97,10 +96,9 @@ class CombinedNmsTest(trt_test.TfTrtIntegrationTestBase): # calibration will fail. # TODO(laigd): fix this. # Only run for TRT 5.1 and above. - ver = get_linked_tensorrt_version() - return (ver[0] > 5 or - (ver[0] == 5 and ver[1] >= 1)) and not trt_test.IsQuantizationMode( - run_params.precision_mode), 'test >=TRT5.1 and non-INT8' + return trt_test.IsTensorRTVersionGreaterEqual( + 5, 1) and not trt_test.IsQuantizationMode( + run_params.precision_mode), 'test >=TRT5.1 and non-INT8' if __name__ == '__main__': diff --git a/tensorflow/python/compiler/tensorrt/test/quantization_mnist_test.py b/tensorflow/python/compiler/tensorrt/test/quantization_mnist_test.py index 6b2b4ba77c2..d859407f1f7 100644 --- a/tensorflow/python/compiler/tensorrt/test/quantization_mnist_test.py +++ b/tensorflow/python/compiler/tensorrt/test/quantization_mnist_test.py @@ -19,8 +19,6 @@ from __future__ import division from __future__ import print_function import tensorflow_datasets as tfds - -from tensorflow.compiler.tf2tensorrt._pywrap_py_utils import get_linked_tensorrt_version from tensorflow.compiler.tf2tensorrt._pywrap_py_utils import is_tensorrt_enabled from tensorflow.core.protobuf import config_pb2 from tensorflow.python.compiler.tensorrt import trt_convert @@ -279,7 +277,7 @@ class QuantizationAwareTrainingMNISTTest(test_util.TensorFlowTestCase): logging.info('accuracy_tf_native: %f', accuracy_tf_native) self.assertAllClose(0.9662, accuracy_tf_native, rtol=3e-3, atol=3e-3) - if get_linked_tensorrt_version()[0] < 5: + if not trt_test.IsTensorRTVersionGreaterEqual(5): return accuracy_tf_trt = self._Run( diff --git a/tensorflow/python/compiler/tensorrt/test/quantization_test.py b/tensorflow/python/compiler/tensorrt/test/quantization_test.py index 7ed3414817c..c41afbb29c5 100644 --- a/tensorflow/python/compiler/tensorrt/test/quantization_test.py +++ b/tensorflow/python/compiler/tensorrt/test/quantization_test.py @@ -20,7 +20,6 @@ from __future__ import print_function import numpy as np -from tensorflow.compiler.tf2tensorrt._pywrap_py_utils import get_linked_tensorrt_version from tensorflow.python.compiler.tensorrt.test import tf_trt_integration_test_base as trt_test from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -65,7 +64,7 @@ class QuantizationMissingAllRangesTest(trt_test.TfTrtIntegrationTestBase): def ShouldRunTest(self, run_params): # 
Only test static engine mode, with or without calibration. - return (get_linked_tensorrt_version()[0] >= 5 and + return (trt_test.IsTensorRTVersionGreaterEqual(5) and trt_test.IsQuantizationMode(run_params.precision_mode) and not run_params.convert_online and not run_params.dynamic_engine ), "test static engine, offline conversion and INT8" @@ -90,7 +89,7 @@ class QuantizationWithRangesTest(trt_test.TfTrtIntegrationTestBase): def ShouldRunTest(self, run_params): # Test static/dynamic engine with/without calibration. - return (get_linked_tensorrt_version()[0] >= 5 and + return (trt_test.IsTensorRTVersionGreaterEqual(5) and trt_test.IsQuantizationMode(run_params.precision_mode) and not run_params.convert_online), "test offline conversion and INT8" diff --git a/tensorflow/python/compiler/tensorrt/test/trt_mode_test.py b/tensorflow/python/compiler/tensorrt/test/trt_mode_test.py index c67de7432cd..7d991678748 100644 --- a/tensorflow/python/compiler/tensorrt/test/trt_mode_test.py +++ b/tensorflow/python/compiler/tensorrt/test/trt_mode_test.py @@ -20,7 +20,6 @@ from __future__ import print_function from unittest import SkipTest # pylint: disable=g-importing-member -from tensorflow.compiler.tf2tensorrt._pywrap_py_utils import get_linked_tensorrt_version from tensorflow.python.compiler.tensorrt.test import tf_trt_integration_test_base as trt_test from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops @@ -132,8 +131,7 @@ class ExplicitBatchTest(TrtModeTestBase): def ShouldRunTest(self, run_params): # Only run for TRT 6 and above. - ver = get_linked_tensorrt_version() - return run_params.is_v2 and ver[0] >= 6 and ( + return run_params.is_v2 and trt_test.IsTensorRTVersionGreaterEqual(6) and ( not run_params.use_calibration), "test v2, >=TRT6 and non-calibration" @@ -169,8 +167,7 @@ class DynamicShapesTest(TrtModeTestBase): def ShouldRunTest(self, run_params): # Only run for TRT 6 and above. - ver = get_linked_tensorrt_version() - return run_params.is_v2 and ver[0] >= 6 and ( + return run_params.is_v2 and trt_test.IsTensorRTVersionGreaterEqual(6) and ( not run_params.use_calibration), "test v2 >=TRT6 and non-calibration" From 935a1f07c60867aa627d4cb898faab8f83ce0004 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 4 Aug 2020 09:28:45 -0700 Subject: [PATCH 2076/2522] Add COPY operation to node shader registry in OpenGL PiperOrigin-RevId: 324826897 Change-Id: Ieb7a43609551f277f9a7dc24eb8ab8a33406be33 --- tensorflow/lite/delegates/gpu/gl/kernels/registry.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/registry.cc b/tensorflow/lite/delegates/gpu/gl/kernels/registry.cc index 0d2438aacc6..da6aad720a2 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/registry.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/registry.cc @@ -96,6 +96,7 @@ class Registry : public NodeShader { insert_op(Type::SOFTMAX, NewSoftmaxNodeShader); insert_elementwise_op(Type::ABS); + insert_elementwise_op(Type::COPY); insert_elementwise_op(Type::COS); insert_elementwise_op(Type::DIV); insert_elementwise_op(Type::ELU); From 920c7f93633b87f7f703617a72aa504509d59939 Mon Sep 17 00:00:00 2001 From: Geoffrey Martin-Noble Date: Tue, 4 Aug 2020 09:37:22 -0700 Subject: [PATCH 2077/2522] Delete duplicate glob expression PiperOrigin-RevId: 324828618 Change-Id: I6ebb0dcf912ff4813a47e344651d6092088bb108 --- third_party/mlir/BUILD | 1 - 1 file changed, 1 deletion(-) diff --git a/third_party/mlir/BUILD b/third_party/mlir/BUILD index 04238bae943..4f2873af3dd 100644 --- a/third_party/mlir/BUILD +++ b/third_party/mlir/BUILD @@ -432,7 +432,6 @@ cc_library( ]), hdrs = glob([ "include/mlir/Dialect/*.h", - "include/mlir/Dialect/*.h", ]), includes = ["include"], deps = [ From 1cbe64ec056ae48d66df51cdcb8c0a8d61fe6cf4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 4 Aug 2020 09:45:46 -0700 Subject: [PATCH 2078/2522] Move scalar multiply to the smaller side of convolution. PiperOrigin-RevId: 324830166 Change-Id: Ia71f882d838356158118c7f9d69d41d8a0ff6512 --- .../xla/service/algebraic_simplifier.cc | 202 ++++++++++++++++++ .../xla/service/algebraic_simplifier.h | 12 ++ .../xla/service/algebraic_simplifier_test.cc | 53 +++++ 3 files changed, 267 insertions(+) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 1f82c062df9..0b588048e4a 100755 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -428,6 +428,10 @@ class AlgebraicSimplifierVisitor : public DfsHloRewriteVisitor { shape, hlo, zero, dims, AddReduce_computation)); } + // Move scalar multiply to the smallest side of convolution to + // reduce multiply computations. + Status ScalarMultiplyReduction(HloInstruction* dot); + // Convenience method for replacing an instruction with a bitcast. If operand // is not null, then the bitcast will use the specified operand instead of the // operand of the instruction. 
@@ -563,6 +567,200 @@ bool AlgebraicSimplifierVisitor::SameShape(const HloInstruction* lhs, } } +namespace { + +float GetConstantValue(HloInstruction* inst) { + switch (inst->shape().element_type()) { + case BF16: + return static_cast(inst->literal().GetFirstElement()); + case F32: + return inst->literal().GetFirstElement(); + default: + LOG(FATAL) << "Unsupported data type: " << inst->shape().element_type(); + } +} + +bool IsOpCodeMultiplyCommutative(HloOpcode opcode) { + switch (opcode) { + case HloOpcode::kMultiply: + case HloOpcode::kTranspose: + case HloOpcode::kReshape: + case HloOpcode::kSelect: + return true; + default: + return false; + } +} + +std::unique_ptr MakeScalarInstruction(HloInstruction* target, + float multiplier) { + switch (target->shape().element_type()) { + case BF16: + return HloInstruction::CreateConstant(LiteralUtil::ConvertF32ToBF16( + LiteralUtil::CreateR0(multiplier))); + break; + case F32: + return HloInstruction::CreateConstant( + LiteralUtil::CreateR0(multiplier)); + break; + default: + LOG(FATAL) << "Unsupported data type: " << target->shape().element_type(); + } +} + +} // namespace + +Status AlgebraicSimplifierVisitor::ScalarMultiplyReduction( + HloInstruction* dot) { + // We only process bfloat16 and float32 for now. + if (dot->shape().element_type() != BF16 && + dot->shape().element_type() != F32) { + return Status::OK(); + } + + auto lhs = dot->mutable_operand(0); + auto rhs = dot->mutable_operand(1); + + const int64 dot_size = ShapeUtil::ElementsIn(dot->shape()); + const int64 lhs_size = ShapeUtil::ElementsIn(lhs->shape()); + const int64 rhs_size = ShapeUtil::ElementsIn(rhs->shape()); + + HloInstruction* target = nullptr; + // (current node, user, operand_index) + std::vector> operands; + std::vector users; + + // Find which side of dot has the smallest size: + // operand 0, operand 1, or output. + if (dot_size <= std::min(lhs_size, rhs_size)) { + target = dot; + if (dot_size < lhs_size) { + operands.emplace_back(lhs, dot, 0); + } + if (dot_size < rhs_size) { + operands.emplace_back(rhs, dot, 1); + } + } else if (lhs_size <= rhs_size) { + target = lhs; + if (lhs_size < rhs_size) { + operands.emplace_back(rhs, dot, 1); + } + if (lhs_size < dot_size && dot->user_count() == 1) { + users.push_back(dot->users().front()); + } + } else { + target = rhs; + if (rhs_size < lhs_size) { + operands.emplace_back(lhs, dot, 0); + } + if (rhs_size < dot_size && dot->user_count() == 1) { + users.push_back(dot->users().front()); + } + } + + std::vector values; + + // DFS to find scalar multiply ops from the operands. + while (!operands.empty()) { + HloInstruction* inst; + HloInstruction* user; + int64 index; + std::tie (inst, user, index) = operands.back(); + operands.pop_back(); + + // Skip the op types that are not commutative with multiply. + if (!IsOpCodeMultiplyCommutative(inst->opcode())) { + continue; + } + + HloInstruction* operand; + HloInstruction* multiplier; + // Pattern match a scalar multiply. + if (Match(inst, m::MultiplyAnyOrder( + m::Op(&operand), + m::Broadcast(m::ConstantScalar(&multiplier))))) { + CHECK_LT(index, user->operand_count()); + CHECK_EQ(inst, user->operands()[index]); + + // When found a scalar multiply, save its scalar value. + values.push_back(GetConstantValue(multiplier)); + // And remove the scalar multiply op. + TF_RETURN_IF_ERROR(user->ReplaceOperandWith(index, operand)); + inst = operand; + } + + // Push the operands of inst. 
+ int64 i = 0; + for (auto* operand : inst->operands()) { + operands.emplace_back(operand, inst, i++); + } + } + + // DFS to find scalar multiply ops from the users. + while (!users.empty()) { + auto inst = users.back(); + users.pop_back(); + + if (!IsOpCodeMultiplyCommutative(inst->opcode())) { + continue; + } + + HloInstruction* operand; + HloInstruction* multiplier; + if (Match(inst, m::MultiplyAnyOrder( + m::Op(&operand), + m::Broadcast(m::ConstantScalar(&multiplier))))) { + values.push_back(GetConstantValue(multiplier)); + + TF_RETURN_IF_ERROR(inst->ReplaceAllUsesWith(operand)); + inst = operand; + } + + // Process the instructions with only one user. + // Otherwise moving scalar multiply to the operands changes the values of + // other users. + if (inst->user_count() == 1) { + users.push_back(inst->users().front()); + } + } + + if (values.empty()) { + return Status::OK(); + } + + changed_ = true; + + // Combine all constant multipliers. + float multiplier = 1.0; + for (const float v : values) { + multiplier *= v; + } + + // Create a new const scalar multiply instruction. + HloInstruction* new_const_inst; + new_const_inst = + computation_->AddInstruction(MakeScalarInstruction(target, multiplier)); + + // Broadcast the scalar multiplier. + HloInstruction* new_broadcast = computation_->AddInstruction( + HloInstruction::CreateBroadcast(target->shape(), new_const_inst, {})); + // Create a new scalar multiply instruction. + HloInstruction* new_multiply = + computation_->AddInstruction(HloInstruction::CreateBinary( + target->shape(), HloOpcode::kMultiply, target, new_broadcast)); + CHECK_EQ(new_multiply->shape(), target->shape()); + + // Update the dependency with the rest of the instructions. + if (target == lhs) { + return dot->ReplaceOperandWith(0, new_multiply); + } else if (target == rhs) { + return dot->ReplaceOperandWith(1, new_multiply); + } else { + CHECK_EQ(target, dot); + return dot->ReplaceAllUsesWith(new_multiply); + } +} + void AlgebraicSimplifierVisitor::ReplaceWithBitcast(HloInstruction* instruction, HloInstruction* operand) { CHECK_EQ(1, instruction->operand_count()); @@ -5042,6 +5240,10 @@ StatusOr AlgebraicSimplifierVisitor::SimplifyConvToDot( Status AlgebraicSimplifierVisitor::HandleConvolution( HloInstruction* convolution) { + if (options_.enable_scalar_multiply_reduction()) { + TF_RETURN_IF_ERROR(ScalarMultiplyReduction(convolution)); + } + // Zero-sized input or filter. if (ShapeUtil::IsZeroElementArray(convolution->operand(0)->shape()) || ShapeUtil::IsZeroElementArray(convolution->operand(1)->shape())) { diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.h b/tensorflow/compiler/xla/service/algebraic_simplifier.h index 9f29df3c209..9f2a3404116 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.h +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.h @@ -86,6 +86,17 @@ class AlgebraicSimplifierOptions { } bool enable_conv_operand_swap() const { return enable_conv_operand_swap_; } + // Move constant scalar multiply to one operand or output of convolutions with + // the smallest tensor size, to reduce the number of scalar multiply. + void set_enable_scalar_multiply_reduction( + bool enable_scalar_multiply_reduction) { + enable_scalar_multiply_reduction_ = enable_scalar_multiply_reduction; + } + + bool enable_scalar_multiply_reduction() const { + return enable_scalar_multiply_reduction_; + } + // If enable_window_reduce_replacement is true, the kReduceWindow instruction // can be optimized by replacement with simpler operations. 
void set_enable_window_reduce_to_reduce_replacement( @@ -146,6 +157,7 @@ class AlgebraicSimplifierOptions { bool enable_dot_to_multiply_rewrite_{true}; bool enable_conv_simplification_{true}; bool enable_conv_operand_swap_{true}; + bool enable_scalar_multiply_reduction_{false}; bool enable_window_reduce_to_reduce_replacement_{true}; bool enable_reduce_of_reshape_{true}; bool replace_transpose_with_bitcast_{true}; diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index 034d8ec4361..90ca44714f7 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -5343,6 +5343,59 @@ ENTRY AddBroadcastZeroWithDynamicSlice { EXPECT_THAT(root->operand(1)->opcode(), HloOpcode::kPad); } +TEST_F(AlgebraicSimplifierTest, ScalarMultiplyReduction) { + const char* hlo_string = R"( +HloModule ConstScalarMultiply +ENTRY ConstScalarMultiply { + param0 = f32[16,512,4096]{2,1,0} parameter(0) + constant.0 = f32[] constant(0.5) + broadcast.0 = f32[16,512,4096] broadcast(constant.0), dimensions={} + multiply.0 = f32[16,512,4096]{2,1,0} multiply(param0, broadcast.0) + param1 = f32[16,512,4096]{2,1,0} parameter(1) + multiply.1 = f32[16,512,4096]{2,1,0} multiply(multiply.0, param1) + param2 = f32[16,512,1024]{2,1,0} parameter(2) + constant.1 = f32[] constant(1.109) + broadcast.1 = f32[16,512,1024] broadcast(constant.1), dimensions={} + multiply.2 = f32[16,512,1024]{2,1,0} multiply(param2, broadcast.1) + ROOT convolution = f32[4096,1024,1]{1,0,2} convolution(multiply.1, multiply.2), window={size=16}, dim_labels=0fb_0io->bf0 +} +)"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + AlgebraicSimplifierOptions options; + options.set_enable_scalar_multiply_reduction(true); + AlgebraicSimplifier simplifier(options); + ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + auto root = module->entry_computation()->root_instruction(); + EXPECT_EQ(root->opcode(), HloOpcode::kMultiply); + EXPECT_THAT(root, + GmockMatch(m::MultiplyAnyOrder( + m::Op(), m::Broadcast(m::ConstantScalar(0.5f * 1.109f))))); +} + +TEST_F(AlgebraicSimplifierTest, ScalarMultiplyReductionMultiUser) { + const char* hlo_string = R"( +HloModule ConstScalarMultiply +ENTRY ConstScalarMultiply { + param0 = f32[16,512,1024] parameter(0) + param1 = f32[4096,1024,1] parameter(1) + convolution = f32[16,512,4096] convolution(param0, param1), window={size=1}, dim_labels=0bf_oi0->0bf + constant.1 = f32[] constant(0.5) + broadcast.1 = f32[16,512,4096] broadcast(constant.1), dimensions={} + multiply.1 = f32[16,512,4096] multiply(convolution, broadcast.1) + param2 = f32[16,512,4096] parameter(2) + multiply.2 = f32[16,512,4096] multiply(convolution, param2) + ROOT add.1 = f32[16,512,4096] add(multiply.1, multiply.2) +} +)"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + AlgebraicSimplifierOptions options; + options.set_enable_scalar_multiply_reduction(true); + AlgebraicSimplifier simplifier(options); + ASSERT_FALSE(simplifier.Run(module.get()).ValueOrDie()); +} + INSTANTIATE_TEST_SUITE_P(DotOfConcatSimplificationTestInstantiation, DotOfConcatSimplificationTest, ::testing::ValuesIn(kDotOfConcatTestSpecs)); From 0224b563c6d614ef61fdcec1ba1953dfabb1a70b Mon Sep 17 00:00:00 2001 From: Zhenyu Tan Date: Tue, 4 Aug 2020 09:46:34 -0700 Subject: [PATCH 2079/2522] fix skip test feature column part 3: Shared Embedding Column PiperOrigin-RevId: 
324830308 Change-Id: I5fe94e971a64fba18a750ce08b86f09d1a1b3cff --- .../feature_column/feature_column_test.py | 992 +++++++++--------- 1 file changed, 499 insertions(+), 493 deletions(-) diff --git a/tensorflow/python/feature_column/feature_column_test.py b/tensorflow/python/feature_column/feature_column_test.py index e351c5da572..c8e24e46c2f 100644 --- a/tensorflow/python/feature_column/feature_column_test.py +++ b/tensorflow/python/feature_column/feature_column_test.py @@ -5532,251 +5532,251 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase): class SharedEmbeddingColumnTest(test.TestCase, parameterized.TestCase): - @test_util.run_deprecated_v1 def test_defaults(self): - categorical_column_a = fc._categorical_column_with_identity( - key='aaa', num_buckets=3) - categorical_column_b = fc._categorical_column_with_identity( - key='bbb', num_buckets=3) - embedding_dimension = 2 - embedding_column_b, embedding_column_a = fc_new.shared_embedding_columns( - [categorical_column_b, categorical_column_a], - dimension=embedding_dimension) - self.assertIs(categorical_column_a, embedding_column_a.categorical_column) - self.assertIs(categorical_column_b, embedding_column_b.categorical_column) - self.assertEqual(embedding_dimension, embedding_column_a.dimension) - self.assertEqual(embedding_dimension, embedding_column_b.dimension) - self.assertEqual('mean', embedding_column_a.combiner) - self.assertEqual('mean', embedding_column_b.combiner) - self.assertIsNone(embedding_column_a.ckpt_to_load_from) - self.assertIsNone(embedding_column_b.ckpt_to_load_from) - self.assertEqual('aaa_bbb_shared_embedding', - embedding_column_a.shared_embedding_collection_name) - self.assertEqual('aaa_bbb_shared_embedding', - embedding_column_b.shared_embedding_collection_name) - self.assertIsNone(embedding_column_a.tensor_name_in_ckpt) - self.assertIsNone(embedding_column_b.tensor_name_in_ckpt) - self.assertIsNone(embedding_column_a.max_norm) - self.assertIsNone(embedding_column_b.max_norm) - self.assertTrue(embedding_column_a.trainable) - self.assertTrue(embedding_column_b.trainable) - self.assertEqual('aaa_shared_embedding', embedding_column_a.name) - self.assertEqual('bbb_shared_embedding', embedding_column_b.name) - self.assertEqual( - 'aaa_bbb_shared_embedding', embedding_column_a._var_scope_name) - self.assertEqual( - 'aaa_bbb_shared_embedding', embedding_column_b._var_scope_name) - self.assertEqual( - (embedding_dimension,), embedding_column_a._variable_shape) - self.assertEqual( - (embedding_dimension,), embedding_column_b._variable_shape) - self.assertEqual({ - 'aaa': parsing_ops.VarLenFeature(dtypes.int64) - }, embedding_column_a._parse_example_spec) - self.assertEqual({ - 'bbb': parsing_ops.VarLenFeature(dtypes.int64) - }, embedding_column_b._parse_example_spec) - - @test_util.run_deprecated_v1 - def test_all_constructor_args(self): - categorical_column_a = fc._categorical_column_with_identity( - key='aaa', num_buckets=3) - categorical_column_b = fc._categorical_column_with_identity( - key='bbb', num_buckets=3) - embedding_dimension = 2 - embedding_column_a, embedding_column_b = fc_new.shared_embedding_columns( - [categorical_column_a, categorical_column_b], - dimension=embedding_dimension, - combiner='my_combiner', - initializer=lambda: 'my_initializer', - shared_embedding_collection_name='shared_embedding_collection_name', - ckpt_to_load_from='my_ckpt', - tensor_name_in_ckpt='my_ckpt_tensor', - max_norm=42., - trainable=False) - self.assertIs(categorical_column_a, 
embedding_column_a.categorical_column) - self.assertIs(categorical_column_b, embedding_column_b.categorical_column) - self.assertEqual(embedding_dimension, embedding_column_a.dimension) - self.assertEqual(embedding_dimension, embedding_column_b.dimension) - self.assertEqual('my_combiner', embedding_column_a.combiner) - self.assertEqual('my_combiner', embedding_column_b.combiner) - self.assertEqual('shared_embedding_collection_name', - embedding_column_a.shared_embedding_collection_name) - self.assertEqual('shared_embedding_collection_name', - embedding_column_b.shared_embedding_collection_name) - self.assertEqual('my_ckpt', embedding_column_a.ckpt_to_load_from) - self.assertEqual('my_ckpt', embedding_column_b.ckpt_to_load_from) - self.assertEqual('my_ckpt_tensor', embedding_column_a.tensor_name_in_ckpt) - self.assertEqual('my_ckpt_tensor', embedding_column_b.tensor_name_in_ckpt) - self.assertEqual(42., embedding_column_a.max_norm) - self.assertEqual(42., embedding_column_b.max_norm) - self.assertFalse(embedding_column_a.trainable) - self.assertFalse(embedding_column_b.trainable) - self.assertEqual('aaa_shared_embedding', embedding_column_a.name) - self.assertEqual('bbb_shared_embedding', embedding_column_b.name) - self.assertEqual( - 'shared_embedding_collection_name', embedding_column_a._var_scope_name) - self.assertEqual( - 'shared_embedding_collection_name', embedding_column_b._var_scope_name) - self.assertEqual( - (embedding_dimension,), embedding_column_a._variable_shape) - self.assertEqual( - (embedding_dimension,), embedding_column_b._variable_shape) - self.assertEqual({ - 'aaa': parsing_ops.VarLenFeature(dtypes.int64) - }, embedding_column_a._parse_example_spec) - self.assertEqual({ - 'bbb': parsing_ops.VarLenFeature(dtypes.int64) - }, embedding_column_b._parse_example_spec) - - @test_util.run_deprecated_v1 - def test_deep_copy(self): - categorical_column_a = fc._categorical_column_with_identity( - key='aaa', num_buckets=3) - categorical_column_b = fc._categorical_column_with_identity( - key='bbb', num_buckets=3) - embedding_dimension = 2 - original_a, _ = fc_new.shared_embedding_columns( - [categorical_column_a, categorical_column_b], - dimension=embedding_dimension, - combiner='my_combiner', - initializer=lambda: 'my_initializer', - shared_embedding_collection_name='shared_embedding_collection_name', - ckpt_to_load_from='my_ckpt', - tensor_name_in_ckpt='my_ckpt_tensor', - max_norm=42., - trainable=False) - for embedding_column_a in (original_a, copy.deepcopy(original_a)): - self.assertEqual('aaa', embedding_column_a.categorical_column.name) - self.assertEqual(3, embedding_column_a.categorical_column._num_buckets) - self.assertEqual({ - 'aaa': parsing_ops.VarLenFeature(dtypes.int64) - }, embedding_column_a.categorical_column._parse_example_spec) - + with ops.Graph().as_default(): + categorical_column_a = fc._categorical_column_with_identity( + key='aaa', num_buckets=3) + categorical_column_b = fc._categorical_column_with_identity( + key='bbb', num_buckets=3) + embedding_dimension = 2 + embedding_column_b, embedding_column_a = fc_new.shared_embedding_columns( + [categorical_column_b, categorical_column_a], + dimension=embedding_dimension) + self.assertIs(categorical_column_a, embedding_column_a.categorical_column) + self.assertIs(categorical_column_b, embedding_column_b.categorical_column) self.assertEqual(embedding_dimension, embedding_column_a.dimension) + self.assertEqual(embedding_dimension, embedding_column_b.dimension) + self.assertEqual('mean', embedding_column_a.combiner) + 
self.assertEqual('mean', embedding_column_b.combiner) + self.assertIsNone(embedding_column_a.ckpt_to_load_from) + self.assertIsNone(embedding_column_b.ckpt_to_load_from) + self.assertEqual('aaa_bbb_shared_embedding', + embedding_column_a.shared_embedding_collection_name) + self.assertEqual('aaa_bbb_shared_embedding', + embedding_column_b.shared_embedding_collection_name) + self.assertIsNone(embedding_column_a.tensor_name_in_ckpt) + self.assertIsNone(embedding_column_b.tensor_name_in_ckpt) + self.assertIsNone(embedding_column_a.max_norm) + self.assertIsNone(embedding_column_b.max_norm) + self.assertTrue(embedding_column_a.trainable) + self.assertTrue(embedding_column_b.trainable) + self.assertEqual('aaa_shared_embedding', embedding_column_a.name) + self.assertEqual('bbb_shared_embedding', embedding_column_b.name) + self.assertEqual('aaa_bbb_shared_embedding', + embedding_column_a._var_scope_name) + self.assertEqual('aaa_bbb_shared_embedding', + embedding_column_b._var_scope_name) + self.assertEqual((embedding_dimension,), + embedding_column_a._variable_shape) + self.assertEqual((embedding_dimension,), + embedding_column_b._variable_shape) + self.assertEqual({'aaa': parsing_ops.VarLenFeature(dtypes.int64)}, + embedding_column_a._parse_example_spec) + self.assertEqual({'bbb': parsing_ops.VarLenFeature(dtypes.int64)}, + embedding_column_b._parse_example_spec) + + def test_all_constructor_args(self): + with ops.Graph().as_default(): + categorical_column_a = fc._categorical_column_with_identity( + key='aaa', num_buckets=3) + categorical_column_b = fc._categorical_column_with_identity( + key='bbb', num_buckets=3) + embedding_dimension = 2 + embedding_column_a, embedding_column_b = fc_new.shared_embedding_columns( + [categorical_column_a, categorical_column_b], + dimension=embedding_dimension, + combiner='my_combiner', + initializer=lambda: 'my_initializer', + shared_embedding_collection_name='shared_embedding_collection_name', + ckpt_to_load_from='my_ckpt', + tensor_name_in_ckpt='my_ckpt_tensor', + max_norm=42., + trainable=False) + self.assertIs(categorical_column_a, embedding_column_a.categorical_column) + self.assertIs(categorical_column_b, embedding_column_b.categorical_column) + self.assertEqual(embedding_dimension, embedding_column_a.dimension) + self.assertEqual(embedding_dimension, embedding_column_b.dimension) self.assertEqual('my_combiner', embedding_column_a.combiner) + self.assertEqual('my_combiner', embedding_column_b.combiner) self.assertEqual('shared_embedding_collection_name', embedding_column_a.shared_embedding_collection_name) + self.assertEqual('shared_embedding_collection_name', + embedding_column_b.shared_embedding_collection_name) self.assertEqual('my_ckpt', embedding_column_a.ckpt_to_load_from) + self.assertEqual('my_ckpt', embedding_column_b.ckpt_to_load_from) self.assertEqual('my_ckpt_tensor', embedding_column_a.tensor_name_in_ckpt) + self.assertEqual('my_ckpt_tensor', embedding_column_b.tensor_name_in_ckpt) self.assertEqual(42., embedding_column_a.max_norm) + self.assertEqual(42., embedding_column_b.max_norm) self.assertFalse(embedding_column_a.trainable) + self.assertFalse(embedding_column_b.trainable) self.assertEqual('aaa_shared_embedding', embedding_column_a.name) + self.assertEqual('bbb_shared_embedding', embedding_column_b.name) + self.assertEqual('shared_embedding_collection_name', + embedding_column_a._var_scope_name) + self.assertEqual('shared_embedding_collection_name', + embedding_column_b._var_scope_name) self.assertEqual( (embedding_dimension,), 
embedding_column_a._variable_shape) + self.assertEqual((embedding_dimension,), + embedding_column_b._variable_shape) self.assertEqual({ 'aaa': parsing_ops.VarLenFeature(dtypes.int64) }, embedding_column_a._parse_example_spec) + self.assertEqual({'bbb': parsing_ops.VarLenFeature(dtypes.int64)}, + embedding_column_b._parse_example_spec) - @test_util.run_deprecated_v1 - def test_invalid_initializer(self): - categorical_column_a = fc._categorical_column_with_identity( - key='aaa', num_buckets=3) - categorical_column_b = fc._categorical_column_with_identity( - key='bbb', num_buckets=3) - with self.assertRaisesRegex(ValueError, 'initializer must be callable'): - fc_new.shared_embedding_columns( + def test_deep_copy(self): + with ops.Graph().as_default(): + categorical_column_a = fc._categorical_column_with_identity( + key='aaa', num_buckets=3) + categorical_column_b = fc._categorical_column_with_identity( + key='bbb', num_buckets=3) + embedding_dimension = 2 + original_a, _ = fc_new.shared_embedding_columns( [categorical_column_a, categorical_column_b], - dimension=2, - initializer='not_fn') + dimension=embedding_dimension, + combiner='my_combiner', + initializer=lambda: 'my_initializer', + shared_embedding_collection_name='shared_embedding_collection_name', + ckpt_to_load_from='my_ckpt', + tensor_name_in_ckpt='my_ckpt_tensor', + max_norm=42., + trainable=False) + for embedding_column_a in (original_a, copy.deepcopy(original_a)): + self.assertEqual('aaa', embedding_column_a.categorical_column.name) + self.assertEqual(3, embedding_column_a.categorical_column._num_buckets) + self.assertEqual( + {'aaa': parsing_ops.VarLenFeature(dtypes.int64)}, + embedding_column_a.categorical_column._parse_example_spec) + + self.assertEqual(embedding_dimension, embedding_column_a.dimension) + self.assertEqual('my_combiner', embedding_column_a.combiner) + self.assertEqual('shared_embedding_collection_name', + embedding_column_a.shared_embedding_collection_name) + self.assertEqual('my_ckpt', embedding_column_a.ckpt_to_load_from) + self.assertEqual('my_ckpt_tensor', + embedding_column_a.tensor_name_in_ckpt) + self.assertEqual(42., embedding_column_a.max_norm) + self.assertFalse(embedding_column_a.trainable) + self.assertEqual('aaa_shared_embedding', embedding_column_a.name) + self.assertEqual((embedding_dimension,), + embedding_column_a._variable_shape) + self.assertEqual({'aaa': parsing_ops.VarLenFeature(dtypes.int64)}, + embedding_column_a._parse_example_spec) + + def test_invalid_initializer(self): + with ops.Graph().as_default(): + categorical_column_a = fc._categorical_column_with_identity( + key='aaa', num_buckets=3) + categorical_column_b = fc._categorical_column_with_identity( + key='bbb', num_buckets=3) + with self.assertRaisesRegex(ValueError, 'initializer must be callable'): + fc_new.shared_embedding_columns( + [categorical_column_a, categorical_column_b], + dimension=2, + initializer='not_fn') - @test_util.run_deprecated_v1 def test_incompatible_column_type(self): - categorical_column_a = fc._categorical_column_with_identity( - key='aaa', num_buckets=3) - categorical_column_b = fc._categorical_column_with_identity( - key='bbb', num_buckets=3) - categorical_column_c = fc._categorical_column_with_hash_bucket( - key='ccc', hash_bucket_size=3) - with self.assertRaisesRegex( - ValueError, 'all categorical_columns must have the same type.*' - '_IdentityCategoricalColumn.*_HashedCategoricalColumn'): + with ops.Graph().as_default(): + categorical_column_a = fc._categorical_column_with_identity( + key='aaa', 
num_buckets=3) + categorical_column_b = fc._categorical_column_with_identity( + key='bbb', num_buckets=3) + categorical_column_c = fc._categorical_column_with_hash_bucket( + key='ccc', hash_bucket_size=3) + with self.assertRaisesRegex( + ValueError, 'all categorical_columns must have the same type.*' + '_IdentityCategoricalColumn.*_HashedCategoricalColumn'): + fc_new.shared_embedding_columns( + [categorical_column_a, categorical_column_b, categorical_column_c], + dimension=2) + + def test_weighted_categorical_column_ok(self): + with ops.Graph().as_default(): + categorical_column_a = fc._categorical_column_with_identity( + key='aaa', num_buckets=3) + weighted_categorical_column_a = fc._weighted_categorical_column( + categorical_column_a, weight_feature_key='aaa_weights') + categorical_column_b = fc._categorical_column_with_identity( + key='bbb', num_buckets=3) + weighted_categorical_column_b = fc._weighted_categorical_column( + categorical_column_b, weight_feature_key='bbb_weights') fc_new.shared_embedding_columns( - [categorical_column_a, categorical_column_b, categorical_column_c], + [weighted_categorical_column_a, categorical_column_b], dimension=2) + fc_new.shared_embedding_columns( + [categorical_column_a, weighted_categorical_column_b], dimension=2) + fc_new.shared_embedding_columns( + [weighted_categorical_column_a, weighted_categorical_column_b], dimension=2) - @test_util.run_deprecated_v1 - def test_weighted_categorical_column_ok(self): - categorical_column_a = fc._categorical_column_with_identity( - key='aaa', num_buckets=3) - weighted_categorical_column_a = fc._weighted_categorical_column( - categorical_column_a, weight_feature_key='aaa_weights') - categorical_column_b = fc._categorical_column_with_identity( - key='bbb', num_buckets=3) - weighted_categorical_column_b = fc._weighted_categorical_column( - categorical_column_b, weight_feature_key='bbb_weights') - fc_new.shared_embedding_columns( - [weighted_categorical_column_a, categorical_column_b], dimension=2) - fc_new.shared_embedding_columns( - [categorical_column_a, weighted_categorical_column_b], dimension=2) - fc_new.shared_embedding_columns( - [weighted_categorical_column_a, weighted_categorical_column_b], - dimension=2) - - @test_util.run_deprecated_v1 def test_parse_example(self): - a = fc._categorical_column_with_vocabulary_list( - key='aaa', vocabulary_list=('omar', 'stringer', 'marlo')) - b = fc._categorical_column_with_vocabulary_list( - key='bbb', vocabulary_list=('omar', 'stringer', 'marlo')) - a_embedded, b_embedded = fc_new.shared_embedding_columns([a, b], - dimension=2) - data = example_pb2.Example(features=feature_pb2.Features( - feature={ - 'aaa': - feature_pb2.Feature(bytes_list=feature_pb2.BytesList( - value=[b'omar', b'stringer'])), - 'bbb': - feature_pb2.Feature(bytes_list=feature_pb2.BytesList( - value=[b'stringer', b'marlo'])), - })) - features = parsing_ops.parse_example( - serialized=[data.SerializeToString()], - features=fc.make_parse_example_spec([a_embedded, b_embedded])) - self.assertIn('aaa', features) - self.assertIn('bbb', features) - with self.cached_session(): - _assert_sparse_tensor_value( - self, - sparse_tensor.SparseTensorValue( - indices=[[0, 0], [0, 1]], - values=np.array([b'omar', b'stringer'], dtype=np.object_), - dense_shape=[1, 2]), - features['aaa'].eval()) - _assert_sparse_tensor_value( - self, - sparse_tensor.SparseTensorValue( - indices=[[0, 0], [0, 1]], - values=np.array([b'stringer', b'marlo'], dtype=np.object_), - dense_shape=[1, 2]), - features['bbb'].eval()) + with 
ops.Graph().as_default(): + a = fc._categorical_column_with_vocabulary_list( + key='aaa', vocabulary_list=('omar', 'stringer', 'marlo')) + b = fc._categorical_column_with_vocabulary_list( + key='bbb', vocabulary_list=('omar', 'stringer', 'marlo')) + a_embedded, b_embedded = fc_new.shared_embedding_columns([a, b], + dimension=2) + data = example_pb2.Example( + features=feature_pb2.Features( + feature={ + 'aaa': + feature_pb2.Feature( + bytes_list=feature_pb2.BytesList( + value=[b'omar', b'stringer'])), + 'bbb': + feature_pb2.Feature( + bytes_list=feature_pb2.BytesList( + value=[b'stringer', b'marlo'])), + })) + features = parsing_ops.parse_example( + serialized=[data.SerializeToString()], + features=fc.make_parse_example_spec([a_embedded, b_embedded])) + self.assertIn('aaa', features) + self.assertIn('bbb', features) + with self.cached_session(): + _assert_sparse_tensor_value( + self, + sparse_tensor.SparseTensorValue( + indices=[[0, 0], [0, 1]], + values=np.array([b'omar', b'stringer'], dtype=np.object_), + dense_shape=[1, 2]), features['aaa'].eval()) + _assert_sparse_tensor_value( + self, + sparse_tensor.SparseTensorValue( + indices=[[0, 0], [0, 1]], + values=np.array([b'stringer', b'marlo'], dtype=np.object_), + dense_shape=[1, 2]), features['bbb'].eval()) - @test_util.run_deprecated_v1 def test_transform_feature(self): - a = fc._categorical_column_with_identity(key='aaa', num_buckets=3) - b = fc._categorical_column_with_identity(key='bbb', num_buckets=3) - a_embedded, b_embedded = fc_new.shared_embedding_columns([a, b], - dimension=2) - features = { - 'aaa': sparse_tensor.SparseTensor( - indices=((0, 0), (1, 0), (1, 1)), - values=(0, 1, 0), - dense_shape=(2, 2)), - 'bbb': sparse_tensor.SparseTensor( - indices=((0, 0), (1, 0), (1, 1)), - values=(1, 2, 1), - dense_shape=(2, 2)), - } - outputs = _transform_features(features, [a, a_embedded, b, b_embedded]) - output_a = outputs[a] - output_a_embedded = outputs[a_embedded] - output_b = outputs[b] - output_b_embedded = outputs[b_embedded] - with _initialized_session(): - _assert_sparse_tensor_value(self, self.evaluate(output_a), - self.evaluate(output_a_embedded)) - _assert_sparse_tensor_value(self, self.evaluate(output_b), - self.evaluate(output_b_embedded)) + with ops.Graph().as_default(): + a = fc._categorical_column_with_identity(key='aaa', num_buckets=3) + b = fc._categorical_column_with_identity(key='bbb', num_buckets=3) + a_embedded, b_embedded = fc_new.shared_embedding_columns([a, b], + dimension=2) + features = { + 'aaa': + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 1, 0), + dense_shape=(2, 2)), + 'bbb': + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 1), + dense_shape=(2, 2)), + } + outputs = _transform_features(features, [a, a_embedded, b, b_embedded]) + output_a = outputs[a] + output_a_embedded = outputs[a_embedded] + output_b = outputs[b] + output_b_embedded = outputs[b_embedded] + with _initialized_session(): + _assert_sparse_tensor_value(self, self.evaluate(output_a), + self.evaluate(output_a_embedded)) + _assert_sparse_tensor_value(self, self.evaluate(output_b), + self.evaluate(output_b_embedded)) @parameterized.named_parameters( { @@ -5796,162 +5796,164 @@ class SharedEmbeddingColumnTest(test.TestCase, parameterized.TestCase): 'use_safe_embedding_lookup': False, 'partition_variables': True, }) - @test_util.run_deprecated_v1 + def test_get_dense_tensor(self, use_safe_embedding_lookup, partition_variables): - # Inputs. 
- vocabulary_size = 4 - # -1 values are ignored. - input_a = np.array([ - [2, -1, -1], # example 0, ids [2] - [0, 1, -1] - ]) # example 1, ids [0, 1] - input_b = np.array([ - [0, -1, -1], # example 0, ids [0] - [-1, -1, -1] - ]) # example 1, ids [] - input_features = {'aaa': input_a, 'bbb': input_b} + with ops.Graph().as_default(): + # Inputs. + vocabulary_size = 4 + # -1 values are ignored. + input_a = np.array([ + [2, -1, -1], # example 0, ids [2] + [0, 1, -1] + ]) # example 1, ids [0, 1] + input_b = np.array([ + [0, -1, -1], # example 0, ids [0] + [-1, -1, -1] + ]) # example 1, ids [] + input_features = {'aaa': input_a, 'bbb': input_b} - # Embedding variable. - embedding_dimension = 2 - embedding_values = ( - (1., 2.), # id 0 - (3., 5.), # id 1 - (7., 11.), # id 2 - (9., 13.) # id 3 - ) + # Embedding variable. + embedding_dimension = 2 + embedding_values = ( + (1., 2.), # id 0 + (3., 5.), # id 1 + (7., 11.), # id 2 + (9., 13.) # id 3 + ) - def _initializer(shape, dtype, partition_info=None): - if partition_variables: - self.assertEqual([vocabulary_size, embedding_dimension], - partition_info.full_shape) - self.assertAllEqual((2, embedding_dimension), shape) - else: - self.assertAllEqual((vocabulary_size, embedding_dimension), shape) - self.assertIsNone(partition_info) + def _initializer(shape, dtype, partition_info=None): + if partition_variables: + self.assertEqual([vocabulary_size, embedding_dimension], + partition_info.full_shape) + self.assertAllEqual((2, embedding_dimension), shape) + else: + self.assertAllEqual((vocabulary_size, embedding_dimension), shape) + self.assertIsNone(partition_info) - self.assertEqual(dtypes.float32, dtype) - return embedding_values + self.assertEqual(dtypes.float32, dtype) + return embedding_values - # Expected lookup result, using combiner='mean'. - expected_lookups_a = ( - # example 0: - (7., 11.), # ids [2], embedding = [7, 11] - # example 1: - (2., 3.5), # ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] - ) - if use_safe_embedding_lookup: - expected_lookups_b = ( + # Expected lookup result, using combiner='mean'. + expected_lookups_a = ( # example 0: - (1., 2.), # ids [0], embedding = [1, 2] + (7., 11.), # ids [2], embedding = [7, 11] # example 1: - (0., 0.), # ids [], embedding = [0, 0] + (2., 3.5), # ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] ) - else: - expected_lookups_b = ( - # example 0: - (1., 2.), # ids [0], embedding = [1, 2] + if use_safe_embedding_lookup: + expected_lookups_b = ( + # example 0: + (1., 2.), # ids [0], embedding = [1, 2] + # example 1: + (0., 0.), # ids [], embedding = [0, 0] + ) + else: + expected_lookups_b = ( + # example 0: + (1., 2.), # ids [0], embedding = [1, 2] + ) + + # Build columns. + categorical_column_a = fc._categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + categorical_column_b = fc._categorical_column_with_identity( + key='bbb', num_buckets=vocabulary_size) + + partitioner = None + if partition_variables: + partitioner = partitioned_variables.fixed_size_partitioner(2, axis=0) + + with variable_scope.variable_scope('vars', partitioner=partitioner): + embedding_column_a, embedding_column_b = fc_new.shared_embedding_columns( + [categorical_column_a, categorical_column_b], + dimension=embedding_dimension, + initializer=_initializer, + use_safe_embedding_lookup=use_safe_embedding_lookup) + # Provide sparse input and get dense result. 
+ embedding_lookup_a = embedding_column_a._get_dense_tensor( + _LazyBuilder(input_features)) + embedding_lookup_b = embedding_column_b._get_dense_tensor( + _LazyBuilder(input_features)) + # Assert expected embedding variable and lookups. + global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + if partition_variables: + self.assertCountEqual(('vars/embedding_weights/part_0:0', + 'vars/embedding_weights/part_1:0'), + tuple([v.name for v in global_vars])) + else: + self.assertCountEqual(('vars/embedding_weights:0',), + tuple([v.name for v in global_vars])) + embedding_var = global_vars[0] + + self.evaluate(variables_lib.global_variables_initializer()) + self.evaluate(lookup_ops.tables_initializer()) + + self.assertAllEqual(embedding_values, self.evaluate(embedding_var)) + self.assertAllEqual(expected_lookups_a, self.evaluate(embedding_lookup_a)) + self.assertAllEqual(expected_lookups_b, self.evaluate(embedding_lookup_b)) + + if use_safe_embedding_lookup: + self.assertIn( + 'SparseFillEmptyRows', + [x.type for x in ops.get_default_graph().get_operations()]) + else: + self.assertNotIn( + 'SparseFillEmptyRows', + [x.type for x in ops.get_default_graph().get_operations()]) + + def test_get_dense_tensor_weight_collections(self): + with ops.Graph().as_default(): + # Inputs. + vocabulary_size = 3 + # -1 values are ignored. + input_a = np.array([ + [2, -1, -1], # example 0, ids [2] + [0, 1, -1] + ]) # example 1, ids [0, 1] + input_b = np.array([ + [0, -1, -1], # example 0, ids [0] + [-1, -1, -1] + ]) # example 1, ids [] + input_features = {'aaa': input_a, 'bbb': input_b} + + # Embedding variable. + embedding_dimension = 2 + embedding_values = ( + (1., 2.), # id 0 + (3., 5.), # id 1 + (7., 11.) # id 2 ) - # Build columns. - categorical_column_a = fc._categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - categorical_column_b = fc._categorical_column_with_identity( - key='bbb', num_buckets=vocabulary_size) + def _initializer(shape, dtype, partition_info): + self.assertAllEqual((vocabulary_size, embedding_dimension), shape) + self.assertEqual(dtypes.float32, dtype) + self.assertIsNone(partition_info) + return embedding_values - partitioner = None - if partition_variables: - partitioner = partitioned_variables.fixed_size_partitioner(2, axis=0) - - with variable_scope.variable_scope('vars', partitioner=partitioner): + # Build columns. + categorical_column_a = fc._categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + categorical_column_b = fc._categorical_column_with_identity( + key='bbb', num_buckets=vocabulary_size) embedding_column_a, embedding_column_b = fc_new.shared_embedding_columns( [categorical_column_a, categorical_column_b], dimension=embedding_dimension, - initializer=_initializer, - use_safe_embedding_lookup=use_safe_embedding_lookup) - # Provide sparse input and get dense result. - embedding_lookup_a = embedding_column_a._get_dense_tensor( - _LazyBuilder(input_features)) - embedding_lookup_b = embedding_column_b._get_dense_tensor( - _LazyBuilder(input_features)) - # Assert expected embedding variable and lookups. 
- global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) - if partition_variables: - self.assertCountEqual(('vars/embedding_weights/part_0:0', - 'vars/embedding_weights/part_1:0'), - tuple([v.name for v in global_vars])) - else: - self.assertCountEqual(('vars/embedding_weights:0',), - tuple([v.name for v in global_vars])) - embedding_var = global_vars[0] + initializer=_initializer) - self.evaluate(variables_lib.global_variables_initializer()) - self.evaluate(lookup_ops.tables_initializer()) + fc.input_layer( + input_features, [embedding_column_a, embedding_column_b], + weight_collections=('my_vars',)) - self.assertAllEqual(embedding_values, self.evaluate(embedding_var)) - self.assertAllEqual(expected_lookups_a, self.evaluate(embedding_lookup_a)) - self.assertAllEqual(expected_lookups_b, self.evaluate(embedding_lookup_b)) - - if use_safe_embedding_lookup: - self.assertIn('SparseFillEmptyRows', - [x.type for x in ops.get_default_graph().get_operations()]) - else: - self.assertNotIn( - 'SparseFillEmptyRows', - [x.type for x in ops.get_default_graph().get_operations()]) - - @test_util.run_deprecated_v1 - def test_get_dense_tensor_weight_collections(self): - # Inputs. - vocabulary_size = 3 - # -1 values are ignored. - input_a = np.array([ - [2, -1, -1], # example 0, ids [2] - [0, 1, -1] - ]) # example 1, ids [0, 1] - input_b = np.array([ - [0, -1, -1], # example 0, ids [0] - [-1, -1, -1] - ]) # example 1, ids [] - input_features = {'aaa': input_a, 'bbb': input_b} - - # Embedding variable. - embedding_dimension = 2 - embedding_values = ( - (1., 2.), # id 0 - (3., 5.), # id 1 - (7., 11.) # id 2 - ) - - def _initializer(shape, dtype, partition_info): - self.assertAllEqual((vocabulary_size, embedding_dimension), shape) - self.assertEqual(dtypes.float32, dtype) - self.assertIsNone(partition_info) - return embedding_values - - # Build columns. - categorical_column_a = fc._categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - categorical_column_b = fc._categorical_column_with_identity( - key='bbb', num_buckets=vocabulary_size) - embedding_column_a, embedding_column_b = fc_new.shared_embedding_columns( - [categorical_column_a, categorical_column_b], - dimension=embedding_dimension, - initializer=_initializer) - - fc.input_layer( - input_features, [embedding_column_a, embedding_column_b], - weight_collections=('my_vars',)) - - # Assert expected embedding variable and lookups. - global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) - self.assertCountEqual( - ('input_layer/aaa_bbb_shared_embedding/embedding_weights:0',), - tuple(v.name for v in global_vars)) - my_vars = ops.get_collection('my_vars') - self.assertCountEqual( - ('input_layer/aaa_bbb_shared_embedding/embedding_weights:0',), - tuple(v.name for v in my_vars)) + # Assert expected embedding variable and lookups. 
+ global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + self.assertCountEqual( + ('input_layer/aaa_bbb_shared_embedding/embedding_weights:0',), + tuple(v.name for v in global_vars)) + my_vars = ops.get_collection('my_vars') + self.assertCountEqual( + ('input_layer/aaa_bbb_shared_embedding/embedding_weights:0',), + tuple(v.name for v in my_vars)) @test_util.run_deprecated_v1 def test_get_dense_tensor_placeholder_inputs(self): @@ -6010,40 +6012,42 @@ class SharedEmbeddingColumnTest(test.TestCase, parameterized.TestCase): with _initialized_session() as sess: sess.run([embedding_lookup_a, embedding_lookup_b], feed_dict=feed_dict) - @test_util.run_deprecated_v1 def test_linear_model(self): - # Inputs. - batch_size = 2 - vocabulary_size = 3 - # -1 values are ignored. - input_a = np.array( - [[2, -1, -1], # example 0, ids [2] - [0, 1, -1]]) # example 1, ids [0, 1] - input_b = np.array( - [[0, -1, -1], # example 0, ids [0] - [-1, -1, -1]]) # example 1, ids [] - - # Embedding variable. - embedding_dimension = 2 - embedding_shape = (vocabulary_size, embedding_dimension) - zeros_embedding_values = np.zeros(embedding_shape) - def _initializer(shape, dtype, partition_info): - self.assertAllEqual(embedding_shape, shape) - self.assertEqual(dtypes.float32, dtype) - self.assertIsNone(partition_info) - return zeros_embedding_values - - # Build columns. - categorical_column_a = fc._categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - categorical_column_b = fc._categorical_column_with_identity( - key='bbb', num_buckets=vocabulary_size) - embedding_column_a, embedding_column_b = fc_new.shared_embedding_columns( - [categorical_column_a, categorical_column_b], - dimension=embedding_dimension, - initializer=_initializer) - with ops.Graph().as_default(): + # Inputs. + batch_size = 2 + vocabulary_size = 3 + # -1 values are ignored. + input_a = np.array([ + [2, -1, -1], # example 0, ids [2] + [0, 1, -1] + ]) # example 1, ids [0, 1] + input_b = np.array([ + [0, -1, -1], # example 0, ids [0] + [-1, -1, -1] + ]) # example 1, ids [] + + # Embedding variable. + embedding_dimension = 2 + embedding_shape = (vocabulary_size, embedding_dimension) + zeros_embedding_values = np.zeros(embedding_shape) + + def _initializer(shape, dtype, partition_info): + self.assertAllEqual(embedding_shape, shape) + self.assertEqual(dtypes.float32, dtype) + self.assertIsNone(partition_info) + return zeros_embedding_values + + # Build columns. + categorical_column_a = fc._categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + categorical_column_b = fc._categorical_column_with_identity( + key='bbb', num_buckets=vocabulary_size) + embedding_column_a, embedding_column_b = fc_new.shared_embedding_columns( + [categorical_column_a, categorical_column_b], + dimension=embedding_dimension, + initializer=_initializer) + predictions = fc.linear_model({ categorical_column_a.name: input_a, categorical_column_b.name: input_b, @@ -6101,43 +6105,42 @@ class SharedEmbeddingColumnTest(test.TestCase, parameterized.TestCase): # = [3*1 + 5*2, 3*0 +5*0] = [13, 0] self.assertAllClose([[94. + 13.], [29.]], self.evaluate(predictions)) - @test_util.run_deprecated_v1 def test_keras_linear_model(self): - # Inputs. - batch_size = 2 - vocabulary_size = 3 - # -1 values are ignored. - input_a = np.array([ - [2, -1, -1], # example 0, ids [2] - [0, 1, -1] - ]) # example 1, ids [0, 1] - input_b = np.array([ - [0, -1, -1], # example 0, ids [0] - [-1, -1, -1] - ]) # example 1, ids [] - - # Embedding variable. 
- embedding_dimension = 2 - embedding_shape = (vocabulary_size, embedding_dimension) - zeros_embedding_values = np.zeros(embedding_shape) - - def _initializer(shape, dtype, partition_info): - self.assertAllEqual(embedding_shape, shape) - self.assertEqual(dtypes.float32, dtype) - self.assertIsNone(partition_info) - return zeros_embedding_values - - # Build columns. - categorical_column_a = fc._categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - categorical_column_b = fc._categorical_column_with_identity( - key='bbb', num_buckets=vocabulary_size) - embedding_column_a, embedding_column_b = fc_new.shared_embedding_columns( - [categorical_column_a, categorical_column_b], - dimension=embedding_dimension, - initializer=_initializer) - with ops.Graph().as_default(): + # Inputs. + batch_size = 2 + vocabulary_size = 3 + # -1 values are ignored. + input_a = np.array([ + [2, -1, -1], # example 0, ids [2] + [0, 1, -1] + ]) # example 1, ids [0, 1] + input_b = np.array([ + [0, -1, -1], # example 0, ids [0] + [-1, -1, -1] + ]) # example 1, ids [] + + # Embedding variable. + embedding_dimension = 2 + embedding_shape = (vocabulary_size, embedding_dimension) + zeros_embedding_values = np.zeros(embedding_shape) + + def _initializer(shape, dtype, partition_info): + self.assertAllEqual(embedding_shape, shape) + self.assertEqual(dtypes.float32, dtype) + self.assertIsNone(partition_info) + return zeros_embedding_values + + # Build columns. + categorical_column_a = fc._categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + categorical_column_b = fc._categorical_column_with_identity( + key='bbb', num_buckets=vocabulary_size) + embedding_column_a, embedding_column_b = fc_new.shared_embedding_columns( + [categorical_column_a, categorical_column_b], + dimension=embedding_dimension, + initializer=_initializer) + predictions = get_keras_linear_model_predictions({ categorical_column_a.name: input_a, categorical_column_b.name: input_b, @@ -6196,84 +6199,87 @@ class SharedEmbeddingColumnTest(test.TestCase, parameterized.TestCase): self.assertAllClose([[94. + 13.], [29.]], self.evaluate(predictions)) def _test_input_layer(self, trainable=True): - # Inputs. - vocabulary_size = 3 - sparse_input_a = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - indices=((0, 0), (1, 0), (1, 4)), - values=(2, 0, 1), - dense_shape=(2, 5)) - sparse_input_b = sparse_tensor.SparseTensorValue( - # example 0, ids [0] - # example 1, ids [] - indices=((0, 0),), - values=(0,), - dense_shape=(2, 5)) + with ops.Graph().as_default(): + # Inputs. + vocabulary_size = 3 + sparse_input_a = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 4)), + values=(2, 0, 1), + dense_shape=(2, 5)) + sparse_input_b = sparse_tensor.SparseTensorValue( + # example 0, ids [0] + # example 1, ids [] + indices=((0, 0),), + values=(0,), + dense_shape=(2, 5)) - # Embedding variable. - embedding_dimension = 2 - embedding_values = ( - (1., 2.), # id 0 - (3., 5.), # id 1 - (7., 11.) # id 2 - ) - def _initializer(shape, dtype, partition_info): - self.assertAllEqual((vocabulary_size, embedding_dimension), shape) - self.assertEqual(dtypes.float32, dtype) - self.assertIsNone(partition_info) - return embedding_values + # Embedding variable. + embedding_dimension = 2 + embedding_values = ( + (1., 2.), # id 0 + (3., 5.), # id 1 + (7., 11.) # id 2 + ) - # Expected lookup result, using combiner='mean'. 
- expected_lookups = ( - # example 0: - # A ids [2], embedding = [7, 11] - # B ids [0], embedding = [1, 2] - (7., 11., 1., 2.), - # example 1: - # A ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] - # B ids [], embedding = [0, 0] - (2., 3.5, 0., 0.), - ) + def _initializer(shape, dtype, partition_info): + self.assertAllEqual((vocabulary_size, embedding_dimension), shape) + self.assertEqual(dtypes.float32, dtype) + self.assertIsNone(partition_info) + return embedding_values - # Build columns. - categorical_column_a = fc._categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - categorical_column_b = fc._categorical_column_with_identity( - key='bbb', num_buckets=vocabulary_size) - embedding_column_a, embedding_column_b = fc_new.shared_embedding_columns( - [categorical_column_a, categorical_column_b], - dimension=embedding_dimension, - initializer=_initializer, - trainable=trainable) + # Expected lookup result, using combiner='mean'. + expected_lookups = ( + # example 0: + # A ids [2], embedding = [7, 11] + # B ids [0], embedding = [1, 2] + (7., 11., 1., 2.), + # example 1: + # A ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] + # B ids [], embedding = [0, 0] + (2., 3.5, 0., 0.), + ) - # Provide sparse input and get dense result. - input_layer = fc.input_layer( - features={'aaa': sparse_input_a, 'bbb': sparse_input_b}, - feature_columns=(embedding_column_b, embedding_column_a)) + # Build columns. + categorical_column_a = fc._categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + categorical_column_b = fc._categorical_column_with_identity( + key='bbb', num_buckets=vocabulary_size) + embedding_column_a, embedding_column_b = fc_new.shared_embedding_columns( + [categorical_column_a, categorical_column_b], + dimension=embedding_dimension, + initializer=_initializer, + trainable=trainable) - # Assert expected embedding variable and lookups. - global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) - self.assertCountEqual( - ['input_layer/aaa_bbb_shared_embedding/embedding_weights:0'], - tuple([v.name for v in global_vars])) - trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) - if trainable: + # Provide sparse input and get dense result. + input_layer = fc.input_layer( + features={ + 'aaa': sparse_input_a, + 'bbb': sparse_input_b + }, + feature_columns=(embedding_column_b, embedding_column_a)) + + # Assert expected embedding variable and lookups. 
+ global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) self.assertCountEqual( ['input_layer/aaa_bbb_shared_embedding/embedding_weights:0'], - tuple([v.name for v in trainable_vars])) - else: - self.assertCountEqual([], tuple([v.name for v in trainable_vars])) - shared_embedding_vars = global_vars - with _initialized_session(): - self.assertAllEqual(embedding_values, shared_embedding_vars[0]) - self.assertAllEqual(expected_lookups, self.evaluate(input_layer)) + tuple([v.name for v in global_vars])) + trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) + if trainable: + self.assertCountEqual( + ['input_layer/aaa_bbb_shared_embedding/embedding_weights:0'], + tuple([v.name for v in trainable_vars])) + else: + self.assertCountEqual([], tuple([v.name for v in trainable_vars])) + shared_embedding_vars = global_vars + with _initialized_session(): + self.assertAllEqual(embedding_values, shared_embedding_vars[0]) + self.assertAllEqual(expected_lookups, self.evaluate(input_layer)) - @test_util.run_deprecated_v1 def test_input_layer(self): self._test_input_layer() - @test_util.run_deprecated_v1 def test_input_layer_no_trainable(self): self._test_input_layer(trainable=False) From 9ce5ad0f0a8ddc901b74b590473ff52480132df6 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Tue, 4 Aug 2020 09:46:49 -0700 Subject: [PATCH 2080/2522] Sampler resolve moved to TransformToCLCode. Removed DeviceInfo from many generation functions. PiperOrigin-RevId: 324830359 Change-Id: Ia640efdb558bfaa0e1bb854f1a048cea385690ec --- tensorflow/lite/delegates/gpu/cl/arguments.cc | 28 +++++++++++++++++ .../gpu/cl/kernels/conv_constants.cc | 10 +++---- .../delegates/gpu/cl/kernels/conv_constants.h | 8 +++-- .../delegates/gpu/cl/kernels/conv_powervr.cc | 2 +- .../delegates/gpu/cl/kernels/conv_texture.cc | 11 ++++--- .../delegates/gpu/cl/kernels/conv_texture.h | 3 +- .../delegates/gpu/cl/kernels/converter.cc | 2 -- .../gpu/cl/kernels/convolution_transposed.cc | 24 +++++++-------- .../gpu/cl/kernels/convolution_transposed.h | 4 +-- .../cl/kernels/convolution_transposed_3d.cc | 18 ++++++----- .../cl/kernels/convolution_transposed_3d.h | 3 +- .../convolution_transposed_3x3_thin.cc | 13 ++++---- .../kernels/convolution_transposed_3x3_thin.h | 6 ++-- .../gpu/cl/kernels/depthwise_conv.cc | 26 +++++++--------- .../delegates/gpu/cl/kernels/depthwise_conv.h | 7 ++--- .../gpu/cl/kernels/depthwise_conv_3x3.cc | 10 +++---- .../gpu/cl/kernels/depthwise_conv_3x3.h | 1 - .../delegates/gpu/cl/kernels/max_unpooling.cc | 26 +++++++--------- .../delegates/gpu/cl/kernels/max_unpooling.h | 15 ++++------ .../gpu/cl/kernels/max_unpooling_test.cc | 3 +- .../lite/delegates/gpu/cl/kernels/pooling.cc | 30 ++++++++----------- .../lite/delegates/gpu/cl/kernels/pooling.h | 17 ++++------- .../delegates/gpu/cl/kernels/pooling_test.cc | 12 +++----- .../special/depthwise_conv_plus_1x1_conv.cc | 14 ++++----- .../special/depthwise_conv_plus_1x1_conv.h | 5 ++-- .../lite/delegates/gpu/cl/kernels/util.cc | 11 ------- .../lite/delegates/gpu/cl/kernels/util.h | 14 --------- .../gpu/cl/selectors/operation_selector.cc | 5 ++-- .../gpu/cl/selectors/simple_selectors.cc | 6 ++-- .../gpu/cl/selectors/simple_selectors.h | 2 -- 30 files changed, 147 insertions(+), 189 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/arguments.cc b/tensorflow/lite/delegates/gpu/cl/arguments.cc index ed72bcc7c97..8db58e5e81b 100644 --- a/tensorflow/lite/delegates/gpu/cl/arguments.cc +++ b/tensorflow/lite/delegates/gpu/cl/arguments.cc @@ -145,6 +145,33 @@ 
std::string GetImageModifier(AccessType access) { } } +std::string GetDefaultSamplers(const DeviceInfo& device_info) { + std::string result; + result += + "__constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | " + "CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n"; + if (device_info.IsAdreno3xx()) { + // Unfortunately, CLK_ADDRESS_CLAMP is very slow on Adreno3xx and + // we can observe huge register overhead when compared to other modes. + + // While using CLK_ADDRESS_NONE with out-of-range image coordinates is + // undefined in the OpenCL specification, we have observed that + // CLK_ADDRESS_NONE works like CLK_ADDRESS_CLAMP for out-of-range image + // coordinates for RGBA F16/F32 textures on Adreno3xx devices. Using + // CLK_ADDRESS_NONE is significantly faster than CLK_ADDRESS_CLAMP on Adreno + // 3xx. + result += + "__constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | " + "CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n"; + } else { + result += + "__constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | " + "CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;\n"; + } + + return result; +} + } // namespace // Static @@ -483,6 +510,7 @@ absl::Status Arguments::TransformToCLCode( RETURN_IF_ERROR(ResolveSelectorsPass(linkables, code)); ResolveArgsPass(device_info, code); *code = absl::Substitute(*code, GetListOfArgs()); + *code = GetDefaultSamplers(device_info) + *code; return absl::OkStatus(); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc index ed1ec8be7b1..d5a2a56c19c 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc @@ -59,9 +59,9 @@ ConvConstants::ConvConstants(const OperationDef& definition, dst_channels_(attr.weights.shape.o) { const bool stride_correction = definition_.IsBatchSupported() && stride_.x != 1; - code_ = GenerateConvolutionConstantCode(definition_, kernel_size_, - src_channels_, dst_channels_, - stride_correction, device_info); + code_ = + GenerateConvolutionConstantCode(definition_, kernel_size_, src_channels_, + dst_channels_, stride_correction); if (definition_.precision == CalculationsPrecision::F16 && device_info.IsAdreno3xx()) { compiler_options_.push_back(CompilerOptions::ADRENO_FULL_SIMD_LINE); @@ -97,9 +97,9 @@ ConvConstants& ConvConstants::operator=(ConvConstants&& kernel) { std::string ConvConstants::GenerateConvolutionConstantCode( const OperationDef& op_def, const int2& kernel_size, int src_channels, - int dst_channels, bool stride_correction, const DeviceInfo& device_info) { + int dst_channels, bool stride_correction) { auto src_desc = op_def.src_tensors[0]; - src_desc.SetTextureAddressMode(GetFastestZeroMode(device_info)); + src_desc.SetTextureAddressMode(TextureAddressMode::ZERO); if (op_def.IsBatchSupported()) { src_desc.SetStateVar("BatchedWidth", "true"); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.h index 877f32bdf4c..6504b828158 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.h @@ -60,9 +60,11 @@ class ConvConstants : public GPUOperation { void RearrangeWeightsData(const tflite::gpu::Tensor& weights, absl::Span dst); - std::string GenerateConvolutionConstantCode( - const OperationDef& op_def, const int2& kernel_size, int src_channels, - int dst_channels, bool stride_correction, const DeviceInfo& device_info); + 
std::string GenerateConvolutionConstantCode(const OperationDef& op_def, + const int2& kernel_size, + int src_channels, + int dst_channels, + bool stride_correction); int2 kernel_size_; int2 stride_; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc index 40060007b4e..c4e26725f74 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc @@ -269,7 +269,7 @@ std::string ConvPowerVR::GenerateConv(const DeviceInfo& device_info, bool stride_correction, const ConvParams& conv_params) { auto src_desc = op_def.src_tensors[0]; - src_desc.SetTextureAddressMode(GetFastestZeroMode(device_info)); + src_desc.SetTextureAddressMode(TextureAddressMode::ZERO); if (op_def.IsBatchSupported()) { src_desc.SetStateVar("BatchedWidth", "true"); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc index 6f7ebf2b64b..88035556c86 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc @@ -95,10 +95,9 @@ std::string ConvTexture::GenerateConvCode(const OperationDef& op_def, const int3& block_size, bool is1x1, bool adreno4xx_optimization, bool stride_correction, - bool different_weights_for_height, - const DeviceInfo& device_info) { + bool different_weights_for_height) { auto src_desc = op_def.src_tensors[0]; - src_desc.SetTextureAddressMode(GetFastestZeroMode(device_info)); + src_desc.SetTextureAddressMode(TextureAddressMode::ZERO); if (op_def.IsBatchSupported()) { src_desc.SetStateVar("BatchedWidth", "true"); } @@ -390,9 +389,9 @@ void ConvTexture::GenerateCode(const DeviceInfo& device_info) { definition_.precision == CalculationsPrecision::F16; const bool stride_correction = definition_.IsBatchSupported() && stride_.x != 1; - code_ = GenerateConvCode(definition_, block_size_, is1x1, - adreno4xx_optimization, stride_correction, - different_weights_for_height_, device_info); + code_ = + GenerateConvCode(definition_, block_size_, is1x1, adreno4xx_optimization, + stride_correction, different_weights_for_height_); if (UseFP16SIMD(device_info, definition_.precision, is1x1)) { compiler_options_.push_back(CompilerOptions::ADRENO_FULL_SIMD_LINE); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.h index 9d50f0291da..10efc23a044 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.h @@ -94,8 +94,7 @@ class ConvTexture : public GPUOperation { const int3& block_size, bool is1x1, bool adreno4xx_optimization, bool stride_correction, - bool different_weights_for_height, - const DeviceInfo& device_info); + bool different_weights_for_height); int2 kernel_size_; int2 stride_; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/converter.cc b/tensorflow/lite/delegates/gpu/cl/kernels/converter.cc index 69873aa9922..bd5aaed8bc3 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/converter.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/converter.cc @@ -136,8 +136,6 @@ class FromTensorConverter : public OpenClConverterImpl { R"( #pragma OPENCL EXTENSION cl_khr_fp16 : enable -const sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; - __kernel void from_tensor()" + params_kernel.first + R"(, $0) { int linear_id = get_global_id(0); diff --git 
a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc index ecd2fcbc6e1..a139b3affc9 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc @@ -31,17 +31,16 @@ namespace cl { ConvolutionTransposed::ConvolutionTransposed( const OperationDef& definition, const ConvolutionTransposedAttributes& attr, - const CLDevice& device) + const DeviceInfo& device_info) : GPUOperation(definition), - weights_are_buffer_(device.IsMali()), + weights_are_buffer_(device_info.IsMali()), kernel_size_(attr.weights.shape.w, attr.weights.shape.h), stride_(attr.stride.w, attr.stride.h), padding_(attr.padding.prepended.w, attr.padding.prepended.h), block_size_(2, 2, 2) { const bool is_f16 = definition.precision == CalculationsPrecision::F16; - if (device.IsMali()) { - MaliInfo mali_info = device.GetInfo().mali_info; - if (mali_info.IsMidgard()) { + if (device_info.IsMali()) { + if (device_info.mali_info.IsMidgard()) { block_size_ = is_f16 ? int3(2, 1, 2) : int3(2, 1, 1); } else { block_size_ = is_f16 ? int3(2, 2, 2) : int3(2, 2, 1); @@ -49,13 +48,13 @@ ConvolutionTransposed::ConvolutionTransposed( } const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4); if (dst_depth == 1 || dst_depth == 3) { - if (!device.IsMali()) { + if (!device_info.IsMali()) { block_size_.y *= block_size_.z; } block_size_.z = 1; } - code_ = GenerateConvolutionTransposedCode(definition_, device, + code_ = GenerateConvolutionTransposedCode(definition_, device_info, weights_are_buffer_, block_size_); } @@ -81,10 +80,10 @@ ConvolutionTransposed& ConvolutionTransposed::operator=( } std::string ConvolutionTransposed::GenerateConvolutionTransposedCode( - const OperationDef& op_def, const CLDevice& device, bool weights_are_buffer, - const int3& block_size) { + const OperationDef& op_def, const DeviceInfo& device_info, + bool weights_are_buffer, const int3& block_size) { auto src_desc = op_def.src_tensors[0]; - src_desc.SetTextureAddressMode(GetFastestZeroMode(device)); + src_desc.SetTextureAddressMode(TextureAddressMode::ZERO); AddSrcTensor("src_tensor", src_desc); AddDstTensor("dst_tensor", op_def.dst_tensors[0]); @@ -256,7 +255,7 @@ std::string ConvolutionTransposed::GenerateConvolutionTransposedCode( c += " int x_c = kernel_index * args.src_tensor.Slices();\n"; } c += " for (int s = 0; s < args.src_tensor.Slices(); ++s) {\n"; - const bool conditional_read = device.IsMali(); + const bool conditional_read = device_info.IsMali(); for (int y = 0; y < block_size.y; ++y) { const std::string yindex = std::to_string(y); for (int x = 0; x < block_size.x; ++x) { @@ -361,7 +360,8 @@ absl::Status CreateConvolutionTransposed( const CreationContext& creation_context, const OperationDef& definition, const ConvolutionTransposedAttributes& attr, ConvolutionTransposed* result) { - *result = ConvolutionTransposed(definition, attr, *creation_context.device); + *result = ConvolutionTransposed(definition, attr, + creation_context.device->GetInfo()); RETURN_IF_ERROR( result->UploadWeights(attr.weights, creation_context.context)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.h index 2263e7d2e4f..44e1c942925 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.h @@ -55,7 +55,7 @@ class 
ConvolutionTransposed : public GPUOperation { ConvolutionTransposed* result); explicit ConvolutionTransposed(const OperationDef& definition, const ConvolutionTransposedAttributes& attr, - const CLDevice& device); + const DeviceInfo& device_info); template absl::Status UploadWeights(const tflite::gpu::Tensor& weights, CLContext* context); @@ -65,7 +65,7 @@ class ConvolutionTransposed : public GPUOperation { absl::Span dst); std::string GenerateConvolutionTransposedCode(const OperationDef& op_def, - const CLDevice& device, + const DeviceInfo& device_info, bool weights_are_buffer, const int3& block_size); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.cc index 5385c09eb0f..eeb3ae15e51 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.cc @@ -30,18 +30,19 @@ namespace cl { ConvolutionTransposed3D::ConvolutionTransposed3D( const OperationDef& definition, - const ConvolutionTransposed3DAttributes& attr, const CLDevice& device) + const ConvolutionTransposed3DAttributes& attr, + const DeviceInfo& device_info) : GPUOperation(definition), - weights_are_buffer_(device.IsMali()), + weights_are_buffer_(device_info.IsMali()), kernel_size_(attr.weights.shape.w, attr.weights.shape.h, attr.weights.shape.d), stride_(attr.stride.w, attr.stride.h, attr.stride.d), padding_(attr.padding.prepended.w, attr.padding.prepended.h, attr.padding.prepended.d), block_size_(2, 2, 1, 2) { - code_ = GenerateConvolutionTransposed3DCode(definition_, device, - weights_are_buffer_, block_size_); - if (device.IsPowerVR() && block_size_.y != 1) { + code_ = GenerateConvolutionTransposed3DCode(definition_, weights_are_buffer_, + block_size_); + if (device_info.IsPowerVR() && block_size_.y != 1) { bool is_texture3d = definition_.src_tensors[0].storage_type == TensorStorageType::TEXTURE_3D; bool is_texture_array = definition_.src_tensors[0].storage_type == @@ -75,10 +76,10 @@ ConvolutionTransposed3D& ConvolutionTransposed3D::operator=( } std::string ConvolutionTransposed3D::GenerateConvolutionTransposed3DCode( - const OperationDef& op_def, const CLDevice& device, bool weights_are_buffer, + const OperationDef& op_def, bool weights_are_buffer, const int4& block_size) { auto src_desc = op_def.src_tensors[0]; - src_desc.SetTextureAddressMode(GetFastestZeroMode(device)); + src_desc.SetTextureAddressMode(TextureAddressMode::ZERO); AddSrcTensor("src_tensor", src_desc); AddDstTensor("dst_tensor", op_def.dst_tensors[0]); @@ -402,7 +403,8 @@ absl::Status CreateConvolutionTransposed3D( const CreationContext& creation_context, const OperationDef& definition, const ConvolutionTransposed3DAttributes& attr, ConvolutionTransposed3D* result) { - *result = ConvolutionTransposed3D(definition, attr, *creation_context.device); + *result = ConvolutionTransposed3D(definition, attr, + creation_context.device->GetInfo()); RETURN_IF_ERROR( result->UploadWeights(attr.weights, creation_context.context)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.h index b8b4aa75df2..0025d9da7b6 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.h @@ -55,7 +55,7 @@ class ConvolutionTransposed3D : public GPUOperation { ConvolutionTransposed3D* result); 
ConvolutionTransposed3D(const OperationDef& definition, const ConvolutionTransposed3DAttributes& attr, - const CLDevice& device); + const DeviceInfo& device_info); template absl::Status UploadWeights(const tflite::gpu::Tensor& weights, CLContext* context); @@ -65,7 +65,6 @@ class ConvolutionTransposed3D : public GPUOperation { absl::Span dst); std::string GenerateConvolutionTransposed3DCode(const OperationDef& op_def, - const CLDevice& device, bool weights_are_buffer, const int4& block_size); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.cc index 8f8282781df..4fb93dd3263 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.cc @@ -27,12 +27,11 @@ namespace gpu { namespace cl { ConvolutionTransposed3x3Thin::ConvolutionTransposed3x3Thin( - const OperationDef& definition, const ConvolutionTransposedAttributes& attr, - const DeviceInfo& device_info) + const OperationDef& definition, const ConvolutionTransposedAttributes& attr) : GPUOperation(definition) { code_ = GenerateConvolutionTransposedCode( definition_, DivideRoundUp(attr.weights.shape.i, 4), - DivideRoundUp(attr.weights.shape.o, 4), device_info); + DivideRoundUp(attr.weights.shape.o, 4)); } ConvolutionTransposed3x3Thin::ConvolutionTransposed3x3Thin( @@ -48,10 +47,9 @@ ConvolutionTransposed3x3Thin& ConvolutionTransposed3x3Thin::operator=( } std::string ConvolutionTransposed3x3Thin::GenerateConvolutionTransposedCode( - const OperationDef& op_def, int src_depth, int dst_depth, - const DeviceInfo& device_info) { + const OperationDef& op_def, int src_depth, int dst_depth) { auto src_desc = op_def.src_tensors[0]; - src_desc.SetTextureAddressMode(GetFastestZeroMode(device_info)); + src_desc.SetTextureAddressMode(TextureAddressMode::ZERO); AddSrcTensor("src_tensor", src_desc); AddDstTensor("dst_tensor", op_def.dst_tensors[0]); @@ -208,8 +206,7 @@ absl::Status CreateConvolutionTransposed3x3Thin( return absl::InvalidArgumentError( "ConvolutionTransposed3x3Thin doesn't support this attributes"); } - *result = ConvolutionTransposed3x3Thin(definition, attr, - creation_context.device->GetInfo()); + *result = ConvolutionTransposed3x3Thin(definition, attr); RETURN_IF_ERROR( result->UploadData(attr.weights, attr.bias, creation_context.context)); return absl::OkStatus(); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.h index 274d75cb167..5b4c4d05bac 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.h @@ -54,8 +54,7 @@ class ConvolutionTransposed3x3Thin : public GPUOperation { ConvolutionTransposed3x3Thin* result); explicit ConvolutionTransposed3x3Thin( const OperationDef& definition, - const ConvolutionTransposedAttributes& attr, - const DeviceInfo& device_info); + const ConvolutionTransposedAttributes& attr); template absl::Status UploadData(const tflite::gpu::Tensor& weights, const tflite::gpu::Tensor& biases, @@ -66,8 +65,7 @@ class ConvolutionTransposed3x3Thin : public GPUOperation { absl::Span dst); std::string GenerateConvolutionTransposedCode(const OperationDef& op_def, - int src_depth, int dst_depth, - const DeviceInfo& device_info); + int src_depth, int dst_depth); }; template diff --git 
a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.cc b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.cc index 3ab05134bd6..4b4416751fb 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.cc @@ -70,8 +70,7 @@ std::string GetSrcValue(int channel_multiplier, const std::string coords) { DepthwiseConvolution::DepthwiseConvolution( const OperationDef& definition, - const DepthwiseConvolution2DAttributes& attr, bool weights_are_buffer, - const DeviceInfo& device_info) + const DepthwiseConvolution2DAttributes& attr, bool weights_are_buffer) : GPUOperation(definition), weights_are_buffer_(weights_are_buffer), kernel_size_(attr.weights.shape.w, attr.weights.shape.h, 0, 0), @@ -82,15 +81,13 @@ DepthwiseConvolution::DepthwiseConvolution( work_group_size_ = int3(8, 8, 1); const bool stride_correction = definition_.IsBatchSupported() && stride_.x != 1; - code_ = GenerateDepthwiseConvolutionCode(definition_, stride_correction, - channel_multiplier_, - weights_are_buffer_, device_info); + code_ = GenerateDepthwiseConvolutionCode( + definition_, stride_correction, channel_multiplier_, weights_are_buffer_); } DepthwiseConvolution::DepthwiseConvolution( const OperationDef& definition, - const DepthwiseConvolution3DAttributes& attr, bool weights_are_buffer, - const DeviceInfo& device_info) + const DepthwiseConvolution3DAttributes& attr, bool weights_are_buffer) : GPUOperation(definition), weights_are_buffer_(weights_are_buffer), kernel_size_(attr.weights.shape.w, attr.weights.shape.h, @@ -103,9 +100,8 @@ DepthwiseConvolution::DepthwiseConvolution( work_group_size_ = int3(8, 8, 1); const bool stride_correction = definition_.IsBatchSupported() && stride_.x != 1; - code_ = GenerateDepthwiseConvolutionCode(definition_, stride_correction, - channel_multiplier_, - weights_are_buffer_, device_info); + code_ = GenerateDepthwiseConvolutionCode( + definition_, stride_correction, channel_multiplier_, weights_are_buffer_); } DepthwiseConvolution::DepthwiseConvolution(DepthwiseConvolution&& operation) @@ -133,9 +129,9 @@ DepthwiseConvolution& DepthwiseConvolution::operator=( std::string DepthwiseConvolution::GenerateDepthwiseConvolutionCode( const OperationDef& op_def, bool stride_correction, int channel_multiplier, - bool weights_are_buffer, const DeviceInfo& device_info) { + bool weights_are_buffer) { auto src_desc = op_def.src_tensors[0]; - src_desc.SetTextureAddressMode(GetFastestZeroMode(device_info)); + src_desc.SetTextureAddressMode(TextureAddressMode::ZERO); if (op_def.IsBatchSupported()) { src_desc.SetStateVar("BatchedWidth", "true"); } @@ -315,8 +311,7 @@ absl::Status CreateDepthwiseConvolution( const DepthwiseConvolution2DAttributes& attr, DepthwiseConvolution* result) { bool weights_are_buffer = creation_context.device->IsMali(); - *result = DepthwiseConvolution(definition, attr, weights_are_buffer, - creation_context.device->GetInfo()); + *result = DepthwiseConvolution(definition, attr, weights_are_buffer); RETURN_IF_ERROR( result->UploadWeights(attr.weights, creation_context.context)); @@ -339,8 +334,7 @@ absl::Status CreateDepthwiseConvolution( const DepthwiseConvolution3DAttributes& attr, DepthwiseConvolution* result) { bool weights_are_buffer = creation_context.device->IsMali(); - *result = DepthwiseConvolution(definition, attr, weights_are_buffer, - creation_context.device->GetInfo()); + *result = DepthwiseConvolution(definition, attr, weights_are_buffer); RETURN_IF_ERROR( 
result->UploadWeights(attr.weights, creation_context.context)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.h b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.h index be87c182880..9a841db82ab 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.h @@ -58,10 +58,10 @@ class DepthwiseConvolution : public GPUOperation { DepthwiseConvolution* result); DepthwiseConvolution(const OperationDef& definition, const DepthwiseConvolution2DAttributes& attr, - bool weights_are_buffer, const DeviceInfo& device_info); + bool weights_are_buffer); DepthwiseConvolution(const OperationDef& definition, const DepthwiseConvolution3DAttributes& attr, - bool weights_are_buffer, const DeviceInfo& device_info); + bool weights_are_buffer); template absl::Status UploadWeights(const tflite::gpu::Tensor& weights, @@ -82,8 +82,7 @@ class DepthwiseConvolution : public GPUOperation { std::string GenerateDepthwiseConvolutionCode(const OperationDef& op_def, bool stride_correction, int channel_multiplier, - bool weights_are_buffer, - const DeviceInfo& device_info); + bool weights_are_buffer); bool weights_are_buffer_; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc index 0bd84c3b116..e171231fc0a 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc @@ -35,8 +35,8 @@ DepthwiseConv3x3::DepthwiseConv3x3(const OperationDef& definition, weights_are_buffer_(weights_are_buffer), local_mem_uploads_(local_mem_uploads) { work_group_size_ = int3(8, 4, 1); - code_ = GenerateDepthwiseConvCode(definition_, device_info, - weights_are_buffer_, local_mem_uploads_); + code_ = GenerateDepthwiseConvCode(definition_, weights_are_buffer_, + local_mem_uploads_); if (definition_.precision == CalculationsPrecision::F16 && device_info.IsPowerVR()) { @@ -59,10 +59,10 @@ DepthwiseConv3x3& DepthwiseConv3x3::operator=(DepthwiseConv3x3&& operation) { } std::string DepthwiseConv3x3::GenerateDepthwiseConvCode( - const OperationDef& op_def, const DeviceInfo& device_info, - bool weights_are_buffer, bool local_mem_uploads) { + const OperationDef& op_def, bool weights_are_buffer, + bool local_mem_uploads) { auto src_desc = op_def.src_tensors[0]; - src_desc.SetTextureAddressMode(GetFastestZeroMode(device_info)); + src_desc.SetTextureAddressMode(TextureAddressMode::ZERO); AddSrcTensor("src_tensor", src_desc); AddDstTensor("dst_tensor", op_def.dst_tensors[0]); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.h b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.h index d02d65b4e38..dedc9b530bb 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.h @@ -67,7 +67,6 @@ class DepthwiseConv3x3 : public GPUOperation { const tflite::gpu::Tensor& biases, absl::Span dst); std::string GenerateDepthwiseConvCode(const OperationDef& op_def, - const DeviceInfo& device_info, bool weights_are_buffer, bool local_mem_uploads); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.cc b/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.cc index 7be6cc0b9b4..97ee4878572 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.cc @@ -25,24 +25,22 @@ namespace gpu { namespace cl { 
MaxUnpooling::MaxUnpooling(const OperationDef& definition, - const MaxUnpooling2DAttributes& attr, - const DeviceInfo& device_info) + const MaxUnpooling2DAttributes& attr) : GPUOperation(definition), stride_(attr.strides.w, attr.strides.h, 0, 0), padding_(attr.padding.appended.w, attr.padding.appended.h, 0, 0), kernel_size_(attr.kernel.w, attr.kernel.h, 0, 0) { - code_ = GetMaxUnpoolingKernelCode(definition_, device_info); + code_ = GetMaxUnpoolingKernelCode(definition_); } MaxUnpooling::MaxUnpooling(const OperationDef& definition, - const MaxUnpooling3DAttributes& attr, - const DeviceInfo& device_info) + const MaxUnpooling3DAttributes& attr) : GPUOperation(definition), stride_(attr.strides.w, attr.strides.h, attr.strides.d, 0), padding_(attr.padding.appended.w, attr.padding.appended.h, attr.padding.appended.d, 0), kernel_size_(attr.kernel.w, attr.kernel.h, attr.kernel.d, 0) { - code_ = GetMaxUnpoolingKernelCode(definition_, device_info); + code_ = GetMaxUnpoolingKernelCode(definition_); } MaxUnpooling::MaxUnpooling(MaxUnpooling&& kernel) @@ -62,15 +60,15 @@ MaxUnpooling& MaxUnpooling::operator=(MaxUnpooling&& kernel) { } std::string MaxUnpooling::GetMaxUnpoolingKernelCode( - const OperationDef& op_def, const DeviceInfo& device_info) { + const OperationDef& op_def) { auto src_desc = op_def.src_tensors[0]; - src_desc.SetTextureAddressMode(GetFastestZeroMode(device_info)); + src_desc.SetTextureAddressMode(TextureAddressMode::ZERO); if (op_def.IsBatchSupported()) { src_desc.SetStateVar("BatchedWidth", "true"); } AddSrcTensor("src_tensor", src_desc); auto src_ind_desc = op_def.src_tensors[1]; - src_ind_desc.SetTextureAddressMode(GetFastestZeroMode(device_info)); + src_ind_desc.SetTextureAddressMode(TextureAddressMode::ZERO); if (op_def.IsBatchSupported()) { src_ind_desc.SetStateVar("BatchedWidth", "true"); } @@ -202,15 +200,13 @@ int3 MaxUnpooling::GetGridSize() const { } MaxUnpooling CreateMaxUnpooling(const OperationDef& definition, - const MaxUnpooling2DAttributes& attr, - const DeviceInfo& device_info) { - return MaxUnpooling(definition, attr, device_info); + const MaxUnpooling2DAttributes& attr) { + return MaxUnpooling(definition, attr); } MaxUnpooling CreateMaxUnpooling(const OperationDef& definition, - const MaxUnpooling3DAttributes& attr, - const DeviceInfo& device_info) { - return MaxUnpooling(definition, attr, device_info); + const MaxUnpooling3DAttributes& attr) { + return MaxUnpooling(definition, attr); } } // namespace cl diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.h b/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.h index da4b0e28cec..0b1420a67c9 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.h @@ -28,11 +28,9 @@ namespace cl { class MaxUnpooling : public GPUOperation { public: MaxUnpooling(const OperationDef& definition, - const MaxUnpooling2DAttributes& attr, - const DeviceInfo& device_info); + const MaxUnpooling2DAttributes& attr); MaxUnpooling(const OperationDef& definition, - const MaxUnpooling3DAttributes& attr, - const DeviceInfo& device_info); + const MaxUnpooling3DAttributes& attr); absl::Status BindArguments() override; int3 GetGridSize() const override; @@ -44,8 +42,7 @@ class MaxUnpooling : public GPUOperation { MaxUnpooling& operator=(const MaxUnpooling&) = delete; private: - std::string GetMaxUnpoolingKernelCode(const OperationDef& op_def, - const DeviceInfo& device_info); + std::string GetMaxUnpoolingKernelCode(const OperationDef& op_def); int4 
stride_; int4 padding_; @@ -53,12 +50,10 @@ class MaxUnpooling : public GPUOperation { }; MaxUnpooling CreateMaxUnpooling(const OperationDef& definition, - const MaxUnpooling2DAttributes& attr, - const DeviceInfo& device_info); + const MaxUnpooling2DAttributes& attr); MaxUnpooling CreateMaxUnpooling(const OperationDef& definition, - const MaxUnpooling3DAttributes& attr, - const DeviceInfo& device_info); + const MaxUnpooling3DAttributes& attr); } // namespace cl } // namespace gpu diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling_test.cc index 77e92c8950b..c03cb4f89d7 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling_test.cc @@ -55,8 +55,7 @@ TEST_F(OpenCLOperationTest, MaxUnpooling) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - MaxUnpooling operation = - CreateMaxUnpooling(op_def, attr, env_.GetDevicePtr()->GetInfo()); + MaxUnpooling operation = CreateMaxUnpooling(op_def, attr); ASSERT_OK(ExecuteGPUOperation({src_tensor, src_ind_tensor}, creation_context_, &operation, BHWC(1, 4, 4, 1), &dst_tensor)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/pooling.cc b/tensorflow/lite/delegates/gpu/cl/kernels/pooling.cc index 0c5a7a64d15..fb077fe4a1a 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/pooling.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/pooling.cc @@ -25,18 +25,18 @@ namespace gpu { namespace cl { Pooling::Pooling(const OperationDef& definition, - const Pooling2DAttributes& attr, const DeviceInfo& device_info) + const Pooling2DAttributes& attr) : GPUOperation(definition), stride_(attr.strides.w, attr.strides.h, 0, 0), padding_(-attr.padding.prepended.w, -attr.padding.prepended.h, 0, 0), kernel_size_(attr.kernel.w, attr.kernel.h, 0, 0), type_(attr.type), output_indices_(attr.output_indices) { - GenerateCode(device_info); + GenerateCode(); } Pooling::Pooling(const OperationDef& definition, - const Pooling3DAttributes& attr, const DeviceInfo& device_info) + const Pooling3DAttributes& attr) : GPUOperation(definition), stride_(attr.strides.w, attr.strides.h, attr.strides.d, 0), padding_(-attr.padding.prepended.w, -attr.padding.prepended.h, @@ -44,7 +44,7 @@ Pooling::Pooling(const OperationDef& definition, kernel_size_(attr.kernel.w, attr.kernel.h, attr.kernel.d, 0), type_(attr.type), output_indices_(attr.output_indices) { - GenerateCode(device_info); + GenerateCode(); } Pooling::Pooling(Pooling&& kernel) @@ -67,11 +67,10 @@ Pooling& Pooling::operator=(Pooling&& kernel) { return *this; } -std::string Pooling::GetAveragePoolingKernelCode( - const OperationDef& op_def, bool stride_correction, - const DeviceInfo& device_info) { +std::string Pooling::GetAveragePoolingKernelCode(const OperationDef& op_def, + bool stride_correction) { auto src_desc = op_def.src_tensors[0]; - src_desc.SetTextureAddressMode(GetFastestZeroMode(device_info)); + src_desc.SetTextureAddressMode(TextureAddressMode::ZERO); if (op_def.IsBatchSupported()) { src_desc.SetStateVar("BatchedWidth", "true"); } @@ -348,12 +347,11 @@ std::string Pooling::GetMaxPoolingKernelCode(const OperationDef& op_def, return c; } -void Pooling::GenerateCode(const DeviceInfo& device_info) { +void Pooling::GenerateCode() { const bool stride_correction = definition_.IsBatchSupported() && stride_.x != 1; if (type_ == PoolingType::AVERAGE) { - code_ = 
GetAveragePoolingKernelCode(definition_, stride_correction, - device_info); + code_ = GetAveragePoolingKernelCode(definition_, stride_correction); } else if (type_ == PoolingType::MAX) { code_ = GetMaxPoolingKernelCode(definition_, stride_correction, output_indices_); @@ -387,15 +385,13 @@ int3 Pooling::GetGridSize() const { } Pooling CreatePooling(const OperationDef& definition, - const Pooling2DAttributes& attr, - const DeviceInfo& device_info) { - return Pooling(definition, attr, device_info); + const Pooling2DAttributes& attr) { + return Pooling(definition, attr); } Pooling CreatePooling(const OperationDef& definition, - const Pooling3DAttributes& attr, - const DeviceInfo& device_info) { - return Pooling(definition, attr, device_info); + const Pooling3DAttributes& attr) { + return Pooling(definition, attr); } } // namespace cl diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/pooling.h b/tensorflow/lite/delegates/gpu/cl/kernels/pooling.h index 07c3c6d85da..18bb426f259 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/pooling.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/pooling.h @@ -29,10 +29,8 @@ namespace cl { class Pooling : public GPUOperation { public: - Pooling(const OperationDef& definition, const Pooling2DAttributes& attr, - const DeviceInfo& device_info); - Pooling(const OperationDef& definition, const Pooling3DAttributes& attr, - const DeviceInfo& device_info); + Pooling(const OperationDef& definition, const Pooling2DAttributes& attr); + Pooling(const OperationDef& definition, const Pooling3DAttributes& attr); absl::Status BindArguments() override; int3 GetGridSize() const override; @@ -45,13 +43,12 @@ class Pooling : public GPUOperation { private: std::string GetAveragePoolingKernelCode(const OperationDef& op_def, - bool stride_correction, - const DeviceInfo& device_info); + bool stride_correction); std::string GetMaxPoolingKernelCode(const OperationDef& op_def, bool stride_correction, bool output_indices); - void GenerateCode(const DeviceInfo& device_info); + void GenerateCode(); int4 stride_; int4 padding_; @@ -62,12 +59,10 @@ class Pooling : public GPUOperation { }; Pooling CreatePooling(const OperationDef& definition, - const Pooling2DAttributes& attr, - const DeviceInfo& device_info); + const Pooling2DAttributes& attr); Pooling CreatePooling(const OperationDef& definition, - const Pooling3DAttributes& attr, - const DeviceInfo& device_info); + const Pooling3DAttributes& attr); } // namespace cl } // namespace gpu diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/pooling_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/pooling_test.cc index 7ebcc4871c5..12efd56f5d2 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/pooling_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/pooling_test.cc @@ -52,8 +52,7 @@ TEST_F(OpenCLOperationTest, AveragePooling) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - Pooling operation = - CreatePooling(op_def, attr, env_.GetDevicePtr()->GetInfo()); + Pooling operation = CreatePooling(op_def, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 1, 1, 2), &dst_tensor)); EXPECT_THAT(dst_tensor.data, Pointwise(FloatNear(eps), {3.0f, 4.0f})); @@ -82,8 +81,7 @@ TEST_F(OpenCLOperationTest, AveragePoolingNonEmptyPadding) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - 
Pooling operation = - CreatePooling(op_def, attr, env_.GetDevicePtr()->GetInfo()); + Pooling operation = CreatePooling(op_def, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 2, 2, 1), &dst_tensor)); EXPECT_THAT(dst_tensor.data, @@ -113,8 +111,7 @@ TEST_F(OpenCLOperationTest, MaxPooling) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - Pooling operation = - CreatePooling(op_def, attr, env_.GetDevicePtr()->GetInfo()); + Pooling operation = CreatePooling(op_def, attr); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 1, 1, 2), &dst_tensor)); EXPECT_THAT(dst_tensor.data, Pointwise(FloatNear(eps), {8.0f, 7.0f})); @@ -146,8 +143,7 @@ TEST_F(OpenCLOperationTest, MaxPoolingIndices) { op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; TensorFloat32 dst_tensor_ind; - Pooling operation = - CreatePooling(op_def, attr, env_.GetDevicePtr()->GetInfo()); + Pooling operation = CreatePooling(op_def, attr); ASSERT_OK(ExecuteGPUOperation({src_tensor}, creation_context_, &operation, {BHWC(1, 1, 1, 2), BHWC(1, 1, 1, 2)}, {&dst_tensor, &dst_tensor_ind})); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/special/depthwise_conv_plus_1x1_conv.cc b/tensorflow/lite/delegates/gpu/cl/kernels/special/depthwise_conv_plus_1x1_conv.cc index 88417ce6f1e..e95e758fc95 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/special/depthwise_conv_plus_1x1_conv.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/special/depthwise_conv_plus_1x1_conv.cc @@ -30,12 +30,11 @@ namespace cl { DepthwiseConvPlus1x1Conv::DepthwiseConvPlus1x1Conv( const OperationDef& definition, const DepthwiseConvolution2DAttributes& dw_attr, - const Convolution2DAttributes& conv_attr, const DeviceInfo& device_info) + const Convolution2DAttributes& conv_attr) : GPUOperation(definition), dw_attr_(dw_attr) { work_group_size_ = int3(8, 8, 1); - code_ = - GenerateCode(definition_, dw_attr_, - DivideRoundUp(conv_attr.weights.shape.o, 4), device_info); + code_ = GenerateCode(definition_, dw_attr_, + DivideRoundUp(conv_attr.weights.shape.o, 4)); } DepthwiseConvPlus1x1Conv::DepthwiseConvPlus1x1Conv( @@ -146,9 +145,9 @@ absl::Status DepthwiseConvPlus1x1Conv::UploadWeights( std::string DepthwiseConvPlus1x1Conv::GenerateCode( const OperationDef& op_def, const DepthwiseConvolution2DAttributes& dw_attr, - int result_depth, const DeviceInfo& device_info) { + int result_depth) { auto src_desc = op_def.src_tensors[0]; - src_desc.SetTextureAddressMode(GetFastestZeroMode(device_info)); + src_desc.SetTextureAddressMode(TextureAddressMode::ZERO); AddSrcTensor("src_tensor", src_desc); AddDstTensor("dst_tensor", op_def.dst_tensors[0]); @@ -273,8 +272,7 @@ absl::Status CreateDepthwiseConvPlus1x1Conv( const DepthwiseConvolution2DAttributes& dw_attr, const Convolution2DAttributes& conv_attr, DepthwiseConvPlus1x1Conv* result) { - *result = DepthwiseConvPlus1x1Conv(definition, dw_attr, conv_attr, - creation_context.device->GetInfo()); + *result = DepthwiseConvPlus1x1Conv(definition, dw_attr, conv_attr); RETURN_IF_ERROR( result->UploadWeights(dw_attr, conv_attr, creation_context.context)); return absl::OkStatus(); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/special/depthwise_conv_plus_1x1_conv.h b/tensorflow/lite/delegates/gpu/cl/kernels/special/depthwise_conv_plus_1x1_conv.h index d4037c83b30..b2d3b05d285 100644 --- 
a/tensorflow/lite/delegates/gpu/cl/kernels/special/depthwise_conv_plus_1x1_conv.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/special/depthwise_conv_plus_1x1_conv.h @@ -52,8 +52,7 @@ class DepthwiseConvPlus1x1Conv : public GPUOperation { DepthwiseConvPlus1x1Conv* result); DepthwiseConvPlus1x1Conv(const OperationDef& definition, const DepthwiseConvolution2DAttributes& dw_attr, - const Convolution2DAttributes& conv_attr, - const DeviceInfo& device_info); + const Convolution2DAttributes& conv_attr); absl::Status UploadWeights(const DepthwiseConvolution2DAttributes& dw_attr, const Convolution2DAttributes& conv_attr, @@ -61,7 +60,7 @@ class DepthwiseConvPlus1x1Conv : public GPUOperation { std::string GenerateCode(const OperationDef& op_def, const DepthwiseConvolution2DAttributes& dw_attr, - int result_depth, const DeviceInfo& device_info); + int result_depth); DepthwiseConvolution2DAttributes dw_attr_; }; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/util.cc b/tensorflow/lite/delegates/gpu/cl/kernels/util.cc index e3599eb5044..3fe4ffb4acd 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/util.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/util.cc @@ -69,17 +69,6 @@ std::string GetCommonDefines(CalculationsPrecision precision) { result += "#define TO_ACCUM_FLT convert_float\n"; break; } - - result += - "__constant sampler_t smp_edge = CLK_NORMALIZED_COORDS_FALSE | " - "CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n"; - result += - "__constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | " - "CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n"; - result += - "__constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | " - "CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;\n"; - return result; } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/util.h b/tensorflow/lite/delegates/gpu/cl/kernels/util.h index b41d0efb91e..173a4d43072 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/util.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/util.h @@ -83,20 +83,6 @@ void RearrangeWeightsToOHWIOGroupI4O4( } } -// Returns fastest TextureAddressMode that return ZERO for out-of-range image -// coordinates. -// -// Unfortunately, CLK_ADDRESS_CLAMP is very slow on Adreno3xx and -// we can observe huge register overhead when compared to other modes. - -// While using CLK_ADDRESS_NONE with out-of-range image coordinates is undefined -// in the OpenCL specification, we have observed that CLK_ADDRESS_NONE works -// like CLK_ADDRESS_CLAMP for out-of-range image coordinates for RGBA F16/F32 -// textures on Adreno3xx devices. Using CLK_ADDRESS_NONE is significantly faster -// than CLK_ADDRESS_CLAMP on Adreno 3xx. 
-TextureAddressMode GetFastestZeroMode(const CLDevice& device); -TextureAddressMode GetFastestZeroMode(const DeviceInfo& device_info); - // Returns float4 mask for last plane(batch of 4 channels) // assumes that plane size is 4; // for example we have 7 channels, in our data structures we align it to 8 diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc index e1225e83e95..5661c3d0a37 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc @@ -254,8 +254,7 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context, case OperationType::MAX_UNPOOLING_2D: { auto attr = absl::any_cast(node.operation.attributes); - SelectMaxUnpooling(attr, op_def, creation_context.device->GetInfo(), - gpu_op); + SelectMaxUnpooling(attr, op_def, gpu_op); return absl::OkStatus(); } case OperationType::MEAN: { @@ -277,7 +276,7 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context, case OperationType::POOLING_2D: { auto attr = absl::any_cast(node.operation.attributes); - SelectPooling(attr, op_def, creation_context.device->GetInfo(), gpu_op); + SelectPooling(attr, op_def, gpu_op); return absl::OkStatus(); } case OperationType::PRELU: { diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.cc b/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.cc index 1c0bed74422..ca5ec9f4f23 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.cc @@ -69,17 +69,15 @@ absl::Status SelectPReLU(const PReLUAttributes& attr, } void SelectPooling(const Pooling2DAttributes& attr, const OperationDef& op_def, - const DeviceInfo& device_info, std::unique_ptr* ptr) { - Pooling pooling = CreatePooling(op_def, attr, device_info); + Pooling pooling = CreatePooling(op_def, attr); *ptr = absl::make_unique(std::move(pooling)); } void SelectMaxUnpooling(const MaxUnpooling2DAttributes& attr, const OperationDef& op_def, - const DeviceInfo& device_info, std::unique_ptr* ptr) { - MaxUnpooling operation = CreateMaxUnpooling(op_def, attr, device_info); + MaxUnpooling operation = CreateMaxUnpooling(op_def, attr); *ptr = absl::make_unique(std::move(operation)); } diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.h b/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.h index 7133aa94502..556698ef62f 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.h +++ b/tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.h @@ -41,12 +41,10 @@ absl::Status SelectPReLU(const PReLUAttributes& attr, std::unique_ptr* ptr); void SelectPooling(const Pooling2DAttributes& attr, const OperationDef& op_def, - const DeviceInfo& device_info, std::unique_ptr* ptr); void SelectMaxUnpooling(const MaxUnpooling2DAttributes& attr, const OperationDef& op_def, - const DeviceInfo& device_info, std::unique_ptr* ptr); void SelectAdd(const OperationDef& op_def, const std::vector& channels, From 6ccee7b92432a2a0c41dc3b8ae207f3b570d6737 Mon Sep 17 00:00:00 2001 From: Doe Hyun Yoon Date: Tue, 4 Aug 2020 10:02:42 -0700 Subject: [PATCH 2081/2522] Add ImmutableNodeMap for const GraphDef. NodeMap and ImmutableNodeMap are subclass of NodeMapInternal. 
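Illustration (not part of this patch): a minimal usage sketch of the two maps after this change, mirroring the utils_test.cc updates below. The node name "add" and both graph arguments are placeholders, and the snippet assumes only the grappler utils header touched by this commit.

    #include "tensorflow/core/framework/graph.pb.h"
    #include "tensorflow/core/grappler/utils.h"

    namespace tensorflow {
    namespace grappler {

    void NodeMapUsageSketch(GraphDef* mutable_graph, const GraphDef& frozen_graph) {
      // NodeMap is built from a mutable GraphDef*; lookups return NodeDef* and
      // fanouts are tracked per node name.
      NodeMap node_map(mutable_graph);
      NodeDef* add = node_map.GetNode("add");
      const auto& fanouts = node_map.GetOutputs("add");

      // ImmutableNodeMap is built from a const GraphDef*; lookups return
      // const NodeDef*, so the graph cannot be mutated through the map.
      ImmutableNodeMap immutable_map(&frozen_graph);
      const NodeDef* add_readonly = immutable_map.GetNode("add");

      (void)add; (void)fanouts; (void)add_readonly;
    }

    }  // namespace grappler
    }  // namespace tensorflow

Both classes share the lookup logic through the NodeMapInternal base template; only the constness of the stored NodeDef pointers differs.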
PiperOrigin-RevId: 324833428 Change-Id: I6fbc95fa7ee8ff38e0409314589806aa217aa5b3 --- tensorflow/core/grappler/utils.cc | 31 ++++---- tensorflow/core/grappler/utils.h | 64 ++++++++++++--- tensorflow/core/grappler/utils_test.cc | 103 ++++++++++++++++++------- 3 files changed, 138 insertions(+), 60 deletions(-) diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc index 151bb9d5d86..e342f7dfdf0 100644 --- a/tensorflow/core/grappler/utils.cc +++ b/tensorflow/core/grappler/utils.cc @@ -73,26 +73,21 @@ bool IsShapeConsumer(const NodeDef& node) { } // namespace -NodeMap::NodeMap(GraphDef* graph) { - CHECK(graph != nullptr); - nodes_.reserve(graph->node_size()); - outputs_.reserve(graph->node_size()); - for (int i = 0; i < graph->node_size(); i++) { - NodeDef* node = graph->mutable_node(i); - const string& node_name = node->name(); - auto rslt = nodes_.emplace(node_name, node); - // Check that the graph doesn't contain multiple nodes with the same name. - if (!rslt.second) { - // The first node found with a given name becomes the canonical. - LOG(WARNING) << "Duplicated node in the graph: " << node_name; - } - NodeDef* canonical = rslt.second ? node : rslt.first->second; - for (const auto& input : node->input()) { - outputs_[NodeName(input)].insert(canonical); - } - } +namespace internal { +// Specialized template class method GetNodeDefFromGraph. +template <> +NodeDef* NodeMapInternal::GetNodeDefFromGraph( + GraphDef* graph, int64 i) const { + return graph->mutable_node(i); } +template <> +const NodeDef* +NodeMapInternal::GetNodeDefFromGraph( + const GraphDef* graph, int64 i) const { + return &graph->node(i); +} +} // namespace internal string TensorIdToString(const TensorId& tensor_id) { return tensor_id.index() == 0 ? string(tensor_id.node()) : tensor_id.ToString(); diff --git a/tensorflow/core/grappler/utils.h b/tensorflow/core/grappler/utils.h index e529d5fb4ad..e9ab5b7da12 100644 --- a/tensorflow/core/grappler/utils.h +++ b/tensorflow/core/grappler/utils.h @@ -98,16 +98,39 @@ inline int NodePosition(const string& name) { return position; } -// A utility class to lookup a node and its outputs by node name. -class NodeMap { +namespace internal { +// Base template class for NodeMap and ImmutableNodeMap. +template +class NodeMapInternal { public: // Note: The NodeMap will store pointers to nodes in graph, which may become // invalid if graph is changed. - explicit NodeMap(GraphDef* graph); + explicit NodeMapInternal(GraphDefT* graph) { + if (graph == nullptr) { + LOG(WARNING) << "NodeMapInternal constructor is called with a nullptr!"; + return; + } + nodes_.reserve(graph->node_size()); + outputs_.reserve(graph->node_size()); + for (int i = 0; i < graph->node_size(); i++) { + NodeDefT* node = GetNodeDefFromGraph(graph, i); + const string& node_name = node->name(); + auto rslt = nodes_.emplace(node_name, node); + // Check that the graph doesn't contain multiple nodes with the same name. + if (!rslt.second) { + // The first node found with a given name becomes the canonical. + LOG(WARNING) << "Duplicated node in the graph: " << node_name; + } + NodeDefT* canonical = rslt.second ? node : rslt.first->second; + for (const auto& input : node->input()) { + outputs_[NodeName(input)].insert(canonical); + } + } + } // Get unordered list of fanouts from node. Notice, that the order is // non-deterministic. 
- const absl::flat_hash_set& GetOutputs( + const absl::flat_hash_set& GetOutputs( const string& node_name) const { auto it = outputs_.find(node_name); if (it == outputs_.end()) { @@ -117,12 +140,12 @@ class NodeMap { } // Get fanouts ordered by name. - std::vector GetOutputsOrderedByNodeName( + std::vector GetOutputsOrderedByNodeName( const string& node_name) const { - std::vector result; + std::vector result; auto it = outputs_.find(node_name); if (it != outputs_.end()) { - const absl::flat_hash_set& outputs = it->second; + const absl::flat_hash_set& outputs = it->second; result.reserve(outputs.size()); result.assign(outputs.begin(), outputs.end()); std::sort(result.begin(), result.end(), @@ -135,7 +158,7 @@ class NodeMap { // This method doesn't record the outputs of the added node; the outputs need // to be explicitly added by the AddOutput method. - void AddNode(const string& node_name, NodeDef* node) { + void AddNode(const string& node_name, NodeDefT* node) { DCHECK(node != nullptr); auto ret = nodes_.emplace(node_name, node); DCHECK(ret.second) @@ -148,7 +171,7 @@ class NodeMap { outputs_.erase(NodeName(name)); } - NodeDef* GetNode(const string& name) const { + NodeDefT* GetNode(const string& name) const { const string node_name = NodeName(name); auto it = nodes_.find(node_name); if (it == nodes_.end()) { @@ -197,9 +220,26 @@ class NodeMap { } private: - const absl::flat_hash_set empty_set_; - absl::node_hash_map nodes_; - absl::node_hash_map> outputs_; + // Helper method to get the NodeDef pointer of i-th node in a graph. + NodeDefT* GetNodeDefFromGraph(GraphDefT* graph, int64 i) const; + + const absl::flat_hash_set empty_set_; + absl::node_hash_map nodes_; + absl::node_hash_map> outputs_; +}; +} // namespace internal + +// A utility class to lookup a node and its outputs by node name. +class NodeMap : public internal::NodeMapInternal { + public: + explicit NodeMap(GraphDef* graph) : NodeMapInternal(graph) {} +}; + +// Same to NodeMap, but uses const GraphDef. +class ImmutableNodeMap + : public internal::NodeMapInternal { + public: + explicit ImmutableNodeMap(const GraphDef* graph) : NodeMapInternal(graph) {} }; // A vector with a set. 
The set stores the same elements as the vector, and diff --git a/tensorflow/core/grappler/utils_test.cc b/tensorflow/core/grappler/utils_test.cc index 6231fb7a780..e7e57e9b7d7 100644 --- a/tensorflow/core/grappler/utils_test.cc +++ b/tensorflow/core/grappler/utils_test.cc @@ -349,39 +349,69 @@ TEST_F(UtilsTest, NumNonControlOutputs) { GraphDef graph; TF_CHECK_OK(s.ToGraphDef(&graph)); - NodeMap node_map(&graph); - const NodeDef* add_node = node_map.GetNode("add"); - const NodeDef* mul_node = node_map.GetNode("mul"); - ASSERT_NE(add_node, nullptr); + { + NodeMap node_map(&graph); - // [a, b] are only non-control inputs - EXPECT_EQ(NumNonControlInputs(*add_node), 2); - EXPECT_EQ(NumControlInputs(*add_node), 1); - // [sqrt, shape] are non control outputs - EXPECT_EQ(NumNonControlOutputs(*add_node, node_map), 2); - // sqrt is the only data output - EXPECT_EQ(NumNonControlDataOutputs(*add_node, node_map), 1); - EXPECT_EQ(NumControlInputs(*mul_node), 0); + const NodeDef* add_node = node_map.GetNode("add"); + const NodeDef* mul_node = node_map.GetNode("mul"); + ASSERT_NE(add_node, nullptr); - EXPECT_TRUE(HasControlInputs(*add_node)); - EXPECT_TRUE(HasRegularInputs(*add_node)); - EXPECT_TRUE(HasControlOutputs(*add_node, node_map)); - EXPECT_TRUE(HasRegularOutputs(*add_node, node_map)); + // [a, b] are only non-control inputs + EXPECT_EQ(NumNonControlInputs(*add_node), 2); + EXPECT_EQ(NumControlInputs(*add_node), 1); + // [sqrt, shape] are non control outputs + EXPECT_EQ(NumNonControlOutputs(*add_node, node_map), 2); + // sqrt is the only data output + EXPECT_EQ(NumNonControlDataOutputs(*add_node, node_map), 1); + EXPECT_EQ(NumControlInputs(*mul_node), 0); - const NodeDef* x_node = node_map.GetNode("x"); - ASSERT_NE(x_node, nullptr); - EXPECT_FALSE(HasControlInputs(*x_node)); - EXPECT_FALSE(HasRegularInputs(*x_node)); - EXPECT_FALSE(HasControlOutputs(*x_node, node_map)); - EXPECT_TRUE(HasRegularOutputs(*x_node, node_map)); + EXPECT_TRUE(HasControlInputs(*add_node)); + EXPECT_TRUE(HasRegularInputs(*add_node)); + EXPECT_TRUE(HasControlOutputs(*add_node, node_map)); + EXPECT_TRUE(HasRegularOutputs(*add_node, node_map)); - const NodeDef* round_node = node_map.GetNode("round"); - ASSERT_NE(round_node, nullptr); - EXPECT_TRUE(HasControlInputs(*round_node)); - EXPECT_TRUE(HasRegularInputs(*round_node)); - EXPECT_FALSE(HasControlOutputs(*round_node, node_map)); - EXPECT_FALSE(HasRegularOutputs(*round_node, node_map)); + const NodeDef* x_node = node_map.GetNode("x"); + ASSERT_NE(x_node, nullptr); + EXPECT_FALSE(HasControlInputs(*x_node)); + EXPECT_FALSE(HasRegularInputs(*x_node)); + EXPECT_FALSE(HasControlOutputs(*x_node, node_map)); + EXPECT_TRUE(HasRegularOutputs(*x_node, node_map)); + + const NodeDef* round_node = node_map.GetNode("round"); + ASSERT_NE(round_node, nullptr); + EXPECT_TRUE(HasControlInputs(*round_node)); + EXPECT_TRUE(HasRegularInputs(*round_node)); + EXPECT_FALSE(HasControlOutputs(*round_node, node_map)); + EXPECT_FALSE(HasRegularOutputs(*round_node, node_map)); + } + + { + // Similar test for ImmutableNodeMap. 
+ ImmutableNodeMap node_map(&graph); + + const NodeDef* add_node = node_map.GetNode("add"); + const NodeDef* mul_node = node_map.GetNode("mul"); + ASSERT_NE(add_node, nullptr); + + // [a, b] are only non-control inputs + EXPECT_EQ(NumNonControlInputs(*add_node), 2); + EXPECT_EQ(NumControlInputs(*add_node), 1); + EXPECT_EQ(NumControlInputs(*mul_node), 0); + + EXPECT_TRUE(HasControlInputs(*add_node)); + EXPECT_TRUE(HasRegularInputs(*add_node)); + + const NodeDef* x_node = node_map.GetNode("x"); + ASSERT_NE(x_node, nullptr); + EXPECT_FALSE(HasControlInputs(*x_node)); + EXPECT_FALSE(HasRegularInputs(*x_node)); + + const NodeDef* round_node = node_map.GetNode("round"); + ASSERT_NE(round_node, nullptr); + EXPECT_TRUE(HasControlInputs(*round_node)); + EXPECT_TRUE(HasRegularInputs(*round_node)); + } } TEST(CheckAttrExists, All) { @@ -653,17 +683,30 @@ TEST(SetTensorValueTest, Quantized) { /*error_msg=*/""); } -static void BM_NodeMapConstruct(int iters, int size) { +static void BM_NodeMapConstruct(benchmark::State& state) { + const int size = state.range(0); testing::StopTiming(); GraphDef graph = test::CreateRandomGraph(size); testing::StartTiming(); - for (int i = 0; i < iters; i++) { + for (auto s : state) { NodeMap node_map(&graph); } testing::StopTiming(); } BENCHMARK(BM_NodeMapConstruct)->Range(1, 1 << 20); +static void BM_ImmutableNodeMapConstruct(benchmark::State& state) { + const int size = state.range(0); + testing::StopTiming(); + GraphDef graph = test::CreateRandomGraph(size); + testing::StartTiming(); + for (auto s : state) { + ImmutableNodeMap node_map(&graph); + } + testing::StopTiming(); +} +BENCHMARK(BM_ImmutableNodeMapConstruct)->Range(1, 1 << 20); + } // namespace } // namespace grappler } // namespace tensorflow From c8ddf5a1dfae741079955bd11e09894ac1ee2f46 Mon Sep 17 00:00:00 2001 From: Blake Hechtman Date: Tue, 4 Aug 2020 10:05:23 -0700 Subject: [PATCH 2082/2522] [XLA] Allow kBitcast to also change types. PiperOrigin-RevId: 324834056 Change-Id: I3376ff3984564a043c5b1f7f077bc31b30adef2c --- tensorflow/compiler/xla/service/BUILD | 1 + .../compiler/xla/service/hlo_creation_utils.cc | 6 ++++++ .../compiler/xla/service/hlo_instruction.cc | 2 +- .../compiler/xla/service/hlo_verifier.cc | 8 -------- .../compiler/xla/service/hlo_verifier_test.cc | 18 ------------------ 5 files changed, 8 insertions(+), 27 deletions(-) diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 540cd7fecd2..4d15bc432a2 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -1705,6 +1705,7 @@ cc_library( ":hlo", ":hlo_module_config", ":shape_inference", + "//tensorflow/compiler/xla:comparison_util", "//tensorflow/compiler/xla:literal", "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:statusor", diff --git a/tensorflow/compiler/xla/service/hlo_creation_utils.cc b/tensorflow/compiler/xla/service/hlo_creation_utils.cc index 0f5267e9fbc..4ba67888409 100644 --- a/tensorflow/compiler/xla/service/hlo_creation_utils.cc +++ b/tensorflow/compiler/xla/service/hlo_creation_utils.cc @@ -21,6 +21,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/client/lib/comparators.h" #include "tensorflow/compiler/xla/client/xla_builder.h" #include "tensorflow/compiler/xla/client/xla_computation.h" +#include "tensorflow/compiler/xla/comparison_util.h" #include "tensorflow/compiler/xla/literal.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/hlo_clone_context.h" @@ -258,6 +259,11 @@ HloInstruction* MakeBitcastConvertToHlo(HloInstruction* hlo, PrimitiveType type) { CHECK_NE(hlo->shape().element_type(), type); Shape shape = ShapeUtil::ChangeElementType(hlo->shape(), type); + // PRED are stored as one byte, PRED have a BitWidth of 1, avoid this problem + // by using a convert instead of bitcast convert. + if (type == PRED || hlo->shape().element_type() == PRED) { + return MakeConvertToHlo(hlo, type); + } hlo = hlo->parent()->AddInstruction( HloInstruction::CreateBitcastConvert(shape, hlo)); CHECK_EQ(hlo->shape().element_type(), type); diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 4335ed312c3..94d53ebe0b1 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -2207,7 +2207,7 @@ Status HloInstruction::ReplaceUsesWith(absl::Span users, Status HloInstruction::ReplaceAllUsesWithDifferentShape( absl::Span users, HloInstruction* new_producer) { for (HloInstruction* user : users) { - TF_RETURN_IF_ERROR(ReplaceUseWith(user, new_producer)); + TF_RETURN_IF_ERROR(ReplaceUseWithDifferentShape(user, new_producer)); } if (parent_ && parent_->root_instruction() == this) { diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc index 62b0d98418c..d395fddcc5d 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier.cc @@ -670,14 +670,6 @@ Status ShapeVerifier::HandleReduce(HloInstruction* reduce) { } Status ShapeVerifier::HandleBitcast(HloInstruction* bitcast) { - // Bitcasts are not allowed to change the element type. 
- if (bitcast->operand(0)->shape().element_type() != - bitcast->shape().element_type()) { - return InternalError( - "Bitcast can not change the element type from %s to %s", - PrimitiveType_Name(bitcast->operand(0)->shape().element_type()), - PrimitiveType_Name(bitcast->shape().element_type())); - } if (layout_sensitive_ && shape_size_function_(bitcast->shape()) != shape_size_function_(bitcast->operand(0)->shape())) { diff --git a/tensorflow/compiler/xla/service/hlo_verifier_test.cc b/tensorflow/compiler/xla/service/hlo_verifier_test.cc index d9709c50df9..1f71c9586d5 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier_test.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier_test.cc @@ -540,24 +540,6 @@ TEST_F(HloVerifierTestLayoutSensitive, ConcatWithLayoutChangeNotAllowed) { HasSubstr("Instruction shouldn't change layouts")); } -TEST_F(HloVerifierTest, BitcastCanNotChangeElementType) { - const char* const hlo_string = R"( - HloModule Module - - ENTRY BitcastCanNotChangeElementType { - constant.0 = f32[2] constant({0.0, 0.0}) - ROOT bitcast = s32[2] bitcast(constant.0) - } - )"; - TF_ASSERT_OK_AND_ASSIGN(auto module, - ParseAndReturnUnverifiedModule(hlo_string)); - - auto status = verifier().Run(module.get()).status(); - ASSERT_FALSE(status.ok()); - EXPECT_THAT(status.error_message(), - HasSubstr("Bitcast can not change the element type")); -} - TEST_F(HloVerifierTestLayoutSensitive, BitcastNeedsSameNumberOfElements) { const char* const hlo_string = R"( HloModule Module From 12b7b9e06d9d7b0f29ebe4b8a3645daa058c3bd1 Mon Sep 17 00:00:00 2001 From: Tomer Kaftan Date: Tue, 4 Aug 2020 10:08:00 -0700 Subject: [PATCH 2083/2522] The `linalg.LinearOperator*` Module APIs do not support top-level dispatching because they are classes w/ methods instead of top-level methods in TF's APIs. But, their class methods call out to APIs that do support dispatching. This CL updates the convert_to_tensor calls in the `linalg.LinearOperator*` APIs to use the publicly exposed, dispatching `convert_to_tensor_v2_with_dispatch`, which enables the Operators to effectively work with dispatching as the APIs they call out to support dispatching as well. PiperOrigin-RevId: 324834645 Change-Id: If2e9f17be101e74f8835497d8ca51a0174055053 --- .../python/ops/linalg/linear_operator.py | 16 +++---- .../ops/linalg/linear_operator_block_diag.py | 18 ++++---- .../linear_operator_block_lower_triangular.py | 18 ++++---- .../ops/linalg/linear_operator_circulant.py | 4 +- .../python/ops/linalg/linear_operator_diag.py | 2 +- .../ops/linalg/linear_operator_full_matrix.py | 2 +- .../ops/linalg/linear_operator_householder.py | 9 ++-- .../ops/linalg/linear_operator_identity.py | 4 +- .../ops/linalg/linear_operator_permutation.py | 6 +-- .../ops/linalg/linear_operator_toeplitz.py | 8 ++-- .../ops/linalg/linear_operator_tridiag.py | 6 ++- .../python/ops/linalg/linear_operator_util.py | 22 ++++----- tensorflow/python/util/dispatch_test.py | 45 +++++++++++++++++++ 13 files changed, 106 insertions(+), 54 deletions(-) diff --git a/tensorflow/python/ops/linalg/linear_operator.py b/tensorflow/python/ops/linalg/linear_operator.py index 8e1967f63c1..cf14cdb6eae 100644 --- a/tensorflow/python/ops/linalg/linear_operator.py +++ b/tensorflow/python/ops/linalg/linear_operator.py @@ -385,7 +385,7 @@ class LinearOperator(module.Module): # `shape` may be passed in if this can be pre-computed in a # more efficient manner, e.g. without excessive Tensor conversions. 
if self.tensor_rank is not None: - return ops.convert_to_tensor(self.tensor_rank) + return ops.convert_to_tensor_v2_with_dispatch(self.tensor_rank) else: shape = self.shape_tensor() if shape is None else shape return array_ops.size(shape) @@ -429,7 +429,7 @@ class LinearOperator(module.Module): # more efficient manner, e.g. without excessive Tensor conversions. dim_value = tensor_shape.dimension_value(self.domain_dimension) if dim_value is not None: - return ops.convert_to_tensor(dim_value) + return ops.convert_to_tensor_v2_with_dispatch(dim_value) else: shape = self.shape_tensor() if shape is None else shape return shape[-1] @@ -473,7 +473,7 @@ class LinearOperator(module.Module): # more efficient manner, e.g. without excessive Tensor conversions. dim_value = tensor_shape.dimension_value(self.range_dimension) if dim_value is not None: - return ops.convert_to_tensor(dim_value) + return ops.convert_to_tensor_v2_with_dispatch(dim_value) else: shape = self.shape_tensor() if shape is None else shape return shape[-2] @@ -641,7 +641,7 @@ class LinearOperator(module.Module): return linear_operator_algebra.matmul(left_operator, right_operator) with self._name_scope(name): - x = ops.convert_to_tensor(x, name="x") + x = ops.convert_to_tensor_v2_with_dispatch(x, name="x") self._check_input_dtype(x) self_dim = -2 if adjoint else -1 @@ -688,7 +688,7 @@ class LinearOperator(module.Module): A `Tensor` with shape `[..., M]` and same `dtype` as `self`. """ with self._name_scope(name): - x = ops.convert_to_tensor(x, name="x") + x = ops.convert_to_tensor_v2_with_dispatch(x, name="x") self._check_input_dtype(x) self_dim = -2 if adjoint else -1 tensor_shape.dimension_at_index( @@ -834,7 +834,7 @@ class LinearOperator(module.Module): return linear_operator_algebra.solve(left_operator, right_operator) with self._name_scope(name): - rhs = ops.convert_to_tensor(rhs, name="rhs") + rhs = ops.convert_to_tensor_v2_with_dispatch(rhs, name="rhs") self._check_input_dtype(rhs) self_dim = -1 if adjoint else -2 @@ -891,7 +891,7 @@ class LinearOperator(module.Module): NotImplementedError: If `self.is_non_singular` or `is_square` is False. """ with self._name_scope(name): - rhs = ops.convert_to_tensor(rhs, name="rhs") + rhs = ops.convert_to_tensor_v2_with_dispatch(rhs, name="rhs") self._check_input_dtype(rhs) self_dim = -1 if adjoint else -2 tensor_shape.dimension_at_index( @@ -1054,7 +1054,7 @@ class LinearOperator(module.Module): A `Tensor` with broadcast shape and same `dtype` as `self`. """ with self._name_scope(name): - x = ops.convert_to_tensor(x, name="x") + x = ops.convert_to_tensor_v2_with_dispatch(x, name="x") self._check_input_dtype(x) return self._add_to_tensor(x) diff --git a/tensorflow/python/ops/linalg/linear_operator_block_diag.py b/tensorflow/python/ops/linalg/linear_operator_block_diag.py index 7c50d00a055..7afa15ae069 100644 --- a/tensorflow/python/ops/linalg/linear_operator_block_diag.py +++ b/tensorflow/python/ops/linalg/linear_operator_block_diag.py @@ -263,7 +263,7 @@ class LinearOperatorBlockDiag(linear_operator.LinearOperator): def _shape_tensor(self): # Avoid messy broadcasting if possible. 
if self.shape.is_fully_defined(): - return ops.convert_to_tensor( + return ops.convert_to_tensor_v2_with_dispatch( self.shape.as_list(), dtype=dtypes.int32, name="shape") domain_dimension = sum(self._block_domain_dimension_tensors()) @@ -330,12 +330,12 @@ class LinearOperatorBlockDiag(linear_operator.LinearOperator): if linear_operator_util.arg_is_blockwise(block_dimensions, x, arg_dim): for i, block in enumerate(x): if not isinstance(block, linear_operator.LinearOperator): - block = ops.convert_to_tensor(block) + block = ops.convert_to_tensor_v2_with_dispatch(block) self._check_input_dtype(block) block_dimensions[i].assert_is_compatible_with(block.shape[arg_dim]) x[i] = block else: - x = ops.convert_to_tensor(x, name="x") + x = ops.convert_to_tensor_v2_with_dispatch(x, name="x") self._check_input_dtype(x) op_dimension = (self.range_dimension if adjoint else self.domain_dimension) @@ -404,7 +404,7 @@ class LinearOperatorBlockDiag(linear_operator.LinearOperator): if linear_operator_util.arg_is_blockwise(block_dimensions, x, -1): for i, block in enumerate(x): if not isinstance(block, linear_operator.LinearOperator): - block = ops.convert_to_tensor(block) + block = ops.convert_to_tensor_v2_with_dispatch(block) self._check_input_dtype(block) block_dimensions[i].assert_is_compatible_with(block.shape[-1]) x[i] = block @@ -412,7 +412,7 @@ class LinearOperatorBlockDiag(linear_operator.LinearOperator): y_mat = self.matmul(x_mat, adjoint=adjoint) return [array_ops.squeeze(y, axis=-1) for y in y_mat] - x = ops.convert_to_tensor(x, name="x") + x = ops.convert_to_tensor_v2_with_dispatch(x, name="x") self._check_input_dtype(x) op_dimension = (self.range_dimension if adjoint else self.domain_dimension) @@ -508,12 +508,12 @@ class LinearOperatorBlockDiag(linear_operator.LinearOperator): split_rhs = rhs for i, block in enumerate(split_rhs): if not isinstance(block, linear_operator.LinearOperator): - block = ops.convert_to_tensor(block) + block = ops.convert_to_tensor_v2_with_dispatch(block) self._check_input_dtype(block) block_dimensions[i].assert_is_compatible_with(block.shape[arg_dim]) split_rhs[i] = block else: - rhs = ops.convert_to_tensor(rhs, name="rhs") + rhs = ops.convert_to_tensor_v2_with_dispatch(rhs, name="rhs") self._check_input_dtype(rhs) op_dimension = (self.domain_dimension if adjoint else self.range_dimension) @@ -583,7 +583,7 @@ class LinearOperatorBlockDiag(linear_operator.LinearOperator): if linear_operator_util.arg_is_blockwise(block_dimensions, rhs, -1): for i, block in enumerate(rhs): if not isinstance(block, linear_operator.LinearOperator): - block = ops.convert_to_tensor(block) + block = ops.convert_to_tensor_v2_with_dispatch(block) self._check_input_dtype(block) block_dimensions[i].assert_is_compatible_with(block.shape[-1]) rhs[i] = block @@ -591,7 +591,7 @@ class LinearOperatorBlockDiag(linear_operator.LinearOperator): solution_mat = self.solve(rhs_mat, adjoint=adjoint) return [array_ops.squeeze(x, axis=-1) for x in solution_mat] - rhs = ops.convert_to_tensor(rhs, name="rhs") + rhs = ops.convert_to_tensor_v2_with_dispatch(rhs, name="rhs") self._check_input_dtype(rhs) op_dimension = (self.domain_dimension if adjoint else self.range_dimension) diff --git a/tensorflow/python/ops/linalg/linear_operator_block_lower_triangular.py b/tensorflow/python/ops/linalg/linear_operator_block_lower_triangular.py index b4bf8bdb142..84f2ff15345 100644 --- a/tensorflow/python/ops/linalg/linear_operator_block_lower_triangular.py +++ 
b/tensorflow/python/ops/linalg/linear_operator_block_lower_triangular.py @@ -366,7 +366,7 @@ class LinearOperatorBlockLowerTriangular(linear_operator.LinearOperator): def _shape_tensor(self): # Avoid messy broadcasting if possible. if self.shape.is_fully_defined(): - return ops.convert_to_tensor( + return ops.convert_to_tensor_v2_with_dispatch( self.shape.as_list(), dtype=dtypes.int32, name="shape") domain_dimension = sum(self._block_domain_dimension_tensors()) @@ -433,12 +433,12 @@ class LinearOperatorBlockLowerTriangular(linear_operator.LinearOperator): if linear_operator_util.arg_is_blockwise(block_dimensions, x, arg_dim): for i, block in enumerate(x): if not isinstance(block, linear_operator.LinearOperator): - block = ops.convert_to_tensor(block) + block = ops.convert_to_tensor_v2_with_dispatch(block) self._check_input_dtype(block) block_dimensions[i].assert_is_compatible_with(block.shape[arg_dim]) x[i] = block else: - x = ops.convert_to_tensor(x, name="x") + x = ops.convert_to_tensor_v2_with_dispatch(x, name="x") self._check_input_dtype(x) op_dimension = (self.range_dimension if adjoint else self.domain_dimension) @@ -543,7 +543,7 @@ class LinearOperatorBlockLowerTriangular(linear_operator.LinearOperator): if linear_operator_util.arg_is_blockwise(block_dimensions, x, -1): for i, block in enumerate(x): if not isinstance(block, linear_operator.LinearOperator): - block = ops.convert_to_tensor(block) + block = ops.convert_to_tensor_v2_with_dispatch(block) self._check_input_dtype(block) block_dimensions[i].assert_is_compatible_with(block.shape[-1]) x[i] = block @@ -551,7 +551,7 @@ class LinearOperatorBlockLowerTriangular(linear_operator.LinearOperator): y_mat = self.matmul(x_mat, adjoint=adjoint) return [array_ops.squeeze(y, axis=-1) for y in y_mat] - x = ops.convert_to_tensor(x, name="x") + x = ops.convert_to_tensor_v2_with_dispatch(x, name="x") self._check_input_dtype(x) op_dimension = (self.range_dimension if adjoint else self.domain_dimension) @@ -674,7 +674,7 @@ class LinearOperatorBlockLowerTriangular(linear_operator.LinearOperator): if blockwise_arg: for i, block in enumerate(rhs): if not isinstance(block, linear_operator.LinearOperator): - block = ops.convert_to_tensor(block) + block = ops.convert_to_tensor_v2_with_dispatch(block) self._check_input_dtype(block) block_dimensions[i].assert_is_compatible_with(block.shape[arg_dim]) rhs[i] = block @@ -684,7 +684,7 @@ class LinearOperatorBlockLowerTriangular(linear_operator.LinearOperator): split_rhs = rhs else: - rhs = ops.convert_to_tensor(rhs, name="rhs") + rhs = ops.convert_to_tensor_v2_with_dispatch(rhs, name="rhs") self._check_input_dtype(rhs) op_dimension = (self.domain_dimension if adjoint else self.range_dimension) @@ -795,14 +795,14 @@ class LinearOperatorBlockLowerTriangular(linear_operator.LinearOperator): if linear_operator_util.arg_is_blockwise(block_dimensions, rhs, -1): for i, block in enumerate(rhs): if not isinstance(block, linear_operator.LinearOperator): - block = ops.convert_to_tensor(block) + block = ops.convert_to_tensor_v2_with_dispatch(block) self._check_input_dtype(block) block_dimensions[i].assert_is_compatible_with(block.shape[-1]) rhs[i] = block rhs_mat = [array_ops.expand_dims(block, axis=-1) for block in rhs] solution_mat = self.solve(rhs_mat, adjoint=adjoint) return [array_ops.squeeze(x, axis=-1) for x in solution_mat] - rhs = ops.convert_to_tensor(rhs, name="rhs") + rhs = ops.convert_to_tensor_v2_with_dispatch(rhs, name="rhs") self._check_input_dtype(rhs) op_dimension = (self.domain_dimension if adjoint 
else self.range_dimension) diff --git a/tensorflow/python/ops/linalg/linear_operator_circulant.py b/tensorflow/python/ops/linalg/linear_operator_circulant.py index ace276900fc..d4b671c53bd 100644 --- a/tensorflow/python/ops/linalg/linear_operator_circulant.py +++ b/tensorflow/python/ops/linalg/linear_operator_circulant.py @@ -378,7 +378,7 @@ class _BaseLinearOperatorCirculant(linear_operator.LinearOperator): def _broadcast_batch_dims(self, x, spectrum): """Broadcast batch dims of batch matrix `x` and spectrum.""" - spectrum = ops.convert_to_tensor(spectrum, name="spectrum") + spectrum = ops.convert_to_tensor_v2_with_dispatch(spectrum, name="spectrum") # spectrum.shape = batch_shape + block_shape # First make spectrum a batch matrix with # spectrum.shape = batch_shape + [prod(block_shape), 1] @@ -755,7 +755,7 @@ class LinearOperatorCirculant(_BaseLinearOperatorCirculant): name=name) def _eigvals(self): - return ops.convert_to_tensor(self.spectrum) + return ops.convert_to_tensor_v2_with_dispatch(self.spectrum) @tf_export("linalg.LinearOperatorCirculant2D") diff --git a/tensorflow/python/ops/linalg/linear_operator_diag.py b/tensorflow/python/ops/linalg/linear_operator_diag.py index d51d6b81c5d..b5e81b267ce 100644 --- a/tensorflow/python/ops/linalg/linear_operator_diag.py +++ b/tensorflow/python/ops/linalg/linear_operator_diag.py @@ -251,7 +251,7 @@ class LinearOperatorDiag(linear_operator.LinearOperator): return array_ops.matrix_set_diag(x, new_diag) def _eigvals(self): - return ops.convert_to_tensor(self.diag) + return ops.convert_to_tensor_v2_with_dispatch(self.diag) def _cond(self): abs_diag = math_ops.abs(self.diag) diff --git a/tensorflow/python/ops/linalg/linear_operator_full_matrix.py b/tensorflow/python/ops/linalg/linear_operator_full_matrix.py index 8d92d1accaa..b10822589d5 100644 --- a/tensorflow/python/ops/linalg/linear_operator_full_matrix.py +++ b/tensorflow/python/ops/linalg/linear_operator_full_matrix.py @@ -160,7 +160,7 @@ class LinearOperatorFullMatrix(linear_operator.LinearOperator): dtypes.complex128, ] - matrix = ops.convert_to_tensor(matrix, name="matrix") + matrix = ops.convert_to_tensor_v2_with_dispatch(matrix, name="matrix") dtype = matrix.dtype if dtype not in allowed_dtypes: diff --git a/tensorflow/python/ops/linalg/linear_operator_householder.py b/tensorflow/python/ops/linalg/linear_operator_householder.py index 142d48c5331..265c862ea03 100644 --- a/tensorflow/python/ops/linalg/linear_operator_householder.py +++ b/tensorflow/python/ops/linalg/linear_operator_householder.py @@ -198,7 +198,8 @@ class LinearOperatorHouseholder(linear_operator.LinearOperator): # Note that because this is a reflection, it lies in O(n) (for real vector # spaces) or U(n) (for complex vector spaces), and thus is its own adjoint. 
- reflection_axis = ops.convert_to_tensor(self.reflection_axis) + reflection_axis = ops.convert_to_tensor_v2_with_dispatch( + self.reflection_axis) x = linalg.adjoint(x) if adjoint_arg else x normalized_axis = reflection_axis / linalg.norm( reflection_axis, axis=-1, keepdims=True) @@ -229,7 +230,8 @@ class LinearOperatorHouseholder(linear_operator.LinearOperator): return self._matmul(rhs, adjoint, adjoint_arg) def _to_dense(self): - reflection_axis = ops.convert_to_tensor(self.reflection_axis) + reflection_axis = ops.convert_to_tensor_v2_with_dispatch( + self.reflection_axis) normalized_axis = reflection_axis / linalg.norm( reflection_axis, axis=-1, keepdims=True) mat = normalized_axis[..., array_ops.newaxis] @@ -238,7 +240,8 @@ class LinearOperatorHouseholder(linear_operator.LinearOperator): matrix, 1. + array_ops.matrix_diag_part(matrix)) def _diag_part(self): - reflection_axis = ops.convert_to_tensor(self.reflection_axis) + reflection_axis = ops.convert_to_tensor_v2_with_dispatch( + self.reflection_axis) normalized_axis = reflection_axis / linalg.norm( reflection_axis, axis=-1, keepdims=True) return 1. - 2 * normalized_axis * math_ops.conj(normalized_axis) diff --git a/tensorflow/python/ops/linalg/linear_operator_identity.py b/tensorflow/python/ops/linalg/linear_operator_identity.py index 8226e74bacd..a0f7ead42d6 100644 --- a/tensorflow/python/ops/linalg/linear_operator_identity.py +++ b/tensorflow/python/ops/linalg/linear_operator_identity.py @@ -394,7 +394,7 @@ class LinearOperatorIdentity(BaseLinearOperatorIdentity): A `Tensor` with broadcast shape and same `dtype` as `self`. """ with self._name_scope(name): - mat = ops.convert_to_tensor(mat, name="mat") + mat = ops.convert_to_tensor_v2_with_dispatch(mat, name="mat") mat_diag = array_ops.matrix_diag_part(mat) new_diag = 1 + mat_diag return array_ops.matrix_set_diag(mat, new_diag) @@ -720,7 +720,7 @@ class LinearOperatorScaledIdentity(BaseLinearOperatorIdentity): multiplier_vector = array_ops.expand_dims(self.multiplier, -1) # Shape [C1,...,Cc, M, M] - mat = ops.convert_to_tensor(mat, name="mat") + mat = ops.convert_to_tensor_v2_with_dispatch(mat, name="mat") # Shape [C1,...,Cc, M] mat_diag = array_ops.matrix_diag_part(mat) diff --git a/tensorflow/python/ops/linalg/linear_operator_permutation.py b/tensorflow/python/ops/linalg/linear_operator_permutation.py index 3a44cd5ef1b..9cc8e158a21 100644 --- a/tensorflow/python/ops/linalg/linear_operator_permutation.py +++ b/tensorflow/python/ops/linalg/linear_operator_permutation.py @@ -197,7 +197,7 @@ class LinearOperatorPermutation(linear_operator.LinearOperator): return array_ops.shape(perm)[-1] def _matmul(self, x, adjoint=False, adjoint_arg=False): - perm = ops.convert_to_tensor(self.perm) + perm = ops.convert_to_tensor_v2_with_dispatch(self.perm) if adjoint and not self.is_self_adjoint: # TODO(srvasude): invert_permutation doesn't work on batches so we use # argsort. 
@@ -232,13 +232,13 @@ class LinearOperatorPermutation(linear_operator.LinearOperator): return self._matmul(rhs, adjoint=(not adjoint), adjoint_arg=adjoint_arg) def _to_dense(self): - perm = ops.convert_to_tensor(self.perm) + perm = ops.convert_to_tensor_v2_with_dispatch(self.perm) return math_ops.cast(math_ops.equal( math_ops.range(0, self._domain_dimension_tensor(perm)), perm[..., array_ops.newaxis]), self.dtype) def _diag_part(self): - perm = ops.convert_to_tensor(self.perm) + perm = ops.convert_to_tensor_v2_with_dispatch(self.perm) return math_ops.cast(math_ops.equal( math_ops.range(0, self._domain_dimension_tensor(perm)), perm), self.dtype) diff --git a/tensorflow/python/ops/linalg/linear_operator_toeplitz.py b/tensorflow/python/ops/linalg/linear_operator_toeplitz.py index 71fff44da44..2d61a536e29 100644 --- a/tensorflow/python/ops/linalg/linear_operator_toeplitz.py +++ b/tensorflow/python/ops/linalg/linear_operator_toeplitz.py @@ -209,8 +209,8 @@ class LinearOperatorToeplitz(linear_operator.LinearOperator): # for more details. x = linalg.adjoint(x) if adjoint_arg else x expanded_x = array_ops.concat([x, array_ops.zeros_like(x)], axis=-2) - col = ops.convert_to_tensor(self.col) - row = ops.convert_to_tensor(self.row) + col = ops.convert_to_tensor_v2_with_dispatch(self.col) + row = ops.convert_to_tensor_v2_with_dispatch(self.row) circulant_col = array_ops.concat( [col, array_ops.zeros_like(col[..., 0:1]), @@ -236,8 +236,8 @@ class LinearOperatorToeplitz(linear_operator.LinearOperator): [self.domain_dimension_tensor()], self.dtype) def _to_dense(self): - row = ops.convert_to_tensor(self.row) - col = ops.convert_to_tensor(self.col) + row = ops.convert_to_tensor_v2_with_dispatch(self.row) + col = ops.convert_to_tensor_v2_with_dispatch(self.col) total_shape = array_ops.broadcast_dynamic_shape( array_ops.shape(row), array_ops.shape(col)) n = array_ops.shape(row)[-1] diff --git a/tensorflow/python/ops/linalg/linear_operator_tridiag.py b/tensorflow/python/ops/linalg/linear_operator_tridiag.py index 422747848c0..2ba310f75bf 100644 --- a/tensorflow/python/ops/linalg/linear_operator_tridiag.py +++ b/tensorflow/python/ops/linalg/linear_operator_tridiag.py @@ -246,7 +246,7 @@ class LinearOperatorTridiag(linear_operator.LinearOperator): self.diagonals, linalg.adjoint(self.diagonals), message='Matrix was not equal to its adjoint.')] elif self.diagonals_format == _COMPACT: - diagonals = ops.convert_to_tensor(self.diagonals) + diagonals = ops.convert_to_tensor_v2_with_dispatch(self.diagonals) asserts += [linear_operator_util.assert_zero_imag_part( diagonals[..., 1, :], message=diag_message)] # Roll the subdiagonal so the shifted argument is at the end. @@ -353,7 +353,9 @@ class LinearOperatorTridiag(linear_operator.LinearOperator): align='LEFT_RIGHT', padding_value=0.) 
- diagonals = [ops.convert_to_tensor(d) for d in self.diagonals] + diagonals = [ + ops.convert_to_tensor_v2_with_dispatch(d) for d in self.diagonals + ] diagonals = array_ops.stack(diagonals, axis=-2) return gen_array_ops.matrix_diag_v3( diff --git a/tensorflow/python/ops/linalg/linear_operator_util.py b/tensorflow/python/ops/linalg/linear_operator_util.py index 948f2f86a53..096ad3fb4bb 100644 --- a/tensorflow/python/ops/linalg/linear_operator_util.py +++ b/tensorflow/python/ops/linalg/linear_operator_util.py @@ -114,7 +114,7 @@ def convert_nonref_to_tensor(value, dtype=None, dtype_hint=None, name=None): raise TypeError('Mutable type must be of dtype "{}" but is "{}".'.format( dtype_name(dtype_base), dtype_name(value_dtype_base))) return value - return ops.convert_to_tensor( + return ops.convert_to_tensor_v2_with_dispatch( value, dtype=dtype, dtype_hint=dtype_hint, name=name) @@ -189,10 +189,10 @@ def assert_no_entries_with_modulus_zero( An `Op` that asserts `x` has no entries with modulus zero. """ with ops.name_scope(name, values=[x]): - x = ops.convert_to_tensor(x, name="x") + x = ops.convert_to_tensor_v2_with_dispatch(x, name="x") dtype = x.dtype.base_dtype should_be_nonzero = math_ops.abs(x) - zero = ops.convert_to_tensor(0, dtype=dtype.real_dtype) + zero = ops.convert_to_tensor_v2_with_dispatch(0, dtype=dtype.real_dtype) return check_ops.assert_less(zero, should_be_nonzero, message=message) @@ -208,13 +208,13 @@ def assert_zero_imag_part(x, message=None, name="assert_zero_imag_part"): An `Op` that asserts `x` has no entries with modulus zero. """ with ops.name_scope(name, values=[x]): - x = ops.convert_to_tensor(x, name="x") + x = ops.convert_to_tensor_v2_with_dispatch(x, name="x") dtype = x.dtype.base_dtype if dtype.is_floating: return control_flow_ops.no_op() - zero = ops.convert_to_tensor(0, dtype=dtype.real_dtype) + zero = ops.convert_to_tensor_v2_with_dispatch(0, dtype=dtype.real_dtype) return check_ops.assert_equal(zero, math_ops.imag(x), message=message) @@ -261,7 +261,7 @@ def shape_tensor(shape, name=None): dtype = dtypes.int32 else: dtype = None - return ops.convert_to_tensor(shape, dtype=dtype, name=name) + return ops.convert_to_tensor_v2_with_dispatch(shape, dtype=dtype, name=name) ################################################################################ @@ -323,7 +323,7 @@ def broadcast_matrix_batch_dims(batch_matrices, name=None): batch_matrices = list(batch_matrices) for i, mat in enumerate(batch_matrices): - batch_matrices[i] = ops.convert_to_tensor(mat) + batch_matrices[i] = ops.convert_to_tensor_v2_with_dispatch(mat) assert_is_batch_matrix(batch_matrices[i]) if len(batch_matrices) < 2: @@ -366,8 +366,9 @@ def broadcast_matrix_batch_dims(batch_matrices, name=None): def matrix_solve_with_broadcast(matrix, rhs, adjoint=False, name=None): """Solve systems of linear equations.""" with ops.name_scope(name, "MatrixSolveWithBroadcast", [matrix, rhs]): - matrix = ops.convert_to_tensor(matrix, name="matrix") - rhs = ops.convert_to_tensor(rhs, name="rhs", dtype=matrix.dtype) + matrix = ops.convert_to_tensor_v2_with_dispatch(matrix, name="matrix") + rhs = ops.convert_to_tensor_v2_with_dispatch( + rhs, name="rhs", dtype=matrix.dtype) # If either matrix/rhs has extra dims, we can reshape to get rid of them. 
matrix, rhs, reshape_inv, still_need_to_transpose = _reshape_for_efficiency( @@ -526,7 +527,8 @@ def arg_is_blockwise(block_dimensions, arg, arg_split_dim): if not any(nest.is_nested(x) for x in arg): return True else: - arg_dims = [ops.convert_to_tensor(x).shape[arg_split_dim] for x in arg] + arg_dims = [ops.convert_to_tensor_v2_with_dispatch( + x).shape[arg_split_dim] for x in arg] self_dims = [dim.value for dim in block_dimensions] # If none of the operator dimensions are known, interpret the input as diff --git a/tensorflow/python/util/dispatch_test.py b/tensorflow/python/util/dispatch_test.py index 2b3946ce9f7..f06f2fda7e3 100644 --- a/tensorflow/python/util/dispatch_test.py +++ b/tensorflow/python/util/dispatch_test.py @@ -18,11 +18,13 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops.linalg import linear_operator_diag from tensorflow.python.ops.proto_ops import decode_proto from tensorflow.python.platform import googletest from tensorflow.python.platform import test @@ -60,6 +62,8 @@ class TensorTracer(object): self.name = name self.args = args self.kwargs = kwargs + self.shape = array_ops.ones(shape=(4, 4)).shape + self.dtype = dtypes.float32 def __repr__(self): if self.args is None and self.kwargs is None: @@ -70,6 +74,10 @@ class TensorTracer(object): ["{}={}".format(name, x) for (name, x) in self.kwargs.items()]) return "{}({})".format(self.name, ", ".join(args)) + @property + def is_tensor_like(self): + return True + @classmethod def _overload_all_operators(cls): # pylint: disable=invalid-name """Register overloads for all operators.""" @@ -282,5 +290,42 @@ class DispatchTest(test_util.TensorFlowTestCase): # Clean up. dispatch._GLOBAL_DISPATCHERS = original_global_dispatchers + def testGlobalDispatcherLinearOperators(self): + original_global_dispatchers = dispatch._GLOBAL_DISPATCHERS + try: + TensorTracerOpDispatcher().register() + + x = TensorTracer("x") + + # To grab the eigenvalues the diag operator just calls convert_to_tensor + # (twice) in this case. + trace = linear_operator_diag.LinearOperatorDiag(x).eigvals() + self.assertEqual( + str(trace), + "convert_to_tensor(convert_to_tensor(x, dtype=None, dtype_hint=None, " + "name=diag))") + + # The diagonal tensor addition gets traced even though the linear_operator + # API only uses dispatchable ops instead of directly exposing dispatching. + trace = linear_operator_diag.LinearOperatorDiag(x).add_to_tensor(x) + self.assertIn( + "linalg.set_diag(convert_to_tensor(x, name=x), __operators__.add(" + "convert_to_tensor(x, dtype=None, dtype_hint=None, name=diag), " + "linalg.diag_part(convert_to_tensor(x, name=x)), " + "name=", + str(trace)) + + # The dispatch-supporting ops the non-singular check calls out to + # get traced. + trace = linear_operator_diag.LinearOperatorDiag(x).assert_non_singular() + self.assertIn("debugging.assert_less", str(trace)) + self.assertIn( + "message=Singular operator: Diagonal contained zero values.", + str(trace)) + + finally: + # Clean up. 
+ dispatch._GLOBAL_DISPATCHERS = original_global_dispatchers + if __name__ == "__main__": googletest.main() From b4b0d9da40feb4c7b056ead6b3a69b280adc7943 Mon Sep 17 00:00:00 2001 From: Zhenyu Tan Date: Tue, 4 Aug 2020 10:22:57 -0700 Subject: [PATCH 2084/2522] fix for lint in feature column test. PiperOrigin-RevId: 324837891 Change-Id: Ia86ecb5dd962f7aa70f676644e9f6b9b24707e46 --- .../python/feature_column/feature_column_test.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/feature_column/feature_column_test.py b/tensorflow/python/feature_column/feature_column_test.py index c8e24e46c2f..e598848282f 100644 --- a/tensorflow/python/feature_column/feature_column_test.py +++ b/tensorflow/python/feature_column/feature_column_test.py @@ -5865,11 +5865,12 @@ class SharedEmbeddingColumnTest(test.TestCase, parameterized.TestCase): partitioner = partitioned_variables.fixed_size_partitioner(2, axis=0) with variable_scope.variable_scope('vars', partitioner=partitioner): - embedding_column_a, embedding_column_b = fc_new.shared_embedding_columns( - [categorical_column_a, categorical_column_b], - dimension=embedding_dimension, - initializer=_initializer, - use_safe_embedding_lookup=use_safe_embedding_lookup) + embedding_column_a, embedding_column_b = ( + fc_new.shared_embedding_columns( + [categorical_column_a, categorical_column_b], + dimension=embedding_dimension, + initializer=_initializer, + use_safe_embedding_lookup=use_safe_embedding_lookup)) # Provide sparse input and get dense result. embedding_lookup_a = embedding_column_a._get_dense_tensor( _LazyBuilder(input_features)) From be1cfeb96e23a8e24be6b141a15e39bf78116c10 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Tue, 4 Aug 2020 10:45:09 -0700 Subject: [PATCH 2085/2522] Add build helper to mlir-hlo PiperOrigin-RevId: 324843181 Change-Id: I3ab290395883173c590651fbf3d2bfb2ae7cb9fc --- .../mlir/hlo/build_tools/build_mlir.sh | 52 +++++++++++++++++++ .../mlir/hlo/build_tools/llvm_version.txt | 2 + 2 files changed, 54 insertions(+) create mode 100755 tensorflow/compiler/mlir/hlo/build_tools/build_mlir.sh create mode 100644 tensorflow/compiler/mlir/hlo/build_tools/llvm_version.txt diff --git a/tensorflow/compiler/mlir/hlo/build_tools/build_mlir.sh b/tensorflow/compiler/mlir/hlo/build_tools/build_mlir.sh new file mode 100755 index 00000000000..5ccefb9416f --- /dev/null +++ b/tensorflow/compiler/mlir/hlo/build_tools/build_mlir.sh @@ -0,0 +1,52 @@ +#!/bin/bash +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -e + +if [[ $# -ne 2 ]] ; then + echo "Usage: $0 " + exit 1 +fi + +# LLVM source +LLVM_SRC_DIR="$1" +build_dir="$2" + +if ! [ -f "$LLVM_SRC_DIR/llvm/CMakeLists.txt" ]; then + echo "Expected the path to LLVM to be set correctly (got '$LLVM_SRC_DIR'): can't find CMakeLists.txt" + exit 1 +fi +echo "Using LLVM source dir: $LLVM_SRC_DIR" + +# Setup directories. 
+echo "Building MLIR in $build_dir" +mkdir -p "$build_dir" + +echo "Beginning build (commands will echo)" +set -x + +cmake -GNinja \ + "-H$LLVM_SRC_DIR/llvm" \ + "-B$build_dir" \ + -DLLVM_INSTALL_UTILS=ON \ + -DLLVM_ENABLE_LLD=ON \ + -DLLVM_ENABLE_PROJECTS=mlir \ + -DLLVM_TARGETS_TO_BUILD="X86;NVPTX;AMDGPU" \ + -DLLVM_INCLUDE_TOOLS=ON \ + -DLLVM_BUILD_TOOLS=OFF \ + -DLLVM_INCLUDE_TESTS=OFF \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DLLVM_ENABLE_ASSERTIONS=On + +cmake --build "$build_dir" --target all --target mlir-cpu-runner diff --git a/tensorflow/compiler/mlir/hlo/build_tools/llvm_version.txt b/tensorflow/compiler/mlir/hlo/build_tools/llvm_version.txt new file mode 100644 index 00000000000..0d5446142ec --- /dev/null +++ b/tensorflow/compiler/mlir/hlo/build_tools/llvm_version.txt @@ -0,0 +1,2 @@ + + From e1da2cba6db064c83b827a775d92804e7783f99e Mon Sep 17 00:00:00 2001 From: Stephan Herhut Date: Tue, 4 Aug 2020 10:47:39 -0700 Subject: [PATCH 2086/2522] Make kernel-gen-opt its own tool with a cc file. PiperOrigin-RevId: 324843789 Change-Id: I2ce05b73ea3ef6272ab131f7ca11d9cd301f2d59 --- .../compiler/mlir/tools/kernel_gen/BUILD | 8 +- .../tools/kernel-gen-opt/kernel-gen-opt.cc | 122 ++++++++++++++++++ .../mlir/tools/kernel_gen/transforms/BUILD | 55 +++----- .../transforms/embed_tf_framework_pass.cc | 2 +- .../mlir/tools/kernel_gen/transforms/passes.h | 13 +- .../kernel_gen/transforms/register_passes.cc | 27 ---- ...iptors.cc => shape_to_descriptors_pass.cc} | 15 +-- .../tf_framework_legalize_to_llvm_pass.cc | 2 +- 8 files changed, 165 insertions(+), 79 deletions(-) create mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/tools/kernel-gen-opt/kernel-gen-opt.cc delete mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/transforms/register_passes.cc rename tensorflow/compiler/mlir/tools/kernel_gen/transforms/{shape_to_descriptors.cc => shape_to_descriptors_pass.cc} (88%) diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/BUILD b/tensorflow/compiler/mlir/tools/kernel_gen/BUILD index b40d6cb3abf..066ca221d5d 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/BUILD +++ b/tensorflow/compiler/mlir/tools/kernel_gen/BUILD @@ -59,12 +59,18 @@ tf_cc_binary( tf_cc_binary( name = "kernel-gen-opt", + srcs = ["tools/kernel-gen-opt/kernel-gen-opt.cc"], visibility = ["//tensorflow/compiler/mlir/tools/kernel_gen/tests:__pkg__"], deps = [ + "//tensorflow/compiler/mlir/hlo:hlo_dialect_registration", "//tensorflow/compiler/mlir/tools/kernel_gen/ir:tf_framework_dialect_registration", "//tensorflow/compiler/mlir/tools/kernel_gen/transforms:passes", - "@llvm-project//mlir:AllPassesAndDialects", + "@llvm-project//llvm:Support", + "@llvm-project//mlir:AllPassesAndDialectsNoRegistration", + "@llvm-project//mlir:IR", "@llvm-project//mlir:MlirOptLib", "@llvm-project//mlir:MlirOptMain", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:Support", ], ) diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tools/kernel-gen-opt/kernel-gen-opt.cc b/tensorflow/compiler/mlir/tools/kernel_gen/tools/kernel-gen-opt/kernel-gen-opt.cc new file mode 100644 index 00000000000..c1af35617b1 --- /dev/null +++ b/tensorflow/compiler/mlir/tools/kernel_gen/tools/kernel-gen-opt/kernel-gen-opt.cc @@ -0,0 +1,122 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/InitLLVM.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/ToolOutputFile.h" +#include "mlir/IR/AsmState.h" // from @llvm-project +#include "mlir/IR/Dialect.h" // from @llvm-project +#include "mlir/IR/MLIRContext.h" // from @llvm-project +#include "mlir/InitAllDialects.h" // from @llvm-project +#include "mlir/InitAllPasses.h" // from @llvm-project +#include "mlir/Pass/Pass.h" // from @llvm-project +#include "mlir/Pass/PassManager.h" // from @llvm-project +#include "mlir/Support/FileUtilities.h" // from @llvm-project +#include "mlir/Support/MlirOptMain.h" // from @llvm-project +#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/register.h" +#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h" + +// NOLINTNEXTLINE +static llvm::cl::opt inputFilename(llvm::cl::Positional, + llvm::cl::desc(""), + llvm::cl::init("-")); + +// NOLINTNEXTLINE +static llvm::cl::opt outputFilename( + "o", llvm::cl::desc("Output filename"), llvm::cl::value_desc("filename"), + llvm::cl::init("-")); + +// NOLINTNEXTLINE +static llvm::cl::opt splitInputFile( + "split-input-file", + llvm::cl::desc("Split the input file into pieces and process each " + "chunk independently"), + llvm::cl::init(false)); + +// NOLINTNEXTLINE +static llvm::cl::opt verifyDiagnostics( + "verify-diagnostics", + llvm::cl::desc("Check that emitted diagnostics match " + "expected-* lines on the corresponding line"), + llvm::cl::init(false)); + +// NOLINTNEXTLINE +static llvm::cl::opt verifyPasses( + "verify-each", + llvm::cl::desc("Run the verifier after each transformation pass"), + llvm::cl::init(true)); + +// NOLINTNEXTLINE +static llvm::cl::opt allowUnregisteredDialects( + "allow-unregistered-dialect", + llvm::cl::desc("Allow operation with no registered dialects"), + llvm::cl::init(false)); + +// NOLINTNEXTLINE +static llvm::cl::opt showDialects( + "show-dialects", llvm::cl::desc("Print the list of registered dialects"), + llvm::cl::init(false)); + +int main(int argc, char **argv) { + mlir::registerAllDialects(); + mlir::registerAllPasses(); + + mlir::mhlo::registerAllDialects(); + mlir::kernel_gen::registerKernelGenPasses(); + + llvm::InitLLVM y(argc, argv); + + // Register any pass manager command line options. + mlir::registerAsmPrinterCLOptions(); + mlir::registerPassManagerCLOptions(); + mlir::PassPipelineCLParser passPipeline("", "Compiler passes to run"); + + // Parse pass names in main to ensure static initialization completed. + llvm::cl::ParseCommandLineOptions(argc, argv, + "MLIR modular optimizer driver\n"); + + if (showDialects) { + mlir::MLIRContext context; + llvm::outs() << "Registered Dialects:\n"; + for (mlir::Dialect *dialect : context.getRegisteredDialects()) { + llvm::outs() << dialect->getNamespace() << "\n"; + } + return 0; + } + + // Set up the input file. 
+ std::string errorMessage; + auto file = mlir::openInputFile(inputFilename, &errorMessage); + if (!file) { + llvm::errs() << errorMessage << "\n"; + return 1; + } + + auto output = mlir::openOutputFile(outputFilename, &errorMessage); + if (!output) { + llvm::errs() << errorMessage << "\n"; + exit(1); + } + + if (failed(MlirOptMain(output->os(), std::move(file), passPipeline, + splitInputFile, verifyDiagnostics, verifyPasses, + allowUnregisteredDialects))) { + return 1; + } + // Keep the output file if the invocation of MlirOptMain was successful. + output->keep(); + return 0; +} diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD index c0808ae08c4..0d346da9956 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD @@ -35,15 +35,31 @@ cc_library( ], ) +gentbl( + name = "tf_framework_passes_inc_gen", + tbl_outs = [("-gen-pass-decls -name KernelGen", "kernel_gen_passes.h.inc")], + tblgen = "@llvm-project//mlir:mlir-tblgen", + td_file = "passes.td", + td_srcs = ["@llvm-project//mlir:PassBaseTdFiles"], +) + cc_library( - name = "shape_to_descriptors", - srcs = ["shape_to_descriptors.cc"], - hdrs = [ - "passes.h", + name = "passes", + srcs = [ + "embed_tf_framework_pass.cc", + "shape_to_descriptors_pass.cc", + "tf_framework_legalize_to_llvm_pass.cc", ], + hdrs = ["passes.h"], deps = [ + ":embed_tf_framework", + ":tf_framework_legalize_to_llvm", + ":tf_framework_passes_inc_gen", + "//tensorflow/compiler/mlir/tools/kernel_gen/ir:tf_framework_ops", "@llvm-project//llvm:Support", "@llvm-project//mlir:IR", + "@llvm-project//mlir:LLVMDialect", + "@llvm-project//mlir:LLVMTransforms", "@llvm-project//mlir:Pass", "@llvm-project//mlir:SCFDialect", "@llvm-project//mlir:Shape", @@ -55,34 +71,3 @@ cc_library( "@llvm-project//mlir:Transforms", ], ) - -gentbl( - name = "tf_framework_passes_inc_gen", - tbl_outs = [("-gen-pass-decls -name TFFramework", "tf_framework_passes.h.inc")], - tblgen = "@llvm-project//mlir:mlir-tblgen", - td_file = "passes.td", - td_srcs = ["@llvm-project//mlir:PassBaseTdFiles"], -) - -cc_library( - name = "passes", - srcs = [ - "embed_tf_framework_pass.cc", - "register_passes.cc", - "tf_framework_legalize_to_llvm_pass.cc", - ], - hdrs = ["passes.h"], - deps = [ - ":embed_tf_framework", - ":shape_to_descriptors", - ":tf_framework_legalize_to_llvm", - ":tf_framework_passes_inc_gen", - "//tensorflow/compiler/mlir/tools/kernel_gen/ir:tf_framework_ops", - "@llvm-project//mlir:LLVMDialect", - "@llvm-project//mlir:LLVMTransforms", - "@llvm-project//mlir:Pass", - "@llvm-project//mlir:StandardOps", - "@llvm-project//mlir:Transforms", - ], - alwayslink = 1, -) diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/embed_tf_framework_pass.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/embed_tf_framework_pass.cc index 615c596e353..a0cfcae65d1 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/embed_tf_framework_pass.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/embed_tf_framework_pass.cc @@ -26,7 +26,7 @@ namespace tf_framework { namespace { #define GEN_PASS_CLASSES -#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_passes.h.inc" +#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/kernel_gen_passes.h.inc" static constexpr StringRef kTFEntry = "tf_entry"; diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h 
b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h index 5e240b8d01c..13f367c9fe4 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h @@ -18,13 +18,10 @@ limitations under the License. #include +#include "mlir/IR/Module.h" // from @llvm-project +#include "mlir/Pass/Pass.h" // from @llvm-project + namespace mlir { - -class ModuleOp; -template -class OperationPass; -class Pass; - namespace kernel_gen { namespace tf_framework { @@ -47,6 +44,10 @@ namespace transforms { std::unique_ptr CreateShapeToDescriptorsPass(); } // namespace transforms + +#define GEN_PASS_REGISTRATION +#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/kernel_gen_passes.h.inc" + } // namespace kernel_gen } // namespace mlir diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/register_passes.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/register_passes.cc deleted file mode 100644 index 3a42d03355c..00000000000 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/register_passes.cc +++ /dev/null @@ -1,27 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "mlir/Pass/Pass.h" // from @llvm-project -#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h" - -namespace mlir { -namespace kernel_gen { -#define GEN_PASS_REGISTRATION -#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_passes.h.inc" - -bool register_all_passes = ([] { registerTFFrameworkPasses(); }(), true); - -} // namespace kernel_gen -} // namespace mlir diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/shape_to_descriptors.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/shape_to_descriptors_pass.cc similarity index 88% rename from tensorflow/compiler/mlir/tools/kernel_gen/transforms/shape_to_descriptors.cc rename to tensorflow/compiler/mlir/tools/kernel_gen/transforms/shape_to_descriptors_pass.cc index 32c2f9641b5..9c1b434b9b2 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/shape_to_descriptors.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/shape_to_descriptors_pass.cc @@ -24,21 +24,20 @@ limitations under the License. 
#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project #include "mlir/IR/MLIRContext.h" // from @llvm-project #include "mlir/IR/PatternMatch.h" // from @llvm-project +#include "mlir/Pass/Pass.h" // from @llvm-project #include "mlir/Transforms/DialectConversion.h" // from @llvm-project -#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h" namespace mlir { namespace kernel_gen { namespace transforms { - namespace { -struct ShapeToDescriptorsPass - : public PassWrapper> { - public: - ShapeToDescriptorsPass() = default; +#define GEN_PASS_CLASSES +#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/kernel_gen_passes.h.inc" +struct ShapeToDescriptorsPass + : public ShapeToDescriptorsPassBase { + public: void runOnOperation() override { MLIRContext &ctx = getContext(); @@ -63,7 +62,7 @@ struct ShapeToDescriptorsPass } // namespace -std::unique_ptr CreateShapeToDescriptorsPass() { +std::unique_ptr > CreateShapeToDescriptorsPass() { return std::make_unique(); } diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_legalize_to_llvm_pass.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_legalize_to_llvm_pass.cc index 8439e1617e0..916eedb55de 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_legalize_to_llvm_pass.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_legalize_to_llvm_pass.cc @@ -28,7 +28,7 @@ namespace tf_framework { namespace { #define GEN_PASS_CLASSES -#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_passes.h.inc" +#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/kernel_gen_passes.h.inc" class TestTFFrameworkToLLVMPass : public TestTFFrameworkLegalizeToLLVMPassBase { From f1f65d45f70af76d62a019838a8d939969a74428 Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: Tue, 4 Aug 2020 10:48:46 -0700 Subject: [PATCH 2087/2522] [tf.data service] Support restoring state from the journal on dispatcher startup. 
PiperOrigin-RevId: 324844108 Change-Id: I805c59ee6cf40a08fa5348c4232f92c0a6de8d3c --- .../core/data/service/dispatcher_impl.cc | 46 ++++++++++++++++--- .../core/data/service/dispatcher_impl.h | 11 ++++- .../core/data/service/dispatcher_state.cc | 3 +- .../core/data/service/dispatcher_state.h | 2 +- .../data/service/dispatcher_state_test.cc | 15 ++---- .../core/data/service/grpc_dispatcher_impl.cc | 13 +++--- .../core/data/service/grpc_dispatcher_impl.h | 2 + tensorflow/core/data/service/journal.cc | 4 -- tensorflow/core/data/service/journal.h | 12 ----- tensorflow/core/data/service/server_lib.cc | 4 ++ tensorflow/core/data/service/server_lib.h | 2 +- 11 files changed, 70 insertions(+), 44 deletions(-) diff --git a/tensorflow/core/data/service/dispatcher_impl.cc b/tensorflow/core/data/service/dispatcher_impl.cc index 4bc4d409fd7..4a3764ecea3 100644 --- a/tensorflow/core/data/service/dispatcher_impl.cc +++ b/tensorflow/core/data/service/dispatcher_impl.cc @@ -49,6 +49,10 @@ using Dataset = DispatcherState::Dataset; using NamedJobKey = DispatcherState::NamedJobKey; using Job = DispatcherState::Job; +std::string JournalDir(StringPiece work_dir) { + return io::JoinPath(work_dir, kJournalDir); +} + Status CreateWorkerStub(const std::string& address, const std::string& protocol, std::unique_ptr* stub) { ::grpc::ChannelArguments args; @@ -65,15 +69,35 @@ Status CreateWorkerStub(const std::string& address, const std::string& protocol, DataServiceDispatcherImpl::DataServiceDispatcherImpl( const experimental::DispatcherConfig& config) : config_(config) { - if (config_.work_dir().empty()) { - journal_writer_ = absl::make_unique(); - } else { - std::string journal_dir = io::JoinPath(config_.work_dir(), kJournalDir); - journal_writer_ = - absl::make_unique(Env::Default(), journal_dir); + if (!config_.work_dir().empty()) { + journal_writer_ = absl::make_unique( + Env::Default(), JournalDir(config_.work_dir())); } } +Status DataServiceDispatcherImpl::Start() { + if (config_.work_dir().empty()) { + return Status::OK(); + } + mutex_lock l(mu_); + Update update; + bool end_of_journal = false; + FileJournalReader reader(Env::Default(), JournalDir(config_.work_dir())); + Status s = reader.Read(&update, &end_of_journal); + if (errors::IsNotFound(s)) { + LOG(INFO) << "No journal found. 
Starting dispatcher from new state."; + return Status::OK(); + } + TF_RETURN_IF_ERROR(s); + LOG(INFO) << "Restoring dispatcher state from journal in " + << JournalDir(config_.work_dir()); + while (!end_of_journal) { + TF_RETURN_IF_ERROR(ApplyWithoutJournaling(update)); + TF_RETURN_IF_ERROR(reader.Read(&update, &end_of_journal)); + } + return Status::OK(); +} + Status DataServiceDispatcherImpl::RegisterWorker( const RegisterWorkerRequest* request, RegisterWorkerResponse* response) { VLOG(3) << "Received register worker request"; @@ -407,9 +431,17 @@ Status DataServiceDispatcherImpl::GetWorkers(const GetWorkersRequest* request, return Status::OK(); } +Status DataServiceDispatcherImpl::ApplyWithoutJournaling(const Update& update) + EXCLUSIVE_LOCKS_REQUIRED(mu_) { + return state_.Apply(update); +} + Status DataServiceDispatcherImpl::Apply(const Update& update) EXCLUSIVE_LOCKS_REQUIRED(mu_) { - return state_.Apply(update, journal_writer_.get()); + if (journal_writer_.has_value()) { + TF_RETURN_IF_ERROR(journal_writer_.value()->Write(update)); + } + return state_.Apply(update); } } // namespace data diff --git a/tensorflow/core/data/service/dispatcher_impl.h b/tensorflow/core/data/service/dispatcher_impl.h index 3e8b8dc6fbe..e39f3269d02 100644 --- a/tensorflow/core/data/service/dispatcher_impl.h +++ b/tensorflow/core/data/service/dispatcher_impl.h @@ -47,6 +47,10 @@ class DataServiceDispatcherImpl { explicit DataServiceDispatcherImpl( const experimental::DispatcherConfig& config); + // Starts the dispatcher. If there is a journal, this will read from the + // journal to restore the dispatcher's state. + Status Start(); + // See dispatcher.proto for API documentation. /// Worker-facing API. @@ -126,6 +130,10 @@ class DataServiceDispatcherImpl { EXCLUSIVE_LOCKS_REQUIRED(mu_); // Applies a state update, updating both the journal and the in-memory state. Status Apply(const Update& update) EXCLUSIVE_LOCKS_REQUIRED(mu_); + // Applies a state update, but doesn't update the journal. Only meant to be + // used when recovering state when the dispatcher starts. 
+ Status ApplyWithoutJournaling(const Update& update) + EXCLUSIVE_LOCKS_REQUIRED(mu_); const experimental::DispatcherConfig& config_; @@ -143,7 +151,8 @@ class DataServiceDispatcherImpl { absl::flat_hash_map>> tasks_by_job_ TF_GUARDED_BY(mu_); - std::unique_ptr journal_writer_ TF_GUARDED_BY(mu_); + absl::optional> journal_writer_ + TF_GUARDED_BY(mu_); DispatcherState state_ TF_GUARDED_BY(mu_); TF_DISALLOW_COPY_AND_ASSIGN(DataServiceDispatcherImpl); diff --git a/tensorflow/core/data/service/dispatcher_state.cc b/tensorflow/core/data/service/dispatcher_state.cc index f22672c4363..64be7fbc54e 100644 --- a/tensorflow/core/data/service/dispatcher_state.cc +++ b/tensorflow/core/data/service/dispatcher_state.cc @@ -25,8 +25,7 @@ namespace data { DispatcherState::DispatcherState() {} -Status DispatcherState::Apply(Update update, JournalWriter* journal_writer) { - TF_RETURN_IF_ERROR(journal_writer->Write(update)); +Status DispatcherState::Apply(Update update) { switch (update.update_type_case()) { case Update::kRegisterDataset: RegisterDataset(update.register_dataset()); diff --git a/tensorflow/core/data/service/dispatcher_state.h b/tensorflow/core/data/service/dispatcher_state.h index 1959afa61eb..e54f51ba499 100644 --- a/tensorflow/core/data/service/dispatcher_state.h +++ b/tensorflow/core/data/service/dispatcher_state.h @@ -61,7 +61,7 @@ class DispatcherState { DispatcherState& operator=(const DispatcherState&) = delete; // Applies the given update to the dispatcher's state. - Status Apply(Update update, JournalWriter* journal_writer); + Status Apply(Update update); // A dataset registered with the dispatcher. struct Dataset { diff --git a/tensorflow/core/data/service/dispatcher_state_test.cc b/tensorflow/core/data/service/dispatcher_state_test.cc index e1fd47805a7..02961d5bd1d 100644 --- a/tensorflow/core/data/service/dispatcher_state_test.cc +++ b/tensorflow/core/data/service/dispatcher_state_test.cc @@ -28,31 +28,28 @@ namespace data { namespace { Status RegisterDatasetWithIdAndFingerprint(int64 id, uint64 fingerprint, DispatcherState* state) { - NoopJournalWriter journal_writer; Update update; RegisterDatasetUpdate* register_dataset = update.mutable_register_dataset(); register_dataset->set_dataset_id(id); register_dataset->set_fingerprint(fingerprint); - TF_RETURN_IF_ERROR(state->Apply(update, &journal_writer)); + TF_RETURN_IF_ERROR(state->Apply(update)); return Status::OK(); } Status CreateAnonymousJob(int64 job_id, int64 dataset_id, DispatcherState* state) { - NoopJournalWriter journal_writer; Update update; CreateJobUpdate* create_job = update.mutable_create_job(); create_job->set_job_id(job_id); create_job->set_dataset_id(dataset_id); create_job->set_processing_mode(ProcessingModeDef::PARALLEL_EPOCHS); - TF_RETURN_IF_ERROR(state->Apply(update, &journal_writer)); + TF_RETURN_IF_ERROR(state->Apply(update)); return Status::OK(); } Status CreateNamedJob(int64 job_id, int64 dataset_id, DispatcherState::NamedJobKey named_job_key, DispatcherState* state) { - NoopJournalWriter journal_writer; Update update; CreateJobUpdate* create_job = update.mutable_create_job(); create_job->set_job_id(job_id); @@ -61,16 +58,15 @@ Status CreateNamedJob(int64 job_id, int64 dataset_id, NamedJobKeyDef* key = create_job->mutable_named_job_key(); key->set_name(named_job_key.name); key->set_index(named_job_key.index); - TF_RETURN_IF_ERROR(state->Apply(update, &journal_writer)); + TF_RETURN_IF_ERROR(state->Apply(update)); return Status::OK(); } Status FinishJob(int64 job_id, DispatcherState* state) { - 
NoopJournalWriter journal_writer; Update update; FinishJobUpdate* finish_job = update.mutable_finish_job(); finish_job->set_job_id(job_id); - TF_RETURN_IF_ERROR(state->Apply(update, &journal_writer)); + TF_RETURN_IF_ERROR(state->Apply(update)); return Status::OK(); } } // namespace @@ -117,10 +113,9 @@ TEST(DispatcherState, NextAvailableDatasetId) { } TEST(DispatcherState, UnknownUpdate) { - NoopJournalWriter journal_writer; DispatcherState state; Update update; - Status s = state.Apply(update, &journal_writer); + Status s = state.Apply(update); EXPECT_EQ(s.code(), error::INTERNAL); } diff --git a/tensorflow/core/data/service/grpc_dispatcher_impl.cc b/tensorflow/core/data/service/grpc_dispatcher_impl.cc index a26164ed48f..f62b487fcdf 100644 --- a/tensorflow/core/data/service/grpc_dispatcher_impl.cc +++ b/tensorflow/core/data/service/grpc_dispatcher_impl.cc @@ -24,7 +24,6 @@ namespace data { using ::grpc::ServerBuilder; using ::grpc::ServerContext; -using ::grpc::Status; GrpcDispatcherImpl::GrpcDispatcherImpl( ServerBuilder* server_builder, const experimental::DispatcherConfig& config) @@ -33,11 +32,13 @@ GrpcDispatcherImpl::GrpcDispatcherImpl( VLOG(1) << "Registered data service dispatcher"; } -#define HANDLER(method) \ - Status GrpcDispatcherImpl::method(ServerContext* context, \ - const method##Request* request, \ - method##Response* response) { \ - return ToGrpcStatus(impl_.method(request, response)); \ +Status GrpcDispatcherImpl::Start() { return impl_.Start(); } + +#define HANDLER(method) \ + grpc::Status GrpcDispatcherImpl::method(ServerContext* context, \ + const method##Request* request, \ + method##Response* response) { \ + return ToGrpcStatus(impl_.method(request, response)); \ } HANDLER(RegisterWorker); HANDLER(WorkerUpdate); diff --git a/tensorflow/core/data/service/grpc_dispatcher_impl.h b/tensorflow/core/data/service/grpc_dispatcher_impl.h index 24bf2d79061..1810c3fb6ac 100644 --- a/tensorflow/core/data/service/grpc_dispatcher_impl.h +++ b/tensorflow/core/data/service/grpc_dispatcher_impl.h @@ -39,6 +39,8 @@ class GrpcDispatcherImpl : public DispatcherService::Service { const experimental::DispatcherConfig& config); ~GrpcDispatcherImpl() override {} + Status Start(); + #define HANDLER(method) \ grpc::Status method(grpc::ServerContext* context, \ const method##Request* request, \ diff --git a/tensorflow/core/data/service/journal.cc b/tensorflow/core/data/service/journal.cc index 6856c69deb3..a9aa43b9758 100644 --- a/tensorflow/core/data/service/journal.cc +++ b/tensorflow/core/data/service/journal.cc @@ -61,10 +61,6 @@ Status FileJournalWriter::Write(Update update) { return Status::OK(); } -NoopJournalWriter::NoopJournalWriter() {} - -Status NoopJournalWriter::Write(Update update) { return Status::OK(); } - FileJournalReader::FileJournalReader(Env* env, StringPiece journal_dir) : env_(env), journal_dir_(journal_dir) {} diff --git a/tensorflow/core/data/service/journal.h b/tensorflow/core/data/service/journal.h index f5b3e26ba18..112c3b614be 100644 --- a/tensorflow/core/data/service/journal.h +++ b/tensorflow/core/data/service/journal.h @@ -58,18 +58,6 @@ class FileJournalWriter : public JournalWriter { std::unique_ptr writer_; }; -// NoopJournalWriter implements the JournalWriter interface, but doesn't -// actually write journal entries anywhere. -class NoopJournalWriter : public JournalWriter { - public: - // Creates a journal writer which does nothing. 
- explicit NoopJournalWriter(); - NoopJournalWriter(const NoopJournalWriter&) = delete; - NoopJournalWriter& operator=(const NoopJournalWriter&) = delete; - - Status Write(Update update) override; -}; - // Interface for reading from a journal. class JournalReader { public: diff --git a/tensorflow/core/data/service/server_lib.cc b/tensorflow/core/data/service/server_lib.cc index 648a189717e..751fa6ca2a8 100644 --- a/tensorflow/core/data/service/server_lib.cc +++ b/tensorflow/core/data/service/server_lib.cc @@ -82,6 +82,10 @@ void DispatchGrpcDataServer::AddServiceToBuilder(grpc::ServerBuilder* builder) { service_ = absl::make_unique(builder, config_).release(); } +Status DispatchGrpcDataServer::StartServiceInternal() { + return service_->Start(); +} + Status DispatchGrpcDataServer::NumWorkers(int* num_workers) { GetWorkersRequest req; GetWorkersResponse resp; diff --git a/tensorflow/core/data/service/server_lib.h b/tensorflow/core/data/service/server_lib.h index 365241753fb..2c300947f63 100644 --- a/tensorflow/core/data/service/server_lib.h +++ b/tensorflow/core/data/service/server_lib.h @@ -81,7 +81,7 @@ class DispatchGrpcDataServer : public GrpcDataServerBase { protected: void AddServiceToBuilder(grpc::ServerBuilder* builder) override; - Status StartServiceInternal() override { return Status::OK(); } + Status StartServiceInternal() override; private: const experimental::DispatcherConfig config_; From 0907c6e0547b9bdd2bfde9acdaea33392c45617b Mon Sep 17 00:00:00 2001 From: Katherine Wu Date: Tue, 4 Aug 2020 10:54:17 -0700 Subject: [PATCH 2088/2522] Add explanation for v1-only tests (training_ops_tests). PiperOrigin-RevId: 324845593 Change-Id: I0c167e3a90514e83740e84f032917df62a1fddf6 --- .../python/training/training_ops_test.py | 27 ++++++++++++------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/training/training_ops_test.py b/tensorflow/python/training/training_ops_test.py index 118636c551e..3dd1283c924 100644 --- a/tensorflow/python/training/training_ops_test.py +++ b/tensorflow/python/training/training_ops_test.py @@ -60,7 +60,8 @@ class TrainingOpsTest(TensorFlowTestCase): self.assertShapeEqual(out, apply_sgd) self.assertAllCloseAccordingToType(x - alpha * delta, out) - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only("ApplyGradientDescent op returns a ref, so it is not " + "supported in eager mode.") def testApplyGradientDescent(self): for (dtype, use_gpu) in itertools.product( [np.float16, np.float32, np.float64], [False, True]): @@ -184,7 +185,8 @@ class TrainingOpsTest(TensorFlowTestCase): self.assertAllClose(linear_update, self.evaluate(linear)) self.assertAllClose(expected_out, out) - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only("ApplyAdagrad op returns a ref, so it is not " + "supported in eager mode.") def testApplyAdagrad(self): for (dtype, use_gpu) in itertools.product( [np.float16, np.float32, np.float64], [False, True]): @@ -194,7 +196,8 @@ class TrainingOpsTest(TensorFlowTestCase): grad = np.arange(100).astype(dtype) self._testTypesForAdagrad(x, y, lr, grad, use_gpu) - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only("ApplyFtrl op returns a ref, so it is not " + "supported in eager mode.") def testApplyFtrl(self): for dtype in [np.float16, np.float32, np.float64]: x = np.arange(100).astype(dtype) @@ -206,7 +209,8 @@ class TrainingOpsTest(TensorFlowTestCase): grad = np.arange(100).astype(dtype) self._testTypesForFtrl(x, y, z, lr, grad, use_gpu=False, l1=l1, l2=l2) - 
@test_util.run_v1_only("b/120545219") + @test_util.run_v1_only("ApplyFtrlMultiplyLinearByLr op returns a ref, so it " + "is not supported in eager mode.") def testApplyFtrlMultiplyLinearByLr(self): for dtype in [np.float16, np.float32, np.float64]: x = np.arange(100).astype(dtype) @@ -320,7 +324,8 @@ class TrainingOpsTest(TensorFlowTestCase): self.assertAllCloseAccordingToType(y[index] + grad[i] * grad[i], self.evaluate(accum)[index]) - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only("SparseApplyAdagrad op returns a ref, so it is not " + "supported in eager mode.") def testSparseApplyAdagrad(self): for (dtype, index_type) in itertools.product( [np.float16, np.float32, np.float64], [np.int32, np.int64]): @@ -334,7 +339,8 @@ class TrainingOpsTest(TensorFlowTestCase): indices = np.array([0, 2]).astype(index_type) self._testTypesForSparseAdagrad(x, y, lr, grad, indices) - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only("SparseApplyAdagrad op returns a ref, so it is not " + "supported in eager mode.") def testSparseApplyAdagradDim1(self): for (dtype, index_type) in itertools.product( [np.float16, np.float32, np.float64], [np.int32, np.int64]): @@ -348,7 +354,8 @@ class TrainingOpsTest(TensorFlowTestCase): indices = np.array([0, 2]).astype(index_type) self._testTypesForSparseAdagrad(x, y, lr, grad, indices) - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only("SparseApplyFtrl op returns a ref, so it is not " + "supported in eager mode.") def testSparseApplyFtrlDim1(self): for (dtype, index_type) in itertools.product( [np.float16, np.float32, np.float64], [np.int32, np.int64]): @@ -364,7 +371,8 @@ class TrainingOpsTest(TensorFlowTestCase): indices = np.array([0, 2]).astype(index_type) self._testTypesForSparseFtrl(x, y, z, lr, grad, indices) - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only("SparseApplyFtrlMultiplyLinearByLr op returns a ref, " + "so it is not supported in eager mode.") def testSparseApplyFtrlMultiplyLinearByLrDim1(self): for (dtype, index_type) in itertools.product([np.float16, np.float32, np.float64], @@ -381,7 +389,8 @@ class TrainingOpsTest(TensorFlowTestCase): indices = np.array([0, 2]).astype(index_type) self._testTypesForSparseFtrlMultiplyLinearByLr(x, y, z, lr, grad, indices) - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only("ApplyAdam op returns a ref, so it is not " + "supported in eager mode.") def testApplyAdam(self): for dtype, use_gpu in itertools.product( [np.float16, np.float32, np.float64], [False, True]): From bafcc6de33d1c5e976777e425b90a23b3e609d5d Mon Sep 17 00:00:00 2001 From: Haoyu Zhang Date: Tue, 4 Aug 2020 11:00:20 -0700 Subject: [PATCH 2089/2522] Disable client_test on windows. 
PiperOrigin-RevId: 324847100 Change-Id: I1181459063c65cbbecd7aa15ab9ede12cbb30d69 --- tensorflow/python/distribute/client/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/distribute/client/BUILD b/tensorflow/python/distribute/client/BUILD index 35d8de95276..d37d855a390 100644 --- a/tensorflow/python/distribute/client/BUILD +++ b/tensorflow/python/distribute/client/BUILD @@ -49,6 +49,7 @@ tf_py_test( srcs = ["client_test.py"], python_version = "PY3", shard_count = 12, + tags = ["no_windows"], # TODO(b/162751266) deps = [ ":client", "//tensorflow/python:client_testlib", From 7a2383c18f2f5be28d9c70e6362911d2c011f729 Mon Sep 17 00:00:00 2001 From: Pete Warden Date: Tue, 4 Aug 2020 11:16:42 -0700 Subject: [PATCH 2090/2522] Improve error catching for downloading PiperOrigin-RevId: 324850944 Change-Id: I1f00bf4ab6ea0e4f96e391b8129b8df4126755a2 --- tensorflow/lite/micro/tools/make/download_and_extract.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tensorflow/lite/micro/tools/make/download_and_extract.sh b/tensorflow/lite/micro/tools/make/download_and_extract.sh index fa5e57dd91a..e72fd7a0184 100755 --- a/tensorflow/lite/micro/tools/make/download_and_extract.sh +++ b/tensorflow/lite/micro/tools/make/download_and_extract.sh @@ -171,12 +171,20 @@ download_and_extract() { # loop to attempt to recover from them. for (( i=1; i<=$curl_retries; ++i )) do + # We have to use this approach because we normally halt the script when + # there's an error, and instead we want to catch errors so we can retry. + set +e curl -Ls --fail --retry 5 "${url}" > ${tempfile} CURL_RESULT=$? + set -e + + # Was the command successful? If so, continue. if [[ $CURL_RESULT -eq 0 ]] then break fi + + # Keep trying if we see the '56' error code. if [[ ( $CURL_RESULT -ne 56 ) || ( $i -eq $curl_retries ) ]] then echo "Error $CURL_RESULT downloading '${url}'" From 4e03f13e6a7c8fc760abb5badc32d7f6557f73c7 Mon Sep 17 00:00:00 2001 From: Yujing Zhang Date: Tue, 4 Aug 2020 11:17:45 -0700 Subject: [PATCH 2091/2522] Support packed TensorHandles on CustomDevices. PiperOrigin-RevId: 324851163 Change-Id: I017665b5e2bd90da37f4d45aa94e1278be7bd2f7 --- tensorflow/core/common_runtime/eager/tensor_handle.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/core/common_runtime/eager/tensor_handle.cc b/tensorflow/core/common_runtime/eager/tensor_handle.cc index 12bd70d705d..d7b2ef4be1e 100644 --- a/tensorflow/core/common_runtime/eager/tensor_handle.cc +++ b/tensorflow/core/common_runtime/eager/tensor_handle.cc @@ -316,8 +316,7 @@ Status TensorHandle::CreatePackedHandle(std::vector&& handles, std::vector devices; for (auto* handle : handles) { if (VariantDeviceIsCustom(handle->device())) { - return errors::InvalidArgument( - "CustomDevice is not supported for packing."); + devices.push_back(absl::get(handle->device())->name()); } else { devices.push_back(handle->op_device() ? 
handle->op_device()->name() : ctx->HostCPU()->name()); From 6acd86d539464b611d37b8dc13251fafab25fb5c Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Tue, 4 Aug 2020 11:18:29 -0700 Subject: [PATCH 2092/2522] [TF2XLA] Make tf.argmin stable on XLA:TPU PiperOrigin-RevId: 324851314 Change-Id: Icdecbe87c545d4254bcdb508f76e31de30bc8f86 --- tensorflow/compiler/tf2xla/kernels/index_ops.cc | 2 +- tensorflow/python/eager/def_function_xla_jit_test.py | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/tensorflow/compiler/tf2xla/kernels/index_ops.cc b/tensorflow/compiler/tf2xla/kernels/index_ops.cc index 31637d9d8a0..df6d9b475dc 100644 --- a/tensorflow/compiler/tf2xla/kernels/index_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/index_ops.cc @@ -71,7 +71,7 @@ void XlaArgMinMaxOp::Compile(XlaOpKernelContext* ctx) { if (is_gpu_) { output = xla::ArgMinTwoPass(input, index_xla_type, axis); } else { - output = xla::ArgMin(input, index_xla_type, axis); + output = xla::ArgMin(input, index_xla_type, axis, /*stable=*/true); } } else { if (is_gpu_) { diff --git a/tensorflow/python/eager/def_function_xla_jit_test.py b/tensorflow/python/eager/def_function_xla_jit_test.py index b9457159217..813c1377cd9 100644 --- a/tensorflow/python/eager/def_function_xla_jit_test.py +++ b/tensorflow/python/eager/def_function_xla_jit_test.py @@ -293,9 +293,6 @@ class DefFunctionTest(xla_test.XLATestCase): @test_util.disable_mlir_bridge('TODO(b/162271237): argmax gives different' ' results in MLIR-based bridge') def testArgMinMax(self): - if 'tpu' in self.device.lower(): - self.skipTest('b/162800904: Tie resolution is wrong on TPU for tf.func') - with ops.device('device:{}:0'.format(self.device)): @def_function.function(experimental_compile=True) From 8c5f64c993b2a6a24335664db9891c8b7ecb2c73 Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Tue, 4 Aug 2020 18:40:33 +0000 Subject: [PATCH 2093/2522] fix build --- tensorflow/core/kernels/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 589165fcd2f..47c7d41d0fe 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -2960,6 +2960,7 @@ tf_kernel_library( ":fill_functor", ":tensor_map", "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", "//tensorflow/core:lib", "//third_party/eigen3", ], From adc09622af25ab8b48ef5f18ed25d18c50db8e13 Mon Sep 17 00:00:00 2001 From: Tzu-Wei Sung Date: Mon, 3 Aug 2020 10:33:01 -0700 Subject: [PATCH 2094/2522] Fix python math doc Run tests --- tensorflow/python/ops/math_ops.py | 46 ++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 0a16c18f7b2..d5c3650916b 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -367,8 +367,17 @@ def abs(x, name=None): # pylint: disable=redefined-builtin Given a tensor `x` of complex numbers, this operation returns a tensor of type `float32` or `float64` that is the absolute value of each element in `x`. For a complex number \\(a + bj\\), its absolute value is computed as - \\(\sqrt{a^2 + b^2}\\). For example: + \\(\sqrt{a^2 + b^2}\\). + For example: + + >>> # real number + >>> x = tf.constant([-2.25, 3.25]) + >>> tf.abs(x) + + + >>> # complex number >>> x = tf.constant([[-2.25 + 4.75j], [-3.25 + 5.75j]]) >>> tf.abs(x) 0. + `y = sign(x) = -1 if x < 0; 0 if x == 0; 1 if x > 0`. 
- For complex numbers, y = sign(x) = x / |x| if x != 0, otherwise y = 0. + For complex numbers, `y = sign(x) = x / |x| if x != 0, otherwise y = 0`. Example usage: + >>> # real number >>> tf.math.sign([0., 2., -3.]) - + + + >>> # complex number + >>> tf.math.sign([1 + 1j, 0 + 0j]) + Args: x: A Tensor. Must be one of the following types: bfloat16, half, float32, @@ -708,7 +724,7 @@ def sign(x, name=None): tf.math.sign(x.values, ...), x.dense_shape). """ x = ops.convert_to_tensor(x) - if x.dtype in (dtypes.complex64, dtypes.complex128): + if x.dtype.is_complex: return gen_math_ops.div_no_nan( x, cast( @@ -3615,9 +3631,9 @@ def _accumulate_n_grad(op, grad): def sigmoid(x, name=None): r"""Computes sigmoid of `x` element-wise. - Formula for calculating sigmoid(x): `y = 1 / (1 + exp(-x))`. + Formula for calculating $\mathrm{sigmoid}(x) = y = 1 / (1 + \exp(-x))$. - For x \in (-inf, inf) => sigmoid(x) \in (0, 1) + For $x \in (-\infty, \infty)$, $\mathrm{sigmoid}(x) \in (0, 1)$. Example Usage: @@ -4568,12 +4584,12 @@ def polyval(coeffs, x, name=None): If `x` is a tensor and `coeffs` is a list n + 1 tensors, this function returns the value of the n-th order polynomial - p(x) = coeffs[n-1] + coeffs[n-2] * x + ... + coeffs[0] * x**(n-1) + `p(x) = coeffs[n-1] + coeffs[n-2] * x + ... + coeffs[0] * x**(n-1)` evaluated using Horner's method, i.e. - p(x) = coeffs[n-1] + x * (coeffs[n-2] + ... + x * (coeffs[1] + - x * coeffs[0])) + `p(x) = coeffs[n-1] + x * (coeffs[n-2] + ... + x * (coeffs[1] + + x * coeffs[0]))` Usage Example: @@ -4820,10 +4836,14 @@ def exp(x, name=None): numpy=array([ 7.389056, 2980.958 ], dtype=float32)> For complex numbers, the exponential value is calculated as - \\(e^{x+iy}={e^x}{e^{iy}}={e^x}{\\cos(y)+i\\sin(y)}\\) + $$ + e^{x+iy} = {e^x} {e^{iy}} = {e^x} ({\cos (y) + i \sin (y)}) + $$ For `1+1j` the value would be computed as: - \\(e^1{\\cos(1)+i\\sin(1)} = 2.7182817 \\times (0.5403023+0.84147096j)\\) + $$ + e^1 (\cos (1) + i \sin (1)) = 2.7182817 \times (0.5403023+0.84147096j) + $$ >>> x = tf.constant(1 + 1j) >>> tf.math.exp(x) From e1e264594584d740ce5b077d92d58ca167318f22 Mon Sep 17 00:00:00 2001 From: Ran Chen Date: Tue, 4 Aug 2020 11:56:38 -0700 Subject: [PATCH 2095/2522] Put distribution and cluster parameter modifiers in their own combination It's easier to add more strategy/cluster modifiers. PiperOrigin-RevId: 324859132 Change-Id: Ided1a3d1106d71e86335ca3c59d7698550fa2155 --- .../integration_tests/saved_model_test.py | 4 +- tensorflow/python/distribute/combinations.py | 43 +++++++++---------- .../python/distribute/combinations_test.py | 14 +++--- tensorflow/python/distribute/vars_test.py | 31 ++++++------- 4 files changed, 42 insertions(+), 50 deletions(-) diff --git a/tensorflow/examples/saved_model/integration_tests/saved_model_test.py b/tensorflow/examples/saved_model/integration_tests/saved_model_test.py index 6333e55999e..434d5ed4ad5 100644 --- a/tensorflow/examples/saved_model/integration_tests/saved_model_test.py +++ b/tensorflow/examples/saved_model/integration_tests/saved_model_test.py @@ -90,8 +90,8 @@ class SavedModelTest(scripts.TestCase, parameterized.TestCase): retrain_flag_value=["true", "false"], regularization_loss_multiplier=[None, 2], # Test for b/134528831. 
)), - test_combinations=(distribute_combinations.NamedGPUCombination(), - distribute_combinations.NamedTPUCombination())) + test_combinations=(distribute_combinations.GPUCombination(), + distribute_combinations.TPUCombination())) @combinations.generate(**TEST_MNIST_CNN_GENERATE_KWARGS) def test_mnist_cnn(self, use_keras_save_api, named_strategy, diff --git a/tensorflow/python/distribute/combinations.py b/tensorflow/python/distribute/combinations.py index 17bc285b222..3856b6fd132 100644 --- a/tensorflow/python/distribute/combinations.py +++ b/tensorflow/python/distribute/combinations.py @@ -99,7 +99,24 @@ class ClusterParameters(combinations_lib.ParameterModifier): return update -class NamedGPUCombination(combinations_lib.TestCombination): +class DistributionCombination(combinations_lib.TestCombination): + """Sets up distribution strategy for tests.""" + + def parameter_modifiers(self): + return [ + DistributionParameter(), + combinations_lib.OptionalParameter("use_var_policy"), + ] + + +class ClusterCombination(combinations_lib.TestCombination): + """Sets up multi worker tests.""" + + def parameter_modifiers(self): + return [ClusterParameters()] + + +class GPUCombination(combinations_lib.TestCombination): """Enable tests to request GPU hardware and skip non-GPU combinations. This class expects test_combinations to be generated with `NamedDistribution` @@ -141,17 +158,7 @@ class NamedGPUCombination(combinations_lib.TestCombination): return [combinations_lib.OptionalParameter("required_gpus")] -class GPUCombination(NamedGPUCombination): - """NamedGPUCombination that passes `tf.distribute.Strategy` to the tests.""" - - def parameter_modifiers(self): - return [ - ClusterParameters(), - DistributionParameter(), - ] + NamedGPUCombination.parameter_modifiers(self) - - -class NamedTPUCombination(combinations_lib.TestCombination): +class TPUCombination(combinations_lib.TestCombination): """Allow to request TPU hardware and skip non-TPU combinations. This class expects test_combinations to be generated with `NamedDistribution` @@ -213,16 +220,6 @@ class NamedTPUCombination(combinations_lib.TestCombination): ] -class TPUCombination(NamedTPUCombination): - """NamedTPUCombination that passes `tf.distribute.Strategy` to the tests.""" - - def parameter_modifiers(self): - return [ - ClusterParameters(), - DistributionParameter(), - ] + NamedTPUCombination.parameter_modifiers(self) - - class NamedDistribution(object): """Wraps a `tf.distribute.Strategy` and adds a name for test titles.""" @@ -304,6 +301,8 @@ def generate(combinations, test_combinations=()): default_combinations = ( framework_combinations.EagerGraphCombination(), framework_combinations.TFVersionCombination(), + ClusterCombination(), + DistributionCombination(), GPUCombination(), TPUCombination(), ) diff --git a/tensorflow/python/distribute/combinations_test.py b/tensorflow/python/distribute/combinations_test.py index 6d9d0b2570f..3fc3735d560 100644 --- a/tensorflow/python/distribute/combinations_test.py +++ b/tensorflow/python/distribute/combinations_test.py @@ -30,7 +30,7 @@ from tensorflow.python.framework import combinations as framework_combinations from tensorflow.python.platform import test -class ClusterParametersTest(test.TestCase, parameterized.TestCase): +class ClusterCombinationTest(test.TestCase, parameterized.TestCase): # For this test we need to use `framework.test_combinations` because our # `generate` eats the cluster parameters. 
# @@ -42,7 +42,7 @@ class ClusterParametersTest(test.TestCase, parameterized.TestCase): combinations.NamedDistribution( "HasClusterParams", lambda: None, has_chief=True, num_workers=2), ]), - test_combinations=(combinations.GPUCombination(),)) + test_combinations=(combinations.ClusterCombination(),)) def testClusterParams(self, distribution, has_chief, num_workers): self.assertTrue(has_chief) self.assertEqual(num_workers, 2) @@ -51,14 +51,14 @@ class ClusterParametersTest(test.TestCase, parameterized.TestCase): framework_combinations.combine(distribution=[ combinations.NamedDistribution("NoClusterParams", lambda: None), ]), - test_combinations=(combinations.GPUCombination(),)) + test_combinations=(combinations.ClusterCombination(),)) def testClusterParamsHasDefault(self, distribution, has_chief, num_workers): self.assertFalse(has_chief) self.assertEqual(num_workers, 1) @framework_combinations.generate( framework_combinations.combine(v=1), - test_combinations=(combinations.GPUCombination(),)) + test_combinations=(combinations.ClusterCombination(),)) def testClusterParamsNoStrategy(self, v, has_chief, num_workers): self.assertFalse(has_chief) self.assertEqual(num_workers, 1) @@ -69,7 +69,7 @@ class ClusterParametersTest(test.TestCase, parameterized.TestCase): "WithClusterParams", lambda: None, has_chief=True, num_workers=2), combinations.NamedDistribution("WithoutClusterParams", lambda: None), ]), - test_combinations=(combinations.GPUCombination(),)) + test_combinations=(combinations.ClusterCombination(),)) def testClusterParamsAreOptional(self, distribution): # If combinations library doesn't raise an exception, the test is passed. pass @@ -83,7 +83,7 @@ class ClusterParametersTest(test.TestCase, parameterized.TestCase): ds3=combinations.NamedDistribution( "Strategy3", lambda: None, has_chief=True, num_workers=0), ), - test_combinations=(combinations.GPUCombination(),)) + test_combinations=(combinations.ClusterCombination(),)) def testMultipleDistributionSingleWorker(self, ds1, ds2, ds3): # If combinations library doesn't raise an exception, the test is passed. pass @@ -101,7 +101,7 @@ class ClusterParametersShouldFailTest(test.TestCase, parameterized.TestCase): ds2=combinations.NamedDistribution( "Strategy2", lambda: None, has_chief=True, num_workers=2), ), - test_combinations=(combinations.GPUCombination(),)) + test_combinations=(combinations.ClusterCombination(),)) def testMultipleDistributionMultiWorker(self, ds1, ds2): # combinations library should raise an exception. 
pass diff --git a/tensorflow/python/distribute/vars_test.py b/tensorflow/python/distribute/vars_test.py index efbb6c23aaa..a8605a3f2da 100644 --- a/tensorflow/python/distribute/vars_test.py +++ b/tensorflow/python/distribute/vars_test.py @@ -95,8 +95,7 @@ class OnWriteVariableSync(test.TestCase, parameterized.TestCase): sess.run({"complicated": mirrored}) @combinations.generate(strategy_and_run_tf_function_combinations()) - def testAssign(self, distribution, experimental_run_tf_function, - use_var_policy): + def testAssign(self, distribution, experimental_run_tf_function): def assign(fn, v, update_value, cross_replica): update_fn = lambda: getattr(v, fn)(update_value) @@ -136,8 +135,7 @@ class OnWriteVariableSync(test.TestCase, parameterized.TestCase): self.evaluate(array_ops.ones_like(component))) @combinations.generate(strategy_and_run_tf_function_combinations()) - def testAssignOnWriteVar(self, distribution, experimental_run_tf_function, - use_var_policy): + def testAssignOnWriteVar(self, distribution, experimental_run_tf_function): with distribution.scope(): v_to_assign = variable_scope.variable( @@ -182,8 +180,7 @@ class OnWriteVariableSync(test.TestCase, parameterized.TestCase): self.assertAllEqual(2.0, self.evaluate(component.read_value())) @combinations.generate(strategy_and_run_tf_function_combinations()) - def testAssignPerReplicaVal(self, distribution, experimental_run_tf_function, - use_var_policy): + def testAssignPerReplicaVal(self, distribution, experimental_run_tf_function): if isinstance(distribution, _TPU_STRATEGIES): self.skipTest("Assigning PerReplica values is not supported. See" @@ -241,7 +238,7 @@ class OnWriteVariableSync(test.TestCase, parameterized.TestCase): self.assertAllEqual(expected, self.evaluate(component.read_value())) @combinations.generate(strategy_with_var_policy()) - def testValueInReplicaContext(self, distribution, use_var_policy): + def testValueInReplicaContext(self, distribution): with distribution.scope(): v = variables_lib.Variable( 1., aggregation=variables_lib.VariableAggregation.MEAN) @@ -260,8 +257,7 @@ class OnWriteVariableSync(test.TestCase, parameterized.TestCase): @combinations.generate(strategy_and_run_tf_function_combinations()) def testReadValueInReplicaContext(self, distribution, - experimental_run_tf_function, - use_var_policy): + experimental_run_tf_function): aggregations = [ variables_lib.VariableAggregation.NONE, variables_lib.VariableAggregation.SUM, @@ -286,8 +282,7 @@ class OnWriteVariableSync(test.TestCase, parameterized.TestCase): @combinations.generate(strategy_and_run_tf_function_combinations()) def testReadValueInCrossReplicaContext(self, distribution, - experimental_run_tf_function, - use_var_policy): + experimental_run_tf_function): aggregations = [ variables_lib.VariableAggregation.NONE, variables_lib.VariableAggregation.SUM, @@ -312,7 +307,7 @@ class OnWriteVariableSync(test.TestCase, parameterized.TestCase): self.evaluate(results)) @combinations.generate(strategy_with_var_policy()) - def testAssignOutOfScope(self, distribution, use_var_policy): + def testAssignOutOfScope(self, distribution): with distribution.scope(): mirrored = variables_lib.Variable(1.) self.evaluate(mirrored.assign(3.)) @@ -321,8 +316,7 @@ class OnWriteVariableSync(test.TestCase, parameterized.TestCase): self.assertEqual(self.evaluate(component.read_value()), 3.) 
@combinations.generate(strategy_with_var_policy()) - def testAssignAggregationMeanDTypeNonFloat(self, distribution, - use_var_policy): + def testAssignAggregationMeanDTypeNonFloat(self, distribution): if isinstance(distribution, _TPU_STRATEGIES): self.skipTest("Fix sponge/6e8ab540-4c0f-4da5-aedf-86505ff810c9 before " "reenabling test.") @@ -379,8 +373,7 @@ class OnWriteVariableSync(test.TestCase, parameterized.TestCase): self.assertEqual(self.evaluate(v.read_value()), 4) @combinations.generate(strategy_with_var_policy()) - def testInitializedToSameValueInsideEagerRun(self, distribution, - use_var_policy): + def testInitializedToSameValueInsideEagerRun(self, distribution): if not context.executing_eagerly(): self.skipTest("eager only test") v = [None] @@ -399,7 +392,7 @@ class OnWriteVariableSync(test.TestCase, parameterized.TestCase): self.assertAllEqual(vals[0], vals[1]) @combinations.generate(strategy_with_var_policy()) - def testAggregationOnlyFirstReplica(self, distribution, use_var_policy): + def testAggregationOnlyFirstReplica(self, distribution): with distribution.scope(): v = variable_scope.variable( 15., @@ -420,7 +413,7 @@ class OnWriteVariableSync(test.TestCase, parameterized.TestCase): per_replica_results) @combinations.generate(strategy_with_var_policy()) - def testInitScope(self, distribution, use_var_policy): + def testInitScope(self, distribution): if not context.executing_eagerly(): self.skipTest("eager only") class C(object): @@ -448,7 +441,7 @@ class OnWriteVariableSync(test.TestCase, parameterized.TestCase): self.assertAllEqual([2, 2], per_replica_results) @combinations.generate(strategy_with_var_policy()) - def testOperatorOverride(self, distribution, use_var_policy): + def testOperatorOverride(self, distribution): with distribution.scope(): v = variable_scope.variable( From 773541470562915763bd7fd61d76cda49df78aa2 Mon Sep 17 00:00:00 2001 From: Xingyu Long Date: Tue, 4 Aug 2020 15:08:19 -0400 Subject: [PATCH 2096/2522] Add README.md for keras_examples_benchmarks folder --- .../keras_examples_benchmarks/README.md | 183 ++++++++++++++++++ 1 file changed, 183 insertions(+) create mode 100644 tensorflow/python/keras/benchmarks/keras_examples_benchmarks/README.md diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/README.md b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/README.md new file mode 100644 index 00000000000..595f94b7eda --- /dev/null +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/README.md @@ -0,0 +1,183 @@ +# Benchmarks for keras model exmaples + +- [Benchmarks for keras model exmaples](#benchmarks-for-keras-model-exmaples) + - [Keras Benchmarks](#keras-benchmarks) + - [Available models](#available-models) + - [Computer Vision examples](#computer-vision-examples) + - [Text & Sequence examples](#text--sequence-examples) + - [Other examples](#other-examples) + - [Available benchmark results](#available-benchmark-results) + - [Cifar10 CNN benchmark](#cifar10-cnn-benchmark) + - [MNIST Conv benchmark](#mnist-conv-benchmark) + - [MNIST Hierarchical RNN (HRNN) benchmark](#mnist-hierarchical-rnn-hrnn-benchmark) + - [Bidirectional LSTM benchmark](#bidirectional-lstm-benchmark) + - [Text classification with transformer benchmark](#text-classification-with-transformer-benchmark) + - [MLP benchmark](#mlp-benchmark) + - [Antirectifier benchmark](#antirectifier-benchmark) + - [IRNN benchmark](#irnn-benchmark) + - [Installing Bazel](#installing-bazel) + - [How to run benchmarks](#how-to-run-benchmarks) + - [How to add 
new benchmark tests that use `fit`](#how-to-add-new-benchmark-tests-that-use-fit) + - [Troubleshooting](#troubleshooting) + +## Keras Benchmarks + +These are benchmark tests running on keras models: models from [keras/examples](https://github.com/keras-team/keras/tree/master/examples). Benchmarks in the current folder (`tensorflow/python/keras/benchmarks/keras_examples_benchmarks`) use Keras [built-in dataset](https://keras.io/api/datasets/) or synthetic data. In addition, these benchmarks support different distribution strategies and measure the performance with distributed training. + +### Available models + +These examples are implemented by functional API and Sequential API. + +#### Computer Vision examples + +- [cifar10_cnn_benchmark_test.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/cifar10_cnn_benchmark_test.py): Simple CNN on CIFAR10 image dataset. +- [mnist_conv_benchmark_test.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_conv_benchmark_test.py): Simple Convnet that achieves ~99% test accuracy on MNIST. +- [mnist_hierarchical_rnn_benchmark_test.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_hierarchical_rnn_benchmark_test.py): Hierarchical RNN (HRNN) to classify MNIST digits. + +#### Text & Sequence examples + +[Bidirectional_lstm_benchmark_test.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py): 2-layer bidirectional LSTM on IMDB movie review dataset. +[text_classification_transformer_benchmark_test.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py): Text classification with custom transformer block. +[reuters_mlp_benchmark_test.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/reuters_mlp_benchmark_test.py): Simple MLP on Reuters newswire topic classification dataset. + +#### Other examples + +[antirectifier_benchmark_test.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/antirectifier_benchmark_test.py): Simple custom layer example. +[mnist_irnn_benchmark_test.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_irnn_benchmark_test.py): Reproduction of the IRNN experiment with pixel-by-pixel sequential MNIST in ["A Simple Way to Initialize Recurrent Networks of Rectified Linear Units"](https://arxiv.org/abs/1504.00941) by Le et al. + +### Available benchmark results + +We run benchmarks on Google Cloud Platform (GCP) and here is current environment for running benchmarks tests:
+- GPU: 2 x Tesla V100 (only for GPU test)
+- OS: Ubuntu 18.04
+- CPU: 8 x vCPUs, 30 GB memory
+- CUDA: 10.1
+- Bazel: 3.1.0
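+
+For reference, one quick way to confirm that a local machine matches this GPU setup before running the GPU benchmarks (these are standard NVIDIA utilities, not part of this repository):
+
+```shell
+nvidia-smi       # driver version and visible GPUs
+nvcc --version   # CUDA toolkit version (10.1 above)
+```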
    + +If you want to run benchmark tests on GPU, please make sure you already installed CUDA and other dependencies and you can follow the instructions from the [official tutorial](https://www.tensorflow.org/install/gpu) for GPU support. + +#### Cifar10 CNN benchmark + +| | Batch_size | Wall_time | Avg_epoch_time | Exp_per_sec | Distribution_Strategy | +| :---: | :--------: | :-------: | :------------: | :---------: | :-------------------: | +| CPU | 256 | 1393.4896 | 3.21 | 15397.69 | `off` | +| GPU:2 | 256 | 76.49 | 2.59 | 18758.01 | `mirrored` | + +#### MNIST Conv benchmark + +| | Batch_size | Wall_time | Avg_epoch_time | Exp_per_sec | Distribution_Strategy | +| :---: | :--------: | :-------: | :------------: | :---------: | :-------------------: | +| CPU | 256 | 196.52 | 12.19 | 4915.26 | `off` | +| GPU:2 | 256 | 24.5794 | 1.21 | 47899.32 | `mirrored` | + +#### MNIST Hierarchical RNN (HRNN) benchmark + +| | Batch_size | Wall_time | Avg_epoch_time | Exp_per_sec | Distribution_Strategy | +| :---: | :--------: | :-------: | :------------: | :---------: | :-------------------: | +| CPU | 256 | 654.05 | 218.68 | 274.24 | `off` | +| GPU:2 | 256 | 20.77 | 3.73 | 15088.06 | `mirrored` | + +#### Bidirectional LSTM benchmark + +| | Batch_size | Wall_time | Avg_epoch_time | Exp_per_sec | Distribution_Strategy | +| :---: | :--------: | :-------: | :------------: | :---------: | :-------------------: | +| CPU | 512 | 225.57 | 72.55 | 344.70 | `off` | +| GPU:2 | 512 | 23.54 | 3.23 | 7532.53 | `mirrored` | + +#### Text classification with transformer benchmark + +| | Batch_size | Wall_time | Avg_epoch_time | Exp_per_sec | Distribution_Strategy | +| :---: | :--------: | :-------: | :------------: | :---------: | :-------------------: | +| CPU | 512 | 109.22 | 35.93 | 698.10 | `off` | +| GPU:2 | 512 | 9.28 | 0.83 | 26567.54 | `mirrored` | + +#### MLP benchmark + +| | Batch_size | Wall_time | Avg_epoch_time | Exp_per_sec | Distribution_Strategy | +| :---: | :--------: | :-------: | :------------: | :---------: | :-------------------: | +| CPU | 128 | 3.76 | 0.54 | 17678.54 | `off` | +| GPU:2 | 128 | 5.91 | 0.30 | 25435.14 | `mirrored` | + +#### Antirectifier benchmark + +| | Batch_size | Wall_time | Avg_epoch_time | Exp_per_sec | Distribution_Strategy | +| :---: | :--------: | :-------: | :------------: | :---------: | :-------------------: | +| CPU | 512 | 6.77 | 1.79 | 30916.39 | `off` | +| GPU:2 | 512 | 6.81 | 0.66 | 66563.17 | `mirrored` | + +#### IRNN benchmark + +| | Batch_size | Wall_time | Avg_epoch_time | Exp_per_sec | Distribution_Strategy | +| :---: | :--------: | :-------: | :------------: | :---------: | :-------------------: | +| CPU | 1024 | 213.00 | 69.01 | 868.08 | `off` | +| GPU:2 | 1024 | 92.71 | 29.12 | 2042.94 | `mirrored` | + +**Note**: For the smaller models, running models with GPU may be slower than running models with CPU as training small models is not computation dominant and there might be some overhead on model replication and data sharding with distributed training on GPUs. + +## Installing Bazel + +This step can be skipped if Bazel is already installed.
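+
+To check whether Bazel is already installed, and which version is on the `PATH`:
+
+```shell
+bazel --version
+```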
    + +We need to use [Bazel](https://bazel.build/) to build targets based on BUILD files. It will take a while for the first time because it will compile all dependencies from your BUILD file. For the next time, Bazel will use the cache and it’ll be much faster. Since we use Ubuntu OS, we can install bazel by using apt repository. + +1. Add bazel as package source + + ```shell + sudo apt install curl gnupg + ``` + + ```shell + curl https://bazel.build/bazel-release.pub.gpg | sudo apt-key add - + ``` + + ```shell + echo "deb [arch=amd64] https://storage.googleapis.com/bazel-apt stable jdk1.8" | sudo tee /etc/apt/sources.list.d/bazel.list + ``` + + Before we install the bazel, We should take a look for a bazel version that can build the specific tensorflow version, you can check it from [here](https://www.tensorflow.org/install/source#tested_build_configurations). In addition, you can follow the instructions from [Bazel website](https://docs.bazel.build/versions/3.4.0/install.html). + +2. Install Bazel + + ```shell + sudo apt update && sudo apt install bazel-`version` + ``` + +## How to run benchmarks + +To run benchmarks in [keras/benchmarks](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/python/keras/benchmarks), please take the following steps: + +1. Pull the latest tensorflow repo from github. +2. Install the Bazel tool which works with tensorflow, please take a look for the Tool installation section. +3. To run benchmarks with Bazel, use the `--benchmarks=.` flags to specify the benchmarks to run. + - To run all benchmarks on CPU + + ```shell + bazel run -c opt benchmark_test -- --benchmarks=. + ``` + + - To run all benchmarks on GPU + + ```shell + bazel run run --config=cuda -c opt --copt="-mavx" benchmarks_test -- \ --benchmarks=. + ``` + + - To run a subset of benchmarks using `--benchmarks` flag, `--benchmarks`: the list of benchmarks to run. The specified value is interpreted as a regular expression and any benchmarks whose name contains a partial match to the regular expression is executed. e.g. `--benchmarks=".*lstm*."`, will run all lstm layer related benchmarks. + +## How to add new benchmark tests that use `fit` + +To add a new benchmark, please follow the steps: + +1. Create your own benchmark test file, `xxxx_benchmark_test.py`. +2. Import `benchmark_util` to measure and track performance. +3. Create class which inherits from `tf.test.Benchmark` +4. Define and load dataset in `__init__` method. +5. Design and create a model in `_build_model` method. +6. Define the `benchmark_xxx` method and it will pass essential parameters, which includes `batch_size`, `run_iters`, `train_data` and etc. You can check examples from [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/python/keras/benchmarks/keras_examples_benchmarks). +7. In addition, you need to add a benchmark target in the [BUILD](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/benchmarks/BUILD) file and write the target name and dependencies. You can take current BUILD as a reference. + +## Troubleshooting + +1. tensorflow.python.framework.errors_impl.InternalError: CUDA runtime implicit initialization on GPU:0 failed. Status: device kernel image is invalid + + - Make sure CUDA was installed on your machine. + - Pull the latest tensorflow repo and run the `./configure` in the root folder of tensorflow, it will help you to create the configuration file which shows your local environment. 
Please check [this post](https://www.tensorflow.org/install/source#configure_the_build) to know the details. \ No newline at end of file From beab9b83eaf0a5227cc8c314726bb00a32c866ed Mon Sep 17 00:00:00 2001 From: Robert David Date: Tue, 4 Aug 2020 12:05:27 -0700 Subject: [PATCH 2097/2522] Fix missing dependency for GPU delegate PiperOrigin-RevId: 324861084 Change-Id: I6a894d90b5844ea1f5a04526ec396bfa75ff9cb0 --- tensorflow/lite/delegates/gpu/cl/selectors/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/BUILD b/tensorflow/lite/delegates/gpu/cl/selectors/BUILD index ebee4b03b6e..7ea0ac35f89 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/BUILD +++ b/tensorflow/lite/delegates/gpu/cl/selectors/BUILD @@ -108,6 +108,7 @@ cc_library( "//tensorflow/lite/delegates/gpu/cl/kernels:conv_common", "//tensorflow/lite/delegates/gpu/cl/kernels:elementwise", "//tensorflow/lite/delegates/gpu/cl/kernels:gpu_operation", + "//tensorflow/lite/delegates/gpu/cl/kernels:mean_stddev_normalization", "//tensorflow/lite/delegates/gpu/cl/selectors:default_selector", "//tensorflow/lite/delegates/gpu/common:data_type", "//tensorflow/lite/delegates/gpu/common:model", From 6be604aaacd9d270de01c37ec6e9a9a077397848 Mon Sep 17 00:00:00 2001 From: Jared Duke Date: Tue, 4 Aug 2020 12:26:02 -0700 Subject: [PATCH 2098/2522] Reland (Attempt #3) PR #35985: [TFLite int16] 16-bit version of ADD/SUB reference kernel operators MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Imported from GitHub PR https://github.com/tensorflow/tensorflow/pull/35985 This PR is one of steps to extend 8-bit quantization to support symmetric 16-bit activations. Each activation is of type int16 and symmetric around zero. The weight tensor precision remains at 8-bit signed values. The bias is set to int64 precision. In this PR we introduce implementation and tests for ADD/SUB kernel reference function. The specification of this operator: SUB   Input 0:     data_type  : int16     range      : [-32768, 32767]     granularity: per-tensor, zero_point=0   Input 1:     data_type  : int16     range      : [-32768, 32767]     granularity: per-tensor, zero_point=0   Output 0:     data_type  : int16     range      : [-32768, 32767]     granularity: per-tensor, zero_point=0 ADD   Input 0:     data_type  : int16     range      : [-32768, 32767]     granularity: per-tensor, zero_point=0   Input 1:     data_type  : int16     range      : [-32768, 32767]     granularity: per-tensor, zero_point=0   Output 0:     data_type  : int16     range      : [-32768, 32767]     granularity: per-tensor, zero_point=0 Copybara import of the project: -- b94cb4732ab536828e565fd1c7b557f124432e29 by Elena Zhelezina : Added 16-bit version of ADD/SUB operators. Broadcasting is included. -- 924d0b72c568f249f2fd224a942f8922524bfede by Elena Zhelezina : Addressed reviewer comments. -- dd0d9e8f03d1fb1b887609fffb8ea5a86638c63e by Elena Zhelezina : Added versioning to ADD/SUB + some rework of the existing code. -- abae3fd9a9b894c07d13c9ef416092c9004bc913 by Elena Zhelezina : Added versioning for ADD/SUB with new option in the schema.fbs schema_generated.h is edited manually. -- 24f3f5593a06d24fa1ca6be257f1265b5293d492 by Elena Zhelezina : Fix for broken build. -- d252fe175aef3a1a08c65155815efb706aa80afd by Elena Zhelezina : Fix for the failing internal test for NN delegates. -- 2223a5c380bb821eb05f8034703c687269353e32 by Elena Zhelezina : Fix for asan failures. 
Change-Id: I2cf421ddda7f9e802202239136ab062bcd63b4aa -- 3c219a46ce5888e8e402b64cc943ac6522156ef5 by Elena Zhelezina : Added broadcast params to addsub structure. Change-Id: I61d7d4a94087d052a782890799211031f6ed3015 -- 9131a38c776109cdbcfa60be602667ec7aafe00f by Elena Zhelezina : Corrected defaults. Change-Id: I9ea50c75014cc03ac91fdef0f5b4fe11395f7074 PiperOrigin-RevId: 324865496 --- tensorflow/lite/c/builtin_op_data.h | 4 + .../lite/core/api/flatbuffer_conversions.cc | 2 + .../experimental/writer/writer_lib_test.cc | 4 + tensorflow/lite/kernels/add.cc | 85 ++++++++++++---- tensorflow/lite/kernels/add_test.cc | 31 ++++-- .../lite/kernels/internal/reference/add.h | 66 ++++++++++--- tensorflow/lite/kernels/register.cc | 6 +- tensorflow/lite/kernels/sub.cc | 97 +++++++++++++++---- tensorflow/lite/kernels/sub_test.cc | 12 +++ tensorflow/lite/schema/schema.fbs | 4 + tensorflow/lite/schema/schema_generated.h | 22 ++++- tensorflow/lite/toco/tflite/op_version.cc | 3 + tensorflow/lite/toco/tflite/operator.cc | 4 +- .../lite/tools/versioning/op_version.cc | 46 ++++++++- tensorflow/lite/tools/versioning/op_version.h | 5 + .../lite/tools/versioning/runtime_version.cc | 3 + 16 files changed, 321 insertions(+), 73 deletions(-) diff --git a/tensorflow/lite/c/builtin_op_data.h b/tensorflow/lite/c/builtin_op_data.h index 232f5f95928..e205f075b43 100644 --- a/tensorflow/lite/c/builtin_op_data.h +++ b/tensorflow/lite/c/builtin_op_data.h @@ -199,6 +199,8 @@ typedef struct { typedef struct { TfLiteFusedActivation activation; + // Parameter added for the version 4. + bool pot_scale_int16; } TfLiteAddParams; typedef struct { @@ -220,6 +222,8 @@ typedef struct { typedef struct { TfLiteFusedActivation activation; + // Parameter added for the version 5. + bool pot_scale_int16; } TfLiteSubParams; typedef struct { diff --git a/tensorflow/lite/core/api/flatbuffer_conversions.cc b/tensorflow/lite/core/api/flatbuffer_conversions.cc index 0652c64f6c2..7fb04f5b89e 100644 --- a/tensorflow/lite/core/api/flatbuffer_conversions.cc +++ b/tensorflow/lite/core/api/flatbuffer_conversions.cc @@ -896,6 +896,7 @@ TfLiteStatus ParseAdd(const Operator* op, ErrorReporter* error_reporter, if (schema_params != nullptr) { params->activation = ConvertActivation(schema_params->fused_activation_function()); + params->pot_scale_int16 = schema_params->pot_scale_int16(); } else { // TODO(b/157480169): We should either return kTfLiteError or fill in some // reasonable defaults in the params struct. We are not doing so until we @@ -1631,6 +1632,7 @@ TfLiteStatus ParseSub(const Operator* op, ErrorReporter* error_reporter, if (schema_params != nullptr) { params->activation = ConvertActivation(schema_params->fused_activation_function()); + params->pot_scale_int16 = schema_params->pot_scale_int16(); } else { // TODO(b/157480169): We should either return kTfLiteError or fill in some // reasonable defaults in the params struct. 
We are not doing so until we diff --git a/tensorflow/lite/experimental/writer/writer_lib_test.cc b/tensorflow/lite/experimental/writer/writer_lib_test.cc index fb59482f705..bf50d4944f1 100644 --- a/tensorflow/lite/experimental/writer/writer_lib_test.cc +++ b/tensorflow/lite/experimental/writer/writer_lib_test.cc @@ -47,6 +47,7 @@ TEST(Writer, FloatModelTest) { TfLiteAddParams* builtin_data = reinterpret_cast(malloc(sizeof(TfLiteAddParams))); builtin_data->activation = kTfLiteActNone; + builtin_data->pot_scale_int16 = false; const TfLiteRegistration* reg = resolver.FindOp(BuiltinOperator_ADD, 1); interpreter.AddNodeWithParameters({0, 1}, {2}, initial_data, 0, reinterpret_cast(builtin_data), reg); @@ -84,6 +85,7 @@ TEST(Writer, CustomInputOutputTest) { TfLiteAddParams* builtin_data = reinterpret_cast(malloc(sizeof(TfLiteAddParams))); builtin_data->activation = kTfLiteActNone; + builtin_data->pot_scale_int16 = false; const TfLiteRegistration* reg = resolver.FindOp(BuiltinOperator_ADD, 1); interpreter.AddNodeWithParameters({0, 1}, {2}, initial_data, 0, reinterpret_cast(builtin_data), reg); @@ -131,6 +133,7 @@ TEST(Writer, CustomInputOutputErrorCasesTest) { TfLiteAddParams* builtin_data = reinterpret_cast(malloc(sizeof(TfLiteAddParams))); builtin_data->activation = kTfLiteActNone; + builtin_data->pot_scale_int16 = false; const TfLiteRegistration* reg = resolver.FindOp(BuiltinOperator_ADD, 1); interpreter.AddNodeWithParameters({0, 1}, {2}, initial_data, 0, reinterpret_cast(builtin_data), reg); @@ -173,6 +176,7 @@ TEST(Writer, PerTensorQuantizedModelTest) { TfLiteAddParams* builtin_data = reinterpret_cast(malloc(sizeof(TfLiteAddParams))); builtin_data->activation = kTfLiteActNone; + builtin_data->pot_scale_int16 = false; const TfLiteRegistration* reg = resolver.FindOp(BuiltinOperator_ADD, 1); interpreter.AddNodeWithParameters({0, 1}, {2}, initial_data, 0, reinterpret_cast(builtin_data), reg); diff --git a/tensorflow/lite/kernels/add.cc b/tensorflow/lite/kernels/add.cc index bda475bdc35..7692ae9e54b 100644 --- a/tensorflow/lite/kernels/add.cc +++ b/tensorflow/lite/kernels/add.cc @@ -68,6 +68,11 @@ struct OpData { int32 input1_offset; int32 input2_offset; int32 output_offset; + + // This parameter is used to indicate whether + // parameter scale is power of two. + // It is used in 16-bit -> 16-bit quantization. + bool pot_scale_int16; }; void* Init(TfLiteContext* context, const char* buffer, size_t length) { @@ -103,12 +108,55 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { output_size = TfLiteIntArrayCopy(input1->dims); } - if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) { + // 8bit -> 8bit general quantized path, with general rescalings + // as well as, int16 -> int16 with general rescalings + bool pot_scale_int16 = true; + + bool input1_scale_is_pot = false; + bool input2_scale_is_pot = false; + bool output_scale_is_pot = false; + + int input1_scale_log2_rounded{0}; + int input2_scale_log2_rounded{0}; + int output_scale_log2_rounded{0}; + + if (input1->type == kTfLiteInt16 && input2->type == kTfLiteInt16 && + output->type == kTfLiteInt16) { + // In case of 16-bit, there are two implementation: + // the scale parameter is a general number + // the scale parameter is POT and + // zero_point is zero for inputs/output. 
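+  // ("POT" here means a power-of-two scale, e.g. 1.0f / 32768.0f. If any
+  // zero point is non-zero or any scale is not a power of two, the general
+  // rescaling path below is selected instead.)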
+ pot_scale_int16 = (input1->params.zero_point == 0) && + (input2->params.zero_point == 0) && + (output->params.zero_point == 0); + + input1_scale_is_pot = + CheckedLog2(input1->params.scale, &input1_scale_log2_rounded); + + input2_scale_is_pot = + CheckedLog2(input2->params.scale, &input2_scale_log2_rounded); + + output_scale_is_pot = + CheckedLog2(output->params.scale, &output_scale_log2_rounded); + + pot_scale_int16 &= + input1_scale_is_pot && input2_scale_is_pot && output_scale_is_pot; + } + + data->pot_scale_int16 = pot_scale_int16; + + if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 || + !pot_scale_int16) { // 8bit -> 8bit general quantized path, with general rescalings + // as well as, 16bit -> 16bit with general rescalings data->input1_offset = -input1->params.zero_point; data->input2_offset = -input2->params.zero_point; data->output_offset = output->params.zero_point; - data->left_shift = 20; + + // The shift is set to 15 for 16-bit and 20 in case of 8-bit, accordingly. + // In case of 16-bit we have 65535 << 15 which is less than 1 << 31, + // therefore the addition will still fit in a 32 bit accumulator. + data->left_shift = !pot_scale_int16 ? 15 : 20; const double twice_max_input_scale = 2 * std::max(input1->params.scale, input2->params.scale); const double real_input1_multiplier = @@ -144,19 +192,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_EQ(context, input2->params.zero_point, 0); TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0); - int input1_scale_log2_rounded; - bool input1_scale_is_pot = - CheckedLog2(input1->params.scale, &input1_scale_log2_rounded); TF_LITE_ENSURE(context, input1_scale_is_pot); - - int input2_scale_log2_rounded; - bool input2_scale_is_pot = - CheckedLog2(input2->params.scale, &input2_scale_log2_rounded); TF_LITE_ENSURE(context, input2_scale_is_pot); - - int output_scale_log2_rounded; - bool output_scale_is_pot = - CheckedLog2(output->params.scale, &output_scale_log2_rounded); TF_LITE_ENSURE(context, output_scale_is_pot); data->input1_shift = input1_scale_log2_rounded - output_scale_log2_rounded; @@ -231,7 +268,8 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node, const TfLiteTensor* input1, const TfLiteTensor* input2, TfLiteTensor* output) { - if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) { + if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 || + !data->pot_scale_int16) { tflite::ArithmeticParams op_params; op_params.left_shift = data->left_shift; op_params.input1_offset = data->input1_offset; @@ -266,6 +304,15 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node, TF_LITE_ADD(optimized_integer_ops, Add, int8_t); } } + } else if (output->type == kTfLiteInt16) { + if (need_broadcast) { + TF_LITE_ADD(reference_ops, BroadcastAdd4DSlow, int16_t); + } else { + reference_ops::Add( + op_params, GetTensorShape(input1), GetTensorData(input1), + GetTensorShape(input2), GetTensorData(input2), + GetTensorShape(output), GetTensorData(output), false); + } } else { if (kernel_type == kReference) { if (need_broadcast) { @@ -283,12 +330,12 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node, } #undef TF_LITE_ADD } else if (output->type == kTfLiteInt16) { + tflite::ArithmeticParams op_params; + op_params.input1_shift = data->input1_shift; + op_params.input2_shift = data->input2_shift; + SetActivationParams(data->output_activation_min, + data->output_activation_max, &op_params); #define TF_LITE_ADD(type, opname) 
\ - tflite::ArithmeticParams op_params; \ - op_params.input1_shift = data->input1_shift; \ - op_params.input2_shift = data->input2_shift; \ - SetActivationParams(data->output_activation_min, \ - data->output_activation_max, &op_params); \ type::opname(op_params, GetTensorShape(input1), \ GetTensorData(input1), GetTensorShape(input2), \ GetTensorData(input2), GetTensorShape(output), \ diff --git a/tensorflow/lite/kernels/add_test.cc b/tensorflow/lite/kernels/add_test.cc index bb883dd9b05..fc78f930897 100644 --- a/tensorflow/lite/kernels/add_test.cc +++ b/tensorflow/lite/kernels/add_test.cc @@ -310,15 +310,18 @@ TEST(QuantizedAddOpModel, QuantizedTestsNoActivationInt16) { const float kMin = -1.f; const float kMax = 32767.f / 32768.f; float kQuantizedTolerance = GetToleranceInt16(kMin, kMax); - std::vector> inputs1 = { - {0.1, 0.2, 0.3, 0.4}, {-0.8, 0.2, 0.4, 0.7}, {-0.8, 0.2, 0.7, 0.3}}; - std::vector> inputs2 = { - {0.6, 0.4, 0.3, 0.1}, {0.6, 0.4, 0.5, -0.8}, {0.6, 0.4, -0.8, 0.5}}; - std::vector> results = { - {0.7, 0.6, 0.6, 0.5}, {-0.2, 0.6, 0.9, -0.1}, {-0.2, 0.6, -0.1, 0.8}}; + std::vector> inputs1 = {{0.1, 0.2, 0.3, 0.4, 0.9, 0.7}, + {-0.8, 0.2, 0.4, 0.7, 0.1, 0.0}, + {-0.8, 0.2, 0.7, 0.3, 0.9, 0.1}}; + std::vector> inputs2 = {{0.6, 0.4, 0.3, 0.1, -0.1, 0.3}, + {0.6, 0.4, 0.5, -0.8, 0.0, -1.0}, + {0.6, 0.4, -0.8, 0.5, -0.9, 0.1}}; + std::vector> results = {{0.7, 0.6, 0.6, 0.5, 0.8, 1.0}, + {-0.2, 0.6, 0.9, -0.1, 0.1, -1.0}, + {-0.2, 0.6, -0.1, 0.8, 0.0, 0.2}}; for (size_t i = 0; i < inputs1.size(); ++i) { - QuantizedAddOpModel m({TensorType_INT16, {1, 2, 2, 1}, kMin, kMax}, - {TensorType_INT16, {1, 2, 2, 1}, kMin, kMax}, + QuantizedAddOpModel m({TensorType_INT16, {1, 2, 3, 1}, kMin, kMax}, + {TensorType_INT16, {1, 2, 3, 1}, kMin, kMax}, {TensorType_INT16, {}, kMin, kMax}, ActivationFunctionType_NONE); m.QuantizeAndPopulate(m.input1(), inputs1[i]); @@ -439,6 +442,10 @@ TEST(QuantizedAddOpModel, QuantizedWithScalarBroadcastInt8) { QuantizedWithScalarBroadcast(); } +TEST(QuantizedAddOpModel, QuantizedWithScalarBroadcastInt16) { + QuantizedWithScalarBroadcast(); +} + template void QuantizedWithMixedBroadcast() { float kQuantizedTolerance = GetTolerance(-3.f, 3.f); @@ -501,6 +508,10 @@ TEST(QuantizedAddOpModel, QuantizedWithMixedBroadcastInt8) { QuantizedWithMixedBroadcast(); } +TEST(QuantizedAddOpModel, QuantizedWithMixedBroadcastInt16) { + QuantizedWithMixedBroadcast(); +} + template void QuantizedWithGenericBroadcast() { float kQuantizedTolerance = GetTolerance(-1.0, 1.0); @@ -527,5 +538,9 @@ TEST(QuantizedAddOpModel, QuantizedWithGenericdBroadcastInt8) { QuantizedWithGenericBroadcast(); } +TEST(QuantizedAddOpModel, QuantizedWithGenericdBroadcastInt16) { + QuantizedWithGenericBroadcast(); +} + } // namespace } // namespace tflite diff --git a/tensorflow/lite/kernels/internal/reference/add.h b/tensorflow/lite/kernels/internal/reference/add.h index 94c58097154..5be7ab4dc0c 100644 --- a/tensorflow/lite/kernels/internal/reference/add.h +++ b/tensorflow/lite/kernels/internal/reference/add.h @@ -51,13 +51,18 @@ inline void Add(const ArithmeticParams& params, // Element-wise add that can often be used for inner loop of broadcast add as // well as the non-broadcast add. + +// This function is used for 8-bit as well as for 16-bit, but the accumulator +// is 32-bit for both cases. The overflow does not happen due to the +// choice of the shift (20 or 15, accordingly - see add.cc for more comments). 
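+// (Worked check for the 16-bit case: |input + offset| < 2^16, and
+// (2^16 - 1) * 2^15 = 2147450880 < 1 << 31, so the shifted value still fits
+// in the 32-bit accumulator.)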
+template inline void AddElementwise(int size, const ArithmeticParams& params, - const uint8_t* input1_data, - const uint8_t* input2_data, uint8_t* output_data) { - TFLITE_DCHECK_GT(params.input1_offset, -256); - TFLITE_DCHECK_GT(params.input2_offset, -256); - TFLITE_DCHECK_LT(params.input1_offset, 256); - TFLITE_DCHECK_LT(params.input2_offset, 256); + const T* input1_data, const T* input2_data, + T* output_data) { + TFLITE_DCHECK_GT(params.input1_offset, -std::numeric_limits::max()); + TFLITE_DCHECK_GT(params.input2_offset, -std::numeric_limits::max()); + TFLITE_DCHECK_LT(params.input1_offset, std::numeric_limits::max()); + TFLITE_DCHECK_LT(params.input2_offset, std::numeric_limits::max()); for (int i = 0; i < size; ++i) { const int32_t input1_val = params.input1_offset + input1_data[i]; @@ -78,7 +83,7 @@ inline void AddElementwise(int size, const ArithmeticParams& params, const int32_t clamped_output = std::min(params.quantized_activation_max, std::max(params.quantized_activation_min, raw_output)); - output_data[i] = static_cast(clamped_output); + output_data[i] = static_cast(clamped_output); } } @@ -132,10 +137,38 @@ inline void Add(const ArithmeticParams& params, AddElementwise(flat_size, params, input1_data, input2_data, output_data); } +inline void AddGeneralParamScale(const ArithmeticParams& params, + const RuntimeShape& input1_shape, + const int16_t* input1_data, + const RuntimeShape& input2_shape, + const int16_t* input2_data, + const RuntimeShape& output_shape, + int16_t* output_data) { + TFLITE_DCHECK_LE(params.quantized_activation_min, + params.quantized_activation_max); + const int flat_size = + MatchingElementsSize(input1_shape, input2_shape, output_shape); + + int max_value = std::numeric_limits::max(); + + TFLITE_DCHECK_GT(params.input1_offset, -max_value); + TFLITE_DCHECK_GT(params.input2_offset, -max_value); + TFLITE_DCHECK_LT(params.input1_offset, max_value); + TFLITE_DCHECK_LT(params.input2_offset, max_value); + AddElementwise(flat_size, params, input1_data, input2_data, output_data); +} + inline void Add(const ArithmeticParams& params, const RuntimeShape& input1_shape, const int16_t* input1_data, const RuntimeShape& input2_shape, const int16_t* input2_data, - const RuntimeShape& output_shape, int16_t* output_data) { + const RuntimeShape& output_shape, int16_t* output_data, + bool pot_scale = true) { + if (!pot_scale) { + AddGeneralParamScale(params, input1_shape, input1_data, input2_shape, + input2_data, output_shape, output_data); + return; + } + TFLITE_DCHECK_LE(params.quantized_activation_min, params.quantized_activation_max); @@ -258,13 +291,14 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params, } } -inline void BroadcastAdd4DSlow(const ArithmeticParams& params, - const RuntimeShape& input1_shape, - const uint8_t* input1_data, - const RuntimeShape& input2_shape, - const uint8_t* input2_data, - const RuntimeShape& output_shape, - uint8_t* output_data) { +// This function is used for 8-bit as well as for 16-bit, but the accumulator +// is 32-bit for both cases. The overflow does not happen due to the +// choice of the shift (20 or 15, accordingly - see add.cc for more comments). 
+template +inline void BroadcastAdd4DSlow( + const ArithmeticParams& params, const RuntimeShape& input1_shape, + const T* input1_data, const RuntimeShape& input2_shape, + const T* input2_data, const RuntimeShape& output_shape, T* output_data) { NdArrayDesc<4> desc1; NdArrayDesc<4> desc2; NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, @@ -314,7 +348,7 @@ inline void BroadcastAdd4DSlow(const ArithmeticParams& params, std::min(params.quantized_activation_max, std::max(params.quantized_activation_min, raw_output)); output_data[Offset(extended_output_shape, b, y, x, c)] = - static_cast(clamped_output); + static_cast(clamped_output); } } } diff --git a/tensorflow/lite/kernels/register.cc b/tensorflow/lite/kernels/register.cc index adffa19c4e1..1d1db9e0403 100644 --- a/tensorflow/lite/kernels/register.cc +++ b/tensorflow/lite/kernels/register.cc @@ -89,8 +89,8 @@ BuiltinOpResolver::BuiltinOpResolver() { /* min_version = */ 1, /* max_version = */ 3); AddBuiltin(BuiltinOperator_ADD, Register_ADD(), - /* min_version = */ 1, - /* max_version = */ 2); + /* min_version */ 1, + /* max_version */ 4); AddBuiltin(BuiltinOperator_SPACE_TO_BATCH_ND, Register_SPACE_TO_BATCH_ND(), /* min_version = */ 1, /* max_version = */ 3); @@ -143,7 +143,7 @@ BuiltinOpResolver::BuiltinOpResolver() { /* max_version */ 2); AddBuiltin(BuiltinOperator_SUB, Register_SUB(), /* min_version = */ 1, - /* max_version = */ 4); + /* max_version = */ 5); AddBuiltin(BuiltinOperator_SPLIT, Register_SPLIT(), /* min_version = */ 1, /* max_version = */ 4); diff --git a/tensorflow/lite/kernels/sub.cc b/tensorflow/lite/kernels/sub.cc index 4cd9dd7ff60..f93ebecd46d 100644 --- a/tensorflow/lite/kernels/sub.cc +++ b/tensorflow/lite/kernels/sub.cc @@ -71,6 +71,11 @@ struct OpData { int32 input1_offset; int32 input2_offset; int32 output_offset; + + // This parameter is used to indicate whether + // parameter scale is power of two. + // It is used in 16-bit -> 16-bit quantization. 
+ bool pot_scale_int16; }; void* Init(TfLiteContext* context, const char* buffer, size_t length) { @@ -83,13 +88,14 @@ void Free(TfLiteContext* context, void* buffer) { delete reinterpret_cast(buffer); } -TfLiteStatus Prepare8BitSubOp(TfLiteContext* context, - const TfLiteTensor* input_1, - const TfLiteTensor* input_2, TfLiteTensor* output, - TfLiteSubParams* params, OpData* op_params, - int op_sign) { - TF_LITE_ENSURE(context, - output->type == kTfLiteUInt8 || output->type == kTfLiteInt8); +TfLiteStatus PrepareGeneralSubOp(TfLiteContext* context, + const TfLiteTensor* input_1, + const TfLiteTensor* input_2, + TfLiteTensor* output, TfLiteSubParams* params, + OpData* op_params, int op_sign) { + TF_LITE_ENSURE(context, output->type == kTfLiteUInt8 || + output->type == kTfLiteInt8 || + output->type == kTfLiteInt16); const auto& input1_quantization_params = input_1->params; const auto& input2_quantization_params = input_2->params; const auto& output_quantization_params = output->params; @@ -98,6 +104,9 @@ TfLiteStatus Prepare8BitSubOp(TfLiteContext* context, if (output->type == kTfLiteUInt8) { integer_type_min = std::numeric_limits::min(); integer_type_max = std::numeric_limits::max(); + } else if (output->type == kTfLiteInt16) { + integer_type_min = std::numeric_limits::min(); + integer_type_max = std::numeric_limits::max(); } else { // output->type == kTfLiteInt8 integer_type_min = std::numeric_limits::min(); @@ -120,7 +129,11 @@ TfLiteStatus Prepare8BitSubOp(TfLiteContext* context, op_params->input1_offset = -input1_quantization_params.zero_point; op_params->input2_offset = -input2_quantization_params.zero_point; op_params->output_offset = output_quantization_params.zero_point; - op_params->left_shift = 20; + + // The shift is set to 15 in case of 16-bit and 20 in case of 8-bit, + // accordingly. In case of 16-bit we have 65535 << 15 which is less than 1 << + // 31, therefore the addition will still fit in a 32 bit accumulator. + op_params->left_shift = output->type == kTfLiteInt16 ? 
15 : 20; const double twice_max_input_scale = 2 * std::max(input1_quantization_params.scale, input2_quantization_params.scale); @@ -146,13 +159,15 @@ TfLiteStatus Prepare8BitSubOp(TfLiteContext* context, TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized( context, params->activation, output, &op_params->output_activation_min, &op_params->output_activation_max)); + return kTfLiteOk; } -TfLiteStatus PrepareInt16SubOp(TfLiteContext* context, - const TfLiteTensor* input1, - const TfLiteTensor* input2, TfLiteTensor* output, - TfLiteSubParams* params, OpData* data) { +TfLiteStatus PrepareInt16SubOpPOT(TfLiteContext* context, + const TfLiteTensor* input1, + const TfLiteTensor* input2, + TfLiteTensor* output, TfLiteSubParams* params, + OpData* data) { // 16bit -> 16bit special quantized path, supporting only a rather // narrow case of quantization parameters: zero_points must all be 0 // ("symmetric quantization") and scales must be power-of-two (which @@ -219,12 +234,51 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { output_size = TfLiteIntArrayCopy(input1->dims); } - if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) { - TF_LITE_ENSURE_OK(context, Prepare8BitSubOp(context, input1, input2, output, - params, data, -1)); + // 8bit -> 8bit general quantized path, with general rescalings + // as well as, 16bit -> 16bit with general rescalings + bool pot_scale_int16 = true; + + bool input1_scale_is_pot = false; + bool input2_scale_is_pot = false; + bool output_scale_is_pot = false; + + int input1_scale_log2_rounded{0}; + int input2_scale_log2_rounded{0}; + int output_scale_log2_rounded{0}; + + if (input1->type == kTfLiteInt16 && input2->type == kTfLiteInt16 && + output->type == kTfLiteInt16) { + // In case of 16-bit, there are two implementation: + // the scale parameter is a general number + // the scale parameter is POT and + // zero_point is zero for inputs/output. 
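+  // (Same power-of-two check as in add.cc: only when every zero point is 0
+  // and every scale is a power of two does the narrower PrepareInt16SubOpPOT
+  // path below get used; otherwise the general rescaling path applies.)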
+ pot_scale_int16 = (input1->params.zero_point == 0) && + (input2->params.zero_point == 0) && + (output->params.zero_point == 0); + + input1_scale_is_pot = + CheckedLog2(input1->params.scale, &input1_scale_log2_rounded); + + input2_scale_is_pot = + CheckedLog2(input2->params.scale, &input2_scale_log2_rounded); + + output_scale_is_pot = + CheckedLog2(output->params.scale, &output_scale_log2_rounded); + + pot_scale_int16 &= + input1_scale_is_pot && input2_scale_is_pot && output_scale_is_pot; + } + + data->pot_scale_int16 = pot_scale_int16; + + if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 || + !pot_scale_int16) { + TF_LITE_ENSURE_OK(context, PrepareGeneralSubOp(context, input1, input2, + output, params, data, -1)); } else if (output->type == kTfLiteInt16) { - TF_LITE_ENSURE_OK(context, PrepareInt16SubOp(context, input1, input2, - output, params, data)); + // LSTM-special case with scale parameter of POT + TF_LITE_ENSURE_OK(context, PrepareInt16SubOpPOT(context, input1, input2, + output, params, data)); } return context->ResizeTensor(context, output, output_size); @@ -332,6 +386,15 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node, } else { TF_LITE_SUB(reference_integer_ops, Add, int8_t); } + } else if (!data->pot_scale_int16) { + if (need_broadcast) { + TF_LITE_SUB(reference_ops, BroadcastAdd4DSlow, int16_t); + } else { + reference_ops::Add(op_params, GetTensorShape(input1), + GetTensorData(input1), GetTensorShape(input2), + GetTensorData(input2), GetTensorShape(output), + GetTensorData(output), false); + } } else if (output->type == kTfLiteUInt8) { if (kernel_type == kReference) { if (need_broadcast) { diff --git a/tensorflow/lite/kernels/sub_test.cc b/tensorflow/lite/kernels/sub_test.cc index 67054fe4903..24d9c251afb 100644 --- a/tensorflow/lite/kernels/sub_test.cc +++ b/tensorflow/lite/kernels/sub_test.cc @@ -304,6 +304,10 @@ TEST(QuantizedSubOpModel, QuantizedTestsNoActivationInt8) { QuantizedTestsNoActivation(); } +TEST(QuantizedSubOpModel, QuantizedTestsNoActivationGenericInt16) { + QuantizedTestsNoActivation(); +} + template void QuantizedTestsActivationRELU_N1_TO_1() { float kQuantizedTolerance = GetTolerance(-1.0, 1.0); @@ -365,6 +369,10 @@ TEST(QuantizedSubOpModel, QuantizedVariousInputShapesInt8) { QuantizedVariousInputShapes(); } +TEST(QuantizedSubOpModel, QuantizedVariousInputShapesInt16) { + QuantizedVariousInputShapes(); +} + template void QuantizedWithBroadcast() { float kQuantizedTolerance = GetTolerance(-3.0, 3.0); @@ -393,6 +401,10 @@ TEST(QuantizedSubOpModel, QuantizedWithBroadcastInt8) { QuantizedWithBroadcast(); } +TEST(QuantizedSubOpModel, QuantizedWithBroadcastInt16) { + QuantizedWithBroadcast(); +} + TEST(QuantizedSubOpModel, QuantizedTestsNoActivationInt16) { const float kMin = -1.f; const float kMax = diff --git a/tensorflow/lite/schema/schema.fbs b/tensorflow/lite/schema/schema.fbs index 878acde1e16..baeb49f7b7a 100644 --- a/tensorflow/lite/schema/schema.fbs +++ b/tensorflow/lite/schema/schema.fbs @@ -583,6 +583,8 @@ table ConcatenationOptions { table AddOptions { fused_activation_function:ActivationFunctionType; + // Parameters supported by version 4. 
+ pot_scale_int16:bool = true; } table MulOptions { @@ -704,6 +706,8 @@ table DepthToSpaceOptions { table SubOptions { fused_activation_function:ActivationFunctionType; + // Parameters supported by version 5 + pot_scale_int16:bool = true; } table DivOptions { diff --git a/tensorflow/lite/schema/schema_generated.h b/tensorflow/lite/schema/schema_generated.h index a6117dc72ab..a4691b70e49 100755 --- a/tensorflow/lite/schema/schema_generated.h +++ b/tensorflow/lite/schema/schema_generated.h @@ -4742,22 +4742,29 @@ flatbuffers::Offset CreateConcatenationOptions(flatbuffers struct AddOptionsT : public flatbuffers::NativeTable { typedef AddOptions TableType; + bool pot_scale_int16; tflite::ActivationFunctionType fused_activation_function; AddOptionsT() - : fused_activation_function(tflite::ActivationFunctionType_NONE) { + : pot_scale_int16(true), + fused_activation_function(tflite::ActivationFunctionType_NONE) { } }; struct AddOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef AddOptionsT NativeTableType; enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_FUSED_ACTIVATION_FUNCTION = 4 + VT_FUSED_ACTIVATION_FUNCTION = 4, + VT_POT_SCALE_INT16 = 6 }; + bool pot_scale_int16() const { + return GetField(VT_POT_SCALE_INT16, 0) != 0; + } tflite::ActivationFunctionType fused_activation_function() const { return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && + VerifyField(verifier, VT_POT_SCALE_INT16) && VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); } @@ -5907,22 +5914,29 @@ flatbuffers::Offset CreateDepthToSpaceOptions(flatbuffers:: struct SubOptionsT : public flatbuffers::NativeTable { typedef SubOptions TableType; + bool pot_scale_int16; tflite::ActivationFunctionType fused_activation_function; SubOptionsT() - : fused_activation_function(tflite::ActivationFunctionType_NONE) { + : pot_scale_int16(true), + fused_activation_function(tflite::ActivationFunctionType_NONE) { } }; struct SubOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef SubOptionsT NativeTableType; enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_FUSED_ACTIVATION_FUNCTION = 4 + VT_FUSED_ACTIVATION_FUNCTION = 4, + VT_POT_SCALE_INT16 = 6 }; + bool pot_scale_int16() const { + return GetField(VT_POT_SCALE_INT16, 0) != 0; + } tflite::ActivationFunctionType fused_activation_function() const { return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && + VerifyField(verifier, VT_POT_SCALE_INT16) && VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); } diff --git a/tensorflow/lite/toco/tflite/op_version.cc b/tensorflow/lite/toco/tflite/op_version.cc index 567d000dab6..222be969560 100644 --- a/tensorflow/lite/toco/tflite/op_version.cc +++ b/tensorflow/lite/toco/tflite/op_version.cc @@ -53,12 +53,15 @@ std::string GetMinimumRuntimeVersionForModel(const Model& model) { {{OperatorType::kDepthwiseConv, 5}, kPendingReleaseOpVersion}, {{OperatorType::kAdd, 1}, "1.5.0"}, {{OperatorType::kAdd, 2}, "1.14.0"}, + {{OperatorType::kAdd, 3}, kPendingReleaseOpVersion}, {{OperatorType::kAddN, 1}, "1.14.0"}, {{OperatorType::kSpaceToBatchND, 1}, "1.6.0"}, {{OperatorType::kSpaceToBatchND, 2}, "1.14.0"}, {{OperatorType::kSub, 1}, "1.6.0"}, {{OperatorType::kSub, 2}, "1.14.0"}, + {{OperatorType::kSub, 3}, "1.15.0"}, {{OperatorType::kSub, 4}, 
kPendingReleaseOpVersion}, + {{OperatorType::kSub, 5}, kPendingReleaseOpVersion}, {{OperatorType::kDiv, 1}, "1.6.0"}, {{OperatorType::kBatchToSpaceND, 1}, "1.6.0"}, {{OperatorType::kBatchToSpaceND, 2}, "1.14.0"}, diff --git a/tensorflow/lite/toco/tflite/operator.cc b/tensorflow/lite/toco/tflite/operator.cc index 794691f5724..585b15bae2e 100644 --- a/tensorflow/lite/toco/tflite/operator.cc +++ b/tensorflow/lite/toco/tflite/operator.cc @@ -276,10 +276,10 @@ class Sub : public BuiltinOperator 4) { + if (op_sig.options.addsub.need_broadcast && + op_sig.options.addsub.num_dims > 4) { return 3; } if (op_sig.input_types.at(0) == TensorType_INT8) { @@ -542,7 +560,7 @@ int GetBuiltinOperatorVersion(const OpSignature& op_sig) { } } return 1; - case BuiltinOperator_ADD: + case BuiltinOperator_SPACE_TO_DEPTH: case BuiltinOperator_SPLIT_V: case BuiltinOperator_SUM: @@ -669,6 +687,26 @@ OpSignature GetOpSignature(const OperatorCode* op_code, const Operator* op, } } break; + case BuiltinOperator_ADD: { + auto add_option = op->builtin_options_as_AddOptions(); + op_sig.options.addsub.pot_scale_int16 = true; + if (add_option) { + op_sig.options.addsub.pot_scale_int16 = add_option->pot_scale_int16(); + } + } break; + + case BuiltinOperator_SUB: { + auto sub_option = op->builtin_options_as_SubOptions(); + op_sig.options.addsub.need_broadcast = + !HaveSameShapes(subgraph, op, 0, 1); + op_sig.options.addsub.num_dims = + std::max(GetNumDims(subgraph, op, 0), GetNumDims(subgraph, op, 1)); + op_sig.options.addsub.pot_scale_int16 = true; + if (sub_option) { + op_sig.options.addsub.pot_scale_int16 = sub_option->pot_scale_int16(); + } + } break; + case BuiltinOperator_LSTM: { auto lstm_option = op->builtin_options_as_LSTMOptions(); if (lstm_option) { @@ -714,7 +752,7 @@ OpSignature GetOpSignature(const OperatorCode* op_code, const Operator* op, case BuiltinOperator_TRANSPOSE: { op_sig.options.single_input_op.num_dims = GetNumDims(subgraph, op, 0); } break; - case BuiltinOperator_SUB: + case BuiltinOperator_DIV: case BuiltinOperator_MAXIMUM: case BuiltinOperator_MINIMUM: { diff --git a/tensorflow/lite/tools/versioning/op_version.h b/tensorflow/lite/tools/versioning/op_version.h index 71362001387..67a7b79fe38 100644 --- a/tensorflow/lite/tools/versioning/op_version.h +++ b/tensorflow/lite/tools/versioning/op_version.h @@ -63,6 +63,11 @@ typedef struct { int32_t num_dims; bool need_broadcast; } broadcast; + struct { + bool pot_scale_int16; + int32_t num_dims; + bool need_broadcast; + } addsub; struct { bool is_per_channel_quantized; } conv_2d; diff --git a/tensorflow/lite/tools/versioning/runtime_version.cc b/tensorflow/lite/tools/versioning/runtime_version.cc index ccbbaa27d68..5a454224b92 100644 --- a/tensorflow/lite/tools/versioning/runtime_version.cc +++ b/tensorflow/lite/tools/versioning/runtime_version.cc @@ -72,6 +72,8 @@ std::string FindMinimumRuntimeVersionForOp(tflite::BuiltinOperator op_code, {{BuiltinOperator_DEPTHWISE_CONV_2D, 6}, "2.3.0"}, {{BuiltinOperator_ADD, 1}, "1.5.0"}, {{BuiltinOperator_ADD, 2}, "1.14.0"}, + {{BuiltinOperator_ADD, 3}, kPendingReleaseVersion}, + {{BuiltinOperator_ADD, 4}, kPendingReleaseVersion}, {{BuiltinOperator_ADD_N, 1}, "1.14.0"}, {{BuiltinOperator_SPACE_TO_BATCH_ND, 1}, "1.6.0"}, {{BuiltinOperator_SPACE_TO_BATCH_ND, 2}, "1.14.0"}, @@ -80,6 +82,7 @@ std::string FindMinimumRuntimeVersionForOp(tflite::BuiltinOperator op_code, {{BuiltinOperator_SUB, 2}, "1.14.0"}, {{BuiltinOperator_SUB, 3}, "2.3.0"}, {{BuiltinOperator_SUB, 4}, kPendingReleaseVersion}, + {{BuiltinOperator_SUB, 5}, 
kPendingReleaseVersion}, {{BuiltinOperator_DENSIFY, 1}, "2.2.0"}, {{BuiltinOperator_DIV, 1}, "1.6.0"}, {{BuiltinOperator_DIV, 2}, "2.3.0"}, From 52c4411c26fd27377da12da6d1158e8b49b22034 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 4 Aug 2020 12:35:41 -0700 Subject: [PATCH 2099/2522] Split GPU Compatibility Lib into two. PiperOrigin-RevId: 324867524 Change-Id: Ibf5403a725aee72ebeba4d6681660c289326eb69 --- .../delegates/gpu/java/src/main/native/BUILD | 2 +- .../java/src/main/native/gpu_delegate_jni.cc | 6 +- .../acceleration/compatibility/BUILD | 38 ++++++- .../compatibility/gpu_compatibility.cc | 9 +- .../compatibility/gpu_compatibility.h | 38 +++---- .../gpu_compatibility_recommender.cc | 30 ++++++ .../gpu_compatibility_recommender.h | 64 +++++++++++ .../gpu_compatibility_recommender_test.cc | 100 ++++++++++++++++++ .../compatibility/gpu_compatibility_test.cc | 61 +++++++++++ 9 files changed, 310 insertions(+), 38 deletions(-) create mode 100644 tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_recommender.cc create mode 100644 tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_recommender.h create mode 100644 tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_recommender_test.cc create mode 100644 tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_test.cc diff --git a/tensorflow/lite/delegates/gpu/java/src/main/native/BUILD b/tensorflow/lite/delegates/gpu/java/src/main/native/BUILD index 00b56bb0c06..7b340e20562 100644 --- a/tensorflow/lite/delegates/gpu/java/src/main/native/BUILD +++ b/tensorflow/lite/delegates/gpu/java/src/main/native/BUILD @@ -30,7 +30,7 @@ cc_library( "//tensorflow/lite/delegates/gpu/gl:egl_environment", "//tensorflow/lite/delegates/gpu/gl:request_gpu_info", "//tensorflow/lite/experimental/acceleration/compatibility:android_info", - "//tensorflow/lite/experimental/acceleration/compatibility:gpu_compatibility", + "//tensorflow/lite/experimental/acceleration/compatibility:gpu_compatibility_recommender", "//tensorflow/lite/java/jni", "@com_google_absl//absl/status", ], diff --git a/tensorflow/lite/delegates/gpu/java/src/main/native/gpu_delegate_jni.cc b/tensorflow/lite/delegates/gpu/java/src/main/native/gpu_delegate_jni.cc index d31d058b796..c4571100818 100644 --- a/tensorflow/lite/delegates/gpu/java/src/main/native/gpu_delegate_jni.cc +++ b/tensorflow/lite/delegates/gpu/java/src/main/native/gpu_delegate_jni.cc @@ -21,7 +21,7 @@ limitations under the License. 
#include "tensorflow/lite/delegates/gpu/gl/egl_environment.h" #include "tensorflow/lite/delegates/gpu/gl/request_gpu_info.h" #include "tensorflow/lite/experimental/acceleration/compatibility/android_info.h" -#include "tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility.h" +#include "tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_recommender.h" #ifdef __cplusplus extern "C" { @@ -74,13 +74,13 @@ class CompatibilityListHelper { } bool IsDelegateSupportedOnThisDevice() { - return compatibility_list_.Includes(android_info_, gpu_info_); + return compatibility_recommender_.Includes(android_info_, gpu_info_); } private: tflite::acceleration::AndroidInfo android_info_; tflite::gpu::GpuInfo gpu_info_; - tflite::acceleration::GPUCompatibilityList compatibility_list_; + tflite::acceleration::GPUCompatibilityRecommender compatibility_recommender_; }; } // namespace diff --git a/tensorflow/lite/experimental/acceleration/compatibility/BUILD b/tensorflow/lite/experimental/acceleration/compatibility/BUILD index 78a9d2eb8d8..6adb6daaa6f 100644 --- a/tensorflow/lite/experimental/acceleration/compatibility/BUILD +++ b/tensorflow/lite/experimental/acceleration/compatibility/BUILD @@ -152,7 +152,6 @@ cc_library( ":android_info", ":database_fbs", ":devicedb", - "//tensorflow/lite/delegates/gpu:delegate", "//tensorflow/lite/delegates/gpu/common:gpu_info", "@com_google_absl//absl/status", "@com_google_absl//absl/strings", @@ -160,4 +159,41 @@ cc_library( ], ) +cc_test( + name = "gpu_compatibility_test", + srcs = ["gpu_compatibility_test.cc"], + deps = [ + ":gpu_compatibility", + "@com_google_googletest//:gtest", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "gpu_compatibility_recommender", + srcs = [ + "gpu_compatibility_recommender.cc", + ], + hdrs = [ + "gpu_compatibility_recommender.h", + ], + deps = [ + ":android_info", + ":gpu_compatibility", + "//tensorflow/lite/delegates/gpu:delegate", + "//tensorflow/lite/delegates/gpu/common:gpu_info", + ], +) + +cc_test( + name = "gpu_compatibility_recommender_test", + srcs = ["gpu_compatibility_recommender_test.cc"], + tags = ["notap"], # Needs to be built with --copt=-DCL_DELEGATE_NO_GL + deps = [ + ":gpu_compatibility_recommender", + "@com_google_googletest//:gtest", + "@com_google_googletest//:gtest_main", + ], +) + tflite_portable_test_suite() diff --git a/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility.cc b/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility.cc index e04f5d18db4..1911d26b8df 100644 --- a/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility.cc +++ b/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility.cc @@ -89,13 +89,8 @@ bool GPUCompatibilityList::Includes( return variables[gpu::kStatus] == std::string(gpu::kStatusSupported); } -TfLiteGpuDelegateOptionsV2 GPUCompatibilityList::GetBestOptionsFor( - const AndroidInfo& /* android_info */, - const ::tflite::gpu::GpuInfo& /* gpu_info */) const { - // This method is for forwards-compatibility: the list may later include - // information about which backend to choose (OpenGL/OpenCL/Vulkan) or other - // options. 
- return TfLiteGpuDelegateOptionsV2Default(); +bool GPUCompatibilityList::IsDatabaseLoaded() const { + return database_ != nullptr; } } // namespace acceleration diff --git a/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility.h b/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility.h index f975fe04f22..873151dca66 100644 --- a/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility.h +++ b/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility.h @@ -19,7 +19,6 @@ limitations under the License. #include #include "tensorflow/lite/delegates/gpu/common/gpu_info.h" -#include "tensorflow/lite/delegates/gpu/delegate.h" #include "tensorflow/lite/experimental/acceleration/compatibility/android_info.h" #include "tensorflow/lite/experimental/acceleration/compatibility/devicedb.h" @@ -32,54 +31,41 @@ namespace acceleration { // Android version, OpenGL ES version, GPU chipset etc. The support is based on // measure stability, correctness and peformance. For more detail see README.md. // +// Reads from the flatbuffer. // Example usage: -// tflite::Interpreter* interpreter = ... ; +// tflite::acceleration::GPUCompatibilityList list; // tflite::acceleration::AndroidInfo android_info; // tflite::gpu::GpuInfo gpu_info; -// EXPECT_OK(tflite::acceleration::RequestAndroidInfo(&android_info)); -// EXPECT_OK(tflite::gpu::gl::EglEnvironment::NewEglEnvironment(&env)); -// EXPECT_OK(tflite::gpu::gl::RequestGpuInfo(&tflite_gpu_info)); -// tflite::acceleration::GPUCompatibilityList list; -// TfLiteDelegate* gpu_delegate = nullptr; -// TfLiteGpuDelegateOptions gpu_options; -// if (list.Includes(android_info, gpu_info)) { -// gpu_options = list.BestOptionsFor(android_info, gpu_info); -// gpu_delegate = TfLiteGpuDelegateCreate(&gpu_options); -// EXPECT_EQ(interpreter->ModifyGraphWithDelegate(gpu_delegate), TfLiteOk); -// } else { -// // Fallback path. +// ... +// if(list.Includes(android_info, gpu_info)){ +// // SUPPORTED. +// } else{ +// // UNSUPPORTED. // } class GPUCompatibilityList { public: // Construct list from bundled data. GPUCompatibilityList(); + // Constructs list from the given flatbuffer. + explicit GPUCompatibilityList( + const unsigned char* compatibility_list_flatbuffer); // Returns true if the provided device specs are supported by the database. bool Includes(const AndroidInfo& android_info, const ::tflite::gpu::GpuInfo& gpu_info) const; - - // Returns the best TfLiteGpuDelegateOptionsV2 for the provided device specs - // based on the database. The output can be modified as desired before passing - // to delegate creation. - TfLiteGpuDelegateOptionsV2 GetBestOptionsFor( - const AndroidInfo& android_info, - const ::tflite::gpu::GpuInfo& gpu_info) const; - // Convert android_info and gpu_info into a set of variables used for querying // the list, and update variables from list data. See variables.h // and devicedb.h for more information. std::map CalculateVariables( const AndroidInfo& android_info, const ::tflite::gpu::GpuInfo& gpu_info) const; - GPUCompatibilityList(const GPUCompatibilityList&) = delete; GPUCompatibilityList& operator=(const GPUCompatibilityList&) = delete; + // Indicates if the database is loaded. 
+ bool IsDatabaseLoaded() const; protected: - explicit GPUCompatibilityList( - const unsigned char* compatibility_list_flatbuffer); const DeviceDatabase* database_; }; - } // namespace acceleration } // namespace tflite diff --git a/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_recommender.cc b/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_recommender.cc new file mode 100644 index 00000000000..1b625913323 --- /dev/null +++ b/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_recommender.cc @@ -0,0 +1,30 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_recommender.h" + +namespace tflite { +namespace acceleration { + +TfLiteGpuDelegateOptionsV2 GPUCompatibilityRecommender::GetBestOptionsFor( + const AndroidInfo& /* android_info */, + const ::tflite::gpu::GpuInfo& /* gpu_info */) const { + // This method is for forwards-compatibility: the list may later include + // information about which backend to choose (OpenGL/OpenCL/Vulkan) or other + // options. + return TfLiteGpuDelegateOptionsV2Default(); +} + +} // namespace acceleration +} // namespace tflite diff --git a/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_recommender.h b/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_recommender.h new file mode 100644 index 00000000000..4443cfdf70f --- /dev/null +++ b/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_recommender.h @@ -0,0 +1,64 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_EXPERIMENTAL_ACCELERATION_COMPATIBILITY_GPU_COMPATIBILITY_RECOMMENDER_H_ +#define TENSORFLOW_LITE_EXPERIMENTAL_ACCELERATION_COMPATIBILITY_GPU_COMPATIBILITY_RECOMMENDER_H_ + +#include "tensorflow/lite/delegates/gpu/common/gpu_info.h" +#include "tensorflow/lite/delegates/gpu/delegate.h" +#include "tensorflow/lite/experimental/acceleration/compatibility/android_info.h" +#include "tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility.h" + +namespace tflite { +namespace acceleration { + +// This class recommends best TfLiteGPU delegate options for Android devices. 
+// +// Example usage: +// tflite::Interpreter* interpreter = ... ; +// tflite::acceleration::AndroidInfo android_info; +// tflite::gpu::GpuInfo gpu_info; +// CHECK(tflite::acceleration::RequestAndroidInfo(&android_info)); +// CHECK(tflite::gpu::gl::EglEnvironment::NewEglEnvironment(&env)); +// CHECK(tflite::gpu::gl::RequestGpuInfo(&tflite_gpu_info)); +// tflite::acceleration::GPUCompatibilityRecommender recommender; +// TfLiteDelegate* gpu_delegate = nullptr; +// TfLiteGpuDelegateOptions gpu_options; +// if (list.Includes(android_info, gpu_info)) { +// gpu_options = recommender.BestOptionsFor(android_info, gpu_info); +// gpu_delegate = TfLiteGpuDelegateCreate(&gpu_options); +// CHECK_EQ(interpreter->ModifyGraphWithDelegate(gpu_delegate), TfLiteOk); +// } else { +// // Fallback path. +// } + +class GPUCompatibilityRecommender : public GPUCompatibilityList { + public: + GPUCompatibilityRecommender() {} + GPUCompatibilityRecommender(const GPUCompatibilityRecommender&) = delete; + GPUCompatibilityRecommender& operator=(const GPUCompatibilityRecommender&) = + delete; + + // Returns the best TfLiteGpuDelegateOptionsV2 for the provided device specs + // based on the database. The output can be modified as desired before passing + // to delegate creation. + TfLiteGpuDelegateOptionsV2 GetBestOptionsFor( + const AndroidInfo& android_info, + const ::tflite::gpu::GpuInfo& gpu_info) const; +}; + +} // namespace acceleration +} // namespace tflite + +#endif // TENSORFLOW_LITE_EXPERIMENTAL_ACCELERATION_COMPATIBILITY_GPU_COMPATIBILITY_RECOMMENDER_H_ diff --git a/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_recommender_test.cc b/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_recommender_test.cc new file mode 100644 index 00000000000..ebf793d5a94 --- /dev/null +++ b/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_recommender_test.cc @@ -0,0 +1,100 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_recommender.h" + +#include +#include + +namespace { + +class GPUCompatibilityRecommenderTest : public ::testing::Test { + protected: + GPUCompatibilityRecommenderTest() { + recommender_ = + absl::make_unique(); + } + + std::unique_ptr + recommender_; +}; + +TEST_F(GPUCompatibilityRecommenderTest, Load) { + EXPECT_TRUE(recommender_->IsDatabaseLoaded()); +} + +TEST_F(GPUCompatibilityRecommenderTest, ReturnsSupportedForFullMatch) { + tflite::acceleration::AndroidInfo android_info = { + .android_sdk_version = "28", + .model = "redmi_note_7G960F", + .device = "lavender", + .manufacturer = "xiaomi"}; + tflite::gpu::GpuInfo tflite_gpu_info = { + .renderer_name = "adreno_(tm)_512", + .major_version = 3, + .minor_version = 2, + }; + EXPECT_TRUE(recommender_->Includes(android_info, tflite_gpu_info)); +} + +TEST_F(GPUCompatibilityRecommenderTest, ReturnsUnsupported) { + tflite::acceleration::AndroidInfo android_info = {.android_sdk_version = "28", + .model = "sm_g960f", + .device = "starlte", + .manufacturer = "samsung"}; + tflite::gpu::GpuInfo tflite_gpu_info = { + .renderer_name = "mali_g72", + .major_version = 3, + .minor_version = 2, + }; + + EXPECT_FALSE(recommender_->Includes(android_info, tflite_gpu_info)); +} + +TEST_F(GPUCompatibilityRecommenderTest, MissingInfoReturnsUnsupported) { + tflite::acceleration::AndroidInfo android_info = {.android_sdk_version = "23", + .model = "sm_g532f", + .device = "grandpplte", + .manufacturer = "samsung"}; + tflite::gpu::GpuInfo tflite_gpu_info = { + .renderer_name = "mali_t720", + .major_version = 3, + .minor_version = 1, + }; + EXPECT_FALSE(recommender_->Includes(android_info, tflite_gpu_info)); +} + +TEST_F(GPUCompatibilityRecommenderTest, ReturnsDefaultOptions) { + tflite::acceleration::AndroidInfo android_info; + tflite::gpu::GpuInfo tflite_gpu_info; + auto default_options = TfLiteGpuDelegateOptionsV2Default(); + auto best_options = + recommender_->GetBestOptionsFor(android_info, tflite_gpu_info); + EXPECT_EQ(best_options.is_precision_loss_allowed, + default_options.is_precision_loss_allowed); + EXPECT_EQ(best_options.inference_preference, + default_options.inference_preference); + EXPECT_EQ(best_options.inference_priority1, + default_options.inference_priority1); + EXPECT_EQ(best_options.inference_priority2, + default_options.inference_priority2); + EXPECT_EQ(best_options.inference_priority3, + default_options.inference_priority3); + EXPECT_EQ(best_options.experimental_flags, + default_options.experimental_flags); + EXPECT_EQ(best_options.max_delegated_partitions, + default_options.max_delegated_partitions); +} + +} // namespace diff --git a/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_test.cc b/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_test.cc new file mode 100644 index 00000000000..d300867a8b0 --- /dev/null +++ b/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_test.cc @@ -0,0 +1,61 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility.h" + +#include + +#include +#include + +namespace { + +class GPUCompatibilityTest : public ::testing::Test { + protected: + GPUCompatibilityTest() { + list_ = absl::make_unique(); + } + + std::unique_ptr list_; +}; + +TEST_F(GPUCompatibilityTest, Load) { EXPECT_TRUE(list_->IsDatabaseLoaded()); } + +TEST_F(GPUCompatibilityTest, ReturnsSupportedForFullMatch) { + tflite::acceleration::AndroidInfo android_info = {.android_sdk_version = "27", + .model = "cph1803", + .device = "cph1803", + .manufacturer = "Oppo"}; + tflite::gpu::GpuInfo tflite_gpu_info = { + .renderer_name = "Adreno (TM) 506", + .major_version = 3, + .minor_version = 2, + }; + EXPECT_TRUE(list_->Includes(android_info, tflite_gpu_info)); +} + +TEST_F(GPUCompatibilityTest, ReturnsUnsupportedForFullMatch) { + tflite::acceleration::AndroidInfo android_info = {.android_sdk_version = "28", + .model = "SM-G960F", + .device = "starlte", + .manufacturer = "Samsung"}; + tflite::gpu::GpuInfo tflite_gpu_info = { + .renderer_name = "Mali-G72", + .major_version = 3, + .minor_version = 2, + }; + EXPECT_FALSE(list_->Includes(android_info, tflite_gpu_info)); +} + +} // namespace From 53f0fa916fad58b5f914dc983f1068a005e98606 Mon Sep 17 00:00:00 2001 From: Karim Nosir Date: Tue, 4 Aug 2020 12:53:40 -0700 Subject: [PATCH 2100/2522] NFC: Remove unused dependencies PiperOrigin-RevId: 324871458 Change-Id: I9c11f4a67a27a38d4288a80df6bc4b4f9f096955 --- tensorflow/lite/tools/optimize/BUILD | 8 -------- tensorflow/lite/tools/optimize/calibration/BUILD | 4 ---- 2 files changed, 12 deletions(-) diff --git a/tensorflow/lite/tools/optimize/BUILD b/tensorflow/lite/tools/optimize/BUILD index 146f869a906..ab153afc2cf 100644 --- a/tensorflow/lite/tools/optimize/BUILD +++ b/tensorflow/lite/tools/optimize/BUILD @@ -49,7 +49,6 @@ cc_binary( srcs = ["modify_model_interface_main.cc"], deps = [ ":modify_model_interface", - ":quantize_model", ], ) @@ -90,8 +89,6 @@ cc_library( hdrs = ["quantization_wrapper.h"], deps = [ ":quantization_wrapper_utils", - "//tensorflow/lite:framework", - "//tensorflow/lite/core/api", "//tensorflow/lite/schema:schema_fbs", "//tensorflow/lite/tools/optimize:quantize_model", "@flatbuffers", @@ -115,7 +112,6 @@ cc_library( "//tensorflow/lite/schema:schema_fbs", "//third_party/eigen3", "@com_google_absl//absl/memory", - "@com_google_absl//absl/strings", ], ) @@ -130,7 +126,6 @@ cc_library( "//tensorflow/lite/kernels/internal:types", "//tensorflow/lite/schema:schema_fbs", "@com_google_absl//absl/memory", - "@com_google_absl//absl/strings", ], ) @@ -159,7 +154,6 @@ cc_library( hdrs = ["operator_property.h"], deps = [ "//tensorflow/lite:framework", - "//tensorflow/lite/kernels/internal:types", "//tensorflow/lite/schema:schema_fbs", ], ) @@ -200,7 +194,6 @@ cc_library( ":quantization_utils", ":model_utils", "@com_google_absl//absl/memory", - "@com_google_absl//absl/strings", "@com_google_absl//absl/container:flat_hash_map", "@flatbuffers", "//tensorflow/lite:framework", @@ -245,7 +238,6 @@ 
cc_library( srcs = ["test_util.cc"], hdrs = ["test_util.h"], deps = [ - "//tensorflow/lite:framework", "//tensorflow/lite/core/api", "@com_google_googletest//:gtest", "@flatbuffers", diff --git a/tensorflow/lite/tools/optimize/calibration/BUILD b/tensorflow/lite/tools/optimize/calibration/BUILD index 06183353e44..f641b151aa9 100644 --- a/tensorflow/lite/tools/optimize/calibration/BUILD +++ b/tensorflow/lite/tools/optimize/calibration/BUILD @@ -51,7 +51,6 @@ cc_library( "//tensorflow/lite/schema:schema_fbs", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/memory", - "@com_google_absl//absl/strings", "@flatbuffers", ], ) @@ -105,7 +104,6 @@ cc_test( deps = [ ":logging_op_resolver", "//tensorflow/lite:framework", - "//tensorflow/lite/kernels:builtin_ops", "@com_google_googletest//:gtest", ], ) @@ -120,7 +118,6 @@ cc_library( "//tensorflow/lite:framework", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/memory", - "@com_google_absl//absl/strings", ], ) @@ -130,7 +127,6 @@ cc_library( hdrs = ["calibration_logger.h"], copts = tflite_copts(), deps = [ - "//tensorflow/lite:framework", "//tensorflow/lite:minimal_logging", "//tensorflow/lite/c:common", "//tensorflow/lite/core/api", From 9bf172a8d8b83c7c21fffc39943f9eb069d2ede2 Mon Sep 17 00:00:00 2001 From: Robert David Date: Tue, 4 Aug 2020 13:24:03 -0700 Subject: [PATCH 2101/2522] Fix MeanStddevNormalization on ARM Mali: The OpenCL standard requires __local variables to be declared at __kernel scope. PiperOrigin-RevId: 324877612 Change-Id: I4dd665948878f04a1155d1583942940cbac38390 --- .../cl/kernels/mean_stddev_normalization.cc | 34 ++++++++++++------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc b/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc index a6ce7e55253..bf2ae33ec6d 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc @@ -37,29 +37,32 @@ std::string GetReduceCode(size_t work_group_size_x, size_t work_group_size_y) { // Otherwise, implement a reduction using __local memory. Note this only works // with power-of-two work group sizes. 
return R"( -static inline float local_reduce(float input) { -#if (__OPENCL_C_VERSION__ >= 300 && __opencl_c_work_group_collective_functions) || \ - (__OPENCL_C_VERSION__ >= 200) - return work_group_reduce_add(input); -#else - __local float data[)" + +#if (__OPENCL_C_VERSION__ >= 200) && (__OPENCL_C_VERSION__ < 300) && \ + !defined(__opencl_c_work_group_collective_functions) + #define __opencl_c_work_group_collective_functions 1 +#endif + +#ifdef __opencl_c_work_group_collective_functions +#define local_reduce(input, tmp) work_group_reduce_add(input) +#else // !defined(__opencl_c_work_group_collective_functions) +static inline float local_reduce(float input, __local float tmp[)" + std::to_string(work_group_size_y) + "][" + - std::to_string(work_group_size_x) + R"(]; + std::to_string(work_group_size_x) + R"(]) { const size_t local_id_x = get_local_id(0); const size_t local_id_y = get_local_id(1); - data[local_id_y][local_id_x] = input; + tmp[local_id_y][local_id_x] = input; mem_fence(CLK_LOCAL_MEM_FENCE); size_t reduction_size = get_local_size(0) / 2; while (reduction_size > 0) { if (local_id_x < reduction_size) { - data[local_id_y][local_id_x] += data[local_id_y][local_id_x + reduction_size]; + tmp[local_id_y][local_id_x] += tmp[local_id_y][local_id_x + reduction_size]; } mem_fence(CLK_LOCAL_MEM_FENCE); reduction_size /= 2; } - return data[local_id_y][0]; + return tmp[local_id_y][0]; } -#endif +#endif // defined(__opencl_c_work_group_collective_functions) )"; } } // namespace @@ -86,6 +89,11 @@ std::string MeanStdDevNormalization::GetNormalizationCode() { c += R"(__attribute__((reqd_work_group_size(128, 1, 1))) __kernel void main_function( $0) { +#ifndef __opencl_c_work_group_collective_functions + __local float tmp[)" + + std::to_string(work_group_size_.y) + "][" + + std::to_string(work_group_size_.x) + R"(]; +#endif size_t B = get_global_id(1); if (get_global_id(2) > 0) { return; } if (B >= args.src_tensor.Batch()) { return; } @@ -101,7 +109,7 @@ $0) { } // Reduce the vector to a single float and do a workgroup reduce. const float private_sum = reduce_vector(private_sum4); - const float sum = local_reduce(private_sum); + const float sum = local_reduce(private_sum, tmp); // Calculate the mean const float mean = sum / args.src_tensor.Channels(); // Calculate the squared sum of the difference from the mean. @@ -117,7 +125,7 @@ $0) { } // Reduce const float private_sum_diff_sq = reduce_vector(private_sum_diff_sq4); - const float sum_diff_sq = local_reduce(private_sum_diff_sq); + const float sum_diff_sq = local_reduce(private_sum_diff_sq, tmp); // Calculate 1/stddev (with the 'regulazing constant' as in tensor_utils.cc) const float variance = sum_diff_sq / args.src_tensor.Channels(); const float stddev_inv = rsqrt(variance + 1.0e-8f); From 6de5b8dea2e2c51ec94705d852ebd01201547dbd Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Tue, 4 Aug 2020 13:32:37 -0700 Subject: [PATCH 2102/2522] Add ImmutableNodeMap for const GraphDef. NodeMap and ImmutableNodeMap are subclass of NodeMapInternal. 
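For illustration, a minimal standalone sketch of the lookup pattern a NodeMap-style helper provides (the `Node` and `SimpleNodeMap` types below are hypothetical stand-ins, not the actual grappler classes): one map from node name to node pointer and one from node name to its fanouts, so passes can query both without rescanning the graph.

  // Minimal sketch of the NodeMap idea: name -> node lookup plus fanout sets.
  // Node/SimpleNodeMap are illustrative only, not the real grappler types.
  #include <iostream>
  #include <string>
  #include <unordered_map>
  #include <unordered_set>
  #include <vector>

  struct Node {
    std::string name;
    std::vector<std::string> inputs;
  };

  class SimpleNodeMap {
   public:
    explicit SimpleNodeMap(std::vector<Node>* graph) {
      for (Node& node : *graph) {
        nodes_.emplace(node.name, &node);  // first node with a given name wins
        for (const std::string& input : node.inputs) {
          outputs_[input].insert(&node);   // record a fanout edge input -> node
        }
      }
    }

    Node* GetNode(const std::string& name) const {
      auto it = nodes_.find(name);
      return it == nodes_.end() ? nullptr : it->second;
    }

    const std::unordered_set<Node*>& GetOutputs(const std::string& name) const {
      auto it = outputs_.find(name);
      return it == outputs_.end() ? empty_ : it->second;
    }

   private:
    const std::unordered_set<Node*> empty_;
    std::unordered_map<std::string, Node*> nodes_;
    std::unordered_map<std::string, std::unordered_set<Node*>> outputs_;
  };

  int main() {
    std::vector<Node> graph = {{"a", {}}, {"b", {"a"}}, {"c", {"a", "b"}}};
    SimpleNodeMap node_map(&graph);
    std::cout << "fanouts of a: " << node_map.GetOutputs("a").size() << "\n";  // 2
    std::cout << "c found: " << (node_map.GetNode("c") != nullptr) << "\n";    // 1
    return 0;
  }

The real class additionally warns on duplicate node names and keeps the first occurrence as the canonical node; the sketch above simply keeps the first insertion.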
PiperOrigin-RevId: 324879172 Change-Id: If5c311e7a4992e327f4cbe6a04c727825d2920d2 --- tensorflow/core/grappler/utils.cc | 30 +++---- tensorflow/core/grappler/utils.h | 64 +++------------ tensorflow/core/grappler/utils_test.cc | 103 +++++++------------------ 3 files changed, 59 insertions(+), 138 deletions(-) diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc index e342f7dfdf0..7cf303654ed 100644 --- a/tensorflow/core/grappler/utils.cc +++ b/tensorflow/core/grappler/utils.cc @@ -73,21 +73,25 @@ bool IsShapeConsumer(const NodeDef& node) { } // namespace -namespace internal { -// Specialized template class method GetNodeDefFromGraph. -template <> -NodeDef* NodeMapInternal::GetNodeDefFromGraph( - GraphDef* graph, int64 i) const { - return graph->mutable_node(i); +NodeMap::NodeMap(GraphDef* graph) { + nodes_.reserve(graph->node_size()); + outputs_.reserve(graph->node_size()); + for (int i = 0; i < graph->node_size(); i++) { + NodeDef* node = graph->mutable_node(i); + const string& node_name = node->name(); + auto rslt = nodes_.emplace(node_name, node); + // Check that the graph doesn't contain multiple nodes with the same name. + if (!rslt.second) { + // The first node found with a given name becomes the canonical. + LOG(WARNING) << "Duplicated node in the graph: " << node_name; + } + NodeDef* canonical = rslt.second ? node : rslt.first->second; + for (const auto& input : node->input()) { + outputs_[NodeName(input)].insert(canonical); + } + } } -template <> -const NodeDef* -NodeMapInternal::GetNodeDefFromGraph( - const GraphDef* graph, int64 i) const { - return &graph->node(i); -} -} // namespace internal string TensorIdToString(const TensorId& tensor_id) { return tensor_id.index() == 0 ? string(tensor_id.node()) : tensor_id.ToString(); diff --git a/tensorflow/core/grappler/utils.h b/tensorflow/core/grappler/utils.h index e9ab5b7da12..e529d5fb4ad 100644 --- a/tensorflow/core/grappler/utils.h +++ b/tensorflow/core/grappler/utils.h @@ -98,39 +98,16 @@ inline int NodePosition(const string& name) { return position; } -namespace internal { -// Base template class for NodeMap and ImmutableNodeMap. -template -class NodeMapInternal { +// A utility class to lookup a node and its outputs by node name. +class NodeMap { public: // Note: The NodeMap will store pointers to nodes in graph, which may become // invalid if graph is changed. - explicit NodeMapInternal(GraphDefT* graph) { - if (graph == nullptr) { - LOG(WARNING) << "NodeMapInternal constructor is called with a nullptr!"; - return; - } - nodes_.reserve(graph->node_size()); - outputs_.reserve(graph->node_size()); - for (int i = 0; i < graph->node_size(); i++) { - NodeDefT* node = GetNodeDefFromGraph(graph, i); - const string& node_name = node->name(); - auto rslt = nodes_.emplace(node_name, node); - // Check that the graph doesn't contain multiple nodes with the same name. - if (!rslt.second) { - // The first node found with a given name becomes the canonical. - LOG(WARNING) << "Duplicated node in the graph: " << node_name; - } - NodeDefT* canonical = rslt.second ? node : rslt.first->second; - for (const auto& input : node->input()) { - outputs_[NodeName(input)].insert(canonical); - } - } - } + explicit NodeMap(GraphDef* graph); // Get unordered list of fanouts from node. Notice, that the order is // non-deterministic. 
- const absl::flat_hash_set& GetOutputs( + const absl::flat_hash_set& GetOutputs( const string& node_name) const { auto it = outputs_.find(node_name); if (it == outputs_.end()) { @@ -140,12 +117,12 @@ class NodeMapInternal { } // Get fanouts ordered by name. - std::vector GetOutputsOrderedByNodeName( + std::vector GetOutputsOrderedByNodeName( const string& node_name) const { - std::vector result; + std::vector result; auto it = outputs_.find(node_name); if (it != outputs_.end()) { - const absl::flat_hash_set& outputs = it->second; + const absl::flat_hash_set& outputs = it->second; result.reserve(outputs.size()); result.assign(outputs.begin(), outputs.end()); std::sort(result.begin(), result.end(), @@ -158,7 +135,7 @@ class NodeMapInternal { // This method doesn't record the outputs of the added node; the outputs need // to be explicitly added by the AddOutput method. - void AddNode(const string& node_name, NodeDefT* node) { + void AddNode(const string& node_name, NodeDef* node) { DCHECK(node != nullptr); auto ret = nodes_.emplace(node_name, node); DCHECK(ret.second) @@ -171,7 +148,7 @@ class NodeMapInternal { outputs_.erase(NodeName(name)); } - NodeDefT* GetNode(const string& name) const { + NodeDef* GetNode(const string& name) const { const string node_name = NodeName(name); auto it = nodes_.find(node_name); if (it == nodes_.end()) { @@ -220,26 +197,9 @@ class NodeMapInternal { } private: - // Helper method to get the NodeDef pointer of i-th node in a graph. - NodeDefT* GetNodeDefFromGraph(GraphDefT* graph, int64 i) const; - - const absl::flat_hash_set empty_set_; - absl::node_hash_map nodes_; - absl::node_hash_map> outputs_; -}; -} // namespace internal - -// A utility class to lookup a node and its outputs by node name. -class NodeMap : public internal::NodeMapInternal { - public: - explicit NodeMap(GraphDef* graph) : NodeMapInternal(graph) {} -}; - -// Same to NodeMap, but uses const GraphDef. -class ImmutableNodeMap - : public internal::NodeMapInternal { - public: - explicit ImmutableNodeMap(const GraphDef* graph) : NodeMapInternal(graph) {} + const absl::flat_hash_set empty_set_; + absl::node_hash_map nodes_; + absl::node_hash_map> outputs_; }; // A vector with a set. 
The set stores the same elements as the vector, and diff --git a/tensorflow/core/grappler/utils_test.cc b/tensorflow/core/grappler/utils_test.cc index e7e57e9b7d7..6231fb7a780 100644 --- a/tensorflow/core/grappler/utils_test.cc +++ b/tensorflow/core/grappler/utils_test.cc @@ -349,69 +349,39 @@ TEST_F(UtilsTest, NumNonControlOutputs) { GraphDef graph; TF_CHECK_OK(s.ToGraphDef(&graph)); + NodeMap node_map(&graph); - { - NodeMap node_map(&graph); + const NodeDef* add_node = node_map.GetNode("add"); + const NodeDef* mul_node = node_map.GetNode("mul"); + ASSERT_NE(add_node, nullptr); - const NodeDef* add_node = node_map.GetNode("add"); - const NodeDef* mul_node = node_map.GetNode("mul"); - ASSERT_NE(add_node, nullptr); + // [a, b] are only non-control inputs + EXPECT_EQ(NumNonControlInputs(*add_node), 2); + EXPECT_EQ(NumControlInputs(*add_node), 1); + // [sqrt, shape] are non control outputs + EXPECT_EQ(NumNonControlOutputs(*add_node, node_map), 2); + // sqrt is the only data output + EXPECT_EQ(NumNonControlDataOutputs(*add_node, node_map), 1); + EXPECT_EQ(NumControlInputs(*mul_node), 0); - // [a, b] are only non-control inputs - EXPECT_EQ(NumNonControlInputs(*add_node), 2); - EXPECT_EQ(NumControlInputs(*add_node), 1); - // [sqrt, shape] are non control outputs - EXPECT_EQ(NumNonControlOutputs(*add_node, node_map), 2); - // sqrt is the only data output - EXPECT_EQ(NumNonControlDataOutputs(*add_node, node_map), 1); - EXPECT_EQ(NumControlInputs(*mul_node), 0); + EXPECT_TRUE(HasControlInputs(*add_node)); + EXPECT_TRUE(HasRegularInputs(*add_node)); + EXPECT_TRUE(HasControlOutputs(*add_node, node_map)); + EXPECT_TRUE(HasRegularOutputs(*add_node, node_map)); - EXPECT_TRUE(HasControlInputs(*add_node)); - EXPECT_TRUE(HasRegularInputs(*add_node)); - EXPECT_TRUE(HasControlOutputs(*add_node, node_map)); - EXPECT_TRUE(HasRegularOutputs(*add_node, node_map)); + const NodeDef* x_node = node_map.GetNode("x"); + ASSERT_NE(x_node, nullptr); + EXPECT_FALSE(HasControlInputs(*x_node)); + EXPECT_FALSE(HasRegularInputs(*x_node)); + EXPECT_FALSE(HasControlOutputs(*x_node, node_map)); + EXPECT_TRUE(HasRegularOutputs(*x_node, node_map)); - const NodeDef* x_node = node_map.GetNode("x"); - ASSERT_NE(x_node, nullptr); - EXPECT_FALSE(HasControlInputs(*x_node)); - EXPECT_FALSE(HasRegularInputs(*x_node)); - EXPECT_FALSE(HasControlOutputs(*x_node, node_map)); - EXPECT_TRUE(HasRegularOutputs(*x_node, node_map)); - - const NodeDef* round_node = node_map.GetNode("round"); - ASSERT_NE(round_node, nullptr); - EXPECT_TRUE(HasControlInputs(*round_node)); - EXPECT_TRUE(HasRegularInputs(*round_node)); - EXPECT_FALSE(HasControlOutputs(*round_node, node_map)); - EXPECT_FALSE(HasRegularOutputs(*round_node, node_map)); - } - - { - // Similar test for ImmutableNodeMap. 
- ImmutableNodeMap node_map(&graph); - - const NodeDef* add_node = node_map.GetNode("add"); - const NodeDef* mul_node = node_map.GetNode("mul"); - ASSERT_NE(add_node, nullptr); - - // [a, b] are only non-control inputs - EXPECT_EQ(NumNonControlInputs(*add_node), 2); - EXPECT_EQ(NumControlInputs(*add_node), 1); - EXPECT_EQ(NumControlInputs(*mul_node), 0); - - EXPECT_TRUE(HasControlInputs(*add_node)); - EXPECT_TRUE(HasRegularInputs(*add_node)); - - const NodeDef* x_node = node_map.GetNode("x"); - ASSERT_NE(x_node, nullptr); - EXPECT_FALSE(HasControlInputs(*x_node)); - EXPECT_FALSE(HasRegularInputs(*x_node)); - - const NodeDef* round_node = node_map.GetNode("round"); - ASSERT_NE(round_node, nullptr); - EXPECT_TRUE(HasControlInputs(*round_node)); - EXPECT_TRUE(HasRegularInputs(*round_node)); - } + const NodeDef* round_node = node_map.GetNode("round"); + ASSERT_NE(round_node, nullptr); + EXPECT_TRUE(HasControlInputs(*round_node)); + EXPECT_TRUE(HasRegularInputs(*round_node)); + EXPECT_FALSE(HasControlOutputs(*round_node, node_map)); + EXPECT_FALSE(HasRegularOutputs(*round_node, node_map)); } TEST(CheckAttrExists, All) { @@ -683,30 +653,17 @@ TEST(SetTensorValueTest, Quantized) { /*error_msg=*/""); } -static void BM_NodeMapConstruct(benchmark::State& state) { - const int size = state.range(0); +static void BM_NodeMapConstruct(int iters, int size) { testing::StopTiming(); GraphDef graph = test::CreateRandomGraph(size); testing::StartTiming(); - for (auto s : state) { + for (int i = 0; i < iters; i++) { NodeMap node_map(&graph); } testing::StopTiming(); } BENCHMARK(BM_NodeMapConstruct)->Range(1, 1 << 20); -static void BM_ImmutableNodeMapConstruct(benchmark::State& state) { - const int size = state.range(0); - testing::StopTiming(); - GraphDef graph = test::CreateRandomGraph(size); - testing::StartTiming(); - for (auto s : state) { - ImmutableNodeMap node_map(&graph); - } - testing::StopTiming(); -} -BENCHMARK(BM_ImmutableNodeMapConstruct)->Range(1, 1 << 20); - } // namespace } // namespace grappler } // namespace tensorflow From 3b30b1b495e018a23025f71877024324eaddb00f Mon Sep 17 00:00:00 2001 From: Stephan Herhut Date: Tue, 4 Aug 2020 13:33:31 -0700 Subject: [PATCH 2103/2522] Allow index typed memrefs in reshape_memref_cast. With the recent change to allow memref of index in MLIR core, we should also allow this in the mhlo dialect. PiperOrigin-RevId: 324879354 Change-Id: Id18d6a5951906d4f5b4438e93c49d3518cff5a3d --- .../mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.td | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.td b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.td index 87082219db7..3fa46584ca2 100644 --- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.td +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.td @@ -66,6 +66,8 @@ def LHLO_PredOrIntBuffer : MemRefOf<[HLO_Int, HLO_Pred]>; def LHLO_Buffer : MemRefOf<[AnyFloat, AnySignlessInteger, AnyComplex]>; +def LHLO_ExtentBuffer : MemRefRankOf<[AnySignlessInteger, Index], [1]>; + //===----------------------------------------------------------------------===// // LMHLO nullary op definitions. 
//===----------------------------------------------------------------------===// @@ -467,7 +469,7 @@ def ReshapeMemRefCastOp: Op:$shape + LHLO_ExtentBuffer:$shape ); let results = (outs AnyRankedOrUnrankedMemRef:$result); From 442c7015fce8889fac11f415f20d43e0576520a3 Mon Sep 17 00:00:00 2001 From: Pavithra Vijay Date: Tue, 4 Aug 2020 13:44:26 -0700 Subject: [PATCH 2104/2522] Remove usages of `smart_cond` module from Keras. We have a version of smart_cond in keras/utils/tf_utils.py, removing that and adding smart_cond from smart_cond TF module to keras/utils/control_flow_util.py PiperOrigin-RevId: 324881600 Change-Id: I94a2e9666d877b49703d2aa9dd10c1e954e70fda --- .../python/keras/engine/data_adapter.py | 4 +- .../python/keras/engine/training_utils.py | 4 +- .../python/keras/layers/normalization.py | 4 +- tensorflow/python/keras/losses.py | 15 +++--- .../experimental/loss_scale_optimizer.py | 6 +-- .../python/keras/utils/control_flow_util.py | 46 ++++++++----------- 6 files changed, 34 insertions(+), 45 deletions(-) diff --git a/tensorflow/python/keras/engine/data_adapter.py b/tensorflow/python/keras/engine/data_adapter.py index 0e4886fc8cb..e9662da73e7 100644 --- a/tensorflow/python/keras/engine/data_adapter.py +++ b/tensorflow/python/keras/engine/data_adapter.py @@ -37,12 +37,12 @@ from tensorflow.python.eager import context from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops +from tensorflow.python.framework import smart_cond from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_shape from tensorflow.python.framework.ops import composite_tensor from tensorflow.python.keras import backend from tensorflow.python.keras.engine import training_utils -from tensorflow.python.keras.utils import control_flow_util from tensorflow.python.keras.utils import data_utils from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops @@ -1296,7 +1296,7 @@ def _make_class_weight_map_fn(class_weight): raise ValueError("`class_weight` not supported for " "3+ dimensional targets.") - y_classes = control_flow_util.smart_cond( + y_classes = smart_cond.smart_cond( y.shape.rank == 2 and backend.shape(y)[1] > 1, lambda: backend.argmax(y, axis=1), lambda: math_ops.cast(backend.reshape(y, (-1,)), dtypes.int64)) diff --git a/tensorflow/python/keras/engine/training_utils.py b/tensorflow/python/keras/engine/training_utils.py index 157a0c77ebf..84bcd99922f 100644 --- a/tensorflow/python/keras/engine/training_utils.py +++ b/tensorflow/python/keras/engine/training_utils.py @@ -40,6 +40,7 @@ from tensorflow.python.framework import composite_tensor_utils from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops +from tensorflow.python.framework import smart_cond from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_spec from tensorflow.python.framework import tensor_util @@ -47,7 +48,6 @@ from tensorflow.python.keras import backend as K from tensorflow.python.keras import callbacks as cbks from tensorflow.python.keras import losses from tensorflow.python.keras import metrics as metrics_module -from tensorflow.python.keras.utils import control_flow_util from tensorflow.python.keras.utils import data_utils from tensorflow.python.keras.utils import generic_utils from tensorflow.python.keras.utils import losses_utils @@ -997,7 +997,7 @@ def 
standardize_weights(y, weight_vector[:] = np.nan weight_vector[keys] = values - y_classes = control_flow_util.smart_cond( + y_classes = smart_cond.smart_cond( len(y.shape.as_list()) == 2 and K.shape(y)[1] > 1, lambda: K.argmax(y, axis=1), lambda: math_ops.cast(K.reshape(y, (-1,)), dtypes.int64)) diff --git a/tensorflow/python/keras/layers/normalization.py b/tensorflow/python/keras/layers/normalization.py index fd77cddb08d..12013882ff5 100644 --- a/tensorflow/python/keras/layers/normalization.py +++ b/tensorflow/python/keras/layers/normalization.py @@ -577,7 +577,7 @@ class BatchNormalizationBase(Layer): training, train_op, _fused_batch_norm_inference) variance = _maybe_add_or_remove_bessels_correction(variance, remove=True) - training_value = control_flow_util.smart_constant_value(training) + training_value = control_flow_util.constant_value(training) if training_value or training_value is None: if not use_fused_avg_updates: if training_value is None: @@ -762,7 +762,7 @@ class BatchNormalizationBase(Layer): return (scale, offset) # Determine a boolean value for `training`: could be True, False, or None. - training_value = control_flow_util.smart_constant_value(training) + training_value = control_flow_util.constant_value(training) if training_value == False: # pylint: disable=singleton-comparison,g-explicit-bool-comparison mean, variance = self.moving_mean, self.moving_variance else: diff --git a/tensorflow/python/keras/losses.py b/tensorflow/python/keras/losses.py index a149418fdd8..f75e6af6e30 100644 --- a/tensorflow/python/keras/losses.py +++ b/tensorflow/python/keras/losses.py @@ -26,9 +26,9 @@ from tensorflow.python.autograph.core import ag_ctx from tensorflow.python.autograph.impl import api as autograph from tensorflow.python.distribute import distribution_strategy_context from tensorflow.python.framework import ops +from tensorflow.python.framework import smart_cond from tensorflow.python.framework import tensor_util from tensorflow.python.keras import backend as K -from tensorflow.python.keras.utils import control_flow_util from tensorflow.python.keras.utils import losses_utils from tensorflow.python.keras.utils import tf_utils from tensorflow.python.keras.utils.generic_utils import deserialize_keras_object @@ -1313,9 +1313,8 @@ def _maybe_convert_labels(y_true): # Convert the binary labels to -1 or 1. return 2. * y_true - 1. 
- updated_y_true = control_flow_util.smart_cond(is_binary, - _convert_binary_labels, - lambda: y_true) + updated_y_true = smart_cond.smart_cond(is_binary, _convert_binary_labels, + lambda: y_true) return updated_y_true @@ -1527,8 +1526,8 @@ def categorical_crossentropy(y_true, num_classes = math_ops.cast(array_ops.shape(y_true)[-1], y_pred.dtype) return y_true * (1.0 - label_smoothing) + (label_smoothing / num_classes) - y_true = control_flow_util.smart_cond(label_smoothing, _smooth_labels, - lambda: y_true) + y_true = smart_cond.smart_cond(label_smoothing, _smooth_labels, + lambda: y_true) return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits) @@ -1596,8 +1595,8 @@ def binary_crossentropy(y_true, y_pred, from_logits=False, label_smoothing=0): def _smooth_labels(): return y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing - y_true = control_flow_util.smart_cond(label_smoothing, _smooth_labels, - lambda: y_true) + y_true = smart_cond.smart_cond(label_smoothing, _smooth_labels, + lambda: y_true) return K.mean( K.binary_crossentropy(y_true, y_pred, from_logits=from_logits), axis=-1) diff --git a/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer.py b/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer.py index f09c8c92e8c..4a3f459de80 100644 --- a/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer.py +++ b/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer.py @@ -24,11 +24,11 @@ from tensorflow.python.distribute import one_device_strategy from tensorflow.python.distribute import tpu_strategy from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.framework import smart_cond from tensorflow.python.keras import backend from tensorflow.python.keras import optimizers from tensorflow.python.keras.mixed_precision.experimental import loss_scale as keras_loss_scale_module from tensorflow.python.keras.optimizer_v2 import optimizer_v2 -from tensorflow.python.keras.utils import control_flow_util from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.training.experimental import mixed_precision @@ -406,8 +406,8 @@ class LossScaleOptimizer(_DelegatingTrackableMixin, optimizer_v2.OptimizerV2): # DistributionStrategy does not support having a cond in a replica context # with a branch that calls `merge_call`, and self._optimizer.apply_gradients # calls `merge_call`. 
- maybe_apply_op = control_flow_util.smart_cond(should_apply_grads, apply_fn, - do_not_apply_fn) + maybe_apply_op = smart_cond.smart_cond(should_apply_grads, apply_fn, + do_not_apply_fn) return control_flow_ops.group(maybe_apply_op, loss_scale_update_op) def _apply_gradients(self, grads, wrapped_vars, name, diff --git a/tensorflow/python/keras/utils/control_flow_util.py b/tensorflow/python/keras/utils/control_flow_util.py index 4aadf691d70..8d13c573149 100644 --- a/tensorflow/python/keras/utils/control_flow_util.py +++ b/tensorflow/python/keras/utils/control_flow_util.py @@ -22,8 +22,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_util +from tensorflow.python.framework import smart_cond as smart_module from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import variables @@ -107,43 +106,34 @@ def smart_cond(pred, true_fn=None, false_fn=None, name=None): # pylint: disable Raises: TypeError: If `true_fn` or `false_fn` is not callable. """ - if not callable(true_fn): - raise TypeError("`true_fn` must be callable.") - if not callable(false_fn): - raise TypeError("`false_fn` must be callable.") - pred_value = smart_constant_value(pred) - if pred_value is not None: - if pred_value: - return true_fn() - else: - return false_fn() - else: + if isinstance(pred, variables.Variable): return control_flow_ops.cond( pred, true_fn=true_fn, false_fn=false_fn, name=name) + return smart_module.smart_cond( + pred, true_fn=true_fn, false_fn=false_fn, name=name) -def smart_constant_value(pred): # pylint: disable=invalid-name +def constant_value(pred): # pylint: disable=invalid-name """Return the bool value for `pred`, or None if `pred` had a dynamic value. Arguments: - pred: A scalar, either a Python bool or tensor. + pred: A scalar, either a Python bool or a TensorFlow boolean variable + or tensor, or the Python integer 1 or 0. Returns: True or False if `pred` has a constant boolean value, None otherwise. Raises: - TypeError: If `pred` is not a Tensor or bool. + TypeError: If `pred` is not a Variable, Tensor or bool, or Python + integer 1 or 0. """ - if isinstance(pred, ops.Tensor): - pred_value = tensor_util.constant_value(pred) - elif isinstance(pred, variables.Variable): - pred_value = None - elif pred in {0, 1}: # Accept 1/0 as valid boolean values - pred_value = bool(pred) - elif isinstance(pred, bool): - pred_value = pred - else: - raise TypeError("`pred` must be a Tensor, or a Python bool, or 1 or 0. " - "Found instead: %s" % type(pred)) + # Allow integer booleans. + if isinstance(pred, int): + if pred == 1: + pred = True + elif pred == 0: + pred = False - return pred_value + if isinstance(pred, variables.Variable): + return None + return smart_module.smart_constant_value(pred) From 895ed30e83b57c8c7702839f3a4c2697a4b4e52a Mon Sep 17 00:00:00 2001 From: Rachel Lim Date: Tue, 4 Aug 2020 13:46:05 -0700 Subject: [PATCH 2105/2522] Update visibility of build target. 
PiperOrigin-RevId: 324881891 Change-Id: I2a71619158aebc895b8cd708e25c3d12bc3ad08d --- .../mlir/tensorflow/ir/tf_generated_ops.td | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td index 63138489ef7..84f3fa9c463 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td @@ -7226,6 +7226,28 @@ tf.range(start, limit, delta) ==> [3, 6, 9, 12, 15] ]; } +def TF_RangeDatasetOp : TF_Op<"RangeDataset", []> { + let summary = [{ +Creates a dataset with a range of values. Corresponds to python's xrange. + }]; + + let description = [{ + }]; + + let arguments = (ins + I64Tensor:$start, + I64Tensor:$stop, + I64Tensor:$step, + + Confined]>:$output_types, + Confined]>:$output_shapes + ); + + let results = (outs + TF_VariantTensor:$handle + ); +} + def TF_RankOp : TF_Op<"Rank", [NoSideEffect]> { let summary = "Returns the rank of a tensor."; From 6dae832b2cebab6792abcfa55a39263f5f4f483a Mon Sep 17 00:00:00 2001 From: Scott Main Date: Tue, 4 Aug 2020 13:54:10 -0700 Subject: [PATCH 2106/2522] Update post-training quant tutorial to use int-only quant with TF2.3 PiperOrigin-RevId: 324883545 Change-Id: I425debca1604354d37939e1141ac7cf425422067 --- .../post_training_integer_quant.ipynb | 957 +++++++++++------- 1 file changed, 576 insertions(+), 381 deletions(-) diff --git a/tensorflow/lite/g3doc/performance/post_training_integer_quant.ipynb b/tensorflow/lite/g3doc/performance/post_training_integer_quant.ipynb index cff1e773938..a2835f53d82 100644 --- a/tensorflow/lite/g3doc/performance/post_training_integer_quant.ipynb +++ b/tensorflow/lite/g3doc/performance/post_training_integer_quant.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": { "cellView": "form", "colab": {}, @@ -76,22 +76,15 @@ "source": [ "## Overview\n", "\n", - "[TensorFlow Lite](https://www.tensorflow.org/lite/) now supports\n", - "converting all model values (weights and activations) to 8-bit integers when converting from TensorFlow to TensorFlow Lite's flat buffer format. This results in a 4x reduction in model size and a 3 to 4x performance improvement on CPU performance. In addition, this fully quantized model can be consumed by integer-only hardware accelerators.\n", + "Integer quantization is an optimization strategy that converts 32-bit floating-point numbers (such as weights and activation outputs) to the nearest 8-bit fixed-point numbers. This results in a smaller model and increased inferencing speed, which is valuable for low-power devices such as [microcontrollers](https://www.tensorflow.org/lite/microcontrollers). This data format is also required by integer-only accelerators such as the [Edge TPU](https://coral.ai/).\n", "\n", - "In contrast to [post-training \"on-the-fly\" quantization](https://colab.sandbox.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/lite/tutorials/post_training_quant.ipynb)—which stores only the weights as 8-bit integers—this technique statically quantizes all weights *and* activations during model conversion.\n", + "In this tutorial, you'll train an MNIST model from scratch, convert it into a Tensorflow Lite file, and quantize it using [post-training quantization](https://www.tensorflow.org/lite/performance/post_training_quantization). 
Finally, you'll check the accuracy of the converted model and compare it to the original float model.\n", "\n", - "In this tutorial, you'll train an MNIST model from scratch, check its accuracy in TensorFlow, and then convert the model into a Tensorflow Lite flatbuffer with full quantization. Finally, you'll check the accuracy of the converted model and compare it to the original float model." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "2XsEP17Zelz9" - }, - "source": [ - "## Build an MNIST model" + "You actually have several options as to how much you want to quantize a model. In this tutorial, you'll perform \"full integer quantization,\" which converts all weights and activation outputs into 8-bit integer data—whereas other strategies may leave some amount of data in floating-point.\n", + "\n", + "To learn more about the various quantization strategies, read about [TensorFlow Lite model optimization](https://www.tensorflow.org/lite/performance/model_optimization).\n", + "\n", + "\n" ] }, { @@ -101,12 +94,22 @@ "id": "dDqqUIZjZjac" }, "source": [ - "### Setup" + "## Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "I0nR5AMEWq0H" + }, + "source": [ + "In order to quantize both the input and output tensors, we need to use APIs added in TensorFlow r2.3:" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -118,82 +121,18 @@ "logging.getLogger(\"tensorflow\").setLevel(logging.DEBUG)\n", "\n", "import tensorflow as tf\n", - "from tensorflow import keras\n", "import numpy as np\n", - "import pathlib" + "assert float(tf.__version__[:3]) \u003e= 2.3" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", - "id": "eQ6Q0qqKZogR" + "id": "2XsEP17Zelz9" }, "source": [ - "### Train and export the model" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": { - "colab": { - "height": 51 - }, - "colab_type": "code", - "id": "eMsw_6HujaqM", - "outputId": "5662a5f3-fc64-458f-958a-98f9c6348143" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1875/1875 [==============================] - 2s 1ms/step - loss: 0.2782 - accuracy: 0.9221 - val_loss: 0.1230 - val_accuracy: 0.9664\n" - ] - }, - { - "data": { - "text/plain": [ - "\u003ctensorflow.python.keras.callbacks.History at 0x7f33f1817588\u003e" - ] - }, - "execution_count": 19, - "metadata": { - "tags": [] - }, - "output_type": "execute_result" - } - ], - "source": [ - "# Load MNIST dataset\n", - "mnist = keras.datasets.mnist\n", - "(train_images, train_labels), (test_images, test_labels) = mnist.load_data()\n", - "\n", - "# Normalize the input image so that each pixel value is between 0 to 1.\n", - "train_images = train_images / 255.0\n", - "test_images = test_images / 255.0\n", - "\n", - "# Define the model architecture\n", - "model = keras.Sequential([\n", - " keras.layers.InputLayer(input_shape=(28, 28)),\n", - " keras.layers.Reshape(target_shape=(28, 28, 1)),\n", - " keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation='relu'),\n", - " keras.layers.MaxPooling2D(pool_size=(2, 2)),\n", - " keras.layers.Flatten(),\n", - " keras.layers.Dense(10)\n", - "])\n", - "\n", - "# Train the digit classification model\n", - "model.compile(optimizer='adam',\n", - " loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n", - " metrics=['accuracy'])\n", - "model.fit(\n", - " train_images,\n", - " 
train_labels,\n", - " epochs=1,\n", - " validation_data=(test_images, test_labels)\n", - ")" + "## Generate a TensorFlow Model" ] }, { @@ -203,7 +142,94 @@ "id": "5NMaNZQCkW9X" }, "source": [ - "This training won't take long because you're training the model for just a single epoch, which trains to about 96% accuracy." + "We'll build a simple model to classify numbers from the [MNIST dataset](https://www.tensorflow.org/datasets/catalog/mnist).\n", + "\n", + "This training won't take long because you're training the model for just a 5 epochs, which trains to about ~98% accuracy." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "height": 51 + }, + "colab_type": "code", + "id": "eMsw_6HujaqM", + "outputId": "0f362bef-a5b8-46f2-c41c-cba008998b72" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz\n", + "11493376/11490434 [==============================] - 0s 0us/step\n", + "Epoch 1/5\n", + "1875/1875 [==============================] - 5s 2ms/step - loss: 0.2793 - accuracy: 0.9227 - val_loss: 0.1392 - val_accuracy: 0.9618\n", + "Epoch 2/5\n", + "1875/1875 [==============================] - 5s 2ms/step - loss: 0.1179 - accuracy: 0.9667 - val_loss: 0.0928 - val_accuracy: 0.9719\n", + "Epoch 3/5\n", + "1875/1875 [==============================] - 4s 2ms/step - loss: 0.0860 - accuracy: 0.9754 - val_loss: 0.0742 - val_accuracy: 0.9755\n", + "Epoch 4/5\n", + "1875/1875 [==============================] - 4s 2ms/step - loss: 0.0691 - accuracy: 0.9796 - val_loss: 0.0686 - val_accuracy: 0.9776\n", + "Epoch 5/5\n", + "1875/1875 [==============================] - 4s 2ms/step - loss: 0.0589 - accuracy: 0.9823 - val_loss: 0.0654 - val_accuracy: 0.9787\n" + ] + }, + { + "data": { + "text/plain": [ + "\u003ctensorflow.python.keras.callbacks.History at 0x7f69e0275a58\u003e" + ] + }, + "execution_count": null, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "# Load MNIST dataset\n", + "mnist = tf.keras.datasets.mnist\n", + "(train_images, train_labels), (test_images, test_labels) = mnist.load_data()\n", + "\n", + "# Normalize the input image so that each pixel value is between 0 to 1.\n", + "train_images = train_images.astype(np.float32) / 255.0\n", + "test_images = test_images.astype(np.float32) / 255.0\n", + "\n", + "# Define the model architecture\n", + "model = tf.keras.Sequential([\n", + " tf.keras.layers.InputLayer(input_shape=(28, 28)),\n", + " tf.keras.layers.Reshape(target_shape=(28, 28, 1)),\n", + " tf.keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation='relu'),\n", + " tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),\n", + " tf.keras.layers.Flatten(),\n", + " tf.keras.layers.Dense(10)\n", + "])\n", + "\n", + "# Train the digit classification model\n", + "model.compile(optimizer='adam',\n", + " loss=tf.keras.losses.SparseCategoricalCrossentropy(\n", + " from_logits=True),\n", + " metrics=['accuracy'])\n", + "model.fit(\n", + " train_images,\n", + " train_labels,\n", + " epochs=5,\n", + " validation_data=(test_images, test_labels)\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "KuTEoGFYd8aM" + }, + "source": [ + "## Convert to a TensorFlow Lite model" ] }, { @@ -213,16 +239,16 @@ "id": "xl8_fzVAZwOh" }, "source": [ - "### Convert to a TensorFlow Lite model\n", + "Now you can convert the trained model to TensorFlow Lite format 
using the [`TFLiteConverter`](https://www.tensorflow.org/lite/convert/python_api) API, and apply varying degrees of quantization.\n", "\n", - "Using the Python [TFLiteConverter](https://www.tensorflow.org/lite/convert/python_api), you can now convert the trained model into a TensorFlow Lite model.\n", + "Beware that some versions of quantization leave some of the data in float format. So the following sections show each option with increasing amounts of quantization, until we get a model that's entirely int8 or uint8 data. (Notice we duplicate some code in each section so you can see all the quantization steps for each option.)\n", "\n", - "Now load the model using the `TFLiteConverter`:" + "First, here's a converted model with no quantization:" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", @@ -231,63 +257,10 @@ "outputs": [], "source": [ "converter = tf.lite.TFLiteConverter.from_keras_model(model)\n", + "\n", "tflite_model = converter.convert()" ] }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "F2o2ZfF0aiCx" - }, - "source": [ - "Write it out to a `.tflite` file:" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "vptWZq2xnclo" - }, - "outputs": [], - "source": [ - "tflite_models_dir = pathlib.Path(\"/tmp/mnist_tflite_models/\")\n", - "tflite_models_dir.mkdir(exist_ok=True, parents=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": { - "colab": { - "height": 34 - }, - "colab_type": "code", - "id": "Ie9pQaQrn5ue", - "outputId": "8580b835-61f0-42b3-a21e-b8d476042c11" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "84528" - ] - }, - "execution_count": 22, - "metadata": { - "tags": [] - }, - "output_type": "execute_result" - } - ], - "source": [ - "tflite_model_file = tflite_models_dir/\"mnist_model.tflite\"\n", - "tflite_model_file.write_bytes(tflite_model)" - ] - }, { "cell_type": "markdown", "metadata": { @@ -295,25 +268,81 @@ "id": "7BONhYtYocQY" }, "source": [ - "Now you have a trained MNIST model that's converted to a `.tflite` file, but it's still using 32-bit float values for all parameter data.\n", - "\n", - "So let's convert the model again, this time using quantization...\n", - "\n", - "#### Convert using quantization\n", - "First, first set the `optimizations` flag to optimize for size:" + "It's now a TensorFlow Lite model, but it's still using 32-bit float values for all parameter data." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "jPYZwgZTwJMT" + }, + "source": [ + "### Convert using dynamic range quantization\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "Hjvq1vpJd4U_" + }, + "source": [ + "Now let's enable the default `optimizations` flag to quantize all fixed parameters (such as weights):" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": { - "colab": {}, + "colab": { + "height": 34 + }, "colab_type": "code", - "id": "HEZ6ET1AHAS3" + "id": "HEZ6ET1AHAS3", + "outputId": "82a75458-10d2-484a-8e09-a8af56212e10" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /tmp/tmpcojyiqri/assets\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /tmp/tmpcojyiqri/assets\n" + ] + } + ], "source": [ - "converter.optimizations = [tf.lite.Optimize.DEFAULT]" + "converter = tf.lite.TFLiteConverter.from_keras_model(model)\n", + "converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + "\n", + "tflite_model_quant = converter.convert()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "o5wuE-RcdX_3" + }, + "source": [ + "The model is now a bit smaller with quantized weights, but other variable data is still in float format." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "UgKDdnHQEhpb" + }, + "source": [ + "### Convert using float fallback quantization" ] }, { @@ -323,107 +352,87 @@ "id": "rTe8avZJHMDO" }, "source": [ - "Now, in order to create quantized values with an accurate dynamic range of activations, you need to provide a representative dataset.\n", + "To quantize the variable data (such as model input/output and intermediates between layers), you need to provide a [`RepresentativeDataset`](https://www.tensorflow.org/api_docs/python/tf/lite/RepresentativeDataset). This is a generator function that provides a set of input data that's large enough to represent typical values. It allows the converter to estimate a dynamic range for all the variable data. 
(The dataset does not need to be unique compared to the training or evaluation dataset.)\n", "To support multiple inputs, each representative data point is a list and elements in the list are fed to the model according to their indices.\n" ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", - "id": "FiwiWU3gHdkW" - }, - "outputs": [], - "source": [ - "mnist_train, _ = tf.keras.datasets.mnist.load_data()\n", - "images = tf.cast(mnist_train[0], tf.float32) / 255.0\n", - "mnist_ds = tf.data.Dataset.from_tensor_slices((images)).batch(1)\n", - "def representative_data_gen():\n", - " for input_value in mnist_ds.take(100):\n", - " # Model has only one input so each data point has one element.\n", - " yield [input_value]\n", - "\n", - "converter.representative_dataset = representative_data_gen" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "xW84iMYjHd9t" - }, - "source": [ - "Finally, convert the model to TensorFlow Lite format:" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": { - "colab": { - "height": 34 - }, - "colab_type": "code", - "id": "yuNfl3CoHNK3", - "outputId": "79a19679-87a2-4dc6-eee4-b33f3e5c1c5d" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "24720" - ] - }, - "execution_count": 25, - "metadata": { - "tags": [] - }, - "output_type": "execute_result" - } - ], - "source": [ - "tflite_model_quant = converter.convert()\n", - "tflite_model_quant_file = tflite_models_dir/\"mnist_model_quant.tflite\"\n", - "tflite_model_quant_file.write_bytes(tflite_model_quant)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "PhMmUTl4sbkz" - }, - "source": [ - "Note how the resulting file is approximately `1/4` the size:" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": { - "colab": { - "height": 85 - }, - "colab_type": "code", - "id": "JExfcfLDscu4", - "outputId": "58238f92-01b0-4faa-e293-35451d08dd7c" + "id": "FiwiWU3gHdkW", + "outputId": "61093d59-5b47-4e59-a577-46f056281bab" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "total 140K\n", - "-rw-rw-r-- 1 yashkatariya 10086651 25K Jun 23 06:06 mnist_model_quant_io.tflite\n", - "-rw-rw-r-- 1 yashkatariya 10086651 25K Jun 23 06:07 mnist_model_quant.tflite\n", - "-rw-rw-r-- 1 yashkatariya 10086651 83K Jun 23 06:06 mnist_model.tflite\n" + "INFO:tensorflow:Assets written to: /tmp/tmp1bvfr71i/assets\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /tmp/tmp1bvfr71i/assets\n" ] } ], "source": [ - "!ls -lh {tflite_models_dir}" + "def representative_data_gen():\n", + " for input_value in tf.data.Dataset.from_tensor_slices(train_images).batch(1).take(100):\n", + " # Model has only one input so each data point has one element.\n", + " yield [input_value]\n", + "\n", + "converter = tf.lite.TFLiteConverter.from_keras_model(model)\n", + "converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + "converter.representative_dataset = representative_data_gen\n", + "\n", + "tflite_model_quant = converter.convert()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "_GC3HFlptf7x" + }, + "source": [ + "Now all weights and variable data are quantized, and the model is significantly smaller compared to the original TensorFlow Lite model.\n", + "\n", + "However, to maintain compatibility with applications that traditionally use float 
model input and output tensors, the TensorFlow Lite Converter leaves the model input and output tensors in float:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "height": 51 + }, + "colab_type": "code", + "id": "id1OEKFELQwp", + "outputId": "024a710f-44cc-43d1-89a7-456a1727523c" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "input: \u003cclass 'numpy.float32'\u003e\n", + "output: \u003cclass 'numpy.float32'\u003e\n" + ] + } + ], + "source": [ + "interpreter = tf.lite.Interpreter(model_content=tflite_model_quant)\n", + "input_type = interpreter.get_input_details()[0]['dtype']\n", + "print('input: ', input_type)\n", + "output_type = interpreter.get_output_details()[0]['dtype']\n", + "print('output: ', output_type)" ] }, { @@ -433,44 +442,75 @@ "id": "RACBJuj2XO8x" }, "source": [ - "Your model should now be fully quantized. However, if you convert a model that includes any operations that TensorFlow Lite cannot quantize, those ops are left in floating point. This allows for conversion to complete so you have a smaller and more efficient model, but the model won't be compatible with some ML accelerators that require full integer quantization. Also, by default, the converted model still use float input and outputs, which also is not compatible with some accelerators.\n", + "That's usually good for compatibility, but it won't be compatible with devices that perform only integer-based operations, such as the Edge TPU.\n", "\n", - "So to ensure that the converted model is fully quantized (make the converter throw an error if it encounters an operation it cannot quantize), and to use integers for the model's input and output, you need to convert the model again using these additional configurations:" + "Additionally, the above process may leave an operation in float format if TensorFlow Lite doesn't include a quantized implementation for that operation. This strategy allows conversion to complete so you have a smaller and more efficient model, but again, it won't be compatible with integer-only hardware. (All ops in this MNIST model have a quantized implementation.)\n", + "\n", + "So to ensure an end-to-end integer-only model, you need a couple more parameters..." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "FQgTqbvPvxGJ" + }, + "source": [ + "### Convert using integer-only quantization" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "mwR9keYAwArA" + }, + "source": [ + "To quantize the input and output tensors, and make the converter throw an error if it encounters an operation it cannot quantize, convert the model again with some additional parameters:" ] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "metadata": { "colab": { - "height": 34 + "height": 51 }, "colab_type": "code", "id": "kzjEjcDs3BHa", - "outputId": "8d7370ec-3f3f-41a2-8afb-4ecdd40e9efc" + "outputId": "0462645b-f8e1-489a-f703-8093f83645d5" }, "outputs": [ { - "data": { - "text/plain": [ - "24784" - ] - }, - "execution_count": 27, - "metadata": { - "tags": [] - }, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /tmp/tmpvnuxq9pa/assets\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /tmp/tmpvnuxq9pa/assets\n" + ] } ], "source": [ + "def representative_data_gen():\n", + " for input_value in tf.data.Dataset.from_tensor_slices(train_images).batch(1).take(100):\n", + " yield [input_value]\n", + "\n", + "converter = tf.lite.TFLiteConverter.from_keras_model(model)\n", + "converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + "converter.representative_dataset = representative_data_gen\n", + "# Ensure that if any ops can't be quantized, the converter throws an error\n", "converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]\n", + "# Set the input and output tensors to uint8 (APIs added in r2.3)\n", "converter.inference_input_type = tf.uint8\n", "converter.inference_output_type = tf.uint8\n", "\n", - "tflite_model_quant = converter.convert()\n", - "tflite_model_quant_file = tflite_models_dir/\"mnist_model_quant_io.tflite\"\n", - "tflite_model_quant_file.write_bytes(tflite_model_quant)" + "tflite_model_quant = converter.convert()" ] }, { @@ -480,9 +520,115 @@ "id": "wYd6NxD03yjB" }, "source": [ - "In this example, the resulting model size remains the same because all operations successfully quantized to begin with. 
However, this new model now uses quantized input and output, making it compatible with more accelerators, such as the Coral Edge TPU.\n", + "The internal quantization remains the same as above, but you can see the input and output tensors are now integer format:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "height": 51 + }, + "colab_type": "code", + "id": "PaNkOS-twz4k", + "outputId": "b7b22b48-c305-4b4c-80c6-506d9f3c2013" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "input: \u003cclass 'numpy.uint8'\u003e\n", + "output: \u003cclass 'numpy.uint8'\u003e\n" + ] + } + ], + "source": [ + "interpreter = tf.lite.Interpreter(model_content=tflite_model_quant)\n", + "input_type = interpreter.get_input_details()[0]['dtype']\n", + "print('input: ', input_type)\n", + "output_type = interpreter.get_output_details()[0]['dtype']\n", + "print('output: ', output_type)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "TO17AP84wzBb" + }, + "source": [ + "Now you have an integer quantized model that uses integer data for the model's input and output tensors, so it's compatible with integer-only hardware such as the [Edge TPU](https://coral.ai)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "sse224YJ4KMm" + }, + "source": [ + "### Save the models as files" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "4_9nZ4nv4b9P" + }, + "source": [ + "You'll need a `.tflite` file to deploy your model on other devices. So let's save the converted models to files and then load them when we run inferences below." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "height": 34 + }, + "colab_type": "code", + "id": "BEY59dC14uRv", + "outputId": "20a3397a-1466-48eb-f421-adc8ebf3f60f" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "24720" + ] + }, + "execution_count": null, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "import pathlib\n", "\n", - "In the following sections, notice that we are now handling two TensorFlow Lite models: `tflite_model_file` is the converted model that still uses floating-point parameters, and `tflite_model_quant_file` is the same model converted with full integer quantization, including uint8 input and output." + "tflite_models_dir = pathlib.Path(\"/tmp/mnist_tflite_models/\")\n", + "tflite_models_dir.mkdir(exist_ok=True, parents=True)\n", + "\n", + "# Save the unquantized/float model:\n", + "tflite_model_file = tflite_models_dir/\"mnist_model.tflite\"\n", + "tflite_model_file.write_bytes(tflite_model)\n", + "# Save the quantized model:\n", + "tflite_model_quant_file = tflite_models_dir/\"mnist_model_quant.tflite\"\n", + "tflite_model_quant_file.write_bytes(tflite_model_quant)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "9t9yaTeF9fyM" + }, + "source": [ + "## Run the TensorFlow Lite models" ] }, { @@ -492,50 +638,56 @@ "id": "L8lQHMp_asCq" }, "source": [ - "## Run the TensorFlow Lite models\n", + "Now we'll run inferences using the TensorFlow Lite [`Interpreter`](https://www.tensorflow.org/api_docs/python/tf/lite/Interpreter) to compare the model accuracies.\n", "\n", - "Run the TensorFlow Lite model using the Python TensorFlow Lite\n", - "Interpreter. 
" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "Ap_jE7QRvhPf" - }, - "source": [ - "### Load the model into the interpreters" + "First, we need a function that runs inference with a given model and images, and then returns the predictions:\n" ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", - "id": "Jn16Rc23zTss" + "id": "X092SbeWfd1A" }, "outputs": [], "source": [ - "interpreter = tf.lite.Interpreter(model_path=str(tflite_model_file))\n", - "interpreter.allocate_tensors()" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "J8Pztk1mvNVL" - }, - "outputs": [], - "source": [ - "interpreter_quant = tf.lite.Interpreter(model_path=str(tflite_model_quant_file))\n", - "interpreter_quant.allocate_tensors()\n", - "input_index_quant = interpreter_quant.get_input_details()[0][\"index\"]\n", - "output_index_quant = interpreter_quant.get_output_details()[0][\"index\"]" + "# Helper function to run inference on a TFLite model\n", + "def run_tflite_model(tflite_file, test_image_indices):\n", + " global test_images\n", + "\n", + " # Initialize the interpreter\n", + " interpreter = tf.lite.Interpreter(model_path=str(tflite_file))\n", + " interpreter.allocate_tensors()\n", + "\n", + " input_details = interpreter.get_input_details()[0]\n", + " output_details = interpreter.get_output_details()[0]\n", + "\n", + " predictions = np.zeros((len(test_image_indices),), dtype=int)\n", + " for i, test_image_index in enumerate(test_image_indices):\n", + " test_image = test_images[test_image_index]\n", + " test_label = test_labels[test_image_index]\n", + "\n", + " # Check if the input type is quantized, then rescale input data to uint8\n", + " if input_details['dtype'] == np.uint8:\n", + " input_scale, input_zero_point = input_details[\"quantization\"]\n", + " test_image = test_image / input_scale + input_zero_point\n", + "\n", + " test_image = np.expand_dims(test_image, axis=0).astype(input_details[\"dtype\"])\n", + " interpreter.set_tensor(input_details[\"index\"], test_image)\n", + " interpreter.invoke()\n", + " output = interpreter.get_tensor(output_details[\"index\"])[0]\n", + "\n", + " # Check if the output type is quantized, then rescale output data to float\n", + " if output_details['dtype'] == np.uint8:\n", + " output_scale, output_zero_point = output_details[\"quantization\"]\n", + " test_image = test_image.astype(np.float32)\n", + " test_image = test_image / input_scale + input_zero_point\n", + "\n", + " predictions[i] = output.argmax()\n", + "\n", + " return predictions\n" ] }, { @@ -546,62 +698,88 @@ }, "source": [ "### Test the models on one image\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "QpPpFPaz7eEM" + }, + "source": [ + "Now we'll compare the performance of the float model and quantized model:\n", + "+ `tflite_model_file` is the original TensorFlow Lite model with floating-point data.\n", + "+ `tflite_model_quant_file` is the last model we converted using integer-only quantization (it uses uint8 data for input and output).\n", "\n", - "First test it on the float model:" + "Let's create another function to print our predictions:" ] }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", - "id": "AKslvo2kwWac" + "id": "zR2cHRUcUZ6e" }, "outputs": [], "source": [ - "test_image = 
np.expand_dims(test_images[0], axis=0).astype(np.float32)\n", + "import matplotlib.pylab as plt\n", "\n", - "input_index = interpreter.get_input_details()[0][\"index\"]\n", - "output_index = interpreter.get_output_details()[0][\"index\"]\n", - "interpreter.set_tensor(input_index, test_image)\n", - "interpreter.invoke()\n", - "predictions = interpreter.get_tensor(output_index)" + "# Change this to test a different image\n", + "test_image_index = 1\n", + "\n", + "## Helper function to test the models on one image\n", + "def test_model(tflite_file, test_image_index, model_type):\n", + " global test_labels\n", + "\n", + " predictions = run_tflite_model(tflite_file, [test_image_index])\n", + "\n", + " plt.imshow(test_images[test_image_index])\n", + " template = model_type + \" Model \\n True:{true}, Predicted:{predict}\"\n", + " _ = plt.title(template.format(true= str(test_labels[test_image_index]), predict=str(predictions[0])))\n", + " plt.grid(False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "A5OTJ_6Vcslt" + }, + "source": [ + "Now test the float model:" ] }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "metadata": { "colab": { - "height": 281 + "height": 296 }, "colab_type": "code", - "id": "XZClM2vo3_bm", - "outputId": "3af2e31c-44c6-41f2-c51f-da9d7b71bdfb" + "id": "iTK0x980coto", + "outputId": "1881b045-e953-416f-a25f-6c083409c7be" }, "outputs": [ { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAP8AAAEICAYAAACQ6CLfAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAFxZJREFUeJzt3XtU1HXeB/D3cE0RVDSG4eKMPJBL\nIrI6ZqXhBTFrVwwpw5WEAGnLc9ZL2nbbI1arPPV4nix99jRR7aiFz7qmtIu6KhulVrJj4baYHiKI\nq6DCE4pyG7/PH51mI5nf4DAX9Pt+neM5zO/z/f2+H37ynt/M/GbmpxJCCBCRdDzc3QARuQfDTyQp\nhp9IUgw/kaQYfiJJMfxEkmL4yeF6enqgUqlQXV0NAMjOzsaGDRucPm9+fj5mzpzp9HluFgy/nYYN\nG2b55+HhgSFDhlhuv/vuu06fPzs7u1cPvr6+GDlypNPntUd+fj6effZZm+OmT5+OP/7xj07p4Ztv\nvum1v4YNGwaVSoXNmzc7Zb4bgZe7G7hRXbp0yfKzTqdDfn4+5syZY3V8T08PvLwct7vz8/ORn59v\nuZ2WloahQ4c6bPs/Zjab4enp6ZRtu0pERESv/7Ovv/4a48aNw8KFC93YlXvxyO8kzz//PB5++GEs\nXrwY/v7+2LFjB9LS0pCbm2sZc/jwYeh0Osvturo6JCcn49Zbb8XYsWOxdevWfs118eJF7NmzB+np\n6f0a/8O8L7zwAkaNGoWxY8di586dlnpaWhqWL1+OefPmwc/PD0eOHEFHRwdWr16N8PBwqNVqPPHE\nE+jo6LCsk5eXh+DgYISGhsJoNPaa76e/9/vvv4+4uDgEBAQgMjISBw8exG9/+1t8+umn+PWvf41h\nw4Zh5cqVAIBTp05hzpw5CAwMxM9+9jPs3r3bsp1z587hl7/8JQICAnDnnXeiqqqqX78/ABiNRsye\nPRvh4eH9XuemI2jAtFqtOHToUK9lzz33nPD29hYffPCBMJvN4vLly2LJkiVi3bp1ljGHDh0SWq1W\nCCFET0+PmDhxovj9738vOjs7RUVFhdBqteLw4cNCCCFKSkrEqFGj+pz/rbfeEpGRkf3u99ChQ8LT\n01OsWbNGdHR0iOLiYjFkyBBRUVEhhBBiyZIlYsSIEeKTTz4RZrNZdHR0iOXLl4sHHnhAtLS0iO++\n+07cd9994vnnnxdCCPGXv/xFBAcHi/LycnHp0iXx0EMPCQCiqqrKsr0ffu9jx46J4cOHi8OHDwuz\n2SxqamrE6dOnhRBCTJs2TbzzzjuWPtva2kRISIgwGo2iu7tbmEwmERgYaBmfkpIiUlNTRXt7uzh5\n8qQIDg4WM2bMsKw/b9488corr1zz+1+9elVotVqxffv2fu+zmxHD7wDWwj9r1qxey5TCf/ToUTF2\n7Nhe41944QWRnZ1tc/74+Hjx4osv9rvfQ4cOCW9vb9He3m5ZlpycLDZs2GDp89FHH7XUzGaz8PX1\nFdXV1ZZlH3/8seUO55FHHhHPPfecpVZeXm41/JmZmWLNmjV99vXT8O/YsUPMnDmz15jMzEzx0ksv\nia6uLuHp6Wm5wxJCiLVr1/YKvzV///vfhb+/f6/fX0Z8zu9E1/OQ8ttvv0VNTQ1GjBhhWWY2m22+\nel1VVYWjR49i27Zt19XbqFGjer1GoNVq0dDQYLn9497Pnj2Lzs5OTJw40bJM/OjzYA0NDZg2bVqv\nbVlTW1uLKVOm9KvHb7/9FseOHeu1T3p6epCRkYGmpiaYzeZefWq1WpSWltrcrtFoxEMPPeS010hu\nFAy/E6lUql63/fz8cPnyZcvts2fPWn4ODw9HVFQUvvrqq+uaY9u2bZgxY4Zi4Ppy4cIFXLlyBUOG\nDAEA1NTUQK/X99m7Wq2Gj48Pzpw5A7Vafc22NBoNamtrLbdramqszhseHo7Kyso+az/dX+Hh4UhI\nSMD+/fuvGdvd3Q0PDw/U1tYiMjLS5rw/aG9vx+7du1FUVGRz7M2O
L/i5UFxcHIqKitDa2orGxka8\n9tprltpdd90FHx8fbNq0CR0dHTCbzfjyyy9x4sQJxW1u27YNGRkZ1yxPS0tDdna21fWuXr2K3Nxc\ndHV1oaSkBPv378eDDz7Y51hPT09kZ2dj5cqVOHfuHIQQqKurw8GDBwEAixYtwttvv43Tp0+jvb0d\n69evtzpvVlYW8vPz8eGHH+Lq1auoq6vDmTNnAHx/J/PNN99YxiYlJaG8vBzvvfceuru70d3djdLS\nUpw5cwbe3t544IEHsG7dOly5cgX/+te/sH37dsV9BQC7d+9GUFAQ7rnnHptjb3YMvwtlZGQgOjoa\nWq0W8+bNQ2pqqqXm5eWFffv2obS0FDqdDqNHj8Zjjz2GtrY2AEBJSUmvh78AcOTIETQ1NSElJeWa\nuWpra3s9FP+psLAw+Pn5QaPRID09Hfn5+YiKirI6ftOmTdBqtbjjjjswfPhwzJ07FxUVFQCA+fPn\nY/ny5ZgxYwZuu+02JCYmWt3O3XffjTfffBO/+c1vMHz4cMyaNcvyqGHlypUoKCjAiBEjsHr1agwf\nPhx/+9vfsGPHDmg0GgQHB+OZZ55BZ2cnAOAPf/gDWltboVarkZWVhUcffbTXXHPnzsXLL7/ca5nR\naMTSpUuveZQhI5UQ/DKPm01HRwd+/vOf48svv+zzvQWHDx9Gdna25R14JCc+578J3XLLLdf92gHJ\nhw/7iSTFh/1EkuKRn0hSLn3O76PyxS3wc+WURFLpQDu6RGe/xg4o/AcOHMCKFStgNpuRnZ2Np59+\nWnH8LfDDVFXCQKYkIgXHRXG/x9r9sN9sNmP58uXYv38/Tp06hYKCApw6dcrezRGRi9kd/tLSUkRG\nRiIiIgI+Pj5ITU1FYWGhI3sjIieyO/z19fW9PlQRFhaG+vr6a8YZDAbo9Xro9Xp0o3/PRYjI+ewO\nf19nCPt6y2ROTg5MJhNMJhO84WvvdETkYHaHPywsrNcnuerq6hASEuKQpojI+ewO/5QpU1BRUYGq\nqip0dXVh586dSEpKcmRvROREdp/q8/LywpYtW3DvvffCbDYjMzMT48ePd2RvRORELn17b4AqkOf5\niZzouChGm2jp11i+vZdIUgw/kaQYfiJJMfxEkmL4iSTF8BNJiuEnkhTDTyQphp9IUgw/kaQYfiJJ\nMfxEkmL4iSTF8BNJiuEnkhTDTyQphp9IUgw/kaQYfiJJMfxEkmL4iSTF8BNJiuEnkhTDTyQphp9I\nUgw/kaQYfiJJMfxEkmL4iSTF8BNJymsgK+t0Ovj7+8PT0xNeXl4wmUyO6ouInGxA4QeADz/8EKNH\nj3ZEL0TkQnzYTySpAYVfpVJh7ty5mDx5MgwGQ59jDAYD9Ho99Ho9utE5kOmIyIFUQghh78oNDQ0I\nCQlBc3MzEhMT8frrryM+Pt7q+ABVIKaqEuydjohsOC6K0SZa+jV2QEf+kJAQAEBQUBCSk5NRWlo6\nkM0RkQvZHf729nZcvHjR8vPBgwcRExPjsMaIyLnsfrW/qakJycnJAICenh786le/wrx58xzWGBE5\nl93hj4iIwMmTJx3ZCxG5EE/1EUmK4SeSFMNPJCmGn0hSDD+RpAb8wR5ZXFh2l9XamEe+Vlz3dLNa\nsd7V6a1YDy1Qrg+tu2S1drXslOK6JC8e+YkkxfATSYrhJ5IUw08kKYafSFIMP5GkGH4iSfE8fz89\ntfY9q7UUv1bllf9jgJPPVC5X91y2Wtt8btYAJ79xlTZrrdb8Ng1XXNer+ISj2xl0eOQnkhTDTyQp\nhp9IUgw/kaQYfiJJMfxEkmL4iSQ1oCv2XK8b+Yo97Q9OtVo7H6t8HzryK+Vd3BqtUqz7xP6fYv3l\nmPet1hKHXFFct+jyMMX6L4Za/66AgboiuhTrxzv9FOszb+m2e+7IoscU67fl/MPubbuTy67YQ0Q3\nLoafSFIMP5GkGH4iSTH8RJJi+IkkxfATSYqf5+8nvz8fV6gNbNsBA1sdrwfPtFp7aZpOee6PlK85\n8PLMSDs66h+vK1cV637/bFSsj/p4t2J9go/16x0MrVa+FoIMbB75MzMzERQUhJiYGMuylpYWJCYm\nIioqComJiWhttfFlFkQ06NgMf0ZGBg4cONBrWV5eHhISElBRUYGEhATk5eU5rUEicg6b4Y+Pj0dg\nYGCvZYWFhUhPTwcApKenY+/evc7pjoicxq7n/E1NTdBoNAAAjUaD5uZmq2MNBgMMBgMAoBud9kxH\nRE7g9Ff7c3JyYDKZYDKZ4A1fZ09HRP1kV/jVajUaG79/JbaxsRFBQUEObYqInM+u8CclJcFoNAIA\njEYjFixY4NCmiMj5bD7nX7x4MUpKSnD+/HmEhYVh/fr1ePrpp7Fo0SK89dZbGDNmDHbt2uWKXsmK\nnrNNVmt+u63XAMBsY9t+f75gR0eO0ZR9l2J9vI/yn+9/tYyzWtO9843iuj2K1ZuDzfAXFBT0uby4\nuNjhzRCR6/DtvUSSYviJJMXwE0mK4SeSFMNPJCl+pJfcxksbrljf8uwWxbq3ylOxvmvzHKu1UY2f\nKq4rAx75iSTF8BNJiuEnkhTDTyQphp9IUgw/kaQYfiJJ8Tw/uc3pVaGK9Sm+ypcuL+9Svvx44KnL\n192TTHjkJ5IUw08kKYafSFIMP5GkGH4iSTH8RJJi+IkkxfP85FSdv5hitfb5g/9tY23lKzw9vmKF\nYn3IJ6U2ti83HvmJJMXwE0mK4SeSFMNPJCmGn0hSDD+RpBh+IknxPD85Vc191o8vw1TK5/EXVyUq\n1oceOKlYF4pVsnnkz8zMRFBQEGJiYizLcnNzERoairi4OMTFxWHfvn1ObZKIHM9m+DMyMnDgwIFr\nlq9atQplZWUoKyvD/fff75TmiMh5bIY/Pj4egYGBruiFiFzI7hf8tmzZgtjYWGRmZqK1tdXqOIPB\nAL1eD71ej2502jsdETmYXeF//PHHUVlZibKyMmg0Gjz55JNWx+bk5MBkMsFkMsHbxgc1iMh17Aq/\nWq2Gp6cnPDw8sGzZMpSW8tNTRDcau8Lf2Nho+XnPnj29zgQQ0Y3B5nn+xYsXo6SkBOfPn0dYWBjW\nr1+PkpISlJWVQaVSQafT4Y033nBFrzQIefj7K9Yfueeo1Vrb1Q7FdZs3RCjWfTv/oVgnZTbDX1BQ\ncM2yrKwspzRDRK7Dt/cSSYrhJ5IUw08kKYafSFIMP5Gk+JFeGpCK3PGK9b+O/h+rtQUVKYrr+u7j\nqTxn4pGfSFIMP5GkGH4iSTH8RJJi+IkkxfATSYrhJ5IUz/OTou/S7lSs//Ph1xTrlT3dVmuX/jNM\ncV1fNCrWaWB45CeSFMNPJCmGn0hSDD+RpBh+Ikkx/ESSYviJJMXz/JLzCg1RrK/83f8q1n1Vyn9C\nqScfsVq7dT8/r+9OPPITSYrhJ5IUw08kKYafSFIMP5GkGH4iSTH8RJKyeZ6/trYWS5cuxdmzZ+Hh\n4YGcnBysWLECLS0tePj
hh1FdXQ2dToc//elPGDlypCt6puug8lL+L5741zrF+kPDLijW370YpFhX\n/8768eWq4prkbDaP/F5eXti0aRO++uorfPbZZ9i6dStOnTqFvLw8JCQkoKKiAgkJCcjLy3NFv0Tk\nIDbDr9FoMGnSJACAv78/oqOjUV9fj8LCQqSnpwMA0tPTsXfvXud2SkQOdV3P+aurq/HFF19g6tSp\naGpqgkajAfD9HURzc7NTGiQi5+j3e/svXbqElJQUvPrqqwgICOj3BAaDAQaDAQDQjc7r75CInKJf\nR/7u7m6kpKRgyZIlWLhwIQBArVajsfH7L1hsbGxEUFDfL/zk5OTAZDLBZDLBG74OapuIBspm+IUQ\nyMrKQnR0NFavXm1ZnpSUBKPRCAAwGo1YsGCB87okIodTCSGE0oCjR4/innvuwYQJE+Dh8f19xYYN\nGzB16lQsWrQINTU1GDNmDHbt2oXAwEDFyQJUgZiqSnBc92STarLyJbSLPtg+oO3f/cxyxfqIbZ8O\naPt0fY6LYrSJln6Ntfmcf/r06bB2/1BcXHx9nRHRoMF3+BFJiuEnkhTDTyQphp9IUgw/kaQYfiJJ\n8au7bwKet99mtZazs3BA2779beXz+Lrtnw1o++Q+PPITSYrhJ5IUw08kKYafSFIMP5GkGH4iSTH8\nRJLief6bwOknrH9l+vyhbQPadlhJl/IA5a+DoEGMR34iSTH8RJJi+IkkxfATSYrhJ5IUw08kKYaf\nSFI8z38D6Jh/h2K9eP4mhepQxzZDNw0e+YkkxfATSYrhJ5IUw08kKYafSFIMP5GkGH4iSdk8z19b\nW4ulS5fi7Nmz8PDwQE5ODlasWIHc3Fy8+eabuPXWWwEAGzZswP333+/0hmXUMM1TsT7Gy/5z+e9e\nDFKse7cpf56fn+a/cdkMv5eXFzZt2oRJkybh4sWLmDx5MhITEwEAq1atwpo1a5zeJBE5ns3wazQa\naDQaAIC/vz+io6NRX1/v9MaIyLmu6zl/dXU1vvjiC0ydOhUAsGXLFsTGxiIzMxOtra19rmMwGKDX\n66HX69GNzoF3TEQO0e/wX7p0CSkpKXj11VcREBCAxx9/HJWVlSgrK4NGo8GTTz7Z53o5OTkwmUww\nmUzwhq/DGieigelX+Lu7u5GSkoIlS5Zg4cKFAAC1Wg1PT094eHhg2bJlKC0tdWqjRORYNsMvhEBW\nVhaio6OxevVqy/LGxkbLz3v27EFMTIxzOiQip7D5gt+xY8ewfft2TJgwAXFxcQC+P61XUFCAsrIy\nqFQq6HQ6vPHGG05vlq7fxgu3K9Y/vVenWBeNXzqwGxpMbIZ/+vTpEH18NzvP6RPd2PgOPyJJMfxE\nkmL4iSTF8BNJiuEnkhTDTyQplejrPJ6TBKgCMVWV4KrpiKRzXBSjTbT0ayyP/ESSYviJJMXwE0mK\n4SeSFMNPJCmGn0hSDD+RpFx6iW6fUR5o1VVZbp87d87y1d+DzWDtbbD2BbA3ezmyN5/q/h/PXfom\nn5/S6/UwmUzuml7RYO1tsPYFsDd7uas3PuwnkhTDTyQpz9zc3Fx3NjB58mR3Tq9osPY2WPsC2Ju9\n3NGbW5/zE5H78GE/kaQYfiJJuSX8Bw4cwLhx4xAZGYm8vDx3tGCVTqezXKNAr9e7tZfMzEwEBQX1\nuiBKS0sLEhMTERUVhcTERKvXSHRHb7m5uQgNDUVcXBzi4uKwb98+t/RWW1uLWbNmITo6GuPHj8fm\nzZsBuH/fWevLbftNuFhPT4+IiIgQlZWVorOzU8TGxory8nJXt2GVVqsV586dc3cbQgghPvroI3Hi\nxAkxfvx4y7K1a9eKjRs3CiGE2Lhxo3jqqacGTW/r1q0Tr7zyilv6+bGGhgZx4sQJIYQQbW1tIioq\nSpSXl7t931nry137zeVH/tLSUkRGRiIiIgI+Pj5ITU1FYWGhq9u4IcTHxyMwMLDXssLCQqSnpwMA\n0tPTsXfvXne01mdvg4VGo8GkSZMA9L6svLv3nbW+3MXl4a+vr0d4eLjldlhYmFt3wE+pVCrMnTsX\nkydPhsFgcHc712hqaoJGowHw/R9Tc3OzmzvqrT+XbXelH19WfjDtO3sud+9oLg+/6OPMokqlcnUb\nVh07dgyff/459u/fj61bt+Ljjz92d0s3jP5ett1VfnpZ+cHC3svdO5rLwx8WFoba2lrL7bq6OoSE\nhLi6Dat+6CUoKAjJycmD7tLjarXacoXkxsZGBAUFubmjfxtMl223dll5d++7wXS5e5eHf8qUKaio\nqEBVVRW6urqwc+dOJCUlubqNPrW3t+PixYuWnw8ePDjoLj2elJQEo9EIADAajViwYIGbO/q3wXLZ\ndmHlsvLu3nfW+nLbfnP5S4xCiKKiIhEVFSUiIiLESy+95I4W+lRZWSliY2NFbGysuP32293eW2pq\nqggODhZeXl4iNDRU5Ofni/Pnz4vZs2eLyMhIMXv2bHHhwoVB01taWpqIiYkREyZMEPPnzxcNDQ1u\n6e3IkSMCgJgwYYKYOHGimDhxoigqKnL7vrPWl7v2G9/eSyQpvsOPSFIMP5GkGH4iSTH8RJJi+Ikk\nxfATSYrhJ5LU/wOdAGX9nfSgHgAAAABJRU5ErkJggg==\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAPsAAAEXCAYAAABrgzLrAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAVZUlEQVR4nO3de9RVdZ3H8fcH5aKICsIQIEFeWN5mxGK8pGM2aBplWtNYTBk2GjVljrOYlWatpEmdVpNZM5VGaqJ5ibximomUYxqhaCgqlTcU6EE0YEArLo/f+WPvpw6Pz9nn4dwffp/XWmdxzv7ty5cDn7Ovv70VEZjZ9q9fqwsws+Zw2M0S4bCbJcJhN0uEw26WCIfdLBEOex8habykkLRjq2uplqSrJF3Qy3GXSTq20TWlxGFvM/l/8j9KeqXkNbrOywhJ+xS0n5aPc0m34Sflw6+qZz3WHA57ezoxInYpef2uBTU8A5zSbUtiGvDbFtRideCw91GSRkuaK2mNpKclfayk7VBJCyStk9Qh6ZuSBuRt9+WjPZpvNXygzCJWAUuA4/PphgFvBeZ2q+M9kp7Il3WvpP1L2g6R9IikDZJ+AAzqNu27JS3Op/2FpL+p8WuxAg5733UDsAIYDbwfuEjS3+dtncC/AcOBI4DJwCcBIuLofJyD862GHxQs42rgI/n7DwK3ARu7GiVNAK4HzgZGAHcCt0sakP+43ApcAwwDfgj8Q8m0hwBXAh8H9gC+A8yVNHCbvwnrFYe9Pd2ar+3WSbq1e6OkscCRwDkR8aeIWAxcTh7MiHg4In4ZEVsiYhlZkN5WRR23AMdI2i2f99Xd2j8A3BER8yJiM/BVYCeyLYDDgf7A1yNic0TcCDxUMu104DsRsTAiOiNiNtkPyeFV1Gm94LC3p5MjYvf8dXIP7aOBNRGxoWTY88AYyNa4kn4kaZWk9cBFZGv5bRIRfwTuAD4P7BERD/RQx/Ml478GLM/rGA2sjK17Wj1f8n4cMKPkR20dMDafzhrAYe+bfgcMkzSkZNgbgZX5+0uBXwP7RsSuwHmAqlzW1cAM4Ptl6hjX9UGSyAK7EugAxuTDSmvsshy4sORHbfeI2Dkirq+yTqvAYe+DImI58AvgPyUNyg9snc5fAjkEWA+8Imk/4F+6zeJFYK9eLu5/geOA/+mhbQ7wLkmTJfUn+1HYmNe2ANgCnCWpv6T3AYeWTPtd4BOSDlNmsKR3dfsBszpy2PuuqcB4srXrLcD5EXFP3vbvwD8BG8hC1f0g3Exgdr75fErRQiIzPyLW9ND2G+DDZD8ELwMnkp023BQRm4D3AacBa8j2728umXYR8DHgm8Ba4Ol8XGsQ+eYVZmnwmt0sEQ67WSIcdrNEOOxmiXDYrSFKu6hKOk/S5U1Y5jGSVjR6OX2Vw14jSW/s1h01JL1a8vnvGrjsd0m6Pz+FtkrS5b09T13SP76rzmWSzm1EnRFxUUSc0Yuaet3ffVtJGijpCknP5x1zFkt6ZyOW1a4c9hpFxAul3VHzwQeXDPt517gNuPHEbsAFZJeY7k92mep/beM8ds/rngp8QdIJ3UfoyzfMKLEj2VV7byP73j4PzJE0voU1NZXD3kD5TSAekHSJpN8DMyXNlPT9knG2ugONpN3yNVCHpJWSLpC0Q0/zj4jrIuKuiPhDRKwlu4DmyGpqjYgFwBPAQV2bw5LOkbQK+J6kfpLOlfSMpN9LmpN3e+36e5yarzV/L+lz3b6H7n/no/IureskLc+/p+nAh4DP5Fsat+fjjpZ0k6SXJD0n6ayS+eyUbw2slfQk8LcFf79XI2JmRCyLiNci4kfAc8Bbqvm++iKHvfEOA54FRgIX9mL8q8guM90HOAR4B3AG/HmXYZ2kN5aZ9miywG6T/HLVI4EDgV/lg99A1jV1HFkPtU8DJ5OtGUeTXfX2rXz6A8iuxz81b9sD2LPMssYBPya76m4EMBFYHBGzgGuBr+RbRCdK6gfcDjxKttUyGThb0vH57M4H9s5fx5PdXKN0Wd+W9O0ydYwEJlDF99VnRYRfdXwBAeyTvz8NeKFb+0zg+yWfx+fT7Ej2g7AR2KmkfSrws14s9ziyAE7oZZ1dy12XT7cUOCtvOwbYBAwqGX8pMLnk8yhgc173F4AbStoG59Mf2/3vDHwWuKVMTVcBF5R8PqyH7++zwPfy988CJ5S0TQdW9OLv3h+4h6yLbcv/zzTrtT3si7W75dsw7jiy/4gdJZ3F+lWah6TDgeuA90fEtt42anhEbOlh+EsR8adutd0i6bWSYZ1kP1CjS2uMiFfz3ZaejCW75VVvjANGK+v+2mUHoOs4yFbLZesutD3KtxauIfsxOrOXdWwXHPbG69754FVg55LPbyh5v5xszV4ugK+j7I4vc4F/joj5tRTaTfe6l+fL6N6nHUkdZAcIuz7vTLYp35PlbN37rdIyn4uIfcuM30H249G1KV5u96arLgFXkP1ATYnshhvJ8D578y0Gjs73v3cj2ywFICI6gLuBiyXtmh8U21tSj3eZkXQQcBfw6Yi4vYf2mZLurVPdlwEX5vvcSBoh6aS87Ubg3fmBtwHAf1D+/9a1wLGSTpG0o6Q9JE3M27p3vX0Q2JAfKNxJ0g6SDpLUdSBuDvBZSUMl7Ul2XKHIpWQ/SidGdmOOpDjsTRYR88i6nD4GPAz8qNsoHwEGAE+S7UvfSLZ/XHpOv2sNNoPsINcVJefLSw84jQVetyau0jfItiDulrQB+CXZPjUR8QTwKbJdiY687h4vbomIF4Apee1ryH78Ds6brwAOyA9C3hoRncC7yQ7iPUfWjfZyslNnAF8k23R/juxH8prSZUm6TNJl+ftxZPe7mwisKvm+PlTLl9KXuIvrdkzSYrKDauX2ny0hDrtZIrwZb5YIh90sEQ67WSKaep59gAbGIAY3c5FmSfkTr7IpNvZ42/Cawp73kPoG2VVNl0fEl4vGH8RgDtPkWhZpZgUWFlxXVfVmfN4T61vAO4EDgKl5hwgza0O17LMfCjwdEc9Gdo/wG4CTKkxjZi1SS9jHsHUnhBX5sK1Imi5pkaRFm//yAFAza7KGH42PiFkRMSkiJvXHT+M1a5Vawr6S7NrrLnvylwcLmlmbqSXsDwH7SnpT3tPpg2QdJcysDVV96i0itkg6E/gJ2am3K/PeT2bWhmo6zx4RdwJ31qkWM2sgXy5rlgiH3SwRDrtZIhx2s0Q47GaJcNjNEuGwmyXCYTdLhMNulgiH3SwRDrtZIhx2s0Q47GaJ8COb+4BlFxxR2N45qPwjvEYc+FLhtAsOvqmqmrrs/dOPFrYPeXCnsm0j//sXNS3bto3X7GaJcNjNEuGwmyXCYTdLhMNulgiH3SwRDrtZInyevQ2svWPfwvbHJ36zYcveXP4Ufa/8+u2XF7ZfO2lU2bY5895WOG3n0qeqqsl65jW7WSIcdrNEOOxmiXDYzRLhsJslwmE3S4TDbpYIn2dvgkrn0R+YeEPDln3Zur0K27+24LjC9vHjivvD333AzYXtHxrSUbbtwtOGF0671z
k+z15PNYVd0jJgA9AJbImISfUoyszqrx5r9rdHxMt1mI+ZNZD32c0SUWvYA7hb0sOSpvc0gqTpkhZJWrSZjTUuzsyqVetm/FERsVLSXwHzJP06Iu4rHSEiZgGzAHbVsBq7XZhZtWpas0fEyvzP1cAtwKH1KMrM6q/qsEsaLGlI13vgHcDj9SrMzOqrls34kcAtkrrmc11E3FWXqvqYLZPfUtj+04O/VWEO/Qtbv752QmH7zz5QcMbzd6sLp52wdlFhe79BgwrbL1r414Xt5w1fUrZty9AthdNafVUd9oh4Fji4jrWYWQP51JtZIhx2s0Q47GaJcNjNEuGwmyXCXVzr4JUxAwrb+1X4Ta10au3e9xSf3up89jeF7bV4+ouHFLZfN+ziCnMYWLZlz7u8rmkmf9tmiXDYzRLhsJslwmE3S4TDbpYIh90sEQ67WSJ8nr0Odr96QWH7+xd9uLBda9cXtm/pWLaNFdXPGVPuKWzfpV/58+jWXrxmN0uEw26WCIfdLBEOu1kiHHazRDjsZolw2M0S4fPsTdD55G9bXUJZyy48orD99N2/WmEOxbeantFxeNm2IfcsLZy2s8KSbdt4zW6WCIfdLBEOu1kiHHazRDjsZolw2M0S4bCbJcLn2bdz604tPo/+wEeKz6Pv1q/4PPqCjTsUti++oPx953da/2DhtFZfFdfskq6UtFrS4yXDhkmaJ+mp/M+hjS3TzGrVm834q4ATug07F5gfEfsC8/PPZtbGKoY9Iu4D1nQbfBIwO38/Gzi5znWZWZ1Vu88+MiI68vergJHlRpQ0HZgOMIidq1ycmdWq5qPxERFAFLTPiohJETGpf8FD/syssaoN+4uSRgHkf66uX0lm1gjVhn0uMC1/Pw24rT7lmFmjVNxnl3Q9cAwwXNIK4Hzgy8AcSacDzwOnNLJIq97Lby67hwVUPo9eybR7zyhsn3Crz6W3i4phj4ipZZom17kWM2sgXy5rlgiH3SwRDrtZIhx2s0Q47GaJcBfX7cCmeePKti3Y7+IKUxefejt4wbTC9v1nPFPY7ttBtw+v2c0S4bCbJcJhN0uEw26WCIfdLBEOu1kiHHazRPg8ex+w417jC9u/tM8Py7YNrdCF9eGNxcse96XiM+Wda9cWz8DahtfsZolw2M0S4bCbJcJhN0uEw26WCIfdLBEOu1kifJ69D9h7zsrC9kMGVP+bPXX+JwrbJzz6UNXztvbiNbtZIhx2s0Q47GaJcNjNEuGwmyXCYTdLhMNulgifZ28Da6cdUdj+xZGV7v0+sGzLtGXHFk65/2eeLmz3fd+3HxXX7JKulLRa0uMlw2ZKWilpcf6a0tgyzaxWvdmMvwo4oYfhl0TExPx1Z33LMrN6qxj2iLgPWNOEWsysgWo5QHempMfyzfyh5UaSNF3SIkmLNlPhhmdm1jDVhv1SYG9gItABlD2CFBGzImJSREzqX3Agycwaq6qwR8SLEdEZEa8B3wUOrW9ZZlZvVYVd0qiSj+8FHi83rpm1h4rn2SVdDxwDDJe0AjgfOEbSRCCAZcDHG1hjn7fjmNGF7X931sLC9l36Vb/7s+DJfQrbJ6x1f/VUVAx7REztYfAVDajFzBrIl8uaJcJhN0uEw26WCIfdLBEOu1ki3MW1CZaeN7aw/dY33F7T/N++5B/LtrkLq3Xxmt0sEQ67WSIcdrNEOOxmiXDYzRLhsJslwmE3S4TPszfBw++5pMIYtd3BZ7dPvla2bcvatTXN27YfXrObJcJhN0uEw26WCIfdLBEOu1kiHHazRDjsZonwefbtwOaRu5Vt679pTBMreb3Ol14u2xYbix8HpoHF1x/sMGJ4VTUBdI7YvbD9qRkDqp53b0Snyrbt9+kK9yBYv76qZXrNbpYIh90sEQ67WSIcdrNEOOxmiXDYzRLhsJslojePbB4LXA2MJHtE86yI+IakYcAPgPFkj20+JSLceboF7rjxylaXUNZbf9XTQ4AzL7+4a+G0Q0dsKGxf+Jbrqqqp3R3w+TML2/f6zIKq5tubNfsWYEZEHAAcDnxK0gHAucD8iNgXmJ9/NrM2VTHsEdEREY/k7zcAS4ExwEnA7Hy02cDJjSrSzGq3TfvsksYDhwALgZER0ZE3rSLbzDezNtXrsEvaBbgJODsitro4NyKCbH++p+mmS1okadFmiq+FNrPG6VXYJfUnC/q1EXFzPvhFSaPy9lHA6p6mjYhZETEpIib1r/HGimZWvYphlyTgCmBpRHytpGkuMC1/Pw24rf7lmVm9KNsCLxhBOgr4ObAE6Lpn8Xlk++1zgDcCz5OdeltTNK9dNSwO0+Raa+5z/viTNxW2zz/oxiZVkpY/xKaybZuj/O23e2PKY6cVtv/f4uq73466f0th+8AfP1S2bWHMZ32s6bH/bMXz7BFxP1Cu8216yTXro3wFnVkiHHazRDjsZolw2M0S4bCbJcJhN0uEbyXdBDsd/1xh+4EXFXdpjAb+Kw3Zr/DSiIZ2Iz3w5x8tbI8XBtc0/71ufKV844NLapr3UJ6qqb0VvGY3S4TDbpYIh90sEQ67WSIcdrNEOOxmiXDYzRJRsT97PaXan92sWYr6s3vNbpYIh90sEQ67WSIcdrNEOOxmiXDYzRLhsJslwmE3S4TDbpYIh90sEQ67WSIcdrNEOOxmiXDYzRLhsJslomLYJY2V9DNJT0p6QtK/5sNnSlopaXH+mtL4cs2sWr15/MAWYEZEPCJpCPCwpHl52yUR8dXGlWdm9VIx7BHRAXTk7zdIWgqMaXRhZlZf27TPLmk8cAiwMB90pqTHJF0paWiZaaZLWiRp0WY21lSsmVWv12GXtAtwE3B2RKwHLgX2BiaSrfkv7mm6iJgVEZMiYlJ/BtahZDOrRq/CLqk/WdCvjYibASLixYjojIjXgO8ChzauTDOrVW+Oxgu4AlgaEV8rGT6qZLT3Ao/Xvzwzq5feHI0/EjgVWCJpcT7sPGCqpIlAAMuAjzekQjOri94cjb8f6Ok+1HfWvxwzaxRfQWeWCIfdLBEOu1kiHHazRDjsZolw2M0S4bCbJcJhN0uEw26WCIfdLBEOu1kiHHazRDjsZolw2M0SoYho3sKkl4DnSwYNB15uWgHbpl1ra9e6wLVVq561jYuIET01NDXsr1u4tCgiJrWsgALtWlu71gWurVrNqs2b8WaJcNjNEtHqsM9q8fKLtGtt7VoXuLZqNaW2lu6zm1nztHrNbmZN4rCbJaIlYZd0gqTfSHpa0rmtqKEcScskLckfQ72oxbVcKWm1pMdLhg2TNE/SU/mfPT5jr0W1tcVjvAseM97S767Vjz9v+j67pB2A3wLHASuAh4CpEfFkUwspQ9IyYFJEtPwCDElHA68AV0fEQfmwrwBrIuLL+Q/l0Ig4p01qmwm80urHeOdPKxpV+phx4GTgNFr43RXUdQpN+N5asWY/FHg6Ip6NiE3ADcBJLaij7UXEfcCaboNPAmbn72eT/WdpujK1tYWI6IiIR/L3G4Cux4y39LsrqKspWhH2McDyks8raK/nvQdwt6SHJU1vdTE9GBkRHfn7VcDIV
hbTg4qP8W6mbo8Zb5vvrprHn9fKB+he76iIeDPwTuBT+eZqW4psH6ydzp326jHezdLDY8b/rJXfXbWPP69VK8K+Ehhb8nnPfFhbiIiV+Z+rgVtov0dRv9j1BN38z9UtrufP2ukx3j09Zpw2+O5a+fjzVoT9IWBfSW+SNAD4IDC3BXW8jqTB+YETJA0G3kH7PYp6LjAtfz8NuK2FtWylXR7jXe4x47T4u2v5488joukvYArZEflngM+1ooYyde0FPJq/nmh1bcD1ZJt1m8mObZwO7AHMB54C7gGGtVFt1wBLgMfIgjWqRbUdRbaJ/hiwOH9NafV3V1BXU743Xy5rlggfoDNLhMNulgiH3SwRDrtZIhx2s0Q47GaJcNjNEvH/9ALsS7Cy9ngAAAAASUVORK5CYII=\n", "text/plain": [ - "\u003cFigure size 600x400 with 1 Axes\u003e" + "\u003cFigure size 432x288 with 1 Axes\u003e" ] }, "metadata": { + "needs_background": "light", "tags": [] }, "output_type": "display_data" } ], "source": [ - "import matplotlib.pylab as plt\n", - "\n", - "plt.imshow(test_images[0])\n", - "template = \"True:{true}, predicted:{predict}\"\n", - "_ = plt.title(template.format(true= str(test_labels[0]),\n", - " predict=str(np.argmax(predictions[0]))))\n", - "plt.grid(False)" + "test_model(tflite_model_file, test_image_index, model_type=\"Float\")" ] }, { @@ -611,41 +789,37 @@ "id": "o3N6-UGl1dfE" }, "source": [ - "Now test the quantized model (using the uint8 data):" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "3gwhv4lKbYZ4" - }, - "outputs": [], - "source": [ - "input_index = interpreter_quant.get_input_details()[0][\"index\"]\n", - "output_index = interpreter_quant.get_output_details()[0][\"index\"]\n", - "interpreter_quant.set_tensor(input_index, test_image)\n", - "interpreter_quant.invoke()\n", - "predictions = interpreter_quant.get_tensor(output_index)" + "And test the quantized model:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "colab": {}, + "colab": { + "height": 296 + }, "colab_type": "code", - "id": "CIH7G_MwbY2x" + "id": "rc1i9umMcp0t", + "outputId": "480bc68f-812b-460e-82fe-d66f70b4345e" }, - "outputs": [], + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAPsAAAEXCAYAAABrgzLrAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAWRklEQVR4nO3de9RVdZ3H8fcHRVRQBHEQ0SBvldoSi9FKKxu1lKm0Vjk5jWLlYGuyci3XlGlTNKPWNJrZTQcvqeUl0kwtM5VyeYkx0UhQKm94oUfRwEQtBPzOH/v32PHhnH0O5w6/z2utZ3HO/u3L9zk8n7Ovv70VEZjZhm9Yrwsws+5w2M0y4bCbZcJhN8uEw26WCYfdLBMOu1kmHPYMSHpO0o5tnufNko5p5zzbuUxJIWnnTte0PnHYO0DS0ZIWSHpB0hOSviNpdJeWvVYgImJURDzUjeWnGmamsH16yPBPp+Ezu1WL/Y3D3maSTgD+G/h3YDTwJmAycIOk4T0srdv+ABw1ZNj0NNx6wGFvI0lbAl8CPhkR10fEqohYDBwO7Aj8cxrvQkmnVEy3v6THK96fKOlBSSsk3SfpfRVtR0u6TdLpkpZLeljSIantVOCtwLfSpvu30vCQtLOk7dLwwZ8XJEXFvD8qaVGa788lTapoO0jS7yT9Oc1XdT6OO4HNJe2ept8d2DQNr/zM/lXSA5KWSbpG0naNLrOsXlubw95eb6H4g/5R5cCIeA64Dnhng/N5kCK0oym+PL4vaUJF+z7A74FxwFeB8yUpIk4GbgWOS5vuxw2p449p+KiIGAVcBVwOIOlQ4CTg/cA2aT6XpbZx6Xf6fFrmg8C+Dfwe3+Nva/fp6f3LJP0D8GWKL8MJwCMV9ZQus6xeq85hb69xwNMRsbpK2wDFH2VdEfHDFMyXIuIHwP3A3hWjPBIR50bEGuAiiqCMX5dCJX0WeC3w0TTo48CXI2JRqv80YEpaW04D7o2IKyJiFfB14IkGFvN94Ii0+/Kh9L7Sh4ELIuLuiFgJfA54s6TJDSyzrF6rwmFvr6eBcZI2rtI2IbXXJekoSfMlPSPpGWAPii+SQS//0UfEC+nlqEaLTJv9nwYOi4i/pMGTgLMqlrmMYrN5IrAd8FjFMqPyfS0R8SjwAEUQ74+IodNsR7E2Hxz/OeBPDS6zrF6rwmFvr7nASopNy5dJGgUcAtycBj0PbF4xyrYV404CzgWOA7aOiK2AhdTfRx5U2mdZ0msotgYOHxK+x4BjI2Krip/NIuJXFFslO1TMQ5Xv67gYOCH9O9QfKUI7ON+RwNbAkgaWWVavVeGwt1FE/JliH/ubkg6WNDxtks6mWKtfkkadD0yTNFbStsDxFbMZSRHYpwAkfYRizd6oJykOBq4lHUC8Gjg5Im4b0nwO8LmKA2qjJX0wtf0U2F3S+9NWy6eo+IKq4wcUxypmV2m7DPiIpCmSRlBsAdyRDmrWW2ZZvVaFw95mEfFVigNHpwMrgIcp1uIHRsTzabTvAb8FFgM3UARicPr7gDMothKeBF4P3L4OJZwFfCAdof7GkLY3AK8Bzqw8Kp+WexXFKcPLJT1LsTVxSGp7Gvgg8BWKzexdGq0pIv4SETdV7C5Utt0E/AdwJcWafCeKffu6yyyr16qT71TTWWnN/J/Avmkf1qwnHPYukHQksCoiLu91LZYvh90sE95nN8uEw24dIWmxpAPT65MkndeFZb7ismN7JYe9RZJeNeR685D0fMX7t3Zw2f+YrpN/RkXvuvMkbdHgtJNTrYN1LpZ0YifqjIjTIqJu11QN6TPQTpJGSDpf0iMq+hzMTxcXZcNhb1FEPDrkenOAPSuG3To4bo0r61oxGjiF4mqz11FcPfY/6ziPrVLdRwBfkHTw0BE6UHcvbExxIc7bKT63zwOz03UQWXDYO0hFD7XbJZ0p6U/ATBV9vb9fMc7gGnbj9H50WgMNSFoi6RRJG1Wbf0RcmnrXvRARyymuvGukg0q1ec0F7gX2GNwclvRZSU8A35U0TH/rjfcnSbMlja34PY5Ma80/STp5yOcw9HfeT9Kv0hbJY+lzmkFxrfxn0pbGtWnc7SRdKekpFT38PlUxn83S1sBySfcBf1/y+z0fETMjYnHqc/ATimsg3tjM57U+ctg7bx/gIYqOKqc2MP6FwGpgZ2AviqvPjoGXdxmekfSqGtO+jSKw60SFfYHdgd+kwdsCYykuZ50BfBI4jGLNuB2wHPh2mn434GzgyNS2NbB9jWVNAn4GfJOiY9AUYH5EzKK4wvCraYvoPZKGAddSXIA0ETgAOF7Su9LsvkhxIc5OwLsoetZVLus7kr5To47xwK408XmttyLCP238objUdef0+mjg0SHtM4HvV7yfnKbZmOILYSWwWUX7EcAvG1juQRQB3LXBOgeX+0yabhHwqdS2P/AisGnF+IuAAyreTwBWpbq/AFxe0TYyTX/g0N+ZomfbVTVquhA4peL9PlU+v88B302vHwIOrmibATzewO8+HLgJ+N9e/71082dD2Bfrd3V7h1WYRPGHOCC93O9lWL15SHoTcCnwgYhY1zvBjIvqXXKfioi/DqntKkkvVQxbQ/EFNbSH2vNpt6WaHSj6pjdiErCdil5tgzai6LvO0OVS0YOulrS18D2KL6Pj6oy+QXHYO2/oVUs1e7xR/OGupHYA1yJpL+Aa4KMRMaeVQocYWvdjaRlrXRMvaYDiAOHg+80pNuWreYxX9s2vt8yHI2KXGuMP9owb3BSvtXszWJeA8ym+oKZF0U8+G95n7775wNvS/vdois1SACJigKJjzBmStkwHxXaS9PZqM5K0B3A9xW2wrq3SPlPSzW2q+xzg1LTPjaRtVNwtBuAK4N3pwNsmFH0Bav1tXQIcKOlwSRtL2lrSlNQ2tMfer4EV6UDhZpI2krSHpMEDcbMper6NkbQ9xXGFMmdTfCm9J6p0zNnQOexdFhE3UvRyuwe4C/jJkFGOAjYB7qPYl76CYv+48pz+4BrsBIqDXOdXnC+vPOC0A+vWY67MWRRbEDdIWgH8H8U+NRFxL/AJil2JgVR31YtbougMNC3Vvoziy2/P1Hw+sFs6CPnjKO7E826Kg3gPU3QTPo/i1BkU3YkfSW03sPZtr86RdE56PQk4Ns3riYrP68OtfCjrE18bvwGTNJ/ioFqt/WfLiMNulglvxptlwmE3y4TDbpaJrp5n30QjYlNGdnORZln5K8/zYqyseifilsKeekidRXFV03kR8ZWy8TdlJPvogFYWaWYl7ii5rqrpzfjUE+vbFHf03I3iyR+7NTs/M+usVvbZ9wYeiIiHIuJFimd0HVpnGjPrkVbCPpFXdkJ4nCqP3pE0Q9I8SfNWsbKFxZlZKzp+ND4iZkXE1IiYOpwRnV6cmdXQStiX8Mpnb22fhplZH2ol7HcCu0h6derp9CGKjhJm1oeaPvUWEaslHQf8nOLU2wWp95OZ9aGWzrNHxHXAdW2qxcw6yJfLmmXCYTfLhMNulgmH3SwTDrtZJhx2s0w47GaZcNjNMuGwm2XCYT
fLhMNulgmH3SwTDrtZJvzI5vXA4lPeXNq+ZtPaj/DaZvenSqedu+eVTdU0aKdffKS0fYtfb1azbfw3ftXSsm3deM1ulgmH3SwTDrtZJhx2s0w47GaZcNjNMuGwm2XC59n7wPKf7lLavnDKtzq27FW1T9E35HfvOK+0/ZKpE2q2zb7x7aXTrll0f1M1WXVes5tlwmE3y4TDbpYJh90sEw67WSYcdrNMOOxmmfB59i6odx799imXd2zZ5zyzY2n71+YeVNo+eVJ5f/gbdvtRafuHtxio2Xbq0eNKp93xsz7P3k4thV3SYmAFsAZYHRFT21GUmbVfO9bs74iIp9swHzPrIO+zm2Wi1bAHcIOkuyTNqDaCpBmS5kmat4qVLS7OzJrV6mb8fhGxRNLfATdK+l1E3FI5QkTMAmYBbKmxLXa7MLNmtbRmj4gl6d+lwFXA3u0oyszar+mwSxopaYvB18A7gYXtKszM2quVzfjxwFWSBudzaURc35aq1jOrD3hjafsv9vx2nTkML239+vJdS9t/+U8lZzz/uLR02l2XzyttH7bppqXtp93x+tL2k8YtqNm2eszq0mmtvZoOe0Q8BOzZxlrMrIN86s0sEw67WSYcdrNMOOxmmXDYzTLhLq5t8NzETUrbh9X5Tq13au3m95af3lrz0O9L21vxwJf2Km2/dOwZdeYwombL9td7XdNN/rTNMuGwm2XCYTfLhMNulgmH3SwTDrtZJhx2s0z4PHsbbHXx3NL2D8z7l9J2LX+2tH31wOJ1rKh9jpl2U2n7qGG1z6Nbf/Ga3SwTDrtZJhx2s0w47GaZcNjNMuGwm2XCYTfLhM+zd8Ga+/7Q6xJqWnzqm0vbP7bV6XXmUH6r6RMG3lSzbYubFpVOu6bOkm3deM1ulgmH3SwTDrtZJhx2s0w47GaZcNjNMuGwm2XC59k3cM8cWX4e/fajys+jjx5Wfh597sqNStvnn1L7vvObPfvr0mmtvequ2SVdIGmppIUVw8ZKulHS/enfMZ0t08xa1chm/IXAwUOGnQjMiYhdgDnpvZn1sbphj4hbgGVDBh8KXJReXwQc1ua6zKzNmt1nHx8RA+n1E8D4WiNKmgHMANiUzZtcnJm1quWj8RERQJS0z4qIqRExdXjJQ/7MrLOaDfuTkiYApH+Xtq8kM+uEZsN+DTA9vZ4OXN2ecsysU+rus0u6DNgfGCfpceCLwFeA2ZI+BjwCHN7JIq15T7+h5h4WUP88ej3Tbz6mtH3XH/tcer+oG/aIOKJG0wFtrsXMOsiXy5plwmE3y4TDbpYJh90sEw67WSbcxXUD8OKNk2q2zX3tGXWmLj/1tufc6aXtrzvhwdJ23w66f3jNbpYJh90sEw67WSYcdrNMOOxmmXDYzTLhsJtlwufZ1wMb7zi5tP2/dv5hzbYxdbqw3rWyfNmT/qv8TPma5cvLZ2B9w2t2s0w47GaZcNjNMuGwm2XCYTfLhMNulgmH3SwTPs++Hthp9pLS9r02af47+4g5Hy9t3/W3dzY9b+svXrObZcJhN8uEw26WCYfdLBMOu1kmHHazTDjsZpnwefY+sHz6m0vbvzS+3r3fR9Rsmb74wNIpX/eZB0rbfd/3DUfdNbukCyQtlbSwYthMSUskzU8/0zpbppm1qpHN+AuBg6sMPzMipqSf69pblpm1W92wR8QtwLIu1GJmHdTKAbrjJN2TNvPH1BpJ0gxJ8yTNW0WdG56ZWcc0G/azgZ2AKcAAUPMIUkTMioipETF1eMmBJDPrrKbCHhFPRsSaiHgJOBfYu71lmVm7NRV2SRMq3r4PWFhrXDPrD3XPs0u6DNgfGCfpceCLwP6SpgABLAaO7WCN672NJ25X2v7WT91R2j5qWPO7P3Pv27m0fdfl7q+ei7phj4gjqgw+vwO1mFkH+XJZs0w47GaZcNjNMuGwm2XCYTfLhLu4dsGik3Yobf/xtte2NP93LPhgzTZ3YbVBXrObZcJhN8uEw26WCYfdLBMOu1kmHHazTDjsZpnwefYuuOu9Z9YZo7U7+Iz+t5dqtq1evryleduGw2t2s0w47GaZcNjNMuGwm2XCYTfLhMNulgmH3SwTPs++AVg1fnTNtuEvTuxiJWtb89TTNdtiZfnjwDSi/PqDjbYZ11RNAGu22aq0/f4TNml63o2INarZ9tpP1rkHwbPPNrVMr9nNMuGwm2XCYTfLhMNulgmH3SwTDrtZJhx2s0w08sjmHYCLgfEUj2ieFRFnSRoL/ACYTPHY5sMjwp2ne+CnV1zQ6xJqestvqj0EuPD0k1uWTjtmmxWl7Xe88dKmaup3u33+uNL2HT8zt6n5NrJmXw2cEBG7AW8CPiFpN+BEYE5E7ALMSe/NrE/VDXtEDETE3en1CmARMBE4FLgojXYRcFinijSz1q3TPrukycBewB3A+IgYSE1PUGzmm1mfajjskkYBVwLHR8QrLs6NiKDYn6823QxJ8yTNW0X5tdBm1jkNhV3ScIqgXxIRP0qDn5Q0IbVPAJZWmzYiZkXE1IiYOrzFGyuaWfPqhl2SgPOBRRHxtYqma4Dp6fV04Or2l2dm7aJiC7xkBGk/4FZgATB4z+KTKPbbZwOvAh6hOPW2rGxeW2ps7KMDWq15vfOXn7+6tH3OHld0qZK8vBAv1mxbFbVvv92IafccXdr+5/nNd7+dcNvq0vYRP7uzZtsdMYdnY1nV/rN1z7NHxG1Arc63+SXXbD3lK+jMMuGwm2XCYTfLhMNulgmH3SwTDrtZJnwr6S7Y7F0Pl7bvflp5l8bo4P/SFq8tvTSio91Id7/1I6Xt8ejIlua/4xXP1W789YKW5j2G+1tq7wWv2c0y4bCbZcJhN8uEw26WCYfdLBMOu1kmHHazTNTtz95OufZnN+uWsv7sXrObZcJhN8uEw26WCYfdLBMOu1kmHHazTDjsZplw2M0y4bCbZcJhN8uEw26WCYfdLBMOu1kmHHazTDjsZpmoG3ZJO0j6paT7JN0r6dNp+ExJSyTNTz/TOl+umTWrkccPrAZOiIi7JW0B3CXpxtR2ZkSc3rnyzKxd6oY9IgaAgfR6haRFwMROF2Zm7bVO++ySJgN7AXekQcdJukfSBZLG1JhmhqR5kuatYmVLxZpZ8xoOu6RRwJXA8RHxLHA2sBMwhWLNf0a16SJiVkRMjYipwxnRhpLNrBkNhV3ScIqgXxIRPwKIiCcjYk1EvAScC+zduTLNrFWNHI0XcD6wKCK+VjF8QsVo7wMWtr88M2uXRo7G7wscCSyQND8NOwk4QtIUIIDFwLEdqdDM2qKRo/G3AdXuQ31d+8sxs07xFXRmmXDYzTLhsJtlwmE3y4TDbpYJh90sEw67WSYcdrNMOOxmmXDYzTLhsJtlwmE3y4TDbpYJh90sE4qI7i1Megp4pGLQOODprhWwbvq1tn6tC1xbs9pZ26SI2KZaQ1fDvtbCpXkRMbVnBZTo19r6tS5wbc3qVm3ejDfLhMNuloleh31Wj5dfpl9r69e6wLU1qyu19XSf3cy6p9drdjPrEofdLBM9CbukgyX9XtIDkk7sRQ21SFosa
UF6DPW8HtdygaSlkhZWDBsr6UZJ96d/qz5jr0e19cVjvEseM97Tz67Xjz/v+j67pI2APwAHAY8DdwJHRMR9XS2kBkmLgakR0fMLMCS9DXgOuDgi9kjDvgosi4ivpC/KMRHx2T6pbSbwXK8f452eVjSh8jHjwGHA0fTwsyup63C68Ln1Ys2+N/BARDwUES8ClwOH9qCOvhcRtwDLhgw+FLgovb6I4o+l62rU1hciYiAi7k6vVwCDjxnv6WdXUldX9CLsE4HHKt4/Tn897z2AGyTdJWlGr4upYnxEDKTXTwDje1lMFXUf491NQx4z3jefXTOPP2+VD9Ctbb+IeANwCPCJtLnal6LYB+unc6cNPca7W6o8Zvxlvfzsmn38eat6EfYlwA4V77dPw/pCRCxJ/y4FrqL/HkX95OATdNO/S3tcz8v66THe1R4zTh98dr18/Hkvwn4nsIukV0vaBPgQcE0P6liLpJHpwAmSRgLvpP8eRX0NMD29ng5c3cNaXqFfHuNd6zHj9Piz6/njzyOi6z/ANIoj8g8CJ/eihhp17Qj8Nv3c2+vagMsoNutWURzb+BiwNTAHuB+4CRjbR7V9D1gA3EMRrAk9qm0/ik30e4D56Wdarz+7krq68rn5clmzTPgAnVkmHHazTDjsZplw2M0y4bCbZcJhN8uEw26Wif8HteKJB66NhMUAAAAASUVORK5CYII=\n", + "text/plain": [ + "\u003cFigure size 432x288 with 1 Axes\u003e" + ] + }, + "metadata": { + "needs_background": "light", + "tags": [] + }, + "output_type": "display_data" + } + ], "source": [ - "plt.imshow(test_images[0])\n", - "template = \"True:{true}, predicted:{predict}\"\n", - "_ = plt.title(template.format(true= str(test_labels[0]),\n", - " predict=str(np.argmax(predictions[0]))))\n", - "plt.grid(False)" + "test_model(tflite_model_quant_file, test_image_index, model_type=\"Quantized\")" ] }, { @@ -655,7 +829,17 @@ "id": "LwN7uIdCd8Gw" }, "source": [ - "### Evaluate the models" + "### Evaluate the models on all images" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "RFKOD4DG8XmU" + }, + "source": [ + "Now let's run both models using all the test images we loaded at the beginning of this tutorial:" ] }, { @@ -668,49 +852,52 @@ }, "outputs": [], "source": [ - "# A helper function to evaluate the TF Lite model using \"test\" dataset.\n", - "def evaluate_model(interpreter):\n", - " input_index = interpreter.get_input_details()[0][\"index\"]\n", - " output_index = interpreter.get_output_details()[0][\"index\"]\n", + "# Helper function to evaluate a TFLite model on all images\n", + "def evaluate_model(tflite_file, model_type):\n", + " global test_images\n", + " global test_labels\n", "\n", - " # Run predictions on every image in the \"test\" dataset.\n", - " prediction_digits = []\n", - " for test_image in test_images:\n", - " # Pre-processing: add batch dimension and convert to float32 to match with\n", - " # the model's input data format.\n", - " test_image = np.expand_dims(test_image, axis=0).astype(np.float32)\n", - " interpreter.set_tensor(input_index, test_image)\n", + " test_image_indices = range(test_images.shape[0])\n", + " predictions = run_tflite_model(tflite_file, test_image_indices)\n", "\n", - " # Run inference.\n", - " interpreter.invoke()\n", + " accuracy = (np.sum(test_labels== predictions) * 100) / len(test_images)\n", "\n", - " # Post-processing: remove batch dimension and find the digit with highest\n", - " # probability.\n", - " output = interpreter.tensor(output_index)\n", - " digit = np.argmax(output()[0])\n", - " prediction_digits.append(digit)\n", - "\n", - " # Compare prediction results with ground truth labels to calculate accuracy.\n", - " accurate_count = 0\n", - " for index in range(len(prediction_digits)):\n", - " if prediction_digits[index] == test_labels[index]:\n", - " accurate_count += 1\n", - " accuracy = accurate_count * 1.0 / len(prediction_digits)\n", - "\n", - " return accuracy" + " print('%s model accuracy is %.4f%% (Number of test samples=%d)' % (\n", + " model_type, accuracy, len(test_images)))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "xnFilQpBuMh5" + }, + 
"source": [ + "Evaluate the float model:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "colab": {}, + "colab": { + "height": 34 + }, "colab_type": "code", - "id": "T5mWkSbMcU5z" + "id": "T5mWkSbMcU5z", + "outputId": "7e05d400-1455-4c1a-f3f0-b81422c3a0ba" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Float model accuracy is 97.8700% (Number of test samples=10000)\n" + ] + } + ], "source": [ - "print(evaluate_model(interpreter))" + "evaluate_model(tflite_model_file, model_type=\"Float\")" ] }, { @@ -720,25 +907,31 @@ "id": "Km3cY9ry8ZlG" }, "source": [ - "Repeat the evaluation on the fully quantized model using the uint8 data:" + "Evaluate the quantized model:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "colab": {}, + "colab": { + "height": 34 + }, "colab_type": "code", - "id": "-9cnwiPp6EGm" + "id": "-9cnwiPp6EGm", + "outputId": "1e7409cf-748d-45c9-aa2f-36ccd9454f45" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Quantized model accuracy is 97.8100% (Number of test samples=10000)\n" + ] + } + ], "source": [ - "# NOTE: Colab runs on server CPUs, and TensorFlow Lite currently\n", - "# doesn't have super optimized server CPU kernels. So this part may be\n", - "# slower than the above float interpreter. But for mobile CPUs, considerable\n", - "# speedup can be observed.\n", - "\n", - "print(evaluate_model(interpreter_quant))" + "evaluate_model(tflite_model_quant_file, model_type=\"Quantized\")" ] }, { @@ -748,7 +941,9 @@ "id": "L7lfxkor8pgv" }, "source": [ - "In this example, you have fully quantized a model with almost no difference in the accuracy, compared to the above float model." + "So you now have an integer quantized a model with almost no difference in the accuracy, compared to the float model.\n", + "\n", + "To learn more about other quantization strategies, read about [TensorFlow Lite model optimization](https://www.tensorflow.org/lite/performance/model_optimization)." ] } ], From dcbf62e6b2588c83c3b0fdfcb1fe101eb2afae47 Mon Sep 17 00:00:00 2001 From: Terry Huang Date: Tue, 4 Aug 2020 14:14:34 -0700 Subject: [PATCH 2107/2522] reduce over-broad dependencies in regex_split library PiperOrigin-RevId: 324887712 Change-Id: I1368673dd1f4b04331de98f656f8d642a1d210c0 --- tensorflow/core/platform/BUILD | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/platform/BUILD b/tensorflow/core/platform/BUILD index 4fe09498e93..a889666c608 100644 --- a/tensorflow/core/platform/BUILD +++ b/tensorflow/core/platform/BUILD @@ -780,7 +780,10 @@ cc_library( name = "types", hdrs = ["types.h"], # TODO(b/161569340): Short-term fix. Remove this visibility rule. - visibility = ["//tensorflow:__subpackages__"], + visibility = [ + "//tensorflow:__subpackages__", + "//tensorflow_text:__subpackages__", + ], deps = [ ":platform", ":tstring", From 5c6f844c0208a605a9909953cca00ece1373e990 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 4 Aug 2020 14:41:18 -0700 Subject: [PATCH 2108/2522] In preparation for making the proto ParseFromString() method accept string_view, this change makes the conversion from py::bytes to std::string explicit, since this will no longer happen implicitly after the parameter type change. 
PiperOrigin-RevId: 324893229 Change-Id: I5ba90b32d5e68b45281c0000b747df3d9b6bf532 --- .../python/profiler/internal/profiler_wrapper.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/profiler/internal/profiler_wrapper.cc b/tensorflow/python/profiler/internal/profiler_wrapper.cc index 0f57204d1d0..0984a8b45c5 100644 --- a/tensorflow/python/profiler/internal/profiler_wrapper.cc +++ b/tensorflow/python/profiler/internal/profiler_wrapper.cc @@ -177,7 +177,7 @@ PYBIND11_MODULE(_pywrap_profiler, m) { m.def("xspace_to_trace_events", [](const py::bytes& serialized_xspace_proto) { tensorflow::string content; tensorflow::profiler::XSpace xspace; - xspace.ParseFromString(serialized_xspace_proto); + xspace.ParseFromString(std::string(serialized_xspace_proto)); tensorflow::profiler::ConvertXSpaceToTraceEventsString(xspace, &content); return py::bytes(content); }); @@ -185,7 +185,7 @@ PYBIND11_MODULE(_pywrap_profiler, m) { m.def("xspace_to_overview_page", [](const py::bytes& serialized_xspace_proto) { tensorflow::profiler::XSpace xspace; - xspace.ParseFromString(serialized_xspace_proto); + xspace.ParseFromString(std::string(serialized_xspace_proto)); tensorflow::profiler::OverviewPage overview_page = tensorflow::profiler::ConvertOpStatsToOverviewPage( ConvertXSpaceToOpStats( @@ -196,7 +196,7 @@ PYBIND11_MODULE(_pywrap_profiler, m) { m.def("xspace_to_input_pipeline", [](const py::bytes& serialized_xspace_proto) { tensorflow::profiler::XSpace xspace; - xspace.ParseFromString(serialized_xspace_proto); + xspace.ParseFromString(std::string(serialized_xspace_proto)); tensorflow::profiler::InputPipelineAnalysisResult input_pipeline = tensorflow::profiler::ConvertOpStatsToInputPipelineAnalysis( ConvertXSpaceToOpStats(xspace, {OP_METRICS_DB, STEP_DB})); @@ -205,7 +205,7 @@ PYBIND11_MODULE(_pywrap_profiler, m) { m.def("xspace_to_tf_stats", [](const py::bytes& serialized_xspace_proto) { tensorflow::profiler::XSpace xspace; - xspace.ParseFromString(serialized_xspace_proto); + xspace.ParseFromString(std::string(serialized_xspace_proto)); tensorflow::profiler::TfStatsDatabase tf_stats_db = tensorflow::profiler::ConvertOpStatsToTfStats( ConvertXSpaceToOpStats(xspace, {OP_METRICS_DB})); @@ -214,7 +214,7 @@ PYBIND11_MODULE(_pywrap_profiler, m) { m.def("xspace_to_kernel_stats", [](const py::bytes& serialized_xspace_proto) { tensorflow::profiler::XSpace xspace; - xspace.ParseFromString(serialized_xspace_proto); + xspace.ParseFromString(std::string(serialized_xspace_proto)); tensorflow::profiler::OpStats op_stats = ConvertXSpaceToOpStats(xspace, {KERNEL_STATS_DB}); return py::bytes(op_stats.kernel_stats_db().SerializeAsString()); @@ -223,7 +223,7 @@ PYBIND11_MODULE(_pywrap_profiler, m) { m.def("xspace_to_memory_profile", [](const py::bytes& serialized_xspace_proto) { tensorflow::profiler::XSpace xspace; - xspace.ParseFromString(serialized_xspace_proto); + xspace.ParseFromString(std::string(serialized_xspace_proto)); std::string json_output; tensorflow::profiler::ConvertXSpaceToMemoryProfileJson(xspace, &json_output); From 12208cd82db68e19b81680a2b98a6f593256bfff Mon Sep 17 00:00:00 2001 From: Brian Zhao Date: Tue, 4 Aug 2020 14:42:20 -0700 Subject: [PATCH 2109/2522] Cleaning up unused TF_TensorHandleList, now that captures are not exposed on the C API. 
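For context, the surface being removed is small: an opaque list type and two
accessors, condensed here only as a reminder (the full text appears in the
deleted header and implementation below):

    // Condensed from the removed header (export macros omitted).
    typedef struct TF_TensorHandleList TF_TensorHandleList;

    // Returns the size of `list`.
    size_t TF_TensorHandleListSize(const TF_TensorHandleList* list);

    // Returns the `i`th TFE_TensorHandle in the list.
    TFE_TensorHandle* TF_TensorHandleListGet(const TF_TensorHandleList* list, int i);

Since captures are no longer exposed through the C API, nothing references
these symbols anymore, so the header, the implementation, and the
corresponding BUILD targets can all be deleted.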
PiperOrigin-RevId: 324893440 Change-Id: Ie89e64894e19af2e72a187bc07065595d0704925 --- .../c/experimental/saved_model/internal/BUILD | 35 --------------- .../saved_model/internal/concrete_function.cc | 1 - .../internal/saved_model_api_test.cc | 1 - .../saved_model/internal/tensorhandle_list.cc | 36 ---------------- .../internal/tensorhandle_list_type.h | 37 ---------------- .../c/experimental/saved_model/public/BUILD | 7 --- .../saved_model/public/c_saved_model_api.h | 1 - .../saved_model/public/concrete_function.h | 1 - .../saved_model/public/tensorhandle_list.h | 43 ------------------- 9 files changed, 162 deletions(-) delete mode 100644 tensorflow/c/experimental/saved_model/internal/tensorhandle_list.cc delete mode 100644 tensorflow/c/experimental/saved_model/internal/tensorhandle_list_type.h delete mode 100644 tensorflow/c/experimental/saved_model/public/tensorhandle_list.h diff --git a/tensorflow/c/experimental/saved_model/internal/BUILD b/tensorflow/c/experimental/saved_model/internal/BUILD index 60ca0134602..323298c5fc1 100644 --- a/tensorflow/c/experimental/saved_model/internal/BUILD +++ b/tensorflow/c/experimental/saved_model/internal/BUILD @@ -38,8 +38,6 @@ cc_library( ":concrete_function_type", ":function_metadata", ":function_metadata_type", - ":tensorhandle_list", - ":tensorhandle_list_type", "//tensorflow/c:c_api_macros", "//tensorflow/c:tf_status_internal", "//tensorflow/c/eager:abstract_tensor_handle", @@ -167,38 +165,6 @@ cc_library( ], ) -cc_library( - name = "tensorhandle_list", - srcs = [ - "tensorhandle_list.cc", - ], - hdrs = [ - "//tensorflow/c/experimental/saved_model/public:tensorhandle_list.h", - ], - copts = tf_copts(), - visibility = [ - "//tensorflow/c/experimental/saved_model/public:__pkg__", - ], - deps = [ - ":tensorhandle_list_type", - "//tensorflow/c:c_api_macros", - "//tensorflow/c/eager:c_api", - "//tensorflow/c/eager:immediate_execution_tensor_handle", - "//tensorflow/c/eager:tfe_tensorhandle_internal", - ], -) - -cc_library( - name = "tensorhandle_list_type", - hdrs = [ - "tensorhandle_list_type.h", - ], - deps = [ - "//tensorflow/c:conversion_macros", - "//tensorflow/c/eager:immediate_execution_tensor_handle", - ], -) - tf_cc_test( name = "saved_model_api_test", size = "small", @@ -216,7 +182,6 @@ tf_cc_test( "//tensorflow/c/eager:c_api_test_util", "//tensorflow/c/experimental/saved_model/public:concrete_function", "//tensorflow/c/experimental/saved_model/public:saved_model_api", - "//tensorflow/c/experimental/saved_model/public:tensorhandle_list", "//tensorflow/core:lib", "//tensorflow/core:test", "//tensorflow/core:test_main", diff --git a/tensorflow/c/experimental/saved_model/internal/concrete_function.cc b/tensorflow/c/experimental/saved_model/internal/concrete_function.cc index 9f421a7b9b7..65c6eca5623 100644 --- a/tensorflow/c/experimental/saved_model/internal/concrete_function.cc +++ b/tensorflow/c/experimental/saved_model/internal/concrete_function.cc @@ -24,7 +24,6 @@ limitations under the License. 
#include "tensorflow/c/experimental/saved_model/core/function_metadata.h" #include "tensorflow/c/experimental/saved_model/internal/concrete_function_type.h" #include "tensorflow/c/experimental/saved_model/internal/function_metadata_type.h" -#include "tensorflow/c/experimental/saved_model/internal/tensorhandle_list_type.h" #include "tensorflow/c/tf_status_internal.h" #include "tensorflow/core/platform/status.h" diff --git a/tensorflow/c/experimental/saved_model/internal/saved_model_api_test.cc b/tensorflow/c/experimental/saved_model/internal/saved_model_api_test.cc index 10b5677a48b..e58b232f9c9 100644 --- a/tensorflow/c/experimental/saved_model/internal/saved_model_api_test.cc +++ b/tensorflow/c/experimental/saved_model/internal/saved_model_api_test.cc @@ -22,7 +22,6 @@ limitations under the License. #include "tensorflow/c/eager/c_api_experimental.h" #include "tensorflow/c/eager/c_api_test_util.h" #include "tensorflow/c/experimental/saved_model/public/concrete_function.h" -#include "tensorflow/c/experimental/saved_model/public/tensorhandle_list.h" #include "tensorflow/c/tf_status.h" #include "tensorflow/c/tf_tensor.h" #include "tensorflow/core/lib/io/path.h" diff --git a/tensorflow/c/experimental/saved_model/internal/tensorhandle_list.cc b/tensorflow/c/experimental/saved_model/internal/tensorhandle_list.cc deleted file mode 100644 index c8f00c1f7c0..00000000000 --- a/tensorflow/c/experimental/saved_model/internal/tensorhandle_list.cc +++ /dev/null @@ -1,36 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/c/experimental/saved_model/public/tensorhandle_list.h" - -#include - -#include "tensorflow/c/eager/immediate_execution_tensor_handle.h" -#include "tensorflow/c/eager/tfe_tensorhandle_internal.h" -#include "tensorflow/c/experimental/saved_model/internal/tensorhandle_list_type.h" - -extern "C" { - -size_t TF_TensorHandleListSize(const TF_TensorHandleList* list) { - return tensorflow::unwrap(list)->size(); -} - -TFE_TensorHandle* TF_TensorHandleListGet(const TF_TensorHandleList* list, - int i) { - return tensorflow::wrap((*tensorflow::unwrap(list))[i]); -} - - -} // end extern "C" diff --git a/tensorflow/c/experimental/saved_model/internal/tensorhandle_list_type.h b/tensorflow/c/experimental/saved_model/internal/tensorhandle_list_type.h deleted file mode 100644 index 566417df025..00000000000 --- a/tensorflow/c/experimental/saved_model/internal/tensorhandle_list_type.h +++ /dev/null @@ -1,37 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_C_EXPERIMENTAL_SAVED_MODEL_INTERNAL_CONCRETE_FUNCTION_LIST_TYPE_H_ -#define TENSORFLOW_C_EXPERIMENTAL_SAVED_MODEL_INTERNAL_CONCRETE_FUNCTION_LIST_TYPE_H_ - -#include - -#include "tensorflow/c/conversion_macros.h" -#include "tensorflow/c/eager/immediate_execution_tensor_handle.h" - -// Internal structures used by the SavedModel C API. These are likely to -// change and should not be depended on. - -typedef struct TF_TensorHandleList TF_TensorHandleList; - -namespace tensorflow { - -DEFINE_CONVERSION_FUNCTIONS( - std::vector, - TF_TensorHandleList) - -} // namespace tensorflow - -#endif // TENSORFLOW_C_EXPERIMENTAL_SAVED_MODEL_INTERNAL_CONCRETE_FUNCTION_LIST_TYPE_H_ diff --git a/tensorflow/c/experimental/saved_model/public/BUILD b/tensorflow/c/experimental/saved_model/public/BUILD index 0cfa0a2c005..af65e05e7f6 100644 --- a/tensorflow/c/experimental/saved_model/public/BUILD +++ b/tensorflow/c/experimental/saved_model/public/BUILD @@ -24,7 +24,6 @@ exports_files( "concrete_function_list.h", "function_metadata.h", "saved_model_api.h", - "tensorhandle_list.h", ], visibility = ["//tensorflow/c/experimental/saved_model/internal:__pkg__"], ) @@ -40,7 +39,6 @@ cc_library( ":concrete_function_list", ":function_metadata", ":saved_model_api", - ":tensorhandle_list", ], ) @@ -63,8 +61,3 @@ alias( name = "saved_model_api", actual = "//tensorflow/c/experimental/saved_model/internal:saved_model_api", ) - -alias( - name = "tensorhandle_list", - actual = "//tensorflow/c/experimental/saved_model/internal:tensorhandle_list", -) diff --git a/tensorflow/c/experimental/saved_model/public/c_saved_model_api.h b/tensorflow/c/experimental/saved_model/public/c_saved_model_api.h index aae95a5477c..30f533f140a 100644 --- a/tensorflow/c/experimental/saved_model/public/c_saved_model_api.h +++ b/tensorflow/c/experimental/saved_model/public/c_saved_model_api.h @@ -21,7 +21,6 @@ limitations under the License. #include "tensorflow/c/experimental/saved_model/public/concrete_function_list.h" #include "tensorflow/c/experimental/saved_model/public/function_metadata.h" #include "tensorflow/c/experimental/saved_model/public/saved_model_api.h" -#include "tensorflow/c/experimental/saved_model/public/tensorhandle_list.h" // IWYU pragma: end_exports #endif // TENSORFLOW_C_EXPERIMENTAL_SAVED_MODEL_PUBLIC_C_SAVED_MODEL_API_H_ diff --git a/tensorflow/c/experimental/saved_model/public/concrete_function.h b/tensorflow/c/experimental/saved_model/public/concrete_function.h index 4cc2a4b4f05..ee5292294d6 100644 --- a/tensorflow/c/experimental/saved_model/public/concrete_function.h +++ b/tensorflow/c/experimental/saved_model/public/concrete_function.h @@ -19,7 +19,6 @@ limitations under the License. 
#include "tensorflow/c/c_api_macros.h" #include "tensorflow/c/eager/c_api.h" #include "tensorflow/c/experimental/saved_model/public/function_metadata.h" -#include "tensorflow/c/experimental/saved_model/public/tensorhandle_list.h" #ifdef __cplusplus extern "C" { diff --git a/tensorflow/c/experimental/saved_model/public/tensorhandle_list.h b/tensorflow/c/experimental/saved_model/public/tensorhandle_list.h deleted file mode 100644 index a1e88db3474..00000000000 --- a/tensorflow/c/experimental/saved_model/public/tensorhandle_list.h +++ /dev/null @@ -1,43 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_C_EXPERIMENTAL_SAVED_MODEL_PUBLIC_TENSORHANDLE_LIST_H_ -#define TENSORFLOW_C_EXPERIMENTAL_SAVED_MODEL_PUBLIC_TENSORHANDLE_LIST_H_ - -#include - -#include "tensorflow/c/c_api_macros.h" -#include "tensorflow/c/eager/c_api.h" - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -// An opaque type that is acts like a list of TF_ConcreteFunction pointers. -typedef struct TF_TensorHandleList TF_TensorHandleList; - -// Returns the size of `list`. -TF_CAPI_EXPORT extern size_t TF_TensorHandleListSize( - const TF_TensorHandleList* list); - -// Returns the `i`th TFE_TensorHandle in the list. -TF_CAPI_EXPORT extern TFE_TensorHandle* TF_TensorHandleListGet( - const TF_TensorHandleList* list, int i); - -#ifdef __cplusplus -} // end extern "C" -#endif // __cplusplus - -#endif // TENSORFLOW_C_EXPERIMENTAL_SAVED_MODEL_PUBLIC_TENSORHANDLE_LIST_H_ From 541832fca1728779929f3c1416e585ffe57c8373 Mon Sep 17 00:00:00 2001 From: Jiho Choi Date: Tue, 4 Aug 2020 14:51:22 -0700 Subject: [PATCH 2110/2522] Support multiple producers of the same (iterator_id, element_id) pair. 
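Before this change, the tf.data event grouping kept a single producer
EventNode* per (iterator_id, element_id) key, so a second producer event with
the same key silently overwrote the first. The map now stores every producer
for a key, and the consumer is linked to each of them. A condensed sketch of
the new bookkeeping (identifiers as in group_events.cc; optional-stat handling
and error checks omitted, so this is not a drop-in snippet):

    // Each (iterator_id, element_id) key now collects all of its producers.
    absl::flat_hash_map<std::pair<int64 /*iterator_id*/, int64 /*element_id*/>,
                        std::vector<EventNode*>>
        produce_iterator_map;
    produce_iterator_map[{iterator_id, element_id}].push_back(produce_iterator);

    // Consumer side: every recorded producer becomes a child of the consumer.
    if (auto produce_iterators = gtl::FindOrNull(
            produce_iterator_map, std::make_pair(iterator_id, element_id))) {
      for (EventNode* produce_iterator : *produce_iterators) {
        consume_iterator->AddChild(produce_iterator);
      }
    }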
PiperOrigin-RevId: 324895347 Change-Id: I4c560807486d43b977624dc47217738a12ecda02 --- .../core/profiler/utils/group_events.cc | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/tensorflow/core/profiler/utils/group_events.cc b/tensorflow/core/profiler/utils/group_events.cc index 2be2da3b445..86566822252 100644 --- a/tensorflow/core/profiler/utils/group_events.cc +++ b/tensorflow/core/profiler/utils/group_events.cc @@ -649,8 +649,9 @@ void EventForest::ProcessModelIds() { void EventForest::ProcessTfDataEvents() { absl::flat_hash_map, - EventNode*> - produce_iterators; + std::vector> + produce_iterator_map; + uint64 num_producers = 0; for (HostEventType event_type : {HostEventType::kPrefetchProduce, HostEventType::kParallelInterleaveProduce, @@ -670,14 +671,16 @@ void EventForest::ProcessTfDataEvents() { absl::optional iterator_id = produce_iterator->GetEventVisitor().GetStat(StatType::kParentId); if (!iterator_id.has_value()) break; - produce_iterators[{iterator_id->IntValue(), element_id->IntValue()}] = - produce_iterator; + produce_iterator_map[{iterator_id->IntValue(), + element_id->IntValue()}] + .push_back(produce_iterator); + ++num_producers; break; } } } } - VLOG(1) << produce_iterators.size() << " producer iterators found."; + VLOG(1) << num_producers << " producer iterators found."; uint64 num_matched = 0; for (HostEventType event_type : {HostEventType::kPrefetchConsume, @@ -701,11 +704,13 @@ void EventForest::ProcessTfDataEvents() { absl::optional iterator_id = consume_iterator->GetEventVisitor().GetStat(StatType::kStepId); if (!iterator_id.has_value()) continue; - if (auto produce_iterator = gtl::FindOrNull( - produce_iterators, std::make_pair(iterator_id->IntValue(), - element_id->IntValue()))) { - consume_iterator->AddChild(*produce_iterator); - ++num_matched; + if (auto produce_iterators = gtl::FindOrNull( + produce_iterator_map, std::make_pair(iterator_id->IntValue(), + element_id->IntValue()))) { + for (EventNode* produce_iterator : *produce_iterators) { + consume_iterator->AddChild(produce_iterator); + ++num_matched; + } } } } From 2b220dae89d64be05276018e171fcfefa95c1bc5 Mon Sep 17 00:00:00 2001 From: Jose Baiocchi Date: Tue, 4 Aug 2020 15:04:20 -0700 Subject: [PATCH 2111/2522] Export CombineOpMetrics via header PiperOrigin-RevId: 324898471 Change-Id: I77fb4c4fa86d695a701b51110485324dc76fd20a --- .../convert/op_metrics_db_combiner.cc | 43 +++++++++++++------ .../profiler/convert/op_metrics_db_combiner.h | 3 ++ 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/tensorflow/core/profiler/convert/op_metrics_db_combiner.cc b/tensorflow/core/profiler/convert/op_metrics_db_combiner.cc index e91869885c5..ad1d4bf380a 100644 --- a/tensorflow/core/profiler/convert/op_metrics_db_combiner.cc +++ b/tensorflow/core/profiler/convert/op_metrics_db_combiner.cc @@ -25,21 +25,43 @@ namespace { using OperationType = OpMetrics::MemoryAccessed::OperationType; -// Combines the src OpMetrics into the dst OpMetrics. -void CombineOpMetrics(const OpMetrics& src, OpMetrics* dst) { +// Copies OpMetrics symbol data from src to dst. 
+void CopyOpMetricsSymbolData(const OpMetrics& src, OpMetrics* dst) { DCHECK(dst != nullptr); DCHECK_EQ(src.hlo_module_id(), dst->hlo_module_id()); DCHECK_EQ(src.name(), dst->name()); - dst->set_category(src.category()); - dst->set_provenance(src.provenance()); - dst->set_is_eager(dst->is_eager() || src.is_eager()); - dst->set_deduplicated_name(src.deduplicated_name()); + if (dst->category().empty()) { + dst->set_category(src.category()); + } + if (dst->provenance().empty()) { + dst->set_provenance(src.provenance()); + } + if (dst->deduplicated_name().empty()) { + dst->set_deduplicated_name(src.deduplicated_name()); + } if (!dst->has_layout() && src.has_layout()) { *dst->mutable_layout() = src.layout(); } if (!dst->has_children() && src.has_children()) { *dst->mutable_children() = src.children(); } +} + +void CombinePrecisionStats(const PrecisionStats& src, PrecisionStats* dst) { + dst->set_compute_16bit_ps(src.compute_16bit_ps() + dst->compute_16bit_ps()); + dst->set_compute_32bit_ps(src.compute_32bit_ps() + dst->compute_32bit_ps()); +} + +} // namespace + +void CombineOpMetrics(const OpMetrics& src, OpMetrics* dst) { + DCHECK(dst != nullptr); + if (dst->occurrences() == 0) { + dst->set_min_time_ps(src.min_time_ps()); + } else { + dst->set_min_time_ps(std::min(src.min_time_ps(), dst->min_time_ps())); + } + dst->set_is_eager(dst->is_eager() || src.is_eager()); dst->set_occurrences(src.occurrences() + dst->occurrences()); dst->set_time_ps(src.time_ps() + dst->time_ps()); dst->set_self_time_ps(src.self_time_ps() + dst->self_time_ps()); @@ -50,16 +72,10 @@ void CombineOpMetrics(const OpMetrics& src, OpMetrics* dst) { dst->set_dma_stall_ps(src.dma_stall_ps() + dst->dma_stall_ps()); } -void CombinePrecisionStats(const PrecisionStats& src, PrecisionStats* dst) { - dst->set_compute_16bit_ps(src.compute_16bit_ps() + dst->compute_16bit_ps()); - dst->set_compute_32bit_ps(src.compute_32bit_ps() + dst->compute_32bit_ps()); -} - -} // namespace - void CombineMemoryAccessedBreakdown( const protobuf::RepeatedPtrField& src, protobuf::RepeatedPtrField* dst) { + if (src.empty()) return; absl::flat_hash_map, OpMetrics_MemoryAccessed*> dst_memory_accessed_map; @@ -99,6 +115,7 @@ void OpMetricsDbCombiner::Combine(const OpMetricsDb& src) { for (const auto& src_metrics : src.metrics_db()) { auto* dst_metrics = LookupOrInsertNewOpMetrics(src_metrics.hlo_module_id(), src_metrics.name()); + CopyOpMetricsSymbolData(src_metrics, dst_metrics); CombineOpMetrics(src_metrics, dst_metrics); } } diff --git a/tensorflow/core/profiler/convert/op_metrics_db_combiner.h b/tensorflow/core/profiler/convert/op_metrics_db_combiner.h index a0ca3387e7a..a87a2b53500 100644 --- a/tensorflow/core/profiler/convert/op_metrics_db_combiner.h +++ b/tensorflow/core/profiler/convert/op_metrics_db_combiner.h @@ -23,6 +23,9 @@ limitations under the License. namespace tensorflow { namespace profiler { +// Combines the src OpMetrics into the dst OpMetrics. +void CombineOpMetrics(const OpMetrics& src, OpMetrics* dst); + // Combines the memory access breakdown. 
void CombineMemoryAccessedBreakdown( const protobuf::RepeatedPtrField& src, From 087d17541a0e7ff0f8bdd36145dbcac93be91235 Mon Sep 17 00:00:00 2001 From: Pete Warden Date: Tue, 4 Aug 2020 15:23:21 -0700 Subject: [PATCH 2112/2522] Fix for hello world Colab dependency error PiperOrigin-RevId: 324902066 Change-Id: Ib111d520f4de962c4c506c5641da09d3683ad6be --- .../examples/hello_world/train/train_hello_world_model.ipynb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/lite/micro/examples/hello_world/train/train_hello_world_model.ipynb b/tensorflow/lite/micro/examples/hello_world/train/train_hello_world_model.ipynb index d0fb0eaa1b5..aea609cbb39 100644 --- a/tensorflow/lite/micro/examples/hello_world/train/train_hello_world_model.ipynb +++ b/tensorflow/lite/micro/examples/hello_world/train/train_hello_world_model.ipynb @@ -103,7 +103,8 @@ } }, "source": [ - "! pip install -q tensorflow==2" + "! pip2 install gast==0.3.3\n", + "! pip install -q tensorflow==2\n" ], "execution_count": 2, "outputs": [ From 5ad23f0dd45a53fb2781f11f6f5ae8eb9924c6d5 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Tue, 4 Aug 2020 15:32:27 -0700 Subject: [PATCH 2113/2522] Internal change PiperOrigin-RevId: 324903902 Change-Id: I5eba083589d52f72cb26bcebabff1a4795ba586a --- tensorflow/python/keras/engine/base_layer.py | 11 ++++++---- .../python/keras/engine/base_layer_v1.py | 2 ++ tensorflow/python/keras/engine/functional.py | 1 + tensorflow/python/keras/engine/sequential.py | 1 + tensorflow/python/keras/engine/training.py | 20 +++++++------------ tensorflow/python/keras/engine/training_v1.py | 14 +++++-------- tensorflow/python/keras/premade/linear.py | 2 +- tensorflow/python/keras/premade/wide_deep.py | 2 +- 8 files changed, 25 insertions(+), 28 deletions(-) diff --git a/tensorflow/python/keras/engine/base_layer.py b/tensorflow/python/keras/engine/base_layer.py index d7cc3fd38a8..373e17a4004 100644 --- a/tensorflow/python/keras/engine/base_layer.py +++ b/tensorflow/python/keras/engine/base_layer.py @@ -94,9 +94,11 @@ _TF_OP_LAYER_NAME_PREFIX = 'tf_op_layer_' _AUTOCAST_TYPES = (ops.Tensor, sparse_tensor.SparseTensor, ragged_tensor.RaggedTensor) -_keras_layers_gauge = monitoring.BoolGauge('/tensorflow/api/keras/layers', - 'keras layers usage', 'method') -_keras_model_gauge = monitoring.BoolGauge( +keras_layers_gauge = monitoring.BoolGauge('/tensorflow/api/keras/layers', + 'keras layers usage', 'method') +keras_api_gauge = monitoring.BoolGauge('/tensorflow/api/keras', + 'keras api usage', 'method') +keras_model_gauge = monitoring.BoolGauge( '/tensorflow/api/keras/premade_models', 'premade keras model usage', 'type') @@ -301,6 +303,8 @@ class Layer(module.Module, version_utils.LayerVersionSelector): dtype=None, dynamic=False, **kwargs): + keras_api_gauge.get_cell('layer').set(True) + keras_layers_gauge.get_cell(self.__class__.__name__).set(True) # These properties should be set by the user via keyword arguments. 
# note that 'dtype', 'input_shape' and 'batch_input_shape' # are only applicable to input layers: do not pass these keywords @@ -3084,7 +3088,6 @@ class TensorFlowOpLayer(Layer): super(TensorFlowOpLayer, self).__init__( name=_TF_OP_LAYER_NAME_PREFIX + name, trainable=trainable, dtype=dtype, autocast=False) - _keras_layers_gauge.get_cell('TensorflowOpLayer').set(True) if isinstance(node_def, dict): self.node_def = json_format.ParseDict(node_def, node_def_pb2.NodeDef()) else: diff --git a/tensorflow/python/keras/engine/base_layer_v1.py b/tensorflow/python/keras/engine/base_layer_v1.py index 9822094df26..e9ebc170b96 100644 --- a/tensorflow/python/keras/engine/base_layer_v1.py +++ b/tensorflow/python/keras/engine/base_layer_v1.py @@ -153,6 +153,8 @@ class Layer(base_layer.Layer): @trackable.no_automatic_dependency_tracking def __init__(self, trainable=True, name=None, dtype=None, dynamic=False, **kwargs): + base_layer.keras_api_gauge.get_cell('layer v1').set(True) + base_layer.keras_layers_gauge.get_cell(self.__class__.__name__).set(True) # These properties should be set by the user via keyword arguments. # note that 'dtype', 'input_shape' and 'batch_input_shape' # are only applicable to input layers: do not pass these keywords diff --git a/tensorflow/python/keras/engine/functional.py b/tensorflow/python/keras/engine/functional.py index 8422bf923d8..7c1fd4d1c72 100644 --- a/tensorflow/python/keras/engine/functional.py +++ b/tensorflow/python/keras/engine/functional.py @@ -113,6 +113,7 @@ class Functional(training_lib.Model): @trackable.no_automatic_dependency_tracking def _init_graph_network(self, inputs, outputs): + base_layer.keras_api_gauge.get_cell('Functional').set(True) # This method is needed for Sequential to reinitialize graph network when # layer is added or removed. 
self._is_graph_network = True diff --git a/tensorflow/python/keras/engine/sequential.py b/tensorflow/python/keras/engine/sequential.py index e22c4921102..3b50506370b 100644 --- a/tensorflow/python/keras/engine/sequential.py +++ b/tensorflow/python/keras/engine/sequential.py @@ -114,6 +114,7 @@ class Sequential(functional.Functional): # Skip the init in FunctionalModel since model doesn't have input/output yet super(functional.Functional, self).__init__( # pylint: disable=bad-super-call name=name, autocast=False) + base_layer.keras_api_gauge.get_cell('Sequential').set(True) self.supports_masking = True self._compute_output_and_mask_jointly = True self._auto_track_sub_layers = False diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index 15f77ab8a96..bf542129e5c 100644 --- a/tensorflow/python/keras/engine/training.py +++ b/tensorflow/python/keras/engine/training.py @@ -33,7 +33,6 @@ from tensorflow.python.distribute import values as ds_values from tensorflow.python.eager import backprop from tensorflow.python.eager import context from tensorflow.python.eager import def_function -from tensorflow.python.eager import monitoring from tensorflow.python.framework import errors from tensorflow.python.framework import errors_impl from tensorflow.python.framework import func_graph @@ -96,10 +95,6 @@ except ImportError: # pylint: enable=g-import-not-at-top -_keras_api_gauge = monitoring.BoolGauge('/tensorflow/api/keras', - 'keras api usage', 'method') - - def enable_multi_worker(method): """Decorator that handles running `method` with multi-worker strategy.""" @@ -245,6 +240,8 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector): @trackable.no_automatic_dependency_tracking def __init__(self, *args, **kwargs): + base_layer.keras_api_gauge.get_cell('model').set(True) + # Special case for Subclassed Functional Model, which we couldn't detect # when __new__ is called. We only realize it is a functional model when it # calls super.__init__ with input and output tensor. @@ -255,6 +252,7 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector): functional.Functional.__init__(self, *args, **kwargs) return + base_layer.keras_api_gauge.get_cell('Model subclass').set(True) # The following are implemented as property functions: # self.trainable_weights # self.non_trainable_weights @@ -309,7 +307,6 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector): self._init_batch_counters() self._base_model_initialized = True - _keras_api_gauge.get_cell('model').set(True) @trackable.no_automatic_dependency_tracking def _init_batch_counters(self): @@ -538,7 +535,7 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector): ValueError: In case of invalid arguments for `optimizer`, `loss` or `metrics`. """ - _keras_api_gauge.get_cell('compile').set(True) + base_layer.keras_api_gauge.get_cell('compile').set(True) with self.distribute_strategy.scope(): self._validate_compile(optimizer, metrics, **kwargs) self._run_eagerly = run_eagerly @@ -1031,7 +1028,7 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector): ValueError: In case of mismatch between the provided input data and what the model expects or when the input data is empty. """ - _keras_api_gauge.get_cell('fit').set(True) + base_layer.keras_api_gauge.get_cell('fit').set(True) # Legacy graph support is contained in `training_v1.Model`. 
version_utils.disallow_legacy_graph('Model', 'fit') self._assert_compile_was_called() @@ -1340,7 +1337,7 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector): RuntimeError: If `model.evaluate` is wrapped in `tf.function`. ValueError: in case of invalid arguments. """ - _keras_api_gauge.get_cell('evaluate').set(True) + base_layer.keras_api_gauge.get_cell('evaluate').set(True) version_utils.disallow_legacy_graph('Model', 'evaluate') self._assert_compile_was_called() self._check_call_args('evaluate') @@ -1568,7 +1565,7 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector): or in case a stateful model receives a number of samples that is not a multiple of the batch size. """ - _keras_api_gauge.get_cell('predict').set(True) + base_layer.keras_api_gauge.get_cell('predict').set(True) version_utils.disallow_legacy_graph('Model', 'predict') self._check_call_args('predict') _disallow_inside_tf_function('predict') @@ -1824,7 +1821,6 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector): `Model.fit` now supports generators, so there is no longer any need to use this endpoint. """ - _keras_api_gauge.get_cell('fit_generator').set(True) return self.fit( generator, steps_per_epoch=steps_per_epoch, @@ -1857,7 +1853,6 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector): `Model.evaluate` now supports generators, so there is no longer any need to use this endpoint. """ - _keras_api_gauge.get_cell('evaluate_generator').set(True) self._check_call_args('evaluate_generator') return self.evaluate( @@ -1885,7 +1880,6 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector): `Model.predict` now supports generators, so there is no longer any need to use this endpoint. """ - _keras_api_gauge.get_cell('predict_generator').set(True) return self.predict( generator, steps=steps, diff --git a/tensorflow/python/keras/engine/training_v1.py b/tensorflow/python/keras/engine/training_v1.py index 29591e8ffb7..2ac3337948a 100644 --- a/tensorflow/python/keras/engine/training_v1.py +++ b/tensorflow/python/keras/engine/training_v1.py @@ -28,7 +28,6 @@ from tensorflow.python.distribute import distribution_strategy_context from tensorflow.python.distribute import parameter_server_strategy from tensorflow.python.eager import context from tensorflow.python.eager import def_function -from tensorflow.python.eager import monitoring from tensorflow.python.framework import composite_tensor from tensorflow.python.framework import composite_tensor_utils from tensorflow.python.framework import constant_op @@ -72,9 +71,6 @@ try: except ImportError: issparse = None -_keras_api_gauge = monitoring.BoolGauge('/tensorflow/api/keras/model_v1', - 'keras model v1 usage', 'method') - class Model(training_lib.Model): """`Model` groups layers into an object with training and inference features. @@ -142,7 +138,7 @@ class Model(training_lib.Model): def __init__(self, *args, **kwargs): super(Model, self).__init__(*args, **kwargs) - _keras_api_gauge.get_cell('model_v1').set(True) + base_layer.keras_api_gauge.get_cell('model v1').set(True) # initializing _distribution_strategy here since it is possible to call # predict on a model without compiling it. self._distribution_strategy = None @@ -413,7 +409,7 @@ class Model(training_lib.Model): # time the model gets called on training data. 
return self._is_compiled = True - _keras_api_gauge.get_cell('compile_v1').set(True) + base_layer.keras_api_gauge.get_cell('compile_v1').set(True) # Prepare list of loss functions, same size of model outputs. self.loss_functions = training_utils.prepare_loss_functions( @@ -774,7 +770,7 @@ class Model(training_lib.Model): and what the model expects. """ self._assert_built_as_v1() - _keras_api_gauge.get_cell('fit_v1').set(True) + base_layer.keras_api_gauge.get_cell('fit_v1').set(True) # Legacy support if 'nb_epoch' in kwargs: logging.warning( @@ -895,7 +891,7 @@ class Model(training_lib.Model): ValueError: in case of invalid arguments. """ self._assert_built_as_v1() - _keras_api_gauge.get_cell('evaluate_v1').set(True) + base_layer.keras_api_gauge.get_cell('evaluate_v1').set(True) self._assert_compile_was_called() self._check_call_args('evaluate') @@ -975,7 +971,7 @@ class Model(training_lib.Model): that is not a multiple of the batch size. """ self._assert_built_as_v1() - _keras_api_gauge.get_cell('predict_v1').set(True) + base_layer.keras_api_gauge.get_cell('predict_v1').set(True) self._check_call_args('predict') func = self._select_training_loop(x) diff --git a/tensorflow/python/keras/premade/linear.py b/tensorflow/python/keras/premade/linear.py index 20f2ce560e2..438e3270021 100644 --- a/tensorflow/python/keras/premade/linear.py +++ b/tensorflow/python/keras/premade/linear.py @@ -95,7 +95,7 @@ class LinearModel(training.Model): self.kernel_regularizer = regularizers.get(kernel_regularizer) self.bias_regularizer = regularizers.get(bias_regularizer) super(LinearModel, self).__init__(**kwargs) - base_layer._keras_model_gauge.get_cell('Linear').set(True) # pylint: disable=protected-access + base_layer.keras_model_gauge.get_cell('Linear').set(True) def build(self, input_shape): if isinstance(input_shape, dict): diff --git a/tensorflow/python/keras/premade/wide_deep.py b/tensorflow/python/keras/premade/wide_deep.py index 8638d3afc71..edb0124276f 100644 --- a/tensorflow/python/keras/premade/wide_deep.py +++ b/tensorflow/python/keras/premade/wide_deep.py @@ -85,7 +85,7 @@ class WideDeepModel(keras_training.Model): Allowed keyword arguments include `name`. """ super(WideDeepModel, self).__init__(**kwargs) - base_layer._keras_model_gauge.get_cell('WideDeep').set(True) # pylint: disable=protected-access + base_layer.keras_model_gauge.get_cell('WideDeep').set(True) self.linear_model = linear_model self.dnn_model = dnn_model self.activation = activations.get(activation) From cf59ede2e4b98b7f4ec868fe9c6c8e6f8dbffee3 Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Tue, 4 Aug 2020 15:40:08 -0700 Subject: [PATCH 2114/2522] If an input-output pair is configured to be must-alias(off by default), they must be aliased at runtime. 
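As a rough, hedged illustration of the runtime check this patch introduces (simplified types, not the real XLA classes): an input marked must-alias at compile time has to actually be donated at run time, and execution now fails with an InvalidArgument-style error instead of falling back to an inserted copy. The AliasedInput struct and CheckAlias helper below are hypothetical; only the AliasKind values and the error wording mirror the patch.

// Illustrative only: reject a must-alias input that was not donated.
#include <iostream>
#include <string>

enum class AliasKind { kMayAlias, kMustAlias };

struct AliasedInput {
  std::string name;
  AliasKind kind;
  bool donated;  // did the caller pass ownership of the device buffer?
};

// Returns an error message, or an empty string on success.
std::string CheckAlias(const AliasedInput& in) {
  if (in.kind == AliasKind::kMustAlias && !in.donated) {
    return "An input was configured to be must-alias at compile time "
           "but not donated at runtime: " + in.name;
  }
  return "";
}

int main() {
  AliasedInput ok{"param0", AliasKind::kMayAlias, /*donated=*/false};
  AliasedInput bad{"param1", AliasKind::kMustAlias, /*donated=*/false};
  std::cout << (CheckAlias(ok).empty() ? "param0: ok" : CheckAlias(ok)) << "\n";
  std::cout << CheckAlias(bad) << "\n";
  return 0;
}

This matches the behavior added to cpu_executable.cc, gpu_executable.cc, and tpu_executable_interface.cc in the diff: a may-alias input that is not donated still gets a defensive copy, while a must-alias input that is not donated is an error.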
PiperOrigin-RevId: 324905361 Change-Id: Id12e9583ec25d6464f29479c48ddef37027ef61a --- .../utils/compile_mlir_util_test.cc | 2 +- tensorflow/compiler/xla/client/xla_builder.cc | 2 +- tensorflow/compiler/xla/client/xla_builder.h | 17 ++++-- .../xla/service/cpu/cpu_executable.cc | 6 ++ .../xla/service/gpu/gpu_executable.cc | 6 ++ tensorflow/compiler/xla/service/hlo.proto | 14 ++++- .../service/hlo_input_output_alias_config.cc | 38 +++++++++--- .../service/hlo_input_output_alias_config.h | 32 +++++++--- tensorflow/compiler/xla/service/hlo_parser.cc | 59 +++++++++++-------- .../compiler/xla/service/hlo_parser_test.cc | 41 ++----------- .../xla/tests/buffer_donation_test.cc | 49 +++++++++++++-- .../tpu/tpu_executable_interface.cc | 18 ++++++ 12 files changed, 194 insertions(+), 90 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util_test.cc b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util_test.cc index 6ebf6897bb1..8a07aab11e1 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util_test.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util_test.cc @@ -524,7 +524,7 @@ TEST(CompileGraphToXlaHlo, Resources) { ASSERT_TRUE(status_or_hlo_module.ok()); constexpr char expected_hlo_module_string[] = - R"(HloModule main.4, input_output_alias={ {0}: 1 } + R"(HloModule main.4, input_output_alias={ {0}: (1, {}, may_alias) } ENTRY %main.4 (Arg_0.1: f32[2], Arg_1.2: f32[2]) -> (f32[2]) { %Arg_1.2 = f32[2]{0} parameter(1) diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc index 52f61408cbb..484fb0aabe7 100644 --- a/tensorflow/compiler/xla/client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_builder.cc @@ -446,7 +446,7 @@ StatusOr XlaBuilder::Build(int64 root_id, alias.param_index.ToString().c_str()); } TF_RETURN_IF_ERROR(config.SetUpAlias(alias.output_index, alias.param_number, - alias.param_index)); + alias.param_index, alias.kind)); } *module->mutable_input_output_alias() = config.ToProto(); return Status::OK(); diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h index 1960d0c4632..aa5074d28d9 100644 --- a/tensorflow/compiler/xla/client/xla_builder.h +++ b/tensorflow/compiler/xla/client/xla_builder.h @@ -32,6 +32,7 @@ limitations under the License. #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/dynamic_parameter_binding.h" #include "tensorflow/compiler/xla/service/hlo.pb.h" +#include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" @@ -349,12 +350,16 @@ class XlaBuilder { // not available until the computation is built, and eventual error in the // arguments of this API will be detected only at computation Build() time. // - // Note: Aliasing API is 'may-alias' and only donated buffer at runtime will - // be aliased with output. If a buffer is not donated at runtime, a copy will - // be inserted by XLA to prevent buffer clobbering. + // Note: Except when 'must-alias' is true, alias is assumed to be 'may-alias' + // and only donated buffer at runtime will be aliased with output. If a buffer + // is not donated at runtime, a copy will be inserted by XLA to prevent buffer + // clobbering. 
void SetUpAlias(const ShapeIndex& output_index, int64 param_number, - const ShapeIndex& param_index) { - input_output_aliases_.push_back({output_index, param_number, param_index}); + const ShapeIndex& param_index, + HloInputOutputAliasConfig::AliasKind kind = + HloInputOutputAliasConfig::AliasKind::kMayAlias) { + input_output_aliases_.push_back( + {output_index, param_number, param_index, kind}); } // Describes an input/output alias as inserted by the SetUpAlias() API. @@ -365,6 +370,8 @@ class XlaBuilder { int64 param_number; // Specifies the index of the aliased buffer in the parameter ShapeIndex param_index; + // Specifies if the alias is a must alias or may alias. + HloInputOutputAliasConfig::AliasKind kind; }; // Looks up the HloInstruction and sets the frontend attribute "attribute" to diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc index 0abcc91a1d7..7431e829b8e 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc @@ -247,6 +247,12 @@ StatusOr CpuExecutable::CreateResultShapedBuffer( ExecutionInput& input = arguments[alias->parameter_number]; MaybeOwningDeviceMemory* maybe_owning_memory = input.MutableBuffer(alias->parameter_index); + if (alias->must_alias() && !maybe_owning_memory->HasOwnership()) { + return InvalidArgument( + "An input was configured to be must-alias at " + "compile time but not donated at runtime: %s", + alias->ToString()); + } if (absl::optional owning = maybe_owning_memory->Release()) { // If the caller passes the ownership of the device memory, reuse it diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc index 469f2919fba..726f1963545 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc @@ -480,6 +480,12 @@ StatusOr GpuExecutable::ExecuteAsyncOnStream( ExecutionInput& input = arguments[alias->parameter_number]; MaybeOwningDeviceMemory* maybe_owning_memory = input.MutableBuffer(alias->parameter_index); + if (alias->must_alias() && !maybe_owning_memory->HasOwnership()) { + return InvalidArgument( + "An input was configured to be must-alias at " + "compile time but not donated at runtime: %s", + alias->ToString()); + } if (absl::optional owning = maybe_owning_memory->Release()) { // If the caller passes the ownership of the device memory, reuse it diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto index 960f60fe882..e043216c17e 100644 --- a/tensorflow/compiler/xla/service/hlo.proto +++ b/tensorflow/compiler/xla/service/hlo.proto @@ -283,6 +283,16 @@ message HloScheduleProto { map sequences = 1; } +enum Kind { + // Define a UNDEFINED_ALIAS equal to zero to get around the default-0 proto3 + // behavior and missing has_*() APIs. + UNDEFINED_ALIAS = 0; + // The buffers may or may not alias at runtime. + MAY_ALIAS = 1; + // The buffers must alias at runtime. + MUST_ALIAS = 2; +} + message HloInputOutputAliasProto { // The following proto describes a pair of aliased an input // (described by parameter number and a ShapeIndex of the parameter) @@ -304,8 +314,8 @@ message HloInputOutputAliasProto { int64 parameter_number = 2; // ShapeIndex of the parameter instruction. repeated int64 parameter_shape_index = 3; - reserved 4; - reserved "kind"; + // The kind of alias to be setup. 
+ Kind kind = 4; } repeated AliasEntryProto entries = 1; diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc index e123161720b..34bc30d641f 100644 --- a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc +++ b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h" +#include "tensorflow/compiler/xla/service/hlo.pb.h" #include "tensorflow/compiler/xla/service/hlo_module.h" namespace xla { @@ -24,9 +25,10 @@ bool HloInputOutputAliasConfig::OutputHasAlias( return alias_.element(output_index).has_value(); } -Status HloInputOutputAliasConfig::SetUpAlias(const ShapeIndex& output_index, - int64 param_number, - const ShapeIndex& param_index) { +Status HloInputOutputAliasConfig::SetUpAlias( + const ShapeIndex& output_index, int64 param_number, + const ShapeIndex& param_index, + HloInputOutputAliasConfig::AliasKind must_alias) { TF_RET_CHECK(ShapeUtil::IndexIsValid(alias_.shape(), output_index)) << "Trying to set up alias at " << output_index.ToString() << " which is an invalid index for shape " @@ -41,7 +43,8 @@ Status HloInputOutputAliasConfig::SetUpAlias(const ShapeIndex& output_index, param_number, param_index.ToString(), output_index.ToString(), alias_.element(output_index)->parameter_number, alias_.element(output_index)->parameter_index.ToString()); - (*alias_.mutable_element(output_index)) = Alias(param_number, param_index); + (*alias_.mutable_element(output_index)) = + Alias(param_number, param_index, must_alias); VLOG(4) << "Set up alias between output index " << output_index.ToString() << " and parameter " << param_index << " at index " << param_index.ToString(); @@ -61,6 +64,11 @@ HloInputOutputAliasProto HloInputOutputAliasConfig::ToProto() const { for (int64 i : data->parameter_index) { entry.add_parameter_shape_index(i); } + if (data->must_alias()) { + entry.set_kind(Kind::MUST_ALIAS); + } else { + entry.set_kind(Kind::MAY_ALIAS); + } result.add_entries()->Swap(&entry); } }); @@ -77,8 +85,9 @@ StatusOr HloInputOutputAliasConfig::CreateFromProto( int64 param_number = entry.parameter_number(); ShapeIndex param_index(entry.parameter_shape_index().begin(), entry.parameter_shape_index().end()); + AliasKind kind = entry.kind() == Kind::MAY_ALIAS ? kMayAlias : kMustAlias; TF_RETURN_IF_ERROR( - result.SetUpAlias(output_index, param_number, param_index)); + result.SetUpAlias(output_index, param_number, param_index, kind)); } return result; } @@ -93,9 +102,9 @@ string HloInputOutputAliasConfig::ToString() const { ForEachAlias([&](const ShapeIndex& output_index, const Alias& alias) { pieces.push_back(absl::StrFormat( - " OutputIndex %s is aliased with parameter %lld at %s:", - output_index.ToString(), alias.parameter_number, - alias.parameter_index.ToString())); + " OutputIndex %s is %saliased with parameter %lld at %s:", + output_index.ToString(), alias.kind == kMustAlias ? 
"must-" : "may-", + alias.parameter_number, alias.parameter_index.ToString())); }); return absl::StrJoin(pieces, "\n"); } @@ -112,6 +121,19 @@ string HloInputOutputAliasConfig::ToShortString() const { return absl::StrJoin(pieces, ", "); } +bool HloInputOutputAliasConfig::ParameterMustAlias( + int64 param_number, const ShapeIndex& param_index) const { + bool result = false; + alias_.ForEachElement( + [&](const xla::ShapeIndex&, absl::optional alias) { + if (alias && alias->parameter_number == param_number && + alias->parameter_index == param_index && alias->must_alias()) { + result = true; + } + }); + return result; +} + absl::optional HloInputOutputAliasConfig::GetAliasedOutput( int64 param_number, const ShapeIndex& param_index) const { absl::optional output; diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h index d5ca28e9387..6b84bdb6a68 100644 --- a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h +++ b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h @@ -32,22 +32,32 @@ class HloModule; // parameter index in the entry computation. class HloInputOutputAliasConfig { public: + // The kind of aliases which can be set. A kMayAlias is one setup at + // compilation time by the user, and has to be respected. A kMustAlias one + // might be setup by the compiler, if it decides it is convenient to do so. + enum AliasKind { + kMayAlias, + kMustAlias, + }; // Defines the alias information for a given output buffer. A given output // buffer shape index can refer only to one parameter+index. struct Alias { - Alias(int64 parameter_number, ShapeIndex parameter_index) + Alias(int64 parameter_number, ShapeIndex parameter_index, + AliasKind kind = kMayAlias) : parameter_number(parameter_number), - parameter_index(std::move(parameter_index)) {} + parameter_index(std::move(parameter_index)), + kind(kind) {} int64 parameter_number; ShapeIndex parameter_index; + AliasKind kind; + + bool must_alias() const { return kind == kMustAlias; } std::string ToString() { - if (parameter_index.empty()) { - return absl::StrCat(parameter_number); - } - return absl::StrFormat("(%lld, %s)", parameter_number, - parameter_index.ToString()); + return absl::StrFormat("(%lld, %s, %s)", parameter_number, + parameter_index.ToString(), + kind == kMustAlias ? "must_alias" : "may_alias"); } }; @@ -61,7 +71,8 @@ class HloInputOutputAliasConfig { // Sets up alias config from `output_index` to `param_index` at // `param_number`. Status SetUpAlias(const ShapeIndex& output_index, int64 param_number, - const ShapeIndex& param_index); + const ShapeIndex& param_index, + AliasKind must_alias = kMayAlias); // Returns true if the given parameter is aliased with one of the output // buffers. @@ -92,6 +103,11 @@ class HloInputOutputAliasConfig { absl::optional GetAliasedParameter( const ShapeIndex& output_index) const; + // Returns if the parameter at the given parameter number and parameter + // index must-alias with an output. 
+ bool ParameterMustAlias(int64 param_number, + const ShapeIndex& param_index) const; + using AliasFn = std::function; diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc index 0530062c43b..31afe2a3673 100644 --- a/tensorflow/compiler/xla/service/hlo_parser.cc +++ b/tensorflow/compiler/xla/service/hlo_parser.cc @@ -552,33 +552,39 @@ bool HloParserImpl::ParseAliasing(AliasingData* data) { return false; } - if (lexer_.GetKind() != TokKind::kLparen) { - // Short form: "{0}: 0", output index "{}" is assumed. - int64 param_num; - ParseInt64(¶m_num); - data->emplace(std::piecewise_construct, std::forward_as_tuple(out), - std::forward_as_tuple(param_num, ShapeIndex{})); - } else { - // Long form: "{0}: (0, {0})", output index is explicitly specified. - if (!ParseToken(TokKind::kLparen, errmsg)) { - return false; - } - int64 param_num; - ParseInt64(¶m_num); - if (!ParseToken(TokKind::kComma, errmsg)) { - return false; - } - ShapeIndex param_idx; - if (!ParseShapeIndex(¶m_idx)) { - return false; - } - data->emplace(std::piecewise_construct, std::forward_as_tuple(out), - std::forward_as_tuple(param_num, param_idx)); - if (!ParseToken(TokKind::kRparen, errmsg)) { - return false; + if (!ParseToken(TokKind::kLparen, errmsg)) { + return false; + } + int64 param_num; + ParseInt64(¶m_num); + if (!ParseToken(TokKind::kComma, errmsg)) { + return false; + } + ShapeIndex param_idx; + if (!ParseShapeIndex(¶m_idx)) { + return false; + } + + HloInputOutputAliasConfig::AliasKind alias_kind = + HloInputOutputAliasConfig::kMayAlias; + if (EatIfPresent(TokKind::kComma)) { + std::string type; + ParseName(&type); + if (type == "must-alias") { + alias_kind = HloInputOutputAliasConfig::kMustAlias; + } else if (type == "may-alias") { + alias_kind = HloInputOutputAliasConfig::kMayAlias; + } else { + return TokenError("Unexpected aliasing kind; expected SYSTEM or USER"); } } + data->emplace(std::piecewise_construct, std::forward_as_tuple(out), + std::forward_as_tuple(param_num, param_idx, alias_kind)); + if (!ParseToken(TokKind::kRparen, errmsg)) { + return false; + } + if (!EatIfPresent(TokKind::kComma)) { break; } @@ -624,8 +630,9 @@ bool HloParserImpl::ParseHloModule(HloModule* module) { if (aliasing_data) { HloInputOutputAliasConfig alias_config(module->result_shape()); for (auto& p : *aliasing_data) { - Status st = alias_config.SetUpAlias(p.first, p.second.parameter_number, - p.second.parameter_index); + Status st = + alias_config.SetUpAlias(p.first, p.second.parameter_number, + p.second.parameter_index, p.second.kind); if (!st.ok()) { return TokenError(st.error_message()); } diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc index 484578e5e0e..86b6b1bedd9 100644 --- a/tensorflow/compiler/xla/service/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc @@ -2399,7 +2399,7 @@ ENTRY c2 { TEST_F(HloParserTest, SimpleAliasing) { const string original = R"( -HloModule Module, input_output_alias={ {0}: (0, {0}), {1}: (0, {1}) } +HloModule Module, input_output_alias={ {0}: (0, {0}, must-alias), {1}: (0, {1}) } ENTRY entry { %p = (f32[], f32[]) parameter(0) @@ -2413,42 +2413,13 @@ ENTRY entry { std::unique_ptr parsed_module = module.ConsumeValueOrDie(); EXPECT_EQ(parsed_module->input_output_alias_config().GetAliasedOutput(0, {0}), ShapeIndex{0}); + + EXPECT_TRUE( + parsed_module->input_output_alias_config().ParameterMustAlias(0, {0})); 
EXPECT_EQ(parsed_module->input_output_alias_config().GetAliasedOutput(0, {1}), ShapeIndex{1}); -} - -TEST_F(HloParserTest, SimpleAliasingShortForm) { - const string original = R"( -HloModule Module, input_output_alias={ {0}: 0, {1}: 1 } - -ENTRY entry { - %p0 = f32[] parameter(0) - %p1 = f32[] parameter(1) - ROOT %out = (f32[], f32[]) tuple(%p0, %p1) -} - )"; - auto module = ParseAndReturnVerifiedModule(original); - TF_ASSERT_OK(module.status()); - std::unique_ptr parsed_module = module.ConsumeValueOrDie(); - EXPECT_EQ(parsed_module->input_output_alias_config().GetAliasedOutput(0, {}), - ShapeIndex{0}); - EXPECT_EQ(parsed_module->input_output_alias_config().GetAliasedOutput(1, {}), - ShapeIndex{1}); -} - -TEST_F(HloParserTest, SimpleAliasingShortFormError) { - const string original = R"( -HloModule Module, input_output_alias={ {0}: A, {1}: 1 } - -ENTRY entry { - %p0 = f32[] parameter(0) - %p1 = f32[] parameter(1) - ROOT %out = (f32[], f32[]) tuple(%p0, %p1) -} - )"; - ExpectHasSubstr( - ParseAndReturnUnverifiedModule(original).status().error_message(), - "expects integer"); + EXPECT_FALSE( + parsed_module->input_output_alias_config().ParameterMustAlias(0, {1})); } TEST_F(HloParserTest, NestedAliasing) { diff --git a/tensorflow/compiler/xla/tests/buffer_donation_test.cc b/tensorflow/compiler/xla/tests/buffer_donation_test.cc index 856ea7c9b44..f78083fe2af 100644 --- a/tensorflow/compiler/xla/tests/buffer_donation_test.cc +++ b/tensorflow/compiler/xla/tests/buffer_donation_test.cc @@ -61,7 +61,7 @@ class BufferDonationTest : public HloTestBase { absl::Span argument_literals, absl::Span donate_arguments, absl::Span expected_runtime_aliasing, - const Literal& expected) { + const Literal& expected, std::string expected_failure = "") { // Create a copy of the output shape because the HLO module is std::moved // into the compiler and may be deallocated. 
const Shape output_shape = hlo_module->result_shape(); @@ -123,10 +123,19 @@ class BufferDonationTest : public HloTestBase { ExecutionInput(std::move(owned_buffers), argument_literal.shape())); } - TF_ASSERT_OK_AND_ASSIGN( - ExecutionOutput output, + StatusOr output_status = executable->ExecuteAsyncOnStream(&service_run_options, std::move(args), - /*hlo_execution_profile=*/nullptr)); + /*hlo_execution_profile=*/nullptr); + if (!expected_failure.empty()) { + ASSERT_FALSE(output_status.ok()); + ASSERT_TRUE(absl::StrContains(output_status.status().error_message(), + expected_failure)) + << "got: \n" + << output_status.status().error_message() << " \nvs want\n" + << expected_failure; + return; + } + ExecutionOutput output = output_status.ConsumeValueOrDie(); se::DeviceMemoryBase result_root_buffer = output.Result().root_buffer(); LOG(INFO) << "result allocation = " << result_root_buffer.opaque() @@ -303,5 +312,37 @@ ENTRY entry { #endif } +TEST_F(BufferDonationTest, TestMustAliasNotDonated) { + HloModuleConfig config; + + StatusOr> module = + ParseAndReturnVerifiedModule(R"( +HloModule module + +ENTRY entry { + a = f32[] parameter(0) + b = f32[] parameter(1) + ROOT out = (f32[], f32[]) tuple(a, b) +} + )", + config); + + TF_ASSERT_OK(module->get()->input_output_alias_config().SetUpAlias( + {0}, 0, {}, HloInputOutputAliasConfig::kMustAlias)); + + std::vector args; + args.push_back(LiteralUtil::CreateR0(0.1)); + args.push_back(LiteralUtil::CreateR0(0.2)); + Literal expected = LiteralUtil::MakeTupleFromSlices( + {LiteralUtil::CreateR0(0.1), LiteralUtil::CreateR0(0.2)}); + +#ifndef XLA_TEST_BACKEND_INTERPRETER + RunAndCheck(std::move(*module), args, + /*donate_arguments=*/{false, false}, {true, false}, expected, + "An input was configured to be must-alias at " + "compile time but not donated at runtime:"); +#endif +} + } // namespace } // namespace xla diff --git a/tensorflow/stream_executor/tpu/tpu_executable_interface.cc b/tensorflow/stream_executor/tpu/tpu_executable_interface.cc index 13f9db98e5d..f260cc1631f 100644 --- a/tensorflow/stream_executor/tpu/tpu_executable_interface.cc +++ b/tensorflow/stream_executor/tpu/tpu_executable_interface.cc @@ -62,6 +62,24 @@ TpuExecutableInterface::AllocateOutputMemoryWithInputReuse( << " host_shape = " << ShapeUtil::HumanStringWithLayout(host_shape); Shape device_shape = HostShapeToDeviceShape(host_shape); + TF_RETURN_IF_ERROR(alias_config.ForEachAliasWithStatus( + [&](const ShapeIndex& output_index, + absl::optional alias) { + if (alias && alias->must_alias()) { + VLOG(1) << alias->ToString(); + const MaybeOwningDeviceMemory& original_input = + (*arguments)[alias->parameter_number].Buffers().element( + alias->parameter_index); + if (!original_input.HasOwnership()) { + return InvalidArgument( + "An input was configured to be must-alias at " + "compile time but not donated at runtime: %s", + alias->ToString()); + } + } + return Status::OK(); + })); + if (VLOG_IS_ON(3)) { VLOG(3) << "AllocateOutputMemoryWithInputReuse, device = " << device_ordinal << " host_shape = " << ShapeUtil::HumanStringWithLayout(host_shape); From c96f601e0caabe294c906aa0355b18e970713091 Mon Sep 17 00:00:00 2001 From: Yanhua Sun Date: Tue, 4 Aug 2020 15:47:24 -0700 Subject: [PATCH 2115/2522] remove obsolete comment, the bug is already fixed PiperOrigin-RevId: 324906751 Change-Id: I625e5f9509044c40130733168d7c6a92c25d57bd --- tensorflow/python/eager/function.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py 
index 53d4b62b9b5..289b8a32cdb 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -2756,9 +2756,6 @@ def _convert_inputs_to_signature(inputs, input_signature, flat_input_signature): ",\n ".join(str(i) for i in input_signature) + ")") try: - # TODO(b/124370185): Use all elements as inputs to throw an error if there - # are ignored arguments. Calling with arguments that are not part of the - # signature should throw an error. flatten_inputs = nest.flatten_up_to( input_signature, inputs[:len(input_signature)], From 683b1bbc357da95a144ed7e7f7b368b9b7e468c3 Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: Tue, 4 Aug 2020 16:02:27 -0700 Subject: [PATCH 2116/2522] [tf.data service] Track task creation in the dispatcher journal. This way, the dispatcher will remember which tasks exist on startup, so that when workers reconnect the dispatcher will understand their tasks and allow them to continue processing their existing tasks. Without this journaling, the dispatcher won't know what task ids to assign to new tasks, since not-yet-reconnected workers could already be using task ids. Now that task state is managed by dispatcher_state, we can replace FinishJobUpdate with FinishTaskUpdate, since dispatcher_state can identify a job as finished when its last task finishes. PiperOrigin-RevId: 324909615 Change-Id: Iee5b877aac79046662231e4b9d2a01a271a71d5d --- .../core/data/service/dispatcher_impl.cc | 95 +++++------ .../core/data/service/dispatcher_impl.h | 46 ++--- .../core/data/service/dispatcher_state.cc | 65 ++++++- .../core/data/service/dispatcher_state.h | 33 +++- .../data/service/dispatcher_state_test.cc | 160 ++++++++++++++++-- tensorflow/core/data/service/journal.proto | 14 +- tensorflow/core/data/service/journal_test.cc | 10 +- .../experimental/data_service_dataset_op.cc | 12 +- 8 files changed, 312 insertions(+), 123 deletions(-) diff --git a/tensorflow/core/data/service/dispatcher_impl.cc b/tensorflow/core/data/service/dispatcher_impl.cc index 4a3764ecea3..77477df71e4 100644 --- a/tensorflow/core/data/service/dispatcher_impl.cc +++ b/tensorflow/core/data/service/dispatcher_impl.cc @@ -43,13 +43,14 @@ namespace data { namespace { // The name of the journal directory inside the dispatcher's working directory. 
-constexpr StringPiece kJournalDir = "journal"; +constexpr char kJournalDir[] = "journal"; using Dataset = DispatcherState::Dataset; using NamedJobKey = DispatcherState::NamedJobKey; using Job = DispatcherState::Job; +using Task = DispatcherState::Task; -std::string JournalDir(StringPiece work_dir) { +std::string JournalDir(const std::string& work_dir) { return io::JoinPath(work_dir, kJournalDir); } @@ -115,8 +116,8 @@ Status DataServiceDispatcherImpl::RegisterWorker( if (job->finished) { continue; } - std::shared_ptr task = CreateTask(job, worker_address); - + std::shared_ptr task; + TF_RETURN_IF_ERROR(CreateTask(job, worker_address, &task)); TaskDef* task_def = response->add_tasks(); std::shared_ptr dataset; TF_RETURN_IF_ERROR(state_.DatasetFromId(job->dataset_id, &dataset)); @@ -134,35 +135,20 @@ Status DataServiceDispatcherImpl::RegisterWorker( Status DataServiceDispatcherImpl::WorkerUpdate( const WorkerUpdateRequest* request, WorkerUpdateResponse* response) { mutex_lock l(mu_); - int64 worker_id = request->worker_id(); for (auto& update : request->updates()) { int64 task_id = update.task_id(); - const auto it = tasks_.find(task_id); - if (it == tasks_.end()) { - return errors::NotFound("WorkerUpdate called for worker ", worker_id, - " with unknown task id ", task_id); - } - std::shared_ptr task = it->second; + std::shared_ptr task; + TF_RETURN_IF_ERROR(state_.TaskFromId(task_id, &task)); if (update.completed()) { if (task->finished) { VLOG(1) << "Received completion update for already-finished task " << task->task_id << " on worker " << task->worker_address; continue; } - task->finished = true; - bool finished = true; - for (const auto& job_task : tasks_by_job_[task->job_id]) { - if (!job_task->finished) { - finished = false; - break; - } - } - if (finished) { - Update update; - FinishJobUpdate* finish_job = update.mutable_finish_job(); - finish_job->set_job_id(task->job_id); - TF_RETURN_IF_ERROR(Apply(update)); - } + Update update; + FinishTaskUpdate* finish_task = update.mutable_finish_task(); + finish_task->set_task_id(task_id); + TF_RETURN_IF_ERROR(Apply(update)); VLOG(3) << "Task " << task_id << " from job " << task->job_id << " completed"; } @@ -221,7 +207,7 @@ Status DataServiceDispatcherImpl::CreateJob(const CreateJobRequest* request, mutex_lock l(mu_); TF_RETURN_IF_ERROR(CreateJob(request->dataset_id(), processing_mode, absl::optional(), &job)); - tasks = CreateTasksForJob(job); + TF_RETURN_IF_ERROR(CreateTasksForJob(job, &tasks)); } response->set_job_id(job->job_id); TF_RETURN_IF_ERROR(AssignTasks(tasks)); @@ -256,7 +242,7 @@ Status DataServiceDispatcherImpl::GetOrCreateJob( } TF_RETURN_IF_ERROR( CreateJob(request->dataset_id(), requested_processing_mode, key, &job)); - tasks = CreateTasksForJob(job); + TF_RETURN_IF_ERROR(CreateTasksForJob(job, &tasks)); } TF_RETURN_IF_ERROR(AssignTasks(tasks)); response->set_job_id(job->job_id); @@ -320,28 +306,35 @@ Status DataServiceDispatcherImpl::CreateJob( return Status::OK(); } -std::vector> -DataServiceDispatcherImpl::CreateTasksForJob(std::shared_ptr job) +Status DataServiceDispatcherImpl::CreateTasksForJob( + std::shared_ptr job, + std::vector>* tasks) EXCLUSIVE_LOCKS_REQUIRED(mu_) { - std::vector> tasks; - tasks.reserve(workers_.size()); + tasks->clear(); + tasks->reserve(workers_.size()); for (const auto& it : workers_) { std::shared_ptr worker = it.second; - tasks.push_back(CreateTask(job, worker->address)); + std::shared_ptr task; + TF_RETURN_IF_ERROR(CreateTask(job, worker->address, &task)); + tasks->push_back(task); } - 
return tasks; + return Status::OK(); } -std::shared_ptr -DataServiceDispatcherImpl::CreateTask(std::shared_ptr job, - const std::string& worker_address) +Status DataServiceDispatcherImpl::CreateTask(std::shared_ptr job, + const std::string& worker_address, + std::shared_ptr* task) EXCLUSIVE_LOCKS_REQUIRED(mu_) { - int64 task_id = next_task_id_++; - DCHECK(!tasks_.contains(task_id)); - tasks_[task_id] = std::make_shared(task_id, job->job_id, - job->dataset_id, worker_address); - tasks_by_job_[job->job_id].push_back(tasks_[task_id]); - return tasks_[task_id]; + int64 task_id = state_.NextAvailableTaskId(); + Update update; + CreateTaskUpdate* create_task = update.mutable_create_task(); + create_task->set_task_id(task_id); + create_task->set_job_id(job->job_id); + create_task->set_dataset_id(job->dataset_id); + create_task->set_worker_address(worker_address); + TF_RETURN_IF_ERROR(Apply(update)); + TF_RETURN_IF_ERROR(state_.TaskFromId(task_id, task)); + return Status::OK(); } Status DataServiceDispatcherImpl::AssignTasks( @@ -393,24 +386,16 @@ Status DataServiceDispatcherImpl::GetTasks(const GetTasksRequest* request, GetTasksResponse* response) { mutex_lock l(mu_); VLOG(3) << "Looking up tasks for job id " << request->job_id(); - auto it = tasks_by_job_.find(request->job_id()); - if (it == tasks_by_job_.end()) { - return errors::NotFound("GetTasks failed. Job id <", request->job_id(), - "> not found."); - } - std::vector>& tasks = it->second; - bool has_finished_tasks = false; + std::vector> tasks; + TF_RETURN_IF_ERROR(state_.TasksForJob(request->job_id(), &tasks)); for (const auto& task : tasks) { - if (task->finished) { - has_finished_tasks = true; - continue; - } TaskInfo* task_info = response->mutable_task_info()->Add(); task_info->set_worker_address(task->worker_address); task_info->set_id(task->task_id); } - response->set_job_finished(has_finished_tasks && - response->task_info_size() == 0); + std::shared_ptr job; + TF_RETURN_IF_ERROR(state_.JobFromId(request->job_id(), &job)); + response->set_job_finished(job->finished); VLOG(3) << "Found " << response->task_info_size() << " tasks for job id " << request->job_id(); return Status::OK(); diff --git a/tensorflow/core/data/service/dispatcher_impl.h b/tensorflow/core/data/service/dispatcher_impl.h index e39f3269d02..6fa1815e9eb 100644 --- a/tensorflow/core/data/service/dispatcher_impl.h +++ b/tensorflow/core/data/service/dispatcher_impl.h @@ -80,21 +80,6 @@ class DataServiceDispatcherImpl { std::unique_ptr stub; }; - struct Task { - Task(int64 task_id, int64 job_id, int64 dataset_id, - const std::string& worker_address) - : task_id(task_id), - job_id(job_id), - dataset_id(dataset_id), - worker_address(worker_address) {} - - const int64 task_id; - const int64 job_id; - const int64 dataset_id; - const std::string worker_address; - bool finished = false; - }; - // Registers a dataset with the given fingerprint, storing the new dataset's // id in `*dataset-id`. Status RegisterDataset(uint64 fingerprint, const DatasetDef& dataset, @@ -107,22 +92,26 @@ class DataServiceDispatcherImpl { absl::optional named_job_key, std::shared_ptr* job) EXCLUSIVE_LOCKS_REQUIRED(mu_); - // Creates one task for each worker, for the given job. This method only - // updates dispatcher metadata with the new tasks, but doesn't assign the - // tasks to the workers. - std::vector> CreateTasksForJob( - std::shared_ptr job) - EXCLUSIVE_LOCKS_REQUIRED(mu_); - // Creates a new task for a job, returning a pointer to the created task. 
- std::shared_ptr CreateTask( + // Creates one task for each worker, for the given job. The created tasks are + // stored in `*tasks`. This method only updates dispatcher metadata with the + // new tasks, but doesn't assign the tasks to the workers. + Status CreateTasksForJob( std::shared_ptr job, - const std::string& worker_address) EXCLUSIVE_LOCKS_REQUIRED(mu_); + std::vector>* tasks) + EXCLUSIVE_LOCKS_REQUIRED(mu_); + + // Creates a new task for a job, storing the created task in `*task`. + Status CreateTask(std::shared_ptr job, + const std::string& worker_address, + std::shared_ptr* task); // Assigns the list of tasks to the workers indicated by their // `worker_address` fields. - Status AssignTasks(std::vector> tasks) + Status AssignTasks( + std::vector> tasks) LOCKS_EXCLUDED(mu_); // Assigns a task to the worker indicated by its `worker_address` field. - Status AssignTask(std::shared_ptr task) LOCKS_EXCLUDED(mu_); + Status AssignTask(std::shared_ptr task) + LOCKS_EXCLUDED(mu_); // Validates that an existing job matches the given processing_mode and // dataset_id, returning an error status describing any difference. Status ValidateMatchingJob(std::shared_ptr job, @@ -145,11 +134,6 @@ class DataServiceDispatcherImpl { // Registered workers, keyed by their addresses. absl::flat_hash_map> workers_ TF_GUARDED_BY(mu_); - // Tasks, keyed by task ids. - absl::flat_hash_map> tasks_ TF_GUARDED_BY(mu_); - // Mapping from job id to the tasks for that job. - absl::flat_hash_map>> tasks_by_job_ - TF_GUARDED_BY(mu_); absl::optional> journal_writer_ TF_GUARDED_BY(mu_); diff --git a/tensorflow/core/data/service/dispatcher_state.cc b/tensorflow/core/data/service/dispatcher_state.cc index 64be7fbc54e..093457a55af 100644 --- a/tensorflow/core/data/service/dispatcher_state.cc +++ b/tensorflow/core/data/service/dispatcher_state.cc @@ -33,8 +33,11 @@ Status DispatcherState::Apply(Update update) { case Update::kCreateJob: CreateJob(update.create_job()); break; - case Update::kFinishJob: - FinishJob(update.finish_job()); + case Update::kCreateTask: + CreateTask(update.create_task()); + break; + case Update::kFinishTask: + FinishTask(update.finish_task()); break; case Update::UPDATE_TYPE_NOT_SET: return errors::Internal("Update type not set."); @@ -68,7 +71,6 @@ void DispatcherState::CreateJob(const CreateJobUpdate& create_job) { named_job_key); DCHECK(!jobs_.contains(job_id)); jobs_[job_id] = job; - LOG(INFO) << "Created a new job with id " << job_id; if (named_job_key.has_value()) { DCHECK(!named_jobs_.contains(named_job_key.value())); named_jobs_[named_job_key.value()] = job; @@ -76,10 +78,31 @@ void DispatcherState::CreateJob(const CreateJobUpdate& create_job) { next_available_job_id_ = std::max(next_available_job_id_, job_id + 1); } -void DispatcherState::FinishJob(const FinishJobUpdate& finish_job) { - int64 job_id = finish_job.job_id(); - DCHECK(jobs_.contains(job_id)); - jobs_[job_id]->finished = true; +void DispatcherState::CreateTask(const CreateTaskUpdate& create_task) { + int64 task_id = create_task.task_id(); + auto& task = tasks_[task_id]; + DCHECK_EQ(task, nullptr); + task = std::make_shared(task_id, create_task.job_id(), + create_task.dataset_id(), + create_task.worker_address()); + tasks_by_job_[create_task.job_id()].push_back(task); + next_available_task_id_ = std::max(next_available_task_id_, task_id + 1); +} + +void DispatcherState::FinishTask(const FinishTaskUpdate& finish_task) { + VLOG(2) << "Marking task " << finish_task.task_id() << " as finished"; + int64 task_id = 
finish_task.task_id(); + auto& task = tasks_[task_id]; + DCHECK(task != nullptr); + task->finished = true; + bool all_finished = true; + for (const auto& task_for_job : tasks_by_job_[task->job_id]) { + if (!task_for_job->finished) { + all_finished = false; + } + } + VLOG(3) << "Job " << task->job_id << " finished: " << all_finished; + jobs_[task->job_id]->finished = all_finished; } int64 DispatcherState::NextAvailableDatasetId() const { @@ -141,5 +164,33 @@ int64 DispatcherState::NextAvailableJobId() const { return next_available_job_id_; } +Status DispatcherState::TaskFromId(int64 id, + std::shared_ptr* task) const { + auto it = tasks_.find(id); + if (it == tasks_.end()) { + return errors::NotFound("Task ", id, " not found"); + } + *task = it->second; + return Status::OK(); +} + +Status DispatcherState::TasksForJob( + int64 job_id, std::vector>* tasks) const { + auto it = tasks_by_job_.find(job_id); + if (it == tasks_by_job_.end()) { + return errors::NotFound("Job ", job_id, " not found"); + } + tasks->clear(); + tasks->reserve(it->second.size()); + for (const auto& task : it->second) { + tasks->push_back(task); + } + return Status::OK(); +} + +int64 DispatcherState::NextAvailableTaskId() const { + return next_available_task_id_; +} + } // namespace data } // namespace tensorflow diff --git a/tensorflow/core/data/service/dispatcher_state.h b/tensorflow/core/data/service/dispatcher_state.h index e54f51ba499..7313274ae71 100644 --- a/tensorflow/core/data/service/dispatcher_state.h +++ b/tensorflow/core/data/service/dispatcher_state.h @@ -110,6 +110,21 @@ class DispatcherState { bool finished = false; }; + struct Task { + Task(int64 task_id, int64 job_id, int64 dataset_id, + const std::string& worker_address) + : task_id(task_id), + job_id(job_id), + dataset_id(dataset_id), + worker_address(worker_address) {} + + const int64 task_id; + const int64 job_id; + const int64 dataset_id; + const std::string worker_address; + bool finished = false; + }; + // Returns the next available dataset id. int64 NextAvailableDatasetId() const; // Gets a dataset by id. Returns NOT_FOUND if there is no such dataset. @@ -128,11 +143,21 @@ class DispatcherState { // Gets a named job by key. Returns NOT_FOUND if there is no such job. Status NamedJobByKey(NamedJobKey key, std::shared_ptr* job) const; + // Returns the next available task id. + int64 NextAvailableTaskId() const; + // Gets a task by id. Returns NOT_FOUND if there is no such task. + Status TaskFromId(int64 id, std::shared_ptr* task) const; + // Stores a list of all tasks for the given job to `*tasks`. Returns NOT_FOUND + // if there is no such job. + Status TasksForJob(int64 job_id, + std::vector>* tasks) const; + private: // Registers a dataset. The dataset must not already be registered. void RegisterDataset(const RegisterDatasetUpdate& register_dataset); void CreateJob(const CreateJobUpdate& create_job); - void FinishJob(const FinishJobUpdate& finish_job); + void CreateTask(const CreateTaskUpdate& create_task); + void FinishTask(const FinishTaskUpdate& finish_task); int64 next_available_dataset_id_ = 0; // Registered datasets, keyed by dataset ids. @@ -147,6 +172,12 @@ class DispatcherState { // Named jobs, keyed by their names and indices. Not all jobs have names, so // this is a subset of the jobs stored in `jobs_`. absl::flat_hash_map> named_jobs_; + + int64 next_available_task_id_ = 0; + // Tasks, keyed by task ids. + absl::flat_hash_map> tasks_; + // Tasks, keyed by job ids. 
+ absl::flat_hash_map>> tasks_by_job_; }; } // namespace data diff --git a/tensorflow/core/data/service/dispatcher_state_test.cc b/tensorflow/core/data/service/dispatcher_state_test.cc index 02961d5bd1d..933d783d227 100644 --- a/tensorflow/core/data/service/dispatcher_state_test.cc +++ b/tensorflow/core/data/service/dispatcher_state_test.cc @@ -26,6 +26,12 @@ namespace tensorflow { namespace data { namespace { +using Dataset = DispatcherState::Dataset; +using NamedJobKey = DispatcherState::NamedJobKey; +using Job = DispatcherState::Job; +using Task = DispatcherState::Task; +using ::testing::SizeIs; + Status RegisterDatasetWithIdAndFingerprint(int64 id, uint64 fingerprint, DispatcherState* state) { Update update; @@ -47,8 +53,7 @@ Status CreateAnonymousJob(int64 job_id, int64 dataset_id, return Status::OK(); } -Status CreateNamedJob(int64 job_id, int64 dataset_id, - DispatcherState::NamedJobKey named_job_key, +Status CreateNamedJob(int64 job_id, int64 dataset_id, NamedJobKey named_job_key, DispatcherState* state) { Update update; CreateJobUpdate* create_job = update.mutable_create_job(); @@ -62,10 +67,22 @@ Status CreateNamedJob(int64 job_id, int64 dataset_id, return Status::OK(); } -Status FinishJob(int64 job_id, DispatcherState* state) { +Status CreateTask(int64 task_id, int64 job_id, int64 dataset_id, + StringPiece worker_address, DispatcherState* state) { Update update; - FinishJobUpdate* finish_job = update.mutable_finish_job(); - finish_job->set_job_id(job_id); + CreateTaskUpdate* create_task = update.mutable_create_task(); + create_task->set_task_id(task_id); + create_task->set_job_id(job_id); + create_task->set_dataset_id(dataset_id); + create_task->set_worker_address(worker_address); + TF_RETURN_IF_ERROR(state->Apply(update)); + return Status::OK(); +} + +Status FinishTask(int64 task_id, DispatcherState* state) { + Update update; + FinishTaskUpdate* finish_task = update.mutable_finish_task(); + finish_task->set_task_id(task_id); TF_RETURN_IF_ERROR(state->Apply(update)); return Status::OK(); } @@ -76,14 +93,15 @@ TEST(DispatcherState, RegisterDataset) { uint64 fingerprint = 20; DispatcherState state; TF_EXPECT_OK(RegisterDatasetWithIdAndFingerprint(id, fingerprint, &state)); + EXPECT_EQ(state.NextAvailableDatasetId(), id + 1); { - std::shared_ptr dataset; + std::shared_ptr dataset; TF_EXPECT_OK(state.DatasetFromFingerprint(fingerprint, &dataset)); EXPECT_EQ(id, dataset->dataset_id); } { - std::shared_ptr dataset; + std::shared_ptr dataset; TF_EXPECT_OK(state.DatasetFromId(id, &dataset)); EXPECT_EQ(fingerprint, dataset->fingerprint); } @@ -91,14 +109,14 @@ TEST(DispatcherState, RegisterDataset) { TEST(DispatcherState, MissingDatasetId) { DispatcherState state; - std::shared_ptr dataset; + std::shared_ptr dataset; Status s = state.DatasetFromId(0, &dataset); EXPECT_EQ(s.code(), error::NOT_FOUND); } TEST(DispatcherState, MissingDatasetFingerprint) { DispatcherState state; - std::shared_ptr dataset; + std::shared_ptr dataset; Status s = state.DatasetFromFingerprint(0, &dataset); EXPECT_EQ(s.code(), error::NOT_FOUND); } @@ -123,11 +141,11 @@ TEST(DispatcherState, AnonymousJob) { int64 job_id = 3; int64 dataset_id = 10; DispatcherState state; - Update update; TF_EXPECT_OK(RegisterDatasetWithIdAndFingerprint(dataset_id, 1, &state)); TF_EXPECT_OK(CreateAnonymousJob(job_id, dataset_id, &state)); - std::shared_ptr job; + std::shared_ptr job; TF_EXPECT_OK(state.JobFromId(job_id, &job)); + EXPECT_EQ(state.NextAvailableJobId(), job_id + 1); EXPECT_EQ(dataset_id, job->dataset_id); 
EXPECT_EQ(job_id, job->job_id); EXPECT_FALSE(job->finished); @@ -137,29 +155,135 @@ TEST(DispatcherState, NamedJob) { int64 job_id = 3; int64 dataset_id = 10; DispatcherState state; - Update update; TF_EXPECT_OK(RegisterDatasetWithIdAndFingerprint(dataset_id, 1, &state)); - DispatcherState::NamedJobKey named_job_key("test", 1); + NamedJobKey named_job_key("test", 1); TF_EXPECT_OK(CreateNamedJob(job_id, dataset_id, named_job_key, &state)); - std::shared_ptr job; + std::shared_ptr job; TF_EXPECT_OK(state.NamedJobByKey(named_job_key, &job)); + EXPECT_EQ(state.NextAvailableJobId(), job_id + 1); EXPECT_EQ(dataset_id, job->dataset_id); EXPECT_EQ(job_id, job->job_id); EXPECT_FALSE(job->finished); } -TEST(DispatcherState, FinishJob) { +TEST(DispatcherState, CreateTask) { int64 job_id = 3; int64 dataset_id = 10; + int64 task_id = 8; + std::string worker_address = "test_worker_address"; DispatcherState state; - Update update; TF_EXPECT_OK(RegisterDatasetWithIdAndFingerprint(dataset_id, 1, &state)); TF_EXPECT_OK(CreateAnonymousJob(job_id, dataset_id, &state)); - TF_EXPECT_OK(FinishJob(job_id, &state)); - std::shared_ptr job; + TF_EXPECT_OK(CreateTask(task_id, job_id, dataset_id, worker_address, &state)); + EXPECT_EQ(state.NextAvailableTaskId(), task_id + 1); + { + std::shared_ptr task; + TF_EXPECT_OK(state.TaskFromId(task_id, &task)); + EXPECT_EQ(task_id, task->task_id); + EXPECT_EQ(job_id, task->job_id); + EXPECT_EQ(dataset_id, task->dataset_id); + EXPECT_EQ(worker_address, task->worker_address); + } + { + std::vector> tasks; + TF_EXPECT_OK(state.TasksForJob(job_id, &tasks)); + EXPECT_THAT(tasks, SizeIs(1)); + } +} + +TEST(DispatcherState, CreateTasksForSameJob) { + int64 job_id = 3; + int64 dataset_id = 10; + int64 task_id_1 = 8; + int64 task_id_2 = 9; + std::string worker_address = "test_worker_address"; + DispatcherState state; + TF_EXPECT_OK(RegisterDatasetWithIdAndFingerprint(dataset_id, 1, &state)); + TF_EXPECT_OK(CreateAnonymousJob(job_id, dataset_id, &state)); + TF_EXPECT_OK( + CreateTask(task_id_1, job_id, dataset_id, worker_address, &state)); + TF_EXPECT_OK( + CreateTask(task_id_2, job_id, dataset_id, worker_address, &state)); + { + std::vector> tasks; + TF_EXPECT_OK(state.TasksForJob(job_id, &tasks)); + EXPECT_EQ(2, tasks.size()); + } +} + +TEST(DispatcherState, CreateTasksForDifferentJobs) { + int64 job_id_1 = 3; + int64 job_id_2 = 4; + int64 dataset_id = 10; + int64 task_id_1 = 8; + int64 task_id_2 = 9; + std::string worker_address = "test_worker_address"; + DispatcherState state; + TF_EXPECT_OK(RegisterDatasetWithIdAndFingerprint(dataset_id, 1, &state)); + TF_EXPECT_OK(CreateAnonymousJob(job_id_1, dataset_id, &state)); + TF_EXPECT_OK(CreateAnonymousJob(job_id_2, dataset_id, &state)); + TF_EXPECT_OK( + CreateTask(task_id_1, job_id_1, dataset_id, worker_address, &state)); + TF_EXPECT_OK( + CreateTask(task_id_2, job_id_2, dataset_id, worker_address, &state)); + { + std::vector> tasks; + TF_EXPECT_OK(state.TasksForJob(job_id_1, &tasks)); + EXPECT_EQ(1, tasks.size()); + } + { + std::vector> tasks; + TF_EXPECT_OK(state.TasksForJob(job_id_2, &tasks)); + EXPECT_EQ(1, tasks.size()); + } +} + +TEST(DispatcherState, FinishTask) { + int64 job_id = 3; + int64 dataset_id = 10; + int64 task_id = 4; + std::string worker_address = "test_worker_address"; + DispatcherState state; + TF_EXPECT_OK(RegisterDatasetWithIdAndFingerprint(dataset_id, 1, &state)); + TF_EXPECT_OK(CreateAnonymousJob(job_id, dataset_id, &state)); + TF_EXPECT_OK(CreateTask(task_id, job_id, dataset_id, worker_address, &state)); + 
TF_EXPECT_OK(FinishTask(task_id, &state)); + std::shared_ptr task; + TF_EXPECT_OK(state.TaskFromId(task_id, &task)); + EXPECT_TRUE(task->finished); + std::shared_ptr job; TF_EXPECT_OK(state.JobFromId(job_id, &job)); EXPECT_TRUE(job->finished); } +TEST(DispatcherState, FinishMultiTaskJob) { + int64 job_id = 3; + int64 dataset_id = 10; + int64 task_id_1 = 4; + int64 task_id_2 = 5; + std::string worker_address = "test_worker_address"; + DispatcherState state; + TF_EXPECT_OK(RegisterDatasetWithIdAndFingerprint(dataset_id, 1, &state)); + TF_EXPECT_OK(CreateAnonymousJob(job_id, dataset_id, &state)); + TF_EXPECT_OK( + CreateTask(task_id_1, job_id, dataset_id, worker_address, &state)); + TF_EXPECT_OK( + CreateTask(task_id_2, job_id, dataset_id, worker_address, &state)); + + TF_EXPECT_OK(FinishTask(task_id_1, &state)); + { + std::shared_ptr job; + TF_EXPECT_OK(state.JobFromId(job_id, &job)); + EXPECT_FALSE(job->finished); + } + + TF_EXPECT_OK(FinishTask(task_id_2, &state)); + { + std::shared_ptr job; + TF_EXPECT_OK(state.JobFromId(job_id, &job)); + EXPECT_TRUE(job->finished); + } +} + } // namespace data } // namespace tensorflow diff --git a/tensorflow/core/data/service/journal.proto b/tensorflow/core/data/service/journal.proto index 944b77b87f1..fd4c5863ca9 100644 --- a/tensorflow/core/data/service/journal.proto +++ b/tensorflow/core/data/service/journal.proto @@ -11,7 +11,8 @@ message Update { oneof update_type { RegisterDatasetUpdate register_dataset = 1; CreateJobUpdate create_job = 2; - FinishJobUpdate finish_job = 3; + CreateTaskUpdate create_task = 3; + FinishTaskUpdate finish_task = 4; } } @@ -34,6 +35,13 @@ message CreateJobUpdate { NamedJobKeyDef named_job_key = 4; } -message FinishJobUpdate { - int64 job_id = 1; +message CreateTaskUpdate { + int64 task_id = 1; + int64 job_id = 2; + int64 dataset_id = 3; + string worker_address = 4; +} + +message FinishTaskUpdate { + int64 task_id = 1; } diff --git a/tensorflow/core/data/service/journal_test.cc b/tensorflow/core/data/service/journal_test.cc index 3c43cf763e9..169e58ed048 100644 --- a/tensorflow/core/data/service/journal_test.cc +++ b/tensorflow/core/data/service/journal_test.cc @@ -46,10 +46,10 @@ Update MakeCreateJobUpdate() { return update; } -Update MakeFinishJobUpdate() { +Update MakeFinishTaskUpdate() { Update update; - FinishJobUpdate* finish_job = update.mutable_finish_job(); - finish_job->set_job_id(8); + FinishTaskUpdate* finish_task = update.mutable_finish_task(); + finish_task->set_task_id(8); return update; } @@ -86,7 +86,7 @@ TEST(Journal, RoundTripMultiple) { EXPECT_TRUE(NewJournalDir(&journal_dir)); std::vector updates = {MakeCreateJobUpdate(), MakeRegisterDatasetUpdate(), - MakeFinishJobUpdate()}; + MakeFinishTaskUpdate()}; FileJournalWriter writer(Env::Default(), journal_dir); for (const auto& update : updates) { TF_EXPECT_OK(writer.Write(update)); @@ -100,7 +100,7 @@ TEST(Journal, AppendExistingFile) { EXPECT_TRUE(NewJournalDir(&journal_dir)); std::vector updates = {MakeCreateJobUpdate(), MakeRegisterDatasetUpdate(), - MakeFinishJobUpdate()}; + MakeFinishTaskUpdate()}; for (const auto& update : updates) { FileJournalWriter writer(Env::Default(), journal_dir); TF_EXPECT_OK(writer.Write(update)); diff --git a/tensorflow/core/kernels/data/experimental/data_service_dataset_op.cc b/tensorflow/core/kernels/data/experimental/data_service_dataset_op.cc index 0b4e8cbbbae..8e1713e2d77 100644 --- a/tensorflow/core/kernels/data/experimental/data_service_dataset_op.cc +++ 
b/tensorflow/core/kernels/data/experimental/data_service_dataset_op.cc @@ -195,6 +195,7 @@ class DataServiceDatasetOp::Dataset : public DatasetBase { void CancelThreads() TF_LOCKS_EXCLUDED(mu_) { mutex_lock l(mu_); + VLOG(1) << "Cancelling threads in DataServiceDataset::Iterator"; cancelled_ = true; worker_thread_cv_.notify_all(); manager_thread_cv_.notify_all(); @@ -295,7 +296,9 @@ class DataServiceDatasetOp::Dataset : public DatasetBase { // TODO(aaudibert): Instead of polling, have dispatcher send updates when // the list of tasks changes. void TaskThreadManager(std::unique_ptr ctx) { - VLOG(3) << "Starting task thread manager"; + auto cleanup = + gtl::MakeCleanup([] { VLOG(1) << "Task thread manager exiting"; }); + VLOG(1) << "Starting task thread manager"; DataServiceDispatcherClient dispatcher(dataset()->address_, dataset()->protocol_); uint64 next_check = Env::Default()->NowMicros(); @@ -396,8 +399,11 @@ class DataServiceDatasetOp::Dataset : public DatasetBase { } void RunWorkerThread(std::function done) { - auto cleanup = gtl::MakeCleanup([done = std::move(done)]() { done(); }); - VLOG(3) << "Starting worker thread"; + auto cleanup = gtl::MakeCleanup([done = std::move(done)]() { + done(); + VLOG(1) << "Worker thread exiting"; + }); + VLOG(1) << "Starting worker thread"; std::shared_ptr task_to_process; while (true) { { From 0f142c8ae150cef022f9fea0d3185c886db0ba02 Mon Sep 17 00:00:00 2001 From: Ruoxin Sang Date: Tue, 4 Aug 2020 16:09:51 -0700 Subject: [PATCH 2117/2522] Add a sync point before DistributedDataset `__iter__` method returns. This is to avoid cases if users are writing code as below: ``` iterator = iter(dist_dataset) outputs = multi_device_function(iterator) ``` In async eager, function and eager ops go into different execution queues. The iterator may not finish initialization when the multi device function is called. PiperOrigin-RevId: 324911136 Change-Id: I0833756c5e48775a7abee6968dce41549beb9669 --- tensorflow/python/distribute/input_lib.py | 28 +++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tensorflow/python/distribute/input_lib.py b/tensorflow/python/distribute/input_lib.py index 6484cf14e42..b77739c1274 100644 --- a/tensorflow/python/distribute/input_lib.py +++ b/tensorflow/python/distribute/input_lib.py @@ -1032,6 +1032,13 @@ class DistributedDataset(_IterableInput): iterator = DistributedIterator(self._input_workers, worker_iterators, self._strategy) iterator._element_spec = self.element_spec # pylint: disable=protected-access + + # When async eager is enabled, sometimes the iterator may not finish + # initialization before passing to a multi device function, add a sync point + # here to make sure all underlying iterators are initialized. + if context.executing_eagerly(): + context.async_wait() + return iterator @property @@ -1106,6 +1113,13 @@ class DistributedDatasetV1(DistributedDataset): iterator = DistributedIteratorV1(self._input_workers, worker_iterators, self._strategy) iterator._element_spec = self.element_spec # pylint: disable=protected-access + + # When async eager is enabled, sometimes the iterator may not finish + # initialization before passing to a multi device function, add a sync point + # here to make sure all underlying iterators are initialized. 
+ if context.executing_eagerly(): + context.async_wait() + return iterator def __iter__(self): @@ -1173,6 +1187,13 @@ class DistributedDatasetsFromFunction(_IterableInput): iterator = DistributedIterator(self._input_workers, iterators, self._strategy) iterator._element_spec = self._element_spec # pylint: disable=protected-access + + # When async eager is enabled, sometimes the iterator may not finish + # initialization before passing to a multi device function, add a sync + # point here to make sure all underlying iterators are initialized. + if context.executing_eagerly(): + context.async_wait() + return iterator raise RuntimeError("__iter__() is only supported inside of tf.function " @@ -1213,6 +1234,13 @@ class DistributedDatasetsFromFunctionV1(DistributedDatasetsFromFunction): iterator = DistributedIteratorV1(self._input_workers, iterators, self._strategy) iterator._element_spec = self._element_spec # pylint: disable=protected-access + + # When async eager is enabled, sometimes the iterator may not finish + # initialization before passing to a multi device function, add a sync point + # here to make sure all underlying iterators are initialized. + if context.executing_eagerly(): + context.async_wait() + return iterator def __iter__(self): From 460115529023a3b8a2b0ed743c152f0467f2daa1 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Tue, 4 Aug 2020 16:26:17 -0700 Subject: [PATCH 2118/2522] [MLIR:TF] Hoist cwise binary op out of concat PiperOrigin-RevId: 324914057 Change-Id: I4e007a3dfc64c6182920c1bea4dd6b217f4c1866 --- .../mlir/tensorflow/ir/tf_generated_ops.td | 10 +- .../compiler/mlir/tensorflow/ir/tf_op_base.td | 4 + .../compiler/mlir/tensorflow/ir/tf_ops_a_m.cc | 153 ++++++++++++++++++ .../compiler/mlir/tensorflow/ir/tf_traits.h | 5 + .../mlir/tensorflow/tests/canonicalize.mlir | 37 +++++ 5 files changed, 205 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td index 84f3fa9c463..bba468acddb 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td @@ -136,7 +136,7 @@ Inputs must be of same size and shape. let hasFolder = 1; } -def TF_AddV2Op : TF_Op<"AddV2", [Commutative, NoSideEffect, ResultsBroadcastableShape, TF_LayoutAgnostic, TF_SameOperandsAndResultElementTypeResolveRef]>, +def TF_AddV2Op : TF_Op<"AddV2", [Commutative, NoSideEffect, ResultsBroadcastableShape, TF_LayoutAgnostic, TF_SameOperandsAndResultElementTypeResolveRef, TF_CwiseBinary]>, WithBroadcastableBinOpBuilder { let summary = "Returns x + y element-wise."; @@ -1711,6 +1711,8 @@ def TF_ConcatV2Op : TF_Op<"ConcatV2", [NoSideEffect]> { let verifier = [{ return Verify(*this); }]; + + let hasCanonicalizer = 1; } def TF_ConjOp : TF_Op<"Conj", [NoSideEffect, SameOperandsAndResultType]> { @@ -6070,7 +6072,7 @@ the result here is consistent with a truncating divide. E.g. 
TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } -def TF_MulOp : TF_Op<"Mul", [Commutative, NoSideEffect, ResultsBroadcastableShape, TF_SameOperandsAndResultElementTypeResolveRef]>, +def TF_MulOp : TF_Op<"Mul", [Commutative, NoSideEffect, ResultsBroadcastableShape, TF_SameOperandsAndResultElementTypeResolveRef, TF_CwiseBinary]>, WithBroadcastableBinOpBuilder { let summary = "Returns x * y element-wise."; @@ -7338,7 +7340,7 @@ tf.real(input) ==> [-2.25, 3.25] TF_DerivedResultTypeAttr Tout = TF_DerivedResultTypeAttr<0>; } -def TF_RealDivOp : TF_Op<"RealDiv", [NoSideEffect, ResultsBroadcastableShape]>, +def TF_RealDivOp : TF_Op<"RealDiv", [NoSideEffect, ResultsBroadcastableShape, TF_CwiseBinary]>, WithBroadcastableBinOpBuilder { let summary = "Returns x / y element-wise for real types."; @@ -9887,7 +9889,7 @@ Examples: TF_DerivedOperandSizeAttr N = TF_DerivedOperandSizeAttr<0>; } -def TF_SubOp : TF_Op<"Sub", [NoSideEffect, ResultsBroadcastableShape, TF_SameOperandsAndResultElementTypeResolveRef]>, +def TF_SubOp : TF_Op<"Sub", [NoSideEffect, ResultsBroadcastableShape, TF_SameOperandsAndResultElementTypeResolveRef, TF_CwiseBinary]>, WithBroadcastableBinOpBuilder { let summary = "Returns x - y element-wise."; diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td index 544cfb8af64..81a0e1bd1a5 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td @@ -73,6 +73,10 @@ def TF_LayoutAgnostic : NativeOpTrait<"TF::LayoutAgnostic">; // certain state around within their implementations. def TF_CannotDuplicate : NativeOpTrait<"TF::CannotDuplicate">; +// Coefficient wise binary operation with implicit broadcasting support, for +// example tf.Sub operation. +def TF_CwiseBinary : NativeOpTrait<"TF::CwiseBinary">; + // Variant of broadcastable trait that considers TF's subtype behavior. class TF_OpIsBroadcastableToRes : And<[ TCOpResIsShapedTypePred, diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc index 485e4fa5315..2ed44fd3fc7 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc @@ -512,6 +512,159 @@ void ConcatOp::getCanonicalizationPatterns(OwningRewritePatternList &results, results.insert(context); } +namespace { + +// Hoist coefficient-wise binary operation out of the Concat op: +// +// %0 = tf.Mul(%lhs_0, %rhs_0) +// %1 = tf.Mul(%lhs_1, %rhs_1) +// ... +// %n = tf.Mul(%lhs_n, %rhs_n) +// %m = tf.ConcatV2(%0, %1, ..., %n, %axis) +// +// Rewrite it to: +// +// %0 = tf.ConcatV2(%lhs0, %lhs1, ..., %lhs_n, %lhs_concat_axis) +// %1 = tf.ConcatV2(%rhs0, %rhs1, ..., %rhs_n, %rhs_concat_axis) +// %2 = tf.Mul(%0, %1) +// +// Because coefficient-wise binary operations support implicit broadcasting, we +// should be very careful with this optimization, and do not accidentally +// produce incorrect concat operations. 
+class HoistCwiseBinaryOutOfConcat : public OpRewritePattern { + public: + explicit HoistCwiseBinaryOutOfConcat(MLIRContext *context) + : OpRewritePattern(context) {} + LogicalResult matchAndRewrite(TF::ConcatV2Op op, + PatternRewriter &rewriter) const override; + + private: + struct HoistParams { + SmallVector lhs_args; + SmallVector rhs_args; + int64_t lhs_axis; + int64_t rhs_axis; + Type lhs_concat_type; + Type rhs_concat_type; + }; + + // Returns parameters of a binary op hoisting out of concatenation if all of + // the operands are in one of the compatible configurations. + Optional GetHoistParams(TF::ConcatV2Op op, int64_t axis) const; +}; + +LogicalResult HoistCwiseBinaryOutOfConcat::matchAndRewrite( + TF::ConcatV2Op op, PatternRewriter &rewriter) const { + auto loc = op.getLoc(); + + // Axis must be a constant scalar value. + DenseIntElementsAttr axis_attr; + if (!matchPattern(op.axis(), m_Constant(&axis_attr))) return failure(); + if (axis_attr.getNumElements() != 1) return failure(); + int64_t axis = + axis_attr.getSplatValue().getValue().getSExtValue(); + + // All concat operands must be defined by ops. + Operation *first_arg_op = op.values().front().getDefiningOp(); + if (first_arg_op == nullptr) return failure(); + + // All concat operands must be produced by the coeff-wise binary operation. + if (!first_arg_op->hasTrait()) return failure(); + + // All concat operands must be defined by the op of same kind. + bool args_same_op = llvm::all_of(op.values(), [&](Value arg) -> bool { + Operation *arg_op = arg.getDefiningOp(); + return arg_op && arg_op->getName() == first_arg_op->getName(); + }); + if (!args_same_op) return failure(); + + // Compute binary operands hoist parameters. + auto hoist_params = GetHoistParams(op, axis); + if (!hoist_params.hasValue()) return failure(); + + // New lhs and rhs concatenation axis. + auto axis_type = mlir::RankedTensorType::get({}, rewriter.getIntegerType(64)); + auto lhs_axis = rewriter.create( + loc, DenseIntElementsAttr::get(axis_type, hoist_params->lhs_axis)); + auto rhs_axis = rewriter.create( + loc, DenseIntElementsAttr::get(axis_type, hoist_params->rhs_axis)); + + // Concatenate binary ops operands on the new axis. + auto lhs_concat = rewriter.create( + loc, hoist_params->lhs_concat_type, hoist_params->lhs_args, lhs_axis); + auto rhs_concat = rewriter.create( + loc, hoist_params->rhs_concat_type, hoist_params->rhs_args, rhs_axis); + + // Replace original concat with a binary op. + OperationState new_binary_op_state( + loc, first_arg_op->getName().getStringRef(), + {lhs_concat.getResult(), rhs_concat.getResult()}, + op.getResult().getType(), ArrayRef()); + Operation *new_binary_op = rewriter.createOperation(new_binary_op_state); + + rewriter.replaceOp(op, new_binary_op->getResults()); + + return success(); +} + +Optional +HoistCwiseBinaryOutOfConcat::GetHoistParams(TF::ConcatV2Op op, + int64_t axis) const { + // Collects lhs or rhs arguments of concat op operands. + auto args = [&](int operand_idx) -> SmallVector { + auto range = llvm::map_range(op.values(), [&](Value arg) { + return arg.getDefiningOp()->getOperand(operand_idx); + }); + return {range.begin(), range.end()}; + }; + + // Returns true if all binary ops operands at `operand_idx` index are tensors + // of `axis + 1` rank and axis dim has size `1`. 
+ auto is_all_tensors = [&](int operand_idx, int axis) -> bool { + return llvm::all_of(op.values(), [&](Value arg) -> bool { + auto lhs = arg.getDefiningOp()->getOperand(operand_idx); + auto ranked = lhs.getType().dyn_cast(); + return ranked && ranked.getRank() == (axis + 1) && + ranked.getShape()[axis] == 1; + }); + }; + + // Returns true if all binary ops operands at `operand_idx` index are scalars. + auto is_all_scalars = [&](int operand_idx) -> bool { + return llvm::all_of(op.values(), [&](Value arg) -> bool { + auto lhs = arg.getDefiningOp()->getOperand(operand_idx); + auto ranked = lhs.getType().dyn_cast(); + return ranked && ranked.hasRank() && ranked.getRank() == 0; + }); + }; + + auto ranked = op.getType().cast(); + if (!ranked) return None; + + // TODO(ezhulenev): Add support for more valid concat patterns. + + // Tensor + Scalar: [..., 1] + [] <- scalar + // ^ + // \- axis is the innermost dimension. + // + // Concatenate tensor arguments on the same axis as the original operation, + // and concatenate scalars into the vector. + if (is_all_tensors(0, axis) && is_all_scalars(1)) { + std::array rhs_dims{static_cast(op.values().size())}; + auto rhs_type = RankedTensorType::get(rhs_dims, ranked.getElementType()); + return HoistParams{args(0), args(1), axis, 0, op.getType(), rhs_type}; + } + + return None; +} + +} // namespace + +void ConcatV2Op::getCanonicalizationPatterns(OwningRewritePatternList &results, + MLIRContext *context) { + results.insert(context); +} + //===----------------------------------------------------------------------===// // ConcatOffsetOp //===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_traits.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_traits.h index b9a781b99e7..6cf2df60a3f 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_traits.h +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_traits.h @@ -124,6 +124,11 @@ class CannotDuplicate : public TraitBase { } }; +// Coefficient wise binary operation with implicit broadcasting support, for +// example tf.Sub operation. 
+template +class CwiseBinary : public TraitBase {}; + } // namespace TF } // namespace OpTrait } // namespace mlir diff --git a/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir b/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir index 007c123a034..5808b03c909 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir @@ -143,6 +143,43 @@ func @testConcatCanonicalization(%arg0: tensor<2x1xi32>, %arg1: tensor<2x1xi32>) return %1 : tensor<2x2xi32> } +// CHECK-LABEL: testConcatCwiseBinaryOnInnerDim +func @testConcatCwiseBinaryOnInnerDim(%arg0: tensor, + %arg1: tensor, %arg2: tensor, %arg3: tensor) -> tensor { + + // CHECK: %[[LHS_AXIS:.*]] = "tf.Const"() {value = dense<1> : tensor} + // CHECK: %[[RHS_AXIS:.*]] = "tf.Const"() {value = dense<0> : tensor} + + // CHECK: %[[LHS_CONCAT:.*]] = "tf.ConcatV2"(%arg0, %arg1, %[[LHS_AXIS]]) + // CHECK: %[[RHS_CONCAT:.*]] = "tf.ConcatV2"(%arg2, %arg3, %[[RHS_AXIS]]) + + // CHECK: %[[MUL:.*]] = "tf.Mul"(%[[LHS_CONCAT]], %[[RHS_CONCAT]]) + // CHECK-SAME: (tensor, tensor<2xf32>) -> tensor + // CHECK: return %[[MUL]] + + %0 = "tf.Const"() { value = dense<1> : tensor } : () -> tensor + %1 = "tf.Mul"(%arg0, %arg2) : (tensor, tensor) -> tensor + %2 = "tf.Mul"(%arg1, %arg3) : (tensor, tensor) -> tensor + %3 = "tf.ConcatV2"(%1, %2, %0) : (tensor, tensor, tensor) -> tensor + + return %3 : tensor +} + +// CHECK-LABEL: testConcatCwiseBinaryInvalidInnerDim +func @testConcatCwiseBinaryInvalidInnerDim(%arg0: tensor, + %arg1: tensor, %arg2: tensor, %arg3: tensor) -> tensor { + // Each individual binary operation has an implicit broadcast that will be + // lost if we would reorder them with the concat. + + // CHECK: "tf.ConcatV2"(%1, %2, %0) + %0 = "tf.Const"() { value = dense<1> : tensor } : () -> tensor + %1 = "tf.Mul"(%arg0, %arg2) : (tensor, tensor) -> tensor + %2 = "tf.Mul"(%arg1, %arg3) : (tensor, tensor) -> tensor + %3 = "tf.ConcatV2"(%1, %2, %0) : (tensor, tensor, tensor) -> tensor + + return %3 : tensor +} + // CHECK-LABEL: testLogOfSoftmax func @testLogOfSoftmax(%arg0: tensor<8x16xf32>) -> tensor<8x16xf32> { %0 = "tf.Softmax"(%arg0) : (tensor<8x16xf32>) -> tensor<8x16xf32> From 60040ffabfce4d7c89b7cb3b57528aa721e0ead2 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Tue, 4 Aug 2020 16:30:17 -0700 Subject: [PATCH 2119/2522] NFC: Remove unused dependencies PiperOrigin-RevId: 324914796 Change-Id: I230ad19f82048474f77170a6244fcf4e7a835ba8 --- tensorflow/lite/tools/optimize/BUILD | 8 ++++++++ tensorflow/lite/tools/optimize/calibration/BUILD | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/tensorflow/lite/tools/optimize/BUILD b/tensorflow/lite/tools/optimize/BUILD index ab153afc2cf..146f869a906 100644 --- a/tensorflow/lite/tools/optimize/BUILD +++ b/tensorflow/lite/tools/optimize/BUILD @@ -49,6 +49,7 @@ cc_binary( srcs = ["modify_model_interface_main.cc"], deps = [ ":modify_model_interface", + ":quantize_model", ], ) @@ -89,6 +90,8 @@ cc_library( hdrs = ["quantization_wrapper.h"], deps = [ ":quantization_wrapper_utils", + "//tensorflow/lite:framework", + "//tensorflow/lite/core/api", "//tensorflow/lite/schema:schema_fbs", "//tensorflow/lite/tools/optimize:quantize_model", "@flatbuffers", @@ -112,6 +115,7 @@ cc_library( "//tensorflow/lite/schema:schema_fbs", "//third_party/eigen3", "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", ], ) @@ -126,6 +130,7 @@ cc_library( "//tensorflow/lite/kernels/internal:types", 
"//tensorflow/lite/schema:schema_fbs", "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", ], ) @@ -154,6 +159,7 @@ cc_library( hdrs = ["operator_property.h"], deps = [ "//tensorflow/lite:framework", + "//tensorflow/lite/kernels/internal:types", "//tensorflow/lite/schema:schema_fbs", ], ) @@ -194,6 +200,7 @@ cc_library( ":quantization_utils", ":model_utils", "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", "@com_google_absl//absl/container:flat_hash_map", "@flatbuffers", "//tensorflow/lite:framework", @@ -238,6 +245,7 @@ cc_library( srcs = ["test_util.cc"], hdrs = ["test_util.h"], deps = [ + "//tensorflow/lite:framework", "//tensorflow/lite/core/api", "@com_google_googletest//:gtest", "@flatbuffers", diff --git a/tensorflow/lite/tools/optimize/calibration/BUILD b/tensorflow/lite/tools/optimize/calibration/BUILD index f641b151aa9..06183353e44 100644 --- a/tensorflow/lite/tools/optimize/calibration/BUILD +++ b/tensorflow/lite/tools/optimize/calibration/BUILD @@ -51,6 +51,7 @@ cc_library( "//tensorflow/lite/schema:schema_fbs", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", "@flatbuffers", ], ) @@ -104,6 +105,7 @@ cc_test( deps = [ ":logging_op_resolver", "//tensorflow/lite:framework", + "//tensorflow/lite/kernels:builtin_ops", "@com_google_googletest//:gtest", ], ) @@ -118,6 +120,7 @@ cc_library( "//tensorflow/lite:framework", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", ], ) @@ -127,6 +130,7 @@ cc_library( hdrs = ["calibration_logger.h"], copts = tflite_copts(), deps = [ + "//tensorflow/lite:framework", "//tensorflow/lite:minimal_logging", "//tensorflow/lite/c:common", "//tensorflow/lite/core/api", From 5a0d95f7eb6476e015d9dff644567076325f242e Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Tue, 4 Aug 2020 16:33:13 -0700 Subject: [PATCH 2120/2522] [MLIR:TF] Hoist cwise unary op out of concat PiperOrigin-RevId: 324915363 Change-Id: I3537157af5762054a068f4aadaa6dd9761b8204d --- .../mlir/tensorflow/ir/tf_generated_ops.td | 2 +- .../compiler/mlir/tensorflow/ir/tf_op_base.td | 3 + .../compiler/mlir/tensorflow/ir/tf_ops_a_m.cc | 63 ++++++++++++++++++- .../compiler/mlir/tensorflow/ir/tf_traits.h | 6 +- .../mlir/tensorflow/tests/canonicalize.mlir | 13 ++++ 5 files changed, 84 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td index bba468acddb..081903d13cf 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td @@ -4799,7 +4799,7 @@ tf.math.log(x) ==> [-inf, -0.6931472, 0. , 1.609438] let hasCanonicalizer = 1; } -def TF_Log1pOp : TF_Op<"Log1p", [NoSideEffect, SameOperandsAndResultType]> { +def TF_Log1pOp : TF_Op<"Log1p", [NoSideEffect, SameOperandsAndResultType, TF_CwiseUnary]> { let summary = "Computes natural logarithm of (1 + x) element-wise."; let description = [{ diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td index 81a0e1bd1a5..1755c975c23 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td @@ -77,6 +77,9 @@ def TF_CannotDuplicate : NativeOpTrait<"TF::CannotDuplicate">; // example tf.Sub operation. 
def TF_CwiseBinary : NativeOpTrait<"TF::CwiseBinary">; +// Coefficient wise unary operation, for example tf.Sqrt operation. +def TF_CwiseUnary : NativeOpTrait<"TF::CwiseUnary">; + // Variant of broadcastable trait that considers TF's subtype behavior. class TF_OpIsBroadcastableToRes : And<[ TCOpResIsShapedTypePred, diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc index 2ed44fd3fc7..791323ca992 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc @@ -514,6 +514,66 @@ void ConcatOp::getCanonicalizationPatterns(OwningRewritePatternList &results, namespace { +// Hoist coefficient-wise unary operation out of the Concat op: +// +// %0 = "tf.Log1p"(%arg_0) +// %1 = "tf.Log1p"(%arg_1) +// ... +// %n = "tf.Log1p"(%arg_n) +// %m = "tf.ConcatV2"(%0, %1, ..., %n, %axis) +// +// Rewrite it to: +// +// %0 = "tf.ConcatV2"(%arg_0, %arg_1, ..., %arg_n, %axis) +// %1 = "tf.Log1p"(%0) +class HoistCwiseUnaryOutOfConcat : public OpRewritePattern { + public: + explicit HoistCwiseUnaryOutOfConcat(MLIRContext *context) + : OpRewritePattern(context) {} + LogicalResult matchAndRewrite(TF::ConcatV2Op op, + PatternRewriter &rewriter) const override; +}; + +LogicalResult HoistCwiseUnaryOutOfConcat::matchAndRewrite( + TF::ConcatV2Op op, PatternRewriter &rewriter) const { + auto loc = op.getLoc(); + + // All concat operands must be defined by ops. + Operation *first_arg_op = op.values().front().getDefiningOp(); + if (first_arg_op == nullptr) return failure(); + + // All concat operands must be produced by the coeff-wise unary operation. + if (!first_arg_op->hasTrait()) return failure(); + + // All concat operands must be defined by the op of same kind. + bool args_same_op = llvm::all_of(op.values(), [&](Value arg) -> bool { + Operation *arg_op = arg.getDefiningOp(); + return arg_op && arg_op->getName() == first_arg_op->getName(); + }); + if (!args_same_op) return failure(); + + // Collect unary operations operands. + auto unary_operands = llvm::map_range(op.values(), [](Value arg) -> Value { + return arg.getDefiningOp()->getOperand(0); + }); + SmallVector unary_ops_args(unary_operands); + + // Concatenate unary ops operands. + auto concat_unary_operands = + rewriter.create(loc, op.getType(), unary_ops_args, op.axis()); + + // Replace original concat with an unary op. 
+ OperationState new_unary_op_state(loc, first_arg_op->getName().getStringRef(), + concat_unary_operands.getResult(), + op.getResult().getType(), + ArrayRef()); + Operation *new_unary_op = rewriter.createOperation(new_unary_op_state); + + rewriter.replaceOp(op, new_unary_op->getResults()); + + return success(); +} + // Hoist coefficient-wise binary operation out of the Concat op: // // %0 = tf.Mul(%lhs_0, %rhs_0) @@ -662,7 +722,8 @@ HoistCwiseBinaryOutOfConcat::GetHoistParams(TF::ConcatV2Op op, void ConcatV2Op::getCanonicalizationPatterns(OwningRewritePatternList &results, MLIRContext *context) { - results.insert(context); + results.insert( + context); } //===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_traits.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_traits.h index 6cf2df60a3f..fc8e6f40f65 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_traits.h +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_traits.h @@ -124,11 +124,15 @@ class CannotDuplicate : public TraitBase { } }; -// Coefficient wise binary operation with implicit broadcasting support, for +// Coefficient-wise binary operation with implicit broadcasting support, for // example tf.Sub operation. template class CwiseBinary : public TraitBase {}; +// Coefficient-wise unary operation, for example tf.Sqrt operation. +template +class CwiseUnary : public TraitBase {}; + } // namespace TF } // namespace OpTrait } // namespace mlir diff --git a/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir b/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir index 5808b03c909..595bdce5be4 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir @@ -143,6 +143,19 @@ func @testConcatCanonicalization(%arg0: tensor<2x1xi32>, %arg1: tensor<2x1xi32>) return %1 : tensor<2x2xi32> } +// CHECK-LABEL: testConcatCwiseUnary +func @testConcatCwiseUnary(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { + + // CHECK: %[[CONCAT:.*]] = "tf.ConcatV2"(%arg0, %arg1, %arg2) + // CHECK: %[[LOG1P:.*]] = "tf.Log1p"(%[[CONCAT]]) + // CHECK: return %[[LOG1P]] + %0 = "tf.Log1p"(%arg0) : (tensor) -> tensor + %1 = "tf.Log1p"(%arg1) : (tensor) -> tensor + %2 = "tf.ConcatV2"(%0, %1, %arg2) : (tensor, tensor, tensor) -> tensor + + return %2 : tensor +} + // CHECK-LABEL: testConcatCwiseBinaryOnInnerDim func @testConcatCwiseBinaryOnInnerDim(%arg0: tensor, %arg1: tensor, %arg2: tensor, %arg3: tensor) -> tensor { From 53576063848800b6ee906c6e94d840fbdbbecd1b Mon Sep 17 00:00:00 2001 From: Thomas O'Malley Date: Tue, 4 Aug 2020 16:33:15 -0700 Subject: [PATCH 2121/2522] Enable clipnorm and clipvalue arguments in Optimizer with tf.distribute.Strategy. Apply gradient clipping after aggregation. CentralStorageStrategy is still not supported with these arguments. 
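A minimal usage sketch of the new behavior (the model, data, and strategy below are illustrative only, not part of this change); with this patch the optimizer clips the aggregated, cross-replica gradients rather than the per-replica ones:

```
import numpy as np
import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
  model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
  # clipnorm/clipvalue are now honored under tf.distribute.Strategy and are
  # applied after gradient aggregation (CentralStorageStrategy still raises).
  optimizer = tf.keras.optimizers.SGD(1.0, clipnorm=2.0, clipvalue=2.0)
  model.compile(optimizer, 'mae')

x, y = np.ones((10, 1)), np.ones((10, 1))
model.fit(x, y, batch_size=10, epochs=1)
```
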
PiperOrigin-RevId: 324915370 Change-Id: Ib9b41511b5b9b77ec95ff9543b9aa68e4ed6b4d8 --- RELEASE.md | 6 + .../distribute/distribute_strategy_test.py | 31 +++++ tensorflow/python/keras/engine/training.py | 1 - .../python/keras/engine/training_eager.py | 1 - .../experimental/loss_scale_optimizer.py | 4 +- tensorflow/python/keras/optimizer_v2/BUILD | 1 + .../python/keras/optimizer_v2/optimizer_v2.py | 108 +++++++++--------- tensorflow/python/keras/optimizer_v2/utils.py | 38 ++++++ ...n.experimental.-loss-scale-optimizer.pbtxt | 8 ++ ...ensorflow.keras.optimizers.-adadelta.pbtxt | 8 ++ ...tensorflow.keras.optimizers.-adagrad.pbtxt | 8 ++ .../tensorflow.keras.optimizers.-adam.pbtxt | 8 ++ .../tensorflow.keras.optimizers.-adamax.pbtxt | 8 ++ .../tensorflow.keras.optimizers.-ftrl.pbtxt | 8 ++ .../tensorflow.keras.optimizers.-nadam.pbtxt | 8 ++ ...nsorflow.keras.optimizers.-optimizer.pbtxt | 8 ++ ...nsorflow.keras.optimizers.-r-m-sprop.pbtxt | 8 ++ .../tensorflow.keras.optimizers.-s-g-d.pbtxt | 8 ++ ...n.experimental.-loss-scale-optimizer.pbtxt | 8 ++ ...ensorflow.keras.optimizers.-adadelta.pbtxt | 8 ++ ...tensorflow.keras.optimizers.-adagrad.pbtxt | 8 ++ .../tensorflow.keras.optimizers.-adam.pbtxt | 8 ++ .../tensorflow.keras.optimizers.-adamax.pbtxt | 8 ++ .../tensorflow.keras.optimizers.-ftrl.pbtxt | 8 ++ .../tensorflow.keras.optimizers.-nadam.pbtxt | 8 ++ ...nsorflow.keras.optimizers.-optimizer.pbtxt | 8 ++ ...nsorflow.keras.optimizers.-r-m-sprop.pbtxt | 8 ++ .../tensorflow.keras.optimizers.-s-g-d.pbtxt | 8 ++ .../v2/tensorflow.optimizers.-adadelta.pbtxt | 8 ++ .../v2/tensorflow.optimizers.-adagrad.pbtxt | 8 ++ .../v2/tensorflow.optimizers.-adam.pbtxt | 8 ++ .../v2/tensorflow.optimizers.-adamax.pbtxt | 8 ++ .../v2/tensorflow.optimizers.-ftrl.pbtxt | 8 ++ .../v2/tensorflow.optimizers.-nadam.pbtxt | 8 ++ .../v2/tensorflow.optimizers.-optimizer.pbtxt | 8 ++ .../v2/tensorflow.optimizers.-r-m-sprop.pbtxt | 8 ++ .../v2/tensorflow.optimizers.-s-g-d.pbtxt | 8 ++ 37 files changed, 365 insertions(+), 57 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index b0c785c7d68..d7a345c7c76 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -33,6 +33,10 @@ shape assumptions (note that you can pass shapes with `None` entries for axes that are meant to be dynamic). You can also disable the input checking entirely by setting `model.input_spec = None`. +* `tf.keras.optimizers.Optimizer.get_gradients` no longer performs gradient + clipping. Instead, gradient clipping is performed in + `tf.keras.optimizers.Optimizer.apply_gradients`, after the gradients on each + device have been aggregated. ## Known Caveats @@ -95,6 +99,8 @@ * Error messages when Functional API construction goes wrong (and when ops cannot be converted to Keras layers automatically) should be clearer and easier to understand. * `Optimizer.minimize` can now accept a loss `Tensor` and a `GradientTape` as an alternative to accepting a `callable` loss. + * `Optimizer` arguments `clipnorm` and `clipvalue` are now supported with + `tf.distribute.Strategy` (`CentralStorageStrategy` is not yet supported). * `tf.function` / AutoGraph: * Added `experimental_follow_type_hints` argument for `tf.function`. 
When True, the function may use type annotations to optimize the tracing diff --git a/tensorflow/python/keras/distribute/distribute_strategy_test.py b/tensorflow/python/keras/distribute/distribute_strategy_test.py index 4b6d3a80730..abcb5d1c0e8 100644 --- a/tensorflow/python/keras/distribute/distribute_strategy_test.py +++ b/tensorflow/python/keras/distribute/distribute_strategy_test.py @@ -22,6 +22,7 @@ import numpy as np from tensorflow.python import keras from tensorflow.python.data.experimental.ops import cardinality from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.distribute import central_storage_strategy from tensorflow.python.distribute import combinations from tensorflow.python.distribute import distribution_strategy_context from tensorflow.python.distribute import mirrored_strategy @@ -1863,6 +1864,36 @@ class TestDistributionStrategyWithKerasModels(test.TestCase, self.assertEqual(bc.predict_begin_batches, [0]) self.assertEqual(bc.predict_end_batches, [24]) + @combinations.generate( + combinations.combine(distribution=all_strategies, mode=['eager'])) + def test_gradient_clipping(self, distribution): + + class MyLayer(keras.layers.Layer): + + def build(self, _): + self.v1 = variables.Variable(1.) + self.v2 = variables.Variable(1.) + + def call(self, x): + return 3 * self.v1 - 3 * self.v2 + + x, y = np.ones((10, 1)), np.ones((10, 1)) + + with distribution.scope(): + layer = MyLayer() + model = keras.Sequential([layer]) + optimizer = gradient_descent_keras.SGD(1., clipnorm=2., clipvalue=2.) + model.compile(optimizer, 'mae') + + if isinstance(distribution, + central_storage_strategy.CentralStorageStrategy): + with self.assertRaisesRegex(ValueError, 'not supported'): + model.fit(x, y, batch_size=10, epochs=1) + else: + model.fit(x, y, batch_size=10, epochs=1) + self.assertAllClose(self.evaluate(layer.v1), 3.) + self.assertAllClose(self.evaluate(layer.v2), -1.) + @combinations.generate( combinations.times( all_strategy_combinations_minus_default())) diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index bf542129e5c..a1fb329feab 100644 --- a/tensorflow/python/keras/engine/training.py +++ b/tensorflow/python/keras/engine/training.py @@ -2744,7 +2744,6 @@ def _minimize(strategy, tape, optimizer, loss, trainable_variables): trainable_variables)) if isinstance(optimizer, lso.LossScaleOptimizer): gradients = optimizer.get_unscaled_gradients(gradients) - gradients = optimizer._clip_gradients(gradients) # pylint: disable=protected-access if trainable_variables: if aggregate_grads_outside_optimizer: optimizer.apply_gradients( diff --git a/tensorflow/python/keras/engine/training_eager.py b/tensorflow/python/keras/engine/training_eager.py index 8064bf2a7ab..b3ce3d13ed7 100644 --- a/tensorflow/python/keras/engine/training_eager.py +++ b/tensorflow/python/keras/engine/training_eager.py @@ -273,7 +273,6 @@ def _process_single_batch(model, if isinstance(model.optimizer, loss_scale_optimizer.LossScaleOptimizer): grads = model.optimizer.get_unscaled_gradients(grads) - grads = model.optimizer._clip_gradients(grads) model.optimizer.apply_gradients(zip(grads, trainable_weights)) else: logging.warning('The list of trainable weights is empty. 
Make sure that' diff --git a/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer.py b/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer.py index 4a3f459de80..59a49b03ad5 100644 --- a/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer.py +++ b/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer.py @@ -258,8 +258,8 @@ class LossScaleOptimizer(_DelegatingTrackableMixin, optimizer_v2.OptimizerV2): 'clipvalue %s' % (optimizer, optimizer.clipvalue)) self._raise_if_strategy_unsupported() - self.clipnorm = None - self.clipvalue = None + self._clipnorm = None + self._clipvalue = None self._optimizer = optimizer self._loss_scale = keras_loss_scale_module.get(loss_scale) diff --git a/tensorflow/python/keras/optimizer_v2/BUILD b/tensorflow/python/keras/optimizer_v2/BUILD index b519ec7fb3d..9a317e5d114 100644 --- a/tensorflow/python/keras/optimizer_v2/BUILD +++ b/tensorflow/python/keras/optimizer_v2/BUILD @@ -40,6 +40,7 @@ py_library( "//tensorflow/python:state_ops", "//tensorflow/python:variable_scope", "//tensorflow/python:variables", + "//tensorflow/python/distribute:central_storage_strategy", "//tensorflow/python/distribute:distribute_lib", "//tensorflow/python/distribute:parameter_server_strategy", "//tensorflow/python/distribute:reduce_util", diff --git a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py index 18d94594542..0ecca63a64f 100644 --- a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py +++ b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py @@ -41,7 +41,6 @@ from tensorflow.python.keras.optimizer_v2 import utils as optimizer_utils from tensorflow.python.keras.utils import generic_utils from tensorflow.python.keras.utils import tf_utils from tensorflow.python.ops import array_ops -from tensorflow.python.ops import clip_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gen_resource_variable_ops from tensorflow.python.ops import gradients @@ -332,15 +331,6 @@ class OptimizerV2(trackable.Trackable): raise ValueError("decay cannot be less than 0: {}".format(decay)) self._initial_decay = decay - # Set the gradient clipping properties - self.clipnorm = kwargs.pop("clipnorm", None) - self.clipvalue = kwargs.pop("clipvalue", None) - if ((self.clipnorm is not None or self.clipvalue is not None) - and distribute_ctx.has_strategy()): - raise ValueError("Gradient clipping in the optimizer " - "(by setting clipnorm or clipvalue) is currently " - "unsupported when using a distribution strategy.") - self._hypers_created = False # Store the distribution strategy object if the optimizer is created inside @@ -350,6 +340,33 @@ class OptimizerV2(trackable.Trackable): else: self._distribution_strategy = None + # Set the gradient clipping properties + self._clipnorm = kwargs.pop("clipnorm", None) + self._clipvalue = kwargs.pop("clipvalue", None) + + # Configure gradient transforms. + self._transform_gradients_fns = [] + + if self._clipnorm is not None: + self._transform_gradients_fns.append( + optimizer_utils.make_gradient_clipnorm_fn(self._clipnorm)) + if self._clipvalue is not None: + self._transform_gradients_fns.append( + optimizer_utils.make_gradient_clipvalue_fn(self._clipvalue)) + + @property + def clipnorm(self): + """`float` or `None`. If set, clips gradients to this maximum norm.""" + return self._clipnorm + + @property + def clipvalue(self): + """`float` or `None`. 
+ + If set, clips gradients to this maximum absolute value. + """ + return self._clipvalue + def minimize(self, loss, var_list, grad_loss=None, name=None, tape=None): """Minimize `loss` by updating `var_list`. @@ -385,26 +402,6 @@ class OptimizerV2(trackable.Trackable): loss, var_list=var_list, grad_loss=grad_loss, tape=tape) return self.apply_gradients(grads_and_vars, name=name) - def _clip_gradients(self, grads): - """Clip gradients according to the clipnorm and clipvalue attributes.""" - if self.clipnorm is not None: - if distribute_ctx.has_strategy(): - raise ValueError("Gradient clipping in the optimizer " - "(by setting clipnorm or clipvalue) is currently " - "unsupported when using a distribution strategy.") - grads = [None if g is None else clip_ops.clip_by_norm(g, self.clipnorm) - for g in grads] - if self.clipvalue is not None: - if distribute_ctx.has_strategy(): - raise ValueError("Gradient clipping in the optimizer " - "(by setting clipnorm or clipvalue) is currently " - "unsupported when using a distribution strategy.") - v = self.clipvalue - grads = [ - None if g is None else clip_ops.clip_by_value(g, -v, v) for g in grads - ] - return grads - def _compute_gradients(self, loss, var_list, grad_loss=None, tape=None): """Compute gradients of `loss` for the variables in `var_list`. @@ -454,8 +451,6 @@ class OptimizerV2(trackable.Trackable): var_list = nest.flatten(var_list) with ops.name_scope_v2(self._name + "/gradients"): grads = tape.gradient(loss, var_list, grad_loss) - # TODO(omalleyt): Move to post-aggregation. - grads = self._clip_gradients(grads) grads_and_vars = list(zip(grads, var_list)) self._assert_valid_dtypes([ @@ -465,6 +460,12 @@ class OptimizerV2(trackable.Trackable): return grads_and_vars + def _transform_gradients(self, grads_and_vars): + """Transformations to apply aggregated gradients.""" + for fn in self._transform_gradients_fns: + grads_and_vars = fn(grads_and_vars) + return grads_and_vars + def get_gradients(self, loss, params): """Returns gradients of `loss` with respect to `params`. @@ -483,14 +484,15 @@ class OptimizerV2(trackable.Trackable): with backend.get_graph().as_default(), backend.name_scope(self._name + "/gradients"): grads = gradients.gradients(loss, params) - for grad, param in zip(grads, params): + grads_and_vars = list(zip(grads, params)) + for grad, param in grads_and_vars: if grad is None: raise ValueError("Variable {} has `None` for gradient. " "Please make sure that all of your ops have a " "gradient defined (i.e. are differentiable). " "Common ops without gradient: " "K.argmax, K.round, K.eval.".format(param)) - grads = self._clip_gradients(grads) + grads = [g for g, _ in grads_and_vars] return grads def apply_gradients(self, @@ -534,10 +536,23 @@ class OptimizerV2(trackable.Trackable): ValueError: If none of the variables have gradients. RuntimeError: If called in a cross-replica context. """ - grads_and_vars = optimizer_utils.filter_empty_gradients(grads_and_vars) - var_list = [v for (_, v) in grads_and_vars] + if distribute_ctx.in_cross_replica_context(): + raise RuntimeError( + "`apply_gradients() cannot be called in cross-replica context. 
" + "Use `tf.distribute.Strategy.run` to enter replica " + "context.") - with backend.name_scope(self._name): + strategy = distribute_ctx.get_strategy() + if (not experimental_aggregate_gradients and strategy and + isinstance(strategy.extended, + parameter_server_strategy.ParameterServerStrategyExtended)): + raise NotImplementedError( + "`experimental_aggregate_gradients=False is not supported for " + "ParameterServerStrategy and CentralStorageStrategy") + + grads_and_vars = optimizer_utils.filter_empty_gradients(grads_and_vars) + var_list = [v for _, v in grads_and_vars] + with ops.name_scope_v2(self._name): # Create iteration if necessary. with ops.init_scope(): self._create_all_weights(var_list) @@ -547,25 +562,12 @@ class OptimizerV2(trackable.Trackable): # gradients return control_flow_ops.no_op() - if distribute_ctx.in_cross_replica_context(): - raise RuntimeError( - "`apply_gradients() cannot be called in cross-replica context. " - "Use `tf.distribute.Strategy.run` to enter replica " - "context.") - - strategy = distribute_ctx.get_strategy() - if (not experimental_aggregate_gradients and strategy and isinstance( - strategy.extended, - parameter_server_strategy.ParameterServerStrategyExtended)): - raise NotImplementedError( - "`experimental_aggregate_gradients=False is not supported for " - "ParameterServerStrategy and CentralStorageStrategy") - - apply_state = self._prepare(var_list) if experimental_aggregate_gradients: reduced_grads = self._aggregate_gradients(grads_and_vars) - var_list = [v for _, v in grads_and_vars] grads_and_vars = list(zip(reduced_grads, var_list)) + grads_and_vars = self._transform_gradients(grads_and_vars) + + apply_state = self._prepare(var_list) return distribute_ctx.get_replica_context().merge_call( functools.partial(self._distributed_apply, apply_state=apply_state), args=(grads_and_vars,), diff --git a/tensorflow/python/keras/optimizer_v2/utils.py b/tensorflow/python/keras/optimizer_v2/utils.py index 9f680e04dd6..f723c6d8b64 100644 --- a/tensorflow/python/keras/optimizer_v2/utils.py +++ b/tensorflow/python/keras/optimizer_v2/utils.py @@ -18,8 +18,10 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.distribute import central_storage_strategy from tensorflow.python.distribute import distribution_strategy_context as distribute_ctx from tensorflow.python.distribute import reduce_util as ds_reduce_util +from tensorflow.python.ops import clip_ops from tensorflow.python.platform import tf_logging as logging @@ -57,6 +59,42 @@ def all_reduce_sum_gradients(grads_and_vars): return reduced_with_nones +def make_gradient_clipnorm_fn(clipnorm): + """Creates a gradient transformation function for clipping by norm.""" + + def gradient_clipnorm_fn(grads_and_vars): + + if isinstance(distribute_ctx.get_strategy(), + central_storage_strategy.CentralStorageStrategy): + raise ValueError( + "`clipnorm` is not supported with `CenteralStorageStrategy`") + + clipped_grads_and_vars = [ + (clip_ops.clip_by_norm(g, clipnorm), v) for g, v in grads_and_vars + ] + return clipped_grads_and_vars + + return gradient_clipnorm_fn + + +def make_gradient_clipvalue_fn(clipvalue): + """Creates a gradient transformation function for clipping by value.""" + + def gradient_clipvalue_fn(grads_and_vars): + + if isinstance(distribute_ctx.get_strategy(), + central_storage_strategy.CentralStorageStrategy): + raise ValueError( + "`clipvalue` is not supported with `CenteralStorageStrategy`") + + clipped_grads_and_vars = 
[(clip_ops.clip_by_value(g, -clipvalue, + clipvalue), v) + for g, v in grads_and_vars] + return clipped_grads_and_vars + + return gradient_clipvalue_fn + + def filter_empty_gradients(grads_and_vars): """Filter out `(grad, var)` pairs that have a gradient equal to `None`.""" grads_and_vars = tuple(grads_and_vars) diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.mixed_precision.experimental.-loss-scale-optimizer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.mixed_precision.experimental.-loss-scale-optimizer.pbtxt index dbab3abae8e..58f8cf24495 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.mixed_precision.experimental.-loss-scale-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.mixed_precision.experimental.-loss-scale-optimizer.pbtxt @@ -5,6 +5,14 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + member { + name: "clipnorm" + mtype: "" + } + member { + name: "clipvalue" + mtype: "" + } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adadelta.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adadelta.pbtxt index af854e98013..fb341cb24dd 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adadelta.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adadelta.pbtxt @@ -4,6 +4,14 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + member { + name: "clipnorm" + mtype: "" + } + member { + name: "clipvalue" + mtype: "" + } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adagrad.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adagrad.pbtxt index e89cc5cef75..d8039ed21ef 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adagrad.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adagrad.pbtxt @@ -4,6 +4,14 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + member { + name: "clipnorm" + mtype: "" + } + member { + name: "clipvalue" + mtype: "" + } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adam.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adam.pbtxt index 15414d7234f..912f92f83a6 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adam.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adam.pbtxt @@ -4,6 +4,14 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + member { + name: "clipnorm" + mtype: "" + } + member { + name: "clipvalue" + mtype: "" + } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adamax.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adamax.pbtxt index 8b3c429e6b5..3abc6d39b3f 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adamax.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adamax.pbtxt @@ -4,6 +4,14 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + member { + name: "clipnorm" + mtype: "" + } + member { + name: "clipvalue" + mtype: "" + } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-ftrl.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-ftrl.pbtxt index 51ab675db74..00880d3f73b 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-ftrl.pbtxt +++ 
b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-ftrl.pbtxt @@ -4,6 +4,14 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + member { + name: "clipnorm" + mtype: "" + } + member { + name: "clipvalue" + mtype: "" + } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-nadam.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-nadam.pbtxt index 342c0951bbe..2ce311d3504 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-nadam.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-nadam.pbtxt @@ -4,6 +4,14 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + member { + name: "clipnorm" + mtype: "" + } + member { + name: "clipvalue" + mtype: "" + } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-optimizer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-optimizer.pbtxt index f007b4b971a..2020de9fa5c 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-optimizer.pbtxt @@ -3,6 +3,14 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + member { + name: "clipnorm" + mtype: "" + } + member { + name: "clipvalue" + mtype: "" + } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-r-m-sprop.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-r-m-sprop.pbtxt index d5bf6fa7f47..80a1449613c 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-r-m-sprop.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-r-m-sprop.pbtxt @@ -4,6 +4,14 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + member { + name: "clipnorm" + mtype: "" + } + member { + name: "clipvalue" + mtype: "" + } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-s-g-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-s-g-d.pbtxt index df904f72511..8acfe214256 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-s-g-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-s-g-d.pbtxt @@ -4,6 +4,14 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + member { + name: "clipnorm" + mtype: "" + } + member { + name: "clipvalue" + mtype: "" + } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.mixed_precision.experimental.-loss-scale-optimizer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.mixed_precision.experimental.-loss-scale-optimizer.pbtxt index dbab3abae8e..58f8cf24495 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.mixed_precision.experimental.-loss-scale-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.mixed_precision.experimental.-loss-scale-optimizer.pbtxt @@ -5,6 +5,14 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + member { + name: "clipnorm" + mtype: "" + } + member { + name: "clipvalue" + mtype: "" + } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adadelta.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adadelta.pbtxt index af854e98013..fb341cb24dd 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adadelta.pbtxt +++ 
b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adadelta.pbtxt @@ -4,6 +4,14 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + member { + name: "clipnorm" + mtype: "" + } + member { + name: "clipvalue" + mtype: "" + } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adagrad.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adagrad.pbtxt index e89cc5cef75..d8039ed21ef 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adagrad.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adagrad.pbtxt @@ -4,6 +4,14 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + member { + name: "clipnorm" + mtype: "" + } + member { + name: "clipvalue" + mtype: "" + } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adam.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adam.pbtxt index 15414d7234f..912f92f83a6 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adam.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adam.pbtxt @@ -4,6 +4,14 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + member { + name: "clipnorm" + mtype: "" + } + member { + name: "clipvalue" + mtype: "" + } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adamax.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adamax.pbtxt index 8b3c429e6b5..3abc6d39b3f 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adamax.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adamax.pbtxt @@ -4,6 +4,14 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + member { + name: "clipnorm" + mtype: "" + } + member { + name: "clipvalue" + mtype: "" + } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-ftrl.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-ftrl.pbtxt index 51ab675db74..00880d3f73b 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-ftrl.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-ftrl.pbtxt @@ -4,6 +4,14 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + member { + name: "clipnorm" + mtype: "" + } + member { + name: "clipvalue" + mtype: "" + } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-nadam.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-nadam.pbtxt index 342c0951bbe..2ce311d3504 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-nadam.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-nadam.pbtxt @@ -4,6 +4,14 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + member { + name: "clipnorm" + mtype: "" + } + member { + name: "clipvalue" + mtype: "" + } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-optimizer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-optimizer.pbtxt index f007b4b971a..2020de9fa5c 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-optimizer.pbtxt @@ -3,6 +3,14 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + member { + name: 
"clipnorm" + mtype: "" + } + member { + name: "clipvalue" + mtype: "" + } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-r-m-sprop.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-r-m-sprop.pbtxt index d5bf6fa7f47..80a1449613c 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-r-m-sprop.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-r-m-sprop.pbtxt @@ -4,6 +4,14 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + member { + name: "clipnorm" + mtype: "" + } + member { + name: "clipvalue" + mtype: "" + } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-s-g-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-s-g-d.pbtxt index df904f72511..8acfe214256 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-s-g-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-s-g-d.pbtxt @@ -4,6 +4,14 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + member { + name: "clipnorm" + mtype: "" + } + member { + name: "clipvalue" + mtype: "" + } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adadelta.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adadelta.pbtxt index cb3d38246a7..06212bdc95d 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adadelta.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adadelta.pbtxt @@ -4,6 +4,14 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + member { + name: "clipnorm" + mtype: "" + } + member { + name: "clipvalue" + mtype: "" + } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adagrad.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adagrad.pbtxt index c7b2bca4b6b..09fff0514d8 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adagrad.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adagrad.pbtxt @@ -4,6 +4,14 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + member { + name: "clipnorm" + mtype: "" + } + member { + name: "clipvalue" + mtype: "" + } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adam.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adam.pbtxt index 209c9fe6620..195ba9e4f56 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adam.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adam.pbtxt @@ -4,6 +4,14 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + member { + name: "clipnorm" + mtype: "" + } + member { + name: "clipvalue" + mtype: "" + } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adamax.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adamax.pbtxt index 12bbb14fb71..9859da430bd 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adamax.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adamax.pbtxt @@ -4,6 +4,14 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + member { + name: "clipnorm" + mtype: "" + } + member { + name: "clipvalue" + mtype: "" + } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-ftrl.pbtxt 
b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-ftrl.pbtxt index 1482ed54eb9..a4ed911e39d 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-ftrl.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-ftrl.pbtxt @@ -4,6 +4,14 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + member { + name: "clipnorm" + mtype: "" + } + member { + name: "clipvalue" + mtype: "" + } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-nadam.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-nadam.pbtxt index 2a422fa2340..128f223fdc7 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-nadam.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-nadam.pbtxt @@ -4,6 +4,14 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + member { + name: "clipnorm" + mtype: "" + } + member { + name: "clipvalue" + mtype: "" + } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-optimizer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-optimizer.pbtxt index e7021e02772..5ea1ed521ef 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-optimizer.pbtxt @@ -3,6 +3,14 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + member { + name: "clipnorm" + mtype: "" + } + member { + name: "clipvalue" + mtype: "" + } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-r-m-sprop.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-r-m-sprop.pbtxt index 6543f4023a4..db89ecbabe7 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-r-m-sprop.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-r-m-sprop.pbtxt @@ -4,6 +4,14 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + member { + name: "clipnorm" + mtype: "" + } + member { + name: "clipvalue" + mtype: "" + } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-s-g-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-s-g-d.pbtxt index 94ff8dfcdfc..0cb0205e65e 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-s-g-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-s-g-d.pbtxt @@ -4,6 +4,14 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + member { + name: "clipnorm" + mtype: "" + } + member { + name: "clipvalue" + mtype: "" + } member { name: "iterations" mtype: "" From 2d68aaede743bac80efe8dc427f34838388e0a73 Mon Sep 17 00:00:00 2001 From: Doe Hyun Yoon Date: Tue, 4 Aug 2020 16:45:28 -0700 Subject: [PATCH 2122/2522] Add ImmutableNodeMap for const GraphDef. NodeMap and ImmutableNodeMap are subclass of NodeMapInternal. 
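A minimal usage sketch of the two lookup classes this change provides, before the diff itself. It assumes the usual grappler include paths and a graph that contains a node named "add" (mirroring the updated unit test further below); it is an illustration, not code from the patch.

// Sketch: node-name lookups over a GraphDef with NodeMap / ImmutableNodeMap.
#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/framework/node_def.pb.h"
#include "tensorflow/core/grappler/utils.h"

void LookupExample(tensorflow::GraphDef* mutable_graph,
                   const tensorflow::GraphDef& const_graph) {
  // NodeMap requires a mutable graph and hands back mutable NodeDef pointers.
  tensorflow::grappler::NodeMap node_map(mutable_graph);
  tensorflow::NodeDef* add = node_map.GetNode("add");

  // ImmutableNodeMap accepts a const GraphDef and only exposes const lookups.
  tensorflow::grappler::ImmutableNodeMap immutable_map(&const_graph);
  const tensorflow::NodeDef* const_add = immutable_map.GetNode("add");

  (void)add;
  (void)const_add;
}

ImmutableNodeMap trades mutability for usability in const contexts such as read-only graph analyses; the shared construction and lookup logic lives in the NodeMapInternal template introduced in the utils.h diff below.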
PiperOrigin-RevId: 324917672 Change-Id: I9be113ae4134934f6c8b402e5ceb0fd5e90b2e80 --- tensorflow/core/grappler/utils.cc | 30 ++++---- tensorflow/core/grappler/utils.h | 64 +++++++++++++---- tensorflow/core/grappler/utils_test.cc | 97 ++++++++++++++++++-------- 3 files changed, 134 insertions(+), 57 deletions(-) diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc index 7cf303654ed..e342f7dfdf0 100644 --- a/tensorflow/core/grappler/utils.cc +++ b/tensorflow/core/grappler/utils.cc @@ -73,25 +73,21 @@ bool IsShapeConsumer(const NodeDef& node) { } // namespace -NodeMap::NodeMap(GraphDef* graph) { - nodes_.reserve(graph->node_size()); - outputs_.reserve(graph->node_size()); - for (int i = 0; i < graph->node_size(); i++) { - NodeDef* node = graph->mutable_node(i); - const string& node_name = node->name(); - auto rslt = nodes_.emplace(node_name, node); - // Check that the graph doesn't contain multiple nodes with the same name. - if (!rslt.second) { - // The first node found with a given name becomes the canonical. - LOG(WARNING) << "Duplicated node in the graph: " << node_name; - } - NodeDef* canonical = rslt.second ? node : rslt.first->second; - for (const auto& input : node->input()) { - outputs_[NodeName(input)].insert(canonical); - } - } +namespace internal { +// Specialized template class method GetNodeDefFromGraph. +template <> +NodeDef* NodeMapInternal::GetNodeDefFromGraph( + GraphDef* graph, int64 i) const { + return graph->mutable_node(i); } +template <> +const NodeDef* +NodeMapInternal::GetNodeDefFromGraph( + const GraphDef* graph, int64 i) const { + return &graph->node(i); +} +} // namespace internal string TensorIdToString(const TensorId& tensor_id) { return tensor_id.index() == 0 ? string(tensor_id.node()) : tensor_id.ToString(); diff --git a/tensorflow/core/grappler/utils.h b/tensorflow/core/grappler/utils.h index e529d5fb4ad..e9ab5b7da12 100644 --- a/tensorflow/core/grappler/utils.h +++ b/tensorflow/core/grappler/utils.h @@ -98,16 +98,39 @@ inline int NodePosition(const string& name) { return position; } -// A utility class to lookup a node and its outputs by node name. -class NodeMap { +namespace internal { +// Base template class for NodeMap and ImmutableNodeMap. +template +class NodeMapInternal { public: // Note: The NodeMap will store pointers to nodes in graph, which may become // invalid if graph is changed. - explicit NodeMap(GraphDef* graph); + explicit NodeMapInternal(GraphDefT* graph) { + if (graph == nullptr) { + LOG(WARNING) << "NodeMapInternal constructor is called with a nullptr!"; + return; + } + nodes_.reserve(graph->node_size()); + outputs_.reserve(graph->node_size()); + for (int i = 0; i < graph->node_size(); i++) { + NodeDefT* node = GetNodeDefFromGraph(graph, i); + const string& node_name = node->name(); + auto rslt = nodes_.emplace(node_name, node); + // Check that the graph doesn't contain multiple nodes with the same name. + if (!rslt.second) { + // The first node found with a given name becomes the canonical. + LOG(WARNING) << "Duplicated node in the graph: " << node_name; + } + NodeDefT* canonical = rslt.second ? node : rslt.first->second; + for (const auto& input : node->input()) { + outputs_[NodeName(input)].insert(canonical); + } + } + } // Get unordered list of fanouts from node. Notice, that the order is // non-deterministic. 
- const absl::flat_hash_set& GetOutputs( + const absl::flat_hash_set& GetOutputs( const string& node_name) const { auto it = outputs_.find(node_name); if (it == outputs_.end()) { @@ -117,12 +140,12 @@ class NodeMap { } // Get fanouts ordered by name. - std::vector GetOutputsOrderedByNodeName( + std::vector GetOutputsOrderedByNodeName( const string& node_name) const { - std::vector result; + std::vector result; auto it = outputs_.find(node_name); if (it != outputs_.end()) { - const absl::flat_hash_set& outputs = it->second; + const absl::flat_hash_set& outputs = it->second; result.reserve(outputs.size()); result.assign(outputs.begin(), outputs.end()); std::sort(result.begin(), result.end(), @@ -135,7 +158,7 @@ class NodeMap { // This method doesn't record the outputs of the added node; the outputs need // to be explicitly added by the AddOutput method. - void AddNode(const string& node_name, NodeDef* node) { + void AddNode(const string& node_name, NodeDefT* node) { DCHECK(node != nullptr); auto ret = nodes_.emplace(node_name, node); DCHECK(ret.second) @@ -148,7 +171,7 @@ class NodeMap { outputs_.erase(NodeName(name)); } - NodeDef* GetNode(const string& name) const { + NodeDefT* GetNode(const string& name) const { const string node_name = NodeName(name); auto it = nodes_.find(node_name); if (it == nodes_.end()) { @@ -197,9 +220,26 @@ class NodeMap { } private: - const absl::flat_hash_set empty_set_; - absl::node_hash_map nodes_; - absl::node_hash_map> outputs_; + // Helper method to get the NodeDef pointer of i-th node in a graph. + NodeDefT* GetNodeDefFromGraph(GraphDefT* graph, int64 i) const; + + const absl::flat_hash_set empty_set_; + absl::node_hash_map nodes_; + absl::node_hash_map> outputs_; +}; +} // namespace internal + +// A utility class to lookup a node and its outputs by node name. +class NodeMap : public internal::NodeMapInternal { + public: + explicit NodeMap(GraphDef* graph) : NodeMapInternal(graph) {} +}; + +// Same to NodeMap, but uses const GraphDef. +class ImmutableNodeMap + : public internal::NodeMapInternal { + public: + explicit ImmutableNodeMap(const GraphDef* graph) : NodeMapInternal(graph) {} }; // A vector with a set. 
The set stores the same elements as the vector, and diff --git a/tensorflow/core/grappler/utils_test.cc b/tensorflow/core/grappler/utils_test.cc index 6231fb7a780..31444735b20 100644 --- a/tensorflow/core/grappler/utils_test.cc +++ b/tensorflow/core/grappler/utils_test.cc @@ -349,39 +349,69 @@ TEST_F(UtilsTest, NumNonControlOutputs) { GraphDef graph; TF_CHECK_OK(s.ToGraphDef(&graph)); - NodeMap node_map(&graph); - const NodeDef* add_node = node_map.GetNode("add"); - const NodeDef* mul_node = node_map.GetNode("mul"); - ASSERT_NE(add_node, nullptr); + { + NodeMap node_map(&graph); - // [a, b] are only non-control inputs - EXPECT_EQ(NumNonControlInputs(*add_node), 2); - EXPECT_EQ(NumControlInputs(*add_node), 1); - // [sqrt, shape] are non control outputs - EXPECT_EQ(NumNonControlOutputs(*add_node, node_map), 2); - // sqrt is the only data output - EXPECT_EQ(NumNonControlDataOutputs(*add_node, node_map), 1); - EXPECT_EQ(NumControlInputs(*mul_node), 0); + const NodeDef* add_node = node_map.GetNode("add"); + const NodeDef* mul_node = node_map.GetNode("mul"); + ASSERT_NE(add_node, nullptr); - EXPECT_TRUE(HasControlInputs(*add_node)); - EXPECT_TRUE(HasRegularInputs(*add_node)); - EXPECT_TRUE(HasControlOutputs(*add_node, node_map)); - EXPECT_TRUE(HasRegularOutputs(*add_node, node_map)); + // [a, b] are only non-control inputs + EXPECT_EQ(NumNonControlInputs(*add_node), 2); + EXPECT_EQ(NumControlInputs(*add_node), 1); + // [sqrt, shape] are non control outputs + EXPECT_EQ(NumNonControlOutputs(*add_node, node_map), 2); + // sqrt is the only data output + EXPECT_EQ(NumNonControlDataOutputs(*add_node, node_map), 1); + EXPECT_EQ(NumControlInputs(*mul_node), 0); - const NodeDef* x_node = node_map.GetNode("x"); - ASSERT_NE(x_node, nullptr); - EXPECT_FALSE(HasControlInputs(*x_node)); - EXPECT_FALSE(HasRegularInputs(*x_node)); - EXPECT_FALSE(HasControlOutputs(*x_node, node_map)); - EXPECT_TRUE(HasRegularOutputs(*x_node, node_map)); + EXPECT_TRUE(HasControlInputs(*add_node)); + EXPECT_TRUE(HasRegularInputs(*add_node)); + EXPECT_TRUE(HasControlOutputs(*add_node, node_map)); + EXPECT_TRUE(HasRegularOutputs(*add_node, node_map)); - const NodeDef* round_node = node_map.GetNode("round"); - ASSERT_NE(round_node, nullptr); - EXPECT_TRUE(HasControlInputs(*round_node)); - EXPECT_TRUE(HasRegularInputs(*round_node)); - EXPECT_FALSE(HasControlOutputs(*round_node, node_map)); - EXPECT_FALSE(HasRegularOutputs(*round_node, node_map)); + const NodeDef* x_node = node_map.GetNode("x"); + ASSERT_NE(x_node, nullptr); + EXPECT_FALSE(HasControlInputs(*x_node)); + EXPECT_FALSE(HasRegularInputs(*x_node)); + EXPECT_FALSE(HasControlOutputs(*x_node, node_map)); + EXPECT_TRUE(HasRegularOutputs(*x_node, node_map)); + + const NodeDef* round_node = node_map.GetNode("round"); + ASSERT_NE(round_node, nullptr); + EXPECT_TRUE(HasControlInputs(*round_node)); + EXPECT_TRUE(HasRegularInputs(*round_node)); + EXPECT_FALSE(HasControlOutputs(*round_node, node_map)); + EXPECT_FALSE(HasRegularOutputs(*round_node, node_map)); + } + + { + // Similar test for ImmutableNodeMap. 
+ ImmutableNodeMap node_map(&graph); + + const NodeDef* add_node = node_map.GetNode("add"); + const NodeDef* mul_node = node_map.GetNode("mul"); + ASSERT_NE(add_node, nullptr); + + // [a, b] are only non-control inputs + EXPECT_EQ(NumNonControlInputs(*add_node), 2); + EXPECT_EQ(NumControlInputs(*add_node), 1); + EXPECT_EQ(NumControlInputs(*mul_node), 0); + + EXPECT_TRUE(HasControlInputs(*add_node)); + EXPECT_TRUE(HasRegularInputs(*add_node)); + + const NodeDef* x_node = node_map.GetNode("x"); + ASSERT_NE(x_node, nullptr); + EXPECT_FALSE(HasControlInputs(*x_node)); + EXPECT_FALSE(HasRegularInputs(*x_node)); + + const NodeDef* round_node = node_map.GetNode("round"); + ASSERT_NE(round_node, nullptr); + EXPECT_TRUE(HasControlInputs(*round_node)); + EXPECT_TRUE(HasRegularInputs(*round_node)); + } } TEST(CheckAttrExists, All) { @@ -664,6 +694,17 @@ static void BM_NodeMapConstruct(int iters, int size) { } BENCHMARK(BM_NodeMapConstruct)->Range(1, 1 << 20); +static void BM_ImmutableNodeMapConstruct(int iters, int size) { + testing::StopTiming(); + GraphDef graph = test::CreateRandomGraph(size); + testing::StartTiming(); + for (int i = 0; i < iters; i++) { + ImmutableNodeMap node_map(&graph); + } + testing::StopTiming(); +} +BENCHMARK(BM_ImmutableNodeMapConstruct)->Range(1, 1 << 20); + } // namespace } // namespace grappler } // namespace tensorflow From 79594069bb6b5b0e43aeb7d6c6504f10ab8b8be8 Mon Sep 17 00:00:00 2001 From: Yujing Zhang Date: Tue, 4 Aug 2020 16:57:42 -0700 Subject: [PATCH 2123/2522] Preserve composite devices when cloning a ProcessFunctionLibraryRuntime. PiperOrigin-RevId: 324919856 Change-Id: Ibfe2df7e511593730ebe54e57204b136b0fff5a9 --- .../process_function_library_runtime.cc | 4 ++++ .../process_function_library_runtime.h | 4 ++++ .../process_function_library_runtime_test.cc | 22 +++++++++++++++++++ 3 files changed, 30 insertions(+) diff --git a/tensorflow/core/common_runtime/process_function_library_runtime.cc b/tensorflow/core/common_runtime/process_function_library_runtime.cc index 515477cd16a..aee482d92da 100644 --- a/tensorflow/core/common_runtime/process_function_library_runtime.cc +++ b/tensorflow/core/common_runtime/process_function_library_runtime.cc @@ -1667,6 +1667,10 @@ Status ProcessFunctionLibraryRuntime::Clone( device_mgr_, env, config_ ? &(*config_) : nullptr, graph_def_version, out_lib_def->get(), optimizer_options, default_thread_pool_, parent_, custom_kernel_creator, session_metadata_, rendezvous_factory_); + { + tf_shared_lock l(mu_); + for (auto* d : composite_devices_) (*out_pflr)->AddCompositeDevice(d); + } return Status::OK(); } diff --git a/tensorflow/core/common_runtime/process_function_library_runtime.h b/tensorflow/core/common_runtime/process_function_library_runtime.h index bc68c9c2807..0bd85c62df5 100644 --- a/tensorflow/core/common_runtime/process_function_library_runtime.h +++ b/tensorflow/core/common_runtime/process_function_library_runtime.h @@ -221,6 +221,7 @@ class ProcessFunctionLibraryRuntime { void AddCompositeDevice(CompositeDevice* d) TF_LOCKS_EXCLUDED(mu_) { mutex_lock l(mu_); device_set_->AddDevice(d); + composite_devices_.push_back(d); } protected: @@ -452,6 +453,9 @@ class ProcessFunctionLibraryRuntime { // fail if it spans the changed remote devices. std::shared_ptr device_set_ TF_GUARDED_BY(mu_); + // Composite devices owned by a EagerContext. + std::vector composite_devices_ TF_GUARDED_BY(mu_); + // Holds all the function instantiations. Maps function_keys to handles. 
std::unordered_map table_ TF_GUARDED_BY(mu_); diff --git a/tensorflow/core/common_runtime/process_function_library_runtime_test.cc b/tensorflow/core/common_runtime/process_function_library_runtime_test.cc index 19c33a53d20..be279c84d1a 100644 --- a/tensorflow/core/common_runtime/process_function_library_runtime_test.cc +++ b/tensorflow/core/common_runtime/process_function_library_runtime_test.cc @@ -1188,6 +1188,28 @@ TEST_F(ProcessFunctionLibraryRuntimeTest, SessionMetadataPresent) { EXPECT_EQ(session_metadata.version(), read_metadata.version()); } +TEST_F(ProcessFunctionLibraryRuntimeTest, CompositeDevicesAfterCloning) { + Init({AddVarAcrossDevices()}); + + Status s; + std::unique_ptr composite_device = + CompositeDevice::MakeDevice({device0_->name(), device1_->name()}, + /*unique_device_id=*/0, + device_mgr_->HostCPU()->parsed_name(), &s); + TF_ASSERT_OK(s); + AddCompositeDevice(composite_device.get()); + + auto* flr = proc_flr_->GetFLR("/job:a/replica:0/task:0/cpu:0"); + ASSERT_NE(nullptr, flr); + std::unique_ptr cloned_lib_def; + std::unique_ptr cloned_proc_flr; + FunctionLibraryRuntime* cloned_flr; + TF_ASSERT_OK(flr->Clone(&cloned_lib_def, &cloned_proc_flr, &cloned_flr)); + EXPECT_EQ( + cloned_proc_flr->device_set()->FindDeviceByName(composite_device->name()), + composite_device.get()); +} + TEST_F(ProcessFunctionLibraryRuntimeTest, SessionMetadataPresentAfterCloning) { const SessionMetadata session_metadata = GenerateSessionMetadata(); Init({SessionMetadataReaderOpFn()}, &session_metadata); From 84d053187cb80d975ef2b9684d4b61981bca0c41 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 4 Aug 2020 17:19:50 -0700 Subject: [PATCH 2124/2522] Break up core/kernels/BUILD (part 1 of N): Move linear algebra kernels to subdirectory tensorflow/core/kernels/linalg with its own BUILD file. 
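For code that depends on the moved kernels, the practical effect is an include-path change; a minimal before/after sketch, with paths taken from the rename list in this patch (the old header may remain temporarily as a forwarding shim, as the BUILD notes below indicate):

// Before this patch: linalg helpers and GPU solver wrappers lived under core/kernels.
//   #include "tensorflow/core/kernels/linalg_ops_common.h"
//   #include "tensorflow/core/kernels/cuda_solvers.h"
// After this patch: linalg kernels move under core/kernels/linalg, and the
// GPU solver/sparse helpers move to core/util.
#include "tensorflow/core/kernels/linalg/linalg_ops_common.h"
#include "tensorflow/core/util/cuda_solvers.h"

On the build side, targets that previously depended on //tensorflow/core/kernels:linalg now depend on //tensorflow/core/kernels/linalg:linalg, as the tensorflow/core/BUILD hunk in the diff below shows.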
PiperOrigin-RevId: 324923762 Change-Id: Id17aac690729b62ae97525df5bb57d6a073d6b0c --- tensorflow/core/BUILD | 2 +- tensorflow/core/kernels/BUILD | 412 ++---------------- tensorflow/core/kernels/linalg/BUILD | 376 ++++++++++++++++ .../banded_triangular_solve_op.cc | 2 +- .../banded_triangular_solve_op_test.cc | 2 +- .../kernels/{ => linalg}/cholesky_grad.cc | 2 +- .../core/kernels/{ => linalg}/cholesky_op.cc | 6 +- .../kernels/{ => linalg}/determinant_op.cc | 6 +- .../kernels/{ => linalg}/determinant_op.h | 6 +- .../{ => linalg}/determinant_op_gpu.cu.cc | 4 +- .../kernels/{ => linalg}/eig_op_complex128.cc | 2 +- .../kernels/{ => linalg}/eig_op_complex64.cc | 2 +- .../kernels/{ => linalg}/eig_op_double.cc | 2 +- .../core/kernels/{ => linalg}/eig_op_float.cc | 2 +- .../core/kernels/{ => linalg}/eig_op_impl.h | 8 +- .../core/kernels/{ => linalg}/einsum_op.h | 4 +- .../kernels/{ => linalg}/einsum_op_gpu.cu.cc | 2 +- .../kernels/{ => linalg}/einsum_op_impl.h | 8 +- .../{ => linalg}/einsum_op_impl_bfloat16.cc | 2 +- .../{ => linalg}/einsum_op_impl_complex128.cc | 2 +- .../{ => linalg}/einsum_op_impl_complex64.cc | 2 +- .../{ => linalg}/einsum_op_impl_double.cc | 2 +- .../{ => linalg}/einsum_op_impl_float.cc | 2 +- .../{ => linalg}/einsum_op_impl_half.cc | 2 +- .../{ => linalg}/einsum_op_impl_int32.cc | 2 +- .../{ => linalg}/einsum_op_impl_int64.cc | 2 +- .../core/kernels/{ => linalg}/eye_functor.h | 4 +- .../{ => linalg}/eye_functor_gpu.cu.cc | 2 +- .../kernels/{ => linalg}/linalg_ops_common.cc | 2 +- .../core/kernels/linalg/linalg_ops_common.h | 221 ++++++++++ tensorflow/core/kernels/{ => linalg}/lu_op.cc | 0 .../core/kernels/{ => linalg}/lu_op_gpu.cu.cc | 2 +- .../{ => linalg}/matrix_band_part_op.cc | 3 +- .../{ => linalg}/matrix_band_part_op.h | 6 +- .../matrix_band_part_op_gpu.cu.cc | 2 +- .../kernels/{ => linalg}/matrix_diag_op.cc | 2 +- .../kernels/{ => linalg}/matrix_diag_op.h | 6 +- .../{ => linalg}/matrix_diag_op_gpu.cu.cc | 2 +- .../{ => linalg}/matrix_exponential_op.cc | 2 +- .../kernels/{ => linalg}/matrix_inverse_op.cc | 6 +- .../{ => linalg}/matrix_logarithm_op.cc | 2 +- .../{ => linalg}/matrix_set_diag_op.cc | 4 +- .../kernels/{ => linalg}/matrix_set_diag_op.h | 6 +- .../{ => linalg}/matrix_set_diag_op_gpu.cu.cc | 2 +- .../matrix_solve_ls_op_complex128.cc | 2 +- .../matrix_solve_ls_op_complex64.cc | 2 +- .../{ => linalg}/matrix_solve_ls_op_double.cc | 2 +- .../{ => linalg}/matrix_solve_ls_op_float.cc | 2 +- .../{ => linalg}/matrix_solve_ls_op_impl.h | 8 +- .../kernels/{ => linalg}/matrix_solve_op.cc | 4 +- .../{ => linalg}/matrix_square_root_op.cc | 2 +- .../matrix_triangular_solve_op_complex.cc | 2 +- .../matrix_triangular_solve_op_impl.h | 12 +- .../matrix_triangular_solve_op_real.cc | 2 +- .../matrix_triangular_solve_op_test.cc | 0 .../kernels/{ => linalg}/qr_op_complex128.cc | 2 +- .../kernels/{ => linalg}/qr_op_complex64.cc | 2 +- .../core/kernels/{ => linalg}/qr_op_double.cc | 2 +- .../core/kernels/{ => linalg}/qr_op_float.cc | 2 +- .../core/kernels/{ => linalg}/qr_op_impl.h | 14 +- .../{ => linalg}/self_adjoint_eig_op.cc | 2 +- .../self_adjoint_eig_v2_op_complex128.cc | 2 +- .../self_adjoint_eig_v2_op_complex64.cc | 2 +- .../self_adjoint_eig_v2_op_double.cc | 2 +- .../self_adjoint_eig_v2_op_float.cc | 2 +- .../self_adjoint_eig_v2_op_gpu.cc | 2 +- .../self_adjoint_eig_v2_op_impl.h | 8 +- .../kernels/{ => linalg}/svd_op_complex128.cc | 2 +- .../kernels/{ => linalg}/svd_op_complex64.cc | 2 +- .../kernels/{ => linalg}/svd_op_double.cc | 2 +- .../core/kernels/{ => 
linalg}/svd_op_float.cc | 2 +- .../kernels/{ => linalg}/svd_op_gpu.cu.cc | 6 +- .../core/kernels/{ => linalg}/svd_op_impl.h | 8 +- .../{ => linalg}/tridiagonal_matmul_op.cc | 2 +- .../tridiagonal_matmul_op_gpu.cu.cc | 6 +- .../{ => linalg}/tridiagonal_solve_op.cc | 2 +- .../tridiagonal_solve_op_gpu.cu.cc | 6 +- tensorflow/core/kernels/linalg_ops_common.h | 205 +-------- .../core/kernels/segment_reduction_ops_impl.h | 4 +- tensorflow/core/kernels/sparse/BUILD | 4 +- tensorflow/core/kernels/sparse/add_op.cc | 4 +- tensorflow/core/kernels/sparse/conj_op.cc | 4 +- .../sparse/csr_sparse_matrix_to_dense_op.cc | 4 +- .../csr_sparse_matrix_to_sparse_tensor_op.cc | 4 +- .../sparse/dense_to_csr_sparse_matrix_op.cc | 4 +- .../core/kernels/sparse/kernels_gpu.cu.cc | 2 +- tensorflow/core/kernels/sparse/mat_mul_op.cc | 4 +- tensorflow/core/kernels/sparse/mul_op.cc | 2 +- tensorflow/core/kernels/sparse/nnz_op.cc | 4 +- tensorflow/core/kernels/sparse/softmax_op.cc | 2 +- .../core/kernels/sparse/sparse_mat_mul_op.cc | 4 +- .../sparse/sparse_matrix_components_op.cc | 4 +- .../sparse_tensor_to_csr_sparse_matrix_op.cc | 4 +- .../core/kernels/sparse/transpose_op.cc | 2 +- tensorflow/core/kernels/where_op.cc | 2 +- tensorflow/core/util/BUILD | 63 +++ .../core/{kernels => util}/cuda_solvers.cc | 2 +- .../core/{kernels => util}/cuda_solvers.h | 8 +- .../core/{kernels => util}/cuda_sparse.cc | 4 +- .../core/{kernels => util}/cuda_sparse.h | 49 +-- .../core/{kernels => util}/rocm_solvers.cc | 2 +- .../core/{kernels => util}/rocm_solvers.h | 6 +- .../core/{kernels => util}/rocm_sparse.cc | 4 +- 103 files changed, 885 insertions(+), 772 deletions(-) create mode 100644 tensorflow/core/kernels/linalg/BUILD rename tensorflow/core/kernels/{ => linalg}/banded_triangular_solve_op.cc (99%) rename tensorflow/core/kernels/{ => linalg}/banded_triangular_solve_op_test.cc (99%) rename tensorflow/core/kernels/{ => linalg}/cholesky_grad.cc (99%) rename tensorflow/core/kernels/{ => linalg}/cholesky_op.cc (98%) rename tensorflow/core/kernels/{ => linalg}/determinant_op.cc (99%) rename tensorflow/core/kernels/{ => linalg}/determinant_op.h (90%) rename tensorflow/core/kernels/{ => linalg}/determinant_op_gpu.cu.cc (98%) rename tensorflow/core/kernels/{ => linalg}/eig_op_complex128.cc (93%) rename tensorflow/core/kernels/{ => linalg}/eig_op_complex64.cc (93%) rename tensorflow/core/kernels/{ => linalg}/eig_op_double.cc (93%) rename tensorflow/core/kernels/{ => linalg}/eig_op_float.cc (93%) rename tensorflow/core/kernels/{ => linalg}/eig_op_impl.h (93%) rename tensorflow/core/kernels/{ => linalg}/einsum_op.h (94%) rename tensorflow/core/kernels/{ => linalg}/einsum_op_gpu.cu.cc (96%) rename tensorflow/core/kernels/{ => linalg}/einsum_op_impl.h (99%) rename tensorflow/core/kernels/{ => linalg}/einsum_op_impl_bfloat16.cc (94%) rename tensorflow/core/kernels/{ => linalg}/einsum_op_impl_complex128.cc (95%) rename tensorflow/core/kernels/{ => linalg}/einsum_op_impl_complex64.cc (95%) rename tensorflow/core/kernels/{ => linalg}/einsum_op_impl_double.cc (95%) rename tensorflow/core/kernels/{ => linalg}/einsum_op_impl_float.cc (95%) rename tensorflow/core/kernels/{ => linalg}/einsum_op_impl_half.cc (95%) rename tensorflow/core/kernels/{ => linalg}/einsum_op_impl_int32.cc (94%) rename tensorflow/core/kernels/{ => linalg}/einsum_op_impl_int64.cc (94%) rename tensorflow/core/kernels/{ => linalg}/eye_functor.h (90%) rename tensorflow/core/kernels/{ => linalg}/eye_functor_gpu.cu.cc (97%) rename tensorflow/core/kernels/{ => linalg}/linalg_ops_common.cc 
(99%) create mode 100644 tensorflow/core/kernels/linalg/linalg_ops_common.h rename tensorflow/core/kernels/{ => linalg}/lu_op.cc (100%) rename tensorflow/core/kernels/{ => linalg}/lu_op_gpu.cu.cc (99%) rename tensorflow/core/kernels/{ => linalg}/matrix_band_part_op.cc (99%) rename tensorflow/core/kernels/{ => linalg}/matrix_band_part_op.h (86%) rename tensorflow/core/kernels/{ => linalg}/matrix_band_part_op_gpu.cu.cc (97%) rename tensorflow/core/kernels/{ => linalg}/matrix_diag_op.cc (99%) rename tensorflow/core/kernels/{ => linalg}/matrix_diag_op.h (94%) rename tensorflow/core/kernels/{ => linalg}/matrix_diag_op_gpu.cu.cc (99%) rename tensorflow/core/kernels/{ => linalg}/matrix_exponential_op.cc (97%) rename tensorflow/core/kernels/{ => linalg}/matrix_inverse_op.cc (98%) rename tensorflow/core/kernels/{ => linalg}/matrix_logarithm_op.cc (97%) rename tensorflow/core/kernels/{ => linalg}/matrix_set_diag_op.cc (99%) rename tensorflow/core/kernels/{ => linalg}/matrix_set_diag_op.h (89%) rename tensorflow/core/kernels/{ => linalg}/matrix_set_diag_op_gpu.cu.cc (99%) rename tensorflow/core/kernels/{ => linalg}/matrix_solve_ls_op_complex128.cc (92%) rename tensorflow/core/kernels/{ => linalg}/matrix_solve_ls_op_complex64.cc (92%) rename tensorflow/core/kernels/{ => linalg}/matrix_solve_ls_op_double.cc (92%) rename tensorflow/core/kernels/{ => linalg}/matrix_solve_ls_op_float.cc (92%) rename tensorflow/core/kernels/{ => linalg}/matrix_solve_ls_op_impl.h (96%) rename tensorflow/core/kernels/{ => linalg}/matrix_solve_op.cc (99%) rename tensorflow/core/kernels/{ => linalg}/matrix_square_root_op.cc (97%) rename tensorflow/core/kernels/{ => linalg}/matrix_triangular_solve_op_complex.cc (92%) rename tensorflow/core/kernels/{ => linalg}/matrix_triangular_solve_op_impl.h (97%) rename tensorflow/core/kernels/{ => linalg}/matrix_triangular_solve_op_real.cc (93%) rename tensorflow/core/kernels/{ => linalg}/matrix_triangular_solve_op_test.cc (100%) rename tensorflow/core/kernels/{ => linalg}/qr_op_complex128.cc (96%) rename tensorflow/core/kernels/{ => linalg}/qr_op_complex64.cc (95%) rename tensorflow/core/kernels/{ => linalg}/qr_op_double.cc (96%) rename tensorflow/core/kernels/{ => linalg}/qr_op_float.cc (96%) rename tensorflow/core/kernels/{ => linalg}/qr_op_impl.h (96%) rename tensorflow/core/kernels/{ => linalg}/self_adjoint_eig_op.cc (98%) rename tensorflow/core/kernels/{ => linalg}/self_adjoint_eig_v2_op_complex128.cc (93%) rename tensorflow/core/kernels/{ => linalg}/self_adjoint_eig_v2_op_complex64.cc (93%) rename tensorflow/core/kernels/{ => linalg}/self_adjoint_eig_v2_op_double.cc (92%) rename tensorflow/core/kernels/{ => linalg}/self_adjoint_eig_v2_op_float.cc (92%) rename tensorflow/core/kernels/{ => linalg}/self_adjoint_eig_v2_op_gpu.cc (99%) rename tensorflow/core/kernels/{ => linalg}/self_adjoint_eig_v2_op_impl.h (91%) rename tensorflow/core/kernels/{ => linalg}/svd_op_complex128.cc (93%) rename tensorflow/core/kernels/{ => linalg}/svd_op_complex64.cc (93%) rename tensorflow/core/kernels/{ => linalg}/svd_op_double.cc (93%) rename tensorflow/core/kernels/{ => linalg}/svd_op_float.cc (93%) rename tensorflow/core/kernels/{ => linalg}/svd_op_gpu.cu.cc (99%) rename tensorflow/core/kernels/{ => linalg}/svd_op_impl.h (95%) rename tensorflow/core/kernels/{ => linalg}/tridiagonal_matmul_op.cc (98%) rename tensorflow/core/kernels/{ => linalg}/tridiagonal_matmul_op_gpu.cu.cc (96%) rename tensorflow/core/kernels/{ => linalg}/tridiagonal_solve_op.cc (99%) rename tensorflow/core/kernels/{ => 
linalg}/tridiagonal_solve_op_gpu.cu.cc (99%) rename tensorflow/core/{kernels => util}/cuda_solvers.cc (99%) rename tensorflow/core/{kernels => util}/cuda_solvers.h (99%) rename tensorflow/core/{kernels => util}/cuda_sparse.cc (99%) rename tensorflow/core/{kernels => util}/cuda_sparse.h (93%) rename tensorflow/core/{kernels => util}/rocm_solvers.cc (99%) rename tensorflow/core/{kernels => util}/rocm_solvers.h (96%) rename tensorflow/core/{kernels => util}/rocm_sparse.cc (99%) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 86c9d1fc665..161a0a95856 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1010,7 +1010,7 @@ cc_library( "//tensorflow/core/kernels:histogram_op", "//tensorflow/core/kernels:image", "//tensorflow/core/kernels:io", - "//tensorflow/core/kernels:linalg", + "//tensorflow/core/kernels/linalg:linalg", "//tensorflow/core/kernels:lookup", "//tensorflow/core/kernels:logging", "//tensorflow/core/kernels:manip", diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 34a3ee800d8..12d4f1c5574 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -1039,9 +1039,6 @@ cc_library( ":immutable_constant_op", ":inplace_ops", ":listdiff_op", - ":matrix_band_part_op", - ":matrix_diag_op", - ":matrix_set_diag_op", ":mirror_pad_op", ":one_hot_op", ":pack_op", @@ -1174,26 +1171,6 @@ tf_kernel_library( deps = ARRAY_DEPS, ) -tf_kernel_library( - name = "matrix_band_part_op", - prefix = "matrix_band_part_op", - deps = if_cuda([ - ":cuda_solvers", - ]) + ARRAY_DEPS, -) - -tf_kernel_library( - name = "matrix_diag_op", - prefix = "matrix_diag_op", - deps = ARRAY_DEPS, -) - -tf_kernel_library( - name = "matrix_set_diag_op", - prefix = "matrix_set_diag_op", - deps = ARRAY_DEPS + [":matrix_diag_op"], -) - tf_kernel_library( name = "mirror_pad_op", prefix = "mirror_pad_op", @@ -1405,7 +1382,7 @@ tf_kernel_library( "where_op_gpu_impl_8.cu.cc", ], deps = if_cuda_or_rocm([ - ":cuda_solvers", + "//tensorflow/core/util:cuda_solvers", ]) + [":gpu_prim_hdrs"] + ARRAY_DEPS, ) @@ -2785,21 +2762,6 @@ tf_cuda_cc_tests( ], ) -tf_kernel_library( - name = "eye_functor", - hdrs = ["eye_functor.h"], - gpu_srcs = [ - "eye_functor_gpu.cu.cc", - "eye_functor.h", - ], - visibility = [":friends"], - deps = [ - "//tensorflow/core:framework", - "//third_party/eigen3", - ], - alwayslink = 0, -) - cc_library( name = "fifo_queue", srcs = ["fifo_queue.cc"], @@ -3558,289 +3520,6 @@ tf_cc_tests( ], ) -cc_library( - name = "linalg", - deps = [ - ":banded_triangular_solve_op", - ":cholesky_grad", - ":cholesky_op", - ":determinant_op", - ":eig_op", - ":einsum_op", - ":lu_op", - ":matrix_exponential_op", - ":matrix_inverse_op", - ":matrix_logarithm_op", - ":matrix_solve_ls_op", - ":matrix_solve_op", - ":matrix_square_root_op", - ":matrix_triangular_solve_op", - ":qr_op", - ":self_adjoint_eig_op", - ":self_adjoint_eig_v2_op", - ":svd_op", - ":tridiagonal_matmul_op", - ":tridiagonal_solve_op", - ], -) - -tf_kernel_library( - name = "cuda_solvers", - srcs = ["cuda_solvers.cc"], - hdrs = ["cuda_solvers.h"], - # @local_config_cuda//cuda:cusolver_static, //third_party/eigen3:blas, - # and //third_party/libf2c all contain various parts of BLAS, LAPACK, - # and f2c helper functions in global namespace. Tell the compiler to - # allow multiple definitions when linking this. 
- linkopts = select({ - "//tensorflow:macos": [], - "//tensorflow:windows": [], - "//conditions:default": ["-Wl,-z,muldefs"], - }), - visibility = [":friends"], - deps = [ - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core/platform/default/build_config:cublas_plugin", - "//tensorflow/stream_executor/cuda:cublas_lib", - "//tensorflow/stream_executor/cuda:cusolver_lib", - ], -) - -tf_kernel_library( - name = "rocm_solvers", - srcs = ["rocm_solvers.cc"], - hdrs = ["rocm_solvers.h"], - visibility = [":friends"], - deps = [ - "//tensorflow/core:framework", - "//tensorflow/core:framework_internal", - "//tensorflow/core:lib", - "//tensorflow/stream_executor/lib", - "//tensorflow/stream_executor/platform:dso_loader", - "//tensorflow/stream_executor/rocm:rocblas_plugin", - "//tensorflow/stream_executor/rocm:rocm_gpu_executor", - ] + if_rocm([ - "@local_config_rocm//rocm:rocprim", - ]), -) - -tf_kernel_library( - name = "cuda_sparse", - srcs = if_cuda(["cuda_sparse.cc"]) + if_rocm(["rocm_sparse.cc"]), - hdrs = ["cuda_sparse.h"], - deps = [ - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core/kernels:cuda_solvers", - ] + if_cuda([ - "//tensorflow/stream_executor/cuda:cusparse_lib", - "@cub_archive//:cub", - ]) + if_rocm([ - "@local_config_rocm//rocm:hipsparse", - ]), -) - -LINALG_DEPS = [ - ":linalg_ops_common", - "//third_party/eigen3", - "//tensorflow/core:framework", - "//tensorflow/core:lib", -] + if_cuda([ - ":cuda_solvers", - ":transpose_functor", -]) + if_rocm([ - ":rocm_solvers", -]) - -tf_kernel_library( - name = "cholesky_op", - prefix = "cholesky_op", - deps = if_cuda([ - ":matrix_band_part_op", - ]) + LINALG_DEPS, -) - -tf_kernel_library( - name = "cholesky_grad", - prefix = "cholesky_grad", - deps = LINALG_DEPS, -) - -tf_kernel_library( - name = "determinant_op", - prefix = "determinant_op", - deps = if_cuda([ - ":fill_functor", - ]) + LINALG_DEPS, -) - -tf_kernel_library( - name = "matrix_exponential_op", - prefix = "matrix_exponential_op", - deps = LINALG_DEPS, -) - -tf_kernel_library( - name = "matrix_logarithm_op", - prefix = "matrix_logarithm_op", - deps = LINALG_DEPS, -) - -tf_kernel_library( - name = "self_adjoint_eig_op", - prefix = "self_adjoint_eig_op", - deps = LINALG_DEPS + ["//tensorflow/core:lib_internal"], -) - -tf_kernel_library( - name = "self_adjoint_eig_v2_op", - prefix = "self_adjoint_eig_v2_op", - deps = LINALG_DEPS + ["//tensorflow/core:lib_internal"] + if_cuda([ - ":cast_op", - ":cwise_op", - ]), -) - -tf_kernel_library( - name = "eig_op", - prefix = "eig_op", - deps = LINALG_DEPS + ["//tensorflow/core:lib_internal"] + if_cuda([ - ":cast_op", - ":cwise_op", - ]), -) - -tf_kernel_library( - name = "matrix_inverse_op", - prefix = "matrix_inverse_op", - deps = LINALG_DEPS + if_cuda([":eye_functor"]), -) - -tf_kernel_library( - name = "matrix_solve_ls_op", - prefix = "matrix_solve_ls_op", - deps = LINALG_DEPS, -) - -tf_kernel_library( - name = "matrix_solve_op", - prefix = "matrix_solve_op", - deps = LINALG_DEPS, -) - -tf_kernel_library( - name = "matrix_square_root_op", - prefix = "matrix_square_root_op", - deps = LINALG_DEPS, -) - -tf_kernel_library( - name = "banded_triangular_solve_op", - prefix = "banded_triangular_solve_op", - deps = LINALG_DEPS + [":fill_functor"], -) - -tf_kernel_library( - name = "matrix_triangular_solve_op", - hdrs = ["matrix_triangular_solve_op_impl.h"], - prefix = "matrix_triangular_solve_op", - deps = [ - ":linalg_ops_common", - "//third_party/eigen3", - 
"//tensorflow/core:framework", - "//tensorflow/core:lib", - ":fill_functor", - "//tensorflow/core:stream_executor", - ] + if_cuda([ - "//tensorflow/core/platform/default/build_config:cublas_plugin", - ":cuda_solvers", - ]) + if_rocm([ - "@local_config_rocm//rocm:rocprim", - ":rocm_solvers", - ]) + if_cuda_or_rocm([ - ":transpose_functor", - ]), -) - -tf_kernel_library( - name = "tridiagonal_matmul_op", - srcs = ["tridiagonal_matmul_op.cc"], - gpu_srcs = ["tridiagonal_matmul_op_gpu.cu.cc"], - deps = LINALG_DEPS + if_cuda([ - ":cuda_sparse", - ]), -) - -tf_kernel_library( - name = "tridiagonal_solve_op", - srcs = ["tridiagonal_solve_op.cc"], - gpu_srcs = ["tridiagonal_solve_op_gpu.cu.cc"], - deps = LINALG_DEPS + if_cuda([ - ":cuda_sparse", - ]), -) - -tf_kernel_library( - name = "qr_op", - prefix = "qr_op", - deps = LINALG_DEPS + if_cuda([ - ":cwise_op", - ":eye_functor", - ":matrix_band_part_op", - ]), -) - -tf_kernel_library( - name = "svd_op", - prefix = "svd_op", - deps = LINALG_DEPS + if_cuda([ - ":eye_functor", - ]), -) - -tf_kernel_library( - name = "lu_op", - prefix = "lu_op", - deps = if_cuda([ - ":cuda_solvers", - ":transpose_functor", - ]) + [ - "//third_party/eigen3", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - ], -) - -tf_kernel_library( - name = "einsum_op", - prefix = "einsum_op", - deps = [ - ":batch_matmul_op", - ":fill_functor", - ":reduction_ops", - ":transpose_functor", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core/profiler/lib:traceme", - "//third_party/eigen3", - "@com_google_absl//absl/container:flat_hash_map", - "@com_google_absl//absl/strings", - ], -) - -cc_library( - name = "linalg_ops_common", - srcs = ["linalg_ops_common.cc"], - hdrs = ["linalg_ops_common.h"], - visibility = ["//visibility:private"], - deps = [ - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//third_party/eigen3", - ], -) - cc_library( name = "logging", deps = [ @@ -4208,7 +3887,7 @@ tf_kernel_library( name = "segment_reduction_ops", prefix = "segment_reduction_ops", deps = MATH_DEPS + if_cuda_or_rocm([ - ":cuda_solvers", + "//tensorflow/core/util:cuda_solvers", ]), ) @@ -4405,45 +4084,6 @@ tf_cuda_cc_test( ], ) -tf_cuda_cc_test( - name = "banded_triangular_solve_op_test", - size = "small", - srcs = ["banded_triangular_solve_op_test.cc"], - deps = [ - ":banded_triangular_solve_op", - ":matrix_set_diag_op", - ":matrix_triangular_solve_op", - ":ops_testutil", - ":ops_util", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - ], -) - -tf_cuda_cc_test( - name = "matrix_triangular_solve_op_test", - size = "small", - srcs = ["matrix_triangular_solve_op_test.cc"], - deps = [ - ":broadcast_to_op", - ":matrix_triangular_solve_op", - ":ops_testutil", - ":ops_util", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - ], -) - tf_cuda_cc_test( name = "scan_ops_test", size = "small", @@ -6672,10 +6312,7 @@ filegroup( "lookup_table_init_op.h", "lookup_table_op.h", "lookup_util.h", - "linalg_ops_common.h", "list_kernels.h", - "matrix_diag_op.h", - "matrix_set_diag_op.h", "maxpooling_op.h", "mfcc.h", "mfcc_dct.h", @@ -6723,6 +6360,9 @@ filegroup( "xent_op.h", ] + [ 
"//tensorflow/core/kernels/boosted_trees/quantiles:weighted_quantiles_hdrs", + "//tensorflow/core/kernels/linalg:linalg_ops_common.h", + "//tensorflow/core/kernels/linalg:matrix_diag_op.h", + "//tensorflow/core/kernels/linalg:matrix_set_diag_op.h", ], ) @@ -6823,16 +6463,6 @@ filegroup( "encode_wav_op.cc", "eigen_contraction_kernel.cc", "eigen_contraction_kernel.h", - "einsum_op_impl_half.cc", - "einsum_op_impl_bfloat16.cc", - "einsum_op_impl_int32.cc", - "einsum_op_impl_int64.cc", - "einsum_op_impl_float.cc", - "einsum_op_impl_double.cc", - "einsum_op_impl_complex64.cc", - "einsum_op_impl_complex128.cc", - "einsum_op_impl.h", - "einsum_op.h", "fake_quant_ops.cc", "fifo_queue.cc", "fifo_queue_op.cc", @@ -6844,6 +6474,17 @@ filegroup( "population_count_op.h", "winograd_transform.h", ":android_extended_ops_headers", + ] + [ + "//tensorflow/core/kernels/linalg:einsum_op_impl_half.cc", + "//tensorflow/core/kernels/linalg:einsum_op_impl_bfloat16.cc", + "//tensorflow/core/kernels/linalg:einsum_op_impl_int32.cc", + "//tensorflow/core/kernels/linalg:einsum_op_impl_int64.cc", + "//tensorflow/core/kernels/linalg:einsum_op_impl_float.cc", + "//tensorflow/core/kernels/linalg:einsum_op_impl_double.cc", + "//tensorflow/core/kernels/linalg:einsum_op_impl_complex64.cc", + "//tensorflow/core/kernels/linalg:einsum_op_impl_complex128.cc", + "//tensorflow/core/kernels/linalg:einsum_op_impl.h", + "//tensorflow/core/kernels/linalg:einsum_op.h", ] + select({ ":xsmm_convolutions": [ "xsmm_conv2d.h", @@ -6874,7 +6515,6 @@ filegroup( "in_topk_op.cc", "in_topk_op.h", "initializable_lookup_table.cc", - "linalg_ops_common.cc", "list_kernels.cc", "logging_ops.cc", "logging_ops.h", @@ -6882,9 +6522,6 @@ filegroup( "lookup_table_op.cc", "lookup_util.cc", "lrn_op.cc", - "matrix_diag_op.cc", - "matrix_inverse_op.cc", - "matrix_set_diag_op.cc", "maxpooling_op.cc", "mfcc.cc", "mfcc_dct.cc", @@ -7006,6 +6643,10 @@ filegroup( ":android_extended_ops_headers", ] + [ "//tensorflow/core/kernels/boosted_trees:quantile_ops.cc", + "//tensorflow/core/kernels/linalg:linalg_ops_common.cc", + "//tensorflow/core/kernels/linalg:matrix_diag_op.cc", + "//tensorflow/core/kernels/linalg:matrix_inverse_op.cc", + "//tensorflow/core/kernels/linalg:matrix_set_diag_op.cc", ], ) @@ -7059,6 +6700,7 @@ filegroup( srcs = [ "//tensorflow/c/kernels:android_all_op_kernels", "//tensorflow/core/kernels/data:android_all_op_kernels", + "//tensorflow/core/kernels/linalg:android_all_op_kernels", ] + glob( [ "*.cc", @@ -8827,3 +8469,15 @@ tf_kernel_library( "@sobol_data", ], ) + +# ---- temporary forwarding declaration for libraries in linalg +# TODO(b/160344057): Remove after updating dependencies. +tf_kernel_library( + name = "matrix_inverse_op", + deps = ["//tensorflow/core/kernels/linalg:matrix_inverse_op"], +) + +tf_kernel_library( + name = "einsum_op", + deps = ["//tensorflow/core/kernels/linalg:einsum_op"], +) diff --git a/tensorflow/core/kernels/linalg/BUILD b/tensorflow/core/kernels/linalg/BUILD new file mode 100644 index 00000000000..c735f58ae51 --- /dev/null +++ b/tensorflow/core/kernels/linalg/BUILD @@ -0,0 +1,376 @@ +load( + "//tensorflow:tensorflow.bzl", + "if_cuda_or_rocm", + "tf_kernel_library", +) +load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") +load( + "@local_config_rocm//rocm:build_defs.bzl", + "if_rocm", +) +load("//tensorflow:tensorflow.bzl", "tf_cuda_cc_test") + +# Description: +# Op kernel implementations for TensorFlow. 
+# +# Note: Any test that uses GPU support and which we would like to +# benchmark should be linked statically so that it can be executed +# from a py_binary or cuda_py_test test logger. For such a test, +# append "_gpu" to the test name to invoke the GPU benchmarks. Example: +# +# # for CPU tests +# $ bazel test --config opt //third_party/tensorflow/core/kernels:my_op_test +# # for GPU benchmarks +# $ bazel run --config opt --config=cuda //third_party/tensorflow/core/kernels:my_op_test_gpu -- --benchmarks=.. +# +package( + default_visibility = [ + "//tensorflow:__subpackages__", + "//tensorflow:internal", + ], + licenses = ["notice"], # Apache 2.0 +) + +# TODO(rmlarsen): Remove ASAP. +package_group( + name = "friends", + packages = ["//tensorflow/..."], +) + +# Export a few files for use on Android. +exports_files([ + "einsum_op_impl_half.cc", + "einsum_op_impl_bfloat16.cc", + "einsum_op_impl_int32.cc", + "einsum_op_impl_int64.cc", + "einsum_op_impl_float.cc", + "einsum_op_impl_double.cc", + "einsum_op_impl_complex64.cc", + "einsum_op_impl_complex128.cc", + "einsum_op_impl.h", + "einsum_op.h", + "linalg_ops_common.h", + "linalg_ops_common.cc", + "matrix_diag_op.h", + "matrix_diag_op.cc", + "matrix_inverse_op.cc", + "matrix_set_diag_op.h", + "matrix_set_diag_op.cc", +]) + +# Public support libraries ---------------------------------------------------- + +cc_library( + name = "linalg", + deps = [ + ":banded_triangular_solve_op", + ":cholesky_grad", + ":cholesky_op", + ":determinant_op", + ":eig_op", + ":einsum_op", + ":lu_op", + ":matrix_band_part_op", + ":matrix_diag_op", + ":matrix_exponential_op", + ":matrix_inverse_op", + ":matrix_logarithm_op", + ":matrix_set_diag_op", + ":matrix_solve_ls_op", + ":matrix_solve_op", + ":matrix_square_root_op", + ":matrix_triangular_solve_op", + ":qr_op", + ":self_adjoint_eig_op", + ":self_adjoint_eig_v2_op", + ":svd_op", + ":tridiagonal_matmul_op", + ":tridiagonal_solve_op", + ], +) + +LINALG_DEPS = [ + ":linalg_ops_common", + "//third_party/eigen3", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core/kernels:cast_op", + "//tensorflow/core/kernels:fill_functor", +] + if_cuda([ + ":eye_functor", + "//tensorflow/core/util:cuda_solvers", + "//tensorflow/core/kernels:transpose_functor", +]) + if_rocm([ + "//tensorflow/core/util:rocm_solvers", +]) + +tf_kernel_library( + name = "matrix_band_part_op", + prefix = "matrix_band_part_op", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "matrix_diag_op", + prefix = "matrix_diag_op", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "matrix_set_diag_op", + prefix = "matrix_set_diag_op", + deps = LINALG_DEPS + [":matrix_diag_op"], +) + +tf_kernel_library( + name = "cholesky_op", + prefix = "cholesky_op", + deps = if_cuda([ + ":matrix_band_part_op", + ]) + LINALG_DEPS, +) + +tf_kernel_library( + name = "cholesky_grad", + prefix = "cholesky_grad", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "determinant_op", + prefix = "determinant_op", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "matrix_exponential_op", + prefix = "matrix_exponential_op", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "matrix_logarithm_op", + prefix = "matrix_logarithm_op", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "self_adjoint_eig_op", + prefix = "self_adjoint_eig_op", + deps = LINALG_DEPS + ["//tensorflow/core:lib_internal"], +) + +tf_kernel_library( + name = "self_adjoint_eig_v2_op", + prefix = "self_adjoint_eig_v2_op", + deps = LINALG_DEPS + 
["//tensorflow/core:lib_internal"] + if_cuda([ + "//tensorflow/core/kernels:cwise_op", + ]), +) + +tf_kernel_library( + name = "eig_op", + prefix = "eig_op", + deps = LINALG_DEPS + ["//tensorflow/core:lib_internal"] + if_cuda([ + "//tensorflow/core/kernels:cwise_op", + ]), +) + +tf_kernel_library( + name = "matrix_inverse_op", + prefix = "matrix_inverse_op", + visibility = [":friends"], + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "matrix_solve_ls_op", + prefix = "matrix_solve_ls_op", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "matrix_solve_op", + prefix = "matrix_solve_op", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "matrix_square_root_op", + prefix = "matrix_square_root_op", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "banded_triangular_solve_op", + prefix = "banded_triangular_solve_op", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "matrix_triangular_solve_op", + hdrs = ["matrix_triangular_solve_op_impl.h"], + prefix = "matrix_triangular_solve_op", + deps = [ + ":linalg_ops_common", + "//third_party/eigen3", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core/kernels:fill_functor", + "//tensorflow/core:stream_executor", + ] + if_cuda([ + "//tensorflow/core/platform/default/build_config:cublas_plugin", + "//tensorflow/core/util:cuda_solvers", + ]) + if_rocm([ + "@local_config_rocm//rocm:rocprim", + "//tensorflow/core/util:rocm_solvers", + ]) + if_cuda_or_rocm([ + "//tensorflow/core/kernels:transpose_functor", + ]), +) + +tf_kernel_library( + name = "tridiagonal_matmul_op", + srcs = ["tridiagonal_matmul_op.cc"], + gpu_srcs = ["tridiagonal_matmul_op_gpu.cu.cc"], + deps = LINALG_DEPS + if_cuda([ + "//tensorflow/core/util:cuda_sparse", + ]), +) + +tf_kernel_library( + name = "tridiagonal_solve_op", + srcs = ["tridiagonal_solve_op.cc"], + gpu_srcs = ["tridiagonal_solve_op_gpu.cu.cc"], + deps = LINALG_DEPS + if_cuda([ + "//tensorflow/core/util:cuda_sparse", + ]), +) + +tf_kernel_library( + name = "qr_op", + prefix = "qr_op", + deps = LINALG_DEPS + if_cuda([ + "//tensorflow/core/kernels:cwise_op", + ":matrix_band_part_op", + ]), +) + +tf_kernel_library( + name = "svd_op", + prefix = "svd_op", + deps = LINALG_DEPS, +) + +tf_kernel_library( + name = "lu_op", + prefix = "lu_op", + deps = if_cuda([ + "//tensorflow/core/util:cuda_solvers", + "//tensorflow/core/kernels:transpose_functor", + ]) + [ + "//third_party/eigen3", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + ], +) + +tf_kernel_library( + name = "einsum_op", + prefix = "einsum_op", + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core/kernels:batch_matmul_op", + "//tensorflow/core/kernels:fill_functor", + "//tensorflow/core/kernels:reduction_ops", + "//tensorflow/core/kernels:transpose_functor", + "//tensorflow/core/profiler/lib:traceme", + "//third_party/eigen3", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/strings", + ], +) + +cc_library( + name = "linalg_ops_common", + srcs = ["linalg_ops_common.cc"], + hdrs = ["linalg_ops_common.h"], + visibility = ["//visibility:private"], + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//third_party/eigen3", + ], +) + +tf_cuda_cc_test( + name = "banded_triangular_solve_op_test", + size = "small", + srcs = ["banded_triangular_solve_op_test.cc"], + deps = [ + ":banded_triangular_solve_op", + ":matrix_set_diag_op", + ":matrix_triangular_solve_op", + "//tensorflow/core:core_cpu", + 
"//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/kernels:ops_testutil", + "//tensorflow/core/kernels:ops_util", + ], +) + +tf_kernel_library( + name = "eye_functor", + hdrs = ["eye_functor.h"], + gpu_srcs = [ + "eye_functor_gpu.cu.cc", + "eye_functor.h", + ], + visibility = ["//tensorflow/core/kernels:friends"], + deps = [ + "//tensorflow/core:framework", + "//third_party/eigen3", + ], + alwayslink = 0, +) + +tf_cuda_cc_test( + name = "matrix_triangular_solve_op_test", + size = "small", + srcs = ["matrix_triangular_solve_op_test.cc"], + deps = [ + ":matrix_triangular_solve_op", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/kernels:broadcast_to_op", + "//tensorflow/core/kernels:ops_testutil", + "//tensorflow/core/kernels:ops_util", + ], +) + +# A file group which contains all operators which are known to work on mobile. +filegroup( + name = "android_all_op_kernels", + srcs = glob( + [ + "*.cc", + "*.h", + ], + exclude = [ + "*test.cc", + "*test.h", + "*_test_*", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/core/kernels/banded_triangular_solve_op.cc b/tensorflow/core/kernels/linalg/banded_triangular_solve_op.cc similarity index 99% rename from tensorflow/core/kernels/banded_triangular_solve_op.cc rename to tensorflow/core/kernels/linalg/banded_triangular_solve_op.cc index d01a015502a..6758dcf5b8b 100644 --- a/tensorflow/core/kernels/banded_triangular_solve_op.cc +++ b/tensorflow/core/kernels/linalg/banded_triangular_solve_op.cc @@ -20,7 +20,7 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/kernels/fill_functor.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" diff --git a/tensorflow/core/kernels/banded_triangular_solve_op_test.cc b/tensorflow/core/kernels/linalg/banded_triangular_solve_op_test.cc similarity index 99% rename from tensorflow/core/kernels/banded_triangular_solve_op_test.cc rename to tensorflow/core/kernels/linalg/banded_triangular_solve_op_test.cc index 37e904a3e0e..7c20b88845f 100644 --- a/tensorflow/core/kernels/banded_triangular_solve_op_test.cc +++ b/tensorflow/core/kernels/linalg/banded_triangular_solve_op_test.cc @@ -21,7 +21,7 @@ limitations under the License. 
#include "tensorflow/core/graph/graph.h" #include "tensorflow/core/graph/node_builder.h" #include "tensorflow/core/graph/testlib.h" -#include "tensorflow/core/kernels/matrix_set_diag_op.h" +#include "tensorflow/core/kernels/linalg/matrix_set_diag_op.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/test_benchmark.h" diff --git a/tensorflow/core/kernels/cholesky_grad.cc b/tensorflow/core/kernels/linalg/cholesky_grad.cc similarity index 99% rename from tensorflow/core/kernels/cholesky_grad.cc rename to tensorflow/core/kernels/linalg/cholesky_grad.cc index eac66e580dd..31a5570cddf 100644 --- a/tensorflow/core/kernels/cholesky_grad.cc +++ b/tensorflow/core/kernels/linalg/cholesky_grad.cc @@ -18,7 +18,7 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/types.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/cholesky_op.cc b/tensorflow/core/kernels/linalg/cholesky_op.cc similarity index 98% rename from tensorflow/core/kernels/cholesky_op.cc rename to tensorflow/core/kernels/linalg/cholesky_op.cc index ff8fd08f228..eae09124b36 100644 --- a/tensorflow/core/kernels/cholesky_op.cc +++ b/tensorflow/core/kernels/linalg/cholesky_op.cc @@ -25,16 +25,16 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" #if GOOGLE_CUDA #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/matrix_band_part_op.h" +#include "tensorflow/core/kernels/linalg/matrix_band_part_op.h" #include "tensorflow/core/platform/stream_executor.h" +#include "tensorflow/core/util/cuda_solvers.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/determinant_op.cc b/tensorflow/core/kernels/linalg/determinant_op.cc similarity index 99% rename from tensorflow/core/kernels/determinant_op.cc rename to tensorflow/core/kernels/linalg/determinant_op.cc index b06f42384eb..8f0b0b618cf 100644 --- a/tensorflow/core/kernels/determinant_op.cc +++ b/tensorflow/core/kernels/linalg/determinant_op.cc @@ -20,7 +20,7 @@ limitations under the License. #if GOOGLE_CUDA #define EIGEN_USE_GPU #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#include "tensorflow/core/kernels/determinant_op.h" +#include "tensorflow/core/kernels/linalg/determinant_op.h" #endif #include "third_party/eigen3/Eigen/LU" @@ -28,14 +28,14 @@ limitations under the License. 
#include "tensorflow/core/framework/numeric_types.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" #if GOOGLE_CUDA -#include "tensorflow/core/kernels/cuda_solvers.h" #include "tensorflow/core/kernels/fill_functor.h" +#include "tensorflow/core/util/cuda_solvers.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/determinant_op.h b/tensorflow/core/kernels/linalg/determinant_op.h similarity index 90% rename from tensorflow/core/kernels/determinant_op.h rename to tensorflow/core/kernels/linalg/determinant_op.h index eefdfe0ae40..6ace1bef44b 100644 --- a/tensorflow/core/kernels/determinant_op.h +++ b/tensorflow/core/kernels/linalg/determinant_op.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_DETERMINANT_OP_H_ -#define TENSORFLOW_CORE_KERNELS_DETERMINANT_OP_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_DETERMINANT_OP_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_DETERMINANT_OP_H_ #include "tensorflow/core/framework/tensor_types.h" @@ -44,4 +44,4 @@ struct LogDeterminantFromPivotedLUFunctor { } // namespace functor } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_DETERMINANT_OP_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_DETERMINANT_OP_H_ diff --git a/tensorflow/core/kernels/determinant_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/determinant_op_gpu.cu.cc similarity index 98% rename from tensorflow/core/kernels/determinant_op_gpu.cu.cc rename to tensorflow/core/kernels/linalg/determinant_op_gpu.cu.cc index 9aa64b3a7da..f6ab327bce0 100644 --- a/tensorflow/core/kernels/determinant_op_gpu.cu.cc +++ b/tensorflow/core/kernels/linalg/determinant_op_gpu.cu.cc @@ -21,8 +21,8 @@ limitations under the License. #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/tensor_types.h" -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/determinant_op.h" +#include "tensorflow/core/kernels/linalg/determinant_op.h" +#include "tensorflow/core/util/cuda_solvers.h" #include "tensorflow/core/util/gpu_kernel_helper.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/eig_op_complex128.cc b/tensorflow/core/kernels/linalg/eig_op_complex128.cc similarity index 93% rename from tensorflow/core/kernels/eig_op_complex128.cc rename to tensorflow/core/kernels/linalg/eig_op_complex128.cc index 988cc2f98d9..bd4b6fe36d0 100644 --- a/tensorflow/core/kernels/eig_op_complex128.cc +++ b/tensorflow/core/kernels/linalg/eig_op_complex128.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/eig_op_impl.h" +#include "tensorflow/core/kernels/linalg/eig_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/eig_op_complex64.cc b/tensorflow/core/kernels/linalg/eig_op_complex64.cc similarity index 93% rename from tensorflow/core/kernels/eig_op_complex64.cc rename to tensorflow/core/kernels/linalg/eig_op_complex64.cc index 6a3f7928715..b5b4a26ee85 100644 --- a/tensorflow/core/kernels/eig_op_complex64.cc +++ b/tensorflow/core/kernels/linalg/eig_op_complex64.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/eig_op_impl.h" +#include "tensorflow/core/kernels/linalg/eig_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/eig_op_double.cc b/tensorflow/core/kernels/linalg/eig_op_double.cc similarity index 93% rename from tensorflow/core/kernels/eig_op_double.cc rename to tensorflow/core/kernels/linalg/eig_op_double.cc index 2cd931cc135..c360637c84a 100644 --- a/tensorflow/core/kernels/eig_op_double.cc +++ b/tensorflow/core/kernels/linalg/eig_op_double.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/eig_op_impl.h" +#include "tensorflow/core/kernels/linalg/eig_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/eig_op_float.cc b/tensorflow/core/kernels/linalg/eig_op_float.cc similarity index 93% rename from tensorflow/core/kernels/eig_op_float.cc rename to tensorflow/core/kernels/linalg/eig_op_float.cc index a06f76e935f..18f576fcc19 100644 --- a/tensorflow/core/kernels/eig_op_float.cc +++ b/tensorflow/core/kernels/linalg/eig_op_float.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/eig_op_impl.h" +#include "tensorflow/core/kernels/linalg/eig_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/eig_op_impl.h b/tensorflow/core/kernels/linalg/eig_op_impl.h similarity index 93% rename from tensorflow/core/kernels/eig_op_impl.h rename to tensorflow/core/kernels/linalg/eig_op_impl.h index 4ebb6bde08b..a7aff7c2a5d 100644 --- a/tensorflow/core/kernels/eig_op_impl.h +++ b/tensorflow/core/kernels/linalg/eig_op_impl.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_EIG_OP_IMPL_H_ -#define TENSORFLOW_CORE_KERNELS_EIG_OP_IMPL_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_EIG_OP_IMPL_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_EIG_OP_IMPL_H_ // See docs in ../ops/linalg_ops.cc. @@ -23,7 +23,7 @@ limitations under the License. 
#include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/denormal.h" #include "tensorflow/core/platform/logging.h" @@ -95,4 +95,4 @@ class EigOp : public LinearAlgebraOp { } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_EIG_OP_IMPL_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_EIG_OP_IMPL_H_ diff --git a/tensorflow/core/kernels/einsum_op.h b/tensorflow/core/kernels/linalg/einsum_op.h similarity index 94% rename from tensorflow/core/kernels/einsum_op.h rename to tensorflow/core/kernels/linalg/einsum_op.h index 31d1109004c..f22f33c600a 100644 --- a/tensorflow/core/kernels/einsum_op.h +++ b/tensorflow/core/kernels/linalg/einsum_op.h @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_EINSUM_OP_H_ -#define TENSORFLOW_CORE_KERNELS_EINSUM_OP_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_EINSUM_OP_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_EINSUM_OP_H_ #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/tensor_types.h" diff --git a/tensorflow/core/kernels/einsum_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/einsum_op_gpu.cu.cc similarity index 96% rename from tensorflow/core/kernels/einsum_op_gpu.cu.cc rename to tensorflow/core/kernels/linalg/einsum_op_gpu.cu.cc index 2935b7fd02a..5461e43e0ab 100644 --- a/tensorflow/core/kernels/einsum_op_gpu.cu.cc +++ b/tensorflow/core/kernels/linalg/einsum_op_gpu.cu.cc @@ -17,7 +17,7 @@ limitations under the License. #define EIGEN_USE_GPU #include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/kernels/einsum_op.h" +#include "tensorflow/core/kernels/linalg/einsum_op.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/einsum_op_impl.h b/tensorflow/core/kernels/linalg/einsum_op_impl.h similarity index 99% rename from tensorflow/core/kernels/einsum_op_impl.h rename to tensorflow/core/kernels/linalg/einsum_op_impl.h index 312738442b8..b9b2d1f0eae 100644 --- a/tensorflow/core/kernels/einsum_op_impl.h +++ b/tensorflow/core/kernels/linalg/einsum_op_impl.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_EINSUM_OP_IMPL_H_ -#define TENSORFLOW_CORE_KERNELS_EINSUM_OP_IMPL_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_EINSUM_OP_IMPL_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_EINSUM_OP_IMPL_H_ #define EIGEN_USE_THREADS #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM @@ -31,8 +31,8 @@ limitations under the License. 
#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/kernels/batch_matmul_op_impl.h" -#include "tensorflow/core/kernels/einsum_op.h" #include "tensorflow/core/kernels/fill_functor.h" +#include "tensorflow/core/kernels/linalg/einsum_op.h" #include "tensorflow/core/kernels/reduction_ops_common.h" #include "tensorflow/core/kernels/transpose_functor.h" #include "tensorflow/core/lib/core/errors.h" @@ -780,4 +780,4 @@ DECLARE_GPU_SPECS(complex128); } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_EINSUM_OP_IMPL_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_EINSUM_OP_IMPL_H_ diff --git a/tensorflow/core/kernels/einsum_op_impl_bfloat16.cc b/tensorflow/core/kernels/linalg/einsum_op_impl_bfloat16.cc similarity index 94% rename from tensorflow/core/kernels/einsum_op_impl_bfloat16.cc rename to tensorflow/core/kernels/linalg/einsum_op_impl_bfloat16.cc index 44508f86a5e..e2e13052df5 100644 --- a/tensorflow/core/kernels/einsum_op_impl_bfloat16.cc +++ b/tensorflow/core/kernels/linalg/einsum_op_impl_bfloat16.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/einsum_op_impl.h" +#include "tensorflow/core/kernels/linalg/einsum_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/einsum_op_impl_complex128.cc b/tensorflow/core/kernels/linalg/einsum_op_impl_complex128.cc similarity index 95% rename from tensorflow/core/kernels/einsum_op_impl_complex128.cc rename to tensorflow/core/kernels/linalg/einsum_op_impl_complex128.cc index 8473cbf545d..ff78d460acf 100644 --- a/tensorflow/core/kernels/einsum_op_impl_complex128.cc +++ b/tensorflow/core/kernels/linalg/einsum_op_impl_complex128.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/einsum_op_impl.h" +#include "tensorflow/core/kernels/linalg/einsum_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/einsum_op_impl_complex64.cc b/tensorflow/core/kernels/linalg/einsum_op_impl_complex64.cc similarity index 95% rename from tensorflow/core/kernels/einsum_op_impl_complex64.cc rename to tensorflow/core/kernels/linalg/einsum_op_impl_complex64.cc index bd506a04f5f..cd3788846b2 100644 --- a/tensorflow/core/kernels/einsum_op_impl_complex64.cc +++ b/tensorflow/core/kernels/linalg/einsum_op_impl_complex64.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/einsum_op_impl.h" +#include "tensorflow/core/kernels/linalg/einsum_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/einsum_op_impl_double.cc b/tensorflow/core/kernels/linalg/einsum_op_impl_double.cc similarity index 95% rename from tensorflow/core/kernels/einsum_op_impl_double.cc rename to tensorflow/core/kernels/linalg/einsum_op_impl_double.cc index f994590779b..e0c093fa4a9 100644 --- a/tensorflow/core/kernels/einsum_op_impl_double.cc +++ b/tensorflow/core/kernels/linalg/einsum_op_impl_double.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/einsum_op_impl.h" +#include "tensorflow/core/kernels/linalg/einsum_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/einsum_op_impl_float.cc b/tensorflow/core/kernels/linalg/einsum_op_impl_float.cc similarity index 95% rename from tensorflow/core/kernels/einsum_op_impl_float.cc rename to tensorflow/core/kernels/linalg/einsum_op_impl_float.cc index 1875310b687..ad9135c991c 100644 --- a/tensorflow/core/kernels/einsum_op_impl_float.cc +++ b/tensorflow/core/kernels/linalg/einsum_op_impl_float.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/einsum_op_impl.h" +#include "tensorflow/core/kernels/linalg/einsum_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/einsum_op_impl_half.cc b/tensorflow/core/kernels/linalg/einsum_op_impl_half.cc similarity index 95% rename from tensorflow/core/kernels/einsum_op_impl_half.cc rename to tensorflow/core/kernels/linalg/einsum_op_impl_half.cc index 0486b133e62..72a9f6bec4f 100644 --- a/tensorflow/core/kernels/einsum_op_impl_half.cc +++ b/tensorflow/core/kernels/linalg/einsum_op_impl_half.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/einsum_op_impl.h" +#include "tensorflow/core/kernels/linalg/einsum_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/einsum_op_impl_int32.cc b/tensorflow/core/kernels/linalg/einsum_op_impl_int32.cc similarity index 94% rename from tensorflow/core/kernels/einsum_op_impl_int32.cc rename to tensorflow/core/kernels/linalg/einsum_op_impl_int32.cc index db5169498d9..7569c979c59 100644 --- a/tensorflow/core/kernels/einsum_op_impl_int32.cc +++ b/tensorflow/core/kernels/linalg/einsum_op_impl_int32.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/einsum_op_impl.h" +#include "tensorflow/core/kernels/linalg/einsum_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/einsum_op_impl_int64.cc b/tensorflow/core/kernels/linalg/einsum_op_impl_int64.cc similarity index 94% rename from tensorflow/core/kernels/einsum_op_impl_int64.cc rename to tensorflow/core/kernels/linalg/einsum_op_impl_int64.cc index 7f1a1eac411..6ee0ebc9637 100644 --- a/tensorflow/core/kernels/einsum_op_impl_int64.cc +++ b/tensorflow/core/kernels/linalg/einsum_op_impl_int64.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/einsum_op_impl.h" +#include "tensorflow/core/kernels/linalg/einsum_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/eye_functor.h b/tensorflow/core/kernels/linalg/eye_functor.h similarity index 90% rename from tensorflow/core/kernels/eye_functor.h rename to tensorflow/core/kernels/linalg/eye_functor.h index 3799cfba9ae..c77372f089a 100644 --- a/tensorflow/core/kernels/eye_functor.h +++ b/tensorflow/core/kernels/linalg/eye_functor.h @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_EYE_FUNCTOR_H_ -#define TENSORFLOW_CORE_KERNELS_EYE_FUNCTOR_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_EYE_FUNCTOR_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_EYE_FUNCTOR_H_ #include "tensorflow/core/framework/tensor_types.h" diff --git a/tensorflow/core/kernels/eye_functor_gpu.cu.cc b/tensorflow/core/kernels/linalg/eye_functor_gpu.cu.cc similarity index 97% rename from tensorflow/core/kernels/eye_functor_gpu.cu.cc rename to tensorflow/core/kernels/linalg/eye_functor_gpu.cu.cc index 90df538dd2c..85865588f2c 100644 --- a/tensorflow/core/kernels/eye_functor_gpu.cu.cc +++ b/tensorflow/core/kernels/linalg/eye_functor_gpu.cu.cc @@ -20,7 +20,7 @@ limitations under the License. #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/type_traits.h" -#include "tensorflow/core/kernels/eye_functor.h" +#include "tensorflow/core/kernels/linalg/eye_functor.h" #include "tensorflow/core/util/gpu_kernel_helper.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/linalg_ops_common.cc b/tensorflow/core/kernels/linalg/linalg_ops_common.cc similarity index 99% rename from tensorflow/core/kernels/linalg_ops_common.cc rename to tensorflow/core/kernels/linalg/linalg_ops_common.cc index 56a941fbd1f..c8d33e435c7 100644 --- a/tensorflow/core/kernels/linalg_ops_common.cc +++ b/tensorflow/core/kernels/linalg/linalg_ops_common.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include diff --git a/tensorflow/core/kernels/linalg/linalg_ops_common.h b/tensorflow/core/kernels/linalg/linalg_ops_common.h new file mode 100644 index 00000000000..3ab37480c90 --- /dev/null +++ b/tensorflow/core/kernels/linalg/linalg_ops_common.h @@ -0,0 +1,221 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/
+#ifndef TENSORFLOW_CORE_KERNELS_LINALG_LINALG_OPS_COMMON_H_
+#define TENSORFLOW_CORE_KERNELS_LINALG_LINALG_OPS_COMMON_H_
+
+// Classes to support linear algebra functionality, similar to the numpy.linalg
+// module. Supports batch computation on several matrices at once, sharding the
+// computations across different threads if necessary.
+#include <algorithm>
+
+#include "third_party/eigen3/Eigen/Core"
+#include "tensorflow/core/framework/kernel_def_builder.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/gtl/inlined_vector.h"
+#include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/work_sharder.h"
+
+namespace tensorflow {
+
+// Base class for linear algebra operators.
+template <class InputScalar, class OutputScalar = InputScalar>
+class LinearAlgebraOp : public OpKernel {
+ public:
+  explicit LinearAlgebraOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override;
+
+ protected:
+  using TensorShapes = gtl::InlinedVector<TensorShape, 4>;
+  // Returns the number of leading inputs that are to be treated as matrix
+  // inputs. By default this is all the inputs. Derived classes can override
+  // this to tell the base class to ignore one or more trailing inputs.
+  virtual int NumMatrixInputs(const OpKernelContext* context) const {
+    return context->num_inputs();
+  }
+
+  // Validates that the number of inputs and their shapes are as expected.
+  // Many ops take a single square input matrix, so we provide that as a
+  // default implementation for convenience.
+  virtual void ValidateInputMatrixShapes(
+      OpKernelContext* context,
+      const TensorShapes& input_matrix_shapes) const {
+    ValidateSingleSquareMatrix(context, input_matrix_shapes);
+  }
+
+  // Convenience validators for common cases:
+  //
+  // Validate op taking a single matrix A.
+  static void ValidateSingleMatrix(OpKernelContext* context,
+                                   const TensorShapes& input_matrix_shapes);
+  // Validate op taking a single square matrix A.
+  static void ValidateSingleSquareMatrix(
+      OpKernelContext* context, const TensorShapes& input_matrix_shapes);
+  // Validate op taking two matrices A and B that have the same number of rows.
+  static void ValidateSolver(OpKernelContext* context,
+                             const TensorShapes& input_matrix_shapes);
+  // Validate op taking two matrices A and B that have the same number of rows
+  // and A is square.
+  static void ValidateSquareSolver(OpKernelContext* context,
+                                   const TensorShapes& input_matrix_shapes);
+
+  // Returns the output shapes of each individual matrix operation. Output
+  // matrix shapes must be rank 0, 1, or 2. Scalar outputs are rank 0.
+  //
+  // The derived class may return a number of shapes (N) less than
+  // context->num_outputs() (M) to indicate that only a leading subset of
+  // the outputs will be populated. In this case, a dummy scalar tensor with
+  // value zero will be returned for the last M-N outputs.
+  //
+  // For many ops, the output dimensions are the same as the input dimensions,
+  // so we provide that as a default implementation for convenience.
+  virtual TensorShapes GetOutputMatrixShapes(
+      const TensorShapes& input_matrix_shapes) const {
+    return input_matrix_shapes;
+  }
+
+  // Returns the cost per matrix operation. This is used to determine the
+  // number of threads to use for parallelizing calls to ComputeMatrix in
+  // batch mode. Cost per unit is assumed to be roughly 1ns, based on comments
+  // in core/util/work_sharder.cc. Many linear algebra ops take roughly
+  // max(m,n) * min(m,n)^2, where the first input matrix is m-by-n. We provide
+  // that as a default implementation for convenience.
+  virtual int64 GetCostPerUnit(const TensorShapes& input_matrix_shapes) const {
+    double m = static_cast<double>(input_matrix_shapes[0].dim_size(0));
+    double n = static_cast<double>(input_matrix_shapes[0].dim_size(1));
+    double cost = std::max(m, n) * std::min(m, n) * std::min(m, n);
+    return cost >= static_cast<double>(kint64max) ? kint64max
+                                                  : static_cast<int64>(cost);
+  }
+
+  // Returns true if it is safe to forward (alias) the input to the output
+  // buffer and expect the kernel to perform the computation in place.
+  virtual bool EnableInputForwarding() const { return true; }
+
+  using InputMatrix = Eigen::Matrix<InputScalar, Eigen::Dynamic, Eigen::Dynamic,
+                                    Eigen::RowMajor>;
+  using InputConstMatrixMap = Eigen::Map<const InputMatrix>;
+  using InputMatrixMap = Eigen::Map<InputMatrix>;
+  using InputConstVectorMap =
+      Eigen::Map<const Eigen::Matrix<InputScalar, 1, Eigen::Dynamic>>;
+  using InputConstMatrixMaps = gtl::InlinedVector<InputConstMatrixMap, 4>;
+  using InputMatrixMaps = gtl::InlinedVector<InputMatrixMap, 4>;
+  using InputRealScalar = typename Eigen::NumTraits<InputScalar>::Real;
+
+  using OutputMatrix = Eigen::Matrix<OutputScalar, Eigen::Dynamic,
+                                     Eigen::Dynamic, Eigen::RowMajor>;
+  using OutputConstMatrixMap = Eigen::Map<const OutputMatrix>;
+  using OutputMatrixMap = Eigen::Map<OutputMatrix>;
+  using OutputConstVectorMap =
+      Eigen::Map<const Eigen::Matrix<OutputScalar, 1, Eigen::Dynamic>>;
+  using OutputConstMatrixMaps = gtl::InlinedVector<OutputConstMatrixMap, 4>;
+  using OutputMatrixMaps = gtl::InlinedVector<OutputMatrixMap, 4>;
+  using OutputRealScalar = typename Eigen::NumTraits<OutputScalar>::Real;
+
+  // Backward compatibility aliases.
+  using Scalar = OutputScalar;
+  using Matrix =
+      Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
+  using ConstMatrixMap = Eigen::Map<const Matrix>;
+  using MatrixMap = Eigen::Map<Matrix>;
+  using ConstVectorMap =
+      Eigen::Map<const Eigen::Matrix<Scalar, 1, Eigen::Dynamic>>;
+  using ConstMatrixMaps = gtl::InlinedVector<ConstMatrixMap, 4>;
+  using MatrixMaps = gtl::InlinedVector<MatrixMap, 4>;
+  using RealScalar = typename Eigen::NumTraits<Scalar>::Real;
+
+  // Performs a single matrix computation given input matrices, and
+  // stores the result in outputs. For batch operations, this will be called
+  // repeatedly for a single call to Compute() when multiple matrices exist in
+  // input Tensors with rank > 2. In this case the calls to ComputeMatrix are
+  // parallelized. The number of threads used is determined by a cost model
+  // from the value returned by GetCostPerUnit().
+  virtual void ComputeMatrix(OpKernelContext* context,
+                             const InputConstMatrixMaps& inputs,
+                             OutputMatrixMaps* outputs) = 0;
+
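As a rough illustration of how this base class is meant to be extended (not something introduced by this patch), a minimal derived kernel might look like the sketch below. The "ScaleByTwo" op name and the class are hypothetical; the INHERIT_LINALG_TYPEDEFS and REGISTER_LINALG_OP_CPU helpers it relies on are declared near the end of this header.

// Illustrative sketch only, assuming a matching "ScaleByTwo" op is defined in
// the op registry. The base class handles validation, output allocation,
// batching, and sharding; the derived class supplies the per-matrix work.
template <class Scalar>
class ScaleByTwoOp : public LinearAlgebraOp<Scalar> {
 public:
  INHERIT_LINALG_TYPEDEFS(Scalar);

  explicit ScaleByTwoOp(OpKernelConstruction* context) : Base(context) {}

  // Called once per 2-d matrix slice; for batched inputs these calls are
  // sharded across threads according to GetCostPerUnit().
  void ComputeMatrix(OpKernelContext* context, const ConstMatrixMaps& inputs,
                     MatrixMaps* outputs) final {
    outputs->at(0) = Scalar(2) * inputs[0];
  }
};

REGISTER_LINALG_OP_CPU("ScaleByTwo", (ScaleByTwoOp<float>), float);

The default validators and output-shape logic suffice here because the op takes a single square matrix and produces an output of the same shape.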
+ private:
+  using TensorInputs = gtl::InlinedVector<const Tensor*, 4>;
+  using TensorOutputs = gtl::InlinedVector<Tensor*, 4>;
+  // This function maps 2-d slices (matrices) of the input and output tensors
+  // using Eigen::Map and calls ComputeMatrix implemented in terms of the
+  // Eigen::MatrixBase API by the derived class.
+  //
+  // The 'matrix_index' parameter specifies the index of the matrix to be used
+  // from each input tensor, and the index of the matrix to be written to each
+  // output tensor. The input matrices are in row major order, and located at
+  // the memory addresses
+  //   inputs[i].flat<InputScalar>().data() +
+  //   matrix_index * input_matrix_shapes[i].num_elements()
+  // for i in 0...inputs.size()-1.
+  // The output matrices are in row major order, and located at the memory
+  // address
+  //   outputs[i]->flat<OutputScalar>().data() +
+  //   matrix_index * output_matrix_shapes[i].num_elements()
+  // for i in 0...outputs.size()-1.
+  void ComputeTensorSlice(OpKernelContext* context, int64 matrix_index,
+                          const TensorInputs& inputs,
+                          const TensorShapes& input_matrix_shapes,
+                          const TensorOutputs& outputs,
+                          const TensorShapes& output_matrix_shapes);
+
+  void AnalyzeInputs(OpKernelContext* context, TensorInputs* inputs,
+                     TensorShapes* input_matrix_shapes,
+                     TensorShape* batch_shape);
+
+  void PrepareOutputs(OpKernelContext* context,
+                      const TensorShapes& input_matrix_shapes,
+                      const TensorShape& batch_shape, TensorOutputs* outputs,
+                      TensorShapes* output_matrix_shapes);
+};
+
+// Declare LinearAlgebraOp, which is explicitly instantiated in
+// linalg_ops_common.cc for float, double, complex64, and complex128.
+extern template class LinearAlgebraOp<float>;
+extern template class LinearAlgebraOp<double>;
+extern template class LinearAlgebraOp<complex64>;
+extern template class LinearAlgebraOp<complex128>;
+
+}  // namespace tensorflow
+
+#define INHERIT_LINALG_TYPEDEFS(Scalar)                        \
+  typedef LinearAlgebraOp<Scalar> Base;                        \
+  using RealScalar = typename Eigen::NumTraits<Scalar>::Real;  \
+  using Matrix = typename Base::Matrix;                        \
+  using MatrixMap = typename Base::MatrixMap;                  \
+  using MatrixMaps = typename Base::MatrixMaps;                \
+  using ConstMatrixMap = typename Base::ConstMatrixMap;        \
+  using ConstMatrixMaps = typename Base::ConstMatrixMaps;      \
+  using ConstVectorMap = typename Base::ConstVectorMap;        \
+  using TensorShapes = typename Base::TensorShapes;
+
+#define REGISTER_LINALG_OP_CPU(OpName, OpClass, Scalar)                      \
+  REGISTER_KERNEL_BUILDER(                                                   \
+      Name(OpName).Device(DEVICE_CPU).TypeConstraint<Scalar>("T"), OpClass)
+
+#define REGISTER_LINALG_OP_GPU(OpName, OpClass, Scalar)                      \
+  REGISTER_KERNEL_BUILDER(                                                   \
+      Name(OpName).Device(DEVICE_GPU).TypeConstraint<Scalar>("T"), OpClass)
+
+// Deprecated, use one of the device-specific macros above.
+#define REGISTER_LINALG_OP(OpName, OpClass, Scalar) \
+  REGISTER_LINALG_OP_CPU(OpName, OpClass, Scalar)
+
+#endif  // TENSORFLOW_CORE_KERNELS_LINALG_LINALG_OPS_COMMON_H_
diff --git a/tensorflow/core/kernels/lu_op.cc b/tensorflow/core/kernels/linalg/lu_op.cc
similarity index 100%
rename from tensorflow/core/kernels/lu_op.cc
rename to tensorflow/core/kernels/linalg/lu_op.cc
diff --git a/tensorflow/core/kernels/lu_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/lu_op_gpu.cu.cc
similarity index 99%
rename from tensorflow/core/kernels/lu_op_gpu.cu.cc
rename to tensorflow/core/kernels/linalg/lu_op_gpu.cu.cc
index 47b37ed7f7a..9d23a35057d 100644
--- a/tensorflow/core/kernels/lu_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/linalg/lu_op_gpu.cu.cc
@@ -25,9 +25,9 @@ limitations under the License.
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/types.h"
-#include "tensorflow/core/kernels/cuda_solvers.h"
 #include "tensorflow/core/kernels/transpose_functor.h"
 #include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/cuda_solvers.h"
 #include "tensorflow/core/util/gpu_kernel_helper.h"
 namespace tensorflow {
diff --git a/tensorflow/core/kernels/matrix_band_part_op.cc b/tensorflow/core/kernels/linalg/matrix_band_part_op.cc
similarity index 99%
rename from tensorflow/core/kernels/matrix_band_part_op.cc
rename to tensorflow/core/kernels/linalg/matrix_band_part_op.cc
index 4dcce5a8f58..23619bacc33 100644
--- a/tensorflow/core/kernels/matrix_band_part_op.cc
+++ b/tensorflow/core/kernels/linalg/matrix_band_part_op.cc
@@ -21,11 +21,12 @@ limitations under the License.
#define EIGEN_USE_GPU #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/matrix_band_part_op.h" +#include "tensorflow/core/kernels/linalg/matrix_band_part_op.h" #include #include #include + #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" diff --git a/tensorflow/core/kernels/matrix_band_part_op.h b/tensorflow/core/kernels/linalg/matrix_band_part_op.h similarity index 86% rename from tensorflow/core/kernels/matrix_band_part_op.h rename to tensorflow/core/kernels/linalg/matrix_band_part_op.h index b04e36db8ed..2f68eba6dcd 100644 --- a/tensorflow/core/kernels/matrix_band_part_op.h +++ b/tensorflow/core/kernels/linalg/matrix_band_part_op.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_MATRIX_BAND_PART_OP_H_ -#define TENSORFLOW_CORE_KERNELS_MATRIX_BAND_PART_OP_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_BAND_PART_OP_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_BAND_PART_OP_H_ #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_types.h" @@ -34,4 +34,4 @@ struct MatrixBandPartFunctor { } // namespace functor } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_MATRIX_BAND_PART_OP_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_BAND_PART_OP_H_ diff --git a/tensorflow/core/kernels/matrix_band_part_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/matrix_band_part_op_gpu.cu.cc similarity index 97% rename from tensorflow/core/kernels/matrix_band_part_op_gpu.cu.cc rename to tensorflow/core/kernels/linalg/matrix_band_part_op_gpu.cu.cc index 9eb3e4f72a2..9c734b7fd6e 100644 --- a/tensorflow/core/kernels/matrix_band_part_op_gpu.cu.cc +++ b/tensorflow/core/kernels/linalg/matrix_band_part_op_gpu.cu.cc @@ -21,7 +21,7 @@ limitations under the License. #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/kernels/matrix_band_part_op.h" +#include "tensorflow/core/kernels/linalg/matrix_band_part_op.h" #include "tensorflow/core/util/gpu_kernel_helper.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/matrix_diag_op.cc b/tensorflow/core/kernels/linalg/matrix_diag_op.cc similarity index 99% rename from tensorflow/core/kernels/matrix_diag_op.cc rename to tensorflow/core/kernels/linalg/matrix_diag_op.cc index 05d7e4e6f86..69cc8170793 100644 --- a/tensorflow/core/kernels/matrix_diag_op.cc +++ b/tensorflow/core/kernels/linalg/matrix_diag_op.cc @@ -20,7 +20,7 @@ limitations under the License. #define EIGEN_USE_GPU #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/matrix_diag_op.h" +#include "tensorflow/core/kernels/linalg/matrix_diag_op.h" #include #include diff --git a/tensorflow/core/kernels/matrix_diag_op.h b/tensorflow/core/kernels/linalg/matrix_diag_op.h similarity index 94% rename from tensorflow/core/kernels/matrix_diag_op.h rename to tensorflow/core/kernels/linalg/matrix_diag_op.h index 707fd9b6c14..5758ba664cc 100644 --- a/tensorflow/core/kernels/matrix_diag_op.h +++ b/tensorflow/core/kernels/linalg/matrix_diag_op.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_MATRIX_DIAG_OP_H_ -#define TENSORFLOW_CORE_KERNELS_MATRIX_DIAG_OP_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_DIAG_OP_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_DIAG_OP_H_ // Generator definition for MatrixDiagOp, must be compilable by nvcc. @@ -69,4 +69,4 @@ struct MatrixDiag { } // namespace functor } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_MATRIX_DIAG_OP_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_DIAG_OP_H_ diff --git a/tensorflow/core/kernels/matrix_diag_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/matrix_diag_op_gpu.cu.cc similarity index 99% rename from tensorflow/core/kernels/matrix_diag_op_gpu.cu.cc rename to tensorflow/core/kernels/linalg/matrix_diag_op_gpu.cu.cc index 76271798d5f..6b52e70716d 100644 --- a/tensorflow/core/kernels/matrix_diag_op_gpu.cu.cc +++ b/tensorflow/core/kernels/linalg/matrix_diag_op_gpu.cu.cc @@ -18,7 +18,7 @@ limitations under the License. #define EIGEN_USE_GPU #include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/kernels/matrix_diag_op.h" +#include "tensorflow/core/kernels/linalg/matrix_diag_op.h" #include "tensorflow/core/util/gpu_kernel_helper.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/matrix_exponential_op.cc b/tensorflow/core/kernels/linalg/matrix_exponential_op.cc similarity index 97% rename from tensorflow/core/kernels/matrix_exponential_op.cc rename to tensorflow/core/kernels/linalg/matrix_exponential_op.cc index 01d4894438c..73407614955 100644 --- a/tensorflow/core/kernels/matrix_exponential_op.cc +++ b/tensorflow/core/kernels/linalg/matrix_exponential_op.cc @@ -20,7 +20,7 @@ limitations under the License. #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" diff --git a/tensorflow/core/kernels/matrix_inverse_op.cc b/tensorflow/core/kernels/linalg/matrix_inverse_op.cc similarity index 98% rename from tensorflow/core/kernels/matrix_inverse_op.cc rename to tensorflow/core/kernels/linalg/matrix_inverse_op.cc index 52afdd15ba6..dc51776f2fe 100644 --- a/tensorflow/core/kernels/matrix_inverse_op.cc +++ b/tensorflow/core/kernels/linalg/matrix_inverse_op.cc @@ -24,7 +24,7 @@ limitations under the License. #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" @@ -32,9 +32,9 @@ limitations under the License. 
#if GOOGLE_CUDA #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/eye_functor.h" +#include "tensorflow/core/kernels/linalg/eye_functor.h" #include "tensorflow/core/kernels/transpose_functor.h" +#include "tensorflow/core/util/cuda_solvers.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/matrix_logarithm_op.cc b/tensorflow/core/kernels/linalg/matrix_logarithm_op.cc similarity index 97% rename from tensorflow/core/kernels/matrix_logarithm_op.cc rename to tensorflow/core/kernels/linalg/matrix_logarithm_op.cc index 22ca094e243..79d5472f140 100644 --- a/tensorflow/core/kernels/matrix_logarithm_op.cc +++ b/tensorflow/core/kernels/linalg/matrix_logarithm_op.cc @@ -20,7 +20,7 @@ limitations under the License. #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" diff --git a/tensorflow/core/kernels/matrix_set_diag_op.cc b/tensorflow/core/kernels/linalg/matrix_set_diag_op.cc similarity index 99% rename from tensorflow/core/kernels/matrix_set_diag_op.cc rename to tensorflow/core/kernels/linalg/matrix_set_diag_op.cc index bf98fd0d47d..df32228d0f2 100644 --- a/tensorflow/core/kernels/matrix_set_diag_op.cc +++ b/tensorflow/core/kernels/linalg/matrix_set_diag_op.cc @@ -21,7 +21,7 @@ limitations under the License. #define EIGEN_USE_GPU #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/matrix_set_diag_op.h" +#include "tensorflow/core/kernels/linalg/matrix_set_diag_op.h" #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/op_kernel.h" @@ -30,7 +30,7 @@ limitations under the License. #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/types.h" -#include "tensorflow/core/kernels/matrix_diag_op.h" +#include "tensorflow/core/kernels/linalg/matrix_diag_op.h" #include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" diff --git a/tensorflow/core/kernels/matrix_set_diag_op.h b/tensorflow/core/kernels/linalg/matrix_set_diag_op.h similarity index 89% rename from tensorflow/core/kernels/matrix_set_diag_op.h rename to tensorflow/core/kernels/linalg/matrix_set_diag_op.h index 04877cd34ca..449a3607ede 100644 --- a/tensorflow/core/kernels/matrix_set_diag_op.h +++ b/tensorflow/core/kernels/linalg/matrix_set_diag_op.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_MATRIX_SET_DIAG_OP_H_ -#define TENSORFLOW_CORE_KERNELS_MATRIX_SET_DIAG_OP_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_SET_DIAG_OP_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_SET_DIAG_OP_H_ #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_types.h" @@ -39,4 +39,4 @@ struct MatrixSetDiag { } // namespace functor } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_MATRIX_SET_DIAG_OP_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_SET_DIAG_OP_H_ diff --git a/tensorflow/core/kernels/matrix_set_diag_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/matrix_set_diag_op_gpu.cu.cc similarity index 99% rename from tensorflow/core/kernels/matrix_set_diag_op_gpu.cu.cc rename to tensorflow/core/kernels/linalg/matrix_set_diag_op_gpu.cu.cc index 4e32f8a52e8..0cdb457db03 100644 --- a/tensorflow/core/kernels/matrix_set_diag_op_gpu.cu.cc +++ b/tensorflow/core/kernels/linalg/matrix_set_diag_op_gpu.cu.cc @@ -18,7 +18,7 @@ limitations under the License. #define EIGEN_USE_GPU #include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/kernels/matrix_set_diag_op.h" +#include "tensorflow/core/kernels/linalg/matrix_set_diag_op.h" #include "tensorflow/core/util/gpu_kernel_helper.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/matrix_solve_ls_op_complex128.cc b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_complex128.cc similarity index 92% rename from tensorflow/core/kernels/matrix_solve_ls_op_complex128.cc rename to tensorflow/core/kernels/linalg/matrix_solve_ls_op_complex128.cc index 22274cc3daf..4e64eb42371 100644 --- a/tensorflow/core/kernels/matrix_solve_ls_op_complex128.cc +++ b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_complex128.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/matrix_solve_ls_op_impl.h" +#include "tensorflow/core/kernels/linalg/matrix_solve_ls_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/matrix_solve_ls_op_complex64.cc b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_complex64.cc similarity index 92% rename from tensorflow/core/kernels/matrix_solve_ls_op_complex64.cc rename to tensorflow/core/kernels/linalg/matrix_solve_ls_op_complex64.cc index c8421a3efba..719201f3f9e 100644 --- a/tensorflow/core/kernels/matrix_solve_ls_op_complex64.cc +++ b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_complex64.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/matrix_solve_ls_op_impl.h" +#include "tensorflow/core/kernels/linalg/matrix_solve_ls_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/matrix_solve_ls_op_double.cc b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_double.cc similarity index 92% rename from tensorflow/core/kernels/matrix_solve_ls_op_double.cc rename to tensorflow/core/kernels/linalg/matrix_solve_ls_op_double.cc index c7d03cb1052..614ecee4e23 100644 --- a/tensorflow/core/kernels/matrix_solve_ls_op_double.cc +++ b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_double.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/matrix_solve_ls_op_impl.h" +#include "tensorflow/core/kernels/linalg/matrix_solve_ls_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/matrix_solve_ls_op_float.cc b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_float.cc similarity index 92% rename from tensorflow/core/kernels/matrix_solve_ls_op_float.cc rename to tensorflow/core/kernels/linalg/matrix_solve_ls_op_float.cc index c98a84beded..809cff8148c 100644 --- a/tensorflow/core/kernels/matrix_solve_ls_op_float.cc +++ b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_float.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/matrix_solve_ls_op_impl.h" +#include "tensorflow/core/kernels/linalg/matrix_solve_ls_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/matrix_solve_ls_op_impl.h b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_impl.h similarity index 96% rename from tensorflow/core/kernels/matrix_solve_ls_op_impl.h rename to tensorflow/core/kernels/linalg/matrix_solve_ls_op_impl.h index 00a05a87a3a..1c8101a05b4 100644 --- a/tensorflow/core/kernels/matrix_solve_ls_op_impl.h +++ b/tensorflow/core/kernels/linalg/matrix_solve_ls_op_impl.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_MATRIX_SOLVE_LS_OP_IMPL_H_ -#define TENSORFLOW_CORE_KERNELS_MATRIX_SOLVE_LS_OP_IMPL_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_SOLVE_LS_OP_IMPL_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_SOLVE_LS_OP_IMPL_H_ // See docs in ../ops/linalg_ops.cc. @@ -24,7 +24,7 @@ limitations under the License. 
#include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" @@ -163,4 +163,4 @@ class MatrixSolveLsOp : public LinearAlgebraOp { } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_MATRIX_SOLVE_LS_OP_IMPL_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_SOLVE_LS_OP_IMPL_H_ diff --git a/tensorflow/core/kernels/matrix_solve_op.cc b/tensorflow/core/kernels/linalg/matrix_solve_op.cc similarity index 99% rename from tensorflow/core/kernels/matrix_solve_op.cc rename to tensorflow/core/kernels/linalg/matrix_solve_op.cc index 3a75054f4ea..70f02bddf9b 100644 --- a/tensorflow/core/kernels/matrix_solve_op.cc +++ b/tensorflow/core/kernels/linalg/matrix_solve_op.cc @@ -25,7 +25,7 @@ limitations under the License. #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" @@ -33,8 +33,8 @@ limitations under the License. #if GOOGLE_CUDA #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#include "tensorflow/core/kernels/cuda_solvers.h" #include "tensorflow/core/kernels/transpose_functor.h" +#include "tensorflow/core/util/cuda_solvers.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/matrix_square_root_op.cc b/tensorflow/core/kernels/linalg/matrix_square_root_op.cc similarity index 97% rename from tensorflow/core/kernels/matrix_square_root_op.cc rename to tensorflow/core/kernels/linalg/matrix_square_root_op.cc index fe3d3043c26..ce43e358350 100644 --- a/tensorflow/core/kernels/matrix_square_root_op.cc +++ b/tensorflow/core/kernels/linalg/matrix_square_root_op.cc @@ -20,7 +20,7 @@ limitations under the License. #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" diff --git a/tensorflow/core/kernels/matrix_triangular_solve_op_complex.cc b/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_complex.cc similarity index 92% rename from tensorflow/core/kernels/matrix_triangular_solve_op_complex.cc rename to tensorflow/core/kernels/linalg/matrix_triangular_solve_op_complex.cc index ae3702078a0..27f3e77e29c 100644 --- a/tensorflow/core/kernels/matrix_triangular_solve_op_complex.cc +++ b/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_complex.cc @@ -14,7 +14,7 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/kernels/matrix_triangular_solve_op_impl.h" +#include "tensorflow/core/kernels/linalg/matrix_triangular_solve_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/matrix_triangular_solve_op_impl.h b/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_impl.h similarity index 97% rename from tensorflow/core/kernels/matrix_triangular_solve_op_impl.h rename to tensorflow/core/kernels/linalg/matrix_triangular_solve_op_impl.h index fb7e6f0f5ff..99249f792b6 100644 --- a/tensorflow/core/kernels/matrix_triangular_solve_op_impl.h +++ b/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_impl.h @@ -15,8 +15,8 @@ limitations under the License. // See docs in ../ops/linalg_ops.cc. // -#ifndef TENSORFLOW_CORE_KERNELS_MATRIX_TRIANGULAR_SOLVE_OP_IMPL_H_ -#define TENSORFLOW_CORE_KERNELS_MATRIX_TRIANGULAR_SOLVE_OP_IMPL_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_TRIANGULAR_SOLVE_OP_IMPL_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_TRIANGULAR_SOLVE_OP_IMPL_H_ #include "third_party/eigen3/Eigen/Core" #include "tensorflow/core/framework/kernel_def_builder.h" @@ -24,7 +24,7 @@ limitations under the License. #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/kernels/fill_functor.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" @@ -38,9 +38,9 @@ limitations under the License. #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM #if GOOGLE_CUDA -#include "tensorflow/core/kernels/cuda_solvers.h" +#include "tensorflow/core/util/cuda_solvers.h" #elif TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/rocm_solvers.h" +#include "tensorflow/core/util/rocm_solvers.h" #endif namespace tensorflow { @@ -434,4 +434,4 @@ struct LaunchBatchMatrixTriangularSolve { } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_MATRIX_TRIANGULAR_SOLVE_OP_IMPL_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_MATRIX_TRIANGULAR_SOLVE_OP_IMPL_H_ diff --git a/tensorflow/core/kernels/matrix_triangular_solve_op_real.cc b/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_real.cc similarity index 93% rename from tensorflow/core/kernels/matrix_triangular_solve_op_real.cc rename to tensorflow/core/kernels/linalg/matrix_triangular_solve_op_real.cc index 0f92964dd72..71a62441dc4 100644 --- a/tensorflow/core/kernels/matrix_triangular_solve_op_real.cc +++ b/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_real.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/matrix_triangular_solve_op_impl.h" +#include "tensorflow/core/kernels/linalg/matrix_triangular_solve_op_impl.h" #if GOOGLE_CUDA #include "third_party/gpus/cuda/include/cuda.h" diff --git a/tensorflow/core/kernels/matrix_triangular_solve_op_test.cc b/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_test.cc similarity index 100% rename from tensorflow/core/kernels/matrix_triangular_solve_op_test.cc rename to tensorflow/core/kernels/linalg/matrix_triangular_solve_op_test.cc diff --git a/tensorflow/core/kernels/qr_op_complex128.cc b/tensorflow/core/kernels/linalg/qr_op_complex128.cc similarity index 96% rename from tensorflow/core/kernels/qr_op_complex128.cc rename to tensorflow/core/kernels/linalg/qr_op_complex128.cc index 8a3e3dc0a92..0c14c6d2818 100644 --- a/tensorflow/core/kernels/qr_op_complex128.cc +++ b/tensorflow/core/kernels/linalg/qr_op_complex128.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/qr_op_impl.h" +#include "tensorflow/core/kernels/linalg/qr_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/qr_op_complex64.cc b/tensorflow/core/kernels/linalg/qr_op_complex64.cc similarity index 95% rename from tensorflow/core/kernels/qr_op_complex64.cc rename to tensorflow/core/kernels/linalg/qr_op_complex64.cc index 467fa6c2d6a..fc0227ef7f9 100644 --- a/tensorflow/core/kernels/qr_op_complex64.cc +++ b/tensorflow/core/kernels/linalg/qr_op_complex64.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/qr_op_impl.h" +#include "tensorflow/core/kernels/linalg/qr_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/qr_op_double.cc b/tensorflow/core/kernels/linalg/qr_op_double.cc similarity index 96% rename from tensorflow/core/kernels/qr_op_double.cc rename to tensorflow/core/kernels/linalg/qr_op_double.cc index 05537a0eaa3..ae00b3e7921 100644 --- a/tensorflow/core/kernels/qr_op_double.cc +++ b/tensorflow/core/kernels/linalg/qr_op_double.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/qr_op_impl.h" +#include "tensorflow/core/kernels/linalg/qr_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/qr_op_float.cc b/tensorflow/core/kernels/linalg/qr_op_float.cc similarity index 96% rename from tensorflow/core/kernels/qr_op_float.cc rename to tensorflow/core/kernels/linalg/qr_op_float.cc index 6aebd981865..77b8eeb0286 100644 --- a/tensorflow/core/kernels/qr_op_float.cc +++ b/tensorflow/core/kernels/linalg/qr_op_float.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/qr_op_impl.h" +#include "tensorflow/core/kernels/linalg/qr_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/qr_op_impl.h b/tensorflow/core/kernels/linalg/qr_op_impl.h similarity index 96% rename from tensorflow/core/kernels/qr_op_impl.h rename to tensorflow/core/kernels/linalg/qr_op_impl.h index 535df9d160d..876594bc511 100644 --- a/tensorflow/core/kernels/qr_op_impl.h +++ b/tensorflow/core/kernels/linalg/qr_op_impl.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_QR_OP_IMPL_H_ -#define TENSORFLOW_CORE_KERNELS_QR_OP_IMPL_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_QR_OP_IMPL_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_QR_OP_IMPL_H_ // See docs in ../ops/linalg_ops.cc. // @@ -33,7 +33,7 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" @@ -41,11 +41,11 @@ limitations under the License. #if GOOGLE_CUDA #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#include "tensorflow/core/kernels/cuda_solvers.h" #include "tensorflow/core/kernels/cwise_ops.h" -#include "tensorflow/core/kernels/eye_functor.h" -#include "tensorflow/core/kernels/matrix_band_part_op.h" +#include "tensorflow/core/kernels/linalg/eye_functor.h" +#include "tensorflow/core/kernels/linalg/matrix_band_part_op.h" #include "tensorflow/core/kernels/transpose_functor.h" +#include "tensorflow/core/util/cuda_solvers.h" #endif namespace tensorflow { @@ -299,4 +299,4 @@ class QrOpGpu : public AsyncOpKernel { } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_QR_OP_IMPL_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_QR_OP_IMPL_H_ diff --git a/tensorflow/core/kernels/self_adjoint_eig_op.cc b/tensorflow/core/kernels/linalg/self_adjoint_eig_op.cc similarity index 98% rename from tensorflow/core/kernels/self_adjoint_eig_op.cc rename to tensorflow/core/kernels/linalg/self_adjoint_eig_op.cc index cea5883db7b..ebf1955b8ff 100644 --- a/tensorflow/core/kernels/self_adjoint_eig_op.cc +++ b/tensorflow/core/kernels/linalg/self_adjoint_eig_op.cc @@ -20,7 +20,7 @@ limitations under the License. 
#include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/denormal.h" #include "tensorflow/core/platform/logging.h" diff --git a/tensorflow/core/kernels/self_adjoint_eig_v2_op_complex128.cc b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_complex128.cc similarity index 93% rename from tensorflow/core/kernels/self_adjoint_eig_v2_op_complex128.cc rename to tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_complex128.cc index 4c7a391d56c..424c33a7ac1 100644 --- a/tensorflow/core/kernels/self_adjoint_eig_v2_op_complex128.cc +++ b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_complex128.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h" +#include "tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/self_adjoint_eig_v2_op_complex64.cc b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_complex64.cc similarity index 93% rename from tensorflow/core/kernels/self_adjoint_eig_v2_op_complex64.cc rename to tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_complex64.cc index 0ec5ec24dd1..bdd20998e3c 100644 --- a/tensorflow/core/kernels/self_adjoint_eig_v2_op_complex64.cc +++ b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_complex64.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h" +#include "tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/self_adjoint_eig_v2_op_double.cc b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_double.cc similarity index 92% rename from tensorflow/core/kernels/self_adjoint_eig_v2_op_double.cc rename to tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_double.cc index 7f81bb69021..afc50500d40 100644 --- a/tensorflow/core/kernels/self_adjoint_eig_v2_op_double.cc +++ b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_double.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h" +#include "tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/self_adjoint_eig_v2_op_float.cc b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_float.cc similarity index 92% rename from tensorflow/core/kernels/self_adjoint_eig_v2_op_float.cc rename to tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_float.cc index bf30952d1e7..1f795777a2e 100644 --- a/tensorflow/core/kernels/self_adjoint_eig_v2_op_float.cc +++ b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_float.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h" +#include "tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/self_adjoint_eig_v2_op_gpu.cc b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_gpu.cc similarity index 99% rename from tensorflow/core/kernels/self_adjoint_eig_v2_op_gpu.cc rename to tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_gpu.cc index 3a84df07a9a..778c50ff408 100644 --- a/tensorflow/core/kernels/self_adjoint_eig_v2_op_gpu.cc +++ b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_gpu.cc @@ -26,12 +26,12 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/kernels/cast_op.h" -#include "tensorflow/core/kernels/cuda_solvers.h" #include "tensorflow/core/kernels/cwise_ops.h" #include "tensorflow/core/kernels/transpose_functor.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/cuda_solvers.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_impl.h similarity index 91% rename from tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h rename to tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_impl.h index b5274f8788b..56f2936a66e 100644 --- a/tensorflow/core/kernels/self_adjoint_eig_v2_op_impl.h +++ b/tensorflow/core/kernels/linalg/self_adjoint_eig_v2_op_impl.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_SELF_ADJOINT_EIG_V2_OP_IMPL_H_ -#define TENSORFLOW_CORE_KERNELS_SELF_ADJOINT_EIG_V2_OP_IMPL_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_SELF_ADJOINT_EIG_V2_OP_IMPL_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_SELF_ADJOINT_EIG_V2_OP_IMPL_H_ // See docs in ../ops/linalg_ops.cc. @@ -23,7 +23,7 @@ limitations under the License. #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/denormal.h" #include "tensorflow/core/platform/logging.h" @@ -89,4 +89,4 @@ class SelfAdjointEigV2Op : public LinearAlgebraOp { } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_SELF_ADJOINT_EIG_V2_OP_IMPL_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_SELF_ADJOINT_EIG_V2_OP_IMPL_H_ diff --git a/tensorflow/core/kernels/svd_op_complex128.cc b/tensorflow/core/kernels/linalg/svd_op_complex128.cc similarity index 93% rename from tensorflow/core/kernels/svd_op_complex128.cc rename to tensorflow/core/kernels/linalg/svd_op_complex128.cc index a0f39418aca..36ac629e38a 100644 --- a/tensorflow/core/kernels/svd_op_complex128.cc +++ b/tensorflow/core/kernels/linalg/svd_op_complex128.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/svd_op_impl.h" +#include "tensorflow/core/kernels/linalg/svd_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/svd_op_complex64.cc b/tensorflow/core/kernels/linalg/svd_op_complex64.cc similarity index 93% rename from tensorflow/core/kernels/svd_op_complex64.cc rename to tensorflow/core/kernels/linalg/svd_op_complex64.cc index a8fd50c67d1..50d940b534a 100644 --- a/tensorflow/core/kernels/svd_op_complex64.cc +++ b/tensorflow/core/kernels/linalg/svd_op_complex64.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/svd_op_impl.h" +#include "tensorflow/core/kernels/linalg/svd_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/svd_op_double.cc b/tensorflow/core/kernels/linalg/svd_op_double.cc similarity index 93% rename from tensorflow/core/kernels/svd_op_double.cc rename to tensorflow/core/kernels/linalg/svd_op_double.cc index 539dae3a081..85bbe08d8c9 100644 --- a/tensorflow/core/kernels/svd_op_double.cc +++ b/tensorflow/core/kernels/linalg/svd_op_double.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/svd_op_impl.h" +#include "tensorflow/core/kernels/linalg/svd_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/svd_op_float.cc b/tensorflow/core/kernels/linalg/svd_op_float.cc similarity index 93% rename from tensorflow/core/kernels/svd_op_float.cc rename to tensorflow/core/kernels/linalg/svd_op_float.cc index 03839aa49c3..961d131293b 100644 --- a/tensorflow/core/kernels/svd_op_float.cc +++ b/tensorflow/core/kernels/linalg/svd_op_float.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/svd_op_impl.h" +#include "tensorflow/core/kernels/linalg/svd_op_impl.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/svd_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/svd_op_gpu.cu.cc similarity index 99% rename from tensorflow/core/kernels/svd_op_gpu.cu.cc rename to tensorflow/core/kernels/linalg/svd_op_gpu.cu.cc index 482fd057e4e..06d1efe6dd5 100644 --- a/tensorflow/core/kernels/svd_op_gpu.cu.cc +++ b/tensorflow/core/kernels/linalg/svd_op_gpu.cu.cc @@ -36,14 +36,14 @@ limitations under the License. 
#include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/types.h" -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/eye_functor.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/eye_functor.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/kernels/transpose_functor.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/stream_executor.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/cuda_solvers.h" #include "tensorflow/core/util/gpu_kernel_helper.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/svd_op_impl.h b/tensorflow/core/kernels/linalg/svd_op_impl.h similarity index 95% rename from tensorflow/core/kernels/svd_op_impl.h rename to tensorflow/core/kernels/linalg/svd_op_impl.h index 675826a057c..c43aaaa4b7b 100644 --- a/tensorflow/core/kernels/svd_op_impl.h +++ b/tensorflow/core/kernels/linalg/svd_op_impl.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_SVD_OP_IMPL_H_ -#define TENSORFLOW_CORE_KERNELS_SVD_OP_IMPL_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_SVD_OP_IMPL_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_SVD_OP_IMPL_H_ // See docs in ../ops/linalg_ops.cc. // @@ -27,7 +27,7 @@ limitations under the License. #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" @@ -118,4 +118,4 @@ class SvdOp : public LinearAlgebraOp { } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_SVD_OP_IMPL_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_SVD_OP_IMPL_H_ diff --git a/tensorflow/core/kernels/tridiagonal_matmul_op.cc b/tensorflow/core/kernels/linalg/tridiagonal_matmul_op.cc similarity index 98% rename from tensorflow/core/kernels/tridiagonal_matmul_op.cc rename to tensorflow/core/kernels/linalg/tridiagonal_matmul_op.cc index 3ddf22012de..9d17c574148 100644 --- a/tensorflow/core/kernels/tridiagonal_matmul_op.cc +++ b/tensorflow/core/kernels/linalg/tridiagonal_matmul_op.cc @@ -19,7 +19,7 @@ limitations under the License. 
#include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/types.h" diff --git a/tensorflow/core/kernels/tridiagonal_matmul_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/tridiagonal_matmul_op_gpu.cu.cc similarity index 96% rename from tensorflow/core/kernels/tridiagonal_matmul_op_gpu.cu.cc rename to tensorflow/core/kernels/linalg/tridiagonal_matmul_op_gpu.cu.cc index 1c82cc18e32..a65db40d822 100644 --- a/tensorflow/core/kernels/tridiagonal_matmul_op_gpu.cu.cc +++ b/tensorflow/core/kernels/linalg/tridiagonal_matmul_op_gpu.cu.cc @@ -22,11 +22,11 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/kernels/transpose_functor.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" #include "tensorflow/core/util/gpu_device_functions.h" #include "tensorflow/core/util/gpu_kernel_helper.h" #include "tensorflow/core/util/gpu_launch_config.h" diff --git a/tensorflow/core/kernels/tridiagonal_solve_op.cc b/tensorflow/core/kernels/linalg/tridiagonal_solve_op.cc similarity index 99% rename from tensorflow/core/kernels/tridiagonal_solve_op.cc rename to tensorflow/core/kernels/linalg/tridiagonal_solve_op.cc index 88931ff3e66..8fe04125f9a 100644 --- a/tensorflow/core/kernels/tridiagonal_solve_op.cc +++ b/tensorflow/core/kernels/linalg/tridiagonal_solve_op.cc @@ -19,7 +19,7 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/types.h" diff --git a/tensorflow/core/kernels/tridiagonal_solve_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/tridiagonal_solve_op_gpu.cu.cc similarity index 99% rename from tensorflow/core/kernels/tridiagonal_solve_op_gpu.cu.cc rename to tensorflow/core/kernels/linalg/tridiagonal_solve_op_gpu.cu.cc index 089fa8c040f..86514cfb033 100644 --- a/tensorflow/core/kernels/tridiagonal_solve_op_gpu.cu.cc +++ b/tensorflow/core/kernels/linalg/tridiagonal_solve_op_gpu.cu.cc @@ -23,11 +23,11 @@ limitations under the License. 
#include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/types.h" -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" -#include "tensorflow/core/kernels/linalg_ops_common.h" +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #include "tensorflow/core/kernels/transpose_functor.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" #include "tensorflow/core/util/gpu_device_functions.h" #include "tensorflow/core/util/gpu_kernel_helper.h" #include "tensorflow/core/util/gpu_launch_config.h" diff --git a/tensorflow/core/kernels/linalg_ops_common.h b/tensorflow/core/kernels/linalg_ops_common.h index 65c2fb90f0e..0aa69801f19 100644 --- a/tensorflow/core/kernels/linalg_ops_common.h +++ b/tensorflow/core/kernels/linalg_ops_common.h @@ -12,211 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ - #ifndef TENSORFLOW_CORE_KERNELS_LINALG_OPS_COMMON_H_ #define TENSORFLOW_CORE_KERNELS_LINALG_OPS_COMMON_H_ -// Classes to support linear algebra functionality, similar to the numpy.linalg -// module. Supports batch computation on several matrices at once, sharding the -// computations across different threads if necessary. -#include - -#include "third_party/eigen3/Eigen/Core" -#include "tensorflow/core/framework/kernel_def_builder.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/framework/tensor_types.h" -#include "tensorflow/core/framework/types.h" -#include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/gtl/inlined_vector.h" -#include "tensorflow/core/platform/types.h" -#include "tensorflow/core/util/work_sharder.h" - -namespace tensorflow { - -// Base class for linear algebra operators. -template -class LinearAlgebraOp : public OpKernel { - public: - explicit LinearAlgebraOp(OpKernelConstruction* context) : OpKernel(context) {} - - void Compute(OpKernelContext* context) override; - - protected: - using TensorShapes = gtl::InlinedVector; - // Returns the number of leading inputs that are to be treated as matrix - // inputs. By default this is all the inputs. Derived classes can override - // this to tell the base class to ignore one or more trailing inputs. - virtual int NumMatrixInputs(const OpKernelContext* context) const { - return context->num_inputs(); - } - - // Returns true if the number of inputs and their shapes are as expected. - // Many ops take a single square input matrix, so we provide that as a default - // implementation for convenience. - virtual void ValidateInputMatrixShapes( - OpKernelContext* context, const TensorShapes& input_matrix_shapes) const { - ValidateSingleSquareMatrix(context, input_matrix_shapes); - } - - // Convenience validators for common cases: - // - // Validate op taking a single matrix A. - static void ValidateSingleMatrix(OpKernelContext* context, - const TensorShapes& input_matrix_shapes); - // Validate op taking a single square matrix A. 
- static void ValidateSingleSquareMatrix( - OpKernelContext* context, const TensorShapes& input_matrix_shapes); - // Validate op taking two matrices A and B that have the same number of rows. - static void ValidateSolver(OpKernelContext* context, - const TensorShapes& input_matrix_shapes); - // Validate op taking two matrices A and B that have the same number of rows - // and A is square. - static void ValidateSquareSolver(OpKernelContext* context, - const TensorShapes& input_matrix_shapes); - - // Returns the output shapes of each individual matrix operation. Output - // matrices shapes must be rank 0, 1, or 2. Scalar outputs are rank 0. - // - // The derived class may return a number of shapes (N) less than - // context->num_outputs() (M) to indicate that a only leading subset of - // the outputs will be populated. In this case, a dummy scalar tensor with - // value zero will be return for the last M-N outputs. - // - // For many ops, the output dimensions are the same as the input dimensions, - // so we provide that as a default implementation for convenience. - virtual TensorShapes GetOutputMatrixShapes( - const TensorShapes& input_matrix_shapes) const { - return input_matrix_shapes; - } - - // Returns the cost per matrix operation. This is used to determine the - // number of threads to use for parallelizing calls to ComputeMatrix in - // batch mode. Cost per unit is assumed to be roughly 1ns, based on comments - // in core/util/work_sharder.cc. Many linear algebra ops take roughly max(m,n) - // * min(m,n)^2, where the first input matrix is m-by-n. We provide that as a - // default implementation for convenience. - virtual int64 GetCostPerUnit(const TensorShapes& input_matrix_shapes) const { - double m = static_cast(input_matrix_shapes[0].dim_size(0)); - double n = static_cast(input_matrix_shapes[0].dim_size(1)); - double cost = std::max(m, n) * std::min(m, n) * std::min(m, n); - return cost >= static_cast(kint64max) ? kint64max - : static_cast(cost); - } - - // Returns true if it is safe to forward (alias) input to output buffer - // and expect the kernel to perform the computation inplace. - virtual bool EnableInputForwarding() const { return true; } - - using InputMatrix = Eigen::Matrix; - using InputConstMatrixMap = Eigen::Map; - using InputMatrixMap = Eigen::Map; - using InputConstVectorMap = - Eigen::Map>; - using InputConstMatrixMaps = gtl::InlinedVector; - using InputMatrixMaps = gtl::InlinedVector; - using InputRealScalar = typename Eigen::NumTraits::Real; - - using OutputMatrix = Eigen::Matrix; - using OutputConstMatrixMap = Eigen::Map; - using OutputMatrixMap = Eigen::Map; - using OutputConstVectorMap = - Eigen::Map>; - using OutputConstMatrixMaps = gtl::InlinedVector; - using OutputMatrixMaps = gtl::InlinedVector; - using OutputRealScalar = typename Eigen::NumTraits::Real; - - // backward compatibility - using Scalar = OutputScalar; - using Matrix = - Eigen::Matrix; - using ConstMatrixMap = Eigen::Map; - using MatrixMap = Eigen::Map; - using ConstVectorMap = - Eigen::Map>; - using ConstMatrixMaps = gtl::InlinedVector; - using MatrixMaps = gtl::InlinedVector; - using RealScalar = typename Eigen::NumTraits::Real; - - // Performs a single matrix computation given input matrices, and - // stores the result in outputs. For batch operations, this will be called - // repeatedly for a single call to Compute() when multiple matrices exist in - // input Tensors with rank > 2. In this case the calls to ComputeMatrix are - // parallelized. 
The number of threads used is determined by a cost model from - // the value returned by GetCostPerUnit(). - virtual void ComputeMatrix(OpKernelContext* context, - const InputConstMatrixMaps& inputs, - OutputMatrixMaps* outputs) = 0; - - private: - using TensorInputs = gtl::InlinedVector; - using TensorOutputs = gtl::InlinedVector; - // This function maps 2-d slices (matrices) of the input and output tensors - // using Eigen::Map and calls ComputeMatrix implemented in terms of the - // Eigen::MatrixBase API by the derived class. - // - // The 'matrix_index' parameter specifies the index of the matrix to be used - // from each input tensor, and the index of the matrix to be written to each - // output tensor. The input matrices are in row major order, and located at - // the memory addresses - // inputs[i].flat().data() + - // matrix_index * input_matrix_shapes[i].num_elements() - // for i in 0...inputs.size()-1. - // The output matrices are in row major order, and located at the memory - // address - // outputs[i]->flat().data() + - // matrix_index * output_matrix_shapes[i].num_elements(). - // for i in 0...outputs.size()-1. - // - void ComputeTensorSlice(OpKernelContext* context, int64 matrix_index, - const TensorInputs& inputs, - const TensorShapes& input_matrix_shapes, - const TensorOutputs& outputs, - const TensorShapes& output_matrix_shapes); - - void AnalyzeInputs(OpKernelContext* context, TensorInputs* inputs, - TensorShapes* input_matrix_shapes, - TensorShape* batch_shape); - - void PrepareOutputs(OpKernelContext* context, - const TensorShapes& input_matrix_shapes, - const TensorShape& batch_shape, TensorOutputs* outputs, - TensorShapes* output_matrix_shapes); -}; - -// Declare LinearAlgebraOp, which is explicitly instantiated in -// linalg_ops_common.cc for float, double, complex64, and complex128. -extern template class LinearAlgebraOp; -extern template class LinearAlgebraOp; -extern template class LinearAlgebraOp; -extern template class LinearAlgebraOp; - -} // namespace tensorflow - -#define INHERIT_LINALG_TYPEDEFS(Scalar) \ - typedef LinearAlgebraOp Base; \ - using RealScalar = typename Eigen::NumTraits::Real; \ - using Matrix = typename Base::Matrix; \ - using MatrixMap = typename Base::MatrixMap; \ - using MatrixMaps = typename Base::MatrixMaps; \ - using ConstMatrixMap = typename Base::ConstMatrixMap; \ - using ConstMatrixMaps = typename Base::ConstMatrixMaps; \ - using ConstVectorMap = typename Base::ConstVectorMap; \ - using TensorShapes = typename Base::TensorShapes; - -#define REGISTER_LINALG_OP_CPU(OpName, OpClass, Scalar) \ - REGISTER_KERNEL_BUILDER( \ - Name(OpName).Device(DEVICE_CPU).TypeConstraint("T"), OpClass) - -#define REGISTER_LINALG_OP_GPU(OpName, OpClass, Scalar) \ - REGISTER_KERNEL_BUILDER( \ - Name(OpName).Device(DEVICE_GPU).TypeConstraint("T"), OpClass) - -// Deprecated, use one of the device-specific macros above. -#define REGISTER_LINALG_OP(OpName, OpClass, Scalar) \ - REGISTER_LINALG_OP_CPU(OpName, OpClass, Scalar) +// Temporary forwarding header. +#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" #endif // TENSORFLOW_CORE_KERNELS_LINALG_OPS_COMMON_H_ diff --git a/tensorflow/core/kernels/segment_reduction_ops_impl.h b/tensorflow/core/kernels/segment_reduction_ops_impl.h index 6c3fad668ae..7cf15ef5b72 100644 --- a/tensorflow/core/kernels/segment_reduction_ops_impl.h +++ b/tensorflow/core/kernels/segment_reduction_ops_impl.h @@ -45,13 +45,13 @@ limitations under the License. 
#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM #if GOOGLE_CUDA -#include "tensorflow/core/kernels/cuda_solvers.h" +#include "tensorflow/core/util/cuda_solvers.h" #include "tensorflow/stream_executor/cuda/cuda_activation.h" using stream_executor::cuda::ScopedActivateExecutorContext; #elif TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/cuda_solvers.h" #include "tensorflow/core/platform/rocm.h" +#include "tensorflow/core/util/cuda_solvers.h" using stream_executor::rocm::ScopedActivateExecutorContext; #endif // GOOGLE_CUDA diff --git a/tensorflow/core/kernels/sparse/BUILD b/tensorflow/core/kernels/sparse/BUILD index 1d281bc1d61..bfb6c4934bb 100644 --- a/tensorflow/core/kernels/sparse/BUILD +++ b/tensorflow/core/kernels/sparse/BUILD @@ -80,8 +80,8 @@ tf_kernel_library( "//tensorflow/core/kernels:transpose_functor", "//tensorflow/core/kernels:gpu_prim_hdrs", ] + if_cuda_or_rocm([ - "//tensorflow/core/kernels:cuda_solvers", - "//tensorflow/core/kernels:cuda_sparse", + "//tensorflow/core/util:cuda_solvers", + "//tensorflow/core/util:cuda_sparse", ]), alwayslink = 1, ) diff --git a/tensorflow/core/kernels/sparse/add_op.cc b/tensorflow/core/kernels/sparse/add_op.cc index b6265a1412c..06fe1cd042e 100644 --- a/tensorflow/core/kernels/sparse/add_op.cc +++ b/tensorflow/core/kernels/sparse/add_op.cc @@ -32,8 +32,8 @@ limitations under the License. #include "tensorflow/core/kernels/fill_functor.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/conj_op.cc b/tensorflow/core/kernels/sparse/conj_op.cc index 7275262c1f0..147160fbe6c 100644 --- a/tensorflow/core/kernels/sparse/conj_op.cc +++ b/tensorflow/core/kernels/sparse/conj_op.cc @@ -32,8 +32,8 @@ limitations under the License. #include "tensorflow/core/kernels/sparse/sparse_matrix.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_dense_op.cc b/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_dense_op.cc index 364c2c07bd8..2e5afbdcad7 100644 --- a/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_dense_op.cc +++ b/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_dense_op.cc @@ -34,8 +34,8 @@ limitations under the License. #include "tensorflow/core/util/work_sharder.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_sparse_tensor_op.cc b/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_sparse_tensor_op.cc index 55ebfa4fc10..a81ccfa562e 100644 --- a/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_sparse_tensor_op.cc +++ b/tensorflow/core/kernels/sparse/csr_sparse_matrix_to_sparse_tensor_op.cc @@ -32,8 +32,8 @@ limitations under the License. 
#include "tensorflow/core/util/work_sharder.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/dense_to_csr_sparse_matrix_op.cc b/tensorflow/core/kernels/sparse/dense_to_csr_sparse_matrix_op.cc index 459bb219343..5c62a44f9ba 100644 --- a/tensorflow/core/kernels/sparse/dense_to_csr_sparse_matrix_op.cc +++ b/tensorflow/core/kernels/sparse/dense_to_csr_sparse_matrix_op.cc @@ -35,8 +35,8 @@ limitations under the License. #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h" -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" #endif #if GOOGLE_CUDA diff --git a/tensorflow/core/kernels/sparse/kernels_gpu.cu.cc b/tensorflow/core/kernels/sparse/kernels_gpu.cu.cc index 1c014db3d0a..6b11e64307a 100644 --- a/tensorflow/core/kernels/sparse/kernels_gpu.cu.cc +++ b/tensorflow/core/kernels/sparse/kernels_gpu.cu.cc @@ -20,13 +20,13 @@ limitations under the License. #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_types.h" -#include "tensorflow/core/kernels/cuda_sparse.h" #include "tensorflow/core/kernels/gpu_device_array.h" #include "tensorflow/core/kernels/gpu_device_array_gpu.h" #include "tensorflow/core/kernels/gpu_prim.h" #include "tensorflow/core/kernels/sparse/kernels.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/cuda_sparse.h" #include "tensorflow/core/util/gpu_kernel_helper.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/mat_mul_op.cc b/tensorflow/core/kernels/sparse/mat_mul_op.cc index 50fa0ec88ea..bf9de570fbf 100644 --- a/tensorflow/core/kernels/sparse/mat_mul_op.cc +++ b/tensorflow/core/kernels/sparse/mat_mul_op.cc @@ -37,8 +37,8 @@ limitations under the License. #include "tensorflow/core/platform/threadpool.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/mul_op.cc b/tensorflow/core/kernels/sparse/mul_op.cc index 33c3756ce58..d08f1568db1 100644 --- a/tensorflow/core/kernels/sparse/mul_op.cc +++ b/tensorflow/core/kernels/sparse/mul_op.cc @@ -29,7 +29,7 @@ limitations under the License. #include "tensorflow/core/kernels/sparse/sparse_matrix.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_sparse.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/nnz_op.cc b/tensorflow/core/kernels/sparse/nnz_op.cc index ebc48c3e9a4..d67620443f0 100644 --- a/tensorflow/core/kernels/sparse/nnz_op.cc +++ b/tensorflow/core/kernels/sparse/nnz_op.cc @@ -29,8 +29,8 @@ limitations under the License. 
#include "tensorflow/core/kernels/sparse/sparse_matrix.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/softmax_op.cc b/tensorflow/core/kernels/sparse/softmax_op.cc index 25025bfe2a6..f1a5db8d0f0 100644 --- a/tensorflow/core/kernels/sparse/softmax_op.cc +++ b/tensorflow/core/kernels/sparse/softmax_op.cc @@ -20,7 +20,7 @@ limitations under the License. #define EIGEN_USE_THREADS #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_sparse.h" #define EIGEN_USE_GPU #endif diff --git a/tensorflow/core/kernels/sparse/sparse_mat_mul_op.cc b/tensorflow/core/kernels/sparse/sparse_mat_mul_op.cc index fb652e13d15..fecee9e4555 100644 --- a/tensorflow/core/kernels/sparse/sparse_mat_mul_op.cc +++ b/tensorflow/core/kernels/sparse/sparse_mat_mul_op.cc @@ -36,8 +36,8 @@ limitations under the License. #include "tensorflow/core/util/work_sharder.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/sparse_matrix_components_op.cc b/tensorflow/core/kernels/sparse/sparse_matrix_components_op.cc index 59540f63846..2eaf9bd5310 100644 --- a/tensorflow/core/kernels/sparse/sparse_matrix_components_op.cc +++ b/tensorflow/core/kernels/sparse/sparse_matrix_components_op.cc @@ -30,8 +30,8 @@ limitations under the License. #include "tensorflow/core/kernels/sparse/sparse_matrix.h" #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/sparse/sparse_tensor_to_csr_sparse_matrix_op.cc b/tensorflow/core/kernels/sparse/sparse_tensor_to_csr_sparse_matrix_op.cc index e1a4b4194d2..2548ceaa57c 100644 --- a/tensorflow/core/kernels/sparse/sparse_tensor_to_csr_sparse_matrix_op.cc +++ b/tensorflow/core/kernels/sparse/sparse_tensor_to_csr_sparse_matrix_op.cc @@ -33,8 +33,8 @@ limitations under the License. #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h" -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" #endif #if GOOGLE_CUDA diff --git a/tensorflow/core/kernels/sparse/transpose_op.cc b/tensorflow/core/kernels/sparse/transpose_op.cc index 3158eb5016d..08d37fa1692 100644 --- a/tensorflow/core/kernels/sparse/transpose_op.cc +++ b/tensorflow/core/kernels/sparse/transpose_op.cc @@ -20,7 +20,7 @@ limitations under the License. 
#define EIGEN_USE_THREADS #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_sparse.h" #define EIGEN_USE_GPU #endif diff --git a/tensorflow/core/kernels/where_op.cc b/tensorflow/core/kernels/where_op.cc index 598cb526d77..d504ec9b2ed 100644 --- a/tensorflow/core/kernels/where_op.cc +++ b/tensorflow/core/kernels/where_op.cc @@ -39,7 +39,7 @@ limitations under the License. #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h" -#include "tensorflow/core/kernels/cuda_solvers.h" +#include "tensorflow/core/util/cuda_solvers.h" #if GOOGLE_CUDA #include "tensorflow/stream_executor/cuda/cuda_activation.h" using stream_executor::cuda::ScopedActivateExecutorContext; diff --git a/tensorflow/core/util/BUILD b/tensorflow/core/util/BUILD index bb2b9ff429e..dcb2787e309 100644 --- a/tensorflow/core/util/BUILD +++ b/tensorflow/core/util/BUILD @@ -14,6 +14,7 @@ load( "tf_copts", "tf_cuda_library", "tf_cuda_only_cc_test", + "tf_kernel_library", ) load("//tensorflow:tensorflow.bzl", "tf_version_info_genrule") load( @@ -24,6 +25,11 @@ load( "//tensorflow/core/platform:build_config_root.bzl", "if_static", ) +load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") +load( + "@local_config_rocm//rocm:build_defs.bzl", + "if_rocm", +) default_package_visibility = [ "//tensorflow/core:__subpackages__", @@ -567,6 +573,63 @@ cc_library( ], ) +tf_kernel_library( + name = "cuda_solvers", + srcs = ["cuda_solvers.cc"], + hdrs = ["cuda_solvers.h"], + # @local_config_cuda//cuda:cusolver_static, //third_party/eigen3:blas, + # and //third_party/libf2c all contain various parts of BLAS, LAPACK, + # and f2c helper functions in global namespace. Tell the compiler to + # allow multiple definitions when linking this. + linkopts = select({ + "//tensorflow:macos": [], + "//tensorflow:windows": [], + "//conditions:default": ["-Wl,-z,muldefs"], + }), + visibility = ["//tensorflow/core/kernels:friends"], + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core/platform/default/build_config:cublas_plugin", + "//tensorflow/stream_executor/cuda:cublas_lib", + "//tensorflow/stream_executor/cuda:cusolver_lib", + ], +) + +tf_kernel_library( + name = "rocm_solvers", + srcs = ["rocm_solvers.cc"], + hdrs = ["rocm_solvers.h"], + visibility = ["//tensorflow/core/kernels:friends"], + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "//tensorflow/stream_executor/lib", + "//tensorflow/stream_executor/platform:dso_loader", + "//tensorflow/stream_executor/rocm:rocblas_plugin", + "//tensorflow/stream_executor/rocm:rocm_gpu_executor", + ] + if_rocm([ + "@local_config_rocm//rocm:rocprim", + ]), +) + +tf_kernel_library( + name = "cuda_sparse", + srcs = if_cuda(["cuda_sparse.cc"]) + if_rocm(["rocm_sparse.cc"]), + hdrs = ["cuda_sparse.h"], + deps = [ + ":cuda_solvers", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + ] + if_cuda([ + "//tensorflow/stream_executor/cuda:cusparse_lib", + "@cub_archive//:cub", + ]) + if_rocm([ + "@local_config_rocm//rocm:hipsparse", + ]), +) + # Tests. 
tf_cc_test( diff --git a/tensorflow/core/kernels/cuda_solvers.cc b/tensorflow/core/util/cuda_solvers.cc similarity index 99% rename from tensorflow/core/kernels/cuda_solvers.cc rename to tensorflow/core/util/cuda_solvers.cc index f41ce2a5d27..3e4d2a05ac6 100644 --- a/tensorflow/core/kernels/cuda_solvers.cc +++ b/tensorflow/core/util/cuda_solvers.cc @@ -14,7 +14,7 @@ ============================================================================== */ #ifdef GOOGLE_CUDA -#include "tensorflow/core/kernels/cuda_solvers.h" +#include "tensorflow/core/util/cuda_solvers.h" #include #include diff --git a/tensorflow/core/kernels/cuda_solvers.h b/tensorflow/core/util/cuda_solvers.h similarity index 99% rename from tensorflow/core/kernels/cuda_solvers.h rename to tensorflow/core/util/cuda_solvers.h index eb1d5c8a200..79f45c9b0ea 100644 --- a/tensorflow/core/kernels/cuda_solvers.h +++ b/tensorflow/core/util/cuda_solvers.h @@ -14,8 +14,8 @@ limitations under the License. ============================================================================== */ -#ifndef TENSORFLOW_CORE_KERNELS_CUDA_SOLVERS_H_ -#define TENSORFLOW_CORE_KERNELS_CUDA_SOLVERS_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_CUDA_SOLVERS_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_CUDA_SOLVERS_H_ // This header declares the class CudaSolver, which contains wrappers of linear // algebra solvers in the cuBlas and cuSolverDN libraries for use in TensorFlow @@ -435,7 +435,7 @@ class HostLapackInfo : public ScratchSpace { public: HostLapackInfo(OpKernelContext* context, int64 size, const std::string& debug_info) - : ScratchSpace(context, size, debug_info, /* on_host */ true){}; + : ScratchSpace(context, size, debug_info, /* on_host */ true) {} }; class DeviceLapackInfo : public ScratchSpace { @@ -489,4 +489,4 @@ inline DeviceLapackInfo CudaSolver::GetDeviceLapackInfo( #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#endif // TENSORFLOW_CORE_KERNELS_CUDA_SOLVERS_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_CUDA_SOLVERS_H_ diff --git a/tensorflow/core/kernels/cuda_sparse.cc b/tensorflow/core/util/cuda_sparse.cc similarity index 99% rename from tensorflow/core/kernels/cuda_sparse.cc rename to tensorflow/core/util/cuda_sparse.cc index 141aae61571..47e018560e1 100644 --- a/tensorflow/core/kernels/cuda_sparse.cc +++ b/tensorflow/core/util/cuda_sparse.cc @@ -15,7 +15,7 @@ limitations under the License. #ifdef GOOGLE_CUDA -#include "tensorflow/core/kernels/cuda_sparse.h" +#include "tensorflow/core/util/cuda_sparse.h" #include #include @@ -28,7 +28,6 @@ limitations under the License. #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/types.h" -#include "tensorflow/core/kernels/cuda_solvers.h" #include "tensorflow/core/lib/core/blocking_counter.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/stringpiece.h" @@ -38,6 +37,7 @@ limitations under the License. #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/stream_executor.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/cuda_solvers.h" // TODO(rmlarsen,penporn): Investigate using newer kernels in CUDA 10.1+. 
diff --git a/tensorflow/core/kernels/cuda_sparse.h b/tensorflow/core/util/cuda_sparse.h similarity index 93% rename from tensorflow/core/kernels/cuda_sparse.h rename to tensorflow/core/util/cuda_sparse.h index 978bc9005ed..76580766d69 100644 --- a/tensorflow/core/kernels/cuda_sparse.h +++ b/tensorflow/core/util/cuda_sparse.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_CUDA_SPARSE_H_ -#define TENSORFLOW_CORE_KERNELS_CUDA_SPARSE_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_CUDA_SPARSE_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_CUDA_SPARSE_H_ // This header declares the class GpuSparse, which contains wrappers of // cuSparse libraries for use in TensorFlow kernels. @@ -75,8 +75,7 @@ using gpuStream_t = hipStream_t; namespace tensorflow { -inline std::string ConvertGPUSparseErrorToString( - const gpusparseStatus_t status) { +inline string ConvertGPUSparseErrorToString(const gpusparseStatus_t status) { switch (status) { #define STRINGIZE(q) #q #define RETURN_IF_STATUS(err) \ @@ -206,49 +205,49 @@ class GpuSparse { // Solves tridiagonal system of equations. // See: https://docs.nvidia.com/cuda/cusparse/index.html#gtsv2 template - Status Gtsv2(int m, int n, const Scalar *dl, const Scalar *d, - const Scalar *du, Scalar *B, int ldb, void *pBuffer) const; + Status Gtsv2(int m, int n, const Scalar* dl, const Scalar* d, + const Scalar* du, Scalar* B, int ldb, void* pBuffer) const; // Computes the size of a temporary buffer used by Gtsv2. // See: https://docs.nvidia.com/cuda/cusparse/index.html#gtsv2_bufferSize template - Status Gtsv2BufferSizeExt(int m, int n, const Scalar *dl, const Scalar *d, - const Scalar *du, const Scalar *B, int ldb, - size_t *bufferSizeInBytes) const; + Status Gtsv2BufferSizeExt(int m, int n, const Scalar* dl, const Scalar* d, + const Scalar* du, const Scalar* B, int ldb, + size_t* bufferSizeInBytes) const; // Solves tridiagonal system of equations without partial pivoting. // See: https://docs.nvidia.com/cuda/cusparse/index.html#gtsv2_nopivot template - Status Gtsv2NoPivot(int m, int n, const Scalar *dl, const Scalar *d, - const Scalar *du, Scalar *B, int ldb, - void *pBuffer) const; + Status Gtsv2NoPivot(int m, int n, const Scalar* dl, const Scalar* d, + const Scalar* du, Scalar* B, int ldb, + void* pBuffer) const; // Computes the size of a temporary buffer used by Gtsv2NoPivot. // See: // https://docs.nvidia.com/cuda/cusparse/index.html#gtsv2_nopivot_bufferSize template - Status Gtsv2NoPivotBufferSizeExt(int m, int n, const Scalar *dl, - const Scalar *d, const Scalar *du, - const Scalar *B, int ldb, - size_t *bufferSizeInBytes) const; + Status Gtsv2NoPivotBufferSizeExt(int m, int n, const Scalar* dl, + const Scalar* d, const Scalar* du, + const Scalar* B, int ldb, + size_t* bufferSizeInBytes) const; // Solves a batch of tridiagonal systems of equations. Doesn't support // multiple right-hand sides per each system. Doesn't do pivoting. // See: https://docs.nvidia.com/cuda/cusparse/index.html#gtsv2stridedbatch template - Status Gtsv2StridedBatch(int m, const Scalar *dl, const Scalar *d, - const Scalar *du, Scalar *x, int batchCount, - int batchStride, void *pBuffer) const; + Status Gtsv2StridedBatch(int m, const Scalar* dl, const Scalar* d, + const Scalar* du, Scalar* x, int batchCount, + int batchStride, void* pBuffer) const; // Computes the size of a temporary buffer used by Gtsv2StridedBatch. 
// See: // https://docs.nvidia.com/cuda/cusparse/index.html#gtsv2stridedbatch_bufferSize template - Status Gtsv2StridedBatchBufferSizeExt(int m, const Scalar *dl, - const Scalar *d, const Scalar *du, - const Scalar *x, int batchCount, + Status Gtsv2StridedBatchBufferSizeExt(int m, const Scalar* dl, + const Scalar* d, const Scalar* du, + const Scalar* x, int batchCount, int batchStride, - size_t *bufferSizeInBytes) const; + size_t* bufferSizeInBytes) const; // Compresses the indices of rows or columns. It can be interpreted as a // conversion from COO to CSR sparse storage format. See: @@ -449,7 +448,7 @@ class GpuSparse { private: bool initialized_; - OpKernelContext *context_; // not owned. + OpKernelContext* context_; // not owned. gpuStream_t gpu_stream_; gpusparseHandle_t* gpusparse_handle_; // not owned. @@ -585,4 +584,4 @@ class GpuSparseCsrSortingConversionInfo { #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#endif // TENSORFLOW_CORE_KERNELS_CUDA_SPARSE_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_CUDA_SPARSE_H_ diff --git a/tensorflow/core/kernels/rocm_solvers.cc b/tensorflow/core/util/rocm_solvers.cc similarity index 99% rename from tensorflow/core/kernels/rocm_solvers.cc rename to tensorflow/core/util/rocm_solvers.cc index 5faf718332e..13dadf602a7 100644 --- a/tensorflow/core/kernels/rocm_solvers.cc +++ b/tensorflow/core/util/rocm_solvers.cc @@ -14,7 +14,7 @@ ============================================================================== */ #if TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/rocm_solvers.h" +#include "tensorflow/core/util/rocm_solvers.h" #include #include diff --git a/tensorflow/core/kernels/rocm_solvers.h b/tensorflow/core/util/rocm_solvers.h similarity index 96% rename from tensorflow/core/kernels/rocm_solvers.h rename to tensorflow/core/util/rocm_solvers.h index 94d3c82a497..afc8b936d05 100644 --- a/tensorflow/core/kernels/rocm_solvers.h +++ b/tensorflow/core/util/rocm_solvers.h @@ -14,8 +14,8 @@ limitations under the License. ============================================================================== */ -#ifndef TENSORFLOW_CORE_KERNELS_ROCM_SOLVERS_H_ -#define TENSORFLOW_CORE_KERNELS_ROCM_SOLVERS_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LINALG_ROCM_SOLVERS_H_ +#define TENSORFLOW_CORE_KERNELS_LINALG_ROCM_SOLVERS_H_ // This header declares the class ROCmSolver, which contains wrappers of linear // algebra solvers in the cuBlas and cuSolverDN libraries for use in TensorFlow @@ -158,4 +158,4 @@ class ScratchSpace { #endif // TENSORFLOW_USE_ROCM -#endif // TENSORFLOW_CORE_KERNELS_ROCM_SOLVERS_H_ +#endif // TENSORFLOW_CORE_KERNELS_LINALG_ROCM_SOLVERS_H_ diff --git a/tensorflow/core/kernels/rocm_sparse.cc b/tensorflow/core/util/rocm_sparse.cc similarity index 99% rename from tensorflow/core/kernels/rocm_sparse.cc rename to tensorflow/core/util/rocm_sparse.cc index 97488692bc1..cc7b56fdc01 100644 --- a/tensorflow/core/kernels/rocm_sparse.cc +++ b/tensorflow/core/util/rocm_sparse.cc @@ -24,8 +24,6 @@ limitations under the License. #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/types.h" -#include "tensorflow/core/kernels/cuda_solvers.h" -#include "tensorflow/core/kernels/cuda_sparse.h" #include "tensorflow/core/lib/core/blocking_counter.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/stringpiece.h" @@ -35,6 +33,8 @@ limitations under the License. 
#include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/stream_executor.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/cuda_solvers.h" +#include "tensorflow/core/util/cuda_sparse.h" namespace tensorflow { namespace { From 3f45c33ba54968010ef551f448a843ab2f1427fd Mon Sep 17 00:00:00 2001 From: Katherine Wu Date: Tue, 4 Aug 2020 17:20:06 -0700 Subject: [PATCH 2125/2522] Remove tracking of inbound nodes/outbound nodes. When building a Sequential model, clear_previously_created_nodes accidentally added tracking to the nodes attributes, which causes unnecessary warnings when loading a checkpoint. PiperOrigin-RevId: 324923805 Change-Id: I7ee4457b70b16bb1a3b410f41327bba269a128e5 --- tensorflow/python/keras/engine/base_layer.py | 22 +++++++++++++++++-- .../python/keras/engine/base_layer_v1.py | 22 +++++++++++++++++-- 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/keras/engine/base_layer.py b/tensorflow/python/keras/engine/base_layer.py index 373e17a4004..c01c3d96aec 100644 --- a/tensorflow/python/keras/engine/base_layer.py +++ b/tensorflow/python/keras/engine/base_layer.py @@ -382,8 +382,8 @@ class Layer(module.Module, version_utils.LayerVersionSelector): # These lists will be filled via successive calls # to self._add_inbound_node(). # Used in symbolic mode only, only in conjunction with graph-networks - self._inbound_nodes = [] - self._outbound_nodes = [] + self._inbound_nodes_value = [] + self._outbound_nodes_value = [] self._init_call_fn_args() @@ -2268,6 +2268,24 @@ class Layer(module.Module, version_utils.LayerVersionSelector): # Methods & attributes below are all private and only used by the framework. # ############################################################################## + @property + def _inbound_nodes(self): + return self._inbound_nodes_value + + @_inbound_nodes.setter + @trackable.no_automatic_dependency_tracking + def _inbound_nodes(self, value): + self._inbound_nodes_value = value + + @property + def _outbound_nodes(self): + return self._outbound_nodes_value + + @_outbound_nodes.setter + @trackable.no_automatic_dependency_tracking + def _outbound_nodes(self, value): + self._outbound_nodes_value = value + def _set_dtype_policy(self, dtype): """Sets self._dtype_policy.""" if isinstance(dtype, policy.Policy): diff --git a/tensorflow/python/keras/engine/base_layer_v1.py b/tensorflow/python/keras/engine/base_layer_v1.py index e9ebc170b96..85d390f2360 100644 --- a/tensorflow/python/keras/engine/base_layer_v1.py +++ b/tensorflow/python/keras/engine/base_layer_v1.py @@ -217,8 +217,8 @@ class Layer(base_layer.Layer): # These lists will be filled via successive calls # to self._add_inbound_node(). # Used in symbolic mode only, only in conjunction with graph-networks - self._inbound_nodes = [] - self._outbound_nodes = [] + self._inbound_nodes_value = [] + self._outbound_nodes_value = [] self._init_call_fn_args() @@ -1740,6 +1740,24 @@ class Layer(base_layer.Layer): # Methods & attributes below are all private and only used by the framework. 
# ############################################################################## + @property + def _inbound_nodes(self): + return self._inbound_nodes_value + + @_inbound_nodes.setter + @trackable.no_automatic_dependency_tracking + def _inbound_nodes(self, value): + self._inbound_nodes_value = value + + @property + def _outbound_nodes(self): + return self._outbound_nodes_value + + @_outbound_nodes.setter + @trackable.no_automatic_dependency_tracking + def _outbound_nodes(self, value): + self._outbound_nodes_value = value + def _set_dtype_policy(self, dtype): """Sets self._dtype_policy.""" if isinstance(dtype, policy.Policy): From 92d021c1ae9bbcd500d13f24cd7c3c399ab82e6f Mon Sep 17 00:00:00 2001 From: ShengYang1 Date: Wed, 5 Aug 2020 08:28:55 +0800 Subject: [PATCH 2126/2522] Fix some error --- .../core/grappler/optimizers/remapper_test.cc | 79 +++++++++---------- 1 file changed, 38 insertions(+), 41 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/remapper_test.cc b/tensorflow/core/grappler/optimizers/remapper_test.cc index 417ecd6dd44..7b99c038c53 100644 --- a/tensorflow/core/grappler/optimizers/remapper_test.cc +++ b/tensorflow/core/grappler/optimizers/remapper_test.cc @@ -930,48 +930,45 @@ class FusedCmpAndCastTest : public GrapplerTest { template void TestFusedCmpAndCast() { using ::tensorflow::ops::Placeholder; - for (bool is_training : {true, false}) { - tensorflow::Scope s = tensorflow::Scope::NewRootScope(); - const int num_channels = 24; - TensorShape channel_shape({num_channels}); - TensorShape empty_shape({0}); - auto x = Placeholder(s.WithOpName("x"), TYPE, - ops::Placeholder::Shape({2, 8, 8, num_channels})); - auto y = Placeholder(s.WithOpName("y"), TYPE, - ops::Placeholder::Shape({2, 8, 8, num_channels})); - float epsilon = 0.1f; - auto comparator = ops::Equal(s.WithOpName("Equal"), x, y); - auto cast = ops::Cast(s.WithOpName("cast"), comparator.z, TYPE); - auto fetch = ops::Identity(s.WithOpName("fetch"), cast); - auto input1_t = GenerateRandomTensor({2, 8, 8, num_channels}); - auto input2_t = GenerateRandomTensor({2, 8, 8, num_channels}); - GrapplerItem item; - item.fetch = {"fetch"}; - item.feed = {{"x", input1_t}, {"y", input2_t}}; - TF_ASSERT_OK(s.ToGraphDef(&item.graph)); - for (int i = 0; i < item.graph.node_size(); ++i) { - item.graph.mutable_node(i)->set_device("/device:CPU:0"); - } - Remapper optimizer(RewriterConfig::AGGRESSIVE); - GraphDef output; - TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output)); - int found = 0; - for (const NodeDef& node : output.node()) { - if (node.name() == "cast") { - EXPECT_EQ(node.op(), "_EqualWithCast"); - ASSERT_EQ(node.input_size(), 2); - EXPECT_EQ(node.input(0), "x"); - EXPECT_EQ(node.input(1), "y"); - found++; - } - } - EXPECT_EQ(found, 1); - auto tensors_expected = EvaluateNodes(item.graph, item.fetch, item.feed); - ASSERT_EQ(tensors_expected.size(), 1); - auto tensors = EvaluateNodes(output, item.fetch, item.feed); - ASSERT_EQ(tensors.size(), 1); - test::ExpectClose(tensors[0], tensors_expected[0], 1e-2, 1e-2); + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + const int num_channels = 24; + TensorShape channel_shape({num_channels}); + TensorShape empty_shape({0}); + auto x = Placeholder(s.WithOpName("x"), TYPE, + ops::Placeholder::Shape({2, 8, 8, num_channels})); + auto y = Placeholder(s.WithOpName("y"), TYPE, + ops::Placeholder::Shape({2, 8, 8, num_channels})); + auto comparator = ops::Equal(s.WithOpName("Equal"), x, y); + auto cast = ops::Cast(s.WithOpName("cast"), comparator.z, TYPE); + auto fetch 
= ops::Identity(s.WithOpName("fetch"), cast); + auto input1_t = GenerateRandomTensor({2, 8, 8, num_channels}); + auto input2_t = GenerateRandomTensor({2, 8, 8, num_channels}); + GrapplerItem item; + item.fetch = {"fetch"}; + item.feed = {{"x", input1_t}, {"y", input2_t}}; + TF_ASSERT_OK(s.ToGraphDef(&item.graph)); + for (int i = 0; i < item.graph.node_size(); ++i) { + item.graph.mutable_node(i)->set_device("/device:CPU:0"); } + Remapper optimizer(RewriterConfig::AGGRESSIVE); + GraphDef output; + TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output)); + int found = 0; + for (const NodeDef& node : output.node()) { + if (node.name() == "cast") { + EXPECT_EQ(node.op(), "_EqualWithCast"); + ASSERT_EQ(node.input_size(), 2); + EXPECT_EQ(node.input(0), "x"); + EXPECT_EQ(node.input(1), "y"); + found++; + } + } + EXPECT_EQ(found, 1); + auto tensors_expected = EvaluateNodes(item.graph, item.fetch, item.feed); + ASSERT_EQ(tensors_expected.size(), 1); + auto tensors = EvaluateNodes(output, item.fetch, item.feed); + ASSERT_EQ(tensors.size(), 1); + test::ExpectClose(tensors[0], tensors_expected[0], 1e-2, 1e-2); } }; From f9bb6295251bdd44b1d533c2813b6997c829f591 Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: Tue, 4 Aug 2020 17:46:36 -0700 Subject: [PATCH 2127/2522] [tf.data service] Use the journal to keep track of registered workers. As part of this change, we stop using integer worker ids, and instead use workers addresses as their identifiers. PiperOrigin-RevId: 324927652 Change-Id: If6ef5a08aac6bf32cc603108f9045887619488f1 --- tensorflow/core/data/service/common.proto | 9 ++ tensorflow/core/data/service/dispatcher.proto | 12 +-- .../core/data/service/dispatcher_impl.cc | 85 ++++++++++++------- .../core/data/service/dispatcher_impl.h | 23 ++--- .../core/data/service/dispatcher_state.cc | 31 +++++++ .../core/data/service/dispatcher_state.h | 36 +++++--- .../data/service/dispatcher_state_test.cc | 78 ++++++++++++++--- tensorflow/core/data/service/journal.cc | 2 +- tensorflow/core/data/service/journal.h | 4 +- tensorflow/core/data/service/journal.proto | 5 ++ tensorflow/core/data/service/worker_impl.cc | 3 - .../experimental/data_service_dataset_op.cc | 7 +- .../data/experimental/service_config.proto | 3 + 13 files changed, 206 insertions(+), 92 deletions(-) diff --git a/tensorflow/core/data/service/common.proto b/tensorflow/core/data/service/common.proto index 6d5398d9cd9..aeeb1371171 100644 --- a/tensorflow/core/data/service/common.proto +++ b/tensorflow/core/data/service/common.proto @@ -19,6 +19,15 @@ message TaskDef { int64 job_id = 4; } +message TaskInfo { + // The address of the worker processing the task. + string worker_address = 1; + // The task id. + int64 task_id = 2; + // The id of the job that the task is part of. + int64 job_id = 3; +} + enum ProcessingModeDef { // Each tf.data worker processes an entire epoch. PARALLEL_EPOCHS = 0; diff --git a/tensorflow/core/data/service/dispatcher.proto b/tensorflow/core/data/service/dispatcher.proto index 2a2d48ab93d..057fc58de52 100644 --- a/tensorflow/core/data/service/dispatcher.proto +++ b/tensorflow/core/data/service/dispatcher.proto @@ -10,8 +10,6 @@ message RegisterWorkerRequest { } message RegisterWorkerResponse { - // An id for the worker. - int64 worker_id = 1; // Tasks to begin processing. repeated TaskDef tasks = 2; } @@ -24,8 +22,7 @@ message TaskProgress { } message WorkerUpdateRequest { - // The worker id that the update is for. 
- int64 worker_id = 1; + string worker_address = 1; repeated TaskProgress updates = 2; } @@ -75,13 +72,6 @@ message GetTasksRequest { int64 job_id = 1; } -message TaskInfo { - // The address of the worker processing the task. - string worker_address = 1; - // The task id. - int64 id = 2; -} - message GetTasksResponse { // A list of all tasks for a job. repeated TaskInfo task_info = 1; diff --git a/tensorflow/core/data/service/dispatcher_impl.cc b/tensorflow/core/data/service/dispatcher_impl.cc index 77477df71e4..ffeae96c117 100644 --- a/tensorflow/core/data/service/dispatcher_impl.cc +++ b/tensorflow/core/data/service/dispatcher_impl.cc @@ -46,6 +46,7 @@ namespace { constexpr char kJournalDir[] = "journal"; using Dataset = DispatcherState::Dataset; +using Worker = DispatcherState::Worker; using NamedJobKey = DispatcherState::NamedJobKey; using Job = DispatcherState::Job; using Task = DispatcherState::Task; @@ -77,10 +78,16 @@ DataServiceDispatcherImpl::DataServiceDispatcherImpl( } Status DataServiceDispatcherImpl::Start() { - if (config_.work_dir().empty()) { + if (!config_.fault_tolerant_mode()) { + LOG(INFO) << "Running with fault_tolerant_mode=False. The dispatcher will " + "not be able to recover its state on restart."; return Status::OK(); } mutex_lock l(mu_); + if (config_.work_dir().empty()) { + return errors::InvalidArgument( + "fault_tolerant_mode is True, but no work_dir is configured."); + } Update update; bool end_of_journal = false; FileJournalReader reader(Env::Default(), JournalDir(config_.work_dir())); @@ -104,12 +111,16 @@ Status DataServiceDispatcherImpl::RegisterWorker( VLOG(3) << "Received register worker request"; mutex_lock l(mu_); std::string worker_address = request->worker_address(); - if (!workers_.contains(worker_address)) { - workers_[worker_address] = - std::make_shared(next_worker_id_++, worker_address); + std::shared_ptr worker; + Status s = state_.WorkerFromAddress(worker_address, &worker); + if (errors::IsNotFound(s)) { + Update update; + update.mutable_register_worker()->set_worker_address(worker_address); + TF_RETURN_IF_ERROR(Apply(update)); + } else if (!s.ok()) { + return s; } - int64 worker_id = workers_[worker_address]->worker_id; - response->set_worker_id(worker_id); + std::vector> jobs = state_.ListJobs(); // Allocate tasks to the worker. 
for (const auto& job : jobs) { @@ -127,8 +138,7 @@ Status DataServiceDispatcherImpl::RegisterWorker( task_def->set_task_id(task->task_id); } - VLOG(1) << "Registered worker at address " << request->worker_address() - << " with id " << worker_id; + VLOG(1) << "Registered worker at address " << request->worker_address(); return Status::OK(); } @@ -146,8 +156,7 @@ Status DataServiceDispatcherImpl::WorkerUpdate( continue; } Update update; - FinishTaskUpdate* finish_task = update.mutable_finish_task(); - finish_task->set_task_id(task_id); + update.mutable_finish_task()->set_task_id(task_id); TF_RETURN_IF_ERROR(Apply(update)); VLOG(3) << "Task " << task_id << " from job " << task->job_id << " completed"; @@ -310,10 +319,10 @@ Status DataServiceDispatcherImpl::CreateTasksForJob( std::shared_ptr job, std::vector>* tasks) EXCLUSIVE_LOCKS_REQUIRED(mu_) { + std::vector> workers = state_.ListWorkers(); tasks->clear(); - tasks->reserve(workers_.size()); - for (const auto& it : workers_) { - std::shared_ptr worker = it.second; + tasks->reserve(workers.size()); + for (const auto& worker : workers) { std::shared_ptr task; TF_RETURN_IF_ERROR(CreateTask(job, worker->address, &task)); tasks->push_back(task); @@ -345,10 +354,28 @@ Status DataServiceDispatcherImpl::AssignTasks( return Status::OK(); } -Status DataServiceDispatcherImpl::EnsureWorkerStubInitialized(Worker* worker) { - if (!worker->stub) { - TF_RETURN_IF_ERROR( - CreateWorkerStub(worker->address, config_.protocol(), &worker->stub)); +Status DataServiceDispatcherImpl::GetOrCreateWorkerStub( + const std::string& worker_address, WorkerService::Stub** out_stub) + LOCKS_EXCLUDED(mu_) { + { + mutex_lock l(mu_); + auto it = worker_stubs_.find(worker_address); + if (it != worker_stubs_.end()) { + *out_stub = it->second.get(); + return Status::OK(); + } + } + std::unique_ptr stub; + TF_RETURN_IF_ERROR( + CreateWorkerStub(worker_address, config_.protocol(), &stub)); + { + mutex_lock l(mu_); + // A concurrent call could have already created the stub. 
+ auto& worker = worker_stubs_[worker_address]; + if (worker == nullptr) { + worker = std::move(stub); + } + *out_stub = worker.get(); } return Status::OK(); } @@ -359,25 +386,21 @@ Status DataServiceDispatcherImpl::AssignTask(std::shared_ptr task) ProcessTaskRequest req; TaskDef* task_def = req.mutable_task(); task_def->set_dataset_id(task->dataset_id); - std::shared_ptr worker; { mutex_lock l(mu_); - worker = workers_[task->worker_address]; std::shared_ptr dataset; TF_RETURN_IF_ERROR(state_.DatasetFromId(task->dataset_id, &dataset)); *task_def->mutable_dataset() = dataset->dataset_def; } - if (!worker) { - return errors::NotFound("No worker found for address ", - task->worker_address); - } task_def->set_task_id(task->task_id); ProcessTaskResponse resp; - TF_RETURN_IF_ERROR(EnsureWorkerStubInitialized(worker.get())); - grpc::Status s = worker->stub->ProcessTask(&client_ctx, req, &resp); + WorkerService::Stub* stub; + TF_RETURN_IF_ERROR(GetOrCreateWorkerStub(task->worker_address, &stub)); + grpc::Status s = stub->ProcessTask(&client_ctx, req, &resp); if (!s.ok()) { return grpc_util::WrapError( - absl::StrCat("Failed to submit task to worker ", worker->address), s); + absl::StrCat("Failed to submit task to worker ", task->worker_address), + s); } return Status::OK(); } @@ -391,7 +414,8 @@ Status DataServiceDispatcherImpl::GetTasks(const GetTasksRequest* request, for (const auto& task : tasks) { TaskInfo* task_info = response->mutable_task_info()->Add(); task_info->set_worker_address(task->worker_address); - task_info->set_id(task->task_id); + task_info->set_task_id(task->task_id); + task_info->set_job_id(task->job_id); } std::shared_ptr job; TF_RETURN_IF_ERROR(state_.JobFromId(request->job_id(), &job)); @@ -405,13 +429,12 @@ Status DataServiceDispatcherImpl::GetWorkers(const GetWorkersRequest* request, GetWorkersResponse* response) { mutex_lock l(mu_); VLOG(3) << "Enter GetWorkers"; - for (const auto& it : workers_) { - std::shared_ptr worker = it.second; + std::vector> workers = state_.ListWorkers(); + for (const auto& worker : workers) { WorkerInfo* info = response->add_workers(); info->set_address(worker->address); - info->set_id(worker->worker_id); } - VLOG(3) << "Returning list of " << workers_.size() + VLOG(3) << "Returning list of " << response->workers_size() << " workers from GetWorkers"; return Status::OK(); } diff --git a/tensorflow/core/data/service/dispatcher_impl.h b/tensorflow/core/data/service/dispatcher_impl.h index 6fa1815e9eb..f4cc6954fe8 100644 --- a/tensorflow/core/data/service/dispatcher_impl.h +++ b/tensorflow/core/data/service/dispatcher_impl.h @@ -71,21 +71,15 @@ class DataServiceDispatcherImpl { GetWorkersResponse* response); private: - struct Worker { - Worker(int64 worker_id, const std::string& address) - : worker_id(worker_id), address(address) {} - - const int64 worker_id; - const std::string address; - std::unique_ptr stub; - }; - // Registers a dataset with the given fingerprint, storing the new dataset's // id in `*dataset-id`. Status RegisterDataset(uint64 fingerprint, const DatasetDef& dataset, int64* dataset_id) EXCLUSIVE_LOCKS_REQUIRED(mu_); - // Initializes a workers stub, if it hasn't been initialized already. - Status EnsureWorkerStubInitialized(Worker* worker); + // Gets a worker's stub from `worker_stubs_`, or if none exists, creates a + // stub and stores it in `worker_stubs_`. + Status GetOrCreateWorkerStub(const std::string& worker_address, + WorkerService::Stub** out_stub) + LOCKS_EXCLUDED(mu_); // Creates a job and stores it in `*job`. 
This method updates the // dispatcher state with the new job, but does not assign tasks to workers. Status CreateJob(int64 dataset_id, ProcessingMode processing_mode, @@ -128,12 +122,11 @@ class DataServiceDispatcherImpl { mutex mu_; - int64 next_worker_id_ TF_GUARDED_BY(mu_) = 0; int64 next_task_id_ TF_GUARDED_BY(mu_) = 0; - // Registered workers, keyed by their addresses. - absl::flat_hash_map> workers_ - TF_GUARDED_BY(mu_); + // Cached worker stubs for communicating with workers. + absl::flat_hash_map> + worker_stubs_ TF_GUARDED_BY(mu_); absl::optional> journal_writer_ TF_GUARDED_BY(mu_); diff --git a/tensorflow/core/data/service/dispatcher_state.cc b/tensorflow/core/data/service/dispatcher_state.cc index 093457a55af..1e914b69e5b 100644 --- a/tensorflow/core/data/service/dispatcher_state.cc +++ b/tensorflow/core/data/service/dispatcher_state.cc @@ -30,6 +30,9 @@ Status DispatcherState::Apply(Update update) { case Update::kRegisterDataset: RegisterDataset(update.register_dataset()); break; + case Update::kRegisterWorker: + RegisterWorker(update.register_worker()); + break; case Update::kCreateJob: CreateJob(update.create_job()); break; @@ -59,6 +62,13 @@ void DispatcherState::RegisterDataset( next_available_dataset_id_ = std::max(next_available_dataset_id_, id + 1); } +void DispatcherState::RegisterWorker( + const RegisterWorkerUpdate& register_worker) { + std::string address = register_worker.worker_address(); + DCHECK(!workers_.contains(address)); + workers_[address] = std::make_shared(address); +} + void DispatcherState::CreateJob(const CreateJobUpdate& create_job) { int64 job_id = create_job.job_id(); absl::optional named_job_key; @@ -71,6 +81,7 @@ void DispatcherState::CreateJob(const CreateJobUpdate& create_job) { named_job_key); DCHECK(!jobs_.contains(job_id)); jobs_[job_id] = job; + tasks_by_job_[job_id] = std::vector>(); if (named_job_key.has_value()) { DCHECK(!named_jobs_.contains(named_job_key.value())); named_jobs_[named_job_key.value()] = job; @@ -129,6 +140,26 @@ Status DispatcherState::DatasetFromFingerprint( return Status::OK(); } +Status DispatcherState::WorkerFromAddress( + const std::string& address, std::shared_ptr* worker) const { + auto it = workers_.find(address); + if (it == workers_.end()) { + return errors::NotFound("Worker with address ", address, " not found."); + } + *worker = it->second; + return Status::OK(); +} + +std::vector> +DispatcherState::ListWorkers() const { + std::vector> workers; + workers.reserve(workers_.size()); + for (const auto& it : workers_) { + workers.push_back(it.second); + } + return workers; +} + std::vector> DispatcherState::ListJobs() { std::vector> jobs; diff --git a/tensorflow/core/data/service/dispatcher_state.h b/tensorflow/core/data/service/dispatcher_state.h index 7313274ae71..b1aa0aa3979 100644 --- a/tensorflow/core/data/service/dispatcher_state.h +++ b/tensorflow/core/data/service/dispatcher_state.h @@ -48,11 +48,6 @@ namespace data { // DispatcherImpl and for providing DispatcherImpl with read-only access to // the state. // -// Note that not all state needs to be journaled, and in general we journal -// as little state as possible. For example, worker and task state doesn't need -// to be journaled because we can recover that information from workers when -// they reconnect to a restarted dispatcher. -// // DispatcherState is thread-compatible but not thread-safe. class DispatcherState { public: @@ -65,7 +60,8 @@ class DispatcherState { // A dataset registered with the dispatcher. 
struct Dataset { - Dataset(int64 dataset_id, int64 fingerprint, const DatasetDef& dataset_def) + explicit Dataset(int64 dataset_id, int64 fingerprint, + const DatasetDef& dataset_def) : dataset_id(dataset_id), fingerprint(fingerprint), dataset_def(dataset_def) {} @@ -75,10 +71,17 @@ class DispatcherState { const DatasetDef dataset_def; }; + // A worker registered with the dispatcher. + struct Worker { + explicit Worker(const std::string& address) : address(address) {} + + const std::string address; + }; + // A key for identifying a named job. The key contains a user-specified name, // as well as an index describing which iteration of the job we are on. struct NamedJobKey { - NamedJobKey(absl::string_view name, int64 index) + explicit NamedJobKey(absl::string_view name, int64 index) : name(name), index(index) {} friend bool operator==(const NamedJobKey& lhs, const NamedJobKey& rhs) { @@ -96,8 +99,8 @@ class DispatcherState { // A job for processing a dataset. struct Job { - Job(int64 job_id, int64 dataset_id, ProcessingMode processing_mode, - absl::optional named_job_key) + explicit Job(int64 job_id, int64 dataset_id, ProcessingMode processing_mode, + absl::optional named_job_key) : job_id(job_id), dataset_id(dataset_id), processing_mode(processing_mode), @@ -111,8 +114,8 @@ class DispatcherState { }; struct Task { - Task(int64 task_id, int64 job_id, int64 dataset_id, - const std::string& worker_address) + explicit Task(int64 task_id, int64 job_id, int64 dataset_id, + const std::string& worker_address) : task_id(task_id), job_id(job_id), dataset_id(dataset_id), @@ -134,6 +137,12 @@ class DispatcherState { Status DatasetFromFingerprint(uint64 fingerprint, std::shared_ptr* dataset) const; + // Gets a worker by address. Returns NOT_FOUND if there is no such worker. + Status WorkerFromAddress(const std::string& address, + std::shared_ptr* worker) const; + // Lists all workers registered with the dispatcher. + std::vector> ListWorkers() const; + // Returns the next available job id. int64 NextAvailableJobId() const; // Returns a list of all jobs. @@ -153,8 +162,8 @@ class DispatcherState { std::vector>* tasks) const; private: - // Registers a dataset. The dataset must not already be registered. void RegisterDataset(const RegisterDatasetUpdate& register_dataset); + void RegisterWorker(const RegisterWorkerUpdate& register_worker); void CreateJob(const CreateJobUpdate& create_job); void CreateTask(const CreateTaskUpdate& create_task); void FinishTask(const FinishTaskUpdate& finish_task); @@ -166,6 +175,9 @@ class DispatcherState { absl::flat_hash_map> datasets_by_fingerprint_; + // Registered workers, keyed by address. + absl::flat_hash_map> workers_; + int64 next_available_job_id_ = 0; // Jobs, keyed by job ids. absl::flat_hash_map> jobs_; diff --git a/tensorflow/core/data/service/dispatcher_state_test.cc b/tensorflow/core/data/service/dispatcher_state_test.cc index 933d783d227..b5529951efb 100644 --- a/tensorflow/core/data/service/dispatcher_state_test.cc +++ b/tensorflow/core/data/service/dispatcher_state_test.cc @@ -14,6 +14,8 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/core/data/service/dispatcher_state.h" +#include + #include "tensorflow/core/data/service/common.pb.h" #include "tensorflow/core/data/service/journal.h" #include "tensorflow/core/data/service/journal.pb.h" @@ -27,9 +29,11 @@ namespace data { namespace { using Dataset = DispatcherState::Dataset; +using Worker = DispatcherState::Worker; using NamedJobKey = DispatcherState::NamedJobKey; using Job = DispatcherState::Job; using Task = DispatcherState::Task; +using ::testing::IsEmpty; using ::testing::SizeIs; Status RegisterDatasetWithIdAndFingerprint(int64 id, uint64 fingerprint, @@ -42,6 +46,13 @@ Status RegisterDatasetWithIdAndFingerprint(int64 id, uint64 fingerprint, return Status::OK(); } +Status RegisterWorker(std::string worker_address, DispatcherState* state) { + Update update; + update.mutable_register_worker()->set_worker_address(worker_address); + TF_RETURN_IF_ERROR(state->Apply(update)); + return Status::OK(); +} + Status CreateAnonymousJob(int64 job_id, int64 dataset_id, DispatcherState* state) { Update update; @@ -98,12 +109,12 @@ TEST(DispatcherState, RegisterDataset) { { std::shared_ptr dataset; TF_EXPECT_OK(state.DatasetFromFingerprint(fingerprint, &dataset)); - EXPECT_EQ(id, dataset->dataset_id); + EXPECT_EQ(dataset->dataset_id, id); } { std::shared_ptr dataset; TF_EXPECT_OK(state.DatasetFromId(id, &dataset)); - EXPECT_EQ(fingerprint, dataset->fingerprint); + EXPECT_EQ(dataset->fingerprint, fingerprint); } } @@ -126,10 +137,46 @@ TEST(DispatcherState, NextAvailableDatasetId) { int64 id = state.NextAvailableDatasetId(); uint64 fingerprint = 20; TF_EXPECT_OK(RegisterDatasetWithIdAndFingerprint(id, fingerprint, &state)); - EXPECT_NE(id, state.NextAvailableDatasetId()); + EXPECT_NE(state.NextAvailableDatasetId(), id); EXPECT_EQ(state.NextAvailableDatasetId(), state.NextAvailableDatasetId()); } +TEST(DispatcherState, RegisterWorker) { + DispatcherState state; + std::string address = "test_worker_address"; + TF_EXPECT_OK(RegisterWorker(address, &state)); + std::shared_ptr worker; + TF_EXPECT_OK(state.WorkerFromAddress(address, &worker)); + EXPECT_EQ(worker->address, address); +} + +TEST(DispatcherState, ListWorkers) { + DispatcherState state; + std::string address_1 = "address_1"; + std::string address_2 = "address_2"; + { + std::vector> workers = state.ListWorkers(); + EXPECT_THAT(workers, IsEmpty()); + } + TF_EXPECT_OK(RegisterWorker(address_1, &state)); + { + std::vector> workers = state.ListWorkers(); + EXPECT_THAT(workers, SizeIs(1)); + } + TF_EXPECT_OK(RegisterWorker(address_2, &state)); + { + std::vector> workers = state.ListWorkers(); + EXPECT_THAT(workers, SizeIs(2)); + } +} + +TEST(DispatcherState, MissingWorker) { + DispatcherState state; + std::shared_ptr worker; + Status s = state.WorkerFromAddress("test_worker_address", &worker); + EXPECT_EQ(s.code(), error::NOT_FOUND); +} + TEST(DispatcherState, UnknownUpdate) { DispatcherState state; Update update; @@ -146,8 +193,11 @@ TEST(DispatcherState, AnonymousJob) { std::shared_ptr job; TF_EXPECT_OK(state.JobFromId(job_id, &job)); EXPECT_EQ(state.NextAvailableJobId(), job_id + 1); - EXPECT_EQ(dataset_id, job->dataset_id); - EXPECT_EQ(job_id, job->job_id); + EXPECT_EQ(job->dataset_id, dataset_id); + EXPECT_EQ(job->job_id, job_id); + std::vector> tasks; + TF_EXPECT_OK(state.TasksForJob(job_id, &tasks)); + EXPECT_THAT(tasks, IsEmpty()); EXPECT_FALSE(job->finished); } @@ -161,8 +211,8 @@ TEST(DispatcherState, NamedJob) { 
std::shared_ptr job; TF_EXPECT_OK(state.NamedJobByKey(named_job_key, &job)); EXPECT_EQ(state.NextAvailableJobId(), job_id + 1); - EXPECT_EQ(dataset_id, job->dataset_id); - EXPECT_EQ(job_id, job->job_id); + EXPECT_EQ(job->dataset_id, dataset_id); + EXPECT_EQ(job->job_id, job_id); EXPECT_FALSE(job->finished); } @@ -179,10 +229,10 @@ TEST(DispatcherState, CreateTask) { { std::shared_ptr task; TF_EXPECT_OK(state.TaskFromId(task_id, &task)); - EXPECT_EQ(task_id, task->task_id); - EXPECT_EQ(job_id, task->job_id); - EXPECT_EQ(dataset_id, task->dataset_id); - EXPECT_EQ(worker_address, task->worker_address); + EXPECT_EQ(task->task_id, task_id); + EXPECT_EQ(task->job_id, job_id); + EXPECT_EQ(task->dataset_id, dataset_id); + EXPECT_EQ(task->worker_address, worker_address); } { std::vector> tasks; @@ -207,7 +257,7 @@ TEST(DispatcherState, CreateTasksForSameJob) { { std::vector> tasks; TF_EXPECT_OK(state.TasksForJob(job_id, &tasks)); - EXPECT_EQ(2, tasks.size()); + EXPECT_THAT(tasks, SizeIs(2)); } } @@ -229,12 +279,12 @@ TEST(DispatcherState, CreateTasksForDifferentJobs) { { std::vector> tasks; TF_EXPECT_OK(state.TasksForJob(job_id_1, &tasks)); - EXPECT_EQ(1, tasks.size()); + EXPECT_THAT(tasks, SizeIs(1)); } { std::vector> tasks; TF_EXPECT_OK(state.TasksForJob(job_id_2, &tasks)); - EXPECT_EQ(1, tasks.size()); + EXPECT_THAT(tasks, SizeIs(1)); } } diff --git a/tensorflow/core/data/service/journal.cc b/tensorflow/core/data/service/journal.cc index a9aa43b9758..11952b0dfd9 100644 --- a/tensorflow/core/data/service/journal.cc +++ b/tensorflow/core/data/service/journal.cc @@ -48,7 +48,7 @@ Status FileJournalWriter::EnsureInitialized() { return Status::OK(); } -Status FileJournalWriter::Write(Update update) { +Status FileJournalWriter::Write(const Update& update) { TF_RETURN_IF_ERROR(EnsureInitialized()); std::string s = update.SerializeAsString(); if (s.empty()) { diff --git a/tensorflow/core/data/service/journal.h b/tensorflow/core/data/service/journal.h index 112c3b614be..c627c21756c 100644 --- a/tensorflow/core/data/service/journal.h +++ b/tensorflow/core/data/service/journal.h @@ -32,7 +32,7 @@ class JournalWriter { public: virtual ~JournalWriter() = default; // Writes and syncs an update to the journal. - virtual Status Write(Update update) = 0; + virtual Status Write(const Update& update) = 0; }; // FileJournalWriter is not thread-safe, requiring external synchronization when @@ -46,7 +46,7 @@ class FileJournalWriter : public JournalWriter { FileJournalWriter(const FileJournalWriter&) = delete; FileJournalWriter& operator=(const FileJournalWriter&) = delete; - Status Write(Update update) override; + Status Write(const Update& update) override; private: // Initializes the writer if it is not yet initialized. 
diff --git a/tensorflow/core/data/service/journal.proto b/tensorflow/core/data/service/journal.proto index fd4c5863ca9..725724a5cd5 100644 --- a/tensorflow/core/data/service/journal.proto +++ b/tensorflow/core/data/service/journal.proto @@ -10,6 +10,7 @@ import "tensorflow/core/data/service/common.proto"; message Update { oneof update_type { RegisterDatasetUpdate register_dataset = 1; + RegisterWorkerUpdate register_worker = 5; CreateJobUpdate create_job = 2; CreateTaskUpdate create_task = 3; FinishTaskUpdate finish_task = 4; @@ -22,6 +23,10 @@ message RegisterDatasetUpdate { uint64 fingerprint = 3; } +message RegisterWorkerUpdate { + string worker_address = 1; +} + message NamedJobKeyDef { string name = 1; int64 index = 2; diff --git a/tensorflow/core/data/service/worker_impl.cc b/tensorflow/core/data/service/worker_impl.cc index c6338d540f8..6326d65782b 100644 --- a/tensorflow/core/data/service/worker_impl.cc +++ b/tensorflow/core/data/service/worker_impl.cc @@ -197,8 +197,6 @@ Status DataServiceWorkerImpl::Register() EXCLUSIVE_LOCKS_REQUIRED(mu_) { for (const TaskDef& task : resp.tasks()) { TF_RETURN_IF_ERROR(ProcessTaskInternal(task)); } - worker_id_ = resp.worker_id(); - VLOG(3) << "Registered worker with id " << resp.worker_id(); return Status::OK(); } @@ -207,7 +205,6 @@ Status DataServiceWorkerImpl::SendTaskUpdate() EXCLUSIVE_LOCKS_REQUIRED(mu_) { << " task updates to dispatcher"; TF_RETURN_IF_ERROR(EnsureDispatcherStubInitialized()); WorkerUpdateRequest req; - req.set_worker_id(worker_id_); for (int task_id : pending_completed_tasks_) { TaskProgress* update = req.add_updates(); update->set_task_id(task_id); diff --git a/tensorflow/core/kernels/data/experimental/data_service_dataset_op.cc b/tensorflow/core/kernels/data/experimental/data_service_dataset_op.cc index 8e1713e2d77..233a61f440e 100644 --- a/tensorflow/core/kernels/data/experimental/data_service_dataset_op.cc +++ b/tensorflow/core/kernels/data/experimental/data_service_dataset_op.cc @@ -338,7 +338,7 @@ class DataServiceDatasetOp::Dataset : public DatasetBase { } absl::flat_hash_map task_id_to_task; for (auto& task : tasks) { - task_id_to_task[task.id()] = task; + task_id_to_task[task.task_id()] = task; } mutex_lock l(mu_); job_finished_ = job_finished; @@ -371,8 +371,9 @@ class DataServiceDatasetOp::Dataset : public DatasetBase { get_next_cv_.notify_all(); continue; } - tasks_.push_back(std::make_shared( - task_info.id(), task_info.worker_address(), std::move(worker))); + tasks_.push_back(std::make_shared(task_info.task_id(), + task_info.worker_address(), + std::move(worker))); } if (dataset()->max_outstanding_requests_ == model::kAutotune) { // Adjust max_outstanding_requests to account for newly added tasks. diff --git a/tensorflow/core/protobuf/data/experimental/service_config.proto b/tensorflow/core/protobuf/data/experimental/service_config.proto index 872a47013eb..017aaa2a960 100644 --- a/tensorflow/core/protobuf/data/experimental/service_config.proto +++ b/tensorflow/core/protobuf/data/experimental/service_config.proto @@ -12,6 +12,9 @@ message DispatcherConfig { // An optional work directory to use for storing dispatcher state, and for // recovering during restarts. string work_dir = 3; + // Whether to run in fault tolerant mode, where dispatcher state is saved + // across restarts. + bool fault_tolerant_mode = 4; } // Configuration for a tf.data service WorkerServer. 
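A minimal usage sketch of the journal-based worker registration introduced by the patch above ([PATCH 2127/2522]). It follows the RegisterWorker helper shown in dispatcher_state_test.cc and assumes the DispatcherState and Update APIs exactly as declared in the diffs; the template arguments, which the diff rendering above drops, are reconstructed here as an assumption (std::shared_ptr<const DispatcherState::Worker>):

    #include <memory>
    #include <string>

    #include "tensorflow/core/data/service/dispatcher_state.h"
    #include "tensorflow/core/data/service/journal.pb.h"
    #include "tensorflow/core/platform/status.h"

    namespace tensorflow {
    namespace data {

    // Registers a worker by journaling a RegisterWorkerUpdate, then looks it
    // up again by address; worker addresses now serve as worker identifiers.
    Status RegisterAndLookupWorker(DispatcherState* state,
                                   const std::string& worker_address) {
      Update update;
      update.mutable_register_worker()->set_worker_address(worker_address);
      TF_RETURN_IF_ERROR(state->Apply(update));

      // ListWorkers() would now include this worker as well.
      std::shared_ptr<const DispatcherState::Worker> worker;
      return state->WorkerFromAddress(worker_address, &worker);
    }

    }  // namespace data
    }  // namespace tensorflow

Because registrations are recorded as RegisterWorkerUpdate entries in the journal rather than held only in dispatcher memory, a dispatcher restarted in fault_tolerant_mode can replay the journal and recover the same worker set, which is why integer worker ids are no longer needed.
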
From 46a8319ee74337182c7aadf80acbeb7f01eb7ffd Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Tue, 4 Aug 2020 18:04:52 -0700 Subject: [PATCH 2128/2522] Moving rest of the filesystems to Transactional API --- .../platform/default/posix_file_system.cc | 59 +++++++------- .../core/platform/default/posix_file_system.h | 68 +++++++---------- .../platform/hadoop/hadoop_file_system.cc | 57 +++++++------- .../core/platform/hadoop/hadoop_file_system.h | 67 ++++++---------- .../platform/windows/windows_file_system.cc | 60 +++++++-------- .../platform/windows/windows_file_system.h | 76 +++++++------------ tensorflow/core/util/memmapped_file_system.cc | 62 +++++++-------- tensorflow/core/util/memmapped_file_system.h | 63 ++++++--------- .../asset_manager_filesystem.cc | 61 +++++++-------- .../asset_manager_filesystem.h | 66 +++++++--------- 10 files changed, 276 insertions(+), 363 deletions(-) diff --git a/tensorflow/core/platform/default/posix_file_system.cc b/tensorflow/core/platform/default/posix_file_system.cc index 8533e34fc3f..18fea3fe15d 100644 --- a/tensorflow/core/platform/default/posix_file_system.cc +++ b/tensorflow/core/platform/default/posix_file_system.cc @@ -178,8 +178,8 @@ class PosixReadOnlyMemoryRegion : public ReadOnlyMemoryRegion { }; Status PosixFileSystem::NewRandomAccessFile( - const string& fname, - std::unique_ptr* result /*, TransactionToken* token */) { + const string& fname, TransactionToken* token, + std::unique_ptr* result) { string translated_fname = TranslateName(fname); Status s; int fd = open(translated_fname.c_str(), O_RDONLY); @@ -191,9 +191,9 @@ Status PosixFileSystem::NewRandomAccessFile( return s; } -Status PosixFileSystem::NewWritableFile( - const string& fname, - std::unique_ptr* result /*, TransactionToken* token */) { +Status PosixFileSystem::NewWritableFile(const string& fname, + TransactionToken* token, + std::unique_ptr* result) { string translated_fname = TranslateName(fname); Status s; FILE* f = fopen(translated_fname.c_str(), "w"); @@ -206,8 +206,8 @@ Status PosixFileSystem::NewWritableFile( } Status PosixFileSystem::NewAppendableFile( - const string& fname, - std::unique_ptr* result /*, TransactionToken* token */) { + const string& fname, TransactionToken* token, + std::unique_ptr* result) { string translated_fname = TranslateName(fname); Status s; FILE* f = fopen(translated_fname.c_str(), "a"); @@ -220,8 +220,8 @@ Status PosixFileSystem::NewAppendableFile( } Status PosixFileSystem::NewReadOnlyMemoryRegionFromFile( - const string& fname, std::unique_ptr* - result /*, TransactionToken* token */) { + const string& fname, TransactionToken* token, + std::unique_ptr* result) { string translated_fname = TranslateName(fname); Status s = Status::OK(); int fd = open(translated_fname.c_str(), O_RDONLY); @@ -244,17 +244,16 @@ Status PosixFileSystem::NewReadOnlyMemoryRegionFromFile( return s; } -Status PosixFileSystem::FileExists( - const string& fname /*, TransactionToken* token */) { +Status PosixFileSystem::FileExists(const string& fname, + TransactionToken* token) { if (access(TranslateName(fname).c_str(), F_OK) == 0) { return Status::OK(); } return errors::NotFound(fname, " not found"); } -Status PosixFileSystem::GetChildren( - const string& dir, - std::vector* result /*, TransactionToken* token */) { +Status PosixFileSystem::GetChildren(const string& dir, TransactionToken* token, + std::vector* result) { string translated_dir = TranslateName(dir); result->clear(); DIR* d = opendir(translated_dir.c_str()); @@ -274,14 +273,14 @@ Status 
PosixFileSystem::GetChildren( return Status::OK(); } -Status PosixFileSystem::GetMatchingPaths( - const string& pattern, - std::vector* results /*, TransactionToken* token */) { +Status PosixFileSystem::GetMatchingPaths(const string& pattern, + TransactionToken* token, + std::vector* results) { return internal::GetMatchingPaths(this, Env::Default(), pattern, results); } -Status PosixFileSystem::DeleteFile( - const string& fname /*, TransactionToken* token */) { +Status PosixFileSystem::DeleteFile(const string& fname, + TransactionToken* token) { Status result; if (unlink(TranslateName(fname).c_str()) != 0) { result = IOError(fname, errno); @@ -289,8 +288,7 @@ Status PosixFileSystem::DeleteFile( return result; } -Status PosixFileSystem::CreateDir( - const string& name /*, TransactionToken* token */) { +Status PosixFileSystem::CreateDir(const string& name, TransactionToken* token) { string translated = TranslateName(name); if (translated.empty()) { return errors::AlreadyExists(name); @@ -301,8 +299,7 @@ Status PosixFileSystem::CreateDir( return Status::OK(); } -Status PosixFileSystem::DeleteDir( - const string& name /*, TransactionToken* token */) { +Status PosixFileSystem::DeleteDir(const string& name, TransactionToken* token) { Status result; if (rmdir(TranslateName(name).c_str()) != 0) { result = IOError(name, errno); @@ -310,8 +307,8 @@ Status PosixFileSystem::DeleteDir( return result; } -Status PosixFileSystem::GetFileSize( - const string& fname, uint64* size /*, TransactionToken* token */) { +Status PosixFileSystem::GetFileSize(const string& fname, + TransactionToken* token, uint64* size) { Status s; struct stat sbuf; if (stat(TranslateName(fname).c_str(), &sbuf) != 0) { @@ -323,8 +320,8 @@ Status PosixFileSystem::GetFileSize( return s; } -Status PosixFileSystem::Stat( - const string& fname, FileStatistics* stats /*, TransactionToken* token */) { +Status PosixFileSystem::Stat(const string& fname, TransactionToken* token, + FileStatistics* stats) { Status s; struct stat sbuf; if (stat(TranslateName(fname).c_str(), &sbuf) != 0) { @@ -337,8 +334,8 @@ Status PosixFileSystem::Stat( return s; } -Status PosixFileSystem::RenameFile( - const string& src, const string& target /*, TransactionToken* token */) { +Status PosixFileSystem::RenameFile(const string& src, const string& target, + TransactionToken* token) { Status result; if (rename(TranslateName(src).c_str(), TranslateName(target).c_str()) != 0) { result = IOError(src, errno); @@ -346,8 +343,8 @@ Status PosixFileSystem::RenameFile( return result; } -Status PosixFileSystem::CopyFile( - const string& src, const string& target /*, TransactionToken* token */) { +Status PosixFileSystem::CopyFile(const string& src, const string& target, + TransactionToken* token) { string translated_src = TranslateName(src); struct stat sbuf; if (stat(translated_src.c_str(), &sbuf) != 0) { diff --git a/tensorflow/core/platform/default/posix_file_system.h b/tensorflow/core/platform/default/posix_file_system.h index a1c6f34ad65..8e301c8b2e4 100644 --- a/tensorflow/core/platform/default/posix_file_system.h +++ b/tensorflow/core/platform/default/posix_file_system.h @@ -27,63 +27,47 @@ class PosixFileSystem : public FileSystem { ~PosixFileSystem() {} + TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT; + Status NewRandomAccessFile( - const string& filename, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override; + const string& filename, TransactionToken* token, + std::unique_ptr* result) override; - Status NewWritableFile( - const 
string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override; + Status NewWritableFile(const string& fname, TransactionToken* token, + std::unique_ptr* result) override; - Status NewAppendableFile( - const string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override; + Status NewAppendableFile(const string& fname, TransactionToken* token, + std::unique_ptr* result) override; Status NewReadOnlyMemoryRegionFromFile( - const string& filename, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override; + const string& filename, TransactionToken* token, + std::unique_ptr* result) override; - Status FileExists( - const string& fname /*, TransactionToken* token = nullptr */) override; + Status FileExists(const string& fname, TransactionToken* token) override; - Status GetChildren( - const string& dir, - std::vector* result /*, TransactionToken* token = nullptr */) - override; + Status GetChildren(const string& dir, TransactionToken* token, + std::vector* result) override; - Status Stat( - const string& fname, - FileStatistics* stats /*, TransactionToken* token = nullptr */) override; + Status Stat(const string& fname, TransactionToken* token, + FileStatistics* stats) override; - Status GetMatchingPaths( - const string& pattern, - std::vector* results /*, TransactionToken* token = nullptr */) - override; + Status GetMatchingPaths(const string& pattern, TransactionToken* token, + std::vector* results) override; - Status DeleteFile( - const string& fname /*, TransactionToken* token = nullptr */) override; + Status DeleteFile(const string& fname, TransactionToken* token) override; - Status CreateDir( - const string& name /*, TransactionToken* token = nullptr */) override; + Status CreateDir(const string& name, TransactionToken* token) override; - Status DeleteDir( - const string& name /*, TransactionToken* token = nullptr */) override; + Status DeleteDir(const string& name, TransactionToken* token) override; - Status GetFileSize( - const string& fname, - uint64* size /*, TransactionToken* token = nullptr */) override; + Status GetFileSize(const string& fname, TransactionToken* token, + uint64* size) override; - Status RenameFile( - const string& src, - const string& target /*, TransactionToken* token = nullptr */) override; + Status RenameFile(const string& src, const string& target, + TransactionToken* token) override; - Status CopyFile( - const string& src, - const string& target /*, TransactionToken* token = nullptr */) override; + Status CopyFile(const string& src, const string& target, + TransactionToken* token) override; }; Status IOError(const string& context, int err_number); diff --git a/tensorflow/core/platform/hadoop/hadoop_file_system.cc b/tensorflow/core/platform/hadoop/hadoop_file_system.cc index 5b2c5a76aae..f8ed61c3ac9 100644 --- a/tensorflow/core/platform/hadoop/hadoop_file_system.cc +++ b/tensorflow/core/platform/hadoop/hadoop_file_system.cc @@ -280,8 +280,8 @@ class HDFSRandomAccessFile : public RandomAccessFile { }; Status HadoopFileSystem::NewRandomAccessFile( - const string& fname, - std::unique_ptr* result /*, TransactionToken* token */) { + const string& fname, TransactionToken* token, + std::unique_ptr* result) { hdfsFS fs = nullptr; TF_RETURN_IF_ERROR(Connect(fname, &fs)); @@ -373,8 +373,8 @@ class HDFSWritableFile : public WritableFile { }; Status HadoopFileSystem::NewWritableFile( - const string& fname, - std::unique_ptr* result /*, TransactionToken* token */) { + const string& fname, 
TransactionToken* token, + std::unique_ptr* result) { hdfsFS fs = nullptr; TF_RETURN_IF_ERROR(Connect(fname, &fs)); @@ -388,8 +388,8 @@ Status HadoopFileSystem::NewWritableFile( } Status HadoopFileSystem::NewAppendableFile( - const string& fname, - std::unique_ptr* result /*, TransactionToken* token */) { + const string& fname, TransactionToken* token, + std::unique_ptr* result) { hdfsFS fs = nullptr; TF_RETURN_IF_ERROR(Connect(fname, &fs)); @@ -403,8 +403,8 @@ Status HadoopFileSystem::NewAppendableFile( } Status HadoopFileSystem::NewReadOnlyMemoryRegionFromFile( - const string& fname, std::unique_ptr* - result /*, TransactionToken* token */) { + const string& fname, TransactionToken* token, + std::unique_ptr* result) { // hadoopReadZero() technically supports this call with the following // caveats: // - It only works up to 2 GB. We'd have to Stat() the file to ensure that @@ -414,8 +414,8 @@ Status HadoopFileSystem::NewReadOnlyMemoryRegionFromFile( return errors::Unimplemented("HDFS does not support ReadOnlyMemoryRegion"); } -Status HadoopFileSystem::FileExists( - const string& fname /*, TransactionToken* token */) { +Status HadoopFileSystem::FileExists(const string& fname, + TransactionToken* token) { hdfsFS fs = nullptr; TF_RETURN_IF_ERROR(Connect(fname, &fs)); if (libhdfs()->hdfsExists(fs, TranslateName(fname).c_str()) == 0) { @@ -424,9 +424,8 @@ Status HadoopFileSystem::FileExists( return errors::NotFound(fname, " not found."); } -Status HadoopFileSystem::GetChildren( - const string& dir, - std::vector* result /*, TransactionToken* token */) { +Status HadoopFileSystem::GetChildren(const string& dir, TransactionToken* token, + std::vector* result) { result->clear(); hdfsFS fs = nullptr; TF_RETURN_IF_ERROR(Connect(dir, &fs)); @@ -434,7 +433,7 @@ Status HadoopFileSystem::GetChildren( // hdfsListDirectory returns nullptr if the directory is empty. Do a separate // check to verify the directory exists first. FileStatistics stat; - TF_RETURN_IF_ERROR(Stat(dir, &stat)); + TF_RETURN_IF_ERROR(Stat(dir, token, &stat)); int entries = 0; hdfsFileInfo* info = @@ -453,14 +452,14 @@ Status HadoopFileSystem::GetChildren( return Status::OK(); } -Status HadoopFileSystem::GetMatchingPaths( - const string& pattern, - std::vector* results /*, TransactionToken* token */) { +Status HadoopFileSystem::GetMatchingPaths(const string& pattern, + TransactionToken* token, + std::vector* results) { return internal::GetMatchingPaths(this, Env::Default(), pattern, results); } -Status HadoopFileSystem::DeleteFile( - const string& fname /*, TransactionToken* token */) { +Status HadoopFileSystem::DeleteFile(const string& fname, + TransactionToken* token) { hdfsFS fs = nullptr; TF_RETURN_IF_ERROR(Connect(fname, &fs)); @@ -471,8 +470,7 @@ Status HadoopFileSystem::DeleteFile( return Status::OK(); } -Status HadoopFileSystem::CreateDir( - const string& dir /*, TransactionToken* token */) { +Status HadoopFileSystem::CreateDir(const string& dir, TransactionToken* token) { hdfsFS fs = nullptr; TF_RETURN_IF_ERROR(Connect(dir, &fs)); @@ -482,8 +480,7 @@ Status HadoopFileSystem::CreateDir( return Status::OK(); } -Status HadoopFileSystem::DeleteDir( - const string& dir /*, TransactionToken* token */) { +Status HadoopFileSystem::DeleteDir(const string& dir, TransactionToken* token) { hdfsFS fs = nullptr; TF_RETURN_IF_ERROR(Connect(dir, &fs)); @@ -502,7 +499,7 @@ Status HadoopFileSystem::DeleteDir( // the call is actually successful. Check again by Stat. 
if (info == nullptr && errno != 0) { FileStatistics stat; - TF_RETURN_IF_ERROR(Stat(dir, &stat)); + TF_RETURN_IF_ERROR(Stat(dir, token, &stat)); } if (entries > 0) { @@ -515,8 +512,8 @@ Status HadoopFileSystem::DeleteDir( return Status::OK(); } -Status HadoopFileSystem::GetFileSize( - const string& fname, uint64* size /*, TransactionToken* token */) { +Status HadoopFileSystem::GetFileSize(const string& fname, + TransactionToken* token, uint64* size) { hdfsFS fs = nullptr; TF_RETURN_IF_ERROR(Connect(fname, &fs)); @@ -530,8 +527,8 @@ Status HadoopFileSystem::GetFileSize( return Status::OK(); } -Status HadoopFileSystem::RenameFile( - const string& src, const string& target /*, TransactionToken* token */) { +Status HadoopFileSystem::RenameFile(const string& src, const string& target, + TransactionToken* token) { hdfsFS fs = nullptr; TF_RETURN_IF_ERROR(Connect(src, &fs)); @@ -548,8 +545,8 @@ Status HadoopFileSystem::RenameFile( return Status::OK(); } -Status HadoopFileSystem::Stat( - const string& fname, FileStatistics* stats /*, TransactionToken* token */) { +Status HadoopFileSystem::Stat(const string& fname, TransactionToken* token, + FileStatistics* stats) { hdfsFS fs = nullptr; TF_RETURN_IF_ERROR(Connect(fname, &fs)); diff --git a/tensorflow/core/platform/hadoop/hadoop_file_system.h b/tensorflow/core/platform/hadoop/hadoop_file_system.h index 13abc067cd8..5e7233633a6 100644 --- a/tensorflow/core/platform/hadoop/hadoop_file_system.h +++ b/tensorflow/core/platform/hadoop/hadoop_file_system.h @@ -32,63 +32,46 @@ class HadoopFileSystem : public FileSystem { HadoopFileSystem(); ~HadoopFileSystem(); + TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT; + Status NewRandomAccessFile( - const string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr*/) override; + const string& fname, TransactionToken* token, + std::unique_ptr* result) override; - Status NewWritableFile( - const string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr*/) override; + Status NewWritableFile(const string& fname, TransactionToken* token, + std::unique_ptr* result) override; - Status NewAppendableFile( - const string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr*/) override; + Status NewAppendableFile(const string& fname, TransactionToken* token, + std::unique_ptr* result) override; Status NewReadOnlyMemoryRegionFromFile( - const string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr*/) override; + const string& fname, TransactionToken* token, + std::unique_ptr* result) override; - Status FileExists( - const string& fname /*, TransactionToken* token = nullptr*/) override; + Status FileExists(const string& fname, TransactionToken* token) override; - Status GetChildren( - const string& dir, - std::vector* result /*, TransactionToken* token = nullptr*/) - override; + Status GetChildren(const string& dir, TransactionToken* token, + std::vector* result) override; - Status GetMatchingPaths( - const string& pattern, - std::vector* results /*, TransactionToken* token = nullptr*/) - override; + Status GetMatchingPaths(const string& pattern, TransactionToken* token, + std::vector* results) override; - Status DeleteFile( - const string& fname /*, TransactionToken* token = nullptr*/) override; + Status DeleteFile(const string& fname, TransactionToken* token) override; - Status CreateDir( - const string& name /*, TransactionToken* token = nullptr*/) override; + Status CreateDir(const string& name, TransactionToken* token) override; 
- Status DeleteDir( - const string& name /*, TransactionToken* token = nullptr*/) override; + Status DeleteDir(const string& name, TransactionToken* token) override; - Status GetFileSize( - const string& fname, - uint64* size /*, TransactionToken* token = nullptr*/) override; + Status GetFileSize(const string& fname, TransactionToken* token, + uint64* size) override; - Status RenameFile( - const string& src, - const string& target /*, TransactionToken* token = nullptr*/) override; + Status RenameFile(const string& src, const string& target, + TransactionToken* token) override; - Status Stat( - const string& fname, - FileStatistics* stat /*, TransactionToken* token = nullptr*/) override; + Status Stat(const string& fname, TransactionToken* token, + FileStatistics* stat) override; - string TranslateName( - const string& name /*, TransactionToken* token = nullptr*/) - const override; + string TranslateName(const string& name) const override; private: Status Connect(StringPiece fname, hdfsFS* fs); diff --git a/tensorflow/core/platform/windows/windows_file_system.cc b/tensorflow/core/platform/windows/windows_file_system.cc index e1e8656bce5..475f8791144 100644 --- a/tensorflow/core/platform/windows/windows_file_system.cc +++ b/tensorflow/core/platform/windows/windows_file_system.cc @@ -261,8 +261,8 @@ class WinReadOnlyMemoryRegion : public ReadOnlyMemoryRegion { } // namespace Status WindowsFileSystem::NewRandomAccessFile( - const string& fname, - std::unique_ptr* result /*, TransactionToken* token */) { + const string& fname, TransactionToken* token, + std::unique_ptr* result) { string translated_fname = TranslateName(fname); std::wstring ws_translated_fname = Utf8ToWideChar(translated_fname); result->reset(); @@ -289,8 +289,8 @@ Status WindowsFileSystem::NewRandomAccessFile( } Status WindowsFileSystem::NewWritableFile( - const string& fname, - std::unique_ptr* result /*, TransactionToken* token */) { + const string& fname, TransactionToken* token, + std::unique_ptr* result) { string translated_fname = TranslateName(fname); std::wstring ws_translated_fname = Utf8ToWideChar(translated_fname); result->reset(); @@ -310,8 +310,8 @@ Status WindowsFileSystem::NewWritableFile( } Status WindowsFileSystem::NewAppendableFile( - const string& fname, - std::unique_ptr* result /*, TransactionToken* token */) { + const string& fname, TransactionToken* token, + std::unique_ptr* result) { string translated_fname = TranslateName(fname); std::wstring ws_translated_fname = Utf8ToWideChar(translated_fname); result->reset(); @@ -341,8 +341,8 @@ Status WindowsFileSystem::NewAppendableFile( } Status WindowsFileSystem::NewReadOnlyMemoryRegionFromFile( - const string& fname, std::unique_ptr* - result /*, TransactionToken* token */) { + const string& fname, TransactionToken* token, + std::unique_ptr* result) { string translated_fname = TranslateName(fname); std::wstring ws_translated_fname = Utf8ToWideChar(translated_fname); result->reset(); @@ -418,8 +418,8 @@ Status WindowsFileSystem::NewReadOnlyMemoryRegionFromFile( return s; } -Status WindowsFileSystem::FileExists( - const string& fname /*, TransactionToken* token */) { +Status WindowsFileSystem::FileExists(const string& fname, + TransactionToken* token) { constexpr int kOk = 0; std::wstring ws_translated_fname = Utf8ToWideChar(TranslateName(fname)); if (_waccess(ws_translated_fname.c_str(), kOk) == 0) { @@ -428,9 +428,9 @@ Status WindowsFileSystem::FileExists( return errors::NotFound(fname, " not found"); } -Status WindowsFileSystem::GetChildren( - const string& 
dir, - std::vector* result /*, TransactionToken* token */) { +Status WindowsFileSystem::GetChildren(const string& dir, + TransactionToken* token, + std::vector* result) { string translated_dir = TranslateName(dir); std::wstring ws_translated_dir = Utf8ToWideChar(translated_dir); result->clear(); @@ -465,8 +465,8 @@ Status WindowsFileSystem::GetChildren( return Status::OK(); } -Status WindowsFileSystem::DeleteFile( - const string& fname /*, TransactionToken* token */) { +Status WindowsFileSystem::DeleteFile(const string& fname, + TransactionToken* token) { Status result; std::wstring file_name = Utf8ToWideChar(fname); if (_wunlink(file_name.c_str()) != 0) { @@ -475,8 +475,8 @@ Status WindowsFileSystem::DeleteFile( return result; } -Status WindowsFileSystem::CreateDir( - const string& name /*, TransactionToken* token */) { +Status WindowsFileSystem::CreateDir(const string& name, + TransactionToken* token) { Status result; std::wstring ws_name = Utf8ToWideChar(name); if (ws_name.empty()) { @@ -488,8 +488,8 @@ Status WindowsFileSystem::CreateDir( return result; } -Status WindowsFileSystem::DeleteDir( - const string& name /*, TransactionToken* token */) { +Status WindowsFileSystem::DeleteDir(const string& name, + TransactionToken* token) { Status result; std::wstring ws_name = Utf8ToWideChar(name); if (_wrmdir(ws_name.c_str()) != 0) { @@ -498,8 +498,8 @@ Status WindowsFileSystem::DeleteDir( return result; } -Status WindowsFileSystem::GetFileSize( - const string& fname, uint64* size /*, TransactionToken* token */) { +Status WindowsFileSystem::GetFileSize(const string& fname, + TransactionToken* token, uint64* size) { string translated_fname = TranslateName(fname); std::wstring ws_translated_dir = Utf8ToWideChar(translated_fname); Status result; @@ -517,8 +517,8 @@ Status WindowsFileSystem::GetFileSize( return result; } -Status WindowsFileSystem::IsDirectory( - const string& fname /*, TransactionToken* token */) { +Status WindowsFileSystem::IsDirectory(const string& fname, + TransactionToken* token) { TF_RETURN_IF_ERROR(FileExists(fname)); std::wstring ws_translated_fname = Utf8ToWideChar(TranslateName(fname)); if (PathIsDirectoryW(ws_translated_fname.c_str())) { @@ -527,8 +527,8 @@ Status WindowsFileSystem::IsDirectory( return Status(tensorflow::error::FAILED_PRECONDITION, "Not a directory"); } -Status WindowsFileSystem::RenameFile( - const string& src, const string& target /*, TransactionToken* token */) { +Status WindowsFileSystem::RenameFile(const string& src, const string& target, + TransactionToken* token) { Status result; // rename() is not capable of replacing the existing file as on Linux // so use OS API directly @@ -542,9 +542,9 @@ Status WindowsFileSystem::RenameFile( return result; } -Status WindowsFileSystem::GetMatchingPaths( - const string& pattern, - std::vector* results /*, TransactionToken* token */) { +Status WindowsFileSystem::GetMatchingPaths(const string& pattern, + TransactionToken* token, + std::vector* results) { // NOTE(mrry): The existing implementation of FileSystem::GetMatchingPaths() // does not handle Windows paths containing backslashes correctly. 
Since // Windows APIs will accept forward and backslashes equivalently, we @@ -567,8 +567,8 @@ bool WindowsFileSystem::Match(const string& filename, const string& pattern) { return PathMatchSpecW(ws_path.c_str(), ws_pattern.c_str()) == TRUE; } -Status WindowsFileSystem::Stat( - const string& fname, FileStatistics* stat /*, TransactionToken* token */) { +Status WindowsFileSystem::Stat(const string& fname, TransactionToken* token, + FileStatistics* stat) { Status result; struct _stat sbuf; std::wstring ws_translated_fname = Utf8ToWideChar(TranslateName(fname)); diff --git a/tensorflow/core/platform/windows/windows_file_system.h b/tensorflow/core/platform/windows/windows_file_system.h index 604cd141e40..8c550f53b84 100644 --- a/tensorflow/core/platform/windows/windows_file_system.h +++ b/tensorflow/core/platform/windows/windows_file_system.h @@ -32,72 +32,50 @@ class WindowsFileSystem : public FileSystem { ~WindowsFileSystem() {} + TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT; + Status NewRandomAccessFile( - const string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override; + const string& fname, TransactionToken* token, + std::unique_ptr* result) override; - Status NewWritableFile( - const string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override; + Status NewWritableFile(const string& fname, TransactionToken* token, + std::unique_ptr* result) override; - Status NewAppendableFile( - const string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override; + Status NewAppendableFile(const string& fname, TransactionToken* token, + std::unique_ptr* result) override; Status NewReadOnlyMemoryRegionFromFile( - const string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override; + const string& fname, TransactionToken* token, + std::unique_ptr* result) override; - Status FileExists( - const string& fname /*, TransactionToken* token = nullptr */) override; + Status FileExists(const string& fname, TransactionToken* token) override; - Status GetChildren( - const string& dir, - std::vector* result /*, TransactionToken* token = nullptr */) - override; + Status GetChildren(const string& dir, TransactionToken* token, + std::vector* result) override; - Status GetMatchingPaths( - const string& pattern, - std::vector* result /*, TransactionToken* token = nullptr */) - override; + Status GetMatchingPaths(const string& pattern, TransactionToken* token, + std::vector* result) override; - bool Match( - const string& filename, - const string& pattern /*, TransactionToken* token = nullptr */) override; + bool Match(const string& filename, const string& pattern) override; - Status Stat( - const string& fname, - FileStatistics* stat /*, TransactionToken* token = nullptr */) override; + Status Stat(const string& fname, TransactionToken* token, + FileStatistics* stat) override; - Status DeleteFile( - const string& fname /*, TransactionToken* token = nullptr */) override; + Status DeleteFile(const string& fname, TransactionToken* token) override; - Status CreateDir( - const string& name /*, TransactionToken* token = nullptr */) override; + Status CreateDir(const string& name, TransactionToken* token) override; - Status DeleteDir( - const string& name /*, TransactionToken* token = nullptr */) override; + Status DeleteDir(const string& name, TransactionToken* token) override; - Status GetFileSize( - const string& fname, - uint64* size /*, TransactionToken* token = nullptr */) override; 
+ Status GetFileSize(const string& fname, TransactionToken* token, + uint64* size) override; - Status IsDirectory( - const string& fname /*, TransactionToken* token = nullptr */) override; + Status IsDirectory(const string& fname, TransactionToken* token) override; - Status RenameFile( - const string& src, - const string& target /*, TransactionToken* token = nullptr */) override; + Status RenameFile(const string& src, const string& target, + TransactionToken* token) override; - string TranslateName( - const string& name /*, TransactionToken* token = nullptr */) - const override { - return name; - } + string TranslateName(const string& name) const override { return name; } char Separator() const override { return '\\'; }; }; diff --git a/tensorflow/core/util/memmapped_file_system.cc b/tensorflow/core/util/memmapped_file_system.cc index 1451d6350ce..c6bda8b07e9 100644 --- a/tensorflow/core/util/memmapped_file_system.cc +++ b/tensorflow/core/util/memmapped_file_system.cc @@ -86,8 +86,8 @@ class RandomAccessFileFromMemmapped : public RandomAccessFile { MemmappedFileSystem::MemmappedFileSystem() {} -Status MemmappedFileSystem::FileExists( - const string& fname /*, TransactionToken* token */) { +Status MemmappedFileSystem::FileExists(const string& fname, + TransactionToken* token) { if (!mapped_memory_) { return errors::FailedPrecondition("MemmappedEnv is not initialized"); } @@ -99,8 +99,8 @@ Status MemmappedFileSystem::FileExists( } Status MemmappedFileSystem::NewRandomAccessFile( - const string& filename, - std::unique_ptr* result /*, TransactionToken* token */) { + const string& filename, TransactionToken* token, + std::unique_ptr* result) { if (!mapped_memory_) { return errors::FailedPrecondition("MemmappedEnv is not initialized"); } @@ -115,8 +115,8 @@ Status MemmappedFileSystem::NewRandomAccessFile( } Status MemmappedFileSystem::NewReadOnlyMemoryRegionFromFile( - const string& filename, std::unique_ptr* - result /*, TransactionToken* token */) { + const string& filename, TransactionToken* token, + std::unique_ptr* result) { if (!mapped_memory_) { return errors::FailedPrecondition("MemmappedEnv is not initialized"); } @@ -130,8 +130,8 @@ Status MemmappedFileSystem::NewReadOnlyMemoryRegionFromFile( return Status::OK(); } -Status MemmappedFileSystem::GetFileSize( - const string& filename, uint64* size /*, TransactionToken* token */) { +Status MemmappedFileSystem::GetFileSize(const string& filename, + TransactionToken* token, uint64* size) { if (!mapped_memory_) { return errors::FailedPrecondition("MemmappedEnv is not initialized"); } @@ -143,59 +143,59 @@ Status MemmappedFileSystem::GetFileSize( return Status::OK(); } -Status MemmappedFileSystem::Stat( - const string& fname, FileStatistics* stat /*, TransactionToken* token */) { +Status MemmappedFileSystem::Stat(const string& fname, TransactionToken* token, + FileStatistics* stat) { uint64 size; - auto status = GetFileSize(fname, &size); + auto status = GetFileSize(fname, token, &size); if (status.ok()) { stat->length = size; } return status; } -Status MemmappedFileSystem::NewWritableFile( - const string& filename, - std::unique_ptr* wf /*, TransactionToken* token */) { +Status MemmappedFileSystem::NewWritableFile(const string& filename, + TransactionToken* token, + std::unique_ptr* wf) { return errors::Unimplemented("memmapped format doesn't support writing"); } Status MemmappedFileSystem::NewAppendableFile( - const string& filename, - std::unique_ptr* result /*, TransactionToken* token */) { + const string& filename, TransactionToken* 
token, + std::unique_ptr* result) { return errors::Unimplemented("memmapped format doesn't support writing"); } -Status MemmappedFileSystem::GetChildren( - const string& filename, - std::vector* strings /*, TransactionToken* token */) { +Status MemmappedFileSystem::GetChildren(const string& filename, + TransactionToken* token, + std::vector* strings) { return errors::Unimplemented("memmapped format doesn't support GetChildren"); } -Status MemmappedFileSystem::GetMatchingPaths( - const string& pattern, - std::vector* results /*, TransactionToken* token */) { +Status MemmappedFileSystem::GetMatchingPaths(const string& pattern, + TransactionToken* token, + std::vector* results) { return errors::Unimplemented( "memmapped format doesn't support GetMatchingPaths"); } -Status MemmappedFileSystem::DeleteFile( - const string& filename /*, TransactionToken* token */) { +Status MemmappedFileSystem::DeleteFile(const string& filename, + TransactionToken* token) { return errors::Unimplemented("memmapped format doesn't support DeleteFile"); } -Status MemmappedFileSystem::CreateDir( - const string& dirname /*, TransactionToken* token */) { +Status MemmappedFileSystem::CreateDir(const string& dirname, + TransactionToken* token) { return errors::Unimplemented("memmapped format doesn't support CreateDir"); } -Status MemmappedFileSystem::DeleteDir( - const string& dirname /*, TransactionToken* token */) { +Status MemmappedFileSystem::DeleteDir(const string& dirname, + TransactionToken* token) { return errors::Unimplemented("memmapped format doesn't support DeleteDir"); } -Status MemmappedFileSystem::RenameFile( - const string& filename_from, - const string& filename_to /*, TransactionToken* token */) { +Status MemmappedFileSystem::RenameFile(const string& filename_from, + const string& filename_to, + TransactionToken* token) { return errors::Unimplemented("memmapped format doesn't support RenameFile"); } diff --git a/tensorflow/core/util/memmapped_file_system.h b/tensorflow/core/util/memmapped_file_system.h index d8f19444454..27305a500f5 100644 --- a/tensorflow/core/util/memmapped_file_system.h +++ b/tensorflow/core/util/memmapped_file_system.h @@ -60,52 +60,39 @@ class MemmappedFileSystem : public FileSystem { MemmappedFileSystem(); ~MemmappedFileSystem() override = default; - Status FileExists( - const string& fname /*, TransactionToken* token = nullptr */) override; + + TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT; + + Status FileExists(const string& fname, TransactionToken* token) override; Status NewRandomAccessFile( - const string& filename, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override; + const string& filename, TransactionToken* token, + std::unique_ptr* result) override; Status NewReadOnlyMemoryRegionFromFile( - const string& filename, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override; + const string& filename, TransactionToken* token, + std::unique_ptr* result) override; // All these functions return Unimplemented error, the memmapped storage is // read only. 
- Status NewWritableFile( - const string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override; - Status NewAppendableFile( - const string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override; - Status GetChildren(const string& dir, - std::vector* - r /*, TransactionToken* token = nullptr */) override; - Status GetMatchingPaths( - const string& pattern, - std::vector* results /*, TransactionToken* token = nullptr */) - override; - Status DeleteFile( - const string& f /*, TransactionToken* token = nullptr */) override; - Status CreateDir( - const string& d /*, TransactionToken* token = nullptr */) override; - Status DeleteDir( - const string& d /*, TransactionToken* token = nullptr */) override; - Status RenameFile( - const string& s, - const string& t /*, TransactionToken* token = nullptr */) override; + Status NewWritableFile(const string& fname, TransactionToken* token, + std::unique_ptr* result) override; + Status NewAppendableFile(const string& fname, TransactionToken* token, + std::unique_ptr* result) override; + Status GetChildren(const string& dir, TransactionToken* token, + std::vector* r) override; + Status GetMatchingPaths(const string& pattern, TransactionToken* token, + std::vector* results) override; + Status DeleteFile(const string& f, TransactionToken* token) override; + Status CreateDir(const string& d, TransactionToken* token) override; + Status DeleteDir(const string& d, TransactionToken* token) override; + Status RenameFile(const string& s, const string& t, + TransactionToken* token) override; // These functions are implemented. - Status GetFileSize( - const string& f, - uint64* s /*, TransactionToken* token = nullptr */) override; + Status GetFileSize(const string& f, TransactionToken* token, + uint64* s) override; // Currently just returns size. - Status Stat( - const string& fname, - FileStatistics* stat /*, TransactionToken* token = nullptr */) override; + Status Stat(const string& fname, TransactionToken* token, + FileStatistics* stat) override; // Initializes filesystem from a file in memmapped format. 
Status InitializeFromFile(Env* env, const string& filename); diff --git a/tensorflow/tools/android/inference_interface/asset_manager_filesystem.cc b/tensorflow/tools/android/inference_interface/asset_manager_filesystem.cc index 04d5774adb8..648affc9926 100644 --- a/tensorflow/tools/android/inference_interface/asset_manager_filesystem.cc +++ b/tensorflow/tools/android/inference_interface/asset_manager_filesystem.cc @@ -124,8 +124,8 @@ AssetManagerFileSystem::AssetManagerFileSystem(AAssetManager* asset_manager, const string& prefix) : asset_manager_(asset_manager), prefix_(prefix) {} -Status AssetManagerFileSystem::FileExists( - const string& fname /*, TransactionToken* token */) { +Status AssetManagerFileSystem::FileExists(const string& fname, + TransactionToken* token) { string path = RemoveAssetPrefix(fname); auto asset = ScopedAsset( AAssetManager_open(asset_manager_, path.c_str(), AASSET_MODE_RANDOM)); @@ -136,8 +136,8 @@ Status AssetManagerFileSystem::FileExists( } Status AssetManagerFileSystem::NewRandomAccessFile( - const string& fname, - std::unique_ptr* result /*, TransactionToken* token */) { + const string& fname, TransactionToken* token, + std::unique_ptr* result) { string path = RemoveAssetPrefix(fname); auto asset = ScopedAsset( AAssetManager_open(asset_manager_, path.c_str(), AASSET_MODE_RANDOM)); @@ -149,8 +149,8 @@ Status AssetManagerFileSystem::NewRandomAccessFile( } Status AssetManagerFileSystem::NewReadOnlyMemoryRegionFromFile( - const string& fname, std::unique_ptr* - result /*, TransactionToken* token */) { + const string& fname, TransactionToken* token, + std::unique_ptr* result) { string path = RemoveAssetPrefix(fname); auto asset = ScopedAsset( AAssetManager_open(asset_manager_, path.c_str(), AASSET_MODE_STREAMING)); @@ -186,9 +186,9 @@ Status AssetManagerFileSystem::NewReadOnlyMemoryRegionFromFile( return Status::OK(); } -Status AssetManagerFileSystem::GetChildren( - const string& prefixed_dir, - std::vector* r /*, TransactionToken* token */) { +Status AssetManagerFileSystem::GetChildren(const string& prefixed_dir, + TransactionToken* token, + std::vector* r) { std::string path = NormalizeDirectoryPath(prefixed_dir); auto dir = ScopedAssetDir(AAssetManager_openDir(asset_manager_, path.c_str())); @@ -203,8 +203,8 @@ Status AssetManagerFileSystem::GetChildren( return Status::OK(); } -Status AssetManagerFileSystem::GetFileSize( - const string& fname, uint64* s /*, TransactionToken* token */) { +Status AssetManagerFileSystem::GetFileSize(const string& fname, + TransactionToken* token, uint64* s) { // If fname corresponds to a directory, return early. It doesn't map to an // AAsset, and would otherwise return NotFound. 
if (DirectoryExists(fname)) { @@ -221,8 +221,9 @@ Status AssetManagerFileSystem::GetFileSize( return Status::OK(); } -Status AssetManagerFileSystem::Stat( - const string& fname, FileStatistics* stat /*, TransactionToken* token */) { +Status AssetManagerFileSystem::Stat(const string& fname, + TransactionToken* token, + FileStatistics* stat) { uint64 size; stat->is_directory = DirectoryExists(fname); TF_RETURN_IF_ERROR(GetFileSize(fname, &size)); @@ -240,8 +241,8 @@ string AssetManagerFileSystem::RemoveAssetPrefix(const string& name) { return string(piece); } -bool AssetManagerFileSystem::DirectoryExists( - const std::string& fname /*, TransactionToken* token */) { +bool AssetManagerFileSystem::DirectoryExists(const std::string& fname, + TransactionToken* token) { std::string path = NormalizeDirectoryPath(fname); auto dir = ScopedAssetDir(AAssetManager_openDir(asset_manager_, path.c_str())); @@ -250,36 +251,36 @@ bool AssetManagerFileSystem::DirectoryExists( return AAssetDir_getNextFileName(dir.get()) != NULL; } -Status AssetManagerFileSystem::GetMatchingPaths( - const string& pattern, - std::vector* results /*, TransactionToken* token */) { +Status AssetManagerFileSystem::GetMatchingPaths(const string& pattern, + TransactionToken* token, + std::vector* results) { return internal::GetMatchingPaths(this, Env::Default(), pattern, results); } Status AssetManagerFileSystem::NewWritableFile( - const string& fname, - std::unique_ptr* result /*, TransactionToken* token */) { + const string& fname, TransactionToken* token, + std::unique_ptr* result) { return errors::Unimplemented("Asset storage is read only."); } Status AssetManagerFileSystem::NewAppendableFile( - const string& fname, - std::unique_ptr* result /*, TransactionToken* token */) { + const string& fname, TransactionToken* token, + std::unique_ptr* result) { return errors::Unimplemented("Asset storage is read only."); } -Status AssetManagerFileSystem::DeleteFile( - const string& f /*, TransactionToken* token */) { +Status AssetManagerFileSystem::DeleteFile(const string& f, + TransactionToken* token) { return errors::Unimplemented("Asset storage is read only."); } -Status AssetManagerFileSystem::CreateDir( - const string& d /*, TransactionToken* token */) { +Status AssetManagerFileSystem::CreateDir(const string& d, + TransactionToken* token) { return errors::Unimplemented("Asset storage is read only."); } -Status AssetManagerFileSystem::DeleteDir( - const string& d /*, TransactionToken* token */) { +Status AssetManagerFileSystem::DeleteDir(const string& d, + TransactionToken* token) { return errors::Unimplemented("Asset storage is read only."); } -Status AssetManagerFileSystem::RenameFile( - const string& s, const string& t /*, TransactionToken* token */) { +Status AssetManagerFileSystem::RenameFile(const string& s, const string& t, + TransactionToken* token) { return errors::Unimplemented("Asset storage is read only."); } diff --git a/tensorflow/tools/android/inference_interface/asset_manager_filesystem.h b/tensorflow/tools/android/inference_interface/asset_manager_filesystem.h index 329e55d6cc7..893d5ccb90a 100644 --- a/tensorflow/tools/android/inference_interface/asset_manager_filesystem.h +++ b/tensorflow/tools/android/inference_interface/asset_manager_filesystem.h @@ -42,52 +42,38 @@ class AssetManagerFileSystem : public FileSystem { AssetManagerFileSystem(AAssetManager* asset_manager, const string& prefix); ~AssetManagerFileSystem() override = default; - Status FileExists( - const string& fname /*, TransactionToken* token = 
nullptr*/) override; - Status NewRandomAccessFile( - const string& filename, - std::unique_ptr* - result /*, TransactionToken* token = nullptr*/) override; - Status NewReadOnlyMemoryRegionFromFile( - const string& filename, - std::unique_ptr* - result /*, TransactionToken* token = nullptr*/) override; + TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT; - Status GetFileSize( - const string& f, - uint64* s /*, TransactionToken* token = nullptr*/) override; + Status FileExists(const string& fname, TransactionToken* token) override; + Status NewRandomAccessFile( + const string& filename, TransactionToken* token, + std::unique_ptr* result) override; + Status NewReadOnlyMemoryRegionFromFile( + const string& filename, TransactionToken* token, + std::unique_ptr* result) override; + + Status GetFileSize(const string& f, TransactionToken* token, + uint64* s) override; // Currently just returns size. - Status Stat( - const string& fname, - FileStatistics* stat /*, TransactionToken* token = nullptr*/) override; - Status GetChildren( - const string& dir, - std::vector* r /*, TransactionToken* token = nullptr*/) override; + Status Stat(const string& fname, TransactionToken* token, + FileStatistics* stat) override; + Status GetChildren(const string& dir, TransactionToken* token, + std::vector* r) override; // All these functions return Unimplemented error. Asset storage is // read only. - Status NewWritableFile( - const string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr*/) override; - Status NewAppendableFile( - const string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr*/) override; - Status DeleteFile( - const string& f /*, TransactionToken* token = nullptr*/) override; - Status CreateDir( - const string& d /*, TransactionToken* token = nullptr*/) override; - Status DeleteDir( - const string& d /*, TransactionToken* token = nullptr*/) override; - Status RenameFile( - const string& s, - const string& t /*, TransactionToken* token = nullptr*/) override; + Status NewWritableFile(const string& fname, TransactionToken* token, + std::unique_ptr* result) override; + Status NewAppendableFile(const string& fname, TransactionToken* token, + std::unique_ptr* result) override; + Status DeleteFile(const string& f, TransactionToken* token) override; + Status CreateDir(const string& d, TransactionToken* token) override; + Status DeleteDir(const string& d, TransactionToken* token) override; + Status RenameFile(const string& s, const string& t, + TransactionToken* token) override; - Status GetMatchingPaths( - const string& pattern, - std::vector* results /*, TransactionToken* token = nullptr*/) - override; + Status GetMatchingPaths(const string& pattern, TransactionToken* token, + std::vector* results) override; private: string RemoveAssetPrefix(const string& name); From f05d6a01eefa65104fac428f6c23898694849dfa Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: Tue, 4 Aug 2020 18:04:38 -0700 Subject: [PATCH 2129/2522] Fix passing StringPiece to proto string setter in dispatcher_state_test. 
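A minimal illustrative sketch (not from the patch itself) of the pattern this fix applies in dispatcher_state_test.cc: generated proto string setters take const std::string&, so a StringPiece/string_view argument is either converted explicitly or the parameter type is changed, as the diff below does. The TaskProto type here is a hypothetical stand-in, not a real TensorFlow message.

#include <string>
#include "absl/strings/string_view.h"

// Hypothetical stand-in for a generated proto message with a string field.
struct TaskProto {
  void set_worker_address(const std::string& a) { worker_address = a; }
  std::string worker_address;
};

void FillWorkerAddress(TaskProto* task, absl::string_view worker_address) {
  // An explicit std::string conversion keeps the call site valid even when
  // the generated setter has no string_view overload; the test helper in the
  // diff below avoids the issue by taking const std::string& directly.
  task->set_worker_address(std::string(worker_address));
}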
PiperOrigin-RevId: 324930108 Change-Id: If7324887c5a8136cef4598150e97a701c83cfd1c --- tensorflow/core/data/service/dispatcher_state_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/data/service/dispatcher_state_test.cc b/tensorflow/core/data/service/dispatcher_state_test.cc index b5529951efb..78f507ec349 100644 --- a/tensorflow/core/data/service/dispatcher_state_test.cc +++ b/tensorflow/core/data/service/dispatcher_state_test.cc @@ -79,7 +79,7 @@ Status CreateNamedJob(int64 job_id, int64 dataset_id, NamedJobKey named_job_key, } Status CreateTask(int64 task_id, int64 job_id, int64 dataset_id, - StringPiece worker_address, DispatcherState* state) { + const std::string& worker_address, DispatcherState* state) { Update update; CreateTaskUpdate* create_task = update.mutable_create_task(); create_task->set_task_id(task_id); From 754dffaf7812d94233b6dbcba3abd2a10cb118e9 Mon Sep 17 00:00:00 2001 From: Rick Chao Date: Tue, 4 Aug 2020 18:17:22 -0700 Subject: [PATCH 2130/2522] PSv2: Attempt of a workaround for client_test's logging in windows/cpu_py38_full/nightly. PiperOrigin-RevId: 324931764 Change-Id: Id37db85a085c59a3c12382dd4eebeb0e5b288ec1 --- tensorflow/python/distribute/client/BUILD | 1 - tensorflow/python/distribute/client/client_test.py | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/distribute/client/BUILD b/tensorflow/python/distribute/client/BUILD index d37d855a390..35d8de95276 100644 --- a/tensorflow/python/distribute/client/BUILD +++ b/tensorflow/python/distribute/client/BUILD @@ -49,7 +49,6 @@ tf_py_test( srcs = ["client_test.py"], python_version = "PY3", shard_count = 12, - tags = ["no_windows"], # TODO(b/162751266) deps = [ ":client", "//tensorflow/python:client_testlib", diff --git a/tensorflow/python/distribute/client/client_test.py b/tensorflow/python/distribute/client/client_test.py index 459633aca2b..19deab26f63 100644 --- a/tensorflow/python/distribute/client/client_test.py +++ b/tensorflow/python/distribute/client/client_test.py @@ -80,10 +80,10 @@ class CoordinatedClosureQueueTest(test.TestCase): def get_func(label): def func(): - logging.info('Label: %s, before waiting 3 sec', label) + logging.info('Label: ' + label + ', before waiting 3 sec') # pylint: disable=logging-not-lazy time.sleep(3) processed_count[label] += 1 - logging.info('Label: %s, after waiting 3 sec', label) + logging.info('Label: ' + label + ', after waiting 3 sec') # pylint: disable=logging-not-lazy return func From deefe8cafb146ea7a41e47b85afbdeb886088573 Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: Tue, 4 Aug 2020 18:17:40 -0700 Subject: [PATCH 2131/2522] [tf.data service] Only create journal writer when fault_tolerant_mode is enabled. 
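A short sketch (not from the patch itself) of the two dispatcher configurations this change distinguishes, using only the DispatcherConfig fields referenced in the diff below (fault_tolerant_mode, work_dir); the include path is assumed.

#include "tensorflow/core/protobuf/data/experimental/service_config.pb.h"  // assumed path

tensorflow::data::experimental::DispatcherConfig config;

// Fault tolerance on: Start() creates the journal writer, and a work_dir is
// mandatory (an empty work_dir is rejected with InvalidArgument).
config.set_fault_tolerant_mode(true);
config.set_work_dir("/tmp/tf_data_dispatcher");  // hypothetical path

// Fault tolerance off (the default): no journal writer is created and the
// dispatcher cannot recover its state after a restart.
config.set_fault_tolerant_mode(false);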
PiperOrigin-RevId: 324931812 Change-Id: Ie0acbf5359d2db642118af886a3c754b3b07a6cd --- tensorflow/core/data/service/dispatcher_impl.cc | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/data/service/dispatcher_impl.cc b/tensorflow/core/data/service/dispatcher_impl.cc index ffeae96c117..9e705d51ea8 100644 --- a/tensorflow/core/data/service/dispatcher_impl.cc +++ b/tensorflow/core/data/service/dispatcher_impl.cc @@ -71,23 +71,21 @@ Status CreateWorkerStub(const std::string& address, const std::string& protocol, DataServiceDispatcherImpl::DataServiceDispatcherImpl( const experimental::DispatcherConfig& config) : config_(config) { - if (!config_.work_dir().empty()) { - journal_writer_ = absl::make_unique( - Env::Default(), JournalDir(config_.work_dir())); - } } Status DataServiceDispatcherImpl::Start() { + mutex_lock l(mu_); if (!config_.fault_tolerant_mode()) { LOG(INFO) << "Running with fault_tolerant_mode=False. The dispatcher will " "not be able to recover its state on restart."; return Status::OK(); } - mutex_lock l(mu_); if (config_.work_dir().empty()) { return errors::InvalidArgument( "fault_tolerant_mode is True, but no work_dir is configured."); } + journal_writer_ = absl::make_unique( + Env::Default(), JournalDir(config_.work_dir())); Update update; bool end_of_journal = false; FileJournalReader reader(Env::Default(), JournalDir(config_.work_dir())); From e95a955af8045240333ea4599de7bd11deae18ae Mon Sep 17 00:00:00 2001 From: Tomer Kaftan Date: Tue, 4 Aug 2020 18:35:44 -0700 Subject: [PATCH 2132/2522] Fixed a bug with Functional model serialization when a layer that produces kwarg args of another layer had already been used to define a different functional model. PiperOrigin-RevId: 324934074 Change-Id: Ibcee3958120e50bc72eb3a3c95410f8e5e1135ea --- tensorflow/python/keras/engine/functional.py | 13 +++- .../python/keras/engine/functional_test.py | 67 ++++++++++++++++--- tensorflow/python/keras/engine/node.py | 32 ++++----- tensorflow/python/keras/models.py | 2 + 4 files changed, 88 insertions(+), 26 deletions(-) diff --git a/tensorflow/python/keras/engine/functional.py b/tensorflow/python/keras/engine/functional.py index 7c1fd4d1c72..42c706a923d 100644 --- a/tensorflow/python/keras/engine/functional.py +++ b/tensorflow/python/keras/engine/functional.py @@ -1129,7 +1129,18 @@ def reconstruct_from_config(config, custom_objects=None, created_layers=None): tensor_index = t[2] layer = layer_map[layer_name] - node = layer._inbound_nodes[get_node_index(layer, node_index)] + new_node_index = get_node_index(layer, node_index) + if new_node_index is None: + # The inbound node may not have been processed yet, + # (This can happen e.g. if it depends on a different set + # of inputs than those that have been processed already). + # raise an IndexError so that the current node puts itself + # back on the unprocessed queue. + # Caution: This may lead to infinite loops for malformed + # network configurations! (or when there is a bug in + # the network config loading code). 
+ raise IndexError + node = layer._inbound_nodes[new_node_index] return nest.flatten(node.outputs)[tensor_index] return t diff --git a/tensorflow/python/keras/engine/functional_test.py b/tensorflow/python/keras/engine/functional_test.py index 1b6d15863e6..dc87098d71f 100644 --- a/tensorflow/python/keras/engine/functional_test.py +++ b/tensorflow/python/keras/engine/functional_test.py @@ -998,8 +998,11 @@ class NetworkConstructionTest(keras_parameterized.TestCase): # Check that second input was correctly added to first. self.assertEqual(history.history['loss'][0], 0.0) - @combinations.generate(combinations.keras_mode_combinations()) - def test_call_kwarg_derived_from_keras_layer(self): + @combinations.generate( + combinations.times( + combinations.keras_mode_combinations(), + combinations.combine(share_already_used_layer=[True, False]))) + def test_call_kwarg_derived_from_keras_layer(self, share_already_used_layer): class MaybeAdd(layers.Layer): @@ -1008,9 +1011,26 @@ class NetworkConstructionTest(keras_parameterized.TestCase): return x1 + x2 return x1 + class IdentityLayer(layers.Layer): + + def call(self, x): + return x + input1 = input_layer_lib.Input(10) input2 = input_layer_lib.Input(10) - outputs = MaybeAdd()(input1, x2=input2) + identity_layer = IdentityLayer() + + if share_already_used_layer: + # We have had model serialization/deserialization break in the past: + # when a layer was previously used to construct other functional models + # and had a non-empty list of inbound nodes before being used to define + # the model being serialized/deserialized. + # (The serialization/deserialization was not correctly adjusting + # the node_index serialization/deserialization). + # So, we explicitly test this case. + training_lib.Model([input1], identity_layer(input1)) + + outputs = MaybeAdd()(input1, x2=identity_layer(input2)) model = training_lib.Model([input1, input2], outputs) model.compile( 'sgd', @@ -1024,7 +1044,11 @@ class NetworkConstructionTest(keras_parameterized.TestCase): self.assertEqual(history.history['loss'][0], 0.0) model = training_lib.Model.from_config( - model.get_config(), custom_objects={'MaybeAdd': MaybeAdd}) + model.get_config(), + custom_objects={ + 'MaybeAdd': MaybeAdd, + 'IdentityLayer': IdentityLayer + }) model.compile( 'sgd', 'mse', @@ -1107,10 +1131,18 @@ class NetworkConstructionTest(keras_parameterized.TestCase): TypeError, 'Layer double was passed non-JSON-serializable arguments.'): model.get_config() - @combinations.generate(combinations.times( - combinations.keras_mode_combinations(), - combinations.keras_tensor_combinations())) - def test_call_kwarg_derived_from_keras_layer_and_first_arg_is_constant(self): + @combinations.generate( + combinations.times( + combinations.keras_mode_combinations(), + combinations.keras_tensor_combinations(), + combinations.combine(share_already_used_layer=[True, False]))) + def test_call_kwarg_derived_from_keras_layer_and_first_arg_is_constant( + self, share_already_used_layer): + + class IdentityLayer(layers.Layer): + + def call(self, x): + return x class MaybeAdd(layers.Layer): @@ -1120,7 +1152,18 @@ class NetworkConstructionTest(keras_parameterized.TestCase): return x1 input2 = input_layer_lib.Input(10) - outputs = MaybeAdd()(3., x2=input2) + identity_layer = IdentityLayer() + if share_already_used_layer: + # We have had model serialization/deserialization break in the past: + # when a layer was previously used to construct other functional models + # and had a non-empty list of inbound nodes before being used to define + # 
the model being serialized/deserialized. + # (The serialization/deserialization was not correctly adjusting + # the node_index serialization/deserialization). + # So, we explicitly test this case. + training_lib.Model([input2], identity_layer(input2)) + + outputs = MaybeAdd()(3., x2=identity_layer(input2)) model = training_lib.Model([input2], outputs) model.compile( 'sgd', @@ -1134,7 +1177,11 @@ class NetworkConstructionTest(keras_parameterized.TestCase): self.assertEqual(history.history['loss'][0], 0.0) model = training_lib.Model.from_config( - model.get_config(), custom_objects={'MaybeAdd': MaybeAdd}) + model.get_config(), + custom_objects={ + 'MaybeAdd': MaybeAdd, + 'IdentityLayer': IdentityLayer + }) model.compile( 'sgd', 'mse', diff --git a/tensorflow/python/keras/engine/node.py b/tensorflow/python/keras/engine/node.py index eb85bce7e75..2a35477eea2 100644 --- a/tensorflow/python/keras/engine/node.py +++ b/tensorflow/python/keras/engine/node.py @@ -169,6 +169,23 @@ class Node(object): arguments.update(kwargs) kwargs = arguments + def _serialize_keras_tensor(t): + """Serializes a single Tensor passed to `call`.""" + if hasattr(t, '_keras_history'): + kh = t._keras_history + node_index = kh.node_index + node_key = make_node_key(kh.layer.name, node_index) + new_node_index = node_conversion_map.get(node_key, 0) + return [kh.layer.name, new_node_index, kh.tensor_index] + + if isinstance(t, np.ndarray): + return t.tolist() + + if isinstance(t, ops.Tensor): + return backend.get_value(t).tolist() + + return t + kwargs = nest.map_structure(_serialize_keras_tensor, kwargs) try: json.dumps(kwargs, default=json_utils.get_json_type) @@ -273,18 +290,3 @@ class KerasHistory( def is_keras_tensor(obj): return hasattr(obj, '_keras_history') - - -def _serialize_keras_tensor(t): - """Serializes a single Tensor passed to `call`.""" - if hasattr(t, '_keras_history'): - kh = t._keras_history - return [kh.layer.name, kh.node_index, kh.tensor_index] - - if isinstance(t, np.ndarray): - return t.tolist() - - if isinstance(t, ops.Tensor): - return backend.get_value(t).tolist() - - return t diff --git a/tensorflow/python/keras/models.py b/tensorflow/python/keras/models.py index 37a3f01272f..76324621a8b 100644 --- a/tensorflow/python/keras/models.py +++ b/tensorflow/python/keras/models.py @@ -206,6 +206,8 @@ def _clone_functional_model(model, input_tensors=None, layer_fn=_clone_layer): ancillary_layers = [ layer for layer in created_layers.values() if layer not in model.layers ] + # TODO(b/162887610): This may need to adjust the inbound node index if the + # created layers had already been used to define other models. if ancillary_layers: new_nodes = nest.flatten([ layer.inbound_nodes[1:] From 6388aa43d7dc1bc1d87887c680f153caab30268f Mon Sep 17 00:00:00 2001 From: Yujing Zhang Date: Tue, 4 Aug 2020 19:09:39 -0700 Subject: [PATCH 2133/2522] Change the function output type, either a Tensor for a local output or a TensorShape for a remote output, preparing for the support of function outputs placed on remote workers. 
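A minimal sketch (not from the patch itself) of how a caller consumes the new output type: FunctionRet is an absl::variant of Tensor and TensorShape, where a local output carries the tensor itself and a remote output carries only its shape. This mirrors the ret.index()/absl::get pattern used throughout the diff below.

#include <vector>
#include "absl/types/variant.h"
#include "tensorflow/core/framework/function.h"      // FunctionRet
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"

void ConsumeFunctionRets(
    const std::vector<tensorflow::FunctionRet>& function_rets) {
  for (const tensorflow::FunctionRet& ret : function_rets) {
    if (ret.index() == 0) {
      // Local output: the tensor data is available in this process.
      const tensorflow::Tensor& t = absl::get<tensorflow::Tensor>(ret);
      (void)t;  // ... use the tensor ...
    } else {
      // Remote output: only the shape is known locally; the data stays on
      // the remote worker.
      const tensorflow::TensorShape& shape =
          absl::get<tensorflow::TensorShape>(ret);
      (void)shape;  // ... use the shape ...
    }
  }
}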
PiperOrigin-RevId: 324938354 Change-Id: I126822bd75bb284c917af7a72f2868601e798f09 --- .../common_runtime/eager/kernel_and_device.cc | 18 +++++- .../process_function_library_runtime.cc | 61 ++++++++++++++++--- .../process_function_library_runtime.h | 7 ++- .../process_function_library_runtime_test.cc | 25 ++++---- .../cluster_function_library_runtime.cc | 14 ++++- .../cluster_function_library_runtime.h | 2 +- .../eager/cluster_function_library_runtime.cc | 30 ++++++++- .../eager/cluster_function_library_runtime.h | 7 ++- .../eager/eager_service_impl_test.cc | 6 +- tensorflow/core/framework/function.h | 16 +++-- 10 files changed, 144 insertions(+), 42 deletions(-) diff --git a/tensorflow/core/common_runtime/eager/kernel_and_device.cc b/tensorflow/core/common_runtime/eager/kernel_and_device.cc index 1f506c318bc..5b7232f539a 100644 --- a/tensorflow/core/common_runtime/eager/kernel_and_device.cc +++ b/tensorflow/core/common_runtime/eager/kernel_and_device.cc @@ -395,13 +395,25 @@ void KernelAndDeviceFunc::RunAsync( }, profiler::ContextType::kTfExecutor, opts->step_id, profiler::TraceMeLevel::kInfo); - pflr_->Run(*opts, handle_, inputs, outputs, - [opts, rendezvous, local_cm, step_container, this, - done = std::move(done)](const Status& s) { + std::vector* function_rets = new std::vector; + pflr_->Run(*opts, handle_, inputs, function_rets, + [opts, outputs, function_rets, rendezvous, local_cm, + step_container, this, done = std::move(done)](const Status& s) { rendezvous->Unref(); if (step_container == nullptr) { this->step_container_.CleanUp(); } + if (s.ok()) { + // TODO(b/162618595): Change the type of `outputs` to + // support TensorShapes for remote outputs and remove the + // FunctionRet to Tensor conversion here. + for (const auto& ret : *function_rets) { + if (ret.index() == 0) { + outputs->push_back(absl::get(ret)); + } + } + } + delete function_rets; done(s); }); } diff --git a/tensorflow/core/common_runtime/process_function_library_runtime.cc b/tensorflow/core/common_runtime/process_function_library_runtime.cc index aee482d92da..b31b2b78bf0 100644 --- a/tensorflow/core/common_runtime/process_function_library_runtime.cc +++ b/tensorflow/core/common_runtime/process_function_library_runtime.cc @@ -398,6 +398,21 @@ std::vector GetLocalArgs(gtl::ArraySlice args) { return tensors; } +// Update the done callback to push Tensors in `tensors` into `rets`. 
+FunctionLibraryRuntime::DoneCallback TensorsToFunctionRetsDoneCallback( + std::vector* rets, std::vector* tensors, + FunctionLibraryRuntime::DoneCallback done) { + return [rets, tensors, done = std::move(done)](const Status& s) { + if (s.ok()) { + for (const auto& t : *tensors) { + rets->push_back(t); + } + } + delete tensors; + done(s); + }; +} + } // anonymous namespace Status ProcessFunctionLibraryRuntime::PinArgsAndRets( @@ -1021,7 +1036,7 @@ Status ProcessFunctionLibraryRuntime::GetOutputDevices( void ProcessFunctionLibraryRuntime::RunMultiDevice( const FunctionLibraryRuntime::Options& opts, - FunctionLibraryRuntime::Handle handle, std::vector* rets, + FunctionLibraryRuntime::Handle handle, std::vector* rets, std::vector>* cleanup_items, FunctionLibraryRuntime::DoneCallback done, std::functionStartCancel(); continue; } - std::vector* comp_rets = new std::vector; + std::vector* comp_rets = new std::vector; rets->resize(data->num_outputs_); auto component_fn_callback = [comp_rets, rets, comp_data, refcounted_done, @@ -1136,8 +1151,11 @@ void ProcessFunctionLibraryRuntime::RunMultiDevice( << " with handle " << handle; VLOG(4) << " with " << opts_copy.DebugString(); - flr->Run(opts_copy, handle, GetLocalArgs(comp_args.args), comp_rets, - std::move(component_fn_callback)); + std::vector* comp_tensor_rets = new std::vector; + flr->Run( + opts_copy, handle, GetLocalArgs(comp_args.args), comp_tensor_rets, + TensorsToFunctionRetsDoneCallback(comp_rets, comp_tensor_rets, + std::move(component_fn_callback))); } else { opts_copy.remote_execution = true; @@ -1362,6 +1380,23 @@ void ProcessFunctionLibraryRuntime::Run( auto* cleanup_items = new std::vector>; done = ApplyCleanUpToDoneCallback(cleanup_items, std::move(done), new_opts.step_id, created_rendezvous); + std::vector* function_rets = new std::vector; + done = [rets, function_rets, done = std::move(done)](const Status& s) { + Status status = s; + if (status.ok()) { + for (const auto& ret : *function_rets) { + if (ret.index() == 0) { + rets->push_back(absl::get(ret)); + } else { + status.Update(errors::Internal( + "Expect a Tensor as a function output but got a TensorShape.")); + break; + } + } + } + delete function_rets; + done(status); + }; bool multi_device; { tf_shared_lock l(mu_); @@ -1392,21 +1427,21 @@ void ProcessFunctionLibraryRuntime::Run( } return Status::OK(); }; - return RunMultiDevice(new_opts, handle, rets, cleanup_items, + return RunMultiDevice(new_opts, handle, function_rets, cleanup_items, std::move(done), std::move(get_component_args)); } std::vector local_args; for (const auto& tensor : args) { local_args.push_back(tensor); } - RunInternal(new_opts, handle, local_args, rets, cleanup_items, + RunInternal(new_opts, handle, local_args, function_rets, cleanup_items, std::move(done)); } void ProcessFunctionLibraryRuntime::RunInternal( const FunctionLibraryRuntime::Options& opts, FunctionLibraryRuntime::Handle handle, gtl::ArraySlice args, - std::vector* rets, + std::vector* rets, std::vector>* cleanup_items, FunctionLibraryRuntime::DoneCallback done) const { FunctionLibraryRuntime* flr = nullptr; @@ -1475,10 +1510,13 @@ void ProcessFunctionLibraryRuntime::RunInternal( int64 num_returns = remote_rets->size(); delete remote_rets; // Now receive the return values from the target. 
+ std::vector* recv_tensors = new std::vector; ReceiveTensorsAsync(target_device, source_device, "ret_", target_incarnation, num_returns, device_context, rets_alloc_attrs, rendezvous, - rets, std::move(done)); + recv_tensors, + TensorsToFunctionRetsDoneCallback( + rets, recv_tensors, std::move(done))); }); return; } @@ -1570,11 +1608,14 @@ Status ProcessFunctionLibraryRuntime::RunSync( void ProcessFunctionLibraryRuntime::Run( const FunctionLibraryRuntime::Options& opts, FunctionLibraryRuntime::Handle handle, const FunctionArgsInterface& args, - std::vector* rets, + std::vector* rets, FunctionLibraryRuntime::DoneCallback done) const { if (!args.HasRemoteOrPackedInputs()) { const std::vector local_inputs = args.GetLocalTensors(); - return Run(opts, handle, local_inputs, rets, std::move(done)); + std::vector* tensor_rets = new std::vector; + return Run( + opts, handle, local_inputs, tensor_rets, + TensorsToFunctionRetsDoneCallback(rets, tensor_rets, std::move(done))); } FunctionLibraryRuntime::Options new_opts = opts; diff --git a/tensorflow/core/common_runtime/process_function_library_runtime.h b/tensorflow/core/common_runtime/process_function_library_runtime.h index 0bd85c62df5..3ba04f17880 100644 --- a/tensorflow/core/common_runtime/process_function_library_runtime.h +++ b/tensorflow/core/common_runtime/process_function_library_runtime.h @@ -191,7 +191,7 @@ class ProcessFunctionLibraryRuntime { void Run(const FunctionLibraryRuntime::Options& opts, FunctionLibraryRuntime::Handle handle, - const FunctionArgsInterface& args, std::vector* rets, + const FunctionArgsInterface& args, std::vector* rets, FunctionLibraryRuntime::DoneCallback done) const; Status RunSync(const FunctionLibraryRuntime::Options& opts, @@ -304,7 +304,7 @@ class ProcessFunctionLibraryRuntime { void RunMultiDevice( const FunctionLibraryRuntime::Options& opts, - FunctionLibraryRuntime::Handle handle, std::vector* rets, + FunctionLibraryRuntime::Handle handle, std::vector* rets, std::vector>* cleanup_items, FunctionLibraryRuntime::DoneCallback done, std::function args, std::vector* rets, + gtl::ArraySlice args, + std::vector* rets, std::vector>* cleanup_items, FunctionLibraryRuntime::DoneCallback done) const; diff --git a/tensorflow/core/common_runtime/process_function_library_runtime_test.cc b/tensorflow/core/common_runtime/process_function_library_runtime_test.cc index be279c84d1a..54c821d282a 100644 --- a/tensorflow/core/common_runtime/process_function_library_runtime_test.cc +++ b/tensorflow/core/common_runtime/process_function_library_runtime_test.cc @@ -72,7 +72,7 @@ class TestClusterFLR : public DistributedFunctionLibraryRuntime { void Run(const FunctionLibraryRuntime::Options& opts, FunctionLibraryRuntime::LocalHandle handle, - gtl::ArraySlice args, std::vector* rets, + gtl::ArraySlice args, std::vector* rets, FunctionLibraryRuntime::DoneCallback done) override {} void CleanUp(uint64 step_id, FunctionLibraryRuntime::LocalHandle handle, @@ -209,12 +209,12 @@ class ProcessFunctionLibraryRuntimeTest : public ::testing::Test { #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM } - template + template Status RunWithRuntime( const string& name, FunctionLibraryRuntime::Options opts, test::function::Attrs attrs, const FunctionLibraryRuntime::InstantiateOptions& instantiate_opts, - const T& args, std::vector rets, + const T& args, std::vector rets, ProcessFunctionLibraryRuntime* pflr) { FunctionLibraryRuntime::Handle handle; Status status = pflr->Instantiate(name, attrs, instantiate_opts, &handle); @@ -234,7 +234,7 @@ class 
ProcessFunctionLibraryRuntimeTest : public ::testing::Test { Notification done; opts.runner = &runner; - std::vector out; + std::vector out; pflr->Run(opts, handle, args, &out, [&status, &done](const Status& s) { status = s; done.Notify(); @@ -273,7 +273,7 @@ class ProcessFunctionLibraryRuntimeTest : public ::testing::Test { const FunctionLibraryRuntime::InstantiateOptions& instantiate_opts, const std::vector& args, std::vector rets, ProcessFunctionLibraryRuntime* pflr = nullptr) { - return RunWithRuntime>( + return RunWithRuntime, Tensor>( name, opts, attrs, instantiate_opts, args, rets, proc_flr_.get()); } @@ -281,9 +281,9 @@ class ProcessFunctionLibraryRuntimeTest : public ::testing::Test { const string& name, FunctionLibraryRuntime::Options opts, test::function::Attrs attrs, const FunctionLibraryRuntime::InstantiateOptions& instantiate_opts, - const FunctionArgsInterface& args, std::vector rets, + const FunctionArgsInterface& args, std::vector rets, ProcessFunctionLibraryRuntime* pflr = nullptr) { - return RunWithRuntime( + return RunWithRuntime( name, opts, attrs, instantiate_opts, args, rets, proc_flr_.get()); } @@ -879,10 +879,12 @@ TEST_F(ProcessFunctionLibraryRuntimeTest, MultiDevice_CompositeDevice) { handles.push_back(TensorValue(&resource_handle0)); handles.push_back(TensorValue(&resource_handle1)); TestFunctionPackedArgs args(0, std::move(handles)); - Tensor ret; + FunctionRet ret; TF_CHECK_OK(RunWithPackedArgs("AddVarAcrossDevices", opts, {{"T", DT_FLOAT}}, inst_opts, args, {&ret})); - test::ExpectTensorEqual(ret, test::AsTensor({40, 60})); + EXPECT_EQ(ret.index(), 0); + test::ExpectTensorEqual(absl::get(ret), + test::AsTensor({40, 60})); } // Packed Tensor @@ -1226,9 +1228,10 @@ TEST_F(ProcessFunctionLibraryRuntimeTest, SessionMetadataPresentAfterCloning) { instantiate_opts.target = "/job:a/replica:0/task:0/cpu:0"; const auto x = test::AsTensor({17}); Tensor y; - TF_CHECK_OK(RunWithRuntime>( + Status s = RunWithRuntime, Tensor>( "SessionMetadataReaderFn", opts, {}, instantiate_opts, {x}, {&y}, - cloned_proc_flr.get())); + cloned_proc_flr.get()); + TF_CHECK_OK(s); SessionMetadata read_metadata; ASSERT_TRUE(protobuf::TextFormat::ParseFromString(y.scalar()(), &read_metadata)); diff --git a/tensorflow/core/distributed_runtime/cluster_function_library_runtime.cc b/tensorflow/core/distributed_runtime/cluster_function_library_runtime.cc index 7ddba8811b4..3f7867200f8 100644 --- a/tensorflow/core/distributed_runtime/cluster_function_library_runtime.cc +++ b/tensorflow/core/distributed_runtime/cluster_function_library_runtime.cc @@ -333,7 +333,7 @@ void ClusterFunctionLibraryRuntime::Run( void ClusterFunctionLibraryRuntime::Run( const FunctionLibraryRuntime::Options& opts, FunctionLibraryRuntime::LocalHandle handle, - gtl::ArraySlice args, std::vector* rets, + gtl::ArraySlice args, std::vector* rets, FunctionLibraryRuntime::DoneCallback done) { std::vector tensors; for (const auto& arg : args) { @@ -346,7 +346,17 @@ void ClusterFunctionLibraryRuntime::Run( return; } } - return Run(opts, handle, tensors, rets, std::move(done)); + std::vector* ret_tensors = new std::vector; + return Run(opts, handle, tensors, ret_tensors, + [rets, ret_tensors, done = std::move(done)](const Status& s) { + if (s.ok()) { + for (const auto& t : *ret_tensors) { + rets->push_back(t); + } + } + delete ret_tensors; + done(s); + }); } void ClusterFunctionLibraryRuntime::CleanUp( diff --git a/tensorflow/core/distributed_runtime/cluster_function_library_runtime.h 
b/tensorflow/core/distributed_runtime/cluster_function_library_runtime.h index b720fe7ad6d..eb9ce64bcdb 100644 --- a/tensorflow/core/distributed_runtime/cluster_function_library_runtime.h +++ b/tensorflow/core/distributed_runtime/cluster_function_library_runtime.h @@ -49,7 +49,7 @@ class ClusterFunctionLibraryRuntime : public DistributedFunctionLibraryRuntime { void Run(const FunctionLibraryRuntime::Options& opts, FunctionLibraryRuntime::LocalHandle handle, - gtl::ArraySlice args, std::vector* rets, + gtl::ArraySlice args, std::vector* rets, FunctionLibraryRuntime::DoneCallback done) override; void CleanUp(uint64 step_id, FunctionLibraryRuntime::LocalHandle handle, diff --git a/tensorflow/core/distributed_runtime/eager/cluster_function_library_runtime.cc b/tensorflow/core/distributed_runtime/eager/cluster_function_library_runtime.cc index d8613e5f9b9..03944e12590 100644 --- a/tensorflow/core/distributed_runtime/eager/cluster_function_library_runtime.cc +++ b/tensorflow/core/distributed_runtime/eager/cluster_function_library_runtime.cc @@ -118,13 +118,31 @@ void EagerClusterFunctionLibraryRuntime::Run( for (const auto& tensor : args) { function_args.push_back(tensor); } - Run(opts, handle, function_args, rets, std::move(done)); + std::vector* function_rets = new std::vector; + Run(opts, handle, function_args, function_rets, + [rets, function_rets, done = std::move(done)](const Status& s) { + Status status = s; + if (status.ok()) { + for (const auto& t : *function_rets) { + if (t.index() == 0) { + rets->push_back(absl::get(t)); + } else { + status.Update( + errors::Internal("Expect a Tensor as a remote function " + "output but got a TensorShape.")); + break; + } + } + } + delete function_rets; + done(status); + }); } void EagerClusterFunctionLibraryRuntime::Run( const FunctionLibraryRuntime::Options& opts, FunctionLibraryRuntime::LocalHandle handle, - gtl::ArraySlice args, std::vector* rets, + gtl::ArraySlice args, std::vector* rets, FunctionLibraryRuntime::DoneCallback done) { FunctionData* function_data = nullptr; { @@ -204,6 +222,14 @@ void EagerClusterFunctionLibraryRuntime::Run( done(s); return; } + if (!response->shape().empty() && !response->tensor().empty()) { + done(errors::Internal( + "Both shape and tensor are specified in the same response")); + return; + } + for (const auto& shape : response->shape()) { + rets->push_back(shape); + } for (const auto& tensor_proto : response->tensor()) { Tensor t; if (t.FromProto(tensor_proto)) { diff --git a/tensorflow/core/distributed_runtime/eager/cluster_function_library_runtime.h b/tensorflow/core/distributed_runtime/eager/cluster_function_library_runtime.h index 9df9d1aecc1..6e60ee0b13d 100644 --- a/tensorflow/core/distributed_runtime/eager/cluster_function_library_runtime.h +++ b/tensorflow/core/distributed_runtime/eager/cluster_function_library_runtime.h @@ -64,11 +64,12 @@ class EagerClusterFunctionLibraryRuntime gtl::ArraySlice args, std::vector* rets, FunctionLibraryRuntime::DoneCallback done) override; - // The component function inputs `args` can be RemoteTensorHandles, which will - // be lazily resolved remotely where the inputs are actually consumed. + // The component function inputs `args` and outputs `rets` may refer to remote + // tensors on a remote device, which will be lazily resolved remotely where + // the inputs/outputs are actually consumed. 
void Run(const FunctionLibraryRuntime::Options& opts, FunctionLibraryRuntime::LocalHandle handle, - gtl::ArraySlice args, std::vector* rets, + gtl::ArraySlice args, std::vector* rets, FunctionLibraryRuntime::DoneCallback done) override; void CleanUp(uint64 step_id, FunctionLibraryRuntime::LocalHandle handle, diff --git a/tensorflow/core/distributed_runtime/eager/eager_service_impl_test.cc b/tensorflow/core/distributed_runtime/eager/eager_service_impl_test.cc index a2412eb9625..be81355cbc8 100644 --- a/tensorflow/core/distributed_runtime/eager/eager_service_impl_test.cc +++ b/tensorflow/core/distributed_runtime/eager/eager_service_impl_test.cc @@ -830,7 +830,7 @@ TEST_F(FunctionWithRemoteInputsTest, EagerPFLRTest) { input.set_op_device(local_device_); input.set_device(local_device_); std::vector inputs = {input}; - std::vector outputs; + std::vector outputs; gtl::InlinedVector tensor_args = {TensorValue()}; TestExecuteNodeArgs args( std::move(tensor_args), @@ -845,6 +845,10 @@ TEST_F(FunctionWithRemoteInputsTest, EagerPFLRTest) { }); done.WaitForNotification(); TF_ASSERT_OK(status); + EXPECT_EQ(outputs.size(), 1); + EXPECT_EQ(outputs.at(0).index(), 1); + const TensorShape& shape = absl::get(outputs.at(0)); + EXPECT_EQ(shape, TensorShape({2, 2})); CheckOutputsAndClose(op_id); } diff --git a/tensorflow/core/framework/function.h b/tensorflow/core/framework/function.h index 95f733d23a6..c7e6e2d158c 100644 --- a/tensorflow/core/framework/function.h +++ b/tensorflow/core/framework/function.h @@ -901,6 +901,9 @@ typedef FunctionArg; #endif +// Either a local tensor or the shape of a remote tensor. +typedef absl::variant FunctionRet; + // Used to instantiate and run functions in a distributed system. class DistributedFunctionLibraryRuntime { public: @@ -929,14 +932,15 @@ class DistributedFunctionLibraryRuntime { // Run an instantiated remote function (specified by `handle`) with a list of // input Tensors or RemoteTensorHandles as `args` and get its output Tensors - // in `rets`. When using RemoteTensorHandles as function inputs, the - // corresponding tensor data will be resolved on the remote worker, so it is - // not required to be locally available on the caller side. Using - // RemoteTensorHandle inputs is not supported in TensorFlow v1 runtime. - // TODO(yujingzhang): Support outputting tensors on remote devices. + // or TensorShapes in `rets`. When using RemoteTensorHandles as function + // inputs or TensorShapes as outputs, the corresponding tensor data will be + // resolved on the remote worker, so it is not required to be locally + // available on the caller side. Using RemoteTensorHandle inputs is not + // supported in TensorFlow v1 runtime. virtual void Run(const FunctionLibraryRuntime::Options& opts, FunctionLibraryRuntime::LocalHandle handle, - gtl::ArraySlice args, std::vector* rets, + gtl::ArraySlice args, + std::vector* rets, FunctionLibraryRuntime::DoneCallback done) = 0; // Clean up a previously instantiated function on remote worker. From eaa5235e003799784dcea14c528b570ee2634a55 Mon Sep 17 00:00:00 2001 From: Yuefeng Zhou Date: Tue, 4 Aug 2020 19:12:35 -0700 Subject: [PATCH 2134/2522] Improve docstring of strategy.run. 
PiperOrigin-RevId: 324938654 Change-Id: I3e13d90c026fad42657bb8094ccb32dc86e36b4b --- .../python/distribute/distribute_lib.py | 62 ++++++++++++++----- 1 file changed, 46 insertions(+), 16 deletions(-) diff --git a/tensorflow/python/distribute/distribute_lib.py b/tensorflow/python/distribute/distribute_lib.py index 43af23ab096..522849ac951 100644 --- a/tensorflow/python/distribute/distribute_lib.py +++ b/tensorflow/python/distribute/distribute_lib.py @@ -1146,10 +1146,11 @@ class StrategyBase(object): dataset_fn, options) def run(self, fn, args=(), kwargs=None, options=None): - """Run `fn` on each replica, with the given arguments. + """Invokes `fn` on each replica, with the given arguments. - Executes ops specified by `fn` on each replica. If `args` or `kwargs` have - `tf.distribute.DistributedValues`, such as those produced by a + This method is the primary way to distribute your computation with a + tf.distribute object. It invokes `fn` on each replica. If `args` or `kwargs` + have `tf.distribute.DistributedValues`, such as those produced by a `tf.distribute.DistributedDataset` from `tf.distribute.Strategy.experimental_distribute_dataset` or `tf.distribute.Strategy.experimental_distribute_datasets_from_function`, @@ -1157,20 +1158,27 @@ class StrategyBase(object): component of `tf.distribute.DistributedValues` that correspond to that replica. - `fn` may call `tf.distribute.get_replica_context()` to access members such - as `all_reduce`. + `fn` is invoked under a replica context. `fn` may call + `tf.distribute.get_replica_context()` to access members such as + `all_reduce`. Please see the module-level docstring of tf.distribute for the + concept of replica context. - All arguments in `args` or `kwargs` should either be nest of tensors or - `tf.distribute.DistributedValues` containing tensors or composite tensors. + All arguments in `args` or `kwargs` should either be Python values of a + nested structure of tensors, e.g. a list of tensors, in which case `args` + and `kwargs` will be passed to the `fn` invoked on each replica. Or `args` + or `kwargs` can be `tf.distribute.DistributedValues` containing tensors or + composite tensors, i.e. `tf.compat.v1.TensorInfo.CompositeTensor`, in which + case each `fn` call will get the component of a + `tf.distribute.DistributedValues` corresponding to its replica. IMPORTANT: Depending on the implementation of `tf.distribute.Strategy` and whether eager execution is enabled, `fn` may be called one or more times. If `fn` is annotated with `tf.function` or `tf.distribute.Strategy.run` is - called inside a `tf.function`, eager execution is disabled and `fn` is - called once (or once per replica, if you are using MirroredStrategy) to - generate a Tensorflow graph, which will then be reused for execution with - new inputs. Otherwise, if eager execution is enabled, `fn` will be called - every step just like regular python code. + called inside a `tf.function` (eager execution is disabled inside a + `tf.function` by default), `fn` is called once per replica to generate a + Tensorflow graph, which will then be reused for execution with new inputs. + Otherwise, if eager execution is enabled, `fn` will be called once per + replica every step just like regular python code. Example usage: @@ -1205,11 +1213,33 @@ class StrategyBase(object): >>> result + 3. Use `tf.distribute.ReplicaContext` to allreduce values. + + >>> strategy = tf.distribute.MirroredStrategy(["gpu:0", "gpu:1"]) + >>> @tf.function + ... def run(): + ... def value_fn(value_context): + ... 
return tf.constant(value_context.replica_id_in_sync_group) + ... distributed_values = ( + ... strategy.experimental_distribute_values_from_function( + ... value_fn)) + ... def replica_fn(input): + ... return tf.distribute.get_replica_context().all_reduce("sum", input) + ... return strategy.run(replica_fn, args=(distributed_values,)) + >>> result = run() + >>> result + PerReplica:{ + 0: , + 1: + } + Args: - fn: The function to run. The output must be a `tf.nest` of `Tensor`s. - args: (Optional) Positional arguments to `fn`. - kwargs: (Optional) Keyword arguments to `fn`. - options: (Optional) An instance of `tf.distribute.RunOptions` specifying + fn: The function to run on each replica. + args: Optional positional arguments to `fn`. Its element can be a Python + value, a tensor or a `tf.distribute.DistributedValues`. + kwargs: Optional keyword arguments to `fn`. Its element can be a Python + value, a tensor or a `tf.distribute.DistributedValues`. + options: An optional instance of `tf.distribute.RunOptions` specifying the options to run `fn`. Returns: From 77ee5e02721ba797fe01d47019e6017d2bb09ab7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 4 Aug 2020 19:20:06 -0700 Subject: [PATCH 2135/2522] Rename LoadLibrary to avoid conflict with Windows macros PiperOrigin-RevId: 324939413 Change-Id: I2ad9f90c302f56ba4dfe847da44f9cd104457fbd --- tensorflow/c/c_api.cc | 7 +++---- tensorflow/c/env.cc | 4 ++-- .../c/experimental/filesystem/modular_filesystem.cc | 2 +- .../c/experimental/filesystem/modular_filesystem_test.cc | 1 - tensorflow/core/framework/load_library.cc | 6 +++--- tensorflow/core/framework/op_kernel.cc | 3 ++- tensorflow/core/platform/default/env.cc | 5 +++-- tensorflow/core/platform/default/load_library.cc | 2 +- tensorflow/core/platform/env.h | 8 +++++--- tensorflow/core/platform/hadoop/hadoop_file_system.cc | 2 +- tensorflow/core/platform/load_library.h | 2 +- tensorflow/core/platform/windows/env.cc | 6 +++--- tensorflow/core/platform/windows/load_library.cc | 3 +-- tensorflow/stream_executor/platform/default/dso_loader.cc | 2 +- third_party/eigen3/unsupported/Eigen/CXX11/Tensor | 1 - 15 files changed, 27 insertions(+), 27 deletions(-) diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc index 36a08c8cfc9..2e1759ecea0 100644 --- a/tensorflow/c/c_api.cc +++ b/tensorflow/c/c_api.cc @@ -213,7 +213,6 @@ void TF_Reset(const TF_SessionOptions* opt, const char** containers, namespace tensorflow { - Status MessageToBuffer(const tensorflow::protobuf::MessageLite& in, TF_Buffer* out) { if (out->data != nullptr) { @@ -306,8 +305,8 @@ void TF_GraphSetOutputHandleShapesAndTypes(TF_Graph* graph, TF_Output output, } // Helpers for loading a TensorFlow plugin (a .so file). 
-Status LoadLibrary(const char* library_filename, void** result, - const void** buf, size_t* len); +Status LoadDynamicLibrary(const char* library_filename, void** result, + const void** buf, size_t* len); // TODO(josh11b,mrry): Change Session to be able to use a Graph* // directly, instead of requiring us to serialize to a GraphDef and @@ -552,7 +551,7 @@ void TF_PRun(TF_DeprecatedSession* s, const char* handle, TF_Library* TF_LoadLibrary(const char* library_filename, TF_Status* status) { TF_Library* lib_handle = new TF_Library; - status->status = tensorflow::LoadLibrary( + status->status = tensorflow::LoadDynamicLibrary( library_filename, &lib_handle->lib_handle, &lib_handle->op_list.data, &lib_handle->op_list.length); if (!status->status.ok()) { diff --git a/tensorflow/c/env.cc b/tensorflow/c/env.cc index e731c0659a7..fbde13dea5a 100644 --- a/tensorflow/c/env.cc +++ b/tensorflow/c/env.cc @@ -191,8 +191,8 @@ void* TF_LoadSharedLibrary(const char* library_filename, TF_Status* status) { void* handle = nullptr; TF_SetStatus(status, TF_OK, ""); ::tensorflow::Set_TF_Status_from_Status( - status, - ::tensorflow::Env::Default()->LoadLibrary(library_filename, &handle)); + status, ::tensorflow::Env::Default()->LoadDynamicLibrary(library_filename, + &handle)); return handle; } diff --git a/tensorflow/c/experimental/filesystem/modular_filesystem.cc b/tensorflow/c/experimental/filesystem/modular_filesystem.cc index 40258e43801..00a587521fd 100644 --- a/tensorflow/c/experimental/filesystem/modular_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/modular_filesystem.cc @@ -462,7 +462,7 @@ Status RegisterFilesystemPlugin(const std::string& dso_path) { // Step 1: Load plugin Env* env = Env::Default(); void* dso_handle; - TF_RETURN_IF_ERROR(env->LoadLibrary(dso_path.c_str(), &dso_handle)); + TF_RETURN_IF_ERROR(env->LoadDynamicLibrary(dso_path.c_str(), &dso_handle)); // Step 2: Load symbol for `TF_InitPlugin` void* dso_symbol; diff --git a/tensorflow/c/experimental/filesystem/modular_filesystem_test.cc b/tensorflow/c/experimental/filesystem/modular_filesystem_test.cc index 8ee47da01dd..7e0a95cc915 100644 --- a/tensorflow/c/experimental/filesystem/modular_filesystem_test.cc +++ b/tensorflow/c/experimental/filesystem/modular_filesystem_test.cc @@ -33,7 +33,6 @@ limitations under the License. // Windows defines the following macros to convert foo to fooA or fooW, // depending on the type of the string argument. We don't use these macros, so // undefine them here. -#undef LoadLibrary #undef CopyFile #undef DeleteFile #undef TranslateName diff --git a/tensorflow/core/framework/load_library.cc b/tensorflow/core/framework/load_library.cc index b9e33b148f7..34cd4b3386b 100644 --- a/tensorflow/core/framework/load_library.cc +++ b/tensorflow/core/framework/load_library.cc @@ -43,8 +43,8 @@ struct Library { // and OpList. Ops and kernels are registered as globals when a library is // loaded for the first time. Without caching, every subsequent load would not // perform initialization again, so the OpList would be empty. 
-Status LoadLibrary(const char* library_filename, void** result, - const void** buf, size_t* len) { +Status LoadDynamicLibrary(const char* library_filename, void** result, + const void** buf, size_t* len) { static mutex mu(LINKER_INITIALIZED); static std::unordered_map loaded_libs; Env* env = Env::Default(); @@ -76,7 +76,7 @@ Status LoadLibrary(const char* library_filename, void** result, return s; })); OpRegistry::Global()->DeferRegistrations(); - s = env->LoadLibrary(library_filename, &library.handle); + s = env->LoadDynamicLibrary(library_filename, &library.handle); if (s.ok()) { s = OpRegistry::Global()->ProcessRegistrations(); } diff --git a/tensorflow/core/framework/op_kernel.cc b/tensorflow/core/framework/op_kernel.cc index 1930cc98da1..d9b679534ee 100644 --- a/tensorflow/core/framework/op_kernel.cc +++ b/tensorflow/core/framework/op_kernel.cc @@ -1211,7 +1211,8 @@ void LoadDynamicKernelsInternal() { if (s.ok() || override_abi_check) { // TODO(gunan): Store the handles to the opened files. void* unused_filehandle; - TF_CHECK_OK(env->LoadLibrary(fullpath.c_str(), &unused_filehandle)); + TF_CHECK_OK( + env->LoadDynamicLibrary(fullpath.c_str(), &unused_filehandle)); } else { LOG(WARNING) << "Not loading plugin library " << fullpath << ": " << s.error_message(); diff --git a/tensorflow/core/platform/default/env.cc b/tensorflow/core/platform/default/env.cc index 90e0ee97355..b933fa005a7 100644 --- a/tensorflow/core/platform/default/env.cc +++ b/tensorflow/core/platform/default/env.cc @@ -185,8 +185,9 @@ class PosixEnv : public Env { }); } - Status LoadLibrary(const char* library_filename, void** handle) override { - return tensorflow::internal::LoadLibrary(library_filename, handle); + Status LoadDynamicLibrary(const char* library_filename, + void** handle) override { + return tensorflow::internal::LoadDynamicLibrary(library_filename, handle); } Status GetSymbolFromLibrary(void* handle, const char* symbol_name, diff --git a/tensorflow/core/platform/default/load_library.cc b/tensorflow/core/platform/default/load_library.cc index ef9edcc4501..bbe5824acfa 100644 --- a/tensorflow/core/platform/default/load_library.cc +++ b/tensorflow/core/platform/default/load_library.cc @@ -23,7 +23,7 @@ namespace tensorflow { namespace internal { -Status LoadLibrary(const char* library_filename, void** handle) { +Status LoadDynamicLibrary(const char* library_filename, void** handle) { *handle = dlopen(library_filename, RTLD_NOW | RTLD_LOCAL); if (!*handle) { return errors::NotFound(dlerror()); diff --git a/tensorflow/core/platform/env.h b/tensorflow/core/platform/env.h index 25544e87702..7b716798c28 100644 --- a/tensorflow/core/platform/env.h +++ b/tensorflow/core/platform/env.h @@ -334,7 +334,8 @@ class Env { // OK from the function. // Otherwise returns nullptr in "*handle" and an error status from the // function. - virtual Status LoadLibrary(const char* library_filename, void** handle) = 0; + virtual Status LoadDynamicLibrary(const char* library_filename, + void** handle) = 0; // \brief Get a pointer to a symbol from a dynamic library. 
// @@ -411,8 +412,9 @@ class EnvWrapper : public Env { void SchedClosureAfter(int64 micros, std::function closure) override { target_->SchedClosureAfter(micros, closure); } - Status LoadLibrary(const char* library_filename, void** handle) override { - return target_->LoadLibrary(library_filename, handle); + Status LoadDynamicLibrary(const char* library_filename, + void** handle) override { + return target_->LoadDynamicLibrary(library_filename, handle); } Status GetSymbolFromLibrary(void* handle, const char* symbol_name, void** symbol) override { diff --git a/tensorflow/core/platform/hadoop/hadoop_file_system.cc b/tensorflow/core/platform/hadoop/hadoop_file_system.cc index 5b2c5a76aae..327f506665f 100644 --- a/tensorflow/core/platform/hadoop/hadoop_file_system.cc +++ b/tensorflow/core/platform/hadoop/hadoop_file_system.cc @@ -70,7 +70,7 @@ class LibHDFS { private: void LoadAndBind() { auto TryLoadAndBind = [this](const char* name, void** handle) -> Status { - TF_RETURN_IF_ERROR(Env::Default()->LoadLibrary(name, handle)); + TF_RETURN_IF_ERROR(Env::Default()->LoadDynamicLibrary(name, handle)); #define BIND_HDFS_FUNC(function) \ TF_RETURN_IF_ERROR(BindFunc(*handle, #function, &function)); diff --git a/tensorflow/core/platform/load_library.h b/tensorflow/core/platform/load_library.h index 01efd4c1d01..60e84238487 100644 --- a/tensorflow/core/platform/load_library.h +++ b/tensorflow/core/platform/load_library.h @@ -22,7 +22,7 @@ namespace tensorflow { namespace internal { -Status LoadLibrary(const char* library_filename, void** handle); +Status LoadDynamicLibrary(const char* library_filename, void** handle); Status GetSymbolFromLibrary(void* handle, const char* symbol_name, void** symbol); string FormatLibraryFileName(const string& name, const string& version); diff --git a/tensorflow/core/platform/windows/env.cc b/tensorflow/core/platform/windows/env.cc index d75d2d5773d..ea6d1424529 100644 --- a/tensorflow/core/platform/windows/env.cc +++ b/tensorflow/core/platform/windows/env.cc @@ -22,7 +22,6 @@ limitations under the License. #include #include #include -#undef LoadLibrary #undef ERROR #include @@ -156,8 +155,9 @@ class WindowsEnv : public Env { SetThreadpoolTimer(timer, &FileDueTime, 0, 0); } - Status LoadLibrary(const char* library_filename, void** handle) override { - return tensorflow::internal::LoadLibrary(library_filename, handle); + Status LoadDynamicLibrary(const char* library_filename, + void** handle) override { + return tensorflow::internal::LoadDynamicLibrary(library_filename, handle); } Status GetSymbolFromLibrary(void* handle, const char* symbol_name, diff --git a/tensorflow/core/platform/windows/load_library.cc b/tensorflow/core/platform/windows/load_library.cc index f95e770cc6b..67fdffeca15 100644 --- a/tensorflow/core/platform/windows/load_library.cc +++ b/tensorflow/core/platform/windows/load_library.cc @@ -22,7 +22,6 @@ limitations under the License. 
#include #include #include -#undef LoadLibrary #undef ERROR #include "tensorflow/core/platform/errors.h" @@ -34,7 +33,7 @@ namespace tensorflow { namespace internal { -Status LoadLibrary(const char* library_filename, void** handle) { +Status LoadDynamicLibrary(const char* library_filename, void** handle) { string file_name = library_filename; std::replace(file_name.begin(), file_name.end(), '/', '\\'); diff --git a/tensorflow/stream_executor/platform/default/dso_loader.cc b/tensorflow/stream_executor/platform/default/dso_loader.cc index 01af4114536..6e0113ab05a 100644 --- a/tensorflow/stream_executor/platform/default/dso_loader.cc +++ b/tensorflow/stream_executor/platform/default/dso_loader.cc @@ -43,7 +43,7 @@ port::StatusOr GetDsoHandle(const string& name, const string& version) { auto filename = port::Env::Default()->FormatLibraryFileName(name, version); void* dso_handle; port::Status status = - port::Env::Default()->LoadLibrary(filename.c_str(), &dso_handle); + port::Env::Default()->LoadDynamicLibrary(filename.c_str(), &dso_handle); if (status.ok()) { LOG(INFO) << "Successfully opened dynamic library " << filename; return dso_handle; diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/Tensor b/third_party/eigen3/unsupported/Eigen/CXX11/Tensor index 861a87b68bf..5bb7ca95db5 100644 --- a/third_party/eigen3/unsupported/Eigen/CXX11/Tensor +++ b/third_party/eigen3/unsupported/Eigen/CXX11/Tensor @@ -11,5 +11,4 @@ inline void sleep(unsigned int seconds) { Sleep(1000*seconds); } // prevent clashes. #undef DeleteFile #undef ERROR -#undef LoadLibrary #endif // _WIN32 From a778b8f95e5915573e26f8e3d62a4cedaed10a8c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 4 Aug 2020 19:35:56 -0700 Subject: [PATCH 2136/2522] If an input-output pair is configured to be must-alias(off by default), they must be aliased at runtime. 
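For reference, a minimal sketch of how an entry-computation alias is declared through the XlaBuilder API touched by this patch. The computation, shapes, and function name are illustrative assumptions; SetUpAlias is used with the three-argument signature shown in the diff below, and the alias is 'may-alias', so XLA inserts a copy when the parameter buffer is not actually donated at runtime.

    #include "tensorflow/compiler/xla/client/xla_builder.h"
    #include "tensorflow/compiler/xla/shape_util.h"
    #include "tensorflow/compiler/xla/statusor.h"

    // Two scalar parameters, tuple result; tuple element {0} may alias
    // parameter 0 at parameter ShapeIndex {}.
    xla::StatusOr<xla::XlaComputation> BuildAliasedComputation() {
      xla::XlaBuilder builder("alias_example");
      const xla::Shape scalar = xla::ShapeUtil::MakeShape(xla::F32, {});
      xla::XlaOp p0 = xla::Parameter(&builder, 0, scalar, "p0");
      xla::XlaOp p1 = xla::Parameter(&builder, 1, scalar, "p1");
      xla::Tuple(&builder, {p0, p1});
      builder.SetUpAlias(/*output_index=*/{0}, /*param_number=*/0,
                         /*param_index=*/{});
      return builder.Build();
    }

In HLO text the same configuration is written as input_output_alias={ {0}: (0, {}) }; the parser change in this patch additionally accepts the short form {0}: 0 when the parameter ShapeIndex is empty.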
PiperOrigin-RevId: 324941010 Change-Id: I80995e25ce367ca17e5f884da55874252265f487 --- .../utils/compile_mlir_util_test.cc | 2 +- tensorflow/compiler/xla/client/xla_builder.cc | 2 +- tensorflow/compiler/xla/client/xla_builder.h | 17 ++---- .../xla/service/cpu/cpu_executable.cc | 6 -- .../xla/service/gpu/gpu_executable.cc | 6 -- tensorflow/compiler/xla/service/hlo.proto | 14 +---- .../service/hlo_input_output_alias_config.cc | 38 +++--------- .../service/hlo_input_output_alias_config.h | 32 +++------- tensorflow/compiler/xla/service/hlo_parser.cc | 59 ++++++++----------- .../compiler/xla/service/hlo_parser_test.cc | 41 +++++++++++-- .../xla/tests/buffer_donation_test.cc | 49 ++------------- .../tpu/tpu_executable_interface.cc | 18 ------ 12 files changed, 90 insertions(+), 194 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util_test.cc b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util_test.cc index 8a07aab11e1..6ebf6897bb1 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util_test.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util_test.cc @@ -524,7 +524,7 @@ TEST(CompileGraphToXlaHlo, Resources) { ASSERT_TRUE(status_or_hlo_module.ok()); constexpr char expected_hlo_module_string[] = - R"(HloModule main.4, input_output_alias={ {0}: (1, {}, may_alias) } + R"(HloModule main.4, input_output_alias={ {0}: 1 } ENTRY %main.4 (Arg_0.1: f32[2], Arg_1.2: f32[2]) -> (f32[2]) { %Arg_1.2 = f32[2]{0} parameter(1) diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc index 484fb0aabe7..52f61408cbb 100644 --- a/tensorflow/compiler/xla/client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_builder.cc @@ -446,7 +446,7 @@ StatusOr XlaBuilder::Build(int64 root_id, alias.param_index.ToString().c_str()); } TF_RETURN_IF_ERROR(config.SetUpAlias(alias.output_index, alias.param_number, - alias.param_index, alias.kind)); + alias.param_index)); } *module->mutable_input_output_alias() = config.ToProto(); return Status::OK(); diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h index aa5074d28d9..1960d0c4632 100644 --- a/tensorflow/compiler/xla/client/xla_builder.h +++ b/tensorflow/compiler/xla/client/xla_builder.h @@ -32,7 +32,6 @@ limitations under the License. #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/dynamic_parameter_binding.h" #include "tensorflow/compiler/xla/service/hlo.pb.h" -#include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" @@ -350,16 +349,12 @@ class XlaBuilder { // not available until the computation is built, and eventual error in the // arguments of this API will be detected only at computation Build() time. // - // Note: Except when 'must-alias' is true, alias is assumed to be 'may-alias' - // and only donated buffer at runtime will be aliased with output. If a buffer - // is not donated at runtime, a copy will be inserted by XLA to prevent buffer - // clobbering. + // Note: Aliasing API is 'may-alias' and only donated buffer at runtime will + // be aliased with output. If a buffer is not donated at runtime, a copy will + // be inserted by XLA to prevent buffer clobbering. 
void SetUpAlias(const ShapeIndex& output_index, int64 param_number, - const ShapeIndex& param_index, - HloInputOutputAliasConfig::AliasKind kind = - HloInputOutputAliasConfig::AliasKind::kMayAlias) { - input_output_aliases_.push_back( - {output_index, param_number, param_index, kind}); + const ShapeIndex& param_index) { + input_output_aliases_.push_back({output_index, param_number, param_index}); } // Describes an input/output alias as inserted by the SetUpAlias() API. @@ -370,8 +365,6 @@ class XlaBuilder { int64 param_number; // Specifies the index of the aliased buffer in the parameter ShapeIndex param_index; - // Specifies if the alias is a must alias or may alias. - HloInputOutputAliasConfig::AliasKind kind; }; // Looks up the HloInstruction and sets the frontend attribute "attribute" to diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc index 7431e829b8e..0abcc91a1d7 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc @@ -247,12 +247,6 @@ StatusOr CpuExecutable::CreateResultShapedBuffer( ExecutionInput& input = arguments[alias->parameter_number]; MaybeOwningDeviceMemory* maybe_owning_memory = input.MutableBuffer(alias->parameter_index); - if (alias->must_alias() && !maybe_owning_memory->HasOwnership()) { - return InvalidArgument( - "An input was configured to be must-alias at " - "compile time but not donated at runtime: %s", - alias->ToString()); - } if (absl::optional owning = maybe_owning_memory->Release()) { // If the caller passes the ownership of the device memory, reuse it diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc index 726f1963545..469f2919fba 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc @@ -480,12 +480,6 @@ StatusOr GpuExecutable::ExecuteAsyncOnStream( ExecutionInput& input = arguments[alias->parameter_number]; MaybeOwningDeviceMemory* maybe_owning_memory = input.MutableBuffer(alias->parameter_index); - if (alias->must_alias() && !maybe_owning_memory->HasOwnership()) { - return InvalidArgument( - "An input was configured to be must-alias at " - "compile time but not donated at runtime: %s", - alias->ToString()); - } if (absl::optional owning = maybe_owning_memory->Release()) { // If the caller passes the ownership of the device memory, reuse it diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto index e043216c17e..960f60fe882 100644 --- a/tensorflow/compiler/xla/service/hlo.proto +++ b/tensorflow/compiler/xla/service/hlo.proto @@ -283,16 +283,6 @@ message HloScheduleProto { map sequences = 1; } -enum Kind { - // Define a UNDEFINED_ALIAS equal to zero to get around the default-0 proto3 - // behavior and missing has_*() APIs. - UNDEFINED_ALIAS = 0; - // The buffers may or may not alias at runtime. - MAY_ALIAS = 1; - // The buffers must alias at runtime. - MUST_ALIAS = 2; -} - message HloInputOutputAliasProto { // The following proto describes a pair of aliased an input // (described by parameter number and a ShapeIndex of the parameter) @@ -314,8 +304,8 @@ message HloInputOutputAliasProto { int64 parameter_number = 2; // ShapeIndex of the parameter instruction. repeated int64 parameter_shape_index = 3; - // The kind of alias to be setup. 
- Kind kind = 4; + reserved 4; + reserved "kind"; } repeated AliasEntryProto entries = 1; diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc index 34bc30d641f..e123161720b 100644 --- a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc +++ b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc @@ -15,7 +15,6 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h" -#include "tensorflow/compiler/xla/service/hlo.pb.h" #include "tensorflow/compiler/xla/service/hlo_module.h" namespace xla { @@ -25,10 +24,9 @@ bool HloInputOutputAliasConfig::OutputHasAlias( return alias_.element(output_index).has_value(); } -Status HloInputOutputAliasConfig::SetUpAlias( - const ShapeIndex& output_index, int64 param_number, - const ShapeIndex& param_index, - HloInputOutputAliasConfig::AliasKind must_alias) { +Status HloInputOutputAliasConfig::SetUpAlias(const ShapeIndex& output_index, + int64 param_number, + const ShapeIndex& param_index) { TF_RET_CHECK(ShapeUtil::IndexIsValid(alias_.shape(), output_index)) << "Trying to set up alias at " << output_index.ToString() << " which is an invalid index for shape " @@ -43,8 +41,7 @@ Status HloInputOutputAliasConfig::SetUpAlias( param_number, param_index.ToString(), output_index.ToString(), alias_.element(output_index)->parameter_number, alias_.element(output_index)->parameter_index.ToString()); - (*alias_.mutable_element(output_index)) = - Alias(param_number, param_index, must_alias); + (*alias_.mutable_element(output_index)) = Alias(param_number, param_index); VLOG(4) << "Set up alias between output index " << output_index.ToString() << " and parameter " << param_index << " at index " << param_index.ToString(); @@ -64,11 +61,6 @@ HloInputOutputAliasProto HloInputOutputAliasConfig::ToProto() const { for (int64 i : data->parameter_index) { entry.add_parameter_shape_index(i); } - if (data->must_alias()) { - entry.set_kind(Kind::MUST_ALIAS); - } else { - entry.set_kind(Kind::MAY_ALIAS); - } result.add_entries()->Swap(&entry); } }); @@ -85,9 +77,8 @@ StatusOr HloInputOutputAliasConfig::CreateFromProto( int64 param_number = entry.parameter_number(); ShapeIndex param_index(entry.parameter_shape_index().begin(), entry.parameter_shape_index().end()); - AliasKind kind = entry.kind() == Kind::MAY_ALIAS ? kMayAlias : kMustAlias; TF_RETURN_IF_ERROR( - result.SetUpAlias(output_index, param_number, param_index, kind)); + result.SetUpAlias(output_index, param_number, param_index)); } return result; } @@ -102,9 +93,9 @@ string HloInputOutputAliasConfig::ToString() const { ForEachAlias([&](const ShapeIndex& output_index, const Alias& alias) { pieces.push_back(absl::StrFormat( - " OutputIndex %s is %saliased with parameter %lld at %s:", - output_index.ToString(), alias.kind == kMustAlias ? 
"must-" : "may-", - alias.parameter_number, alias.parameter_index.ToString())); + " OutputIndex %s is aliased with parameter %lld at %s:", + output_index.ToString(), alias.parameter_number, + alias.parameter_index.ToString())); }); return absl::StrJoin(pieces, "\n"); } @@ -121,19 +112,6 @@ string HloInputOutputAliasConfig::ToShortString() const { return absl::StrJoin(pieces, ", "); } -bool HloInputOutputAliasConfig::ParameterMustAlias( - int64 param_number, const ShapeIndex& param_index) const { - bool result = false; - alias_.ForEachElement( - [&](const xla::ShapeIndex&, absl::optional alias) { - if (alias && alias->parameter_number == param_number && - alias->parameter_index == param_index && alias->must_alias()) { - result = true; - } - }); - return result; -} - absl::optional HloInputOutputAliasConfig::GetAliasedOutput( int64 param_number, const ShapeIndex& param_index) const { absl::optional output; diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h index 6b84bdb6a68..d5ca28e9387 100644 --- a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h +++ b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h @@ -32,32 +32,22 @@ class HloModule; // parameter index in the entry computation. class HloInputOutputAliasConfig { public: - // The kind of aliases which can be set. A kMayAlias is one setup at - // compilation time by the user, and has to be respected. A kMustAlias one - // might be setup by the compiler, if it decides it is convenient to do so. - enum AliasKind { - kMayAlias, - kMustAlias, - }; // Defines the alias information for a given output buffer. A given output // buffer shape index can refer only to one parameter+index. struct Alias { - Alias(int64 parameter_number, ShapeIndex parameter_index, - AliasKind kind = kMayAlias) + Alias(int64 parameter_number, ShapeIndex parameter_index) : parameter_number(parameter_number), - parameter_index(std::move(parameter_index)), - kind(kind) {} + parameter_index(std::move(parameter_index)) {} int64 parameter_number; ShapeIndex parameter_index; - AliasKind kind; - - bool must_alias() const { return kind == kMustAlias; } std::string ToString() { - return absl::StrFormat("(%lld, %s, %s)", parameter_number, - parameter_index.ToString(), - kind == kMustAlias ? "must_alias" : "may_alias"); + if (parameter_index.empty()) { + return absl::StrCat(parameter_number); + } + return absl::StrFormat("(%lld, %s)", parameter_number, + parameter_index.ToString()); } }; @@ -71,8 +61,7 @@ class HloInputOutputAliasConfig { // Sets up alias config from `output_index` to `param_index` at // `param_number`. Status SetUpAlias(const ShapeIndex& output_index, int64 param_number, - const ShapeIndex& param_index, - AliasKind must_alias = kMayAlias); + const ShapeIndex& param_index); // Returns true if the given parameter is aliased with one of the output // buffers. @@ -103,11 +92,6 @@ class HloInputOutputAliasConfig { absl::optional GetAliasedParameter( const ShapeIndex& output_index) const; - // Returns if the parameter at the given parameter number and parameter - // index must-alias with an output. 
- bool ParameterMustAlias(int64 param_number, - const ShapeIndex& param_index) const; - using AliasFn = std::function; diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc index 31afe2a3673..0530062c43b 100644 --- a/tensorflow/compiler/xla/service/hlo_parser.cc +++ b/tensorflow/compiler/xla/service/hlo_parser.cc @@ -552,37 +552,31 @@ bool HloParserImpl::ParseAliasing(AliasingData* data) { return false; } - if (!ParseToken(TokKind::kLparen, errmsg)) { - return false; - } - int64 param_num; - ParseInt64(¶m_num); - if (!ParseToken(TokKind::kComma, errmsg)) { - return false; - } - ShapeIndex param_idx; - if (!ParseShapeIndex(¶m_idx)) { - return false; - } - - HloInputOutputAliasConfig::AliasKind alias_kind = - HloInputOutputAliasConfig::kMayAlias; - if (EatIfPresent(TokKind::kComma)) { - std::string type; - ParseName(&type); - if (type == "must-alias") { - alias_kind = HloInputOutputAliasConfig::kMustAlias; - } else if (type == "may-alias") { - alias_kind = HloInputOutputAliasConfig::kMayAlias; - } else { - return TokenError("Unexpected aliasing kind; expected SYSTEM or USER"); + if (lexer_.GetKind() != TokKind::kLparen) { + // Short form: "{0}: 0", output index "{}" is assumed. + int64 param_num; + ParseInt64(¶m_num); + data->emplace(std::piecewise_construct, std::forward_as_tuple(out), + std::forward_as_tuple(param_num, ShapeIndex{})); + } else { + // Long form: "{0}: (0, {0})", output index is explicitly specified. + if (!ParseToken(TokKind::kLparen, errmsg)) { + return false; + } + int64 param_num; + ParseInt64(¶m_num); + if (!ParseToken(TokKind::kComma, errmsg)) { + return false; + } + ShapeIndex param_idx; + if (!ParseShapeIndex(¶m_idx)) { + return false; + } + data->emplace(std::piecewise_construct, std::forward_as_tuple(out), + std::forward_as_tuple(param_num, param_idx)); + if (!ParseToken(TokKind::kRparen, errmsg)) { + return false; } - } - - data->emplace(std::piecewise_construct, std::forward_as_tuple(out), - std::forward_as_tuple(param_num, param_idx, alias_kind)); - if (!ParseToken(TokKind::kRparen, errmsg)) { - return false; } if (!EatIfPresent(TokKind::kComma)) { @@ -630,9 +624,8 @@ bool HloParserImpl::ParseHloModule(HloModule* module) { if (aliasing_data) { HloInputOutputAliasConfig alias_config(module->result_shape()); for (auto& p : *aliasing_data) { - Status st = - alias_config.SetUpAlias(p.first, p.second.parameter_number, - p.second.parameter_index, p.second.kind); + Status st = alias_config.SetUpAlias(p.first, p.second.parameter_number, + p.second.parameter_index); if (!st.ok()) { return TokenError(st.error_message()); } diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc index 86b6b1bedd9..484578e5e0e 100644 --- a/tensorflow/compiler/xla/service/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc @@ -2399,7 +2399,7 @@ ENTRY c2 { TEST_F(HloParserTest, SimpleAliasing) { const string original = R"( -HloModule Module, input_output_alias={ {0}: (0, {0}, must-alias), {1}: (0, {1}) } +HloModule Module, input_output_alias={ {0}: (0, {0}), {1}: (0, {1}) } ENTRY entry { %p = (f32[], f32[]) parameter(0) @@ -2413,13 +2413,42 @@ ENTRY entry { std::unique_ptr parsed_module = module.ConsumeValueOrDie(); EXPECT_EQ(parsed_module->input_output_alias_config().GetAliasedOutput(0, {0}), ShapeIndex{0}); - - EXPECT_TRUE( - parsed_module->input_output_alias_config().ParameterMustAlias(0, {0})); 
EXPECT_EQ(parsed_module->input_output_alias_config().GetAliasedOutput(0, {1}), ShapeIndex{1}); - EXPECT_FALSE( - parsed_module->input_output_alias_config().ParameterMustAlias(0, {1})); +} + +TEST_F(HloParserTest, SimpleAliasingShortForm) { + const string original = R"( +HloModule Module, input_output_alias={ {0}: 0, {1}: 1 } + +ENTRY entry { + %p0 = f32[] parameter(0) + %p1 = f32[] parameter(1) + ROOT %out = (f32[], f32[]) tuple(%p0, %p1) +} + )"; + auto module = ParseAndReturnVerifiedModule(original); + TF_ASSERT_OK(module.status()); + std::unique_ptr parsed_module = module.ConsumeValueOrDie(); + EXPECT_EQ(parsed_module->input_output_alias_config().GetAliasedOutput(0, {}), + ShapeIndex{0}); + EXPECT_EQ(parsed_module->input_output_alias_config().GetAliasedOutput(1, {}), + ShapeIndex{1}); +} + +TEST_F(HloParserTest, SimpleAliasingShortFormError) { + const string original = R"( +HloModule Module, input_output_alias={ {0}: A, {1}: 1 } + +ENTRY entry { + %p0 = f32[] parameter(0) + %p1 = f32[] parameter(1) + ROOT %out = (f32[], f32[]) tuple(%p0, %p1) +} + )"; + ExpectHasSubstr( + ParseAndReturnUnverifiedModule(original).status().error_message(), + "expects integer"); } TEST_F(HloParserTest, NestedAliasing) { diff --git a/tensorflow/compiler/xla/tests/buffer_donation_test.cc b/tensorflow/compiler/xla/tests/buffer_donation_test.cc index f78083fe2af..856ea7c9b44 100644 --- a/tensorflow/compiler/xla/tests/buffer_donation_test.cc +++ b/tensorflow/compiler/xla/tests/buffer_donation_test.cc @@ -61,7 +61,7 @@ class BufferDonationTest : public HloTestBase { absl::Span argument_literals, absl::Span donate_arguments, absl::Span expected_runtime_aliasing, - const Literal& expected, std::string expected_failure = "") { + const Literal& expected) { // Create a copy of the output shape because the HLO module is std::moved // into the compiler and may be deallocated. 
const Shape output_shape = hlo_module->result_shape(); @@ -123,19 +123,10 @@ class BufferDonationTest : public HloTestBase { ExecutionInput(std::move(owned_buffers), argument_literal.shape())); } - StatusOr output_status = + TF_ASSERT_OK_AND_ASSIGN( + ExecutionOutput output, executable->ExecuteAsyncOnStream(&service_run_options, std::move(args), - /*hlo_execution_profile=*/nullptr); - if (!expected_failure.empty()) { - ASSERT_FALSE(output_status.ok()); - ASSERT_TRUE(absl::StrContains(output_status.status().error_message(), - expected_failure)) - << "got: \n" - << output_status.status().error_message() << " \nvs want\n" - << expected_failure; - return; - } - ExecutionOutput output = output_status.ConsumeValueOrDie(); + /*hlo_execution_profile=*/nullptr)); se::DeviceMemoryBase result_root_buffer = output.Result().root_buffer(); LOG(INFO) << "result allocation = " << result_root_buffer.opaque() @@ -312,37 +303,5 @@ ENTRY entry { #endif } -TEST_F(BufferDonationTest, TestMustAliasNotDonated) { - HloModuleConfig config; - - StatusOr> module = - ParseAndReturnVerifiedModule(R"( -HloModule module - -ENTRY entry { - a = f32[] parameter(0) - b = f32[] parameter(1) - ROOT out = (f32[], f32[]) tuple(a, b) -} - )", - config); - - TF_ASSERT_OK(module->get()->input_output_alias_config().SetUpAlias( - {0}, 0, {}, HloInputOutputAliasConfig::kMustAlias)); - - std::vector args; - args.push_back(LiteralUtil::CreateR0(0.1)); - args.push_back(LiteralUtil::CreateR0(0.2)); - Literal expected = LiteralUtil::MakeTupleFromSlices( - {LiteralUtil::CreateR0(0.1), LiteralUtil::CreateR0(0.2)}); - -#ifndef XLA_TEST_BACKEND_INTERPRETER - RunAndCheck(std::move(*module), args, - /*donate_arguments=*/{false, false}, {true, false}, expected, - "An input was configured to be must-alias at " - "compile time but not donated at runtime:"); -#endif -} - } // namespace } // namespace xla diff --git a/tensorflow/stream_executor/tpu/tpu_executable_interface.cc b/tensorflow/stream_executor/tpu/tpu_executable_interface.cc index f260cc1631f..13f9db98e5d 100644 --- a/tensorflow/stream_executor/tpu/tpu_executable_interface.cc +++ b/tensorflow/stream_executor/tpu/tpu_executable_interface.cc @@ -62,24 +62,6 @@ TpuExecutableInterface::AllocateOutputMemoryWithInputReuse( << " host_shape = " << ShapeUtil::HumanStringWithLayout(host_shape); Shape device_shape = HostShapeToDeviceShape(host_shape); - TF_RETURN_IF_ERROR(alias_config.ForEachAliasWithStatus( - [&](const ShapeIndex& output_index, - absl::optional alias) { - if (alias && alias->must_alias()) { - VLOG(1) << alias->ToString(); - const MaybeOwningDeviceMemory& original_input = - (*arguments)[alias->parameter_number].Buffers().element( - alias->parameter_index); - if (!original_input.HasOwnership()) { - return InvalidArgument( - "An input was configured to be must-alias at " - "compile time but not donated at runtime: %s", - alias->ToString()); - } - } - return Status::OK(); - })); - if (VLOG_IS_ON(3)) { VLOG(3) << "AllocateOutputMemoryWithInputReuse, device = " << device_ordinal << " host_shape = " << ShapeUtil::HumanStringWithLayout(host_shape); From 8491e4ec502c7f4af64a6a3602552e7efb12c633 Mon Sep 17 00:00:00 2001 From: Rick Chao Date: Tue, 4 Aug 2020 19:36:00 -0700 Subject: [PATCH 2137/2522] Disable python/keras/distribute:collective_all_reduce_strategy_test on msan as it flakily fails. 
PiperOrigin-RevId: 324941016 Change-Id: Ic842c9b4f62f79f4cd2814718ff959b0e0b43594 --- tensorflow/python/keras/distribute/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/keras/distribute/BUILD b/tensorflow/python/keras/distribute/BUILD index 5a5cff01e33..56a6a9d0e1f 100644 --- a/tensorflow/python/keras/distribute/BUILD +++ b/tensorflow/python/keras/distribute/BUILD @@ -163,6 +163,7 @@ cuda_py_test( python_version = "PY3", tags = [ "multi_and_single_gpu", + "nomsan", # TODO(b/162894966) ], # b/155301154 broken with XLA:GPU xla_enable_strict_auto_jit = True, From 70c23b653fc258f5481474b920962a69b662de72 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 4 Aug 2020 19:41:45 -0700 Subject: [PATCH 2138/2522] Enable clipnorm and clipvalue arguments in Optimizer with tf.distribute.Strategy. Apply gradient clipping after aggregation. CentralStorageStrategy is still not supported with these arguments. PiperOrigin-RevId: 324941564 Change-Id: If224a86efabbb28ffe589f14d3b4c15787ce735b --- RELEASE.md | 6 - .../distribute/distribute_strategy_test.py | 31 ----- tensorflow/python/keras/engine/training.py | 1 + .../python/keras/engine/training_eager.py | 1 + .../experimental/loss_scale_optimizer.py | 4 +- tensorflow/python/keras/optimizer_v2/BUILD | 1 - .../python/keras/optimizer_v2/optimizer_v2.py | 110 +++++++++--------- tensorflow/python/keras/optimizer_v2/utils.py | 38 ------ ...n.experimental.-loss-scale-optimizer.pbtxt | 8 -- ...ensorflow.keras.optimizers.-adadelta.pbtxt | 8 -- ...tensorflow.keras.optimizers.-adagrad.pbtxt | 8 -- .../tensorflow.keras.optimizers.-adam.pbtxt | 8 -- .../tensorflow.keras.optimizers.-adamax.pbtxt | 8 -- .../tensorflow.keras.optimizers.-ftrl.pbtxt | 8 -- .../tensorflow.keras.optimizers.-nadam.pbtxt | 8 -- ...nsorflow.keras.optimizers.-optimizer.pbtxt | 8 -- ...nsorflow.keras.optimizers.-r-m-sprop.pbtxt | 8 -- .../tensorflow.keras.optimizers.-s-g-d.pbtxt | 8 -- ...n.experimental.-loss-scale-optimizer.pbtxt | 8 -- ...ensorflow.keras.optimizers.-adadelta.pbtxt | 8 -- ...tensorflow.keras.optimizers.-adagrad.pbtxt | 8 -- .../tensorflow.keras.optimizers.-adam.pbtxt | 8 -- .../tensorflow.keras.optimizers.-adamax.pbtxt | 8 -- .../tensorflow.keras.optimizers.-ftrl.pbtxt | 8 -- .../tensorflow.keras.optimizers.-nadam.pbtxt | 8 -- ...nsorflow.keras.optimizers.-optimizer.pbtxt | 8 -- ...nsorflow.keras.optimizers.-r-m-sprop.pbtxt | 8 -- .../tensorflow.keras.optimizers.-s-g-d.pbtxt | 8 -- .../v2/tensorflow.optimizers.-adadelta.pbtxt | 8 -- .../v2/tensorflow.optimizers.-adagrad.pbtxt | 8 -- .../v2/tensorflow.optimizers.-adam.pbtxt | 8 -- .../v2/tensorflow.optimizers.-adamax.pbtxt | 8 -- .../v2/tensorflow.optimizers.-ftrl.pbtxt | 8 -- .../v2/tensorflow.optimizers.-nadam.pbtxt | 8 -- .../v2/tensorflow.optimizers.-optimizer.pbtxt | 8 -- .../v2/tensorflow.optimizers.-r-m-sprop.pbtxt | 8 -- .../v2/tensorflow.optimizers.-s-g-d.pbtxt | 8 -- 37 files changed, 58 insertions(+), 366 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index d7a345c7c76..b0c785c7d68 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -33,10 +33,6 @@ shape assumptions (note that you can pass shapes with `None` entries for axes that are meant to be dynamic). You can also disable the input checking entirely by setting `model.input_spec = None`. -* `tf.keras.optimizers.Optimizer.get_gradients` no longer performs gradient - clipping. Instead, gradient clipping is performed in - `tf.keras.optimizers.Optimizer.apply_gradients`, after the gradients on each - device have been aggregated. 
## Known Caveats @@ -99,8 +95,6 @@ * Error messages when Functional API construction goes wrong (and when ops cannot be converted to Keras layers automatically) should be clearer and easier to understand. * `Optimizer.minimize` can now accept a loss `Tensor` and a `GradientTape` as an alternative to accepting a `callable` loss. - * `Optimizer` arguments `clipnorm` and `clipvalue` are now supported with - `tf.distribute.Strategy` (`CentralStorageStrategy` is not yet supported). * `tf.function` / AutoGraph: * Added `experimental_follow_type_hints` argument for `tf.function`. When True, the function may use type annotations to optimize the tracing diff --git a/tensorflow/python/keras/distribute/distribute_strategy_test.py b/tensorflow/python/keras/distribute/distribute_strategy_test.py index abcb5d1c0e8..4b6d3a80730 100644 --- a/tensorflow/python/keras/distribute/distribute_strategy_test.py +++ b/tensorflow/python/keras/distribute/distribute_strategy_test.py @@ -22,7 +22,6 @@ import numpy as np from tensorflow.python import keras from tensorflow.python.data.experimental.ops import cardinality from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.distribute import central_storage_strategy from tensorflow.python.distribute import combinations from tensorflow.python.distribute import distribution_strategy_context from tensorflow.python.distribute import mirrored_strategy @@ -1864,36 +1863,6 @@ class TestDistributionStrategyWithKerasModels(test.TestCase, self.assertEqual(bc.predict_begin_batches, [0]) self.assertEqual(bc.predict_end_batches, [24]) - @combinations.generate( - combinations.combine(distribution=all_strategies, mode=['eager'])) - def test_gradient_clipping(self, distribution): - - class MyLayer(keras.layers.Layer): - - def build(self, _): - self.v1 = variables.Variable(1.) - self.v2 = variables.Variable(1.) - - def call(self, x): - return 3 * self.v1 - 3 * self.v2 - - x, y = np.ones((10, 1)), np.ones((10, 1)) - - with distribution.scope(): - layer = MyLayer() - model = keras.Sequential([layer]) - optimizer = gradient_descent_keras.SGD(1., clipnorm=2., clipvalue=2.) - model.compile(optimizer, 'mae') - - if isinstance(distribution, - central_storage_strategy.CentralStorageStrategy): - with self.assertRaisesRegex(ValueError, 'not supported'): - model.fit(x, y, batch_size=10, epochs=1) - else: - model.fit(x, y, batch_size=10, epochs=1) - self.assertAllClose(self.evaluate(layer.v1), 3.) - self.assertAllClose(self.evaluate(layer.v2), -1.) 
- @combinations.generate( combinations.times( all_strategy_combinations_minus_default())) diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index a1fb329feab..bf542129e5c 100644 --- a/tensorflow/python/keras/engine/training.py +++ b/tensorflow/python/keras/engine/training.py @@ -2744,6 +2744,7 @@ def _minimize(strategy, tape, optimizer, loss, trainable_variables): trainable_variables)) if isinstance(optimizer, lso.LossScaleOptimizer): gradients = optimizer.get_unscaled_gradients(gradients) + gradients = optimizer._clip_gradients(gradients) # pylint: disable=protected-access if trainable_variables: if aggregate_grads_outside_optimizer: optimizer.apply_gradients( diff --git a/tensorflow/python/keras/engine/training_eager.py b/tensorflow/python/keras/engine/training_eager.py index b3ce3d13ed7..8064bf2a7ab 100644 --- a/tensorflow/python/keras/engine/training_eager.py +++ b/tensorflow/python/keras/engine/training_eager.py @@ -273,6 +273,7 @@ def _process_single_batch(model, if isinstance(model.optimizer, loss_scale_optimizer.LossScaleOptimizer): grads = model.optimizer.get_unscaled_gradients(grads) + grads = model.optimizer._clip_gradients(grads) model.optimizer.apply_gradients(zip(grads, trainable_weights)) else: logging.warning('The list of trainable weights is empty. Make sure that' diff --git a/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer.py b/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer.py index 59a49b03ad5..4a3f459de80 100644 --- a/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer.py +++ b/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer.py @@ -258,8 +258,8 @@ class LossScaleOptimizer(_DelegatingTrackableMixin, optimizer_v2.OptimizerV2): 'clipvalue %s' % (optimizer, optimizer.clipvalue)) self._raise_if_strategy_unsupported() - self._clipnorm = None - self._clipvalue = None + self.clipnorm = None + self.clipvalue = None self._optimizer = optimizer self._loss_scale = keras_loss_scale_module.get(loss_scale) diff --git a/tensorflow/python/keras/optimizer_v2/BUILD b/tensorflow/python/keras/optimizer_v2/BUILD index 9a317e5d114..b519ec7fb3d 100644 --- a/tensorflow/python/keras/optimizer_v2/BUILD +++ b/tensorflow/python/keras/optimizer_v2/BUILD @@ -40,7 +40,6 @@ py_library( "//tensorflow/python:state_ops", "//tensorflow/python:variable_scope", "//tensorflow/python:variables", - "//tensorflow/python/distribute:central_storage_strategy", "//tensorflow/python/distribute:distribute_lib", "//tensorflow/python/distribute:parameter_server_strategy", "//tensorflow/python/distribute:reduce_util", diff --git a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py index 0ecca63a64f..18d94594542 100644 --- a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py +++ b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py @@ -41,6 +41,7 @@ from tensorflow.python.keras.optimizer_v2 import utils as optimizer_utils from tensorflow.python.keras.utils import generic_utils from tensorflow.python.keras.utils import tf_utils from tensorflow.python.ops import array_ops +from tensorflow.python.ops import clip_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gen_resource_variable_ops from tensorflow.python.ops import gradients @@ -331,6 +332,15 @@ class OptimizerV2(trackable.Trackable): raise ValueError("decay cannot be less than 0: {}".format(decay)) self._initial_decay = 
decay + # Set the gradient clipping properties + self.clipnorm = kwargs.pop("clipnorm", None) + self.clipvalue = kwargs.pop("clipvalue", None) + if ((self.clipnorm is not None or self.clipvalue is not None) + and distribute_ctx.has_strategy()): + raise ValueError("Gradient clipping in the optimizer " + "(by setting clipnorm or clipvalue) is currently " + "unsupported when using a distribution strategy.") + self._hypers_created = False # Store the distribution strategy object if the optimizer is created inside @@ -340,33 +350,6 @@ class OptimizerV2(trackable.Trackable): else: self._distribution_strategy = None - # Set the gradient clipping properties - self._clipnorm = kwargs.pop("clipnorm", None) - self._clipvalue = kwargs.pop("clipvalue", None) - - # Configure gradient transforms. - self._transform_gradients_fns = [] - - if self._clipnorm is not None: - self._transform_gradients_fns.append( - optimizer_utils.make_gradient_clipnorm_fn(self._clipnorm)) - if self._clipvalue is not None: - self._transform_gradients_fns.append( - optimizer_utils.make_gradient_clipvalue_fn(self._clipvalue)) - - @property - def clipnorm(self): - """`float` or `None`. If set, clips gradients to this maximum norm.""" - return self._clipnorm - - @property - def clipvalue(self): - """`float` or `None`. - - If set, clips gradients to this maximum absolute value. - """ - return self._clipvalue - def minimize(self, loss, var_list, grad_loss=None, name=None, tape=None): """Minimize `loss` by updating `var_list`. @@ -402,6 +385,26 @@ class OptimizerV2(trackable.Trackable): loss, var_list=var_list, grad_loss=grad_loss, tape=tape) return self.apply_gradients(grads_and_vars, name=name) + def _clip_gradients(self, grads): + """Clip gradients according to the clipnorm and clipvalue attributes.""" + if self.clipnorm is not None: + if distribute_ctx.has_strategy(): + raise ValueError("Gradient clipping in the optimizer " + "(by setting clipnorm or clipvalue) is currently " + "unsupported when using a distribution strategy.") + grads = [None if g is None else clip_ops.clip_by_norm(g, self.clipnorm) + for g in grads] + if self.clipvalue is not None: + if distribute_ctx.has_strategy(): + raise ValueError("Gradient clipping in the optimizer " + "(by setting clipnorm or clipvalue) is currently " + "unsupported when using a distribution strategy.") + v = self.clipvalue + grads = [ + None if g is None else clip_ops.clip_by_value(g, -v, v) for g in grads + ] + return grads + def _compute_gradients(self, loss, var_list, grad_loss=None, tape=None): """Compute gradients of `loss` for the variables in `var_list`. @@ -451,6 +454,8 @@ class OptimizerV2(trackable.Trackable): var_list = nest.flatten(var_list) with ops.name_scope_v2(self._name + "/gradients"): grads = tape.gradient(loss, var_list, grad_loss) + # TODO(omalleyt): Move to post-aggregation. + grads = self._clip_gradients(grads) grads_and_vars = list(zip(grads, var_list)) self._assert_valid_dtypes([ @@ -460,12 +465,6 @@ class OptimizerV2(trackable.Trackable): return grads_and_vars - def _transform_gradients(self, grads_and_vars): - """Transformations to apply aggregated gradients.""" - for fn in self._transform_gradients_fns: - grads_and_vars = fn(grads_and_vars) - return grads_and_vars - def get_gradients(self, loss, params): """Returns gradients of `loss` with respect to `params`. 
@@ -484,15 +483,14 @@ class OptimizerV2(trackable.Trackable): with backend.get_graph().as_default(), backend.name_scope(self._name + "/gradients"): grads = gradients.gradients(loss, params) - grads_and_vars = list(zip(grads, params)) - for grad, param in grads_and_vars: + for grad, param in zip(grads, params): if grad is None: raise ValueError("Variable {} has `None` for gradient. " "Please make sure that all of your ops have a " "gradient defined (i.e. are differentiable). " "Common ops without gradient: " "K.argmax, K.round, K.eval.".format(param)) - grads = [g for g, _ in grads_and_vars] + grads = self._clip_gradients(grads) return grads def apply_gradients(self, @@ -536,23 +534,10 @@ class OptimizerV2(trackable.Trackable): ValueError: If none of the variables have gradients. RuntimeError: If called in a cross-replica context. """ - if distribute_ctx.in_cross_replica_context(): - raise RuntimeError( - "`apply_gradients() cannot be called in cross-replica context. " - "Use `tf.distribute.Strategy.run` to enter replica " - "context.") - - strategy = distribute_ctx.get_strategy() - if (not experimental_aggregate_gradients and strategy and - isinstance(strategy.extended, - parameter_server_strategy.ParameterServerStrategyExtended)): - raise NotImplementedError( - "`experimental_aggregate_gradients=False is not supported for " - "ParameterServerStrategy and CentralStorageStrategy") - grads_and_vars = optimizer_utils.filter_empty_gradients(grads_and_vars) - var_list = [v for _, v in grads_and_vars] - with ops.name_scope_v2(self._name): + var_list = [v for (_, v) in grads_and_vars] + + with backend.name_scope(self._name): # Create iteration if necessary. with ops.init_scope(): self._create_all_weights(var_list) @@ -562,12 +547,25 @@ class OptimizerV2(trackable.Trackable): # gradients return control_flow_ops.no_op() - if experimental_aggregate_gradients: - reduced_grads = self._aggregate_gradients(grads_and_vars) - grads_and_vars = list(zip(reduced_grads, var_list)) - grads_and_vars = self._transform_gradients(grads_and_vars) + if distribute_ctx.in_cross_replica_context(): + raise RuntimeError( + "`apply_gradients() cannot be called in cross-replica context. 
" + "Use `tf.distribute.Strategy.run` to enter replica " + "context.") + + strategy = distribute_ctx.get_strategy() + if (not experimental_aggregate_gradients and strategy and isinstance( + strategy.extended, + parameter_server_strategy.ParameterServerStrategyExtended)): + raise NotImplementedError( + "`experimental_aggregate_gradients=False is not supported for " + "ParameterServerStrategy and CentralStorageStrategy") apply_state = self._prepare(var_list) + if experimental_aggregate_gradients: + reduced_grads = self._aggregate_gradients(grads_and_vars) + var_list = [v for _, v in grads_and_vars] + grads_and_vars = list(zip(reduced_grads, var_list)) return distribute_ctx.get_replica_context().merge_call( functools.partial(self._distributed_apply, apply_state=apply_state), args=(grads_and_vars,), diff --git a/tensorflow/python/keras/optimizer_v2/utils.py b/tensorflow/python/keras/optimizer_v2/utils.py index f723c6d8b64..9f680e04dd6 100644 --- a/tensorflow/python/keras/optimizer_v2/utils.py +++ b/tensorflow/python/keras/optimizer_v2/utils.py @@ -18,10 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.distribute import central_storage_strategy from tensorflow.python.distribute import distribution_strategy_context as distribute_ctx from tensorflow.python.distribute import reduce_util as ds_reduce_util -from tensorflow.python.ops import clip_ops from tensorflow.python.platform import tf_logging as logging @@ -59,42 +57,6 @@ def all_reduce_sum_gradients(grads_and_vars): return reduced_with_nones -def make_gradient_clipnorm_fn(clipnorm): - """Creates a gradient transformation function for clipping by norm.""" - - def gradient_clipnorm_fn(grads_and_vars): - - if isinstance(distribute_ctx.get_strategy(), - central_storage_strategy.CentralStorageStrategy): - raise ValueError( - "`clipnorm` is not supported with `CenteralStorageStrategy`") - - clipped_grads_and_vars = [ - (clip_ops.clip_by_norm(g, clipnorm), v) for g, v in grads_and_vars - ] - return clipped_grads_and_vars - - return gradient_clipnorm_fn - - -def make_gradient_clipvalue_fn(clipvalue): - """Creates a gradient transformation function for clipping by value.""" - - def gradient_clipvalue_fn(grads_and_vars): - - if isinstance(distribute_ctx.get_strategy(), - central_storage_strategy.CentralStorageStrategy): - raise ValueError( - "`clipvalue` is not supported with `CenteralStorageStrategy`") - - clipped_grads_and_vars = [(clip_ops.clip_by_value(g, -clipvalue, - clipvalue), v) - for g, v in grads_and_vars] - return clipped_grads_and_vars - - return gradient_clipvalue_fn - - def filter_empty_gradients(grads_and_vars): """Filter out `(grad, var)` pairs that have a gradient equal to `None`.""" grads_and_vars = tuple(grads_and_vars) diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.mixed_precision.experimental.-loss-scale-optimizer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.mixed_precision.experimental.-loss-scale-optimizer.pbtxt index 58f8cf24495..dbab3abae8e 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.mixed_precision.experimental.-loss-scale-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.mixed_precision.experimental.-loss-scale-optimizer.pbtxt @@ -5,14 +5,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" - mtype: "" - } member { name: "iterations" mtype: "" diff --git 
a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adadelta.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adadelta.pbtxt index fb341cb24dd..af854e98013 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adadelta.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adadelta.pbtxt @@ -4,14 +4,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" - mtype: "" - } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adagrad.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adagrad.pbtxt index d8039ed21ef..e89cc5cef75 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adagrad.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adagrad.pbtxt @@ -4,14 +4,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" - mtype: "" - } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adam.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adam.pbtxt index 912f92f83a6..15414d7234f 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adam.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adam.pbtxt @@ -4,14 +4,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" - mtype: "" - } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adamax.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adamax.pbtxt index 3abc6d39b3f..8b3c429e6b5 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adamax.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adamax.pbtxt @@ -4,14 +4,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" - mtype: "" - } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-ftrl.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-ftrl.pbtxt index 00880d3f73b..51ab675db74 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-ftrl.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-ftrl.pbtxt @@ -4,14 +4,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" - mtype: "" - } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-nadam.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-nadam.pbtxt index 2ce311d3504..342c0951bbe 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-nadam.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-nadam.pbtxt @@ -4,14 +4,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" - mtype: "" - } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-optimizer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-optimizer.pbtxt index 
2020de9fa5c..f007b4b971a 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-optimizer.pbtxt @@ -3,14 +3,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" - mtype: "" - } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-r-m-sprop.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-r-m-sprop.pbtxt index 80a1449613c..d5bf6fa7f47 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-r-m-sprop.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-r-m-sprop.pbtxt @@ -4,14 +4,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" - mtype: "" - } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-s-g-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-s-g-d.pbtxt index 8acfe214256..df904f72511 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-s-g-d.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-s-g-d.pbtxt @@ -4,14 +4,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" - mtype: "" - } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.mixed_precision.experimental.-loss-scale-optimizer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.mixed_precision.experimental.-loss-scale-optimizer.pbtxt index 58f8cf24495..dbab3abae8e 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.mixed_precision.experimental.-loss-scale-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.mixed_precision.experimental.-loss-scale-optimizer.pbtxt @@ -5,14 +5,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" - mtype: "" - } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adadelta.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adadelta.pbtxt index fb341cb24dd..af854e98013 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adadelta.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adadelta.pbtxt @@ -4,14 +4,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" - mtype: "" - } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adagrad.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adagrad.pbtxt index d8039ed21ef..e89cc5cef75 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adagrad.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adagrad.pbtxt @@ -4,14 +4,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" - mtype: "" - } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adam.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adam.pbtxt index 
912f92f83a6..15414d7234f 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adam.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adam.pbtxt @@ -4,14 +4,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" - mtype: "" - } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adamax.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adamax.pbtxt index 3abc6d39b3f..8b3c429e6b5 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adamax.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adamax.pbtxt @@ -4,14 +4,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" - mtype: "" - } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-ftrl.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-ftrl.pbtxt index 00880d3f73b..51ab675db74 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-ftrl.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-ftrl.pbtxt @@ -4,14 +4,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" - mtype: "" - } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-nadam.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-nadam.pbtxt index 2ce311d3504..342c0951bbe 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-nadam.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-nadam.pbtxt @@ -4,14 +4,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" - mtype: "" - } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-optimizer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-optimizer.pbtxt index 2020de9fa5c..f007b4b971a 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-optimizer.pbtxt @@ -3,14 +3,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" - mtype: "" - } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-r-m-sprop.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-r-m-sprop.pbtxt index 80a1449613c..d5bf6fa7f47 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-r-m-sprop.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-r-m-sprop.pbtxt @@ -4,14 +4,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" - mtype: "" - } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-s-g-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-s-g-d.pbtxt index 8acfe214256..df904f72511 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-s-g-d.pbtxt +++ 
b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-s-g-d.pbtxt @@ -4,14 +4,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" - mtype: "" - } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adadelta.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adadelta.pbtxt index 06212bdc95d..cb3d38246a7 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adadelta.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adadelta.pbtxt @@ -4,14 +4,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" - mtype: "" - } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adagrad.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adagrad.pbtxt index 09fff0514d8..c7b2bca4b6b 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adagrad.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adagrad.pbtxt @@ -4,14 +4,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" - mtype: "" - } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adam.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adam.pbtxt index 195ba9e4f56..209c9fe6620 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adam.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adam.pbtxt @@ -4,14 +4,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" - mtype: "" - } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adamax.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adamax.pbtxt index 9859da430bd..12bbb14fb71 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adamax.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-adamax.pbtxt @@ -4,14 +4,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" - mtype: "" - } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-ftrl.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-ftrl.pbtxt index a4ed911e39d..1482ed54eb9 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-ftrl.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-ftrl.pbtxt @@ -4,14 +4,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" - mtype: "" - } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-nadam.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-nadam.pbtxt index 128f223fdc7..2a422fa2340 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-nadam.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-nadam.pbtxt @@ -4,14 +4,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" - mtype: "" - } member { name: "iterations" mtype: "" diff --git 
a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-optimizer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-optimizer.pbtxt index 5ea1ed521ef..e7021e02772 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-optimizer.pbtxt @@ -3,14 +3,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" - mtype: "" - } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-r-m-sprop.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-r-m-sprop.pbtxt index db89ecbabe7..6543f4023a4 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-r-m-sprop.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-r-m-sprop.pbtxt @@ -4,14 +4,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" - mtype: "" - } member { name: "iterations" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-s-g-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-s-g-d.pbtxt index 0cb0205e65e..94ff8dfcdfc 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-s-g-d.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.optimizers.-s-g-d.pbtxt @@ -4,14 +4,6 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - member { - name: "clipnorm" - mtype: "" - } - member { - name: "clipvalue" - mtype: "" - } member { name: "iterations" mtype: "" From c0b2748f6bc35dcbdc775e77d00f7da4fab75cde Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Tue, 4 Aug 2020 19:51:27 -0700 Subject: [PATCH 2139/2522] Split GPU Compatibility Lib into two. 
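With this split, GPUCompatibilityList (gpu_compatibility.h) carries both the Includes() check and the GetBestOptionsFor() helper, so the separate GPUCompatibilityRecommender subclass and its tests are deleted and callers such as gpu_delegate_jni.cc hold a GPUCompatibilityList directly. A minimal sketch of a migrated call site follows; it is an illustration rather than code from this patch, the helper name MaybeCreateGpuDelegate is hypothetical, and it assumes android_info/gpu_info were already filled in via RequestAndroidInfo()/RequestGpuInfo() as in the header's example comment and that the options are paired with the TfLiteGpuDelegateV2Create() factory from delegates/gpu/delegate.h.

    #include "tensorflow/lite/delegates/gpu/delegate.h"
    #include "tensorflow/lite/experimental/acceleration/compatibility/android_info.h"
    #include "tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility.h"

    // Returns a GPU delegate if this device is on the compatibility list,
    // otherwise nullptr so the caller can fall back to the CPU path.
    // The caller owns the delegate and releases it with TfLiteGpuDelegateV2Delete().
    TfLiteDelegate* MaybeCreateGpuDelegate(
        const tflite::acceleration::AndroidInfo& android_info,
        const tflite::gpu::GpuInfo& gpu_info) {
      tflite::acceleration::GPUCompatibilityList list;
      if (!list.Includes(android_info, gpu_info)) return nullptr;
      // Today this returns TfLiteGpuDelegateOptionsV2Default(); the hook exists
      // so the list can later recommend backend-specific options.
      TfLiteGpuDelegateOptionsV2 options =
          list.GetBestOptionsFor(android_info, gpu_info);
      return TfLiteGpuDelegateV2Create(&options);
    }
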
PiperOrigin-RevId: 324942428 Change-Id: I8d5ac8aca0f2ec8d889822f9d4c2ed8c340a3ae0 --- .../delegates/gpu/java/src/main/native/BUILD | 2 +- .../java/src/main/native/gpu_delegate_jni.cc | 6 +- .../acceleration/compatibility/BUILD | 38 +------ .../compatibility/gpu_compatibility.cc | 9 +- .../compatibility/gpu_compatibility.h | 38 ++++--- .../gpu_compatibility_recommender.cc | 30 ------ .../gpu_compatibility_recommender.h | 64 ----------- .../gpu_compatibility_recommender_test.cc | 100 ------------------ .../compatibility/gpu_compatibility_test.cc | 61 ----------- 9 files changed, 38 insertions(+), 310 deletions(-) delete mode 100644 tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_recommender.cc delete mode 100644 tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_recommender.h delete mode 100644 tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_recommender_test.cc delete mode 100644 tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_test.cc diff --git a/tensorflow/lite/delegates/gpu/java/src/main/native/BUILD b/tensorflow/lite/delegates/gpu/java/src/main/native/BUILD index 7b340e20562..00b56bb0c06 100644 --- a/tensorflow/lite/delegates/gpu/java/src/main/native/BUILD +++ b/tensorflow/lite/delegates/gpu/java/src/main/native/BUILD @@ -30,7 +30,7 @@ cc_library( "//tensorflow/lite/delegates/gpu/gl:egl_environment", "//tensorflow/lite/delegates/gpu/gl:request_gpu_info", "//tensorflow/lite/experimental/acceleration/compatibility:android_info", - "//tensorflow/lite/experimental/acceleration/compatibility:gpu_compatibility_recommender", + "//tensorflow/lite/experimental/acceleration/compatibility:gpu_compatibility", "//tensorflow/lite/java/jni", "@com_google_absl//absl/status", ], diff --git a/tensorflow/lite/delegates/gpu/java/src/main/native/gpu_delegate_jni.cc b/tensorflow/lite/delegates/gpu/java/src/main/native/gpu_delegate_jni.cc index c4571100818..d31d058b796 100644 --- a/tensorflow/lite/delegates/gpu/java/src/main/native/gpu_delegate_jni.cc +++ b/tensorflow/lite/delegates/gpu/java/src/main/native/gpu_delegate_jni.cc @@ -21,7 +21,7 @@ limitations under the License. 
#include "tensorflow/lite/delegates/gpu/gl/egl_environment.h" #include "tensorflow/lite/delegates/gpu/gl/request_gpu_info.h" #include "tensorflow/lite/experimental/acceleration/compatibility/android_info.h" -#include "tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_recommender.h" +#include "tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility.h" #ifdef __cplusplus extern "C" { @@ -74,13 +74,13 @@ class CompatibilityListHelper { } bool IsDelegateSupportedOnThisDevice() { - return compatibility_recommender_.Includes(android_info_, gpu_info_); + return compatibility_list_.Includes(android_info_, gpu_info_); } private: tflite::acceleration::AndroidInfo android_info_; tflite::gpu::GpuInfo gpu_info_; - tflite::acceleration::GPUCompatibilityRecommender compatibility_recommender_; + tflite::acceleration::GPUCompatibilityList compatibility_list_; }; } // namespace diff --git a/tensorflow/lite/experimental/acceleration/compatibility/BUILD b/tensorflow/lite/experimental/acceleration/compatibility/BUILD index 6adb6daaa6f..78a9d2eb8d8 100644 --- a/tensorflow/lite/experimental/acceleration/compatibility/BUILD +++ b/tensorflow/lite/experimental/acceleration/compatibility/BUILD @@ -152,6 +152,7 @@ cc_library( ":android_info", ":database_fbs", ":devicedb", + "//tensorflow/lite/delegates/gpu:delegate", "//tensorflow/lite/delegates/gpu/common:gpu_info", "@com_google_absl//absl/status", "@com_google_absl//absl/strings", @@ -159,41 +160,4 @@ cc_library( ], ) -cc_test( - name = "gpu_compatibility_test", - srcs = ["gpu_compatibility_test.cc"], - deps = [ - ":gpu_compatibility", - "@com_google_googletest//:gtest", - "@com_google_googletest//:gtest_main", - ], -) - -cc_library( - name = "gpu_compatibility_recommender", - srcs = [ - "gpu_compatibility_recommender.cc", - ], - hdrs = [ - "gpu_compatibility_recommender.h", - ], - deps = [ - ":android_info", - ":gpu_compatibility", - "//tensorflow/lite/delegates/gpu:delegate", - "//tensorflow/lite/delegates/gpu/common:gpu_info", - ], -) - -cc_test( - name = "gpu_compatibility_recommender_test", - srcs = ["gpu_compatibility_recommender_test.cc"], - tags = ["notap"], # Needs to be built with --copt=-DCL_DELEGATE_NO_GL - deps = [ - ":gpu_compatibility_recommender", - "@com_google_googletest//:gtest", - "@com_google_googletest//:gtest_main", - ], -) - tflite_portable_test_suite() diff --git a/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility.cc b/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility.cc index 1911d26b8df..e04f5d18db4 100644 --- a/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility.cc +++ b/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility.cc @@ -89,8 +89,13 @@ bool GPUCompatibilityList::Includes( return variables[gpu::kStatus] == std::string(gpu::kStatusSupported); } -bool GPUCompatibilityList::IsDatabaseLoaded() const { - return database_ != nullptr; +TfLiteGpuDelegateOptionsV2 GPUCompatibilityList::GetBestOptionsFor( + const AndroidInfo& /* android_info */, + const ::tflite::gpu::GpuInfo& /* gpu_info */) const { + // This method is for forwards-compatibility: the list may later include + // information about which backend to choose (OpenGL/OpenCL/Vulkan) or other + // options. 
+ return TfLiteGpuDelegateOptionsV2Default(); } } // namespace acceleration diff --git a/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility.h b/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility.h index 873151dca66..f975fe04f22 100644 --- a/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility.h +++ b/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility.h @@ -19,6 +19,7 @@ limitations under the License. #include #include "tensorflow/lite/delegates/gpu/common/gpu_info.h" +#include "tensorflow/lite/delegates/gpu/delegate.h" #include "tensorflow/lite/experimental/acceleration/compatibility/android_info.h" #include "tensorflow/lite/experimental/acceleration/compatibility/devicedb.h" @@ -31,41 +32,54 @@ namespace acceleration { // Android version, OpenGL ES version, GPU chipset etc. The support is based on // measure stability, correctness and peformance. For more detail see README.md. // -// Reads from the flatbuffer. // Example usage: -// tflite::acceleration::GPUCompatibilityList list; +// tflite::Interpreter* interpreter = ... ; // tflite::acceleration::AndroidInfo android_info; // tflite::gpu::GpuInfo gpu_info; -// ... -// if(list.Includes(android_info, gpu_info)){ -// // SUPPORTED. -// } else{ -// // UNSUPPORTED. +// EXPECT_OK(tflite::acceleration::RequestAndroidInfo(&android_info)); +// EXPECT_OK(tflite::gpu::gl::EglEnvironment::NewEglEnvironment(&env)); +// EXPECT_OK(tflite::gpu::gl::RequestGpuInfo(&tflite_gpu_info)); +// tflite::acceleration::GPUCompatibilityList list; +// TfLiteDelegate* gpu_delegate = nullptr; +// TfLiteGpuDelegateOptions gpu_options; +// if (list.Includes(android_info, gpu_info)) { +// gpu_options = list.BestOptionsFor(android_info, gpu_info); +// gpu_delegate = TfLiteGpuDelegateCreate(&gpu_options); +// EXPECT_EQ(interpreter->ModifyGraphWithDelegate(gpu_delegate), TfLiteOk); +// } else { +// // Fallback path. // } class GPUCompatibilityList { public: // Construct list from bundled data. GPUCompatibilityList(); - // Constructs list from the given flatbuffer. - explicit GPUCompatibilityList( - const unsigned char* compatibility_list_flatbuffer); // Returns true if the provided device specs are supported by the database. bool Includes(const AndroidInfo& android_info, const ::tflite::gpu::GpuInfo& gpu_info) const; + + // Returns the best TfLiteGpuDelegateOptionsV2 for the provided device specs + // based on the database. The output can be modified as desired before passing + // to delegate creation. + TfLiteGpuDelegateOptionsV2 GetBestOptionsFor( + const AndroidInfo& android_info, + const ::tflite::gpu::GpuInfo& gpu_info) const; + // Convert android_info and gpu_info into a set of variables used for querying // the list, and update variables from list data. See variables.h // and devicedb.h for more information. std::map CalculateVariables( const AndroidInfo& android_info, const ::tflite::gpu::GpuInfo& gpu_info) const; + GPUCompatibilityList(const GPUCompatibilityList&) = delete; GPUCompatibilityList& operator=(const GPUCompatibilityList&) = delete; - // Indicates if the database is loaded. 
- bool IsDatabaseLoaded() const; protected: + explicit GPUCompatibilityList( + const unsigned char* compatibility_list_flatbuffer); const DeviceDatabase* database_; }; + } // namespace acceleration } // namespace tflite diff --git a/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_recommender.cc b/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_recommender.cc deleted file mode 100644 index 1b625913323..00000000000 --- a/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_recommender.cc +++ /dev/null @@ -1,30 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include "tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_recommender.h" - -namespace tflite { -namespace acceleration { - -TfLiteGpuDelegateOptionsV2 GPUCompatibilityRecommender::GetBestOptionsFor( - const AndroidInfo& /* android_info */, - const ::tflite::gpu::GpuInfo& /* gpu_info */) const { - // This method is for forwards-compatibility: the list may later include - // information about which backend to choose (OpenGL/OpenCL/Vulkan) or other - // options. - return TfLiteGpuDelegateOptionsV2Default(); -} - -} // namespace acceleration -} // namespace tflite diff --git a/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_recommender.h b/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_recommender.h deleted file mode 100644 index 4443cfdf70f..00000000000 --- a/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_recommender.h +++ /dev/null @@ -1,64 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_EXPERIMENTAL_ACCELERATION_COMPATIBILITY_GPU_COMPATIBILITY_RECOMMENDER_H_ -#define TENSORFLOW_LITE_EXPERIMENTAL_ACCELERATION_COMPATIBILITY_GPU_COMPATIBILITY_RECOMMENDER_H_ - -#include "tensorflow/lite/delegates/gpu/common/gpu_info.h" -#include "tensorflow/lite/delegates/gpu/delegate.h" -#include "tensorflow/lite/experimental/acceleration/compatibility/android_info.h" -#include "tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility.h" - -namespace tflite { -namespace acceleration { - -// This class recommends best TfLiteGPU delegate options for Android devices. 
-// -// Example usage: -// tflite::Interpreter* interpreter = ... ; -// tflite::acceleration::AndroidInfo android_info; -// tflite::gpu::GpuInfo gpu_info; -// CHECK(tflite::acceleration::RequestAndroidInfo(&android_info)); -// CHECK(tflite::gpu::gl::EglEnvironment::NewEglEnvironment(&env)); -// CHECK(tflite::gpu::gl::RequestGpuInfo(&tflite_gpu_info)); -// tflite::acceleration::GPUCompatibilityRecommender recommender; -// TfLiteDelegate* gpu_delegate = nullptr; -// TfLiteGpuDelegateOptions gpu_options; -// if (list.Includes(android_info, gpu_info)) { -// gpu_options = recommender.BestOptionsFor(android_info, gpu_info); -// gpu_delegate = TfLiteGpuDelegateCreate(&gpu_options); -// CHECK_EQ(interpreter->ModifyGraphWithDelegate(gpu_delegate), TfLiteOk); -// } else { -// // Fallback path. -// } - -class GPUCompatibilityRecommender : public GPUCompatibilityList { - public: - GPUCompatibilityRecommender() {} - GPUCompatibilityRecommender(const GPUCompatibilityRecommender&) = delete; - GPUCompatibilityRecommender& operator=(const GPUCompatibilityRecommender&) = - delete; - - // Returns the best TfLiteGpuDelegateOptionsV2 for the provided device specs - // based on the database. The output can be modified as desired before passing - // to delegate creation. - TfLiteGpuDelegateOptionsV2 GetBestOptionsFor( - const AndroidInfo& android_info, - const ::tflite::gpu::GpuInfo& gpu_info) const; -}; - -} // namespace acceleration -} // namespace tflite - -#endif // TENSORFLOW_LITE_EXPERIMENTAL_ACCELERATION_COMPATIBILITY_GPU_COMPATIBILITY_RECOMMENDER_H_ diff --git a/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_recommender_test.cc b/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_recommender_test.cc deleted file mode 100644 index ebf793d5a94..00000000000 --- a/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_recommender_test.cc +++ /dev/null @@ -1,100 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#include "tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_recommender.h" - -#include -#include - -namespace { - -class GPUCompatibilityRecommenderTest : public ::testing::Test { - protected: - GPUCompatibilityRecommenderTest() { - recommender_ = - absl::make_unique(); - } - - std::unique_ptr - recommender_; -}; - -TEST_F(GPUCompatibilityRecommenderTest, Load) { - EXPECT_TRUE(recommender_->IsDatabaseLoaded()); -} - -TEST_F(GPUCompatibilityRecommenderTest, ReturnsSupportedForFullMatch) { - tflite::acceleration::AndroidInfo android_info = { - .android_sdk_version = "28", - .model = "redmi_note_7G960F", - .device = "lavender", - .manufacturer = "xiaomi"}; - tflite::gpu::GpuInfo tflite_gpu_info = { - .renderer_name = "adreno_(tm)_512", - .major_version = 3, - .minor_version = 2, - }; - EXPECT_TRUE(recommender_->Includes(android_info, tflite_gpu_info)); -} - -TEST_F(GPUCompatibilityRecommenderTest, ReturnsUnsupported) { - tflite::acceleration::AndroidInfo android_info = {.android_sdk_version = "28", - .model = "sm_g960f", - .device = "starlte", - .manufacturer = "samsung"}; - tflite::gpu::GpuInfo tflite_gpu_info = { - .renderer_name = "mali_g72", - .major_version = 3, - .minor_version = 2, - }; - - EXPECT_FALSE(recommender_->Includes(android_info, tflite_gpu_info)); -} - -TEST_F(GPUCompatibilityRecommenderTest, MissingInfoReturnsUnsupported) { - tflite::acceleration::AndroidInfo android_info = {.android_sdk_version = "23", - .model = "sm_g532f", - .device = "grandpplte", - .manufacturer = "samsung"}; - tflite::gpu::GpuInfo tflite_gpu_info = { - .renderer_name = "mali_t720", - .major_version = 3, - .minor_version = 1, - }; - EXPECT_FALSE(recommender_->Includes(android_info, tflite_gpu_info)); -} - -TEST_F(GPUCompatibilityRecommenderTest, ReturnsDefaultOptions) { - tflite::acceleration::AndroidInfo android_info; - tflite::gpu::GpuInfo tflite_gpu_info; - auto default_options = TfLiteGpuDelegateOptionsV2Default(); - auto best_options = - recommender_->GetBestOptionsFor(android_info, tflite_gpu_info); - EXPECT_EQ(best_options.is_precision_loss_allowed, - default_options.is_precision_loss_allowed); - EXPECT_EQ(best_options.inference_preference, - default_options.inference_preference); - EXPECT_EQ(best_options.inference_priority1, - default_options.inference_priority1); - EXPECT_EQ(best_options.inference_priority2, - default_options.inference_priority2); - EXPECT_EQ(best_options.inference_priority3, - default_options.inference_priority3); - EXPECT_EQ(best_options.experimental_flags, - default_options.experimental_flags); - EXPECT_EQ(best_options.max_delegated_partitions, - default_options.max_delegated_partitions); -} - -} // namespace diff --git a/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_test.cc b/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_test.cc deleted file mode 100644 index d300867a8b0..00000000000 --- a/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_test.cc +++ /dev/null @@ -1,61 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include "tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility.h" - -#include - -#include -#include - -namespace { - -class GPUCompatibilityTest : public ::testing::Test { - protected: - GPUCompatibilityTest() { - list_ = absl::make_unique(); - } - - std::unique_ptr list_; -}; - -TEST_F(GPUCompatibilityTest, Load) { EXPECT_TRUE(list_->IsDatabaseLoaded()); } - -TEST_F(GPUCompatibilityTest, ReturnsSupportedForFullMatch) { - tflite::acceleration::AndroidInfo android_info = {.android_sdk_version = "27", - .model = "cph1803", - .device = "cph1803", - .manufacturer = "Oppo"}; - tflite::gpu::GpuInfo tflite_gpu_info = { - .renderer_name = "Adreno (TM) 506", - .major_version = 3, - .minor_version = 2, - }; - EXPECT_TRUE(list_->Includes(android_info, tflite_gpu_info)); -} - -TEST_F(GPUCompatibilityTest, ReturnsUnsupportedForFullMatch) { - tflite::acceleration::AndroidInfo android_info = {.android_sdk_version = "28", - .model = "SM-G960F", - .device = "starlte", - .manufacturer = "Samsung"}; - tflite::gpu::GpuInfo tflite_gpu_info = { - .renderer_name = "Mali-G72", - .major_version = 3, - .minor_version = 2, - }; - EXPECT_FALSE(list_->Includes(android_info, tflite_gpu_info)); -} - -} // namespace From d38f00677fff3e7060789de8fea5a07018cca7fe Mon Sep 17 00:00:00 2001 From: Pavithra Vijay Date: Tue, 4 Aug 2020 21:32:32 -0700 Subject: [PATCH 2140/2522] Add `is_control_flow_graph` property to `CondBranchFuncGraph`. 
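The flag defaults to False on FuncGraph and is set to True in the v2 control-flow graphs (CondBranchFuncGraph, WhileCondFuncGraph, WhileBodyFuncGraph), so code that runs while tracing a cond/while branch can detect that it is inside a control-flow FuncGraph. The sketch below mirrors the added unit tests but goes through the public tf.cond entry point, which lowers to cond_v2 under v2 behavior; the attribute itself is an internal detail rather than documented public API.

    import tensorflow as tf

    @tf.function
    def f(x):

      def then_branch():
        y = x + 1.0
        # y was created while tracing the cond branch, so y.graph is a
        # CondBranchFuncGraph and now reports is_control_flow_graph == True.
        assert y.graph.is_control_flow_graph
        return y

      def else_branch():
        return x - 1.0

      return tf.cond(x > 0.0, then_branch, else_branch)

    print(f(tf.constant(2.0)).numpy())  # 3.0
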
PiperOrigin-RevId: 324952128 Change-Id: Ie75edbb09c366f20c913868834af3626bf403249 --- tensorflow/python/framework/func_graph.py | 1 + .../python/kernel_tests/cond_v2_test.py | 24 +++++++++++++++++++ .../python/kernel_tests/while_v2_test.py | 20 ++++++++++++++++ .../python/ops/control_flow_v2_func_graphs.py | 3 +++ 4 files changed, 48 insertions(+) diff --git a/tensorflow/python/framework/func_graph.py b/tensorflow/python/framework/func_graph.py index 55508c4803b..dbe0d57759b 100644 --- a/tensorflow/python/framework/func_graph.py +++ b/tensorflow/python/framework/func_graph.py @@ -192,6 +192,7 @@ class FuncGraph(ops.Graph): self.structured_outputs = None self._weak_variables = [] self._watched_variables = object_identity.ObjectIdentityWeakSet() + self.is_control_flow_graph = False outer_graph = ops.get_default_graph() self._weak_outer_graph = weakref.ref(outer_graph) diff --git a/tensorflow/python/kernel_tests/cond_v2_test.py b/tensorflow/python/kernel_tests/cond_v2_test.py index c64b608a253..b8829181747 100644 --- a/tensorflow/python/kernel_tests/cond_v2_test.py +++ b/tensorflow/python/kernel_tests/cond_v2_test.py @@ -1237,6 +1237,30 @@ class CondV2Test(test.TestCase): self.assertEqual(len(if_op.outputs), 1) # pylint: enable=g-deprecated-assert + def testIsControlFlowGraph(self): + x = constant_op.constant(1.0, name="x") + + @def_function.function + def f(c): + + def then_branch(): + i = x + 1 + self.assertTrue(i.graph.is_control_flow_graph) + return i + + def else_branch(): + i = x + 1 + self.assertTrue(i.graph.is_control_flow_graph) + return i + + return cond_v2.cond_v2(c, then_branch, else_branch) + + i = f(constant_op.constant(True)) + self.assertEqual(self.evaluate(i), 2.0) + + i = f(constant_op.constant(False)) + self.assertEqual(self.evaluate(i), 2.0) + class CondV2CollectionTest(test.TestCase): diff --git a/tensorflow/python/kernel_tests/while_v2_test.py b/tensorflow/python/kernel_tests/while_v2_test.py index e829edb0dfc..de2e8e3cc8d 100644 --- a/tensorflow/python/kernel_tests/while_v2_test.py +++ b/tensorflow/python/kernel_tests/while_v2_test.py @@ -1241,6 +1241,26 @@ class WhileV2Test(test.TestCase, parameterized.TestCase): config.experimental.executor_type = "SINGLE_THREADED_EXECUTOR" self._runBasicWithConfig(config) + def testIsControlFlowGraph(self): + x = constant_op.constant(0) + + @def_function.function + def F(c): + + def Cond(i): + self.assertTrue(i.graph.is_control_flow_graph) + return i < 2 + + def Body(i): + i = i + 1 + self.assertTrue(i.graph.is_control_flow_graph) + return i + + return while_loop_v2(Cond, Body, [c]) + + ret, = F(x) + self.assertEqual(2, self.evaluate(ret)) + def testImportFromSerializedWithFunctionInBody(self): serialized = """node { name: "Const" diff --git a/tensorflow/python/ops/control_flow_v2_func_graphs.py b/tensorflow/python/ops/control_flow_v2_func_graphs.py index 97e04f8d73d..23edd712797 100644 --- a/tensorflow/python/ops/control_flow_v2_func_graphs.py +++ b/tensorflow/python/ops/control_flow_v2_func_graphs.py @@ -30,6 +30,7 @@ class CondBranchFuncGraph(func_graph.FuncGraph): def __init__(self, *args, **kwargs): super(CondBranchFuncGraph, self).__init__(*args, **kwargs) + self.is_control_flow_graph = True if ops.executing_eagerly_outside_functions(): func_graph.override_func_graph_name_scope( self, self.outer_graph.get_name_scope()) @@ -43,6 +44,7 @@ class WhileCondFuncGraph(func_graph.FuncGraph): def __init__(self, *args, **kwargs): super(WhileCondFuncGraph, self).__init__(*args, **kwargs) + self.is_control_flow_graph = True if 
ops.executing_eagerly_outside_functions(): func_graph.override_func_graph_name_scope( self, self.outer_graph.get_name_scope()) @@ -56,6 +58,7 @@ class WhileBodyFuncGraph(func_graph.FuncGraph): def __init__(self, *args, **kwargs): super(WhileBodyFuncGraph, self).__init__(*args, **kwargs) + self.is_control_flow_graph = True if ops.executing_eagerly_outside_functions(): func_graph.override_func_graph_name_scope( self, self.outer_graph.get_name_scope()) From 54312c0e8fd3c2f2d2f3ee4812c8b0078fb24a8c Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Wed, 5 Aug 2020 05:06:49 +0000 Subject: [PATCH 2141/2522] fix build --- tensorflow/core/kernels/BUILD | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 47c7d41d0fe..dd51d6fcc26 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -2946,7 +2946,9 @@ cc_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", + "//tensorflow/core/framework:tensor", "//tensorflow/core/framework:tensor_shape_proto_cc", + "//tensorflow/core/framework:variant", "//tensorflow/core/lib/core:refcount", ], ) From 267aebacb555bf862bf1cb503f3af289cccd1d27 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 4 Aug 2020 22:30:46 -0700 Subject: [PATCH 2142/2522] Integrate LLVM at llvm/llvm-project@28e322ea9393 Updates LLVM usage to match [28e322ea9393](https://github.com/llvm/llvm-project/commit/28e322ea9393) PiperOrigin-RevId: 324957510 Change-Id: I9199f451363a62e89a4237cfd6ff44e90ea9768c --- .../mlir/hlo/tests/lhlo-legalize-to-llvm.mlir | 8 +-- .../gpu/tests/execute_memzero_thunk.mlir | 6 +- .../service/mlir_gpu/tests/add_as_kernel.hlo | 56 +++++++++---------- tensorflow/workspace.bzl | 4 +- third_party/mlir/BUILD | 8 +++ third_party/mlir/test.BUILD | 27 +++++++++ 6 files changed, 72 insertions(+), 37 deletions(-) diff --git a/tensorflow/compiler/mlir/hlo/tests/lhlo-legalize-to-llvm.mlir b/tensorflow/compiler/mlir/hlo/tests/lhlo-legalize-to-llvm.mlir index a25a508b2d3..5bb1d475b24 100644 --- a/tensorflow/compiler/mlir/hlo/tests/lhlo-legalize-to-llvm.mlir +++ b/tensorflow/compiler/mlir/hlo/tests/lhlo-legalize-to-llvm.mlir @@ -11,11 +11,11 @@ func @static_memref_cast(%buf : memref<10x1x5xf32>) { // CHECK: %[[MEMREF_BLDR_0:.*]] = llvm.mlir.undef : [[DESCRIPTOR_TYPE_2D:!.*]] // CHECK: %[[IN_PTR:.*]] = llvm.extractvalue %[[INPUT_MEMREF:.*]][0] : [[DESCRIPTOR_TYPE_3D]] -// CHECK: %[[PTR:.*]] = llvm.bitcast %[[IN_PTR]] : !llvm<"float*"> to !llvm<"float*"> +// CHECK: %[[PTR:.*]] = llvm.bitcast %[[IN_PTR]] : !llvm.ptr to !llvm.ptr // CHECK: %[[MEMREF_BLDR_1:.*]] = llvm.insertvalue %[[PTR]], %[[MEMREF_BLDR_0]][0] : [[DESCRIPTOR_TYPE_2D]] // CHECK: %[[IN_ALIGNED_PTR:.*]] = llvm.extractvalue %[[INPUT_MEMREF]][1] : [[DESCRIPTOR_TYPE_3D]] -// CHECK: %[[ALIGNED_PTR:.*]] = llvm.bitcast %[[IN_ALIGNED_PTR]] : !llvm<"float*"> to !llvm<"float*"> +// CHECK: %[[ALIGNED_PTR:.*]] = llvm.bitcast %[[IN_ALIGNED_PTR]] : !llvm.ptr to !llvm.ptr // CHECK: %[[MEMREF_BLDR_2:.*]] = llvm.insertvalue %[[ALIGNED_PTR]], %[[MEMREF_BLDR_1]][1] : [[DESCRIPTOR_TYPE_2D]] // CHECK: %[[C2:.*]] = llvm.mlir.constant(2 : index) : !llvm.i64 @@ -50,11 +50,11 @@ func @dynamic_memref_cast(%buf : memref) { // CHECK: %[[MEMREF_BLDR_0:.*]] = llvm.mlir.undef : [[DESCRIPTOR_TYPE:!.*]] // CHECK: %[[IN_PTR:.*]] = llvm.extractvalue %[[INPUT_MEMREF:.*]][0] : [[DESCRIPTOR_TYPE]] -// CHECK: %[[PTR:.*]] = llvm.bitcast %[[IN_PTR]] : !llvm<"float*"> to !llvm<"float*"> +// CHECK: 
%[[PTR:.*]] = llvm.bitcast %[[IN_PTR]] : !llvm.ptr to !llvm.ptr // CHECK: %[[MEMREF_BLDR_1:.*]] = llvm.insertvalue %[[PTR]], %[[MEMREF_BLDR_0]][0] : [[DESCRIPTOR_TYPE]] // CHECK: %[[IN_ALIGNED_PTR:.*]] = llvm.extractvalue %[[INPUT_MEMREF]][1] : [[DESCRIPTOR_TYPE]] -// CHECK: %[[ALIGNED_PTR:.*]] = llvm.bitcast %[[IN_ALIGNED_PTR]] : !llvm<"float*"> to !llvm<"float*"> +// CHECK: %[[ALIGNED_PTR:.*]] = llvm.bitcast %[[IN_ALIGNED_PTR]] : !llvm.ptr to !llvm.ptr // CHECK: %[[MEMREF_BLDR_2:.*]] = llvm.insertvalue %[[ALIGNED_PTR]], %[[MEMREF_BLDR_1]][1] : [[DESCRIPTOR_TYPE]] // CHECK: %[[SRC_OFFSET:.*]] = llvm.extractvalue %[[INPUT_MEMREF]][2] : [[DESCRIPTOR_TYPE]] diff --git a/tensorflow/compiler/xla/service/gpu/tests/execute_memzero_thunk.mlir b/tensorflow/compiler/xla/service/gpu/tests/execute_memzero_thunk.mlir index 0a891833cd3..82f3f06db5c 100644 --- a/tensorflow/compiler/xla/service/gpu/tests/execute_memzero_thunk.mlir +++ b/tensorflow/compiler/xla/service/gpu/tests/execute_memzero_thunk.mlir @@ -1,15 +1,15 @@ // RUN: xla-thunks-opt %s | FileCheck --color --dump-input=fail %s -func @main( %execute_params: !llvm<"i8*"> ) { +func @main( %execute_params: !llvm.ptr ) { // CHECK: "xla_thunks.execute_memzero_thunk" // CHECK-SAME: {allocation_index = 0 : i64, offset = 128 : i64, size = 1024 : i64} - // CHECK-SAME: (!llvm<"i8*">) -> (i1, !llvm<"i8*">) + // CHECK-SAME: (!llvm.ptr) -> (i1, !llvm.ptr) %ok, %error_message = "xla_thunks.execute_memzero_thunk"( %execute_params ) { allocation_slice = { allocation_index = 0 , offset = 128 , size = 1024 } } - : (!llvm<"i8*">) -> (i1, !llvm<"i8*">) + : (!llvm.ptr) -> (i1, !llvm.ptr) return } diff --git a/tensorflow/compiler/xla/service/mlir_gpu/tests/add_as_kernel.hlo b/tensorflow/compiler/xla/service/mlir_gpu/tests/add_as_kernel.hlo index 953eb2022f8..8d7930ea8c0 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/tests/add_as_kernel.hlo +++ b/tensorflow/compiler/xla/service/mlir_gpu/tests/add_as_kernel.hlo @@ -7,24 +7,24 @@ ENTRY %Add (x: f32[2,2], y: f32[2,2]) -> f32[2,2] { ROOT %add = f32[2,2]{1,0} add(f32[2,2]{1,0} %x, f32[2,2]{1,0} %y) } -// CHECK: func @add_kernel(%[[ARG0:.*]]: [[TYPE:!llvm<.*]], %[[ARG1:.*]]: [[TYPE]], %[[ARG2:.*]]: [[TYPE]] +// CHECK: func @add_kernel(%[[ARG0:.*]]: [[TYPE:!llvm\..*]], %[[ARG1:.*]]: [[TYPE]], %[[ARG2:.*]]: [[TYPE]] // // Check that relevant sizes and strides are emitted. 
// -// CHECK: %[[CAST0:.*]] = llvm.bitcast %[[ARG0:.*]] : !llvm<"i8*"> to !llvm<"float*"> +// CHECK: %[[CAST0:.*]] = llvm.bitcast %[[ARG0:.*]] : !llvm.ptr to !llvm.ptr // CHECK: %[[SIZE00:.*]] = llvm.mlir.constant(2 : i64) : !llvm.i64 // CHECK: %[[SIZE01:.*]] = llvm.mlir.constant(2 : i64) : !llvm.i64 // CHECK: %[[STRIDE01:.*]] = llvm.mlir.constant(1 : i64) : !llvm.i64 // CHECK: %[[STRIDE00:.*]] = llvm.mlir.constant(2 : i64) : !llvm.i64 -// CHECK: %[[CAST1:.*]] = llvm.bitcast %[[ARG1:.*]] : !llvm<"i8*"> to !llvm<"float*"> +// CHECK: %[[CAST1:.*]] = llvm.bitcast %[[ARG1:.*]] : !llvm.ptr to !llvm.ptr // CHECK: %[[SIZE10:.*]] = llvm.mlir.constant(2 : i64) : !llvm.i64 // CHECK: %[[SIZE11:.*]] = llvm.mlir.constant(2 : i64) : !llvm.i64 // CHECK: %[[STRIDE11:.*]] = llvm.mlir.constant(1 : i64) : !llvm.i64 // CHECK: %[[STRIDE10:.*]] = llvm.mlir.constant(2 : i64) : !llvm.i64 -// CHECK: %[[CAST2:.*]] = llvm.bitcast %[[ARG2:.*]] : !llvm<"i8*"> to !llvm<"float*"> +// CHECK: %[[CAST2:.*]] = llvm.bitcast %[[ARG2:.*]] : !llvm.ptr to !llvm.ptr // CHECK: %[[SIZE20:.*]] = llvm.mlir.constant(2 : i64) : !llvm.i64 // CHECK: %[[SIZE21:.*]] = llvm.mlir.constant(2 : i64) : !llvm.i64 // CHECK: %[[STRIDE21:.*]] = llvm.mlir.constant(1 : i64) : !llvm.i64 @@ -34,30 +34,30 @@ ENTRY %Add (x: f32[2,2], y: f32[2,2]) -> f32[2,2] { // Check that the emitted sizes and strides, as well the pointers to HLO buffers, // are inserted into the memref descriptors. // -// CHECK: %[[DESC0:.*]] = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK: %[[DESC01:.*]] = llvm.insertvalue %[[CAST0]], %[[DESC0]][0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK: %[[DESC02:.*]] = llvm.insertvalue %[[CAST0]], %[[DESC01]][1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK: %[[DESC03:.*]] = llvm.insertvalue %{{.*}}, %[[DESC02]][2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK: %[[DESC04:.*]] = llvm.insertvalue %[[SIZE00]], %[[DESC03]][3, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK: %[[DESC05:.*]] = llvm.insertvalue %[[STRIDE00]], %[[DESC04]][4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK: %[[DESC06:.*]] = llvm.insertvalue %[[SIZE01]], %[[DESC05]][3, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK: %{{.*}} = llvm.insertvalue %[[STRIDE01]], %[[DESC06]][4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK: %[[DESC0:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[DESC01:.*]] = llvm.insertvalue %[[CAST0]], %[[DESC0]][0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[DESC02:.*]] = llvm.insertvalue %[[CAST0]], %[[DESC01]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[DESC03:.*]] = llvm.insertvalue %{{.*}}, %[[DESC02]][2] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[DESC04:.*]] = llvm.insertvalue %[[SIZE00]], %[[DESC03]][3, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[DESC05:.*]] = llvm.insertvalue %[[STRIDE00]], %[[DESC04]][4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[DESC06:.*]] = llvm.insertvalue %[[SIZE01]], %[[DESC05]][3, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %{{.*}} = llvm.insertvalue %[[STRIDE01]], %[[DESC06]][4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> -// CHECK: %[[DESC1:.*]] = 
llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK: %[[DESC11:.*]] = llvm.insertvalue %[[CAST1]], %[[DESC1]][0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK: %[[DESC12:.*]] = llvm.insertvalue %[[CAST1]], %[[DESC11]][1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK: %[[DESC13:.*]] = llvm.insertvalue %{{.*}}, %[[DESC12]][2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK: %[[DESC14:.*]] = llvm.insertvalue %[[SIZE10]], %[[DESC13]][3, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK: %[[DESC15:.*]] = llvm.insertvalue %[[STRIDE10]], %[[DESC14]][4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK: %[[DESC16:.*]] = llvm.insertvalue %[[SIZE11]], %[[DESC15]][3, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK: %{{.*}} = llvm.insertvalue %[[STRIDE11]], %[[DESC16]][4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK: %[[DESC1:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[DESC11:.*]] = llvm.insertvalue %[[CAST1]], %[[DESC1]][0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[DESC12:.*]] = llvm.insertvalue %[[CAST1]], %[[DESC11]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[DESC13:.*]] = llvm.insertvalue %{{.*}}, %[[DESC12]][2] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[DESC14:.*]] = llvm.insertvalue %[[SIZE10]], %[[DESC13]][3, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[DESC15:.*]] = llvm.insertvalue %[[STRIDE10]], %[[DESC14]][4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[DESC16:.*]] = llvm.insertvalue %[[SIZE11]], %[[DESC15]][3, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %{{.*}} = llvm.insertvalue %[[STRIDE11]], %[[DESC16]][4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> -// CHECK: %[[DESC2:.*]] = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK: %[[DESC21:.*]] = llvm.insertvalue %[[CAST2]], %[[DESC2]][0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK: %[[DESC22:.*]] = llvm.insertvalue %[[CAST2]], %[[DESC21]][1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK: %[[DESC23:.*]] = llvm.insertvalue %{{.*}}, %[[DESC22]][2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK: %[[DESC24:.*]] = llvm.insertvalue %[[SIZE20]], %[[DESC23]][3, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK: %[[DESC25:.*]] = llvm.insertvalue %[[STRIDE20]], %[[DESC24]][4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK: %[[DESC26:.*]] = llvm.insertvalue %[[SIZE21]], %[[DESC25]][3, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> -// CHECK: %{{.*}} = llvm.insertvalue %[[STRIDE21]], %[[DESC26]][4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> +// CHECK: %[[DESC2:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[DESC21:.*]] = llvm.insertvalue %[[CAST2]], %[[DESC2]][0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[DESC22:.*]] = llvm.insertvalue %[[CAST2]], %[[DESC21]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[DESC23:.*]] = llvm.insertvalue %{{.*}}, %[[DESC22]][2] : !llvm.struct<(ptr, ptr, i64, array<2 x 
i64>, array<2 x i64>)> +// CHECK: %[[DESC24:.*]] = llvm.insertvalue %[[SIZE20]], %[[DESC23]][3, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[DESC25:.*]] = llvm.insertvalue %[[STRIDE20]], %[[DESC24]][4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[DESC26:.*]] = llvm.insertvalue %[[SIZE21]], %[[DESC25]][3, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %{{.*}} = llvm.insertvalue %[[STRIDE21]], %[[DESC26]][4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 29cba080fa1..b770dfeead5 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -712,8 +712,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "b5059b7140232559ed123cb94d4e8f75ca9a44dc" - LLVM_SHA256 = "3075583f88b572da4afb1340281b0e170d51ef03ba6eb2965e7dc8288cbff153" + LLVM_COMMIT = "28e322ea9393e6b3841886006dd170ddd810fd9b" + LLVM_SHA256 = "438268a47b69687ea5e588a285a2255de414addc36e0405e1d70f7cb5208aa75" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), diff --git a/third_party/mlir/BUILD b/third_party/mlir/BUILD index 4f2873af3dd..3941375bc02 100644 --- a/third_party/mlir/BUILD +++ b/third_party/mlir/BUILD @@ -927,6 +927,7 @@ cc_library( ":DialectUtils", ":EDSC", ":IR", + ":LinalgOps", ":SCFDialect", ":SideEffectInterfaces", ":StandardOps", @@ -2368,6 +2369,7 @@ cc_library( ":ConversionPassIncGen", ":IR", ":LLVMDialect", + ":Parser", ":Pass", ":StandardOps", ":Support", @@ -2596,9 +2598,11 @@ cc_library( "lib/Target/LLVMIR/DebugTranslation.cpp", "lib/Target/LLVMIR/DebugTranslation.h", "lib/Target/LLVMIR/ModuleTranslation.cpp", + "lib/Target/LLVMIR/TypeTranslation.cpp", ], hdrs = [ "include/mlir/Target/LLVMIR/ModuleTranslation.h", + "include/mlir/Target/LLVMIR/TypeTranslation.h", ], includes = ["include"], deps = [ @@ -2771,6 +2775,7 @@ cc_library( "@llvm-project//mlir/test:TestAffine", "@llvm-project//mlir/test:TestDialect", "@llvm-project//mlir/test:TestIR", + "@llvm-project//mlir/test:TestLLVMIR", "@llvm-project//mlir/test:TestPass", "@llvm-project//mlir/test:TestReducer", "@llvm-project//mlir/test:TestSPIRV", @@ -2801,6 +2806,7 @@ cc_library( ":Support", ":Translation", "@llvm-project//llvm:Support", + "@llvm-project//mlir/test:TestLLVMTypeTranslation", ], ) @@ -2922,6 +2928,7 @@ cc_binary( "@llvm-project//mlir/test:TestAffine", "@llvm-project//mlir/test:TestDialect", "@llvm-project//mlir/test:TestIR", + "@llvm-project//mlir/test:TestLLVMIR", "@llvm-project//mlir/test:TestPass", "@llvm-project//mlir/test:TestReducer", "@llvm-project//mlir/test:TestSPIRV", @@ -3655,6 +3662,7 @@ cc_library( ":EDSC", ":IR", ":LLVMDialect", + ":LLVMIRModuleTranslation", ":Pass", ":StandardOps", ":StandardToLLVM", diff --git a/third_party/mlir/test.BUILD b/third_party/mlir/test.BUILD index f507842a639..6c4eeecc346 100644 --- a/third_party/mlir/test.BUILD +++ b/third_party/mlir/test.BUILD @@ -165,6 +165,20 @@ cc_library( ], ) +cc_library( + name = "TestLLVMTypeTranslation", + srcs = [ + "lib/Target/TestLLVMTypeTranslation.cpp", + ], + deps = [ + ":TestLLVMIR", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:LLVMDialect", + "@llvm-project//mlir:LLVMIRModuleTranslation", + 
"@llvm-project//mlir:Translation", + ], +) + cc_library( name = "TestTransforms", srcs = glob(["lib/Transforms/*.cpp"]), @@ -216,6 +230,19 @@ cc_library( ], ) +cc_library( + name = "TestLLVMIR", + srcs = [ + "lib/Dialect/LLVMIR/LLVMTypeTestDialect.cpp", + ], + deps = [ + "@llvm-project//llvm:Support", + "@llvm-project//mlir:Dialect", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:LLVMDialect", + ], +) + cc_library( name = "TestSPIRV", srcs = glob([ From 822eeba7ed45399ec4cdde8d584d5fc997d340bf Mon Sep 17 00:00:00 2001 From: Rick Chao Date: Tue, 4 Aug 2020 22:57:02 -0700 Subject: [PATCH 2143/2522] MultiProcessRunner: When subprocesses time out, prioritize error reporting from subprocesses over the fact that it times out. PiperOrigin-RevId: 324960220 Change-Id: Ic5258466053e01eee78d6021fb1d5680676433d9 --- .../python/distribute/multi_process_runner.py | 12 ++++++++---- .../distribute/multi_process_runner_test.py | 19 ++++++++++++++++++- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/distribute/multi_process_runner.py b/tensorflow/python/distribute/multi_process_runner.py index 4ded663e588..6d0854f18d7 100644 --- a/tensorflow/python/distribute/multi_process_runner.py +++ b/tensorflow/python/distribute/multi_process_runner.py @@ -524,6 +524,12 @@ class MultiProcessRunner(object): if all(p.exitcode is not None for p in self._processes.values()): return + def _reraise_if_subprocess_error(self, process_statuses): + for process_status in process_statuses.values(): + assert isinstance(process_status, _ProcessStatusInfo) + if not process_status.is_successful: + six.reraise(*process_status.exc_info) + def join(self, timeout=_DEFAULT_TIMEOUT_SEC): """Joins all the processes with timeout. @@ -587,6 +593,7 @@ class MultiProcessRunner(object): self.terminate_all() self._watchdog_thread.join() process_statuses = self._get_process_statuses() + self._reraise_if_subprocess_error(process_statuses) raise SubprocessTimeoutError('one or more subprocesses timed out.', self._get_mpr_result(process_statuses)) @@ -594,10 +601,7 @@ class MultiProcessRunner(object): logging.info('%s-%d exit code: %s', task_type, task_id, p.exitcode) process_statuses = self._get_process_statuses() - for process_status in process_statuses.values(): - assert isinstance(process_status, _ProcessStatusInfo) - if not process_status.is_successful: - six.reraise(*process_status.exc_info) + self._reraise_if_subprocess_error(process_statuses) # Checking all the processes that are expected to exit properly. 
for (task_type, task_id), p in self._processes.items(): diff --git a/tensorflow/python/distribute/multi_process_runner_test.py b/tensorflow/python/distribute/multi_process_runner_test.py index 0aa214d3ca4..b7d8acf55c6 100644 --- a/tensorflow/python/distribute/multi_process_runner_test.py +++ b/tensorflow/python/distribute/multi_process_runner_test.py @@ -414,7 +414,7 @@ class MultiProcessRunnerTest(test.TestCase): multi_worker_test_base.create_cluster_spec(num_workers=1), auto_restart=True) mpr.start() - with self.assertRaises(multi_process_runner.SubprocessTimeoutError): + with self.assertRaises(ValueError): mpr.join(timeout=10) def test_auto_restart_and_chief(self): @@ -478,6 +478,23 @@ class MultiProcessRunnerTest(test.TestCase): mpr.join(timeout=20) self.assertEqual(counter.value, 2) + def test_error_reporting_overrides_timeout_reporting(self): + + def proc_func(): + if self._worker_idx() == 1: + time.sleep(10000) + raise ValueError('Worker 0 errored') + + mpr = multi_process_runner.MultiProcessRunner( + proc_func, + multi_worker_test_base.create_cluster_spec(num_workers=2)) + mpr.start() + + with self.assertRaisesRegex( + ValueError, + 'Worker 0 errored'): + mpr.join(timeout=20) + class MultiProcessPoolRunnerTest(test.TestCase): From e2cba3e0a29c63dc7c255da4bbea034438c59736 Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Fri, 31 Jul 2020 14:22:53 -0700 Subject: [PATCH 2144/2522] Moving some filesystems to Transactional API --- .../filesystem/modular_filesystem.cc | 93 +++++++++-------- .../filesystem/modular_filesystem.h | 99 +++++++------------ .../common_runtime/constant_folding_test.cc | 2 +- .../kernels/immutable_constant_op_test.cc | 2 +- tensorflow/core/platform/env_test.cc | 8 +- tensorflow/core/platform/file_system.h | 8 +- tensorflow/core/platform/file_system_test.cc | 14 +-- tensorflow/core/platform/null_file_system.h | 61 +++++------- tensorflow/core/platform/ram_file_system.h | 67 +++++-------- .../core/platform/retrying_file_system.h | 88 +++++++---------- .../platform/retrying_file_system_test.cc | 73 ++++++-------- .../python/framework/test_file_system.cc | 6 +- 12 files changed, 215 insertions(+), 306 deletions(-) diff --git a/tensorflow/c/experimental/filesystem/modular_filesystem.cc b/tensorflow/c/experimental/filesystem/modular_filesystem.cc index 00a587521fd..9c8d3518800 100644 --- a/tensorflow/c/experimental/filesystem/modular_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/modular_filesystem.cc @@ -35,8 +35,8 @@ using UniquePtrTo_TF_Status = ::std::unique_ptr; Status ModularFileSystem::NewRandomAccessFile( - const std::string& fname, - std::unique_ptr* result /*, TransactionToken* token */) { + const std::string& fname, TransactionToken* token, + std::unique_ptr* result) { if (ops_->new_random_access_file == nullptr) return errors::Unimplemented(tensorflow::strings::StrCat( "Filesystem for ", fname, " does not support NewRandomAccessFile()")); @@ -55,8 +55,8 @@ Status ModularFileSystem::NewRandomAccessFile( } Status ModularFileSystem::NewWritableFile( - const std::string& fname, - std::unique_ptr* result /*, TransactionToken* token */) { + const std::string& fname, TransactionToken* token, + std::unique_ptr* result) { if (ops_->new_writable_file == nullptr) return errors::Unimplemented(tensorflow::strings::StrCat( "Filesystem for ", fname, " does not support NewWritableFile()")); @@ -75,8 +75,8 @@ Status ModularFileSystem::NewWritableFile( } Status ModularFileSystem::NewAppendableFile( - const std::string& fname, - std::unique_ptr* result /*, 
TransactionToken* token */) { + const std::string& fname, TransactionToken* token, + std::unique_ptr* result) { if (ops_->new_appendable_file == nullptr) return errors::Unimplemented(tensorflow::strings::StrCat( "Filesystem for ", fname, " does not support NewAppendableFile()")); @@ -95,8 +95,8 @@ Status ModularFileSystem::NewAppendableFile( } Status ModularFileSystem::NewReadOnlyMemoryRegionFromFile( - const std::string& fname, std::unique_ptr* - result /*, TransactionToken* token */) { + const std::string& fname, TransactionToken* token, + std::unique_ptr* result) { if (ops_->new_read_only_memory_region_from_file == nullptr) return errors::Unimplemented(tensorflow::strings::StrCat( "Filesystem for ", fname, @@ -116,8 +116,8 @@ Status ModularFileSystem::NewReadOnlyMemoryRegionFromFile( return StatusFromTF_Status(plugin_status.get()); } -Status ModularFileSystem::FileExists( - const std::string& fname /*, TransactionToken* token */) { +Status ModularFileSystem::FileExists(const std::string& fname, + TransactionToken* token) { if (ops_->path_exists == nullptr) return errors::Unimplemented(tensorflow::strings::StrCat( "Filesystem for ", fname, " does not support FileExists()")); @@ -129,9 +129,9 @@ Status ModularFileSystem::FileExists( return StatusFromTF_Status(plugin_status.get()); } -bool ModularFileSystem::FilesExist( - const std::vector& files, - std::vector* status /*, TransactionToken* token */) { +bool ModularFileSystem::FilesExist(const std::vector& files, + TransactionToken* token, + std::vector* status) { if (ops_->paths_exist == nullptr) return FileSystem::FilesExist(files, status); @@ -162,9 +162,9 @@ bool ModularFileSystem::FilesExist( return result; } -Status ModularFileSystem::GetChildren( - const std::string& dir, - std::vector* result /*, TransactionToken* token */) { +Status ModularFileSystem::GetChildren(const std::string& dir, + TransactionToken* token, + std::vector* result) { if (ops_->get_children == nullptr) return errors::Unimplemented(tensorflow::strings::StrCat( "Filesystem for ", dir, " does not support GetChildren()")); @@ -188,9 +188,9 @@ Status ModularFileSystem::GetChildren( return StatusFromTF_Status(plugin_status.get()); } -Status ModularFileSystem::GetMatchingPaths( - const std::string& pattern, - std::vector* result /*, TransactionToken* token */) { +Status ModularFileSystem::GetMatchingPaths(const std::string& pattern, + TransactionToken* token, + std::vector* result) { if (ops_->get_matching_paths == nullptr) return internal::GetMatchingPaths(this, Env::Default(), pattern, result); @@ -211,8 +211,8 @@ Status ModularFileSystem::GetMatchingPaths( return StatusFromTF_Status(plugin_status.get()); } -Status ModularFileSystem::DeleteFile( - const std::string& fname /*, TransactionToken* token */) { +Status ModularFileSystem::DeleteFile(const std::string& fname, + TransactionToken* token) { if (ops_->delete_file == nullptr) return errors::Unimplemented(tensorflow::strings::StrCat( "Filesystem for ", fname, " does not support DeleteFile()")); @@ -224,9 +224,10 @@ Status ModularFileSystem::DeleteFile( return StatusFromTF_Status(plugin_status.get()); } -Status ModularFileSystem::DeleteRecursively( - const std::string& dirname, int64* undeleted_files, - int64* undeleted_dirs /*, TransactionToken* token */) { +Status ModularFileSystem::DeleteRecursively(const std::string& dirname, + TransactionToken* token, + int64* undeleted_files, + int64* undeleted_dirs) { if (undeleted_files == nullptr || undeleted_dirs == nullptr) return errors::FailedPrecondition( 
"DeleteRecursively must not be called with `undeleted_files` or " @@ -247,8 +248,8 @@ Status ModularFileSystem::DeleteRecursively( return StatusFromTF_Status(plugin_status.get()); } -Status ModularFileSystem::DeleteDir( - const std::string& dirname /*, TransactionToken* token */) { +Status ModularFileSystem::DeleteDir(const std::string& dirname, + TransactionToken* token) { if (ops_->delete_dir == nullptr) return errors::Unimplemented(tensorflow::strings::StrCat( "Filesystem for ", dirname, " does not support DeleteDir()")); @@ -260,8 +261,8 @@ Status ModularFileSystem::DeleteDir( return StatusFromTF_Status(plugin_status.get()); } -Status ModularFileSystem::RecursivelyCreateDir( - const std::string& dirname /*, TransactionToken* token */) { +Status ModularFileSystem::RecursivelyCreateDir(const std::string& dirname, + TransactionToken* token) { if (ops_->recursively_create_dir == nullptr) return FileSystem::RecursivelyCreateDir(dirname); @@ -272,8 +273,8 @@ Status ModularFileSystem::RecursivelyCreateDir( return StatusFromTF_Status(plugin_status.get()); } -Status ModularFileSystem::CreateDir( - const std::string& dirname /*, TransactionToken* token */) { +Status ModularFileSystem::CreateDir(const std::string& dirname, + TransactionToken* token) { if (ops_->create_dir == nullptr) return errors::Unimplemented(tensorflow::strings::StrCat( "Filesystem for ", dirname, " does not support CreateDir()")); @@ -285,9 +286,8 @@ Status ModularFileSystem::CreateDir( return StatusFromTF_Status(plugin_status.get()); } -Status ModularFileSystem::Stat( - const std::string& fname, - FileStatistics* stat /*, TransactionToken* token */) { +Status ModularFileSystem::Stat(const std::string& fname, + TransactionToken* token, FileStatistics* stat) { if (ops_->stat == nullptr) return errors::Unimplemented(tensorflow::strings::StrCat( "Filesystem for ", fname, " does not support Stat()")); @@ -310,8 +310,8 @@ Status ModularFileSystem::Stat( return StatusFromTF_Status(plugin_status.get()); } -Status ModularFileSystem::IsDirectory( - const std::string& name /*, TransactionToken* token */) { +Status ModularFileSystem::IsDirectory(const std::string& name, + TransactionToken* token) { if (ops_->is_directory == nullptr) return FileSystem::IsDirectory(name); UniquePtrTo_TF_Status plugin_status(TF_NewStatus(), TF_DeleteStatus); @@ -321,9 +321,9 @@ Status ModularFileSystem::IsDirectory( return StatusFromTF_Status(plugin_status.get()); } -Status ModularFileSystem::GetFileSize( - const std::string& fname, - uint64* file_size /*, TransactionToken* token */) { +Status ModularFileSystem::GetFileSize(const std::string& fname, + TransactionToken* token, + uint64* file_size) { if (ops_->get_file_size == nullptr) { FileStatistics stat; Status status = Stat(fname, &stat); @@ -342,9 +342,9 @@ Status ModularFileSystem::GetFileSize( return StatusFromTF_Status(plugin_status.get()); } -Status ModularFileSystem::RenameFile( - const std::string& src, - const std::string& target /*, TransactionToken* token */) { +Status ModularFileSystem::RenameFile(const std::string& src, + const std::string& target, + TransactionToken* token) { if (ops_->rename_file == nullptr) { Status status = CopyFile(src, target); if (status.ok()) status = DeleteFile(src); @@ -359,9 +359,9 @@ Status ModularFileSystem::RenameFile( return StatusFromTF_Status(plugin_status.get()); } -Status ModularFileSystem::CopyFile( - const std::string& src, - const std::string& target /*, TransactionToken* token */) { +Status ModularFileSystem::CopyFile(const std::string& src, + const 
std::string& target, + TransactionToken* token) { if (ops_->copy_file == nullptr) return FileSystem::CopyFile(src, target); UniquePtrTo_TF_Status plugin_status(TF_NewStatus(), TF_DeleteStatus); @@ -372,8 +372,7 @@ Status ModularFileSystem::CopyFile( return StatusFromTF_Status(plugin_status.get()); } -std::string ModularFileSystem::TranslateName( - const std::string& name /*, TransactionToken* token */) const { +std::string ModularFileSystem::TranslateName(const std::string& name) const { if (ops_->translate_name == nullptr) return FileSystem::TranslateName(name); char* p = ops_->translate_name(filesystem_.get(), name.c_str()); @@ -385,7 +384,7 @@ std::string ModularFileSystem::TranslateName( return ret; } -void ModularFileSystem::FlushCaches(/*TransactionToken* token*/) { +void ModularFileSystem::FlushCaches(TransactionToken* token) { if (ops_->flush_caches != nullptr) ops_->flush_caches(filesystem_.get()); } diff --git a/tensorflow/c/experimental/filesystem/modular_filesystem.h b/tensorflow/c/experimental/filesystem/modular_filesystem.h index a2639152eff..6495d97ebf1 100644 --- a/tensorflow/c/experimental/filesystem/modular_filesystem.h +++ b/tensorflow/c/experimental/filesystem/modular_filesystem.h @@ -60,70 +60,45 @@ class ModularFileSystem final : public FileSystem { ~ModularFileSystem() override { ops_->cleanup(filesystem_.get()); } Status NewRandomAccessFile( - const std::string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override; - Status NewWritableFile( - const std::string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override; - Status NewAppendableFile( - const std::string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override; + const std::string& fname, TransactionToken* token, + std::unique_ptr* result) override; + Status NewWritableFile(const std::string& fname, TransactionToken* token, + std::unique_ptr* result) override; + Status NewAppendableFile(const std::string& fname, TransactionToken* token, + std::unique_ptr* result) override; Status NewReadOnlyMemoryRegionFromFile( - const std::string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override; - Status FileExists( - const std::string& fname /*, TransactionToken* token = nullptr */) - override; + const std::string& fname, TransactionToken* token, + std::unique_ptr* result) override; + Status FileExists(const std::string& fname, TransactionToken* token) override; bool FilesExist(const std::vector& files, - std::vector* - status /*, TransactionToken* token = nullptr */) override; - Status GetChildren( - const std::string& dir, - std::vector* result /*, TransactionToken* token = nullptr */) - override; - Status GetMatchingPaths( - const std::string& pattern, - std::vector* - results /*, TransactionToken* token = nullptr */) override; - Status DeleteFile( - const std::string& fname /*, TransactionToken* token = nullptr */) - override; - Status DeleteRecursively( - const std::string& dirname, int64* undeleted_files, - int64* undeleted_dirs /*, TransactionToken* token = nullptr */) override; - Status DeleteDir( - const std::string& dirname /*, TransactionToken* token = nullptr */) - override; - Status RecursivelyCreateDir( - const std::string& dirname /*, TransactionToken* token = nullptr */) - override; - Status CreateDir( - const std::string& dirname /*, TransactionToken* token = nullptr */) - override; - Status Stat( - const std::string& fname, - FileStatistics* stat /*, TransactionToken* token 
= nullptr */) override; - Status IsDirectory( - const std::string& fname /*, TransactionToken* token = nullptr */) - override; - Status GetFileSize( - const std::string& fname, - uint64* file_size /*, TransactionToken* token = nullptr */) override; - Status RenameFile( - const std::string& src, - const std::string& target /*, TransactionToken* token = nullptr */) - override; - Status CopyFile(const std::string& src, - const std::string& - target /*, TransactionToken* token = nullptr */) override; - std::string TranslateName( - const std::string& name /*, TransactionToken* token = nullptr */) - const override; - void FlushCaches(/* TransactionToken* token=nullptr */) override; + TransactionToken* token, + std::vector* status) override; + Status GetChildren(const std::string& dir, TransactionToken* token, + std::vector* result) override; + Status GetMatchingPaths(const std::string& pattern, TransactionToken* token, + std::vector* results) override; + Status DeleteFile(const std::string& fname, TransactionToken* token) override; + Status DeleteRecursively(const std::string& dirname, TransactionToken* token, + int64* undeleted_files, + int64* undeleted_dirs) override; + Status DeleteDir(const std::string& dirname, + TransactionToken* token) override; + Status RecursivelyCreateDir(const std::string& dirname, + TransactionToken* token) override; + Status CreateDir(const std::string& dirname, + TransactionToken* token) override; + Status Stat(const std::string& fname, TransactionToken* token, + FileStatistics* stat) override; + Status IsDirectory(const std::string& fname, + TransactionToken* token) override; + Status GetFileSize(const std::string& fname, TransactionToken* token, + uint64* file_size) override; + Status RenameFile(const std::string& src, const std::string& target, + TransactionToken* token) override; + Status CopyFile(const std::string& src, const std::string& target, + TransactionToken* token) override; + std::string TranslateName(const std::string& name) const override; + void FlushCaches(TransactionToken* token) override; private: std::unique_ptr filesystem_; diff --git a/tensorflow/core/common_runtime/constant_folding_test.cc b/tensorflow/core/common_runtime/constant_folding_test.cc index e621b3b5006..b348117bb9e 100644 --- a/tensorflow/core/common_runtime/constant_folding_test.cc +++ b/tensorflow/core/common_runtime/constant_folding_test.cc @@ -688,7 +688,7 @@ class TestTFFileSystem : public ::tensorflow::NullFileSystem { data_tensor_(test::AsTensor({1., 2., 3., 4.}, {2, 2})) {} ::tensorflow::Status NewReadOnlyMemoryRegionFromFile( - const string& fname, + const string& fname, ::tensorflow::TransactionToken* token, std::unique_ptr<::tensorflow::ReadOnlyMemoryRegion>* result) override { if (fname != kTestMemRegionName) { return ::tensorflow::errors::Unimplemented( diff --git a/tensorflow/core/kernels/immutable_constant_op_test.cc b/tensorflow/core/kernels/immutable_constant_op_test.cc index 7eceba7ad8b..5c3f96a312d 100644 --- a/tensorflow/core/kernels/immutable_constant_op_test.cc +++ b/tensorflow/core/kernels/immutable_constant_op_test.cc @@ -61,7 +61,7 @@ class TestFileSystem : public NullFileSystem { public: ~TestFileSystem() override = default; Status NewReadOnlyMemoryRegionFromFile( - const string& fname, + const string& fname, TransactionToken* token, std::unique_ptr* result) override { float val = 0; StringPiece scheme, host, path; diff --git a/tensorflow/core/platform/env_test.cc b/tensorflow/core/platform/env_test.cc index f013aff9703..35374d65ee3 100644 --- 
a/tensorflow/core/platform/env_test.cc +++ b/tensorflow/core/platform/env_test.cc @@ -295,7 +295,7 @@ TEST_F(DefaultEnvTest, SleepForMicroseconds) { class TmpDirFileSystem : public NullFileSystem { public: - Status FileExists(const string& dir) override { + Status FileExists(const string& dir,TransactionToken* token) override { StringPiece scheme, host, path; io::ParseURI(dir, &scheme, &host, &path); if (path.empty()) return errors::NotFound(dir, " not found"); @@ -311,7 +311,7 @@ class TmpDirFileSystem : public NullFileSystem { return Env::Default()->FileExists(io::JoinPath(BaseDir(), path)); } - Status CreateDir(const string& dir) override { + Status CreateDir(const string& dir,TransactionToken* token) override { StringPiece scheme, host, path; io::ParseURI(dir, &scheme, &host, &path); if (scheme != "tmpdirfs") { @@ -328,7 +328,7 @@ class TmpDirFileSystem : public NullFileSystem { return status; } - Status IsDirectory(const string& dir) override { + Status IsDirectory(const string& dir,TransactionToken* token) override { StringPiece scheme, host, path; io::ParseURI(dir, &scheme, &host, &path); for (const auto& existing_dir : created_directories_) @@ -336,7 +336,7 @@ class TmpDirFileSystem : public NullFileSystem { return errors::NotFound(dir, " not found"); } - void FlushCaches() override { flushed_ = true; } + void FlushCaches(TransactionToken* token) override { flushed_ = true; } private: bool flushed_ = false; diff --git a/tensorflow/core/platform/file_system.h b/tensorflow/core/platform/file_system.h index 4a8d9e63023..c4094d3a5a2 100644 --- a/tensorflow/core/platform/file_system.h +++ b/tensorflow/core/platform/file_system.h @@ -481,7 +481,7 @@ class FileSystem { /// \brief Starts a new transaction virtual tensorflow::Status StartTransaction(TransactionToken** token) { - token = nullptr; + *token = nullptr; return Status::OK(); } @@ -499,15 +499,15 @@ class FileSystem { /// \brief Get token for `path` or start a new transaction and add `path` to /// it. virtual tensorflow::Status GetTokenOrStartTransaction( - const std::string& path, TransactionToken** token) { - token = nullptr; + const string& path, TransactionToken** token) { + *token = nullptr; return Status::OK(); } /// \brief Return transaction for `path` or nullptr in `token` virtual tensorflow::Status GetTransactionForPath(const std::string& path, TransactionToken** token) { - token = nullptr; + *token = nullptr; return Status::OK(); } diff --git a/tensorflow/core/platform/file_system_test.cc b/tensorflow/core/platform/file_system_test.cc index 0af45185612..dd02da32073 100644 --- a/tensorflow/core/platform/file_system_test.cc +++ b/tensorflow/core/platform/file_system_test.cc @@ -32,7 +32,7 @@ static const char* const kPrefix = "ipfs://solarsystem"; // cannot have children further. class InterPlanetaryFileSystem : public NullFileSystem { public: - Status FileExists(const string& fname) override { + Status FileExists(const string& fname, TransactionToken* token) override { string parsed_path; ParsePath(fname, &parsed_path); if (BodyExists(parsed_path)) { @@ -42,7 +42,7 @@ class InterPlanetaryFileSystem : public NullFileSystem { } // Adds the dir to the parent's children list and creates an entry for itself. - Status CreateDir(const string& dirname) override { + Status CreateDir(const string& dirname, TransactionToken* token) override { string parsed_path; ParsePath(dirname, &parsed_path); // If the directory already exists, throw an error. 
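// A minimal sketch, not part of the patch: the file_system.h hunks above change
// `token = nullptr;` to `*token = nullptr;` in the default transaction methods.
// The parameter is a TransactionToken** out-parameter, so assigning to `token`
// only overwrites the local pointer copy; writing through `*token` is what
// actually reports "no transaction" back to the caller. The helper below is
// hypothetical and only illustrates the token-threaded signatures introduced by
// this series (StartTransaction, then passing the token to FileExists).
#include "tensorflow/core/platform/file_system.h"

namespace {

tensorflow::Status DemoDefaultTransaction(tensorflow::FileSystem* fs,
                                           const std::string& path) {
  tensorflow::TransactionToken* token = nullptr;
  // With `*token = nullptr`, the base-class default explicitly hands back a
  // null token through the out-parameter instead of leaving it untouched.
  tensorflow::Status s = fs->StartTransaction(&token);
  if (!s.ok()) return s;
  // Filesystem operations now take the token as an extra argument; passing a
  // null token keeps the existing non-transactional behaviour.
  return fs->FileExists(path, token);
}

}  // namespace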
@@ -88,7 +88,7 @@ class InterPlanetaryFileSystem : public NullFileSystem { return Status(tensorflow::error::FAILED_PRECONDITION, "Failed to create"); } - Status IsDirectory(const string& dirname) override { + Status IsDirectory(const string& dirname, TransactionToken* token) override { string parsed_path; ParsePath(dirname, &parsed_path); // Simulate evil_directory has bad permissions by throwing a LOG(FATAL) @@ -105,7 +105,8 @@ class InterPlanetaryFileSystem : public NullFileSystem { return Status(tensorflow::error::FAILED_PRECONDITION, "Not a dir"); } - Status GetChildren(const string& dir, std::vector* result) override { + Status GetChildren(const string& dir, TransactionToken* token, + std::vector* result) override { TF_RETURN_IF_ERROR(IsDirectory(dir)); string parsed_path; ParsePath(dir, &parsed_path); @@ -273,7 +274,7 @@ TEST(InterPlanetaryFileSystemTest, HasAtomicMove) { class TestFileSystem : public NullFileSystem { public: // Only allow for a single root directory. - Status IsDirectory(const string& dirname) override { + Status IsDirectory(const string& dirname, TransactionToken* token) override { if (dirname == "." || dirname.empty()) { return Status::OK(); } @@ -281,7 +282,8 @@ class TestFileSystem : public NullFileSystem { } // Simulating a FS with a root dir and a single file underneath it. - Status GetChildren(const string& dir, std::vector* result) override { + Status GetChildren(const string& dir, TransactionToken* token, + std::vector* result) override { if (dir == "." || dir.empty()) { result->push_back("test"); } diff --git a/tensorflow/core/platform/null_file_system.h b/tensorflow/core/platform/null_file_system.h index ef8879090e9..0af34258169 100644 --- a/tensorflow/core/platform/null_file_system.h +++ b/tensorflow/core/platform/null_file_system.h @@ -37,83 +37,66 @@ class NullFileSystem : public FileSystem { ~NullFileSystem() override = default; Status NewRandomAccessFile( - const string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override { + const string& fname, TransactionToken* token, + std::unique_ptr* result) override { return errors::Unimplemented("NewRandomAccessFile unimplemented"); } - Status NewWritableFile( - const string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override { + Status NewWritableFile(const string& fname, TransactionToken* token, + std::unique_ptr* result) override { return errors::Unimplemented("NewWritableFile unimplemented"); } - Status NewAppendableFile( - const string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override { + Status NewAppendableFile(const string& fname, TransactionToken* token, + std::unique_ptr* result) override { return errors::Unimplemented("NewAppendableFile unimplemented"); } Status NewReadOnlyMemoryRegionFromFile( - const string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override { + const string& fname, TransactionToken* token, + std::unique_ptr* result) override { return errors::Unimplemented( "NewReadOnlyMemoryRegionFromFile unimplemented"); } - Status FileExists( - const string& fname /*, TransactionToken* token = nullptr */) override { + Status FileExists(const string& fname, TransactionToken* token) override { return errors::Unimplemented("FileExists unimplemented"); } - Status GetChildren( - const string& dir, - std::vector* result /*, TransactionToken* token = nullptr */) - override { + Status GetChildren(const string& dir, TransactionToken* token, + std::vector* result) 
override { return errors::Unimplemented("GetChildren unimplemented"); } - Status GetMatchingPaths( - const string& pattern, - std::vector* results /*, TransactionToken* token = nullptr */) - override { + Status GetMatchingPaths(const string& pattern, TransactionToken* token, + std::vector* results) override { return internal::GetMatchingPaths(this, Env::Default(), pattern, results); } - Status DeleteFile( - const string& fname /*, TransactionToken* token = nullptr */) override { + Status DeleteFile(const string& fname, TransactionToken* token) override { return errors::Unimplemented("DeleteFile unimplemented"); } - Status CreateDir( - const string& dirname /*, TransactionToken* token = nullptr */) override { + Status CreateDir(const string& dirname, TransactionToken* token) override { return errors::Unimplemented("CreateDir unimplemented"); } - Status DeleteDir( - const string& dirname /*, TransactionToken* token = nullptr */) override { + Status DeleteDir(const string& dirname, TransactionToken* token) override { return errors::Unimplemented("DeleteDir unimplemented"); } - Status GetFileSize( - const string& fname, - uint64* file_size /*, TransactionToken* token = nullptr */) override { + Status GetFileSize(const string& fname, TransactionToken* token, + uint64* file_size) override { return errors::Unimplemented("GetFileSize unimplemented"); } - Status RenameFile( - const string& src, - const string& target /*, TransactionToken* token = nullptr */) override { + Status RenameFile(const string& src, const string& target, + TransactionToken* token) override { return errors::Unimplemented("RenameFile unimplemented"); } - Status Stat( - const string& fname, - FileStatistics* stat /*, TransactionToken* token = nullptr */) override { + Status Stat(const string& fname, TransactionToken* token, + FileStatistics* stat) override { return errors::Unimplemented("Stat unimplemented"); } }; diff --git a/tensorflow/core/platform/ram_file_system.h b/tensorflow/core/platform/ram_file_system.h index 9437a7174a9..ba8bb2d7630 100644 --- a/tensorflow/core/platform/ram_file_system.h +++ b/tensorflow/core/platform/ram_file_system.h @@ -104,9 +104,8 @@ class RamRandomAccessFile : public RandomAccessFile, public WritableFile { class RamFileSystem : public FileSystem { public: Status NewRandomAccessFile( - const string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override { + const string& fname, TransactionToken* token, + std::unique_ptr* result) override { mutex_lock m(mu_); if (fs_.find(fname) == fs_.end()) { return errors::NotFound(""); @@ -116,10 +115,8 @@ class RamFileSystem : public FileSystem { return Status::OK(); } - Status NewWritableFile( - const string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override { + Status NewWritableFile(const string& fname, TransactionToken* token, + std::unique_ptr* result) override { mutex_lock m(mu_); if (fs_.find(fname) == fs_.end()) { fs_[fname] = std::make_shared(); @@ -128,10 +125,8 @@ class RamFileSystem : public FileSystem { new RamRandomAccessFile(fname, fs_[fname])); return Status::OK(); } - Status NewAppendableFile( - const string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override { + Status NewAppendableFile(const string& fname, TransactionToken* token, + std::unique_ptr* result) override { mutex_lock m(mu_); if (fs_.find(fname) == fs_.end()) { fs_[fname] = std::make_shared(); @@ -142,22 +137,18 @@ class RamFileSystem : public FileSystem { } Status 
NewReadOnlyMemoryRegionFromFile( - const string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override { + const string& fname, TransactionToken* token, + std::unique_ptr* result) override { return errors::Unimplemented(""); } - Status FileExists( - const string& fname /*, TransactionToken* token = nullptr */) override { + Status FileExists(const string& fname, TransactionToken* token) override { FileStatistics stat; - return Stat(fname, &stat); + return Stat(fname, token, &stat); } - Status GetChildren( - const string& dir, - std::vector* result /*, TransactionToken* token = nullptr */) - override { + Status GetChildren(const string& dir, TransactionToken* token, + std::vector* result) override { mutex_lock m(mu_); auto it = fs_.lower_bound(dir); while (it != fs_.end() && absl::StartsWith(it->first, dir)) { @@ -168,10 +159,8 @@ class RamFileSystem : public FileSystem { return Status::OK(); } - Status GetMatchingPaths( - const string& pattern, - std::vector* results /*, TransactionToken* token = nullptr */) - override { + Status GetMatchingPaths(const string& pattern, TransactionToken* token, + std::vector* results) override { mutex_lock m(mu_); Env* env = Env::Default(); for (auto it = fs_.begin(); it != fs_.end(); ++it) { @@ -182,9 +171,8 @@ class RamFileSystem : public FileSystem { return Status::OK(); } - Status Stat( - const string& fname, - FileStatistics* stat /*, TransactionToken* token = nullptr */) override { + Status Stat(const string& fname, TransactionToken* token, + FileStatistics* stat) override { mutex_lock m(mu_); auto it = fs_.lower_bound(fname); if (it == fs_.end()) { @@ -204,8 +192,7 @@ class RamFileSystem : public FileSystem { return Status::OK(); } - Status DeleteFile( - const string& fname /*, TransactionToken* token = nullptr */) override { + Status DeleteFile(const string& fname, TransactionToken* token) override { mutex_lock m(mu_); if (fs_.find(fname) != fs_.end()) { fs_.erase(fname); @@ -215,24 +202,21 @@ class RamFileSystem : public FileSystem { return errors::NotFound(""); } - Status CreateDir( - const string& dirname /*, TransactionToken* token = nullptr */) override { + Status CreateDir(const string& dirname, TransactionToken* token) override { return Status::OK(); } - Status RecursivelyCreateDir( - const string& dirname /*, TransactionToken* token = nullptr */) override { + Status RecursivelyCreateDir(const string& dirname, + TransactionToken* token) override { return Status::OK(); } - Status DeleteDir( - const string& dirname /*, TransactionToken* token = nullptr */) override { + Status DeleteDir(const string& dirname, TransactionToken* token) override { return Status::OK(); } - Status GetFileSize( - const string& fname, - uint64* file_size /*, TransactionToken* token = nullptr */) override { + Status GetFileSize(const string& fname, TransactionToken* token, + uint64* file_size) override { mutex_lock m(mu_); if (fs_.find(fname) != fs_.end()) { *file_size = fs_[fname]->size(); @@ -241,9 +225,8 @@ class RamFileSystem : public FileSystem { return errors::NotFound(""); } - Status RenameFile( - const string& src, - const string& target /*, TransactionToken* token = nullptr */) override { + Status RenameFile(const string& src, const string& target, + TransactionToken* token) override { mutex_lock m(mu_); if (fs_.find(src) != fs_.end()) { fs_[target] = fs_[src]; diff --git a/tensorflow/core/platform/retrying_file_system.h b/tensorflow/core/platform/retrying_file_system.h index 3891ce7499f..2f50b6cd5df 100644 --- 
a/tensorflow/core/platform/retrying_file_system.h +++ b/tensorflow/core/platform/retrying_file_system.h @@ -39,36 +39,27 @@ class RetryingFileSystem : public FileSystem { retry_config_(retry_config) {} Status NewRandomAccessFile( - const string& filename, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override; + const string& filename, TransactionToken* token, + std::unique_ptr* result) override; - Status NewWritableFile( - const string& filename, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override; + Status NewWritableFile(const string& filename, TransactionToken* token, + std::unique_ptr* result) override; - Status NewAppendableFile( - const string& filename, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override; + Status NewAppendableFile(const string& filename, TransactionToken* token, + std::unique_ptr* result) override; Status NewReadOnlyMemoryRegionFromFile( - const string& filename, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override; + const string& filename, TransactionToken* token, + std::unique_ptr* result) override; - Status FileExists( - const string& fname /*, TransactionToken* token = nullptr */) override { + Status FileExists(const string& fname, TransactionToken* token) override { return RetryingUtils::CallWithRetries( [this, &fname]() { return base_file_system_->FileExists(fname); }, retry_config_); } - Status GetChildren( - const string& dir, - std::vector* result /*, TransactionToken* token = nullptr */) - override { + Status GetChildren(const string& dir, TransactionToken* token, + std::vector* result) override { return RetryingUtils::CallWithRetries( [this, &dir, result]() { return base_file_system_->GetChildren(dir, result); @@ -76,10 +67,8 @@ class RetryingFileSystem : public FileSystem { retry_config_); } - Status GetMatchingPaths( - const string& pattern, - std::vector* result /*, TransactionToken* token = nullptr */) - override { + Status GetMatchingPaths(const string& pattern, TransactionToken* token, + std::vector* result) override { return RetryingUtils::CallWithRetries( [this, &pattern, result]() { return base_file_system_->GetMatchingPaths(pattern, result); @@ -87,38 +76,33 @@ class RetryingFileSystem : public FileSystem { retry_config_); } - Status Stat( - const string& fname, - FileStatistics* stat /*, TransactionToken* token = nullptr */) override { + Status Stat(const string& fname, TransactionToken* token, + FileStatistics* stat) override { return RetryingUtils::CallWithRetries( [this, &fname, stat]() { return base_file_system_->Stat(fname, stat); }, retry_config_); } - Status DeleteFile( - const string& fname /*, TransactionToken* token = nullptr */) override { + Status DeleteFile(const string& fname, TransactionToken* token) override { return RetryingUtils::DeleteWithRetries( [this, &fname]() { return base_file_system_->DeleteFile(fname); }, retry_config_); } - Status CreateDir( - const string& dirname /*, TransactionToken* token = nullptr */) override { + Status CreateDir(const string& dirname, TransactionToken* token) override { return RetryingUtils::CallWithRetries( [this, &dirname]() { return base_file_system_->CreateDir(dirname); }, retry_config_); } - Status DeleteDir( - const string& dirname /*, TransactionToken* token = nullptr */) override { + Status DeleteDir(const string& dirname, TransactionToken* token) override { return RetryingUtils::DeleteWithRetries( [this, &dirname]() { return base_file_system_->DeleteDir(dirname); }, 
retry_config_); } - Status GetFileSize( - const string& fname, - uint64* file_size /*, TransactionToken* token = nullptr */) override { + Status GetFileSize(const string& fname, TransactionToken* token, + uint64* file_size) override { return RetryingUtils::CallWithRetries( [this, &fname, file_size]() { return base_file_system_->GetFileSize(fname, file_size); @@ -126,9 +110,8 @@ class RetryingFileSystem : public FileSystem { retry_config_); } - Status RenameFile( - const string& src, - const string& target /*, TransactionToken* token = nullptr */) override { + Status RenameFile(const string& src, const string& target, + TransactionToken* token) override { return RetryingUtils::CallWithRetries( [this, &src, &target]() { return base_file_system_->RenameFile(src, target); @@ -136,8 +119,7 @@ class RetryingFileSystem : public FileSystem { retry_config_); } - Status IsDirectory( - const string& dirname /*, TransactionToken* token = nullptr */) override { + Status IsDirectory(const string& dirname, TransactionToken* token) override { return RetryingUtils::CallWithRetries( [this, &dirname]() { return base_file_system_->IsDirectory(dirname); }, retry_config_); @@ -148,9 +130,9 @@ class RetryingFileSystem : public FileSystem { return base_file_system_->HasAtomicMove(path, has_atomic_move); } - Status DeleteRecursively( - const string& dirname, int64* undeleted_files, - int64* undeleted_dirs /*, TransactionToken* token = nullptr */) override { + Status DeleteRecursively(const string& dirname, TransactionToken* token, + int64* undeleted_files, + int64* undeleted_dirs) override { return RetryingUtils::DeleteWithRetries( [this, &dirname, undeleted_files, undeleted_dirs]() { return base_file_system_->DeleteRecursively(dirname, undeleted_files, @@ -159,7 +141,7 @@ class RetryingFileSystem : public FileSystem { retry_config_); } - void FlushCaches(/* TransactionToken* token=nullptr */) override { + void FlushCaches(TransactionToken* token) override { base_file_system_->FlushCaches(); } @@ -243,8 +225,8 @@ class RetryingWritableFile : public WritableFile { template Status RetryingFileSystem::NewRandomAccessFile( - const string& filename, - std::unique_ptr* result /*, TransactionToken* token */) { + const string& filename, TransactionToken* token, + std::unique_ptr* result) { std::unique_ptr base_file; TF_RETURN_IF_ERROR(RetryingUtils::CallWithRetries( [this, &filename, &base_file]() { @@ -258,8 +240,8 @@ Status RetryingFileSystem::NewRandomAccessFile( template Status RetryingFileSystem::NewWritableFile( - const string& filename, - std::unique_ptr* result /*, TransactionToken* token */) { + const string& filename, TransactionToken* token, + std::unique_ptr* result) { std::unique_ptr base_file; TF_RETURN_IF_ERROR(RetryingUtils::CallWithRetries( [this, &filename, &base_file]() { @@ -273,8 +255,8 @@ Status RetryingFileSystem::NewWritableFile( template Status RetryingFileSystem::NewAppendableFile( - const string& filename, - std::unique_ptr* result /*, TransactionToken* token */) { + const string& filename, TransactionToken* token, + std::unique_ptr* result) { std::unique_ptr base_file; TF_RETURN_IF_ERROR(RetryingUtils::CallWithRetries( [this, &filename, &base_file]() { @@ -288,8 +270,8 @@ Status RetryingFileSystem::NewAppendableFile( template Status RetryingFileSystem::NewReadOnlyMemoryRegionFromFile( - const string& filename, std::unique_ptr* - result /*, TransactionToken* token */) { + const string& filename, TransactionToken* token, + std::unique_ptr* result) { return RetryingUtils::CallWithRetries( [this, 
&filename, result]() { return base_file_system_->NewReadOnlyMemoryRegionFromFile(filename, diff --git a/tensorflow/core/platform/retrying_file_system_test.cc b/tensorflow/core/platform/retrying_file_system_test.cc index 439abd6f3ec..0cada5a5651 100644 --- a/tensorflow/core/platform/retrying_file_system_test.cc +++ b/tensorflow/core/platform/retrying_file_system_test.cc @@ -100,100 +100,83 @@ class MockFileSystem : public FileSystem { : calls_(calls), flushed_(flushed) {} Status NewRandomAccessFile( - const string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override { + const string& fname, TransactionToken* token, + std::unique_ptr* result) override { *result = std::move(random_access_file_to_return); return calls_.ConsumeNextCall("NewRandomAccessFile"); } - Status NewWritableFile( - const string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override { + Status NewWritableFile(const string& fname, TransactionToken* token, + std::unique_ptr* result) override { *result = std::move(writable_file_to_return); return calls_.ConsumeNextCall("NewWritableFile"); } - Status NewAppendableFile( - const string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override { + Status NewAppendableFile(const string& fname, TransactionToken* token, + std::unique_ptr* result) override { *result = std::move(writable_file_to_return); return calls_.ConsumeNextCall("NewAppendableFile"); } Status NewReadOnlyMemoryRegionFromFile( - const string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override { + const string& fname, TransactionToken* token, + std::unique_ptr* result) override { return calls_.ConsumeNextCall("NewReadOnlyMemoryRegionFromFile"); } - Status FileExists( - const string& fname /*, TransactionToken* token = nullptr */) override { + Status FileExists(const string& fname, TransactionToken* token) override { return calls_.ConsumeNextCall("FileExists"); } - Status GetChildren( - const string& dir, - std::vector* result /*, TransactionToken* token = nullptr */) - override { + Status GetChildren(const string& dir, TransactionToken* token, + std::vector* result) override { return calls_.ConsumeNextCall("GetChildren"); } - Status GetMatchingPaths( - const string& dir, - std::vector* result /*, TransactionToken* token = nullptr */) - override { + Status GetMatchingPaths(const string& dir, TransactionToken* token, + std::vector* result) override { return calls_.ConsumeNextCall("GetMatchingPaths"); } - Status Stat( - const string& fname, - FileStatistics* stat /*, TransactionToken* token = nullptr */) override { + Status Stat(const string& fname, TransactionToken* token, + FileStatistics* stat) override { return calls_.ConsumeNextCall("Stat"); } - Status DeleteFile( - const string& fname /*, TransactionToken* token = nullptr */) override { + Status DeleteFile(const string& fname, TransactionToken* token) override { return calls_.ConsumeNextCall("DeleteFile"); } - Status CreateDir( - const string& dirname /*, TransactionToken* token = nullptr */) override { + Status CreateDir(const string& dirname, TransactionToken* token) override { return calls_.ConsumeNextCall("CreateDir"); } - Status DeleteDir( - const string& dirname /*, TransactionToken* token = nullptr */) override { + Status DeleteDir(const string& dirname, TransactionToken* token) override { return calls_.ConsumeNextCall("DeleteDir"); } - Status GetFileSize( - const string& fname, - uint64* file_size /*, TransactionToken* token = 
nullptr */) override { + Status GetFileSize(const string& fname, TransactionToken* token, + uint64* file_size) override { return calls_.ConsumeNextCall("GetFileSize"); } - Status RenameFile( - const string& src, - const string& target /*, TransactionToken* token = nullptr */) override { + Status RenameFile(const string& src, const string& target, + TransactionToken* token) override { return calls_.ConsumeNextCall("RenameFile"); } - Status IsDirectory( - const string& dirname /*, TransactionToken* token = nullptr */) override { + Status IsDirectory(const string& dirname, TransactionToken* token) override { return calls_.ConsumeNextCall("IsDirectory"); } - Status DeleteRecursively( - const string& dirname, int64* undeleted_files, - int64* undeleted_dirs /*, TransactionToken* token = nullptr */) override { + Status DeleteRecursively(const string& dirname, TransactionToken* token, + int64* undeleted_files, + int64* undeleted_dirs) override { return calls_.ConsumeNextCall("DeleteRecursively"); } - void FlushCaches(/* TransactionToken* token=nullptr */) override { + void FlushCaches( + TransactionToken* token) override { if (flushed_) { *flushed_ = true; } diff --git a/tensorflow/python/framework/test_file_system.cc b/tensorflow/python/framework/test_file_system.cc index 6e9915adbb6..ed0a66fbefd 100644 --- a/tensorflow/python/framework/test_file_system.cc +++ b/tensorflow/python/framework/test_file_system.cc @@ -39,12 +39,14 @@ class TestRandomAccessFile : public RandomAccessFile { class TestFileSystem : public NullFileSystem { public: Status NewRandomAccessFile( - const string& fname, std::unique_ptr* result) override { + const string& fname, TransactionToken* token, + std::unique_ptr* result) override { result->reset(new TestRandomAccessFile); return Status::OK(); } // Always return size of 10 - Status GetFileSize(const string& fname, uint64* file_size) override { + Status GetFileSize(const string& fname, TransactionToken* token, + uint64* file_size) override { *file_size = 10; return Status::OK(); } From 18e4ffd5be8bd0f946fdbfb002d428d4cdb9bc1f Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Fri, 31 Jul 2020 18:48:50 -0700 Subject: [PATCH 2145/2522] Add all retrying file systems to PR --- .../core/platform/cloud/gcs_file_system.cc | 95 +++--- .../core/platform/cloud/gcs_file_system.h | 72 ++-- .../platform/cloud/gcs_file_system_test.cc | 320 ++++++++++-------- tensorflow/core/platform/file_system_test.cc | 46 +-- .../core/platform/retrying_file_system.h | 68 ++-- .../platform/retrying_file_system_test.cc | 80 ++--- tensorflow/core/platform/s3/s3_file_system.cc | 81 +++-- tensorflow/core/platform/s3/s3_file_system.h | 48 +-- 8 files changed, 421 insertions(+), 389 deletions(-) diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc index 63c601f2244..88e92f0f84c 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system.cc @@ -648,7 +648,7 @@ class GcsWritableFile : public WritableFile { TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(), " when composing to ", GetGcsPath()); TF_RETURN_WITH_CONTEXT_IF_ERROR( - filesystem_->DeleteFile(GetGcsPathWithObject(append_object)), + filesystem_->DeleteFile(GetGcsPathWithObject(append_object),nullptr), " when cleaning up."); return Status::OK(); }, @@ -929,8 +929,8 @@ GcsFileSystem::GcsFileSystem( additional_header_(additional_header) {} Status GcsFileSystem::NewRandomAccessFile( - const string& fname, - std::unique_ptr* result /*, 
TransactionToken* token */) { + const string& fname, TransactionToken* token, + std::unique_ptr* result) { string bucket, object; TF_RETURN_IF_ERROR(ParseGcsPath(fname, false, &bucket, &object)); TF_RETURN_IF_ERROR(CheckBucketLocationConstraint(bucket)); @@ -1231,9 +1231,9 @@ void GcsFileSystem::ClearFileCaches(const string& fname) { // MatchingPathsCache as well. } -Status GcsFileSystem::NewWritableFile( - const string& fname, - std::unique_ptr* result /*, TransactionToken* token */) { +Status GcsFileSystem::NewWritableFile(const string& fname, + TransactionToken* token, + std::unique_ptr* result) { string bucket, object; TF_RETURN_IF_ERROR(ParseGcsPath(fname, false, &bucket, &object)); @@ -1267,11 +1267,11 @@ Status GcsFileSystem::NewWritableFile( // Reads the file from GCS in chunks and stores it in a tmp file, // which is then passed to GcsWritableFile. -Status GcsFileSystem::NewAppendableFile( - const string& fname, - std::unique_ptr* result /*, TransactionToken* token */) { +Status GcsFileSystem::NewAppendableFile(const string& fname, + TransactionToken* token, + std::unique_ptr* result) { std::unique_ptr reader; - TF_RETURN_IF_ERROR(NewRandomAccessFile(fname, &reader)); + TF_RETURN_IF_ERROR(NewRandomAccessFile(fname, token, &reader)); std::unique_ptr buffer(new char[kReadAppendableFileBufferSize]); Status status; uint64 offset = 0; @@ -1330,14 +1330,14 @@ Status GcsFileSystem::NewAppendableFile( } Status GcsFileSystem::NewReadOnlyMemoryRegionFromFile( - const string& fname, std::unique_ptr* - result /*, TransactionToken* token */) { + const string& fname, TransactionToken* token, + std::unique_ptr* result) { uint64 size; - TF_RETURN_IF_ERROR(GetFileSize(fname, &size)); + TF_RETURN_IF_ERROR(GetFileSize(fname,token, &size)); std::unique_ptr data(new char[size]); std::unique_ptr file; - TF_RETURN_IF_ERROR(NewRandomAccessFile(fname, &file)); + TF_RETURN_IF_ERROR(NewRandomAccessFile(fname, token, &file)); StringPiece piece; TF_RETURN_IF_ERROR(file->Read(0, size, &piece, data.get())); @@ -1346,8 +1346,7 @@ Status GcsFileSystem::NewReadOnlyMemoryRegionFromFile( return Status::OK(); } -Status GcsFileSystem::FileExists( - const string& fname /*, TransactionToken* token */) { +Status GcsFileSystem::FileExists(const string& fname, TransactionToken* token) { string bucket, object; TF_RETURN_IF_ERROR(ParseGcsPath(fname, true, &bucket, &object)); if (object.empty()) { @@ -1561,17 +1560,17 @@ Status GcsFileSystem::FolderExists(const string& dirname, bool* result) { return s; } -Status GcsFileSystem::GetChildren( - const string& dirname, - std::vector* result /*, TransactionToken* token */) { +Status GcsFileSystem::GetChildren(const string& dirname, + TransactionToken* token, + std::vector* result) { return GetChildrenBounded(dirname, UINT64_MAX, result, false /* recursively */, false /* include_self_directory_marker */); } -Status GcsFileSystem::GetMatchingPaths( - const string& pattern, - std::vector* results /*, TransactionToken* token */) { +Status GcsFileSystem::GetMatchingPaths(const string& pattern, + TransactionToken* token, + std::vector* results) { MatchingPathsCache::ComputeFunc compute_func = [this](const string& pattern, std::vector* results) { results->clear(); @@ -1731,8 +1730,8 @@ Status GcsFileSystem::GetChildrenBounded(const string& dirname, } } -Status GcsFileSystem::Stat( - const string& fname, FileStatistics* stat /*, TransactionToken* token */) { +Status GcsFileSystem::Stat(const string& fname, TransactionToken* token, + FileStatistics* stat) { if (!stat) { return 
errors::Internal("'stat' cannot be nullptr."); } @@ -1766,8 +1765,7 @@ Status GcsFileSystem::Stat( return errors::NotFound("The specified path ", fname, " was not found."); } -Status GcsFileSystem::DeleteFile( - const string& fname /*, TransactionToken* token */) { +Status GcsFileSystem::DeleteFile(const string& fname, TransactionToken* token) { string bucket, object; TF_RETURN_IF_ERROR(ParseGcsPath(fname, false, &bucket, &object)); @@ -1783,8 +1781,8 @@ Status GcsFileSystem::DeleteFile( return Status::OK(); } -Status GcsFileSystem::CreateDir( - const string& dirname /*, TransactionToken* token */) { +Status GcsFileSystem::CreateDir(const string& dirname, + TransactionToken* token) { string dirname_with_slash = MaybeAppendSlash(dirname); VLOG(3) << "CreateDir: creating directory with dirname: " << dirname << " and dirname_with_slash: " << dirname_with_slash; @@ -1799,7 +1797,7 @@ Status GcsFileSystem::CreateDir( dirname_with_slash, " was not found."); } - if (FileExists(dirname_with_slash).ok()) { + if (FileExists(dirname_with_slash,token).ok()) { // Use the original name for a correct error here. VLOG(3) << "CreateDir: directory already exists, not uploading " << dirname; return errors::AlreadyExists(dirname); @@ -1833,8 +1831,8 @@ Status GcsFileSystem::CreateDir( // Checks that the directory is empty (i.e no objects with this prefix exist). // Deletes the GCS directory marker if it exists. -Status GcsFileSystem::DeleteDir( - const string& dirname /*, TransactionToken* token */) { +Status GcsFileSystem::DeleteDir(const string& dirname, + TransactionToken* token) { std::vector children; // A directory is considered empty either if there are no matching objects // with the corresponding name prefix or if there is exactly one matching @@ -1849,13 +1847,13 @@ Status GcsFileSystem::DeleteDir( } if (children.size() == 1 && children[0].empty()) { // This is the directory marker object. Delete it. - return DeleteFile(MaybeAppendSlash(dirname)); + return DeleteFile(MaybeAppendSlash(dirname),token); } return Status::OK(); } -Status GcsFileSystem::GetFileSize( - const string& fname, uint64* file_size /*, TransactionToken* token */) { +Status GcsFileSystem::GetFileSize(const string& fname, TransactionToken* token, + uint64* file_size) { if (!file_size) { return errors::Internal("'file_size' cannot be nullptr."); } @@ -1865,14 +1863,14 @@ Status GcsFileSystem::GetFileSize( TF_RETURN_IF_ERROR(ParseGcsPath(fname, false, &bucket, &object)); FileStatistics stat; - TF_RETURN_IF_ERROR(Stat(fname, &stat)); + TF_RETURN_IF_ERROR(Stat(fname,token, &stat)); *file_size = stat.length; return Status::OK(); } -Status GcsFileSystem::RenameFile( - const string& src, const string& target /*, TransactionToken* token */) { - if (!IsDirectory(src).ok()) { +Status GcsFileSystem::RenameFile(const string& src, const string& target, + TransactionToken* token) { + if (!IsDirectory(src,token).ok()) { return RenameObject(src, target); } // Rename all individual objects in the directory one by one. @@ -1930,11 +1928,11 @@ Status GcsFileSystem::RenameObject(const string& src, const string& target) { // on the server side, we can't just retry the whole RenameFile operation // because the source object is already gone. 
return RetryingUtils::DeleteWithRetries( - [this, &src]() { return DeleteFile(src); }, retry_config_); + [this, &src]() { return DeleteFile(src,nullptr); }, retry_config_); } -Status GcsFileSystem::IsDirectory( - const string& fname /*, TransactionToken* token */) { +Status GcsFileSystem::IsDirectory(const string& fname, + TransactionToken* token) { string bucket, object; TF_RETURN_IF_ERROR(ParseGcsPath(fname, true, &bucket, &object)); if (object.empty()) { @@ -1960,16 +1958,17 @@ Status GcsFileSystem::IsDirectory( return errors::NotFound("The specified path ", fname, " was not found."); } -Status GcsFileSystem::DeleteRecursively( - const string& dirname, int64* undeleted_files, - int64* undeleted_dirs /*, TransactionToken* token */) { +Status GcsFileSystem::DeleteRecursively(const string& dirname, + TransactionToken* token, + int64* undeleted_files, + int64* undeleted_dirs) { if (!undeleted_files || !undeleted_dirs) { return errors::Internal( "'undeleted_files' and 'undeleted_dirs' cannot be nullptr."); } *undeleted_files = 0; *undeleted_dirs = 0; - if (!IsDirectory(dirname).ok()) { + if (!IsDirectory(dirname,token).ok()) { *undeleted_dirs = 1; return Status( error::NOT_FOUND, @@ -1987,9 +1986,9 @@ Status GcsFileSystem::DeleteRecursively( // and therefore RetryingFileSystem won't pay attention to the failures, // we need to make sure these failures are properly retried. const auto& delete_file_status = RetryingUtils::DeleteWithRetries( - [this, &full_path]() { return DeleteFile(full_path); }, retry_config_); + [this, &full_path,token]() { return DeleteFile(full_path,token); }, retry_config_); if (!delete_file_status.ok()) { - if (IsDirectory(full_path).ok()) { + if (IsDirectory(full_path,token).ok()) { // The object is a directory marker. (*undeleted_dirs)++; } else { @@ -2003,7 +2002,7 @@ Status GcsFileSystem::DeleteRecursively( // Flushes all caches for filesystem metadata and file contents. Useful for // reclaiming memory once filesystem operations are done (e.g. model is loaded), // or for resetting the filesystem to a consistent state. 
-void GcsFileSystem::FlushCaches(/* TransactionToken* token */) { +void GcsFileSystem::FlushCaches(TransactionToken* token) { tf_shared_lock l(block_cache_lock_); file_block_cache_->Flush(); stat_cache_->Clear(); diff --git a/tensorflow/core/platform/cloud/gcs_file_system.h b/tensorflow/core/platform/cloud/gcs_file_system.h index 6f0e9535bfe..0a27aba35c1 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system.h +++ b/tensorflow/core/platform/cloud/gcs_file_system.h @@ -126,67 +126,49 @@ class GcsFileSystem : public FileSystem { bool compose_append); Status NewRandomAccessFile( - const string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override; + const string& fname, TransactionToken* token, + std::unique_ptr* result) override; - Status NewWritableFile( - const string& fname, - std::unique_ptr* - result) /*, TransactionToken* token = nullptr */ override; + Status NewWritableFile(const string& fname, TransactionToken* token, + std::unique_ptr* result) override; - Status NewAppendableFile( - const string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override; + Status NewAppendableFile(const string& fname, TransactionToken* token, + std::unique_ptr* result) override; Status NewReadOnlyMemoryRegionFromFile( - const string& fname, - std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override; + const string& fname, TransactionToken* token, + std::unique_ptr* result) override; - Status FileExists( - const string& fname /*, TransactionToken* token = nullptr */) override; + Status FileExists(const string& fname, TransactionToken* token) override; - Status Stat( - const string& fname, - FileStatistics* stat /*, TransactionToken* token = nullptr */) override; + Status Stat(const string& fname, TransactionToken* token, + FileStatistics* stat) override; - Status GetChildren( - const string& dir, - std::vector* result /*, TransactionToken* token = nullptr */) - override; + Status GetChildren(const string& dir, TransactionToken* token, + std::vector* result) override; - Status GetMatchingPaths( - const string& pattern, - std::vector* results /*, TransactionToken* token = nullptr */) - override; + Status GetMatchingPaths(const string& pattern, TransactionToken* token, + std::vector* results) override; - Status DeleteFile( - const string& fname /*, TransactionToken* token = nullptr */) override; + Status DeleteFile(const string& fname, TransactionToken* token) override; - Status CreateDir( - const string& dirname /*, TransactionToken* token = nullptr */) override; + Status CreateDir(const string& dirname, TransactionToken* token) override; - Status DeleteDir( - const string& dirname /*, TransactionToken* token = nullptr */) override; + Status DeleteDir(const string& dirname, TransactionToken* token) override; - Status GetFileSize( - const string& fname, - uint64* file_size /*, TransactionToken* token = nullptr */) override; + Status GetFileSize(const string& fname, TransactionToken* token, + uint64* file_size) override; - Status RenameFile( - const string& src, - const string& target /*, TransactionToken* token = nullptr */) override; + Status RenameFile(const string& src, const string& target, + TransactionToken* token) override; - Status IsDirectory( - const string& fname /*, TransactionToken* token = nullptr */) override; + Status IsDirectory(const string& fname, TransactionToken* token) override; - Status DeleteRecursively( - const string& dirname, int64* undeleted_files, - int64* undeleted_dirs /*, 
TransactionToken* token = nullptr */) override; + Status DeleteRecursively(const string& dirname, TransactionToken* token, + int64* undeleted_files, + int64* undeleted_dirs) override; - void FlushCaches(/* TransactionToken* token = nullptr */) override; + void FlushCaches(TransactionToken* token) override; /// Set an object to collect runtime statistics from the GcsFilesystem. void SetStats(GcsStatsInterface* stats); diff --git a/tensorflow/core/platform/cloud/gcs_file_system_test.cc b/tensorflow/core/platform/cloud/gcs_file_system_test.cc index c8e72487bbe..b216281d630 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system_test.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system_test.cc @@ -86,7 +86,8 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_NoBlockCache) { nullptr /* gcs additional header */, false /* compose append */); std::unique_ptr file; - TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file)); + TF_EXPECT_OK( + fs.NewRandomAccessFile("gs://bucket/random_access.txt", nullptr, &file)); StringPiece filename; TF_EXPECT_OK(file->Name(&filename)); @@ -133,7 +134,8 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_Buffered) { nullptr /* gcs additional header */, false /* compose append */); std::unique_ptr file; - TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file)); + TF_EXPECT_OK( + fs.NewRandomAccessFile("gs://bucket/random_access.txt", nullptr, &file)); StringPiece filename; TF_EXPECT_OK(file->Name(&filename)); @@ -181,7 +183,8 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_Buffered_Errors) { nullptr /* gcs additional header */, false /* compose append */); std::unique_ptr file; - TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file)); + TF_EXPECT_OK( + fs.NewRandomAccessFile("gs://bucket/random_access.txt", nullptr, &file)); StringPiece filename; TF_EXPECT_OK(file->Name(&filename)); @@ -228,7 +231,8 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_Buffered_ReadAtEOF) { nullptr /* gcs additional header */, false /* compose append */); std::unique_ptr file; - TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file)); + TF_EXPECT_OK( + fs.NewRandomAccessFile("gs://bucket/random_access.txt", nullptr, &file)); StringPiece filename; TF_EXPECT_OK(file->Name(&filename)); @@ -269,7 +273,8 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_Buffered_CachedOutOfRange) { nullptr /* gcs additional header */, false /* compose append */); std::unique_ptr file; - TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file)); + TF_EXPECT_OK( + fs.NewRandomAccessFile("gs://bucket/random_access.txt", nullptr, &file)); StringPiece filename; TF_EXPECT_OK(file->Name(&filename)); @@ -320,7 +325,8 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_Buffered_CachedNotSequential) { nullptr /* gcs additional header */, false /* compose append */); std::unique_ptr file; - TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file)); + TF_EXPECT_OK( + fs.NewRandomAccessFile("gs://bucket/random_access.txt", nullptr, &file)); StringPiece filename; TF_EXPECT_OK(file->Name(&filename)); @@ -361,7 +367,8 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_Buffered_Growing) { nullptr /* gcs additional header */, false /* compose append */); std::unique_ptr file; - TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file)); + TF_EXPECT_OK( + fs.NewRandomAccessFile("gs://bucket/random_access.txt", nullptr, &file)); StringPiece filename; TF_EXPECT_OK(file->Name(&filename)); @@ -408,7 +415,8 @@ 
TEST(GcsFileSystemTest, NewRandomAccessFile_Buffered_ReadBackwards) { nullptr /* gcs additional header */, false /* compose append */); std::unique_ptr file; - TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file)); + TF_EXPECT_OK( + fs.NewRandomAccessFile("gs://bucket/random_access.txt", nullptr, &file)); StringPiece filename; TF_EXPECT_OK(file->Name(&filename)); @@ -450,7 +458,8 @@ TEST(GcsFileSystemTest, nullptr /* gcs additional header */, false /* compose append */); std::unique_ptr file; - TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file)); + TF_EXPECT_OK( + fs.NewRandomAccessFile("gs://bucket/random_access.txt", nullptr, &file)); } TEST(GcsFileSystemTest, NewRandomAccessFile_WithLocationConstraintCaching) { @@ -496,18 +505,18 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_WithLocationConstraintCaching) { string bucket = "gs://bucket/random_access.txt"; string another_bucket = "gs://anotherbucket/random_access.txt"; // Multiple calls should only cause one request to the location api. - TF_EXPECT_OK(fs.NewRandomAccessFile(bucket, &file)); - TF_EXPECT_OK(fs.NewRandomAccessFile(bucket, &file)); + TF_EXPECT_OK(fs.NewRandomAccessFile(bucket, nullptr, &file)); + TF_EXPECT_OK(fs.NewRandomAccessFile(bucket, nullptr, &file)); // A new bucket should have one cache miss - TF_EXPECT_OK(fs.NewRandomAccessFile(another_bucket, &file)); + TF_EXPECT_OK(fs.NewRandomAccessFile(another_bucket, nullptr, &file)); // And then future calls to both should be cached - TF_EXPECT_OK(fs.NewRandomAccessFile(bucket, &file)); - TF_EXPECT_OK(fs.NewRandomAccessFile(another_bucket, &file)); + TF_EXPECT_OK(fs.NewRandomAccessFile(bucket, nullptr, &file)); + TF_EXPECT_OK(fs.NewRandomAccessFile(another_bucket, nullptr, &file)); // Trigger a flush, should then require one more call - fs.FlushCaches(); - TF_EXPECT_OK(fs.NewRandomAccessFile(bucket, &file)); + fs.FlushCaches(nullptr); + TF_EXPECT_OK(fs.NewRandomAccessFile(bucket, nullptr, &file)); } TEST(GcsFileSystemTest, @@ -533,10 +542,11 @@ TEST(GcsFileSystemTest, nullptr /* gcs additional header */, false /* compose append */); std::unique_ptr file; - EXPECT_EQ(tensorflow::errors::FailedPrecondition( - "Bucket 'bucket' is in 'barfoo' location, allowed locations " - "are: (us-east1)."), - fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file)); + EXPECT_EQ( + tensorflow::errors::FailedPrecondition( + "Bucket 'bucket' is in 'barfoo' location, allowed locations " + "are: (us-east1)."), + fs.NewRandomAccessFile("gs://bucket/random_access.txt", nullptr, &file)); } TEST(GcsFileSystemTest, NewRandomAccessFile_NoBlockCache_DifferentN) { @@ -565,7 +575,8 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_NoBlockCache_DifferentN) { nullptr /* gcs additional header */, false /* compose append */); std::unique_ptr file; - TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file)); + TF_EXPECT_OK( + fs.NewRandomAccessFile("gs://bucket/random_access.txt", nullptr, &file)); char small_scratch[3]; StringPiece result; @@ -630,8 +641,8 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_WithBlockCache) { // We are instantiating this in an enclosed scope to make sure after the // unique ptr goes out of scope, we can still access result. std::unique_ptr file; - TF_EXPECT_OK( - fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file)); + TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", + nullptr, &file)); // Read the first chunk. The cache will be populated with the first block of // 9 bytes. 
@@ -716,7 +727,8 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_WithBlockCache_Flush) { char scratch[100]; StringPiece result; std::unique_ptr file; - TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file)); + TF_EXPECT_OK( + fs.NewRandomAccessFile("gs://bucket/random_access.txt", nullptr, &file)); // Read the first chunk. The cache will be populated with the first block of // 9 bytes. scratch[5] = 'x'; @@ -725,7 +737,7 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_WithBlockCache_Flush) { EXPECT_EQ(scratch[5], 'x'); // Make sure we only copied 4 bytes. // Flush caches and read the second chunk. This will be a cache miss, and // the same block will be fetched again. - fs.FlushCaches(); + fs.FlushCaches(nullptr); TF_EXPECT_OK(file->Read(4, 4, &result, scratch)); EXPECT_EQ("4567", result); } @@ -772,8 +784,8 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_WithBlockCache_MaxStaleness) { // staleness of the filesystem is > 0, they will share the same blocks. std::unique_ptr file1; std::unique_ptr file2; - TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/object", &file1)); - TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/object", &file2)); + TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/object", nullptr, &file1)); + TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/object", nullptr, &file2)); // Reading the first block from file1 should load it once. TF_EXPECT_OK(file1->Read(0, 8, &result, scratch)); EXPECT_EQ("01234567", result); @@ -834,7 +846,8 @@ TEST(GcsFileSystemTest, nullptr /* gcs additional header */, false /* compose append */); std::unique_ptr file; - TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file)); + TF_EXPECT_OK( + fs.NewRandomAccessFile("gs://bucket/random_access.txt", nullptr, &file)); char scratch[5]; StringPiece result; @@ -864,7 +877,7 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_NoObjectName) { std::unique_ptr file; EXPECT_EQ(errors::Code::INVALID_ARGUMENT, - fs.NewRandomAccessFile("gs://bucket/", &file).code()); + fs.NewRandomAccessFile("gs://bucket/", nullptr, &file).code()); } TEST(GcsFileSystemTest, NewRandomAccessFile_InconsistentRead) { @@ -897,10 +910,11 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_InconsistentRead) { // Stat the file first so that the file stats are cached. FileStatistics stat; - TF_ASSERT_OK(fs.Stat("gs://bucket/random_access.txt", &stat)); + TF_ASSERT_OK(fs.Stat("gs://bucket/random_access.txt", nullptr, &stat)); std::unique_ptr file; - TF_ASSERT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file)); + TF_ASSERT_OK( + fs.NewRandomAccessFile("gs://bucket/random_access.txt", nullptr, &file)); char scratch[6]; StringPiece result; @@ -964,14 +978,16 @@ TEST(GcsFileSystemTest, NewWritableFile) { // Read from the file first, to fill the block cache. std::unique_ptr rfile; - TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/path/writeable", &rfile)); + TF_EXPECT_OK( + fs.NewRandomAccessFile("gs://bucket/path/writeable", nullptr, &rfile)); char scratch[100]; StringPiece result; TF_EXPECT_OK(rfile->Read(0, 4, &result, scratch)); EXPECT_EQ("0123", result); // Open the writable file. 
std::unique_ptr wfile; - TF_EXPECT_OK(fs.NewWritableFile("gs://bucket/path/writeable", &wfile)); + TF_EXPECT_OK( + fs.NewWritableFile("gs://bucket/path/writeable", nullptr, &wfile)); TF_EXPECT_OK(wfile->Append("content1,")); int64 pos; TF_EXPECT_OK(wfile->Tell(&pos)); @@ -1055,7 +1071,8 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadSucceeds) { nullptr /* gcs additional header */, false /* compose append */); std::unique_ptr file; - TF_EXPECT_OK(fs.NewWritableFile("gs://bucket/path/writeable.txt", &file)); + TF_EXPECT_OK( + fs.NewWritableFile("gs://bucket/path/writeable.txt", nullptr, &file)); TF_EXPECT_OK(file->Append("content1,")); TF_EXPECT_OK(file->Append("content2")); @@ -1127,7 +1144,8 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadSucceedsOnGetStatus) { // Pull the file's first block into the cache. This will trigger the first // HTTP request to GCS. std::unique_ptr rfile; - TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/path/writeable", &rfile)); + TF_EXPECT_OK( + fs.NewRandomAccessFile("gs://bucket/path/writeable", nullptr, &rfile)); char scratch[100]; StringPiece result; TF_EXPECT_OK(rfile->Read(0, 4, &result, scratch)); @@ -1135,7 +1153,8 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadSucceedsOnGetStatus) { // Now write to the same file. Once the write succeeds, the cached block will // be flushed. std::unique_ptr wfile; - TF_EXPECT_OK(fs.NewWritableFile("gs://bucket/path/writeable", &wfile)); + TF_EXPECT_OK( + fs.NewWritableFile("gs://bucket/path/writeable", nullptr, &wfile)); TF_EXPECT_OK(wfile->Append("content1,")); TF_EXPECT_OK(wfile->Append("content2")); // Appending doesn't invalidate the read cache - only flushing does. This read @@ -1213,7 +1232,8 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadAllAttemptsFail) { false /* compose append */); std::unique_ptr file; - TF_EXPECT_OK(fs.NewWritableFile("gs://bucket/path/writeable.txt", &file)); + TF_EXPECT_OK( + fs.NewWritableFile("gs://bucket/path/writeable.txt", nullptr, &file)); TF_EXPECT_OK(file->Append("content1,")); TF_EXPECT_OK(file->Append("content2")); @@ -1277,7 +1297,8 @@ TEST(GcsFileSystemTest, NewWritableFile_UploadReturns410) { { std::unique_ptr file; - TF_EXPECT_OK(fs.NewWritableFile("gs://bucket/path/writeable.txt", &file)); + TF_EXPECT_OK( + fs.NewWritableFile("gs://bucket/path/writeable.txt", nullptr, &file)); TF_EXPECT_OK(file->Append("content1,")); TF_EXPECT_OK(file->Append("content2")); @@ -1317,7 +1338,7 @@ TEST(GcsFileSystemTest, NewWritableFile_NoObjectName) { std::unique_ptr file; EXPECT_EQ(errors::Code::INVALID_ARGUMENT, - fs.NewWritableFile("gs://bucket/", &file).code()); + fs.NewWritableFile("gs://bucket/", nullptr, &file).code()); } TEST(GcsFileSystemTest, NewAppendableFile) { @@ -1382,12 +1403,14 @@ TEST(GcsFileSystemTest, NewAppendableFile) { // Create an appendable file. This should read the file from GCS, and pull its // contents into the block cache. std::unique_ptr wfile; - TF_EXPECT_OK(fs.NewAppendableFile("gs://bucket/path/appendable", &wfile)); + TF_EXPECT_OK( + fs.NewAppendableFile("gs://bucket/path/appendable", nullptr, &wfile)); TF_EXPECT_OK(wfile->Append("content2")); // Verify that the file contents are in the block cache. This read should not // trigger an HTTP request to GCS. 
std::unique_ptr rfile; - TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/path/appendable", &rfile)); + TF_EXPECT_OK( + fs.NewRandomAccessFile("gs://bucket/path/appendable", nullptr, &rfile)); char scratch[100]; StringPiece result; TF_EXPECT_OK(rfile->Read(0, 8, &result, scratch)); @@ -1416,7 +1439,7 @@ TEST(GcsFileSystemTest, NewAppendableFile_NoObjectName) { std::unique_ptr file; EXPECT_EQ(errors::Code::INVALID_ARGUMENT, - fs.NewAppendableFile("gs://bucket/", &file).code()); + fs.NewAppendableFile("gs://bucket/", nullptr, &file).code()); } TEST(GcsFileSystemTest, NewReadOnlyMemoryRegionFromFile) { @@ -1450,7 +1473,7 @@ TEST(GcsFileSystemTest, NewReadOnlyMemoryRegionFromFile) { std::unique_ptr region; TF_EXPECT_OK(fs.NewReadOnlyMemoryRegionFromFile( - "gs://bucket/path/random_access.txt", ®ion)); + "gs://bucket/path/random_access.txt", nullptr, ®ion)); EXPECT_EQ(content, StringPiece(reinterpret_cast(region->data()), region->length())); @@ -1471,7 +1494,8 @@ TEST(GcsFileSystemTest, NewReadOnlyMemoryRegionFromFile_NoObjectName) { std::unique_ptr region; EXPECT_EQ(errors::Code::INVALID_ARGUMENT, - fs.NewReadOnlyMemoryRegionFromFile("gs://bucket/", ®ion).code()); + fs.NewReadOnlyMemoryRegionFromFile("gs://bucket/", nullptr, ®ion) + .code()); } TEST(GcsFileSystemTest, FileExists_YesAsObject) { @@ -1493,7 +1517,7 @@ TEST(GcsFileSystemTest, FileExists_YesAsObject) { kTestTimeoutConfig, *kAllowedLocationsDefault, nullptr /* gcs additional header */, false /* compose append */); - TF_EXPECT_OK(fs.FileExists("gs://bucket/path/file1.txt")); + TF_EXPECT_OK(fs.FileExists("gs://bucket/path/file1.txt", nullptr)); } TEST(GcsFileSystemTest, FileExists_YesAsFolder) { @@ -1523,7 +1547,7 @@ TEST(GcsFileSystemTest, FileExists_YesAsFolder) { kTestTimeoutConfig, *kAllowedLocationsDefault, nullptr /* gcs additional header */, false /* compose append */); - TF_EXPECT_OK(fs.FileExists("gs://bucket/path/subfolder")); + TF_EXPECT_OK(fs.FileExists("gs://bucket/path/subfolder", nullptr)); } TEST(GcsFileSystemTest, FileExists_YesAsBucket) { @@ -1549,8 +1573,8 @@ TEST(GcsFileSystemTest, FileExists_YesAsBucket) { kTestTimeoutConfig, *kAllowedLocationsDefault, nullptr /* gcs additional header */, false /* compose append */); - TF_EXPECT_OK(fs.FileExists("gs://bucket1")); - TF_EXPECT_OK(fs.FileExists("gs://bucket1/")); + TF_EXPECT_OK(fs.FileExists("gs://bucket1", nullptr)); + TF_EXPECT_OK(fs.FileExists("gs://bucket1/", nullptr)); } TEST(GcsFileSystemTest, FileExists_NotAsObjectOrFolder) { @@ -1580,7 +1604,7 @@ TEST(GcsFileSystemTest, FileExists_NotAsObjectOrFolder) { nullptr /* gcs additional header */, false /* compose append */); EXPECT_EQ(errors::Code::NOT_FOUND, - fs.FileExists("gs://bucket/path/file1.txt").code()); + fs.FileExists("gs://bucket/path/file1.txt", nullptr).code()); } TEST(GcsFileSystemTest, FileExists_NotAsBucket) { @@ -1606,9 +1630,9 @@ TEST(GcsFileSystemTest, FileExists_NotAsBucket) { kTestTimeoutConfig, *kAllowedLocationsDefault, nullptr /* gcs additional header */, false /* compose append */); EXPECT_EQ(errors::Code::INVALID_ARGUMENT, - fs.FileExists("gs://bucket2/").code()); + fs.FileExists("gs://bucket2/", nullptr).code()); EXPECT_EQ(errors::Code::INVALID_ARGUMENT, - fs.FileExists("gs://bucket2").code()); + fs.FileExists("gs://bucket2", nullptr).code()); } TEST(GcsFileSystemTest, FileExists_StatCache) { @@ -1648,8 +1672,8 @@ TEST(GcsFileSystemTest, FileExists_StatCache) { // The stat cache will ensure that repeated lookups don't trigger additional // HTTP requests. 
for (int i = 0; i < 10; i++) { - TF_EXPECT_OK(fs.FileExists("gs://bucket/path/file1.txt")); - TF_EXPECT_OK(fs.FileExists("gs://bucket/path/subfolder/")); + TF_EXPECT_OK(fs.FileExists("gs://bucket/path/file1.txt", nullptr)); + TF_EXPECT_OK(fs.FileExists("gs://bucket/path/subfolder/", nullptr)); } } @@ -1672,8 +1696,8 @@ TEST(GcsFileSystemTest, FileExists_DirectoryMark) { kTestTimeoutConfig, *kAllowedLocationsDefault, nullptr /* gcs additional header */, false /* compose append */); - TF_EXPECT_OK(fs.FileExists("gs://bucket/dir/")); - TF_EXPECT_OK(fs.IsDirectory("gs://bucket/dir/")); + TF_EXPECT_OK(fs.FileExists("gs://bucket/dir/", nullptr)); + TF_EXPECT_OK(fs.IsDirectory("gs://bucket/dir/", nullptr)); } TEST(GcsFileSystemTest, GetChildren_NoItems) { @@ -1696,7 +1720,7 @@ TEST(GcsFileSystemTest, GetChildren_NoItems) { nullptr /* gcs additional header */, false /* compose append */); std::vector children; - TF_EXPECT_OK(fs.GetChildren("gs://bucket/path/", &children)); + TF_EXPECT_OK(fs.GetChildren("gs://bucket/path/", nullptr, &children)); EXPECT_EQ(std::vector({"subpath/"}), children); } @@ -1724,7 +1748,7 @@ TEST(GcsFileSystemTest, GetChildren_ThreeFiles) { nullptr /* gcs additional header */, false /* compose append */); std::vector children; - TF_EXPECT_OK(fs.GetChildren("gs://bucket/path/", &children)); + TF_EXPECT_OK(fs.GetChildren("gs://bucket/path/", nullptr, &children)); EXPECT_EQ(std::vector({"file1.txt", "file3.txt", "subpath/"}), children); @@ -1753,7 +1777,7 @@ TEST(GcsFileSystemTest, GetChildren_SelfDirectoryMarker) { nullptr /* gcs additional header */, false /* compose append */); std::vector children; - TF_EXPECT_OK(fs.GetChildren("gs://bucket/path/", &children)); + TF_EXPECT_OK(fs.GetChildren("gs://bucket/path/", nullptr, &children)); EXPECT_EQ(std::vector({"file3.txt", "subpath/"}), children); } @@ -1781,7 +1805,7 @@ TEST(GcsFileSystemTest, GetChildren_ThreeFiles_NoSlash) { nullptr /* gcs additional header */, false /* compose append */); std::vector children; - TF_EXPECT_OK(fs.GetChildren("gs://bucket/path", &children)); + TF_EXPECT_OK(fs.GetChildren("gs://bucket/path", nullptr, &children)); EXPECT_EQ(std::vector({"file1.txt", "file3.txt", "subpath/"}), children); @@ -1806,7 +1830,7 @@ TEST(GcsFileSystemTest, GetChildren_Root) { nullptr /* gcs additional header */, false /* compose append */); std::vector children; - TF_EXPECT_OK(fs.GetChildren("gs://bucket-a-b-c", &children)); + TF_EXPECT_OK(fs.GetChildren("gs://bucket-a-b-c", nullptr, &children)); EXPECT_EQ(0, children.size()); } @@ -1831,7 +1855,7 @@ TEST(GcsFileSystemTest, GetChildren_Empty) { nullptr /* gcs additional header */, false /* compose append */); std::vector children; - TF_EXPECT_OK(fs.GetChildren("gs://bucket/path/", &children)); + TF_EXPECT_OK(fs.GetChildren("gs://bucket/path/", nullptr, &children)); EXPECT_EQ(0, children.size()); } @@ -1872,7 +1896,7 @@ TEST(GcsFileSystemTest, GetChildren_Pagination) { nullptr /* gcs additional header */, false /* compose append */); std::vector children; - TF_EXPECT_OK(fs.GetChildren("gs://bucket/path", &children)); + TF_EXPECT_OK(fs.GetChildren("gs://bucket/path", nullptr, &children)); EXPECT_EQ(std::vector({"file1.txt", "file3.txt", "subpath/", "file4.txt", "file5.txt"}), @@ -1899,8 +1923,8 @@ TEST(GcsFileSystemTest, GetMatchingPaths_NoWildcard) { nullptr /* gcs additional header */, false /* compose append */); std::vector result; - TF_EXPECT_OK( - fs.GetMatchingPaths("gs://bucket/path/subpath/file2.txt", &result)); + 
TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/path/subpath/file2.txt", + nullptr, &result)); EXPECT_EQ(std::vector({"gs://bucket/path/subpath/file2.txt"}), result); } @@ -1927,7 +1951,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_BucketAndWildcard) { nullptr /* gcs additional header */, false /* compose append */); std::vector result; - TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/*/*", &result)); + TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/*/*", nullptr, &result)); EXPECT_EQ(std::vector({"gs://bucket/path/file1.txt", "gs://bucket/path/file3.txt", "gs://bucket/path/subpath"}), @@ -1956,7 +1980,8 @@ TEST(GcsFileSystemTest, GetMatchingPaths_FolderAndWildcard_Matches) { nullptr /* gcs additional header */, false /* compose append */); std::vector result; - TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/path/*/file2.txt", &result)); + TF_EXPECT_OK( + fs.GetMatchingPaths("gs://bucket/path/*/file2.txt", nullptr, &result)); EXPECT_EQ(std::vector({"gs://bucket/path/subpath/file2.txt"}), result); } @@ -1982,7 +2007,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_SelfDirectoryMarker) { nullptr /* gcs additional header */, false /* compose append */); std::vector result; - TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/path/*", &result)); + TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/path/*", nullptr, &result)); EXPECT_EQ(std::vector({"gs://bucket/path/file3.txt"}), result); } @@ -2007,7 +2032,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_SlashInObjectName) { nullptr /* gcs additional header */, false /* compose append */); std::vector result; - TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/path/*", &result)); + TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/path/*", nullptr, &result)); EXPECT_EQ(std::vector(), result); } @@ -2032,7 +2057,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_SlashInObjectNameEscaped) { nullptr /* gcs additional header */, false /* compose append */); std::vector result; - TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/path/\\/*", &result)); + TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/path/\\/*", nullptr, &result)); EXPECT_EQ(std::vector({"gs://bucket/path//foo.txt"}), result); } @@ -2058,7 +2083,8 @@ TEST(GcsFileSystemTest, GetMatchingPaths_FolderAndWildcard_NoMatches) { nullptr /* gcs additional header */, false /* compose append */); std::vector result; - TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/path/*/file3.txt", &result)); + TF_EXPECT_OK( + fs.GetMatchingPaths("gs://bucket/path/*/file3.txt", nullptr, &result)); EXPECT_EQ(std::vector(), result); } @@ -2077,7 +2103,7 @@ TEST(GcsFileSystemTest, GetMatchingPaths_OnlyWildcard) { std::vector result; EXPECT_EQ(errors::Code::INVALID_ARGUMENT, - fs.GetMatchingPaths("gs://*", &result).code()); + fs.GetMatchingPaths("gs://*", nullptr, &result).code()); } TEST(GcsFileSystemTest, GetMatchingPaths_Cache) { @@ -2113,11 +2139,11 @@ TEST(GcsFileSystemTest, GetMatchingPaths_Cache) { // any additional HTTP requests to GCS. 
for (int i = 0; i < 10; i++) { std::vector result; - TF_EXPECT_OK( - fs.GetMatchingPaths("gs://bucket/path/subpath/file2.txt", &result)); + TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/path/subpath/file2.txt", + nullptr, &result)); EXPECT_EQ(std::vector({"gs://bucket/path/subpath/file2.txt"}), result); - TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/*/*", &result)); + TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/*/*", nullptr, &result)); EXPECT_EQ(std::vector({"gs://bucket/path/file1.txt", "gs://bucket/path/file3.txt", "gs://bucket/path/subpath"}), @@ -2155,17 +2181,17 @@ TEST(GcsFileSystemTest, GetMatchingPaths_Cache_Flush) { // This loop should trigger the first HTTP request to GCS. for (int i = 0; i < 10; i++) { std::vector result; - TF_EXPECT_OK( - fs.GetMatchingPaths("gs://bucket/path/subpath/file2.txt", &result)); + TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/path/subpath/file2.txt", + nullptr, &result)); EXPECT_EQ(std::vector({"gs://bucket/path/subpath/file2.txt"}), result); } // After flushing caches, there should be another (identical) request to GCS. - fs.FlushCaches(); + fs.FlushCaches(nullptr); for (int i = 0; i < 10; i++) { std::vector result; - TF_EXPECT_OK( - fs.GetMatchingPaths("gs://bucket/path/subpath/file2.txt", &result)); + TF_EXPECT_OK(fs.GetMatchingPaths("gs://bucket/path/subpath/file2.txt", + nullptr, &result)); EXPECT_EQ(std::vector({"gs://bucket/path/subpath/file2.txt"}), result); } @@ -2220,11 +2246,12 @@ TEST(GcsFileSystemTest, DeleteFile) { char scratch[100]; StringPiece result; std::unique_ptr file; - TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/path/file1.txt", &file)); + TF_EXPECT_OK( + fs.NewRandomAccessFile("gs://bucket/path/file1.txt", nullptr, &file)); TF_EXPECT_OK(file->Read(0, 8, &result, scratch)); EXPECT_EQ("01234567", result); // Deleting the file triggers the next HTTP request to GCS. - TF_EXPECT_OK(fs.DeleteFile("gs://bucket/path/file1.txt")); + TF_EXPECT_OK(fs.DeleteFile("gs://bucket/path/file1.txt", nullptr)); // Re-reading the file causes its contents to be reloaded from GCS and not // from the block cache. TF_EXPECT_OK(file->Read(0, 8, &result, scratch)); @@ -2245,7 +2272,7 @@ TEST(GcsFileSystemTest, DeleteFile_NoObjectName) { nullptr /* gcs additional header */, false /* compose append */); EXPECT_EQ(errors::Code::INVALID_ARGUMENT, - fs.DeleteFile("gs://bucket/").code()); + fs.DeleteFile("gs://bucket/", nullptr).code()); } TEST(GcsFileSystemTest, DeleteFile_StatCacheRemoved) { @@ -2289,14 +2316,15 @@ TEST(GcsFileSystemTest, DeleteFile_StatCacheRemoved) { // Stats the file first so the stat is cached. 
FileStatistics stat_before_deletion; - TF_EXPECT_OK(fs.Stat("gs://bucket/file.txt", &stat_before_deletion)); + TF_EXPECT_OK(fs.Stat("gs://bucket/file.txt", nullptr, &stat_before_deletion)); EXPECT_EQ(1010, stat_before_deletion.length); - TF_EXPECT_OK(fs.DeleteFile("gs://bucket/file.txt")); + TF_EXPECT_OK(fs.DeleteFile("gs://bucket/file.txt", nullptr)); FileStatistics stat_after_deletion; - EXPECT_EQ(error::Code::NOT_FOUND, - fs.Stat("gs://bucket/file.txt", &stat_after_deletion).code()); + EXPECT_EQ( + error::Code::NOT_FOUND, + fs.Stat("gs://bucket/file.txt", nullptr, &stat_after_deletion).code()); } TEST(GcsFileSystemTest, DeleteDir_Empty) { @@ -2317,7 +2345,7 @@ TEST(GcsFileSystemTest, DeleteDir_Empty) { kTestTimeoutConfig, *kAllowedLocationsDefault, nullptr /* gcs additional header */, false /* compose append */); - TF_EXPECT_OK(fs.DeleteDir("gs://bucket/path/")); + TF_EXPECT_OK(fs.DeleteDir("gs://bucket/path/", nullptr)); } TEST(GcsFileSystemTest, DeleteDir_OnlyDirMarkerLeft) { @@ -2346,7 +2374,7 @@ TEST(GcsFileSystemTest, DeleteDir_OnlyDirMarkerLeft) { kTestTimeoutConfig, *kAllowedLocationsDefault, nullptr /* gcs additional header */, false /* compose append */); - TF_EXPECT_OK(fs.DeleteDir("gs://bucket/path/")); + TF_EXPECT_OK(fs.DeleteDir("gs://bucket/path/", nullptr)); } TEST(GcsFileSystemTest, DeleteDir_BucketOnly) { @@ -2366,7 +2394,7 @@ TEST(GcsFileSystemTest, DeleteDir_BucketOnly) { kTestTimeoutConfig, *kAllowedLocationsDefault, nullptr /* gcs additional header */, false /* compose append */); - TF_EXPECT_OK(fs.DeleteDir("gs://bucket")); + TF_EXPECT_OK(fs.DeleteDir("gs://bucket", nullptr)); } TEST(GcsFileSystemTest, DeleteDir_NonEmpty) { @@ -2389,7 +2417,7 @@ TEST(GcsFileSystemTest, DeleteDir_NonEmpty) { nullptr /* gcs additional header */, false /* compose append */); EXPECT_EQ(error::Code::FAILED_PRECONDITION, - fs.DeleteDir("gs://bucket/path/").code()); + fs.DeleteDir("gs://bucket/path/", nullptr).code()); } TEST(GcsFileSystemTest, GetFileSize) { @@ -2412,7 +2440,7 @@ TEST(GcsFileSystemTest, GetFileSize) { nullptr /* gcs additional header */, false /* compose append */); uint64 size; - TF_EXPECT_OK(fs.GetFileSize("gs://bucket/file.txt", &size)); + TF_EXPECT_OK(fs.GetFileSize("gs://bucket/file.txt", nullptr, &size)); EXPECT_EQ(1010, size); } @@ -2431,7 +2459,7 @@ TEST(GcsFileSystemTest, GetFileSize_NoObjectName) { uint64 size; EXPECT_EQ(errors::Code::INVALID_ARGUMENT, - fs.GetFileSize("gs://bucket/", &size).code()); + fs.GetFileSize("gs://bucket/", nullptr, &size).code()); } TEST(GcsFileSystemTest, RenameFile_Folder) { @@ -2515,7 +2543,8 @@ TEST(GcsFileSystemTest, RenameFile_Folder) { kTestTimeoutConfig, *kAllowedLocationsDefault, nullptr /* gcs additional header */, false /* compose append */); - TF_EXPECT_OK(fs.RenameFile("gs://bucket/path1", "gs://bucket/path2/")); + TF_EXPECT_OK( + fs.RenameFile("gs://bucket/path1", "gs://bucket/path2/", nullptr)); } TEST(GcsFileSystemTest, RenameFile_Object) { @@ -2612,15 +2641,17 @@ TEST(GcsFileSystemTest, RenameFile_Object) { StringPiece result; std::unique_ptr src; std::unique_ptr dst; - TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/path/src.txt", &src)); + TF_EXPECT_OK( + fs.NewRandomAccessFile("gs://bucket/path/src.txt", nullptr, &src)); TF_EXPECT_OK(src->Read(0, 8, &result, scratch)); EXPECT_EQ("01234567", result); - TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/path/dst.txt", &dst)); + TF_EXPECT_OK( + fs.NewRandomAccessFile("gs://bucket/path/dst.txt", nullptr, &dst)); TF_EXPECT_OK(dst->Read(0, 8, &result, scratch)); 
EXPECT_EQ("76543210", result); // Now rename src to dst. This should flush the block cache for both files. - TF_EXPECT_OK( - fs.RenameFile("gs://bucket/path/src.txt", "gs://bucket/path/dst.txt")); + TF_EXPECT_OK(fs.RenameFile("gs://bucket/path/src.txt", + "gs://bucket/path/dst.txt", nullptr)); // Re-read both files. This should reload their contents from GCS. TF_EXPECT_OK(src->Read(0, 8, &result, scratch)); EXPECT_EQ("89abcdef", result); @@ -2690,14 +2721,16 @@ TEST(GcsFileSystemTest, RenameFile_Object_FlushTargetStatCache) { // Do an initial stat of the destination file to load their contents into the // stat cache. FileStatistics stat_before_renaming; - TF_EXPECT_OK(fs.Stat("gs://bucket/path/dst.txt", &stat_before_renaming)); + TF_EXPECT_OK( + fs.Stat("gs://bucket/path/dst.txt", nullptr, &stat_before_renaming)); EXPECT_EQ(1000, stat_before_renaming.length); - TF_EXPECT_OK( - fs.RenameFile("gs://bucket/path/src.txt", "gs://bucket/path/dst.txt")); + TF_EXPECT_OK(fs.RenameFile("gs://bucket/path/src.txt", + "gs://bucket/path/dst.txt", nullptr)); FileStatistics stat_after_renaming; - TF_EXPECT_OK(fs.Stat("gs://bucket/path/dst.txt", &stat_after_renaming)); + TF_EXPECT_OK( + fs.Stat("gs://bucket/path/dst.txt", nullptr, &stat_after_renaming)); EXPECT_EQ(1010, stat_after_renaming.length); } @@ -2755,8 +2788,8 @@ TEST(GcsFileSystemTest, RenameFile_Object_DeletionRetried) { kTestTimeoutConfig, *kAllowedLocationsDefault, nullptr /* gcs additional header */, false /* compose append */); - TF_EXPECT_OK( - fs.RenameFile("gs://bucket/path/src.txt", "gs://bucket/path/dst.txt")); + TF_EXPECT_OK(fs.RenameFile("gs://bucket/path/src.txt", + "gs://bucket/path/dst.txt", nullptr)); } /// Tests the case when rewrite couldn't complete in one RPC. @@ -2797,10 +2830,10 @@ TEST(GcsFileSystemTest, RenameFile_Object_Incomplete) { kTestTimeoutConfig, *kAllowedLocationsDefault, nullptr /* gcs additional header */, false /* compose append */); - EXPECT_EQ( - errors::Code::UNIMPLEMENTED, - fs.RenameFile("gs://bucket/path/src.txt", "gs://bucket/path/dst.txt") - .code()); + EXPECT_EQ(errors::Code::UNIMPLEMENTED, + fs.RenameFile("gs://bucket/path/src.txt", + "gs://bucket/path/dst.txt", nullptr) + .code()); } TEST(GcsFileSystemTest, Stat_Object) { @@ -2823,7 +2856,7 @@ TEST(GcsFileSystemTest, Stat_Object) { nullptr /* gcs additional header */, false /* compose append */); FileStatistics stat; - TF_EXPECT_OK(fs.Stat("gs://bucket/file.txt", &stat)); + TF_EXPECT_OK(fs.Stat("gs://bucket/file.txt", nullptr, &stat)); EXPECT_EQ(1010, stat.length); EXPECT_NEAR(1461971724896, stat.mtime_nsec / 1000 / 1000, 1); EXPECT_FALSE(stat.is_directory); @@ -2857,7 +2890,7 @@ TEST(GcsFileSystemTest, Stat_Folder) { nullptr /* gcs additional header */, false /* compose append */); FileStatistics stat; - TF_EXPECT_OK(fs.Stat("gs://bucket/subfolder", &stat)); + TF_EXPECT_OK(fs.Stat("gs://bucket/subfolder", nullptr, &stat)); EXPECT_EQ(0, stat.length); EXPECT_EQ(0, stat.mtime_nsec); EXPECT_TRUE(stat.is_directory); @@ -2890,7 +2923,8 @@ TEST(GcsFileSystemTest, Stat_ObjectOrFolderNotFound) { nullptr /* gcs additional header */, false /* compose append */); FileStatistics stat; - EXPECT_EQ(error::Code::NOT_FOUND, fs.Stat("gs://bucket/path", &stat).code()); + EXPECT_EQ(error::Code::NOT_FOUND, + fs.Stat("gs://bucket/path", nullptr, &stat).code()); } TEST(GcsFileSystemTest, Stat_Bucket) { @@ -2911,7 +2945,7 @@ TEST(GcsFileSystemTest, Stat_Bucket) { nullptr /* gcs additional header */, false /* compose append */); FileStatistics stat; - 
TF_EXPECT_OK(fs.Stat("gs://bucket/", &stat)); + TF_EXPECT_OK(fs.Stat("gs://bucket/", nullptr, &stat)); EXPECT_EQ(0, stat.length); EXPECT_EQ(0, stat.mtime_nsec); EXPECT_TRUE(stat.is_directory); @@ -2935,7 +2969,8 @@ TEST(GcsFileSystemTest, Stat_BucketNotFound) { nullptr /* gcs additional header */, false /* compose append */); FileStatistics stat; - EXPECT_EQ(error::Code::NOT_FOUND, fs.Stat("gs://bucket/", &stat).code()); + EXPECT_EQ(error::Code::NOT_FOUND, + fs.Stat("gs://bucket/", nullptr, &stat).code()); } TEST(GcsFileSystemTest, Stat_Cache) { @@ -2976,11 +3011,11 @@ TEST(GcsFileSystemTest, Stat_Cache) { // HTTP requests to GCS. for (int i = 0; i < 10; i++) { FileStatistics stat; - TF_EXPECT_OK(fs.Stat("gs://bucket/file.txt", &stat)); + TF_EXPECT_OK(fs.Stat("gs://bucket/file.txt", nullptr, &stat)); EXPECT_EQ(1010, stat.length); EXPECT_NEAR(1461971724896, stat.mtime_nsec / 1000 / 1000, 1); EXPECT_FALSE(stat.is_directory); - TF_EXPECT_OK(fs.Stat("gs://bucket/subfolder/", &stat)); + TF_EXPECT_OK(fs.Stat("gs://bucket/subfolder/", nullptr, &stat)); EXPECT_EQ(0, stat.length); EXPECT_EQ(0, stat.mtime_nsec); EXPECT_TRUE(stat.is_directory); @@ -3016,16 +3051,16 @@ TEST(GcsFileSystemTest, Stat_Cache_Flush) { // There should be a single HTTP request to GCS for fs.Stat in this loop. for (int i = 0; i < 10; i++) { FileStatistics stat; - TF_EXPECT_OK(fs.Stat("gs://bucket/file.txt", &stat)); + TF_EXPECT_OK(fs.Stat("gs://bucket/file.txt", nullptr, &stat)); EXPECT_EQ(1010, stat.length); EXPECT_NEAR(1461971724896, stat.mtime_nsec / 1000 / 1000, 1); EXPECT_FALSE(stat.is_directory); } // After flushing caches, there should be a second request to GCS for fs.Stat. - fs.FlushCaches(); + fs.FlushCaches(nullptr); for (int i = 0; i < 10; i++) { FileStatistics stat; - TF_EXPECT_OK(fs.Stat("gs://bucket/file.txt", &stat)); + TF_EXPECT_OK(fs.Stat("gs://bucket/file.txt", nullptr, &stat)); EXPECT_EQ(1010, stat.length); EXPECT_NEAR(1461971724896, stat.mtime_nsec / 1000 / 1000, 1); EXPECT_FALSE(stat.is_directory); @@ -3052,7 +3087,7 @@ TEST(GcsFileSystemTest, Stat_FilenameEndingWithSlash) { nullptr /* gcs additional header */, false /* compose append */); FileStatistics stat; - TF_EXPECT_OK(fs.Stat("gs://bucket/dir/", &stat)); + TF_EXPECT_OK(fs.Stat("gs://bucket/dir/", nullptr, &stat)); EXPECT_EQ(5, stat.length); EXPECT_TRUE(stat.is_directory); } @@ -3084,7 +3119,7 @@ TEST(GcsFileSystemTest, IsDirectory_NotFound) { nullptr /* gcs additional header */, false /* compose append */); EXPECT_EQ(error::Code::NOT_FOUND, - fs.IsDirectory("gs://bucket/file.txt").code()); + fs.IsDirectory("gs://bucket/file.txt", nullptr).code()); } TEST(GcsFileSystemTest, IsDirectory_NotDirectoryButObject) { @@ -3115,7 +3150,7 @@ TEST(GcsFileSystemTest, IsDirectory_NotDirectoryButObject) { nullptr /* gcs additional header */, false /* compose append */); EXPECT_EQ(error::Code::FAILED_PRECONDITION, - fs.IsDirectory("gs://bucket/file.txt").code()); + fs.IsDirectory("gs://bucket/file.txt", nullptr).code()); } TEST(GcsFileSystemTest, IsDirectory_Yes) { @@ -3145,8 +3180,8 @@ TEST(GcsFileSystemTest, IsDirectory_Yes) { kTestTimeoutConfig, *kAllowedLocationsDefault, nullptr /* gcs additional header */, false /* compose append */); - TF_EXPECT_OK(fs.IsDirectory("gs://bucket/subfolder")); - TF_EXPECT_OK(fs.IsDirectory("gs://bucket/subfolder/")); + TF_EXPECT_OK(fs.IsDirectory("gs://bucket/subfolder", nullptr)); + TF_EXPECT_OK(fs.IsDirectory("gs://bucket/subfolder/", nullptr)); } TEST(GcsFileSystemTest, IsDirectory_Bucket) { @@ -3172,8 +3207,8 @@ 
TEST(GcsFileSystemTest, IsDirectory_Bucket) { kTestTimeoutConfig, *kAllowedLocationsDefault, nullptr /* gcs additional header */, false /* compose append */); - TF_EXPECT_OK(fs.IsDirectory("gs://bucket")); - TF_EXPECT_OK(fs.IsDirectory("gs://bucket/")); + TF_EXPECT_OK(fs.IsDirectory("gs://bucket", nullptr)); + TF_EXPECT_OK(fs.IsDirectory("gs://bucket/", nullptr)); } TEST(GcsFileSystemTest, IsDirectory_BucketNotFound) { @@ -3193,7 +3228,8 @@ TEST(GcsFileSystemTest, IsDirectory_BucketNotFound) { kTestTimeoutConfig, *kAllowedLocationsDefault, nullptr /* gcs additional header */, false /* compose append */); - EXPECT_EQ(error::Code::NOT_FOUND, fs.IsDirectory("gs://bucket/").code()); + EXPECT_EQ(error::Code::NOT_FOUND, + fs.IsDirectory("gs://bucket/", nullptr).code()); } TEST(GcsFileSystemTest, CreateDir_Folder) { @@ -3250,15 +3286,15 @@ TEST(GcsFileSystemTest, CreateDir_Folder) { kTestTimeoutConfig, *kAllowedLocationsDefault, nullptr /* gcs additional header */, false /* compose append */); - TF_EXPECT_OK(fs.CreateDir("gs://bucket/subpath")); + TF_EXPECT_OK(fs.CreateDir("gs://bucket/subpath", nullptr)); // Check that when GCS returns the object already exists return that the // directory already exists. EXPECT_EQ(errors::AlreadyExists("gs://bucket/subpath"), - fs.CreateDir("gs://bucket/subpath")); + fs.CreateDir("gs://bucket/subpath", nullptr)); // Check that when GCS returns the object already has a version (failed // precondition) return directory already exists. EXPECT_EQ(errors::AlreadyExists("gs://bucket/subpath"), - fs.CreateDir("gs://bucket/subpath")); + fs.CreateDir("gs://bucket/subpath", nullptr)); } TEST(GcsFileSystemTest, CreateDir_Bucket) { @@ -3284,8 +3320,8 @@ TEST(GcsFileSystemTest, CreateDir_Bucket) { kTestTimeoutConfig, *kAllowedLocationsDefault, nullptr /* gcs additional header */, false /* compose append */); - TF_EXPECT_OK(fs.CreateDir("gs://bucket/")); - TF_EXPECT_OK(fs.CreateDir("gs://bucket")); + TF_EXPECT_OK(fs.CreateDir("gs://bucket/", nullptr)); + TF_EXPECT_OK(fs.CreateDir("gs://bucket", nullptr)); } TEST(GcsFileSystemTest, DeleteRecursively_Ok) { @@ -3357,8 +3393,8 @@ TEST(GcsFileSystemTest, DeleteRecursively_Ok) { nullptr /* gcs additional header */, false /* compose append */); int64 undeleted_files, undeleted_dirs; - TF_EXPECT_OK(fs.DeleteRecursively("gs://bucket/path", &undeleted_files, - &undeleted_dirs)); + TF_EXPECT_OK(fs.DeleteRecursively("gs://bucket/path", nullptr, + &undeleted_files, &undeleted_dirs)); EXPECT_EQ(0, undeleted_files); EXPECT_EQ(0, undeleted_dirs); } @@ -3450,8 +3486,8 @@ TEST(GcsFileSystemTest, DeleteRecursively_DeletionErrors) { nullptr /* gcs additional header */, false /* compose append */); int64 undeleted_files, undeleted_dirs; - TF_EXPECT_OK(fs.DeleteRecursively("gs://bucket/path", &undeleted_files, - &undeleted_dirs)); + TF_EXPECT_OK(fs.DeleteRecursively("gs://bucket/path", nullptr, + &undeleted_files, &undeleted_dirs)); EXPECT_EQ(1, undeleted_files); EXPECT_EQ(1, undeleted_dirs); } @@ -3486,7 +3522,7 @@ TEST(GcsFileSystemTest, DeleteRecursively_NotAFolder) { int64 undeleted_files, undeleted_dirs; EXPECT_EQ(error::Code::NOT_FOUND, - fs.DeleteRecursively("gs://bucket/path", &undeleted_files, + fs.DeleteRecursively("gs://bucket/path", nullptr, &undeleted_files, &undeleted_dirs) .code()); EXPECT_EQ(0, undeleted_files); @@ -3501,7 +3537,7 @@ TEST(GcsFileSystemTest, NoConstraintsEnvironmentVariableTest) { // Cover cache initialization code, any uninitialized cache will cause this to // fail - fs1.FlushCaches(); + fs1.FlushCaches(nullptr); 
} TEST(GcsFileSystemTest, BucketLocationConstraintEnvironmentVariableTest) { @@ -3715,7 +3751,7 @@ TEST(GcsFileSystemTest, Stat_StatsRecording) { EXPECT_EQ(stats.fs_, &fs); FileStatistics stat; - TF_EXPECT_OK(fs.Stat("gs://bucket/file.txt", &stat)); + TF_EXPECT_OK(fs.Stat("gs://bucket/file.txt", nullptr, &stat)); EXPECT_EQ(1, stats.stat_object_request_count_); } @@ -3742,7 +3778,8 @@ TEST(GcsFileSystemTest, NewRandomAccessFile_StatsRecording) { EXPECT_EQ(stats.fs_, &fs); std::unique_ptr file; - TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file)); + TF_EXPECT_OK( + fs.NewRandomAccessFile("gs://bucket/random_access.txt", nullptr, &file)); char scratch[6]; StringPiece result; @@ -3883,8 +3920,8 @@ TEST(GcsFileSystemTest, NewAppendableFile_MultipleFlushesWithCompose) { // Create an appendable file. This should read the file from GCS, and pull its // contents into the block cache. std::unique_ptr wfile; - TF_EXPECT_OK( - fs.NewAppendableFile("gs://bucket/some/path/appendable", &wfile)); + TF_EXPECT_OK(fs.NewAppendableFile("gs://bucket/some/path/appendable", nullptr, + &wfile)); TF_EXPECT_OK(wfile->Append(contents[1])); TF_EXPECT_OK(wfile->Flush()); TF_EXPECT_OK(wfile->Append(contents[2])); @@ -3981,7 +4018,8 @@ TEST(GcsFileSystemTest, NewAppendableFile_MultipleFlushesWithoutCompose) { // Create an appendable file. This should read the file from GCS, and pull its // contents into the block cache. std::unique_ptr wfile; - TF_EXPECT_OK(fs.NewAppendableFile("gs://bucket/path/appendable", &wfile)); + TF_EXPECT_OK( + fs.NewAppendableFile("gs://bucket/path/appendable", nullptr, &wfile)); TF_EXPECT_OK(wfile->Append(contents[1])); TF_EXPECT_OK(wfile->Flush()); TF_EXPECT_OK(wfile->Append(contents[2])); diff --git a/tensorflow/core/platform/file_system_test.cc b/tensorflow/core/platform/file_system_test.cc index dd02da32073..1e23a2b853c 100644 --- a/tensorflow/core/platform/file_system_test.cc +++ b/tensorflow/core/platform/file_system_test.cc @@ -107,7 +107,7 @@ class InterPlanetaryFileSystem : public NullFileSystem { Status GetChildren(const string& dir, TransactionToken* token, std::vector* result) override { - TF_RETURN_IF_ERROR(IsDirectory(dir)); + TF_RETURN_IF_ERROR(IsDirectory(dir, nullptr)); string parsed_path; ParsePath(dir, &parsed_path); result->insert(result->begin(), celestial_bodies_[parsed_path].begin(), @@ -153,7 +153,7 @@ class InterPlanetaryFileSystem : public NullFileSystem { string Match(InterPlanetaryFileSystem* ipfs, const string& suffix_pattern) { std::vector results; Status s = - ipfs->GetMatchingPaths(ipfs->JoinPath(kPrefix, suffix_pattern), &results); + ipfs->GetMatchingPaths(ipfs->JoinPath(kPrefix, suffix_pattern), nullptr, &results); if (!s.ok()) { return s.ToString(); } else { @@ -180,18 +180,18 @@ TEST(InterPlanetaryFileSystemTest, IPFSMatch) { // Returns Jupiter's and Earth's moons. 
EXPECT_EQ(Match(&ipfs, "*/*"), "Earth/Moon,Jupiter/Europa,Jupiter/Ganymede,Jupiter/Io"); - TF_EXPECT_OK(ipfs.CreateDir(ipfs.JoinPath(kPrefix, "Planet0"))); - TF_EXPECT_OK(ipfs.CreateDir(ipfs.JoinPath(kPrefix, "Planet1"))); + TF_EXPECT_OK(ipfs.CreateDir(ipfs.JoinPath(kPrefix, "Planet0"), nullptr)); + TF_EXPECT_OK(ipfs.CreateDir(ipfs.JoinPath(kPrefix, "Planet1"), nullptr)); EXPECT_EQ(Match(&ipfs, "Planet[0-1]"), "Planet0,Planet1"); EXPECT_EQ(Match(&ipfs, "Planet?"), "Planet0,Planet1"); } TEST(InterPlanetaryFileSystemTest, MatchSimple) { InterPlanetaryFileSystem ipfs; - TF_EXPECT_OK(ipfs.CreateDir(ipfs.JoinPath(kPrefix, "match-00"))); - TF_EXPECT_OK(ipfs.CreateDir(ipfs.JoinPath(kPrefix, "match-0a"))); - TF_EXPECT_OK(ipfs.CreateDir(ipfs.JoinPath(kPrefix, "match-01"))); - TF_EXPECT_OK(ipfs.CreateDir(ipfs.JoinPath(kPrefix, "match-aaa"))); + TF_EXPECT_OK(ipfs.CreateDir(ipfs.JoinPath(kPrefix, "match-00"), nullptr)); + TF_EXPECT_OK(ipfs.CreateDir(ipfs.JoinPath(kPrefix, "match-0a"), nullptr)); + TF_EXPECT_OK(ipfs.CreateDir(ipfs.JoinPath(kPrefix, "match-01"), nullptr)); + TF_EXPECT_OK(ipfs.CreateDir(ipfs.JoinPath(kPrefix, "match-aaa"), nullptr)); EXPECT_EQ(Match(&ipfs, "match-*"), "match-00,match-01,match-0a,match-aaa"); EXPECT_EQ(Match(&ipfs, "match-0[0-9]"), "match-00,match-01"); @@ -204,8 +204,8 @@ TEST(InterPlanetaryFileSystemTest, MatchSimple) { // that evil_directory isn't accessed. TEST(InterPlanetaryFileSystemTest, MatchOnlyNeeded) { InterPlanetaryFileSystem ipfs; - TF_EXPECT_OK(ipfs.CreateDir(ipfs.JoinPath(kPrefix, "abcd"))); - TF_EXPECT_OK(ipfs.CreateDir(ipfs.JoinPath(kPrefix, "evil_directory"))); + TF_EXPECT_OK(ipfs.CreateDir(ipfs.JoinPath(kPrefix, "abcd"), nullptr)); + TF_EXPECT_OK(ipfs.CreateDir(ipfs.JoinPath(kPrefix, "evil_directory"), nullptr)); EXPECT_EQ(Match(&ipfs, "abcd"), "abcd"); } @@ -213,13 +213,13 @@ TEST(InterPlanetaryFileSystemTest, MatchOnlyNeeded) { TEST(InterPlanetaryFileSystemTest, MatchDirectory) { InterPlanetaryFileSystem ipfs; TF_EXPECT_OK( - ipfs.RecursivelyCreateDir(ipfs.JoinPath(kPrefix, "match-00/abc/x"))); + ipfs.RecursivelyCreateDir(ipfs.JoinPath(kPrefix, "match-00/abc/x"), nullptr)); TF_EXPECT_OK( - ipfs.RecursivelyCreateDir(ipfs.JoinPath(kPrefix, "match-0a/abc/x"))); + ipfs.RecursivelyCreateDir(ipfs.JoinPath(kPrefix, "match-0a/abc/x"), nullptr)); TF_EXPECT_OK( - ipfs.RecursivelyCreateDir(ipfs.JoinPath(kPrefix, "match-01/abc/x"))); + ipfs.RecursivelyCreateDir(ipfs.JoinPath(kPrefix, "match-01/abc/x"), nullptr)); TF_EXPECT_OK( - ipfs.RecursivelyCreateDir(ipfs.JoinPath(kPrefix, "match-aaa/abc/x"))); + ipfs.RecursivelyCreateDir(ipfs.JoinPath(kPrefix, "match-aaa/abc/x"), nullptr)); EXPECT_EQ(Match(&ipfs, "match-*/abc/x"), "match-00/abc/x,match-01/abc/x,match-0a/abc/x,match-aaa/abc/x"); @@ -234,19 +234,19 @@ TEST(InterPlanetaryFileSystemTest, MatchDirectory) { TEST(InterPlanetaryFileSystemTest, MatchMultipleWildcards) { InterPlanetaryFileSystem ipfs; TF_EXPECT_OK( - ipfs.RecursivelyCreateDir(ipfs.JoinPath(kPrefix, "match-00/abc/00"))); + ipfs.RecursivelyCreateDir(ipfs.JoinPath(kPrefix, "match-00/abc/00"), nullptr)); TF_EXPECT_OK( - ipfs.RecursivelyCreateDir(ipfs.JoinPath(kPrefix, "match-00/abc/01"))); + ipfs.RecursivelyCreateDir(ipfs.JoinPath(kPrefix, "match-00/abc/01"), nullptr)); TF_EXPECT_OK( - ipfs.RecursivelyCreateDir(ipfs.JoinPath(kPrefix, "match-00/abc/09"))); + ipfs.RecursivelyCreateDir(ipfs.JoinPath(kPrefix, "match-00/abc/09"), nullptr)); TF_EXPECT_OK( - ipfs.RecursivelyCreateDir(ipfs.JoinPath(kPrefix, "match-01/abc/00"))); + 
ipfs.RecursivelyCreateDir(ipfs.JoinPath(kPrefix, "match-01/abc/00"), nullptr)); TF_EXPECT_OK( - ipfs.RecursivelyCreateDir(ipfs.JoinPath(kPrefix, "match-01/abc/04"))); + ipfs.RecursivelyCreateDir(ipfs.JoinPath(kPrefix, "match-01/abc/04"), nullptr)); TF_EXPECT_OK( - ipfs.RecursivelyCreateDir(ipfs.JoinPath(kPrefix, "match-01/abc/10"))); + ipfs.RecursivelyCreateDir(ipfs.JoinPath(kPrefix, "match-01/abc/10"), nullptr)); TF_EXPECT_OK( - ipfs.RecursivelyCreateDir(ipfs.JoinPath(kPrefix, "match-02/abc/00"))); + ipfs.RecursivelyCreateDir(ipfs.JoinPath(kPrefix, "match-02/abc/00"), nullptr)); EXPECT_EQ(Match(&ipfs, "match-0[0-1]/abc/0[0-8]"), "match-00/abc/00,match-00/abc/01,match-01/abc/00,match-01/abc/04"); @@ -295,10 +295,10 @@ class TestFileSystem : public NullFileSystem { TEST(TestFileSystemTest, RootDirectory) { TestFileSystem fs; std::vector results; - auto ret = fs.GetMatchingPaths("./te*", &results); + auto ret = fs.GetMatchingPaths("./te*", nullptr, &results); EXPECT_EQ(1, results.size()); EXPECT_EQ("./test", results[0]); - ret = fs.GetMatchingPaths("te*", &results); + ret = fs.GetMatchingPaths("te*", nullptr, &results); EXPECT_EQ(1, results.size()); EXPECT_EQ("./test", results[0]); } diff --git a/tensorflow/core/platform/retrying_file_system.h b/tensorflow/core/platform/retrying_file_system.h index 2f50b6cd5df..ddbf255af2e 100644 --- a/tensorflow/core/platform/retrying_file_system.h +++ b/tensorflow/core/platform/retrying_file_system.h @@ -54,15 +54,17 @@ class RetryingFileSystem : public FileSystem { Status FileExists(const string& fname, TransactionToken* token) override { return RetryingUtils::CallWithRetries( - [this, &fname]() { return base_file_system_->FileExists(fname); }, + [this, &fname, token]() { + return base_file_system_->FileExists(fname, token); + }, retry_config_); } Status GetChildren(const string& dir, TransactionToken* token, std::vector* result) override { return RetryingUtils::CallWithRetries( - [this, &dir, result]() { - return base_file_system_->GetChildren(dir, result); + [this, &dir, result, token]() { + return base_file_system_->GetChildren(dir, token, result); }, retry_config_); } @@ -70,8 +72,8 @@ class RetryingFileSystem : public FileSystem { Status GetMatchingPaths(const string& pattern, TransactionToken* token, std::vector* result) override { return RetryingUtils::CallWithRetries( - [this, &pattern, result]() { - return base_file_system_->GetMatchingPaths(pattern, result); + [this, &pattern, result, token]() { + return base_file_system_->GetMatchingPaths(pattern, token, result); }, retry_config_); } @@ -79,33 +81,41 @@ class RetryingFileSystem : public FileSystem { Status Stat(const string& fname, TransactionToken* token, FileStatistics* stat) override { return RetryingUtils::CallWithRetries( - [this, &fname, stat]() { return base_file_system_->Stat(fname, stat); }, + [this, &fname, stat, token]() { + return base_file_system_->Stat(fname, token, stat); + }, retry_config_); } Status DeleteFile(const string& fname, TransactionToken* token) override { return RetryingUtils::DeleteWithRetries( - [this, &fname]() { return base_file_system_->DeleteFile(fname); }, + [this, &fname, token]() { + return base_file_system_->DeleteFile(fname, token); + }, retry_config_); } Status CreateDir(const string& dirname, TransactionToken* token) override { return RetryingUtils::CallWithRetries( - [this, &dirname]() { return base_file_system_->CreateDir(dirname); }, + [this, &dirname, token]() { + return base_file_system_->CreateDir(dirname, token); + }, retry_config_); } Status 
DeleteDir(const string& dirname, TransactionToken* token) override { return RetryingUtils::DeleteWithRetries( - [this, &dirname]() { return base_file_system_->DeleteDir(dirname); }, + [this, &dirname, token]() { + return base_file_system_->DeleteDir(dirname, token); + }, retry_config_); } Status GetFileSize(const string& fname, TransactionToken* token, uint64* file_size) override { return RetryingUtils::CallWithRetries( - [this, &fname, file_size]() { - return base_file_system_->GetFileSize(fname, file_size); + [this, &fname, file_size, token]() { + return base_file_system_->GetFileSize(fname, token, file_size); }, retry_config_); } @@ -113,15 +123,17 @@ class RetryingFileSystem : public FileSystem { Status RenameFile(const string& src, const string& target, TransactionToken* token) override { return RetryingUtils::CallWithRetries( - [this, &src, &target]() { - return base_file_system_->RenameFile(src, target); + [this, &src, &target, token]() { + return base_file_system_->RenameFile(src, target, token); }, retry_config_); } Status IsDirectory(const string& dirname, TransactionToken* token) override { return RetryingUtils::CallWithRetries( - [this, &dirname]() { return base_file_system_->IsDirectory(dirname); }, + [this, &dirname, token]() { + return base_file_system_->IsDirectory(dirname, token); + }, retry_config_); } @@ -134,15 +146,15 @@ class RetryingFileSystem : public FileSystem { int64* undeleted_files, int64* undeleted_dirs) override { return RetryingUtils::DeleteWithRetries( - [this, &dirname, undeleted_files, undeleted_dirs]() { - return base_file_system_->DeleteRecursively(dirname, undeleted_files, - undeleted_dirs); + [this, &dirname, token, undeleted_files, undeleted_dirs]() { + return base_file_system_->DeleteRecursively( + dirname, token, undeleted_files, undeleted_dirs); }, retry_config_); } void FlushCaches(TransactionToken* token) override { - base_file_system_->FlushCaches(); + base_file_system_->FlushCaches(token); } Underlying* underlying() const { return base_file_system_.get(); } @@ -229,8 +241,9 @@ Status RetryingFileSystem::NewRandomAccessFile( std::unique_ptr* result) { std::unique_ptr base_file; TF_RETURN_IF_ERROR(RetryingUtils::CallWithRetries( - [this, &filename, &base_file]() { - return base_file_system_->NewRandomAccessFile(filename, &base_file); + [this, &filename, &base_file, token]() { + return base_file_system_->NewRandomAccessFile(filename, token, + &base_file); }, retry_config_)); result->reset(new retrying_internals::RetryingRandomAccessFile( @@ -244,8 +257,8 @@ Status RetryingFileSystem::NewWritableFile( std::unique_ptr* result) { std::unique_ptr base_file; TF_RETURN_IF_ERROR(RetryingUtils::CallWithRetries( - [this, &filename, &base_file]() { - return base_file_system_->NewWritableFile(filename, &base_file); + [this, &filename, &base_file, token]() { + return base_file_system_->NewWritableFile(filename, token, &base_file); }, retry_config_)); result->reset(new retrying_internals::RetryingWritableFile( @@ -259,8 +272,9 @@ Status RetryingFileSystem::NewAppendableFile( std::unique_ptr* result) { std::unique_ptr base_file; TF_RETURN_IF_ERROR(RetryingUtils::CallWithRetries( - [this, &filename, &base_file]() { - return base_file_system_->NewAppendableFile(filename, &base_file); + [this, &filename, &base_file, token]() { + return base_file_system_->NewAppendableFile(filename, token, + &base_file); }, retry_config_)); result->reset(new retrying_internals::RetryingWritableFile( @@ -273,9 +287,9 @@ Status RetryingFileSystem::NewReadOnlyMemoryRegionFromFile( 
const string& filename, TransactionToken* token, std::unique_ptr* result) { return RetryingUtils::CallWithRetries( - [this, &filename, result]() { - return base_file_system_->NewReadOnlyMemoryRegionFromFile(filename, - result); + [this, &filename, result, token]() { + return base_file_system_->NewReadOnlyMemoryRegionFromFile( + filename, token, result); }, retry_config_); } diff --git a/tensorflow/core/platform/retrying_file_system_test.cc b/tensorflow/core/platform/retrying_file_system_test.cc index 0cada5a5651..8c8cafbeecd 100644 --- a/tensorflow/core/platform/retrying_file_system_test.cc +++ b/tensorflow/core/platform/retrying_file_system_test.cc @@ -175,8 +175,7 @@ class MockFileSystem : public FileSystem { return calls_.ConsumeNextCall("DeleteRecursively"); } - void FlushCaches( - TransactionToken* token) override { + void FlushCaches(TransactionToken* token) override { if (flushed_) { *flushed_ = true; } @@ -208,7 +207,8 @@ TEST(RetryingFileSystemTest, NewRandomAccessFile_ImmediateSuccess) { // Retrieve the wrapped random access file. std::unique_ptr random_access_file; - TF_EXPECT_OK(fs.NewRandomAccessFile("filename.txt", &random_access_file)); + TF_EXPECT_OK( + fs.NewRandomAccessFile("filename.txt", nullptr, &random_access_file)); // Use it and check the results. StringPiece result; @@ -239,7 +239,8 @@ TEST(RetryingFileSystemTest, NewRandomAccessFile_SuccessWith3rdTry) { // Retrieve the wrapped random access file. std::unique_ptr random_access_file; - TF_EXPECT_OK(fs.NewRandomAccessFile("filename.txt", &random_access_file)); + TF_EXPECT_OK( + fs.NewRandomAccessFile("filename.txt", nullptr, &random_access_file)); // Use it and check the results. StringPiece result; @@ -264,7 +265,8 @@ TEST(RetryingFileSystemTest, NewRandomAccessFile_AllRetriesFailed) { // Retrieve the wrapped random access file. std::unique_ptr random_access_file; - TF_EXPECT_OK(fs.NewRandomAccessFile("filename.txt", &random_access_file)); + TF_EXPECT_OK( + fs.NewRandomAccessFile("filename.txt", nullptr, &random_access_file)); // Use it and check the results. StringPiece result; @@ -294,7 +296,8 @@ TEST(RetryingFileSystemTest, NewRandomAccessFile_NoRetriesForSomeErrors) { // Retrieve the wrapped random access file. std::unique_ptr random_access_file; - TF_EXPECT_OK(fs.NewRandomAccessFile("filename.txt", &random_access_file)); + TF_EXPECT_OK( + fs.NewRandomAccessFile("filename.txt", nullptr, &random_access_file)); // Use it and check the results. StringPiece result; @@ -322,7 +325,7 @@ TEST(RetryingFileSystemTest, NewWritableFile_ImmediateSuccess) { // Retrieve the wrapped writable file. std::unique_ptr writable_file; - TF_EXPECT_OK(fs.NewWritableFile("filename.txt", &writable_file)); + TF_EXPECT_OK(fs.NewWritableFile("filename.txt", nullptr, &writable_file)); StringPiece result; TF_EXPECT_OK(writable_file->Name(&result)); @@ -353,7 +356,7 @@ TEST(RetryingFileSystemTest, NewWritableFile_SuccessWith3rdTry) { // Retrieve the wrapped writable file. std::unique_ptr writable_file; - TF_EXPECT_OK(fs.NewWritableFile("filename.txt", &writable_file)); + TF_EXPECT_OK(fs.NewWritableFile("filename.txt", nullptr, &writable_file)); // Use it and check the results. TF_EXPECT_OK(writable_file->Sync()); @@ -380,7 +383,7 @@ TEST(RetryingFileSystemTest, NewWritableFile_SuccessWith3rdTry_ViaDestructor) { // Retrieve the wrapped writable file. 
std::unique_ptr writable_file; - TF_EXPECT_OK(fs.NewWritableFile("filename.txt", &writable_file)); + TF_EXPECT_OK(fs.NewWritableFile("filename.txt", nullptr, &writable_file)); writable_file.reset(); // Trigger Close() via destructor. } @@ -406,7 +409,7 @@ TEST(RetryingFileSystemTest, NewAppendableFile_SuccessWith3rdTry) { // Retrieve the wrapped appendable file. std::unique_ptr writable_file; - TF_EXPECT_OK(fs.NewAppendableFile("filename.txt", &writable_file)); + TF_EXPECT_OK(fs.NewAppendableFile("filename.txt", nullptr, &writable_file)); // Use it and check the results. TF_EXPECT_OK(writable_file->Sync()); @@ -430,7 +433,7 @@ TEST(RetryingFileSystemTest, NewWritableFile_AllRetriesFailed) { // Retrieve the wrapped writable file. std::unique_ptr writable_file; - TF_EXPECT_OK(fs.NewWritableFile("filename.txt", &writable_file)); + TF_EXPECT_OK(fs.NewWritableFile("filename.txt", nullptr, &writable_file)); // Use it and check the results. const auto& status = writable_file->Sync(); @@ -450,7 +453,8 @@ TEST(RetryingFileSystemTest, std::move(base_fs), RetryConfig(0 /* init_delay_time_us */)); std::unique_ptr result; - TF_EXPECT_OK(fs.NewReadOnlyMemoryRegionFromFile("filename.txt", &result)); + TF_EXPECT_OK( + fs.NewReadOnlyMemoryRegionFromFile("filename.txt", nullptr, &result)); } TEST(RetryingFileSystemTest, NewReadOnlyMemoryRegionFromFile_AllRetriesFailed) { @@ -463,7 +467,7 @@ TEST(RetryingFileSystemTest, NewReadOnlyMemoryRegionFromFile_AllRetriesFailed) { std::unique_ptr result; const auto& status = - fs.NewReadOnlyMemoryRegionFromFile("filename.txt", &result); + fs.NewReadOnlyMemoryRegionFromFile("filename.txt", nullptr, &result); EXPECT_TRUE(absl::StrContains(status.error_message(), "Retriable error #10")) << status; } @@ -479,7 +483,7 @@ TEST(RetryingFileSystemTest, GetChildren_SuccessWith2ndTry) { std::move(base_fs), RetryConfig(0 /* init_delay_time_us */)); std::vector result; - TF_EXPECT_OK(fs.GetChildren("gs://path", &result)); + TF_EXPECT_OK(fs.GetChildren("gs://path", nullptr, &result)); } TEST(RetryingFileSystemTest, GetChildren_AllRetriesFailed) { @@ -490,7 +494,7 @@ TEST(RetryingFileSystemTest, GetChildren_AllRetriesFailed) { std::move(base_fs), RetryConfig(0 /* init_delay_time_us */)); std::vector result; - const auto& status = fs.GetChildren("gs://path", &result); + const auto& status = fs.GetChildren("gs://path", nullptr, &result); EXPECT_TRUE(absl::StrContains(status.error_message(), "Retriable error #10")) << status; } @@ -506,7 +510,7 @@ TEST(RetryingFileSystemTest, GetMatchingPaths_SuccessWith2ndTry) { std::move(base_fs), RetryConfig(0 /* init_delay_time_us */)); std::vector result; - TF_EXPECT_OK(fs.GetMatchingPaths("gs://path/dir", &result)); + TF_EXPECT_OK(fs.GetMatchingPaths("gs://path/dir", nullptr, &result)); } TEST(RetryingFileSystemTest, GetMatchingPaths_AllRetriesFailed) { @@ -518,7 +522,7 @@ TEST(RetryingFileSystemTest, GetMatchingPaths_AllRetriesFailed) { std::move(base_fs), RetryConfig(0 /* init_delay_time_us */)); std::vector result; - const auto& status = fs.GetMatchingPaths("gs://path/dir", &result); + const auto& status = fs.GetMatchingPaths("gs://path/dir", nullptr, &result); EXPECT_TRUE(absl::StrContains(status.error_message(), "Retriable error #10")) << status; } @@ -532,7 +536,7 @@ TEST(RetryingFileSystemTest, DeleteFile_SuccessWith2ndTry) { RetryingFileSystem fs( std::move(base_fs), RetryConfig(0 /* init_delay_time_us */)); - TF_EXPECT_OK(fs.DeleteFile("gs://path/file.txt")); + TF_EXPECT_OK(fs.DeleteFile("gs://path/file.txt", nullptr)); } 
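For reference, the retrying_file_system changes in this patch all follow one shape: the new TransactionToken* parameter is captured by the retry lambda and forwarded to the wrapped file system, while token-less call sites in the tests simply pass nullptr. The minimal, self-contained sketch below illustrates that capture-and-forward pattern only; Status, TransactionToken, CallWithRetries, and the two file-system classes here are simplified stand-ins for illustration, not the TensorFlow definitions.

    #include <functional>
    #include <string>

    // Simplified stand-ins for the TensorFlow types referenced in the diff.
    struct Status { bool ok = true; };
    struct TransactionToken {};

    // Placeholder for RetryingUtils::CallWithRetries: runs the callable once;
    // the real helper re-invokes it on transient errors.
    Status CallWithRetries(const std::function<Status()>& f) { return f(); }

    class BaseFileSystem {
     public:
      // Token-aware signature, mirroring the updated FileSystem API.
      Status DeleteFile(const std::string& fname, TransactionToken* token) {
        (void)fname;
        (void)token;  // A real implementation would scope the op to the transaction.
        return Status{};
      }
    };

    class RetryingFileSystem {
     public:
      explicit RetryingFileSystem(BaseFileSystem* base) : base_(base) {}

      Status DeleteFile(const std::string& fname, TransactionToken* token) {
        // The pattern applied throughout retrying_file_system.h: capture `token`
        // in the lambda and forward it to the wrapped file system.
        return CallWithRetries(
            [this, &fname, token]() { return base_->DeleteFile(fname, token); });
      }

     private:
      BaseFileSystem* base_;
    };

    int main() {
      BaseFileSystem base;
      RetryingFileSystem fs(&base);
      // Callers without a transaction pass nullptr, as the updated tests do.
      Status s = fs.DeleteFile("gs://bucket/file.txt", nullptr);
      return s.ok ? 0 : 1;
    }
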
TEST(RetryingFileSystemTest, DeleteFile_AllRetriesFailed) { @@ -542,7 +546,7 @@ TEST(RetryingFileSystemTest, DeleteFile_AllRetriesFailed) { RetryingFileSystem fs( std::move(base_fs), RetryConfig(0 /* init_delay_time_us */)); - const auto& status = fs.DeleteFile("gs://path/file.txt"); + const auto& status = fs.DeleteFile("gs://path/file.txt", nullptr); EXPECT_TRUE(absl::StrContains(status.error_message(), "Retriable error #10")) << status; } @@ -556,7 +560,7 @@ TEST(RetryingFileSystemTest, CreateDir_SuccessWith2ndTry) { RetryingFileSystem fs( std::move(base_fs), RetryConfig(0 /* init_delay_time_us */)); - TF_EXPECT_OK(fs.CreateDir("gs://path/newdir")); + TF_EXPECT_OK(fs.CreateDir("gs://path/newdir", nullptr)); } TEST(RetryingFileSystemTest, CreateDir_AllRetriesFailed) { @@ -566,7 +570,7 @@ TEST(RetryingFileSystemTest, CreateDir_AllRetriesFailed) { RetryingFileSystem fs( std::move(base_fs), RetryConfig(0 /* init_delay_time_us */)); - const auto& status = fs.CreateDir("gs://path/newdir"); + const auto& status = fs.CreateDir("gs://path/newdir", nullptr); EXPECT_TRUE(absl::StrContains(status.error_message(), "Retriable error #10")) << status; } @@ -580,7 +584,7 @@ TEST(RetryingFileSystemTest, DeleteDir_SuccessWith2ndTry) { RetryingFileSystem fs( std::move(base_fs), RetryConfig(0 /* init_delay_time_us */)); - TF_EXPECT_OK(fs.DeleteDir("gs://path/dir")); + TF_EXPECT_OK(fs.DeleteDir("gs://path/dir", nullptr)); } TEST(RetryingFileSystemTest, DeleteDir_AllRetriesFailed) { @@ -590,7 +594,7 @@ TEST(RetryingFileSystemTest, DeleteDir_AllRetriesFailed) { RetryingFileSystem fs( std::move(base_fs), RetryConfig(0 /* init_delay_time_us */)); - const auto& status = fs.DeleteDir("gs://path/dir"); + const auto& status = fs.DeleteDir("gs://path/dir", nullptr); EXPECT_TRUE(absl::StrContains(status.error_message(), "Retriable error #10")) << status; } @@ -606,7 +610,7 @@ TEST(RetryingFileSystemTest, GetFileSize_SuccessWith2ndTry) { std::move(base_fs), RetryConfig(0 /* init_delay_time_us */)); uint64 size; - TF_EXPECT_OK(fs.GetFileSize("gs://path/file.txt", &size)); + TF_EXPECT_OK(fs.GetFileSize("gs://path/file.txt", nullptr, &size)); } TEST(RetryingFileSystemTest, GetFileSize_AllRetriesFailed) { @@ -617,7 +621,7 @@ TEST(RetryingFileSystemTest, GetFileSize_AllRetriesFailed) { std::move(base_fs), RetryConfig(0 /* init_delay_time_us */)); uint64 size; - const auto& status = fs.GetFileSize("gs://path/file.txt", &size); + const auto& status = fs.GetFileSize("gs://path/file.txt", nullptr, &size); EXPECT_TRUE(absl::StrContains(status.error_message(), "Retriable error #10")) << status; } @@ -631,7 +635,7 @@ TEST(RetryingFileSystemTest, RenameFile_SuccessWith2ndTry) { RetryingFileSystem fs( std::move(base_fs), RetryConfig(0 /* init_delay_time_us */)); - TF_EXPECT_OK(fs.RenameFile("old_name", "new_name")); + TF_EXPECT_OK(fs.RenameFile("old_name", "new_name", nullptr)); } TEST(RetryingFileSystemTest, RenameFile_AllRetriesFailed) { @@ -641,7 +645,7 @@ TEST(RetryingFileSystemTest, RenameFile_AllRetriesFailed) { RetryingFileSystem fs( std::move(base_fs), RetryConfig(0 /* init_delay_time_us */)); - const auto& status = fs.RenameFile("old_name", "new_name"); + const auto& status = fs.RenameFile("old_name", "new_name", nullptr); EXPECT_TRUE(absl::StrContains(status.error_message(), "Retriable error #10")) << status; } @@ -656,7 +660,7 @@ TEST(RetryingFileSystemTest, Stat_SuccessWith2ndTry) { std::move(base_fs), RetryConfig(0 /* init_delay_time_us */)); FileStatistics stat; - TF_EXPECT_OK(fs.Stat("file_name", &stat)); + 
TF_EXPECT_OK(fs.Stat("file_name", nullptr, &stat)); } TEST(RetryingFileSystemTest, Stat_AllRetriesFailed) { @@ -667,7 +671,7 @@ TEST(RetryingFileSystemTest, Stat_AllRetriesFailed) { std::move(base_fs), RetryConfig(0 /* init_delay_time_us */)); FileStatistics stat; - const auto& status = fs.Stat("file_name", &stat); + const auto& status = fs.Stat("file_name", nullptr, &stat); EXPECT_TRUE(absl::StrContains(status.error_message(), "Retriable error #10")) << status; } @@ -679,7 +683,7 @@ TEST(RetryingFileSystemTest, FileExists_AllRetriesFailed) { RetryingFileSystem fs( std::move(base_fs), RetryConfig(0 /* init_delay_time_us */)); - const auto& status = fs.FileExists("file_name"); + const auto& status = fs.FileExists("file_name", nullptr); EXPECT_TRUE(absl::StrContains(status.error_message(), "Retriable error #10")) << status; } @@ -693,7 +697,7 @@ TEST(RetryingFileSystemTest, FileExists_SuccessWith2ndTry) { RetryingFileSystem fs( std::move(base_fs), RetryConfig(0 /* init_delay_time_us */)); - TF_EXPECT_OK(fs.FileExists("gs://path/dir")); + TF_EXPECT_OK(fs.FileExists("gs://path/dir", nullptr)); } TEST(RetryingFileSystemTest, IsDirectory_SuccessWith2ndTry) { @@ -706,7 +710,7 @@ TEST(RetryingFileSystemTest, IsDirectory_SuccessWith2ndTry) { RetryingFileSystem fs( std::move(base_fs), RetryConfig(0 /* init_delay_time_us */)); - TF_EXPECT_OK(fs.IsDirectory("gs://path/dir")); + TF_EXPECT_OK(fs.IsDirectory("gs://path/dir", nullptr)); } TEST(RetryingFileSystemTest, IsDirectory_AllRetriesFailed) { @@ -716,7 +720,7 @@ TEST(RetryingFileSystemTest, IsDirectory_AllRetriesFailed) { RetryingFileSystem fs( std::move(base_fs), RetryConfig(0 /* init_delay_time_us */)); - const auto& status = fs.IsDirectory("gs://path/dir"); + const auto& status = fs.IsDirectory("gs://path/dir", nullptr); EXPECT_TRUE(absl::StrContains(status.error_message(), "Retriable error #10")) << status; } @@ -732,8 +736,8 @@ TEST(RetryingFileSystemTest, DeleteRecursively_SuccessWith2ndTry) { std::move(base_fs), RetryConfig(0 /* init_delay_time_us */)); int64 undeleted_files, undeleted_dirs; - TF_EXPECT_OK( - fs.DeleteRecursively("gs://path/dir", &undeleted_files, &undeleted_dirs)); + TF_EXPECT_OK(fs.DeleteRecursively("gs://path/dir", nullptr, &undeleted_files, + &undeleted_dirs)); } TEST(RetryingFileSystemTest, DeleteRecursively_AllRetriesFailed) { @@ -745,8 +749,8 @@ TEST(RetryingFileSystemTest, DeleteRecursively_AllRetriesFailed) { std::move(base_fs), RetryConfig(0 /* init_delay_time_us */)); int64 undeleted_files, undeleted_dirs; - const auto& status = - fs.DeleteRecursively("gs://path/dir", &undeleted_files, &undeleted_dirs); + const auto& status = fs.DeleteRecursively("gs://path/dir", nullptr, + &undeleted_files, &undeleted_dirs); EXPECT_TRUE(absl::StrContains(status.error_message(), "Retriable error #10")) << status; } @@ -757,7 +761,7 @@ TEST(RetryingFileSystemTest, FlushCaches) { std::unique_ptr base_fs(new MockFileSystem(none, &flushed)); RetryingFileSystem fs( std::move(base_fs), RetryConfig(0 /* init_delay_time_us */)); - fs.FlushCaches(); + fs.FlushCaches(nullptr); EXPECT_TRUE(flushed); } diff --git a/tensorflow/core/platform/s3/s3_file_system.cc b/tensorflow/core/platform/s3/s3_file_system.cc index 8812424e89d..201694c994c 100644 --- a/tensorflow/core/platform/s3/s3_file_system.cc +++ b/tensorflow/core/platform/s3/s3_file_system.cc @@ -58,7 +58,7 @@ static const char* kS3TempFileTemplate = "/tmp/s3_filesystem_XXXXXX"; #endif static const char* kS3FileSystemAllocationTag = "S3FileSystemAllocation"; static const size_t 
kS3ReadAppendableFileBufferSize = 1024 * 1024; -static const int64 kS3TimeoutMsec = 300000; // 5 min +static const int64 kS3TimeoutMsec = 300000; // 5 min static const uint64 kS3MultiPartUploadChunkSize = 50 * 1024 * 1024; // 50 MB static const uint64 kS3MultiPartDownloadChunkSize = 2 * 1024 * 1024; // 50 MB static const int kS3GetChildrenMaxKeys = 100; @@ -568,14 +568,14 @@ S3FileSystem::GetExecutor() { } Status S3FileSystem::NewRandomAccessFile( - const string& fname, - std::unique_ptr* result /*, TransactionToken* token */) { - return NewRandomAccessFile(fname, result, true); + const string& fname, TransactionToken* token, + std::unique_ptr* result) { + return NewRandomAccessFile(fname, token, result, true); } Status S3FileSystem::NewRandomAccessFile( - const string& fname, std::unique_ptr* result, - bool use_multi_part_download /*, TransactionToken* token */) { + const string& fname, TransactionToken* token, + std::unique_ptr* result, bool use_multi_part_download) { string bucket, object; TF_RETURN_IF_ERROR(ParseS3Path(fname, false, &bucket, &object)); @@ -588,9 +588,9 @@ Status S3FileSystem::NewRandomAccessFile( return Status::OK(); } -Status S3FileSystem::NewWritableFile( - const string& fname, - std::unique_ptr* result /*, TransactionToken* token */) { +Status S3FileSystem::NewWritableFile(const string& fname, + TransactionToken* token, + std::unique_ptr* result) { string bucket, object; TF_RETURN_IF_ERROR(ParseS3Path(fname, false, &bucket, &object)); result->reset(new S3WritableFile( @@ -601,11 +601,11 @@ Status S3FileSystem::NewWritableFile( return Status::OK(); } -Status S3FileSystem::NewAppendableFile( - const string& fname, - std::unique_ptr* result /*, TransactionToken* token */) { +Status S3FileSystem::NewAppendableFile(const string& fname, + TransactionToken* token, + std::unique_ptr* result) { std::unique_ptr reader; - TF_RETURN_IF_ERROR(NewRandomAccessFile(fname, &reader)); + TF_RETURN_IF_ERROR(NewRandomAccessFile(fname, token, &reader)); std::unique_ptr buffer(new char[kS3ReadAppendableFileBufferSize]); Status status; uint64 offset = 0; @@ -637,14 +637,14 @@ Status S3FileSystem::NewAppendableFile( } Status S3FileSystem::NewReadOnlyMemoryRegionFromFile( - const string& fname, std::unique_ptr* - result /*, TransactionToken* token */) { + const string& fname, TransactionToken* token, + std::unique_ptr* result) { uint64 size; - TF_RETURN_IF_ERROR(GetFileSize(fname, &size)); + TF_RETURN_IF_ERROR(GetFileSize(fname, token, &size)); std::unique_ptr data(new char[size]); std::unique_ptr file; - TF_RETURN_IF_ERROR(NewRandomAccessFile(fname, &file)); + TF_RETURN_IF_ERROR(NewRandomAccessFile(fname, token, &file)); StringPiece piece; TF_RETURN_IF_ERROR(file->Read(0, size, &piece, data.get())); @@ -653,16 +653,14 @@ Status S3FileSystem::NewReadOnlyMemoryRegionFromFile( return Status::OK(); } -Status S3FileSystem::FileExists( - const string& fname /*, TransactionToken* token */) { +Status S3FileSystem::FileExists(const string& fname, TransactionToken* token) { FileStatistics stats; - TF_RETURN_IF_ERROR(this->Stat(fname, &stats)); + TF_RETURN_IF_ERROR(this->Stat(fname, token, &stats)); return Status::OK(); } -Status S3FileSystem::GetChildren( - const string& dir, - std::vector* result /*, TransactionToken* token */) { +Status S3FileSystem::GetChildren(const string& dir, TransactionToken* token, + std::vector* result) { VLOG(1) << "GetChildren for path: " << dir; string bucket, prefix; TF_RETURN_IF_ERROR(ParseS3Path(dir, true, &bucket, &prefix)); @@ -709,8 +707,8 @@ Status 
S3FileSystem::GetChildren( return Status::OK(); } -Status S3FileSystem::Stat( - const string& fname, FileStatistics* stats /*, TransactionToken* token */) { +Status S3FileSystem::Stat(const string& fname, TransactionToken* token, + FileStatistics* stats) { VLOG(1) << "Stat on path: " << fname; string bucket, object; TF_RETURN_IF_ERROR(ParseS3Path(fname, true, &bucket, &object)); @@ -772,14 +770,13 @@ Status S3FileSystem::Stat( return Status::OK(); } -Status S3FileSystem::GetMatchingPaths( - const string& pattern, - std::vector* results /*, TransactionToken* token */) { +Status S3FileSystem::GetMatchingPaths(const string& pattern, + TransactionToken* token, + std::vector* results) { return internal::GetMatchingPaths(this, Env::Default(), pattern, results); } -Status S3FileSystem::DeleteFile( - const string& fname /*, TransactionToken* token */) { +Status S3FileSystem::DeleteFile(const string& fname, TransactionToken* token) { VLOG(1) << "DeleteFile: " << fname; string bucket, object; TF_RETURN_IF_ERROR(ParseS3Path(fname, false, &bucket, &object)); @@ -795,8 +792,7 @@ Status S3FileSystem::DeleteFile( return Status::OK(); } -Status S3FileSystem::CreateDir( - const string& dirname /*, TransactionToken* token */) { +Status S3FileSystem::CreateDir(const string& dirname, TransactionToken* token) { VLOG(1) << "CreateDir: " << dirname; string bucket, object; TF_RETURN_IF_ERROR(ParseS3Path(dirname, true, &bucket, &object)); @@ -815,16 +811,15 @@ Status S3FileSystem::CreateDir( if (filename.back() != '/') { filename.push_back('/'); } - if (!this->FileExists(filename).ok()) { + if (!this->FileExists(filename,token).ok()) { std::unique_ptr file; - TF_RETURN_IF_ERROR(NewWritableFile(filename, &file)); + TF_RETURN_IF_ERROR(NewWritableFile(filename,token, &file)); TF_RETURN_IF_ERROR(file->Close()); } return Status::OK(); } -Status S3FileSystem::DeleteDir( - const string& dirname /*, TransactionToken* token */) { +Status S3FileSystem::DeleteDir(const string& dirname, TransactionToken* token) { VLOG(1) << "DeleteDir: " << dirname; string bucket, object; TF_RETURN_IF_ERROR(ParseS3Path(dirname, false, &bucket, &object)); @@ -855,7 +850,7 @@ Status S3FileSystem::DeleteDir( if (filename.back() != '/') { filename.push_back('/'); } - return DeleteFile(filename); + return DeleteFile(filename,token); } } else { TF_RETURN_IF_ERROR(CheckForbiddenError(listObjectsOutcome.GetError())); @@ -863,10 +858,10 @@ Status S3FileSystem::DeleteDir( return Status::OK(); } -Status S3FileSystem::GetFileSize( - const string& fname, uint64* file_size /*, TransactionToken* token */) { +Status S3FileSystem::GetFileSize(const string& fname, TransactionToken* token, + uint64* file_size) { FileStatistics stats; - TF_RETURN_IF_ERROR(this->Stat(fname, &stats)); + TF_RETURN_IF_ERROR(this->Stat(fname, token, &stats)); *file_size = stats.length; return Status::OK(); } @@ -917,7 +912,7 @@ Status S3FileSystem::CopyFile(const Aws::String& source_bucket, Aws::String source_full_path = Aws::String("s3://") + source; uint64 file_length; TF_RETURN_IF_ERROR( - this->GetFileSize(string(source_full_path.c_str()), &file_length)); + this->GetFileSize(string(source_full_path.c_str()), nullptr, &file_length)); int num_parts; if (file_length <= multi_part_chunk_size_[Aws::Transfer::TransferDirection::UPLOAD]) { @@ -1135,8 +1130,8 @@ Status S3FileSystem::CompleteMultiPartCopy( return Status::OK(); } -Status S3FileSystem::RenameFile( - const string& src, const string& target /*, TransactionToken* token */) { +Status S3FileSystem::RenameFile(const string& src, 
const string& target, + TransactionToken* token) { VLOG(1) << "RenameFile from: " << src << " to: " << target; string src_bucket, src_object, target_bucket, target_object; TF_RETURN_IF_ERROR(ParseS3Path(src, false, &src_bucket, &src_object)); diff --git a/tensorflow/core/platform/s3/s3_file_system.h b/tensorflow/core/platform/s3/s3_file_system.h index 41a5195efec..e592a174183 100644 --- a/tensorflow/core/platform/s3/s3_file_system.h +++ b/tensorflow/core/platform/s3/s3_file_system.h @@ -50,66 +50,66 @@ class S3FileSystem : public FileSystem { ~S3FileSystem(); Status NewRandomAccessFile( - const string& fname, + const string& fname, TransactionToken * token, std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override; + result ) override; Status NewRandomAccessFile( - const string& fname, std::unique_ptr* result, - bool use_multi_part_download /*, TransactionToken* token = nullptr */); + const string& fname, TransactionToken * token,std::unique_ptr* result, + bool use_multi_part_download ); Status NewWritableFile( - const string& fname, + const string& fname,TransactionToken * token, std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override; + result ) override; Status NewAppendableFile( - const string& fname, + const string& fname,TransactionToken * token, std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override; + result ) override; Status NewReadOnlyMemoryRegionFromFile( - const string& fname, + const string& fname,TransactionToken * token, std::unique_ptr* - result /*, TransactionToken* token = nullptr */) override; + result ) override; Status FileExists( - const string& fname /*, TransactionToken* token = nullptr */) override; + const string& fname,TransactionToken * token ) override; Status GetChildren( - const string& dir, - std::vector* result /*, TransactionToken* token = nullptr */) + const string& dir,TransactionToken * token, + std::vector* result ) override; Status Stat( - const string& fname, - FileStatistics* stat /*, TransactionToken* token = nullptr */) override; + const string& fname,TransactionToken * token, + FileStatistics* stat ) override; Status GetMatchingPaths( - const string& pattern, - std::vector* results /*, TransactionToken* token = nullptr */) + const string& pattern,TransactionToken * token, + std::vector* results ) override; Status DeleteFile( - const string& fname /*, TransactionToken* token = nullptr */) override; + const string& fname,TransactionToken * token ) override; Status CreateDir( - const string& name /*, TransactionToken* token = nullptr */) override; + const string& name, TransactionToken * token) override; Status DeleteDir( - const string& name /*, TransactionToken* token = nullptr */) override; + const string& name,TransactionToken * token ) override; Status GetFileSize( - const string& fname, - uint64* size /*, TransactionToken* token = nullptr */) override; + const string& fname,TransactionToken * token, + uint64* size ) override; Status RenameFile( const string& src, - const string& target /*, TransactionToken* token = nullptr */) override; + const string& target,TransactionToken * token ) override; Status HasAtomicMove( const string& path, - bool* has_atomic_move /*, TransactionToken* token = nullptr */) override; + bool* has_atomic_move ) override; private: // Returns the member S3 client, initializing as-needed. 
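A note on the migration above: once every FileSystem method gains a TransactionToken* parameter, a subclass that overrides the new transactional signature hides the inherited non-transactional convenience overload, so existing call sites stop compiling. That is the name-hiding problem the TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT macro in the next patch works around by re-exposing the base-class overloads with using-declarations. A minimal, self-contained C++ sketch of the effect, with hypothetical class names rather than the real TensorFlow types:

    #include <string>

    struct TransactionToken {};

    struct Base {
      virtual ~Base() = default;
      // New, transactional signature.
      virtual bool FileExists(const std::string& name, TransactionToken* token) {
        return token == nullptr;
      }
      // Old convenience overload; forwards a null token.
      bool FileExists(const std::string& name) { return FileExists(name, nullptr); }
    };

    struct Derived : Base {
      // Without this using-declaration (one of these per method is what the
      // macro expands to), the override below hides
      // Base::FileExists(const std::string&) and non-transactional call sites
      // no longer compile.
      using Base::FileExists;
      bool FileExists(const std::string& name, TransactionToken* token) override {
        return true;
      }
    };

    int main() {
      Derived fs;
      // Resolves through the using-declaration, then dispatches virtually to
      // Derived's transactional override.
      return fs.FileExists("some/path") ? 0 : 1;
    }
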
From 8e75f3b993504c0090ab851c8b4313a8bbbd747a Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Tue, 4 Aug 2020 23:20:15 -0700 Subject: [PATCH 2146/2522] Add TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT macro to modified classes --- .../filesystem/modular_filesystem.h | 2 + .../common_runtime/constant_folding_test.cc | 5 +- .../kernels/immutable_constant_op_test.cc | 4 + .../core/platform/cloud/gcs_file_system.h | 2 + tensorflow/core/platform/env_test.cc | 8 +- tensorflow/core/platform/file_system.h | 51 ++++++------- tensorflow/core/platform/file_system_test.cc | 2 + tensorflow/core/platform/null_file_system.h | 2 + tensorflow/core/platform/ram_file_system.h | 2 + .../core/platform/retrying_file_system.h | 2 + .../platform/retrying_file_system_test.cc | 2 + tensorflow/core/platform/s3/s3_file_system.cc | 10 +-- tensorflow/core/platform/s3/s3_file_system.h | 73 +++++++------------ 13 files changed, 84 insertions(+), 81 deletions(-) diff --git a/tensorflow/c/experimental/filesystem/modular_filesystem.h b/tensorflow/c/experimental/filesystem/modular_filesystem.h index 6495d97ebf1..061a1aa446b 100644 --- a/tensorflow/c/experimental/filesystem/modular_filesystem.h +++ b/tensorflow/c/experimental/filesystem/modular_filesystem.h @@ -59,6 +59,8 @@ class ModularFileSystem final : public FileSystem { ~ModularFileSystem() override { ops_->cleanup(filesystem_.get()); } + TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT; + Status NewRandomAccessFile( const std::string& fname, TransactionToken* token, std::unique_ptr* result) override; diff --git a/tensorflow/core/common_runtime/constant_folding_test.cc b/tensorflow/core/common_runtime/constant_folding_test.cc index b348117bb9e..2edc92eac5e 100644 --- a/tensorflow/core/common_runtime/constant_folding_test.cc +++ b/tensorflow/core/common_runtime/constant_folding_test.cc @@ -19,9 +19,6 @@ limitations under the License. 
#include #include "tensorflow/cc/ops/nn_ops.h" -#include "tensorflow/core/common_runtime/constant_folding.h" - -#include "tensorflow/cc/ops/array_ops_internal.h" #include "tensorflow/cc/ops/sendrecv_ops.h" #include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/core/common_runtime/device.h" @@ -687,6 +684,8 @@ class TestTFFileSystem : public ::tensorflow::NullFileSystem { : ::tensorflow::NullFileSystem(), data_tensor_(test::AsTensor({1., 2., 3., 4.}, {2, 2})) {} + using ::tensorflow::NullFileSystem::NewReadOnlyMemoryRegionFromFile; + ::tensorflow::Status NewReadOnlyMemoryRegionFromFile( const string& fname, ::tensorflow::TransactionToken* token, std::unique_ptr<::tensorflow::ReadOnlyMemoryRegion>* result) override { diff --git a/tensorflow/core/kernels/immutable_constant_op_test.cc b/tensorflow/core/kernels/immutable_constant_op_test.cc index 5c3f96a312d..d52a8b55a35 100644 --- a/tensorflow/core/kernels/immutable_constant_op_test.cc +++ b/tensorflow/core/kernels/immutable_constant_op_test.cc @@ -60,6 +60,10 @@ class TestReadOnlyMemoryRegion : public ReadOnlyMemoryRegion { class TestFileSystem : public NullFileSystem { public: ~TestFileSystem() override = default; + + // import non-transactional method from the base class + using NullFileSystem::NewReadOnlyMemoryRegionFromFile; + Status NewReadOnlyMemoryRegionFromFile( const string& fname, TransactionToken* token, std::unique_ptr* result) override { diff --git a/tensorflow/core/platform/cloud/gcs_file_system.h b/tensorflow/core/platform/cloud/gcs_file_system.h index 0a27aba35c1..203c501ff4c 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system.h +++ b/tensorflow/core/platform/cloud/gcs_file_system.h @@ -125,6 +125,8 @@ class GcsFileSystem : public FileSystem { std::pair* additional_header, bool compose_append); + TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT; + Status NewRandomAccessFile( const string& fname, TransactionToken* token, std::unique_ptr* result) override; diff --git a/tensorflow/core/platform/env_test.cc b/tensorflow/core/platform/env_test.cc index 35374d65ee3..79d793ee636 100644 --- a/tensorflow/core/platform/env_test.cc +++ b/tensorflow/core/platform/env_test.cc @@ -295,7 +295,9 @@ TEST_F(DefaultEnvTest, SleepForMicroseconds) { class TmpDirFileSystem : public NullFileSystem { public: - Status FileExists(const string& dir,TransactionToken* token) override { + TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT; + + Status FileExists(const string& dir, TransactionToken* token) override { StringPiece scheme, host, path; io::ParseURI(dir, &scheme, &host, &path); if (path.empty()) return errors::NotFound(dir, " not found"); @@ -311,7 +313,7 @@ class TmpDirFileSystem : public NullFileSystem { return Env::Default()->FileExists(io::JoinPath(BaseDir(), path)); } - Status CreateDir(const string& dir,TransactionToken* token) override { + Status CreateDir(const string& dir, TransactionToken* token) override { StringPiece scheme, host, path; io::ParseURI(dir, &scheme, &host, &path); if (scheme != "tmpdirfs") { @@ -328,7 +330,7 @@ class TmpDirFileSystem : public NullFileSystem { return status; } - Status IsDirectory(const string& dir,TransactionToken* token) override { + Status IsDirectory(const string& dir, TransactionToken* token) override { StringPiece scheme, host, path; io::ParseURI(dir, &scheme, &host, &path); for (const auto& existing_dir : created_directories_) diff --git a/tensorflow/core/platform/file_system.h b/tensorflow/core/platform/file_system.h index c4094d3a5a2..28d09c39db1 100644 --- 
a/tensorflow/core/platform/file_system.h +++ b/tensorflow/core/platform/file_system.h @@ -518,6 +518,30 @@ class FileSystem { virtual ~FileSystem() = default; }; +/// This macro adds forwarding methods from FileSystem class to +/// used class since name hiding will prevent these to be accessed from +/// derived classes and would require all use locations to migrate to +/// Transactional API. This is an interim solution until ModularFileSystem class +/// becomes a singleton. +// TODO(sami): Remove this macro when filesystem plugins migration is complete. +#define TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT \ + using FileSystem::NewRandomAccessFile; \ + using FileSystem::NewWritableFile; \ + using FileSystem::NewAppendableFile; \ + using FileSystem::NewReadOnlyMemoryRegionFromFile; \ + using FileSystem::FileExists; \ + using FileSystem::GetChildren; \ + using FileSystem::GetMatchingPaths; \ + using FileSystem::Stat; \ + using FileSystem::DeleteFile; \ + using FileSystem::RecursivelyCreateDir; \ + using FileSystem::DeleteDir; \ + using FileSystem::DeleteRecursively; \ + using FileSystem::GetFileSize; \ + using FileSystem::RenameFile; \ + using FileSystem::CopyFile; \ + using FileSystem::IsDirectory; \ + using FileSystem::FlushCaches /// A Wrapper class for Transactional FileSystem support. /// This provides means to make use of the transactions with minimal code change @@ -529,6 +553,8 @@ class FileSystem { /// transactional filesystem access with minimal code change. class WrappedFileSystem : public FileSystem { public: + TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT; + tensorflow::Status NewRandomAccessFile( const std::string& fname, TransactionToken* token, std::unique_ptr* result) override { @@ -691,31 +717,6 @@ class WrappedFileSystem : public FileSystem { TransactionToken* token_; }; -/// This macro adds forwarding methods from FileSystem class to -/// used class since name hiding will prevent these to be accessed from -/// derived classes and would require all use locations to migrate to -/// Transactional API. This is an interim solution until ModularFileSystem class -/// becomes a singleton. -// TODO(sami): Remove this macro when filesystem plugins migration is complete. -#define TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT \ - using FileSystem::NewRandomAccessFile; \ - using FileSystem::NewWritableFile; \ - using FileSystem::NewAppendableFile; \ - using FileSystem::NewReadOnlyMemoryRegionFromFile; \ - using FileSystem::FileExists; \ - using FileSystem::GetChildren; \ - using FileSystem::GetMatchingPaths; \ - using FileSystem::Stat; \ - using FileSystem::DeleteFile; \ - using FileSystem::RecursivelyCreateDir; \ - using FileSystem::DeleteDir; \ - using FileSystem::DeleteRecursively; \ - using FileSystem::GetFileSize; \ - using FileSystem::RenameFile; \ - using FileSystem::CopyFile; \ - using FileSystem::IsDirectory; \ - using FileSystem::FlushCaches - /// A file abstraction for randomly reading the contents of a file. class RandomAccessFile { public: diff --git a/tensorflow/core/platform/file_system_test.cc b/tensorflow/core/platform/file_system_test.cc index 1e23a2b853c..1707fce4bb5 100644 --- a/tensorflow/core/platform/file_system_test.cc +++ b/tensorflow/core/platform/file_system_test.cc @@ -32,6 +32,8 @@ static const char* const kPrefix = "ipfs://solarsystem"; // cannot have children further. 
class InterPlanetaryFileSystem : public NullFileSystem { public: + TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT; + Status FileExists(const string& fname, TransactionToken* token) override { string parsed_path; ParsePath(fname, &parsed_path); diff --git a/tensorflow/core/platform/null_file_system.h b/tensorflow/core/platform/null_file_system.h index 0af34258169..d7deca32da2 100644 --- a/tensorflow/core/platform/null_file_system.h +++ b/tensorflow/core/platform/null_file_system.h @@ -36,6 +36,8 @@ class NullFileSystem : public FileSystem { ~NullFileSystem() override = default; + TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT; + Status NewRandomAccessFile( const string& fname, TransactionToken* token, std::unique_ptr* result) override { diff --git a/tensorflow/core/platform/ram_file_system.h b/tensorflow/core/platform/ram_file_system.h index ba8bb2d7630..407bcb3ba0f 100644 --- a/tensorflow/core/platform/ram_file_system.h +++ b/tensorflow/core/platform/ram_file_system.h @@ -103,6 +103,8 @@ class RamRandomAccessFile : public RandomAccessFile, public WritableFile { class RamFileSystem : public FileSystem { public: + TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT; + Status NewRandomAccessFile( const string& fname, TransactionToken* token, std::unique_ptr* result) override { diff --git a/tensorflow/core/platform/retrying_file_system.h b/tensorflow/core/platform/retrying_file_system.h index ddbf255af2e..52e2caf8398 100644 --- a/tensorflow/core/platform/retrying_file_system.h +++ b/tensorflow/core/platform/retrying_file_system.h @@ -38,6 +38,8 @@ class RetryingFileSystem : public FileSystem { : base_file_system_(std::move(base_file_system)), retry_config_(retry_config) {} + TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT; + Status NewRandomAccessFile( const string& filename, TransactionToken* token, std::unique_ptr* result) override; diff --git a/tensorflow/core/platform/retrying_file_system_test.cc b/tensorflow/core/platform/retrying_file_system_test.cc index 8c8cafbeecd..093b85a1afc 100644 --- a/tensorflow/core/platform/retrying_file_system_test.cc +++ b/tensorflow/core/platform/retrying_file_system_test.cc @@ -99,6 +99,8 @@ class MockFileSystem : public FileSystem { explicit MockFileSystem(const ExpectedCalls& calls, bool* flushed = nullptr) : calls_(calls), flushed_(flushed) {} + TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT; + Status NewRandomAccessFile( const string& fname, TransactionToken* token, std::unique_ptr* result) override { diff --git a/tensorflow/core/platform/s3/s3_file_system.cc b/tensorflow/core/platform/s3/s3_file_system.cc index 201694c994c..8d74ea6aff6 100644 --- a/tensorflow/core/platform/s3/s3_file_system.cc +++ b/tensorflow/core/platform/s3/s3_file_system.cc @@ -811,9 +811,9 @@ Status S3FileSystem::CreateDir(const string& dirname, TransactionToken* token) { if (filename.back() != '/') { filename.push_back('/'); } - if (!this->FileExists(filename,token).ok()) { + if (!this->FileExists(filename, token).ok()) { std::unique_ptr file; - TF_RETURN_IF_ERROR(NewWritableFile(filename,token, &file)); + TF_RETURN_IF_ERROR(NewWritableFile(filename, token, &file)); TF_RETURN_IF_ERROR(file->Close()); } return Status::OK(); @@ -850,7 +850,7 @@ Status S3FileSystem::DeleteDir(const string& dirname, TransactionToken* token) { if (filename.back() != '/') { filename.push_back('/'); } - return DeleteFile(filename,token); + return DeleteFile(filename, token); } } else { TF_RETURN_IF_ERROR(CheckForbiddenError(listObjectsOutcome.GetError())); @@ -911,8 +911,8 
@@ Status S3FileSystem::CopyFile(const Aws::String& source_bucket, Aws::String source = Aws::String((source_bucket + "/" + source_key).c_str()); Aws::String source_full_path = Aws::String("s3://") + source; uint64 file_length; - TF_RETURN_IF_ERROR( - this->GetFileSize(string(source_full_path.c_str()), nullptr, &file_length)); + TF_RETURN_IF_ERROR(this->GetFileSize(string(source_full_path.c_str()), + nullptr, &file_length)); int num_parts; if (file_length <= multi_part_chunk_size_[Aws::Transfer::TransferDirection::UPLOAD]) { diff --git a/tensorflow/core/platform/s3/s3_file_system.h b/tensorflow/core/platform/s3/s3_file_system.h index e592a174183..8da74c668d1 100644 --- a/tensorflow/core/platform/s3/s3_file_system.h +++ b/tensorflow/core/platform/s3/s3_file_system.h @@ -49,67 +49,50 @@ class S3FileSystem : public FileSystem { S3FileSystem(); ~S3FileSystem(); - Status NewRandomAccessFile( - const string& fname, TransactionToken * token, - std::unique_ptr* - result ) override; + TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT; Status NewRandomAccessFile( - const string& fname, TransactionToken * token,std::unique_ptr* result, - bool use_multi_part_download ); + const string& fname, TransactionToken* token, + std::unique_ptr* result) override; - Status NewWritableFile( - const string& fname,TransactionToken * token, - std::unique_ptr* - result ) override; + Status NewRandomAccessFile(const string& fname, TransactionToken* token, + std::unique_ptr* result, + bool use_multi_part_download); - Status NewAppendableFile( - const string& fname,TransactionToken * token, - std::unique_ptr* - result ) override; + Status NewWritableFile(const string& fname, TransactionToken* token, + std::unique_ptr* result) override; + + Status NewAppendableFile(const string& fname, TransactionToken* token, + std::unique_ptr* result) override; Status NewReadOnlyMemoryRegionFromFile( - const string& fname,TransactionToken * token, - std::unique_ptr* - result ) override; + const string& fname, TransactionToken* token, + std::unique_ptr* result) override; - Status FileExists( - const string& fname,TransactionToken * token ) override; + Status FileExists(const string& fname, TransactionToken* token) override; - Status GetChildren( - const string& dir,TransactionToken * token, - std::vector* result ) - override; + Status GetChildren(const string& dir, TransactionToken* token, + std::vector* result) override; - Status Stat( - const string& fname,TransactionToken * token, - FileStatistics* stat ) override; + Status Stat(const string& fname, TransactionToken* token, + FileStatistics* stat) override; - Status GetMatchingPaths( - const string& pattern,TransactionToken * token, - std::vector* results ) - override; + Status GetMatchingPaths(const string& pattern, TransactionToken* token, + std::vector* results) override; - Status DeleteFile( - const string& fname,TransactionToken * token ) override; + Status DeleteFile(const string& fname, TransactionToken* token) override; - Status CreateDir( - const string& name, TransactionToken * token) override; + Status CreateDir(const string& name, TransactionToken* token) override; - Status DeleteDir( - const string& name,TransactionToken * token ) override; + Status DeleteDir(const string& name, TransactionToken* token) override; - Status GetFileSize( - const string& fname,TransactionToken * token, - uint64* size ) override; + Status GetFileSize(const string& fname, TransactionToken* token, + uint64* size) override; - Status RenameFile( - const string& src, - const string& 
target,TransactionToken * token ) override; + Status RenameFile(const string& src, const string& target, + TransactionToken* token) override; - Status HasAtomicMove( - const string& path, - bool* has_atomic_move ) override; + Status HasAtomicMove(const string& path, bool* has_atomic_move) override; private: // Returns the member S3 client, initializing as-needed. From 0b84561324672ee3045ee23112dbc68540b39cdc Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Tue, 4 Aug 2020 23:19:26 -0700 Subject: [PATCH 2147/2522] [MLIR:TF] Fix a bug in binary out of concat hoisting PiperOrigin-RevId: 324962240 Change-Id: I77aa3559a3a7244ee92e952c384a86a655bf4e4b --- tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc index 791323ca992..5c19f9c3daa 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc @@ -682,8 +682,8 @@ HoistCwiseBinaryOutOfConcat::GetHoistParams(TF::ConcatV2Op op, // of `axis + 1` rank and axis dim has size `1`. auto is_all_tensors = [&](int operand_idx, int axis) -> bool { return llvm::all_of(op.values(), [&](Value arg) -> bool { - auto lhs = arg.getDefiningOp()->getOperand(operand_idx); - auto ranked = lhs.getType().dyn_cast(); + auto operand = arg.getDefiningOp()->getOperand(operand_idx); + auto ranked = operand.getType().dyn_cast(); return ranked && ranked.getRank() == (axis + 1) && ranked.getShape()[axis] == 1; }); @@ -692,13 +692,14 @@ HoistCwiseBinaryOutOfConcat::GetHoistParams(TF::ConcatV2Op op, // Returns true if all binary ops operands at `operand_idx` index are scalars. auto is_all_scalars = [&](int operand_idx) -> bool { return llvm::all_of(op.values(), [&](Value arg) -> bool { - auto lhs = arg.getDefiningOp()->getOperand(operand_idx); - auto ranked = lhs.getType().dyn_cast(); + auto operand = arg.getDefiningOp()->getOperand(operand_idx); + auto ranked = operand.getType().dyn_cast(); return ranked && ranked.hasRank() && ranked.getRank() == 0; }); }; - auto ranked = op.getType().cast(); + // Concat result type must be a ranked tensor. + auto ranked = op.getType().dyn_cast(); if (!ranked) return None; // TODO(ezhulenev): Add support for more valid concat patterns. From b89e12c5a3e4a95804f30bc1426e3e9b9d20570d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 5 Aug 2020 01:12:02 -0700 Subject: [PATCH 2148/2522] This is a near no-op. PiperOrigin-RevId: 324973737 Change-Id: Id8a50959b16d9e5f1eff5471d978a9568cace521 --- tensorflow/lite/g3doc/guide/codegen.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/g3doc/guide/codegen.md b/tensorflow/lite/g3doc/guide/codegen.md index b74bfc5ed40..84dd2ffade9 100644 --- a/tensorflow/lite/g3doc/guide/codegen.md +++ b/tensorflow/lite/g3doc/guide/codegen.md @@ -171,7 +171,7 @@ generated by the Android Studio ML Model Binding. ## Read the metadata from models -The Metadata Extractor library is a convinient tool to read the metadata and +The Metadata Extractor library is a convenient tool to read the metadata and associated files from a models across different platforms (see the [Java version](https://github.com/tensorflow/tflite-support/tree/master/tensorflow_lite_support/metadata) and the C++ version is coming soon). Users can also build their own metadata @@ -198,7 +198,7 @@ information. 
As long as the file identifer is satisfied, the metadata extractor will not fail when reading metadata generated from an old or a future scheme due to the Flatbuffers forward and backwards compatibility mechanism. But fields from -future shcemas cannot be extracted by older metadata extractors. The +future schemas cannot be extracted by older metadata extractors. The [minimum necessary parser version](../convert/metadata.md#the-minimum-necessary-metadata-parser-version) of the metadata indicates the minimum version of metadata parser that can read the metadata Flatbuffers in full. You can use the following method to verify if From 5f6c13cb1081be062638fee8d32c3788afc940ba Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 5 Aug 2020 01:38:50 -0700 Subject: [PATCH 2149/2522] [XLA] Add TopK rewriter pass This pass pattern matches sort HLOs into a custom call. This will be useful for CPU. PiperOrigin-RevId: 324976268 Change-Id: I56224ad39e1cb2960bde9a366a7b47deffa9955f --- tensorflow/compiler/xla/service/BUILD | 31 +++ .../compiler/xla/service/topk_rewriter.cc | 187 ++++++++++++++++++ .../compiler/xla/service/topk_rewriter.h | 44 +++++ .../xla/service/topk_rewriter_test.cc | 153 ++++++++++++++ 4 files changed, 415 insertions(+) create mode 100644 tensorflow/compiler/xla/service/topk_rewriter.cc create mode 100644 tensorflow/compiler/xla/service/topk_rewriter.h create mode 100644 tensorflow/compiler/xla/service/topk_rewriter_test.cc diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 4d15bc432a2..49431b19a69 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -4985,3 +4985,34 @@ cc_library( "//tensorflow/stream_executor/lib", ], ) + +cc_library( + name = "topk_rewriter", + srcs = ["topk_rewriter.cc"], + hdrs = ["topk_rewriter.h"], + deps = [ + ":hlo", + ":hlo_casting_utils", + ":hlo_pass", + ":pattern_matcher", + "//tensorflow/compiler/xla:shape_util", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/types:optional", + ], +) + +tf_cc_test( + name = "topk_rewriter_test", + srcs = ["topk_rewriter_test.cc"], + deps = [ + ":hlo", + ":hlo_dce", + ":hlo_matchers", + ":topk_rewriter", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:test_macros_cpu", + "//tensorflow/compiler/xla/tests:test_utils", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/core:test", + ], +) diff --git a/tensorflow/compiler/xla/service/topk_rewriter.cc b/tensorflow/compiler/xla/service/topk_rewriter.cc new file mode 100644 index 00000000000..ae843760a8d --- /dev/null +++ b/tensorflow/compiler/xla/service/topk_rewriter.cc @@ -0,0 +1,187 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/service/topk_rewriter.h" + +#include "absl/algorithm/container.h" +#include "absl/types/optional.h" +#include "tensorflow/compiler/xla/service/hlo_casting_utils.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/pattern_matcher.h" +#include "tensorflow/compiler/xla/shape_util.h" + +namespace xla { + +static bool IsNanSafeGt(HloComputation* comp) { + namespace m = match; + auto match_bitcast_f32 = [](int64 parameter_number) { + auto param = m::Parameter(parameter_number) + .WithShape(m::Shape().WithElementType(F32)); + auto param_s32 = + m::BitcastConvert(param).WithShape(m::Shape().WithElementType(S32)); + auto param_u32 = + m::BitcastConvert(param).WithShape(m::Shape().WithElementType(U32)); + return m::Select( + m::Lt(param_s32, m::ConstantScalar(0)), + m::BitcastConvert( + m::Subtract(m::ConstantScalar(std::numeric_limits::max()), + param_u32)) + .WithShape(m::Shape().WithElementType(S32)), + param_s32); + }; + auto match_bitcast_bf16 = [](int64 parameter_number) { + auto param = m::Convert(m::Parameter(parameter_number) + .WithShape(m::Shape().WithElementType(BF16))) + .WithShape(m::Shape().WithElementType(F32)); + auto param_s32 = + m::BitcastConvert(param).WithShape(m::Shape().WithElementType(S32)); + auto param_u32 = + m::BitcastConvert(param).WithShape(m::Shape().WithElementType(U32)); + return m::Select( + m::Lt(param_s32, m::ConstantScalar(0)), + m::BitcastConvert( + m::Subtract(m::ConstantScalar(std::numeric_limits::max()), + param_u32)) + .WithShape(m::Shape().WithElementType(S32)), + param_s32); + }; + return Match(comp->root_instruction(), + m::Gt(match_bitcast_f32(0), match_bitcast_f32(1))) || + Match(comp->root_instruction(), + m::Gt(match_bitcast_bf16(0), match_bitcast_bf16(1))); +} + +StatusOr TopkRewriter::Run(HloModule* module) { + bool changed = false; + for (HloComputation* comp : module->computations()) { + for (HloInstruction* inst : comp->MakeInstructionPostOrder()) { + HloSortInstruction* sort = DynCast(inst); + if (sort == nullptr || sort->operand_count() != 2) { + continue; + } + HloInstruction* data = sort->mutable_operand(0); + HloIotaInstruction* iota = + DynCast(sort->mutable_operand(1)); + const PrimitiveType element_type = data->shape().element_type(); + if (data->shape().rank() != 2 || + (element_type != F32 && element_type != BF16)) { + continue; + } + if (iota == nullptr || iota->shape().rank() != 2 || + iota->shape().element_type() != S32 || + iota->opcode() != HloOpcode::kIota || + iota->iota_dimension() != sort->sort_dimension()) { + continue; + } + if (!IsNanSafeGt(sort->to_apply())) { + continue; + } + const int64 sort_dim = sort->sort_dimension(); + const int64 batch_dim = sort_dim == 1 ? 0 : 1; + + bool supported = true; + absl::optional k; + for (HloInstruction* gte : sort->users()) { + if (gte->opcode() != HloOpcode::kGetTupleElement || + gte->user_count() != 1) { + supported = false; + break; + } + const HloInstruction* slice = gte->users()[0]; + if (slice->opcode() != HloOpcode::kSlice) { + // Non-slice user means we are not doing a TopK + supported = false; + break; + } + if (absl::c_any_of(slice->slice_starts(), + [](int x) { return x != 0; }) || + absl::c_any_of(slice->slice_strides(), + [](int x) { return x != 1; })) { + // Strided slice or slicing at the beginning isn't supported. 
+ supported = false; + break; + } + if (slice->slice_limits(batch_dim) != + slice->operand(0)->shape().dimensions(batch_dim)) { + // Slicing along the batch dimension isn't supported. + supported = false; + break; + } + if (k == absl::nullopt) { + k = slice->slice_limits(sort_dim); + } else if (k != slice->slice_limits(sort_dim)) { + // Different k for the different operands isn't supported. + supported = false; + break; + } + } + if (k == absl::nullopt || !supported) { + continue; + } + + // Profitability check. + if (!is_profitable_to_convert_(sort, *k)) { + continue; + } + + const int64 batch_size = sort->operand(0)->shape().dimensions(batch_dim); + const int64 input_size = sort->operand(0)->shape().dimensions(sort_dim); + HloInstruction* input = sort->mutable_operand(0); + if (sort_dim == 0) { + input = comp->AddInstruction(HloInstruction::CreateTranspose( + ShapeUtil::MakeShape(element_type, {batch_size, input_size}), input, + {1, 0})); + } + + Shape topk_shape = ShapeUtil::MakeTupleShape( + {ShapeUtil::MakeShape(element_type, {batch_size, k.value()}), + ShapeUtil::MakeShape(S32, {batch_size, k.value()})}); + HloInstruction* topk = comp->AddInstruction( + HloInstruction::CreateCustomCall(topk_shape, {input}, "TopK")); + HloInstruction* value_gte = + comp->AddInstruction(HloInstruction::CreateGetTupleElement( + topk->shape().tuple_shapes(0), topk, 0)); + HloInstruction* index_gte = + comp->AddInstruction(HloInstruction::CreateGetTupleElement( + topk->shape().tuple_shapes(1), topk, 1)); + + if (sort_dim == 0) { + value_gte = comp->AddInstruction(HloInstruction::CreateTranspose( + ShapeUtil::MakeShape(element_type, {k.value(), batch_size}), + value_gte, {1, 0})); + index_gte = comp->AddInstruction(HloInstruction::CreateTranspose( + ShapeUtil::MakeShape(S32, {k.value(), batch_size}), index_gte, + {1, 0})); + } + + for (HloInstruction* gte : sort->users()) { + for (HloInstruction* slice : gte->users()) { + if (gte->tuple_index() == 0) { + TF_RETURN_IF_ERROR(slice->ReplaceAllUsesWith(value_gte)); + } else if (gte->tuple_index() == 1) { + TF_RETURN_IF_ERROR(slice->ReplaceAllUsesWith(index_gte)); + } else { + LOG(FATAL) << "Sort with more than 2 output isn't supported in " + "topk rewriter"; + } + } + } + changed = true; + } + } + return changed; +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/service/topk_rewriter.h b/tensorflow/compiler/xla/service/topk_rewriter.h new file mode 100644 index 00000000000..68f8a8145e2 --- /dev/null +++ b/tensorflow/compiler/xla/service/topk_rewriter.h @@ -0,0 +1,44 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_TOPK_REWRITER_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_TOPK_REWRITER_H_ + +#include "tensorflow/compiler/xla/service/hlo_instructions.h" +#include "tensorflow/compiler/xla/service/hlo_pass_interface.h" + +namespace xla { +// This pass pattern-matches soups of HLOs executing a TopK operation and +// replaces them with a TopK CustomCall when the given values are supported by +// the CustomCall and it is more efficient to use that implementation. +class TopkRewriter : public HloModulePass { + public: + explicit TopkRewriter(std::function + is_profitable_to_convert) + : is_profitable_to_convert_(std::move(is_profitable_to_convert)) {} + + absl::string_view name() const override { return "topk-rewriter"; } + + StatusOr Run(HloModule* module) override; + + private: + // Predicate that returns true if a sort instruction is profitable to be + // converted into a custom call. + std::function + is_profitable_to_convert_; +}; +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_TOPK_REWRITER_H_ diff --git a/tensorflow/compiler/xla/service/topk_rewriter_test.cc b/tensorflow/compiler/xla/service/topk_rewriter_test.cc new file mode 100644 index 00000000000..e440da5b163 --- /dev/null +++ b/tensorflow/compiler/xla/service/topk_rewriter_test.cc @@ -0,0 +1,153 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/service/topk_rewriter.h" + +#include "tensorflow/compiler/xla/service/hlo_dce.h" +#include "tensorflow/compiler/xla/service/hlo_matchers.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/tests/test_utils.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" + +namespace op = xla::testing::opcode_matchers; + +namespace xla { +namespace { + +using TopkRewriterTest = HloTestBase; + +TEST_F(TopkRewriterTest, Rewrite) { + const char* const hlo_string = R"( +HloModule module + +%compare { + %p.1.lhs.8 = s32[] parameter(2) + %p.1.rhs.9 = s32[] parameter(3) + %p.0.lhs.6 = f32[] parameter(0) + %bitcast-convert.11 = s32[] bitcast-convert(%p.0.lhs.6) + %constant.15 = s32[] constant(0) + %compare.16 = pred[] compare(%bitcast-convert.11, %constant.15), direction=LT + %constant.10 = u32[] constant(2147483647) + %bitcast-convert.12 = u32[] bitcast-convert(%p.0.lhs.6) + %subtract.13 = u32[] subtract(%constant.10, %bitcast-convert.12) + %bitcast-convert.14 = s32[] bitcast-convert(%subtract.13) + %select.17 = s32[] select(%compare.16, %bitcast-convert.14, + %bitcast-convert.11) + %p.0.rhs.7 = f32[] parameter(1) + %bitcast-convert.19 = s32[] bitcast-convert(%p.0.rhs.7) + %constant.23 = s32[] constant(0) + %compare.24 = pred[] compare(%bitcast-convert.19, %constant.23), direction=LT + %constant.18 = u32[] constant(2147483647) + %bitcast-convert.20 = u32[] bitcast-convert(%p.0.rhs.7) + %subtract.21 = u32[] subtract(%constant.18, %bitcast-convert.20) + %bitcast-convert.22 = s32[] bitcast-convert(%subtract.21) + %select.25 = s32[] select(%compare.24, %bitcast-convert.22, + %bitcast-convert.19) + ROOT %compare.26 = pred[] compare(%select.17, %select.25), direction=GT +} + +ENTRY cluster { + %arg_tuple.1 = f32[8,1234567] parameter(0) + %iota.4 = s32[8,1234567] iota(), iota_dimension=1 + %sort.27 = (f32[8,1234567], s32[8,1234567]) sort(%arg_tuple.1, %iota.4), + dimensions={1}, is_stable=true, to_apply=%compare + %get-tuple-element.28 = f32[8,1234567] get-tuple-element(%sort.27), index=0 + %slice.29 = f32[8,5] slice(%get-tuple-element.28), slice={[0:8], [0:5]} + %get-tuple-element.30 = s32[8,1234567] get-tuple-element(%sort.27), index=1 + %slice.31 = s32[8,5] slice(%get-tuple-element.30), slice={[0:8], [0:5]} + ROOT %tuple.32 = (f32[8,5], s32[8,5]) tuple(%slice.29, %slice.31) +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + TopkRewriter rewriter([](const HloSortInstruction*, int64) { return true; }); + TF_ASSERT_OK_AND_ASSIGN(bool changed, rewriter.Run(module.get())); + TF_ASSERT_OK(HloDCE().Run(module.get()).status()); + EXPECT_TRUE(changed); + EXPECT_THAT( + module->entry_computation()->root_instruction(), + op::Tuple(op::GetTupleElement(op::CustomCall(op::Parameter(0)), 0), + op::GetTupleElement(op::CustomCall(op::Parameter(0)), 1))); + const HloInstruction* cc = + module->entry_computation()->root_instruction()->operand(0)->operand(0); + EXPECT_THAT(cc->custom_call_target(), "TopK"); +} + +TEST_F(TopkRewriterTest, RewriteTranspose) { + const char* const hlo_string = R"( +HloModule module + +%compare { + %p.1.lhs.8 = s32[] parameter(2) + %p.1.rhs.9 = s32[] parameter(3) + %p.0.lhs.6 = f32[] parameter(0) + %bitcast-convert.11 = s32[] 
bitcast-convert(%p.0.lhs.6) + %constant.15 = s32[] constant(0) + %compare.16 = pred[] compare(%bitcast-convert.11, %constant.15), direction=LT + %constant.10 = u32[] constant(2147483647) + %bitcast-convert.12 = u32[] bitcast-convert(%p.0.lhs.6) + %subtract.13 = u32[] subtract(%constant.10, %bitcast-convert.12) + %bitcast-convert.14 = s32[] bitcast-convert(%subtract.13) + %select.17 = s32[] select(%compare.16, %bitcast-convert.14, + %bitcast-convert.11) + %p.0.rhs.7 = f32[] parameter(1) + %bitcast-convert.19 = s32[] bitcast-convert(%p.0.rhs.7) + %constant.23 = s32[] constant(0) + %compare.24 = pred[] compare(%bitcast-convert.19, %constant.23), direction=LT + %constant.18 = u32[] constant(2147483647) + %bitcast-convert.20 = u32[] bitcast-convert(%p.0.rhs.7) + %subtract.21 = u32[] subtract(%constant.18, %bitcast-convert.20) + %bitcast-convert.22 = s32[] bitcast-convert(%subtract.21) + %select.25 = s32[] select(%compare.24, %bitcast-convert.22, + %bitcast-convert.19) + ROOT %compare.26 = pred[] compare(%select.17, %select.25), direction=GT +} + +ENTRY cluster { + %arg_tuple.1 = f32[1234567,8] parameter(0) + %iota.4 = s32[1234567,8] iota(), iota_dimension=0 + %sort.27 = (f32[1234567,8], s32[1234567,8]) sort(%arg_tuple.1, %iota.4), + dimensions={0}, is_stable=true, to_apply=%compare + %get-tuple-element.28 = f32[1234567,8] get-tuple-element(%sort.27), index=0 + %slice.29 = f32[5,8] slice(%get-tuple-element.28), slice={[0:5], [0:8]} + %get-tuple-element.30 = s32[1234567,8] get-tuple-element(%sort.27), index=1 + %slice.31 = s32[5,8] slice(%get-tuple-element.30), slice={[0:5], [0:8]} + ROOT %tuple.32 = (f32[5,8], s32[5,8]) tuple(%slice.29, %slice.31) +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + TopkRewriter rewriter([](const HloSortInstruction*, int64) { return true; }); + TF_ASSERT_OK_AND_ASSIGN(bool changed, rewriter.Run(module.get())); + TF_ASSERT_OK(HloDCE().Run(module.get()).status()); + EXPECT_TRUE(changed); + LOG(INFO) << module->entry_computation()->ToString(); + EXPECT_THAT( + module->entry_computation()->root_instruction(), + op::Tuple(op::Transpose(op::GetTupleElement( + op::CustomCall(op::Transpose(op::Parameter(0))), 0)), + op::Transpose(op::GetTupleElement( + op::CustomCall(op::Transpose(op::Parameter(0))), 1)))); + const HloInstruction* cc = module->entry_computation() + ->root_instruction() + ->operand(0) + ->operand(0) + ->operand(0); + EXPECT_THAT(cc->custom_call_target(), "TopK"); +} + +} // namespace +} // namespace xla From 1033515e9ede0996f0213da293d0a7cff6dc094c Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Thu, 7 May 2020 18:17:52 +0200 Subject: [PATCH 2150/2522] Add TF-TRT converter for Shape op. The unit test includes test cases when the TensorRT network is called with no input tensor. The helper routines are adjusted to handle this case. 
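For orientation, the converter added below takes one of two paths. If every input dimension is known at conversion time, the Shape result is emitted as an int32 constant layer, so the resulting TensorRT network can end up with no input tensor at all (the case the commit message refers to and the adjusted helpers must tolerate). Otherwise, on TensorRT 6 and newer, an IShapeLayer computes the shape at runtime. A plain C++ sketch of that decision, using hypothetical helper names, -1 for an unknown dimension, and no TensorRT types:

    #include <cstdint>
    #include <optional>
    #include <vector>

    // Returns the dims as a constant when they are fully known; otherwise the
    // shape has to be produced by a runtime shape layer instead.
    std::optional<std::vector<int32_t>> FoldShapeToConstant(
        const std::vector<int32_t>& dims) {
      for (int32_t d : dims) {
        if (d < 0) return std::nullopt;  // dynamic dimension
      }
      return dims;  // static: Shape's output is just the dims, baked in
    }

    int main() {
      // {1, 2, 3} folds to the constant [1, 2, 3]; {-1, 2, 3} does not.
      bool ok = FoldShapeToConstant({1, 2, 3}).has_value() &&
                !FoldShapeToConstant({-1, 2, 3}).has_value();
      return ok ? 0 : 1;
    }
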
--- .../tf2tensorrt/convert/convert_nodes.cc | 35 ++++++++++ .../tf2tensorrt/convert/convert_nodes_test.cc | 69 +++++++++++++++++-- .../utils/trt_shape_optimization_profiles.cc | 17 +++-- 3 files changed, 107 insertions(+), 14 deletions(-) diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc index 369b339d01a..6674081011f 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc @@ -2410,6 +2410,40 @@ Status ConvertTranspose(OpConverterParams* params) { return Status::OK(); } +Status ConvertShape(OpConverterParams* params) { + const auto& inputs = params->inputs; + TF_RETURN_IF_ERROR( + CheckInputsWeights(*params, {{"input", TrtInputArg::kBoth}})); + if (params->use_implicit_batch) { + return errors::Unimplemented( + "Shape is only supported for explicit batch mode."); + } + if (HasStaticShape(inputs.at(0).GetTrtDims())) { + if (params->validation_only) return Status::OK(); + nvinfer1::Dims input_dims = inputs.at(0).GetTrtDims(); + nvinfer1::Dims output_dims{1, {input_dims.nbDims}}; + // Create a const node with the values of output_dims + TRT_ShapedWeights weight = params->weight_store->GetTempWeights( + nvinfer1::DataType::kINT32, output_dims); + int32* values_ptr = static_cast(weight.GetValues()); + std::copy(input_dims.d, input_dims.d + input_dims.nbDims, values_ptr); + auto output = params->converter->CreateConstantLayer(weight, output_dims); + params->outputs->push_back(TRT_TensorOrWeights(output)); + return Status::OK(); + } +#if IS_TRT_VERSION_GE(6, 0, 0, 0) + if (params->validation_only) return Status::OK(); + nvinfer1::IShapeLayer* shape_layer = + params->converter->network()->addShape(*inputs.at(0).tensor()); + TFTRT_RETURN_ERROR_IF_NULLPTR(shape_layer, params->node_def.name()); + params->outputs->push_back(TRT_TensorOrWeights(shape_layer->getOutput(0))); + return Status::OK(); +#else + return errors::Unavailable( + "Shape op conversion requires TensorRT 6 or above"); +#endif +} + Status ConvertReshape(OpConverterParams* params) { const auto& inputs = params->inputs; TF_RETURN_IF_ERROR( @@ -5958,6 +5992,7 @@ static void RegisterValidatableOpConverters( (*registration)[pool_op_type] = ConvertPool3D; } #endif + (*registration)["Shape"] = ConvertShape; (*registration)["Rsqrt"] = ConvertRsqrt; (*registration)["Slice"] = ConvertSlice; (*registration)["Softmax"] = ConvertSoftmax; diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc index 52d05ff8225..9ca1c8c4c9f 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc @@ -1781,7 +1781,8 @@ class ParameterizedOpConverterTestBase void BuildAndRun(const string& name, const std::vector>& expected_output_dims, const Status& expected_runtime_status, - const std::vector>>& matcher) { + const std::vector>>& matcher, + const std::vector& out_tf_types = {}) { TensorShape shape; const int n_output = expected_output_dims.size(); ASSERT_EQ(n_output, matcher.size()); @@ -1790,12 +1791,14 @@ class ParameterizedOpConverterTestBase TF_EXPECT_OK( TensorShapeUtils::MakeShape(expected_output_dims[i], &shape)); string out_name = (n_output == 1) ? name : StrCat(name, ":", i); - InputOutputData data{out_name, - ConstructTensor(shape.num_elements(), 0, tf_type)}; + DataType out_tf_type = + out_tf_types.size() > i ? 
out_tf_types[i] : tf_type; + InputOutputData data{ + out_name, ConstructTensor(shape.num_elements(), 0, out_tf_type)}; output_data.push_back(data); } - ASSERT_FALSE(input_data_.empty()); - const int batch_size = input_data_[0].tensor.shape().dim_size(0); + const int batch_size = + input_data_.empty() ? 1 : input_data_[0].tensor.shape().dim_size(0); Status stat = OpConverterTest::BuildAndRun(input_data_, &output_data, batch_size); ASSERT_EQ(expected_runtime_status.ok(), stat.ok()) @@ -1820,13 +1823,15 @@ class ParameterizedOpConverterTestBase const std::vector& expected_output_dims, const Status& expected_conversion_status, const Status& expected_runtime_status, - const Matcher>& matcher) { + const Matcher>& matcher, + const std::vector& out_tf_types = {}) { RunValidationAndConversion(node_def, expected_conversion_status, name.c_str(), expected_output_dims); if (expected_conversion_status.ok()) { BuildAndRun(name, std::vector>({expected_output_dims}), expected_runtime_status, - std::vector>>({matcher})); + std::vector>>({matcher}), + out_tf_types); } } @@ -2169,6 +2174,56 @@ TEST_F(OpConverterTest, ConvertReshape) { } } +TEST_P(OpConverterTest1, ConvertShape) { + // Get the NodeDef for Shape op. + Scope s = Scope::NewRootScope(); + auto input = ops::Placeholder(s.WithOpName("input"), tf_type); + auto shape = ops::Shape(s.WithOpName("my_shape"), input); + const NodeDef& node_def = shape.operation.node()->def(); + + Status conversion_status = + (trt_mode == TrtTestMode::kImplicitBatch) + ? errors::Unimplemented( + "Shape is only supported for explicit batch mode.") + : Status::OK(); + std::vector test_params = { + TestParamBase{{1, 2, 3}, {}, {3}, {}, conversion_status}, + // Add input as weight (we use non empty param ({1}) to trigger this). + TestParamBase{{1, 2, 3}, {}, {3}, {1}, conversion_status}, + }; + + auto input_is_weight = [](const TestParamBase p) { return !p.param.empty(); }; + for (auto p : test_params) { + SCOPED_TRACE(p); + Reset(); + // Number of elements of the input tensor. We leave it 0 in case we do + // not need to add an input tensor. This happens in explicit batch mode: the + // shape is known at conversion time and therefore the shape is added to the + // network as a constant layer. (In this case the single node network that + // we use for the unit test will have no actual input tensor when converted + // to a TensorRT network.) + int n_elements = 0; + // In explicit batch mode the shape is known at conversion time and + // therefore the shape is added to the network as a constant layer. As + // a result, the single node network that we use for this unit test will + // have no actual input tensor when converted to a TensorRT network. + if (input_is_weight(p) || trt_mode != TrtTestMode::kExplicitBatch) { + // Calculate the number of elements for adding input data. + n_elements = std::accumulate(p.input_dims.begin(), p.input_dims.end(), 1, + std::multiplies()); + } + std::vector input_val(n_elements, 1); + if (!input_is_weight(p)) { + AddTestTensor("input", p.input_dims, input_val); + } else { + AddTestWeights("input", p.input_dims, input_val, tf_type); + } + TestOpConverter("my_shape", node_def, p.expected_output_dims, p.status, + p.runtime_status, ElementsAreArray(p.input_dims), + {DT_INT32}); + } +} + // Helper function for testing MatMul and BatchMatMul // get_matmul corresponds to the function used to generate the node. It should // accept (DataType, transpose_a, transpose_b) as parameters. 
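The same concern drives the helper change that follows: when Shape folds to a constant, the converted engine may have no input tensors, so code that previously assumed at least one input (to read the batch size, or to build an optimization profile per input) now has to guard against the empty case, as BuildAndRun above already does. A small stand-alone sketch of that guard, with hypothetical names:

    #include <vector>

    // Fall back to a batch size of 1 when the converted engine has no inputs,
    // mirroring the fallback added to BuildAndRun above.
    int BatchSizeOrDefault(const std::vector<std::vector<int>>& input_shapes) {
      return (input_shapes.empty() || input_shapes[0].empty())
                 ? 1
                 : input_shapes[0][0];
    }

    int main() { return BatchSizeOrDefault({}) == 1 ? 0 : 1; }
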
diff --git a/tensorflow/compiler/tf2tensorrt/utils/trt_shape_optimization_profiles.cc b/tensorflow/compiler/tf2tensorrt/utils/trt_shape_optimization_profiles.cc index 70a0a9a7b65..2f31865751f 100644 --- a/tensorflow/compiler/tf2tensorrt/utils/trt_shape_optimization_profiles.cc +++ b/tensorflow/compiler/tf2tensorrt/utils/trt_shape_optimization_profiles.cc @@ -18,6 +18,7 @@ limitations under the License. #include #include +#include "absl/algorithm/container.h" #include "tensorflow/compiler/tf2tensorrt/convert/utils.h" #if GOOGLE_CUDA && GOOGLE_TENSORRT @@ -35,14 +36,16 @@ void TrtShapeOptimizationProfile::InitProfiles() { << "for each input (min=opt=max)."; } for (auto& shape_vec : input_shapes_) { - std::vector dimvec; - for (auto& shape : shape_vec) { - dimvec.push_back(TensorShapeToTrtDims(shape, false)); + if (!shape_vec.empty()) { + std::vector dimvec(shape_vec.size()); + absl::c_transform(shape_vec, dimvec.begin(), [](TensorShape shape) { + return TensorShapeToTrtDims(shape, false); + }); + // Set min=opt=max. + OptimizationProfileConfig profConfig{dimvec, dimvec, dimvec}; + profiles_.push_back(std::move(profConfig)); + VLOG(1) << "Created profile " << profiles_.back().DebugString(); } - // We set min=opt=max. - OptimizationProfileConfig profConfig{dimvec, dimvec, dimvec}; - profiles_.push_back(std::move(profConfig)); - VLOG(1) << "Created profile " << profiles_.back().DebugString(); } } From ccbd5ff0e6a77c4463d76926bef9c8bdaad8826a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 5 Aug 2020 02:01:47 -0700 Subject: [PATCH 2151/2522] Update GraphDef version to 484. PiperOrigin-RevId: 324978474 Change-Id: Ib5d72c7e1955a8964dedf75e1a6a6ee6c582cfef --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 7febc640348..acaa48f251c 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 483 // Updated: 2020/8/4 +#define TF_GRAPH_DEF_VERSION 484 // Updated: 2020/8/5 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From f576f29e3d14c36dfe79f014372e49acd75ce406 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 5 Aug 2020 02:01:48 -0700 Subject: [PATCH 2152/2522] compat: Update forward compatibility horizon to 2020-08-05 PiperOrigin-RevId: 324978475 Change-Id: I3875bc841ac816ae7123acc82621acfdcb0fce17 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index c40337f00be..a274743d124 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 8, 4) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 8, 5) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From 926642ea7a1a8095cddf3b063c5d19d4478eaeda Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 5 Aug 2020 02:05:57 -0700 Subject: [PATCH 2153/2522] [XLA:CPU] Add a runtime function for F32 TopK and use TopkRewriter to target it This just delegates the hard work to std::partial_sort. PiperOrigin-RevId: 324979038 Change-Id: I16a7c4d840948f3744f4f920bac29d4e6d7333b3 --- tensorflow/compiler/tests/sort_ops_test.py | 59 +++++++------- tensorflow/compiler/xla/service/cpu/BUILD | 17 +++++ .../compiler/xla/service/cpu/cpu_compiler.cc | 4 + .../compiler/xla/service/cpu/cpu_runtime.cc | 1 + .../compiler/xla/service/cpu/cpu_runtime.h | 1 + .../compiler/xla/service/cpu/ir_emitter.cc | 38 ++++++++++ .../compiler/xla/service/cpu/ir_emitter.h | 1 + .../compiler/xla/service/cpu/runtime_topk.cc | 76 +++++++++++++++++++ .../compiler/xla/service/cpu/runtime_topk.h | 32 ++++++++ .../xla/service/cpu/simple_orc_jit.cc | 2 + .../compiler/xla/service/cpu/tests/BUILD | 16 ++++ .../xla/service/cpu/tests/cpu_topk_test.cc | 59 ++++++++++++++ 12 files changed, 273 insertions(+), 33 deletions(-) create mode 100644 tensorflow/compiler/xla/service/cpu/runtime_topk.cc create mode 100644 tensorflow/compiler/xla/service/cpu/runtime_topk.h create mode 100644 tensorflow/compiler/xla/service/cpu/tests/cpu_topk_test.cc diff --git a/tensorflow/compiler/tests/sort_ops_test.py b/tensorflow/compiler/tests/sort_ops_test.py index d50fdec7c63..838718aa1e3 100644 --- a/tensorflow/compiler/tests/sort_ops_test.py +++ b/tensorflow/compiler/tests/sort_ops_test.py @@ -129,42 +129,35 @@ class XlaSortOpTest(xla_test.XLATestCase): def testTopKZeros(self): """Tests that positive and negative zeros sort correctly.""" - # Only bfloat16 is implemented. - bfloat16 = dtypes.bfloat16.as_numpy_dtype - if bfloat16 not in self.numeric_types: - return - - with self.session() as sess: - p = array_ops.placeholder(dtypes.bfloat16) - with self.test_scope(): - topk = nn_ops.top_k(p, k=4) - results = sess.run( - topk, - {p: np.array([0., -0., 0., 3., -0., -4., 0., -0.], dtype=bfloat16)}) - self.assertAllEqual( - np.array([3., 0., 0., 0.], dtype=bfloat16), results[0]) - self.assertEqual(list([3, 0, 2, 6]), list(results[1])) + supported_types = set([dtypes.bfloat16.as_numpy_dtype, np.float32]) + for dtype in supported_types.intersection(self.numeric_types): + with self.session() as sess: + p = array_ops.placeholder(dtype) + with self.test_scope(): + topk = nn_ops.top_k(p, k=4) + results = sess.run( + topk, + {p: np.array([0., -0., 0., 3., -0., -4., 0., -0.], dtype=dtype)}) + self.assertAllEqual(np.array([3., 0., 0., 0.], dtype=dtype), results[0]) + self.assertEqual(list([3, 0, 2, 6]), list(results[1])) def testTopKInfinities(self): """Tests that positive and negative infinity sort correctly.""" - # Only bfloat16 is implemented. 
- bfloat16 = dtypes.bfloat16.as_numpy_dtype - if bfloat16 not in self.numeric_types: - return - - with self.session() as sess: - p = array_ops.placeholder(dtypes.bfloat16) - with self.test_scope(): - topk = nn_ops.top_k(p, k=6) - results = sess.run(topk, { - p: np.array( - [1, 2, float("inf"), -float("inf"), -1, -2], dtype=bfloat16) - }) - self.assertAllEqual( - np.array( - [float("inf"), 2.0, 1.0, -1.0, -2.0, -float("inf")], - dtype=bfloat16), results[0]) - self.assertEqual(list([2, 1, 0, 4, 5, 3]), list(results[1])) + supported_types = set([dtypes.bfloat16.as_numpy_dtype, np.float32]) + for dtype in supported_types.intersection(self.numeric_types): + with self.session() as sess: + p = array_ops.placeholder(dtype) + with self.test_scope(): + topk = nn_ops.top_k(p, k=6) + results = sess.run(topk, { + p: + np.array([1, 2, float("inf"), -float("inf"), -1, -2], + dtype=dtype) + }) + self.assertAllEqual( + np.array([float("inf"), 2.0, 1.0, -1.0, -2.0, -float("inf")], + dtype=dtype), results[0]) + self.assertEqual(list([2, 1, 0, 4, 5, 3]), list(results[1])) def testInTopK(self): supported_types = set([np.int32, np.int64]) diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index 782d08296f0..6eaf43902fe 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -49,6 +49,7 @@ filegroup( "runtime_single_threaded_conv2d.cc", "runtime_single_threaded_fft.cc", "runtime_single_threaded_matmul.cc", + "runtime_topk.cc", ], visibility = [":friends"], ) @@ -64,6 +65,7 @@ filegroup( "runtime_single_threaded_conv2d.h", "runtime_single_threaded_fft.h", "runtime_single_threaded_matmul.h", + "runtime_topk.h", ], visibility = [":friends"], ) @@ -134,6 +136,7 @@ cc_library( "//tensorflow/compiler/xla/service:copy_insertion", "//tensorflow/compiler/xla/service:hlo_casting_utils", "//tensorflow/compiler/xla/service:dump", + "//tensorflow/compiler/xla/service:topk_rewriter", "//tensorflow/compiler/xla/service:map_inliner", "//tensorflow/compiler/xla/service:rng_bit_generator_expander", "//tensorflow/compiler/xla/service:tree_reduction_rewriter", @@ -230,6 +233,7 @@ cc_library( ":runtime_fft", ":runtime_fork_join", ":runtime_key_value_sort", + ":runtime_topk", ":runtime_matmul", ":runtime_matmul_mkl", ":runtime_single_threaded_conv2d", @@ -759,6 +763,19 @@ cc_library( ], ) +cc_library( + name = "runtime_topk", + srcs = ["runtime_topk.cc"], + hdrs = ["runtime_topk.h"], + copts = runtime_copts(), + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/core/platform:dynamic_annotations", + "//tensorflow/core/platform:macros", + "//tensorflow/core/platform:types", + ], +) + cc_library( name = "runtime_fork_join", srcs = ["runtime_fork_join.cc"], diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc index 04d703fdd59..0826d7b8ce1 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc @@ -104,6 +104,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/slice_sinker.h" #include "tensorflow/compiler/xla/service/slow_operation_alarm.h" #include "tensorflow/compiler/xla/service/sort_simplifier.h" +#include "tensorflow/compiler/xla/service/topk_rewriter.h" #include "tensorflow/compiler/xla/service/transpose_folding.h" #include "tensorflow/compiler/xla/service/tree_reduction_rewriter.h" #include "tensorflow/compiler/xla/service/triangular_solve_expander.h" @@ -320,6 +321,9 @@ Status CpuCompiler::RunHloPassesThroughLayoutAssn( pass.AddPass(); pass.AddPass(); } + pipeline.AddPass([](const HloSortInstruction* sort, int64) { + return sort->operand(0)->shape().element_type() == F32; + }); pipeline.AddPass(); pipeline.AddPass( [&](const HloInstruction& dot, diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc b/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc index 2231ecfa1e8..5bee6049a5e 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc @@ -117,6 +117,7 @@ extern const char* const kParallelForkJoinSymbolName = "__xla_cpu_runtime_ParallelForkJoin"; extern const char* const kKeyValueSortSymbolName = "__xla_cpu_runtime_KeyValueSort"; +extern const char* const kTopKF32SymbolName = "__xla_cpu_runtime_TopKF32"; extern const char* const kTracingStartSymbolName = "__xla_cpu_runtime_TracingStart"; extern const char* const kTracingEndSymbolName = "__xla_cpu_runtime_TracingEnd"; diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime.h b/tensorflow/compiler/xla/service/cpu/cpu_runtime.h index ee75b97e4dc..eb24e0bc334 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_runtime.h +++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime.h @@ -72,6 +72,7 @@ extern const char* const kAcquireOutfeedBufferForPopulationSymbolName; extern const char* const kReleaseOutfeedBufferAfterPopulationSymbolName; extern const char* const kParallelForkJoinSymbolName; extern const char* const kKeyValueSortSymbolName; +extern const char* const kTopKF32SymbolName; extern const char* const kAllReduceSymbolName; extern const char* const kCollectivePermuteSymbolName; extern const char* const kReplicaIdSymbolName; diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index 278e6479e48..2688a7898af 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -2387,6 +2387,41 @@ Status IrEmitter::HandlePadToStatic(HloInstruction* hlo) { return Status::OK(); } +Status IrEmitter::HandleTopK(HloInstruction* hlo) { + TF_RETURN_IF_ERROR(EmitTargetAddressForOp(hlo)); + const HloInstruction* input = hlo->operand(0); + int64 k = hlo->shape().tuple_shapes(0).dimensions(1); + TF_RET_CHECK(input->shape().element_type() == F32); + TF_RET_CHECK(LayoutUtil::IsMonotonicWithDim0Major( + hlo->shape().tuple_shapes(0).layout())); + TF_RET_CHECK(LayoutUtil::IsMonotonicWithDim0Major( + hlo->shape().tuple_shapes(1).layout())); + TF_RET_CHECK( + LayoutUtil::IsMonotonicWithDim0Major(hlo->operand(0)->shape().layout())); + + TF_ASSIGN_OR_RETURN(const BufferAllocation::Slice values_slice, + assignment_.GetUniqueSlice(hlo->operand(0), {})); + TF_ASSIGN_OR_RETURN(const BufferAllocation::Slice out_values_slice, + assignment_.GetUniqueSlice(hlo, {0})); + TF_ASSIGN_OR_RETURN(const BufferAllocation::Slice out_indices_slice, + assignment_.GetUniqueSlice(hlo, {1})); + llvm::Value* values_ptr = + EmitBufferPointer(values_slice, hlo->operand(0)->shape()); + llvm::Value* 
out_values_ptr = + EmitBufferPointer(out_values_slice, hlo->shape().tuple_shapes(0)); + llvm::Value* out_indices_ptr = + EmitBufferPointer(out_indices_slice, hlo->shape().tuple_shapes(1)); + EmitCallToFunc(runtime::kTopKF32SymbolName, + {b_.getInt64(input->shape().dimensions(0)), + b_.getInt64(input->shape().dimensions(1)), b_.getInt64(k), + values_ptr, out_values_ptr, out_indices_ptr}, + b_.getVoidTy()); + + llvm_ir::EmitTuple(GetIrArrayFor(hlo), {out_values_ptr, out_indices_ptr}, + &b_); + return Status::OK(); +} + Status IrEmitter::HandleCustomCall(HloInstruction* custom_call) { if (custom_call->custom_call_target() == "PadToStatic") { return HandlePadToStatic(custom_call); @@ -2394,6 +2429,9 @@ Status IrEmitter::HandleCustomCall(HloInstruction* custom_call) { if (custom_call->custom_call_target() == "SliceToDynamic") { return HandleSliceToDynamic(custom_call); } + if (custom_call->custom_call_target() == "TopK") { + return HandleTopK(custom_call); + } absl::Span operands(custom_call->operands()); llvm::Type* i8_ptr_type = b_.getInt8PtrTy(); llvm::AllocaInst* operands_alloca = diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.h b/tensorflow/compiler/xla/service/cpu/ir_emitter.h index 3955deefbea..f136e3470e5 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.h +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.h @@ -190,6 +190,7 @@ class IrEmitter : public DfsHloVisitorWithDefault, private: Status HandleSliceToDynamic(HloInstruction* hlo); Status HandlePadToStatic(HloInstruction* hlo); + Status HandleTopK(HloInstruction* hlo); Status HandleAllReduceSingleReplica(HloInstruction* crs); Status HandleAllReduceMultipleReplica(HloInstruction* crs); diff --git a/tensorflow/compiler/xla/service/cpu/runtime_topk.cc b/tensorflow/compiler/xla/service/cpu/runtime_topk.cc new file mode 100644 index 00000000000..5174a3329fb --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/runtime_topk.cc @@ -0,0 +1,76 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/cpu/runtime_topk.h" + +#include +#include +#include +#include + +#include "tensorflow/core/platform/dynamic_annotations.h" +#include "tensorflow/core/platform/macros.h" + +template +static void TopK(tensorflow::int64 batch_size, tensorflow::int64 input_size, + tensorflow::int64 k, const T* values, T* out_values, + tensorflow::int32* out_indices) { + // 'values' is managed by the JIT code, so msan can't tell they are + // initialized. 
+ TF_ANNOTATE_MEMORY_IS_INITIALIZED(values, + input_size * batch_size * sizeof(T)); + + std::vector temp_indices(input_size); + for (tensorflow::int64 batch = 0; batch != batch_size; ++batch) { + std::iota(temp_indices.begin(), temp_indices.end(), 0); + + const T* values_batch = values + batch * input_size; + + auto convert_to_int = [](T value) { + tensorflow::uint32 x; + std::memcpy(&x, &value, sizeof(x)); + return static_cast(x) < 0 + ? std::numeric_limits::max() - x + : x; + }; + + auto kth_element = temp_indices.begin() + k; + std::partial_sort(temp_indices.begin(), kth_element, temp_indices.end(), + [&](size_t i1, size_t i2) { + // Do the comparison in integers to enforce a total + // order of -NaN < -Inf < -0 < +0 < +Inf < +NaN. + tensorflow::int32 v1 = convert_to_int(values_batch[i1]); + tensorflow::int32 v2 = convert_to_int(values_batch[i2]); + if (v1 == v2) { + return i1 < i2; // Stabilize sorting. + } + return v1 > v2; + }); + + T* out_values_batch = out_values + batch * k; + tensorflow::int32* out_indices_batch = out_indices + batch * k; + std::copy(temp_indices.begin(), kth_element, out_indices_batch); + for (tensorflow::int64 i = 0; i < k; i++) { + out_values_batch[i] = values_batch[temp_indices[i]]; + } + } +} + +TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_TopKF32( + tensorflow::int64 batch_size, tensorflow::int64 input_size, + tensorflow::int64 k, const float* values, float* out_values, + tensorflow::int32* out_indices) { + TopK(batch_size, input_size, k, values, out_values, out_indices); +} diff --git a/tensorflow/compiler/xla/service/cpu/runtime_topk.h b/tensorflow/compiler/xla/service/cpu/runtime_topk.h new file mode 100644 index 00000000000..de69c0603e3 --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/runtime_topk.h @@ -0,0 +1,32 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_TOPK_H +#define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_TOPK_H + +#include "tensorflow/core/platform/types.h" + +extern "C" { + +// Calculates `batch_size` topk operations with `input_size` inputs each. The +// outputs are written to `out_values` and `out_indices`. +extern void __xla_cpu_runtime_TopKF32(tensorflow::int64 batch_size, + tensorflow::int64 input_size, + tensorflow::int64 k, const float* values, + float* out_values, + tensorflow::int32* out_indices); +} + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_TOPK_H diff --git a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc index 631c6985b03..28508bde4cd 100644 --- a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc +++ b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc @@ -44,6 +44,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.h" #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.h" #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.h" +#include "tensorflow/compiler/xla/service/cpu/runtime_topk.h" #include "tensorflow/compiler/xla/service/cpu/windows_compatibility.h" #include "tensorflow/compiler/xla/service/custom_call_target_registry.h" #include "tensorflow/compiler/xla/types.h" @@ -270,6 +271,7 @@ bool RegisterKnownJITSymbols() { REGISTER_CPU_RUNTIME_SYMBOL(ReleaseInfeedBufferAfterDequeue); REGISTER_CPU_RUNTIME_SYMBOL(ReleaseOutfeedBufferAfterPopulation); REGISTER_CPU_RUNTIME_SYMBOL(KeyValueSort); + REGISTER_CPU_RUNTIME_SYMBOL(TopKF32); REGISTER_CPU_RUNTIME_SYMBOL(TracingStart); REGISTER_CPU_RUNTIME_SYMBOL(TracingEnd); diff --git a/tensorflow/compiler/xla/service/cpu/tests/BUILD b/tensorflow/compiler/xla/service/cpu/tests/BUILD index d7c50dce3ca..527071d5f31 100644 --- a/tensorflow/compiler/xla/service/cpu/tests/BUILD +++ b/tensorflow/compiler/xla/service/cpu/tests/BUILD @@ -253,6 +253,22 @@ tf_cc_test( ], ) +tf_cc_test( + name = "cpu_topk_test", + srcs = ["cpu_topk_test.cc"], + deps = [ + ":cpu_codegen_test", + "//tensorflow/compiler/xla/client:xla_builder", + "//tensorflow/compiler/xla/client/lib:sorting", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service/cpu:cpu_compiler", + "//tensorflow/compiler/xla/service/cpu:test_header_helper", + "//tensorflow/core:lib", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + tf_cc_test( name = "cpu_vectorization_test", srcs = ["cpu_vectorization_test.cc"], diff --git a/tensorflow/compiler/xla/service/cpu/tests/cpu_topk_test.cc b/tensorflow/compiler/xla/service/cpu/tests/cpu_topk_test.cc new file mode 100644 index 00000000000..a4c74cfb8a2 --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/tests/cpu_topk_test.cc @@ -0,0 +1,59 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include + +#include "tensorflow/compiler/xla/client/lib/sorting.h" +#include "tensorflow/compiler/xla/client/xla_builder.h" +#include "tensorflow/compiler/xla/service/cpu/cpu_compiler.h" +#include "tensorflow/compiler/xla/service/cpu/test_target_triple_helper.h" +#include "tensorflow/compiler/xla/service/cpu/tests/cpu_codegen_test.h" + +namespace xla { +namespace cpu { +namespace { + +using CpuTopKTest = CpuCodegenTest; + +TEST_F(CpuTopKTest, CallRuntime) { + XlaBuilder builder(TestName()); + XlaOp input = + Parameter(&builder, 0, ShapeUtil::MakeShape(F32, {5, 100}), "input"); + TopK(input, 10); + TF_ASSERT_OK_AND_ASSIGN(XlaComputation xla_computation, builder.Build()); + + TF_ASSERT_OK_AND_ASSIGN(ProgramShape program_shape, + xla_computation.GetProgramShape()); + HloModuleConfig config(program_shape); + TF_ASSERT_OK_AND_ASSIGN( + auto module, HloModule::CreateFromProto(xla_computation.proto(), config)); + + constexpr char filecheck_pattern[] = R"( + CHECK: call void @__xla_cpu_runtime_TopKF32(i64 5, i64 100, i64 10, + )"; + + CpuAotCompilationOptions options{ + /*triple=*/kTargetTripleForHost, /*cpu_name=*/kTargetCpuForHost, + /*features=*/"", + /*entry_point_name=*/"entry", + /*relocation_model=*/CpuAotCompilationOptions::RelocationModel::Static}; + + CompileAheadOfTimeAndVerifyIr(std::move(module), options, filecheck_pattern, + /*match_optimized_ir=*/true); +} + +} // namespace +} // namespace cpu +} // namespace xla From 2d2523c8c71f1c4aec8ca25fc7ece5a7f2ede9a6 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 5 Aug 2020 02:37:26 -0700 Subject: [PATCH 2154/2522] [XLA] Rewrite 1d sort to TopK PiperOrigin-RevId: 324982391 Change-Id: Ia324c70137647117154ca21db3fc640c3e29606f --- .../compiler/xla/service/cpu/ir_emitter.cc | 9 +-- .../xla/service/cpu/tests/cpu_topk_test.cc | 29 +++++++- .../compiler/xla/service/topk_rewriter.cc | 27 +++++--- .../xla/service/topk_rewriter_test.cc | 69 ++++++++++--------- 4 files changed, 88 insertions(+), 46 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index 2688a7898af..72f4d5369c8 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -2390,7 +2390,8 @@ Status IrEmitter::HandlePadToStatic(HloInstruction* hlo) { Status IrEmitter::HandleTopK(HloInstruction* hlo) { TF_RETURN_IF_ERROR(EmitTargetAddressForOp(hlo)); const HloInstruction* input = hlo->operand(0); - int64 k = hlo->shape().tuple_shapes(0).dimensions(1); + const int64 k = hlo->shape().tuple_shapes(0).dimensions().back(); + const bool has_batch = hlo->shape().tuple_shapes(0).dimensions_size() == 2; TF_RET_CHECK(input->shape().element_type() == F32); TF_RET_CHECK(LayoutUtil::IsMonotonicWithDim0Major( hlo->shape().tuple_shapes(0).layout())); @@ -2412,9 +2413,9 @@ Status IrEmitter::HandleTopK(HloInstruction* hlo) { llvm::Value* out_indices_ptr = EmitBufferPointer(out_indices_slice, hlo->shape().tuple_shapes(1)); EmitCallToFunc(runtime::kTopKF32SymbolName, - {b_.getInt64(input->shape().dimensions(0)), - b_.getInt64(input->shape().dimensions(1)), b_.getInt64(k), - values_ptr, out_values_ptr, out_indices_ptr}, + {b_.getInt64(has_batch ? 
input->shape().dimensions(0) : 1), + b_.getInt64(input->shape().dimensions().back()), + b_.getInt64(k), values_ptr, out_values_ptr, out_indices_ptr}, b_.getVoidTy()); llvm_ir::EmitTuple(GetIrArrayFor(hlo), {out_values_ptr, out_indices_ptr}, diff --git a/tensorflow/compiler/xla/service/cpu/tests/cpu_topk_test.cc b/tensorflow/compiler/xla/service/cpu/tests/cpu_topk_test.cc index a4c74cfb8a2..b7647fb4b16 100644 --- a/tensorflow/compiler/xla/service/cpu/tests/cpu_topk_test.cc +++ b/tensorflow/compiler/xla/service/cpu/tests/cpu_topk_test.cc @@ -27,7 +27,34 @@ namespace { using CpuTopKTest = CpuCodegenTest; -TEST_F(CpuTopKTest, CallRuntime) { +TEST_F(CpuTopKTest, CallRuntimeUnbatched) { + XlaBuilder builder(TestName()); + XlaOp input = + Parameter(&builder, 0, ShapeUtil::MakeShape(F32, {100}), "input"); + TopK(input, 10); + TF_ASSERT_OK_AND_ASSIGN(XlaComputation xla_computation, builder.Build()); + + TF_ASSERT_OK_AND_ASSIGN(ProgramShape program_shape, + xla_computation.GetProgramShape()); + HloModuleConfig config(program_shape); + TF_ASSERT_OK_AND_ASSIGN( + auto module, HloModule::CreateFromProto(xla_computation.proto(), config)); + + constexpr char filecheck_pattern[] = R"( + CHECK: call void @__xla_cpu_runtime_TopKF32(i64 1, i64 100, i64 10, + )"; + + CpuAotCompilationOptions options{ + /*triple=*/kTargetTripleForHost, /*cpu_name=*/kTargetCpuForHost, + /*features=*/"", + /*entry_point_name=*/"entry", + /*relocation_model=*/CpuAotCompilationOptions::RelocationModel::Static}; + + CompileAheadOfTimeAndVerifyIr(std::move(module), options, filecheck_pattern, + /*match_optimized_ir=*/true); +} + +TEST_F(CpuTopKTest, CallRuntimeBatched) { XlaBuilder builder(TestName()); XlaOp input = Parameter(&builder, 0, ShapeUtil::MakeShape(F32, {5, 100}), "input"); diff --git a/tensorflow/compiler/xla/service/topk_rewriter.cc b/tensorflow/compiler/xla/service/topk_rewriter.cc index ae843760a8d..000b1e94ece 100644 --- a/tensorflow/compiler/xla/service/topk_rewriter.cc +++ b/tensorflow/compiler/xla/service/topk_rewriter.cc @@ -75,11 +75,11 @@ StatusOr TopkRewriter::Run(HloModule* module) { HloIotaInstruction* iota = DynCast(sort->mutable_operand(1)); const PrimitiveType element_type = data->shape().element_type(); - if (data->shape().rank() != 2 || + if ((data->shape().rank() != 1 && data->shape().rank() != 2) || (element_type != F32 && element_type != BF16)) { continue; } - if (iota == nullptr || iota->shape().rank() != 2 || + if (iota == nullptr || iota->shape().rank() != data->shape().rank() || iota->shape().element_type() != S32 || iota->opcode() != HloOpcode::kIota || iota->iota_dimension() != sort->sort_dimension()) { @@ -90,6 +90,7 @@ StatusOr TopkRewriter::Run(HloModule* module) { } const int64 sort_dim = sort->sort_dimension(); const int64 batch_dim = sort_dim == 1 ? 0 : 1; + const bool has_batch = data->shape().rank() == 2; bool supported = true; absl::optional k; @@ -113,8 +114,8 @@ StatusOr TopkRewriter::Run(HloModule* module) { supported = false; break; } - if (slice->slice_limits(batch_dim) != - slice->operand(0)->shape().dimensions(batch_dim)) { + if (has_batch && slice->slice_limits(batch_dim) != + slice->operand(0)->shape().dimensions(batch_dim)) { // Slicing along the batch dimension isn't supported. supported = false; break; @@ -136,18 +137,24 @@ StatusOr TopkRewriter::Run(HloModule* module) { continue; } - const int64 batch_size = sort->operand(0)->shape().dimensions(batch_dim); + const int64 batch_size = + has_batch ? 
sort->operand(0)->shape().dimensions(batch_dim) : 1; const int64 input_size = sort->operand(0)->shape().dimensions(sort_dim); HloInstruction* input = sort->mutable_operand(0); - if (sort_dim == 0) { + if (has_batch && sort_dim == 0) { input = comp->AddInstruction(HloInstruction::CreateTranspose( ShapeUtil::MakeShape(element_type, {batch_size, input_size}), input, {1, 0})); } - Shape topk_shape = ShapeUtil::MakeTupleShape( - {ShapeUtil::MakeShape(element_type, {batch_size, k.value()}), - ShapeUtil::MakeShape(S32, {batch_size, k.value()})}); + Shape topk_shape = + has_batch ? ShapeUtil::MakeTupleShape( + {ShapeUtil::MakeShape(element_type, + {batch_size, k.value()}), + ShapeUtil::MakeShape(S32, {batch_size, k.value()})}) + : ShapeUtil::MakeTupleShape( + {ShapeUtil::MakeShape(element_type, {k.value()}), + ShapeUtil::MakeShape(S32, {k.value()})}); HloInstruction* topk = comp->AddInstruction( HloInstruction::CreateCustomCall(topk_shape, {input}, "TopK")); HloInstruction* value_gte = @@ -157,7 +164,7 @@ StatusOr TopkRewriter::Run(HloModule* module) { comp->AddInstruction(HloInstruction::CreateGetTupleElement( topk->shape().tuple_shapes(1), topk, 1)); - if (sort_dim == 0) { + if (has_batch && sort_dim == 0) { value_gte = comp->AddInstruction(HloInstruction::CreateTranspose( ShapeUtil::MakeShape(element_type, {k.value(), batch_size}), value_gte, {1, 0})); diff --git a/tensorflow/compiler/xla/service/topk_rewriter_test.cc b/tensorflow/compiler/xla/service/topk_rewriter_test.cc index e440da5b163..ec5b34b1c0a 100644 --- a/tensorflow/compiler/xla/service/topk_rewriter_test.cc +++ b/tensorflow/compiler/xla/service/topk_rewriter_test.cc @@ -31,10 +31,8 @@ namespace { using TopkRewriterTest = HloTestBase; -TEST_F(TopkRewriterTest, Rewrite) { - const char* const hlo_string = R"( -HloModule module - +std::string getComparator() { + return R"( %compare { %p.1.lhs.8 = s32[] parameter(2) %p.1.rhs.9 = s32[] parameter(3) @@ -59,8 +57,13 @@ HloModule module %select.25 = s32[] select(%compare.24, %bitcast-convert.22, %bitcast-convert.19) ROOT %compare.26 = pred[] compare(%select.17, %select.25), direction=GT +})"; } +TEST_F(TopkRewriterTest, Rewrite) { + const std::string hlo_string = R"( +HloModule module +)" + getComparator() + R"( ENTRY cluster { %arg_tuple.1 = f32[8,1234567] parameter(0) %iota.4 = s32[8,1234567] iota(), iota_dimension=1 @@ -87,36 +90,40 @@ ENTRY cluster { EXPECT_THAT(cc->custom_call_target(), "TopK"); } -TEST_F(TopkRewriterTest, RewriteTranspose) { - const char* const hlo_string = R"( +TEST_F(TopkRewriterTest, RewriteUnbatched) { + const std::string hlo_string = R"( HloModule module - -%compare { - %p.1.lhs.8 = s32[] parameter(2) - %p.1.rhs.9 = s32[] parameter(3) - %p.0.lhs.6 = f32[] parameter(0) - %bitcast-convert.11 = s32[] bitcast-convert(%p.0.lhs.6) - %constant.15 = s32[] constant(0) - %compare.16 = pred[] compare(%bitcast-convert.11, %constant.15), direction=LT - %constant.10 = u32[] constant(2147483647) - %bitcast-convert.12 = u32[] bitcast-convert(%p.0.lhs.6) - %subtract.13 = u32[] subtract(%constant.10, %bitcast-convert.12) - %bitcast-convert.14 = s32[] bitcast-convert(%subtract.13) - %select.17 = s32[] select(%compare.16, %bitcast-convert.14, - %bitcast-convert.11) - %p.0.rhs.7 = f32[] parameter(1) - %bitcast-convert.19 = s32[] bitcast-convert(%p.0.rhs.7) - %constant.23 = s32[] constant(0) - %compare.24 = pred[] compare(%bitcast-convert.19, %constant.23), direction=LT - %constant.18 = u32[] constant(2147483647) - %bitcast-convert.20 = u32[] bitcast-convert(%p.0.rhs.7) - %subtract.21 
= u32[] subtract(%constant.18, %bitcast-convert.20) - %bitcast-convert.22 = s32[] bitcast-convert(%subtract.21) - %select.25 = s32[] select(%compare.24, %bitcast-convert.22, - %bitcast-convert.19) - ROOT %compare.26 = pred[] compare(%select.17, %select.25), direction=GT +)" + getComparator() + R"( +ENTRY cluster { + %arg_tuple.1 = f32[1234567] parameter(0) + %iota.4 = s32[1234567] iota(), iota_dimension=0 + %sort.27 = (f32[1234567], s32[1234567]) sort(%arg_tuple.1, %iota.4), + dimensions={0}, is_stable=true, to_apply=%compare + %get-tuple-element.28 = f32[1234567] get-tuple-element(%sort.27), index=0 + %slice.29 = f32[5] slice(%get-tuple-element.28), slice={[0:5]} + %get-tuple-element.30 = s32[1234567] get-tuple-element(%sort.27), index=1 + %slice.31 = s32[5] slice(%get-tuple-element.30), slice={[0:5]} + ROOT %tuple.32 = (f32[5], s32[5]) tuple(%slice.29, %slice.31) +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + TopkRewriter rewriter([](const HloSortInstruction*, int64) { return true; }); + TF_ASSERT_OK_AND_ASSIGN(bool changed, rewriter.Run(module.get())); + TF_ASSERT_OK(HloDCE().Run(module.get()).status()); + EXPECT_TRUE(changed); + EXPECT_THAT( + module->entry_computation()->root_instruction(), + op::Tuple(op::GetTupleElement(op::CustomCall(op::Parameter(0)), 0), + op::GetTupleElement(op::CustomCall(op::Parameter(0)), 1))); + const HloInstruction* cc = + module->entry_computation()->root_instruction()->operand(0)->operand(0); + EXPECT_THAT(cc->custom_call_target(), "TopK"); } +TEST_F(TopkRewriterTest, RewriteTranspose) { + const std::string hlo_string = R"( +HloModule module +)" + getComparator() + R"( ENTRY cluster { %arg_tuple.1 = f32[1234567,8] parameter(0) %iota.4 = s32[1234567,8] iota(), iota_dimension=0 From a833385e49b0c79d5f73cd505de217bab85a8afc Mon Sep 17 00:00:00 2001 From: Thai Nguyen Date: Wed, 5 Aug 2020 03:23:12 -0700 Subject: [PATCH 2155/2522] Fix flex delegate selective build in OSS PiperOrigin-RevId: 324987504 Change-Id: I833b4f62f10f16f072cd776225379cded9dea775 --- tensorflow/lite/delegates/flex/build_def.bzl | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/lite/delegates/flex/build_def.bzl b/tensorflow/lite/delegates/flex/build_def.bzl index b4965d1076e..9b9f1b2c4cb 100644 --- a/tensorflow/lite/delegates/flex/build_def.bzl +++ b/tensorflow/lite/delegates/flex/build_def.bzl @@ -136,6 +136,7 @@ def tflite_flex_cc_library( "@com_google_absl//absl/types:optional", "@gemmlowp", "//tensorflow/core:protos_all_cc", + "@icu//:common", "//tensorflow/core:portable_tensorflow_lib_lite", "//tensorflow/core/platform:strong_hash", ], From 3f07f84a0e8e09cbd7261fb2eb2b51a31f20d51a Mon Sep 17 00:00:00 2001 From: Stephan Herhut Date: Wed, 5 Aug 2020 03:44:37 -0700 Subject: [PATCH 2156/2522] Add bufferization pass that transforms hlo and some standard ops. This is good enough to do a tanh operation. 
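As a hedged usage sketch only (not part of this patch), the new pass could be wired into a kernel-generator pipeline through the CreateBufferizePass() factory declared in transforms/passes.h by this change; the wrapper function name below is made up for illustration:

    // Illustrative pipeline wiring, assuming the pass factories declared in
    // tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h.
    #include "mlir/IR/Module.h"
    #include "mlir/Pass/PassManager.h"
    #include "mlir/Support/LogicalResult.h"
    #include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h"

    mlir::LogicalResult BufferizeKernelModule(mlir::ModuleOp module) {
      mlir::PassManager pm(module.getContext());
      // Lower shape computations to descriptors first, then rewrite
      // tensor-valued HLO and standard ops to operate on buffers.
      pm.addPass(mlir::kernel_gen::transforms::CreateShapeToDescriptorsPass());
      pm.addPass(mlir::kernel_gen::transforms::CreateBufferizePass());
      return pm.run(module);
    }
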
PiperOrigin-RevId: 324989254 Change-Id: Ief17856bd17dc9d21feba4ed909d7499a54bdc9d --- .../mlir/tools/kernel_gen/transforms/BUILD | 23 +++- .../tools/kernel_gen/transforms/bufferize.cc | 110 ++++++++++++++++++ .../kernel_gen/transforms/bufferize_pass.cc | 107 +++++++++++++++++ .../mlir/tools/kernel_gen/transforms/passes.h | 6 +- .../tools/kernel_gen/transforms/passes.td | 5 + .../tools/kernel_gen/transforms/rewriters.h | 11 ++ .../transforms/shape_to_descriptors_pass.cc | 1 + 7 files changed, 260 insertions(+), 3 deletions(-) create mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/transforms/bufferize.cc create mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/transforms/bufferize_pass.cc diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD index 0d346da9956..66a378d5990 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD @@ -20,6 +20,20 @@ cc_library( ], ) +cc_library( + name = "bufferize", + srcs = ["bufferize.cc"], + hdrs = ["rewriters.h"], + deps = [ + "@llvm-project//llvm:Support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:Support", + "@llvm-project//mlir:Transforms", + ], +) + cc_library( name = "embed_tf_framework", srcs = ["embed_tf_framework.cc"], @@ -36,7 +50,7 @@ cc_library( ) gentbl( - name = "tf_framework_passes_inc_gen", + name = "kernel_gen_passes_inc_gen", tbl_outs = [("-gen-pass-decls -name KernelGen", "kernel_gen_passes.h.inc")], tblgen = "@llvm-project//mlir:mlir-tblgen", td_file = "passes.td", @@ -46,15 +60,20 @@ gentbl( cc_library( name = "passes", srcs = [ + "bufferize_pass.cc", "embed_tf_framework_pass.cc", "shape_to_descriptors_pass.cc", "tf_framework_legalize_to_llvm_pass.cc", ], hdrs = ["passes.h"], deps = [ + ":bufferize", ":embed_tf_framework", + ":kernel_gen_passes_inc_gen", ":tf_framework_legalize_to_llvm", - ":tf_framework_passes_inc_gen", + "//tensorflow/compiler/mlir/hlo", + "//tensorflow/compiler/mlir/hlo:hlo_legalize_to_lhlo", + "//tensorflow/compiler/mlir/hlo:lhlo", "//tensorflow/compiler/mlir/tools/kernel_gen/ir:tf_framework_ops", "@llvm-project//llvm:Support", "@llvm-project//mlir:IR", diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/bufferize.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/bufferize.cc new file mode 100644 index 00000000000..3d5c820e6dd --- /dev/null +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/bufferize.cc @@ -0,0 +1,110 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// This file implements logic for translating mixed IR to buffer form. 
+ +#include +#include + +#include "llvm/ADT/STLExtras.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project +#include "mlir/IR/Function.h" // from @llvm-project +#include "mlir/IR/MLIRContext.h" // from @llvm-project +#include "mlir/IR/Operation.h" // from @llvm-project +#include "mlir/IR/StandardTypes.h" // from @llvm-project +#include "mlir/Pass/Pass.h" // from @llvm-project +#include "mlir/Transforms/BufferPlacement.h" // from @llvm-project +#include "mlir/Transforms/DialectConversion.h" // from @llvm-project + +namespace mlir { +namespace kernel_gen { +namespace transforms { + +namespace { + +class TensorFromElementsOpConverter + : public BufferAssignmentOpConversionPattern { + public: + using BufferAssignmentOpConversionPattern< + TensorFromElementsOp>::BufferAssignmentOpConversionPattern; + + LogicalResult matchAndRewrite( + TensorFromElementsOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + Location loc = op.getLoc(); + ShapedType result_type = op.getType().cast(); + int number_of_elements = op.elements().size(); + MemRefType memref_type = + MemRefType::get({number_of_elements}, result_type.getElementType()); + Value result = rewriter.create(loc, memref_type); + for (auto operand : llvm::enumerate(operands)) { + Value index = rewriter.create(loc, operand.index()); + rewriter.create(loc, operand.value(), result, index); + } + rewriter.replaceOp(op, {result}); + return success(); + } +}; + +class TensorLoadOpConversion + : public BufferAssignmentOpConversionPattern { + public: + using BufferAssignmentOpConversionPattern< + TensorLoadOp>::BufferAssignmentOpConversionPattern; + + LogicalResult matchAndRewrite( + TensorLoadOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + TensorLoadOpAdaptor adaptor(operands); + rewriter.replaceOp(op, {adaptor.memref()}); + return success(); + } +}; + +class ExtractElementOpConversion + : public BufferAssignmentOpConversionPattern { + public: + using BufferAssignmentOpConversionPattern< + ExtractElementOp>::BufferAssignmentOpConversionPattern; + + LogicalResult matchAndRewrite( + ExtractElementOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const final { + ExtractElementOpAdaptor adaptor(operands); + + if (!adaptor.aggregate().getType().isa()) { + return failure(); + } + + rewriter.replaceOpWithNewOp(op, adaptor.aggregate(), + adaptor.indices()); + return success(); + } +}; + +} // namespace + +void populateStandardBufferizePattern(MLIRContext *context, + BufferAssignmentPlacer *bufferAssignment, + TypeConverter *converter, + OwningRewritePatternList *patterns) { + patterns->insert(context, bufferAssignment, + converter); +} + +} // namespace transforms +} // namespace kernel_gen +} // namespace mlir diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/bufferize_pass.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/bufferize_pass.cc new file mode 100644 index 00000000000..ebbc92f64c7 --- /dev/null +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/bufferize_pass.cc @@ -0,0 +1,107 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// This file implements logic for translating mixed IR to buffer form. +// Currently it supports MHLO and some operations from the Standard dialect. + +#include + +#include "mlir/Dialect/SCF/SCF.h" // from @llvm-project +#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project +#include "mlir/IR/Function.h" // from @llvm-project +#include "mlir/IR/MLIRContext.h" // from @llvm-project +#include "mlir/IR/Operation.h" // from @llvm-project +#include "mlir/IR/PatternMatch.h" // from @llvm-project +#include "mlir/IR/StandardTypes.h" // from @llvm-project +#include "mlir/IR/Visitors.h" // from @llvm-project +#include "mlir/Pass/Pass.h" // from @llvm-project +#include "mlir/Transforms/BufferPlacement.h" // from @llvm-project +#include "mlir/Transforms/DialectConversion.h" // from @llvm-project +#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" +#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" +#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/passes.h" +#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h" +#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h" +#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/rewriters.h" + +namespace mlir { +namespace kernel_gen { +namespace transforms { +namespace { + +#define GEN_PASS_CLASSES +#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/kernel_gen_passes.h.inc" + +struct BufferizePass : public BufferizePassBase { + public: + void runOnOperation() override { + OwningRewritePatternList patterns; + auto& context = getContext(); + ConversionTarget target(context); + target.addLegalDialect(); + target.addLegalDialect(); + target.addLegalDialect(); + target.addLegalOp(); + target.addLegalOp(); + target.addIllegalDialect(); + target.addIllegalOp(); + target.addIllegalOp(); + target.addIllegalOp(); + + BufferAssignmentTypeConverter converter; + auto typesAreLegal = [&converter](Operation* op) { + return converter.isLegal(op->getOperandTypes()) && + converter.isLegal(op->getResultTypes()); + }; + target.addDynamicallyLegalOp([&](FuncOp op) { + auto inputs = op.getType().getInputs(); + auto results = op.getType().getResults(); + return converter.isLegal(inputs) && converter.isLegal(results) && + converter.isLegal(&op.getBody()); + }); + target.addDynamicallyLegalOp(typesAreLegal); + target.addDynamicallyLegalOp(typesAreLegal); + + auto module = getOperation(); + WalkResult result = module.walk([&](FuncOp func) -> WalkResult { + BufferAssignmentPlacer bufferAssignment(func); + OwningRewritePatternList patterns; + mhlo::populateHLOToLHLOConversionPattern( + func.getContext(), &bufferAssignment, &converter, &patterns); + populateWithBufferAssignmentOpConversionPatterns< + ReturnOp, ReturnOp, lmhlo::CopyOp, + /*allowMemrefFunctionResults=*/true>(&context, &bufferAssignment, + &converter, &patterns); + populateStandardBufferizePattern(func.getContext(), &bufferAssignment, + &converter, &patterns); + + 
return applyFullConversion(func, target, patterns); + }); + module.dump(); + if (result.wasInterrupted()) { + signalPassFailure(); + } + } +}; + +} // namespace + +std::unique_ptr > CreateBufferizePass() { + return std::make_unique(); +} + +} // namespace transforms +} // namespace kernel_gen +} // namespace mlir diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h index 13f367c9fe4..e65d8402fb2 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h @@ -41,7 +41,11 @@ namespace transforms { // Pass to tranform shape computations in shape dialect to standard and scf // using memref descriptors. -std::unique_ptr CreateShapeToDescriptorsPass(); +std::unique_ptr > CreateShapeToDescriptorsPass(); + +// Pass to tranform computations on values to their corresponding parts on +// buffers. +std::unique_ptr > CreateBufferizePass(); } // namespace transforms diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.td b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.td index 61720674926..6a0e328f212 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.td +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.td @@ -34,4 +34,9 @@ def ShapeToDescriptorsPass : Pass<"test-shape-to-descriptors", "ModuleOp"> { let constructor = "transforms::CreateShapeToDescriptorsPass()"; } +def BufferizePass : Pass<"test-bufferize", "ModuleOp"> { + let summary = "Pass to transform operations on values to buffer based ones"; + let constructor = "transforms::CreateBufferizePass()"; +} + #endif // TF_FRAMEWORK_PASSES diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/rewriters.h b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/rewriters.h index 257e84b4a21..4efc1e95bc8 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/rewriters.h +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/rewriters.h @@ -20,6 +20,7 @@ limitations under the License. namespace mlir { +class BufferAssignmentPlacer; class LLVMTypeConverter; class MLIRContext; class OwningRewritePatternList; @@ -37,6 +38,16 @@ void PopulateEmbedTFFrameworkConversionPatterns( MLIRContext *context, OwningRewritePatternList *patterns); } // namespace tf_framework + +namespace transforms { + +/// Collects a set of patterns that bufferize operations from the standard +/// dialect. +void populateStandardBufferizePattern(MLIRContext *context, + BufferAssignmentPlacer *bufferAssignment, + TypeConverter *converter, + OwningRewritePatternList *patterns); +} // namespace transforms } // namespace kernel_gen } // namespace mlir diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/shape_to_descriptors_pass.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/shape_to_descriptors_pass.cc index 9c1b434b9b2..28d3647bb63 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/shape_to_descriptors_pass.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/shape_to_descriptors_pass.cc @@ -26,6 +26,7 @@ limitations under the License. 
#include "mlir/IR/PatternMatch.h" // from @llvm-project #include "mlir/Pass/Pass.h" // from @llvm-project #include "mlir/Transforms/DialectConversion.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h" namespace mlir { namespace kernel_gen { From da84c4fd367d157578f111e36dd69a821cd5179e Mon Sep 17 00:00:00 2001 From: Stephan Herhut Date: Wed, 5 Aug 2020 03:52:27 -0700 Subject: [PATCH 2157/2522] Remove optional static registration for hlo dialects again. Instead, we invoke multiple test tools in a row in end to end tests now. For hlo dialects and passes, we use mlir-hlo-opt explicitly. PiperOrigin-RevId: 324989884 Change-Id: I2601dee460075d05cf0befe250abb91967317f1b --- tensorflow/compiler/mlir/hlo/BUILD | 7 ----- .../mhlo/transforms/register_all_passes.cc | 28 ------------------- 2 files changed, 35 deletions(-) delete mode 100644 tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/register_all_passes.cc diff --git a/tensorflow/compiler/mlir/hlo/BUILD b/tensorflow/compiler/mlir/hlo/BUILD index 9eee39894e4..3bbe628cccd 100644 --- a/tensorflow/compiler/mlir/hlo/BUILD +++ b/tensorflow/compiler/mlir/hlo/BUILD @@ -807,13 +807,6 @@ cc_library( ], ) -cc_library( - name = "register_all_passes", - srcs = ["lib/Dialect/mhlo/transforms/register_all_passes.cc"], - deps = [":all_passes"], - alwayslink = 1, -) - cc_binary( name = "mlir-hlo-opt", srcs = [ diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/register_all_passes.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/register_all_passes.cc deleted file mode 100644 index 9349bee041e..00000000000 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/register_all_passes.cc +++ /dev/null @@ -1,28 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "mlir-hlo/Dialect/mhlo/transforms/register_passes.h" - -namespace mlir { - -namespace { - -bool register_all_passes = ([] { - mhlo::registerAllMhloPasses(); - lmhlo::registerAllLmhloPasses(); -}(), true); - -} // namespace -} // namespace mlir From 671844cd4186d04de4aaec2df4458a9753d0dac5 Mon Sep 17 00:00:00 2001 From: Chao Mei Date: Wed, 5 Aug 2020 04:08:12 -0700 Subject: [PATCH 2158/2522] Support to generate zip tests (i.e. cc_test) against a particular delegate, like xnnpack delegate. PiperOrigin-RevId: 324991483 Change-Id: I1876d23363a854791ddc8e24d398e9f49c625be3 --- tensorflow/lite/build_def.bzl | 28 ++++++++++++++++++++++++++-- tensorflow/lite/testing/BUILD | 21 ++++++++++++--------- 2 files changed, 38 insertions(+), 11 deletions(-) diff --git a/tensorflow/lite/build_def.bzl b/tensorflow/lite/build_def.bzl index 001b2fc791e..4de0be7c3fa 100644 --- a/tensorflow/lite/build_def.bzl +++ b/tensorflow/lite/build_def.bzl @@ -578,7 +578,14 @@ def flags_for_merged_test_models(test_name, conversion_mode): tests_csv = tests_csv[:-1] # Remove trailing comma. 
return " --no_tests_limit --test_sets=%s" % tests_csv -def gen_zip_test(name, test_name, conversion_mode, **kwargs): +def gen_zip_test( + name, + test_name, + conversion_mode, + test_tags, + test_args, + additional_test_args = {}, + **kwargs): """Generate a zipped-example test and its dependent zip files. Args: @@ -586,6 +593,11 @@ def gen_zip_test(name, test_name, conversion_mode, **kwargs): test_name: str. Test targets this model. Comes from the list above. conversion_mode: str. Which conversion mode to run with. Comes from the list above. + test_tags: tags for the generated cc_test. + test_args: the basic cc_test args to be used. + additional_test_args: a dictionary of additional args to be used together + with test_args. The key is an identifier to be used in test tag, and + the value is a list of additional test args to be used. **kwargs: tf_cc_test kwargs """ toco = "//tensorflow/lite/toco:toco" @@ -603,7 +615,19 @@ def gen_zip_test(name, test_name, conversion_mode, **kwargs): toco = toco, flags = flags + " --save_graphdefs", ) - tf_cc_test(name, **kwargs) + tf_cc_test( + name, + args = test_args, + tags = test_tags + ["gen_zip_test"], + **kwargs + ) + for key, value in additional_test_args.items(): + tf_cc_test( + name = "%s_%s" % (name, key), + args = test_args + value, + tags = test_tags + ["gen_zip_test_%s" % key], + **kwargs + ) def gen_zipped_test_file(name, file, toco, flags): """Generate a zip file of tests by using :generate_examples. diff --git a/tensorflow/lite/testing/BUILD b/tensorflow/lite/testing/BUILD index 3d4527e926e..4bfc17dc509 100644 --- a/tensorflow/lite/testing/BUILD +++ b/tensorflow/lite/testing/BUILD @@ -35,7 +35,16 @@ exports_files([ name = "zip_test_%s" % test_name, size = "medium", srcs = ["generated_examples_zip_test.cc"], - args = args + select({ + additional_test_args = { + # TODO(b/162696268): uncomment once the bug is fixed. + # "xnnpack": ["--use_xnnpack=true"], + }, + conversion_mode = conversion_mode, + data = [ + ":zip_%s" % test_name, + ], + shard_count = 20, + test_args = args + select({ "//tensorflow:android": [], "//conditions:default": [ "--zip_file_path=$(location :zip_%s)" % test_name, @@ -44,18 +53,12 @@ exports_files([ "--unzip_binary_path=/usr/bin/unzip", ], }), - conversion_mode = conversion_mode, - data = [ - ":zip_%s" % test_name, - ], - shard_count = 20, - tags = tags + [ - "gen_zip_test", + test_name = test_name, + test_tags = tags + [ "no_gpu", # Executing with TF GPU configurations is redundant. "no_oss", "tflite_not_portable_intentional", ], - test_name = test_name, deps = [ ":parse_testdata_lib", ":tflite_driver", From 4a64fa3df8455d06a81c126a28262f64db60d15b Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 5 Aug 2020 04:20:39 -0700 Subject: [PATCH 2159/2522] [XLA:CPU] Fix a crash in topk emission by canonicalizing pointers If there are multiple topk of different shapes in the same module, the signature of our runtime function will contain the shapes of the first instance. Subsequent instances clash with that signature. Canonicalize the types so all signatures become identical. 
PiperOrigin-RevId: 324992669 Change-Id: Ibbbfdd671dedfcdfdb85706e3cffdf8d64859da6 --- tensorflow/compiler/xla/service/cpu/ir_emitter.cc | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index 72f4d5369c8..242f3c6ceb7 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -2412,11 +2412,14 @@ Status IrEmitter::HandleTopK(HloInstruction* hlo) { EmitBufferPointer(out_values_slice, hlo->shape().tuple_shapes(0)); llvm::Value* out_indices_ptr = EmitBufferPointer(out_indices_slice, hlo->shape().tuple_shapes(1)); - EmitCallToFunc(runtime::kTopKF32SymbolName, - {b_.getInt64(has_batch ? input->shape().dimensions(0) : 1), - b_.getInt64(input->shape().dimensions().back()), - b_.getInt64(k), values_ptr, out_values_ptr, out_indices_ptr}, - b_.getVoidTy()); + EmitCallToFunc( + runtime::kTopKF32SymbolName, + {b_.getInt64(has_batch ? input->shape().dimensions(0) : 1), + b_.getInt64(input->shape().dimensions().back()), b_.getInt64(k), + BitCast(values_ptr, b_.getFloatTy()->getPointerTo()), + BitCast(out_values_ptr, b_.getFloatTy()->getPointerTo()), + BitCast(out_indices_ptr, b_.getInt32Ty()->getPointerTo())}, + b_.getVoidTy()); llvm_ir::EmitTuple(GetIrArrayFor(hlo), {out_values_ptr, out_indices_ptr}, &b_); From 3c945050395eb6f283e8838257256ab4574b7e4a Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 5 Aug 2020 04:30:28 -0700 Subject: [PATCH 2160/2522] Integrate LLVM at llvm/llvm-project@c558c22cab9a Updates LLVM usage to match [c558c22cab9a](https://github.com/llvm/llvm-project/commit/c558c22cab9a) PiperOrigin-RevId: 324993578 Change-Id: I5de7a4aa5c53170f2a749b93e7038f49b9d0721c --- tensorflow/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index b770dfeead5..ffe1e95beff 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -712,8 +712,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "28e322ea9393e6b3841886006dd170ddd810fd9b" - LLVM_SHA256 = "438268a47b69687ea5e588a285a2255de414addc36e0405e1d70f7cb5208aa75" + LLVM_COMMIT = "c558c22cab9a555d2e521102b775759381e9727f" + LLVM_SHA256 = "b3651e78f4f3b372273c71cb58e0d0767b61e7d9c93b79fd399065c1148089f5" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From 428d74a880b481665c1a5b47153f9ea4d21db08f Mon Sep 17 00:00:00 2001 From: Alexander Belyaev Date: Wed, 5 Aug 2020 05:16:47 -0700 Subject: [PATCH 2161/2522] [MLIR][KERNEL_GEN] Add tf framework C interface. All alloc and dealloc ops in the kernel have to use TF OpKernelContext. std.alloc and std.dealloc are converted to tf_framework.alloc_raw and tf_framework.dealloc_raw which are lowered to external calls. This PR adds a C++ library with the external functions and also adds a fake implementation for testing. 
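As an illustration only (not the code added by this patch), one way a test-only stub of the two external entry points could look is sketched below: it ignores the OpKernelContext and falls back to the system allocator. The symbol names come from tf_framework_c_interface.h in this change; everything else is hypothetical:

    // Hypothetical test stub keeping _mlir_ciface_tf_alloc_raw and
    // _mlir_ciface_tf_dealloc_raw linkable without a real
    // tensorflow::OpKernelContext.
    #include <cstddef>
    #include <cstdlib>

    extern "C" void* _mlir_ciface_tf_alloc_raw(void* op_kernel_ctx,
                                               size_t num_bytes) {
      // The context may be the nullptr produced by tf_framework.null_context;
      // the stub ignores it and calls malloc directly.
      (void)op_kernel_ctx;
      return std::malloc(num_bytes);
    }

    extern "C" void _mlir_ciface_tf_dealloc_raw(void* op_kernel_ctx,
                                                void* ptr) {
      (void)op_kernel_ctx;
      std::free(ptr);
    }
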
PiperOrigin-RevId: 324998567 Change-Id: I8528ea3fdf68d0b653cc7b185563ffb50eca8571 --- tensorflow/compiler/mlir/hlo/BUILD | 1 + .../compiler/mlir/tools/kernel_gen/BUILD | 17 ++++++- .../tools/kernel_gen/ir/tf_framework_ops.cc | 13 ++--- .../tools/kernel_gen/ir/tf_framework_ops.td | 13 ++++- .../kernel_gen/tf_framework_c_interface.cc | 49 +++++++++++++++++++ .../kernel_gen/tf_framework_c_interface.h | 35 +++++++++++++ .../mlir/tools/kernel_gen/transforms/BUILD | 1 + .../tf_framework_legalize_to_llvm.cc | 14 ++++++ .../tf_framework_legalize_to_llvm_pass.cc | 2 + 9 files changed, 137 insertions(+), 8 deletions(-) create mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/tf_framework_c_interface.cc create mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/tf_framework_c_interface.h diff --git a/tensorflow/compiler/mlir/hlo/BUILD b/tensorflow/compiler/mlir/hlo/BUILD index 3bbe628cccd..dd63b68b890 100644 --- a/tensorflow/compiler/mlir/hlo/BUILD +++ b/tensorflow/compiler/mlir/hlo/BUILD @@ -404,6 +404,7 @@ cc_library( cc_library( name = "lhlo_legalize_to_llvm", srcs = ["lib/Dialect/mhlo/transforms/lhlo_legalize_to_llvm.cc"], + hdrs = ["include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h"], deps = [ ":lhlo", "@llvm-project//mlir:IR", diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/BUILD b/tensorflow/compiler/mlir/tools/kernel_gen/BUILD index 066ca221d5d..5befdcdc513 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/BUILD +++ b/tensorflow/compiler/mlir/tools/kernel_gen/BUILD @@ -1,7 +1,10 @@ load("//tensorflow:tensorflow.bzl", "tf_cc_binary") load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") -licenses(["notice"]) +package( + default_visibility = [":friends"], + licenses = ["notice"], # Apache 2.0 +) package_group( name = "friends", @@ -74,3 +77,15 @@ tf_cc_binary( "@llvm-project//mlir:Support", ], ) + +exports_files(["tf_framework_c_interface.h"]) + +cc_library( + name = "tf_framework_c_interface", + srcs = ["tf_framework_c_interface.cc"], + hdrs = ["tf_framework_c_interface.h"], + deps = [ + "//tensorflow/core:framework", + "@llvm-project//mlir:mlir_runner_utils", + ], +) diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.cc b/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.cc index e67b5fd7f85..f85f1229fe8 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.cc @@ -58,10 +58,16 @@ void TFFrameworkDialect::printType(Type type, DialectAsmPrinter &os) const { } } +template +LogicalResult Verify(OpTy op) { + return success(); +} + //===----------------------------------------------------------------------===// // AllocRawOp //===----------------------------------------------------------------------===// -static LogicalResult Verify(AllocRawOp op) { +template <> +LogicalResult Verify(AllocRawOp op) { // Check that the total number of operands matches the number of dynamic // dimensions specified in the memref type. 
unsigned result_dyn_dims = op.getType().getNumDynamicDims(); @@ -74,11 +80,6 @@ static LogicalResult Verify(AllocRawOp op) { return success(); } -//===----------------------------------------------------------------------===// -// DeallocRawOp -//===----------------------------------------------------------------------===// -static LogicalResult Verify(DeallocRawOp op) { return success(); } - #define GET_OP_CLASSES #include "tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.cc.inc" diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.td b/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.td index 65481ad377f..bc390a5aaa5 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.td +++ b/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.td @@ -45,7 +45,7 @@ def TFFramework_OpKernelContextType : DialectType traits = []> : Op { - let verifier = "return Verify(*this);"; + let verifier = "return Verify<$cppClass>(*this);"; } //===----------------------------------------------------------------------===// @@ -111,4 +111,15 @@ def TFFramework_DeallocRawOp : TFFramework_Op<"dealloc_raw", let assemblyFormat = "`(` $ctx `,` $memref `)` attr-dict `:` type($memref)"; } +//===----------------------------------------------------------------------===// +// NullContextOp +//===----------------------------------------------------------------------===// +def TFFramework_NullContextOp : TFFramework_Op<"null_context", + [NoSideEffect]> { + let summary = "Creates a fake TF context that will be lowered to nullptr"; + let description = [{Needed for testing}]; + let results = (outs TFFramework_OpKernelContextType:$result); + let assemblyFormat = "`(` `)` attr-dict `:` type($result)"; +} + #endif // TF_FRAMEWORK_OPS diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tf_framework_c_interface.cc b/tensorflow/compiler/mlir/tools/kernel_gen/tf_framework_c_interface.cc new file mode 100644 index 00000000000..e75db59d885 --- /dev/null +++ b/tensorflow/compiler/mlir/tools/kernel_gen/tf_framework_c_interface.cc @@ -0,0 +1,49 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/mlir/tools/kernel_gen/tf_framework_c_interface.h" + +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/op_kernel.h" + +namespace mlir { +namespace kernel_gen { +namespace tf_framework { +namespace { + +using tensorflow::Allocator; + +Allocator* GetAllocator(void* op_kernel_ctx) { + auto* ctx = static_cast(op_kernel_ctx); + // TODO(pifon): Figure out how to set AllocatorAttributes correctly. 
+ tensorflow::AllocatorAttributes attrs; + return ctx->get_allocator(attrs); +} + +} // namespace + +extern "C" void* _mlir_ciface_tf_alloc_raw(void* op_kernel_ctx, + size_t num_bytes) { + return GetAllocator(op_kernel_ctx) + ->AllocateRaw(Allocator::kAllocatorAlignment, num_bytes); +} + +extern "C" void _mlir_ciface_tf_dealloc_raw(void* op_kernel_ctx, void* ptr) { + GetAllocator(op_kernel_ctx)->DeallocateRaw(ptr); +} + +} // namespace tf_framework +} // namespace kernel_gen +} // namespace mlir diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tf_framework_c_interface.h b/tensorflow/compiler/mlir/tools/kernel_gen/tf_framework_c_interface.h new file mode 100644 index 00000000000..143ebc95932 --- /dev/null +++ b/tensorflow/compiler/mlir/tools/kernel_gen/tf_framework_c_interface.h @@ -0,0 +1,35 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_MLIR_TOOLS_KERNEL_GEN_TESTS_TF_FRAMEWORK_C_INTERFACE_H_ +#define TENSORFLOW_COMPILER_MLIR_TOOLS_KERNEL_GEN_TESTS_TF_FRAMEWORK_C_INTERFACE_H_ + +#include "mlir/ExecutionEngine/RunnerUtils.h" // from @llvm-project + +namespace mlir { +namespace kernel_gen { +namespace tf_framework { + +extern "C" MLIR_RUNNERUTILS_EXPORT void* _mlir_ciface_tf_alloc_raw( + void* op_kernel_ctx, size_t num_bytes); + +extern "C" MLIR_RUNNERUTILS_EXPORT void _mlir_ciface_tf_dealloc_raw( + void* op_kernel_ctx, void* ptr); + +} // namespace tf_framework +} // namespace kernel_gen +} // namespace mlir + +#endif // TENSORFLOW_COMPILER_MLIR_TOOLS_KERNEL_GEN_TESTS_TF_FRAMEWORK_C_INTERFACE_H_ diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD index 66a378d5990..b0f22b40f5b 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD @@ -74,6 +74,7 @@ cc_library( "//tensorflow/compiler/mlir/hlo", "//tensorflow/compiler/mlir/hlo:hlo_legalize_to_lhlo", "//tensorflow/compiler/mlir/hlo:lhlo", + "//tensorflow/compiler/mlir/hlo:lhlo_legalize_to_llvm", "//tensorflow/compiler/mlir/tools/kernel_gen/ir:tf_framework_ops", "@llvm-project//llvm:Support", "@llvm-project//mlir:IR", diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_legalize_to_llvm.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_legalize_to_llvm.cc index 2edcaabd7b4..3ce111ff3ff 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_legalize_to_llvm.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_legalize_to_llvm.cc @@ -101,6 +101,7 @@ class AllocRawOpConverter : public ConvertToLLVMCallOpPattern { protected: StringRef GetFuncName() const override { return kCInterfaceAlloc; } + LLVMType GetFuncType() const override { LLVMType llvm_void_ptr_type = getVoidPtrType(); return LLVM::LLVMType::getFunctionTy( @@ -175,10 +176,23 
@@ class DeallocRawOpConverter : public ConvertToLLVMCallOpPattern { } }; +class NullContextOpConverter : public ConvertOpToLLVMPattern { + public: + using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; + + LogicalResult matchAndRewrite( + Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override { + rewriter.replaceOpWithNewOp(op, getVoidPtrType()); + return success(); + } +}; + } // namespace void PopulateTFFrameworkToLLVMConversionPatterns( LLVMTypeConverter *converter, OwningRewritePatternList *patterns) { + patterns->insert(*converter); patterns->insert(*converter); } diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_legalize_to_llvm_pass.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_legalize_to_llvm_pass.cc index 916eedb55de..41b38bb574f 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_legalize_to_llvm_pass.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_legalize_to_llvm_pass.cc @@ -18,6 +18,7 @@ limitations under the License. #include "mlir/Dialect/LLVMIR/LLVMDialect.h" // from @llvm-project #include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project #include "mlir/Pass/Pass.h" // from @llvm-project +#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h" #include "tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.h" #include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h" #include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/rewriters.h" @@ -46,6 +47,7 @@ class TestTFFrameworkToLLVMPass OwningRewritePatternList patterns; populateStdToLLVMConversionPatterns(type_converter, patterns); PopulateTFFrameworkToLLVMConversionPatterns(&type_converter, &patterns); + lmhlo::PopulateLhloToLLVMConversionPatterns(&type_converter, &patterns); // Set target. ConversionTarget target(getContext()); From 739c7560a18eb550615f0e98d8f36d82eab9ab8f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 5 Aug 2020 07:18:54 -0700 Subject: [PATCH 2162/2522] Add support for `RaggedTensors` as lookup keys to InitializableLookupTableBase, IdTableWithHashBuckets, and StaticVocabularyTable. 
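For illustration, a minimal sketch of what this change enables at the Python API level. This is not part of the patch; it simply mirrors the tests added below and assumes the public `tf.lookup` and `tf.ragged` APIs:

```python
import tensorflow as tf

# Static string -> id table; -1 is returned for keys that are not present.
keys = tf.constant(["brain", "salad", "surgery"])
values = tf.constant([0, 1, 2], dtype=tf.int64)
table = tf.lookup.StaticHashTable(
    tf.lookup.KeyValueTensorInitializer(keys, values), default_value=-1)

# With this change, ragged keys can be passed directly: the lookup is applied
# to the flat values and the row partitioning of the input is preserved.
ragged_keys = tf.ragged.constant([["brain", "salad"], ["tank"]])
ids = table.lookup(ragged_keys)  # RaggedTensor [[0, 1], [-1]]
```

The same pattern applies to `IdTableWithHashBuckets` and `StaticVocabularyTable`, where out-of-vocabulary keys fall into the hash buckets instead of returning the default value.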
PiperOrigin-RevId: 325014854 Change-Id: I0ae8fbf5af8d6fadee1fa60b1c06f4ae6e4eb094 --- .../python/kernel_tests/lookup_ops_test.py | 156 ++++++++++++++++++ tensorflow/python/ops/lookup_ops.py | 25 ++- 2 files changed, 175 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/kernel_tests/lookup_ops_test.py b/tensorflow/python/kernel_tests/lookup_ops_test.py index a1ded4a9e3b..045dafc3089 100644 --- a/tensorflow/python/kernel_tests/lookup_ops_test.py +++ b/tensorflow/python/kernel_tests/lookup_ops_test.py @@ -45,6 +45,7 @@ from tensorflow.python.ops import lookup_ops from tensorflow.python.ops import map_fn from tensorflow.python.ops import string_ops from tensorflow.python.ops import variables +from tensorflow.python.ops.ragged import ragged_tensor from tensorflow.python.platform import test from tensorflow.python.training import saver from tensorflow.python.training import server_lib @@ -213,6 +214,25 @@ class StaticHashTableTest(BaseLookupTableTest): self.assertAllEqual(sp_indices, out_indices) self.assertAllEqual(sp_shape, out_shape) + def testStaticHashTableWithRaggedTensorInput(self): + default_val = constant_op.constant(-1, dtypes.int64) + keys = constant_op.constant(["brain", "salad", "surgery"]) + values = constant_op.constant([0, 1, 2], dtypes.int64) + table = self.getHashTable()( + lookup_ops.KeyValueTensorInitializer(keys, values), default_val) + self.initialize_table(table) + + row_splits = [0, 2, 3] + input_tensor = ragged_tensor.RaggedTensor.from_row_splits( + constant_op.constant(["brain", "salad", "tank"]), + constant_op.constant(row_splits, dtypes.int64)) + output = table.lookup(input_tensor) + + out = self.evaluate(output) + + self.assertAllEqual([0, 1, -1], out.values) + self.assertAllEqual(row_splits, out.row_splits) + def testSignatureMismatch(self): default_val = -1 keys = constant_op.constant(["brain", "salad", "surgery"]) @@ -1081,6 +1101,28 @@ class StaticVocabularyTableTest(BaseLookupTableTest): self.assertAllEqual([0, 1, 0, 2, 3], sp_ids_val) self.assertAllEqual(input_shape, sp_ids_shape) + def testRaggedTensor(self): + vocab_file = self._createVocabFile("feat_to_id_7.txt") + input_row_splits = [0, 2, 4, 5] + ragged_features = ragged_tensor.RaggedTensor.from_row_splits( + constant_op.constant(["brain", "salad", "brain", "surgery", "tarkus"], + dtypes.string), + constant_op.constant(input_row_splits, dtypes.int64)) + + table = self.getVocabularyTable()(lookup_ops.TextFileIdTableInitializer( + vocab_file, vocab_size=3), 1) + self.initialize_table(table) + + ragged_ids = table.lookup(ragged_features) + + self.assertAllEqual([5], ragged_ids.values._shape_as_list()) + + ragged_ids_val, ragged_ids_row_splits = self.evaluate( + [ragged_ids.values, ragged_ids.row_splits]) + + self.assertAllEqual([0, 1, 0, 2, 3], ragged_ids_val) + self.assertAllEqual(input_row_splits, ragged_ids_row_splits) + def testInt32SparseTensor(self): input_indices = [[0, 0], [0, 1], [2, 0], [2, 2], [3, 0]] input_shape = [4, 4] @@ -1107,6 +1149,29 @@ class StaticVocabularyTableTest(BaseLookupTableTest): self.assertAllEqual([0, 1, 0, 2, 3], sp_ids_val) self.assertAllEqual(input_shape, sp_ids_shape) + def testInt32RaggedTensor(self): + input_row_splits = [0, 2, 4, 5] + ragged_features = ragged_tensor.RaggedTensor.from_row_splits( + constant_op.constant([42, 1, 42, -1000, 11], dtypes.int32), + constant_op.constant(input_row_splits, dtypes.int64)) + + table = self.getVocabularyTable()( + lookup_ops.KeyValueTensorInitializer((42, 1, -1000), (0, 1, 2), + dtypes.int64, dtypes.int64), + 1, + 
lookup_key_dtype=dtypes.int32) + self.initialize_table(table) + + ragged_ids = table.lookup(ragged_features) + + self.assertAllEqual([5], ragged_ids.values._shape_as_list()) + + ragged_ids_val, ragged_ids_row_splits = self.evaluate( + [ragged_ids.values, ragged_ids.row_splits]) + + self.assertAllEqual([0, 1, 0, 2, 3], ragged_ids_val) + self.assertAllEqual(input_row_splits, ragged_ids_row_splits) + def testInt64SparseTensor(self): input_indices = [[0, 0], [0, 1], [2, 0], [2, 2], [3, 0]] input_shape = [4, 4] @@ -1130,6 +1195,26 @@ class StaticVocabularyTableTest(BaseLookupTableTest): self.assertAllEqual([0, 1, 0, 2, 3], sp_ids_val) self.assertAllEqual(input_shape, sp_ids_shape) + def testInt64RaggedTensor(self): + input_row_splits = [0, 2, 4, 5] + ragged_features = ragged_tensor.RaggedTensor.from_row_splits( + constant_op.constant([42, 1, 42, -1000, 11], dtypes.int64), + constant_op.constant(input_row_splits, dtypes.int64)) + + table = self.getVocabularyTable()(lookup_ops.KeyValueTensorInitializer( + (42, 1, -1000), (0, 1, 2), dtypes.int64, dtypes.int64), 1) + self.initialize_table(table) + + ragged_ids = table.lookup(ragged_features) + + self.assertAllEqual([5], ragged_ids.values._shape_as_list()) + + ragged_ids_val, ragged_ids_row_splits = self.evaluate( + [ragged_ids.values, ragged_ids.row_splits]) + + self.assertAllEqual([0, 1, 0, 2, 3], ragged_ids_val) + self.assertAllEqual(input_row_splits, ragged_ids_row_splits) + def testStaticVocabularyTableNoInnerTable(self): table = self.getVocabularyTable()(None, num_oov_buckets=1) self.assertIsNone(table.resource_handle) @@ -2682,6 +2767,29 @@ class IdTableWithHashBucketsTest(test.TestCase): self.assertAllEqual([0, 1, 0, 2, 3], sp_ids_val) self.assertAllEqual(input_shape, sp_ids_shape) + def testRaggedTensor(self): + vocab_file = self._createVocabFile("feat_to_id_7.txt") + input_row_splits = [0, 2, 4, 5] + ragged_features = ragged_tensor.RaggedTensor.from_row_splits( + constant_op.constant(["brain", "salad", "brain", "surgery", "tarkus"], + dtypes.string), + constant_op.constant(input_row_splits, dtypes.int64)) + + table = lookup_ops.IdTableWithHashBuckets( + lookup_ops.StaticHashTable( + lookup_ops.TextFileIdTableInitializer(vocab_file, vocab_size=3), + -1), 1) + self.evaluate(table.initializer) + + ragged_ids = table.lookup(ragged_features) + self.assertAllEqual([5], ragged_ids.values._shape_as_list()) + + ragged_ids_val, ragged_ids_row_splits = self.evaluate( + [ragged_ids.values, ragged_ids.row_splits]) + + self.assertAllEqual([0, 1, 0, 2, 3], ragged_ids_val) + self.assertAllEqual(input_row_splits, ragged_ids_row_splits) + def testInt32SparseTensor(self): input_indices = [[0, 0], [0, 1], [2, 0], [2, 2], [3, 0]] input_shape = [4, 4] @@ -2709,6 +2817,30 @@ class IdTableWithHashBucketsTest(test.TestCase): self.assertAllEqual([0, 1, 0, 2, 3], sp_ids_val) self.assertAllEqual(input_shape, sp_ids_shape) + def testInt32RaggedTensor(self): + input_row_splits = [0, 2, 4, 5] + ragged_features = ragged_tensor.RaggedTensor.from_row_splits( + constant_op.constant([42, 1, 42, -1000, 11], dtypes.int32), + constant_op.constant(input_row_splits, dtypes.int32)) + + table = lookup_ops.IdTableWithHashBuckets( + lookup_ops.StaticHashTable( + lookup_ops.KeyValueTensorInitializer( + (42, 1, -1000), (0, 1, 2), dtypes.int64, dtypes.int64), -1), + 1, + key_dtype=dtypes.int32) + self.evaluate(table.initializer) + + ragged_ids = table.lookup(ragged_features) + + self.assertAllEqual([5], ragged_ids.values._shape_as_list()) + + ragged_ids_val, ragged_ids_row_splits = 
self.evaluate( + [ragged_ids.values, ragged_ids.row_splits]) + + self.assertAllEqual([0, 1, 0, 2, 3], ragged_ids_val) + self.assertAllEqual(input_row_splits, ragged_ids_row_splits) + def testInt64SparseTensor(self): input_indices = [[0, 0], [0, 1], [2, 0], [2, 2], [3, 0]] input_shape = [4, 4] @@ -2736,6 +2868,30 @@ class IdTableWithHashBucketsTest(test.TestCase): self.assertAllEqual([0, 1, 0, 2, 3], sp_ids_val) self.assertAllEqual(input_shape, sp_ids_shape) + def testInt64RaggedTensor(self): + input_row_splits = [0, 2, 4, 5] + ragged_features = ragged_tensor.RaggedTensor.from_row_splits( + constant_op.constant([42, 1, 42, -1000, 11], dtypes.int64), + constant_op.constant(input_row_splits, dtypes.int64)) + + table = lookup_ops.IdTableWithHashBuckets( + lookup_ops.StaticHashTable( + lookup_ops.KeyValueTensorInitializer( + (42, 1, -1000), (0, 1, 2), dtypes.int64, dtypes.int64), -1), + 1, + key_dtype=dtypes.int64) + self.evaluate(table.initializer) + + ragged_ids = table.lookup(ragged_features) + + self.assertAllEqual([5], ragged_ids.values._shape_as_list()) + + ragged_ids_val, ragged_ids_row_splits = self.evaluate( + [ragged_ids.values, ragged_ids.row_splits]) + + self.assertAllEqual([0, 1, 0, 2, 3], ragged_ids_val) + self.assertAllEqual(input_row_splits, ragged_ids_row_splits) + def testIdTableWithHashBucketsWithInvalidHashers(self): vocab_file = self._createVocabFile("feat_to_id_4.txt") with self.cached_session(): diff --git a/tensorflow/python/ops/lookup_ops.py b/tensorflow/python/ops/lookup_ops.py index 87b8aaa30bd..9f27ccf9a1c 100644 --- a/tensorflow/python/ops/lookup_ops.py +++ b/tensorflow/python/ops/lookup_ops.py @@ -40,6 +40,7 @@ from tensorflow.python.ops import string_ops # go/tf-wildcard-import # pylint: disable=wildcard-import from tensorflow.python.ops.gen_lookup_ops import * +from tensorflow.python.ops.ragged import ragged_tensor from tensorflow.python.training.saver import BaseSaverBuilder # pylint: enable=wildcard-import from tensorflow.python.training.tracking import base as trackable_base @@ -209,14 +210,16 @@ class InitializableLookupTableBase(LookupInterface): name: A name for the operation (optional). Returns: - A `SparseTensor` if keys are sparse, otherwise a dense `Tensor`. + A `SparseTensor` if keys are sparse, a `RaggedTensor` if keys are ragged, + otherwise a dense `Tensor`. Raises: TypeError: when `keys` or `default_value` doesn't match the table data types. """ key_tensor = keys - if isinstance(keys, sparse_tensor.SparseTensor): + if isinstance(keys, + (sparse_tensor.SparseTensor, ragged_tensor.RaggedTensor)): key_tensor = keys.values if keys.dtype.base_dtype != self._key_dtype: @@ -233,6 +236,8 @@ class InitializableLookupTableBase(LookupInterface): values.set_shape(key_tensor.get_shape()) if isinstance(keys, sparse_tensor.SparseTensor): return sparse_tensor.SparseTensor(keys.indices, values, keys.dense_shape) + elif isinstance(keys, ragged_tensor.RaggedTensor): + return keys.with_values(values) else: return values @@ -1058,7 +1063,8 @@ class IdTableWithHashBuckets(LookupInterface): name: Optional name for the op. Returns: - A `SparseTensor` if keys are sparse, otherwise a dense `Tensor`. + A `SparseTensor` if keys are sparse, a `RaggedTensor` if keys are ragged, + otherwise a dense `Tensor`. Raises: TypeError: when `keys` doesn't match the table key data type. @@ -1067,7 +1073,8 @@ class IdTableWithHashBuckets(LookupInterface): raise TypeError("Signature mismatch. Keys must be dtype %s, got %s." 
% (self._key_dtype, keys.dtype)) values = keys - if isinstance(keys, sparse_tensor.SparseTensor): + if isinstance(keys, + (sparse_tensor.SparseTensor, ragged_tensor.RaggedTensor)): values = keys.values if self._table and (self._table.key_dtype.base_dtype == dtypes.int64): values = math_ops.cast(values, dtypes.int64) @@ -1092,6 +1099,8 @@ class IdTableWithHashBuckets(LookupInterface): ids = buckets if isinstance(keys, sparse_tensor.SparseTensor): return sparse_tensor.SparseTensor(keys.indices, ids, keys.dense_shape) + elif isinstance(keys, ragged_tensor.RaggedTensor): + return keys.with_values(ids) return ids @@ -1244,7 +1253,8 @@ class StaticVocabularyTable(LookupInterface): name: Optional name for the op. Returns: - A `SparseTensor` if keys are sparse, otherwise a dense `Tensor`. + A `SparseTensor` if keys are sparse, a `RaggedTensor` if keys are ragged, + otherwise a dense `Tensor`. Raises: TypeError: when `keys` doesn't match the table key data type. @@ -1253,7 +1263,8 @@ class StaticVocabularyTable(LookupInterface): raise TypeError("Signature mismatch. Keys must be dtype %s, got %s." % (self._key_dtype, keys.dtype)) values = keys - if isinstance(keys, sparse_tensor.SparseTensor): + if isinstance(keys, + (sparse_tensor.SparseTensor, ragged_tensor.RaggedTensor)): values = keys.values if self._table and (self._table.key_dtype.base_dtype == dtypes.int64): values = math_ops.cast(values, dtypes.int64) @@ -1273,6 +1284,8 @@ class StaticVocabularyTable(LookupInterface): ids = buckets if isinstance(keys, sparse_tensor.SparseTensor): return sparse_tensor.SparseTensor(keys.indices, ids, keys.dense_shape) + elif isinstance(keys, ragged_tensor.RaggedTensor): + return keys.with_values(ids) return ids From 27495c23c6d6efcd59bbcdb7d8346d424683e228 Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Wed, 5 Aug 2020 15:36:37 +0000 Subject: [PATCH 2163/2522] fix build --- tensorflow/core/kernels/BUILD | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index dd51d6fcc26..c4f43ecdea7 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -2944,11 +2944,10 @@ cc_library( hdrs = ["tensor_map.h"], deps = [ "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", - "//tensorflow/core/framework:tensor", "//tensorflow/core/framework:tensor_shape_proto_cc", - "//tensorflow/core/framework:variant", "//tensorflow/core/lib/core:refcount", ], ) From 6ed841fbcf0f11eee1ccd6233292a3ab99f311cb Mon Sep 17 00:00:00 2001 From: Ken Franko Date: Wed, 5 Aug 2020 09:04:46 -0700 Subject: [PATCH 2164/2522] Handle tf.IfRegionOp control flow when determining which ops to mark for outside compilation. 
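As a rough Python-level illustration of the case this handles (not part of the patch; the pass itself operates on the MLIR `tf.IfRegion` form shown in the tests below), a string op inside a conditional branch of a TPU-compiled function has no XLA lowering and therefore needs to be marked for outside compilation:

```python
import tensorflow as tf

# Illustrative only: tf.strings.to_number has no XLA kernel, so when this
# function is compiled for TPU, the op inside the tf.cond branch (which
# becomes a tf.IfRegion in MLIR) must be annotated with
# _xla_outside_compilation by the pass changed here.
@tf.function
def branchy_parse(pred, s):
  return tf.cond(
      pred,
      lambda: tf.strings.to_number(s, out_type=tf.float32),
      lambda: tf.constant(1.0))
```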
PiperOrigin-RevId: 325030612 Change-Id: Ib16e81e125e5cf11ec6fc727a6e4c9ff4d748678 --- .../mark_ops_for_outside_compilation.mlir | 101 ++++++++++++++++++ .../mark_ops_for_outside_compilation.cc | 45 ++++++-- 2 files changed, 139 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/mark_ops_for_outside_compilation.mlir b/tensorflow/compiler/mlir/tensorflow/tests/mark_ops_for_outside_compilation.mlir index 3efa0b09439..9b28b3b922c 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/mark_ops_for_outside_compilation.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/mark_ops_for_outside_compilation.mlir @@ -51,3 +51,104 @@ func @op_string_operand_string_result(%arg0: tensor) -> tensor tensor return %0 : tensor } + + +// Test that a tf.IfRegion op with a captured string operand is marked for outside compilation. +// CHECK-LABEL: func @if_region_captured_string +func @if_region_captured_string(%arg0: tensor, %arg1: tensor) -> tensor { + %0 = "tf_device.cluster"() ( { + // CHECK: "tf.A" + // CHECK-NOT: _xla_outside_compilation + // CHECK: "tf.IfRegion" + // CHECK: "tf.D" + // CHECK-SAME: _xla_outside_compilation + // CHECK: _xla_outside_compilation + // CHECK-SAME: is_stateless = true + %1 = "tf.A"() : () -> tensor + %2 = "tf.IfRegion"(%arg0) ( { + %3 = "tf.D"(%arg1) : (tensor) -> tensor + "tf.Yield"(%3) : (tensor) -> () + }, { + %4 = "tf.H"() : () -> tensor + "tf.Yield"(%4) : (tensor) -> () + }) {is_stateless = true} : (tensor) -> (tensor) + %5 = "tf.C"(%2) : (tensor) -> tensor + tf_device.return %5 : tensor + }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor + return %0 : tensor +} + +// Test that op with a string results/operands inside a tf.IfRegion branch is marked for outside compilation. + +// CHECK-LABEL: func @if_region_string_op +func @if_region_string_op(%arg0: tensor, %arg1: tensor) -> tensor { + %0 = "tf_device.cluster"() ( { + // CHECK: "tf.A" + // CHECK-NOT: _xla_outside_compilation + // CHECK: "tf.IfRegion" + // CHECK-NOT: _xla_outside_compilation + %1 = "tf.A"() : () -> tensor + %2 = "tf.IfRegion"(%arg0)({ + // CHECK: "tf.D" + // CHECK-NOT: _xla_outside_compilation + %3 = "tf.D"(%arg1) : (tensor) -> tensor + "tf.Yield"(%3) : (tensor) -> () + }, { + // CHECK: "tf.F" + // CHECK-SAME: _xla_outside_compilation + %4 = "tf.F"() : () -> tensor + // CHECK: "tf.G" + // CHECK-SAME: _xla_outside_compilation + %5 = "tf.G"(%4) : (tensor) -> tensor + %6 = "tf.H"() : () -> tensor + "tf.Yield"(%6) : (tensor) -> () + }) {is_stateless = true} : (tensor) -> tensor + // CHECK: "tf.C" + // CHECK-NOT: _xla_outside_compilation + %7 = "tf.C"(%2) : (tensor) -> tensor + tf_device.return %7 : tensor + }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor + return %0 : tensor +} + +// Test that op with a string results/operands inside a tf.IfRegion branch is marked for outside compilation. 
+ +// CHECK-LABEL: func @nested_if_region_string_op +func @nested_if_region_string_op(%arg0: tensor, %arg1: tensor) -> tensor { + %0 = "tf_device.cluster"() ( { + // CHECK: "tf.A" + // CHECK-NOT: _xla_outside_compilation + // CHECK: "tf.IfRegion" + // CHECK-NOT: _xla_outside_compilation + %1 = "tf.A"() : () -> tensor + %2 = "tf.IfRegion"(%arg0)({ + // CHECK: "tf.D" + // CHECK-NOT: _xla_outside_compilation + %3 = "tf.D"(%arg1) : (tensor) -> tensor + "tf.Yield"(%3) : (tensor) -> () + }, { + %4 = "tf.E"() : () -> tensor + %5 = "tf.IfRegion"(%4)({ + // CHECK: "tf.F" + // CHECK-NOT: _xla_outside_compilation + %6 = "tf.F"(%arg1) : (tensor) -> tensor + "tf.Yield"(%6) : (tensor) -> () + }, { + // CHECK: "tf.G" + // CHECK-SAME: _xla_outside_compilation + %7 = "tf.G"() : () -> tensor + // CHECK: "tf.H" + // CHECK-SAME: _xla_outside_compilation + %8 = "tf.H"(%7) : (tensor) -> tensor + %9 = "tf.I"() : () -> tensor + "tf.Yield"(%9) : (tensor) -> () + }) {is_stateless = true} : (tensor) -> tensor + "tf.Yield"(%5) : (tensor) -> () + }) {is_stateless = true} : (tensor) -> tensor + // CHECK: "tf.C" + // CHECK-NOT: _xla_outside_compilation + %10 = "tf.C"(%2) : (tensor) -> tensor + tf_device.return %10 : tensor + }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor + return %0 : tensor +} diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/mark_ops_for_outside_compilation.cc b/tensorflow/compiler/mlir/tensorflow/transforms/mark_ops_for_outside_compilation.cc index 72f7a3a438c..8f1f3ecd3a8 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/mark_ops_for_outside_compilation.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/mark_ops_for_outside_compilation.cc @@ -20,7 +20,9 @@ limitations under the License. #include "mlir/IR/TypeUtilities.h" // from @llvm-project #include "mlir/Pass/Pass.h" // from @llvm-project #include "mlir/Pass/PassRegistry.h" // from @llvm-project +#include "mlir/Transforms/RegionUtils.h" // from @llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_device.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" #include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" @@ -63,22 +65,51 @@ bool IsSupportedOp(Operation& op) { return true; } +bool HasCapturedStringOperand(TF::IfRegionOp* if_op) { + bool string_operand = false; + mlir::visitUsedValuesDefinedAbove( + if_op->then_branch(), if_op->then_branch(), + [&](mlir::OpOperand* operand) { + if (getElementTypeOrSelf(operand->get()).isa()) + string_operand = true; + }); + if (string_operand) return string_operand; + mlir::visitUsedValuesDefinedAbove( + if_op->else_branch(), if_op->else_branch(), + [&](mlir::OpOperand* operand) { + if (getElementTypeOrSelf(operand->get()).isa()) + string_operand = true; + }); + return string_operand; +} + LogicalResult MarkUncompilableOps(Block* block) { - for (Operation& op : *block) { - if (!IsSupportedOp(op)) { - op.setAttr(kXlaOutsideCompilationAttr, - StringAttr::get("auto", op.getContext())); + block->walk([&](Operation* op) { + if (!IsSupportedOp(*op)) { + op->setAttr(kXlaOutsideCompilationAttr, + StringAttr::get("auto", op->getContext())); } - } + if (auto if_op = llvm::dyn_cast(op)) { + if (HasCapturedStringOperand(&if_op)) { + op->setAttr(kXlaOutsideCompilationAttr, + StringAttr::get("auto", op->getContext())); + } + } + }); return success(); } void MarkOpsForOutsideCompilation::runOnOperation() { auto module = getOperation(); - 
module.walk([&](tf_device::ClusterOp cluster) { - MarkUncompilableOps(&cluster.GetBody()); + auto result = module.walk([&](tf_device::ClusterOp cluster) { + if (failed(MarkUncompilableOps(&cluster.GetBody()))) + return WalkResult::interrupt(); + + return WalkResult::advance(); }); + + if (result.wasInterrupted()) return signalPassFailure(); } } // namespace From 0c944514c1f933a420f6f35e96c66ad4714d989d Mon Sep 17 00:00:00 2001 From: Stephan Herhut Date: Wed, 5 Aug 2020 09:31:21 -0700 Subject: [PATCH 2165/2522] Remove left-over debugging in bufferize pass. PiperOrigin-RevId: 325035330 Change-Id: I25aa3f413106a74a76847ecfc32810b61b1ed3e4 --- .../compiler/mlir/tools/kernel_gen/transforms/bufferize_pass.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/bufferize_pass.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/bufferize_pass.cc index ebbc92f64c7..7d195c69c37 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/bufferize_pass.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/bufferize_pass.cc @@ -89,7 +89,6 @@ struct BufferizePass : public BufferizePassBase { return applyFullConversion(func, target, patterns); }); - module.dump(); if (result.wasInterrupted()) { signalPassFailure(); } From b21bc388a142c2c15a57af59f9d57ca3413f0c07 Mon Sep 17 00:00:00 2001 From: Blake Hechtman Date: Wed, 5 Aug 2020 09:40:25 -0700 Subject: [PATCH 2166/2522] [XLA] Convert Abs(a)*Abs(a) to a*a and add an option to allow for numerically unsafe algebraic simplifications PiperOrigin-RevId: 325037045 Change-Id: Ic0a6ee59d2639311f67c55d447e510c93f2efcbf --- .../xla/service/algebraic_simplifier.cc | 117 ++++++++++-------- .../xla/service/algebraic_simplifier.h | 9 ++ .../xla/service/algebraic_simplifier_test.cc | 16 +++ 3 files changed, 92 insertions(+), 50 deletions(-) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 0b588048e4a..d04a428d349 100755 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -665,7 +665,7 @@ Status AlgebraicSimplifierVisitor::ScalarMultiplyReduction( HloInstruction* inst; HloInstruction* user; int64 index; - std::tie (inst, user, index) = operands.back(); + std::tie(inst, user, index) = operands.back(); operands.pop_back(); // Skip the op types that are not commutative with multiply. 
@@ -913,7 +913,7 @@ Status AlgebraicSimplifierVisitor::HandleAdd(HloInstruction* add) { (Match(lhs, m::Multiply(m::Op(&c), m::Op(&a))) && Match(rhs, m::MultiplyAnyOrder(m::Op().Is(c), m::Op(&b))))) && (ShapeUtil::ElementIsIntegral(add->shape()) || - IsAllFpConstantPowerOf2(c))) { + options_.enable_floats_are_real() || IsAllFpConstantPowerOf2(c))) { return ReplaceWithNewInstruction( add, HloInstruction::CreateBinary( add->shape(), HloOpcode::kMultiply, @@ -2667,6 +2667,17 @@ Status AlgebraicSimplifierVisitor::HandleMultiply(HloInstruction* multiply) { return Status::OK(); } + { + HloInstruction* abs_operand; + if (lhs == rhs && Match(lhs, m::Abs(m::Op(&abs_operand))) && + !ShapeUtil::ElementIsComplex(abs_operand->shape())) { + TF_RETURN_IF_ERROR(multiply->ReplaceOperandWith(0, abs_operand)); + TF_RETURN_IF_ERROR(multiply->ReplaceOperandWith(1, abs_operand)); + changed_ = true; + return Status::OK(); + } + } + { HloInstruction *convert_operand, *operand; // Mul(Convert(Pred), operand) => select(pred, operand, 0) @@ -3037,8 +3048,8 @@ Status AlgebraicSimplifierVisitor::HandleBroadcast(HloInstruction* broadcast) { HloInstruction* new_broadcast = computation_->AddInstruction( HloInstruction::CreateBroadcast(user->shape(), operand, {})); // Use HloInstruction::ReplaceAllUsesWith instead of - // HloComputation::ReplaceWithNewInstruction because we are replacing an - // instruction other than the visited instruction. + // HloComputation::ReplaceWithNewInstruction because we are replacing + // an instruction other than the visited instruction. changed_ = true; return user->ReplaceAllUsesWith(new_broadcast); } @@ -3155,9 +3166,11 @@ Status AlgebraicSimplifierVisitor::HandleConvert(HloInstruction* convert) { // Eliminate a convert pair if it is a no-op. The following are a few // example cases that are being handled: - // 1. convert(convert(A, $TYPE1), $TYPE2) is simplified to A if A is of $TYPE2 + // 1. convert(convert(A, $TYPE1), $TYPE2) is simplified to A if A is of + // $TYPE2 // and convert(A, $TYPE1) is an upcast - // 2. convert(convert(A, $TYPE1),$TYPE2) is simplified to A if A is of $TYPE2 + // 2. convert(convert(A, $TYPE1),$TYPE2) is simplified to A if A is of + // $TYPE2 // and convert(A, $TYPE1) is an upcast and is an integral conversion from // unsigned to signed (only signed to unsigned conversion is NOT allowed) // 3. Tuple(convert(A, $TYPE1) , floor(convert(convert(A, $TYPE1), $TYPE2)), @@ -3293,7 +3306,8 @@ Status AlgebraicSimplifierVisitor::HandlePad(HloInstruction* pad) { pad->shape(), nonzero_pad->mutable_shape())); simplifier_->UpdateLayout(nonzero_pad->mutable_shape()); - // Second, construct the slice instruction to perform the negative padding. + // Second, construct the slice instruction to perform the negative + // padding. std::vector start_indices; std::vector end_indices; std::vector strides; @@ -3446,8 +3460,8 @@ AlgebraicSimplifierVisitor::TryToSinkBroadcastAfterOpWithUniqueNonScalarOperand( Shape changed_shape; for (HloInstruction* user_operand : user->operands()) { - // If this is a broadcast operand that is not our original broadcast input - // to this function then we might need to change the input. + // If this is a broadcast operand that is not our original broadcast + // input to this function then we might need to change the input. 
if (is_compatible_broadcast(user_operand)) { // If this is a broadcast from a scalar value rewrite a broadcast from // the scalar to the new shape enforced from the other broadcast @@ -3618,16 +3632,16 @@ Status AlgebraicSimplifierVisitor::HandleRemainder(HloInstruction* remainder) { // If M < N, then {0, ..., M} % N ==> {0, ..., M}. // // Currently this only covers the case when N is a broadcasted constant - // scalar. We could also cover the case when N is a non-broadcasted constant - // with the same value repeated. + // scalar. We could also cover the case when N is a non-broadcasted + // constant with the same value repeated. HloInstruction* iota; HloInstruction* divisor; if (Match(remainder, m::Remainder(m::Iota(&iota), m::Broadcast(m::ConstantEffectiveScalar(&divisor))))) { // The iota counts {0, ..., iota_upper_bound - 1}. (Actually this is - // conservative; the iota may overflow and count up to a smaller value than - // this. But that's OK for our purposes here.) + // conservative; the iota may overflow and count up to a smaller value + // than this. But that's OK for our purposes here.) int64 iota_upper_bound = iota->shape().dimensions( Cast(iota)->iota_dimension()); absl::optional divisor_val = divisor->literal().GetIntegralAsS64( @@ -3640,8 +3654,8 @@ Status AlgebraicSimplifierVisitor::HandleRemainder(HloInstruction* remainder) { // (X + N) % N = X % N, so long as X + N does not overflow. // // We don't have range tracking in XLA that would let us know whether X + N - // overflows, so for now we only do this simplification when X is an iota. We - // could add other operations where it's easy to see a range, such as + // overflows, so for now we only do this simplification when X is an iota. + // We could add other operations where it's easy to see a range, such as // remainder, convert, etc., though at some point we'd probably want a // range-tracking analysis. HloInstruction* bcast; @@ -3653,9 +3667,9 @@ Status AlgebraicSimplifierVisitor::HandleRemainder(HloInstruction* remainder) { m::Broadcast(m::ConstantEffectiveScalar(&addend))), m::Broadcast(&bcast, m::ConstantEffectiveScalar(&divisor)))) && addend == divisor) { - // The iota counts {0, ...iota_upper_bound - 1}, with the same caveat above - // that iota_upper_bound is conservative, and the true upper bound may be - // smaller. + // The iota counts {0, ...iota_upper_bound - 1}, with the same caveat + // above that iota_upper_bound is conservative, and the true upper bound + // may be smaller. int64 iota_upper_bound = iota->shape().dimensions( Cast(iota)->iota_dimension()); absl::optional divisor_val = divisor->literal().GetIntegralAsS64( @@ -3760,9 +3774,9 @@ Status AlgebraicSimplifierVisitor::HandleReverse(HloInstruction* reverse) { StatusOr AlgebraicSimplifierVisitor::TrySimplifyScalarSlice( HloInstruction* slice) { - // Only try to do this for effective scalars. We could do the same for slicing - // out larger pieces of padding (replacing with a broadcast of the padding - // value), but this is probably not worth it. + // Only try to do this for effective scalars. We could do the same for + // slicing out larger pieces of padding (replacing with a broadcast of the + // padding value), but this is probably not worth it. if (!ShapeUtil::IsEffectiveScalar(slice->shape())) { return false; } @@ -3863,8 +3877,8 @@ StatusOr AlgebraicSimplifierVisitor::TryToReorderSliceAndReshape( return false; } -// Allowing a slice to move through a reverse with any necessary updates to the -// slice config. 
+// Allowing a slice to move through a reverse with any necessary updates to +// the slice config. StatusOr AlgebraicSimplifierVisitor::TryToReorderSliceAndReverse( HloInstruction* slice) { VLOG(2) << "Entered TryToReorderSliceAndReverse for slice:" @@ -3892,8 +3906,8 @@ StatusOr AlgebraicSimplifierVisitor::TryToReorderSliceAndReverse( << new_limits[rdim]; } // New slice formed from the reverse_operand, but strides and shape of the - // slice output remains the same. New slice's starts and limits are updated - // for ONLY the reversed dimensions as indicated above. + // slice output remains the same. New slice's starts and limits are + // updated for ONLY the reversed dimensions as indicated above. HloInstruction* new_slice = computation_->AddInstruction( HloInstruction::CreateSlice(slice->shape(), reverse_operand, new_starts, new_limits, new_strides)); @@ -3920,7 +3934,8 @@ Status AlgebraicSimplifierVisitor::HandleSlice(HloInstruction* slice) { if (Match(slice, m::Slice(m::Pad(&pad, m::Op(&pad_operand), m::Op())))) { // Is the result of the slice the pad operand. bool slice_undoes_pad = true; - // Can the slice be moved to the pad_operand without any padding being read. + // Can the slice be moved to the pad_operand without any padding being + // read. bool slice_inside_pad = true; // Does this slice slice out pading only. bool slice_in_padding = false; @@ -4055,8 +4070,8 @@ Status AlgebraicSimplifierVisitor::HandleSlice(HloInstruction* slice) { } } - // Do not try to reorder slices and reshapes after layout assignment as it may - // be invalid. + // Do not try to reorder slices and reshapes after layout assignment as it + // may be invalid. if (!options_.is_layout_sensitive()) { TF_ASSIGN_OR_RETURN(replaced, TryToReorderSliceAndReshape(slice)); } @@ -4106,8 +4121,8 @@ Status AlgebraicSimplifierVisitor::HandleDynamicSlice( if (ShapeUtil::IsScalar(dynamic_slice->shape())) { return ReplaceInstruction(dynamic_slice, operand); } - // DynamicSlice where operand has the same size as the output is simply equal - // to operand. + // DynamicSlice where operand has the same size as the output is simply + // equal to operand. if (SameShape(operand, dynamic_slice)) { return ReplaceInstruction(dynamic_slice, operand); } @@ -4438,8 +4453,8 @@ Status AlgebraicSimplifierVisitor::HandleReduce(HloInstruction* hlo) { // Convert Reduce(concat({a,b,...})) to // map(reduce(a),map(reduce(b),...,)) // - // This should make fusion easier or use less memory bandwidth in the unfused - // case. + // This should make fusion easier or use less memory bandwidth in the + // unfused case. if (arg->opcode() == HloOpcode::kConcatenate && absl::c_linear_search(reduce->dimensions(), arg->concatenate_dimension())) { @@ -4458,9 +4473,9 @@ Status AlgebraicSimplifierVisitor::HandleReduce(HloInstruction* hlo) { } HloInstruction *dot, *lhs, *rhs; - // Convert Reduce(Dot(X,Y)) to Dot(X,Y) if any of the dimensions reduced were - // batch dimensions of the dot. The transformation supports reducing other - // dimensions as well. + // Convert Reduce(Dot(X,Y)) to Dot(X,Y) if any of the dimensions reduced + // were batch dimensions of the dot. The transformation supports reducing + // other dimensions as well. 
if (options_.enable_dot_strength_reduction() && Match(arg, m::Dot(&dot, m::Op(&lhs), m::Op(&rhs)).WithOneUser()) && Match(reduce->to_apply()->root_instruction(), @@ -4532,13 +4547,13 @@ Status AlgebraicSimplifierVisitor::HandleReduceWindow( if (options_.enable_window_reduce_to_reduce_replacement()) { // A reduce window can be expressed as a reduce and a reshape if all // dimensions either have a window size of one or the entire dimension. If - // there is no stride, dilation, or padding, this is as easy as checking the - // size of the output shape and window dimension. + // there is no stride, dilation, or padding, this is as easy as checking + // the size of the output shape and window dimension. // - // The reshape is a bitcast since it adds one-sized dimensions. Often these - // ones are immediately removed as well with another reshape. The - // implementation of reduce tends to be slightly more efficient at reducing - // entire dimensions compared to reduce window. + // The reshape is a bitcast since it adds one-sized dimensions. Often + // these ones are immediately removed as well with another reshape. The + // implementation of reduce tends to be slightly more efficient at + // reducing entire dimensions compared to reduce window. auto effective_reduce_dims = [&] { if (window_util::HasStride(window) || window_util::HasDilation(window) || window_util::HasPadding(window)) { @@ -5053,7 +5068,8 @@ StatusOr AlgebraicSimplifierVisitor::SwapConvOperands( auto new_dim = swapped_window.add_dimensions(); new_dim->set_size(input_size); - // If the kernel is not reversed, the activations must be manually reversed. + // If the kernel is not reversed, the activations must be manually + // reversed. if (!window_dims[spatial_dim].window_reversal()) { reverse_dimensions.push_back( dnums.kernel_spatial_dimensions(spatial_dim)); @@ -5073,8 +5089,8 @@ StatusOr AlgebraicSimplifierVisitor::SwapConvOperands( dilated_kernel_size); } - // Don't transform if a naive convolution implementation would not have fewer - // flops. + // Don't transform if a naive convolution implementation would not have + // fewer flops. if (kernel_product <= swapped_kernel_product) { return false; } @@ -5152,11 +5168,11 @@ StatusOr AlgebraicSimplifierVisitor::SimplifyConvToDot( } } - // Stride ignores part of the output, which matrix multiplication does not do, - // so require no stride. Padding and base (lhs) dilation both implicitly + // Stride ignores part of the output, which matrix multiplication does not + // do, so require no stride. Padding and base (lhs) dilation both implicitly // extend the data, which matrix multiplication also does not do, so require - // no padding and no base (lhs) dilation. Window (rhs) dilation has no effect - // for a 1x1 window, so window dilation is no problem. + // no padding and no base (lhs) dilation. Window (rhs) dilation has no + // effect for a 1x1 window, so window dilation is no problem. if (window_util::HasStride(window) || window_util::HasPadding(window) || window_util::HasBaseDilation(window)) { return false; @@ -5209,8 +5225,9 @@ StatusOr AlgebraicSimplifierVisitor::SimplifyConvToDot( } } - // We already checked feature_dimension is most minor, so data in input_shape - // and row-major {conv_width,input_channels} are bitwise identical. + // We already checked feature_dimension is most minor, so data in + // input_shape and row-major {conv_width,input_channels} are bitwise + // identical. 
Shape new_input_shape = ShapeUtil::MakeShapeWithDescendingLayout( input_shape.element_type(), {conv_width, input_channels}); simplifier_->UpdateLayout(&new_input_shape); diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.h b/tensorflow/compiler/xla/service/algebraic_simplifier.h index 9f2a3404116..cabecec4eb8 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.h +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.h @@ -97,6 +97,14 @@ class AlgebraicSimplifierOptions { return enable_scalar_multiply_reduction_; } + // Also the algebraic simplifer to treat floating point values like real + // numbers. + void set_enable_floats_are_real(bool enable_floats_are_real) { + enable_floats_are_real_ = enable_floats_are_real; + } + + bool enable_floats_are_real() const { return enable_floats_are_real_; } + // If enable_window_reduce_replacement is true, the kReduceWindow instruction // can be optimized by replacement with simpler operations. void set_enable_window_reduce_to_reduce_replacement( @@ -158,6 +166,7 @@ class AlgebraicSimplifierOptions { bool enable_conv_simplification_{true}; bool enable_conv_operand_swap_{true}; bool enable_scalar_multiply_reduction_{false}; + bool enable_floats_are_real_{false}; bool enable_window_reduce_to_reduce_replacement_{true}; bool enable_reduce_of_reshape_{true}; bool replace_transpose_with_bitcast_{true}; diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index 90ca44714f7..fdd9fb04941 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -117,6 +117,22 @@ TEST_F(AlgebraicSimplifierTest, FactorFpAddition) { m::ConstantScalar(0.125)))); } +// (Abs(A)) * (Abs(A)) => (A*A) +TEST_F(AlgebraicSimplifierTest, SquareOfAbs) { + const char* kModuleStr = R"( + HloModule m + test { + p = f32[] parameter(0) + a = f32[] abs(p) + ROOT z = f32[] multiply(a, a) + } + )"; + TF_ASSERT_OK_AND_ASSIGN(auto m, ParseAndReturnVerifiedModule(kModuleStr)); + ASSERT_TRUE(AlgebraicSimplifier(default_options_).Run(m.get()).ValueOrDie()); + EXPECT_THAT(m->entry_computation()->root_instruction(), + GmockMatch(m::Multiply(m::Parameter(0), m::Parameter(0)))); +} + // (A*C1) * (B*C2) => (A*B)*(C1*C2) TEST_F(AlgebraicSimplifierTest, MultiplyChain) { const char* kModuleStr = R"( From 541120e68cbbd522ce206fc7aa54af2f9aabe070 Mon Sep 17 00:00:00 2001 From: Ken Franko Date: Wed, 5 Aug 2020 10:06:40 -0700 Subject: [PATCH 2167/2522] Use TF legalization patterns to determine if op should be marked for OutsideCompilation. 
PiperOrigin-RevId: 325042393 Change-Id: I075db3bb540e9cbc682b699bdf468021ce5debdb --- tensorflow/compiler/mlir/tensorflow/BUILD | 1 + .../mark_ops_for_outside_compilation.mlir | 221 +++++++++--------- .../mark_ops_for_outside_compilation.cc | 44 +++- 3 files changed, 150 insertions(+), 116 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/BUILD b/tensorflow/compiler/mlir/tensorflow/BUILD index c6f0083fc92..63908c822c8 100644 --- a/tensorflow/compiler/mlir/tensorflow/BUILD +++ b/tensorflow/compiler/mlir/tensorflow/BUILD @@ -827,6 +827,7 @@ cc_library( ":xla_sharding_util", "//tensorflow/compiler/mlir:op_or_arg_name_mapper", "//tensorflow/compiler/mlir/lite:validators", + "//tensorflow/compiler/mlir/xla:xla_legalize_tf", "//tensorflow/compiler/xla:xla_data_proto_cc", "//tensorflow/compiler/xla:xla_proto_cc", "//tensorflow/compiler/xla/client:sharding_builder", diff --git a/tensorflow/compiler/mlir/tensorflow/tests/mark_ops_for_outside_compilation.mlir b/tensorflow/compiler/mlir/tensorflow/tests/mark_ops_for_outside_compilation.mlir index 9b28b3b922c..afad117beae 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/mark_ops_for_outside_compilation.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/mark_ops_for_outside_compilation.mlir @@ -1,154 +1,159 @@ // RUN: tf-opt %s -tf-mark-ops-for-outside-compilation | FILECHECK_OPTS="" FileCheck %s - -// CHECK-LABEL: func @op_string_result -func @op_string_result() -> tensor { +// CHECK-LABEL: func @unsupported_op +func @unsupported_op() -> tensor { %0 = "tf_device.cluster"() ( { - // CHECK: "tf.A" - // CHECK-NOT: _xla_outside_compilation - // CHECK: "tf.B" + // CHECK: "tf.UnsupportedOp" // CHECK-SAME: _xla_outside_compilation - // CHECK: "tf.C" + // CHECK: "tf.Identity" // CHECK-NOT: _xla_outside_compilation - %1 = "tf.A"() : () -> tensor - %2 = "tf.B"(%1) : (tensor) -> tensor - %3 = "tf.C"(%1) : (tensor) -> tensor - tf_device.return %3 : tensor - }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor - return %0 : tensor + %1 = "tf.UnsupportedOp"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Identity"(%1) : (tensor) -> tensor + tf_device.return %2 : tensor + }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor + return %0 : tensor } -// CHECK-LABEL: func @op_string_operand -func @op_string_operand(%arg0: tensor) -> tensor { +// CHECK-LABEL: func @op_string_result +func @op_string_result() -> tensor { %0 = "tf_device.cluster"() ( { - // CHECK: "tf.A" + // CHECK: "tf.Const"() {value = dense<1> : tensor} // CHECK-NOT: _xla_outside_compilation - // CHECK: "tf.B" + // CHECK: "tf.Const" // CHECK-SAME: _xla_outside_compilation - // CHECK: "tf.C" + // CHECK-SAME: tf.string + // CHECK: "tf.Identity" // CHECK-NOT: _xla_outside_compilation - %1 = "tf.A"() : () -> tensor - %2 = "tf.B"(%arg0) : (tensor) -> tensor - %3 = "tf.C"(%2) : (tensor) -> tensor - tf_device.return %3 : tensor - }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor - return %0 : tensor + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x"> : tensor} : () -> tensor + %3 = "tf.Identity"(%1) : (tensor) -> tensor + tf_device.return %3 : tensor + }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor + return %0 : tensor +} +// CHECK-LABEL: func @op_string_operand +func @op_string_operand(%arg0: tensor) -> tensor { + %0 = "tf_device.cluster"() ( { + // CHECK: "tf.Const"() {value = dense<1> : tensor} + 
// CHECK-NOT: _xla_outside_compilation + // CHECK: "tf.StringToNumber" + // CHECK-SAME: _xla_outside_compilation + // CHECK-SAME: tf.string + // CHECK: "tf.Identity" + // CHECK-NOT: _xla_outside_compilation + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.StringToNumber"(%arg0) {out_type = f32} : (tensor) -> tensor + %3 = "tf.Identity"(%1) : (tensor) -> tensor + tf_device.return %3 : tensor + }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor + return %0 : tensor } // CHECK-LABEL: func @op_string_operand_string_result -func @op_string_operand_string_result(%arg0: tensor) -> tensor { +func @op_string_operand_string_result(%arg0: tensor) -> tensor { %0 = "tf_device.cluster"() ( { - // CHECK: "tf.A" + // CHECK: "tf.Const"() {value = dense<1> : tensor} // CHECK-NOT: _xla_outside_compilation - // CHECK: "tf.B" + // CHECK: "tf.Identity" // CHECK-SAME: _xla_outside_compilation - // CHECK: "tf.C" + // CHECK-SAME: tf.string + // CHECK: "tf.Identity" // CHECK-NOT: _xla_outside_compilation - %1 = "tf.A"() : () -> tensor - %2 = "tf.B"(%arg0) : (tensor) -> tensor - %3 = "tf.C"(%1) : (tensor) -> tensor - tf_device.return %3 : tensor - }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor - return %0 : tensor + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Identity"(%arg0) : (tensor) -> tensor + %3 = "tf.Identity"(%1) : (tensor) -> tensor + tf_device.return %3 : tensor + }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor + return %0 : tensor } - // Test that a tf.IfRegion op with a captured string operand is marked for outside compilation. // CHECK-LABEL: func @if_region_captured_string -func @if_region_captured_string(%arg0: tensor, %arg1: tensor) -> tensor { +func @if_region_captured_string(%arg0: tensor, %arg1: tensor) -> tensor { %0 = "tf_device.cluster"() ( { - // CHECK: "tf.A" + // CHECK: "tf.Const"() {value = dense<1> : tensor} // CHECK-NOT: _xla_outside_compilation // CHECK: "tf.IfRegion" - // CHECK: "tf.D" - // CHECK-SAME: _xla_outside_compilation - // CHECK: _xla_outside_compilation - // CHECK-SAME: is_stateless = true - %1 = "tf.A"() : () -> tensor + // CHECK: "tf.StringToNumber" + // CHECK: _xla_outside_compilation = "auto", is_stateless = true + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor %2 = "tf.IfRegion"(%arg0) ( { - %3 = "tf.D"(%arg1) : (tensor) -> tensor - "tf.Yield"(%3) : (tensor) -> () + %3 = "tf.StringToNumber"(%arg1) {out_type = f32} : (tensor) -> tensor + "tf.Yield"(%3) : (tensor) -> () }, { - %4 = "tf.H"() : () -> tensor - "tf.Yield"(%4) : (tensor) -> () - }) {is_stateless = true} : (tensor) -> (tensor) - %5 = "tf.C"(%2) : (tensor) -> tensor - tf_device.return %5 : tensor - }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor - return %0 : tensor + %4 = "tf.Const"() {value = dense<1.0> : tensor} : () -> tensor + "tf.Yield"(%4) : (tensor) -> () + }) {is_stateless = true} : (tensor) -> (tensor) + %5 = "tf.Identity"(%2) : (tensor) -> tensor + tf_device.return %5 : tensor + }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor + return %0 : tensor } -// Test that op with a string results/operands inside a tf.IfRegion branch is marked for outside compilation. +// Test that ops with string results/operands inside a tf.IfRegion branch are marked for outside compilation. 
// CHECK-LABEL: func @if_region_string_op -func @if_region_string_op(%arg0: tensor, %arg1: tensor) -> tensor { +func @if_region_string_op(%arg0: tensor, %arg1: tensor) -> tensor { %0 = "tf_device.cluster"() ( { - // CHECK: "tf.A" + // CHECK: "tf.Const"() {value = dense<1> : tensor} // CHECK-NOT: _xla_outside_compilation // CHECK: "tf.IfRegion" // CHECK-NOT: _xla_outside_compilation - %1 = "tf.A"() : () -> tensor - %2 = "tf.IfRegion"(%arg0)({ - // CHECK: "tf.D" - // CHECK-NOT: _xla_outside_compilation - %3 = "tf.D"(%arg1) : (tensor) -> tensor - "tf.Yield"(%3) : (tensor) -> () - }, { - // CHECK: "tf.F" + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.IfRegion"(%arg0) ( { + %3 = "tf.Const"() {value = dense<1.0> : tensor} : () -> tensor + "tf.Yield"(%3) : (tensor) -> () + }, { + // CHECK: "tf.Const"() {_xla_outside_compilation = "auto", value = dense<"1.0"> : tensor} + // CHECK-NEXT: "tf.StringToNumber" // CHECK-SAME: _xla_outside_compilation - %4 = "tf.F"() : () -> tensor - // CHECK: "tf.G" - // CHECK-SAME: _xla_outside_compilation - %5 = "tf.G"(%4) : (tensor) -> tensor - %6 = "tf.H"() : () -> tensor - "tf.Yield"(%6) : (tensor) -> () - }) {is_stateless = true} : (tensor) -> tensor - // CHECK: "tf.C" - // CHECK-NOT: _xla_outside_compilation - %7 = "tf.C"(%2) : (tensor) -> tensor - tf_device.return %7 : tensor - }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor - return %0 : tensor + %4 = "tf.Const"() {value = dense<"1.0"> : tensor} : () -> tensor + %5 = "tf.StringToNumber"(%4) {out_type = f32} : (tensor) -> tensor + "tf.Yield"(%5) : (tensor) -> () + // CHECK: {is_stateless + }) {is_stateless = true} : (tensor) -> (tensor) + %6 = "tf.Identity"(%2) : (tensor) -> tensor + tf_device.return %6: tensor + }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor + return %0 : tensor } -// Test that op with a string results/operands inside a tf.IfRegion branch is marked for outside compilation. +// Test that ops with string results/operands inside a nested tf.IfRegion branch are marked for outside compilation. 
// CHECK-LABEL: func @nested_if_region_string_op -func @nested_if_region_string_op(%arg0: tensor, %arg1: tensor) -> tensor { +func @nested_if_region_string_op(%arg0: tensor, %arg1: tensor) -> tensor { %0 = "tf_device.cluster"() ( { - // CHECK: "tf.A" + // CHECK: "tf.Const"() {value = dense<1> : tensor} // CHECK-NOT: _xla_outside_compilation // CHECK: "tf.IfRegion" // CHECK-NOT: _xla_outside_compilation - %1 = "tf.A"() : () -> tensor - %2 = "tf.IfRegion"(%arg0)({ - // CHECK: "tf.D" - // CHECK-NOT: _xla_outside_compilation - %3 = "tf.D"(%arg1) : (tensor) -> tensor - "tf.Yield"(%3) : (tensor) -> () - }, { - %4 = "tf.E"() : () -> tensor - %5 = "tf.IfRegion"(%4)({ - // CHECK: "tf.F" - // CHECK-NOT: _xla_outside_compilation - %6 = "tf.F"(%arg1) : (tensor) -> tensor - "tf.Yield"(%6) : (tensor) -> () + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.IfRegion"(%arg0) ( { + %3 = "tf.Const"() {value = dense<1.0> : tensor} : () -> tensor + "tf.Yield"(%3) : (tensor) -> () }, { - // CHECK: "tf.G" - // CHECK-SAME: _xla_outside_compilation - %7 = "tf.G"() : () -> tensor - // CHECK: "tf.H" - // CHECK-SAME: _xla_outside_compilation - %8 = "tf.H"(%7) : (tensor) -> tensor - %9 = "tf.I"() : () -> tensor - "tf.Yield"(%9) : (tensor) -> () - }) {is_stateless = true} : (tensor) -> tensor - "tf.Yield"(%5) : (tensor) -> () - }) {is_stateless = true} : (tensor) -> tensor - // CHECK: "tf.C" - // CHECK-NOT: _xla_outside_compilation - %10 = "tf.C"(%2) : (tensor) -> tensor - tf_device.return %10 : tensor - }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor - return %0 : tensor + // CHECK: "tf.Const"() {value = dense : tensor} + // CHECK-NOT: _xla_outside_compilation + %4 = "tf.Const"() {value = dense : tensor} : () -> tensor + %5 = "tf.IfRegion"(%4)({ + // CHECK: "tf.Const"() {_xla_outside_compilation = "auto", value = dense<"1.0"> : tensor} + // CHECK-NEXT: "tf.StringToNumber" + // CHECK-SAME: _xla_outside_compilation + %6 = "tf.Const"() {value = dense<"1.0"> : tensor} : () -> tensor + %7 = "tf.StringToNumber"(%6) {out_type = f32} : (tensor) -> tensor + "tf.Yield"(%7) : (tensor) -> () + }, { + // CHECK: "tf.Const"() {value = dense<1.000000e+00> : tensor} + // CHECK-NOT: _xla_outside_compilation + %8 = "tf.Const"() {value = dense<1.000000e+00> : tensor} : () -> tensor + "tf.Yield"(%8) : (tensor) -> () + // CHECK: {is_stateless + }){is_stateless = true} : (tensor) -> (tensor) + "tf.Yield"(%5) : (tensor) -> () + // CHECK: {is_stateless + }) {is_stateless = true} : (tensor) -> (tensor) + %9 = "tf.Identity"(%2) : (tensor) -> tensor + tf_device.return %9: tensor + }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor + return %0 : tensor } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/mark_ops_for_outside_compilation.cc b/tensorflow/compiler/mlir/tensorflow/transforms/mark_ops_for_outside_compilation.cc index 8f1f3ecd3a8..71146cf5e2b 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/mark_ops_for_outside_compilation.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/mark_ops_for_outside_compilation.cc @@ -25,6 +25,7 @@ limitations under the License. 
#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" #include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" +#include "tensorflow/compiler/mlir/xla/transforms/passes.h" namespace mlir { namespace TFDevice { @@ -43,6 +44,14 @@ struct MarkOpsForOutsideCompilation void runOnOperation() override; }; +// TODO(b/159128666): Check the control flow legalization passes instead once +// added. +void AddSupportedControlFlowOps(MLIRContext* context, + llvm::DenseSet* supported_ops) { + supported_ops->insert(OperationName("tf.IfRegion", context)); + supported_ops->insert(OperationName("tf.Yield", context)); +} + bool HasStringOperand(Operation& op) { for (auto operand : op.getOperands()) { if (getElementTypeOrSelf(operand).isa()) return true; @@ -57,12 +66,18 @@ bool HasStringResult(Operation& op) { return false; } +bool MatchesPattern(Operation& op, + const llvm::DenseSet& supported_ops) { + return (supported_ops.contains(op.getName())); +} + // Checks if the op is supported inside of a device cluster. -bool IsSupportedOp(Operation& op) { - if (HasStringOperand(op) || HasStringResult(op)) { - return false; - } - return true; +bool IsSupportedOp(Operation& op, + const llvm::DenseSet& supported_ops) { + // TODO(b/161726307): Check the allowed ops list in LegalizeTfWithTf2XlaPass + // as well. + return !HasStringOperand(op) && !HasStringResult(op) && + MatchesPattern(op, supported_ops); } bool HasCapturedStringOperand(TF::IfRegionOp* if_op) { @@ -83,9 +98,10 @@ bool HasCapturedStringOperand(TF::IfRegionOp* if_op) { return string_operand; } -LogicalResult MarkUncompilableOps(Block* block) { +LogicalResult MarkUncompilableOps( + Block* block, llvm::DenseSet& supported_ops) { block->walk([&](Operation* op) { - if (!IsSupportedOp(*op)) { + if (!IsSupportedOp(*op, supported_ops)) { op->setAttr(kXlaOutsideCompilationAttr, StringAttr::get("auto", op->getContext())); } @@ -101,9 +117,21 @@ LogicalResult MarkUncompilableOps(Block* block) { void MarkOpsForOutsideCompilation::runOnOperation() { auto module = getOperation(); + OwningRewritePatternList patterns; + mhlo::PopulateLegalizeTfPatterns(module.getContext(), &patterns); + + // `supported_ops` contains the name of all of the ops that can potentially be + // lowered into HLO on the device. This doesn't always mean that the op can + // be lowered in the future passes but if the op is not in this set, it can't + // be lowered in a subsequent pass. + llvm::DenseSet supported_ops; + for (auto& pattern : patterns) { + supported_ops.insert(*pattern->getRootKind()); + } + AddSupportedControlFlowOps(module.getContext(), &supported_ops); auto result = module.walk([&](tf_device::ClusterOp cluster) { - if (failed(MarkUncompilableOps(&cluster.GetBody()))) + if (failed(MarkUncompilableOps(&cluster.GetBody(), supported_ops))) return WalkResult::interrupt(); return WalkResult::advance(); From fdd6b2f9b6709fca6e4faf8982dc738e1f0a2d34 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 5 Aug 2020 10:07:46 -0700 Subject: [PATCH 2168/2522] Added Select and Where to the estimator. 
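For reference, a small sketch of the user-facing ops these new cost entries cover (illustrative only; roughly, the three-argument `tf.where` lowers to `SelectV2`, the v1 `where` to `Select`, and the one-argument form to `Where`):

```python
import tensorflow as tf

cond = tf.constant([True, False, True])
x = tf.constant([1.0, 2.0, 3.0])
y = tf.constant([10.0, 20.0, 30.0])

selected = tf.where(cond, x, y)  # SelectV2: [1.0, 20.0, 3.0]
indices = tf.where(cond)         # Where: [[0], [2]]
```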
PiperOrigin-RevId: 325042638 Change-Id: I0ccb9e440bfb8b43b0d53b61096cfc031fb7eca0 --- .../grappler/costs/op_level_cost_estimator.cc | 3 ++ .../costs/op_level_cost_estimator_test.cc | 33 +++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc index 62e6e361ef8..ed86e92a2e7 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc @@ -600,6 +600,8 @@ OpLevelCostEstimator::OpLevelCostEstimator() { EIGEN_COST(scalar_product_op)); elementwise_ops_.emplace("RealDiv", EIGEN_COST(scalar_quotient_op)); elementwise_ops_.emplace("ReluGrad", EIGEN_COST(scalar_max_op)); + elementwise_ops_.emplace("Select", EIGEN_COST(scalar_boolean_or_op)); + elementwise_ops_.emplace("SelectV2", EIGEN_COST(scalar_boolean_or_op)); elementwise_ops_.emplace("SquaredDifference", EIGEN_COST(scalar_square_op) + EIGEN_COST(scalar_difference_op)); @@ -607,6 +609,7 @@ OpLevelCostEstimator::OpLevelCostEstimator() { elementwise_ops_.emplace("TruncateDiv", EIGEN_COST(scalar_quotient_op)); elementwise_ops_.emplace("TruncateMod", EIGEN_COST(scalar_mod_op)); + elementwise_ops_.emplace("Where", 1); #undef EIGEN_COST diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc index 0f19b54feec..90f3e969df9 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc @@ -964,6 +964,39 @@ TEST_F(OpLevelCostEstimatorTest, UnaryOpExecutionTime) { } } +TEST_F(OpLevelCostEstimatorTest, BinaryOpExecutionTime) { + std::vector> binary_ops = { + {"Select", 1}, + {"SelectV2", 1}, + {"SquaredDifference", 2}, + {"Where", 1}, + }; + + const int kTensorSize1 = 1000; + const int kTensorSize2 = 2; + for (auto binary_op : binary_ops) { + OpContext op_context = + DescribeBinaryOp(binary_op.first, kTensorSize1, kTensorSize2); + + const int kExpectedMemoryTime = 3600; + int expected_compute_time = std::ceil( + binary_op.second * kTensorSize1 * kTensorSize2 * 2 / + estimator_.GetDeviceInfo(op_context.op_info.device()).gigaops); + + auto cost = PredictCosts(op_context); + EXPECT_EQ(Costs::Duration(kExpectedMemoryTime), cost.memory_time) + << binary_op.first; + EXPECT_EQ(Costs::Duration(expected_compute_time), cost.compute_time) + << binary_op.first; + EXPECT_EQ(Costs::Duration(expected_compute_time + kExpectedMemoryTime), + cost.execution_time) + << binary_op.first; + EXPECT_EQ(1, cost.num_ops_total); + EXPECT_FALSE(cost.inaccurate); + EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); + } +} + TEST_F(OpLevelCostEstimatorTest, BroadcastAddExecutionTime) { OpContext op_context; SetCpuDevice(&op_context.op_info); From 47aab0b49ef7432261f60430f5e29bb331cd5233 Mon Sep 17 00:00:00 2001 From: Anna R Date: Wed, 5 Aug 2020 10:30:35 -0700 Subject: [PATCH 2169/2522] Change tensorflow_core reference to tensorflow. We don't have tensorflow_core directory since some months back. 
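For context, a small sketch of the loading logic this path feeds, with assumed stand-in values; the real templates compute _site_packages_dirs themselves and only run this branch when installed from pip.

import os

# Sketch only: `site_packages_dirs` stands in for the template's
# _site_packages_dirs, and the print replaces _ll.load_library().
site_packages_dirs = ["/usr/lib/python3/dist-packages"]
for site_dir in site_packages_dirs:
    # First-party dynamic kernels now live under tensorflow/, not tensorflow_core/.
    main_dir = os.path.join(site_dir, "tensorflow/core/kernels")
    if os.path.isdir(main_dir):
        print("would load dynamic kernels from", main_dir)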
PiperOrigin-RevId: 325047837 Change-Id: Ie842c414b4c2e0c5ae99fd6b91640482daf1938f --- tensorflow/api_template.__init__.py | 2 +- tensorflow/api_template_v1.__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/api_template.__init__.py b/tensorflow/api_template.__init__.py index 0cd2b7da139..5932dda514d 100644 --- a/tensorflow/api_template.__init__.py +++ b/tensorflow/api_template.__init__.py @@ -137,7 +137,7 @@ if _running_from_pip_package(): # TODO(gunan): Add sanity checks to loaded modules here. for _s in _site_packages_dirs: # Load first party dynamic kernels. - _main_dir = _os.path.join(_s, 'tensorflow_core/core/kernels') + _main_dir = _os.path.join(_s, 'tensorflow/core/kernels') if _fi.file_exists(_main_dir): _ll.load_library(_main_dir) diff --git a/tensorflow/api_template_v1.__init__.py b/tensorflow/api_template_v1.__init__.py index b73af197f7b..0d1d2e56fae 100644 --- a/tensorflow/api_template_v1.__init__.py +++ b/tensorflow/api_template_v1.__init__.py @@ -147,7 +147,7 @@ if _running_from_pip_package(): # TODO(gunan): Add sanity checks to loaded modules here. for _s in _site_packages_dirs: # Load first party dynamic kernels. - _main_dir = _os.path.join(_s, 'tensorflow_core/core/kernels') + _main_dir = _os.path.join(_s, 'tensorflow/core/kernels') if _fi.file_exists(_main_dir): _ll.load_library(_main_dir) From b55bb7f5a1a72a90ede1d902a5e57760f135f23c Mon Sep 17 00:00:00 2001 From: jerryyin Date: Wed, 5 Aug 2020 17:45:36 +0000 Subject: [PATCH 2170/2522] [ROCm][XLA] Fixing no_alias_test --- tensorflow/compiler/xla/service/gpu/tests/gpu_noalias_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/gpu/tests/gpu_noalias_test.cc b/tensorflow/compiler/xla/service/gpu/tests/gpu_noalias_test.cc index 1e39a4deaa7..8ec00d73711 100644 --- a/tensorflow/compiler/xla/service/gpu/tests/gpu_noalias_test.cc +++ b/tensorflow/compiler/xla/service/gpu/tests/gpu_noalias_test.cc @@ -51,7 +51,7 @@ TEST_F(GpuNoAliasTest, Concat) { hlo_module->AddEntryComputation(std::move(computation)); CompileAndVerifyIr(std::move(hlo_module), - R"(CHECK-LABEL: define void @fusion + R"(CHECK-LABEL: define{{.*}}void @fusion CHECK-SAME: i8* noalias align {{[0-9]*}} dereferenceable({{[0-9]*}}) %[[OUTPUT_ALLOC:[a-z0-9]*]] CHECK: %fusion.raw = {{.*}} %[[OUTPUT_ALLOC]])", /*match_optimized_ir=*/false); From f648d335f08fe159d541c2c3fb6bf2d091bbefd9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 5 Aug 2020 11:08:20 -0700 Subject: [PATCH 2171/2522] [MLIR][XLA] Allow for choice of safe/unsafe variant in broadcast utils Create safe or unsafe variants of `shape.broadcast` depending on the context. The representation by means of an extent tensor is only legal if the operands are known to be broadcastable. Currently, there is no use in a safe context in the codebase but it will be used for shape inference eventually. 
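To make the safe/unsafe distinction concrete, here is a hedged Python analogue of ComputeBinaryElementwiseBroadcastingResultExtents; it only mirrors the contract (a plain extent list is valid solely when the operands are known to be broadcastable), not the MLIR builder code in the diff below.

def broadcast_result_extents(lhs_shape, rhs_shape, unsafe_as_extent_tensor=False):
    # Align ranks by padding the shorter shape with 1s, numpy-style.
    rank = max(len(lhs_shape), len(rhs_shape))
    lhs = [1] * (rank - len(lhs_shape)) + list(lhs_shape)
    rhs = [1] * (rank - len(rhs_shape)) + list(rhs_shape)
    extents = []
    for l, r in zip(lhs, rhs):
        if l == r or l == 1 or r == 1:
            extents.append(max(l, r))
        elif unsafe_as_extent_tensor:
            # Unsafe variant: the caller promised broadcastability, so the
            # mismatch cannot be reported from here.
            extents.append(max(l, r))
        else:
            # Safe variant: the result representation keeps the error case.
            raise ValueError("operands %r and %r are not broadcastable"
                             % (lhs_shape, rhs_shape))
    return extents

In the MLIR code below this corresponds to returning a !shape.shape from shape.broadcast on the safe path, versus building the broadcast on extent tensors and casting to a ranked extent tensor on the unsafe path.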
PiperOrigin-RevId: 325056915 Change-Id: I0da3c577a08273a51da8c83d82397f5457722e33 --- .../include/mlir-hlo/utils/broadcast_utils.h | 10 ++++--- .../mlir/hlo/lib/Dialect/mhlo/IR/chlo_ops.cc | 2 +- .../mhlo/transforms/chlo_legalize_to_hlo.cc | 4 +-- .../mlir/hlo/lib/utils/broadcast_utils.cc | 28 ++++++++++++------- .../tests/chlo_infer_shape_type_methods.mlir | 11 ++++---- .../chlo_legalize_to_hlo_broadcasts.mlir | 10 +++---- .../tests/legalize-tf-binary-elementwise.mlir | 12 ++++---- 7 files changed, 43 insertions(+), 34 deletions(-) diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/broadcast_utils.h b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/broadcast_utils.h index 1e2404299b2..1c57073f4ab 100644 --- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/broadcast_utils.h +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/broadcast_utils.h @@ -38,10 +38,12 @@ bool IsLegalNumpyRankedBroadcast(Value lhs, Value rhs, // Emits shape dialect ops to compute the result shape for a broadcasting // binary elementwise op which broadcasts according to "numpy" semantics -// (see above), returning an extents tensor of the resulting shape. -Value ComputeBinaryElementwiseBroadcastingResultExtents(Location loc, Value lhs, - Value rhs, - OpBuilder& builder); +// (see above), returning a `shape.shape` or an extent tensor of the resulting +// shape. The result should only be an extent tensor in contexts that ensure +// both operands to be broadcastable. +Value ComputeBinaryElementwiseBroadcastingResultExtents( + Location loc, Value lhs, Value rhs, OpBuilder& builder, + bool unsafe_as_extent_tensor); } // namespace hlo } // namespace mlir diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/chlo_ops.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/chlo_ops.cc index 99ed8bcb849..81389c3be89 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/chlo_ops.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/chlo_ops.cc @@ -151,7 +151,7 @@ LogicalResult ReifyBroadcastBinaryOpReturnTypeShapes( } Value computed_shape = hlo::ComputeBinaryElementwiseBroadcastingResultExtents( - loc, lhs, rhs, builder); + loc, lhs, rhs, builder, /*unsafe_as_extent_tensor=*/false); if (!computed_shape) return failure(); reifiedReturnShapes.push_back(computed_shape); return success(); diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo.cc index adbd2e5a628..c2db4880632 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo.cc @@ -124,8 +124,8 @@ struct ConvertRankedDynamicBroadcastBinaryOp int64_t result_rank = std::max(lhs_type.getRank(), rhs_type.getRank()); Value result_extents = - hlo::ComputeBinaryElementwiseBroadcastingResultExtents(loc, lhs, rhs, - rewriter); + hlo::ComputeBinaryElementwiseBroadcastingResultExtents( + loc, lhs, rhs, rewriter, /*unsafe_as_extent_tensor=*/true); // Note that we unconditionally emit DynamicBroadcastInDim ops and let // downstream canonicalizations fold them away if possible. 
This is diff --git a/tensorflow/compiler/mlir/hlo/lib/utils/broadcast_utils.cc b/tensorflow/compiler/mlir/hlo/lib/utils/broadcast_utils.cc index a3ce4d44436..71b1a4e164f 100644 --- a/tensorflow/compiler/mlir/hlo/lib/utils/broadcast_utils.cc +++ b/tensorflow/compiler/mlir/hlo/lib/utils/broadcast_utils.cc @@ -20,6 +20,7 @@ limitations under the License. #include "llvm/ADT/Sequence.h" #include "llvm/ADT/SmallVector.h" #include "mlir/Dialect/Shape/IR/Shape.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/IR/Diagnostics.h" #include "mlir/IR/StandardTypes.h" @@ -46,9 +47,9 @@ bool IsLegalNumpyRankedBroadcast(Value lhs, Value rhs, broadcast_dims.getIntValues().begin()); } -Value ComputeBinaryElementwiseBroadcastingResultExtents(Location loc, Value lhs, - Value rhs, - OpBuilder& builder) { +Value ComputeBinaryElementwiseBroadcastingResultExtents( + Location loc, Value lhs, Value rhs, OpBuilder& builder, + bool unsafe_as_extent_tensor) { auto lhs_type = lhs.getType().dyn_cast(); auto rhs_type = rhs.getType().dyn_cast(); if (!lhs_type || !rhs_type) { @@ -57,15 +58,22 @@ Value ComputeBinaryElementwiseBroadcastingResultExtents(Location loc, Value lhs, return nullptr; } - int64_t result_rank = std::max(lhs_type.getRank(), rhs_type.getRank()); Value lhs_shape_v = builder.createOrFold(loc, lhs); Value rhs_shape_v = builder.createOrFold(loc, rhs); - Value result_shape_v = builder.createOrFold( - loc, shape::ShapeType::get(builder.getContext()), lhs_shape_v, - rhs_shape_v, nullptr /* error */); - return builder.createOrFold( - loc, RankedTensorType::get({result_rank}, builder.getIndexType()), - result_shape_v); + + if (unsafe_as_extent_tensor) { + int64_t result_rank = std::max(lhs_type.getRank(), rhs_type.getRank()); + Value result_shape_v = builder.createOrFold( + loc, shape::getExtentTensorType(builder.getContext()), lhs_shape_v, + rhs_shape_v, nullptr /* error */); + return builder.createOrFold( + loc, RankedTensorType::get({result_rank}, builder.getIndexType()), + result_shape_v); + } + + return builder.createOrFold( + loc, builder.getType(), lhs_shape_v, rhs_shape_v, + nullptr /* error */); } } // namespace hlo diff --git a/tensorflow/compiler/mlir/hlo/tests/chlo_infer_shape_type_methods.mlir b/tensorflow/compiler/mlir/hlo/tests/chlo_infer_shape_type_methods.mlir index 99aab532688..d226c92858a 100644 --- a/tensorflow/compiler/mlir/hlo/tests/chlo_infer_shape_type_methods.mlir +++ b/tensorflow/compiler/mlir/hlo/tests/chlo_infer_shape_type_methods.mlir @@ -5,15 +5,14 @@ // only test reification on an examplar op. 
// CHECK-SAME: %[[ARG0:.+]]: tensor, // CHECK-SAME: %[[ARG1:.+]]: tensor -func @broadcast_add(%arg0: tensor, %arg1: tensor) -> tensor<1xindex> { +func @broadcast_add(%arg0: tensor, %arg1: tensor) -> !shape.shape { // CHECK-DAG: %[[ARG0_S:.+]] = shape.shape_of %[[ARG0]] // CHECK-DAG: %[[ARG1_S:.+]] = shape.shape_of %[[ARG1]] - // CHECK-DAG: %[[BCAST_S:.+]] = shape.broadcast %[[ARG0_S]], %[[ARG1_S]] - // CHECK: %[[EXTENTS:.+]] = shape.to_extent_tensor %[[BCAST_S]] - // CHECK: return %[[EXTENTS]] + // CHECK-DAG: %[[BCAST_S:.+]] = shape.broadcast %[[ARG0_S]], %[[ARG1_S]] : tensor, tensor -> !shape.shape + // CHECK: return %[[BCAST_S]] : !shape.shape %0 = chlo.broadcast_add %arg0, %arg1 : (tensor, tensor) -> tensor - %1 = "mhlo_test.reify_return_type_shapes"(%0) : (tensor) -> tensor<1xindex> - return %1 : tensor<1xindex> + %1 = "mhlo_test.reify_return_type_shapes"(%0) : (tensor) -> !shape.shape + return %1 : !shape.shape } // ----- diff --git a/tensorflow/compiler/mlir/hlo/tests/chlo_legalize_to_hlo_broadcasts.mlir b/tensorflow/compiler/mlir/hlo/tests/chlo_legalize_to_hlo_broadcasts.mlir index c08ead5081e..9670372a864 100644 --- a/tensorflow/compiler/mlir/hlo/tests/chlo_legalize_to_hlo_broadcasts.mlir +++ b/tensorflow/compiler/mlir/hlo/tests/chlo_legalize_to_hlo_broadcasts.mlir @@ -19,7 +19,7 @@ func @dynamicBroadcast(%arg0: tensor, %arg1: tensor) -> tensor to tensor<2xindex> // CHECK-DAG: %[[ARG0_B:.+]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG0]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<1> : tensor<1xi64>} // CHECK-DAG: %[[ARG1_B:.+]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG1]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} // CHECK-NEXT: %[[RESULT:.+]] = mhlo.add %[[ARG0_B]], %[[ARG1_B]] @@ -40,7 +40,7 @@ func @dynamicBroadcastComplex(%arg0: tensor, %arg1: tensor) -> t // CHECK-NEXT: %[[WITNESS:.+]] = shape.cstr_broadcastable %[[ARG0_S]], %[[ARG1_S]] // CHECK-NEXT: %[[FINAL_RESULT:.+]] = shape.assuming %[[WITNESS]] // CHECK-NEXT: %[[RESULT_S:.+]] = shape.broadcast %[[ARG0_S]], %[[ARG1_S]] - // CHECK-NEXT: %[[RESULT_EXTENTS:.+]] = shape.to_extent_tensor %[[RESULT_S]] + // CHECK-NEXT: %[[RESULT_EXTENTS:.+]] = tensor_cast %[[RESULT_S]] : tensor to tensor<2xindex> // CHECK-DAG: %[[ARG0_B:.+]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG0]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor, tensor<2xindex>) -> tensor // CHECK-DAG: %[[ARG1_B:.+]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG1]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} : (tensor, tensor<2xindex>) -> tensor // CHECK-NEXT: %[[RESULT:.+]] = "mhlo.complex"(%[[ARG0_B]], %[[ARG1_B]]) : (tensor, tensor) -> tensor> @@ -61,7 +61,7 @@ func @dynamicBroadcastCompare(%arg0: tensor, %arg1: tensor) -> t // CHECK: %[[WITNESS:.+]] = shape.cstr_broadcastable %[[ARG0_S]], %[[ARG1_S]] // CHECK: %[[FINAL_RESULT:.+]] = shape.assuming %[[WITNESS]] // CHECK: %[[RESULT_S:.+]] = shape.broadcast %[[ARG0_S]], %[[ARG1_S]] - // CHECK: %[[RESULT_EXTENTS:.+]] = shape.to_extent_tensor %[[RESULT_S]] + // CHECK: %[[RESULT_EXTENTS:.+]] = tensor_cast %[[RESULT_S]] : tensor to tensor<2xindex> // CHECK-DAG: %[[ARG0_B:.+]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG0]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor, tensor<2xindex>) -> tensor // CHECK-DAG: %[[ARG1_B:.+]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG1]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} : (tensor, tensor<2xindex>) -> tensor // CHECK: 
%[[RESULT:.+]] = "mhlo.compare"(%[[ARG0_B]], %[[ARG1_B]]) {comparison_direction = "EQ"} : (tensor, tensor) -> tensor @@ -263,7 +263,7 @@ func @addScalarUnranked(%arg0: tensor, %arg1: tensor<*xf32>) -> tensor<*xf3 // CHECK: %[[ASSUMING_RESULT:.*]] = shape.assuming %[[WITNESS]] -> (tensor) { // CHECK: %[[SCALAR_SHAPE:.*]] = shape.const_shape [] // CHECK: %[[BROADCASTED_SHAPE:.*]] = shape.broadcast %[[SCALAR_SHAPE]], %[[SHAPE_RESHAPED]] -// CHECK: %[[SHAPE_TENSOR:.*]] = shape.to_extent_tensor %[[BROADCASTED_SHAPE]] : !shape.shape -> tensor<1xindex> +// CHECK: %[[SHAPE_TENSOR:.*]] = tensor_cast %[[BROADCASTED_SHAPE]] : tensor to tensor<1xindex> // CHECK: %[[BROADCASTED_LHS:.*]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG_0]], %[[SHAPE_TENSOR]]) {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor, tensor<1xindex>) -> tensor // CHECK: %[[BROADCASTED_RHS:.*]] = "mhlo.dynamic_broadcast_in_dim"(%[[RESHAPED]], %[[SHAPE_TENSOR]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} : (tensor, tensor<1xindex>) -> tensor // CHECK: %[[BROADCASTED_RESULT:.*]] = mhlo.add %[[BROADCASTED_LHS]], %[[BROADCASTED_RHS]] : tensor @@ -296,7 +296,7 @@ func @addUnrankedScalar(%arg0: tensor<*xf32>, %arg1: tensor) -> tensor<*xf3 // CHECK: %[[SHAPE_1:.*]] = shape.shape_of %[[ARG_1]] : tensor // CHECK: %[[WITNESS:.*]] = shape.cstr_broadcastable %[[SHAPE_RESHAPED]], %[[SHAPE_1]] // CHECK: %[[ASSUMING_RESULT:.*]] = shape.assuming %[[WITNESS]] -> (tensor) { -// CHECK: %[[ASTENSOR:.*]] = shape.to_extent_tensor %[[SHAPE_RESHAPED]] +// CHECK: %[[ASTENSOR:.*]] = tensor_cast %[[SHAPE_RESHAPED]] // CHECK: %[[BROADCASTED_LHS:.*]] = "mhlo.dynamic_broadcast_in_dim"(%[[RESHAPED]], %[[ASTENSOR]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} : (tensor, tensor<1xindex>) -> tensor // CHECK: %[[BROADCASTED_RHS:.*]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG_1]], %[[ASTENSOR]]) {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor, tensor<1xindex>) -> tensor // CHECK: %[[BROADCASTED_RESULT:.*]] = mhlo.add %[[BROADCASTED_LHS]], %[[BROADCASTED_RHS]] : tensor diff --git a/tensorflow/compiler/mlir/xla/tests/legalize-tf-binary-elementwise.mlir b/tensorflow/compiler/mlir/xla/tests/legalize-tf-binary-elementwise.mlir index fd9c14c7c0f..5f3e40f923f 100644 --- a/tensorflow/compiler/mlir/xla/tests/legalize-tf-binary-elementwise.mlir +++ b/tensorflow/compiler/mlir/xla/tests/legalize-tf-binary-elementwise.mlir @@ -48,8 +48,8 @@ func @add_dynamic(%arg0: tensor, %arg1: tensor) -> tensor, tensor -> tensor + // CHECK-NEXT: %[[RESULT_EXTENTS:.+]] = tensor_cast %[[RESULT_SHAPE]] : tensor to tensor<2xindex> // CHECK-NEXT: %[[LHS_BCAST:.+]] = "mhlo.dynamic_broadcast_in_dim"(%arg0, %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<1> : tensor<1xi64>} // CHECK-NEXT: %[[RHS_BCAST:.+]] = "mhlo.dynamic_broadcast_in_dim"(%arg1, %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} // CHECK-NEXT: %[[RESULT:.+]] = mhlo.add %[[LHS_BCAST]], %[[RHS_BCAST]] : tensor @@ -201,8 +201,8 @@ func @equal_dynamic(%arg0: tensor, %arg1: tensor<1xi32>) -> tensor // NOT-CHECK-NEXT: %[[WITNESS:.+]] = shape.cstr_broadcastable %[[LHS_SHAPE]], %[[RHS_SHAPE]] // NOT-CHECK-NEXT: shape.assuming %[[WITNESS]] -> (tensor) { // NOT-CHECK-DAG: %[[LHS_SHAPE1:.+]] = shape.shape_of %arg0 - // NOT-CHECK-NEXT: %[[RESULT_SHAPE:.+]] = shape.broadcast %[[LHS_SHAPE1]], %[[RHS_SHAPE]] - // NOT-CHECK-NEXT: %[[RESULT_EXTENTS:.+]] = shape.to_extent_tensor %[[RESULT_SHAPE]] + // NOT-CHECK-NEXT: %[[RESULT_SHAPE:.+]] = shape.broadcast %[[LHS_SHAPE1]], %[[RHS_SHAPE]] : tensor, 
tensor -> tensor + // NOT-CHECK-NEXT: %[[RESULT_EXTENTS:.+]] = tensor_cast %[[RESULT_SHAPE]] : tensor to tensor<1xindex> // NOT-CHECK-DAG: %[[LHS_BCAST:.+]] = "mhlo.dynamic_broadcast_in_dim"(%arg0, %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} // NOT-CHECK-DAG: %[[RHS_BCAST:.+]] = "mhlo.dynamic_broadcast_in_dim"(%arg1, %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} // NOT-CHECK-NEXT: %[[RESULT:.+]] = "mhlo.compare"(%[[LHS_BCAST]], %[[RHS_BCAST]]) {comparison_direction = "EQ"} @@ -290,8 +290,8 @@ func @greater_dynamic(%arg0: tensor, %arg1: tensor) -> tensor, tensor -> tensor + // CHECK-NEXT: %[[RESULT_EXTENTS:.+]] = tensor_cast %[[RESULT_SHAPE]] : tensor to tensor<1xindex> // CHECK-DAG: %[[LHS_BCAST:.+]] = "mhlo.dynamic_broadcast_in_dim"(%arg0, %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} // CHECK-DAG: %[[RHS_BCAST:.+]] = "mhlo.dynamic_broadcast_in_dim"(%arg1, %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} // CHECK-NEXT: "mhlo.compare"(%[[LHS_BCAST]], %[[RHS_BCAST]]) {comparison_direction = "GT"} From 1dd5f497b1cb082b4cb006334fc9d12fd245656d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 5 Aug 2020 11:10:20 -0700 Subject: [PATCH 2172/2522] [MLIR][HLO] Remove redundant casts from unranked to ranked transformation The transformation of unranked to ranked operations no longer generates cast operations for shapes and sizes. Instead, we use the newly introduced support for extent tensor and index types directly. PiperOrigin-RevId: 325057440 Change-Id: Ia8e0750ecc38dcc97b318b2fd1c220b8c6bf210e --- .../mhlo/transforms/transform_unranked_hlo.cc | 53 ++++++++----------- .../hlo/tests/mhlo-transform-unranked.mlir | 20 +++---- 2 files changed, 29 insertions(+), 44 deletions(-) diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/transform_unranked_hlo.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/transform_unranked_hlo.cc index 8db5d849322..b6e55a9322f 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/transform_unranked_hlo.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/transform_unranked_hlo.cc @@ -46,7 +46,6 @@ namespace { sep fn(ShiftLeftOp) sep fn(ShiftRightArithmeticOp) \ sep fn(ShiftRightLogicalOp) sep fn(SubOp) -// TODO(frgossen): Make it variadic. template inline void AddLegalOpOnRankedTensor(ConversionTarget *target) { target->addDynamicallyLegalOp([](OpTy op) { @@ -75,28 +74,24 @@ struct UnaryElementwiseOpConversion : public OpRewritePattern { // Generate IR to flatten the operand. auto loc = op.getLoc(); - Value shape = rewriter.create(loc, operand); - Value numElements = rewriter.create(loc, shape); - Value numElementsAsIndex = - rewriter.create(loc, numElements); - Value flatShapeAsDimTensor = - rewriter.create(loc, numElementsAsIndex); + Type extentTensorTy = shape::getExtentTensorType(rewriter.getContext()); + Value shape = + rewriter.create(loc, extentTensorTy, operand); + Type indexTy = rewriter.getIndexType(); + Value numElements = + rewriter.create(loc, indexTy, shape); + Value flatShape = rewriter.create(loc, numElements); auto flatTensorTy = RankedTensorType::get({ShapedType::kDynamicSize}, operandTy.getElementType()); Value flatOperand = rewriter.create( - loc, flatTensorTy, operand, flatShapeAsDimTensor); + loc, flatTensorTy, operand, flatShape); // Generate IR for the actual operation. 
Value flatResult = rewriter.create(loc, flatTensorTy, flatOperand); // Generate IR to restore the original shape. - auto extentTensorTy = RankedTensorType::get({ShapedType::kDynamicSize}, - rewriter.getIndexType()); - Value shapeAsExtentTensor = - rewriter.create(loc, extentTensorTy, shape); - Value result = rewriter.create( - loc, operandTy, flatResult, shapeAsExtentTensor); - rewriter.replaceOp(op, result); + rewriter.replaceOpWithNewOp(op, operandTy, + flatResult, shape); return success(); } @@ -122,17 +117,18 @@ struct BinaryElementwiseOpConversion : public OpRewritePattern { } // Flatten operands. - Type shapeTy = shape::ShapeType::get(rewriter.getContext()); auto loc = op.getLoc(); - Value shapeLhs = rewriter.create(loc, op.lhs()); - Value shapeRhs = rewriter.create(loc, op.rhs()); - Value shape = rewriter.create(loc, shapeTy, + Type extentTensorTy = shape::getExtentTensorType(rewriter.getContext()); + Value shapeLhs = + rewriter.create(loc, extentTensorTy, op.lhs()); + Value shapeRhs = + rewriter.create(loc, extentTensorTy, op.rhs()); + Value shape = rewriter.create(loc, extentTensorTy, ValueRange{shapeLhs, shapeRhs}); - Value numElements = rewriter.create(loc, shape); - Value numElementsAsIndex = - rewriter.create(loc, numElements); - Value flatShape = - rewriter.create(loc, numElementsAsIndex); + Type indexTy = rewriter.getIndexType(); + Value numElements = + rewriter.create(loc, indexTy, shape); + Value flatShape = rewriter.create(loc, numElements); TensorType lhsTy = op.lhs().getType().template cast(); Type flatLhsTy = RankedTensorType::get({ShapedType::kDynamicSize}, lhsTy.getElementType()); @@ -148,13 +144,8 @@ struct BinaryElementwiseOpConversion : public OpRewritePattern { Value flatResult = rewriter.create(loc, flatLhs, flatRhs); // Restore original shape. - auto extentTensorTy = RankedTensorType::get({ShapedType::kDynamicSize}, - rewriter.getIndexType()); - Value shapeAsExtentTensor = - rewriter.create(loc, extentTensorTy, shape); - Value result = rewriter.create( - loc, op.getType(), flatResult, shapeAsExtentTensor); - rewriter.replaceOp(op, result); + rewriter.replaceOpWithNewOp(op, op.getType(), flatResult, + shape); return success(); } diff --git a/tensorflow/compiler/mlir/hlo/tests/mhlo-transform-unranked.mlir b/tensorflow/compiler/mlir/hlo/tests/mhlo-transform-unranked.mlir index 6cc07e0460c..56a7cf7294c 100644 --- a/tensorflow/compiler/mlir/hlo/tests/mhlo-transform-unranked.mlir +++ b/tensorflow/compiler/mlir/hlo/tests/mhlo-transform-unranked.mlir @@ -7,8 +7,7 @@ func @sqr_transform_result(%a: tensor<*xf32>) -> tensor<*xf32> { // Flatten operand shape. %shape = shape.shape_of %a : tensor<*xf32> -> tensor %num_elements = shape.num_elements %shape : tensor -> index - %num_elements_as_index = shape.size_to_index %num_elements : index - %flat_shape = tensor_from_elements(%num_elements_as_index) : tensor<1xindex> + %flat_shape = tensor_from_elements(%num_elements) : tensor<1xindex> %flat_a = "mhlo.dynamic_reshape"(%a, %flat_shape) : (tensor<*xf32>, tensor<1xindex>) -> tensor @@ -16,8 +15,7 @@ func @sqr_transform_result(%a: tensor<*xf32>) -> tensor<*xf32> { %flat_b = "mhlo.sqrt"(%flat_a) : (tensor) -> tensor // Restore original shape. 
- %shape_as_extent_tensor = shape.to_extent_tensor %shape : tensor -> tensor - %b = "mhlo.dynamic_reshape"(%flat_b, %shape_as_extent_tensor) + %b = "mhlo.dynamic_reshape"(%flat_b, %shape) : (tensor, tensor) -> tensor<*xf32> return %b : tensor<*xf32> @@ -29,14 +27,12 @@ func @sqr_transform_result(%a: tensor<*xf32>) -> tensor<*xf32> { // CHECK-LABEL: @sqrt // CHECK-SAME: (%[[A:.*]]: tensor<*xf32>) func @sqrt(%a: tensor<*xf32>) -> tensor<*xf32> { - // CHECK-NEXT: %[[SHAPE:.*]] = shape.shape_of %[[A]] : tensor<*xf32> + // CHECK-NEXT: %[[SHAPE:.*]] = shape.shape_of %[[A]] : tensor<*xf32> -> tensor // CHECK-NEXT: %[[NUM_ELEMENTS:.*]] = shape.num_elements %[[SHAPE]] - // CHECK-NEXT: %[[NUM_ELEMENTS_AS_INDEX:.*]] = shape.size_to_index %[[NUM_ELEMENTS]] - // CHECK-NEXT: %[[FLAT_SHAPE:.*]] = tensor_from_elements(%[[NUM_ELEMENTS_AS_INDEX]]) : tensor<1xindex> + // CHECK-NEXT: %[[FLAT_SHAPE:.*]] = tensor_from_elements(%[[NUM_ELEMENTS]]) : tensor<1xindex> // CHECK-NEXT: %[[FLAT_A:.*]] = "mhlo.dynamic_reshape"(%[[A]], %[[FLAT_SHAPE]]) : (tensor<*xf32>, tensor<1xindex>) -> tensor // CHECK-NEXT: %[[FLAT_B:.*]] = "mhlo.sqrt"(%[[FLAT_A]]) : (tensor) -> tensor - // CHECK-NEXT: %[[SHAPE_AS_EXTENT_TENSOR:.*]] = shape.to_extent_tensor %[[SHAPE]] : tensor - // CHECK-NEXT: %[[B:.*]] = "mhlo.dynamic_reshape"(%[[FLAT_B]], %[[SHAPE_AS_EXTENT_TENSOR]]) : (tensor, tensor) -> tensor<*xf32> + // CHECK-NEXT: %[[B:.*]] = "mhlo.dynamic_reshape"(%[[FLAT_B]], %[[SHAPE]]) : (tensor, tensor) -> tensor<*xf32> // CHECK-NEXT: return %[[B]] : tensor<*xf32> %b = "mhlo.sqrt"(%a) : (tensor<*xf32>) -> tensor<*xf32> return %b : tensor<*xf32> @@ -75,13 +71,11 @@ func @add_unranked(%a : tensor<*xf32>, %b : tensor<*xf32>) -> tensor<*xf32> { // CHECK: %[[SHAPE_B:.*]] = shape.shape_of %[[B]] // CHECK: %[[SHAPE:.*]] = "shape.any"(%[[SHAPE_A]], %[[SHAPE_B]]) // CHECK: %[[NUM_ELEMENTS:.*]] = shape.num_elements %[[SHAPE]] - // CHECK: %[[NUM_ELEMENTS_AS_INDEX:.*]] = shape.size_to_index %[[NUM_ELEMENTS]] - // CHECK: %[[FLAT_SHAPE:.*]] = tensor_from_elements(%[[NUM_ELEMENTS_AS_INDEX]]) : tensor<1xindex> + // CHECK: %[[FLAT_SHAPE:.*]] = tensor_from_elements(%[[NUM_ELEMENTS]]) : tensor<1xindex> // CHECK: %[[FLAT_A:.*]] = "mhlo.dynamic_reshape"(%[[A]], %[[FLAT_SHAPE]]) : (tensor<*xf32>, tensor<1xindex>) -> tensor // CHECK: %[[FLAT_B:.*]] = "mhlo.dynamic_reshape"(%[[B]], %[[FLAT_SHAPE]]) : (tensor<*xf32>, tensor<1xindex>) -> tensor // CHECK: %[[FLAT_RESULT:.*]] = mhlo.add %[[FLAT_A]], %[[FLAT_B]] : tensor - // CHECK: %[[SHAPE_AS_EXTENT_TENSOR:.*]] = shape.to_extent_tensor %[[SHAPE]] - // CHECK: %[[RESULT:.*]] = "mhlo.dynamic_reshape"(%[[FLAT_RESULT]], %[[SHAPE_AS_EXTENT_TENSOR]]) : (tensor, tensor) -> tensor<*xf32> + // CHECK: %[[RESULT:.*]] = "mhlo.dynamic_reshape"(%[[FLAT_RESULT]], %[[SHAPE]]) : (tensor, tensor) -> tensor<*xf32> // CHECK: return %[[RESULT]] : tensor<*xf32> %result = mhlo.add %a, %b : tensor<*xf32> return %result : tensor<*xf32> From 5b35196035427b1a5a20a00bad40b710b3a9f1be Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Wed, 5 Aug 2020 11:12:20 -0700 Subject: [PATCH 2173/2522] [tf.data] Fix internal usage of deprecated API. 
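A short usage sketch of the instance method this change migrates internal callers to, in place of the deprecated module-level iterator_ops.get_next_as_optional helper; it is written against the TF1-style graph iterator API, so the dataset/session setup here is an assumption for illustration only.

import tensorflow.compat.v1 as tf

dataset = tf.data.Dataset.range(3)
iterator = tf.data.make_one_shot_iterator(dataset)

# Preferred spelling after this change: ask the iterator itself.
next_optional = iterator.get_next_as_optional()
has_value = next_optional.has_value()  # scalar tf.bool tensor
value = next_optional.get_value()      # only meaningful when has_value is True

with tf.Session() as sess:
    print(sess.run([has_value, value]))  # first element exists: [True, 0]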
PiperOrigin-RevId: 325057857 Change-Id: Ic9118189276bdd79f19aa748711c6d59b59624c2 --- tensorflow/python/data/ops/iterator_ops.py | 9 +++++++++ tensorflow/python/data/ops/multi_device_iterator_ops.py | 6 ++---- tensorflow/python/distribute/distribute_lib.py | 3 +-- .../tools/api/golden/v1/tensorflow.data.-iterator.pbtxt | 4 ++++ 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/data/ops/iterator_ops.py b/tensorflow/python/data/ops/iterator_ops.py index 023cee88a5c..f6f2da0939e 100644 --- a/tensorflow/python/data/ops/iterator_ops.py +++ b/tensorflow/python/data/ops/iterator_ops.py @@ -432,6 +432,15 @@ class Iterator(trackable.Trackable): name=name) return structure.from_tensor_list(self._element_spec, flat_ret) + def get_next_as_optional(self): + # pylint: disable=protected-access + return optional_ops._OptionalImpl( + gen_dataset_ops.iterator_get_next_as_optional( + self._iterator_resource, + output_types=structure.get_flat_tensor_types(self.element_spec), + output_shapes=structure.get_flat_tensor_shapes( + self.element_spec)), self.element_spec) + def string_handle(self, name=None): """Returns a string-valued `tf.Tensor` that represents this iterator. diff --git a/tensorflow/python/data/ops/multi_device_iterator_ops.py b/tensorflow/python/data/ops/multi_device_iterator_ops.py index 7fa49a13fe6..187e1e988e8 100644 --- a/tensorflow/python/data/ops/multi_device_iterator_ops.py +++ b/tensorflow/python/data/ops/multi_device_iterator_ops.py @@ -335,8 +335,7 @@ class MultiDeviceIterator(object): result = [] for i, device in enumerate(self._devices): with ops.device(device): - result.append( - iterator_ops.get_next_as_optional(self._device_iterators[i])) + result.append(self._device_iterators[i].get_next_as_optional()) return result @property @@ -602,8 +601,7 @@ class OwnedMultiDeviceIterator(composite_tensor.CompositeTensor): result = [] for i, device in enumerate(self._devices): with ops.device(device): - result.append( - iterator_ops.get_next_as_optional(self._device_iterators[i])) + result.append(self._device_iterators[i].get_next_as_optional()) return result @property diff --git a/tensorflow/python/distribute/distribute_lib.py b/tensorflow/python/distribute/distribute_lib.py index 522849ac951..e593830f038 100644 --- a/tensorflow/python/distribute/distribute_lib.py +++ b/tensorflow/python/distribute/distribute_lib.py @@ -200,7 +200,6 @@ import six from tensorflow.python.autograph.core import ag_ctx as autograph_ctx from tensorflow.python.autograph.impl import api as autograph from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.data.ops import iterator_ops from tensorflow.python.distribute import collective_util from tensorflow.python.distribute import device_util from tensorflow.python.distribute import distribution_strategy_context @@ -3292,7 +3291,7 @@ class _DefaultDistributionExtended(StrategyExtendedV1): return self._iterator.get_next() def get_next_as_optional(self): - return iterator_ops.get_next_as_optional(self._iterator) + return self._iterator.get_next_as_optional() @deprecated(None, "Use the iterator's `initializer` property instead.") def initialize(self): diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.-iterator.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.-iterator.pbtxt index f276879275d..0c8af2ec6c9 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.data.-iterator.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.data.-iterator.pbtxt @@ -39,6 +39,10 @@ tf_class { name: "get_next" argspec: 
"args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "get_next_as_optional" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "make_initializer" argspec: "args=[\'self\', \'dataset\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " From 36b9f867011acc8c3655ab1ce5d7376a6ae45d51 Mon Sep 17 00:00:00 2001 From: Zhenyu Tan Date: Wed, 5 Aug 2020 11:13:16 -0700 Subject: [PATCH 2174/2522] fixit for VocabFileCatColumn PiperOrigin-RevId: 325058056 Change-Id: Ibebfa8c473dfc6bdad2a84ace8239dba5540c58c --- .../feature_column/feature_column_test.py | 598 +++++++++--------- 1 file changed, 295 insertions(+), 303 deletions(-) diff --git a/tensorflow/python/feature_column/feature_column_test.py b/tensorflow/python/feature_column/feature_column_test.py index e598848282f..3a38f86b95f 100644 --- a/tensorflow/python/feature_column/feature_column_test.py +++ b/tensorflow/python/feature_column/feature_column_test.py @@ -2821,27 +2821,26 @@ class FunctionalInputLayerTest(test.TestCase): variables_lib.Variable) self.assertAllEqual(cols_to_vars[some_embedding_column][0].shape, [5, 10]) - @test_util.run_deprecated_v1 def test_fills_cols_to_vars_shared_embedding(self): # Provide 5 DenseColumn's to input_layer: a NumericColumn, a # BucketizedColumn, an EmbeddingColumn, two SharedEmbeddingColumns. The # EmbeddingColumn creates a Variable and the two SharedEmbeddingColumns # shared one variable. - price1 = fc._numeric_column('price1') - dense_feature = fc._numeric_column('dense_feature') - dense_feature_bucketized = fc._bucketized_column( - dense_feature, boundaries=[0.]) - some_sparse_column = fc._categorical_column_with_hash_bucket( - 'sparse_feature', hash_bucket_size=5) - some_embedding_column = fc._embedding_column( - some_sparse_column, dimension=10) - categorical_column_a = fc._categorical_column_with_identity( - key='aaa', num_buckets=3) - categorical_column_b = fc._categorical_column_with_identity( - key='bbb', num_buckets=3) - shared_embedding_a, shared_embedding_b = fc_new.shared_embedding_columns( - [categorical_column_a, categorical_column_b], dimension=2) with ops.Graph().as_default(): + price1 = fc._numeric_column('price1') + dense_feature = fc._numeric_column('dense_feature') + dense_feature_bucketized = fc._bucketized_column( + dense_feature, boundaries=[0.]) + some_sparse_column = fc._categorical_column_with_hash_bucket( + 'sparse_feature', hash_bucket_size=5) + some_embedding_column = fc._embedding_column( + some_sparse_column, dimension=10) + categorical_column_a = fc._categorical_column_with_identity( + key='aaa', num_buckets=3) + categorical_column_b = fc._categorical_column_with_identity( + key='bbb', num_buckets=3) + shared_embedding_a, shared_embedding_b = fc_new.shared_embedding_columns( + [categorical_column_a, categorical_column_b], dimension=2) features = { 'price1': [[3.], [4.]], 'dense_feature': [[-1.], [4.]], @@ -3019,18 +3018,17 @@ class FunctionalInputLayerTest(test.TestCase): expected_var_names, [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)]) - @test_util.run_deprecated_v1 def test_multiple_layers_with_same_shared_embedding_column(self): - categorical_column_a = fc._categorical_column_with_identity( - key='aaa', num_buckets=3) - categorical_column_b = fc._categorical_column_with_identity( - key='bbb', num_buckets=3) - embedding_dimension = 2 - embedding_column_b, embedding_column_a = fc_new.shared_embedding_columns( - [categorical_column_b, 
categorical_column_a], - dimension=embedding_dimension) - with ops.Graph().as_default(): + categorical_column_a = fc._categorical_column_with_identity( + key='aaa', num_buckets=3) + categorical_column_b = fc._categorical_column_with_identity( + key='bbb', num_buckets=3) + embedding_dimension = 2 + embedding_column_b, embedding_column_a = fc_new.shared_embedding_columns( + [categorical_column_b, categorical_column_a], + dimension=embedding_dimension) + features = { 'aaa': sparse_tensor.SparseTensor( @@ -3053,19 +3051,18 @@ class FunctionalInputLayerTest(test.TestCase): ['input_layer/aaa_bbb_shared_embedding/embedding_weights:0'], [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)]) - @test_util.run_deprecated_v1 def test_multiple_layers_with_same_shared_embedding_column_diff_graphs(self): - categorical_column_a = fc._categorical_column_with_identity( - key='aaa', num_buckets=3) - categorical_column_b = fc._categorical_column_with_identity( - key='bbb', num_buckets=3) - embedding_dimension = 2 - embedding_column_b, embedding_column_a = fc_new.shared_embedding_columns( - [categorical_column_b, categorical_column_a], - dimension=embedding_dimension) - all_cols = [embedding_column_a, embedding_column_b] - with ops.Graph().as_default(): + categorical_column_a = fc._categorical_column_with_identity( + key='aaa', num_buckets=3) + categorical_column_b = fc._categorical_column_with_identity( + key='bbb', num_buckets=3) + embedding_dimension = 2 + embedding_column_b, embedding_column_a = fc_new.shared_embedding_columns( + [categorical_column_b, categorical_column_a], + dimension=embedding_dimension) + all_cols = [embedding_column_a, embedding_column_b] + features = { 'aaa': sparse_tensor.SparseTensor( @@ -3105,56 +3102,56 @@ class FunctionalInputLayerTest(test.TestCase): ['input_layer/aaa_bbb_shared_embedding/embedding_weights:0'], [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)]) - @test_util.run_deprecated_v1 def test_with_1d_sparse_tensor(self): - embedding_values = ( - (1., 2., 3., 4., 5.), # id 0 - (6., 7., 8., 9., 10.), # id 1 - (11., 12., 13., 14., 15.) # id 2 - ) - def _initializer(shape, dtype, partition_info): - del shape, dtype, partition_info - return embedding_values + with ops.Graph().as_default(): + embedding_values = ( + (1., 2., 3., 4., 5.), # id 0 + (6., 7., 8., 9., 10.), # id 1 + (11., 12., 13., 14., 15.) # id 2 + ) + def _initializer(shape, dtype, partition_info): + del shape, dtype, partition_info + return embedding_values - # price has 1 dimension in input_layer - price = fc._numeric_column('price') + # price has 1 dimension in input_layer + price = fc._numeric_column('price') - # one_hot_body_style has 3 dims in input_layer. - body_style = fc._categorical_column_with_vocabulary_list( - 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) - one_hot_body_style = fc._indicator_column(body_style) + # one_hot_body_style has 3 dims in input_layer. + body_style = fc._categorical_column_with_vocabulary_list( + 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) + one_hot_body_style = fc._indicator_column(body_style) - # embedded_body_style has 5 dims in input_layer. - country = fc._categorical_column_with_vocabulary_list( - 'country', vocabulary_list=['US', 'JP', 'CA']) - embedded_country = fc._embedding_column( - country, dimension=5, initializer=_initializer) + # embedded_body_style has 5 dims in input_layer. 
+ country = fc._categorical_column_with_vocabulary_list( + 'country', vocabulary_list=['US', 'JP', 'CA']) + embedded_country = fc._embedding_column( + country, dimension=5, initializer=_initializer) - # Provides 1-dim tensor and dense tensor. - features = { - 'price': constant_op.constant([11., 12.,]), - 'body-style': sparse_tensor.SparseTensor( - indices=((0,), (1,)), - values=('sedan', 'hardtop'), - dense_shape=(2,)), - # This is dense tensor for the categorical_column. - 'country': constant_op.constant(['CA', 'US']), - } - self.assertEqual(1, features['price'].shape.ndims) - self.assertEqual(1, features['body-style'].dense_shape.get_shape()[0]) - self.assertEqual(1, features['country'].shape.ndims) + # Provides 1-dim tensor and dense tensor. + features = { + 'price': constant_op.constant([11., 12.,]), + 'body-style': sparse_tensor.SparseTensor( + indices=((0,), (1,)), + values=('sedan', 'hardtop'), + dense_shape=(2,)), + # This is dense tensor for the categorical_column. + 'country': constant_op.constant(['CA', 'US']), + } + self.assertEqual(1, features['price'].shape.ndims) + self.assertEqual(1, features['body-style'].dense_shape.get_shape()[0]) + self.assertEqual(1, features['country'].shape.ndims) - net = fc.input_layer(features, - [price, one_hot_body_style, embedded_country]) - self.assertEqual(1 + 3 + 5, net.shape[1]) - with _initialized_session() as sess: + net = fc.input_layer(features, + [price, one_hot_body_style, embedded_country]) + self.assertEqual(1 + 3 + 5, net.shape[1]) + with _initialized_session(): - # Each row is formed by concatenating `embedded_body_style`, - # `one_hot_body_style`, and `price` in order. - self.assertAllEqual( - [[0., 0., 1., 11., 12., 13., 14., 15., 11.], - [1., 0., 0., 1., 2., 3., 4., 5., 12.]], - sess.run(net)) + # Each row is formed by concatenating `embedded_body_style`, + # `one_hot_body_style`, and `price` in order. 
+ self.assertAllEqual( + [[0., 0., 1., 11., 12., 13., 14., 15., 11.], + [1., 0., 0., 1., 2., 3., 4., 5., 12.]], + self.evaluate(net)) @test_util.run_deprecated_v1 def test_with_1d_unknown_shape_sparse_tensor(self): @@ -3347,7 +3344,6 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): 'python/feature_column/testdata/wire_vocabulary.txt') self._wire_vocabulary_size = 3 - @test_util.run_deprecated_v1 def test_defaults(self): column = fc._categorical_column_with_vocabulary_file( key='aaa', vocabulary_file='path_to_file', vocabulary_size=3) @@ -3364,7 +3360,6 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): fc._categorical_column_with_vocabulary_file( key=('aaa',), vocabulary_file='path_to_file', vocabulary_size=3) - @test_util.run_deprecated_v1 def test_all_constructor_args(self): column = fc._categorical_column_with_vocabulary_file( key='aaa', @@ -3377,7 +3372,6 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): 'aaa': parsing_ops.VarLenFeature(dtypes.int32) }, column._parse_example_spec) - @test_util.run_deprecated_v1 def test_deep_copy(self): original = fc._categorical_column_with_vocabulary_file( key='aaa', @@ -3402,18 +3396,18 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): fc._categorical_column_with_vocabulary_file( key='aaa', vocabulary_file='', vocabulary_size=3) - @test_util.run_deprecated_v1 def test_invalid_vocabulary_file(self): - column = fc._categorical_column_with_vocabulary_file( - key='aaa', vocabulary_file='file_does_not_exist', vocabulary_size=10) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=('marlo', 'skywalker', 'omar'), - dense_shape=(2, 2)) - column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) - with self.assertRaisesRegex(errors.OpError, 'file_does_not_exist'): - with self.cached_session(): - lookup_ops.tables_initializer().run() + with ops.Graph().as_default(): + column = fc._categorical_column_with_vocabulary_file( + key='aaa', vocabulary_file='file_does_not_exist', vocabulary_size=10) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=('marlo', 'skywalker', 'omar'), + dense_shape=(2, 2)) + column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) + with self.assertRaisesRegex(errors.OpError, 'file_does_not_exist'): + with self.cached_session(): + lookup_ops.tables_initializer().run() def test_invalid_vocabulary_size(self): with self.assertRaisesRegex(ValueError, 'Invalid vocabulary_size'): @@ -3427,20 +3421,20 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): vocabulary_file=self._wire_vocabulary_file_name, vocabulary_size=0) - @test_util.run_deprecated_v1 def test_too_large_vocabulary_size(self): - column = fc._categorical_column_with_vocabulary_file( - key='aaa', - vocabulary_file=self._wire_vocabulary_file_name, - vocabulary_size=self._wire_vocabulary_size + 1) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=('marlo', 'skywalker', 'omar'), - dense_shape=(2, 2)) - column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) - with self.assertRaisesRegex(errors.OpError, 'Invalid vocab_size'): - with self.cached_session(): - lookup_ops.tables_initializer().run() + with ops.Graph().as_default(): + column = fc._categorical_column_with_vocabulary_file( + key='aaa', + vocabulary_file=self._wire_vocabulary_file_name, + vocabulary_size=self._wire_vocabulary_size + 1) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=('marlo', 'skywalker', 'omar'), + 
dense_shape=(2, 2)) + column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) + with self.assertRaisesRegex(errors.OpError, 'Invalid vocab_size'): + with self.cached_session(): + lookup_ops.tables_initializer().run() def test_invalid_num_oov_buckets(self): with self.assertRaisesRegex(ValueError, 'Invalid num_oov_buckets'): @@ -3517,64 +3511,64 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): dense_shape=[1, 2]), features['aaa'].eval()) - @test_util.run_deprecated_v1 def test_get_sparse_tensors(self): - column = fc._categorical_column_with_vocabulary_file( - key='aaa', - vocabulary_file=self._wire_vocabulary_file_name, - vocabulary_size=self._wire_vocabulary_size) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=('marlo', 'skywalker', 'omar'), - dense_shape=(2, 2)) - id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) - self.assertIsNone(id_weight_pair.weight_tensor) - with _initialized_session(): - _assert_sparse_tensor_value( - self, - sparse_tensor.SparseTensorValue( - indices=inputs.indices, - values=np.array((2, -1, 0), dtype=np.int64), - dense_shape=inputs.dense_shape), - id_weight_pair.id_tensor.eval()) + with ops.Graph().as_default(): + column = fc._categorical_column_with_vocabulary_file( + key='aaa', + vocabulary_file=self._wire_vocabulary_file_name, + vocabulary_size=self._wire_vocabulary_size) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=('marlo', 'skywalker', 'omar'), + dense_shape=(2, 2)) + id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) + self.assertIsNone(id_weight_pair.weight_tensor) + with _initialized_session(): + _assert_sparse_tensor_value( + self, + sparse_tensor.SparseTensorValue( + indices=inputs.indices, + values=np.array((2, -1, 0), dtype=np.int64), + dense_shape=inputs.dense_shape), + id_weight_pair.id_tensor.eval()) - @test_util.run_deprecated_v1 def test_get_sparse_tensors_none_vocabulary_size(self): - column = fc._categorical_column_with_vocabulary_file( - key='aaa', vocabulary_file=self._wire_vocabulary_file_name) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=('marlo', 'skywalker', 'omar'), - dense_shape=(2, 2)) - id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) - self.assertIsNone(id_weight_pair.weight_tensor) - with _initialized_session(): - _assert_sparse_tensor_value(self, - sparse_tensor.SparseTensorValue( - indices=inputs.indices, - values=np.array( - (2, -1, 0), dtype=np.int64), - dense_shape=inputs.dense_shape), - id_weight_pair.id_tensor.eval()) + with ops.Graph().as_default(): + column = fc._categorical_column_with_vocabulary_file( + key='aaa', vocabulary_file=self._wire_vocabulary_file_name) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=('marlo', 'skywalker', 'omar'), + dense_shape=(2, 2)) + id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) + self.assertIsNone(id_weight_pair.weight_tensor) + with _initialized_session(): + _assert_sparse_tensor_value(self, + sparse_tensor.SparseTensorValue( + indices=inputs.indices, + values=np.array( + (2, -1, 0), dtype=np.int64), + dense_shape=inputs.dense_shape), + id_weight_pair.id_tensor.eval()) - @test_util.run_deprecated_v1 def test_transform_feature(self): - column = fc._categorical_column_with_vocabulary_file( - key='aaa', - vocabulary_file=self._wire_vocabulary_file_name, - vocabulary_size=self._wire_vocabulary_size) - inputs = 
sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=('marlo', 'skywalker', 'omar'), - dense_shape=(2, 2)) - id_tensor = _transform_features({'aaa': inputs}, [column])[column] - with _initialized_session(): - _assert_sparse_tensor_value( - self, - sparse_tensor.SparseTensorValue( - indices=inputs.indices, - values=np.array((2, -1, 0), dtype=np.int64), - dense_shape=inputs.dense_shape), self.evaluate(id_tensor)) + with ops.Graph().as_default(): + column = fc._categorical_column_with_vocabulary_file( + key='aaa', + vocabulary_file=self._wire_vocabulary_file_name, + vocabulary_size=self._wire_vocabulary_size) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=('marlo', 'skywalker', 'omar'), + dense_shape=(2, 2)) + id_tensor = _transform_features({'aaa': inputs}, [column])[column] + with _initialized_session(): + _assert_sparse_tensor_value( + self, + sparse_tensor.SparseTensorValue( + indices=inputs.indices, + values=np.array((2, -1, 0), dtype=np.int64), + dense_shape=inputs.dense_shape), self.evaluate(id_tensor)) def test_get_sparse_tensors_weight_collections(self): column = fc._categorical_column_with_vocabulary_file( @@ -3594,163 +3588,162 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)) self.assertCountEqual([], ops.get_collection('my_weights')) - @test_util.run_deprecated_v1 def test_get_sparse_tensors_dense_input(self): - column = fc._categorical_column_with_vocabulary_file( - key='aaa', - vocabulary_file=self._wire_vocabulary_file_name, - vocabulary_size=self._wire_vocabulary_size) - id_weight_pair = column._get_sparse_tensors( - _LazyBuilder({ - 'aaa': (('marlo', ''), ('skywalker', 'omar')) - })) - self.assertIsNone(id_weight_pair.weight_tensor) - with _initialized_session(): - _assert_sparse_tensor_value( - self, - sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=np.array((2, -1, 0), dtype=np.int64), - dense_shape=(2, 2)), - id_weight_pair.id_tensor.eval()) + with ops.Graph().as_default(): + column = fc._categorical_column_with_vocabulary_file( + key='aaa', + vocabulary_file=self._wire_vocabulary_file_name, + vocabulary_size=self._wire_vocabulary_size) + id_weight_pair = column._get_sparse_tensors( + _LazyBuilder({ + 'aaa': (('marlo', ''), ('skywalker', 'omar')) + })) + self.assertIsNone(id_weight_pair.weight_tensor) + with _initialized_session(): + _assert_sparse_tensor_value( + self, + sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=np.array((2, -1, 0), dtype=np.int64), + dense_shape=(2, 2)), + id_weight_pair.id_tensor.eval()) - @test_util.run_deprecated_v1 def test_get_sparse_tensors_default_value_in_vocabulary(self): - column = fc._categorical_column_with_vocabulary_file( - key='aaa', - vocabulary_file=self._wire_vocabulary_file_name, - vocabulary_size=self._wire_vocabulary_size, - default_value=2) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=('marlo', 'skywalker', 'omar'), - dense_shape=(2, 2)) - id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) - self.assertIsNone(id_weight_pair.weight_tensor) - with _initialized_session(): - _assert_sparse_tensor_value( - self, - sparse_tensor.SparseTensorValue( - indices=inputs.indices, - values=np.array((2, 2, 0), dtype=np.int64), - dense_shape=inputs.dense_shape), - id_weight_pair.id_tensor.eval()) + with ops.Graph().as_default(): + column = fc._categorical_column_with_vocabulary_file( + 
key='aaa', + vocabulary_file=self._wire_vocabulary_file_name, + vocabulary_size=self._wire_vocabulary_size, + default_value=2) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=('marlo', 'skywalker', 'omar'), + dense_shape=(2, 2)) + id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) + self.assertIsNone(id_weight_pair.weight_tensor) + with _initialized_session(): + _assert_sparse_tensor_value( + self, + sparse_tensor.SparseTensorValue( + indices=inputs.indices, + values=np.array((2, 2, 0), dtype=np.int64), + dense_shape=inputs.dense_shape), + id_weight_pair.id_tensor.eval()) - @test_util.run_deprecated_v1 def test_get_sparse_tensors_with_oov_buckets(self): - column = fc._categorical_column_with_vocabulary_file( - key='aaa', - vocabulary_file=self._wire_vocabulary_file_name, - vocabulary_size=self._wire_vocabulary_size, - num_oov_buckets=100) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1), (1, 2)), - values=('marlo', 'skywalker', 'omar', 'heisenberg'), - dense_shape=(2, 3)) - id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) - self.assertIsNone(id_weight_pair.weight_tensor) - with _initialized_session(): - _assert_sparse_tensor_value( - self, - sparse_tensor.SparseTensorValue( - indices=inputs.indices, - values=np.array((2, 33, 0, 62), dtype=np.int64), - dense_shape=inputs.dense_shape), - id_weight_pair.id_tensor.eval()) + with ops.Graph().as_default(): + column = fc._categorical_column_with_vocabulary_file( + key='aaa', + vocabulary_file=self._wire_vocabulary_file_name, + vocabulary_size=self._wire_vocabulary_size, + num_oov_buckets=100) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1), (1, 2)), + values=('marlo', 'skywalker', 'omar', 'heisenberg'), + dense_shape=(2, 3)) + id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) + self.assertIsNone(id_weight_pair.weight_tensor) + with _initialized_session(): + _assert_sparse_tensor_value( + self, + sparse_tensor.SparseTensorValue( + indices=inputs.indices, + values=np.array((2, 33, 0, 62), dtype=np.int64), + dense_shape=inputs.dense_shape), + id_weight_pair.id_tensor.eval()) - @test_util.run_deprecated_v1 def test_get_sparse_tensors_small_vocabulary_size(self): - # 'marlo' is the last entry in our vocabulary file, so be setting - # `vocabulary_size` to 1 less than number of entries in file, we take - # 'marlo' out of the vocabulary. - column = fc._categorical_column_with_vocabulary_file( - key='aaa', - vocabulary_file=self._wire_vocabulary_file_name, - vocabulary_size=self._wire_vocabulary_size - 1) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=('marlo', 'skywalker', 'omar'), - dense_shape=(2, 2)) - id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) - self.assertIsNone(id_weight_pair.weight_tensor) - with _initialized_session(): - _assert_sparse_tensor_value( - self, - sparse_tensor.SparseTensorValue( - indices=inputs.indices, - values=np.array((-1, -1, 0), dtype=np.int64), - dense_shape=inputs.dense_shape), - id_weight_pair.id_tensor.eval()) + with ops.Graph().as_default(): + # 'marlo' is the last entry in our vocabulary file, so be setting + # `vocabulary_size` to 1 less than number of entries in file, we take + # 'marlo' out of the vocabulary. 
+ column = fc._categorical_column_with_vocabulary_file( + key='aaa', + vocabulary_file=self._wire_vocabulary_file_name, + vocabulary_size=self._wire_vocabulary_size - 1) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=('marlo', 'skywalker', 'omar'), + dense_shape=(2, 2)) + id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) + self.assertIsNone(id_weight_pair.weight_tensor) + with _initialized_session(): + _assert_sparse_tensor_value( + self, + sparse_tensor.SparseTensorValue( + indices=inputs.indices, + values=np.array((-1, -1, 0), dtype=np.int64), + dense_shape=inputs.dense_shape), + id_weight_pair.id_tensor.eval()) - @test_util.run_deprecated_v1 def test_get_sparse_tensors_int32(self): - column = fc._categorical_column_with_vocabulary_file( - key='aaa', - vocabulary_file=self._warriors_vocabulary_file_name, - vocabulary_size=self._warriors_vocabulary_size, - dtype=dtypes.int32) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1), (2, 2)), - values=(11, 100, 30, 22), - dense_shape=(3, 3)) - id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) - self.assertIsNone(id_weight_pair.weight_tensor) - with _initialized_session(): - _assert_sparse_tensor_value( - self, - sparse_tensor.SparseTensorValue( - indices=inputs.indices, - values=np.array((2, -1, 0, 4), dtype=np.int64), - dense_shape=inputs.dense_shape), - id_weight_pair.id_tensor.eval()) + with ops.Graph().as_default(): + column = fc._categorical_column_with_vocabulary_file( + key='aaa', + vocabulary_file=self._warriors_vocabulary_file_name, + vocabulary_size=self._warriors_vocabulary_size, + dtype=dtypes.int32) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1), (2, 2)), + values=(11, 100, 30, 22), + dense_shape=(3, 3)) + id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) + self.assertIsNone(id_weight_pair.weight_tensor) + with _initialized_session(): + _assert_sparse_tensor_value( + self, + sparse_tensor.SparseTensorValue( + indices=inputs.indices, + values=np.array((2, -1, 0, 4), dtype=np.int64), + dense_shape=inputs.dense_shape), + id_weight_pair.id_tensor.eval()) - @test_util.run_deprecated_v1 def test_get_sparse_tensors_int32_dense_input(self): - default_value = -100 - column = fc._categorical_column_with_vocabulary_file( - key='aaa', - vocabulary_file=self._warriors_vocabulary_file_name, - vocabulary_size=self._warriors_vocabulary_size, - dtype=dtypes.int32, - default_value=default_value) - id_weight_pair = column._get_sparse_tensors( - _LazyBuilder({ - 'aaa': ((11, -1, -1), (100, 30, -1), (-1, -1, 22)) - })) - self.assertIsNone(id_weight_pair.weight_tensor) - with _initialized_session(): - _assert_sparse_tensor_value( - self, - sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1), (2, 2)), - values=np.array((2, default_value, 0, 4), dtype=np.int64), - dense_shape=(3, 3)), - id_weight_pair.id_tensor.eval()) + with ops.Graph().as_default(): + default_value = -100 + column = fc._categorical_column_with_vocabulary_file( + key='aaa', + vocabulary_file=self._warriors_vocabulary_file_name, + vocabulary_size=self._warriors_vocabulary_size, + dtype=dtypes.int32, + default_value=default_value) + id_weight_pair = column._get_sparse_tensors( + _LazyBuilder({ + 'aaa': ((11, -1, -1), (100, 30, -1), (-1, -1, 22)) + })) + self.assertIsNone(id_weight_pair.weight_tensor) + with _initialized_session(): + _assert_sparse_tensor_value( + self, + sparse_tensor.SparseTensorValue( 
+ indices=((0, 0), (1, 0), (1, 1), (2, 2)), + values=np.array((2, default_value, 0, 4), dtype=np.int64), + dense_shape=(3, 3)), + id_weight_pair.id_tensor.eval()) - @test_util.run_deprecated_v1 def test_get_sparse_tensors_int32_with_oov_buckets(self): - column = fc._categorical_column_with_vocabulary_file( - key='aaa', - vocabulary_file=self._warriors_vocabulary_file_name, - vocabulary_size=self._warriors_vocabulary_size, - dtype=dtypes.int32, - num_oov_buckets=100) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1), (2, 2)), - values=(11, 100, 30, 22), - dense_shape=(3, 3)) - id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) - self.assertIsNone(id_weight_pair.weight_tensor) - with _initialized_session(): - _assert_sparse_tensor_value( - self, - sparse_tensor.SparseTensorValue( - indices=inputs.indices, - values=np.array((2, 60, 0, 4), dtype=np.int64), - dense_shape=inputs.dense_shape), - id_weight_pair.id_tensor.eval()) + with ops.Graph().as_default(): + column = fc._categorical_column_with_vocabulary_file( + key='aaa', + vocabulary_file=self._warriors_vocabulary_file_name, + vocabulary_size=self._warriors_vocabulary_size, + dtype=dtypes.int32, + num_oov_buckets=100) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1), (2, 2)), + values=(11, 100, 30, 22), + dense_shape=(3, 3)) + id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) + self.assertIsNone(id_weight_pair.weight_tensor) + with _initialized_session(): + _assert_sparse_tensor_value( + self, + sparse_tensor.SparseTensorValue( + indices=inputs.indices, + values=np.array((2, 60, 0, 4), dtype=np.int64), + dense_shape=inputs.dense_shape), + id_weight_pair.id_tensor.eval()) - @test_util.run_deprecated_v1 def test_linear_model(self): wire_column = fc._categorical_column_with_vocabulary_file( key='wire', @@ -3777,7 +3770,6 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): # 'skywalker' -> 3, 'omar' -> 0: wire_var[3] + wire_var[0] = 4+1 = 5 self.assertAllClose(((3.,), (5.,)), self.evaluate(predictions)) - @test_util.run_deprecated_v1 def test_keras_linear_model(self): wire_column = fc._categorical_column_with_vocabulary_file( key='wire', From bfb9bebd133101bb8eac3860d33109b0b32e22ea Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Wed, 5 Aug 2020 11:35:40 -0700 Subject: [PATCH 2175/2522] Add accidentally removed headers back --- tensorflow/core/common_runtime/constant_folding_test.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/core/common_runtime/constant_folding_test.cc b/tensorflow/core/common_runtime/constant_folding_test.cc index 2edc92eac5e..b38588f04fc 100644 --- a/tensorflow/core/common_runtime/constant_folding_test.cc +++ b/tensorflow/core/common_runtime/constant_folding_test.cc @@ -17,7 +17,9 @@ limitations under the License. #include #include #include +#include "tensorflow/core/common_runtime/constant_folding.h" +#include "tensorflow/cc/ops/array_ops_internal.h" #include "tensorflow/cc/ops/nn_ops.h" #include "tensorflow/cc/ops/sendrecv_ops.h" #include "tensorflow/cc/ops/standard_ops.h" From cf77a7186afb130f9e8d97b0fd90ac288e253291 Mon Sep 17 00:00:00 2001 From: Marissa Ikonomidis Date: Wed, 5 Aug 2020 11:29:13 -0700 Subject: [PATCH 2176/2522] Enable presubmit testing for the mlir bridge. These tests will run with both the new bridge and the old bridge. 
PiperOrigin-RevId: 325061701 Change-Id: Id8802c48ac1693aa5b801207bc3f87cd303f03c4 --- tensorflow/python/distribute/BUILD | 11 +++++++++++ tensorflow/python/distribute/integration_test/BUILD | 1 + tensorflow/python/keras/distribute/BUILD | 12 ++++++++++++ tensorflow/python/keras/integration_test/BUILD | 1 + tensorflow/python/keras/layers/preprocessing/BUILD | 3 +++ tensorflow/python/tpu/BUILD | 2 ++ 6 files changed, 30 insertions(+) diff --git a/tensorflow/python/distribute/BUILD b/tensorflow/python/distribute/BUILD index c1b0ee6ce23..7965be1d90f 100644 --- a/tensorflow/python/distribute/BUILD +++ b/tensorflow/python/distribute/BUILD @@ -851,6 +851,7 @@ py_library( distribute_py_test( name = "strategy_combinations_test", srcs = ["strategy_combinations_test.py"], + disable_mlir_bridge = False, python_version = "PY3", deps = [ ":combinations", @@ -905,6 +906,7 @@ cuda_py_test( distribute_py_test( name = "checkpointing_test", srcs = ["checkpointing_test.py"], + disable_mlir_bridge = False, main = "checkpointing_test.py", tags = [ "multi_and_single_gpu", @@ -920,6 +922,7 @@ distribute_py_test( distribute_py_test( name = "input_lib_test", srcs = ["input_lib_test.py"], + disable_mlir_bridge = False, main = "input_lib_test.py", shard_count = 10, tags = [ @@ -1116,6 +1119,7 @@ distribute_py_test( name = "values_test", size = "medium", srcs = ["values_test.py"], + disable_mlir_bridge = False, main = "values_test.py", shard_count = 5, tags = [ @@ -1173,6 +1177,7 @@ distribute_py_test( distribute_py_test( name = "distribute_utils_test", srcs = ["distribute_utils_test.py"], + disable_mlir_bridge = False, main = "distribute_utils_test.py", tags = [ "multi_and_single_gpu", @@ -1238,6 +1243,7 @@ distribute_py_test( name = "ps_values_test", size = "medium", srcs = ["ps_values_test.py"], + disable_mlir_bridge = False, main = "ps_values_test.py", tags = [ "multi_and_single_gpu", @@ -1257,6 +1263,7 @@ distribute_py_test( distribute_py_test( name = "moving_averages_test", srcs = ["moving_averages_test.py"], + disable_mlir_bridge = False, main = "moving_averages_test.py", deps = [ ":combinations", @@ -1274,6 +1281,7 @@ distribute_py_test( distribute_py_test( name = "custom_training_loop_gradient_test", srcs = ["custom_training_loop_gradient_test.py"], + disable_mlir_bridge = False, main = "custom_training_loop_gradient_test.py", tags = [ "multi_and_single_gpu", @@ -1472,6 +1480,7 @@ distribute_py_test( distribute_py_test( name = "zero_batch_test", srcs = ["zero_batch_test.py"], + disable_mlir_bridge = False, main = "zero_batch_test.py", deps = [ ":combinations", @@ -1671,6 +1680,7 @@ py_test( distribute_py_test( name = "strategy_common_test", srcs = ["strategy_common_test.py"], + disable_mlir_bridge = False, python_version = "PY3", shard_count = 2, tags = [ @@ -1703,6 +1713,7 @@ distribute_py_test( distribute_py_test( name = "tf_function_test", srcs = ["tf_function_test.py"], + disable_mlir_bridge = False, main = "tf_function_test.py", tags = [ "multi_and_single_gpu", diff --git a/tensorflow/python/distribute/integration_test/BUILD b/tensorflow/python/distribute/integration_test/BUILD index d997e64be05..156699fad7b 100644 --- a/tensorflow/python/distribute/integration_test/BUILD +++ b/tensorflow/python/distribute/integration_test/BUILD @@ -8,6 +8,7 @@ package( distribute_py_test( name = "saved_model_test", srcs = ["saved_model_test.py"], + disable_mlir_bridge = False, deps = [ "//tensorflow:tensorflow_py", "//tensorflow/python/distribute:combinations", diff --git a/tensorflow/python/keras/distribute/BUILD 
b/tensorflow/python/keras/distribute/BUILD index 56a6a9d0e1f..2a0421cf998 100644 --- a/tensorflow/python/keras/distribute/BUILD +++ b/tensorflow/python/keras/distribute/BUILD @@ -144,6 +144,7 @@ cuda_py_test( distribute_py_test( name = "checkpointing_test", srcs = ["checkpointing_test.py"], + disable_mlir_bridge = False, main = "checkpointing_test.py", tags = [ "multi_and_single_gpu", @@ -221,6 +222,7 @@ distribute_py_test( distribute_py_test( name = "custom_training_loop_metrics_test", srcs = ["custom_training_loop_metrics_test.py"], + disable_mlir_bridge = False, main = "custom_training_loop_metrics_test.py", tags = [ "multi_and_single_gpu", @@ -267,6 +269,7 @@ distribute_py_test( distribute_py_test( name = "custom_training_loop_optimizer_test", srcs = ["custom_training_loop_optimizer_test.py"], + disable_mlir_bridge = False, main = "custom_training_loop_optimizer_test.py", tags = [ "multi_and_single_gpu", @@ -311,6 +314,7 @@ py_library( distribute_py_test( name = "keras_premade_models_test", srcs = ["keras_premade_models_test.py"], + disable_mlir_bridge = False, full_precision = True, main = "keras_premade_models_test.py", shard_count = 4, @@ -347,6 +351,7 @@ distribute_py_test( distribute_py_test( name = "distributed_training_utils_test", srcs = ["distributed_training_utils_test.py"], + disable_mlir_bridge = False, full_precision = True, main = "distributed_training_utils_test.py", deps = [ @@ -389,6 +394,7 @@ distribute_py_test( name = "keras_dnn_correctness_test", size = "medium", srcs = ["keras_dnn_correctness_test.py"], + disable_mlir_bridge = False, full_precision = True, main = "keras_dnn_correctness_test.py", # Shard count is set to an odd number to distribute tasks across @@ -426,6 +432,7 @@ distribute_py_test( name = "keras_image_model_correctness_test", size = "medium", srcs = ["keras_image_model_correctness_test.py"], + disable_mlir_bridge = False, full_precision = True, main = "keras_image_model_correctness_test.py", shard_count = 16, @@ -444,6 +451,7 @@ distribute_py_test( distribute_py_test( name = "keras_metrics_test", srcs = ["keras_metrics_test.py"], + disable_mlir_bridge = False, main = "keras_metrics_test.py", tags = [ "multi_and_single_gpu", @@ -484,6 +492,7 @@ distribute_py_test( name = "keras_save_load_test", size = "medium", srcs = ["keras_save_load_test.py"], + disable_mlir_bridge = False, full_precision = True, main = "keras_save_load_test.py", shard_count = 7, @@ -500,6 +509,7 @@ distribute_py_test( name = "keras_stateful_lstm_model_correctness_test", size = "medium", srcs = ["keras_stateful_lstm_model_correctness_test.py"], + disable_mlir_bridge = False, full_precision = True, main = "keras_stateful_lstm_model_correctness_test.py", shard_count = 4, @@ -749,6 +759,7 @@ distribute_py_test( name = "saved_model_save_load_test", size = "medium", srcs = ["saved_model_save_load_test.py"], + disable_mlir_bridge = False, full_precision = True, main = "saved_model_save_load_test.py", shard_count = 7, @@ -766,6 +777,7 @@ distribute_py_test( name = "saved_model_mixed_api_test", size = "medium", srcs = ["saved_model_mixed_api_test.py"], + disable_mlir_bridge = False, full_precision = True, main = "saved_model_mixed_api_test.py", shard_count = 7, diff --git a/tensorflow/python/keras/integration_test/BUILD b/tensorflow/python/keras/integration_test/BUILD index b23dcc59b97..20e1a886d4e 100644 --- a/tensorflow/python/keras/integration_test/BUILD +++ b/tensorflow/python/keras/integration_test/BUILD @@ -97,6 +97,7 @@ tpu_py_test( name = "tpu_strategy_test", srcs = 
["tpu_strategy_test.py"], disable_experimental = True, + disable_mlir_bridge = False, python_version = "PY3", tags = ["no_oss"], deps = [ diff --git a/tensorflow/python/keras/layers/preprocessing/BUILD b/tensorflow/python/keras/layers/preprocessing/BUILD index 1fa6deb8cd9..3e6624bac40 100644 --- a/tensorflow/python/keras/layers/preprocessing/BUILD +++ b/tensorflow/python/keras/layers/preprocessing/BUILD @@ -326,6 +326,7 @@ tf_py_test( distribute_py_test( name = "category_encoding_distribution_test", srcs = ["category_encoding_distribution_test.py"], + disable_mlir_bridge = False, main = "category_encoding_distribution_test.py", python_version = "PY3", tags = [ @@ -423,6 +424,7 @@ cuda_py_test( tpu_py_test( name = "hashing_distribution_test", srcs = ["hashing_distribution_test.py"], + disable_mlir_bridge = False, main = "hashing_distribution_test.py", python_version = "PY3", tags = ["multi_and_single_gpu"], @@ -453,6 +455,7 @@ tf_py_test( tpu_py_test( name = "index_lookup_distribution_test", srcs = ["index_lookup_distribution_test.py"], + disable_mlir_bridge = False, main = "index_lookup_distribution_test.py", python_version = "PY3", tags = ["no_oss"], diff --git a/tensorflow/python/tpu/BUILD b/tensorflow/python/tpu/BUILD index e1e71e62692..9cd75d1bed7 100644 --- a/tensorflow/python/tpu/BUILD +++ b/tensorflow/python/tpu/BUILD @@ -474,6 +474,7 @@ tpu_py_test( "tpu_embedding_v2_test.py", ], disable_experimental = True, + disable_mlir_bridge = False, python_version = "PY3", shard_count = 4, srcs_version = "PY2AND3", @@ -505,6 +506,7 @@ tpu_py_test( "tpu_embedding_v2_correctness_test.py", ], disable_experimental = True, + disable_mlir_bridge = False, python_version = "PY3", shard_count = 4, srcs_version = "PY2AND3", From ef0f08f5ddef020725f09499ca90812f93de3c9e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 5 Aug 2020 11:49:51 -0700 Subject: [PATCH 2177/2522] Added GatherNd and StridedSlice to the estimator. 
PiperOrigin-RevId: 325066413 Change-Id: I15ea90b8ad01d127d028756d7267193594bd15ac --- .../grappler/costs/op_level_cost_estimator.cc | 23 +++++--- .../costs/op_level_cost_estimator_test.cc | 53 ++++++++++++++----- 2 files changed, 56 insertions(+), 20 deletions(-) diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc index ed86e92a2e7..d76ff4359c1 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc @@ -73,6 +73,7 @@ constexpr char kSize[] = "Size"; constexpr char kStopGradient[] = "StopGradient"; constexpr char kPreventGradient[] = "PreventGradient"; constexpr char kGather[] = "Gather"; +constexpr char kGatherNd[] = "GatherNd"; constexpr char kGatherV2[] = "GatherV2"; constexpr char kScatterAdd[] = "ScatterAdd"; constexpr char kScatterDiv[] = "ScatterDiv"; @@ -82,6 +83,7 @@ constexpr char kScatterMul[] = "ScatterMul"; constexpr char kScatterSub[] = "ScatterSub"; constexpr char kScatterUpdate[] = "ScatterUpdate"; constexpr char kSlice[] = "Slice"; +constexpr char kStridedSlice[] = "StridedSlice"; constexpr char kSpaceToDepth[] = "SpaceToDepth"; constexpr char kTranspose[] = "Transpose"; constexpr char kMaxPool[] = "MaxPool"; @@ -402,6 +404,8 @@ OpLevelCostEstimator::OpLevelCostEstimator() { device_cost_impl_.emplace(kGather, wrap(&OpLevelCostEstimator::PredictGatherOrSlice)); + device_cost_impl_.emplace(kGatherNd, + wrap(&OpLevelCostEstimator::PredictGatherOrSlice)); device_cost_impl_.emplace(kGatherV2, wrap(&OpLevelCostEstimator::PredictGatherOrSlice)); device_cost_impl_.emplace(kScatterAdd, @@ -421,6 +425,8 @@ OpLevelCostEstimator::OpLevelCostEstimator() { device_cost_impl_.emplace(kSlice, wrap(&OpLevelCostEstimator::PredictGatherOrSlice)); + device_cost_impl_.emplace(kStridedSlice, + wrap(&OpLevelCostEstimator::PredictGatherOrSlice)); device_cost_impl_.emplace(kPlaceholder, wrap(&OpLevelCostEstimator::PredictIdentity)); @@ -1799,15 +1805,20 @@ Costs OpLevelCostEstimator::PredictGatherOrSlice( const double output_size = CalculateOutputSize(op_info, &unknown_shapes); double input_size = output_size; + int begin_input_index = 1, end_input_index; if (op_info.op() == "Slice") { - // Add 'begin' & 'size' tensors sizes. - input_size += - CalculateTensorElementCount(op_info.inputs(1), &unknown_shapes) + - CalculateTensorElementCount(op_info.inputs(2), &unknown_shapes); + // Slice: 'input' (omitted), 'begin', 'size' + end_input_index = 3; + } else if (op_info.op() == "StridedSlice") { + // StridedSlice: 'input' (omitted), 'begin', 'end', 'strides' + end_input_index = 4; } else { - // Assuming this is "Gather" or "GatherV2" op, add 'indices' size. 
+ // Gather, GatherV2, GatherNd: 'params' (omitted), 'indices' + end_input_index = 2; + } + for (int i = begin_input_index; i < end_input_index; ++i) { input_size += - CalculateTensorElementCount(op_info.inputs(1), &unknown_shapes); + CalculateTensorElementCount(op_info.inputs(i), &unknown_shapes); } Costs costs = diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc index 90f3e969df9..c5209753a90 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc @@ -641,22 +641,26 @@ TEST_F(OpLevelCostEstimatorTest, TestPersistentOpCosts) { } TEST_F(OpLevelCostEstimatorTest, TestGatherCosts) { - OpContext op_context; - SetCpuDevice(&op_context.op_info); - op_context.op_info.set_op("Gather"); + std::vector gather_ops = {"Gather", "GatherNd", "GatherV2"}; - // Huge first input shouldn't affect Gather execution and memory costs. - DescribeArbitraryRankInput({10000000, 10}, DT_FLOAT, &op_context.op_info); - DescribeArbitraryRankInput({16}, DT_INT64, &op_context.op_info); - DescribeArbitraryRankOutput({16, 10}, DT_FLOAT, &op_context.op_info); + for (const auto& op : gather_ops) { + OpContext op_context; + SetCpuDevice(&op_context.op_info); + op_context.op_info.set_op(op); - auto cost = estimator_.PredictCosts(op_context); - EXPECT_EQ(Costs::Duration(130), cost.memory_time); - EXPECT_EQ(Costs::Duration(16), cost.compute_time); - EXPECT_EQ(Costs::Duration(146), cost.execution_time); - EXPECT_EQ(1, cost.num_ops_total); - EXPECT_FALSE(cost.inaccurate); - EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); + // Huge first input shouldn't affect Gather execution and memory costs. + DescribeArbitraryRankInput({10000000, 10}, DT_FLOAT, &op_context.op_info); + DescribeArbitraryRankInput({16}, DT_INT64, &op_context.op_info); + DescribeArbitraryRankOutput({16, 10}, DT_FLOAT, &op_context.op_info); + + auto cost = estimator_.PredictCosts(op_context); + EXPECT_EQ(Costs::Duration(130), cost.memory_time); + EXPECT_EQ(Costs::Duration(16), cost.compute_time); + EXPECT_EQ(Costs::Duration(146), cost.execution_time); + EXPECT_EQ(1, cost.num_ops_total); + EXPECT_FALSE(cost.inaccurate); + EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); + } } TEST_F(OpLevelCostEstimatorTest, TestGatherCostsWithoutOutput) { @@ -697,6 +701,27 @@ TEST_F(OpLevelCostEstimatorTest, TestSliceCosts) { EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); } +TEST_F(OpLevelCostEstimatorTest, TestStridedSliceCosts) { + OpContext op_context; + SetCpuDevice(&op_context.op_info); + op_context.op_info.set_op("StridedSlice"); + + // Huge first input shouldn't affect StridedSlice execution and memory costs. 
+ DescribeArbitraryRankInput({10000000, 10}, DT_FLOAT, &op_context.op_info); + DescribeArbitraryRankInput({2}, DT_INT64, &op_context.op_info); + DescribeArbitraryRankInput({2}, DT_INT64, &op_context.op_info); + DescribeArbitraryRankInput({2}, DT_INT64, &op_context.op_info); + DescribeArbitraryRankOutput({10, 10}, DT_FLOAT, &op_context.op_info); + + auto cost = estimator_.PredictCosts(op_context); + EXPECT_EQ(Costs::Duration(81), cost.memory_time); + EXPECT_EQ(Costs::Duration(10), cost.compute_time); + EXPECT_EQ(Costs::Duration(91), cost.execution_time); + EXPECT_EQ(1, cost.num_ops_total); + EXPECT_FALSE(cost.inaccurate); + EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); +} + TEST_F(OpLevelCostEstimatorTest, TestScatterOps) { std::vector scatter_ops = {"ScatterAdd", "ScatterDiv", "ScatterMax", "ScatterMin", "ScatterMul", "ScatterSub", From bbc57503e629d9e11938c205d2ec945f643b50bd Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 5 Aug 2020 11:58:54 -0700 Subject: [PATCH 2178/2522] Fix an indentations in TF_FloorDivOp and TF_FloorModOp. Since I'm learning TableGen, the old indenting looked like some kind of multi-result op. Also fix a spelling mistake. PiperOrigin-RevId: 325068361 Change-Id: Iab046c326a588b4f09c2410d127a5bd6a29f2fb2 --- .../mlir/xla/transforms/legalize_tf_patterns.td | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_patterns.td b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_patterns.td index 0ef62deed7d..f0ad04c8246 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_patterns.td +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_patterns.td @@ -128,7 +128,7 @@ def : Pat<(TF_FloorDivOp AnyRankedTensor:$l, AnyRankedTensor:$r), // return x / y; // } // -// BraodcastToDimensions is used to compute the broadcast attr to higher +// BroadcastToDimensions is used to compute the broadcast attr to higher // dimensions. This computes the broadcast of 'l' to broadcast('l', 'r') // without returning the broadcast of 'r' to broadcast('l', 'r'). 
// @@ -143,14 +143,14 @@ def : Pat<(TF_FloorDivOp AnyRankedTensor:$l, AnyRankedTensor:$r), (HLOClient_BroadcastCompareOp $r, (HLO_ConstOp (GetScalarOfType<0> $r)), (NullDenseIntElementsAttr), HLO_COMPARISON_DIRECTION_LT), (BinBroadcastDimensions $l, $r), HLO_COMPARISON_DIRECTION_EQ), - (HLOClient_BroadcastDivOp $l, $r, (BinBroadcastDimensions $l, $r)), - (HLOClient_BroadcastDivOp - (HLO_NegOp:$neg (HLOClient_BroadcastAddOp (HLO_AbsOp $l), + (HLOClient_BroadcastDivOp $l, $r, (BinBroadcastDimensions $l, $r)), + (HLOClient_BroadcastDivOp + (HLO_NegOp:$neg (HLOClient_BroadcastAddOp (HLO_AbsOp $l), (HLOClient_BroadcastSubOp (HLO_AbsOp $r), (HLO_ConstOp (GetScalarOfType<1> $r)), (NullDenseIntElementsAttr)), (BinBroadcastDimensions $l, $r))), - (HLO_AbsOp:$abs $r), (BinBroadcastDimensions $neg, $abs))), + (HLO_AbsOp:$abs $r), (BinBroadcastDimensions $neg, $abs))), [(SignedIntTensor $l)]>; // Performs a substitution of FloorMod designed to correct for possibly negative @@ -175,8 +175,8 @@ def : Pat<(TF_FloorModOp AnyRankedTensor:$l, AnyRankedTensor:$r), (BinBroadcastDimensions $rem, $r_zeros), HLO_COMPARISON_DIRECTION_LT), (BinBroadcastDimensions $r_cmp, $rem_cmp), HLO_COMPARISON_DIRECTION_NE), (NullDenseIntElementsAttr)), - (HLOClient_BroadcastAddOp $r, - $rem, (BinBroadcastDimensions $r, $rem)), $rem)>; + (HLOClient_BroadcastAddOp $r, + $rem, (BinBroadcastDimensions $r, $rem)), $rem)>; //===----------------------------------------------------------------------===// // Logical & bitwise binary op patterns. From ebf88156d0637675d209a9a7ba84f7ac86e5591c Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Wed, 5 Aug 2020 12:14:14 -0700 Subject: [PATCH 2179/2522] Pick up nightly script changes from master PiperOrigin-RevId: 325071992 Change-Id: I16d6a48729039001c7f3551a31053a9232d3b824 --- .../ci_build/rel/macos/cpu_libtensorflow.sh | 16 ++++++++++++--- .../tools/ci_build/rel/macos/cpu_py35_pip.sh | 4 +--- .../tools/ci_build/rel/macos/cpu_py36_pip.sh | 4 +--- .../tools/ci_build/rel/macos/cpu_py37_pip.sh | 4 +--- .../ci_build/rel/macos/cpu_py38_nonpip.sh | 0 .../tools/ci_build/rel/macos/cpu_py38_pip.sh | 4 +--- .../ci_build/rel/ubuntu/cpu_libtensorflow.sh | 0 .../ci_build/rel/ubuntu/cpu_py35_nonpip.sh | 0 .../tools/ci_build/rel/ubuntu/cpu_py35_pip.sh | 7 +------ .../ci_build/rel/ubuntu/cpu_py36_nonpip.sh | 0 .../tools/ci_build/rel/ubuntu/cpu_py36_pip.sh | 7 +------ .../ci_build/rel/ubuntu/cpu_py37_nonpip.sh | 0 .../tools/ci_build/rel/ubuntu/cpu_py37_pip.sh | 7 +------ .../ci_build/rel/ubuntu/cpu_py38_nonpip.sh | 0 .../tools/ci_build/rel/ubuntu/cpu_py38_pip.sh | 7 +------ .../ci_build/rel/ubuntu/gpu_libtensorflow.sh | 0 .../ci_build/rel/ubuntu/gpu_py35_nonpip.sh | 0 .../tools/ci_build/rel/ubuntu/gpu_py35_pip.sh | 20 +++---------------- .../ci_build/rel/ubuntu/gpu_py36_nonpip.sh | 0 .../tools/ci_build/rel/ubuntu/gpu_py36_pip.sh | 20 +++---------------- .../ci_build/rel/ubuntu/gpu_py37_nonpip.sh | 0 .../tools/ci_build/rel/ubuntu/gpu_py37_pip.sh | 20 +++---------------- .../ci_build/rel/ubuntu/gpu_py38_nonpip.sh | 0 .../tools/ci_build/rel/ubuntu/gpu_py38_pip.sh | 20 +++---------------- .../tools/ci_build/rel/ubuntu/sanity.sh | 0 .../rel/windows/cpu_libtensorflow.bat | 0 .../tools/ci_build/rel/windows/cpu_py35.bat | 2 +- .../tools/ci_build/rel/windows/cpu_py36.bat | 2 +- .../tools/ci_build/rel/windows/cpu_py37.bat | 2 +- .../tools/ci_build/rel/windows/cpu_py38.bat | 2 +- .../rel/windows/gpu_libtensorflow.bat | 0 .../ci_build/rel/windows/gpu_pip_on_cpu.bat | 0 .../tools/ci_build/rel/windows/gpu_py35.bat | 
2 +- .../tools/ci_build/rel/windows/gpu_py36.bat | 2 +- .../tools/ci_build/rel/windows/gpu_py37.bat | 2 +- .../tools/ci_build/rel/windows/gpu_py38.bat | 2 +- 36 files changed, 41 insertions(+), 115 deletions(-) mode change 100755 => 100644 tensorflow/tools/ci_build/rel/macos/cpu_py38_nonpip.sh mode change 100755 => 100644 tensorflow/tools/ci_build/rel/ubuntu/cpu_libtensorflow.sh mode change 100755 => 100644 tensorflow/tools/ci_build/rel/ubuntu/cpu_py35_nonpip.sh mode change 100755 => 100644 tensorflow/tools/ci_build/rel/ubuntu/cpu_py35_pip.sh mode change 100755 => 100644 tensorflow/tools/ci_build/rel/ubuntu/cpu_py36_nonpip.sh mode change 100755 => 100644 tensorflow/tools/ci_build/rel/ubuntu/cpu_py36_pip.sh mode change 100755 => 100644 tensorflow/tools/ci_build/rel/ubuntu/cpu_py37_nonpip.sh mode change 100755 => 100644 tensorflow/tools/ci_build/rel/ubuntu/cpu_py37_pip.sh mode change 100755 => 100644 tensorflow/tools/ci_build/rel/ubuntu/cpu_py38_nonpip.sh mode change 100755 => 100644 tensorflow/tools/ci_build/rel/ubuntu/cpu_py38_pip.sh mode change 100755 => 100644 tensorflow/tools/ci_build/rel/ubuntu/gpu_libtensorflow.sh mode change 100755 => 100644 tensorflow/tools/ci_build/rel/ubuntu/gpu_py35_nonpip.sh mode change 100755 => 100644 tensorflow/tools/ci_build/rel/ubuntu/gpu_py35_pip.sh mode change 100755 => 100644 tensorflow/tools/ci_build/rel/ubuntu/gpu_py36_nonpip.sh mode change 100755 => 100644 tensorflow/tools/ci_build/rel/ubuntu/gpu_py36_pip.sh mode change 100755 => 100644 tensorflow/tools/ci_build/rel/ubuntu/gpu_py37_nonpip.sh mode change 100755 => 100644 tensorflow/tools/ci_build/rel/ubuntu/gpu_py37_pip.sh mode change 100755 => 100644 tensorflow/tools/ci_build/rel/ubuntu/gpu_py38_nonpip.sh mode change 100755 => 100644 tensorflow/tools/ci_build/rel/ubuntu/gpu_py38_pip.sh mode change 100755 => 100644 tensorflow/tools/ci_build/rel/ubuntu/sanity.sh mode change 100755 => 100644 tensorflow/tools/ci_build/rel/windows/cpu_libtensorflow.bat mode change 100755 => 100644 tensorflow/tools/ci_build/rel/windows/cpu_py35.bat mode change 100755 => 100644 tensorflow/tools/ci_build/rel/windows/cpu_py36.bat mode change 100755 => 100644 tensorflow/tools/ci_build/rel/windows/cpu_py37.bat mode change 100755 => 100644 tensorflow/tools/ci_build/rel/windows/cpu_py38.bat mode change 100755 => 100644 tensorflow/tools/ci_build/rel/windows/gpu_libtensorflow.bat mode change 100755 => 100644 tensorflow/tools/ci_build/rel/windows/gpu_pip_on_cpu.bat mode change 100755 => 100644 tensorflow/tools/ci_build/rel/windows/gpu_py35.bat mode change 100755 => 100644 tensorflow/tools/ci_build/rel/windows/gpu_py36.bat mode change 100755 => 100644 tensorflow/tools/ci_build/rel/windows/gpu_py37.bat mode change 100755 => 100644 tensorflow/tools/ci_build/rel/windows/gpu_py38.bat diff --git a/tensorflow/tools/ci_build/rel/macos/cpu_libtensorflow.sh b/tensorflow/tools/ci_build/rel/macos/cpu_libtensorflow.sh index ccc80e1bafd..3dfab5a2aaa 100644 --- a/tensorflow/tools/ci_build/rel/macos/cpu_libtensorflow.sh +++ b/tensorflow/tools/ci_build/rel/macos/cpu_libtensorflow.sh @@ -13,11 +13,21 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== - -set -e -set -x +echo "chmod go+w lib_package/*" >> tensorflow/tools/ci_build/linux/libtensorflow.sh +echo "bazel clean --expunge" >> tensorflow/tools/ci_build/linux/libtensorflow.sh # Install latest bazel source tensorflow/tools/ci_build/release/common.sh install_bazelisk + +# Pick a version of xcode +export DEVELOPER_DIR=/Applications/Xcode_10.3.app/Contents/Developer +sudo xcode-select -s "${DEVELOPER_DIR}" + +# Update the version string to nightly +./tensorflow/tools/ci_build/update_version.py --nightly + tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh + +# Copy the nightly version update script +cp tensorflow/tools/ci_build/builds/libtensorflow_nightly_symlink.sh lib_package diff --git a/tensorflow/tools/ci_build/rel/macos/cpu_py35_pip.sh b/tensorflow/tools/ci_build/rel/macos/cpu_py35_pip.sh index 3f31033b2ac..99c2a149394 100644 --- a/tensorflow/tools/ci_build/rel/macos/cpu_py35_pip.sh +++ b/tensorflow/tools/ci_build/rel/macos/cpu_py35_pip.sh @@ -33,13 +33,11 @@ export TF_PYTHON_VERSION='python3.5' export TF_BUILD_BOTH_CPU_PACKAGES=1 # Run configure. -export TF_NEED_CUDA=0 -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) yes "" | "$PYTHON_BIN_PATH" configure.py # Export optional variables for running pip.sh -export TF_BUILD_FLAGS="--config=opt --config=v2" +export TF_BUILD_FLAGS="--config=release_cpu_macos" export TF_TEST_FLAGS="--define=no_tensorflow_py_deps=true --test_lang_filters=py --test_output=errors --verbose_failures=true --keep_going --test_env=TF2_BEHAVIOR=1" export TF_TEST_TARGETS="//tensorflow/python/..." export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" diff --git a/tensorflow/tools/ci_build/rel/macos/cpu_py36_pip.sh b/tensorflow/tools/ci_build/rel/macos/cpu_py36_pip.sh index 26ee4ea8edb..375a8c705fa 100644 --- a/tensorflow/tools/ci_build/rel/macos/cpu_py36_pip.sh +++ b/tensorflow/tools/ci_build/rel/macos/cpu_py36_pip.sh @@ -33,13 +33,11 @@ export TF_PYTHON_VERSION='python3.6' export TF_BUILD_BOTH_CPU_PACKAGES=1 # Run configure. -export TF_NEED_CUDA=0 -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) yes "" | "$PYTHON_BIN_PATH" configure.py # Export optional variables for running pip.sh -export TF_BUILD_FLAGS="--config=opt --config=v2" +export TF_BUILD_FLAGS="--config=release_cpu_macos" export TF_TEST_FLAGS="--define=no_tensorflow_py_deps=true --test_lang_filters=py --test_output=errors --verbose_failures=true --keep_going --test_env=TF2_BEHAVIOR=1" export TF_TEST_TARGETS="//tensorflow/python/..." export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" diff --git a/tensorflow/tools/ci_build/rel/macos/cpu_py37_pip.sh b/tensorflow/tools/ci_build/rel/macos/cpu_py37_pip.sh index ed577db961a..ea6779be698 100644 --- a/tensorflow/tools/ci_build/rel/macos/cpu_py37_pip.sh +++ b/tensorflow/tools/ci_build/rel/macos/cpu_py37_pip.sh @@ -33,13 +33,11 @@ export TF_PYTHON_VERSION='python3.7' export TF_BUILD_BOTH_CPU_PACKAGES=1 # Run configure. 
-export TF_NEED_CUDA=0 -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) yes "" | "$PYTHON_BIN_PATH" configure.py # Export optional variables for running pip.sh -export TF_BUILD_FLAGS="--config=opt --config=v2" +export TF_BUILD_FLAGS="--config=release_cpu_macos" export TF_TEST_FLAGS="--define=no_tensorflow_py_deps=true --test_lang_filters=py --test_output=errors --verbose_failures=true --keep_going --test_env=TF2_BEHAVIOR=1" export TF_TEST_TARGETS="//tensorflow/python/..." export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" diff --git a/tensorflow/tools/ci_build/rel/macos/cpu_py38_nonpip.sh b/tensorflow/tools/ci_build/rel/macos/cpu_py38_nonpip.sh old mode 100755 new mode 100644 diff --git a/tensorflow/tools/ci_build/rel/macos/cpu_py38_pip.sh b/tensorflow/tools/ci_build/rel/macos/cpu_py38_pip.sh index f8eda5a7520..f0ef8e89766 100644 --- a/tensorflow/tools/ci_build/rel/macos/cpu_py38_pip.sh +++ b/tensorflow/tools/ci_build/rel/macos/cpu_py38_pip.sh @@ -33,13 +33,11 @@ export TF_PYTHON_VERSION='python3.8' export TF_BUILD_BOTH_CPU_PACKAGES=1 # Run configure. -export TF_NEED_CUDA=0 -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) yes "" | "$PYTHON_BIN_PATH" configure.py # Export optional variables for running pip.sh -export TF_BUILD_FLAGS="--config=opt --config=v2" +export TF_BUILD_FLAGS="--config=release_cpu_macos" export TF_TEST_FLAGS="--define=no_tensorflow_py_deps=true --test_lang_filters=py --test_output=errors --verbose_failures=true --keep_going --test_env=TF2_BEHAVIOR=1" export TF_TEST_TARGETS="//tensorflow/python/..." export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" diff --git a/tensorflow/tools/ci_build/rel/ubuntu/cpu_libtensorflow.sh b/tensorflow/tools/ci_build/rel/ubuntu/cpu_libtensorflow.sh old mode 100755 new mode 100644 diff --git a/tensorflow/tools/ci_build/rel/ubuntu/cpu_py35_nonpip.sh b/tensorflow/tools/ci_build/rel/ubuntu/cpu_py35_nonpip.sh old mode 100755 new mode 100644 diff --git a/tensorflow/tools/ci_build/rel/ubuntu/cpu_py35_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu/cpu_py35_pip.sh old mode 100755 new mode 100644 index b938ed2fde1..bdbb7f15e34 --- a/tensorflow/tools/ci_build/rel/ubuntu/cpu_py35_pip.sh +++ b/tensorflow/tools/ci_build/rel/ubuntu/cpu_py35_pip.sh @@ -28,11 +28,6 @@ export CONTAINER_TYPE="CPU" export TF_PYTHON_VERSION='python3.5' # Run configure. -export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=0 -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) yes "" | "$PYTHON_BIN_PATH" configure.py @@ -40,7 +35,7 @@ yes "" | "$PYTHON_BIN_PATH" configure.py source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Export optional variables for running pip.sh -export TF_BUILD_FLAGS="--config=opt --config=v2 --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain" +export TF_BUILD_FLAGS="--config=release_cpu_linux" export TF_TEST_FLAGS="--define=no_tensorflow_py_deps=true --test_lang_filters=py --test_output=errors --verbose_failures=true --keep_going --test_env=TF2_BEHAVIOR=1" export TF_TEST_TARGETS="${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... 
" export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" diff --git a/tensorflow/tools/ci_build/rel/ubuntu/cpu_py36_nonpip.sh b/tensorflow/tools/ci_build/rel/ubuntu/cpu_py36_nonpip.sh old mode 100755 new mode 100644 diff --git a/tensorflow/tools/ci_build/rel/ubuntu/cpu_py36_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu/cpu_py36_pip.sh old mode 100755 new mode 100644 index 44ae820c507..6277291043c --- a/tensorflow/tools/ci_build/rel/ubuntu/cpu_py36_pip.sh +++ b/tensorflow/tools/ci_build/rel/ubuntu/cpu_py36_pip.sh @@ -28,11 +28,6 @@ export CONTAINER_TYPE="CPU" export TF_PYTHON_VERSION='python3.6' # Run configure. -export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=0 -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) yes "" | "$PYTHON_BIN_PATH" configure.py @@ -40,7 +35,7 @@ yes "" | "$PYTHON_BIN_PATH" configure.py source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Export optional variables for running pip.sh -export TF_BUILD_FLAGS="--config=opt --config=v2 --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain" +export TF_BUILD_FLAGS="--config=release_cpu_linux" export TF_TEST_FLAGS="--define=no_tensorflow_py_deps=true --test_lang_filters=py --test_output=errors --verbose_failures=true --keep_going --test_env=TF2_BEHAVIOR=1" export TF_TEST_TARGETS="${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... " export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" diff --git a/tensorflow/tools/ci_build/rel/ubuntu/cpu_py37_nonpip.sh b/tensorflow/tools/ci_build/rel/ubuntu/cpu_py37_nonpip.sh old mode 100755 new mode 100644 diff --git a/tensorflow/tools/ci_build/rel/ubuntu/cpu_py37_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu/cpu_py37_pip.sh old mode 100755 new mode 100644 index 28784f9f499..ff88ae46f39 --- a/tensorflow/tools/ci_build/rel/ubuntu/cpu_py37_pip.sh +++ b/tensorflow/tools/ci_build/rel/ubuntu/cpu_py37_pip.sh @@ -28,11 +28,6 @@ export CONTAINER_TYPE="CPU" export TF_PYTHON_VERSION='python3.7' # Run configure. -export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=0 -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) yes "" | "$PYTHON_BIN_PATH" configure.py @@ -40,7 +35,7 @@ yes "" | "$PYTHON_BIN_PATH" configure.py source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Export optional variables for running pip.sh -export TF_BUILD_FLAGS="--config=opt --config=v2 --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain" +export TF_BUILD_FLAGS="--config=release_cpu_linux" export TF_TEST_FLAGS="--define=no_tensorflow_py_deps=true --test_lang_filters=py --test_output=errors --verbose_failures=true --keep_going --test_env=TF2_BEHAVIOR=1" export TF_TEST_TARGETS="${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... 
" export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" diff --git a/tensorflow/tools/ci_build/rel/ubuntu/cpu_py38_nonpip.sh b/tensorflow/tools/ci_build/rel/ubuntu/cpu_py38_nonpip.sh old mode 100755 new mode 100644 diff --git a/tensorflow/tools/ci_build/rel/ubuntu/cpu_py38_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu/cpu_py38_pip.sh old mode 100755 new mode 100644 index ace3257479a..52872cfd0a6 --- a/tensorflow/tools/ci_build/rel/ubuntu/cpu_py38_pip.sh +++ b/tensorflow/tools/ci_build/rel/ubuntu/cpu_py38_pip.sh @@ -28,11 +28,6 @@ export CONTAINER_TYPE="CPU" export TF_PYTHON_VERSION='python3.8' # Run configure. -export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=0 -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) yes "" | "$PYTHON_BIN_PATH" configure.py @@ -40,7 +35,7 @@ yes "" | "$PYTHON_BIN_PATH" configure.py source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Export optional variables for running pip.sh -export TF_BUILD_FLAGS="--config=opt --config=v2 --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain" +export TF_BUILD_FLAGS="--config=release_cpu_linux" export TF_TEST_FLAGS="--define=no_tensorflow_py_deps=true --test_lang_filters=py --test_output=errors --verbose_failures=true --keep_going --test_env=TF2_BEHAVIOR=1" export TF_TEST_TARGETS="${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... " export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" diff --git a/tensorflow/tools/ci_build/rel/ubuntu/gpu_libtensorflow.sh b/tensorflow/tools/ci_build/rel/ubuntu/gpu_libtensorflow.sh old mode 100755 new mode 100644 diff --git a/tensorflow/tools/ci_build/rel/ubuntu/gpu_py35_nonpip.sh b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py35_nonpip.sh old mode 100755 new mode 100644 diff --git a/tensorflow/tools/ci_build/rel/ubuntu/gpu_py35_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py35_pip.sh old mode 100755 new mode 100644 index 5b0ee602cfa..2a5c550890b --- a/tensorflow/tools/ci_build/rel/ubuntu/gpu_py35_pip.sh +++ b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py35_pip.sh @@ -28,20 +28,7 @@ export CONTAINER_TYPE="GPU" export TF_PYTHON_VERSION='python3.5' # Run configure. -export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 -export TF_CUDNN_VERSION=7 -export TF_NEED_TENSORRT=1 -export TENSORRT_INSTALL_PATH=/usr/local/tensorrt -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) -export PROJECT_NAME="tensorflow_gpu" -export LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$TENSORRT_INSTALL_PATH/lib" -export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 - yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. 
@@ -49,18 +36,17 @@ source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Export optional variables for running pip.sh export TF_TEST_FILTER_TAGS='gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py35' -export TF_BUILD_FLAGS="--config=opt --config=v2 --config=cuda --distinct_host_configuration=false \ ---action_env=TF_CUDA_VERSION --action_env=TF_CUDNN_VERSION --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain " +export TF_BUILD_FLAGS="--config=release_gpu_linux " export TF_TEST_FLAGS="--test_tag_filters=${TF_TEST_FILTER_TAGS} --build_tag_filters=${TF_TEST_FILTER_TAGS} \ --distinct_host_configuration=false \ ---action_env=TF_CUDA_VERSION --action_env=TF_CUDNN_VERSION --test_env=TF2_BEHAVIOR=1 \ +--action_env=TF_CUDA_VERSION=10 --action_env=TF_CUDNN_VERSION=7 --test_env=TF2_BEHAVIOR=1 \ --config=cuda --test_output=errors --local_test_jobs=4 --test_lang_filters=py \ --verbose_failures=true --keep_going --define=no_tensorflow_py_deps=true \ --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute " export TF_TEST_TARGETS="${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... " export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" #export IS_NIGHTLY=0 # Not nightly; uncomment if building from tf repo. -export TF_PROJECT_NAME=${PROJECT_NAME} +export TF_PROJECT_NAME="tensorflow_gpu" export TF_PIP_TEST_ROOT="pip_test" # To build both tensorflow and tensorflow-gpu pip packages diff --git a/tensorflow/tools/ci_build/rel/ubuntu/gpu_py36_nonpip.sh b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py36_nonpip.sh old mode 100755 new mode 100644 diff --git a/tensorflow/tools/ci_build/rel/ubuntu/gpu_py36_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py36_pip.sh old mode 100755 new mode 100644 index 3223149f5a4..9aa724c27b9 --- a/tensorflow/tools/ci_build/rel/ubuntu/gpu_py36_pip.sh +++ b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py36_pip.sh @@ -28,20 +28,7 @@ export CONTAINER_TYPE="GPU" export TF_PYTHON_VERSION='python3.6' # Run configure. -export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 -export TF_CUDNN_VERSION=7 -export TF_NEED_TENSORRT=1 -export TENSORRT_INSTALL_PATH=/usr/local/tensorrt -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) -export PROJECT_NAME="tensorflow_gpu" -export LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$TENSORRT_INSTALL_PATH/lib" -export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 - yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. 
@@ -49,18 +36,17 @@ source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Export optional variables for running pip.sh export TF_TEST_FILTER_TAGS='gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py36' -export TF_BUILD_FLAGS="--config=opt --config=v2 --config=cuda --distinct_host_configuration=false \ ---action_env=TF_CUDA_VERSION --action_env=TF_CUDNN_VERSION --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain " +export TF_BUILD_FLAGS="--config=release_gpu_linux " export TF_TEST_FLAGS="--test_tag_filters=${TF_TEST_FILTER_TAGS} --build_tag_filters=${TF_TEST_FILTER_TAGS} \ --distinct_host_configuration=false \ ---action_env=TF_CUDA_VERSION --action_env=TF_CUDNN_VERSION --test_env=TF2_BEHAVIOR=1 \ +--action_env=TF_CUDA_VERSION=10 --action_env=TF_CUDNN_VERSION=7 --test_env=TF2_BEHAVIOR=1 \ --config=cuda --test_output=errors --local_test_jobs=4 --test_lang_filters=py \ --verbose_failures=true --keep_going --define=no_tensorflow_py_deps=true \ --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute " export TF_TEST_TARGETS="${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... " export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" #export IS_NIGHTLY=0 # Not nightly; uncomment if building from tf repo. -export TF_PROJECT_NAME=${PROJECT_NAME} +export TF_PROJECT_NAME=="tensorflow_gpu" export TF_PIP_TEST_ROOT="pip_test" # To build both tensorflow and tensorflow-gpu pip packages diff --git a/tensorflow/tools/ci_build/rel/ubuntu/gpu_py37_nonpip.sh b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py37_nonpip.sh old mode 100755 new mode 100644 diff --git a/tensorflow/tools/ci_build/rel/ubuntu/gpu_py37_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py37_pip.sh old mode 100755 new mode 100644 index 5dfffbe3fe1..9bfc6608a0b --- a/tensorflow/tools/ci_build/rel/ubuntu/gpu_py37_pip.sh +++ b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py37_pip.sh @@ -28,20 +28,7 @@ export CONTAINER_TYPE="GPU" export TF_PYTHON_VERSION='python3.7' # Run configure. -export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 -export TF_CUDNN_VERSION=7 -export TF_NEED_TENSORRT=1 -export TENSORRT_INSTALL_PATH=/usr/local/tensorrt -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) -export PROJECT_NAME="tensorflow_gpu" -export LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$TENSORRT_INSTALL_PATH/lib" -export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 - yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. 
@@ -49,18 +36,17 @@ source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Export optional variables for running pip.sh export TF_TEST_FILTER_TAGS='gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py37' -export TF_BUILD_FLAGS="--config=opt --config=v2 --config=cuda --distinct_host_configuration=false \ ---action_env=TF_CUDA_VERSION --action_env=TF_CUDNN_VERSION --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain " +export TF_BUILD_FLAGS="--config=release_gpu_linux " export TF_TEST_FLAGS="--test_tag_filters=${TF_TEST_FILTER_TAGS} --build_tag_filters=${TF_TEST_FILTER_TAGS} \ --distinct_host_configuration=false \ ---action_env=TF_CUDA_VERSION --action_env=TF_CUDNN_VERSION --test_env=TF2_BEHAVIOR=1 \ +--action_env=TF_CUDA_VERSION=10 --action_env=TF_CUDNN_VERSION=7 --test_env=TF2_BEHAVIOR=1 \ --config=cuda --test_output=errors --local_test_jobs=4 --test_lang_filters=py \ --verbose_failures=true --keep_going --define=no_tensorflow_py_deps=true \ --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute " export TF_TEST_TARGETS="${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... " export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" #export IS_NIGHTLY=0 # Not nightly; uncomment if building from tf repo. -export TF_PROJECT_NAME=${PROJECT_NAME} +export TF_PROJECT_NAME=="tensorflow_gpu" export TF_PIP_TEST_ROOT="pip_test" # To build both tensorflow and tensorflow-gpu pip packages diff --git a/tensorflow/tools/ci_build/rel/ubuntu/gpu_py38_nonpip.sh b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py38_nonpip.sh old mode 100755 new mode 100644 diff --git a/tensorflow/tools/ci_build/rel/ubuntu/gpu_py38_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py38_pip.sh old mode 100755 new mode 100644 index cc0a5254607..d8838e7704a --- a/tensorflow/tools/ci_build/rel/ubuntu/gpu_py38_pip.sh +++ b/tensorflow/tools/ci_build/rel/ubuntu/gpu_py38_pip.sh @@ -28,20 +28,7 @@ export CONTAINER_TYPE="GPU" export TF_PYTHON_VERSION='python3.8' # Run configure. -export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 -export TF_CUDNN_VERSION=7 -export TF_NEED_TENSORRT=1 -export TENSORRT_INSTALL_PATH=/usr/local/tensorrt -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) -export PROJECT_NAME="tensorflow_gpu" -export LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$TENSORRT_INSTALL_PATH/lib" -export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 - yes "" | "$PYTHON_BIN_PATH" configure.py # Get the default test targets for bazel. 
@@ -49,18 +36,17 @@ source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh # Export optional variables for running pip.sh export TF_TEST_FILTER_TAGS='gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py38' -export TF_BUILD_FLAGS="--config=opt --config=v2 --config=cuda --distinct_host_configuration=false \ ---action_env=TF_CUDA_VERSION --action_env=TF_CUDNN_VERSION --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain " +export TF_BUILD_FLAGS="--config=release_gpu_linux " export TF_TEST_FLAGS="--test_tag_filters=${TF_TEST_FILTER_TAGS} --build_tag_filters=${TF_TEST_FILTER_TAGS} \ --distinct_host_configuration=false \ ---action_env=TF_CUDA_VERSION --action_env=TF_CUDNN_VERSION --test_env=TF2_BEHAVIOR=1 \ +--action_env=TF_CUDA_VERSION=10 --action_env=TF_CUDNN_VERSION=7 --test_env=TF2_BEHAVIOR=1 \ --config=cuda --test_output=errors --local_test_jobs=4 --test_lang_filters=py \ --verbose_failures=true --keep_going --define=no_tensorflow_py_deps=true \ --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute " export TF_TEST_TARGETS="${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... " export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" #export IS_NIGHTLY=0 # Not nightly; uncomment if building from tf repo. -export TF_PROJECT_NAME=${PROJECT_NAME} +export TF_PROJECT_NAME=="tensorflow_gpu" export TF_PIP_TEST_ROOT="pip_test" # To build both tensorflow and tensorflow-gpu pip packages diff --git a/tensorflow/tools/ci_build/rel/ubuntu/sanity.sh b/tensorflow/tools/ci_build/rel/ubuntu/sanity.sh old mode 100755 new mode 100644 diff --git a/tensorflow/tools/ci_build/rel/windows/cpu_libtensorflow.bat b/tensorflow/tools/ci_build/rel/windows/cpu_libtensorflow.bat old mode 100755 new mode 100644 diff --git a/tensorflow/tools/ci_build/rel/windows/cpu_py35.bat b/tensorflow/tools/ci_build/rel/windows/cpu_py35.bat old mode 100755 new mode 100644 index 02b12c7650a..175917d7cad --- a/tensorflow/tools/ci_build/rel/windows/cpu_py35.bat +++ b/tensorflow/tools/ci_build/rel/windows/cpu_py35.bat @@ -17,4 +17,4 @@ SET PYTHON_DIRECTORY=Python35 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --release_build --extra_build_flags "--config=v2 --define=no_tensorflow_py_deps=true" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow_cpu" +call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --release_build --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow_cpu" diff --git a/tensorflow/tools/ci_build/rel/windows/cpu_py36.bat b/tensorflow/tools/ci_build/rel/windows/cpu_py36.bat old mode 100755 new mode 100644 index e44e6ca6e18..85b75053eff --- a/tensorflow/tools/ci_build/rel/windows/cpu_py36.bat +++ b/tensorflow/tools/ci_build/rel/windows/cpu_py36.bat @@ -17,4 +17,4 @@ SET PYTHON_DIRECTORY=Python36 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --release_build --extra_build_flags "--config=v2 --define=no_tensorflow_py_deps=true" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow_cpu" +call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --release_build --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow_cpu" diff --git a/tensorflow/tools/ci_build/rel/windows/cpu_py37.bat b/tensorflow/tools/ci_build/rel/windows/cpu_py37.bat old mode 100755 new mode 100644 index c65167a5dc6..d8a6673ba4c --- 
a/tensorflow/tools/ci_build/rel/windows/cpu_py37.bat +++ b/tensorflow/tools/ci_build/rel/windows/cpu_py37.bat @@ -17,4 +17,4 @@ SET PYTHON_DIRECTORY=Python37 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --release_build --extra_build_flags "--config=v2 --define=no_tensorflow_py_deps=true" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow_cpu" +call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --release_build --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow_cpu" diff --git a/tensorflow/tools/ci_build/rel/windows/cpu_py38.bat b/tensorflow/tools/ci_build/rel/windows/cpu_py38.bat old mode 100755 new mode 100644 index 06599fc0d8c..86adcda0bb9 --- a/tensorflow/tools/ci_build/rel/windows/cpu_py38.bat +++ b/tensorflow/tools/ci_build/rel/windows/cpu_py38.bat @@ -17,5 +17,5 @@ SET PYTHON_DIRECTORY=Python38 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --release_build --extra_build_flags "--config=v2 --define=no_tensorflow_py_deps=true" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow_cpu" +call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --release_build --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow_cpu" diff --git a/tensorflow/tools/ci_build/rel/windows/gpu_libtensorflow.bat b/tensorflow/tools/ci_build/rel/windows/gpu_libtensorflow.bat old mode 100755 new mode 100644 diff --git a/tensorflow/tools/ci_build/rel/windows/gpu_pip_on_cpu.bat b/tensorflow/tools/ci_build/rel/windows/gpu_pip_on_cpu.bat old mode 100755 new mode 100644 diff --git a/tensorflow/tools/ci_build/rel/windows/gpu_py35.bat b/tensorflow/tools/ci_build/rel/windows/gpu_py35.bat old mode 100755 new mode 100644 index cba62225bee..86c118b2f83 --- a/tensorflow/tools/ci_build/rel/windows/gpu_py35.bat +++ b/tensorflow/tools/ci_build/rel/windows/gpu_py35.bat @@ -17,7 +17,7 @@ SET PYTHON_DIRECTORY=Python35 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --release_build --extra_build_flags "--config=v2" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow" +call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --release_build --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow" for %%a in ("%~dp0\.") do set "PARENT_DIR=%%~nxa" bash -l tensorflow\tools\ci_build\release\windows\%PARENT_DIR%\release_pip_rename.sh diff --git a/tensorflow/tools/ci_build/rel/windows/gpu_py36.bat b/tensorflow/tools/ci_build/rel/windows/gpu_py36.bat old mode 100755 new mode 100644 index ede8bd35f52..cc4f84afbee --- a/tensorflow/tools/ci_build/rel/windows/gpu_py36.bat +++ b/tensorflow/tools/ci_build/rel/windows/gpu_py36.bat @@ -17,7 +17,7 @@ SET PYTHON_DIRECTORY=Python36 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --release_build --extra_build_flags "--config=v2" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow" +call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --release_build --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow" for %%a in ("%~dp0\.") do set "PARENT_DIR=%%~nxa" bash -l tensorflow\tools\ci_build\release\windows\%PARENT_DIR%\release_pip_rename.sh \ No newline at end of file diff --git a/tensorflow/tools/ci_build/rel/windows/gpu_py37.bat b/tensorflow/tools/ci_build/rel/windows/gpu_py37.bat old mode 
100755 new mode 100644 index 7509270fc43..5fa798e3eb8 --- a/tensorflow/tools/ci_build/rel/windows/gpu_py37.bat +++ b/tensorflow/tools/ci_build/rel/windows/gpu_py37.bat @@ -17,7 +17,7 @@ SET PYTHON_DIRECTORY=Python37 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --release_build --extra_build_flags "--config=v2" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow" +call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --release_build --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow" for %%a in ("%~dp0\.") do set "PARENT_DIR=%%~nxa" bash -l tensorflow\tools\ci_build\release\windows\%PARENT_DIR%\release_pip_rename.sh \ No newline at end of file diff --git a/tensorflow/tools/ci_build/rel/windows/gpu_py38.bat b/tensorflow/tools/ci_build/rel/windows/gpu_py38.bat old mode 100755 new mode 100644 index fc1c600fa5e..fa1fc131145 --- a/tensorflow/tools/ci_build/rel/windows/gpu_py38.bat +++ b/tensorflow/tools/ci_build/rel/windows/gpu_py38.bat @@ -17,7 +17,7 @@ SET PYTHON_DIRECTORY=Python38 CALL tensorflow\tools\ci_build\release\common_win.bat -call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --release_build --extra_build_flags "--config=v2" --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow" +call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --release_build --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow" for %%a in ("%~dp0\.") do set "PARENT_DIR=%%~nxa" bash -l tensorflow\tools\ci_build\release\windows\%PARENT_DIR%\release_pip_rename.sh From c2e594440e1d9839546b93a93d8646b06891d7de Mon Sep 17 00:00:00 2001 From: Dave Moore Date: Wed, 5 Aug 2020 12:25:43 -0700 Subject: [PATCH 2180/2522] Add support for broadcasting to `tf.vectorized_map`. 
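A usage sketch, adapted from the new unit test (values are illustrative; assumes a build that includes this change): an element whose leading dimension is 1 now broadcasts against an element carrying the batch dimension.

    import tensorflow as tf

    a = tf.constant([3.1])             # leading dimension 1, broadcast across the batch
    b = tf.constant([-2.0, 6.0, 9.0])  # batch dimension 3
    # Each per-example call of fn sees the single element of `a` and one element of `b`.
    result = tf.vectorized_map(lambda xs: xs[0] + xs[1], (a, b))
    # result is approximately [1.1, 9.1, 12.1]

Previously the first dimensions had to match exactly; with this change, an input with a unit (or statically unknown but size-1) first dimension is gathered at index 0 on every iteration.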
PiperOrigin-RevId: 325075594 Change-Id: I0ed84eaaa230b62cf2be230d96101082ead81aab --- tensorflow/python/ops/parallel_for/BUILD | 1 + .../ops/parallel_for/control_flow_ops.py | 40 ++++++++++++++----- .../ops/parallel_for/control_flow_ops_test.py | 21 ++++++++++ 3 files changed, 53 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/ops/parallel_for/BUILD b/tensorflow/python/ops/parallel_for/BUILD index 2f3f7309395..b189ac57bb9 100644 --- a/tensorflow/python/ops/parallel_for/BUILD +++ b/tensorflow/python/ops/parallel_for/BUILD @@ -85,6 +85,7 @@ py_library( "//tensorflow/python:framework_ops", "//tensorflow/python:math_ops", "//tensorflow/python:tensor_array_ops", + "//tensorflow/python:tensor_shape", "//tensorflow/python:tensor_util", "//tensorflow/python:util", "//tensorflow/python/eager:context", diff --git a/tensorflow/python/ops/parallel_for/control_flow_ops.py b/tensorflow/python/ops/parallel_for/control_flow_ops.py index deb41873347..e7a5c38381e 100644 --- a/tensorflow/python/ops/parallel_for/control_flow_ops.py +++ b/tensorflow/python/ops/parallel_for/control_flow_ops.py @@ -26,6 +26,7 @@ from tensorflow.python.eager import def_function from tensorflow.python.framework import indexed_slices from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor +from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops @@ -349,11 +350,20 @@ def _pfor_impl(loop_fn, return nest.pack_sequence_as(loop_fn_outputs, nest.flatten(outputs)) +def _broadcasting_gather(x, i): + """Wrapper for gather that implicitly broadcasts unit dimensions.""" + static_first_dim = tensor_shape.dimension_value(x.shape[0]) + if static_first_dim == 1: + i = 0 + elif static_first_dim is None: + i = array_ops.where_v2(array_ops.shape(x)[0] > 1, i, 0) + return array_ops.gather(x, i) + + @tf_export("vectorized_map") def vectorized_map(fn, elems, fallback_to_while_loop=True): """Parallel map on the list of tensors unpacked from `elems` on dimension 0. - This method works similar to `tf.map_fn` but is optimized to run much faster, possibly with a much larger memory footprint. The speedups are obtained by vectorization (see [Auto-Vectorizing TensorFlow Graphs: Jacobians, @@ -420,7 +430,10 @@ def vectorized_map(fn, elems, fallback_to_while_loop=True): the structure of `elems`. elems: A tensor or (possibly nested) sequence of tensors, each of which will be unpacked along their first dimension. The nested sequence of the - resulting slices will be mapped over by `fn`. + resulting slices will be mapped over by `fn`. The first dimensions of all + elements must broadcast to a consistent value; equivalently, each + element tensor must have first dimension of either `B` or `1`, for some + common batch size `B >= 1`. fallback_to_while_loop: If true, on failing to vectorize an operation, the unsupported op is wrapped in a tf.while_loop to execute the map iterations. Note that this fallback only happens for unsupported ops and @@ -437,14 +450,23 @@ def vectorized_map(fn, elems, fallback_to_while_loop=True): Raises: ValueError: If vectorization fails and fallback_to_while_loop is False. 
""" + elems = nest.map_structure(ops.convert_to_tensor, elems) + def loop_fn(i): - gathered_elems = nest.map_structure(lambda x: array_ops.gather(x, i), elems) + gathered_elems = nest.map_structure(lambda x: _broadcasting_gather(x, i), + elems) return fn(gathered_elems) - batch_size = None - first_elem = ops.convert_to_tensor(nest.flatten(elems)[0]) - if first_elem.shape.rank is not None: - batch_size = first_elem.shape.as_list()[0] - if batch_size is None: - batch_size = array_ops.shape(first_elem)[0] + + # Extract batch size from the maximum first dimension of any element. + flat_elems = nest.flatten(elems) + static_first_dims = [elem.shape.as_list()[0] + if elem.shape.rank is not None else None + for elem in flat_elems] + if any([s is None for s in static_first_dims]): + batch_size = math_ops.reduce_max( + [array_ops.shape(elem)[0] for elem in flat_elems]) + else: + batch_size = max(static_first_dims) + return pfor(loop_fn, batch_size, fallback_to_while_loop=fallback_to_while_loop) diff --git a/tensorflow/python/ops/parallel_for/control_flow_ops_test.py b/tensorflow/python/ops/parallel_for/control_flow_ops_test.py index f8e4e4762ac..fe3d5f55d4e 100644 --- a/tensorflow/python/ops/parallel_for/control_flow_ops_test.py +++ b/tensorflow/python/ops/parallel_for/control_flow_ops_test.py @@ -132,6 +132,27 @@ class PForTest(PForTestCase): result = pfor_control_flow_ops.vectorized_map(compute, x) self.run_and_assert_equal(result, array_ops.ones((10, 1, 3))) + def test_vectorized_map_broadcasts_unit_dimensions(self): + convert_with_static_shape = ops.convert_to_tensor + convert_with_dynamic_shape = ( + lambda x: array_ops.placeholder_with_default(x, shape=None)) + + for convert in (convert_with_static_shape, convert_with_dynamic_shape): + a = convert([3.1]) + b = convert([-2., 6., 9.]) + + # One elem with leading unit dimension. + a_plus_1 = pfor_control_flow_ops.vectorized_map(lambda a: a + 1, a) + self.assertAllEqual(*self.evaluate((a_plus_1, a + 1))) + + # Two elems, both with leading unit dimension. + a_plus_a = pfor_control_flow_ops.vectorized_map(sum, (a, a)) + self.assertAllEqual(*self.evaluate((a_plus_a, a + a))) + + # Elem w/ unit dimension broadcast against elem with batch dim. + a_plus_b = pfor_control_flow_ops.vectorized_map(sum, (a, b)) + self.assertAllEqual(*self.evaluate((a_plus_b, a + b))) + def test_vectorized_map_example_1(self): def outer_product(a): From 1ae71606d07288853b6ae5c7bed507a6a317a2a2 Mon Sep 17 00:00:00 2001 From: Zhenyu Tan Date: Wed, 5 Aug 2020 12:42:11 -0700 Subject: [PATCH 2181/2522] fixit for server_lib container test. 
PiperOrigin-RevId: 325079700 Change-Id: Iad7abfb5493c568cd31bc9a88886aa04ef9ddcd6 --- ...lib_same_variables_clear_container_test.py | 56 +++++++++---------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/tensorflow/python/training/server_lib_same_variables_clear_container_test.py b/tensorflow/python/training/server_lib_same_variables_clear_container_test.py index e0ab21bbd97..f6b041b2907 100644 --- a/tensorflow/python/training/server_lib_same_variables_clear_container_test.py +++ b/tensorflow/python/training/server_lib_same_variables_clear_container_test.py @@ -20,7 +20,7 @@ from __future__ import print_function from tensorflow.python.client import session from tensorflow.python.framework import errors_impl -from tensorflow.python.framework import test_util +from tensorflow.python.framework import ops from tensorflow.python.ops import variables from tensorflow.python.platform import test from tensorflow.python.training import server_lib @@ -33,7 +33,6 @@ class SameVariablesClearContainerTest(test.TestCase): # TODO(b/34465411): Starting multiple servers with different configurations # in the same test is flaky. Move this test case back into # "server_lib_test.py" when this is no longer the case. - @test_util.run_deprecated_v1 def testSameVariablesClearContainer(self): # Starts two servers with different names so they map to different # resource "containers". @@ -47,36 +46,37 @@ class SameVariablesClearContainerTest(test.TestCase): }, protocol="grpc", start=True) # Creates a graph with 2 variables. - v0 = variables.Variable(1.0, name="v0") - v1 = variables.Variable(2.0, name="v0") + with ops.Graph().as_default(): + v0 = variables.Variable(1.0, name="v0") + v1 = variables.Variable(2.0, name="v0") - # Initializes the variables. Verifies that the values are correct. - sess_0 = session.Session(server0.target) - sess_1 = session.Session(server1.target) - sess_0.run(v0.initializer) - sess_1.run(v1.initializer) - self.assertAllEqual(1.0, sess_0.run(v0)) - self.assertAllEqual(2.0, sess_1.run(v1)) + # Initializes the variables. Verifies that the values are correct. + sess_0 = session.Session(server0.target) + sess_1 = session.Session(server1.target) + sess_0.run(v0.initializer) + sess_1.run(v1.initializer) + self.assertAllEqual(1.0, sess_0.run(v0)) + self.assertAllEqual(2.0, sess_1.run(v1)) - # Resets container "local0". Verifies that v0 is no longer initialized. - session.Session.reset(server0.target, ["local0"]) - sess = session.Session(server0.target) - with self.assertRaises(errors_impl.FailedPreconditionError): - self.evaluate(v0) - # Reinitializes v0 for the following test. - self.evaluate(v0.initializer) + # Resets container "local0". Verifies that v0 is no longer initialized. + session.Session.reset(server0.target, ["local0"]) + _ = session.Session(server0.target) + with self.assertRaises(errors_impl.FailedPreconditionError): + self.evaluate(v0) + # Reinitializes v0 for the following test. + self.evaluate(v0.initializer) - # Verifies that v1 is still valid. - self.assertAllEqual(2.0, sess_1.run(v1)) + # Verifies that v1 is still valid. + self.assertAllEqual(2.0, sess_1.run(v1)) - # Resets container "local1". Verifies that v1 is no longer initialized. - session.Session.reset(server1.target, ["local1"]) - sess = session.Session(server1.target) - with self.assertRaises(errors_impl.FailedPreconditionError): - self.evaluate(v1) - # Verifies that v0 is still valid. - sess = session.Session(server0.target) - self.assertAllEqual(1.0, self.evaluate(v0)) + # Resets container "local1". 
Verifies that v1 is no longer initialized. + session.Session.reset(server1.target, ["local1"]) + _ = session.Session(server1.target) + with self.assertRaises(errors_impl.FailedPreconditionError): + self.evaluate(v1) + # Verifies that v0 is still valid. + _ = session.Session(server0.target) + self.assertAllEqual(1.0, self.evaluate(v0)) if __name__ == "__main__": From b9c6a8278d532e4ec0d0a2e149bf7e31a276c432 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 5 Aug 2020 12:42:49 -0700 Subject: [PATCH 2182/2522] [MLIR][XLA] Allow for choice of safe/unsafe variant in broadcast utils Create safe or unsafe variants of `shape.broadcast` depending on the context. The representation by means of an extent tensor is only legal if the operands are known to be broadcastable. Currently, there is no use in a safe context in the codebase but it will be used for shape inference eventually. PiperOrigin-RevId: 325079842 Change-Id: I4a158bd89b3c9ec801d2b695f51cc0a795672ff9 --- .../include/mlir-hlo/utils/broadcast_utils.h | 10 +++---- .../mlir/hlo/lib/Dialect/mhlo/IR/chlo_ops.cc | 2 +- .../mhlo/transforms/chlo_legalize_to_hlo.cc | 4 +-- .../mlir/hlo/lib/utils/broadcast_utils.cc | 28 +++++++------------ .../tests/chlo_infer_shape_type_methods.mlir | 11 ++++---- .../chlo_legalize_to_hlo_broadcasts.mlir | 10 +++---- .../tests/legalize-tf-binary-elementwise.mlir | 12 ++++---- 7 files changed, 34 insertions(+), 43 deletions(-) diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/broadcast_utils.h b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/broadcast_utils.h index 1c57073f4ab..1e2404299b2 100644 --- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/broadcast_utils.h +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/broadcast_utils.h @@ -38,12 +38,10 @@ bool IsLegalNumpyRankedBroadcast(Value lhs, Value rhs, // Emits shape dialect ops to compute the result shape for a broadcasting // binary elementwise op which broadcasts according to "numpy" semantics -// (see above), returning a `shape.shape` or an extent tensor of the resulting -// shape. The result should only be an extent tensor in contexts that ensure -// both operands to be broadcastable. -Value ComputeBinaryElementwiseBroadcastingResultExtents( - Location loc, Value lhs, Value rhs, OpBuilder& builder, - bool unsafe_as_extent_tensor); +// (see above), returning an extents tensor of the resulting shape. 
+Value ComputeBinaryElementwiseBroadcastingResultExtents(Location loc, Value lhs, + Value rhs, + OpBuilder& builder); } // namespace hlo } // namespace mlir diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/chlo_ops.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/chlo_ops.cc index 81389c3be89..99ed8bcb849 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/chlo_ops.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/chlo_ops.cc @@ -151,7 +151,7 @@ LogicalResult ReifyBroadcastBinaryOpReturnTypeShapes( } Value computed_shape = hlo::ComputeBinaryElementwiseBroadcastingResultExtents( - loc, lhs, rhs, builder, /*unsafe_as_extent_tensor=*/false); + loc, lhs, rhs, builder); if (!computed_shape) return failure(); reifiedReturnShapes.push_back(computed_shape); return success(); diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo.cc index c2db4880632..adbd2e5a628 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo.cc @@ -124,8 +124,8 @@ struct ConvertRankedDynamicBroadcastBinaryOp int64_t result_rank = std::max(lhs_type.getRank(), rhs_type.getRank()); Value result_extents = - hlo::ComputeBinaryElementwiseBroadcastingResultExtents( - loc, lhs, rhs, rewriter, /*unsafe_as_extent_tensor=*/true); + hlo::ComputeBinaryElementwiseBroadcastingResultExtents(loc, lhs, rhs, + rewriter); // Note that we unconditionally emit DynamicBroadcastInDim ops and let // downstream canonicalizations fold them away if possible. This is diff --git a/tensorflow/compiler/mlir/hlo/lib/utils/broadcast_utils.cc b/tensorflow/compiler/mlir/hlo/lib/utils/broadcast_utils.cc index 71b1a4e164f..a3ce4d44436 100644 --- a/tensorflow/compiler/mlir/hlo/lib/utils/broadcast_utils.cc +++ b/tensorflow/compiler/mlir/hlo/lib/utils/broadcast_utils.cc @@ -20,7 +20,6 @@ limitations under the License. 
#include "llvm/ADT/Sequence.h" #include "llvm/ADT/SmallVector.h" #include "mlir/Dialect/Shape/IR/Shape.h" -#include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/IR/Diagnostics.h" #include "mlir/IR/StandardTypes.h" @@ -47,9 +46,9 @@ bool IsLegalNumpyRankedBroadcast(Value lhs, Value rhs, broadcast_dims.getIntValues().begin()); } -Value ComputeBinaryElementwiseBroadcastingResultExtents( - Location loc, Value lhs, Value rhs, OpBuilder& builder, - bool unsafe_as_extent_tensor) { +Value ComputeBinaryElementwiseBroadcastingResultExtents(Location loc, Value lhs, + Value rhs, + OpBuilder& builder) { auto lhs_type = lhs.getType().dyn_cast(); auto rhs_type = rhs.getType().dyn_cast(); if (!lhs_type || !rhs_type) { @@ -58,22 +57,15 @@ Value ComputeBinaryElementwiseBroadcastingResultExtents( return nullptr; } + int64_t result_rank = std::max(lhs_type.getRank(), rhs_type.getRank()); Value lhs_shape_v = builder.createOrFold(loc, lhs); Value rhs_shape_v = builder.createOrFold(loc, rhs); - - if (unsafe_as_extent_tensor) { - int64_t result_rank = std::max(lhs_type.getRank(), rhs_type.getRank()); - Value result_shape_v = builder.createOrFold( - loc, shape::getExtentTensorType(builder.getContext()), lhs_shape_v, - rhs_shape_v, nullptr /* error */); - return builder.createOrFold( - loc, RankedTensorType::get({result_rank}, builder.getIndexType()), - result_shape_v); - } - - return builder.createOrFold( - loc, builder.getType(), lhs_shape_v, rhs_shape_v, - nullptr /* error */); + Value result_shape_v = builder.createOrFold( + loc, shape::ShapeType::get(builder.getContext()), lhs_shape_v, + rhs_shape_v, nullptr /* error */); + return builder.createOrFold( + loc, RankedTensorType::get({result_rank}, builder.getIndexType()), + result_shape_v); } } // namespace hlo diff --git a/tensorflow/compiler/mlir/hlo/tests/chlo_infer_shape_type_methods.mlir b/tensorflow/compiler/mlir/hlo/tests/chlo_infer_shape_type_methods.mlir index d226c92858a..99aab532688 100644 --- a/tensorflow/compiler/mlir/hlo/tests/chlo_infer_shape_type_methods.mlir +++ b/tensorflow/compiler/mlir/hlo/tests/chlo_infer_shape_type_methods.mlir @@ -5,14 +5,15 @@ // only test reification on an examplar op. 
// CHECK-SAME: %[[ARG0:.+]]: tensor, // CHECK-SAME: %[[ARG1:.+]]: tensor -func @broadcast_add(%arg0: tensor, %arg1: tensor) -> !shape.shape { +func @broadcast_add(%arg0: tensor, %arg1: tensor) -> tensor<1xindex> { // CHECK-DAG: %[[ARG0_S:.+]] = shape.shape_of %[[ARG0]] // CHECK-DAG: %[[ARG1_S:.+]] = shape.shape_of %[[ARG1]] - // CHECK-DAG: %[[BCAST_S:.+]] = shape.broadcast %[[ARG0_S]], %[[ARG1_S]] : tensor, tensor -> !shape.shape - // CHECK: return %[[BCAST_S]] : !shape.shape + // CHECK-DAG: %[[BCAST_S:.+]] = shape.broadcast %[[ARG0_S]], %[[ARG1_S]] + // CHECK: %[[EXTENTS:.+]] = shape.to_extent_tensor %[[BCAST_S]] + // CHECK: return %[[EXTENTS]] %0 = chlo.broadcast_add %arg0, %arg1 : (tensor, tensor) -> tensor - %1 = "mhlo_test.reify_return_type_shapes"(%0) : (tensor) -> !shape.shape - return %1 : !shape.shape + %1 = "mhlo_test.reify_return_type_shapes"(%0) : (tensor) -> tensor<1xindex> + return %1 : tensor<1xindex> } // ----- diff --git a/tensorflow/compiler/mlir/hlo/tests/chlo_legalize_to_hlo_broadcasts.mlir b/tensorflow/compiler/mlir/hlo/tests/chlo_legalize_to_hlo_broadcasts.mlir index 9670372a864..c08ead5081e 100644 --- a/tensorflow/compiler/mlir/hlo/tests/chlo_legalize_to_hlo_broadcasts.mlir +++ b/tensorflow/compiler/mlir/hlo/tests/chlo_legalize_to_hlo_broadcasts.mlir @@ -19,7 +19,7 @@ func @dynamicBroadcast(%arg0: tensor, %arg1: tensor) -> tensor to tensor<2xindex> + // CHECK: %[[RESULT_EXTENTS:.+]] = shape.to_extent_tensor %[[RESULT_S]] // CHECK-DAG: %[[ARG0_B:.+]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG0]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<1> : tensor<1xi64>} // CHECK-DAG: %[[ARG1_B:.+]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG1]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} // CHECK-NEXT: %[[RESULT:.+]] = mhlo.add %[[ARG0_B]], %[[ARG1_B]] @@ -40,7 +40,7 @@ func @dynamicBroadcastComplex(%arg0: tensor, %arg1: tensor) -> t // CHECK-NEXT: %[[WITNESS:.+]] = shape.cstr_broadcastable %[[ARG0_S]], %[[ARG1_S]] // CHECK-NEXT: %[[FINAL_RESULT:.+]] = shape.assuming %[[WITNESS]] // CHECK-NEXT: %[[RESULT_S:.+]] = shape.broadcast %[[ARG0_S]], %[[ARG1_S]] - // CHECK-NEXT: %[[RESULT_EXTENTS:.+]] = tensor_cast %[[RESULT_S]] : tensor to tensor<2xindex> + // CHECK-NEXT: %[[RESULT_EXTENTS:.+]] = shape.to_extent_tensor %[[RESULT_S]] // CHECK-DAG: %[[ARG0_B:.+]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG0]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor, tensor<2xindex>) -> tensor // CHECK-DAG: %[[ARG1_B:.+]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG1]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} : (tensor, tensor<2xindex>) -> tensor // CHECK-NEXT: %[[RESULT:.+]] = "mhlo.complex"(%[[ARG0_B]], %[[ARG1_B]]) : (tensor, tensor) -> tensor> @@ -61,7 +61,7 @@ func @dynamicBroadcastCompare(%arg0: tensor, %arg1: tensor) -> t // CHECK: %[[WITNESS:.+]] = shape.cstr_broadcastable %[[ARG0_S]], %[[ARG1_S]] // CHECK: %[[FINAL_RESULT:.+]] = shape.assuming %[[WITNESS]] // CHECK: %[[RESULT_S:.+]] = shape.broadcast %[[ARG0_S]], %[[ARG1_S]] - // CHECK: %[[RESULT_EXTENTS:.+]] = tensor_cast %[[RESULT_S]] : tensor to tensor<2xindex> + // CHECK: %[[RESULT_EXTENTS:.+]] = shape.to_extent_tensor %[[RESULT_S]] // CHECK-DAG: %[[ARG0_B:.+]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG0]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor, tensor<2xindex>) -> tensor // CHECK-DAG: %[[ARG1_B:.+]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG1]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<[0, 
1]> : tensor<2xi64>} : (tensor, tensor<2xindex>) -> tensor // CHECK: %[[RESULT:.+]] = "mhlo.compare"(%[[ARG0_B]], %[[ARG1_B]]) {comparison_direction = "EQ"} : (tensor, tensor) -> tensor @@ -263,7 +263,7 @@ func @addScalarUnranked(%arg0: tensor, %arg1: tensor<*xf32>) -> tensor<*xf3 // CHECK: %[[ASSUMING_RESULT:.*]] = shape.assuming %[[WITNESS]] -> (tensor) { // CHECK: %[[SCALAR_SHAPE:.*]] = shape.const_shape [] // CHECK: %[[BROADCASTED_SHAPE:.*]] = shape.broadcast %[[SCALAR_SHAPE]], %[[SHAPE_RESHAPED]] -// CHECK: %[[SHAPE_TENSOR:.*]] = tensor_cast %[[BROADCASTED_SHAPE]] : tensor to tensor<1xindex> +// CHECK: %[[SHAPE_TENSOR:.*]] = shape.to_extent_tensor %[[BROADCASTED_SHAPE]] : !shape.shape -> tensor<1xindex> // CHECK: %[[BROADCASTED_LHS:.*]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG_0]], %[[SHAPE_TENSOR]]) {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor, tensor<1xindex>) -> tensor // CHECK: %[[BROADCASTED_RHS:.*]] = "mhlo.dynamic_broadcast_in_dim"(%[[RESHAPED]], %[[SHAPE_TENSOR]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} : (tensor, tensor<1xindex>) -> tensor // CHECK: %[[BROADCASTED_RESULT:.*]] = mhlo.add %[[BROADCASTED_LHS]], %[[BROADCASTED_RHS]] : tensor @@ -296,7 +296,7 @@ func @addUnrankedScalar(%arg0: tensor<*xf32>, %arg1: tensor) -> tensor<*xf3 // CHECK: %[[SHAPE_1:.*]] = shape.shape_of %[[ARG_1]] : tensor // CHECK: %[[WITNESS:.*]] = shape.cstr_broadcastable %[[SHAPE_RESHAPED]], %[[SHAPE_1]] // CHECK: %[[ASSUMING_RESULT:.*]] = shape.assuming %[[WITNESS]] -> (tensor) { -// CHECK: %[[ASTENSOR:.*]] = tensor_cast %[[SHAPE_RESHAPED]] +// CHECK: %[[ASTENSOR:.*]] = shape.to_extent_tensor %[[SHAPE_RESHAPED]] // CHECK: %[[BROADCASTED_LHS:.*]] = "mhlo.dynamic_broadcast_in_dim"(%[[RESHAPED]], %[[ASTENSOR]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} : (tensor, tensor<1xindex>) -> tensor // CHECK: %[[BROADCASTED_RHS:.*]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG_1]], %[[ASTENSOR]]) {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor, tensor<1xindex>) -> tensor // CHECK: %[[BROADCASTED_RESULT:.*]] = mhlo.add %[[BROADCASTED_LHS]], %[[BROADCASTED_RHS]] : tensor diff --git a/tensorflow/compiler/mlir/xla/tests/legalize-tf-binary-elementwise.mlir b/tensorflow/compiler/mlir/xla/tests/legalize-tf-binary-elementwise.mlir index 5f3e40f923f..fd9c14c7c0f 100644 --- a/tensorflow/compiler/mlir/xla/tests/legalize-tf-binary-elementwise.mlir +++ b/tensorflow/compiler/mlir/xla/tests/legalize-tf-binary-elementwise.mlir @@ -48,8 +48,8 @@ func @add_dynamic(%arg0: tensor, %arg1: tensor) -> tensor, tensor -> tensor - // CHECK-NEXT: %[[RESULT_EXTENTS:.+]] = tensor_cast %[[RESULT_SHAPE]] : tensor to tensor<2xindex> + // CHECK-NEXT: %[[RESULT_SHAPE:.+]] = shape.broadcast %[[LHS_SHAPE]], %[[RHS_SHAPE]] + // CHECK-NEXT: %[[RESULT_EXTENTS:.+]] = shape.to_extent_tensor %[[RESULT_SHAPE]] // CHECK-NEXT: %[[LHS_BCAST:.+]] = "mhlo.dynamic_broadcast_in_dim"(%arg0, %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<1> : tensor<1xi64>} // CHECK-NEXT: %[[RHS_BCAST:.+]] = "mhlo.dynamic_broadcast_in_dim"(%arg1, %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} // CHECK-NEXT: %[[RESULT:.+]] = mhlo.add %[[LHS_BCAST]], %[[RHS_BCAST]] : tensor @@ -201,8 +201,8 @@ func @equal_dynamic(%arg0: tensor, %arg1: tensor<1xi32>) -> tensor // NOT-CHECK-NEXT: %[[WITNESS:.+]] = shape.cstr_broadcastable %[[LHS_SHAPE]], %[[RHS_SHAPE]] // NOT-CHECK-NEXT: shape.assuming %[[WITNESS]] -> (tensor) { // NOT-CHECK-DAG: %[[LHS_SHAPE1:.+]] = shape.shape_of %arg0 - // NOT-CHECK-NEXT: %[[RESULT_SHAPE:.+]] = 
shape.broadcast %[[LHS_SHAPE1]], %[[RHS_SHAPE]] : tensor, tensor -> tensor - // NOT-CHECK-NEXT: %[[RESULT_EXTENTS:.+]] = tensor_cast %[[RESULT_SHAPE]] : tensor to tensor<1xindex> + // NOT-CHECK-NEXT: %[[RESULT_SHAPE:.+]] = shape.broadcast %[[LHS_SHAPE1]], %[[RHS_SHAPE]] + // NOT-CHECK-NEXT: %[[RESULT_EXTENTS:.+]] = shape.to_extent_tensor %[[RESULT_SHAPE]] // NOT-CHECK-DAG: %[[LHS_BCAST:.+]] = "mhlo.dynamic_broadcast_in_dim"(%arg0, %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} // NOT-CHECK-DAG: %[[RHS_BCAST:.+]] = "mhlo.dynamic_broadcast_in_dim"(%arg1, %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} // NOT-CHECK-NEXT: %[[RESULT:.+]] = "mhlo.compare"(%[[LHS_BCAST]], %[[RHS_BCAST]]) {comparison_direction = "EQ"} @@ -290,8 +290,8 @@ func @greater_dynamic(%arg0: tensor, %arg1: tensor) -> tensor, tensor -> tensor - // CHECK-NEXT: %[[RESULT_EXTENTS:.+]] = tensor_cast %[[RESULT_SHAPE]] : tensor to tensor<1xindex> + // CHECK-NEXT: %[[RESULT_SHAPE:.+]] = shape.broadcast %[[LHS_SHAPE1]], %[[RHS_SHAPE1]] + // CHECK-NEXT: %[[RESULT_EXTENTS:.+]] = shape.to_extent_tensor %[[RESULT_SHAPE]] // CHECK-DAG: %[[LHS_BCAST:.+]] = "mhlo.dynamic_broadcast_in_dim"(%arg0, %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} // CHECK-DAG: %[[RHS_BCAST:.+]] = "mhlo.dynamic_broadcast_in_dim"(%arg1, %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} // CHECK-NEXT: "mhlo.compare"(%[[LHS_BCAST]], %[[RHS_BCAST]]) {comparison_direction = "GT"} From a07effec7d21c247d6b43955a434c9a3abd15b4c Mon Sep 17 00:00:00 2001 From: Andy Ly Date: Wed, 5 Aug 2020 12:56:50 -0700 Subject: [PATCH 2183/2522] Update ReplicateToIslandPass to assign device ordinals based on the first TPU core device id. `tf._XlaSendFromHost` and `tf._XlaRecvAtHost` device ordinals will be overwritten, when the replicate body is expanded. PiperOrigin-RevId: 325082771 Change-Id: Id76ec6516c73d451bcc69433e98fd1bc315e865e --- tensorflow/compiler/mlir/tensorflow/BUILD | 1 + .../tensorflow/tests/replicate_to_island.mlir | 33 +++++++++++++++ .../transforms/replicate_to_island.cc | 21 ++++++++++ .../mlir/tensorflow/utils/device_util.cc | 18 ++++++++ .../mlir/tensorflow/utils/device_util.h | 7 ++++ .../mlir/tensorflow/utils/device_util_test.cc | 42 +++++++++++++++++++ 6 files changed, 122 insertions(+) diff --git a/tensorflow/compiler/mlir/tensorflow/BUILD b/tensorflow/compiler/mlir/tensorflow/BUILD index 63908c822c8..34caf1e2473 100644 --- a/tensorflow/compiler/mlir/tensorflow/BUILD +++ b/tensorflow/compiler/mlir/tensorflow/BUILD @@ -1680,6 +1680,7 @@ cc_library( ":tensorflow", "//tensorflow/core:core_cpu_lib", "//tensorflow/core:framework", + "@com_google_absl//absl/strings", "@llvm-project//llvm:Support", "@llvm-project//mlir:IR", "@llvm-project//mlir:Support", diff --git a/tensorflow/compiler/mlir/tensorflow/tests/replicate_to_island.mlir b/tensorflow/compiler/mlir/tensorflow/tests/replicate_to_island.mlir index 9931a45f995..ddcfde5cbcd 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/replicate_to_island.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/replicate_to_island.mlir @@ -223,3 +223,36 @@ func @replica_id_attr_added(%arg0: tensor, %arg1: tensor // CHECK: "tf.A" // CHECK-NOT: _xla_replica_id // CHECK: tf_executor.fetch + + +// Tests device ordinals are added to `tf._XlaSendFromHost`/`tf._XlaRecvAtHost` +// based on the first TPU core device id. 
+// CHECK-LABEL: func @device_ordinals +func @device_ordinals(%arg0: tensor, %arg1: tensor<2x!tf.string>) { + tf_executor.graph { + tf_executor.island { + tf_device.replicate([%arg0, %arg0] as %arg2: tensor) {n = 2 : i32, devices = {TPU_REPLICATED_CORE_0 = ["/job:worker/replica:0/task:0/device:TPU:1", "/job:worker/replica:0/task:0/device:TPU:2"]}} { + %0 = "tf._XlaRecvAtHost"(%arg1) {_xla_has_host_transfer = true, device_ordinal = 0 : i64, key = "host_compute_channel_send_0"} : (tensor<2x!tf.string>) -> tensor + "tf._XlaSendFromHost"(%0, %arg1) {_xla_has_host_transfer = true, device_ordinal = 0 : i64, key = "host_compute_channel_recv_0"} : (tensor, tensor<2x!tf.string>) -> () + "tf.NoOp"() : () -> () + tf_device.return + } + tf_executor.yield + } + tf_executor.fetch + } + return +} + +// CHECK: tf_executor.island +// CHECK: "tf._XlaRecvAtHost" +// CHECK-SAME: device_ordinal = 1 +// CHECK: "tf._XlaSendFromHost" +// CHECK-SAME: device_ordinal = 1 +// CHECK: "tf.NoOp" +// CHECK: tf_executor.island +// CHECK: "tf._XlaRecvAtHost" +// CHECK-SAME: device_ordinal = 2 +// CHECK: "tf._XlaSendFromHost" +// CHECK-SAME: device_ordinal = 2 +// CHECK: "tf.NoOp" diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/replicate_to_island.cc b/tensorflow/compiler/mlir/tensorflow/transforms/replicate_to_island.cc index b16868311f0..fcf0bb98a61 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/replicate_to_island.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/replicate_to_island.cc @@ -38,6 +38,7 @@ limitations under the License. #include "tensorflow/compiler/mlir/tensorflow/ir/tf_device.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" +#include "tensorflow/compiler/mlir/tensorflow/utils/device_util.h" #include "tensorflow/core/platform/logging.h" namespace mlir { @@ -45,6 +46,7 @@ namespace TFDevice { namespace { constexpr char kDeviceAttr[] = "device"; constexpr char kReplicaIdAttr[] = "_xla_replica_id"; +constexpr char kDeviceOrdinalAttr[] = "device_ordinal"; struct ReplicateToIslandPass : public PassWrapper { @@ -57,6 +59,11 @@ bool RequiresReplicaIDAttribute(Operation* op) { TF::EnqueueTPUEmbeddingRaggedTensorBatchOp>(op); } +bool RequiresDeviceOrdinalAttribute(Operation* op) { + return llvm::isa(op) || + llvm::isa(op); +} + // Adds integer attribute that represents replica id for replicated ops that // require replica id attribute. void AddReplicaIdToOpsInReplicatedRegion(OpBuilder* builder, Region* region, @@ -125,6 +132,20 @@ llvm::SmallVector ExpandReplicateIntoReplicas( kDeviceAttr, device_by_replica.cast()[i].cast()); }); + + if (auto tpu_replica_0 = + devices.getValue().get("TPU_REPLICATED_CORE_0")) { + int64_t device_ordinal = 0; + tensorflow::GetDeviceOrdinalFromDeviceString( + replicate_op.getLoc(), + tpu_replica_0.cast()[i].cast().getValue(), + &device_ordinal); + replica.walk([&](Operation* op) { + if (RequiresDeviceOrdinalAttribute(op)) + op->setAttr(kDeviceOrdinalAttr, + builder->getI64IntegerAttr(device_ordinal)); + }); + } } replicas.push_back(replica); diff --git a/tensorflow/compiler/mlir/tensorflow/utils/device_util.cc b/tensorflow/compiler/mlir/tensorflow/utils/device_util.cc index bf0b3b75ace..81892934efe 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/device_util.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/device_util.cc @@ -17,6 +17,7 @@ limitations under the License. 
#include +#include "absl/strings/string_view.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" @@ -25,6 +26,8 @@ limitations under the License. #include "llvm/Support/Regex.h" #include "mlir/IR/Attributes.h" // from @llvm-project #include "mlir/IR/Builders.h" // from @llvm-project +#include "mlir/IR/Diagnostics.h" // from @llvm-project +#include "mlir/IR/Location.h" // from @llvm-project #include "mlir/IR/Operation.h" // from @llvm-project #include "mlir/Support/LogicalResult.h" // from @llvm-project #include "tensorflow/core/common_runtime/device.h" @@ -155,4 +158,19 @@ mlir::LogicalResult GetDevicesFromOp(mlir::Operation* op, llvm::formatv("unsupported '{0}' attribute", kDevicesAttr)); } +mlir::LogicalResult GetDeviceOrdinalFromDeviceString(mlir::Location loc, + llvm::StringRef device, + int64_t* device_ordinal) { + DeviceNameUtils::ParsedName parsed_name; + if (!DeviceNameUtils::ParseFullName( + absl::string_view(device.data(), device.size()), &parsed_name)) + return mlir::emitError(loc) << "invalid device '" << device << "'"; + + if (!parsed_name.has_id) + return mlir::emitError(loc) << "device '" << device << "' has no id"; + + *device_ordinal = parsed_name.id; + return mlir::success(); +} + } // namespace tensorflow diff --git a/tensorflow/compiler/mlir/tensorflow/utils/device_util.h b/tensorflow/compiler/mlir/tensorflow/utils/device_util.h index 893e118024c..14e48bf7710 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/device_util.h +++ b/tensorflow/compiler/mlir/tensorflow/utils/device_util.h @@ -17,6 +17,7 @@ limitations under the License. #define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_UTILS_DEVICE_UTIL_H_ #include "llvm/ADT/SmallVector.h" +#include "mlir/IR/Location.h" // from @llvm-project #include "mlir/IR/Operation.h" // from @llvm-project #include "mlir/Support/LogicalResult.h" // from @llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_structs.h" @@ -41,6 +42,12 @@ void AddDevicesToOp(mlir::Operation* op, const DeviceSet* device_set); mlir::LogicalResult GetDevicesFromOp(mlir::Operation* op, mlir::TF::RuntimeDevices* devices); +// Parses a device string and returns its ordinal (id). This will return an +// error if the device string is invalid or has no id. 
+mlir::LogicalResult GetDeviceOrdinalFromDeviceString(mlir::Location loc, + llvm::StringRef device, + int64_t* device_ordinal); + } // namespace tensorflow #endif // TENSORFLOW_COMPILER_MLIR_TENSORFLOW_UTILS_DEVICE_UTIL_H_ diff --git a/tensorflow/compiler/mlir/tensorflow/utils/device_util_test.cc b/tensorflow/compiler/mlir/tensorflow/utils/device_util_test.cc index bc849e1d116..1da1f5973f6 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/device_util_test.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/device_util_test.cc @@ -205,5 +205,47 @@ TEST(DeviceUtilTest, GetGpuDeviceMetadata) { ASSERT_FALSE(meta_1.hasValue()); } +TEST(DeviceUtilTest, GetDeviceOrdinalFromDeviceString) { + const std::string tpu0 = "/job:worker/replica:0/task:0/device:TPU:0"; + const std::string tpu1 = "/job:worker/replica:0/task:0/device:TPU:1"; + + mlir::MLIRContext context; + auto unknown_loc = mlir::UnknownLoc::get(&context); + + int64_t device_ordinal0 = -1; + mlir::LogicalResult result0 = + GetDeviceOrdinalFromDeviceString(unknown_loc, tpu0, &device_ordinal0); + EXPECT_TRUE(mlir::succeeded(result0)); + EXPECT_EQ(device_ordinal0, 0); + + int64_t device_ordinal1 = -1; + mlir::LogicalResult result1 = + GetDeviceOrdinalFromDeviceString(unknown_loc, tpu1, &device_ordinal1); + EXPECT_TRUE(mlir::succeeded(result1)); + EXPECT_EQ(device_ordinal1, 1); +} + +TEST(DeviceUtilTest, GetDeviceOrdinalFromDeviceStringInvalid) { + mlir::MLIRContext context; + auto unknown_loc = mlir::UnknownLoc::get(&context); + + int64_t device_ordinal = -1; + mlir::LogicalResult result = GetDeviceOrdinalFromDeviceString( + unknown_loc, "bad_device", &device_ordinal); + EXPECT_TRUE(mlir::failed(result)); +} + +TEST(DeviceUtilTest, GetDeviceOrdinalFromDeviceStringNoId) { + const std::string tpu_no_id = "/job:worker/replica:0/task:0/device:TPU"; + + mlir::MLIRContext context; + auto unknown_loc = mlir::UnknownLoc::get(&context); + + int64_t device_ordinal = -1; + mlir::LogicalResult result = + GetDeviceOrdinalFromDeviceString(unknown_loc, tpu_no_id, &device_ordinal); + EXPECT_TRUE(mlir::failed(result)); +} + } // anonymous namespace } // namespace tensorflow From 88461053262f02bbc15887daa172c02db7419780 Mon Sep 17 00:00:00 2001 From: Nat Jeffries Date: Wed, 5 Aug 2020 12:57:01 -0700 Subject: [PATCH 2184/2522] Port the cmsis-nn optimized kernels to the new TfLiteEvalTensor API. PiperOrigin-RevId: 325082800 Change-Id: Ib7ce474449f1f8b537c5965dbdf685d1984cf983 --- tensorflow/lite/micro/kernels/cmsis-nn/add.cc | 57 +++-- .../lite/micro/kernels/cmsis-nn/conv.cc | 240 +++++++++--------- .../micro/kernels/cmsis-nn/depthwise_conv.cc | 143 +++++++---- .../micro/kernels/cmsis-nn/fully_connected.cc | 117 +++++---- tensorflow/lite/micro/kernels/cmsis-nn/mul.cc | 110 +++++--- .../lite/micro/kernels/cmsis-nn/pooling.cc | 78 +++--- .../lite/micro/kernels/cmsis-nn/softmax.cc | 71 ++++-- 7 files changed, 476 insertions(+), 340 deletions(-) diff --git a/tensorflow/lite/micro/kernels/cmsis-nn/add.cc b/tensorflow/lite/micro/kernels/cmsis-nn/add.cc index c98e7a2c329..6db88839073 100644 --- a/tensorflow/lite/micro/kernels/cmsis-nn/add.cc +++ b/tensorflow/lite/micro/kernels/cmsis-nn/add.cc @@ -23,6 +23,7 @@ limitations under the License. 
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/op_macros.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" #include "tensorflow/lite/micro/memory_helpers.h" namespace tflite { @@ -96,18 +97,20 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params, } void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params, - const OpData* data, const TfLiteTensor* input1, - const TfLiteTensor* input2, TfLiteTensor* output) { + const OpData* data, const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) { float output_activation_min, output_activation_max; CalculateActivationRange(params->activation, &output_activation_min, &output_activation_max); tflite::ArithmeticParams op_params; SetActivationParams(output_activation_min, output_activation_max, &op_params); -#define TF_LITE_ADD(opname) \ - reference_ops::opname(op_params, GetTensorShape(input1), \ - GetTensorData(input1), GetTensorShape(input2), \ - GetTensorData(input2), GetTensorShape(output), \ - GetTensorData(output)) +#define TF_LITE_ADD(opname) \ + reference_ops::opname(op_params, tflite::micro::GetTensorShape(input1), \ + tflite::micro::GetTensorData(input1), \ + tflite::micro::GetTensorShape(input2), \ + tflite::micro::GetTensorData(input2), \ + tflite::micro::GetTensorShape(output), \ + tflite::micro::GetTensorData(output)) if (data->requires_broadcast) { TF_LITE_ADD(BroadcastAdd4DSlow); } else { @@ -118,9 +121,9 @@ void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params, TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params, const OpData* data, - const TfLiteTensor* input1, - const TfLiteTensor* input2, - TfLiteTensor* output) { + const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, + TfLiteEvalTensor* output) { if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) { tflite::ArithmeticParams op_params; op_params.left_shift = data->left_shift; @@ -136,27 +139,32 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node, SetActivationParams(data->output_activation_min, data->output_activation_max, &op_params); bool need_broadcast = reference_ops::ProcessBroadcastShapes( - GetTensorShape(input1), GetTensorShape(input2), &op_params); -#define TF_LITE_ADD(type, opname, dtype) \ - type::opname(op_params, GetTensorShape(input1), \ - GetTensorData(input1), GetTensorShape(input2), \ - GetTensorData(input2), GetTensorShape(output), \ - GetTensorData(output)); + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorShape(input2), &op_params); +#define TF_LITE_ADD(type, opname, dtype) \ + type::opname(op_params, tflite::micro::GetTensorShape(input1), \ + tflite::micro::GetTensorData(input1), \ + tflite::micro::GetTensorShape(input2), \ + tflite::micro::GetTensorData(input2), \ + tflite::micro::GetTensorShape(output), \ + tflite::micro::GetTensorData(output)); if (output->type == kTfLiteInt8) { if (need_broadcast) { TF_LITE_ADD(reference_integer_ops, BroadcastAdd4DSlow, int8_t); } else { arm_elementwise_add_s8( - GetTensorData(input1), GetTensorData(input2), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorData(input2), op_params.input1_offset, op_params.input1_multiplier, op_params.input1_shift, op_params.input2_offset, op_params.input2_multiplier, op_params.input2_shift, - op_params.left_shift, GetTensorData(output), + 
op_params.left_shift, tflite::micro::GetTensorData(output), op_params.output_offset, op_params.output_multiplier, op_params.output_shift, op_params.quantized_activation_min, op_params.quantized_activation_max, - MatchingElementsSize(GetTensorShape(input1), GetTensorShape(input2), - GetTensorShape(output))); + MatchingElementsSize(tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorShape(output))); } } else { if (need_broadcast) { @@ -196,9 +204,12 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { auto* params = reinterpret_cast(node->builtin_data); - const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); - const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kInputTensor1); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kInputTensor2); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); TFLITE_DCHECK(node->user_data != nullptr); const OpData* data = static_cast(node->user_data); diff --git a/tensorflow/lite/micro/kernels/cmsis-nn/conv.cc b/tensorflow/lite/micro/kernels/cmsis-nn/conv.cc index 834f107dad0..cf1ce8cb5cb 100644 --- a/tensorflow/lite/micro/kernels/cmsis-nn/conv.cc +++ b/tensorflow/lite/micro/kernels/cmsis-nn/conv.cc @@ -25,6 +25,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/padding.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { namespace ops { @@ -43,6 +44,12 @@ constexpr int kConvQuantizedDimension = 0; struct OpData { TfLitePaddingValues padding; + + // Cached tensor zero point values for quantized operations. + int32_t input_zero_point; + int32_t filter_zero_point; + int32_t output_zero_point; + // The scaling factor from input to output (aka the 'real multiplier') can // be represented as a fixed point multiplier plus a left shift. int32_t output_multiplier; @@ -57,6 +64,9 @@ struct OpData { // uint8_t these would be 0 and 255. int32_t output_activation_min; int32_t output_activation_max; + + // Index to buffer for optimizations if applicable. 
+ int buffer_idx; }; inline PaddingType RuntimePaddingType(TfLitePadding padding) { @@ -110,16 +120,17 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, void* Init(TfLiteContext* context, const char* buffer, size_t length) { TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); - return context->AllocatePersistentBuffer(context, sizeof(int)); + return context->AllocatePersistentBuffer(context, sizeof(OpData)); } TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { #if defined(__ARM_FEATURE_DSP) || defined(__ARM_FEATURE_MVE) - OpData data; int32_t buf_size = 0; + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); auto* params = reinterpret_cast(node->builtin_data); - + auto* data = reinterpret_cast(node->user_data); const TfLiteTensor* input = GetInput(context, node, kInputTensor); const TfLiteTensor* filter = GetInput(context, node, kFilterTensor); const TfLiteTensor* output = GetOutput(context, node, kOutputTensor); @@ -148,11 +159,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { output_dims.w = output->dims->data[2]; output_dims.c = output_shape.Dims(3); - int* buffer_idx = reinterpret_cast(node->user_data); - TF_LITE_ENSURE_STATUS(CalculateOpData( context, node, params, input_dims.w, input_dims.h, filter_dims.w, - filter_dims.h, output_dims.w, output_dims.h, input->type, &data)); + filter_dims.h, output_dims.w, output_dims.h, input->type, data)); + + data->input_zero_point = input->params.zero_point; + data->filter_zero_point = filter->params.zero_point; + data->output_zero_point = output->params.zero_point; if (input->type == kTfLiteInt8) { // Initialize cmsis-nn convolution parameters @@ -163,40 +176,41 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { conv_params.stride.w = params->stride_width; conv_params.dilation.h = params->dilation_height_factor; conv_params.dilation.w = params->dilation_width_factor; - conv_params.padding.h = data.padding.height; - conv_params.padding.w = data.padding.width; - conv_params.activation.min = data.output_activation_min; - conv_params.activation.max = data.output_activation_max; + conv_params.padding.h = data->padding.height; + conv_params.padding.w = data->padding.width; + conv_params.activation.min = data->output_activation_min; + conv_params.activation.max = data->output_activation_max; buf_size = arm_convolve_wrapper_s8_get_buffer_size( &conv_params, &input_dims, &filter_dims, &output_dims); } - node->user_data = buffer_idx; if (buf_size > 0) { - TF_LITE_ENSURE_STATUS( - context->RequestScratchBufferInArena(context, buf_size, buffer_idx)); + TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena( + context, buf_size, &data->buffer_idx)); } else { - *buffer_idx = -1; + data->buffer_idx = -1; } #endif return kTfLiteOk; } TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, - TfLiteConvParams* params, OpData* data, - const TfLiteTensor* input, - const TfLiteTensor* filter, const TfLiteTensor* bias, - TfLiteTensor* im2col, TfLiteTensor* hwcn_weights, - TfLiteTensor* output) { - const int32_t input_offset = -input->params.zero_point; - const int32_t filter_offset = -filter->params.zero_point; - const int32_t output_offset = output->params.zero_point; + TfLiteConvParams* params, const OpData& data, + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, + TfLiteEvalTensor* im2col, + TfLiteEvalTensor* hwcn_weights, + TfLiteEvalTensor* output) { + const int32_t 
input_offset = -data.input_zero_point; + const int32_t filter_offset = -data.filter_zero_point; + const int32_t output_offset = data.output_zero_point; ConvParams op_params; op_params.padding_type = RuntimePaddingType(params->padding); - op_params.padding_values.width = data->padding.width; - op_params.padding_values.height = data->padding.height; + op_params.padding_values.width = data.padding.width; + op_params.padding_values.height = data.padding.height; op_params.stride_width = params->stride_width; op_params.stride_height = params->stride_height; op_params.dilation_width_factor = params->dilation_width_factor; @@ -204,46 +218,52 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, op_params.input_offset = input_offset; op_params.weights_offset = filter_offset; op_params.output_offset = output_offset; - op_params.output_multiplier = data->output_multiplier; - op_params.output_shift = -data->output_shift; - op_params.quantized_activation_min = data->output_activation_min; - op_params.quantized_activation_max = data->output_activation_max; - reference_ops::Conv(op_params, GetTensorShape(input), - GetTensorData(input), GetTensorShape(filter), - GetTensorData(filter), GetTensorShape(bias), - GetTensorData(bias), GetTensorShape(output), - GetTensorData(output), GetTensorShape(im2col), - GetTensorData(im2col), nullptr); + op_params.output_multiplier = data.output_multiplier; + op_params.output_shift = -data.output_shift; + op_params.quantized_activation_min = data.output_activation_min; + op_params.quantized_activation_max = data.output_activation_max; + reference_ops::Conv(op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output), + tflite::micro::GetTensorShape(im2col), + tflite::micro::GetTensorData(im2col), nullptr); return kTfLiteOk; } TfLiteStatus EvalQuantizedPerChannel( TfLiteContext* context, TfLiteNode* node, TfLiteConvParams* params, - OpData* data, const TfLiteTensor* input, const TfLiteTensor* filter, - const TfLiteTensor* bias, TfLiteTensor* output, TfLiteTensor* im2col) { + const OpData& data, const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output, TfLiteEvalTensor* im2col) { // Initialize cmsis-nn convolution parameters cmsis_nn_conv_params conv_params; - conv_params.input_offset = -input->params.zero_point; - conv_params.output_offset = output->params.zero_point; + conv_params.input_offset = -data.input_zero_point; + conv_params.output_offset = data.output_zero_point; conv_params.stride.h = params->stride_height; conv_params.stride.w = params->stride_width; conv_params.dilation.h = params->dilation_height_factor; conv_params.dilation.w = params->dilation_width_factor; - conv_params.padding.h = data->padding.height; - conv_params.padding.w = data->padding.width; - conv_params.activation.min = data->output_activation_min; - conv_params.activation.max = data->output_activation_max; + conv_params.padding.h = data.padding.height; + conv_params.padding.w = data.padding.width; + conv_params.activation.min = data.output_activation_min; + conv_params.activation.max = data.output_activation_max; // Initialize cmsis-nn per channel quantization parameters cmsis_nn_per_channel_quant_params quant_params; - quant_params.multiplier = 
data->per_channel_output_multiplier; - quant_params.shift = data->per_channel_output_shift; + quant_params.multiplier = + const_cast(data.per_channel_output_multiplier); + quant_params.shift = const_cast(data.per_channel_output_shift); #if defined(__ARM_FEATURE_DSP) || defined(__ARM_FEATURE_MVE) - RuntimeShape filter_shape = GetTensorShape(filter); - RuntimeShape input_shape = GetTensorShape(input); - RuntimeShape output_shape = GetTensorShape(output); - RuntimeShape bias_shape = GetTensorShape(bias); + RuntimeShape filter_shape = tflite::micro::GetTensorShape(filter); + RuntimeShape input_shape = tflite::micro::GetTensorShape(input); + RuntimeShape output_shape = tflite::micro::GetTensorShape(output); + RuntimeShape bias_shape = tflite::micro::GetTensorShape(bias); // Consistency check. TFLITE_DCHECK_LE(conv_params.activation.min, conv_params.activation.max); @@ -253,7 +273,7 @@ TfLiteStatus EvalQuantizedPerChannel( const int batch_size = MatchingDim(input_shape, 0, output_shape, 0); const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3); const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); - if (GetTensorData(bias)) { + if (tflite::micro::GetTensorData(bias)) { TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); } @@ -291,9 +311,8 @@ TfLiteStatus EvalQuantizedPerChannel( ctx.buf = nullptr; ctx.size = 0; - auto* buffer_idx = reinterpret_cast(node->user_data); - if (*buffer_idx > -1) { - ctx.buf = context->GetScratchBuffer(context, *buffer_idx); + if (data.buffer_idx > -1) { + ctx.buf = context->GetScratchBuffer(context, data.buffer_idx); // Note: ctx.size is currently not used in cmsis-nn. // The buffer should be allocated in the Prepare function through // arm_convolve_wrapper_s8_get_buffer_size @@ -303,9 +322,10 @@ TfLiteStatus EvalQuantizedPerChannel( // the parameters passed arm_status status = arm_convolve_wrapper_s8( &ctx, &conv_params, &quant_params, &input_dims, - GetTensorData(input), &filter_dims, GetTensorData(filter), - &bias_dims, GetTensorData(bias), &output_dims, - GetTensorData(output)); + tflite::micro::GetTensorData(input), &filter_dims, + tflite::micro::GetTensorData(filter), &bias_dims, + tflite::micro::GetTensorData(bias), &output_dims, + tflite::micro::GetTensorData(output)); if (status == ARM_MATH_SUCCESS) { return kTfLiteOk; @@ -318,42 +338,47 @@ TfLiteStatus EvalQuantizedPerChannel( "CMSIS-NN optimization for conv not available for this target. 
Using reference kernel.") ConvParams op_params; - op_params.input_offset = -input->params.zero_point; - op_params.output_offset = output->params.zero_point; + op_params.input_offset = -data.input_zero_point; + op_params.output_offset = data.output_zero_point; op_params.stride_height = params->stride_height; op_params.stride_width = params->stride_width; op_params.dilation_height_factor = params->dilation_height_factor; op_params.dilation_width_factor = params->dilation_width_factor; - op_params.padding_values.height = data->padding.height; - op_params.padding_values.width = data->padding.width; + op_params.padding_values.height = data.padding.height; + op_params.padding_values.width = data.padding.width; op_params.quantized_activation_min = data->output_activation_min; op_params.quantized_activation_max = data->output_activation_max; reference_integer_ops::ConvPerChannel( op_params, data->per_channel_output_multiplier, - data->per_channel_output_shift, GetTensorShape(input), - GetTensorData(input), GetTensorShape(filter), - GetTensorData(filter), GetTensorShape(bias), - GetTensorData(bias), GetTensorShape(output), - GetTensorData(output)); + data->per_channel_output_shift, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); #endif return kTfLiteOk; } TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, - TfLiteConvParams* params, OpData* data, - const TfLiteTensor* input, const TfLiteTensor* filter, - const TfLiteTensor* bias, TfLiteTensor* im2col, - TfLiteTensor* hwcn_weights, TfLiteTensor* output) { + TfLiteConvParams* params, const OpData& data, + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, TfLiteEvalTensor* im2col, + TfLiteEvalTensor* hwcn_weights, + TfLiteEvalTensor* output) { float output_activation_min, output_activation_max; CalculateActivationRange(params->activation, &output_activation_min, &output_activation_max); - + // TODO(b/154032858): Investigate removing extra copies.
ConvParams op_params; op_params.padding_type = RuntimePaddingType(params->padding); - op_params.padding_values.width = data->padding.width; - op_params.padding_values.height = data->padding.height; + op_params.padding_values.width = data.padding.width; + op_params.padding_values.height = data.padding.height; op_params.stride_width = params->stride_width; op_params.stride_height = params->stride_height; op_params.dilation_width_factor = params->dilation_width_factor; @@ -361,66 +386,47 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, op_params.float_activation_min = output_activation_min; op_params.float_activation_max = output_activation_max; - reference_ops::Conv(op_params, GetTensorShape(input), - GetTensorData(input), GetTensorShape(filter), - GetTensorData(filter), GetTensorShape(bias), - GetTensorData(bias), GetTensorShape(output), - GetTensorData(output), GetTensorShape(im2col), - GetTensorData(im2col)); + reference_ops::Conv(op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output), + tflite::micro::GetTensorShape(im2col), + tflite::micro::GetTensorData(im2col)); return kTfLiteOk; } TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { auto* params = reinterpret_cast(node->builtin_data); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); - const TfLiteTensor* input = GetInput(context, node, kInputTensor); - const TfLiteTensor* filter = GetInput(context, node, kFilterTensor); - const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor); + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + const TfLiteEvalTensor* filter = + tflite::micro::GetEvalInput(context, node, kFilterTensor); + const TfLiteEvalTensor* bias = + (NumInputs(node) == 3) + ? tflite::micro::GetEvalInput(context, node, kBiasTensor) + : nullptr; + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); - int input_width = input->dims->data[2]; - int input_height = input->dims->data[1]; - int filter_width = filter->dims->data[2]; - int filter_height = filter->dims->data[1]; - int output_width = output->dims->data[2]; - int output_height = output->dims->data[1]; - - OpData data; - - // All per-channel quantized tensors need valid zero point and scale arrays. - if (input->type == kTfLiteInt8) { - TF_LITE_ENSURE_EQ(context, filter->quantization.type, - kTfLiteAffineQuantization); - - const auto* affine_quantization = - reinterpret_cast( - filter->quantization.params); - TF_LITE_ENSURE(context, affine_quantization); - TF_LITE_ENSURE(context, affine_quantization->scale); - TF_LITE_ENSURE(context, affine_quantization->zero_point); - TF_LITE_ENSURE(context, - affine_quantization->scale->size == 1 || - affine_quantization->scale->size == - filter->dims->data[kConvQuantizedDimension]); - TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size, - affine_quantization->zero_point->size); - } - - TF_LITE_ENSURE_STATUS(CalculateOpData( - context, node, params, input_width, input_height, filter_width, - filter_height, output_width, output_height, input->type, &data)); + TFLITE_DCHECK(node->user_data != nullptr); + const OpData& data = *(static_cast(node->user_data)); switch (input->type) { // Already know in/out types are same. 
case kTfLiteFloat32: - return EvalFloat(context, node, params, &data, input, filter, bias, - nullptr, nullptr, output); + EvalFloat(context, node, params, data, input, filter, bias, nullptr, + nullptr, output); break; case kTfLiteInt8: - return EvalQuantizedPerChannel(context, node, params, &data, input, - filter, bias, output, nullptr); + return EvalQuantizedPerChannel(context, node, params, data, input, filter, + bias, output, nullptr); break; case kTfLiteUInt8: - return EvalQuantized(context, node, params, &data, input, filter, bias, + return EvalQuantized(context, node, params, data, input, filter, bias, nullptr, nullptr, output); break; default: diff --git a/tensorflow/lite/micro/kernels/cmsis-nn/depthwise_conv.cc b/tensorflow/lite/micro/kernels/cmsis-nn/depthwise_conv.cc index 457b3f854de..42ac15a0837 100644 --- a/tensorflow/lite/micro/kernels/cmsis-nn/depthwise_conv.cc +++ b/tensorflow/lite/micro/kernels/cmsis-nn/depthwise_conv.cc @@ -25,6 +25,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/padding.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { namespace ops { @@ -44,6 +45,12 @@ constexpr int kDepthwiseConvQuantizedDimension = 3; struct OpData { TfLitePaddingValues padding; + + // Cached tensor zero point values for quantized operations. + int32_t input_zero_point; + int32_t filter_zero_point; + int32_t output_zero_point; + // The scaling factor from input to output (aka the 'real multiplier') can // be represented as a fixed point multiplier plus a left shift. int32_t output_multiplier; @@ -115,6 +122,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* input = GetInput(context, node, kInputTensor); const TfLiteTensor* filter = GetInput(context, node, kFilterTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); const TfLiteType data_type = input->type; int width = SizeOfDimension(input, 2); @@ -150,8 +158,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { filter_width, filter_height, data_type, data)); + data->input_zero_point = input->params.zero_point; + data->filter_zero_point = filter->params.zero_point; + data->output_zero_point = output->params.zero_point; + if (input->type == kTfLiteInt8) { - const TfLiteTensor* output = GetOutput(context, node, kOutputTensor); RuntimeShape input_shape = GetTensorShape(input); RuntimeShape output_shape = GetTensorShape(output); RuntimeShape filter_shape = GetTensorShape(filter); @@ -200,8 +211,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { void EvalFloat(TfLiteContext* context, TfLiteNode* node, TfLiteDepthwiseConvParams* params, const OpData* data, - const TfLiteTensor* input, const TfLiteTensor* filter, - const TfLiteTensor* bias, TfLiteTensor* output) { + const TfLiteEvalTensor* input, const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, TfLiteEvalTensor* output) { float output_activation_min, output_activation_max; CalculateActivationRange(params->activation, &output_activation_min, &output_activation_max); @@ -220,25 +231,30 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node, op_params.float_activation_max = output_activation_max; tflite::reference_ops::DepthwiseConv( - op_params, GetTensorShape(input), GetTensorData(input), - GetTensorShape(filter), GetTensorData(filter), - GetTensorShape(bias), GetTensorData(bias), GetTensorShape(output), - 
GetTensorData(output)); + op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, TfLiteDepthwiseConvParams* params, OpData* data, - const TfLiteTensor* input, - const TfLiteTensor* filter, - const TfLiteTensor* bias, TfLiteTensor* output) { + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output) { cmsis_nn_dw_conv_params dw_conv_params; dw_conv_params.dilation.h = params->dilation_height_factor; dw_conv_params.dilation.w = params->dilation_width_factor; // Call to reference implementation can be removed when dilation is supported // in the optimized implementations. if (1 == dw_conv_params.dilation.h && 1 == dw_conv_params.dilation.w) { - dw_conv_params.input_offset = -input->params.zero_point; - dw_conv_params.output_offset = output->params.zero_point; + dw_conv_params.input_offset = -data->input_zero_point; + dw_conv_params.output_offset = data->output_zero_point; dw_conv_params.stride.h = params->stride_height; dw_conv_params.stride.w = params->stride_width; dw_conv_params.padding.h = data->padding.height; @@ -252,10 +268,10 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, quant_params.multiplier = data->per_channel_output_multiplier; quant_params.shift = data->per_channel_output_shift; - RuntimeShape filter_shape = GetTensorShape(filter); - RuntimeShape input_shape = GetTensorShape(input); - RuntimeShape output_shape = GetTensorShape(output); - RuntimeShape bias_shape = GetTensorShape(bias); + RuntimeShape filter_shape = tflite::micro::GetTensorShape(filter); + RuntimeShape input_shape = tflite::micro::GetTensorShape(input); + RuntimeShape output_shape = tflite::micro::GetTensorShape(output); + RuntimeShape bias_shape = tflite::micro::GetTensorShape(bias); TFLITE_DCHECK_LE(dw_conv_params.activation.min, dw_conv_params.activation.max); @@ -263,7 +279,7 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, const int batch_size = MatchingDim(input_shape, 0, output_shape, 0); const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3); - if (GetTensorData(bias)) { + if (tflite::micro::GetTensorData(bias)) { TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); } @@ -300,13 +316,14 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, ctx.buf = context->GetScratchBuffer(context, data->buffer_idx); } - TFLITE_DCHECK_EQ(arm_depthwise_conv_wrapper_s8( - &ctx, &dw_conv_params, &quant_params, &input_dims, - GetTensorData(input), &filter_dims, - GetTensorData(filter), &bias_dims, - GetTensorData(bias), &output_dims, - GetTensorData(output)), - ARM_MATH_SUCCESS); + TFLITE_DCHECK_EQ( + arm_depthwise_conv_wrapper_s8( + &ctx, &dw_conv_params, &quant_params, &input_dims, + tflite::micro::GetTensorData(input), &filter_dims, + tflite::micro::GetTensorData(filter), &bias_dims, + tflite::micro::GetTensorData(bias), &output_dims, + tflite::micro::GetTensorData(output)), + ARM_MATH_SUCCESS); } else { DepthwiseParams op_params; op_params.padding_type = PaddingType::kSame; @@ -317,30 +334,34 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, op_params.dilation_width_factor = 
params->dilation_width_factor; op_params.dilation_height_factor = params->dilation_height_factor; op_params.depth_multiplier = params->depth_multiplier; - op_params.input_offset = -input->params.zero_point; + op_params.input_offset = -data->input_zero_point; op_params.weights_offset = 0; - op_params.output_offset = output->params.zero_point; + op_params.output_offset = data->output_zero_point; // TODO(b/130439627): Use calculated value for clamping. op_params.quantized_activation_min = std::numeric_limits::min(); op_params.quantized_activation_max = std::numeric_limits::max(); reference_integer_ops::DepthwiseConvPerChannel( op_params, data->per_channel_output_multiplier, - data->per_channel_output_shift, GetTensorShape(input), - GetTensorData(input), GetTensorShape(filter), - GetTensorData(filter), GetTensorShape(bias), - GetTensorData(bias), GetTensorShape(output), - GetTensorData(output)); + data->per_channel_output_shift, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } } void EvalQuantized(TfLiteContext* context, TfLiteNode* node, TfLiteDepthwiseConvParams* params, const OpData* data, - const TfLiteTensor* input, const TfLiteTensor* filter, - const TfLiteTensor* bias, TfLiteTensor* output) { - const int32_t input_offset = -input->params.zero_point; - const int32_t filter_offset = -filter->params.zero_point; - const int32_t output_offset = output->params.zero_point; + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output) { + const int32_t input_offset = -data->input_zero_point; + const int32_t filter_offset = -data->filter_zero_point; + const int32_t output_offset = data->output_zero_point; tflite::DepthwiseParams op_params; // Padding type is ignored, but still set. 
@@ -363,34 +384,39 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node, if (1 == op_params.dilation_width_factor && 1 == op_params.dilation_height_factor) { - RuntimeShape filter_shape = GetTensorShape(filter); + RuntimeShape filter_shape = tflite::micro::GetTensorShape(filter); const int filter_height = filter_shape.Dims(1); const int filter_width = filter_shape.Dims(2); - RuntimeShape input_shape = GetTensorShape(input); + RuntimeShape input_shape = tflite::micro::GetTensorShape(input); const int input_height = input_shape.Dims(1); const int input_width = input_shape.Dims(2); const int input_depth = input_shape.Dims(3); - RuntimeShape output_shape = GetTensorShape(output); + RuntimeShape output_shape = tflite::micro::GetTensorShape(output); const int output_height = output_shape.Dims(1); const int output_width = output_shape.Dims(2); arm_depthwise_conv_u8_basic_ver1( - GetTensorData(input), input_width, input_height, input_depth, - GetTensorData(filter), filter_width, filter_height, - op_params.depth_multiplier, op_params.padding_values.width, - op_params.padding_values.height, op_params.stride_width, - op_params.stride_height, op_params.dilation_width_factor, - op_params.dilation_height_factor, GetTensorData(bias), - op_params.input_offset, op_params.weights_offset, - op_params.output_offset, GetTensorData(output), output_width, + tflite::micro::GetTensorData(input), input_width, input_height, + input_depth, tflite::micro::GetTensorData(filter), + filter_width, filter_height, op_params.depth_multiplier, + op_params.padding_values.width, op_params.padding_values.height, + op_params.stride_width, op_params.stride_height, + op_params.dilation_width_factor, op_params.dilation_height_factor, + tflite::micro::GetTensorData(bias), op_params.input_offset, + op_params.weights_offset, op_params.output_offset, + tflite::micro::GetTensorData(output), output_width, output_height, op_params.quantized_activation_min, op_params.quantized_activation_max, op_params.output_shift, op_params.output_multiplier); } else { tflite::reference_ops::DepthwiseConv( - op_params, GetTensorShape(input), GetTensorData(input), - GetTensorShape(filter), GetTensorData(filter), - GetTensorShape(bias), GetTensorData(bias), - GetTensorShape(output), GetTensorData(output)); + op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } } @@ -402,11 +428,16 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { reinterpret_cast(node->builtin_data); OpData& data = *(static_cast(node->user_data)); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); - const TfLiteTensor* input = GetInput(context, node, kInputTensor); - const TfLiteTensor* filter = GetInput(context, node, kFilterTensor); - const TfLiteTensor* bias = - (NumInputs(node) == 3) ? GetInput(context, node, kBiasTensor) : nullptr; + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + const TfLiteEvalTensor* filter = + tflite::micro::GetEvalInput(context, node, kFilterTensor); + const TfLiteEvalTensor* bias = + (NumInputs(node) == 3) + ? 
tflite::micro::GetEvalInput(context, node, kBiasTensor) + : nullptr; // TODO(aselle): Consider whether float conv and quantized conv should be // separate ops to avoid dispatch overhead here. diff --git a/tensorflow/lite/micro/kernels/cmsis-nn/fully_connected.cc b/tensorflow/lite/micro/kernels/cmsis-nn/fully_connected.cc index 074f4a9f251..8af92e6d245 100644 --- a/tensorflow/lite/micro/kernels/cmsis-nn/fully_connected.cc +++ b/tensorflow/lite/micro/kernels/cmsis-nn/fully_connected.cc @@ -23,6 +23,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { namespace ops { @@ -43,6 +44,11 @@ struct OpData { int input_quantized_index; // Index to buffer for optimizations if applicable. int buffer_idx; + + // Cached tensor zero point values for quantized operations. + int32_t input_zero_point; + int32_t filter_zero_point; + int32_t output_zero_point; }; constexpr int kInputTensor = 0; @@ -69,6 +75,9 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized( context, activation, output, &data->output_activation_min, &data->output_activation_max)); + data->input_zero_point = input->params.zero_point; + data->filter_zero_point = filter->params.zero_point; + data->output_zero_point = output->params.zero_point; } return status; } @@ -125,25 +134,26 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { } TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node, - const OpData& data, const TfLiteTensor* input, - const TfLiteTensor* filter, - const TfLiteTensor* bias, TfLiteTensor* output) { + const OpData& data, + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output) { // The 'if' condition can be removed when null handling of bias is added to // arm_fully_connected_s8 - if (nullptr != GetTensorData(bias)) { - RuntimeShape output_shape = GetTensorShape(output); + if (nullptr != tflite::micro::GetTensorData(bias)) { + RuntimeShape output_shape = tflite::micro::GetTensorShape(output); TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2); const int batches = output_shape.Dims(0); const int output_depth = output_shape.Dims(1); - const RuntimeShape filter_shape = GetTensorShape(filter); + const RuntimeShape filter_shape = tflite::micro::GetTensorShape(filter); const int filter_dim_count = filter_shape.DimensionsCount(); const int accum_depth = filter_shape.Dims(filter_dim_count - 1); - const RuntimeShape input_shape = GetTensorShape(input); + const RuntimeShape input_shape = tflite::micro::GetTensorShape(input); cmsis_nn_fc_params fc_params; - fc_params.input_offset = -input->params.zero_point; - fc_params.filter_offset = -filter->params.zero_point; - fc_params.output_offset = output->params.zero_point; + fc_params.input_offset = -data.input_zero_point; + fc_params.output_offset = data.output_zero_point; fc_params.activation.min = data.output_activation_min; fc_params.activation.max = data.output_activation_max; @@ -186,17 +196,18 @@ TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node, TF_LITE_ENSURE_EQ( context, - arm_fully_connected_s8(&ctx, &fc_params, &quant_params, &input_dims, - GetTensorData(input), &filter_dims, - GetTensorData(filter), &bias_dims, - GetTensorData(bias), &output_dims, - 
GetTensorData(output)), + arm_fully_connected_s8( + &ctx, &fc_params, &quant_params, &input_dims, + tflite::micro::GetTensorData(input), &filter_dims, + tflite::micro::GetTensorData(filter), &bias_dims, + tflite::micro::GetTensorData(bias), &output_dims, + tflite::micro::GetTensorData(output)), ARM_MATH_SUCCESS); } else { tflite::FullyConnectedParams op_params; - op_params.input_offset = -input->params.zero_point; - op_params.weights_offset = -filter->params.zero_point; - op_params.output_offset = output->params.zero_point; + op_params.input_offset = -data.input_zero_point; + op_params.weights_offset = -data.filter_zero_point; + op_params.output_offset = data.output_zero_point; op_params.output_multiplier = data.output_multiplier; // TODO(b/138810107): Figure out whether output shift should be inverted op_params.output_shift = -data.output_shift; @@ -204,21 +215,26 @@ TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node, op_params.quantized_activation_max = data.output_activation_max; reference_integer_ops::FullyConnected( - op_params, GetTensorShape(input), GetTensorData(input), - GetTensorShape(filter), GetTensorData(filter), - GetTensorShape(bias), GetTensorData(bias), - GetTensorShape(output), GetTensorData(output)); + op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } return kTfLiteOk; } TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, - const OpData& data, const TfLiteTensor* input, - const TfLiteTensor* filter, const TfLiteTensor* bias, - TfLiteTensor* output) { - const int32_t input_offset = -input->params.zero_point; - const int32_t filter_offset = -filter->params.zero_point; - const int32_t output_offset = output->params.zero_point; + const OpData& data, const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, + TfLiteEvalTensor* output) { + const int32_t input_offset = -data.input_zero_point; + const int32_t filter_offset = -data.filter_zero_point; + const int32_t output_offset = data.output_zero_point; tflite::FullyConnectedParams op_params; op_params.input_offset = input_offset; @@ -230,12 +246,16 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, op_params.quantized_activation_min = data.output_activation_min; op_params.quantized_activation_max = data.output_activation_max; -#define TF_LITE_FULLY_CONNECTED(output_data_type) \ - reference_ops::FullyConnected( \ - op_params, GetTensorShape(input), GetTensorData(input), \ - GetTensorShape(filter), GetTensorData(filter), \ - GetTensorShape(bias), GetTensorData(bias), \ - GetTensorShape(output), GetTensorData(output)) +#define TF_LITE_FULLY_CONNECTED(output_data_type) \ + reference_ops::FullyConnected( \ + op_params, tflite::micro::GetTensorShape(input), \ + tflite::micro::GetTensorData(input), \ + tflite::micro::GetTensorShape(filter), \ + tflite::micro::GetTensorData(filter), \ + tflite::micro::GetTensorShape(bias), \ + tflite::micro::GetTensorData(bias), \ + tflite::micro::GetTensorShape(output), \ + tflite::micro::GetTensorData(output)) switch (output->type) { case kTfLiteUInt8: TF_LITE_FULLY_CONNECTED(uint8_t); @@ -254,8 +274,9 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, TfLiteStatus EvalFloat(TfLiteContext* context, 
TfLiteNode* node, TfLiteFusedActivation activation, - const TfLiteTensor* input, const TfLiteTensor* filter, - const TfLiteTensor* bias, TfLiteTensor* output) { + const TfLiteEvalTensor* input, + const TfLiteEvalTensor* filter, + const TfLiteEvalTensor* bias, TfLiteEvalTensor* output) { float output_activation_min, output_activation_max; CalculateActivationRange(activation, &output_activation_min, &output_activation_max); @@ -263,10 +284,14 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, op_params.float_activation_min = output_activation_min; op_params.float_activation_max = output_activation_max; tflite::reference_ops::FullyConnected( - op_params, GetTensorShape(input), GetTensorData(input), - GetTensorShape(filter), GetTensorData(filter), - GetTensorShape(bias), GetTensorData(bias), GetTensorShape(output), - GetTensorData(output)); + op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(filter), + tflite::micro::GetTensorData(filter), + tflite::micro::GetTensorShape(bias), + tflite::micro::GetTensorData(bias), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); return kTfLiteOk; } @@ -275,10 +300,14 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const auto* params = static_cast(node->builtin_data); - const TfLiteTensor* input = GetInput(context, node, kInputTensor); - const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor); - const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + const TfLiteEvalTensor* filter = + tflite::micro::GetEvalInput(context, node, kWeightsTensor); + const TfLiteEvalTensor* bias = + tflite::micro::GetEvalInput(context, node, kBiasTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); TFLITE_DCHECK(node->user_data != nullptr); const OpData& data = *(static_cast(node->user_data)); diff --git a/tensorflow/lite/micro/kernels/cmsis-nn/mul.cc b/tensorflow/lite/micro/kernels/cmsis-nn/mul.cc index 6f9113a02f6..00d884eb415 100644 --- a/tensorflow/lite/micro/kernels/cmsis-nn/mul.cc +++ b/tensorflow/lite/micro/kernels/cmsis-nn/mul.cc @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" #include "tensorflow/lite/micro/memory_helpers.h" namespace tflite { @@ -38,6 +39,11 @@ struct OpData { int32_t output_multiplier; int output_shift; + + // Cached tensor zero point values for quantized operations. 
+ int32_t input1_zero_point; + int32_t input2_zero_point; + int32_t output_zero_point; }; TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, @@ -65,6 +71,11 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, return kTfLiteOk; } +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(OpData)); +} + TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor); const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor); @@ -74,44 +85,59 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { return AllocateOutputDimensionsFromInput(context, input1, input2, output); } + TFLITE_DCHECK(node->builtin_data != nullptr); + auto* params = reinterpret_cast(node->builtin_data); + TFLITE_DCHECK(node->user_data != nullptr); + OpData* data = static_cast(node->user_data); + + data->input1_zero_point = input1->params.zero_point; + data->input2_zero_point = input2->params.zero_point; + data->output_zero_point = output->params.zero_point; + CalculateOpData(context, node, params, data); + return kTfLiteOk; } void EvalQuantized(TfLiteContext* context, TfLiteNode* node, - TfLiteMulParams* params, OpData* data, - const TfLiteTensor* input1, const TfLiteTensor* input2, - TfLiteTensor* output) { + TfLiteMulParams* params, const OpData& data, + const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) { if (output->type == kTfLiteInt8 || output->type == kTfLiteUInt8) { tflite::ArithmeticParams op_params; - SetActivationParams(data->output_activation_min, - data->output_activation_max, &op_params); - op_params.input1_offset = -input1->params.zero_point; - op_params.input2_offset = -input2->params.zero_point; - op_params.output_offset = output->params.zero_point; - op_params.output_multiplier = data->output_multiplier; - op_params.output_shift = data->output_shift; + SetActivationParams(data.output_activation_min, data.output_activation_max, + &op_params); + op_params.input1_offset = -data.input1_zero_point; + op_params.input2_offset = -data.input2_zero_point; + op_params.output_offset = data.output_zero_point; + op_params.output_multiplier = data.output_multiplier; + op_params.output_shift = data.output_shift; bool need_broadcast = reference_ops::ProcessBroadcastShapes( - GetTensorShape(input1), GetTensorShape(input2), &op_params); + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorShape(input2), &op_params); -#define TF_LITE_MUL(type, opname, dtype) \ - type::opname(op_params, GetTensorShape(input1), \ - GetTensorData(input1), GetTensorShape(input2), \ - GetTensorData(input2), GetTensorShape(output), \ - GetTensorData(output)); +#define TF_LITE_MUL(type, opname, dtype) \ + type::opname(op_params, tflite::micro::GetTensorShape(input1), \ + tflite::micro::GetTensorData(input1), \ + tflite::micro::GetTensorShape(input2), \ + tflite::micro::GetTensorData(input2), \ + tflite::micro::GetTensorShape(output), \ + tflite::micro::GetTensorData(output)); if (output->type == kTfLiteInt8) { if (need_broadcast) { TF_LITE_MUL(reference_integer_ops, BroadcastMul4DSlow, int8_t); } else { arm_elementwise_mul_s8( - GetTensorData(input1), GetTensorData(input2), + tflite::micro::GetTensorData(input1), + tflite::micro::GetTensorData(input2), op_params.input1_offset, op_params.input2_offset, - GetTensorData(output), 
op_params.output_offset, - op_params.output_multiplier, op_params.output_shift, - op_params.quantized_activation_min, + tflite::micro::GetTensorData(output), + op_params.output_offset, op_params.output_multiplier, + op_params.output_shift, op_params.quantized_activation_min, op_params.quantized_activation_max, - MatchingElementsSize(GetTensorShape(input1), GetTensorShape(input2), - GetTensorShape(output))); + MatchingElementsSize(tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorShape(input2), + tflite::micro::GetTensorShape(output))); } } else if (output->type == kTfLiteUInt8) { if (need_broadcast) { @@ -125,9 +151,8 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node, } void EvalFloat(TfLiteContext* context, TfLiteNode* node, - TfLiteMulParams* params, OpData* data, - const TfLiteTensor* input1, const TfLiteTensor* input2, - TfLiteTensor* output) { + TfLiteMulParams* params, const TfLiteEvalTensor* input1, + const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) { float output_activation_min, output_activation_max; CalculateActivationRange(params->activation, &output_activation_min, &output_activation_max); @@ -135,12 +160,15 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node, SetActivationParams(output_activation_min, output_activation_max, &op_params); bool need_broadcast = reference_ops::ProcessBroadcastShapes( - GetTensorShape(input1), GetTensorShape(input2), &op_params); -#define TF_LITE_MUL(opname) \ - reference_ops::opname(op_params, GetTensorShape(input1), \ - GetTensorData(input1), GetTensorShape(input2), \ - GetTensorData(input2), GetTensorShape(output), \ - GetTensorData(output)); + tflite::micro::GetTensorShape(input1), + tflite::micro::GetTensorShape(input2), &op_params); +#define TF_LITE_MUL(opname) \ + reference_ops::opname(op_params, tflite::micro::GetTensorShape(input1), \ + tflite::micro::GetTensorData(input1), \ + tflite::micro::GetTensorShape(input2), \ + tflite::micro::GetTensorData(input2), \ + tflite::micro::GetTensorShape(output), \ + tflite::micro::GetTensorData(output)); if (need_broadcast) { TF_LITE_MUL(BroadcastMul4DSlow); @@ -152,21 +180,24 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node, TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { auto* params = reinterpret_cast(node->builtin_data); - OpData data; - const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor); - const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + const TfLiteEvalTensor* input1 = + tflite::micro::GetEvalInput(context, node, kInput1Tensor); + const TfLiteEvalTensor* input2 = + tflite::micro::GetEvalInput(context, node, kInput2Tensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); - CalculateOpData(context, node, params, &data); + TFLITE_DCHECK(node->user_data != nullptr); + const OpData& data = *(static_cast(node->user_data)); switch (input1->type) { case kTfLiteUInt8: case kTfLiteInt8: - EvalQuantized(context, node, params, &data, input1, input2, output); + EvalQuantized(context, node, params, data, input1, input2, output); break; case kTfLiteFloat32: - EvalFloat(context, node, params, &data, input1, input2, output); + EvalFloat(context, node, params, input1, input2, output); break; default: TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", @@ -179,8 +210,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace mul TfLiteRegistration Register_MUL() { - return 
{nullptr /* Init */, nullptr /* Free */, nullptr /* Prepare */, - mul::Eval}; + return {mul::Init, nullptr /* Free */, mul::Prepare, mul::Eval}; } } // namespace micro diff --git a/tensorflow/lite/micro/kernels/cmsis-nn/pooling.cc b/tensorflow/lite/micro/kernels/cmsis-nn/pooling.cc index d0babb4b98d..4229b2c244c 100644 --- a/tensorflow/lite/micro/kernels/cmsis-nn/pooling.cc +++ b/tensorflow/lite/micro/kernels/cmsis-nn/pooling.cc @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/padding.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { namespace ops { @@ -72,7 +73,7 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, void AverageEvalFloat(const TfLiteContext* context, const TfLiteNode* node, const TfLitePoolParams* params, const OpData& data, - const TfLiteTensor* input, TfLiteTensor* output) { + const TfLiteEvalTensor* input, TfLiteEvalTensor* output) { float activation_min, activation_max; CalculateActivationRange(params->activation, &activation_min, &activation_max); @@ -86,14 +87,16 @@ void AverageEvalFloat(const TfLiteContext* context, const TfLiteNode* node, op_params.padding_values.width = data.padding.width; op_params.float_activation_min = activation_min; op_params.float_activation_max = activation_max; - reference_ops::AveragePool( - op_params, GetTensorShape(input), GetTensorData(input), - GetTensorShape(output), GetTensorData(output)); + reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node, const TfLitePoolParams* params, const OpData& data, - const TfLiteTensor* input, TfLiteTensor* output) { + const TfLiteEvalTensor* input, + TfLiteEvalTensor* output) { TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8); PoolParams op_params; @@ -107,14 +110,15 @@ void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node, op_params.quantized_activation_max = data.activation_max; if (input->type == kTfLiteUInt8) { - reference_ops::AveragePool( - op_params, GetTensorShape(input), GetTensorData(input), - GetTensorShape(output), GetTensorData(output)); + reference_ops::AveragePool(op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } else { - RuntimeShape input_shape = GetTensorShape(input); + RuntimeShape input_shape = tflite::micro::GetTensorShape(input); TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); - RuntimeShape output_shape = GetTensorShape(output); + RuntimeShape output_shape = tflite::micro::GetTensorShape(output); TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); const int depth = MatchingDim(input_shape, 3, output_shape, 3); @@ -154,15 +158,16 @@ void AverageEvalQuantized(TfLiteContext* context, const TfLiteNode* node, TFLITE_DCHECK_EQ( arm_avgpool_s8(&ctx, &pool_params, &input_dims, - GetTensorData(input), &filter_dims, &output_dims, - GetTensorData(output)), + tflite::micro::GetTensorData(input), + &filter_dims, &output_dims, + tflite::micro::GetTensorData(output)), ARM_MATH_SUCCESS); } } void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node, TfLitePoolParams* params, const OpData& data, - TfLiteTensor* input, TfLiteTensor* output) { 
+ const TfLiteEvalTensor* input, TfLiteEvalTensor* output) { float activation_min, activation_max; CalculateActivationRange(params->activation, &activation_min, &activation_max); @@ -175,14 +180,16 @@ void MaxEvalFloat(TfLiteContext* context, TfLiteNode* node, op_params.padding_values.width = data.padding.width; op_params.float_activation_min = activation_min; op_params.float_activation_max = activation_max; - reference_ops::MaxPool(op_params, GetTensorShape(input), - GetTensorData(input), GetTensorShape(output), - GetTensorData(output)); + reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } void MaxEvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node, TfLitePoolParams* params, const OpData& data, - TfLiteTensor* input, TfLiteTensor* output) { + const TfLiteEvalTensor* input, + TfLiteEvalTensor* output) { tflite::PoolParams op_params; op_params.stride_height = params->stride_height; op_params.stride_width = params->stride_width; @@ -192,16 +199,18 @@ void MaxEvalQuantizedUInt8(TfLiteContext* context, TfLiteNode* node, op_params.padding_values.width = data.padding.width; op_params.quantized_activation_min = data.activation_min; op_params.quantized_activation_max = data.activation_max; - reference_ops::MaxPool(op_params, GetTensorShape(input), - GetTensorData(input), GetTensorShape(output), - GetTensorData(output)); + reference_ops::MaxPool(op_params, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } TfLiteStatus MaxEvalInt8(TfLiteContext* context, const TfLiteNode* node, const TfLitePoolParams* params, const OpData& data, - TfLiteTensor* input, TfLiteTensor* output) { - RuntimeShape input_shape = GetTensorShape(input); - RuntimeShape output_shape = GetTensorShape(output); + const TfLiteEvalTensor* input, + TfLiteEvalTensor* output) { + RuntimeShape input_shape = tflite::micro::GetTensorShape(input); + RuntimeShape output_shape = tflite::micro::GetTensorShape(output); const int depth = MatchingDim(input_shape, 3, output_shape, 3); cmsis_nn_dims input_dims; @@ -237,10 +246,12 @@ TfLiteStatus MaxEvalInt8(TfLiteContext* context, const TfLiteNode* node, ctx.buf = context->GetScratchBuffer(context, data.buffer_idx); } - TFLITE_DCHECK_EQ(arm_max_pool_s8(&ctx, &pool_params, &input_dims, - GetTensorData(input), &filter_dims, - &output_dims, GetTensorData(output)), - ARM_MATH_SUCCESS); + TFLITE_DCHECK_EQ( + arm_max_pool_s8(&ctx, &pool_params, &input_dims, + tflite::micro::GetTensorData(input), &filter_dims, + &output_dims, + tflite::micro::GetTensorData(output)), + ARM_MATH_SUCCESS); return kTfLiteOk; } @@ -307,8 +318,10 @@ TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) { const OpData& data = *(static_cast(node->user_data)); - const TfLiteTensor* input = GetInput(context, node, kInputTensor); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); // Inputs and outputs share the same type, guaranteed by the converter. 
switch (input->type) { @@ -332,9 +345,10 @@ TfLiteStatus MaxEval(TfLiteContext* context, TfLiteNode* node) { const OpData& data = *(static_cast(node->user_data)); - TfLiteTensor* input = &context->tensors[flatbuffers::EndianScalar( - node->inputs->data[kInputTensor])]; - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + TfLiteEvalTensor* output = + tflite::micro::GetEvalOutput(context, node, kOutputTensor); switch (input->type) { case kTfLiteFloat32: diff --git a/tensorflow/lite/micro/kernels/cmsis-nn/softmax.cc b/tensorflow/lite/micro/kernels/cmsis-nn/softmax.cc index 790af35f217..194bba4f26a 100644 --- a/tensorflow/lite/micro/kernels/cmsis-nn/softmax.cc +++ b/tensorflow/lite/micro/kernels/cmsis-nn/softmax.cc @@ -18,6 +18,7 @@ limitations under the License. #include "cmsis/CMSIS/NN/Include/arm_nnfunctions.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" namespace tflite { namespace ops { @@ -47,8 +48,6 @@ TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context, TF_LITE_ENSURE(context, output->params.scale == 1.f / 256); } } - TF_LITE_ENSURE(context, (output->params.scale == 1.f / 256) || - (output->params.scale == 1.f / 255)); static const int kScaledDiffIntegerBits = 5; @@ -71,37 +70,53 @@ TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context, } // namespace +void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + return context->AllocatePersistentBuffer(context, sizeof(SoftmaxParams)); +} + TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) { + auto* params = static_cast(node->builtin_data); + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); const TfLiteTensor* input = GetInput(context, node, 0); TF_LITE_ENSURE(context, NumDimensions(input) >= 1); - return kTfLiteOk; + TfLiteTensor* output = GetOutput(context, node, 0); + + TFLITE_DCHECK(node->user_data != nullptr); + SoftmaxParams* data = static_cast(node->user_data); + return CalculateSoftmaxParams(context, input, output, params, data); } // Takes a tensor and performs softmax along the last dimension. 
-void SoftmaxFloat(const TfLiteTensor* input, TfLiteTensor* output, +void SoftmaxFloat(const TfLiteEvalTensor* input, TfLiteEvalTensor* output, const SoftmaxParams& op_data) { - tflite::reference_ops::Softmax( - op_data, GetTensorShape(input), GetTensorData(input), - GetTensorShape(output), GetTensorData(output)); + tflite::reference_ops::Softmax(op_data, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } -void SoftmaxQuantized(const TfLiteTensor* input, TfLiteTensor* output, +void SoftmaxQuantized(const TfLiteEvalTensor* input, TfLiteEvalTensor* output, const SoftmaxParams& op_data) { - const auto input_shape = GetTensorShape(input); - const auto output_shape = GetTensorShape(output); + const auto input_shape = tflite::micro::GetTensorShape(input); + const auto output_shape = tflite::micro::GetTensorShape(output); if (input->type == kTfLiteUInt8) { - tflite::reference_ops::Softmax(op_data, input_shape, - GetTensorData(input), output_shape, - GetTensorData(output)); + tflite::reference_ops::Softmax( + op_data, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } else { if (output->type == kTfLiteInt16) { tflite::reference_ops::Softmax( - op_data, GetTensorShape(input), GetTensorData(input), - GetTensorShape(output), GetTensorData(output)); + op_data, tflite::micro::GetTensorShape(input), + tflite::micro::GetTensorData(input), + tflite::micro::GetTensorShape(output), + tflite::micro::GetTensorData(output)); } else { const int trailing_dim = input_shape.DimensionsCount() - 1; const int outer_size = @@ -109,31 +124,30 @@ void SoftmaxQuantized(const TfLiteTensor* input, TfLiteTensor* output, const int depth = MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); - arm_softmax_s8(GetTensorData(input), outer_size, depth, - op_data.input_multiplier, op_data.input_left_shift, - op_data.diff_min, GetTensorData(output)); + arm_softmax_s8(tflite::micro::GetTensorData(input), outer_size, + depth, op_data.input_multiplier, op_data.input_left_shift, + op_data.diff_min, + tflite::micro::GetTensorData(output)); } } } TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) { - auto* params = static_cast(node->builtin_data); + const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0); + TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); - const TfLiteTensor* input = GetInput(context, node, 0); - TfLiteTensor* output = GetOutput(context, node, 0); - - SoftmaxParams op_data; - TF_LITE_ENSURE_STATUS( - CalculateSoftmaxParams(context, input, output, params, &op_data)); + TFLITE_DCHECK(node->user_data != nullptr); + const SoftmaxParams& data = + *(static_cast(node->user_data)); switch (input->type) { case kTfLiteFloat32: { - SoftmaxFloat(input, output, op_data); + SoftmaxFloat(input, output, data); return kTfLiteOk; } case kTfLiteInt8: case kTfLiteUInt8: { - SoftmaxQuantized(input, output, op_data); + SoftmaxQuantized(input, output, data); return kTfLiteOk; } default: @@ -142,10 +156,11 @@ TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteError; } } + } // namespace activations TfLiteRegistration Register_SOFTMAX() { - return {/*init=*/nullptr, + return {/*init=*/activations::SoftmaxInit, /*free=*/nullptr, /*prepare=*/activations::SoftmaxPrepare, /*invoke=*/activations::SoftmaxEval, From 
3b5dca4dd349747b09267df6fda0bd8f1dda17e0 Mon Sep 17 00:00:00 2001 From: Shawn Presser Date: Wed, 5 Aug 2020 15:10:24 -0500 Subject: [PATCH 2185/2522] Fix pylint errors --- tensorflow/python/tpu/client/client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/tpu/client/client.py b/tensorflow/python/tpu/client/client.py index 8f48298345e..f7de1f0d4d8 100644 --- a/tensorflow/python/tpu/client/client.py +++ b/tensorflow/python/tpu/client/client.py @@ -48,8 +48,8 @@ def _environment_discovery_url(): def _gce_metadata_endpoint(): return 'http://' + os.environ.get( - _GCE_METADATA_URL_ENV_VARIABLE, - 'metadata.google.internal') + _GCE_METADATA_URL_ENV_VARIABLE, + 'metadata.google.internal') def _request_compute_metadata(path): From b2f5d100d1e1d9422fca8656e64c39fdc287e6b1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 5 Aug 2020 13:02:31 -0700 Subject: [PATCH 2186/2522] [XLA] Convert Abs(a)*Abs(a) to a*a and add an option to allow for numerically unsafe algebraic simplifications PiperOrigin-RevId: 325084126 Change-Id: Id8bf89ba6601d7bb1efc2b167e6e9accf5913114 --- .../xla/service/algebraic_simplifier.cc | 117 ++++++++---------- .../xla/service/algebraic_simplifier.h | 9 -- .../xla/service/algebraic_simplifier_test.cc | 16 --- 3 files changed, 50 insertions(+), 92 deletions(-) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index d04a428d349..0b588048e4a 100755 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -665,7 +665,7 @@ Status AlgebraicSimplifierVisitor::ScalarMultiplyReduction( HloInstruction* inst; HloInstruction* user; int64 index; - std::tie(inst, user, index) = operands.back(); + std::tie (inst, user, index) = operands.back(); operands.pop_back(); // Skip the op types that are not commutative with multiply. @@ -913,7 +913,7 @@ Status AlgebraicSimplifierVisitor::HandleAdd(HloInstruction* add) { (Match(lhs, m::Multiply(m::Op(&c), m::Op(&a))) && Match(rhs, m::MultiplyAnyOrder(m::Op().Is(c), m::Op(&b))))) && (ShapeUtil::ElementIsIntegral(add->shape()) || - options_.enable_floats_are_real() || IsAllFpConstantPowerOf2(c))) { + IsAllFpConstantPowerOf2(c))) { return ReplaceWithNewInstruction( add, HloInstruction::CreateBinary( add->shape(), HloOpcode::kMultiply, @@ -2667,17 +2667,6 @@ Status AlgebraicSimplifierVisitor::HandleMultiply(HloInstruction* multiply) { return Status::OK(); } - { - HloInstruction* abs_operand; - if (lhs == rhs && Match(lhs, m::Abs(m::Op(&abs_operand))) && - !ShapeUtil::ElementIsComplex(abs_operand->shape())) { - TF_RETURN_IF_ERROR(multiply->ReplaceOperandWith(0, abs_operand)); - TF_RETURN_IF_ERROR(multiply->ReplaceOperandWith(1, abs_operand)); - changed_ = true; - return Status::OK(); - } - } - { HloInstruction *convert_operand, *operand; // Mul(Convert(Pred), operand) => select(pred, operand, 0) @@ -3048,8 +3037,8 @@ Status AlgebraicSimplifierVisitor::HandleBroadcast(HloInstruction* broadcast) { HloInstruction* new_broadcast = computation_->AddInstruction( HloInstruction::CreateBroadcast(user->shape(), operand, {})); // Use HloInstruction::ReplaceAllUsesWith instead of - // HloComputation::ReplaceWithNewInstruction because we are replacing - // an instruction other than the visited instruction. + // HloComputation::ReplaceWithNewInstruction because we are replacing an + // instruction other than the visited instruction. 
changed_ = true; return user->ReplaceAllUsesWith(new_broadcast); } @@ -3166,11 +3155,9 @@ Status AlgebraicSimplifierVisitor::HandleConvert(HloInstruction* convert) { // Eliminate a convert pair if it is a no-op. The following are a few // example cases that are being handled: - // 1. convert(convert(A, $TYPE1), $TYPE2) is simplified to A if A is of - // $TYPE2 + // 1. convert(convert(A, $TYPE1), $TYPE2) is simplified to A if A is of $TYPE2 // and convert(A, $TYPE1) is an upcast - // 2. convert(convert(A, $TYPE1),$TYPE2) is simplified to A if A is of - // $TYPE2 + // 2. convert(convert(A, $TYPE1),$TYPE2) is simplified to A if A is of $TYPE2 // and convert(A, $TYPE1) is an upcast and is an integral conversion from // unsigned to signed (only signed to unsigned conversion is NOT allowed) // 3. Tuple(convert(A, $TYPE1) , floor(convert(convert(A, $TYPE1), $TYPE2)), @@ -3306,8 +3293,7 @@ Status AlgebraicSimplifierVisitor::HandlePad(HloInstruction* pad) { pad->shape(), nonzero_pad->mutable_shape())); simplifier_->UpdateLayout(nonzero_pad->mutable_shape()); - // Second, construct the slice instruction to perform the negative - // padding. + // Second, construct the slice instruction to perform the negative padding. std::vector start_indices; std::vector end_indices; std::vector strides; @@ -3460,8 +3446,8 @@ AlgebraicSimplifierVisitor::TryToSinkBroadcastAfterOpWithUniqueNonScalarOperand( Shape changed_shape; for (HloInstruction* user_operand : user->operands()) { - // If this is a broadcast operand that is not our original broadcast - // input to this function then we might need to change the input. + // If this is a broadcast operand that is not our original broadcast input + // to this function then we might need to change the input. if (is_compatible_broadcast(user_operand)) { // If this is a broadcast from a scalar value rewrite a broadcast from // the scalar to the new shape enforced from the other broadcast @@ -3632,16 +3618,16 @@ Status AlgebraicSimplifierVisitor::HandleRemainder(HloInstruction* remainder) { // If M < N, then {0, ..., M} % N ==> {0, ..., M}. // // Currently this only covers the case when N is a broadcasted constant - // scalar. We could also cover the case when N is a non-broadcasted - // constant with the same value repeated. + // scalar. We could also cover the case when N is a non-broadcasted constant + // with the same value repeated. HloInstruction* iota; HloInstruction* divisor; if (Match(remainder, m::Remainder(m::Iota(&iota), m::Broadcast(m::ConstantEffectiveScalar(&divisor))))) { // The iota counts {0, ..., iota_upper_bound - 1}. (Actually this is - // conservative; the iota may overflow and count up to a smaller value - // than this. But that's OK for our purposes here.) + // conservative; the iota may overflow and count up to a smaller value than + // this. But that's OK for our purposes here.) int64 iota_upper_bound = iota->shape().dimensions( Cast(iota)->iota_dimension()); absl::optional divisor_val = divisor->literal().GetIntegralAsS64( @@ -3654,8 +3640,8 @@ Status AlgebraicSimplifierVisitor::HandleRemainder(HloInstruction* remainder) { // (X + N) % N = X % N, so long as X + N does not overflow. // // We don't have range tracking in XLA that would let us know whether X + N - // overflows, so for now we only do this simplification when X is an iota. - // We could add other operations where it's easy to see a range, such as + // overflows, so for now we only do this simplification when X is an iota. 
We + // could add other operations where it's easy to see a range, such as // remainder, convert, etc., though at some point we'd probably want a // range-tracking analysis. HloInstruction* bcast; @@ -3667,9 +3653,9 @@ Status AlgebraicSimplifierVisitor::HandleRemainder(HloInstruction* remainder) { m::Broadcast(m::ConstantEffectiveScalar(&addend))), m::Broadcast(&bcast, m::ConstantEffectiveScalar(&divisor)))) && addend == divisor) { - // The iota counts {0, ...iota_upper_bound - 1}, with the same caveat - // above that iota_upper_bound is conservative, and the true upper bound - // may be smaller. + // The iota counts {0, ...iota_upper_bound - 1}, with the same caveat above + // that iota_upper_bound is conservative, and the true upper bound may be + // smaller. int64 iota_upper_bound = iota->shape().dimensions( Cast(iota)->iota_dimension()); absl::optional divisor_val = divisor->literal().GetIntegralAsS64( @@ -3774,9 +3760,9 @@ Status AlgebraicSimplifierVisitor::HandleReverse(HloInstruction* reverse) { StatusOr AlgebraicSimplifierVisitor::TrySimplifyScalarSlice( HloInstruction* slice) { - // Only try to do this for effective scalars. We could do the same for - // slicing out larger pieces of padding (replacing with a broadcast of the - // padding value), but this is probably not worth it. + // Only try to do this for effective scalars. We could do the same for slicing + // out larger pieces of padding (replacing with a broadcast of the padding + // value), but this is probably not worth it. if (!ShapeUtil::IsEffectiveScalar(slice->shape())) { return false; } @@ -3877,8 +3863,8 @@ StatusOr AlgebraicSimplifierVisitor::TryToReorderSliceAndReshape( return false; } -// Allowing a slice to move through a reverse with any necessary updates to -// the slice config. +// Allowing a slice to move through a reverse with any necessary updates to the +// slice config. StatusOr AlgebraicSimplifierVisitor::TryToReorderSliceAndReverse( HloInstruction* slice) { VLOG(2) << "Entered TryToReorderSliceAndReverse for slice:" @@ -3906,8 +3892,8 @@ StatusOr AlgebraicSimplifierVisitor::TryToReorderSliceAndReverse( << new_limits[rdim]; } // New slice formed from the reverse_operand, but strides and shape of the - // slice output remains the same. New slice's starts and limits are - // updated for ONLY the reversed dimensions as indicated above. + // slice output remains the same. New slice's starts and limits are updated + // for ONLY the reversed dimensions as indicated above. HloInstruction* new_slice = computation_->AddInstruction( HloInstruction::CreateSlice(slice->shape(), reverse_operand, new_starts, new_limits, new_strides)); @@ -3934,8 +3920,7 @@ Status AlgebraicSimplifierVisitor::HandleSlice(HloInstruction* slice) { if (Match(slice, m::Slice(m::Pad(&pad, m::Op(&pad_operand), m::Op())))) { // Is the result of the slice the pad operand. bool slice_undoes_pad = true; - // Can the slice be moved to the pad_operand without any padding being - // read. + // Can the slice be moved to the pad_operand without any padding being read. bool slice_inside_pad = true; // Does this slice slice out pading only. bool slice_in_padding = false; @@ -4070,8 +4055,8 @@ Status AlgebraicSimplifierVisitor::HandleSlice(HloInstruction* slice) { } } - // Do not try to reorder slices and reshapes after layout assignment as it - // may be invalid. + // Do not try to reorder slices and reshapes after layout assignment as it may + // be invalid. 
if (!options_.is_layout_sensitive()) { TF_ASSIGN_OR_RETURN(replaced, TryToReorderSliceAndReshape(slice)); } @@ -4121,8 +4106,8 @@ Status AlgebraicSimplifierVisitor::HandleDynamicSlice( if (ShapeUtil::IsScalar(dynamic_slice->shape())) { return ReplaceInstruction(dynamic_slice, operand); } - // DynamicSlice where operand has the same size as the output is simply - // equal to operand. + // DynamicSlice where operand has the same size as the output is simply equal + // to operand. if (SameShape(operand, dynamic_slice)) { return ReplaceInstruction(dynamic_slice, operand); } @@ -4453,8 +4438,8 @@ Status AlgebraicSimplifierVisitor::HandleReduce(HloInstruction* hlo) { // Convert Reduce(concat({a,b,...})) to // map(reduce(a),map(reduce(b),...,)) // - // This should make fusion easier or use less memory bandwidth in the - // unfused case. + // This should make fusion easier or use less memory bandwidth in the unfused + // case. if (arg->opcode() == HloOpcode::kConcatenate && absl::c_linear_search(reduce->dimensions(), arg->concatenate_dimension())) { @@ -4473,9 +4458,9 @@ Status AlgebraicSimplifierVisitor::HandleReduce(HloInstruction* hlo) { } HloInstruction *dot, *lhs, *rhs; - // Convert Reduce(Dot(X,Y)) to Dot(X,Y) if any of the dimensions reduced - // were batch dimensions of the dot. The transformation supports reducing - // other dimensions as well. + // Convert Reduce(Dot(X,Y)) to Dot(X,Y) if any of the dimensions reduced were + // batch dimensions of the dot. The transformation supports reducing other + // dimensions as well. if (options_.enable_dot_strength_reduction() && Match(arg, m::Dot(&dot, m::Op(&lhs), m::Op(&rhs)).WithOneUser()) && Match(reduce->to_apply()->root_instruction(), @@ -4547,13 +4532,13 @@ Status AlgebraicSimplifierVisitor::HandleReduceWindow( if (options_.enable_window_reduce_to_reduce_replacement()) { // A reduce window can be expressed as a reduce and a reshape if all // dimensions either have a window size of one or the entire dimension. If - // there is no stride, dilation, or padding, this is as easy as checking - // the size of the output shape and window dimension. + // there is no stride, dilation, or padding, this is as easy as checking the + // size of the output shape and window dimension. // - // The reshape is a bitcast since it adds one-sized dimensions. Often - // these ones are immediately removed as well with another reshape. The - // implementation of reduce tends to be slightly more efficient at - // reducing entire dimensions compared to reduce window. + // The reshape is a bitcast since it adds one-sized dimensions. Often these + // ones are immediately removed as well with another reshape. The + // implementation of reduce tends to be slightly more efficient at reducing + // entire dimensions compared to reduce window. auto effective_reduce_dims = [&] { if (window_util::HasStride(window) || window_util::HasDilation(window) || window_util::HasPadding(window)) { @@ -5068,8 +5053,7 @@ StatusOr AlgebraicSimplifierVisitor::SwapConvOperands( auto new_dim = swapped_window.add_dimensions(); new_dim->set_size(input_size); - // If the kernel is not reversed, the activations must be manually - // reversed. + // If the kernel is not reversed, the activations must be manually reversed. 
if (!window_dims[spatial_dim].window_reversal()) { reverse_dimensions.push_back( dnums.kernel_spatial_dimensions(spatial_dim)); @@ -5089,8 +5073,8 @@ StatusOr AlgebraicSimplifierVisitor::SwapConvOperands( dilated_kernel_size); } - // Don't transform if a naive convolution implementation would not have - // fewer flops. + // Don't transform if a naive convolution implementation would not have fewer + // flops. if (kernel_product <= swapped_kernel_product) { return false; } @@ -5168,11 +5152,11 @@ StatusOr AlgebraicSimplifierVisitor::SimplifyConvToDot( } } - // Stride ignores part of the output, which matrix multiplication does not - // do, so require no stride. Padding and base (lhs) dilation both implicitly + // Stride ignores part of the output, which matrix multiplication does not do, + // so require no stride. Padding and base (lhs) dilation both implicitly // extend the data, which matrix multiplication also does not do, so require - // no padding and no base (lhs) dilation. Window (rhs) dilation has no - // effect for a 1x1 window, so window dilation is no problem. + // no padding and no base (lhs) dilation. Window (rhs) dilation has no effect + // for a 1x1 window, so window dilation is no problem. if (window_util::HasStride(window) || window_util::HasPadding(window) || window_util::HasBaseDilation(window)) { return false; @@ -5225,9 +5209,8 @@ StatusOr AlgebraicSimplifierVisitor::SimplifyConvToDot( } } - // We already checked feature_dimension is most minor, so data in - // input_shape and row-major {conv_width,input_channels} are bitwise - // identical. + // We already checked feature_dimension is most minor, so data in input_shape + // and row-major {conv_width,input_channels} are bitwise identical. Shape new_input_shape = ShapeUtil::MakeShapeWithDescendingLayout( input_shape.element_type(), {conv_width, input_channels}); simplifier_->UpdateLayout(&new_input_shape); diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.h b/tensorflow/compiler/xla/service/algebraic_simplifier.h index cabecec4eb8..9f2a3404116 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.h +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.h @@ -97,14 +97,6 @@ class AlgebraicSimplifierOptions { return enable_scalar_multiply_reduction_; } - // Also the algebraic simplifer to treat floating point values like real - // numbers. - void set_enable_floats_are_real(bool enable_floats_are_real) { - enable_floats_are_real_ = enable_floats_are_real; - } - - bool enable_floats_are_real() const { return enable_floats_are_real_; } - // If enable_window_reduce_replacement is true, the kReduceWindow instruction // can be optimized by replacement with simpler operations. 
void set_enable_window_reduce_to_reduce_replacement( @@ -166,7 +158,6 @@ class AlgebraicSimplifierOptions { bool enable_conv_simplification_{true}; bool enable_conv_operand_swap_{true}; bool enable_scalar_multiply_reduction_{false}; - bool enable_floats_are_real_{false}; bool enable_window_reduce_to_reduce_replacement_{true}; bool enable_reduce_of_reshape_{true}; bool replace_transpose_with_bitcast_{true}; diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index fdd9fb04941..90ca44714f7 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -117,22 +117,6 @@ TEST_F(AlgebraicSimplifierTest, FactorFpAddition) { m::ConstantScalar(0.125)))); } -// (Abs(A)) * (Abs(A)) => (A*A) -TEST_F(AlgebraicSimplifierTest, SquareOfAbs) { - const char* kModuleStr = R"( - HloModule m - test { - p = f32[] parameter(0) - a = f32[] abs(p) - ROOT z = f32[] multiply(a, a) - } - )"; - TF_ASSERT_OK_AND_ASSIGN(auto m, ParseAndReturnVerifiedModule(kModuleStr)); - ASSERT_TRUE(AlgebraicSimplifier(default_options_).Run(m.get()).ValueOrDie()); - EXPECT_THAT(m->entry_computation()->root_instruction(), - GmockMatch(m::Multiply(m::Parameter(0), m::Parameter(0)))); -} - // (A*C1) * (B*C2) => (A*B)*(C1*C2) TEST_F(AlgebraicSimplifierTest, MultiplyChain) { const char* kModuleStr = R"( From 697b531eb24af23a2711e672a56b0b98484c44dd Mon Sep 17 00:00:00 2001 From: Kaixi Hou Date: Wed, 5 Aug 2020 13:21:58 -0700 Subject: [PATCH 2187/2522] Fix narrow-conversion issue --- .../grappler/optimizers/generic_layout_optimizer_transposer.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer.cc b/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer.cc index 2ac47ec36a4..b3a806e52fd 100644 --- a/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer.cc +++ b/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer.cc @@ -241,7 +241,7 @@ Status Transposer::CreateConstPermNode(TransposeContext* context, node.mutable_attr()->insert({"dtype", attr_data_type}); AttrValue attr_tensor; - Tensor tensor(DT_INT32, TensorShape({permutation.size()})); + Tensor tensor(DT_INT32, TensorShape({(long long)permutation.size()})); for (int i = 0, end = permutation.size(); i < end; i++) { tensor.flat()(i) = permutation[i]; } From 96cbf4354851e255b664d4cfd9f5c0bb9c001941 Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Wed, 5 Aug 2020 13:21:17 -0700 Subject: [PATCH 2188/2522] Remove duplicate symbols and usages for utility functions. 
PiperOrigin-RevId: 325087800 Change-Id: Ic15366260a57ef22da72c587c45d78e5094d8a41 --- .../python/distribute/mirrored_strategy.py | 4 ++-- tensorflow/python/distribute/values.py | 17 ----------------- 2 files changed, 2 insertions(+), 19 deletions(-) diff --git a/tensorflow/python/distribute/mirrored_strategy.py b/tensorflow/python/distribute/mirrored_strategy.py index 5323f6131ee..07798dc1046 100644 --- a/tensorflow/python/distribute/mirrored_strategy.py +++ b/tensorflow/python/distribute/mirrored_strategy.py @@ -691,9 +691,9 @@ class MirroredExtended(distribute_lib.StrategyExtendedV1): def read_var(self, replica_local_var): """Read the aggregate value of a replica-local variable.""" # pylint: disable=protected-access - if values._is_sync_on_read(replica_local_var): + if distribute_utils.is_sync_on_read(replica_local_var): return replica_local_var._get_cross_replica() - assert values._is_mirrored(replica_local_var) + assert distribute_utils.is_mirrored(replica_local_var) return array_ops.identity(replica_local_var._get()) # pylint: enable=protected-access diff --git a/tensorflow/python/distribute/values.py b/tensorflow/python/distribute/values.py index ff1c94ae392..a5dabddff94 100644 --- a/tensorflow/python/distribute/values.py +++ b/tensorflow/python/distribute/values.py @@ -1526,22 +1526,5 @@ class OnWritePolicy(AutoPolicy): return _on_write_update_replica(var, update_fn, value, **kwargs) -# Utility functions -# Return True if the Value is Mirrored or the Variable is replicated and kept in -# sync. -def _is_mirrored(val): - if isinstance(val, DistributedVariable): - if val._policy: # pylint: disable=protected-access - return val._policy._is_mirrored() # pylint: disable=protected-access - return isinstance(val, Mirrored) - - -def _is_sync_on_read(val): - if isinstance(val, DistributedVariable): - if val._policy: # pylint: disable=protected-access - return not val._policy._is_mirrored() # pylint: disable=protected-access - return not isinstance(val, Mirrored) - - def _in_update_replica(): return distribute_lib.get_update_replica_id() is not None From 3338e5d816a8ace4ac02c1be34f6fb9715737a16 Mon Sep 17 00:00:00 2001 From: Zhenyu Tan Date: Wed, 5 Aug 2020 13:33:32 -0700 Subject: [PATCH 2189/2522] fixit for feature column VocabularyListFeatureColumnTest, IndicatorColumn, IdentityColumn and WeightCategoricalColumn. 
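These tests no longer rely on the `@test_util.run_deprecated_v1` decorator; each test body is instead wrapped in an explicit graph. The migration pattern applied throughout the change is, as a minimal sketch (test body elided):

    # before
    @test_util.run_deprecated_v1
    def test_get_sparse_tensors(self):
      column = fc._categorical_column_with_identity(key='aaa', num_buckets=3)
      ...

    # after
    def test_get_sparse_tensors(self):
      with ops.Graph().as_default():
        column = fc._categorical_column_with_identity(key='aaa', num_buckets=3)
        ...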
PiperOrigin-RevId: 325090178 Change-Id: I427fae37945eeed690b99bc959e7757dea7086ed --- .../feature_column/feature_column_test.py | 891 +++++++++--------- 1 file changed, 437 insertions(+), 454 deletions(-) diff --git a/tensorflow/python/feature_column/feature_column_test.py b/tensorflow/python/feature_column/feature_column_test.py index 3a38f86b95f..755df6060d3 100644 --- a/tensorflow/python/feature_column/feature_column_test.py +++ b/tensorflow/python/feature_column/feature_column_test.py @@ -3827,7 +3827,6 @@ class VocabularyListCategoricalColumnTest(test.TestCase): 'aaa': parsing_ops.VarLenFeature(dtypes.int64) }, column._parse_example_spec) - @test_util.run_deprecated_v1 def test_all_constructor_args(self): column = fc._categorical_column_with_vocabulary_list( key='aaa', @@ -3839,7 +3838,6 @@ class VocabularyListCategoricalColumnTest(test.TestCase): 'aaa': parsing_ops.VarLenFeature(dtypes.int32) }, column._parse_example_spec) - @test_util.run_deprecated_v1 def test_deep_copy(self): original = fc._categorical_column_with_vocabulary_list( key='aaa', vocabulary_list=(12, 24, 36), dtype=dtypes.int32) @@ -3974,41 +3972,41 @@ class VocabularyListCategoricalColumnTest(test.TestCase): dense_shape=[1, 2]), features['aaa'].eval()) - @test_util.run_deprecated_v1 def test_get_sparse_tensors(self): - column = fc._categorical_column_with_vocabulary_list( - key='aaa', vocabulary_list=('omar', 'stringer', 'marlo')) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=('marlo', 'skywalker', 'omar'), - dense_shape=(2, 2)) - id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) - self.assertIsNone(id_weight_pair.weight_tensor) - with _initialized_session(): - _assert_sparse_tensor_value( - self, - sparse_tensor.SparseTensorValue( - indices=inputs.indices, - values=np.array((2, -1, 0), dtype=np.int64), - dense_shape=inputs.dense_shape), - id_weight_pair.id_tensor.eval()) + with ops.Graph().as_default(): + column = fc._categorical_column_with_vocabulary_list( + key='aaa', vocabulary_list=('omar', 'stringer', 'marlo')) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=('marlo', 'skywalker', 'omar'), + dense_shape=(2, 2)) + id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) + self.assertIsNone(id_weight_pair.weight_tensor) + with _initialized_session(): + _assert_sparse_tensor_value( + self, + sparse_tensor.SparseTensorValue( + indices=inputs.indices, + values=np.array((2, -1, 0), dtype=np.int64), + dense_shape=inputs.dense_shape), + id_weight_pair.id_tensor.eval()) - @test_util.run_deprecated_v1 def test_transform_feature(self): - column = fc._categorical_column_with_vocabulary_list( - key='aaa', vocabulary_list=('omar', 'stringer', 'marlo')) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=('marlo', 'skywalker', 'omar'), - dense_shape=(2, 2)) - id_tensor = _transform_features({'aaa': inputs}, [column])[column] - with _initialized_session(): - _assert_sparse_tensor_value( - self, - sparse_tensor.SparseTensorValue( - indices=inputs.indices, - values=np.array((2, -1, 0), dtype=np.int64), - dense_shape=inputs.dense_shape), self.evaluate(id_tensor)) + with ops.Graph().as_default(): + column = fc._categorical_column_with_vocabulary_list( + key='aaa', vocabulary_list=('omar', 'stringer', 'marlo')) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=('marlo', 'skywalker', 'omar'), + dense_shape=(2, 2)) + id_tensor = 
_transform_features({'aaa': inputs}, [column])[column] + with _initialized_session(): + _assert_sparse_tensor_value( + self, + sparse_tensor.SparseTensorValue( + indices=inputs.indices, + values=np.array((2, -1, 0), dtype=np.int64), + dense_shape=inputs.dense_shape), self.evaluate(id_tensor)) def test_get_sparse_tensors_weight_collections(self): column = fc._categorical_column_with_vocabulary_list( @@ -4026,134 +4024,129 @@ class VocabularyListCategoricalColumnTest(test.TestCase): ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)) self.assertCountEqual([], ops.get_collection('my_weights')) - @test_util.run_deprecated_v1 def test_get_sparse_tensors_dense_input(self): - column = fc._categorical_column_with_vocabulary_list( - key='aaa', vocabulary_list=('omar', 'stringer', 'marlo')) - id_weight_pair = column._get_sparse_tensors( - _LazyBuilder({ - 'aaa': (('marlo', ''), ('skywalker', 'omar')) - })) - self.assertIsNone(id_weight_pair.weight_tensor) - with _initialized_session(): - _assert_sparse_tensor_value( - self, - sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=np.array((2, -1, 0), dtype=np.int64), - dense_shape=(2, 2)), - id_weight_pair.id_tensor.eval()) + with ops.Graph().as_default(): + column = fc._categorical_column_with_vocabulary_list( + key='aaa', vocabulary_list=('omar', 'stringer', 'marlo')) + id_weight_pair = column._get_sparse_tensors( + _LazyBuilder({'aaa': (('marlo', ''), ('skywalker', 'omar'))})) + self.assertIsNone(id_weight_pair.weight_tensor) + with _initialized_session(): + _assert_sparse_tensor_value( + self, + sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=np.array((2, -1, 0), dtype=np.int64), + dense_shape=(2, 2)), id_weight_pair.id_tensor.eval()) - @test_util.run_deprecated_v1 def test_get_sparse_tensors_default_value_in_vocabulary(self): - column = fc._categorical_column_with_vocabulary_list( - key='aaa', - vocabulary_list=('omar', 'stringer', 'marlo'), - default_value=2) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=('marlo', 'skywalker', 'omar'), - dense_shape=(2, 2)) - id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) - self.assertIsNone(id_weight_pair.weight_tensor) - with _initialized_session(): - _assert_sparse_tensor_value( - self, - sparse_tensor.SparseTensorValue( - indices=inputs.indices, - values=np.array((2, 2, 0), dtype=np.int64), - dense_shape=inputs.dense_shape), - id_weight_pair.id_tensor.eval()) + with ops.Graph().as_default(): + column = fc._categorical_column_with_vocabulary_list( + key='aaa', + vocabulary_list=('omar', 'stringer', 'marlo'), + default_value=2) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=('marlo', 'skywalker', 'omar'), + dense_shape=(2, 2)) + id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) + self.assertIsNone(id_weight_pair.weight_tensor) + with _initialized_session(): + _assert_sparse_tensor_value( + self, + sparse_tensor.SparseTensorValue( + indices=inputs.indices, + values=np.array((2, 2, 0), dtype=np.int64), + dense_shape=inputs.dense_shape), + id_weight_pair.id_tensor.eval()) - @test_util.run_deprecated_v1 def test_get_sparse_tensors_with_oov_buckets(self): - column = fc._categorical_column_with_vocabulary_list( - key='aaa', - vocabulary_list=('omar', 'stringer', 'marlo'), - num_oov_buckets=100) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1), (1, 2)), - values=('marlo', 'skywalker', 'omar', 
'heisenberg'), - dense_shape=(2, 3)) - id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) - self.assertIsNone(id_weight_pair.weight_tensor) - with _initialized_session(): - _assert_sparse_tensor_value( - self, - sparse_tensor.SparseTensorValue( - indices=inputs.indices, - values=np.array((2, 33, 0, 62), dtype=np.int64), - dense_shape=inputs.dense_shape), - id_weight_pair.id_tensor.eval()) + with ops.Graph().as_default(): + column = fc._categorical_column_with_vocabulary_list( + key='aaa', + vocabulary_list=('omar', 'stringer', 'marlo'), + num_oov_buckets=100) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1), (1, 2)), + values=('marlo', 'skywalker', 'omar', 'heisenberg'), + dense_shape=(2, 3)) + id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) + self.assertIsNone(id_weight_pair.weight_tensor) + with _initialized_session(): + _assert_sparse_tensor_value( + self, + sparse_tensor.SparseTensorValue( + indices=inputs.indices, + values=np.array((2, 33, 0, 62), dtype=np.int64), + dense_shape=inputs.dense_shape), + id_weight_pair.id_tensor.eval()) - @test_util.run_deprecated_v1 def test_get_sparse_tensors_int32(self): - column = fc._categorical_column_with_vocabulary_list( - key='aaa', - vocabulary_list=np.array((30, 35, 11, 23, 22), dtype=np.int32), - dtype=dtypes.int32) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1), (2, 2)), - values=np.array((11, 100, 30, 22), dtype=np.int32), - dense_shape=(3, 3)) - id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) - self.assertIsNone(id_weight_pair.weight_tensor) - with _initialized_session(): - _assert_sparse_tensor_value( - self, - sparse_tensor.SparseTensorValue( - indices=inputs.indices, - values=np.array((2, -1, 0, 4), dtype=np.int64), - dense_shape=inputs.dense_shape), - id_weight_pair.id_tensor.eval()) + with ops.Graph().as_default(): + column = fc._categorical_column_with_vocabulary_list( + key='aaa', + vocabulary_list=np.array((30, 35, 11, 23, 22), dtype=np.int32), + dtype=dtypes.int32) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1), (2, 2)), + values=np.array((11, 100, 30, 22), dtype=np.int32), + dense_shape=(3, 3)) + id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) + self.assertIsNone(id_weight_pair.weight_tensor) + with _initialized_session(): + _assert_sparse_tensor_value( + self, + sparse_tensor.SparseTensorValue( + indices=inputs.indices, + values=np.array((2, -1, 0, 4), dtype=np.int64), + dense_shape=inputs.dense_shape), + id_weight_pair.id_tensor.eval()) - @test_util.run_deprecated_v1 def test_get_sparse_tensors_int32_dense_input(self): - default_value = -100 - column = fc._categorical_column_with_vocabulary_list( - key='aaa', - vocabulary_list=np.array((30, 35, 11, 23, 22), dtype=np.int32), - dtype=dtypes.int32, - default_value=default_value) - id_weight_pair = column._get_sparse_tensors( - _LazyBuilder({ - 'aaa': - np.array( - ((11, -1, -1), (100, 30, -1), (-1, -1, 22)), dtype=np.int32) - })) - self.assertIsNone(id_weight_pair.weight_tensor) - with _initialized_session(): - _assert_sparse_tensor_value( - self, - sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1), (2, 2)), - values=np.array((2, default_value, 0, 4), dtype=np.int64), - dense_shape=(3, 3)), - id_weight_pair.id_tensor.eval()) + with ops.Graph().as_default(): + default_value = -100 + column = fc._categorical_column_with_vocabulary_list( + key='aaa', + 
vocabulary_list=np.array((30, 35, 11, 23, 22), dtype=np.int32), + dtype=dtypes.int32, + default_value=default_value) + id_weight_pair = column._get_sparse_tensors( + _LazyBuilder({ + 'aaa': + np.array(((11, -1, -1), (100, 30, -1), (-1, -1, 22)), + dtype=np.int32) + })) + self.assertIsNone(id_weight_pair.weight_tensor) + with _initialized_session(): + _assert_sparse_tensor_value( + self, + sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1), (2, 2)), + values=np.array((2, default_value, 0, 4), dtype=np.int64), + dense_shape=(3, 3)), id_weight_pair.id_tensor.eval()) - @test_util.run_deprecated_v1 def test_get_sparse_tensors_int32_with_oov_buckets(self): - column = fc._categorical_column_with_vocabulary_list( - key='aaa', - vocabulary_list=np.array((30, 35, 11, 23, 22), dtype=np.int32), - dtype=dtypes.int32, - num_oov_buckets=100) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1), (2, 2)), - values=(11, 100, 30, 22), - dense_shape=(3, 3)) - id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) - self.assertIsNone(id_weight_pair.weight_tensor) - with _initialized_session(): - _assert_sparse_tensor_value( - self, - sparse_tensor.SparseTensorValue( - indices=inputs.indices, - values=np.array((2, 60, 0, 4), dtype=np.int64), - dense_shape=inputs.dense_shape), - id_weight_pair.id_tensor.eval()) + with ops.Graph().as_default(): + column = fc._categorical_column_with_vocabulary_list( + key='aaa', + vocabulary_list=np.array((30, 35, 11, 23, 22), dtype=np.int32), + dtype=dtypes.int32, + num_oov_buckets=100) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1), (2, 2)), + values=(11, 100, 30, 22), + dense_shape=(3, 3)) + id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) + self.assertIsNone(id_weight_pair.weight_tensor) + with _initialized_session(): + _assert_sparse_tensor_value( + self, + sparse_tensor.SparseTensorValue( + indices=inputs.indices, + values=np.array((2, 60, 0, 4), dtype=np.int64), + dense_shape=inputs.dense_shape), + id_weight_pair.id_tensor.eval()) - @test_util.run_deprecated_v1 def test_linear_model(self): wire_column = fc._categorical_column_with_vocabulary_list( key='aaa', @@ -4179,7 +4172,6 @@ class VocabularyListCategoricalColumnTest(test.TestCase): # 'skywalker' -> 3, 'omar' -> 0: wire_var[3] + wire_var[0] = 4+1 = 5 self.assertAllClose(((3.,), (5.,)), self.evaluate(predictions)) - @test_util.run_deprecated_v1 def test_keras_linear_model(self): wire_column = fc._categorical_column_with_vocabulary_list( key='aaa', @@ -4223,7 +4215,6 @@ class IdentityCategoricalColumnTest(test.TestCase): with self.assertRaisesRegex(ValueError, 'key must be a string.'): fc._categorical_column_with_identity(key=('aaa',), num_buckets=3) - @test_util.run_deprecated_v1 def test_deep_copy(self): original = fc._categorical_column_with_identity(key='aaa', num_buckets=3) for column in (original, copy.deepcopy(original)): @@ -4282,39 +4273,39 @@ class IdentityCategoricalColumnTest(test.TestCase): dense_shape=[1, 2]), features['aaa'].eval()) - @test_util.run_deprecated_v1 def test_get_sparse_tensors(self): - column = fc._categorical_column_with_identity(key='aaa', num_buckets=3) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=(0, 1, 0), - dense_shape=(2, 2)) - id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) - self.assertIsNone(id_weight_pair.weight_tensor) - with _initialized_session(): - _assert_sparse_tensor_value( - self, - 
sparse_tensor.SparseTensorValue( - indices=inputs.indices, - values=np.array((0, 1, 0), dtype=np.int64), - dense_shape=inputs.dense_shape), - id_weight_pair.id_tensor.eval()) + with ops.Graph().as_default(): + column = fc._categorical_column_with_identity(key='aaa', num_buckets=3) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 1, 0), + dense_shape=(2, 2)) + id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) + self.assertIsNone(id_weight_pair.weight_tensor) + with _initialized_session(): + _assert_sparse_tensor_value( + self, + sparse_tensor.SparseTensorValue( + indices=inputs.indices, + values=np.array((0, 1, 0), dtype=np.int64), + dense_shape=inputs.dense_shape), + id_weight_pair.id_tensor.eval()) - @test_util.run_deprecated_v1 def test_transform_feature(self): - column = fc._categorical_column_with_identity(key='aaa', num_buckets=3) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=(0, 1, 0), - dense_shape=(2, 2)) - id_tensor = _transform_features({'aaa': inputs}, [column])[column] - with _initialized_session(): - _assert_sparse_tensor_value( - self, - sparse_tensor.SparseTensorValue( - indices=inputs.indices, - values=np.array((0, 1, 0), dtype=np.int64), - dense_shape=inputs.dense_shape), self.evaluate(id_tensor)) + with ops.Graph().as_default(): + column = fc._categorical_column_with_identity(key='aaa', num_buckets=3) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 1, 0), + dense_shape=(2, 2)) + id_tensor = _transform_features({'aaa': inputs}, [column])[column] + with _initialized_session(): + _assert_sparse_tensor_value( + self, + sparse_tensor.SparseTensorValue( + indices=inputs.indices, + values=np.array((0, 1, 0), dtype=np.int64), + dense_shape=inputs.dense_shape), self.evaluate(id_tensor)) def test_get_sparse_tensors_weight_collections(self): column = fc._categorical_column_with_identity(key='aaa', num_buckets=3) @@ -4331,139 +4322,139 @@ class IdentityCategoricalColumnTest(test.TestCase): ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)) self.assertCountEqual([], ops.get_collection('my_weights')) - @test_util.run_deprecated_v1 def test_get_sparse_tensors_dense_input(self): - column = fc._categorical_column_with_identity(key='aaa', num_buckets=3) - id_weight_pair = column._get_sparse_tensors( - _LazyBuilder({ - 'aaa': ((0, -1), (1, 0)) - })) - self.assertIsNone(id_weight_pair.weight_tensor) - with _initialized_session(): - _assert_sparse_tensor_value( - self, - sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=np.array((0, 1, 0), dtype=np.int64), - dense_shape=(2, 2)), - id_weight_pair.id_tensor.eval()) - - @test_util.run_deprecated_v1 - def test_get_sparse_tensors_with_inputs_too_big(self): - # Inputs. - vocabulary_size = 2 - sparse_input = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), values=(2, 1, 0), dense_shape=(2, 2)) - - # Embedding variable. - embedding_dimension = 2 - embedding_values = ( - (1., 2.), # id 0 - (3., 5.), # id 1 - ) - - def _initializer(shape, dtype, partition_info=None): - del shape, dtype, partition_info - return embedding_values - - # Build columns. - categorical_column = fc._categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - embedding_column = fc._embedding_column( - categorical_column, - dimension=embedding_dimension, - initializer=_initializer) - - # Provide sparse input and get dense result. 
- embedding_lookup = embedding_column._get_dense_tensor( - _LazyBuilder({'aaa': sparse_input})) - - with _initialized_session(): - with self.assertRaisesRegex(errors.OpError, - r'indices\[0\] .* 2 .* \[0, 2\)'): - self.evaluate(embedding_lookup) - - @test_util.run_deprecated_v1 - def test_get_sparse_tensors_with_inputs_too_small(self): - # Inputs. - vocabulary_size = 2 - sparse_input = sparse_tensor.SparseTensorValue( - indices=((0, 0), (0, 0), (1, 1), (1, 2)), - values=(-9, 0, -6, 1), - dense_shape=(2, 4)) - - # Embedding variable. - embedding_dimension = 2 - embedding_values = ( - (1., 2.), # id 0 - (3., 5.), # id 1 - ) - - def _initializer(shape, dtype, partition_info=None): - del shape, dtype, partition_info - return embedding_values - - # Build columns. - categorical_column = fc._categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - embedding_column = fc._embedding_column( - categorical_column, - dimension=embedding_dimension, - initializer=_initializer) - - # Provide sparse input and get dense result. - embedding_lookup = embedding_column._get_dense_tensor( - _LazyBuilder({'aaa': sparse_input})) - expected_lookups = ((1., 2.), (3., 5)) - with _initialized_session(): - self.assertAllEqual(expected_lookups, self.evaluate(embedding_lookup)) - - @test_util.run_deprecated_v1 - def test_get_sparse_tensors_with_default_value(self): - column = fc._categorical_column_with_identity( - key='aaa', num_buckets=4, default_value=3) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=(1, -1, 99), - dense_shape=(2, 2)) - id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) - self.assertIsNone(id_weight_pair.weight_tensor) - with _initialized_session(): - _assert_sparse_tensor_value( - self, - sparse_tensor.SparseTensorValue( - indices=inputs.indices, - values=np.array((1, 3, 3), dtype=np.int64), - dense_shape=inputs.dense_shape), - id_weight_pair.id_tensor.eval()) - - @test_util.run_deprecated_v1 - def test_get_sparse_tensors_with_default_value_and_placeholder_inputs(self): - column = fc._categorical_column_with_identity( - key='aaa', num_buckets=4, default_value=3) - input_indices = array_ops.placeholder(dtype=dtypes.int64) - input_values = array_ops.placeholder(dtype=dtypes.int32) - input_shape = array_ops.placeholder(dtype=dtypes.int64) - inputs = sparse_tensor.SparseTensorValue( - indices=input_indices, - values=input_values, - dense_shape=input_shape) - id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) - self.assertIsNone(id_weight_pair.weight_tensor) - with _initialized_session(): - _assert_sparse_tensor_value( - self, - sparse_tensor.SparseTensorValue( - indices=np.array(((0, 0), (1, 0), (1, 1)), dtype=np.int64), - values=np.array((1, 3, 3), dtype=np.int64), - dense_shape=np.array((2, 2), dtype=np.int64)), - id_weight_pair.id_tensor.eval(feed_dict={ - input_indices: ((0, 0), (1, 0), (1, 1)), - input_values: (1, -1, 99), - input_shape: (2, 2), + with ops.Graph().as_default(): + column = fc._categorical_column_with_identity(key='aaa', num_buckets=3) + id_weight_pair = column._get_sparse_tensors( + _LazyBuilder({ + 'aaa': ((0, -1), (1, 0)) })) + self.assertIsNone(id_weight_pair.weight_tensor) + with _initialized_session(): + _assert_sparse_tensor_value( + self, + sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=np.array((0, 1, 0), dtype=np.int64), + dense_shape=(2, 2)), + id_weight_pair.id_tensor.eval()) + + def test_get_sparse_tensors_with_inputs_too_big(self): 
+ with ops.Graph().as_default(): + # Inputs. + vocabulary_size = 2 + sparse_input = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), values=(2, 1, 0), + dense_shape=(2, 2)) + + # Embedding variable. + embedding_dimension = 2 + embedding_values = ( + (1., 2.), # id 0 + (3., 5.), # id 1 + ) + + def _initializer(shape, dtype, partition_info=None): + del shape, dtype, partition_info + return embedding_values + + # Build columns. + categorical_column = fc._categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column = fc._embedding_column( + categorical_column, + dimension=embedding_dimension, + initializer=_initializer) + + # Provide sparse input and get dense result. + embedding_lookup = embedding_column._get_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with _initialized_session(): + with self.assertRaisesRegex(errors.OpError, + r'indices\[0\] .* 2 .* \[0, 2\)'): + self.evaluate(embedding_lookup) + + def test_get_sparse_tensors_with_inputs_too_small(self): + with ops.Graph().as_default(): + # Inputs. + vocabulary_size = 2 + sparse_input = sparse_tensor.SparseTensorValue( + indices=((0, 0), (0, 0), (1, 1), (1, 2)), + values=(-9, 0, -6, 1), + dense_shape=(2, 4)) + + # Embedding variable. + embedding_dimension = 2 + embedding_values = ( + (1., 2.), # id 0 + (3., 5.), # id 1 + ) + + def _initializer(shape, dtype, partition_info=None): + del shape, dtype, partition_info + return embedding_values + + # Build columns. + categorical_column = fc._categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column = fc._embedding_column( + categorical_column, + dimension=embedding_dimension, + initializer=_initializer) + + # Provide sparse input and get dense result. + embedding_lookup = embedding_column._get_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + expected_lookups = ((1., 2.), (3., 5)) + with _initialized_session(): + self.assertAllEqual(expected_lookups, self.evaluate(embedding_lookup)) + + def test_get_sparse_tensors_with_default_value(self): + with ops.Graph().as_default(): + column = fc._categorical_column_with_identity( + key='aaa', num_buckets=4, default_value=3) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=(1, -1, 99), + dense_shape=(2, 2)) + id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) + self.assertIsNone(id_weight_pair.weight_tensor) + with _initialized_session(): + _assert_sparse_tensor_value( + self, + sparse_tensor.SparseTensorValue( + indices=inputs.indices, + values=np.array((1, 3, 3), dtype=np.int64), + dense_shape=inputs.dense_shape), + id_weight_pair.id_tensor.eval()) + + def test_get_sparse_tensors_with_default_value_and_placeholder_inputs(self): + with ops.Graph().as_default(): + column = fc._categorical_column_with_identity( + key='aaa', num_buckets=4, default_value=3) + input_indices = array_ops.placeholder(dtype=dtypes.int64) + input_values = array_ops.placeholder(dtype=dtypes.int32) + input_shape = array_ops.placeholder(dtype=dtypes.int64) + inputs = sparse_tensor.SparseTensorValue( + indices=input_indices, + values=input_values, + dense_shape=input_shape) + id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) + self.assertIsNone(id_weight_pair.weight_tensor) + with _initialized_session(): + _assert_sparse_tensor_value( + self, + sparse_tensor.SparseTensorValue( + indices=np.array(((0, 0), (1, 0), (1, 1)), dtype=np.int64), + values=np.array((1, 3, 3), dtype=np.int64), + 
dense_shape=np.array((2, 2), dtype=np.int64)), + id_weight_pair.id_tensor.eval(feed_dict={ + input_indices: ((0, 0), (1, 0), (1, 1)), + input_values: (1, -1, 99), + input_shape: (2, 2), + })) - @test_util.run_deprecated_v1 def test_linear_model(self): column = fc._categorical_column_with_identity(key='aaa', num_buckets=3) self.assertEqual(3, column._num_buckets) @@ -4485,7 +4476,6 @@ class IdentityCategoricalColumnTest(test.TestCase): # weight_var[2] + weight_var[1] = 3+2 = 5 self.assertAllClose(((1.,), (5.,)), self.evaluate(predictions)) - @test_util.run_deprecated_v1 def test_keras_linear_model(self): column = fc._categorical_column_with_identity(key='aaa', num_buckets=3) self.assertEqual(3, column._num_buckets) @@ -4637,7 +4627,6 @@ class IndicatorColumnTest(test.TestCase): with self.cached_session(): self.assertAllEqual([[0., 1., 1., 0.]], self.evaluate(output)) - @test_util.run_deprecated_v1 def test_deep_copy(self): a = fc._categorical_column_with_hash_bucket('a', 4) column = fc._indicator_column(a) @@ -4670,66 +4659,66 @@ class IndicatorColumnTest(test.TestCase): dense_shape=[1, 2]), features['aaa'].eval()) - @test_util.run_deprecated_v1 def test_transform(self): - a = fc._categorical_column_with_vocabulary_list( - key='aaa', vocabulary_list=('omar', 'stringer', 'marlo')) - a_indicator = fc._indicator_column(a) - features = { - 'aaa': sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=('marlo', 'skywalker', 'omar'), - dense_shape=(2, 2)) - } - indicator_tensor = _transform_features(features, [a_indicator])[a_indicator] - with _initialized_session(): - self.assertAllEqual([[0, 0, 1], [1, 0, 0]], - self.evaluate(indicator_tensor)) + with ops.Graph().as_default(): + a = fc._categorical_column_with_vocabulary_list( + key='aaa', vocabulary_list=('omar', 'stringer', 'marlo')) + a_indicator = fc._indicator_column(a) + features = { + 'aaa': sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=('marlo', 'skywalker', 'omar'), + dense_shape=(2, 2)) + } + indicator_tensor = _transform_features(features, + [a_indicator])[a_indicator] + with _initialized_session(): + self.assertAllEqual([[0, 0, 1], [1, 0, 0]], + self.evaluate(indicator_tensor)) - @test_util.run_deprecated_v1 def test_transform_with_weighted_column(self): - # Github issue 12557 - ids = fc._categorical_column_with_vocabulary_list( - key='ids', vocabulary_list=('a', 'b', 'c')) - weights = fc._weighted_categorical_column(ids, 'weights') - indicator = fc._indicator_column(weights) - features = { - 'ids': constant_op.constant([['c', 'b', 'a', 'c']]), - 'weights': constant_op.constant([[2., 4., 6., 1.]]) - } - indicator_tensor = _transform_features(features, [indicator])[indicator] - with _initialized_session(): - self.assertAllEqual([[6., 4., 3.]], self.evaluate(indicator_tensor)) + with ops.Graph().as_default(): + # Github issue 12557 + ids = fc._categorical_column_with_vocabulary_list( + key='ids', vocabulary_list=('a', 'b', 'c')) + weights = fc._weighted_categorical_column(ids, 'weights') + indicator = fc._indicator_column(weights) + features = { + 'ids': constant_op.constant([['c', 'b', 'a', 'c']]), + 'weights': constant_op.constant([[2., 4., 6., 1.]]) + } + indicator_tensor = _transform_features(features, [indicator])[indicator] + with _initialized_session(): + self.assertAllEqual([[6., 4., 3.]], self.evaluate(indicator_tensor)) - @test_util.run_deprecated_v1 def test_transform_with_missing_value_in_weighted_column(self): - # Github issue 12583 - ids = 
fc._categorical_column_with_vocabulary_list( - key='ids', vocabulary_list=('a', 'b', 'c')) - weights = fc._weighted_categorical_column(ids, 'weights') - indicator = fc._indicator_column(weights) - features = { - 'ids': constant_op.constant([['c', 'b', 'unknown']]), - 'weights': constant_op.constant([[2., 4., 6.]]) - } - indicator_tensor = _transform_features(features, [indicator])[indicator] - with _initialized_session(): - self.assertAllEqual([[0., 4., 2.]], self.evaluate(indicator_tensor)) + with ops.Graph().as_default(): + # Github issue 12583 + ids = fc._categorical_column_with_vocabulary_list( + key='ids', vocabulary_list=('a', 'b', 'c')) + weights = fc._weighted_categorical_column(ids, 'weights') + indicator = fc._indicator_column(weights) + features = { + 'ids': constant_op.constant([['c', 'b', 'unknown']]), + 'weights': constant_op.constant([[2., 4., 6.]]) + } + indicator_tensor = _transform_features(features, [indicator])[indicator] + with _initialized_session(): + self.assertAllEqual([[0., 4., 2.]], self.evaluate(indicator_tensor)) - @test_util.run_deprecated_v1 def test_transform_with_missing_value_in_categorical_column(self): - # Github issue 12583 - ids = fc._categorical_column_with_vocabulary_list( - key='ids', vocabulary_list=('a', 'b', 'c')) - indicator = fc._indicator_column(ids) - features = { - 'ids': constant_op.constant([['c', 'b', 'unknown']]), - } - indicator_tensor = _transform_features(features, [indicator])[indicator] - with _initialized_session(): - self.assertAllEqual([[0., 1., 1.]], self.evaluate(indicator_tensor)) + with ops.Graph().as_default(): + # Github issue 12583 + ids = fc._categorical_column_with_vocabulary_list( + key='ids', vocabulary_list=('a', 'b', 'c')) + indicator = fc._indicator_column(ids) + features = { + 'ids': constant_op.constant([['c', 'b', 'unknown']]), + } + indicator_tensor = _transform_features(features, [indicator])[indicator] + with _initialized_session(): + self.assertAllEqual([[0., 1., 1.]], self.evaluate(indicator_tensor)) - @test_util.run_deprecated_v1 def test_linear_model(self): animal = fc._indicator_column( fc._categorical_column_with_identity('animal', num_buckets=4)) @@ -4749,7 +4738,6 @@ class IndicatorColumnTest(test.TestCase): weight_var.assign([[1.], [2.], [3.], [4.]]).eval() self.assertAllClose([[2. + 3.]], self.evaluate(predictions)) - @test_util.run_deprecated_v1 def test_keras_linear_model(self): animal = fc._indicator_column( fc._categorical_column_with_identity('animal', num_buckets=4)) @@ -4769,7 +4757,6 @@ class IndicatorColumnTest(test.TestCase): weight_var.assign([[1.], [2.], [3.], [4.]]).eval() self.assertAllClose([[2. 
+ 3.]], self.evaluate(predictions)) - @test_util.run_deprecated_v1 def test_input_layer(self): animal = fc._indicator_column( fc._categorical_column_with_identity('animal', num_buckets=4)) @@ -6279,7 +6266,6 @@ class SharedEmbeddingColumnTest(test.TestCase, parameterized.TestCase): class WeightedCategoricalColumnTest(test.TestCase): - @test_util.run_deprecated_v1 def test_defaults(self): column = fc._weighted_categorical_column( categorical_column=fc._categorical_column_with_identity( @@ -6293,7 +6279,6 @@ class WeightedCategoricalColumnTest(test.TestCase): 'values': parsing_ops.VarLenFeature(dtypes.float32) }, column._parse_example_spec) - @test_util.run_deprecated_v1 def test_deep_copy(self): """Tests deepcopy of categorical_column_with_hash_bucket.""" original = fc._weighted_categorical_column( @@ -6392,95 +6377,94 @@ class WeightedCategoricalColumnTest(test.TestCase): dense_shape=[1, 2]), features['weights'].eval()) - @test_util.run_deprecated_v1 def test_transform_features(self): - column = fc._weighted_categorical_column( - categorical_column=fc._categorical_column_with_identity( - key='ids', num_buckets=3), - weight_feature_key='values') - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=(0, 1, 0), - dense_shape=(2, 2)) - weights = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=(0.5, 1.0, 0.1), - dense_shape=(2, 2)) - id_tensor, weight_tensor = _transform_features({ - 'ids': inputs, - 'values': weights, - }, (column,))[column] - with _initialized_session(): - _assert_sparse_tensor_value( - self, - sparse_tensor.SparseTensorValue( - indices=inputs.indices, - values=np.array(inputs.values, dtype=np.int64), - dense_shape=inputs.dense_shape), self.evaluate(id_tensor)) - _assert_sparse_tensor_value( - self, - sparse_tensor.SparseTensorValue( - indices=weights.indices, - values=np.array(weights.values, dtype=np.float32), - dense_shape=weights.dense_shape), self.evaluate(weight_tensor)) + with ops.Graph().as_default(): + column = fc._weighted_categorical_column( + categorical_column=fc._categorical_column_with_identity( + key='ids', num_buckets=3), + weight_feature_key='values') + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 1, 0), + dense_shape=(2, 2)) + weights = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=(0.5, 1.0, 0.1), + dense_shape=(2, 2)) + id_tensor, weight_tensor = _transform_features({ + 'ids': inputs, + 'values': weights, + }, (column,))[column] + with _initialized_session(): + _assert_sparse_tensor_value( + self, + sparse_tensor.SparseTensorValue( + indices=inputs.indices, + values=np.array(inputs.values, dtype=np.int64), + dense_shape=inputs.dense_shape), self.evaluate(id_tensor)) + _assert_sparse_tensor_value( + self, + sparse_tensor.SparseTensorValue( + indices=weights.indices, + values=np.array(weights.values, dtype=np.float32), + dense_shape=weights.dense_shape), self.evaluate(weight_tensor)) - @test_util.run_deprecated_v1 def test_transform_features_dense_input(self): - column = fc._weighted_categorical_column( - categorical_column=fc._categorical_column_with_identity( - key='ids', num_buckets=3), - weight_feature_key='values') - weights = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=(0.5, 1.0, 0.1), - dense_shape=(2, 2)) - id_tensor, weight_tensor = _transform_features({ - 'ids': ((0, -1), (1, 0)), - 'values': weights, - }, (column,))[column] - with _initialized_session(): - 
_assert_sparse_tensor_value( - self, - sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=np.array((0, 1, 0), dtype=np.int64), - dense_shape=(2, 2)), self.evaluate(id_tensor)) - _assert_sparse_tensor_value( - self, - sparse_tensor.SparseTensorValue( - indices=weights.indices, - values=np.array(weights.values, dtype=np.float32), - dense_shape=weights.dense_shape), self.evaluate(weight_tensor)) + with ops.Graph().as_default(): + column = fc._weighted_categorical_column( + categorical_column=fc._categorical_column_with_identity( + key='ids', num_buckets=3), + weight_feature_key='values') + weights = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=(0.5, 1.0, 0.1), + dense_shape=(2, 2)) + id_tensor, weight_tensor = _transform_features({ + 'ids': ((0, -1), (1, 0)), + 'values': weights, + }, (column,))[column] + with _initialized_session(): + _assert_sparse_tensor_value( + self, + sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=np.array((0, 1, 0), dtype=np.int64), + dense_shape=(2, 2)), self.evaluate(id_tensor)) + _assert_sparse_tensor_value( + self, + sparse_tensor.SparseTensorValue( + indices=weights.indices, + values=np.array(weights.values, dtype=np.float32), + dense_shape=weights.dense_shape), self.evaluate(weight_tensor)) - @test_util.run_deprecated_v1 def test_transform_features_dense_weights(self): - column = fc._weighted_categorical_column( - categorical_column=fc._categorical_column_with_identity( - key='ids', num_buckets=3), - weight_feature_key='values') - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=(2, 1, 0), - dense_shape=(2, 2)) - id_tensor, weight_tensor = _transform_features({ - 'ids': inputs, - 'values': ((.5, 0.), (1., .1)), - }, (column,))[column] - with _initialized_session(): - _assert_sparse_tensor_value( - self, - sparse_tensor.SparseTensorValue( - indices=inputs.indices, - values=np.array(inputs.values, dtype=np.int64), - dense_shape=inputs.dense_shape), self.evaluate(id_tensor)) - _assert_sparse_tensor_value( - self, - sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=np.array((.5, 1., .1), dtype=np.float32), - dense_shape=(2, 2)), self.evaluate(weight_tensor)) + with ops.Graph().as_default(): + column = fc._weighted_categorical_column( + categorical_column=fc._categorical_column_with_identity( + key='ids', num_buckets=3), + weight_feature_key='values') + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=(2, 1, 0), + dense_shape=(2, 2)) + id_tensor, weight_tensor = _transform_features({ + 'ids': inputs, + 'values': ((.5, 0.), (1., .1)), + }, (column,))[column] + with _initialized_session(): + _assert_sparse_tensor_value( + self, + sparse_tensor.SparseTensorValue( + indices=inputs.indices, + values=np.array(inputs.values, dtype=np.int64), + dense_shape=inputs.dense_shape), self.evaluate(id_tensor)) + _assert_sparse_tensor_value( + self, + sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=np.array((.5, 1., .1), dtype=np.float32), + dense_shape=(2, 2)), self.evaluate(weight_tensor)) - @test_util.run_deprecated_v1 def test_keras_linear_model(self): column = fc._weighted_categorical_column( categorical_column=fc._categorical_column_with_identity( @@ -6583,7 +6567,6 @@ class WeightedCategoricalColumnTest(test.TestCase): # = 3*1 + 2*.1 = 3+.2 = 3.2 self.assertAllClose(((.5,), (3.2,)), self.evaluate(predictions)) - @test_util.run_deprecated_v1 def 
test_linear_model(self): column = fc._weighted_categorical_column( categorical_column=fc._categorical_column_with_identity( From c0cc19b02f010db3eff2a7e6e663d2aeac4e481b Mon Sep 17 00:00:00 2001 From: Rick Chao Date: Wed, 5 Aug 2020 13:34:06 -0700 Subject: [PATCH 2190/2522] MultiProcessRunner: Add `process_exists` convenient method. PiperOrigin-RevId: 325090279 Change-Id: I69802041abfd269e705b8a4fe98e0ddb94fdc772 --- .../python/distribute/multi_process_runner.py | 13 +++++++++++++ .../distribute/multi_process_runner_test.py | 15 +++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/tensorflow/python/distribute/multi_process_runner.py b/tensorflow/python/distribute/multi_process_runner.py index 6d0854f18d7..028a7aad171 100644 --- a/tensorflow/python/distribute/multi_process_runner.py +++ b/tensorflow/python/distribute/multi_process_runner.py @@ -479,6 +479,19 @@ class MultiProcessRunner(object): p = self._processes[(task_type, task_id)] return p.exitcode if p else None + def process_exists(self, task_type, task_id): + """Returns whether the subprocess still exists given the task type and id. + + Args: + task_type: The task type. + task_id: The task id. + + Returns: + Boolean; whether the subprocess still exists. If the subprocess has + exited, this returns False. + """ + return self.get_process_exit_code(task_type, task_id) is None + def _process_watchdog(self): """Simulates a cluster management system. diff --git a/tensorflow/python/distribute/multi_process_runner_test.py b/tensorflow/python/distribute/multi_process_runner_test.py index b7d8acf55c6..8d8099740a3 100644 --- a/tensorflow/python/distribute/multi_process_runner_test.py +++ b/tensorflow/python/distribute/multi_process_runner_test.py @@ -495,6 +495,21 @@ class MultiProcessRunnerTest(test.TestCase): 'Worker 0 errored'): mpr.join(timeout=20) + def test_process_exists(self): + + def proc_func(): + time.sleep(100000) + + mpr = multi_process_runner.MultiProcessRunner( + proc_func, + multi_worker_test_base.create_cluster_spec(num_workers=1)) + mpr.start() + self.assertTrue(mpr.process_exists('worker', 0)) + mpr.terminate('worker', 0) + # Worker 0 should exit at some point, or else the test would time out. + while mpr.process_exists('worker', 0): + time.sleep(1) + class MultiProcessPoolRunnerTest(test.TestCase): From a6339f25b94c023e4b4e4ff2624425d17977d4ab Mon Sep 17 00:00:00 2001 From: Michael Banfield Date: Wed, 5 Aug 2020 13:38:27 -0700 Subject: [PATCH 2191/2522] Make unavailable functions FATAL. PiperOrigin-RevId: 325091138 Change-Id: I61c6188bd9985858eefe3c8fe8bf833991e20f29 --- tensorflow/core/tpu/tpu_api_dlsym_initializer.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/tpu/tpu_api_dlsym_initializer.cc b/tensorflow/core/tpu/tpu_api_dlsym_initializer.cc index 47d517270dc..2f11e06cced 100644 --- a/tensorflow/core/tpu/tpu_api_dlsym_initializer.cc +++ b/tensorflow/core/tpu/tpu_api_dlsym_initializer.cc @@ -31,7 +31,7 @@ limitations under the License. Struct->FnName##Fn = \ reinterpret_cast(dlsym(library_handle, #FnName)); \ if (!(Struct->FnName##Fn)) { \ - LOG(ERROR) << #FnName " not available in this library."; \ + LOG(FATAL) << #FnName " not available in this library."; \ return errors::Unimplemented(#FnName " not available in this library."); \ } From ff61ec799e2fa9e3cbed97812eed90b035f39f29 Mon Sep 17 00:00:00 2001 From: Nat Jeffries Date: Wed, 5 Aug 2020 13:54:48 -0700 Subject: [PATCH 2192/2522] Create build target for image recognition test. 
PiperOrigin-RevId: 325094476 Change-Id: I6e0ed20c868dd80f2f3d8735ecd126108bf20a02 --- .../image_recognition_experimental/BUILD | 36 +++++++++++++++++++ .../Makefile.inc | 1 + 2 files changed, 37 insertions(+) create mode 100644 tensorflow/lite/micro/examples/image_recognition_experimental/BUILD diff --git a/tensorflow/lite/micro/examples/image_recognition_experimental/BUILD b/tensorflow/lite/micro/examples/image_recognition_experimental/BUILD new file mode 100644 index 00000000000..d3bcd69d1c7 --- /dev/null +++ b/tensorflow/lite/micro/examples/image_recognition_experimental/BUILD @@ -0,0 +1,36 @@ +# Description: +# TensorFlow Lite for Microcontrollers image recognition example. + +load( + "//tensorflow/lite/micro/testing:micro_test.bzl", + "tflite_micro_cc_test", +) + +licenses(["notice"]) # Apache 2.0 + +cc_library( + name = "image_model_data", + srcs = [ + "first_10_cifar_images.cc", + "image_recognition_model.cc", + ], + hdrs = [ + "first_10_cifar_images.h", + "image_recognition_model.h", + "util.h", + ], +) + +tflite_micro_cc_test( + name = "image_recognition_test", + srcs = ["image_recognition_test.cc"], + deps = [ + ":image_model_data", + "//tensorflow/lite:schema_fbs_version", + "//tensorflow/lite/micro:micro_error_reporter", + "//tensorflow/lite/micro:micro_framework", + "//tensorflow/lite/micro:op_resolvers", + "//tensorflow/lite/micro/testing:micro_test", + "//tensorflow/lite/schema:schema_fbs", + ], +) diff --git a/tensorflow/lite/micro/examples/image_recognition_experimental/Makefile.inc b/tensorflow/lite/micro/examples/image_recognition_experimental/Makefile.inc index 2fdfb0e6779..76b21cb2580 100644 --- a/tensorflow/lite/micro/examples/image_recognition_experimental/Makefile.inc +++ b/tensorflow/lite/micro/examples/image_recognition_experimental/Makefile.inc @@ -20,6 +20,7 @@ tensorflow/lite/micro/examples/image_recognition_experimental/image_recognition_ $(MAKEFILE_DIR)/downloads/image_recognition_model/image_recognition_model.cc IMAGE_RECOGNITION_TEST_HDRS := \ +tensorflow/lite/micro/examples/image_recognition_experimental/first_10_cifar_images.h \ tensorflow/lite/micro/examples/image_recognition_experimental/image_recognition_model.h \ tensorflow/lite/micro/examples/image_recognition_experimental/util.h From c90cf2e18f808f93d315ba35e6f9103065cd0384 Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Wed, 5 Aug 2020 14:00:28 -0700 Subject: [PATCH 2193/2522] [TF2XLA] Merge xla_kernel_creator and xla_kernel_creator_util targets Moreover explicit check for whether XLA is linked in is not needed, as we already check for cpu_compiler/gpu_compiler linkage. 
PiperOrigin-RevId: 325095702 Change-Id: I82220c7bb15520cf3683fd944fef57d7f3f426e2 --- tensorflow/compiler/jit/BUILD | 36 +--- tensorflow/compiler/jit/flags.cc | 6 - tensorflow/compiler/jit/flags.h | 8 - tensorflow/compiler/jit/xla_kernel_creator.cc | 178 ++++++++++++++++- .../compiler/jit/xla_kernel_creator_util.cc | 186 ------------------ .../compiler/jit/xla_kernel_creator_util.h | 33 ---- tensorflow/core/common_runtime/eager/BUILD | 2 +- .../common_runtime/eager/kernel_and_device.cc | 3 - tensorflow/python/eager/def_function.py | 9 - .../eager/def_function_test_cpu_only.py | 7 +- tensorflow/python/tfe_wrapper.cc | 1 - 11 files changed, 184 insertions(+), 285 deletions(-) delete mode 100644 tensorflow/compiler/jit/xla_kernel_creator_util.cc delete mode 100644 tensorflow/compiler/jit/xla_kernel_creator_util.h diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index ecbb1a5d200..01b02ad3580 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -394,20 +394,23 @@ cc_library( alwayslink = 1, ) -# Linked by tensorflow core, without registration of jit compilation passes -# which is not necessary to create and run a XlaLocalLaunchBase kernel. -# Linking jit compilation passes could cause programs stuck right now (b/140069592). cc_library( - name = "xla_kernel_creator_util", + name = "xla_kernel_creator", srcs = [ - "xla_kernel_creator_util.cc", + "xla_kernel_creator.cc", + "xla_kernel_creator.h", + ], + visibility = [ + ":internal", + "//learning/brain/contrib/tpu_modeling/exp/tpu_inference_converter:__pkg__", + "//tensorflow/core/common_runtime/eager:__pkg__", ], - hdrs = ["xla_kernel_creator_util.h"], - visibility = ["//tensorflow/core/common_runtime/eager:__pkg__"], deps = [ ":common", ":compilability_check_util", ":compilation_passes", + ":flags", + ":jit_compilation_passes", "//tensorflow/compiler/jit/kernels:xla_ops_no_jit_rewrite_registration", "//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla:xla_op_registry", @@ -422,25 +425,6 @@ cc_library( alwayslink = 1, ) -cc_library( - name = "xla_kernel_creator", - srcs = [ - "xla_kernel_creator.cc", - "xla_kernel_creator.h", - ], - deps = [ - ":compilability_check_util", - ":flags", - ":jit_compilation_passes", - ":xla_kernel_creator_util", - "//tensorflow/core:core_cpu_internal", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:protos_all_cc", - ], - alwayslink = 1, -) - tf_cc_test( name = "xla_kernel_creator_test", srcs = [ diff --git a/tensorflow/compiler/jit/flags.cc b/tensorflow/compiler/jit/flags.cc index d1301a8c40f..ff085c854c6 100644 --- a/tensorflow/compiler/jit/flags.cc +++ b/tensorflow/compiler/jit/flags.cc @@ -268,10 +268,4 @@ void AppendMarkForCompilationPassFlags(std::vector* flag_list) { AppendMarkForCompilationPassFlagsInternal(flag_list); } -static bool xla_is_enabled = false; - -void SetXlaIsEnabled() { xla_is_enabled = true; } - -bool IsXlaEnabled() { return xla_is_enabled; } - } // namespace tensorflow diff --git a/tensorflow/compiler/jit/flags.h b/tensorflow/compiler/jit/flags.h index 89e20d9f8ea..6c54fc8825e 100644 --- a/tensorflow/compiler/jit/flags.h +++ b/tensorflow/compiler/jit/flags.h @@ -162,14 +162,6 @@ MlirCommonFlags* GetMlirCommonFlags(); void AppendMarkForCompilationPassFlags( std::vector* flag_list); -// Makes all future calls to `IsXlaEnabled()` return `true`. -// -// Should only be called when XLA is linked in. -void SetXlaIsEnabled(); - -// Returns whether XLA is enabled. 
-bool IsXlaEnabled(); - } // namespace tensorflow #endif // TENSORFLOW_COMPILER_JIT_FLAGS_H_ diff --git a/tensorflow/compiler/jit/xla_kernel_creator.cc b/tensorflow/compiler/jit/xla_kernel_creator.cc index 5ca146969e0..3a6345afe9f 100644 --- a/tensorflow/compiler/jit/xla_kernel_creator.cc +++ b/tensorflow/compiler/jit/xla_kernel_creator.cc @@ -14,10 +14,62 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/compiler/jit/xla_kernel_creator.h" +#include "absl/memory/memory.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" #include "tensorflow/compiler/jit/compilability_check_util.h" +#include "tensorflow/compiler/jit/defs.h" #include "tensorflow/compiler/jit/flags.h" -#include "tensorflow/compiler/jit/xla_kernel_creator_util.h" +#include "tensorflow/compiler/jit/kernels/xla_ops.h" +#include "tensorflow/compiler/jit/mark_for_compilation_pass.h" +#include "tensorflow/compiler/tf2xla/const_analysis.h" +#include "tensorflow/compiler/tf2xla/xla_op_registry.h" #include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/util/ptr_util.h" + +namespace { + +// Utility which searches for values in a sorted list by scanning over it once. +// No matter how many times ScanForValue is called, the list is scanned at most +// once. However, if a call to ScanForValue skips over a value, that value is +// not revisited in future calls to ScanForValue, so callers must take +// care to order their calls. +// +// Useful for merging multiple sorted lists in O(n) time. +class SinglePassSearch { + public: + // Creates a SinglePassSearch object that can be used to search in `values`. + // Does not take ownership of `values`. `values` must outlive this. + // `values` must be sorted. + explicit SinglePassSearch(const std::vector* values) + : current_index_(0), values_(values) {} + + // Scans forward in the vector looking for "value", updating the internal + // position in to the vector. + // Returns true iff the vector contains the given value at or after current + // position. + // Not thread-safe. + bool ScanForValue(int value) { + while (current_index_ < values_->size() && + (*values_)[current_index_] <= value) { + if ((*values_)[current_index_] == value) { + current_index_++; + return true; + } + current_index_++; + } + return false; + } + + private: + int current_index_; + const std::vector* values_; +}; + +} // end namespace namespace tensorflow { @@ -27,6 +79,121 @@ bool XlaKernelCreator::CanCreateKernel( return CanCreateXlaKernel(props->node_def); } +static Status CreateXlaKernel(FunctionLibraryRuntime* flr, + const NodeDef& node_def, + std::unique_ptr* kernel) { + if (!CanCreateXlaKernel(node_def)) { + return errors::Internal("Invalid node: ", node_def.ShortDebugString()); + } + + VLOG(3) << "Attempting to create XlaLaunchOp for " << node_def.DebugString(); + + // Make sure that kernels have been registered on the JIT device. + XlaOpRegistry::RegisterCompilationKernels(); + + // Only check for compilability if the MLIR bridge is not enabled. 
+ if (!GetMlirCommonFlags()->tf_mlir_enable_mlir_bridge) { + RecursiveCompilabilityChecker::UncompilableNodesMap uncompilable_nodes_map; + if (!IsCompilable(flr, node_def, &uncompilable_nodes_map)) { + std::vector + uncompilable_node_info; + for (const auto& it : uncompilable_nodes_map) { + for (const auto& info : it.second.second) { + uncompilable_node_info.emplace_back(info); + } + } + string message = absl::StrCat( + "Function invoked by the following node is not compilable: ", + SummarizeNodeDef(node_def, /*max_inputs_in_summary=*/10), ".\n"); + absl::StrAppend(&message, "Uncompilable nodes:"); + for (const auto& node_info : uncompilable_node_info) { + string node_message = absl::StrCat("\n", node_info.name, ": ", + node_info.uncompilable_reason, "\n", + "\tStacktrace:\n"); + for (const auto& stack_frame : node_info.stack_trace) { + absl::StrAppendFormat(&node_message, "\t\tNode: %s, function: %s\n", + stack_frame.name, stack_frame.function_name); + } + absl::StrAppend(&message, node_message); + } + VLOG(1) << message; + return errors::InvalidArgument(message); + } + } + + // Get function body, constant args, and resource args. + const FunctionBody* fbody = nullptr; + std::vector constant_arg_indices; + std::vector resource_arg_indices; + TF_RETURN_IF_ERROR(GetBodyAndConstantsAndResources( + flr, node_def, &fbody, &constant_arg_indices, &resource_arg_indices)); + + // Set input and output memory types. + MemoryTypeVector input_memory_types(fbody->arg_types.size(), DEVICE_MEMORY); + // These indices are used only for optimization purposes. They allow us + // to loop over constant_arg_indices and resource_arg_indices only once + // while iterating over all the function arguments checking if it is a + // resource or a constant. + // The reason we optimized this code is because functions can have a lot of + // captured arguments. For example, the backward pass of ResNet50 takes in all + // 214 variables and a similar number of activations. + SinglePassSearch constants_search(&constant_arg_indices); + SinglePassSearch resources_search(&resource_arg_indices); + for (size_t i = 0; i < fbody->arg_types.size(); ++i) { + if (resources_search.ScanForValue(i) || constants_search.ScanForValue(i)) { + // Compile-time constants and resource handles are expected to be in + // host memory. + input_memory_types[i] = HOST_MEMORY; + } + } + // One might wonder, about the case where a compile-time constant argument + // (which must be in host memory) is also used as an input into an op, + // e.g. Add, that expects its inputs in device memory. Here is how it + // works now. + // First, what do we mean by "op expects an input in XYZ memory"? + // There are two types of "ops" here: the tf2xla kernel and the HLO + // computation it builds. The tf2xla kernel needs to retrieve the actual + // numeric value of the compile-time constant tensors, so it really expects + // them to be on in host memory. However, for other inputs, it refers to them + // using xla::ComputationDataHandle, which is just a symbolic handle that + // xla::ComputationBuilder assigns. How does this handle gets assigned for + // constant arguments? Even constant arguments get an _Arg node in the graph + // instantiated for Function compilation. The tf2xla kernel for constant _Arg + // nodes takes the constant value, converts it to XlaLiteral, and feeds it + // to xla::ComputationBuilder.ConstantLiteral, which returns the handle. 
This + // constant XlaLiteral is included in the HLO graph, and subsequently, in + // the actual executable, which is copied to the device before being + // executed. Thus, when this executable runs, the constant is available in + // device memory. + + // XlaLaunch kernel keeps all outputs (including constants, which it copies), + // in device memory except for resources. + MemoryTypeVector output_memory_types(fbody->ret_types.size(), DEVICE_MEMORY); + for (size_t i = 0; i < fbody->ret_types.size(); ++i) { + if (fbody->ret_types[i] == DT_RESOURCE) { + output_memory_types[i] = HOST_MEMORY; + } + } + + // Create the kernel. + NameAttrList function; + TF_RETURN_IF_ERROR(NameAndAttrsFromFunctionCall(node_def, &function)); + Device* dev = flr->device(); + Status s; + auto props = std::make_shared( + &fbody->fdef.signature(), node_def, fbody->arg_types, fbody->ret_types); + OpKernelConstruction construction(DeviceType(dev->device_type()), dev, + dev->GetAllocator(AllocatorAttributes()), + flr, dev->resource_manager(), props, + input_memory_types, output_memory_types, + flr->graph_def_version(), &s); + + *kernel = absl::make_unique( + &construction, constant_arg_indices, resource_arg_indices, function, + /*has_ref_vars=*/false); + return s; +} + Status XlaKernelCreator::CreateKernel( FunctionLibraryRuntime* flr, const std::shared_ptr& props, @@ -34,19 +201,12 @@ Status XlaKernelCreator::CreateKernel( return CreateXlaKernel(flr, props->node_def, kernel); } -namespace { - -bool RegisterLaunchOpCreator() { +static bool RegisterLaunchOpCreator() { XlaKernelCreator* xla_kernel_creator = new XlaKernelCreator(); RegisterDefaultCustomKernelCreator(xla_kernel_creator); return true; } static bool register_me = RegisterLaunchOpCreator(); -static bool register_xla = [] { - SetXlaIsEnabled(); - return true; -}(); -} // end namespace } // namespace tensorflow diff --git a/tensorflow/compiler/jit/xla_kernel_creator_util.cc b/tensorflow/compiler/jit/xla_kernel_creator_util.cc deleted file mode 100644 index 61c89d8a67a..00000000000 --- a/tensorflow/compiler/jit/xla_kernel_creator_util.cc +++ /dev/null @@ -1,186 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#include "tensorflow/compiler/jit/xla_kernel_creator_util.h" - -#include "absl/memory/memory.h" -#include "absl/strings/str_cat.h" -#include "absl/strings/str_format.h" -#include "tensorflow/compiler/jit/compilability_check_util.h" -#include "tensorflow/compiler/jit/defs.h" -#include "tensorflow/compiler/jit/kernels/xla_ops.h" -#include "tensorflow/compiler/jit/mark_for_compilation_pass.h" -#include "tensorflow/compiler/tf2xla/const_analysis.h" -#include "tensorflow/compiler/tf2xla/xla_op_registry.h" -#include "tensorflow/core/common_runtime/function.h" -#include "tensorflow/core/framework/node_def_builder.h" -#include "tensorflow/core/framework/node_def_util.h" -#include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/util/ptr_util.h" - -namespace tensorflow { -namespace { - -// Utility which searches for values in a sorted list by scanning over it once. -// No matter how many times ScanForValue is called, the list is scanned at most -// once. However, if a call to ScanForValue skips over a value, that value is -// not revisited in future calls to ScanForValue, so callers must take -// care to order their calls. -// -// Useful for merging multiple sorted lists in O(n) time. -class SinglePassSearch { - public: - // Creates a SinglePassSearch object that can be used to search in `values`. - // Does not take ownership of `values`. `values` must outlive this. - // `values` must be sorted. - explicit SinglePassSearch(const std::vector* values) - : current_index_(0), values_(values) {} - - // Scans forward in the vector looking for "value", updating the internal - // position in to the vector. - // Returns true iff the vector contains the given value at or after current - // position. - // Not thread-safe. - bool ScanForValue(int value) { - while (current_index_ < values_->size() && - (*values_)[current_index_] <= value) { - if ((*values_)[current_index_] == value) { - current_index_++; - return true; - } - current_index_++; - } - return false; - } - - private: - int current_index_; - const std::vector* values_; -}; -} // namespace - -Status CreateXlaKernel(FunctionLibraryRuntime* flr, const NodeDef& node_def, - std::unique_ptr* kernel) { - if (!CanCreateXlaKernel(node_def)) { - return errors::Internal("Invalid node: ", node_def.ShortDebugString()); - } - - VLOG(3) << "Attempting to create XlaLaunchOp for " << node_def.DebugString(); - - // Make sure that kernels have been registered on the JIT device. - XlaOpRegistry::RegisterCompilationKernels(); - - // Only check for compilability if the MLIR bridge is not enabled. 
- if (!GetMlirCommonFlags()->tf_mlir_enable_mlir_bridge) { - RecursiveCompilabilityChecker::UncompilableNodesMap uncompilable_nodes_map; - if (!IsCompilable(flr, node_def, &uncompilable_nodes_map)) { - std::vector - uncompilable_node_info; - for (const auto& it : uncompilable_nodes_map) { - for (const auto& info : it.second.second) { - uncompilable_node_info.emplace_back(info); - } - } - string message = absl::StrCat( - "Function invoked by the following node is not compilable: ", - SummarizeNodeDef(node_def, /*max_inputs_in_summary=*/10), ".\n"); - absl::StrAppend(&message, "Uncompilable nodes:"); - for (const auto& node_info : uncompilable_node_info) { - string node_message = absl::StrCat("\n", node_info.name, ": ", - node_info.uncompilable_reason, "\n", - "\tStacktrace:\n"); - for (const auto& stack_frame : node_info.stack_trace) { - absl::StrAppendFormat(&node_message, "\t\tNode: %s, function: %s\n", - stack_frame.name, stack_frame.function_name); - } - absl::StrAppend(&message, node_message); - } - VLOG(1) << message; - return errors::InvalidArgument(message); - } - } - - // Get function body, constant args, and resource args. - const FunctionBody* fbody = nullptr; - std::vector constant_arg_indices; - std::vector resource_arg_indices; - TF_RETURN_IF_ERROR(GetBodyAndConstantsAndResources( - flr, node_def, &fbody, &constant_arg_indices, &resource_arg_indices)); - - // Set input and output memory types. - MemoryTypeVector input_memory_types(fbody->arg_types.size(), DEVICE_MEMORY); - // These indices are used only for optimization purposes. They allow us - // to loop over constant_arg_indices and resource_arg_indices only once - // while iterating over all the function arguments checking if it is a - // resource or a constant. - // The reason we optimized this code is because functions can have a lot of - // captured arguments. For example, the backward pass of ResNet50 takes in all - // 214 variables and a similar number of activations. - SinglePassSearch constants_search(&constant_arg_indices); - SinglePassSearch resources_search(&resource_arg_indices); - for (size_t i = 0; i < fbody->arg_types.size(); ++i) { - if (resources_search.ScanForValue(i) || constants_search.ScanForValue(i)) { - // Compile-time constants and resource handles are expected to be in - // host memory. - input_memory_types[i] = HOST_MEMORY; - } - } - // One might wonder, about the case where a compile-time constant argument - // (which must be in host memory) is also used as an input into an op, - // e.g. Add, that expects its inputs in device memory. Here is how it - // works now. - // First, what do we mean by "op expects an input in XYZ memory"? - // There are two types of "ops" here: the tf2xla kernel and the HLO - // computation it builds. The tf2xla kernel needs to retrieve the actual - // numeric value of the compile-time constant tensors, so it really expects - // them to be on in host memory. However, for other inputs, it refers to them - // using xla::ComputationDataHandle, which is just a symbolic handle that - // xla::ComputationBuilder assigns. How does this handle gets assigned for - // constant arguments? Even constant arguments get an _Arg node in the graph - // instantiated for Function compilation. The tf2xla kernel for constant _Arg - // nodes takes the constant value, converts it to XlaLiteral, and feeds it - // to xla::ComputationBuilder.ConstantLiteral, which returns the handle. 
This - // constant XlaLiteral is included in the HLO graph, and subsequently, in - // the actual executable, which is copied to the device before being - // executed. Thus, when this executable runs, the constant is available in - // device memory. - - // XlaLaunch kernel keeps all outputs (including constants, which it copies), - // in device memory except for resources. - MemoryTypeVector output_memory_types(fbody->ret_types.size(), DEVICE_MEMORY); - for (size_t i = 0; i < fbody->ret_types.size(); ++i) { - if (fbody->ret_types[i] == DT_RESOURCE) { - output_memory_types[i] = HOST_MEMORY; - } - } - - // Create the kernel. - NameAttrList function; - TF_RETURN_IF_ERROR(NameAndAttrsFromFunctionCall(node_def, &function)); - Device* dev = flr->device(); - Status s; - auto props = std::make_shared( - &fbody->fdef.signature(), node_def, fbody->arg_types, fbody->ret_types); - OpKernelConstruction construction(DeviceType(dev->device_type()), dev, - dev->GetAllocator(AllocatorAttributes()), - flr, dev->resource_manager(), props, - input_memory_types, output_memory_types, - flr->graph_def_version(), &s); - - *kernel = absl::make_unique( - &construction, constant_arg_indices, resource_arg_indices, function, - /*has_ref_vars=*/false); - return s; -} -} // namespace tensorflow diff --git a/tensorflow/compiler/jit/xla_kernel_creator_util.h b/tensorflow/compiler/jit/xla_kernel_creator_util.h deleted file mode 100644 index f090f55f354..00000000000 --- a/tensorflow/compiler/jit/xla_kernel_creator_util.h +++ /dev/null @@ -1,33 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_JIT_XLA_KERNEL_CREATOR_UTIL_H_ -#define TENSORFLOW_COMPILER_JIT_XLA_KERNEL_CREATOR_UTIL_H_ - -#include "tensorflow/core/framework/function.h" -#include "tensorflow/core/framework/node_def.pb.h" -#include "tensorflow/core/lib/core/status.h" - -namespace tensorflow { - -class FunctionLibraryRuntime; -class OpKernel; - -// Given a supported NodeDef, returns a XlaLaunchOp that computes the node. 
-Status CreateXlaKernel(FunctionLibraryRuntime* flr, const NodeDef& node_def, - std::unique_ptr* kernel); - -} // namespace tensorflow - -#endif // TENSORFLOW_COMPILER_JIT_XLA_KERNEL_CREATOR_UTIL_H_ diff --git a/tensorflow/core/common_runtime/eager/BUILD b/tensorflow/core/common_runtime/eager/BUILD index b4c905f220e..9108b04ef05 100644 --- a/tensorflow/core/common_runtime/eager/BUILD +++ b/tensorflow/core/common_runtime/eager/BUILD @@ -337,7 +337,7 @@ tf_cuda_library( ], "//tensorflow:windows": KERNEL_AND_DEVICE_DEPS, "//conditions:default": KERNEL_AND_DEVICE_DEPS + [ - "//tensorflow/compiler/jit:xla_kernel_creator_util", + "//tensorflow/compiler/jit:xla_kernel_creator", ], }), ) diff --git a/tensorflow/core/common_runtime/eager/kernel_and_device.cc b/tensorflow/core/common_runtime/eager/kernel_and_device.cc index 5b7232f539a..46aea040295 100644 --- a/tensorflow/core/common_runtime/eager/kernel_and_device.cc +++ b/tensorflow/core/common_runtime/eager/kernel_and_device.cc @@ -46,9 +46,6 @@ limitations under the License. #include "tensorflow/core/public/version.h" #include "tensorflow/core/util/tensor_slice_reader_cache.h" #if !defined(IS_MOBILE_PLATFORM) -#if !defined(PLATFORM_WINDOWS) -#include "tensorflow/compiler/jit/xla_kernel_creator_util.h" -#endif // !PLATFORM_WINDOWS #include "tensorflow/core/grappler/optimizers/meta_optimizer.h" #endif // !IS_MOBILE_PLATFORM diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py index efc648a2f0c..c0c82fee94e 100644 --- a/tensorflow/python/eager/def_function.py +++ b/tensorflow/python/eager/def_function.py @@ -27,7 +27,6 @@ import six from google.protobuf import text_format as _text_format from google.protobuf.message import DecodeError from tensorflow.core.framework import attr_value_pb2 -from tensorflow.python import pywrap_tfe from tensorflow.python.eager import context from tensorflow.python.eager import function as function_lib from tensorflow.python.eager import lift_to_graph @@ -655,14 +654,6 @@ class Function(object): attributes.update(_XlaMustCompile=bool(self._experimental_compile)) if self._experimental_compile: attributes.update(_noinline=True) - # TODO(b/149755889): Until XLA is always linked, we have to do a runtime - # check. - if not pywrap_tfe.TF_IsXlaEnabled(): - raise ValueError( - "Attempting to use experimental_compile, " - "but XLA support is not linked in. 
" - "Is the dependency to tensorflow/compiler/jit:xla_gpu_jit " - "(or xla_cpu_jit) present?") if not attributes: attributes = None return function_lib.defun_with_attributes( diff --git a/tensorflow/python/eager/def_function_test_cpu_only.py b/tensorflow/python/eager/def_function_test_cpu_only.py index bd3774269ea..7bb6ade8f6c 100644 --- a/tensorflow/python/eager/def_function_test_cpu_only.py +++ b/tensorflow/python/eager/def_function_test_cpu_only.py @@ -20,9 +20,9 @@ from __future__ import print_function from absl.testing import parameterized from tensorflow.python.eager import def_function +from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import test_util -from tensorflow.python.ops import array_ops from tensorflow.python.platform import test @@ -37,11 +37,12 @@ class DefFunctionCpuOnlyTest(test.TestCase, parameterized.TestCase): if test.is_built_with_rocm() or test_util.is_xla_enabled(): return - with self.assertRaisesRegexp(ValueError, 'XLA support is not'): + with self.assertRaisesRegexp(errors.UnimplementedError, + 'check target linkage'): @def_function.function(experimental_compile=True) def fn(x): - return array_ops.unique(x).y + return x + x fn([1, 1, 2, 3]) diff --git a/tensorflow/python/tfe_wrapper.cc b/tensorflow/python/tfe_wrapper.cc index ec0a1ac1c23..c66397036c0 100644 --- a/tensorflow/python/tfe_wrapper.cc +++ b/tensorflow/python/tfe_wrapper.cc @@ -436,7 +436,6 @@ PYBIND11_MODULE(_pywrap_tfe, m) { m.def("TF_SetXlaConstantFoldingDisabled", &TF_SetXlaConstantFoldingDisabled); m.def("TF_GetXlaConstantFoldingDisabled", &TF_GetXlaConstantFoldingDisabled); m.def("TF_SetXlaMinClusterSize", &TF_SetXlaMinClusterSize); - m.def("TF_IsXlaEnabled", [] { return tensorflow::IsXlaEnabled(); }); // MLIR Logic m.def("TF_IsMlirBridgeEnabled", [] { From c42a7a9c8aa4ae4226875cd0b4469b06ca318190 Mon Sep 17 00:00:00 2001 From: Ayush Dubey Date: Wed, 5 Aug 2020 14:11:12 -0700 Subject: [PATCH 2194/2522] Skip ScopedAllocatorOptimizer when an input is a Const op. Const op does not allocate its output on every execution - rather it allocates a persistent tensor once, with default AllocatorAttributes, and reuses this everywhere. Since Const does not invoke any of the typical allocation functions on the OpKernelContext, such as `allocate_output` and `allocate_temp`, it is not compatible with the ScopedAllocatorOptimizer design. PiperOrigin-RevId: 325098062 Change-Id: Iae92870b2123a6368c2c91af9817a2e663fcbf47 --- .../optimizers/scoped_allocator_optimizer.cc | 15 ++++ .../scoped_allocator_optimizer_test.cc | 77 +++++++++++++++---- 2 files changed, 77 insertions(+), 15 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.cc b/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.cc index d3d4e2913a9..6fb62019806 100644 --- a/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.cc @@ -362,6 +362,20 @@ class UnaryElementwiseRewriter : public ScopedAllocatorOptimizer::Rewriter { public: ~UnaryElementwiseRewriter() override {} + // Return non-OK if any input is an op that does not use the + // AllocatorAttributes set by executor to allocate its output. 
+ Status CheckUsesAllocatorAttributes(const std::vector& inputs) { + for (const InputDesc& nd : inputs) { + if (IsConstant(*nd.from_node_def)) { + return errors::Aborted( + "Abandoning ScopedAllocatorOptimizer because input ", + nd.from_node_def->name(), + " is a Const op which does not use AllocatorAttributes"); + } + } + return Status::OK(); + } + // Return non-OK if any input is already committed to a ScopedAllocator. // // We insert an identity to ensure that inputs are not committed to different @@ -441,6 +455,7 @@ class UnaryElementwiseRewriter : public ScopedAllocatorOptimizer::Rewriter { LOG_WARNING_AND_RETURN_IF_ERROR( GetInputs(sa_opti, invocation_count, graph, *graph_properties_, sa_opti->node_map(), ops, *dtype, inputs)); + LOG_WARNING_AND_RETURN_IF_ERROR(CheckUsesAllocatorAttributes(*inputs)); LOG_WARNING_AND_RETURN_IF_ERROR(CheckExistingScopedAllocator(*inputs)); LOG_WARNING_AND_RETURN_IF_ERROR( CheckInternalDataDependency(op_instance_names, *inputs)); diff --git a/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer_test.cc b/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer_test.cc index d67b8acdaa4..905968b5fcb 100644 --- a/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer_test.cc @@ -16,6 +16,7 @@ limitations under the License. #include +#include "tensorflow/cc/ops/array_ops.h" #include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/tensor.pb.h" // NOLINT @@ -108,7 +109,7 @@ class ScopedAllocatorOptimizerTest : public ::testing::Test { // Constructs the following graph. // (Flow is top to bottom, like nature intends.) // - // a, b, and c are constants. s is an Add op. a1, a2, and a3 are Abs ops. + // a, b, and c are placeholders. s is an Add op. a1, a2, and a3 are Abs ops. // r1, r2, and r3 are Reshape ops. 
// // After this graph undergoes SA optimization, we expect a, b, and s to be @@ -132,12 +133,12 @@ class ScopedAllocatorOptimizerTest : public ::testing::Test { Scope s = Scope::NewRootScope(); s = s.WithDevice("/job:localhost/replica:0/task:0/device:CPU:0"); - Output a = - ops::Const(s.WithOpName("a"), {1.0, 0.0, 0.0, -1.0}, {2, 2}); - Output b = - ops::Const(s.WithOpName("b"), {1.0, -2.0, 3.0, 4.0}, {2, 2}); - Output c = - ops::Const(s.WithOpName("c"), {-5.0, -2.0, 0.0, -2.0}, {2, 2}); + Output a = ops::Placeholder(s.WithOpName("a"), DT_FLOAT, + ops::Placeholder::Shape({2, 2})); + Output b = ops::Placeholder(s.WithOpName("b"), DT_FLOAT, + ops::Placeholder::Shape({2, 2})); + Output c = ops::Placeholder(s.WithOpName("c"), DT_FLOAT, + ops::Placeholder::Shape({2, 2})); Output s1 = ops::Add(s.WithOpName("s1"), b, c); Output a1 = ops::Abs(s.WithOpName("a1"), a); Output a2 = ops::Abs(s.WithOpName("a2"), b); @@ -167,14 +168,14 @@ class ScopedAllocatorOptimizerTest : public ::testing::Test { Scope s = Scope::NewRootScope(); s = s.WithDevice("/job:localhost/replica:0/task:0/device:CPU:0"); - Output a = - ops::Const(s.WithOpName("a"), {0.0, 0.0, 0.0, 0.0}, {2, 2}); - Output b = - ops::Const(s.WithOpName("b"), {0.0, 0.0, 0.0, 0.0}, {2, 2}); - Output ctl1 = - ops::Const(s.WithOpName("ctl1"), {0.0, 0.0, 0.0, 0.0}, {2, 2}); - Output ctl2 = - ops::Const(s.WithOpName("ctl2"), {0.0, 0.0, 0.0, 0.0}, {2, 2}); + Output a = ops::Placeholder(s.WithOpName("a"), DT_FLOAT, + ops::Placeholder::Shape({2, 2})); + Output b = ops::Placeholder(s.WithOpName("b"), DT_FLOAT, + ops::Placeholder::Shape({2, 2})); + Output ctl1 = ops::Placeholder(s.WithOpName("ctl1"), DT_FLOAT, + ops::Placeholder::Shape({2, 2})); + Output ctl2 = ops::Placeholder(s.WithOpName("ctl2"), DT_FLOAT, + ops::Placeholder::Shape({2, 2})); Output a1 = ops::Abs(s.WithOpName("a1").WithControlDependencies({ctl1}), a); Output a2 = ops::Abs(s.WithOpName("a2").WithControlDependencies({ctl2}), b); Output o1 = ops::Reshape(s.WithOpName("o1"), a1, {1, 4}); @@ -237,6 +238,33 @@ class ScopedAllocatorOptimizerTest : public ::testing::Test { TF_CHECK_OK(root_scope.ToGraphDef(graph_def)); } + // Constructs the following graph. + // + // c1 and c2 are Const ops. a1 and a2 are Abs ops. + // We expect the optimizer to fail, because Const ops do not allocate their + // output on every Compute, and hence are not compatible with ScopedAllocator. + /* + c1 c2 + | | + a1 a2 + | | + r1 r2 + */ + void BuildConstGraph(GraphDef* graph_def, bool forward) { + Scope s = Scope::NewRootScope(); + s = s.WithDevice("/job:localhost/replica:0/task:0/device:CPU:0"); + + Output c1 = + ops::Const(s.WithOpName("c1"), {1.0, 0.0, 0.0, -1.0}, {2, 2}); + Output c2 = + ops::Const(s.WithOpName("c2"), {1.0, -2.0, 3.0, 4.0}, {2, 2}); + Output a1 = ops::Abs(s.WithOpName("a1"), c1); + Output a2 = ops::Abs(s.WithOpName("a2"), c2); + Output r1 = ops::Reshape(s.WithOpName("r1"), a1, {1, 4}); + Output r2 = ops::Reshape(s.WithOpName("r2"), a2, {4, 1}); + TF_CHECK_OK(s.ToGraphDef(graph_def)); + } + void SetShapes(GraphDef* graph_def) { TensorShapeProto shape_proto; shape_proto.add_dim()->set_size(2); @@ -531,6 +559,25 @@ TEST_F(ScopedAllocatorOptimizerTest, ControlEdgeRewire) { EXPECT_EQ(NumControlInputs(&node_map, "ctl4"), 1); } +// Test that the optimization fails when any input is a Const op. 
+TEST_F(ScopedAllocatorOptimizerTest, ConstInput) { + GrapplerItem item; + BuildConstGraph(&item.graph, false); + SetShapes(&item.graph); + + ScopedAllocatorOptions opts; + opts.add_enable_op("Abs"); + ScopedAllocatorOptimizer sao(RewriterConfig::ON, opts); + ScopedAllocatorOptimizer::OpNameSet ons; + ons.insert("Abs"); + + GraphDef optimized_graph; + auto status = sao.Optimize(nullptr /*cluster*/, item, &optimized_graph); + EXPECT_EQ(status.code(), tensorflow::error::ABORTED); + EXPECT_TRUE(str_util::StrContains(status.error_message(), + "does not use AllocatorAttributes")); +} + } // namespace } // namespace grappler } // namespace tensorflow From 9bf6b0f97379a1c393133dfaa4daa8663e223adb Mon Sep 17 00:00:00 2001 From: Ken Franko Date: Wed, 5 Aug 2020 14:12:45 -0700 Subject: [PATCH 2195/2522] Don't Mark ops for outside compilation if supported by TF2XLA fallback. PiperOrigin-RevId: 325098417 Change-Id: I29833a917181f5c8b21f2dfb55489ab2fd20e3b5 --- tensorflow/compiler/mlir/tensorflow/BUILD | 1 + .../mark_ops_for_outside_compilation.mlir | 18 ++++++++++++++++++ .../mark_ops_for_outside_compilation.cc | 3 ++- .../xla/transforms/legalize_tf_with_tf2xla.cc | 13 +++++++------ .../compiler/mlir/xla/transforms/passes.h | 3 +++ 5 files changed, 31 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/BUILD b/tensorflow/compiler/mlir/tensorflow/BUILD index 34caf1e2473..3ee591ce46a 100644 --- a/tensorflow/compiler/mlir/tensorflow/BUILD +++ b/tensorflow/compiler/mlir/tensorflow/BUILD @@ -828,6 +828,7 @@ cc_library( "//tensorflow/compiler/mlir:op_or_arg_name_mapper", "//tensorflow/compiler/mlir/lite:validators", "//tensorflow/compiler/mlir/xla:xla_legalize_tf", + "//tensorflow/compiler/mlir/xla:xla_legalize_tf_with_tf2xla", "//tensorflow/compiler/xla:xla_data_proto_cc", "//tensorflow/compiler/xla:xla_proto_cc", "//tensorflow/compiler/xla/client:sharding_builder", diff --git a/tensorflow/compiler/mlir/tensorflow/tests/mark_ops_for_outside_compilation.mlir b/tensorflow/compiler/mlir/tensorflow/tests/mark_ops_for_outside_compilation.mlir index afad117beae..d0a4c101bdf 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/mark_ops_for_outside_compilation.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/mark_ops_for_outside_compilation.mlir @@ -14,6 +14,24 @@ func @unsupported_op() -> tensor { return %0 : tensor } +// CHECK-LABEL: func @tf2xla_fallback_op +func @tf2xla_fallback_op() -> tensor { + %0 = "tf_device.cluster"() ( { + // CHECK: "tf.UnsupportedOp" + // CHECK-SAME: _xla_outside_compilation + // CHECK: "tf.Identity" + // CHECK-NOT: _xla_outside_compilation + // CHECK: "tf.Sinh" + // CHECK-NOT: _xla_outside_compilation + %1 = "tf.UnsupportedOp"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<1.0> : tensor} : () -> tensor + %3 = "tf.Identity"(%1) : (tensor) -> tensor + %4 = "tf.Sinh"(%2) : (tensor) -> tensor + tf_device.return %4 : tensor + }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor + return %0 : tensor +} + // CHECK-LABEL: func @op_string_result func @op_string_result() -> tensor { %0 = "tf_device.cluster"() ( { diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/mark_ops_for_outside_compilation.cc b/tensorflow/compiler/mlir/tensorflow/transforms/mark_ops_for_outside_compilation.cc index 71146cf5e2b..6c93a9eb9cc 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/mark_ops_for_outside_compilation.cc +++ 
b/tensorflow/compiler/mlir/tensorflow/transforms/mark_ops_for_outside_compilation.cc @@ -77,7 +77,8 @@ bool IsSupportedOp(Operation& op, // TODO(b/161726307): Check the allowed ops list in LegalizeTfWithTf2XlaPass // as well. return !HasStringOperand(op) && !HasStringResult(op) && - MatchesPattern(op, supported_ops); + (MatchesPattern(op, supported_ops) || + mhlo::IsOpAllowedTf2XlaFallback(&op)); } bool HasCapturedStringOperand(TF::IfRegionOp* if_op) { diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc index bb50fc198c8..c63e77f2a47 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc @@ -74,12 +74,8 @@ limitations under the License. namespace mlir { namespace mhlo { -namespace { -template -using InlinedVector = tensorflow::gtl::InlinedVector; // non-absl ok - -static bool IsOpAllowlisted(Operation* op) { +bool IsOpAllowedTf2XlaFallback(Operation* op) { // Allowlisted TensorFlow ops are known to have well behaved tf2xla kernels // building valid MLIR using MlirHloBuilder. // TODO(hinsu): Drop explicit allowlist when MLIR based bridge is enabled for @@ -215,6 +211,11 @@ static bool IsOpAllowlisted(Operation* op) { return ops.count(abstractOp->typeID); } +namespace { + +template +using InlinedVector = tensorflow::gtl::InlinedVector; // non-absl ok + static std::unique_ptr CreateDeviceMgr( const std::string& device_type) { // Register compilation kernels for all registered XLA backends. @@ -497,7 +498,7 @@ class Tf2XlaRewritePattern : public RewritePattern { LogicalResult matchAndRewrite(Operation* op, PatternRewriter& rewriter) const override { - if (!IsOpAllowlisted(op)) return failure(); + if (!IsOpAllowedTf2XlaFallback(op)) return failure(); return Tf2XlaRewriter::RewriteOp(op, rewriter, device_type_); } diff --git a/tensorflow/compiler/mlir/xla/transforms/passes.h b/tensorflow/compiler/mlir/xla/transforms/passes.h index 8850581f0bd..85bdaaa0e31 100644 --- a/tensorflow/compiler/mlir/xla/transforms/passes.h +++ b/tensorflow/compiler/mlir/xla/transforms/passes.h @@ -53,6 +53,9 @@ void PopulateLegalizeTfWithTf2XlaPatterns(llvm::StringRef device_type, void PopulateLegalizeTfPatterns(MLIRContext* context, OwningRewritePatternList* patterns); +/// Checks whether the op is supported by the Tf2Xla fallback for legalization. +bool IsOpAllowedTf2XlaFallback(Operation* op); + /// Lowers from TF dialect's control flow to HLO dialect's control flow. std::unique_ptr> createLegalizeTFControlFlowPass(); From decd75daaa1bfa90d15ac8bd16f635044c7bbd30 Mon Sep 17 00:00:00 2001 From: Rick Chao Date: Wed, 5 Aug 2020 14:25:05 -0700 Subject: [PATCH 2196/2522] Make metrics_utils_test extendable. 
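The diff below adds a get_rpc_layer() hook to MetricUtilsTest so that subclasses can reuse the same test body against a different RPC layer. As a hedged sketch of how such an extension could look (the subclass name and returned value are illustrative only and are not part of this change):

    class MetricUtilsCustomRpcTest(MetricUtilsTest):

      def get_rpc_layer(self):
        # Return whichever rpc_layer the in-process cluster should be created with.
        return 'grpc'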
PiperOrigin-RevId: 325101084 Change-Id: I9f655ef9d3a9e71443d42dfc5c4e0597c987cae3 --- tensorflow/python/distribute/client/metric_utils_test.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/distribute/client/metric_utils_test.py b/tensorflow/python/distribute/client/metric_utils_test.py index 79827e5e9f6..3dab4367e52 100644 --- a/tensorflow/python/distribute/client/metric_utils_test.py +++ b/tensorflow/python/distribute/client/metric_utils_test.py @@ -31,16 +31,19 @@ from tensorflow.python.training.server_lib import ClusterSpec class MetricUtilsTest(test.TestCase): + def get_rpc_layer(self): + return 'grpc' + def testClientMetrics(self): metric_utils.enable_metrics = True cluster_def = multi_worker_test_base.create_in_process_cluster( - num_workers=1, num_ps=1, rpc_layer='grpc') + num_workers=1, num_ps=1, rpc_layer=self.get_rpc_layer()) cluster_def['chief'] = [ 'localhost:%d' % multi_worker_test_base.pick_unused_port() ] cluster_resolver = SimpleClusterResolver( - ClusterSpec(cluster_def), rpc_layer='grpc') + ClusterSpec(cluster_def), rpc_layer=self.get_rpc_layer()) cluster = client.Cluster(cluster_resolver) @def_function.function From 76bb55a2717ae926acaa5b033f7d75f56642d8b6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 5 Aug 2020 14:26:53 -0700 Subject: [PATCH 2197/2522] Fix broken doc links to tf.math.unsorted_segment_sum operator. PiperOrigin-RevId: 325101427 Change-Id: I73831dac9a2a70575ad620f25207326afe3f76b0 --- tensorflow/python/ops/math_ops.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 320791cc292..05f1bef995e 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -4000,8 +4000,7 @@ def unsorted_segment_mean(data, segment_ids, num_segments, name=None): segmentation](https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/math#about_segmentation) for an explanation of segments. - This operator is similar to the unsorted segment sum operator found - [here](../../../api_docs/python/math_ops.md#UnsortedSegmentSum). + This operator is similar to the `tf.math.unsorted_segment_sum` operator. Instead of computing the sum over segments, it computes the mean of all entries belonging to a segment such that: @@ -4047,8 +4046,7 @@ def unsorted_segment_sqrt_n(data, segment_ids, num_segments, name=None): segmentation](https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/math#about_segmentation) for an explanation of segments. - This operator is similar to the unsorted segment sum operator found - [here](../../../api_docs/python/math_ops.md#UnsortedSegmentSum). + This operator is similar to the `tf.math.unsorted_segment_sum` operator. Additionally to computing the sum over segments, it divides the results by sqrt(N). 
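The two docstrings touched above describe the unsorted segment reductions only in prose. A small, hedged illustration of the operators they cross-reference (the input values here are made up for the example):

    import tensorflow as tf

    data = tf.constant([1.0, 2.0, 3.0, 4.0])
    segment_ids = tf.constant([0, 0, 1, 1])
    # num_segments must be at least max(segment_ids) + 1.
    sums = tf.math.unsorted_segment_sum(data, segment_ids, num_segments=2)    # [3.0, 7.0]
    means = tf.math.unsorted_segment_mean(data, segment_ids, num_segments=2)  # [1.5, 3.5]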
From 44be60723e1a2b45874758b4b9317cd0a82d7aed Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Wed, 5 Aug 2020 15:17:54 -0700 Subject: [PATCH 2198/2522] Add TPU Transfer Manager Interface and Noncopyable Buffer support PiperOrigin-RevId: 325112371 Change-Id: I96a704a62d5acf3305e0328eba621455bb4290bb --- tensorflow/stream_executor/tpu/BUILD | 23 ++++ .../stream_executor/tpu/noncopyable_buffer.h | 112 ++++++++++++++++++ .../tpu/tpu_transfer_manager.h | 9 +- .../tpu/tpu_transfer_manager_interface.h | 34 ++++++ 4 files changed, 177 insertions(+), 1 deletion(-) create mode 100644 tensorflow/stream_executor/tpu/noncopyable_buffer.h create mode 100644 tensorflow/stream_executor/tpu/tpu_transfer_manager_interface.h diff --git a/tensorflow/stream_executor/tpu/BUILD b/tensorflow/stream_executor/tpu/BUILD index 459021043df..a52f9919e6e 100644 --- a/tensorflow/stream_executor/tpu/BUILD +++ b/tensorflow/stream_executor/tpu/BUILD @@ -52,6 +52,18 @@ cc_library( ], ) +cc_library( + name = "noncopyable_buffer", + hdrs = ["noncopyable_buffer.h"], + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/core:lib", + "@com_google_absl//absl/base", + "@com_google_absl//absl/types:optional", + "@com_google_absl//absl/types:span", + ], +) + cc_library( name = "tpu_node_context_c_api_hdrs", hdrs = ["tpu_node_context_c_api.h"], @@ -189,6 +201,16 @@ cc_library( ], ) +cc_library( + name = "tpu_transfer_manager_interface", + hdrs = ["tpu_transfer_manager_interface.h"], + visibility = ["//visibility:public"], + deps = [ + ":noncopyable_buffer", + "//tensorflow/compiler/xla/service:transfer_manager", + ], +) + cc_library( name = "tpu_transfer_manager", srcs = ["tpu_transfer_manager_registration.cc"], @@ -210,6 +232,7 @@ cc_library( ":status_helper", ":tpu_executor_base", ":tpu_executor_c_api_hdrs", + ":tpu_transfer_manager_interface", "//tensorflow/compiler/xla:literal", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:xla_data_proto_cc", diff --git a/tensorflow/stream_executor/tpu/noncopyable_buffer.h b/tensorflow/stream_executor/tpu/noncopyable_buffer.h new file mode 100644 index 00000000000..09ea45f0108 --- /dev/null +++ b/tensorflow/stream_executor/tpu/noncopyable_buffer.h @@ -0,0 +1,112 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_STREAM_EXECUTOR_TPU_NONCOPYABLE_BUFFER_H_ +#define TENSORFLOW_STREAM_EXECUTOR_TPU_NONCOPYABLE_BUFFER_H_ + +#include + +#include "absl/base/casts.h" +#include "absl/types/optional.h" +#include "absl/types/span.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { +namespace tpu { + +// Uncopyable buffer type with optional ownership of the underlying data. If +// data is not owned then ensuring lifetime of the data exceeds the lifetime of +// the buffer is the responsibility of the user. 
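+// A minimal usage sketch (editor's illustration, assuming uint32 element
+// spans; this comment is not part of the original header):
+//
+//   NoncopyableBuffer owned(/*size=*/8, /*value=*/0);  // owning, zero-filled
+//   NoncopyableBuffer view(word_ptr, /*size=*/8);      // non-owning view of word_ptr
+//   view.mutable_data()[0] = 42;  // first mutable access copies into owned storage
+//   CHECK(view.owns_data());      // true after mutable_data()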
+class NoncopyableBuffer { + public: + NoncopyableBuffer() = default; + + // Allocate an owning buffer without initializing the data. Useful when it + // will be filled by a subsequent function and want to avoid initialization + // cost. Size is specified in number of uint32's. + explicit NoncopyableBuffer(size_t size) + : data_(new uint32[size]), buf_(data_.get()), size_(size) {} + + // Allocates an owning buffer and initializes it with the specified data. Size + // is specified in number of uint32's. + NoncopyableBuffer(size_t size, absl::optional value) + : NoncopyableBuffer(size) { +#ifndef MEMORY_SANITIZER + if (!value.has_value()) { + return; + } +#endif + uint32 v = value.value_or(0); + for (int64 i = 0; i < size; ++i) { + data_[i] = v; + } + } + + // Directly use buf pointer without copying it to owning data_. This delays + // the memcpy until mutable access is requested. "buf" is not owned by this + // data structure, so it is the user's duty to ensure the live range of "buf" + // is longer than this data structure. + NoncopyableBuffer(const uint8* buf, uint64 size) // Size is in uint8's. + : buf_(buf), size_(size / sizeof(uint32)) { + CHECK_EQ(size % sizeof(uint32), 0); + } + NoncopyableBuffer(const uint32* buf, uint64 size) // Size is in uint32's. + : buf_(buf), size_(size) {} + + NoncopyableBuffer(const NoncopyableBuffer&) = delete; + NoncopyableBuffer(NoncopyableBuffer&&) = default; + + NoncopyableBuffer& operator=(const NoncopyableBuffer&) = delete; + NoncopyableBuffer& operator=(NoncopyableBuffer&&) = default; + + // Ensure that the buffer owns the data and returns a mutable view into the + // owned data for modification. + absl::Span mutable_data() { + if (data_ == nullptr) { + data_.reset(new uint32[size_]); + memcpy(data_.get(), buf_, size_ * sizeof(uint32)); + buf_ = data_.get(); + } + return absl::Span(data_.get(), size_); + } + + absl::Span const_data() const { + return absl::Span(absl::bit_cast(buf_), size_); + } + // Clone the content to a given buffer. + void CloneTo(void* buf) { memcpy(buf, buf_, size_ * sizeof(uint32)); } + + // Return true if data is owned by this buffer (have been copied to `data_`). + bool owns_data() const { return data_ != nullptr; } + + // Returns a copy of the object that owns its buffer. + NoncopyableBuffer Clone() const { + NoncopyableBuffer clone(size_); + memcpy(clone.data_.get(), buf_, size_ * sizeof(uint32)); + return clone; + } + + private: + // If data_ != nullptr then buf_ == data_.get() + std::unique_ptr data_; // Owning data pointer. + const void* buf_; // Non-owning data pointer. + uint64 size_; // Size in number of uint32's. +}; + +} // namespace tpu +} // namespace tensorflow + +#endif // TENSORFLOW_STREAM_EXECUTOR_TPU_NONCOPYABLE_BUFFER_H_ diff --git a/tensorflow/stream_executor/tpu/tpu_transfer_manager.h b/tensorflow/stream_executor/tpu/tpu_transfer_manager.h index 163ac81ea5f..c201d63d2d5 100644 --- a/tensorflow/stream_executor/tpu/tpu_transfer_manager.h +++ b/tensorflow/stream_executor/tpu/tpu_transfer_manager.h @@ -22,10 +22,11 @@ limitations under the License. 
#include "tensorflow/compiler/xla/shape.h" #include "tensorflow/stream_executor/stream_executor.h" #include "tensorflow/stream_executor/tpu/tpu_executor_c_api.h" +#include "tensorflow/stream_executor/tpu/tpu_transfer_manager_interface.h" namespace tensorflow { -class TpuTransferManager : public xla::TransferManager { +class TpuTransferManager : public xla::TpuTransferManagerInterface { public: TpuTransferManager(); ~TpuTransferManager() override; @@ -61,6 +62,12 @@ class TpuTransferManager : public xla::TransferManager { LOG(FATAL) << "Not yet implemented"; } + Status TransferBuffersToInfeed( + se::StreamExecutor* executor, + const std::deque& buffers) override { + LOG(FATAL) << "Not yet implemented."; + } + Status ResetDevices( absl::Span executor) override { LOG(FATAL) << "Not yet implemented"; diff --git a/tensorflow/stream_executor/tpu/tpu_transfer_manager_interface.h b/tensorflow/stream_executor/tpu/tpu_transfer_manager_interface.h new file mode 100644 index 00000000000..3f34ed8064d --- /dev/null +++ b/tensorflow/stream_executor/tpu/tpu_transfer_manager_interface.h @@ -0,0 +1,34 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_TRANSFER_MANAGER_INTERFACE_H_ +#define TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_TRANSFER_MANAGER_INTERFACE_H_ + +#include + +#include "tensorflow/compiler/xla/service/transfer_manager.h" +#include "tensorflow/stream_executor/tpu/noncopyable_buffer.h" + +namespace xla { + +class TpuTransferManagerInterface : public xla::TransferManager { + virtual Status TransferBuffersToInfeed( + se::StreamExecutor* executor, + const std::deque& buffers) = 0; +}; + +} // namespace xla + +#endif // TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_TRANSFER_MANAGER_INTERFACE_H_ From e4706fdf31d7cd972f69eb4068bf0226a1a51333 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Wed, 5 Aug 2020 15:18:34 -0700 Subject: [PATCH 2199/2522] Fix small issue with error message in Functional model construction. PiperOrigin-RevId: 325112494 Change-Id: Ib0e54e6084ef216b5d696d62682afd7680422f7f --- tensorflow/python/keras/engine/functional.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/keras/engine/functional.py b/tensorflow/python/keras/engine/functional.py index 42c706a923d..67fb5bb2cb1 100644 --- a/tensorflow/python/keras/engine/functional.py +++ b/tensorflow/python/keras/engine/functional.py @@ -668,7 +668,7 @@ class Functional(training_lib.Model): if len(layer._inbound_nodes) > 1 or ( layer._inbound_nodes and not layer._inbound_nodes[0].is_input): cls_name = self.__class__.__name__ - logging.warning(cls_name + ' inputs must come from ' + logging.warning(cls_name + ' model inputs must come from ' '`tf.keras.Input` (thus holding past layer metadata), ' 'they cannot be the output of ' 'a previous non-Input layer. 
' @@ -697,7 +697,7 @@ class Functional(training_lib.Model): for x in self.outputs: if not hasattr(x, '_keras_history'): cls_name = self.__class__.__name__ - raise ValueError('Output tensors to a ' + cls_name + ' must be ' + raise ValueError('Output tensors of a ' + cls_name + ' model must be ' 'the output of a TensorFlow `Layer` ' '(thus holding past layer metadata). Found: ' + str(x)) From 83a1ac2d845fdbc326dee0eb936a5ebfcfc96635 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Wed, 5 Aug 2020 15:18:38 -0700 Subject: [PATCH 2200/2522] Simplify string representation of KerasTensor. PiperOrigin-RevId: 325112498 Change-Id: Ibd4641faf6871ebd58477f9fe945cd2e0ac641f8 --- tensorflow/python/keras/backend.py | 2 +- .../python/keras/engine/keras_tensor.py | 12 ++-------- .../python/keras/engine/keras_tensor_test.py | 23 ++++++++----------- 3 files changed, 13 insertions(+), 24 deletions(-) diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py index 5f8a11806db..3f057361cab 100644 --- a/tensorflow/python/keras/backend.py +++ b/tensorflow/python/keras/backend.py @@ -1181,7 +1181,7 @@ def placeholder(shape=None, >>> input_ph = tf.keras.backend.placeholder(shape=(2, 4, 5)) >>> input_ph - + """ if sparse and ragged: diff --git a/tensorflow/python/keras/engine/keras_tensor.py b/tensorflow/python/keras/engine/keras_tensor.py index 07b8bdfea5b..3aa9b595d4f 100644 --- a/tensorflow/python/keras/engine/keras_tensor.py +++ b/tensorflow/python/keras/engine/keras_tensor.py @@ -229,12 +229,8 @@ class KerasTensor(object): if hasattr(self, '_keras_history'): layer = self._keras_history.layer - node_index = self._keras_history.node_index - tensor_index = self._keras_history.tensor_index symbolic_description = ( - ', description="Symbolic value %s from ' - 'symbolic call %s of layer \'%s\'"' % ( - tensor_index, node_index, layer.name)) + ', description="created by layer \'%s\'"' % (layer.name,)) if self._inferred_value is not None: inferred_value_string = ( ', inferred_value=%s' % self._inferred_value) @@ -254,11 +250,7 @@ class KerasTensor(object): if hasattr(self, '_keras_history'): layer = self._keras_history.layer - node_index = self._keras_history.node_index - tensor_index = self._keras_history.tensor_index - symbolic_description = ( - ' (Symbolic value %s from symbolic call %s of layer \'%s\')' % ( - tensor_index, node_index, layer.name)) + symbolic_description = ' (created by layer \'%s\')' % (layer.name,) if self._inferred_value is not None: inferred_value_string = ( ' inferred_value=%s' % self._inferred_value) diff --git a/tensorflow/python/keras/engine/keras_tensor_test.py b/tensorflow/python/keras/engine/keras_tensor_test.py index 374b89202a1..dfe1077ddd9 100644 --- a/tensorflow/python/keras/engine/keras_tensor_test.py +++ b/tensorflow/python/keras/engine/keras_tensor_test.py @@ -68,21 +68,20 @@ class KerasTensorTest(test.TestCase): expected_str = ( "KerasTensor(type_spec=TensorSpec(shape=(None, 3, 10), " "dtype=tf.float32, name=None), name='dense/BiasAdd:0', " - "description=\"Symbolic value 0 from symbolic call 0 " - "of layer 'dense'\")") + "description=\"created by layer 'dense'\")") expected_repr = ( - "") + "") self.assertEqual(expected_str, str(kt)) self.assertEqual(expected_repr, repr(kt)) kt = array_ops.reshape(kt, shape=(3, 5, 2)) expected_str = ( "KerasTensor(type_spec=TensorSpec(shape=(3, 5, 2), dtype=tf.float32, " - "name=None), name='tf.reshape/Reshape:0', description=\"Symbolic " - "value 0 from symbolic call 0 of layer 'tf.reshape'\")") - expected_repr = 
("") + "name=None), name='tf.reshape/Reshape:0', description=\"created " + "by layer 'tf.reshape'\")") + expected_repr = ("") self.assertEqual(expected_str, str(kt)) self.assertEqual(expected_repr, repr(kt)) @@ -90,12 +89,10 @@ class KerasTensorTest(test.TestCase): for i in range(3): expected_str = ( "KerasTensor(type_spec=TensorSpec(shape=(5, 2), dtype=tf.float32, " - "name=None), name='tf.unstack/unstack:%s', description=\"Symbolic " - "value %s from symbolic call 0 of layer 'tf.unstack'\")" - ) % (i, i) + "name=None), name='tf.unstack/unstack:%s', description=\"created " + "by layer 'tf.unstack'\")" % (i,)) expected_repr = ("") % i + "(created by layer 'tf.unstack')>") self.assertEqual(expected_str, str(kts[i])) self.assertEqual(expected_repr, repr(kts[i])) From 7b301123019d2b4bbd9c597916ba032f05854074 Mon Sep 17 00:00:00 2001 From: Tim Shen Date: Wed, 5 Aug 2020 15:21:11 -0700 Subject: [PATCH 2201/2522] [XLA/GPU] Add a gold CHECK file for sorting. NFC. PiperOrigin-RevId: 325113003 Change-Id: Ida571463512f3e2c834a25338ba7653b57e3cad9 --- .../xla/service/gpu/tests/sorting.hlo | 394 ++++++++++++++++++ .../compiler/xla/service/llvm_ir/tuple_ops.cc | 7 +- 2 files changed, 398 insertions(+), 3 deletions(-) create mode 100644 tensorflow/compiler/xla/service/gpu/tests/sorting.hlo diff --git a/tensorflow/compiler/xla/service/gpu/tests/sorting.hlo b/tensorflow/compiler/xla/service/gpu/tests/sorting.hlo new file mode 100644 index 00000000000..272c9a25769 --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/tests/sorting.hlo @@ -0,0 +1,394 @@ +// RUN: hlo_to_llvm_ir %s | FileCheck %s + +HloModule TestModule + +compare { + p.0.lhs = f32[] parameter(0) + p.0.rhs = f32[] parameter(1) + ROOT lt = pred[] compare(p.0.lhs, p.0.rhs), direction=LT +} + +// CHECK: define void @sort(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC1:%.*]]) +// CHECK-NEXT: entry: +// CHECK-NEXT: [[COMPARE_RETURN_BUFFER:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[SORT_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0]], i64 0 +// CHECK-NEXT: [[SORT_TYPED:%.*]] = bitcast i8* [[SORT_RAW]] to [2 x [3 x float]]* +// CHECK-NEXT: [[X_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1]], i64 0 +// CHECK-NEXT: [[X_TYPED:%.*]] = bitcast i8* [[X_RAW]] to [2 x [3 x float]]* +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 +// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 +// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP1]] to i64 +// CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 +// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP2]], [[THREAD_ID]] +// CHECK-NEXT: [[LINEAR_INDEX_IN_RANGE:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 +// CHECK-NEXT: call void @llvm.assume(i1 [[LINEAR_INDEX_IN_RANGE]]) +// CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = urem i64 [[TMP3]], 2 +// CHECK-NEXT: [[TMP5:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 +// CHECK-NEXT: br i1 [[TMP6]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] +// CHECK: sort.in_bounds-after: +// CHECK-NEXT: ret void +// CHECK: sort.in_bounds-true: +// CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP4]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 1 +// CHECK-NEXT: [[TMP9:%.*]] = icmp slt i64 [[TMP7]], [[TMP8]] +// CHECK-NEXT: [[TMP10:%.*]] = icmp slt i64 
[[TMP8]], 3 +// CHECK-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]] +// CHECK-NEXT: br i1 [[TMP11]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] +// CHECK: smaller_comparison_index-after: +// CHECK-NEXT: br label [[SORT_IN_BOUNDS_AFTER]] +// CHECK: smaller_comparison_index-true: +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP8]] +// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: call void @compare(float* [[TMP12]], float* [[TMP13]], i8* [[COMPARE_RETURN_BUFFER]]) +// CHECK-NEXT: [[TMP14:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 +// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP14]], 0 +// CHECK-NEXT: br i1 [[BOOLEAN_PREDICATE]], label [[IS_SMALLER_THAN_TRUE:%.*]], label [[IS_SMALLER_THAN_AFTER:%.*]] +// CHECK: is_smaller_than-after: +// CHECK-NEXT: br label [[SMALLER_COMPARISON_INDEX_AFTER]] +// CHECK: is_smaller_than-true: +// CHECK-NEXT: [[TMP15:%.*]] = load float, float* [[TMP12]], align 4 +// CHECK-NEXT: [[TMP16:%.*]] = load float, float* [[TMP13]], align 4 +// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: store float [[TMP15]], float* [[TMP17]], align 4 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP8]] +// CHECK-NEXT: store float [[TMP16]], float* [[TMP18]], align 4 +// CHECK-NEXT: br label [[IS_SMALLER_THAN_AFTER]] + +// CHECK: define internal void @compare(float* dereferenceable(4) [[P_0_LHS_TYPED:%.*]], float* dereferenceable(4) [[P_0_RHS_TYPED:%.*]], i8* dereferenceable(1) [[OUTPUT_ARG:%.*]]) +// CHECK-NEXT: entry: +// CHECK-NEXT: [[LT_TYPED:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[P_0_LHS_TYPED]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[P_0_RHS_TYPED]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = fcmp olt float [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i8 +// CHECK-NEXT: store i8 [[TMP3]], i8* [[LT_TYPED]], align 1 +// CHECK-NEXT: [[LOAD_RET_VALUE:%.*]] = load i8, i8* [[LT_TYPED]], align 1 +// CHECK-NEXT: store i8 [[LOAD_RET_VALUE]], i8* [[OUTPUT_ARG]], align 1 +// CHECK-NEXT: ret void + +// CHECK: define void @sort__1(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC1:%.*]]) { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[COMPARE_RETURN_BUFFER:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[SORT_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0]], i64 0 +// CHECK-NEXT: [[SORT_TYPED:%.*]] = bitcast i8* [[SORT_RAW]] to [2 x [3 x float]]* +// CHECK-NEXT: [[X_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1]], i64 0 +// CHECK-NEXT: [[X_TYPED:%.*]] = bitcast i8* [[X_RAW]] to [2 x [3 x float]]* +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 +// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 +// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP1]] to i64 +// CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 +// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP2]], [[THREAD_ID]] +// CHECK-NEXT: [[LINEAR_INDEX_IN_RANGE:%.*]] = icmp ult i64 
[[LINEAR_INDEX]], 4 +// CHECK-NEXT: call void @llvm.assume(i1 [[LINEAR_INDEX_IN_RANGE]]) +// CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = urem i64 [[TMP3]], 2 +// CHECK-NEXT: [[TMP5:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 +// CHECK-NEXT: br i1 [[TMP6]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] +// CHECK: sort.in_bounds-after: +// CHECK-NEXT: ret void +// CHECK: sort.in_bounds-true: +// CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP4]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = icmp slt i64 [[TMP4]], [[TMP7]] +// CHECK-NEXT: [[TMP9:%.*]] = icmp slt i64 [[TMP7]], 3 +// CHECK-NEXT: [[TMP10:%.*]] = and i1 [[TMP8]], [[TMP9]] +// CHECK-NEXT: br i1 [[TMP10]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] +// CHECK: smaller_comparison_index-after: +// CHECK-NEXT: br label [[SORT_IN_BOUNDS_AFTER]] +// CHECK: smaller_comparison_index-true: +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP4]] +// CHECK-NEXT: call void @compare(float* [[TMP11]], float* [[TMP12]], i8* [[COMPARE_RETURN_BUFFER]]) +// CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 +// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP13]], 0 +// CHECK-NEXT: br i1 [[BOOLEAN_PREDICATE]], label [[IS_SMALLER_THAN_TRUE:%.*]], label [[IS_SMALLER_THAN_AFTER:%.*]] +// CHECK: is_smaller_than-after: +// CHECK-NEXT: br label [[SMALLER_COMPARISON_INDEX_AFTER]] +// CHECK: is_smaller_than-true: +// CHECK-NEXT: [[TMP14:%.*]] = load float, float* [[TMP11]], align 4 +// CHECK-NEXT: [[TMP15:%.*]] = load float, float* [[TMP12]], align 4 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP4]] +// CHECK-NEXT: store float [[TMP14]], float* [[TMP16]], align 4 +// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: store float [[TMP15]], float* [[TMP17]], align 4 +// CHECK-NEXT: br label [[IS_SMALLER_THAN_AFTER]] + +// CHECK: define void @sort__2(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC1:%.*]]) { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[COMPARE_RETURN_BUFFER:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[SORT_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 +// CHECK-NEXT: [[SORT_TYPED:%.*]] = bitcast i8* [[SORT_RAW]] to [2 x [3 x float]]* +// CHECK-NEXT: [[X_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1:%.*]], i64 0 +// CHECK-NEXT: [[X_TYPED:%.*]] = bitcast i8* [[X_RAW]] to [2 x [3 x float]]* +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 +// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 +// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP1]] to i64 +// CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 +// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP2]], [[THREAD_ID]] +// CHECK-NEXT: [[LINEAR_INDEX_IN_RANGE:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 +// CHECK-NEXT: call void @llvm.assume(i1 [[LINEAR_INDEX_IN_RANGE]]) +// 
CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = urem i64 [[TMP3]], 2 +// CHECK-NEXT: [[TMP5:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 +// CHECK-NEXT: br i1 [[TMP6]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] +// CHECK: sort.in_bounds-after: +// CHECK-NEXT: ret void +// CHECK: sort.in_bounds-true: +// CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP4]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 1 +// CHECK-NEXT: [[TMP9:%.*]] = icmp slt i64 [[TMP7]], [[TMP8]] +// CHECK-NEXT: [[TMP10:%.*]] = icmp slt i64 [[TMP8]], 3 +// CHECK-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]] +// CHECK-NEXT: br i1 [[TMP11]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] +// CHECK: smaller_comparison_index-after: +// CHECK-NEXT: br label [[SORT_IN_BOUNDS_AFTER]] +// CHECK: smaller_comparison_index-true: +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP8]] +// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: call void @compare(float* [[TMP12]], float* [[TMP13]], i8* [[COMPARE_RETURN_BUFFER]]) +// CHECK-NEXT: [[TMP14:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 +// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP14]], 0 +// CHECK-NEXT: br i1 [[BOOLEAN_PREDICATE]], label [[IS_SMALLER_THAN_TRUE:%.*]], label [[IS_SMALLER_THAN_AFTER:%.*]] +// CHECK: is_smaller_than-after: +// CHECK-NEXT: br label [[SMALLER_COMPARISON_INDEX_AFTER]] +// CHECK: is_smaller_than-true: +// CHECK-NEXT: [[TMP15:%.*]] = load float, float* [[TMP12]], align 4 +// CHECK-NEXT: [[TMP16:%.*]] = load float, float* [[TMP13]], align 4 +// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: store float [[TMP15]], float* [[TMP17]], align 4 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP8]] +// CHECK-NEXT: store float [[TMP16]], float* [[TMP18]], align 4 +// CHECK-NEXT: br label [[IS_SMALLER_THAN_AFTER]] +ENTRY main { + x = f32[2, 3] parameter(0) + ROOT sort = f32[2, 3] sort(x), dimensions={1}, to_apply=compare +} + +// ----- + +HloModule TestModule + +compare { + p.0.lhs = s32[] parameter(0) + p.0.rhs = s32[] parameter(1) + p.1.lhs = f32[] parameter(2) + p.1.rhs = f32[] parameter(3) + ROOT lt = pred[] compare(p.1.lhs, p.1.rhs), direction=LT +} + +// CHECK: define void @sort(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 64 dereferenceable(24) [[ALLOC1:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC2:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC3:%.*]], i8* noalias align 64 dereferenceable(16) [[ALLOC4:%.*]]) +// CHECK-NEXT: entry: +// CHECK-NEXT: [[COMPARE_RETURN_BUFFER:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[SORT_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC4]], i64 0 +// CHECK-NEXT: [[SORT_TYPED:%.*]] = bitcast i8* [[SORT_RAW]] to [2 x i8*]* +// CHECK-NEXT: [[SORT_RAW1:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0]], i64 0 +// CHECK-NEXT: [[SORT_TYPED2:%.*]] = bitcast i8* [[SORT_RAW1]] to [2 x [3 x i32]]* +// CHECK-NEXT: [[SORT_RAW3:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1]], i64 0 +// CHECK-NEXT: [[SORT_TYPED4:%.*]] = 
bitcast i8* [[SORT_RAW3]] to [2 x [3 x float]]* +// CHECK-NEXT: [[X_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC2]], i64 0 +// CHECK-NEXT: [[X_TYPED:%.*]] = bitcast i8* [[X_RAW]] to [2 x [3 x i32]]* +// CHECK-NEXT: [[Y_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC3]], i64 0 +// CHECK-NEXT: [[Y_TYPED:%.*]] = bitcast i8* [[Y_RAW]] to [2 x [3 x float]]* +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 +// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 +// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP1]] to i64 +// CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 +// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP2]], [[THREAD_ID]] +// CHECK-NEXT: [[LINEAR_INDEX_IN_RANGE:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 +// CHECK-NEXT: call void @llvm.assume(i1 [[LINEAR_INDEX_IN_RANGE]]) +// CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = urem i64 [[TMP3]], 2 +// CHECK-NEXT: [[TMP5:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 +// CHECK-NEXT: br i1 [[TMP6]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] +// CHECK: sort.in_bounds-after: +// CHECK-NEXT: ret void +// CHECK: sort.in_bounds-true: +// CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP4]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 1 +// CHECK-NEXT: [[TMP9:%.*]] = icmp slt i64 [[TMP7]], [[TMP8]] +// CHECK-NEXT: [[TMP10:%.*]] = icmp slt i64 [[TMP8]], 3 +// CHECK-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]] +// CHECK-NEXT: br i1 [[TMP11]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] +// CHECK: smaller_comparison_index-after: +// CHECK-NEXT: br label [[SORT_IN_BOUNDS_AFTER]] +// CHECK: smaller_comparison_index-true: +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP8]] +// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP8]] +// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: call void @compare(i32* [[TMP12]], i32* [[TMP13]], float* [[TMP14]], float* [[TMP15]], i8* [[COMPARE_RETURN_BUFFER]]) +// CHECK-NEXT: [[TMP16:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 +// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP16]], 0 +// CHECK-NEXT: br i1 [[BOOLEAN_PREDICATE]], label [[IS_SMALLER_THAN_TRUE:%.*]], label [[IS_SMALLER_THAN_AFTER:%.*]] +// CHECK: is_smaller_than-after: +// CHECK-NEXT: br label [[SMALLER_COMPARISON_INDEX_AFTER]] +// CHECK: is_smaller_than-true: +// CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: store i32 [[TMP17]], i32* [[TMP19]], align 4 +// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP8]] +// CHECK-NEXT: store i32 [[TMP18]], i32* [[TMP20]], align 4 +// 
CHECK-NEXT: [[TMP21:%.*]] = load float, float* [[TMP14]], align 4 +// CHECK-NEXT: [[TMP22:%.*]] = load float, float* [[TMP15]], align 4 +// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: store float [[TMP21]], float* [[TMP23]], align 4 +// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP8]] +// CHECK-NEXT: store float [[TMP22]], float* [[TMP24]], align 4 +// CHECK-NEXT: br label [[IS_SMALLER_THAN_AFTER]] + +// CHECK: define internal void @compare(i32* dereferenceable(4) [[P_0_LHS_TYPED:%.*]], i32* dereferenceable(4) [[P_0_RHS_TYPED:%.*]], float* dereferenceable(4) [[P_1_LHS_TYPED:%.*]], float* dereferenceable(4) [[P_1_RHS_TYPED:%.*]], i8* dereferenceable(1) [[OUTPUT_ARG:%.*]]) +// CHECK-NEXT: entry: +// CHECK-NEXT: [[LT_TYPED:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[P_1_LHS_TYPED]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[P_1_RHS_TYPED]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = fcmp olt float [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i8 +// CHECK-NEXT: store i8 [[TMP3]], i8* [[LT_TYPED]], align 1 +// CHECK-NEXT: [[LOAD_RET_VALUE:%.*]] = load i8, i8* [[LT_TYPED]], align 1 +// CHECK-NEXT: store i8 [[LOAD_RET_VALUE]], i8* [[OUTPUT_ARG]], align 1 +// CHECK-NEXT: ret void + +// CHECK: define void @sort__1(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 64 dereferenceable(24) [[ALLOC1:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC2:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC3:%.*]], i8* noalias align 64 dereferenceable(16) [[ALLOC4:%.*]]) +// CHECK-NEXT: entry: +// CHECK-NEXT: [[COMPARE_RETURN_BUFFER:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[SORT_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC4:%.*]], i64 0 +// CHECK-NEXT: [[SORT_TYPED:%.*]] = bitcast i8* [[SORT_RAW]] to [2 x i8*]* +// CHECK-NEXT: [[SORT_RAW1:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 +// CHECK-NEXT: [[SORT_TYPED2:%.*]] = bitcast i8* [[SORT_RAW1]] to [2 x [3 x i32]]* +// CHECK-NEXT: [[SORT_RAW3:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1:%.*]], i64 0 +// CHECK-NEXT: [[SORT_TYPED4:%.*]] = bitcast i8* [[SORT_RAW3]] to [2 x [3 x float]]* +// CHECK-NEXT: [[X_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC2:%.*]], i64 0 +// CHECK-NEXT: [[X_TYPED:%.*]] = bitcast i8* [[X_RAW]] to [2 x [3 x i32]]* +// CHECK-NEXT: [[Y_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC3:%.*]], i64 0 +// CHECK-NEXT: [[Y_TYPED:%.*]] = bitcast i8* [[Y_RAW]] to [2 x [3 x float]]* +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 +// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 +// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP1]] to i64 +// CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 +// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP2]], [[THREAD_ID]] +// CHECK-NEXT: [[LINEAR_INDEX_IN_RANGE:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 +// CHECK-NEXT: call void @llvm.assume(i1 [[LINEAR_INDEX_IN_RANGE]]) +// CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = urem i64 [[TMP3]], 2 +// CHECK-NEXT: [[TMP5:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 +// CHECK-NEXT: br i1 
[[TMP6]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] +// CHECK: sort.in_bounds-after: +// CHECK-NEXT: ret void +// CHECK: sort.in_bounds-true: +// CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP4]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = icmp slt i64 [[TMP4]], [[TMP7]] +// CHECK-NEXT: [[TMP9:%.*]] = icmp slt i64 [[TMP7]], 3 +// CHECK-NEXT: [[TMP10:%.*]] = and i1 [[TMP8]], [[TMP9]] +// CHECK-NEXT: br i1 [[TMP10]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] +// CHECK: smaller_comparison_index-after: +// CHECK-NEXT: br label [[SORT_IN_BOUNDS_AFTER]] +// CHECK: smaller_comparison_index-true: +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP4]] +// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP4]] +// CHECK-NEXT: call void @compare(i32* [[TMP11]], i32* [[TMP12]], float* [[TMP13]], float* [[TMP14]], i8* [[COMPARE_RETURN_BUFFER]]) +// CHECK-NEXT: [[TMP15:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 +// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP15]], 0 +// CHECK-NEXT: br i1 [[BOOLEAN_PREDICATE]], label [[IS_SMALLER_THAN_TRUE:%.*]], label [[IS_SMALLER_THAN_AFTER:%.*]] +// CHECK: is_smaller_than-after: +// CHECK-NEXT: br label [[SMALLER_COMPARISON_INDEX_AFTER]] +// CHECK: is_smaller_than-true: +// CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP4]] +// CHECK-NEXT: store i32 [[TMP16]], i32* [[TMP18]], align 4 +// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: store i32 [[TMP17]], i32* [[TMP19]], align 4 +// CHECK-NEXT: [[TMP20:%.*]] = load float, float* [[TMP13]], align 4 +// CHECK-NEXT: [[TMP21:%.*]] = load float, float* [[TMP14]], align 4 +// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP4]] +// CHECK-NEXT: store float [[TMP20]], float* [[TMP22]], align 4 +// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: store float [[TMP21]], float* [[TMP23]], align 4 +// CHECK-NEXT: br label [[IS_SMALLER_THAN_AFTER]] + +// CHECK: define void @sort__2(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 64 dereferenceable(24) [[ALLOC1:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC2:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC3:%.*]], i8* noalias align 64 dereferenceable(16) [[ALLOC4:%.*]]) +// CHECK-NEXT: entry: +// CHECK-NEXT: [[COMPARE_RETURN_BUFFER:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[SORT_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC4:%.*]], i64 0 +// CHECK-NEXT: [[SORT_TYPED:%.*]] = bitcast i8* [[SORT_RAW]] to [2 x i8*]* +// CHECK-NEXT: [[SORT_RAW1:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 
+// CHECK-NEXT: [[SORT_TYPED2:%.*]] = bitcast i8* [[SORT_RAW1]] to [2 x [3 x i32]]* +// CHECK-NEXT: [[SORT_RAW3:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1:%.*]], i64 0 +// CHECK-NEXT: [[SORT_TYPED4:%.*]] = bitcast i8* [[SORT_RAW3]] to [2 x [3 x float]]* +// CHECK-NEXT: [[X_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC2:%.*]], i64 0 +// CHECK-NEXT: [[X_TYPED:%.*]] = bitcast i8* [[X_RAW]] to [2 x [3 x i32]]* +// CHECK-NEXT: [[Y_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC3:%.*]], i64 0 +// CHECK-NEXT: [[Y_TYPED:%.*]] = bitcast i8* [[Y_RAW]] to [2 x [3 x float]]* +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 +// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 +// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP1]] to i64 +// CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 +// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP2]], [[THREAD_ID]] +// CHECK-NEXT: [[LINEAR_INDEX_IN_RANGE:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 +// CHECK-NEXT: call void @llvm.assume(i1 [[LINEAR_INDEX_IN_RANGE]]) +// CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = urem i64 [[TMP3]], 2 +// CHECK-NEXT: [[TMP5:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 +// CHECK-NEXT: br i1 [[TMP6]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] +// CHECK: sort.in_bounds-after: +// CHECK-NEXT: [[TMP7:%.*]] = bitcast [2 x [3 x i32]]* [[SORT_TYPED2]] to i8* +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[SORT_TYPED]], i64 0, i64 0 +// CHECK-NEXT: store i8* [[TMP7]], i8** [[TMP8]], align 8 +// CHECK-NEXT: [[TMP9:%.*]] = bitcast [2 x [3 x float]]* [[SORT_TYPED4]] to i8* +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[SORT_TYPED]], i64 0, i64 1 +// CHECK-NEXT: store i8* [[TMP9]], i8** [[TMP10]], align 8 +// CHECK-NEXT: ret void +// CHECK: sort.in_bounds-true: +// CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP4]], 2 +// CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 1 +// CHECK-NEXT: [[TMP13:%.*]] = icmp slt i64 [[TMP11]], [[TMP12]] +// CHECK-NEXT: [[TMP14:%.*]] = icmp slt i64 [[TMP12]], 3 +// CHECK-NEXT: [[TMP15:%.*]] = and i1 [[TMP13]], [[TMP14]] +// CHECK-NEXT: br i1 [[TMP15]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] +// CHECK: smaller_comparison_index-after: +// CHECK-NEXT: br label [[SORT_IN_BOUNDS_AFTER]] +// CHECK: smaller_comparison_index-true: +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP12]] +// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP11]] +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP12]] +// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP11]] +// CHECK-NEXT: call void @compare(i32* [[TMP16]], i32* [[TMP17]], float* [[TMP18]], float* [[TMP19]], i8* [[COMPARE_RETURN_BUFFER]]) +// CHECK-NEXT: [[TMP20:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 +// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP20]], 0 +// CHECK-NEXT: br i1 [[BOOLEAN_PREDICATE]], label 
[[IS_SMALLER_THAN_TRUE:%.*]], label [[IS_SMALLER_THAN_AFTER:%.*]] +// CHECK: is_smaller_than-after: +// CHECK-NEXT: br label [[SMALLER_COMPARISON_INDEX_AFTER]] +// CHECK: is_smaller_than-true: +// CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP11]] +// CHECK-NEXT: store i32 [[TMP21]], i32* [[TMP23]], align 4 +// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP12]] +// CHECK-NEXT: store i32 [[TMP22]], i32* [[TMP24]], align 4 +// CHECK-NEXT: [[TMP25:%.*]] = load float, float* [[TMP18]], align 4 +// CHECK-NEXT: [[TMP26:%.*]] = load float, float* [[TMP19]], align 4 +// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP11]] +// CHECK-NEXT: store float [[TMP25]], float* [[TMP27]], align 4 +// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP12]] +// CHECK-NEXT: store float [[TMP26]], float* [[TMP28]], align 4 +// CHECK-NEXT: br label [[IS_SMALLER_THAN_AFTER]] +ENTRY main { + x = s32[2, 3] parameter(0) + y = f32[2, 3] parameter(1) + ROOT sort = (s32[2, 3], f32[2, 3]) sort(x, y), dimensions={1}, to_apply=compare +} diff --git a/tensorflow/compiler/xla/service/llvm_ir/tuple_ops.cc b/tensorflow/compiler/xla/service/llvm_ir/tuple_ops.cc index daf98478194..d89a9c2e0a5 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/tuple_ops.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/tuple_ops.cc @@ -62,10 +62,11 @@ void EmitTuple(const IrArray& tuple, absl::Span operands, llvm::IRBuilder<>* b) { llvm::Module* module = getModuleFromBuilder(b); for (size_t i = 0; i < operands.size(); ++i) { + auto* cast = + b->CreatePointerCast(operands[i], PrimitiveTypeToIrType(TUPLE, module)); auto* store = b->CreateStore( - b->CreatePointerCast(operands[i], PrimitiveTypeToIrType(TUPLE, module)), - b->CreateInBoundsGEP(tuple.GetBasePointer(), - {b->getInt64(0), b->getInt64(i)})); + cast, b->CreateInBoundsGEP(tuple.GetBasePointer(), + {b->getInt64(0), b->getInt64(i)})); tuple.AnnotateLoadStoreInstructionWithMetadata(store); } } From 78597f43dc30d13114db67e82c9b41f6a9101d70 Mon Sep 17 00:00:00 2001 From: Sachin Joglekar Date: Wed, 5 Aug 2020 15:37:56 -0700 Subject: [PATCH 2202/2522] Add a new delegate flag to propagate shapes via the runtime PiperOrigin-RevId: 325116169 Change-Id: Ib96e80016131c3b0d5741f5dffbe0362ef21d330 --- tensorflow/lite/c/common.h | 21 +- tensorflow/lite/core/subgraph.cc | 42 ++- tensorflow/lite/core/subgraph.h | 6 + tensorflow/lite/delegates/delegate_test.cc | 309 ++++++++++++++---- .../benchmark/experimental/c/c_api_types.h | 21 +- 5 files changed, 334 insertions(+), 65 deletions(-) diff --git a/tensorflow/lite/c/common.h b/tensorflow/lite/c/common.h index 3398d178561..7ef173c78d2 100644 --- a/tensorflow/lite/c/common.h +++ b/tensorflow/lite/c/common.h @@ -861,7 +861,26 @@ typedef enum TfLiteDelegateFlags { // // If the delegate isn't capable to handle dynamic tensors, this flag need // to be set to false. 
- kTfLiteDelegateFlagsAllowDynamicTensors = 1 + kTfLiteDelegateFlagsAllowDynamicTensors = 1, + + // This flag can be used by delegates (that allow dynamic tensors) to ensure + // applicable tensor shapes are automatically propagated in the case of tensor + // resizing. + // This means that non-dynamic (allocation_type != kTfLiteDynamic) I/O tensors + // of a delegate kernel will have correct shapes before its Prepare() method + // is called. The runtime leverages TFLite builtin ops in the original + // execution plan to propagate shapes. + // + // A few points to note: + // 1. This requires kTfLiteDelegateFlagsAllowDynamicTensors. If that flag is + // false, this one is redundant since the delegate kernels are re-initialized + // every time tensors are resized. + // 2. Enabling this flag adds some overhead to AllocateTensors(), since extra + // work is required to prepare the original execution plan. + // 3. This flag requires that the original execution plan only have ops with + // valid registrations (and not 'dummy' custom ops like with Flex). + // WARNING: This feature is experimental and subject to change. + kTfLiteDelegateFlagsRequirePropagatedShapes = 2 } TfLiteDelegateFlags; // WARNING: This is an experimental interface that is subject to change. diff --git a/tensorflow/lite/core/subgraph.cc b/tensorflow/lite/core/subgraph.cc index b087ae1901c..beedbe6c5ea 100644 --- a/tensorflow/lite/core/subgraph.cc +++ b/tensorflow/lite/core/subgraph.cc @@ -637,6 +637,7 @@ TfLiteStatus Subgraph::AllocateTensors() { next_execution_plan_index_to_prepare_ = 0; next_execution_plan_index_to_plan_allocation_ = 0; + next_original_execution_plan_index_to_prepare_ = 0; if (memory_planner_) { TF_LITE_ENSURE_STATUS(memory_planner_->ResetAllocations()); } @@ -829,13 +830,14 @@ TfLiteStatus Subgraph::OpPrepare(const TfLiteRegistration& op_reg, } TfLiteStatus Subgraph::PrepareOpsStartingAt( - int first_execution_plan_index, int* last_execution_plan_index_prepared) { + int first_execution_plan_index, const std::vector& execution_plan, + int* last_execution_plan_index_prepared) { if (first_execution_plan_index == 0) { has_dynamic_tensors_ = false; } for (int execution_plan_index = first_execution_plan_index; - execution_plan_index < execution_plan_.size(); execution_plan_index++) { - int node_index = execution_plan_[execution_plan_index]; + execution_plan_index < execution_plan.size(); execution_plan_index++) { + int node_index = execution_plan[execution_plan_index]; TfLiteNode& node = nodes_and_registration_[node_index].first; const TfLiteRegistration& registration = nodes_and_registration_[node_index].second; @@ -867,10 +869,33 @@ TfLiteStatus Subgraph::PrepareOpsAndTensors() { memory_planner_->PlanAllocations(); } - int last_exec_plan_index_prepared = 0; + // Prepare original execution plan if any applied delegate wants it. + // If any of the delegates is immutable, this won't be triggered + // post-delegation (since we undo/redo delegation). For all other cases, other + // delegates that do shape propagation themselves would still be able to. 
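For illustration only (this sketch is not part of the patch): the minimal way a delegate would opt into the behavior documented in the common.h hunk above, assuming only the two flag values and the existing TfLiteDelegate/TfLiteDelegateCreate() API; node claiming and kernel registration are elided.

// Hedged sketch: a delegate that asks the runtime to propagate shapes.
// kTfLiteDelegateFlagsRequirePropagatedShapes is only meaningful together
// with kTfLiteDelegateFlagsAllowDynamicTensors (alone it is redundant).
TfLiteDelegate delegate = TfLiteDelegateCreate();
delegate.flags = kTfLiteDelegateFlagsAllowDynamicTensors |
                 kTfLiteDelegateFlagsRequirePropagatedShapes;
delegate.Prepare = [](TfLiteContext* context,
                      TfLiteDelegate* self) -> TfLiteStatus {
  // Claim nodes and register the delegate kernel here. With the flags above,
  // the kernel's non-dynamic I/O tensors already carry their final shapes by
  // the time AllocateTensors() calls the kernel's Prepare() after a
  // ResizeInputTensor().
  return kTfLiteOk;
};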
+ bool prepare_original_plan = false; + if (!pre_delegation_execution_plan_.empty()) { + for (int i = 0; i < delegates_applied_.size(); ++i) { + if ((delegates_applied_[i]->flags & + kTfLiteDelegateFlagsRequirePropagatedShapes)) { + prepare_original_plan = true; + break; + } + } + } + if (prepare_original_plan) { + int last_original_exec_plan_index_prepared = 0; + TF_LITE_ENSURE_STATUS(PrepareOpsStartingAt( + next_execution_plan_index_to_prepare_, pre_delegation_execution_plan_, + &last_original_exec_plan_index_prepared)); + next_original_execution_plan_index_to_prepare_ = + last_original_exec_plan_index_prepared + 1; + } - TF_LITE_ENSURE_STATUS(PrepareOpsStartingAt( - next_execution_plan_index_to_prepare_, &last_exec_plan_index_prepared)); + int last_exec_plan_index_prepared = 0; + TF_LITE_ENSURE_STATUS( + PrepareOpsStartingAt(next_execution_plan_index_to_prepare_, + execution_plan_, &last_exec_plan_index_prepared)); next_execution_plan_index_to_prepare_ = last_exec_plan_index_prepared + 1; TF_LITE_ENSURE_STATUS(memory_planner_->ExecuteAllocations( @@ -1366,8 +1391,9 @@ TfLiteStatus Subgraph::ModifyGraphWithDelegate(TfLiteDelegate* delegate) { if (!(delegate->flags & kTfLiteDelegateFlagsAllowDynamicTensors)) { int last_execution_plan_index_prepared; - TF_LITE_ENSURE_OK(&context_, PrepareOpsStartingAt( - 0, &last_execution_plan_index_prepared)); + TF_LITE_ENSURE_OK( + &context_, PrepareOpsStartingAt(0, execution_plan_, + &last_execution_plan_index_prepared)); if (has_dynamic_tensors_) { // Make sure that we are in a defined ready state before returning. // Plan and allocate tensors before returning. diff --git a/tensorflow/lite/core/subgraph.h b/tensorflow/lite/core/subgraph.h index bee13c9073e..5058273667a 100644 --- a/tensorflow/lite/core/subgraph.h +++ b/tensorflow/lite/core/subgraph.h @@ -419,6 +419,7 @@ class Subgraph { // 'last_node_prepared' with the id of the op containing dynamic tensors, or // the last in the graph. TfLiteStatus PrepareOpsStartingAt(int first_execution_plan_index, + const std::vector& execution_plan, int* last_execution_plan_index_prepared); // Tensors needed by the interpreter. Use `AddTensors` to add more blank @@ -635,6 +636,11 @@ class Subgraph { // NOTE: this relies on the order of nodes that is in topological order. int next_execution_plan_index_to_prepare_; + // Only used in cases where a delegate supporting dynamic tensors is applied. + // This helps prepare the original execution before the post-delegation one, + // so that tensor shapes propagate. + int next_original_execution_plan_index_to_prepare_; + // This is similar to `next_execution_plan_index_to_prepare_`, but it tracks // which nodes' allocation is planned with the arena planner. // diff --git a/tensorflow/lite/delegates/delegate_test.cc b/tensorflow/lite/delegates/delegate_test.cc index 1efe6e44d54..aed4400ed99 100644 --- a/tensorflow/lite/delegates/delegate_test.cc +++ b/tensorflow/lite/delegates/delegate_test.cc @@ -127,15 +127,19 @@ class TestDelegate : public ::testing::Test { // min_ops_per_subset: If >0, partitioning preview is used to choose only // those subsets with min_ops_per_subset number of nodes. // fail_node_invoke: To simulate failure of Delegate node's Invoke(). 
- explicit SimpleDelegate( - const std::vector& nodes, - TfLiteDelegateFlags delegate_flags = kTfLiteDelegateFlagsNone, - bool fail_node_prepare = false, int min_ops_per_subset = 0, - bool fail_node_invoke = false) + // automatic_shape_propagation: This assumes that the runtime will propagate + // shapes using the original execution plan. + explicit SimpleDelegate(const std::vector& nodes, + int64_t delegate_flags = kTfLiteDelegateFlagsNone, + bool fail_node_prepare = false, + int min_ops_per_subset = 0, + bool fail_node_invoke = false, + bool automatic_shape_propagation = false) : nodes_(nodes), fail_delegate_node_prepare_(fail_node_prepare), min_ops_per_subset_(min_ops_per_subset), - fail_delegate_node_invoke_(fail_node_invoke) { + fail_delegate_node_invoke_(fail_node_invoke), + automatic_shape_propagation_(automatic_shape_propagation) { delegate_.Prepare = [](TfLiteContext* context, TfLiteDelegate* delegate) -> TfLiteStatus { auto* simple = static_cast(delegate->data_); @@ -242,60 +246,80 @@ class TestDelegate : public ::testing::Test { TfLiteRegistration reg = {nullptr}; reg.custom_name = "fake_fused_op"; - reg.invoke = [](TfLiteContext* context, - TfLiteNode* node) -> TfLiteStatus { - // Copy input data to output data. - const TfLiteTensor* a0; - const TfLiteTensor* a1; - if (node->inputs->size == 2) { - a0 = GetInput(context, node, 0); - a1 = GetInput(context, node, 1); - } else { - a0 = GetInput(context, node, 0); - a1 = a0; - } - TfLiteTensor* out = GetOutput(context, node, 0); - int num = 1; - for (int i = 0; i < a0->dims->size; ++i) { - num *= a0->dims->data[i]; - } - for (int i = 0; i < num; i++) { - out->data.f[i] = a0->data.f[i] + a1->data.f[i]; - } - if (out->buffer_handle != kTfLiteNullBufferHandle) { - // Make the data stale so that CopyFromBufferHandle can be invoked - out->data_is_stale = true; - } - return kTfLiteOk; - }; + // Different flavors of the delegate kernel's Invoke(), dependent on + // testing parameters. if (fail_delegate_node_invoke_) { reg.invoke = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus { return kTfLiteError; }; + } else { + reg.invoke = [](TfLiteContext* context, + TfLiteNode* node) -> TfLiteStatus { + // Copy input data to output data. + const TfLiteTensor* a0; + const TfLiteTensor* a1; + if (node->inputs->size == 2) { + a0 = GetInput(context, node, 0); + a1 = GetInput(context, node, 1); + } else { + a0 = GetInput(context, node, 0); + a1 = a0; + } + TfLiteTensor* out = GetOutput(context, node, 0); + int num = 1; + for (int i = 0; i < a0->dims->size; ++i) { + num *= a0->dims->data[i]; + } + for (int i = 0; i < num; i++) { + out->data.f[i] = a0->data.f[i] + a1->data.f[i]; + } + if (out->buffer_handle != kTfLiteNullBufferHandle) { + // Make the data stale so that CopyFromBufferHandle can be invoked + out->data_is_stale = true; + } + return kTfLiteOk; + }; } - reg.prepare = [](TfLiteContext* context, TfLiteNode* node) { - // Set output size to input size - const TfLiteTensor* input1; - const TfLiteTensor* input2; - if (node->inputs->size == 2) { - input1 = GetInput(context, node, 0); - input2 = GetInput(context, node, 1); - } else { - input1 = GetInput(context, node, 0); - input2 = input1; - } - TfLiteTensor* output = GetOutput(context, node, 0); - - TF_LITE_ENSURE_STATUS(context->ResizeTensor( - context, output, TfLiteIntArrayCopy(input1->dims))); - return kTfLiteOk; - }; - if (fail_delegate_node_prepare_) { + // Different flavors of the delegate kernel's Prepare(), dependent on + // testing parameters. 
+ if (automatic_shape_propagation_) { + reg.prepare = [](TfLiteContext* context, TfLiteNode* node) { + // Shapes should already by propagated by the runtime, just need to + // check. + const TfLiteTensor* input1 = GetInput(context, node, 0); + TfLiteTensor* output = GetOutput(context, node, 0); + const int input_dims_size = input1->dims->size; + TF_LITE_ENSURE(context, output->dims->size == input_dims_size); + for (int i = 0; i < input_dims_size; ++i) { + TF_LITE_ENSURE(context, + output->dims->data[i] == input1->dims->data[i]); + } + return kTfLiteOk; + }; + } else if (fail_delegate_node_prepare_) { reg.prepare = [](TfLiteContext* context, TfLiteNode* node) { return kTfLiteError; }; + } else { + reg.prepare = [](TfLiteContext* context, TfLiteNode* node) { + // Set output size to input size + const TfLiteTensor* input1; + const TfLiteTensor* input2; + if (node->inputs->size == 2) { + input1 = GetInput(context, node, 0); + input2 = GetInput(context, node, 1); + } else { + input1 = GetInput(context, node, 0); + input2 = input1; + } + TfLiteTensor* output = GetOutput(context, node, 0); + + TF_LITE_ENSURE_STATUS(context->ResizeTensor( + context, output, TfLiteIntArrayCopy(input1->dims))); + return kTfLiteOk; + }; } return reg; @@ -311,6 +335,7 @@ class TestDelegate : public ::testing::Test { bool fail_delegate_node_prepare_ = false; int min_ops_per_subset_ = 0; bool fail_delegate_node_invoke_ = false; + bool automatic_shape_propagation_ = false; }; std::unique_ptr interpreter_; @@ -744,6 +769,129 @@ TEST_F(TestDelegate, TestResizeInputWithMultipleDelegates) { } } +// If a delegate sets kTfLiteDelegateFlagsRequirePropagatedShapes but not +// kTfLiteDelegateFlagsAllowDynamicTensors, the former is redundant. +TEST_F(TestDelegate, TestRequirePropagatedShapes_NonDynamicDelegate) { + delegate_ = std::unique_ptr(new SimpleDelegate( + {0, 1, 2}, kTfLiteDelegateFlagsRequirePropagatedShapes)); + ASSERT_EQ( + interpreter_->ModifyGraphWithDelegate(delegate_->get_tf_lite_delegate()), + kTfLiteOk); + + ASSERT_EQ(interpreter_->ResizeInputTensor(0, {1, 4}), kTfLiteOk); + ASSERT_EQ(interpreter_->ResizeInputTensor(1, {1, 4}), kTfLiteOk); + // Resizing should revert execution plan to original state. + ASSERT_EQ(interpreter_->execution_plan().size(), 3); + ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk); + ASSERT_EQ(interpreter_->execution_plan().size(), 1); + + std::vector input = {1.0f, 2.0f, 3.0f, 4.0f}; + std::vector expected_output = {2.0f, 4.0f, 6.0f, 8.0f}; + constexpr int kOutputTensorIndex = 3; + TfLiteTensor* tensor = interpreter_->tensor(kOutputTensorIndex); + + memcpy(interpreter_->typed_tensor(0), input.data(), 4 * sizeof(float)); + memcpy(interpreter_->typed_tensor(1), input.data(), 4 * sizeof(float)); + interpreter_->Invoke(); + for (int i = 0; i < 4; ++i) { + EXPECT_EQ(tensor->data.f[i], expected_output[i]) << i; + } +} + +TEST_F(TestDelegate, TestRequirePropagatedShapes_DynamicDelegateWithFlag) { + // Delegate sets both flags and in its Prepare, ensures that shapes have been + // propagated by runtime. 
+ int delegate_flags = kTfLiteDelegateFlagsAllowDynamicTensors | + kTfLiteDelegateFlagsRequirePropagatedShapes; + delegate_ = std::unique_ptr(new SimpleDelegate( + {0, 1, 2}, delegate_flags, false /**fail_node_prepare**/, + 3 /**min_ops_per_subset**/, false /**fail_node_invoke**/, + true /**automatic_shape_propagation**/)); + ASSERT_EQ( + interpreter_->ModifyGraphWithDelegate(delegate_->get_tf_lite_delegate()), + kTfLiteOk); + + ASSERT_EQ(interpreter_->ResizeInputTensor(0, {1, 4}), kTfLiteOk); + ASSERT_EQ(interpreter_->ResizeInputTensor(1, {1, 4}), kTfLiteOk); + ASSERT_EQ(interpreter_->execution_plan().size(), 1); + ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk); + ASSERT_EQ(interpreter_->execution_plan().size(), 1); + + std::vector input = {1.0f, 2.0f, 3.0f, 4.0f}; + std::vector expected_output = {2.0f, 4.0f, 6.0f, 8.0f}; + constexpr int kOutputTensorIndex = 3; + TfLiteTensor* tensor = interpreter_->tensor(kOutputTensorIndex); + + memcpy(interpreter_->typed_tensor(0), input.data(), 4 * sizeof(float)); + memcpy(interpreter_->typed_tensor(1), input.data(), 4 * sizeof(float)); + interpreter_->Invoke(); + for (int i = 0; i < 4; ++i) { + EXPECT_EQ(tensor->data.f[i], expected_output[i]) << i; + } +} + +// If the delegate implementation expects shapes to be automatically propagated +// but does not set the required flag, its Prepare should fail. +TEST_F(TestDelegate, TestRequirePropagatedShapes_DynamicDelegateWithoutFlag) { + // Delegate sets both flags and in its Prepare, ensures that shapes have been + // propagated by runtime. + int delegate_flags = kTfLiteDelegateFlagsAllowDynamicTensors; + delegate_ = std::unique_ptr(new SimpleDelegate( + {0, 1, 2}, delegate_flags, false /**fail_node_prepare**/, + 3 /**min_ops_per_subset**/, false /**fail_node_invoke**/, + true /**automatic_shape_propagation**/)); + ASSERT_EQ( + interpreter_->ModifyGraphWithDelegate(delegate_->get_tf_lite_delegate()), + kTfLiteOk); + + ASSERT_EQ(interpreter_->ResizeInputTensor(0, {1, 4}), kTfLiteOk); + ASSERT_EQ(interpreter_->ResizeInputTensor(1, {1, 4}), kTfLiteOk); + ASSERT_EQ(interpreter_->execution_plan().size(), 1); + ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteError); +} + +TEST_F(TestDelegate, TestRequirePropagatedShapes_MultipleDelegates) { + // First delegate needs to support dynamic tensors to allow second delegation. + // This delegate does not require automatic propagation. + delegate_ = std::unique_ptr(new SimpleDelegate( + {0}, kTfLiteDelegateFlagsAllowDynamicTensors, + false /**fail_node_prepare**/, 1 /**min_ops_per_subset**/, + false /**fail_node_invoke**/, false /**automatic_shape_propagation**/)); + // Second delegate supports nodes 1 & 2, and requires automatic shape + // propagation. + int delegate_flags = kTfLiteDelegateFlagsAllowDynamicTensors | + kTfLiteDelegateFlagsRequirePropagatedShapes; + delegate2_ = std::unique_ptr(new SimpleDelegate( + {1, 2}, delegate_flags, false /**fail_node_prepare**/, + 1 /**min_ops_per_subset**/, false /**fail_node_invoke**/, + true /**automatic_shape_propagation**/)); + ASSERT_EQ( + interpreter_->ModifyGraphWithDelegate(delegate_->get_tf_lite_delegate()), + kTfLiteOk); + ASSERT_EQ( + interpreter_->ModifyGraphWithDelegate(delegate2_->get_tf_lite_delegate()), + kTfLiteOk); + // Should be two delegate nodes. 
+ ASSERT_EQ(interpreter_->execution_plan().size(), 2); + + ASSERT_EQ(interpreter_->ResizeInputTensor(0, {1, 4}), kTfLiteOk); + ASSERT_EQ(interpreter_->ResizeInputTensor(1, {1, 4}), kTfLiteOk); + ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk); + ASSERT_EQ(interpreter_->execution_plan().size(), 2); + + std::vector input = {1.0f, 2.0f, 3.0f, 4.0f}; + std::vector expected_output = {2.0f, 4.0f, 6.0f, 8.0f}; + constexpr int kOutputTensorIndex = 2; + TfLiteTensor* tensor = interpreter_->tensor(kOutputTensorIndex); + + memcpy(interpreter_->typed_tensor(0), input.data(), 4 * sizeof(float)); + memcpy(interpreter_->typed_tensor(1), input.data(), 4 * sizeof(float)); + interpreter_->Invoke(); + for (int i = 0; i < 4; ++i) { + EXPECT_EQ(tensor->data.f[i], expected_output[i]) << i; + } +} + TEST_F(TestDelegate, TestFallbackWithMultipleDelegates) { // First delegate only supports node 0. // This delegate should support dynamic tensors, otherwise the second won't be @@ -959,16 +1107,18 @@ class TestDelegateWithDynamicTensors : public ::testing::Test { void SetUp() override { interpreter_.reset(new Interpreter); - interpreter_->AddTensors(2); + interpreter_->AddTensors(3); interpreter_->SetInputs({0}); - interpreter_->SetOutputs({1}); + interpreter_->SetOutputs({1, 2}); TfLiteQuantizationParams quant; interpreter_->SetTensorParametersReadWrite(0, kTfLiteFloat32, "", {3}, quant); interpreter_->SetTensorParametersReadWrite(1, kTfLiteFloat32, "", {3}, quant); + interpreter_->SetTensorParametersReadWrite(2, kTfLiteFloat32, "", {3}, + quant); TfLiteRegistration reg = DynamicCopyOpRegistration(); - interpreter_->AddNodeWithParameters({0}, {1}, nullptr, 0, nullptr, ®); + interpreter_->AddNodeWithParameters({0}, {1, 2}, nullptr, 0, nullptr, ®); delegate_.Prepare = [](TfLiteContext* context, TfLiteDelegate* delegate) -> TfLiteStatus { @@ -988,8 +1138,14 @@ class TestDelegateWithDynamicTensors : public ::testing::Test { TfLiteRegistration reg = {nullptr, nullptr, nullptr, nullptr}; reg.prepare = [](TfLiteContext* context, TfLiteNode* node) { - TfLiteTensor* output = GetOutput(context, node, 0); - SetTensorToDynamic(output); + // Output 0 is dynamic + TfLiteTensor* output0 = GetOutput(context, node, 0); + SetTensorToDynamic(output0); + // Output 1 has the same shape as input. + const TfLiteTensor* input = GetInput(context, node, 0); + TfLiteTensor* output1 = GetOutput(context, node, 1); + TF_LITE_ENSURE_STATUS(context->ResizeTensor( + context, output1, TfLiteIntArrayCopy(input->dims))); return kTfLiteOk; }; @@ -1002,6 +1158,21 @@ class TestDelegateWithDynamicTensors : public ::testing::Test { static TfLiteRegistration DelegateRegistration() { TfLiteRegistration reg = {nullptr, nullptr, nullptr, nullptr}; + + reg.prepare = [](TfLiteContext* context, TfLiteNode* node) { + // If tensors are resized, the runtime should propagate shapes + // automatically if correct flag is set. Ensure values are correct. + // Output 0 should be dynamic. + TfLiteTensor* output0 = GetOutput(context, node, 0); + TF_LITE_ENSURE(context, IsDynamicTensor(output0)); + // Output 1 has the same shape as input. 
+ const TfLiteTensor* input = GetInput(context, node, 0); + TfLiteTensor* output1 = GetOutput(context, node, 1); + TF_LITE_ENSURE(context, input->dims->size == output1->dims->size); + TF_LITE_ENSURE(context, input->dims->data[0] == output1->dims->data[0]); + return kTfLiteOk; + }; + return reg; } @@ -1041,6 +1212,34 @@ TEST_F(TestDelegateWithDynamicTensors, ModifyGraphAfterAllocate) { ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk); } +TEST_F(TestDelegateWithDynamicTensors, ShapePropagation_FlagSet) { + // Trigger allocation *before* delegate application. + ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk); + + delegate_.flags = kTfLiteDelegateFlagsAllowDynamicTensors | + kTfLiteDelegateFlagsRequirePropagatedShapes; + ASSERT_EQ(interpreter_->ModifyGraphWithDelegate(&delegate_), kTfLiteOk); + + // Allocation before & after resizing tensors should work. + ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk); + ASSERT_EQ(interpreter_->ResizeInputTensor(0, {4}), kTfLiteOk); + ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk); +} + +TEST_F(TestDelegateWithDynamicTensors, ShapePropagation_FlagNotSet) { + // Trigger allocation *before* delegate application. + ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk); + + delegate_.flags = kTfLiteDelegateFlagsAllowDynamicTensors; + ASSERT_EQ(interpreter_->ModifyGraphWithDelegate(&delegate_), kTfLiteOk); + + // Allocation after resizing tensors should NOT work, since runtime won't + // propagate shape - causing delegate kernel to fail. + ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk); + ASSERT_EQ(interpreter_->ResizeInputTensor(0, {4}), kTfLiteOk); + ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteError); +} + } // namespace } // namespace tflite diff --git a/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h b/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h index 3398d178561..7ef173c78d2 100644 --- a/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h +++ b/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h @@ -861,7 +861,26 @@ typedef enum TfLiteDelegateFlags { // // If the delegate isn't capable to handle dynamic tensors, this flag need // to be set to false. - kTfLiteDelegateFlagsAllowDynamicTensors = 1 + kTfLiteDelegateFlagsAllowDynamicTensors = 1, + + // This flag can be used by delegates (that allow dynamic tensors) to ensure + // applicable tensor shapes are automatically propagated in the case of tensor + // resizing. + // This means that non-dynamic (allocation_type != kTfLiteDynamic) I/O tensors + // of a delegate kernel will have correct shapes before its Prepare() method + // is called. The runtime leverages TFLite builtin ops in the original + // execution plan to propagate shapes. + // + // A few points to note: + // 1. This requires kTfLiteDelegateFlagsAllowDynamicTensors. If that flag is + // false, this one is redundant since the delegate kernels are re-initialized + // every time tensors are resized. + // 2. Enabling this flag adds some overhead to AllocateTensors(), since extra + // work is required to prepare the original execution plan. + // 3. This flag requires that the original execution plan only have ops with + // valid registrations (and not 'dummy' custom ops like with Flex). + // WARNING: This feature is experimental and subject to change. + kTfLiteDelegateFlagsRequirePropagatedShapes = 2 } TfLiteDelegateFlags; // WARNING: This is an experimental interface that is subject to change. 
From 82e073b3a891ddce4136dc106d5fe8f0cba75389 Mon Sep 17 00:00:00 2001 From: Tim Shen Date: Wed, 5 Aug 2020 15:45:16 -0700 Subject: [PATCH 2203/2522] [XLA/GPU] Convert the sort emitter to use LHLO. PiperOrigin-RevId: 325117492 Change-Id: I9320d0ba23267d36020186dd823c3bb7dadd152e --- .../non_identity_layouts.hlotxt | 2 +- .../xla/transforms/mhlo_to_lhlo_with_xla.cc | 11 +- .../xla/transforms/mhlo_to_lhlo_with_xla.h | 3 +- tensorflow/compiler/xla/service/gpu/BUILD | 10 + .../compiler/xla/service/gpu/gpu_compiler.cc | 24 +- .../xla/service/gpu/hlo_to_ir_bindings.cc | 20 +- .../xla/service/gpu/hlo_to_ir_bindings.h | 4 + .../xla/service/gpu/ir_emitter_context.h | 7 +- .../xla/service/gpu/ir_emitter_unnested.cc | 405 ++++++++++---- .../xla/service/gpu/ir_emitter_unnested.h | 82 ++- .../compiler/xla/service/gpu/tests/BUILD | 29 + .../xla/service/gpu/tests/sorting.hlo | 504 +++++++++--------- .../xla/service/gpu/tests/sorting_test.cc | 71 +++ .../compiler/xla/service/llvm_ir/llvm_util.cc | 7 +- .../compiler/xla/service/llvm_ir/llvm_util.h | 2 +- 15 files changed, 784 insertions(+), 397 deletions(-) create mode 100644 tensorflow/compiler/xla/service/gpu/tests/sorting_test.cc diff --git a/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/non_identity_layouts.hlotxt b/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/non_identity_layouts.hlotxt index 3630d2d45e4..a83e36cff64 100644 --- a/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/non_identity_layouts.hlotxt +++ b/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/non_identity_layouts.hlotxt @@ -8,6 +8,6 @@ HloModule TestModule ENTRY TestComputation { x = f32[3, 2]{1,0} parameter(0) - // CHECK: "lmhlo.copy"(%{{.*}}, %{{.*}}) : (memref<3x2xf32>, memref<3x2xf32, #[[MAP]]>) -> () + // CHECK: "lmhlo.copy"(%{{.*}}, %{{.*}}) {name = "copy.1"} : (memref<3x2xf32>, memref<3x2xf32, #[[MAP]]>) -> () ROOT x.copy = f32[3, 2]{0,1} copy(x) } diff --git a/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.cc b/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.cc index 832bad2dcc8..6ce91599fb1 100644 --- a/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.cc +++ b/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.cc @@ -34,7 +34,6 @@ limitations under the License. 
#include "mlir/Pass/Pass.h" // from @llvm-project #include "mlir/Pass/PassOptions.h" // from @llvm-project #include "mlir/Translation.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" #include "tensorflow/compiler/mlir/xla/hlo_function_importer.h" #include "tensorflow/compiler/mlir/xla/hlo_utils.h" #include "tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.h" @@ -182,7 +181,10 @@ template StatusOr LhloDialectEmitter::CreateOpWithoutAttrs( HloInstruction* instr) { Location loc = getLocation(instr); - ArrayRef> attrs; + std::pair attrs[] = { + {Identifier::get("name", builder_.getContext()), + builder_.getStringAttr(instr->name())}, + }; ArrayRef rets{}; llvm::SmallVector operands; @@ -252,15 +254,14 @@ Status LhloDialectEmitter::DefaultAction(HloInstruction* instr) { return Status::OK(); } -StatusOr LhloDialectEmitter::EmitSortOp( - HloInstruction* instr) { +StatusOr LhloDialectEmitter::EmitSortOp(HloInstruction* instr) { TF_ASSIGN_OR_RETURN(auto sort, CreateOpWithoutAttrs(instr)); auto* sort_instr = ::xla::Cast<::xla::HloSortInstruction>(instr); sort.dimensionAttr(builder_.getI64IntegerAttr(sort_instr->sort_dimension())); sort.is_stableAttr(builder_.getBoolAttr(sort_instr->is_stable())); TF_RETURN_IF_ERROR(::xla::HloFunctionImporter::ImportAsRegion( *sort_instr->called_computations()[0], &sort.comparator(), &builder_)); - return sort.getOperation(); + return sort; } Status LhloDialectEmitter::HandleSort(HloInstruction* instr) { diff --git a/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.h b/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.h index bdc977616b1..4000fa01970 100644 --- a/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.h +++ b/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.h @@ -19,6 +19,7 @@ limitations under the License. 
#include "mlir/IR/Builders.h" // from @llvm-project #include "mlir/IR/Module.h" // from @llvm-project #include "mlir/IR/StandardTypes.h" // from @llvm-project +#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" #include "tensorflow/compiler/xla/service/buffer_assignment.h" #include "tensorflow/compiler/xla/service/hlo_module.h" @@ -41,7 +42,7 @@ class LhloDialectEmitter : public ::xla::DfsHloVisitorWithDefault { builder_(module.getContext()), i8_type_(builder_.getIntegerType(8)) {} - ::xla::StatusOr EmitSortOp(::xla::HloInstruction* instr); + ::xla::StatusOr EmitSortOp(::xla::HloInstruction* instr); private: template diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index 8dfd73e9a6a..5305f3beec5 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -254,6 +254,11 @@ cc_library( ":target_util", ":thunk", ":thunk_emitter", + "//tensorflow/compiler/mlir/hlo:lhlo", + "//tensorflow/compiler/mlir/xla:hlo_utils", + "//tensorflow/compiler/mlir/xla:mhlo_to_lhlo_with_xla", + "//tensorflow/compiler/mlir/xla:mlir_hlo_to_hlo", + "//tensorflow/compiler/mlir/xla:type_to_shape", "//tensorflow/compiler/xla:literal", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:status_macros", @@ -290,6 +295,8 @@ cc_library( "@com_google_absl//absl/types:span", "@llvm-project//llvm:Core", "@llvm-project//llvm:Support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:StandardOps", ], ) @@ -1158,6 +1165,7 @@ cc_library( ":target_constants", ":tree_reduction_rewriter", ":variadic_op_splitter", + "//tensorflow/compiler/mlir/xla:mhlo_to_lhlo_with_xla", "//tensorflow/compiler/xla:protobuf_util", "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:statusor", @@ -1214,6 +1222,8 @@ cc_library( "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@llvm-project//llvm:Core", + "@llvm-project//mlir:AllPassesAndDialectsNoRegistration", + "@llvm-project//mlir:IR", ], ) diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index f2d29b5d11f..b6f81e963bd 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -29,6 +29,8 @@ limitations under the License. 
#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Verifier.h" +#include "mlir/IR/Module.h" // from @llvm-project +#include "mlir/InitAllDialects.h" // from @llvm-project #include "tensorflow/compiler/xla/protobuf_util.h" #include "tensorflow/compiler/xla/service/algebraic_simplifier.h" #include "tensorflow/compiler/xla/service/all_reduce_combiner.h" @@ -509,15 +511,22 @@ static Status CompileModuleToLlvmIrImpl( DumpHloModuleIfEnabled(*hlo_module, **buffer_assignment, "after_optimizations"); + mlir::registerAllDialects(); + mlir::MLIRContext mlir_context; + IrEmitterContext ir_emitter_context( hlo_module, buffer_assignment->get(), platform_name, gpu_device_info, - cuda_compute_capability, profile_index_map, llvm_module->get()); + cuda_compute_capability, profile_index_map, &mlir_context, + llvm_module->get()); HloComputation* entry_computation = hlo_module->entry_computation(); - IrEmitterUnnested ir_emitter(hlo_module->config(), entry_computation, - &ir_emitter_context); - TF_RETURN_IF_ERROR(ir_emitter.EmitConstantGlobals()); + TF_ASSIGN_OR_RETURN( + auto ir_emitter, + IrEmitterUnnested::Create(hlo_module->config(), entry_computation, + &ir_emitter_context)); + + TF_RETURN_IF_ERROR(ir_emitter->EmitConstantGlobals()); { XLA_SCOPED_LOGGING_TIMER("GpuCompiler::RunBackend - IR emission"); @@ -526,9 +535,10 @@ static Status CompileModuleToLlvmIrImpl( ThunkSequence thunk_sequence; absl::Span order = hlo_schedule->ThunkLaunchOrder(); for (HloInstruction* instruction : order) { - TF_RETURN_IF_ERROR(instruction->Visit(&ir_emitter)); - TF_RETURN_IF_ERROR(ir_emitter.Postprocess(instruction)); - std::unique_ptr thunks = ir_emitter.ConsumeThunkSequence(); + TF_RETURN_IF_ERROR(instruction->Visit(ir_emitter.get())); + TF_RETURN_IF_ERROR(ir_emitter->Postprocess(instruction)); + std::unique_ptr thunks = + ir_emitter->ConsumeThunkSequence(); // The invariants between each input HloInstruction* and output Thunk* are // not all explicitly checked, but at least we can document them here: diff --git a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc index 5d38d1b727c..332db83b6ad 100644 --- a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc +++ b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc @@ -117,11 +117,11 @@ static bool HasMeaningfulName(llvm::Value* value) { return false; } -llvm::Value* HloToIrBindings::GetTypedIrValue(const HloInstruction& hlo, - ShapeIndexView shape_index, - llvm::Value* ir_value) { - llvm::Type* pointee_type = llvm_ir::ShapeToIrType( - ShapeUtil::GetSubshape(hlo.shape(), shape_index), module_); +llvm::Value* CastToTypedValue(const Shape& shape, llvm::Value* ir_value, + llvm::IRBuilder<>* b) { + llvm::Type* pointee_type = + llvm_ir::ShapeToIrType(shape, b->GetInsertBlock()->getModule()); + llvm::Type* dest_type = pointee_type->getPointerTo(); llvm::Value* typed_ir_value; @@ -129,9 +129,17 @@ llvm::Value* HloToIrBindings::GetTypedIrValue(const HloInstruction& hlo, typed_ir_value = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( llvm::cast(ir_value), dest_type); } else { - typed_ir_value = b_->CreatePointerBitCastOrAddrSpaceCast( + typed_ir_value = b->CreatePointerBitCastOrAddrSpaceCast( ir_value, pointee_type->getPointerTo()); } + return typed_ir_value; +} + +llvm::Value* HloToIrBindings::GetTypedIrValue(const HloInstruction& hlo, + ShapeIndexView shape_index, + llvm::Value* ir_value) { + auto typed_ir_value = CastToTypedValue( + 
ShapeUtil::GetSubshape(hlo.shape(), shape_index), ir_value, b_); if (!HasMeaningfulName(ir_value)) { ir_value->setName(llvm_ir::IrName(&hlo, "raw")); } diff --git a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h index 5eef6727801..3813ec6c949 100644 --- a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h +++ b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h @@ -116,6 +116,10 @@ class HloToIrBindings { llvm::Value* temp_buffer_base_ = nullptr; }; +// Converts `ir_value` with type i8* to a typed LLVM Value* based on `shape`. +llvm::Value* CastToTypedValue(const Shape& shape, llvm::Value* ir_value, + llvm::IRBuilder<>* b); + } // namespace gpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_context.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_context.h index 9c43f80dc60..7d5a8d032e6 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_context.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_context.h @@ -17,6 +17,7 @@ limitations under the License. #define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_IR_EMITTER_CONTEXT_H_ #include "llvm/IR/Module.h" +#include "mlir/IR/MLIRContext.h" // from @llvm-project #include "tensorflow/compiler/xla/service/buffer_assignment.h" #include "tensorflow/compiler/xla/service/gpu/launch_dimensions.h" #include "tensorflow/compiler/xla/service/hlo_execution_profile.h" @@ -34,13 +35,15 @@ class IrEmitterContext { const HloModule* hlo_module, const BufferAssignment* buffer_assignment, std::string platform_name, GpuDeviceInfo gpu_device_info, absl::optional cuda_compute_capability, - const HloProfileIndexMap* profile_index_map, llvm::Module* llvm_module) + const HloProfileIndexMap* profile_index_map, + mlir::MLIRContext* mlir_context, llvm::Module* llvm_module) : hlo_module_(hlo_module), buffer_assignment_(buffer_assignment), platform_name_(std::move(platform_name)), gpu_device_info_(gpu_device_info), cuda_compute_capability_(cuda_compute_capability), profile_index_map_(profile_index_map), + mlir_context_(mlir_context), llvm_module_(llvm_module) {} // Disallow copy and assign. IrEmitterContext(const IrEmitterContext&) = delete; @@ -57,6 +60,7 @@ class IrEmitterContext { return cuda_compute_capability_; } const HloProfileIndexMap* profile_index_map() { return profile_index_map_; } + mlir::MLIRContext* mlir_context() { return mlir_context_; } llvm::Module* llvm_module() { return llvm_module_; } NameUniquer* name_uniquer() { return &name_uniquer_; } @@ -67,6 +71,7 @@ class IrEmitterContext { GpuDeviceInfo gpu_device_info_; absl::optional cuda_compute_capability_; const HloProfileIndexMap* profile_index_map_; + mlir::MLIRContext* mlir_context_; llvm::Module* llvm_module_; NameUniquer name_uniquer_; }; diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index 34cdfb4ecf0..5473143a8ac 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -36,6 +36,13 @@ limitations under the License. 
#include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project +#include "mlir/IR/Builders.h" // from @llvm-project +#include "mlir/IR/Function.h" // from @llvm-project +#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" +#include "tensorflow/compiler/mlir/xla/hlo_utils.h" +#include "tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.h" +#include "tensorflow/compiler/mlir/xla/type_to_shape.h" #include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/literal.h" #include "tensorflow/compiler/xla/service/buffer_assignment.h" @@ -143,13 +150,86 @@ void UpdateLaunchDimensions(const LaunchDimensions& launch_dims, Thunk* thunk, llvm::ConstantAsMetadata::get(threads_per_block_ir_value)})); } +const BufferAllocation* GetAllocation( + mlir::BlockArgument func_arg, const BufferAssignment& buffer_assignment) { + auto func_op = + mlir::cast(func_arg.getParentRegion()->getParentOp()); + int64 allocation_index = func_op + .getArgAttrOfType( + func_arg.getArgNumber(), "lmhlo.alloc") + .getValue() + .getSExtValue(); + return &buffer_assignment.GetAllocation(allocation_index); +} + +StatusOr GetAllocationSliceForMlir( + mlir::Value v, const BufferAssignment& buffer_assignment) { + int64 size = v.getType().cast().getSizeInBits() / 8; + + if (auto arg = v.dyn_cast()) { + return BufferAllocation::Slice(GetAllocation(arg, buffer_assignment), 0, + size); + } + + // We match two patterns here: + // * v = ViewOp(arg); + // * v = StaticMemRefCastOp(ViewOp(arg)); + if (mlir::Operation* op = v.getDefiningOp()) { + if (auto cast = mlir::dyn_cast(op)) { + mlir::Value source = cast.getViewSource(); + op = source.getDefiningOp(); + if (!op) { + return Unimplemented("StaticMemRefCastOp has to wrap an op"); + } + } + if (auto view = mlir::dyn_cast(op)) { + return BufferAllocation::Slice( + GetAllocation(view.source().cast(), + buffer_assignment), + mlir::cast(view.byte_shift().getDefiningOp()) + .value() + .cast() + .getValue() + .getSExtValue(), + size); + } + return Unimplemented("StaticMemRefCastOp has to wrap a ViewOp"); + } + + return Unimplemented( + "Operand has to be in the form of ViewOp(arg) or " + "StaticMemRefCastOp(ViewOp(arg))"); +} + +absl::string_view GetHloName(mlir::Operation* op) { + if (auto attr = op->getAttrOfType("name")) { + auto ref = attr.getValue(); + return absl::string_view(ref.data(), ref.size()); + } + return ""; +} + } // namespace IrEmitterUnnested::IrEmitterUnnested(const HloModuleConfig& hlo_module_config, const HloComputation* hlo_computation, IrEmitterContext* ir_emitter_context) : IrEmitter(hlo_module_config, ir_emitter_context, /*is_nested=*/false), - hlo_computation_(hlo_computation) {} + hlo_computation_(hlo_computation), + mlir_scratch_module_(mlir::ModuleOp::create( + mlir::Builder(ir_emitter_context->mlir_context()).getUnknownLoc())), + lhlo_scratch_emitter_(ir_emitter_context_->buffer_assignment(), + *hlo_computation, mlir_scratch_module_.get()) {} + +StatusOr> IrEmitterUnnested::Create( + const HloModuleConfig& hlo_module_config, + const HloComputation* hlo_computation, + IrEmitterContext* ir_emitter_context) { + auto emitter = std::unique_ptr(new IrEmitterUnnested( + hlo_module_config, hlo_computation, ir_emitter_context)); + TF_RETURN_IF_ERROR(emitter->lhlo_scratch_emitter_.Initialize()); + return std::move(emitter); +} Status IrEmitterUnnested::Postprocess(HloInstruction* hlo) { bindings_.UnbindAllLocalIrValues(); @@ -157,12 
+237,11 @@ Status IrEmitterUnnested::Postprocess(HloInstruction* hlo) { } llvm::Function* IrEmitterUnnested::BuildKernelPrototype( - const HloInstruction& inst, - absl::Span args) { + absl::string_view name, absl::Span args) { // Compute the kernel name. The opcode string may contain "-" which cannot be // in a PTX function name, so sanitize the name before uniquifying it. string kernel_name = ir_emitter_context_->name_uniquer()->GetUniqueName( - llvm_ir::SanitizeFunctionName(inst.name())); + llvm_ir::SanitizeFunctionName(std::string(name))); // Create the kernel and add it to the module. llvm::Module* module = ir_emitter_context_->llvm_module(); @@ -358,7 +437,8 @@ Status IrEmitterUnnested::HandleDot(HloInstruction* dot) { } Status IrEmitterUnnested::HandleConditional(HloInstruction* conditional) { - AddThunkToThunkSequence(BuildConditionalThunk(conditional)); + TF_ASSIGN_OR_RETURN(auto thunk, BuildConditionalThunk(conditional)); + AddThunkToThunkSequence(std::move(thunk)); return Status::OK(); } @@ -1037,10 +1117,13 @@ Status IrEmitterUnnested::HandleWhile(HloInstruction* xla_while) { // Build ForThunk for conformant while loops, otherwise build WhileThunk. auto config = xla_while->backend_config(); if (config.ok() && config.ValueOrDie().has_known_trip_count()) { - AddThunkToThunkSequence( + TF_ASSIGN_OR_RETURN( + auto thunk, BuildForThunk(xla_while, config.ValueOrDie().known_trip_count().n())); + AddThunkToThunkSequence(std::move(thunk)); } else { - AddThunkToThunkSequence(BuildWhileThunk(xla_while)); + TF_ASSIGN_OR_RETURN(auto thunk, BuildWhileThunk(xla_while)); + AddThunkToThunkSequence(std::move(thunk)); } return Status::OK(); } @@ -1263,24 +1346,95 @@ Status IrEmitterUnnested::HandleSelect(HloInstruction* select) { return IrEmitter::HandleSelect(select); } +StatusOr +IrEmitterUnnested::GetOrCreateSubComputationFromRegion(mlir::Region* region) { + std::unique_ptr& module = scratch_nested_computations_[region]; + if (module == nullptr) { + xla::XlaComputation xla_computation; + TF_RETURN_IF_ERROR(ConvertRegionToComputation(region, &xla_computation)); + TF_ASSIGN_OR_RETURN(auto program_shape, xla_computation.GetProgramShape()); + TF_ASSIGN_OR_RETURN( + module, HloModule::CreateFromProto(xla_computation.proto(), + HloModuleConfig(program_shape))); + } + return module->entry_computation(); +} + Status IrEmitterUnnested::HandleSort(HloInstruction* sort) { + MlirEmitterContext result; + + TF_ASSIGN_OR_RETURN(auto sort_op, lhlo_scratch_emitter_.EmitSortOp(sort)); + result.op = sort_op; + result.name = GetHloName(sort_op); + // The name in sort op has no semantics, and it's for debug only. If the name + // doesn't exist, we should use a namer (e.g. count-based). + // TODO(timshen): use a namer instead of relying on the HloInstruction names. 
+ if (result.name.empty()) { + result.name = sort->name(); + } + const auto& buffer_assignment = ir_emitter_context_->buffer_assignment(); + auto& slice = result.extra_slice; + TF_ASSIGN_OR_RETURN(slice.buffer_slice, + buffer_assignment.GetUniqueSlice(sort, {})); + slice.written = true; + slice.shape = sort->shape(); + + result.thunk_info = GetThunkInfo(sort); + + return EmitMlirSort(result); +} + +Status IrEmitterUnnested::EmitMlirSort( + MlirEmitterContext mlir_emitter_context) { + const auto& buffer_assignment = ir_emitter_context_->buffer_assignment(); + auto sort_op = mlir::cast(mlir_emitter_context.op); + + int operand_count = sort_op.operands().size(); + std::vector operand_shapes(operand_count); + std::vector slices; + std::vector output_shapes(sort_op.output().size()); + + for (int i = 0; i < operand_count; i++) { + operand_shapes[i] = + TypeToShape(sort_op.operands()[i].getType().cast()); + } + + // Craft n + 1 slices, where the first n are output parameters, and the last + // is the on-device tuple storage. We don't need n operands because sorting + // kernels are always in-place. + for (int i = 0; i < operand_count; i++) { + output_shapes[i] = + TypeToShape(sort_op.output()[i].getType().cast()); + MlirBufferSlice slice; + TF_ASSIGN_OR_RETURN( + slice.buffer_slice, + GetAllocationSliceForMlir(sort_op.output()[i], buffer_assignment)); + slice.written = true; + slice.shape = operand_shapes[i]; + slices.push_back(slice); + } + slices.push_back(mlir_emitter_context.extra_slice); + std::vector> thunks; - Shape keys_shape = sort->operand(0)->shape(); - int64 dimension_to_sort = sort->dimensions(0); - for (int64 i = 0; i < sort->operand_count(); ++i) { - ShapeIndex shape_index = - sort->operand_count() > 1 ? ShapeIndex({i}) : ShapeIndex({}); + + Shape keys_shape = operand_shapes[0]; + int64 dimension_to_sort = sort_op.dimension().getSExtValue(); + for (int64 i = 0; i < operand_count; ++i) { // We assume that the layout of all involved operands and outputs is the // same. - TF_RET_CHECK(LayoutUtil::LayoutsInShapesEqual(keys_shape, - sort->operand(i)->shape())); - TF_RET_CHECK(LayoutUtil::LayoutsInShapesEqual( - keys_shape, ShapeUtil::GetSubshape(sort->shape(), shape_index))); + TF_RET_CHECK( + LayoutUtil::LayoutsInShapesEqual(keys_shape, operand_shapes[i])); + TF_RET_CHECK( + LayoutUtil::LayoutsInShapesEqual(keys_shape, output_shapes[i])); // If possible, we share buffers. If that is not possible, we need to copy // the values, because the emitter does the sorting in-place. - auto destination_buffer = GetAllocationSlice(*sort, shape_index); - auto source_address = GetAllocationSlice(*sort->operand(i)); + TF_ASSIGN_OR_RETURN( + auto destination_buffer, + GetAllocationSliceForMlir(sort_op.output()[i], buffer_assignment)); + TF_ASSIGN_OR_RETURN( + auto source_address, + GetAllocationSliceForMlir(sort_op.operands()[i], buffer_assignment)); if (destination_buffer != source_address) { // TODO(b/26783907): Figure out why we never seem to share buffers for // key/value sort. @@ -1288,7 +1442,7 @@ Status IrEmitterUnnested::HandleSort(HloInstruction* sort) { Thunk::ThunkInfo(), /*source_address=*/source_address, /*destination_buffer=*/destination_buffer, - /*mem_size=*/ShapeUtil::ByteSizeOf(sort->operand(i)->shape()))); + /*mem_size=*/ShapeUtil::ByteSizeOf(operand_shapes[i]))); } } @@ -1357,10 +1511,10 @@ Status IrEmitterUnnested::HandleSort(HloInstruction* sort) { // we have not enough threads, or not enough shared memory. Also it does not // give a speedup if the tile size is < 128. 
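As a reading aid (not part of the patch; variable names are hypothetical): for the tuple-sort test case in sorting.hlo above, sort = (s32[2, 3], f32[2, 3]) sort(x, y), the n + 1 slices assembled earlier in this hunk line up roughly as follows.

// Sketch of the slice vector EmitMlirSort builds for a two-operand sort.
std::vector<MlirBufferSlice> slices;
slices.push_back(keys_slice);    // s32[2,3] output; written, sorted in-place
slices.push_back(values_slice);  // f32[2,3] output; written, sorted in-place
slices.push_back(tuple_slice);   // extra_slice: the 16-byte buffer of two
                                 // pointers that llvm_ir::EmitTuple fills in
                                 // during the last sorting stage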
int64 total_shared_memory_needed = 0; - for (int64 i = 0; i < sort->operand_count(); ++i) { + for (int64 i = 0; i < operand_count; ++i) { total_shared_memory_needed += - kTileSize * ShapeUtil::ByteSizeOfPrimitiveType( - sort->operand(i)->shape().element_type()); + kTileSize * + ShapeUtil::ByteSizeOfPrimitiveType(operand_shapes[i].element_type()); } bool no_tiling = kTileSize < 128 || @@ -1372,30 +1526,31 @@ Status IrEmitterUnnested::HandleSort(HloInstruction* sort) { uint64 num_blocks = CeilOfRatio(num_iterations, kThreadsPerBlock); LaunchDimensions tiled_launch_dimensions(num_blocks, kThreadsPerBlock); + std::vector ir_arrays; auto emit_kernel = [&](absl::Span xor_masks) { - thunks.push_back( - BuildKernelThunk(sort, /*implements_whole_instruction=*/false)); + thunks.push_back(BuildKernelThunkForMlir( + mlir_emitter_context.name, Thunk::ThunkInfo(), slices, &ir_arrays)); LaunchDimensions launch_dimensions = xor_masks.size() > 1 ? tiled_launch_dimensions : standard_launch_dimensions; UpdateLaunchDimensions(launch_dimensions, thunks.back().get(), ir_emitter_context_->llvm_module()); std::vector values_arrays; - values_arrays.reserve(sort->operand_count()); - for (int64 i = 0; i < sort->operand_count(); ++i) { - ShapeIndex shape_index = - sort->operand_count() > 1 ? ShapeIndex({i}) : ShapeIndex({}); - values_arrays.push_back(GetIrArray(*sort, *sort, shape_index)); + values_arrays.reserve(operand_count); + for (int64 i = 0; i < operand_count; ++i) { + values_arrays.push_back(ir_arrays[i]); } + TF_ASSIGN_OR_RETURN( + const HloComputation* comparator, + GetOrCreateSubComputationFromRegion(&sort_op.comparator())); return llvm_ir::EmitSortInPlace( - dimension_to_sort, values_arrays, IrName(sort), xor_masks, &b_, - launch_dimensions, + dimension_to_sort, values_arrays, IrName(mlir_emitter_context.name), + xor_masks, &b_, launch_dimensions, xor_masks.size() > 1 ? num_iterations_in_sort_dim : standard_num_iterations_in_sort_dim, kTileSize, [&](absl::Span operands, llvm::Value* output) { - return EmitCallToNestedComputation(*sort->to_apply(), operands, - output); + return EmitCallToNestedComputation(*comparator, operands, output); }); }; std::vector xor_masks; @@ -1423,13 +1578,14 @@ Status IrEmitterUnnested::HandleSort(HloInstruction* sort) { } AddThunkToThunkSequence(absl::make_unique( - GetThunkInfo(sort), std::move(thunks))); - if (sort->operand_count() > 1) { + mlir_emitter_context.thunk_info, std::move(thunks))); + if (operand_count > 1) { // Emit the tuple as part of the last stage of sorting. // We are currently in the block sorted.in_bounds.after. b_.SetInsertPoint(b_.GetInsertBlock()->getTerminator()); - llvm_ir::EmitTuple(GetIrArray(*sort, *sort), - ConstructIrArrayForOutputs(*sort), &b_); + llvm_ir::EmitTuple( + ir_arrays[operand_count], + absl::MakeSpan(ir_arrays).subspan(0, ir_arrays.size() - 1), &b_); } return Status::OK(); } @@ -1567,24 +1723,6 @@ Status IrEmitterUnnested::HandleAfterAll(HloInstruction* after_all) { return Status::OK(); } -// Describes how to access a particular subshape for an HLO. For instance if -// `.hlo_index` is {1} and `.gte_index` is {3, 4} then buffer for `.instr` at -// ShapeIndex {1} (i.e. the buffer for the second tuple element of hlo) is found -// at `.buffer_slice`[3][4]. That is, `.slice` is a void***, which we -// dereference twice -- first at index 3, and then at index 4 -- to get the -// address of our buffer. -struct HloBufferSlice { - const HloInstruction* instr; - ShapeIndex hlo_index; - - // The root buffer to look at. 
- BufferAllocation::Slice buffer_slice; - - // Describes how to dereference starting at that buffer to get to the buffer - // in question. - ShapeIndex gte_index; -}; - // Figures out how to access the buffers for all subshapes of hlo's operands and // for hlo itself (i.e. all the buffers produced by HLO). // @@ -1693,22 +1831,22 @@ static std::vector GetHloBufferSlices( return result; } -std::unique_ptr IrEmitterUnnested::BuildKernelThunk( - const HloInstruction* inst, bool implements_whole_instruction) { - const BufferAssignment& buffer_assn = - ir_emitter_context_->buffer_assignment(); - - std::vector hlo_slices = - GetHloBufferSlices(inst, buffer_assn); +std::unique_ptr +IrEmitterUnnested::BuildKernelThunkFromBufferSlices( + absl::string_view name, Thunk::ThunkInfo thunk_info, + absl::Span slices, + std::function + bind_slice_to_ir_value) { + const auto& buffer_assn = ir_emitter_context_->buffer_assignment(); // Figure out which buffer allocations need to be passed as arguments to our - // kernel. This is simply all of the allocations referenced in hlo_slices, + // kernel. This is simply all of the allocations referenced in slices, // plus the XLA temp buffer (if we have it). We always include the temp // buffer because even if the kernel itself doesn't use it, a nested // subcomputation within the kernel (e.g. a kMap's computation) might. std::unordered_set buffers_needed; - for (const auto& hlo_buffer_slice : hlo_slices) { - buffers_needed.insert(hlo_buffer_slice.buffer_slice.allocation()); + for (auto* slice : slices) { + buffers_needed.insert(slice->buffer_slice.allocation()); } absl::optional temp_buffer; for (const BufferAllocation& alloc : buffer_assn.Allocations()) { @@ -1737,7 +1875,7 @@ std::unique_ptr IrEmitterUnnested::BuildKernelThunk( return a->index() < b->index(); }); - llvm::Function* kernel = BuildKernelPrototype(*inst, non_constant_buffers); + llvm::Function* kernel = BuildKernelPrototype(name, non_constant_buffers); // Build a map from a BufferAllocation to the corresponding argument in our // kernel. @@ -1771,24 +1909,19 @@ std::unique_ptr IrEmitterUnnested::BuildKernelThunk( // For each buffer our kernel might want to touch, bind it to a value derived // from our kernel args. 
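  // As a rough illustration (offsets hypothetical): a slice at byte offset 256
  // inside allocation #1 is materialized as
  //   loc = &kernel_arg_for_alloc1[256];
  // and a non-empty gte_index such as {3} adds one extra pointer hop,
  //   loc = ((i8**)loc)[3];
  // before the resulting address is handed to the binding step below.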
- for (const auto& hlo_buffer_slice : hlo_slices) { - const HloInstruction* instr = hlo_buffer_slice.instr; - const ShapeIndex& index = hlo_buffer_slice.hlo_index; - const BufferAllocation::Slice& slice = hlo_buffer_slice.buffer_slice; - const ShapeIndex& gte_index = hlo_buffer_slice.gte_index; - - VLOG(3) << "Buffer for " << instr->ToString() << " at " << index.ToString() - << " is found in slice " << slice.ToString() << " at GTE index " - << gte_index.ToString(); + for (auto* slice : slices) { + const BufferAllocation::Slice& buffer_slice = slice->buffer_slice; + const ShapeIndex& gte_index = slice->gte_index; llvm::Value* loc; - if (slice.allocation()->is_constant()) { + if (buffer_slice.allocation()->is_constant()) { loc = ir_emitter_context_->llvm_module()->getGlobalVariable( - llvm_ir::ConstantBufferAllocationToGlobalName(*slice.allocation())); + llvm_ir::ConstantBufferAllocationToGlobalName( + *buffer_slice.allocation())); CHECK_NE(loc, nullptr); } else { - loc = InBoundsGEP(kernel_args.at(slice.allocation()), - {b_.getInt64(slice.offset())}); + loc = InBoundsGEP(kernel_args.at(buffer_slice.allocation()), + {b_.getInt64(buffer_slice.offset())}); } // If gte_index is nonempty, we have to dereference `loc` to get to the @@ -1800,7 +1933,7 @@ std::unique_ptr IrEmitterUnnested::BuildKernelThunk( loc = Load(InBoundsGEP(loc, {b_.getInt64(idx)})); } - bindings_.BindHloToIrValue(*instr, loc, index); + bind_slice_to_ir_value(slice, loc); } // Bind the temp buffer so that nested subcomputations can find it if they @@ -1812,9 +1945,66 @@ std::unique_ptr IrEmitterUnnested::BuildKernelThunk( llvm::ConstantPointerNull::get(b_.getInt8PtrTy())); } - return absl::make_unique( + return absl::make_unique(thunk_info, non_constant_buffers, + std::string(kernel->getName())); +} + +std::unique_ptr IrEmitterUnnested::BuildKernelThunk( + const HloInstruction* inst, bool implements_whole_instruction) { + std::vector hlo_slices = + GetHloBufferSlices(inst, ir_emitter_context_->buffer_assignment()); + + std::vector slice_ptrs; + slice_ptrs.reserve(hlo_slices.size()); + for (auto& slice : hlo_slices) { + slice_ptrs.push_back(&slice); + } + + return BuildKernelThunkFromBufferSlices( + inst->name(), implements_whole_instruction ? 
GetThunkInfo(inst) : Thunk::ThunkInfo(), - non_constant_buffers, std::string(kernel->getName())); + slice_ptrs, [this](const BufferSlice* slice, llvm::Value* value) { + const HloBufferSlice* hlo_buffer_slice = + static_cast(slice); + const HloInstruction* instr = hlo_buffer_slice->instr; + const ShapeIndex& index = hlo_buffer_slice->hlo_index; + VLOG(3) << "Buffer for " << instr->ToString() << " at " + << index.ToString() << " is found in slice " + << hlo_buffer_slice->buffer_slice.ToString() << " at GTE index " + << hlo_buffer_slice->gte_index.ToString(); + + bindings_.BindHloToIrValue(*instr, value, index); + }); +} + +std::unique_ptr IrEmitterUnnested::BuildKernelThunkForMlir( + absl::string_view name, Thunk::ThunkInfo thunk_info, + absl::Span slices, + std::vector* ir_arrays) { + absl::flat_hash_set buffers_written; + std::vector slice_ptrs; + slice_ptrs.reserve(slices.size()); + for (auto& slice : slices) { + slice_ptrs.push_back(&slice); + if (slice.written) { + buffers_written.insert(slice.buffer_slice); + } + } + + ir_arrays->clear(); + return BuildKernelThunkFromBufferSlices( + name, thunk_info, slice_ptrs, + [&](const BufferSlice* slice, llvm::Value* value) { + const auto& mlir_slice = static_cast(*slice); + + llvm_ir::IrArray ir_array( + CastToTypedValue(mlir_slice.shape, value, &b_), mlir_slice.shape); + if (!buffers_written.contains(slice->buffer_slice)) { + ir_array.MarkInvariantOverWholeProgram(&value->getContext()); + } + + ir_arrays->push_back(ir_array); + }); } StatusOr> IrEmitterUnnested::BuildInitializerThunk( @@ -2021,7 +2211,7 @@ Status CheckConditionalBuffersShareAllocation( } // namespace -std::unique_ptr IrEmitterUnnested::BuildWhileThunk( +StatusOr> IrEmitterUnnested::BuildWhileThunk( const HloInstruction* hlo) { // Check that all while-related buffers share an allocation. TF_CHECK_OK(CheckWhileBuffersShareAllocation( @@ -2029,24 +2219,26 @@ std::unique_ptr IrEmitterUnnested::BuildWhileThunk( // Generate thunk sequence for while 'condition'. HloComputation* condition = hlo->while_condition(); - IrEmitterUnnested ir_emitter_condition(hlo_module_config_, condition, - ir_emitter_context_); - TF_CHECK_OK(condition->Accept(&ir_emitter_condition)); + TF_ASSIGN_OR_RETURN(auto ir_emitter_condition, + IrEmitterUnnested::Create(hlo_module_config_, condition, + ir_emitter_context_)); + TF_RETURN_IF_ERROR(condition->Accept(ir_emitter_condition.get())); // Generate thunk sequence for while 'body'. HloComputation* body = hlo->while_body(); - IrEmitterUnnested ir_emitter_body(hlo_module_config_, body, - ir_emitter_context_); - TF_CHECK_OK(body->Accept(&ir_emitter_body)); + TF_ASSIGN_OR_RETURN( + auto ir_emitter_body, + IrEmitterUnnested::Create(hlo_module_config_, body, ir_emitter_context_)); + TF_RETURN_IF_ERROR(body->Accept(ir_emitter_body.get())); - return absl::make_unique( + return std::unique_ptr(new WhileThunk( GetThunkInfo(hlo), GetAllocationSlice(*condition->root_instruction()), // cond result - ir_emitter_condition.ConsumeThunkSequence(), - ir_emitter_body.ConsumeThunkSequence()); + ir_emitter_condition->ConsumeThunkSequence(), + ir_emitter_body->ConsumeThunkSequence())); } -std::unique_ptr IrEmitterUnnested::BuildForThunk( +StatusOr> IrEmitterUnnested::BuildForThunk( const HloInstruction* hlo, const int64 loop_limit) { // Check that all while-related buffers share an allocation. 
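  // (Rough sketch, assuming the usual WhileThunk semantics: the returned thunk
  // repeatedly runs the condition thunk sequence, reads the predicate from the
  // condition root's result buffer, and runs the body thunk sequence while that
  // predicate is true. The allocation-sharing check below supports this by
  // ensuring the loop state can be updated in place across iterations.)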
TF_CHECK_OK(CheckWhileBuffersShareAllocation( @@ -2054,15 +2246,16 @@ std::unique_ptr IrEmitterUnnested::BuildForThunk( // Generate thunk sequence for while 'body' (will be used a For loop body). HloComputation* body = hlo->while_body(); - IrEmitterUnnested ir_emitter_body(hlo_module_config_, body, - ir_emitter_context_); - TF_CHECK_OK(body->Accept(&ir_emitter_body)); + TF_ASSIGN_OR_RETURN( + auto ir_emitter_body, + IrEmitterUnnested::Create(hlo_module_config_, body, ir_emitter_context_)); + TF_RETURN_IF_ERROR(body->Accept(ir_emitter_body.get())); - return absl::make_unique(GetThunkInfo(hlo), loop_limit, - ir_emitter_body.ConsumeThunkSequence()); + return std::unique_ptr(new ForThunk( + GetThunkInfo(hlo), loop_limit, ir_emitter_body->ConsumeThunkSequence())); } -std::unique_ptr IrEmitterUnnested::BuildConditionalThunk( +StatusOr> IrEmitterUnnested::BuildConditionalThunk( const HloInstruction* hlo) { // Check that the buffers used in conditional are shared with the operands and // result appropriately. @@ -2074,15 +2267,17 @@ std::unique_ptr IrEmitterUnnested::BuildConditionalThunk( for (int j = 0; j < hlo->branch_count(); ++j) { branch_operands.emplace_back(GetAllocationSlice(*hlo->operand(j + 1))); HloComputation* branch_computation = hlo->branch_computation(j); - IrEmitterUnnested ir_emitter(hlo_module_config_, branch_computation, - ir_emitter_context_); - TF_CHECK_OK(branch_computation->Accept(&ir_emitter)); - branch_thunks.push_back(std::move(*ir_emitter.ConsumeThunkSequence())); + TF_ASSIGN_OR_RETURN( + auto ir_emitter, + IrEmitterUnnested::Create(hlo_module_config_, branch_computation, + ir_emitter_context_)); + TF_CHECK_OK(branch_computation->Accept(ir_emitter.get())); + branch_thunks.push_back(std::move(*ir_emitter->ConsumeThunkSequence())); } - return absl::make_unique( + return std::unique_ptr(new ConditionalThunk( GetThunkInfo(hlo), GetAllocationSlice(*hlo->operand(0)), branch_operands, - std::move(branch_thunks)); + std::move(branch_thunks))); } Status IrEmitterUnnested::EmitTargetElementLoopInThunk( diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h index 019fcdf21db..18cbd22815c 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h @@ -17,6 +17,7 @@ limitations under the License. #define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_IR_EMITTER_UNNESTED_H_ #include "absl/container/inlined_vector.h" +#include "tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.h" #include "tensorflow/compiler/xla/service/gpu/ir_emitter.h" #include "tensorflow/compiler/xla/service/gpu/kernel_mapping_scheme.h" #include "tensorflow/compiler/xla/service/gpu/sequential_thunk.h" @@ -28,6 +29,40 @@ limitations under the License. namespace xla { namespace gpu { +struct BufferSlice { + // The root buffer to look at. + BufferAllocation::Slice buffer_slice; + + // Describes how to dereference starting at that buffer to get to the buffer + // in question. + ShapeIndex gte_index; +}; + +// Describes how to access a particular subshape for an HLO. For instance if +// `.hlo_index` is {1} and `.gte_index` is {3, 4} then buffer for `.instr` at +// ShapeIndex {1} (i.e. the buffer for the second tuple element of hlo) is +// found at `.buffer_slice`[3][4]. That is, `.slice` is a void***, which we +// dereference twice -- first at index 3, and then at index 4 -- to get the +// address of our buffer. 
+struct HloBufferSlice : public BufferSlice { + const HloInstruction* instr; + ShapeIndex hlo_index; +}; + +struct MlirBufferSlice : public BufferSlice { + // The buffer is modified by the kernel. + bool written; + + Shape shape; +}; + +struct MlirEmitterContext { + mlir::Operation* op; + absl::string_view name; + Thunk::ThunkInfo thunk_info; + MlirBufferSlice extra_slice; +}; + // Emits LLVM IR for an "unnested computation". // // An unnested computation is an HloComputation which you run by executing one @@ -89,12 +124,14 @@ class IrEmitterUnnested : public IrEmitter, const string& loop_name, llvm::Value* tile_height, llvm::Value* tile_width, KernelSupportLibrary* ksl)>; - IrEmitterUnnested(const HloModuleConfig& hlo_module_config, - const HloComputation* hlo_computation, - IrEmitterContext* ir_emitter_context); IrEmitterUnnested(const IrEmitterUnnested&) = delete; IrEmitterUnnested& operator=(const IrEmitterUnnested&) = delete; + static StatusOr> Create( + const HloModuleConfig& hlo_module_config, + const HloComputation* hlo_computation, + IrEmitterContext* ir_emitter_context); + // Transfers the ownship of thunk_sequence_ out. std::unique_ptr ConsumeThunkSequence() { return std::make_unique(std::move(thunk_sequence_)); @@ -124,6 +161,7 @@ class IrEmitterUnnested : public IrEmitter, Status HandleScatter(HloInstruction* scatter) override; Status HandleSelect(HloInstruction* select) override; Status HandleSort(HloInstruction* sort) override; + Status EmitMlirSort(MlirEmitterContext context); Status HandleTriangularSolve(HloInstruction* hlo) override; Status HandleTupleSelect(HloInstruction* tuple_select) override; Status HandleAllReduce(HloInstruction* crs) override; @@ -148,6 +186,10 @@ class IrEmitterUnnested : public IrEmitter, Status Postprocess(HloInstruction* hlo) override; private: + IrEmitterUnnested(const HloModuleConfig& hlo_module_config, + const HloComputation* hlo_computation, + IrEmitterContext* ir_emitter_context); + // Add a owning Thunk object to the thunk sequence. void AddThunkToThunkSequence(std::unique_ptr thunk) override { thunk_sequence_.emplace_back(std::move(thunk)); @@ -264,8 +306,7 @@ class IrEmitterUnnested : public IrEmitter, // Builds the prototype of the IR kernel for `inst` and adds it to the module. // This kernel takes as arguments pointers to the given buffer allocations. llvm::Function* BuildKernelPrototype( - const HloInstruction& inst, - absl::Span args); + absl::string_view name, absl::Span args); // Helper for writing extra outputs from inside a reduce kernel. Status EmitExtraOutputsForReduce( @@ -490,6 +531,12 @@ class IrEmitterUnnested : public IrEmitter, HloComputation* reducer, llvm::Type* element_type, llvm::Value* partial_result_address); + std::unique_ptr BuildKernelThunkFromBufferSlices( + absl::string_view name, Thunk::ThunkInfo thunk_info, + absl::Span slices, + std::function + bind_slice_to_ir_value); + // Returns a KernelThunk that invokes the kernel emitted for `inst`. The // caller needs to make sure `inst` outlives the lifetime of the returned // Thunk object. 
'implements_whole_instruction' specifies whether this @@ -498,6 +545,11 @@ class IrEmitterUnnested : public IrEmitter, std::unique_ptr BuildKernelThunk( const HloInstruction* inst, bool implements_whole_instruction); + std::unique_ptr BuildKernelThunkForMlir( + absl::string_view name, Thunk::ThunkInfo thunk_info, + absl::Span slices, + std::vector* ir_arrays); + // Returns a thunk that, given a reduce or select-and-scatter op, // initializes its memory to the appropriate initial value. StatusOr> BuildInitializerThunk( @@ -505,17 +557,18 @@ class IrEmitterUnnested : public IrEmitter, // Returns a WhileThunk that invokes thunk sequences for 'condition' and // 'body' sub-computations of while instruction 'hlo'. - std::unique_ptr BuildWhileThunk(const HloInstruction* hlo); + StatusOr> BuildWhileThunk(const HloInstruction* hlo); // Returns a ForThunk which executes 'loop_limit' invocations of a thunk // sequence from the 'body' sub-computation of the while instruction 'hlo'. - std::unique_ptr BuildForThunk(const HloInstruction* hlo, - const int64 loop_limit); + StatusOr> BuildForThunk(const HloInstruction* hlo, + const int64 loop_limit); // Returns a ConditionalThunk which executes the thunk sequence for the // 'branch_computation' corresponding to the predicate/branch_index of the // given conditional instruction. - std::unique_ptr BuildConditionalThunk(const HloInstruction* hlo); + StatusOr> BuildConditionalThunk( + const HloInstruction* hlo); // Emits current thread id with the given type. // @@ -545,6 +598,9 @@ class IrEmitterUnnested : public IrEmitter, absl::optional thread_id_filter = absl::nullopt, absl::optional block_id_filter = absl::nullopt); + StatusOr GetOrCreateSubComputationFromRegion( + mlir::Region* region); + // Returns the last generated thunk. Thunk* LastThunk() const { return thunk_sequence_.back().get(); } @@ -555,6 +611,14 @@ class IrEmitterUnnested : public IrEmitter, // The HloComputation that this IrEmitter emits code for. const HloComputation* hlo_computation_; + + mlir::OwningModuleRef mlir_scratch_module_; + + // This is for cache-purpose only. It has no significant semantics. 
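+  // (Note: the scratch emitter writes LHLO ops into mlir_scratch_module_ on
+  // demand, e.g. the sort op consumed by HandleSort/EmitMlirSort above; the
+  // scratch module is only a container for those ops and is never compiled by
+  // itself.)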
+ mlir::LhloDialectEmitter lhlo_scratch_emitter_; + + absl::flat_hash_map> + scratch_nested_computations_; }; } // namespace gpu diff --git a/tensorflow/compiler/xla/service/gpu/tests/BUILD b/tensorflow/compiler/xla/service/gpu/tests/BUILD index a2bddd2d0d7..809b277317f 100644 --- a/tensorflow/compiler/xla/service/gpu/tests/BUILD +++ b/tensorflow/compiler/xla/service/gpu/tests/BUILD @@ -458,6 +458,35 @@ xla_test( ], ) +tf_cc_test( + name = "sorting_test", + srcs = [ + "sorting_test.cc", + ], + tags = tf_cuda_tests_tags() + [ + "no_rocm", + ], + deps = [ + ":gpu_codegen_test", + "//tensorflow/compiler/xla:debug_options_flags", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:xla_proto_cc", + "//tensorflow/compiler/xla/service:gpu_plugin", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_module_config", + "//tensorflow/compiler/xla/service:hlo_parser", + "//tensorflow/compiler/xla/service/gpu:gpu_executable", + "//tensorflow/compiler/xla/tests:filecheck", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:llvm_irgen_test_base", + "//tensorflow/core:lib", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/stream_executor/lib", + "@com_google_absl//absl/memory", + ], +) + tf_cc_binary( name = "hlo_to_llvm_ir", srcs = ["hlo_to_llvm_ir.cc"], diff --git a/tensorflow/compiler/xla/service/gpu/tests/sorting.hlo b/tensorflow/compiler/xla/service/gpu/tests/sorting.hlo index 272c9a25769..4d29a8df116 100644 --- a/tensorflow/compiler/xla/service/gpu/tests/sorting.hlo +++ b/tensorflow/compiler/xla/service/gpu/tests/sorting.hlo @@ -8,162 +8,162 @@ compare { ROOT lt = pred[] compare(p.0.lhs, p.0.rhs), direction=LT } -// CHECK: define void @sort(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC1:%.*]]) +// CHECK: define void @sort(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]]) // CHECK-NEXT: entry: // CHECK-NEXT: [[COMPARE_RETURN_BUFFER:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[SORT_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0]], i64 0 -// CHECK-NEXT: [[SORT_TYPED:%.*]] = bitcast i8* [[SORT_RAW]] to [2 x [3 x float]]* -// CHECK-NEXT: [[X_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1]], i64 0 -// CHECK-NEXT: [[X_TYPED:%.*]] = bitcast i8* [[X_RAW]] to [2 x [3 x float]]* -// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 -// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP0]] to i64 -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 -// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP1]] to i64 -// CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 -// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP2]], [[THREAD_ID]] +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to [2 x [3 x float]]* +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0]], i64 0 +// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [2 x [3 x float]]* +// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 +// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP4]] to i64 +// CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 +// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP5]] to i64 +// CHECK-NEXT: [[TMP6:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 +// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP6]], 
[[THREAD_ID]] // CHECK-NEXT: [[LINEAR_INDEX_IN_RANGE:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 // CHECK-NEXT: call void @llvm.assume(i1 [[LINEAR_INDEX_IN_RANGE]]) -// CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = urem i64 [[TMP3]], 2 -// CHECK-NEXT: [[TMP5:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 -// CHECK-NEXT: br i1 [[TMP6]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] +// CHECK-NEXT: [[TMP7:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = urem i64 [[TMP7]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 +// CHECK-NEXT: [[TMP10:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 +// CHECK-NEXT: br i1 [[TMP10]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] // CHECK: sort.in_bounds-after: // CHECK-NEXT: ret void // CHECK: sort.in_bounds-true: -// CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP4]], 2 -// CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 1 -// CHECK-NEXT: [[TMP9:%.*]] = icmp slt i64 [[TMP7]], [[TMP8]] -// CHECK-NEXT: [[TMP10:%.*]] = icmp slt i64 [[TMP8]], 3 -// CHECK-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]] -// CHECK-NEXT: br i1 [[TMP11]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] +// CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP8]], 2 +// CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 1 +// CHECK-NEXT: [[TMP13:%.*]] = icmp slt i64 [[TMP11]], [[TMP12]] +// CHECK-NEXT: [[TMP14:%.*]] = icmp slt i64 [[TMP12]], 3 +// CHECK-NEXT: [[TMP15:%.*]] = and i1 [[TMP13]], [[TMP14]] +// CHECK-NEXT: br i1 [[TMP15]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] // CHECK: smaller_comparison_index-after: // CHECK-NEXT: br label [[SORT_IN_BOUNDS_AFTER]] // CHECK: smaller_comparison_index-true: -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP8]] -// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP7]] -// CHECK-NEXT: call void @compare(float* [[TMP12]], float* [[TMP13]], i8* [[COMPARE_RETURN_BUFFER]]) -// CHECK-NEXT: [[TMP14:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 -// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP14]], 0 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP12]] +// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP11]] +// CHECK-NEXT: call void @region_0_4(float* [[TMP16]], float* [[TMP17]], i8* [[COMPARE_RETURN_BUFFER]]) +// CHECK-NEXT: [[TMP18:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 +// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP18]], 0 // CHECK-NEXT: br i1 [[BOOLEAN_PREDICATE]], label [[IS_SMALLER_THAN_TRUE:%.*]], label [[IS_SMALLER_THAN_AFTER:%.*]] // CHECK: is_smaller_than-after: // CHECK-NEXT: br label [[SMALLER_COMPARISON_INDEX_AFTER]] // CHECK: is_smaller_than-true: -// CHECK-NEXT: [[TMP15:%.*]] = load float, float* [[TMP12]], align 4 -// CHECK-NEXT: [[TMP16:%.*]] = load float, float* [[TMP13]], align 4 -// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP7]] -// CHECK-NEXT: store float [[TMP15]], float* [[TMP17]], align 4 -// CHECK-NEXT: 
[[TMP18:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP8]] -// CHECK-NEXT: store float [[TMP16]], float* [[TMP18]], align 4 +// CHECK-NEXT: [[TMP19:%.*]] = load float, float* [[TMP16]], align 4 +// CHECK-NEXT: [[TMP20:%.*]] = load float, float* [[TMP17]], align 4 +// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP11]] +// CHECK-NEXT: store float [[TMP19]], float* [[TMP21]], align 4 +// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP12]] +// CHECK-NEXT: store float [[TMP20]], float* [[TMP22]], align 4 // CHECK-NEXT: br label [[IS_SMALLER_THAN_AFTER]] -// CHECK: define internal void @compare(float* dereferenceable(4) [[P_0_LHS_TYPED:%.*]], float* dereferenceable(4) [[P_0_RHS_TYPED:%.*]], i8* dereferenceable(1) [[OUTPUT_ARG:%.*]]) +// CHECK: define internal void @region_0_4(float* dereferenceable(4) [[P_0_LHS_TYPED:%.*]], float* dereferenceable(4) [[P_0_RHS_TYPED:%.*]], i8* dereferenceable(1) [[OUTPUT_ARG:%.*]]) // CHECK-NEXT: entry: -// CHECK-NEXT: [[LT_TYPED:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[P_0_LHS_TYPED]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[P_0_RHS_TYPED]], align 4 +// CHECK-NEXT: [[COMPARE_3_TYPED:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[ARG_0_1_TYPED:%.*]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARG_1_2_TYPED:%.*]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = fcmp olt float [[TMP0]], [[TMP1]] // CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i8 -// CHECK-NEXT: store i8 [[TMP3]], i8* [[LT_TYPED]], align 1 -// CHECK-NEXT: [[LOAD_RET_VALUE:%.*]] = load i8, i8* [[LT_TYPED]], align 1 -// CHECK-NEXT: store i8 [[LOAD_RET_VALUE]], i8* [[OUTPUT_ARG]], align 1 +// CHECK-NEXT: store i8 [[TMP3]], i8* [[COMPARE_3_TYPED]], align 1 +// CHECK-NEXT: [[LOAD_RET_VALUE:%.*]] = load i8, i8* [[COMPARE_3_TYPED]], align 1 +// CHECK-NEXT: store i8 [[LOAD_RET_VALUE]], i8* [[OUTPUT_ARG:%.*]], align 1 // CHECK-NEXT: ret void -// CHECK: define void @sort__1(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC1:%.*]]) { +// CHECK: define void @sort__1(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]]) { // CHECK-NEXT: entry: // CHECK-NEXT: [[COMPARE_RETURN_BUFFER:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[SORT_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0]], i64 0 -// CHECK-NEXT: [[SORT_TYPED:%.*]] = bitcast i8* [[SORT_RAW]] to [2 x [3 x float]]* -// CHECK-NEXT: [[X_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1]], i64 0 -// CHECK-NEXT: [[X_TYPED:%.*]] = bitcast i8* [[X_RAW]] to [2 x [3 x float]]* -// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 -// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP0]] to i64 -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 -// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP1]] to i64 -// CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 -// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP2]], [[THREAD_ID]] +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to [2 x [3 x float]]* +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0]], i64 0 +// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* 
[[TMP2]] to [2 x [3 x float]]* +// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 +// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP4]] to i64 +// CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 +// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP5]] to i64 +// CHECK-NEXT: [[TMP6:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 +// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP6]], [[THREAD_ID]] // CHECK-NEXT: [[LINEAR_INDEX_IN_RANGE:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 // CHECK-NEXT: call void @llvm.assume(i1 [[LINEAR_INDEX_IN_RANGE]]) -// CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = urem i64 [[TMP3]], 2 -// CHECK-NEXT: [[TMP5:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 -// CHECK-NEXT: br i1 [[TMP6]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] +// CHECK-NEXT: [[TMP7:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = urem i64 [[TMP7]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 +// CHECK-NEXT: [[TMP10:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 +// CHECK-NEXT: br i1 [[TMP10]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] // CHECK: sort.in_bounds-after: // CHECK-NEXT: ret void // CHECK: sort.in_bounds-true: -// CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP4]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = icmp slt i64 [[TMP4]], [[TMP7]] -// CHECK-NEXT: [[TMP9:%.*]] = icmp slt i64 [[TMP7]], 3 -// CHECK-NEXT: [[TMP10:%.*]] = and i1 [[TMP8]], [[TMP9]] -// CHECK-NEXT: br i1 [[TMP10]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] +// CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP8]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = icmp slt i64 [[TMP8]], [[TMP11]] +// CHECK-NEXT: [[TMP13:%.*]] = icmp slt i64 [[TMP11]], 3 +// CHECK-NEXT: [[TMP14:%.*]] = and i1 [[TMP12]], [[TMP13]] +// CHECK-NEXT: br i1 [[TMP14]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] // CHECK: smaller_comparison_index-after: // CHECK-NEXT: br label [[SORT_IN_BOUNDS_AFTER]] // CHECK: smaller_comparison_index-true: -// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP7]] -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP4]] -// CHECK-NEXT: call void @compare(float* [[TMP11]], float* [[TMP12]], i8* [[COMPARE_RETURN_BUFFER]]) -// CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 -// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP13]], 0 +// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP11]] +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP8]] +// CHECK-NEXT: call void @region_0_4(float* [[TMP15]], float* [[TMP16]], i8* [[COMPARE_RETURN_BUFFER]]) +// CHECK-NEXT: [[TMP17:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 +// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP17]], 0 // CHECK-NEXT: br i1 [[BOOLEAN_PREDICATE]], label [[IS_SMALLER_THAN_TRUE:%.*]], label [[IS_SMALLER_THAN_AFTER:%.*]] // CHECK: is_smaller_than-after: // CHECK-NEXT: br label [[SMALLER_COMPARISON_INDEX_AFTER]] // CHECK: is_smaller_than-true: -// CHECK-NEXT: [[TMP14:%.*]] = 
load float, float* [[TMP11]], align 4 -// CHECK-NEXT: [[TMP15:%.*]] = load float, float* [[TMP12]], align 4 -// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP4]] -// CHECK-NEXT: store float [[TMP14]], float* [[TMP16]], align 4 -// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP7]] -// CHECK-NEXT: store float [[TMP15]], float* [[TMP17]], align 4 +// CHECK-NEXT: [[TMP18:%.*]] = load float, float* [[TMP15]], align 4 +// CHECK-NEXT: [[TMP19:%.*]] = load float, float* [[TMP16]], align 4 +// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP8]] +// CHECK-NEXT: store float [[TMP18]], float* [[TMP20]], align 4 +// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP11]] +// CHECK-NEXT: store float [[TMP19]], float* [[TMP21]], align 4 // CHECK-NEXT: br label [[IS_SMALLER_THAN_AFTER]] -// CHECK: define void @sort__2(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC1:%.*]]) { +// CHECK: define void @sort__2(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]]) { // CHECK-NEXT: entry: // CHECK-NEXT: [[COMPARE_RETURN_BUFFER:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[SORT_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 -// CHECK-NEXT: [[SORT_TYPED:%.*]] = bitcast i8* [[SORT_RAW]] to [2 x [3 x float]]* -// CHECK-NEXT: [[X_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1:%.*]], i64 0 -// CHECK-NEXT: [[X_TYPED:%.*]] = bitcast i8* [[X_RAW]] to [2 x [3 x float]]* -// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 -// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP0]] to i64 -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 -// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP1]] to i64 -// CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 -// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP2]], [[THREAD_ID]] +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to [2 x [3 x float]]* +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0]], i64 0 +// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [2 x [3 x float]]* +// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 +// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP4]] to i64 +// CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 +// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP5]] to i64 +// CHECK-NEXT: [[TMP6:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 +// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP6]], [[THREAD_ID]] // CHECK-NEXT: [[LINEAR_INDEX_IN_RANGE:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 // CHECK-NEXT: call void @llvm.assume(i1 [[LINEAR_INDEX_IN_RANGE]]) -// CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = urem i64 [[TMP3]], 2 -// CHECK-NEXT: [[TMP5:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 -// CHECK-NEXT: br i1 [[TMP6]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] +// CHECK-NEXT: [[TMP7:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = urem i64 [[TMP7]], 
2 +// CHECK-NEXT: [[TMP9:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 +// CHECK-NEXT: [[TMP10:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 +// CHECK-NEXT: br i1 [[TMP10]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] // CHECK: sort.in_bounds-after: // CHECK-NEXT: ret void // CHECK: sort.in_bounds-true: -// CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP4]], 2 -// CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 1 -// CHECK-NEXT: [[TMP9:%.*]] = icmp slt i64 [[TMP7]], [[TMP8]] -// CHECK-NEXT: [[TMP10:%.*]] = icmp slt i64 [[TMP8]], 3 -// CHECK-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]] -// CHECK-NEXT: br i1 [[TMP11]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] +// CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP8]], 2 +// CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 1 +// CHECK-NEXT: [[TMP13:%.*]] = icmp slt i64 [[TMP11]], [[TMP12]] +// CHECK-NEXT: [[TMP14:%.*]] = icmp slt i64 [[TMP12]], 3 +// CHECK-NEXT: [[TMP15:%.*]] = and i1 [[TMP13]], [[TMP14]] +// CHECK-NEXT: br i1 [[TMP15]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] // CHECK: smaller_comparison_index-after: // CHECK-NEXT: br label [[SORT_IN_BOUNDS_AFTER]] // CHECK: smaller_comparison_index-true: -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP8]] -// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP7]] -// CHECK-NEXT: call void @compare(float* [[TMP12]], float* [[TMP13]], i8* [[COMPARE_RETURN_BUFFER]]) -// CHECK-NEXT: [[TMP14:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 -// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP14]], 0 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP12]] +// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP11]] +// CHECK-NEXT: call void @region_0_4(float* [[TMP16]], float* [[TMP17]], i8* [[COMPARE_RETURN_BUFFER]]) +// CHECK-NEXT: [[TMP18:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 +// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP18]], 0 // CHECK-NEXT: br i1 [[BOOLEAN_PREDICATE]], label [[IS_SMALLER_THAN_TRUE:%.*]], label [[IS_SMALLER_THAN_AFTER:%.*]] // CHECK: is_smaller_than-after: // CHECK-NEXT: br label [[SMALLER_COMPARISON_INDEX_AFTER]] // CHECK: is_smaller_than-true: -// CHECK-NEXT: [[TMP15:%.*]] = load float, float* [[TMP12]], align 4 -// CHECK-NEXT: [[TMP16:%.*]] = load float, float* [[TMP13]], align 4 -// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP7]] -// CHECK-NEXT: store float [[TMP15]], float* [[TMP17]], align 4 -// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP8]] -// CHECK-NEXT: store float [[TMP16]], float* [[TMP18]], align 4 +// CHECK-NEXT: [[TMP19:%.*]] = load float, float* [[TMP16]], align 4 +// CHECK-NEXT: [[TMP20:%.*]] = load float, float* [[TMP17]], align 4 +// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP11]] +// CHECK-NEXT: store float [[TMP19]], float* [[TMP21]], align 4 +// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x 
float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP12]] +// CHECK-NEXT: store float [[TMP20]], float* [[TMP22]], align 4 // CHECK-NEXT: br label [[IS_SMALLER_THAN_AFTER]] ENTRY main { x = f32[2, 3] parameter(0) @@ -182,210 +182,198 @@ compare { ROOT lt = pred[] compare(p.1.lhs, p.1.rhs), direction=LT } -// CHECK: define void @sort(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 64 dereferenceable(24) [[ALLOC1:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC2:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC3:%.*]], i8* noalias align 64 dereferenceable(16) [[ALLOC4:%.*]]) +// CHECK: define void @sort(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 64 dereferenceable(24) [[ALLOC1:%.*]], i8* noalias align 64 dereferenceable(16) [[ALLOC4:%.*]]) // CHECK-NEXT: entry: // CHECK-NEXT: [[COMPARE_RETURN_BUFFER:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[SORT_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC4]], i64 0 -// CHECK-NEXT: [[SORT_TYPED:%.*]] = bitcast i8* [[SORT_RAW]] to [2 x i8*]* -// CHECK-NEXT: [[SORT_RAW1:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0]], i64 0 -// CHECK-NEXT: [[SORT_TYPED2:%.*]] = bitcast i8* [[SORT_RAW1]] to [2 x [3 x i32]]* -// CHECK-NEXT: [[SORT_RAW3:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1]], i64 0 -// CHECK-NEXT: [[SORT_TYPED4:%.*]] = bitcast i8* [[SORT_RAW3]] to [2 x [3 x float]]* -// CHECK-NEXT: [[X_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC2]], i64 0 -// CHECK-NEXT: [[X_TYPED:%.*]] = bitcast i8* [[X_RAW]] to [2 x [3 x i32]]* -// CHECK-NEXT: [[Y_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC3]], i64 0 -// CHECK-NEXT: [[Y_TYPED:%.*]] = bitcast i8* [[Y_RAW]] to [2 x [3 x float]]* -// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 -// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP0]] to i64 -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 -// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP1]] to i64 -// CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 -// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP2]], [[THREAD_ID]] +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to [2 x [3 x i32]]* +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1:%.*]], i64 0 +// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [2 x [3 x float]]* +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[ALLOC4:%.*]], i64 0 +// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* +// CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 +// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 +// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-NEXT: [[TMP8:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 +// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP8]], [[THREAD_ID]] // CHECK-NEXT: [[LINEAR_INDEX_IN_RANGE:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 // CHECK-NEXT: call void @llvm.assume(i1 [[LINEAR_INDEX_IN_RANGE]]) -// CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = urem i64 [[TMP3]], 2 -// CHECK-NEXT: [[TMP5:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 -// CHECK-NEXT: br i1 [[TMP6]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] +// CHECK-NEXT: 
[[TMP9:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 +// CHECK-NEXT: [[TMP10:%.*]] = urem i64 [[TMP9]], 2 +// CHECK-NEXT: [[TMP11:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 +// CHECK-NEXT: [[TMP12:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 +// CHECK-NEXT: br i1 [[TMP12]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] // CHECK: sort.in_bounds-after: // CHECK-NEXT: ret void // CHECK: sort.in_bounds-true: -// CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP4]], 2 -// CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 1 -// CHECK-NEXT: [[TMP9:%.*]] = icmp slt i64 [[TMP7]], [[TMP8]] -// CHECK-NEXT: [[TMP10:%.*]] = icmp slt i64 [[TMP8]], 3 -// CHECK-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]] -// CHECK-NEXT: br i1 [[TMP11]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] +// CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP10]], 2 +// CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 1 +// CHECK-NEXT: [[TMP15:%.*]] = icmp slt i64 [[TMP13]], [[TMP14]] +// CHECK-NEXT: [[TMP16:%.*]] = icmp slt i64 [[TMP14]], 3 +// CHECK-NEXT: [[TMP17:%.*]] = and i1 [[TMP15]], [[TMP16]] +// CHECK-NEXT: br i1 [[TMP17]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] // CHECK: smaller_comparison_index-after: // CHECK-NEXT: br label [[SORT_IN_BOUNDS_AFTER]] // CHECK: smaller_comparison_index-true: -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP8]] -// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP7]] -// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP8]] -// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP7]] -// CHECK-NEXT: call void @compare(i32* [[TMP12]], i32* [[TMP13]], float* [[TMP14]], float* [[TMP15]], i8* [[COMPARE_RETURN_BUFFER]]) -// CHECK-NEXT: [[TMP16:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 -// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP16]], 0 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP14]] +// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP13]] +// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP14]] +// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP13]] +// CHECK-NEXT: call void @region_0_6(i32* [[TMP18]], i32* [[TMP19]], float* [[TMP20]], float* [[TMP21]], i8* [[COMPARE_RETURN_BUFFER]]) +// CHECK-NEXT: [[TMP22:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 +// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP22]], 0 // CHECK-NEXT: br i1 [[BOOLEAN_PREDICATE]], label [[IS_SMALLER_THAN_TRUE:%.*]], label [[IS_SMALLER_THAN_AFTER:%.*]] // CHECK: is_smaller_than-after: // CHECK-NEXT: br label [[SMALLER_COMPARISON_INDEX_AFTER]] // CHECK: is_smaller_than-true: -// CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 
[[TMP7]] -// CHECK-NEXT: store i32 [[TMP17]], i32* [[TMP19]], align 4 -// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP8]] -// CHECK-NEXT: store i32 [[TMP18]], i32* [[TMP20]], align 4 -// CHECK-NEXT: [[TMP21:%.*]] = load float, float* [[TMP14]], align 4 -// CHECK-NEXT: [[TMP22:%.*]] = load float, float* [[TMP15]], align 4 -// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP7]] -// CHECK-NEXT: store float [[TMP21]], float* [[TMP23]], align 4 -// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP8]] -// CHECK-NEXT: store float [[TMP22]], float* [[TMP24]], align 4 +// CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP13]] +// CHECK-NEXT: store i32 [[TMP23]], i32* [[TMP25]], align 4 +// CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP14]] +// CHECK-NEXT: store i32 [[TMP24]], i32* [[TMP26]], align 4 +// CHECK-NEXT: [[TMP27:%.*]] = load float, float* [[TMP20]], align 4 +// CHECK-NEXT: [[TMP28:%.*]] = load float, float* [[TMP21]], align 4 +// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP13]] +// CHECK-NEXT: store float [[TMP27]], float* [[TMP29]], align 4 +// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP14]] +// CHECK-NEXT: store float [[TMP28]], float* [[TMP30]], align 4 // CHECK-NEXT: br label [[IS_SMALLER_THAN_AFTER]] -// CHECK: define internal void @compare(i32* dereferenceable(4) [[P_0_LHS_TYPED:%.*]], i32* dereferenceable(4) [[P_0_RHS_TYPED:%.*]], float* dereferenceable(4) [[P_1_LHS_TYPED:%.*]], float* dereferenceable(4) [[P_1_RHS_TYPED:%.*]], i8* dereferenceable(1) [[OUTPUT_ARG:%.*]]) +// CHECK: define internal void @region_0_6(i32* dereferenceable(4) [[P_0_LHS_TYPED:%.*]], i32* dereferenceable(4) [[P_0_RHS_TYPED:%.*]], float* dereferenceable(4) [[P_1_LHS_TYPED:%.*]], float* dereferenceable(4) [[P_1_RHS_TYPED:%.*]], i8* dereferenceable(1) [[OUTPUT_ARG:%.*]]) // CHECK-NEXT: entry: -// CHECK-NEXT: [[LT_TYPED:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[P_1_LHS_TYPED]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[P_1_RHS_TYPED]], align 4 +// CHECK-NEXT: [[COMPARE_5_TYPED:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[ARG_2_3_TYPED:%.*]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARG_3_4_TYPED:%.*]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = fcmp olt float [[TMP0]], [[TMP1]] // CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i8 -// CHECK-NEXT: store i8 [[TMP3]], i8* [[LT_TYPED]], align 1 -// CHECK-NEXT: [[LOAD_RET_VALUE:%.*]] = load i8, i8* [[LT_TYPED]], align 1 -// CHECK-NEXT: store i8 [[LOAD_RET_VALUE]], i8* [[OUTPUT_ARG]], align 1 +// CHECK-NEXT: store i8 [[TMP3]], i8* [[COMPARE_5_TYPED]], align 1 +// CHECK-NEXT: [[LOAD_RET_VALUE:%.*]] = load i8, i8* [[COMPARE_5_TYPED]], align 1 +// CHECK-NEXT: store i8 [[LOAD_RET_VALUE]], i8* [[OUTPUT_ARG:%.*]], align 1 // CHECK-NEXT: ret void -// CHECK: 
define void @sort__1(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 64 dereferenceable(24) [[ALLOC1:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC2:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC3:%.*]], i8* noalias align 64 dereferenceable(16) [[ALLOC4:%.*]]) +// CHECK: define void @sort__1(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 64 dereferenceable(24) [[ALLOC1:%.*]], i8* noalias align 64 dereferenceable(16) [[ALLOC4:%.*]]) // CHECK-NEXT: entry: // CHECK-NEXT: [[COMPARE_RETURN_BUFFER:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[SORT_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC4:%.*]], i64 0 -// CHECK-NEXT: [[SORT_TYPED:%.*]] = bitcast i8* [[SORT_RAW]] to [2 x i8*]* -// CHECK-NEXT: [[SORT_RAW1:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 -// CHECK-NEXT: [[SORT_TYPED2:%.*]] = bitcast i8* [[SORT_RAW1]] to [2 x [3 x i32]]* -// CHECK-NEXT: [[SORT_RAW3:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1:%.*]], i64 0 -// CHECK-NEXT: [[SORT_TYPED4:%.*]] = bitcast i8* [[SORT_RAW3]] to [2 x [3 x float]]* -// CHECK-NEXT: [[X_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC2:%.*]], i64 0 -// CHECK-NEXT: [[X_TYPED:%.*]] = bitcast i8* [[X_RAW]] to [2 x [3 x i32]]* -// CHECK-NEXT: [[Y_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC3:%.*]], i64 0 -// CHECK-NEXT: [[Y_TYPED:%.*]] = bitcast i8* [[Y_RAW]] to [2 x [3 x float]]* -// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 -// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP0]] to i64 -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 -// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP1]] to i64 -// CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 -// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP2]], [[THREAD_ID]] +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to [2 x [3 x i32]]* +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1:%.*]], i64 0 +// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [2 x [3 x float]]* +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[ALLOC4:%.*]], i64 0 +// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* +// CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 +// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 +// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-NEXT: [[TMP8:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 +// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP8]], [[THREAD_ID]] // CHECK-NEXT: [[LINEAR_INDEX_IN_RANGE:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 // CHECK-NEXT: call void @llvm.assume(i1 [[LINEAR_INDEX_IN_RANGE]]) -// CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = urem i64 [[TMP3]], 2 -// CHECK-NEXT: [[TMP5:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 -// CHECK-NEXT: br i1 [[TMP6]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] +// CHECK-NEXT: [[TMP9:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 +// CHECK-NEXT: [[TMP10:%.*]] = urem i64 [[TMP9]], 2 +// CHECK-NEXT: [[TMP11:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 +// CHECK-NEXT: [[TMP12:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 +// CHECK-NEXT: br i1 [[TMP12]], label 
[[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] // CHECK: sort.in_bounds-after: // CHECK-NEXT: ret void // CHECK: sort.in_bounds-true: -// CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP4]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = icmp slt i64 [[TMP4]], [[TMP7]] -// CHECK-NEXT: [[TMP9:%.*]] = icmp slt i64 [[TMP7]], 3 -// CHECK-NEXT: [[TMP10:%.*]] = and i1 [[TMP8]], [[TMP9]] -// CHECK-NEXT: br i1 [[TMP10]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] +// CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP10]], 3 +// CHECK-NEXT: [[TMP14:%.*]] = icmp slt i64 [[TMP10]], [[TMP13]] +// CHECK-NEXT: [[TMP15:%.*]] = icmp slt i64 [[TMP13]], 3 +// CHECK-NEXT: [[TMP16:%.*]] = and i1 [[TMP14]], [[TMP15]] +// CHECK-NEXT: br i1 [[TMP16]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] // CHECK: smaller_comparison_index-after: // CHECK-NEXT: br label [[SORT_IN_BOUNDS_AFTER]] // CHECK: smaller_comparison_index-true: -// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP7]] -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP4]] -// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP7]] -// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP4]] -// CHECK-NEXT: call void @compare(i32* [[TMP11]], i32* [[TMP12]], float* [[TMP13]], float* [[TMP14]], i8* [[COMPARE_RETURN_BUFFER]]) -// CHECK-NEXT: [[TMP15:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 -// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP15]], 0 +// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP13]] +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP10]] +// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP13]] +// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP10]] +// CHECK-NEXT: call void @region_0_6(i32* [[TMP17]], i32* [[TMP18]], float* [[TMP19]], float* [[TMP20]], i8* [[COMPARE_RETURN_BUFFER]]) +// CHECK-NEXT: [[TMP21:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 +// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP21]], 0 // CHECK-NEXT: br i1 [[BOOLEAN_PREDICATE]], label [[IS_SMALLER_THAN_TRUE:%.*]], label [[IS_SMALLER_THAN_AFTER:%.*]] // CHECK: is_smaller_than-after: // CHECK-NEXT: br label [[SMALLER_COMPARISON_INDEX_AFTER]] // CHECK: is_smaller_than-true: -// CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP4]] -// CHECK-NEXT: store i32 [[TMP16]], i32* [[TMP18]], align 4 -// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP7]] -// CHECK-NEXT: store i32 [[TMP17]], i32* [[TMP19]], align 4 -// CHECK-NEXT: [[TMP20:%.*]] = load float, float* [[TMP13]], align 4 -// CHECK-NEXT: [[TMP21:%.*]] = 
load float, float* [[TMP14]], align 4 -// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP4]] -// CHECK-NEXT: store float [[TMP20]], float* [[TMP22]], align 4 -// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP7]] -// CHECK-NEXT: store float [[TMP21]], float* [[TMP23]], align 4 +// CHECK-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP10]] +// CHECK-NEXT: store i32 [[TMP22]], i32* [[TMP24]], align 4 +// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP13]] +// CHECK-NEXT: store i32 [[TMP23]], i32* [[TMP25]], align 4 +// CHECK-NEXT: [[TMP26:%.*]] = load float, float* [[TMP19]], align 4 +// CHECK-NEXT: [[TMP27:%.*]] = load float, float* [[TMP20]], align 4 +// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP10]] +// CHECK-NEXT: store float [[TMP26]], float* [[TMP28]], align 4 +// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP13]] +// CHECK-NEXT: store float [[TMP27]], float* [[TMP29]], align 4 // CHECK-NEXT: br label [[IS_SMALLER_THAN_AFTER]] -// CHECK: define void @sort__2(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 64 dereferenceable(24) [[ALLOC1:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC2:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC3:%.*]], i8* noalias align 64 dereferenceable(16) [[ALLOC4:%.*]]) +// CHECK: define void @sort__2(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 64 dereferenceable(24) [[ALLOC1:%.*]], i8* noalias align 64 dereferenceable(16) [[ALLOC4:%.*]]) // CHECK-NEXT: entry: // CHECK-NEXT: [[COMPARE_RETURN_BUFFER:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[SORT_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC4:%.*]], i64 0 -// CHECK-NEXT: [[SORT_TYPED:%.*]] = bitcast i8* [[SORT_RAW]] to [2 x i8*]* -// CHECK-NEXT: [[SORT_RAW1:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 -// CHECK-NEXT: [[SORT_TYPED2:%.*]] = bitcast i8* [[SORT_RAW1]] to [2 x [3 x i32]]* -// CHECK-NEXT: [[SORT_RAW3:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1:%.*]], i64 0 -// CHECK-NEXT: [[SORT_TYPED4:%.*]] = bitcast i8* [[SORT_RAW3]] to [2 x [3 x float]]* -// CHECK-NEXT: [[X_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC2:%.*]], i64 0 -// CHECK-NEXT: [[X_TYPED:%.*]] = bitcast i8* [[X_RAW]] to [2 x [3 x i32]]* -// CHECK-NEXT: [[Y_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC3:%.*]], i64 0 -// CHECK-NEXT: [[Y_TYPED:%.*]] = bitcast i8* [[Y_RAW]] to [2 x [3 x float]]* -// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 -// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP0]] to i64 -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 -// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP1]] to i64 -// CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 -// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP2]], [[THREAD_ID]] +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 +// CHECK-NEXT: 
[[TMP1:%.*]] = bitcast i8* [[TMP0]] to [2 x [3 x i32]]* +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1:%.*]], i64 0 +// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [2 x [3 x float]]* +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[ALLOC4:%.*]], i64 0 +// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* +// CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 +// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 +// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-NEXT: [[TMP8:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 +// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP8]], [[THREAD_ID]] // CHECK-NEXT: [[LINEAR_INDEX_IN_RANGE:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 // CHECK-NEXT: call void @llvm.assume(i1 [[LINEAR_INDEX_IN_RANGE]]) -// CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = urem i64 [[TMP3]], 2 -// CHECK-NEXT: [[TMP5:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 -// CHECK-NEXT: br i1 [[TMP6]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] +// CHECK-NEXT: [[TMP9:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 +// CHECK-NEXT: [[TMP10:%.*]] = urem i64 [[TMP9]], 2 +// CHECK-NEXT: [[TMP11:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 +// CHECK-NEXT: [[TMP12:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 +// CHECK-NEXT: br i1 [[TMP12]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] // CHECK: sort.in_bounds-after: -// CHECK-NEXT: [[TMP7:%.*]] = bitcast [2 x [3 x i32]]* [[SORT_TYPED2]] to i8* -// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[SORT_TYPED]], i64 0, i64 0 -// CHECK-NEXT: store i8* [[TMP7]], i8** [[TMP8]], align 8 -// CHECK-NEXT: [[TMP9:%.*]] = bitcast [2 x [3 x float]]* [[SORT_TYPED4]] to i8* -// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[SORT_TYPED]], i64 0, i64 1 -// CHECK-NEXT: store i8* [[TMP9]], i8** [[TMP10]], align 8 +// CHECK-NEXT: [[TMP13:%.*]] = bitcast [2 x [3 x i32]]* [[TMP1]] to i8* +// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0 +// CHECK-NEXT: store i8* [[TMP13]], i8** [[TMP14]], align 8 +// CHECK-NEXT: [[TMP15:%.*]] = bitcast [2 x [3 x float]]* [[TMP3]] to i8* +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 +// CHECK-NEXT: store i8* [[TMP15]], i8** [[TMP16]], align 8 // CHECK-NEXT: ret void // CHECK: sort.in_bounds-true: -// CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP4]], 2 -// CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 1 -// CHECK-NEXT: [[TMP13:%.*]] = icmp slt i64 [[TMP11]], [[TMP12]] -// CHECK-NEXT: [[TMP14:%.*]] = icmp slt i64 [[TMP12]], 3 -// CHECK-NEXT: [[TMP15:%.*]] = and i1 [[TMP13]], [[TMP14]] -// CHECK-NEXT: br i1 [[TMP15]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] +// CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP10]], 2 +// CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP17]], 1 +// CHECK-NEXT: [[TMP19:%.*]] = icmp slt i64 [[TMP17]], [[TMP18]] +// CHECK-NEXT: [[TMP20:%.*]] = icmp slt i64 [[TMP18]], 3 +// CHECK-NEXT: [[TMP21:%.*]] = and i1 [[TMP19]], [[TMP20]] +// CHECK-NEXT: br i1 [[TMP21]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] // CHECK: smaller_comparison_index-after: // CHECK-NEXT: 
br label [[SORT_IN_BOUNDS_AFTER]] // CHECK: smaller_comparison_index-true: -// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP12]] -// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP11]] -// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP12]] -// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP11]] -// CHECK-NEXT: call void @compare(i32* [[TMP16]], i32* [[TMP17]], float* [[TMP18]], float* [[TMP19]], i8* [[COMPARE_RETURN_BUFFER]]) -// CHECK-NEXT: [[TMP20:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 -// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP20]], 0 +// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP18]] +// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP17]] +// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP18]] +// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP17]] +// CHECK-NEXT: call void @region_0_6(i32* [[TMP22]], i32* [[TMP23]], float* [[TMP24]], float* [[TMP25]], i8* [[COMPARE_RETURN_BUFFER]]) +// CHECK-NEXT: [[TMP26:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 +// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP26]], 0 // CHECK-NEXT: br i1 [[BOOLEAN_PREDICATE]], label [[IS_SMALLER_THAN_TRUE:%.*]], label [[IS_SMALLER_THAN_AFTER:%.*]] // CHECK: is_smaller_than-after: // CHECK-NEXT: br label [[SMALLER_COMPARISON_INDEX_AFTER]] // CHECK: is_smaller_than-true: -// CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP16]], align 4 -// CHECK-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP11]] -// CHECK-NEXT: store i32 [[TMP21]], i32* [[TMP23]], align 4 -// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP12]] -// CHECK-NEXT: store i32 [[TMP22]], i32* [[TMP24]], align 4 -// CHECK-NEXT: [[TMP25:%.*]] = load float, float* [[TMP18]], align 4 -// CHECK-NEXT: [[TMP26:%.*]] = load float, float* [[TMP19]], align 4 -// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP11]] -// CHECK-NEXT: store float [[TMP25]], float* [[TMP27]], align 4 -// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP12]] -// CHECK-NEXT: store float [[TMP26]], float* [[TMP28]], align 4 +// CHECK-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP17]] +// CHECK-NEXT: store i32 [[TMP27]], i32* [[TMP29]], align 4 +// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 
[[TMP18]] +// CHECK-NEXT: store i32 [[TMP28]], i32* [[TMP30]], align 4 +// CHECK-NEXT: [[TMP31:%.*]] = load float, float* [[TMP24]], align 4 +// CHECK-NEXT: [[TMP32:%.*]] = load float, float* [[TMP25]], align 4 +// CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP17]] +// CHECK-NEXT: store float [[TMP31]], float* [[TMP33]], align 4 +// CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP18]] +// CHECK-NEXT: store float [[TMP32]], float* [[TMP34]], align 4 // CHECK-NEXT: br label [[IS_SMALLER_THAN_AFTER]] ENTRY main { x = s32[2, 3] parameter(0) diff --git a/tensorflow/compiler/xla/service/gpu/tests/sorting_test.cc b/tensorflow/compiler/xla/service/gpu/tests/sorting_test.cc new file mode 100644 index 00000000000..197a0c6cfeb --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/tests/sorting_test.cc @@ -0,0 +1,71 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include + +#include "tensorflow/compiler/xla/service/gpu/gpu_executable.h" +#include "tensorflow/compiler/xla/service/gpu/tests/gpu_codegen_test.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_module_config.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/tests/filecheck.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/xla.pb.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/stream_executor/lib/statusor.h" + +namespace xla { +namespace gpu { + +namespace { + +class SortingTest : public GpuCodegenTest { + protected: + HloModuleConfig ConfigWithoutLayoutAssignment() { + HloModuleConfig config; + auto debug_options = HloTestBase::GetDebugOptionsForTest(); + // Disable layout_assignment to use the preassigned layouts. 
+ debug_options.add_xla_disable_hlo_passes("layout-assignment"); + config.set_debug_options(debug_options); + return config; + } +}; + +TEST_F(SortingTest, Regression1) { + const char* hlo_text = R"( +HloModule TestModule + +compare { + p.0.lhs = f32[] parameter(0) + p.0.rhs = f32[] parameter(1) + ROOT lt = pred[] compare(p.0.lhs, p.0.rhs), direction=LT +} + +ENTRY TestComputation { + x = f32[3, 2]{1, 0} parameter(0) + x.copy = f32[3, 2]{0, 1} copy(x) + ROOT sort = f32[3, 2]{0, 1} sort(x.copy), dimensions={1}, to_apply=compare +} + +)"; + + EXPECT_TRUE(RunAndCompareNoHloPasses(hlo_text, ErrorSpec{1e-5, 1e-5})); +} + +} // namespace +} // namespace gpu +} // namespace xla diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc index b01ae2efe43..2963d546380 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc @@ -415,9 +415,10 @@ llvm::Instruction* AddRangeMetadata(int64 lower, int64 upper, return inst; } -string IrName(string a) { - a.erase(std::remove(a.begin(), a.end(), '%'), a.end()); - return a; +string IrName(absl::string_view a) { + std::string s(a); + s.erase(std::remove(s.begin(), s.end(), '%'), s.end()); + return s; } string IrName(absl::string_view a, absl::string_view b) { diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h index 642965b6470..c0a55e4da33 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h @@ -87,7 +87,7 @@ string DumpModuleToString(const llvm::Module& module); // - joining all of the nonempty inputs by '.', and then // - removing all '%'s. // -string IrName(string a); +string IrName(absl::string_view a); string IrName(absl::string_view a, absl::string_view b); string IrName(const HloInstruction* a, absl::string_view b = ""); From 8de090d4b11d8a42083e42587a173efb7e7ff449 Mon Sep 17 00:00:00 2001 From: Robert David Date: Wed, 5 Aug 2020 15:56:16 -0700 Subject: [PATCH 2204/2522] Enable GPU delegate in benchmark_model on Linux (non-Android) when CL_DELEGATE_NO_GL is defined. PiperOrigin-RevId: 325119498 Change-Id: I8c8c6bdbc51a4c6fbf69ba0b4683371a07ccd4f5 --- tensorflow/lite/delegates/gpu/BUILD | 11 +++++++++++ tensorflow/lite/tools/delegates/BUILD | 4 +++- .../tools/delegates/gpu_delegate_provider.cc | 19 ++++++++++--------- tensorflow/lite/tools/evaluation/BUILD | 6 +++++- tensorflow/lite/tools/evaluation/utils.cc | 8 ++++---- tensorflow/lite/tools/evaluation/utils.h | 11 +++++++++-- 6 files changed, 42 insertions(+), 17 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/BUILD b/tensorflow/lite/delegates/gpu/BUILD index 0e40095f255..4113d34b3f8 100644 --- a/tensorflow/lite/delegates/gpu/BUILD +++ b/tensorflow/lite/delegates/gpu/BUILD @@ -1,3 +1,4 @@ +load("@bazel_skylib//lib:selects.bzl", "selects") load("//tensorflow/lite:special_rules.bzl", "tflite_extra_gles_deps") load("@build_bazel_rules_apple//apple:ios.bzl", "ios_static_framework") load("@build_bazel_rules_apple//apple:macos.bzl", "macos_dylib") @@ -219,6 +220,16 @@ cc_library( ], ) +# Currently the GPU delegate needs to be built on Android (due to EGL dependency), +# or built with -DCL_DELEGATE_NO_GL (disabling OpenGL backend fallback), or both. 
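The practical effect of this change is that the GPU-delegate code paths guarded by TFLITE_SUPPORTS_GPU_DELEGATE (the macro introduced in tools/evaluation/utils.h further down in this patch) become reachable in a plain Linux build of benchmark_model, not just on Android. A minimal sketch of a call site follows; the tflite::evaluation namespace qualification and the helper name MakeGpuDelegate are assumptions for illustration, not part of the patch:

  #include "tensorflow/lite/tools/evaluation/utils.h"

  // Sketch only: CreateGPUDelegate() yields an OpenCL-backed delegate when
  // TFLITE_SUPPORTS_GPU_DELEGATE is defined (Android, or a build passing
  // -DCL_DELEGATE_NO_GL after this patch) and a null delegate otherwise,
  // so callers can invoke it unconditionally.
  tflite::evaluation::TfLiteDelegatePtr MakeGpuDelegate() {
    return tflite::evaluation::CreateGPUDelegate();
  }
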
+selects.config_setting_group( + name = "supports_gpu_delegate", + match_any = [ + "//tensorflow:android", + "//tensorflow/lite/delegates/gpu/cl:opencl_delegate_no_gl", + ], +) + cc_library( name = "delegate", srcs = ["delegate.cc"], diff --git a/tensorflow/lite/tools/delegates/BUILD b/tensorflow/lite/tools/delegates/BUILD index a8b1485d14b..fca64467bdf 100644 --- a/tensorflow/lite/tools/delegates/BUILD +++ b/tensorflow/lite/tools/delegates/BUILD @@ -64,9 +64,11 @@ cc_library( ":delegate_provider_hdr", "//tensorflow/lite/tools/evaluation:utils", ] + select({ - "//tensorflow:android": [ + "//tensorflow/lite/delegates/gpu:supports_gpu_delegate": [ "//tensorflow/lite/delegates/gpu:delegate", ], + "//conditions:default": [], + }) + select({ "//tensorflow:ios": [ "//tensorflow/lite/delegates/gpu:metal_delegate", ], diff --git a/tensorflow/lite/tools/delegates/gpu_delegate_provider.cc b/tensorflow/lite/tools/delegates/gpu_delegate_provider.cc index 32e5e1b117f..de7ace18cf1 100644 --- a/tensorflow/lite/tools/delegates/gpu_delegate_provider.cc +++ b/tensorflow/lite/tools/delegates/gpu_delegate_provider.cc @@ -16,7 +16,7 @@ limitations under the License. #include "tensorflow/lite/tools/delegates/delegate_provider.h" #include "tensorflow/lite/tools/evaluation/utils.h" -#if defined(__ANDROID__) +#if TFLITE_SUPPORTS_GPU_DELEGATE #include "tensorflow/lite/delegates/gpu/delegate.h" #elif defined(__APPLE__) #include "TargetConditionals.h" @@ -34,13 +34,13 @@ class GpuDelegateProvider : public DelegateProvider { public: GpuDelegateProvider() { default_params_.AddParam("use_gpu", ToolParam::Create(false)); -#if defined(__ANDROID__) || defined(REAL_IPHONE_DEVICE) +#if TFLITE_SUPPORTS_GPU_DELEGATE || defined(REAL_IPHONE_DEVICE) default_params_.AddParam("gpu_precision_loss_allowed", ToolParam::Create(true)); default_params_.AddParam("gpu_experimental_enable_quant", ToolParam::Create(true)); #endif -#if defined(__ANDROID__) +#if TFLITE_SUPPORTS_GPU_DELEGATE default_params_.AddParam("gpu_backend", ToolParam::Create("")); #endif #if defined(REAL_IPHONE_DEVICE) @@ -62,7 +62,7 @@ REGISTER_DELEGATE_PROVIDER(GpuDelegateProvider); std::vector GpuDelegateProvider::CreateFlags(ToolParams* params) const { std::vector flags = { CreateFlag("use_gpu", params, "use gpu"), -#if defined(__ANDROID__) || defined(REAL_IPHONE_DEVICE) +#if TFLITE_SUPPORTS_GPU_DELEGATE || defined(REAL_IPHONE_DEVICE) CreateFlag("gpu_precision_loss_allowed", params, "Allow to process computation in lower precision than " "FP32 in GPU. By default, it's enabled."), @@ -70,7 +70,7 @@ std::vector GpuDelegateProvider::CreateFlags(ToolParams* params) const { "Whether to enable the GPU delegate to run quantized " "models or not. 
By default, it's enabled."), #endif -#if defined(__ANDROID__) +#if TFLITE_SUPPORTS_GPU_DELEGATE CreateFlag( "gpu_backend", params, "Force the GPU delegate to use a particular backend for execution, and " @@ -89,13 +89,13 @@ std::vector GpuDelegateProvider::CreateFlags(ToolParams* params) const { void GpuDelegateProvider::LogParams(const ToolParams& params, bool verbose) const { LOG_TOOL_PARAM(params, bool, "use_gpu", "Use gpu", verbose); -#if defined(__ANDROID__) || defined(REAL_IPHONE_DEVICE) +#if TFLITE_SUPPORTS_GPU_DELEGATE || defined(REAL_IPHONE_DEVICE) LOG_TOOL_PARAM(params, bool, "gpu_precision_loss_allowed", "Allow lower precision in gpu", verbose); LOG_TOOL_PARAM(params, bool, "gpu_experimental_enable_quant", "Enable running quant models in gpu", verbose); #endif -#if defined(__ANDROID__) +#if TFLITE_SUPPORTS_GPU_DELEGATE LOG_TOOL_PARAM(params, std::string, "gpu_backend", "GPU backend", verbose); #endif #if defined(REAL_IPHONE_DEVICE) @@ -109,7 +109,7 @@ TfLiteDelegatePtr GpuDelegateProvider::CreateTfLiteDelegate( TfLiteDelegatePtr delegate(nullptr, [](TfLiteDelegate*) {}); if (params.Get("use_gpu")) { -#if defined(__ANDROID__) +#if TFLITE_SUPPORTS_GPU_DELEGATE TfLiteGpuDelegateOptionsV2 gpu_opts = TfLiteGpuDelegateOptionsV2Default(); if (params.Get("gpu_precision_loss_allowed")) { gpu_opts.inference_priority1 = TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY; @@ -157,7 +157,8 @@ TfLiteDelegatePtr GpuDelegateProvider::CreateTfLiteDelegate( &TFLGpuDelegateDelete); #else TFLITE_LOG(WARN) << "The GPU delegate compile options are only supported " - "on Android or iOS platforms."; + "on Android or iOS platforms or when the tool was " + "built with -DCL_DELEGATE_NO_GL."; delegate = evaluation::CreateGPUDelegate(); #endif diff --git a/tensorflow/lite/tools/evaluation/BUILD b/tensorflow/lite/tools/evaluation/BUILD index 85dfb183254..e653379ef69 100644 --- a/tensorflow/lite/tools/evaluation/BUILD +++ b/tensorflow/lite/tools/evaluation/BUILD @@ -42,8 +42,12 @@ cc_library( deps = [ "//tensorflow/lite/c:common", ] + select({ - "//tensorflow:android": [ + "//tensorflow/lite/delegates/gpu:supports_gpu_delegate": [ "//tensorflow/lite/delegates/gpu:delegate", + ], + "//conditions:default": [], + }) + select({ + "//tensorflow:android": [ "//tensorflow/lite/delegates/nnapi:nnapi_delegate", ], "//conditions:default": [], diff --git a/tensorflow/lite/tools/evaluation/utils.cc b/tensorflow/lite/tools/evaluation/utils.cc index c766a932999..d75270c07e9 100644 --- a/tensorflow/lite/tools/evaluation/utils.cc +++ b/tensorflow/lite/tools/evaluation/utils.cc @@ -114,15 +114,15 @@ TfLiteDelegatePtr CreateNNAPIDelegate(StatefulNnApiDelegate::Options options) { } #endif // defined(__ANDROID__) -#if defined(__ANDROID__) +#if TFLITE_SUPPORTS_GPU_DELEGATE TfLiteDelegatePtr CreateGPUDelegate(TfLiteGpuDelegateOptionsV2* options) { return TfLiteDelegatePtr(TfLiteGpuDelegateV2Create(options), &TfLiteGpuDelegateV2Delete); } -#endif // defined(__ANDROID__) +#endif // TFLITE_SUPPORTS_GPU_DELEGATE TfLiteDelegatePtr CreateGPUDelegate() { -#if defined(__ANDROID__) +#if TFLITE_SUPPORTS_GPU_DELEGATE TfLiteGpuDelegateOptionsV2 options = TfLiteGpuDelegateOptionsV2Default(); options.inference_priority1 = TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY; options.inference_preference = @@ -131,7 +131,7 @@ TfLiteDelegatePtr CreateGPUDelegate() { return CreateGPUDelegate(&options); #else return CreateNullDelegate(); -#endif // defined(__ANDROID__) +#endif // TFLITE_SUPPORTS_GPU_DELEGATE } TfLiteDelegatePtr CreateHexagonDelegate( diff --git 
a/tensorflow/lite/tools/evaluation/utils.h b/tensorflow/lite/tools/evaluation/utils.h index 5d9920cf22b..02013f3e39a 100644 --- a/tensorflow/lite/tools/evaluation/utils.h +++ b/tensorflow/lite/tools/evaluation/utils.h @@ -21,8 +21,15 @@ limitations under the License. #include #include -#if defined(__ANDROID__) +#if defined(__ANDROID__) || defined(CL_DELEGATE_NO_GL) +#define TFLITE_SUPPORTS_GPU_DELEGATE 1 +#endif + +#if TFLITE_SUPPORTS_GPU_DELEGATE #include "tensorflow/lite/delegates/gpu/delegate.h" +#endif + +#if defined(__ANDROID__) #include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h" #if (defined(__arm__) || defined(__aarch64__)) #include "tensorflow/lite/delegates/hexagon/hexagon_delegate.h" @@ -67,7 +74,7 @@ TfLiteDelegatePtr CreateNNAPIDelegate(StatefulNnApiDelegate::Options options); #endif TfLiteDelegatePtr CreateGPUDelegate(); -#if defined(__ANDROID__) +#if TFLITE_SUPPORTS_GPU_DELEGATE TfLiteDelegatePtr CreateGPUDelegate(TfLiteGpuDelegateOptionsV2* options); #endif From 0607c2a41b983fbd1c90159db866950394b86073 Mon Sep 17 00:00:00 2001 From: Berkin Ilbeyi Date: Wed, 5 Aug 2020 16:00:10 -0700 Subject: [PATCH 2205/2522] [XLA] Don't cross-program prefetch buffers that are already pinned to alternate mem. PiperOrigin-RevId: 325120226 Change-Id: I5f7416b3467e17613f10c99871b6326b58988ba8 --- .../xla/service/memory_space_assignment.cc | 3 ++ .../service/memory_space_assignment_test.cc | 41 +++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/tensorflow/compiler/xla/service/memory_space_assignment.cc b/tensorflow/compiler/xla/service/memory_space_assignment.cc index 0f7daa67800..b122fc22e20 100644 --- a/tensorflow/compiler/xla/service/memory_space_assignment.cc +++ b/tensorflow/compiler/xla/service/memory_space_assignment.cc @@ -2295,6 +2295,9 @@ bool IsCrossProgramPrefetchCandidate( return value.instruction()->parent() == value.instruction()->GetModule()->entry_computation() && value.instruction()->opcode() == HloOpcode::kParameter && + (!value.shape().has_layout() || + value.shape().layout().memory_space() != + options.alternate_memory_space) && value.index().size() == 1 && value.shape().IsArray() && !value.uses().empty() && options.size_fn(value) <= options.max_size_in_bytes && diff --git a/tensorflow/compiler/xla/service/memory_space_assignment_test.cc b/tensorflow/compiler/xla/service/memory_space_assignment_test.cc index a52a4caa12c..a3f8024bca8 100644 --- a/tensorflow/compiler/xla/service/memory_space_assignment_test.cc +++ b/tensorflow/compiler/xla/service/memory_space_assignment_test.cc @@ -4354,6 +4354,47 @@ TEST_P(MemorySpaceAssignmentTest, CrossProgramPrefetchFusionTest) { EXPECT_EQ(cross_program_prefetches.size(), 0); } +TEST_P(MemorySpaceAssignmentTest, CrossProgramPrefetchPinnedTest) { + HloComputation::Builder builder(TestName()); + + constexpr int kBatch = 8; + constexpr int kFeature = 8; + constexpr int kOutput = 2; + + auto lhs_shape = ShapeUtil::MakeShape(F32, {kBatch, kFeature}); + auto rhs_shape = ShapeUtil::MakeShapeWithLayout( + F32, {kFeature, kOutput}, + /*minor_to_major=*/{1, 0}, /*tiles=*/{}, /*element_size_in_bits=*/0, + kAlternateMemorySpace); + auto result_shape = ShapeUtil::MakeShape(F32, {kBatch, kOutput}); + auto tuple_shape = ShapeUtil::MakeTupleShape({lhs_shape, rhs_shape}); + HloInstruction* param = builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape, "p0")); + + auto lhs = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(lhs_shape, param, 0)); + auto rhs = builder.AddInstruction( + 
HloInstruction::CreateGetTupleElement(rhs_shape, param, 1)); + + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(1); + dot_dnums.add_rhs_contracting_dimensions(0); + auto dot = builder.AddInstruction(HloInstruction::CreateDot( + result_shape, lhs, rhs, dot_dnums, DefaultPrecisionConfig(2))); + + auto module = CreateNewVerifiedModule(); + HloComputation* computation = module->AddEntryComputation(builder.Build()); + + HloSchedule schedule(module.get()); + schedule.set_sequence(computation, {param, lhs, rhs, dot}); + TF_CHECK_OK(module->set_schedule(schedule)); + + AssignMemorySpace(module.get()); + + auto cross_program_prefetches = module->CrossProgramPrefetches(); + EXPECT_EQ(cross_program_prefetches.size(), 0); +} + using CostAnalysisPrefetchIntervalPickerTest = HloTestBase; TEST_F(CostAnalysisPrefetchIntervalPickerTest, PrefetchIntervalOrder) { From ef9e548c5ffe2c9486f18da62b85bdb76f7cdecc Mon Sep 17 00:00:00 2001 From: Henry Tan Date: Wed, 5 Aug 2020 16:02:38 -0700 Subject: [PATCH 2206/2522] Refactor `TpuCompilationCacheEntry` interface to return `TpuProgramGroupInterface` and `core_index` and makes CacheEntry less transparent and move application specific logics outside of cache. PiperOrigin-RevId: 325120711 Change-Id: I36f03ff585d80dc09a4d468c1b86e01a73e15859 --- tensorflow/core/tpu/kernels/BUILD | 30 +-- .../kernels/tpu_compilation_cache_entry.cc | 54 ----- .../tpu/kernels/tpu_compilation_cache_entry.h | 26 +-- .../tpu_compilation_cache_entry_impl.h | 94 --------- .../kernels/tpu_compilation_cache_external.cc | 53 +++-- .../kernels/tpu_compilation_cache_external.h | 12 -- .../tpu_compilation_cache_interface.cc | 144 +++++++++++-- .../kernels/tpu_compilation_cache_interface.h | 111 ++++------ .../tpu_compilation_cache_local_lookup.cc | 43 +--- .../tpu_compilation_cache_local_lookup.h | 13 +- .../kernels/tpu_compilation_cache_lookup.h | 18 +- .../core/tpu/kernels/tpu_compile_op_common.cc | 40 ---- .../core/tpu/kernels/tpu_compile_op_common.h | 9 - .../tpu/kernels/tpu_compile_op_support.cc | 38 ++++ .../core/tpu/kernels/tpu_compile_op_support.h | 8 + .../core/tpu/kernels/tpu_configuration_ops.cc | 14 +- tensorflow/core/tpu/kernels/tpu_execute_op.cc | 58 +++--- .../core/tpu/kernels/tpu_program_c_api.h | 14 ++ .../core/tpu/kernels/tpu_program_group.cc | 189 ++++++++++++------ .../core/tpu/kernels/tpu_program_group.h | 58 ++---- .../tpu/kernels/tpu_program_group_interface.h | 7 +- tensorflow/core/tpu/tpu_library_init_fns.inc | 2 + 22 files changed, 497 insertions(+), 538 deletions(-) delete mode 100644 tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.cc delete mode 100644 tensorflow/core/tpu/kernels/tpu_compilation_cache_entry_impl.h diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index 75d12f89426..1336f52ed34 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -89,10 +89,6 @@ tf_kernel_library( name = "tpu_configuration_ops", srcs = ["tpu_configuration_ops.cc"], hdrs = ["tpu_configuration_ops.h"], - copts = select({ - WITH_TPU_SUPPORT: ["-DLIBTFTPU"], - DEFAULT: [], - }), deps = [ ":tpu_compilation_cache_factory", ":tpu_compilation_cache_interface", @@ -214,30 +210,14 @@ cc_library( cc_library( name = "tpu_compilation_cache_entry", - srcs = ["tpu_compilation_cache_entry.cc"], hdrs = [ "tpu_compilation_cache_entry.h", ], deps = [ - ":compiled_subgraph", - ":tpu_compilation_cache_proto_cc", ":tpu_executable_info_proto_cc", - ":tpu_program_group", + ":tpu_program_group_interface", 
"//tensorflow/compiler/xla/service:hlo_proto_cc", - "//tensorflow/core:framework", "//tensorflow/core/lib/core:refcount", - "//tensorflow/core/platform:casts", - ], -) - -cc_library( - name = "tpu_compilation_cache_entry_impl", - srcs = [], - hdrs = ["tpu_compilation_cache_entry_impl.h"], - deps = [ - ":compiled_subgraph", - ":tpu_compilation_cache_interface", - ":tpu_executable_info_proto_cc", ], ) @@ -308,6 +288,8 @@ cc_library( "//tensorflow/compiler/tf2xla:host_compute_metadata_proto_cc", "//tensorflow/compiler/xla/service:hlo_proto_cc", "//tensorflow/core/lib/core:status", + "@com_google_absl//absl/time", + "@com_google_absl//absl/types:span", ], ) @@ -347,6 +329,7 @@ cc_library( hdrs = ["tpu_compilation_cache_interface.h"], deps = [ ":compiled_subgraph", + ":tpu_compilation_cache_entry", ":tpu_compilation_cache_key", ":tpu_compilation_cache_proto_cc", ":tpu_compilation_metrics_hdrs", @@ -378,7 +361,6 @@ cc_library( deps = [ ":compiled_subgraph", ":tpu_compilation_cache_entry", - ":tpu_compilation_cache_entry_impl", ":tpu_compilation_cache_interface", ":tpu_compilation_cache_key", ":tpu_compilation_cache_proto_cc", @@ -388,6 +370,7 @@ cc_library( ":tpu_compile_op_support", ":tpu_mesh_state_interface", ":tpu_op_consts", + ":tpu_program_c_api_hdrs", ":tpu_program_group", ":tpu_util", ":trace_util_hdrs", @@ -397,10 +380,10 @@ cc_library( "//tensorflow/core:framework_internal", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", - "//tensorflow/core:protos_all_cc", "//tensorflow/core/profiler/lib:traceme", "//tensorflow/core/protobuf/tpu:compile_metadata_proto_cc", "@com_google_absl//absl/container:node_hash_map", + "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@com_google_absl//absl/synchronization", "@com_google_absl//absl/types:span", @@ -621,6 +604,7 @@ cc_library( deps = [ ":tpu_compilation_cache_entry", ":tpu_compilation_cache_external", + ":tpu_compilation_cache_interface", ":tpu_compilation_cache_local_lookup", ":tpu_compilation_cache_lookup", ":tpu_executable_info_proto_cc", diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.cc b/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.cc deleted file mode 100644 index 73f55853306..00000000000 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.cc +++ /dev/null @@ -1,54 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h" - -#include "tensorflow/core/platform/casts.h" - -namespace tensorflow { -namespace tpu { - -TpuCompilationCacheEntry::TpuCompilationCacheEntry( - const TpuProgramGroupInterface* tpu_program_group, int core_index) - : tpu_program_group_( - tensorflow::down_cast(tpu_program_group)), - core_index_(core_index) {} - -// Constructor for an empty entry. 
-TpuCompilationCacheEntry::TpuCompilationCacheEntry() - : tpu_program_group_(nullptr) {} - -const TPUExecutableInfoProto* TpuCompilationCacheEntry::get_executable_info() - const { - return &(tpu_program_group_->executable_info()); -} - -const TPUHostTransferInfoProto* -TpuCompilationCacheEntry::get_host_transfer_info() const { - return &(tpu_program_group_->host_transfer_info()); -} - -const xla::HloProto* TpuCompilationCacheEntry::get_hlo_metadata() const { - return tpu_program_group_->hlo_metadatas()[core_index_]; -} - -// TODO(henrytan,jiawenhao): When should we expect more than one -// XLA_TpuProgram* per TpuProgram? Remove the program_count CHECK below then. -const XLA_TpuProgram* TpuCompilationCacheEntry::get_tpu_program() const { - CHECK_EQ(tpu_program_group_->program_count(), 1); - return tpu_program_group_->tpu_programs()[core_index_]; -} - -} // namespace tpu -} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h b/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h index b3766b8b4dd..832d76bfceb 100644 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h @@ -18,30 +18,32 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo.pb.h" #include "tensorflow/core/lib/core/refcount.h" #include "tensorflow/core/tpu/kernels/tpu_executable_info.pb.h" -#include "tensorflow/core/tpu/kernels/tpu_program_group.h" +#include "tensorflow/core/tpu/kernels/tpu_program_group_interface.h" namespace tensorflow { namespace tpu { -// A version of `CompilationCacheEntry` to access Tpu binary program -// `XLA_TpuProgram`. +// Cache entry to hold a `TpuProgramGroupInterface` object that can be used to +// fetch a TPU program for a given TPU core index. class TpuCompilationCacheEntry { public: explicit TpuCompilationCacheEntry( - const TpuProgramGroupInterface* tpu_program_group, int core_index); + const TpuProgramGroupInterface* tpu_program_group, int core_index) + : tpu_program_group_(tpu_program_group), core_index_(core_index) {} + // Constructor for an empty entry. - TpuCompilationCacheEntry(); - const TPUExecutableInfoProto* get_executable_info() const; - const TPUHostTransferInfoProto* get_host_transfer_info() const; - const xla::HloProto* get_hlo_metadata() const; - // TODO(henrytan): maybe nicer to return C++ wrapper of `XLA_TpuProgram` - const XLA_TpuProgram* get_tpu_program() const; + TpuCompilationCacheEntry() : tpu_program_group_(nullptr), core_index_(-1) {} + + const TpuProgramGroupInterface* tpu_program_group() const { + return tpu_program_group_; + } + + int core_index() const { return core_index_; } private: - const TpuProgramGroup* tpu_program_group_; + const TpuProgramGroupInterface* tpu_program_group_; int core_index_; }; - } // namespace tpu } // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry_impl.h b/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry_impl.h deleted file mode 100644 index 0632d9a163f..00000000000 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry_impl.h +++ /dev/null @@ -1,94 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILATION_CACHE_ENTRY_IMPL_H_ -#define TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILATION_CACHE_ENTRY_IMPL_H_ -#include "tensorflow/core/tpu/kernels/compiled_subgraph.h" -#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h" -#include "tensorflow/core/tpu/kernels/tpu_executable_info.pb.h" -namespace tensorflow { -namespace tpu { -// Wrapper for a cache entry that holds a reference to the entry until the -// wrapper is deleted. This wrapper is the concrete type of -// CompilationCacheEntryRef returned by Lookup. -template -class CompilationCacheEntryRefImpl - : public CompilationCacheEntryRef { - public: - CompilationCacheEntryRefImpl(TpuCompilationCacheInterface* parent, - CompiledSubgraph* entry, int index); - ~CompilationCacheEntryRefImpl() override; - Status ToSubEntryRef(CompilationCacheFetchTarget fetch_target) override; - - protected: - TpuCompilationCacheInterface* parent_; // Not owned. - // A reference to entry_ is acquired in the constructor and released via - // parent->DiscardEntryRefs in the destructor. - CompiledSubgraph* entry_; - // The index of the program in entry_ that is returned by the get method. - int index_; -}; -template -CompilationCacheEntryRefImpl::CompilationCacheEntryRefImpl( - TpuCompilationCacheInterface* parent, CompiledSubgraph* entry, int index) - : parent_(parent), entry_(entry), index_(index) { - if (entry_ == nullptr) { - return; - } - if (entry_->main_entry == nullptr) { - entry_->Ref(); - } else { - // This is a sharding/unsharding entry nested in a main entry. Only - // refcount the main entry. - entry_->main_entry->Ref(); - } -} -template -CompilationCacheEntryRefImpl::~CompilationCacheEntryRefImpl() { - if (entry_ == nullptr) { - return; - } - if (entry_->main_entry == nullptr) { - parent_->DiscardEntryRefs({entry_}); - } else { - parent_->DiscardEntryRefs({entry_->main_entry}); - } -} -template -Status CompilationCacheEntryRefImpl::ToSubEntryRef( - CompilationCacheFetchTarget fetch_target) { - CompiledSubgraph* target = nullptr; - switch (fetch_target) { - case CompilationCacheFetchTarget::MAIN: - target = entry_; - break; - case CompilationCacheFetchTarget::SHARDING: - target = entry_->sharding_entry.get(); - break; - case CompilationCacheFetchTarget::UNSHARDING: - target = entry_->unsharding_entry.get(); - break; - default: - return xla::InvalidArgument("Invalid fetch target: %d", fetch_target); - } - if (target == nullptr) { - // Cache entry does not have an unsharding subentry. Unref and replace - // with nullptr. - parent_->DiscardEntryRefs({entry_}); - } - // Otherwise, since the refcount is always on the main entry, we don't - // need ref/unref. 
- entry_ = target; - return Status::OK(); -} -} // namespace tpu -} // namespace tensorflow -#endif // TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILATION_CACHE_ENTRY_IMPL_H_ diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.cc b/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.cc index b4b18d1743b..80010d70cd4 100644 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.cc +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.cc @@ -16,15 +16,18 @@ limitations under the License. #include +#include "absl/memory/memory.h" #include "absl/strings/str_cat.h" #include "tensorflow/compiler/xla/service/hlo.pb.h" #include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/platform/random.h" #include "tensorflow/core/profiler/lib/traceme.h" +#include "tensorflow/core/tpu/kernels/compiled_subgraph.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_metrics.h" #include "tensorflow/core/tpu/kernels/tpu_compile_c_api.h" #include "tensorflow/core/tpu/kernels/tpu_compile_op_support.h" +#include "tensorflow/core/tpu/kernels/tpu_program_c_api.h" #include "tensorflow/core/tpu/kernels/tpu_util.h" #include "tensorflow/core/tpu/kernels/trace_util.h" @@ -48,23 +51,22 @@ void PopulateEntry(const std::string& key, CompiledSubgraph* entry, entry->tpu_program_group = absl::make_unique(std::move(tpu_program_group)); entry->initialized = true; + + if (entry->initialization_status.ok()) { + // Compute the entries total size once all members are initialized. + entry->total_size = entry->ComputeTotalSize(); + } +} + +std::unique_ptr CreateAndInitializeCompiledSubgraph( + CompiledSubgraph* main_entry) { + auto entry = absl::make_unique(); + entry->main_entry = main_entry; + entry->tpu_program_group = absl::make_unique(); + return entry; } } // namespace -TpuCompilationCacheExternal::EntryRefImpl::EntryRefImpl( - TpuCompilationCacheInterface* parent, CompiledSubgraph* entry, int index) - : CompilationCacheEntryRefImpl(parent, entry, - index) {} - -TpuCompilationCacheEntry TpuCompilationCacheExternal::EntryRefImpl::get() { - if (entry_ == nullptr) { - // Create an empty entry if the entry is nullptr. This corresponds to - // non-existing sharding/unsharding entries. - return TpuCompilationCacheEntry(); - } - return TpuCompilationCacheEntry(entry_->tpu_program_group.get(), index_); -} - CompiledSubgraph* TpuCompilationCacheExternal::InitializeEntry( const string& key, const std::function& initialize_program, @@ -73,7 +75,6 @@ CompiledSubgraph* TpuCompilationCacheExternal::InitializeEntry( main_entry->parent = this; main_entry->subgraph_key = key; main_entry->uid = get_uid(); - // TODO(henrytan): implement TpuCompilationCacheKey.debug_string. main_entry->cache_entry_debug_string = subgraph_key.prefix; VLOG(1) << "Cache Initializing Entry Session Debug " << main_entry->cache_entry_debug_string; @@ -112,17 +113,29 @@ CompiledSubgraph* TpuCompilationCacheExternal::InitializeEntry( std::pair(main_entry->uid, main_entry)); CHECK(uid_inserted.second); - if (initialization_status.ok()) { - // Compute the entries total size once all members are initialized. 
- main_entry->total_size = tpu_program_group.program_size(); + if (tpu_program_group.has_sharding_program()) { + main_entry->sharding_entry = + CreateAndInitializeCompiledSubgraph(main_entry); + TpuProgramGroup sharding_programs; + sharding_programs.Initialize( + tpu_program_group.tpu_programs(TpuProgramShardingType::kSharding)); + PopulateEntry(key, main_entry->sharding_entry.get(), + std::move(sharding_programs)); + + main_entry->unsharding_entry = + CreateAndInitializeCompiledSubgraph(main_entry); + TpuProgramGroup unsharding_programs; + unsharding_programs.Initialize( + tpu_program_group.tpu_programs(TpuProgramShardingType::kUnsharding)); + PopulateEntry(key, main_entry->unsharding_entry.get(), + std::move(unsharding_programs)); } - // TODO(henrytan): handle sharding/unsharding. PopulateEntry(key, main_entry, std::move(tpu_program_group)); for (int64 i = 0; i < main_entry->proto_key.size(); ++i) { auto entry_inserted = entries_by_proto_key_.insert( - std::pair>( + std::pair>( main_entry->proto_key[i], std::make_pair(main_entry, i))); CHECK(entry_inserted.second); } diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.h b/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.h index 86615b15d4c..51b5ffbed0d 100644 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.h +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.h @@ -32,7 +32,6 @@ limitations under the License. #include "tensorflow/core/tpu/kernels/compiled_subgraph.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache.pb.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h" -#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_entry_impl.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_key.h" #include "tensorflow/core/tpu/kernels/tpu_compile_c_api.h" @@ -46,17 +45,6 @@ namespace tpu { class TpuCompilationCacheExternal : public TpuCompilationCacheInterface { public: - using Status = ::stream_executor::port::Status; - - class EntryRefImpl - : public CompilationCacheEntryRefImpl { - public: - EntryRefImpl(TpuCompilationCacheInterface* parent, CompiledSubgraph* entry, - int index); - - TpuCompilationCacheEntry get() override; - }; - explicit TpuCompilationCacheExternal(int64 max_cache_size) : TpuCompilationCacheInterface(max_cache_size) {} diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.cc b/tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.cc index 9e1aedf92ce..4cd2b864203 100644 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.cc +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.cc @@ -38,10 +38,77 @@ void TpuCompilationCacheInterface::RefHolder::AddRef(CompiledSubgraph* entry) { entries_.push_back(entry); } -string TpuCompilationCacheInterface::RefHolder::DebugString() const { +std::string TpuCompilationCacheInterface::RefHolder::DebugString() const { return "TpuCompilationCacheRefHolder"; } +CompilationCacheEntryRef::CompilationCacheEntryRef() + : parent_(nullptr), entry_(nullptr), index_(0) {} + +CompilationCacheEntryRef::CompilationCacheEntryRef( + TpuCompilationCacheInterface* parent, CompiledSubgraph* entry, int index) + : parent_(parent), entry_(entry), index_(index) { + if (entry_ == nullptr) { + return; + } + if (entry_->main_entry == nullptr) { + entry_->Ref(); + } else { + // This is a sharding/unsharding entry nested in a main entry. Only + // refcount the main entry. 
+ entry_->main_entry->Ref(); + } +} + +CompilationCacheEntryRef::~CompilationCacheEntryRef() { + if (entry_ == nullptr) { + return; + } + if (entry_->main_entry == nullptr) { + parent_->DiscardEntryRefs({entry_}); + } else { + parent_->DiscardEntryRefs({entry_->main_entry}); + } +} + +TpuCompilationCacheEntry CompilationCacheEntryRef::get() { + if (entry_ == nullptr) { + // Create an empty entry if the entry is nullptr. This corresponds to + // non-existing sharding/unsharding entries. + return TpuCompilationCacheEntry(); + } + + return TpuCompilationCacheEntry(entry_->tpu_program_group.get(), index_); +} + +Status CompilationCacheEntryRef::ToSubEntryRef( + CompilationCacheFetchTarget fetch_target) { + CompiledSubgraph* target = nullptr; + switch (fetch_target) { + case CompilationCacheFetchTarget::MAIN: + target = entry_; + break; + case CompilationCacheFetchTarget::SHARDING: + target = entry_->sharding_entry.get(); + break; + case CompilationCacheFetchTarget::UNSHARDING: + target = entry_->unsharding_entry.get(); + break; + default: + return xla::InvalidArgument("Invalid fetch target: %d", fetch_target); + } + + if (target == nullptr) { + // Cache entry does not have an unsharding subentry. Unref and replace + // with nullptr. + parent_->DiscardEntryRefs({entry_}); + } + // Otherwise, since the refcount is always on the main entry, we don't + // need ref/unref. + entry_ = target; + return Status::OK(); +} + TpuCompilationCacheInterface::TpuCompilationCacheInterface(int64 max_cache_size) : max_cache_size_(max_cache_size) { CHECK_GE(max_cache_size_, 0); @@ -156,7 +223,7 @@ void TpuCompilationCacheInterface::UnloadAndDestroy(CompiledSubgraph* entry) { entry->Unref(); } -size_t TpuCompilationCacheInterface::RemoveEntry(const string& key) { +size_t TpuCompilationCacheInterface::RemoveEntry(const std::string& key) { auto erased = cache_.erase(key); TpuCompilationMetrics::SetCacheEntryCount(cache_.size()); @@ -196,7 +263,7 @@ CompiledSubgraph* TpuCompilationCacheInterface::DiscardEntryRef( } erased = entries_by_uid_.erase(entry->uid); CHECK_EQ(erased, 1); - for (const string& key : entry->proto_key) { + for (const std::string& key : entry->proto_key) { erased = entries_by_proto_key_.erase(key); CHECK_EQ(erased, 1); } @@ -269,10 +336,10 @@ void TpuCompilationCacheInterface::LookupEntryMarkedForEviction( } } -void TpuCompilationCacheInterface::InsertEntry(const string& key, +void TpuCompilationCacheInterface::InsertEntry(const std::string& key, CompiledSubgraph* entry) { auto cache_inserted = - cache_.insert(std::pair(key, entry)); + cache_.insert(std::pair(key, entry)); CHECK(cache_inserted.second); TpuCompilationMetrics::SetCacheEntryCount(cache_.size()); @@ -295,7 +362,8 @@ Status TpuCompilationCacheInterface::CompileIfKeyAbsent( const TpuCompilationCacheKey& subgraph_key, const SessionMetadata* session_metadata, CompilationRefHolder* per_step_ref_holder, int64* uid, - std::vector* proto_key, std::vector* may_modify_variables, + std::vector* proto_key, + std::vector* may_modify_variables, absl::Span* hlo_metadatas, const std::function& compile_function) { std::vector removed_entries; @@ -308,7 +376,7 @@ Status TpuCompilationCacheInterface::CompileIfKeyAbsent( return status; } -string TpuCompilationCacheInterface::FindCacheKey( +std::string TpuCompilationCacheInterface::FindCacheKey( const TpuCompilationCacheKey& subgraph_key) { if (!subgraph_key.has_guaranteed_const) { return subgraph_key.prefix; @@ -331,7 +399,8 @@ Status TpuCompilationCacheInterface::CompileIfKeyAbsentHelper( const 
TpuCompilationCacheKey& subgraph_key, const SessionMetadata* session_metadata, CompilationRefHolder* per_step_ref_holder, int64* uid, - std::vector* proto_key, std::vector* may_modify_variables, + std::vector* proto_key, + std::vector* may_modify_variables, std::vector* removed_entries, absl::Span* hlo_metadatas, const std::function& compile_function) { @@ -345,17 +414,18 @@ Status TpuCompilationCacheInterface::CompileIfKeyAbsentHelper( // for the lifetime of the object, see InitializeEntry() call below. absl::MutexLock lock(&mu_); - string cache_key = FindCacheKey(subgraph_key); + std::string cache_key = FindCacheKey(subgraph_key); auto iter = cache_.find(cache_key); bool is_new_key = iter == cache_.end(); - const string session_name = tpu::SessionNameFromMetadata(session_metadata); + const std::string session_name = + tpu::SessionNameFromMetadata(session_metadata); if (is_new_key) { cache_key = subgraph_key.ToString(); TpuCompilationMetrics::IncrementCacheLookupCount( /*is_cache_hit=*/false, session_name); - const string msg = + const std::string msg = strings::StrCat("TPU host compilation cache miss: cache_key(", cache_key, "), session_name(", session_name, ")"); TRACESTRING(msg); @@ -364,7 +434,7 @@ Status TpuCompilationCacheInterface::CompileIfKeyAbsentHelper( // Check if caller has disabled compilation. Set using // internal::ScopedTpuCompileDisabler. if (!UtilApiFn()->TpuCompile_IsTpuCompilationEnabledFn()) { - const string error_msg = strings::StrCat( + const std::string error_msg = strings::StrCat( "[TpuCompilationDisabled]: Compilation cache miss, but compilation " "disabled, session_name(", session_name, ") Debug String: ", subgraph_key.debug_string); @@ -403,7 +473,7 @@ Status TpuCompilationCacheInterface::CompileIfKeyAbsentHelper( } else { TpuCompilationMetrics::IncrementCacheLookupCount( /*is_cache_hit=*/true, session_name); - const string msg = + const std::string msg = strings::StrCat("TPU host compilation cache hit: cache_key(", cache_key, "), session_name(", session_name, ")"); TRACESTRING(msg); @@ -466,8 +536,8 @@ Status TpuCompilationCacheInterface::CompileIfKeyAbsentHelper( return entry->initialization_status; } -Status TpuCompilationCacheInterface::GetKeysFromUid(int64 uid, - std::vector* keys) { +Status TpuCompilationCacheInterface::GetKeysFromUid( + int64 uid, std::vector* keys) { keys->clear(); absl::MutexLock lock(&mu_); @@ -479,5 +549,49 @@ Status TpuCompilationCacheInterface::GetKeysFromUid(int64 uid, return Status::OK(); } +Status TpuCompilationCacheInterface::Lookup( + int64 uid, int proto_index, + std::unique_ptr* entry) { + entry->reset(); + + profiler::TraceMe proto_lookup_traceme( + "TPU compilation cache proto lookup by uid", + /*level=*/2); + + absl::MutexLock lock(&mu_); + const auto iter = entries_by_uid_.find(uid); + if (iter == entries_by_uid_.end()) { + return errors::NotFound("No subgraph found for uid ", uid); + } + CompiledSubgraph* cache_entry = iter->second; + if (proto_index < 0 || + proto_index >= cache_entry->tpu_program_group->program_count()) { + return errors::NotFound("No proto found for core index ", proto_index, + " in subgraph with uid ", uid); + } + *entry = absl::make_unique(this, cache_entry, + proto_index); + return Status::OK(); +} + +Status TpuCompilationCacheInterface::Lookup( + const std::string& proto_key, + std::unique_ptr* entry) { + entry->reset(); + + profiler::TraceMe proto_lookup_traceme("TPU compilation cache proto lookup", + /*level=*/2); + + absl::MutexLock lock(&mu_); + const auto iter = 
entries_by_proto_key_.find(proto_key); + if (iter == entries_by_proto_key_.end()) { + return errors::NotFound("No proto found for key ", proto_key); + } + CompiledSubgraph* cache_entry = iter->second.first; + int proto_index = iter->second.second; + *entry = absl::make_unique(this, cache_entry, + proto_index); + return Status::OK(); +} } // namespace tpu } // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h b/tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h index cde6467b7af..7b206fb1cf4 100644 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h @@ -32,6 +32,7 @@ limitations under the License. #include "tensorflow/core/protobuf/config.pb.h" #include "tensorflow/core/tpu/kernels/compiled_subgraph.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache.pb.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_key.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_metrics.h" #include "tensorflow/core/tpu/kernels/trace_util.h" @@ -48,18 +49,20 @@ class CompilationRefHolder : public ResourceBase { ~CompilationRefHolder() override = default; }; -// Base class for a reference to a cached tpu program. A unique_ptr to a -// CompilationCacheEntryRef is returned by all the cache Lookup methods below, -// and ensures the underlying proto is not garbage-collected until the client -// discards the ptr. -template +// Wrapper for a cache entry returned by all the TpuCompilationCacheInterface +// `Lookup` methods, and ensures the underlying proto is not garbage-collected +// until the client discards the ptr. class CompilationCacheEntryRef { public: - virtual ~CompilationCacheEntryRef() = default; + CompilationCacheEntryRef(); + CompilationCacheEntryRef(TpuCompilationCacheInterface* parent, + CompiledSubgraph* entry, int index); - // Returns a CompilationCacheEntry that should not be used beyond the lifetime - // of the tpu::CompilationCacheEntryRef. - virtual CacheEntryType get() = 0; + virtual ~CompilationCacheEntryRef(); + + // Returns a TpuCompilationCacheEntry that should not be used beyond the + // lifetime of the CompilationCacheEntryRef. + virtual TpuCompilationCacheEntry get(); // Mutates this ref to point to the entry's subentry (for // sharding/unsharding) or main entry (unchanged) as specified by @@ -69,7 +72,15 @@ class CompilationCacheEntryRef { // // If the requested subentry does not exist, the ref will point to a nullptr // entry, and the original entry will be unref'ed. - virtual Status ToSubEntryRef(CompilationCacheFetchTarget fetch_target) = 0; + virtual Status ToSubEntryRef(CompilationCacheFetchTarget fetch_target); + + protected: + TpuCompilationCacheInterface* parent_; // Not owned. + // A reference to entry_ is acquired in the constructor and released via + // parent->DiscardEntryRefs in the destructor. + CompiledSubgraph* entry_; + // The index of the program in entry_ that is returned by the get method. 
+ int index_; }; class TpuCompilationCacheInterface : public ResourceBase { @@ -97,7 +108,8 @@ class TpuCompilationCacheInterface : public ResourceBase { const TpuCompilationCacheKey& subgraph_key, const SessionMetadata* session_metadata, CompilationRefHolder* per_step_ref_holder, int64* uid, - std::vector* proto_key, std::vector* may_modify_variables, + std::vector* proto_key, + std::vector* may_modify_variables, absl::Span* hlo_metadatas, const std::function& compile_function); @@ -124,19 +136,18 @@ class TpuCompilationCacheInterface : public ResourceBase { // Looks up an executable corresponding to the model-parallel core index of // the subgraph represented by key. On success a pointer to an EntryRef // holding the program is returned in entry. - template - Status Lookup(const string& proto_key, std::unique_ptr* entry); + Status Lookup(const std::string& proto_key, + std::unique_ptr* entry); // Looks up an executable corresponding to the model-parallel core index of // the subgraph represented by uid. On success a pointer to an EntryRef // holding the program is returned in entry. - template Status Lookup(int64 uid, int proto_index, - std::unique_ptr* entry); + std::unique_ptr* entry); // Looks up the subgraph represented by uid, and returns the vector of keys, // one per core, corresponding to that subgraph. - Status GetKeysFromUid(int64 uid, std::vector* keys); + Status GetKeysFromUid(int64 uid, std::vector* keys); // Makes a reference holder for this cache, that can be stored in the per-step // resource manager and will ensure that compiled entries persist until the @@ -170,7 +181,7 @@ class TpuCompilationCacheInterface : public ResourceBase { // parent_->DiscardEntryRefs. void AddRef(CompiledSubgraph* entry); - string DebugString() const override; + std::string DebugString() const override; private: TpuCompilationCacheInterface* parent_; // Not owned. @@ -185,7 +196,8 @@ class TpuCompilationCacheInterface : public ResourceBase { const TpuCompilationCacheKey& subgraph_key, const SessionMetadata* session_metadata, CompilationRefHolder* per_step_ref_holder, int64* uid, - std::vector* proto_key, std::vector* may_modify_variables, + std::vector* proto_key, + std::vector* may_modify_variables, std::vector* removed_entries, absl::Span* hlo_metadatas, const std::function& compile_function); @@ -230,14 +242,14 @@ class TpuCompilationCacheInterface : public ResourceBase { ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); // Removes the entry with given key from cache. - size_t RemoveEntry(const string& key) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); + size_t RemoveEntry(const std::string& key) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); // Inserts the given key and entry to cache. - void InsertEntry(const string& key, CompiledSubgraph* entry) + void InsertEntry(const std::string& key, CompiledSubgraph* entry) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); // Returns the cache key matching given subgraph_key. 
- string FindCacheKey(const TpuCompilationCacheKey& subgraph_key) + std::string FindCacheKey(const TpuCompilationCacheKey& subgraph_key) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); // Creates a new entry by running initialize_programs and places it in the @@ -247,7 +259,7 @@ class TpuCompilationCacheInterface : public ResourceBase { // // **InitializeEntry releases mu_ during the call to initialize_programs.** virtual CompiledSubgraph* InitializeEntry( - const string& key, + const std::string& key, const std::function& initialize_programs, const TpuCompilationCacheKey& subgraph_key) @@ -276,13 +288,16 @@ class TpuCompilationCacheInterface : public ResourceBase { // cache_ key matching a given subgraph key. When doing a lookup, check // session_key_map_ first to avoid unnecessay fingerprint computation. // Map from key prefix + session_handle to a cache_ key. - absl::node_hash_map session_key_map_ ABSL_GUARDED_BY(mu_); + absl::node_hash_map session_key_map_ + ABSL_GUARDED_BY(mu_); // Map from key prefix + fingerprint to a cache_ key. - absl::node_hash_map fingerprint_key_map_ ABSL_GUARDED_BY(mu_); + absl::node_hash_map fingerprint_key_map_ + ABSL_GUARDED_BY(mu_); // All the subgraph entries that can be looked up in the cache. An entry is // marked for eviction iff it is present in cache_ and not in // entries_by_last_use_. - std::unordered_map cache_ ABSL_GUARDED_BY(mu_); + std::unordered_map cache_ + ABSL_GUARDED_BY(mu_); // All the subgraph entries that can be looked up in the cache, indexed by // uid. absl::node_hash_map entries_by_uid_ @@ -290,7 +305,7 @@ class TpuCompilationCacheInterface : public ResourceBase { // All the protos that can be looked up in the cache, indexed by proto // key. The value of the map is a subgraph and the index of the proto compiled // for that subgraph. - std::unordered_map> + std::unordered_map> entries_by_proto_key_ ABSL_GUARDED_BY(mu_); // Map from last_use to entry, used to mark entries for eviction in LRU // order. 
If an entry's last_use counter is not present as a key in @@ -304,50 +319,6 @@ class TpuCompilationCacheInterface : public ResourceBase { TpuCompilationCacheInterface& operator=(const TpuCompilationCacheInterface&) = delete; }; - -template -Status TpuCompilationCacheInterface::Lookup( - int64 uid, int proto_index, std::unique_ptr* entry) { - entry->reset(); - - profiler::TraceMe proto_lookup_traceme( - "TPU compilation cache proto lookup by uid", - /*level=*/2); - - absl::MutexLock lock(&mu_); - const auto iter = entries_by_uid_.find(uid); - if (iter == entries_by_uid_.end()) { - return errors::NotFound("No subgraph found for uid ", uid); - } - CompiledSubgraph* cache_entry = iter->second; - if (proto_index < 0 || - proto_index >= cache_entry->tpu_program_group->program_count()) { - return errors::NotFound("No proto found for core index ", proto_index, - " in subgraph with uid ", uid); - } - *entry = absl::make_unique(this, cache_entry, proto_index); - return Status::OK(); -} - -template -Status TpuCompilationCacheInterface::Lookup( - const string& proto_key, std::unique_ptr* entry) { - entry->reset(); - - profiler::TraceMe proto_lookup_traceme("TPU compilation cache proto lookup", - /*level=*/2); - - absl::MutexLock lock(&mu_); - const auto iter = entries_by_proto_key_.find(proto_key); - if (iter == entries_by_proto_key_.end()) { - return errors::NotFound("No proto found for key ", proto_key); - } - CompiledSubgraph* cache_entry = iter->second.first; - int proto_index = iter->second.second; - *entry = absl::make_unique(this, cache_entry, proto_index); - return Status::OK(); -} - } // namespace tpu } // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_local_lookup.cc b/tensorflow/core/tpu/kernels/tpu_compilation_cache_local_lookup.cc index f30a503d2d2..29864a310d1 100644 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_local_lookup.cc +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_local_lookup.cc @@ -16,70 +16,50 @@ limitations under the License. 
namespace tensorflow { namespace tpu { -namespace { -class CompilationCacheFetchTargetUtility { - public: - CompilationCacheFetchTargetUtility() - : names_({"Invalid", "Main", "Sharding", "Unsharding"}) {} - - std::string name(CompilationCacheFetchTarget target) const { - return names_[static_cast(target)]; - } - - private: - const std::vector names_; -}; - -std::string GetName(CompilationCacheFetchTarget target) { - static const auto* util = new CompilationCacheFetchTargetUtility(); - return util->name(target); -} - -} // namespace TpuCompilationCacheLocalLookup::TpuCompilationCacheLocalLookup( TpuCompilationCacheInterface* cache) - : cache_(cache) {} + : cache_(cache) { + cache_->Ref(); +} TpuCompilationCacheLocalLookup::~TpuCompilationCacheLocalLookup() { cache_->Unref(); } Status TpuCompilationCacheLocalLookup::Lookup( - const string& proto_key, - std::unique_ptr* entry, + const string& proto_key, std::unique_ptr* entry, CompilationCacheFetchTarget fetch_target) { profiler::TraceMe proto_lookup_traceme("Local TPU proto cache lookup", /*level=*/2); - Status s = cache_->Lookup( - proto_key, entry); + Status s = cache_->Lookup(proto_key, entry); VLOG(1) << "Looked up key " << proto_key << " in local subgraph cache status " << s; if (!s.ok()) { return s; } s = (*entry)->ToSubEntryRef(fetch_target); - - VLOG(1) << "Fetched subentry: " << GetName(fetch_target) << " with status " + VLOG(1) << "Fetched subentry: " + << CompilationCacheFetchTarget_Name(fetch_target) << " with status " << s; return s; } Status TpuCompilationCacheLocalLookup::Lookup( int64 uid, int proto_index, - std::unique_ptr* entry, + std::unique_ptr* entry, CompilationCacheFetchTarget fetch_target) { profiler::TraceMe proto_lookup_traceme("Local TPU proto cache lookup by uid", /*level=*/2); - Status s = cache_->Lookup( - uid, proto_index, entry); + Status s = cache_->Lookup(uid, proto_index, entry); VLOG(1) << "Looked up uid " << uid << ", index " << proto_index << " in local subgraph cache status " << s; if (!s.ok()) { return s; } s = (*entry)->ToSubEntryRef(fetch_target); - VLOG(1) << "Fetched subentry: " << GetName(fetch_target) << " with status " + VLOG(1) << "Fetched subentry: " + << CompilationCacheFetchTarget_Name(fetch_target) << " with status " << s; return s; } @@ -87,6 +67,5 @@ Status TpuCompilationCacheLocalLookup::Lookup( string TpuCompilationCacheLocalLookup::DebugString() const { return "TpuCompilationCacheLocalLookup"; } - } // namespace tpu } // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_local_lookup.h b/tensorflow/core/tpu/kernels/tpu_compilation_cache_local_lookup.h index eb5aadcd3e2..8db4c11ebea 100644 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_local_lookup.h +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_local_lookup.h @@ -28,24 +28,17 @@ namespace tpu { // Class for looking up TPU programs when the execute and compile Op are in the // same address space. The proto is simply looked up in the compilation cache, // without any serialization taking place. 
-class TpuCompilationCacheLocalLookup - : public TpuCompilationCacheLookup< - CompilationCacheEntryRef> { +class TpuCompilationCacheLocalLookup : public TpuCompilationCacheLookup { public: - using TpuCompilationCacheEntryRef = - ::tensorflow::tpu::CompilationCacheEntryRef; - using EntryRefImpl = - ::tensorflow::tpu::TpuCompilationCacheExternal::EntryRefImpl; - explicit TpuCompilationCacheLocalLookup(TpuCompilationCacheInterface* cache); ~TpuCompilationCacheLocalLookup() override; Status Lookup(const string& proto_key, - std::unique_ptr* entry, + std::unique_ptr* entry, CompilationCacheFetchTarget fetch_target) override; Status Lookup(int64 uid, int proto_index, - std::unique_ptr* entry, + std::unique_ptr* entry, CompilationCacheFetchTarget fetch_target) override; string DebugString() const override; diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.h b/tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.h index 0d1a53d31d2..ab476322a8a 100644 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.h +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.h @@ -23,10 +23,11 @@ limitations under the License. namespace tensorflow { namespace tpu { +// TODO(b/162241759): consider merging TpuCompilationCacheLookup and +// TpuCompilationCacheInterface. // Base class allowing Execute Ops to look up TPU programs. Different subclasses // are used when the execute Op is in the same address space as the compile Op, // and when they need to communicate over RPC. -template class TpuCompilationCacheLookup : public ResourceBase { public: ~TpuCompilationCacheLookup() override = default; @@ -43,12 +44,11 @@ class TpuCompilationCacheLookup : public ResourceBase { // fetch_target requests one of them, then after this call // (*entry)->get().get_executable() will return nullptr. virtual Status Lookup(const string& proto_key, - std::unique_ptr* entry, + std::unique_ptr* entry, CompilationCacheFetchTarget fetch_target) = 0; - virtual Status Lookup( - const string& proto_key, - std::unique_ptr* entry) { + virtual Status Lookup(const string& proto_key, + std::unique_ptr* entry) { return Lookup(proto_key, std::move(entry), CompilationCacheFetchTarget::MAIN); } @@ -58,17 +58,15 @@ class TpuCompilationCacheLookup : public ResourceBase { // returned in program. The wrapper is guaranteed to be valid only during the // execution of the Op requesting the proto. 
virtual Status Lookup(int64 uid, int proto_index, - std::unique_ptr* entry, + std::unique_ptr* entry, CompilationCacheFetchTarget fetch_target) = 0; - virtual Status Lookup( - int64 uid, int proto_index, - std::unique_ptr* entry) { + virtual Status Lookup(int64 uid, int proto_index, + std::unique_ptr* entry) { return Lookup(uid, proto_index, std::move(entry), CompilationCacheFetchTarget::MAIN); } }; - } // namespace tpu } // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc b/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc index 4ed646af302..ce18e844e66 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc @@ -413,46 +413,6 @@ Status TpuCompileOpKernelCommon::CompileTFFunctionToHlo( return Status::OK(); } -/* static */ -Status TpuCompileOpKernelCommon::ComputeArgumentShapes( - const tpu::TPUCompileMetadataProto& metadata, - const std::vector& dynamic_shapes, - std::vector* arg_shapes) { - arg_shapes->resize(metadata.args_size()); - int dynamic_shape_pos = 0; - for (int i = 0; i < metadata.args_size(); ++i) { - const tpu::TPUCompileMetadataProto::Arg& arg = metadata.args(i); - // The XLA compiler determines the shape of each constant by inspecting the - // value of its corresponding host-memory tensor. As a result, we don't need - // to give the compiler graph-inferred shapes for constant arguments. - if (arg.kind() == tpu::TPUCompileMetadataProto::Arg::GUARANTEED_CONSTANT) { - continue; - } - TF_RETURN_IF_ERROR(PartialTensorShape::IsValidShape(arg.shape())); - PartialTensorShape static_shape(arg.shape()); - - TensorShape& shape = (*arg_shapes)[i]; - if (static_shape.IsFullyDefined()) { - TF_RET_CHECK(static_shape.AsTensorShape(&shape)); - } else { - TF_RET_CHECK(dynamic_shape_pos < dynamic_shapes.size()) - << "Too few dynamic shapes"; - shape = dynamic_shapes[dynamic_shape_pos++]; - if (!static_shape.IsCompatibleWith(shape)) { - return errors::InvalidArgument( - "Mismatch between static and dynamic shape for argument. Static " - "shape: ", - static_shape.DebugString(), - "; dynamic shape: ", shape.DebugString()); - } - } - } - // Checks we consumed all of the dynamic shapes. - TF_RET_CHECK(dynamic_shape_pos == dynamic_shapes.size()) - << "Too many dynamic shapes"; - return Status::OK(); -} - // Function arguments and return values lose their device assignments, so we // must recreate them. /* static */ Status TpuCompileOpKernelCommon::AssignDevicesToArgsAndRetvals( diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_common.h b/tensorflow/core/tpu/kernels/tpu_compile_op_common.h index 3d3f0afcdb7..327aa460ddd 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_common.h +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_common.h @@ -99,15 +99,6 @@ class TpuCompileOpKernelCommon { const std::vector& arg_shapes, TpuProgramGroupInterface* tpu_program_group) = 0; - // Computes shapes for each argument. Uses both the static shape from the - // metadata, and the dynamic shapes where the static shape is not - // defined. There must be one dynamic_shape for each argument with a - // partially defined shape, in index order. - static Status ComputeArgumentShapes( - const tpu::TPUCompileMetadataProto& metadata, - const std::vector& dynamic_shapes, - std::vector* arg_shapes); - // Performs shape inference on `computation`, filling shape_info with operator // shapes. The shapes of the _Arg nodes are taken from `arg_shapes`. 
static Status RunShapeInferenceOnComputation( diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_support.cc b/tensorflow/core/tpu/kernels/tpu_compile_op_support.cc index 5cc35a07e66..3440b6d265a 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_support.cc +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_support.cc @@ -540,5 +540,43 @@ Status CompileOpMetadataFromContext(OpKernelConstruction* ctx, } return Status::OK(); } + +Status ComputeArgumentShapes(const tpu::TPUCompileMetadataProto& metadata, + const std::vector& dynamic_shapes, + std::vector* arg_shapes) { + arg_shapes->resize(metadata.args_size()); + int dynamic_shape_pos = 0; + for (int i = 0; i < metadata.args_size(); ++i) { + const tpu::TPUCompileMetadataProto::Arg& arg = metadata.args(i); + // The XLA compiler determines the shape of each constant by inspecting the + // value of its corresponding host-memory tensor. As a result, we don't need + // to give the compiler graph-inferred shapes for constant arguments. + if (arg.kind() == tpu::TPUCompileMetadataProto::Arg::GUARANTEED_CONSTANT) { + continue; + } + TF_RETURN_IF_ERROR(PartialTensorShape::IsValidShape(arg.shape())); + PartialTensorShape static_shape(arg.shape()); + + TensorShape& shape = (*arg_shapes)[i]; + if (static_shape.IsFullyDefined()) { + TF_RET_CHECK(static_shape.AsTensorShape(&shape)); + } else { + TF_RET_CHECK(dynamic_shape_pos < dynamic_shapes.size()) + << "Too few dynamic shapes"; + shape = dynamic_shapes[dynamic_shape_pos++]; + if (!static_shape.IsCompatibleWith(shape)) { + return errors::InvalidArgument( + "Mismatch between static and dynamic shape for argument. Static " + "shape: ", + static_shape.DebugString(), + "; dynamic shape: ", shape.DebugString()); + } + } + } + // Checks we consumed all of the dynamic shapes. + TF_RET_CHECK(dynamic_shape_pos == dynamic_shapes.size()) + << "Too many dynamic shapes"; + return Status::OK(); +} } // namespace tpu } // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_support.h b/tensorflow/core/tpu/kernels/tpu_compile_op_support.h index bc60f64286a..ea13d33b521 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_support.h +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_support.h @@ -159,6 +159,14 @@ se::port::Status CompileOpMetadataFromContext(OpKernelConstruction* ctx, TPUCompileMetadataProto* metadata, NameAttrList* function_name, std::string* mlir_module); + +// Computes shapes for each argument. Uses both the static shape from the +// metadata, and the dynamic shapes where the static shape is not +// defined. There must be one dynamic_shape for each argument with a +// partially defined shape, in index order. 
+Status ComputeArgumentShapes(const TPUCompileMetadataProto& metadata, + const std::vector& dynamic_shapes, + std::vector* arg_shapes); } // namespace tpu } // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc b/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc index 71735f0639f..5a8c283c7c2 100644 --- a/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc +++ b/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc @@ -255,13 +255,9 @@ void InitializeHostForDistributedTpuOp::Compute(OpKernelContext* ctx) { mesh_state_interface)); } -#if defined(LIBTFTPU) VLOG(1) << "Removing existing proto compilation cache lookup if it exists"; - OP_REQUIRES_OK( - ctx, DeleteIfExists>>( - rmgr, tpu::kCompiledProtoCacheResourceName)); -#endif + OP_REQUIRES_OK(ctx, DeleteIfExists( + rmgr, tpu::kCompiledProtoCacheResourceName)); if (enable_whole_mesh_compilations_) { // If this is a whole mesh compilation mode, create the compilation cache, @@ -287,16 +283,12 @@ void InitializeHostForDistributedTpuOp::Compute(OpKernelContext* ctx) { if (local_compilation_cache != nullptr) { local_compilation_cache->Unref(); -#if defined(LIBTFTPU) - tpu::TpuCompilationCacheLookup< - tpu::CompilationCacheEntryRef>* - proto_lookup; + tpu::TpuCompilationCacheLookup* proto_lookup; proto_lookup = new tpu::TpuCompilationCacheLocalLookup(local_compilation_cache); OP_REQUIRES_OK( ctx, rmgr->Create(rmgr->default_container(), tpu::kCompiledProtoCacheResourceName, proto_lookup)); -#endif } Tensor* ctx_output; diff --git a/tensorflow/core/tpu/kernels/tpu_execute_op.cc b/tensorflow/core/tpu/kernels/tpu_execute_op.cc index 51c9dd481a3..3522ace379a 100644 --- a/tensorflow/core/tpu/kernels/tpu_execute_op.cc +++ b/tensorflow/core/tpu/kernels/tpu_execute_op.cc @@ -40,10 +40,12 @@ limitations under the License. #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/platform/casts.h" #include "tensorflow/core/platform/tracing.h" #include "tensorflow/core/profiler/lib/traceme.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_external.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_local_lookup.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.h" #include "tensorflow/core/tpu/kernels/tpu_executable_info.pb.h" @@ -56,14 +58,10 @@ limitations under the License. #include "tensorflow/stream_executor/tpu/tpu_node_context.h" namespace tensorflow { - namespace { - +using ::tensorflow::tpu::CompilationCacheEntryRef; +using ::tensorflow::tpu::TpuCompilationCacheLookup; using ::tensorflow::tpu::TpuNodeContext; -using CompilationCacheEntryRef = ::tensorflow::tpu::CompilationCacheEntryRef< - ::tensorflow::tpu::TpuCompilationCacheEntry>; -using TpuCompilationCacheLookup = - ::tensorflow::tpu::TpuCompilationCacheLookup; // Looks up the input `key` in the compilation cache, populating // `*rendezvous_key_base` and `*entry`. 
@@ -641,28 +639,35 @@ Status TPUExecuteOp::DoWork(OpKernelContext* context) { profiler::TraceMe trace_me_init("TPUExecuteOp::Init", /*level=*/2); string rendezvous_key_base; - std::unique_ptr entry; + std::unique_ptr entry_ref; TF_RETURN_IF_ERROR( - GetComputationCacheEntry(context, &rendezvous_key_base, &entry)); + GetComputationCacheEntry(context, &rendezvous_key_base, &entry_ref)); // Shapes of the inputs and outputs, in xla::Shape form. - const TPUExecutableInfoProto* proto = entry->get().get_executable_info(); + tpu::TpuCompilationCacheEntry entry = entry_ref->get(); + const tpu::TpuProgramGroup* tpu_program_group = + tensorflow::down_cast( + entry.tpu_program_group()); + CHECK_NE(tpu_program_group, nullptr); + const int core_index = entry.core_index(); + const TPUExecutableInfoProto& executable = + tpu_program_group->executable_info(core_index); xla::Backend* const backend = node_context->backend(); xla::TransferManager* const transfer_manager = backend->transfer_manager(); TF_RET_CHECK(context->op_device_context()); se::Stream* stream = context->op_device_context()->stream(); - TF_RET_CHECK(proto->input_shapes_size() == 1); + TF_RET_CHECK(executable.input_shapes_size() == 1); - xla::Shape host_shape(proto->input_shapes(0)); + xla::Shape host_shape(executable.input_shapes(0)); TF_ASSIGN_OR_RETURN( auto variable_update_map, - BuildVariableUpdateMap(proto->variable_indices(), + BuildVariableUpdateMap(executable.variable_indices(), fused_device_var_reads_in_computation_inputs_, fused_device_var_updates_in_computation_outputs_, - proto->output_tensor_shapes().size())); + executable.output_tensor_shapes().size())); TF_ASSIGN_OR_RETURN( std::unique_ptr input_buffers, BuildComputationInputs(context, host_shape, variable_update_map, backend, @@ -697,8 +702,9 @@ Status TPUExecuteOp::DoWork(OpKernelContext* context) { // Snapshot the inputs, if a snapshot was requested. std::shared_ptr hlo_snapshot; - if (proto->has_session_module()) { - hlo_snapshot = std::make_shared(proto->session_module()); + if (executable.has_session_module()) { + hlo_snapshot = + std::make_shared(executable.session_module()); auto literal = std::make_shared(shaped_buffer.on_host_shape()); transfer_manager->TransferLiteralFromDevice( @@ -723,9 +729,9 @@ Status TPUExecuteOp::DoWork(OpKernelContext* context) { const uint32 rng_seed = GetXLARandomSeed(); std::unique_ptr device_assignment; - if (proto->has_device_assignment()) { + if (executable.has_device_assignment()) { TF_ASSIGN_OR_RETURN(device_assignment, xla::DeviceAssignment::Deserialize( - proto->device_assignment())); + executable.device_assignment())); } VLOG(4) << "Input buffers after alias resolution: " @@ -743,24 +749,24 @@ Status TPUExecuteOp::DoWork(OpKernelContext* context) { // we free a memory and reassign it to other users while a program is running, // all subsequent writes to the program that could possibly clobber the memory // will depend on the program to finish. 
- const TPUHostTransferInfoProto* host_transfer_info = - entry->get().get_host_transfer_info(); - const xla::HloProto* hlo_metadata = entry->get().get_hlo_metadata(); + const TPUHostTransferInfoProto& host_transfer_info = + tpu_program_group->host_transfer_info(core_index); TF_ASSIGN_OR_RETURN( xla::ExecutionOutput output, - TPUExecute(*proto, *host_transfer_info, *hlo_metadata, std::move(input), + TPUExecute(executable, host_transfer_info, + *tpu_program_group->hlo_metadata(core_index), std::move(input), rendezvous_key_base, rng_seed, node_context.get(), device_assignment.get(), context->cancellation_manager(), context, stream, transfer_stream_ptr.get(), - entry->get().get_tpu_program())); + tpu_program_group->tpu_program(core_index))); stream->ThenRecordEvent(definition_event.get()); TF_ASSIGN_OR_RETURN( std::unique_ptr output_buffers, - AllocateOutputTensors(context, output.ConsumeResult(), - proto->output_tensor_shapes(), variable_update_map, - node_context.get(), stream, device_ordinal, - input_buffers.get(), definition_event)); + AllocateOutputTensors( + context, output.ConsumeResult(), executable.output_tensor_shapes(), + variable_update_map, node_context.get(), stream, device_ordinal, + input_buffers.get(), definition_event)); // Transfer the outputs and save the snapshot to disk. if (hlo_snapshot) { diff --git a/tensorflow/core/tpu/kernels/tpu_program_c_api.h b/tensorflow/core/tpu/kernels/tpu_program_c_api.h index c9951e4d5ce..41c7d47cf97 100644 --- a/tensorflow/core/tpu/kernels/tpu_program_c_api.h +++ b/tensorflow/core/tpu/kernels/tpu_program_c_api.h @@ -21,6 +21,9 @@ limitations under the License. typedef struct XLA_TpuProgram XLA_TpuProgram; +// Enum for choosing sharding/unsharding program from a `XLA_TpuProgram` obj. +enum TpuProgramShardingType { kInvalid = 0, kMain, kSharding, kUnsharding }; + extern "C" { // Creates a new TPU program. @@ -64,6 +67,15 @@ TFTPU_CAPI_EXPORT void TpuProgram_GetHloMetadata( TFTPU_CAPI_EXPORT void TpuProgram_GetMayModifyVariables( const XLA_TpuProgram* tpu_program, bool* may_modify_variables); +// Check if TPU program has sharding. +TFTPU_CAPI_EXPORT bool TpuProgram_HasSharding( + const XLA_TpuProgram* tpu_program); + +// Gets TPU program by sharding type. Return value is valid only when the +// `status.status()` returns `OK`. +TFTPU_CAPI_EXPORT XLA_TpuProgram* TpuProgram_GetTpuProgram( + XLA_TpuProgram* tpu_program, TpuProgramShardingType type); + struct TfTpu_TpuProgramApiFn { TFTPU_ADD_FN_IN_STRUCT(TpuProgram_New); TFTPU_ADD_FN_IN_STRUCT(TpuProgram_Free); @@ -76,6 +88,8 @@ struct TfTpu_TpuProgramApiFn { TFTPU_ADD_FN_IN_STRUCT(TpuProgram_GetHostTransferInfo); TFTPU_ADD_FN_IN_STRUCT(TpuProgram_GetHloMetadata); TFTPU_ADD_FN_IN_STRUCT(TpuProgram_GetMayModifyVariables); + TFTPU_ADD_FN_IN_STRUCT(TpuProgram_HasSharding); + TFTPU_ADD_FN_IN_STRUCT(TpuProgram_GetTpuProgram); }; } // extern "C" diff --git a/tensorflow/core/tpu/kernels/tpu_program_group.cc b/tensorflow/core/tpu/kernels/tpu_program_group.cc index e22175af270..39d1f38b104 100644 --- a/tensorflow/core/tpu/kernels/tpu_program_group.cc +++ b/tensorflow/core/tpu/kernels/tpu_program_group.cc @@ -22,6 +22,7 @@ limitations under the License. 
#include "tensorflow/core/tpu/kernels/tpu_compile.pb.h" #include "tensorflow/core/tpu/kernels/tpu_compile_c_api.h" #include "tensorflow/core/tpu/kernels/tpu_compile_op_support.h" +#include "tensorflow/core/tpu/kernels/tpu_program_c_api.h" #include "tensorflow/core/tpu/tpu_api.h" #include "tensorflow/stream_executor/tpu/proto_helper.h" #include "tensorflow/stream_executor/tpu/status_helper.h" @@ -98,55 +99,71 @@ StatusOr> CompileAheadOfTime( compilation_result, metadata, per_core_arg_shapes, per_core_output_shapes, per_core_variable_indices, device_assignment); } +} // namespace -Status CreateTpuProgramGroup( - absl::Span xla_tpu_programs, - TpuProgramGroupInterface* tpu_program_group_interface) { +void TpuProgramGroup::Initialize( + absl::Span xla_tpu_programs) { CHECK_GT(xla_tpu_programs.size(), 0); - TpuProgramGroup* tpu_program_group = - tensorflow::down_cast(tpu_program_group_interface); - CHECK_NE(tpu_program_group, nullptr); - tpu_program_group->set_tpu_programs(xla_tpu_programs); + set_tpu_programs(xla_tpu_programs); - // TODO(jiawenhao): Handle the case of xla_tpu_programs.size() > 1. - bool may_modify_variables; - TpuProgramApiFn()->TpuProgram_GetMayModifyVariablesFn(xla_tpu_programs[0], - &may_modify_variables); - tpu_program_group->set_may_modify_variables( - std::vector(1, may_modify_variables)); + std::vector may_modify_variables_array(xla_tpu_programs.size(), false); + std::vector executable_infos(xla_tpu_programs.size()); + std::vector host_transfer_infos( + xla_tpu_programs.size()); + std::vector hlo_metadatas(xla_tpu_programs.size()); + for (size_t i = 0; i < xla_tpu_programs.size(); ++i) { + const XLA_TpuProgram* xla_tpu_program = xla_tpu_programs[i]; + bool may_modify_variables; + TpuProgramApiFn()->TpuProgram_GetMayModifyVariablesFn( + xla_tpu_program, &may_modify_variables); + may_modify_variables_array[i] = may_modify_variables; - TpuSerializedProto serialized_executable_info; - TpuProgramApiFn()->TpuProgram_GetExecutableInfoFn( - xla_tpu_programs[0], &serialized_executable_info); - TPUExecutableInfoProto executable_info = - se_tpu::DeserializeProto( - serialized_executable_info); - tpu_program_group->set_executable_info(executable_info); - StreamExecutor_Tpu_FreeSerializedProto(&serialized_executable_info); + TpuSerializedProto serialized_executable_info; + TpuProgramApiFn()->TpuProgram_GetExecutableInfoFn( + xla_tpu_program, &serialized_executable_info); + TPUExecutableInfoProto executable_info = + se_tpu::DeserializeProto( + serialized_executable_info); + executable_infos[i] = executable_info; + StreamExecutor_Tpu_FreeSerializedProto(&serialized_executable_info); - TPUHostTransferInfoProto host_transfer_info; - TpuSerializedProto serialized_host_transfer_info; - TpuProgramApiFn()->TpuProgram_GetHostTransferInfoFn( - xla_tpu_programs[0], &serialized_host_transfer_info); - if (serialized_host_transfer_info.size > 0) { - host_transfer_info = se_tpu::DeserializeProto( - serialized_host_transfer_info); - StreamExecutor_Tpu_FreeSerializedProto(&serialized_host_transfer_info); + TPUHostTransferInfoProto host_transfer_info; + TpuSerializedProto serialized_host_transfer_info; + TpuProgramApiFn()->TpuProgram_GetHostTransferInfoFn( + xla_tpu_program, &serialized_host_transfer_info); + if (serialized_host_transfer_info.size > 0) { + host_transfer_info = se_tpu::DeserializeProto( + serialized_host_transfer_info); + StreamExecutor_Tpu_FreeSerializedProto(&serialized_host_transfer_info); + } + host_transfer_infos[i] = host_transfer_info; + + TpuSerializedProto 
serialized_hlo_metadata; + TpuProgramApiFn()->TpuProgram_GetHloMetadataFn(xla_tpu_program, + &serialized_hlo_metadata); + xla::HloProto hlo_metadata = + se_tpu::DeserializeProto(serialized_hlo_metadata); + hlo_metadatas[i] = hlo_metadata; + StreamExecutor_Tpu_FreeSerializedProto(&serialized_hlo_metadata); } - tpu_program_group->set_host_transfer_info(host_transfer_info); - TpuSerializedProto serialized_hlo_metadata; - TpuProgramApiFn()->TpuProgram_GetHloMetadataFn(xla_tpu_programs[0], - &serialized_hlo_metadata); - xla::HloProto hlo_metadata = - se_tpu::DeserializeProto(serialized_hlo_metadata); - tpu_program_group->set_hlo_metadata(hlo_metadata); - StreamExecutor_Tpu_FreeSerializedProto(&serialized_hlo_metadata); - - return Status::OK(); + may_modify_variables_ = may_modify_variables_array; + executable_infos_ = executable_infos; + host_transfer_infos_ = host_transfer_infos; + hlo_metadatas_ = hlo_metadatas; + RefreshHloMetadatasPtrs(); } -} // namespace +bool TpuProgramGroup::has_sharding_program() const { + for (const XLA_TpuProgram* tpu_program : tpu_programs_) { + if (!TpuProgramApiFn()->TpuProgram_HasShardingFn(tpu_program)) { + return false; + } + } + return true; +} + +size_t TpuProgramGroup::program_count() const { return tpu_programs_.size(); } int64_t TpuProgramGroup::program_size() const { int64_t total_size = 0; @@ -201,12 +218,6 @@ void TpuProgramGroup::UnloadAndDestroyPrograms() { TF_RET_CHECK(per_core_output_shapes.size() == per_core_variable_indices.size()); - // TODO(henrytan): add an interface to TpuProgramGroupInterface to set - // may_modify_variables. - TpuProgramGroup* tpu_program_group = - tensorflow::down_cast(tpu_program_group_interface); - tpu_program_group->may_modify_variables_ = may_modify_variables; - // With shardable input/output pairs, XLA could generate separate // sharding/unsharding programs along with the main program. The // sharding/unsharding programs will be in nested entries of the AOT @@ -221,17 +232,20 @@ void TpuProgramGroup::UnloadAndDestroyPrograms() { TF_RET_CHECK(xla_tpu_programs.size() == 1 || xla_tpu_programs.size() == metadata.num_cores_per_replica()); - TF_RETURN_IF_ERROR( - CreateTpuProgramGroup(xla_tpu_programs, tpu_program_group)); + // TODO(henrytan): add an interface to TpuProgramGroupInterface to set + // may_modify_variables. 
+ TpuProgramGroup* tpu_program_group = + tensorflow::down_cast(tpu_program_group_interface); + tpu_program_group->Initialize(xla_tpu_programs); + tpu_program_group->may_modify_variables_ = may_modify_variables; return Status::OK(); } TpuProgramGroup::TpuProgramGroup(TpuProgramGroup&& other) : may_modify_variables_(std::move(other.may_modify_variables_)), - host_compute_metadata_(std::move(other.host_compute_metadata_)), tpu_programs_(std::move(other.tpu_programs_)), - executable_info_(std::move(other.executable_info_)), - host_transfer_info_(std::move(other.host_transfer_info_)), + executable_infos_(std::move(other.executable_infos_)), + host_transfer_infos_(std::move(other.host_transfer_infos_)), hlo_metadatas_(std::move(other.hlo_metadatas_)) { RefreshHloMetadatasPtrs(); } @@ -248,6 +262,12 @@ absl::Span TpuProgramGroup::hlo_metadatas() const { return hlo_metadatas_ptrs_; } +const xla::HloProto* TpuProgramGroup::hlo_metadata(int index) const { + CHECK_GE(index, 0); + CHECK_LT(index, hlo_metadatas_ptrs_.size()); + return hlo_metadatas_ptrs_[index]; +} + void TpuProgramGroup::RefreshHloMetadatasPtrs() { hlo_metadatas_ptrs_.reserve(hlo_metadatas_.size()); for (const auto& hlo_metadata_internal_ : hlo_metadatas_) { @@ -262,6 +282,47 @@ Status TpuProgramGroup::LogCompilationStats(const TpuCompilationCacheKey& key, return Status::OK(); } +const std::vector& TpuProgramGroup::may_modify_variables() const { + return may_modify_variables_; +} + +void TpuProgramGroup::set_may_modify_variables( + const std::vector& may_modify_variables) { + may_modify_variables_ = may_modify_variables; +} + +const std::vector& TpuProgramGroup::tpu_programs() const { + return tpu_programs_; +} + +const XLA_TpuProgram* TpuProgramGroup::tpu_program(int index) const { + CHECK_GE(index, 0); + CHECK_LT(index, tpu_programs_.size()); + return tpu_programs_[index]; +} + +void TpuProgramGroup::set_tpu_programs( + absl::Span tpu_programs) { + tpu_programs_.resize(tpu_programs.size()); + for (size_t i = 0; i < tpu_programs.size(); ++i) { + tpu_programs_[i] = tpu_programs[i]; + } +} + +const TPUExecutableInfoProto& TpuProgramGroup::executable_info( + int index) const { + CHECK_GE(index, 0); + CHECK_LT(index, executable_infos_.size()); + return executable_infos_[index]; +} + +const TPUHostTransferInfoProto& TpuProgramGroup::host_transfer_info( + int index) const { + CHECK_GE(index, 0); + CHECK_LT(index, host_transfer_infos_.size()); + return host_transfer_infos_[index]; +} + /*static*/ Status TpuProgramGroup::CompileAndBuild( const TpuCompilationRequestProto& compilation_request, @@ -287,15 +348,27 @@ Status TpuProgramGroup::CompileAndBuild( TF_RET_CHECK(count == 1 || count == compilation_request.metadata().num_cores_per_replica()); - VLOG(1) << "CreateTpuProgramGroup"; - Status serialize_status = - CreateTpuProgramGroup(absl::MakeConstSpan(&xla_tpu_programs[0], count), - tpu_program_group_interface); - VLOG(1) << absl::StrCat("Run CreateTpuProgramGroup completed. 
StatusCode: ", - serialize_status.code()); + VLOG(1) << "Initialize TpuProgramGroup."; + TpuProgramGroup* tpu_program_group = + tensorflow::down_cast(tpu_program_group_interface); + tpu_program_group->Initialize( + absl::MakeConstSpan(&xla_tpu_programs[0], count)); TpuProgramApiFn()->TpuProgram_FreeArrayFn(xla_tpu_programs); - return serialize_status; + return status.status(); } +std::vector TpuProgramGroup::tpu_programs( + TpuProgramShardingType sharding_type) const { + std::vector tpu_programs; + tpu_programs.reserve(tpu_programs_.size()); + for (size_t i = 0; i < tpu_programs_.size(); ++i) { + if (TpuProgramApiFn()->TpuProgram_HasShardingFn(tpu_programs_[i])) { + tpu_programs.push_back(TpuProgramApiFn()->TpuProgram_GetTpuProgramFn( + tpu_programs_[i], sharding_type)); + CHECK_NE(tpu_programs[i], nullptr); + } + } + return tpu_programs; +} } // namespace tpu } // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_program_group.h b/tensorflow/core/tpu/kernels/tpu_program_group.h index 4bc8cdd003a..b76ef3d507a 100644 --- a/tensorflow/core/tpu/kernels/tpu_program_group.h +++ b/tensorflow/core/tpu/kernels/tpu_program_group.h @@ -102,11 +102,16 @@ class TpuProgramGroup : public TpuProgramGroupInterface { const absl::optional& xla_device_assignment, TpuProgramGroupInterface* tpu_program_group_interface); + // Initializes `TpuProgramGroup` object with `xla_tpu_programs`. + void Initialize(absl::Span xla_tpu_programs); + TpuProgramGroup() = default; TpuProgramGroup(TpuProgramGroup&& other); TpuProgramGroup& operator=(TpuProgramGroup&&) = delete; - size_t program_count() const override { return tpu_programs_.size(); } + bool has_sharding_program() const override; + + size_t program_count() const override; int64_t program_size() const override; @@ -117,58 +122,29 @@ class TpuProgramGroup : public TpuProgramGroupInterface { Status LogCompilationStats(const TpuCompilationCacheKey& key, absl::Duration duration) override; - const std::vector& may_modify_variables() const override { - return may_modify_variables_; - } - void set_may_modify_variables(const std::vector& may_modify_variables) { - may_modify_variables_ = may_modify_variables; - } + const std::vector& may_modify_variables() const override; + void set_may_modify_variables(const std::vector& may_modify_variables); - const tf2xla::HostComputeMetadata& host_compute_metadata() const { - return host_compute_metadata_; - } - void set_host_compute_metadata( - const tf2xla::HostComputeMetadata& host_compute_metadata) { - host_compute_metadata_ = host_compute_metadata; - } + const std::vector& tpu_programs() const; + std::vector tpu_programs(TpuProgramShardingType type) const; + const XLA_TpuProgram* tpu_program(int index) const; + void set_tpu_programs(absl::Span tpu_programs); - const std::vector& tpu_programs() const { - return tpu_programs_; - } - void set_tpu_programs(absl::Span tpu_programs) { - tpu_programs_.resize(tpu_programs.size()); - for (size_t i = 0; i < tpu_programs.size(); ++i) { - tpu_programs_[i] = tpu_programs[i]; - } - } - - const TPUExecutableInfoProto& executable_info() const { - return executable_info_; - } - void set_executable_info(const TPUExecutableInfoProto& executable_info) { - executable_info_ = executable_info; - } - - const TPUHostTransferInfoProto& host_transfer_info() const { - return host_transfer_info_; - } - void set_host_transfer_info( - const TPUHostTransferInfoProto& host_transfer_info) { - host_transfer_info_ = host_transfer_info; - } + const TPUExecutableInfoProto& executable_info(int index) 
const; + const TPUHostTransferInfoProto& host_transfer_info(int index) const; void set_hlo_metadata(const xla::HloProto& hlo_metadata); + const xla::HloProto* hlo_metadata(int index) const; absl::Span hlo_metadatas() const override; private: void RefreshHloMetadatasPtrs(); std::vector may_modify_variables_; - tf2xla::HostComputeMetadata host_compute_metadata_; std::vector tpu_programs_; // Not owned. - TPUExecutableInfoProto executable_info_; - TPUHostTransferInfoProto host_transfer_info_; + std::vector executable_infos_; + std::vector host_transfer_infos_; // To be consistent with the TpuProgramGroupInterface::hlo_metadatas() // signature, we store HloProto values in hlo_metadatas_ when diff --git a/tensorflow/core/tpu/kernels/tpu_program_group_interface.h b/tensorflow/core/tpu/kernels/tpu_program_group_interface.h index cb7347783b1..4af94f8e1ad 100644 --- a/tensorflow/core/tpu/kernels/tpu_program_group_interface.h +++ b/tensorflow/core/tpu/kernels/tpu_program_group_interface.h @@ -20,6 +20,8 @@ limitations under the License. #include #include +#include "absl/time/time.h" +#include "absl/types/span.h" #include "tensorflow/compiler/tf2xla/host_compute_metadata.pb.h" #include "tensorflow/compiler/xla/service/hlo.pb.h" #include "tensorflow/core/lib/core/status.h" @@ -34,13 +36,16 @@ class TpuProgramGroupInterface { public: virtual ~TpuProgramGroupInterface() = default; + // Check if whether sharding/unsharding program exists. + virtual bool has_sharding_program() const = 0; + // Computes program count. virtual size_t program_count() const = 0; // Computes total program size. virtual int64_t program_size() const = 0; - // Unloads and destroys safely Tpu programs. + // Unloads and destroys safely TPU programs. virtual void UnloadAndDestroyPrograms() = 0; // Logs program memory summary. diff --git a/tensorflow/core/tpu/tpu_library_init_fns.inc b/tensorflow/core/tpu/tpu_library_init_fns.inc index 682cc8b1c13..6914a8cd102 100644 --- a/tensorflow/core/tpu/tpu_library_init_fns.inc +++ b/tensorflow/core/tpu/tpu_library_init_fns.inc @@ -64,6 +64,8 @@ tensorflow::Status SetTpuProgramStructFn(void* library_handle) { TFTPU_SET_FN(tpu_program_fn, TpuProgram_GetHostTransferInfo); TFTPU_SET_FN(tpu_program_fn, TpuProgram_GetHloMetadata); TFTPU_SET_FN(tpu_program_fn, TpuProgram_GetMayModifyVariables); + TFTPU_SET_FN(tpu_program_fn, TpuProgram_HasSharding); + TFTPU_SET_FN(tpu_program_fn, TpuProgram_GetTpuProgram); return tensorflow::Status::OK(); } From 9d21546fea0f749648749ea6bb007a6ddff9dc7d Mon Sep 17 00:00:00 2001 From: Saurabh Saxena Date: Wed, 5 Aug 2020 16:06:22 -0700 Subject: [PATCH 2207/2522] Populate ForwardOperation.outputs. Tested using grad(exp(x)). 
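For illustration, a minimal standalone sketch (plain C++, deliberately not using the TensorFlow C API) of why the forward outputs need to be recorded on the tape: the gradient of y = exp(x) is exp(x) itself, so the backward pass can reuse the recorded forward output instead of recomputing it, which is what ExpGradientFunction does by capturing op.outputs[0].

#include <cmath>
#include <cstdio>

// Stands in for a tape entry whose forward output was recorded at execute time.
struct RecordedExp {
  double output;  // forward result, analogous to ForwardOperation.outputs[0]
  // d/dx exp(x) == exp(x), i.e. the recorded forward output itself.
  double Gradient(double upstream) const { return output * upstream; }
};

int main() {
  const double x = 1.0;
  RecordedExp op{std::exp(x)};            // forward pass records the output
  std::printf("%f\n", op.Gradient(1.0));  // ~2.718, matching TestExpGrad below
  return 0;
}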
PiperOrigin-RevId: 325121430 Change-Id: Ib293198698aff00f8f8d8b376a8da62d4c2a8038 --- tensorflow/c/eager/gradients.cc | 4 + tensorflow/c/eager/gradients_test.cc | 102 +++++++++++++++++- tensorflow/c/experimental/gradients/BUILD | 1 + .../c/experimental/gradients/math_grad.cc | 37 ++++++- .../c/experimental/gradients/math_grad.h | 1 + tensorflow/c/experimental/ops/BUILD | 24 +++++ tensorflow/c/experimental/ops/array_ops.cc | 1 + tensorflow/c/experimental/ops/array_ops.h | 2 +- tensorflow/c/experimental/ops/math_ops.cc | 55 ++++++++++ tensorflow/c/experimental/ops/math_ops.h | 31 ++++++ 10 files changed, 252 insertions(+), 6 deletions(-) create mode 100644 tensorflow/c/experimental/ops/math_ops.cc create mode 100644 tensorflow/c/experimental/ops/math_ops.h diff --git a/tensorflow/c/eager/gradients.cc b/tensorflow/c/eager/gradients.cc index 406da1291ae..39cadd421e2 100644 --- a/tensorflow/c/eager/gradients.cc +++ b/tensorflow/c/eager/gradients.cc @@ -363,6 +363,10 @@ Status Execute(AbstractOperation* op_, AbstractContext* ctx, input_ids[i] = ToId(forward_op_->inputs[i]); input_dtypes[i] = forward_op_->inputs[i]->DataType(); } + for (int i = 0; i < *num_retvals; i++) { + // TODO(srbs): Manage refcount of ForwardOperation's inputs/outputs. + forward_op_->outputs.push_back(retvals[i]); + } std::vector tape_tensors; for (auto t : retvals) { tape_tensors.push_back(TapeTensor(t, ctx)); diff --git a/tensorflow/c/eager/gradients_test.cc b/tensorflow/c/eager/gradients_test.cc index e02f189c3d2..41993b3e125 100644 --- a/tensorflow/c/eager/gradients_test.cc +++ b/tensorflow/c/eager/gradients_test.cc @@ -45,7 +45,9 @@ class CppGradients }; Status RegisterGradients(GradientRegistry* registry) { - return registry->Register("Add", AddRegisterer); + TF_RETURN_IF_ERROR(registry->Register("Add", AddRegisterer)); + TF_RETURN_IF_ERROR(registry->Register("Exp", ExpRegisterer)); + return Status::OK(); } // Computes `inputs[0] + inputs[1]` and records it on the tape. @@ -69,6 +71,26 @@ Status Add(AbstractContext* ctx, Tape* tape, registry); } +// Computes `exp(inputs[0])` and records it on the tape. +Status Exp(AbstractContext* ctx, Tape* tape, + absl::Span inputs, + absl::Span outputs, + const GradientRegistry& registry) { + AbstractOperationPtr exp_op(ctx->CreateOperation()); + ForwardOperation forward_op; + forward_op.ctx = ctx; + TF_RETURN_IF_ERROR( + Reset(exp_op.get(), "Exp", /*raw_device_name=*/nullptr, &forward_op)); + if (isa(exp_op.get())) { + TF_RETURN_IF_ERROR( + dyn_cast(exp_op.get())->SetOpName("my_exp")); + } + TF_RETURN_IF_ERROR(AddInput(exp_op.get(), inputs[0], &forward_op)); + int num_retvals = 1; + return Execute(exp_op.get(), ctx, outputs, &num_retvals, &forward_op, tape, + registry); +} + // Computes // y = inputs[0] + inputs[1] // return grad(y, {inputs[0], inputs[1]}) @@ -101,6 +123,35 @@ Status AddGradModel(AbstractContext* ctx, return Status::OK(); } +// Computes +// y = exp(inputs[0]) +// return grad(y, {inputs[0]}) +Status ExpGradModel(AbstractContext* ctx, + absl::Span inputs, + absl::Span outputs, + const GradientRegistry& registry) { + TapeVSpace vspace(ctx); + auto tape = new Tape(/*persistent=*/false); + tape->Watch(ToId(inputs[0])); // Watch x. + std::vector exp_outputs(1); + TF_RETURN_IF_ERROR(Exp(ctx, tape, inputs, absl::MakeSpan(exp_outputs), + registry)); // Compute x+y. 
+ std::unordered_map + source_tensors_that_are_targets; + + std::vector out_grads; + TF_RETURN_IF_ERROR(tape->ComputeGradient( + vspace, /*target_tensor_ids=*/{ToId(exp_outputs[0])}, + /*source_tensor_ids=*/{ToId(inputs[0])}, source_tensors_that_are_targets, + /*output_gradients=*/{}, &out_grads)); + for (auto exp_output : exp_outputs) { + exp_output->Unref(); + } + outputs[0] = out_grads[0]; + delete tape; + return Status::OK(); +} + AbstractContext* BuildFunction(const char* fn_name) { std::unique_ptr status( TF_NewStatus(), TF_DeleteStatus); @@ -150,8 +201,9 @@ Status RunModel(Model model, AbstractContext* ctx, TF_RETURN_IF_ERROR(dyn_cast(func_ctx.get()) ->Finalize(&output_list, &func)); scoped_func.reset(func); - output_list.outputs[0]->Unref(); - output_list.outputs[1]->Unref(); + for (auto output : output_list.outputs) { + output->Unref(); + } TF_RETURN_IF_ERROR(ctx->RegisterFunction(func)); } @@ -264,6 +316,50 @@ TEST_P(CppGradients, TestAddGrad) { TF_DeleteTensor(result_tensor); } +TEST_P(CppGradients, TestExpGrad) { + std::unique_ptr status( + TF_NewStatus(), TF_DeleteStatus); + AbstractContextPtr ctx; + { + AbstractContext* ctx_raw = nullptr; + Status s = + BuildImmediateExecutionContext(std::get<1>(GetParam()), &ctx_raw); + ASSERT_EQ(errors::OK, s.code()) << s.error_message(); + ctx.reset(ctx_raw); + } + + AbstractTensorHandlePtr x; + { + AbstractTensorHandle* x_raw = nullptr; + Status s = TestScalarTensorHandle(ctx.get(), 1.0f, &x_raw); + ASSERT_EQ(errors::OK, s.code()) << s.error_message(); + x.reset(x_raw); + } + + GradientRegistry registry; + Status s = RegisterGradients(®istry); + ASSERT_EQ(errors::OK, s.code()) << s.error_message(); + + // Pseudo-code: + // + // tape.watch(x) + // y = exp(x) + // outputs = tape.gradient(y, x) + std::vector outputs(1); + s = RunModel(ExpGradModel, ctx.get(), {x.get()}, absl::MakeSpan(outputs), + /*use_function=*/!std::get<2>(GetParam()), registry); + ASSERT_EQ(errors::OK, s.code()) << s.error_message(); + + TF_Tensor* result_tensor; + s = getValue(outputs[0], &result_tensor); + ASSERT_EQ(errors::OK, s.code()) << s.error_message(); + auto result_value = static_cast(TF_TensorData(result_tensor)); + EXPECT_NEAR(*result_value, 2.718, 0.001); + outputs[0]->Unref(); + TF_DeleteTensor(result_tensor); + result_tensor = nullptr; +} + // TODO(b/160888630): Enable this test with mlir after AddInputList is // supported. It is needed for AddN op which is used for gradient aggregation. #ifdef PLATFORM_GOOGLE diff --git a/tensorflow/c/experimental/gradients/BUILD b/tensorflow/c/experimental/gradients/BUILD index e3acdf7e2c3..80c4e8d9791 100644 --- a/tensorflow/c/experimental/gradients/BUILD +++ b/tensorflow/c/experimental/gradients/BUILD @@ -18,6 +18,7 @@ cc_library( "//tensorflow/c/eager:c_api_unified_internal", "//tensorflow/c/eager:gradients", "//tensorflow/c/experimental/ops:array_ops", + "//tensorflow/c/experimental/ops:math_ops", "//tensorflow/core/lib/llvm_rtti", ], ) diff --git a/tensorflow/c/experimental/gradients/math_grad.cc b/tensorflow/c/experimental/gradients/math_grad.cc index 47bd8cce23d..d8b70848d4e 100644 --- a/tensorflow/c/experimental/gradients/math_grad.cc +++ b/tensorflow/c/experimental/gradients/math_grad.cc @@ -14,9 +14,14 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/c/experimental/gradients/math_grad.h" +#include "tensorflow/c/eager/abstract_tensor_handle.h" #include "tensorflow/c/experimental/ops/array_ops.h" +#include "tensorflow/c/experimental/ops/math_ops.h" +using std::vector; +using tensorflow::ops::Conj; using tensorflow::ops::Identity; +using tensorflow::ops::Mul; namespace tensorflow { namespace gradients { @@ -26,9 +31,9 @@ class AddGradientFunction : public GradientFunction { public: Status Compute(Context* ctx, absl::Span grad_inputs, - std::vector* grad_outputs) override { + vector* grad_outputs) override { grad_outputs->resize(2); - std::vector identity_outputs(1); + vector identity_outputs(1); // TODO(b/145674566): Handle name unification in tracing code. // TODO(b/161805092): Support broadcasting. TF_RETURN_IF_ERROR(ops::Identity(ctx->ctx, {grad_inputs[0]}, @@ -44,10 +49,38 @@ class AddGradientFunction : public GradientFunction { ~AddGradientFunction() override {} }; +class ExpGradientFunction : public GradientFunction { + public: + explicit ExpGradientFunction(AbstractTensorHandle* exp) : exp_(exp) { + exp->Ref(); + } + Status Compute(Context* ctx, + absl::Span grad_inputs, + vector* grad_outputs) override { + vector conj_outputs(1); + TF_RETURN_IF_ERROR( + Conj(ctx->ctx, {exp_.get()}, absl::MakeSpan(conj_outputs), "ExpConj")); + AbstractTensorHandlePtr conj_output_releaser(conj_outputs[0]); + grad_outputs->resize(1); + TF_RETURN_IF_ERROR(Mul(ctx->ctx, {conj_outputs[0], grad_inputs[0]}, + absl::MakeSpan(*grad_outputs), "ExpGradMul")); + return Status::OK(); + } + ~ExpGradientFunction() override {} + + private: + AbstractTensorHandlePtr exp_; +}; + } // namespace GradientFunction* AddRegisterer(const ForwardOperation& op) { return new AddGradientFunction; } + +GradientFunction* ExpRegisterer(const ForwardOperation& op) { + return new ExpGradientFunction(op.outputs[0]); +} + } // namespace gradients } // namespace tensorflow diff --git a/tensorflow/c/experimental/gradients/math_grad.h b/tensorflow/c/experimental/gradients/math_grad.h index 473253f9b27..6c7242a1a49 100644 --- a/tensorflow/c/experimental/gradients/math_grad.h +++ b/tensorflow/c/experimental/gradients/math_grad.h @@ -20,6 +20,7 @@ limitations under the License. 
namespace tensorflow { namespace gradients { GradientFunction* AddRegisterer(const ForwardOperation& op); +GradientFunction* ExpRegisterer(const ForwardOperation& op); } // namespace gradients } // namespace tensorflow diff --git a/tensorflow/c/experimental/ops/BUILD b/tensorflow/c/experimental/ops/BUILD index 312709f4332..d13d7a72d3e 100644 --- a/tensorflow/c/experimental/ops/BUILD +++ b/tensorflow/c/experimental/ops/BUILD @@ -15,6 +15,7 @@ cc_library( "//tensorflow:internal", ], deps = [ + "//tensorflow/c/eager:abstract_context", "//tensorflow/c/eager:abstract_operation", "//tensorflow/c/eager:abstract_tensor_handle", "//tensorflow/c/eager:c_api_unified_internal", @@ -22,3 +23,26 @@ cc_library( "//tensorflow/core/platform:errors", ], ) + +cc_library( + name = "math_ops", + srcs = [ + "math_ops.cc", + ], + hdrs = [ + "math_ops.h", + ], + visibility = [ + "//tensorflow:internal", + ], + deps = [ + ":array_ops", + "//tensorflow/c/eager:abstract_context", + "//tensorflow/c/eager:abstract_operation", + "//tensorflow/c/eager:abstract_tensor_handle", + "//tensorflow/c/eager:c_api_unified_internal", + "//tensorflow/core:framework_headers_lib", + "//tensorflow/core/lib/llvm_rtti", + "//tensorflow/core/platform:errors", + ], +) diff --git a/tensorflow/c/experimental/ops/array_ops.cc b/tensorflow/c/experimental/ops/array_ops.cc index e38b00088cf..ab2d114d9d9 100644 --- a/tensorflow/c/experimental/ops/array_ops.cc +++ b/tensorflow/c/experimental/ops/array_ops.cc @@ -14,6 +14,7 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/c/experimental/ops/array_ops.h" +#include "tensorflow/c/eager/c_api_unified_experimental_internal.h" #include "tensorflow/core/platform/errors.h" namespace tensorflow { diff --git a/tensorflow/c/experimental/ops/array_ops.h b/tensorflow/c/experimental/ops/array_ops.h index 8a9db484c2e..226461fd286 100644 --- a/tensorflow/c/experimental/ops/array_ops.h +++ b/tensorflow/c/experimental/ops/array_ops.h @@ -15,9 +15,9 @@ limitations under the License. #ifndef TENSORFLOW_C_EXPERIMENTAL_OPS_ARRAY_OPS_H_ #define TENSORFLOW_C_EXPERIMENTAL_OPS_ARRAY_OPS_H_ +#include "tensorflow/c/eager/abstract_context.h" #include "tensorflow/c/eager/abstract_operation.h" #include "tensorflow/c/eager/abstract_tensor_handle.h" -#include "tensorflow/c/eager/c_api_unified_experimental_internal.h" #include "tensorflow/core/lib/llvm_rtti/llvm_rtti.h" namespace tensorflow { diff --git a/tensorflow/c/experimental/ops/math_ops.cc b/tensorflow/c/experimental/ops/math_ops.cc new file mode 100644 index 00000000000..e91acbd6370 --- /dev/null +++ b/tensorflow/c/experimental/ops/math_ops.cc @@ -0,0 +1,55 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tensorflow/c/experimental/ops/math_ops.h" + +#include "tensorflow/c/eager/abstract_context.h" +#include "tensorflow/c/eager/abstract_tensor_handle.h" +#include "tensorflow/c/eager/c_api_unified_experimental_internal.h" +#include "tensorflow/c/experimental/ops/array_ops.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/platform/errors.h" +namespace tensorflow { +namespace ops { +using tensorflow::tracing::TracingOperation; + +Status Mul(AbstractContext* ctx, absl::Span inputs, + absl::Span outputs, const char* name) { + AbstractOperationPtr mul_op(ctx->CreateOperation()); + TF_RETURN_IF_ERROR(mul_op->Reset("Mul", /*raw_device_name=*/nullptr)); + if (isa(mul_op.get())) { + TF_RETURN_IF_ERROR( + dyn_cast(mul_op.get())->SetOpName(name)); + } + TF_RETURN_IF_ERROR(mul_op->AddInput(inputs[0])); + TF_RETURN_IF_ERROR(mul_op->AddInput(inputs[1])); + int num_retvals = 1; + return mul_op->Execute(outputs, &num_retvals); +} + +Status Conj(AbstractContext* ctx, + absl::Span inputs, + absl::Span outputs, const char* name) { + auto dtype = inputs[0]->DataType(); + if (DataTypeIsFloating(BaseType(dtype)) || + DataTypeIsInteger(BaseType(dtype))) { + TF_RETURN_IF_ERROR(Identity(ctx, inputs, outputs, name)); + } else { + return errors::Unimplemented("Conj does not support complex types yet."); + } + return Status::OK(); +} + +} // namespace ops +} // namespace tensorflow diff --git a/tensorflow/c/experimental/ops/math_ops.h b/tensorflow/c/experimental/ops/math_ops.h new file mode 100644 index 00000000000..4d7c3d838ce --- /dev/null +++ b/tensorflow/c/experimental/ops/math_ops.h @@ -0,0 +1,31 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_C_EXPERIMENTAL_OPS_MATH_OPS_H_ +#define TENSORFLOW_C_EXPERIMENTAL_OPS_MATH_OPS_H_ + +#include "tensorflow/c/eager/abstract_context.h" +#include "tensorflow/c/eager/abstract_tensor_handle.h" + +namespace tensorflow { +namespace ops { +Status Mul(AbstractContext* ctx, absl::Span inputs, + absl::Span outputs, const char* name); +Status Conj(AbstractContext* ctx, + absl::Span inputs, + absl::Span outputs, const char* name); +} // namespace ops +} // namespace tensorflow + +#endif // TENSORFLOW_C_EXPERIMENTAL_OPS_MATH_OPS_H_ From 6c9ba7128e5822921ae7c3bca80a8005dd0aa508 Mon Sep 17 00:00:00 2001 From: Advait Jain Date: Wed, 5 Aug 2020 16:07:09 -0700 Subject: [PATCH 2208/2522] Recovering some of the xtensa cycle regression. 
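For context, the cycles are recovered by turning the small per-node accessors in kernel_util into inline definitions in the header, so the compiler can inline them at each call site instead of emitting out-of-line calls from the kernels. A minimal standalone sketch of the pattern, using hypothetical names rather than the actual TFLM declarations:

// sketch.h -- hypothetical header illustrating the inline-accessor pattern.
#ifndef SKETCH_H_
#define SKETCH_H_

struct EvalTensor { const void* data; };

// Defined inline in the header: every translation unit that includes this can
// inline the body, avoiding a function call in hot kernel loops.
inline const void* GetData(const EvalTensor* t) { return t->data; }

#endif  // SKETCH_H_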
PiperOrigin-RevId: 325121569 Change-Id: I3bee9c46f6d0622781b3fe684a790fab061d6699 --- tensorflow/lite/micro/kernels/BUILD | 1 + tensorflow/lite/micro/kernels/kernel_util.cc | 31 +-------------- tensorflow/lite/micro/kernels/kernel_util.h | 40 +++++++++++++++----- 3 files changed, 34 insertions(+), 38 deletions(-) diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index d88bf91688c..dcf2337aa24 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD @@ -520,6 +520,7 @@ cc_library( hdrs = ["kernel_util.h"], deps = [ "//tensorflow/lite/c:common", + "//tensorflow/lite/kernels/internal:compatibility", "//tensorflow/lite/kernels/internal:types", ], ) diff --git a/tensorflow/lite/micro/kernels/kernel_util.cc b/tensorflow/lite/micro/kernels/kernel_util.cc index 860887add69..1ddfc1d3a29 100644 --- a/tensorflow/lite/micro/kernels/kernel_util.cc +++ b/tensorflow/lite/micro/kernels/kernel_util.cc @@ -15,38 +15,11 @@ limitations under the License. #include "tensorflow/lite/micro/kernels/kernel_util.h" +#include "tensorflow/lite/c/common.h" + namespace tflite { namespace micro { -const TfLiteEvalTensor* GetEvalInput(const TfLiteContext* context, - const TfLiteNode* node, int index) { - return GetMutableEvalInput(context, node, index); -} - -TfLiteEvalTensor* GetMutableEvalInput(const TfLiteContext* context, - const TfLiteNode* node, int index) { - TFLITE_DCHECK(context != nullptr); - TFLITE_DCHECK(node != nullptr); - return context->GetEvalTensor(context, node->inputs->data[index]); -} - -TfLiteEvalTensor* GetEvalOutput(const TfLiteContext* context, - const TfLiteNode* node, int index) { - TFLITE_DCHECK(context != nullptr); - TFLITE_DCHECK(node != nullptr); - return context->GetEvalTensor(context, node->outputs->data[index]); -} - -const RuntimeShape GetTensorShape(const TfLiteEvalTensor* tensor) { - if (tensor == nullptr) { - return RuntimeShape(); - } - TfLiteIntArray* dims = tensor->dims; - const int dims_size = dims->size; - const int32_t* dims_data = reinterpret_cast(dims->data); - return RuntimeShape(dims_size, dims_data); -} - bool HaveSameShapes(const TfLiteEvalTensor* input1, const TfLiteEvalTensor* input2) { TFLITE_DCHECK(input1 != nullptr); diff --git a/tensorflow/lite/micro/kernels/kernel_util.h b/tensorflow/lite/micro/kernels/kernel_util.h index fe702dceee0..530e52df5f5 100644 --- a/tensorflow/lite/micro/kernels/kernel_util.h +++ b/tensorflow/lite/micro/kernels/kernel_util.h @@ -16,24 +16,38 @@ limitations under the License. #ifndef TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_UTIL_H_ #define TENSORFLOW_LITE_MICRO_KERNELS_KERNEL_UTIL_H_ +#include + #include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/compatibility.h" #include "tensorflow/lite/kernels/internal/types.h" namespace tflite { namespace micro { -// Returns the TfLiteEvalTensor struct for a given input index in a node. -const TfLiteEvalTensor* GetEvalInput(const TfLiteContext* context, - const TfLiteNode* node, int index); - // Returns a mutable tensor for a given input index. is_variable must be checked // during prepare when the full TfLiteTensor is available. 
-TfLiteEvalTensor* GetMutableEvalInput(const TfLiteContext* context, - const TfLiteNode* node, int index); +inline TfLiteEvalTensor* GetMutableEvalInput(const TfLiteContext* context, + const TfLiteNode* node, + int index) { + TFLITE_DCHECK(context != nullptr); + TFLITE_DCHECK(node != nullptr); + return context->GetEvalTensor(context, node->inputs->data[index]); +} + +// Returns the TfLiteEvalTensor struct for a given input index in a node. +inline const TfLiteEvalTensor* GetEvalInput(const TfLiteContext* context, + const TfLiteNode* node, int index) { + return GetMutableEvalInput(context, node, index); +} // Returns the TfLiteEvalTensor struct for a given output index in a node. -TfLiteEvalTensor* GetEvalOutput(const TfLiteContext* context, - const TfLiteNode* node, int index); +inline TfLiteEvalTensor* GetEvalOutput(const TfLiteContext* context, + const TfLiteNode* node, int index) { + TFLITE_DCHECK(context != nullptr); + TFLITE_DCHECK(node != nullptr); + return context->GetEvalTensor(context, node->outputs->data[index]); +} // Returns data for a TfLiteEvalTensor struct. template @@ -49,7 +63,15 @@ const T* GetTensorData(const TfLiteEvalTensor* tensor) { } // Returns the shape of a TfLiteEvalTensor struct. -const RuntimeShape GetTensorShape(const TfLiteEvalTensor* tensor); +inline const RuntimeShape GetTensorShape(const TfLiteEvalTensor* tensor) { + if (tensor == nullptr) { + return RuntimeShape(); + } + TfLiteIntArray* dims = tensor->dims; + const int dims_size = dims->size; + const int32_t* dims_data = reinterpret_cast(dims->data); + return RuntimeShape(dims_size, dims_data); +} // Return true if the given tensors have the same shape. bool HaveSameShapes(const TfLiteEvalTensor* input1, From c9f925ac90524d1767fa7d78793b2f6a283cf3fc Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Wed, 5 Aug 2020 16:16:28 -0700 Subject: [PATCH 2209/2522] [XLA] Define partial replication in HloSharding For partial replication partial tiling, add a trailing dimension to tile_assignment, representing the subgroups of replication. 
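For reference, a minimal usage sketch mirroring the parser test added below (illustrative only; assumes xla::Array<int64> and the new HloSharding::PartialTile overload):

    // Two tile groups along the data dimension; each group of two devices
    // holds a replicated copy of its tile. tile_assignment() therefore has
    // shape [2,2]: the data dimension plus a trailing replication-subgroup
    // dimension.
    Array<int64> group_tiling({2});
    group_tiling(0) = 0;  // tile 0 belongs to group 0
    group_tiling(1) = 1;  // tile 1 belongs to group 1
    std::vector<int64> group0_members({0, 1});  // devices replicating tile 0
    std::vector<int64> group1_members({2, 3});  // devices replicating tile 1
    HloSharding sharding =
        HloSharding::PartialTile(group_tiling, {group0_members, group1_members});
    // sharding.ToString() == "{devices=[2,2]0,1,2,3 last_tile_dim_replicate}",
    // i.e. the textual form handled by the new last_tile_dim_replicate keyword.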
PiperOrigin-RevId: 325123131 Change-Id: I0e86b492e286d66d329dd9719faa0d47027f89b1 --- tensorflow/compiler/xla/service/hlo_lexer.cc | 3 ++ tensorflow/compiler/xla/service/hlo_lexer.h | 1 + tensorflow/compiler/xla/service/hlo_parser.cc | 6 +++ .../compiler/xla/service/hlo_parser_test.cc | 15 +++++++ .../compiler/xla/service/hlo_sharding.cc | 43 ++++++++++++++++--- .../compiler/xla/service/hlo_sharding.h | 39 ++++++++++++++--- tensorflow/compiler/xla/xla_data.proto | 5 +++ 7 files changed, 101 insertions(+), 11 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_lexer.cc b/tensorflow/compiler/xla/service/hlo_lexer.cc index 5502665e886..749193a83ef 100644 --- a/tensorflow/compiler/xla/service/hlo_lexer.cc +++ b/tensorflow/compiler/xla/service/hlo_lexer.cc @@ -281,6 +281,7 @@ TokKind HloLexer::LexIdentifier() { KEYWORD(ROOT); KEYWORD(maximal); KEYWORD(replicated); + KEYWORD(last_tile_dim_replicate); #undef KEYWORD @@ -495,6 +496,8 @@ string TokKindToString(TokKind kind) { return "kw_maximal"; case TokKind::kw_replicated: return "kw_replicated"; + case TokKind::kw_last_tile_dim_replicate: + return "kw_last_tile_dim_replicate"; case TokKind::kw_nan: return "kw_nan"; case TokKind::kw_inf: diff --git a/tensorflow/compiler/xla/service/hlo_lexer.h b/tensorflow/compiler/xla/service/hlo_lexer.h index 6a59f180ad8..b8c7debaab4 100644 --- a/tensorflow/compiler/xla/service/hlo_lexer.h +++ b/tensorflow/compiler/xla/service/hlo_lexer.h @@ -61,6 +61,7 @@ enum class TokKind { kw_false, kw_maximal, kw_replicated, + kw_last_tile_dim_replicate, kw_nan, kw_inf, diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc index 0530062c43b..b12779e65ce 100644 --- a/tensorflow/compiler/xla/service/hlo_parser.cc +++ b/tensorflow/compiler/xla/service/hlo_parser.cc @@ -2129,6 +2129,7 @@ bool HloParserImpl::ParseSingleSharding(OpSharding* sharding, LocTy loc = lexer_.GetLoc(); bool maximal = false; bool replicated = false; + bool last_tile_dim_replicate = false; std::vector devices; std::vector tile_assignment_dimensions; while (lexer_.GetKind() != TokKind::kRbrace) { @@ -2180,6 +2181,10 @@ bool HloParserImpl::ParseSingleSharding(OpSharding* sharding, } break; } + case TokKind::kw_last_tile_dim_replicate: + last_tile_dim_replicate = true; + lexer_.Lex(); + break; case TokKind::kRbrace: break; default: @@ -2218,6 +2223,7 @@ bool HloParserImpl::ParseSingleSharding(OpSharding* sharding, for (int64 device : devices) { sharding->add_tile_assignment_devices(device); } + sharding->set_replicate_on_last_tile_dim(last_tile_dim_replicate); } lexer_.Lex(); diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc index 484578e5e0e..1b33cf2f4c3 100644 --- a/tensorflow/compiler/xla/service/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc @@ -2626,6 +2626,21 @@ TEST_F(HloParserTest, ParseSharding) { EXPECT_EQ(sharding.ToString(), original); } +TEST_F(HloParserTest, ParseShardingPartialReplication) { + const string original = "{devices=[2,2]0,1,2,3 last_tile_dim_replicate}"; + TF_ASSERT_OK_AND_ASSIGN(HloSharding sharding, ParseSharding(original)); + EXPECT_EQ(sharding.ToString(), original); + Array group_tiling({2}); + group_tiling(0) = 0; + group_tiling(1) = 1; + std::vector group0_members({0, 1}); + std::vector group1_members({2, 3}); + EXPECT_EQ( + HloSharding::PartialTile(group_tiling, {group0_members, group1_members}) + .ToString(), + original); +} + TEST_F(HloParserTest, 
ParseFrontendAttributes) { const string original = R"({attr_a="test_a",attr_b="b",attr_c="s64",attr_d="a/b"})"; diff --git a/tensorflow/compiler/xla/service/hlo_sharding.cc b/tensorflow/compiler/xla/service/hlo_sharding.cc index b0a03707efb..d522fc8bd14 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding.cc +++ b/tensorflow/compiler/xla/service/hlo_sharding.cc @@ -39,6 +39,22 @@ HloSharding HloSharding::Tile1D(const Shape& input_shape, int64 num_tiles) { return HloSharding(assignment); } +HloSharding HloSharding::PartialTile( + const Array& group_tile_assignment, + absl::Span> replication_groups) { + auto new_tile_dims = group_tile_assignment.dimensions(); + new_tile_dims.push_back(replication_groups[0].size()); + auto new_tile_assignment = Array(new_tile_dims); + new_tile_assignment.Each([&](absl::Span indices, int64* device) { + std::vector group_index(indices.begin(), indices.end()); + group_index.pop_back(); + int64 group = group_tile_assignment(group_index); + *device = replication_groups[group][indices.back()]; + }); + return HloSharding(new_tile_assignment, + /*replicate_on_last_tile_dim=*/true); +} + HloSharding HloSharding::Tuple(const ShapeTree& sub_shardings) { std::vector flattened_list; flattened_list.reserve(sub_shardings.leaf_count()); @@ -101,8 +117,10 @@ string HloSharding::ToString() const { return StrCat( "{maximal device=", static_cast(*tile_assignment_.begin()), "}"); } - return StrCat("{devices=[", StrJoin(tile_assignment_.dimensions(), ","), "]", - StrJoin(tile_assignment_, ","), "}"); + return StrCat( + "{devices=[", StrJoin(tile_assignment_.dimensions(), ","), "]", + StrJoin(tile_assignment_, ","), + replicate_on_last_tile_dim_ ? " last_tile_dim_replicate}" : "}"); } bool HloSharding::UsesDevice(int64 device) const { @@ -148,6 +166,9 @@ std::vector HloSharding::TileIndexForDevice(int64 device) const { } }); CHECK(!ret_index.empty()); + if (replicate_on_last_tile_dim_) { + ret_index.pop_back(); + } return ret_index; } @@ -157,6 +178,12 @@ int64 HloSharding::DeviceForTileIndex(absl::Span index) const { if (maximal_) { return *tile_assignment_.begin(); } + if (replicate_on_last_tile_dim_ && + index.size() < tile_assignment().num_dimensions()) { + std::vector first_replicated_index(index.begin(), index.end()); + first_replicated_index.push_back(0); + return tile_assignment_(first_replicated_index); + } return tile_assignment_(index); } @@ -341,8 +368,10 @@ Status HloSharding::ValidateNonTuple(const Shape& shape, return Status::OK(); } - // The tile assignment tensor must have the same rank as the input. - if (shape.rank() != tile_assignment_.num_dimensions()) { + // The tile assignment tensor must have the same rank as the input, or input + // rank + 1 for replicate_on_last_tile_dim_. + if (shape.rank() + (replicate_on_last_tile_dim_ ? 1 : 0) != + tile_assignment_.num_dimensions()) { return tensorflow::errors::InvalidArgument( "Number of tile assignment dimensions is different to the input rank. 
" "sharding=", @@ -403,7 +432,7 @@ Status HloSharding::ValidateNonTuple(const Shape& shape, proto.tile_assignment_dimensions().end())); std::copy(proto.tile_assignment_devices().begin(), proto.tile_assignment_devices().end(), tile_assignment.begin()); - return HloSharding(tile_assignment); + return HloSharding(tile_assignment, proto.replicate_on_last_tile_dim()); } OpSharding HloSharding::ToProto() const { @@ -429,6 +458,7 @@ OpSharding HloSharding::ToProto() const { result.set_type(OpSharding::MAXIMAL); } else { result.set_type(OpSharding::OTHER); + result.set_replicate_on_last_tile_dim(ReplicateOnLastTileDim()); } return result; } @@ -516,6 +546,9 @@ size_t HloSharding::Hash() const { for (uint32 v : tile_assignment_) { h = tensorflow::Hash64Combine(h, std::hash{}(v)); } + if (replicate_on_last_tile_dim_) { + h = tensorflow::Hash64Combine(h, std::hash{}(1)); + } return h; } diff --git a/tensorflow/compiler/xla/service/hlo_sharding.h b/tensorflow/compiler/xla/service/hlo_sharding.h index 20fa7232e65..af28df56e68 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding.h +++ b/tensorflow/compiler/xla/service/hlo_sharding.h @@ -54,6 +54,13 @@ class HloSharding { return HloSharding(tile_assignment); } + // Creates a new sharding where data is replicated within each replication + // group, and sharded across replication groups according to + // group_tile_assignment. + static HloSharding PartialTile( + const Array& group_tile_assignment, + absl::Span> replication_groups); + // Creates a new sharding which splits a one-dimensional input shape into // `num_tiles` tiles. static HloSharding Tile1D(const Shape& input_shape, int64 num_tiles); @@ -115,6 +122,11 @@ class HloSharding { }); } + // Returns if the sharding has partial replication and partial sharding. If + // true, data is sharded according to other dimensions of tile_assignment(), + // but replicated across devices along the last dimension. + bool ReplicateOnLastTileDim() const { return replicate_on_last_tile_dim_; } + // Returns true if the sharding defines an operation on the given device. bool UsesDevice(int64 device) const; @@ -132,6 +144,10 @@ class HloSharding { // Returns the device that should execute the given tile. // It is an error to call this if is_replicated() is true. + // When ReplicateOnLastTileDim() == true, if index.size() == data rank, it + // returns the first device in that replicated subgroup; otherwise, + // index.size() should be the same as tile_assignment()'s rank and specifies + // the member of the replication subgroup. 
// REQUIRES: !IsTuple() int64 DeviceForTileIndex(absl::Span index) const; @@ -188,7 +204,8 @@ class HloSharding { bool operator==(const HloSharding& other) const { return replicated_ == other.replicated_ && maximal_ == other.maximal_ && tile_assignment_ == other.tile_assignment_ && - tuple_elements_ == other.tuple_elements_; + tuple_elements_ == other.tuple_elements_ && + replicate_on_last_tile_dim_ == other.replicate_on_last_tile_dim_; } bool operator!=(const HloSharding& other) const { return !(*this == other); } @@ -225,7 +242,8 @@ class HloSharding { : replicated_(true), maximal_(true), tuple_(false), - tile_assignment_({0}) {} + tile_assignment_({0}), + replicate_on_last_tile_dim_(false) {} // device_id values: // -2: magic number to mean unassigned device, used by spatial partitioning // -1: the id of the host @@ -236,18 +254,22 @@ class HloSharding { : replicated_(false), maximal_(true), tuple_(false), - tile_assignment_({1}, device_id) {} - explicit HloSharding(const Array& tile_assignment) + tile_assignment_({1}, device_id), + replicate_on_last_tile_dim_(false) {} + explicit HloSharding(const Array& tile_assignment, + bool replicate_on_last_tile_dim = false) : replicated_(false), maximal_(false), tuple_(false), - tile_assignment_(tile_assignment) {} + tile_assignment_(tile_assignment), + replicate_on_last_tile_dim_(replicate_on_last_tile_dim) {} explicit HloSharding(const std::vector& tuple_shardings) : replicated_(false), maximal_(false), tuple_(true), tile_assignment_({0}), - tuple_elements_(tuple_shardings) {} + tuple_elements_(tuple_shardings), + replicate_on_last_tile_dim_(false) {} // Checks that the number of elements in tuple_elements_ is consistent with // the tuple shape passes as argument. @@ -283,6 +305,11 @@ class HloSharding { // present for the root. This is a flattened list of all the leaf shardings in // a tuple shape, by pre-order walk (ShapeTree iterator order). std::vector tuple_elements_; + // This flag is to support partial replication and partial sharding. If it is + // true, tile_assignment_ will have an extra dimension in addition to the data + // shape rank, and the added last dimension represents the subgroups of + // replications, i.e., elements in slice [..., :] will be replicated. + bool replicate_on_last_tile_dim_; }; std::ostream& operator<<(std::ostream& out, const HloSharding& sharding); diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto index e8b6105d3fe..d334f879c3e 100644 --- a/tensorflow/compiler/xla/xla_data.proto +++ b/tensorflow/compiler/xla/xla_data.proto @@ -627,6 +627,11 @@ message OpSharding { // applied, this is inferred from the instruction this sharding gets attached // to. repeated OpSharding tuple_shardings = 5; + + // Only used for OTHER type. If true, data is sharded according to other + // dimensions of tile_assignment(), but replicated across devices along the + // last dimension. (Experimental) + bool replicate_on_last_tile_dim = 6; } // Describes the replica groups in a cross replica op (e.g., all-reduce and From 7e744a368165c0e1b84cf8d6bef5ed3c1bab8bbf Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Wed, 5 Aug 2020 16:24:33 -0700 Subject: [PATCH 2210/2522] Truncate to seconds without using timespec= timespec= isn't compatible with Python 3.5. 
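The timespec parameter of datetime.isoformat() was only added in Python 3.6; truncating with .replace(microsecond=0) before calling .isoformat() produces the same seconds-precision timestamp while still working on Python 3.5.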
PiperOrigin-RevId: 325124472 Change-Id: Id0a88eb60d0483d3f29e1c0e64677a637ec5a2b4 --- tensorflow/tools/ci_build/sizetrack_helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/sizetrack_helper.py b/tensorflow/tools/ci_build/sizetrack_helper.py index 032dbdf7490..6b4110e265f 100755 --- a/tensorflow/tools/ci_build/sizetrack_helper.py +++ b/tensorflow/tools/ci_build/sizetrack_helper.py @@ -279,7 +279,7 @@ def get_upload_path(): if FLAGS.upload and FLAGS.artifact: artifact_filename = os.path.basename(FLAGS.artifact.name) ts = datetime.datetime.now( - datetime.timezone.utc).isoformat(timespec="seconds") + datetime.timezone.utc).replace(microsecond=0).isoformat() # note: not os.path.join here, because gsutil is always linux-style # Using a timestamp prevents duplicate entries path = "{bucket}/{team}/{artifact_id}/{now}.{artifact_filename}".format( From 9a4ffa19f23829e55a50194c455b3c7a092b2040 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 5 Aug 2020 16:29:14 -0700 Subject: [PATCH 2211/2522] Integrate LLVM at llvm/llvm-project@acb66b9111ba Updates LLVM usage to match [acb66b9111ba](https://github.com/llvm/llvm-project/commit/acb66b9111ba) PiperOrigin-RevId: 325125249 Change-Id: I149d91bf019a0f39a71a1309fe4df4d1c16ba079 --- tensorflow/workspace.bzl | 4 ++-- third_party/mlir/BUILD | 34 ++++++++++++++++++++++++++++++++-- third_party/mlir/test.BUILD | 14 -------------- 3 files changed, 34 insertions(+), 18 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 251f64ce100..15559f991d3 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -712,8 +712,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. 
- LLVM_COMMIT = "c558c22cab9a555d2e521102b775759381e9727f" - LLVM_SHA256 = "b3651e78f4f3b372273c71cb58e0d0767b61e7d9c93b79fd399065c1148089f5" + LLVM_COMMIT = "acb66b9111ba793509b5468a58107108317b7cf5" + LLVM_SHA256 = "0a8053f9b75d796b475b038502d80674c02fe89d02eb14da00bd9ec8f39e6c49" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), diff --git a/third_party/mlir/BUILD b/third_party/mlir/BUILD index 3941375bc02..8c43ebe6359 100644 --- a/third_party/mlir/BUILD +++ b/third_party/mlir/BUILD @@ -123,6 +123,38 @@ cc_library( ], ) +cc_library( + name = "CAPIIR", + srcs = [ + "lib/CAPI/IR/IR.cpp", + ], + hdrs = [ + "include/mlir-c/IR.h", + ], + includes = ["include"], + deps = [ + ":IR", + ":Parser", + ":Support", + "@llvm-project//llvm:Support", + ], +) + +cc_library( + name = "CAPIRegistration", + srcs = [ + "lib/CAPI/Registration/Registration.cpp", + ], + hdrs = [ + "include/mlir-c/Registration.h", + ], + includes = ["include"], + deps = [ + ":AllPassesAndDialectsNoRegistration", + ":CAPIIR", + ], +) + cc_library( name = "EDSCInterface", srcs = [ @@ -2775,7 +2807,6 @@ cc_library( "@llvm-project//mlir/test:TestAffine", "@llvm-project//mlir/test:TestDialect", "@llvm-project//mlir/test:TestIR", - "@llvm-project//mlir/test:TestLLVMIR", "@llvm-project//mlir/test:TestPass", "@llvm-project//mlir/test:TestReducer", "@llvm-project//mlir/test:TestSPIRV", @@ -2928,7 +2959,6 @@ cc_binary( "@llvm-project//mlir/test:TestAffine", "@llvm-project//mlir/test:TestDialect", "@llvm-project//mlir/test:TestIR", - "@llvm-project//mlir/test:TestLLVMIR", "@llvm-project//mlir/test:TestPass", "@llvm-project//mlir/test:TestReducer", "@llvm-project//mlir/test:TestSPIRV", diff --git a/third_party/mlir/test.BUILD b/third_party/mlir/test.BUILD index 6c4eeecc346..8e72e72773b 100644 --- a/third_party/mlir/test.BUILD +++ b/third_party/mlir/test.BUILD @@ -171,7 +171,6 @@ cc_library( "lib/Target/TestLLVMTypeTranslation.cpp", ], deps = [ - ":TestLLVMIR", "@llvm-project//mlir:IR", "@llvm-project//mlir:LLVMDialect", "@llvm-project//mlir:LLVMIRModuleTranslation", @@ -230,19 +229,6 @@ cc_library( ], ) -cc_library( - name = "TestLLVMIR", - srcs = [ - "lib/Dialect/LLVMIR/LLVMTypeTestDialect.cpp", - ], - deps = [ - "@llvm-project//llvm:Support", - "@llvm-project//mlir:Dialect", - "@llvm-project//mlir:IR", - "@llvm-project//mlir:LLVMDialect", - ], -) - cc_library( name = "TestSPIRV", srcs = glob([ From 766dec30b45ad1fb3a801413222d24a2c0173665 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Wed, 5 Aug 2020 23:36:52 +0000 Subject: [PATCH 2212/2522] Fix issue in tf.string.format wheree unicode is not rendered correctly This PR tries to address the issue raised in 42001 where unicode tensor is not rendered correctly in tf.string.format. The issue was that tf.string.format incorrectly escape the tensor. This PR address the issue by adding escape option in tensor print function to selectively control the escape. This PR fixes 42001. 
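A standalone sketch of the behavioral difference (not part of the patch; assumes Abseil's absl/strings/escaping.h):

    #include <iostream>
    #include <string>
    #include "absl/strings/escaping.h"

    int main() {
      const std::string s = "\xF0\x9F\x98\x8A";  // U+1F60A (smiling face) in UTF-8
      // CEscape escapes every non-ASCII byte, so the emoji is rendered as
      // octal escapes: \360\237\230\212
      std::cout << absl::CEscape(s) << "\n";
      // Utf8SafeCEscape leaves valid UTF-8 sequences untouched, so the emoji
      // is printed as-is, which is what the tensor printing path now relies on.
      std::cout << absl::Utf8SafeCEscape(s) << "\n";
      return 0;
    }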
Signed-off-by: Yong Tang --- tensorflow/core/framework/tensor.cc | 4 ++-- tensorflow/python/kernel_tests/string_format_op_test.py | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/framework/tensor.cc b/tensorflow/core/framework/tensor.cc index a0f30508ee8..03499ec0220 100644 --- a/tensorflow/core/framework/tensor.cc +++ b/tensorflow/core/framework/tensor.cc @@ -1007,9 +1007,9 @@ inline const strings::AlphaNum& PrintOneElement(const strings::AlphaNum& a, } inline string PrintOneElement(const tstring& a, bool print_v2) { if (print_v2) { - return "\"" + absl::CEscape(a) + "\""; + return "\"" + absl::Utf8SafeCEscape(a) + "\""; } else { - return absl::CEscape(a); + return absl::Utf8SafeCEscape(a); } } inline float PrintOneElement(const Eigen::half& h, bool print_v2) { diff --git a/tensorflow/python/kernel_tests/string_format_op_test.py b/tensorflow/python/kernel_tests/string_format_op_test.py index adb8ad6e677..51a59e7a113 100644 --- a/tensorflow/python/kernel_tests/string_format_op_test.py +++ b/tensorflow/python/kernel_tests/string_format_op_test.py @@ -379,6 +379,15 @@ class StringFormatOpTest(test.TestCase): format_output = string_ops.string_format("{}", (tensor, tensor)) self.evaluate(format_output) + @test_util.run_in_graph_and_eager_modes() + def testTensorAndFormatUnicode(self): + with self.cached_session(): + tensor = constant_op.constant('😊') + format_output = string_ops.string_format("😊:{}", tensor) + out = self.evaluate(format_output) + expected = '😊:"😊"' + self.assertEqual(compat.as_text(out), expected) + if __name__ == "__main__": test.main() From 0c2b2a3063dc4334b62eade3dace0e8c171df539 Mon Sep 17 00:00:00 2001 From: Advait Jain Date: Wed, 5 Aug 2020 16:37:13 -0700 Subject: [PATCH 2213/2522] Remove logic that is no longer needed and make GetTensor availability strict. * Most kernels have been ported over to use the new EvalTensor API. The exceptions are kernels in arc_mli, ethos-u, xtensa_hifi and xtensa_hifimini_staging. * The kernel tests were previously changed to where they would fail unless they used EvalTensors * This change enforces the fact that kernels should no longer be using full TfLiteTensors in the Eval functions. Calling GetTensor from a kernel's Eval will now crash due to dereferencing a nullptr. 
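For kernel authors, the expected Eval() pattern after this change looks roughly like the following (illustrative sketch based on the updated kernels below; the helpers come from tensorflow/lite/micro/kernels/kernel_util.h):

    TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
      // GetInput()/GetOutput() (full TfLiteTensor) are no longer usable here:
      // context->GetTensor is a nullptr during Invoke.
      const TfLiteEvalTensor* input =
          tflite::micro::GetEvalInput(context, node, /*index=*/0);
      TfLiteEvalTensor* output =
          tflite::micro::GetEvalOutput(context, node, /*index=*/0);
      const int32_t* in = tflite::micro::GetTensorData<int32_t>(input);
      int32_t* out = tflite::micro::GetTensorData<int32_t>(output);
      out[0] = in[0];  // kernel-specific math goes here
      return kTfLiteOk;
    }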
PiperOrigin-RevId: 325126736 Change-Id: I1a1c9af7dcca5765461794a60ef626387fa82638 --- tensorflow/lite/micro/BUILD | 1 + tensorflow/lite/micro/kernels/BUILD | 5 ++++ .../micro/kernels/xtensa_hifimini/svdf.cc | 6 ++-- tensorflow/lite/micro/micro_interpreter.cc | 20 +++++-------- tensorflow/lite/micro/test_helpers.cc | 30 +++++++++++-------- 5 files changed, 33 insertions(+), 29 deletions(-) diff --git a/tensorflow/lite/micro/BUILD b/tensorflow/lite/micro/BUILD index 9b3d0d623cc..a8fec96c3e3 100644 --- a/tensorflow/lite/micro/BUILD +++ b/tensorflow/lite/micro/BUILD @@ -87,6 +87,7 @@ cc_library( "//tensorflow/lite/kernels:kernel_util", "//tensorflow/lite/kernels/internal:compatibility", "//tensorflow/lite/kernels/internal:tensor", + "//tensorflow/lite/micro/kernels:kernel_util", "//tensorflow/lite/schema:schema_fbs", "@flatbuffers//:runtime_cc", ], diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index dcf2337aa24..76ad03991e3 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD @@ -518,6 +518,11 @@ cc_library( "kernel_util.cc", ], hdrs = ["kernel_util.h"], + visibility = [ + # Needed for micro:test_helpers but visibility can not be finer-grained + # than a package. + ":micro_top_level", + ], deps = [ "//tensorflow/lite/c:common", "//tensorflow/lite/kernels/internal:compatibility", diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/svdf.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/svdf.cc index 545e91bab3d..00ee9b2e809 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/svdf.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/svdf.cc @@ -343,7 +343,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_EQ(context, activation_state->dims->data[1], memory_size * num_filters); - TF_LITE_ENSURE_EQ(context, node->inputs->size, 5); + TF_LITE_ENSURE_EQ(context, NumInputs(node), 5); TF_LITE_ENSURE_EQ(context, weights_feature->type, kTfLiteInt8); TF_LITE_ENSURE_EQ(context, weights_time->type, kTfLiteInt16); TF_LITE_ENSURE_EQ(context, activation_state->type, kTfLiteInt16); @@ -398,9 +398,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const TfLiteEvalTensor* weights_time = tflite::micro::GetEvalInput(context, node, kWeightsTimeTensor); const TfLiteEvalTensor* bias = - (NumInputs(node) == 5) - ? tflite::micro::GetEvalInput(context, node, kBiasTensor) - : nullptr; + tflite::micro::GetEvalInput(context, node, kBiasTensor); TfLiteEvalTensor* activation_state = tflite::micro::GetMutableEvalInput( context, node, kInputActivationStateTensor); TfLiteEvalTensor* output = diff --git a/tensorflow/lite/micro/micro_interpreter.cc b/tensorflow/lite/micro/micro_interpreter.cc index 8c2f8e031d8..1c6ebd5953e 100644 --- a/tensorflow/lite/micro/micro_interpreter.cc +++ b/tensorflow/lite/micro/micro_interpreter.cc @@ -166,9 +166,7 @@ void MicroInterpreter::Init(tflite::Profiler* profiler) { context_.impl_ = static_cast(&context_helper_); context_.ReportError = context_helper_.ReportOpError; - context_.GetTensor = context_helper_.GetTensor; context_.GetEvalTensor = context_helper_.GetEvalTensor; - context_.recommended_num_threads = 1; context_.profiler = profiler; initialization_status_ = kTfLiteOk; @@ -277,10 +275,12 @@ TfLiteStatus MicroInterpreter::AllocateTensors() { } context_helper_.SetNodeIndex(-1); - // Both AllocatePersistentBuffer and RequestScratchBufferInArena is - // available in Prepare stage. 
+ // RequestScratchBufferInArena and GetTensor (with associated TempAllocation) + // are also available in Prepare stage. + context_.GetTensor = context_helper_.GetTensor; context_.RequestScratchBufferInArena = context_helper_.RequestScratchBufferInArena; + for (size_t i = 0; i < subgraph_->operators()->size(); ++i) { // Set node idx to annotate the lifetime for scratch buffers. context_helper_.SetNodeIndex(i); @@ -300,11 +300,13 @@ TfLiteStatus MicroInterpreter::AllocateTensors() { } context_helper_.SetNodeIndex(-1); - // Prepare is done, we're ready for Invoke. Memory allocation is no longer - // allowed. Kernels can only fetch scratch buffers via GetScratchBuffer. + // Prepare is done, we're ready for Invoke. Memory allocation and full + // TfLiteTensors (via GetTensor) are no longer allowed. Kernels can only fetch + // scratch buffers via GetScratchBuffer. context_.AllocatePersistentBuffer = nullptr; context_.RequestScratchBufferInArena = nullptr; context_.GetScratchBuffer = context_helper_.GetScratchBuffer; + context_.GetTensor = nullptr; TF_LITE_ENSURE_OK(&context_, allocator_.FinishModelAllocation(model_, eval_tensors_)); @@ -343,12 +345,6 @@ TfLiteStatus MicroInterpreter::Invoke() { #endif invoke_status = registration->invoke(&context_, node); - // All TfLiteTensor structs used in the kernel are allocated from temp - // memory in the allocator. This creates a chain of allocations in the - // temp section. The call below resets the chain of allocations to - // prepare for the next call. - allocator_.ResetTempAllocations(); - if (invoke_status == kTfLiteError) { TF_LITE_REPORT_ERROR( error_reporter_, diff --git a/tensorflow/lite/micro/test_helpers.cc b/tensorflow/lite/micro/test_helpers.cc index 23c7ca96408..a4f716fca06 100644 --- a/tensorflow/lite/micro/test_helpers.cc +++ b/tensorflow/lite/micro/test_helpers.cc @@ -28,6 +28,7 @@ limitations under the License. 
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/micro/all_ops_resolver.h" +#include "tensorflow/lite/micro/kernels/kernel_util.h" #include "tensorflow/lite/micro/micro_utils.h" #include "tensorflow/lite/schema/schema_generated.h" @@ -601,8 +602,9 @@ TfLiteStatus SimpleStatefulOp::Invoke(TfLiteContext* context, OpData* data = reinterpret_cast(node->user_data); data->invoke_count += 1; - const TfLiteTensor* input = GetInput(context, node, kInputTensor); - const uint8_t* input_data = GetTensorData(input); + const TfLiteEvalTensor* input = + tflite::micro::GetEvalInput(context, node, kInputTensor); + const uint8_t* input_data = tflite::micro::GetTensorData(input); int size = NumElements(input->dims); uint8_t* sorting_buffer = reinterpret_cast( @@ -620,10 +622,13 @@ TfLiteStatus SimpleStatefulOp::Invoke(TfLiteContext* context, } } - TfLiteTensor* median = GetOutput(context, node, kMedianTensor); - uint8_t* median_data = GetTensorData(median); - TfLiteTensor* invoke_count = GetOutput(context, node, kInvokeCount); - int32_t* invoke_count_data = GetTensorData(invoke_count); + TfLiteEvalTensor* median = + tflite::micro::GetEvalOutput(context, node, kMedianTensor); + uint8_t* median_data = tflite::micro::GetTensorData(median); + TfLiteEvalTensor* invoke_count = + tflite::micro::GetEvalOutput(context, node, kInvokeCount); + int32_t* invoke_count_data = + tflite::micro::GetTensorData(invoke_count); median_data[0] = sorting_buffer[size / 2]; invoke_count_data[0] = data->invoke_count; @@ -660,14 +665,13 @@ TfLiteStatus MockCustom::Prepare(TfLiteContext* context, TfLiteNode* node) { } TfLiteStatus MockCustom::Invoke(TfLiteContext* context, TfLiteNode* node) { - const TfLiteTensor* input = tflite::GetInput(context, node, 0); - const int32_t* input_data = input->data.i32; - const TfLiteTensor* weight = tflite::GetInput(context, node, 1); + const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0); + const int32_t* input_data = tflite::micro::GetTensorData(input); + const TfLiteEvalTensor* weight = + tflite::micro::GetEvalInput(context, node, 1); const uint8_t* weight_data = weight->data.uint8; - TfLiteTensor* output = GetOutput(context, node, 0); - int32_t* output_data = output->data.i32; - output_data[0] = - 0; // Catch output tensor sharing memory with an input tensor + TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); + int32_t* output_data = tflite::micro::GetTensorData(output); output_data[0] = input_data[0] + weight_data[0]; return kTfLiteOk; } From 0cfab36bcf70db064e8bea40cce97e87648d7e0e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 5 Aug 2020 16:48:58 -0700 Subject: [PATCH 2214/2522] [XLA/GPU] Convert the sort emitter to use LHLO. 
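Note: the hunks below remove the MLIR-specific pieces (EmitMlirSort, GetAllocationSliceForMlir, the lhlo/mhlo BUILD dependencies), restore the direct HLO-based HandleSort path, and delete sorting_test.cc, so despite the subject this entry appears to be a rollback of the LHLO sort-emitter change rather than the forward change.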
PiperOrigin-RevId: 325128806 Change-Id: I610db52800e55887e93b45b145718e61ceedbb79 --- .../non_identity_layouts.hlotxt | 2 +- .../xla/transforms/mhlo_to_lhlo_with_xla.cc | 11 +- .../xla/transforms/mhlo_to_lhlo_with_xla.h | 3 +- tensorflow/compiler/xla/service/gpu/BUILD | 10 - .../compiler/xla/service/gpu/gpu_compiler.cc | 24 +- .../xla/service/gpu/hlo_to_ir_bindings.cc | 20 +- .../xla/service/gpu/hlo_to_ir_bindings.h | 4 - .../xla/service/gpu/ir_emitter_context.h | 7 +- .../xla/service/gpu/ir_emitter_unnested.cc | 405 ++++---------- .../xla/service/gpu/ir_emitter_unnested.h | 82 +-- .../compiler/xla/service/gpu/tests/BUILD | 29 - .../xla/service/gpu/tests/sorting.hlo | 504 +++++++++--------- .../xla/service/gpu/tests/sorting_test.cc | 71 --- .../compiler/xla/service/llvm_ir/llvm_util.cc | 7 +- .../compiler/xla/service/llvm_ir/llvm_util.h | 2 +- 15 files changed, 397 insertions(+), 784 deletions(-) delete mode 100644 tensorflow/compiler/xla/service/gpu/tests/sorting_test.cc diff --git a/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/non_identity_layouts.hlotxt b/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/non_identity_layouts.hlotxt index a83e36cff64..3630d2d45e4 100644 --- a/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/non_identity_layouts.hlotxt +++ b/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/non_identity_layouts.hlotxt @@ -8,6 +8,6 @@ HloModule TestModule ENTRY TestComputation { x = f32[3, 2]{1,0} parameter(0) - // CHECK: "lmhlo.copy"(%{{.*}}, %{{.*}}) {name = "copy.1"} : (memref<3x2xf32>, memref<3x2xf32, #[[MAP]]>) -> () + // CHECK: "lmhlo.copy"(%{{.*}}, %{{.*}}) : (memref<3x2xf32>, memref<3x2xf32, #[[MAP]]>) -> () ROOT x.copy = f32[3, 2]{0,1} copy(x) } diff --git a/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.cc b/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.cc index 6ce91599fb1..832bad2dcc8 100644 --- a/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.cc +++ b/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.cc @@ -34,6 +34,7 @@ limitations under the License. 
#include "mlir/Pass/Pass.h" // from @llvm-project #include "mlir/Pass/PassOptions.h" // from @llvm-project #include "mlir/Translation.h" // from @llvm-project +#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" #include "tensorflow/compiler/mlir/xla/hlo_function_importer.h" #include "tensorflow/compiler/mlir/xla/hlo_utils.h" #include "tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.h" @@ -181,10 +182,7 @@ template StatusOr LhloDialectEmitter::CreateOpWithoutAttrs( HloInstruction* instr) { Location loc = getLocation(instr); - std::pair attrs[] = { - {Identifier::get("name", builder_.getContext()), - builder_.getStringAttr(instr->name())}, - }; + ArrayRef> attrs; ArrayRef rets{}; llvm::SmallVector operands; @@ -254,14 +252,15 @@ Status LhloDialectEmitter::DefaultAction(HloInstruction* instr) { return Status::OK(); } -StatusOr LhloDialectEmitter::EmitSortOp(HloInstruction* instr) { +StatusOr LhloDialectEmitter::EmitSortOp( + HloInstruction* instr) { TF_ASSIGN_OR_RETURN(auto sort, CreateOpWithoutAttrs(instr)); auto* sort_instr = ::xla::Cast<::xla::HloSortInstruction>(instr); sort.dimensionAttr(builder_.getI64IntegerAttr(sort_instr->sort_dimension())); sort.is_stableAttr(builder_.getBoolAttr(sort_instr->is_stable())); TF_RETURN_IF_ERROR(::xla::HloFunctionImporter::ImportAsRegion( *sort_instr->called_computations()[0], &sort.comparator(), &builder_)); - return sort; + return sort.getOperation(); } Status LhloDialectEmitter::HandleSort(HloInstruction* instr) { diff --git a/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.h b/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.h index 4000fa01970..bdc977616b1 100644 --- a/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.h +++ b/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.h @@ -19,7 +19,6 @@ limitations under the License. 
#include "mlir/IR/Builders.h" // from @llvm-project #include "mlir/IR/Module.h" // from @llvm-project #include "mlir/IR/StandardTypes.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" #include "tensorflow/compiler/xla/service/buffer_assignment.h" #include "tensorflow/compiler/xla/service/hlo_module.h" @@ -42,7 +41,7 @@ class LhloDialectEmitter : public ::xla::DfsHloVisitorWithDefault { builder_(module.getContext()), i8_type_(builder_.getIntegerType(8)) {} - ::xla::StatusOr EmitSortOp(::xla::HloInstruction* instr); + ::xla::StatusOr EmitSortOp(::xla::HloInstruction* instr); private: template diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index 5305f3beec5..8dfd73e9a6a 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -254,11 +254,6 @@ cc_library( ":target_util", ":thunk", ":thunk_emitter", - "//tensorflow/compiler/mlir/hlo:lhlo", - "//tensorflow/compiler/mlir/xla:hlo_utils", - "//tensorflow/compiler/mlir/xla:mhlo_to_lhlo_with_xla", - "//tensorflow/compiler/mlir/xla:mlir_hlo_to_hlo", - "//tensorflow/compiler/mlir/xla:type_to_shape", "//tensorflow/compiler/xla:literal", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:status_macros", @@ -295,8 +290,6 @@ cc_library( "@com_google_absl//absl/types:span", "@llvm-project//llvm:Core", "@llvm-project//llvm:Support", - "@llvm-project//mlir:IR", - "@llvm-project//mlir:StandardOps", ], ) @@ -1165,7 +1158,6 @@ cc_library( ":target_constants", ":tree_reduction_rewriter", ":variadic_op_splitter", - "//tensorflow/compiler/mlir/xla:mhlo_to_lhlo_with_xla", "//tensorflow/compiler/xla:protobuf_util", "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:statusor", @@ -1222,8 +1214,6 @@ cc_library( "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@llvm-project//llvm:Core", - "@llvm-project//mlir:AllPassesAndDialectsNoRegistration", - "@llvm-project//mlir:IR", ], ) diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index b6f81e963bd..f2d29b5d11f 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -29,8 +29,6 @@ limitations under the License. 
#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Verifier.h" -#include "mlir/IR/Module.h" // from @llvm-project -#include "mlir/InitAllDialects.h" // from @llvm-project #include "tensorflow/compiler/xla/protobuf_util.h" #include "tensorflow/compiler/xla/service/algebraic_simplifier.h" #include "tensorflow/compiler/xla/service/all_reduce_combiner.h" @@ -511,22 +509,15 @@ static Status CompileModuleToLlvmIrImpl( DumpHloModuleIfEnabled(*hlo_module, **buffer_assignment, "after_optimizations"); - mlir::registerAllDialects(); - mlir::MLIRContext mlir_context; - IrEmitterContext ir_emitter_context( hlo_module, buffer_assignment->get(), platform_name, gpu_device_info, - cuda_compute_capability, profile_index_map, &mlir_context, - llvm_module->get()); + cuda_compute_capability, profile_index_map, llvm_module->get()); HloComputation* entry_computation = hlo_module->entry_computation(); + IrEmitterUnnested ir_emitter(hlo_module->config(), entry_computation, + &ir_emitter_context); - TF_ASSIGN_OR_RETURN( - auto ir_emitter, - IrEmitterUnnested::Create(hlo_module->config(), entry_computation, - &ir_emitter_context)); - - TF_RETURN_IF_ERROR(ir_emitter->EmitConstantGlobals()); + TF_RETURN_IF_ERROR(ir_emitter.EmitConstantGlobals()); { XLA_SCOPED_LOGGING_TIMER("GpuCompiler::RunBackend - IR emission"); @@ -535,10 +526,9 @@ static Status CompileModuleToLlvmIrImpl( ThunkSequence thunk_sequence; absl::Span order = hlo_schedule->ThunkLaunchOrder(); for (HloInstruction* instruction : order) { - TF_RETURN_IF_ERROR(instruction->Visit(ir_emitter.get())); - TF_RETURN_IF_ERROR(ir_emitter->Postprocess(instruction)); - std::unique_ptr thunks = - ir_emitter->ConsumeThunkSequence(); + TF_RETURN_IF_ERROR(instruction->Visit(&ir_emitter)); + TF_RETURN_IF_ERROR(ir_emitter.Postprocess(instruction)); + std::unique_ptr thunks = ir_emitter.ConsumeThunkSequence(); // The invariants between each input HloInstruction* and output Thunk* are // not all explicitly checked, but at least we can document them here: diff --git a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc index 332db83b6ad..5d38d1b727c 100644 --- a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc +++ b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc @@ -117,11 +117,11 @@ static bool HasMeaningfulName(llvm::Value* value) { return false; } -llvm::Value* CastToTypedValue(const Shape& shape, llvm::Value* ir_value, - llvm::IRBuilder<>* b) { - llvm::Type* pointee_type = - llvm_ir::ShapeToIrType(shape, b->GetInsertBlock()->getModule()); - +llvm::Value* HloToIrBindings::GetTypedIrValue(const HloInstruction& hlo, + ShapeIndexView shape_index, + llvm::Value* ir_value) { + llvm::Type* pointee_type = llvm_ir::ShapeToIrType( + ShapeUtil::GetSubshape(hlo.shape(), shape_index), module_); llvm::Type* dest_type = pointee_type->getPointerTo(); llvm::Value* typed_ir_value; @@ -129,17 +129,9 @@ llvm::Value* CastToTypedValue(const Shape& shape, llvm::Value* ir_value, typed_ir_value = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( llvm::cast(ir_value), dest_type); } else { - typed_ir_value = b->CreatePointerBitCastOrAddrSpaceCast( + typed_ir_value = b_->CreatePointerBitCastOrAddrSpaceCast( ir_value, pointee_type->getPointerTo()); } - return typed_ir_value; -} - -llvm::Value* HloToIrBindings::GetTypedIrValue(const HloInstruction& hlo, - ShapeIndexView shape_index, - llvm::Value* ir_value) { - auto typed_ir_value = CastToTypedValue( - 
ShapeUtil::GetSubshape(hlo.shape(), shape_index), ir_value, b_); if (!HasMeaningfulName(ir_value)) { ir_value->setName(llvm_ir::IrName(&hlo, "raw")); } diff --git a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h index 3813ec6c949..5eef6727801 100644 --- a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h +++ b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h @@ -116,10 +116,6 @@ class HloToIrBindings { llvm::Value* temp_buffer_base_ = nullptr; }; -// Converts `ir_value` with type i8* to a typed LLVM Value* based on `shape`. -llvm::Value* CastToTypedValue(const Shape& shape, llvm::Value* ir_value, - llvm::IRBuilder<>* b); - } // namespace gpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_context.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_context.h index 7d5a8d032e6..9c43f80dc60 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_context.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_context.h @@ -17,7 +17,6 @@ limitations under the License. #define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_IR_EMITTER_CONTEXT_H_ #include "llvm/IR/Module.h" -#include "mlir/IR/MLIRContext.h" // from @llvm-project #include "tensorflow/compiler/xla/service/buffer_assignment.h" #include "tensorflow/compiler/xla/service/gpu/launch_dimensions.h" #include "tensorflow/compiler/xla/service/hlo_execution_profile.h" @@ -35,15 +34,13 @@ class IrEmitterContext { const HloModule* hlo_module, const BufferAssignment* buffer_assignment, std::string platform_name, GpuDeviceInfo gpu_device_info, absl::optional cuda_compute_capability, - const HloProfileIndexMap* profile_index_map, - mlir::MLIRContext* mlir_context, llvm::Module* llvm_module) + const HloProfileIndexMap* profile_index_map, llvm::Module* llvm_module) : hlo_module_(hlo_module), buffer_assignment_(buffer_assignment), platform_name_(std::move(platform_name)), gpu_device_info_(gpu_device_info), cuda_compute_capability_(cuda_compute_capability), profile_index_map_(profile_index_map), - mlir_context_(mlir_context), llvm_module_(llvm_module) {} // Disallow copy and assign. IrEmitterContext(const IrEmitterContext&) = delete; @@ -60,7 +57,6 @@ class IrEmitterContext { return cuda_compute_capability_; } const HloProfileIndexMap* profile_index_map() { return profile_index_map_; } - mlir::MLIRContext* mlir_context() { return mlir_context_; } llvm::Module* llvm_module() { return llvm_module_; } NameUniquer* name_uniquer() { return &name_uniquer_; } @@ -71,7 +67,6 @@ class IrEmitterContext { GpuDeviceInfo gpu_device_info_; absl::optional cuda_compute_capability_; const HloProfileIndexMap* profile_index_map_; - mlir::MLIRContext* mlir_context_; llvm::Module* llvm_module_; NameUniquer name_uniquer_; }; diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index 5473143a8ac..34cdfb4ecf0 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -36,13 +36,6 @@ limitations under the License. 
#include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" -#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project -#include "mlir/IR/Builders.h" // from @llvm-project -#include "mlir/IR/Function.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" -#include "tensorflow/compiler/mlir/xla/hlo_utils.h" -#include "tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.h" -#include "tensorflow/compiler/mlir/xla/type_to_shape.h" #include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/literal.h" #include "tensorflow/compiler/xla/service/buffer_assignment.h" @@ -150,86 +143,13 @@ void UpdateLaunchDimensions(const LaunchDimensions& launch_dims, Thunk* thunk, llvm::ConstantAsMetadata::get(threads_per_block_ir_value)})); } -const BufferAllocation* GetAllocation( - mlir::BlockArgument func_arg, const BufferAssignment& buffer_assignment) { - auto func_op = - mlir::cast(func_arg.getParentRegion()->getParentOp()); - int64 allocation_index = func_op - .getArgAttrOfType( - func_arg.getArgNumber(), "lmhlo.alloc") - .getValue() - .getSExtValue(); - return &buffer_assignment.GetAllocation(allocation_index); -} - -StatusOr GetAllocationSliceForMlir( - mlir::Value v, const BufferAssignment& buffer_assignment) { - int64 size = v.getType().cast().getSizeInBits() / 8; - - if (auto arg = v.dyn_cast()) { - return BufferAllocation::Slice(GetAllocation(arg, buffer_assignment), 0, - size); - } - - // We match two patterns here: - // * v = ViewOp(arg); - // * v = StaticMemRefCastOp(ViewOp(arg)); - if (mlir::Operation* op = v.getDefiningOp()) { - if (auto cast = mlir::dyn_cast(op)) { - mlir::Value source = cast.getViewSource(); - op = source.getDefiningOp(); - if (!op) { - return Unimplemented("StaticMemRefCastOp has to wrap an op"); - } - } - if (auto view = mlir::dyn_cast(op)) { - return BufferAllocation::Slice( - GetAllocation(view.source().cast(), - buffer_assignment), - mlir::cast(view.byte_shift().getDefiningOp()) - .value() - .cast() - .getValue() - .getSExtValue(), - size); - } - return Unimplemented("StaticMemRefCastOp has to wrap a ViewOp"); - } - - return Unimplemented( - "Operand has to be in the form of ViewOp(arg) or " - "StaticMemRefCastOp(ViewOp(arg))"); -} - -absl::string_view GetHloName(mlir::Operation* op) { - if (auto attr = op->getAttrOfType("name")) { - auto ref = attr.getValue(); - return absl::string_view(ref.data(), ref.size()); - } - return ""; -} - } // namespace IrEmitterUnnested::IrEmitterUnnested(const HloModuleConfig& hlo_module_config, const HloComputation* hlo_computation, IrEmitterContext* ir_emitter_context) : IrEmitter(hlo_module_config, ir_emitter_context, /*is_nested=*/false), - hlo_computation_(hlo_computation), - mlir_scratch_module_(mlir::ModuleOp::create( - mlir::Builder(ir_emitter_context->mlir_context()).getUnknownLoc())), - lhlo_scratch_emitter_(ir_emitter_context_->buffer_assignment(), - *hlo_computation, mlir_scratch_module_.get()) {} - -StatusOr> IrEmitterUnnested::Create( - const HloModuleConfig& hlo_module_config, - const HloComputation* hlo_computation, - IrEmitterContext* ir_emitter_context) { - auto emitter = std::unique_ptr(new IrEmitterUnnested( - hlo_module_config, hlo_computation, ir_emitter_context)); - TF_RETURN_IF_ERROR(emitter->lhlo_scratch_emitter_.Initialize()); - return std::move(emitter); -} + hlo_computation_(hlo_computation) {} Status IrEmitterUnnested::Postprocess(HloInstruction* hlo) { bindings_.UnbindAllLocalIrValues(); @@ -237,11 
+157,12 @@ Status IrEmitterUnnested::Postprocess(HloInstruction* hlo) { } llvm::Function* IrEmitterUnnested::BuildKernelPrototype( - absl::string_view name, absl::Span args) { + const HloInstruction& inst, + absl::Span args) { // Compute the kernel name. The opcode string may contain "-" which cannot be // in a PTX function name, so sanitize the name before uniquifying it. string kernel_name = ir_emitter_context_->name_uniquer()->GetUniqueName( - llvm_ir::SanitizeFunctionName(std::string(name))); + llvm_ir::SanitizeFunctionName(inst.name())); // Create the kernel and add it to the module. llvm::Module* module = ir_emitter_context_->llvm_module(); @@ -437,8 +358,7 @@ Status IrEmitterUnnested::HandleDot(HloInstruction* dot) { } Status IrEmitterUnnested::HandleConditional(HloInstruction* conditional) { - TF_ASSIGN_OR_RETURN(auto thunk, BuildConditionalThunk(conditional)); - AddThunkToThunkSequence(std::move(thunk)); + AddThunkToThunkSequence(BuildConditionalThunk(conditional)); return Status::OK(); } @@ -1117,13 +1037,10 @@ Status IrEmitterUnnested::HandleWhile(HloInstruction* xla_while) { // Build ForThunk for conformant while loops, otherwise build WhileThunk. auto config = xla_while->backend_config(); if (config.ok() && config.ValueOrDie().has_known_trip_count()) { - TF_ASSIGN_OR_RETURN( - auto thunk, + AddThunkToThunkSequence( BuildForThunk(xla_while, config.ValueOrDie().known_trip_count().n())); - AddThunkToThunkSequence(std::move(thunk)); } else { - TF_ASSIGN_OR_RETURN(auto thunk, BuildWhileThunk(xla_while)); - AddThunkToThunkSequence(std::move(thunk)); + AddThunkToThunkSequence(BuildWhileThunk(xla_while)); } return Status::OK(); } @@ -1346,95 +1263,24 @@ Status IrEmitterUnnested::HandleSelect(HloInstruction* select) { return IrEmitter::HandleSelect(select); } -StatusOr -IrEmitterUnnested::GetOrCreateSubComputationFromRegion(mlir::Region* region) { - std::unique_ptr& module = scratch_nested_computations_[region]; - if (module == nullptr) { - xla::XlaComputation xla_computation; - TF_RETURN_IF_ERROR(ConvertRegionToComputation(region, &xla_computation)); - TF_ASSIGN_OR_RETURN(auto program_shape, xla_computation.GetProgramShape()); - TF_ASSIGN_OR_RETURN( - module, HloModule::CreateFromProto(xla_computation.proto(), - HloModuleConfig(program_shape))); - } - return module->entry_computation(); -} - Status IrEmitterUnnested::HandleSort(HloInstruction* sort) { - MlirEmitterContext result; - - TF_ASSIGN_OR_RETURN(auto sort_op, lhlo_scratch_emitter_.EmitSortOp(sort)); - result.op = sort_op; - result.name = GetHloName(sort_op); - // The name in sort op has no semantics, and it's for debug only. If the name - // doesn't exist, we should use a namer (e.g. count-based). - // TODO(timshen): use a namer instead of relying on the HloInstruction names. 
- if (result.name.empty()) { - result.name = sort->name(); - } - const auto& buffer_assignment = ir_emitter_context_->buffer_assignment(); - auto& slice = result.extra_slice; - TF_ASSIGN_OR_RETURN(slice.buffer_slice, - buffer_assignment.GetUniqueSlice(sort, {})); - slice.written = true; - slice.shape = sort->shape(); - - result.thunk_info = GetThunkInfo(sort); - - return EmitMlirSort(result); -} - -Status IrEmitterUnnested::EmitMlirSort( - MlirEmitterContext mlir_emitter_context) { - const auto& buffer_assignment = ir_emitter_context_->buffer_assignment(); - auto sort_op = mlir::cast(mlir_emitter_context.op); - - int operand_count = sort_op.operands().size(); - std::vector operand_shapes(operand_count); - std::vector slices; - std::vector output_shapes(sort_op.output().size()); - - for (int i = 0; i < operand_count; i++) { - operand_shapes[i] = - TypeToShape(sort_op.operands()[i].getType().cast()); - } - - // Craft n + 1 slices, where the first n are output parameters, and the last - // is the on-device tuple storage. We don't need n operands because sorting - // kernels are always in-place. - for (int i = 0; i < operand_count; i++) { - output_shapes[i] = - TypeToShape(sort_op.output()[i].getType().cast()); - MlirBufferSlice slice; - TF_ASSIGN_OR_RETURN( - slice.buffer_slice, - GetAllocationSliceForMlir(sort_op.output()[i], buffer_assignment)); - slice.written = true; - slice.shape = operand_shapes[i]; - slices.push_back(slice); - } - slices.push_back(mlir_emitter_context.extra_slice); - std::vector> thunks; - - Shape keys_shape = operand_shapes[0]; - int64 dimension_to_sort = sort_op.dimension().getSExtValue(); - for (int64 i = 0; i < operand_count; ++i) { + Shape keys_shape = sort->operand(0)->shape(); + int64 dimension_to_sort = sort->dimensions(0); + for (int64 i = 0; i < sort->operand_count(); ++i) { + ShapeIndex shape_index = + sort->operand_count() > 1 ? ShapeIndex({i}) : ShapeIndex({}); // We assume that the layout of all involved operands and outputs is the // same. - TF_RET_CHECK( - LayoutUtil::LayoutsInShapesEqual(keys_shape, operand_shapes[i])); - TF_RET_CHECK( - LayoutUtil::LayoutsInShapesEqual(keys_shape, output_shapes[i])); + TF_RET_CHECK(LayoutUtil::LayoutsInShapesEqual(keys_shape, + sort->operand(i)->shape())); + TF_RET_CHECK(LayoutUtil::LayoutsInShapesEqual( + keys_shape, ShapeUtil::GetSubshape(sort->shape(), shape_index))); // If possible, we share buffers. If that is not possible, we need to copy // the values, because the emitter does the sorting in-place. - TF_ASSIGN_OR_RETURN( - auto destination_buffer, - GetAllocationSliceForMlir(sort_op.output()[i], buffer_assignment)); - TF_ASSIGN_OR_RETURN( - auto source_address, - GetAllocationSliceForMlir(sort_op.operands()[i], buffer_assignment)); + auto destination_buffer = GetAllocationSlice(*sort, shape_index); + auto source_address = GetAllocationSlice(*sort->operand(i)); if (destination_buffer != source_address) { // TODO(b/26783907): Figure out why we never seem to share buffers for // key/value sort. @@ -1442,7 +1288,7 @@ Status IrEmitterUnnested::EmitMlirSort( Thunk::ThunkInfo(), /*source_address=*/source_address, /*destination_buffer=*/destination_buffer, - /*mem_size=*/ShapeUtil::ByteSizeOf(operand_shapes[i]))); + /*mem_size=*/ShapeUtil::ByteSizeOf(sort->operand(i)->shape()))); } } @@ -1511,10 +1357,10 @@ Status IrEmitterUnnested::EmitMlirSort( // we have not enough threads, or not enough shared memory. Also it does not // give a speedup if the tile size is < 128. 
int64 total_shared_memory_needed = 0; - for (int64 i = 0; i < operand_count; ++i) { + for (int64 i = 0; i < sort->operand_count(); ++i) { total_shared_memory_needed += - kTileSize * - ShapeUtil::ByteSizeOfPrimitiveType(operand_shapes[i].element_type()); + kTileSize * ShapeUtil::ByteSizeOfPrimitiveType( + sort->operand(i)->shape().element_type()); } bool no_tiling = kTileSize < 128 || @@ -1526,31 +1372,30 @@ Status IrEmitterUnnested::EmitMlirSort( uint64 num_blocks = CeilOfRatio(num_iterations, kThreadsPerBlock); LaunchDimensions tiled_launch_dimensions(num_blocks, kThreadsPerBlock); - std::vector ir_arrays; auto emit_kernel = [&](absl::Span xor_masks) { - thunks.push_back(BuildKernelThunkForMlir( - mlir_emitter_context.name, Thunk::ThunkInfo(), slices, &ir_arrays)); + thunks.push_back( + BuildKernelThunk(sort, /*implements_whole_instruction=*/false)); LaunchDimensions launch_dimensions = xor_masks.size() > 1 ? tiled_launch_dimensions : standard_launch_dimensions; UpdateLaunchDimensions(launch_dimensions, thunks.back().get(), ir_emitter_context_->llvm_module()); std::vector values_arrays; - values_arrays.reserve(operand_count); - for (int64 i = 0; i < operand_count; ++i) { - values_arrays.push_back(ir_arrays[i]); + values_arrays.reserve(sort->operand_count()); + for (int64 i = 0; i < sort->operand_count(); ++i) { + ShapeIndex shape_index = + sort->operand_count() > 1 ? ShapeIndex({i}) : ShapeIndex({}); + values_arrays.push_back(GetIrArray(*sort, *sort, shape_index)); } - TF_ASSIGN_OR_RETURN( - const HloComputation* comparator, - GetOrCreateSubComputationFromRegion(&sort_op.comparator())); return llvm_ir::EmitSortInPlace( - dimension_to_sort, values_arrays, IrName(mlir_emitter_context.name), - xor_masks, &b_, launch_dimensions, + dimension_to_sort, values_arrays, IrName(sort), xor_masks, &b_, + launch_dimensions, xor_masks.size() > 1 ? num_iterations_in_sort_dim : standard_num_iterations_in_sort_dim, kTileSize, [&](absl::Span operands, llvm::Value* output) { - return EmitCallToNestedComputation(*comparator, operands, output); + return EmitCallToNestedComputation(*sort->to_apply(), operands, + output); }); }; std::vector xor_masks; @@ -1578,14 +1423,13 @@ Status IrEmitterUnnested::EmitMlirSort( } AddThunkToThunkSequence(absl::make_unique( - mlir_emitter_context.thunk_info, std::move(thunks))); - if (operand_count > 1) { + GetThunkInfo(sort), std::move(thunks))); + if (sort->operand_count() > 1) { // Emit the tuple as part of the last stage of sorting. // We are currently in the block sorted.in_bounds.after. b_.SetInsertPoint(b_.GetInsertBlock()->getTerminator()); - llvm_ir::EmitTuple( - ir_arrays[operand_count], - absl::MakeSpan(ir_arrays).subspan(0, ir_arrays.size() - 1), &b_); + llvm_ir::EmitTuple(GetIrArray(*sort, *sort), + ConstructIrArrayForOutputs(*sort), &b_); } return Status::OK(); } @@ -1723,6 +1567,24 @@ Status IrEmitterUnnested::HandleAfterAll(HloInstruction* after_all) { return Status::OK(); } +// Describes how to access a particular subshape for an HLO. For instance if +// `.hlo_index` is {1} and `.gte_index` is {3, 4} then buffer for `.instr` at +// ShapeIndex {1} (i.e. the buffer for the second tuple element of hlo) is found +// at `.buffer_slice`[3][4]. That is, `.slice` is a void***, which we +// dereference twice -- first at index 3, and then at index 4 -- to get the +// address of our buffer. +struct HloBufferSlice { + const HloInstruction* instr; + ShapeIndex hlo_index; + + // The root buffer to look at. 
+  BufferAllocation::Slice buffer_slice;
+
+  // Describes how to dereference starting at that buffer to get to the buffer
+  // in question.
+  ShapeIndex gte_index;
+};
+
 // Figures out how to access the buffers for all subshapes of hlo's operands and
 // for hlo itself (i.e. all the buffers produced by HLO).
 //
@@ -1831,22 +1693,22 @@ static std::vector<HloBufferSlice> GetHloBufferSlices(
   return result;
 }

-std::unique_ptr<KernelThunk>
-IrEmitterUnnested::BuildKernelThunkFromBufferSlices(
-    absl::string_view name, Thunk::ThunkInfo thunk_info,
-    absl::Span<const BufferSlice* const> slices,
-    std::function<void(const BufferSlice*, llvm::Value*)>
-        bind_slice_to_ir_value) {
-  const auto& buffer_assn = ir_emitter_context_->buffer_assignment();
+std::unique_ptr<KernelThunk> IrEmitterUnnested::BuildKernelThunk(
+    const HloInstruction* inst, bool implements_whole_instruction) {
+  const BufferAssignment& buffer_assn =
+      ir_emitter_context_->buffer_assignment();
+
+  std::vector<HloBufferSlice> hlo_slices =
+      GetHloBufferSlices(inst, buffer_assn);

   // Figure out which buffer allocations need to be passed as arguments to our
-  // kernel. This is simply all of the allocations referenced in slices,
+  // kernel. This is simply all of the allocations referenced in hlo_slices,
   // plus the XLA temp buffer (if we have it). We always include the temp
   // buffer because even if the kernel itself doesn't use it, a nested
   // subcomputation within the kernel (e.g. a kMap's computation) might.
   std::unordered_set<const BufferAllocation*> buffers_needed;
-  for (auto* slice : slices) {
-    buffers_needed.insert(slice->buffer_slice.allocation());
+  for (const auto& hlo_buffer_slice : hlo_slices) {
+    buffers_needed.insert(hlo_buffer_slice.buffer_slice.allocation());
   }
   absl::optional<const BufferAllocation*> temp_buffer;
   for (const BufferAllocation& alloc : buffer_assn.Allocations()) {
@@ -1875,7 +1737,7 @@ IrEmitterUnnested::BuildKernelThunkFromBufferSlices(
     return a->index() < b->index();
   });

-  llvm::Function* kernel = BuildKernelPrototype(name, non_constant_buffers);
+  llvm::Function* kernel = BuildKernelPrototype(*inst, non_constant_buffers);

   // Build a map from a BufferAllocation to the corresponding argument in our
   // kernel.
@@ -1909,19 +1771,24 @@ IrEmitterUnnested::BuildKernelThunkFromBufferSlices(

   // For each buffer our kernel might want to touch, bind it to a value derived
   // from our kernel args.
-  for (auto* slice : slices) {
-    const BufferAllocation::Slice& buffer_slice = slice->buffer_slice;
-    const ShapeIndex& gte_index = slice->gte_index;
+  for (const auto& hlo_buffer_slice : hlo_slices) {
+    const HloInstruction* instr = hlo_buffer_slice.instr;
+    const ShapeIndex& index = hlo_buffer_slice.hlo_index;
+    const BufferAllocation::Slice& slice = hlo_buffer_slice.buffer_slice;
+    const ShapeIndex& gte_index = hlo_buffer_slice.gte_index;
+
+    VLOG(3) << "Buffer for " << instr->ToString() << " at " << index.ToString()
+            << " is found in slice " << slice.ToString() << " at GTE index "
+            << gte_index.ToString();

     llvm::Value* loc;
-    if (buffer_slice.allocation()->is_constant()) {
+    if (slice.allocation()->is_constant()) {
       loc = ir_emitter_context_->llvm_module()->getGlobalVariable(
-          llvm_ir::ConstantBufferAllocationToGlobalName(
-              *buffer_slice.allocation()));
+          llvm_ir::ConstantBufferAllocationToGlobalName(*slice.allocation()));
       CHECK_NE(loc, nullptr);
     } else {
-      loc = InBoundsGEP(kernel_args.at(buffer_slice.allocation()),
-                        {b_.getInt64(buffer_slice.offset())});
+      loc = InBoundsGEP(kernel_args.at(slice.allocation()),
+                        {b_.getInt64(slice.offset())});
     }

     // If gte_index is nonempty, we have to dereference `loc` to get to the
@@ -1933,7 +1800,7 @@ IrEmitterUnnested::BuildKernelThunkFromBufferSlices(
       loc = Load(InBoundsGEP(loc, {b_.getInt64(idx)}));
     }

-    bind_slice_to_ir_value(slice, loc);
+    bindings_.BindHloToIrValue(*instr, loc, index);
   }

   // Bind the temp buffer so that nested subcomputations can find it if they
@@ -1945,66 +1812,9 @@ IrEmitterUnnested::BuildKernelThunkFromBufferSlices(
         llvm::ConstantPointerNull::get(b_.getInt8PtrTy()));
   }

-  return absl::make_unique<KernelThunk>(thunk_info, non_constant_buffers,
-                                         std::string(kernel->getName()));
-}
-
-std::unique_ptr<KernelThunk> IrEmitterUnnested::BuildKernelThunk(
-    const HloInstruction* inst, bool implements_whole_instruction) {
-  std::vector<HloBufferSlice> hlo_slices =
-      GetHloBufferSlices(inst, ir_emitter_context_->buffer_assignment());
-
-  std::vector<const BufferSlice*> slice_ptrs;
-  slice_ptrs.reserve(hlo_slices.size());
-  for (auto& slice : hlo_slices) {
-    slice_ptrs.push_back(&slice);
-  }
-
-  return BuildKernelThunkFromBufferSlices(
-      inst->name(),
+  return absl::make_unique<KernelThunk>(
       implements_whole_instruction ? GetThunkInfo(inst) : Thunk::ThunkInfo(),
-      slice_ptrs, [this](const BufferSlice* slice, llvm::Value* value) {
-        const HloBufferSlice* hlo_buffer_slice =
-            static_cast<const HloBufferSlice*>(slice);
-        const HloInstruction* instr = hlo_buffer_slice->instr;
-        const ShapeIndex& index = hlo_buffer_slice->hlo_index;
-        VLOG(3) << "Buffer for " << instr->ToString() << " at "
-                << index.ToString() << " is found in slice "
-                << hlo_buffer_slice->buffer_slice.ToString() << " at GTE index "
-                << hlo_buffer_slice->gte_index.ToString();
-
-        bindings_.BindHloToIrValue(*instr, value, index);
-      });
-}
-
-std::unique_ptr<KernelThunk> IrEmitterUnnested::BuildKernelThunkForMlir(
-    absl::string_view name, Thunk::ThunkInfo thunk_info,
-    absl::Span<const MlirBufferSlice> slices,
-    std::vector<llvm_ir::IrArray>* ir_arrays) {
-  absl::flat_hash_set<BufferAllocation::Slice> buffers_written;
-  std::vector<const BufferSlice*> slice_ptrs;
-  slice_ptrs.reserve(slices.size());
-  for (auto& slice : slices) {
-    slice_ptrs.push_back(&slice);
-    if (slice.written) {
-      buffers_written.insert(slice.buffer_slice);
-    }
-  }
-
-  ir_arrays->clear();
-  return BuildKernelThunkFromBufferSlices(
-      name, thunk_info, slice_ptrs,
-      [&](const BufferSlice* slice, llvm::Value* value) {
-        const auto& mlir_slice = static_cast<const MlirBufferSlice&>(*slice);
-
-        llvm_ir::IrArray ir_array(
-            CastToTypedValue(mlir_slice.shape, value, &b_), mlir_slice.shape);
-        if (!buffers_written.contains(slice->buffer_slice)) {
-          ir_array.MarkInvariantOverWholeProgram(&value->getContext());
-        }
-
-        ir_arrays->push_back(ir_array);
-      });
+      non_constant_buffers, std::string(kernel->getName()));
 }

 StatusOr<std::unique_ptr<Thunk>> IrEmitterUnnested::BuildInitializerThunk(
@@ -2211,7 +2021,7 @@ Status CheckConditionalBuffersShareAllocation(

 }  // namespace

-StatusOr<std::unique_ptr<Thunk>> IrEmitterUnnested::BuildWhileThunk(
+std::unique_ptr<Thunk> IrEmitterUnnested::BuildWhileThunk(
     const HloInstruction* hlo) {
   // Check that all while-related buffers share an allocation.
   TF_CHECK_OK(CheckWhileBuffersShareAllocation(
@@ -2219,26 +2029,24 @@ StatusOr<std::unique_ptr<Thunk>> IrEmitterUnnested::BuildWhileThunk(

   // Generate thunk sequence for while 'condition'.
   HloComputation* condition = hlo->while_condition();
-  TF_ASSIGN_OR_RETURN(auto ir_emitter_condition,
-                      IrEmitterUnnested::Create(hlo_module_config_, condition,
-                                                ir_emitter_context_));
-  TF_RETURN_IF_ERROR(condition->Accept(ir_emitter_condition.get()));
+  IrEmitterUnnested ir_emitter_condition(hlo_module_config_, condition,
+                                         ir_emitter_context_);
+  TF_CHECK_OK(condition->Accept(&ir_emitter_condition));

   // Generate thunk sequence for while 'body'.
   HloComputation* body = hlo->while_body();
-  TF_ASSIGN_OR_RETURN(
-      auto ir_emitter_body,
-      IrEmitterUnnested::Create(hlo_module_config_, body, ir_emitter_context_));
-  TF_RETURN_IF_ERROR(body->Accept(ir_emitter_body.get()));
+  IrEmitterUnnested ir_emitter_body(hlo_module_config_, body,
+                                    ir_emitter_context_);
+  TF_CHECK_OK(body->Accept(&ir_emitter_body));

-  return std::unique_ptr<Thunk>(new WhileThunk(
+  return absl::make_unique<WhileThunk>(
       GetThunkInfo(hlo),
       GetAllocationSlice(*condition->root_instruction()),  // cond result
-      ir_emitter_condition->ConsumeThunkSequence(),
-      ir_emitter_body->ConsumeThunkSequence()));
+      ir_emitter_condition.ConsumeThunkSequence(),
+      ir_emitter_body.ConsumeThunkSequence());
 }

-StatusOr<std::unique_ptr<Thunk>> IrEmitterUnnested::BuildForThunk(
+std::unique_ptr<Thunk> IrEmitterUnnested::BuildForThunk(
     const HloInstruction* hlo, const int64 loop_limit) {
   // Check that all while-related buffers share an allocation.
   TF_CHECK_OK(CheckWhileBuffersShareAllocation(
@@ -2246,16 +2054,15 @@ StatusOr<std::unique_ptr<Thunk>> IrEmitterUnnested::BuildForThunk(

   // Generate thunk sequence for while 'body' (will be used a For loop body).
   HloComputation* body = hlo->while_body();
-  TF_ASSIGN_OR_RETURN(
-      auto ir_emitter_body,
-      IrEmitterUnnested::Create(hlo_module_config_, body, ir_emitter_context_));
-  TF_RETURN_IF_ERROR(body->Accept(ir_emitter_body.get()));
+  IrEmitterUnnested ir_emitter_body(hlo_module_config_, body,
+                                    ir_emitter_context_);
+  TF_CHECK_OK(body->Accept(&ir_emitter_body));

-  return std::unique_ptr<Thunk>(new ForThunk(
-      GetThunkInfo(hlo), loop_limit, ir_emitter_body->ConsumeThunkSequence()));
+  return absl::make_unique<ForThunk>(GetThunkInfo(hlo), loop_limit,
+                                     ir_emitter_body.ConsumeThunkSequence());
 }

-StatusOr<std::unique_ptr<Thunk>> IrEmitterUnnested::BuildConditionalThunk(
+std::unique_ptr<Thunk> IrEmitterUnnested::BuildConditionalThunk(
     const HloInstruction* hlo) {
   // Check that the buffers used in conditional are shared with the operands and
   // result appropriately.
@@ -2267,17 +2074,15 @@ StatusOr<std::unique_ptr<Thunk>> IrEmitterUnnested::BuildConditionalThunk(
   for (int j = 0; j < hlo->branch_count(); ++j) {
     branch_operands.emplace_back(GetAllocationSlice(*hlo->operand(j + 1)));
     HloComputation* branch_computation = hlo->branch_computation(j);
-    TF_ASSIGN_OR_RETURN(
-        auto ir_emitter,
-        IrEmitterUnnested::Create(hlo_module_config_, branch_computation,
-                                  ir_emitter_context_));
-    TF_CHECK_OK(branch_computation->Accept(ir_emitter.get()));
-    branch_thunks.push_back(std::move(*ir_emitter->ConsumeThunkSequence()));
+    IrEmitterUnnested ir_emitter(hlo_module_config_, branch_computation,
+                                 ir_emitter_context_);
+    TF_CHECK_OK(branch_computation->Accept(&ir_emitter));
+    branch_thunks.push_back(std::move(*ir_emitter.ConsumeThunkSequence()));
   }

-  return std::unique_ptr<Thunk>(new ConditionalThunk(
+  return absl::make_unique<ConditionalThunk>(
       GetThunkInfo(hlo), GetAllocationSlice(*hlo->operand(0)), branch_operands,
-      std::move(branch_thunks)));
+      std::move(branch_thunks));
 }

 Status IrEmitterUnnested::EmitTargetElementLoopInThunk(
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h
index 18cbd22815c..019fcdf21db 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h
@@ -17,7 +17,6 @@ limitations under the License.
 #define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_IR_EMITTER_UNNESTED_H_

 #include "absl/container/inlined_vector.h"
-#include "tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.h"
 #include "tensorflow/compiler/xla/service/gpu/ir_emitter.h"
 #include "tensorflow/compiler/xla/service/gpu/kernel_mapping_scheme.h"
 #include "tensorflow/compiler/xla/service/gpu/sequential_thunk.h"
@@ -29,40 +28,6 @@ limitations under the License.
 namespace xla {
 namespace gpu {

-struct BufferSlice {
-  // The root buffer to look at.
-  BufferAllocation::Slice buffer_slice;
-
-  // Describes how to dereference starting at that buffer to get to the buffer
-  // in question.
-  ShapeIndex gte_index;
-};
-
-// Describes how to access a particular subshape for an HLO. For instance if
-// `.hlo_index` is {1} and `.gte_index` is {3, 4} then buffer for `.instr` at
-// ShapeIndex {1} (i.e. the buffer for the second tuple element of hlo) is
-// found at `.buffer_slice`[3][4]. That is, `.slice` is a void***, which we
-// dereference twice -- first at index 3, and then at index 4 -- to get the
-// address of our buffer.
-struct HloBufferSlice : public BufferSlice {
-  const HloInstruction* instr;
-  ShapeIndex hlo_index;
-};
-
-struct MlirBufferSlice : public BufferSlice {
-  // The buffer is modified by the kernel.
-  bool written;
-
-  Shape shape;
-};
-
-struct MlirEmitterContext {
-  mlir::Operation* op;
-  absl::string_view name;
-  Thunk::ThunkInfo thunk_info;
-  MlirBufferSlice extra_slice;
-};
-
 // Emits LLVM IR for an "unnested computation".
 //
 // An unnested computation is an HloComputation which you run by executing one
@@ -124,14 +89,12 @@ class IrEmitterUnnested : public IrEmitter,
       const string& loop_name, llvm::Value* tile_height,
       llvm::Value* tile_width, KernelSupportLibrary* ksl)>;

+  IrEmitterUnnested(const HloModuleConfig& hlo_module_config,
+                    const HloComputation* hlo_computation,
+                    IrEmitterContext* ir_emitter_context);
   IrEmitterUnnested(const IrEmitterUnnested&) = delete;
   IrEmitterUnnested& operator=(const IrEmitterUnnested&) = delete;

-  static StatusOr<std::unique_ptr<IrEmitterUnnested>> Create(
-      const HloModuleConfig& hlo_module_config,
-      const HloComputation* hlo_computation,
-      IrEmitterContext* ir_emitter_context);
-
   // Transfers the ownship of thunk_sequence_ out.
   std::unique_ptr<ThunkSequence> ConsumeThunkSequence() {
     return std::make_unique<ThunkSequence>(std::move(thunk_sequence_));
   }
@@ -161,7 +124,6 @@ class IrEmitterUnnested : public IrEmitter,
   Status HandleScatter(HloInstruction* scatter) override;
   Status HandleSelect(HloInstruction* select) override;
   Status HandleSort(HloInstruction* sort) override;
-  Status EmitMlirSort(MlirEmitterContext context);
   Status HandleTriangularSolve(HloInstruction* hlo) override;
   Status HandleTupleSelect(HloInstruction* tuple_select) override;
   Status HandleAllReduce(HloInstruction* crs) override;
@@ -186,10 +148,6 @@ class IrEmitterUnnested : public IrEmitter,
   Status Postprocess(HloInstruction* hlo) override;

  private:
-  IrEmitterUnnested(const HloModuleConfig& hlo_module_config,
-                    const HloComputation* hlo_computation,
-                    IrEmitterContext* ir_emitter_context);
-
   // Add a owning Thunk object to the thunk sequence.
   void AddThunkToThunkSequence(std::unique_ptr<Thunk> thunk) override {
     thunk_sequence_.emplace_back(std::move(thunk));
   }
@@ -306,7 +264,8 @@ class IrEmitterUnnested : public IrEmitter,
   // Builds the prototype of the IR kernel for `inst` and adds it to the module.
   // This kernel takes as arguments pointers to the given buffer allocations.
   llvm::Function* BuildKernelPrototype(
-      absl::string_view name, absl::Span<const BufferAllocation* const> args);
+      const HloInstruction& inst,
+      absl::Span<const BufferAllocation* const> args);

   // Helper for writing extra outputs from inside a reduce kernel.
   Status EmitExtraOutputsForReduce(
@@ -531,12 +490,6 @@ class IrEmitterUnnested : public IrEmitter,
       HloComputation* reducer, llvm::Type* element_type,
       llvm::Value* partial_result_address);

-  std::unique_ptr<KernelThunk> BuildKernelThunkFromBufferSlices(
-      absl::string_view name, Thunk::ThunkInfo thunk_info,
-      absl::Span<const BufferSlice* const> slices,
-      std::function<void(const BufferSlice*, llvm::Value*)>
-          bind_slice_to_ir_value);
-
   // Returns a KernelThunk that invokes the kernel emitted for `inst`. The
   // caller needs to make sure `inst` outlives the lifetime of the returned
   // Thunk object. 'implements_whole_instruction' specifies whether this
@@ -545,11 +498,6 @@ class IrEmitterUnnested : public IrEmitter,
   std::unique_ptr<KernelThunk> BuildKernelThunk(
       const HloInstruction* inst, bool implements_whole_instruction);

-  std::unique_ptr<KernelThunk> BuildKernelThunkForMlir(
-      absl::string_view name, Thunk::ThunkInfo thunk_info,
-      absl::Span<const MlirBufferSlice> slices,
-      std::vector<llvm_ir::IrArray>* ir_arrays);
-
   // Returns a thunk that, given a reduce or select-and-scatter op,
   // initializes its memory to the appropriate initial value.
   StatusOr<std::unique_ptr<Thunk>> BuildInitializerThunk(
@@ -557,18 +505,17 @@ class IrEmitterUnnested : public IrEmitter,

   // Returns a WhileThunk that invokes thunk sequences for 'condition' and
   // 'body' sub-computations of while instruction 'hlo'.
-  StatusOr<std::unique_ptr<Thunk>> BuildWhileThunk(const HloInstruction* hlo);
+  std::unique_ptr<Thunk> BuildWhileThunk(const HloInstruction* hlo);

   // Returns a ForThunk which executes 'loop_limit' invocations of a thunk
   // sequence from the 'body' sub-computation of the while instruction 'hlo'.
-  StatusOr<std::unique_ptr<Thunk>> BuildForThunk(const HloInstruction* hlo,
-                                                 const int64 loop_limit);
+  std::unique_ptr<Thunk> BuildForThunk(const HloInstruction* hlo,
+                                       const int64 loop_limit);

   // Returns a ConditionalThunk which executes the thunk sequence for the
   // 'branch_computation' corresponding to the predicate/branch_index of the
   // given conditional instruction.
-  StatusOr<std::unique_ptr<Thunk>> BuildConditionalThunk(
-      const HloInstruction* hlo);
+  std::unique_ptr<Thunk> BuildConditionalThunk(const HloInstruction* hlo);

   // Emits current thread id with the given type.
   //
@@ -598,9 +545,6 @@ class IrEmitterUnnested : public IrEmitter,
       absl::optional<int64> thread_id_filter = absl::nullopt,
       absl::optional<int64> block_id_filter = absl::nullopt);

-  StatusOr<const HloComputation*> GetOrCreateSubComputationFromRegion(
-      mlir::Region* region);
-
   // Returns the last generated thunk.
   Thunk* LastThunk() const { return thunk_sequence_.back().get(); }

@@ -611,14 +555,6 @@ class IrEmitterUnnested : public IrEmitter,

   // The HloComputation that this IrEmitter emits code for.
   const HloComputation* hlo_computation_;
-
-  mlir::OwningModuleRef mlir_scratch_module_;
-
-  // This is for cache-purpose only. It has no significant semantics.
-  mlir::LhloDialectEmitter lhlo_scratch_emitter_;
-
-  absl::flat_hash_map<const mlir::Region*, std::unique_ptr<HloModule>>
-      scratch_nested_computations_;
 };

 }  // namespace gpu
diff --git a/tensorflow/compiler/xla/service/gpu/tests/BUILD b/tensorflow/compiler/xla/service/gpu/tests/BUILD
index 809b277317f..a2bddd2d0d7 100644
--- a/tensorflow/compiler/xla/service/gpu/tests/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/tests/BUILD
@@ -458,35 +458,6 @@ xla_test(
     ],
 )

-tf_cc_test(
-    name = "sorting_test",
-    srcs = [
-        "sorting_test.cc",
-    ],
-    tags = tf_cuda_tests_tags() + [
-        "no_rocm",
-    ],
-    deps = [
-        ":gpu_codegen_test",
-        "//tensorflow/compiler/xla:debug_options_flags",
-        "//tensorflow/compiler/xla:statusor",
-        "//tensorflow/compiler/xla:xla_proto_cc",
-        "//tensorflow/compiler/xla/service:gpu_plugin",
-        "//tensorflow/compiler/xla/service:hlo",
-        "//tensorflow/compiler/xla/service:hlo_module_config",
-        "//tensorflow/compiler/xla/service:hlo_parser",
-        "//tensorflow/compiler/xla/service/gpu:gpu_executable",
-        "//tensorflow/compiler/xla/tests:filecheck",
-        "//tensorflow/compiler/xla/tests:hlo_test_base",
-        "//tensorflow/compiler/xla/tests:llvm_irgen_test_base",
-        "//tensorflow/core:lib",
-        "//tensorflow/core:test",
-        "//tensorflow/core:test_main",
-        "//tensorflow/stream_executor/lib",
-        "@com_google_absl//absl/memory",
-    ],
-)
-
 tf_cc_binary(
     name = "hlo_to_llvm_ir",
     srcs = ["hlo_to_llvm_ir.cc"],
diff --git a/tensorflow/compiler/xla/service/gpu/tests/sorting.hlo b/tensorflow/compiler/xla/service/gpu/tests/sorting.hlo
index 4d29a8df116..272c9a25769 100644
--- a/tensorflow/compiler/xla/service/gpu/tests/sorting.hlo
+++ b/tensorflow/compiler/xla/service/gpu/tests/sorting.hlo
@@ -8,162 +8,162 @@ compare {
   ROOT lt = pred[] compare(p.0.lhs, p.0.rhs), direction=LT
 }

-// CHECK: define void @sort(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]])
+// CHECK: define void @sort(i8* noalias align 64 dereferenceable(24)
[[ALLOC0:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC1:%.*]]) // CHECK-NEXT: entry: // CHECK-NEXT: [[COMPARE_RETURN_BUFFER:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to [2 x [3 x float]]* -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [2 x [3 x float]]* -// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 -// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP4]] to i64 -// CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 -// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP5]] to i64 -// CHECK-NEXT: [[TMP6:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 -// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP6]], [[THREAD_ID]] +// CHECK-NEXT: [[SORT_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0]], i64 0 +// CHECK-NEXT: [[SORT_TYPED:%.*]] = bitcast i8* [[SORT_RAW]] to [2 x [3 x float]]* +// CHECK-NEXT: [[X_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1]], i64 0 +// CHECK-NEXT: [[X_TYPED:%.*]] = bitcast i8* [[X_RAW]] to [2 x [3 x float]]* +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 +// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 +// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP1]] to i64 +// CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 +// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP2]], [[THREAD_ID]] // CHECK-NEXT: [[LINEAR_INDEX_IN_RANGE:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 // CHECK-NEXT: call void @llvm.assume(i1 [[LINEAR_INDEX_IN_RANGE]]) -// CHECK-NEXT: [[TMP7:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = urem i64 [[TMP7]], 2 -// CHECK-NEXT: [[TMP9:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 -// CHECK-NEXT: br i1 [[TMP10]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] +// CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = urem i64 [[TMP3]], 2 +// CHECK-NEXT: [[TMP5:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 +// CHECK-NEXT: br i1 [[TMP6]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] // CHECK: sort.in_bounds-after: // CHECK-NEXT: ret void // CHECK: sort.in_bounds-true: -// CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP8]], 2 -// CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 1 -// CHECK-NEXT: [[TMP13:%.*]] = icmp slt i64 [[TMP11]], [[TMP12]] -// CHECK-NEXT: [[TMP14:%.*]] = icmp slt i64 [[TMP12]], 3 -// CHECK-NEXT: [[TMP15:%.*]] = and i1 [[TMP13]], [[TMP14]] -// CHECK-NEXT: br i1 [[TMP15]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] +// CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP4]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 1 +// CHECK-NEXT: [[TMP9:%.*]] = icmp slt i64 [[TMP7]], [[TMP8]] +// CHECK-NEXT: [[TMP10:%.*]] = icmp slt i64 [[TMP8]], 3 +// CHECK-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]] +// CHECK-NEXT: br i1 [[TMP11]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] // CHECK: smaller_comparison_index-after: // CHECK-NEXT: br label [[SORT_IN_BOUNDS_AFTER]] // CHECK: smaller_comparison_index-true: -// CHECK-NEXT: 
[[TMP16:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP12]] -// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP11]] -// CHECK-NEXT: call void @region_0_4(float* [[TMP16]], float* [[TMP17]], i8* [[COMPARE_RETURN_BUFFER]]) -// CHECK-NEXT: [[TMP18:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 -// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP18]], 0 +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP8]] +// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: call void @compare(float* [[TMP12]], float* [[TMP13]], i8* [[COMPARE_RETURN_BUFFER]]) +// CHECK-NEXT: [[TMP14:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 +// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP14]], 0 // CHECK-NEXT: br i1 [[BOOLEAN_PREDICATE]], label [[IS_SMALLER_THAN_TRUE:%.*]], label [[IS_SMALLER_THAN_AFTER:%.*]] // CHECK: is_smaller_than-after: // CHECK-NEXT: br label [[SMALLER_COMPARISON_INDEX_AFTER]] // CHECK: is_smaller_than-true: -// CHECK-NEXT: [[TMP19:%.*]] = load float, float* [[TMP16]], align 4 -// CHECK-NEXT: [[TMP20:%.*]] = load float, float* [[TMP17]], align 4 -// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP11]] -// CHECK-NEXT: store float [[TMP19]], float* [[TMP21]], align 4 -// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP12]] -// CHECK-NEXT: store float [[TMP20]], float* [[TMP22]], align 4 +// CHECK-NEXT: [[TMP15:%.*]] = load float, float* [[TMP12]], align 4 +// CHECK-NEXT: [[TMP16:%.*]] = load float, float* [[TMP13]], align 4 +// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: store float [[TMP15]], float* [[TMP17]], align 4 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP8]] +// CHECK-NEXT: store float [[TMP16]], float* [[TMP18]], align 4 // CHECK-NEXT: br label [[IS_SMALLER_THAN_AFTER]] -// CHECK: define internal void @region_0_4(float* dereferenceable(4) [[P_0_LHS_TYPED:%.*]], float* dereferenceable(4) [[P_0_RHS_TYPED:%.*]], i8* dereferenceable(1) [[OUTPUT_ARG:%.*]]) +// CHECK: define internal void @compare(float* dereferenceable(4) [[P_0_LHS_TYPED:%.*]], float* dereferenceable(4) [[P_0_RHS_TYPED:%.*]], i8* dereferenceable(1) [[OUTPUT_ARG:%.*]]) // CHECK-NEXT: entry: -// CHECK-NEXT: [[COMPARE_3_TYPED:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[ARG_0_1_TYPED:%.*]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARG_1_2_TYPED:%.*]], align 4 +// CHECK-NEXT: [[LT_TYPED:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[P_0_LHS_TYPED]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[P_0_RHS_TYPED]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = fcmp olt float [[TMP0]], [[TMP1]] // CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i8 -// CHECK-NEXT: store i8 [[TMP3]], i8* [[COMPARE_3_TYPED]], align 1 -// CHECK-NEXT: [[LOAD_RET_VALUE:%.*]] = load i8, i8* [[COMPARE_3_TYPED]], align 1 -// CHECK-NEXT: store i8 
[[LOAD_RET_VALUE]], i8* [[OUTPUT_ARG:%.*]], align 1 +// CHECK-NEXT: store i8 [[TMP3]], i8* [[LT_TYPED]], align 1 +// CHECK-NEXT: [[LOAD_RET_VALUE:%.*]] = load i8, i8* [[LT_TYPED]], align 1 +// CHECK-NEXT: store i8 [[LOAD_RET_VALUE]], i8* [[OUTPUT_ARG]], align 1 // CHECK-NEXT: ret void -// CHECK: define void @sort__1(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]]) { +// CHECK: define void @sort__1(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC1:%.*]]) { // CHECK-NEXT: entry: // CHECK-NEXT: [[COMPARE_RETURN_BUFFER:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to [2 x [3 x float]]* -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [2 x [3 x float]]* -// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 -// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP4]] to i64 -// CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 -// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP5]] to i64 -// CHECK-NEXT: [[TMP6:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 -// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP6]], [[THREAD_ID]] +// CHECK-NEXT: [[SORT_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0]], i64 0 +// CHECK-NEXT: [[SORT_TYPED:%.*]] = bitcast i8* [[SORT_RAW]] to [2 x [3 x float]]* +// CHECK-NEXT: [[X_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1]], i64 0 +// CHECK-NEXT: [[X_TYPED:%.*]] = bitcast i8* [[X_RAW]] to [2 x [3 x float]]* +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 +// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 +// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP1]] to i64 +// CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 +// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP2]], [[THREAD_ID]] // CHECK-NEXT: [[LINEAR_INDEX_IN_RANGE:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 // CHECK-NEXT: call void @llvm.assume(i1 [[LINEAR_INDEX_IN_RANGE]]) -// CHECK-NEXT: [[TMP7:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = urem i64 [[TMP7]], 2 -// CHECK-NEXT: [[TMP9:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 -// CHECK-NEXT: br i1 [[TMP10]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] +// CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = urem i64 [[TMP3]], 2 +// CHECK-NEXT: [[TMP5:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 +// CHECK-NEXT: br i1 [[TMP6]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] // CHECK: sort.in_bounds-after: // CHECK-NEXT: ret void // CHECK: sort.in_bounds-true: -// CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP8]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = icmp slt i64 [[TMP8]], [[TMP11]] -// CHECK-NEXT: [[TMP13:%.*]] = icmp slt i64 [[TMP11]], 3 -// CHECK-NEXT: [[TMP14:%.*]] = and i1 [[TMP12]], [[TMP13]] -// CHECK-NEXT: br i1 [[TMP14]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] +// CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP4]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = icmp slt i64 [[TMP4]], [[TMP7]] +// CHECK-NEXT: [[TMP9:%.*]] = icmp slt i64 
[[TMP7]], 3 +// CHECK-NEXT: [[TMP10:%.*]] = and i1 [[TMP8]], [[TMP9]] +// CHECK-NEXT: br i1 [[TMP10]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] // CHECK: smaller_comparison_index-after: // CHECK-NEXT: br label [[SORT_IN_BOUNDS_AFTER]] // CHECK: smaller_comparison_index-true: -// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP11]] -// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP8]] -// CHECK-NEXT: call void @region_0_4(float* [[TMP15]], float* [[TMP16]], i8* [[COMPARE_RETURN_BUFFER]]) -// CHECK-NEXT: [[TMP17:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 -// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP17]], 0 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP4]] +// CHECK-NEXT: call void @compare(float* [[TMP11]], float* [[TMP12]], i8* [[COMPARE_RETURN_BUFFER]]) +// CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 +// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP13]], 0 // CHECK-NEXT: br i1 [[BOOLEAN_PREDICATE]], label [[IS_SMALLER_THAN_TRUE:%.*]], label [[IS_SMALLER_THAN_AFTER:%.*]] // CHECK: is_smaller_than-after: // CHECK-NEXT: br label [[SMALLER_COMPARISON_INDEX_AFTER]] // CHECK: is_smaller_than-true: -// CHECK-NEXT: [[TMP18:%.*]] = load float, float* [[TMP15]], align 4 -// CHECK-NEXT: [[TMP19:%.*]] = load float, float* [[TMP16]], align 4 -// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP8]] -// CHECK-NEXT: store float [[TMP18]], float* [[TMP20]], align 4 -// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP11]] -// CHECK-NEXT: store float [[TMP19]], float* [[TMP21]], align 4 +// CHECK-NEXT: [[TMP14:%.*]] = load float, float* [[TMP11]], align 4 +// CHECK-NEXT: [[TMP15:%.*]] = load float, float* [[TMP12]], align 4 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP4]] +// CHECK-NEXT: store float [[TMP14]], float* [[TMP16]], align 4 +// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: store float [[TMP15]], float* [[TMP17]], align 4 // CHECK-NEXT: br label [[IS_SMALLER_THAN_AFTER]] -// CHECK: define void @sort__2(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]]) { +// CHECK: define void @sort__2(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC1:%.*]]) { // CHECK-NEXT: entry: // CHECK-NEXT: [[COMPARE_RETURN_BUFFER:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to [2 x [3 x float]]* -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [2 x [3 x float]]* -// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 -// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 
[[TMP4]] to i64 -// CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 -// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP5]] to i64 -// CHECK-NEXT: [[TMP6:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 -// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP6]], [[THREAD_ID]] +// CHECK-NEXT: [[SORT_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 +// CHECK-NEXT: [[SORT_TYPED:%.*]] = bitcast i8* [[SORT_RAW]] to [2 x [3 x float]]* +// CHECK-NEXT: [[X_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1:%.*]], i64 0 +// CHECK-NEXT: [[X_TYPED:%.*]] = bitcast i8* [[X_RAW]] to [2 x [3 x float]]* +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 +// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 +// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP1]] to i64 +// CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 +// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP2]], [[THREAD_ID]] // CHECK-NEXT: [[LINEAR_INDEX_IN_RANGE:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 // CHECK-NEXT: call void @llvm.assume(i1 [[LINEAR_INDEX_IN_RANGE]]) -// CHECK-NEXT: [[TMP7:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = urem i64 [[TMP7]], 2 -// CHECK-NEXT: [[TMP9:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 -// CHECK-NEXT: br i1 [[TMP10]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] +// CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = urem i64 [[TMP3]], 2 +// CHECK-NEXT: [[TMP5:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 +// CHECK-NEXT: br i1 [[TMP6]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] // CHECK: sort.in_bounds-after: // CHECK-NEXT: ret void // CHECK: sort.in_bounds-true: -// CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP8]], 2 -// CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 1 -// CHECK-NEXT: [[TMP13:%.*]] = icmp slt i64 [[TMP11]], [[TMP12]] -// CHECK-NEXT: [[TMP14:%.*]] = icmp slt i64 [[TMP12]], 3 -// CHECK-NEXT: [[TMP15:%.*]] = and i1 [[TMP13]], [[TMP14]] -// CHECK-NEXT: br i1 [[TMP15]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] +// CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP4]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 1 +// CHECK-NEXT: [[TMP9:%.*]] = icmp slt i64 [[TMP7]], [[TMP8]] +// CHECK-NEXT: [[TMP10:%.*]] = icmp slt i64 [[TMP8]], 3 +// CHECK-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]] +// CHECK-NEXT: br i1 [[TMP11]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] // CHECK: smaller_comparison_index-after: // CHECK-NEXT: br label [[SORT_IN_BOUNDS_AFTER]] // CHECK: smaller_comparison_index-true: -// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP12]] -// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP11]] -// CHECK-NEXT: call void @region_0_4(float* [[TMP16]], float* [[TMP17]], i8* [[COMPARE_RETURN_BUFFER]]) -// CHECK-NEXT: [[TMP18:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 -// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP18]], 0 +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* 
[[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP8]] +// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: call void @compare(float* [[TMP12]], float* [[TMP13]], i8* [[COMPARE_RETURN_BUFFER]]) +// CHECK-NEXT: [[TMP14:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 +// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP14]], 0 // CHECK-NEXT: br i1 [[BOOLEAN_PREDICATE]], label [[IS_SMALLER_THAN_TRUE:%.*]], label [[IS_SMALLER_THAN_AFTER:%.*]] // CHECK: is_smaller_than-after: // CHECK-NEXT: br label [[SMALLER_COMPARISON_INDEX_AFTER]] // CHECK: is_smaller_than-true: -// CHECK-NEXT: [[TMP19:%.*]] = load float, float* [[TMP16]], align 4 -// CHECK-NEXT: [[TMP20:%.*]] = load float, float* [[TMP17]], align 4 -// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP11]] -// CHECK-NEXT: store float [[TMP19]], float* [[TMP21]], align 4 -// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP12]] -// CHECK-NEXT: store float [[TMP20]], float* [[TMP22]], align 4 +// CHECK-NEXT: [[TMP15:%.*]] = load float, float* [[TMP12]], align 4 +// CHECK-NEXT: [[TMP16:%.*]] = load float, float* [[TMP13]], align 4 +// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: store float [[TMP15]], float* [[TMP17]], align 4 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP8]] +// CHECK-NEXT: store float [[TMP16]], float* [[TMP18]], align 4 // CHECK-NEXT: br label [[IS_SMALLER_THAN_AFTER]] ENTRY main { x = f32[2, 3] parameter(0) @@ -182,198 +182,210 @@ compare { ROOT lt = pred[] compare(p.1.lhs, p.1.rhs), direction=LT } -// CHECK: define void @sort(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 64 dereferenceable(24) [[ALLOC1:%.*]], i8* noalias align 64 dereferenceable(16) [[ALLOC4:%.*]]) +// CHECK: define void @sort(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 64 dereferenceable(24) [[ALLOC1:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC2:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC3:%.*]], i8* noalias align 64 dereferenceable(16) [[ALLOC4:%.*]]) // CHECK-NEXT: entry: // CHECK-NEXT: [[COMPARE_RETURN_BUFFER:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to [2 x [3 x i32]]* -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [2 x [3 x float]]* -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[ALLOC4:%.*]], i64 0 -// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* -// CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 -// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 -// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK-NEXT: [[TMP8:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 -// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP8]], [[THREAD_ID]] +// CHECK-NEXT: [[SORT_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC4]], i64 0 +// 
CHECK-NEXT: [[SORT_TYPED:%.*]] = bitcast i8* [[SORT_RAW]] to [2 x i8*]* +// CHECK-NEXT: [[SORT_RAW1:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0]], i64 0 +// CHECK-NEXT: [[SORT_TYPED2:%.*]] = bitcast i8* [[SORT_RAW1]] to [2 x [3 x i32]]* +// CHECK-NEXT: [[SORT_RAW3:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1]], i64 0 +// CHECK-NEXT: [[SORT_TYPED4:%.*]] = bitcast i8* [[SORT_RAW3]] to [2 x [3 x float]]* +// CHECK-NEXT: [[X_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC2]], i64 0 +// CHECK-NEXT: [[X_TYPED:%.*]] = bitcast i8* [[X_RAW]] to [2 x [3 x i32]]* +// CHECK-NEXT: [[Y_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC3]], i64 0 +// CHECK-NEXT: [[Y_TYPED:%.*]] = bitcast i8* [[Y_RAW]] to [2 x [3 x float]]* +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 +// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 +// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP1]] to i64 +// CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 +// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP2]], [[THREAD_ID]] // CHECK-NEXT: [[LINEAR_INDEX_IN_RANGE:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 // CHECK-NEXT: call void @llvm.assume(i1 [[LINEAR_INDEX_IN_RANGE]]) -// CHECK-NEXT: [[TMP9:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 -// CHECK-NEXT: [[TMP10:%.*]] = urem i64 [[TMP9]], 2 -// CHECK-NEXT: [[TMP11:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 -// CHECK-NEXT: [[TMP12:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 -// CHECK-NEXT: br i1 [[TMP12]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] +// CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = urem i64 [[TMP3]], 2 +// CHECK-NEXT: [[TMP5:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 +// CHECK-NEXT: br i1 [[TMP6]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] // CHECK: sort.in_bounds-after: // CHECK-NEXT: ret void // CHECK: sort.in_bounds-true: -// CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP10]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 1 -// CHECK-NEXT: [[TMP15:%.*]] = icmp slt i64 [[TMP13]], [[TMP14]] -// CHECK-NEXT: [[TMP16:%.*]] = icmp slt i64 [[TMP14]], 3 -// CHECK-NEXT: [[TMP17:%.*]] = and i1 [[TMP15]], [[TMP16]] -// CHECK-NEXT: br i1 [[TMP17]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] +// CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP4]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 1 +// CHECK-NEXT: [[TMP9:%.*]] = icmp slt i64 [[TMP7]], [[TMP8]] +// CHECK-NEXT: [[TMP10:%.*]] = icmp slt i64 [[TMP8]], 3 +// CHECK-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]] +// CHECK-NEXT: br i1 [[TMP11]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] // CHECK: smaller_comparison_index-after: // CHECK-NEXT: br label [[SORT_IN_BOUNDS_AFTER]] // CHECK: smaller_comparison_index-true: -// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP14]] -// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP13]] -// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP14]] -// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 
[[TMP11]], i64 [[TMP13]] -// CHECK-NEXT: call void @region_0_6(i32* [[TMP18]], i32* [[TMP19]], float* [[TMP20]], float* [[TMP21]], i8* [[COMPARE_RETURN_BUFFER]]) -// CHECK-NEXT: [[TMP22:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 -// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP22]], 0 +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP8]] +// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP8]] +// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: call void @compare(i32* [[TMP12]], i32* [[TMP13]], float* [[TMP14]], float* [[TMP15]], i8* [[COMPARE_RETURN_BUFFER]]) +// CHECK-NEXT: [[TMP16:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 +// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP16]], 0 // CHECK-NEXT: br i1 [[BOOLEAN_PREDICATE]], label [[IS_SMALLER_THAN_TRUE:%.*]], label [[IS_SMALLER_THAN_AFTER:%.*]] // CHECK: is_smaller_than-after: // CHECK-NEXT: br label [[SMALLER_COMPARISON_INDEX_AFTER]] // CHECK: is_smaller_than-true: -// CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP13]] -// CHECK-NEXT: store i32 [[TMP23]], i32* [[TMP25]], align 4 -// CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP14]] -// CHECK-NEXT: store i32 [[TMP24]], i32* [[TMP26]], align 4 -// CHECK-NEXT: [[TMP27:%.*]] = load float, float* [[TMP20]], align 4 -// CHECK-NEXT: [[TMP28:%.*]] = load float, float* [[TMP21]], align 4 -// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP13]] -// CHECK-NEXT: store float [[TMP27]], float* [[TMP29]], align 4 -// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP14]] -// CHECK-NEXT: store float [[TMP28]], float* [[TMP30]], align 4 +// CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: store i32 [[TMP17]], i32* [[TMP19]], align 4 +// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP8]] +// CHECK-NEXT: store i32 [[TMP18]], i32* [[TMP20]], align 4 +// CHECK-NEXT: [[TMP21:%.*]] = load float, float* [[TMP14]], align 4 +// CHECK-NEXT: [[TMP22:%.*]] = load float, float* [[TMP15]], align 4 +// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: store float [[TMP21]], float* [[TMP23]], align 4 +// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP8]] +// CHECK-NEXT: store float [[TMP22]], float* 
[[TMP24]], align 4 // CHECK-NEXT: br label [[IS_SMALLER_THAN_AFTER]] -// CHECK: define internal void @region_0_6(i32* dereferenceable(4) [[P_0_LHS_TYPED:%.*]], i32* dereferenceable(4) [[P_0_RHS_TYPED:%.*]], float* dereferenceable(4) [[P_1_LHS_TYPED:%.*]], float* dereferenceable(4) [[P_1_RHS_TYPED:%.*]], i8* dereferenceable(1) [[OUTPUT_ARG:%.*]]) +// CHECK: define internal void @compare(i32* dereferenceable(4) [[P_0_LHS_TYPED:%.*]], i32* dereferenceable(4) [[P_0_RHS_TYPED:%.*]], float* dereferenceable(4) [[P_1_LHS_TYPED:%.*]], float* dereferenceable(4) [[P_1_RHS_TYPED:%.*]], i8* dereferenceable(1) [[OUTPUT_ARG:%.*]]) // CHECK-NEXT: entry: -// CHECK-NEXT: [[COMPARE_5_TYPED:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[ARG_2_3_TYPED:%.*]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARG_3_4_TYPED:%.*]], align 4 +// CHECK-NEXT: [[LT_TYPED:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[P_1_LHS_TYPED]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[P_1_RHS_TYPED]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = fcmp olt float [[TMP0]], [[TMP1]] // CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i8 -// CHECK-NEXT: store i8 [[TMP3]], i8* [[COMPARE_5_TYPED]], align 1 -// CHECK-NEXT: [[LOAD_RET_VALUE:%.*]] = load i8, i8* [[COMPARE_5_TYPED]], align 1 -// CHECK-NEXT: store i8 [[LOAD_RET_VALUE]], i8* [[OUTPUT_ARG:%.*]], align 1 +// CHECK-NEXT: store i8 [[TMP3]], i8* [[LT_TYPED]], align 1 +// CHECK-NEXT: [[LOAD_RET_VALUE:%.*]] = load i8, i8* [[LT_TYPED]], align 1 +// CHECK-NEXT: store i8 [[LOAD_RET_VALUE]], i8* [[OUTPUT_ARG]], align 1 // CHECK-NEXT: ret void -// CHECK: define void @sort__1(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 64 dereferenceable(24) [[ALLOC1:%.*]], i8* noalias align 64 dereferenceable(16) [[ALLOC4:%.*]]) +// CHECK: define void @sort__1(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 64 dereferenceable(24) [[ALLOC1:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC2:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC3:%.*]], i8* noalias align 64 dereferenceable(16) [[ALLOC4:%.*]]) // CHECK-NEXT: entry: // CHECK-NEXT: [[COMPARE_RETURN_BUFFER:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to [2 x [3 x i32]]* -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [2 x [3 x float]]* -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[ALLOC4:%.*]], i64 0 -// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* -// CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 -// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 -// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK-NEXT: [[TMP8:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 -// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP8]], [[THREAD_ID]] +// CHECK-NEXT: [[SORT_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC4:%.*]], i64 0 +// CHECK-NEXT: [[SORT_TYPED:%.*]] = bitcast i8* [[SORT_RAW]] to [2 x i8*]* +// CHECK-NEXT: [[SORT_RAW1:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 +// CHECK-NEXT: [[SORT_TYPED2:%.*]] = bitcast i8* [[SORT_RAW1]] to [2 x [3 x i32]]* +// CHECK-NEXT: [[SORT_RAW3:%.*]] = getelementptr 
inbounds i8, i8* [[ALLOC1:%.*]], i64 0 +// CHECK-NEXT: [[SORT_TYPED4:%.*]] = bitcast i8* [[SORT_RAW3]] to [2 x [3 x float]]* +// CHECK-NEXT: [[X_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC2:%.*]], i64 0 +// CHECK-NEXT: [[X_TYPED:%.*]] = bitcast i8* [[X_RAW]] to [2 x [3 x i32]]* +// CHECK-NEXT: [[Y_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC3:%.*]], i64 0 +// CHECK-NEXT: [[Y_TYPED:%.*]] = bitcast i8* [[Y_RAW]] to [2 x [3 x float]]* +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 +// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 +// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP1]] to i64 +// CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 +// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP2]], [[THREAD_ID]] // CHECK-NEXT: [[LINEAR_INDEX_IN_RANGE:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 // CHECK-NEXT: call void @llvm.assume(i1 [[LINEAR_INDEX_IN_RANGE]]) -// CHECK-NEXT: [[TMP9:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 -// CHECK-NEXT: [[TMP10:%.*]] = urem i64 [[TMP9]], 2 -// CHECK-NEXT: [[TMP11:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 -// CHECK-NEXT: [[TMP12:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 -// CHECK-NEXT: br i1 [[TMP12]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] +// CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = urem i64 [[TMP3]], 2 +// CHECK-NEXT: [[TMP5:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 +// CHECK-NEXT: br i1 [[TMP6]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] // CHECK: sort.in_bounds-after: // CHECK-NEXT: ret void // CHECK: sort.in_bounds-true: -// CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP10]], 3 -// CHECK-NEXT: [[TMP14:%.*]] = icmp slt i64 [[TMP10]], [[TMP13]] -// CHECK-NEXT: [[TMP15:%.*]] = icmp slt i64 [[TMP13]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = and i1 [[TMP14]], [[TMP15]] -// CHECK-NEXT: br i1 [[TMP16]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] +// CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP4]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = icmp slt i64 [[TMP4]], [[TMP7]] +// CHECK-NEXT: [[TMP9:%.*]] = icmp slt i64 [[TMP7]], 3 +// CHECK-NEXT: [[TMP10:%.*]] = and i1 [[TMP8]], [[TMP9]] +// CHECK-NEXT: br i1 [[TMP10]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] // CHECK: smaller_comparison_index-after: // CHECK-NEXT: br label [[SORT_IN_BOUNDS_AFTER]] // CHECK: smaller_comparison_index-true: -// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP13]] -// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP10]] -// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP13]] -// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP10]] -// CHECK-NEXT: call void @region_0_6(i32* [[TMP17]], i32* [[TMP18]], float* [[TMP19]], float* [[TMP20]], i8* [[COMPARE_RETURN_BUFFER]]) -// CHECK-NEXT: [[TMP21:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 -// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP21]], 0 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x 
i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP4]] +// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP4]] +// CHECK-NEXT: call void @compare(i32* [[TMP11]], i32* [[TMP12]], float* [[TMP13]], float* [[TMP14]], i8* [[COMPARE_RETURN_BUFFER]]) +// CHECK-NEXT: [[TMP15:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 +// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP15]], 0 // CHECK-NEXT: br i1 [[BOOLEAN_PREDICATE]], label [[IS_SMALLER_THAN_TRUE:%.*]], label [[IS_SMALLER_THAN_AFTER:%.*]] // CHECK: is_smaller_than-after: // CHECK-NEXT: br label [[SMALLER_COMPARISON_INDEX_AFTER]] // CHECK: is_smaller_than-true: -// CHECK-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP10]] -// CHECK-NEXT: store i32 [[TMP22]], i32* [[TMP24]], align 4 -// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP13]] -// CHECK-NEXT: store i32 [[TMP23]], i32* [[TMP25]], align 4 -// CHECK-NEXT: [[TMP26:%.*]] = load float, float* [[TMP19]], align 4 -// CHECK-NEXT: [[TMP27:%.*]] = load float, float* [[TMP20]], align 4 -// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP10]] -// CHECK-NEXT: store float [[TMP26]], float* [[TMP28]], align 4 -// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP13]] -// CHECK-NEXT: store float [[TMP27]], float* [[TMP29]], align 4 +// CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP4]] +// CHECK-NEXT: store i32 [[TMP16]], i32* [[TMP18]], align 4 +// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: store i32 [[TMP17]], i32* [[TMP19]], align 4 +// CHECK-NEXT: [[TMP20:%.*]] = load float, float* [[TMP13]], align 4 +// CHECK-NEXT: [[TMP21:%.*]] = load float, float* [[TMP14]], align 4 +// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP4]] +// CHECK-NEXT: store float [[TMP20]], float* [[TMP22]], align 4 +// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: store float [[TMP21]], float* [[TMP23]], align 4 // CHECK-NEXT: br label [[IS_SMALLER_THAN_AFTER]] -// CHECK: define void @sort__2(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 64 dereferenceable(24) [[ALLOC1:%.*]], i8* noalias align 64 dereferenceable(16) [[ALLOC4:%.*]]) +// CHECK: define void @sort__2(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 64 dereferenceable(24) 
[[ALLOC1:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC2:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC3:%.*]], i8* noalias align 64 dereferenceable(16) [[ALLOC4:%.*]]) // CHECK-NEXT: entry: // CHECK-NEXT: [[COMPARE_RETURN_BUFFER:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to [2 x [3 x i32]]* -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [2 x [3 x float]]* -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[ALLOC4:%.*]], i64 0 -// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* -// CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 -// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 -// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK-NEXT: [[TMP8:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 -// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP8]], [[THREAD_ID]] +// CHECK-NEXT: [[SORT_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC4:%.*]], i64 0 +// CHECK-NEXT: [[SORT_TYPED:%.*]] = bitcast i8* [[SORT_RAW]] to [2 x i8*]* +// CHECK-NEXT: [[SORT_RAW1:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 +// CHECK-NEXT: [[SORT_TYPED2:%.*]] = bitcast i8* [[SORT_RAW1]] to [2 x [3 x i32]]* +// CHECK-NEXT: [[SORT_RAW3:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1:%.*]], i64 0 +// CHECK-NEXT: [[SORT_TYPED4:%.*]] = bitcast i8* [[SORT_RAW3]] to [2 x [3 x float]]* +// CHECK-NEXT: [[X_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC2:%.*]], i64 0 +// CHECK-NEXT: [[X_TYPED:%.*]] = bitcast i8* [[X_RAW]] to [2 x [3 x i32]]* +// CHECK-NEXT: [[Y_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC3:%.*]], i64 0 +// CHECK-NEXT: [[Y_TYPED:%.*]] = bitcast i8* [[Y_RAW]] to [2 x [3 x float]]* +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 +// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 +// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP1]] to i64 +// CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 +// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP2]], [[THREAD_ID]] // CHECK-NEXT: [[LINEAR_INDEX_IN_RANGE:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 // CHECK-NEXT: call void @llvm.assume(i1 [[LINEAR_INDEX_IN_RANGE]]) -// CHECK-NEXT: [[TMP9:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 -// CHECK-NEXT: [[TMP10:%.*]] = urem i64 [[TMP9]], 2 -// CHECK-NEXT: [[TMP11:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 -// CHECK-NEXT: [[TMP12:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 -// CHECK-NEXT: br i1 [[TMP12]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] +// CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = urem i64 [[TMP3]], 2 +// CHECK-NEXT: [[TMP5:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 +// CHECK-NEXT: br i1 [[TMP6]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] // CHECK: sort.in_bounds-after: -// CHECK-NEXT: [[TMP13:%.*]] = bitcast [2 x [3 x i32]]* [[TMP1]] to i8* -// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK-NEXT: store i8* [[TMP13]], i8** [[TMP14]], align 8 -// CHECK-NEXT: 
[[TMP15:%.*]] = bitcast [2 x [3 x float]]* [[TMP3]] to i8* -// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 -// CHECK-NEXT: store i8* [[TMP15]], i8** [[TMP16]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = bitcast [2 x [3 x i32]]* [[SORT_TYPED2]] to i8* +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[SORT_TYPED]], i64 0, i64 0 +// CHECK-NEXT: store i8* [[TMP7]], i8** [[TMP8]], align 8 +// CHECK-NEXT: [[TMP9:%.*]] = bitcast [2 x [3 x float]]* [[SORT_TYPED4]] to i8* +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[SORT_TYPED]], i64 0, i64 1 +// CHECK-NEXT: store i8* [[TMP9]], i8** [[TMP10]], align 8 // CHECK-NEXT: ret void // CHECK: sort.in_bounds-true: -// CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP10]], 2 -// CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP17]], 1 -// CHECK-NEXT: [[TMP19:%.*]] = icmp slt i64 [[TMP17]], [[TMP18]] -// CHECK-NEXT: [[TMP20:%.*]] = icmp slt i64 [[TMP18]], 3 -// CHECK-NEXT: [[TMP21:%.*]] = and i1 [[TMP19]], [[TMP20]] -// CHECK-NEXT: br i1 [[TMP21]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] +// CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP4]], 2 +// CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 1 +// CHECK-NEXT: [[TMP13:%.*]] = icmp slt i64 [[TMP11]], [[TMP12]] +// CHECK-NEXT: [[TMP14:%.*]] = icmp slt i64 [[TMP12]], 3 +// CHECK-NEXT: [[TMP15:%.*]] = and i1 [[TMP13]], [[TMP14]] +// CHECK-NEXT: br i1 [[TMP15]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] // CHECK: smaller_comparison_index-after: // CHECK-NEXT: br label [[SORT_IN_BOUNDS_AFTER]] // CHECK: smaller_comparison_index-true: -// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP18]] -// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP17]] -// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP18]] -// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP17]] -// CHECK-NEXT: call void @region_0_6(i32* [[TMP22]], i32* [[TMP23]], float* [[TMP24]], float* [[TMP25]], i8* [[COMPARE_RETURN_BUFFER]]) -// CHECK-NEXT: [[TMP26:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 -// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP26]], 0 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP12]] +// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP11]] +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP12]] +// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP11]] +// CHECK-NEXT: call void @compare(i32* [[TMP16]], i32* [[TMP17]], float* [[TMP18]], float* [[TMP19]], i8* [[COMPARE_RETURN_BUFFER]]) +// CHECK-NEXT: [[TMP20:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 +// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP20]], 0 // CHECK-NEXT: br i1 [[BOOLEAN_PREDICATE]], label [[IS_SMALLER_THAN_TRUE:%.*]], label [[IS_SMALLER_THAN_AFTER:%.*]] // CHECK: 
is_smaller_than-after: // CHECK-NEXT: br label [[SMALLER_COMPARISON_INDEX_AFTER]] // CHECK: is_smaller_than-true: -// CHECK-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP17]] -// CHECK-NEXT: store i32 [[TMP27]], i32* [[TMP29]], align 4 -// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP18]] -// CHECK-NEXT: store i32 [[TMP28]], i32* [[TMP30]], align 4 -// CHECK-NEXT: [[TMP31:%.*]] = load float, float* [[TMP24]], align 4 -// CHECK-NEXT: [[TMP32:%.*]] = load float, float* [[TMP25]], align 4 -// CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP17]] -// CHECK-NEXT: store float [[TMP31]], float* [[TMP33]], align 4 -// CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP18]] -// CHECK-NEXT: store float [[TMP32]], float* [[TMP34]], align 4 +// CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP11]] +// CHECK-NEXT: store i32 [[TMP21]], i32* [[TMP23]], align 4 +// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP12]] +// CHECK-NEXT: store i32 [[TMP22]], i32* [[TMP24]], align 4 +// CHECK-NEXT: [[TMP25:%.*]] = load float, float* [[TMP18]], align 4 +// CHECK-NEXT: [[TMP26:%.*]] = load float, float* [[TMP19]], align 4 +// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP11]] +// CHECK-NEXT: store float [[TMP25]], float* [[TMP27]], align 4 +// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP12]] +// CHECK-NEXT: store float [[TMP26]], float* [[TMP28]], align 4 // CHECK-NEXT: br label [[IS_SMALLER_THAN_AFTER]] ENTRY main { x = s32[2, 3] parameter(0) diff --git a/tensorflow/compiler/xla/service/gpu/tests/sorting_test.cc b/tensorflow/compiler/xla/service/gpu/tests/sorting_test.cc deleted file mode 100644 index 197a0c6cfeb..00000000000 --- a/tensorflow/compiler/xla/service/gpu/tests/sorting_test.cc +++ /dev/null @@ -1,71 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include - -#include "tensorflow/compiler/xla/service/gpu/gpu_executable.h" -#include "tensorflow/compiler/xla/service/gpu/tests/gpu_codegen_test.h" -#include "tensorflow/compiler/xla/service/hlo_instruction.h" -#include "tensorflow/compiler/xla/service/hlo_module_config.h" -#include "tensorflow/compiler/xla/service/hlo_parser.h" -#include "tensorflow/compiler/xla/statusor.h" -#include "tensorflow/compiler/xla/tests/filecheck.h" -#include "tensorflow/compiler/xla/tests/hlo_test_base.h" -#include "tensorflow/compiler/xla/xla.pb.h" -#include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow/core/platform/test.h" -#include "tensorflow/stream_executor/lib/statusor.h" - -namespace xla { -namespace gpu { - -namespace { - -class SortingTest : public GpuCodegenTest { - protected: - HloModuleConfig ConfigWithoutLayoutAssignment() { - HloModuleConfig config; - auto debug_options = HloTestBase::GetDebugOptionsForTest(); - // Disable layout_assignment to use the preassigned layouts. - debug_options.add_xla_disable_hlo_passes("layout-assignment"); - config.set_debug_options(debug_options); - return config; - } -}; - -TEST_F(SortingTest, Regression1) { - const char* hlo_text = R"( -HloModule TestModule - -compare { - p.0.lhs = f32[] parameter(0) - p.0.rhs = f32[] parameter(1) - ROOT lt = pred[] compare(p.0.lhs, p.0.rhs), direction=LT -} - -ENTRY TestComputation { - x = f32[3, 2]{1, 0} parameter(0) - x.copy = f32[3, 2]{0, 1} copy(x) - ROOT sort = f32[3, 2]{0, 1} sort(x.copy), dimensions={1}, to_apply=compare -} - -)"; - - EXPECT_TRUE(RunAndCompareNoHloPasses(hlo_text, ErrorSpec{1e-5, 1e-5})); -} - -} // namespace -} // namespace gpu -} // namespace xla diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc index 2963d546380..b01ae2efe43 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc @@ -415,10 +415,9 @@ llvm::Instruction* AddRangeMetadata(int64 lower, int64 upper, return inst; } -string IrName(absl::string_view a) { - std::string s(a); - s.erase(std::remove(s.begin(), s.end(), '%'), s.end()); - return s; +string IrName(string a) { + a.erase(std::remove(a.begin(), a.end(), '%'), a.end()); + return a; } string IrName(absl::string_view a, absl::string_view b) { diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h index c0a55e4da33..642965b6470 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h @@ -87,7 +87,7 @@ string DumpModuleToString(const llvm::Module& module); // - joining all of the nonempty inputs by '.', and then // - removing all '%'s. // -string IrName(absl::string_view a); +string IrName(string a); string IrName(absl::string_view a, absl::string_view b); string IrName(const HloInstruction* a, absl::string_view b = ""); From 25d2018f26e333d2f4cae9b9b97926dac9c5dd34 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Wed, 5 Aug 2020 17:07:47 -0700 Subject: [PATCH 2215/2522] Cleaned unused stuff in Tensor and LinearStorage. 
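The diff below collapses CreateBufferLinearStorage, CreateTextureLinearStorage and the LinearStorageCreateInfo struct into a single CreateLinearStorage entry point. As a rough, hypothetical sketch of a call site against that new signature (the CLContext `context` and the host-side `weights` buffer are placeholders for illustration, not code from this patch):

    // Sketch only: build a read-only BUFFER-backed LinearStorage from float4
    // data. Assumes a valid CLContext `context` is already available and that
    // `weights` holds one float4 per group of four channels.
    std::vector<float4> weights(16);
    LinearStorage storage;
    absl::Status status =
        CreateLinearStorage(LinearStorageType::BUFFER, DataType::FLOAT32,
                            /*size=*/static_cast<int>(weights.size()),
                            weights.data(), &context, &storage);
    if (!status.ok()) {
      // handle the error
    }
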
PiperOrigin-RevId: 325132234 Change-Id: I705a857042f4f42906723c3b608c2ff621a17f81 --- .../lite/delegates/gpu/cl/linear_storage.cc | 63 +++++--------- .../lite/delegates/gpu/cl/linear_storage.h | 82 ++++--------------- tensorflow/lite/delegates/gpu/cl/tensor.h | 11 --- 3 files changed, 36 insertions(+), 120 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/linear_storage.cc b/tensorflow/lite/delegates/gpu/cl/linear_storage.cc index eb822b620f7..0ff17d0e3de 100644 --- a/tensorflow/lite/delegates/gpu/cl/linear_storage.cc +++ b/tensorflow/lite/delegates/gpu/cl/linear_storage.cc @@ -81,31 +81,22 @@ absl::Status TensorLinearDescriptor::PerformReadSelector( } } -LinearStorage::LinearStorage(int depth, LinearStorageType storage_type, - DataType data_type) - : depth_(depth), storage_type_(storage_type), data_type_(data_type) {} +LinearStorage::LinearStorage(int depth, LinearStorageType storage_type) + : depth_(depth), storage_type_(storage_type) {} LinearStorage::LinearStorage(LinearStorage&& storage) : GPUObject(std::move(storage)), texture_storage_(std::move(storage.texture_storage_)), buffer_storage_(std::move(storage.buffer_storage_)), - memory_(storage.memory_), depth_(storage.depth_), - name_(std::move(storage.name_)), - storage_type_(storage.storage_type_), - data_type_(storage.data_type_) { - storage.memory_ = nullptr; -} + storage_type_(storage.storage_type_) {} LinearStorage& LinearStorage::operator=(LinearStorage&& storage) { if (this != &storage) { texture_storage_ = std::move(storage.texture_storage_); buffer_storage_ = std::move(storage.buffer_storage_); - std::swap(memory_, storage.memory_); std::swap(depth_, storage.depth_); - name_ = std::move(storage.name_); std::swap(storage_type_, storage.storage_type_); - std::swap(data_type_, storage.data_type_); GPUObject::operator=(std::move(storage)); } return *this; @@ -124,9 +115,9 @@ absl::Status LinearStorage::GetGPUResources( resources->ints.push_back({"length", depth_}); if (storage_type_ == LinearStorageType::BUFFER) { - resources->buffers.push_back({"buffer", memory_}); + resources->buffers.push_back({"buffer", buffer_storage_.GetMemoryPtr()}); } else { - resources->images2d.push_back({"tex2d", memory_}); + resources->images2d.push_back({"tex2d", texture_storage_.GetMemoryPtr()}); } return absl::OkStatus(); @@ -141,37 +132,21 @@ LinearStorageType DeduceLinearStorageType( } } -absl::Status CreateBufferLinearStorage(int size, DataType data_type, void* data, - CLContext* context, - LinearStorage* result) { - const int float4_size = - data_type == DataType::FLOAT32 ? 
sizeof(float4) : sizeof(half4); - *result = LinearStorage(size, LinearStorageType::BUFFER, data_type); - RETURN_IF_ERROR(CreateReadOnlyBuffer(float4_size * size, data, context, - &result->buffer_storage_)); - result->memory_ = result->buffer_storage_.GetMemoryPtr(); - return absl::OkStatus(); -} - -absl::Status CreateTextureLinearStorage(int size, DataType data_type, - void* data, CLContext* context, - LinearStorage* result) { - *result = LinearStorage(size, LinearStorageType::TEXTURE_2D, data_type); - RETURN_IF_ERROR(CreateTexture2DRGBA(data_type, size, 1, data, context, - &result->texture_storage_)); - result->memory_ = result->texture_storage_.GetMemoryPtr(); - return absl::OkStatus(); -} - -absl::Status CreateLinearStorage(const LinearStorageCreateInfo& creation_info, - int size, void* data, CLContext* context, - LinearStorage* result) { - if (creation_info.storage_type == LinearStorageType::BUFFER) { - return CreateBufferLinearStorage(size, creation_info.data_type, data, - context, result); +absl::Status CreateLinearStorage(LinearStorageType storage_type, + DataType data_type, int size, void* data, + CLContext* context, LinearStorage* result) { + if (storage_type == LinearStorageType::BUFFER) { + const int float4_size = + data_type == DataType::FLOAT32 ? sizeof(float4) : sizeof(half4); + *result = LinearStorage(size, LinearStorageType::BUFFER); + RETURN_IF_ERROR(CreateReadOnlyBuffer(float4_size * size, data, context, + &result->buffer_storage_)); + return absl::OkStatus(); } else { - return CreateTextureLinearStorage(size, creation_info.data_type, data, - context, result); + *result = LinearStorage(size, LinearStorageType::TEXTURE_2D); + RETURN_IF_ERROR(CreateTexture2DRGBA(data_type, size, 1, data, context, + &result->texture_storage_)); + return absl::OkStatus(); } } diff --git a/tensorflow/lite/delegates/gpu/cl/linear_storage.h b/tensorflow/lite/delegates/gpu/cl/linear_storage.h index 2c96c79f596..1bc855f4205 100644 --- a/tensorflow/lite/delegates/gpu/cl/linear_storage.h +++ b/tensorflow/lite/delegates/gpu/cl/linear_storage.h @@ -52,13 +52,6 @@ struct TensorLinearDescriptor : public GPUObjectDescriptor { std::string* result) const; }; -struct LinearStorageCreateInfo { - LinearStorageType storage_type; - DataType data_type; - std::string name; // optional - int aligned_size = 0; // optional, to pad with zeroes -}; - LinearStorageType DeduceLinearStorageType( TensorStorageType tensor_storage_type); @@ -74,87 +67,46 @@ class LinearStorage : public GPUObject { LinearStorage(const LinearStorage&) = delete; LinearStorage& operator=(const LinearStorage&) = delete; - void SetName(const std::string& name) { name_ = name; } - absl::Status GetGPUResources(const GPUObjectDescriptor* obj_ptr, GPUResourcesWithValue* resources) const override; private: - friend absl::Status CreateTextureLinearStorage(int size, DataType data_type, - void* data, CLContext* context, - LinearStorage* result); - friend absl::Status CreateBufferLinearStorage(int size, DataType data_type, - void* data, CLContext* context, - LinearStorage* result); + friend absl::Status CreateLinearStorage(LinearStorageType storage_type, + DataType data_type, int size, + void* data, CLContext* context, + LinearStorage* result); - LinearStorage(int depth, LinearStorageType storage_type, DataType data_type); + LinearStorage(int depth, LinearStorageType storage_type); Texture2D texture_storage_; Buffer buffer_storage_; - cl_mem memory_ = nullptr; // Just a reference to texture_storage_ or - // buffer_storage_ memory, not an owner + int 
depth_; - std::string name_; LinearStorageType storage_type_; - DataType data_type_; }; -absl::Status CreateBufferLinearStorage(int size, DataType data_type, void* data, - CLContext* context, - LinearStorage* result); - -absl::Status CreateTextureLinearStorage(int size, DataType data_type, - void* data, CLContext* context, - LinearStorage* result); - -absl::Status CreateLinearStorage(const LinearStorageCreateInfo& creation_info, - int size, void* data, CLContext* context, - LinearStorage* result); - -template -absl::Status CreateLinearStorage(const LinearStorageCreateInfo& creation_info, - const tflite::gpu::Tensor& tensor, - CLContext* context, LinearStorage* result) { - int size = creation_info.aligned_size != 0 ? creation_info.aligned_size - : tensor.shape.v; - const int depth = DivideRoundUp(size, 4); - if (creation_info.data_type == DataType::FLOAT32) { - std::vector gpu_data(depth); - CopyLinearFLT4(tensor, absl::MakeSpan(gpu_data)); - RETURN_IF_ERROR(CreateLinearStorage(creation_info, depth, gpu_data.data(), - context, result)); - } else { - std::vector gpu_data(depth); - CopyLinearFLT4(tensor, absl::MakeSpan(gpu_data)); - RETURN_IF_ERROR(CreateLinearStorage(creation_info, depth, gpu_data.data(), - context, result)); - } - result->SetName(creation_info.name); - return absl::OkStatus(); -} +absl::Status CreateLinearStorage(LinearStorageType storage_type, + DataType data_type, int size, void* data, + CLContext* context, LinearStorage* result); template absl::Status CreateLinearStorage(const TensorLinearDescriptor& descriptor, const tflite::gpu::Tensor& tensor, CLContext* context, LinearStorage* result) { - LinearStorageCreateInfo creation_info; - creation_info.storage_type = descriptor.storage_type; - creation_info.data_type = descriptor.element_type; - int size = creation_info.aligned_size != 0 ? 
creation_info.aligned_size - : tensor.shape.v; - const int depth = DivideRoundUp(size, 4); - if (creation_info.data_type == DataType::FLOAT32) { + const int depth = DivideRoundUp(tensor.shape.v, 4); + if (descriptor.element_type == DataType::FLOAT32) { std::vector gpu_data(depth); CopyLinearFLT4(tensor, absl::MakeSpan(gpu_data)); - RETURN_IF_ERROR(CreateLinearStorage(creation_info, depth, gpu_data.data(), - context, result)); + RETURN_IF_ERROR(CreateLinearStorage(descriptor.storage_type, + descriptor.element_type, depth, + gpu_data.data(), context, result)); } else { std::vector gpu_data(depth); CopyLinearFLT4(tensor, absl::MakeSpan(gpu_data)); - RETURN_IF_ERROR(CreateLinearStorage(creation_info, depth, gpu_data.data(), - context, result)); + RETURN_IF_ERROR(CreateLinearStorage(descriptor.storage_type, + descriptor.element_type, depth, + gpu_data.data(), context, result)); } - result->SetName(creation_info.name); return absl::OkStatus(); } diff --git a/tensorflow/lite/delegates/gpu/cl/tensor.h b/tensorflow/lite/delegates/gpu/cl/tensor.h index 8d914970743..a204ae9418a 100644 --- a/tensorflow/lite/delegates/gpu/cl/tensor.h +++ b/tensorflow/lite/delegates/gpu/cl/tensor.h @@ -68,17 +68,6 @@ class Tensor : public GPUObject { int Slices() const { return DivideRoundUp(shape_.c, 4); } int Batch() const { return shape_.b; } - // returns int4(width * batch, height, slices, batch) - int4 GetWBatchedHSB() const { - return int4(shape_.w * shape_.b, shape_.h, Slices(), shape_.b); - } - int4 GetWBatchedHDS() const { - return int4(shape_.w * shape_.b, shape_.h, shape_.d, Slices()); - } - - int4 GetWHSB() const { return int4(shape_.w, shape_.h, Slices(), shape_.b); } - int4 GetWHDS() const { return int4(shape_.w, shape_.h, shape_.d, Slices()); } - TensorDescriptor GetDescriptor() const { return descriptor_; } DataType GetDataType() const { return descriptor_.data_type; } TensorStorageType GetStorageType() const { return descriptor_.storage_type; } From c5543bc170cc8013e73c00ece12b19d1a08f3f5c Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Wed, 5 Aug 2020 17:27:28 -0700 Subject: [PATCH 2216/2522] Add TpuCoreLocationExternal::host_coordinates() and TpuTopologyExternal::IdForHost(). This also refactors TpuCoreLocationExternal::chip_coordinates() and downstream plumbing to be more compact. 
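The refactor below also replaces the per-axis TpuCoreLocation_ChipCoordinates_X/_Y/_Z C API calls with a single call that fills x, y and z at once, and the new host_coordinates()/IdForHost() plumbing follows the same pattern. A minimal, hypothetical usage sketch against the new C++ wrappers (assumes `topology` is a valid TpuTopologyExternal and `core` is a core location obtained from it; neither object is constructed in this patch):

    // Sketch only: map a TPU core back to the id of the host that owns it.
    tensorflow::tpu::TpuDimensionsExternal chip = core.chip_coordinates();
    tensorflow::tpu::TpuDimensionsExternal host = core.host_coordinates();  // new accessor
    int host_id = topology.IdForHost(host);  // new lookup added by this change
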
PiperOrigin-RevId: 325135180 Change-Id: Ic350d3d64b01fced4d6d90551f71e97b271e0f2b --- tensorflow/core/tpu/tpu_library_init_fns.inc | 6 ++--- .../stream_executor/tpu/tpu_executor_c_api.h | 15 +++++++----- .../stream_executor/tpu/tpu_topology.cc | 23 ++++++++++++++----- tensorflow/stream_executor/tpu/tpu_topology.h | 6 +++-- 4 files changed, 33 insertions(+), 17 deletions(-) diff --git a/tensorflow/core/tpu/tpu_library_init_fns.inc b/tensorflow/core/tpu/tpu_library_init_fns.inc index 6914a8cd102..bc93b737eb5 100644 --- a/tensorflow/core/tpu/tpu_library_init_fns.inc +++ b/tensorflow/core/tpu/tpu_library_init_fns.inc @@ -172,9 +172,9 @@ tensorflow::Status SetExecutorStructFn(void* library_handle) { TFTPU_SET_FN(executor_fn, TpuTopology_ChipBounds_Z); TFTPU_SET_FN(executor_fn, TpuTopology_HasChip); TFTPU_SET_FN(executor_fn, TpuTopology_Core); - TFTPU_SET_FN(executor_fn, TpuCoreLocation_ChipCoordinates_X); - TFTPU_SET_FN(executor_fn, TpuCoreLocation_ChipCoordinates_Y); - TFTPU_SET_FN(executor_fn, TpuCoreLocation_ChipCoordinates_Z); + TFTPU_SET_FN(executor_fn, TpuTopology_IdForHost); + TFTPU_SET_FN(executor_fn, TpuCoreLocation_ChipCoordinates); + TFTPU_SET_FN(executor_fn, TpuCoreLocation_HostCoordinates); TFTPU_SET_FN(executor_fn, TpuCoreLocation_Index); TFTPU_SET_FN(executor_fn, TpuCoreLocation_Id); diff --git a/tensorflow/stream_executor/tpu/tpu_executor_c_api.h b/tensorflow/stream_executor/tpu/tpu_executor_c_api.h index 77806bd338e..a67fc9ddf61 100644 --- a/tensorflow/stream_executor/tpu/tpu_executor_c_api.h +++ b/tensorflow/stream_executor/tpu/tpu_executor_c_api.h @@ -196,9 +196,11 @@ int TpuTopology_ChipBounds_Z(void* tpu_topology); bool TpuTopology_HasChip(void* tpu_topology, int x, int y, int z); void* TpuTopology_Core(void* tpu_topology, int x, int y, int z, TpuCoreTypeEnum tpu_core_type, int index); -int TpuCoreLocation_ChipCoordinates_X(void* tpu_core_location); -int TpuCoreLocation_ChipCoordinates_Y(void* tpu_core_location); -int TpuCoreLocation_ChipCoordinates_Z(void* tpu_core_location); +int TpuTopology_IdForHost(void* tpu_topology, int x, int y, int z); +void TpuCoreLocation_ChipCoordinates(void* tpu_core_location, int* x, int* y, + int* z); +void TpuCoreLocation_HostCoordinates(void* tpu_core_location, int* x, int* y, + int* z); int TpuCoreLocation_Index(void* tpu_core_location); int TpuCoreLocation_Id(void* tpu_core_location); @@ -345,9 +347,10 @@ struct TfTpu_ExecutorApiFn { TFTPU_ADD_FN_IN_STRUCT(TpuTopology_ChipBounds_Z); TFTPU_ADD_FN_IN_STRUCT(TpuTopology_HasChip); TFTPU_ADD_FN_IN_STRUCT(TpuTopology_Core); - TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_ChipCoordinates_X); - TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_ChipCoordinates_Y); - TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_ChipCoordinates_Z); + TFTPU_ADD_FN_IN_STRUCT(TpuTopology_IdForHost); + + TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_ChipCoordinates); + TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_HostCoordinates); TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_Index); TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_Id); diff --git a/tensorflow/stream_executor/tpu/tpu_topology.cc b/tensorflow/stream_executor/tpu/tpu_topology.cc index 4499b2d70eb..74eb0aaf607 100644 --- a/tensorflow/stream_executor/tpu/tpu_topology.cc +++ b/tensorflow/stream_executor/tpu/tpu_topology.cc @@ -20,12 +20,18 @@ limitations under the License. 
namespace tensorflow { namespace tpu { -TpuChipCoordinatesExternal TpuCoreLocationExternal::chip_coordinates() const { - return { - tpu::ExecutorApiFn()->TpuCoreLocation_ChipCoordinates_XFn(core_location_), - tpu::ExecutorApiFn()->TpuCoreLocation_ChipCoordinates_YFn(core_location_), - tpu::ExecutorApiFn()->TpuCoreLocation_ChipCoordinates_ZFn( - core_location_)}; +TpuDimensionsExternal TpuCoreLocationExternal::chip_coordinates() const { + int x, y, z; + tpu::ExecutorApiFn()->TpuCoreLocation_ChipCoordinatesFn(core_location_, &x, + &y, &z); + return {x, y, z}; +} + +TpuDimensionsExternal TpuCoreLocationExternal::host_coordinates() const { + int x, y, z; + tpu::ExecutorApiFn()->TpuCoreLocation_HostCoordinatesFn(core_location_, &x, + &y, &z); + return {x, y, z}; } int32 TpuCoreLocationExternal::index() const { @@ -69,5 +75,10 @@ TpuCoreLocationExternal TpuTopologyExternal::Core(int x, int y, int z, topology_, x, y, z, core_type, index)); } +int TpuTopologyExternal::IdForHost(TpuDimensionsExternal host) const { + return tpu::ExecutorApiFn()->TpuTopology_IdForHostFn(topology_, host.x, + host.y, host.z); +} + } // namespace tpu } // namespace tensorflow diff --git a/tensorflow/stream_executor/tpu/tpu_topology.h b/tensorflow/stream_executor/tpu/tpu_topology.h index d6c169f4fa0..6b64fb64985 100644 --- a/tensorflow/stream_executor/tpu/tpu_topology.h +++ b/tensorflow/stream_executor/tpu/tpu_topology.h @@ -22,7 +22,7 @@ limitations under the License. namespace tensorflow { namespace tpu { -struct TpuChipCoordinatesExternal { +struct TpuDimensionsExternal { int x; int y; int z; @@ -33,7 +33,8 @@ class TpuCoreLocationExternal { TpuCoreLocationExternal() : core_location_(nullptr) {} explicit TpuCoreLocationExternal(void* core_location) : core_location_(core_location) {} - TpuChipCoordinatesExternal chip_coordinates() const; + TpuDimensionsExternal chip_coordinates() const; + TpuDimensionsExternal host_coordinates() const; int32 index() const; int32 Id() const; @@ -66,6 +67,7 @@ class TpuTopologyExternal { bool HasChip(int x, int y, int z) const; TpuCoreLocationExternal Core(int x, int y, int z, TpuCoreTypeEnum core_type, int index) const; + int IdForHost(TpuDimensionsExternal host) const; private: void* topology_; From 5c2ea9fd1a5f164b45c319b8e03212490a9a90f3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 5 Aug 2020 17:46:22 -0700 Subject: [PATCH 2217/2522] Added support for zero accumulators in FTRL optimizer implementation for TPU embeddings, behind an optimization parameter setting because it hurts performance. PiperOrigin-RevId: 325137678 Change-Id: Iee7331beb6f76cdf93a7ffd33f1b8cd083b624a5 --- tensorflow/core/protobuf/tpu/optimization_parameters.proto | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/protobuf/tpu/optimization_parameters.proto b/tensorflow/core/protobuf/tpu/optimization_parameters.proto index 53905a33a3b..f7748ef5689 100644 --- a/tensorflow/core/protobuf/tpu/optimization_parameters.proto +++ b/tensorflow/core/protobuf/tpu/optimization_parameters.proto @@ -96,13 +96,18 @@ message StochasticGradientDescentParameters {} // for a dynamic learning rate, it is nearly the same as long as the learning // rate does not change quickly. The benefit of setting multiply_linear_by_lr to // true is that the modified formula handles zero and near-zero learning rates -// without producing NaNs, improving flexibility for learning rate ramp-up. +// without producing NaNs, improving flexibility for learning rate ramp-up. 
The +// allow_zero_accumulator parameter changes some internal formulas to allow zero +// and near-zero accumulator values at the cost of some performance; this only +// needs to be set if you are using an initial accumulator value of zero, which +// is uncommon. message FtrlParameters { float l1 = 1; float l2 = 2; float lr_power = 3; float beta = 7; bool multiply_linear_by_lr = 6; + bool allow_zero_accumulator = 8; // Old initial accumulator parameters. reserved "initial_accum", "initial_linear"; From c9cf93af04ca44d71ed8a27768809b24f26eee11 Mon Sep 17 00:00:00 2001 From: Ce Zheng Date: Wed, 5 Aug 2020 18:03:52 -0700 Subject: [PATCH 2218/2522] Refactored BatchResourceBase to be able to extend BatchTask. PiperOrigin-RevId: 325140153 Change-Id: Id3c4821ce4ac1893bf3f1140e94e056f7fd87293 --- .../core/common_runtime/eager/execute.cc | 7 +-- tensorflow/core/kernels/batch_kernels.cc | 3 +- .../batching_util/batch_resource_base.cc | 51 +++++++++++-------- .../batching_util/batch_resource_base.h | 9 +++- 4 files changed, 39 insertions(+), 31 deletions(-) diff --git a/tensorflow/core/common_runtime/eager/execute.cc b/tensorflow/core/common_runtime/eager/execute.cc index 577ac4c9c1f..e51456eaa27 100644 --- a/tensorflow/core/common_runtime/eager/execute.cc +++ b/tensorflow/core/common_runtime/eager/execute.cc @@ -94,12 +94,7 @@ bool KernelCacheEnabled(const OpDef& op_def) { return false; } // TODO(b/162540360): Revisit a way to mark kernels as uncachable once we have - // 5+ such kernels to exclude. - // - // RuntimeFallback requires that this kernel should not be cached. - if (op_def.name() == "_BatchFunctionFallback") { - return false; - } + // 5+ kernels to exclude. return true; } diff --git a/tensorflow/core/kernels/batch_kernels.cc b/tensorflow/core/kernels/batch_kernels.cc index 1f430039b40..04071505294 100644 --- a/tensorflow/core/kernels/batch_kernels.cc +++ b/tensorflow/core/kernels/batch_kernels.cc @@ -72,9 +72,10 @@ class BatchResource : public serving::BatchResourceBase { fhandle_(fhandle) {} void ProcessFuncBatchImpl( - OpKernelContext* last_task_context, absl::Span inputs, + const BatchTask& last_task, absl::Span inputs, std::vector* combined_outputs, std::function done) const override { + auto* last_task_context = last_task.context; FunctionLibraryRuntime::Options opts; opts.step_container = last_task_context->step_container(); opts.cancellation_manager = last_task_context->cancellation_manager(); diff --git a/tensorflow/core/kernels/batching_util/batch_resource_base.cc b/tensorflow/core/kernels/batching_util/batch_resource_base.cc index adfa172cf1c..44e2879b9e4 100644 --- a/tensorflow/core/kernels/batching_util/batch_resource_base.cc +++ b/tensorflow/core/kernels/batching_util/batch_resource_base.cc @@ -88,7 +88,8 @@ using TensorMatrix = std::vector>; Status BatchResourceBase::RegisterInput( int64 guid, OpKernelContext* context, const string& batcher_queue_name, AsyncOpKernel::DoneCallback done_callback) { - auto batch_components = absl::make_unique(); + std::unique_ptr batch_components; + TF_RETURN_IF_ERROR(CreateBatchTask(context, &batch_components)); batch_components->start_time = EnvTime::NowNanos(); batch_components->guid = guid; batch_components->propagated_context = Context(ContextKind::kThread); @@ -441,8 +442,8 @@ void BatchResourceBase::ProcessFuncBatch(std::unique_ptr batch) const { // which are running this Session, of which this BatchOp is a part. 
WithContext wc(batch->task(batch->num_tasks() - 1).propagated_context); - OpKernelContext* last_task_context = - batch->task(batch->num_tasks() - 1).context; + auto& last_task = batch->task(batch->num_tasks() - 1); + OpKernelContext* last_task_context = last_task.context; // Regardless of the outcome, we need to propagate the status to the // individual tasks and signal that they are done. We use MakeCleanup() to @@ -495,25 +496,24 @@ void BatchResourceBase::ProcessFuncBatch(std::unique_ptr batch) const { // Releases the cleanup method here, because the callback of the function // library runtime will handle it now. finally.release(); - ProcessFuncBatchImpl(last_task_context, args, &combined_outputs, - [&](const Status& run_status) { - Status final_status; - auto run_finally = gtl::MakeCleanup([&]() { - // We do the cleanup here as an optimization, so that - // it runs in the underlying TF inter-op threadpool. - // Running it in the threadpool, let's the ensuing - // ops be scheduled faster, because the executor will - // add them to the front of the threadpool's task - // queue rather than the end. - cleanup_fn(final_status); - }); - final_status = run_status; - if (!final_status.ok()) { - return; - } - final_status = - SplitOutputTensors(combined_outputs, batch.get()); - }); + ProcessFuncBatchImpl( + last_task, args, &combined_outputs, [&](const Status& run_status) { + Status final_status; + auto run_finally = gtl::MakeCleanup([&]() { + // We do the cleanup here as an optimization, so that + // it runs in the underlying TF inter-op threadpool. + // Running it in the threadpool, let's the ensuing + // ops be scheduled faster, because the executor will + // add them to the front of the threadpool's task + // queue rather than the end. + cleanup_fn(final_status); + }); + final_status = run_status; + if (!final_status.ok()) { + return; + } + final_status = SplitOutputTensors(combined_outputs, batch.get()); + }); } // Processes a batch of one or more BatchTask entries. @@ -632,5 +632,12 @@ Status BatchResourceBase::LookupOrCreateBatcherQueue(const string& queue_name, return Status::OK(); } +Status BatchResourceBase::CreateBatchTask( + OpKernelContext* context, + std::unique_ptr* output) const { + *output = absl::make_unique(); + return Status::OK(); +} + } // namespace serving } // namespace tensorflow diff --git a/tensorflow/core/kernels/batching_util/batch_resource_base.h b/tensorflow/core/kernels/batching_util/batch_resource_base.h index 0471207c951..39d6e3dd951 100644 --- a/tensorflow/core/kernels/batching_util/batch_resource_base.h +++ b/tensorflow/core/kernels/batching_util/batch_resource_base.h @@ -116,10 +116,15 @@ class BatchResourceBase : public ResourceBase { private: // Implementation of calling the process batch function. virtual void ProcessFuncBatchImpl( - OpKernelContext* last_task_context, absl::Span inputs, - std::vector* combined_outputs, + const BatchResourceBase::BatchTask& last_task, + absl::Span inputs, std::vector* combined_outputs, std::function done) const = 0; + // Factory method for creating a BatchTask, overridable by subclasses. + virtual Status CreateBatchTask( + OpKernelContext* context, + std::unique_ptr* output) const; + // Validates that it's legal to combine the tasks in 'batch' into a batch. // Assumes the batch is non-empty. 
static Status ValidateBatch(const BatchT& batch); From 96f591d270cecd171af39e92298a37c31614b9ff Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: Wed, 5 Aug 2020 18:09:32 -0700 Subject: [PATCH 2219/2522] [tf.data service] Give workers their previous tasks when they rejoin PiperOrigin-RevId: 325140951 Change-Id: Ib855a6b696b6b3cf31808b533c16ce1814c720e7 --- .../core/data/service/dispatcher_impl.cc | 20 +++++- .../core/data/service/dispatcher_state.cc | 17 +++++ .../core/data/service/dispatcher_state.h | 7 +++ .../data/service/dispatcher_state_test.cc | 62 +++++++++++++++++++ 4 files changed, 103 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/data/service/dispatcher_impl.cc b/tensorflow/core/data/service/dispatcher_impl.cc index 9e705d51ea8..b440e9be905 100644 --- a/tensorflow/core/data/service/dispatcher_impl.cc +++ b/tensorflow/core/data/service/dispatcher_impl.cc @@ -22,6 +22,7 @@ limitations under the License. #include "grpcpp/create_channel.h" #include "grpcpp/impl/codegen/server_context.h" #include "grpcpp/security/credentials.h" +#include "absl/container/flat_hash_map.h" #include "absl/memory/memory.h" #include "tensorflow/core/data/service/common.pb.h" #include "tensorflow/core/data/service/credentials_factory.h" @@ -109,8 +110,8 @@ Status DataServiceDispatcherImpl::RegisterWorker( VLOG(3) << "Received register worker request"; mutex_lock l(mu_); std::string worker_address = request->worker_address(); - std::shared_ptr worker; - Status s = state_.WorkerFromAddress(worker_address, &worker); + std::vector> tasks; + Status s = state_.TasksForWorker(worker_address, tasks); if (errors::IsNotFound(s)) { Update update; update.mutable_register_worker()->set_worker_address(worker_address); @@ -119,6 +120,14 @@ Status DataServiceDispatcherImpl::RegisterWorker( return s; } + absl::flat_hash_map> tasks_by_job; + for (const auto& task : tasks) { + // Should never have multiple tasks on the same worker for the same job. + auto& task_for_job = tasks_by_job[task->job_id]; + DCHECK(task_for_job == nullptr); + task_for_job = task; + } + std::vector> jobs = state_.ListJobs(); // Allocate tasks to the worker. 
for (const auto& job : jobs) { @@ -126,7 +135,12 @@ Status DataServiceDispatcherImpl::RegisterWorker( continue; } std::shared_ptr task; - TF_RETURN_IF_ERROR(CreateTask(job, worker_address, &task)); + auto it = tasks_by_job.find(job->job_id); + if (it != tasks_by_job.end()) { + task = it->second; + } else { + TF_RETURN_IF_ERROR(CreateTask(job, worker_address, &task)); + } TaskDef* task_def = response->add_tasks(); std::shared_ptr dataset; TF_RETURN_IF_ERROR(state_.DatasetFromId(job->dataset_id, &dataset)); diff --git a/tensorflow/core/data/service/dispatcher_state.cc b/tensorflow/core/data/service/dispatcher_state.cc index 1e914b69e5b..aedfab7280b 100644 --- a/tensorflow/core/data/service/dispatcher_state.cc +++ b/tensorflow/core/data/service/dispatcher_state.cc @@ -67,6 +67,7 @@ void DispatcherState::RegisterWorker( std::string address = register_worker.worker_address(); DCHECK(!workers_.contains(address)); workers_[address] = std::make_shared(address); + tasks_by_worker_[address] = std::vector>(); } void DispatcherState::CreateJob(const CreateJobUpdate& create_job) { @@ -97,6 +98,7 @@ void DispatcherState::CreateTask(const CreateTaskUpdate& create_task) { create_task.dataset_id(), create_task.worker_address()); tasks_by_job_[create_task.job_id()].push_back(task); + tasks_by_worker_[create_task.worker_address()].push_back(task); next_available_task_id_ = std::max(next_available_task_id_, task_id + 1); } @@ -219,6 +221,21 @@ Status DispatcherState::TasksForJob( return Status::OK(); } +Status DispatcherState::TasksForWorker( + absl::string_view worker_address, + std::vector>& tasks) const { + auto it = tasks_by_worker_.find(worker_address); + if (it == tasks_by_worker_.end()) { + return errors::NotFound("Worker ", worker_address, " not found"); + } + std::vector> worker_tasks = it->second; + tasks.reserve(worker_tasks.size()); + for (const auto& task : worker_tasks) { + tasks.push_back(task); + } + return Status::OK(); +} + int64 DispatcherState::NextAvailableTaskId() const { return next_available_task_id_; } diff --git a/tensorflow/core/data/service/dispatcher_state.h b/tensorflow/core/data/service/dispatcher_state.h index b1aa0aa3979..8db05064a40 100644 --- a/tensorflow/core/data/service/dispatcher_state.h +++ b/tensorflow/core/data/service/dispatcher_state.h @@ -160,6 +160,10 @@ class DispatcherState { // if there is no such job. Status TasksForJob(int64 job_id, std::vector>* tasks) const; + // Stores a list of all tasks for the given worker to `*tasks`. Returns + // NOT_FOUND if there is no such worker. + Status TasksForWorker(const absl::string_view worker_address, + std::vector>& tasks) const; private: void RegisterDataset(const RegisterDatasetUpdate& register_dataset); @@ -190,6 +194,9 @@ class DispatcherState { absl::flat_hash_map> tasks_; // Tasks, keyed by job ids. absl::flat_hash_map>> tasks_by_job_; + // Tasks, keyed by worker addresses. 
+ absl::flat_hash_map>> + tasks_by_worker_; }; } // namespace data diff --git a/tensorflow/core/data/service/dispatcher_state_test.cc b/tensorflow/core/data/service/dispatcher_state_test.cc index 78f507ec349..004890242c2 100644 --- a/tensorflow/core/data/service/dispatcher_state_test.cc +++ b/tensorflow/core/data/service/dispatcher_state_test.cc @@ -239,6 +239,11 @@ TEST(DispatcherState, CreateTask) { TF_EXPECT_OK(state.TasksForJob(job_id, &tasks)); EXPECT_THAT(tasks, SizeIs(1)); } + { + std::vector> tasks; + TF_EXPECT_OK(state.TasksForWorker(worker_address, tasks)); + EXPECT_EQ(1, tasks.size()); + } } TEST(DispatcherState, CreateTasksForSameJob) { @@ -288,6 +293,63 @@ TEST(DispatcherState, CreateTasksForDifferentJobs) { } } +TEST(DispatcherState, CreateTasksForSameWorker) { + int64 job_id = 3; + int64 dataset_id = 10; + int64 task_id_1 = 8; + int64 task_id_2 = 9; + std::string worker_address = "test_worker_address"; + DispatcherState state; + TF_EXPECT_OK(RegisterDatasetWithIdAndFingerprint(dataset_id, 1, &state)); + TF_EXPECT_OK(CreateAnonymousJob(job_id, dataset_id, &state)); + TF_EXPECT_OK( + CreateTask(task_id_1, job_id, dataset_id, worker_address, &state)); + TF_EXPECT_OK( + CreateTask(task_id_2, job_id, dataset_id, worker_address, &state)); + { + std::vector> tasks; + TF_EXPECT_OK(state.TasksForWorker(worker_address, tasks)); + EXPECT_EQ(2, tasks.size()); + } +} + +TEST(DispatcherState, CreateTasksForDifferentWorkers) { + int64 job_id = 3; + int64 dataset_id = 10; + int64 task_id_1 = 8; + int64 task_id_2 = 9; + std::string worker_address_1 = "test_worker_address_1"; + std::string worker_address_2 = "test_worker_address_2"; + DispatcherState state; + TF_EXPECT_OK(RegisterDatasetWithIdAndFingerprint(dataset_id, 1, &state)); + TF_EXPECT_OK(CreateAnonymousJob(job_id, dataset_id, &state)); + TF_EXPECT_OK( + CreateTask(task_id_1, job_id, dataset_id, worker_address_1, &state)); + TF_EXPECT_OK( + CreateTask(task_id_2, job_id, dataset_id, worker_address_2, &state)); + { + std::vector> tasks; + TF_EXPECT_OK(state.TasksForWorker(worker_address_1, tasks)); + EXPECT_EQ(1, tasks.size()); + } + { + std::vector> tasks; + TF_EXPECT_OK(state.TasksForWorker(worker_address_2, tasks)); + EXPECT_EQ(1, tasks.size()); + } +} + +TEST(DispatcherState, GetTasksForWorkerEmpty) { + std::string worker_address = "test_worker_address"; + DispatcherState state; + TF_EXPECT_OK(RegisterWorker(worker_address, &state)); + { + std::vector> tasks; + TF_EXPECT_OK(state.TasksForWorker(worker_address, tasks)); + EXPECT_EQ(0, tasks.size()); + } +} + TEST(DispatcherState, FinishTask) { int64 job_id = 3; int64 dataset_id = 10; From 792823b4d60a684edae271bec936c05c8c569d6a Mon Sep 17 00:00:00 2001 From: Xingyu Long Date: Wed, 5 Aug 2020 22:05:07 -0400 Subject: [PATCH 2220/2522] Update README.md --- .../keras_examples_benchmarks/README.md | 77 +++++++++++-------- 1 file changed, 43 insertions(+), 34 deletions(-) diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/README.md b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/README.md index 595f94b7eda..202037fef31 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/README.md +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/README.md @@ -1,7 +1,7 @@ -# Benchmarks for keras model exmaples +# Benchmarks for keras model examples -- [Benchmarks for keras model exmaples](#benchmarks-for-keras-model-exmaples) - - [Keras Benchmarks](#keras-benchmarks) +- [Benchmarks for keras model 
examples](#benchmarks-for-keras-model-examples) + - [Keras benchmarks](#keras-benchmarks) - [Available models](#available-models) - [Computer Vision examples](#computer-vision-examples) - [Text & Sequence examples](#text--sequence-examples) @@ -15,18 +15,18 @@ - [MLP benchmark](#mlp-benchmark) - [Antirectifier benchmark](#antirectifier-benchmark) - [IRNN benchmark](#irnn-benchmark) - - [Installing Bazel](#installing-bazel) - - [How to run benchmarks](#how-to-run-benchmarks) - - [How to add new benchmark tests that use `fit`](#how-to-add-new-benchmark-tests-that-use-fit) + - [Install Bazel](#install-bazel) + - [Run benchmarks](#run-benchmarks) + - [Add new benchmarks](#add-new-benchmarks) - [Troubleshooting](#troubleshooting) -## Keras Benchmarks +## Keras benchmarks -These are benchmark tests running on keras models: models from [keras/examples](https://github.com/keras-team/keras/tree/master/examples). Benchmarks in the current folder (`tensorflow/python/keras/benchmarks/keras_examples_benchmarks`) use Keras [built-in dataset](https://keras.io/api/datasets/) or synthetic data. In addition, these benchmarks support different distribution strategies and measure the performance with distributed training. +These are benchmark tests running on keras models: models from [keras/examples](https://github.com/keras-team/keras/tree/master/examples). Benchmarks in the current folder (`tensorflow/python/keras/benchmarks/keras_examples_benchmarks`) use Keras [built-in dataset](https://keras.io/api/datasets/) or synthetic data. In addition, these benchmarks support different [distribution strategies](https://www.tensorflow.org/guide/distributed_training) on multiple GPUs. ### Available models -These examples are implemented by functional API and Sequential API. +These examples are implemented by Functional API and Sequential API. #### Computer Vision examples @@ -36,25 +36,34 @@ These examples are implemented by functional API and Sequential API. #### Text & Sequence examples -[Bidirectional_lstm_benchmark_test.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py): 2-layer bidirectional LSTM on IMDB movie review dataset. -[text_classification_transformer_benchmark_test.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py): Text classification with custom transformer block. -[reuters_mlp_benchmark_test.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/reuters_mlp_benchmark_test.py): Simple MLP on Reuters newswire topic classification dataset. +- [Bidirectional_lstm_benchmark_test.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py): 2-layer bidirectional LSTM on IMDB movie review dataset. +- [text_classification_transformer_benchmark_test.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py): Text classification with custom transformer block. +- [reuters_mlp_benchmark_test.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/reuters_mlp_benchmark_test.py): Simple MLP on Reuters newswire topic classification dataset. 
#### Other examples -[antirectifier_benchmark_test.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/antirectifier_benchmark_test.py): Simple custom layer example. -[mnist_irnn_benchmark_test.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_irnn_benchmark_test.py): Reproduction of the IRNN experiment with pixel-by-pixel sequential MNIST in ["A Simple Way to Initialize Recurrent Networks of Rectified Linear Units"](https://arxiv.org/abs/1504.00941) by Le et al. +- [antirectifier_benchmark_test.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/antirectifier_benchmark_test.py): Simple custom layer example. +- [mnist_irnn_benchmark_test.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_irnn_benchmark_test.py):Reproduction of the IRNN experiment with pixel-by-pixel sequential MNIST in ["A Simple Way to Initialize Recurrent Networks of Rectified Linear Units"](https://arxiv.org/abs/1504.00941) by Le et al. ### Available benchmark results -We run benchmarks on Google Cloud Platform (GCP) and here is current environment for running benchmarks tests:
    -GPU: 2 x Tesla V100 (only for GPU test)
    -OS: Ubuntu 18.04
    -CPU: 8 x vCPUs, 30 GB memory
    -CUDA: 10.1
    -Bazel: 3.1.0
    +The listed benchmark results are obtained by running on Google Cloud Platform (GCP) with the following setup:
    -If you want to run benchmark tests on GPU, please make sure you already installed CUDA and other dependencies and you can follow the instructions from the [official tutorial](https://www.tensorflow.org/install/gpu) for GPU support. +- GPU: 2 x Tesla V100 (only for GPU test)
    +- OS: Ubuntu 18.04
    +- CPU: 8 x vCPUs, 30 GB memory
    +- CUDA: 10.1
    +- Bazel: 3.1.0
    + +If you want to run benchmark tests on GPU, please make sure you already installed CUDA and other dependencies by following the instructions from the [official tutorial](https://www.tensorflow.org/install/gpu) for GPU support.
    + +Metrics for following benchmarks:
    + +- Batch_size: Number of samples per batch of computation.
    +- Wall_time: Total time to run benchmark test in seconds.
    +- Avg_epoch_time: Average time for each epoch.
    +- Exp_per_sec: The number of examples that model processed for each second.
    +- Distribution_Strategy: [Distribution strategies](https://www.tensorflow.org/guide/distributed_training).
    #### Cifar10 CNN benchmark @@ -112,13 +121,13 @@ If you want to run benchmark tests on GPU, please make sure you already installe | CPU | 1024 | 213.00 | 69.01 | 868.08 | `off` | | GPU:2 | 1024 | 92.71 | 29.12 | 2042.94 | `mirrored` | -**Note**: For the smaller models, running models with GPU may be slower than running models with CPU as training small models is not computation dominant and there might be some overhead on model replication and data sharding with distributed training on GPUs. +**Note**: For the small models, running on GPU might be even slower than CPU. The potential reason is, training small models is not computation dominant, and there might be some overhead on model replication and data sharding with distributed training on GPUs. -## Installing Bazel +## Install Bazel This step can be skipped if Bazel is already installed.
    -We need to use [Bazel](https://bazel.build/) to build targets based on BUILD files. It will take a while for the first time because it will compile all dependencies from your BUILD file. For the next time, Bazel will use the cache and it’ll be much faster. Since we use Ubuntu OS, we can install bazel by using apt repository. +[Bazel](https://bazel.build/) is used to build targets based on BUILD files. It will take a while for the first time because it will compile all dependencies from your BUILD file. For the next time, Bazel will use the cache and it’ll be much faster. For Ubuntu OS, please use the following steps for Bazel installation. For other platforms, you may follow the corresponding guide for the installation. 1. Add bazel as package source @@ -142,12 +151,12 @@ We need to use [Bazel](https://bazel.build/) to build targets based on BUILD fil sudo apt update && sudo apt install bazel-`version` ``` -## How to run benchmarks +## Run benchmarks To run benchmarks in [keras/benchmarks](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/python/keras/benchmarks), please take the following steps: 1. Pull the latest tensorflow repo from github. -2. Install the Bazel tool which works with tensorflow, please take a look for the Tool installation section. +2. Install the Bazel tool which works with tensorflow, please take a look for the [Install bazel](#install-bazel) section. 3. To run benchmarks with Bazel, use the `--benchmarks=.` flags to specify the benchmarks to run. - To run all benchmarks on CPU @@ -158,26 +167,26 @@ To run benchmarks in [keras/benchmarks](https://github.com/tensorflow/tensorflow - To run all benchmarks on GPU ```shell - bazel run run --config=cuda -c opt --copt="-mavx" benchmarks_test -- \ --benchmarks=. + bazel run run --config=cuda -c opt --copt="-mavx" benchmarks_test -- --benchmarks=. ``` - To run a subset of benchmarks using `--benchmarks` flag, `--benchmarks`: the list of benchmarks to run. The specified value is interpreted as a regular expression and any benchmarks whose name contains a partial match to the regular expression is executed. e.g. `--benchmarks=".*lstm*."`, will run all lstm layer related benchmarks. -## How to add new benchmark tests that use `fit` +## Add new benchmarks -To add a new benchmark, please follow the steps: +To add a new benchmark, please take the following steps: 1. Create your own benchmark test file, `xxxx_benchmark_test.py`. -2. Import `benchmark_util` to measure and track performance. +2. Import `benchmark_util` to measure and track performance if needed. 3. Create class which inherits from `tf.test.Benchmark` 4. Define and load dataset in `__init__` method. 5. Design and create a model in `_build_model` method. -6. Define the `benchmark_xxx` method and it will pass essential parameters, which includes `batch_size`, `run_iters`, `train_data` and etc. You can check examples from [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/python/keras/benchmarks/keras_examples_benchmarks). -7. In addition, you need to add a benchmark target in the [BUILD](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/benchmarks/BUILD) file and write the target name and dependencies. You can take current BUILD as a reference. +6. Define the `benchmark_xxx` method and it will pass benchmark related hyper parameters, which includes `batch_size`, `run_iters`, `train_data` and etc. 
You can check examples from [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/python/keras/benchmarks/keras_examples_benchmarks). +7. Add the benchmark target to the [BUILD](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/benchmarks/BUILD) file. ## Troubleshooting 1. tensorflow.python.framework.errors_impl.InternalError: CUDA runtime implicit initialization on GPU:0 failed. Status: device kernel image is invalid - - Make sure CUDA was installed on your machine. - - Pull the latest tensorflow repo and run the `./configure` in the root folder of tensorflow, it will help you to create the configuration file which shows your local environment. Please check [this post](https://www.tensorflow.org/install/source#configure_the_build) to know the details. \ No newline at end of file + - Make sure CUDA is installed on your machine. + - Pull the latest tensorflow repo and run the `./configure` in the root folder of tensorflow. It will help you to create the configuration file which shows your local environment. Please check [this post](https://www.tensorflow.org/install/source#configure_the_build) for more details. \ No newline at end of file From e8e2864e95f80e1b5711d1cd975e34b074612365 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 5 Aug 2020 19:01:34 -0700 Subject: [PATCH 2221/2522] Added tests for GPUCompatibililty lib. PiperOrigin-RevId: 325147676 Change-Id: I22d2e5de747c4efb278c9d2b7f2de17e968db9b4 --- .../acceleration/compatibility/BUILD | 26 ++++-- .../compatibility/devicedb_test.cc | 1 - .../compatibility/gpu_compatibility.cc | 4 + .../compatibility/gpu_compatibility.h | 6 +- .../compatibility/gpu_compatibility_test.cc | 87 +++++++++++++++++++ 5 files changed, 116 insertions(+), 8 deletions(-) create mode 100644 tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_test.cc diff --git a/tensorflow/lite/experimental/acceleration/compatibility/BUILD b/tensorflow/lite/experimental/acceleration/compatibility/BUILD index 78a9d2eb8d8..387f475fa17 100644 --- a/tensorflow/lite/experimental/acceleration/compatibility/BUILD +++ b/tensorflow/lite/experimental/acceleration/compatibility/BUILD @@ -14,7 +14,7 @@ # ============================================================================== load("@flatbuffers//:build_defs.bzl", "flatbuffer_cc_library") -load("//tensorflow/lite:special_rules.bzl", "tflite_portable_test_suite") +load("//tensorflow/lite:special_rules.bzl", "tflite_extra_gles_deps", "tflite_portable_test_suite") package( default_visibility = [ @@ -94,16 +94,22 @@ genrule( tools = [":convert_binary_to_cc_source"], ) +cc_library( + name = "devicedb_sample", + srcs = ["devicedb-sample.cc"], + hdrs = ["devicedb-sample.h"], + deps = [":database_fbs"], +) + cc_test( name = "devicedb_test", srcs = [ - "devicedb-sample.cc", - "devicedb-sample.h", "devicedb_test.cc", ], deps = [ ":database_fbs", ":devicedb", + ":devicedb_sample", "//tensorflow/lite/testing:util", "@com_google_googletest//:gtest", "@flatbuffers", @@ -152,11 +158,21 @@ cc_library( ":android_info", ":database_fbs", ":devicedb", - "//tensorflow/lite/delegates/gpu:delegate", - "//tensorflow/lite/delegates/gpu/common:gpu_info", "@com_google_absl//absl/status", "@com_google_absl//absl/strings", "@flatbuffers", + "//tensorflow/lite/delegates/gpu:delegate", + "//tensorflow/lite/delegates/gpu/common:gpu_info", + ] + tflite_extra_gles_deps(), +) + +cc_test( + name = "gpu_compatibility_test", + srcs = ["gpu_compatibility_test.cc"], + deps = [ + 
":devicedb_sample", + ":gpu_compatibility", + "@com_google_googletest//:gtest_main", ], ) diff --git a/tensorflow/lite/experimental/acceleration/compatibility/devicedb_test.cc b/tensorflow/lite/experimental/acceleration/compatibility/devicedb_test.cc index 4b08c2ff874..c9c6ff831e5 100644 --- a/tensorflow/lite/experimental/acceleration/compatibility/devicedb_test.cc +++ b/tensorflow/lite/experimental/acceleration/compatibility/devicedb_test.cc @@ -19,7 +19,6 @@ limitations under the License. #include #include "flatbuffers/flatbuffers.h" // from @flatbuffers -#include "tensorflow/lite/experimental/acceleration/compatibility/database_generated.h" #include "tensorflow/lite/experimental/acceleration/compatibility/devicedb-sample.h" #include "tensorflow/lite/experimental/acceleration/compatibility/variables.h" #include "tensorflow/lite/testing/util.h" diff --git a/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility.cc b/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility.cc index e04f5d18db4..4f40878da22 100644 --- a/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility.cc +++ b/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility.cc @@ -98,5 +98,9 @@ TfLiteGpuDelegateOptionsV2 GPUCompatibilityList::GetBestOptionsFor( return TfLiteGpuDelegateOptionsV2Default(); } +bool GPUCompatibilityList::IsDatabaseLoaded() const { + return database_ != nullptr; +} + } // namespace acceleration } // namespace tflite diff --git a/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility.h b/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility.h index f975fe04f22..1c5e9dec997 100644 --- a/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility.h +++ b/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility.h @@ -53,6 +53,9 @@ class GPUCompatibilityList { public: // Construct list from bundled data. GPUCompatibilityList(); + // Constructs list from the given flatbuffer data. + explicit GPUCompatibilityList( + const unsigned char* compatibility_list_flatbuffer); // Returns true if the provided device specs are supported by the database. bool Includes(const AndroidInfo& android_info, const ::tflite::gpu::GpuInfo& gpu_info) const; @@ -73,10 +76,9 @@ class GPUCompatibilityList { GPUCompatibilityList(const GPUCompatibilityList&) = delete; GPUCompatibilityList& operator=(const GPUCompatibilityList&) = delete; + bool IsDatabaseLoaded() const; protected: - explicit GPUCompatibilityList( - const unsigned char* compatibility_list_flatbuffer); const DeviceDatabase* database_; }; diff --git a/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_test.cc b/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_test.cc new file mode 100644 index 00000000000..5576b47dcd9 --- /dev/null +++ b/tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility_test.cc @@ -0,0 +1,87 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/lite/experimental/acceleration/compatibility/gpu_compatibility.h" + +#include + +#include +#include +#include "tensorflow/lite/experimental/acceleration/compatibility/devicedb-sample.h" + +namespace { + +class GPUCompatibilityTest : public ::testing::Test { + protected: + GPUCompatibilityTest() { + list_ = absl::make_unique( + g_tflite_acceleration_devicedb_sample_binary); + } + + std::unique_ptr list_; +}; + +TEST_F(GPUCompatibilityTest, ReturnsSupportedForFullMatch) { + ASSERT_TRUE(list_->IsDatabaseLoaded()); + + tflite::acceleration::AndroidInfo android_info = {.android_sdk_version = "24", + .model = "m712c"}; + + tflite::gpu::GpuInfo tflite_gpu_info = { + .major_version = 3, + .minor_version = 1, + }; + + EXPECT_TRUE(list_->Includes(android_info, tflite_gpu_info)); +} + +TEST_F(GPUCompatibilityTest, ReturnsUnsupportedForFullMatch) { + ASSERT_TRUE(list_->IsDatabaseLoaded()); + + tflite::acceleration::AndroidInfo android_info = {.android_sdk_version = "28", + .model = "SM-G960F", + .device = "starlte", + .manufacturer = "Samsung"}; + tflite::gpu::GpuInfo tflite_gpu_info = { + .renderer_name = "Mali-G72", + .major_version = 3, + .minor_version = 2, + }; + EXPECT_FALSE(list_->Includes(android_info, tflite_gpu_info)); +} + +TEST_F(GPUCompatibilityTest, ReturnsDefaultOptions) { + ASSERT_TRUE(list_->IsDatabaseLoaded()); + + tflite::acceleration::AndroidInfo android_info; + tflite::gpu::GpuInfo tflite_gpu_info; + auto default_options = TfLiteGpuDelegateOptionsV2Default(); + auto best_options = list_->GetBestOptionsFor(android_info, tflite_gpu_info); + EXPECT_EQ(best_options.is_precision_loss_allowed, + default_options.is_precision_loss_allowed); + EXPECT_EQ(best_options.inference_preference, + default_options.inference_preference); + EXPECT_EQ(best_options.inference_priority1, + default_options.inference_priority1); + EXPECT_EQ(best_options.inference_priority2, + default_options.inference_priority2); + EXPECT_EQ(best_options.inference_priority3, + default_options.inference_priority3); + EXPECT_EQ(best_options.experimental_flags, + default_options.experimental_flags); + EXPECT_EQ(best_options.max_delegated_partitions, + default_options.max_delegated_partitions); +} + +} // namespace From fe2919b9725d91627f898bc1cf85956d9343f4ea Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 5 Aug 2020 19:07:26 -0700 Subject: [PATCH 2222/2522] Add MetaOptimizer::GetResultString to help debugging PiperOrigin-RevId: 325148493 Change-Id: I36727dec4e4217bd085d77033db8cac3c1c89b1a --- .../core/grappler/optimizers/meta_optimizer.cc | 14 +++++++++++--- .../core/grappler/optimizers/meta_optimizer.h | 2 ++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 48657a634d0..bce86ba5603 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -15,6 +15,7 @@ limitations under the License. 
#include "tensorflow/core/grappler/optimizers/meta_optimizer.h" +#include "absl/strings/str_cat.h" #include "absl/strings/str_join.h" #include "absl/strings/substitute.h" #include "tensorflow/core/common_runtime/function.h" @@ -827,15 +828,22 @@ Status MetaOptimizer::OptimizeConsumeItem(Cluster* cluster, GrapplerItem&& item, return Status::OK(); } -void MetaOptimizer::PrintResult() { +string MetaOptimizer::GetResultString() const { + std::string result_string; for (const GraphOptimizationResult& graph_result : optimization_results_) { - LOG(INFO) << "Optimization results for grappler item: " << graph_result.id; + absl::StrAppend(&result_string, + "Optimization results for grappler item: ", graph_result.id, + "\n"); for (const OptimizerResult& result : graph_result.results) { - LOG(INFO) << " " << result.optimizer_name << ": " << result.message; + absl::StrAppend(&result_string, " ", result.optimizer_name, ": ", + result.message, "\n"); } } + return result_string; } +void MetaOptimizer::PrintResult() { LOG(INFO) << GetResultString(); } + bool MetaOptimizerEnabled(const ConfigProto& cfg) { const auto& rewrite_cfg = cfg.graph_options().rewrite_options(); if (rewrite_cfg.disable_meta_optimizer()) { diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.h b/tensorflow/core/grappler/optimizers/meta_optimizer.h index f39f0b62bb6..b21ea68f720 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.h +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.h @@ -50,6 +50,8 @@ class MetaOptimizer : public GraphOptimizer { Status OptimizeConsumeItem(Cluster* cluster, GrapplerItem&& item, GraphDef* optimized_graph); + string GetResultString() const; + void PrintResult(); void Feedback(Cluster* cluster, const GrapplerItem& item, From ebf8343be4affc12947377d260f1f5662f2678af Mon Sep 17 00:00:00 2001 From: Yuqi Li Date: Wed, 5 Aug 2020 19:27:28 -0700 Subject: [PATCH 2223/2522] Add colab for question answer in TFLite Model Maker. PiperOrigin-RevId: 325150437 Change-Id: If1b31f931895d0c656e3a56f96e7e176ef511029 --- tensorflow/lite/g3doc/_book.yaml | 4 +- .../model_maker_question_answer.ipynb | 654 ++++++++++++++++++ 2 files changed, 657 insertions(+), 1 deletion(-) create mode 100644 tensorflow/lite/g3doc/tutorials/model_maker_question_answer.ipynb diff --git a/tensorflow/lite/g3doc/_book.yaml b/tensorflow/lite/g3doc/_book.yaml index e28fee87316..45be4737fd5 100644 --- a/tensorflow/lite/g3doc/_book.yaml +++ b/tensorflow/lite/g3doc/_book.yaml @@ -40,8 +40,10 @@ upper_tabs: status: external - heading: "Text" - - title: "Text classification" + - title: "Text classification with Model Maker" path: /lite/tutorials/model_maker_text_classification + - title: "Question Answer with Model Maker" + path: /lite/tutorials/model_maker_question_answer - heading: "Microcontrollers" - title: "Gesture recognition" diff --git a/tensorflow/lite/g3doc/tutorials/model_maker_question_answer.ipynb b/tensorflow/lite/g3doc/tutorials/model_maker_question_answer.ipynb new file mode 100644 index 00000000000..a1d11115a75 --- /dev/null +++ b/tensorflow/lite/g3doc/tutorials/model_maker_question_answer.ipynb @@ -0,0 +1,654 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "h2q27gKz1H20" + }, + "source": [ + "##### Copyright 2020 The TensorFlow Authors." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "colab": {}, + "colab_type": "code", + "id": "TUfAcER1oUS6" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "Gb7qyhNL1yWt" + }, + "source": [ + "# Question Answer with TensorFlow Lite Model Maker" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "Fw5Y7snSuG51" + }, + "source": [ + "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n", + " \u003ctd\u003e\n", + " \u003ca target=\"_blank\" href=\"https://www.tensorflow.org/lite/tutorials/model_maker_question_answer\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" /\u003eView on TensorFlow.org\u003c/a\u003e\n", + " \u003c/td\u003e\n", + " \u003ctd\u003e\n", + " \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/lite/g3doc/tutorials/model_maker_question_answer.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n", + " \u003c/td\u003e\n", + " \u003ctd\u003e\n", + " \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/g3doc/tutorials/model_maker_question_answer.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\n", + " \u003c/td\u003e\n", + " \u003ctd\u003e\n", + " \u003ca href=\"https://storage.googleapis.com/tensorflow_docs/tensorflow/tensorflow/lite/g3doc/tutorials/model_maker_question_answer.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/download_logo_32px.png\" /\u003eDownload notebook\u003c/a\u003e\n", + " \u003c/td\u003e\n", + "\u003c/table\u003e" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "sr3q-gvm3cI8" + }, + "source": [ + "The TensorFlow Lite Model Maker library simplifies the process of adapting and converting a TensorFlow model to particular input data when deploying this model for on-device ML applications.\n", + "\n", + "This notebook shows an end-to-end example that utilizes the Model Maker library to illustrate the adaptation and conversion of a commonly-used question answer model for question answer task." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "UxEHFTk755qw" + }, + "source": [ + "# Introduction to Question Answer Task" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "cFbKTCF25-SG" + }, + "source": [ + "The supported task in this library is extractive question answer task, which means given a passage and a question, the answer is the span in the passage. 
The image below shows an example for question answer.\n", + "\n", + "\n", + "\u003cp align=\"center\"\u003e\u003cimg src=\"https://storage.googleapis.com/download.tensorflow.org/models/tflite/screenshots/model_maker_squad_showcase.png\" width=\"500\"\u003e\u003c/p\u003e\n", + "\n", + "\u003cp align=\"center\"\u003e\n", + " \u003cem\u003eAnswers are spans in the passage (image credit: \u003ca href=\"https://rajpurkar.github.io/mlx/qa-and-squad/\"\u003eSQuAD blog\u003c/a\u003e) \u003c/em\u003e\n", + "\u003c/p\u003e\n", + "\n", + "As for the model of question answer task, the inputs should be the passage and question pair that are already preprocessed, the outputs should be the start logits and end logits for each token in the passage.\n", + "The size of input could be set and adjusted according to the length of passage and question." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "gb7P4WQta8Ub" + }, + "source": [ + "## End-to-End Overview\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "w7cIHjIfbDlG" + }, + "source": [ + "The following code snippet demonstrates how to get the model within a few lines of code. The overall process includes 5 steps: (1) choose a model, (2) load data, (3) retrain the model, (4) evaluate, and (5) export it to TensorFlow Lite format." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "xQPdlxZBYuZG" + }, + "source": [ + "\n", + "```python\n", + "# Chooses a model specification that represents the model.\n", + "spec = model_spec.get('mobilebert_qa')\n", + "\n", + "# Gets the training data and validation data.\n", + "train_data = QuestionAnswerDataLoader.from_squad(train_data_path, spec, is_training=True)\n", + "validation_data = QuestionAnswerDataLoader.from_squad(validation_data_path, spec, is_training=False)\n", + "\n", + "# Fine-tunes the model.\n", + "model = question_answer.create(train_data, model_spec=spec)\n", + "\n", + "# Gets the evaluation result.\n", + "metric = model.evaluate(validation_data)\n", + "\n", + "# Exports the model to the TensorFlow Lite format in the export directory.\n", + "model.export(export_dir)\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "exScAdvBbNEi" + }, + "source": [ + "The following sections explain the code in more detail." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "bcLF2PKkSbV3" + }, + "source": [ + "## Prerequisites\n", + "\n", + "To run this example, install the required packages, including the Model Maker package from the [GitHub repo](https://github.com/tensorflow/examples/tree/master/tensorflow_examples/lite/model_maker)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "qhl8lqVamEty" + }, + "outputs": [], + "source": [ + "!pip install git+https://github.com/tensorflow/examples.git#egg=tensorflow-examples[model_maker]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "l6lRhVK9Q_0U" + }, + "source": [ + "Import the required packages." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "XtxiUeZEiXpt" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import os\n", + "\n", + "import tensorflow as tf\n", + "assert tf.__version__.startswith('2')\n", + "\n", + "from tensorflow_examples.lite.model_maker.core.data_util.text_dataloader import QuestionAnswerDataLoader\n", + "from tensorflow_examples.lite.model_maker.core.task import model_spec\n", + "from tensorflow_examples.lite.model_maker.core.task import question_answer\n", + "from tensorflow_examples.lite.model_maker.core.task.configs import QuantizationConfig" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "l65ctmtW7_FF" + }, + "source": [ + "The \"End-to-End Overview\" demonstrates a simple end-to-end example. The following sections walk through the example step by step to show more detail." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "kJ_B8fMDOhMR" + }, + "source": [ + "## Choose a model_spec that represents a model for question answer\n", + "\n", + "Each `model_spec` object represents a specific model for question answer. The Model Maker currently supports MobileBERT and BERT-Base models.\n", + "\n", + "Supported Model | Name of model_spec | Model Description\n", + "--- | --- | ---\n", + "[MobileBERT](https://arxiv.org/pdf/2004.02984.pdf) | 'mobilebert_qa' | 4.3x smaller and 5.5x faster than BERT-Base while achieving competitive results, suitable for on-device scenarios.\n", + "[MobileBERT-SQuAD](https://arxiv.org/pdf/2004.02984.pdf) | 'mobilebert_qa_squad' | Same model architecture as MobileBERT model and the initial model is already retrained on [SQuAD1.1](https://rajpurkar.github.io/SQuAD-explorer/).\n", + "[BERT-Base](https://arxiv.org/pdf/1810.04805.pdf) | 'bert_qa' | Standard BERT model that is widely used in NLP tasks.\n", + "\n", + "In this tutorial, [MobileBERT-SQuAD](https://arxiv.org/pdf/2004.02984.pdf) is used as an example. Since the model is already retrained on [SQuAD1.1](https://rajpurkar.github.io/SQuAD-explorer/), it can converge faster for the question answer task.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "vEAWuZQ1PFiX" + }, + "outputs": [], + "source": [ + "spec = model_spec.get('mobilebert_qa_squad')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "ygEncJxtl-nQ" + }, + "source": [ + "## Load Input Data Specific to an On-device ML App and Preprocess the Data\n", + "\n", + "[TriviaQA](https://nlp.cs.washington.edu/triviaqa/) is a reading comprehension dataset containing over 650K question-answer-evidence triples. In this tutorial, you will use a subset of this dataset to learn how to use the Model Maker library.\n", + "\n", + "To load the data, convert the TriviaQA dataset to the [SQuAD1.1](https://rajpurkar.github.io/SQuAD-explorer/) format by running the [converter Python script](https://github.com/mandarjoshi90/triviaqa#miscellaneous) with `--sample_size=8000` and a set of `web` data. Modify the conversion code a little bit by:\n", + "* Skipping the samples that couldn't find any answer in the context document;\n", + "* Getting the original answer in the context without uppercase or lowercase.\n", + "\n", + "Download the archived version of the already converted dataset."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "7tOfUr2KlgpU" + }, + "outputs": [], + "source": [ + "train_data_path = tf.keras.utils.get_file(\n", + " fname='triviaqa-web-train-8000.json',\n", + " origin='https://storage.googleapis.com/download.tensorflow.org/models/tflite/dataset/triviaqa-web-train-8000.json')\n", + "validation_data_path = tf.keras.utils.get_file(\n", + " fname='triviaqa-verified-web-dev.json',\n", + " origin='https://storage.googleapis.com/download.tensorflow.org/models/tflite/dataset/triviaqa-verified-web-dev.json')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "UfZk8GNr_1nc" + }, + "source": [ + "You can also train the MobileBERT model with your own dataset. If you are running this notebook on Colab, upload your data by using the left sidebar.\n", + "\n", + "\u003cimg src=\"https://storage.googleapis.com/download.tensorflow.org/models/tflite/screenshots/model_maker_question_answer.png\" alt=\"Upload File\" width=\"800\" hspace=\"100\"\u003e\n", + "\n", + "If you prefer not to upload your data to the cloud, you can also run the library offline by following the [guide](https://github.com/tensorflow/examples/tree/master/tensorflow_examples/lite/model_maker)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "E051HBUM5owi" + }, + "source": [ + "Use the `QuestionAnswerDataLoader.from_squad` method to load and preprocess the [SQuAD format](https://rajpurkar.github.io/SQuAD-explorer/) data according to a specific `model_spec`. You can use either SQuAD2.0 or SQuAD1.1 formats. Setting parameter `version_2_with_negative` as `True` means the formats is SQuAD2.0. Otherwise, the format is SQuAD1.1. By default, `version_2_with_negative` is `False`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "I_fOlZsklmlL" + }, + "outputs": [], + "source": [ + "train_data = QuestionAnswerDataLoader.from_squad(train_data_path, spec, is_training=True)\n", + "validation_data = QuestionAnswerDataLoader.from_squad(validation_data_path, spec, is_training=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "AWuoensX4vDA" + }, + "source": [ + "## Customize the TensorFlow Model\n", + "\n", + "Create a custom question answer model based on the loaded data. The `create` function comprises the following steps:\n", + "\n", + "1. Creates the model for question answer according to `model_spec`.\n", + "2. Train the question answer model. The default epochs and the default batch size are set according to two variables `default_training_epochs` and `default_batch_size` in the `model_spec` object." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "TvYSUuJY3QxR" + }, + "outputs": [], + "source": [ + "model = question_answer.create(train_data, model_spec=spec)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "0JKI-pNc8idH" + }, + "source": [ + "Have a look at the detailed model structure." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "gd7Hs8TF8n3H" + }, + "outputs": [], + "source": [ + "model.summary()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "LP5FPk_tOxoZ" + }, + "source": [ + "## Evaluate the Customized Model\n", + "\n", + "Evaluate the model on the validation data and get a dict of metrics including `f1` score and `exact match` etc. Note that metrics are different for SQuAD1.1 and SQuAD2.0." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "A8c2ZQ0J3Riy" + }, + "outputs": [], + "source": [ + "model.evaluate(validation_data)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "aeHoGAceO2xV" + }, + "source": [ + "## Export to TensorFlow Lite Model\n", + "\n", + "Convert the existing model to TensorFlow Lite model format that you can later use in an on-device ML application." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "TwA2Z2pokQJc" + }, + "source": [ + "Since MobileBERT is too big for on-device applications, use dynamic range quantization on the model to compress MobileBERT by 4x with the minimal loss of performance. First, define the quantization configuration:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "1wBVTO8qkmum" + }, + "outputs": [], + "source": [ + "config = QuantizationConfig.create_dynamic_range_quantization(optimizations=[tf.lite.Optimize.OPTIMIZE_FOR_LATENCY])\n", + "config._experimental_new_quantizer = True" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "qea2YkEGkOTH" + }, + "source": [ + "Export the quantized TFLite model according to the quantization config and save the vocabulary to a vocab file. The default TFLite model filename is `model.tflite`, and the default vocab filename is `vocab`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "Im6wA9lK3TQB" + }, + "outputs": [], + "source": [ + "model.export(export_dir='.', quantization_config=config)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "w12kvDdHJIGH" + }, + "source": [ + "You can use the TensorFlow Lite model file and vocab file in the [bert_qa](https://github.com/tensorflow/examples/tree/master/lite/examples/bert_qa/android) reference app by downloading it from the left sidebar on Colab." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "HZKYthlVrTos" + }, + "source": [ + "You can also evalute the tflite model with the `evaluate_tflite` method. This step is expected to take a long time." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "ochbq95ZrVFX" + }, + "outputs": [], + "source": [ + "model.evaluate_tflite('model.tflite', validation_data)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "EoWiA_zX8rxE" + }, + "source": [ + "## Advanced Usage\n", + "\n", + "The `create` function is the critical part of this library in which the `model_spec` parameter defines the model specification. The `BertQAModelSpec` class is currently supported. There are 2 models: MobileBERT model, BERT-Base model. 
The `create` function comprises the following steps:\n", + "\n", + "1. Creates the model for question answer according to `model_spec`.\n", + "2. Train the question answer model.\n", + "\n", + "This section describes several advanced topics, including adjusting the model, tuning the training hyperparameters etc." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "mwtiksguDfhl" + }, + "source": [ + "### Adjust the model\n", + "\n", + "You can adjust the model infrastructure like parameters `seq_len` and `query_len` in the `BertQAModelSpec` class.\n", + "\n", + "Adjustable parameters for model:\n", + "\n", + "* `seq_len`: Length of the passage to feed into the model.\n", + "* `query_len`: Length of the question to feed into the model.\n", + "* `doc_stride`: The stride when doing a sliding window approach to take chunks of the documents.\n", + "* `initializer_range`: The stdev of the truncated_normal_initializer for initializing all weight matrices.\n", + "* `trainable`: Boolean, whether pre-trained layer is trainable.\n", + "\n", + "Adjustable parameters for training pipeline:\n", + "\n", + "* `model_dir`: The location of the model checkpoint files. If not set, temporary directory will be used.\n", + "* `dropout_rate`: The rate for dropout.\n", + "* `learning_rate`: The initial learning rate for Adam.\n", + "* `predict_batch_size`: Batch size for prediction.\n", + "* `tpu`: TPU address to connect to. Only used if using tpu.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "cAOd5_bzH9AQ" + }, + "source": [ + "For example, you can train the model with a longer sequence length. If you change the model, you must first construct a new `model_spec`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "e9WBN0UTQoMN" + }, + "outputs": [], + "source": [ + "new_spec = model_spec.get('mobilebert_qa')\n", + "new_spec.seq_len = 512" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "6LSTdghTP0Cv" + }, + "source": [ + "The remaining steps are the same. Note that you must rerun both the `dataloader` and `create` parts as different model specs may have different preprocessing steps.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "LvQuy7RSDir3" + }, + "source": [ + "### Tune training hyperparameters\n", + "You can also tune the training hyperparameters like `epochs` and `batch_size` to impact the model performance. For instance,\n", + "\n", + "* `epochs`: more epochs could achieve better performance, but may lead to overfitting.\n", + "* `batch_size`: number of samples to use in one training step.\n", + "\n", + "For example, you can train with more epochs and with a bigger batch size like:\n", + "\n", + "```python\n", + "model = question_answer.create(train_data, model_spec=spec, epochs=5, batch_size=64)\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "Eq6B9lKMfhS6" + }, + "source": [ + "### Change the Model Architecture\n", + "\n", + "You can change the base model your data trains on by changing the `model_spec`. For example, to change to the BERT-Base model, run:\n", + "\n", + "```python\n", + "spec = model_spec.get('bert_qa')\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "L2d7yycrgu6L" + }, + "source": [ + "The remaining steps are the same." 
+ ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "question_answer.ipynb", + "private_outputs": true, + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} From 4f8ce7437431e9a1a47535ff05ef5011a694f244 Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Wed, 5 Aug 2020 19:39:41 -0700 Subject: [PATCH 2224/2522] [TF2XLA] Deprecate xla.experimental.compile PiperOrigin-RevId: 325151668 Change-Id: I1a0ac5d58e8237cf47785034086c7cdc240ba116 --- RELEASE.md | 2 ++ tensorflow/python/compiler/xla/xla.py | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/RELEASE.md b/RELEASE.md index b0c785c7d68..62bdc11aa68 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -116,6 +116,8 @@ behavior by adjusting the `l2` parameter. * * XLA Support: + * xla.experimental.compile is deprecated, use + `tf.function(experimental_compile=True)` instead * * Tracing and Debugging: * diff --git a/tensorflow/python/compiler/xla/xla.py b/tensorflow/python/compiler/xla/xla.py index 51ad5569a30..59b70f2a217 100644 --- a/tensorflow/python/compiler/xla/xla.py +++ b/tensorflow/python/compiler/xla/xla.py @@ -37,6 +37,7 @@ from tensorflow.python.util import compat from tensorflow.python.util import nest from tensorflow.python.util import tf_inspect from tensorflow.python.util.compat import collections_abc +from tensorflow.python.util.deprecation import deprecated from tensorflow.python.util.tf_export import tf_export _XLA_COMPILE_ATTR = '_xla_compile_id' @@ -64,6 +65,10 @@ _UNSUPPORTED_OPS = set([ @tf_export('xla.experimental.compile') +@deprecated( + None, 'xla.experimental.compile is deprecated. Consider using ' + 'tf.function(experimental_compile=True)', + warn_once=True) def compile(computation, inputs=None): # pylint: disable=redefined-builtin """Builds an operator that compiles and runs `computation` with XLA. From 8ea1d3cb88ff1d1d5dab8b9af2bdd7f44829ba61 Mon Sep 17 00:00:00 2001 From: Yuqi Li Date: Wed, 5 Aug 2020 19:53:10 -0700 Subject: [PATCH 2225/2522] Update colab to add MobileBert and change dataset to SST-2 for text classification in TFLite Model Maker. PiperOrigin-RevId: 325152885 Change-Id: I7b1d4745b4bdcefb3308f28a4b26f9f400866cc3 --- .../model_maker_text_classification.ipynb | 413 +++++++++--------- 1 file changed, 213 insertions(+), 200 deletions(-) diff --git a/tensorflow/lite/g3doc/tutorials/model_maker_text_classification.ipynb b/tensorflow/lite/g3doc/tutorials/model_maker_text_classification.ipynb index 4a620960899..1a839d70e38 100644 --- a/tensorflow/lite/g3doc/tutorials/model_maker_text_classification.ipynb +++ b/tensorflow/lite/g3doc/tutorials/model_maker_text_classification.ipynb @@ -74,9 +74,9 @@ "id": "sr3q-gvm3cI8" }, "source": [ - "The TensorFlow Lite Model Maker library simplifies the process of adapting and converting a TensorFlow neural-network model to particular input data when deploying this model for on-device ML applications.\n", + "The TensorFlow Lite Model Maker library simplifies the process of adapting and converting a TensorFlow model to particular input data when deploying this model for on-device ML applications.\n", "\n", - "This notebook shows an end-to-end example that utilizes this Model Maker library to illustrate the adaption and conversion of a commonly-used text classification model to classify movie reviews on a mobile device." 
+ "This notebook shows an end-to-end example that utilizes the Model Maker library to illustrate the adaptation and conversion of a commonly-used text classification model to classify movie reviews on a mobile device. The text classification model classifies text into predefined categories.The inputs should be preprocessed text and the outputs are the probabilities of the categories. The dataset used in this tutorial are positive and negative movie reviews." ] }, { @@ -86,9 +86,18 @@ "id": "bcLF2PKkSbV3" }, "source": [ - "## Prerequisites\n", - "\n", - "To run this example, we first need to install several required packages, including Model Maker package that in github [repo](https://github.com/tensorflow/examples/tree/master/tensorflow_examples/lite/model_maker)." + "## Prerequisites\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "2vvAObmTqglq" + }, + "source": [ + "### Install the required packages\n", + "To run this example, install the required packages, including the Model Maker package from the [GitHub repo](https://github.com/tensorflow/examples/tree/master/tensorflow_examples/lite/model_maker)." ] }, { @@ -131,19 +140,9 @@ "assert tf.__version__.startswith('2')\n", "\n", "from tensorflow_examples.lite.model_maker.core.data_util.text_dataloader import TextClassifierDataLoader\n", - "from tensorflow_examples.lite.model_maker.core.task.model_spec import AverageWordVecModelSpec\n", - "from tensorflow_examples.lite.model_maker.core.task.model_spec import BertClassifierModelSpec\n", - "from tensorflow_examples.lite.model_maker.core.task import text_classifier" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "06sWWfvE6I8e" - }, - "source": [ - "## Simple End-to-End Example" + "from tensorflow_examples.lite.model_maker.core.task import model_spec\n", + "from tensorflow_examples.lite.model_maker.core.task import text_classifier\n", + "from tensorflow_examples.lite.model_maker.core.task.configs import QuantizationConfig" ] }, { @@ -154,7 +153,7 @@ }, "source": [ "### Get the data path\n", - "Let's get some texts to play with this simple end-to-end example." + "Download the dataset for this tutorial." ] }, { @@ -167,10 +166,11 @@ }, "outputs": [], "source": [ - "data_path = tf.keras.utils.get_file(\n", - " fname='aclImdb',\n", - " origin='http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz',\n", - " untar=True)" + "data_dir = tf.keras.utils.get_file(\n", + " fname='SST-2.zip',\n", + " origin='https://firebasestorage.googleapis.com/v0/b/mtl-sentence-representations.appspot.com/o/data%2FSST-2.zip?alt=media\u0026token=aabc5f6b-e466-44a2-b9b4-cf6337f84ac8',\n", + " extract=True)\n", + "data_dir = os.path.join(os.path.dirname(data_dir), 'SST-2')" ] }, { @@ -180,7 +180,7 @@ "id": "6MSCjPAvs2EQ" }, "source": [ - " You could replace it with your own text folders. As for uploading data to colab, you could find the upload button in the left sidebar shown in the image below with the red rectangle. Just have a try to upload a zip file and unzip it. The root file path is the current path.\n", + "You can also upload your own dataset to work through this tutorial. 
Upload your dataset by using the left sidebar in Colab.\n", "\n", "\u003cimg src=\"https://storage.googleapis.com/download.tensorflow.org/models/tflite/screenshots/model_maker_text_classification.png\" alt=\"Upload File\" width=\"800\" hspace=\"100\"\u003e\n" ] @@ -192,7 +192,17 @@ "id": "uO5egTlrtWxm" }, "source": [ - "If you prefer not to upload your images to the cloud, you could try to run the library locally following the [guide](https://github.com/tensorflow/examples/tree/master/tensorflow_examples/lite/model_maker) in github." + "If you prefer not to upload your dataset to the cloud, you can also locally run the library by following the [guide](https://github.com/tensorflow/examples/tree/master/tensorflow_examples/lite/model_maker)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "xushUyZXqP59" + }, + "source": [ + "## End-to-End Workflow" ] }, { @@ -202,9 +212,7 @@ "id": "WlKU3SMX6TnB" }, "source": [ - "### Run the example\n", - "\n", - "The example just consists of 6 lines of code as shown below, representing 5 steps of the overall process." + "This workflow consists of five steps as outlined below:" ] }, { @@ -214,7 +222,9 @@ "id": "PBPUIhEjMjTR" }, "source": [ - "Step 0. Choose a `model_spec` that represents a model for text classifier." + "Step 1. Choose a model specification that represents a text classification model.\n", + "\n", + "This tutorial uses [MobileBERT](https://arxiv.org/pdf/2004.02984.pdf) as an example." ] }, { @@ -227,7 +237,7 @@ }, "outputs": [], "source": [ - "model_spec = AverageWordVecModelSpec()" + "spec = model_spec.get('mobilebert_classifier')" ] }, { @@ -237,7 +247,7 @@ "id": "s5U-A3tw6Y27" }, "source": [ - "Step 1. Load train and test data specific to an on-device ML app and preprocess the data according to specific `model_spec`." + "Step 2. Load train and test data specific to an on-device ML app and preprocess the data according to a specific `model_spec`." ] }, { @@ -250,8 +260,20 @@ }, "outputs": [], "source": [ - "train_data = TextClassifierDataLoader.from_folder(os.path.join(data_path, 'train'), model_spec=model_spec, class_labels=['pos', 'neg'])\n", - "test_data = TextClassifierDataLoader.from_folder(os.path.join(data_path, 'test'), model_spec=model_spec, is_training=False, shuffle=False)" + "train_data = TextClassifierDataLoader.from_csv(\n", + " filename=os.path.join(os.path.join(data_dir, 'train.tsv')),\n", + " text_column='sentence',\n", + " label_column='label',\n", + " model_spec=spec,\n", + " delimiter='\\t',\n", + " is_training=True)\n", + "test_data = TextClassifierDataLoader.from_csv(\n", + " filename=os.path.join(os.path.join(data_dir, 'dev.tsv')),\n", + " text_column='sentence',\n", + " label_column='label',\n", + " model_spec=spec,\n", + " delimiter='\\t',\n", + " is_training=False)" ] }, { @@ -261,7 +283,7 @@ "id": "2uZkLR6N6gDR" }, "source": [ - "Step 2. Customize the TensorFlow model." + "Step 3. Customize the TensorFlow model." ] }, { @@ -274,7 +296,7 @@ }, "outputs": [], "source": [ - "model = text_classifier.create(train_data, model_spec=model_spec)" + "model = text_classifier.create(train_data, model_spec=spec)" ] }, { @@ -284,7 +306,7 @@ "id": "-BzCHLWJ6h7q" }, "source": [ - "Step 3. Evaluate the model." + "Step 4. Evaluate the model." ] }, { @@ -307,8 +329,23 @@ "id": "CgCDMe0e6jlT" }, "source": [ - "Step 4. Export to TensorFlow Lite model.\n", - "You could download it in the left sidebar same as the uploading part for your own use." + "Step 5. 
Export as a TensorFlow Lite model.\n", + "\n", + "Since MobileBERT is too big for on-device applications, use [dynamic range quantization](https://www.tensorflow.org/lite/performance/post_training_quantization#dynamic_range_quantization) on the model to compress it by almost 4x with minimal performance degradation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "ZQRLmkGumr9Y" + }, + "outputs": [], + "source": [ + "config = QuantizationConfig.create_dynamic_range_quantization(optimizations=[tf.lite.Optimize.OPTIMIZE_FOR_LATENCY])\n", + "config._experimental_new_quantizer = True" ] }, { @@ -321,7 +358,7 @@ }, "outputs": [], "source": [ - "model.export(export_dir='.')" + "model.export(export_dir='mobilebert/', quantization_config=config)" ] }, { @@ -331,7 +368,9 @@ "id": "rVxaf3x_7OfB" }, "source": [ - "After this simple 5 steps, we could further use TensorFlow Lite model file and label file in on-device applications like in [text classification](https://github.com/tensorflow/examples/tree/master/lite/examples/text_classification) reference app." + "You can also download the model using the left sidebar in Colab.\n", + "\n", + "After executing the 5 steps above, you can further use the TensorFlow Lite model file and label file in on-device applications like in a [text classification](https://github.com/tensorflow/examples/tree/master/lite/examples/text_classification) reference app." ] }, { @@ -341,9 +380,7 @@ "id": "l65ctmtW7_FF" }, "source": [ - "## Detailed Process\n", - "\n", - "In the above, we tried the simple end-to-end example. The following walks through the example step by step to show more detail." + "The following sections walk through the example step by step to show more detail." ] }, { @@ -353,9 +390,17 @@ "id": "kJ_B8fMDOhMR" }, "source": [ - "### Step 0: Choose a model_spec that represents a model for text classifier.\n", + "## Choose a `model_spec` that Represents a Model for Text Classifier\n", "\n", - "each `model_spec` object represents a specific model for the text classifier. Currently, we support averging word embedding model and BERT-base model." + "Each `model_spec` object represents a specific model for the text classifier. TensorFlow Lite Model Maker currently supports [MobileBERT](https://arxiv.org/pdf/2004.02984.pdf), averaging word embeddings and [BERT-Base]((https://arxiv.org/pdf/1810.04805.pdf) models.\n", + "\n", + "Supported Model | Name of model_spec | Model Description\n", + "--- | --- | ---\n", + "MobileBERT | 'mobilebert_classifier' | 4.3x smaller and 5.5x faster than BERT-Base while achieving competitive results, suitable for on-device applications.\n", + "BERT-Base | 'bert_classifier' | Standard BERT model that is widely used in NLP tasks.\n", + "averaging word embedding | 'average_word_vec' | Averaging text word embeddings with RELU activation.\n", + "\n", + "This tutorial uses a smaller model, `average_word_vec` that you can retrain multiple times to demonstrate the process." ] }, { @@ -368,7 +413,7 @@ }, "outputs": [], "source": [ - "model_spec = AverageWordVecModelSpec()" + "spec = model_spec.get('average_word_vec')" ] }, { @@ -378,27 +423,13 @@ "id": "ygEncJxtl-nQ" }, "source": [ - "### Step 1: Load Input Data Specific to an On-device ML App\n", + "## Load Input Data Specific to an On-device ML App\n", "\n", - "The IMDB dataset contains 25000 movie reviews for training and 25000 movie reviews for testing from the [Internet Movie Database](https://www.imdb.com/). 
The dataset has two classes: positive and negative movie reviews.\n", + "The [SST-2](https://nlp.stanford.edu/sentiment/index.html) (Stanford Sentiment Treebank) is one of the tasks in the [GLUE](https://gluebenchmark.com/) benchmark . It contains 67,349 movie reviews for training and 872 movie reviews for validation. The dataset has two classes: positive and negative movie reviews.\n", "\n", - "Download the archive version of the dataset and untar it.\n", + "Download the archived version of the dataset and extract it.\n", "\n", - "The IMDB dataset has the following directory structure:\n", - "\n", - "\u003cpre\u003e\n", - "\u003cb\u003eaclImdb\u003c/b\u003e\n", - "|__ \u003cb\u003etrain\u003c/b\u003e\n", - " |______ \u003cb\u003epos\u003c/b\u003e: [1962_10.txt, 2499_10.txt, ...]\n", - " |______ \u003cb\u003eneg\u003c/b\u003e: [104_3.txt, 109_2.txt, ...]\n", - " |______ unsup: [12099_0.txt, 1424_0.txt, ...]\n", - "|__ \u003cb\u003etest\u003c/b\u003e\n", - " |______ \u003cb\u003epos\u003c/b\u003e: [1384_9.txt, 191_9.txt, ...]\n", - " |______ \u003cb\u003eneg\u003c/b\u003e: [1629_1.txt, 21_1.txt]\n", - "\n", - "\u003c/pre\u003e\n", - "\n", - "Note that the text data under `train/unsup` folder are unlabeled documents for unsupervised learning and such data should be ignored in this tutorial.\n" + "\n" ] }, { @@ -411,10 +442,11 @@ }, "outputs": [], "source": [ - "data_path = tf.keras.utils.get_file(\n", - " fname='aclImdb',\n", - " origin='http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz',\n", - " untar=True)" + "data_dir = tf.keras.utils.get_file(\n", + " fname='SST-2.zip',\n", + " origin='https://firebasestorage.googleapis.com/v0/b/mtl-sentence-representations.appspot.com/o/data%2FSST-2.zip?alt=media\u0026token=aabc5f6b-e466-44a2-b9b4-cf6337f84ac8',\n", + " extract=True)\n", + "data_dir = os.path.join(os.path.dirname(data_dir), 'SST-2')" ] }, { @@ -424,11 +456,16 @@ "id": "E051HBUM5owi" }, "source": [ - "Use `TextClassifierDataLoader` to load data.\n", + "The SST-2 dataset has `train.tsv` for training and `dev.tsv` for validation. The files have the following format:\n", "\n", - "As for `from_folder()` method, it could load data from the folder. It assumes that the text data of the same class are in the same subdirectory and the subfolder name is the class name. Each text file contains one movie review sample.\n", + "sentence | label\n", + "--- | ---\n", + "it 's a charming and often affecting journey . | 1\n", + "unflinchingly bleak and desperate | 0\n", "\n", - "Parameter `class_labels` is used to specify which subfolder should be considered. As for `train` folder, this parameter is used to skip `unsup` subfolder.\n" + "A positive review is labeled 1 and a negative review is labeled 0.\n", + "\n", + "Use the `TestClassifierDataLoader.from_csv` method to load the data." 
] }, { @@ -441,9 +478,30 @@ }, "outputs": [], "source": [ - "train_data = TextClassifierDataLoader.from_folder(os.path.join(data_path, 'train'), model_spec=model_spec, class_labels=['pos', 'neg'])\n", - "test_data = TextClassifierDataLoader.from_folder(os.path.join(data_path, 'test'), model_spec=model_spec, is_training=False, shuffle=False)\n", - "train_data, validation_data = train_data.split(0.9)" + "train_data = TextClassifierDataLoader.from_csv(\n", + " filename=os.path.join(os.path.join(data_dir, 'train.tsv')),\n", + " text_column='sentence',\n", + " label_column='label',\n", + " model_spec=spec,\n", + " delimiter='\\t',\n", + " is_training=True)\n", + "test_data = TextClassifierDataLoader.from_csv(\n", + " filename=os.path.join(os.path.join(data_dir, 'dev.tsv')),\n", + " text_column='sentence',\n", + " label_column='label',\n", + " model_spec=spec,\n", + " delimiter='\\t',\n", + " is_training=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "MlHvVvv2hw4H" + }, + "source": [ + "The Model Maker library also supports the `from_folder()` method to load data. It assumes that the text data of the same class are in the same subdirectory and that the subfolder name is the class name. Each text file contains one movie review sample. The `class_labels` parameter is used to specify which the subfolders." ] }, { @@ -453,9 +511,9 @@ "id": "AWuoensX4vDA" }, "source": [ - "### Step 2: Customize the TensorFlow Model\n", + "## Customize the TensorFlow Model\n", "\n", - "Create a custom text classifier model based on the loaded data. Currently, we support averaging word embedding and BERT-base model." + "Create a custom text classifier model based on the loaded data." ] }, { @@ -468,7 +526,7 @@ }, "outputs": [], "source": [ - "model = text_classifier.create(train_data, model_spec=model_spec, validation_data=validation_data)" + "model = text_classifier.create(train_data, model_spec=spec, epochs=10)" ] }, { @@ -478,7 +536,7 @@ "id": "0JKI-pNc8idH" }, "source": [ - "Have a look at the detailed model structure." + "Examine the detailed model structure." ] }, { @@ -501,11 +559,11 @@ "id": "LP5FPk_tOxoZ" }, "source": [ - "### Step 3: Evaluate the Customized Model\n", + "## Evaluate the Customized Model\n", "\n", - "Evaluate the result of the model, get the loss and accuracy of the model.\n", + "Evaluate the result of the model and get the loss and accuracy of the model.\n", "\n", - "Evaluate the loss and accuracy in `test_data`. If no data is given the results are evaluated on the data that's splitted in the `create` method." + "Evaluate the loss and accuracy in the test data." ] }, { @@ -528,9 +586,9 @@ "id": "aeHoGAceO2xV" }, "source": [ - "### Step 4: Export to TensorFlow Lite Model\n", + "## Export as a TensorFlow Lite Model\n", "\n", - "Convert the existing model to TensorFlow Lite model format that could be later used in on-device ML application. Meanwhile, save the text labels in label file and vocabulary in vocab file. The default TFLite filename is `model.tflite`, the default label filename is `label.txt`, the default vocab filename is `vocab`." + "Convert the existing model to TensorFlow Lite model format that you can later use in an on-device ML application. Save the text labels in a label file and vocabulary in a vocab file. The default TFLite filename is `model.tflite`, the default label filename is `label.txt` and the default vocab filename is `vocab`." 
] }, { @@ -543,7 +601,7 @@ }, "outputs": [], "source": [ - "model.export(export_dir='.')" + "model.export(export_dir='average_word_vec/')" ] }, { @@ -553,9 +611,7 @@ "id": "w12kvDdHJIGH" }, "source": [ - "The TensorFlow Lite model file and label file could be used in the [text classification](https://github.com/tensorflow/examples/tree/master/lite/examples/text_classification) reference app.\n", - "\n", - "In detail, we could add `movie_review_classifier.tflite`, `text_label.txt` and `vocab.txt` to the [assets directory](https://github.com/tensorflow/examples/tree/master/lite/examples/text_classification/android/app/src/main/assets) folder. Meanwhile, change the filenames in [code](https://github.com/tensorflow/examples/blob/master/lite/examples/text_classification/android/app/src/main/java/org/tensorflow/lite/examples/textclassification/TextClassificationClient.java#L43). " + "The TensorFlow Lite model file and label file can be used in the [text classification](https://github.com/tensorflow/examples/tree/master/lite/examples/text_classification) reference app by adding `model.tflite`, `text_label.txt` and `vocab.txt` to the [assets directory](https://github.com/tensorflow/examples/tree/master/lite/examples/text_classification/android/app/src/main/assets). Do not forget to also change the filenames in the [code](https://github.com/tensorflow/examples/blob/master/lite/examples/text_classification/android/app/src/main/java/org/tensorflow/lite/examples/textclassification/TextClassificationClient.java#L43)." ] }, { @@ -565,7 +621,7 @@ "id": "HZKYthlVrTos" }, "source": [ - "Here, we also demonstrate how to use the above files to run and evaluate the TensorFlow Lite model." + "You can evalute the tflite model with `evaluate_tflite` method." ] }, { @@ -578,50 +634,7 @@ }, "outputs": [], "source": [ - "# Read TensorFlow Lite model from TensorFlow Lite file.\n", - "with tf.io.gfile.GFile('model.tflite', 'rb') as f:\n", - " model_content = f.read()\n", - "\n", - "# Read label names from label file.\n", - "with tf.io.gfile.GFile('labels.txt', 'r') as f:\n", - " label_names = f.read().split('\\n')\n", - "\n", - "# Initialze TensorFlow Lite inpterpreter.\n", - "interpreter = tf.lite.Interpreter(model_content=model_content)\n", - "interpreter.allocate_tensors()\n", - "input_index = interpreter.get_input_details()[0]['index']\n", - "output = interpreter.tensor(interpreter.get_output_details()[0][\"index\"])\n", - "\n", - "# Run predictions on each test data and calculate accuracy.\n", - "accurate_count = 0\n", - "for text, label in test_data.dataset:\n", - " # Add batch dimension and convert to float32 to match with the model's input\n", - " # data format.\n", - " text = tf.expand_dims(text, 0)\n", - "\n", - " # Run inference.\n", - " interpreter.set_tensor(input_index, text)\n", - " interpreter.invoke()\n", - "\n", - " # Post-processing: remove batch dimension and find the label with highest\n", - " # probability.\n", - " predict_label = np.argmax(output()[0])\n", - " # Get label name with label index.\n", - " predict_label_name = label_names[predict_label]\n", - " accurate_count += (predict_label == label.numpy())\n", - "\n", - "accuracy = accurate_count * 1.0 / test_data.size\n", - "print('TensorFlow Lite model accuracy = %.4f' % accuracy)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "KLKmboKFtgc2" - }, - "source": [ - "Note that preprocessing for inference should be the same as training. 
Currently, preprocessing contains split the text to tokens by '\\W', encode the tokens to ids, the pad the text with `pad_id` to have the length of `seq_length`." + "model.evaluate_tflite('average_word_vec/model.tflite', test_data)" ] }, { @@ -633,14 +646,12 @@ "source": [ "## Advanced Usage\n", "\n", - "The `create` function is the critical part of this library in which parameter `model_spec` defines the specification of the model, currently `AverageWordVecModelSpec` and `BertModelSpec` is supported. The `create` function contains the following steps for `AverageWordVecModelSpec`:\n", + "The `create` function is the driver function that the Model Maker library uses to create models. The `model spec` parameter defines the model specification. The `AverageWordVecModelSpec` and `BertClassifierModelSpec` classes are currently supported. The `create` function comprises of the following steps:\n", "\n", - "1. Tokenize the text and select the top `num_words` most frequent words to generate the vocubulary. The default value of `num_words` in `AverageWordVecModelSpec` object is `10000`.\n", - "2. Encode the text string tokens to int ids.\n", - "3. Create the text classifier model. Currently, this library supports one model: average the word embedding of the text with RELU activation, then leverage softmax dense layer for classification. As for [Embedding layer](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Embedding), the input dimension is the size of the vocabulary, the output dimension is `AverageWordVecModelSpec` object's variable `wordvec_dim` which default value is `16`, the input length is `AverageWordVecModelSpec` object's variable `seq_len` which default value is `256`.\n", - "4. Train the classifier model. The default epoch is `2` and the default batch size is `32`.\n", + "1. Creates the model for the text classifier according to `model_spec`.\n", + "2. Trains the classifier model. The default epochs and the default batch size are set by the `default_training_epochs` and `default_batch_size` variables in the `model_spec` object.\n", "\n", - "In this section, we describe several advanced topics, including adjusting the model, changing the training hyperparameters etc.\n" + "This section covers advanced usage topics like adjusting the model and the training hyperparameters." ] }, { @@ -650,9 +661,9 @@ "id": "mwtiksguDfhl" }, "source": [ - "## Adjust the model\n", + "### Adjust the model\n", "\n", - "We could adjust the model infrastructure like variables `wordvec_dim`, `seq_len` in `AverageWordVecModelSpec` class.\n" + "You can adjust the model infrastructure like the `wordvec_dim` and the `seq_len` variables in the `AverageWordVecModelSpec` class.\n" ] }, { @@ -662,10 +673,7 @@ "id": "cAOd5_bzH9AQ" }, "source": [ - "* `wordvec_dim`: Dimension of word embedding.\n", - "* `seq_len`: length of sequence.\n", - "\n", - "For example, we could train with larger `wordvec_dim`. If we change the model, we need to construct the new `model_spec` firstly." + "For example, you can train the model with a larger value of `wordvec_dim`. Note that you must construct a new `model_spec` if you modify the model." ] }, { @@ -678,7 +686,7 @@ }, "outputs": [], "source": [ - "new_model_spec = AverageWordVecModelSpec(wordvec_dim=32)" + "new_model_spec = model_spec.AverageWordVecModelSpec(wordvec_dim=32)" ] }, { @@ -688,7 +696,7 @@ "id": "6LSTdghTP0Cv" }, "source": [ - "Secondly, we should get the preprocessed data accordingly." + "Get the preprocessed data." 
] }, { @@ -701,8 +709,13 @@ }, "outputs": [], "source": [ - "new_train_data = TextClassifierDataLoader.from_folder(os.path.join(data_path, 'train'), model_spec=new_model_spec, class_labels=['pos', 'neg'])\n", - "new_train_data, new_validation_data = new_train_data.split(0.9)" + "new_train_data = TextClassifierDataLoader.from_csv(\n", + " filename=os.path.join(os.path.join(data_dir, 'train.tsv')),\n", + " text_column='sentence',\n", + " label_column='label',\n", + " model_spec=new_model_spec,\n", + " delimiter='\\t',\n", + " is_training=True)" ] }, { @@ -712,7 +725,7 @@ "id": "tD7QVVHeRZoM" }, "source": [ - "Finally, we could train the new model." + "Train the new model." ] }, { @@ -725,7 +738,46 @@ }, "outputs": [], "source": [ - "model = text_classifier.create(new_train_data, model_spec=new_model_spec, validation_data=new_validation_data)" + "model = text_classifier.create(new_train_data, model_spec=new_model_spec)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "E8VxPiOLy4Gv" + }, + "source": [ + "You can also adjust the MobileBERT model.\n", + "\n", + "The model parameters you can adjust are:\n", + "\n", + "* `seq_len`: Length of the sequence to feed into the model.\n", + "* `initializer_range`: The standard deviation of the truncated_normal_initializer for initializing all weight matrices.\n", + "* `trainable`: Boolean that specifies whether the pre-trained layer is trainable.\n", + "\n", + "The training pipeline parameters you can adjust are:\n", + "\n", + "* `model_dir`: The location of the model checkpoint files. If not set, a temporary directory will be used.\n", + "* `dropout_rate`: The dropout rate.\n", + "* `learning_rate`: The initial learning rate for the Adam optimizer.\n", + "* `tpu`: TPU address to connect to.\n", + "\n", + "For instance, you can set the `seq_len=256` (default is 128). This allows the model to classify longer text." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "4tr9BLcjy4Sh" + }, + "outputs": [], + "source": [ + "new_model_spec = model_spec.get('mobilebert_classifier')\n", + "new_model_spec.seq_len = 256" ] }, { @@ -735,13 +787,13 @@ "id": "LvQuy7RSDir3" }, "source": [ - "### Change the training hyperparameters\n", - "We could also change the training hyperparameters like `epochs` and `batch_size` that could affect the model accuracy. For instance,\n", + "### Tune the training hyperparameters\n", + "You can also tune the training hyperparameters like `epochs` and `batch_size` that affect the model accuracy. For instance,\n", "\n", "* `epochs`: more epochs could achieve better accuracy, but may lead to overfitting.\n", - "* `batch_size`: number of samples to use in one training step.\n", + "* `batch_size`: the number of samples to use in one training step.\n", "\n", - "For example, we could train with more epochs." + "For example, you can train with more epochs." ] }, { @@ -754,7 +806,7 @@ }, "outputs": [], "source": [ - "model = text_classifier.create(train_data, model_spec=model_spec, validation_data=validation_data, epochs=5)" + "model = text_classifier.create(train_data, model_spec=spec, epochs=20)" ] }, { @@ -764,7 +816,7 @@ "id": "nUaKQZBQHBQR" }, "source": [ - "Evaluate the newly retrained model with 5 training epochs." + "Evaluate the newly retrained model with 20 training epochs." 
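Putting the MobileBERT cells above together: because the spec controls tokenization and padding, the data should be reloaded after `seq_len` is changed, just as the average-word-embedding example reloads data after constructing a new spec. The sketch below assembles that flow from cells already in this notebook; the CSV file name and column names are carried over from the earlier loading cell and are assumptions here, not an additional API.

new_model_spec = model_spec.get('mobilebert_classifier')
new_model_spec.seq_len = 256   # default is 128; longer sequences handle longer text but cost more compute

# Reload the training data so it is tokenized and padded to the new sequence length.
new_train_data = TextClassifierDataLoader.from_csv(
    filename=os.path.join(data_dir, 'train.tsv'),
    text_column='sentence',
    label_column='label',
    model_spec=new_model_spec,
    delimiter='\t',
    is_training=True)

# Train with the adjusted spec; the test data would likewise need to be
# reloaded with new_model_spec before calling model.evaluate on it.
model = text_classifier.create(new_train_data, model_spec=new_model_spec)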
] }, { @@ -787,11 +839,11 @@ "id": "Eq6B9lKMfhS6" }, "source": [ - "### Change the Model\n", + "### Change the Model Architecture\n", "\n", - "We could change the model by changing the `model_spec`. The following shows how we change to BERT-base model.\n", + "You can change the model by changing the `model_spec`. The following shows how to change to BERT-Base model.\n", "\n", - "First, we could change `model_spec` to `BertModelSpec`." + "Change the `model_spec` to BERT-Base model for the text classifier." ] }, { @@ -804,7 +856,7 @@ }, "outputs": [], "source": [ - "model_spec = BertClassifierModelSpec()" + "spec = model_spec.get('bert_classifier')" ] }, { @@ -814,46 +866,7 @@ "id": "L2d7yycrgu6L" }, "source": [ - "The remaining steps remains the same.\n", - "\n", - "Load data and preprocess the data according to `model_spec`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "6GQXQO54iyyE" - }, - "outputs": [], - "source": [ - "train_data = TextClassifierDataLoader.from_folder(os.path.join(data_path, 'train'), model_spec=model_spec, class_labels=['pos', 'neg'])\n", - "test_data = TextClassifierDataLoader.from_folder(os.path.join(data_path, 'test'), model_spec=model_spec, is_training=False, shuffle=False)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "ZTMqpDXCi11Q" - }, - "source": [ - "Then retrain the model. Note that it could take a long time to retrain the BERT model. we just set `epochs` equals 1 to demonstrate it." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "c991Bdkgi1Bf" - }, - "outputs": [], - "source": [ - "model = text_classifier.create(train_data, model_spec=model_spec, epochs=1)" + "The remaining steps are the same." 
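For completeness, "the remaining steps are the same" expands to roughly the sequence below, assembled from cells earlier in this notebook. It is a sketch, not an additional API: the TSV file names, column names and export directory are assumptions carried over from the average-word-embedding example, and BERT-Base fine-tuning is considerably slower than the default model.

spec = model_spec.get('bert_classifier')

# Reload the data so it is tokenized with the BERT vocabulary.
train_data = TextClassifierDataLoader.from_csv(
    filename=os.path.join(data_dir, 'train.tsv'),
    text_column='sentence',
    label_column='label',
    model_spec=spec,
    delimiter='\t',
    is_training=True)
test_data = TextClassifierDataLoader.from_csv(
    filename=os.path.join(data_dir, 'dev.tsv'),   # assumed test split file name
    text_column='sentence',
    label_column='label',
    model_spec=spec,
    delimiter='\t',
    is_training=False)

model = text_classifier.create(train_data, model_spec=spec)
model.evaluate(test_data)
model.export(export_dir='bert_classifier/')       # assumed output directory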
] } ], From fea99d08a763265a93e71d4be81d42329cb654b3 Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Wed, 5 Aug 2020 20:09:12 -0700 Subject: [PATCH 2226/2522] [TF2XLA] Do not set redundant variable when compiling with XLA If XLA support is already enabled, there is no need to set the environment variable (which is only a trigger to enable this support) PiperOrigin-RevId: 325154534 Change-Id: Iab9bf804050d0c87ad8b1b01290fec2879fd5eb9 --- .bazelrc | 1 - 1 file changed, 1 deletion(-) diff --git a/.bazelrc b/.bazelrc index 73926e5a2f9..ddeb2515d70 100644 --- a/.bazelrc +++ b/.bazelrc @@ -368,7 +368,6 @@ build --config=v2 test --config=v2 # Enable XLA -build:xla --action_env=TF_ENABLE_XLA=1 build:xla --define=with_xla_support=true # BEGIN TF REMOTE BUILD EXECUTION OPTIONS From 9b3d8452d7ce4a804f1f1865ad7890fd910937dc Mon Sep 17 00:00:00 2001 From: Blake Hechtman Date: Wed, 5 Aug 2020 20:58:22 -0700 Subject: [PATCH 2227/2522] [XLA] Convert concatenate into broadcast if all operands are the same and size one in the concatenate dimension PiperOrigin-RevId: 325159041 Change-Id: Iba80992e57c173039911bd609a646715e1450961 --- .../xla/service/algebraic_simplifier.cc | 23 ++++++++++++++++++- .../xla/service/algebraic_simplifier_test.cc | 19 +++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 0b588048e4a..c793c4958a2 100755 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -665,7 +665,7 @@ Status AlgebraicSimplifierVisitor::ScalarMultiplyReduction( HloInstruction* inst; HloInstruction* user; int64 index; - std::tie (inst, user, index) = operands.back(); + std::tie(inst, user, index) = operands.back(); operands.pop_back(); // Skip the op types that are not commutative with multiply. @@ -1236,6 +1236,10 @@ Status AlgebraicSimplifierVisitor::HandleConcatenate( return Status::OK(); } + if (options_.is_layout_sensitive()) { + return Status::OK(); + } + // Check if we can merge "adjacent" slice operands which take slices from the // same other op. For simplicity we only merge unstrided slices. 
int64 concatenate_dimension = concatenate->concatenate_dimension(); @@ -1335,6 +1339,23 @@ Status AlgebraicSimplifierVisitor::HandleConcatenate( operands[pad_value_operand]->mutable_operand(0), padding_config)); return ReplaceInstruction(concatenate, pad); } + + if (absl::c_count(operands, operands[0]) == operands.size() && + operands[0]->shape().dimensions(concatenate_dimension) == 1) { + Shape new_shape = operands[0]->shape(); + absl::InlinedVector broadcast_dims; + for (int64 i = 0; i < new_shape.rank(); ++i) { + if (i == concatenate_dimension) { + continue; + } + broadcast_dims.push_back(i); + } + new_shape.DeleteDimension(concatenate_dimension); + return ReplaceInstruction( + concatenate, + MakeBroadcastHlo(MakeReshapeHlo(new_shape, operands[0]).ValueOrDie(), + broadcast_dims, concatenate->shape())); + } return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index 90ca44714f7..95700b2a994 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -4823,6 +4823,25 @@ TEST_F(AlgebraicSimplifierTest, SliceOfConcatNonScalarInput) { EXPECT_EQ(root->slice_limits(0), 2); } +TEST_F(AlgebraicSimplifierTest, ConcatToBroadcast) { + const char* hlo_string = R"( + HloModule module + + ENTRY test { + p = f32[2,1,4] parameter(0) + ROOT concat = f32[2,6,4] concatenate(p,p,p,p,p,p), dimensions={1} + } + )"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + + AlgebraicSimplifierOptions options; + AlgebraicSimplifier simplifier(options); + EXPECT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, GmockMatch(m::Broadcast(m::Reshape(m::Parameter(0))))); +} + TEST_F(AlgebraicSimplifierTest, NegateNegate) { const char* hlo_string = R"( HloModule module From 2b387c6cb661e8a3128255f7656a44ea098354fb Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Wed, 5 Aug 2020 21:02:40 -0700 Subject: [PATCH 2228/2522] Update outdated comment about updating MirroredVariables. PiperOrigin-RevId: 325159511 Change-Id: I05e6e8a192d054b8db56c4e00d500c95b832ad92 --- tensorflow/python/distribute/values.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/distribute/values.py b/tensorflow/python/distribute/values.py index a5dabddff94..9d148bd029d 100644 --- a/tensorflow/python/distribute/values.py +++ b/tensorflow/python/distribute/values.py @@ -1020,7 +1020,8 @@ class MirroredVariable(DistributedVariable, Mirrored): # TODO(b/154017756): Make _dense_var_to_tensor consistent between ON_READ # and ON_WRITE. # Try to avoid assignments to and other mutations of MirroredVariable - # state except through a DistributionStrategy.extended.update() call. + # state except through a DistributionStrategy.extended.update() or any of + # the `assign*` and `scatter*` calls. if as_ref: # A TF 1.x case where the variable is a boolean variable and used like: # tf.cond(v, true_fn, false_fn). 
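Stepping back to the algebraic-simplifier change above (patch 2227): the rewrite exploits the identity that concatenating the same operand N times along a dimension in which it has size one is just a broadcast of that operand, which is what the new ConcatToBroadcast test checks (f32[2,1,4] concatenated six times along dimension 1 becomes Broadcast(Reshape(Parameter(0)))). A small numpy analogue, for illustration only and not part of the XLA implementation, makes the equivalence concrete:

import numpy as np

p = np.random.rand(2, 1, 4).astype(np.float32)

# concatenate(p, p, p, p, p, p), dimensions={1} -> f32[2,6,4]
concat = np.concatenate([p] * 6, axis=1)

# The simplifier instead drops the size-1 dimension (reshape to [2,4]) and
# broadcasts it back to [2,6,4]; numpy's broadcast_to expresses the same thing.
broadcast = np.broadcast_to(p, (2, 6, 4))

assert np.array_equal(concat, broadcast)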
From 6fe75e83928d99dfe6df9176e26a88d6afdfef17 Mon Sep 17 00:00:00 2001 From: Thai Nguyen Date: Wed, 5 Aug 2020 21:07:02 -0700 Subject: [PATCH 2229/2522] Add RaggedTensorToSparse and DecodeRaw to flex delegate PiperOrigin-RevId: 325160099 Change-Id: I6fcd935790b361f8afdb085ed36f973f24cd7dbd --- tensorflow/core/kernels/BUILD | 2 ++ tensorflow/lite/delegates/flex/allowlisted_flex_ops.cc | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 12d4f1c5574..dfe9f35701c 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -6455,6 +6455,7 @@ filegroup( "cwise_op_xlog1py.cc", "cwise_op_xdivy.cc", "data_format_ops.cc", + "decode_raw_op.cc", "decode_wav_op.cc", "deep_conv2d.cc", "deep_conv2d.h", @@ -6542,6 +6543,7 @@ filegroup( "queue_op.cc", "queue_ops.cc", "ragged_range_op.cc", + "ragged_tensor_to_sparse_kernel.cc", "ragged_tensor_to_tensor_op.cc", "random_op.cc", "random_op_cpu.h", diff --git a/tensorflow/lite/delegates/flex/allowlisted_flex_ops.cc b/tensorflow/lite/delegates/flex/allowlisted_flex_ops.cc index e06410485ba..a932111b403 100644 --- a/tensorflow/lite/delegates/flex/allowlisted_flex_ops.cc +++ b/tensorflow/lite/delegates/flex/allowlisted_flex_ops.cc @@ -112,6 +112,7 @@ const std::set& GetFlexAllowlist() { "DebugGradientIdentity", "DebugGradientRefIdentity", "DecodeBase64", + "DecodeRaw", "DecodeWav", "DeepCopy", "DeleteSessionTensor", @@ -302,6 +303,7 @@ const std::set& GetFlexAllowlist() { "RFFT2D", "RFFT3D", "RaggedRange", + "RaggedTensorToSparse", "RaggedTensorToTensor", "RandomGamma", "RandomStandardNormal", From b9bb63da8fd92c3681a1a0608d54e0c7d44461c9 Mon Sep 17 00:00:00 2001 From: Andy Ly Date: Wed, 5 Aug 2020 21:36:20 -0700 Subject: [PATCH 2230/2522] Add canonicalization for unpacking and repacking the same tuple (e.g. tuple -> get_tuple_element -> tuple). These unpacking and repacking of tuples may be generated when modifying tuple arguments or results. PiperOrigin-RevId: 325162694 Change-Id: Ia58f19e7f20caebe1235f0cb1b86f90f11a25aef --- .../mlir-hlo/Dialect/mhlo/IR/hlo_ops.td | 1 + .../mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc | 40 +++++++++++++++++++ .../compiler/mlir/hlo/tests/canonicalize.mlir | 22 ++++++++++ 3 files changed, 63 insertions(+) diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td index e83bf874c62..4c09c209bd1 100644 --- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td @@ -671,6 +671,7 @@ def HLO_TupleOp : HLO_Op<"tuple", [NoSideEffect]>, BASE_HLO_TupleOp { "OpBuilder &builder, OperationState &results, " "ValueRange values">]; + let hasCanonicalizer = 1; } def HLO_CompareOp: HLO_Op<"compare", diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc index 69b01009a0d..de3f950c300 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc @@ -506,6 +506,46 @@ static LogicalResult Verify(TupleOp op) { return success(); } +namespace { + +// Pattern for unpacking and repacking the same tuple. 
+struct UnpackRepackSameTuple : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(TupleOp op, + PatternRewriter& rewriter) const override { + if (op.val().empty()) return failure(); + + Value first_element = op.val().front(); + auto first_element_op = + dyn_cast_or_null(first_element.getDefiningOp()); + if (!first_element_op || first_element_op.indexAttr().getInt() != 0) + return failure(); + + Value tuple_predecessor = first_element_op.getOperand(); + if (tuple_predecessor.getType() != op.getType()) return failure(); + + for (auto element_and_idx : llvm::enumerate(op.val().drop_front(1))) { + auto element_op = dyn_cast_or_null( + element_and_idx.value().getDefiningOp()); + if (!element_op || + element_op.indexAttr().getInt() != element_and_idx.index() + 1 || + element_op.getOperand() != tuple_predecessor) + return failure(); + } + + rewriter.replaceOp(op, tuple_predecessor); + return success(); + } +}; + +} // namespace + +void TupleOp::getCanonicalizationPatterns(OwningRewritePatternList& results, + MLIRContext* context) { + results.insert(context); +} + //===----------------------------------------------------------------------===// // AllToAllOp //===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/hlo/tests/canonicalize.mlir b/tensorflow/compiler/mlir/hlo/tests/canonicalize.mlir index f0fe52266f0..e793e213e50 100644 --- a/tensorflow/compiler/mlir/hlo/tests/canonicalize.mlir +++ b/tensorflow/compiler/mlir/hlo/tests/canonicalize.mlir @@ -561,3 +561,25 @@ func @dce_while_without_side_effect(%arg0: tensor) -> tensor { return %arg0 : tensor } + +// CHECK-LABEL: unpack_repack_same_tuple +// CHECK-SAME: ([[ARG0:%.*]]: tuple, !mhlo.token, tensor>) +func @unpack_repack_same_tuple(%arg0: tuple, !mhlo.token, tensor>) -> tuple, !mhlo.token, tensor> { + %0 = "mhlo.get_tuple_element"(%arg0) {index = 0 : i32} : (tuple, !mhlo.token, tensor>) -> tensor + %1 = "mhlo.get_tuple_element"(%arg0) {index = 1 : i32} : (tuple, !mhlo.token, tensor>) -> !mhlo.token + %2 = "mhlo.get_tuple_element"(%arg0) {index = 2 : i32} : (tuple, !mhlo.token, tensor>) -> tensor + %3 = "mhlo.tuple"(%0, %1, %2) : (tensor, !mhlo.token, tensor) -> tuple, !mhlo.token, tensor> + + // CHECK: return [[ARG0]] + return %3 : tuple, !mhlo.token, tensor> +} + +// CHECK-LABEL: unpack_repack_same_tuple_single_element +// CHECK-SAME: ([[ARG0:%.*]]: tuple>) +func @unpack_repack_same_tuple_single_element(%arg0: tuple>) -> tuple> { + %0 = "mhlo.get_tuple_element"(%arg0) {index = 0 : i32} : (tuple>) -> tensor + %3 = "mhlo.tuple"(%0) : (tensor) -> tuple> + + // CHECK: return [[ARG0]] + return %3 : tuple> +} From 5b06bfab0412e8cca09ef1c38cc9e135e087936d Mon Sep 17 00:00:00 2001 From: HyoukJoong Lee Date: Wed, 5 Aug 2020 21:43:03 -0700 Subject: [PATCH 2231/2522] Enable Min/Max scatter update function for SPMD partitioning PiperOrigin-RevId: 325163313 Change-Id: I67a210077a4a43dd67de5d4625bac64fd7d09941 --- .../xla/service/spmd/spmd_partitioner.cc | 12 ++++++- .../xla/service/spmd/spmd_partitioner_test.cc | 34 +++++++++++++++++++ .../xla/service/spmd/spmd_partitioner_util.cc | 17 ++++++++++ .../xla/service/spmd/spmd_partitioner_util.h | 4 +++ 4 files changed, 66 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc index 2d76966a494..906303e1a3c 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc +++ 
b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc @@ -1209,6 +1209,16 @@ Status SpmdPartitioningVisitor::HandleScatter(HloInstruction* hlo) { case HloOpcode::kAnd: identity = CreateOne(operand.hlo()->shape(), &b_); break; + case HloOpcode::kMinimum: + identity = CreateConstant( + operand.hlo()->shape(), + LiteralUtil::MaxValue(hlo->shape().element_type()), &b_); + break; + case HloOpcode::kMaximum: + identity = CreateConstant( + operand.hlo()->shape(), + LiteralUtil::MinValue(hlo->shape().element_type()), &b_); + break; default: return DefaultAction(hlo); } @@ -1226,7 +1236,7 @@ Status SpmdPartitioningVisitor::HandleScatter(HloInstruction* hlo) { CHECK(new_updates_sharding.has_value()); updates = updates.Reshard(*new_updates_sharding); // To avoid accumulating the initial operand multiple times during - // all-reduce, we use zero operands for all non-zero partitions. + // all-reduce, we use identity operands for all non-zero partitions. auto not_partition_zero = b_.AddInstruction(HloInstruction::CreateConvert( ShapeUtil::MakeScalarShape(PRED), partition_id_)); not_partition_zero = b_.AddInstruction(HloInstruction::CreateBroadcast( diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc index d5342e3e1f4..04641f2f463 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc @@ -3822,6 +3822,40 @@ ENTRY entry { op::Shape("f32[2,9,8]"))); } +TEST_F(SpmdPartitioningTest, IndexPassthroughScatter_Min) { + const char* const hlo_string = R"( +HloModule module + +min (lhs: f32[], rhs: f32[]) -> f32[] { + lhs = f32[] parameter(0) + rhs = f32[] parameter(1) + ROOT min = f32[] minimum(lhs, rhs) +} + +ENTRY entry { + %input = f32[2,9,8] parameter(0), sharding={replicated} + %indices = s32[4,2,4] parameter(1), sharding={devices=[2,1,2]0,1,2,3} + %updates = f32[4,4,8] parameter(2), sharding={devices=[2,2,1]0,1,2,3} + ROOT %scatter = f32[2,9,8] scatter(%input, %indices, %updates), + to_apply=min, + update_window_dims={2}, + inserted_window_dims={0,1}, + scatter_dims_to_operand_dims={0,1}, + index_vector_dim=1, sharding={replicated} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/4)); + VLOG(1) << module->ToString(); + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT( + root, + AllOf(op::AllReduce(op::Scatter( + op::Select(op::Broadcast(op::Convert(op::PartitionId())), + op::Broadcast(op::Constant()), op::Parameter(0)), + op::Parameter(1), op::Parameter(2))), + op::Shape("f32[2,9,8]"))); +} + TEST_F(SpmdPartitioningTest, ScatterPartitionedOnTrivialSliceDims) { const char* const hlo_string = R"( HloModule module diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc index 29def16f89d..8f94a90de8e 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc @@ -47,6 +47,23 @@ bool HasReplicatedSharding(const HloSharding& sharding) { return sharding.IsReplicated(); } +HloInstruction* CreateConstant(const Shape& shape, Literal value, + SpmdBuilder* b) { + if (shape.IsTuple()) { + std::vector elements; + for (int64 i = 0; i < ShapeUtil::TupleElementCount(shape); ++i) { + elements.push_back(CreateConstant( + ShapeUtil::GetTupleElementShape(shape, i), value.Clone(), b)); + } + return 
b->AddInstruction(HloInstruction::CreateTuple(elements)); + } + + CHECK( + ShapeUtil::IsScalarWithElementType(value.shape(), shape.element_type())); + auto c = b->AddInstruction(HloInstruction::CreateConstant(std::move(value))); + return b->AddInstruction(HloInstruction::CreateBroadcast(shape, c, {})); +} + HloInstruction* CreateZero(const Shape& shape, SpmdBuilder* b) { if (shape.IsTuple()) { std::vector elements; diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h index 10b630e31ee..e8b51567359 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h @@ -33,6 +33,10 @@ namespace spmd { // Returns true if the given sharding contains any replicated sharding. bool HasReplicatedSharding(const HloSharding& sharding); +// Creates constant value instructions of the given shape. The literal must be a +// scalar shape and is broadcast to the given shape. +HloInstruction* CreateConstant(const Shape& shape, Literal value, + SpmdBuilder* b); // Creates zero value instructions of the given shape. HloInstruction* CreateZero(const Shape& shape, SpmdBuilder* b); From 5580bd506616605b9651fc051ca92a6387458b82 Mon Sep 17 00:00:00 2001 From: YoungSeok Yoon Date: Wed, 5 Aug 2020 23:09:23 -0700 Subject: [PATCH 2232/2522] Correctly strip header inclusion path for Metal delegate This change also consolidates the header inclusion path stripping logic into a predefined macro. PiperOrigin-RevId: 325171594 Change-Id: I8f79d230a14f3687f15b543a4b135627bbd5a2fc --- tensorflow/lite/experimental/ios/BUILD.apple | 55 ++++++-------------- tensorflow/lite/experimental/ios/ios.bzl | 30 ++++++++++- 2 files changed, 46 insertions(+), 39 deletions(-) diff --git a/tensorflow/lite/experimental/ios/BUILD.apple b/tensorflow/lite/experimental/ios/BUILD.apple index b4df93088b3..99ea2f8acbb 100644 --- a/tensorflow/lite/experimental/ios/BUILD.apple +++ b/tensorflow/lite/experimental/ios/BUILD.apple @@ -1,7 +1,12 @@ # TensorFlow Lite for iOS load("@bazel_skylib//rules:build_test.bzl", "build_test") -load("//tensorflow/lite/experimental/ios:ios.bzl", "TFL_MINIMUM_OS_VERSION", "tflite_ios_static_framework") +load( + "//tensorflow/lite/experimental/ios:ios.bzl", + "TFL_MINIMUM_OS_VERSION", + "strip_common_include_path_prefix", + "tflite_ios_static_framework", +) load("@build_bazel_rules_apple//apple:ios.bzl", "ios_static_framework") package( @@ -18,31 +23,14 @@ sh_binary( ], ) -# When the static framework is built with bazel, the all header files are moved -# to the "Headers" directory with no header path prefixes. This auxiliary rule -# is used for stripping the path prefix to the "common.h" file included by the -# "c_api.h" header. -genrule( - name = "strip_c_api_include_hdr", - srcs = ["//tensorflow/lite/c:c_api.h"], - outs = ["c_api.h"], - cmd = """ - sed 's|#include ".*common.h"|#include "common.h"|'\ - "$(location //tensorflow/lite/c:c_api.h)"\ - > "$@" - """, -) - -# Similar rule as above, but for the "xnnpack_delegate.h" header. 
-genrule( - name = "strip_xnnpack_include_hdr", - srcs = ["//tensorflow/lite/delegates/xnnpack:xnnpack_delegate.h"], - outs = ["xnnpack_delegate.h"], - cmd = """ - sed 's|#include ".*common.h"|#include "common.h"|'\ - "$(location //tensorflow/lite/delegates/xnnpack:xnnpack_delegate.h)"\ - > "$@" - """, +strip_common_include_path_prefix( + name = "strip_common_include_path", + hdr_labels = [ + "//tensorflow/lite/c:c_api.h", + "//tensorflow/lite/delegates/gpu:metal_delegate.h", + "//tensorflow/lite/delegates/xnnpack:xnnpack_delegate.h", + "//tensorflow/lite/experimental/delegates/coreml:coreml_delegate.h", + ], ) # bazel build -c opt --config=ios_fat //tensorflow/lite/experimental/ios:TensorFlowLiteC_framework @@ -79,17 +67,6 @@ ios_static_framework( ], ) -genrule( - name = "strip_coreml_include_hdr", - srcs = ["//tensorflow/lite/experimental/delegates/coreml:coreml_delegate.h"], - outs = ["coreml_delegate.h"], - cmd = """ - sed 's|#include ".*common.h"|#include "TensorFlowLiteC/common.h"|'\ - "$(location //tensorflow/lite/experimental/delegates/coreml:coreml_delegate.h)"\ - > "$@" - """, -) - # This target builds the Core ML delegate as a separate static framework, which # does not include the TensorFlow Lite runtime. As this target does not contain # TensorFlow Lite runtime, it is intended to be linked along with the @@ -100,6 +77,7 @@ tflite_ios_static_framework( name = "TensorFlowLiteCCoreML_framework", hdrs = [ ":coreml_delegate.h", + "//tensorflow/lite/c:common.h", ], allowlist_symbols_file = ":allowlist_TensorFlowLiteCCoreML.txt", bundle_name = "TensorFlowLiteCCoreML", @@ -118,7 +96,8 @@ tflite_ios_static_framework( tflite_ios_static_framework( name = "TensorFlowLiteCMetal_framework", hdrs = [ - "//tensorflow/lite/delegates/gpu:metal_delegate.h", + ":metal_delegate.h", + "//tensorflow/lite/c:common.h", ], allowlist_symbols_file = ":allowlist_TensorFlowLiteCMetal.txt", bundle_name = "TensorFlowLiteCMetal", diff --git a/tensorflow/lite/experimental/ios/ios.bzl b/tensorflow/lite/experimental/ios/ios.bzl index 1125e85f3d6..43ca6ec6010 100644 --- a/tensorflow/lite/experimental/ios/ios.bzl +++ b/tensorflow/lite/experimental/ios/ios.bzl @@ -17,7 +17,7 @@ TFL_DISABLED_SANITIZER_TAGS = [ "notsan", ] -# iOS static framework with symbol allowlist. Exported C++ symbbols might cause +# iOS static framework with symbol allowlist. Exported C++ symbols might cause # symbol collision with other libraries. List of symbols to allowlist can be # generated by running `nm -m -g FRAMEWORK_LIBRARY | grep _TfLite` for framework # built with `ios_static_framework` rule. @@ -71,3 +71,31 @@ def tflite_ios_static_framework( "//tensorflow/lite/experimental/ios:hide_symbols_with_allowlist", ], ) + +# When the static framework is built with bazel, the all header files are moved +# to the "Headers" directory with no header path prefixes. This auxiliary rule +# is used for stripping the path prefix to the "common.h" file included by the +# "c_api.h" header. +def strip_common_include_path_prefix(name, hdr_labels): + """Create modified header files with the common.h include path stripped out. + + Args: + name: The name to be used as a prefix to the generated genrules. + hdr_labels: List of header labels to strip out the include path. Each + label must end with a colon followed by the header file name. 
+ """ + + for hdr_label in hdr_labels: + hdr_filename = hdr_label.split(":")[-1] + hdr_basename = hdr_filename.split(".")[0] + + native.genrule( + name = "{}_{}".format(name, hdr_basename), + srcs = [hdr_label], + outs = [hdr_filename], + cmd = """ + sed 's|#include ".*common.h"|#include "common.h"|'\ + "$(location {})"\ + > "$@" + """.format(hdr_label), + ) From 9ae671b9c7306d65250fa32b1cf808524f458bfc Mon Sep 17 00:00:00 2001 From: Rick Chao Date: Wed, 5 Aug 2020 23:25:14 -0700 Subject: [PATCH 2233/2522] MultiProcessRunner: Make MultiProcessRunnerResult available in the exception re-raised from subprocesses. PiperOrigin-RevId: 325172785 Change-Id: If910cb898b6f5c0c9c532f789533bb5b9d15bd95 --- tensorflow/python/distribute/multi_process_runner.py | 7 ++++++- tensorflow/python/distribute/multi_process_runner_test.py | 8 ++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/distribute/multi_process_runner.py b/tensorflow/python/distribute/multi_process_runner.py index 028a7aad171..b36c8e978b4 100644 --- a/tensorflow/python/distribute/multi_process_runner.py +++ b/tensorflow/python/distribute/multi_process_runner.py @@ -541,6 +541,8 @@ class MultiProcessRunner(object): for process_status in process_statuses.values(): assert isinstance(process_status, _ProcessStatusInfo) if not process_status.is_successful: + process_status.exc_info[1].mpr_result = self._get_mpr_result( + process_statuses) six.reraise(*process_status.exc_info) def join(self, timeout=_DEFAULT_TIMEOUT_SEC): @@ -583,7 +585,10 @@ class MultiProcessRunner(object): is not `None`, it is expected that some subprocesses may be force-killed when `max_run_time` is up, and this is raised in those cases. - Exception: if there is an Exception propagated from any subprocess. + Exception: if there is an Exception propagated from any subprocess. When + this is raised, a `MultiProcessRunnerResult` object can be retrieved by + `UnexpectedSubprocessExitError`'s mpr_result attribute, which has the + same structure as above 'Returns' section describes. 
""" with self._process_lock: if self._joined: diff --git a/tensorflow/python/distribute/multi_process_runner_test.py b/tensorflow/python/distribute/multi_process_runner_test.py index 8d8099740a3..7c1364b7d7c 100644 --- a/tensorflow/python/distribute/multi_process_runner_test.py +++ b/tensorflow/python/distribute/multi_process_runner_test.py @@ -406,16 +406,20 @@ class MultiProcessRunnerTest(test.TestCase): def test_auto_restart_and_timeout(self): def proc_func(): + logging.info('Running') time.sleep(1) raise ValueError mpr = multi_process_runner.MultiProcessRunner( proc_func, multi_worker_test_base.create_cluster_spec(num_workers=1), - auto_restart=True) + auto_restart=True, + list_stdout=True) mpr.start() - with self.assertRaises(ValueError): + with self.assertRaises(ValueError) as cm: mpr.join(timeout=10) + self.assertGreater( + sum(['Running' in msg for msg in cm.exception.mpr_result.stdout]), 1) def test_auto_restart_and_chief(self): # If the chief has exited with zero exit code, auto restart should stop From 09e5c6387942af88cf7fcc7726593d1ecaa6c987 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Wed, 5 Aug 2020 23:27:48 -0700 Subject: [PATCH 2234/2522] Add CMake files and lit configurations, enough for `ninja check-mlir-hlo` to pass on all the tests PiperOrigin-RevId: 325172984 Change-Id: I43f95f02ec4df3233ddddb5c1aa34d303f7c5ef3 --- tensorflow/compiler/mlir/hlo/CMakeLists.txt | 94 +++++++++++ .../mlir/hlo/include/mlir-hlo/CMakeLists.txt | 16 ++ .../include/mlir-hlo/Dialect/CMakeLists.txt | 16 ++ .../mlir-hlo/Dialect/mhlo/CMakeLists.txt | 17 ++ .../mlir-hlo/Dialect/mhlo/IR/CMakeLists.txt | 31 ++++ .../Dialect/mhlo/transforms/CMakeLists.txt | 23 +++ .../compiler/mlir/hlo/lib/CMakeLists.txt | 17 ++ .../mlir/hlo/lib/Dialect/CMakeLists.txt | 16 ++ .../mlir/hlo/lib/Dialect/mhlo/CMakeLists.txt | 17 ++ .../hlo/lib/Dialect/mhlo/IR/CMakeLists.txt | 82 +++++++++ .../Dialect/mhlo/transforms/CMakeLists.txt | 155 ++++++++++++++++++ .../mlir/hlo/lib/utils/CMakeLists.txt | 25 +++ .../compiler/mlir/hlo/tests/CMakeLists.txt | 36 ++++ tensorflow/compiler/mlir/hlo/tests/lit.cfg.py | 82 +++++++++ .../mlir/hlo/tests/lit.site.cfg.py.in | 50 ++++++ .../compiler/mlir/hlo/tools/CMakeLists.txt | 16 ++ .../hlo/tools/mlir-hlo-opt/CMakeLists.txt | 32 ++++ 17 files changed, 725 insertions(+) create mode 100644 tensorflow/compiler/mlir/hlo/CMakeLists.txt create mode 100644 tensorflow/compiler/mlir/hlo/include/mlir-hlo/CMakeLists.txt create mode 100644 tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/CMakeLists.txt create mode 100644 tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/CMakeLists.txt create mode 100644 tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/CMakeLists.txt create mode 100644 tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/CMakeLists.txt create mode 100644 tensorflow/compiler/mlir/hlo/lib/CMakeLists.txt create mode 100644 tensorflow/compiler/mlir/hlo/lib/Dialect/CMakeLists.txt create mode 100644 tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/CMakeLists.txt create mode 100644 tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/CMakeLists.txt create mode 100644 tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/CMakeLists.txt create mode 100644 tensorflow/compiler/mlir/hlo/lib/utils/CMakeLists.txt create mode 100644 tensorflow/compiler/mlir/hlo/tests/CMakeLists.txt create mode 100644 tensorflow/compiler/mlir/hlo/tests/lit.cfg.py create mode 100644 tensorflow/compiler/mlir/hlo/tests/lit.site.cfg.py.in create mode 100644 
tensorflow/compiler/mlir/hlo/tools/CMakeLists.txt create mode 100644 tensorflow/compiler/mlir/hlo/tools/mlir-hlo-opt/CMakeLists.txt diff --git a/tensorflow/compiler/mlir/hlo/CMakeLists.txt b/tensorflow/compiler/mlir/hlo/CMakeLists.txt new file mode 100644 index 00000000000..c4e2ea123df --- /dev/null +++ b/tensorflow/compiler/mlir/hlo/CMakeLists.txt @@ -0,0 +1,94 @@ +# +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +cmake_minimum_required(VERSION 3.13.4) + +if(POLICY CMP0068) + cmake_policy(SET CMP0068 NEW) + set(CMAKE_BUILD_WITH_INSTALL_NAME_DIR ON) +endif() + +if(POLICY CMP0075) + cmake_policy(SET CMP0075 NEW) +endif() + +if(POLICY CMP0077) + cmake_policy(SET CMP0077 NEW) +endif() + +#------------------------------------------------------------------------------- +# Project setup and globals +#------------------------------------------------------------------------------- + +project(mlir-hlo LANGUAGES CXX C) +set(CMAKE_C_STANDARD 11) +set(CMAKE_CXX_STANDARD 14) +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules") + +#------------------------------------------------------------------------------- +# Options and settings +#------------------------------------------------------------------------------- + +#------------------------------------------------------------------------------- +# MSVC defaults +#------------------------------------------------------------------------------- + +if(MSVC) + add_compile_options( + $<$:/MD> + $<$:/MD> + $<$:/MD> + ) +endif() + +#------------------------------------------------------------------------------- +# MLIR/LLVM Configuration +#------------------------------------------------------------------------------- + +find_package(MLIR REQUIRED CONFIG) +message(STATUS "Using MLIRConfig.cmake in: ${MLIR_DIR}") +message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") +list(APPEND CMAKE_MODULE_PATH "${MLIR_CMAKE_DIR}") +list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}") + +if(LLVM_ENABLE_ZLIB) + find_package(ZLIB) +endif() + +include(TableGen) +include(AddLLVM) +include(AddMLIR) +include(HandleLLVMOptions) +include_directories(${LLVM_INCLUDE_DIRS}) +include_directories(${MLIR_INCLUDE_DIRS}) +include_directories(${PROJECT_SOURCE_DIR}/include) +include_directories(${PROJECT_BINARY_DIR}/include) +include_directories(${PROJECT_BINARY_DIR}/) +link_directories(${LLVM_BUILD_LIBRARY_DIR}) +add_definitions(${LLVM_DEFINITIONS}) + +#------------------------------------------------------------------------------- +# Directory setup +#------------------------------------------------------------------------------- + +set(MLIR_HLO_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) +set(MLIR_HLO_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) + +add_custom_target(check-mlir-hlo) + +add_subdirectory(include/mlir-hlo) +add_subdirectory(lib) +add_subdirectory(tools) +add_subdirectory(tests) diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/CMakeLists.txt 
b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/CMakeLists.txt new file mode 100644 index 00000000000..92759d76383 --- /dev/null +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/CMakeLists.txt @@ -0,0 +1,16 @@ +# +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +add_subdirectory(Dialect) diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/CMakeLists.txt b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/CMakeLists.txt new file mode 100644 index 00000000000..5ee1a1924ec --- /dev/null +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/CMakeLists.txt @@ -0,0 +1,16 @@ +# +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +add_subdirectory(mhlo) diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/CMakeLists.txt b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/CMakeLists.txt new file mode 100644 index 00000000000..e138afa587f --- /dev/null +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/CMakeLists.txt @@ -0,0 +1,17 @@ +# +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +add_subdirectory(IR) +add_subdirectory(transforms) diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/CMakeLists.txt b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/CMakeLists.txt new file mode 100644 index 00000000000..09bdca84cd3 --- /dev/null +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/CMakeLists.txt @@ -0,0 +1,31 @@ +# +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Need a separate function because of the .cc vs .cpp used in the one provided by MLIR +function(add_mlir_hlo_dialect dialect dialect_namespace) + set(LLVM_TARGET_DEFINITIONS ${dialect}.td) + mlir_tablegen(${dialect}.h.inc -gen-op-decls) + mlir_tablegen(${dialect}.cc.inc -gen-op-defs) + mlir_tablegen(${dialect}_structs.h.inc -gen-struct-attr-decls) + mlir_tablegen(${dialect}_structs.cc.inc -gen-struct-attr-defs) + add_public_tablegen_target(MLIR${dialect}IncGen) + add_dependencies(mlir-headers MLIR${dialect}IncGen) +endfunction() + +add_mlir_hlo_dialect(chlo_ops chlo) +add_mlir_hlo_dialect(hlo_ops mhlo) +add_mlir_hlo_dialect(lhlo_ops lmhlo) + +add_mlir_interface(infer_fusibility_op_interface) diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/CMakeLists.txt b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/CMakeLists.txt new file mode 100644 index 00000000000..6fbc5306a8f --- /dev/null +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/CMakeLists.txt @@ -0,0 +1,23 @@ +# +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set(LLVM_TARGET_DEFINITIONS mhlo_passes.td) +mlir_tablegen(mhlo_passes.h.inc -gen-pass-decls) +add_public_tablegen_target(MLIRMhloPassIncGen) + +set(LLVM_TARGET_DEFINITIONS lmhlo_passes.td) +mlir_tablegen(lmhlo_passes.h.inc -gen-pass-decls) +add_public_tablegen_target(MLIRLmhloPassIncGen) diff --git a/tensorflow/compiler/mlir/hlo/lib/CMakeLists.txt b/tensorflow/compiler/mlir/hlo/lib/CMakeLists.txt new file mode 100644 index 00000000000..ec65a5ee882 --- /dev/null +++ b/tensorflow/compiler/mlir/hlo/lib/CMakeLists.txt @@ -0,0 +1,17 @@ +# +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +add_subdirectory(Dialect) +add_subdirectory(utils) diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/CMakeLists.txt b/tensorflow/compiler/mlir/hlo/lib/Dialect/CMakeLists.txt new file mode 100644 index 00000000000..5ee1a1924ec --- /dev/null +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/CMakeLists.txt @@ -0,0 +1,16 @@ +# +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +add_subdirectory(mhlo) diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/CMakeLists.txt b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/CMakeLists.txt new file mode 100644 index 00000000000..e138afa587f --- /dev/null +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/CMakeLists.txt @@ -0,0 +1,17 @@ +# +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +add_subdirectory(IR) +add_subdirectory(transforms) diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/CMakeLists.txt b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/CMakeLists.txt new file mode 100644 index 00000000000..d7bb5057b00 --- /dev/null +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/CMakeLists.txt @@ -0,0 +1,82 @@ +# +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +include_directories(BEFORE + ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}) + +set(LLVM_TARGET_DEFINITIONS hlo_patterns.td) +mlir_tablegen(hlo_patterns.cc.inc -gen-rewriters) +add_public_tablegen_target(MLIRMhloRewriterIncGen) + +set(LLVM_TARGET_DEFINITIONS mhlo_canonicalize.td) +mlir_tablegen(mhlo_canonicalize.inc -gen-rewriters) +add_public_tablegen_target(MLIRMhloCanonicalizeIncGen) + +add_mlir_dialect_library(ChloDialect + chlo_ops.cc + + DEPENDS + MLIRchlo_opsIncGen +) +target_link_libraries(ChloDialect PUBLIC MLIRIR) + +add_mlir_library(MhloInferFusibilityOpInterface + infer_fusibility_op_interface.cc + + DEPENDS + MLIRinfer_fusibility_op_interfaceIncGen +) + + +add_mlir_dialect_library(MhloDialect + hlo_ops.cc + + DEPENDS + MLIRhlo_opsIncGen + MLIRMhloCanonicalizeIncGen + MLIRMhloRewriterIncGen + MLIRinfer_fusibility_op_interfaceIncGen +) +target_link_libraries(MhloDialect + PUBLIC + MLIRIR + MhloInferFusibilityOpInterface + MLIRMhloUtils +) + + +add_mlir_dialect_library(LmhloDialect + lhlo_ops.cc + + DEPENDS + MLIRlhlo_opsIncGen +) +target_link_libraries(LmhloDialect PUBLIC MLIRIR) + + +add_mlir_dialect_library(MhloRegisterDialects + init.cc +DEPENDS + MLIRchlo_opsIncGen + MLIRhlo_opsIncGen + MLIRlhlo_opsIncGen +) +target_link_libraries(MhloRegisterDialects + PUBLIC + ChloDialect + MhloDialect + LmhloDialect +) diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/CMakeLists.txt b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/CMakeLists.txt new file mode 100644 index 00000000000..bb9f98d32d3 --- /dev/null +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/CMakeLists.txt @@ -0,0 +1,155 @@ +# +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +include_directories(BEFORE + ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}) + +set(LLVM_TARGET_DEFINITIONS lower_complex_patterns.td) +mlir_tablegen(generated_lower_complex.inc -gen-rewriters) +add_public_tablegen_target(MLIRMhloLowerComplexIncGen) + +set(LLVM_TARGET_DEFINITIONS legalize_to_standard_patterns.td) +mlir_tablegen(generated_legalize_to_standard.inc -gen-rewriters) +add_public_tablegen_target(MLIRMhloLegalizeToStandardIncGen) + + +add_mlir_library(ChloPasses + chlo_legalize_to_hlo.cc + chlo_legalize_to_hlo_pass.cc + + DEPENDS + MLIRhlo_opsIncGen + + LINK_COMPONENTS + Core + + LINK_LIBS PUBLIC + ChloDialect + MLIRIR + MLIRPass +) + +add_mlir_library(MhloPasses + legalize_gather_to_torch_index_select.cc + legalize_tanh_to_approximation.cc + lower_complex.cc + lower_complex_patterns.td + lower_general_dot.cc + materialize_broadcasts.cc + materialize_broadcasts_pass.cc + mhlo_fusion.cc + optimize_mhlo.cc + optimize_mhlo_pass.cc + sink_constants_to_control_flow.cc + test_infer_shaped_type_pass.cc + transform_unranked_hlo.cc + unfuse_batch_norm.cc + unfuse_batch_norm_pass.cc + + DEPENDS + MLIRhlo_opsIncGen + MLIRMhloLowerComplexIncGen + + LINK_COMPONENTS + Core + + LINK_LIBS PUBLIC + MLIRIR + MLIRMhloUtils + MLIRPass + MLIRTransformUtils +) + +add_mlir_library(MhloToLhloConversion + hlo_legalize_to_lhlo.cc + + DEPENDS + MLIRhlo_opsIncGen + MLIRlhlo_opsIncGen + + LINK_COMPONENTS + Core + + LINK_LIBS PUBLIC + MhloDialect + LmhloDialect + MLIRIR + MLIRPass +) + +add_mlir_library(MhloToStandard + legalize_control_flow.cc + legalize_to_standard.cc + + DEPENDS + MLIRhlo_opsIncGen + MLIRlhlo_opsIncGen + MLIRMhloLegalizeToStandardIncGen + + LINK_COMPONENTS + Core + + LINK_LIBS PUBLIC + MLIRIR + MLIRPass +) + +add_mlir_library(MhloLhloToLinalg + legalize_to_linalg.cc + + DEPENDS + MLIRhlo_opsIncGen + MLIRlhlo_opsIncGen + + LINK_COMPONENTS + Core + + LINK_LIBS PUBLIC + MhloDialect + MLIRIR + MLIRPass +) + +add_mlir_library(LmhloPasses + lhlo_copy_removal.cc + lhlo_fuse_linalg.cc + lhlo_legalize_to_affine.cc + lhlo_legalize_to_gpu.cc + lhlo_legalize_to_llvm.cc + lhlo_legalize_to_llvm_pass.cc + lhlo_legalize_to_parallel_loops.cc + + DEPENDS + MLIRlhlo_opsIncGen + + LINK_COMPONENTS + Core + + LINK_LIBS PUBLIC + LmhloDialect + MLIRIR + MLIRPass +) + +add_library(AllMhloPasses INTERFACE) +target_link_libraries(AllMhloPasses INTERFACE + ChloPasses + MhloPasses + MhloToLhloConversion + MhloToStandard + MhloLhloToLinalg + LmhloPasses +) diff --git a/tensorflow/compiler/mlir/hlo/lib/utils/CMakeLists.txt b/tensorflow/compiler/mlir/hlo/lib/utils/CMakeLists.txt new file mode 100644 index 00000000000..17e86f1caa8 --- /dev/null +++ b/tensorflow/compiler/mlir/hlo/lib/utils/CMakeLists.txt @@ -0,0 +1,25 @@ +# +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +add_mlir_library(MLIRMhloUtils + broadcast_utils.cc + convert_op_folder.cc + cycle_detector.cc + hlo_utils.cc + + LINK_LIBS PUBLIC + MLIRSupport + ) diff --git a/tensorflow/compiler/mlir/hlo/tests/CMakeLists.txt b/tensorflow/compiler/mlir/hlo/tests/CMakeLists.txt new file mode 100644 index 00000000000..36a7eec5a1f --- /dev/null +++ b/tensorflow/compiler/mlir/hlo/tests/CMakeLists.txt @@ -0,0 +1,36 @@ +# +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +configure_lit_site_cfg( + ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in + ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py + MAIN_CONFIG + ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py +) + +set(MLIR_HLO_TEST_DEPENDS + FileCheck count not + mlir-hlo-opt +) + +add_lit_testsuite(check-mlir-hlo-lit "Running the mlir-hlo regression tests" + ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS ${MLIR_HLO_TEST_DEPENDS} + ) +set_target_properties(check-mlir-hlo-lit PROPERTIES FOLDER "Tests") + +add_lit_testsuites(MLIR_HLO_OPT ${CMAKE_CURRENT_SOURCE_DIR} DEPENDS ${MLIR_HLO_TEST_DEPENDS}) + +add_dependencies(check-mlir-hlo check-mlir-hlo-lit) diff --git a/tensorflow/compiler/mlir/hlo/tests/lit.cfg.py b/tensorflow/compiler/mlir/hlo/tests/lit.cfg.py new file mode 100644 index 00000000000..f81d47a76cd --- /dev/null +++ b/tensorflow/compiler/mlir/hlo/tests/lit.cfg.py @@ -0,0 +1,82 @@ +"""Lit configuration to drive test in this repo.""" +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# -*- Python -*- +# pylint: disable=undefined-variable + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +import lit.formats +from lit.llvm import llvm_config +from lit.llvm.subst import ToolSubst +import lit.util + +# Configuration file for the 'lit' test runner. + +# name: The name of this test suite. +config.name = 'MLIR_HLO_OPT' + +config.test_format = lit.formats.ShTest(not llvm_config.use_lit_shell) + +# suffixes: A list of file extensions to treat as test files. +config.suffixes = ['.mlir', '.mlir.py'] + +# test_source_root: The root path where tests are located. +config.test_source_root = os.path.dirname(__file__) + +# test_exec_root: The root path where tests should be run. 
+config.test_exec_root = os.path.join(config.mlir_hlo_obj_root, 'test') + +config.substitutions.append(('%PATH%', config.environment['PATH'])) +config.substitutions.append(('%shlibext', config.llvm_shlib_ext)) + +llvm_config.with_system_environment(['HOME', 'INCLUDE', 'LIB', 'TMP', 'TEMP']) + +llvm_config.use_default_substitutions() + +# excludes: A list of directories to exclude from the testsuite. The 'Inputs' +# subdirectories contain auxiliary inputs for various tests in their parent +# directories. +config.excludes = [ + 'Inputs', 'Examples', 'CMakeLists.txt', 'README.txt', 'LICENSE.txt' +] + +# test_source_root: The root path where tests are located. +config.test_source_root = os.path.dirname(__file__) + +# test_exec_root: The root path where tests should be run. +config.test_exec_root = os.path.join(config.mlir_hlo_obj_root, 'test') +config.mlir_hlo_tools_dir = os.path.join(config.mlir_hlo_obj_root, 'tools') + +# Tweak the PATH to include the tools dir. +llvm_config.with_environment('PATH', config.llvm_tools_dir, append_path=True) + +tool_dirs = [ + os.path.join(config.mlir_hlo_tools_dir, 'mlir-hlo-opt'), + config.llvm_tools_dir, +] +tools = [ + 'mlir-hlo-opt', + 'mlir-cpu-runner', + ToolSubst( + '%mlir_runner_utils_dir', + config.mlir_runner_utils_dir, + unresolved='ignore'), +] + +llvm_config.add_tool_substitutions(tools, tool_dirs) diff --git a/tensorflow/compiler/mlir/hlo/tests/lit.site.cfg.py.in b/tensorflow/compiler/mlir/hlo/tests/lit.site.cfg.py.in new file mode 100644 index 00000000000..17b99e983f6 --- /dev/null +++ b/tensorflow/compiler/mlir/hlo/tests/lit.site.cfg.py.in @@ -0,0 +1,50 @@ +@LIT_SITE_CFG_IN_HEADER@ + +import sys + +config.host_triple = "@LLVM_HOST_TRIPLE@" +config.target_triple = "@TARGET_TRIPLE@" +config.llvm_src_root = "@LLVM_SOURCE_DIR@" +config.llvm_obj_root = "@LLVM_BINARY_DIR@" +config.llvm_tools_dir = "@LLVM_TOOLS_DIR@" +config.llvm_lib_dir = "@LLVM_LIBRARY_DIR@" +config.llvm_shlib_dir = "@SHLIBDIR@" +config.llvm_shlib_ext = "@SHLIBEXT@" +config.llvm_exe_ext = "@EXEEXT@" +config.lit_tools_dir = "@LLVM_LIT_TOOLS_DIR@" +config.python_executable = "@PYTHON_EXECUTABLE@" +config.gold_executable = "@GOLD_EXECUTABLE@" +config.ld64_executable = "@LD64_EXECUTABLE@" +config.enable_shared = @ENABLE_SHARED@ +config.enable_assertions = @ENABLE_ASSERTIONS@ +config.targets_to_build = "@TARGETS_TO_BUILD@" +config.native_target = "@LLVM_NATIVE_ARCH@" +config.llvm_bindings = "@LLVM_BINDINGS@".split(' ') +config.host_os = "@HOST_OS@" +config.host_cc = "@HOST_CC@" +config.host_cxx = "@HOST_CXX@" +# Note: ldflags can contain double-quoted paths, so must use single quotes here. +config.host_ldflags = '@HOST_LDFLAGS@' +config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@" +config.llvm_host_triple = '@LLVM_HOST_TRIPLE@' +config.host_arch = "@HOST_ARCH@" +config.mlir_hlo_src_root = "@CMAKE_SOURCE_DIR@" +config.mlir_hlo_obj_root = "@CMAKE_BINARY_DIR@" +config.mlir_runner_utils_dir = os.path.join(config.llvm_obj_root, "lib") + +# Support substitution of the tools_dir with user parameters. This is +# used when we can't determine the tool dir at configuration time. +try: + config.llvm_tools_dir = config.llvm_tools_dir % lit_config.params + config.llvm_shlib_dir = config.llvm_shlib_dir % lit_config.params +except KeyError: + e = sys.exc_info()[1] + key, = e.args + lit_config.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key,key)) + + +import lit.llvm +lit.llvm.initialize(lit_config, config) + +# Let the main config do the real work. 
+lit_config.load_config(config, "@CMAKE_SOURCE_DIR@/tests/lit.cfg.py") diff --git a/tensorflow/compiler/mlir/hlo/tools/CMakeLists.txt b/tensorflow/compiler/mlir/hlo/tools/CMakeLists.txt new file mode 100644 index 00000000000..0f3d1c85795 --- /dev/null +++ b/tensorflow/compiler/mlir/hlo/tools/CMakeLists.txt @@ -0,0 +1,16 @@ +# +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +add_subdirectory(mlir-hlo-opt) diff --git a/tensorflow/compiler/mlir/hlo/tools/mlir-hlo-opt/CMakeLists.txt b/tensorflow/compiler/mlir/hlo/tools/mlir-hlo-opt/CMakeLists.txt new file mode 100644 index 00000000000..754469a3c84 --- /dev/null +++ b/tensorflow/compiler/mlir/hlo/tools/mlir-hlo-opt/CMakeLists.txt @@ -0,0 +1,32 @@ +# +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) +get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS) +set(LIBS + ${dialect_libs} + ${conversion_libs} + MLIROptLib + + MhloRegisterDialects + AllMhloPasses + ) +add_llvm_executable(mlir-hlo-opt mlir-hlo-opt.cpp + DEPENDS + MLIRLmhloPassIncGen + MLIRMhloPassIncGen +) +llvm_update_compile_flags(mlir-hlo-opt) +target_link_libraries(mlir-hlo-opt PRIVATE ${LIBS}) From cee9a4fa4bdd3df36c7b36079a2f19c1e67b01c5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 5 Aug 2020 23:46:36 -0700 Subject: [PATCH 2235/2522] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 325174603 Change-Id: Id1317087233c835a86157620a5a0423c1d07a405 --- tensorflow/go/op/wrappers.go | 110 +++++++++++++++++------------------ 1 file changed, 55 insertions(+), 55 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 34ff57636ca..ea060f815c6 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -3660,43 +3660,6 @@ func TensorForestTreeIsInitializedOp(scope *Scope, tree_handle tf.Output) (is_in return op.Output(0) } -// TensorForestTreeResourceHandleOpAttr is an optional argument to TensorForestTreeResourceHandleOp. -type TensorForestTreeResourceHandleOpAttr func(optionalAttr) - -// TensorForestTreeResourceHandleOpContainer sets the optional container attribute to value. 
-// If not specified, defaults to "" -func TensorForestTreeResourceHandleOpContainer(value string) TensorForestTreeResourceHandleOpAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// TensorForestTreeResourceHandleOpSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func TensorForestTreeResourceHandleOpSharedName(value string) TensorForestTreeResourceHandleOpAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Creates a handle to a TensorForestTreeResource -func TensorForestTreeResourceHandleOp(scope *Scope, optional ...TensorForestTreeResourceHandleOpAttr) (resource tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "TensorForestTreeResourceHandleOp", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // AllCandidateSamplerAttr is an optional argument to AllCandidateSampler. type AllCandidateSamplerAttr func(optionalAttr) @@ -13369,6 +13332,24 @@ func ResizeArea(scope *Scope, images tf.Output, size tf.Output, optional ...Resi return op.Output(0) } +// Returns the number of work units this Reader has finished processing. +// +// Arguments: +// reader_handle: Handle to a Reader. +func ReaderNumWorkUnitsCompletedV2(scope *Scope, reader_handle tf.Output) (units_completed tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ReaderNumWorkUnitsCompletedV2", + Input: []tf.Input{ + reader_handle, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Returns up to `num_records` (key, value) pairs produced by a Reader. // // Will dequeue from the input queue if necessary (e.g. when the @@ -39085,24 +39066,6 @@ func ResourceApplyAddSign(scope *Scope, var_ tf.Output, m tf.Output, lr tf.Outpu return scope.AddOperation(opspec) } -// Returns the number of work units this Reader has finished processing. -// -// Arguments: -// reader_handle: Handle to a Reader. -func ReaderNumWorkUnitsCompletedV2(scope *Scope, reader_handle tf.Output) (units_completed tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ReaderNumWorkUnitsCompletedV2", - Input: []tf.Input{ - reader_handle, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // FractionalMaxPoolAttr is an optional argument to FractionalMaxPool. type FractionalMaxPoolAttr func(optionalAttr) @@ -49735,6 +49698,43 @@ func LoadTPUEmbeddingProximalAdagradParametersGradAccumDebug(scope *Scope, param return scope.AddOperation(opspec) } +// TensorForestTreeResourceHandleOpAttr is an optional argument to TensorForestTreeResourceHandleOp. +type TensorForestTreeResourceHandleOpAttr func(optionalAttr) + +// TensorForestTreeResourceHandleOpContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func TensorForestTreeResourceHandleOpContainer(value string) TensorForestTreeResourceHandleOpAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// TensorForestTreeResourceHandleOpSharedName sets the optional shared_name attribute to value. 
+// If not specified, defaults to "" +func TensorForestTreeResourceHandleOpSharedName(value string) TensorForestTreeResourceHandleOpAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Creates a handle to a TensorForestTreeResource +func TensorForestTreeResourceHandleOp(scope *Scope, optional ...TensorForestTreeResourceHandleOpAttr) (resource tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "TensorForestTreeResourceHandleOp", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // FusedResizeAndPadConv2DAttr is an optional argument to FusedResizeAndPadConv2D. type FusedResizeAndPadConv2DAttr func(optionalAttr) From e3ce476785e0fd71371858e766cdb9b2fa519cfb Mon Sep 17 00:00:00 2001 From: Thai Nguyen Date: Thu, 6 Aug 2020 00:16:41 -0700 Subject: [PATCH 2236/2522] Support sentencepiece ops in flex delegate PiperOrigin-RevId: 325177655 Change-Id: I97fab846cafa230b0e15514cc2b8084ff70a5cd0 --- .../delegates/flex/allowlisted_flex_ops.cc | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/delegates/flex/allowlisted_flex_ops.cc b/tensorflow/lite/delegates/flex/allowlisted_flex_ops.cc index a932111b403..eefbeb72b15 100644 --- a/tensorflow/lite/delegates/flex/allowlisted_flex_ops.cc +++ b/tensorflow/lite/delegates/flex/allowlisted_flex_ops.cc @@ -576,10 +576,27 @@ bool IsAllowedTFTextOpForFlex(const std::string& op_name) { return tensorflow::OpRegistry::Global()->LookUp(op_name) != nullptr; } +// Allow the sentencepiece ops if they are registered in the global op registry. +bool IsAllowedSentencePieceOpForFlex(const std::string& op_name) { + static const std::set* sentencepiece_flex_ops = + new std::set({ + "SentencepieceGetPieceSize", + "SentencepiecePieceToId", + "SentencepieceIdToPiece", + "SentencepieceEncodeDense", + "SentencepieceEncodeSparse", + "SentencepieceDecode", + }); + if (sentencepiece_flex_ops->count(op_name) == 0) return false; + return tensorflow::OpRegistry::Global()->LookUp(op_name) != nullptr; +} + bool IsAllowlistedFlexOp(const std::string& tensorflow_op_name) { if (GetFlexAllowlist().count(tensorflow_op_name) != 0) return true; - // Check if the op is an allowlisted tf.text op. - return IsAllowedTFTextOpForFlex(tensorflow_op_name); + + // Check if the op is an allowlisted tf.text or sentencepiece op. + return IsAllowedTFTextOpForFlex(tensorflow_op_name) || + IsAllowedSentencePieceOpForFlex(tensorflow_op_name); } } // namespace flex From 3f49de584cb7a9de005457006132f5720cc14dd6 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Thu, 6 Aug 2020 00:38:21 -0700 Subject: [PATCH 2237/2522] Internal change PiperOrigin-RevId: 325179961 Change-Id: I24304910b14b6972180f9b6ed68171c9ed694373 --- tensorflow/core/framework/tensor_testutil.cc | 2 - tensorflow/core/grappler/op_types.cc | 6 -- tensorflow/core/grappler/op_types.h | 1 - .../core/grappler/optimizers/remapper.cc | 89 ------------------- .../core/grappler/optimizers/remapper_test.cc | 52 ----------- .../core/grappler/utils/grappler_test.h | 2 +- .../core/kernels/cwise_op_equal_to_1.cc | 2 - tensorflow/core/kernels/cwise_op_greater.cc | 2 - .../core/kernels/cwise_op_greater_equal.cc | 2 - tensorflow/core/kernels/cwise_op_less.cc | 2 - .../core/kernels/cwise_op_less_equal.cc | 2 - .../core/kernels/cwise_op_not_equal_to_1.cc | 2 - tensorflow/core/kernels/cwise_ops.h | 26 ------ tensorflow/core/kernels/cwise_ops_test.cc | 74 ++++++++------- tensorflow/core/ops/math_ops.cc | 43 --------- 15 files changed, 36 insertions(+), 271 deletions(-) diff --git a/tensorflow/core/framework/tensor_testutil.cc b/tensorflow/core/framework/tensor_testutil.cc index 804d5df31ed..89a20b9a039 100644 --- a/tensorflow/core/framework/tensor_testutil.cc +++ b/tensorflow/core/framework/tensor_testutil.cc @@ -216,8 +216,6 @@ void ExpectClose(const Tensor& x, const Tensor& y, double atol, double rtol) { switch (x.dtype()) { case DT_HALF: return ExpectClose(x, y, atol, rtol); - case DT_BFLOAT16: - return ExpectClose(x, y, atol, rtol); case DT_FLOAT: return ExpectClose(x, y, atol, rtol); case DT_DOUBLE: diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc index c5f53386465..9d30f24e047 100644 --- a/tensorflow/core/grappler/op_types.cc +++ b/tensorflow/core/grappler/op_types.cc @@ -142,12 +142,6 @@ bool IsCollective(const NodeDef& node) { node.op() == "CollectiveBcastRecv"; } -bool IsComparison(const NodeDef& node) { - return node.op() == "Equal" || node.op() == "NotEqual" || - node.op() == "GreaterEqual" || node.op() == "Greater" || - node.op() == "LessEqual" || node.op() == "Less"; -} - bool IsComplex(const NodeDef& node) { return node.op() == "Complex"; } bool IsComplexAbs(const NodeDef& node) { return node.op() == "ComplexAbs"; } diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h index 94fecc56b97..141eda7415a 100644 --- a/tensorflow/core/grappler/op_types.h +++ b/tensorflow/core/grappler/op_types.h @@ -51,7 +51,6 @@ bool IsBroadcastTo(const NodeDef& node); bool IsCast(const NodeDef& node); bool IsCheckNumerics(const NodeDef& node); bool IsCollective(const NodeDef& node); -bool IsComparison(const NodeDef& node); bool IsComplex(const NodeDef& node); bool IsComplexAbs(const NodeDef& node); bool IsConcat(const NodeDef& node); diff --git a/tensorflow/core/grappler/optimizers/remapper.cc b/tensorflow/core/grappler/optimizers/remapper.cc index 4fe39a19d81..46c7afbc53a 100644 --- a/tensorflow/core/grappler/optimizers/remapper.cc +++ b/tensorflow/core/grappler/optimizers/remapper.cc @@ -92,15 +92,6 @@ struct FusedBatchNorm { int fused_batch_norm = kMissingIndex; }; -// Comparison op followed by a cast, e.g., GreaterEqual + Cast. -struct ComparisonWithCast { - ComparisonWithCast() = default; - - int comparison = kMissingIndex; - int cast = kMissingIndex; - string fused_op = "_"; -}; - // FusedBatchNorm[$is_training] with fused side input and/or activation. 
struct FusedBatchNormEx { FusedBatchNormEx() = default; @@ -928,43 +919,6 @@ bool FindFusedBatchNormEx(const RemapperContext& ctx, int node_index, return false; } -bool FindComparisonWithCast(const RemapperContext& ctx, int node_index, - ComparisonWithCast* matched) { - const auto* node_view = ctx.graph_view.GetNode(node_index); - const auto* node_def = node_view->node(); - - if (!IsCast(*node_def) || HasControlFaninOrFanout(*node_view)) return false; - if (!NodeIsOnCpu(node_def)) return false; - - if (node_view->NumRegularFanins() != 1) return false; - const auto& regular_fanin_0 = node_view->GetRegularFanin(0); - const auto* comparison = regular_fanin_0.node_view(); - const auto* comparison_node_def = comparison->node(); - if (!IsComparison(*comparison_node_def) || - HasControlFaninOrFanout(*comparison)) - return false; - if (!NodeIsOnCpu(comparison_node_def)) return false; - - DataType comparator_dtype = GetDataTypeFromAttr(*comparison_node_def, "T"); - DataType src_dtype = GetDataTypeFromAttr(*node_def, "SrcT"); - DataType dst_dtype = GetDataTypeFromAttr(*node_def, "DstT"); - - if ((comparator_dtype != DT_FLOAT) && (comparator_dtype != DT_BFLOAT16)) - return false; - if ((comparator_dtype != dst_dtype) || (src_dtype != DT_BOOL)) return false; - - // Check that only one node consumes the 0-th output of a comparison. - if (!HasAtMostOneDataFanoutAtPort0(*comparison) || - IsInPreserveSet(ctx, comparison_node_def)) - return false; - - matched->cast = node_index; - matched->comparison = regular_fanin_0.node_index(); - matched->fused_op = - matched->fused_op + comparison_node_def->op() + "WithCast"; - return true; -} - void CopyConv2DAttributes(const NodeDef& conv2d, NodeDef* fused_conv2d) { DCHECK(IsConv2D(conv2d)) << "Input node must be a Conv2D"; @@ -1416,40 +1370,6 @@ Status AddFusedBatchNormExNode(RemapperContext* ctx, return Status::OK(); } -Status AddComparisonWithCastNode(RemapperContext* ctx, - const ComparisonWithCast& matched, - std::vector* invalidated_nodes, - std::vector* nodes_to_delete) { - const GraphDef* graph = ctx->graph_view.graph(); - const NodeDef& comparison = graph->node(matched.comparison); - const NodeDef& cast = graph->node(matched.cast); - - VLOG(2) << "Fuse " << cast.op() << " with comparison:" - << " cast=" << cast.name() << " invalidated=" - << " comparison=" << comparison.name(); - - // Replace Comparison and Cast with ComparisonWithCast. - NodeDef fused_op; - fused_op.set_op(matched.fused_op); - fused_op.set_name(cast.name()); - fused_op.set_device(comparison.device()); - - fused_op.add_input(comparison.input(0)); - fused_op.add_input(comparison.input(1)); - (*fused_op.mutable_attr())["T"] = comparison.attr().at("T"); - - utils::Mutation* mutation = ctx->graph_view.GetMutationBuilder(); - Status status; - mutation->AddNode(std::move(fused_op), &status); - TF_RETURN_IF_ERROR(status); - TF_RETURN_IF_ERROR(mutation->Apply()); - - (*nodes_to_delete)[matched.comparison] = true; - (*invalidated_nodes)[matched.cast] = true; - - return Status::OK(); -} - Status AddBatchNormNodes(RemapperContext* ctx, const FusedBatchNorm& matched) { const GraphDef* graph = ctx->graph_view.graph(); const NodeDef& fused_node = graph->node(matched.fused_batch_norm); @@ -1914,15 +1834,6 @@ Status Remapper::Optimize(Cluster* cluster, const GrapplerItem& item, TF_RETURN_IF_ERROR(AddBatchNormNodes(&ctx, fused_batch_norm)); continue; } - - // Remap Comparison+Cast into the ComparisonWithCast. 
- ComparisonWithCast comparison_with_cast; - if (allow_non_differentiable_rewrites && - FindComparisonWithCast(ctx, i, &comparison_with_cast)) { - TF_RETURN_IF_ERROR(AddComparisonWithCastNode( - &ctx, comparison_with_cast, &invalidated_nodes, &nodes_to_delete)); - continue; - } } // Remove invalidated nodes. diff --git a/tensorflow/core/grappler/optimizers/remapper_test.cc b/tensorflow/core/grappler/optimizers/remapper_test.cc index 1aa8299ed13..f4bc5e38526 100644 --- a/tensorflow/core/grappler/optimizers/remapper_test.cc +++ b/tensorflow/core/grappler/optimizers/remapper_test.cc @@ -959,57 +959,5 @@ TEST_F(RemapperTest, FuseConv2DWithSqueezeAndBias) { } #endif -class FusedCmpAndCastTest : public GrapplerTest { - protected: - template - void TestFusedCmpAndCast() { - using ::tensorflow::ops::Placeholder; - tensorflow::Scope s = tensorflow::Scope::NewRootScope(); - const int num_channels = 24; - TensorShape channel_shape({num_channels}); - TensorShape empty_shape({0}); - auto x = Placeholder(s.WithOpName("x"), TYPE, - ops::Placeholder::Shape({2, 8, 8, num_channels})); - auto y = Placeholder(s.WithOpName("y"), TYPE, - ops::Placeholder::Shape({2, 8, 8, num_channels})); - auto comparator = ops::Equal(s.WithOpName("Equal"), x, y); - auto cast = ops::Cast(s.WithOpName("cast"), comparator.z, TYPE); - auto fetch = ops::Identity(s.WithOpName("fetch"), cast); - auto input1_t = GenerateRandomTensor({2, 8, 8, num_channels}); - auto input2_t = GenerateRandomTensor({2, 8, 8, num_channels}); - GrapplerItem item; - item.fetch = {"fetch"}; - item.feed = {{"x", input1_t}, {"y", input2_t}}; - TF_ASSERT_OK(s.ToGraphDef(&item.graph)); - for (int i = 0; i < item.graph.node_size(); ++i) { - item.graph.mutable_node(i)->set_device("/device:CPU:0"); - } - Remapper optimizer(RewriterConfig::AGGRESSIVE); - GraphDef output; - TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output)); - int found = 0; - for (const NodeDef& node : output.node()) { - if (node.name() == "cast") { - EXPECT_EQ(node.op(), "_EqualWithCast"); - ASSERT_EQ(node.input_size(), 2); - EXPECT_EQ(node.input(0), "x"); - EXPECT_EQ(node.input(1), "y"); - found++; - } - } - EXPECT_EQ(found, 1); - auto tensors_expected = EvaluateNodes(item.graph, item.fetch, item.feed); - ASSERT_EQ(tensors_expected.size(), 1); - auto tensors = EvaluateNodes(output, item.fetch, item.feed); - ASSERT_EQ(tensors.size(), 1); - test::ExpectClose(tensors[0], tensors_expected[0], 1e-2, 1e-2); - } -}; - -TEST_F(FusedCmpAndCastTest, FusedCmpAndCast) { - TestFusedCmpAndCast(); - TestFusedCmpAndCast(); -} - } // namespace grappler } // namespace tensorflow diff --git a/tensorflow/core/grappler/utils/grappler_test.h b/tensorflow/core/grappler/utils/grappler_test.h index 0660116b192..9225f9172e8 100644 --- a/tensorflow/core/grappler/utils/grappler_test.h +++ b/tensorflow/core/grappler/utils/grappler_test.h @@ -85,7 +85,7 @@ class GrapplerTest : public ::testing::Test { typedef typename EnumToDataType::Type T; Tensor tensor(DTYPE, shape); for (auto i = 0; i < tensor.NumElements(); i++) - tensor.flat()(i) = static_cast(i + random::New64() % 10); + tensor.flat()(i) = i + random::New64() % 10; return tensor; } diff --git a/tensorflow/core/kernels/cwise_op_equal_to_1.cc b/tensorflow/core/kernels/cwise_op_equal_to_1.cc index 86da7525685..64cd784af73 100644 --- a/tensorflow/core/kernels/cwise_op_equal_to_1.cc +++ b/tensorflow/core/kernels/cwise_op_equal_to_1.cc @@ -19,8 +19,6 @@ namespace tensorflow { REGISTER7(BinaryOp, CPU, "Equal", functor::equal_to, float, Eigen::half, double, uint8, int8, 
int16, bfloat16); REGISTER3(BinaryOp, CPU, "Equal", functor::equal_to, uint16, uint32, uint64); -REGISTER2(BinaryOp, CPU, "_EqualWithCast", functor::equal_to_with_cast, float, - bfloat16); REGISTER_KERNEL_BUILDER( Name("ApproximateEqual").Device(DEVICE_CPU).TypeConstraint("T"), ApproximateEqualOp); diff --git a/tensorflow/core/kernels/cwise_op_greater.cc b/tensorflow/core/kernels/cwise_op_greater.cc index e905f13f6c6..d70233dc55c 100644 --- a/tensorflow/core/kernels/cwise_op_greater.cc +++ b/tensorflow/core/kernels/cwise_op_greater.cc @@ -18,8 +18,6 @@ limitations under the License. namespace tensorflow { REGISTER9(BinaryOp, CPU, "Greater", functor::greater, float, Eigen::half, double, int32, int64, uint8, int8, int16, bfloat16); -REGISTER2(BinaryOp, CPU, "_GreaterWithCast", functor::greater_with_cast, float, - bfloat16); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER7(BinaryOp, GPU, "Greater", functor::greater, float, Eigen::half, double, int64, uint8, int8, int16); diff --git a/tensorflow/core/kernels/cwise_op_greater_equal.cc b/tensorflow/core/kernels/cwise_op_greater_equal.cc index 8390035b86b..7f6b788eb2e 100644 --- a/tensorflow/core/kernels/cwise_op_greater_equal.cc +++ b/tensorflow/core/kernels/cwise_op_greater_equal.cc @@ -18,8 +18,6 @@ limitations under the License. namespace tensorflow { REGISTER9(BinaryOp, CPU, "GreaterEqual", functor::greater_equal, float, Eigen::half, double, int32, int64, uint8, int8, int16, bfloat16); -REGISTER2(BinaryOp, CPU, "_GreaterEqualWithCast", - functor::greater_equal_with_cast, float, bfloat16); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER7(BinaryOp, GPU, "GreaterEqual", functor::greater_equal, float, Eigen::half, double, int64, uint8, int8, int16); diff --git a/tensorflow/core/kernels/cwise_op_less.cc b/tensorflow/core/kernels/cwise_op_less.cc index 55f165128d8..062a029f069 100644 --- a/tensorflow/core/kernels/cwise_op_less.cc +++ b/tensorflow/core/kernels/cwise_op_less.cc @@ -19,8 +19,6 @@ namespace tensorflow { REGISTER5(BinaryOp, CPU, "Less", functor::less, float, Eigen::half, double, bfloat16, int32); REGISTER4(BinaryOp, CPU, "Less", functor::less, int64, uint8, int8, int16); -REGISTER2(BinaryOp, CPU, "_LessWithCast", functor::less_with_cast, float, - bfloat16); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER7(BinaryOp, GPU, "Less", functor::less, float, Eigen::half, double, diff --git a/tensorflow/core/kernels/cwise_op_less_equal.cc b/tensorflow/core/kernels/cwise_op_less_equal.cc index 2961742f5f4..43af03878e9 100644 --- a/tensorflow/core/kernels/cwise_op_less_equal.cc +++ b/tensorflow/core/kernels/cwise_op_less_equal.cc @@ -20,8 +20,6 @@ REGISTER5(BinaryOp, CPU, "LessEqual", functor::less_equal, float, Eigen::half, bfloat16, double, int32); REGISTER4(BinaryOp, CPU, "LessEqual", functor::less_equal, int64, uint8, int8, int16); -REGISTER2(BinaryOp, CPU, "_LessEqualWithCast", functor::less_equal_with_cast, - float, bfloat16); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER7(BinaryOp, GPU, "LessEqual", functor::less_equal, float, Eigen::half, diff --git a/tensorflow/core/kernels/cwise_op_not_equal_to_1.cc b/tensorflow/core/kernels/cwise_op_not_equal_to_1.cc index 68a996c97b6..4de69edd21d 100644 --- a/tensorflow/core/kernels/cwise_op_not_equal_to_1.cc +++ b/tensorflow/core/kernels/cwise_op_not_equal_to_1.cc @@ -20,8 +20,6 @@ REGISTER7(BinaryOp, CPU, "NotEqual", functor::not_equal_to, float, Eigen::half, double, uint8, int8, int16, bfloat16); REGISTER3(BinaryOp, CPU, "NotEqual", functor::not_equal_to, uint16, uint32, uint64); 
-REGISTER2(BinaryOp, CPU, "_NotEqualWithCast", functor::not_equal_to_with_cast, - float, bfloat16); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER4(BinaryOp, GPU, "NotEqual", functor::not_equal_to, float, Eigen::half, double, uint8); diff --git a/tensorflow/core/kernels/cwise_ops.h b/tensorflow/core/kernels/cwise_ops.h index 5ee21c121e9..88651d7bfdc 100644 --- a/tensorflow/core/kernels/cwise_ops.h +++ b/tensorflow/core/kernels/cwise_ops.h @@ -1141,32 +1141,6 @@ struct equal_to : base, bool> {}; template struct not_equal_to : base, bool> {}; -template -struct less_with_cast : base> {}; - -template -struct less_equal_with_cast : base> {}; - -template -struct greater_with_cast : base> {}; - -template -struct greater_equal_with_cast - : base> {}; - -template -struct equal_to_with_cast : base> {}; - -template -struct not_equal_to_with_cast - : base> {}; - struct logical_and : base {}; struct logical_or : base {}; diff --git a/tensorflow/core/kernels/cwise_ops_test.cc b/tensorflow/core/kernels/cwise_ops_test.cc index 4fee16fa759..bc77a119f0a 100644 --- a/tensorflow/core/kernels/cwise_ops_test.cc +++ b/tensorflow/core/kernels/cwise_ops_test.cc @@ -96,66 +96,62 @@ BM_UNARY(gpu, Round, float, DT_FLOAT); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM // data func scalar. -template -Graph* BinaryScalar(int num, const string& func, DataType dtype) { +Graph* BinaryScalar(int num, const string& func) { Graph* g = new Graph(OpRegistry::Global()); - Tensor lhs(dtype, TensorShape({64, 64, num / (64 * 64)})); - lhs.flat().setRandom(); - Tensor rhs(dtype, TensorShape({})); - rhs.flat().setRandom(); + Tensor lhs(DT_FLOAT, TensorShape({64, 64, num / (64 * 64)})); + lhs.flat().setRandom(); + Tensor rhs(DT_FLOAT, TensorShape({})); + rhs.flat().setRandom(); test::graph::Binary(g, func, test::graph::Constant(g, lhs), test::graph::Constant(g, rhs)); return g; } -#define BM_BINARY_SCALAR(DEVICE, FUNC, T, TYPE) \ - void BM_##DEVICE##_##FUNC##_scalar##_##TYPE(int iters, int num) { \ - const int64 tot = static_cast(iters) * num; \ - testing::UseRealTime(); \ - testing::ItemsProcessed(tot); \ - testing::BytesProcessed(tot * sizeof(T)); \ - test::Benchmark(#DEVICE, BinaryScalar(num, #FUNC, TYPE)).Run(iters); \ - } \ - BENCHMARK(BM_##DEVICE##_##FUNC##_scalar##_##TYPE) \ - ->Arg(1 << 12) /* must >= 4096 */ \ - ->Arg(1 << 13) \ - ->Arg(1 << 14) \ - ->Arg((1 << 15) - (1 << 13)) \ - ->Arg(1 << 15) \ - ->Arg((1 << 15) + (1 << 14)) \ - ->Arg(1 << 16) \ - ->Arg((1 << 17) - (1 << 15)) \ - ->Arg(1 << 17) \ - ->Arg((1 << 17) + (1 << 16)) \ - ->Arg(1 << 18) \ - ->Arg(1 << 19) \ +#define BM_BINARY_SCALAR(DEVICE, FUNC) \ + void BM_##DEVICE##_##FUNC##_scalar(int iters, int num) { \ + const int64 tot = static_cast(iters) * num; \ + testing::UseRealTime(); \ + testing::ItemsProcessed(tot); \ + testing::BytesProcessed(tot * sizeof(float)); \ + test::Benchmark(#DEVICE, BinaryScalar(num, #FUNC)).Run(iters); \ + } \ + BENCHMARK(BM_##DEVICE##_##FUNC##_scalar) \ + ->Arg(1 << 12) /* must >= 4096 */ \ + ->Arg(1 << 13) \ + ->Arg(1 << 14) \ + ->Arg((1 << 15) - (1 << 13)) \ + ->Arg(1 << 15) \ + ->Arg((1 << 15) + (1 << 14)) \ + ->Arg(1 << 16) \ + ->Arg((1 << 17) - (1 << 15)) \ + ->Arg(1 << 17) \ + ->Arg((1 << 17) + (1 << 16)) \ + ->Arg(1 << 18) \ + ->Arg(1 << 19) \ ->Arg(1 << 20); -BM_BINARY_SCALAR(cpu, Less, float, DT_FLOAT); -BM_BINARY_SCALAR(cpu, Less, bfloat16, DT_BFLOAT16); -BM_BINARY_SCALAR(cpu, _LessWithCast, float, DT_FLOAT); -BM_BINARY_SCALAR(cpu, _LessWithCast, bfloat16, DT_BFLOAT16); +BM_BINARY_SCALAR(cpu, Less); #if GOOGLE_CUDA || 
TENSORFLOW_USE_ROCM -BM_BINARY_SCALAR(gpu, Less, float, DT_FLOAT); +BM_BINARY_SCALAR(gpu, Less); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM #ifdef TENSORFLOW_USE_SYCL -BM_BINARY_SCALAR(sycl, Less, float, DT_FLOAT); +BM_BINARY_SCALAR(sycl, Less); #endif // TENSORFLOW_USE_SYCL -BM_BINARY_SCALAR(cpu, Add, float, DT_FLOAT); +BM_BINARY_SCALAR(cpu, Add); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -BM_BINARY_SCALAR(gpu, Add, float, DT_FLOAT); +BM_BINARY_SCALAR(gpu, Add); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM #ifdef TENSORFLOW_USE_SYCL -BM_BINARY_SCALAR(sycl, Add, float, DT_FLOAT); +BM_BINARY_SCALAR(sycl, Add); #endif // TENSORFLOW_USE_SYCL -BM_BINARY_SCALAR(cpu, DivNoNan, float, DT_FLOAT); +BM_BINARY_SCALAR(cpu, DivNoNan); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM -BM_BINARY_SCALAR(gpu, DivNoNan, float, DT_FLOAT); +BM_BINARY_SCALAR(gpu, DivNoNan); #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM #ifdef TENSORFLOW_USE_SYCL -BM_BINARY_SCALAR(sycl, DivNoNan, float, DT_FLOAT); +BM_BINARY_SCALAR(sycl, DivNoNan); #endif // TENSORFLOW_USE_SYCL #undef BM_BINARY_SCALAR diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index 91cd835a1d2..cbf1ef53dde 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -701,23 +701,6 @@ REGISTER_OP("GreaterEqual").COMPARISON(); #undef COMPARISON -#define COMPARISON_WITH_CAST() \ - Input("x: T") \ - .Input("y: T") \ - .Output("z: T") \ - .Attr("T: {float, bfloat16}") \ - .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn) - -REGISTER_OP("_LessWithCast").COMPARISON_WITH_CAST(); - -REGISTER_OP("_LessEqualWithCast").COMPARISON_WITH_CAST(); - -REGISTER_OP("_GreaterWithCast").COMPARISON_WITH_CAST(); - -REGISTER_OP("_GreaterEqualWithCast").COMPARISON_WITH_CAST(); - -#undef COMPARISON_WITH_CAST - // -------------------------------------------------------------------------- #define EQUALITY_COMPARISON() \ @@ -749,32 +732,6 @@ REGISTER_OP("NotEqual").EQUALITY_COMPARISON(); #undef EQUALITY_COMPARISON -#define EQUALITY_COMPARISON_WITH_CAST() \ - Input("x: T") \ - .Input("y: T") \ - .Output("z: T") \ - .SetIsCommutative() \ - .Attr("T: {bfloat16, float}") \ - .Attr("incompatible_shape_error: bool = true") \ - .SetShapeFn([](InferenceContext* c) { \ - ShapeHandle x = c->input(0); \ - ShapeHandle y = c->input(1); \ - ShapeHandle output; \ - bool incompatible_shape_error; \ - TF_RETURN_IF_ERROR(c->GetAttr("incompatible_shape_error", \ - &incompatible_shape_error)); \ - TF_RETURN_IF_ERROR(BroadcastBinaryOpOutputShapeFnHelper( \ - c, x, y, incompatible_shape_error, &output)); \ - c->set_output(0, output); \ - return Status::OK(); \ - }) - -REGISTER_OP("_EqualWithCast").EQUALITY_COMPARISON_WITH_CAST(); - -REGISTER_OP("_NotEqualWithCast").EQUALITY_COMPARISON_WITH_CAST(); - -#undef EQUALITY_COMPARISON_WITH_CAST - REGISTER_OP("ApproximateEqual") .Input("x: T") .Input("y: T") From c066122c0dc9ff9549d77516b6cfb950c6e18a2a Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Thu, 6 Aug 2020 01:20:16 -0700 Subject: [PATCH 2238/2522] Add support for using `strategy.extended.update` with VariablePolicy and ONREAD variables. This change has already been made for SyncOnReadVariables but not the Policy implementation classes. 
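As a rough illustration of the guard this change threads through the ON_READ code paths (a simplified sketch with made-up names, not the actual tf.distribute implementation), the cross-replica behaviour is taken only when the caller is not inside an `update`:

# Sketch only: stand-in for the in_replica_update_context() check added to
# value()/assign*() in values.py below.
def read_on_read_variable(var, in_cross_replica_context, in_replica_update_context):
  # Inside strategy.extended.update() the context is technically
  # cross-replica, but the call must still act on the local replica copy.
  if in_cross_replica_context and not in_replica_update_context:
    return var["aggregated"]
  return var["local"]

var = {"aggregated": 6, "local": 2}
print(read_on_read_variable(var, True, False))  # cross-replica read -> 6
print(read_on_read_variable(var, True, True))   # inside update()    -> 2
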
PiperOrigin-RevId: 325184447 Change-Id: I6ffd451ec21321f51eadbe9db5a732b14717d80a --- tensorflow/python/distribute/values.py | 30 ++++++++++++--------- tensorflow/python/distribute/values_util.py | 6 +++++ 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/tensorflow/python/distribute/values.py b/tensorflow/python/distribute/values.py index 9d148bd029d..87b711ce693 100644 --- a/tensorflow/python/distribute/values.py +++ b/tensorflow/python/distribute/values.py @@ -1084,7 +1084,8 @@ class SyncOnReadVariable(DistributedVariable): if values_util.is_saving_non_distributed(): return self._primary.assign_sub(value, use_locking, name, read_value) with ds_context.enter_or_assert_strategy(self._distribute_strategy): - if ds_context.in_cross_replica_context() and not _in_update_replica(): + if (ds_context.in_cross_replica_context() and + not values_util.in_replica_update_context()): return values_util.on_read_assign_sub_cross_replica( self, value, read_value=read_value) else: @@ -1095,7 +1096,8 @@ class SyncOnReadVariable(DistributedVariable): if values_util.is_saving_non_distributed(): return self._primary.assign_add(value, use_locking, name, read_value) with ds_context.enter_or_assert_strategy(self._distribute_strategy): - if ds_context.in_cross_replica_context() and not _in_update_replica(): + if (ds_context.in_cross_replica_context() and + not values_util.in_replica_update_context()): return values_util.on_read_assign_add_cross_replica( self, value, read_value=read_value) else: @@ -1106,7 +1108,8 @@ class SyncOnReadVariable(DistributedVariable): if values_util.is_saving_non_distributed(): return self._primary.assign(value, use_locking, name, read_value) with ds_context.enter_or_assert_strategy(self._distribute_strategy): - if ds_context.in_cross_replica_context() and not _in_update_replica(): + if (ds_context.in_cross_replica_context() and + not values_util.in_replica_update_context()): return values_util.on_read_assign_cross_replica( self, value, read_value=read_value) else: @@ -1157,7 +1160,8 @@ class SyncOnReadVariable(DistributedVariable): if values_util.is_saving_non_distributed(): return self._primary.value() with ds_context.enter_or_assert_strategy(self._distribute_strategy): - if ds_context.in_cross_replica_context() and not _in_update_replica(): + if (ds_context.in_cross_replica_context() and + not values_util.in_replica_update_context()): if self._aggregation == vs.VariableAggregation.ONLY_FIRST_REPLICA: return self._get_replica(0).value() return self._get_cross_replica() @@ -1292,7 +1296,10 @@ class OnReadPolicy(VariablePolicy): def value(self, var): with ds_context.enter_or_assert_strategy(var.distribute_strategy): - if ds_context.in_cross_replica_context(): + if (ds_context.in_cross_replica_context() and + not values_util.in_replica_update_context()): + if self._aggregation == vs.VariableAggregation.ONLY_FIRST_REPLICA: + return var._get_replica(0).value() # pylint: disable=protected-access return var._get_cross_replica() # pylint: disable=protected-access else: return var._get_on_device_or_primary().value() # pylint: disable=protected-access @@ -1325,7 +1332,8 @@ class OnReadPolicy(VariablePolicy): read_value=True): """Subtracts a value from this variable.""" with ds_context.enter_or_assert_strategy(var.distribute_strategy): - if ds_context.in_cross_replica_context(): + if (ds_context.in_cross_replica_context() and + not values_util.in_replica_update_context()): return values_util.on_read_assign_sub_cross_replica( var, value, read_value=read_value) else: @@ -1337,7 
+1345,8 @@ class OnReadPolicy(VariablePolicy): read_value=True): """Adds a value to this variable.""" with ds_context.enter_or_assert_strategy(var.distribute_strategy): - if ds_context.in_cross_replica_context(): + if (ds_context.in_cross_replica_context() and + not values_util.in_replica_update_context()): return values_util.on_read_assign_add_cross_replica( var, value, read_value=read_value) else: @@ -1347,7 +1356,8 @@ class OnReadPolicy(VariablePolicy): def assign(self, var, value, use_locking=False, name=None, read_value=True): with ds_context.enter_or_assert_strategy(var.distribute_strategy): - if ds_context.in_cross_replica_context(): + if (ds_context.in_cross_replica_context() and + not values_util.in_replica_update_context()): return values_util.on_read_assign_cross_replica(var, value, read_value=read_value) else: @@ -1525,7 +1535,3 @@ class OnWritePolicy(AutoPolicy): def _update_replica(self, var, update_fn, value, **kwargs): return _on_write_update_replica(var, update_fn, value, **kwargs) - - -def _in_update_replica(): - return distribute_lib.get_update_replica_id() is not None diff --git a/tensorflow/python/distribute/values_util.py b/tensorflow/python/distribute/values_util.py index 692d951cffa..67b5234d82c 100644 --- a/tensorflow/python/distribute/values_util.py +++ b/tensorflow/python/distribute/values_util.py @@ -30,6 +30,12 @@ from tensorflow.python.saved_model import save_context from tensorflow.python.saved_model import save_options +# Utility function that indicates if you are in an UpdateContext when running +# in a replica fn. +def in_replica_update_context(): + return distribute_lib.get_update_replica_id() is not None + + def on_write_assign(var, value, use_locking=False, name=None, read_value=True): assign_fn = lambda var, *a, **kw: var.assign(*a, **kw) return var._update( # pylint: disable=protected-access From 51e645bf3419ec943a0b19805340681fdcb6a2dc Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Thu, 6 Aug 2020 01:21:00 -0700 Subject: [PATCH 2239/2522] Followup CL to add support for saving non distributed version of variables with policy enabled. 
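A minimal sketch of the pattern being applied (illustrative names only, not the real TPUDistributedVariable API): each mutating method first checks whether a non-distributed save is in progress and, if so, bypasses the policy and acts on the primary component directly:

# Sketch only: "primary" and "policy" stand in for the real components.
def assign(var, value, saving_non_distributed):
  if saving_non_distributed:
    # Non-distributed save: write straight to the primary variable.
    return var.primary.assign(value)
  # Normal path: let the variable policy decide how to apply the update.
  return var.policy.assign(var, value)

The same guard is repeated for assign_add, assign_sub and the scatter_* methods in the diff below.
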
PiperOrigin-RevId: 325184526 Change-Id: If4519bf62576e908849ddaaca07079395ce45862 --- tensorflow/python/distribute/tpu_values.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tensorflow/python/distribute/tpu_values.py b/tensorflow/python/distribute/tpu_values.py index 901b906e4d9..f734caef5c5 100644 --- a/tensorflow/python/distribute/tpu_values.py +++ b/tensorflow/python/distribute/tpu_values.py @@ -207,42 +207,62 @@ class TPUDistributedVariable(TPUVariableMixin, values.DistributedVariable): self._policy._is_mirrored() # pylint: disable=protected-access def assign_sub(self, value, use_locking=False, name=None, read_value=True): + if values_util.is_saving_non_distributed(): + return self._primary.assign_sub(value, use_locking, name, read_value) return self._policy.assign_sub( self, value, use_locking=use_locking, name=name, read_value=read_value) def assign_add(self, value, use_locking=False, name=None, read_value=True): + if values_util.is_saving_non_distributed(): + return self._primary.assign_add(value, use_locking, name, read_value) return self._policy.assign_add( self, value, use_locking=use_locking, name=name, read_value=read_value) def assign(self, value, use_locking=False, name=None, read_value=True): + if values_util.is_saving_non_distributed(): + return self._primary.assign(value, use_locking, name, read_value) return self._policy.assign( self, value, use_locking=use_locking, name=name, read_value=read_value) def scatter_sub(self, sparse_delta, use_locking=False, name=None): + if values_util.is_saving_non_distributed(): + return self._primary.scatter_sub(sparse_delta, use_locking, name) return self._policy.scatter_sub( self, sparse_delta, use_locking=use_locking, name=name) def scatter_add(self, sparse_delta, use_locking=False, name=None): + if values_util.is_saving_non_distributed(): + return self._primary.scatter_add(sparse_delta, use_locking, name) return self._policy.scatter_add( self, sparse_delta, use_locking=use_locking, name=name) def scatter_mul(self, sparse_delta, use_locking=False, name=None): + if values_util.is_saving_non_distributed(): + return self._primary.scatter_mul(sparse_delta, use_locking, name) return self._policy.scatter_mul( self, sparse_delta, use_locking=use_locking, name=name) def scatter_div(self, sparse_delta, use_locking=False, name=None): + if values_util.is_saving_non_distributed(): + return self._primary.scatter_div(sparse_delta, use_locking, name) return self._policy.scatter_div( self, sparse_delta, use_locking=use_locking, name=name) def scatter_min(self, sparse_delta, use_locking=False, name=None): + if values_util.is_saving_non_distributed(): + return self._primary.scatter_min(sparse_delta, use_locking, name) return self._policy.scatter_min( self, sparse_delta, use_locking=use_locking, name=name) def scatter_max(self, sparse_delta, use_locking=False, name=None): + if values_util.is_saving_non_distributed(): + return self._primary.scatter_max(sparse_delta, use_locking, name) return self._policy.scatter_max( self, sparse_delta, use_locking=use_locking, name=name) def scatter_update(self, sparse_delta, use_locking=False, name=None): + if values_util.is_saving_non_distributed(): + return self._primary.scatter_update(sparse_delta, use_locking, name) return self._policy.scatter_update( self, sparse_delta, use_locking=use_locking, name=name) From 3182f482fd7cc19fe5f75bf502f148c8583e3c18 Mon Sep 17 00:00:00 2001 From: fsx950223 Date: Thu, 6 Aug 2020 08:35:37 +0000 Subject: [PATCH 2240/2522] fix docker envs --- 
.../tools/dockerfiles/dockerfiles/devel-gpu-jupyter.Dockerfile | 2 +- tensorflow/tools/dockerfiles/dockerfiles/devel-gpu.Dockerfile | 2 +- .../dockerfiles/ppc64le/devel-gpu-ppc64le-jupyter.Dockerfile | 2 +- .../dockerfiles/ppc64le/devel-gpu-ppc64le.Dockerfile | 2 +- .../dockerfiles/partials/ubuntu/devel-nvidia.partial.Dockerfile | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu-jupyter.Dockerfile index b99c384fe20..b8bbbbd7bdf 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu-jupyter.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu-jupyter.Dockerfile @@ -79,7 +79,7 @@ RUN [[ "${ARCH}" = "ppc64le" ]] || { apt-get update && \ && rm -rf /var/lib/apt/lists/*; } # Configure the build for our CUDA configuration. -ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:/usr/include/x64_64-linux-gnu:$LD_LIBRARY_PATH +ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:/usr/include/x86_64-linux-gnu:/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH ENV TF_NEED_CUDA 1 ENV TF_NEED_TENSORRT 1 ENV TF_CUDA_VERSION=${CUDA} diff --git a/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu.Dockerfile index 4493964cffc..81d50dccf9d 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu.Dockerfile @@ -79,7 +79,7 @@ RUN [[ "${ARCH}" = "ppc64le" ]] || { apt-get update && \ && rm -rf /var/lib/apt/lists/*; } # Configure the build for our CUDA configuration. -ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:/usr/include/x64_64-linux-gnu:$LD_LIBRARY_PATH +ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:/usr/include/x86_64-linux-gnu:/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH ENV TF_NEED_CUDA 1 ENV TF_NEED_TENSORRT 1 ENV TF_CUDA_VERSION=${CUDA} diff --git a/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-gpu-ppc64le-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-gpu-ppc64le-jupyter.Dockerfile index c104f6c86cb..946136f0c88 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-gpu-ppc64le-jupyter.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-gpu-ppc64le-jupyter.Dockerfile @@ -79,7 +79,7 @@ RUN [[ "${ARCH}" = "ppc64le" ]] || { apt-get update && \ && rm -rf /var/lib/apt/lists/*; } # Configure the build for our CUDA configuration. 
-ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:/usr/include/x64_64-linux-gnu:$LD_LIBRARY_PATH +ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:/usr/include/x86_64-linux-gnu:/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH ENV TF_NEED_CUDA 1 ENV TF_NEED_TENSORRT 1 ENV TF_CUDA_VERSION=${CUDA} diff --git a/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-gpu-ppc64le.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-gpu-ppc64le.Dockerfile index 9e2c6385d34..cf84f4a74a8 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-gpu-ppc64le.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-gpu-ppc64le.Dockerfile @@ -79,7 +79,7 @@ RUN [[ "${ARCH}" = "ppc64le" ]] || { apt-get update && \ && rm -rf /var/lib/apt/lists/*; } # Configure the build for our CUDA configuration. -ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:/usr/include/x64_64-linux-gnu:$LD_LIBRARY_PATH +ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:/usr/include/x86_64-linux-gnu:/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH ENV TF_NEED_CUDA 1 ENV TF_NEED_TENSORRT 1 ENV TF_CUDA_VERSION=${CUDA} diff --git a/tensorflow/tools/dockerfiles/partials/ubuntu/devel-nvidia.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/ubuntu/devel-nvidia.partial.Dockerfile index d7e01071a14..5b4b2b7f60b 100644 --- a/tensorflow/tools/dockerfiles/partials/ubuntu/devel-nvidia.partial.Dockerfile +++ b/tensorflow/tools/dockerfiles/partials/ubuntu/devel-nvidia.partial.Dockerfile @@ -56,7 +56,7 @@ RUN [[ "${ARCH}" = "ppc64le" ]] || { apt-get update && \ && rm -rf /var/lib/apt/lists/*; } # Configure the build for our CUDA configuration. -ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:/usr/include/x64_64-linux-gnu:$LD_LIBRARY_PATH +ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:/usr/include/x86_64-linux-gnu:/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH ENV TF_NEED_CUDA 1 ENV TF_NEED_TENSORRT 1 ENV TF_CUDA_VERSION=${CUDA} From a41898dfbda9de8f514343ecab6a75be89820749 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 6 Aug 2020 01:46:05 -0700 Subject: [PATCH 2241/2522] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 325186817 Change-Id: I9ecf8eeca0785a1c59a6451bec1b7556ed25eb59 --- tensorflow/go/op/wrappers.go | 110 +++++++++++++++++------------------ 1 file changed, 55 insertions(+), 55 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index ea060f815c6..34ff57636ca 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -3660,6 +3660,43 @@ func TensorForestTreeIsInitializedOp(scope *Scope, tree_handle tf.Output) (is_in return op.Output(0) } +// TensorForestTreeResourceHandleOpAttr is an optional argument to TensorForestTreeResourceHandleOp. +type TensorForestTreeResourceHandleOpAttr func(optionalAttr) + +// TensorForestTreeResourceHandleOpContainer sets the optional container attribute to value. 
+// If not specified, defaults to "" +func TensorForestTreeResourceHandleOpContainer(value string) TensorForestTreeResourceHandleOpAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// TensorForestTreeResourceHandleOpSharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func TensorForestTreeResourceHandleOpSharedName(value string) TensorForestTreeResourceHandleOpAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Creates a handle to a TensorForestTreeResource +func TensorForestTreeResourceHandleOp(scope *Scope, optional ...TensorForestTreeResourceHandleOpAttr) (resource tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "TensorForestTreeResourceHandleOp", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // AllCandidateSamplerAttr is an optional argument to AllCandidateSampler. type AllCandidateSamplerAttr func(optionalAttr) @@ -13332,24 +13369,6 @@ func ResizeArea(scope *Scope, images tf.Output, size tf.Output, optional ...Resi return op.Output(0) } -// Returns the number of work units this Reader has finished processing. -// -// Arguments: -// reader_handle: Handle to a Reader. -func ReaderNumWorkUnitsCompletedV2(scope *Scope, reader_handle tf.Output) (units_completed tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ReaderNumWorkUnitsCompletedV2", - Input: []tf.Input{ - reader_handle, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Returns up to `num_records` (key, value) pairs produced by a Reader. // // Will dequeue from the input queue if necessary (e.g. when the @@ -39066,6 +39085,24 @@ func ResourceApplyAddSign(scope *Scope, var_ tf.Output, m tf.Output, lr tf.Outpu return scope.AddOperation(opspec) } +// Returns the number of work units this Reader has finished processing. +// +// Arguments: +// reader_handle: Handle to a Reader. +func ReaderNumWorkUnitsCompletedV2(scope *Scope, reader_handle tf.Output) (units_completed tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ReaderNumWorkUnitsCompletedV2", + Input: []tf.Input{ + reader_handle, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // FractionalMaxPoolAttr is an optional argument to FractionalMaxPool. type FractionalMaxPoolAttr func(optionalAttr) @@ -49698,43 +49735,6 @@ func LoadTPUEmbeddingProximalAdagradParametersGradAccumDebug(scope *Scope, param return scope.AddOperation(opspec) } -// TensorForestTreeResourceHandleOpAttr is an optional argument to TensorForestTreeResourceHandleOp. -type TensorForestTreeResourceHandleOpAttr func(optionalAttr) - -// TensorForestTreeResourceHandleOpContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func TensorForestTreeResourceHandleOpContainer(value string) TensorForestTreeResourceHandleOpAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// TensorForestTreeResourceHandleOpSharedName sets the optional shared_name attribute to value. 
-// If not specified, defaults to "" -func TensorForestTreeResourceHandleOpSharedName(value string) TensorForestTreeResourceHandleOpAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Creates a handle to a TensorForestTreeResource -func TensorForestTreeResourceHandleOp(scope *Scope, optional ...TensorForestTreeResourceHandleOpAttr) (resource tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "TensorForestTreeResourceHandleOp", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // FusedResizeAndPadConv2DAttr is an optional argument to FusedResizeAndPadConv2D. type FusedResizeAndPadConv2DAttr func(optionalAttr) From 6657b112c8ba4572c489e18f475fcf465807bc95 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 6 Aug 2020 02:01:52 -0700 Subject: [PATCH 2242/2522] Update GraphDef version to 485. PiperOrigin-RevId: 325188185 Change-Id: I39458e43fdf45c435e1900cba7240fe7f087802a --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index acaa48f251c..431784a5a1a 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 484 // Updated: 2020/8/5 +#define TF_GRAPH_DEF_VERSION 485 // Updated: 2020/8/6 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From ba2e7733e1b1f483ce30c084079129de82b17d81 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 6 Aug 2020 02:01:53 -0700 Subject: [PATCH 2243/2522] compat: Update forward compatibility horizon to 2020-08-06 PiperOrigin-RevId: 325188186 Change-Id: Ie0b41c234035125beeee6a431eb4e76f8e71b869 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index a274743d124..65bb633855a 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 8, 5) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 8, 6) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From 4be925e37852e7b80a11f96fee03e4cb085b1408 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Thu, 6 Aug 2020 02:13:49 -0700 Subject: [PATCH 2244/2522] Optimize X^2 to X*X and X^1 to X in TfLite PiperOrigin-RevId: 325189476 Change-Id: I8da0707973e4449fdd2705fd1201e90fc000c2c0 --- .../compiler/mlir/lite/tests/optimize.mlir | 19 +++++++++++++++++++ .../mlir/lite/transforms/optimize_patterns.td | 12 ++++++++++++ 2 files changed, 31 insertions(+) diff --git a/tensorflow/compiler/mlir/lite/tests/optimize.mlir b/tensorflow/compiler/mlir/lite/tests/optimize.mlir index b8be96a9159..cf7fe07d729 100644 --- a/tensorflow/compiler/mlir/lite/tests/optimize.mlir +++ b/tensorflow/compiler/mlir/lite/tests/optimize.mlir @@ -1066,3 +1066,22 @@ func @DontConvertSqueezeToReshape(%arg0: tensor<*xf32>) -> tensor<*xf32> { // CHECK: return %[[RESULT]] } +func @ConvertPow1ToIdentity(%arg0: tensor<2x2xf32>) -> tensor<2x2xf32> { + %cst = constant dense<1.000000e+00> : tensor + %0 = "tfl.pow"(%arg0, %cst) : (tensor<2x2xf32>, tensor) -> tensor<2x2xf32> + return %0 : tensor<2x2xf32> + +// CHECK-LABEL: ConvertPow1ToIdentity +// CHECK: return %arg0 +} + +func @ConvertPow2ToSquare(%arg0: tensor<2x2xf32>) -> tensor<2x2xf32> { + %cst = constant dense<2.000000e+00> : tensor + %0 = "tfl.pow"(%arg0, %cst) : (tensor<2x2xf32>, tensor) -> tensor<2x2xf32> + return %0 : tensor<2x2xf32> + +// CHECK-LABEL: ConvertPow2ToSquare +// CHECK: %[[RESULT:.*]] = tfl.mul %arg0, %arg0 {fused_activation_function = "NONE"} : tensor<2x2xf32> +// CHECK: return %[[RESULT]] +} + diff --git a/tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td b/tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td index 83a09e9dd2b..ef6706875c9 100644 --- a/tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td +++ b/tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td @@ -508,3 +508,15 @@ foreach ActFun = [TFL_AF_Relu, TFL_AF_Relu6, TFL_AF_Relu1, TFL_AF_None] in { def OptimizeReluSquaredDifference : Pat< (TFL_ReluOp (TFL_SquaredDifferenceOp $l, $r)), (TFL_SquaredDifferenceOp $l, $r)>; + +// Optimize X^1 o X +def OptimizePow1ToIdentity : Pat< + (TFL_PowOp $input, + (ConstantOp ConstantAttr, "1.0f">)), + (replaceWithValue $input)>; + +// Optimize X^2 to X*X +def OptimizePow2ToSquare : Pat< + (TFL_PowOp $input, + (ConstantOp ConstantAttr, "2.0f">)), + (TFL_MulOp $input, $input, TFL_AF_None)>; From 3c7b2f5fc4d765036cea2232975896fbd169ed4e Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Thu, 6 Aug 2020 05:46:28 -0700 Subject: [PATCH 2245/2522] Fix bug causing the local closure to be ignored for variables marked nonlocal. 
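The behaviour the analyzer has to model can be seen with plain Python (this snippet only illustrates `nonlocal` scoping, it is not AutoGraph code): marking a closed-over name `nonlocal` makes the inner function read and write the enclosing function's binding, so its inferred type should follow the enclosing annotation rather than being resolved as an external name:

def outer(x: int):
  def inner():
    nonlocal x
    x = x + 1   # rebinds outer's `x`, so `x` keeps outer's declared type
    return x
  return inner()

print(outer(41))  # 42
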
PiperOrigin-RevId: 325212970 Change-Id: I66fc2841ae56bf647c00953271d801782c070b9a --- .../pyct/static_analysis/type_inference.py | 18 +++--- .../static_analysis/type_inference_test.py | 59 +++++++++++++++++++ 2 files changed, 69 insertions(+), 8 deletions(-) diff --git a/tensorflow/python/autograph/pyct/static_analysis/type_inference.py b/tensorflow/python/autograph/pyct/static_analysis/type_inference.py index cf866ad3ec7..755b6c32c64 100644 --- a/tensorflow/python/autograph/pyct/static_analysis/type_inference.py +++ b/tensorflow/python/autograph/pyct/static_analysis/type_inference.py @@ -236,14 +236,16 @@ class StmtInferrer(gast.NodeVisitor): if isinstance(node.ctx, gast.Load): types = self.types_in.types.get(name, None) - if (types is None) and (name not in self.scope.bound): - if name in self.closure_types: - types = self.closure_types[name] - else: - types, value = self.resolver.res_name( - self.namespace, self.types_in.types, name) - if value is not None: - anno.setanno(node, anno.Static.VALUE, value) + if types is None: + if (name not in self.scope.bound) or (name in self.scope.nonlocals): + # TODO(mdan): Test with global variables. + if name in self.closure_types: + types = self.closure_types[name] + else: + types, value = self.resolver.res_name( + self.namespace, self.types_in.types, name) + if value is not None: + anno.setanno(node, anno.Static.VALUE, value) elif isinstance(node.ctx, gast.Param): type_name = anno.getanno(node.annotation, anno.Basic.QN, None) diff --git a/tensorflow/python/autograph/pyct/static_analysis/type_inference_test.py b/tensorflow/python/autograph/pyct/static_analysis/type_inference_test.py index e3cb7e04c61..de71854d4fe 100644 --- a/tensorflow/python/autograph/pyct/static_analysis/type_inference_test.py +++ b/tensorflow/python/autograph/pyct/static_analysis/type_inference_test.py @@ -463,6 +463,22 @@ class TypeInferenceAnalyzerTest(test.TestCase): self.assertTypes(fn_body[0].body[0].value, 'int') self.assertClosureTypes(fn_body[0], {'x': {'int'}}) + def test_local_function_closure_mutable_var(self): + + def test_fn(x: int): + + def foo(): + nonlocal x + return x + + foo() + + node, _ = TestTranspiler(BasicTestResolver).transform(test_fn, None) + fn_body = node.body + + self.assertTypes(fn_body[0].body[1].value, 'int') + self.assertClosureTypes(fn_body[0], {'x': {'int'}}) + def test_local_function_closure_ignored_for_bound_symbols(self): def test_fn(x: float): # pylint:disable=unused-argument @@ -496,6 +512,49 @@ class TypeInferenceAnalyzerTest(test.TestCase): self.assertTypes(fn_body[1].targets[0], float) self.assertClosureTypes(fn_body[0], {'x': {float}}) + def test_side_effects_on_arg_function_closure(self): + + test_self = self + + class Resolver(type_inference.Resolver): + + def res_name(self, ns, types_ns, name): + test_self.assertEqual(name, qual_names.QN('g')) + return None, g + + def res_value(self, ns, value): + test_self.assertEqual(value, 1.0) + return {float} + + def res_arg(self, ns, types_ns, f_name, name, type_anno): + return {str(type_anno)} + + def res_call(self, ns, types_ns, node, args, keywords): + test_self.assertEqual(node.func.id, 'g') + return None, {qual_names.QN('x'): {str}} + + def g(foo): + # The resolver will convey that this function has the following body: + # + # nonlocal x + # x = 'a' + # foo() + del foo + pass + + def test_fn(x: int): # pylint:disable=unused-argument + + def foo(): + return x + + x = 1.0 + g(foo) + + node, _ = TestTranspiler(Resolver).transform(test_fn, None) + fn_body = node.body + + 
self.assertTypes(fn_body[0].body[0].value, str) + def test_subscript(self): test_self = self From 6175b78d8386bd6e5b2beebedb9f40e6b887d5a9 Mon Sep 17 00:00:00 2001 From: acxz <17132214+acxz@users.noreply.github.com> Date: Thu, 6 Aug 2020 09:09:12 -0400 Subject: [PATCH 2246/2522] add /hip suffix to find hip path --- third_party/gpus/rocm_configure.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/gpus/rocm_configure.bzl b/third_party/gpus/rocm_configure.bzl index d28337de836..dcc1d52688e 100644 --- a/third_party/gpus/rocm_configure.bzl +++ b/third_party/gpus/rocm_configure.bzl @@ -389,7 +389,7 @@ def _find_libs(repository_ctx, rocm_config, bash_bin): libs_paths = [ (name, _rocm_lib_paths(repository_ctx, name, path)) for name, path in [ - ("hip_hcc", rocm_config.rocm_toolkit_path), + ("hip_hcc", rocm_config.rocm_toolkit_path + "/hip"), ("rocblas", rocm_config.rocm_toolkit_path + "/rocblas"), ("rocfft", rocm_config.rocm_toolkit_path + "/rocfft"), ("hiprand", rocm_config.rocm_toolkit_path + "/hiprand"), From d17db7c0e38200262b5b86cf86b0e01003e1ef1a Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Thu, 6 Aug 2020 08:34:40 -0500 Subject: [PATCH 2247/2522] BUG: log_cosh alias missing results in round-trip serialization failure --- tensorflow/python/keras/metrics.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/keras/metrics.py b/tensorflow/python/keras/metrics.py index 12532a54489..6b53a02ce05 100644 --- a/tensorflow/python/keras/metrics.py +++ b/tensorflow/python/keras/metrics.py @@ -3402,6 +3402,7 @@ mae = MAE = mean_absolute_error mape = MAPE = mean_absolute_percentage_error msle = MSLE = mean_squared_logarithmic_error cosine_similarity = cosine_proximity +log_cosh = logcosh def clone_metric(metric): From 11705356a7108b39fdefc427dbcf3c4f5f7e215c Mon Sep 17 00:00:00 2001 From: Alexander Belyaev Date: Thu, 6 Aug 2020 06:39:01 -0700 Subject: [PATCH 2248/2522] [MLIR] Remove Affine->STD and SCF->STD patterns from lhlo->llvm pass. PiperOrigin-RevId: 325219360 Change-Id: Idf48303c7be7d6efea24216287d5bbb87affca73 --- tensorflow/compiler/mlir/hlo/BUILD | 2 -- .../lib/Dialect/mhlo/transforms/lhlo_legalize_to_llvm_pass.cc | 4 ---- tensorflow/compiler/mlir/hlo/tests/lhlo-legalize-to-llvm.mlir | 2 +- 3 files changed, 1 insertion(+), 7 deletions(-) diff --git a/tensorflow/compiler/mlir/hlo/BUILD b/tensorflow/compiler/mlir/hlo/BUILD index dd63b68b890..126d44670a0 100644 --- a/tensorflow/compiler/mlir/hlo/BUILD +++ b/tensorflow/compiler/mlir/hlo/BUILD @@ -760,8 +760,6 @@ cc_library( ":lhlo_legalize_to_llvm", # build-cleaner: keep ":materialize_broadcasts", # build-cleaner: keep ":unfuse_batch_norm", # build-cleaner: keep - "@llvm-project//mlir:AffineToStandardTransforms", - "@llvm-project//mlir:CFGTransforms", "@llvm-project//mlir:IR", "@llvm-project//mlir:InferTypeOpInterface", "@llvm-project//mlir:LLVMDialect", diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_llvm_pass.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_llvm_pass.cc index 00252735023..8493a1feb5d 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_llvm_pass.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_llvm_pass.cc @@ -15,8 +15,6 @@ limitations under the License. 
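// Note on intent, inferred from the accompanying RUN-line change in
// lhlo-legalize-to-llvm.mlir rather than stated in this file: with the
// Affine->STD and SCF->STD patterns no longer pulled into this pass, callers
// are expected to run -lower-affine and -convert-scf-to-std before
// -test-lhlo-legalize-to-llvm.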
#include "mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" #include "mlir-hlo/Dialect/mhlo/transforms/rewriters.h" -#include "mlir/Conversion/AffineToStandard/AffineToStandard.h" -#include "mlir/Conversion/SCFToStandard/SCFToStandard.h" #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h" #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" @@ -39,8 +37,6 @@ class TestLhloToLLVMPass LLVMTypeConverter converter(&getContext()); populateStdToLLVMConversionPatterns(converter, patterns); PopulateLhloToLLVMConversionPatterns(&converter, &patterns); - populateLoopToStdConversionPatterns(patterns, &getContext()); - populateAffineToStdConversionPatterns(patterns, &getContext()); ConversionTarget target(getContext()); target.addLegalDialect(); diff --git a/tensorflow/compiler/mlir/hlo/tests/lhlo-legalize-to-llvm.mlir b/tensorflow/compiler/mlir/hlo/tests/lhlo-legalize-to-llvm.mlir index 5bb1d475b24..45c383bd1d6 100644 --- a/tensorflow/compiler/mlir/hlo/tests/lhlo-legalize-to-llvm.mlir +++ b/tensorflow/compiler/mlir/hlo/tests/lhlo-legalize-to-llvm.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-hlo-opt %s --test-lhlo-legalize-to-llvm -split-input-file | FileCheck %s +// RUN: mlir-hlo-opt %s -lower-affine -convert-scf-to-std -test-lhlo-legalize-to-llvm -split-input-file | FileCheck %s // CHECK-LABEL: func @static_memref_cast func @static_memref_cast(%buf : memref<10x1x5xf32>) { From 958484854643044e5931503ac2c90da7f128274e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 6 Aug 2020 07:46:30 -0700 Subject: [PATCH 2249/2522] [MLIR][XLA] Allow for choice of safe/unsafe variant in broadcast utils Create safe or unsafe variants of `shape.broadcast` depending on the context. The representation by means of an extent tensor is only legal if the operands are known to be broadcastable. Currently, there is no use in a safe context in the codebase but it will be used for shape inference eventually. PiperOrigin-RevId: 325228073 Change-Id: I3295f3558786878fc3d0099974033e501574aa35 --- .../include/mlir-hlo/utils/broadcast_utils.h | 10 ++++--- .../mlir/hlo/lib/Dialect/mhlo/IR/chlo_ops.cc | 2 +- .../mhlo/transforms/chlo_legalize_to_hlo.cc | 4 +-- .../mlir/hlo/lib/utils/broadcast_utils.cc | 28 ++++++++++++------- .../tests/chlo_infer_shape_type_methods.mlir | 11 ++++---- .../chlo_legalize_to_hlo_broadcasts.mlir | 10 +++---- .../tests/legalize-tf-binary-elementwise.mlir | 12 ++++---- 7 files changed, 43 insertions(+), 34 deletions(-) diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/broadcast_utils.h b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/broadcast_utils.h index 1e2404299b2..1c57073f4ab 100644 --- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/broadcast_utils.h +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/broadcast_utils.h @@ -38,10 +38,12 @@ bool IsLegalNumpyRankedBroadcast(Value lhs, Value rhs, // Emits shape dialect ops to compute the result shape for a broadcasting // binary elementwise op which broadcasts according to "numpy" semantics -// (see above), returning an extents tensor of the resulting shape. -Value ComputeBinaryElementwiseBroadcastingResultExtents(Location loc, Value lhs, - Value rhs, - OpBuilder& builder); +// (see above), returning a `shape.shape` or an extent tensor of the resulting +// shape. The result should only be an extent tensor in contexts that ensure +// both operands to be broadcastable. 
+Value ComputeBinaryElementwiseBroadcastingResultExtents( + Location loc, Value lhs, Value rhs, OpBuilder& builder, + bool unsafe_as_extent_tensor); } // namespace hlo } // namespace mlir diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/chlo_ops.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/chlo_ops.cc index 99ed8bcb849..81389c3be89 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/chlo_ops.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/chlo_ops.cc @@ -151,7 +151,7 @@ LogicalResult ReifyBroadcastBinaryOpReturnTypeShapes( } Value computed_shape = hlo::ComputeBinaryElementwiseBroadcastingResultExtents( - loc, lhs, rhs, builder); + loc, lhs, rhs, builder, /*unsafe_as_extent_tensor=*/false); if (!computed_shape) return failure(); reifiedReturnShapes.push_back(computed_shape); return success(); diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo.cc index adbd2e5a628..c2db4880632 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo.cc @@ -124,8 +124,8 @@ struct ConvertRankedDynamicBroadcastBinaryOp int64_t result_rank = std::max(lhs_type.getRank(), rhs_type.getRank()); Value result_extents = - hlo::ComputeBinaryElementwiseBroadcastingResultExtents(loc, lhs, rhs, - rewriter); + hlo::ComputeBinaryElementwiseBroadcastingResultExtents( + loc, lhs, rhs, rewriter, /*unsafe_as_extent_tensor=*/true); // Note that we unconditionally emit DynamicBroadcastInDim ops and let // downstream canonicalizations fold them away if possible. This is diff --git a/tensorflow/compiler/mlir/hlo/lib/utils/broadcast_utils.cc b/tensorflow/compiler/mlir/hlo/lib/utils/broadcast_utils.cc index a3ce4d44436..71b1a4e164f 100644 --- a/tensorflow/compiler/mlir/hlo/lib/utils/broadcast_utils.cc +++ b/tensorflow/compiler/mlir/hlo/lib/utils/broadcast_utils.cc @@ -20,6 +20,7 @@ limitations under the License. 
#include "llvm/ADT/Sequence.h" #include "llvm/ADT/SmallVector.h" #include "mlir/Dialect/Shape/IR/Shape.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/IR/Diagnostics.h" #include "mlir/IR/StandardTypes.h" @@ -46,9 +47,9 @@ bool IsLegalNumpyRankedBroadcast(Value lhs, Value rhs, broadcast_dims.getIntValues().begin()); } -Value ComputeBinaryElementwiseBroadcastingResultExtents(Location loc, Value lhs, - Value rhs, - OpBuilder& builder) { +Value ComputeBinaryElementwiseBroadcastingResultExtents( + Location loc, Value lhs, Value rhs, OpBuilder& builder, + bool unsafe_as_extent_tensor) { auto lhs_type = lhs.getType().dyn_cast(); auto rhs_type = rhs.getType().dyn_cast(); if (!lhs_type || !rhs_type) { @@ -57,15 +58,22 @@ Value ComputeBinaryElementwiseBroadcastingResultExtents(Location loc, Value lhs, return nullptr; } - int64_t result_rank = std::max(lhs_type.getRank(), rhs_type.getRank()); Value lhs_shape_v = builder.createOrFold(loc, lhs); Value rhs_shape_v = builder.createOrFold(loc, rhs); - Value result_shape_v = builder.createOrFold( - loc, shape::ShapeType::get(builder.getContext()), lhs_shape_v, - rhs_shape_v, nullptr /* error */); - return builder.createOrFold( - loc, RankedTensorType::get({result_rank}, builder.getIndexType()), - result_shape_v); + + if (unsafe_as_extent_tensor) { + int64_t result_rank = std::max(lhs_type.getRank(), rhs_type.getRank()); + Value result_shape_v = builder.createOrFold( + loc, shape::getExtentTensorType(builder.getContext()), lhs_shape_v, + rhs_shape_v, nullptr /* error */); + return builder.createOrFold( + loc, RankedTensorType::get({result_rank}, builder.getIndexType()), + result_shape_v); + } + + return builder.createOrFold( + loc, builder.getType(), lhs_shape_v, rhs_shape_v, + nullptr /* error */); } } // namespace hlo diff --git a/tensorflow/compiler/mlir/hlo/tests/chlo_infer_shape_type_methods.mlir b/tensorflow/compiler/mlir/hlo/tests/chlo_infer_shape_type_methods.mlir index 99aab532688..d226c92858a 100644 --- a/tensorflow/compiler/mlir/hlo/tests/chlo_infer_shape_type_methods.mlir +++ b/tensorflow/compiler/mlir/hlo/tests/chlo_infer_shape_type_methods.mlir @@ -5,15 +5,14 @@ // only test reification on an examplar op. 
// CHECK-SAME: %[[ARG0:.+]]: tensor, // CHECK-SAME: %[[ARG1:.+]]: tensor -func @broadcast_add(%arg0: tensor, %arg1: tensor) -> tensor<1xindex> { +func @broadcast_add(%arg0: tensor, %arg1: tensor) -> !shape.shape { // CHECK-DAG: %[[ARG0_S:.+]] = shape.shape_of %[[ARG0]] // CHECK-DAG: %[[ARG1_S:.+]] = shape.shape_of %[[ARG1]] - // CHECK-DAG: %[[BCAST_S:.+]] = shape.broadcast %[[ARG0_S]], %[[ARG1_S]] - // CHECK: %[[EXTENTS:.+]] = shape.to_extent_tensor %[[BCAST_S]] - // CHECK: return %[[EXTENTS]] + // CHECK-DAG: %[[BCAST_S:.+]] = shape.broadcast %[[ARG0_S]], %[[ARG1_S]] : tensor, tensor -> !shape.shape + // CHECK: return %[[BCAST_S]] : !shape.shape %0 = chlo.broadcast_add %arg0, %arg1 : (tensor, tensor) -> tensor - %1 = "mhlo_test.reify_return_type_shapes"(%0) : (tensor) -> tensor<1xindex> - return %1 : tensor<1xindex> + %1 = "mhlo_test.reify_return_type_shapes"(%0) : (tensor) -> !shape.shape + return %1 : !shape.shape } // ----- diff --git a/tensorflow/compiler/mlir/hlo/tests/chlo_legalize_to_hlo_broadcasts.mlir b/tensorflow/compiler/mlir/hlo/tests/chlo_legalize_to_hlo_broadcasts.mlir index c08ead5081e..9670372a864 100644 --- a/tensorflow/compiler/mlir/hlo/tests/chlo_legalize_to_hlo_broadcasts.mlir +++ b/tensorflow/compiler/mlir/hlo/tests/chlo_legalize_to_hlo_broadcasts.mlir @@ -19,7 +19,7 @@ func @dynamicBroadcast(%arg0: tensor, %arg1: tensor) -> tensor to tensor<2xindex> // CHECK-DAG: %[[ARG0_B:.+]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG0]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<1> : tensor<1xi64>} // CHECK-DAG: %[[ARG1_B:.+]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG1]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} // CHECK-NEXT: %[[RESULT:.+]] = mhlo.add %[[ARG0_B]], %[[ARG1_B]] @@ -40,7 +40,7 @@ func @dynamicBroadcastComplex(%arg0: tensor, %arg1: tensor) -> t // CHECK-NEXT: %[[WITNESS:.+]] = shape.cstr_broadcastable %[[ARG0_S]], %[[ARG1_S]] // CHECK-NEXT: %[[FINAL_RESULT:.+]] = shape.assuming %[[WITNESS]] // CHECK-NEXT: %[[RESULT_S:.+]] = shape.broadcast %[[ARG0_S]], %[[ARG1_S]] - // CHECK-NEXT: %[[RESULT_EXTENTS:.+]] = shape.to_extent_tensor %[[RESULT_S]] + // CHECK-NEXT: %[[RESULT_EXTENTS:.+]] = tensor_cast %[[RESULT_S]] : tensor to tensor<2xindex> // CHECK-DAG: %[[ARG0_B:.+]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG0]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor, tensor<2xindex>) -> tensor // CHECK-DAG: %[[ARG1_B:.+]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG1]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} : (tensor, tensor<2xindex>) -> tensor // CHECK-NEXT: %[[RESULT:.+]] = "mhlo.complex"(%[[ARG0_B]], %[[ARG1_B]]) : (tensor, tensor) -> tensor> @@ -61,7 +61,7 @@ func @dynamicBroadcastCompare(%arg0: tensor, %arg1: tensor) -> t // CHECK: %[[WITNESS:.+]] = shape.cstr_broadcastable %[[ARG0_S]], %[[ARG1_S]] // CHECK: %[[FINAL_RESULT:.+]] = shape.assuming %[[WITNESS]] // CHECK: %[[RESULT_S:.+]] = shape.broadcast %[[ARG0_S]], %[[ARG1_S]] - // CHECK: %[[RESULT_EXTENTS:.+]] = shape.to_extent_tensor %[[RESULT_S]] + // CHECK: %[[RESULT_EXTENTS:.+]] = tensor_cast %[[RESULT_S]] : tensor to tensor<2xindex> // CHECK-DAG: %[[ARG0_B:.+]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG0]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor, tensor<2xindex>) -> tensor // CHECK-DAG: %[[ARG1_B:.+]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG1]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} : (tensor, tensor<2xindex>) -> tensor // CHECK: 
%[[RESULT:.+]] = "mhlo.compare"(%[[ARG0_B]], %[[ARG1_B]]) {comparison_direction = "EQ"} : (tensor, tensor) -> tensor @@ -263,7 +263,7 @@ func @addScalarUnranked(%arg0: tensor, %arg1: tensor<*xf32>) -> tensor<*xf3 // CHECK: %[[ASSUMING_RESULT:.*]] = shape.assuming %[[WITNESS]] -> (tensor) { // CHECK: %[[SCALAR_SHAPE:.*]] = shape.const_shape [] // CHECK: %[[BROADCASTED_SHAPE:.*]] = shape.broadcast %[[SCALAR_SHAPE]], %[[SHAPE_RESHAPED]] -// CHECK: %[[SHAPE_TENSOR:.*]] = shape.to_extent_tensor %[[BROADCASTED_SHAPE]] : !shape.shape -> tensor<1xindex> +// CHECK: %[[SHAPE_TENSOR:.*]] = tensor_cast %[[BROADCASTED_SHAPE]] : tensor to tensor<1xindex> // CHECK: %[[BROADCASTED_LHS:.*]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG_0]], %[[SHAPE_TENSOR]]) {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor, tensor<1xindex>) -> tensor // CHECK: %[[BROADCASTED_RHS:.*]] = "mhlo.dynamic_broadcast_in_dim"(%[[RESHAPED]], %[[SHAPE_TENSOR]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} : (tensor, tensor<1xindex>) -> tensor // CHECK: %[[BROADCASTED_RESULT:.*]] = mhlo.add %[[BROADCASTED_LHS]], %[[BROADCASTED_RHS]] : tensor @@ -296,7 +296,7 @@ func @addUnrankedScalar(%arg0: tensor<*xf32>, %arg1: tensor) -> tensor<*xf3 // CHECK: %[[SHAPE_1:.*]] = shape.shape_of %[[ARG_1]] : tensor // CHECK: %[[WITNESS:.*]] = shape.cstr_broadcastable %[[SHAPE_RESHAPED]], %[[SHAPE_1]] // CHECK: %[[ASSUMING_RESULT:.*]] = shape.assuming %[[WITNESS]] -> (tensor) { -// CHECK: %[[ASTENSOR:.*]] = shape.to_extent_tensor %[[SHAPE_RESHAPED]] +// CHECK: %[[ASTENSOR:.*]] = tensor_cast %[[SHAPE_RESHAPED]] // CHECK: %[[BROADCASTED_LHS:.*]] = "mhlo.dynamic_broadcast_in_dim"(%[[RESHAPED]], %[[ASTENSOR]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} : (tensor, tensor<1xindex>) -> tensor // CHECK: %[[BROADCASTED_RHS:.*]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG_1]], %[[ASTENSOR]]) {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor, tensor<1xindex>) -> tensor // CHECK: %[[BROADCASTED_RESULT:.*]] = mhlo.add %[[BROADCASTED_LHS]], %[[BROADCASTED_RHS]] : tensor diff --git a/tensorflow/compiler/mlir/xla/tests/legalize-tf-binary-elementwise.mlir b/tensorflow/compiler/mlir/xla/tests/legalize-tf-binary-elementwise.mlir index fd9c14c7c0f..5f3e40f923f 100644 --- a/tensorflow/compiler/mlir/xla/tests/legalize-tf-binary-elementwise.mlir +++ b/tensorflow/compiler/mlir/xla/tests/legalize-tf-binary-elementwise.mlir @@ -48,8 +48,8 @@ func @add_dynamic(%arg0: tensor, %arg1: tensor) -> tensor, tensor -> tensor + // CHECK-NEXT: %[[RESULT_EXTENTS:.+]] = tensor_cast %[[RESULT_SHAPE]] : tensor to tensor<2xindex> // CHECK-NEXT: %[[LHS_BCAST:.+]] = "mhlo.dynamic_broadcast_in_dim"(%arg0, %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<1> : tensor<1xi64>} // CHECK-NEXT: %[[RHS_BCAST:.+]] = "mhlo.dynamic_broadcast_in_dim"(%arg1, %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} // CHECK-NEXT: %[[RESULT:.+]] = mhlo.add %[[LHS_BCAST]], %[[RHS_BCAST]] : tensor @@ -201,8 +201,8 @@ func @equal_dynamic(%arg0: tensor, %arg1: tensor<1xi32>) -> tensor // NOT-CHECK-NEXT: %[[WITNESS:.+]] = shape.cstr_broadcastable %[[LHS_SHAPE]], %[[RHS_SHAPE]] // NOT-CHECK-NEXT: shape.assuming %[[WITNESS]] -> (tensor) { // NOT-CHECK-DAG: %[[LHS_SHAPE1:.+]] = shape.shape_of %arg0 - // NOT-CHECK-NEXT: %[[RESULT_SHAPE:.+]] = shape.broadcast %[[LHS_SHAPE1]], %[[RHS_SHAPE]] - // NOT-CHECK-NEXT: %[[RESULT_EXTENTS:.+]] = shape.to_extent_tensor %[[RESULT_SHAPE]] + // NOT-CHECK-NEXT: %[[RESULT_SHAPE:.+]] = shape.broadcast %[[LHS_SHAPE1]], %[[RHS_SHAPE]] : tensor, 
tensor -> tensor + // NOT-CHECK-NEXT: %[[RESULT_EXTENTS:.+]] = tensor_cast %[[RESULT_SHAPE]] : tensor to tensor<1xindex> // NOT-CHECK-DAG: %[[LHS_BCAST:.+]] = "mhlo.dynamic_broadcast_in_dim"(%arg0, %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} // NOT-CHECK-DAG: %[[RHS_BCAST:.+]] = "mhlo.dynamic_broadcast_in_dim"(%arg1, %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} // NOT-CHECK-NEXT: %[[RESULT:.+]] = "mhlo.compare"(%[[LHS_BCAST]], %[[RHS_BCAST]]) {comparison_direction = "EQ"} @@ -290,8 +290,8 @@ func @greater_dynamic(%arg0: tensor, %arg1: tensor) -> tensor, tensor -> tensor + // CHECK-NEXT: %[[RESULT_EXTENTS:.+]] = tensor_cast %[[RESULT_SHAPE]] : tensor to tensor<1xindex> // CHECK-DAG: %[[LHS_BCAST:.+]] = "mhlo.dynamic_broadcast_in_dim"(%arg0, %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} // CHECK-DAG: %[[RHS_BCAST:.+]] = "mhlo.dynamic_broadcast_in_dim"(%arg1, %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} // CHECK-NEXT: "mhlo.compare"(%[[LHS_BCAST]], %[[RHS_BCAST]]) {comparison_direction = "GT"} From d9ec032105b20f9687a16cd72db73f7a66919654 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Thu, 6 Aug 2020 21:42:32 +0700 Subject: [PATCH 2250/2522] Add GCS Helper --- .../filesystem/plugins/gcs/gcs_filesystem.cc | 113 ++++++++++++++++++ 1 file changed, 113 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc index b6b481cda66..039bc4fd236 100644 --- a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc @@ -19,6 +19,7 @@ limitations under the License. #include "absl/strings/numbers.h" #include "absl/strings/str_cat.h" +#include "absl/types/variant.h" #include "google/cloud/storage/client.h" #include "tensorflow/c/env.h" #include "tensorflow/c/experimental/filesystem/plugins/gcs/gcs_helper.h" @@ -663,6 +664,118 @@ void NewReadOnlyMemoryRegionFromFile(const TF_Filesystem* filesystem, } } +static void StatForObject(GCSFile* gcs_file, const std::string& path, + const std::string& bucket, const std::string& object, + GcsFileStat* stat, TF_Status* status) { + if (object.empty()) + return TF_SetStatus( + status, TF_INVALID_ARGUMENT, + ("'object' must be a non-empty string. 
(File: " + path + ")").c_str()); + TF_SetStatus(status, TF_OK, ""); + gcs_file->stat_cache->LookupOrCompute( + path, stat, + [gcs_file, bucket, object](const std::string& path, GcsFileStat* stat, + TF_Status* status) { + UncachedStatForObject(bucket, object, stat, &gcs_file->gcs_client, + status); + }, + status); +} + +static bool ObjectExists(GCSFile* gcs_file, const std::string& path, + const std::string& bucket, const std::string& object, + TF_Status* status) { + GcsFileStat stat; + StatForObject(gcs_file, path, bucket, object, &stat, status); + if (TF_GetCode(status) != TF_OK && TF_GetCode(status) != TF_NOT_FOUND) + return false; + if (TF_GetCode(status) == TF_NOT_FOUND) { + TF_SetStatus(status, TF_OK, ""); + return false; + } + return !stat.base.is_directory; +} + +static bool BucketExists(GCSFile* gcs_file, const std::string& bucket, + TF_Status* status) { + auto metadata = gcs_file->gcs_client.GetBucketMetadata(bucket); + TF_SetStatusFromGCSStatus(metadata.status(), status); + if (TF_GetCode(status) != TF_OK && TF_GetCode(status) != TF_NOT_FOUND) + return false; + if (TF_GetCode(status) == TF_NOT_FOUND) { + TF_SetStatus(status, TF_OK, ""); + return false; + } + return true; +} + +static std::vector GetChildrenBounded( + GCSFile* gcs_file, std::string dir, uint64_t max_results, bool recursive, + bool include_self_directory_marker, TF_Status* status) { + std::string bucket, prefix; + MaybeAppendSlash(&dir); + ParseGCSPath(dir, true, &bucket, &prefix, status); + + std::vector result; + uint64_t count = 0; + std::string delimiter = recursive ? "" : "/"; + + for (auto&& item : gcs_file->gcs_client.ListObjectsAndPrefixes( + bucket, gcs::Prefix(prefix), gcs::Delimiter(delimiter))) { + if (count == max_results) { + TF_SetStatus(status, TF_OK, ""); + return result; + } + if (!item) { + TF_SetStatusFromGCSStatus(item.status(), status); + return result; + } + auto value = *std::move(item); + std::string children = absl::holds_alternative(value) + ? absl::get(value) + : absl::get(value).name(); + auto pos = children.find(prefix); + if (pos != 0) { + TF_SetStatus(status, TF_INTERNAL, + ("Unexpected response: the returned file name " + children + + " doesn't match the prefix " + prefix) + .c_str()); + return result; + } + children.erase(0, prefix.length()); + if (!children.empty() || include_self_directory_marker) { + result.emplace_back(children); + } + ++count; + } +} + +static bool FolderExists(GCSFile* gcs_file, std::string dir, + TF_Status* status) { + ExpiringLRUCache::ComputeFunc compute_func = + [gcs_file](const std::string& dir, GcsFileStat* stat, TF_Status* status) { + auto children = + GetChildrenBounded(gcs_file, dir, 1, true, true, status); + if (TF_GetCode(status) != TF_OK) return; + if (!children.empty()) { + stat->base = {0, 0, true}; + return TF_SetStatus(status, TF_OK, ""); + } else { + return TF_SetStatus(status, TF_INVALID_ARGUMENT, "Not a directory!"); + } + }; + GcsFileStat stat; + MaybeAppendSlash(&dir); + gcs_file->stat_cache->LookupOrCompute(dir, &stat, compute_func, status); + if (TF_GetCode(status) != TF_OK && TF_GetCode(status) != TF_INVALID_ARGUMENT) + return false; + if (TF_GetCode(status) == TF_INVALID_ARGUMENT) { + TF_SetStatus(status, TF_OK, ""); + return false; + } + return true; +} + void CreateDir(const TF_Filesystem* filesystem, const char* path, TF_Status* status) { std::string bucket, object; From c4145955566a156695fe6af86a40b721691aa6dc Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Thu, 6 Aug 2020 08:42:35 -0700 Subject: [PATCH 2251/2522] Integrate LLVM at llvm/llvm-project@633e3dacf27e Updates LLVM usage to match [633e3dacf27e](https://github.com/llvm/llvm-project/commit/633e3dacf27e) PiperOrigin-RevId: 325237772 Change-Id: Ia3ab5f289b3338216a9ea95322958e02a61774ce --- tensorflow/workspace.bzl | 4 ++-- third_party/mlir/BUILD | 1 - third_party/mlir/test.BUILD | 13 ------------- 3 files changed, 2 insertions(+), 16 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 15559f991d3..440aa1b23ec 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -712,8 +712,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "acb66b9111ba793509b5468a58107108317b7cf5" - LLVM_SHA256 = "0a8053f9b75d796b475b038502d80674c02fe89d02eb14da00bd9ec8f39e6c49" + LLVM_COMMIT = "633e3dacf27ea4950b7067803502490597ba96e0" + LLVM_SHA256 = "585299b33c32ea3a39b0cfb70e5dd431f3ab064d9f96baa4787693b3c66af21e" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), diff --git a/third_party/mlir/BUILD b/third_party/mlir/BUILD index 8c43ebe6359..872d04194f6 100644 --- a/third_party/mlir/BUILD +++ b/third_party/mlir/BUILD @@ -2837,7 +2837,6 @@ cc_library( ":Support", ":Translation", "@llvm-project//llvm:Support", - "@llvm-project//mlir/test:TestLLVMTypeTranslation", ], ) diff --git a/third_party/mlir/test.BUILD b/third_party/mlir/test.BUILD index 8e72e72773b..f507842a639 100644 --- a/third_party/mlir/test.BUILD +++ b/third_party/mlir/test.BUILD @@ -165,19 +165,6 @@ cc_library( ], ) -cc_library( - name = "TestLLVMTypeTranslation", - srcs = [ - "lib/Target/TestLLVMTypeTranslation.cpp", - ], - deps = [ - "@llvm-project//mlir:IR", - "@llvm-project//mlir:LLVMDialect", - "@llvm-project//mlir:LLVMIRModuleTranslation", - "@llvm-project//mlir:Translation", - ], -) - cc_library( name = "TestTransforms", srcs = glob(["lib/Transforms/*.cpp"]), From d029f2f799339bf89d8d5f2b03b829c6017ce31f Mon Sep 17 00:00:00 2001 From: Ayush Dubey Date: Thu, 6 Aug 2020 09:24:59 -0700 Subject: [PATCH 2252/2522] Reenable MWMS CTL correctness test with multiple GPUs. PiperOrigin-RevId: 325245429 Change-Id: Ib4b08c280ac35cdb85b33f6c2779d931eb963e72 --- tensorflow/python/keras/distribute/ctl_correctness_test.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tensorflow/python/keras/distribute/ctl_correctness_test.py b/tensorflow/python/keras/distribute/ctl_correctness_test.py index dcac3f37e71..3af3ee218c9 100644 --- a/tensorflow/python/keras/distribute/ctl_correctness_test.py +++ b/tensorflow/python/keras/distribute/ctl_correctness_test.py @@ -252,11 +252,6 @@ class TestDistributionStrategyDnnCorrectness(test.TestCase, if ('CollectiveAllReduce' in type(distribution).__name__ and test_util.is_xla_enabled()): self.skipTest('XLA tests fail with MWMS.') - # Unable to use required_gpus to check if this is a multiGPU combination - # since required_gpus and NamedDistribution cannot be used together. 
- if ('CollectiveAllReduce' in type(distribution).__name__ - and not inside_func and iteration_type == 'dataset'): - self.skipTest('MWMS tests fail with multiple GPUs.') self.dnn_correctness(distribution, optimizer_fn, iteration_type, inside_func, sync_batchnorm) From 2bdb7d5ba5743aaf114a688a8b7b1171a9f3275e Mon Sep 17 00:00:00 2001 From: Jonathan Chu Date: Thu, 6 Aug 2020 16:41:37 +0000 Subject: [PATCH 2253/2522] Quick fix to _convert_inputs_to_signature return val --- tensorflow/python/eager/function.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index a833d351c84..2215987202a 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -2803,7 +2803,7 @@ def _convert_inputs_to_signature(inputs, input_signature, flat_input_signature): flat_sequence=flatten_inputs, expand_composites=True) - return inputs, flatten_inputs + return inputs, nest.flatten(inputs, expand_composites=True) class FunctionCache(object): From 696a4a76cebdd9150ede235a6edfd223a64e7129 Mon Sep 17 00:00:00 2001 From: Reed Wanderman-Milne Date: Thu, 6 Aug 2020 09:41:59 -0700 Subject: [PATCH 2254/2522] Add bfloat16 support to more cwise CPU ops PiperOrigin-RevId: 325248465 Change-Id: I68b3be2af4f9acedb76ab6077bf5dac9ac6eeb72 --- tensorflow/core/kernels/cwise_op_ceil.cc | 3 +- tensorflow/core/kernels/cwise_op_clip.cc | 2 + tensorflow/core/kernels/cwise_op_cos.cc | 4 +- tensorflow/core/kernels/cwise_op_cosh.cc | 4 +- tensorflow/core/kernels/cwise_op_exp.cc | 4 +- tensorflow/core/kernels/cwise_op_expm1.cc | 4 +- tensorflow/core/kernels/cwise_op_floor.cc | 3 +- tensorflow/core/kernels/cwise_op_floor_div.cc | 4 +- tensorflow/core/kernels/cwise_op_floor_mod.cc | 3 +- tensorflow/core/kernels/cwise_op_isfinite.cc | 4 +- tensorflow/core/kernels/cwise_op_isinf.cc | 3 +- tensorflow/core/kernels/cwise_op_log1p.cc | 4 +- tensorflow/core/kernels/cwise_op_pow.cc | 4 +- .../core/kernels/cwise_op_reciprocal.cc | 13 ++---- tensorflow/core/kernels/cwise_op_sign.cc | 4 +- tensorflow/core/kernels/cwise_op_sin.cc | 4 +- tensorflow/core/kernels/cwise_op_sinh.cc | 4 +- tensorflow/core/kernels/cwise_op_tan.cc | 4 +- tensorflow/core/kernels/cwise_op_tanh.cc | 8 ++-- .../python/kernel_tests/clip_ops_test.py | 4 ++ .../python/kernel_tests/cwise_ops_test.py | 44 ++++++++++++++----- .../kernel_tests/cwise_ops_unary_test.py | 36 +++++++++++---- tensorflow/python/ops/math_ops_test.py | 7 +++ 23 files changed, 112 insertions(+), 62 deletions(-) diff --git a/tensorflow/core/kernels/cwise_op_ceil.cc b/tensorflow/core/kernels/cwise_op_ceil.cc index 4b1847d758c..f8907ff1baa 100644 --- a/tensorflow/core/kernels/cwise_op_ceil.cc +++ b/tensorflow/core/kernels/cwise_op_ceil.cc @@ -16,7 +16,8 @@ limitations under the License. 
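// Background, assuming the usual expansion of the REGISTERn helpers in
// cwise_ops_common.h (one REGISTER_KERNEL_BUILDER per listed type): widening
// REGISTER3(..., float, Eigen::half, double) to REGISTER4(..., float,
// Eigen::half, bfloat16, double) is what registers the bfloat16 CPU kernel
// for this op, and analogously for the other cwise kernels in this change.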
#include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER3(UnaryOp, CPU, "Ceil", functor::ceil, float, Eigen::half, double); +REGISTER4(UnaryOp, CPU, "Ceil", functor::ceil, float, Eigen::half, bfloat16, + double); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER3(UnaryOp, GPU, "Ceil", functor::ceil, float, Eigen::half, double); diff --git a/tensorflow/core/kernels/cwise_op_clip.cc b/tensorflow/core/kernels/cwise_op_clip.cc index c0c71c5f638..3d43cf147b1 100644 --- a/tensorflow/core/kernels/cwise_op_clip.cc +++ b/tensorflow/core/kernels/cwise_op_clip.cc @@ -156,6 +156,7 @@ struct TernaryClipOp { INSTANTIATE_CPU(Eigen::half); INSTANTIATE_CPU(float); INSTANTIATE_CPU(double); +INSTANTIATE_CPU(bfloat16); INSTANTIATE_CPU(int8); INSTANTIATE_CPU(int16); INSTANTIATE_CPU(int32); @@ -173,6 +174,7 @@ INSTANTIATE_CPU(uint16); REGISTER_CPU_KERNEL(Eigen::half); REGISTER_CPU_KERNEL(float); REGISTER_CPU_KERNEL(double); +REGISTER_CPU_KERNEL(bfloat16); REGISTER_CPU_KERNEL(int8); REGISTER_CPU_KERNEL(int16); REGISTER_CPU_KERNEL(int32); diff --git a/tensorflow/core/kernels/cwise_op_cos.cc b/tensorflow/core/kernels/cwise_op_cos.cc index 7b434ce4294..3d406fe040a 100644 --- a/tensorflow/core/kernels/cwise_op_cos.cc +++ b/tensorflow/core/kernels/cwise_op_cos.cc @@ -16,8 +16,8 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER5(UnaryOp, CPU, "Cos", functor::cos, float, Eigen::half, double, - complex64, complex128); +REGISTER6(UnaryOp, CPU, "Cos", functor::cos, float, Eigen::half, bfloat16, + double, complex64, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER3(UnaryOp, GPU, "Cos", functor::cos, float, Eigen::half, double); diff --git a/tensorflow/core/kernels/cwise_op_cosh.cc b/tensorflow/core/kernels/cwise_op_cosh.cc index 3388df0096a..e6dff0ea317 100644 --- a/tensorflow/core/kernels/cwise_op_cosh.cc +++ b/tensorflow/core/kernels/cwise_op_cosh.cc @@ -16,8 +16,8 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER4(UnaryOp, CPU, "Cosh", functor::cosh, float, double, complex64, - complex128); +REGISTER5(UnaryOp, CPU, "Cosh", functor::cosh, float, double, bfloat16, + complex64, complex128); #if TENSORFLOW_USE_SYCL #define REGISTER_SYCL_KERNEL(TYPE) \ diff --git a/tensorflow/core/kernels/cwise_op_exp.cc b/tensorflow/core/kernels/cwise_op_exp.cc index 2b157f0e7a9..d937dd0c06d 100644 --- a/tensorflow/core/kernels/cwise_op_exp.cc +++ b/tensorflow/core/kernels/cwise_op_exp.cc @@ -16,8 +16,8 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER5(UnaryOp, CPU, "Exp", functor::exp, float, Eigen::half, double, - complex64, complex128); +REGISTER6(UnaryOp, CPU, "Exp", functor::exp, float, Eigen::half, bfloat16, + double, complex64, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER5(UnaryOp, GPU, "Exp", functor::exp, float, Eigen::half, double, diff --git a/tensorflow/core/kernels/cwise_op_expm1.cc b/tensorflow/core/kernels/cwise_op_expm1.cc index 55fdc4763d3..0b145d83e5c 100644 --- a/tensorflow/core/kernels/cwise_op_expm1.cc +++ b/tensorflow/core/kernels/cwise_op_expm1.cc @@ -16,8 +16,8 @@ limitations under the License. 
#include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER5(UnaryOp, CPU, "Expm1", functor::expm1, float, Eigen::half, double, - complex64, complex128); +REGISTER6(UnaryOp, CPU, "Expm1", functor::expm1, float, Eigen::half, bfloat16, + double, complex64, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER3(UnaryOp, GPU, "Expm1", functor::expm1, float, Eigen::half, double); #endif diff --git a/tensorflow/core/kernels/cwise_op_floor.cc b/tensorflow/core/kernels/cwise_op_floor.cc index 25210a0fa51..1dbd9bf0634 100644 --- a/tensorflow/core/kernels/cwise_op_floor.cc +++ b/tensorflow/core/kernels/cwise_op_floor.cc @@ -16,7 +16,8 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER3(UnaryOp, CPU, "Floor", functor::floor, float, Eigen::half, double); +REGISTER4(UnaryOp, CPU, "Floor", functor::floor, float, Eigen::half, bfloat16, + double); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER3(UnaryOp, GPU, "Floor", functor::floor, float, Eigen::half, double); diff --git a/tensorflow/core/kernels/cwise_op_floor_div.cc b/tensorflow/core/kernels/cwise_op_floor_div.cc index 11869e43eaa..d1f6d4c0652 100644 --- a/tensorflow/core/kernels/cwise_op_floor_div.cc +++ b/tensorflow/core/kernels/cwise_op_floor_div.cc @@ -18,8 +18,8 @@ limitations under the License. namespace tensorflow { REGISTER6(BinaryOp, CPU, "FloorDiv", functor::safe_floor_div, uint8, uint16, int8, int16, int32, int64); -REGISTER3(BinaryOp, CPU, "FloorDiv", functor::floor_div_real, float, - Eigen::half, double); +REGISTER4(BinaryOp, CPU, "FloorDiv", functor::floor_div_real, float, + Eigen::half, bfloat16, double); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER4(BinaryOp, GPU, "FloorDiv", functor::floor_div, uint8, uint16, int16, diff --git a/tensorflow/core/kernels/cwise_op_floor_mod.cc b/tensorflow/core/kernels/cwise_op_floor_mod.cc index 3305f54bcca..599ed1a9318 100644 --- a/tensorflow/core/kernels/cwise_op_floor_mod.cc +++ b/tensorflow/core/kernels/cwise_op_floor_mod.cc @@ -18,7 +18,8 @@ limitations under the License. namespace tensorflow { REGISTER3(BinaryOp, CPU, "FloorMod", functor::safe_floor_mod, int32, int64, uint64); -REGISTER2(BinaryOp, CPU, "FloorMod", functor::floor_fmod, float, double); +REGISTER3(BinaryOp, CPU, "FloorMod", functor::floor_fmod, bfloat16, float, + double); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM // A special GPU kernel for int32. diff --git a/tensorflow/core/kernels/cwise_op_isfinite.cc b/tensorflow/core/kernels/cwise_op_isfinite.cc index 061dc8367e2..42c7cbd4fd7 100644 --- a/tensorflow/core/kernels/cwise_op_isfinite.cc +++ b/tensorflow/core/kernels/cwise_op_isfinite.cc @@ -16,8 +16,8 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER3(UnaryOp, CPU, "IsFinite", functor::isfinite, float, Eigen::half, - double); +REGISTER4(UnaryOp, CPU, "IsFinite", functor::isfinite, float, Eigen::half, + bfloat16, double); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER3(UnaryOp, GPU, "IsFinite", functor::isfinite, float, Eigen::half, diff --git a/tensorflow/core/kernels/cwise_op_isinf.cc b/tensorflow/core/kernels/cwise_op_isinf.cc index f87a24d2085..68141f4924a 100644 --- a/tensorflow/core/kernels/cwise_op_isinf.cc +++ b/tensorflow/core/kernels/cwise_op_isinf.cc @@ -16,7 +16,8 @@ limitations under the License. 
#include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER3(UnaryOp, CPU, "IsInf", functor::isinf, float, Eigen::half, double); +REGISTER4(UnaryOp, CPU, "IsInf", functor::isinf, float, Eigen::half, bfloat16, + double); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER3(UnaryOp, GPU, "IsInf", functor::isinf, float, Eigen::half, double); diff --git a/tensorflow/core/kernels/cwise_op_log1p.cc b/tensorflow/core/kernels/cwise_op_log1p.cc index 06fc764fc75..88ddfd6af26 100644 --- a/tensorflow/core/kernels/cwise_op_log1p.cc +++ b/tensorflow/core/kernels/cwise_op_log1p.cc @@ -16,8 +16,8 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER5(UnaryOp, CPU, "Log1p", functor::log1p, float, Eigen::half, double, - complex64, complex128); +REGISTER6(UnaryOp, CPU, "Log1p", functor::log1p, float, Eigen::half, bfloat16, + double, complex64, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER3(UnaryOp, GPU, "Log1p", functor::log1p, float, Eigen::half, double); diff --git a/tensorflow/core/kernels/cwise_op_pow.cc b/tensorflow/core/kernels/cwise_op_pow.cc index 1b1d626aa57..214d083e11b 100644 --- a/tensorflow/core/kernels/cwise_op_pow.cc +++ b/tensorflow/core/kernels/cwise_op_pow.cc @@ -16,8 +16,8 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER5(BinaryOp, CPU, "Pow", functor::pow, float, Eigen::half, double, - complex64, complex128); +REGISTER6(BinaryOp, CPU, "Pow", functor::pow, float, Eigen::half, bfloat16, + double, complex64, complex128); REGISTER2(BinaryOp, CPU, "Pow", functor::safe_pow, int32, int64); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM diff --git a/tensorflow/core/kernels/cwise_op_reciprocal.cc b/tensorflow/core/kernels/cwise_op_reciprocal.cc index 8e92691474a..4fe201e9c7b 100644 --- a/tensorflow/core/kernels/cwise_op_reciprocal.cc +++ b/tensorflow/core/kernels/cwise_op_reciprocal.cc @@ -30,15 +30,8 @@ REGISTER3(SimpleBinaryOp, GPU, "InvGrad", functor::inverse_grad, float, Eigen::half, double); #endif -#ifdef ENABLE_INTEL_MKL_BFLOAT16 -// Since Eigen backend does not support bfloat16 ops, we are selectively -// enabling them for MKL backend. 
REGISTER6(UnaryOp, CPU, "Reciprocal", functor::inverse, float, Eigen::half, - double, complex64, complex128, bfloat16); -#else -REGISTER5(UnaryOp, CPU, "Reciprocal", functor::inverse, float, Eigen::half, - double, complex64, complex128); -#endif // ENABLE_INTEL_MKL_BFLOAT16 + bfloat16, double, complex64, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER4(UnaryOp, GPU, "Reciprocal", functor::inverse, float, Eigen::half, double, int64); @@ -47,8 +40,8 @@ REGISTER4(UnaryOp, GPU, "Reciprocal", functor::inverse, float, Eigen::half, REGISTER(UnaryOp, SYCL, "Reciprocal", functor::inverse, float); #endif // TENSORFLOW_USE_SYCL -REGISTER5(SimpleBinaryOp, CPU, "ReciprocalGrad", functor::inverse_grad, float, - Eigen::half, double, complex64, complex128); +REGISTER6(SimpleBinaryOp, CPU, "ReciprocalGrad", functor::inverse_grad, float, + Eigen::half, bfloat16, double, complex64, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER3(SimpleBinaryOp, GPU, "ReciprocalGrad", functor::inverse_grad, float, Eigen::half, double); diff --git a/tensorflow/core/kernels/cwise_op_sign.cc b/tensorflow/core/kernels/cwise_op_sign.cc index 983cee4c944..200a56eb2d2 100644 --- a/tensorflow/core/kernels/cwise_op_sign.cc +++ b/tensorflow/core/kernels/cwise_op_sign.cc @@ -16,8 +16,8 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER7(UnaryOp, CPU, "Sign", functor::sign, float, double, int32, int64, - complex64, Eigen::half, complex128); +REGISTER8(UnaryOp, CPU, "Sign", functor::sign, float, double, int32, int64, + complex64, Eigen::half, bfloat16, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER6(UnaryOp, GPU, "Sign", functor::sign, float, Eigen::half, double, int64, complex64, complex128); diff --git a/tensorflow/core/kernels/cwise_op_sin.cc b/tensorflow/core/kernels/cwise_op_sin.cc index ab6fb1ccd5e..f0fc2af7366 100644 --- a/tensorflow/core/kernels/cwise_op_sin.cc +++ b/tensorflow/core/kernels/cwise_op_sin.cc @@ -16,8 +16,8 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER5(UnaryOp, CPU, "Sin", functor::sin, float, Eigen::half, double, - complex64, complex128); +REGISTER6(UnaryOp, CPU, "Sin", functor::sin, float, Eigen::half, bfloat16, + double, complex64, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER3(UnaryOp, GPU, "Sin", functor::sin, float, Eigen::half, double); diff --git a/tensorflow/core/kernels/cwise_op_sinh.cc b/tensorflow/core/kernels/cwise_op_sinh.cc index 114a6142bdc..4448d2fef76 100644 --- a/tensorflow/core/kernels/cwise_op_sinh.cc +++ b/tensorflow/core/kernels/cwise_op_sinh.cc @@ -16,8 +16,8 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER4(UnaryOp, CPU, "Sinh", functor::sinh, float, double, complex64, - complex128); +REGISTER5(UnaryOp, CPU, "Sinh", functor::sinh, float, double, bfloat16, + complex64, complex128); #if TENSORFLOW_USE_SYCL #define REGISTER_SYCL_KERNEL(TYPE) \ diff --git a/tensorflow/core/kernels/cwise_op_tan.cc b/tensorflow/core/kernels/cwise_op_tan.cc index d9793501a09..115531213ac 100644 --- a/tensorflow/core/kernels/cwise_op_tan.cc +++ b/tensorflow/core/kernels/cwise_op_tan.cc @@ -16,8 +16,8 @@ limitations under the License. 
#include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER5(UnaryOp, CPU, "Tan", functor::tan, Eigen::half, float, double, - complex64, complex128); +REGISTER6(UnaryOp, CPU, "Tan", functor::tan, Eigen::half, bfloat16, float, + double, complex64, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER3(UnaryOp, GPU, "Tan", functor::tan, Eigen::half, float, double); diff --git a/tensorflow/core/kernels/cwise_op_tanh.cc b/tensorflow/core/kernels/cwise_op_tanh.cc index 1b6da56e537..de56a5e3e03 100644 --- a/tensorflow/core/kernels/cwise_op_tanh.cc +++ b/tensorflow/core/kernels/cwise_op_tanh.cc @@ -17,8 +17,8 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_gradients.h" namespace tensorflow { -REGISTER5(UnaryOp, CPU, "Tanh", functor::tanh, float, Eigen::half, double, - complex64, complex128); +REGISTER6(UnaryOp, CPU, "Tanh", functor::tanh, float, Eigen::half, bfloat16, + double, complex64, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #ifndef MLIR_GENERATED_GPU_KERNELS_ENABLED @@ -30,8 +30,8 @@ REGISTER3(UnaryOp, GPU, "Tanh", functor::tanh, float, Eigen::half, double); REGISTER2(UnaryOp, SYCL, "Tanh", functor::tanh, float, double); #endif // TENSORFLOW_USE_SYCL -REGISTER5(SimpleBinaryOp, CPU, "TanhGrad", functor::tanh_grad, float, - Eigen::half, double, complex64, complex128); +REGISTER6(SimpleBinaryOp, CPU, "TanhGrad", functor::tanh_grad, float, + Eigen::half, bfloat16, double, complex64, complex128); #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM REGISTER3(SimpleBinaryOp, GPU, "TanhGrad", functor::tanh_grad, float, Eigen::half, double); diff --git a/tensorflow/python/kernel_tests/clip_ops_test.py b/tensorflow/python/kernel_tests/clip_ops_test.py index 8d6b475c914..d0c805f96e3 100644 --- a/tensorflow/python/kernel_tests/clip_ops_test.py +++ b/tensorflow/python/kernel_tests/clip_ops_test.py @@ -67,6 +67,7 @@ class ClipTest(test.TestCase): dtypes.float16, dtypes.float32, dtypes.float64, + dtypes.bfloat16, dtypes.int16, dtypes.int32, dtypes.int64, @@ -88,6 +89,7 @@ class ClipTest(test.TestCase): dtypes.float16, dtypes.float32, dtypes.float64, + dtypes.bfloat16, dtypes.int16, dtypes.int32, dtypes.int64, @@ -110,6 +112,7 @@ class ClipTest(test.TestCase): dtypes.float16, dtypes.float32, dtypes.float64, + dtypes.bfloat16, dtypes.int16, dtypes.int32, dtypes.int64, @@ -132,6 +135,7 @@ class ClipTest(test.TestCase): dtypes.float16, dtypes.float32, dtypes.float64, + dtypes.bfloat16, dtypes.int16, dtypes.int32, dtypes.int64, diff --git a/tensorflow/python/kernel_tests/cwise_ops_test.py b/tensorflow/python/kernel_tests/cwise_ops_test.py index 78d3af17990..8d628d448db 100644 --- a/tensorflow/python/kernel_tests/cwise_ops_test.py +++ b/tensorflow/python/kernel_tests/cwise_ops_test.py @@ -841,6 +841,9 @@ class MathOpsOverloadTest(test.TestCase): def _compareBinary(self, x, y, dtype, np_func, tf_func): np_ans = np_func(x, y).astype(dtype.as_numpy_dtype) + if dtype == dtypes_lib.bfloat16: + # assertAllClose does not properly handle bfloat16 values + np_ans = np_ans.astype(np.float32) self.assertAllClose(np_ans, self._computeTensorAndLiteral(x, y, dtype, tf_func)) self.assertAllClose(np_ans, @@ -857,6 +860,7 @@ class MathOpsOverloadTest(test.TestCase): dtypes_lib.float16, dtypes_lib.float32, dtypes_lib.float64, + dtypes_lib.bfloat16, dtypes_lib.int32, dtypes_lib.int64, dtypes_lib.complex64, @@ -920,12 +924,16 @@ class MathOpsOverloadTest(test.TestCase): class IsFiniteInfNanTest(test.TestCase): def _compare(self, x, use_gpu): - np_finite, np_inf, np_nan = 
np.isfinite(x), np.isinf(x), np.isnan(x) with test_util.device(use_gpu=use_gpu): inx = ops.convert_to_tensor(x) ofinite, oinf, onan = math_ops.is_finite(inx), math_ops.is_inf( inx), math_ops.is_nan(inx) tf_finite, tf_inf, tf_nan = self.evaluate([ofinite, oinf, onan]) + if x.dtype == dtypes_lib.bfloat16.as_numpy_dtype: + # Numpy will implicitly convert bfloat16 value to float16, so we cast to + # float32 to avoid this. + x = x.astype(np.float32) + np_finite, np_inf, np_nan = np.isfinite(x), np.isinf(x), np.isnan(x) self.assertAllEqual(np_inf, tf_inf) self.assertAllEqual(np_nan, tf_nan) self.assertAllEqual(np_finite, tf_finite) @@ -934,11 +942,18 @@ class IsFiniteInfNanTest(test.TestCase): self.assertShapeEqual(np_finite, ofinite) def _testDtype(self, dtype): - fi = np.finfo(dtype) - data = np.array([ - 0, -1, 1, fi.resolution, -fi.resolution, fi.min, fi.max, -np.inf, - np.inf, np.nan - ]).astype(dtype) + if dtype != dtypes_lib.bfloat16.as_numpy_dtype: + fi = np.finfo(dtype) + data = np.array([ + 0, -1, 1, fi.resolution, -fi.resolution, fi.min, fi.max, -np.inf, + np.inf, np.nan + ]).astype(dtype) + else: + # np.finfo does not support bfloat16 + data = np.array([ + 0, -1, 1, 0.01, -0.01, -3.3895e+38, 3.3895e+38, -np.inf, np.inf, + np.nan + ]).astype(dtype) self._compare(data, use_gpu=False) self._compare(data, use_gpu=True) @@ -951,6 +966,9 @@ class IsFiniteInfNanTest(test.TestCase): def testDouble(self): self._testDtype(np.float64) + def testBfloat16(self): + self._testDtype(dtypes_lib.bfloat16.as_numpy_dtype) + def testSqrt(self): for dtype in [np.float16, np.float32, np.float64]: fi = np.finfo(dtype) @@ -998,8 +1016,8 @@ class RoundingTest(test.TestCase): def _testDtype(self, dtype): data = (np.arange(-3, 3) / 4.).reshape(1, 3, 2).astype(dtype) self._compare(data) - # TODO: rint op is not supported for float16 - if dtype is np.float16: + # TODO(reedwm): rint op is not supported for float16 and bfloat16 + if dtype in (np.float16, dtypes_lib.bfloat16.as_numpy_dtype): return self._compare_values(data) x = [0.5, 0.5000001] @@ -1012,10 +1030,12 @@ class RoundingTest(test.TestCase): self._compare_values(x, y=y) def testTypes(self): - self.skipTest("b/131162241") - for dtype in [np.float16, np.float32, np.float64]: - with self.subTest(dtype=dtype): - self._testDtype(dtype) + # TODO(b/131162241): Enable test for GPU + with ops.device("/CPU:0"): + for dtype in [np.float16, np.float32, np.float64, + dtypes_lib.bfloat16.as_numpy_dtype]: + with self.subTest(dtype=dtype): + self._testDtype(dtype) class ComplexMakeRealImagTest(test.TestCase): diff --git a/tensorflow/python/kernel_tests/cwise_ops_unary_test.py b/tensorflow/python/kernel_tests/cwise_ops_unary_test.py index df848a653d4..9d46ed35639 100644 --- a/tensorflow/python/kernel_tests/cwise_ops_unary_test.py +++ b/tensorflow/python/kernel_tests/cwise_ops_unary_test.py @@ -61,6 +61,8 @@ def _default_tolerance(dtype): Args: dtype: A datatype. 
""" + if dtype == dtypes_lib.bfloat16.as_numpy_dtype: + return 5e-3 if dtype == np.float16: return 5e-3 elif dtype in (np.float32, np.complex64): @@ -81,12 +83,7 @@ class UnaryOpTest(test.TestCase): np_ans = np_func(x) with self.cached_session(use_gpu=False): inx = ops.convert_to_tensor(x) - if x.dtype in (np.float32, np.float64, - dtypes_lib.bfloat16.as_numpy_dtype): - y = 1.1 * tf_func(inx) - np_ans *= 1.1 - else: - y = tf_func(inx) + y = tf_func(inx) tf_cpu = self.evaluate(y) self.assertShapeEqual(np_ans, y) if x.dtype == np.float16: @@ -99,7 +96,7 @@ class UnaryOpTest(test.TestCase): if x.dtype in (np.complex64, np.complex128) and tf_func == math_ops.sign: return # Return early - if x.dtype == np.float16: + if x.dtype in (np.float16, dtypes_lib.bfloat16.as_numpy_dtype): s = list(np.shape(x)) jacob_t, _ = gradient_checker.compute_gradient( inx, s, y, s, x_init_value=x) @@ -108,7 +105,7 @@ class UnaryOpTest(test.TestCase): yf = tf_func(inxf) _, jacob_n = gradient_checker.compute_gradient( inxf, s, yf, s, x_init_value=xf, delta=1e-2) - jacob_n = jacob_n.astype(np.float16) + jacob_n = jacob_n.astype(x.dtype) self.assertAllClose(jacob_t, jacob_n, rtol=grad_rtol, atol=grad_atol) elif x.dtype in (np.float32, np.complex64): s = list(np.shape(x)) @@ -384,13 +381,36 @@ class UnaryOpTest(test.TestCase): self._compareBothSparse(y, np.sign, math_ops.sign) self._compareBothSparse(x, np.vectorize(math.erf), math_ops.erf, tol=1e-3) + @test_util.run_deprecated_v1 def testBFloat16Basic(self): + def compute_f32(np_func): + """Decorator to compute Numpy function with float32 math.""" + def f(x): + y = np_func(x.astype(np.float32)) + return y.astype(x.dtype) + return f + + bfloat16 = dtypes_lib.bfloat16.as_numpy_dtype x = np.arange(-6, 6, 2).reshape(1, 3, 2).astype(dtypes_lib.bfloat16.as_numpy_dtype) + y = (x + .5).astype(bfloat16) # no zero + z = (x + 15.5).astype(bfloat16) # all positive self._compareCpu(x, np.abs, math_ops.abs) self._compareCpu(x, np.abs, _ABS) self._compareBoth(x, np.negative, math_ops.negative) self._compareBoth(x, np.negative, _NEG) + self._compareCpu(y, compute_f32(self._inv), math_ops.reciprocal) + self._compareCpu(x, np.exp, math_ops.exp) + self._compareCpu(x, np.expm1, math_ops.expm1) + self._compareCpu(z, compute_f32(np.log), math_ops.log) + self._compareCpu(z, compute_f32(np.log1p), math_ops.log1p) + self._compareCpu(y, np.sign, math_ops.sign) + self._compareBoth(x, compute_f32(np.sin), math_ops.sin) + self._compareBoth(x, compute_f32(np.cos), math_ops.cos) + self._compareBoth(x, compute_f32(np.tan), math_ops.tan) + self._compareBoth(x, compute_f32(np.sinh), math_ops.sinh) + self._compareBoth(x, compute_f32(np.cosh), math_ops.cosh) + self._compareBoth(x, compute_f32(np.tanh), math_ops.tanh) def testInt8Basic(self): x = np.arange(-6, 6, 2).reshape(1, 3, 2).astype(np.int8) diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py index 296395d034f..dabf4bb9d33 100644 --- a/tensorflow/python/ops/math_ops_test.py +++ b/tensorflow/python/ops/math_ops_test.py @@ -476,6 +476,13 @@ class DivAndModTest(test_util.TensorFlowTestCase): # % array_ops.constant(divs)) # self.assertAllEqual(tf2_result, tf_result) + def testFloorModBfloat64(self): + nums, divs = self.floatTestData() + tf_result = math_ops.floormod(math_ops.cast(nums, dtypes.bfloat16), + math_ops.cast(divs, dtypes.bfloat16)) + np_result = nums % divs + self.assertAllEqual(tf_result, np_result) + def testTruncateModInt(self): nums, divs = self.intTestData() tf_result = math_ops.truncatemod(nums, 
divs) From 8fd0ab8e6c3e18e63bdba5673e1b85e5025ee58d Mon Sep 17 00:00:00 2001 From: Berkin Ilbeyi Date: Thu, 6 Aug 2020 09:47:35 -0700 Subject: [PATCH 2255/2522] [XLA] Reserve for all pinned buffers first them before starting allocations. We were previously reserving for pinned buffers in alternate memory as we were allocating. This meant that we could discover a pinned buffer after we have other buffers, possibly going beyond the size of the alternate memory. Now we split the logic to reserve buffers from the rest of allocations so that doesn't happen. PiperOrigin-RevId: 325249612 Change-Id: I9fa36b832d86701cdbcb61a808fad763cf664634 --- .../xla/service/memory_space_assignment.cc | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/tensorflow/compiler/xla/service/memory_space_assignment.cc b/tensorflow/compiler/xla/service/memory_space_assignment.cc index b122fc22e20..377c84eaf6b 100644 --- a/tensorflow/compiler/xla/service/memory_space_assignment.cc +++ b/tensorflow/compiler/xla/service/memory_space_assignment.cc @@ -971,6 +971,16 @@ HeapSimulator::Result AlternateMemoryBestFitHeap::Finish() { } } + for (const auto& interval : sorted_buffer_intervals) { + auto colocated_intervals = GetSortedColocatedIntervals(interval); + if (AreIntervalsReservedInAlternateMemory(colocated_intervals)) { + // Increment the reserved part of alternate memory so that it is not + // available for other buffers. + reserved_in_bytes_ += options_.size_fn(*interval.buffer); + } + } + VLOG(2) << "Total reserved bytes = " << reserved_in_bytes_; + for (auto& interval : sorted_buffer_intervals) { if (!interval.need_allocation) { continue; @@ -998,8 +1008,7 @@ HeapSimulator::Result AlternateMemoryBestFitHeap::Finish() { if (AreIntervalsReservedInAlternateMemory(colocated_intervals)) { VLOG(3) << "Interval " << interval.buffer->ToShortString() - << " is reserved in the alternate memory. Total reserved bytes = " - << reserved_in_bytes_; + << " is reserved in the alternate memory."; for (const BufferInterval* colocated_interval : colocated_intervals) { const HloValue* value = colocated_interval->buffer; // Color all of the aliased reserved buffers here because reserved @@ -1015,10 +1024,6 @@ HeapSimulator::Result AlternateMemoryBestFitHeap::Finish() { options_.alternate_memory_space); } } - // Increment the reserved part of alternate memory so that it is not - // available for other buffers. Since all colocated intervals should have - // the same size, just use the first one. - reserved_in_bytes_ += options_.size_fn(*colocated_intervals[0]->buffer); continue; } From e29b09b91c5b7d8585b6c26fc02205eb70cb816b Mon Sep 17 00:00:00 2001 From: Tzu-Wei Sung Date: Thu, 6 Aug 2020 09:03:03 -0700 Subject: [PATCH 2256/2522] Utilize TensorFormat --- .../compiler/mlir/tensorflow/ir/tf_ops_a_m.cc | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc index 5c19f9c3daa..39dae59ecb4 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc @@ -339,15 +339,18 @@ void BatchToSpaceOp::getCanonicalizationPatterns( // are not unknown. 
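// For context, taken from the inline expression this change replaces
// (`format == "NHWC" ? rank - 1 : 1`): tensorflow::GetTensorFeatureDimIndex
// yields rank - 1 for FORMAT_NHWC and 1 for FORMAT_NCHW, so switching to the
// TensorFormat helpers preserves the feature-dimension computation while
// dropping the hand-rolled string comparison.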
// static LogicalResult Verify(BiasAddOp op) { - StringRef format = op.data_format(); - if (format == "NHWC") { + std::string data_format = op.data_format().str(); + tensorflow::TensorFormat format; + bool is_valid = FormatFromString(data_format, &format); + DCHECK(is_valid) << data_format; + if (format == tensorflow::TensorFormat::FORMAT_NHWC) { if (!HasRankAtLeast(op.value(), 2)) return op.emitOpError( "requires value operand to have rank at least two with `NHWC` data " "format"); } else { // Op definition requires data_format to be either NHWC or NCHW. - DCHECK_EQ(format.str(), "NCHW"); + DCHECK_EQ(format, tensorflow::TensorFormat::FORMAT_NCHW); if (!HasRankAtLeast(op.value(), 3)) return op.emitOpError( "requires value operand to have rank at least three with `NCHW` data " @@ -361,9 +364,8 @@ static LogicalResult Verify(BiasAddOp op) { RankedTensorType bias_ty = op.bias().getType().dyn_cast(); if (!bias_ty || !value_ty) return success(); - // TODO(hinsu): Leverage tensor_format.h utility in TensorFlow to compute - // dimension indices based on format. - int64_t feature_dim_idx = format == "NHWC" ? value_ty.getRank() - 1 : 1; + int64_t feature_dim_idx = + tensorflow::GetTensorFeatureDimIndex(value_ty.getRank(), format); int64_t feature_dim = value_ty.getDimSize(feature_dim_idx); int64_t bias_len = bias_ty.getDimSize(0); if (feature_dim != -1 && bias_len != -1 && feature_dim != bias_len) { @@ -383,15 +385,18 @@ static LogicalResult Verify(BiasAddOp op) { // * the out_backprop operands have valid ranks or are unranked. // static LogicalResult Verify(BiasAddGradOp op) { - StringRef format = op.data_format(); - if (format == "NHWC") { + std::string data_format = op.data_format().str(); + tensorflow::TensorFormat format; + bool is_valid = FormatFromString(data_format, &format); + DCHECK(is_valid) << data_format; + if (format == tensorflow::TensorFormat::FORMAT_NHWC) { if (!HasRankAtLeast(op.out_backprop(), 2)) return op.emitOpError( "requires out_backprop operand to have rank at least two with `NHWC` " "data format"); } else { // Op definition requires data_format to be either NHWC or NCHW. - DCHECK_EQ(format.str(), "NCHW"); + DCHECK_EQ(format, tensorflow::TensorFormat::FORMAT_NCHW); if (!HasRankAtLeast(op.out_backprop(), 3)) return op.emitOpError( "requires out_backprop operand to have rank at least three with " From 19806267f85275a395e7a9cd80d5be23d0c70a74 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 6 Aug 2020 10:09:15 -0700 Subject: [PATCH 2257/2522] Set name of layer to node name when converting activation and FusedBatchNorm ops Also fix a potential crash if creating the layer fails. PiperOrigin-RevId: 325254289 Change-Id: I8d38bf11880b8eb4fe92512977ebc304ed202acc --- tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc index ae7f4d0d99f..f80c0f42eca 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc @@ -3783,6 +3783,7 @@ Status ConvertActivation(OpConverterParams* params) { params->converter->network()->addActivation(*inputs.at(0).tensor(), op_pair->second); TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name()); + layer->setName(node_def.name().c_str()); // Set parameters. 
#if IS_TRT_VERSION_GE(5, 1, 2, 0) if (node_def.op() == "Elu") { @@ -3883,9 +3884,10 @@ Status ConvertRelu6(OpConverterParams* params) { nvinfer1::IActivationLayer* layer = params->converter->network()->addActivation( *inputs.at(0).tensor(), nvinfer1::ActivationType::kCLIP); + TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name()); layer->setAlpha(0.0f); layer->setBeta(6.0f); - TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name()); + layer->setName(node_def.name().c_str()); nvinfer1::ITensor* output_tensor = layer->getOutput(0); params->converter->ProvideQuantizationRange(output_tensor, 0.0f, 6.0f); params->outputs->push_back(TRT_TensorOrWeights(output_tensor)); @@ -4441,6 +4443,7 @@ Status ConvertUnary(OpConverterParams* params) { nvinfer1::IUnaryLayer* layer = params->converter->network()->addUnary(*tensor, op_pair->second); TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name()); + layer->setName(node_def.name().c_str()); nvinfer1::ITensor* output_tensor = layer->getOutput(0); // Set quantization ranges. @@ -5089,6 +5092,7 @@ Status ConvertFusedBatchNorm(OpConverterParams* params) { combined_scale_weights.GetTrtWeights(), dummy_power_weights.GetTrtWeights()); TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name()); + layer->setName(node_def.name().c_str()); nvinfer1::ITensor* output_tensor = layer->getOutput(0); params->outputs->push_back(TRT_TensorOrWeights(output_tensor)); return Status::OK(); From e4ddbcc7400df6e5dd92e477c929401e671caee3 Mon Sep 17 00:00:00 2001 From: Rick Chao Date: Thu, 6 Aug 2020 10:20:33 -0700 Subject: [PATCH 2258/2522] PSv2: Remove the logging in CoordinatedClosureQueueTest as it's still failing windows py38 test. PiperOrigin-RevId: 325256788 Change-Id: I02e2b0fd83b3a4d1e15ab4e932e3bbc72263593c --- tensorflow/python/distribute/client/client_test.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tensorflow/python/distribute/client/client_test.py b/tensorflow/python/distribute/client/client_test.py index 19deab26f63..cf24f8c17ce 100644 --- a/tensorflow/python/distribute/client/client_test.py +++ b/tensorflow/python/distribute/client/client_test.py @@ -80,10 +80,8 @@ class CoordinatedClosureQueueTest(test.TestCase): def get_func(label): def func(): - logging.info('Label: ' + label + ', before waiting 3 sec') # pylint: disable=logging-not-lazy time.sleep(3) processed_count[label] += 1 - logging.info('Label: ' + label + ', after waiting 3 sec') # pylint: disable=logging-not-lazy return func From 4ead0a60102fa22d57e858026de6aaa5e94cffb0 Mon Sep 17 00:00:00 2001 From: Xingyu Long Date: Thu, 6 Aug 2020 13:36:12 -0400 Subject: [PATCH 2259/2522] Update README.md. --- .../keras/benchmarks/keras_examples_benchmarks/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/README.md b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/README.md index 202037fef31..3c34dbc68ab 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/README.md +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/README.md @@ -22,7 +22,7 @@ ## Keras benchmarks -These are benchmark tests running on keras models: models from [keras/examples](https://github.com/keras-team/keras/tree/master/examples). Benchmarks in the current folder (`tensorflow/python/keras/benchmarks/keras_examples_benchmarks`) use Keras [built-in dataset](https://keras.io/api/datasets/) or synthetic data. 
In addition, these benchmarks support different [distribution strategies](https://www.tensorflow.org/guide/distributed_training) on multiple GPUs. +These are benchmark tests running on keras models: models from [keras/examples](https://github.com/keras-team/keras/tree/master/examples). Benchmarks in the current folder (`tensorflow/python/keras/benchmarks/keras_examples_benchmarks`) use Keras [built-in dataset](https://keras.io/api/datasets/). In addition, these benchmarks support different [distribution strategies](https://www.tensorflow.org/guide/distributed_training) on multiple GPUs. ### Available models @@ -62,8 +62,8 @@ Metrics for following benchmarks:
    - Batch_size: Number of samples per batch of computation.
    - Wall_time: Total time to run benchmark test in seconds.
    - Avg_epoch_time: Average time for each epoch.
    -- Exp_per_sec: The number of examples that model processed for each second.
    -- Distribution_Strategy: [Distribution strategies](https://www.tensorflow.org/guide/distributed_training).
+- Exp_per_sec: The number of examples processed per second.
    +- Distribution_Strategy: The [distribution strategies](https://www.tensorflow.org/guide/distributed_training) used in the benchmark.
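The steps for adding a new benchmark appear further down in this README change; a minimal, hypothetical skeleton that reports the metrics listed above could look like the sketch below (the model, method name and numbers are illustrative, not one of the real keras_examples_benchmarks).

```python
# Hypothetical skeleton of a benchmark_xxx method; names and hyperparameters
# are illustrative only.
import time

import tensorflow as tf


class MyModelBenchmark(tf.test.Benchmark):

  def _build_model(self):
    return tf.keras.Sequential(
        [tf.keras.layers.Dense(10, activation="softmax")])

  def benchmark_my_model_bs_256(self):
    batch_size, run_iters, epochs = 256, 3, 2
    x = tf.random.normal((2560, 20))
    y = tf.random.uniform((2560,), maxval=10, dtype=tf.int64)

    wall_times = []
    for _ in range(run_iters):
      model = self._build_model()
      model.compile(optimizer="adam", loss="sparse_categorical_crossentropy")
      start = time.time()
      model.fit(x, y, batch_size=batch_size, epochs=epochs, verbose=0)
      wall_times.append(time.time() - start)

    avg_wall_time = sum(wall_times) / len(wall_times)
    self.report_benchmark(
        iters=run_iters,
        wall_time=avg_wall_time,
        extras={
            "avg_epoch_time": avg_wall_time / epochs,
            "exp_per_sec": x.shape[0] * epochs / avg_wall_time,
            "distribution_strategy": "off",
        })
```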
    #### Cifar10 CNN benchmark @@ -181,7 +181,7 @@ To add a new benchmark, please take the following steps: 3. Create class which inherits from `tf.test.Benchmark` 4. Define and load dataset in `__init__` method. 5. Design and create a model in `_build_model` method. -6. Define the `benchmark_xxx` method and it will pass benchmark related hyper parameters, which includes `batch_size`, `run_iters`, `train_data` and etc. You can check examples from [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/python/keras/benchmarks/keras_examples_benchmarks). +6. Define the benchmark_xxx method to measure the performance of benchmarks with different hyper parameters, such as `batch_size`, `run_iters`, `distribution_strategy` and etc. You can check examples from [here](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py#L60). 7. Add the benchmark target to the [BUILD](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/benchmarks/BUILD) file. ## Troubleshooting From a80ee49dd073cbdfb271750b2aaee66c7e152bb6 Mon Sep 17 00:00:00 2001 From: Ce Zheng Date: Thu, 6 Aug 2020 10:22:30 -0700 Subject: [PATCH 2260/2522] Reduce a redundant shallow copy of tensorflow::Tensor in TF_TensorFromTensor. PiperOrigin-RevId: 325257291 Change-Id: Id0ae7304ea3a98901eb340445c12b237fe4672a6 --- tensorflow/c/tf_tensor.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/c/tf_tensor.cc b/tensorflow/c/tf_tensor.cc index 0feb986ce44..39d2683226f 100644 --- a/tensorflow/c/tf_tensor.cc +++ b/tensorflow/c/tf_tensor.cc @@ -288,7 +288,7 @@ TF_Tensor* TF_TensorFromTensor(const tensorflow::Tensor& src, Status* status) { if (!tensor.CopyFrom(src, src.shape())) { return nullptr; } - return new TF_Tensor{new tensorflow::TensorInterface(tensor)}; + return new TF_Tensor{new tensorflow::TensorInterface(std::move(tensor))}; } Status TF_TensorToTensor(const TF_Tensor* src, Tensor* dst) { From d7df643e4c9e532ef34a36cba699287b0a21b095 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Thu, 6 Aug 2020 10:44:02 -0700 Subject: [PATCH 2261/2522] Fixed the Windows build for TF Scala. --- tensorflow/BUILD | 24 ++++++++++++ .../core/util/tensor_bundle/tensor_bundle.cc | 26 ++++++------- .../core/util/tensor_bundle/tensor_bundle.h | 38 +++++++++---------- .../def_file_filter/def_file_filter.py.tpl | 3 ++ 4 files changed, 59 insertions(+), 32 deletions(-) diff --git a/tensorflow/BUILD b/tensorflow/BUILD index d1c1d7dcdef..8a6c1048078 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -882,6 +882,30 @@ genrule( visibility = ["//visibility:public"], ) +# The interface library (tensorflow_framework.dll.if.lib) for linking tensorflow DLL library +# (tensorflow_framework.dll) on Windows. 
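Separate from the Windows build rules in this patch: the one-line TF_TensorFromTensor change in PATCH 2260 above replaces a copy of a local `tensorflow::Tensor` with a move. The sketch below uses stand-in types to show the idea; real Tensor copies are shallow (they Ref() a shared buffer) but still not free, and the local is dead after the return anyway.

```cpp
// Stand-in types only; illustrates why moving the local avoids one shallow
// copy (one Ref()/Unref() pair on the shared buffer).
#include <utility>

class Tensor {
 public:
  Tensor() = default;
  Tensor(const Tensor&) { /* Ref() the shared buffer: the copy being avoided */ }
  Tensor(Tensor&&) noexcept = default;  // steals the buffer pointer instead
};

class TensorInterface {
 public:
  explicit TensorInterface(Tensor t) : tensor_(std::move(t)) {}

 private:
  Tensor tensor_;
};

TensorInterface* Wrap() {
  Tensor tensor;
  // ... fill `tensor` ...
  // With std::move the buffer is handed over; without it, `tensor` would be
  // copied into the constructor argument and then destroyed at end of scope.
  return new TensorInterface(std::move(tensor));
}
```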
+# To learn more about import library (called interface library in Bazel): +# https://docs.microsoft.com/en-us/cpp/build/linking-an-executable-to-a-dll?view=vs-2017#linking-implicitly +filegroup( + name = "get_tensorflow_framework_dll_import_lib", + srcs = ["//tensorflow:tensorflow_framework.dll"], + output_group = "interface_library", + visibility = ["//visibility:public"], +) + +# Rename the import library for tensorflow_framework.dll from tensorflow_framework.dll.if.lib to +# tensorflow_framework.lib +genrule( + name = "tensorflow_framework_dll_import_lib", + srcs = [":get_tensorflow_framework_dll_import_lib"], + outs = ["tensorflow_framework.lib"], + cmd = select({ + "//tensorflow:windows": "cp -f $< $@", + "//conditions:default": "touch $@", # Just a placeholder for Unix platforms + }), + visibility = ["//visibility:public"], +) + # The interface library (tensorflow_cc.dll.if.lib) for linking tensorflow DLL library (tensorflow_cc.dll) on Windows. # To learn more about import library (called interface library in Bazel): # https://docs.microsoft.com/en-us/cpp/build/linking-an-executable-to-a-dll?view=vs-2017#linking-implicitly diff --git a/tensorflow/core/util/tensor_bundle/tensor_bundle.cc b/tensorflow/core/util/tensor_bundle/tensor_bundle.cc index bb18000fcfe..be9648a7c78 100644 --- a/tensorflow/core/util/tensor_bundle/tensor_bundle.cc +++ b/tensorflow/core/util/tensor_bundle/tensor_bundle.cc @@ -741,7 +741,7 @@ Status MergeBundles(Env* env, gtl::ArraySlice prefixes, // Interface for reading a tensor bundle. -BundleReader::BundleReader(Env* env, StringPiece prefix) +TF_EXPORT BundleReader::BundleReader(Env* env, StringPiece prefix) : env_(env), prefix_(prefix), metadata_(nullptr), @@ -796,7 +796,7 @@ BundleReader::BundleReader(Env* env, StringPiece prefix) kTensorBundleMinProducer, "Checkpoint", "checkpoint"); } -BundleReader::~BundleReader() { +TF_EXPORT BundleReader::~BundleReader() { delete metadata_; delete iter_; delete table_; @@ -936,7 +936,7 @@ Status BundleReader::GetValue(const BundleEntryProto& entry, Tensor* val) { return Status::OK(); } -Status BundleReader::Lookup(StringPiece key, Tensor* val) { +TF_EXPORT Status BundleReader::Lookup(StringPiece key, Tensor* val) { CHECK(val != nullptr); BundleEntryProto entry; TF_RETURN_IF_ERROR(GetBundleEntryProto(key, &entry)); @@ -950,7 +950,7 @@ Status BundleReader::Lookup(StringPiece key, Tensor* val) { } } -Status BundleReader::ReadCurrent(Tensor* val) { +TF_EXPORT Status BundleReader::ReadCurrent(Tensor* val) { CHECK(val != nullptr); BundleEntryProto entry; TF_RETURN_IF_ERROR(ParseEntryProto(iter_->key(), iter_->value(), &entry)); @@ -968,8 +968,8 @@ Status BundleReader::ReadCurrent(Tensor* val) { } } -Status BundleReader::LookupTensorSlices(StringPiece key, - std::vector* slices) { +TF_EXPORT Status BundleReader::LookupTensorSlices(StringPiece key, + std::vector* slices) { slices->clear(); BundleEntryProto entry; TF_RETURN_IF_ERROR(GetBundleEntryProto(key, &entry)); @@ -980,8 +980,8 @@ Status BundleReader::LookupTensorSlices(StringPiece key, return Status::OK(); } -Status BundleReader::LookupSlice(StringPiece full_tensor_key, - const TensorSlice& slice_spec, Tensor* val) { +TF_EXPORT Status BundleReader::LookupSlice(StringPiece full_tensor_key, + const TensorSlice& slice_spec, Tensor* val) { CHECK(val != nullptr); BundleEntryProto entry; TF_RETURN_IF_ERROR(GetBundleEntryProto(full_tensor_key, &entry)); @@ -1103,13 +1103,13 @@ Status BundleReader::GetSliceValue(StringPiece full_tensor_key, return Status::OK(); } -bool 
BundleReader::Contains(StringPiece key) { +TF_EXPORT bool BundleReader::Contains(StringPiece key) { Seek(key); return Valid() && (this->key() == key); } -Status BundleReader::LookupDtypeAndShape(StringPiece key, DataType* dtype, - TensorShape* shape) { +TF_EXPORT Status BundleReader::LookupDtypeAndShape(StringPiece key, DataType* dtype, + TensorShape* shape) { BundleEntryProto entry; TF_RETURN_IF_ERROR(GetBundleEntryProto(key, &entry)); *dtype = entry.dtype(); @@ -1117,12 +1117,12 @@ Status BundleReader::LookupDtypeAndShape(StringPiece key, DataType* dtype, return Status::OK(); } -Status BundleReader::LookupTensorShape(StringPiece key, TensorShape* shape) { +TF_EXPORT Status BundleReader::LookupTensorShape(StringPiece key, TensorShape* shape) { DataType ignored; return LookupDtypeAndShape(key, &ignored, shape); } -string BundleReader::DebugString() { +TF_EXPORT string BundleReader::DebugString() { // Format used below emulates that of TensorSliceReader::DebugString(). string shape_str; BundleEntryProto entry; diff --git a/tensorflow/core/util/tensor_bundle/tensor_bundle.h b/tensorflow/core/util/tensor_bundle/tensor_bundle.h index c441000e47d..4c35ba17f01 100644 --- a/tensorflow/core/util/tensor_bundle/tensor_bundle.h +++ b/tensorflow/core/util/tensor_bundle/tensor_bundle.h @@ -182,28 +182,28 @@ Status MergeBundles(Env* env, gtl::ArraySlice prefixes, // All threads accessing the same BundleReader must synchronize. class BundleReader { public: - BundleReader(Env* const env, StringPiece prefix); - ~BundleReader(); + TF_EXPORT BundleReader(Env* const env, StringPiece prefix); + TF_EXPORT ~BundleReader(); // Is ok() iff the reader construction is successful (completed the read of // the metadata). - Status status() const { return status_; } + TF_EXPORT Status status() const { return status_; } // Queries whether the bundle contains an entry keyed by "key". Calls Seek() // internally, so this call invalidates the reader's current position. // REQUIRES: status().ok() - bool Contains(StringPiece key); + TF_EXPORT bool Contains(StringPiece key); // Looks up the dtype and the shape of the tensor keyed by "key". // REQUIRES: status().ok() - Status LookupDtypeAndShape(StringPiece key, DataType* dtype, - TensorShape* shape) TF_MUST_USE_RESULT; + TF_EXPORT Status LookupDtypeAndShape(StringPiece key, DataType* dtype, + TensorShape* shape) TF_MUST_USE_RESULT; // Looks up the shape of the tensor keyed by "key". // Clears "shape" if not found. // REQUIRES: status().ok() - Status LookupTensorShape(StringPiece key, - TensorShape* shape) TF_MUST_USE_RESULT; + TF_EXPORT Status LookupTensorShape(StringPiece key, + TensorShape* shape) TF_MUST_USE_RESULT; // Looks up the tensor keyed by "key". If "key" refers to a partitioned // tensor, attempts to look up the full contents using all stored slices. @@ -217,7 +217,7 @@ class BundleReader { // // Validates the stored crc32c checksum against the restored bytes. // REQUIRES: status().ok() - Status Lookup(StringPiece key, Tensor* val) TF_MUST_USE_RESULT; + TF_EXPORT Status Lookup(StringPiece key, Tensor* val) TF_MUST_USE_RESULT; // Looks up the tensor pointed to by the internal iterator. // @@ -225,7 +225,7 @@ class BundleReader { // // Validates the stored crc32c checksum against the restored bytes. // REQUIRES: status().ok() && Valid() - Status ReadCurrent(Tensor* val) TF_MUST_USE_RESULT; + TF_EXPORT Status ReadCurrent(Tensor* val) TF_MUST_USE_RESULT; // Looks up the slices of the tensor keyed by "key". 
On OK, "slices" // is non-empty if and only if the tensor is a partitioned tensor. @@ -234,34 +234,34 @@ class BundleReader { // a slice with a larger start index in some dimension could come before // another slice with a smaller start index in the same dimension. // REQUIRES: status().ok() - Status LookupTensorSlices(StringPiece key, std::vector* slices) + TF_EXPORT Status LookupTensorSlices(StringPiece key, std::vector* slices) TF_MUST_USE_RESULT; // Looks up a specific slice of a partitioned tensor. // It is only required that the stored slices cover the requested slice, // namely "slice_spec" is a subset of the union of the stored slices. // REQUIRES: status().ok() - Status LookupSlice(StringPiece full_tensor_key, const TensorSlice& slice_spec, - Tensor* val) TF_MUST_USE_RESULT; + TF_EXPORT Status LookupSlice(StringPiece full_tensor_key, const TensorSlice& slice_spec, + Tensor* val) TF_MUST_USE_RESULT; // Seeks to the first position in the bundle whose key is no less than "key". // REQUIRES: status().ok() - void Seek(StringPiece key) { return iter_->Seek(key); } + TF_EXPORT void Seek(StringPiece key) { return iter_->Seek(key); } // Moves to the next position in the bundle. // REQUIRES: status().ok() - void Next() const { iter_->Next(); } + TF_EXPORT void Next() const { iter_->Next(); } // Returns true iff the reader is positioned to a key/val pair. // REQUIRES: status().ok() - bool Valid() const { return iter_->Valid(); } + TF_EXPORT bool Valid() const { return iter_->Valid(); } // Returns the key at the current position. // REQUIRES: status().ok() && Valid() - StringPiece key() const { return iter_->key(); } + TF_EXPORT StringPiece key() const { return iter_->key(); } // Returns the raw value at the current position. // REQUIRES: status().ok() && Valid() - StringPiece value() const { return iter_->value(); } + TF_EXPORT StringPiece value() const { return iter_->value(); } - string DebugString(); + TF_EXPORT string DebugString(); private: // Seeks for "key" and reads the metadata proto. 
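The TF_EXPORT annotations added throughout tensor_bundle in this patch are what place these symbols in the Windows interface library that the new `tensorflow_framework.lib` genrule copies out. Below is a simplified sketch of how such an export/import macro is typically defined; the exact TensorFlow definition lives in platform/macros.h and may differ.

```cpp
// Simplified sketch of a Windows export/import macro; not the exact TF_EXPORT
// definition.
#if defined(_WIN32)
#if defined(TF_COMPILE_LIBRARY)
#define TF_EXPORT __declspec(dllexport)  // building the DLL itself
#else
#define TF_EXPORT __declspec(dllimport)  // linking against the DLL
#endif
#else
#define TF_EXPORT __attribute__((visibility("default")))
#endif

// Annotated declarations end up in the .if.lib interface library, which is
// what downstream consumers (here, the TF Scala bindings) link against.
class BundleReaderLike {
 public:
  TF_EXPORT bool Contains(const char* key);
};
```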
diff --git a/tensorflow/tools/def_file_filter/def_file_filter.py.tpl b/tensorflow/tools/def_file_filter/def_file_filter.py.tpl index 1049939c94b..8642a6d2e24 100644 --- a/tensorflow/tools/def_file_filter/def_file_filter.py.tpl +++ b/tensorflow/tools/def_file_filter/def_file_filter.py.tpl @@ -51,8 +51,11 @@ INCLUDEPRE_RE = re.compile(r"google::protobuf::internal::ExplicitlyConstructed|" r"google::protobuf::internal::ArenaImpl::AddCleanup|" # for contrib/data/_prefetching_ops r"google::protobuf::internal::LogMessage|" # for contrib/data/_prefetching_ops r"google::protobuf::Arena::OnArenaAllocation|" # for contrib/data/_prefetching_ops + r"google::protobuf::Message::InitializationErrorString|" + r"google::protobuf::MessageLite::ParseFromArray|" r"absl::Mutex::ReaderLock|" # for //tensorflow/contrib/rnn:python/ops/_gru_ops.so and more ops r"absl::Mutex::ReaderUnlock|" # for //tensorflow/contrib/rnn:python/ops/_gru_ops.so and more ops + r"tensorflow::TensorShape|" r"tensorflow::internal::LogMessage|" r"tensorflow::internal::LogString|" r"tensorflow::internal::CheckOpMessageBuilder|" From df615cd5c3b2a79bf8278a23c13be6285cc5752a Mon Sep 17 00:00:00 2001 From: Jose Baiocchi Date: Thu, 6 Aug 2020 10:30:13 -0700 Subject: [PATCH 2262/2522] Remove environment variable TF_ENABLE_OSS_CPU_PROFILER PiperOrigin-RevId: 325259187 Change-Id: Ie7c6520d532bf43c638b278c0129278dc6759b34 --- tensorflow/core/profiler/internal/cpu/BUILD | 3 --- tensorflow/core/profiler/internal/cpu/host_tracer.cc | 7 +------ 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/tensorflow/core/profiler/internal/cpu/BUILD b/tensorflow/core/profiler/internal/cpu/BUILD index 3ec721e7395..d8c84425e2b 100644 --- a/tensorflow/core/profiler/internal/cpu/BUILD +++ b/tensorflow/core/profiler/internal/cpu/BUILD @@ -29,7 +29,6 @@ cc_library( deps = [ ":host_tracer_utils", "//tensorflow/core:lib", - "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", "//tensorflow/core/profiler:profiler_options_proto_cc", "//tensorflow/core/profiler/internal:profiler_factory", @@ -73,7 +72,6 @@ cc_library( features = ["-use_header_modules"], deps = [ "//tensorflow/core:lib", - "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", "//tensorflow/core/profiler:profiler_options_proto_cc", "//tensorflow/core/profiler/internal:profiler_factory", @@ -91,7 +89,6 @@ cc_library( "//tensorflow/compiler/xla/service:hlo_proto_cc", "//tensorflow/compiler/xla/service/gpu:gpu_debug_info_manager", "//tensorflow/core:lib", - "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", "//tensorflow/core/profiler:profiler_options_proto_cc", "//tensorflow/core/profiler/internal:profiler_factory", diff --git a/tensorflow/core/profiler/internal/cpu/host_tracer.cc b/tensorflow/core/profiler/internal/cpu/host_tracer.cc index fa21df004df..c2e8121e5f0 100644 --- a/tensorflow/core/profiler/internal/cpu/host_tracer.cc +++ b/tensorflow/core/profiler/internal/cpu/host_tracer.cc @@ -33,7 +33,6 @@ limitations under the License. 
#include "tensorflow/core/profiler/utils/xplane_schema.h" #include "tensorflow/core/profiler/utils/xplane_utils.h" #include "tensorflow/core/protobuf/config.pb.h" -#include "tensorflow/core/util/env_var.h" namespace tensorflow { namespace profiler { @@ -162,11 +161,7 @@ std::unique_ptr CreateHostTracer( } auto register_host_tracer_factory = [] { - bool enable; - TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_OSS_CPU_PROFILER", true, &enable)); - if (enable) { - RegisterProfilerFactory(&CreateHostTracer); - } + RegisterProfilerFactory(&CreateHostTracer); return 0; }(); From 916e5e54be39f15e8292b9cf93fcf630aaec995f Mon Sep 17 00:00:00 2001 From: Jose Baiocchi Date: Thu, 6 Aug 2020 10:37:20 -0700 Subject: [PATCH 2263/2522] Cleanup TraceMe idioms PiperOrigin-RevId: 325260871 Change-Id: Ifa970a3529c1c53a5b25899a61c8de940963d423 --- tensorflow/core/profiler/lib/annotated_traceme.h | 6 +++--- .../python/profiler/internal/python_hooks.cc | 14 +++++++++----- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/tensorflow/core/profiler/lib/annotated_traceme.h b/tensorflow/core/profiler/lib/annotated_traceme.h index c3257e2adbe..636b901e226 100644 --- a/tensorflow/core/profiler/lib/annotated_traceme.h +++ b/tensorflow/core/profiler/lib/annotated_traceme.h @@ -38,12 +38,12 @@ class AnnotatedTraceMe { bool annotation_enabled = ScopedAnnotation::IsEnabled(); bool traceme_enabled = TraceMe::Active(level); if (TF_PREDICT_FALSE(annotation_enabled || traceme_enabled)) { - string label = name_generator(); + string name = name_generator(); if (annotation_enabled) { - scoped_annotation_.emplace(absl::string_view(label)); + scoped_annotation_.emplace(absl::string_view(name)); } if (TF_PREDICT_TRUE(traceme_enabled)) { - trace_me_.emplace(std::move(label), level); + trace_me_.emplace([name = std::move(name)] { return name; }, level); } } } diff --git a/tensorflow/python/profiler/internal/python_hooks.cc b/tensorflow/python/profiler/internal/python_hooks.cc index 33e182f8de0..ee2ad1e254b 100644 --- a/tensorflow/python/profiler/internal/python_hooks.cc +++ b/tensorflow/python/profiler/internal/python_hooks.cc @@ -120,8 +120,11 @@ void PythonHooks::ProfileFast(PyFrameObject* frame, int what, PyObject* arg) { function = py::reinterpret_borrow(f_code->co_name); } - tracemes_[thread_id].push_back(absl::make_unique(absl::StrCat( - "$", io::Basename(filename), ":", line_no, " ", function))); + tracemes_[thread_id].push_back( + absl::make_unique([&filename, line_no, &function] { + return absl::StrCat("$", io::Basename(filename), ":", line_no, " ", + function); + })); } else if (what == PyTrace_C_CALL && PyCFunction_Check(arg)) { // Python stack does not have a filename/line_no for native calls. 
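The python_hooks edits in this patch apply the same idiom as the annotated_traceme.h change above: hand `TraceMe` a callable instead of a prebuilt string, so the `absl::StrCat` only runs when tracing is active. A caller-side sketch of that idiom, assuming the name-generator constructor used in the diff:

```cpp
// Deferred-name idiom: the lambda is only invoked when tracing is enabled, so
// idle runs pay no string-construction cost.
#include <string>

#include "absl/strings/str_cat.h"
#include "tensorflow/core/profiler/lib/traceme.h"

void TrainStep(int step_id) {
  tensorflow::profiler::TraceMe trace([step_id] {
    // Only evaluated when tracing is active.
    return absl::StrCat("train_step#id=", step_id, "#");
  });
  // ... work being profiled ...
}
```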
auto* func = reinterpret_cast(arg); @@ -139,9 +142,10 @@ void PythonHooks::ProfileFast(PyFrameObject* frame, int what, PyObject* arg) { filename = ""; } - string function(func->m_ml->ml_name); - tracemes_[thread_id].push_back(absl::make_unique( - absl::StrCat(filename, " ", func->m_ml->ml_name))); + tracemes_[thread_id].push_back( + absl::make_unique([&filename, func] { + return absl::StrCat(filename, " ", func->m_ml->ml_name); + })); } else if (what == PyTrace_RETURN || what == PyTrace_C_RETURN || what == PyTrace_EXCEPTION || what == PyTrace_C_EXCEPTION) { auto& thread_tracemes = tracemes_[thread_id]; From b3caff096f8f747cf29f1007de09ffb3786066b9 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Thu, 6 Aug 2020 10:55:33 -0700 Subject: [PATCH 2264/2522] Add half support to CrossReplicaSum PiperOrigin-RevId: 325264977 Change-Id: I74d8c5667f8c89fc0c78641ab5be3576f5855c3f --- tensorflow/core/ops/tpu_cross_replica_ops.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/ops/tpu_cross_replica_ops.cc b/tensorflow/core/ops/tpu_cross_replica_ops.cc index adce0b51a05..1f10fe3136d 100644 --- a/tensorflow/core/ops/tpu_cross_replica_ops.cc +++ b/tensorflow/core/ops/tpu_cross_replica_ops.cc @@ -78,7 +78,7 @@ REGISTER_OP("CrossReplicaSum") .Input("input: T") .Input("group_assignment: int32") .Output("output: T") - .Attr("T: {bfloat16, float, int32, uint32}") + .Attr("T: {half, bfloat16, float, int32, uint32}") .SetShapeFn(shape_inference::UnchangedShape); REGISTER_OP("CollectivePermute") From 7ca0373ad3bfeec067c9c604c0973f8455473fdc Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: Thu, 6 Aug 2020 11:08:28 -0700 Subject: [PATCH 2265/2522] [tf.data service] Perform retries for dataset registration. This lets us recover when the dispatcher is either not started yet, or was preempted. PiperOrigin-RevId: 325268215 Change-Id: Ieec6d028d06cc87122269b846fa37ce1637086d6 --- tensorflow/core/data/service/BUILD | 1 + tensorflow/core/data/service/grpc_util.cc | 34 ++++++++++++++++++ tensorflow/core/data/service/grpc_util.h | 10 ++++++ tensorflow/core/data/service/server_lib.cc | 2 +- .../core/kernels/data/experimental/BUILD | 1 + .../data/experimental/data_service_ops.cc | 11 +++++- .../data/experimental/data_service_ops.h | 1 + .../kernel_tests/data_service_ops_test.py | 35 +++++++++++++++++++ 8 files changed, 93 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/data/service/BUILD b/tensorflow/core/data/service/BUILD index 19fe0263df2..aed402fb3b9 100644 --- a/tensorflow/core/data/service/BUILD +++ b/tensorflow/core/data/service/BUILD @@ -149,6 +149,7 @@ cc_library( ], deps = [ "//tensorflow/core:lib", + "//tensorflow/core/distributed_runtime/rpc:grpc_util", tf_grpc_cc_dependency(), ], ) diff --git a/tensorflow/core/data/service/grpc_util.cc b/tensorflow/core/data/service/grpc_util.cc index 40950c51efe..7f9d2ac07e7 100644 --- a/tensorflow/core/data/service/grpc_util.cc +++ b/tensorflow/core/data/service/grpc_util.cc @@ -15,7 +15,11 @@ limitations under the License. 
#include "tensorflow/core/data/service/grpc_util.h" +#include "tensorflow/core/distributed_runtime/rpc/grpc_util.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/env_time.h" +#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/status.h" namespace tensorflow { @@ -32,6 +36,36 @@ Status WrapError(const std::string& message, const grpc::Status& status) { } } +Status Retry(const std::function& f, const std::string& description, + int64 deadline_micros) { + Status s = f(); + for (int num_retries = 0;; ++num_retries) { + if (!errors::IsUnavailable(s) && !errors::IsAborted(s) && + !errors::IsCancelled(s)) { + return s; + } + int64 now_micros = EnvTime::NowMicros(); + if (now_micros > deadline_micros) { + return s; + } + int64 deadline_with_backoff_micros = + now_micros + ::tensorflow::ComputeBackoffMicroseconds(num_retries); + // Wait for a short period of time before retrying. If our backoff would put + // us past the deadline, we truncate it to ensure our attempt starts before + // the deadline. + int64 backoff_until = + std::min(deadline_with_backoff_micros, deadline_micros); + int64 wait_time_micros = backoff_until - now_micros; + if (wait_time_micros > 100 * 1000) { + LOG(INFO) << "Failed to " << description << ". Will retry in " + << wait_time_micros / 1000 << "ms."; + } + Env::Default()->SleepForMicroseconds(wait_time_micros); + s = f(); + } + return s; +} + } // namespace grpc_util } // namespace data } // namespace tensorflow diff --git a/tensorflow/core/data/service/grpc_util.h b/tensorflow/core/data/service/grpc_util.h index 60ea10669a5..b0e39df79eb 100644 --- a/tensorflow/core/data/service/grpc_util.h +++ b/tensorflow/core/data/service/grpc_util.h @@ -26,6 +26,16 @@ namespace grpc_util { // Wraps a grpc::Status in a tensorflow::Status with the given message. Status WrapError(const std::string& message, const grpc::Status& status); +// Retries the given function if the function produces UNAVAILABLE, ABORTED, or +// CANCELLED status codes. We retry these codes because they can all indicate +// preemption of a server. The retries continue until the deadline is exceeded. +// `description` may be used to log that retries are happening. It should +// contain a description of the action being retried, e.g. "register dataset" +// The retry loop uses exponential backoff between retries. +// `deadline_micros` is interpreted as microseconds since the epoch. 
+Status Retry(const std::function& f, const std::string& description, + int64 deadline_micros); + } // namespace grpc_util } // namespace data } // namespace tensorflow diff --git a/tensorflow/core/data/service/server_lib.cc b/tensorflow/core/data/service/server_lib.cc index 751fa6ca2a8..7f698f8669b 100644 --- a/tensorflow/core/data/service/server_lib.cc +++ b/tensorflow/core/data/service/server_lib.cc @@ -29,7 +29,7 @@ constexpr char kPortPlaceholder[] = "%port%"; } GrpcDataServerBase::GrpcDataServerBase(int port, const std::string& protocol) - : requested_port_(port), protocol_(protocol) {} + : requested_port_(port), protocol_(protocol), bound_port_(port) {} Status GrpcDataServerBase::Start() { if (stopped_) { diff --git a/tensorflow/core/kernels/data/experimental/BUILD b/tensorflow/core/kernels/data/experimental/BUILD index bf28d175e6d..b7a3b774cc5 100644 --- a/tensorflow/core/kernels/data/experimental/BUILD +++ b/tensorflow/core/kernels/data/experimental/BUILD @@ -189,6 +189,7 @@ tf_kernel_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core/data/service:data_service", + "//tensorflow/core/data/service:grpc_util", "//tensorflow/core/kernels/data:dataset_utils", "//tensorflow/core/kernels/data:iterator_ops", ], diff --git a/tensorflow/core/kernels/data/experimental/data_service_ops.cc b/tensorflow/core/kernels/data/experimental/data_service_ops.cc index d9ef42d4afa..ba175815c73 100644 --- a/tensorflow/core/kernels/data/experimental/data_service_ops.cc +++ b/tensorflow/core/kernels/data/experimental/data_service_ops.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/core/kernels/data/experimental/data_service_ops.h" #include "tensorflow/core/data/service/data_service.h" +#include "tensorflow/core/data/service/grpc_util.h" #include "tensorflow/core/framework/dataset.h" #include "tensorflow/core/kernels/data/dataset_utils.h" #include "tensorflow/core/platform/errors.h" @@ -23,6 +24,10 @@ limitations under the License. namespace tensorflow { namespace data { +namespace { +const int64 kRetryTimeoutMicros = 1000LL * 1000 * 60 * 60; // 60 minutes. 
+} + RegisterDatasetOp::RegisterDatasetOp(OpKernelConstruction* ctx) : OpKernel(ctx) { int64 external_state_policy_int; @@ -55,7 +60,11 @@ void RegisterDatasetOp::Compute(OpKernelContext* ctx) { DataServiceDispatcherClient client(address, protocol); int64 dataset_id; - OP_REQUIRES_OK(ctx, client.RegisterDataset(graph_def, &dataset_id)); + int64 deadline_micros = EnvTime::NowMicros() + kRetryTimeoutMicros; + OP_REQUIRES_OK( + ctx, grpc_util::Retry( + [&]() { return client.RegisterDataset(graph_def, &dataset_id); }, + /*description=*/"register dataset", deadline_micros)); Tensor* output; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape{}, &output)); diff --git a/tensorflow/core/kernels/data/experimental/data_service_ops.h b/tensorflow/core/kernels/data/experimental/data_service_ops.h index b3d6233aa52..12dbec45b33 100644 --- a/tensorflow/core/kernels/data/experimental/data_service_ops.h +++ b/tensorflow/core/kernels/data/experimental/data_service_ops.h @@ -35,6 +35,7 @@ class RegisterDatasetOp : public OpKernel { static constexpr const char* const kProtocol = "protocol"; static constexpr const char* const kExternalStatePolicy = "external_state_policy"; + static constexpr const char* const kTimeoutMs = "timeout_ms"; explicit RegisterDatasetOp(OpKernelConstruction* ctx); diff --git a/tensorflow/python/data/kernel_tests/data_service_ops_test.py b/tensorflow/python/data/kernel_tests/data_service_ops_test.py index 933654b89a1..c8a4b8262c3 100644 --- a/tensorflow/python/data/kernel_tests/data_service_ops_test.py +++ b/tensorflow/python/data/kernel_tests/data_service_ops_test.py @@ -17,6 +17,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import threading import time from absl.testing import parameterized @@ -226,6 +227,40 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): results = [elem.numpy() for elem in ds] self.assertCountEqual(num_workers * list(range(num_elements)), results) + @combinations.generate(test_base.eager_only_combinations()) + def testStartServersLate(self): + # Test that the data service client performs retries instead of failing when + # the dataset is created before the master and worker are started. + try: + import portpicker # pylint: disable=g-import-not-at-top + dispatcher_port = portpicker.pick_unused_port() + except: + raise self.skipTest("Flakes in portpicker library do not represent " + "TensorFlow errors.") + dispatcher = server_lib.DispatchServer( + port=dispatcher_port, protocol=PROTOCOL, start=False) + worker = server_lib.WorkerServer( + port=0, + dispatcher_address=dispatcher._address, + protocol=PROTOCOL, + start=False) + + def start_servers(): + time.sleep(1) + dispatcher.start() + worker.start() + + start_servers_thread = threading.Thread(target=start_servers, daemon=True) + start_servers_thread.start() + + num_elements = 10 + ds = dataset_ops.Dataset.range(num_elements) + ds = _make_distributed_dataset( + ds, "{}://{}".format(PROTOCOL, dispatcher._address)) + results = [elem.numpy() for elem in ds] + self.assertEqual(list(range(num_elements)), results) + start_servers_thread.join() + @combinations.generate(test_base.eager_only_combinations()) def testAddWorkerMidJob(self): self._dispatcher = server_lib.DispatchServer(port=0, protocol=PROTOCOL) From fd9fbcd3ca429c7c17dcd8bc1ad3fc12afa1a429 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 6 Aug 2020 11:18:26 -0700 Subject: [PATCH 2266/2522] Update ops-related pbtxt files. 
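For reference, a hypothetical caller of the retry helper introduced in PATCH 2265 above would look roughly like the sketch below; the 60-second deadline is arbitrary and not taken from the patch (the kernel itself uses a 60-minute timeout).

```cpp
// Hypothetical caller-side sketch: wrap a single RPC in a lambda and keep
// retrying transient failures until the absolute deadline passes.
#include "tensorflow/core/data/service/data_service.h"
#include "tensorflow/core/data/service/grpc_util.h"
#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/platform/env_time.h"

tensorflow::Status RegisterWithRetry(
    tensorflow::data::DataServiceDispatcherClient& client,
    const tensorflow::GraphDef& graph_def, tensorflow::int64* dataset_id) {
  tensorflow::int64 deadline_micros =
      tensorflow::EnvTime::NowMicros() + 60 * 1000 * 1000;  // 60 seconds
  return tensorflow::data::grpc_util::Retry(
      [&]() { return client.RegisterDataset(graph_def, dataset_id); },
      /*description=*/"register dataset", deadline_micros);
}
```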
PiperOrigin-RevId: 325270409 Change-Id: I81333edef51f8ea526548c64de1c24e14bc56996 --- .../ops_history_v2/CrossReplicaSum.pbtxt | 28 +++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 1 + 2 files changed, 29 insertions(+) diff --git a/tensorflow/core/ops/compat/ops_history_v2/CrossReplicaSum.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/CrossReplicaSum.pbtxt index 09c2402cc5a..f879b85bd10 100644 --- a/tensorflow/core/ops/compat/ops_history_v2/CrossReplicaSum.pbtxt +++ b/tensorflow/core/ops/compat/ops_history_v2/CrossReplicaSum.pbtxt @@ -50,3 +50,31 @@ op { } } } +op { + name: "CrossReplicaSum" + input_arg { + name: "input" + type_attr: "T" + } + input_arg { + name: "group_assignment" + type: DT_INT32 + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_HALF + type: DT_BFLOAT16 + type: DT_FLOAT + type: DT_INT32 + type: DT_UINT32 + } + } + } +} diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 29facd6e298..931208fdb4a 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -9292,6 +9292,7 @@ op { type: "type" allowed_values { list { + type: DT_HALF type: DT_BFLOAT16 type: DT_FLOAT type: DT_INT32 From a5034fd7af2f5d2e86a925f7ba91cc143e239eeb Mon Sep 17 00:00:00 2001 From: Rachel Lim Date: Thu, 6 Aug 2020 11:27:42 -0700 Subject: [PATCH 2267/2522] [tf.data] distribute.py: In batch_sizes_for_worker function, if batch size is a constant tensor, return a constant result instead of manipulating TF ops. This allows for better downstream shape inference, since _RebatchDataset relies on the static value of its batch_sizes input to set the element spec. PiperOrigin-RevId: 325272428 Change-Id: Ic9e7fa81ab34f85d6f71feeeaa90bdbbe2ec2066 --- .../kernel_tests/rebatch_dataset_test.py | 76 ++++++++++++++----- .../data/experimental/ops/distribute.py | 18 ++++- 2 files changed, 75 insertions(+), 19 deletions(-) diff --git a/tensorflow/python/data/experimental/kernel_tests/rebatch_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/rebatch_dataset_test.py index 0b614ef0b84..8175480182f 100644 --- a/tensorflow/python/data/experimental/kernel_tests/rebatch_dataset_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/rebatch_dataset_test.py @@ -26,7 +26,10 @@ from tensorflow.python.data.kernel_tests import test_base from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest from tensorflow.python.framework import combinations +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import tensor_util from tensorflow.python.ops import image_ops from tensorflow.python.ops.ragged import ragged_tensor from tensorflow.python.platform import test @@ -35,8 +38,15 @@ from tensorflow.python.platform import test class BatchSizesForWorkerTest(test_base.DatasetTestBase, parameterized.TestCase): - def _test(self, global_batch_size, num_workers, num_replicas_per_worker): + def _test(self, global_batch_size, num_workers, num_replicas_per_worker, + is_batch_size_static): """Test that all constraints are met for given parameters.""" + if not is_batch_size_static: + # Adding a constant value here prevents downstream computation from + # statically deriving the value of global batch size when running + # in graph mode. 
+ global_batch_size += constant_op.constant(0, dtypes.int64) + batch_sizes_list = [] for i in range(num_workers): batch_sizes_list.append( @@ -65,8 +75,11 @@ class BatchSizesForWorkerTest(test_base.DatasetTestBase, # Constraint (D): Batch size of any two replicas differs by at most one self.assertLessEqual(np.max(batch_sizes_list) - np.min(batch_sizes_list), 1) - @combinations.generate(test_base.default_test_combinations()) - def testBasic(self): + @combinations.generate( + combinations.times( + test_base.default_test_combinations(), + combinations.combine(is_batch_size_static=[True, False]))) + def testBasic(self, is_batch_size_static): # Manually verify basic test case. global_batch_size = 8 num_workers = 2 @@ -76,17 +89,22 @@ class BatchSizesForWorkerTest(test_base.DatasetTestBase, num_workers, num_replicas_per_worker, worker_index) - self.assertAllEqual([2, 2, 2, 2], self.evaluate(batch_sizes)) - self._test(global_batch_size, num_workers, num_replicas_per_worker) + self.assertAllEqual([2, 2, 2, 2], + tensor_util.constant_value(batch_sizes)) + self._test(global_batch_size, num_workers, num_replicas_per_worker, + is_batch_size_static) - @combinations.generate(test_base.default_test_combinations()) - def testBatchSizeIndivisibleByNumWorkers(self): + @combinations.generate( + combinations.times( + test_base.default_test_combinations(), + combinations.combine(is_batch_size_static=[True, False]))) + def testBatchSizeIndivisibleByNumWorkers(self, is_batch_size_static): global_batch_size = 4 num_workers = 3 num_replicas_per_worker = 1 def get_batch_sizes_for_worker(worker_index): - return self.evaluate( + return tensor_util.constant_value( distribute.batch_sizes_for_worker(global_batch_size, num_workers, num_replicas_per_worker, worker_index)) @@ -95,19 +113,41 @@ class BatchSizesForWorkerTest(test_base.DatasetTestBase, self.assertAllEqual([2, 1, 1], get_batch_sizes_for_worker(0)) self.assertAllEqual([1, 1, 2], get_batch_sizes_for_worker(1)) self.assertAllEqual([1, 2, 1], get_batch_sizes_for_worker(2)) - self._test(global_batch_size, num_workers, num_replicas_per_worker) + self._test(global_batch_size, num_workers, num_replicas_per_worker, + is_batch_size_static) - @combinations.generate(test_base.default_test_combinations()) - def testBatchSizeIndivisibleByNumReplicas(self): - self._test(global_batch_size=4, num_workers=1, num_replicas_per_worker=5) + @combinations.generate( + combinations.times( + test_base.default_test_combinations(), + combinations.combine(is_batch_size_static=[True, False]))) + def testBatchSizeIndivisibleByNumReplicas(self, is_batch_size_static): + self._test( + global_batch_size=4, + num_workers=1, + num_replicas_per_worker=5, + is_batch_size_static=is_batch_size_static) - @combinations.generate(test_base.default_test_combinations()) - def testBatchSizeSmallerThanNumReplicas(self): - self._test(global_batch_size=4, num_workers=2, num_replicas_per_worker=5) + @combinations.generate( + combinations.times( + test_base.default_test_combinations(), + combinations.combine(is_batch_size_static=[True, False]))) + def testBatchSizeSmallerThanNumReplicas(self, is_batch_size_static): + self._test( + global_batch_size=4, + num_workers=2, + num_replicas_per_worker=5, + is_batch_size_static=is_batch_size_static) - @combinations.generate(test_base.default_test_combinations()) - def testBatchSizeSmallerThanNumWorkers(self): - self._test(global_batch_size=4, num_workers=5, num_replicas_per_worker=1) + @combinations.generate( + combinations.times( + test_base.default_test_combinations(), 
+ combinations.combine(is_batch_size_static=[True, False]))) + def testBatchSizeSmallerThanNumWorkers(self, is_batch_size_static): + self._test( + global_batch_size=4, + num_workers=5, + num_replicas_per_worker=1, + is_batch_size_static=is_batch_size_static) def _flat_shapes(dataset): diff --git a/tensorflow/python/data/experimental/ops/distribute.py b/tensorflow/python/data/experimental/ops/distribute.py index 7b05b34c110..c5a9048630c 100644 --- a/tensorflow/python/data/experimental/ops/distribute.py +++ b/tensorflow/python/data/experimental/ops/distribute.py @@ -412,6 +412,13 @@ def batch_sizes_for_worker(global_batch_size, num_workers, # Constraint (A) num_subbatches = num_workers * num_replicas_per_worker + offset = worker_index * num_replicas_per_worker + + const_value = tensor_util.constant_value(global_batch_size) + if const_value is not None: + # Use the constant global batch size for further calculations + global_batch_size = const_value + # Let N = W * R. Constraint (B) and (D) jointly mean that the iterations # should have batch size either floor(B/N) or ceil(B/N). Namely, of the N # subbatches a batch is split into, B - N * floor(B/N) of them will have size @@ -422,6 +429,16 @@ def batch_sizes_for_worker(global_batch_size, num_workers, # For worker 0, we assign the first num_ceil subbatches to have size # ceil(B/N), and the remainder to have size floor(B/N). The other workers will # each be offset by R * worker_index in order to meet constraint (C). + if const_value is not None: + # If the global batch size is a known constant value, we return a constant + # tensor directly instead of manipulating it with TF ops. This allows for + # better downstream shape inference. + worker_0 = [floor + 1] * num_ceil + [floor] * (num_subbatches - num_ceil) + return ops.convert_to_tensor( + worker_0[offset:] + worker_0[:offset], + dtype=dtypes.int64, + name="batch_sizes") + worker_0 = array_ops.ones(num_subbatches, dtype=dtypes.int64) worker_0 = floor * worker_0 + array_ops.concat([ array_ops.ones(num_ceil, dtype=dtypes.int64), @@ -429,7 +446,6 @@ def batch_sizes_for_worker(global_batch_size, num_workers, ], axis=0) - offset = worker_index * num_replicas_per_worker return array_ops.concat([worker_0[offset:], worker_0[:offset]], axis=0) From f89cb21f74789ae1b99e09558b98c8eb741ed63a Mon Sep 17 00:00:00 2001 From: Michael Kuchnik Date: Thu, 6 Aug 2020 11:28:21 -0700 Subject: [PATCH 2268/2522] Fix build documentation typo. PiperOrigin-RevId: 325272554 Change-Id: I96d871da73374c6db7c1469927d16bbfcad67290 --- tensorflow/core/framework/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/framework/BUILD b/tensorflow/core/framework/BUILD index 8d2ccd79e77..f1d197c7028 100644 --- a/tensorflow/core/framework/BUILD +++ b/tensorflow/core/framework/BUILD @@ -31,7 +31,7 @@ package( ) # Export all header files for which we do not yet provide a dedicated build -# rule. This avoids breading all the rules in tensorflow/core/BUILD. +# rule. This avoids breaking all the rules in tensorflow/core/BUILD. 
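Back in the distribute.py change of PATCH 2267 above, the key pattern is probing `tensor_util.constant_value` and branching: a statically known batch size is computed in plain Python and returned as a constant, which downstream shape inference can read. A standalone sketch of that pattern with a hypothetical helper (not from the patch):

```python
# Sketch of the constant-folding pattern; halved_batch_size is a hypothetical
# helper, not part of distribute.py.
import tensorflow as tf
from tensorflow.python.framework import tensor_util


def halved_batch_size(global_batch_size):
  global_batch_size = tf.convert_to_tensor(global_batch_size, dtype=tf.int64)
  const_value = tensor_util.constant_value(global_batch_size)
  if const_value is not None:
    # Statically known: do the arithmetic in Python and return a constant.
    return tf.constant(int(const_value) // 2, dtype=tf.int64)
  # Only known at runtime: fall back to TF ops.
  return global_batch_size // 2
```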
exports_files( srcs = [ "allocator_registry.h", From a8a50023bb41d7163646324b43184dda156e90f7 Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Thu, 6 Aug 2020 11:29:44 -0700 Subject: [PATCH 2269/2522] Add quiet-down bq options to avoid setup messages PiperOrigin-RevId: 325272828 Change-Id: I1317a175584e9138e30a1377a90443562d207424 --- tensorflow/tools/ci_build/sizetrack_helper.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tensorflow/tools/ci_build/sizetrack_helper.py b/tensorflow/tools/ci_build/sizetrack_helper.py index 6b4110e265f..48e8879e9df 100755 --- a/tensorflow/tools/ci_build/sizetrack_helper.py +++ b/tensorflow/tools/ci_build/sizetrack_helper.py @@ -247,8 +247,8 @@ def get_all_tested_commits(): # COMMIT_HASH earliest_commit = gcloud( "bq", [ - "--project_id", FLAGS.project, "query", "--format", "csv", - "--nouse_legacy_sql" + "--project_id", FLAGS.project, "--headless", "-q", "query", + "--format", "csv", "--nouse_legacy_sql" ], stdin=query_earliest_included_commit) @@ -360,8 +360,9 @@ def main(): writer = csv.writer(tsvfile, delimiter="\t", quoting=csv.QUOTE_MINIMAL) writer.writerow(next_tsv_row) gcloud("bq", [ - "--project_id", FLAGS.project, "load", "--source_format", "CSV", - "--field_delimiter", "tab", PROJECT_LEVEL_TABLE_NAME, "data.tsv", SCHEMA + "--project_id", FLAGS.project, "--headless", "-q", "load", + "--source_format", "CSV", "--field_delimiter", "tab", + PROJECT_LEVEL_TABLE_NAME, "data.tsv", SCHEMA ]) From d9b5042f033a3f1524b678ed24830351086d5c4c Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Thu, 6 Aug 2020 11:54:56 -0700 Subject: [PATCH 2270/2522] s/no_retry_on_failure/retry_on_failure; NFC no_XYZ properties tend to be harder to reason about since we they force us to deal with double-negatives. PiperOrigin-RevId: 325278071 Change-Id: I4a30b9dd501de237b487ab6afda787f213d40b1c --- .../compiler/tf2tensorrt/utils/trt_allocator.cc | 2 +- tensorflow/core/common_runtime/bfc_allocator.cc | 2 +- tensorflow/core/framework/allocator.h | 16 +++++++++------- tensorflow/core/framework/op_kernel.cc | 9 +++++---- tensorflow/core/kernels/conv_ops_gpu.h | 2 +- tensorflow/core/kernels/fft_ops.cc | 2 +- .../stream_executor/tf_allocator_adapter.cc | 2 +- 7 files changed, 19 insertions(+), 16 deletions(-) diff --git a/tensorflow/compiler/tf2tensorrt/utils/trt_allocator.cc b/tensorflow/compiler/tf2tensorrt/utils/trt_allocator.cc index d4f3a524577..a73877bc3cc 100644 --- a/tensorflow/compiler/tf2tensorrt/utils/trt_allocator.cc +++ b/tensorflow/compiler/tf2tensorrt/utils/trt_allocator.cc @@ -74,7 +74,7 @@ void* TRTDeviceAllocator::allocate(uint64_t size, uint64_t alignment, // algorithm uses too much memory. If we don't fail immediately building the // engine can be *very* slow with TensorRT7 when GPU memory is limited. 
AllocationAttributes attributes; - attributes.no_retry_on_failure = true; + attributes.retry_on_failure = false; void* mem = allocator_->AllocateRaw(alignment, total_size, attributes); if (!mem) return nullptr; diff --git a/tensorflow/core/common_runtime/bfc_allocator.cc b/tensorflow/core/common_runtime/bfc_allocator.cc index 440ed235455..11f28655f05 100644 --- a/tensorflow/core/common_runtime/bfc_allocator.cc +++ b/tensorflow/core/common_runtime/bfc_allocator.cc @@ -230,7 +230,7 @@ void* BFCAllocator::AllocateRawInternalWithRetry( void* BFCAllocator::AllocateRaw(size_t unused_alignment, size_t num_bytes, const AllocationAttributes& allocation_attr) { VLOG(1) << "AllocateRaw " << Name() << " " << num_bytes; - if (allocation_attr.no_retry_on_failure) { + if (!allocation_attr.retry_on_failure) { // Return immediately upon the first failure if this is for allocating an // optional scratch space. bool dump_log_on_failure = VLOG_IS_ON(2); diff --git a/tensorflow/core/framework/allocator.h b/tensorflow/core/framework/allocator.h index dd226b205a9..7b8eba0fda9 100644 --- a/tensorflow/core/framework/allocator.h +++ b/tensorflow/core/framework/allocator.h @@ -39,17 +39,19 @@ class TensorShape; struct AllocationAttributes { AllocationAttributes() = default; - AllocationAttributes(bool no_retry_on_failure, bool allocation_will_be_logged, + AllocationAttributes(bool retry_on_failure, bool allocation_will_be_logged, std::function* freed_by_func) - : no_retry_on_failure(no_retry_on_failure), + : retry_on_failure(retry_on_failure), allocation_will_be_logged(allocation_will_be_logged), freed_by_func(freed_by_func) {} - // If the first attempt to allocate the memory fails, the allocation - // should return immediately without retrying. - // An example use case is optional scratch spaces where a failure - // has only performance impact. - bool no_retry_on_failure = false; + // If the first attempt to allocate the memory fails, the allocation should + // wait and retry (with a timeout). + // + // This is usually set to true, but we may set it to false in cases where a + // failure has only performance impact (e.g. optional scratch space + // allocation). + bool retry_on_failure = true; // If a Tensor is allocated without the following set to true, then // it is logged as an unknown allocation. 
During execution Tensors // should be allocated through the OpKernelContext which records diff --git a/tensorflow/core/framework/op_kernel.cc b/tensorflow/core/framework/op_kernel.cc index d9b679534ee..32bb2200853 100644 --- a/tensorflow/core/framework/op_kernel.cc +++ b/tensorflow/core/framework/op_kernel.cc @@ -709,10 +709,11 @@ Status OpKernelContext::allocate_tensor( DataType type, const TensorShape& shape, Tensor* out_tensor, AllocatorAttributes attr, const AllocationAttributes& allocation_attr) { Allocator* a = get_allocator(attr); - Tensor new_tensor(a, type, shape, - AllocationAttributes(allocation_attr.no_retry_on_failure, - /* allocation_will_be_logged= */ true, - allocation_attr.freed_by_func)); + Tensor new_tensor( + a, type, shape, + AllocationAttributes( + /*retry_on_failure=*/allocation_attr.retry_on_failure, + /*allocation_will_be_logged=*/true, allocation_attr.freed_by_func)); if (!new_tensor.IsInitialized()) { return errors::ResourceExhausted( diff --git a/tensorflow/core/kernels/conv_ops_gpu.h b/tensorflow/core/kernels/conv_ops_gpu.h index ba33224f10a..2e97d486b54 100644 --- a/tensorflow/core/kernels/conv_ops_gpu.h +++ b/tensorflow/core/kernels/conv_ops_gpu.h @@ -68,7 +68,7 @@ class DnnScratchAllocator : public se::ScratchAllocator { memory_limit_, ").")}; } AllocationAttributes allocation_attr; - allocation_attr.no_retry_on_failure = true; + allocation_attr.retry_on_failure = false; Status allocation_status(context_->allocate_temp( DT_UINT8, TensorShape({byte_size}), &temporary_memory, AllocatorAttributes(), allocation_attr)); diff --git a/tensorflow/core/kernels/fft_ops.cc b/tensorflow/core/kernels/fft_ops.cc index 05843594839..050b83980c6 100644 --- a/tensorflow/core/kernels/fft_ops.cc +++ b/tensorflow/core/kernels/fft_ops.cc @@ -372,7 +372,7 @@ class CufftScratchAllocator : public se::ScratchAllocator { return se::port::StatusOr>(); } AllocationAttributes allocation_attr; - allocation_attr.no_retry_on_failure = true; + allocation_attr.retry_on_failure = false; Status allocation_status(context_->allocate_temp( DT_UINT8, TensorShape({byte_size}), &temporary_memory, AllocatorAttributes(), allocation_attr)); diff --git a/tensorflow/stream_executor/tf_allocator_adapter.cc b/tensorflow/stream_executor/tf_allocator_adapter.cc index 0b2d66f7e29..b3483932333 100644 --- a/tensorflow/stream_executor/tf_allocator_adapter.cc +++ b/tensorflow/stream_executor/tf_allocator_adapter.cc @@ -40,7 +40,7 @@ port::StatusOr TfAllocatorAdapter::Allocate( int64 memory_space) { CHECK_EQ(memory_space, 0); tensorflow::AllocationAttributes attrs; - attrs.no_retry_on_failure = !retry_on_failure; + attrs.retry_on_failure = retry_on_failure; void *data = nullptr; if (size != 0) { data = wrapped_->AllocateRaw(tensorflow::Allocator::kAllocatorAlignment, From b9a5452924f8c6f84a832b9e8d33a6e1c2136213 Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Thu, 6 Aug 2020 12:04:14 -0700 Subject: [PATCH 2271/2522] Assign device to split node. When tensor is split on multiple dimensions, the later split refers to previous split's assigned device. 
PiperOrigin-RevId: 325279999 Change-Id: I08220371f52be6582d0a76cd7ac88a5d7b92c170 --- .../core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.cc b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.cc index 5fdc74b79fc..73510319b0a 100644 --- a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.cc +++ b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_pass.cc @@ -685,6 +685,7 @@ xla::StatusOr CreateSplitNode(int num_splits, int dim, split_def.add_input(absl::StrCat(split_dim_node->name(), ":0")); split_def.add_input(absl::StrCat(orig_src->name(), ":", orig_src_output)); Node* split_node = graph->AddNode(split_def, &s); + split_node->set_assigned_device_name(input_assigned_device); TF_RETURN_IF_ERROR(s); graph->AddEdge(split_dim_node, 0, split_node, 0); From c0374a95dfb28e9ff76b01805ae5bba2f80b1a2e Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Thu, 6 Aug 2020 12:13:57 -0700 Subject: [PATCH 2272/2522] Allow creating symbols in TF loops. This is on a best-effort basis, and only works in cases when the symbol being created doesn't depend on previous iterations. This change only adds the feature, but does not enable it yet. It will be enabled separately. PiperOrigin-RevId: 325282159 Change-Id: I29fd9792454c6dac4189d0756d517f9ff0390700 --- .../autograph/core/converter_testing.py | 2 + .../autograph/g3doc/reference/limitations.md | 66 +++++- .../autograph/operators/control_flow.py | 211 ++++++++++++++++-- .../autograph/operators/control_flow_test.py | 77 +++++++ tensorflow/python/autograph/utils/testing.py | 57 ++++- 5 files changed, 387 insertions(+), 26 deletions(-) diff --git a/tensorflow/python/autograph/core/converter_testing.py b/tensorflow/python/autograph/core/converter_testing.py index 9f2604dec94..2909cf3f8bc 100644 --- a/tensorflow/python/autograph/core/converter_testing.py +++ b/tensorflow/python/autograph/core/converter_testing.py @@ -21,6 +21,7 @@ from __future__ import print_function import contextlib import imp import inspect +import os import sys import six @@ -100,6 +101,7 @@ class TestCase(test.TestCase): def setUp(self): # AutoGraph tests must run in graph mode to properly test control flow. + os.environ['AUTOGRAPH_CREATE_SYMBOLS_IN_LOOPS'] = '1' self.graph = ops.Graph().as_default() self.graph.__enter__() diff --git a/tensorflow/python/autograph/g3doc/reference/limitations.md b/tensorflow/python/autograph/g3doc/reference/limitations.md index 70ce5fc7dec..70e3b3a552e 100644 --- a/tensorflow/python/autograph/g3doc/reference/limitations.md +++ b/tensorflow/python/autograph/g3doc/reference/limitations.md @@ -66,22 +66,48 @@ else: pass ``` -Similarly, variables may not be defined inside a TensorFlow loop, unless they -are local to the loop. A variable is local to the loop if (1) it's not used -after the loop and (2) the value from a previour iteration is not used in the -next iteration: +Similarly, variables must usually be defined before a TensorFlow loop. 
+ +The most common example that is not allowed is a loop which initializes some +accumulator variable in the first iteration: ``` del x -while tf.random.uniform(()) > 0.5: # Error -- x must be defined before the loop +for i in tf.range(100): # Error -- x must be defined before the loop + if i == 0: + x = tf.constant(1) + else: + x = x + 1 +tf.print(x) +``` + +When the variable is only used inside the loop and does not depend on previous +iterations, then it's ok to only be initialized inside the loop. + +``` +del x +while tf.random.uniform(()) > 0.5: # Okay -- x is not used after the loop + x = tf.constant(1) +``` + +* New in TF 2.4 * + +As long as it doesn't depend on previous iterations, the variable may also be +used after the loop, however in that case the loop must execute at least one +iteration, and will raise a runtime error otherwise. + +``` +del x +for i in tf.range(10): # Okay -- x does not depend on previous iterations x = tf.constant(1) tf.print(x) ``` ``` del x -while tf.random.uniform(()) > 0.5: # Okay -- x is local to the loop +while tf.constant(False): # Error -- loop must initialize x! x = tf.constant(1) +tf.print(x) ``` Avoid these limitations by defining a default value before the control flow @@ -98,6 +124,34 @@ Note: `None` values and undefined symbols are allowed in Eager control flow, because Eager execution uses Python control flow, rather than TensorFlow control flow ops. +#### Special case: creating Tensors in a loop + +* New in TF 2.4 * + +A very common use-case is to run a training loop that creates some outputs: + +``` +for i in tf.range(num_steps): + outputs = train(next(data_iterator)) +``` + +Often times these outputs can be nested structures of Tensors, which makes them +impractical to initialize ahead of the loop. + +To help with this use-case, AutoGraph lets you run such loops, under certain +conditions: + + * outputs must be a Tensor, Python numeric, or a structure of these + * outputs must not depend on the value from a previous iteration; in other + words, the outputs may only appear to the left of an assignment operation + * the loop must run at least one iteration + +If the type of outputs is not recognized, then the usual +"outputs must be defined before the loop" is raised at graph construction. + +AutoGraph also inserts a `tf.Assert` statement that raises a runtime error +if the loop did not execute at least one iteration. + ### Indirect modifications and hidden side effects in TensorFlow control flow Key Point: We recommend using a functional programming style, immutable Python diff --git a/tensorflow/python/autograph/operators/control_flow.py b/tensorflow/python/autograph/operators/control_flow.py index 03f67d67fee..3418450e813 100644 --- a/tensorflow/python/autograph/operators/control_flow.py +++ b/tensorflow/python/autograph/operators/control_flow.py @@ -60,6 +60,7 @@ from __future__ import division from __future__ import print_function import functools +import os import traceback import numpy as np @@ -79,6 +80,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import func_graph from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_util +from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import control_flow_util from tensorflow.python.ops import math_ops @@ -99,19 +101,70 @@ INEFFICIENT_UNROLL_MIN_OPS = 1 # datasets. Before it can be used though, we need to standardize the interface. 
-def _verify_loop_init_vars(values, symbol_names): - """Ensures that all values in the state are defined when entering a loop.""" - for name, value in zip(symbol_names, values): - if value is None: - raise ValueError("'{}' may not be None before the loop.".format(name)) - if isinstance(value, variables.UndefinedReturnValue): - # Assumption: the loop will only capture the variable which tracks the - # return value if the loop contained a return statement. - # TODO(mdan): This should be checked at the place where return occurs. - raise ValueError( - 'return statements are not supported within a TensorFlow loop.') - if isinstance(value, variables.Undefined): - raise ValueError("'{}' must be defined before the loop.".format(name)) +def _is_none_or_undef(value): + """Tests whether a value is None or undefined. + + AutoGraph represents undefined symbols using special objects of type Undefined + or UndefinedReturnValue. + + Args: + value: value to test + Returns: + Boolean + """ + return ((value is None) + or isinstance(value, variables.UndefinedReturnValue) + or isinstance(value, variables.Undefined)) + + +def _verify_loop_init_vars(init_vars, symbol_names, first_iter_vars=None): + """Ensures that all values in the state are valid to use in a TF loop. + + The init_vars may contain placeholder values derived from first_iter_vars. + + Args: + init_vars: initial loop variables (as taken before entering the loop) + symbol_names: corresponding names of the initial loop variables + first_iter_vars: loop variables after one iteration of the loop + """ + if not symbol_names: + return + if first_iter_vars is None: + first_iter_vars = (None,) * len(symbol_names) + + assert len(symbol_names) == len(init_vars) + assert len(symbol_names) == len(first_iter_vars) + for name, val, fi_val in zip(symbol_names, init_vars, first_iter_vars): + if isinstance(val, variables.UndefinedReturnValue): + if fi_val: + raise ValueError( + 'the return value from a TensorFlow loop may only be a {}; got {}' + .format(LEGAL_LOOP_TYPES, type(fi_val))) + else: + # TODO(mdan): This can be handled by removing the return value. + raise NotImplementedError( + 'a return statement cannot be placed inside this TensorFlow loop;' + ' this may happen if a return statement depends on a' + ' static Python condition such as a hyperparameter') + + error_msg = None + if val is None: + error_msg = "'{}' may not be None before the loop".format(name) + elif isinstance(val, variables.Undefined): + error_msg = "'{}' must be defined before the loop".format(name) + + # This only happens when we could not infer a placeholder for the + # variable. The canonical case when that happens is when _placeholder_value + # couldnot infer a placeholder for it. That means it's of an unknown type + # or it's still undefined after staging one iteration. + if error_msg is not None: + if fi_val: + error_msg += (", unless it's a {}; got {}".format( + LEGAL_LOOP_TYPES, type(fi_val))) + else: + # TODO(mdan): This can be handled by removing the loop var. + error_msg += '.' + raise ValueError(error_msg) def _is_subshape(left, right): @@ -876,21 +929,134 @@ def _shape_invariants_mapping_to_positional_list(mapping, keys): return tuple(result) +# Textual description of what a legal TF loop variable is. This description +# summarizes types that _placeholder_value below can handle. Keep the two +# together and in sync. 
+LEGAL_LOOP_TYPES = 'Tensor, int, float, bool or a list, tuple or dict thereof' + + +def _placeholder_value(like, original): + if isinstance(like, (variables.Undefined, variables.UndefinedReturnValue)): + return original + if isinstance(like, (int, float, bool)): + return type(like)(0) + if tensor_util.is_tensor(like): + return array_ops.zeros(like.shape, like.dtype) + elif isinstance(like, (list, tuple, dict)): + return nest.map_structure(_placeholder_value, like) + return original + + +def _try_handling_undefineds( + body, get_state, set_state, init_vars, nulls, symbol_names): + """Makes a best-effort attempt to substitute undefineds with placeholders. + + Note: this substitution requires two things to happen: + 1. the types of loop variables could be inferred (usually by staging one + iteration) + 2. these types could be replaced by placeholders (e.g. zero values, for + tensors. + + Args: + body: a function representing the loop body. See while_stmt. + get_state: state getter for the loop statement. See while_stmt. + set_state: state getter for the loop statement. See while_stmt. + init_vars: loop variables before entering the loop. See while_stmt. + nulls: list of boolean flags indicating whether the corresponding loop + var is None or undefined. + symbol_names: list of loop variable names. See while_stmt. + Returns: + A tuple (success, new_init_vars). success is a boolean flag indicating + whether types could be successfully inferred (step 1 above). new_init_vars + contains the loop vars, with None or undefined values replaced by + placeholders, where possible (step 2 above). + """ + state_modified = False + + if not os.getenv('AUTOGRAPH_CREATE_SYMBOLS_IN_LOOPS', ''): + _verify_loop_init_vars(init_vars, symbol_names) + return False, init_vars + + try: + # Stage an iteration of the loop body in a temporary graph. + with func_graph.FuncGraph('tmp').as_default(): + # This call to set_state helps report nicer error messages when symbols + # are inconsistently used. + set_state(init_vars) + state_modified = True + + body() + first_iter_vars = get_state() + except (UnboundLocalError, TypeError, ValueError, KeyError): + # Fall back to the old functionality. It will likely result in an input + # validation failure. + first_iter_vars = None + finally: + if state_modified: + set_state(init_vars) + + if first_iter_vars is not None: + # Note: the actual placeholder value doesn't matter, because as the staging + # proved, it will be replaced by an actual value before being read. + init_vars = tuple( + (_placeholder_value(iv, v) if n else v) + for v, n, iv in zip(init_vars, nulls, first_iter_vars)) + success = True + else: + success = False + + # This check runs regardless, in case we captured non-Tensor inputs. 
+ _verify_loop_init_vars(init_vars, symbol_names, first_iter_vars) + + return success, init_vars + + +def _runtime_zero_iterations_errmsg(symbol_names, nulls, init_vars): + """Creates an error message asking for the loop to iterate at least once.""" + var_names = [] + for sn, n, v in zip(symbol_names, nulls, init_vars): + if not n: + continue + if isinstance(v, variables.UndefinedReturnValue): + var_names.append('the function return value') + else: + var_names.append(sn) + var_names = ', '.join(var_names) + return 'loop must iterate at least once to initialize {}'.format(var_names) + + def _tf_while_stmt(test, body, get_state, set_state, symbol_names, opts): """Overload of while_stmt that stages a TF while_stmt.""" init_vars = get_state() - _verify_loop_init_vars(init_vars, symbol_names) + orig_init_vars = init_vars + + nulls = tuple(_is_none_or_undef(v) for v in init_vars) + if any(nulls): + require_one_iteration, init_vars = _try_handling_undefineds( + body, get_state, set_state, init_vars, nulls, symbol_names) + else: + require_one_iteration = False def aug_test(*loop_vars): + if require_one_iteration: + loop_vars = loop_vars[1:] + set_state(loop_vars) return test() def aug_body(*loop_vars): + if require_one_iteration: + loop_vars = loop_vars[1:] + set_state(loop_vars) body() new_loop_vars = get_state() _verify_tf_loop_vars( init_vars, loop_vars, new_loop_vars, symbol_names, opts) + + if require_one_iteration: + new_loop_vars = (True,) + new_loop_vars + return new_loop_vars if 'shape_invariants' in opts: @@ -904,8 +1070,23 @@ def _tf_while_stmt(test, body, get_state, set_state, symbol_names, opts): # This enforces consistency across versions. while_loop_opts['return_same_structure'] = True + if require_one_iteration: + aug_init_vars = (False,) + init_vars + else: + aug_init_vars = init_vars + final_loop_vars = control_flow_ops.while_loop( - aug_test, aug_body, init_vars, **while_loop_opts) + aug_test, aug_body, aug_init_vars, **while_loop_opts) + + if require_one_iteration: + with ops.control_dependencies([ + control_flow_ops.Assert(final_loop_vars[0], [ + _runtime_zero_iterations_errmsg(symbol_names, nulls, orig_init_vars) + ]) + ]): + final_loop_vars = tuple( + array_ops.identity(v) for v in final_loop_vars[1:]) + set_state(final_loop_vars) diff --git a/tensorflow/python/autograph/operators/control_flow_test.py b/tensorflow/python/autograph/operators/control_flow_test.py index 5f0629a163f..553643956f6 100644 --- a/tensorflow/python/autograph/operators/control_flow_test.py +++ b/tensorflow/python/autograph/operators/control_flow_test.py @@ -86,7 +86,9 @@ class ForLoopTest(testing.AutoGraphTestCase): set_state=set_state, symbol_names=('s',), opts={'iterate_names': 'i'}) + self.assertEqual(s, (1234,)) + self.assertOpCreated('StatelessWhile') def test_range_tensor_explicit_limit_delta(self): def body(i): @@ -106,7 +108,9 @@ class ForLoopTest(testing.AutoGraphTestCase): set_state=set_state, symbol_names=('s',), opts={'iterate_names': 'i'}) + self.assertEqual(s, (-171207,)) + self.assertOpCreated('StatelessWhile') def test_range_tensor_explicit_limit_negative_delta(self): def body(i): @@ -126,7 +130,9 @@ class ForLoopTest(testing.AutoGraphTestCase): set_state=set_state, symbol_names=('s',), opts={'iterate_names': 'i'}) + self.assertEqual(s, (171207,)) + self.assertOpCreated('StatelessWhile') def test_range_tensor_random_delta(self): def body(i): @@ -147,7 +153,9 @@ class ForLoopTest(testing.AutoGraphTestCase): set_state=set_state, symbol_names=('s',), opts={'iterate_names': 'i'}) + 
self.assertEqual(s, (1234,)) + self.assertOpCreated('StatelessWhile') def test_range_tensor_random_negative_delta(self): def body(i): @@ -168,7 +176,9 @@ class ForLoopTest(testing.AutoGraphTestCase): set_state=set_state, symbol_names=('s',), opts={'iterate_names': 'i'}) + self.assertEqual(s, (171207,)) + self.assertOpCreated('StatelessWhile') def test_tensor_with_extra_test_object_vars(self): class MutableObject(object): @@ -194,7 +204,9 @@ class ForLoopTest(testing.AutoGraphTestCase): set_state=set_state, symbol_names=('state.field_1', 'state.field_2'), opts={}) + self.assertEqual((state.field_1, state.field_2), (6, 6)) + self.assertOpCreated('StatelessWhile') def test_python(self): def body(i): @@ -214,7 +226,9 @@ class ForLoopTest(testing.AutoGraphTestCase): set_state=set_state, symbol_names=('s',), opts={}) + self.assertEqual(s, 1234) + self.assertNoOpsCreated() def test_python_generator_with_extra_test(self): def new_generator(): @@ -247,6 +261,8 @@ class ForLoopTest(testing.AutoGraphTestCase): self.assertEqual(next(gen), 4) + self.assertNoOpsCreated() + def test_python_generator_with_extra_test_no_iterations(self): def new_generator(): for i in range(5): @@ -275,6 +291,8 @@ class ForLoopTest(testing.AutoGraphTestCase): self.assertEqual(next(gen), 0) + self.assertNoOpsCreated() + def test_tf_dataset(self): def body(i): nonlocal s @@ -293,7 +311,9 @@ class ForLoopTest(testing.AutoGraphTestCase): set_state=set_state, symbol_names=('s',), opts={}) + self.assertEqual(s, (1234,)) + self.assertOpCreated('ScanDataset') def test_dataset_with_extra_test(self): def body(i): @@ -313,7 +333,9 @@ class ForLoopTest(testing.AutoGraphTestCase): set_state=set_state, symbol_names=('s',), opts={}) + self.assertEqual(s, (12,)) + self.assertOpCreated('ScanDataset') def test_dataset_with_extra_test_collection_vars(self): def body(i): @@ -335,7 +357,9 @@ class ForLoopTest(testing.AutoGraphTestCase): set_state=set_state, symbol_names=('l[0]', 's'), opts={}) + self.assertEqual((l[0], s), (3, 3)) + self.assertOpCreated('ScanDataset') def test_dataset_with_extra_test_iteration_limiting(self): def body(it): @@ -356,7 +380,9 @@ class ForLoopTest(testing.AutoGraphTestCase): set_state=set_state, symbol_names=('i',), opts={}) + self.assertEqual(i, (3,)) + self.assertOpCreated('ScanDataset') def test_tf_dataset_no_loop_vars(self): def body(i): @@ -374,6 +400,7 @@ class ForLoopTest(testing.AutoGraphTestCase): opts={}) self.assertEqual(v.read_value(), 1234) + self.assertOpCreated('ScanDataset') def test_tf_iterator(self): def body(i): @@ -395,6 +422,7 @@ class ForLoopTest(testing.AutoGraphTestCase): opts={}) self.assertEqual(s, 1234) + self.assertOpCreated('IteratorGetNextAsOptional') def test_tf_iterator_shape_invariants(self): def body(i): @@ -416,6 +444,7 @@ class ForLoopTest(testing.AutoGraphTestCase): opts={'shape_invariants': [(s, tensor_shape.TensorShape([None]))]}) self.assertAllEqual(s, [0, 1, 2, 3, 4]) + self.assertOpCreated('IteratorGetNextAsOptional') def test_tf_iterator_no_loop_vars(self): def body(i): @@ -433,6 +462,7 @@ class ForLoopTest(testing.AutoGraphTestCase): opts={}) self.assertEqual(v.read_value(), 1234) + self.assertOpCreated('IteratorGetNextAsOptional') def test_tf_ragged_tensor(self): def body(i): @@ -454,6 +484,7 @@ class ForLoopTest(testing.AutoGraphTestCase): opts={}) self.assertEqual(s, (123,)) + self.assertOpCreated('StatelessWhile') def test_tf_ragged_tensor_higher_dimensional(self): def body(i): @@ -479,6 +510,7 @@ class ForLoopTest(testing.AutoGraphTestCase): opts={}) 
self.assertEqual(s, (12,)) + self.assertOpCreated('StatelessWhile') def test_tf_ragged_tensor_no_loop_vars(self): v = self.variable('v', 0, dtypes.int32) @@ -497,6 +529,7 @@ class ForLoopTest(testing.AutoGraphTestCase): # Note: 123 = ((0*10 + 1)*10+2)*10+3 (first element of each row). self.assertEqual(v.read_value(), 123) + self.assertOpCreated('While') def _basic_loop(self, init_value, body_fn): def body(i): @@ -540,6 +573,7 @@ class ForLoopTest(testing.AutoGraphTestCase): class WhileLoopTest(testing.AutoGraphTestCase): def test_tensor(self): + def body(): nonlocal i, s s = s * 10 + i @@ -559,8 +593,38 @@ class WhileLoopTest(testing.AutoGraphTestCase): set_state=set_state, symbol_names=('i', 's'), opts={}) + self.assertEqual(i, 5) self.assertEqual(s, 1234) + self.assertOpCreated('StatelessWhile') + + def test_tensor_creating_variable(self): + + def body(): + nonlocal i, s + i = constant_op.constant(2) + s = i ** 5 + + def set_state(loop_vars): + nonlocal i, s + i, s = loop_vars + + i = variable_operators.Undefined('i') + s = constant_op.constant(0) + control_flow.while_stmt( + test=lambda: math_ops.equal(s, 0), + body=body, + get_state=lambda: (i, s), + set_state=set_state, + symbol_names=('i', 's'), + opts={}) + + self.assertEqual(i, 2) + self.assertEqual(s, 32) + self.assertOpCreated('StatelessWhile') + # Check that the temporary staging of the body did not create extra ops. + # Node naming is inconsistent between V1 and V2. + self.assertGraphContains(r'(while/)?pow$', 1) def test_tensor_with_side_effecting_condition(self): v = self.variable('v', 0, dtypes.int32) @@ -589,6 +653,7 @@ class WhileLoopTest(testing.AutoGraphTestCase): self.assertEqual(i, (5,)) self.assertEqual(v, (12345,)) + self.assertOpCreated('While') def test_tensor_with_python_state(self): class MutableObject(object): @@ -613,8 +678,10 @@ class WhileLoopTest(testing.AutoGraphTestCase): set_state=set_state, symbol_names=('i', 'state.field'), opts={}) + self.assertEqual(i, 5) self.assertEqual(state.field, 1234) + self.assertOpCreated('StatelessWhile') def test_python(self): def body(): @@ -632,7 +699,9 @@ class WhileLoopTest(testing.AutoGraphTestCase): set_state=None, symbol_names=('i', 's'), opts={}) + self.assertEqual(s, 1234) + self.assertNoOpsCreated() def test_python_with_tensor_state(self): def body(): @@ -650,8 +719,10 @@ class WhileLoopTest(testing.AutoGraphTestCase): set_state=None, symbol_names=('i', 's'), opts={}) + self.assertEqual(i, 5) self.assertEqual(s, 1234) + self.assertOpsNotCreated(('While', 'StatelessWhile')) def test_python_while_infinite(self): if not __debug__: @@ -732,6 +803,7 @@ class WhileLoopTest(testing.AutoGraphTestCase): r'.* Large unrolled loop.*Add.*', out_capturer.getvalue())) def _basic_loop(self, init_value, body_fn): + def body(): nonlocal i, s s = body_fn(i, s) @@ -802,6 +874,7 @@ class IfStmtTest(testing.AutoGraphTestCase): self.assertEqual(test_fn(constant_op.constant(True)), 1) self.assertEqual(test_fn(constant_op.constant(False)), -1) + self.assertOpCreated('StatelessIf') def test_tensor_no_outputs(self): @@ -831,6 +904,7 @@ class IfStmtTest(testing.AutoGraphTestCase): self.assertIsNone(test_fn(constant_op.constant(True))) self.assertIsNone(test_fn(constant_op.constant(False))) + self.assertOpCreated('StatelessIf') def test_tensor_multiple_returns(self): @@ -862,6 +936,7 @@ class IfStmtTest(testing.AutoGraphTestCase): self.assertEqual(test_fn(constant_op.constant(True)), (1, 2)) self.assertEqual(test_fn(constant_op.constant(False)), (-1, -2)) + self.assertOpCreated('StatelessIf') 
def test_python(self): @@ -887,6 +962,7 @@ class IfStmtTest(testing.AutoGraphTestCase): self.assertEqual(test_fn(True), 1) self.assertEqual(test_fn(False), -1) + self.assertNoOpsCreated() def test_python_multiple_returns(self): @@ -914,6 +990,7 @@ class IfStmtTest(testing.AutoGraphTestCase): self.assertEqual(test_fn(True), (1, 2)) self.assertEqual(test_fn(False), (-1, -2)) + self.assertNoOpsCreated() def _basic_cond(self, body_fn, else_fn): def body(): diff --git a/tensorflow/python/autograph/utils/testing.py b/tensorflow/python/autograph/utils/testing.py index f4238bea397..1da82db66c8 100644 --- a/tensorflow/python/autograph/utils/testing.py +++ b/tensorflow/python/autograph/utils/testing.py @@ -18,10 +18,13 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os +import re import types import unittest from tensorflow.python.eager import def_function +from tensorflow.python.framework import op_callbacks from tensorflow.python.framework import ops from tensorflow.python.ops import variables from tensorflow.python.platform import test @@ -50,6 +53,10 @@ class AutoGraphTestCase(test.TestCase): baz_actual, value_actual = test_fn() self.assertEqual(baz_actual, value_actual) + + Only assertions that require evaluation outside the function are lifted + outside the function scope. The rest execute inline, at function creation + time. """ def __new__(cls, *args): @@ -65,18 +72,31 @@ class AutoGraphTestCase(test.TestCase): return obj + def _op_callback( + self, op_type, inputs, attrs, outputs, op_name=None, graph=None): + self.trace_log.append(op_type) + def _run_as_tf_function(self, fn): def wrapper(self): @def_function.function(autograph=False) # Testing autograph itself. def fn_wrapper(): self.assertions = [] + self.graph_assertions = [] + self.trace_log = [] fn() targets = [args for _, args in self.assertions] return targets - actuals = self.evaluate(fn_wrapper()) - for (_, args), value in zip(self.assertions, actuals): - args[:] = value + + tensors = fn_wrapper() + + for assertion in self.graph_assertions: + assertion(fn_wrapper.get_concrete_function().graph) + + actuals = self.evaluate(tensors) + for (assertion, _), values in zip(self.assertions, actuals): + assertion(*values) + return wrapper def variable(self, name, value, dtype): @@ -88,12 +108,39 @@ class AutoGraphTestCase(test.TestCase): def setUp(self): super().setUp() + os.environ['AUTOGRAPH_CREATE_SYMBOLS_IN_LOOPS'] = '1' self.variables = {} + self.trace_log = [] + op_callbacks.add_op_callback(self._op_callback) def tearDown(self): - for fn, args in self.assertions: - fn(*args) + op_callbacks.remove_op_callback(self._op_callback) + self.trace_log = None + self.variables = None super().tearDown() + def assertGraphContains(self, op_regex, n): + def assertion(graph): + matches = [] + for node in graph.as_graph_def().node: + if re.match(op_regex, node.name): + matches.append(node) + for fn in graph.as_graph_def().library.function: + for node_def in fn.node_def: + if re.match(op_regex, node_def.name): + matches.append(node_def) + self.assertLen(matches, n) + + self.graph_assertions.append(assertion) + + def assertOpCreated(self, op_type): + self.assertIn(op_type, self.trace_log) + + def assertOpsNotCreated(self, op_types): + self.assertEmpty(set(op_types) & set(self.trace_log)) + + def assertNoOpsCreated(self): + self.assertEmpty(self.trace_log) + def assertEqual(self, *args): self.assertions.append((super().assertEqual, list(args))) From 
c1244778c18ca65152876be6e52b6c1a86a6a74f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 6 Aug 2020 12:17:46 -0700 Subject: [PATCH 2273/2522] Support dynamic dimensions in NNAPI delegate PiperOrigin-RevId: 325282966 Change-Id: I07cc91455c8bc72bd092b4cf257a59ad7809b5ee --- .../lite/delegates/nnapi/nnapi_delegate.cc | 214 ++++++++++++++++-- .../lite/delegates/nnapi/nnapi_delegate.h | 11 + .../delegates/nnapi/nnapi_delegate_kernel.h | 4 +- .../delegates/nnapi/nnapi_delegate_test.cc | 50 ++++ tensorflow/lite/kernels/test_util.h | 16 +- 5 files changed, 266 insertions(+), 29 deletions(-) diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc index ce55d671b5d..2a33d764949 100644 --- a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc +++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc @@ -307,6 +307,87 @@ bool IsHybridOperator(const TfLiteContext* context, int builtin_code, } } +bool HasUnspecifiedDimension(const TfLiteTensor* tensor) { + if (tensor->dims_signature) { + for (int i : TfLiteIntArrayView(tensor->dims_signature)) { + if (i == -1) return true; + } + } + return false; +} + +ANeuralNetworksOperandType ConvertTensorTypeToNNType( + const TfLiteTensor* tensor, TfLiteType ann_type_equivalent) { + int32_t nn_type = 0; + float scale = 0.0f; + int32_t zero_point = 0; + switch (tensor->type) { + case kTfLiteFloat32: + nn_type = ANEURALNETWORKS_TENSOR_FLOAT32; + break; + case kTfLiteUInt8: + nn_type = ann_type_equivalent == kTfLiteInt32 + ? ANEURALNETWORKS_TENSOR_INT32 + : ANEURALNETWORKS_TENSOR_QUANT8_ASYMM; + scale = tensor->params.scale; + zero_point = tensor->params.zero_point; + if (scale == 0) { + // TENSOR_QUANT8_ASYMM and ANEURALNETWORKS_TENSOR_QUANT8_ASYMM + // with zero scale are not valid in NNAPI. + scale = 1; + } + break; + case kTfLiteInt8: + nn_type = ANEURALNETWORKS_TENSOR_QUANT8_SYMM; + scale = tensor->params.scale; + zero_point = tensor->params.zero_point; + if (ann_type_equivalent == kTfLiteUInt8) { + nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM; + zero_point += 128; + } else if (ann_type_equivalent == kTfLiteInt32) { + nn_type = ANEURALNETWORKS_TENSOR_INT32; + zero_point += 128; + } + if (scale == 0) { + // TENSOR_QUANT8_ASYMM and ANEURALNETWORKS_TENSOR_QUANT8_ASYMM + // with zero scale are not valid in NNAPI. + scale = 1; + } + break; + case kTfLiteInt32: + nn_type = ANEURALNETWORKS_TENSOR_INT32; + scale = tensor->params.scale; + zero_point = tensor->params.zero_point; + break; + case kTfLiteBool: + nn_type = ANEURALNETWORKS_TENSOR_BOOL8; + break; + case kTfLiteInt16: + nn_type = ANEURALNETWORKS_TENSOR_QUANT16_SYMM; + scale = tensor->params.scale; + zero_point = tensor->params.zero_point; + break; + default: + break; + } + uint32_t tensor_rank = static_cast(tensor->dims->size); + uint32_t* tensor_dims = reinterpret_cast(tensor->dims->data); + static uint32_t scalar_rank = 1; + // treat scalar input as single cell tensor in NNAPI. 
+ if (tensor_rank == 0) { + tensor_rank = scalar_rank; + tensor_dims = &scalar_rank; + } + ANeuralNetworksOperandType nn_operand_type{ + .type = nn_type, + .dimensionCount = tensor_rank, + .dimensions = tensor_dims, + .scale = scale, + .zeroPoint = zero_point, + }; + return nn_operand_type; +} + constexpr size_t kDefaultByteAlignmentForNNAPI = 16; static size_t getNumPaddingBytes(size_t byte_size) { @@ -554,7 +635,8 @@ class NNAPIOpBuilder { std::map* allocation_mapping, std::vector* nnapi_to_tflite_op_mapping, - ANeuralNetworksModel* nn_model, int* nnapi_errno) + ANeuralNetworksModel* nn_model, int* nnapi_errno, + bool allow_dynamic_dimensions) : nnapi_(nnapi), context_(context), operand_mapping_(tensor_mapping), @@ -562,7 +644,8 @@ class NNAPIOpBuilder { allocation_memory_mapping_(allocation_mapping), nnapi_to_tflite_op_mapping_(nnapi_to_tflite_op_mapping), nn_model_(nn_model), - nnapi_errno_(nnapi_errno) {} + nnapi_errno_(nnapi_errno), + allow_dynamic_dimensions_(allow_dynamic_dimensions) {} TfLiteStatus AddScalarBoolOperand(bool value) { return AddScalarOperand(value, ANEURALNETWORKS_BOOL); @@ -1171,8 +1254,20 @@ class NNAPIOpBuilder { TfLiteTypeGetName(tensor_type)); return kTfLiteError; } + bool has_unspecified_dimensions = HasUnspecifiedDimension(tensor); uint32_t tensor_rank = static_cast(tensor->dims->size); - uint32_t* tensor_dims = reinterpret_cast(tensor->dims->data); + std::vector dims_unspecified(tensor_rank, 0); + if (has_unspecified_dimensions) { + for (int i = 0; i < tensor->dims_signature->size; i++) { + dims_unspecified[i] = tensor->dims_signature->data[i] == -1 + ? 0 + : tensor->dims_signature->data[i]; + } + } + uint32_t* tensor_dims = + has_unspecified_dimensions && allow_dynamic_dimensions_ + ? dims_unspecified.data() + : reinterpret_cast(tensor->dims->data); if (scalar_as_tensor && tensor_rank == 0) { // Use rank 1, shape {1} operand for TFLite scalar tensors. tensor_rank = 1; @@ -1301,6 +1396,9 @@ class NNAPIOpBuilder { // Return status code of the latest NNAPI call. int* nnapi_errno_; + + // Whether to allow dynamic batch size without re-compilation. + bool allow_dynamic_dimensions_; }; // namespace nnapi namespace { @@ -3317,12 +3415,12 @@ TfLiteStatus NNAPIDelegateKernel::Prepare(TfLiteContext* context, return kTfLiteError; } + const auto delegate_options = + StatefulNnApiDelegate::GetOptions(node->delegate); if (nn_compilation_) { return kTfLiteOk; } - const auto delegate_options = - StatefulNnApiDelegate::GetOptions(node->delegate); ANeuralNetworksCompilation* compilation = nullptr; if (!nnapi_devices_.empty()) { // Compile for the selected accelerator. @@ -3468,6 +3566,53 @@ TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context, "setting execution loop timeout", nnapi_errno); } } + // Check if the size of input and output memory pool needs to be resized. + if (delegate_options.allow_dynamic_dimensions) { + size_t total_input_byte_size = 0; + // Make the TensorFlow Lite inputs and outputs to ann_indices. + for (int i : TfLiteIntArrayView(node->inputs)) { + // Constant tensors are not NNAPI inputs. 
+ if (i != kTfLiteOptionalTensor && + context->tensors[i].allocation_type != kTfLiteMmapRo && + // The delegate might not have mapped this input (this can + // happen if one tensor is split in several ones) + operand_mapping_.lite_index_to_ann(i) != -1) { + if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) { + continue; + } + const TfLiteType nn_type_conversion = + operand_mapping_.lite_index_to_ann_type_conversion(i); + int tensor_size = 0; + if (nn_type_conversion == kTfLiteNoType) { + tensor_size = context->tensors[i].bytes; + } else { + size_t type_size; + TF_LITE_ENSURE_OK( + context, GetSizeOfType(context, nn_type_conversion, &type_size)); + tensor_size = NumElements(&context->tensors[i]) * type_size; + } + total_input_byte_size += tensor_size; + total_input_byte_size += getNumPaddingBytes(tensor_size); + } + } + if (total_input_byte_size > nn_input_memory_->get_byte_size()) { + nn_input_memory_.reset( + new NNMemory(nnapi_, "input_pool", total_input_byte_size)); + } + + size_t total_output_byte_size = 0; + for (int i : TfLiteIntArrayView(node->outputs)) { + if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) { + continue; + } + total_output_byte_size += context->tensors[i].bytes; + total_output_byte_size += getNumPaddingBytes(context->tensors[i].bytes); + } + if (total_output_byte_size > nn_output_memory_->get_byte_size()) { + nn_output_memory_.reset( + new NNMemory(nnapi_, "output_pool", total_output_byte_size)); + } + } // Set the input tensor buffers. Note: we access tflite tensors using // absolute indices but NN api indices inputs by relative indices. @@ -3481,14 +3626,25 @@ TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context, if (absolute_input_index == kTfLiteOptionalTensor) { continue; } + ANeuralNetworksOperandType input_nn_operand_type; + ANeuralNetworksOperandType* input_nn_operand_type_ptr = nullptr; TfLiteTensor* tensor = &context->tensors[absolute_input_index]; + TfLiteType ann_type_equivalent = + operand_mapping_.lite_index_to_ann_type_conversion( + absolute_input_index); + if (delegate_options.allow_dynamic_dimensions && + HasUnspecifiedDimension(tensor)) { + input_nn_operand_type = + ConvertTensorTypeToNNType(tensor, ann_type_equivalent); + input_nn_operand_type_ptr = &input_nn_operand_type; + } if (tensor->allocation_type != kTfLiteMmapRo) { if (tensor->buffer_handle != kTfLiteNullBufferHandle && tensor->buffer_handle < tensor_memory_map_->size()) { RETURN_TFLITE_ERROR_IF_NN_ERROR( context, nnapi_->ANeuralNetworksExecution_setInputFromMemory( - execution, relative_input_index, nullptr, + execution, relative_input_index, input_nn_operand_type_ptr, tensor_memory_map_->at(tensor->buffer_handle).memory, 0, tensor->bytes), "associating NNAPI execution input with a memory object", @@ -3496,9 +3652,6 @@ TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context, relative_input_index++; continue; } - TfLiteType ann_type_equivalent = - operand_mapping_.lite_index_to_ann_type_conversion( - absolute_input_index); int tensor_size = 0; if (ann_type_equivalent != kTfLiteNoType) { const auto num_elements = NumElements(tensor); @@ -3544,7 +3697,7 @@ TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context, RETURN_TFLITE_ERROR_IF_NN_ERROR( context, nnapi_->ANeuralNetworksExecution_setInputFromMemory( - execution, relative_input_index, nullptr, + execution, relative_input_index, input_nn_operand_type_ptr, nn_input_memory_->get_handle(), input_offset, tensor_size), "associating NNAPI execution input with a memory object", 
nnapi_errno); @@ -3555,7 +3708,7 @@ TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context, RETURN_TFLITE_ERROR_IF_NN_ERROR( context, nnapi_->ANeuralNetworksExecution_setInputFromMemory( - execution, relative_input_index, nullptr, + execution, relative_input_index, input_nn_operand_type_ptr, nn_input_memory_->get_handle(), input_offset, tensor->bytes), "associating NNAPI execution input with a memory object", nnapi_errno); @@ -3576,13 +3729,23 @@ TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context, if (operand_mapping_.lite_index_to_ann(output_index) == -1) { continue; } + ANeuralNetworksOperandType output_nn_operand_type; + ANeuralNetworksOperandType* output_nn_operand_type_ptr = nullptr; TfLiteTensor* tensor = &context->tensors[output_index]; + if (delegate_options.allow_dynamic_dimensions && + HasUnspecifiedDimension(tensor)) { + TfLiteType ann_type_equivalent = + operand_mapping_.lite_index_to_ann_type_conversion(output_index); + output_nn_operand_type = + ConvertTensorTypeToNNType(tensor, ann_type_equivalent); + output_nn_operand_type_ptr = &output_nn_operand_type; + } if (tensor->buffer_handle != kTfLiteNullBufferHandle && tensor->buffer_handle < tensor_memory_map_->size()) { RETURN_TFLITE_ERROR_IF_NN_ERROR( context, nnapi_->ANeuralNetworksExecution_setOutputFromMemory( - execution, relative_output_index, nullptr, + execution, relative_output_index, output_nn_operand_type_ptr, tensor_memory_map_->at(tensor->buffer_handle).memory, 0, tensor->bytes), "associating NNAPI execution output to a memory object", nnapi_errno); @@ -3591,7 +3754,7 @@ TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context, RETURN_TFLITE_ERROR_IF_NN_ERROR( context, nnapi_->ANeuralNetworksExecution_setOutputFromMemory( - execution, relative_output_index, nullptr, + execution, relative_output_index, output_nn_operand_type_ptr, nn_output_memory_->get_handle(), output_offset, tensor->bytes), "associating NNAPI execution output to a memory object", nnapi_errno); output_offset += tensor->bytes; @@ -3729,16 +3892,15 @@ void NNAPIDelegateKernel::AddDequantizeOperatorsWhereNeeded( } } -TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context, - int* nnapi_errno) { +TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors( + TfLiteContext* context, int* nnapi_errno, bool allow_dynamic_dimensions) { DequantizeMapping dequantize_mapping; // The operand builder allows creating a single op. It is created outside // the for loop to avoid reallocating the vectors. NNAPIOpBuilder builder(nnapi_, context, &operand_mapping_, &dequantize_mapping, &allocation_memory_mapping_, &nnapi_to_tflite_op_mapping_, nn_model_.get(), - nnapi_errno); - + nnapi_errno, allow_dynamic_dimensions); // If we have target accelerators the target SDK version might be // different than the current android version. target_sdk_version_ = nnapi_->android_sdk_version; @@ -3746,7 +3908,6 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(TfLiteContext* context, TF_LITE_ENSURE_STATUS(GetTargetSdkVersion( context, nnapi_, nnapi_devices_, &target_sdk_version_, nnapi_errno)); } - // Add Tensors. for (auto node_index : nodes_) { // Obtain the op and registration. @@ -4133,7 +4294,8 @@ TfLiteStatus NNAPIDelegateKernel::BuildGraph( const TfLiteIntArray* input_tensors, const TfLiteIntArray* output_tensors, int* nnapi_errno) { // Build the ops and tensors. 
- TF_LITE_ENSURE_STATUS(AddOpsAndTensors(context, nnapi_errno)); + TF_LITE_ENSURE_STATUS(AddOpsAndTensors( + context, nnapi_errno, delegate_options.allow_dynamic_dimensions)); // Map input and output tensor indices to ANN std::vector inputs; inputs.reserve(input_tensors->size); @@ -4222,6 +4384,9 @@ TfLiteStatus NNAPIDelegateKernel::BuildGraph( } // namespace nnapi } // namespace delegate +using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI; +using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI11; +using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI12; using ::tflite::delegate::nnapi::NNAPIDelegateKernel; StatefulNnApiDelegate::Data::Data(const NnApi* nnapi) : nnapi(nnapi) {} @@ -4284,6 +4449,9 @@ StatefulNnApiDelegate::StatefulNnApiDelegate(const NnApi* nnapi, options.max_execution_timeout_duration_ns; delegate_data_.max_execution_loop_timeout_duration_ns = options.max_execution_loop_timeout_duration_ns; + if (nnapi->android_sdk_version >= kMinSdkVersionForNNAPI11) { + delegate_data_.allow_dynamic_dimensions = options.allow_dynamic_dimensions; + } TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO, "Created TensorFlow Lite delegate for NNAPI."); Prepare = DoPrepare; @@ -4291,6 +4459,10 @@ StatefulNnApiDelegate::StatefulNnApiDelegate(const NnApi* nnapi, CopyToBufferHandle = DoCopyToBufferHandle; FreeBufferHandle = DoFreeBufferHandle; data_ = &delegate_data_; + if (delegate_data_.allow_dynamic_dimensions) { + flags |= kTfLiteDelegateFlagsAllowDynamicTensors; + flags |= kTfLiteDelegateFlagsRequirePropagatedShapes; + } } StatefulNnApiDelegate::StatefulNnApiDelegate() @@ -4321,6 +4493,7 @@ const StatefulNnApiDelegate::Options StatefulNnApiDelegate::GetOptions( delegate_data->max_execution_timeout_duration_ns; options.max_execution_loop_timeout_duration_ns = delegate_data->max_execution_loop_timeout_duration_ns; + options.allow_dynamic_dimensions = delegate_data->allow_dynamic_dimensions; return options; } @@ -4384,9 +4557,6 @@ int StatefulNnApiDelegate::GetNnApiErrno() const { return delegate_data_.nnapi_errno; } -using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI; -using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI12; - // static TfLiteStatus StatefulNnApiDelegate::GetNodesSupportedByAccelerator( TfLiteContext* context, TfLiteDelegate* delegate, const NnApi* nnapi, diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate.h b/tensorflow/lite/delegates/nnapi/nnapi_delegate.h index 27add64563d..bd4165d8a17 100644 --- a/tensorflow/lite/delegates/nnapi/nnapi_delegate.h +++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate.h @@ -116,6 +116,15 @@ class StatefulNnApiDelegate : public TfLiteDelegate { // within the specified duration, the execution will be aborted. If set to // 0, the default timeout for loops will be used. uint64_t max_execution_loop_timeout_duration_ns = 0; + + // Whether to allow dynamic dimension sizes without re-compilation. + // A tensor of with dynamic dimension must have a valid dim_signature + // defined. + // Only supported in NNAPI 1.1 and newer versions. + // WARNING: Setting this flag to true may result in model being rejected by + // accelerator. This should only be enabled if the target device supports + // dynamic dimensions of the model. + bool allow_dynamic_dimensions = false; }; // Uses default options. 
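The `allow_dynamic_dimensions` option above requires dynamic tensors to carry a valid `dims_signature`, i.e. a shape signature with `-1` for the dynamic axes (the new `ResizeDynamicBatchInputTensorsWorks` test below builds such tensors directly). On the TensorFlow side, a `None` dimension in the converter's input signature is what produces that signature; a rough sketch with a hypothetical model, not the exact conversion path used by the tests:

```
import tensorflow as tf

# Hypothetical add model with a dynamic second dimension.
@tf.function(input_signature=[
    tf.TensorSpec([1, None, 2, 1], tf.float32),
    tf.TensorSpec([1, None, 2, 1], tf.float32)])
def add(a, b):
  return a + b

converter = tf.lite.TFLiteConverter.from_concrete_functions(
    [add.get_concrete_function()])
tflite_model = converter.convert()
# The converted input tensors carry shape_signature [1, -1, 2, 1]; with
# allow_dynamic_dimensions enabled, the delegate resizes its input/output
# memory pools after a resize instead of requiring re-compilation.
```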
@@ -224,6 +233,8 @@ class StatefulNnApiDelegate : public TfLiteDelegate { // Specifies the maximum expected duration in nanosecond for WHILE loops in // the execution uint64_t max_execution_loop_timeout_duration_ns = 0; + // Whether to allow dynamic dimension sizes without re-compilation. + bool allow_dynamic_dimensions = false; explicit Data(const NnApi* nnapi); ~Data(); diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h b/tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h index dbe3f76bc52..72a64d2404a 100644 --- a/tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h +++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h @@ -163,6 +163,7 @@ class NNMemory { ANeuralNetworksMemory* get_handle() { return nn_memory_handle_; } uint8_t* get_data_ptr() { return data_ptr_; } + size_t get_byte_size() { return byte_size_; } private: // NnApi instance to use. Not owned by this object. @@ -352,7 +353,8 @@ class NNAPIDelegateKernel { const TfLiteContext* context, int builtin_code, const TfLiteNode* node, int tflite_node_index, NNAPIOpBuilder* builder, int* nnapi_errno); - TfLiteStatus AddOpsAndTensors(TfLiteContext* context, int* nnapi_errno); + TfLiteStatus AddOpsAndTensors(TfLiteContext* context, int* nnapi_errno, + bool allow_dynamic_dimensions); TfLiteStatus BuildGraph(TfLiteContext* context, const StatefulNnApiDelegate::Options& options, diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc index 8abf15dacb9..fe022199dd6 100644 --- a/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc +++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc @@ -222,6 +222,56 @@ TEST(NNAPIDelegate, ResizeInputTensorsWorks) { EXPECT_THAT(m.GetOutput(), ElementsAreArray({1.0, 1.3, 1.1, 1.5})); } +TEST(NNAPIDelegate, ResizeDynamicBatchInputTensorsWorks) { + StatefulNnApiDelegate::Options options; + options.allow_dynamic_dimensions = true; + + FloatAddOpModel m(options, + {TensorType_FLOAT32, /*shape=*/{1, 3, 2, 1}, /*min=*/0.0f, + /*max=*/0.0f, /*scale=*/0.0f, + /*zero_point=*/0, /*per_channel_quantization=*/false, + /*per_channel_quantization_scales=*/{}, + /*per_channel_quantization_offsets=*/{}, + /*channel_index=*/0, /*traversal_order=*/{}, + /*format=*/{}, + /*block_size=*/{}, /*block_map=*/{}, + /*shape_signature=*/{1, -1, 2, 1}}, + {TensorType_FLOAT32, /*shape=*/{1, 3, 2, 1}, /*min=*/0.0f, + /*max=*/0.0f, /*scale=*/0.0f, + /*zero_point=*/0, /*per_channel_quantization=*/false, + /*per_channel_quantization_scales=*/{}, + /*per_channel_quantization_offsets=*/{}, + /*channel_index=*/0, /*traversal_order=*/{}, + /*format=*/{}, + /*block_size=*/{}, /*block_map=*/{}, + /*shape_signature=*/{1, -1, 2, 1}}, + {TensorType_FLOAT32, /*shape=*/{}, /*min=*/0.0f, + /*max=*/0.0f, /*scale=*/0.0f, + /*zero_point=*/0, /*per_channel_quantization=*/false, + /*per_channel_quantization_scales=*/{}, + /*per_channel_quantization_offsets=*/{}, + /*channel_index=*/0, /*traversal_order=*/{}, + /*format=*/{}, + /*block_size=*/{}, /*block_map=*/{}, + /*shape_signature=*/{1, -1, 2, 1}}, + ActivationFunctionType_NONE); + EXPECT_EQ(m.ResizeInputTensor(m.input1(), {1, 3, 2, 1}), kTfLiteOk); + EXPECT_EQ(m.ResizeInputTensor(m.input2(), {1, 3, 2, 1}), kTfLiteOk); + EXPECT_EQ(m.AllocateTensors(), kTfLiteOk); + m.PopulateTensor(m.input1(), {-2.0, 0.2, 0.7, 0.8, 0.9, 0.7}); + m.PopulateTensor(m.input2(), {0.1, 0.2, 0.3, 0.5, 0.2, 0.8}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({-1.9, 0.4, 1.0, 1.3, 1.1, 1.5})); + + 
EXPECT_EQ(m.ResizeInputTensor(m.input1(), {1, 2, 2, 1}), kTfLiteOk); + EXPECT_EQ(m.ResizeInputTensor(m.input2(), {1, 2, 2, 1}), kTfLiteOk); + EXPECT_EQ(m.AllocateTensors(), kTfLiteOk); + m.PopulateTensor(m.input1(), {0.7, 0.8, 0.9, 0.7}); + m.PopulateTensor(m.input2(), {0.3, 0.5, 0.2, 0.8}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({1.0, 1.3, 1.1, 1.5})); +} + // Sanity check for the state-ful NNAPI delegate. TEST(NNAPIDelegate, StatefulDelegate) { StatefulNnApiDelegate::Options options; diff --git a/tensorflow/lite/kernels/test_util.h b/tensorflow/lite/kernels/test_util.h index c08a40f06a8..3e13335b160 100644 --- a/tensorflow/lite/kernels/test_util.h +++ b/tensorflow/lite/kernels/test_util.h @@ -112,7 +112,8 @@ struct TensorData { std::vector per_channel_quantization_offsets = {}, int32_t channel_index = 0, std::vector traversal_order = {}, std::vector format = {}, - std::vector block_size = {}, std::vector block_map = {}) + std::vector block_size = {}, std::vector block_map = {}, + std::vector shape_signature = {}) : type(type), shape(shape), min(min), @@ -128,7 +129,8 @@ struct TensorData { traversal_order(traversal_order), format(format), block_size(block_size), - block_map(block_map) {} + block_map(block_map), + shape_signature(shape_signature) {} TensorType type; std::vector shape; float min; @@ -143,6 +145,7 @@ struct TensorData { std::vector format; std::vector block_size; std::vector block_map; + std::vector shape_signature; }; class SingleOpResolver : public OpResolver { @@ -582,10 +585,11 @@ class SingleOpModel { buffers_.push_back(CreateBuffer(builder_, data_buffer)); } - tensors_.push_back(CreateTensor(builder_, - builder_.CreateVector(t.shape), t.type, - /*buffer=*/buffer_id, - /*name=*/0, q_params, is_variable)); + tensors_.push_back(CreateTensor( + builder_, builder_.CreateVector(t.shape), t.type, + /*buffer=*/buffer_id, + /*name=*/0, q_params, is_variable, + /*sparsity=*/0, builder_.CreateVector(t.shape_signature))); tensor_data_[id] = t; From 4d59bcb41eba794155ce2f207a43306617fe24a0 Mon Sep 17 00:00:00 2001 From: Ken Franko Date: Thu, 6 Aug 2020 12:28:42 -0700 Subject: [PATCH 2274/2522] Add simple AutomaticOutsideCompilation test with scalar summary. 
PiperOrigin-RevId: 325285306 Change-Id: I7dfac98cc2dc592af346c7229a298b48df568845 --- .../tpu/tpu_outside_compilation_test.py | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/tensorflow/python/tpu/tpu_outside_compilation_test.py b/tensorflow/python/tpu/tpu_outside_compilation_test.py index 291ab7f8d53..72e9f10d184 100644 --- a/tensorflow/python/tpu/tpu_outside_compilation_test.py +++ b/tensorflow/python/tpu/tpu_outside_compilation_test.py @@ -19,10 +19,12 @@ from __future__ import division from __future__ import print_function import os +import tempfile from absl.testing import parameterized import numpy as np +from tensorflow.core.util import event_pb2 from tensorflow.python.distribute import tpu_strategy as tpu_lib from tensorflow.python.distribute.cluster_resolver import tpu_cluster_resolver from tensorflow.python.eager import def_function @@ -30,6 +32,7 @@ from tensorflow.python.eager import remote from tensorflow.python.eager import test from tensorflow.python.framework import config from tensorflow.python.framework import constant_op +from tensorflow.python.lib.io import tf_record from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gradients_impl @@ -40,6 +43,7 @@ from tensorflow.python.ops import string_ops from tensorflow.python.ops import summary_ops_v2 as summary from tensorflow.python.ops import variables from tensorflow.python.platform import flags +from tensorflow.python.platform import gfile from tensorflow.python.tpu import tpu from tensorflow.python.tpu import tpu_strategy_util @@ -70,6 +74,20 @@ def computation_with_string_ops(x): return string_ops.string_to_number(output) +def _events_from_logdir(test_case, logdir): + """Reads summary events from log directory.""" + test_case.assertTrue(gfile.Exists(logdir)) + files = gfile.ListDirectory(logdir) + test_case.assertLen(files, 1) + records = list(tf_record.tf_record_iterator(os.path.join(logdir, files[0]))) + result = [] + for r in records: + event = event_pb2.Event() + event.ParseFromString(r) + result.append(event) + return result + + class TpuOutsideCompilationTest(test.TestCase, parameterized.TestCase): def testResourceVariableAssignOnHost(self): @@ -488,6 +506,36 @@ class OutsideCompilationOnUnsupportedOpTest(test.TestCase): strategy.experimental_local_results(train_step(0)), constant_op.constant(10, shape=(strategy.num_replicas_in_sync))) + def testSummaryWithAutoOutsideCompilation(self): + strategy = get_tpu_strategy() + + def host_computation(x): + summary.scalar("x", x, step=0) + return x * 2.0 + + @def_function.function + def step(): + + def computation(x): + x = x + 1.0 + y = host_computation(x) + return y + 1.0 + + return strategy.run(computation, args=(2.0,)) + + logdir = tempfile.mkdtemp() + summary_writer = summary.create_file_writer(logdir, flush_millis=10000) + with summary_writer.as_default(), summary.always_record_summaries(): + self.assertAllEqual( + strategy.experimental_local_results(step()), + constant_op.constant(7., shape=(strategy.num_replicas_in_sync))) + events = _events_from_logdir(self, logdir) + # There will be 2 entries: 1 summary file header entry, and 1 entry + # written by host. 
+ self.assertLen(events, 2) + self.assertEqual(events[1].summary.value[0].tag, "x") + self.assertEqual(events[1].summary.value[0].simple_value, 3.0) + def testAutoOutsideCompilationWithFunctionalNodes(self): strategy = get_tpu_strategy() From d21fe75e1420aa2fb16db1148cf39f5e3093cebe Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: Thu, 6 Aug 2020 12:41:18 -0700 Subject: [PATCH 2275/2522] [tf.data service] Retry creation of jobs. This lets us recover from errors when dispatchers are restarted during job creation. PiperOrigin-RevId: 325287906 Change-Id: I50f3348b72e988146dd09f2953a1ab1c102381b8 --- .../core/kernels/data/experimental/BUILD | 1 + .../experimental/data_service_dataset_op.cc | 21 ++++++++++++++----- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/kernels/data/experimental/BUILD b/tensorflow/core/kernels/data/experimental/BUILD index b7a3b774cc5..a4682e09b2a 100644 --- a/tensorflow/core/kernels/data/experimental/BUILD +++ b/tensorflow/core/kernels/data/experimental/BUILD @@ -168,6 +168,7 @@ tf_kernel_library( "//tensorflow/core/data:compression_utils", "//tensorflow/core/data:dataset_proto_cc", "//tensorflow/core/data/service:data_service", + "//tensorflow/core/data/service:grpc_util", "//tensorflow/core/distributed_runtime/rpc:grpc_util", "//tensorflow/core/kernels/data:dataset_utils", "//tensorflow/core/kernels/data:name_utils", diff --git a/tensorflow/core/kernels/data/experimental/data_service_dataset_op.cc b/tensorflow/core/kernels/data/experimental/data_service_dataset_op.cc index 233a61f440e..ca73799bd24 100644 --- a/tensorflow/core/kernels/data/experimental/data_service_dataset_op.cc +++ b/tensorflow/core/kernels/data/experimental/data_service_dataset_op.cc @@ -23,6 +23,7 @@ limitations under the License. #include "absl/strings/str_cat.h" #include "tensorflow/core/data/dataset.pb.h" #include "tensorflow/core/data/service/data_service.h" +#include "tensorflow/core/data/service/grpc_util.h" #include "tensorflow/core/distributed_runtime/rpc/grpc_util.h" #include "tensorflow/core/framework/dataset.h" #include "tensorflow/core/framework/model.h" @@ -210,13 +211,23 @@ class DataServiceDatasetOp::Dataset : public DatasetBase { &deregister_fn_)); DataServiceDispatcherClient dispatcher(dataset()->address_, dataset()->protocol_); + int64 deadline_micros = ctx->env()->NowMicros() + kRetryTimeoutMicros; if (dataset()->job_name_.empty()) { - TF_RETURN_IF_ERROR(dispatcher.CreateJob( - dataset()->dataset_id_, dataset()->processing_mode_, &job_id_)); + TF_RETURN_IF_ERROR(grpc_util::Retry( + [&]() { + return dispatcher.CreateJob(dataset()->dataset_id_, + dataset()->processing_mode_, + &job_id_); + }, + "create job", deadline_micros)); } else { - TF_RETURN_IF_ERROR(dispatcher.GetOrCreateJob( - dataset()->dataset_id_, dataset()->processing_mode_, - dataset()->job_name_, iterator_index_, &job_id_)); + TF_RETURN_IF_ERROR(grpc_util::Retry( + [&]() { + return dispatcher.GetOrCreateJob( + dataset()->dataset_id_, dataset()->processing_mode_, + dataset()->job_name_, iterator_index_, &job_id_); + }, + "get or create job", deadline_micros)); } VLOG(1) << "Created data service job with id " << job_id_; return Status::OK(); From 3cb04184776559d2e775fb5e501100d98aa28726 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Thu, 6 Aug 2020 12:50:43 -0700 Subject: [PATCH 2276/2522] [XLA:Python] Remove unnecessary Python buffer tree code in BufferFromPyval. BufferFromPyval hasn't supported Python buffer trees for some time. 
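The job-creation retry added in the tf.data service change above follows a retry-until-deadline pattern: wrap the RPC in a callable and keep retrying until it succeeds or the deadline (here `kRetryTimeoutMicros` past the current time) expires. A rough Python sketch of the pattern, with hypothetical names rather than the actual `grpc_util::Retry` semantics:

```
import time

def retry_until_deadline(fn, description, deadline_s, delay_s=0.1):
  """Calls fn() until it succeeds or the deadline passes."""
  while True:
    try:
      return fn()
    except Exception as e:  # a real implementation retries only transient errors
      if time.time() >= deadline_s:
        raise RuntimeError("{} failed before deadline".format(description)) from e
      time.sleep(delay_s)
      delay_s = min(delay_s * 2, 1.0)  # simple backoff between attempts

# Usage (hypothetical client):
# job_id = retry_until_deadline(
#     lambda: dispatcher.create_job(dataset_id, processing_mode),
#     "create job", time.time() + 3600)
```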
PiperOrigin-RevId: 325289819 Change-Id: Iaf6f464990278d893ef4c030ae0acea44f8f325e --- tensorflow/compiler/xla/python/py_client.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/compiler/xla/python/py_client.cc b/tensorflow/compiler/xla/python/py_client.cc index 1f07c6e2042..4224d69dc8f 100644 --- a/tensorflow/compiler/xla/python/py_client.cc +++ b/tensorflow/compiler/xla/python/py_client.cc @@ -104,7 +104,6 @@ StatusOr> PyClient::BufferFromPyval( return InvalidArgument("from_python argument must be an array."); } - TF_ASSIGN_OR_RETURN(PythonBufferTree tree, GetPythonBufferTree(argument)); std::shared_ptr py_buffer_ref = GlobalPyRefManager()->ManageReference(std::move(c->array)); From 7df6aa0253ae7c7423ad9f625e782d668649609e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 6 Aug 2020 12:53:49 -0700 Subject: [PATCH 2277/2522] Added beta parameter from FTRL paper to main optimizer class. PiperOrigin-RevId: 325290409 Change-Id: I0aa85a26b188b9ab3e1faa7462dca4c5d81f8712 --- RELEASE.md | 1 + tensorflow/python/training/ftrl.py | 39 +++++++----- tensorflow/python/training/ftrl_test.py | 59 +++++++++++++++++++ .../v1/tensorflow.train.-ftrl-optimizer.pbtxt | 2 +- 4 files changed, 86 insertions(+), 15 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 62bdc11aa68..0eb673b0db7 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -95,6 +95,7 @@ * Error messages when Functional API construction goes wrong (and when ops cannot be converted to Keras layers automatically) should be clearer and easier to understand. * `Optimizer.minimize` can now accept a loss `Tensor` and a `GradientTape` as an alternative to accepting a `callable` loss. + * Added `beta` parameter to FTRL optimizer to match paper. * `tf.function` / AutoGraph: * Added `experimental_follow_type_hints` argument for `tf.function`. When True, the function may use type annotations to optimize the tracing diff --git a/tensorflow/python/training/ftrl.py b/tensorflow/python/training/ftrl.py index c7b3867631d..6c8a6ceadc5 100644 --- a/tensorflow/python/training/ftrl.py +++ b/tensorflow/python/training/ftrl.py @@ -49,7 +49,8 @@ class FtrlOptimizer(optimizer.Optimizer): name="Ftrl", accum_name=None, linear_name=None, - l2_shrinkage_regularization_strength=0.0): + l2_shrinkage_regularization_strength=0.0, + beta=None): r"""Construct a new FTRL optimizer. Args: @@ -79,10 +80,11 @@ class FtrlOptimizer(optimizer.Optimizer): function w.r.t. the weights w. Specifically, in the absence of L1 regularization, it is equivalent to the following update rule: - w_{t+1} = w_t - lr_t / (1 + 2*L2*lr_t) * g_t - - 2*L2_shrinkage*lr_t / (1 + 2*L2*lr_t) * w_t + w_{t+1} = w_t - lr_t / (beta + 2*L2*lr_t) * g_t - + 2*L2_shrinkage*lr_t / (beta + 2*L2*lr_t) * w_t where lr_t is the learning rate at t. When input is sparse shrinkage will only happen on the active weights. + beta: A float value; corresponds to the beta parameter in the paper. Raises: ValueError: If one of the arguments is invalid. 
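As the next hunk shows, `beta` is not forwarded to the underlying FTRL kernels; it is folded into the L2 regularization strength that they already accept. A small sketch of that relationship, using the values from the tests added in this change:

```
learning_rate = 3.0
l2 = 0.1
beta = 0.1

# The kernels only take an L2 strength, so beta is absorbed into it:
adjusted_l2 = l2 + beta / (2.0 * learning_rate)  # ~0.11667
```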
@@ -119,12 +121,13 @@ class FtrlOptimizer(optimizer.Optimizer): self._initial_accumulator_value = initial_accumulator_value self._l1_regularization_strength = l1_regularization_strength self._l2_regularization_strength = l2_regularization_strength + self._beta = (0.0 if beta is None else beta) self._l2_shrinkage_regularization_strength = ( l2_shrinkage_regularization_strength) self._learning_rate_tensor = None self._learning_rate_power_tensor = None self._l1_regularization_strength_tensor = None - self._l2_regularization_strength_tensor = None + self._adjusted_l2_regularization_strength_tensor = None self._l2_shrinkage_regularization_strength_tensor = None self._accum_name = accum_name self._linear_name = linear_name @@ -142,8 +145,14 @@ class FtrlOptimizer(optimizer.Optimizer): self._learning_rate, name="learning_rate") self._l1_regularization_strength_tensor = ops.convert_to_tensor( self._l1_regularization_strength, name="l1_regularization_strength") - self._l2_regularization_strength_tensor = ops.convert_to_tensor( - self._l2_regularization_strength, name="l2_regularization_strength") + # L2 regularization strength with beta added in so that the underlying + # TensorFlow ops do not need to include that parameter. + self._adjusted_l2_regularization_strength_tensor = ops.convert_to_tensor( + self._l2_regularization_strength + self._beta / + (2. * self._learning_rate), + name="adjusted_l2_regularization_strength") + assert self._adjusted_l2_regularization_strength_tensor is not None + self._beta_tensor = ops.convert_to_tensor(self._beta, name="beta") self._l2_shrinkage_regularization_strength_tensor = ops.convert_to_tensor( self._l2_shrinkage_regularization_strength, name="l2_shrinkage_regularization_strength") @@ -162,7 +171,7 @@ class FtrlOptimizer(optimizer.Optimizer): math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype), math_ops.cast(self._l1_regularization_strength_tensor, var.dtype.base_dtype), - math_ops.cast(self._l2_regularization_strength_tensor, + math_ops.cast(self._adjusted_l2_regularization_strength_tensor, var.dtype.base_dtype), math_ops.cast(self._learning_rate_power_tensor, var.dtype.base_dtype), use_locking=self._use_locking) @@ -175,7 +184,7 @@ class FtrlOptimizer(optimizer.Optimizer): math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype), math_ops.cast(self._l1_regularization_strength_tensor, var.dtype.base_dtype), - math_ops.cast(self._l2_regularization_strength_tensor, + math_ops.cast(self._adjusted_l2_regularization_strength_tensor, var.dtype.base_dtype), math_ops.cast(self._l2_shrinkage_regularization_strength_tensor, var.dtype.base_dtype), @@ -194,7 +203,7 @@ class FtrlOptimizer(optimizer.Optimizer): math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype), math_ops.cast(self._l1_regularization_strength_tensor, var.dtype.base_dtype), - math_ops.cast(self._l2_regularization_strength_tensor, + math_ops.cast(self._adjusted_l2_regularization_strength_tensor, var.dtype.base_dtype), math_ops.cast(self._learning_rate_power_tensor, var.dtype.base_dtype), use_locking=self._use_locking) @@ -207,7 +216,7 @@ class FtrlOptimizer(optimizer.Optimizer): math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype), math_ops.cast(self._l1_regularization_strength_tensor, var.dtype.base_dtype), - math_ops.cast(self._l2_regularization_strength_tensor, + math_ops.cast(self._adjusted_l2_regularization_strength_tensor, var.dtype.base_dtype), math_ops.cast(self._l2_shrinkage_regularization_strength_tensor, var.dtype.base_dtype), @@ -227,7 +236,7 @@ class 
FtrlOptimizer(optimizer.Optimizer): math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype), math_ops.cast(self._l1_regularization_strength_tensor, var.dtype.base_dtype), - math_ops.cast(self._l2_regularization_strength_tensor, + math_ops.cast(self._adjusted_l2_regularization_strength_tensor, var.dtype.base_dtype), math_ops.cast(self._learning_rate_power_tensor, var.dtype.base_dtype), use_locking=self._use_locking) @@ -241,7 +250,7 @@ class FtrlOptimizer(optimizer.Optimizer): math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype), math_ops.cast(self._l1_regularization_strength_tensor, var.dtype.base_dtype), - math_ops.cast(self._l2_regularization_strength_tensor, + math_ops.cast(self._adjusted_l2_regularization_strength_tensor, var.dtype.base_dtype), math_ops.cast(self._l2_shrinkage_regularization_strength_tensor, grad.dtype.base_dtype), @@ -260,7 +269,8 @@ class FtrlOptimizer(optimizer.Optimizer): indices, math_ops.cast(self._learning_rate_tensor, grad.dtype), math_ops.cast(self._l1_regularization_strength_tensor, grad.dtype), - math_ops.cast(self._l2_regularization_strength_tensor, grad.dtype), + math_ops.cast(self._adjusted_l2_regularization_strength_tensor, + grad.dtype), math_ops.cast(self._learning_rate_power_tensor, grad.dtype), use_locking=self._use_locking) else: @@ -272,7 +282,8 @@ class FtrlOptimizer(optimizer.Optimizer): indices, math_ops.cast(self._learning_rate_tensor, grad.dtype), math_ops.cast(self._l1_regularization_strength_tensor, grad.dtype), - math_ops.cast(self._l2_regularization_strength_tensor, grad.dtype), + math_ops.cast(self._adjusted_l2_regularization_strength_tensor, + grad.dtype), math_ops.cast(self._l2_shrinkage_regularization_strength_tensor, grad.dtype), math_ops.cast(self._learning_rate_power_tensor, grad.dtype), diff --git a/tensorflow/python/training/ftrl_test.py b/tensorflow/python/training/ftrl_test.py index f0cbe13e037..ff1bf177a72 100644 --- a/tensorflow/python/training/ftrl_test.py +++ b/tensorflow/python/training/ftrl_test.py @@ -161,6 +161,65 @@ class FtrlOptimizerTest(test.TestCase): self.assertAllCloseAccordingToType( np.array([-0.93460727, -1.86147261]), v1_val) + def testFtrlWithBeta(self): + # The v1 optimizers do not support eager execution + with ops.Graph().as_default(): + for dtype in [dtypes.half, dtypes.float32]: + with self.cached_session(): + var0 = variables.Variable([1.0, 2.0], dtype=dtype) + var1 = variables.Variable([4.0, 3.0], dtype=dtype) + grads0 = constant_op.constant([0.1, 0.2], dtype=dtype) + grads1 = constant_op.constant([0.01, 0.02], dtype=dtype) + + opt = ftrl.FtrlOptimizer(3.0, initial_accumulator_value=0.1, beta=0.1) + update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + self.evaluate(variables.global_variables_initializer()) + + v0_val, v1_val = self.evaluate([var0, var1]) + self.assertAllCloseAccordingToType([1.0, 2.0], v0_val) + self.assertAllCloseAccordingToType([4.0, 3.0], v1_val) + + # Run 10 steps FTRL + for _ in range(10): + update.run() + v0_val, v1_val = self.evaluate([var0, var1]) + self.assertAllCloseAccordingToType( + np.array([-6.096838, -9.162214]), v0_val) + self.assertAllCloseAccordingToType( + np.array([-0.717741, -1.425132]), v1_val) + + def testFtrlWithL2_Beta(self): + # The v1 optimizers do not support eager execution + with ops.Graph().as_default(): + for dtype in [dtypes.half, dtypes.float32]: + with self.cached_session(): + var0 = variables.Variable([1.0, 2.0], dtype=dtype) + var1 = variables.Variable([4.0, 3.0], dtype=dtype) + grads0 = constant_op.constant([0.1, 
0.2], dtype=dtype) + grads1 = constant_op.constant([0.01, 0.02], dtype=dtype) + + opt = ftrl.FtrlOptimizer( + 3.0, + initial_accumulator_value=0.1, + l1_regularization_strength=0.0, + l2_regularization_strength=0.1, + beta=0.1) + update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + self.evaluate(variables.global_variables_initializer()) + + v0_val, v1_val = self.evaluate([var0, var1]) + self.assertAllCloseAccordingToType([1.0, 2.0], v0_val) + self.assertAllCloseAccordingToType([4.0, 3.0], v1_val) + + # Run 10 steps FTRL + for _ in range(10): + update.run() + v0_val, v1_val = self.evaluate([var0, var1]) + self.assertAllCloseAccordingToType( + np.array([-2.735487, -4.704625]), v0_val) + self.assertAllCloseAccordingToType( + np.array([-0.294335, -0.586556]), v1_val) + def testFtrlWithL1_L2(self): # The v1 optimizers do not support eager execution with ops.Graph().as_default(): diff --git a/tensorflow/tools/api/golden/v1/tensorflow.train.-ftrl-optimizer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.train.-ftrl-optimizer.pbtxt index 1d1aceb0138..9e12ae9b71f 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.train.-ftrl-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.train.-ftrl-optimizer.pbtxt @@ -18,7 +18,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'learning_rate\', \'learning_rate_power\', \'initial_accumulator_value\', \'l1_regularization_strength\', \'l2_regularization_strength\', \'use_locking\', \'name\', \'accum_name\', \'linear_name\', \'l2_shrinkage_regularization_strength\'], varargs=None, keywords=None, defaults=[\'-0.5\', \'0.1\', \'0.0\', \'0.0\', \'False\', \'Ftrl\', \'None\', \'None\', \'0.0\'], " + argspec: "args=[\'self\', \'learning_rate\', \'learning_rate_power\', \'initial_accumulator_value\', \'l1_regularization_strength\', \'l2_regularization_strength\', \'use_locking\', \'name\', \'accum_name\', \'linear_name\', \'l2_shrinkage_regularization_strength\', \'beta\'], varargs=None, keywords=None, defaults=[\'-0.5\', \'0.1\', \'0.0\', \'0.0\', \'False\', \'Ftrl\', \'None\', \'None\', \'0.0\', \'None\'], " } member_method { name: "apply_gradients" From dffb0b56192f4c95fbf563a82742b4a3f4881e05 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Thu, 6 Aug 2020 12:57:39 -0700 Subject: [PATCH 2278/2522] [XLA] Avoid harmless signed integer overflow A U16 of 46977 multiplied by a U16 of 53826, when evaluated in the evaluator, results in the operands of the multiply getting promoted to the C++ type "int" which is signed. The result of the multiply will overflow a signed int and give a negative result. Instead, let's promote both operands to "unsigned int" which will not suffer from any overflow issues. 
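A minimal standalone illustration of the problem, using the constants quoted above (this snippet is not part of the patch, and it assumes the usual 32-bit `int`):

```
#include <cstdint>
#include <iostream>

int main() {
  std::uint16_t a = 46977;
  std::uint16_t b = 53826;

  // Integer promotion converts both uint16_t operands to (signed) int, so
  // `a * b` evaluates 46977 * 53826 = 2528584002, which exceeds INT_MAX and
  // is undefined behavior for signed arithmetic.
  // int bad = a * b;  // UB: signed integer overflow

  // Promoting the operands to unsigned int first keeps the multiply well
  // defined: unsigned arithmetic wraps modulo 2^32 instead of overflowing.
  unsigned int safe =
      static_cast<unsigned int>(a) * static_cast<unsigned int>(b);
  std::cout << safe << "\n";  // prints 2528584002
}
```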
PiperOrigin-RevId: 325291178 Change-Id: Ib20b9b56fe3803e7ec4a927abb6b862683523068 --- .../xla/service/hlo_evaluator_typed_visitor.h | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h index 9226cd556ff..d5f0c62adc1 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h @@ -48,22 +48,26 @@ template struct is_complex_t : absl::disjunction, std::is_same> {}; +namespace detail { +template +using unsigned_promoted_type_t = + std::make_unsigned_t() + std::declval())>; +} + // ToArithmeticSafeType(T t): -// - converts `t` to the bitwise-equivalent `unsigned T` if T is a signed +// - converts `t` to an unsigned integer at least as wide as `int` if T is an // integer, and // - otherwise returns `t` unchanged. // // It's UB in C++ to under/overflow a signed integer, so we wrap all arithmetic // in this type to force 2's complement behavior. template ::value && - std::is_signed::value>::type* = nullptr> -typename std::make_unsigned::type ToArithmeticSafeType(T t) { - return static_cast::type>(t); + typename std::enable_if::value>::type* = nullptr> +detail::unsigned_promoted_type_t ToArithmeticSafeType(T t) { + return static_cast>(t); } template ::value || - !std::is_signed::value>::type* = nullptr> + typename std::enable_if::value>::type* = nullptr> T ToArithmeticSafeType(T t) { return std::move(t); } From 444e113642f3a419aa85043a0d5253abb311a338 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Thu, 6 Aug 2020 13:12:02 -0700 Subject: [PATCH 2279/2522] Added a couple missing symbols. --- tensorflow/core/framework/tensor_shape.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/core/framework/tensor_shape.h b/tensorflow/core/framework/tensor_shape.h index dbe103088c1..81020ff7820 100644 --- a/tensorflow/core/framework/tensor_shape.h +++ b/tensorflow/core/framework/tensor_shape.h @@ -299,6 +299,9 @@ class TensorShape : public TensorShapeBase { public: using TensorShapeBase::TensorShapeBase; + TF_EXPORT TensorShape(): TensorShapeBase() {}; + TF_EXPORT TensorShape(const TensorShapeProto& proto): TensorShapeBase(proto) {}; + /// Allow a TensorShape to be used as a PartialTensorShape without copying operator const PartialTensorShape&() const; // NOLINT(runtime/explicit) From 8145abe98c231fd3a6fe3453f44b4d011a7f73bc Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Thu, 6 Aug 2020 13:07:39 -0700 Subject: [PATCH 2280/2522] Swap to new sizetracker-specific artifact bucket PiperOrigin-RevId: 325293486 Change-Id: Icd442b8ec5562453ebb48ac4be717a19c5d90e80 --- tensorflow/tools/ci_build/sizetrack_helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/sizetrack_helper.py b/tensorflow/tools/ci_build/sizetrack_helper.py index 48e8879e9df..ff5ff1bf60d 100755 --- a/tensorflow/tools/ci_build/sizetrack_helper.py +++ b/tensorflow/tools/ci_build/sizetrack_helper.py @@ -78,7 +78,7 @@ parser.add_argument( parser.add_argument( "--bucket", type=str, - default="gs://tensorflow-testing-bucket", + default="gs://tf-sizetracker-artifacts", help="GCS bucket for artifacts.") parser.add_argument( "--team", From ba13b9a9ce61c1603f88b8daa4fa801627b09a91 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Thu, 6 Aug 2020 13:21:50 -0700 Subject: [PATCH 2281/2522] [XLA:SPMD] Very first step to support partial replication with ElementWiseOps. PiperOrigin-RevId: 325296526 Change-Id: I777e04f9887ed6d73aaf39f767c3c8ca9b199dce --- .../xla/service/spmd/spmd_partitioner.cc | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc index 906303e1a3c..9db76a65486 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc @@ -296,6 +296,22 @@ PartitionedHlo PartitionedHlo::ReshardNoCache(const HloSharding& target) { return PartitionedHlo(copy, base_shape_, state_); } + // 'Replicated' to partial replicated. + if (target.ReplicateOnLastTileDim()) { + std::vector group_dims(target.tile_assignment().num_dimensions() - + 1); + std::iota(group_dims.begin(), group_dims.end(), 0); + auto target_grouped = GroupShardingOnDims(target, group_dims); + auto per_group_partitioner_state = CreatePerGroupPartitioningState( + state_, target_grouped.device_groups, state_.b); + auto partially_sharded = PerGroupSliceFromReplicated( + hlo_, state_.partition_id, target_grouped.device_groups, group_dims, + target_grouped.group_dim_sizes, state_.b); + partially_sharded->set_sharding(target); + return PartitionedHlo(partially_sharded, base_shape(), + per_group_partitioner_state); + } + // 'Replicated' to 'Tiled'. auto padded_hlo = PadBaseShapeBeforeUnevenTiledSharding(hlo_, target, state_.b); From 023e45081e55e496ec179e23e5c1389714f4ae2f Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Thu, 6 Aug 2020 13:29:11 -0700 Subject: [PATCH 2282/2522] Fix. --- tensorflow/core/framework/tensor_shape.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/framework/tensor_shape.h b/tensorflow/core/framework/tensor_shape.h index 81020ff7820..c65aba2babf 100644 --- a/tensorflow/core/framework/tensor_shape.h +++ b/tensorflow/core/framework/tensor_shape.h @@ -297,7 +297,10 @@ std::ostream& operator<<(std::ostream& os, const TensorShapeBase& tsb) { /// zero dimensions and one element, and call AddDim() to add dimensions later. class TensorShape : public TensorShapeBase { public: - using TensorShapeBase::TensorShapeBase; + TF_EXPORT TensorShape(gtl::ArraySlice dim_sizes) + : TensorShapeBase(dim_sizes) {}; + TF_EXPORT TensorShape(std::initializer_list dim_sizes) + : TensorShapeBase(dim_sizes) {}; TF_EXPORT TensorShape(): TensorShapeBase() {}; TF_EXPORT TensorShape(const TensorShapeProto& proto): TensorShapeBase(proto) {}; @@ -324,6 +327,9 @@ class TensorShape : public TensorShapeBase { template Eigen::DSizes AsEigenDSizesWithPadding() const; + protected: + explicit TensorShape(DataType dt): TensorShapeBase(dt) {}; + private: // These CHECK fail to ease debugging. // REQUIRES: dims() == NDIMS From b74d6158ef68ce8eab6520a3dc6239819c9c3a32 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Thu, 6 Aug 2020 13:37:35 -0700 Subject: [PATCH 2283/2522] Fix. 
--- tensorflow/core/framework/tensor_shape.h | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/tensorflow/core/framework/tensor_shape.h b/tensorflow/core/framework/tensor_shape.h index c65aba2babf..dbe103088c1 100644 --- a/tensorflow/core/framework/tensor_shape.h +++ b/tensorflow/core/framework/tensor_shape.h @@ -297,13 +297,7 @@ std::ostream& operator<<(std::ostream& os, const TensorShapeBase& tsb) { /// zero dimensions and one element, and call AddDim() to add dimensions later. class TensorShape : public TensorShapeBase { public: - TF_EXPORT TensorShape(gtl::ArraySlice dim_sizes) - : TensorShapeBase(dim_sizes) {}; - TF_EXPORT TensorShape(std::initializer_list dim_sizes) - : TensorShapeBase(dim_sizes) {}; - - TF_EXPORT TensorShape(): TensorShapeBase() {}; - TF_EXPORT TensorShape(const TensorShapeProto& proto): TensorShapeBase(proto) {}; + using TensorShapeBase::TensorShapeBase; /// Allow a TensorShape to be used as a PartialTensorShape without copying operator const PartialTensorShape&() const; // NOLINT(runtime/explicit) @@ -327,9 +321,6 @@ class TensorShape : public TensorShapeBase { template Eigen::DSizes AsEigenDSizesWithPadding() const; - protected: - explicit TensorShape(DataType dt): TensorShapeBase(dt) {}; - private: // These CHECK fail to ease debugging. // REQUIRES: dims() == NDIMS From 16414a14f478716fc0acfa9a5c2330ba3a24919e Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: Thu, 6 Aug 2020 13:44:55 -0700 Subject: [PATCH 2284/2522] [tf.data service] Support recovering from dispatcher restart. When a work_dir is provided, the dispatcher will journal its state to disk, so that when it gets restarted, it can recover its original state and continue running as before. PiperOrigin-RevId: 325301539 Change-Id: Iab331517f59da92edd40fac65a025937248b2114 --- RELEASE.md | 5 + tensorflow/core/data/service/worker_impl.cc | 19 +- .../data/experimental/service/__init__.py | 19 +- .../data/experimental/service/server_lib.py | 43 ++- .../kernel_tests/data_service_ops_test.py | 307 ++++++++++-------- ...xperimental.service.-dispatch-server.pbtxt | 2 +- 6 files changed, 240 insertions(+), 155 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 0eb673b0db7..bb4d29f9020 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -72,6 +72,11 @@ `tf.data.experimental.service.from_dataset_id` APIs to enable one process to register a dataset with the tf.data service, and another process to consume data from the dataset. + * Added support for tf.data service dispatcher fault tolerance. To enable + fault tolerance, configure a `work_dir` when running your dispatcher + server and set `dispatcher_fault_tolerance=True`. The dispatcher will + store its state to `work_dir`, so that on restart it can continue from its + previous state after restart. * Added optional `exclude_cols` parameter to CsvDataset. This parameter is the complement of `select_cols`; at most one of these should be specified. 
* We have implemented an optimization which reorders data-discarding diff --git a/tensorflow/core/data/service/worker_impl.cc b/tensorflow/core/data/service/worker_impl.cc index 6326d65782b..d2c75bbc719 100644 --- a/tensorflow/core/data/service/worker_impl.cc +++ b/tensorflow/core/data/service/worker_impl.cc @@ -224,15 +224,18 @@ Status DataServiceWorkerImpl::SendTaskUpdate() EXCLUSIVE_LOCKS_REQUIRED(mu_) { void DataServiceWorkerImpl::HeartbeatThread() { while (true) { - mutex_lock l(mu_); - while (!cancelled_ && pending_completed_tasks_.empty()) { - heartbeat_cv_.wait(l); + Status s; + { + mutex_lock l(mu_); + while (!cancelled_ && pending_completed_tasks_.empty()) { + heartbeat_cv_.wait(l); + } + if (cancelled_) { + VLOG(3) << "Heartbeat thread shutting down"; + return; + } + s = SendTaskUpdate(); } - if (cancelled_) { - VLOG(3) << "Heartbeat thread shutting down"; - return; - } - Status s = SendTaskUpdate(); if (!s.ok()) { LOG(WARNING) << "Failed to send task updates to dispatcher: " << s; Env::Default()->SleepForMicroseconds(kHeartbeatIntervalMicros); diff --git a/tensorflow/python/data/experimental/service/__init__.py b/tensorflow/python/data/experimental/service/__init__.py index 74ced1a8eb6..987eb6d6dc2 100644 --- a/tensorflow/python/data/experimental/service/__init__.py +++ b/tensorflow/python/data/experimental/service/__init__.py @@ -64,8 +64,8 @@ workers, the tf.data service should be able to achieve similar speed. ## Running the tf.data service tf.data servers should be brought up alongside your training jobs, and brought -down when the jobs are finished. The tf.data service uses one DispatchServer and -any number of WorkerServers. See +down when the jobs are finished. The tf.data service uses one `DispatchServer` +and any number of `WorkerServers`. See https://github.com/tensorflow/ecosystem/tree/master/data_service for an example of using Google Kubernetes Engine (GKE) to manage the tf.data service. The server implementation in @@ -75,12 +75,17 @@ contexts. ### Fault tolerance -The tf.data dispatch server manages all state for the service, so it is -important to keep the server alive. If the dispatch server is restarted -mid-training, the training must also be restarted. +By default, the tf.data dispatch server stores its state in-memory, making it a +single point of failure during training. To avoid this, pass +`fault_tolerant_mode=True` when creating your `DispatchServer`. Dispatcher +fault tolerance requires `work_dir` to be configured and accessible from the +dispatcher both before and after restart (e.g. a GCS path). With fault tolerant +mode enabled, the dispatcher will journal its state to the work directory so +that no state is lost when the dispatcher is restarted. -WorkerServers, on the other hand, may be freely restarted, added, or removed -during training. +WorkerServers may be freely restarted, added, or removed during training. At +startup, workers will register with the dispatcher and begin processing all +outstanding jobs from the beginning. 
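As a rough sketch (the port, path, and address below are placeholders, not requirements), a fault-tolerant deployment pairs a journaling dispatcher with one or more workers:

```
dispatcher = tf.data.experimental.service.DispatchServer(
    port=5050,
    work_dir="gs://my-bucket/dispatcher/work_dir",  # any path that survives restarts
    fault_tolerant_mode=True)
worker = tf.data.experimental.service.WorkerServer(
    port=0, dispatcher_address="localhost:5050")
```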
## Using the tf.data service from your training job diff --git a/tensorflow/python/data/experimental/service/server_lib.py b/tensorflow/python/data/experimental/service/server_lib.py index 99dc9297901..12c1903fe22 100644 --- a/tensorflow/python/data/experimental/service/server_lib.py +++ b/tensorflow/python/data/experimental/service/server_lib.py @@ -25,6 +25,9 @@ from tensorflow.python.data.experimental.service import _pywrap_server_lib from tensorflow.python.util.tf_export import tf_export +DEFAULT_PROTOCOL = "grpc" + + @tf_export("data.experimental.service.DispatchServer", v1=[]) class DispatchServer(object): """An in-process tf.data service dispatch server. @@ -50,15 +53,38 @@ class DispatchServer(object): dispatcher = tf.data.experimental.service.DispatchServer(port=5050) dispatcher.join() ``` + + To start a `DispatchServer` in fault-tolerant mode, set `work_dir` and + `fault_tolerant_mode` like below: + + ``` + dispatcher = tf.data.experimental.service.DispatchServer( + port=5050, + work_dir="gs://my-bucket/dispatcher/work_dir", + fault_tolerant_mode=True) + ``` """ - def __init__(self, port, protocol=None, start=True): + def __init__(self, + port, + protocol=None, + work_dir=None, + fault_tolerant_mode=None, + start=True): """Creates a new dispatch server. Args: port: Specifies the port to bind to. protocol: (Optional.) Specifies the protocol to be used by the server. Acceptable values include `"grpc", "grpc+local"`. Defaults to `"grpc"`. + work_dir: (Optional.) A directory to store dispatcher state in. This + argument is required for the dispatcher to be able to recover from + restarts. + fault_tolerant_mode: (Optional.) Whether the dispatcher should write + its state to a journal so that it can recover from restarts. Dispatcher + state, including registered datasets and created jobs, is synchronously + written to the journal before responding to RPCs. If `True`, `work_dir` + must also be specified. Defaults to `False`. start: (Optional.) Boolean, indicating whether to start the server after creating it. Defaults to `True`. @@ -66,10 +92,17 @@ class DispatchServer(object): tf.errors.OpError: Or one of its subclasses if an error occurs while creating the TensorFlow server. 
""" - if protocol is None: - protocol = "grpc" - self._protocol = protocol - config = service_config_pb2.DispatcherConfig(port=port, protocol=protocol) + self._protocol = protocol or DEFAULT_PROTOCOL + work_dir = work_dir or "" + fault_tolerant_mode = fault_tolerant_mode or False + if fault_tolerant_mode and not work_dir: + raise ValueError( + "Cannot enable fault tolerant mode without configuring a work_dir") + config = service_config_pb2.DispatcherConfig( + port=port, + protocol=self._protocol, + work_dir=work_dir, + fault_tolerant_mode=fault_tolerant_mode) self._server = _pywrap_server_lib.TF_DATA_NewDispatchServer( config.SerializeToString()) if start: diff --git a/tensorflow/python/data/kernel_tests/data_service_ops_test.py b/tensorflow/python/data/kernel_tests/data_service_ops_test.py index c8a4b8262c3..49cf1772661 100644 --- a/tensorflow/python/data/kernel_tests/data_service_ops_test.py +++ b/tensorflow/python/data/kernel_tests/data_service_ops_test.py @@ -17,6 +17,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os import threading import time @@ -43,88 +44,157 @@ from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import tensor_array_ops from tensorflow.python.platform import test -PROTOCOL = "grpc" + +def _address_from_target(target): + # Targets are in the format ://
    + return target.split("://")[1] -def _make_distributed_dataset(dataset, service, job_name=None): - """Creates a distributed dataset with a short task refresh interval.""" +def _make_distributed_dataset(dataset, + dispatcher, + job_name=None, + max_outstanding_requests=None): return dataset.apply( data_service_ops._distribute( "parallel_epochs", - service, + dispatcher.target, job_name=job_name, + max_outstanding_requests=max_outstanding_requests, task_refresh_interval_hint_ms=20)) +def _make_distributed_range_dataset(num_elements, + dispatcher, + job_name=None, + max_outstanding_requests=None): + """Creates a distributed dataset. + + Args: + num_elements: The number of elements in the range dataset that will be + distributed. + dispatcher: The dispatcher to distribute to. + job_name: Optional job name for the distributed dataset. + max_outstanding_requests: Optional limit on the number of outstanding + requests. + + Returns: + The created dataset. + """ + dataset = dataset_ops.Dataset.range(num_elements) + return _make_distributed_dataset(dataset, dispatcher, job_name, + max_outstanding_requests) + + class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): - def create_cluster(self, num_workers): + def start_dispatch_server(self, port=0): + work_dir = os.path.join(self.get_temp_dir(), "work_dir") + return server_lib.DispatchServer( + port=port, + protocol=server_lib.DEFAULT_PROTOCOL, + work_dir=work_dir, + fault_tolerant_mode=True) + + def start_worker_server(self, dispatcher, port=0): + return server_lib.WorkerServer( + port=port, + dispatcher_address=_address_from_target(dispatcher.target), + protocol=server_lib.DEFAULT_PROTOCOL) + + def restart_dispatcher(self, dispatcher): + """Stops `dispatcher` and returns a new dispatcher with the same port.""" + port = int(_address_from_target(dispatcher.target).split(":")[1]) + dispatcher._stop() + return self.start_dispatch_server(port=port) + + def start_cluster(self, num_workers): """Creates a cluster of tf.data service servers. Args: num_workers: The number of workers in the cluster. Returns: - A string for connecting to the tf.data service. + A tuple of (dispatcher, list_of_workers). 
""" - self._dispatcher = server_lib.DispatchServer(port=0, protocol=PROTOCOL) - self._servers = [] - for _ in range(num_workers): - self._servers.append( - server_lib.WorkerServer( - port=0, - dispatcher_address=self._dispatcher._address, - protocol=PROTOCOL)) - - return "{0}://{1}".format(PROTOCOL, self._dispatcher._address) + dispatcher = self.start_dispatch_server() + servers = [self.start_worker_server(dispatcher) for _ in range(num_workers)] + return dispatcher, servers @combinations.generate(test_base.eager_only_combinations()) def testDistributeBasic(self): + dispatcher, workers = self.start_cluster(1) # to avoid gcing workers, pylint: disable=unused-variable num_elements = 10 - service = self.create_cluster(1) - ds = dataset_ops.Dataset.range(num_elements) - ds = _make_distributed_dataset(ds, service) + ds = _make_distributed_range_dataset(10, dispatcher) results = [elem.numpy() for elem in ds] self.assertEqual(list(range(num_elements)), results) @combinations.generate(test_base.eager_only_combinations()) - def testDispatcherPreemption(self): - self._dispatcher = server_lib.DispatchServer(port=0, protocol=PROTOCOL) - self._worker = server_lib.WorkerServer( - port=0, dispatcher_address=self._dispatcher._address, protocol=PROTOCOL) + def testDispatcherStop(self): + dispatcher, workers = self.start_cluster(1) # to avoid gcing workers, pylint: disable=unused-variable num_elements = 100 - ds = dataset_ops.Dataset.range(num_elements) - ds = _make_distributed_dataset( - ds, "{}://{}".format(PROTOCOL, self._dispatcher._address)) + ds = _make_distributed_range_dataset(num_elements, dispatcher) iterator = iter(ds) results = [] results.append(next(iterator).numpy()) - self._dispatcher._stop() + dispatcher._stop() # After the dispatcher dies, the worker should continue providing the rest # of the dataset's elements. 
for _ in range(num_elements - 1): results.append(next(iterator).numpy()) self.assertEqual(results, list(range(num_elements))) + @combinations.generate(test_base.eager_only_combinations()) + def testDispatcherRestartBeforeReading(self): + dispatcher, workers = self.start_cluster(1) # to avoid gcing workers, pylint: disable=unused-variable + num_elements = 100 + ds = _make_distributed_range_dataset(num_elements, dispatcher) + dispatcher = self.restart_dispatcher(dispatcher) + + self.assertDatasetProduces(ds, list(range(num_elements))) + + @combinations.generate(test_base.eager_only_combinations()) + def testDispatcherRestartDuringReading(self): + dispatcher, workers = self.start_cluster(1) # to avoid gcing workers, pylint: disable=unused-variable + num_elements = 100 + ds = _make_distributed_range_dataset(num_elements, dispatcher) + iterator = iter(ds) + results = [] + for _ in range(num_elements // 2): + results.append(next(iterator).numpy()) + dispatcher = self.restart_dispatcher(dispatcher) + for elem in iterator: + results.append(elem.numpy()) + + self.assertEqual(list(range(num_elements)), results) + + @combinations.generate(test_base.eager_only_combinations()) + def testDispatcherRestartBetweenIterations(self): + dispatcher, workers = self.start_cluster(1) # to avoid gcing workers, pylint: disable=unused-variable + num_elements = 100 + ds = _make_distributed_range_dataset(100, dispatcher) + self.assertDatasetProduces(ds, list(range(num_elements))) + dispatcher = self.restart_dispatcher(dispatcher) + self.assertDatasetProduces(ds, list(range(num_elements))) + @combinations.generate(test_base.eager_only_combinations()) def testDistributeSparse(self): - service = self.create_cluster(1) + dispatcher, workers = self.start_cluster(1) # to avoid gcing workers, pylint: disable=unused-variable element = sparse_tensor.SparseTensor( indices=[[0]], values=constant_op.constant([0], dtype=dtypes.int32), dense_shape=[1]) ds = dataset_ops.Dataset.from_tensors(element) - ds = _make_distributed_dataset(ds, service) + ds = _make_distributed_dataset(ds, dispatcher) results = [sparse_ops.sparse_tensor_to_dense(elem) for elem in ds] self.assertAllEqual(results, [[0]]) @combinations.generate(test_base.eager_only_combinations()) def testDistributeRagged(self): - service = self.create_cluster(1) + dispatcher, workers = self.start_cluster(1) # to avoid gcing workers, pylint: disable=unused-variable ds = dataset_ops.Dataset.from_tensor_slices([1, 5, 3, 2, 8]) ds = ds.map(math_ops.range) ds = ds.apply(batching.dense_to_ragged_batch(2)) - ds = _make_distributed_dataset(ds, service) + ds = _make_distributed_dataset(ds, dispatcher) results = [elem.to_tensor() for elem in ds] self.assertAllEqual(results[0], [[0, 0, 0, 0, 0], [0, 1, 2, 3, 4]]) self.assertAllEqual(results[1], [[0, 1, 2], [0, 1, 0]]) @@ -134,10 +204,10 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): def testDifferentShuffleOrders(self): random_seed.set_random_seed(None) num_elements = 100 - service = self.create_cluster(2) + dispatcher, workers = self.start_cluster(2) # to avoid gcing workers, pylint: disable=unused-variable ds = dataset_ops.Dataset.range(num_elements) ds = ds.shuffle(num_elements) - ds = _make_distributed_dataset(ds, service) + ds = _make_distributed_dataset(ds, dispatcher) output = [elem.numpy() for elem in ds] # The output will be two sequences of range(num_elements) @@ -154,34 +224,31 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): 
@combinations.generate(test_base.eager_only_combinations()) def testMultipleEpochs(self): + dispatcher, workers = self.start_cluster(1) # to avoid gcing workers, pylint: disable=unused-variable num_elements = 3 - service = self.create_cluster(1) - ds = dataset_ops.Dataset.range(num_elements) - ds = _make_distributed_dataset(ds, service) + ds = _make_distributed_range_dataset(num_elements, dispatcher) for _ in range(10): self.assertEqual(list(range(num_elements)), [elem.numpy() for elem in ds]) @combinations.generate(test_base.eager_only_combinations()) def testRepeatedDataset(self): + dispatcher, workers = self.start_cluster(1) # to avoid gcing workers, pylint: disable=unused-variable num_elements = 10 num_repetitions = 5 - service = self.create_cluster(1) - ds = dataset_ops.Dataset.range(num_elements) - ds = _make_distributed_dataset(ds, service) + ds = _make_distributed_range_dataset(num_elements, dispatcher) ds = ds.repeat(num_repetitions) self.assertDatasetProduces( ds, expected_output=num_repetitions * list(range(num_elements))) @combinations.generate(test_base.eager_only_combinations()) def testConcurrentEpoch(self): + dispatcher, workers = self.start_cluster(1) # to avoid gcing workers, pylint: disable=unused-variable num_elements = 10 num_datasets = 3 - service = self.create_cluster(1) iterators = [] results = [] for _ in range(num_datasets): - ds = dataset_ops.Dataset.range(num_elements) - ds = _make_distributed_dataset(ds, service) + ds = _make_distributed_range_dataset(num_elements, dispatcher) iterators.append(iter(ds)) results.append([]) @@ -195,11 +262,10 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): @combinations.generate(test_base.eager_only_combinations()) def testSharedEpoch(self): self.skipTest("Not yet implemented") + dispatcher, workers = self.start_cluster(1) # to avoid gcing workers, pylint: disable=unused-variable num_elements = 10 num_iterators = 3 - service = self.create_cluster(1) - ds = dataset_ops.Dataset.range(num_elements) - ds = _make_distributed_dataset(ds, service) + ds = _make_distributed_range_dataset(num_elements, dispatcher) result = [] iterators = [] for _ in range(num_iterators): @@ -220,10 +286,9 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): @combinations.generate(test_base.eager_only_combinations()) def testMultiWorker(self): num_workers = 3 + dispatcher, workers = self.start_cluster(num_workers) # to avoid gcing workers, pylint: disable=unused-variable num_elements = 10 - service = self.create_cluster(num_workers) - ds = dataset_ops.Dataset.range(num_elements) - ds = _make_distributed_dataset(ds, service) + ds = _make_distributed_range_dataset(num_elements, dispatcher) results = [elem.numpy() for elem in ds] self.assertCountEqual(num_workers * list(range(num_elements)), results) @@ -237,12 +302,10 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): except: raise self.skipTest("Flakes in portpicker library do not represent " "TensorFlow errors.") - dispatcher = server_lib.DispatchServer( - port=dispatcher_port, protocol=PROTOCOL, start=False) + dispatcher = server_lib.DispatchServer(port=dispatcher_port, start=False) worker = server_lib.WorkerServer( port=0, - dispatcher_address=dispatcher._address, - protocol=PROTOCOL, + dispatcher_address=_address_from_target(dispatcher.target), start=False) def start_servers(): @@ -254,33 +317,25 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): start_servers_thread.start() num_elements = 
10 - ds = dataset_ops.Dataset.range(num_elements) - ds = _make_distributed_dataset( - ds, "{}://{}".format(PROTOCOL, dispatcher._address)) + ds = _make_distributed_range_dataset(num_elements, dispatcher) results = [elem.numpy() for elem in ds] self.assertEqual(list(range(num_elements)), results) start_servers_thread.join() @combinations.generate(test_base.eager_only_combinations()) def testAddWorkerMidJob(self): - self._dispatcher = server_lib.DispatchServer(port=0, protocol=PROTOCOL) - self._worker = server_lib.WorkerServer( - port=0, dispatcher_address=self._dispatcher._address, protocol=PROTOCOL) + dispatcher, workers = self.start_cluster(1) # to avoid gcing workers, pylint: disable=unused-variable num_elements = 100 - ds = dataset_ops.Dataset.range(num_elements) - ds = _make_distributed_dataset( - ds, "{}://{}".format(PROTOCOL, self._dispatcher._address)) + ds = _make_distributed_range_dataset(num_elements, dispatcher) iterator = iter(ds) results = [] # Read halfway through the dataset. for _ in range(num_elements // 2): results.append(next(iterator).numpy()) - self._new_worker = server_lib.WorkerServer( - port=0, dispatcher_address=self._dispatcher._address, protocol=PROTOCOL) - + new_worker = self.start_worker_server(dispatcher) # to avoid gcing workers, pylint: disable=unused-variable # Wait for the new worker to register with the dispatcher. - while self._dispatcher._num_workers() < 2: + while dispatcher._num_workers() < 2: time.sleep(10 / 1000) # 10ms for elem in iterator: @@ -292,13 +347,9 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): combinations.times(test_base.eager_only_combinations(), combinations.combine(use_same_port=[True, False]))) def testRestartWorker(self, use_same_port): - self._dispatcher = server_lib.DispatchServer(port=0, protocol=PROTOCOL) - self._worker = server_lib.WorkerServer( - port=0, dispatcher_address=self._dispatcher._address, protocol=PROTOCOL) + dispatcher, [worker] = self.start_cluster(1) num_elements = 100 - ds = dataset_ops.Dataset.range(num_elements) - ds = _make_distributed_dataset( - ds, "{}://{}".format(PROTOCOL, self._dispatcher._address)) + ds = _make_distributed_range_dataset(num_elements, dispatcher) iterator = iter(ds) # Read halfway through the dataset. midpoint = num_elements // 2 @@ -308,12 +359,9 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): # Stop the original worker and start a new one. port = 0 if use_same_port: - port = int(self._worker._address.split(":")[1]) - self._worker._stop() - self._new_worker = server_lib.WorkerServer( - port=port, - dispatcher_address=self._dispatcher._address, - protocol=PROTOCOL) + port = int(worker._address.split(":")[1]) + worker._stop() + new_worker = self.start_worker_server(dispatcher, port=port) # to avoid gcing workers, pylint: disable=unused-variable # There may have been some elements prefetched from the first worker # before it was stopped. 
@@ -331,29 +379,23 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): @combinations.generate(test_base.eager_only_combinations()) def testMaxOutstandingRequests(self): - num_elements = 10 num_workers = 3 - service = self.create_cluster(num_workers) - ds = dataset_ops.Dataset.range(num_elements) - ds = ds.apply( - data_service_ops._distribute( - "parallel_epochs", - service, - max_outstanding_requests=1, - task_refresh_interval_hint_ms=20)) + dispatcher, workers = self.start_cluster(num_workers) # to avoid gcing workers, pylint: disable=unused-variable + num_elements = 10 + ds = _make_distributed_range_dataset( + num_elements, dispatcher, max_outstanding_requests=1) self.assertCountEqual(num_workers * list(range(num_elements)), self.getDatasetOutput(ds)) @combinations.generate(test_base.eager_only_combinations()) def testInsideFunction(self): num_workers = 3 + dispatcher, workers = self.start_cluster(num_workers) # to avoid gcing workers, pylint: disable=unused-variable num_elements = 10 - service = self.create_cluster(num_workers) @def_function.function def f(): - ds = dataset_ops.Dataset.range(num_elements) - ds = _make_distributed_dataset(ds, service) + ds = _make_distributed_range_dataset(num_elements, dispatcher) result = tensor_array_ops.TensorArray( dtypes.int64, size=num_workers * num_elements, dynamic_size=True) i = 0 @@ -367,11 +409,11 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): @combinations.generate(test_base.eager_only_combinations()) def testSharedJobName(self): + dispatcher, workers = self.start_cluster(1) # to avoid gcing workers, pylint: disable=unused-variable num_elements = 100 - service = self.create_cluster(1) ds = dataset_ops.Dataset.range(num_elements) - ds1 = _make_distributed_dataset(ds, service, job_name="job_name") - ds2 = _make_distributed_dataset(ds, service, job_name="job_name") + ds1 = _make_distributed_dataset(ds, dispatcher, job_name="job_name") + ds2 = _make_distributed_dataset(ds, dispatcher, job_name="job_name") iter1 = iter(ds1) iter2 = iter(ds2) results = [] @@ -386,21 +428,21 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): @combinations.generate(test_base.eager_only_combinations()) def testDifferentJobNames(self): + dispatcher, workers = self.start_cluster(1) # to avoid gcing workers, pylint: disable=unused-variable num_elements = 10 - service = self.create_cluster(1) ds = dataset_ops.Dataset.range(num_elements) - ds1 = _make_distributed_dataset(ds, service, job_name="job_name1") - ds2 = _make_distributed_dataset(ds, service, job_name="job_name2") + ds1 = _make_distributed_dataset(ds, dispatcher, job_name="job_name1") + ds2 = _make_distributed_dataset(ds, dispatcher, job_name="job_name2") self.assertDatasetProduces(ds1, list(range(num_elements))) self.assertDatasetProduces(ds2, list(range(num_elements))) @combinations.generate(test_base.eager_only_combinations()) def testSharedJobNameMultiIteration(self): + dispatcher, workers = self.start_cluster(1) # to avoid gcing workers, pylint: disable=unused-variable num_elements = 10 - service = self.create_cluster(1) ds = dataset_ops.Dataset.range(num_elements) - ds1 = _make_distributed_dataset(ds, service, job_name="job_name") - ds2 = _make_distributed_dataset(ds, service, job_name="job_name") + ds1 = _make_distributed_dataset(ds, dispatcher, job_name="job_name") + ds2 = _make_distributed_dataset(ds, dispatcher, job_name="job_name") # iteration 1 self.assertDatasetProduces(ds1, list(range(num_elements))) 
self.assertDatasetProduces(ds2, []) @@ -410,13 +452,13 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): @combinations.generate(test_base.eager_only_combinations()) def testSharedJobNameRepeat(self): + dispatcher, workers = self.start_cluster(1) # to avoid gcing workers, pylint: disable=unused-variable num_elements = 100 num_repetitions = 3 - service = self.create_cluster(1) ds = dataset_ops.Dataset.range(num_elements) - ds1 = _make_distributed_dataset(ds, service, job_name="job_name") + ds1 = _make_distributed_dataset(ds, dispatcher, job_name="job_name") ds1 = ds1.repeat(num_repetitions) - ds2 = _make_distributed_dataset(ds, service, job_name="job_name") + ds2 = _make_distributed_dataset(ds, dispatcher, job_name="job_name") ds2 = ds2.repeat(num_repetitions) results = [] iter1 = iter(ds1) @@ -434,7 +476,7 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): @combinations.generate(test_base.eager_only_combinations()) def testApplyDeterminismOption(self): elements = list(range(10)) - service = self.create_cluster(1) + dispatcher, workers = self.start_cluster(1) # to avoid gcing workers, pylint: disable=unused-variable def dataset_fn(delay_ms): @@ -451,7 +493,7 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): opts = dataset_ops.Options() opts.experimental_deterministic = False ds = ds.with_options(opts) - ds = _make_distributed_dataset(ds, service) + ds = _make_distributed_dataset(ds, dispatcher) return ds self.checkDeterminism( @@ -468,8 +510,8 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): options.experimental_external_state_policy = external_state_policy ds = ds.with_options(options) - service = self.create_cluster(3) - ds = _make_distributed_dataset(ds, service) + dispatcher, workers = self.start_cluster(3) # to avoid gcing workers, pylint: disable=unused-variable + ds = _make_distributed_dataset(ds, dispatcher) next(iter(ds)) @combinations.generate( @@ -489,13 +531,13 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): @combinations.generate(test_base.eager_only_combinations()) def testDistributeFromInterleave(self): - service = self.create_cluster(1) + dispatcher, workers = self.start_cluster(1) # to avoid gcing workers, pylint: disable=unused-variable ds = dataset_ops.Dataset.range(2) def interleave_fn(_): - ds = dataset_ops.Dataset.range(2) - _make_distributed_dataset(ds, service) - return ds + dataset = dataset_ops.Dataset.range(2) + _make_distributed_dataset(dataset, dispatcher) + return dataset with self.assertRaisesRegex( errors.InvalidArgumentError, r"The `.distribute\(...\)` dataset " @@ -530,25 +572,25 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): @combinations.generate(test_base.eager_only_combinations()) def testFromDatasetId(self): - num_elements = 10 - service = self.create_cluster(1) + dispatcher, workers = self.start_cluster(1) # to avoid gcing workers, pylint: disable=unused-variable + num_elements = 10 ds = dataset_ops.Dataset.range(num_elements) - dataset_id = data_service_ops.register_dataset(service, ds) + dataset_id = data_service_ops.register_dataset(dispatcher.target, ds) from_dataset_id_ds = data_service_ops.from_dataset_id( - "parallel_epochs", service, dataset_id, ds.element_spec) + "parallel_epochs", dispatcher.target, dataset_id, ds.element_spec) self.assertDatasetProduces(from_dataset_id_ds, list(range(num_elements))) @combinations.generate(test_base.eager_only_combinations()) 
def testFromDatasetIdMultipleComponents(self): - num_elements = 10 - service = self.create_cluster(1) + dispatcher, workers = self.start_cluster(1) # to avoid gcing workers, pylint: disable=unused-variable + num_elements = 10 ds = dataset_ops.Dataset.range(num_elements) ds = dataset_ops.Dataset.zip({"a": (ds, ds), "b": ds}) - dataset_id = data_service_ops.register_dataset(service, ds) + dataset_id = data_service_ops.register_dataset(dispatcher.target, ds) from_dataset_id_ds = data_service_ops.from_dataset_id( - "parallel_epochs", service, dataset_id, ds.element_spec) + "parallel_epochs", dispatcher.target, dataset_id, ds.element_spec) output = self.getDatasetOutput(from_dataset_id_ds) for i in range(num_elements): self.assertEqual(i, output[i]["a"][0]) @@ -557,26 +599,26 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): @combinations.generate(test_base.eager_only_combinations()) def testFromDatasetIdWrongElementSpec(self): - num_elements = 10 - service = self.create_cluster(1) + dispatcher, workers = self.start_cluster(1) # to avoid gcing workers, pylint: disable=unused-variable + num_elements = 10 ds = dataset_ops.Dataset.range(num_elements) - dataset_id = data_service_ops.register_dataset(service, ds) + dataset_id = data_service_ops.register_dataset(dispatcher.target, ds) wrong_spec = tensor_spec.TensorSpec(shape=(), dtype=dtypes.variant) from_dataset_id_ds = data_service_ops.from_dataset_id( - "parallel_epochs", service, dataset_id, wrong_spec) + "parallel_epochs", dispatcher.target, dataset_id, wrong_spec) with self.assertRaisesRegex(errors.FailedPreconditionError, "Expected a tensor of type variant"): self.evaluate(self.getNext(from_dataset_id_ds)()) @combinations.generate(test_base.eager_only_combinations()) def testFromDatasetIdNotRegistered(self): - service = self.create_cluster(1) + dispatcher, workers = self.start_cluster(1) # to avoid gcing workers, pylint: disable=unused-variable dataset_id = 0 element_spec = tensor_spec.TensorSpec(shape=(), dtype=dtypes.variant) from_dataset_id_ds = data_service_ops.from_dataset_id( - "parallel_epochs", service, dataset_id, element_spec) + "parallel_epochs", dispatcher.target, dataset_id, element_spec) with self.assertRaisesRegex(errors.NotFoundError, "Dataset id"): self.evaluate(self.getNext(from_dataset_id_ds)()) @@ -585,17 +627,14 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): self.skipTest("b/162521601") sleep_microseconds = int(1e6) * 1000 - self._dispatcher = server_lib.DispatchServer(port=0, protocol=PROTOCOL) - self._worker = server_lib.WorkerServer( - port=0, dispatcher_address=self._dispatcher._address, protocol=PROTOCOL) + dispatcher, workers = self.start_cluster(1) # to avoid gcing workers, pylint: disable=unused-variable # Create a dataset which produces the first element quickly, and the second # element slowly. Fetching the first element triggers prefetching of the # second element, which we should be able to cancel. 
slow = dataset_ops.Dataset.range(1) slow = slow.apply(testing.sleep(sleep_microseconds)) ds = dataset_ops.Dataset.range(1).concatenate(slow) - ds = _make_distributed_dataset( - ds, "{}://{}".format(PROTOCOL, self._dispatcher._address)) + ds = _make_distributed_dataset(ds, dispatcher) ds = ds.prefetch(1) get_next = self.getNext(ds, requires_initialization=True) self.assertEqual(0, self.evaluate(get_next())) @@ -606,18 +645,18 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): def testRegisterEquivalentDatasets(self): ds_1 = dataset_ops.Dataset.range(10) ds_2 = dataset_ops.Dataset.range(10) - service = self.create_cluster(1) - id_1 = data_service_ops.register_dataset(service, ds_1) - id_2 = data_service_ops.register_dataset(service, ds_2) + dispatcher, workers = self.start_cluster(1) # to avoid gcing workers, pylint: disable=unused-variable + id_1 = data_service_ops.register_dataset(dispatcher.target, ds_1) + id_2 = data_service_ops.register_dataset(dispatcher.target, ds_2) self.assertEqual(id_1.numpy(), id_2.numpy()) @combinations.generate(test_base.eager_only_combinations()) def testRegisterDifferentDatasets(self): ds_1 = dataset_ops.Dataset.range(10) ds_2 = dataset_ops.Dataset.range(20) - service = self.create_cluster(1) - id_1 = data_service_ops.register_dataset(service, ds_1) - id_2 = data_service_ops.register_dataset(service, ds_2) + dispatcher, workers = self.start_cluster(1) # to avoid gcing workers, pylint: disable=unused-variable + id_1 = data_service_ops.register_dataset(dispatcher.target, ds_1) + id_2 = data_service_ops.register_dataset(dispatcher.target, ds_2) self.assertNotEqual(id_1.numpy(), id_2.numpy()) diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.service.-dispatch-server.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.service.-dispatch-server.pbtxt index 86efaf268e0..522cc00448a 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.service.-dispatch-server.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.service.-dispatch-server.pbtxt @@ -8,7 +8,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'port\', \'protocol\', \'start\'], varargs=None, keywords=None, defaults=[\'None\', \'True\'], " + argspec: "args=[\'self\', \'port\', \'protocol\', \'work_dir\', \'fault_tolerant_mode\', \'start\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\'], " } member_method { name: "join" From cf9518e745cf7fccc53dcdcc856970216d0468fd Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: Thu, 6 Aug 2020 14:11:14 -0700 Subject: [PATCH 2285/2522] [tf.data service] Avoid holding locks while calling RPCs from worker. This CL also improves some naming and changes WorkerImpl::Start to return Status, similar to DispatcherImpl::Start. 
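The pattern, sketched with generic names (none of these classes are TensorFlow APIs; they only illustrate the locking discipline the worker now follows): copy the state an RPC needs while the mutex is held, drop the lock, perform the blocking call, then re-acquire the lock to record the result.

```
#include <mutex>
#include <set>
#include <vector>

// Stand-in for an RPC stub; illustrative only.
struct FakeDispatcherStub {
  void ReportCompleted(const std::vector<int>& /*task_ids*/) {
    // Imagine a blocking network round trip here.
  }
};

class Worker {
 public:
  void SendUpdates(FakeDispatcherStub* stub) {
    std::vector<int> to_report;
    {
      // Hold the lock only long enough to snapshot the pending updates.
      std::lock_guard<std::mutex> lock(mu_);
      to_report.assign(pending_.begin(), pending_.end());
    }
    // The blocking RPC runs without the lock, so threads serving data
    // requests are never stuck waiting behind a network call.
    stub->ReportCompleted(to_report);
    // Re-acquire the lock to erase only what was actually reported.
    std::lock_guard<std::mutex> lock(mu_);
    for (int id : to_report) pending_.erase(id);
  }

 private:
  std::mutex mu_;
  std::set<int> pending_;  // completed tasks not yet reported to the dispatcher
};
```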
PiperOrigin-RevId: 325307263 Change-Id: I8b1fc9416908acd3c6e971e0c6ee2b3cf23cfd4f --- .../core/data/service/grpc_worker_impl.cc | 15 +- .../core/data/service/grpc_worker_impl.h | 2 +- tensorflow/core/data/service/server_lib.cc | 2 +- tensorflow/core/data/service/worker_impl.cc | 132 ++++++++++-------- tensorflow/core/data/service/worker_impl.h | 37 ++--- 5 files changed, 101 insertions(+), 87 deletions(-) diff --git a/tensorflow/core/data/service/grpc_worker_impl.cc b/tensorflow/core/data/service/grpc_worker_impl.cc index c76e1062753..5e3183d61b8 100644 --- a/tensorflow/core/data/service/grpc_worker_impl.cc +++ b/tensorflow/core/data/service/grpc_worker_impl.cc @@ -23,7 +23,6 @@ namespace data { using ::grpc::ServerBuilder; using ::grpc::ServerContext; -using ::grpc::Status; GrpcWorkerImpl::GrpcWorkerImpl(ServerBuilder* server_builder, const experimental::WorkerConfig& config) @@ -32,15 +31,15 @@ GrpcWorkerImpl::GrpcWorkerImpl(ServerBuilder* server_builder, VLOG(1) << "Registered data service worker"; } -void GrpcWorkerImpl::Start(const std::string& worker_address) { - impl_.Start(worker_address); +Status GrpcWorkerImpl::Start(const std::string& worker_address) { + return impl_.Start(worker_address); } -#define HANDLER(method) \ - Status GrpcWorkerImpl::method(ServerContext* context, \ - const method##Request* request, \ - method##Response* response) { \ - return ToGrpcStatus(impl_.method(request, response)); \ +#define HANDLER(method) \ + grpc::Status GrpcWorkerImpl::method(ServerContext* context, \ + const method##Request* request, \ + method##Response* response) { \ + return ToGrpcStatus(impl_.method(request, response)); \ } HANDLER(ProcessTask); HANDLER(GetElement); diff --git a/tensorflow/core/data/service/grpc_worker_impl.h b/tensorflow/core/data/service/grpc_worker_impl.h index b0881143a57..49caab246ac 100644 --- a/tensorflow/core/data/service/grpc_worker_impl.h +++ b/tensorflow/core/data/service/grpc_worker_impl.h @@ -39,7 +39,7 @@ class GrpcWorkerImpl : public WorkerService::Service { const experimental::WorkerConfig& config); ~GrpcWorkerImpl() override {} - void Start(const std::string& worker_address); + Status Start(const std::string& worker_address); #define HANDLER(method) \ grpc::Status method(grpc::ServerContext* context, \ diff --git a/tensorflow/core/data/service/server_lib.cc b/tensorflow/core/data/service/server_lib.cc index 7f698f8669b..98157f6b232 100644 --- a/tensorflow/core/data/service/server_lib.cc +++ b/tensorflow/core/data/service/server_lib.cc @@ -116,7 +116,7 @@ Status WorkerGrpcDataServer::StartServiceInternal() { std::string resolved_address = str_util::StringReplace( worker_address, kPortPlaceholder, absl::StrCat(bound_port()), /*replace_all=*/false); - service_->Start(resolved_address); + TF_RETURN_IF_ERROR(service_->Start(resolved_address)); return Status::OK(); } diff --git a/tensorflow/core/data/service/worker_impl.cc b/tensorflow/core/data/service/worker_impl.cc index d2c75bbc719..0e955e136d2 100644 --- a/tensorflow/core/data/service/worker_impl.cc +++ b/tensorflow/core/data/service/worker_impl.cc @@ -36,7 +36,7 @@ limitations under the License. 
namespace tensorflow { namespace data { -const constexpr uint64 kHeartbeatIntervalMicros = 5ull * 1000 * 1000; +const constexpr uint64 kRetryIntervalMicros = 5ull * 1000 * 1000; namespace { auto* tf_data_service_created = @@ -54,24 +54,30 @@ DataServiceWorkerImpl::DataServiceWorkerImpl( DataServiceWorkerImpl::~DataServiceWorkerImpl() { mutex_lock l(mu_); cancelled_ = true; - heartbeat_cv_.notify_one(); + background_cv_.notify_one(); } -void DataServiceWorkerImpl::Start(const std::string& worker_address) { +Status DataServiceWorkerImpl::Start(const std::string& worker_address) { VLOG(3) << "Starting tf.data service worker at address " << worker_address; - mutex_lock l(mu_); worker_address_ = worker_address; - Thread* thread = Env::Default()->StartThread( - {}, "data-service-worker-heartbeat", [this]() { HeartbeatThread(); }); - heartbeat_thread_.reset(thread); - Status s = Register(); + std::unique_ptr dispatcher; + TF_RETURN_IF_ERROR(MakeDispatcherStub(&dispatcher)); + + Status s = Register(dispatcher.get()); while (!s.ok()) { LOG(WARNING) << "Failed to register with dispatcher at " << config_.dispatcher_address() << ": " << s; - Env::Default()->SleepForMicroseconds(kHeartbeatIntervalMicros); - s = Register(); + Env::Default()->SleepForMicroseconds(kRetryIntervalMicros); + s = Register(dispatcher.get()); } + Thread* thread = + Env::Default()->StartThread({}, "data-service-worker-background", + [this, dispatcher = dispatcher.release()]() { + BackgroundThread(dispatcher); + }); + background_thread_.reset(thread); + return Status::OK(); } Status DataServiceWorkerImpl::ProcessTask(const ProcessTaskRequest* request, @@ -98,7 +104,7 @@ Status DataServiceWorkerImpl::ProcessTaskInternal(const TaskDef& task_def) " already exists."); } Task& task = tasks_[task_def.task_id()]; - task.id = task_def.task_id(); + task.task_id = task_def.task_id(); task.dataset = std::move(dataset); task.iterator = std::move(iterator); VLOG(3) << "Began processing for task " << task_def.task_id(); @@ -128,8 +134,8 @@ Status DataServiceWorkerImpl::GetElement(const GetElementRequest* request, VLOG(3) << "Reached end_of_sequence for task " << request->task_id(); // Release iterator memory and leave a null entry as a tombstone. 
iter.reset(); - pending_completed_tasks_.push_back(request->task_id()); - heartbeat_cv_.notify_one(); + pending_completed_tasks_.insert(request->task_id()); + background_cv_.notify_one(); } } @@ -168,80 +174,88 @@ Status DataServiceWorkerImpl::GetElement(const GetElementRequest* request, return Status::OK(); } -Status DataServiceWorkerImpl::EnsureDispatcherStubInitialized() - EXCLUSIVE_LOCKS_REQUIRED(mu_) { - if (!dispatcher_stub_) { - ::grpc::ChannelArguments args; - std::shared_ptr<::grpc::ChannelCredentials> credentials; - TF_RETURN_IF_ERROR(CredentialsFactory::CreateClientCredentials( - config_.protocol(), &credentials)); - auto channel = ::grpc::CreateCustomChannel(config_.dispatcher_address(), - credentials, args); - dispatcher_stub_ = DispatcherService::NewStub(channel); - } +Status DataServiceWorkerImpl::MakeDispatcherStub( + std::unique_ptr* stub) { + ::grpc::ChannelArguments args; + std::shared_ptr<::grpc::ChannelCredentials> credentials; + TF_RETURN_IF_ERROR(CredentialsFactory::CreateClientCredentials( + config_.protocol(), &credentials)); + auto channel = ::grpc::CreateCustomChannel(config_.dispatcher_address(), + credentials, args); + *stub = DispatcherService::NewStub(channel); return Status::OK(); } -Status DataServiceWorkerImpl::Register() EXCLUSIVE_LOCKS_REQUIRED(mu_) { +Status DataServiceWorkerImpl::Register(DispatcherService::Stub* dispatcher_stub) + LOCKS_EXCLUDED(mu_) { VLOG(3) << "Registering with dispatcher at " << config_.dispatcher_address(); - TF_RETURN_IF_ERROR(EnsureDispatcherStubInitialized()); RegisterWorkerRequest req; req.set_worker_address(worker_address_); RegisterWorkerResponse resp; - grpc::ClientContext ctx; - grpc::Status s = dispatcher_stub_->RegisterWorker(&ctx, req, &resp); + grpc::Status s = dispatcher_stub->RegisterWorker(&ctx, req, &resp); if (!s.ok()) { return grpc_util::WrapError("Failed to register worker", s); } for (const TaskDef& task : resp.tasks()) { + mutex_lock l(mu_); TF_RETURN_IF_ERROR(ProcessTaskInternal(task)); } + VLOG(3) << "Registered worker with address " << worker_address_; return Status::OK(); } -Status DataServiceWorkerImpl::SendTaskUpdate() EXCLUSIVE_LOCKS_REQUIRED(mu_) { - VLOG(3) << "Sending " << pending_completed_tasks_.size() - << " task updates to dispatcher"; - TF_RETURN_IF_ERROR(EnsureDispatcherStubInitialized()); +void DataServiceWorkerImpl::BackgroundThread( + DispatcherService::Stub* dispatcher_ptr) LOCKS_EXCLUDED(mu_) { + std::unique_ptr dispatcher = + absl::WrapUnique(dispatcher_ptr); + while (true) { + { + mutex_lock l(mu_); + while (!cancelled_ && pending_completed_tasks_.empty()) { + background_cv_.wait(l); + } + if (cancelled_) { + VLOG(3) << "Background thread shutting down"; + return; + } + } + Status s = SendTaskUpdates(dispatcher.get()); + if (!s.ok()) { + LOG(WARNING) << "Failed to send task updates to dispatcher: " << s; + Env::Default()->SleepForMicroseconds(kRetryIntervalMicros); + } + } +} + +Status DataServiceWorkerImpl::SendTaskUpdates( + DispatcherService::Stub* dispatcher) LOCKS_EXCLUDED(mu_) { WorkerUpdateRequest req; - for (int task_id : pending_completed_tasks_) { - TaskProgress* update = req.add_updates(); - update->set_task_id(task_id); - update->set_completed(true); + { + mutex_lock l(mu_); + VLOG(3) << "Sending " << pending_completed_tasks_.size() + << " task updates to dispatcher"; + req.set_worker_address(worker_address_); + for (int task_id : pending_completed_tasks_) { + TaskProgress* update = req.add_updates(); + update->set_task_id(task_id); + update->set_completed(true); + } } 
WorkerUpdateResponse resp; grpc::ClientContext ctx; - grpc::Status s = dispatcher_stub_->WorkerUpdate(&ctx, req, &resp); + grpc::Status s = dispatcher->WorkerUpdate(&ctx, req, &resp); if (!s.ok()) { return grpc_util::WrapError("Failed to send task updates", s); } - pending_completed_tasks_.clear(); + mutex_lock l(mu_); + for (const auto& update : req.updates()) { + pending_completed_tasks_.erase(update.task_id()); + } VLOG(3) << "Sent " << req.updates().size() << " task updates "; return Status::OK(); } -void DataServiceWorkerImpl::HeartbeatThread() { - while (true) { - Status s; - { - mutex_lock l(mu_); - while (!cancelled_ && pending_completed_tasks_.empty()) { - heartbeat_cv_.wait(l); - } - if (cancelled_) { - VLOG(3) << "Heartbeat thread shutting down"; - return; - } - s = SendTaskUpdate(); - } - if (!s.ok()) { - LOG(WARNING) << "Failed to send task updates to dispatcher: " << s; - Env::Default()->SleepForMicroseconds(kHeartbeatIntervalMicros); - } - } -} - } // namespace data } // namespace tensorflow diff --git a/tensorflow/core/data/service/worker_impl.h b/tensorflow/core/data/service/worker_impl.h index 6961312ee34..8353d11efdc 100644 --- a/tensorflow/core/data/service/worker_impl.h +++ b/tensorflow/core/data/service/worker_impl.h @@ -38,7 +38,7 @@ class DataServiceWorkerImpl { // constructor because the worker may be binding to port `0`, in which case // the address isn't known until the worker has started and decided which port // to bind to. - void Start(const std::string& worker_address); + Status Start(const std::string& worker_address); // See worker.proto for API documentation. @@ -51,19 +51,23 @@ class DataServiceWorkerImpl { GetElementResponse* response); private: - // Sets dispatcher_stub_ if it isn't already set. - Status EnsureDispatcherStubInitialized(); + Status MakeDispatcherStub(std::unique_ptr* stub); // Registers the worker with the dispatcher. - Status Register(); - // Sends task status to the dispatcher. - Status SendTaskUpdate(); + Status Register(DispatcherService::Stub* dispatcher) LOCKS_EXCLUDED(mu_); + // Sends task status to the dispatcher and checks for dispatcher commands. + Status SendTaskUpdates(DispatcherService::Stub* dispatcher) + LOCKS_EXCLUDED(mu_); // Creates an iterator to process a task. - Status ProcessTaskInternal(const TaskDef& task); - // A thread for updating the dispatcher with worker status. - void HeartbeatThread(); + Status ProcessTaskInternal(const TaskDef& task) EXCLUSIVE_LOCKS_REQUIRED(mu_); + // A thread for doing async background processing not associated with a + // specific RPC, such as reporting finished tasks. The thread takes + // ownership of the passed dispatcher_ptr. We use a raw pointer instead of + // unique_ptr since unique_ptr cannot be passed to std::function. + void BackgroundThread(DispatcherService::Stub* dispatcher_ptr) + LOCKS_EXCLUDED(mu_); typedef struct Task { - int64 id; + int64 task_id; // TODO(aaudibert): Have standalone::Iterator own a reference to // standalone::Dataset so that we don't need to store the dataset here. std::unique_ptr dataset; @@ -75,17 +79,14 @@ class DataServiceWorkerImpl { std::string worker_address_; mutex mu_; - int64 worker_id_ TF_GUARDED_BY(mu_); - std::unique_ptr dispatcher_stub_ TF_GUARDED_BY(mu_); // Information about tasks, keyed by task ids. absl::flat_hash_map tasks_ TF_GUARDED_BY(mu_); - // List of completed tasks which haven't yet been communicated to the - // dispatcher. 
- std::vector pending_completed_tasks_ TF_GUARDED_BY(mu_); + // Completed tasks which haven't yet been communicated to the dispatcher. + absl::flat_hash_set pending_completed_tasks_ TF_GUARDED_BY(mu_); bool cancelled_ TF_GUARDED_BY(mu_) = false; - // Condition variable for notifying the heartbeat thread. - condition_variable heartbeat_cv_ TF_GUARDED_BY(mu_); - std::unique_ptr heartbeat_thread_; + // Condition variable for notifying the background thread. + condition_variable background_cv_ TF_GUARDED_BY(mu_); + std::unique_ptr background_thread_; TF_DISALLOW_COPY_AND_ASSIGN(DataServiceWorkerImpl); }; From fffd56ade18e5546599ec98f0156081db799fce4 Mon Sep 17 00:00:00 2001 From: Andy Ly Date: Thu, 6 Aug 2020 14:27:51 -0700 Subject: [PATCH 2286/2522] Add support for `mhlo.if` in LegalizeTFCommunication pass. TF/XLA communication ops in `mhlo.if` are now legalized, and `mhlo.if` parents/ancestors are rewritten to receive and emit `mhlo.token` generated from these communication op legalizations. PiperOrigin-RevId: 325310956 Change-Id: Id4c9a426b01d44b9191dd364458fc2eb8227383a --- .../xla/tests/legalize-tf-communication.mlir | 496 ++++++++++++++++ .../transforms/legalize_tf_communication.cc | 538 +++++++++++++++--- 2 files changed, 963 insertions(+), 71 deletions(-) diff --git a/tensorflow/compiler/mlir/xla/tests/legalize-tf-communication.mlir b/tensorflow/compiler/mlir/xla/tests/legalize-tf-communication.mlir index f84a2f28a23..d01ab38bd6b 100644 --- a/tensorflow/compiler/mlir/xla/tests/legalize-tf-communication.mlir +++ b/tensorflow/compiler/mlir/xla/tests/legalize-tf-communication.mlir @@ -407,6 +407,502 @@ func @callee2() attributes {sym_visibility = "private"} { // ----- +// Test cloned function rewrite also checks transitive function calls to +// TF/XLA communication ops. + +// CHECK: func @callee3() +func @callee3() { + // CHECK: [[CALLEE3_INIT_TOKEN:%.*]] = "mhlo.create_token" + + // CHECK: call @callee4{{.+}}([[CALLEE3_INIT_TOKEN]]) + call @callee4() : () -> () + return +} + +// CHECK: func @callee4() +func @callee4() { + // CHECK: [[CALLEE4_INIT_TOKEN:%.*]] = "mhlo.create_token" + + // CHECK: [[CALL_5:%.*]] = call @callee5([[CALLEE4_INIT_TOKEN]]) + call @callee5() : () -> () + + // CHECK: return + return +} + +// CHECK: func @callee5([[CALLEE5_ARG0:%.*]]: !mhlo.token) -> !mhlo.token +func @callee5() attributes {sym_visibility = "private"} { + // CHECK-NOT: "mhlo.create_token" + + // CHECK: [[RECV_TUPLE:%.*]] = "mhlo.recv"([[CALLEE5_ARG0]]) + // CHECK: [[RECV_VAL:%.*]] = "mhlo.get_tuple_element"([[RECV_TUPLE]]) + // CHECK-SAME: index = 0 + // CHECK: [[RECV_TOKEN:%.*]] = "mhlo.get_tuple_element"([[RECV_TUPLE]]) + // CHECK-SAME: index = 1 + %0 = "tf.XlaRecvFromHost"() {key = "recv_key", shape = #tf.shape<>} : () -> tensor + + // CHECK: return [[RECV_TOKEN]] + return +} + +// CHECK: func @callee4{{.+}}([[CALLEE4_ARG0:%.*]]: !mhlo.token) -> !mhlo.token attributes {sym_visibility = "private"} +// CHECK-NOT: "mhlo.create_token" +// CHECK: [[CALL_5:%.*]] = call @callee5([[CALLEE4_ARG0]]) +// CHECK: return [[CALL_5]] + +// ----- + +// Tests `mhlo.if` with branches populated with TF/XLA communication ops. 
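+// Each branch argument is rewritten into a tuple of the original value and a
+// `mhlo.token`; the lowered send/recv ops thread that token through the
+// branch, and each branch returns an extended tuple so the token is carried
+// out in the `mhlo.if` result.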
+ +// CHECK-LABEL: func @if_both_branches +// CHECK-SAME: ([[ARG0:%.*]]: tensor, [[ARG1:%.*]]: tensor, [[ARG2:%.*]]: tensor) +func @if_both_branches(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { + // CHECK: [[INIT_TOKEN:%.*]] = "mhlo.create_token" + // CHECK: [[TRUE_TUPLE:%.*]] = "mhlo.tuple"([[ARG1]], [[INIT_TOKEN]]) + // CHECK: [[FALSE_TUPLE:%.*]] = "mhlo.tuple"([[ARG2]], [[INIT_TOKEN]]) + + // CHECK: [[IF_TUPLE:%.*]] = "mhlo.if"([[ARG0]], [[TRUE_TUPLE]], [[FALSE_TUPLE]]) + %0 = "mhlo.if"(%arg0, %arg1, %arg2) ( { + // CHECK: ^bb0([[TRUE_REGION_ARG:%.*]]: tuple, !mhlo.token>): + ^bb0(%arg3: tensor): + // CHECK-DAG: [[TRUE_REGION_ARG_VALUE:%.*]] = "mhlo.get_tuple_element"([[TRUE_REGION_ARG]]) {index = 0 + // CHECK-DAG: [[TRUE_REGION_ARG_TOKEN:%.*]] = "mhlo.get_tuple_element"([[TRUE_REGION_ARG]]) {index = 1 + + // CHECK: [[TRUE_SEND_TOKEN:%.*]] = "mhlo.send"([[TRUE_REGION_ARG_VALUE]], [[TRUE_REGION_ARG_TOKEN]]) + // CHECK-SAME: channel_id = {handle = 1 : i64, type = 2 : i64} + // CHECK-SAME: mhlo.frontend_attributes = {_xla_host_transfer_original_type = "f32", _xla_host_transfer_rendezvous = "send_if_true_dtoh_0"} + + // CHECK: [[TRUE_RECV_TUPLE:%.*]] = "mhlo.recv"([[TRUE_SEND_TOKEN]]) + // CHECK-SAME: channel_id = {handle = 2 : i64, type = 3 : i64} + // CHECK-SAME: mhlo.frontend_attributes = {_xla_host_transfer_original_type = "f32", _xla_host_transfer_rendezvous = "recv_if_true_htod_0"} + %1 = "tf._XlaHostComputeMlir"(%arg3) {recv_key = "recv_if_true", send_key = "send_if_true", tpu_core = 0 : i64} : (tensor) -> tensor + + // CHECK-DAG: [[TRUE_GET_TUPLE_ELEMENT0:%.*]] = "mhlo.get_tuple_element"([[TRUE_RECV_TUPLE]]) {index = 0 + // CHECK-DAG: [[TRUE_GET_TUPLE_ELEMENT1:%.*]] = "mhlo.get_tuple_element"([[TRUE_RECV_TUPLE]]) {index = 1 + // CHECK: [[TRUE_RETURN_TUPLE:%.*]] = "mhlo.tuple"([[TRUE_GET_TUPLE_ELEMENT0]], [[TRUE_GET_TUPLE_ELEMENT1]]) + // CHECK: "mhlo.return"([[TRUE_RETURN_TUPLE]]) + "mhlo.return"(%1) : (tensor) -> () + }, { + // CHECK: ^bb0([[FALSE_REGION_ARG:%.*]]: tuple, !mhlo.token>): + ^bb0(%arg3: tensor): + // CHECK-DAG: [[FALSE_REGION_ARG_VALUE:%.*]] = "mhlo.get_tuple_element"([[FALSE_REGION_ARG]]) {index = 0 + // CHECK-DAG: [[FALSE_REGION_ARG_TOKEN:%.*]] = "mhlo.get_tuple_element"([[FALSE_REGION_ARG]]) {index = 1 + + // CHECK: [[FALSE_SEND_TOKEN:%.*]] = "mhlo.send"([[FALSE_REGION_ARG_VALUE]], [[FALSE_REGION_ARG_TOKEN]]) + // CHECK-SAME: channel_id = {handle = 3 : i64, type = 2 : i64} + // CHECK-SAME: mhlo.frontend_attributes = {_xla_host_transfer_original_type = "f32", _xla_host_transfer_rendezvous = "send_if_false_dtoh_0"} + + // CHECK: [[FALSE_RECV_TUPLE:%.*]] = "mhlo.recv"([[FALSE_SEND_TOKEN]]) + // CHECK-SAME: channel_id = {handle = 4 : i64, type = 3 : i64} + // CHECK-SAME: mhlo.frontend_attributes = {_xla_host_transfer_original_type = "f32", _xla_host_transfer_rendezvous = "recv_if_false_htod_0"} + %1 = "tf._XlaHostComputeMlir"(%arg3) {recv_key = "recv_if_false", send_key = "send_if_false", tpu_core = 0 : i64} : (tensor) -> tensor + + // CHECK-DAG: [[FALSE_GET_TUPLE_ELEMENT0:%.*]] = "mhlo.get_tuple_element"([[FALSE_RECV_TUPLE]]) {index = 0 + // CHECK-DAG: [[FALSE_GET_TUPLE_ELEMENT1:%.*]] = "mhlo.get_tuple_element"([[FALSE_RECV_TUPLE]]) {index = 1 + // CHECK: [[FALSE_RETURN_TUPLE:%.*]] = "mhlo.tuple"([[FALSE_GET_TUPLE_ELEMENT0]], [[FALSE_GET_TUPLE_ELEMENT1]]) + // CHECK: "mhlo.return"([[FALSE_RETURN_TUPLE]]) + "mhlo.return"(%1) : (tensor) -> () + + // CHECK: (tensor, tuple, !mhlo.token>, tuple, !mhlo.token>) -> tuple, !mhlo.token> + }) : (tensor, tensor, 
tensor) -> tensor + + // CHECK: [[IF_TUPLE_ELEMENT0:%.*]] = "mhlo.get_tuple_element"([[IF_TUPLE]]) + // CHECK-SAME: index = 0 + // CHECK: return [[IF_TUPLE_ELEMENT0]] + return %0 : tensor +} + +// ----- + +// Tests `mhlo.if` with only the `true` branch populated with TF/XLA +// communication ops. + +// CHECK-LABEL: func @if_true_branch +// CHECK-SAME: ([[ARG0:%.*]]: tensor, [[ARG1:%.*]]: tensor, [[ARG2:%.*]]: tensor) +func @if_true_branch(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { + // CHECK: [[INIT_TOKEN:%.*]] = "mhlo.create_token" + // CHECK: [[TRUE_TUPLE:%.*]] = "mhlo.tuple"([[ARG1]], [[INIT_TOKEN]]) + // CHECK: [[FALSE_TUPLE:%.*]] = "mhlo.tuple"([[ARG2]], [[INIT_TOKEN]]) + + // CHECK: [[IF_TUPLE:%.*]] = "mhlo.if"([[ARG0]], [[TRUE_TUPLE]], [[FALSE_TUPLE]]) + %0 = "mhlo.if"(%arg0, %arg1, %arg2) ( { + // CHECK: ^bb0([[TRUE_REGION_ARG:%.*]]: tuple, !mhlo.token>): + ^bb0(%arg3: tensor): + // CHECK-DAG: [[TRUE_REGION_ARG_VALUE:%.*]] = "mhlo.get_tuple_element"([[TRUE_REGION_ARG]]) {index = 0 + // CHECK-DAG: [[TRUE_REGION_ARG_TOKEN:%.*]] = "mhlo.get_tuple_element"([[TRUE_REGION_ARG]]) {index = 1 + + // CHECK: [[TRUE_SEND_TOKEN:%.*]] = "mhlo.send"([[TRUE_REGION_ARG_VALUE]], [[TRUE_REGION_ARG_TOKEN]]) + // CHECK-SAME: channel_id = {handle = 1 : i64, type = 2 : i64} + // CHECK-SAME: mhlo.frontend_attributes = {_xla_host_transfer_original_type = "f32", _xla_host_transfer_rendezvous = "send_if_true_dtoh_0"} + + // CHECK: [[TRUE_RECV_TUPLE:%.*]] = "mhlo.recv"([[TRUE_SEND_TOKEN]]) + // CHECK-SAME: channel_id = {handle = 2 : i64, type = 3 : i64} + // CHECK-SAME: mhlo.frontend_attributes = {_xla_host_transfer_original_type = "f32", _xla_host_transfer_rendezvous = "recv_if_true_htod_0"} + %1 = "tf._XlaHostComputeMlir"(%arg3) {recv_key = "recv_if_true", send_key = "send_if_true", tpu_core = 0 : i64} : (tensor) -> tensor + + // CHECK-DAG: [[TRUE_GET_TUPLE_ELEMENT0:%.*]] = "mhlo.get_tuple_element"([[TRUE_RECV_TUPLE]]) {index = 0 + // CHECK-DAG: [[TRUE_GET_TUPLE_ELEMENT1:%.*]] = "mhlo.get_tuple_element"([[TRUE_RECV_TUPLE]]) {index = 1 + // CHECK: [[TRUE_RETURN_TUPLE:%.*]] = "mhlo.tuple"([[TRUE_GET_TUPLE_ELEMENT0]], [[TRUE_GET_TUPLE_ELEMENT1]]) + // CHECK: "mhlo.return"([[TRUE_RETURN_TUPLE]]) + "mhlo.return"(%1) : (tensor) -> () + }, { + // CHECK: ^bb0([[FALSE_REGION_ARG:%.*]]: tuple, !mhlo.token>): + ^bb0(%arg3: tensor): + // CHECK-DAG: [[FALSE_GET_TUPLE_ELEMENT0:%.*]] = "mhlo.get_tuple_element"([[FALSE_REGION_ARG]]) {index = 0 + // CHECK-DAG: [[FALSE_GET_TUPLE_ELEMENT1:%.*]] = "mhlo.get_tuple_element"([[FALSE_REGION_ARG]]) {index = 1 + // CHECK: [[FALSE_RETURN_TUPLE:%.*]] = "mhlo.tuple"([[FALSE_GET_TUPLE_ELEMENT0]], [[FALSE_GET_TUPLE_ELEMENT1]]) + // CHECK: "mhlo.return"([[FALSE_RETURN_TUPLE]]) + "mhlo.return"(%arg3) : (tensor) -> () + + // CHECK: (tensor, tuple, !mhlo.token>, tuple, !mhlo.token>) -> tuple, !mhlo.token> + }) : (tensor, tensor, tensor) -> tensor + + // CHECK: [[IF_TUPLE_ELEMENT0:%.*]] = "mhlo.get_tuple_element"([[IF_TUPLE]]) + // CHECK-SAME: index = 0 + // CHECK: return [[IF_TUPLE_ELEMENT0]] + return %0 : tensor +} + +// ----- + +// Tests `mhlo.if` with only the `false` branch populated with TF/XLA +// communication ops. 
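+// The `true` branch has no communication op, so it is only rewritten to
+// unpack and repack the token-carrying tuple it now receives.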
+ +// CHECK-LABEL: func @if_false_branch +// CHECK-SAME: ([[ARG0:%.*]]: tensor, [[ARG1:%.*]]: tensor, [[ARG2:%.*]]: tensor) +func @if_false_branch(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { + // CHECK: [[INIT_TOKEN:%.*]] = "mhlo.create_token" + // CHECK: [[TRUE_TUPLE:%.*]] = "mhlo.tuple"([[ARG1]], [[INIT_TOKEN]]) + // CHECK: [[FALSE_TUPLE:%.*]] = "mhlo.tuple"([[ARG2]], [[INIT_TOKEN]]) + + // CHECK: [[IF_TUPLE:%.*]] = "mhlo.if"([[ARG0]], [[TRUE_TUPLE]], [[FALSE_TUPLE]]) + %0 = "mhlo.if"(%arg0, %arg1, %arg2) ( { + // CHECK: ^bb0([[TRUE_REGION_ARG:%.*]]: tuple, !mhlo.token>): + ^bb0(%arg3: tensor): + // CHECK-DAG: [[TRUE_GET_TUPLE_ELEMENT0:%.*]] = "mhlo.get_tuple_element"([[TRUE_REGION_ARG]]) {index = 0 + // CHECK-DAG: [[TRUE_GET_TUPLE_ELEMENT1:%.*]] = "mhlo.get_tuple_element"([[TRUE_REGION_ARG]]) {index = 1 + // CHECK: [[TRUE_RETURN_TUPLE:%.*]] = "mhlo.tuple"([[TRUE_GET_TUPLE_ELEMENT0]], [[TRUE_GET_TUPLE_ELEMENT1]]) + // CHECK: "mhlo.return"([[TRUE_RETURN_TUPLE]]) + "mhlo.return"(%arg3) : (tensor) -> () + }, { + // CHECK: ^bb0([[FALSE_REGION_ARG:%.*]]: tuple, !mhlo.token>): + ^bb0(%arg3: tensor): + // CHECK-DAG: [[FALSE_REGION_ARG_VALUE:%.*]] = "mhlo.get_tuple_element"([[FALSE_REGION_ARG]]) {index = 0 + // CHECK-DAG: [[FALSE_REGION_ARG_TOKEN:%.*]] = "mhlo.get_tuple_element"([[FALSE_REGION_ARG]]) {index = 1 + + // CHECK: [[FALSE_SEND_TOKEN:%.*]] = "mhlo.send"([[FALSE_REGION_ARG_VALUE]], [[FALSE_REGION_ARG_TOKEN]]) + // CHECK-SAME: channel_id = {handle = 1 : i64, type = 2 : i64} + // CHECK-SAME: mhlo.frontend_attributes = {_xla_host_transfer_original_type = "f32", _xla_host_transfer_rendezvous = "send_if_false_dtoh_0"} + + // CHECK: [[FALSE_RECV_TUPLE:%.*]] = "mhlo.recv"([[FALSE_SEND_TOKEN]]) + // CHECK-SAME: channel_id = {handle = 2 : i64, type = 3 : i64} + // CHECK-SAME: mhlo.frontend_attributes = {_xla_host_transfer_original_type = "f32", _xla_host_transfer_rendezvous = "recv_if_false_htod_0"} + %1 = "tf._XlaHostComputeMlir"(%arg3) {recv_key = "recv_if_false", send_key = "send_if_false", tpu_core = 0 : i64} : (tensor) -> tensor + + // CHECK-DAG: [[FALSE_GET_TUPLE_ELEMENT0:%.*]] = "mhlo.get_tuple_element"([[FALSE_RECV_TUPLE]]) {index = 0 + // CHECK-DAG: [[FALSE_GET_TUPLE_ELEMENT1:%.*]] = "mhlo.get_tuple_element"([[FALSE_RECV_TUPLE]]) {index = 1 + // CHECK: [[FALSE_RETURN_TUPLE:%.*]] = "mhlo.tuple"([[FALSE_GET_TUPLE_ELEMENT0]], [[FALSE_GET_TUPLE_ELEMENT1]]) + // CHECK: "mhlo.return"([[FALSE_RETURN_TUPLE]]) + "mhlo.return"(%1) : (tensor) -> () + + // CHECK: (tensor, tuple, !mhlo.token>, tuple, !mhlo.token>) -> tuple, !mhlo.token> + }) : (tensor, tensor, tensor) -> tensor + + // CHECK: [[IF_TUPLE_ELEMENT0:%.*]] = "mhlo.get_tuple_element"([[IF_TUPLE]]) + // CHECK-SAME: index = 0 + // CHECK: return [[IF_TUPLE_ELEMENT0]] + return %0 : tensor +} + +// ----- + +// Tests `mhlo.if` with tuple arg from a `mhlo.tuple` only used by `mhlo.if` is +// replaced. 
+ +// CHECK-LABEL: func @if_replace_tuple_arg +// CHECK-SAME: ([[ARG0:%.*]]: tensor, [[ARG1:%.*]]: tensor, [[ARG2:%.*]]: tensor) +func @if_replace_tuple_arg(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { + // CHECK-NOT: "mhlo.tuple"([[ARG1]], [[ARG2]]) + // CHECK: [[INIT_TOKEN:%.*]] = "mhlo.create_token" + // CHECK: [[IF_ARG_TUPLE:%.*]] = "mhlo.tuple"([[ARG1]], [[ARG2]], [[INIT_TOKEN]]) + %0 = "mhlo.tuple"(%arg1, %arg2) : (tensor, tensor) -> tuple, tensor> + + // CHECK: [[IF_TUPLE:%.*]] = "mhlo.if"([[ARG0]], [[IF_ARG_TUPLE]], [[IF_ARG_TUPLE]]) + %1 = "mhlo.if"(%arg0, %0, %0) ( { + ^bb0(%arg3: tuple, tensor>): + %2 = "mhlo.get_tuple_element"(%arg3) {index = 0 : i32} : (tuple, tensor>) -> tensor + "tf.XlaSendToHost"(%2) {key = "send_key"} : (tensor) -> () + "mhlo.return"(%2) : (tensor) -> () + }, { + ^bb0(%arg3: tuple, tensor>): + %2 = "mhlo.get_tuple_element"(%arg3) {index = 0 : i32} : (tuple, tensor>) -> tensor + "mhlo.return"(%2) : (tensor) -> () + }) : (tensor, tuple, tensor>, tuple, tensor>) -> tensor + return %1 : tensor +} + +// ----- + +// Tests `mhlo.if` with tuple arg not from a `mhlo.tuple` is unpacked. + +// CHECK-LABEL: func @if_unpack_tuple_arg +// CHECK-SAME: ([[ARG0:%.*]]: tensor, [[ARG1:%.*]]: tuple, tensor>) +func @if_unpack_tuple_arg(%arg0: tensor, %arg1: tuple, tensor>) -> tensor { + // CHECK: [[INIT_TOKEN:%.*]] = "mhlo.create_token" + // CHECK-DAG: [[IF_ARG_ELEMENT0:%.*]] = "mhlo.get_tuple_element"([[ARG1]]) {index = 0 + // CHECK-DAG: [[IF_ARG_ELEMENT1:%.*]] = "mhlo.get_tuple_element"([[ARG1]]) {index = 1 + // CHECK: [[IF_ARG_TUPLE:%.*]] = "mhlo.tuple"([[IF_ARG_ELEMENT0]], [[IF_ARG_ELEMENT1]], [[INIT_TOKEN]]) + + // CHECK: [[IF_TUPLE:%.*]] = "mhlo.if"([[ARG0]], [[IF_ARG_TUPLE]], [[IF_ARG_TUPLE]]) + %0 = "mhlo.if"(%arg0, %arg1, %arg1) ( { + ^bb0(%arg2: tuple, tensor>): + %1 = "mhlo.get_tuple_element"(%arg2) {index = 0 : i32} : (tuple, tensor>) -> tensor + "tf.XlaSendToHost"(%1) {key = "send_key"} : (tensor) -> () + "mhlo.return"(%1) : (tensor) -> () + }, { + ^bb0(%arg2: tuple, tensor>): + %1 = "mhlo.get_tuple_element"(%arg2) {index = 0 : i32} : (tuple, tensor>) -> tensor + "mhlo.return"(%1) : (tensor) -> () + }) : (tensor, tuple, tensor>, tuple, tensor>) -> tensor + return %0 : tensor +} + +// ----- + +// Tests `mhlo.if` tuple result is extended with a `mhlo.token`. + +// CHECK-LABEL: func @if_extend_tuple_result +func @if_extend_tuple_result(%arg0: tensor, %arg1: tuple, tensor>) -> tuple, tensor> { + // CHECK: [[IF_TUPLE:%.*]] = "mhlo.if" + %0 = "mhlo.if"(%arg0, %arg1, %arg1) ( { + ^bb0(%arg2: tuple, tensor>): + %1 = "mhlo.get_tuple_element"(%arg2) {index = 0 : i32} : (tuple, tensor>) -> tensor + "tf.XlaSendToHost"(%1) {key = "send_key"} : (tensor) -> () + "mhlo.return"(%arg2) : (tuple, tensor>) -> () + }, { + ^bb0(%arg2: tuple, tensor>): + "mhlo.return"(%arg2) : (tuple, tensor>) -> () + // CHECK: (tensor, tuple, tensor, !mhlo.token>, tuple, tensor, !mhlo.token>) -> tuple, tensor, !mhlo.token> + }) : (tensor, tuple, tensor>, tuple, tensor>) -> tuple, tensor> + + // CHECK-DAG: [[IF_TUPLE_ELEMENT0:%.*]] = "mhlo.get_tuple_element"([[IF_TUPLE]]) {index = 0 + // CHECK-DAG: [[IF_TUPLE_ELEMENT1:%.*]] = "mhlo.get_tuple_element"([[IF_TUPLE]]) {index = 1 + // CHECK: [[IF_SUBTUPLE_RESULT:%.*]] = "mhlo.tuple"([[IF_TUPLE_ELEMENT0]], [[IF_TUPLE_ELEMENT1]]) + // CHECK: return [[IF_SUBTUPLE_RESULT]] + return %0 : tuple, tensor> +} + +// ----- + +// Tests nested `mhlo.if` containing TF/XLA communication ops. 
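+// The token produced inside the inner `mhlo.if` is returned through the inner
+// result tuple and then forwarded by the outer branch, preserving ordering
+// across the nesting.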
+ +// CHECK-LABEL: func @if_nested +// CHECK-SAME: ([[ARG0:%.*]]: tensor, [[ARG1:%.*]]: tensor) +func @if_nested(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: [[INIT_TOKEN:%.*]] = "mhlo.create_token" + // CHECK: [[OUTER_IF_ARG_TUPLE:%.*]] = "mhlo.tuple"([[ARG1]], [[INIT_TOKEN]]) + + // CHECK: "mhlo.if"([[ARG0]], [[OUTER_IF_ARG_TUPLE]], [[OUTER_IF_ARG_TUPLE]]) + %0 = "mhlo.if"(%arg0, %arg1, %arg1) ( { + // CHECK-NEXT: ^bb0([[OUTER_IF_TRUE_ARG:%.*]]: tuple, !mhlo.token>): + ^bb0(%arg2: tensor): + // CHECK-DAG: [[OUTER_IF_TRUE_ARG_ELEMENT0:%.*]] = "mhlo.get_tuple_element"([[OUTER_IF_TRUE_ARG]]) {index = 0 + // CHECK-DAG: [[OUTER_IF_TRUE_ARG_ELEMENT1:%.*]] = "mhlo.get_tuple_element"([[OUTER_IF_TRUE_ARG]]) {index = 1 + // CHECK: [[INNER_IF_ARG_TUPLE:%.*]] = "mhlo.tuple"([[OUTER_IF_TRUE_ARG_ELEMENT0]], [[OUTER_IF_TRUE_ARG_ELEMENT1]]) + + %1 = mhlo.constant dense : tensor + + // CHECK: [[INNER_IF_TUPLE:%.*]] = "mhlo.if"({{%.*}}, [[INNER_IF_ARG_TUPLE]], [[INNER_IF_ARG_TUPLE]]) + %2 = "mhlo.if"(%1, %arg2, %arg2) ( { + // CHECK-NEXT: ^bb0([[INNER_IF_TRUE_ARG:%.*]]: tuple, !mhlo.token>): + ^bb0(%arg3: tensor): + // CHECK-DAG: [[INNER_IF_TRUE_ARG_ELEMENT0:%.*]] = "mhlo.get_tuple_element"([[INNER_IF_TRUE_ARG]]) {index = 0 + // CHECK-DAG: [[INNER_IF_TRUE_ARG_ELEMENT1:%.*]] = "mhlo.get_tuple_element"([[INNER_IF_TRUE_ARG]]) {index = 1 + + // CHECK: [[SEND_TOKEN:%.*]] = "mhlo.send"([[INNER_IF_TRUE_ARG_ELEMENT0]], [[INNER_IF_TRUE_ARG_ELEMENT1]]) + "tf.XlaSendToHost"(%arg3) {key = "send_key"} : (tensor) -> () + + // CHECK: [[INNER_IF_TRUE_RESULT:%.*]] = "mhlo.tuple"([[INNER_IF_TRUE_ARG_ELEMENT0]], [[SEND_TOKEN]]) + // CHECK: "mhlo.return"([[INNER_IF_TRUE_RESULT]]) + "mhlo.return"(%arg3) : (tensor) -> () + + // CHECK-NEXT: }, { + }, { + + // CHECK-NEXT: ^bb0([[INNER_IF_FALSE_ARG:%.*]]: tuple, !mhlo.token>): + ^bb0(%arg3: tensor): + // CHECK-DAG: [[INNER_IF_FALSE_ARG_ELEMENT0:%.*]] = "mhlo.get_tuple_element"([[INNER_IF_FALSE_ARG]]) {index = 0 + // CHECK-DAG: [[INNER_IF_FALSE_ARG_ELEMENT1:%.*]] = "mhlo.get_tuple_element"([[INNER_IF_FALSE_ARG]]) {index = 1 + // CHECK: [[INNER_IF_FALSE_RESULT:%.*]] = "mhlo.tuple"([[INNER_IF_FALSE_ARG_ELEMENT0]], [[INNER_IF_FALSE_ARG_ELEMENT1]]) + // CHECK: "mhlo.return"([[INNER_IF_FALSE_RESULT]]) + "mhlo.return"(%arg3) : (tensor) -> () + // CHECK-NEXT: (tensor, tuple, !mhlo.token>, tuple, !mhlo.token>) -> tuple, !mhlo.token> + }) : (tensor, tensor, tensor) -> tensor + + // CHECK-DAG: [[INNER_IF_TUPLE_ELEMENT1:%.*]] = "mhlo.get_tuple_element"([[INNER_IF_TUPLE]]) {index = 1 + // CHECK: [[OUTER_IF_TRUE_RESULT:%.*]] = "mhlo.tuple"([[OUTER_IF_TRUE_ARG_ELEMENT0]], [[INNER_IF_TUPLE_ELEMENT1]]) + // CHECK: "mhlo.return"([[OUTER_IF_TRUE_RESULT]]) + "mhlo.return"(%arg2) : (tensor) -> () + + // CHECK-NEXT: }, { + }, { + + // CHECK-NEXT: ^bb0([[OUTER_IF_FALSE_ARG:%.*]]: tuple, !mhlo.token>): + ^bb0(%arg2: tensor): + // CHECK-DAG: [[OUTER_IF_FALSE_ARG_ELEMENT0:%.*]] = "mhlo.get_tuple_element"([[OUTER_IF_FALSE_ARG]]) {index = 0 + // CHECK-DAG: [[OUTER_IF_FALSE_ARG_ELEMENT1:%.*]] = "mhlo.get_tuple_element"([[OUTER_IF_FALSE_ARG]]) {index = 1 + // CHECK: [[OUTER_IF_FALSE_RESULT:%.*]] = "mhlo.tuple"([[OUTER_IF_FALSE_ARG_ELEMENT0]], [[OUTER_IF_FALSE_ARG_ELEMENT1]]) + // CHECK: "mhlo.return"([[OUTER_IF_FALSE_RESULT]]) + "mhlo.return"(%arg2) : (tensor) -> () + // CHECK-NEXT: (tensor, tuple, !mhlo.token>, tuple, !mhlo.token>) -> tuple, !mhlo.token> + }) : (tensor, tensor, tensor) -> tensor + return %0 : tensor +} + +// ----- + +// Tests `mhlo.if` containing a function call to TF/XLA 
communication ops. + +// CHECK-LABEL: func @if_function_call +func @if_function_call(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "mhlo.if" + %0 = "mhlo.if"(%arg0, %arg1, %arg1) ( { + // CHECK: ^bb0([[TRUE_REGION_ARG:%.*]]: tuple, !mhlo.token>): + ^bb0(%arg2: tensor): + // CHECK-DAG: [[TRUE_REGION_ARG_ELEMENT0:%.*]] = "mhlo.get_tuple_element"([[TRUE_REGION_ARG]]) {index = 0 + // CHECK-DAG: [[TRUE_REGION_ARG_ELEMENT1:%.*]] = "mhlo.get_tuple_element"([[TRUE_REGION_ARG]]) {index = 1 + // CHECK: [[CALL_TOKEN:%.*]] = call @callee([[TRUE_REGION_ARG_ELEMENT0]], [[TRUE_REGION_ARG_ELEMENT1]]) + call @callee(%arg2) : (tensor) -> () + + // CHECK: [[TRUE_RETURN_TUPLE:%.*]] = "mhlo.tuple"([[TRUE_REGION_ARG_ELEMENT0]], [[CALL_TOKEN]]) + // CHECK: "mhlo.return"([[TRUE_RETURN_TUPLE]]) + "mhlo.return"(%arg2) : (tensor) -> () + }, { + ^bb0(%arg2: tensor): + "mhlo.return"(%arg2) : (tensor) -> () + }) : (tensor, tensor, tensor) -> tensor + return %0 : tensor +} + +// CHECK-LABEL: func @callee +// CHECK-SAME: ([[CALLEE_ARG0:%.*]]: tensor, [[CALLEE_ARG1:%.*]]: !mhlo.token) -> !mhlo.token +func @callee(%arg0: tensor) attributes {sym_visibility = "private"} { + // CHECK: [[SEND_TOKEN:%.*]] = "mhlo.send" + "tf.XlaSendToHost"(%arg0) {key = "send_key"} : (tensor) -> () + + // CHECK: return [[SEND_TOKEN]] + return +} + +// ----- + +// Tests `mhlo.if` containing multiple TF/XLA communication ops. + +// CHECK-LABEL: func @if_region_multiple_ops +func @if_region_multiple_ops(%arg0: tensor, %arg1: tensor) { + // CHECK: "mhlo.if" + %0 = "mhlo.if"(%arg0, %arg1, %arg1) ( { + // CHECK: ^bb0([[TRUE_REGION_ARG:%.*]]: tuple, !mhlo.token>): + ^bb0(%arg2: tensor): + // CHECK: [[TRUE_REGION_ARG_ELEMENT0:%.*]] = "mhlo.get_tuple_element"([[TRUE_REGION_ARG]]) {index = 0 + // CHECK: [[TRUE_REGION_ARG_ELEMENT1:%.*]] = "mhlo.get_tuple_element"([[TRUE_REGION_ARG]]) {index = 1 + + // CHECK: [[SEND0_TOKEN:%.*]] = "mhlo.send"([[TRUE_REGION_ARG_ELEMENT0]], [[TRUE_REGION_ARG_ELEMENT1]]) + "tf.XlaSendToHost"(%arg2) {key = "send_key0"} : (tensor) -> () + + // CHECK: [[SEND1_TOKEN:%.*]] = "mhlo.send"([[TRUE_REGION_ARG_ELEMENT0]], [[SEND0_TOKEN]]) + "tf.XlaSendToHost"(%arg2) {key = "send_key1"} : (tensor) -> () + + // CHECK: [[TRUE_RETURN_TUPLE:%.*]] = "mhlo.tuple"([[TRUE_REGION_ARG_ELEMENT0]], [[SEND1_TOKEN]]) + // CHECK: "mhlo.return"([[TRUE_RETURN_TUPLE]]) + "mhlo.return"(%arg2) : (tensor) -> () + }, { + ^bb0(%arg2: tensor): + "mhlo.return"(%arg2) : (tensor) -> () + }) : (tensor, tensor, tensor) -> tensor + return +} + +// ----- + +// Tests `mhlo.if` containing TF/XLA communication ops followed by other TF/XLA +// communication ops. + +func @if_followed_by_communication_op(%arg0: tensor, %arg1: tensor) { + // CHECK: [[IF_TUPLE:%.*]] = "mhlo.if" + %0 = "mhlo.if"(%arg0, %arg1, %arg1) ( { + ^bb0(%arg2: tensor): + "tf.XlaSendToHost"(%arg2) {key = "send_key0"} : (tensor) -> () + "mhlo.return"(%arg2) : (tensor) -> () + }, { + ^bb0(%arg2: tensor): + "mhlo.return"(%arg2) : (tensor) -> () + }) : (tensor, tensor, tensor) -> tensor + + // CHECK: [[IF_TUPLE_ELEMENT1:%.*]] = "mhlo.get_tuple_element"([[IF_TUPLE]]) {index = 1 + + // CHECK: "mhlo.send"({{.*}}, [[IF_TUPLE_ELEMENT1]]) + "tf.XlaSendToHost"(%arg1) {key = "send_key1"} : (tensor) -> () + return +} + +// ----- + +// Tests unsupported parent of TF/XLA communication op. 
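+// A `mhlo.reduce` region is not a supported ancestor, so legalization is
+// expected to fail with the diagnostic checked below.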
+ +func @unsupported_ancestor(%arg0: tensor, %arg1: tensor) { + %0 = "mhlo.reduce"(%arg0, %arg1) ( { + ^bb0(%arg2: tensor, %arg3: tensor): + %1 = mhlo.add %arg2, %arg3 : tensor + // expected-error@+1 {{expects ancestor(s) to be of ['mhlo.if', 'func']}} + "tf._XlaHostComputeMlir"() {recv_key = "host_compute_channel_recv", send_key = "host_compute_channel_send", tpu_core = 0 : i64} : () -> () + "mhlo.return"(%1) : (tensor) -> () + }) {dimensions = dense<[1]> : tensor<1xi64>} : (tensor, tensor) -> tensor + return +} + +// ----- + +// Tests transitive unsupported parent of TF/XLA communication op. + +func @unsupported_ancestor(%arg0: tensor, %arg1: tensor) { + %0 = "mhlo.reduce"(%arg0, %arg1) ( { + ^bb0(%arg2: tensor, %arg3: tensor): + %1 = mhlo.add %arg2, %arg3 : tensor + // expected-error@+1 {{expects ancestor(s) to be of ['mhlo.if', 'func']}} + call @callee() : () -> () + "mhlo.return"(%1) : (tensor) -> () + }) {dimensions = dense<[1]> : tensor<1xi64>} : (tensor, tensor) -> tensor + return +} + +func @callee() attributes {sym_visibility = "private"} { + "tf._XlaHostComputeMlir"() {recv_key = "host_compute_channel_recv", send_key = "host_compute_channel_send", tpu_core = 0 : i64} : () -> () + return +} + +// ----- + +// Tests unsupported `mhlo.if` with region of more than one block and contains a +// TF/XLA communication op. + +func @if_multiple_blocks(%arg0: tensor, %arg1: tensor) { + %0 = "mhlo.if"(%arg0, %arg1, %arg1) ( { + ^bb0(%arg2: tensor): + br ^bb1(%arg2 : tensor) + ^bb1(%arg3: tensor): + // expected-error@+1 {{expects single block region ancestor(s)}} + "tf.XlaSendToHost"(%arg3) {key = "send_key0"} : (tensor) -> () + "mhlo.return"(%arg3) : (tensor) -> () + }, { + ^bb0(%arg2: tensor): + "mhlo.return"(%arg2) : (tensor) -> () + }) : (tensor, tensor, tensor) -> tensor + return +} + +// ----- + // Tests function with more than one block that is to be rewritten emits an // error instead. diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_communication.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_communication.cc index 588e31ab669..b4e4f5c4f5c 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_communication.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_communication.cc @@ -22,15 +22,20 @@ limitations under the License. 
#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Sequence.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormatVariadic.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project #include "mlir/IR/Attributes.h" // from @llvm-project #include "mlir/IR/Builders.h" // from @llvm-project #include "mlir/IR/Module.h" // from @llvm-project +#include "mlir/IR/StandardTypes.h" // from @llvm-project #include "mlir/IR/TypeUtilities.h" // from @llvm-project +#include "mlir/IR/Value.h" // from @llvm-project #include "mlir/IR/Visitors.h" // from @llvm-project #include "mlir/Pass/Pass.h" // from @llvm-project #include "mlir/Support/LLVM.h" // from @llvm-project +#include "mlir/Support/LogicalResult.h" // from @llvm-project #include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #include "tensorflow/compiler/mlir/xla/type_to_shape.h" @@ -49,45 +54,100 @@ const char kXlaHostTransferOriginalTypeAttr[] = "_xla_host_transfer_original_type"; // A pass that legalizes TF/XLA communication ops, propagate their respective -// tokens (for ordering), and rewrite their respective functions when necessary. +// tokens (for ordering), and rewrite their respective functions and control +// flow ops when necessary. // Note, this currently does not handle nested modules/functions or region based -// ops (e.g. control flow). +// ops other than certain control flow ops (`mhlo.if`). class LegalizeTFCommunication : public PassWrapper> { public: void runOnOperation() override; }; -// Checks if a function has any communication ops. -bool HasCommunicationOps(FuncOp func) { - auto result = func.walk([](Operation* op) { - if (isa(op)) +// Checks if an op is a TF/XLA communication op. +bool IsCommunicationOp(Operation* op) { + return isa(op); +} + +// Checks if an op is a supported HLO control flow op. +bool IsControlFlowOp(Operation* op) { return isa(op); } + +// Collects control flow op ancestors of a given op, up until FuncOp. If any +// ancestor is not a control flow op or a FuncOp, or of a single block region, +// an error will be returned. +LogicalResult GetControlFlowAncestors( + Operation* op, llvm::SmallPtrSetImpl& control_flow_ops, + llvm::SmallPtrSetImpl& control_flow_blocks) { + Block* block = op->getBlock(); + Operation* parent = block->getParentOp(); + while (block && parent && !isa(parent)) { + if (!IsControlFlowOp(parent)) + return op->emitOpError() + << "expects ancestor(s) to be of ['" << IfOp::getOperationName() + << "', '" << FuncOp::getOperationName() << "']"; + + if (!llvm::hasSingleElement(block->getParent()->getBlocks())) + return op->emitOpError() << "expects single block region ancestor(s)"; + + control_flow_ops.insert(parent); + control_flow_blocks.insert(block); + + parent = block->getParentOp(); + block = parent->getBlock(); + } + return success(); +} + +// Finds communication ops in a function. `control_flow_ops` and +// `control_flow_blocks` will be populated with control flow op ancestors for +// every communication op. 
+LogicalResult FindCommunicationOps( + FuncOp func, llvm::SmallPtrSetImpl& control_flow_ops, + llvm::SmallPtrSetImpl& control_flow_blocks, + bool& has_communication_ops) { + auto result = func.walk([&](Operation* op) { + if (!IsCommunicationOp(op)) return WalkResult::advance(); + has_communication_ops = true; + if (failed( + GetControlFlowAncestors(op, control_flow_ops, control_flow_blocks))) return WalkResult::interrupt(); return WalkResult::advance(); }); - return result.wasInterrupted(); + return failure(result.wasInterrupted()); } -// Helper struct holding a function and optional cloned version. If `clone` is -// set, function calls to `original` will be replaced with `clone`. -struct FuncAndClone { +// Helper struct holding a function to be rewritten, it's control flow ops that +// lead to a communication op or function call with a communication op +// (transitively), and an optional clone of itself. If `clone` is set, function +// calls to `original` will be replaced with `clone`. +struct FuncToRewrite { FuncOp original; + llvm::SmallPtrSet control_flow_ops; + llvm::SmallPtrSet control_flow_blocks; FuncOp clone; }; // Finds all functions that need to be rewritten with communication ops and // and associated tokens. -llvm::SmallDenseMap GetFunctionsToRewrite( - ModuleOp module) { +LogicalResult GetFunctionsToRewrite( + ModuleOp module, + llvm::SmallDenseMap& funcs_to_rewrite) { // Find functions containing communication ops. - llvm::SmallDenseMap funcs; SmallVector funcs_to_visit; for (FuncOp func : module.getOps()) { - if (HasCommunicationOps(func)) { - funcs.insert({func.getName(), {func, /*clone=*/nullptr}}); - funcs_to_visit.push_back(func); - } + FuncToRewrite func_to_rewrite{/*original=*/func, /*control_flow_ops=*/{}, + /*control_flow_blocks=*/{}, + /*clone=*/nullptr}; + bool has_communication_ops = false; + if (failed(FindCommunicationOps(func, func_to_rewrite.control_flow_ops, + func_to_rewrite.control_flow_blocks, + has_communication_ops))) + return failure(); + + if (!has_communication_ops) continue; + funcs_to_rewrite.insert({func.getName(), func_to_rewrite}); + funcs_to_visit.push_back(func); } // Find functions that call functions with communication ops, transitively. @@ -100,13 +160,30 @@ llvm::SmallDenseMap GetFunctionsToRewrite( // Only `mlir::CallOp` is supported as this requires knowing how to // rewrite arguments and results to a function. 
if (!isa(use.getUser())) continue; - auto caller_func = use.getUser()->getParentOfType(); - if (!caller_func) continue; - if (funcs - .insert( - {caller_func.getName(), {caller_func, /*clone=*/nullptr}}) - .second) - new_funcs_to_visit.push_back(caller_func); + auto caller_parent_func = use.getUser()->getParentOfType(); + if (!caller_parent_func) continue; + + FuncToRewrite func_to_rewrite{/*original=*/caller_parent_func, + /*control_flow_ops=*/{}, + /*control_flow_blocks=*/{}, + /*clone=*/nullptr}; + if (failed(GetControlFlowAncestors( + use.getUser(), func_to_rewrite.control_flow_ops, + func_to_rewrite.control_flow_blocks))) + return failure(); + + auto it = funcs_to_rewrite.insert( + {caller_parent_func.getName(), func_to_rewrite}); + if (it.second) { + new_funcs_to_visit.push_back(caller_parent_func); + } else { + it.first->getSecond().control_flow_ops.insert( + func_to_rewrite.control_flow_ops.begin(), + func_to_rewrite.control_flow_ops.end()); + it.first->getSecond().control_flow_blocks.insert( + func_to_rewrite.control_flow_blocks.begin(), + func_to_rewrite.control_flow_blocks.end()); + } } } @@ -116,8 +193,9 @@ llvm::SmallDenseMap GetFunctionsToRewrite( // Clone public functions that need to be rewritten. Function calls to this // function will be replaced with the cloned function. SymbolTable symbol_table(module); - for (auto& func : funcs) { - if (func.getSecond().original.isPublic()) { + for (auto& func : funcs_to_rewrite) { + if (func.getSecond().original.isPublic() && + !func.getSecond().original.symbolKnownUseEmpty(module)) { auto clone = func.getSecond().original.clone(); clone.setVisibility(SymbolTable::Visibility::Private); symbol_table.insert(clone); @@ -125,7 +203,7 @@ llvm::SmallDenseMap GetFunctionsToRewrite( } } - return funcs; + return success(); } // Assigns op sharding to an op for a given device core. @@ -329,94 +407,412 @@ Value RewriteCallOp(OpBuilder& builder, CallOp call, return new_call.getResults().back(); } -// Updates function terminator and type if a token is to be emitted by the -// function. -void RewriteFunctionTerminatorAndUpdateType(OpBuilder& builder, FuncOp func, - Block& func_body, Value token) { - // If the function signature is changed, update to emit a token and update - // the function type. - Operation* terminator = func_body.getTerminator(); - auto new_results = llvm::to_vector<4>(terminator->getOperands()); - new_results.push_back(token); - builder.setInsertionPoint(terminator); - auto new_return = - builder.create(terminator->getLoc(), new_results); - terminator->erase(); +// Helper struct holding state of which op to visit to next. If `op` is in a +// control flow op region, `region_idx` will be set with the respective region +// index. `token` will be current token from the last communication op/control +// flow op transitive communication ops. +struct OpVisitorState { + Optional region_idx; + Value token; + Operation* op; +}; +// Creates a tuple from a sequence of values. +Value CreateTuple(OpBuilder& builder, Location loc, ArrayRef operands) { + return builder.create(loc, operands).getResult(); +} + +// Replaces a value `value` with a new value but the token attached. If `value` +// is not a tuple, a new tuple is formed with `token`. If `value` is a tuple, +// `value` is extended instead. New tuple values created are cached. +Value GetValueWithToken(OpBuilder& builder, Value value, Value token, + llvm::SmallDenseMap& rewritten_values) { + // If value with token already exists, reuse it. 
+ auto it = rewritten_values.find(value); + if (it != rewritten_values.end()) return it->getSecond(); + + auto create_tuple = [&](ArrayRef operands) { + auto new_result = CreateTuple(builder, value.getLoc(), operands); + rewritten_values.insert({value, new_result}); + return new_result; + }; + + auto tuple_type = value.getType().dyn_cast(); + // `value` is not a tuple, create a new tuple. + if (!tuple_type) return create_tuple({value, token}); + + // Extend tuple if `value` is a tuple. + // If `value` is an op result and the owner is a `mhlo.tuple`, simply unpack + // the tuple. + if (auto tuple_op = value.getDefiningOp()) { + auto tuple_operands = llvm::to_vector<4>(tuple_op.getOperands()); + tuple_operands.push_back(token); + return create_tuple(tuple_operands); + } + + // `value` is not created via a `mhlo.tuple` directly, unpack individual + // elements directly with `mhlo.get_tuple_element`. + SmallVector tuple_operands; + for (auto idx : llvm::seq(0, tuple_type.getTypes().size())) + tuple_operands.push_back( + builder.create(value.getLoc(), value, idx) + .getResult()); + + tuple_operands.push_back(token); + return create_tuple(tuple_operands); +} + +// Extends a type to include a `mhlo.token` type. If `type` is not a tuple type, +// a new tuple type with `type` and `mhlo.token` type is created instead. +TupleType GetTypeWithToken(OpBuilder& builder, Type type) { + auto token_type = TokenType::get(builder.getContext()); + if (auto tuple_type = type.dyn_cast()) { + auto result_types = llvm::to_vector<4>(tuple_type.getTypes()); + result_types.push_back(token_type); + return builder.getTupleType(result_types); + } + + return builder.getTupleType({type, token_type}); +} + +// Creates a slice of a tuple `value` with `mhlo.get_tuple_element` from index 0 +// to `end`, exclusive. +Value CreateSubTuple(OpBuilder& builder, Value value, size_t end) { + SmallVector tuple_operands; + for (auto idx : llvm::seq(0, end)) + tuple_operands.push_back( + builder.create(value.getLoc(), value, idx) + .getResult()); + + return CreateTuple(builder, value.getLoc(), tuple_operands); +} + +// Replaces uses of `value` with `replacement`. If `value` is not a tuple type, +// an explicit `mhlo.get_tuple_element` is created to unpack the tuple and +// return the first element. Otherwise, `mhlo.get_tuple_element` users are +// simply updated with `replacement`, and all other users are updated with a +// slice of `replacement`. +void ReplaceWithTupleResult(OpBuilder& builder, Value value, + Value replacement) { + auto tuple_type = value.getType().dyn_cast(); + if (!tuple_type) { + if (!value.use_empty()) { + auto new_element = builder.create(replacement.getLoc(), + replacement, 0); + value.replaceAllUsesWith(new_element.getResult()); + } + return; + } + + Value sub_tuple; + for (auto& use : llvm::make_early_inc_range(value.getUses())) { + if (isa(use.getOwner())) { + use.set(replacement); + continue; + } + + if (!sub_tuple) + sub_tuple = CreateSubTuple(builder, replacement, tuple_type.size()); + + use.set(sub_tuple); + } +} + +// Replaces control flow op block single block argument with new block argument +// of type `new_type` (tuple type). The last element of the new block argument +// (token) is returned. 
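+// The block is expected to have exactly one argument (the `mhlo.if` branch
+// operand), which is asserted below.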
+Value UpdateControlFlowBlockArgWithToken(OpBuilder& builder, Block& block, + Type token_type) { + assert(block.getNumArguments() == 1); + builder.setInsertionPointToStart(&block); + auto new_arg = block.addArgument(token_type); + ReplaceWithTupleResult(builder, block.getArgument(0), new_arg); + block.eraseArgument(0); + return builder + .create(new_arg.getLoc(), new_arg, + token_type.cast().size() - 1) + .getResult(); +} + +// Updates control flow op terminator with an extra element `token`. If the +// original return value is not a tuple, a new tuple is formed. Otherwise the +// tuple is extended. +void RewriteControlFlowTerminator(OpBuilder& builder, Operation* terminator, + Value token) { + assert(terminator->getNumOperands() == 1); + assert(terminator->getBlock()->getNumArguments() == 1); + + builder.setInsertionPoint(terminator); + llvm::SmallDenseMap rewritten_operands; + Value new_result = GetValueWithToken(builder, terminator->getOperand(0), + token, rewritten_operands); + terminator->setOperand(0, new_result); +} + +// Rewrites a `mhlo.if` op to receive and forward a `mhlo.token`. Operands to +// the op for all of its regions are extended to have an extra operand `token`. +void RewriteRegionIfOp(OpBuilder& builder, IfOp region_if, + SmallVectorImpl& ops_to_visit, + Value token) { + SmallVector new_branch_operands; + llvm::SmallDenseMap rewritten_operands; + auto old_branch_operands = llvm::drop_begin(region_if.getOperands(), 1); + + // Rewrite all region operands to have an extra operand `token`. + for (Value operand : old_branch_operands) + new_branch_operands.push_back( + GetValueWithToken(builder, operand, token, rewritten_operands)); + + auto new_result_type = GetTypeWithToken(builder, region_if.getType()); + + // Create new `mhlo.if` op with extra token operands and result. + auto new_if = builder.create(region_if.getLoc(), new_result_type, + region_if.pred(), new_branch_operands[0], + new_branch_operands[1]); + + // Move all regions from the old `mhlo.if` op to its replacement. + for (auto& region_and_idx : llvm::enumerate(region_if.getRegions())) + new_if.getRegion(region_and_idx.index()).takeBody(*region_and_idx.value()); + + // Forward result from old `mhlo.if` with replacement, and unpack result when + // necessary. + ReplaceWithTupleResult(builder, region_if.getResult(), new_if.getResult()); + + auto new_token = builder.create( + new_if.getLoc(), new_if.getResult(), + new_if.getResult().getType().cast().size() - 1); + + region_if.erase(); + + // Remove leftover operands to old `mhlo.if` if they have no uses. + for (auto& rewritten_operand : rewritten_operands) + if (auto tuple_op = rewritten_operand.getFirst().getDefiningOp()) + if (tuple_op.use_empty()) tuple_op.erase(); + + // Next op to visit. The replacement is visited but at its first region. The + // token result of the new region if is propagated. + ops_to_visit.push_back({/*region_idx=*/0, new_token, new_if}); +} + +// Rewrites a `mhlo.if` region to receive and forward a `mhlo.token`. The block +// argument is updated to have an extra `mhlo.token` element. If the region +// block is to be rewritten, the next op to visit is set to the first op in the +// block. Otherwise the terminator is updated to forward `token`. 
+void RewriteRegionIfRegion( + OpBuilder& builder, IfOp region_if, unsigned region_idx, + SmallVectorImpl& ops_to_visit, + const llvm::SmallPtrSetImpl& control_flow_blocks, Value token) { + ops_to_visit.push_back({region_idx + 1, token, region_if}); + + Region& region = region_if.getRegion(region_idx); + assert(llvm::hasSingleElement(region)); + + auto block_token = UpdateControlFlowBlockArgWithToken( + builder, region.front(), region_if.getOperand(region_idx + 1).getType()); + + if (control_flow_blocks.contains(®ion.front())) { + ops_to_visit.push_back({/*region_idx=*/llvm::None, block_token, + block_token.getDefiningOp()->getNextNode()}); + return; + } + + RewriteControlFlowTerminator(builder, region.front().getTerminator(), + block_token); +} + +// Rewrites an `mhlo.if` op or its region. If `region_idx` is not set, the op +// operands and results rewritten. If `region_idx` is set, region `region_idx` +// is rewritten to take in and return an additional token. Returns true if op +// is still being rewritten. +bool ProcessRegionIfOp(OpBuilder& builder, IfOp region_if, + Optional region_idx, + SmallVectorImpl& ops_to_visit, + const llvm::SmallPtrSetImpl& control_flow_blocks, + Value token) { + builder.setInsertionPoint(region_if); + + if (!region_idx) { + RewriteRegionIfOp(builder, region_if, ops_to_visit, token); + return true; + } + + if (*region_idx < region_if.getNumRegions()) { + RewriteRegionIfRegion(builder, region_if, *region_idx, ops_to_visit, + control_flow_blocks, token); + return true; + } + + return false; +} + +// Updates function type based on current function body block arguments and +// terminator operand types. +void UpdateFunctionType(OpBuilder& builder, FuncOp func, Block& func_body) { auto new_argument_types = llvm::to_vector<4>(func_body.getArgumentTypes()); - auto new_result_types = llvm::to_vector<4>(new_return.getOperandTypes()); + auto new_result_types = + llvm::to_vector<4>(func_body.getTerminator()->getOperandTypes()); func.setType(FunctionType::get(new_argument_types, new_result_types, builder.getContext())); } -// Rewrites a function body and communication ops inside. The function may -// either be rewritten to create a token or take in and return a token, -// depending on its visibility and if there are any callers. +// Replaces a function terminator `return` with another `return` that has an +// extra `mhlo.token` operand. +void RewriteFunctionTerminator(OpBuilder& builder, mlir::ReturnOp terminator, + Value token) { + auto new_results = llvm::to_vector<4>(terminator.getOperands()); + new_results.push_back(token); + builder.setInsertionPoint(terminator); + builder.create(terminator.getLoc(), new_results); + terminator.erase(); +} + +// Rewrites a function body and communication ops inside. Region control flow +// are updated when necessary, to propagate tokens. The function may either be +// rewritten to create a token or take in and return a token, depending on its +// visibility and if there are any callers. 
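+// The body is walked with an explicit worklist (`ops_to_visit`) so that the
+// current token can be threaded into and out of nested `mhlo.if` regions.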
LogicalResult RewriteFunction( OpBuilder& builder, int64_t& channel_id, ModuleOp module, FuncOp func, - const llvm::SmallDenseMap& funcs) { + const llvm::SmallDenseMap& funcs, + const llvm::SmallPtrSetImpl& control_flow_ops, + const llvm::SmallPtrSetImpl& control_flow_blocks, bool is_clone) { MLIRContext* context = module.getContext(); if (!llvm::hasSingleElement(func.getBody())) return func.emitError() << "'" << FuncOp::getOperationName() << "' ops with more than one block are not supported"; - bool rewrite_block = !func.isPublic() && !func.symbolKnownUseEmpty(module); + bool rewrite_block = + is_clone || (!func.isPublic() && !func.symbolKnownUseEmpty(module)); Block& func_body = func.front(); builder.setInsertionPointToStart(&func_body); - auto token_type = mlir::mhlo::TokenType::get(context); + auto token_type = TokenType::get(context); // If a function is public, it's signature should not be modified, and instead // a token will be created. Otherwise a token block argument is inserted. - Value token = rewrite_block - ? func_body.addArgument(token_type) + Value init_token = + rewrite_block ? func_body.addArgument(token_type) : builder.create(func.getLoc(), token_type) .getResult(); - for (Operation& op : llvm::make_early_inc_range(func_body)) { - if (auto host_compute = dyn_cast(op)) { + // Stack to keep track of region based control flow op nesting and current + // op to visit. + SmallVector ops_to_visit{ + {/*region_idx=*/llvm::None, init_token, &func_body.front()}}; + + while (!ops_to_visit.empty()) { + OpVisitorState op_to_visit = ops_to_visit.pop_back_val(); + Operation* curr_op = op_to_visit.op; + + Value token = op_to_visit.token; + // Ops may be removed, so the next op is kept track of beforehand. + Operation* next_op = curr_op->getNextNode(); + + if (auto host_compute = dyn_cast(curr_op)) { token = RewriteHostComputeOp(builder, channel_id, host_compute, token); - } else if (auto send_to_host = dyn_cast(op)) { + } else if (auto send_to_host = dyn_cast(curr_op)) { token = RewriteSendToHostOp(builder, channel_id, send_to_host, token); - } else if (auto recv_from_host = dyn_cast(op)) { + } else if (auto recv_from_host = dyn_cast(curr_op)) { token = RewriteRecvFromHostOp(builder, channel_id, recv_from_host, token); - } else if (auto call = dyn_cast(op)) { + } else if (auto call = dyn_cast(curr_op)) { // Only `mlir::CallOp` is supported as this requires knowing how to // rewrite arguments and results to a function. auto it = funcs.find(call.getCallee()); - if (it == funcs.end()) continue; - FuncOp clone = it->getSecond().clone; - Optional symbol_name = - clone ? Optional(clone.getName()) : llvm::None; - // If the function being called is to be cloned, update the call to also - // point to the cloned function. - token = RewriteCallOp(builder, call, symbol_name, token); + if (it != funcs.end()) { + FuncOp clone = it->getSecond().clone; + Optional symbol_name = + clone ? Optional(clone.getName()) : llvm::None; + // If the function being called is to be cloned, update the call to also + // point to the cloned function. 
+ token = RewriteCallOp(builder, call, symbol_name, token); + } + } else if (auto region_if = dyn_cast(curr_op)) { + if (op_to_visit.region_idx || control_flow_ops.contains(region_if)) + if (ProcessRegionIfOp(builder, region_if, op_to_visit.region_idx, + ops_to_visit, control_flow_blocks, token)) + continue; + } else if (auto region_terminator = dyn_cast(curr_op)) { + RewriteControlFlowTerminator(builder, region_terminator, token); + // There is no next op afer the control flow op terminator, simply let + // stack have one less element. + continue; + } else if (auto func_terminator = dyn_cast(curr_op)) { + if (rewrite_block) + RewriteFunctionTerminator(builder, func_terminator, token); + + // There is no next op afer the function terminator, simply let stack have + // one less element/be empty. + continue; } + + // Visit next op. + ops_to_visit.push_back({/*region_idx=*/llvm::None, token, next_op}); } - if (rewrite_block) - RewriteFunctionTerminatorAndUpdateType(builder, func, func_body, token); + if (rewrite_block) UpdateFunctionType(builder, func, func_body); return success(); } +// Checks if a function call is pointing to a function with communication ops. +bool IsFunctionCallWithCommunication( + Operation* op, + const llvm::SmallDenseMap& funcs_to_rewrite) { + if (auto call = dyn_cast(op)) + return funcs_to_rewrite.count(call.callee()); + + return false; +} + +// Collects all control flow op ancestors of communication ops or function calls +// with communication ops (transitively). +void GetCommunicationControlFlowOps( + FuncOp func, + const llvm::SmallDenseMap& funcs_to_rewrite, + llvm::SmallPtrSetImpl& control_flow_ops, + llvm::SmallPtrSetImpl& control_flow_blocks) { + func.walk([&](Operation* op) { + if (IsCommunicationOp(op) || + IsFunctionCallWithCommunication(op, funcs_to_rewrite)) + if (failed(GetControlFlowAncestors(op, control_flow_ops, + control_flow_blocks))) + llvm_unreachable( + "checking original function for control flow ancestors should have " + "errored first"); + }); +} + void LegalizeTFCommunication::runOnOperation() { auto module = getOperation(); - llvm::SmallDenseMap funcs = - GetFunctionsToRewrite(module); + llvm::SmallDenseMap funcs_to_rewrite; + if (failed(GetFunctionsToRewrite(module, funcs_to_rewrite))) + return signalPassFailure(); // Module level counter to make sure Channel Id's are unique. 
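  // Each `mhlo.send`/`mhlo.recv` created during rewriting takes the next value
  // from this counter for its channel handle.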
int64_t channel_id = 1; OpBuilder builder(&getContext()); - for (const auto& func_and_name : funcs) { - FuncOp func = func_and_name.getSecond().original; - if (failed(RewriteFunction(builder, channel_id, module, func, funcs))) + for (const auto& func_and_name : funcs_to_rewrite) { + const auto& func_to_rewrite = func_and_name.getSecond(); + FuncOp func = func_to_rewrite.original; + if (failed(RewriteFunction(builder, channel_id, module, func, + funcs_to_rewrite, + func_to_rewrite.control_flow_ops, + func_to_rewrite.control_flow_blocks, + /*is_clone=*/false))) return signalPassFailure(); FuncOp clone = func_and_name.getSecond().clone; if (!clone) continue; - if (failed(RewriteFunction(builder, channel_id, module, clone, funcs))) - return signalPassFailure(); + llvm::SmallPtrSet clone_control_flow_ops; + llvm::SmallPtrSet clone_control_flow_blocks; + GetCommunicationControlFlowOps(clone, funcs_to_rewrite, + clone_control_flow_ops, + clone_control_flow_blocks); + if (failed(RewriteFunction(builder, channel_id, module, clone, + funcs_to_rewrite, clone_control_flow_ops, + clone_control_flow_blocks, + /*is_clone=*/true))) + llvm_unreachable( + "rewriting of original function should have errored first"); } } From 8296bf5a55e75d246b27e1cfa431f6e15dfef49d Mon Sep 17 00:00:00 2001 From: Brian Zhao Date: Thu, 6 Aug 2020 14:32:47 -0700 Subject: [PATCH 2287/2522] Add a utility to flatten StructuredValues of SavedConcreteFunctions' input and output signatures. This can be re-used for the upcoming tensorflow::Argument, as well as the current input/output size validation logic. PiperOrigin-RevId: 325312214 Change-Id: I2a043d869dfb4dca082a322587f7c65cbb8cdf01 --- .../c/experimental/saved_model/core/BUILD | 17 +++ .../saved_model/core/saved_model_utils.cc | 125 ++++++++-------- .../saved_model/core/saved_model_utils.h | 16 ++- .../core/signature_flattening_test.cc | 133 ++++++++++++++++++ 4 files changed, 231 insertions(+), 60 deletions(-) create mode 100644 tensorflow/c/experimental/saved_model/core/signature_flattening_test.cc diff --git a/tensorflow/c/experimental/saved_model/core/BUILD b/tensorflow/c/experimental/saved_model/core/BUILD index 8078758328c..b2e432782de 100644 --- a/tensorflow/c/experimental/saved_model/core/BUILD +++ b/tensorflow/c/experimental/saved_model/core/BUILD @@ -216,6 +216,23 @@ tf_cc_test( ], ) +tf_cc_test( + name = "signature_flattening_test", + srcs = [ + "signature_flattening_test.cc", + ], + deps = [ + ":saved_model_utils", + "//tensorflow/c/experimental/saved_model/core:tf_concrete_function_test_protos", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core/common_runtime/eager:core", + ], +) + tf_cc_test( name = "tf_concrete_function_loading_test", srcs = [ diff --git a/tensorflow/c/experimental/saved_model/core/saved_model_utils.cc b/tensorflow/c/experimental/saved_model/core/saved_model_utils.cc index 2037c4886de..0d97741d7f0 100644 --- a/tensorflow/c/experimental/saved_model/core/saved_model_utils.cc +++ b/tensorflow/c/experimental/saved_model/core/saved_model_utils.cc @@ -27,6 +27,7 @@ limitations under the License. 
#include "tensorflow/core/framework/tensor.pb.h" #include "tensorflow/core/lib/hash/hash.h" #include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/platform/protobuf.h" #include "tensorflow/core/platform/stringpiece.h" #include "tensorflow/core/protobuf/saved_object_graph.pb.h" #include "tensorflow/core/protobuf/struct.pb.h" @@ -36,52 +37,8 @@ namespace tensorflow { namespace internal { namespace { -// This returns the size of `tf.nest.flatten(value)`, on values that are -// used in tf.function's input_signatures. -int FlattenedSize(const tensorflow::StructuredValue& value, Status* status) { - // This follows the logic from - // https://github.com/tensorflow/tensorflow/blob/1c064ab76064c58e54261b805027474885a1534d/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc#L2775 - switch (value.kind_case()) { - case StructuredValue::kDictValue: { - const DictValue& dict = value.dict_value(); - int size = 0; - for (const auto& field : dict.fields()) { - size += FlattenedSize(field.second, status); - } - return size; - } - case StructuredValue::kTupleValue: { - const TupleValue& tuple = value.tuple_value(); - int size = 0; - for (const StructuredValue& value : tuple.values()) { - size += FlattenedSize(value, status); - } - return size; - } - case StructuredValue::kListValue: { - const ListValue& list = value.list_value(); - int size = 0; - for (const StructuredValue& value : list.values()) { - size += FlattenedSize(value, status); - } - return size; - } - case StructuredValue::kTensorSpecValue: { - return 1; - } - case StructuredValue::kNoneValue: { - // Base case: do nothing. - // This arises, for example, as the top-level object of an output - // signature when there are no return values. - return 0; - } - default: { - status->Update(errors::Internal("Unhandled structured value kind ", - value.kind_case())); - return 0; - } - } -} +using StructuredValueDictEntry = + protobuf::MapPair; // Perform some basic sanity checks on SavedConcreteFunction's input and // output signatures with respect to the corresponding FunctionDef's input @@ -111,34 +68,34 @@ Status ValidateSavedFunctionCompatibleWithFunctionDef( // https://github.com/tensorflow/tensorflow/blob/1c064ab76064c58e54261b805027474885a1534d/tensorflow/python/eager/function.py#L1974-L1979 const std::string& name = function_def->signature().name(); + const StructuredValue& input_signature = saved_concrete_function.canonicalized_input_signature(); - Status status; - int input_signature_size = FlattenedSize(input_signature, &status); - TF_RETURN_IF_ERROR(status); - if (input_signature_size + saved_concrete_function.bound_inputs_size() != + std::vector input_specs; + TF_RETURN_IF_ERROR(FlattenSignature(input_signature, &input_specs)); + if (input_specs.size() + saved_concrete_function.bound_inputs_size() != function_def->signature().input_arg_size()) { return errors::FailedPrecondition( "FunctionDef ", name, " has ", function_def->signature().input_arg_size(), - " inputs, but the SavedConcreteFunction has ", input_signature_size, + " inputs, but the SavedConcreteFunction has ", input_specs.size(), " flattened user inputs and ", saved_concrete_function.bound_inputs_size(), " captured inputs."); } const StructuredValue& output_signature = saved_concrete_function.output_signature(); - int output_signature_size = FlattenedSize(output_signature, &status); - TF_RETURN_IF_ERROR(status); - if (output_signature_size != function_def->signature().output_arg_size()) { + std::vector output_specs; + 
TF_RETURN_IF_ERROR(FlattenSignature(output_signature, &output_specs)); + if (output_specs.size() != function_def->signature().output_arg_size()) { return errors::FailedPrecondition( "FunctionDef ", name, " has ", function_def->signature().output_arg_size(), - " outputs, but the SavedConcreteFunction has ", output_signature_size, + " outputs, but the SavedConcreteFunction has ", output_specs.size(), " flattened outputs."); } - return status; + return Status(); } } // namespace @@ -197,6 +154,62 @@ Status LoadTFConcreteFunction( out); } +Status FlattenSignature(const StructuredValue& signature, + std::vector* flattened_specs) { + // This follows the logic from + // https://github.com/tensorflow/tensorflow/blob/1c064ab76064c58e54261b805027474885a1534d/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc#L2775 + switch (signature.kind_case()) { + case StructuredValue::kDictValue: { + // Dictionaries must be sorted in order of keys + const DictValue& dict = signature.dict_value(); + std::vector entries; + entries.reserve(dict.fields_size()); + for (const auto& field : dict.fields()) { + entries.push_back(&field); + } + + std::sort(entries.begin(), entries.end(), + [](const StructuredValueDictEntry* x, + const StructuredValueDictEntry* y) { + return x->first < y->first; + }); + + for (const auto& entry : entries) { + TF_RETURN_IF_ERROR(FlattenSignature(entry->second, flattened_specs)); + } + return Status(); + } + case StructuredValue::kTupleValue: { + const TupleValue& tuple = signature.tuple_value(); + for (const StructuredValue& value : tuple.values()) { + TF_RETURN_IF_ERROR(FlattenSignature(value, flattened_specs)); + } + return Status(); + } + case StructuredValue::kListValue: { + const ListValue& list = signature.list_value(); + for (const StructuredValue& value : list.values()) { + TF_RETURN_IF_ERROR(FlattenSignature(value, flattened_specs)); + } + return Status(); + } + case StructuredValue::kTensorSpecValue: { + flattened_specs->push_back(&signature.tensor_spec_value()); + return Status(); + } + case StructuredValue::kNoneValue: { + // Base case: do nothing. + // This arises, for example, as the top-level object of an output + // signature when there are no return values. + return Status(); + } + default: { + return errors::Internal("Unhandled structured value kind ", + signature.kind_case()); + } + } +} + const SavedObject* FindNodeAtPath(StringPiece path, const SavedObjectGraph& object_graph) { const auto& nodes = object_graph.nodes(); diff --git a/tensorflow/c/experimental/saved_model/core/saved_model_utils.h b/tensorflow/c/experimental/saved_model/core/saved_model_utils.h index 57f30afa91b..68bfbe32222 100644 --- a/tensorflow/c/experimental/saved_model/core/saved_model_utils.h +++ b/tensorflow/c/experimental/saved_model/core/saved_model_utils.h @@ -32,6 +32,7 @@ limitations under the License. #include "tensorflow/core/platform/status.h" #include "tensorflow/core/platform/stringpiece.h" #include "tensorflow/core/protobuf/saved_object_graph.pb.h" +#include "tensorflow/core/protobuf/struct.pb.h" namespace tensorflow { namespace internal { @@ -59,10 +60,17 @@ Status LoadTFConcreteFunction( captured_objects, ImmediateExecutionContext* ctx, std::unique_ptr* out); -// Find the SavedObject in `object_graph` at location `path`. `path` must be a -// dot-delimited string of object names relative to the root object. If no -// object is found, returns nullptr. Callers must ensure `object_graph` outlives -// the returned pointer. 
+// Flattens `signature` into a vector of TensorSpecProto pointers back into +// `signature`. `signature` must outlive flattened_specs. `signature` must also +// be the input or output signature of a SavedConcreteFunction (i.e. "nested +// structures of tensorspecs"). +Status FlattenSignature(const StructuredValue& signature, + std::vector* flattened_specs); + +// Find the SavedObject in `object_graph` at location `path`. `path` must be +// a dot-delimited string of object names relative to the root object. If no +// object is found, returns nullptr. Callers must ensure `object_graph` +// outlives the returned pointer. const SavedObject* FindNodeAtPath(StringPiece path, const SavedObjectGraph& object_graph); diff --git a/tensorflow/c/experimental/saved_model/core/signature_flattening_test.cc b/tensorflow/c/experimental/saved_model/core/signature_flattening_test.cc new file mode 100644 index 00000000000..9ee495f524a --- /dev/null +++ b/tensorflow/c/experimental/saved_model/core/signature_flattening_test.cc @@ -0,0 +1,133 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include + +#include "tensorflow/c/experimental/saved_model/core/saved_model_utils.h" +#include "tensorflow/c/experimental/saved_model/core/tf_concrete_function_test_protos.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/protobuf/struct.pb.h" + +namespace tensorflow { +namespace { + +// Validates names, shapes, and dtypes of two tensorspecprotos are equivalent. +bool TensorSpecsAreEqual(const TensorSpecProto& spec, + const std::string& expected_name, + const PartialTensorShape& expected_shape, + DataType expected_dtype) { + return spec.name() == expected_name && + PartialTensorShape(spec.shape()).IsIdenticalTo(expected_shape) && + spec.dtype() == expected_dtype; +} + +// This tests the common case for a tf.function w/o inputs. This ends up +// being serialized as a tuple of an empty tuple + empty dictionary +// (corresponding to the args, kwargs) of the function. +TEST(SignatureFlatteningTest, ZeroArgInputSignature) { + std::vector flattened; + StructuredValue value = testing::ZeroArgInputSignature(); + TF_EXPECT_OK(internal::FlattenSignature(value, &flattened)); + EXPECT_EQ(flattened.size(), 0); +} + +// This tests the common case for a tf.function w/o outputs. This ends up +// being serialized as a "NoneValue". 
+TEST(SignatureFlatteningTest, ZeroRetOutputSignature) { + std::vector flattened; + StructuredValue value = testing::ZeroReturnOutputSignature(); + TF_EXPECT_OK(internal::FlattenSignature(value, &flattened)); + EXPECT_EQ(flattened.size(), 0); +} + +TEST(SignatureFlatteningTest, SingleArgInputSignature) { + std::vector flattened; + StructuredValue value = testing::SingleArgInputSignature(); + TF_EXPECT_OK(internal::FlattenSignature(value, &flattened)); + EXPECT_EQ(flattened.size(), 1); + EXPECT_TRUE(TensorSpecsAreEqual(*flattened[0], + /* expected_name = */ "x", + /* expected_shape = */ {1, 10}, + /* expected_dtype = */ DT_FLOAT)) + << "Expected " << flattened[0]->DebugString(); +} + +TEST(SignatureFlatteningTest, SingleReturnOutputSignature) { + std::vector flattened; + StructuredValue value = testing::SingleReturnOutputSignature(); + TF_EXPECT_OK(internal::FlattenSignature(value, &flattened)); + EXPECT_EQ(flattened.size(), 1); + EXPECT_TRUE(TensorSpecsAreEqual(*flattened[0], + /* expected_name = */ "", + /* expected_shape = */ {1}, + /* expected_dtype = */ DT_FLOAT)) + << "Expected " << flattened[0]->DebugString(); +} + +TEST(SignatureFlatteningTest, ThreeArgInputSignature) { + std::vector flattened; + StructuredValue value = testing::ThreeArgInputSignature(); + TF_EXPECT_OK(internal::FlattenSignature(value, &flattened)); + EXPECT_EQ(flattened.size(), 3); + EXPECT_TRUE(TensorSpecsAreEqual(*flattened[0], + /* expected_name = */ "x", + /* expected_shape = */ {1}, + /* expected_dtype = */ DT_FLOAT)) + << "Expected " << flattened[0]->DebugString(); + + EXPECT_TRUE(TensorSpecsAreEqual(*flattened[1], + /* expected_name = */ "y", + /* expected_shape = */ {1}, + /* expected_dtype = */ DT_FLOAT)) + << "Expected " << flattened[1]->DebugString(); + + EXPECT_TRUE(TensorSpecsAreEqual(*flattened[2], + /* expected_name = */ "z", + /* expected_shape = */ {1}, + /* expected_dtype = */ DT_FLOAT)) + << "Expected " << flattened[2]->DebugString(); +} + +// This test has an exotic outputsignature of tuple of a +// dictionary, tensor +TEST(SignatureFlatteningTest, ThreeReturnOutputSignature) { + std::vector flattened; + StructuredValue value = testing::ThreeReturnOutputSignature(); + TF_EXPECT_OK(internal::FlattenSignature(value, &flattened)); + EXPECT_EQ(flattened.size(), 3); + EXPECT_TRUE(TensorSpecsAreEqual(*flattened[0], + /* expected_name = */ "0/a", + /* expected_shape = */ {1}, + /* expected_dtype = */ DT_FLOAT)) + << "Expected " << flattened[0]->DebugString(); + + EXPECT_TRUE(TensorSpecsAreEqual(*flattened[1], + /* expected_name = */ "0/b", + /* expected_shape = */ {1}, + /* expected_dtype = */ DT_FLOAT)) + << "Expected " << flattened[1]->DebugString(); + + EXPECT_TRUE(TensorSpecsAreEqual(*flattened[2], + /* expected_name = */ "1", + /* expected_shape = */ {1}, + /* expected_dtype = */ DT_FLOAT)) + << "Expected " << flattened[2]->DebugString(); +} + +} // namespace +} // namespace tensorflow From 1da0eb2f4781a68383f71c2082e4d753872fb953 Mon Sep 17 00:00:00 2001 From: Krzysztof Laskowski Date: Thu, 6 Aug 2020 23:42:39 +0200 Subject: [PATCH 2288/2522] Extend MemoryTypesForNode test Add verification of "_input_hostmem" and "_output_hostmem" attributes. 
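For context on the test changes below: the "_input_hostmem" and "_output_hostmem" node attributes force the listed input/output indices into host memory, on top of whatever the kernel registration requests. A minimal sketch of how the extended test exercises this, mirroring the diff that follows (the op, indices, and device are only illustrative):

  NodeDef node_def;
  // ... build node_def with NodeDefBuilder as in the test below ...
  AddNodeAttr("_input_hostmem", {0}, &node_def);      // pin input 0 to host memory
  AddNodeAttr("_output_hostmem", {6, 7}, &node_def);  // pin outputs 6 and 7 to host memory
  MemoryTypeVector input, output;
  TF_EXPECT_OK(MemoryTypesForNode(OpRegistry::Global(), DEVICE_GPU, node_def,
                                  &input, &output));
  // input[0], output[6], and output[7] are now expected to be HOST_MEMORY.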
--- .../core/framework/memory_types_test.cc | 38 +++++++++++-------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/tensorflow/core/framework/memory_types_test.cc b/tensorflow/core/framework/memory_types_test.cc index 3126ea8e5f8..5228dbafc9b 100644 --- a/tensorflow/core/framework/memory_types_test.cc +++ b/tensorflow/core/framework/memory_types_test.cc @@ -33,12 +33,14 @@ class DummyKernel : public OpKernel { REGISTER_OP("HostMemoryTest") .Input("a: float") - .Input("b: T") - .Input("c: N * string") - .Input("d: Tlist") - .Input("e: Rlist") + .Input("b: float") + .Input("c: T") + .Input("d: N * string") + .Input("e: Tlist") + .Input("f: Rlist") .Output("o: N * T") - .Output("p: Tlist") + .Output("p: N * T") + .Output("r: Tlist") .Attr("T: type") .Attr("N: int") .Attr("Tlist: list(type)") @@ -46,21 +48,25 @@ REGISTER_OP("HostMemoryTest") REGISTER_KERNEL_BUILDER(Name("HostMemoryTest").Device(DEVICE_CPU), DummyKernel); REGISTER_KERNEL_BUILDER(Name("HostMemoryTest") .Device(DEVICE_GPU) - .HostMemory("a") - .HostMemory("c") + .HostMemory("b") .HostMemory("d") - .HostMemory("o"), + .HostMemory("e") + .HostMemory("p"), DummyKernel); TEST(MemoryTypesForNode, Simple) { NodeDef node_def; TF_ASSERT_OK(NodeDefBuilder("test", "HostMemoryTest") + .Input(FakeInput()) .Input(FakeInput()) .Input(FakeInput(DT_BOOL)) .Input(FakeInput(3)) .Input(FakeInput({DT_INT32, DT_FLOAT, DT_INT32})) .Input(FakeInput({DT_RESOURCE, DT_STRING, DT_RESOURCE})) .Finalize(&node_def)); + AddNodeAttr("_input_hostmem", {0}, &node_def); + AddNodeAttr("_output_hostmem", {6, 7}, &node_def); + MemoryTypeVector input, output; TF_EXPECT_OK(MemoryTypesForNode(OpRegistry::Global(), DEVICE_CPU, node_def, @@ -68,24 +74,26 @@ TEST(MemoryTypesForNode, Simple) { // a:float, b:bool, c:3*string, d:(int32, float, int32), // e:(resource, string, resource) EXPECT_EQ( - MemoryTypeVector({DEVICE_MEMORY, DEVICE_MEMORY, HOST_MEMORY, HOST_MEMORY, - HOST_MEMORY, DEVICE_MEMORY, DEVICE_MEMORY, + MemoryTypeVector({HOST_MEMORY, DEVICE_MEMORY, DEVICE_MEMORY, HOST_MEMORY, + HOST_MEMORY, HOST_MEMORY, DEVICE_MEMORY, DEVICE_MEMORY, DEVICE_MEMORY, HOST_MEMORY, HOST_MEMORY, HOST_MEMORY}), input); // o:3*bool, p:(int32, float, int32) EXPECT_EQ(MemoryTypeVector({DEVICE_MEMORY, DEVICE_MEMORY, DEVICE_MEMORY, - DEVICE_MEMORY, DEVICE_MEMORY, DEVICE_MEMORY}), + DEVICE_MEMORY, DEVICE_MEMORY, DEVICE_MEMORY, + HOST_MEMORY, HOST_MEMORY, DEVICE_MEMORY}), output); TF_EXPECT_OK(MemoryTypesForNode(OpRegistry::Global(), DEVICE_GPU, node_def, &input, &output)); EXPECT_EQ( - MemoryTypeVector({HOST_MEMORY, DEVICE_MEMORY, HOST_MEMORY, HOST_MEMORY, + MemoryTypeVector({HOST_MEMORY, HOST_MEMORY, DEVICE_MEMORY, HOST_MEMORY, HOST_MEMORY, HOST_MEMORY, HOST_MEMORY, HOST_MEMORY, - HOST_MEMORY, HOST_MEMORY, HOST_MEMORY}), + HOST_MEMORY, HOST_MEMORY, HOST_MEMORY, HOST_MEMORY}), input); - EXPECT_EQ(MemoryTypeVector({HOST_MEMORY, HOST_MEMORY, HOST_MEMORY, - DEVICE_MEMORY, DEVICE_MEMORY, DEVICE_MEMORY}), + EXPECT_EQ(MemoryTypeVector({DEVICE_MEMORY, DEVICE_MEMORY, DEVICE_MEMORY, + HOST_MEMORY, HOST_MEMORY, HOST_MEMORY, + HOST_MEMORY, HOST_MEMORY, DEVICE_MEMORY}), output); } From a0aee5ed2c89d83019e6b724c3bf593074456e1c Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Thu, 6 Aug 2020 14:43:38 -0700 Subject: [PATCH 2289/2522] [XLA] Implement S8,S16,U16 support for Literal::GetIntegralAsS64 PiperOrigin-RevId: 325314732 Change-Id: Ia89c4153d2a70564f46c880f25112c3b74a44b2d --- tensorflow/compiler/xla/literal.cc | 10 ++++++++-- tensorflow/compiler/xla/service/hlo_evaluator.cc | 6 
+++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/xla/literal.cc b/tensorflow/compiler/xla/literal.cc index 3807e6d3a56..d26e0881c53 100644 --- a/tensorflow/compiler/xla/literal.cc +++ b/tensorflow/compiler/xla/literal.cc @@ -1004,14 +1004,20 @@ absl::optional LiteralBase::GetIntegralAsS64( switch (shape().element_type()) { case PRED: return Get(multi_index); + case S8: + return Get(multi_index); case U8: return Get(multi_index); + case S16: + return Get(multi_index); + case U16: + return Get(multi_index); case S32: return Get(multi_index); - case S64: - return Get(multi_index); case U32: return Get(multi_index); + case S64: + return Get(multi_index); case U64: return Get(multi_index); default: diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index 66e9e01fc38..acccf7aac9a 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -1573,9 +1573,9 @@ class OutputBatchIndexToInputIndex { int64 index_vector_dim = dim_numbers_.index_vector_dim(); for (int64 i = 0, e = index_vector_.size(); i < e; i++) { index_vector_index_[index_vector_dim] = i; - // TODO(george): OK what should happen here? - // seems OK to crash though. - index_vector_[i] = *start_indices_.GetIntegralAsS64(index_vector_index_); + auto start_index = start_indices_.GetIntegralAsS64(index_vector_index_); + TF_RET_CHECK(start_index.has_value()); + index_vector_[i] = *start_index; } return Status::OK(); } From 1fb7fbe3563fc177c2d9d19239e3f3f3687cffa0 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Thu, 6 Aug 2020 14:54:56 -0700 Subject: [PATCH 2290/2522] Fix. --- tensorflow/BUILD | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 8a6c1048078..484e45eb11d 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -882,8 +882,8 @@ genrule( visibility = ["//visibility:public"], ) -# The interface library (tensorflow_framework.dll.if.lib) for linking tensorflow DLL library -# (tensorflow_framework.dll) on Windows. +# The interface library (tensorflow_framework.dll.if.lib) for linking tensorflow DLL +# library (tensorflow_framework.dll) on Windows. # To learn more about import library (called interface library in Bazel): # https://docs.microsoft.com/en-us/cpp/build/linking-an-executable-to-a-dll?view=vs-2017#linking-implicitly filegroup( @@ -893,8 +893,8 @@ filegroup( visibility = ["//visibility:public"], ) -# Rename the import library for tensorflow_framework.dll from tensorflow_framework.dll.if.lib to -# tensorflow_framework.lib +# Rename the import library for tensorflow_framework.dll from +# tensorflow_framework.dll.if.lib to tensorflow_framework.lib genrule( name = "tensorflow_framework_dll_import_lib", srcs = [":get_tensorflow_framework_dll_import_lib"], From 544771ff261d80e9ca831d9b18d1690d4e00bf7d Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Thu, 6 Aug 2020 14:46:43 -0700 Subject: [PATCH 2291/2522] Add a static `getDialectNamespace()` method on MLIR Dialect This is now a requirement from the framework. 
PiperOrigin-RevId: 325315389 Change-Id: Ia8b8641d208caabd861c7ef1f63a99cbbd4dac8e --- tensorflow/compiler/mlir/tensorflow/ir/tf_device.h | 1 + tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h | 1 + 2 files changed, 2 insertions(+) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_device.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_device.h index d1ca07d85a7..688c8ca5715 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_device.h +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_device.h @@ -36,6 +36,7 @@ namespace tf_device { // XlaRun. class TensorFlowDeviceDialect : public Dialect { public: + static StringRef getDialectNamespace() { return "tf_device"; } // Constructing TensorFlowDevice dialect under an non-null MLIRContext. explicit TensorFlowDeviceDialect(MLIRContext* context); }; diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h index 3bb30f16c3d..61358172d6d 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h @@ -35,6 +35,7 @@ namespace tf_executor { class TensorFlowExecutorDialect : public Dialect { public: + static StringRef getDialectNamespace() { return "tf_executor"; } explicit TensorFlowExecutorDialect(MLIRContext *context); // Parses a type registered to this dialect. From e75098f34e92a3538876e0b636785e2f413e472d Mon Sep 17 00:00:00 2001 From: Mehmet Deveci Date: Thu, 6 Aug 2020 14:48:07 -0700 Subject: [PATCH 2292/2522] Adding a file name suffix option to event file names to prevent event file name collusions. PiperOrigin-RevId: 325315695 Change-Id: I9020e0ab52ea8e86b1f2dd852ff285c8a27a3ddb --- tensorflow/python/tpu/tensor_tracer.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/tpu/tensor_tracer.py b/tensorflow/python/tpu/tensor_tracer.py index c0536d84182..3f8f7530a8d 100644 --- a/tensorflow/python/tpu/tensor_tracer.py +++ b/tensorflow/python/tpu/tensor_tracer.py @@ -44,6 +44,7 @@ from tensorflow.python.ops import logging_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_impl from tensorflow.python.ops import state_ops +from tensorflow.python.ops import string_ops from tensorflow.python.ops import summary_ops_v2 as summary from tensorflow.python.ops import variable_scope from tensorflow.python.platform import analytics @@ -1643,11 +1644,12 @@ class TensorTracer(object): raise ValueError('Provide a trace_dir for tensor tracer in summary mode. ' '--trace_dir=/model/dir') - def _write_cache(step, **kwargs): + def _write_cache(step, event_file_suffix=None, **kwargs): """Writes the given caches as tensor summary. Args: step: Step tensor with dimension [num_cores]. + event_file_suffix: Event filename suffix tensor. **kwargs: The dictionary of tensors that needs to be written as summaries. Key and value pairs within kwargs correspond to the tag name, and tensor content that will be written using summary.write. @@ -1664,16 +1666,20 @@ class TensorTracer(object): Raises: RuntimeError: if there is no aggregate function defined for a signature. """ - + file_suffix = _TT_EVENT_FILE_SUFFIX + if event_file_suffix is not None: + file_suffix = string_ops.string_join([file_suffix, event_file_suffix], + separator='.') # TODO(deveci): Parametrize max_queue, so that flushing op can be called # less frequently. # Setting max_queue to 100 appears to be safe even when the number of # iterations are much lower, as the destructor of the writer flushes it. 
summary_write_ops = [] - with summary.create_file_writer_v2( + summary_writer = summary.create_file_writer_v2( self._parameters.trace_dir, - filename_suffix=_TT_EVENT_FILE_SUFFIX, - max_queue=_TT_SUMMARY_MAX_QUEUE).as_default(): + filename_suffix=file_suffix, + max_queue=_TT_SUMMARY_MAX_QUEUE) + with summary_writer.as_default(): summary_metadata = summary_pb2.SummaryMetadata( plugin_data=summary_pb2.SummaryMetadata.PluginData( plugin_name=_TT_TENSORBOARD_PLUGIN_NAME)) @@ -1688,8 +1694,7 @@ class TensorTracer(object): if key == _TT_SUMMARY_TAG and value.shape.as_list()[0] != 1: value = self.aggregate_global_cache(value) - with ops.control_dependencies( - summary.summary_writer_initializer_op()): + with ops.control_dependencies([summary_writer.init()]): summary_write_ops.append(summary.write( _TT_SUMMARY_TAG + '/' + key, value, metadata=summary_metadata, step=step[0])) From aca9c898732ab1ef08aaebbacce4f1a82cda2fcb Mon Sep 17 00:00:00 2001 From: Ayush Dubey Date: Thu, 6 Aug 2020 14:55:57 -0700 Subject: [PATCH 2293/2522] Return `errors::Aborted` from ScopedAllocatorOptimizer when it cannot optimize a valid graph. PiperOrigin-RevId: 325317272 Change-Id: I97c9d51cffbf08cb44f8078357530b83a8061c0e --- .../optimizers/scoped_allocator_optimizer.cc | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.cc b/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.cc index 6fb62019806..3f33ff50f6c 100644 --- a/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.cc @@ -104,20 +104,20 @@ Status CheckTypesAndGetShapes(const GraphProperties& graph_properties, << shapes->size(); if (!graph_properties.HasOutputProperties(n->name())) { LOG(ERROR) << "Node " << n->DebugString() << " lacks output shape."; - return errors::Internal("Node ", n->name(), " lacks output shape."); + return errors::Aborted("Node ", n->name(), " lacks output shape."); } const std::vector& prop_list = graph_properties.GetOutputProperties(n->name()); if (prop_list.size() != 1) { - return errors::Internal("Node ", n->name(), - " does not have exactly one output as expected " - "by ScopedAllocatorOptimizer"); + return errors::Aborted("Node ", n->name(), + " does not have exactly one output as expected " + "by ScopedAllocatorOptimizer"); } const OpInfo::TensorProperties& props = prop_list[0]; if (shapes->empty()) { *type = props.dtype(); } else if (*type != props.dtype()) { - return errors::Internal("Group ops don't all have same type"); + return errors::Aborted("Group ops don't all have same type"); } if (*type != dtype) { return errors::Internal( @@ -128,7 +128,7 @@ Status CheckTypesAndGetShapes(const GraphProperties& graph_properties, // TensorShape::IsValid may return true if unknown_rank is True, i.e. // number of dimensions is unknown. But for ScopedAllocatorOptimizer we // need to know the shape fully. 
- return errors::Internal("Complete shape not known for ", n->name()); + return errors::Aborted("Complete shape not known for ", n->name()); } VLOG(2) << "Adding shape " << props.shape().DebugString(); shapes->push_back(TensorShape(props.shape())); @@ -301,8 +301,8 @@ Status GetInputs(ScopedAllocatorOptimizer* sa_opti, int64 invocation_count, GetOutputDataType(inode_output_props, output_index, &inode_dtype)); } if (inode_dtype != dtype) { - return errors::Internal("ScopedAllocatorOptimizer expected input type ", - dtype, " but found ", inode_dtype); + return errors::Aborted("ScopedAllocatorOptimizer expected input type ", + dtype, " but found ", inode_dtype); } inputs->emplace_back(inode, output_index, n); } @@ -393,7 +393,7 @@ class UnaryElementwiseRewriter : public ScopedAllocatorOptimizer::Rewriter { LOG(INFO) << "Abandoning ScopedAllocatorOptimizer because input " << nd.from_node_def->name() << " output " << scope_ids[0] << " is already assigned to scope_id " << scope_ids[1]; - return errors::Internal( + return errors::Aborted( "Abandoning ScopedAllocatorOptimizer because input ", nd.from_node_def->name(), " output ", scope_ids[0], " is already ", "assigned to scope_id ", scope_ids[1]); @@ -408,10 +408,10 @@ class UnaryElementwiseRewriter : public ScopedAllocatorOptimizer::Rewriter { for (const InputDesc& nd : inputs) { if (op_set.find(nd.from_node_def->name()) != op_set.end()) { if (nd.output_slot != tensorflow::Graph::kControlSlot) { - return errors::Internal("Data edge exists between ", - nd.from_node_def->name(), - " and another " - "node in the set"); + return errors::Aborted("Data edge exists between ", + nd.from_node_def->name(), + " and another " + "node in the set"); } } } @@ -539,7 +539,7 @@ class UnaryElementwiseRewriter : public ScopedAllocatorOptimizer::Rewriter { for (int i = 0, end = inputs.size(); i < end; ++i) { auto& nd = inputs[i]; if (IsArg(*nd.from_node_def)) { - return errors::Internal( + return errors::Aborted( "ScopedAllocatorOptimizer does not work well when the op inputs " "are _Arg ops; skipping this optimizer for this function"); } @@ -619,9 +619,9 @@ class UnaryElementwiseRewriter : public ScopedAllocatorOptimizer::Rewriter { if (op_instance_names.find(old_op_input) != op_instance_names.end()) { LOG(ERROR) << "Data edge between " << old_op_input << " and " << old_op->name() << " cannot build ScopedAllocator."; - return errors::Internal("Data edge between ", old_op_input, " and ", - old_op->name(), - " cannot build ScopedAllocator."); + return errors::Aborted("Data edge between ", old_op_input, " and ", + old_op->name(), + " cannot build ScopedAllocator."); } sac_inputs->push_back( NodeDefBuilder::NodeOut(old_op_input, 0, dtype)); @@ -952,7 +952,7 @@ int ScopedAllocatorOptimizer::NewScopedAllocatorId(int num_fields) { Status ScopedAllocatorOptimizer::NewIdentityId(int* id) { *id = next_identity_id_++; if (next_identity_id_ < 0) { - return errors::Internal("NewIdentityId overflow"); + return errors::Aborted("NewIdentityId overflow"); } return Status::OK(); } From 50145eeeb12f5023efab8fea27e6f94953d44299 Mon Sep 17 00:00:00 2001 From: Sachin Joglekar Date: Thu, 6 Aug 2020 15:07:26 -0700 Subject: [PATCH 2294/2522] Remove NodeInfoDelegate from calibrator & inspect Interpreter directly. It breaks the contract that delegates must outlive Interpreter. 
PiperOrigin-RevId: 325319661 Change-Id: Ia33c0e67721e35347ae859f82d8e3bf0d17d1c2d --- .../lite/tools/optimize/calibration/BUILD | 36 ---- .../tools/optimize/calibration/calibrator.cc | 25 +-- .../calibration/node_info_delegate.cc | 69 ------- .../optimize/calibration/node_info_delegate.h | 67 ------- .../calibration/node_info_delegate_test.cc | 178 ------------------ 5 files changed, 13 insertions(+), 362 deletions(-) delete mode 100644 tensorflow/lite/tools/optimize/calibration/node_info_delegate.cc delete mode 100644 tensorflow/lite/tools/optimize/calibration/node_info_delegate.h delete mode 100644 tensorflow/lite/tools/optimize/calibration/node_info_delegate_test.cc diff --git a/tensorflow/lite/tools/optimize/calibration/BUILD b/tensorflow/lite/tools/optimize/calibration/BUILD index 06183353e44..674ef0ae4f6 100644 --- a/tensorflow/lite/tools/optimize/calibration/BUILD +++ b/tensorflow/lite/tools/optimize/calibration/BUILD @@ -41,7 +41,6 @@ cc_library( ":calibration_reader", ":logging_op", ":logging_op_resolver", - ":node_info_delegate", "//tensorflow/lite:framework", "//tensorflow/lite:string_util", "//tensorflow/lite/c:common", @@ -156,39 +155,4 @@ cc_library( ], ) -cc_library( - name = "node_info_delegate", - srcs = ["node_info_delegate.cc"], - hdrs = ["node_info_delegate.h"], - copts = tflite_copts(), - deps = [ - ":calibration_common", - "//tensorflow/lite:framework", - ], -) - -tf_cc_test( - name = "node_info_delegate_test", - srcs = ["node_info_delegate_test.cc"], - args = [ - "--test_model_file=$(location //tensorflow/lite/tools/optimize:testdata/single_conv_weights_min_0_max_plus_10.bin)", - ], - data = [ - "//tensorflow/lite/tools/optimize:testdata/single_conv_weights_min_0_max_plus_10.bin", - ], - tags = [ - "tflite_not_portable_android", - "tflite_not_portable_ios", - ], - deps = [ - ":node_info_delegate", - "//tensorflow/core:framework_internal", - "//tensorflow/core:lib", - "//tensorflow/lite:framework", - "//tensorflow/lite/kernels:builtin_ops", - "//tensorflow/lite/tools/optimize:test_util", - "@com_google_googletest//:gtest", - ], -) - tflite_portable_test_suite() diff --git a/tensorflow/lite/tools/optimize/calibration/calibrator.cc b/tensorflow/lite/tools/optimize/calibration/calibrator.cc index fb1677fda99..c82057ec207 100644 --- a/tensorflow/lite/tools/optimize/calibration/calibrator.cc +++ b/tensorflow/lite/tools/optimize/calibration/calibrator.cc @@ -39,7 +39,6 @@ limitations under the License. 
#include "tensorflow/lite/tools/optimize/calibration/calibration_reader.h" #include "tensorflow/lite/tools/optimize/calibration/logging_op.h" #include "tensorflow/lite/tools/optimize/calibration/logging_op_resolver.h" -#include "tensorflow/lite/tools/optimize/calibration/node_info_delegate.h" namespace tflite { namespace optimize { @@ -267,18 +266,20 @@ TfLiteStatus GetNodeOpInfoMapAndContext( const std::unordered_map& node_to_opinfo, tflite::Interpreter* const interpreter, std::unordered_map* node_ptr_opinfo_map, - const TfLiteContext** context) { - NodeInfoDelegateObserver delegate_observer(node_to_opinfo, - node_ptr_opinfo_map); - NodeInfoDelegateParams delegate_params; - delegate_params.delegate_observer = &delegate_observer; - TfLiteDelegate logging_delegate = CreateNodeInfoDelegate(&delegate_params); + TfLiteContext** context) { + *context = interpreter->primary_subgraph().context(); - auto modify_status = interpreter->ModifyGraphWithDelegate(&logging_delegate); - if (modify_status != kTfLiteOk) { - return kTfLiteError; + // Since we only consider the primary subgraph while populating + // node_to_opinfo, do the same here. + TF_LITE_ENSURE_EQ(*context, interpreter->execution_plan().size(), + node_to_opinfo.size()); + for (const auto op_index : interpreter->execution_plan()) { + const auto* node_and_reg = interpreter->node_and_registration(op_index); + + auto op_info = node_to_opinfo.at(op_index); + op_info.registration = &node_and_reg->second; + node_ptr_opinfo_map->insert({&node_and_reg->first, op_info}); } - *context = delegate_observer.GetContext(); return kTfLiteOk; } @@ -391,7 +392,7 @@ TfLiteStatus BuildLoggingInterpreter( // Compute the mapping between runtime and static graph structure, i.e. // (TfLiteContext, TfLiteNode) -> OperatorInfo std::unordered_map node_ptr_opinfo_map; - const TfLiteContext* context = nullptr; + TfLiteContext* context = nullptr; GetNodeOpInfoMapAndContext(node_to_opinfo, interpreter->get(), &node_ptr_opinfo_map, &context); diff --git a/tensorflow/lite/tools/optimize/calibration/node_info_delegate.cc b/tensorflow/lite/tools/optimize/calibration/node_info_delegate.cc deleted file mode 100644 index 84031761b30..00000000000 --- a/tensorflow/lite/tools/optimize/calibration/node_info_delegate.cc +++ /dev/null @@ -1,69 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include "tensorflow/lite/tools/optimize/calibration/node_info_delegate.h" - -namespace tflite { -namespace optimize { -namespace calibration { - -namespace { -// The prepare function for delegate that forwards the prepare call to the -// delegate observer in node info delegate params. -// The function simply calls a delegate observer OnDelegatePrepareMethod. 
-TfLiteStatus NodeInfoDelegatePrepare(TfLiteContext* context, - TfLiteDelegate* delegate) { - if (delegate == nullptr) return TfLiteStatus::kTfLiteError; - - NodeInfoDelegateParams* params = - reinterpret_cast(delegate->data_); - return params->delegate_observer->OnDelegatePrepareCalled(context); -} -} // namespace - -TfLiteDelegate CreateNodeInfoDelegate(NodeInfoDelegateParams* params) { - auto delegate = TfLiteDelegateCreate(); - delegate.data_ = params; - delegate.Prepare = NodeInfoDelegatePrepare; - delegate.CopyFromBufferHandle = nullptr; - delegate.CopyToBufferHandle = nullptr; - delegate.FreeBufferHandle = nullptr; - delegate.flags = kTfLiteDelegateFlagsAllowDynamicTensors; - return delegate; -} - -TfLiteStatus NodeInfoDelegateObserver::OnDelegatePrepareCalled( - TfLiteContext* context) { - context_ = context; - const size_t num_nodes = node_index_opinfo_map_.size(); - for (size_t node_index = 0; node_index < num_nodes; node_index++) { - TfLiteNode* node = nullptr; - TfLiteRegistration* reg = nullptr; - TF_LITE_ENSURE_STATUS( - context->GetNodeAndRegistration(context, node_index, &node, ®)); - auto op_info = node_index_opinfo_map_.at(node_index); - op_info.registration = reg; - node_ptr_opinfo_map_->insert({node, op_info}); - } - - if (node_ptr_opinfo_map_->size() != node_index_opinfo_map_.size()) { - // Something wrong. - return kTfLiteError; - } - return kTfLiteOk; -} - -} // namespace calibration -} // namespace optimize -} // namespace tflite diff --git a/tensorflow/lite/tools/optimize/calibration/node_info_delegate.h b/tensorflow/lite/tools/optimize/calibration/node_info_delegate.h deleted file mode 100644 index 56f6141f21d..00000000000 --- a/tensorflow/lite/tools/optimize/calibration/node_info_delegate.h +++ /dev/null @@ -1,67 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_LITE_TOOLS_OPTIMIZE_NODE_INFO_DELEGATE_H_ -#define TENSORFLOW_LITE_TOOLS_OPTIMIZE_NODE_INFO_DELEGATE_H_ - -#include - -#include "tensorflow/lite/context.h" -#include "tensorflow/lite/tools/optimize/calibration/calibration_common.h" - -namespace tflite { -namespace optimize { -namespace calibration { - -// An interface for delegate observer that can listen to TfLiteDelegate::Prepare -// calls. -class DelegateObserver { - public: - virtual TfLiteStatus OnDelegatePrepareCalled(TfLiteContext* context) = 0; - virtual ~DelegateObserver() {} -}; - -// The parameters for the node info delegate. -struct NodeInfoDelegateParams { - DelegateObserver* delegate_observer; -}; - -// Creates a delegate with the given |params|. -TfLiteDelegate CreateNodeInfoDelegate(NodeInfoDelegateParams* params); - -// A delegate observer that can construct the map from TfLiteNode* -> -// OperatorInfo. 
-class NodeInfoDelegateObserver : public DelegateObserver { - public: - NodeInfoDelegateObserver( - const std::unordered_map& node_index_to_op, - std::unordered_map* node_ptr_opinfo_map) - : node_index_opinfo_map_(node_index_to_op), - node_ptr_opinfo_map_(node_ptr_opinfo_map) {} - - TfLiteStatus OnDelegatePrepareCalled(TfLiteContext* context) override; - - // Returns the context that was used to called the prepare method. - const TfLiteContext* GetContext() const { return context_; } - - private: - const TfLiteContext* context_ = nullptr; - const std::unordered_map& node_index_opinfo_map_; - std::unordered_map* node_ptr_opinfo_map_; -}; - -} // namespace calibration -} // namespace optimize -} // namespace tflite -#endif // TENSORFLOW_LITE_TOOLS_OPTIMIZE_NODE_INFO_DELEGATE_H_ diff --git a/tensorflow/lite/tools/optimize/calibration/node_info_delegate_test.cc b/tensorflow/lite/tools/optimize/calibration/node_info_delegate_test.cc deleted file mode 100644 index 722bdbdbb39..00000000000 --- a/tensorflow/lite/tools/optimize/calibration/node_info_delegate_test.cc +++ /dev/null @@ -1,178 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include - -#include -#include -#include "tensorflow/core/lib/io/path.h" -#include "tensorflow/core/platform/init_main.h" -#include "tensorflow/core/util/command_line_flags.h" -#include "tensorflow/lite/kernels/register.h" -#include "tensorflow/lite/model.h" -#include "tensorflow/lite/tools/optimize/calibration/node_info_delegate.h" -#include "tensorflow/lite/tools/optimize/test_util.h" - -namespace { -tensorflow::string* g_test_model_dir = nullptr; -} // namespace - -namespace tflite { -namespace optimize { -namespace calibration { -namespace { - -std::unique_ptr ReadModel(const char* model) { - auto model_path = tensorflow::io::JoinPath(*g_test_model_dir, model); - return FlatBufferModel::BuildFromFile(model_path.c_str()); -} - -std::unique_ptr ReadModel() { - return ReadModel(internal::kConvModelWith0Plus10Weights); -} - -class TestDelegateObserver : public DelegateObserver { - public: - explicit TestDelegateObserver(TfLiteStatus status_to_return) - : status_to_return_(status_to_return) {} - - TfLiteStatus OnDelegatePrepareCalled(TfLiteContext* context) override { - num_times_called_++; - return status_to_return_; - } - int num_times_called() { return num_times_called_; } - - private: - int num_times_called_ = 0; - TfLiteStatus status_to_return_; -}; - -TEST(NodeInfoDelegateTest, DelegateObserverIsCalled) { - TestDelegateObserver observer(kTfLiteOk); - NodeInfoDelegateParams params; - params.delegate_observer = &observer; - auto model = ReadModel(); - ASSERT_TRUE(model); - std::unique_ptr interpreter; - ASSERT_EQ(InterpreterBuilder(*model, - ops::builtin::BuiltinOpResolver{})(&interpreter), - kTfLiteOk); - ASSERT_TRUE(interpreter); - EXPECT_EQ(0, observer.num_times_called()); - TfLiteDelegate delegate = CreateNodeInfoDelegate(¶ms); - 
- auto status = interpreter->ModifyGraphWithDelegate(&delegate); - EXPECT_EQ(kTfLiteOk, status); - EXPECT_EQ(1, observer.num_times_called()); -} - -TEST(NodeInfoDelegateTest, ObserverErrorCausesModifyGraphFailure) { - // Observer returns error - TestDelegateObserver observer(kTfLiteError); - NodeInfoDelegateParams params; - params.delegate_observer = &observer; - auto model = ReadModel(); - ASSERT_TRUE(model); - std::unique_ptr interpreter; - ASSERT_EQ(InterpreterBuilder(*model, - ops::builtin::BuiltinOpResolver{})(&interpreter), - kTfLiteOk); - ASSERT_TRUE(interpreter); - TfLiteDelegate delegate = CreateNodeInfoDelegate(¶ms); - - auto status = interpreter->ModifyGraphWithDelegate(&delegate); - EXPECT_EQ(kTfLiteDelegateError, status); -} - -TEST(NodeInfoDelegateTest, NodeInfoDelegateObserver) { - auto model = ReadModel(); - ASSERT_TRUE(model); - - std::unordered_map index_to_opinfo; - auto primary_subgraph = model->GetModel()->subgraphs()->Get(0); - auto operators = primary_subgraph->operators(); - auto subgraph_tensors = primary_subgraph->tensors(); - for (size_t i = 0; i < operators->size(); i++) { - OperatorInfo info; - auto op_inputs = operators->Get(i)->inputs(); - auto op_outputs = operators->Get(i)->outputs(); - info.inputs = std::vector(op_inputs->begin(), op_inputs->end()); - info.outputs = std::vector(op_outputs->begin(), op_outputs->end()); - index_to_opinfo[i] = info; - } - - std::unordered_map node_to_opinfo; - NodeInfoDelegateObserver observer(index_to_opinfo, &node_to_opinfo); - NodeInfoDelegateParams params; - params.delegate_observer = &observer; - std::unique_ptr interpreter; - ASSERT_EQ(InterpreterBuilder(*model, - ops::builtin::BuiltinOpResolver{})(&interpreter), - kTfLiteOk); - ASSERT_TRUE(interpreter); - - TfLiteDelegate delegate = CreateNodeInfoDelegate(¶ms); - - auto status = interpreter->ModifyGraphWithDelegate(&delegate); - EXPECT_EQ(kTfLiteOk, status); - EXPECT_EQ(index_to_opinfo.size(), node_to_opinfo.size()); - EXPECT_EQ(interpreter->nodes_size(), node_to_opinfo.size()); - - for (const auto& node_and_opinfo : node_to_opinfo) { - const TfLiteNode* tflite_node = node_and_opinfo.first; - const OperatorInfo& info = node_and_opinfo.second; - ASSERT_EQ(tflite_node->inputs->size, info.inputs.size()); - ASSERT_EQ(tflite_node->outputs->size, info.outputs.size()); - - for (size_t input_index = 0; input_index < info.inputs.size(); - input_index++) { - const TfLiteTensor* tflite_tensor = - interpreter->tensor(tflite_node->inputs->data[input_index]); - EXPECT_EQ(tflite_tensor->name, - subgraph_tensors->Get(info.inputs[input_index])->name()->str()); - } - - for (size_t output_index = 0; output_index < info.outputs.size(); - output_index++) { - const TfLiteTensor* tflite_tensor = - interpreter->tensor(tflite_node->outputs->data[output_index]); - EXPECT_EQ( - tflite_tensor->name, - subgraph_tensors->Get(info.outputs[output_index])->name()->str()); - } - } -} - -} // namespace -} // namespace calibration -} // namespace optimize -} // namespace tflite - -int main(int argc, char** argv) { - tensorflow::string model_file; - const std::vector flag_list = { - tensorflow::Flag("test_model_file", &model_file, - "Path to test tflite model file."), - }; - - const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); - if (!parse_result) { - std::cerr << "Required test_model_file\n"; - std::abort(); - } - g_test_model_dir = - new tensorflow::string(tensorflow::io::Dirname(model_file)); - ::tensorflow::port::InitMain(argv[0], &argc, &argv); - return RUN_ALL_TESTS(); -} 
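The change above drops the temporary NodeInfoDelegate in favor of inspecting the interpreter directly. A minimal sketch of that pattern, assuming the TFLite C++ interpreter API already used in calibrator.cc above (the bookkeeping around the loop body is illustrative):

  // Walk the primary subgraph's execution plan and read each node's
  // registration straight from the interpreter, with no delegate involved.
  TfLiteContext* context = interpreter->primary_subgraph().context();
  for (const auto op_index : interpreter->execution_plan()) {
    const auto* node_and_reg = interpreter->node_and_registration(op_index);
    const TfLiteNode* node = &node_and_reg->first;           // runtime node
    const TfLiteRegistration* reg = &node_and_reg->second;   // its kernel registration
    // ... associate `node` and `reg` with the static graph info for op_index ...
  }

Because no delegate is created, the lifetime contract that delegates must outlive the Interpreter is no longer at risk.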
From 9c8581efc5c58f09924d86e4ede497223f854d1e Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Thu, 6 Aug 2020 22:45:17 +0000 Subject: [PATCH 2295/2522] fix build --- tensorflow/core/framework/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/core/framework/BUILD b/tensorflow/core/framework/BUILD index e09022d5235..f0dfa267d77 100644 --- a/tensorflow/core/framework/BUILD +++ b/tensorflow/core/framework/BUILD @@ -291,6 +291,7 @@ filegroup( "resource_handle.h", "tensor.cc", "tensor.h", + "tensor_key.h", "tensor_shape.cc", "tensor_shape.h", "tensor_types.h", From c7e51d1866fa0f7ca43a0eb83d4072a200ab7946 Mon Sep 17 00:00:00 2001 From: Penporn Koanantakool Date: Thu, 6 Aug 2020 15:48:50 -0700 Subject: [PATCH 2296/2522] Refactor meta_support out of two targets ("quantized_ops" and "cwise_lib") that could possibly be included in the same binary and cause multiple definition errors. meta_support.h will be removed from all MKL targets in a later commit. PiperOrigin-RevId: 325327446 Change-Id: I89c25be8f9cb587b005cd68d32d3078ad9b8829c --- tensorflow/core/kernels/BUILD | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index dfe9f35701c..e5e2ad38d9b 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -6096,6 +6096,19 @@ cc_library( ], ) +cc_library( + name = "meta_support", + srcs = ["meta_support.cc"], + hdrs = ["meta_support.h"], + deps = [ + ":quantization_utils", + "//tensorflow/core:framework", + "//tensorflow/core/platform:logging", + "//tensorflow/core/platform:mutex", + "@gemmlowp", + ], +) + # Android libraries ----------------------------------------------------------- # Changes to the Android srcs here should be replicated in @@ -6867,7 +6880,6 @@ tf_kernel_library( name = "quantized_ops", srcs = [ "dequantize_op.cc", - "meta_support.cc", "quantize_down_and_shrink_range.cc", "quantize_op.cc", "quantized_activation_ops.cc", @@ -6886,16 +6898,14 @@ tf_kernel_library( "requantize.cc", "reshape_op.h", ], - hdrs = [ - "meta_support.h", - "reference_gemm.h", - ], + hdrs = ["reference_gemm.h"], deps = [ ":concat_lib_hdrs", ":conv_ops", ":cwise_op", ":eigen_helpers", ":image_resizer_state", + ":meta_support", ":ops_util", ":pooling_ops", ":quantization_utils", @@ -8264,10 +8274,7 @@ tf_kernel_library( # should not be linked by projects that also link the cwise_op library. 
cc_library( name = "cwise_lib", - srcs = [ - "cwise_ops_common.cc", - "meta_support.cc", - ], + srcs = ["cwise_ops_common.cc"], hdrs = [ "cwise_ops.h", "cwise_ops_common.h", @@ -8275,10 +8282,10 @@ cc_library( "cwise_ops_gpu_gradients.cu.h", "cwise_ops_gradients.h", "fill_functor.h", - "meta_support.h", ], deps = [ ":bounds_check", + ":meta_support", ":quantization_utils", "//tensorflow/core:framework", "//tensorflow/core:lib", From 41622e7754d0103e18b12cd3756e35c7d7953d96 Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Thu, 6 Aug 2020 15:49:03 -0700 Subject: [PATCH 2297/2522] Clone nightly jobs for testing CUDA 11 PiperOrigin-RevId: 325327481 Change-Id: Iba28b227125f83527ee8b4bffd947b54fd9f2006 --- .../rel/ubuntu_cuda11/cpu_libtensorflow.sh | 40 ++++++++++++ .../rel/ubuntu_cuda11/cpu_py35_nonpip.sh | 48 +++++++++++++++ .../rel/ubuntu_cuda11/cpu_py35_pip.sh | 47 ++++++++++++++ .../rel/ubuntu_cuda11/cpu_py36_nonpip.sh | 48 +++++++++++++++ .../rel/ubuntu_cuda11/cpu_py36_pip.sh | 47 ++++++++++++++ .../rel/ubuntu_cuda11/cpu_py37_nonpip.sh | 48 +++++++++++++++ .../rel/ubuntu_cuda11/cpu_py37_pip.sh | 47 ++++++++++++++ .../rel/ubuntu_cuda11/cpu_py38_nonpip.sh | 48 +++++++++++++++ .../rel/ubuntu_cuda11/cpu_py38_pip.sh | 47 ++++++++++++++ .../rel/ubuntu_cuda11/gpu_libtensorflow.sh | 40 ++++++++++++ .../rel/ubuntu_cuda11/gpu_pip_on_cpu.sh | 61 +++++++++++++++++++ .../rel/ubuntu_cuda11/gpu_py35_nonpip.sh | 60 ++++++++++++++++++ .../rel/ubuntu_cuda11/gpu_py35_pip.sh | 55 +++++++++++++++++ .../rel/ubuntu_cuda11/gpu_py36_nonpip.sh | 60 ++++++++++++++++++ .../rel/ubuntu_cuda11/gpu_py36_pip.sh | 55 +++++++++++++++++ .../rel/ubuntu_cuda11/gpu_py37_nonpip.sh | 60 ++++++++++++++++++ .../rel/ubuntu_cuda11/gpu_py37_pip.sh | 55 +++++++++++++++++ .../rel/ubuntu_cuda11/gpu_py38_nonpip.sh | 60 ++++++++++++++++++ .../rel/ubuntu_cuda11/gpu_py38_pip.sh | 55 +++++++++++++++++ .../ci_build/rel/ubuntu_cuda11/sanity.sh | 36 +++++++++++ 20 files changed, 1017 insertions(+) create mode 100644 tensorflow/tools/ci_build/rel/ubuntu_cuda11/cpu_libtensorflow.sh create mode 100644 tensorflow/tools/ci_build/rel/ubuntu_cuda11/cpu_py35_nonpip.sh create mode 100644 tensorflow/tools/ci_build/rel/ubuntu_cuda11/cpu_py35_pip.sh create mode 100644 tensorflow/tools/ci_build/rel/ubuntu_cuda11/cpu_py36_nonpip.sh create mode 100644 tensorflow/tools/ci_build/rel/ubuntu_cuda11/cpu_py36_pip.sh create mode 100644 tensorflow/tools/ci_build/rel/ubuntu_cuda11/cpu_py37_nonpip.sh create mode 100644 tensorflow/tools/ci_build/rel/ubuntu_cuda11/cpu_py37_pip.sh create mode 100644 tensorflow/tools/ci_build/rel/ubuntu_cuda11/cpu_py38_nonpip.sh create mode 100644 tensorflow/tools/ci_build/rel/ubuntu_cuda11/cpu_py38_pip.sh create mode 100644 tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_libtensorflow.sh create mode 100755 tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_pip_on_cpu.sh create mode 100644 tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py35_nonpip.sh create mode 100644 tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py35_pip.sh create mode 100644 tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py36_nonpip.sh create mode 100644 tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py36_pip.sh create mode 100644 tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py37_nonpip.sh create mode 100644 tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py37_pip.sh create mode 100644 tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py38_nonpip.sh create mode 100644 tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py38_pip.sh create mode 100644 
tensorflow/tools/ci_build/rel/ubuntu_cuda11/sanity.sh diff --git a/tensorflow/tools/ci_build/rel/ubuntu_cuda11/cpu_libtensorflow.sh b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/cpu_libtensorflow.sh new file mode 100644 index 00000000000..a0e3a7f4594 --- /dev/null +++ b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/cpu_libtensorflow.sh @@ -0,0 +1,40 @@ +#!/bin/bash +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +set -e + +# Source the external common scripts. +source tensorflow/tools/ci_build/release/common.sh + + +# Install latest bazel +install_bazelisk +which bazel + +# Install realpath +sudo apt-get install realpath + +# Update the version string to nightly +if [ -n "${IS_NIGHTLY_BUILD}" ]; then + ./tensorflow/tools/ci_build/update_version.py --nightly +fi + +./tensorflow/tools/ci_build/linux/libtensorflow.sh + +# Copy the nightly version update script +if [ -n "${IS_NIGHTLY_BUILD}" ]; then + cp tensorflow/tools/ci_build/builds/libtensorflow_nightly_symlink.sh lib_package +fi + diff --git a/tensorflow/tools/ci_build/rel/ubuntu_cuda11/cpu_py35_nonpip.sh b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/cpu_py35_nonpip.sh new file mode 100644 index 00000000000..fee64f0beb1 --- /dev/null +++ b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/cpu_py35_nonpip.sh @@ -0,0 +1,48 @@ +#!/bin/bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +set -e +set -x + +source tensorflow/tools/ci_build/release/common.sh + +install_ubuntu_16_pip_deps pip3.5 +# Update bazel +install_bazelisk + +# Run configure. +export TF_NEED_GCP=1 +export TF_NEED_HDFS=1 +export TF_NEED_S3=1 +export TF_NEED_CUDA=0 +export CC_OPT_FLAGS='-mavx' +export PYTHON_BIN_PATH=$(which python3.5) +export TF2_BEHAVIOR=1 +yes "" | "$PYTHON_BIN_PATH" configure.py +tag_filters="-no_oss,-oss_serial,-gpu,-tpu,-benchmark-test,-no_oss_py35,-v1only" + +# Get the default test targets for bazel. 
+source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh + +# Run tests +set +e +bazel test --test_output=errors --config=opt --test_lang_filters=py \ + --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ + --linkopt=-lrt \ + --action_env=TF2_BEHAVIOR="${TF2_BEHAVIOR}" \ + --build_tag_filters="${tag_filters}" \ + --test_tag_filters="${tag_filters}" -- \ + ${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... +test_xml_summary_exit diff --git a/tensorflow/tools/ci_build/rel/ubuntu_cuda11/cpu_py35_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/cpu_py35_pip.sh new file mode 100644 index 00000000000..bdbb7f15e34 --- /dev/null +++ b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/cpu_py35_pip.sh @@ -0,0 +1,47 @@ +#!/bin/bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +set -e +set -x + +source tensorflow/tools/ci_build/release/common.sh + +install_ubuntu_16_pip_deps pip3.5 +# Update bazel +install_bazelisk + +# Export required variables for running pip.sh +export OS_TYPE="UBUNTU" +export CONTAINER_TYPE="CPU" +export TF_PYTHON_VERSION='python3.5' + +# Run configure. +export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) +yes "" | "$PYTHON_BIN_PATH" configure.py + +# Get the default test targets for bazel. +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh + +# Export optional variables for running pip.sh +export TF_BUILD_FLAGS="--config=release_cpu_linux" +export TF_TEST_FLAGS="--define=no_tensorflow_py_deps=true --test_lang_filters=py --test_output=errors --verbose_failures=true --keep_going --test_env=TF2_BEHAVIOR=1" +export TF_TEST_TARGETS="${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... " +export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" +export TF_TEST_FILTER_TAGS='-no_oss,-oss_serial,-no_oss_py35,-v1only' +#export IS_NIGHTLY=0 # Not nightly; uncomment if building from tf repo. +export TF_PROJECT_NAME="tensorflow_cpu" +export TF_PIP_TEST_ROOT="pip_test" + +./tensorflow/tools/ci_build/builds/pip_new.sh diff --git a/tensorflow/tools/ci_build/rel/ubuntu_cuda11/cpu_py36_nonpip.sh b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/cpu_py36_nonpip.sh new file mode 100644 index 00000000000..6b05141f00f --- /dev/null +++ b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/cpu_py36_nonpip.sh @@ -0,0 +1,48 @@ +#!/bin/bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +set -e +set -x + +source tensorflow/tools/ci_build/release/common.sh + +install_ubuntu_16_pip_deps pip3.6 +# Update bazel +install_bazelisk + +# Run configure. +export TF_NEED_GCP=1 +export TF_NEED_HDFS=1 +export TF_NEED_S3=1 +export TF_NEED_CUDA=0 +export CC_OPT_FLAGS='-mavx' +export PYTHON_BIN_PATH=$(which python3.6) +export TF2_BEHAVIOR=1 +yes "" | "$PYTHON_BIN_PATH" configure.py +tag_filters="-no_oss,-oss_serial,-gpu,-tpu,-benchmark-test,-no_oss_py36,-v1only" + +# Get the default test targets for bazel. +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh + +# Run tests +set +e +bazel test --test_output=errors --config=opt --test_lang_filters=py \ + --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ + --linkopt=-lrt \ + --action_env=TF2_BEHAVIOR="${TF2_BEHAVIOR}" \ + --build_tag_filters="${tag_filters}" \ + --test_tag_filters="${tag_filters}" -- \ + ${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... +test_xml_summary_exit diff --git a/tensorflow/tools/ci_build/rel/ubuntu_cuda11/cpu_py36_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/cpu_py36_pip.sh new file mode 100644 index 00000000000..6277291043c --- /dev/null +++ b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/cpu_py36_pip.sh @@ -0,0 +1,47 @@ +#!/bin/bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +set -e +set -x + +source tensorflow/tools/ci_build/release/common.sh + +install_ubuntu_16_pip_deps pip3.6 +# Update bazel +install_bazelisk + +# Export required variables for running pip.sh +export OS_TYPE="UBUNTU" +export CONTAINER_TYPE="CPU" +export TF_PYTHON_VERSION='python3.6' + +# Run configure. +export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) +yes "" | "$PYTHON_BIN_PATH" configure.py + +# Get the default test targets for bazel. +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh + +# Export optional variables for running pip.sh +export TF_BUILD_FLAGS="--config=release_cpu_linux" +export TF_TEST_FLAGS="--define=no_tensorflow_py_deps=true --test_lang_filters=py --test_output=errors --verbose_failures=true --keep_going --test_env=TF2_BEHAVIOR=1" +export TF_TEST_TARGETS="${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... " +export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" +export TF_TEST_FILTER_TAGS='-no_oss,-oss_serial,-no_oss_py36,-v1only' +#export IS_NIGHTLY=0 # Not nightly; uncomment if building from tf repo. 
+export TF_PROJECT_NAME="tensorflow_cpu" +export TF_PIP_TEST_ROOT="pip_test" + +./tensorflow/tools/ci_build/builds/pip_new.sh diff --git a/tensorflow/tools/ci_build/rel/ubuntu_cuda11/cpu_py37_nonpip.sh b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/cpu_py37_nonpip.sh new file mode 100644 index 00000000000..db0c6056b6c --- /dev/null +++ b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/cpu_py37_nonpip.sh @@ -0,0 +1,48 @@ +#!/bin/bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +set -e +set -x + +source tensorflow/tools/ci_build/release/common.sh + +install_ubuntu_16_pip_deps pip3.7 +# Update bazel +install_bazelisk + +# Run configure. +export TF_NEED_GCP=1 +export TF_NEED_HDFS=1 +export TF_NEED_S3=1 +export TF_NEED_CUDA=0 +export CC_OPT_FLAGS='-mavx' +export PYTHON_BIN_PATH=$(which python3.7) +export TF2_BEHAVIOR=1 +yes "" | "$PYTHON_BIN_PATH" configure.py +tag_filters="-no_oss,-oss_serial,-gpu,-tpu,-benchmark-test,-no_oss_py37,-v1only" + +# Get the default test targets for bazel. +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh + +# Run tests +set +e +bazel test --test_output=errors --config=opt --test_lang_filters=py \ + --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ + --linkopt=-lrt \ + --action_env=TF2_BEHAVIOR="${TF2_BEHAVIOR}" \ + --build_tag_filters="${tag_filters}" \ + --test_tag_filters="${tag_filters}" -- \ + ${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... +test_xml_summary_exit diff --git a/tensorflow/tools/ci_build/rel/ubuntu_cuda11/cpu_py37_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/cpu_py37_pip.sh new file mode 100644 index 00000000000..ff88ae46f39 --- /dev/null +++ b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/cpu_py37_pip.sh @@ -0,0 +1,47 @@ +#!/bin/bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +set -e +set -x + +source tensorflow/tools/ci_build/release/common.sh + +install_ubuntu_16_pip_deps pip3.7 +# Update bazel +install_bazelisk + +# Export required variables for running pip.sh +export OS_TYPE="UBUNTU" +export CONTAINER_TYPE="CPU" +export TF_PYTHON_VERSION='python3.7' + +# Run configure. 
+export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) +yes "" | "$PYTHON_BIN_PATH" configure.py + +# Get the default test targets for bazel. +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh + +# Export optional variables for running pip.sh +export TF_BUILD_FLAGS="--config=release_cpu_linux" +export TF_TEST_FLAGS="--define=no_tensorflow_py_deps=true --test_lang_filters=py --test_output=errors --verbose_failures=true --keep_going --test_env=TF2_BEHAVIOR=1" +export TF_TEST_TARGETS="${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... " +export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" +export TF_TEST_FILTER_TAGS='-no_oss,-oss_serial,-no_oss_py37,-v1only' +#export IS_NIGHTLY=0 # Not nightly; uncomment if building from tf repo. +export TF_PROJECT_NAME="tensorflow_cpu" +export TF_PIP_TEST_ROOT="pip_test" + +./tensorflow/tools/ci_build/builds/pip_new.sh diff --git a/tensorflow/tools/ci_build/rel/ubuntu_cuda11/cpu_py38_nonpip.sh b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/cpu_py38_nonpip.sh new file mode 100644 index 00000000000..36da30167d0 --- /dev/null +++ b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/cpu_py38_nonpip.sh @@ -0,0 +1,48 @@ +#!/bin/bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +set -e +set -x + +source tensorflow/tools/ci_build/release/common.sh + +install_ubuntu_16_pip_deps pip3.8 +# Update bazel +install_bazelisk + +# Run configure. +export TF_NEED_GCP=1 +export TF_NEED_HDFS=1 +export TF_NEED_S3=1 +export TF_NEED_CUDA=0 +export CC_OPT_FLAGS='-mavx' +export PYTHON_BIN_PATH=$(which python3.8) +export TF2_BEHAVIOR=1 +yes "" | "$PYTHON_BIN_PATH" configure.py +tag_filters="-no_oss,-oss_serial,-gpu,-tpu,-benchmark-test,-no_oss_py38,-v1only" + +# Get the default test targets for bazel. +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh + +# Run tests +set +e +bazel test --test_output=errors --config=opt --test_lang_filters=py \ + --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ + --linkopt=-lrt \ + --action_env=TF2_BEHAVIOR="${TF2_BEHAVIOR}" \ + --build_tag_filters="${tag_filters}" \ + --test_tag_filters="${tag_filters}" -- \ + ${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... +test_xml_summary_exit diff --git a/tensorflow/tools/ci_build/rel/ubuntu_cuda11/cpu_py38_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/cpu_py38_pip.sh new file mode 100644 index 00000000000..52872cfd0a6 --- /dev/null +++ b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/cpu_py38_pip.sh @@ -0,0 +1,47 @@ +#!/bin/bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +set -e +set -x + +source tensorflow/tools/ci_build/release/common.sh + +install_ubuntu_16_pip_deps pip3.8 +# Update bazel +install_bazelisk + +# Export required variables for running pip.sh +export OS_TYPE="UBUNTU" +export CONTAINER_TYPE="CPU" +export TF_PYTHON_VERSION='python3.8' + +# Run configure. +export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) +yes "" | "$PYTHON_BIN_PATH" configure.py + +# Get the default test targets for bazel. +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh + +# Export optional variables for running pip.sh +export TF_BUILD_FLAGS="--config=release_cpu_linux" +export TF_TEST_FLAGS="--define=no_tensorflow_py_deps=true --test_lang_filters=py --test_output=errors --verbose_failures=true --keep_going --test_env=TF2_BEHAVIOR=1" +export TF_TEST_TARGETS="${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... " +export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" +export TF_TEST_FILTER_TAGS='-no_oss,-oss_serial,-no_oss_py38,-v1only' +#export IS_NIGHTLY=0 # Not nightly; uncomment if building from tf repo. +export TF_PROJECT_NAME="tensorflow_cpu" +export TF_PIP_TEST_ROOT="pip_test" + +./tensorflow/tools/ci_build/builds/pip_new.sh diff --git a/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_libtensorflow.sh b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_libtensorflow.sh new file mode 100644 index 00000000000..d294311d1ff --- /dev/null +++ b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_libtensorflow.sh @@ -0,0 +1,40 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +set -e + +# Source the external common scripts. 
+source tensorflow/tools/ci_build/release/common.sh + + +# Install latest bazel +install_bazelisk +which bazel + +# Install realpath +sudo apt-get install realpath + +export TF_NEED_CUDA=1 + +# Update the version string to nightly +if [ -n "${IS_NIGHTLY_BUILD}" ]; then + ./tensorflow/tools/ci_build/update_version.py --nightly +fi + +./tensorflow/tools/ci_build/linux/libtensorflow.sh + +# Copy the nightly version update script +if [ -n "${IS_NIGHTLY_BUILD}" ]; then + cp tensorflow/tools/ci_build/builds/libtensorflow_nightly_symlink.sh lib_package +fi diff --git a/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_pip_on_cpu.sh b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_pip_on_cpu.sh new file mode 100755 index 00000000000..6e67bf20730 --- /dev/null +++ b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_pip_on_cpu.sh @@ -0,0 +1,61 @@ +#!/bin/bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +set -e +set -x + +source tensorflow/tools/ci_build/release/common.sh + +install_ubuntu_16_pip_deps pip3.6 +# Update Bazel to the desired version +install_bazelisk + +# Run configure. +export TF_NEED_GCP=1 +export TF_NEED_HDFS=1 +export TF_NEED_S3=1 +export TF_NEED_CUDA=1 +export TF_CUDA_VERSION=10 +export TF_CUDNN_VERSION=7 +export TF_NEED_TENSORRT=1 +export TENSORRT_INSTALL_PATH=/usr/local/tensorrt +export CC_OPT_FLAGS='-mavx' +export PYTHON_BIN_PATH=$(which python3.6) +export LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$TENSORRT_INSTALL_PATH/lib" +export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 + +yes "" | "$PYTHON_BIN_PATH" configure.py + +######################## +## Build GPU pip package +######################## +bazel build --config=opt \ + --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ + tensorflow/tools/pip_package:build_pip_package + +# Set TF nightly flag so we get the proper version of estimator +if [[ "$IS_NIGHTLY" == 1 ]]; then + NIGHTLY_FLAG="--nightly_flag" +fi + +PIP_WHL_DIR=whl +mkdir -p ${PIP_WHL_DIR} +PIP_WHL_DIR=$(readlink -f ${PIP_WHL_DIR}) # Get absolute path +bazel-bin/tensorflow/tools/pip_package/build_pip_package "${PIP_WHL_DIR}" "${NIGHTLY_FLAG}" +WHL_PATH=$(ls "${PIP_WHL_DIR}"/*.whl) + +cp "${WHL_PATH}" "$(pwd)"/. 
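# Note on the step below (descriptive annotation, not part of the committed script):
# the docker invocation runs the CPU pip test script (docker_cpu_pip.sh) against the
# wheel built above inside the tensorflow/tensorflow:devel image; the source tree is
# mounted at /bazel_pip, and the CI_BUILD_* variables let the with_the_same_user
# wrapper recreate the calling user inside the container so generated files keep the
# caller's ownership.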
+chmod +x tensorflow/tools/ci_build/builds/docker_cpu_pip.sh +docker run -e "BAZEL_VERSION=${BAZEL_VERSION}" -e "CI_BUILD_USER=$(id -u -n)" -e "CI_BUILD_UID=$(id -u)" -e "CI_BUILD_GROUP=$(id -g -n)" -e "CI_BUILD_GID=$(id -g)" -e "CI_BUILD_HOME=/bazel_pip" -v "$(pwd)":/bazel_pip tensorflow/tensorflow:devel "./bazel_pip/tensorflow/tools/ci_build/builds/with_the_same_user" "./bazel_pip/tensorflow/tools/ci_build/builds/docker_cpu_pip.sh" diff --git a/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py35_nonpip.sh b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py35_nonpip.sh new file mode 100644 index 00000000000..47ed3c4fd2a --- /dev/null +++ b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py35_nonpip.sh @@ -0,0 +1,60 @@ +#!/bin/bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +set -e +set -x + +source tensorflow/tools/ci_build/release/common.sh + +install_ubuntu_16_pip_deps pip3.5 +# Update bazel +install_bazelisk + +# Run configure. +export TF_NEED_GCP=1 +export TF_NEED_HDFS=1 +export TF_NEED_S3=1 +export TF_NEED_CUDA=1 +export TF_CUDA_VERSION=10 +export TF_CUDNN_VERSION=7 +export TF_NEED_TENSORRT=1 +export TENSORRT_INSTALL_PATH=/usr/local/tensorrt +export CC_OPT_FLAGS='-mavx' +export PYTHON_BIN_PATH=$(which python3.5) +export TF2_BEHAVIOR=1 +export PROJECT_NAME="tensorflow_gpu" +export LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$TENSORRT_INSTALL_PATH/lib" +export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 + +yes "" | "$PYTHON_BIN_PATH" configure.py + +# Get the default test targets for bazel. +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh + +tag_filters="gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py35" + +set +e +bazel test --config=cuda --config=opt \ + --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ + --linkopt=-lrt \ + --action_env=TF2_BEHAVIOR="${TF2_BEHAVIOR}" \ + --test_lang_filters=py \ + --test_tag_filters=${tag_filters} \ + --build_tag_filters=${tag_filters} \ + --test_timeout="300,450,1200,3600" --local_test_jobs=4 \ + --test_output=errors --verbose_failures=true --keep_going \ + --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute \ + -- ${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... +test_xml_summary_exit diff --git a/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py35_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py35_pip.sh new file mode 100644 index 00000000000..2a5c550890b --- /dev/null +++ b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py35_pip.sh @@ -0,0 +1,55 @@ +#!/bin/bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +set -e +set -x + +source tensorflow/tools/ci_build/release/common.sh + +install_ubuntu_16_pip_deps pip3.5 +# Update bazel +install_bazelisk + +# Export required variables for running pip.sh +export OS_TYPE="UBUNTU" +export CONTAINER_TYPE="GPU" +export TF_PYTHON_VERSION='python3.5' + +# Run configure. +export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) +yes "" | "$PYTHON_BIN_PATH" configure.py + +# Get the default test targets for bazel. +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh + +# Export optional variables for running pip.sh +export TF_TEST_FILTER_TAGS='gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py35' +export TF_BUILD_FLAGS="--config=release_gpu_linux " +export TF_TEST_FLAGS="--test_tag_filters=${TF_TEST_FILTER_TAGS} --build_tag_filters=${TF_TEST_FILTER_TAGS} \ +--distinct_host_configuration=false \ +--action_env=TF_CUDA_VERSION=10 --action_env=TF_CUDNN_VERSION=7 --test_env=TF2_BEHAVIOR=1 \ +--config=cuda --test_output=errors --local_test_jobs=4 --test_lang_filters=py \ +--verbose_failures=true --keep_going --define=no_tensorflow_py_deps=true \ +--run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute " +export TF_TEST_TARGETS="${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... " +export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" +#export IS_NIGHTLY=0 # Not nightly; uncomment if building from tf repo. +export TF_PROJECT_NAME="tensorflow_gpu" +export TF_PIP_TEST_ROOT="pip_test" + +# To build both tensorflow and tensorflow-gpu pip packages +export TF_BUILD_BOTH_GPU_PACKAGES=1 + +./tensorflow/tools/ci_build/builds/pip_new.sh diff --git a/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py36_nonpip.sh b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py36_nonpip.sh new file mode 100644 index 00000000000..70038a8d875 --- /dev/null +++ b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py36_nonpip.sh @@ -0,0 +1,60 @@ +#!/bin/bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +set -e +set -x + +source tensorflow/tools/ci_build/release/common.sh + +install_ubuntu_16_pip_deps pip3.6 +# Update bazel +install_bazelisk + +# Run configure. 
+export TF_NEED_GCP=1 +export TF_NEED_HDFS=1 +export TF_NEED_S3=1 +export TF_NEED_CUDA=1 +export TF_CUDA_VERSION=10 +export TF_CUDNN_VERSION=7 +export TF_NEED_TENSORRT=1 +export TENSORRT_INSTALL_PATH=/usr/local/tensorrt +export CC_OPT_FLAGS='-mavx' +export PYTHON_BIN_PATH=$(which python3.6) +export TF2_BEHAVIOR=1 +export PROJECT_NAME="tensorflow_gpu" +export LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$TENSORRT_INSTALL_PATH/lib" +export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 + +yes "" | "$PYTHON_BIN_PATH" configure.py + +# Get the default test targets for bazel. +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh + +tag_filters="gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py36" + +set +e +bazel test --config=cuda --config=opt \ + --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ + --linkopt=-lrt \ + --action_env=TF2_BEHAVIOR="${TF2_BEHAVIOR}" \ + --test_lang_filters=py \ + --test_tag_filters=${tag_filters} \ + --build_tag_filters=${tag_filters} \ + --test_timeout="300,450,1200,3600" --local_test_jobs=4 \ + --test_output=errors --verbose_failures=true --keep_going \ + --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute \ + -- ${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... +test_xml_summary_exit diff --git a/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py36_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py36_pip.sh new file mode 100644 index 00000000000..9aa724c27b9 --- /dev/null +++ b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py36_pip.sh @@ -0,0 +1,55 @@ +#!/bin/bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +set -e +set -x + +source tensorflow/tools/ci_build/release/common.sh + +install_ubuntu_16_pip_deps pip3.6 +# Update bazel +install_bazelisk + +# Export required variables for running pip.sh +export OS_TYPE="UBUNTU" +export CONTAINER_TYPE="GPU" +export TF_PYTHON_VERSION='python3.6' + +# Run configure. +export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) +yes "" | "$PYTHON_BIN_PATH" configure.py + +# Get the default test targets for bazel. 
+source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh + +# Export optional variables for running pip.sh +export TF_TEST_FILTER_TAGS='gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py36' +export TF_BUILD_FLAGS="--config=release_gpu_linux " +export TF_TEST_FLAGS="--test_tag_filters=${TF_TEST_FILTER_TAGS} --build_tag_filters=${TF_TEST_FILTER_TAGS} \ +--distinct_host_configuration=false \ +--action_env=TF_CUDA_VERSION=10 --action_env=TF_CUDNN_VERSION=7 --test_env=TF2_BEHAVIOR=1 \ +--config=cuda --test_output=errors --local_test_jobs=4 --test_lang_filters=py \ +--verbose_failures=true --keep_going --define=no_tensorflow_py_deps=true \ +--run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute " +export TF_TEST_TARGETS="${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... " +export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" +#export IS_NIGHTLY=0 # Not nightly; uncomment if building from tf repo. +export TF_PROJECT_NAME="tensorflow_gpu" +export TF_PIP_TEST_ROOT="pip_test" + +# To build both tensorflow and tensorflow-gpu pip packages +export TF_BUILD_BOTH_GPU_PACKAGES=1 + +./tensorflow/tools/ci_build/builds/pip_new.sh diff --git a/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py37_nonpip.sh b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py37_nonpip.sh new file mode 100644 index 00000000000..225b2cf4b7b --- /dev/null +++ b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py37_nonpip.sh @@ -0,0 +1,60 @@ +#!/bin/bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +set -e +set -x + +source tensorflow/tools/ci_build/release/common.sh + +install_ubuntu_16_pip_deps pip3.7 +# Update bazel +install_bazelisk + +# Run configure. +export TF_NEED_GCP=1 +export TF_NEED_HDFS=1 +export TF_NEED_S3=1 +export TF_NEED_CUDA=1 +export TF_CUDA_VERSION=10 +export TF_CUDNN_VERSION=7 +export TF_NEED_TENSORRT=1 +export TENSORRT_INSTALL_PATH=/usr/local/tensorrt +export CC_OPT_FLAGS='-mavx' +export PYTHON_BIN_PATH=$(which python3.7) +export TF2_BEHAVIOR=1 +export PROJECT_NAME="tensorflow_gpu" +export LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$TENSORRT_INSTALL_PATH/lib" +export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 + +yes "" | "$PYTHON_BIN_PATH" configure.py + +# Get the default test targets for bazel.
+source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh + +tag_filters="gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py37" + +set +e +bazel test --config=cuda --config=opt \ + --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ + --linkopt=-lrt \ + --action_env=TF2_BEHAVIOR="${TF2_BEHAVIOR}" \ + --test_lang_filters=py \ + --build_tag_filters=${tag_filters} \ + --test_tag_filters=${tag_filters} \ + --test_timeout="300,450,1200,3600" --local_test_jobs=4 \ + --test_output=errors --verbose_failures=true --keep_going \ + --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute \ + -- ${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... +test_xml_summary_exit diff --git a/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py37_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py37_pip.sh new file mode 100644 index 00000000000..9bfc6608a0b --- /dev/null +++ b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py37_pip.sh @@ -0,0 +1,55 @@ +#!/bin/bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +set -e +set -x + +source tensorflow/tools/ci_build/release/common.sh + +install_ubuntu_16_pip_deps pip3.7 +# Update bazel +install_bazelisk + +# Export required variables for running pip.sh +export OS_TYPE="UBUNTU" +export CONTAINER_TYPE="GPU" +export TF_PYTHON_VERSION='python3.7' + +# Run configure. +export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) +yes "" | "$PYTHON_BIN_PATH" configure.py + +# Get the default test targets for bazel. +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh + +# Export optional variables for running pip.sh +export TF_TEST_FILTER_TAGS='gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py37' +export TF_BUILD_FLAGS="--config=release_gpu_linux " +export TF_TEST_FLAGS="--test_tag_filters=${TF_TEST_FILTER_TAGS} --build_tag_filters=${TF_TEST_FILTER_TAGS} \ +--distinct_host_configuration=false \ +--action_env=TF_CUDA_VERSION=10 --action_env=TF_CUDNN_VERSION=7 --test_env=TF2_BEHAVIOR=1 \ +--config=cuda --test_output=errors --local_test_jobs=4 --test_lang_filters=py \ +--verbose_failures=true --keep_going --define=no_tensorflow_py_deps=true \ +--run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute " +export TF_TEST_TARGETS="${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... " +export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" +#export IS_NIGHTLY=0 # Not nightly; uncomment if building from tf repo. 
+export TF_PROJECT_NAME="tensorflow_gpu" +export TF_PIP_TEST_ROOT="pip_test" + +# To build both tensorflow and tensorflow-gpu pip packages +export TF_BUILD_BOTH_GPU_PACKAGES=1 + +./tensorflow/tools/ci_build/builds/pip_new.sh diff --git a/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py38_nonpip.sh b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py38_nonpip.sh new file mode 100644 index 00000000000..f7678b7436f --- /dev/null +++ b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py38_nonpip.sh @@ -0,0 +1,60 @@ +#!/bin/bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +set -e +set -x + +source tensorflow/tools/ci_build/release/common.sh + +install_ubuntu_16_pip_deps pip3.8 +# Update bazel +update_bazel_linux + +# Run configure. +export TF_NEED_GCP=1 +export TF_NEED_HDFS=1 +export TF_NEED_S3=1 +export TF_NEED_CUDA=1 +export TF_CUDA_VERSION=10 +export TF_CUDNN_VERSION=7 +export TF_NEED_TENSORRT=1 +export TENSORRT_INSTALL_PATH=/usr/local/tensorrt +export CC_OPT_FLAGS='-mavx' +export PYTHON_BIN_PATH=$(which python3.8) +export TF2_BEHAVIOR=1 +export PROJECT_NAME="tensorflow_gpu" +export LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$TENSORRT_INSTALL_PATH/lib" +export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 + +yes "" | "$PYTHON_BIN_PATH" configure.py + +# Get the default test targets for bazel. +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh + +tag_filters="gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py38" + +set +e +bazel test --config=cuda --config=opt \ + --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ + --linkopt=-lrt \ + --action_env=TF2_BEHAVIOR="${TF2_BEHAVIOR}" \ + --test_lang_filters=py \ + --build_tag_filters=${tag_filters} \ + --test_tag_filters=${tag_filters} \ + --test_timeout="300,450,1200,3600" --local_test_jobs=4 \ + --test_output=errors --verbose_failures=true --keep_going \ + --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute \ + -- ${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... +test_xml_summary_exit diff --git a/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py38_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py38_pip.sh new file mode 100644 index 00000000000..d8838e7704a --- /dev/null +++ b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py38_pip.sh @@ -0,0 +1,55 @@ +#!/bin/bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +set -e +set -x + +source tensorflow/tools/ci_build/release/common.sh + +install_ubuntu_16_pip_deps pip3.8 +# Update bazel +update_bazel_linux + +# Export required variables for running pip.sh +export OS_TYPE="UBUNTU" +export CONTAINER_TYPE="GPU" +export TF_PYTHON_VERSION='python3.8' + +# Run configure. +export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) +yes "" | "$PYTHON_BIN_PATH" configure.py + +# Get the default test targets for bazel. +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh + +# Export optional variables for running pip.sh +export TF_TEST_FILTER_TAGS='gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py38' +export TF_BUILD_FLAGS="--config=release_gpu_linux " +export TF_TEST_FLAGS="--test_tag_filters=${TF_TEST_FILTER_TAGS} --build_tag_filters=${TF_TEST_FILTER_TAGS} \ +--distinct_host_configuration=false \ +--action_env=TF_CUDA_VERSION=10 --action_env=TF_CUDNN_VERSION=7 --test_env=TF2_BEHAVIOR=1 \ +--config=cuda --test_output=errors --local_test_jobs=4 --test_lang_filters=py \ +--verbose_failures=true --keep_going --define=no_tensorflow_py_deps=true \ +--run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute " +export TF_TEST_TARGETS="${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... " +export TF_PIP_TESTS="test_pip_virtualenv_non_clean test_pip_virtualenv_clean" +#export IS_NIGHTLY=0 # Not nightly; uncomment if building from tf repo. +export TF_PROJECT_NAME="tensorflow_gpu" +export TF_PIP_TEST_ROOT="pip_test" + +# To build both tensorflow and tensorflow-gpu pip packages +export TF_BUILD_BOTH_GPU_PACKAGES=1 + +./tensorflow/tools/ci_build/builds/pip_new.sh diff --git a/tensorflow/tools/ci_build/rel/ubuntu_cuda11/sanity.sh b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/sanity.sh new file mode 100644 index 00000000000..4fc600de867 --- /dev/null +++ b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/sanity.sh @@ -0,0 +1,36 @@ +#!/bin/bash +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +set -e + +# Install latest bazel +source tensorflow/tools/ci_build/release/common.sh +install_bazelisk +which bazel + +# We need py3 lint +sudo pip3 install pep8 + +# TODO(gunan): figure out why we get stuck with later versions of pylint. +# Install pylint.
+sudo python3 -m pip install setuptools --upgrade +sudo python2 -m pip install pylint==1.6.4 +sudo python3 -m pip install pylint==1.6.4 + +# TODO(yifeif): print pylint version for debug. remove later. +python3 -m pylint --version + +# Run tensorflow sanity checks. +tensorflow/tools/ci_build/ci_sanity.sh From a684b992889cc93fb4dc461934de894a4f18268b Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Thu, 6 Aug 2020 16:06:25 -0700 Subject: [PATCH 2298/2522] Removed duplicated fields. PiperOrigin-RevId: 325330615 Change-Id: I9e5af33fb2dce06fff8f187a0a024b0c80b9cc3c --- .../lite/delegates/gpu/cl/kernels/conv_3d.cc | 47 +++++------ .../lite/delegates/gpu/cl/kernels/conv_3d.h | 5 +- .../gpu/cl/kernels/conv_buffer_1x1.cc | 7 +- .../gpu/cl/kernels/conv_buffer_1x1.h | 2 - .../delegates/gpu/cl/kernels/conv_powervr.cc | 82 +++++++++---------- .../delegates/gpu/cl/kernels/conv_powervr.h | 11 ++- 6 files changed, 68 insertions(+), 86 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.cc index b1e1e39327c..727cd488694 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.cc @@ -237,15 +237,12 @@ int3 Conv3D::GetGridSize() const { DivideRoundUp(dst_[0]->Slices(), conv_params_.block_size.w) * DivideRoundUp(dst_[0]->Depth(), conv_params_.block_size.z); int3 wg; - wg.x = DivideRoundUp(grid_x, conv_params_.work_group_size.x); - wg.y = DivideRoundUp(grid_y, conv_params_.work_group_size.y); - wg.z = DivideRoundUp(grid_z, conv_params_.work_group_size.z); - return int3(wg[conv_params_.work_group_launch_order[0]] * - conv_params_.work_group_size.x, - wg[conv_params_.work_group_launch_order[1]] * - conv_params_.work_group_size.y, - wg[conv_params_.work_group_launch_order[2]] * - conv_params_.work_group_size.z); + wg.x = DivideRoundUp(grid_x, work_group_size_.x); + wg.y = DivideRoundUp(grid_y, work_group_size_.y); + wg.z = DivideRoundUp(grid_z, work_group_size_.z); + return int3(wg[conv_params_.work_group_launch_order[0]] * work_group_size_.x, + wg[conv_params_.work_group_launch_order[1]] * work_group_size_.y, + wg[conv_params_.work_group_launch_order[2]] * work_group_size_.z); } absl::Status Conv3D::Tune(const TuningParameters& params) { @@ -259,9 +256,8 @@ absl::Status Conv3D::Tune(const TuningParameters& params) { conv_params_.work_group_launch_order[1] == 1 && conv_params_.work_group_launch_order[2] == 2) { RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); - RETURN_IF_ERROR(GetBestWorkGroupConv(params, kernel_, grid_size_, - &conv_params_.work_group_size)); - work_group_size_ = conv_params_.work_group_size; + RETURN_IF_ERROR( + GetBestWorkGroupConv(params, kernel_, grid_size_, &work_group_size_)); } return absl::OkStatus(); } @@ -328,14 +324,13 @@ std::string Conv3D::GenerateConv3D(const OperationDef& op_def, conv_params.weights_upload_type == Conv3D::WeightsUploadType::LOCAL_MEM_ASYNC_SUBGROUP; - const int3 work_group_size = conv_params.work_group_size; const int4 block_size = conv_params.block_size; std::string c = GetCommonDefines(op_def.precision); if (need_local_mem) { // we use fixed workgroup size when use local mem c += "__attribute__((reqd_work_group_size(" + - std::to_string(work_group_size.x) + ", " + - std::to_string(work_group_size.y) + ", " + - std::to_string(work_group_size.z) + ")))\n"; + std::to_string(work_group_size_.x) + ", " + + std::to_string(work_group_size_.y) + ", " + + std::to_string(work_group_size_.z) + ")))\n"; } c += "__kernel void 
main_function(\n"; c += "$0) {\n"; @@ -348,7 +343,7 @@ std::string Conv3D::GenerateConv3D(const OperationDef& op_def, } if (conv_params.weights_upload_type == Conv3D::WeightsUploadType::LOCAL_MEM_BY_THREADS) { - c += " int lid = get_local_id(1) * " + std::to_string(work_group_size.x) + + c += " int lid = get_local_id(1) * " + std::to_string(work_group_size_.x) + " + get_local_id(0);\n"; } for (int s = 0; s < block_size.w; ++s) { @@ -608,7 +603,7 @@ std::string Conv3D::GenerateConv3D(const OperationDef& op_def, declare_src(); c += " do {\n"; const int total_work_items = - work_group_size.x * work_group_size.y * work_group_size.z; + work_group_size_.x * work_group_size_.y * work_group_size_.z; if (conv_params.weights_upload_type == Conv3D::WeightsUploadType::LOCAL_MEM_ASYNC_SUBGROUP) { c += @@ -731,14 +726,14 @@ Conv3D::ConvParams Conv3D::GuessBestParams(const CLDevice& device, int src_slices, int dst_slices, bool x_kernel_is_1, bool y_kernel_is_1, - bool z_kernel_is_1) const { + bool z_kernel_is_1) { ConvParams conv_params; conv_params.x_kernel_is_1 = x_kernel_is_1; conv_params.y_kernel_is_1 = y_kernel_is_1; conv_params.z_kernel_is_1 = z_kernel_is_1; if (device.IsNvidia()) { conv_params.block_size = int4(1, 1, 1, 4); - conv_params.work_group_size = int3(8, 4, 1); + work_group_size_ = int3(8, 4, 1); conv_params.work_group_launch_order = int3(2, 0, 1); conv_params.src_depth_loop_size = 1; conv_params.weights_upload_type = WeightsUploadType::LOCAL_MEM_BY_THREADS; @@ -757,7 +752,7 @@ Conv3D::ConvParams Conv3D::GuessBestParams(const CLDevice& device, } } else if (device.IsPowerVR()) { conv_params.block_size = int4(1, 1, 1, 4); - conv_params.work_group_size = int3(8, 4, 1); + work_group_size_ = int3(8, 4, 1); conv_params.work_group_launch_order = int3(2, 0, 1); conv_params.src_depth_loop_size = 1; conv_params.weights_upload_type = @@ -791,17 +786,17 @@ Conv3D::ConvParams Conv3D::GuessBestParams(const CLDevice& device, } } conv_params.block_size.x = 2; - conv_params.work_group_size = int3(4, 8, 1); + work_group_size_ = int3(4, 8, 1); } } else if (device.IsAdreno()) { conv_params.block_size = int4(2, 2, 1, 2); - conv_params.work_group_size = int3(8, 4, 1); + work_group_size_ = int3(8, 4, 1); conv_params.work_group_launch_order = int3(0, 1, 2); conv_params.src_depth_loop_size = 1; conv_params.weights_upload_type = WeightsUploadType::TEXTURES_MEM; } else if (device.IsMali()) { conv_params.block_size = int4(1, 1, 1, 4); - conv_params.work_group_size = int3(8, 4, 1); + work_group_size_ = int3(8, 4, 1); conv_params.work_group_launch_order = int3(0, 1, 2); conv_params.src_depth_loop_size = 1; conv_params.weights_upload_type = WeightsUploadType::GLOBAL_MEM; @@ -820,7 +815,7 @@ Conv3D::ConvParams Conv3D::GuessBestParams(const CLDevice& device, } } else { conv_params.block_size = int4(2, 2, 1, 2); - conv_params.work_group_size = int3(8, 4, 1); + work_group_size_ = int3(8, 4, 1); conv_params.work_group_launch_order = int3(0, 1, 2); conv_params.src_depth_loop_size = 1; conv_params.weights_upload_type = WeightsUploadType::TEXTURES_MEM; @@ -831,7 +826,7 @@ Conv3D::ConvParams Conv3D::GuessBestParams(const CLDevice& device, Conv3D::ConvParams Conv3D::GuessBestParams( const CLDevice& device, const OperationDef& definition, - const Convolution3DAttributes& attr) const { + const Convolution3DAttributes& attr) { const int dst_slices = DivideRoundUp(attr.weights.shape.o, 4); const int src_slices = DivideRoundUp(attr.weights.shape.i, 4); const bool x_kernel_is_1 = attr.weights.shape.w == 1 && attr.strides.w == 1 && 
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.h index ce2d7794411..ffa269d1629 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.h @@ -59,7 +59,6 @@ class Conv3D : public GPUOperation { struct ConvParams { int4 block_size; // WHDS - int3 work_group_size; int3 work_group_launch_order; int src_depth_loop_size; WeightsUploadType weights_upload_type; @@ -98,12 +97,12 @@ class Conv3D : public GPUOperation { ConvParams GuessBestParams(const CLDevice& device, const OperationDef& definition, - const Convolution3DAttributes& attr) const; + const Convolution3DAttributes& attr); ConvParams GuessBestParams(const CLDevice& device, const OperationDef& definition, int src_slices, int dst_slices, bool x_kernel_is_1, - bool y_kernel_is_1, bool z_kernel_is_1) const; + bool y_kernel_is_1, bool z_kernel_is_1); std::string GenerateConv3D(const OperationDef& op_def, bool stride_correction, const Conv3D::ConvParams& conv_params); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc index 949651c1f87..de6021aa5fe 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc @@ -153,7 +153,7 @@ ConvBuffer1x1::ConvBuffer1x1(const OperationDef& definition, const ConvParams& conv_params) : GPUOperation(definition), conv_params_(conv_params) { code_ = GenerateConvBuffer1x1(definition_, conv_params_, &args_); - work_group_size_ = conv_params_.work_group_size; + work_group_size_ = int3(2, 4, 1); } ConvBuffer1x1::ConvBuffer1x1(ConvBuffer1x1&& operation) @@ -317,9 +317,8 @@ int3 ConvBuffer1x1::GetGridSize() const { absl::Status ConvBuffer1x1::Tune(const TuningParameters& params) { RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); - RETURN_IF_ERROR(GetBestWorkGroupConv(params, kernel_, grid_size_, - &conv_params_.work_group_size)); - work_group_size_ = conv_params_.work_group_size; + RETURN_IF_ERROR( + GetBestWorkGroupConv(params, kernel_, grid_size_, &work_group_size_)); return absl::OkStatus(); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.h index 90df8f2f9ad..94b7cbd1b37 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.h @@ -65,8 +65,6 @@ class ConvBuffer1x1 : public GPUOperation { // some cases we need separate weights for H dimension and convolution // kernel requires very small modifications to support it. 
bool different_weights_for_height = false; - - int3 work_group_size = int3(2, 4, 1); }; private: diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc index c4e26725f74..f69368d1083 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc @@ -184,7 +184,6 @@ void ConvPowerVR::GenerateCode(const DeviceInfo& device_info) { definition_.IsBatchSupported() && stride_padding_.x != 1; code_ = GenerateConv(device_info, definition_, stride_correction, conv_params_); - work_group_size_ = conv_params_.work_group_size; if (definition_.precision == CalculationsPrecision::F16 && device_info.IsPowerVR()) { compiler_options_.push_back(CompilerOptions::POWERVR_FP16); @@ -225,23 +224,19 @@ int3 ConvPowerVR::GetGridSize() const { int3 wg; if (conv_params_.linear_hw) { - wg.x = DivideRoundUp(grid_x * grid_y, conv_params_.work_group_size.x); - wg.y = DivideRoundUp(grid_z, conv_params_.work_group_size.y); - return int3(wg[conv_params_.work_group_launch_order[0]] * - conv_params_.work_group_size.x, - wg[conv_params_.work_group_launch_order[1]] * - conv_params_.work_group_size.y, - 1); + wg.x = DivideRoundUp(grid_x * grid_y, work_group_size_.x); + wg.y = DivideRoundUp(grid_z, work_group_size_.y); + return int3( + wg[conv_params_.work_group_launch_order[0]] * work_group_size_.x, + wg[conv_params_.work_group_launch_order[1]] * work_group_size_.y, 1); } else { - wg.x = DivideRoundUp(grid_x, conv_params_.work_group_size.x); - wg.y = DivideRoundUp(grid_y, conv_params_.work_group_size.y); - wg.z = DivideRoundUp(grid_z, conv_params_.work_group_size.z); - return int3(wg[conv_params_.work_group_launch_order[0]] * - conv_params_.work_group_size.x, - wg[conv_params_.work_group_launch_order[1]] * - conv_params_.work_group_size.y, - wg[conv_params_.work_group_launch_order[2]] * - conv_params_.work_group_size.z); + wg.x = DivideRoundUp(grid_x, work_group_size_.x); + wg.y = DivideRoundUp(grid_y, work_group_size_.y); + wg.z = DivideRoundUp(grid_z, work_group_size_.z); + return int3( + wg[conv_params_.work_group_launch_order[0]] * work_group_size_.x, + wg[conv_params_.work_group_launch_order[1]] * work_group_size_.y, + wg[conv_params_.work_group_launch_order[2]] * work_group_size_.z); } } @@ -257,9 +252,8 @@ absl::Status ConvPowerVR::Tune(const TuningParameters& params) { conv_params_.work_group_launch_order[1] == 1 && conv_params_.work_group_launch_order[2] == 2) { RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); - RETURN_IF_ERROR(GetBestWorkGroupConv(params, kernel_, grid_size_, - &conv_params_.work_group_size)); - work_group_size_ = conv_params_.work_group_size; + RETURN_IF_ERROR( + GetBestWorkGroupConv(params, kernel_, grid_size_, &work_group_size_)); } return absl::OkStatus(); } @@ -345,14 +339,12 @@ std::string ConvPowerVR::GenerateConv(const DeviceInfo& device_info, c += "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n"; } } - - const int3 work_group_size = conv_params.work_group_size; const int3 block_size = conv_params.block_size; if (conv_params.fixed_work_group_size) { c += "__attribute__((reqd_work_group_size(" + - std::to_string(work_group_size.x) + ", " + - std::to_string(work_group_size.y) + ", " + - std::to_string(work_group_size.z) + ")))\n"; + std::to_string(work_group_size_.x) + ", " + + std::to_string(work_group_size_.y) + ", " + + std::to_string(work_group_size_.z) + ")))\n"; } if (use_simd_broadcast && device_info.IsIntel()) { c += 
"__attribute__((intel_reqd_sub_group_size(" + @@ -383,7 +375,7 @@ std::string ConvPowerVR::GenerateConv(const DeviceInfo& device_info, c += " int lid = get_local_id(0);\n"; } else { c += " int lid = get_local_id(1) * " + - std::to_string(work_group_size.x) + " + get_local_id(0);\n"; + std::to_string(work_group_size_.x) + " + get_local_id(0);\n"; } } if (use_simd_broadcast) { @@ -590,7 +582,7 @@ std::string ConvPowerVR::GenerateConv(const DeviceInfo& device_info, c += " do {\n"; declare_src(); const int total_work_items = - work_group_size.x * work_group_size.y * work_group_size.z; + work_group_size_.x * work_group_size_.y * work_group_size_.z; if (conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::LOCAL_MEM_ASYNC_SUBGROUP) { c += GenerateAsyncUpload("weights_cache", "filters_loc", @@ -694,7 +686,7 @@ std::string ConvPowerVR::GenerateConv(const DeviceInfo& device_info, ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( const CLDevice& device, const OperationDef& definition, int src_depth, int dst_depth, bool x_kernel_is_1, bool y_kernel_is_1, - bool different_weights_for_height, const BHWC* dst_shape) const { + bool different_weights_for_height, const BHWC* dst_shape) { ConvParams conv_params; conv_params.linear_hw = false; conv_params.weights_data_type = @@ -704,12 +696,12 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( conv_params.different_weights_for_height = different_weights_for_height; if (device.IsNvidia()) { if (different_weights_for_height) { - conv_params.work_group_size = int3(32, 1, 1); + work_group_size_ = int3(32, 1, 1); conv_params.work_group_launch_order = int3(2, 0, 1); conv_params.fixed_work_group_size = true; } else { conv_params.linear_hw = true; - conv_params.work_group_size = int3(32, 1, 1); + work_group_size_ = int3(32, 1, 1); conv_params.work_group_launch_order = int3(1, 0, 2); conv_params.fixed_work_group_size = true; } @@ -749,12 +741,12 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( } } else if (device.IsPowerVR()) { if (different_weights_for_height) { - conv_params.work_group_size = int3(32, 1, 1); + work_group_size_ = int3(32, 1, 1); conv_params.work_group_launch_order = int3(2, 0, 1); conv_params.fixed_work_group_size = true; } else { conv_params.linear_hw = true; - conv_params.work_group_size = int3(32, 1, 1); + work_group_size_ = int3(32, 1, 1); conv_params.work_group_launch_order = int3(1, 0, 2); conv_params.fixed_work_group_size = true; } @@ -797,11 +789,11 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( } } else if (device.IsAMD()) { if (different_weights_for_height) { - conv_params.work_group_size = int3(32, 1, 1); + work_group_size_ = int3(32, 1, 1); conv_params.work_group_launch_order = int3(2, 0, 1); conv_params.fixed_work_group_size = true; } else { - conv_params.work_group_size = int3(8, 4, 1); + work_group_size_ = int3(8, 4, 1); conv_params.work_group_launch_order = int3(2, 0, 1); conv_params.fixed_work_group_size = true; } @@ -860,25 +852,25 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( definition.precision == CalculationsPrecision::F16) { conv_params.src_depth_loop_size = 4; } - conv_params.work_group_size = int3(4, 4, 1); + work_group_size_ = int3(4, 4, 1); conv_params.work_group_launch_order = int3(0, 1, 2); conv_params.fixed_work_group_size = false; conv_params.weights_upload_type = WeightsUploadType::GLOBAL_MEM; } else if (device.IsAdreno()) { conv_params.block_size = int3(2, 2, 1); - conv_params.work_group_size = int3(8, 2, 1); + work_group_size_ = int3(8, 2, 1); 
conv_params.work_group_launch_order = int3(0, 1, 2); conv_params.fixed_work_group_size = false; conv_params.src_depth_loop_size = 1; conv_params.weights_upload_type = WeightsUploadType::GLOBAL_MEM; } else if (device.IsIntel()) { if (different_weights_for_height) { - conv_params.work_group_size = int3(16, 1, 1); + work_group_size_ = int3(16, 1, 1); conv_params.work_group_launch_order = int3(0, 1, 2); conv_params.fixed_work_group_size = true; } else { conv_params.linear_hw = true; - conv_params.work_group_size = int3(16, 1, 1); + work_group_size_ = int3(16, 1, 1); conv_params.work_group_launch_order = int3(0, 1, 2); conv_params.fixed_work_group_size = true; } @@ -908,7 +900,7 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( } } else { conv_params.block_size = int3(1, 1, 4); - conv_params.work_group_size = int3(8, 2, 1); + work_group_size_ = int3(8, 2, 1); conv_params.work_group_launch_order = int3(0, 1, 2); conv_params.fixed_work_group_size = false; conv_params.src_depth_loop_size = 1; @@ -933,7 +925,7 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( const CLDevice& device, const OperationDef& definition, - const Convolution2DAttributes& attr, const BHWC* dst_shape) const { + const Convolution2DAttributes& attr, const BHWC* dst_shape) { const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4); const int src_depth = DivideRoundUp(attr.weights.shape.i, 4); const bool x_kernel_is_1 = attr.weights.shape.w == 1 && attr.strides.w == 1 && @@ -951,7 +943,7 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( const CLDevice& device, const OperationDef& definition, const Convolution2DAttributes& attr, const BHWC& weights_shape, - const BHWC* dst_shape) const { + const BHWC* dst_shape) { const int dst_depth = DivideRoundUp(weights_shape.b, 4); const int src_depth = DivideRoundUp(weights_shape.c, 4); const bool x_kernel_is_1 = @@ -966,13 +958,13 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( const CLDevice& device, const OperationDef& definition, - const FullyConnectedAttributes& attr, const BHWC* dst_shape) const { + const FullyConnectedAttributes& attr, const BHWC* dst_shape) { const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4); const int src_depth = DivideRoundUp(attr.weights.shape.i, 4); ConvPowerVR::ConvParams params = GuessBestParams( device, definition, src_depth, dst_depth, true, true, false, dst_shape); - params.work_group_size.x *= params.work_group_size.y; - params.work_group_size.y = 1; + work_group_size_.x *= work_group_size_.y; + work_group_size_.y = 1; params.block_size.x *= params.block_size.y; params.block_size.y = 1; return params; @@ -980,7 +972,7 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( ConvPowerVR::ConvParams ConvPowerVR::GuessBestParamsWinograd( const CLDevice& device, const OperationDef& definition, - const Convolution2DAttributes& attr, const BHWC* dst_shape) const { + const Convolution2DAttributes& attr, const BHWC* dst_shape) { const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4); const int src_depth = DivideRoundUp(attr.weights.shape.i, 4); ConvPowerVR::ConvParams params = GuessBestParams( diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h index 148dad38708..e61d4c14ce7 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h +++ 
b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h @@ -81,7 +81,6 @@ class ConvPowerVR : public GPUOperation { // F32_F16 precision mode DataType weights_data_type; // used for weights and biases int3 block_size; - int3 work_group_size; int3 work_group_launch_order; bool fixed_work_group_size; bool linear_hw; @@ -180,26 +179,26 @@ class ConvPowerVR : public GPUOperation { ConvParams GuessBestParams(const CLDevice& device, const OperationDef& definition, const Convolution2DAttributes& attr, - const BHWC* dst_shape = nullptr) const; + const BHWC* dst_shape = nullptr); ConvParams GuessBestParams(const CLDevice& device, const OperationDef& definition, const Convolution2DAttributes& attr, const BHWC& weights_shape, - const BHWC* dst_shape = nullptr) const; + const BHWC* dst_shape = nullptr); ConvParams GuessBestParams(const CLDevice& device, const OperationDef& definition, const FullyConnectedAttributes& attr, - const BHWC* dst_shape = nullptr) const; + const BHWC* dst_shape = nullptr); ConvParams GuessBestParamsWinograd(const CLDevice& device, const OperationDef& definition, const Convolution2DAttributes& attr, - const BHWC* dst_shape = nullptr) const; + const BHWC* dst_shape = nullptr); ConvParams GuessBestParams(const CLDevice& device, const OperationDef& definition, int src_depth, int dst_depth, bool x_kernel_is_1, bool y_kernel_is_1, bool different_weights_for_height, - const BHWC* dst_shape = nullptr) const; + const BHWC* dst_shape = nullptr); std::string GenerateConv(const DeviceInfo& device_info, const OperationDef& op_def, bool stride_correction, From eaaccbe0dd81533e17ec5975553e70aedf48d302 Mon Sep 17 00:00:00 2001 From: Anthony Platanios Date: Thu, 6 Aug 2020 16:15:47 -0700 Subject: [PATCH 2299/2522] Fix. --- tensorflow/BUILD | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 484e45eb11d..6745b3e54fa 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -882,7 +882,7 @@ genrule( visibility = ["//visibility:public"], ) -# The interface library (tensorflow_framework.dll.if.lib) for linking tensorflow DLL +# The interface library (tensorflow_framework.dll.if.lib) for linking tensorflow DLL # library (tensorflow_framework.dll) on Windows. # To learn more about import library (called interface library in Bazel): # https://docs.microsoft.com/en-us/cpp/build/linking-an-executable-to-a-dll?view=vs-2017#linking-implicitly @@ -893,7 +893,7 @@ filegroup( visibility = ["//visibility:public"], ) -# Rename the import library for tensorflow_framework.dll from +# Rename the import library for tensorflow_framework.dll from # tensorflow_framework.dll.if.lib to tensorflow_framework.lib genrule( name = "tensorflow_framework_dll_import_lib", From dc76cd3e09ef7bc1b3d3f3f1108df3f5cd22276c Mon Sep 17 00:00:00 2001 From: Katherine Wu Date: Thu, 6 Aug 2020 16:11:23 -0700 Subject: [PATCH 2300/2522] Add structured input signature to functions loaded from V1 SavedModel. 
PiperOrigin-RevId: 325331549 Change-Id: I030c5ce8a54372a0f5350a22b981721c787615d5 --- tensorflow/python/saved_model/BUILD | 1 + tensorflow/python/saved_model/load_v1_in_v2.py | 9 +++++++++ tensorflow/python/saved_model/load_v1_in_v2_test.py | 10 ++++++++++ 3 files changed, 20 insertions(+) diff --git a/tensorflow/python/saved_model/BUILD b/tensorflow/python/saved_model/BUILD index 27e0e984f5f..45ee73de51c 100644 --- a/tensorflow/python/saved_model/BUILD +++ b/tensorflow/python/saved_model/BUILD @@ -445,6 +445,7 @@ py_strict_library( "//tensorflow/python:constant_op", "//tensorflow/python:dtypes", "//tensorflow/python:framework_ops", + "//tensorflow/python:func_graph", "//tensorflow/python:platform", "//tensorflow/python:saver", "//tensorflow/python:sparse_tensor", diff --git a/tensorflow/python/saved_model/load_v1_in_v2.py b/tensorflow/python/saved_model/load_v1_in_v2.py index ede91da168c..add3b4e6320 100644 --- a/tensorflow/python/saved_model/load_v1_in_v2.py +++ b/tensorflow/python/saved_model/load_v1_in_v2.py @@ -25,6 +25,7 @@ from tensorflow.python.eager import lift_to_graph from tensorflow.python.eager import wrap_function from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import func_graph from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import array_ops @@ -143,6 +144,7 @@ class _EagerSavedModelLoader(loader_impl.SavedModelLoader): for input_spec in input_specs ] input_names = [] + input_tensors = [] for original_input_name, feed in zip(original_input_names, feeds): if isinstance(feed, sparse_tensor.SparseTensor): # We have to give explicit name for SparseTensor arguments, because @@ -151,8 +153,10 @@ class _EagerSavedModelLoader(loader_impl.SavedModelLoader): values_name = "%s_values" % original_input_name dense_shape_name = "%s_dense_shape" % original_input_name input_names.extend([indices_name, values_name, dense_shape_name]) + input_tensors.extend([feed.indices, feed.values, feed.dense_shape]) else: input_names.append(original_input_name) + input_tensors.append(feed) fetches = {name: out for name, out in signature_def.outputs.items()} try: signature_fn = wrapped.prune(feeds=feeds, fetches=fetches) @@ -173,6 +177,11 @@ class _EagerSavedModelLoader(loader_impl.SavedModelLoader): raise # pylint: disable=protected-access signature_fn._arg_keywords = input_names + signature_fn._func_graph.structured_input_signature = ( + (), + func_graph.convert_structure_to_signature( + dict(zip(input_names, input_tensors)))) + if len(input_names) == 1: # Allowing positional arguments does not create any ambiguity if there's # only one. 
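A minimal usage sketch of the behavior added above, assuming a TF1-style SavedModel exported at a placeholder path with a "serving_default" signature (the path, signature key, and input name are assumptions for illustration, not taken from this patch); the test diff that follows exercises the same property:

    # Sketch only: inspect the structured input signature of a signature function
    # loaded from a V1 SavedModel ("/tmp/v1_saved_model" and "start" are assumed names).
    import tensorflow as tf

    imported = tf.saved_model.load("/tmp/v1_saved_model")
    serving_fn = imported.signatures["serving_default"]
    args, kwargs = serving_fn.structured_input_signature
    # With this change, `args` is an empty tuple and `kwargs` maps each feed name
    # to a tf.TensorSpec describing that input, e.g. {"start": TensorSpec(shape=None)}.
    print(args, kwargs)
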
diff --git a/tensorflow/python/saved_model/load_v1_in_v2_test.py b/tensorflow/python/saved_model/load_v1_in_v2_test.py index bafeea128ed..806a4db6fba 100644 --- a/tensorflow/python/saved_model/load_v1_in_v2_test.py +++ b/tensorflow/python/saved_model/load_v1_in_v2_test.py @@ -32,6 +32,7 @@ from tensorflow.python.framework import function as framework_function from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import tensor_spec from tensorflow.python.framework import test_util from tensorflow.python.framework import versions from tensorflow.python.lib.io import file_io @@ -630,6 +631,15 @@ class LoadTest(test.TestCase): imported.signatures["serving_default"](constant_op.constant(2.))), {"y": [10, 8, 6, 4, 2, 0]}) + def test_structured_input_signature(self): + path = self._v1_single_metagraph_saved_model(False) + imported = load.load(path) + args, kwargs = ( + imported.signatures["serving_default"].structured_input_signature) + self.assertEqual(args, ()) + self.assertAllEqual( + kwargs, {"start": tensor_spec.TensorSpec(shape=None, name="start")}) + if __name__ == "__main__": test.main() From 0c3334857dd912a6b7a31c85b056721afe6af324 Mon Sep 17 00:00:00 2001 From: Advait Jain Date: Thu, 6 Aug 2020 16:15:44 -0700 Subject: [PATCH 2301/2522] Use selects.with_or to avoid duplication of deps. PiperOrigin-RevId: 325332323 Change-Id: I3b0b35abbde47adc1d2fcbdea2e21e7e608cfa43 --- tensorflow/lite/kernels/internal/BUILD | 91 ++++++++------------------ 1 file changed, 29 insertions(+), 62 deletions(-) diff --git a/tensorflow/lite/kernels/internal/BUILD b/tensorflow/lite/kernels/internal/BUILD index 2707871df16..ad11c06eb37 100644 --- a/tensorflow/lite/kernels/internal/BUILD +++ b/tensorflow/lite/kernels/internal/BUILD @@ -1,3 +1,4 @@ +load("@bazel_skylib//lib:selects.bzl", "selects") load("//tensorflow:tensorflow.bzl", "transitive_hdrs") load("//tensorflow/lite:build_def.bzl", "tflite_copts") load("//tensorflow/lite/micro:build_def.bzl", "micro_copts") @@ -735,70 +736,36 @@ cc_library( ":cpu_check", "//third_party/eigen3", "//tensorflow/lite/c:common", - ] + select({ - ":aarch64": [ - ":neon_tensor_utils", - ], - ":arm": [ - ":neon_tensor_utils", - ], - ":arm64-v8a": [ - ":neon_tensor_utils", - ], - ":armeabi-v7a": [ - ":neon_tensor_utils", - ], - ":armhf": [ - ":neon_tensor_utils", - ], - ":armv7a": [ - ":neon_tensor_utils", - ], - ":haswell": [ + ] + selects.with_or({ + ( + ":aarch64", + ":arm", + ":arm64-v8a", + ":armeabi-v7a", + ":armhf", + ":armv7a", + ":ios_armv7", + ":ios_arm64", + ":ios_arm64e", + ":raspberry_pi_with_neon", + ): [":neon_tensor_utils"], + ( + ":darwin", + ":darwin_x86_64", + ":freebsd", + ":haswell", + ":ios_x86_64", + ":x86_64", + ":x86", + ":k8", + ":windows", + ): [ ":sse_tensor_utils", ], - ":ios_armv7": [ - ":neon_tensor_utils", - ], - ":ios_arm64": [ - ":neon_tensor_utils", - ], - ":ios_arm64e": [ - ":neon_tensor_utils", - ], - ":raspberry_pi_with_neon": [ - ":neon_tensor_utils", - ], - ":ios_x86_64": [ - ":sse_tensor_utils", - ], - ":x86_64": [ - ":sse_tensor_utils", - ], - ":x86": [ - ":sse_tensor_utils", - ], - ":k8": [ - ":sse_tensor_utils", - ], - ":darwin": [ - ":sse_tensor_utils", - ], - ":darwin_x86_64": [ - ":sse_tensor_utils", - ], - ":freebsd": [ - ":sse_tensor_utils", - ], - ":windows": [ - ":sse_tensor_utils", - ], - ":tf_lite_static_memory": [ - ":portable_tensor_utils", - ], - "//conditions:default": [ - 
":portable_tensor_utils", - ], + ( + ":tf_lite_static_memory", + "//conditions:default", + ): [":portable_tensor_utils"], }), ) From 0a0a9eeb6bf42ae062ebafa68e4d87ae0c62d7e5 Mon Sep 17 00:00:00 2001 From: Robert David Date: Thu, 6 Aug 2020 16:16:29 -0700 Subject: [PATCH 2302/2522] Change unordered containers to Swiss table. PiperOrigin-RevId: 325332451 Change-Id: I5349d9b9e9227b62752f21e0b2c777bfcc59d3eb --- tensorflow/lite/delegates/gpu/BUILD | 1 + tensorflow/lite/delegates/gpu/cl/BUILD | 3 +++ .../lite/delegates/gpu/cl/inference_context.cc | 9 +++++---- .../lite/delegates/gpu/cl/inference_context.h | 4 ++-- tensorflow/lite/delegates/gpu/cl/program_cache.h | 6 +++--- tensorflow/lite/delegates/gpu/common/BUILD | 5 +++++ .../lite/delegates/gpu/common/model_builder.cc | 12 ++++++------ .../lite/delegates/gpu/common/model_builder.h | 6 +++--- .../delegates/gpu/common/model_transformer.h | 4 ++-- .../lite/delegates/gpu/common/object_reader.cc | 6 +++--- .../lite/delegates/gpu/common/object_reader.h | 14 +++++++------- .../lite/delegates/gpu/common/operations.cc | 4 ++-- .../delegates/gpu/common/quantization_util.cc | 16 +++++++++------- .../delegates/gpu/common/quantization_util.h | 10 +++++----- .../gpu/common/quantization_util_test.cc | 8 ++++---- tensorflow/lite/delegates/gpu/delegate.cc | 4 ++-- tensorflow/lite/delegates/gpu/gl/BUILD | 3 +++ tensorflow/lite/delegates/gpu/gl/api.cc | 12 ++++++------ tensorflow/lite/delegates/gpu/gl/api.h | 2 +- tensorflow/lite/delegates/gpu/gl/api2.cc | 4 ++-- tensorflow/lite/delegates/gpu/gl/compiler.cc | 10 ++++++---- tensorflow/lite/delegates/gpu/gl/compiler.h | 7 ++++--- tensorflow/lite/delegates/gpu/gl/compiler/BUILD | 5 +++++ .../delegates/gpu/gl/compiler/compiled_node.cc | 5 ++--- .../delegates/gpu/gl/compiler/fuse_auto_input.cc | 4 ++-- .../delegates/gpu/gl/compiler/object_accessor.h | 4 ++-- .../lite/delegates/gpu/gl/compiler/rename.cc | 6 +++--- .../gpu/gl/compiler/variable_accessor.h | 6 +++--- tensorflow/lite/delegates/gpu/gl/kernels/BUILD | 6 +++++- .../delegates/gpu/gl/kernels/custom_registry.cc | 5 +++-- .../delegates/gpu/gl/kernels/custom_registry.h | 4 ++-- .../lite/delegates/gpu/gl/kernels/registry.cc | 4 ++-- .../lite/delegates/gpu/gl/kernels/test_util.cc | 12 ++++++------ tensorflow/lite/delegates/gpu/gl/runtime.cc | 1 - .../gl/workgroups/calculator_from_metadata.cc | 4 ++-- tensorflow/lite/delegates/gpu/gl_delegate.cc | 2 +- .../delegates/gpu/metal/kernels/elementwise.cc | 6 +++--- tensorflow/lite/delegates/gpu/metal_delegate.mm | 3 ++- 38 files changed, 127 insertions(+), 100 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/BUILD b/tensorflow/lite/delegates/gpu/BUILD index 4113d34b3f8..d69bed4c03a 100644 --- a/tensorflow/lite/delegates/gpu/BUILD +++ b/tensorflow/lite/delegates/gpu/BUILD @@ -251,6 +251,7 @@ cc_library( "//tensorflow/lite/delegates/gpu/gl:api2", ], }) + [ + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/memory", "@com_google_absl//absl/types:span", "//tensorflow/lite:kernel_api", diff --git a/tensorflow/lite/delegates/gpu/cl/BUILD b/tensorflow/lite/delegates/gpu/cl/BUILD index ebfb2cff41b..66bcbc826ea 100644 --- a/tensorflow/lite/delegates/gpu/cl/BUILD +++ b/tensorflow/lite/delegates/gpu/cl/BUILD @@ -388,6 +388,8 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:util", "//tensorflow/lite/delegates/gpu/common/transformations:add_bias", "//tensorflow/lite/delegates/gpu/common/transformations:merge_padding_with", + "@com_google_absl//absl/container:flat_hash_map", + 
"@com_google_absl//absl/container:flat_hash_set", ], ) @@ -454,6 +456,7 @@ cc_library( ":compiled_program_cache_cc_fbs", ":util", "//tensorflow/lite/delegates/gpu/common:status", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/types:span", "@farmhash_archive//:farmhash", "@flatbuffers", diff --git a/tensorflow/lite/delegates/gpu/cl/inference_context.cc b/tensorflow/lite/delegates/gpu/cl/inference_context.cc index 8e23eb1bcee..689b511bb5e 100644 --- a/tensorflow/lite/delegates/gpu/cl/inference_context.cc +++ b/tensorflow/lite/delegates/gpu/cl/inference_context.cc @@ -21,9 +21,10 @@ limitations under the License. #include #include #include -#include #include +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "tensorflow/lite/delegates/gpu/cl/buffer.h" #include "tensorflow/lite/delegates/gpu/cl/cl_device.h" #include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h" @@ -49,7 +50,7 @@ namespace gpu { namespace cl { namespace { -bool IsReady(const std::unordered_set& ready_tensors, +bool IsReady(const absl::flat_hash_set& ready_tensors, const CLNode& node) { for (const ValueId in_id : node.inputs) { if (ready_tensors.find(in_id) == ready_tensors.end()) { @@ -325,7 +326,7 @@ absl::Status InferenceContext::ConvertOperations( inputs, outputs, node, &gpu_subgraph)); } - std::unordered_map mapping_to_global_ids; + absl::flat_hash_map mapping_to_global_ids; for (int j = 0; j < gpu_subgraph.new_tensors.size(); ++j) { const auto& t = gpu_subgraph.new_tensors[j]; auto global_id = tensor_reserver_.Add({t.first, t.second}); @@ -364,7 +365,7 @@ absl::Status InferenceContext::ConvertOperations( } void InferenceContext::Merge() { - std::unordered_set ready_tensors; + absl::flat_hash_set ready_tensors; for (const auto& input_id : input_ids_) { ready_tensors.insert(input_id); } diff --git a/tensorflow/lite/delegates/gpu/cl/inference_context.h b/tensorflow/lite/delegates/gpu/cl/inference_context.h index 3f05026b795..e26cb170228 100644 --- a/tensorflow/lite/delegates/gpu/cl/inference_context.h +++ b/tensorflow/lite/delegates/gpu/cl/inference_context.h @@ -20,9 +20,9 @@ limitations under the License. #include #include #include -#include #include +#include "absl/container/flat_hash_map.h" #include "tensorflow/lite/delegates/gpu/cl/buffer.h" #include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h" #include "tensorflow/lite/delegates/gpu/cl/environment.h" @@ -160,7 +160,7 @@ class InferenceContext { DummyTensor Get(ValueId id) { return reservations_[id]; } private: - std::unordered_map reservations_; + absl::flat_hash_map reservations_; ValueId next_; }; TensorReserver tensor_reserver_; diff --git a/tensorflow/lite/delegates/gpu/cl/program_cache.h b/tensorflow/lite/delegates/gpu/cl/program_cache.h index 21f9583a59a..81649d677f7 100644 --- a/tensorflow/lite/delegates/gpu/cl/program_cache.h +++ b/tensorflow/lite/delegates/gpu/cl/program_cache.h @@ -18,9 +18,9 @@ limitations under the License. #include #include -#include #include +#include "absl/container/flat_hash_map.h" #include "absl/types/span.h" #include "tensorflow/lite/delegates/gpu/cl/cl_context.h" #include "tensorflow/lite/delegates/gpu/cl/cl_device.h" @@ -93,8 +93,8 @@ class ProgramCache { // There is a low probability of a hash collision when cache is deserialized // because only fingerprints are serialized instead of full source code. 
bool use_fingerprints_ = false; - std::unordered_map + absl::flat_hash_map programs_; }; diff --git a/tensorflow/lite/delegates/gpu/common/BUILD b/tensorflow/lite/delegates/gpu/common/BUILD index ab2d5d033f7..3caee09ca7e 100644 --- a/tensorflow/lite/delegates/gpu/common/BUILD +++ b/tensorflow/lite/delegates/gpu/common/BUILD @@ -114,6 +114,7 @@ cc_library( ":shape", ":status", ":tensor", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/strings", "//tensorflow/lite/delegates:utils", "//tensorflow/lite:context", @@ -169,6 +170,7 @@ cc_library( hdrs = ["model_transformer.h"], deps = [ ":model", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/strings", ], ) @@ -186,6 +188,7 @@ cc_library( "//tensorflow/lite/c:common", "//tensorflow/lite/delegates:utils", "//tensorflow/lite/kernels:kernel_util", + "@com_google_absl//absl/container:flat_hash_map", ], ) @@ -198,6 +201,7 @@ cc_library( ":model", ":shape", ":status", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/types:variant", ], ) @@ -212,6 +216,7 @@ cc_library( "//tensorflow/lite/c:common", "//tensorflow/lite/kernels/internal:optimized_base", "//tensorflow/lite/kernels/internal:types", + "@com_google_absl//absl/container:flat_hash_map", ], ) diff --git a/tensorflow/lite/delegates/gpu/common/model_builder.cc b/tensorflow/lite/delegates/gpu/common/model_builder.cc index 4c0fd827834..84622cdc294 100644 --- a/tensorflow/lite/delegates/gpu/common/model_builder.cc +++ b/tensorflow/lite/delegates/gpu/common/model_builder.cc @@ -22,10 +22,10 @@ limitations under the License. #include #include #include -#include #include #include +#include "absl/container/flat_hash_map.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_join.h" #include "absl/strings/string_view.h" @@ -2884,8 +2884,8 @@ TfLiteIntArray* GetOpsToReplace(TfLiteContext* context, bool allow_quant_ops, // guarantee that the order will match the source model tensors order. 
absl::Status PrecreateIOTensors( TfLiteContext* context, GraphFloat32* graph, TfLiteIntArray* io_tensors, - std::unordered_map* quant_conversion_map, - std::unordered_map* tensor_to_value) { + absl::flat_hash_map* quant_conversion_map, + absl::flat_hash_map* tensor_to_value) { for (int i = 0; i < io_tensors->size; ++i) { const int tensor_index = io_tensors->data[i]; const TfLiteTensor& tflite_tensor = context->tensors[tensor_index]; @@ -2899,7 +2899,7 @@ absl::Status PrecreateIOTensors( absl::Status BuildModel(TfLiteContext* context, const TfLiteDelegateParams* delegate_params, GraphFloat32* graph, - std::unordered_map* quant_conversion_map) { + absl::flat_hash_map* quant_conversion_map) { std::vector> operations; std::vector tflite_nodes; for (int i = 0; i < delegate_params->nodes_to_replace->size; ++i) { @@ -2925,7 +2925,7 @@ absl::Status BuildModel(TfLiteContext* context, operations.push_back(std::move(op_parser)); tflite_nodes.push_back(i); } - std::unordered_map tensor_to_value; + absl::flat_hash_map tensor_to_value; RETURN_IF_ERROR(PrecreateIOTensors(context, graph, delegate_params->input_tensors, quant_conversion_map, &tensor_to_value)); @@ -2952,7 +2952,7 @@ absl::Status BuildModel(TfLiteContext* context, absl::Status BuildFinalModel( TfLiteContext* context, const TfLiteDelegateParams* delegate_params, - GraphFloat32* graph, std::unordered_map* quant_conversion_map) { + GraphFloat32* graph, absl::flat_hash_map* quant_conversion_map) { RETURN_IF_ERROR( BuildModel(context, delegate_params, graph, quant_conversion_map)); diff --git a/tensorflow/lite/delegates/gpu/common/model_builder.h b/tensorflow/lite/delegates/gpu/common/model_builder.h index 1e5016d86b6..9d80e9636f0 100644 --- a/tensorflow/lite/delegates/gpu/common/model_builder.h +++ b/tensorflow/lite/delegates/gpu/common/model_builder.h @@ -18,8 +18,8 @@ limitations under the License. #include #include -#include +#include "absl/container/flat_hash_map.h" #include "tensorflow/lite/context.h" #include "tensorflow/lite/delegates/gpu/common/model.h" #include "tensorflow/lite/delegates/gpu/common/status.h" @@ -48,7 +48,7 @@ TfLiteIntArray* GetOpsToReplace(TfLiteContext* context, absl::Status BuildModel( TfLiteContext* context, const TfLiteDelegateParams* delegate_params, GraphFloat32* graph, - std::unordered_map* quant_conversion_map = nullptr); + absl::flat_hash_map* quant_conversion_map = nullptr); // Same as above but also apply all transformations on the final graph. // Prefer using this method instead of BuildModel. @@ -62,7 +62,7 @@ absl::Status BuildModel( absl::Status BuildFinalModel( TfLiteContext* context, const TfLiteDelegateParams* delegate_params, GraphFloat32* graph, - std::unordered_map* quant_conversion_map = nullptr); + absl::flat_hash_map* quant_conversion_map = nullptr); // Module-internal converter, exposed for unit testing purpose only. absl::Status ConvertTfLiteTensorToTensorRef(const TfLiteTensor& tflite_tensor, diff --git a/tensorflow/lite/delegates/gpu/common/model_transformer.h b/tensorflow/lite/delegates/gpu/common/model_transformer.h index d82a6a687ca..fd2667390f3 100644 --- a/tensorflow/lite/delegates/gpu/common/model_transformer.h +++ b/tensorflow/lite/delegates/gpu/common/model_transformer.h @@ -18,9 +18,9 @@ limitations under the License. 
#include #include -#include #include +#include "absl/container/flat_hash_set.h" #include "tensorflow/lite/delegates/gpu/common/model.h" namespace tflite { @@ -126,7 +126,7 @@ class ModelTransformer { TransformationReporter* reporter_; std::deque to_process_; - std::unordered_set processed_; + absl::flat_hash_set processed_; }; class NullTransformationReporter : public TransformationReporter { diff --git a/tensorflow/lite/delegates/gpu/common/object_reader.cc b/tensorflow/lite/delegates/gpu/common/object_reader.cc index 41f3ef8ff19..c837fa061c0 100644 --- a/tensorflow/lite/delegates/gpu/common/object_reader.cc +++ b/tensorflow/lite/delegates/gpu/common/object_reader.cc @@ -16,8 +16,8 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/common/object_reader.h" #include -#include +#include "absl/container/flat_hash_map.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/delegates/gpu/common/model.h" #include "tensorflow/lite/delegates/gpu/common/model_builder_helper.h" @@ -28,8 +28,8 @@ namespace tflite { namespace gpu { absl::Status ObjectReader::ReadNonConstantTensor( - TfLiteContext* context, std::unordered_map* tensor_to_value, - std::unordered_map* quant_conversion_map, GraphFloat32* graph, + TfLiteContext* context, absl::flat_hash_map* tensor_to_value, + absl::flat_hash_map* quant_conversion_map, GraphFloat32* graph, uint32_t tensor_idx, Value** value) { if (tensor_idx >= context->tensors_size) { return absl::OutOfRangeError( diff --git a/tensorflow/lite/delegates/gpu/common/object_reader.h b/tensorflow/lite/delegates/gpu/common/object_reader.h index be9a89e1b4e..246bc71f9c5 100644 --- a/tensorflow/lite/delegates/gpu/common/object_reader.h +++ b/tensorflow/lite/delegates/gpu/common/object_reader.h @@ -17,8 +17,8 @@ limitations under the License. #define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_OBJECT_READER_H_ #include -#include +#include "absl/container/flat_hash_map.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/delegates/gpu/common/model.h" #include "tensorflow/lite/delegates/gpu/common/model_builder_helper.h" @@ -34,14 +34,14 @@ namespace gpu { class ObjectReader { public: static absl::Status ReadNonConstantTensor( - TfLiteContext* context, std::unordered_map* tensor_to_value, - std::unordered_map* quant_conversion_map, GraphFloat32* graph, + TfLiteContext* context, absl::flat_hash_map* tensor_to_value, + absl::flat_hash_map* quant_conversion_map, GraphFloat32* graph, uint32_t tensor_idx, Value** value = nullptr); ObjectReader(GraphFloat32* graph, TfLiteContext* context, const TfLiteNode* node, - std::unordered_map* tensor_to_value, - std::unordered_map* quant_conversion_map = nullptr) + absl::flat_hash_map* tensor_to_value, + absl::flat_hash_map* quant_conversion_map = nullptr) : graph_(graph), context_(context), node_(node), @@ -98,8 +98,8 @@ class ObjectReader { GraphFloat32* graph_; TfLiteContext* context_; const TfLiteNode* node_; - std::unordered_map* tensor_to_value_; - std::unordered_map* quant_conversion_map_; + absl::flat_hash_map* tensor_to_value_; + absl::flat_hash_map* quant_conversion_map_; }; } // namespace gpu diff --git a/tensorflow/lite/delegates/gpu/common/operations.cc b/tensorflow/lite/delegates/gpu/common/operations.cc index 245a5a80639..fbffe9d65ff 100644 --- a/tensorflow/lite/delegates/gpu/common/operations.cc +++ b/tensorflow/lite/delegates/gpu/common/operations.cc @@ -16,8 +16,8 @@ limitations under the License. 
#include "tensorflow/lite/delegates/gpu/common/operations.h" #include -#include +#include "absl/container/flat_hash_map.h" #include "tensorflow/lite/delegates/gpu/common/shape.h" #include "tensorflow/lite/delegates/gpu/common/status.h" @@ -165,7 +165,7 @@ std::string ToString(enum OperationType op) { OperationType OperationTypeFromString(const std::string& name) { static const auto operations = - new std::unordered_map({ + new absl::flat_hash_map({ {"abs", OperationType::ABS}, {"add", OperationType::ADD}, {"batch_normalization", OperationType::BATCH_NORMALIZATION}, diff --git a/tensorflow/lite/delegates/gpu/common/quantization_util.cc b/tensorflow/lite/delegates/gpu/common/quantization_util.cc index 9584d1d98ec..fe92989a3ae 100644 --- a/tensorflow/lite/delegates/gpu/common/quantization_util.cc +++ b/tensorflow/lite/delegates/gpu/common/quantization_util.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/common/quantization_util.h" +#include "absl/container/flat_hash_map.h" #include "tensorflow/lite/builtin_ops.h" #include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h" #include "tensorflow/lite/kernels/internal/types.h" @@ -22,8 +23,9 @@ limitations under the License. namespace tflite { namespace gpu { namespace { -void DequantizeInput(TfLiteContext* context, int input_index, - const std::unordered_map& quant_conversion_map) { +void DequantizeInput( + TfLiteContext* context, int input_index, + const absl::flat_hash_map& quant_conversion_map) { if (quant_conversion_map.find(input_index) == quant_conversion_map.end()) { return; } @@ -50,7 +52,7 @@ void DequantizeInput(TfLiteContext* context, int input_index, } void QuantizeOutput(TfLiteContext* context, int output_index, - const std::unordered_map& quant_conversion_map) { + const absl::flat_hash_map& quant_conversion_map) { if (quant_conversion_map.find(output_index) == quant_conversion_map.end()) { return; } @@ -80,7 +82,7 @@ void QuantizeOutput(TfLiteContext* context, int output_index, absl::Status DequantizeInputs( TfLiteContext* context, const std::vector& input_indices, - const std::unordered_map& quant_conversion_map) { + const absl::flat_hash_map& quant_conversion_map) { for (auto index : input_indices) { DequantizeInput(context, static_cast(index), quant_conversion_map); } @@ -89,7 +91,7 @@ absl::Status DequantizeInputs( absl::Status DequantizeInputs( TfLiteContext* context, const std::vector& input_indices, - const std::unordered_map& quant_conversion_map) { + const absl::flat_hash_map& quant_conversion_map) { for (auto index : input_indices) { DequantizeInput(context, static_cast(index), quant_conversion_map); } @@ -98,7 +100,7 @@ absl::Status DequantizeInputs( absl::Status QuantizeOutputs( TfLiteContext* context, const std::vector& output_indices, - const std::unordered_map& quant_conversion_map) { + const absl::flat_hash_map& quant_conversion_map) { for (auto index : output_indices) { QuantizeOutput(context, static_cast(index), quant_conversion_map); } @@ -108,7 +110,7 @@ absl::Status QuantizeOutputs( absl::Status QuantizeOutputs( TfLiteContext* context, const std::vector& output_indices, - const std::unordered_map& quant_conversion_map) { + const absl::flat_hash_map& quant_conversion_map) { for (auto index : output_indices) { QuantizeOutput(context, static_cast(index), quant_conversion_map); } diff --git a/tensorflow/lite/delegates/gpu/common/quantization_util.h b/tensorflow/lite/delegates/gpu/common/quantization_util.h index 26512531f29..fc01d612d6f 100644 --- 
a/tensorflow/lite/delegates/gpu/common/quantization_util.h +++ b/tensorflow/lite/delegates/gpu/common/quantization_util.h @@ -16,9 +16,9 @@ limitations under the License. #ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_QUANTIZATION_UTIL_H_ #define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_QUANTIZATION_UTIL_H_ -#include #include +#include "absl/container/flat_hash_map.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/delegates/gpu/common/status.h" @@ -32,11 +32,11 @@ namespace gpu { // tensor and its original quantized one. absl::Status DequantizeInputs( TfLiteContext* context, const std::vector& input_indices, - const std::unordered_map& quant_conversion_map); + const absl::flat_hash_map& quant_conversion_map); absl::Status DequantizeInputs( TfLiteContext* context, const std::vector& input_indices, - const std::unordered_map& quant_conversion_map); + const absl::flat_hash_map& quant_conversion_map); // Quantizes output tensors post-inference, leaving float tensors intact. // output_indices contains (fp32) inputs to be quantized, which are outputs of @@ -45,11 +45,11 @@ absl::Status DequantizeInputs( // tensor and its original quantized one. absl::Status QuantizeOutputs( TfLiteContext* context, const std::vector& output_indices, - const std::unordered_map& quant_conversion_map); + const absl::flat_hash_map& quant_conversion_map); absl::Status QuantizeOutputs( TfLiteContext* context, const std::vector& output_indices, - const std::unordered_map& quant_conversion_map); + const absl::flat_hash_map& quant_conversion_map); } // namespace gpu } // namespace tflite diff --git a/tensorflow/lite/delegates/gpu/common/quantization_util_test.cc b/tensorflow/lite/delegates/gpu/common/quantization_util_test.cc index 064a2a2e6b2..b5cdaec91e0 100644 --- a/tensorflow/lite/delegates/gpu/common/quantization_util_test.cc +++ b/tensorflow/lite/delegates/gpu/common/quantization_util_test.cc @@ -151,7 +151,7 @@ TEST(DequantizeInputs, Int8) { PopulateContext(tensors, context); std::vector input_indices = {1}; - std::unordered_map quant_conversion_map = {{1, 0}}; + absl::flat_hash_map quant_conversion_map = {{1, 0}}; auto status = DequantizeInputs(&context, input_indices, quant_conversion_map); EXPECT_TRUE(status.ok()); @@ -176,7 +176,7 @@ TEST(DequantizeInputs, UInt8) { PopulateContext(tensors, context); std::vector input_indices = {1}; - std::unordered_map quant_conversion_map = {{1, 0}}; + absl::flat_hash_map quant_conversion_map = {{1, 0}}; auto status = DequantizeInputs(&context, input_indices, quant_conversion_map); EXPECT_TRUE(status.ok()); @@ -199,7 +199,7 @@ TEST(QuantizeOutputs, Int8) { PopulateContext(tensors, context); std::vector output_indices = {0}; - std::unordered_map quant_conversion_map = {{0, 1}}; + absl::flat_hash_map quant_conversion_map = {{0, 1}}; auto status = QuantizeOutputs(&context, output_indices, quant_conversion_map); EXPECT_TRUE(status.ok()); @@ -221,7 +221,7 @@ TEST(QuantizeOutputs, UInt8) { PopulateContext(tensors, context); std::vector output_indices = {0}; - std::unordered_map quant_conversion_map = {{0, 1}}; + absl::flat_hash_map quant_conversion_map = {{0, 1}}; auto status = QuantizeOutputs(&context, output_indices, quant_conversion_map); EXPECT_TRUE(status.ok()); diff --git a/tensorflow/lite/delegates/gpu/delegate.cc b/tensorflow/lite/delegates/gpu/delegate.cc index 0f2d9811633..bfc2b7f08c4 100644 --- a/tensorflow/lite/delegates/gpu/delegate.cc +++ b/tensorflow/lite/delegates/gpu/delegate.cc @@ -18,9 +18,9 @@ limitations under the License. 
#include #include #include // NOLINT(build/c++11) -#include #include +#include "absl/container/flat_hash_map.h" #include "absl/memory/memory.h" #include "absl/types/span.h" #include "tensorflow/lite/builtin_ops.h" @@ -350,7 +350,7 @@ class DelegateKernel { // Whenever quantized inference is enabled, this maps the tensor index of each // originally quantized (8-bit) tensor to its float version added in // model_builder - and vice versa. - std::unordered_map quant_conversion_map_; + absl::flat_hash_map quant_conversion_map_; std::thread::id thread_id_prepare_; // thread id used for Prapare() bool enforce_same_thread_ = false; // flag to enforce same thread for Invoke }; diff --git a/tensorflow/lite/delegates/gpu/gl/BUILD b/tensorflow/lite/delegates/gpu/gl/BUILD index 91472261d04..d39f5e3c34a 100644 --- a/tensorflow/lite/delegates/gpu/gl/BUILD +++ b/tensorflow/lite/delegates/gpu/gl/BUILD @@ -29,6 +29,7 @@ cc_library( ":runtime_options", ":stats", ":variable", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "//tensorflow/lite/delegates/gpu/common:model", @@ -66,6 +67,7 @@ cc_library( "//tensorflow/lite/delegates/gpu/gl/kernels:converter", "//tensorflow/lite/delegates/gpu/gl/kernels:registry", "//tensorflow/lite/delegates/gpu/gl/workgroups:default_calculator", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/memory", "@com_google_absl//absl/types:span", ], @@ -125,6 +127,7 @@ cc_library( "//tensorflow/lite/delegates/gpu/gl/compiler:fuse_inplace", "//tensorflow/lite/delegates/gpu/gl/compiler:shader_code", "//tensorflow/lite/delegates/gpu/gl/compiler:shader_codegen", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/memory", "@com_google_absl//absl/types:any", ], diff --git a/tensorflow/lite/delegates/gpu/gl/api.cc b/tensorflow/lite/delegates/gpu/gl/api.cc index 0240a5cfbed..f50b8cb5d5c 100644 --- a/tensorflow/lite/delegates/gpu/gl/api.cc +++ b/tensorflow/lite/delegates/gpu/gl/api.cc @@ -19,10 +19,10 @@ limitations under the License. #include #include #include // NOLINT -#include #include #include +#include "absl/container/flat_hash_map.h" #include "absl/memory/memory.h" #include "absl/strings/str_cat.h" #include "tensorflow/lite/delegates/gpu/common/model.h" @@ -46,7 +46,7 @@ namespace gpu { namespace gl { namespace { -using ObjectsSizes = std::unordered_map; +using ObjectsSizes = absl::flat_hash_map; enum class InferenceContextState { NOT_STARTED, @@ -313,7 +313,7 @@ class CompiledModelImpl full_shaders[shader.second] = shader.first; } - std::unordered_map partial_shader_to_index; + absl::flat_hash_map partial_shader_to_index; std::vector partial_shaders; for (const auto& program : programs_) { // Remove a header from a shader. @@ -366,16 +366,16 @@ class CompiledModelImpl std::vector shaders_; // Shaders are serialized in order of their indices. 
- std::unordered_map shader_to_index_; + absl::flat_hash_map shader_to_index_; std::deque programs_; - std::unordered_map object_sizes_; + absl::flat_hash_map object_sizes_; CompilerStats stats_; }; } // namespace absl::Status Compile(const CompilationOptions& options, const GraphFloat32& model, - const std::unordered_set& tflite_graph_io, + const std::unordered_set& tflite_graph_io, // NOLINT const NodeShader& node_shader, const WorkgroupsCalculator& workgroup_calculator, std::unique_ptr* compiled_model) { diff --git a/tensorflow/lite/delegates/gpu/gl/api.h b/tensorflow/lite/delegates/gpu/gl/api.h index c37eb9b7772..11498243757 100644 --- a/tensorflow/lite/delegates/gpu/gl/api.h +++ b/tensorflow/lite/delegates/gpu/gl/api.h @@ -67,7 +67,7 @@ class CompiledModel { // Turns the given model into "compiled" form that is suitable for inference. absl::Status Compile(const CompilationOptions& options, const GraphFloat32& model, - const std::unordered_set& tflite_graph_io, + const std::unordered_set& tflite_graph_io, // NOLINT const NodeShader& node_shader, const WorkgroupsCalculator& workgroup_calculator, std::unique_ptr* compiled_model); diff --git a/tensorflow/lite/delegates/gpu/gl/api2.cc b/tensorflow/lite/delegates/gpu/gl/api2.cc index c8bf6dd063a..c12463800a9 100644 --- a/tensorflow/lite/delegates/gpu/gl/api2.cc +++ b/tensorflow/lite/delegates/gpu/gl/api2.cc @@ -18,10 +18,10 @@ limitations under the License. #include #include #include -#include #include #include +#include "absl/container/flat_hash_map.h" #include "absl/memory/memory.h" #include "absl/types/span.h" #include "tensorflow/lite/delegates/gpu/common/data_type.h" @@ -542,7 +542,7 @@ class InferenceBuilderImpl : public InferenceBuilder { auto workgroup_calculator = NewDefaultWorkgroupsCalculator(*gpu_info_); auto external_objects = absl::make_unique(); std::vector shaders; - std::unordered_map shader_to_index; + absl::flat_hash_map shader_to_index; RuntimeOptions runtime_options; auto runtime = absl::make_unique(runtime_options, *gpu_info_, diff --git a/tensorflow/lite/delegates/gpu/gl/compiler.cc b/tensorflow/lite/delegates/gpu/gl/compiler.cc index d316505a0e0..eba25171ca3 100644 --- a/tensorflow/lite/delegates/gpu/gl/compiler.cc +++ b/tensorflow/lite/delegates/gpu/gl/compiler.cc @@ -21,6 +21,7 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_map.h" #include "absl/memory/memory.h" #include "absl/types/any.h" #include "tensorflow/lite/delegates/gpu/common/data_type.h" @@ -102,9 +103,10 @@ class CompilerImpl : public Compiler { } } - absl::Status Compile(const GraphFloat32& graph, - const std::unordered_set& tflite_graph_io, - const ShaderCodeCallback& callback) final { + absl::Status Compile( + const GraphFloat32& graph, + const std::unordered_set& tflite_graph_io, // NOLINT + const ShaderCodeCallback& callback) final { // It is important to have ids in a compiled graph identical to the given // graph. RETURN_IF_ERROR(graph.MakeExactCopy(&compiled_graph_)); @@ -158,7 +160,7 @@ class CompilerImpl : public Compiler { } // Prepare internal objects. 
- std::unordered_map objects; + absl::flat_hash_map objects; for (auto value : compiled_graph_.values()) { Object object = MakePHWC4Ref(value->id, value->tensor.shape); object.data_type = value->tensor.type; diff --git a/tensorflow/lite/delegates/gpu/gl/compiler.h b/tensorflow/lite/delegates/gpu/gl/compiler.h index 7769890b769..03ea3dd2a90 100644 --- a/tensorflow/lite/delegates/gpu/gl/compiler.h +++ b/tensorflow/lite/delegates/gpu/gl/compiler.h @@ -40,9 +40,10 @@ class Compiler { // Goes over a graph and generates OpenGL shaders for the given graph. // Callback is called for every generated shader. Callback may execute shaders // as they come or store them elsewhere to execute later. - virtual absl::Status Compile(const GraphFloat32& graph, - const std::unordered_set& tflite_graph_io, - const ShaderCodeCallback& callback) = 0; + virtual absl::Status Compile( + const GraphFloat32& graph, + const std::unordered_set& tflite_graph_io, // NOLINT + const ShaderCodeCallback& callback) = 0; }; std::unique_ptr NewCompiler( diff --git a/tensorflow/lite/delegates/gpu/gl/compiler/BUILD b/tensorflow/lite/delegates/gpu/gl/compiler/BUILD index 601e809fffa..f62f48750bd 100644 --- a/tensorflow/lite/delegates/gpu/gl/compiler/BUILD +++ b/tensorflow/lite/delegates/gpu/gl/compiler/BUILD @@ -38,6 +38,7 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:data_type", "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/gl:object", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/types:variant", @@ -101,6 +102,7 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:status", "//tensorflow/lite/delegates/gpu/gl:node_shader", "//tensorflow/lite/delegates/gpu/gl:object", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/strings", ], ) @@ -150,6 +152,7 @@ cc_library( "//tensorflow/lite/delegates/gpu/gl:node_shader", "//tensorflow/lite/delegates/gpu/gl:object", "//tensorflow/lite/delegates/gpu/gl:variable", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/strings", ], ) @@ -164,6 +167,7 @@ cc_library( "//tensorflow/lite/delegates/gpu/common:model_transformer", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:types", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:any", "@com_google_absl//absl/types:variant", @@ -193,6 +197,7 @@ cc_library( ":preprocessor", "//tensorflow/lite/delegates/gpu/common:types", "//tensorflow/lite/delegates/gpu/gl:variable", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/types:variant", diff --git a/tensorflow/lite/delegates/gpu/gl/compiler/compiled_node.cc b/tensorflow/lite/delegates/gpu/gl/compiler/compiled_node.cc index 4048a07d087..035fce56d31 100644 --- a/tensorflow/lite/delegates/gpu/gl/compiler/compiled_node.cc +++ b/tensorflow/lite/delegates/gpu/gl/compiler/compiled_node.cc @@ -15,8 +15,7 @@ limitations under the License. 
#include "tensorflow/lite/delegates/gpu/gl/compiler/compiled_node.h" -#include - +#include "absl/container/flat_hash_set.h" #include "absl/strings/str_cat.h" #include "tensorflow/lite/delegates/gpu/common/status.h" #include "tensorflow/lite/delegates/gpu/gl/compiler/rename.h" @@ -28,7 +27,7 @@ namespace gl { absl::Status MergeCode(CompiledNodeAttributes* attr, CompiledNodeAttributes* merged_attr) { // build a map of known names. - std::unordered_set known_names; + absl::flat_hash_set known_names; for (const auto& parameter : merged_attr->code.parameters) { known_names.insert(parameter.name); } diff --git a/tensorflow/lite/delegates/gpu/gl/compiler/fuse_auto_input.cc b/tensorflow/lite/delegates/gpu/gl/compiler/fuse_auto_input.cc index d0408c6a7be..36d8fa8c1c7 100644 --- a/tensorflow/lite/delegates/gpu/gl/compiler/fuse_auto_input.cc +++ b/tensorflow/lite/delegates/gpu/gl/compiler/fuse_auto_input.cc @@ -16,9 +16,9 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/gl/compiler/fuse_auto_input.h" #include -#include #include +#include "absl/container/flat_hash_set.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_replace.h" #include "absl/types/any.h" @@ -102,7 +102,7 @@ TransformResult FuseAutoInput::ApplyToNode(Node* node, GraphFloat32* graph) { // Skip fusions which will result in duplicate inputs, e.g. diamond shapes. { - std::unordered_set all_inputs; + absl::flat_hash_set all_inputs; for (const auto& node_to_fuse : nodes_to_fuse) { for (const auto& input : graph->FindInputs(node_to_fuse.first->id)) { if (all_inputs.find(input->id) != all_inputs.end()) { diff --git a/tensorflow/lite/delegates/gpu/gl/compiler/object_accessor.h b/tensorflow/lite/delegates/gpu/gl/compiler/object_accessor.h index 78e7a2f1e17..5c4de49c44b 100644 --- a/tensorflow/lite/delegates/gpu/gl/compiler/object_accessor.h +++ b/tensorflow/lite/delegates/gpu/gl/compiler/object_accessor.h @@ -17,9 +17,9 @@ limitations under the License. #define TENSORFLOW_LITE_DELEGATES_GPU_GL_COMPILER_OBJECT_ACCESSOR_H_ #include -#include #include +#include "absl/container/flat_hash_map.h" #include "tensorflow/lite/delegates/gpu/gl/compiler/preprocessor.h" #include "tensorflow/lite/delegates/gpu/gl/compiler/variable_accessor.h" #include "tensorflow/lite/delegates/gpu/gl/object.h" @@ -85,7 +85,7 @@ class ObjectAccessor : public InlineRewrite { RewriteStatus RewriteWrite(absl::string_view location, absl::string_view value, std::string* output); - std::unordered_map name_to_object_; + absl::flat_hash_map name_to_object_; const bool is_mali_; const bool sampler_textures_; diff --git a/tensorflow/lite/delegates/gpu/gl/compiler/rename.cc b/tensorflow/lite/delegates/gpu/gl/compiler/rename.cc index 956f6afae28..b41ba473b85 100644 --- a/tensorflow/lite/delegates/gpu/gl/compiler/rename.cc +++ b/tensorflow/lite/delegates/gpu/gl/compiler/rename.cc @@ -16,10 +16,10 @@ limitations under the License. 
#include "tensorflow/lite/delegates/gpu/gl/compiler/rename.h" #include -#include #include #include +#include "absl/container/flat_hash_map.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_join.h" #include "absl/strings/str_split.h" @@ -86,7 +86,7 @@ class VariableRewriter : public InlineRewrite { const std::string inline_delimiter_; const NameFunctor name_func_; - std::unordered_map name_to_variable_; + absl::flat_hash_map name_to_variable_; }; // Rewrites names of all objects according to returned values from the @@ -168,7 +168,7 @@ class ObjectRewriter : public InlineRewrite { const std::string inline_delimiter_; const NameFunctor name_func_; - std::unordered_map> + absl::flat_hash_map> name_to_object_; }; diff --git a/tensorflow/lite/delegates/gpu/gl/compiler/variable_accessor.h b/tensorflow/lite/delegates/gpu/gl/compiler/variable_accessor.h index c9946a00395..db4b031548b 100644 --- a/tensorflow/lite/delegates/gpu/gl/compiler/variable_accessor.h +++ b/tensorflow/lite/delegates/gpu/gl/compiler/variable_accessor.h @@ -16,11 +16,11 @@ limitations under the License. #ifndef TENSORFLOW_LITE_DELEGATES_GPU_GL_COMPILER_VARIABLE_ACCESSOR_H_ #define TENSORFLOW_LITE_DELEGATES_GPU_GL_COMPILER_VARIABLE_ACCESSOR_H_ -#include -#include #include +#include #include +#include "absl/container/flat_hash_map.h" #include "tensorflow/lite/delegates/gpu/gl/compiler/preprocessor.h" #include "tensorflow/lite/delegates/gpu/gl/variable.h" @@ -72,7 +72,7 @@ class VariableAccessor : public InlineRewrite { private: const bool inline_values_; const bool vulkan_support_; - std::unordered_map name_to_variable_; + absl::flat_hash_map name_to_variable_; std::set shared_variables_; std::set uniform_parameters_; }; diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/BUILD b/tensorflow/lite/delegates/gpu/gl/kernels/BUILD index a367a60ba41..774b6755014 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/BUILD +++ b/tensorflow/lite/delegates/gpu/gl/kernels/BUILD @@ -155,7 +155,10 @@ cc_library( name = "custom_registry", srcs = ["custom_registry.cc"], hdrs = ["custom_registry.h"], - deps = ["//tensorflow/lite/delegates/gpu/gl:node_shader"], + deps = [ + "//tensorflow/lite/delegates/gpu/gl:node_shader", + "@com_google_absl//absl/container:flat_hash_map", + ], ) cc_library( @@ -774,6 +777,7 @@ cc_library( "//conditions:default": NON_TFLITE_GPU_BINARY_RELEASE_OPERATORS, }) + [ ":custom_registry", + "@com_google_absl//absl/container:flat_hash_map", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:status", "//tensorflow/lite/delegates/gpu/gl:node_shader", diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/custom_registry.cc b/tensorflow/lite/delegates/gpu/gl/kernels/custom_registry.cc index f5c5429e867..a01e885adef 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/custom_registry.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/custom_registry.cc @@ -17,15 +17,16 @@ limitations under the License. 
#include #include -#include #include +#include "absl/container/flat_hash_map.h" + namespace tflite { namespace gpu { namespace gl { void RegisterCustomOps( - std::unordered_map>>* + absl::flat_hash_map>>* shaders) {} } // namespace gl diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/custom_registry.h b/tensorflow/lite/delegates/gpu/gl/kernels/custom_registry.h index 9a979a982db..7b2a841bca9 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/custom_registry.h +++ b/tensorflow/lite/delegates/gpu/gl/kernels/custom_registry.h @@ -18,9 +18,9 @@ limitations under the License. #include #include -#include #include +#include "absl/container/flat_hash_map.h" #include "tensorflow/lite/delegates/gpu/gl/node_shader.h" namespace tflite { @@ -29,7 +29,7 @@ namespace gl { // Registers custom operations. void RegisterCustomOps( - std::unordered_map>>* + absl::flat_hash_map>>* shaders_); } // namespace gl diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/registry.cc b/tensorflow/lite/delegates/gpu/gl/kernels/registry.cc index da6aad720a2..645e5b6c728 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/registry.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/registry.cc @@ -18,10 +18,10 @@ limitations under the License. #include #include #include -#include #include #include +#include "absl/container/flat_hash_map.h" #include "absl/memory/memory.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_join.h" @@ -139,7 +139,7 @@ class Registry : public NodeShader { } private: - std::unordered_map>> + absl::flat_hash_map>> shaders_; }; diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/test_util.cc b/tensorflow/lite/delegates/gpu/gl/kernels/test_util.cc index e9abec7eec6..21a53acd9c9 100644 --- a/tensorflow/lite/delegates/gpu/gl/kernels/test_util.cc +++ b/tensorflow/lite/delegates/gpu/gl/kernels/test_util.cc @@ -17,10 +17,10 @@ limitations under the License. #include #include -#include -#include #include +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "tensorflow/lite/delegates/gpu/common/model.h" #include "tensorflow/lite/delegates/gpu/common/operations.h" #include "tensorflow/lite/delegates/gpu/common/status.h" @@ -78,7 +78,7 @@ absl::Status SingleOpModel::Invoke(const CompilationOptions& compile_options, // Create buffers for input tensors. { - std::unordered_map tensor_to_id; + absl::flat_hash_map tensor_to_id; for (const auto* input : graph_.inputs()) { tensor_to_id[input->tensor.ref] = input->id; } @@ -101,9 +101,9 @@ absl::Status SingleOpModel::Invoke(const CompilationOptions& compile_options, GpuInfo gpu_info; RETURN_IF_ERROR(RequestGpuInfo(&gpu_info)); std::unique_ptr compiled_model; - RETURN_IF_ERROR(Compile( - compile_options, graph_, /*tflite_graph_io=*/std::unordered_set(), - shader, *NewDefaultWorkgroupsCalculator(gpu_info), &compiled_model)); + RETURN_IF_ERROR(Compile(compile_options, graph_, /*tflite_graph_io=*/{}, + shader, *NewDefaultWorkgroupsCalculator(gpu_info), + &compiled_model)); // Get inference context. auto command_queue = NewCommandQueue(gpu_info); diff --git a/tensorflow/lite/delegates/gpu/gl/runtime.cc b/tensorflow/lite/delegates/gpu/gl/runtime.cc index b7e01a33570..7f0cbe0284b 100644 --- a/tensorflow/lite/delegates/gpu/gl/runtime.cc +++ b/tensorflow/lite/delegates/gpu/gl/runtime.cc @@ -17,7 +17,6 @@ limitations under the License. 
#include #include -#include #include #include "absl/strings/str_cat.h" diff --git a/tensorflow/lite/delegates/gpu/gl/workgroups/calculator_from_metadata.cc b/tensorflow/lite/delegates/gpu/gl/workgroups/calculator_from_metadata.cc index 7976fd54ed0..8a269e7cf25 100644 --- a/tensorflow/lite/delegates/gpu/gl/workgroups/calculator_from_metadata.cc +++ b/tensorflow/lite/delegates/gpu/gl/workgroups/calculator_from_metadata.cc @@ -18,8 +18,8 @@ limitations under the License. #ifndef TFLITE_GPU_BINARY_RELEASE #include -#include +#include "absl/container/flat_hash_map.h" #include "absl/memory/memory.h" #include "flatbuffers/flatbuffers.h" // from @flatbuffers #include "tensorflow/lite/delegates/gpu/common/gpu_info.h" @@ -62,7 +62,7 @@ class WorkgroupsCalculatorFromMetadata : public WorkgroupsCalculator { } private: - std::unordered_map workgroups_; + absl::flat_hash_map workgroups_; std::unique_ptr default_calculator_; }; diff --git a/tensorflow/lite/delegates/gpu/gl_delegate.cc b/tensorflow/lite/delegates/gpu/gl_delegate.cc index 0587cb4f3a3..2f25539802a 100644 --- a/tensorflow/lite/delegates/gpu/gl_delegate.cc +++ b/tensorflow/lite/delegates/gpu/gl_delegate.cc @@ -160,7 +160,7 @@ class Delegate { tensors_[value->id] = {value->tensor.shape, 0}; } - std::unordered_set tflite_graph_io; + std::unordered_set tflite_graph_io; // NOLINT // Prepare graph inputs. // diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/elementwise.cc b/tensorflow/lite/delegates/gpu/metal/kernels/elementwise.cc index 53c1c5b38dd..9edfc884638 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/elementwise.cc +++ b/tensorflow/lite/delegates/gpu/metal/kernels/elementwise.cc @@ -16,9 +16,9 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/metal/kernels/elementwise.h" #include -#include #include +#include "absl/container/flat_hash_map.h" #include "absl/strings/substitute.h" #include "tensorflow/lite/delegates/gpu/common/convert.h" #include "tensorflow/lite/delegates/gpu/common/operations.h" @@ -32,7 +32,7 @@ namespace metal { namespace { std::string OneInputFunctor(OperationType op_type, const std::string& value) { - const std::unordered_map functors{ + const absl::flat_hash_map functors{ {OperationType::ABS, "abs($0)"}, {OperationType::SIN, "sin($0)"}, {OperationType::HARD_SWISH, @@ -62,7 +62,7 @@ std::string OneInputFunctor(OperationType op_type, const std::string& value) { std::string TwoInputFunctor(OperationType op_type, const std::string& value0, const std::string& value1) { - const std::unordered_map functors{ + const absl::flat_hash_map functors{ {OperationType::ADD, "$0 + $1"}, {OperationType::DIV, "$0 / $1"}, {OperationType::MAXIMUM, "max($0, $1)"}, diff --git a/tensorflow/lite/delegates/gpu/metal_delegate.mm b/tensorflow/lite/delegates/gpu/metal_delegate.mm index 45bfe1f3b2f..c2e5289c604 100644 --- a/tensorflow/lite/delegates/gpu/metal_delegate.mm +++ b/tensorflow/lite/delegates/gpu/metal_delegate.mm @@ -26,6 +26,7 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_set.h" #include "absl/types/span.h" #include "tensorflow/lite/builtin_ops.h" #include "tensorflow/lite/c/common.h" @@ -613,7 +614,7 @@ class Delegate { // Whenever quantized inference is enabled, this maps the tensor index of each // originally quantized (8-bit) tensor to its float version added in // model_builder - and vice versa. 
- std::unordered_map quant_conversion_map_; + absl::flat_hash_map quant_conversion_map_; TFLInferenceContext* inference_context_; // input and output buffers are passed into Metal inference engine From 2e4bf2c0bcd506a1ee20388dab498e75b2ca0826 Mon Sep 17 00:00:00 2001 From: YoungSeok Yoon Date: Thu, 6 Aug 2020 16:32:28 -0700 Subject: [PATCH 2303/2522] Prepend "TensorFlowLiteC/" to common.h path in delegate subspecs PiperOrigin-RevId: 325335212 Change-Id: I6a932d12d84d308c0083db97aed23817c40e7f30 --- tensorflow/lite/experimental/ios/BUILD.apple | 13 +++++++++---- tensorflow/lite/experimental/ios/ios.bzl | 7 ++++--- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/tensorflow/lite/experimental/ios/BUILD.apple b/tensorflow/lite/experimental/ios/BUILD.apple index 99ea2f8acbb..e1e3be2bcde 100644 --- a/tensorflow/lite/experimental/ios/BUILD.apple +++ b/tensorflow/lite/experimental/ios/BUILD.apple @@ -24,13 +24,20 @@ sh_binary( ) strip_common_include_path_prefix( - name = "strip_common_include_path", + name = "strip_common_include_path_core", hdr_labels = [ "//tensorflow/lite/c:c_api.h", - "//tensorflow/lite/delegates/gpu:metal_delegate.h", "//tensorflow/lite/delegates/xnnpack:xnnpack_delegate.h", + ], +) + +strip_common_include_path_prefix( + name = "strip_common_include_path_subspecs", + hdr_labels = [ + "//tensorflow/lite/delegates/gpu:metal_delegate.h", "//tensorflow/lite/experimental/delegates/coreml:coreml_delegate.h", ], + prefix = "TensorFlowLiteC/", ) # bazel build -c opt --config=ios_fat //tensorflow/lite/experimental/ios:TensorFlowLiteC_framework @@ -77,7 +84,6 @@ tflite_ios_static_framework( name = "TensorFlowLiteCCoreML_framework", hdrs = [ ":coreml_delegate.h", - "//tensorflow/lite/c:common.h", ], allowlist_symbols_file = ":allowlist_TensorFlowLiteCCoreML.txt", bundle_name = "TensorFlowLiteCCoreML", @@ -97,7 +103,6 @@ tflite_ios_static_framework( name = "TensorFlowLiteCMetal_framework", hdrs = [ ":metal_delegate.h", - "//tensorflow/lite/c:common.h", ], allowlist_symbols_file = ":allowlist_TensorFlowLiteCMetal.txt", bundle_name = "TensorFlowLiteCMetal", diff --git a/tensorflow/lite/experimental/ios/ios.bzl b/tensorflow/lite/experimental/ios/ios.bzl index 43ca6ec6010..63747eb8d1a 100644 --- a/tensorflow/lite/experimental/ios/ios.bzl +++ b/tensorflow/lite/experimental/ios/ios.bzl @@ -76,13 +76,14 @@ def tflite_ios_static_framework( # to the "Headers" directory with no header path prefixes. This auxiliary rule # is used for stripping the path prefix to the "common.h" file included by the # "c_api.h" header. -def strip_common_include_path_prefix(name, hdr_labels): +def strip_common_include_path_prefix(name, hdr_labels, prefix = ""): """Create modified header files with the common.h include path stripped out. Args: name: The name to be used as a prefix to the generated genrules. hdr_labels: List of header labels to strip out the include path. Each label must end with a colon followed by the header file name. + prefix: Optional prefix path to prepend to the common.h inclusion path. 
""" for hdr_label in hdr_labels: @@ -94,8 +95,8 @@ def strip_common_include_path_prefix(name, hdr_labels): srcs = [hdr_label], outs = [hdr_filename], cmd = """ - sed 's|#include ".*common.h"|#include "common.h"|'\ + sed 's|#include ".*common.h"|#include "{}common.h"|'\ "$(location {})"\ > "$@" - """.format(hdr_label), + """.format(prefix, hdr_label), ) From 1db5e0f3233230cd99b4877d8a11fb96f16aac36 Mon Sep 17 00:00:00 2001 From: Andy Ly Date: Thu, 6 Aug 2020 16:57:28 -0700 Subject: [PATCH 2304/2522] Update ReplicateToIslandPass in preparation for cloning functions (NFC). Pass is now converted to a module pass and some pointers are replaced with references. Modification to replicate variant ops are now under one function. PiperOrigin-RevId: 325339815 Change-Id: I1419ea05c808b471701189698ceb965556b81ae8 --- .../mlir/tensorflow/transforms/bridge.cc | 3 +- .../mlir/tensorflow/transforms/passes.h | 2 +- .../transforms/replicate_to_island.cc | 172 ++++++++++-------- 3 files changed, 95 insertions(+), 82 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc b/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc index ed0528ae054..2a5c8a05ef3 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc @@ -47,7 +47,8 @@ void AddGraphExportLoweringPasses(OpPassManager &pm) { pm.addNestedPass(CreateFunctionalToExecutorDialectConversionPass()); add_pass(TFDevice::CreateParallelizeEmbeddingParamsOpsPass()); - add_pass(TFDevice::CreateReplicateToIslandPass()); + pm.addPass(TFDevice::CreateReplicateToIslandPass()); + pm.addPass(CreateBreakUpIslandsPass()); add_pass(TFDevice::CreateParallelExecuteToIslandsPass()); add_pass(TFDevice::CreateLaunchToDeviceAttributePass()); } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/passes.h b/tensorflow/compiler/mlir/tensorflow/transforms/passes.h index 3afadd2b06d..3be6c9e1a70 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/passes.h +++ b/tensorflow/compiler/mlir/tensorflow/transforms/passes.h @@ -239,7 +239,7 @@ std::unique_ptr> CreateReplicateInvariantOpHoistingPass(); // Creates a pass that forms replica `tf_executor.island` from a single // `tf_device.replicate` island. -std::unique_ptr> CreateReplicateToIslandPass(); +std::unique_ptr> CreateReplicateToIslandPass(); // Creates a pass that creates `tf_executor.island` from a single // `tf_device.parallel_execute` island. diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/replicate_to_island.cc b/tensorflow/compiler/mlir/tensorflow/transforms/replicate_to_island.cc index fcf0bb98a61..e7f2977dbcd 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/replicate_to_island.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/replicate_to_island.cc @@ -49,8 +49,8 @@ constexpr char kReplicaIdAttr[] = "_xla_replica_id"; constexpr char kDeviceOrdinalAttr[] = "device_ordinal"; struct ReplicateToIslandPass - : public PassWrapper { - void runOnFunction() override; + : public PassWrapper> { + void runOnOperation() override; }; // Returns whether op requires `_xla_replica_id` attribute. @@ -64,29 +64,62 @@ bool RequiresDeviceOrdinalAttribute(Operation* op) { llvm::isa(op); } -// Adds integer attribute that represents replica id for replicated ops that -// require replica id attribute. 
-void AddReplicaIdToOpsInReplicatedRegion(OpBuilder* builder, Region* region, - const int replica_id) { - region->walk([&](Operation* replicated_op) { - if (RequiresReplicaIDAttribute(replicated_op)) - replicated_op->setAttr(kReplicaIdAttr, - builder->getI32IntegerAttr(replica_id)); +// Updates replica variant ops in a region based on replica `replica_id`. +// TODO(b/157624749): Replace this with better abstraction to differentiate ops +// for different replicas. Some ops, such as XlaHostCompute op or TPU Embedding +// ops, require replica id to be added as an op attribute to be used during +// execution. Handle such ops separately and add an integer attribute that +// represents replica id. +LogicalResult UpdateRegionReplicateVariantOps( + OpBuilder& builder, Location loc, Region& region, int replica_id, + const llvm::Optional& devices) { + int64_t device_ordinal = -1; + const bool has_devices = devices.hasValue(); + if (has_devices) { + if (auto tpu_replica_0 = devices.getValue().get("TPU_REPLICATED_CORE_0")) { + llvm::StringRef tpu_device = tpu_replica_0.cast()[replica_id] + .cast() + .getValue(); + if (failed(tensorflow::GetDeviceOrdinalFromDeviceString( + loc, tpu_device, &device_ordinal))) { + return failure(); + } + } + } + + region.walk([&](Operation* op) { + // Add replica id. + if (RequiresReplicaIDAttribute(op)) + op->setAttr(kReplicaIdAttr, builder.getI32IntegerAttr(replica_id)); + + if (!has_devices) return; + + // Map aliased devices to explicit devices based on replica. + if (auto launch = dyn_cast(op)) + if (auto device_by_replica = devices.getValue().get(launch.device())) + launch.setAttr( + kDeviceAttr, + device_by_replica.cast()[replica_id].cast()); + + // Add device ordinal. + if (device_ordinal >= 0 && RequiresDeviceOrdinalAttribute(op)) + op->setAttr(kDeviceOrdinalAttr, + builder.getI64IntegerAttr(device_ordinal)); }); + + return success(); } // Creates islands per replica from `tf_device.replicate` region. If for a // `tf_device.launch` op the device is an aliased device of the // `tf_device.replicate`, the device will be remapped to an explicit device // for the associated replica island. -llvm::SmallVector ExpandReplicateIntoReplicas( - const Dialect* tf_dialect, OpBuilder* builder, +LogicalResult ExpandReplicateIntoReplicas( + const Dialect* tf_dialect, OpBuilder& builder, tf_executor::IslandOp island_op, tf_device::ReplicateOp replicate_op, - int num_replicas) { - auto devices = replicate_op.devices(); - const bool has_devices = devices.hasValue(); - llvm::SmallVector replicas; + int num_replicas, llvm::SmallVectorImpl& replicas) { replicas.reserve(num_replicas); + auto devices = replicate_op.devices(); // Collect result types and operands. Operation& terminator = replicate_op.GetBody().back(); @@ -95,16 +128,16 @@ llvm::SmallVector ExpandReplicateIntoReplicas( llvm::SmallVector replica_inputs(island_op.controlInputs()); // Replace replicate terminator with YieldOp. - builder->setInsertionPoint(&terminator); - builder->create(terminator.getLoc(), - terminator.getOperands()); + builder.setInsertionPoint(&terminator); + builder.create(terminator.getLoc(), + terminator.getOperands()); terminator.erase(); - builder->setInsertionPoint(island_op); + builder.setInsertionPoint(island_op); BlockAndValueMapping mapping; for (int i : llvm::seq(0, num_replicas)) { // Create new island for replica. - auto replica = builder->create( + auto replica = builder.create( island_op.getLoc(), output_types, control_type, replica_inputs); // Map block arg to replica arg. 
@@ -116,42 +149,15 @@ llvm::SmallVector ExpandReplicateIntoReplicas( // Copy over replicate region into replica island. replicate_op.body().cloneInto(&replica.body(), mapping); - // TODO(b/157624749): Replace this with better abstraction to - // differentiate ops for different replicas. - // Some ops, such as XlaHostCompute op or TPU Embedding ops, require - // replica id to be added as an op attribute to be used during - // execution. Handle such ops separately and add an integer attribute - // that represents replica id. - AddReplicaIdToOpsInReplicatedRegion(builder, &replica.body(), i); - - // Map aliased devices to explicit devices based on replica. - if (has_devices) { - replica.walk([&](tf_device::LaunchOp launch) { - if (auto device_by_replica = devices.getValue().get(launch.device())) - launch.setAttr( - kDeviceAttr, - device_by_replica.cast()[i].cast()); - }); - - if (auto tpu_replica_0 = - devices.getValue().get("TPU_REPLICATED_CORE_0")) { - int64_t device_ordinal = 0; - tensorflow::GetDeviceOrdinalFromDeviceString( - replicate_op.getLoc(), - tpu_replica_0.cast()[i].cast().getValue(), - &device_ordinal); - replica.walk([&](Operation* op) { - if (RequiresDeviceOrdinalAttribute(op)) - op->setAttr(kDeviceOrdinalAttr, - builder->getI64IntegerAttr(device_ordinal)); - }); - } - } + if (failed(UpdateRegionReplicateVariantOps(builder, replicate_op.getLoc(), + replica.body(), /*replica_id=*/i, + devices))) + return failure(); replicas.push_back(replica); } - return replicas; + return success(); } // Creates islands per replica from `tf_device.replicate` region and remap @@ -204,17 +210,18 @@ llvm::SmallVector ExpandReplicateIntoReplicas( // }) {device = "/DEVICE:3"} : () -> tensor // tf_executor.yield %a1, %b1 : tensor, tensor // } -void CreateIslandsFromReplicate(const Dialect* tf_dialect, - tf_executor::GraphOp graph_op, - tf_executor::IslandOp island_op, - tf_device::ReplicateOp replicate_op) { +LogicalResult CreateIslandsFromReplicate(const Dialect* tf_dialect, + tf_executor::GraphOp graph_op, + tf_executor::IslandOp island_op, + tf_device::ReplicateOp replicate_op) { OpBuilder builder(island_op); const int num_replicas = replicate_op.n().getLimitedValue(); // Create islands per replica. - llvm::SmallVector replicas = - ExpandReplicateIntoReplicas(tf_dialect, &builder, island_op, replicate_op, - num_replicas); + llvm::SmallVector replicas; + if (failed(ExpandReplicateIntoReplicas(tf_dialect, builder, island_op, + replicate_op, num_replicas, replicas))) + return failure(); // Collect all replica results. llvm::SmallVector replicas_outputs(replicate_op.getNumResults(), @@ -265,36 +272,41 @@ void CreateIslandsFromReplicate(const Dialect* tf_dialect, } island_op.erase(); + return success(); } -// Finds islands with a single `tf_device.replicate` and create individual -// islands per replica of the replicate. 
-void LowerSingleIslandReplicateToIslands(const Dialect* tf_dialect, - tf_executor::GraphOp graph_op, - tf_executor::IslandOp island_op) { - if (!island_op.WrapsSingleOp()) return; - - if (auto replicate_op = - llvm::dyn_cast(&island_op.GetBody().front())) - CreateIslandsFromReplicate(tf_dialect, graph_op, island_op, replicate_op); -} - -void ReplicateToIslandPass::runOnFunction() { +void ReplicateToIslandPass::runOnOperation() { + auto module = getOperation(); const Dialect* tf_dialect = getContext().getRegisteredDialect("tf"); if (!tf_dialect) { - signalPassFailure(); - getFunction().emitError() << "'tf' dialect is not registered"; + module.emitError() << "'tf' dialect is not registered"; + return signalPassFailure(); } - getFunction().walk([&](tf_executor::GraphOp graph_op) { - for (auto island_op : - llvm::make_early_inc_range(graph_op.getOps())) - LowerSingleIslandReplicateToIslands(tf_dialect, graph_op, island_op); + // Find islands with a single `tf_device.replicate` and create individual + // islands per replica of the replicate. + llvm::SmallVector replicate_op_islands; + module.walk([&](tf_executor::GraphOp graph_op) { + for (auto island_op : graph_op.getOps()) { + if (!island_op.WrapsSingleOp()) continue; + + if (isa(&island_op.GetBody().front())) + replicate_op_islands.push_back(island_op); + } }); + + for (tf_executor::IslandOp island_op : replicate_op_islands) { + auto graph_op = island_op.getParentOfType(); + auto replicate_op = + cast(island_op.GetBody().front()); + if (failed(CreateIslandsFromReplicate(tf_dialect, graph_op, island_op, + replicate_op))) + return signalPassFailure(); + } } } // anonymous namespace -std::unique_ptr> CreateReplicateToIslandPass() { +std::unique_ptr> CreateReplicateToIslandPass() { return std::make_unique(); } From 3fd7bac6ed8fc9edac3be94c4fde0fc72a630663 Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: Thu, 6 Aug 2020 17:09:55 -0700 Subject: [PATCH 2305/2522] Fix tsan failure. Instead of creating a new threadpool in ReadElementsParallel, we can reuse the existing thread_pool_ field. This solves the tsan failure because now the destructor will block until the threads created in ReadElementsParallel exit. PiperOrigin-RevId: 325341982 Change-Id: I1107bde215a5384ded98633f5f46a2dde3ff7e23 --- tensorflow/core/kernels/data/BUILD | 1 - .../kernels/data/parallel_interleave_dataset_op.cc | 10 +++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/kernels/data/BUILD b/tensorflow/core/kernels/data/BUILD index 94cc31a8cb6..1365f8a1d31 100644 --- a/tensorflow/core/kernels/data/BUILD +++ b/tensorflow/core/kernels/data/BUILD @@ -623,7 +623,6 @@ tf_cc_test( name = "parallel_interleave_dataset_op_test", size = "small", srcs = ["parallel_interleave_dataset_op_test.cc"], - tags = ["notsan"], # TODO(b/147147071): Remove this tag once bug fix lands. deps = [ ":captured_function", ":dataset_test_base", diff --git a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc index 54ad8886a95..90dd5337c1d 100644 --- a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc +++ b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc @@ -41,6 +41,7 @@ limitations under the License. 
#include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/blocking_counter.h" #include "tensorflow/core/platform/cpu_info.h" +#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/stringprintf.h" #include "tensorflow/core/profiler/lib/traceme.h" #include "tensorflow/core/profiler/lib/traceme_encode.h" @@ -1342,12 +1343,10 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase { IteratorContext* ctx, IteratorStateReader* reader, int64 size, const string& name, std::vector>* elements) { elements->resize(size); - std::unique_ptr threadpool = - ctx->CreateThreadPool(absl::StrCat("read_", name), size); Status s = Status::OK(); BlockingCounter counter(size); for (int idx = 0; idx < size; ++idx) { - threadpool->Schedule( + thread_pool_->Schedule( [this, ctx, reader, idx, name, &s, &counter, elements] { RecordStart(ctx); auto cleanup = gtl::MakeCleanup([this, ctx, &counter]() { @@ -1357,6 +1356,11 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase { std::shared_ptr elem; Status ret_status = ReadElement(ctx, reader, idx, name, &elem); mutex_lock l(*mu_); + if (cancelled_) { + s.Update( + errors::Cancelled("Cancelled in ReadElementsParallel")); + return; + } if (!ret_status.ok()) { s.Update(ret_status); return; From 0573f8cb6976c592eee660da9e4ce58e0c1eb0c0 Mon Sep 17 00:00:00 2001 From: bhack Date: Thu, 6 Aug 2020 23:32:37 +0000 Subject: [PATCH 2306/2522] Check input and axis params --- tensorflow/python/ops/array_ops.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 5d68deb7ac1..8724ecebbfe 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -5316,12 +5316,12 @@ def quantize_and_dequantize( A `Tensor`. Each element is the result of quantizing and dequantizing the corresponding element of `input`. """ + with ops.name_scope(name, "quantize_and_dequantize", [input]) as name: + if not tensor_util.is_tensor(input): + input = ops.convert_to_tensor(input) if axis is None: axis = -1 - elif axis < 0: - if input.shape.ndims is None: - raise ValueError("input should have known rank to use negative axis.") - axis %= input.shape.ndims + axis = get_positive_axis(axis, input.shape.ndims) return gen_array_ops.quantize_and_dequantize_v2( input, From bcf052b88c480ec87b14fd1430f89f6bedfd6e7d Mon Sep 17 00:00:00 2001 From: Lucy Fox Date: Thu, 6 Aug 2020 17:29:29 -0700 Subject: [PATCH 2307/2522] Relax DynamicBroadcastInDim verifier when dimensions are dynamic. For input and output dimensions which must match, we shouldn't fail in the case where one dim is dynamic and the other is static. This is insufficient information to conclude a dimension mismatch. 
PiperOrigin-RevId: 325344738 Change-Id: Ifb7b95219aa97244a08c053d70cb82020afc4c48 --- .../mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc | 8 ++++--- tensorflow/compiler/mlir/hlo/tests/ops.mlir | 24 +++++++++++++++++++ 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc index de3f950c300..a1f0480f4fe 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc @@ -748,10 +748,12 @@ static LogicalResult Verify(DynamicBroadcastInDimOp op) { auto dimSize = operandType.getDimSize(i); auto resultDimSize = resultType.getDimSize(dimIndex); - if (dimSize != 1 && dimSize != resultDimSize) { + // Note: verifyCompatibleShapes doesn't consider size-1 broadcasting, so we + // add a manual check for this. + if (dimSize != 1 && failed(verifyCompatibleShape(dimSize, resultDimSize))) { return op.emitOpError( - llvm::formatv("size of operand dimension {0} ({1}) is not equal to " - "1 or size of result dimension {2} ({3})", + llvm::formatv("size of operand dimension {0} ({1}) is not compatible " + "with size of result dimension {2} ({3})", i, dimSize, dimIndex, resultDimSize)); } } diff --git a/tensorflow/compiler/mlir/hlo/tests/ops.mlir b/tensorflow/compiler/mlir/hlo/tests/ops.mlir index 212e79432b1..3443f21bc84 100644 --- a/tensorflow/compiler/mlir/hlo/tests/ops.mlir +++ b/tensorflow/compiler/mlir/hlo/tests/ops.mlir @@ -116,6 +116,30 @@ func @dynamic_broadcast_in_dim(%arg0: tensor, %shape: tensor<3xi64>) -> // ----- +// CHECK-LABEL: func @dynamic_broadcast_in_dim_unknown_dim +func @dynamic_broadcast_in_dim_unknown_dim(%arg0: tensor<32xf32>, %shape: tensor<3xi64>) -> tensor { + %0 = "mhlo.dynamic_broadcast_in_dim"(%arg0, %shape) {broadcast_dimensions = dense<[2]> : tensor<1xi64>} : (tensor<32xf32>, tensor<3xi64>) -> tensor + return %0 : tensor +} + +// ----- + +// CHECK-LABEL: func @dynamic_broadcast_in_dim_ok_dim +func @dynamic_broadcast_in_dim_ok_dim(%arg0: tensor<1xf32>, %shape: tensor<3xi64>) -> tensor<7x8x9xf32> { + %0 = "mhlo.dynamic_broadcast_in_dim"(%arg0, %shape) {broadcast_dimensions = dense<[2]> : tensor<1xi64>} : (tensor<1xf32>, tensor<3xi64>) -> tensor<7x8x9xf32> + return %0 : tensor<7x8x9xf32> +} + +// ----- + +func @dynamic_broadcast_in_dim_shape_mismatch(%arg0: tensor<32xf32>, %shape: tensor<3xi64>) -> tensor<7x8x9xf32> { + // expected-error@+1 {{size of operand dimension 0 (32) is not compatible with size of result dimension 2 (9)}} + %0 = "mhlo.dynamic_broadcast_in_dim"(%arg0, %shape) {broadcast_dimensions = dense<[2]> : tensor<1xi64>} : (tensor<32xf32>, tensor<3xi64>) -> tensor<7x8x9xf32> + return %0 : tensor<7x8x9xf32> +} + +// ----- + func @broadcast_in_dim_bad_dimension_rank(%arg0: tensor<1x2xi32>) -> tensor<1x2x3xi32> { // expected-error@+1 {{broadcast_dimensions has rank 2 instead of rank 1}} %0 = "mhlo.broadcast_in_dim"(%arg0) {broadcast_dimensions = dense<[[1,1],[1,1]]> : tensor<2x2xi64>} : (tensor<1x2xi32>) -> tensor<1x2x3xi32> From d9ea5051104b3580fee2d49c94be2ec45012672f Mon Sep 17 00:00:00 2001 From: Jay Shi Date: Thu, 6 Aug 2020 17:32:29 -0700 Subject: [PATCH 2308/2522] [tf.data] Record the number of times tf.data experiment applied to input pipelines. 
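The recording mechanism is the usual monitoring-counter pattern; roughly (a sketch mirroring the calls added in this patch, not additional patch content):

#include <string>

#include "tensorflow/core/lib/monitoring/counter.h"

// A counter cell keyed by experiment name, bumped once per input pipeline the
// experiment is applied to.
auto* experiment_counter = tensorflow::monitoring::Counter<1>::New(
    "/tensorflow/data/experiment",
    "The number of times tf.data experiment is applied to input pipelines.",
    "name");

void RecordExperimentApplied(const std::string& experiment_name) {
  experiment_counter->GetCell(experiment_name)->IncrementBy(1);
}
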
PiperOrigin-RevId: 325345128 Change-Id: I8de30c47f681a6f41e25e5ade4460f20a9d7bb5d --- tensorflow/core/framework/metrics.cc | 9 +++++++++ tensorflow/core/framework/metrics.h | 3 +++ tensorflow/core/kernels/data/dataset_utils.cc | 11 ----------- .../core/kernels/data/optimize_dataset_op.cc | 15 +++++++++++++++ 4 files changed, 27 insertions(+), 11 deletions(-) diff --git a/tensorflow/core/framework/metrics.cc b/tensorflow/core/framework/metrics.cc index 8cbfcd5342a..f5aff3a4e11 100644 --- a/tensorflow/core/framework/metrics.cc +++ b/tensorflow/core/framework/metrics.cc @@ -80,6 +80,11 @@ auto* tf_data_bytes_fetched_counter = monitoring::Counter<0>::New( auto* tf_data_elements_counter = monitoring::Counter<1>::New( "/tensorflow/data/elements", "tf.data elements", "name"); +auto* tf_data_experiment_counter = monitoring::Counter<1>::New( + "/tensorflow/data/experiment", + "The number of times tf.data experiment is applied to input pipelines.", + "name"); + auto* tf_data_fingerprint_counter = monitoring::Counter<1>::New( "/tensorflow/data/fingerprint", "tf.data fingerprint", "name"); @@ -179,6 +184,10 @@ void RecordTFDataBytesFetched(int64 num_bytes) { tf_data_bytes_fetched_counter->GetCell()->IncrementBy(num_bytes); } +void RecordTFDataExperiment(const string& name) { + tf_data_experiment_counter->GetCell(name)->IncrementBy(1); +} + void RecordTFDataFingerprint(const string& name) { tf_data_fingerprint_counter->GetCell(name)->IncrementBy(1); } diff --git a/tensorflow/core/framework/metrics.h b/tensorflow/core/framework/metrics.h index 7bc9a1bda0b..f7c90ce593e 100644 --- a/tensorflow/core/framework/metrics.h +++ b/tensorflow/core/framework/metrics.h @@ -56,6 +56,9 @@ monitoring::CounterCell* GetTFDataElementsCounter(const string& name); // Records the number of bytes fetched from tf.data.Dataset iterator. void RecordTFDataBytesFetched(int64 num_bytes); +// Records the number of times tf.data experiment is applied to input pipelines. +void RecordTFDataExperiment(const string& name); + // Records the time spent in ItertatorResource::GetNext() in microseconds. void RecordTFDataGetNextDuration(uint64 duration_us); diff --git a/tensorflow/core/kernels/data/dataset_utils.cc b/tensorflow/core/kernels/data/dataset_utils.cc index 4151442d747..66de482467d 100644 --- a/tensorflow/core/kernels/data/dataset_utils.cc +++ b/tensorflow/core/kernels/data/dataset_utils.cc @@ -1018,17 +1018,6 @@ std::vector SelectOptimizations( } } - // Log the experiments that will be applied. - if (VLOG_IS_ON(1)) { - for (auto& pair : live_experiments) { - string experiment = pair.first; - if (std::find(optimizations_set.begin(), optimizations_set.end(), - experiment) != optimizations_set.end()) { - VLOG(1) << "The experiment \"" << experiment << "\" is applied."; - } - } - } - std::vector optimizations; optimizations.insert(optimizations.end(), optimizations_set.begin(), optimizations_set.end()); diff --git a/tensorflow/core/kernels/data/optimize_dataset_op.cc b/tensorflow/core/kernels/data/optimize_dataset_op.cc index a0101435794..a566693ec3d 100644 --- a/tensorflow/core/kernels/data/optimize_dataset_op.cc +++ b/tensorflow/core/kernels/data/optimize_dataset_op.cc @@ -101,6 +101,21 @@ void OptimizeDatasetOp::MakeDataset(OpKernelContext* ctx, DatasetBase* input, job_name, opt_ins_raw, opt_outs_raw, live_experiments, optimizations_enabled, optimizations_disabled, optimizations_default, hash_func); + + // Log the experiments that will be applied. 
+ if (!live_experiments.empty() && VLOG_IS_ON(1)) { + VLOG(1) << "The input pipeline is subject to tf.data experiment. " + "Please see `go/tf-data-experiments` for more details."; + + for (auto& pair : live_experiments) { + string experiment = pair.first; + if (std::find(optimizations.begin(), optimizations.end(), + experiment) != optimizations.end()) { + VLOG(1) << "The experiment \"" << experiment << "\" is applied."; + metrics::RecordTFDataExperiment(experiment); + } + } + } } } From 235dbc2dc26a00ebf6d2b1f3cba37cba9d548ffc Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Thu, 6 Aug 2020 17:49:37 -0700 Subject: [PATCH 2309/2522] Added info about supported image formats to DeviceInfo. storage_type_util cleaned from OpenCL API calls/structs. PiperOrigin-RevId: 325347475 Change-Id: I096636e4dd837ef9754df70caf37842c605c24f5 --- tensorflow/lite/delegates/gpu/cl/BUILD | 5 +- .../lite/delegates/gpu/cl/cl_context.cc | 39 +++++++++++++ tensorflow/lite/delegates/gpu/cl/cl_device.cc | 16 ++--- tensorflow/lite/delegates/gpu/cl/cl_device.h | 8 +-- .../lite/delegates/gpu/cl/cl_program.cc | 4 +- .../lite/delegates/gpu/cl/device_info.cc | 22 +++++++ .../lite/delegates/gpu/cl/device_info.h | 17 ++++++ .../lite/delegates/gpu/cl/environment.cc | 4 +- .../delegates/gpu/cl/inference_context.cc | 7 +-- .../gpu/cl/kernels/conv_buffer_1x1.cc | 4 +- .../gpu/cl/kernels/conv_constants.cc | 4 +- .../delegates/gpu/cl/kernels/conv_powervr.cc | 12 ++-- .../delegates/gpu/cl/kernels/conv_texture.cc | 6 +- .../delegates/gpu/cl/kernels/converter.cc | 8 +-- .../gpu/cl/kernels/convolution_transposed.cc | 4 +- .../cl/kernels/convolution_transposed_thin.cc | 2 +- .../gpu/cl/kernels/depthwise_conv_3x3.cc | 2 +- .../delegates/gpu/cl/kernels/elementwise.cc | 14 ++--- .../gpu/cl/kernels/fully_connected.cc | 2 +- .../delegates/gpu/cl/kernels/gpu_operation.cc | 6 +- .../lite/delegates/gpu/cl/kernels/util.cc | 4 +- .../lite/delegates/gpu/cl/kernels/winograd.cc | 4 +- .../gpu/cl/selectors/convolution_selector.cc | 6 +- .../convolution_transposed_selector.cc | 2 +- .../cl/selectors/dw_convolution_selector.cc | 4 +- .../cl/selectors/fully_connected_selector.cc | 2 +- .../gpu/cl/selectors/operation_selector.cc | 11 ++-- .../delegates/gpu/cl/storage_type_util.cc | 58 +++++++++---------- .../lite/delegates/gpu/cl/storage_type_util.h | 12 ++-- 29 files changed, 177 insertions(+), 112 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/BUILD b/tensorflow/lite/delegates/gpu/cl/BUILD index 66bcbc826ea..d6076e221bd 100644 --- a/tensorflow/lite/delegates/gpu/cl/BUILD +++ b/tensorflow/lite/delegates/gpu/cl/BUILD @@ -257,6 +257,7 @@ cc_library( srcs = ["device_info.cc"], hdrs = ["device_info.h"], deps = [ + "//tensorflow/lite/delegates/gpu/common:data_type", "@com_google_absl//absl/strings", ], ) @@ -468,11 +469,11 @@ cc_library( srcs = ["storage_type_util.cc"], hdrs = ["storage_type_util.h"], deps = [ - ":cl_context", - ":cl_device", + ":device_info", ":tensor_type", "//tensorflow/lite/delegates/gpu/common:data_type", "//tensorflow/lite/delegates/gpu/common:shape", + "//tensorflow/lite/delegates/gpu/common:util", ], ) diff --git a/tensorflow/lite/delegates/gpu/cl/cl_context.cc b/tensorflow/lite/delegates/gpu/cl/cl_context.cc index e697c78b692..9a8f404c46e 100644 --- a/tensorflow/lite/delegates/gpu/cl/cl_context.cc +++ b/tensorflow/lite/delegates/gpu/cl/cl_context.cc @@ -43,6 +43,44 @@ std::vector GetSupportedImage2DFormats(cl_context context, return result; } +bool IsEqualToImageFormat(cl_image_format image_format, DataType data_type, 
+ int num_channels) { + return image_format.image_channel_data_type == + ToImageChannelType(data_type) && + image_format.image_channel_order == ToChannelOrder(num_channels); +} + +void AddSupportedImageFormats(cl_context context, DeviceInfo* info) { + auto supported_formats = + GetSupportedImage2DFormats(context, CL_MEM_READ_WRITE); + for (auto format : supported_formats) { + info->supports_r_f16_tex2d = + info->supports_r_f16_tex2d || + IsEqualToImageFormat(format, DataType::FLOAT16, 1); + info->supports_rg_f16_tex2d = + info->supports_rg_f16_tex2d || + IsEqualToImageFormat(format, DataType::FLOAT16, 2); + info->supports_rgb_f16_tex2d = + info->supports_rgb_f16_tex2d || + IsEqualToImageFormat(format, DataType::FLOAT16, 3); + info->supports_rgba_f16_tex2d = + info->supports_rgba_f16_tex2d || + IsEqualToImageFormat(format, DataType::FLOAT16, 4); + info->supports_r_f32_tex2d = + info->supports_r_f32_tex2d || + IsEqualToImageFormat(format, DataType::FLOAT32, 1); + info->supports_rg_f32_tex2d = + info->supports_rg_f32_tex2d || + IsEqualToImageFormat(format, DataType::FLOAT32, 2); + info->supports_rgb_f32_tex2d = + info->supports_rgb_f32_tex2d || + IsEqualToImageFormat(format, DataType::FLOAT32, 3); + info->supports_rgba_f32_tex2d = + info->supports_rgba_f32_tex2d || + IsEqualToImageFormat(format, DataType::FLOAT32, 4); + } +} + absl::Status CreateCLContext(const CLDevice& device, cl_context_properties* properties, CLContext* result) { @@ -55,6 +93,7 @@ absl::Status CreateCLContext(const CLDevice& device, absl::StrCat("Failed to create a compute context - ", CLErrorCodeToString(error_code))); } + AddSupportedImageFormats(context, &device.info_); *result = CLContext(context, true); return absl::OkStatus(); diff --git a/tensorflow/lite/delegates/gpu/cl/cl_device.cc b/tensorflow/lite/delegates/gpu/cl/cl_device.cc index b93bfb25ad1..16f5ce217e9 100644 --- a/tensorflow/lite/delegates/gpu/cl/cl_device.cc +++ b/tensorflow/lite/delegates/gpu/cl/cl_device.cc @@ -248,24 +248,24 @@ DeviceInfo DeviceInfoFromDeviceID(cl_device_id id) { } CLDevice::CLDevice(cl_device_id id, cl_platform_id platform_id) - : id_(id), platform_id_(platform_id), info_(DeviceInfoFromDeviceID(id)) {} + : info_(DeviceInfoFromDeviceID(id)), id_(id), platform_id_(platform_id) {} CLDevice::CLDevice(const CLDevice& device) - : id_(device.id_), platform_id_(device.platform_id_), info_(device.info_) {} + : info_(device.info_), id_(device.id_), platform_id_(device.platform_id_) {} CLDevice& CLDevice::operator=(const CLDevice& device) { if (this != &device) { + info_ = device.info_; id_ = device.id_; platform_id_ = device.platform_id_; - info_ = device.info_; } return *this; } CLDevice::CLDevice(CLDevice&& device) - : id_(device.id_), - platform_id_(device.platform_id_), - info_(std::move(device.info_)) { + : info_(std::move(device.info_)), + id_(device.id_), + platform_id_(device.platform_id_) { device.id_ = nullptr; device.platform_id_ = nullptr; } @@ -274,9 +274,9 @@ CLDevice& CLDevice::operator=(CLDevice&& device) { if (this != &device) { id_ = nullptr; platform_id_ = nullptr; + info_ = std::move(device.info_); std::swap(id_, device.id_); std::swap(platform_id_, device.platform_id_); - info_ = std::move(device.info_); } return *this; } @@ -368,7 +368,7 @@ bool CLDevice::IsAMD() const { return info_.IsAMD(); } bool CLDevice::IsIntel() const { return info_.IsIntel(); } bool CLDevice::SupportsOneLayerTextureArray() const { - return !IsAdreno() || info_.adreno_info.support_one_layer_texture_array; + return 
info_.SupportsOneLayerTextureArray(); } void CLDevice::DisableOneLayerTextureArray() { diff --git a/tensorflow/lite/delegates/gpu/cl/cl_device.h b/tensorflow/lite/delegates/gpu/cl/cl_device.h index 7e4792b0a53..e7cd274661d 100644 --- a/tensorflow/lite/delegates/gpu/cl/cl_device.h +++ b/tensorflow/lite/delegates/gpu/cl/cl_device.h @@ -46,9 +46,6 @@ class CLDevice { cl_platform_id platform() const { return platform_id_; } std::string GetPlatformVersion() const; - const DeviceInfo& GetInfo() const { return info_; } - const DeviceInfo* GetInfoPtr() const { return &info_; } - Vendor vendor() const { return info_.vendor; } OpenCLVersion cl_version() const { return info_.cl_version; } bool SupportsFP16() const; @@ -76,10 +73,13 @@ class CLDevice { bool SupportsOneLayerTextureArray() const; void DisableOneLayerTextureArray(); + // We update device info during context creation, so as supported texture + // formats can be requested from context only. + mutable DeviceInfo info_; + private: cl_device_id id_ = nullptr; cl_platform_id platform_id_ = nullptr; - DeviceInfo info_; }; absl::Status CreateDefaultGPUDevice(CLDevice* result); diff --git a/tensorflow/lite/delegates/gpu/cl/cl_program.cc b/tensorflow/lite/delegates/gpu/cl/cl_program.cc index 3b821dc3a5d..fd29ebec2d7 100644 --- a/tensorflow/lite/delegates/gpu/cl/cl_program.cc +++ b/tensorflow/lite/delegates/gpu/cl/cl_program.cc @@ -78,13 +78,13 @@ std::string CompilerOptionToString(const CLDevice& device, CompilerOptions option) { switch (option) { case CompilerOptions::ADRENO_FULL_SIMD_LINE: - if (device.GetInfo().adreno_info.gpu_version < 500) { + if (device.info_.adreno_info.gpu_version < 500) { return "-qcom-accelerate-16-bit"; } else { return "-qcom-accelerate-16-bit=true"; } case CompilerOptions::ADRENO_MORE_WAVES: - if (device.GetInfo().adreno_info.gpu_version >= 500) { + if (device.info_.adreno_info.gpu_version >= 500) { return "-qcom-accelerate-16-bit=false"; } else { return ""; diff --git a/tensorflow/lite/delegates/gpu/cl/device_info.cc b/tensorflow/lite/delegates/gpu/cl/device_info.cc index 7e0acb87ab7..d1ed69aa100 100644 --- a/tensorflow/lite/delegates/gpu/cl/device_info.cc +++ b/tensorflow/lite/delegates/gpu/cl/device_info.cc @@ -231,6 +231,28 @@ bool DeviceInfo::SupportsImage3D() const { return supports_image3d_writes; } +bool DeviceInfo::SupportsFloatImage2D(DataType data_type, int channels) const { + if (channels == 1) { + return data_type == DataType::FLOAT32 ? supports_r_f32_tex2d + : supports_r_f16_tex2d; + } else if (channels == 2) { + return data_type == DataType::FLOAT32 ? supports_rg_f32_tex2d + : supports_rg_f16_tex2d; + } else if (channels == 3) { + return data_type == DataType::FLOAT32 ? supports_rgb_f32_tex2d + : supports_rgb_f16_tex2d; + } else if (channels == 4) { + return data_type == DataType::FLOAT32 ? supports_rgba_f32_tex2d + : supports_rgba_f16_tex2d; + } else { + return false; + } +} + +bool DeviceInfo::SupportsOneLayerTextureArray() const { + return !IsAdreno() || adreno_info.support_one_layer_texture_array; +} + bool DeviceInfo::IsAdreno() const { return vendor == Vendor::kQualcomm; } bool DeviceInfo::IsAdreno3xx() const { diff --git a/tensorflow/lite/delegates/gpu/cl/device_info.h b/tensorflow/lite/delegates/gpu/cl/device_info.h index b13fe3df846..7123891ecf4 100644 --- a/tensorflow/lite/delegates/gpu/cl/device_info.h +++ b/tensorflow/lite/delegates/gpu/cl/device_info.h @@ -19,6 +19,8 @@ limitations under the License. 
#include #include +#include "tensorflow/lite/delegates/gpu/common/data_type.h" + // for use only in device_info.cc, but keep here to make tests int GetAdrenoGPUVersion(const std::string& gpu_version); @@ -131,6 +133,11 @@ struct DeviceInfo { bool SupportsImageBuffer() const; bool SupportsImage3D() const; + bool SupportsFloatImage2D(DataType data_type, int channels) const; + + // To track bug on some Adreno. b/131099086 + bool SupportsOneLayerTextureArray() const; + std::vector extensions; bool supports_fp16; bool supports_image3d_writes; @@ -157,6 +164,16 @@ struct DeviceInfo { bool supports_fp32_rtn; bool supports_fp16_rtn; + bool supports_r_f16_tex2d = false; + bool supports_rg_f16_tex2d = false; + bool supports_rgb_f16_tex2d = false; + bool supports_rgba_f16_tex2d = false; + + bool supports_r_f32_tex2d = false; + bool supports_rg_f32_tex2d = false; + bool supports_rgb_f32_tex2d = false; + bool supports_rgba_f32_tex2d = false; + AdrenoInfo adreno_info; MaliInfo mali_info; }; diff --git a/tensorflow/lite/delegates/gpu/cl/environment.cc b/tensorflow/lite/delegates/gpu/cl/environment.cc index c8b0b56978c..3d5546a8ebb 100644 --- a/tensorflow/lite/delegates/gpu/cl/environment.cc +++ b/tensorflow/lite/delegates/gpu/cl/environment.cc @@ -47,7 +47,7 @@ __kernel void main_function(__write_only image2d_array_t dst) { absl::Status CheckKernelSupportOfOneLayerTextureArray(Environment* env, bool* result) { // No bug on Adreno 6xx - if (env->device().GetInfo().adreno_info.gpu_version >= 600) { + if (env->device().info_.adreno_info.gpu_version >= 600) { *result = true; return absl::OkStatus(); } @@ -242,7 +242,7 @@ TensorStorageType GetFastestStorageType(const CLDevice& gpu) { } else if (gpu.IsPowerVR()) { return TensorStorageType::TEXTURE_2D; } else if (gpu.IsMali()) { - const MaliInfo mali_info = gpu.GetInfo().mali_info; + const MaliInfo mali_info = gpu.info_.mali_info; if (mali_info.IsMaliT8xx() || mali_info.IsBifrostGen3() || mali_info.IsValhall()) { return TensorStorageType::TEXTURE_2D; diff --git a/tensorflow/lite/delegates/gpu/cl/inference_context.cc b/tensorflow/lite/delegates/gpu/cl/inference_context.cc index 689b511bb5e..7802024302b 100644 --- a/tensorflow/lite/delegates/gpu/cl/inference_context.cc +++ b/tensorflow/lite/delegates/gpu/cl/inference_context.cc @@ -203,7 +203,7 @@ absl::Status InferenceContext::InitFromGraph( TuningParameters tuning_parameters; tuning_parameters.queue = env->profiling_queue(); - tuning_parameters.info = env->device().GetInfoPtr(); + tuning_parameters.info = &env->device().info_; if (create_info.hints.Check(ModelHints::kFastTuning)) { tuning_parameters.tuning_type = TuningType::FAST; } @@ -244,14 +244,13 @@ void InferenceContext::ReserveGraphTensors( if (graph.IsGraphInput(t->id) || graph.IsGraphOutput(t->id)) { if (shape.c < 4 && CanCreateTensorWithShape( - *creation_context.context, *creation_context.device, shape, + creation_context.device->info_, shape, TensorDescriptor{data_type, TensorStorageType::SINGLE_TEXTURE_2D, layout})) { storage_type = TensorStorageType::SINGLE_TEXTURE_2D; } } - storage_type = SelectBestStorageType(*creation_context.context, - *creation_context.device, shape, + storage_type = SelectBestStorageType(creation_context.device->info_, shape, storage_type, data_type, layout); tensor_reserver_.Add( t->id, {shape, TensorDescriptor{data_type, storage_type, layout}}); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc index de6021aa5fe..3216e2ef246 100644 --- 
a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc @@ -93,7 +93,7 @@ ConvBuffer1x1::ConvParams GetBestParams(const CLDevice& device, } bool can_use_flt8 = (shape.w * shape.b) % 2 == 0 && definition.precision != CalculationsPrecision::F32; - bool is_midgard = device.IsMali() && device.GetInfo().mali_info.IsMidgard(); + bool is_midgard = device.IsMali() && device.info_.mali_info.IsMidgard(); if (is_midgard) { if (can_use_flt8) { conv_params.element_size = 8; @@ -141,7 +141,7 @@ ConvBuffer1x1::ConvParams GetBestParams(const CLDevice& device, conv_params.element_size = 4; conv_params.block_size = int3(1, 1, 1); if (device.IsMali() && definition.precision == CalculationsPrecision::F16 && - device.GetInfo().compute_units_count <= 4) { + device.info_.compute_units_count <= 4) { conv_params.block_size.x *= 2; } return conv_params; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc index d5a2a56c19c..1ed900a2080 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc @@ -271,7 +271,7 @@ bool IsConvConstantsSupported(const CLDevice& device, ? sizeof(float) : sizeof(half); const int filters_buffer_size = filters_count * float_size; - const int kConstantMaxSize = GetOptimalMaxConstantSize(device.GetInfo()); + const int kConstantMaxSize = GetOptimalMaxConstantSize(device.info_); const int flt4_registers = DivideRoundUp(w_shape.o, 4); return filters_buffer_size <= kConstantMaxSize && flt4_registers <= 8; } @@ -283,7 +283,7 @@ absl::Status CreateConvConstants(const CreationContext& creation_context, if (!IsConvConstantsSupported(*creation_context.device, definition, attr)) { return absl::InvalidArgumentError("ConvConstants doesn't supported"); } - *result = ConvConstants(definition, attr, creation_context.device->GetInfo()); + *result = ConvConstants(definition, attr, creation_context.device->info_); RETURN_IF_ERROR( result->UploadWeights(attr.weights, creation_context.context)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc index f69368d1083..d65595d068c 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc @@ -718,7 +718,7 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( if (dst_shape) { int task_size = dst_shape->w * dst_shape->b * dst_shape->h * dst_depth; float task_size_per_cu = - static_cast(task_size) / device.GetInfo().compute_units_count; + static_cast(task_size) / device.info_.compute_units_count; int block_size = conv_params.block_size.x * conv_params.block_size.y * conv_params.block_size.z; float threads_per_cu = task_size_per_cu / block_size; @@ -844,7 +844,7 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( conv_params.block_size = int3(1, 1, 1); } conv_params.src_depth_loop_size = 1; - MaliInfo mali_info = device.GetInfo().mali_info; + MaliInfo mali_info = device.info_.mali_info; if (src_depth % 2 == 0 && block_size <= 2 && !mali_info.IsMidgard()) { conv_params.src_depth_loop_size = 2; } @@ -987,7 +987,7 @@ absl::Status CreateConvPowerVR(const CreationContext& creation_context, const Convolution2DAttributes& attr, ConvPowerVR* result, const BHWC* dst_shape) { *result = ConvPowerVR(definition, attr, *creation_context.device, dst_shape); - 
result->GenerateCode(creation_context.device->GetInfo()); + result->GenerateCode(creation_context.device->info_); return result->UploadData(attr.weights, attr.bias, creation_context.context); } @@ -996,7 +996,7 @@ absl::Status CreateConvPowerVR(const CreationContext& creation_context, const FullyConnectedAttributes& attr, ConvPowerVR* result, const BHWC* dst_shape) { *result = ConvPowerVR(definition, attr, *creation_context.device, dst_shape); - result->GenerateCode(creation_context.device->GetInfo()); + result->GenerateCode(creation_context.device->info_); return result->UploadData(attr.weights, attr.bias, creation_context.context); } @@ -1006,7 +1006,7 @@ absl::Status CreateConvPowerVRDynamicWeights( ConvPowerVR* result, const BHWC* dst_shape) { *result = ConvPowerVR(definition, attr, weights_shape, *creation_context.device, dst_shape); - result->GenerateCode(creation_context.device->GetInfo()); + result->GenerateCode(creation_context.device->info_); return result->UploadBias(attr.bias, creation_context.context); } @@ -1017,7 +1017,7 @@ absl::Status CreateConvPowerVRWino4x4To6x6( *result = ConvPowerVR(definition); result->conv_params_ = result->GuessBestParamsWinograd( *creation_context.device, definition, attr, dst_shape); - result->GenerateCode(creation_context.device->GetInfo()); + result->GenerateCode(creation_context.device->info_); return result->UploadDataForWinograd4x4To6x6( attr.weights, *creation_context.device, creation_context.context); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc index 88035556c86..581c8056ced 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc @@ -430,7 +430,7 @@ absl::Status CreateConvTexture(const CreationContext& creation_context, const Convolution2DAttributes& attr, ConvTexture* result) { *result = ConvTexture(definition, attr); - result->GenerateCode(creation_context.device->GetInfo()); + result->GenerateCode(creation_context.device->info_); return result->UploadData(attr.weights, attr.bias, creation_context.context); } @@ -439,7 +439,7 @@ absl::Status CreateConvTexture(const CreationContext& creation_context, const FullyConnectedAttributes& attr, ConvTexture* result) { *result = ConvTexture(definition); - result->GenerateCode(creation_context.device->GetInfo()); + result->GenerateCode(creation_context.device->info_); return result->UploadData(attr.weights, attr.bias, creation_context.context); } @@ -449,7 +449,7 @@ absl::Status CreateConvTextureWino4x4To6x6( *result = ConvTexture(definition); result->different_weights_for_height_ = true; result->block_size_ = {4, 1, 2}; - result->GenerateCode(creation_context.device->GetInfo()); + result->GenerateCode(creation_context.device->info_); return result->UploadDataForWinograd4x4To6x6( attr.weights, *creation_context.device, creation_context.context); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/converter.cc b/tensorflow/lite/delegates/gpu/cl/kernels/converter.cc index bd5aaed8bc3..d52efb43a08 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/converter.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/converter.cc @@ -152,8 +152,8 @@ __kernel void from_tensor()" + context_ = &environment->context(); shape_ = BHWC(input_def.dimensions.b, input_def.dimensions.h, input_def.dimensions.w, input_def.dimensions.c); - RETURN_IF_ERROR(args_.TransformToCLCode(environment->device().GetInfo(), {}, - &shader_src)); + RETURN_IF_ERROR( + 
args_.TransformToCLCode(environment->device().info_, {}, &shader_src)); return environment->program_cache()->GetOrCreateCLKernel( shader_src, "from_tensor", environment->context(), environment->device(), &kernel_); @@ -272,8 +272,8 @@ __kernel void to_tensor()" + context_ = &environment->context(); shape_ = BHWC(output_def.dimensions.b, output_def.dimensions.h, output_def.dimensions.w, output_def.dimensions.c); - RETURN_IF_ERROR(args_.TransformToCLCode(environment->device().GetInfo(), {}, - &shader_src)); + RETURN_IF_ERROR( + args_.TransformToCLCode(environment->device().info_, {}, &shader_src)); return environment->program_cache()->GetOrCreateCLKernel( shader_src, "to_tensor", environment->context(), environment->device(), &kernel_); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc index a139b3affc9..c6eba691306 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc @@ -360,8 +360,8 @@ absl::Status CreateConvolutionTransposed( const CreationContext& creation_context, const OperationDef& definition, const ConvolutionTransposedAttributes& attr, ConvolutionTransposed* result) { - *result = ConvolutionTransposed(definition, attr, - creation_context.device->GetInfo()); + *result = + ConvolutionTransposed(definition, attr, creation_context.device->info_); RETURN_IF_ERROR( result->UploadWeights(attr.weights, creation_context.context)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.cc index 2268313a867..54fd5396869 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.cc @@ -175,7 +175,7 @@ absl::Status CreateConvolutionTransposedThin( "ConvolutionTransposedThin doesn't support this attributes"); } *result = ConvolutionTransposedThin(definition, attr, - creation_context.device->GetInfo()); + creation_context.device->info_); RETURN_IF_ERROR( result->UploadData(attr.weights, attr.bias, creation_context.context)); return absl::OkStatus(); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc index e171231fc0a..f0213cda805 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc @@ -330,7 +330,7 @@ absl::Status CreateDepthwiseConv3x3( bool local_mem_uploads = weights_are_buffer && creation_context.device->IsPowerVR(); *result = DepthwiseConv3x3(definition, weights_are_buffer, local_mem_uploads, - creation_context.device->GetInfo()); + creation_context.device->info_); return result->UploadWeightsAndBiases(attr.weights, attr.bias, creation_context.context); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc index f735f1aa047..7d46ae4a109 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/elementwise.cc @@ -166,10 +166,9 @@ absl::Status CreateElementwiseTwoInput( const tflite::gpu::Tensor& constant_tensor, bool swap_inputs, GPUOperation* result) { const BHWC shape = BHWC(1, 1, 1, constant_tensor.shape.v); - TensorStorageType storage_type = - 
SelectBestStorageType(*creation_context.context, *creation_context.device, - shape, definition.GetPrimaryStorageType(), - definition.GetDataType(), Layout::HWC); + TensorStorageType storage_type = SelectBestStorageType( + creation_context.device->info_, shape, definition.GetPrimaryStorageType(), + definition.GetDataType(), Layout::HWC); TensorDescriptor desc{definition.GetDataType(), storage_type, Layout::HWC}; Tensor gpu_tensor; RETURN_IF_ERROR(CreateTensor(*creation_context.context, @@ -205,10 +204,9 @@ absl::Status CreateElementwiseTwoInput( bool swap_inputs, GPUOperation* result) { const BHWC shape = BHWC(1, constant_tensor.shape.h, constant_tensor.shape.w, constant_tensor.shape.c); - TensorStorageType storage_type = - SelectBestStorageType(*creation_context.context, *creation_context.device, - shape, definition.GetPrimaryStorageType(), - definition.GetDataType(), Layout::HWC); + TensorStorageType storage_type = SelectBestStorageType( + creation_context.device->info_, shape, definition.GetPrimaryStorageType(), + definition.GetDataType(), Layout::HWC); TensorDescriptor desc{definition.GetDataType(), storage_type, Layout::HWC}; Tensor gpu_tensor; RETURN_IF_ERROR(CreateTensor(*creation_context.context, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc index 2ab0284febe..ec18fa9f6e2 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc @@ -114,7 +114,7 @@ absl::Status CreateFullyConnected(const CreationContext& creation_context, const OperationDef& definition, const FullyConnectedAttributes& attr, FullyConnected* result) { - *result = FullyConnected(definition, creation_context.device->GetInfo()); + *result = FullyConnected(definition, creation_context.device->info_); RETURN_IF_ERROR( result->UploadWeights(attr.weights, creation_context.context)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc index 7260048c6d3..97c72c1269d 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc @@ -227,7 +227,7 @@ absl::Status GPUOperation::Compile(const CreationContext& creation_context) { RETURN_IF_ERROR( MergeOperations(linked_operations_, &args_, &element_wise_code)); RETURN_IF_ERROR(args_.TransformToCLCode( - creation_context.device->GetInfo(), + creation_context.device->info_, {{dst_tensors_names_[0], element_wise_code}}, &code)); code = absl::Substitute(code, args_.GetListOfArgs()); RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel( @@ -238,13 +238,13 @@ absl::Status GPUOperation::Compile(const CreationContext& creation_context) { RETURN_IF_ERROR( MergeOperations(linked_operations_, &args_, &element_wise_code)); RETURN_IF_ERROR(args_.TransformToCLCode( - creation_context.device->GetInfo(), + creation_context.device->info_, {{dst_tensors_names_[0], element_wise_code}}, &code_)); RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel( code_, "main_function", compiler_options_, *creation_context.context, *creation_context.device, &kernel_)); } - return PostCompileCheck(creation_context.device->GetInfo()); + return PostCompileCheck(creation_context.device->info_); } int3 GPUOperation::GetGridSize() const { diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/util.cc b/tensorflow/lite/delegates/gpu/cl/kernels/util.cc index 3fe4ffb4acd..d907c0210b7 100644 --- 
a/tensorflow/lite/delegates/gpu/cl/kernels/util.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/util.cc @@ -117,7 +117,7 @@ int GetRecommendedBlockSizeForConv(const CLDevice& device, CalculationsPrecision precision, int task_size) { const float task_size_per_cu = - task_size / static_cast(device.GetInfo().compute_units_count); + task_size / static_cast(device.info_.compute_units_count); int block_size = 1; float threshold_1 = FLT_MAX; float threshold_2 = FLT_MAX; @@ -125,7 +125,7 @@ int GetRecommendedBlockSizeForConv(const CLDevice& device, if (!device.IsMali()) { return 1; } - MaliInfo mali_info = device.GetInfo().mali_info; + MaliInfo mali_info = device.info_.mali_info; switch (precision) { case CalculationsPrecision::F16: if (mali_info.IsBifrostGen1()) { diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc index 4c3e8ddba05..698599a5bbd 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc @@ -303,7 +303,7 @@ absl::Status CreateWinograd4x4To36(const CreationContext& creation_context, const Padding2D& padding, Winograd4x4To36* result) { *result = - Winograd4x4To36(definition, padding, creation_context.device->GetInfo()); + Winograd4x4To36(definition, padding, creation_context.device->info_); return result->UploadBt(creation_context.context); } @@ -502,7 +502,7 @@ absl::Status CreateWinograd36To4x4( const CreationContext& creation_context, const OperationDef& definition, const tflite::gpu::Tensor& biases, Winograd36To4x4* result) { - *result = Winograd36To4x4(definition, creation_context.device->GetInfo()); + *result = Winograd36To4x4(definition, creation_context.device->info_); TensorLinearDescriptor desc; desc.storage_type = LinearStorageType::TEXTURE_2D; desc.element_type = definition.GetDataType(); diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.cc index b577757057e..4a97bdddd09 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.cc @@ -167,7 +167,7 @@ absl::Status SelectConvolution(const Convolution2DAttributes& attr, const CreationContext& creation_context, const OperationDef& op_def, ModelHints hints, std::unique_ptr* ptr) { - const auto& device_info = creation_context.device->GetInfo(); + const auto& device_info = creation_context.device->info_; if (device_info.IsAdreno()) { return SelectConvolutionAdreno(attr, dst_shape, creation_context, op_def, hints, ptr); @@ -190,7 +190,7 @@ absl::Status SelectConvolutionForWinograd( const Convolution2DAttributes& attr, const BHWC& dst_shape, const CreationContext& creation_context, const OperationDef& op_def, ModelHints hints, std::unique_ptr* ptr) { - const auto& device_info = creation_context.device->GetInfo(); + const auto& device_info = creation_context.device->info_; if (device_info.IsAdreno()) { return SelectConvolutionWinogradAdreno(attr, dst_shape, creation_context, op_def, hints, ptr); @@ -215,7 +215,7 @@ absl::Status SelectConvolutionWithDynamicWeights( const BHWC& dst_shape, const CreationContext& creation_context, const OperationDef& op_def, ModelHints hints, std::unique_ptr* ptr, ConvWeightsDescription* weights_desc) { - const auto& device_info = creation_context.device->GetInfo(); + const auto& device_info = creation_context.device->info_; if (device_info.IsAdreno()) { return 
SelectConvolutionDynamicWeightsAdreno(attr, weights_shape, dst_shape, creation_context, op_def, diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/convolution_transposed_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/convolution_transposed_selector.cc index 56864f2c575..c00d9392702 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/convolution_transposed_selector.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/convolution_transposed_selector.cc @@ -105,7 +105,7 @@ absl::Status SelectConvolutionTransposed( const ConvolutionTransposedAttributes& attr, const CreationContext& creation_context, const OperationDef& op_def, std::unique_ptr* ptr) { - const auto& device_info = creation_context.device->GetInfo(); + const auto& device_info = creation_context.device->info_; if (device_info.IsAdreno()) { return SelectConvolutionTransposedAdreno(attr, creation_context, op_def, ptr); diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/dw_convolution_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/dw_convolution_selector.cc index fafd9078f6f..b89f271365f 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/dw_convolution_selector.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/dw_convolution_selector.cc @@ -69,7 +69,7 @@ absl::Status SelectDWConvolutionMali( const auto storage_type = op_def.src_tensors[0].storage_type; bool buffer_type = storage_type == TensorStorageType::BUFFER || storage_type == TensorStorageType::IMAGE_BUFFER; - MaliInfo mali_info = creation_context.device->GetInfo().mali_info; + MaliInfo mali_info = creation_context.device->info_.mali_info; if (IsDepthwiseConv3x3Supported(attr) && !mali_info.IsMidgard() && !buffer_type && op_def.precision != CalculationsPrecision::F32) { DepthwiseConv3x3 dw_conv; @@ -90,7 +90,7 @@ absl::Status SelectDWConvolution(const DepthwiseConvolution2DAttributes& attr, const CreationContext& creation_context, const OperationDef& op_def, std::unique_ptr* ptr) { - const auto& device_info = creation_context.device->GetInfo(); + const auto& device_info = creation_context.device->info_; if (device_info.IsAdreno()) { return SelectDWConvolutionAdreno(attr, creation_context, op_def, ptr); } else if (device_info.IsPowerVR()) { diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/fully_connected_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/fully_connected_selector.cc index cb967e45b52..0df8e243da3 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/fully_connected_selector.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/fully_connected_selector.cc @@ -104,7 +104,7 @@ absl::Status SelectFullyConnected(const FullyConnectedAttributes& attr, const CreationContext& creation_context, const OperationDef& op_def, int batch_size, std::unique_ptr* ptr) { - const auto& device_info = creation_context.device->GetInfo(); + const auto& device_info = creation_context.device->info_; if (device_info.IsAdreno()) { return SelectFullyConnectedAdreno(attr, creation_context, op_def, batch_size, ptr); diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc index 5661c3d0a37..b257e5a85da 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/operation_selector.cc @@ -75,14 +75,14 @@ absl::Status WinogradFromNode(const CreationContext& creation_context, const BHWC shape_1{input_shape.b, 36, tiles_x * tiles_y, output_shape.c}; TensorDescriptor td_0; td_0.storage_type = 
SelectBestStorageType( - *creation_context.context, *creation_context.device, shape_0, + creation_context.device->info_, shape_0, op_def.src_tensors[0].storage_type, op_def.src_tensors[0].data_type, op_def.src_tensors[0].layout); td_0.data_type = op_def.src_tensors[0].data_type; td_0.layout = op_def.src_tensors[0].layout; TensorDescriptor td_1; td_1.storage_type = SelectBestStorageType( - *creation_context.context, *creation_context.device, shape_1, + creation_context.device->info_, shape_1, op_def.src_tensors[0].storage_type, op_def.src_tensors[0].data_type, op_def.src_tensors[0].layout); td_1.data_type = op_def.src_tensors[0].data_type; @@ -175,7 +175,7 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context, channels[i] = inputs[i]->tensor.shape.c; } return SelectConcat(attr, channels, op_def, - creation_context.device->GetInfo(), gpu_op); + creation_context.device->info_, gpu_op); } case OperationType::CONVOLUTION_2D: { auto attr = @@ -248,7 +248,7 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context, inputs[0]->tensor.shape.b, gpu_op); } case OperationType::LSTM: { - SelectLSTM(op_def, creation_context.device->GetInfo(), gpu_op); + SelectLSTM(op_def, creation_context.device->info_, gpu_op); return absl::OkStatus(); } case OperationType::MAX_UNPOOLING_2D: { @@ -259,8 +259,7 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context, } case OperationType::MEAN: { auto attr = absl::any_cast(node.operation.attributes); - return SelectMean(attr, op_def, creation_context.device->GetInfo(), - gpu_op); + return SelectMean(attr, op_def, creation_context.device->info_, gpu_op); } case OperationType::MEAN_STDDEV_NORMALIZATION: { MeanStdDevNormalization operation = CreateMeanStdDevNormalization(op_def); diff --git a/tensorflow/lite/delegates/gpu/cl/storage_type_util.cc b/tensorflow/lite/delegates/gpu/cl/storage_type_util.cc index 755da0c7619..ddcb65e07f9 100644 --- a/tensorflow/lite/delegates/gpu/cl/storage_type_util.cc +++ b/tensorflow/lite/delegates/gpu/cl/storage_type_util.cc @@ -15,18 +15,16 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/cl/storage_type_util.h" -#include "tensorflow/lite/delegates/gpu/cl/cl_context.h" -#include "tensorflow/lite/delegates/gpu/cl/cl_device.h" #include "tensorflow/lite/delegates/gpu/cl/tensor_type.h" #include "tensorflow/lite/delegates/gpu/common/data_type.h" #include "tensorflow/lite/delegates/gpu/common/shape.h" +#include "tensorflow/lite/delegates/gpu/common/util.h" namespace tflite { namespace gpu { namespace cl { -bool CanCreateTensorWithShape(const CLContext& context, const CLDevice& device, - const BHWDC& shape, +bool CanCreateTensorWithShape(const DeviceInfo& device_info, const BHWDC& shape, const TensorDescriptor& descriptor) { const int slices = DivideRoundUp(shape.c, 4); switch (descriptor.storage_type) { @@ -35,64 +33,60 @@ bool CanCreateTensorWithShape(const CLContext& context, const CLDevice& device, 4 * (descriptor.data_type == DataType::FLOAT32 ? 
4 : 2); const int buffer_size = shape.b * shape.w * shape.h * shape.d * slices * flt4_size; - return buffer_size <= device.GetInfo().buffer_max_size; + return buffer_size <= device_info.buffer_max_size; } case TensorStorageType::IMAGE_BUFFER: return shape.b * shape.w * shape.h * shape.d * slices <= - device.GetInfo().image_buffer_max_size; + device_info.image_buffer_max_size; case TensorStorageType::TEXTURE_3D: - if (device.cl_version() < OpenCLVersion::CL_1_2 && slices == 1) { + if (device_info.cl_version < OpenCLVersion::CL_1_2 && slices == 1) { // clCreateImage3D (that used in CL 1.0/1.1) can not create image with // depth = 1 by specification; return false; } - return shape.w * shape.b <= device.GetInfo().image3d_max_width && - shape.h <= device.GetInfo().image3d_max_height && - slices * shape.d <= device.GetInfo().image3d_max_depth; + return shape.w * shape.b <= device_info.image3d_max_width && + shape.h <= device_info.image3d_max_height && + slices * shape.d <= device_info.image3d_max_depth; case TensorStorageType::TEXTURE_ARRAY: // Bug on some Adreno. b/131099086 - if (slices == 1 && !device.SupportsOneLayerTextureArray()) { + if (slices == 1 && !device_info.SupportsOneLayerTextureArray()) { return false; } - return shape.w * shape.b <= device.GetInfo().image2d_max_width && - shape.h <= device.GetInfo().image2d_max_height && - slices * shape.d <= device.GetInfo().image_array_max_layers; + return shape.w * shape.b <= device_info.image2d_max_width && + shape.h <= device_info.image2d_max_height && + slices * shape.d <= device_info.image_array_max_layers; case TensorStorageType::TEXTURE_2D: - return shape.w * shape.b * shape.d <= - device.GetInfo().image2d_max_width && - shape.h * slices <= device.GetInfo().image2d_max_height; + return shape.w * shape.b * shape.d <= device_info.image2d_max_width && + shape.h * slices <= device_info.image2d_max_height; case TensorStorageType::SINGLE_TEXTURE_2D: return shape.c <= 4 && - context.IsFloatTexture2DSupported(shape.c, descriptor.data_type) && - shape.w * shape.b * shape.d <= - device.GetInfo().image2d_max_width && - shape.h <= device.GetInfo().image2d_max_height; + device_info.SupportsFloatImage2D(descriptor.data_type, shape.c) && + shape.w * shape.b * shape.d <= device_info.image2d_max_width && + shape.h <= device_info.image2d_max_height; default: return false; } } -bool CanCreateTensorWithShape(const CLContext& context, const CLDevice& device, - const BHWC& shape, +bool CanCreateTensorWithShape(const DeviceInfo& device_info, const BHWC& shape, const TensorDescriptor& descriptor) { const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c); - return CanCreateTensorWithShape(context, device, shape5D, descriptor); + return CanCreateTensorWithShape(device_info, shape5D, descriptor); } -TensorStorageType SelectBestStorageType(const CLContext& context, - const CLDevice& device, +TensorStorageType SelectBestStorageType(const DeviceInfo& device_info, const BHWC& shape, const TensorStorageType& desired, const DataType& data_type, const Layout& layout) { - if (CanCreateTensorWithShape(context, device, shape, + if (CanCreateTensorWithShape(device_info, shape, TensorDescriptor{data_type, desired, layout})) { return desired; } auto GetBestTypeAfterTextureArray = [&]() { - if (device.SupportsImageBuffer() && + if (device_info.SupportsImageBuffer() && CanCreateTensorWithShape( - context, device, shape, + device_info, shape, TensorDescriptor{data_type, TensorStorageType::IMAGE_BUFFER, layout})) { return TensorStorageType::IMAGE_BUFFER; @@ -101,9 +95,9 @@ 
TensorStorageType SelectBestStorageType(const CLContext& context, } }; auto GetBestTypeAfterTexture2D = [&]() { - if (device.SupportsTextureArray() && + if (device_info.SupportsTextureArray() && CanCreateTensorWithShape( - context, device, shape, + device_info, shape, TensorDescriptor{data_type, TensorStorageType::TEXTURE_ARRAY, layout})) { return TensorStorageType::TEXTURE_ARRAY; @@ -113,7 +107,7 @@ TensorStorageType SelectBestStorageType(const CLContext& context, }; auto GetBestTypeAfterTexture3D = [&]() { if (CanCreateTensorWithShape( - context, device, shape, + device_info, shape, TensorDescriptor{data_type, TensorStorageType::TEXTURE_2D, layout})) { return TensorStorageType::TEXTURE_2D; diff --git a/tensorflow/lite/delegates/gpu/cl/storage_type_util.h b/tensorflow/lite/delegates/gpu/cl/storage_type_util.h index 87fc2206e81..a8a82008461 100644 --- a/tensorflow/lite/delegates/gpu/cl/storage_type_util.h +++ b/tensorflow/lite/delegates/gpu/cl/storage_type_util.h @@ -16,8 +16,7 @@ limitations under the License. #ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_STORAGE_TYPE_UTIL_H_ #define TENSORFLOW_LITE_DELEGATES_GPU_CL_STORAGE_TYPE_UTIL_H_ -#include "tensorflow/lite/delegates/gpu/cl/cl_context.h" -#include "tensorflow/lite/delegates/gpu/cl/cl_device.h" +#include "tensorflow/lite/delegates/gpu/cl/device_info.h" #include "tensorflow/lite/delegates/gpu/cl/tensor_type.h" #include "tensorflow/lite/delegates/gpu/common/data_type.h" #include "tensorflow/lite/delegates/gpu/common/shape.h" @@ -26,16 +25,13 @@ namespace tflite { namespace gpu { namespace cl { -bool CanCreateTensorWithShape(const CLContext& context, const CLDevice& device, - const BHWDC& shape, +bool CanCreateTensorWithShape(const DeviceInfo& device_info, const BHWDC& shape, const TensorDescriptor& descriptor); -bool CanCreateTensorWithShape(const CLContext& context, const CLDevice& device, - const BHWC& shape, +bool CanCreateTensorWithShape(const DeviceInfo& device_info, const BHWC& shape, const TensorDescriptor& descriptor); -TensorStorageType SelectBestStorageType(const CLContext& context, - const CLDevice& device, +TensorStorageType SelectBestStorageType(const DeviceInfo& device_info, const BHWC& shape, const TensorStorageType& desired, const DataType& data_type, From 3e5a78fa1b3854e536587a94514ac42b8b621225 Mon Sep 17 00:00:00 2001 From: bhack Date: Fri, 7 Aug 2020 02:59:42 +0200 Subject: [PATCH 2310/2522] Else fix --- tensorflow/python/ops/array_ops.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 8724ecebbfe..9875342730c 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -5321,7 +5321,8 @@ def quantize_and_dequantize( input = ops.convert_to_tensor(input) if axis is None: axis = -1 - axis = get_positive_axis(axis, input.shape.ndims) + else: + axis = get_positive_axis(axis, input.shape.ndims) return gen_array_ops.quantize_and_dequantize_v2( input, From cfb79f71512a9e79e470948fa25c62da985de43a Mon Sep 17 00:00:00 2001 From: Advait Jain Date: Thu, 6 Aug 2020 17:50:20 -0700 Subject: [PATCH 2311/2522] Rollback of 0c2b2a3063dc4334b6 to allow for backwards compatibility for some more time. Note that the kernel tests will still fail unless the kernel implementations are updated to use TfLiteEvalTensors. 
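A note on the quantize_and_dequantize fix above (PATCH 2310): axis == -1 is a sentinel meaning per-tensor quantization, so it must only be mapped to a positive index when the caller actually supplied an axis. The minimal sketch below shows that control flow; resolve_axis and positive_axis are hypothetical names, not the TensorFlow helpers.

# Minimal sketch of the fixed control flow; not TensorFlow source.
def positive_axis(axis, ndims):
    # Hypothetical stand-in for get_positive_axis: map a possibly negative
    # axis into the range [0, ndims).
    if axis < -ndims or axis >= ndims:
        raise ValueError("axis out of range")
    return axis + ndims if axis < 0 else axis

def resolve_axis(axis, ndims):
    if axis is None:
        return -1  # sentinel: per-tensor quantization, must stay -1
    return positive_axis(axis, ndims)  # only normalize user-supplied axes

print(resolve_axis(None, 4))  # -1, the sentinel is preserved
print(resolve_axis(-1, 4))    # 3, an explicit axis is normalized
print(resolve_axis(1, 4))     # 1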
PiperOrigin-RevId: 325347554 Change-Id: I0cb002257f0b8a807accb50b365d727eda579f3c --- tensorflow/lite/micro/BUILD | 1 - tensorflow/lite/micro/kernels/BUILD | 5 ---- .../micro/kernels/xtensa_hifimini/svdf.cc | 6 ++-- tensorflow/lite/micro/micro_interpreter.cc | 20 ++++++++----- tensorflow/lite/micro/test_helpers.cc | 30 ++++++++----------- 5 files changed, 29 insertions(+), 33 deletions(-) diff --git a/tensorflow/lite/micro/BUILD b/tensorflow/lite/micro/BUILD index a8fec96c3e3..9b3d0d623cc 100644 --- a/tensorflow/lite/micro/BUILD +++ b/tensorflow/lite/micro/BUILD @@ -87,7 +87,6 @@ cc_library( "//tensorflow/lite/kernels:kernel_util", "//tensorflow/lite/kernels/internal:compatibility", "//tensorflow/lite/kernels/internal:tensor", - "//tensorflow/lite/micro/kernels:kernel_util", "//tensorflow/lite/schema:schema_fbs", "@flatbuffers//:runtime_cc", ], diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index 76ad03991e3..dcf2337aa24 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD @@ -518,11 +518,6 @@ cc_library( "kernel_util.cc", ], hdrs = ["kernel_util.h"], - visibility = [ - # Needed for micro:test_helpers but visibility can not be finer-grained - # than a package. - ":micro_top_level", - ], deps = [ "//tensorflow/lite/c:common", "//tensorflow/lite/kernels/internal:compatibility", diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/svdf.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini/svdf.cc index 00ee9b2e809..545e91bab3d 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/svdf.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini/svdf.cc @@ -343,7 +343,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_EQ(context, activation_state->dims->data[1], memory_size * num_filters); - TF_LITE_ENSURE_EQ(context, NumInputs(node), 5); + TF_LITE_ENSURE_EQ(context, node->inputs->size, 5); TF_LITE_ENSURE_EQ(context, weights_feature->type, kTfLiteInt8); TF_LITE_ENSURE_EQ(context, weights_time->type, kTfLiteInt16); TF_LITE_ENSURE_EQ(context, activation_state->type, kTfLiteInt16); @@ -398,7 +398,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const TfLiteEvalTensor* weights_time = tflite::micro::GetEvalInput(context, node, kWeightsTimeTensor); const TfLiteEvalTensor* bias = - tflite::micro::GetEvalInput(context, node, kBiasTensor); + (NumInputs(node) == 5) + ? tflite::micro::GetEvalInput(context, node, kBiasTensor) + : nullptr; TfLiteEvalTensor* activation_state = tflite::micro::GetMutableEvalInput( context, node, kInputActivationStateTensor); TfLiteEvalTensor* output = diff --git a/tensorflow/lite/micro/micro_interpreter.cc b/tensorflow/lite/micro/micro_interpreter.cc index 1c6ebd5953e..8c2f8e031d8 100644 --- a/tensorflow/lite/micro/micro_interpreter.cc +++ b/tensorflow/lite/micro/micro_interpreter.cc @@ -166,7 +166,9 @@ void MicroInterpreter::Init(tflite::Profiler* profiler) { context_.impl_ = static_cast(&context_helper_); context_.ReportError = context_helper_.ReportOpError; + context_.GetTensor = context_helper_.GetTensor; context_.GetEvalTensor = context_helper_.GetEvalTensor; + context_.recommended_num_threads = 1; context_.profiler = profiler; initialization_status_ = kTfLiteOk; @@ -275,12 +277,10 @@ TfLiteStatus MicroInterpreter::AllocateTensors() { } context_helper_.SetNodeIndex(-1); - // RequestScratchBufferInArena and GetTensor (with associated TempAllocation) - // are also available in Prepare stage. 
- context_.GetTensor = context_helper_.GetTensor; + // Both AllocatePersistentBuffer and RequestScratchBufferInArena is + // available in Prepare stage. context_.RequestScratchBufferInArena = context_helper_.RequestScratchBufferInArena; - for (size_t i = 0; i < subgraph_->operators()->size(); ++i) { // Set node idx to annotate the lifetime for scratch buffers. context_helper_.SetNodeIndex(i); @@ -300,13 +300,11 @@ TfLiteStatus MicroInterpreter::AllocateTensors() { } context_helper_.SetNodeIndex(-1); - // Prepare is done, we're ready for Invoke. Memory allocation and full - // TfLiteTensors (via GetTensor) are no longer allowed. Kernels can only fetch - // scratch buffers via GetScratchBuffer. + // Prepare is done, we're ready for Invoke. Memory allocation is no longer + // allowed. Kernels can only fetch scratch buffers via GetScratchBuffer. context_.AllocatePersistentBuffer = nullptr; context_.RequestScratchBufferInArena = nullptr; context_.GetScratchBuffer = context_helper_.GetScratchBuffer; - context_.GetTensor = nullptr; TF_LITE_ENSURE_OK(&context_, allocator_.FinishModelAllocation(model_, eval_tensors_)); @@ -345,6 +343,12 @@ TfLiteStatus MicroInterpreter::Invoke() { #endif invoke_status = registration->invoke(&context_, node); + // All TfLiteTensor structs used in the kernel are allocated from temp + // memory in the allocator. This creates a chain of allocations in the + // temp section. The call below resets the chain of allocations to + // prepare for the next call. + allocator_.ResetTempAllocations(); + if (invoke_status == kTfLiteError) { TF_LITE_REPORT_ERROR( error_reporter_, diff --git a/tensorflow/lite/micro/test_helpers.cc b/tensorflow/lite/micro/test_helpers.cc index a4f716fca06..23c7ca96408 100644 --- a/tensorflow/lite/micro/test_helpers.cc +++ b/tensorflow/lite/micro/test_helpers.cc @@ -28,7 +28,6 @@ limitations under the License. 
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/micro/all_ops_resolver.h" -#include "tensorflow/lite/micro/kernels/kernel_util.h" #include "tensorflow/lite/micro/micro_utils.h" #include "tensorflow/lite/schema/schema_generated.h" @@ -602,9 +601,8 @@ TfLiteStatus SimpleStatefulOp::Invoke(TfLiteContext* context, OpData* data = reinterpret_cast(node->user_data); data->invoke_count += 1; - const TfLiteEvalTensor* input = - tflite::micro::GetEvalInput(context, node, kInputTensor); - const uint8_t* input_data = tflite::micro::GetTensorData(input); + const TfLiteTensor* input = GetInput(context, node, kInputTensor); + const uint8_t* input_data = GetTensorData(input); int size = NumElements(input->dims); uint8_t* sorting_buffer = reinterpret_cast( @@ -622,13 +620,10 @@ TfLiteStatus SimpleStatefulOp::Invoke(TfLiteContext* context, } } - TfLiteEvalTensor* median = - tflite::micro::GetEvalOutput(context, node, kMedianTensor); - uint8_t* median_data = tflite::micro::GetTensorData(median); - TfLiteEvalTensor* invoke_count = - tflite::micro::GetEvalOutput(context, node, kInvokeCount); - int32_t* invoke_count_data = - tflite::micro::GetTensorData(invoke_count); + TfLiteTensor* median = GetOutput(context, node, kMedianTensor); + uint8_t* median_data = GetTensorData(median); + TfLiteTensor* invoke_count = GetOutput(context, node, kInvokeCount); + int32_t* invoke_count_data = GetTensorData(invoke_count); median_data[0] = sorting_buffer[size / 2]; invoke_count_data[0] = data->invoke_count; @@ -665,13 +660,14 @@ TfLiteStatus MockCustom::Prepare(TfLiteContext* context, TfLiteNode* node) { } TfLiteStatus MockCustom::Invoke(TfLiteContext* context, TfLiteNode* node) { - const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0); - const int32_t* input_data = tflite::micro::GetTensorData(input); - const TfLiteEvalTensor* weight = - tflite::micro::GetEvalInput(context, node, 1); + const TfLiteTensor* input = tflite::GetInput(context, node, 0); + const int32_t* input_data = input->data.i32; + const TfLiteTensor* weight = tflite::GetInput(context, node, 1); const uint8_t* weight_data = weight->data.uint8; - TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0); - int32_t* output_data = tflite::micro::GetTensorData(output); + TfLiteTensor* output = GetOutput(context, node, 0); + int32_t* output_data = output->data.i32; + output_data[0] = + 0; // Catch output tensor sharing memory with an input tensor output_data[0] = input_data[0] + weight_data[0]; return kTfLiteOk; } From 016646b73291f95625baf526881c5cc7b3a5c74d Mon Sep 17 00:00:00 2001 From: Blake Hechtman Date: Thu, 6 Aug 2020 17:54:14 -0700 Subject: [PATCH 2312/2522] [XLA] Make FillRandomDouble method that can allow for better creation of random bool arrays. PiperOrigin-RevId: 325348068 Change-Id: Idadb60f5a34f140c4affc5585216539460554f8e --- tensorflow/compiler/xla/array.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/xla/array.h b/tensorflow/compiler/xla/array.h index 392cd9bd359..0f31d4c27f5 100644 --- a/tensorflow/compiler/xla/array.h +++ b/tensorflow/compiler/xla/array.h @@ -289,13 +289,19 @@ class Array { } // Fills the array with random normal variables with the specified mean. 
- void FillRandom(const T& stddev, const double mean = 0.0, - const int seed = 12345) { + void FillRandom(const T& stddev, double mean = 0.0, int seed = 12345) { + FillRandomDouble(static_cast(stddev), mean, seed); + } + + void FillRandomDouble(double stddev, double mean = 0.0, int seed = 12345) { std::mt19937 g(seed); - std::normal_distribution distribution(mean, - static_cast(stddev)); + std::normal_distribution distribution(mean, stddev); for (int64 i = 0; i < num_elements(); ++i) { - values_[i] = static_cast(distribution(g)); + if constexpr (std::is_same()) { + values_[i] = distribution(g) > 0.0; + } else { + values_[i] = static_cast(distribution(g)); + } } } From 5f392207bb31c4fb381900bc4239b30a39a326ef Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: Thu, 6 Aug 2020 17:57:23 -0700 Subject: [PATCH 2313/2522] Disable flaky test PiperOrigin-RevId: 325348505 Change-Id: I534aedd5f980915aa41f00b333d6ae25826319d8 --- tensorflow/python/data/kernel_tests/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD index 210b6f59681..639c07bac01 100644 --- a/tensorflow/python/data/kernel_tests/BUILD +++ b/tensorflow/python/data/kernel_tests/BUILD @@ -94,6 +94,7 @@ tf_py_test( name = "data_service_ops_test", size = "medium", srcs = ["data_service_ops_test.py"], + tags = ["notap"], # "b/163085430" deps = [ "//tensorflow:tensorflow_py", "//tensorflow/python:client_testlib", From 2a7de5bd6c1cd3f12ccfbbdc92e12d3e4a455d60 Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Thu, 6 Aug 2020 18:23:40 -0700 Subject: [PATCH 2314/2522] [XLA] Merge partial sharding in elementwise ops' sharding propagation PiperOrigin-RevId: 325352068 Change-Id: I0e60eaaaad23d8091e5db686b7990e219a95d061 --- .../compiler/xla/service/hlo_sharding.cc | 18 +- .../compiler/xla/service/hlo_sharding.h | 10 + .../xla/service/sharding_propagation.cc | 265 ++++++++++++++---- .../xla/service/sharding_propagation_test.cc | 65 ++++- 4 files changed, 293 insertions(+), 65 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_sharding.cc b/tensorflow/compiler/xla/service/hlo_sharding.cc index d522fc8bd14..ba1fc0d0450 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding.cc +++ b/tensorflow/compiler/xla/service/hlo_sharding.cc @@ -51,7 +51,12 @@ HloSharding HloSharding::PartialTile( int64 group = group_tile_assignment(group_index); *device = replication_groups[group][indices.back()]; }); - return HloSharding(new_tile_assignment, + return PartialTile(new_tile_assignment); +} + +HloSharding HloSharding::PartialTile( + const Array& tile_assignment_last_dim_replicate) { + return HloSharding(tile_assignment_last_dim_replicate, /*replicate_on_last_tile_dim=*/true); } @@ -494,6 +499,17 @@ Shape HloSharding::TileShape(const Shape& shape, int64 device) const { return result_shape; } +int64 HloSharding::NumTiles() const { + if (IsTileMaximal()) { + return 1; + } + if (ReplicateOnLastTileDim()) { + return tile_assignment().num_elements() / + tile_assignment().dimensions().back(); + } + return tile_assignment().num_elements(); +} + HloSharding HloSharding::GetSubSharding(const Shape& shape, const ShapeIndex& index) const { CHECK(IsTuple()); diff --git a/tensorflow/compiler/xla/service/hlo_sharding.h b/tensorflow/compiler/xla/service/hlo_sharding.h index af28df56e68..1b827efff2d 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding.h +++ b/tensorflow/compiler/xla/service/hlo_sharding.h @@ -61,6 +61,12 @@ class HloSharding { const Array& 
group_tile_assignment, absl::Span> replication_groups); + // Creates a partially replicated tiled sharding with device-level tile + // assignment, where the last dimension is the additional replication + // dimension. + static HloSharding PartialTile( + const Array& tile_assignment_last_dim_replicate); + // Creates a new sharding which splits a one-dimensional input shape into // `num_tiles` tiles. static HloSharding Tile1D(const Shape& input_shape, int64 num_tiles); @@ -237,6 +243,10 @@ class HloSharding { // REQUIRES: !IsTuple() Shape TileShape(const Shape& shape, int64 device) const; + // Gets the number of tiles. If it has partial replication, this will not + // equal the device count. + int64 NumTiles() const; + private: HloSharding() : replicated_(true), diff --git a/tensorflow/compiler/xla/service/sharding_propagation.cc b/tensorflow/compiler/xla/service/sharding_propagation.cc index 5d85fb5189c..0e4b0568134 100644 --- a/tensorflow/compiler/xla/service/sharding_propagation.cc +++ b/tensorflow/compiler/xla/service/sharding_propagation.cc @@ -91,9 +91,7 @@ bool IsShardingMoreSpecific(const HloSharding& lhs, const HloSharding& rhs) { return is_better; } if (!rhs.IsTileMaximal()) { - // If we already have a non-tile-maximal sharding then we can't improve - // that. - return false; + return lhs.NumTiles() > rhs.NumTiles(); } else if (!rhs.IsReplicated()) { // If we are not replicated then only tiled (not tile maximal) shardings // can improve us. @@ -124,9 +122,12 @@ HloSharding MergeForMoreSpecificSharding(const HloSharding& a, // Updates the sharding of the specified instruction with the specified sharding // if it is better than the current one and returns true if a new sharding have -// been applied. +// been applied. If may_combine_partial_sharding is true, this may combine the +// new and existing sharding if they are both partial tiling partial +// replication. bool MaybeImproveInstructionSharding(const HloSharding& sharding, - HloInstruction* instruction) { + HloInstruction* instruction, + bool may_combine_partial_sharding) { // We don't want to propagate tile maximal shardings. if (!IsSpatiallyPartitioned(sharding)) { return false; @@ -136,6 +137,101 @@ bool MaybeImproveInstructionSharding(const HloSharding& sharding, instruction->set_sharding(sharding); return true; } + if (may_combine_partial_sharding && sharding.ReplicateOnLastTileDim() && + instruction->sharding().ReplicateOnLastTileDim()) { + if (sharding.tile_assignment().num_elements() == + instruction->sharding().tile_assignment().num_elements()) { + // Combine the tile dimension sizes from new and old. 
+ int64 num_devices = sharding.tile_assignment().num_elements(); + std::vector new_tile_dims; + bool compatible = true; + new_tile_dims.reserve(sharding.tile_assignment().num_dimensions()); + for (int64 i = 0; i < sharding.tile_assignment().num_dimensions() - 1; + ++i) { + int64 new_dim = sharding.tile_assignment().dim(i); + int64 old_dim = instruction->sharding().tile_assignment().dim(i); + if (new_dim == 1) { + new_tile_dims.push_back(old_dim); + } else if (old_dim == 1) { + new_tile_dims.push_back(new_dim); + } else if (new_dim == old_dim) { + new_tile_dims.push_back(new_dim); + } else { + compatible = false; + break; + } + } + int64 replication = num_devices / Product(new_tile_dims); + if (compatible && num_devices % Product(new_tile_dims) == 0 && + replication < + instruction->sharding().tile_assignment().dimensions().back()) { + new_tile_dims.push_back(replication); + Array new_tile(new_tile_dims); + // Maps from replication group ID to sorted members. + absl::flat_hash_map> old_group_members; + absl::flat_hash_map> new_group_members; + auto get_group_index = [&](absl::Span tile_indices, + const HloSharding& sharding) { + int64 group_id = 0; + for (int64 i = 0; i < tile_indices.size() - 1; ++i) { + group_id *= sharding.tile_assignment().dim(i); + group_id += tile_indices[i]; + } + return group_id; + }; + instruction->sharding().tile_assignment().Each( + [&](absl::Span indices, int64 device) { + old_group_members[get_group_index(indices, + instruction->sharding())] + .insert(device); + }); + sharding.tile_assignment().Each([&](absl::Span indices, + int64 device) { + new_group_members[get_group_index(indices, sharding)].insert(device); + }); + // Try to find the intersection of old and new replication groups, in + // order to determine the merged tile assignment. + new_tile.Each([&](absl::Span indices, int64* device) { + if (!compatible) { + return; + } + std::vector old_index(indices.begin(), indices.end()); + std::vector new_index = old_index; + for (int64 i = 0; i < indices.size() - 1; ++i) { + if (instruction->sharding().tile_assignment().dim(i) == 1) { + old_index[i] = 0; + } + if (sharding.tile_assignment().dim(i) == 1) { + new_index[i] = 0; + } + } + int64 old_group_id = + get_group_index(old_index, instruction->sharding()); + int64 new_group_id = get_group_index(new_index, sharding); + if (old_group_members[old_group_id].empty() || + new_group_members[new_group_id].empty() || + *old_group_members[old_group_id].begin() != + *new_group_members[new_group_id].begin()) { + compatible = false; + return; + } + *device = *old_group_members[old_group_id].begin(); + old_group_members[old_group_id].erase(*device); + new_group_members[new_group_id].erase(*device); + }); + if (compatible) { + if (replication == 1) { + new_tile_dims.pop_back(); + new_tile.Reshape(new_tile_dims); + instruction->set_sharding(HloSharding::Tile(new_tile)); + } else { + instruction->set_sharding(HloSharding::PartialTile(new_tile)); + } + return true; + } + } + } + } if (IsShardingMoreSpecific(sharding, instruction->sharding())) { instruction->set_sharding(sharding); return true; @@ -363,7 +459,8 @@ bool SupportSpatialPartitioning(const HloInstruction* instruction, // Convolution handling for InferShardingFromOperands(). 
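The merging branch added to MaybeImproveInstructionSharding above combines two partially replicated shardings when, for every tile dimension, the tile counts either match or one of them is 1, with any leftover devices staying replicated. The standalone sketch below captures only that dimension-merging rule; the device-assignment intersection and the HloSharding/Array machinery are omitted, and merge_tile_dims is a hypothetical name.

def merge_tile_dims(old_dims, new_dims, num_devices):
    # Hypothetical sketch: returns (merged tile dims, replication factor),
    # or None if the two partial shardings are incompatible.
    if len(old_dims) != len(new_dims):
        return None
    merged = []
    for old_d, new_d in zip(old_dims, new_dims):
        if old_d == 1:
            merged.append(new_d)
        elif new_d == 1 or old_d == new_d:
            merged.append(old_d)
        else:
            return None  # conflicting tilings on this dimension
    tiles = 1
    for d in merged:
        tiles *= d
    if num_devices % tiles != 0:
        return None
    return merged, num_devices // tiles  # replication 1 means fully tiled

# Mirrors the PartialShardingOnElementwise test below: [1,2] and [2,1] over
# four devices merge into a [2,2] tiling with no replication left.
print(merge_tile_dims([1, 2], [2, 1], 4))  # ([2, 2], 1)
# Over eight devices the merged [2,2] tiling keeps a replication factor of 2.
print(merge_tile_dims([1, 2], [2, 1], 8))  # ([2, 2], 2)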
bool InferConvolutionShardingFromOperands(HloInstruction* instruction, - bool aggressive_prop) { + bool aggressive_prop, + bool may_combine_partial_sharding) { const auto& dnums = instruction->convolution_dimension_numbers(); const HloInstruction* lhs = instruction->operand(0); const HloInstruction* rhs = instruction->operand(1); @@ -430,13 +527,15 @@ bool InferConvolutionShardingFromOperands(HloInstruction* instruction, partitioned_only_along_non_trivial_dims(lhs->sharding(), dot_dims->batch_dims, 0)) { return MaybeImproveInstructionSharding(get_tiled_sharding_based_on_lhs(), - instruction); + instruction, + may_combine_partial_sharding); } if (IsSpatiallyPartitioned(rhs) && partitioned_only_along_non_trivial_dims(rhs->sharding(), dot_dims->batch_dims, 1)) { return MaybeImproveInstructionSharding(get_tiled_sharding_based_on_rhs(), - instruction); + instruction, + may_combine_partial_sharding); } if (aggressive_prop) { // If LHS/RHS is partitioned only along the non-contracting @@ -455,19 +554,23 @@ bool InferConvolutionShardingFromOperands(HloInstruction* instruction, if (Product(lhs->shape().dimensions()) >= Product(rhs->shape().dimensions())) { return MaybeImproveInstructionSharding( - get_tiled_sharding_based_on_lhs(), instruction); + get_tiled_sharding_based_on_lhs(), instruction, + may_combine_partial_sharding); } else { return MaybeImproveInstructionSharding( - get_tiled_sharding_based_on_rhs(), instruction); + get_tiled_sharding_based_on_rhs(), instruction, + may_combine_partial_sharding); } } if (can_propagate_from_lhs) { return MaybeImproveInstructionSharding( - get_tiled_sharding_based_on_lhs(), instruction); + get_tiled_sharding_based_on_lhs(), instruction, + may_combine_partial_sharding); } if (can_propagate_from_rhs) { return MaybeImproveInstructionSharding( - get_tiled_sharding_based_on_rhs(), instruction); + get_tiled_sharding_based_on_rhs(), instruction, + may_combine_partial_sharding); } } } @@ -476,8 +579,8 @@ bool InferConvolutionShardingFromOperands(HloInstruction* instruction, return false; } if (lhs->sharding().IsReplicated()) { - return MaybeImproveInstructionSharding(HloSharding::Replicate(), - instruction); + return MaybeImproveInstructionSharding( + HloSharding::Replicate(), instruction, may_combine_partial_sharding); } if (IsConvolutionKernelSmall(instruction)) { @@ -488,11 +591,13 @@ bool InferConvolutionShardingFromOperands(HloInstruction* instruction, return false; } return MaybeImproveInstructionSharding(get_tiled_sharding_based_on_lhs(), - instruction); + instruction, + may_combine_partial_sharding); } // If the kernel is large (e.g backward convolution) then we only support // replicated output. 
- return MaybeImproveInstructionSharding(HloSharding::Replicate(), instruction); + return MaybeImproveInstructionSharding(HloSharding::Replicate(), instruction, + may_combine_partial_sharding); } // Tries to update the sharding of the specified instruction based on its @@ -512,8 +617,9 @@ bool InferShardingFromOperands(HloInstruction* instruction, if (absl::c_any_of(instruction->operands(), [](const HloInstruction* op) { return op->has_sharding() && op->sharding().IsReplicated(); })) { - return MaybeImproveInstructionSharding(HloSharding::Replicate(), - instruction); + return MaybeImproveInstructionSharding( + HloSharding::Replicate(), instruction, + /*may_combine_partial_sharding=*/is_spmd); } return false; } @@ -526,7 +632,8 @@ bool InferShardingFromOperands(HloInstruction* instruction, } HloSharding new_sharding = operand->sharding().GetSubSharding( operand->shape(), {instruction->tuple_index()}); - return MaybeImproveInstructionSharding(new_sharding, instruction); + return MaybeImproveInstructionSharding( + new_sharding, instruction, /*may_combine_partial_sharding=*/is_spmd); } case HloOpcode::kTuple: { if (absl::c_none_of(instruction->operands(), @@ -601,7 +708,8 @@ bool InferShardingFromOperands(HloInstruction* instruction, }; if (operand->sharding().IsReplicated()) { changed |= MaybeImproveInstructionSharding( - get_maybe_tuple_sharding(HloSharding::Replicate()), instruction); + get_maybe_tuple_sharding(HloSharding::Replicate()), instruction, + /*may_combine_partial_sharding=*/is_spmd); continue; } if (absl::c_any_of(instruction->dimensions(), [operand](int64 dim) { @@ -610,7 +718,8 @@ bool InferShardingFromOperands(HloInstruction* instruction, // We are reducing along one of the sharded dimensions. We don't // support tiled sharding in this case. changed |= MaybeImproveInstructionSharding( - get_maybe_tuple_sharding(HloSharding::Replicate()), instruction); + get_maybe_tuple_sharding(HloSharding::Replicate()), instruction, + /*may_combine_partial_sharding=*/is_spmd); } else { // We are reducing along some of the non-sharded dimensions. The // result sharding should be the same as the operand sharding with the @@ -631,7 +740,9 @@ bool InferShardingFromOperands(HloInstruction* instruction, // of the same reduce instruction. 
HloSharding new_sharding = get_maybe_tuple_sharding(HloSharding::Tile(new_tile_assignment)); - changed |= MaybeImproveInstructionSharding(new_sharding, instruction); + changed |= MaybeImproveInstructionSharding( + new_sharding, instruction, + /*may_combine_partial_sharding=*/is_spmd); } } return changed; @@ -665,10 +776,13 @@ bool InferShardingFromOperands(HloInstruction* instruction, Array new_tile_assignment = op->sharding().tile_assignment(); new_tile_assignment.Reshape(target_tile_assignment_dimensions); HloSharding new_sharding = HloSharding::Tile(new_tile_assignment); - return MaybeImproveInstructionSharding(new_sharding, instruction); + return MaybeImproveInstructionSharding( + new_sharding, instruction, /*may_combine_partial_sharding=*/is_spmd); } case HloOpcode::kConvolution: - return InferConvolutionShardingFromOperands(instruction, aggressive_prop); + return InferConvolutionShardingFromOperands( + instruction, aggressive_prop, + /*may_combine_partial_sharding=*/is_spmd); case HloOpcode::kTranspose: { const HloInstruction* input = instruction->operand(0); if (!IsSpatiallyPartitioned(input)) { @@ -676,7 +790,8 @@ bool InferShardingFromOperands(HloInstruction* instruction, } HloSharding sharding = hlo_sharding_util::TransposeSharding( input->sharding(), instruction->dimensions()); - return MaybeImproveInstructionSharding(sharding, instruction); + return MaybeImproveInstructionSharding( + sharding, instruction, /*may_combine_partial_sharding=*/is_spmd); } case HloOpcode::kReduceWindow: { const HloInstruction* lhs = instruction->operand(0); @@ -694,7 +809,9 @@ bool InferShardingFromOperands(HloInstruction* instruction, << instruction->ToString(); return false; } - return MaybeImproveInstructionSharding(lhs->sharding(), instruction); + return MaybeImproveInstructionSharding( + lhs->sharding(), instruction, + /*may_combine_partial_sharding=*/is_spmd); } case HloOpcode::kSelectAndScatter: { // Shard according to first operand, as output keeps the same shape. 
@@ -713,7 +830,9 @@ bool InferShardingFromOperands(HloInstruction* instruction, << instruction->ToString(); return false; } - return MaybeImproveInstructionSharding(lhs->sharding(), instruction); + return MaybeImproveInstructionSharding( + lhs->sharding(), instruction, + /*may_combine_partial_sharding=*/is_spmd); } case HloOpcode::kReshape: { if (!IsSpatiallyPartitioned(instruction->operand(0))) { @@ -724,8 +843,9 @@ bool InferShardingFromOperands(HloInstruction* instruction, instruction->operand(0)->shape(), instruction->shape(), instruction->operand(0)->sharding()); if (new_sharding.has_value()) { - return MaybeImproveInstructionSharding(new_sharding.value(), - instruction); + return MaybeImproveInstructionSharding( + new_sharding.value(), instruction, + /*may_combine_partial_sharding=*/is_spmd); } return false; } @@ -736,7 +856,7 @@ bool InferShardingFromOperands(HloInstruction* instruction, return MaybeImproveInstructionSharding( hlo_sharding_util::ReverseSharding( instruction->operand(0)->sharding(), instruction->dimensions()), - instruction); + instruction, /*may_combine_partial_sharding=*/is_spmd); } case HloOpcode::kDot: { auto& dot_dim_numbs = instruction->dot_dimension_numbers(); @@ -765,8 +885,9 @@ bool InferShardingFromOperands(HloInstruction* instruction, } else if (ops_sharding[0]->IsReplicated() && ops_sharding[1]->IsReplicated()) { // Both replicated -> replicate - return MaybeImproveInstructionSharding(HloSharding::Replicate(), - instruction); + return MaybeImproveInstructionSharding( + HloSharding::Replicate(), instruction, + /*may_combine_partial_sharding=*/is_spmd); } else if (!ops_sharding[0]->IsReplicated() && !ops_sharding[1]->IsReplicated()) { // Both tile sharded. The dot spatial partitioning implementation @@ -785,8 +906,9 @@ bool InferShardingFromOperands(HloInstruction* instruction, } if (ops_sharding[representative_op]->IsReplicated()) { - return MaybeImproveInstructionSharding(HloSharding::Replicate(), - instruction); + return MaybeImproveInstructionSharding( + HloSharding::Replicate(), instruction, + /*may_combine_partial_sharding=*/is_spmd); } else { // Tile-shard instruction according to representative op. 
auto sharding = *ops_sharding[representative_op]; @@ -811,7 +933,8 @@ bool InferShardingFromOperands(HloInstruction* instruction, tile_assignment.Reshape(dimensions); sharding = HloSharding::Tile(tile_assignment); } - return MaybeImproveInstructionSharding(sharding, instruction); + return MaybeImproveInstructionSharding( + sharding, instruction, /*may_combine_partial_sharding=*/is_spmd); } } case HloOpcode::kParameter: { @@ -826,7 +949,8 @@ bool InferShardingFromOperands(HloInstruction* instruction, if (parent->called_computations()[i - 1] == instruction->parent()) { if (parent->operand(i)->has_sharding()) { return MaybeImproveInstructionSharding( - parent->operand(i)->sharding(), instruction); + parent->operand(i)->sharding(), instruction, + /*may_combine_partial_sharding=*/is_spmd); } return false; } @@ -853,15 +977,16 @@ bool InferShardingFromOperands(HloInstruction* instruction, if (instruction->shape().IsTuple()) { return MaybeImproveInstructionSharding( HloSharding::SingleTuple(instruction->shape(), operand->sharding()), - instruction); + instruction, /*may_combine_partial_sharding=*/is_spmd); } else { - return MaybeImproveInstructionSharding(operand->sharding(), - instruction); + return MaybeImproveInstructionSharding( + operand->sharding(), instruction, + /*may_combine_partial_sharding=*/is_spmd); } } case HloOpcode::kDynamicSlice: case HloOpcode::kDynamicUpdateSlice: { - auto propagate_slicing = [instruction]() { + auto propagate_slicing = [instruction, is_spmd]() { const HloInstruction* operand = instruction->opcode() == HloOpcode::kDynamicSlice ? instruction->operand(0) @@ -871,8 +996,9 @@ bool InferShardingFromOperands(HloInstruction* instruction, } if (operand->sharding().IsReplicated()) { - return MaybeImproveInstructionSharding(HloSharding::Replicate(), - instruction); + return MaybeImproveInstructionSharding( + HloSharding::Replicate(), instruction, + /*may_combine_partial_sharding=*/is_spmd); } const auto& tile_assignment = operand->sharding().tile_assignment(); @@ -883,10 +1009,11 @@ bool InferShardingFromOperands(HloInstruction* instruction, return false; } } - return MaybeImproveInstructionSharding(operand->sharding(), - instruction); + return MaybeImproveInstructionSharding( + operand->sharding(), instruction, + /*may_combine_partial_sharding=*/is_spmd); }; - auto propagate_base = [instruction]() { + auto propagate_base = [instruction, is_spmd]() { if (instruction->opcode() != HloOpcode::kDynamicUpdateSlice) { return false; } @@ -894,7 +1021,8 @@ bool InferShardingFromOperands(HloInstruction* instruction, return false; } return MaybeImproveInstructionSharding( - instruction->operand(0)->sharding(), instruction); + instruction->operand(0)->sharding(), instruction, + /*may_combine_partial_sharding=*/is_spmd); }; return propagate_slicing() || propagate_base(); } @@ -903,15 +1031,18 @@ bool InferShardingFromOperands(HloInstruction* instruction, if (IsSpatiallyPartitioned(instruction->operand(1))) { HloSharding new_sharding = hlo_sharding_util::GatherOutputSharding( instruction->operand(1)->sharding(), instruction); - changed |= MaybeImproveInstructionSharding(new_sharding, instruction); + changed |= MaybeImproveInstructionSharding( + new_sharding, instruction, + /*may_combine_partial_sharding=*/is_spmd); } if (is_spmd && IsSpatiallyPartitioned(instruction->operand(0))) { auto maybe_from_data = hlo_sharding_util::GatherOutputShardingFromDataOperand( instruction->operand(0)->sharding(), *instruction); if (maybe_from_data) { - changed |= - 
MaybeImproveInstructionSharding(*maybe_from_data, instruction); + changed |= MaybeImproveInstructionSharding( + *maybe_from_data, instruction, + /*may_combine_partial_sharding=*/is_spmd); } } return changed; @@ -920,7 +1051,8 @@ bool InferShardingFromOperands(HloInstruction* instruction, bool changed = false; if (is_spmd && IsSpatiallyPartitioned(instruction->operand(0))) { changed |= MaybeImproveInstructionSharding( - instruction->operand(0)->sharding(), instruction); + instruction->operand(0)->sharding(), instruction, + /*may_combine_partial_sharding=*/is_spmd); } if (!IsSpatiallyPartitioned(instruction->operand(1)) && !IsSpatiallyPartitioned(instruction->operand(2))) { @@ -931,12 +1063,14 @@ bool InferShardingFromOperands(HloInstruction* instruction, hlo_sharding_util::ScatterOutputShardingFromUpdate( instruction->operand(2)->sharding(), *instruction); if (maybe_from_update) { - changed |= - MaybeImproveInstructionSharding(*maybe_from_update, instruction); + changed |= MaybeImproveInstructionSharding( + *maybe_from_update, instruction, + /*may_combine_partial_sharding=*/is_spmd); } } - changed |= MaybeImproveInstructionSharding(HloSharding::Replicate(), - instruction); + changed |= MaybeImproveInstructionSharding( + HloSharding::Replicate(), instruction, + /*may_combine_partial_sharding=*/is_spmd); return changed; } case HloOpcode::kWhile: { @@ -948,14 +1082,28 @@ bool InferShardingFromOperands(HloInstruction* instruction, sharding = MergeForMoreSpecificSharding(sharding, instruction->sharding()); } - return MaybeImproveInstructionSharding(sharding, instruction); + return MaybeImproveInstructionSharding( + sharding, instruction, /*may_combine_partial_sharding=*/is_spmd); } default: { + if (instruction->IsElementwise() && is_spmd) { + bool changed = false; + for (auto operand : instruction->operands()) { + if (IsSpatiallyPartitioned(operand)) { + changed |= MaybeImproveInstructionSharding( + operand->sharding(), instruction, + /*may_combine_partial_sharding=*/is_spmd); + } + } + return changed; + } const HloInstruction* operand = PickRepresentativeOperand(instruction); if (!operand || !IsSpatiallyPartitioned(operand)) { return false; } - return MaybeImproveInstructionSharding(operand->sharding(), instruction); + return MaybeImproveInstructionSharding( + operand->sharding(), instruction, + /*may_combine_partial_sharding=*/is_spmd); } } return false; @@ -1308,8 +1456,9 @@ bool InferShardingFromUsers(HloInstruction* instruction, absl::optional user_sharding = GetShardingFromUser(*instruction, *user, aggressive_prop, is_spmd); if (user_sharding) { - improved_sharding |= - MaybeImproveInstructionSharding(*user_sharding, instruction); + improved_sharding |= MaybeImproveInstructionSharding( + *user_sharding, instruction, + /*may_combine_partial_sharding=*/is_spmd); } } return improved_sharding; diff --git a/tensorflow/compiler/xla/service/sharding_propagation_test.cc b/tensorflow/compiler/xla/service/sharding_propagation_test.cc index 594130daf0b..8aa10b67ed8 100644 --- a/tensorflow/compiler/xla/service/sharding_propagation_test.cc +++ b/tensorflow/compiler/xla/service/sharding_propagation_test.cc @@ -1149,21 +1149,21 @@ ENTRY entry { ShardingPropagation().Run(module.get())); EXPECT_TRUE(changed); EXPECT_THAT(FindInstruction(module.get(), "tp"), - op::Sharding("{{devices=[1,2]0,1}}")); + op::Sharding("{{devices=[3,1]0,1,2}}")); EXPECT_THAT(FindInstruction(module.get(), "tgte"), - op::Sharding("{devices=[1,2]0,1}")); + op::Sharding("{devices=[3,1]0,1,2}")); 
EXPECT_THAT(FindInstruction(module.get(), "ttr"), - op::Sharding("{devices=[2,1]0,1}")); + op::Sharding("{devices=[1,3]0,1,2}")); EXPECT_THAT(FindInstruction(module.get(), "tr"), - op::Sharding("{{devices=[2,1]0,1}}")); + op::Sharding("{{devices=[1,3]0,1,2}}")); EXPECT_THAT(FindInstruction(module.get(), "fp"), op::Sharding("{{devices=[1,3]0,1,2}}")); EXPECT_THAT(FindInstruction(module.get(), "fgte"), op::Sharding("{devices=[1,3]0,1,2}")); EXPECT_THAT(FindInstruction(module.get(), "fr"), - op::Sharding("{{devices=[2,1]0,1}}")); + op::Sharding("{{devices=[1,3]0,1,2}}")); EXPECT_THAT(FindInstruction(module.get(), "conditional"), - op::Sharding("{{devices=[2,1]0,1}}")); + op::Sharding("{{devices=[1,3]0,1,2}}")); } TEST_F(ShardingPropagationTest, TupleFromUser) { @@ -1764,5 +1764,58 @@ ENTRY entry { op::Sharding("{devices=[2,1]0,1}")); } +TEST_F(ShardingPropagationTest, PartialShardingOnElementwise) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %p0 = f32[2,9] parameter(0), sharding={devices=[1,2,2]0,1,2,3 last_tile_dim_replicate} + %p1 = f32[2,9] parameter(1), sharding={devices=[2,1,2]0,2,1,3 last_tile_dim_replicate} + %lhs = f32[2,9] copy(%p0) + %rhs = f32[2,9] copy(%p1) + %add = f32[2,9] add(%lhs, %rhs) + ROOT %copy = f32[2,9] copy(%add) +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + TF_ASSERT_OK_AND_ASSIGN( + bool changed, ShardingPropagation(/*is_spmd=*/true).Run(module.get())); + EXPECT_TRUE(changed); + EXPECT_THAT(FindInstruction(module.get(), "lhs"), + op::Sharding("{devices=[2,2]0,2,1,3}")); + EXPECT_THAT(FindInstruction(module.get(), "rhs"), + op::Sharding("{devices=[2,2]0,2,1,3}")); + EXPECT_THAT(FindInstruction(module.get(), "add"), + op::Sharding("{devices=[2,2]0,2,1,3}")); +} + +TEST_F(ShardingPropagationTest, PartialShardingOnElementwise2) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + %p0 = f32[2,9] parameter(0), sharding={devices=[1,2,4]0,1,2,3,4,5,6,7 last_tile_dim_replicate} + %p1 = f32[2,9] parameter(1), sharding={devices=[2,1,4]0,1,4,5,2,3,6,7 last_tile_dim_replicate} + %lhs = f32[2,9] copy(%p0) + %rhs = f32[2,9] copy(%p1) + %add = f32[2,9] add(%lhs, %rhs) + ROOT %copy = f32[2,9] copy(%add) +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + TF_ASSERT_OK_AND_ASSIGN( + bool changed, ShardingPropagation(/*is_spmd=*/true).Run(module.get())); + EXPECT_TRUE(changed); + EXPECT_THAT( + FindInstruction(module.get(), "lhs"), + op::Sharding("{devices=[2,2,2]0,1,4,5,2,3,6,7 last_tile_dim_replicate}")); + EXPECT_THAT( + FindInstruction(module.get(), "rhs"), + op::Sharding("{devices=[2,2,2]0,1,4,5,2,3,6,7 last_tile_dim_replicate}")); + EXPECT_THAT( + FindInstruction(module.get(), "add"), + op::Sharding("{devices=[2,2,2]0,1,4,5,2,3,6,7 last_tile_dim_replicate}")); +} + } // namespace } // namespace xla From 8c0c1e173093db7993544c46187d3be34a9a8e0b Mon Sep 17 00:00:00 2001 From: Jay Shi Date: Thu, 6 Aug 2020 18:26:11 -0700 Subject: [PATCH 2315/2522] [tf.data] Add the optimization to disable intra op parallelism. 
PiperOrigin-RevId: 325352372 Change-Id: I7f31e249f788316ff21263dc1b08882029da6ab9 --- .../core/grappler/optimizers/data/BUILD | 35 ++++++ .../data/disable_intra_op_parallelism.cc | 99 +++++++++++++++ .../data/disable_intra_op_parallelism.h | 50 ++++++++ .../data/disable_intra_op_parallelism_test.cc | 117 ++++++++++++++++++ .../optimizers/data/inject_prefetch.cc | 2 +- .../optimizers/data/meta_optimizer.cc | 3 +- 6 files changed, 304 insertions(+), 2 deletions(-) create mode 100644 tensorflow/core/grappler/optimizers/data/disable_intra_op_parallelism.cc create mode 100644 tensorflow/core/grappler/optimizers/data/disable_intra_op_parallelism.h create mode 100644 tensorflow/core/grappler/optimizers/data/disable_intra_op_parallelism_test.cc diff --git a/tensorflow/core/grappler/optimizers/data/BUILD b/tensorflow/core/grappler/optimizers/data/BUILD index 1daf7e9b76e..860cbd7c35e 100644 --- a/tensorflow/core/grappler/optimizers/data/BUILD +++ b/tensorflow/core/grappler/optimizers/data/BUILD @@ -57,6 +57,41 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "disable_intra_op_parallelism", + srcs = ["disable_intra_op_parallelism.cc"], + hdrs = ["disable_intra_op_parallelism.h"], + deps = [ + ":graph_utils", + ":optimizer_base", + "//tensorflow/core/grappler:mutable_graph_view", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "//tensorflow/core/grappler:grappler_item", + "//tensorflow/core/grappler:op_types", + "//tensorflow/core/grappler:utils", + "//tensorflow/core/grappler/clusters:cluster", + "//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry", + "//tensorflow/core:lib_internal", + ] + tf_protos_all(), + alwayslink = 1, +) + +tf_cc_test( + name = "disable_intra_op_parallelism_test", + srcs = ["disable_intra_op_parallelism_test.cc"], + deps = [ + ":disable_intra_op_parallelism", + ":graph_test_utils", + ":graph_utils", + "//tensorflow/core:framework", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/grappler:grappler_item", + ], +) + cc_library( name = "filter_fusion", srcs = ["filter_fusion.cc"], diff --git a/tensorflow/core/grappler/optimizers/data/disable_intra_op_parallelism.cc b/tensorflow/core/grappler/optimizers/data/disable_intra_op_parallelism.cc new file mode 100644 index 00000000000..4b6d6ac1bfa --- /dev/null +++ b/tensorflow/core/grappler/optimizers/data/disable_intra_op_parallelism.cc @@ -0,0 +1,99 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/grappler/optimizers/data/disable_intra_op_parallelism.h" + +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/grappler/clusters/cluster.h" +#include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/grappler/mutable_graph_view.h" +#include "tensorflow/core/grappler/op_types.h" +#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h" +#include "tensorflow/core/grappler/optimizers/data/graph_utils.h" +#include "tensorflow/core/grappler/utils.h" +#include "tensorflow/core/platform/protobuf.h" + +namespace tensorflow { +namespace grappler { +namespace { + +constexpr char kMaxIntraOpParallelismDataset[] = "MaxIntraOpParallelismDataset"; + +constexpr std::array kMaxIntraOpParallelismDatasetOps = { + "MaxIntraOpParallelismDataset", + "ExperimentalMaxIntraOpParallelismDataset", +}; + +} // namespace + +Status DisableIntraOpParallelism::OptimizeAndCollectStats( + Cluster* cluster, const GrapplerItem& item, GraphDef* output, + OptimizationStats* stats) { + *output = item.graph; + MutableGraphView graph(output); + + const NodeDef* sink_node; + for (const NodeDef& node : item.graph.node()) { + for (const auto& target_dataset_op : kMaxIntraOpParallelismDatasetOps) { + if (node.op() == target_dataset_op) { + // If parallelism is set by the user, we keep the user setting instead + // of disabling it. + return Status::OK(); + } + } + if (node.name() == "Sink") { + sink_node = &node; + } + } + + NodeDef* last_node = graph_utils::GetInputNode(*sink_node, graph); + + // Add a const node with value 1 + NodeDef* max_parallelism_value = graph_utils::AddScalarConstNode(1LL, &graph); + + NodeDef insert_node; + graph_utils::SetUniqueGraphNodeName("intra_op_parallelism", graph.graph(), + &insert_node); + insert_node.set_op(kMaxIntraOpParallelismDataset); + + // `input_dataset` input + *insert_node.mutable_input()->Add() = last_node->name(); + // `max_intra_op_parallelism` input + *insert_node.mutable_input()->Add() = max_parallelism_value->name(); + + for (const auto& attr_name : {"output_types", "output_shapes"}) { + graph_utils::CopyAttribute(attr_name, *last_node, &insert_node); + } + + auto* added_node = graph.AddNode(std::move(insert_node)); + TF_RETURN_IF_ERROR( + graph.UpdateFanouts(last_node->name(), added_node->name())); + + stats->num_changes++; + return Status::OK(); +} + +void DisableIntraOpParallelism::Feedback(Cluster* cluster, + const GrapplerItem& item, + const GraphDef& optimize_output, + double result) { + // no-op +} + +REGISTER_GRAPH_OPTIMIZER_AS(DisableIntraOpParallelism, + "disable_intra_op_parallelism"); + +} // namespace grappler +} // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/data/disable_intra_op_parallelism.h b/tensorflow/core/grappler/optimizers/data/disable_intra_op_parallelism.h new file mode 100644 index 00000000000..d2355eb8766 --- /dev/null +++ b/tensorflow/core/grappler/optimizers/data/disable_intra_op_parallelism.h @@ -0,0 +1,50 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_DISABLE_INTRA_OP_PARALLELISM_H_ +#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_DISABLE_INTRA_OP_PARALLELISM_H_ + +#include "tensorflow/core/grappler/optimizers/data/optimizer_base.h" + +namespace tensorflow { +namespace grappler { + +// This optimization sets intra-op parallelism to be 1. +class DisableIntraOpParallelism : public TFDataOptimizerBase { + public: + DisableIntraOpParallelism() = default; + ~DisableIntraOpParallelism() override = default; + + string name() const override { return "disable_intra_op_parallelism"; }; + + bool UsesFunctionLibrary() const override { return false; } + + Status Init( + const tensorflow::RewriterConfig_CustomGraphOptimizer* config) override { + return Status::OK(); + } + + Status OptimizeAndCollectStats(Cluster* cluster, const GrapplerItem& item, + GraphDef* output, + OptimizationStats* stats) override; + + void Feedback(Cluster* cluster, const GrapplerItem& item, + const GraphDef& optimize_output, double result) override; +}; + +} // namespace grappler +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_DATA_DISABLE_INTRA_OP_PARALLELISM_H_ diff --git a/tensorflow/core/grappler/optimizers/data/disable_intra_op_parallelism_test.cc b/tensorflow/core/grappler/optimizers/data/disable_intra_op_parallelism_test.cc new file mode 100644 index 00000000000..76d6b46fb4e --- /dev/null +++ b/tensorflow/core/grappler/optimizers/data/disable_intra_op_parallelism_test.cc @@ -0,0 +1,117 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/grappler/optimizers/data/disable_intra_op_parallelism.h" + +#include "tensorflow/core/framework/attr_value_util.h" +#include "tensorflow/core/framework/function_testlib.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/grappler/optimizers/data/graph_test_utils.h" +#include "tensorflow/core/grappler/optimizers/data/graph_utils.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace grappler { +namespace { + +using test::function::NDef; + +// If the user manually sets intra op parallelism, we don't insert the op. 
+class IntraOpAlreadySetTest + : public ::testing::TestWithParam> {}; + +TEST_P(IntraOpAlreadySetTest, IntraOpParallelism) { + const string op = std::get<0>(GetParam()); + const int64 value = std::get<1>(GetParam()); + + GrapplerItem item; + MutableGraphView graph(&item.graph); + + NodeDef *start_val = graph_utils::AddScalarConstNode(0, &graph); + NodeDef *stop_val = graph_utils::AddScalarConstNode(10, &graph); + NodeDef *step_val = graph_utils::AddScalarConstNode(1, &graph); + std::vector range_inputs(3); + range_inputs[0] = start_val->name(); + range_inputs[1] = stop_val->name(); + range_inputs[2] = step_val->name(); + std::vector> range_attrs; + NodeDef *range_node = graph_utils::AddNode("", "RangeDataset", range_inputs, + range_attrs, &graph); + + NodeDef *max_parallelism_val = + graph_utils::AddScalarConstNode(value, &graph); + std::vector parallelism_inputs(2); + parallelism_inputs[0] = range_node->name(); + parallelism_inputs[1] = max_parallelism_val->name(); + std::vector> parallelism_attrs; + graph_utils::AddNode("", op, parallelism_inputs, parallelism_attrs, &graph); + + EXPECT_TRUE(graph_utils::ContainsNodeWithOp(op, item.graph)); + EXPECT_EQ(item.graph.node_size(), 6); + EXPECT_EQ(max_parallelism_val->attr().at("value").tensor().int64_val(0), + value); + + DisableIntraOpParallelism optimizer; + GraphDef output; + TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output)); + EXPECT_EQ(output.node_size(), 6); + EXPECT_TRUE(graph_utils::ContainsNodeWithOp(op, output)); + NodeDef test_node = output.node(graph_utils::FindGraphNodeWithOp(op, output)); + NodeDef test_val = output.node( + graph_utils::FindGraphNodeWithName(test_node.input(1), output)); + EXPECT_EQ(test_val.attr().at("value").tensor().int64_val(0), value); +} + +INSTANTIATE_TEST_SUITE_P( + Test, IntraOpAlreadySetTest, + ::testing::Combine( + ::testing::Values("MaxIntraOpParallelismDataset", + "ExperimentalMaxIntraOpParallelismDataset"), + ::testing::Values(1, 5))); + +// If the user hasn't set intra op parallelism, we insert the op to disable it. 
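For orientation on the disable_intra_op_parallelism pass above: it splices a MaxIntraOpParallelismDataset node with value 1 in front of the sink, and leaves the pipeline alone when the user already set the knob. The user-facing counterpart of that knob is the tf.data threading option, sketched below; at the time of this patch series it lives under options.experimental_threading.

import tensorflow as tf

ds = tf.data.Dataset.range(10)

# Explicitly cap intra-op parallelism for this pipeline; this corresponds to
# the MaxIntraOpParallelismDataset node that the optimization would insert,
# and an explicit user setting like this one is kept rather than overridden.
options = tf.data.Options()
options.experimental_threading.max_intra_op_parallelism = 1
ds = ds.with_options(options)

for x in ds:
    print(int(x))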
+TEST(IntraOpNotSetTest, IntraOpParallelism) { + GrapplerItem item; + + item.graph = test::function::GDef( + {NDef("start", "Const", {}, {{"value", 0}, {"dtype", DT_INT32}}), + NDef("stop", "Const", {}, {{"value", 10}, {"dtype", DT_INT32}}), + NDef("step", "Const", {}, {{"value", 1}, {"dtype", DT_INT32}}), + NDef("range", "RangeDataset", {"start", "stop", "step"}, + {{"output_shapes", gtl::ArraySlice{}}, + {"output_types", gtl::ArraySlice{}}}), + NDef("Sink", "Identity", {"range"}, {})}); + EXPECT_FALSE(graph_utils::ContainsNodeWithOp("MaxIntraOpParallelismDataset", + item.graph)); + EXPECT_EQ(item.graph.node_size(), 5); + + DisableIntraOpParallelism optimizer; + GraphDef output; + TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output)); + EXPECT_EQ(output.node_size(), 7); + EXPECT_TRUE( + graph_utils::ContainsNodeWithOp("MaxIntraOpParallelismDataset", output)); + NodeDef test_node = output.node( + graph_utils::FindGraphNodeWithOp("MaxIntraOpParallelismDataset", output)); + NodeDef test_val = output.node( + graph_utils::FindGraphNodeWithName(test_node.input(1), output)); + EXPECT_EQ(test_val.attr().at("value").tensor().int64_val(0), 1); +} + +} // namespace +} // namespace grappler +} // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/data/inject_prefetch.cc b/tensorflow/core/grappler/optimizers/data/inject_prefetch.cc index eae8d294247..ed202c151ae 100644 --- a/tensorflow/core/grappler/optimizers/data/inject_prefetch.cc +++ b/tensorflow/core/grappler/optimizers/data/inject_prefetch.cc @@ -70,7 +70,7 @@ Status InjectPrefetch::OptimizeAndCollectStats(Cluster* cluster, graph_utils::SetUniqueGraphNodeName( strings::StrCat("inject/prefetch_", async_dataset_node->name()), graph.graph(), &prefetch_node); - prefetch_node.set_op("PrefetchDataset"); + prefetch_node.set_op(kPrefetchDataset); // `input_dataset` input *prefetch_node.mutable_input()->Add() = async_dataset_node->name(); // `buffer_size` input diff --git a/tensorflow/core/grappler/optimizers/data/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/data/meta_optimizer.cc index bd7e18b807c..8d50a0409df 100644 --- a/tensorflow/core/grappler/optimizers/data/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/data/meta_optimizer.cc @@ -35,8 +35,9 @@ using ConfigMap = std::map; // tf.data optimizations, in the order we want to perform them. -constexpr std::array kTFDataOptimizations = { +constexpr std::array kTFDataOptimizations = { "noop_elimination", + "disable_intra_op_parallelism", "shuffle_and_repeat_fusion", "map_fusion", "filter_fusion", From 3b47c2bdeadec041c62f2f56593e0054b2eb6743 Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Thu, 6 Aug 2020 18:31:33 -0700 Subject: [PATCH 2316/2522] [MLIR][NFC] Adopt FuncOp/Region argument API's. - Use FuncOp::getArguments() and Region::getArguments() and friends where possible instead of going through the front() block. 
PiperOrigin-RevId: 325352975 Change-Id: Ib3dcfed692c0e04c554120a748f82e9efe009b89 --- .../hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_gpu.cc | 6 +++--- .../compiler/mlir/lite/transforms/while_loop_outline.cc | 5 ++--- .../compiler/mlir/tensorflow/translate/import_model.cc | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_gpu.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_gpu.cc index 0d0b8b0ab6e..cffb58b37de 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_gpu.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_gpu.cc @@ -147,9 +147,9 @@ class LhloReduceToGPULaunchConverter : public OpConversionPattern { // Now copy over the actual body of the reduction, leaving out the // terminator. BlockAndValueMapping mapping; - mapping.map(reduce_op.body().front().getArgument(0), accumulator); - mapping.map(reduce_op.body().front().getArgument(1), rhs); - mapping.map(reduce_op.body().front().getArgument(2), accumulator); + mapping.map(reduce_op.body().getArgument(0), accumulator); + mapping.map(reduce_op.body().getArgument(1), rhs); + mapping.map(reduce_op.body().getArgument(2), accumulator); for (auto& nested : reduce_op.body().front().without_terminator()) { auto clone = rewriter.clone(nested, mapping); for (auto pair : llvm::zip(nested.getResults(), clone->getResults())) { diff --git a/tensorflow/compiler/mlir/lite/transforms/while_loop_outline.cc b/tensorflow/compiler/mlir/lite/transforms/while_loop_outline.cc index 3342981b75f..56b38ec58d8 100644 --- a/tensorflow/compiler/mlir/lite/transforms/while_loop_outline.cc +++ b/tensorflow/compiler/mlir/lite/transforms/while_loop_outline.cc @@ -80,7 +80,7 @@ void WhileOutlinePass::OutlineWhile(WhileOp while_op) { // The basic block arguments correspond to values that are loop carried, while // all those post are loop independent. Initialize extern_values with while_op // not loop carried operands. - auto num_loop_carried = while_op.cond().front().getNumArguments(); + auto num_loop_carried = while_op.cond().getNumArguments(); auto not_carried_operands = while_op.getOperands().drop_front(num_loop_carried); extern_values.insert(not_carried_operands.begin(), @@ -124,8 +124,7 @@ void WhileOutlinePass::OutlineWhile(WhileOp while_op) { // Collect new types. SmallVector types; types.reserve(extra_operands.size() + while_op.getNumOperands()); - for (BlockArgument ba : while_op.cond().front().getArguments()) - types.push_back(ba.getType()); + for (Type type : while_op.cond().getArgumentTypes()) types.push_back(type); for (Value operand : extern_values) types.push_back(operand.getType()); // Create outline function from region. 
Optional pass extra arguments through diff --git a/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc b/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc index 27385e81262..ef0087c4310 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc @@ -2873,7 +2873,7 @@ void AdjustBoundInputArgTypes(mlir::ModuleOp module) { mlir::OpBuilder builder(func.getBody()); llvm::SmallVector new_input_types; for (int i = 0, e = func.getNumArguments(); i < e; i++) { - auto arg = func.front().getArgument(i); + auto arg = func.getArgument(i); auto global_tensor = mlir::tf_saved_model::LookupBoundInputOfType< mlir::tf_saved_model::GlobalTensorOp>(func, i, symbol_table); if (global_tensor) { From 39ea1f2706b7fad1905346f107e39259e8af7b7d Mon Sep 17 00:00:00 2001 From: Rick Chao Date: Thu, 6 Aug 2020 18:37:45 -0700 Subject: [PATCH 2317/2522] PSv2: Add distribute/client:parameter_server_client dep to tensorflow/python/distribute py_library so it gets built as part of tensorflow. PiperOrigin-RevId: 325353821 Change-Id: Ifba7e355c092dcdc138ef9f3ac5660d9c0e6e012 --- tensorflow/python/distribute/BUILD | 3 ++- tensorflow/python/distribute/__init__.py | 1 + tensorflow/python/distribute/client/BUILD | 6 +++--- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/distribute/BUILD b/tensorflow/python/distribute/BUILD index 7965be1d90f..6bb0570d77b 100644 --- a/tensorflow/python/distribute/BUILD +++ b/tensorflow/python/distribute/BUILD @@ -146,6 +146,7 @@ py_library( ":mirrored_strategy", ":one_device_strategy", ":sharded_variable", + "//tensorflow/python/distribute/client:parameter_server_client", "//tensorflow/python/distribute/experimental", ], ) @@ -1775,7 +1776,7 @@ distribute_py_test( py_library( name = "parameter_server_strategy_v2", srcs = ["parameter_server_strategy_v2.py"], - srcs_version = "PY3", + srcs_version = "PY2AND3", deps = [ ":parameter_server_strategy", "//tensorflow/python:constant_op", diff --git a/tensorflow/python/distribute/__init__.py b/tensorflow/python/distribute/__init__.py index f9d0a95ea58..acb3c112226 100644 --- a/tensorflow/python/distribute/__init__.py +++ b/tensorflow/python/distribute/__init__.py @@ -25,6 +25,7 @@ from tensorflow.python.distribute import distribute_lib from tensorflow.python.distribute import distribution_strategy_context from tensorflow.python.distribute import mirrored_strategy from tensorflow.python.distribute import one_device_strategy +from tensorflow.python.distribute.client import parameter_server_client from tensorflow.python.distribute.experimental import collective_all_reduce_strategy from tensorflow.python.distribute.experimental import parameter_server_strategy # pylint: enable=unused-import diff --git a/tensorflow/python/distribute/client/BUILD b/tensorflow/python/distribute/client/BUILD index 35d8de95276..d0d8d3af4ec 100644 --- a/tensorflow/python/distribute/client/BUILD +++ b/tensorflow/python/distribute/client/BUILD @@ -10,7 +10,7 @@ exports_files(["LICENSE"]) py_library( name = "parameter_server_client", srcs = ["parameter_server_client.py"], - srcs_version = "PY3", + srcs_version = "PY2AND3", deps = [ ":client", "//tensorflow/python/distribute:parameter_server_strategy_v2", @@ -20,7 +20,7 @@ py_library( py_library( name = "client", srcs = ["client.py"], - srcs_version = "PY3", + srcs_version = "PY2AND3", deps = [ ":metric_utils", "//tensorflow/python:errors", @@ -84,7 +84,7 @@ tf_py_test( py_library( name = 
"metric_utils", srcs = ["metric_utils.py"], - srcs_version = "PY3", + srcs_version = "PY2AND3", deps = [ "//tensorflow/python/eager:monitoring", ], From ab66003a02ead8f8e8620cc18f2d7c07ed7d04f7 Mon Sep 17 00:00:00 2001 From: Chenkai Kuang Date: Thu, 6 Aug 2020 18:39:07 -0700 Subject: [PATCH 2318/2522] Disable collective_all_reduce_strategy_test msan test. PiperOrigin-RevId: 325354007 Change-Id: I38d72f6fd0bfb8111338a9d31c4ef5ac8a5d921c --- tensorflow/python/distribute/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/distribute/BUILD b/tensorflow/python/distribute/BUILD index 6bb0570d77b..f67f306706f 100644 --- a/tensorflow/python/distribute/BUILD +++ b/tensorflow/python/distribute/BUILD @@ -1497,6 +1497,7 @@ cuda_py_test( python_version = "PY3", tags = [ "multi_and_single_gpu", + "nomsan", # b/154224457: Re-enable when fixed. ], # b/155301154 broken with XLA:GPU xla_enable_strict_auto_jit = True, From a8108923832fcf7f43b2f551912951fa50bed066 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 6 Aug 2020 18:41:46 -0700 Subject: [PATCH 2319/2522] Integrate LLVM at llvm/llvm-project@9dbdaea9a0e6 Updates LLVM usage to match [9dbdaea9a0e6](https://github.com/llvm/llvm-project/commit/9dbdaea9a0e6) PiperOrigin-RevId: 325354353 Change-Id: Icb539b494c5dfa096c8ed605907ab9341c0f670f --- .../lib/Dialect/mhlo/transforms/lhlo_legalize_to_llvm.cc | 5 ++--- .../transforms/tf_framework_legalize_to_llvm_pass.cc | 2 +- .../compiler/xla/service/mlir_gpu/mlir_compiler_impl.cc | 8 ++++---- tensorflow/workspace.bzl | 4 ++-- third_party/mlir/BUILD | 1 + 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_llvm.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_llvm.cc index af64c448ad9..42b71543543 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_llvm.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/lhlo_legalize_to_llvm.cc @@ -217,8 +217,7 @@ struct ReshapeMemRefCastOpConverter SmallVector sizes; UnrankedMemRefDescriptor::computeSizes(rewriter, loc, typeConverter, {target_desc}, sizes); - auto void_ptr_type = - LLVM::LLVMType::getInt8PtrTy(typeConverter.getDialect()); + auto void_ptr_type = LLVM::LLVMType::getInt8PtrTy(rewriter.getContext()); Value ranked_desc_mem = rewriter.create( loc, void_ptr_type, sizes.front(), llvm::None); target_desc.setMemRefDescPtr(rewriter, loc, ranked_desc_mem); @@ -282,7 +281,7 @@ struct ReshapeMemRefCastOpConverter auto index_arg = cond_block->addArgument(typeConverter.getIndexType()); auto stride_arg = cond_block->addArgument(typeConverter.getIndexType()); auto pred = rewriter.create( - loc, LLVM::LLVMType::getInt1Ty(typeConverter.getDialect()), + loc, LLVM::LLVMType::getInt1Ty(rewriter.getContext()), LLVM::ICmpPredicate::sge, index_arg, zero_index); Block *body_block = diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_legalize_to_llvm_pass.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_legalize_to_llvm_pass.cc index 41b38bb574f..42e89433dff 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_legalize_to_llvm_pass.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_legalize_to_llvm_pass.cc @@ -40,7 +40,7 @@ class TestTFFrameworkToLLVMPass // Populate type conversions. 
LLVMTypeConverter type_converter(m.getContext()); type_converter.addConversion([&](tf_framework::OpKernelContextType type) { - return LLVM::LLVMType::getInt8PtrTy(type_converter.getDialect()); + return LLVM::LLVMType::getInt8PtrTy(m.getContext()); }); // Populate patterns. diff --git a/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler_impl.cc b/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler_impl.cc index 2c2076bbd97..25a35a89cb4 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler_impl.cc +++ b/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler_impl.cc @@ -292,10 +292,10 @@ Status InsertBufferLoadPreduleIntoKernel( BufferAssignment* assignment, const std::vector& buffers) { mlir::OpBuilder builder(kernel.getBody()); - auto llvm_dialect = kernel.getContext()->getRegisteredDialect(); - auto offset_type = LLVMType::getInt64Ty(llvm_dialect); - auto ptr_type = LLVMType::getInt8PtrTy(llvm_dialect); - auto void_type = LLVMType::getVoidTy(llvm_dialect); + auto* context = kernel.getContext(); + auto offset_type = LLVMType::getInt64Ty(context); + auto ptr_type = LLVMType::getInt8PtrTy(context); + auto void_type = LLVMType::getVoidTy(context); auto loc = kernel.getLoc(); auto num_original_args = kernel.getNumArguments(); diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 440aa1b23ec..07b9950bca2 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -712,8 +712,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "633e3dacf27ea4950b7067803502490597ba96e0" - LLVM_SHA256 = "585299b33c32ea3a39b0cfb70e5dd431f3ab064d9f96baa4787693b3c66af21e" + LLVM_COMMIT = "9dbdaea9a0e6f58417b5bd8980e7ea6723fd1783" + LLVM_SHA256 = "1ae491e33bb35777cf5f38acd183ce3ca2aff255c15254ae97084bcbd2e4aa56" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), diff --git a/third_party/mlir/BUILD b/third_party/mlir/BUILD index 872d04194f6..eeb78e0544b 100644 --- a/third_party/mlir/BUILD +++ b/third_party/mlir/BUILD @@ -1491,6 +1491,7 @@ cc_library( ":IR", ":LLVMDialect", ":Pass", + ":StandardToLLVM", ":Support", ":TargetNVVMIR", "@llvm-project//llvm:Core", From d51d619a0e7e9bb97c1a6bc1754d09db1c37f1b1 Mon Sep 17 00:00:00 2001 From: Ran Chen Date: Thu, 6 Aug 2020 18:55:08 -0700 Subject: [PATCH 2320/2522] Clean up is_saving_non_distributed() options here is never None. 
It's confusing to return False when it's None, since the default variable policy is NONE, not EXPAND_DISTRIBUTED_VARIABLE PiperOrigin-RevId: 325355974 Change-Id: Idfcb9498e3efcc20729e6b1309e47c7f80f35862 --- tensorflow/python/distribute/values_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/distribute/values_util.py b/tensorflow/python/distribute/values_util.py index 67b5234d82c..535351e6563 100644 --- a/tensorflow/python/distribute/values_util.py +++ b/tensorflow/python/distribute/values_util.py @@ -270,5 +270,5 @@ def is_saving_non_distributed(): if not save_context.in_save_context(): return False options = save_context.get_save_options() - return (options is not None and options.experimental_variable_policy != + return (options.experimental_variable_policy != save_options.VariablePolicy.EXPAND_DISTRIBUTED_VARIABLES) From 78606da47fb80ef2b16ad81b5c5f1129857040cd Mon Sep 17 00:00:00 2001 From: Robert David Date: Thu, 6 Aug 2020 19:23:10 -0700 Subject: [PATCH 2321/2522] Add include-what-you-use pragma so IWYU does not try to include these files again. PiperOrigin-RevId: 325359254 Change-Id: Ibeb53b70736036ab22ed59858f31adb0b55c65a7 --- tensorflow/lite/delegates/gpu/common/status.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/common/status.h b/tensorflow/lite/delegates/gpu/common/status.h index d6b5dd8a94a..22dcc11d57f 100644 --- a/tensorflow/lite/delegates/gpu/common/status.h +++ b/tensorflow/lite/delegates/gpu/common/status.h @@ -16,7 +16,7 @@ limitations under the License. #ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_STATUS_H_ #define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_STATUS_H_ -#include "absl/status/status.h" -#define RETURN_IF_ERROR(s) {auto c=(s);if(!c.ok())return c;} +#include "absl/status/status.h" // IWYU pragma: export +#define RETURN_IF_ERROR(s) {auto c=(s);if(!c.ok())return c;} // IWYU pragma: export #endif // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_STATUS_H_ From 74a3400d3f609b604cb44c48c3cd2bd7582eec5c Mon Sep 17 00:00:00 2001 From: Ran Chen Date: Thu, 6 Aug 2020 19:36:38 -0700 Subject: [PATCH 2322/2522] Add a test of some known deadlocks after peer failures PiperOrigin-RevId: 325360404 Change-Id: I003fac1bb797e8bacbaaafa02be6e8cc41e35f66 --- .../python/distribute/integration_test/BUILD | 19 ++ .../mwms_peer_failure_test.py | 167 ++++++++++++++++++ 2 files changed, 186 insertions(+) create mode 100644 tensorflow/python/distribute/integration_test/mwms_peer_failure_test.py diff --git a/tensorflow/python/distribute/integration_test/BUILD b/tensorflow/python/distribute/integration_test/BUILD index 156699fad7b..307f2580996 100644 --- a/tensorflow/python/distribute/integration_test/BUILD +++ b/tensorflow/python/distribute/integration_test/BUILD @@ -1,4 +1,5 @@ load("//tensorflow/core/platform/default:distribute.bzl", "distribute_py_test") +load("//tensorflow:tensorflow.bzl", "cuda_py_test") package( default_visibility = ["//tensorflow:internal"], @@ -19,3 +20,21 @@ distribute_py_test( "@absl_py//absl/testing:parameterized", ], ) + +cuda_py_test( + name = "mwms_peer_failure_test", + size = "medium", + srcs = ["mwms_peer_failure_test.py"], + python_version = "PY3", + shard_count = 2, + tags = [ + "multi_and_single_gpu", + ], + deps = [ + "//tensorflow:tensorflow_py", + "//tensorflow/python/distribute:combinations", + "//tensorflow/python/distribute:multi_process_runner", + "//tensorflow/python/distribute:multi_worker_test_base", + "//tensorflow/python/eager:test", + ], +) diff --git 
a/tensorflow/python/distribute/integration_test/mwms_peer_failure_test.py b/tensorflow/python/distribute/integration_test/mwms_peer_failure_test.py new file mode 100644 index 00000000000..c247be1c280 --- /dev/null +++ b/tensorflow/python/distribute/integration_test/mwms_peer_failure_test.py @@ -0,0 +1,167 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""This file contains tests that simulate peer failures. + +When a peer fails during MultiWorkerMirroredStrategy training. All workers +should get Unavailable error. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +import tensorflow as tf + +from tensorflow.python.distribute import combinations +from tensorflow.python.distribute import multi_process_runner +from tensorflow.python.distribute import multi_worker_test_base +from tensorflow.python.eager import test + + +def get_attempt(strategy, attempts): + task_type = strategy.cluster_resolver.task_type + task_id = strategy.cluster_resolver.task_id + attempts[(task_type, task_id)] = attempts.get((task_type, task_id), 0) + 1 + return task_id, attempts[(task_type, task_id)] + + +quick_exit = os._exit # pylint: disable=protected-access + + +class PeerFailureTest(test.TestCase): + # Note that all the tests use auto_restart=True. Currently we rely on the + # assumption that an external system restarts failed tasks. If the assumption + # is not true, the remaining tasks may still hang instead of fail. + # + # In these tests we leverage the auto restart feature of MultiProcessRunner. + # Failed workers are restarted automatically. In reality there needs to be + # some job management system that does the restart, e.g. Kubernetes. + # + # Worker failures may cause problems if there're more than one collective, and + # the failure happens after the first collective. In this case the recovered + # worker will be running a different collective with the rest, which causes a + # deadlock. Note that collectives are common, e.g. when creating variables the + # initial values are broadcasted from the first worker. + # + # We use a multiprocessing.Manager().dict() object to track the attempts of + # each worker. We take different actions in different attempts to simuate the + # events in real world. E.g. some tests make a worker fail on the first + # attempt only, and asserts that it should recovery. + + def test_creating_variable_broken(self): + # This test simulates the case when a worker fails before or during creating + # a variable. Creating variables involve broadcasting the initial value from + # the first replica to all replicas. + + def worker_fn(attempts): + strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy() + task_id, attempt = get_attempt(strategy, attempts) + with strategy.scope(): + tf.Variable(1.) + # worker-1 dies here. 
+ if attempt == 1 and task_id == 1: + quick_exit(1) + v = tf.Variable(tf.random.uniform(())) + return v.read_value().numpy() + + cluster_spec = multi_worker_test_base.create_cluster_spec(num_workers=2) + attempts = multi_process_runner.manager().dict() + mpr = multi_process_runner.MultiProcessRunner( + worker_fn, cluster_spec, args=(attempts,), auto_restart=True) + mpr.start() + # TODO(b/151232436): worker-0 should raises Unavailable instead of hanging. + # Now after worker-1 fails, worker-0 waits on the second variable creation; + # after worker-1 recovers, worker-1 waits on the first variable creation. + with self.assertRaises(multi_process_runner.SubprocessTimeoutError): + mpr.join(timeout=30) + + def test_reduce_small_tensor_broken(self): + # This test simulates the case when a worker fails before or during reducing + # a small tensors, e.g. reading a metric. + # + # Note that this is a rather corner case and only happens when all of the + # following conditions are met: + # - There're two workers. + # - They're reducing a small tensor. The definition of small varies + # per platform. + # - They're reducing a single tensor. Batched all-reduce are not affected. + # - It must be worker-1 that fails. + + def worker_fn(attempts): + strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy() + task_id, attempt = get_attempt(strategy, attempts) + value = tf.identity([1.]) + strategy.reduce("sum", value, axis=None) + # worker-1 dies here. + if attempt == 1 and task_id == 1: + quick_exit(1) + strategy.reduce("sum", value, axis=None) + + cluster_spec = multi_worker_test_base.create_cluster_spec(num_workers=2) + attempts = multi_process_runner.manager().dict() + mpr = multi_process_runner.MultiProcessRunner( + worker_fn, cluster_spec, args=(attempts,), auto_restart=True) + mpr.start() + # TODO(b/151232436): worker-0 should raises Unavailable instead of hanging. + # Now after worker-1 fails, worker-0 waits on the second reduce; after + # worker-1 recovers, worker-1 waits on the first reduce. + with self.assertRaises(multi_process_runner.SubprocessTimeoutError): + mpr.join(timeout=30) + + def test_quick_recover(self): + # This test simulates the case when a worker fails but recovers quickly + # before the next collective. + # + # It's not guaranteed that the cluster only restarts once when one worker + # fails. The external job management system is expected to keep restarting + # failed workers. + + def worker_fn(attempts): + strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy() + task_id, attempt = get_attempt(strategy, attempts) + + if attempt == 2 and task_id == 1: + multi_process_runner.barrier().wait() + + @tf.function + def replica_fn(): + ctx = tf.distribute.get_replica_context() + # Use a large tensor because small tensor may hang regardless when the + # worker recovers. + value = tf.ones((64, 64)) + ctx.all_reduce(tf.distribute.ReduceOp.SUM, [value, value]) + + strategy.run(replica_fn) + # worker-1 dies here. + if attempt == 1 and task_id == 1: + quick_exit(1) + # Make worker-0 waits for worker-1 to restart before entering the next + # collective to simulate a quick recovery of worker-1. 
+ if attempt == 1 and task_id == 0: + multi_process_runner.barrier().wait() + strategy.run(replica_fn) + + cluster_spec = multi_worker_test_base.create_cluster_spec(num_workers=2) + attempts = multi_process_runner.manager().dict() + mpr = multi_process_runner.MultiProcessRunner( + worker_fn, cluster_spec, args=(attempts,), auto_restart=True) + mpr.start() + mpr.join(timeout=90) + + +if __name__ == "__main__": + combinations.main() From d547659011b22058e99ba97f941166954b69b29d Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Thu, 6 Aug 2020 20:34:30 -0700 Subject: [PATCH 2323/2522] Fix mlir-hlo CMakefiles after internal change PiperOrigin-RevId: 325366142 Change-Id: I17a38ef6ffd23052f1d112355caa4798bf269a27 --- .../include/mlir-hlo/Dialect/mhlo/transforms/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/CMakeLists.txt b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/CMakeLists.txt index 6fbc5306a8f..6de6851b8d7 100644 --- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/CMakeLists.txt +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/CMakeLists.txt @@ -15,9 +15,9 @@ # set(LLVM_TARGET_DEFINITIONS mhlo_passes.td) -mlir_tablegen(mhlo_passes.h.inc -gen-pass-decls) +mlir_tablegen(mhlo_passes.h.inc -gen-pass-decls -name MHLO) add_public_tablegen_target(MLIRMhloPassIncGen) set(LLVM_TARGET_DEFINITIONS lmhlo_passes.td) -mlir_tablegen(lmhlo_passes.h.inc -gen-pass-decls) +mlir_tablegen(lmhlo_passes.h.inc -gen-pass-decls -name LMHLO) add_public_tablegen_target(MLIRLmhloPassIncGen) From 0a764ff2fe38274592d721690cc29c5612010967 Mon Sep 17 00:00:00 2001 From: Yuqi Li Date: Thu, 6 Aug 2020 21:00:05 -0700 Subject: [PATCH 2324/2522] Add homepage for TFLite Model Maker under "guide" PiperOrigin-RevId: 325368667 Change-Id: I3f4d1162684564373a9abbda59c520928448a0cf --- tensorflow/lite/g3doc/_book.yaml | 5 ++ tensorflow/lite/g3doc/guide/get_started.md | 4 ++ tensorflow/lite/g3doc/guide/model_maker.md | 60 ++++++++++++++++++++++ 3 files changed, 69 insertions(+) create mode 100644 tensorflow/lite/g3doc/guide/model_maker.md diff --git a/tensorflow/lite/g3doc/_book.yaml b/tensorflow/lite/g3doc/_book.yaml index 45be4737fd5..96ec7363ab1 100644 --- a/tensorflow/lite/g3doc/_book.yaml +++ b/tensorflow/lite/g3doc/_book.yaml @@ -93,6 +93,11 @@ upper_tabs: - title: "1.x compatibility" path: /lite/convert/1x_compatibility + - heading: "Create a model" + - title: "TensorFlow Lite Model Maker" + status: experimental + path: /lite/guide/model_maker + - heading: "Inference" - title: "Overview" path: /lite/guide/inference diff --git a/tensorflow/lite/g3doc/guide/get_started.md b/tensorflow/lite/g3doc/guide/get_started.md index c9543c7f553..df206e73416 100644 --- a/tensorflow/lite/g3doc/guide/get_started.md +++ b/tensorflow/lite/g3doc/guide/get_started.md @@ -67,6 +67,10 @@ If you have designed and trained your own TensorFlow model, or you have trained a model obtained from another source, you must [convert it to the TensorFlow Lite format](#2_convert_the_model_format). +You can also try [The TensorFlow Lite Model Maker library](model_maker.md) which +simplifies the process of training a TensorFlow Lite model using custom +datasets. + ## 2. 
Convert the model diff --git a/tensorflow/lite/g3doc/guide/model_maker.md b/tensorflow/lite/g3doc/guide/model_maker.md new file mode 100644 index 00000000000..824fb1a3fd6 --- /dev/null +++ b/tensorflow/lite/g3doc/guide/model_maker.md @@ -0,0 +1,60 @@ +# TensorFlow Lite Model Maker + +## Overview + +The TensorFlow Lite Model Maker library simplifies the process of training a +TensorFlow Lite model using custom dataset. It uses transfer learning to reduce +the amount of training data required and shorten the training time. + +## Supported Tasks + +The Model Maker library currently supports the following ML tasks. Click the +links below for guides on how to train the model. + +Supported Tasks | Task Utility +-------------------------------------------------------------------------------------------------------- | ------------ +Image Classification [guide](https://www.tensorflow.org/lite/tutorials/model_maker_image_classification) | Classify images into predefined categories. +Text Classification [guide](https://www.tensorflow.org/lite/tutorials/model_maker_text_classification) | Classify text into predefined categories. +Question Answer [guide](https://www.tensorflow.org/lite/tutorials/model_maker_question_answer) | Find the answer in a certain context for a given question. + +## End-to-End Example + +Model Maker allows you to train a TensorFlow Lite model using custom datasets in +just a few lines of code. For example, here are the steps to train an image +classification model. + +```python +# Load input data specific to an on-device ML app. +data = ImageClassifierDataLoader.from_folder('flower_photos/') +train_data, test_data = data.split(0.9) + +# Customize the TensorFlow model. +model = image_classifier.create(data) + +# Evaluate the model. +loss, accuracy = model.evaluate(test_data) + +# Export to Tensorflow Lite model and label file in `export_dir`. +model.export(export_dir='/tmp/') +``` + +For more details, see the +[image classification guide](https://www.tensorflow.org/lite/tutorials/model_maker_image_classification). + +## Installation + +There are two ways to install Model Maker. + +* Install a prebuilt pip package + +```shell +pip install tflite-model-maker +``` + +* Clone the source code from GitHub and install. + +```shell +git clone https://github.com/tensorflow/examples +cd examples +pip install .[model_maker] +``` From 982d7edbfa023bb271a5e7592c747b63919705bf Mon Sep 17 00:00:00 2001 From: Haoliang Zhang Date: Thu, 6 Aug 2020 23:12:36 -0700 Subject: [PATCH 2325/2522] Internal cleanup on minor namespace issues. PiperOrigin-RevId: 325383289 Change-Id: I76247774f4a1bf06de4547f0b70a35c77c3af68f --- tensorflow/compiler/mlir/tensorflow/utils/error_util.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/mlir/tensorflow/utils/error_util.h b/tensorflow/compiler/mlir/tensorflow/utils/error_util.h index 4feb3837357..b5f2acc581d 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/error_util.h +++ b/tensorflow/compiler/mlir/tensorflow/utils/error_util.h @@ -27,7 +27,7 @@ limitations under the License. namespace mlir { // TensorFlow's Status is used for error reporting back to callers. -using tensorflow::Status; +using ::tensorflow::Status; // Diagnostic handler that collects all the diagnostics reported and can produce // a Status to return to callers. 
This is for the case where MLIR functions are From d50776efab854ae3af2bfbfaef0f54c836a09fe9 Mon Sep 17 00:00:00 2001 From: Yuqi Li Date: Thu, 6 Aug 2020 23:27:23 -0700 Subject: [PATCH 2326/2522] Remove install from github directly for tflite model maker. PiperOrigin-RevId: 325384550 Change-Id: Iab4572a8ec9a105f8b80a5b2a9aae64f4558379a --- tensorflow/lite/g3doc/guide/model_maker.md | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/tensorflow/lite/g3doc/guide/model_maker.md b/tensorflow/lite/g3doc/guide/model_maker.md index 824fb1a3fd6..76b32eac75e 100644 --- a/tensorflow/lite/g3doc/guide/model_maker.md +++ b/tensorflow/lite/g3doc/guide/model_maker.md @@ -43,18 +43,8 @@ For more details, see the ## Installation -There are two ways to install Model Maker. - -* Install a prebuilt pip package +Install a prebuilt pip package. ```shell pip install tflite-model-maker ``` - -* Clone the source code from GitHub and install. - -```shell -git clone https://github.com/tensorflow/examples -cd examples -pip install .[model_maker] -``` From bf3b427a6763f7109b0a08124108e3a4836ef549 Mon Sep 17 00:00:00 2001 From: Chao Mei Date: Thu, 6 Aug 2020 23:44:12 -0700 Subject: [PATCH 2327/2522] Add the flat_hash_map explicitly to the dep. PiperOrigin-RevId: 325385963 Change-Id: I953e4b0c6f26d60b1df7e54c9f7246b234300c89 --- tensorflow/lite/delegates/gpu/gl/workgroups/BUILD | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/gl/workgroups/BUILD b/tensorflow/lite/delegates/gpu/gl/workgroups/BUILD index 52fdb7435f9..1048912d754 100644 --- a/tensorflow/lite/delegates/gpu/gl/workgroups/BUILD +++ b/tensorflow/lite/delegates/gpu/gl/workgroups/BUILD @@ -32,15 +32,16 @@ cc_library( deps = select({ "//tensorflow/lite/delegates/gpu:tflite_gpu_binary_release": [], "//conditions:default": [ - ":default_calculator", - "//tensorflow/lite/delegates/gpu/gl:common_cc_fbs", - "//tensorflow/lite/delegates/gpu/gl:workgroups_cc_fbs", - "//tensorflow/lite/delegates/gpu/common:gpu_info", - "//tensorflow/lite/delegates/gpu/gl:metadata_cc_fbs", ":calculator", + ":default_calculator", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/memory", "@flatbuffers", + "//tensorflow/lite/delegates/gpu/common:gpu_info", "//tensorflow/lite/delegates/gpu/common:types", + "//tensorflow/lite/delegates/gpu/gl:common_cc_fbs", + "//tensorflow/lite/delegates/gpu/gl:metadata_cc_fbs", + "//tensorflow/lite/delegates/gpu/gl:workgroups_cc_fbs", ], }), ) From 5e0ed38eb746f3a86463f19bcf7138a959ddb2d4 Mon Sep 17 00:00:00 2001 From: Smit Hinsu Date: Fri, 7 Aug 2020 00:37:37 -0700 Subject: [PATCH 2328/2522] Use string_view type for FormatFromString and FilterFormatFromString arguments PiperOrigin-RevId: 325391279 Change-Id: If834446e5eac71840b92e6efcd6eac5170579769 --- tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc | 9 ++++++--- tensorflow/core/util/BUILD | 1 + tensorflow/core/util/tensor_format.cc | 4 ++-- tensorflow/core/util/tensor_format.h | 5 +++-- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc index 39dae59ecb4..1a730a38618 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc @@ -339,7 +339,8 @@ void BatchToSpaceOp::getCanonicalizationPatterns( // are not unknown. 
// static LogicalResult Verify(BiasAddOp op) { - std::string data_format = op.data_format().str(); + absl::string_view data_format(op.data_format().data(), + op.data_format().size()); tensorflow::TensorFormat format; bool is_valid = FormatFromString(data_format, &format); DCHECK(is_valid) << data_format; @@ -385,7 +386,8 @@ static LogicalResult Verify(BiasAddOp op) { // * the out_backprop operands have valid ranks or are unranked. // static LogicalResult Verify(BiasAddGradOp op) { - std::string data_format = op.data_format().str(); + absl::string_view data_format(op.data_format().data(), + op.data_format().size()); tensorflow::TensorFormat format; bool is_valid = FormatFromString(data_format, &format); DCHECK(is_valid) << data_format; @@ -995,7 +997,8 @@ static LogicalResult Verify(OpT op) { int64_t input_channels = -1; if (auto ty = op.input().getType().template dyn_cast()) { - std::string data_format = op.data_format().str(); + absl::string_view data_format(op.data_format().data(), + op.data_format().size()); tensorflow::TensorFormat format; auto is_valid = FormatFromString(data_format, &format); DCHECK(is_valid) << data_format; diff --git a/tensorflow/core/util/BUILD b/tensorflow/core/util/BUILD index dcb2787e309..634a937d1c4 100644 --- a/tensorflow/core/util/BUILD +++ b/tensorflow/core/util/BUILD @@ -519,6 +519,7 @@ cc_library( "//tensorflow/core/lib/gtl:array_slice", "//tensorflow/core/lib/gtl:inlined_vector", "//tensorflow/core/platform:types", + "@com_google_absl//absl/strings", ], ) diff --git a/tensorflow/core/util/tensor_format.cc b/tensorflow/core/util/tensor_format.cc index 5dbd8ef318f..008c4d45200 100644 --- a/tensorflow/core/util/tensor_format.cc +++ b/tensorflow/core/util/tensor_format.cc @@ -73,7 +73,7 @@ string ToString(FilterTensorFormat format) { } } -bool FormatFromString(const string& format_str, TensorFormat* format) { +bool FormatFromString(absl::string_view format_str, TensorFormat* format) { if (format_str == "NHWC" || format_str == "NDHWC") { *format = FORMAT_NHWC; return true; @@ -101,7 +101,7 @@ bool FormatFromString(const string& format_str, TensorFormat* format) { return false; } -bool FilterFormatFromString(const string& format_str, +bool FilterFormatFromString(absl::string_view format_str, FilterTensorFormat* format) { if (format_str == "HWIO" || format_str == "DHWIO") { *format = FORMAT_HWIO; diff --git a/tensorflow/core/util/tensor_format.h b/tensorflow/core/util/tensor_format.h index d2d7b9e58de..07762f84300 100644 --- a/tensorflow/core/util/tensor_format.h +++ b/tensorflow/core/util/tensor_format.h @@ -19,6 +19,7 @@ limitations under the License. #include #include +#include "absl/strings/string_view.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/lib/gtl/inlined_vector.h" @@ -97,11 +98,11 @@ enum FilterTensorFormat { // Parse tensor format from the given string. // Return true if the parsing succeeds, and false if it fails. -bool FormatFromString(const std::string& format_str, TensorFormat* format); +bool FormatFromString(absl::string_view format_str, TensorFormat* format); // Parse tensor format from the given string. // Return true if the parsing succeeds, and false if it fails. -bool FilterFormatFromString(const std::string& format_str, +bool FilterFormatFromString(absl::string_view format_str, FilterTensorFormat* format); // Convert a tensor format into string. From 155e561f2dae8ffef6570cf36d86e2d18a7eb0ce Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 7 Aug 2020 02:01:30 -0700 Subject: [PATCH 2329/2522] Update GraphDef version to 486. PiperOrigin-RevId: 325399599 Change-Id: I40193d624f2af8aa0e8dd47ce17c9bf49545e948 --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 431784a5a1a..ee9be29958d 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 485 // Updated: 2020/8/6 +#define TF_GRAPH_DEF_VERSION 486 // Updated: 2020/8/7 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From a8230634dddbbc1a097a8c24295d46680b897e4c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 7 Aug 2020 02:01:33 -0700 Subject: [PATCH 2330/2522] compat: Update forward compatibility horizon to 2020-08-07 PiperOrigin-RevId: 325399607 Change-Id: I61d99514eb9400fd92e02762d19fd8011a856a0d --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 65bb633855a..bef47619972 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 8, 6) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 8, 7) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From f068418748981d4e3a4a01c1a98b888eb4d76a67 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Fri, 7 Aug 2020 20:35:05 +0700 Subject: [PATCH 2331/2522] Gcs Filesystem refactor part 1 --- .../filesystem/plugins/gcs/gcs_filesystem.cc | 192 ++++++++---------- 1 file changed, 87 insertions(+), 105 deletions(-) diff --git a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc index 039bc4fd236..d170e51e3b1 100644 --- a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc @@ -776,28 +776,65 @@ static bool FolderExists(GCSFile* gcs_file, std::string dir, return true; } -void CreateDir(const TF_Filesystem* filesystem, const char* path, - TF_Status* status) { +static void ClearFileCaches(GCSFile* gcs_file, const std::string& path) { + absl::ReaderMutexLock l(&gcs_file->block_cache_lock); + gcs_file->file_block_cache->RemoveFile(path); + gcs_file->stat_cache->Delete(path); +} + +void PathExists(const TF_Filesystem* filesystem, const char* path, + TF_Status* status) { std::string bucket, object; ParseGCSPath(path, true, &bucket, &object, status); if (TF_GetCode(status) != TF_OK) return; + auto gcs_file = static_cast(filesystem->plugin_filesystem); if (object.empty()) { - auto bucket_metadata = gcs_file->gcs_client.GetBucketMetadata(bucket); - TF_SetStatusFromGCSStatus(bucket_metadata.status(), status); + bool result = BucketExists(gcs_file, bucket, status); + if (result) return TF_SetStatus(status, 
TF_OK, ""); + } + + GcsFileStat stat; + StatForObject(gcs_file, path, bucket, object, &stat, status); + if (TF_GetCode(status) != TF_NOT_FOUND) return; + + bool result = FolderExists(gcs_file, path, status); + if (TF_GetCode(status) != TF_OK || (TF_GetCode(status) == TF_OK && result)) + return; + return TF_SetStatus( + status, TF_NOT_FOUND, + absl::StrCat("The path ", path, " does not exist.").c_str()); +} + +void CreateDir(const TF_Filesystem* filesystem, const char* path, + TF_Status* status) { + std::string dir = path; + MaybeAppendSlash(&dir); + std::string bucket, object; + ParseGCSPath(dir, true, &bucket, &object, status); + if (TF_GetCode(status) != TF_OK) return; + auto gcs_file = static_cast(filesystem->plugin_filesystem); + if (object.empty()) { + bool is_directory = BucketExists(gcs_file, bucket, status); + if (TF_GetCode(status) != TF_OK) return; + if (!is_directory) + TF_SetStatus(status, TF_NOT_FOUND, + ("The specified bucket " + dir + " was not found.").c_str()); return; } - MaybeAppendSlash(&object); - auto object_metadata = gcs_file->gcs_client.GetObjectMetadata(bucket, object); - TF_SetStatusFromGCSStatus(object_metadata.status(), status); - if (TF_GetCode(status) == TF_NOT_FOUND) { - auto insert_metadata = - gcs_file->gcs_client.InsertObject(bucket, object, ""); - TF_SetStatusFromGCSStatus(insert_metadata.status(), status); - } else if (TF_GetCode(status) == TF_OK) { + PathExists(filesystem, dir.c_str(), status); + if (TF_GetCode(status) == TF_OK) + return TF_SetStatus(status, TF_ALREADY_EXISTS, path); + + auto metadata = gcs_file->gcs_client.InsertObject( + bucket, object, "", + // Adding this parameter means HTTP_CODE_PRECONDITION_FAILED + // will be returned if the object already exists, so avoid reuploading. + gcs::IfGenerationMatch(0)); + TF_SetStatusFromGCSStatus(metadata.status(), status); + if (TF_GetCode(status) == TF_FAILED_PRECONDITION) TF_SetStatus(status, TF_ALREADY_EXISTS, path); - } } // TODO(vnvo2409): `RecursivelyCreateDir` should use `CreateDir` instead of the @@ -813,53 +850,31 @@ void DeleteFile(const TF_Filesystem* filesystem, const char* path, auto gcs_file = static_cast(filesystem->plugin_filesystem); auto gcs_status = gcs_file->gcs_client.DeleteObject(bucket, object); TF_SetStatusFromGCSStatus(gcs_status, status); + if (TF_GetCode(status) == TF_OK) ClearFileCaches(gcs_file, path); } +// Checks that the directory is empty (i.e no objects with this prefix exist). +// Deletes the GCS directory marker if it exists. void DeleteDir(const TF_Filesystem* filesystem, const char* path, TF_Status* status) { - std::string bucket, object; - ParseGCSPath(path, false, &bucket, &object, status); - if (TF_GetCode(status) != TF_OK) return; - MaybeAppendSlash(&object); + // A directory is considered empty either if there are no matching objects + // with the corresponding name prefix or if there is exactly one matching + // object and it is the directory marker. Therefore we need to retrieve + // at most two children for the prefix to detect if a directory is empty. auto gcs_file = static_cast(filesystem->plugin_filesystem); - int object_count = 0; - for (auto&& metadata : - gcs_file->gcs_client.ListObjects(bucket, gcs::Prefix(object))) { - if (!metadata) { - TF_SetStatusFromGCSStatus(metadata.status(), status); - return; - } - ++object_count; - // We consider a path is a non-empty directory in two cases: - // - There are more than two objects whose keys start with the name of this - // directory. 
- // - There is one object whose key contains the name of this directory ( but - // not equal ). - if (object_count > 1 || metadata->name() != object) { - TF_SetStatus(status, TF_FAILED_PRECONDITION, - "Cannot delete a non-empty directory."); - return; - } + auto childrens = GetChildrenBounded(gcs_file, path, 2, true, true, status); + if (TF_GetCode(status) != TF_OK) return; + if (childrens.size() > 1 || (childrens.size() == 1 && !childrens[0].empty())) + return TF_SetStatus(status, TF_FAILED_PRECONDITION, + "Cannot delete a non-empty directory."); + if (childrens.size() == 1 && childrens[0].empty()) { + // This is the directory marker object. Delete it. + std::string dir = path; + MaybeAppendSlash(&dir); + DeleteFile(filesystem, dir.c_str(), status); + return; } - auto gcs_status = gcs_file->gcs_client.DeleteObject(bucket, object); - TF_SetStatusFromGCSStatus(gcs_status, status); -} - -// TODO(vnvo2409): `DeleteRecursively` needs `GetChildrens` but there will be -// some differents compared to the default implementation. Will be refactored. -static void DeleteRecursively(const TF_Filesystem* filesystem, const char* path, - uint64_t* undeleted_files, - uint64_t* undeleted_dirs, TF_Status* status) { - std::string bucket, object; - ParseGCSPath(path, false, &bucket, &object, status); - if (TF_GetCode(status) != TF_OK) return; - - auto gcs_file = static_cast(filesystem->plugin_filesystem); - auto gcs_status = gcs::DeleteByPrefix(gcs_file->gcs_client, bucket, object); - TF_SetStatusFromGCSStatus(gcs_status, status); - if (TF_GetCode(status) != TF_OK) return; - *undeleted_dirs = 0; - *undeleted_files = 0; + TF_SetStatus(status, TF_OK, ""); } // TODO(vnvo2409): `RewriteObjectBlocking` will set `status` to `TF_NOT_FOUND` @@ -904,31 +919,6 @@ void CopyFile(const TF_Filesystem* filesystem, const char* src, const char* dst, TF_SetStatusFromGCSStatus(metadata.status(), status); } -// TODO(vnvo2409): This approach can cause a problem when our path is -// `path/to/dir` and there is an object with key `path/to/directory`. Will be -// fixed when refactoring. -void PathExists(const TF_Filesystem* filesystem, const char* path, - TF_Status* status) { - std::string bucket, object; - ParseGCSPath(path, true, &bucket, &object, status); - if (TF_GetCode(status) != TF_OK) return; - - auto gcs_file = static_cast(filesystem->plugin_filesystem); - for (auto&& metadata : - gcs_file->gcs_client.ListObjects(bucket, gcs::Prefix(object))) { - if (!metadata) { - TF_SetStatusFromGCSStatus(metadata.status(), status); - return; - } - // We consider a path exists if there is at least one object whose key - // contains the path. 
- return TF_SetStatus(status, TF_OK, ""); - } - return TF_SetStatus( - status, TF_NOT_FOUND, - absl::StrCat("The path ", path, " does not exist.").c_str()); -} - bool IsDirectory(const TF_Filesystem* filesystem, const char* path, TF_Status* status) { std::string bucket, object; @@ -937,35 +927,27 @@ bool IsDirectory(const TF_Filesystem* filesystem, const char* path, auto gcs_file = static_cast(filesystem->plugin_filesystem); if (object.empty()) { - auto bucket_metadata = gcs_file->gcs_client.GetBucketMetadata(bucket); - TF_SetStatusFromGCSStatus(bucket_metadata.status(), status); - if (TF_GetCode(status) == TF_OK) - return true; - else - return false; + bool result = BucketExists(gcs_file, bucket, status); + if (TF_GetCode(status) != TF_OK) return false; + if (!result) + TF_SetStatus( + status, TF_NOT_FOUND, + ("The specified bucket gs://" + bucket + " was not found.").c_str()); + return result; } - // We check if there is an object with this key on the GCS server. - auto metadata = gcs_file->gcs_client.GetObjectMetadata(bucket, object); - if (metadata) { - TF_SetStatus(status, TF_OK, ""); - if (metadata->name().back() == '/') - return true; - else - return false; - } + bool is_folder = FolderExists(gcs_file, path, status); + if (TF_GetCode(status) != TF_OK) return false; + if (is_folder) return true; - // If there is no object with this key on the GCS server. We check if there is - // any object whose key contains that path. - MaybeAppendSlash(&object); - for (auto&& metadata : - gcs_file->gcs_client.ListObjects(bucket, gcs::Prefix(object))) { - if (!metadata) { - TF_SetStatusFromGCSStatus(metadata.status(), status); - return false; - } - TF_SetStatus(status, TF_OK, ""); - return true; + bool is_object = ObjectExists(gcs_file, path, bucket, object, status); + if (TF_GetCode(status) != TF_OK) return false; + if (is_object) { + TF_SetStatus( + status, TF_FAILED_PRECONDITION, + absl::StrCat("The specified path ", path, " is not a directory.") + .c_str()); + return false; } TF_SetStatus(status, TF_NOT_FOUND, absl::StrCat("The path ", path, " does not exist.").c_str()); From 0996281285b5d905062f56a80f996cacad9dfc79 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 7 Aug 2020 07:53:36 -0700 Subject: [PATCH 2332/2522] Support 'exclusive' and 'reverse', in tf.CumSum legalization. 
PiperOrigin-RevId: 325434770 Change-Id: I48914d39f347ed68d7c730d220ea54c4c3f857fe --- .../compiler/mlir/xla/tests/legalize-tf.mlir | 52 +++++++++++++++++- .../mlir/xla/transforms/legalize_tf.cc | 55 ++++++++++++++----- 2 files changed, 91 insertions(+), 16 deletions(-) diff --git a/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir b/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir index 3b4efc388eb..bad9c1ef279 100644 --- a/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir +++ b/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir @@ -4581,21 +4581,65 @@ func @cumsum_static(%arg0: tensor<4xf32>) -> tensor<4xf32> { } // CHECK-LABEL: func @cumsum_exclusive +// CHECK-SAME: [[X:%.*]]: tensor<4xf32> func @cumsum_exclusive(%arg0: tensor<4xf32>) -> tensor<4xf32> { - // CHECK: "tf.Cumsum" + // CHECK: [[AXIS:%.*]] = mhlo.constant dense<0> : tensor + // CHECK: [[CONVERT_X:%.*]] = "mhlo.convert"([[X]]) : (tensor<4xf32>) -> tensor<4xf32> + // CHECK: [[INIT:%.*]] = mhlo.constant dense<0.000000e+00> : tensor + // CHECK: [[REDUCE:%.*]] = "mhlo.reduce_window"([[CONVERT_X]], [[INIT]]) ( { + // CHECK: ^bb0([[A:%.*]]: tensor, [[B:%.*]]: tensor): + // CHECK: [[SUM:%.*]] = mhlo.add [[A]], [[B]] : tensor + // CHECK: "mhlo.return"([[SUM]]) : (tensor) -> () + // CHECK: }) {padding = dense<{{\[\[}}3, 0]]> : tensor<1x2xi64>, window_dimensions = dense<4> : tensor<1xi64>, window_strides = dense<1> : tensor<1xi64>} : (tensor<4xf32>, tensor) -> tensor<4xf32> + // CHECK: [[PAD:%.*]] = "mhlo.pad"([[REDUCE]], %{{.*}}) {edge_padding_high = dense<-1> : tensor<1xi64>, edge_padding_low = dense<1> : tensor<1xi64>, interior_padding = dense<0> : tensor<1xi64>} : (tensor<4xf32>, tensor) -> tensor<4xf32> + // CHECK: [[CONVERT_REDUCE:%.*]] = "mhlo.convert"([[PAD]]) : (tensor<4xf32>) -> tensor<4xf32> + // CHECK: return [[CONVERT_REDUCE]] %0 = "tf.Const"() {_output_shapes = ["tfshape$"], device = "", dtype = i32, value = dense<0> : tensor} : () -> tensor %1 = "tf.Cumsum"(%arg0, %0) {exclusive = true, reverse = false} : (tensor<4xf32>, tensor) -> tensor<4xf32> return %1 : tensor<4xf32> } // CHECK-LABEL: func @cumsum_reverse +// CHECK-SAME: [[X:%.*]]: tensor<4xf32> func @cumsum_reverse(%arg0: tensor<4xf32>) -> tensor<4xf32> { - // CHECK: "tf.Cumsum" + // CHECK: [[AXIS:%.*]] = mhlo.constant dense<0> : tensor + // CHECK: [[REVERSE1:%.*]] = "mhlo.reverse"([[X]]) {dimensions = dense<0> : tensor<1xi64>} : (tensor<4xf32>) -> tensor<4xf32> + // CHECK: [[CONVERT_X:%.*]] = "mhlo.convert"([[REVERSE1]]) : (tensor<4xf32>) -> tensor<4xf32> + // CHECK: [[INIT:%.*]] = mhlo.constant dense<0.000000e+00> : tensor + // CHECK: [[REDUCE:%.*]] = "mhlo.reduce_window"([[CONVERT_X]], [[INIT]]) ( { + // CHECK: ^bb0([[A:%.*]]: tensor, [[B:%.*]]: tensor): + // CHECK: [[SUM:%.*]] = mhlo.add [[A]], [[B]] : tensor + // CHECK: "mhlo.return"([[SUM]]) : (tensor) -> () + // CHECK: }) {padding = dense<{{\[\[}}3, 0]]> : tensor<1x2xi64>, window_dimensions = dense<4> : tensor<1xi64>, window_strides = dense<1> : tensor<1xi64>} : (tensor<4xf32>, tensor) -> tensor<4xf32> + // CHECK: [[CONVERT_REDUCE:%.*]] = "mhlo.convert"([[REDUCE]]) : (tensor<4xf32>) -> tensor<4xf32> + // CHECK: [[REVERSE_BACK:%.*]] = "mhlo.reverse"([[CONVERT_REDUCE]]) {dimensions = dense<0> : tensor<1xi64>} : (tensor<4xf32>) -> tensor<4xf32> + // CHECK: return [[REVERSE_BACK]] %0 = "tf.Const"() {_output_shapes = ["tfshape$"], device = "", dtype = i32, value = dense<0> : tensor} : () -> tensor %1 = "tf.Cumsum"(%arg0, %0) {exclusive = false, reverse = true} : (tensor<4xf32>, tensor) -> tensor<4xf32> 
return %1 : tensor<4xf32> } +// CHECK-LABEL: func @cumsum_exclusive_reverse +// CHECK-SAME: [[X:%.*]]: tensor<4xf32> +func @cumsum_exclusive_reverse(%arg0: tensor<4xf32>) -> tensor<4xf32> { + // CHECK: [[AXIS:%.*]] = mhlo.constant dense<0> : tensor + // CHECK: [[REVERSE1:%.*]] = "mhlo.reverse"([[X]]) {dimensions = dense<0> : tensor<1xi64>} : (tensor<4xf32>) -> tensor<4xf32> + // CHECK: [[CONVERT_X:%.*]] = "mhlo.convert"([[REVERSE1]]) : (tensor<4xf32>) -> tensor<4xf32> + // CHECK: [[INIT:%.*]] = mhlo.constant dense<0.000000e+00> : tensor + // CHECK: [[REDUCE:%.*]] = "mhlo.reduce_window"([[CONVERT_X]], [[INIT]]) ( { + // CHECK: ^bb0([[A:%.*]]: tensor, [[B:%.*]]: tensor): + // CHECK: [[SUM:%.*]] = mhlo.add [[A]], [[B]] : tensor + // CHECK: "mhlo.return"([[SUM]]) : (tensor) -> () + // CHECK: }) {padding = dense<{{\[\[}}3, 0]]> : tensor<1x2xi64>, window_dimensions = dense<4> : tensor<1xi64>, window_strides = dense<1> : tensor<1xi64>} : (tensor<4xf32>, tensor) -> tensor<4xf32> + // CHECK: [[PAD:%.*]] = "mhlo.pad"([[REDUCE]], %{{.*}}) {edge_padding_high = dense<-1> : tensor<1xi64>, edge_padding_low = dense<1> : tensor<1xi64>, interior_padding = dense<0> : tensor<1xi64>} : (tensor<4xf32>, tensor) -> tensor<4xf32> + // CHECK: [[CONVERT_REDUCE:%.*]] = "mhlo.convert"([[PAD]]) : (tensor<4xf32>) -> tensor<4xf32> + // CHECK: [[REVERSE_BACK:%.*]] = "mhlo.reverse"([[CONVERT_REDUCE]]) {dimensions = dense<0> : tensor<1xi64>} : (tensor<4xf32>) -> tensor<4xf32> + // CHECK: return [[REVERSE_BACK]] + %0 = "tf.Const"() {_output_shapes = ["tfshape$"], device = "", dtype = i32, value = dense<0> : tensor} : () -> tensor + %1 = "tf.Cumsum"(%arg0, %0) {exclusive = true, reverse = true} : (tensor<4xf32>, tensor) -> tensor<4xf32> + return %1 : tensor<4xf32> +} + // CHECK-LABEL: func @cumsum_dynamic func @cumsum_dynamic(%arg0: tensor, %arg1: tensor) -> tensor { // CHECK: "tf.Cumsum" @@ -4603,6 +4647,10 @@ func @cumsum_dynamic(%arg0: tensor, %arg1: tensor) -> tensor return %0 : tensor } +//===----------------------------------------------------------------------===// +// Qr op legalization +//===----------------------------------------------------------------------===// + // CHECK: func @qr([[VAL_0:%.*]]: tensor<500x100x75xf32>) -> (tensor<500x100x75xf32>, tensor<500x75x75xf32>) func @qr(%arg0: tensor<500x100x75xf32>) -> (tensor<500x100x75xf32>, tensor<500x75x75xf32>) { // The tf.Qr lowering is a full algorithm that is not effective to verify with diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc index 0b420fff785..f2b3822188c 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc @@ -5086,11 +5086,8 @@ class ConvertCumsumOp : public OpRewritePattern { return failure(); } - // TODO(jennik): Add support for the optional 'exclusive' and 'reverse' - // arguments. - if (op.exclusive() || op.reverse()) { - return failure(); - } + ArrayRef input_shape = input_type.getShape(); + int64_t rank = input_shape.size(); // We can only match when the axis is a constant scalar. DenseIntElementsAttr axis_attr; @@ -5098,15 +5095,6 @@ class ConvertCumsumOp : public OpRewritePattern { return failure(); } - // Convert if we need to enlarge the element type's bitwidth to avoid - // precision loss. 
- Type input_element_type = input_type.getElementType(); - Type sum_element_type = GetSumAccumulationType(input_element_type); - input = rewriter.create(op.getLoc(), input, sum_element_type); - - ArrayRef input_shape = input_type.getShape(); - int64_t rank = input_shape.size(); - // Get the dimension to apply the reduction on, and offset properly if it is // negative. int64_t axis = (*axis_attr.begin()).getSExtValue(); @@ -5114,6 +5102,21 @@ class ConvertCumsumOp : public OpRewritePattern { axis += rank; } + // If we're supposed to sum things up in the reverse direction, we reverse + // the input and then later reverse the output. + if (op.reverse()) { + llvm::SmallVector dims_to_reverse({axis}); + input = rewriter.create( + op.getLoc(), op.getType(), input, + GetI64ElementsAttr(dims_to_reverse, &rewriter)); + } + + // Convert if we need to enlarge the element type's bitwidth to avoid + // precision loss. + Type input_element_type = input_type.getElementType(); + Type sum_element_type = GetSumAccumulationType(input_element_type); + input = rewriter.create(op.getLoc(), input, sum_element_type); + SmallVector window_dims(rank, 1); SmallVector window_strides(rank, 1); window_dims[axis] = input_shape[axis]; @@ -5136,10 +5139,34 @@ class ConvertCumsumOp : public OpRewritePattern { BuildReduceBody(sum_element_type, &reduce.body(), &rewriter); Value result = reduce.getResult(); + if (op.exclusive()) { + // In "exclusive" operation, the output will start with the "init" (0) + // values. There is no way to express that as a ReduceWindowOp, so run the + // normal operation, and then use a PadOp to add the 0 "column" on the + // left and cut away the last column on the right. + llvm::SmallVector low_padding(rank, 0); + llvm::SmallVector high_padding(rank, 0); + llvm::SmallVector interior_padding(rank, 0); + low_padding[axis] = 1; + high_padding[axis] = -1; + result = rewriter.create( + op.getLoc(), op.getType(), result, init, + GetI64ElementsAttr(low_padding, &rewriter), + GetI64ElementsAttr(high_padding, &rewriter), + GetI64ElementsAttr(interior_padding, &rewriter)); + } + // Convert back if we enlarged the element type's bitwidth. result = rewriter.create(op.getLoc(), result, input_element_type); + if (op.reverse()) { + llvm::SmallVector dims_to_reverse({axis}); + result = rewriter.create( + op.getLoc(), op.getType(), result, + GetI64ElementsAttr(dims_to_reverse, &rewriter)); + } + rewriter.replaceOp(op, result); return success(); } From fb837585264e6abe4b0488e3a9dd5c5507e69bf6 Mon Sep 17 00:00:00 2001 From: Rick Chao Date: Fri, 7 Aug 2020 08:40:16 -0700 Subject: [PATCH 2333/2522] PSv2: Fix asan test for various targets. 
PiperOrigin-RevId: 325441069 Change-Id: I1fa1b2b10670f34739323292eab623d5b538142e --- tensorflow/python/distribute/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/python/distribute/__init__.py b/tensorflow/python/distribute/__init__.py index acb3c112226..f9d0a95ea58 100644 --- a/tensorflow/python/distribute/__init__.py +++ b/tensorflow/python/distribute/__init__.py @@ -25,7 +25,6 @@ from tensorflow.python.distribute import distribute_lib from tensorflow.python.distribute import distribution_strategy_context from tensorflow.python.distribute import mirrored_strategy from tensorflow.python.distribute import one_device_strategy -from tensorflow.python.distribute.client import parameter_server_client from tensorflow.python.distribute.experimental import collective_all_reduce_strategy from tensorflow.python.distribute.experimental import parameter_server_strategy # pylint: enable=unused-import From f63877d6371aa5e47391e520a29a6aa1ef2e0199 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 7 Aug 2020 09:23:36 -0700 Subject: [PATCH 2334/2522] Update ops-related pbtxt files. PiperOrigin-RevId: 325447902 Change-Id: Ie19ab04da2b7d678efc2b87c52e15a88f0150c8c --- .../ops_history_v2/EmptyTensorMap.pbtxt | 7 ++ .../ops_history_v2/TensorMapErase.pbtxt | 27 +++++ .../ops_history_v2/TensorMapHasKey.pbtxt | 19 +++ .../ops_history_v2/TensorMapInsert.pbtxt | 27 +++++ .../ops_history_v2/TensorMapLookup.pbtxt | 23 ++++ .../compat/ops_history_v2/TensorMapSize.pbtxt | 11 ++ tensorflow/core/ops/ops.pbtxt | 114 ++++++++++++++++++ 7 files changed, 228 insertions(+) create mode 100644 tensorflow/core/ops/compat/ops_history_v2/EmptyTensorMap.pbtxt create mode 100644 tensorflow/core/ops/compat/ops_history_v2/TensorMapErase.pbtxt create mode 100644 tensorflow/core/ops/compat/ops_history_v2/TensorMapHasKey.pbtxt create mode 100644 tensorflow/core/ops/compat/ops_history_v2/TensorMapInsert.pbtxt create mode 100644 tensorflow/core/ops/compat/ops_history_v2/TensorMapLookup.pbtxt create mode 100644 tensorflow/core/ops/compat/ops_history_v2/TensorMapSize.pbtxt diff --git a/tensorflow/core/ops/compat/ops_history_v2/EmptyTensorMap.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/EmptyTensorMap.pbtxt new file mode 100644 index 00000000000..25327b4e1e8 --- /dev/null +++ b/tensorflow/core/ops/compat/ops_history_v2/EmptyTensorMap.pbtxt @@ -0,0 +1,7 @@ +op { + name: "EmptyTensorMap" + output_arg { + name: "handle" + type: DT_VARIANT + } +} diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorMapErase.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorMapErase.pbtxt new file mode 100644 index 00000000000..8b6c16005b5 --- /dev/null +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorMapErase.pbtxt @@ -0,0 +1,27 @@ +op { + name: "TensorMapErase" + input_arg { + name: "input_handle" + type: DT_VARIANT + } + input_arg { + name: "key" + type_attr: "key_dtype" + } + output_arg { + name: "output_handle" + type: DT_VARIANT + } + output_arg { + name: "value" + type_attr: "value_dtype" + } + attr { + name: "key_dtype" + type: "type" + } + attr { + name: "value_dtype" + type: "type" + } +} diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorMapHasKey.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorMapHasKey.pbtxt new file mode 100644 index 00000000000..437822797af --- /dev/null +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorMapHasKey.pbtxt @@ -0,0 +1,19 @@ +op { + name: "TensorMapHasKey" + input_arg { + name: "input_handle" + type: DT_VARIANT + } + input_arg { + 
name: "key" + type_attr: "element_dtype" + } + output_arg { + name: "has_key" + type: DT_BOOL + } + attr { + name: "element_dtype" + type: "type" + } +} diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorMapInsert.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorMapInsert.pbtxt new file mode 100644 index 00000000000..10061ea1cde --- /dev/null +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorMapInsert.pbtxt @@ -0,0 +1,27 @@ +op { + name: "TensorMapInsert" + input_arg { + name: "input_handle" + type: DT_VARIANT + } + input_arg { + name: "key" + type_attr: "key_dtype" + } + input_arg { + name: "value" + type_attr: "value_dtype" + } + output_arg { + name: "output_handle" + type: DT_VARIANT + } + attr { + name: "key_dtype" + type: "type" + } + attr { + name: "value_dtype" + type: "type" + } +} diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorMapLookup.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorMapLookup.pbtxt new file mode 100644 index 00000000000..b48fda8ac46 --- /dev/null +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorMapLookup.pbtxt @@ -0,0 +1,23 @@ +op { + name: "TensorMapLookup" + input_arg { + name: "input_handle" + type: DT_VARIANT + } + input_arg { + name: "key" + type_attr: "key_dtype" + } + output_arg { + name: "value" + type_attr: "value_dtype" + } + attr { + name: "key_dtype" + type: "type" + } + attr { + name: "value_dtype" + type: "type" + } +} diff --git a/tensorflow/core/ops/compat/ops_history_v2/TensorMapSize.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/TensorMapSize.pbtxt new file mode 100644 index 00000000000..dd8ade84414 --- /dev/null +++ b/tensorflow/core/ops/compat/ops_history_v2/TensorMapSize.pbtxt @@ -0,0 +1,11 @@ +op { + name: "TensorMapSize" + input_arg { + name: "input_handle" + type: DT_VARIANT + } + output_arg { + name: "size" + type: DT_INT32 + } +} diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 931208fdb4a..7e138923a8d 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -13481,6 +13481,13 @@ op { } } } +op { + name: "EmptyTensorMap" + output_arg { + name: "handle" + type: DT_VARIANT + } +} op { name: "EncodeBase64" input_arg { @@ -53010,6 +53017,113 @@ op { } } } +op { + name: "TensorMapErase" + input_arg { + name: "input_handle" + type: DT_VARIANT + } + input_arg { + name: "key" + type_attr: "key_dtype" + } + output_arg { + name: "output_handle" + type: DT_VARIANT + } + output_arg { + name: "value" + type_attr: "value_dtype" + } + attr { + name: "key_dtype" + type: "type" + } + attr { + name: "value_dtype" + type: "type" + } +} +op { + name: "TensorMapHasKey" + input_arg { + name: "input_handle" + type: DT_VARIANT + } + input_arg { + name: "key" + type_attr: "element_dtype" + } + output_arg { + name: "has_key" + type: DT_BOOL + } + attr { + name: "element_dtype" + type: "type" + } +} +op { + name: "TensorMapInsert" + input_arg { + name: "input_handle" + type: DT_VARIANT + } + input_arg { + name: "key" + type_attr: "key_dtype" + } + input_arg { + name: "value" + type_attr: "value_dtype" + } + output_arg { + name: "output_handle" + type: DT_VARIANT + } + attr { + name: "key_dtype" + type: "type" + } + attr { + name: "value_dtype" + type: "type" + } +} +op { + name: "TensorMapLookup" + input_arg { + name: "input_handle" + type: DT_VARIANT + } + input_arg { + name: "key" + type_attr: "key_dtype" + } + output_arg { + name: "value" + type_attr: "value_dtype" + } + attr { + name: "key_dtype" + type: "type" + } + attr { + name: "value_dtype" + type: 
"type" + } +} +op { + name: "TensorMapSize" + input_arg { + name: "input_handle" + type: DT_VARIANT + } + output_arg { + name: "size" + type: DT_INT32 + } +} op { name: "TensorScatterAdd" input_arg { From c27f93d4b14c47f7bde086a0afb1ff657fe37031 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 7 Aug 2020 09:45:54 -0700 Subject: [PATCH 2335/2522] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 325451755 Change-Id: Ic8ff17cf02922c9b68143e71ba2980d3205f278e --- tensorflow/go/op/wrappers.go | 206 +++++++++++++++++++++++++++-------- 1 file changed, 160 insertions(+), 46 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 34ff57636ca..cd6284aab05 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -13369,6 +13369,24 @@ func ResizeArea(scope *Scope, images tf.Output, size tf.Output, optional ...Resi return op.Output(0) } +// Returns the number of work units this Reader has finished processing. +// +// Arguments: +// reader_handle: Handle to a Reader. +func ReaderNumWorkUnitsCompletedV2(scope *Scope, reader_handle tf.Output) (units_completed tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ReaderNumWorkUnitsCompletedV2", + Input: []tf.Input{ + reader_handle, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Returns up to `num_records` (key, value) pairs produced by a Reader. // // Will dequeue from the input queue if necessary (e.g. when the @@ -15101,6 +15119,94 @@ func TensorListLength(scope *Scope, input_handle tf.Output) (length tf.Output) { return op.Output(0) } +// Returns whether the given key exists in the map. +// +// input_handle: the input map +// key: the key to check +// has_key: whether the key is already in the map or not +func TensorMapHasKey(scope *Scope, input_handle tf.Output, key tf.Output) (has_key tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "TensorMapHasKey", + Input: []tf.Input{ + input_handle, key, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Returns the value from a given key in a tensor map. +// +// input_handle: the input map +// key: the key to be looked up +// value: the value found from the given key +func TensorMapLookup(scope *Scope, input_handle tf.Output, key tf.Output, value_dtype tf.DataType) (value tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"value_dtype": value_dtype} + opspec := tf.OpSpec{ + Type: "TensorMapLookup", + Input: []tf.Input{ + input_handle, key, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Inverse 3D fast Fourier transform. +// +// Computes the inverse 3-dimensional discrete Fourier transform over the +// inner-most 3 dimensions of `input`. +// +// Arguments: +// input: A complex tensor. +// +// Returns A complex tensor of the same shape as `input`. The inner-most 3 +// dimensions of `input` are replaced with their inverse 3D Fourier transform. +// +// @compatibility(numpy) +// Equivalent to np.fft.ifftn with 3 dimensions. +// @end_compatibility +func IFFT3D(scope *Scope, input tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "IFFT3D", + Input: []tf.Input{ + input, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Returns a map that is the 'input_handle' with the given key-value pair inserted. 
+// +// input_handle: the original map +// output_handle: the map with key and value inserted +// key: the key to be inserted +// value: the value to be inserted +func TensorMapInsert(scope *Scope, input_handle tf.Output, key tf.Output, value tf.Output) (output_handle tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "TensorMapInsert", + Input: []tf.Input{ + input_handle, key, value, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Merges summaries. // // This op creates a @@ -20036,6 +20142,28 @@ func Polygamma(scope *Scope, a tf.Output, x tf.Output) (z tf.Output) { return op.Output(0) } +// Returns a tensor map with item from given key erased. +// +// input_handle: the original map +// output_handle: the map with value from given key removed +// key: the key of the value to be erased +// value: the value that was erased +func TensorMapErase(scope *Scope, input_handle tf.Output, key tf.Output, value_dtype tf.DataType) (output_handle tf.Output, value tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"value_dtype": value_dtype} + opspec := tf.OpSpec{ + Type: "TensorMapErase", + Input: []tf.Input{ + input_handle, key, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + // Shuffle dimensions of x according to a permutation. // // The output `y` has the same rank as `x`. The shapes of `x` and `y` satisfy: @@ -24997,6 +25125,24 @@ func MaxPoolWithArgmax(scope *Scope, input tf.Output, ksize []int64, strides []i return op.Output(0), op.Output(1) } +// Returns the number of tensors in the input tensor map. +// +// input_handle: the input map +// size: the number of tensors in the map +func TensorMapSize(scope *Scope, input_handle tf.Output) (size tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "TensorMapSize", + Input: []tf.Input{ + input_handle, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // MaxPoolGradGradAttr is an optional argument to MaxPoolGradGrad. type MaxPoolGradGradAttr func(optionalAttr) @@ -35440,34 +35586,6 @@ func DecodeRaw(scope *Scope, bytes tf.Output, out_type tf.DataType, optional ... return op.Output(0) } -// Inverse 3D fast Fourier transform. -// -// Computes the inverse 3-dimensional discrete Fourier transform over the -// inner-most 3 dimensions of `input`. -// -// Arguments: -// input: A complex tensor. -// -// Returns A complex tensor of the same shape as `input`. The inner-most 3 -// dimensions of `input` are replaced with their inverse 3D Fourier transform. -// -// @compatibility(numpy) -// Equivalent to np.fft.ifftn with 3 dimensions. -// @end_compatibility -func IFFT3D(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "IFFT3D", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // QueueDequeueUpToV2Attr is an optional argument to QueueDequeueUpToV2. type QueueDequeueUpToV2Attr func(optionalAttr) @@ -39085,24 +39203,6 @@ func ResourceApplyAddSign(scope *Scope, var_ tf.Output, m tf.Output, lr tf.Outpu return scope.AddOperation(opspec) } -// Returns the number of work units this Reader has finished processing. -// -// Arguments: -// reader_handle: Handle to a Reader. 
-func ReaderNumWorkUnitsCompletedV2(scope *Scope, reader_handle tf.Output) (units_completed tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ReaderNumWorkUnitsCompletedV2", - Input: []tf.Input{ - reader_handle, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // FractionalMaxPoolAttr is an optional argument to FractionalMaxPool. type FractionalMaxPoolAttr func(optionalAttr) @@ -39425,6 +39525,20 @@ func RemoteFusedGraphExecute(scope *Scope, inputs []tf.Output, Toutputs []tf.Dat return outputs } +// Creates and returns an empty tensor map. +// +// handle: an empty tensor map +func EmptyTensorMap(scope *Scope) (handle tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "EmptyTensorMap", + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // DatasetToGraphAttr is an optional argument to DatasetToGraph. type DatasetToGraphAttr func(optionalAttr) From 17cdd71cc9fd30a7a38c08dc46c4f821c0685ea2 Mon Sep 17 00:00:00 2001 From: Edward Loper Date: Fri, 7 Aug 2020 09:58:24 -0700 Subject: [PATCH 2336/2522] Add c++ utility function that converts AttrDef values to the expected types. This will replace methods such as `make_bool` (in `eager/execute.py`) and `_MakeBool` (in `op_def_library.py`). PiperOrigin-RevId: 325453895 Change-Id: I3a454a0365b08c545944ef7528c357b5d8bf2c02 --- tensorflow/python/BUILD | 49 ++++ tensorflow/python/framework/op_def_library.py | 12 + tensorflow/python/framework/op_def_util.cc | 270 ++++++++++++++++++ tensorflow/python/framework/op_def_util.h | 77 +++++ .../python/framework/op_def_util_pybind.cc | 41 +++ .../python/framework/op_def_util_test.py | 97 +++++++ 6 files changed, 546 insertions(+) create mode 100644 tensorflow/python/framework/op_def_util.cc create mode 100644 tensorflow/python/framework/op_def_util.h create mode 100644 tensorflow/python/framework/op_def_util_pybind.cc create mode 100644 tensorflow/python/framework/op_def_util_test.py diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 5166d5b891d..039fc945eca 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -1617,11 +1617,60 @@ py_library( ], ) +cc_library( + name = "op_def_util_cc", + srcs = ["framework/op_def_util.cc"], + hdrs = ["framework/op_def_util.h"], + deps = [ + ":cpp_python_util", + ":safe_ptr", + "//tensorflow/core:protos_all_cc", + "@com_google_absl//absl/strings", + ], +) + +# Note: this target is only used for op_def_util_test. It includes op_def_util.cc +# directly in its srcs (rather than depending on the `op_def_util_cc` target) because +# depending on that target adds dependencies that register objects; and since the +# extension is built as a shared object in some kokoro tests, this causes those objects +# to get registered multiple times (which fails). 
+tf_python_pybind_extension( + name = "_op_def_util", + srcs = [ + "framework/op_def_util.cc", + "framework/op_def_util_pybind.cc", + ], + hdrs = [ + "framework/op_def_util.h", + "lib/core/safe_ptr.h", + "util/util.h", + "//tensorflow/c:headers", + "//tensorflow/c/eager:headers", + ], + module_name = "_op_def_util", + deps = [ + ":pybind11_status", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core/platform:status", + "//third_party/python_runtime:headers", + "@com_google_absl//absl/strings", + "@pybind11", + ], +) + +tf_py_test( + name = "op_def_util_test", + srcs = ["framework/op_def_util_test.py"], + python_version = "PY3", + tags = ["no_pip"], +) + py_library( name = "framework_ops", # "ops" is already the name of a deprecated target srcs = ["framework/ops.py"], srcs_version = "PY2AND3", deps = [ + ":_op_def_util", ":c_api_util", ":control_flow_util", ":device", diff --git a/tensorflow/python/framework/op_def_library.py b/tensorflow/python/framework/op_def_library.py index 17e06b79f74..53d092787f6 100644 --- a/tensorflow/python/framework/op_def_library.py +++ b/tensorflow/python/framework/op_def_library.py @@ -21,10 +21,12 @@ from __future__ import print_function import six +from google.protobuf import text_format from tensorflow.core.framework import attr_value_pb2 from tensorflow.core.framework import tensor_pb2 from tensorflow.core.framework import tensor_shape_pb2 from tensorflow.core.framework import types_pb2 +from tensorflow.python import _pywrap_utils from tensorflow.python.framework import dtypes from tensorflow.python.framework import op_callbacks from tensorflow.python.framework import op_def_registry @@ -788,3 +790,13 @@ def _apply_op_helper(op_type_name, name=None, **keywords): # pylint: disable=in outputs = callback_outputs return output_structure, op_def.is_stateful, op, outputs + + +# The following symbols are used by op_def_util.cc. +_pywrap_utils.RegisterPyObject("tf.dtypes.DType", dtypes.DType) +_pywrap_utils.RegisterPyObject("tf.dtypes.as_dtype", dtypes.as_dtype) +_pywrap_utils.RegisterPyObject("tf.TensorShape", tensor_shape.TensorShape) +_pywrap_utils.RegisterPyObject("tf.as_shape", tensor_shape.as_shape) +_pywrap_utils.RegisterPyObject("tf.TensorProto", tensor_pb2.TensorProto) +_pywrap_utils.RegisterPyObject("text_format.Parse", text_format.Parse) +_pywrap_utils.RegisterPyObject("tf.convert_to_tensor", ops.convert_to_tensor) diff --git a/tensorflow/python/framework/op_def_util.cc b/tensorflow/python/framework/op_def_util.cc new file mode 100644 index 00000000000..4f56c62317c --- /dev/null +++ b/tensorflow/python/framework/op_def_util.cc @@ -0,0 +1,270 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tensorflow/python/framework/op_def_util.h" + +#include + +#include "absl/strings/str_cat.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/python/util/util.h" + +using ::tensorflow::swig::GetRegisteredPyObject; + +#if PY_MAJOR_VERSION < 3 +#define PY_STRING_CHECK(x) (PyString_Check(x) || PyUnicode_Check(x)) +#define PY_INT_CHECK(x) (PyInt_Check(x)) +#define PY_INT_TYPE PyInt_Type +#else +#define PY_STRING_CHECK(x) (PyBytes_Check(x) || PyUnicode_Check(x)) +#define PY_INT_CHECK(x) (PyLong_Check(x)) +#define PY_INT_TYPE PyLong_Type +#endif + +namespace tensorflow { + +namespace { + +const std::map* AttributeTypeNameMap() { + static auto* type_map = new std::map( + {{"any", AttributeType::ANY}, + {"float", AttributeType::FLOAT}, + {"int", AttributeType::INT}, + {"string", AttributeType::STRING}, + {"bool", AttributeType::BOOL}, + {"shape", AttributeType::SHAPE}, + {"type", AttributeType::DTYPE}, + {"tensor", AttributeType::TENSOR}, + {"list(any)", AttributeType::LIST_ANY}, + {"list(float)", AttributeType::LIST_FLOAT}, + {"list(int)", AttributeType::LIST_INT}, + {"list(string)", AttributeType::LIST_STRING}, + {"list(bool)", AttributeType::LIST_BOOL}, + {"list(type)", AttributeType::LIST_DTYPE}, + {"list(shape)", AttributeType::LIST_SHAPE}, + {"list(tensor)", AttributeType::LIST_TENSOR}}); + return type_map; +} + +// Note: we define functors for converting value types (rather than simple +// functions) so we can define a generic ConvertListAttr method. These +// functors all return a new reference on success, or nullptr on failure. +// They do not (necessarily) call PyErr_SetString. + +struct ConvertAnyFunctor { + Safe_PyObjectPtr operator()(PyObject* value) { + Py_INCREF(value); + return Safe_PyObjectPtr(value); + } +}; + +struct ConvertFloatFunctor { + Safe_PyObjectPtr operator()(PyObject* value) { + Safe_PyObjectPtr result; + if (PyFloat_Check(value)) { + Py_INCREF(value); + result.reset(value); + } else if (!PY_STRING_CHECK(value)) { + result.reset(PyObject_CallFunctionObjArgs( + reinterpret_cast(&PyFloat_Type), value, nullptr)); + } + return result; + } +}; + +struct ConvertIntFunctor { + Safe_PyObjectPtr operator()(PyObject* value) { + Safe_PyObjectPtr result; + if (PY_INT_CHECK(value)) { + Py_INCREF(value); + result.reset(value); + } else if (!PY_STRING_CHECK(value)) { + result.reset(PyObject_CallFunctionObjArgs( + reinterpret_cast(&PY_INT_TYPE), value, nullptr)); + } + return result; + } +}; + +struct ConvertStringFunctor { + Safe_PyObjectPtr operator()(PyObject* value) { + Safe_PyObjectPtr result; + if (PY_STRING_CHECK(value)) { + Py_INCREF(value); + result.reset(value); + } + return result; + } +}; + +// TODO(edloper): Should we allow ints (or any other values) to be converted +// to booleans? Currently, TensorFlow does not do this conversion for attribute +// values in _MakeBool or make_bool. 
+struct ConvertBoolFunctor { + Safe_PyObjectPtr operator()(PyObject* value) { + Safe_PyObjectPtr result; + if (PyBool_Check(value)) { + Py_INCREF(value); + result.reset(value); + } + return result; + } +}; + +struct ConvertDTypeFunctor { + Safe_PyObjectPtr operator()(PyObject* value) { + Safe_PyObjectPtr result; + // The following symbols are registered in op_def_library.py + static PyObject* dtype = GetRegisteredPyObject("tf.dtypes.DType"); + static PyObject* as_dtype = GetRegisteredPyObject("tf.dtypes.as_dtype"); + if (reinterpret_cast(value->ob_type) == dtype) { + Py_INCREF(value); + result.reset(value); + } else { + result.reset(PyObject_CallFunctionObjArgs(as_dtype, value, nullptr)); + } + return result; + } +}; + +struct ConvertTensorShapeFunctor { + Safe_PyObjectPtr operator()(PyObject* value) { + Safe_PyObjectPtr result; + // The following symbols are registered in op_def_library.py + static PyObject* shape = GetRegisteredPyObject("tf.TensorShape"); + static PyObject* as_shape = GetRegisteredPyObject("tf.as_shape"); + if (reinterpret_cast(value->ob_type) == shape) { + Py_INCREF(value); + result.reset(value); + } else { + result.reset(PyObject_CallFunctionObjArgs(as_shape, value, nullptr)); + } + return result; + } +}; + +struct ConvertTensorProtoFunctor { + Safe_PyObjectPtr operator()(PyObject* value) { + Safe_PyObjectPtr result; + // The following symbols are registered in op_def_library.py + static PyObject* tensor_proto = GetRegisteredPyObject("tf.TensorProto"); + static PyObject* text_format_parse = + GetRegisteredPyObject("text_format.Parse"); + if (reinterpret_cast(value->ob_type) == tensor_proto) { + Py_INCREF(value); + result.reset(value); + } else if (PY_STRING_CHECK(value)) { + result.reset(PyObject_CallObject(tensor_proto, nullptr)); + if (result) { + PyObject_CallFunctionObjArgs(text_format_parse, value, result.get(), + nullptr); + } + } + return result; + } +}; + +// Converts `value` to a list of elements with the same type, using +// `convert_functor` to convert each element. +template +Safe_PyObjectPtr ConvertListAttr(PyObject* value, T convert_functor) { + // Copy the list. + Safe_PyObjectPtr result(PySequence_List(value)); + if (!result) return nullptr; + + // Check the type of each item in the list. + Py_ssize_t len = PySequence_Fast_GET_SIZE(result.get()); + PyObject** items = PySequence_Fast_ITEMS(result.get()); + for (Py_ssize_t i = 0; i < len; ++i) { + if (!PyFloat_Check(value)) { + Safe_PyObjectPtr item = convert_functor(items[i]); + if (!item) return nullptr; + PySequence_SetItem(result.get(), i, item.get()); + } + } + return result; +} + +// Returns the given `value` value, converted to the indicated type. +// Returns nullptr if `value` is not convertible. 
+Safe_PyObjectPtr ConvertAttrOrNull(PyObject* value, AttributeType attr_type) { + switch (attr_type) { + case AttributeType::ANY: + return ConvertAnyFunctor()(value); + case AttributeType::FLOAT: + return ConvertFloatFunctor()(value); + case AttributeType::INT: + return ConvertIntFunctor()(value); + case AttributeType::STRING: + return ConvertStringFunctor()(value); + case AttributeType::BOOL: + return ConvertBoolFunctor()(value); + case AttributeType::DTYPE: + return ConvertDTypeFunctor()(value); + case AttributeType::SHAPE: + return ConvertTensorShapeFunctor()(value); + case AttributeType::TENSOR: + return ConvertTensorProtoFunctor()(value); + case AttributeType::LIST_ANY: + return ConvertListAttr(value, ConvertAnyFunctor()); + case AttributeType::LIST_FLOAT: + return ConvertListAttr(value, ConvertFloatFunctor()); + case AttributeType::LIST_INT: + return ConvertListAttr(value, ConvertIntFunctor()); + case AttributeType::LIST_STRING: + return ConvertListAttr(value, ConvertStringFunctor()); + case AttributeType::LIST_BOOL: + return ConvertListAttr(value, ConvertBoolFunctor()); + case AttributeType::LIST_DTYPE: + return ConvertListAttr(value, ConvertDTypeFunctor()); + case AttributeType::LIST_SHAPE: + return ConvertListAttr(value, ConvertTensorShapeFunctor()); + case AttributeType::LIST_TENSOR: + return ConvertListAttr(value, ConvertTensorProtoFunctor()); + default: + return nullptr; + } +} + +} // namespace + +AttributeType AttributeTypeFromName(const std::string& type_name) { + const auto* type_map = AttributeTypeNameMap(); + auto it = type_map->find(type_name); + return it != type_map->end() ? it->second : AttributeType::UNKNOWN; +} + +std::string AttributeTypeToName(AttributeType attr_type) { + for (const auto& pair : *AttributeTypeNameMap()) { + if (pair.second == attr_type) { + return pair.first; + } + } + return ""; +} + +Safe_PyObjectPtr ConvertPyObjectToAttributeType(PyObject* value, + AttributeType type) { + Safe_PyObjectPtr result = ConvertAttrOrNull(value, type); + if (!result) { + auto err = absl::StrCat("Failed to convert value of type '", + value->ob_type->tp_name, "' to type '", + AttributeTypeToName(type), "'."); + PyErr_SetString(PyExc_TypeError, err.c_str()); + } + + return result; +} + +} // namespace tensorflow diff --git a/tensorflow/python/framework/op_def_util.h b/tensorflow/python/framework/op_def_util.h new file mode 100644 index 00000000000..ef5e64e68fa --- /dev/null +++ b/tensorflow/python/framework/op_def_util.h @@ -0,0 +1,77 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_PYTHON_FRAMEWORK_OP_DEF_UTIL_H_ +#define TENSORFLOW_PYTHON_FRAMEWORK_OP_DEF_UTIL_H_ + +#include + +#include "tensorflow/python/lib/core/safe_ptr.h" + +namespace tensorflow { + +// Enumerated type corresponding with string values in AttrDef::type. 
+enum class AttributeType { + UNKNOWN, + ANY, // "any" + FLOAT, // "float" + INT, // "int" + STRING, // "string" + BOOL, // "bool" + DTYPE, // "type" (tf.dtypes.DType) + SHAPE, // "shape" (tf.TensorShape) + TENSOR, // "tensor" (tf.TensorProto) + LIST_ANY, // "list(any)" + LIST_FLOAT, // "list(float)" + LIST_INT, // "list(int)" + LIST_STRING, // "list(string)" + LIST_BOOL, // "list(bool)" + LIST_DTYPE, // "list(dtype)" + LIST_SHAPE, // "list(shape)" + LIST_TENSOR // "list(tensor)" +}; + +// Returns the enumerated value corresponding to a given string (e.g. +// "string" or "list(string)". +AttributeType AttributeTypeFromName(const std::string& type_name); + +// Returns the string corresponding to a given enumerated value. +std::string AttributeTypeToName(AttributeType attr_type); + +// Converts `value` to the specified type and returns a new reference to the +// converted value (if possible); or sets a Python exception and returns +// nullptr. This function is optimized to be fast if `value` already has the +// desired type. +// +// * 'any' values are returned as-is. +// * 'float' values are converted by calling float(value). +// * 'int' values are converted by calling int(value). +// * 'string' values are returned as-is if they are (bytes, unicode); +// otherwise, an exception is raised. +// * 'bool' values are returned as-is if they are boolean; otherwise, an +// exception is raised. +// * 'dtype' values are converted using `dtypes.as_dtype`. +// * 'shape' values are converted using `tensor_shape.as_shape`. +// * 'tensor' values are returned as-is if they are a `TensorProto`; or are +// parsed into `TensorProto` using `textformat.merge` if they are a string. +// Otherwise, an exception is raised. +// * 'list(*)' values are copied to a new list, and then each element is +// converted (in-place) as described above. (If the value is not iterable, +// or if conversion fails for any item, then an exception is raised.) +Safe_PyObjectPtr ConvertPyObjectToAttributeType(PyObject* value, + AttributeType type); + +} // namespace tensorflow + +#endif // TENSORFLOW_PYTHON_FRAMEWORK_OP_DEF_UTIL_H_ diff --git a/tensorflow/python/framework/op_def_util_pybind.cc b/tensorflow/python/framework/op_def_util_pybind.cc new file mode 100644 index 00000000000..d13f605b599 --- /dev/null +++ b/tensorflow/python/framework/op_def_util_pybind.cc @@ -0,0 +1,41 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "pybind11/pybind11.h" +#include "tensorflow/python/framework/op_def_util.h" + +namespace py = pybind11; + +namespace { + +py::handle ConvertAttr(py::handle value, std::string attr_type) { + tensorflow::Safe_PyObjectPtr result = + ::tensorflow::ConvertPyObjectToAttributeType( + value.ptr(), ::tensorflow::AttributeTypeFromName(attr_type)); + if (!result) { + throw py::error_already_set(); + } + Py_INCREF(result.get()); + return result.release(); +} + +} // namespace + +// Expose ConvertPyObjectToAttributeType via Python. Note: this is done to +// simplify testing; ConvertPyObjectToAttributeType is expected to be called +// directly from c++. +PYBIND11_MODULE(_op_def_util, m) { + m.def("ConvertPyObjectToAttributeType", ConvertAttr, py::arg("value"), + py::arg("attr_type_enum")); +} diff --git a/tensorflow/python/framework/op_def_util_test.py b/tensorflow/python/framework/op_def_util_test.py new file mode 100644 index 00000000000..74cd6046f68 --- /dev/null +++ b/tensorflow/python/framework/op_def_util_test.py @@ -0,0 +1,97 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Tests for tensorflow.python.ops.op_def_library.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl.testing import parameterized + +import numpy as np + +from tensorflow.core.framework import tensor_pb2 +from tensorflow.core.framework import types_pb2 +from tensorflow.python import _op_def_util +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import test_util +from tensorflow.python.platform import googletest + + +class OpDefUtilTest(test_util.TensorFlowTestCase, parameterized.TestCase): + + @parameterized.parameters([ + ("any", "Foo", "Foo"), + ("any", 12, 12), + ("any", {2: 3}, {2: 3}), + ("string", "Foo", "Foo"), + ("string", b"Foo", b"Foo"), + ("int", 12, 12), + ("int", 12.3, 12), + ("float", 12, 12.0), + ("float", 12.3, 12.3), + ("bool", True, True), + ("shape", tensor_shape.TensorShape([3]), tensor_shape.TensorShape([3])), + ("shape", [3], tensor_shape.TensorShape([3])), + ("type", dtypes.int32, dtypes.int32), + ("type", np.int32, dtypes.int32), + ("type", "int32", dtypes.int32), + ("tensor", tensor_pb2.TensorProto(dtype=types_pb2.DataType.DT_FLOAT), + tensor_pb2.TensorProto(dtype=types_pb2.DataType.DT_FLOAT)), + ("tensor", "dtype: DT_FLOAT", + tensor_pb2.TensorProto(dtype=types_pb2.DataType.DT_FLOAT)), + ("list(any)", [1, "foo", 7.3, dtypes.int32], + [1, "foo", 7.3, dtypes.int32]), + ("list(any)", (1, "foo"), [1, "foo"]), + ("list(string)", ["foo", "bar"], ["foo", "bar"]), + ("list(string)", ("foo", "bar"), ["foo", "bar"]), + ("list(string)", iter("abcd"), ["a", "b", "c", "d"]), + ("list(int)", (1, 2.3), [1, 2]), + 
("list(float)", (1, 2.3), [1.0, 2.3]), + ("list(bool)", [True, False], [True, False]), + ]) + def testConvert(self, attr_type, value, expected): + result = _op_def_util.ConvertPyObjectToAttributeType(value, attr_type) + + # Check that we get the expected value(s). + self.assertEqual(expected, result) + + # Check that we get the expected type(s). + self.assertEqual(type(expected), type(result)) + if isinstance(result, list): + for expected_item, result_item in zip(expected, result): + self.assertEqual(type(expected_item), type(result_item)) + + @parameterized.parameters([ + ("string", 12), + ("int", "foo"), + ("float", "foo"), + ("bool", 1), + ("dtype", None), + ("shape", 12.0), + ("tensor", [1, 2, 3]), + ("list(any)", 12), + ("list(int)", [1, "two"]), + ("list(string)", [1, "two"]), + ]) + def testConvertError(self, attr_type, value): + with self.assertRaisesRegex(TypeError, "Failed to convert value"): + _op_def_util.ConvertPyObjectToAttributeType(value, attr_type) + +if __name__ == "__main__": + googletest.main() + From be8b52212a31735463b49f2ced43c8f656e23ab4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 7 Aug 2020 10:09:26 -0700 Subject: [PATCH 2337/2522] The code block in TF_BatchFunctionOp documentation was missing its end quote. Now it has it. PiperOrigin-RevId: 325456158 Change-Id: I7d988643100c21451a22f190a2559cb81f54dab7 --- tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td | 1 + tensorflow/core/api_def/base_api/api_def_BatchFunction.pbtxt | 1 + 2 files changed, 2 insertions(+) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td index 1e99675d938..376b7933b47 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td @@ -1295,6 +1295,7 @@ So, for example, in the following code batch_timeout_micros=100000, # 100ms allowed_batch_sizes=[3, 10], batching_queue="") + ``` If more than one session.run call is simultaneously trying to compute `b` the values of `a` will be gathered, non-deterministically concatenated diff --git a/tensorflow/core/api_def/base_api/api_def_BatchFunction.pbtxt b/tensorflow/core/api_def/base_api/api_def_BatchFunction.pbtxt index a7792dc9bf2..b2cace5c3bc 100644 --- a/tensorflow/core/api_def/base_api/api_def_BatchFunction.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_BatchFunction.pbtxt @@ -117,6 +117,7 @@ So, for example, in the following code batch_timeout_micros=100000, # 100ms allowed_batch_sizes=[3, 10], batching_queue="") + ``` If more than one session.run call is simultaneously trying to compute `b` the values of `a` will be gathered, non-deterministically concatenated From a919022737c0510d8f3f461e3b98f621ab0ba3bd Mon Sep 17 00:00:00 2001 From: Ken Franko Date: Fri, 7 Aug 2020 10:10:39 -0700 Subject: [PATCH 2338/2522] Mark ops within WhileRegion control flow for outside compilation when unsupported. Additionally marks WhileRegion for outside compilation if there are captured string arguments. 
PiperOrigin-RevId: 325456398 Change-Id: I21c83df43cbf475a61efa548224cc6c02cd2367b --- .../mark_ops_for_outside_compilation.mlir | 66 +++++++++++++++++++ .../mark_ops_for_outside_compilation.cc | 29 ++++---- 2 files changed, 79 insertions(+), 16 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/mark_ops_for_outside_compilation.mlir b/tensorflow/compiler/mlir/tensorflow/tests/mark_ops_for_outside_compilation.mlir index d0a4c101bdf..0bb37e4c3cd 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/mark_ops_for_outside_compilation.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/mark_ops_for_outside_compilation.mlir @@ -86,6 +86,7 @@ func @op_string_operand_string_result(%arg0: tensor) -> tensor } // Test that a tf.IfRegion op with a captured string operand is marked for outside compilation. + // CHECK-LABEL: func @if_region_captured_string func @if_region_captured_string(%arg0: tensor, %arg1: tensor) -> tensor { %0 = "tf_device.cluster"() ( { @@ -175,3 +176,68 @@ func @nested_if_region_string_op(%arg0: tensor, %arg1: tensor) -> ten }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor return %0 : tensor } + +// Test that a tf.WhileRegion op with a captured string operand is marked for outside compilation. + +// CHECK-LABEL: func @while_region_captured_string +func @while_region_captured_string(%arg0: tensor, %arg1: tensor) -> tensor { + %0 = "tf_device.cluster"() ( { + // CHECK: "tf.Const"() {value = dense<1.000000e+00> : tensor} + // CHECK-NOT: _xla_outside_compilation + // CHECK: "tf.WhileRegion" + // CHECK: "tf.StringToNumber" + // CHECK: _xla_outside_compilation = "auto", is_stateless = true + %1 = "tf.Const"() {value = dense<1.0> : tensor} : () -> tensor + %2:2 = "tf.WhileRegion"(%1, %arg0) ( { + ^bb0(%carg0: tensor, %carg1: tensor): + %limit = constant dense<5> : tensor + %cond = "tf.NotEqual"(%carg1, %limit) : (tensor, tensor) -> tensor + "tf.Yield"(%cond) : (tensor) -> () + }, { + ^bb0(%barg0: tensor, %barg1: tensor): + %one = constant dense<1> : tensor + %sub = "tf.Sub"(%barg1, %one) : (tensor, tensor) -> tensor + %3 = "tf.StringToNumber"(%arg1) {out_type = f32} : (tensor) -> tensor + "tf.Yield"(%3, %sub) : (tensor, tensor) -> () + }) {is_stateless = true} : (tensor, tensor) -> (tensor, tensor) + // CHECK: "tf.Identity" + // CHECK-NOT: _xla_outside_compilation + %5 = "tf.Identity"(%2#0) : (tensor) -> (tensor) + tf_device.return %5 : tensor + }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor + return %0 : tensor +} + +// Test that an unsupported op within a tf.WhileRegion is marked for outside compilation. 
+ +// CHECK-LABEL: func @while_region_unsupported_op +func @while_region_unsupported_op(%arg0: tensor, %arg1: tensor) -> tensor { + %0 = "tf_device.cluster"() ( { + // CHECK: "tf.Const"() {value = dense<1.000000e+00> : tensor} + // CHECK-NOT: _xla_outside_compilation + // CHECK: "tf.WhileRegion" + %1 = "tf.Const"() {value = dense<1.0> : tensor} : () -> tensor + %2:2 = "tf.WhileRegion"(%1, %arg0) ( { + ^bb0(%carg0: tensor, %carg1: tensor): + %limit = constant dense<5> : tensor + %cond = "tf.NotEqual"(%carg1, %limit) : (tensor, tensor) -> tensor + "tf.Yield"(%cond) : (tensor) -> () + }, { + ^bb0(%barg0: tensor, %barg1: tensor): + %one = constant dense<1> : tensor + %sub = "tf.Sub"(%barg1, %one) : (tensor, tensor) -> tensor + // CHECK: "tf.UnsupportedOp" + // CHECK-SAME: _xla_outside_compilation + %3 = "tf.UnsupportedOp"() {value = dense<1> : tensor} : () -> tensor + // CHECK: "tf.Const"() {value = dense<1.000000e+00> : tensor} + %4 = "tf.Const"() {value = dense<1.0> : tensor} : () -> tensor + "tf.Yield"(%4, %sub) : (tensor, tensor) -> () + // CHECK: {is_stateless = true + }) {is_stateless = true} : (tensor, tensor) -> (tensor, tensor) + // CHECK: "tf.Identity" + // CHECK-NOT: _xla_outside_compilation + %5 = "tf.Identity"(%2#0) : (tensor) -> (tensor) + tf_device.return %5 : tensor + }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor + return %0 : tensor +} diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/mark_ops_for_outside_compilation.cc b/tensorflow/compiler/mlir/tensorflow/transforms/mark_ops_for_outside_compilation.cc index 6c93a9eb9cc..c0889affb30 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/mark_ops_for_outside_compilation.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/mark_ops_for_outside_compilation.cc @@ -49,6 +49,7 @@ struct MarkOpsForOutsideCompilation void AddSupportedControlFlowOps(MLIRContext* context, llvm::DenseSet* supported_ops) { supported_ops->insert(OperationName("tf.IfRegion", context)); + supported_ops->insert(OperationName("tf.WhileRegion", context)); supported_ops->insert(OperationName("tf.Yield", context)); } @@ -81,21 +82,17 @@ bool IsSupportedOp(Operation& op, mhlo::IsOpAllowedTf2XlaFallback(&op)); } -bool HasCapturedStringOperand(TF::IfRegionOp* if_op) { +// Checks all regions of `op` for captured string operands. 
+bool HasCapturedStringOperand(Operation* op) { bool string_operand = false; - mlir::visitUsedValuesDefinedAbove( - if_op->then_branch(), if_op->then_branch(), - [&](mlir::OpOperand* operand) { - if (getElementTypeOrSelf(operand->get()).isa()) - string_operand = true; - }); - if (string_operand) return string_operand; - mlir::visitUsedValuesDefinedAbove( - if_op->else_branch(), if_op->else_branch(), - [&](mlir::OpOperand* operand) { - if (getElementTypeOrSelf(operand->get()).isa()) - string_operand = true; - }); + for (auto& region : op->getRegions()) { + mlir::visitUsedValuesDefinedAbove( + region, region, [&](mlir::OpOperand* operand) { + if (getElementTypeOrSelf(operand->get()).isa()) + string_operand = true; + }); + if (string_operand) return string_operand; + } return string_operand; } @@ -106,8 +103,8 @@ LogicalResult MarkUncompilableOps( op->setAttr(kXlaOutsideCompilationAttr, StringAttr::get("auto", op->getContext())); } - if (auto if_op = llvm::dyn_cast(op)) { - if (HasCapturedStringOperand(&if_op)) { + if (llvm::isa(op)) { + if (HasCapturedStringOperand(op)) { op->setAttr(kXlaOutsideCompilationAttr, StringAttr::get("auto", op->getContext())); } From cc38583a9fc80f83482a73e88396ce6f89e8ca08 Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: Fri, 7 Aug 2020 10:12:08 -0700 Subject: [PATCH 2339/2522] Fix finished task handling for restarted workers. This reflects the recent change where workers now receive their previous tasks on restart, as opposed to being assigned new tasks. PiperOrigin-RevId: 325456693 Change-Id: I2c99998c1310983ecc70e57f9c7c0a362d42c9d6 --- tensorflow/core/data/service/worker_impl.cc | 11 +++++++++++ tensorflow/core/data/service/worker_impl.h | 2 ++ .../data/experimental/data_service_dataset_op.cc | 14 ++++---------- tensorflow/python/data/kernel_tests/BUILD | 1 - 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/tensorflow/core/data/service/worker_impl.cc b/tensorflow/core/data/service/worker_impl.cc index 0e955e136d2..d17acffb941 100644 --- a/tensorflow/core/data/service/worker_impl.cc +++ b/tensorflow/core/data/service/worker_impl.cc @@ -76,7 +76,11 @@ Status DataServiceWorkerImpl::Start(const std::string& worker_address) { [this, dispatcher = dispatcher.release()]() { BackgroundThread(dispatcher); }); + LOG(INFO) << "Worker registered with dispatcher running at " + << config_.dispatcher_address(); background_thread_.reset(thread); + mutex_lock l(mu_); + registered_ = true; return Status::OK(); } @@ -118,6 +122,13 @@ Status DataServiceWorkerImpl::GetElement(const GetElementRequest* request, std::vector outputs; { mutex_lock l(mu_); + if (!registered_) { + // We need to reject requests until the worker has registered with the + // dispatcher, so that we don't return NOT_FOUND for tasks that the worker + // had before preemption. + return errors::Unavailable( + "Worker has not yet registered with dispatcher."); + } auto it = tasks_.find(request->task_id()); if (it == tasks_.end()) { return errors::NotFound("DataServiceWorkerImpl::GetElement failed. ", diff --git a/tensorflow/core/data/service/worker_impl.h b/tensorflow/core/data/service/worker_impl.h index 8353d11efdc..36edbe5ce74 100644 --- a/tensorflow/core/data/service/worker_impl.h +++ b/tensorflow/core/data/service/worker_impl.h @@ -84,6 +84,8 @@ class DataServiceWorkerImpl { // Completed tasks which haven't yet been communicated to the dispatcher. 
absl::flat_hash_set pending_completed_tasks_ TF_GUARDED_BY(mu_); bool cancelled_ TF_GUARDED_BY(mu_) = false; + // Whether the worker has registered with the dispatcher yet. + bool registered_ TF_GUARDED_BY(mu_) = false; // Condition variable for notifying the background thread. condition_variable background_cv_ TF_GUARDED_BY(mu_); std::unique_ptr background_thread_; diff --git a/tensorflow/core/kernels/data/experimental/data_service_dataset_op.cc b/tensorflow/core/kernels/data/experimental/data_service_dataset_op.cc index ca73799bd24..8a160aa8502 100644 --- a/tensorflow/core/kernels/data/experimental/data_service_dataset_op.cc +++ b/tensorflow/core/kernels/data/experimental/data_service_dataset_op.cc @@ -401,7 +401,6 @@ class DataServiceDatasetOp::Dataset : public DatasetBase { mutex_lock l(mu_); num_running_worker_threads_--; outstanding_requests_--; - VLOG(3) << "Exiting worker thread"; }; worker_threads_.push_back(ctx->StartThread( "tf-data-service-task_thread", [this, done = std::move(done)]() { @@ -437,10 +436,10 @@ class DataServiceDatasetOp::Dataset : public DatasetBase { } worker_thread_cv_.wait(l); } + outstanding_requests_++; if (cancelled_) { return; } - outstanding_requests_++; // Search for a task to update. int num_tasks = tasks_.size(); for (int i = 0; i < num_tasks; ++i) { @@ -461,6 +460,9 @@ class DataServiceDatasetOp::Dataset : public DatasetBase { Status s = GetElement(task_to_process.get(), deadline_micros); if (!s.ok()) { mutex_lock l(mu_); + VLOG(1) << "Failed to get element for task " + << task_to_process->task_id << ": " << s; + task_to_process->in_use = false; status_ = s; get_next_cv_.notify_all(); return; @@ -486,14 +488,6 @@ class DataServiceDatasetOp::Dataset : public DatasetBase { if (s.ok()) { break; } - if (errors::IsNotFound(s)) { - // This indicates that the worker was restarted. The restarted worker - // will get a new task, and the old task is lost. - mutex_lock l(mu_); - finished_tasks_++; - task->end_of_sequence = true; - return Status::OK(); - } // Retry all errors that could indicate preemption. if (!errors::IsUnavailable(s) && !errors::IsCancelled(s) && !errors::IsAborted(s)) { diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD index 639c07bac01..210b6f59681 100644 --- a/tensorflow/python/data/kernel_tests/BUILD +++ b/tensorflow/python/data/kernel_tests/BUILD @@ -94,7 +94,6 @@ tf_py_test( name = "data_service_ops_test", size = "medium", srcs = ["data_service_ops_test.py"], - tags = ["notap"], # "b/163085430" deps = [ "//tensorflow:tensorflow_py", "//tensorflow/python:client_testlib", From 88109c5078286ddda4aad4e10b6bddff096554ba Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 7 Aug 2020 10:19:36 -0700 Subject: [PATCH 2340/2522] Check if environment variables are null before assigning to string PiperOrigin-RevId: 325458185 Change-Id: I38905998dab7bfb5802ad94689282db4c08e271e --- tensorflow/core/kernels/data/optimize_dataset_op.cc | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/data/optimize_dataset_op.cc b/tensorflow/core/kernels/data/optimize_dataset_op.cc index a566693ec3d..74468e71241 100644 --- a/tensorflow/core/kernels/data/optimize_dataset_op.cc +++ b/tensorflow/core/kernels/data/optimize_dataset_op.cc @@ -94,8 +94,16 @@ void OptimizeDatasetOp::MakeDataset(OpKernelContext* ctx, DatasetBase* input, // This is currently empty; we have no live experiments yet. 
absl::flat_hash_map live_experiments; - const string opt_ins_raw = std::getenv("TF_DATA_EXPERIMENT_OPT_IN"); - const string opt_outs_raw = std::getenv("TF_DATA_EXPERIMENT_OPT_OUT"); + const char* opt_ins_raw_cs = std::getenv("TF_DATA_EXPERIMENT_OPT_IN"); + const char* opt_outs_raw_cs = std::getenv("TF_DATA_EXPERIMENT_OPT_OUT"); + string opt_ins_raw; + if (opt_ins_raw_cs != nullptr) { + opt_ins_raw = string(opt_ins_raw_cs); + } + string opt_outs_raw; + if (opt_outs_raw_cs != nullptr) { + opt_outs_raw = string(opt_outs_raw_cs); + } auto hash_func = [](const string& str) { return Hash64(str); }; optimizations = SelectOptimizations( job_name, opt_ins_raw, opt_outs_raw, live_experiments, From 194ff7b835fef5aceb9495ae298e4fa05b8ad23e Mon Sep 17 00:00:00 2001 From: Lucy Fox Date: Fri, 7 Aug 2020 10:46:06 -0700 Subject: [PATCH 2341/2522] Verify that MHLO DynamicUpdateSlice start indices have matching element types. HLO requires that the element types match for all start index parameters. Right now we don't catch this invalid case until export, so adding a check in the verifier so that we catch this sooner. This also requires a small tweak to the TF InplaceUpdate op lowering. PiperOrigin-RevId: 325463796 Change-Id: I71b5ff347a87bb63138d228796ffa1b115d74aba --- .../mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc | 27 +++++++++++++++++++ tensorflow/compiler/mlir/hlo/tests/ops.mlir | 8 ++++++ .../mlir/xla/transforms/legalize_tf.cc | 7 ++++- 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc index a1f0480f4fe..6f453d1a167 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc @@ -340,6 +340,33 @@ void DynamicIotaOp::getCanonicalizationPatterns( results.insert(context); } +//===----------------------------------------------------------------------===// +// DynamicUpdateSliceOp +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(DynamicUpdateSliceOp op) { + OperandRange indices = op.start_indices(); + if (indices.size() <= 1) return success(); + + // Note: start_indices is constrained to Variadic, so it + // is OK to cast indices to ShapedType here. 
+ auto idx_tensor = indices.take_front().front().getType().cast(); + Type first_elem_ty = idx_tensor.getElementType(); + Type elem_ty; + + for (auto idx : llvm::drop_begin(indices, 1)) { + idx_tensor = idx.getType().cast(); + elem_ty = idx_tensor.getElementType(); + + if (first_elem_ty != elem_ty) { + return op.emitOpError() << "start indices must have same element type " + "(encountered mismatch: " + << first_elem_ty << " vs " << elem_ty << ")"; + } + } + return success(); +} + //===----------------------------------------------------------------------===// // AbsOp //===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/hlo/tests/ops.mlir b/tensorflow/compiler/mlir/hlo/tests/ops.mlir index 3443f21bc84..25c7d6aee61 100644 --- a/tensorflow/compiler/mlir/hlo/tests/ops.mlir +++ b/tensorflow/compiler/mlir/hlo/tests/ops.mlir @@ -754,6 +754,14 @@ func @dynamic_update_slice_invalid_start(%input: tensor<3x4xi64>, %update: tenso // ----- +func @dynamic_update_slice_mismatched_start(%input: tensor<11x3x4xi32>, %update: tensor<1x3x4xi32>, %start1: tensor, %start2: tensor, %start3: tensor) -> tensor<11x3x4xi32> { + // expected-error@+1 {{start indices must have same element type (encountered mismatch: 'i32' vs 'i64')}} + %0 = "mhlo.dynamic-update-slice"(%input, %update, %start1, %start2, %start3) : (tensor<11x3x4xi32>, tensor<1x3x4xi32>, tensor, tensor, tensor) -> tensor<11x3x4xi32> + return %0 : tensor<11x3x4xi32> +} + +// ----- + // CHECK-LABEL: func @transpose func @transpose(%arg0: tensor<1x2x3x4xi32>) -> tensor<2x1x4x3xi32> { %0 = "mhlo.transpose"(%arg0) {permutation = dense<[1, 0, 3, 2]> : tensor<4xi64>} : (tensor<1x2x3x4xi32>) -> tensor<2x1x4x3xi32> diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc index f2b3822188c..6d99e714fc2 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc @@ -5007,7 +5007,12 @@ class ConvertInplaceUpdateOp : public OpRewritePattern { SmallVector unpacked_indices_type( indices_type.getDimSize(0), RankedTensorType::get({}, indices_type.getElementType())); - auto zero_attr = IntegerAttr::get(rewriter.getIntegerType(64), 0); + // Note on zero_attr integer type: DynamicUpdateSlice op start_indices are + // required to have matching types. This rewrite rule creates + // DynamicUpdateSlice ops where the first "start index" is always i32 and + // subsequent ones are constructed based on zero_attr. Thus the type + // for zero_attr needs to be i32 as well. + auto zero_attr = IntegerAttr::get(rewriter.getIntegerType(32), 0); auto unpacked_indices = rewriter.create( op.getLoc(), unpacked_indices_type, indices, zero_attr); From 472576cae52279e72a4e5ddf6c9e767af98c4668 Mon Sep 17 00:00:00 2001 From: Peng Wang Date: Fri, 7 Aug 2020 10:46:11 -0700 Subject: [PATCH 2342/2522] Changed stateless_random_ops_test to use parameterized tests. Test time came down from ~600s to ~60s. 
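(Editorial illustration, not part of this patch; test names below are made up.) The pattern, roughly: `absl.testing.parameterized` expands each `named_parameters` entry into its own named test method, so what used to be one long in-test loop becomes many independent cases that the runner can shard and filter individually. A minimal sketch:

    from absl.testing import parameterized
    import tensorflow as tf

    class ExampleTest(tf.test.TestCase, parameterized.TestCase):

      # Each tuple's first element is a name suffix; each entry becomes its own
      # test method, so cases run and report independently instead of inside
      # one monolithic loop body.
      @parameterized.named_parameters(
          ("_int32", tf.int32),
          ("_int64", tf.int64),
      )
      def testDtypeRoundTrip(self, dtype):
        self.assertEqual(dtype, tf.as_dtype(dtype.as_numpy_dtype))

    if __name__ == "__main__":
      tf.test.main()
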
PiperOrigin-RevId: 325463819 Change-Id: Ida4b4210ef6be21470d7b00688709edd6407e76c --- tensorflow/python/kernel_tests/random/BUILD | 3 - .../random/stateless_random_ops_test.py | 317 +++++++++++------- 2 files changed, 188 insertions(+), 132 deletions(-) diff --git a/tensorflow/python/kernel_tests/random/BUILD b/tensorflow/python/kernel_tests/random/BUILD index 31e0417102d..06360fc2095 100644 --- a/tensorflow/python/kernel_tests/random/BUILD +++ b/tensorflow/python/kernel_tests/random/BUILD @@ -120,9 +120,6 @@ cuda_py_test( size = "medium", srcs = ["stateless_random_ops_test.py"], shard_count = 10, - tags = [ - "notap", # b/162112278 - ], tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", diff --git a/tensorflow/python/kernel_tests/random/stateless_random_ops_test.py b/tensorflow/python/kernel_tests/random/stateless_random_ops_test.py index 27b10ea2258..f3949f30c03 100644 --- a/tensorflow/python/kernel_tests/random/stateless_random_ops_test.py +++ b/tensorflow/python/kernel_tests/random/stateless_random_ops_test.py @@ -52,161 +52,220 @@ def invert_philox(key, value): return np.array(value) -class StatelessOpsTest(test.TestCase, parameterized.TestCase): +SEEDS = ((7, 17), (11, 5), (2, 3)) +SEED_TYPES = [dtypes.int32, dtypes.int64] - def _test_match(self, cases): - # Stateless ops should be the same as stateful ops on the first call - # after seed scrambling. - cases = tuple(cases) - key = 0x3ec8f720, 0x02461e29 - for seed in (7, 17), (11, 5), (2, 3): - preseed = invert_philox(key, (seed[0], 0, seed[1], 0)).astype(np.uint64) - preseed = preseed[::2] | preseed[1::2] << 32 - random_seed.set_random_seed(seed[0]) - with test_util.use_gpu(): - for stateless_op, stateful_op in cases: - if context.executing_eagerly(): - # Call set_random_seed in order to clear kernel cache, to prevent - # kernel reusing for the stateful op - random_seed.set_random_seed(seed[0]) - stateful = stateful_op(seed=seed[1]) - pure = stateless_op(seed=preseed) - self.assertAllEqual(stateful, pure) - def _test_determinism(self, cases): - # Stateless values should be equal iff the seeds are equal (roughly) - cases = tuple(cases) - seeds = [(x, y) for x in range(5) for y in range(5)] * 3 - with self.test_session(use_gpu=True), test_util.use_gpu(): - for seed_type in [dtypes.int32, dtypes.int64]: - for stateless_op, _ in cases: - if context.executing_eagerly(): - values = [ - (seed, stateless_op(seed=constant_op.constant(seed, seed_type))) - for seed in seeds] - else: - # Have this branch because the above branch is too slow in graph - # mode - seed_t = array_ops.placeholder(seed_type, shape=[2]) - pure = stateless_op(seed=seed_t) - values = [ - (seed, pure.eval(feed_dict={seed_t: seed})) for seed in seeds - ] - for s0, v0 in values: - for s1, v1 in values: - self.assertEqual(s0 == s1, np.all(v0 == v1)) - - def _float_cases(self, shape_dtypes=(None,)): - float_cases = ( - # Uniform distribution, with and without range - (stateless.stateless_random_uniform, random_ops.random_uniform, {}), - (stateless.stateless_random_uniform, random_ops.random_uniform, - dict(minval=2.2, maxval=7.1)), - # Normal distribution, with and without mean+stddev - (stateless.stateless_random_normal, random_ops.random_normal, {}), - (stateless.stateless_random_normal, random_ops.random_normal, - dict(mean=2, stddev=3)), - # Truncated normal distribution, with and without mean+stddev - (stateless.stateless_truncated_normal, random_ops.truncated_normal, {}), - (stateless.stateless_truncated_normal, random_ops.truncated_normal, - dict(mean=3, 
stddev=4)), - ) - for dtype in dtypes.float16, dtypes.float32, dtypes.float64: - for shape_dtype in shape_dtypes: - for shape in (), (3,), (2, 5): - if shape_dtype is not None: - shape = constant_op.constant(shape, dtype=shape_dtype) - for stateless_op, stateful_op, kwds in float_cases: - kwds = dict(shape=shape, dtype=dtype, **kwds) - yield (functools.partial(stateless_op, **kwds), - functools.partial(stateful_op, **kwds)) - - def _int_cases(self, shape_dtypes=(None,)): +def float_cases(shape_dtypes=(None,)): + cases = ( + # Uniform distribution, with and without range + (stateless.stateless_random_uniform, random_ops.random_uniform, {}), + (stateless.stateless_random_uniform, random_ops.random_uniform, + dict(minval=2.2, maxval=7.1)), + # Normal distribution, with and without mean+stddev + (stateless.stateless_random_normal, random_ops.random_normal, {}), + (stateless.stateless_random_normal, random_ops.random_normal, + dict(mean=2, stddev=3)), + # Truncated normal distribution, with and without mean+stddev + (stateless.stateless_truncated_normal, random_ops.truncated_normal, {}), + (stateless.stateless_truncated_normal, random_ops.truncated_normal, + dict(mean=3, stddev=4)), + ) + # Explicitly passing in params because capturing cell variable from loop is + # problematic in Python + def wrap(op, dtype, shape, shape_dtype, kwds, seed): + shape_ = (constant_op.constant(shape, dtype=shape_dtype) + if shape_dtype is not None else shape) + return op(seed=seed, shape=shape_, dtype=dtype, **kwds) + for dtype in dtypes.float16, dtypes.float32, dtypes.float64: for shape_dtype in shape_dtypes: for shape in (), (3,), (2, 5): - if shape_dtype is not None: - shape = constant_op.constant(shape, dtype=shape_dtype) - for dtype in dtypes.int32, dtypes.int64: - kwds = dict(minval=2, maxval=11111, dtype=dtype, shape=shape) - yield (functools.partial(stateless.stateless_random_uniform, **kwds), - functools.partial(random_ops.random_uniform, **kwds)) + for stateless_op, stateful_op, kwds in cases: + yield (functools.partial(wrap, stateless_op, dtype, shape, + shape_dtype, kwds), + functools.partial(wrap, stateful_op, dtype, shape, + shape_dtype, kwds)) - def _multinomial_cases(self): - num_samples = 10 - for logits_dtype in np.float16, np.float32, np.float64: - for output_dtype in dtypes.int32, dtypes.int64: - for logits in ([[0.1, 0.25, 0.5, 0.15]], [[0.5, 0.5], [0.8, 0.2], - [0.25, 0.75]]): - kwds = dict( + +def int_cases(shape_dtypes=(None,)): + def wrap(op, shape, shape_dtype, dtype, seed): + shape_ = (constant_op.constant(shape, dtype=shape_dtype) + if shape_dtype is not None else shape) + return op(seed=seed, shape=shape_, minval=2, maxval=11111, + dtype=dtype) + for shape_dtype in shape_dtypes: + for shape in (), (3,), (2, 5): + for dtype in dtypes.int32, dtypes.int64: + yield (functools.partial(wrap, stateless.stateless_random_uniform, + shape, shape_dtype, dtype), + functools.partial(wrap, random_ops.random_uniform, + shape, shape_dtype, dtype)) + + +def multinomial_cases(): + num_samples = 10 + def wrap(op, logits, logits_dtype, output_dtype, seed): + return op(seed=seed, logits=constant_op.constant(logits, dtype=logits_dtype), - num_samples=num_samples, - output_dtype=output_dtype) - yield (functools.partial(stateless.stateless_multinomial, **kwds), - functools.partial(random_ops.multinomial, **kwds)) + num_samples=num_samples, output_dtype=output_dtype) + for logits_dtype in np.float16, np.float32, np.float64: + for output_dtype in dtypes.int32, dtypes.int64: + for logits in ([[0.1, 0.25, 0.5, 
0.15]], [[0.5, 0.5], [0.8, 0.2], + [0.25, 0.75]]): + yield (functools.partial(wrap, stateless.stateless_multinomial, logits, + logits_dtype, output_dtype), + functools.partial(wrap, random_ops.multinomial, logits, + logits_dtype, output_dtype)) - def _gamma_cases(self): - for dtype in np.float16, np.float32, np.float64: - for alpha in ([[.5, 1., 2.]], [[0.5, 0.5], [0.8, 0.2], [0.25, 0.75]]): - kwds = dict(alpha=constant_op.constant(alpha, dtype=dtype), dtype=dtype) - yield ( - functools.partial(stateless.stateless_random_gamma, - shape=(10,) + tuple(np.shape(alpha)), **kwds), - functools.partial(random_ops.random_gamma, shape=(10,), **kwds)) - def _poisson_cases(self): - for lam_dtype in np.float16, np.float32, np.float64, np.int32, np.int64: - for out_dtype in np.float16, np.float32, np.float64, np.int32, np.int64: - for lam in ([[5.5, 1., 2.]], [[7.5, 10.5], [3.8, 8.2], [1.25, 9.75]]): - kwds = dict( +def gamma_cases(): + def wrap(op, alpha, dtype, shape, seed): + return op(seed=seed, shape=shape, + alpha=constant_op.constant(alpha, dtype=dtype), dtype=dtype) + for dtype in np.float16, np.float32, np.float64: + for alpha in ([[.5, 1., 2.]], [[0.5, 0.5], [0.8, 0.2], [0.25, 0.75]]): + yield (functools.partial(wrap, stateless.stateless_random_gamma, alpha, + dtype, (10,) + tuple(np.shape(alpha))), + functools.partial(wrap, random_ops.random_gamma, alpha, + dtype, (10,))) + + +def poisson_cases(): + def wrap(op, lam, lam_dtype, out_dtype, shape, seed): + return op(seed=seed, shape=shape, lam=constant_op.constant(lam_dtype(lam), dtype=lam_dtype), dtype=out_dtype) - yield ( - functools.partial(stateless.stateless_random_poisson, - shape=(10,) + tuple(np.shape(lam)), - **kwds), - functools.partial(random_ops.random_poisson, shape=(10,), **kwds)) + for lam_dtype in np.float16, np.float32, np.float64, np.int32, np.int64: + for out_dtype in np.float16, np.float32, np.float64, np.int32, np.int64: + for lam in ([[5.5, 1., 2.]], [[7.5, 10.5], [3.8, 8.2], [1.25, 9.75]]): + yield (functools.partial(wrap, stateless.stateless_random_poisson, lam, + lam_dtype, out_dtype, + (10,) + tuple(np.shape(lam))), + functools.partial(wrap, random_ops.random_poisson, lam, + lam_dtype, out_dtype, (10,))) - @test_util.disable_tfrt('tensorflow::DirectSession::Run crashes. b/156187396') - def testMatchFloat(self): - self._test_match(self._float_cases()) - @test_util.disable_tfrt('tensorflow::DirectSession::Run crashes. b/156187396') - def testMatchInt(self): - self._test_match(self._int_cases()) +class StatelessOpsTest(test.TestCase, parameterized.TestCase): - @test_util.disable_tfrt('tensorflow::DirectSession::Run crashes. b/156187396') - def testMatchMultinomial(self): - self._test_match(self._multinomial_cases()) + def _test_match(self, case, seed): + # Stateless ops should be the same as stateful ops on the first call + # after seed scrambling. + key = 0x3ec8f720, 0x02461e29 + preseed = invert_philox(key, (seed[0], 0, seed[1], 0)).astype(np.uint64) + preseed = preseed[::2] | preseed[1::2] << 32 + random_seed.set_random_seed(seed[0]) + with test_util.use_gpu(): + stateless_op, stateful_op = case + if context.executing_eagerly(): + # Call set_random_seed in order to clear kernel cache, to prevent + # kernel reusing for the stateful op + random_seed.set_random_seed(seed[0]) + stateful = stateful_op(seed=seed[1]) + pure = stateless_op(seed=preseed) + self.assertAllEqual(stateful, pure) - @test_util.disable_tfrt('tensorflow::DirectSession::Run crashes. 
b/156187396') - def testMatchGamma(self): - self._test_match(self._gamma_cases()) + def _test_determinism(self, case, seed_type): + # Stateless values should be equal iff the seeds are equal (roughly) + seeds = [(x, y) for x in range(5) for y in range(5)] * 3 # pylint: disable=g-complex-comprehension + with self.test_session(use_gpu=True), test_util.use_gpu(): + stateless_op, _ = case + if context.executing_eagerly(): + values = [ + (seed, stateless_op(seed=constant_op.constant(seed, seed_type))) + for seed in seeds] + else: + # Have this branch because the above branch is too slow in graph + # mode + seed_t = array_ops.placeholder(seed_type, shape=[2]) + pure = stateless_op(seed=seed_t) + values = [ + (seed, pure.eval(feed_dict={seed_t: seed})) for seed in seeds + ] + for s0, v0 in values: + for s1, v1 in values: + self.assertEqual(s0 == s1, np.all(v0 == v1)) + @parameterized.named_parameters( + ('_%s_%s' % (case_id, seed_id), case, seed) # pylint: disable=g-complex-comprehension + for seed_id, seed in enumerate(SEEDS) + for case_id, case in enumerate(float_cases())) @test_util.disable_tfrt('tensorflow::DirectSession::Run crashes. b/156187396') - def testMatchPoisson(self): - self._test_match(self._poisson_cases()) + def testMatchFloat(self, case, seed): + self._test_match(case, seed) + @parameterized.named_parameters( + ('_%s_%s' % (case_id, seed_id), case, seed) # pylint: disable=g-complex-comprehension + for seed_id, seed in enumerate(SEEDS) + for case_id, case in enumerate(int_cases())) @test_util.disable_tfrt('tensorflow::DirectSession::Run crashes. b/156187396') - def testDeterminismFloat(self): - self._test_determinism( - self._float_cases(shape_dtypes=(dtypes.int32, dtypes.int64))) + def testMatchInt(self, case, seed): + self._test_match(case, seed) + @parameterized.named_parameters( + ('_%s_%s' % (case_id, seed_id), case, seed) # pylint: disable=g-complex-comprehension + for seed_id, seed in enumerate(SEEDS) + for case_id, case in enumerate(multinomial_cases())) @test_util.disable_tfrt('tensorflow::DirectSession::Run crashes. b/156187396') - def testDeterminismInt(self): - self._test_determinism( - self._int_cases(shape_dtypes=(dtypes.int32, dtypes.int64))) + def testMatchMultinomial(self, case, seed): + self._test_match(case, seed) + @parameterized.named_parameters( + ('_%s_%s' % (case_id, seed_id), case, seed) # pylint: disable=g-complex-comprehension + for seed_id, seed in enumerate(SEEDS) + for case_id, case in enumerate(gamma_cases())) @test_util.disable_tfrt('tensorflow::DirectSession::Run crashes. b/156187396') - def testDeterminismMultinomial(self): - self._test_determinism(self._multinomial_cases()) + def testMatchGamma(self, case, seed): + self._test_match(case, seed) + @parameterized.named_parameters( + ('_%s_%s' % (case_id, seed_id), case, seed) # pylint: disable=g-complex-comprehension + for seed_id, seed in enumerate(SEEDS) + for case_id, case in enumerate(poisson_cases())) @test_util.disable_tfrt('tensorflow::DirectSession::Run crashes. b/156187396') - def testDeterminismGamma(self): - self._test_determinism(self._gamma_cases()) + def testMatchPoisson(self, case, seed): + self._test_match(case, seed) + @parameterized.named_parameters( + ('_%s_%s' % (case_id, type_id), case, seed_type) # pylint: disable=g-complex-comprehension + for type_id, seed_type in enumerate(SEED_TYPES) + for case_id, case in enumerate(float_cases( + shape_dtypes=(dtypes.int32, dtypes.int64)))) @test_util.disable_tfrt('tensorflow::DirectSession::Run crashes. 
b/156187396') - def testDeterminismPoisson(self): - self._test_determinism(self._poisson_cases()) + def testDeterminismFloat(self, case, seed_type): + self._test_determinism(case, seed_type) + + @parameterized.named_parameters( + ('_%s_%s' % (case_id, type_id), case, seed_type) # pylint: disable=g-complex-comprehension + for type_id, seed_type in enumerate(SEED_TYPES) + for case_id, case in enumerate(int_cases( + shape_dtypes=(dtypes.int32, dtypes.int64)))) + @test_util.disable_tfrt('tensorflow::DirectSession::Run crashes. b/156187396') + def testDeterminismInt(self, case, seed_type): + self._test_determinism(case, seed_type) + + @parameterized.named_parameters( + ('_%s_%s' % (case_id, type_id), case, seed_type) # pylint: disable=g-complex-comprehension + for type_id, seed_type in enumerate(SEED_TYPES) + for case_id, case in enumerate(multinomial_cases())) + @test_util.disable_tfrt('tensorflow::DirectSession::Run crashes. b/156187396') + def testDeterminismMultinomial(self, case, seed_type): + self._test_determinism(case, seed_type) + + @parameterized.named_parameters( + ('_%s_%s' % (case_id, type_id), case, seed_type) # pylint: disable=g-complex-comprehension + for type_id, seed_type in enumerate(SEED_TYPES) + for case_id, case in enumerate(gamma_cases())) + @test_util.disable_tfrt('tensorflow::DirectSession::Run crashes. b/156187396') + def testDeterminismGamma(self, case, seed_type): + self._test_determinism(case, seed_type) + + @parameterized.named_parameters( + ('_%s_%s' % (case_id, type_id), case, seed_type) # pylint: disable=g-complex-comprehension + for type_id, seed_type in enumerate(SEED_TYPES) + for case_id, case in enumerate(poisson_cases())) + @test_util.disable_tfrt('tensorflow::DirectSession::Run crashes. b/156187396') + def testDeterminismPoisson(self, case, seed_type): + self._test_determinism(case, seed_type) def assertDTypeEqual(self, a, b): self.assertEqual(dtypes.as_dtype(a), dtypes.as_dtype(b)) From 595776085e1baadbf67ca62a81e6fdb54e61e4d9 Mon Sep 17 00:00:00 2001 From: Jiho Choi Date: Fri, 7 Aug 2020 11:11:46 -0700 Subject: [PATCH 2343/2522] Add helper to identify JAX op name and type. PiperOrigin-RevId: 325469642 Change-Id: I796eac9ab961385dd0b7b99670a3883780604d1e --- tensorflow/core/profiler/utils/tf_op_utils.cc | 13 +++++++++++-- tensorflow/core/profiler/utils/tf_op_utils.h | 3 +++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/profiler/utils/tf_op_utils.cc b/tensorflow/core/profiler/utils/tf_op_utils.cc index e58ccba445b..941676079b9 100644 --- a/tensorflow/core/profiler/utils/tf_op_utils.cc +++ b/tensorflow/core/profiler/utils/tf_op_utils.cc @@ -32,6 +32,7 @@ namespace { const absl::string_view kIterator = "Iterator"; const absl::string_view kSeparator = "::"; +constexpr char kNameScopeSeparator = '/'; } // namespace @@ -51,10 +52,17 @@ bool IsTfOpType(absl::string_view op_type) { } bool IsJaxOpType(absl::string_view op_type) { - static const LazyRE2 kJaxOpTypeRegEx = {"[a-z_]*"}; + static const LazyRE2 kJaxOpTypeRegEx = {"[a-z_][a-z_]*"}; return RE2::FullMatch(op_type, *kJaxOpTypeRegEx); } +bool IsJaxOpNameAndType(absl::string_view op_name, absl::string_view op_type) { + if (op_name.empty() || !IsJaxOpType(op_type)) return false; + std::vector split_result = + absl::StrSplit(op_name, kNameScopeSeparator); + return absl::StrContains(split_result.back(), op_type); +} + TfOp ParseTfOpFullname(absl::string_view tf_op_fullname) { // TF Op names have the format "name:type". 
TfOp tf_op = {Category::kUnknown, tf_op_fullname, kUnknownOp}; @@ -85,7 +93,8 @@ TfOp ParseTfOpFullname(absl::string_view tf_op_fullname) { } std::vector ParseTfNameScopes(const TfOp& tf_op) { - std::vector name_scopes = absl::StrSplit(tf_op.name, '/'); + std::vector name_scopes = + absl::StrSplit(tf_op.name, kNameScopeSeparator); // The last element is an op name not TF name scope. if (!name_scopes.empty()) name_scopes.pop_back(); return name_scopes; diff --git a/tensorflow/core/profiler/utils/tf_op_utils.h b/tensorflow/core/profiler/utils/tf_op_utils.h index f0668190a07..76e6256164b 100644 --- a/tensorflow/core/profiler/utils/tf_op_utils.h +++ b/tensorflow/core/profiler/utils/tf_op_utils.h @@ -104,6 +104,9 @@ bool IsTfOpType(absl::string_view op_type); // Returns true if the given string matches JAX pattern. bool IsJaxOpType(absl::string_view op_type); +// Returns true if the given strings match JAX pattern. +bool IsJaxOpNameAndType(absl::string_view op_name, absl::string_view op_type); + } // namespace profiler } // namespace tensorflow From dbb961df7fcaabb3c504e31b541b338d6360bec0 Mon Sep 17 00:00:00 2001 From: Jay Shi Date: Fri, 7 Aug 2020 11:36:44 -0700 Subject: [PATCH 2344/2522] [tf.data] Add more unit test to check the correctness of `disable_intra_op_parallelism` optimization. PiperOrigin-RevId: 325475258 Change-Id: I3a8480a15c0828add3fa37e7a7afe095a20a73e2 --- .../data/disable_intra_op_parallelism_test.cc | 27 ++++++++++++------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/data/disable_intra_op_parallelism_test.cc b/tensorflow/core/grappler/optimizers/data/disable_intra_op_parallelism_test.cc index 76d6b46fb4e..b1c886594ec 100644 --- a/tensorflow/core/grappler/optimizers/data/disable_intra_op_parallelism_test.cc +++ b/tensorflow/core/grappler/optimizers/data/disable_intra_op_parallelism_test.cc @@ -70,10 +70,11 @@ TEST_P(IntraOpAlreadySetTest, IntraOpParallelism) { TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output)); EXPECT_EQ(output.node_size(), 6); EXPECT_TRUE(graph_utils::ContainsNodeWithOp(op, output)); - NodeDef test_node = output.node(graph_utils::FindGraphNodeWithOp(op, output)); - NodeDef test_val = output.node( - graph_utils::FindGraphNodeWithName(test_node.input(1), output)); - EXPECT_EQ(test_val.attr().at("value").tensor().int64_val(0), value); + NodeDef parallelism_node = + output.node(graph_utils::FindGraphNodeWithOp(op, output)); + NodeDef parallelism_val = output.node( + graph_utils::FindGraphNodeWithName(parallelism_node.input(1), output)); + EXPECT_EQ(parallelism_val.attr().at("value").tensor().int64_val(0), value); } INSTANTIATE_TEST_SUITE_P( @@ -105,11 +106,19 @@ TEST(IntraOpNotSetTest, IntraOpParallelism) { EXPECT_EQ(output.node_size(), 7); EXPECT_TRUE( graph_utils::ContainsNodeWithOp("MaxIntraOpParallelismDataset", output)); - NodeDef test_node = output.node( - graph_utils::FindGraphNodeWithOp("MaxIntraOpParallelismDataset", output)); - NodeDef test_val = output.node( - graph_utils::FindGraphNodeWithName(test_node.input(1), output)); - EXPECT_EQ(test_val.attr().at("value").tensor().int64_val(0), 1); + NodeDef sink_node = + output.node(graph_utils::FindGraphNodeWithName("Sink", output)); + EXPECT_EQ(sink_node.input_size(), 1); + NodeDef parallelism_node = output.node( + graph_utils::FindGraphNodeWithName(sink_node.input(0), output)); + EXPECT_EQ(parallelism_node.op(), "MaxIntraOpParallelismDataset"); + EXPECT_EQ(parallelism_node.input_size(), 2); + NodeDef range_node = output.node( + 
graph_utils::FindGraphNodeWithName(parallelism_node.input(0), output)); + EXPECT_EQ(range_node.name(), "range"); + NodeDef parallelism_val = output.node( + graph_utils::FindGraphNodeWithName(parallelism_node.input(1), output)); + EXPECT_EQ(parallelism_val.attr().at("value").tensor().int64_val(0), 1); } } // namespace From 2af416baef3c5dd9927950515ca357f5d28d483b Mon Sep 17 00:00:00 2001 From: Kaixi Hou Date: Fri, 7 Aug 2020 11:51:08 -0700 Subject: [PATCH 2345/2522] Fix a logic issue --- tensorflow/core/grappler/optimizers/generic_layout_optimizer.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/optimizers/generic_layout_optimizer.cc b/tensorflow/core/grappler/optimizers/generic_layout_optimizer.cc index 6053f96ae08..9e3a09b5d79 100644 --- a/tensorflow/core/grappler/optimizers/generic_layout_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/generic_layout_optimizer.cc @@ -77,7 +77,7 @@ inline bool NumConvOnDeviceWithDataTypeOverThreshold( for (const auto& node : context.graph_view->GetNodes()) { const auto* node_def = node.node(); - if (!IsConv2D(*node_def) or !IsConv3D(*node_def)) { + if (!IsConv2D(*node_def) and !IsConv3D(*node_def)) { continue; } const string& device_name = From 49aae376828f823a5b0edc561b8cc7f0d48fb09f Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Fri, 7 Aug 2020 11:56:32 -0700 Subject: [PATCH 2346/2522] Changed signature of GenerateWorkGroupSizesAlignedToGrid to return void instead of Status. This function returned status::ok always. PiperOrigin-RevId: 325479195 Change-Id: Ib2d4e51265d3e226d60be7ae5313a8d5d9d82b2e --- .../delegates/gpu/cl/kernels/work_group_picking.cc | 4 ++-- .../lite/delegates/gpu/common/workgroup_selection.cc | 12 ++++++------ .../lite/delegates/gpu/common/workgroup_selection.h | 7 ++++--- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.cc b/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.cc index 3771a5b033a..e85e20761e3 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.cc @@ -84,8 +84,8 @@ absl::Status GetBestWorkGroupAlignedToGrid(const TuningParameters& params, max_wg_size.x = params.info->max_work_group_size_x; max_wg_size.y = params.info->max_work_group_size_y; max_wg_size.z = params.info->max_work_group_size_z; - RETURN_IF_ERROR(GenerateWorkGroupSizesAlignedToGrid( - grid, max_wg_size, kernel.GetMaxWorkGroupSize(), &work_groups)); + GenerateWorkGroupSizesAlignedToGrid( + grid, max_wg_size, kernel.GetMaxWorkGroupSize(), &work_groups); int best_work_group_index; RETURN_IF_ERROR(params.queue->GetBestWorkGroupIndex( kernel, *params.info, grid, work_groups, &best_work_group_index)); diff --git a/tensorflow/lite/delegates/gpu/common/workgroup_selection.cc b/tensorflow/lite/delegates/gpu/common/workgroup_selection.cc index 3abab71829f..5ae2a53f449 100644 --- a/tensorflow/lite/delegates/gpu/common/workgroup_selection.cc +++ b/tensorflow/lite/delegates/gpu/common/workgroup_selection.cc @@ -184,9 +184,10 @@ template std::vector GenerateWorkGroupSizes( WorkGroupSizeAlignment z_alignment); template -absl::Status GenerateWorkGroupSizesAlignedToGrid( - const T& grid, const T& max_work_group_size, - const int max_work_group_invocations, std::vector* work_groups) { +void GenerateWorkGroupSizesAlignedToGrid(const T& grid, + const T& max_work_group_size, + const int max_work_group_invocations, + std::vector* work_groups) { auto 
alignment = WorkGroupSizeAlignment::PRECISE; *work_groups = GenerateWorkGroupSizes( grid, /*min_work_group_total_size = */ 32, max_work_group_invocations, @@ -196,16 +197,15 @@ absl::Status GenerateWorkGroupSizesAlignedToGrid( AddCornerCases(grid, max_work_group_invocations, max_work_group_size, alignment, alignment, alignment, work_groups); } - return absl::OkStatus(); } // Specializations of GenerateWorkGroupSizesAlignedToGrid for int3 and uint3 -template absl::Status GenerateWorkGroupSizesAlignedToGrid( +template void GenerateWorkGroupSizesAlignedToGrid( const int3& grid, const int3& max_work_group_size, const int max_work_group_invocations, std::vector* work_groups); -template absl::Status GenerateWorkGroupSizesAlignedToGrid( +template void GenerateWorkGroupSizesAlignedToGrid( const uint3& grid, const uint3& max_work_group_size, const int max_work_group_invocations, std::vector* work_groups); diff --git a/tensorflow/lite/delegates/gpu/common/workgroup_selection.h b/tensorflow/lite/delegates/gpu/common/workgroup_selection.h index 75967cb04df..a08bfce991a 100644 --- a/tensorflow/lite/delegates/gpu/common/workgroup_selection.h +++ b/tensorflow/lite/delegates/gpu/common/workgroup_selection.h @@ -42,9 +42,10 @@ std::vector GenerateWorkGroupSizes( WorkGroupSizeAlignment y_alignment, WorkGroupSizeAlignment z_alignment); template -absl::Status GenerateWorkGroupSizesAlignedToGrid( - const T& grid, const T& max_work_group_size, - const int max_work_group_invocations, std::vector* work_groups); +void GenerateWorkGroupSizesAlignedToGrid(const T& grid, + const T& max_work_group_size, + const int max_work_group_invocations, + std::vector* work_groups); } // namespace gpu } // namespace tflite From f037d18d239a0d6c78b62c43c5b894beb8b980eb Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Fri, 7 Aug 2020 12:03:42 -0700 Subject: [PATCH 2347/2522] Fix the missing symbol Windows breakage. 
PiperOrigin-RevId: 325480645 Change-Id: I6bd669446b2c74ad7f4bdb5eb4813c0ad0fecba8 --- tensorflow/tools/def_file_filter/symbols_pybind.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/tools/def_file_filter/symbols_pybind.txt b/tensorflow/tools/def_file_filter/symbols_pybind.txt index 55db32e8d6f..b2546582418 100644 --- a/tensorflow/tools/def_file_filter/symbols_pybind.txt +++ b/tensorflow/tools/def_file_filter/symbols_pybind.txt @@ -20,6 +20,7 @@ tensorflow::swig::AssertSameStructureForData tensorflow::swig::RegisterPyObject tensorflow::swig::RegisterType tensorflow::swig::IsEagerTensorSlow +tensorflow::swig::GetRegisteredPyObject [util_port] # util_port tensorflow::IsGoogleCudaEnabled From 60dd10d03fb74d285dd7b3cad0acf350d542b620 Mon Sep 17 00:00:00 2001 From: Jonathan Chu Date: Fri, 7 Aug 2020 19:18:50 +0000 Subject: [PATCH 2348/2522] Remove TODO --- tensorflow/python/eager/def_function.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py index 8447245b524..a333c5e468b 100644 --- a/tensorflow/python/eager/def_function.py +++ b/tensorflow/python/eager/def_function.py @@ -917,7 +917,6 @@ class Function(object): canon_args, canon_kwds, flat_args, flat_kwds = \ self._stateful_fn._function_spec.canonicalize_function_inputs( # pylint: disable=protected-access *args, **kwds) - # TODO(jlchu): verify that modification to fn_with_cond works return function_lib.defun(fn_with_cond)(canon_args, canon_kwds, flat_args, flat_kwds) From 486ee6c6c6f037e63a5aa69057b5986d7a8ce145 Mon Sep 17 00:00:00 2001 From: Robert David Date: Fri, 7 Aug 2020 12:33:22 -0700 Subject: [PATCH 2349/2522] Move IsActivationSupported and MaybeFuseActivation to model_builder_helper.h/.cc PiperOrigin-RevId: 325486463 Change-Id: I25d035ffecffe2514f15a5f34ecfc16cc5741e4a --- tensorflow/lite/delegates/gpu/common/BUILD | 3 +- .../delegates/gpu/common/model_builder.cc | 97 +------------------ .../gpu/common/model_builder_helper.cc | 90 +++++++++++++++++ .../gpu/common/model_builder_helper.h | 12 +++ 4 files changed, 109 insertions(+), 93 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/common/BUILD b/tensorflow/lite/delegates/gpu/common/BUILD index 3caee09ca7e..60a0fda422c 100644 --- a/tensorflow/lite/delegates/gpu/common/BUILD +++ b/tensorflow/lite/delegates/gpu/common/BUILD @@ -109,6 +109,7 @@ cc_library( ":data_type", ":model", ":model_builder_helper", + ":model_transformer", ":object_reader", ":operations", ":shape", @@ -125,7 +126,6 @@ cc_library( "//tensorflow/lite/kernels:kernel_util", "//tensorflow/lite/kernels/internal:reference_base", "//tensorflow/lite/kernels/internal:tensor", - "//tensorflow/lite/schema:schema_fbs", ] + tf_platform_alias("custom_parsers", "//tensorflow/lite/delegates/gpu/common/"), ) @@ -148,6 +148,7 @@ cc_library( deps = [ ":data_type", ":model", + ":operations", ":shape", ":status", ":tensor", diff --git a/tensorflow/lite/delegates/gpu/common/model_builder.cc b/tensorflow/lite/delegates/gpu/common/model_builder.cc index 84622cdc294..426d4d2436a 100644 --- a/tensorflow/lite/delegates/gpu/common/model_builder.cc +++ b/tensorflow/lite/delegates/gpu/common/model_builder.cc @@ -16,29 +16,29 @@ limitations under the License. 
#include "tensorflow/lite/delegates/gpu/common/model_builder.h" #include +#include #include -#include -#include +#include #include +#include #include #include #include +#include #include #include "absl/container/flat_hash_map.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_join.h" #include "absl/strings/string_view.h" -#include "tensorflow/lite/builtin_op_data.h" #include "tensorflow/lite/builtin_ops.h" #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/context.h" -#include "tensorflow/lite/context_util.h" #include "tensorflow/lite/delegates/gpu/common/custom_parsers.h" #include "tensorflow/lite/delegates/gpu/common/data_type.h" #include "tensorflow/lite/delegates/gpu/common/model.h" #include "tensorflow/lite/delegates/gpu/common/model_builder_helper.h" +#include "tensorflow/lite/delegates/gpu/common/model_transformer.h" #include "tensorflow/lite/delegates/gpu/common/object_reader.h" #include "tensorflow/lite/delegates/gpu/common/operations.h" #include "tensorflow/lite/delegates/gpu/common/shape.h" @@ -49,34 +49,12 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/reference/dequantize.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/schema/schema_generated.h" #include "tensorflow/lite/util.h" namespace tflite { namespace gpu { namespace { -// Creates a node that consumes output from the given node. Because output need -// to stay the same, newly created node will inherit the output from the given -// node, which will in turn get newly created copy of output. This is necessary -// to preserve reference consistency if another node was pointing at that -// output: -// node(output) -// will turn into: -// node(copy(output)) <- passthrough_node(output) -absl::Status NewPassthroughNode(GraphFloat32* graph, Node* node, - const Value* output, Node** passthru_node) { - *passthru_node = graph->NewNode(); - // Make copies for every output in the original node. - RETURN_IF_ERROR(graph->SetProducer((*passthru_node)->id, output->id)); - Value* copy_output = graph->NewValue(); - RETURN_IF_ERROR(graph->SetProducer(node->id, copy_output->id)); - RETURN_IF_ERROR(graph->AddConsumer((*passthru_node)->id, copy_output->id)); - copy_output->tensor = output->tensor; - copy_output->tensor.ref = -1; - return absl::OkStatus(); -} - absl::Status CheckTensorIsAvailable(const TfLiteContext* context, const TfLiteNode* tflite_node, int idx) { // If tensor id is in range, it's guaranteed that it'll be available. @@ -105,71 +83,6 @@ class TFLiteOperationParser { const TfLiteRegistration* registration) = 0; }; -absl::Status IsActivationSupported(TfLiteFusedActivation fused_activation) { - switch (fused_activation) { - case kTfLiteActNone: - case kTfLiteActRelu: - case kTfLiteActReluN1To1: - case kTfLiteActRelu6: - case kTfLiteActTanh: - case kTfLiteActSigmoid: - return absl::OkStatus(); - case kTfLiteActSignBit: - return absl::UnimplementedError( - "TfLiteFusedActivation.kTfLiteActSignBit"); - - // Do not add default; we want compilation error rather than run-time - // error. - } -} - -// If there is fused activation present, then there will be another node created -// that will have identical output as the given node. New operation node will -// depend on the given node output. 
-absl::Status MaybeFuseActivation(TfLiteFusedActivation fused_activation, - GraphFloat32* graph, Node* node) { - const auto outputs = graph->FindOutputs(node->id); - if (outputs.size() != 1) { - return absl::InternalError("Number of outputs != 1"); - } - switch (fused_activation) { - case kTfLiteActNone: - // Nothing to do here - return absl::OkStatus(); - case kTfLiteActRelu: - case kTfLiteActReluN1To1: - case kTfLiteActRelu6: { - ReLUAttributes attr; - attr.clip = fused_activation == kTfLiteActRelu - ? 0.0f - : (fused_activation == kTfLiteActReluN1To1 ? 1.0f : 6.0f); - Node* activation_node; - RETURN_IF_ERROR( - NewPassthroughNode(graph, node, outputs[0], &activation_node)); - activation_node->operation.type = ToString(OperationType::RELU); - activation_node->operation.attributes = attr; - return absl::OkStatus(); - } - case kTfLiteActTanh: { - Node* activation_node; - RETURN_IF_ERROR( - NewPassthroughNode(graph, node, outputs[0], &activation_node)); - activation_node->operation.type = ToString(OperationType::TANH); - return absl::OkStatus(); - } - case kTfLiteActSigmoid: { - Node* activation_node; - RETURN_IF_ERROR( - NewPassthroughNode(graph, node, outputs[0], &activation_node)); - activation_node->operation.type = ToString(OperationType::SIGMOID); - return absl::OkStatus(); - } break; - default: - return absl::NotFoundError( - absl::StrCat("Unsupported fused activation: ", fused_activation)); - } -} - HW ToHW(int32_t h, int32_t w) { return HW(h > 0 ? h : 1, w > 0 ? w : 1); } template diff --git a/tensorflow/lite/delegates/gpu/common/model_builder_helper.cc b/tensorflow/lite/delegates/gpu/common/model_builder_helper.cc index 453e33ec916..b030fb7e700 100644 --- a/tensorflow/lite/delegates/gpu/common/model_builder_helper.cc +++ b/tensorflow/lite/delegates/gpu/common/model_builder_helper.cc @@ -25,12 +25,37 @@ limitations under the License. #include "tensorflow/lite/context.h" #include "tensorflow/lite/context_util.h" #include "tensorflow/lite/delegates/gpu/common/model.h" +#include "tensorflow/lite/delegates/gpu/common/operations.h" #include "tensorflow/lite/delegates/gpu/common/status.h" #include "tensorflow/lite/delegates/utils.h" #include "tensorflow/lite/kernels/kernel_util.h" namespace tflite { namespace gpu { +namespace { + +// Creates a node that consumes output from the given node. Because output need +// to stay the same, newly created node will inherit the output from the given +// node, which will in turn get newly created copy of output. This is necessary +// to preserve reference consistency if another node was pointing at that +// output: +// node(output) +// will turn into: +// node(copy(output)) <- passthrough_node(output) +absl::Status NewPassthroughNode(GraphFloat32* graph, Node* node, + const Value* output, Node** passthru_node) { + *passthru_node = graph->NewNode(); + // Make copies for every output in the original node. 
+ RETURN_IF_ERROR(graph->SetProducer((*passthru_node)->id, output->id)); + Value* copy_output = graph->NewValue(); + RETURN_IF_ERROR(graph->SetProducer(node->id, copy_output->id)); + RETURN_IF_ERROR(graph->AddConsumer((*passthru_node)->id, copy_output->id)); + copy_output->tensor = output->tensor; + copy_output->tensor.ref = -1; + return absl::OkStatus(); +} + +} // namespace absl::Status GetNodeAndRegistration(TfLiteContext* context, int node_id, TfLiteNode** tflite_node, @@ -307,5 +332,70 @@ absl::Status SetAllDimensions(const TfLiteIntArray* dimensions, BHWC* shape) { return absl::OkStatus(); } +absl::Status IsActivationSupported(TfLiteFusedActivation fused_activation) { + switch (fused_activation) { + case kTfLiteActNone: + case kTfLiteActRelu: + case kTfLiteActReluN1To1: + case kTfLiteActRelu6: + case kTfLiteActTanh: + case kTfLiteActSigmoid: + return absl::OkStatus(); + case kTfLiteActSignBit: + return absl::UnimplementedError( + "TfLiteFusedActivation.kTfLiteActSignBit"); + + // Do not add default; we want compilation error rather than run-time + // error. + } +} + +// If there is fused activation present, then there will be another node created +// that will have identical output as the given node. New operation node will +// depend on the given node output. +absl::Status MaybeFuseActivation(TfLiteFusedActivation fused_activation, + GraphFloat32* graph, Node* node) { + const auto outputs = graph->FindOutputs(node->id); + if (outputs.size() != 1) { + return absl::InternalError("Number of outputs != 1"); + } + switch (fused_activation) { + case kTfLiteActNone: + // Nothing to do here + return absl::OkStatus(); + case kTfLiteActRelu: + case kTfLiteActReluN1To1: + case kTfLiteActRelu6: { + ReLUAttributes attr; + attr.clip = fused_activation == kTfLiteActRelu + ? 0.0f + : (fused_activation == kTfLiteActReluN1To1 ? 1.0f : 6.0f); + Node* activation_node; + RETURN_IF_ERROR( + NewPassthroughNode(graph, node, outputs[0], &activation_node)); + activation_node->operation.type = ToString(OperationType::RELU); + activation_node->operation.attributes = attr; + return absl::OkStatus(); + } + case kTfLiteActTanh: { + Node* activation_node; + RETURN_IF_ERROR( + NewPassthroughNode(graph, node, outputs[0], &activation_node)); + activation_node->operation.type = ToString(OperationType::TANH); + return absl::OkStatus(); + } + case kTfLiteActSigmoid: { + Node* activation_node; + RETURN_IF_ERROR( + NewPassthroughNode(graph, node, outputs[0], &activation_node)); + activation_node->operation.type = ToString(OperationType::SIGMOID); + return absl::OkStatus(); + } break; + default: + return absl::NotFoundError( + absl::StrCat("Unsupported fused activation: ", fused_activation)); + } +} + } // namespace gpu } // namespace tflite diff --git a/tensorflow/lite/delegates/gpu/common/model_builder_helper.h b/tensorflow/lite/delegates/gpu/common/model_builder_helper.h index 064c42ae9ed..849ef049683 100644 --- a/tensorflow/lite/delegates/gpu/common/model_builder_helper.h +++ b/tensorflow/lite/delegates/gpu/common/model_builder_helper.h @@ -16,6 +16,10 @@ limitations under the License. 
#ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_MODEL_BUILDER_HELPER_H_ #define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_MODEL_BUILDER_HELPER_H_ +#include + +#include "absl/strings/str_cat.h" +#include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/delegates/gpu/common/data_type.h" #include "tensorflow/lite/delegates/gpu/common/model.h" @@ -118,6 +122,14 @@ absl::Status SetAllDimensions(const TfLiteIntArray* dimensions, OHWI* shape); absl::Status SetAllDimensions(const TfLiteIntArray* dimensions, BHWC* shape); +absl::Status IsActivationSupported(TfLiteFusedActivation fused_activation); + +// If there is fused activation present, then there will be another node created +// that will have identical output as the given node. New operation node will +// depend on the given node output. +absl::Status MaybeFuseActivation(TfLiteFusedActivation fused_activation, + GraphFloat32* graph, Node* node); + } // namespace gpu } // namespace tflite From a947442bbbf2f32d499d2e14052ec82ed9a55dc2 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Fri, 7 Aug 2020 12:39:22 -0700 Subject: [PATCH 2350/2522] Removed virtual for Compile, so as method generic. PiperOrigin-RevId: 325487679 Change-Id: Ie4ac95290e4e77e763e49cbb08eab958d753b74b --- tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h index 620883f26f4..6fc9a47f075 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h @@ -98,7 +98,7 @@ class GPUOperation { return GetBestWorkGroup(params, kernel_, grid_size_, &work_group_size_); } - virtual absl::Status Compile(const CreationContext& creation_context); + absl::Status Compile(const CreationContext& creation_context); virtual absl::Status PostCompileCheck(const DeviceInfo& device_info) { return absl::OkStatus(); From 51ecfb3061d981bc8b9530e25f7323029029710c Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Fri, 7 Aug 2020 12:45:04 -0700 Subject: [PATCH 2351/2522] Properly inherit closure types in local functions. Add partial support for resolving local functions based on their type annotations. Propagate types into Expr nodes (although these are roots in expression trees). 
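For illustration, a minimal sketch of the pattern the analyzer can now resolve (it mirrors the new test_local_function_type test added below; the snippet is illustrative user code, not part of this change):

    def test_fn(x: int):

      def foo() -> int:   # local function with a return annotation
        return x          # closes over x, inheriting x's int type

      foo()               # resolved from foo's annotation: the call's func is
                          # typed Callable[[Any], int], and the call expression
                          # (and its enclosing Expr node) are typed int
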
PiperOrigin-RevId: 325488762 Change-Id: Id1754d65bf15b47ca0ef991959d6491c7ebdc118 --- tensorflow/python/autograph/pyct/anno.py | 4 + .../pyct/static_analysis/type_inference.py | 101 ++++++++++++++---- .../static_analysis/type_inference_test.py | 52 ++++++++- 3 files changed, 134 insertions(+), 23 deletions(-) diff --git a/tensorflow/python/autograph/pyct/anno.py b/tensorflow/python/autograph/pyct/anno.py index 90535ffd903..3abee325084 100644 --- a/tensorflow/python/autograph/pyct/anno.py +++ b/tensorflow/python/autograph/pyct/anno.py @@ -35,10 +35,14 @@ import gast class NoValue(enum.Enum): + """Base class for different types of AST annotations.""" def of(self, node, default=None): return getanno(node, self, default=default) + def add_to(self, node, value): + setanno(node, self, value) + def exists(self, node): return hasanno(node, self) diff --git a/tensorflow/python/autograph/pyct/static_analysis/type_inference.py b/tensorflow/python/autograph/pyct/static_analysis/type_inference.py index 755b6c32c64..a5ed40a1e53 100644 --- a/tensorflow/python/autograph/pyct/static_analysis/type_inference.py +++ b/tensorflow/python/autograph/pyct/static_analysis/type_inference.py @@ -31,7 +31,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from typing import Tuple +from typing import Any, Callable, Tuple import gast @@ -187,16 +187,13 @@ class StmtInferrer(gast.NodeVisitor): def visit(self, node): types = super().visit(node) + if __debug__: + self._check_set(types) if types is not None: # TODO(mdan): Normalize by removing subtypes. anno.setanno(node, anno.Static.TYPES, tuple(types)) return types - def visit_FunctionDef(self, node): - # Skip local function definitions. They are analyzed separately. - # TODO(mdan): Don't skip. Analyze side effects instead. - return None - def _check_set(self, value): if value is not None and not isinstance(value, set): raise ValueError('{} method expected to return set, got {}'.format( @@ -300,21 +297,73 @@ class StmtInferrer(gast.NodeVisitor): return types + def visit_FunctionDef(self, node): + f_name = qual_names.QN(node.name) + + if node.decorator_list: + raise NotImplementedError('decorators: {}'.format(node.decorator_list)) + + # TODO(mdan): Use args. + + ret_types = None + if node.returns: + ret_types, _ = self.resolver.res_name( + self.namespace, self.types_in.types, anno.Basic.QN.of(node.returns)) + if __debug__: + self._check_set(ret_types) + + if ret_types is None: + ret_types = {Any} + + fn_types = set() + for rt in ret_types: + fn_types.add(Callable[[Any], rt]) + + self.new_symbols[f_name] = fn_types + # The definition of a function is an expression, hence has no return value. + return None + + def _resolve_typed_callable(self, fn_types, arg_types, keyword_types): + ret_types = set() + for t in fn_types: + + if isinstance(t, Callable): + # Note: these are undocummented - may be version-specific! + # Callable[[x], y]: __args__ are (x, y) + args = t.__args__ + if args: + ret_types.add(args[-1]) + else: + ret_types.add(Any) + else: + raise NotImplementedError('callable type {}'.format(type(t))) + + # Side effects can not be inferred based on type alone. + side_effects = None + return ret_types, side_effects + def visit_Call(self, node): self.visit(node.func) - f_name = anno.getanno(node.func, anno.Basic.QN) - if f_name in self.scope.bound: - # Don't attempt external resolution of local functions. - # TODO(mdan): Use type annotations of the local definition. 
- return None - + f_name = anno.Basic.QN.of(node.func) arg_types = [self.visit(a) for a in node.args] keyword_types = [self.visit(kw.value) for kw in node.keywords] - ret_type, side_effects = self.resolver.res_call(self.namespace, - self.types_in.types, node, - arg_types, keyword_types) + if f_name in self.scope.bound: + # Local function, use local type definitions, if available. + fn_type = self.types_in.types.get(f_name, None) + if fn_type is None: + # No static type info available, nothing more to do. + ret_type, side_effects = None, None + else: + ret_type, side_effects = self._resolve_typed_callable( + self.types_in.types.get(f_name), arg_types, keyword_types) + + else: + # Nonlocal function, resolve externally. + ret_type, side_effects = self.resolver.res_call(self.namespace, + self.types_in.types, node, + arg_types, keyword_types) if __debug__: self._check_set(ret_type) if side_effects: @@ -330,6 +379,9 @@ class StmtInferrer(gast.NodeVisitor): self.new_symbols.update(side_effects) return ret_type + def visit_Expr(self, node): + return self.visit(node.value) + def visit_Index(self, node): return self.visit(node.value) @@ -406,15 +458,24 @@ class Analyzer(cfg.GraphVisitor): self.scope = scope self.closure_types = closure_types + context_types = { + n: t for n, t in closure_types.items() if n not in scope.bound + } + if context_types: + self.context_types = _SymbolTable() + self.context_types.types = context_types + else: + self.context_types = None + def init_state(self, _): return _SymbolTable() def _update_closure_types(self, ast_node, types): - existing_types = anno.getanno(ast_node, anno.Static.CLOSURE_TYPES, None) + existing_types = anno.Static.CLOSURE_TYPES.of(ast_node, None) if existing_types is None: existing_types = {} - anno.setanno(ast_node, anno.Static.CLOSURE_TYPES, existing_types) + anno.Static.CLOSURE_TYPES.add_to(ast_node, existing_types) for k, v in types.types.items(): if k in existing_types: @@ -428,6 +489,8 @@ class Analyzer(cfg.GraphVisitor): types_in = _SymbolTable() for n in node.prev: types_in |= self.out[n] + if (self.context_types is not None) and (node is self.graph.entry): + types_in |= self.context_types types_out = _SymbolTable(types_in) ast_node = node.ast_node @@ -437,8 +500,8 @@ class Analyzer(cfg.GraphVisitor): inferrer.visit(ast_node) types_out.types.update(inferrer.new_symbols) - reaching_fndefs = anno.getanno(ast_node, anno.Static.DEFINED_FNS_IN) - node_scope = anno.getanno(ast_node, anno.Static.SCOPE, None) + reaching_fndefs = anno.Static.DEFINED_FNS_IN.of(ast_node) + node_scope = anno.Static.SCOPE.of(ast_node, None) if node_scope is not None: # TODO(mdan): Check that it's actually safe to skip nodes without scope. 
reads = {str(qn) for qn in node_scope.read} diff --git a/tensorflow/python/autograph/pyct/static_analysis/type_inference_test.py b/tensorflow/python/autograph/pyct/static_analysis/type_inference_test.py index de71854d4fe..ae54cd98b25 100644 --- a/tensorflow/python/autograph/pyct/static_analysis/type_inference_test.py +++ b/tensorflow/python/autograph/pyct/static_analysis/type_inference_test.py @@ -18,6 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from typing import Any, Callable + from tensorflow.python.autograph.pyct import anno from tensorflow.python.autograph.pyct import cfg from tensorflow.python.autograph.pyct import qual_names @@ -33,7 +35,10 @@ class BasicTestResolver(type_inference.Resolver): """A very basic resolver for testing.""" def res_name(self, ns, types_ns, name): - return {type(ns[str(name)])}, ns[str(name)] + str_name = str(name) + if str_name == 'int': + return {int}, int + return {type(ns[str_name])}, ns[str_name] def res_value(self, ns, value): return {type(value)} @@ -72,7 +77,9 @@ class TypeInferenceAnalyzerTest(test.TestCase): def assertClosureTypes(self, node, expected): actual = anno.getanno(node, anno.Static.CLOSURE_TYPES) actual = {str(k): v for k, v in actual.items()} - self.assertDictEqual(actual, expected) + for k, v in expected.items(): + self.assertIn(k, actual) + self.assertEqual(actual[k], v) def test_no_inference_on_unknown_operand_types(self): @@ -188,10 +195,11 @@ class TypeInferenceAnalyzerTest(test.TestCase): node, _ = TestTranspiler(Resolver).transform(test_fn, None) fn_body = node.body - self.assertTypes(fn_body[0].value, int) - self.assertTypes(fn_body[0].value.func, str) self.assertEqual( anno.getanno(fn_body[0].value.func, anno.Static.VALUE), tc.a) + self.assertTypes(fn_body[0].value.func, str) + self.assertTypes(fn_body[0].value, int) + self.assertTypes(fn_body[0], int) def test_assign_overwriting(self): @@ -463,6 +471,26 @@ class TypeInferenceAnalyzerTest(test.TestCase): self.assertTypes(fn_body[0].body[0].value, 'int') self.assertClosureTypes(fn_body[0], {'x': {'int'}}) + def test_local_function_closure_nested(self): + + def test_fn(x: int): + + def foo(): + + def bar(): + return x + + bar() + + foo() + + node, _ = TestTranspiler(BasicTestResolver).transform(test_fn, None) + fn_body = node.body + + self.assertTypes(fn_body[0].body[0].body[0].value, 'int') + self.assertClosureTypes(fn_body[0], {'x': {'int'}}) + self.assertClosureTypes(fn_body[0].body[0], {'x': {'int'}}) + def test_local_function_closure_mutable_var(self): def test_fn(x: int): @@ -512,6 +540,22 @@ class TypeInferenceAnalyzerTest(test.TestCase): self.assertTypes(fn_body[1].targets[0], float) self.assertClosureTypes(fn_body[0], {'x': {float}}) + def test_local_function_type(self): + + def test_fn(x: int): + + def foo() -> int: + return x + + foo() + + node, _ = TestTranspiler(BasicTestResolver).transform(test_fn, None) + fn_body = node.body + + self.assertTypes(fn_body[1].value.func, Callable[[Any], int]) + self.assertTypes(fn_body[1].value, int) + self.assertTypes(fn_body[1], int) + def test_side_effects_on_arg_function_closure(self): test_self = self From cee1115ed5cd52a3c58fef0fdf21042e1195ff3a Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Fri, 7 Aug 2020 12:54:35 -0700 Subject: [PATCH 2352/2522] Internal change PiperOrigin-RevId: 325490761 Change-Id: Ice7ba20990a41d04271253e037002a379fc01335 --- .../gpu/cl/kernels/mean_stddev_normalization.cc | 9 ++++++--- 1 file changed, 6 insertions(+), 3 
deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc b/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc index bf2ae33ec6d..ec775861da7 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.cc @@ -86,8 +86,11 @@ std::string MeanStdDevNormalization::GetNormalizationCode() { std::string c = GetCommonDefines(definition_.precision); c += GetVectorReduceCode(); c += GetReduceCode(work_group_size_.x, work_group_size_.y); - c += R"(__attribute__((reqd_work_group_size(128, 1, 1))) -__kernel void main_function( + c += "__attribute__((reqd_work_group_size(" + + std::to_string(work_group_size_.x) + ", " + + std::to_string(work_group_size_.y) + ", " + + std::to_string(work_group_size_.z) + ")))\n"; + c += R"(__kernel void main_function( $0) { #ifndef __opencl_c_work_group_collective_functions __local float tmp[)" + @@ -130,7 +133,7 @@ $0) { const float variance = sum_diff_sq / args.src_tensor.Channels(); const float stddev_inv = rsqrt(variance + 1.0e-8f); // Calculate (t-mean)/stddev for each element - for (int S = 0; S < args.src_tensor.Slices(); ++S) { + for (int S = get_local_id(0); S < args.src_tensor.Slices(); S += get_local_size(0)) { const float4 t = args.src_tensor.Read(0, 0, S, B); FLT4 result = TO_FLT4((t - mean) * stddev_inv); args.dst_tensor.Write(result, 0, 0, S, B); From 0a7a6220981cedb1cdaf858a563e73aeae90543b Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Fri, 7 Aug 2020 12:58:48 -0700 Subject: [PATCH 2353/2522] KernelInfo separated from kernel. GPUOperation::PostCompileCheck don't need CLKernel(API specific) now and uses KernelInfo(API neutral). PiperOrigin-RevId: 325491634 Change-Id: I6eb07614d7d2044dce34d61adaf63d606581fc21 --- tensorflow/lite/delegates/gpu/cl/cl_kernel.cc | 10 +-- tensorflow/lite/delegates/gpu/cl/cl_kernel.h | 12 +-- .../delegates/gpu/cl/kernels/gpu_operation.cc | 2 +- .../delegates/gpu/cl/kernels/gpu_operation.h | 3 +- .../lite/delegates/gpu/cl/kernels/winograd.cc | 4 +- .../gpu/cl/kernels/work_group_picking.cc | 78 +++++++++++-------- 6 files changed, 60 insertions(+), 49 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/cl_kernel.cc b/tensorflow/lite/delegates/gpu/cl/cl_kernel.cc index c498c14dfe8..7a8aaf6102f 100644 --- a/tensorflow/lite/delegates/gpu/cl/cl_kernel.cc +++ b/tensorflow/lite/delegates/gpu/cl/cl_kernel.cc @@ -58,8 +58,7 @@ absl::Status GetKernelPrivateMemorySize(cl_kernel kernel, } // namespace CLKernel::CLKernel(CLKernel&& kernel) - : private_memory_size_(kernel.private_memory_size_), - max_work_group_size_(kernel.max_work_group_size_), + : info_(kernel.info_), binding_counter_(kernel.binding_counter_), function_name_(std::move(kernel.function_name_)), program_(kernel.program_), @@ -70,8 +69,7 @@ CLKernel::CLKernel(CLKernel&& kernel) CLKernel& CLKernel::operator=(CLKernel&& kernel) { if (this != &kernel) { Release(); - std::swap(private_memory_size_, kernel.private_memory_size_); - std::swap(max_work_group_size_, kernel.max_work_group_size_); + std::swap(info_, kernel.info_); std::swap(binding_counter_, kernel.binding_counter_); function_name_ = std::move(kernel.function_name_); std::swap(program_, kernel.program_); @@ -119,9 +117,9 @@ absl::Status CLKernel::CreateFromProgram(const CLProgram& program, clRetainProgram(program_); RETURN_IF_ERROR(GetKernelPrivateMemorySize(kernel_, program.GetDeviceId(), - &private_memory_size_)); + &info_.private_memory_size)); 
RETURN_IF_ERROR(GetKernelMaxWorkGroupSize(kernel_, program.GetDeviceId(), - &max_work_group_size_)); + &info_.max_work_group_size)); return absl::OkStatus(); } diff --git a/tensorflow/lite/delegates/gpu/cl/cl_kernel.h b/tensorflow/lite/delegates/gpu/cl/cl_kernel.h index 81a777ed822..0af8052f738 100644 --- a/tensorflow/lite/delegates/gpu/cl/cl_kernel.h +++ b/tensorflow/lite/delegates/gpu/cl/cl_kernel.h @@ -28,6 +28,11 @@ namespace tflite { namespace gpu { namespace cl { +struct KernelInfo { + int private_memory_size; + int max_work_group_size; +}; + // Arguments binding to CLKernel can be manual or automatic // In manual you specify binding index explicitly // In automatic binding, index auto-incremented with every binding call @@ -61,9 +66,6 @@ class CLKernel { return SetBytesAuto(static_cast(&value), sizeof(T)); } - int GetPrivateMemorySize() const { return private_memory_size_; } - int GetMaxWorkGroupSize() const { return max_work_group_size_; } - int GetBindingCounter() const { return binding_counter_; } void ResetBindingCounter() { binding_counter_ = 0; } @@ -71,13 +73,13 @@ class CLKernel { // workaround for Mali memory leak absl::Status ReInit() const; + KernelInfo info_; + private: void Release(); absl::Status SetBytes(int index, const void* ptr, int length) const; absl::Status SetBytesAuto(const void* ptr, int length); - int private_memory_size_; - int max_work_group_size_; int binding_counter_ = -1; std::string function_name_; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc index 97c72c1269d..0aa1842791f 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc @@ -244,7 +244,7 @@ absl::Status GPUOperation::Compile(const CreationContext& creation_context) { code_, "main_function", compiler_options_, *creation_context.context, *creation_context.device, &kernel_)); } - return PostCompileCheck(creation_context.device->info_); + return PostCompileCheck(creation_context.device->info_, kernel_.info_); } int3 GPUOperation::GetGridSize() const { diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h index 6fc9a47f075..ba266f8dcc9 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h @@ -100,7 +100,8 @@ class GPUOperation { absl::Status Compile(const CreationContext& creation_context); - virtual absl::Status PostCompileCheck(const DeviceInfo& device_info) { + virtual absl::Status PostCompileCheck(const DeviceInfo& device_info, + const KernelInfo& kernel_info) { return absl::OkStatus(); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc index 698599a5bbd..c77c805a712 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc @@ -263,7 +263,7 @@ int3 Winograd4x4To36::SelectBestWorkGroup() { const std::vector wgs = {{8, 6, 4}, {8, 6, 2}, {4, 6, 2}, {4, 6, 2}, {2, 6, 2}, {2, 6, 1}, {1, 6, 1}, {1, 3, 1}, {1, 1, 1}}; - return GetFirstSuitableWorkGroup(wgs, kernel_.GetMaxWorkGroupSize()); + return GetFirstSuitableWorkGroup(wgs, kernel_.info_.max_work_group_size); } absl::Status Winograd4x4To36::BindArguments() { @@ -465,7 +465,7 @@ int3 Winograd36To4x4::SelectBestWorkGroup() { const std::vector wgs = {{32, 4, 2}, {16, 4, 2}, {16, 4, 1}, {8, 4, 1}, {4, 4, 
1}, {2, 4, 1}, {1, 4, 1}, {1, 2, 1}, {1, 1, 1}}; - return GetFirstSuitableWorkGroup(wgs, kernel_.GetMaxWorkGroupSize()); + return GetFirstSuitableWorkGroup(wgs, kernel_.info_.max_work_group_size); } absl::Status Winograd36To4x4::BindArguments() { diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.cc b/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.cc index e85e20761e3..9a1a24895bf 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.cc @@ -34,21 +34,22 @@ std::vector Get2DWorkgroupsEqualTo128() { } std::vector GenerateWorkGroupSizesXY128( - int3 grid, int max_work_group_size, WorkGroupSizeAlignment z_alignment) { + int3 grid, const KernelInfo& kernel_info, + WorkGroupSizeAlignment z_alignment) { std::vector work_groups; work_groups.reserve(32); std::vector possible_z_sizes = GetPossibleSizes(grid.z, z_alignment); - for (int x = 1; x <= max_work_group_size; x *= 2) { - for (int y = 1; y <= max_work_group_size; y *= 2) { + for (int x = 1; x <= kernel_info.max_work_group_size; x *= 2) { + for (int y = 1; y <= kernel_info.max_work_group_size; y *= 2) { int work_group_size_xy = x * y; if (work_group_size_xy % 128 != 0 || - work_group_size_xy > max_work_group_size) { + work_group_size_xy > kernel_info.max_work_group_size) { continue; } for (auto z : possible_z_sizes) { - if (work_group_size_xy * z > max_work_group_size) { + if (work_group_size_xy * z > kernel_info.max_work_group_size) { continue; } work_groups.push_back({x, y, z}); @@ -59,15 +60,17 @@ std::vector GenerateWorkGroupSizesXY128( } std::vector GenerateWorkGroupSizesXY128Linear( - int3 grid, int max_work_group_size, WorkGroupSizeAlignment z_alignment) { + int3 grid, const KernelInfo& kernel_info, + WorkGroupSizeAlignment z_alignment) { std::vector work_groups; work_groups.reserve(32); std::vector possible_z_sizes = GetPossibleSizes(grid.z, z_alignment); - for (int x = 128; x <= max_work_group_size && x < grid.x + 128; x += 128) { + for (int x = 128; x <= kernel_info.max_work_group_size && x < grid.x + 128; + x += 128) { for (auto z : possible_z_sizes) { - if (x * z <= max_work_group_size) { + if (x * z <= kernel_info.max_work_group_size) { work_groups.push_back({x, 1, z}); } } @@ -75,22 +78,15 @@ std::vector GenerateWorkGroupSizesXY128Linear( return work_groups; } -absl::Status GetBestWorkGroupAlignedToGrid(const TuningParameters& params, - const CLKernel& kernel, - const int3& grid, - int3* best_work_group) { - std::vector work_groups; +void GetWorkGroupsAlignedToGrid(const DeviceInfo& device_info, + const KernelInfo& kernel_info, const int3& grid, + std::vector* work_groups) { int3 max_wg_size; - max_wg_size.x = params.info->max_work_group_size_x; - max_wg_size.y = params.info->max_work_group_size_y; - max_wg_size.z = params.info->max_work_group_size_z; + max_wg_size.x = device_info.max_work_group_size_x; + max_wg_size.y = device_info.max_work_group_size_y; + max_wg_size.z = device_info.max_work_group_size_z; GenerateWorkGroupSizesAlignedToGrid( - grid, max_wg_size, kernel.GetMaxWorkGroupSize(), &work_groups); - int best_work_group_index; - RETURN_IF_ERROR(params.queue->GetBestWorkGroupIndex( - kernel, *params.info, grid, work_groups, &best_work_group_index)); - *best_work_group = work_groups[best_work_group_index]; - return absl::OkStatus(); + grid, max_wg_size, kernel_info.max_work_group_size, work_groups); } int GetPenalty(int grid_size, int group_size) { @@ -210,8 +206,8 @@ absl::Status 
GetBestWorkGroupXY128(const TuningParameters& params, const CLKernel& kernel, const int3& grid, WorkGroupSizeAlignment z_alignment, int3* best_work_group) { - std::vector work_groups = GenerateWorkGroupSizesXY128( - grid, kernel.GetMaxWorkGroupSize(), z_alignment); + std::vector work_groups = + GenerateWorkGroupSizesXY128(grid, kernel.info_, z_alignment); int best_work_group_index; RETURN_IF_ERROR(params.queue->GetBestWorkGroupIndex( kernel, *params.info, grid, work_groups, &best_work_group_index)); @@ -224,8 +220,8 @@ absl::Status GetBestWorkGroupXY128Linear(const TuningParameters& params, const int3& grid, WorkGroupSizeAlignment z_alignment, int3* best_work_group) { - std::vector work_groups = GenerateWorkGroupSizesXY128Linear( - grid, kernel.GetMaxWorkGroupSize(), z_alignment); + std::vector work_groups = + GenerateWorkGroupSizesXY128Linear(grid, kernel.info_, z_alignment); int best_work_group_index; RETURN_IF_ERROR(params.queue->GetBestWorkGroupIndex( kernel, *params.info, grid, work_groups, &best_work_group_index)); @@ -254,11 +250,18 @@ absl::Status GetBestWorkGroup(const TuningParameters& params, int3* best_work_group) { switch (params.tuning_type) { case TuningType::FAST: - *best_work_group = GetWorkGroup(grid, kernel.GetMaxWorkGroupSize()); + *best_work_group = GetWorkGroup(grid, kernel.info_.max_work_group_size); return absl::OkStatus(); - case TuningType::EXHAUSTIVE: - return GetBestWorkGroupAlignedToGrid(params, kernel, grid, - best_work_group); + case TuningType::EXHAUSTIVE: { + std::vector work_groups; + GetWorkGroupsAlignedToGrid(*params.info, kernel.info_, grid, + &work_groups); + int best_work_group_index; + RETURN_IF_ERROR(params.queue->GetBestWorkGroupIndex( + kernel, *params.info, grid, work_groups, &best_work_group_index)); + *best_work_group = work_groups[best_work_group_index]; + return absl::OkStatus(); + } default: *best_work_group = {8, 4, 1}; return absl::OkStatus(); @@ -276,12 +279,19 @@ absl::Status GetBestWorkGroupConv(const TuningParameters& params, } max_z_size = std::min(max_z_size, params.info->max_work_group_size_z); *best_work_group = - GetWorkGroupConv(grid, kernel.GetMaxWorkGroupSize(), max_z_size); + GetWorkGroupConv(grid, kernel.info_.max_work_group_size, max_z_size); + return absl::OkStatus(); + } + case TuningType::EXHAUSTIVE: { + std::vector work_groups; + GetWorkGroupsAlignedToGrid(*params.info, kernel.info_, grid, + &work_groups); + int best_work_group_index; + RETURN_IF_ERROR(params.queue->GetBestWorkGroupIndex( + kernel, *params.info, grid, work_groups, &best_work_group_index)); + *best_work_group = work_groups[best_work_group_index]; return absl::OkStatus(); } - case TuningType::EXHAUSTIVE: - return GetBestWorkGroupAlignedToGrid(params, kernel, grid, - best_work_group); default: *best_work_group = {8, 4, 1}; return absl::OkStatus(); From 001ec7efbed18e9581e859513c5acc76e5aabbe9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 7 Aug 2020 13:14:43 -0700 Subject: [PATCH 2354/2522] Changes are excluded via copybara PiperOrigin-RevId: 325494931 Change-Id: I69c55142e00f691e6ce1b77557173e28022d4b83 --- tensorflow/g3doc/README.txt | 46 ------------------------------------- 1 file changed, 46 deletions(-) delete mode 100644 tensorflow/g3doc/README.txt diff --git a/tensorflow/g3doc/README.txt b/tensorflow/g3doc/README.txt deleted file mode 100644 index 515a9e9a025..00000000000 --- a/tensorflow/g3doc/README.txt +++ /dev/null @@ -1,46 +0,0 @@ -Docs have moved! 
If you just want to view TensorFlow documentation, -go to: - - https://www.tensorflow.org/ - -Documentation (on Github, tensorflow.org, and anywhere else we decide to -serve it from) is now generated from the files in -tensorflow/docs_src/ (for tutorials and other guides) and -TensorFlow source code (for the API reference pages). If you see a problem with -API reference, edit the code comments in the appropriate language. If you see a -problem with our other docs, edit the files in docs_src. - -To preview the results of your changes, or generate an offline copy of -the docs, run: - - bazel run -- tensorflow/tools/docs:generate \ - --src_dir=/path/to/tensorflow/docs_src/ \ - --output_dir=/tmp/tfdocs/ - -`src_dir` must be absolute path to documentation source. -When authoring docs, note that we have some new syntax for references -- -at least for docs coming from Python docstrings or -tensorflow/docs_src/. Use: - -* `tf.symbol` to make a link to the reference page for a Python - symbol. Note that class members don't get their own page, but the - syntax still works, since `tf.MyClass.method` links to the right - part of the tf.MyClass page. - -* `tensorflow::symbol` to make a link to the reference page for a C++ - symbol. (This only works for a few symbols but will work for more soon.) - -* @{$doc_page} to make a link to another (not an API reference) doc - page. To link to - - red/green/blue/index.md use @{$blue} or @{$green/blue}, - - foo/bar/baz.md use @{$baz} or @{$bar/baz}. - The shorter one is preferred, so we can move pages around without - breaking these references. The main exception is that the Python API - guides should probably be referred to using @{$python/} - to avoid ambiguity. To link to an anchor in that doc and use - different link text (by default it uses the title of the target - page) use: - @{$doc_page#anchor-tag$link-text} - (You can skip #anchor-tag if you just want to override the link text). - -Thanks! From 874db4d37159c8d67a1df5cc978dda2b783c6476 Mon Sep 17 00:00:00 2001 From: Cesar Crusius Date: Fri, 7 Aug 2020 13:15:49 -0700 Subject: [PATCH 2355/2522] Allows experimental loading of a model in C++ with unsupported features. PiperOrigin-RevId: 325495136 Change-Id: Ie6fd62d6826abeb6d8fa3291c666dba37fc4ca72 --- .../c/experimental/saved_model/core/tf_saved_model_api.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tensorflow/c/experimental/saved_model/core/tf_saved_model_api.cc b/tensorflow/c/experimental/saved_model/core/tf_saved_model_api.cc index c22f8d86174..0f0102be857 100644 --- a/tensorflow/c/experimental/saved_model/core/tf_saved_model_api.cc +++ b/tensorflow/c/experimental/saved_model/core/tf_saved_model_api.cc @@ -47,6 +47,7 @@ limitations under the License. #include "tensorflow/core/lib/hash/hash.h" #include "tensorflow/core/platform/casts.h" #include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/path.h" #include "tensorflow/core/platform/stringpiece.h" @@ -241,8 +242,11 @@ Status RestoreCheckpoint(SavedModelV2Bundle* bundle, // TODO(bmzhao): This requires using the newly added Save/Restore // functions from // https://github.com/tensorflow/tensorflow/commit/df6b21c13c82b5d0981642cfe18f10e60f78ea5c - return errors::Unimplemented( - "Restoring non-variable objects has not been implemented yet. "); + LOG(WARNING) << "Restoring non-variable objects has not been " + "implemented yet. 
(Kind=" + << bundle->saved_object_graph().nodes(node).kind_case() + << ")"; + return Status::OK(); } Variable* variable = From bf6f488c1102a88252960c4c8b90f6122e966b14 Mon Sep 17 00:00:00 2001 From: Jared Duke Date: Fri, 7 Aug 2020 13:22:02 -0700 Subject: [PATCH 2356/2522] Mark Interpreter:UseNNAPI(bool) deprecated Prefer using the NnApiDelegate() API directly. The current API is unreliable and causes issues with the first inference execution. PiperOrigin-RevId: 325496322 Change-Id: I44c8fc04bcd08ce5b92e22cd170e075c0abbaecf --- RELEASE.md | 2 ++ tensorflow/lite/c/BUILD | 1 + tensorflow/lite/c/c_api.cc | 10 ++++++++-- .../gpu/common/testing/tflite_model_reader.cc | 1 - tensorflow/lite/interpreter.cc | 7 ++++++- tensorflow/lite/interpreter.h | 2 ++ 6 files changed, 19 insertions(+), 4 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index bb4d29f9020..f0c590710cf 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -111,6 +111,8 @@ string to be joined is empty. * `TFLiteConverter`: * Support optional flags `inference_input_type` and `inference_output_type` for full integer quantized models. This allows users to modify the model input and output type to integer types (`tf.int8`, `tf.uint8`) instead of defaulting to float type (`tf.float32`). + * Deprecate `Interpreter::UseNNAPI(bool)` C++ API + * Prefer using `NnApiDelegate()` and related delegate configuration methods directly. * * `tf.random`: * diff --git a/tensorflow/lite/c/BUILD b/tensorflow/lite/c/BUILD index bdf86d7904f..5ac6d7881ac 100644 --- a/tensorflow/lite/c/BUILD +++ b/tensorflow/lite/c/BUILD @@ -62,6 +62,7 @@ cc_library( "//tensorflow/lite:framework", "//tensorflow/lite:version", "//tensorflow/lite/core/api", + "//tensorflow/lite/delegates/nnapi:nnapi_delegate", "//tensorflow/lite/kernels:builtin_ops", ], alwayslink = 1, diff --git a/tensorflow/lite/c/c_api.cc b/tensorflow/lite/c/c_api.cc index aa93a10302c..4afd413ba9c 100644 --- a/tensorflow/lite/c/c_api.cc +++ b/tensorflow/lite/c/c_api.cc @@ -17,6 +17,7 @@ limitations under the License. 
#include #include "tensorflow/lite/c/c_api_internal.h" +#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h" #include "tensorflow/lite/error_reporter.h" #include "tensorflow/lite/interpreter.h" #include "tensorflow/lite/kernels/register.h" @@ -123,13 +124,18 @@ TfLiteInterpreter* TfLiteInterpreterCreate( } if (optional_options) { - interpreter->UseNNAPI(optional_options->use_nnapi); - if (optional_options->num_threads != TfLiteInterpreterOptions::kDefaultNumThreads) { interpreter->SetNumThreads(optional_options->num_threads); } + if (optional_options->use_nnapi) { + if (interpreter->ModifyGraphWithDelegate(tflite::NnApiDelegate()) != + kTfLiteOk) { + return nullptr; + } + } + for (auto* delegate : optional_options->delegates) { if (interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) { return nullptr; diff --git a/tensorflow/lite/delegates/gpu/common/testing/tflite_model_reader.cc b/tensorflow/lite/delegates/gpu/common/testing/tflite_model_reader.cc index 0faa621f72f..a67602cf245 100644 --- a/tensorflow/lite/delegates/gpu/common/testing/tflite_model_reader.cc +++ b/tensorflow/lite/delegates/gpu/common/testing/tflite_model_reader.cc @@ -79,7 +79,6 @@ absl::Status BuildFromFlatBuffer(const tflite::FlatBufferModel& flatbuffer, if (interpreter_builder(&interpreter) != kTfLiteOk || !interpreter) { return absl::InternalError("Unable to prepare TfLite interpreter."); } - interpreter->UseNNAPI(false); TfLiteDelegate delegate; delegate.data_ = graph; delegate.flags = kTfLiteDelegateFlagsNone; diff --git a/tensorflow/lite/interpreter.cc b/tensorflow/lite/interpreter.cc index 88dcb37898a..62de5896d84 100644 --- a/tensorflow/lite/interpreter.cc +++ b/tensorflow/lite/interpreter.cc @@ -300,7 +300,12 @@ TfLiteStatus Interpreter::SetExecutionPlan(const std::vector& new_plan) { return primary_subgraph().SetExecutionPlan(new_plan); } -void Interpreter::UseNNAPI(bool enable) { primary_subgraph().UseNNAPI(enable); } +void Interpreter::UseNNAPI(bool enable) { + TFLITE_LOG_PROD_ONCE(TFLITE_LOG_INFO, + "Interpreter::UseNNAPI() is deprecated. Use " + "tflite::NnApiDelegate() directly instead."); + primary_subgraph().UseNNAPI(enable); +} TfLiteStatus Interpreter::SetNumThreads(int num_threads) { if (num_threads < -1) { diff --git a/tensorflow/lite/interpreter.h b/tensorflow/lite/interpreter.h index 653283bc234..b9e2045cd96 100644 --- a/tensorflow/lite/interpreter.h +++ b/tensorflow/lite/interpreter.h @@ -365,6 +365,8 @@ class Interpreter { TfLiteStatus Invoke(); /// Enable or disable the NN API (true to enable) + /// NOTE: This API is deprecated, prefer using the NNAPI delegate directly. + /// This method will be removed in a future release. void UseNNAPI(bool enable); /// Set the number of threads available to the interpreter. 
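Since this change deprecates `Interpreter::UseNNAPI(bool)` in favor of applying the NNAPI delegate directly, a minimal sketch of the replacement call sequence is shown below. It follows the `ModifyGraphWithDelegate(tflite::NnApiDelegate())` pattern used in `c_api.cc` above; the model path, resolver choice, and error handling are illustrative assumptions rather than part of the patch.

```
#include <memory>

#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"

// Builds an interpreter and enables NNAPI through the delegate API instead of
// the deprecated Interpreter::UseNNAPI(true).
std::unique_ptr<tflite::Interpreter> BuildWithNnApi(const char* model_path) {
  auto model = tflite::FlatBufferModel::BuildFromFile(model_path);
  if (!model) return nullptr;
  tflite::ops::builtin::BuiltinOpResolver resolver;
  std::unique_ptr<tflite::Interpreter> interpreter;
  if (tflite::InterpreterBuilder(*model, resolver)(&interpreter) != kTfLiteOk ||
      !interpreter) {
    return nullptr;
  }
  // NnApiDelegate() returns a delegate owned by the library; the interpreter
  // does not take ownership of it.
  if (interpreter->ModifyGraphWithDelegate(tflite::NnApiDelegate()) !=
      kTfLiteOk) {
    return nullptr;
  }
  if (interpreter->AllocateTensors() != kTfLiteOk) return nullptr;
  return interpreter;
}
```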
From 7197362d5ae9a8b7461e93064ce646b40c1eb9e5 Mon Sep 17 00:00:00 2001 From: Jose Baiocchi Date: Fri, 7 Aug 2020 13:28:37 -0700 Subject: [PATCH 2357/2522] Avoid a copy in AnnotatedTraceMe PiperOrigin-RevId: 325497419 Change-Id: I2d6bee06b6037089ab388331b8942bcd9a96addc --- tensorflow/core/profiler/lib/annotated_traceme.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/profiler/lib/annotated_traceme.h b/tensorflow/core/profiler/lib/annotated_traceme.h index 636b901e226..eb75a896107 100644 --- a/tensorflow/core/profiler/lib/annotated_traceme.h +++ b/tensorflow/core/profiler/lib/annotated_traceme.h @@ -43,7 +43,7 @@ class AnnotatedTraceMe { scoped_annotation_.emplace(absl::string_view(name)); } if (TF_PREDICT_TRUE(traceme_enabled)) { - trace_me_.emplace([name = std::move(name)] { return name; }, level); + trace_me_.emplace([&name] { return std::move(name); }, level); } } } From 247e9bd050e68fa4b055fe6c99144def3fde4e81 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Fri, 7 Aug 2020 13:32:59 -0700 Subject: [PATCH 2358/2522] Treat Case similar to If/While wrt stateless variant On import dedup to tf.Case and on export expand to either Case or StatelessCase depending on variant. Kept it mechanical to the other two control flow ops here. PiperOrigin-RevId: 325498204 Change-Id: Icf5f6f580510908d7dd7c043ac287b19862eaa02 --- .../mlir/tensorflow/ir/tf_generated_ops.td | 42 --- .../compiler/mlir/tensorflow/ir/tf_ops.td | 45 +++ .../tests/graphdef2mlir/case_op.pbtxt | 261 ++++++++++++++++++ .../tensorflow/tests/mlir2graphdef/case.mlir | 38 +++ .../mlir/tensorflow/translate/import_model.cc | 24 +- .../mlir/tensorflow/utils/export_utils.cc | 25 +- 6 files changed, 361 insertions(+), 74 deletions(-) create mode 100644 tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/case_op.pbtxt create mode 100644 tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/case.mlir diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td index 081903d13cf..bf8d7015b46 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td @@ -1350,48 +1350,6 @@ then the output will be TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } -def TF_CaseOp : TF_Op<"Case", []> { - let summary = [{ -An n-way switch statement which calls a single branch function. - }]; - - let description = [{ -An n-way switch statement, implementing the following: - ``` - switch (branch_index) { - case 0: - output = branches[0](input); - break; - case 1: - output = branches[1](input); - break; - ... 
- case [[nbranches-1]]: - default: - output = branches[nbranches-1](input); - break; - } - ``` - }]; - - let arguments = (ins - I32Tensor:$branch_index, - Variadic:$input, - - Confined]>:$branches, - DefaultValuedAttr:$output_shapes - ); - - let results = (outs - Variadic:$output - ); - - TF_DerivedOperandTypeListAttr Tin = TF_DerivedOperandTypeListAttr<1>; - TF_DerivedResultTypeListAttr Tout = TF_DerivedResultTypeListAttr<0>; - - let hasCanonicalizer = 1; -} - def TF_CastOp : TF_Op<"Cast", [NoSideEffect, SameOperandsAndResultShape]> { let summary = "Cast x of type SrcT to y of DstT."; diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td index 376b7933b47..5269bb82239 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td @@ -68,6 +68,51 @@ class TF_TensorListInitOp : TF_Op { }]; } +def TF_CaseOp : TF_Op<"Case", []> { + let summary = [{ +An n-way switch statement which calls a single branch function. + }]; + + let description = [{ +An n-way switch statement, implementing the following: + ``` + switch (branch_index) { + case 0: + output = branches[0](input); + break; + case 1: + output = branches[1](input); + break; + ... + case [[nbranches-1]]: + default: + output = branches[nbranches-1](input); + break; + } + ``` + }]; + + let arguments = (ins + I32Tensor:$branch_index, + Variadic:$input, + + Confined]>:$branches, + DefaultValuedAttr:$output_shapes, + + // Used to map StatelessCase and Case to a common op. + DefaultValuedAttr:$is_stateless + ); + + let results = (outs + Variadic:$output + ); + + TF_DerivedOperandTypeListAttr Tin = TF_DerivedOperandTypeListAttr<1>; + TF_DerivedResultTypeListAttr Tout = TF_DerivedResultTypeListAttr<0>; + + let hasCanonicalizer = 1; +} + // In MLIR, the TensorFlow tensor value is represented as an ElementsAttr, with // its type encoding the tensor's shape and data type. 
def TF_ConstOp : TF_Op<"Const", [ConstantLike, NoSideEffect, diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/case_op.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/case_op.pbtxt new file mode 100644 index 00000000000..1372ad71283 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/case_op.pbtxt @@ -0,0 +1,261 @@ +# RUN: tf-mlir-translate -graphdef-to-splatted-mlir %s -o - | FileCheck %s + +node { + name: "Const" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "Const_1" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "indexed_case" + op: "StatelessCase" + input: "Const_1" + input: "Const" + attr { + key: "Tin" + value { + list { + type: DT_INT32 + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_INT32 + } + } + } + attr { + key: "_lower_using_switch_merge" + value { + b: true + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + } + } + } + attr { + key: "branches" + value { + list { + func { + name: "indexed_case_branch0_4" + } + func { + name: "indexed_case_branch1_5" + } + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "indexed_case/Identity" + op: "Identity" + input: "indexed_case" + attr { + key: "T" + value { + type: DT_INT32 + } + } +} +library { + function { + signature { + name: "indexed_case_branch0_4" + input_arg { + name: "add_const" + type: DT_INT32 + } + output_arg { + name: "add" + type: DT_INT32 + } + } + node_def { + name: "add/y" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } + experimental_debug_info { + original_node_names: "add/y" + } + } + node_def { + name: "add_0" + op: "AddV2" + input: "add_const" + input: "add/y:output:0" + attr { + key: "T" + value { + type: DT_INT32 + } + } + experimental_debug_info { + original_node_names: "add" + } + } + ret { + key: "add" + value: "add_0:z:0" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "indexed_case_branch1_5" + input_arg { + name: "add_const" + type: DT_INT32 + } + output_arg { + name: "add" + type: DT_INT32 + } + } + node_def { + name: "add/y" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } + experimental_debug_info { + original_node_names: "add/y" + } + } + node_def { + name: "add_0" + op: "AddV2" + input: "add_const" + input: "add/y:output:0" + attr { + key: "T" + value { + type: DT_INT32 + } + } + experimental_debug_info { + original_node_names: "add" + } + } + ret { + key: "add" + value: "add_0:z:0" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } +} +versions { + producer: 486 + min_consumer: 12 +} + +# CHECK: tf.Case +# CHECK-SAME: is_stateless diff --git a/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/case.mlir b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/case.mlir new file mode 100644 
index 00000000000..2f2ee6f1286 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/case.mlir @@ -0,0 +1,38 @@ +// RUN: tf-mlir-translate -mlir-to-graphdef %s -o - | FileCheck %s + +module attributes {tf.versions = {bad_consumers = [], min_consumer = 12 : i32, producer = 486 : i32}} { + func @main() { + tf_executor.graph { + %outputs, %control = tf_executor.island wraps "tf.Const"() {device = "", value = dense<1> : tensor} : () -> tensor + %outputs_0, %control_1 = tf_executor.island wraps "tf.Const"() {device = "", value = dense<0> : tensor} : () -> tensor + %outputs_2, %control_3 = tf_executor.island wraps "tf.Case"(%outputs_0, %outputs) {Tin = [i32], Tout = [i32], _lower_using_switch_merge = true, _read_only_resource_inputs = [], branches = [@indexed_case_branch0_40, @indexed_case_branch1_50], device = "", is_stateless = true, output_shapes = [#tf.shape<>]} : (tensor, tensor) -> tensor<*xi32> loc("stateless_case") + %outputs_4, %control_5 = tf_executor.island wraps "tf.Identity"(%outputs_2) {device = ""} : (tensor<*xi32>) -> tensor<*xi32> + %outputs_6, %control_7 = tf_executor.island wraps "tf.Case"(%outputs_0, %outputs) {Tin = [i32], Tout = [i32], _lower_using_switch_merge = true, _read_only_resource_inputs = [], branches = [@indexed_case_branch0_40, @indexed_case_branch1_50], device = "", is_stateless = false, output_shapes = [#tf.shape<>]} : (tensor, tensor) -> tensor<*xi32> loc("regular_case") + tf_executor.fetch + } + return + } + + func @indexed_case_branch0_40(%arg0: tensor) -> tensor<*xi32> attributes {sym_visibility = "private"} { + %0 = tf_executor.graph { + %outputs, %control = tf_executor.island wraps "tf.Const"() {device = "", value = dense<1> : tensor} : () -> tensor + %outputs_0, %control_1 = tf_executor.island wraps "tf.AddV2"(%arg0, %outputs) {device = ""} : (tensor, tensor) -> tensor<*xi32> + tf_executor.fetch %outputs_0 : tensor<*xi32> + } + return %0 : tensor<*xi32> + } + + func @indexed_case_branch1_50(%arg0: tensor) -> tensor<*xi32> attributes {sym_visibility = "private"} { + %0 = tf_executor.graph { + %outputs, %control = tf_executor.island wraps "tf.Const"() {device = "", value = dense<2> : tensor} : () -> tensor + %outputs_0, %control_1 = tf_executor.island wraps "tf.AddV2"(%arg0, %outputs) {device = ""} : (tensor, tensor) -> tensor<*xi32> + tf_executor.fetch %outputs_0 : tensor<*xi32> + } + return %0 : tensor<*xi32> + } +} + +// CHECK: name: "stateless_case" +// CHECK-NEXT: "StatelessCase" +// CHECK: name: "regular_case" +// CHECK-NEXT: "Case" diff --git a/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc b/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc index ef0087c4310..94ddf76736e 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc @@ -1934,22 +1934,18 @@ Status ImporterBase::ConvertNode(const Node& node) { } } - // Map If and StatelessIf op in TensorFlow to the common If op in MLIR and add - // the differentiating attribute. 
- if (node.IsIfNode()) { - result.name = mlir::OperationName(get_full_op_name("If"), context_); - mlir::BoolAttr val = builder_.getBoolAttr(node_type_name == "StatelessIf"); + auto composite_control_flow_op = [&](const std::string& name) { + result.name = mlir::OperationName(get_full_op_name(name), context_); + bool stateless = absl::StartsWith(node_type_name, "Stateless"); + mlir::BoolAttr val = builder_.getBoolAttr(stateless); result.attributes.push_back(builder_.getNamedAttr("is_stateless", val)); - } + }; - // Map While and StatelessWhile op in TensorFlow to the common While op in - // MLIR and add the differentiating attribute. - if (node.IsWhileNode()) { - result.name = mlir::OperationName(get_full_op_name("While"), context_); - mlir::BoolAttr val = - builder_.getBoolAttr(node_type_name == "StatelessWhile"); - result.attributes.push_back(builder_.getNamedAttr("is_stateless", val)); - } + // Map Case/If/While and StatelessCase/If/While op in TensorFlow to the common + // Case/If/While op in MLIR and add the differentiating attribute. + if (node.IsCaseNode()) composite_control_flow_op("Case"); + if (node.IsIfNode()) composite_control_flow_op("If"); + if (node.IsWhileNode()) composite_control_flow_op("While"); // Register the mapping between the TF node and the newly created operation. node_values_[node.id()] = diff --git a/tensorflow/compiler/mlir/tensorflow/utils/export_utils.cc b/tensorflow/compiler/mlir/tensorflow/utils/export_utils.cc index 0364b935b92..ad9ddb277d7 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/export_utils.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/export_utils.cc @@ -227,25 +227,13 @@ Status ConvertAttribute(const mlir::ArrayAttr& attr, AttrValue* value) { return Status::OK(); } -// Updates NodeDef constructed out of an MLIR If op to map it to either -// TensorFlow StatelessIf or If op depending on the additional attribute. -void UpdateCompositeIfOp(NodeDef* node_def) { +// Updates NodeDef constructed out of an MLIR Case/IfW/While op to map it to +// either TensorFlow StatelessX or X op depending on the additional attribute. +void UpdateCompositeOp(NodeDef* node_def) { auto it = node_def->mutable_attr()->find("is_stateless"); if (it != node_def->attr().end()) { if (it->second.b()) { - *node_def->mutable_op() = "StatelessIf"; - } - node_def->mutable_attr()->erase(it); - } -} - -// Updates NodeDef constructed out of an MLIR While op to map it to either -// TensorFlow StatelessWhile or While op depending on the additional attribute. 
-void UpdateCompositeWhileOp(NodeDef* node_def) { - auto it = node_def->mutable_attr()->find("is_stateless"); - if (it != node_def->attr().end()) { - if (it->second.b()) { - *node_def->mutable_op() = "StatelessWhile"; + *node_def->mutable_op() = "Stateless" + node_def->op(); } node_def->mutable_attr()->erase(it); } @@ -352,8 +340,9 @@ StatusOr> GetOperationNodeDef( TF_RETURN_IF_ERROR(ConvertLocation( inst->getLoc(), node_def->mutable_experimental_debug_info())); - if (node_def->op() == "If") UpdateCompositeIfOp(node_def.get()); - if (node_def->op() == "While") UpdateCompositeWhileOp(node_def.get()); + if (node_def->op() == "Case") UpdateCompositeOp(node_def.get()); + if (node_def->op() == "If") UpdateCompositeOp(node_def.get()); + if (node_def->op() == "While") UpdateCompositeOp(node_def.get()); return node_def; } From 5296ad4ffdb2c4ec6fb3a413e91591052d5f8684 Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Fri, 7 Aug 2020 13:59:33 -0700 Subject: [PATCH 2359/2522] [Resubmit] If an input-output pair is configured to be must-alias(off by default), they must be aliased at runtime. PiperOrigin-RevId: 325503193 Change-Id: Ida4e46531052c40eebce5f0dff4c50914cc1f3f4 --- .../utils/compile_mlir_util_test.cc | 2 +- tensorflow/compiler/xla/client/xla_builder.cc | 2 +- tensorflow/compiler/xla/client/xla_builder.h | 17 ++++-- .../xla/service/cpu/cpu_executable.cc | 6 ++ .../xla/service/gpu/gpu_executable.cc | 6 ++ tensorflow/compiler/xla/service/hlo.proto | 14 ++++- .../service/hlo_input_output_alias_config.cc | 38 +++++++++--- .../service/hlo_input_output_alias_config.h | 32 +++++++--- tensorflow/compiler/xla/service/hlo_parser.cc | 59 +++++++++++-------- .../compiler/xla/service/hlo_parser_test.cc | 41 ++----------- .../xla/tests/buffer_donation_test.cc | 49 +++++++++++++-- .../tpu/tpu_executable_interface.cc | 18 ++++++ 12 files changed, 194 insertions(+), 90 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util_test.cc b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util_test.cc index 6ebf6897bb1..8a07aab11e1 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util_test.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util_test.cc @@ -524,7 +524,7 @@ TEST(CompileGraphToXlaHlo, Resources) { ASSERT_TRUE(status_or_hlo_module.ok()); constexpr char expected_hlo_module_string[] = - R"(HloModule main.4, input_output_alias={ {0}: 1 } + R"(HloModule main.4, input_output_alias={ {0}: (1, {}, may_alias) } ENTRY %main.4 (Arg_0.1: f32[2], Arg_1.2: f32[2]) -> (f32[2]) { %Arg_1.2 = f32[2]{0} parameter(1) diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc index 52f61408cbb..484fb0aabe7 100644 --- a/tensorflow/compiler/xla/client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_builder.cc @@ -446,7 +446,7 @@ StatusOr XlaBuilder::Build(int64 root_id, alias.param_index.ToString().c_str()); } TF_RETURN_IF_ERROR(config.SetUpAlias(alias.output_index, alias.param_number, - alias.param_index)); + alias.param_index, alias.kind)); } *module->mutable_input_output_alias() = config.ToProto(); return Status::OK(); diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h index 1960d0c4632..aa5074d28d9 100644 --- a/tensorflow/compiler/xla/client/xla_builder.h +++ b/tensorflow/compiler/xla/client/xla_builder.h @@ -32,6 +32,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/dynamic_parameter_binding.h" #include "tensorflow/compiler/xla/service/hlo.pb.h" +#include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" @@ -349,12 +350,16 @@ class XlaBuilder { // not available until the computation is built, and eventual error in the // arguments of this API will be detected only at computation Build() time. // - // Note: Aliasing API is 'may-alias' and only donated buffer at runtime will - // be aliased with output. If a buffer is not donated at runtime, a copy will - // be inserted by XLA to prevent buffer clobbering. + // Note: Except when 'must-alias' is true, alias is assumed to be 'may-alias' + // and only donated buffer at runtime will be aliased with output. If a buffer + // is not donated at runtime, a copy will be inserted by XLA to prevent buffer + // clobbering. void SetUpAlias(const ShapeIndex& output_index, int64 param_number, - const ShapeIndex& param_index) { - input_output_aliases_.push_back({output_index, param_number, param_index}); + const ShapeIndex& param_index, + HloInputOutputAliasConfig::AliasKind kind = + HloInputOutputAliasConfig::AliasKind::kMayAlias) { + input_output_aliases_.push_back( + {output_index, param_number, param_index, kind}); } // Describes an input/output alias as inserted by the SetUpAlias() API. @@ -365,6 +370,8 @@ class XlaBuilder { int64 param_number; // Specifies the index of the aliased buffer in the parameter ShapeIndex param_index; + // Specifies if the alias is a must alias or may alias. + HloInputOutputAliasConfig::AliasKind kind; }; // Looks up the HloInstruction and sets the frontend attribute "attribute" to diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc index 0abcc91a1d7..7431e829b8e 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc @@ -247,6 +247,12 @@ StatusOr CpuExecutable::CreateResultShapedBuffer( ExecutionInput& input = arguments[alias->parameter_number]; MaybeOwningDeviceMemory* maybe_owning_memory = input.MutableBuffer(alias->parameter_index); + if (alias->must_alias() && !maybe_owning_memory->HasOwnership()) { + return InvalidArgument( + "An input was configured to be must-alias at " + "compile time but not donated at runtime: %s", + alias->ToString()); + } if (absl::optional owning = maybe_owning_memory->Release()) { // If the caller passes the ownership of the device memory, reuse it diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc index 469f2919fba..726f1963545 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc @@ -480,6 +480,12 @@ StatusOr GpuExecutable::ExecuteAsyncOnStream( ExecutionInput& input = arguments[alias->parameter_number]; MaybeOwningDeviceMemory* maybe_owning_memory = input.MutableBuffer(alias->parameter_index); + if (alias->must_alias() && !maybe_owning_memory->HasOwnership()) { + return InvalidArgument( + "An input was configured to be must-alias at " + "compile time but not donated at runtime: %s", + alias->ToString()); + } if (absl::optional owning = maybe_owning_memory->Release()) { // If the caller passes the ownership of the 
device memory, reuse it diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto index 960f60fe882..e043216c17e 100644 --- a/tensorflow/compiler/xla/service/hlo.proto +++ b/tensorflow/compiler/xla/service/hlo.proto @@ -283,6 +283,16 @@ message HloScheduleProto { map sequences = 1; } +enum Kind { + // Define a UNDEFINED_ALIAS equal to zero to get around the default-0 proto3 + // behavior and missing has_*() APIs. + UNDEFINED_ALIAS = 0; + // The buffers may or may not alias at runtime. + MAY_ALIAS = 1; + // The buffers must alias at runtime. + MUST_ALIAS = 2; +} + message HloInputOutputAliasProto { // The following proto describes a pair of aliased an input // (described by parameter number and a ShapeIndex of the parameter) @@ -304,8 +314,8 @@ message HloInputOutputAliasProto { int64 parameter_number = 2; // ShapeIndex of the parameter instruction. repeated int64 parameter_shape_index = 3; - reserved 4; - reserved "kind"; + // The kind of alias to be setup. + Kind kind = 4; } repeated AliasEntryProto entries = 1; diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc index e123161720b..34bc30d641f 100644 --- a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc +++ b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h" +#include "tensorflow/compiler/xla/service/hlo.pb.h" #include "tensorflow/compiler/xla/service/hlo_module.h" namespace xla { @@ -24,9 +25,10 @@ bool HloInputOutputAliasConfig::OutputHasAlias( return alias_.element(output_index).has_value(); } -Status HloInputOutputAliasConfig::SetUpAlias(const ShapeIndex& output_index, - int64 param_number, - const ShapeIndex& param_index) { +Status HloInputOutputAliasConfig::SetUpAlias( + const ShapeIndex& output_index, int64 param_number, + const ShapeIndex& param_index, + HloInputOutputAliasConfig::AliasKind must_alias) { TF_RET_CHECK(ShapeUtil::IndexIsValid(alias_.shape(), output_index)) << "Trying to set up alias at " << output_index.ToString() << " which is an invalid index for shape " @@ -41,7 +43,8 @@ Status HloInputOutputAliasConfig::SetUpAlias(const ShapeIndex& output_index, param_number, param_index.ToString(), output_index.ToString(), alias_.element(output_index)->parameter_number, alias_.element(output_index)->parameter_index.ToString()); - (*alias_.mutable_element(output_index)) = Alias(param_number, param_index); + (*alias_.mutable_element(output_index)) = + Alias(param_number, param_index, must_alias); VLOG(4) << "Set up alias between output index " << output_index.ToString() << " and parameter " << param_index << " at index " << param_index.ToString(); @@ -61,6 +64,11 @@ HloInputOutputAliasProto HloInputOutputAliasConfig::ToProto() const { for (int64 i : data->parameter_index) { entry.add_parameter_shape_index(i); } + if (data->must_alias()) { + entry.set_kind(Kind::MUST_ALIAS); + } else { + entry.set_kind(Kind::MAY_ALIAS); + } result.add_entries()->Swap(&entry); } }); @@ -77,8 +85,9 @@ StatusOr HloInputOutputAliasConfig::CreateFromProto( int64 param_number = entry.parameter_number(); ShapeIndex param_index(entry.parameter_shape_index().begin(), entry.parameter_shape_index().end()); + AliasKind kind = entry.kind() == Kind::MAY_ALIAS ? 
kMayAlias : kMustAlias; TF_RETURN_IF_ERROR( - result.SetUpAlias(output_index, param_number, param_index)); + result.SetUpAlias(output_index, param_number, param_index, kind)); } return result; } @@ -93,9 +102,9 @@ string HloInputOutputAliasConfig::ToString() const { ForEachAlias([&](const ShapeIndex& output_index, const Alias& alias) { pieces.push_back(absl::StrFormat( - " OutputIndex %s is aliased with parameter %lld at %s:", - output_index.ToString(), alias.parameter_number, - alias.parameter_index.ToString())); + " OutputIndex %s is %saliased with parameter %lld at %s:", + output_index.ToString(), alias.kind == kMustAlias ? "must-" : "may-", + alias.parameter_number, alias.parameter_index.ToString())); }); return absl::StrJoin(pieces, "\n"); } @@ -112,6 +121,19 @@ string HloInputOutputAliasConfig::ToShortString() const { return absl::StrJoin(pieces, ", "); } +bool HloInputOutputAliasConfig::ParameterMustAlias( + int64 param_number, const ShapeIndex& param_index) const { + bool result = false; + alias_.ForEachElement( + [&](const xla::ShapeIndex&, absl::optional alias) { + if (alias && alias->parameter_number == param_number && + alias->parameter_index == param_index && alias->must_alias()) { + result = true; + } + }); + return result; +} + absl::optional HloInputOutputAliasConfig::GetAliasedOutput( int64 param_number, const ShapeIndex& param_index) const { absl::optional output; diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h index d5ca28e9387..6b84bdb6a68 100644 --- a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h +++ b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h @@ -32,22 +32,32 @@ class HloModule; // parameter index in the entry computation. class HloInputOutputAliasConfig { public: + // The kind of aliases which can be set. A kMayAlias is one setup at + // compilation time by the user, and has to be respected. A kMustAlias one + // might be setup by the compiler, if it decides it is convenient to do so. + enum AliasKind { + kMayAlias, + kMustAlias, + }; // Defines the alias information for a given output buffer. A given output // buffer shape index can refer only to one parameter+index. struct Alias { - Alias(int64 parameter_number, ShapeIndex parameter_index) + Alias(int64 parameter_number, ShapeIndex parameter_index, + AliasKind kind = kMayAlias) : parameter_number(parameter_number), - parameter_index(std::move(parameter_index)) {} + parameter_index(std::move(parameter_index)), + kind(kind) {} int64 parameter_number; ShapeIndex parameter_index; + AliasKind kind; + + bool must_alias() const { return kind == kMustAlias; } std::string ToString() { - if (parameter_index.empty()) { - return absl::StrCat(parameter_number); - } - return absl::StrFormat("(%lld, %s)", parameter_number, - parameter_index.ToString()); + return absl::StrFormat("(%lld, %s, %s)", parameter_number, + parameter_index.ToString(), + kind == kMustAlias ? "must_alias" : "may_alias"); } }; @@ -61,7 +71,8 @@ class HloInputOutputAliasConfig { // Sets up alias config from `output_index` to `param_index` at // `param_number`. Status SetUpAlias(const ShapeIndex& output_index, int64 param_number, - const ShapeIndex& param_index); + const ShapeIndex& param_index, + AliasKind must_alias = kMayAlias); // Returns true if the given parameter is aliased with one of the output // buffers. 
@@ -92,6 +103,11 @@ class HloInputOutputAliasConfig { absl::optional GetAliasedParameter( const ShapeIndex& output_index) const; + // Returns if the parameter at the given parameter number and parameter + // index must-alias with an output. + bool ParameterMustAlias(int64 param_number, + const ShapeIndex& param_index) const; + using AliasFn = std::function; diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc index b12779e65ce..a093a9d0f52 100644 --- a/tensorflow/compiler/xla/service/hlo_parser.cc +++ b/tensorflow/compiler/xla/service/hlo_parser.cc @@ -552,33 +552,39 @@ bool HloParserImpl::ParseAliasing(AliasingData* data) { return false; } - if (lexer_.GetKind() != TokKind::kLparen) { - // Short form: "{0}: 0", output index "{}" is assumed. - int64 param_num; - ParseInt64(¶m_num); - data->emplace(std::piecewise_construct, std::forward_as_tuple(out), - std::forward_as_tuple(param_num, ShapeIndex{})); - } else { - // Long form: "{0}: (0, {0})", output index is explicitly specified. - if (!ParseToken(TokKind::kLparen, errmsg)) { - return false; - } - int64 param_num; - ParseInt64(¶m_num); - if (!ParseToken(TokKind::kComma, errmsg)) { - return false; - } - ShapeIndex param_idx; - if (!ParseShapeIndex(¶m_idx)) { - return false; - } - data->emplace(std::piecewise_construct, std::forward_as_tuple(out), - std::forward_as_tuple(param_num, param_idx)); - if (!ParseToken(TokKind::kRparen, errmsg)) { - return false; + if (!ParseToken(TokKind::kLparen, errmsg)) { + return false; + } + int64 param_num; + ParseInt64(¶m_num); + if (!ParseToken(TokKind::kComma, errmsg)) { + return false; + } + ShapeIndex param_idx; + if (!ParseShapeIndex(¶m_idx)) { + return false; + } + + HloInputOutputAliasConfig::AliasKind alias_kind = + HloInputOutputAliasConfig::kMayAlias; + if (EatIfPresent(TokKind::kComma)) { + std::string type; + ParseName(&type); + if (type == "must-alias") { + alias_kind = HloInputOutputAliasConfig::kMustAlias; + } else if (type == "may-alias") { + alias_kind = HloInputOutputAliasConfig::kMayAlias; + } else { + return TokenError("Unexpected aliasing kind; expected SYSTEM or USER"); } } + data->emplace(std::piecewise_construct, std::forward_as_tuple(out), + std::forward_as_tuple(param_num, param_idx, alias_kind)); + if (!ParseToken(TokKind::kRparen, errmsg)) { + return false; + } + if (!EatIfPresent(TokKind::kComma)) { break; } @@ -624,8 +630,9 @@ bool HloParserImpl::ParseHloModule(HloModule* module) { if (aliasing_data) { HloInputOutputAliasConfig alias_config(module->result_shape()); for (auto& p : *aliasing_data) { - Status st = alias_config.SetUpAlias(p.first, p.second.parameter_number, - p.second.parameter_index); + Status st = + alias_config.SetUpAlias(p.first, p.second.parameter_number, + p.second.parameter_index, p.second.kind); if (!st.ok()) { return TokenError(st.error_message()); } diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc index 1b33cf2f4c3..7880075dcbe 100644 --- a/tensorflow/compiler/xla/service/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc @@ -2399,7 +2399,7 @@ ENTRY c2 { TEST_F(HloParserTest, SimpleAliasing) { const string original = R"( -HloModule Module, input_output_alias={ {0}: (0, {0}), {1}: (0, {1}) } +HloModule Module, input_output_alias={ {0}: (0, {0}, must-alias), {1}: (0, {1}) } ENTRY entry { %p = (f32[], f32[]) parameter(0) @@ -2413,42 +2413,13 @@ ENTRY entry { std::unique_ptr parsed_module = 
module.ConsumeValueOrDie(); EXPECT_EQ(parsed_module->input_output_alias_config().GetAliasedOutput(0, {0}), ShapeIndex{0}); + + EXPECT_TRUE( + parsed_module->input_output_alias_config().ParameterMustAlias(0, {0})); EXPECT_EQ(parsed_module->input_output_alias_config().GetAliasedOutput(0, {1}), ShapeIndex{1}); -} - -TEST_F(HloParserTest, SimpleAliasingShortForm) { - const string original = R"( -HloModule Module, input_output_alias={ {0}: 0, {1}: 1 } - -ENTRY entry { - %p0 = f32[] parameter(0) - %p1 = f32[] parameter(1) - ROOT %out = (f32[], f32[]) tuple(%p0, %p1) -} - )"; - auto module = ParseAndReturnVerifiedModule(original); - TF_ASSERT_OK(module.status()); - std::unique_ptr parsed_module = module.ConsumeValueOrDie(); - EXPECT_EQ(parsed_module->input_output_alias_config().GetAliasedOutput(0, {}), - ShapeIndex{0}); - EXPECT_EQ(parsed_module->input_output_alias_config().GetAliasedOutput(1, {}), - ShapeIndex{1}); -} - -TEST_F(HloParserTest, SimpleAliasingShortFormError) { - const string original = R"( -HloModule Module, input_output_alias={ {0}: A, {1}: 1 } - -ENTRY entry { - %p0 = f32[] parameter(0) - %p1 = f32[] parameter(1) - ROOT %out = (f32[], f32[]) tuple(%p0, %p1) -} - )"; - ExpectHasSubstr( - ParseAndReturnUnverifiedModule(original).status().error_message(), - "expects integer"); + EXPECT_FALSE( + parsed_module->input_output_alias_config().ParameterMustAlias(0, {1})); } TEST_F(HloParserTest, NestedAliasing) { diff --git a/tensorflow/compiler/xla/tests/buffer_donation_test.cc b/tensorflow/compiler/xla/tests/buffer_donation_test.cc index 856ea7c9b44..f78083fe2af 100644 --- a/tensorflow/compiler/xla/tests/buffer_donation_test.cc +++ b/tensorflow/compiler/xla/tests/buffer_donation_test.cc @@ -61,7 +61,7 @@ class BufferDonationTest : public HloTestBase { absl::Span argument_literals, absl::Span donate_arguments, absl::Span expected_runtime_aliasing, - const Literal& expected) { + const Literal& expected, std::string expected_failure = "") { // Create a copy of the output shape because the HLO module is std::moved // into the compiler and may be deallocated. 
const Shape output_shape = hlo_module->result_shape(); @@ -123,10 +123,19 @@ class BufferDonationTest : public HloTestBase { ExecutionInput(std::move(owned_buffers), argument_literal.shape())); } - TF_ASSERT_OK_AND_ASSIGN( - ExecutionOutput output, + StatusOr output_status = executable->ExecuteAsyncOnStream(&service_run_options, std::move(args), - /*hlo_execution_profile=*/nullptr)); + /*hlo_execution_profile=*/nullptr); + if (!expected_failure.empty()) { + ASSERT_FALSE(output_status.ok()); + ASSERT_TRUE(absl::StrContains(output_status.status().error_message(), + expected_failure)) + << "got: \n" + << output_status.status().error_message() << " \nvs want\n" + << expected_failure; + return; + } + ExecutionOutput output = output_status.ConsumeValueOrDie(); se::DeviceMemoryBase result_root_buffer = output.Result().root_buffer(); LOG(INFO) << "result allocation = " << result_root_buffer.opaque() @@ -303,5 +312,37 @@ ENTRY entry { #endif } +TEST_F(BufferDonationTest, TestMustAliasNotDonated) { + HloModuleConfig config; + + StatusOr> module = + ParseAndReturnVerifiedModule(R"( +HloModule module + +ENTRY entry { + a = f32[] parameter(0) + b = f32[] parameter(1) + ROOT out = (f32[], f32[]) tuple(a, b) +} + )", + config); + + TF_ASSERT_OK(module->get()->input_output_alias_config().SetUpAlias( + {0}, 0, {}, HloInputOutputAliasConfig::kMustAlias)); + + std::vector args; + args.push_back(LiteralUtil::CreateR0(0.1)); + args.push_back(LiteralUtil::CreateR0(0.2)); + Literal expected = LiteralUtil::MakeTupleFromSlices( + {LiteralUtil::CreateR0(0.1), LiteralUtil::CreateR0(0.2)}); + +#ifndef XLA_TEST_BACKEND_INTERPRETER + RunAndCheck(std::move(*module), args, + /*donate_arguments=*/{false, false}, {true, false}, expected, + "An input was configured to be must-alias at " + "compile time but not donated at runtime:"); +#endif +} + } // namespace } // namespace xla diff --git a/tensorflow/stream_executor/tpu/tpu_executable_interface.cc b/tensorflow/stream_executor/tpu/tpu_executable_interface.cc index 13f9db98e5d..f260cc1631f 100644 --- a/tensorflow/stream_executor/tpu/tpu_executable_interface.cc +++ b/tensorflow/stream_executor/tpu/tpu_executable_interface.cc @@ -62,6 +62,24 @@ TpuExecutableInterface::AllocateOutputMemoryWithInputReuse( << " host_shape = " << ShapeUtil::HumanStringWithLayout(host_shape); Shape device_shape = HostShapeToDeviceShape(host_shape); + TF_RETURN_IF_ERROR(alias_config.ForEachAliasWithStatus( + [&](const ShapeIndex& output_index, + absl::optional alias) { + if (alias && alias->must_alias()) { + VLOG(1) << alias->ToString(); + const MaybeOwningDeviceMemory& original_input = + (*arguments)[alias->parameter_number].Buffers().element( + alias->parameter_index); + if (!original_input.HasOwnership()) { + return InvalidArgument( + "An input was configured to be must-alias at " + "compile time but not donated at runtime: %s", + alias->ToString()); + } + } + return Status::OK(); + })); + if (VLOG_IS_ON(3)) { VLOG(3) << "AllocateOutputMemoryWithInputReuse, device = " << device_ordinal << " host_shape = " << ShapeUtil::HumanStringWithLayout(host_shape); From 4684e40f18bcc5af6ef5a5605d9e9901d430621f Mon Sep 17 00:00:00 2001 From: Jared Duke Date: Fri, 7 Aug 2020 14:18:53 -0700 Subject: [PATCH 2360/2522] Clarify default Interpreter NNAPI behavior PiperOrigin-RevId: 325507160 Change-Id: Id2063801d286ebda56e0105c806aaed52a930e72 --- tensorflow/lite/interpreter.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/lite/interpreter.h b/tensorflow/lite/interpreter.h index 
b9e2045cd96..ab2d1250513 100644 --- a/tensorflow/lite/interpreter.h +++ b/tensorflow/lite/interpreter.h @@ -364,7 +364,11 @@ class Interpreter { /// Returns status of success or failure. TfLiteStatus Invoke(); - /// Enable or disable the NN API (true to enable) + /// Enable or disable NNAPI (true to enable). Disabled by default. + /// + /// WARNING: NNAPI cannot be disabled after the graph has been prepared + /// (via `AllocateTensors`) with NNAPI enabled. + /// /// NOTE: This API is deprecated, prefer using the NNAPI delegate directly. /// This method will be removed in a future release. void UseNNAPI(bool enable); From 201d45cea27c1792a86b3fc7eb688fb2dd1d0df1 Mon Sep 17 00:00:00 2001 From: Reed Wanderman-Milne Date: Fri, 7 Aug 2020 14:27:14 -0700 Subject: [PATCH 2361/2522] Improve performance of fp16 DepthwiseConv2DBackpropFilter. When cuDNN is not used, performance and numeric stability is improved by casting inputs to float32 and outputs back to float16. The original implementation does accumulation in float16 and is slow for unknown reasons. Running the benchmark in [this comment](https://github.com/tensorflow/tensorflow/issues/41715#issuecomment-664705080) on my machine with a Titan V, I get the following numbers. All numbers are in seconds. ``` bench before after float16 NHWC backprop_filter 7.6379 0.0098 float16 NCHW backprop_filter 4.1965 0.0449 float32 NHWC backprop_filter 0.0094 0.0094 float32 NCHW backprop_filter 0.0449 0.0444 ``` Fixes https://github.com/tensorflow/tensorflow/issues/41715. PiperOrigin-RevId: 325508729 Change-Id: I694a62dcdd8731bc90e98d2a09486160d8740b5f --- tensorflow/core/kernels/BUILD | 1 + .../core/kernels/depthwise_conv_grad_op.cc | 42 +++++++++++++++++-- .../kernel_tests/depthwise_conv_op_test.py | 2 +- 3 files changed, 40 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index f98b510b96f..99970a9558c 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -4422,6 +4422,7 @@ tf_kernel_library( "//tensorflow/core:core_cpu", "//tensorflow/core:framework", "//tensorflow/core:lib", + ":cast_op", ] + if_cuda([ "@local_config_cuda//cuda:cudnn_header", ]), diff --git a/tensorflow/core/kernels/depthwise_conv_grad_op.cc b/tensorflow/core/kernels/depthwise_conv_grad_op.cc index 310bd73ba65..b809e1d1065 100644 --- a/tensorflow/core/kernels/depthwise_conv_grad_op.cc +++ b/tensorflow/core/kernels/depthwise_conv_grad_op.cc @@ -27,6 +27,7 @@ limitations under the License. #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/cast_op.h" #include "tensorflow/core/kernels/conv_grad_ops.h" #include "tensorflow/core/kernels/depthwise_conv_op.h" #include "tensorflow/core/lib/core/status.h" @@ -1180,12 +1181,45 @@ class DepthwiseConv2dNativeBackpropFilterOp : public OpKernel { return; } - auto out_backprop_ptr = out_backprop.template flat().data(); - auto input_ptr = input.template flat().data(); - auto filter_backprop_ptr = filter_backprop->template flat().data(); - LaunchDepthwiseConvBackpropFilterOp()( + // For GPU inputs with type half, we cast inputs to float and outputs back + // to half, as half implementation is slow and does not use full precision + // accumulation in some cases. 
+ constexpr bool cast_to_float = std::is_same::value && + std::is_same::value; + using U = typename std::conditional::type; + Tensor casted_out_backprop = out_backprop; + Tensor casted_input = input; + Tensor casted_filter_backprop = *filter_backprop; + const Device& device = context->template eigen_device(); + if (cast_to_float) { + functor::CastFunctor cast; + OP_REQUIRES_OK(context, + context->allocate_temp(DT_FLOAT, out_backprop.shape(), + &casted_out_backprop)); + cast(device, casted_out_backprop.template flat(), + out_backprop.template flat()); + OP_REQUIRES_OK(context, context->allocate_temp(DT_FLOAT, input.shape(), + &casted_input)); + cast(device, casted_input.template flat(), + input.template flat()); + OP_REQUIRES_OK(context, + context->allocate_temp(DT_FLOAT, filter_backprop->shape(), + &casted_filter_backprop)); + } + + auto out_backprop_ptr = casted_out_backprop.template flat().data(); + auto input_ptr = casted_input.template flat().data(); + auto filter_backprop_ptr = casted_filter_backprop.template flat().data(); + LaunchDepthwiseConvBackpropFilterOp()( context, args, out_backprop_ptr, input_ptr, filter_backprop_ptr, data_format_); + + if (cast_to_float) { + functor::CastFunctor cast; + const Tensor& casted_filter_backprop_const = casted_filter_backprop; + cast(device, filter_backprop->template flat(), + casted_filter_backprop_const.template flat()); + } } protected: diff --git a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py index 093de720b53..266a0f8d0fb 100644 --- a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py +++ b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py @@ -832,7 +832,7 @@ class DepthwiseConv2DTest(test.TestCase): # double datatype is currently not supported for convolution ops # on the ROCm platform optional_float64 = [] if test.is_built_with_rocm() else [dtypes.float64] - for data_type in ([dtypes.float32] + optional_float64): + for data_type in ([dtypes.float16, dtypes.float32] + optional_float64): self._ConstructAndTestGradient( input_size, filter_size, From a5f56f8c1166cd4a45b2a4fc10d07fa1dd3f1e6f Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Fri, 7 Aug 2020 14:29:41 -0700 Subject: [PATCH 2362/2522] Enable creating variables in loops. PiperOrigin-RevId: 325509160 Change-Id: I1d73baf75d0be1b3707b1cfceb97e8b9a32162e4 --- RELEASE.md | 14 ++++++++++++++ .../python/autograph/operators/control_flow.py | 3 ++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/RELEASE.md b/RELEASE.md index f0c590710cf..5f0553c2a94 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -106,6 +106,20 @@ True, the function may use type annotations to optimize the tracing performance. * Added support for `iter(DistributedDataset)` in AutoGraph `for` loops. + * AutoGraph now allows creating new symbols inside a TensorFLow loop, if + the values of these symbols at an iteration does not depend on the previous + iteration. These types of loops must run at least one iteration, and will + raise a runtime error otherwise. + + Example: + + ``` + for batch in data: + outputs = train_step(batch) + tf.print('final outputs', outputs) + ``` + See tensorflow/python/autograph/g3doc/reference/limitations.md for more + info. * `tf.lite`: * `DynamicBuffer::AddJoinedString()` will now add a separator if the first string to be joined is empty. 
diff --git a/tensorflow/python/autograph/operators/control_flow.py b/tensorflow/python/autograph/operators/control_flow.py index 3418450e813..0106efda5dd 100644 --- a/tensorflow/python/autograph/operators/control_flow.py +++ b/tensorflow/python/autograph/operators/control_flow.py @@ -973,7 +973,8 @@ def _try_handling_undefineds( """ state_modified = False - if not os.getenv('AUTOGRAPH_CREATE_SYMBOLS_IN_LOOPS', ''): + # TODO(mdan): Remove once the default option is stable. + if os.getenv('AUTOGRAPH_CREATE_SYMBOLS_IN_LOOPS', '1') == '0': _verify_loop_init_vars(init_vars, symbol_names) return False, init_vars From 1e6fa32dfe9d3f1631ed6218985426d3f6ca4860 Mon Sep 17 00:00:00 2001 From: Revan Sopher Date: Fri, 7 Aug 2020 14:36:54 -0700 Subject: [PATCH 2363/2522] Exception grammar fix. PiperOrigin-RevId: 325510739 Change-Id: Idc535c7c62629b53e51919e8cc417e89e7168175 --- tensorflow/python/tpu/tpu_embedding_v2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/tpu/tpu_embedding_v2.py b/tensorflow/python/tpu/tpu_embedding_v2.py index 5e316d35aa4..412c7eb03d3 100644 --- a/tensorflow/python/tpu/tpu_embedding_v2.py +++ b/tensorflow/python/tpu/tpu_embedding_v2.py @@ -1501,7 +1501,7 @@ def make_sharded_variable_creator(hosts): if isinstance(initial_value, base.CheckpointInitialValue) and num_hosts > 1: raise RuntimeError("Delayed restoration of variables not available when " "there are multiple TPU hosts, please ensure that the " - "api object is build before you restore.") + "api object has been built before you restore.") for i, p in enumerate(partitions): with ops.device(hosts[i]): From 7a93fd22f78231a64abb212dffa15d9749da1281 Mon Sep 17 00:00:00 2001 From: Sachin Joglekar Date: Fri, 7 Aug 2020 14:39:18 -0700 Subject: [PATCH 2364/2522] Adds API for users to provide custom allocations for TFLite tensors PiperOrigin-RevId: 325511199 Change-Id: Ia8c0550375d508db3fa75b6b5df5a70088b7470b --- tensorflow/lite/BUILD | 2 + tensorflow/lite/c/common.h | 12 + tensorflow/lite/core/subgraph.cc | 57 ++++- tensorflow/lite/core/subgraph.h | 26 ++ tensorflow/lite/interpreter.cc | 6 + tensorflow/lite/interpreter.h | 23 ++ tensorflow/lite/interpreter_test.cc | 241 ++++++++++++++++++ .../lite/micro/micro_optional_debug_tools.cc | 2 + tensorflow/lite/optional_debug_tools.cc | 3 + .../benchmark/experimental/c/c_api_types.h | 12 + 10 files changed, 383 insertions(+), 1 deletion(-) diff --git a/tensorflow/lite/BUILD b/tensorflow/lite/BUILD index fac85181231..7007a847d83 100644 --- a/tensorflow/lite/BUILD +++ b/tensorflow/lite/BUILD @@ -412,9 +412,11 @@ cc_test( "tflite_smoke_test", ], deps = [ + ":builtin_op_data", ":external_cpu_backend_context", ":framework", ":string_util", + ":util", ":version", "//tensorflow/lite/core/api", "//tensorflow/lite/kernels:builtin_ops", diff --git a/tensorflow/lite/c/common.h b/tensorflow/lite/c/common.h index 7ef173c78d2..23eb528f4c9 100644 --- a/tensorflow/lite/c/common.h +++ b/tensorflow/lite/c/common.h @@ -358,6 +358,8 @@ typedef union TfLitePtrUnion { // * kTfLitePersistentRo: Allocated and populated during prepare. This is // useful for tensors that can be computed during prepare and treated // as constant inputs for downstream ops (also in prepare). +// * kTfLiteCustom: Custom memory allocation provided by the user. See +// TfLiteCustomAllocation below. 
typedef enum TfLiteAllocationType { kTfLiteMemNone = 0, kTfLiteMmapRo, @@ -365,6 +367,7 @@ typedef enum TfLiteAllocationType { kTfLiteArenaRwPersistent, kTfLiteDynamic, kTfLitePersistentRo, + kTfLiteCustom, } TfLiteAllocationType; // The delegates should use zero or positive integers to represent handles. @@ -397,6 +400,15 @@ typedef struct TfLiteSparsity { int dim_metadata_size; } TfLiteSparsity; +// Defines a custom memory allocation not owned by the runtime. +// `data` should be aligned to kDefaultTensorAlignment defined in +// lite/util.h. (Currently 64 bytes) +// NOTE: See Interpreter.SetCustomAllocationForTensor for details on usage. +typedef struct TfLiteCustomAllocation { + void* data; + size_t bytes; +} TfLiteCustomAllocation; + // An tensor in the interpreter system which is a wrapper around a buffer of // data including a dimensionality (or NULL if not currently defined). #ifndef TF_LITE_STATIC_MEMORY diff --git a/tensorflow/lite/core/subgraph.cc b/tensorflow/lite/core/subgraph.cc index beedbe6c5ea..15b8a0bcc57 100644 --- a/tensorflow/lite/core/subgraph.cc +++ b/tensorflow/lite/core/subgraph.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/lite/core/subgraph.h" #include +#include #include "tensorflow/lite/arena_planner.h" #include "tensorflow/lite/c/common.h" @@ -140,6 +141,17 @@ const char* GetTFLiteOpName(const TfLiteRegistration& op_reg) { return tflite::EnumNamesBuiltinOperator()[op_reg.builtin_code]; } +TfLiteStatus ValidateCustomAllocationForTensor( + TfLiteContext* context, const TfLiteTensor* tensor, + const TfLiteCustomAllocation& allocation) { + TF_LITE_ENSURE(context, allocation.data != nullptr); + TF_LITE_ENSURE(context, allocation.bytes >= tensor->bytes); + // Ensure provided memory is aligned to what TFLite requires. + const intptr_t data_ptr_value = reinterpret_cast(allocation.data); + TF_LITE_ENSURE(context, data_ptr_value % kDefaultTensorAlignment == 0); + return kTfLiteOk; +} + } // namespace // A trivial implementation of GraphInfo around the Interpreter. @@ -898,9 +910,24 @@ TfLiteStatus Subgraph::PrepareOpsAndTensors() { execution_plan_, &last_exec_plan_index_prepared)); next_execution_plan_index_to_prepare_ = last_exec_plan_index_prepared + 1; + // Execute arena allocations. TF_LITE_ENSURE_STATUS(memory_planner_->ExecuteAllocations( next_execution_plan_index_to_plan_allocation_, last_exec_plan_index_prepared)); + + // Ensure custom allocations are still valid for applicable tensors. + // This causes some extra validations for cases with dynamic tensors, but the + // overhead should be minimal since the number of custom-allocated tensors + // will typically be low. 
+ for (int i = 0; i < custom_allocations_.size(); ++i) { + auto idx_and_alloc = custom_allocations_[i]; + auto& tensor = tensors()[idx_and_alloc.first]; + const auto& alloc = idx_and_alloc.second; + TF_LITE_ENSURE(context(), tensor.allocation_type == kTfLiteCustom); + TF_LITE_ENSURE_STATUS( + ValidateCustomAllocationForTensor(context(), &tensor, alloc)); + } + next_execution_plan_index_to_plan_allocation_ = last_exec_plan_index_prepared + 1; @@ -1218,7 +1245,8 @@ TfLiteStatus Subgraph::ResizeTensorImpl(TfLiteTensor* tensor, if (tensor->allocation_type == kTfLiteArenaRw || tensor->allocation_type == kTfLiteDynamic || tensor->allocation_type == kTfLiteArenaRwPersistent || - tensor->allocation_type == kTfLitePersistentRo) { + tensor->allocation_type == kTfLitePersistentRo || + tensor->allocation_type == kTfLiteCustom) { tensor_resized_since_op_invoke_ |= TfLiteIntArrayEqual(tensor->dims, new_size) == 0; if (tensor->type != kTfLiteString) { @@ -1455,6 +1483,33 @@ TfLiteStatus Subgraph::ModifyGraphWithDelegate(TfLiteDelegate* delegate) { return status; } +TfLiteStatus Subgraph::SetCustomAllocationForTensor( + int tensor_index, const TfLiteCustomAllocation& allocation) { + TfLiteTensor* tensor = &context_.tensors[tensor_index]; + TF_LITE_ENSURE(context(), tensor->allocation_type == kTfLiteArenaRw || + tensor->allocation_type == kTfLiteCustom); + TF_LITE_ENSURE_STATUS( + ValidateCustomAllocationForTensor(context(), tensor, allocation)); + + // If tensor already has a custom alloc, just reassign. + const auto alloc_it = std::find_if( + custom_allocations_.begin(), custom_allocations_.end(), + [tensor_index]( + const std::pair& existing_alloc) { + return existing_alloc.first == tensor_index; + }); + if (alloc_it == custom_allocations_.end()) { + custom_allocations_.emplace_back(tensor_index, allocation); + } else { + alloc_it->second = allocation; + } + + tensor->allocation_type = kTfLiteCustom; + tensor->data.data = allocation.data; + + return kTfLiteOk; +} + } // namespace impl } // namespace tflite diff --git a/tensorflow/lite/core/subgraph.h b/tensorflow/lite/core/subgraph.h index 5058273667a..1fe1c7e4391 100644 --- a/tensorflow/lite/core/subgraph.h +++ b/tensorflow/lite/core/subgraph.h @@ -332,6 +332,29 @@ class Subgraph { // Before `AllocateTensors` is called, this will always return true; bool HasDynamicTensors() { return has_dynamic_tensors_; } + // Assigns (or reassigns) a custom memory allocation for the given tensor. + // If AllocateTensors() is called after this, the runtime does not consider + // the tensor during internal memory planning and will continue using the + // provided allocation for the tensor (assuming it satisfies the expected + // tensor byte length). + // The runtime does NOT take ownership of the underlying memory. + // Note that while this function can be called again to set a new allocation + // for the tensor, it can no longer be reset to the TFLite arena memory. + // + // Parameters should satisfy the following conditions: + // 1. tensor->allocation_type == kTfLiteArenaRw + // In general, this is true for all non-constants such as I/O tensors. + // 2. allocation->data has the appropriate permissions for runtime access + // (Read-only for inputs, Read-Write for others), and outlives Interpreter. + // 3. allocation->bytes >= tensor->bytes. + // This condition is checked again if any tensors are resized. + // 4. allocation->data should be aligned to kDefaultTensorAlignment + // defined in lite/util.h. 
(Currently 64 bytes) + // + // WARNING: This is an experimental interface that is subject to change. + TfLiteStatus SetCustomAllocationForTensor( + int tensor_index, const TfLiteCustomAllocation& allocation); + private: // SubgraphAwareProfiler wraps an actual TFLite profiler, such as a // BufferedProfiler instance, and takes care of event profiling/tracing in a @@ -680,6 +703,9 @@ class Subgraph { std::unique_ptr memory_planner_; + // Contains pairs for all applicable tensors. + std::vector> custom_allocations_; + // Tracking bit for whether a tensor was resized in the course of an op // invocation. This is a useful hint to ensure that dynamic tensor outputs // trigger downstream reallocation after op invocation. diff --git a/tensorflow/lite/interpreter.cc b/tensorflow/lite/interpreter.cc index 62de5896d84..7a5f4df5155 100644 --- a/tensorflow/lite/interpreter.cc +++ b/tensorflow/lite/interpreter.cc @@ -163,6 +163,12 @@ void Interpreter::SetExternalContext(TfLiteExternalContextType type, primary_subgraph().SetExternalContext(type, ctx); } +TfLiteStatus Interpreter::SetCustomAllocationForTensor( + int tensor_index, const TfLiteCustomAllocation& allocation) { + return primary_subgraph().SetCustomAllocationForTensor(tensor_index, + allocation); +} + TfLiteStatus Interpreter::SetInputs(std::vector inputs) { return primary_subgraph().SetInputs(std::move(inputs)); } diff --git a/tensorflow/lite/interpreter.h b/tensorflow/lite/interpreter.h index ab2d1250513..5c354d63dd5 100644 --- a/tensorflow/lite/interpreter.h +++ b/tensorflow/lite/interpreter.h @@ -504,6 +504,29 @@ class Interpreter { void SetExternalContext(TfLiteExternalContextType type, TfLiteExternalContext* ctx); + // Assigns (or reassigns) a custom memory allocation for the given tensor. + // If AllocateTensors() is called after this, the runtime does not consider + // the tensor during internal memory planning and will continue using the + // provided allocation for the tensor (assuming it satisfies the expected + // tensor byte length). + // The runtime does NOT take ownership of the underlying memory. + // Note that while this function can be called again to set a new allocation + // for the tensor, it can no longer be reset to the TFLite arena memory. + // + // Parameters should satisfy the following conditions: + // 1. tensor->allocation_type == kTfLiteArenaRw + // In general, this is true for all non-constants such as I/O tensors. + // 2. allocation->data has the appropriate permissions for runtime access + // (Read-only for inputs, Read-Write for others), and outlives Interpreter. + // 3. allocation->bytes >= tensor->bytes. + // This condition is checked again if any tensors are resized. + // 4. allocation->data should be aligned to kDefaultTensorAlignment + // defined in lite/util.h. (Currently 64 bytes) + // + // WARNING: This is an experimental interface that is subject to change. + TfLiteStatus SetCustomAllocationForTensor( + int tensor_index, const TfLiteCustomAllocation& allocation); + #ifndef DOXYGEN_SKIP /// Adds `subgraphs_to_add` subgraphs, preserving pre-existing Subgraph /// entries. The value pointed to by `first_new_subgraph_index` will be set to diff --git a/tensorflow/lite/interpreter_test.cc b/tensorflow/lite/interpreter_test.cc index 899811b3fea..bf40843876c 100644 --- a/tensorflow/lite/interpreter_test.cc +++ b/tensorflow/lite/interpreter_test.cc @@ -22,8 +22,10 @@ limitations under the License. 
#include #include #include "third_party/eigen3/Eigen/Core" +#include "tensorflow/lite/builtin_op_data.h" #include "tensorflow/lite/core/api/error_reporter.h" #include "tensorflow/lite/external_cpu_backend_context.h" +#include "tensorflow/lite/kernels/builtin_op_kernels.h" #include "tensorflow/lite/kernels/cpu_backend_context.h" #include "tensorflow/lite/kernels/internal/compatibility.h" #include "tensorflow/lite/kernels/kernel_util.h" @@ -1480,6 +1482,245 @@ TEST_F(CancellationTest, CancelDuringInvoke) { ASSERT_EQ(invoke_error_code, kTfLiteError); } +// Tests functionality related to custom memory allocations in TFLite. +class TestCustomAllocation : public ::testing::Test { + protected: + void SetUp() override { + // Simple model with two custom ops that add 2 float tensors each. + interpreter_.reset(new Interpreter); + interpreter_->AddTensors(5); + interpreter_->SetInputs({0, 1}); + interpreter_->SetOutputs({3, 4}); + TfLiteQuantizationParams quant; + interpreter_->SetTensorParametersReadWrite(0, kTfLiteFloat32, "", {3}, + quant); + interpreter_->SetTensorParametersReadWrite(1, kTfLiteFloat32, "", {3}, + quant); + interpreter_->SetTensorParametersReadWrite(2, kTfLiteFloat32, "", {3}, + quant); + interpreter_->SetTensorParametersReadWrite(3, kTfLiteFloat32, "", {3}, + quant); + interpreter_->SetTensorParametersReadWrite(4, kTfLiteFloat32, "", {3}, + quant); + auto* add_reg = ops::builtin::Register_ADD(); + TfLiteAddParams* builtin_data0 = + reinterpret_cast(malloc(sizeof(TfLiteAddParams))); + TfLiteAddParams* builtin_data1 = + reinterpret_cast(malloc(sizeof(TfLiteAddParams))); + TfLiteAddParams* builtin_data2 = + reinterpret_cast(malloc(sizeof(TfLiteAddParams))); + builtin_data0->activation = kTfLiteActNone; + builtin_data1->activation = kTfLiteActNone; + builtin_data2->activation = kTfLiteActNone; + interpreter_->AddNodeWithParameters({0, 0}, {2}, nullptr, 0, builtin_data0, + add_reg); + interpreter_->AddNodeWithParameters({1, 1}, {3}, nullptr, 0, builtin_data1, + add_reg); + interpreter_->AddNodeWithParameters({2, 1}, {4}, nullptr, 0, builtin_data2, + add_reg); + } + + void AssignCustomAllocForTensor(int tensor_idx, int required_alignment) { + const TfLiteTensor* tensor = interpreter_->tensor(tensor_idx); + auto tensor_alloc = NewCustomAlloc(tensor->bytes, required_alignment); + ASSERT_EQ( + interpreter_->SetCustomAllocationForTensor(tensor_idx, tensor_alloc), + kTfLiteOk); + } + + void VerifyInvoke() { + std::vector input = {1.0f, 2.0f, 3.0f}; + std::vector expected_output = {2.0f, 4.0f, 6.0f}; + TfLiteTensor* tensor = interpreter_->tensor(interpreter_->outputs()[0]); + + // typed_tensor<...> should work irrespective of custom alloc, since it + // accesses tensor.data. + memcpy(interpreter_->typed_tensor(0), input.data(), + 3 * sizeof(float)); + memcpy(interpreter_->typed_tensor(1), input.data(), + 3 * sizeof(float)); + ASSERT_EQ(interpreter_->Invoke(), kTfLiteOk); + for (int i = 0; i < 3; ++i) { + EXPECT_EQ(tensor->data.f[i], expected_output[i]) << i; + } + } + + // Actual initialized allocation is more than num_bytes, to account for + // required_allocation. + TfLiteCustomAllocation NewCustomAlloc(size_t num_bytes, + int required_alignment) { + // Extra memory to ensure alignment. 
+ char* new_alloc = new char[num_bytes + required_alignment]; + char* new_underlying_buffer_aligned_ptr = reinterpret_cast( + AlignTo(required_alignment, reinterpret_cast(new_alloc))); + custom_alloc_buffers_.emplace_back(new_alloc); + + return TfLiteCustomAllocation( + {new_underlying_buffer_aligned_ptr, num_bytes}); + } + + intptr_t AlignTo(size_t alignment, intptr_t offset) { + return offset % alignment == 0 ? offset + : offset + (alignment - offset % alignment); + } + + void TearDown() override { + interpreter_.reset(); + custom_alloc_buffers_.clear(); + } + + protected: + TfLiteAddParams add_params_; + std::unique_ptr interpreter_; + std::vector> custom_alloc_buffers_; +}; + +TEST_F(TestCustomAllocation, InvalidAlignment) { + const TfLiteTensor* input_tensor = + interpreter_->tensor(interpreter_->inputs()[0]); + auto input_alloc = + NewCustomAlloc(input_tensor->bytes, kDefaultTensorAlignment - 1); + ASSERT_EQ(interpreter_->SetCustomAllocationForTensor( + interpreter_->inputs()[0], input_alloc), + kTfLiteError); + + // Allocate tensors & Invoke should still work. + ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk); + VerifyInvoke(); +} + +TEST_F(TestCustomAllocation, InsufficientBytes) { + auto input_alloc = NewCustomAlloc(4, kDefaultTensorAlignment); + ASSERT_EQ(interpreter_->SetCustomAllocationForTensor( + interpreter_->inputs()[0], input_alloc), + kTfLiteError); + + // Allocate tensors & Invoke should still work. + ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk); + VerifyInvoke(); +} + +TEST_F(TestCustomAllocation, CustomInputAlloc) { + // Set custom allocation for one input tensor. + AssignCustomAllocForTensor(interpreter_->inputs()[0], + /*required_alignment=*/kDefaultTensorAlignment); + + ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk); + VerifyInvoke(); +} + +TEST_F(TestCustomAllocation, CustomInputAlloc_MultipleAssigns) { + // Set custom allocation for one input tensor. + AssignCustomAllocForTensor(interpreter_->inputs()[0], + /*required_alignment=*/kDefaultTensorAlignment); + + AssignCustomAllocForTensor(interpreter_->inputs()[0], + /*required_alignment=*/kDefaultTensorAlignment); + ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk); + VerifyInvoke(); + + AssignCustomAllocForTensor(interpreter_->inputs()[0], + /*required_alignment=*/kDefaultTensorAlignment); + ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk); + VerifyInvoke(); +} + +TEST_F(TestCustomAllocation, CustomInputAlloc_AllocateTensorsBefore) { + // Allocate tensors. + // Allocating now will cause TFLite to reserve some extra memory, but nothing + // should break. + ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk); + + AssignCustomAllocForTensor(interpreter_->inputs()[0], + /*required_alignment=*/kDefaultTensorAlignment); + + VerifyInvoke(); +} + +TEST_F(TestCustomAllocation, CustomInputAndOutputAllocs) { + // Set custom allocations for all IO tensors. + AssignCustomAllocForTensor(interpreter_->inputs()[0], + /*required_alignment=*/kDefaultTensorAlignment); + AssignCustomAllocForTensor(interpreter_->inputs()[1], + /*required_alignment=*/kDefaultTensorAlignment); + AssignCustomAllocForTensor(interpreter_->outputs()[0], + /*required_alignment=*/kDefaultTensorAlignment); + AssignCustomAllocForTensor(interpreter_->outputs()[1], + /*required_alignment=*/kDefaultTensorAlignment); + + ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk); + VerifyInvoke(); +} + +TEST_F(TestCustomAllocation, ResizeTensorsWithoutEnoughMemory) { + // Set custom allocations for all input tensors. 
+ AssignCustomAllocForTensor(interpreter_->inputs()[0], + /*required_alignment=*/kDefaultTensorAlignment); + AssignCustomAllocForTensor(interpreter_->inputs()[1], + /*required_alignment=*/kDefaultTensorAlignment); + + ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk); + + // Now resize tensors to double the size. + ASSERT_EQ(interpreter_->ResizeInputTensor(interpreter_->inputs()[0], {2, 3}), + kTfLiteOk); + ASSERT_EQ(interpreter_->ResizeInputTensor(interpreter_->inputs()[1], {2, 3}), + kTfLiteOk); + + // Since the custom memory previously allocated isn't enough, + // AllocateTensors() will fail. + ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteError); + // Interpreter should no longer be in invokable state, so expect failure. + ASSERT_EQ(interpreter_->Invoke(), kTfLiteError); +} + +TEST_F(TestCustomAllocation, ResizeTensorsWithEnoughMemory) { + // Set custom allocations for all input tensors, with double the required + // memory. + const TfLiteTensor* input0_tensor = + interpreter_->tensor(interpreter_->inputs()[0]); + auto input0_alloc = + NewCustomAlloc(2 * input0_tensor->bytes, kDefaultTensorAlignment); + ASSERT_EQ(interpreter_->SetCustomAllocationForTensor( + interpreter_->inputs()[0], input0_alloc), + kTfLiteOk); + const TfLiteTensor* input1_tensor = + interpreter_->tensor(interpreter_->inputs()[1]); + auto input1_alloc = + NewCustomAlloc(2 * input1_tensor->bytes, kDefaultTensorAlignment); + ASSERT_EQ(interpreter_->SetCustomAllocationForTensor( + interpreter_->inputs()[1], input1_alloc), + kTfLiteOk); + + ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk); + + // Now resize tensors to double the size. + ASSERT_EQ(interpreter_->ResizeInputTensor(interpreter_->inputs()[0], {6, 1}), + kTfLiteOk); + ASSERT_EQ(interpreter_->ResizeInputTensor(interpreter_->inputs()[1], {6, 1}), + kTfLiteOk); + + ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk); + + std::vector input = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; + std::vector expected_output = {2.0f, 4.0f, 6.0f, 8.0f, 10.0f, 12.0f}; + TfLiteTensor* tensor = interpreter_->tensor(interpreter_->outputs()[0]); + memcpy(interpreter_->typed_tensor(0), input.data(), 6 * sizeof(float)); + memcpy(interpreter_->typed_tensor(1), input.data(), 6 * sizeof(float)); + ASSERT_EQ(interpreter_->Invoke(), kTfLiteOk); + for (int i = 0; i < 6; ++i) { + EXPECT_EQ(tensor->data.f[i], expected_output[i]) << i; + } + + ASSERT_EQ(interpreter_->ResizeInputTensor(interpreter_->inputs()[0], {3, 1}), + kTfLiteOk); + ASSERT_EQ(interpreter_->ResizeInputTensor(interpreter_->inputs()[1], {3, 1}), + kTfLiteOk); + + ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk); + VerifyInvoke(); +} + } // namespace } // namespace tflite diff --git a/tensorflow/lite/micro/micro_optional_debug_tools.cc b/tensorflow/lite/micro/micro_optional_debug_tools.cc index 4617b3d9825..e7aee576351 100644 --- a/tensorflow/lite/micro/micro_optional_debug_tools.cc +++ b/tensorflow/lite/micro/micro_optional_debug_tools.cc @@ -109,6 +109,8 @@ const char* AllocTypeName(TfLiteAllocationType type) { return "kTfLiteArenaRwPersistent"; case kTfLitePersistentRo: return "kTfLitePersistentRo"; + case kTfLiteCustom: + return "kTfLiteCustom"; } return "(invalid)"; } diff --git a/tensorflow/lite/optional_debug_tools.cc b/tensorflow/lite/optional_debug_tools.cc index 8ee5c3b3f56..ef4ee1cb4e3 100644 --- a/tensorflow/lite/optional_debug_tools.cc +++ b/tensorflow/lite/optional_debug_tools.cc @@ -14,6 +14,7 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/lite/optional_debug_tools.h" +#include "tensorflow/lite/c/common.h" #include "tensorflow/lite/schema/schema_generated.h" namespace tflite { @@ -81,6 +82,8 @@ const char* AllocTypeName(TfLiteAllocationType type) { return "kTfLiteArenaRwPersistent"; case kTfLitePersistentRo: return "kTfLitePersistentRo"; + case kTfLiteCustom: + return "kTfLiteCustom"; } return "(invalid)"; } diff --git a/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h b/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h index 7ef173c78d2..23eb528f4c9 100644 --- a/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h +++ b/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h @@ -358,6 +358,8 @@ typedef union TfLitePtrUnion { // * kTfLitePersistentRo: Allocated and populated during prepare. This is // useful for tensors that can be computed during prepare and treated // as constant inputs for downstream ops (also in prepare). +// * kTfLiteCustom: Custom memory allocation provided by the user. See +// TfLiteCustomAllocation below. typedef enum TfLiteAllocationType { kTfLiteMemNone = 0, kTfLiteMmapRo, @@ -365,6 +367,7 @@ typedef enum TfLiteAllocationType { kTfLiteArenaRwPersistent, kTfLiteDynamic, kTfLitePersistentRo, + kTfLiteCustom, } TfLiteAllocationType; // The delegates should use zero or positive integers to represent handles. @@ -397,6 +400,15 @@ typedef struct TfLiteSparsity { int dim_metadata_size; } TfLiteSparsity; +// Defines a custom memory allocation not owned by the runtime. +// `data` should be aligned to kDefaultTensorAlignment defined in +// lite/util.h. (Currently 64 bytes) +// NOTE: See Interpreter.SetCustomAllocationForTensor for details on usage. +typedef struct TfLiteCustomAllocation { + void* data; + size_t bytes; +} TfLiteCustomAllocation; + // An tensor in the interpreter system which is a wrapper around a buffer of // data including a dimensionality (or NULL if not currently defined). #ifndef TF_LITE_STATIC_MEMORY From 271f2bad32147951000fbdfac341fb91436622f0 Mon Sep 17 00:00:00 2001 From: Geoffrey Martin-Noble Date: Fri, 7 Aug 2020 15:03:00 -0700 Subject: [PATCH 2365/2522] Add license header to lit.site.cfg.py.in PiperOrigin-RevId: 325515949 Change-Id: I5d507dedfcc56b241b79025ed170be67e15cec11 --- .../compiler/mlir/hlo/tests/lit.site.cfg.py.in | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tensorflow/compiler/mlir/hlo/tests/lit.site.cfg.py.in b/tensorflow/compiler/mlir/hlo/tests/lit.site.cfg.py.in index 17b99e983f6..1555d314df0 100644 --- a/tensorflow/compiler/mlir/hlo/tests/lit.site.cfg.py.in +++ b/tensorflow/compiler/mlir/hlo/tests/lit.site.cfg.py.in @@ -1,3 +1,16 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ @LIT_SITE_CFG_IN_HEADER@ import sys From ba1341974543e05eeede31927c2662193b28951b Mon Sep 17 00:00:00 2001 From: Yanhua Sun Date: Fri, 7 Aug 2020 15:06:36 -0700 Subject: [PATCH 2366/2522] add defun benchmark with relaxed shape option PiperOrigin-RevId: 325516758 Change-Id: I9b284d5faea62862775de86e617967acd0ea8315 --- tensorflow/python/eager/benchmarks_test.py | 41 ++++++++++++++++++---- 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/eager/benchmarks_test.py b/tensorflow/python/eager/benchmarks_test.py index 93766d809f2..22110e1ae71 100644 --- a/tensorflow/python/eager/benchmarks_test.py +++ b/tensorflow/python/eager/benchmarks_test.py @@ -481,14 +481,26 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): num_iters, execution_mode=None): - def func_matmul(m): + @def_function.function( + input_signature=[tensor_spec.TensorSpec([2, 2], dtypes.float32)]) + def defun_matmul(m): return math_ops.matmul(m, m) - f = function.defun( - func_matmul, - input_signature=[tensor_spec.TensorSpec([2, 2], dtypes.float32)]) + func = lambda: defun_matmul(m) + self._run(func, num_iters, execution_mode=execution_mode) - func = lambda: f(m) + def _benchmark_defun_matmul_relaxed_shape(self, + m, + num_iters, + execution_mode=None): + + @def_function.function(experimental_relax_shapes=True) + def defun_matmul(m): + return math_ops.matmul(m, m) + + m_3_by_3 = random_ops.random_uniform((3, 3)) + defun_matmul(m_3_by_3) + func = lambda: defun_matmul(m) self._run(func, num_iters, execution_mode=execution_mode) def _benchmark_defun_args_matmul(self, m, num_iters, execution_mode=None): @@ -591,12 +603,18 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): self._benchmark_defun_matmul( m, transpose_b=False, num_iters=self._num_iters_2_by_2) - def benchmark_defun_matmul_2_by_2_CPU_with_signature(self): + def benchmark_defun_matmul_2_by_2_with_signature_CPU(self): with context.device(CPU): m = self._m_2_by_2.cpu() self._benchmark_defun_matmul_with_signature( m, num_iters=self._num_iters_2_by_2) + def benchmark_defun_matmul_2_by_2_relaxed_shape_CPU(self): + with context.device(CPU): + m = self._m_2_by_2.cpu() + self._benchmark_defun_matmul_relaxed_shape( + m, num_iters=self._num_iters_2_by_2) + @test_util.disable_tfrt("Graph is not supported yet. b/156187905") def benchmark_defun_args_matmul_2_by_2_CPU(self): with context.device(CPU): @@ -678,7 +696,7 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): m, transpose_b=False, num_iters=self._num_iters_2_by_2) @test_util.disable_tfrt("copy to GPU not supported") - def benchmark_defun_matmul_2_by_2_GPU_with_signature(self): + def benchmark_defun_matmul_2_by_2_with_signature_GPU(self): if not context.num_gpus(): return with context.device(GPU): @@ -686,6 +704,15 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): self._benchmark_defun_matmul_with_signature( m, num_iters=self._num_iters_2_by_2) + @test_util.disable_tfrt("copy to GPU not supported") + def benchmark_defun_matmul_2_by_2_relaxed_shape_GPU(self): + if not context.num_gpus(): + return + with context.device(GPU): + m = self._m_2_by_2.gpu() + self._benchmark_defun_matmul_relaxed_shape( + m, num_iters=self._num_iters_2_by_2) + @test_util.disable_tfrt("Graph is not supported yet. 
b/156187905") def benchmark_defun_args_matmul_2_by_2_GPU(self): if not context.num_gpus(): From 7e16538d56497f0465b8a020221da0543f52454c Mon Sep 17 00:00:00 2001 From: Chuan He Date: Fri, 7 Aug 2020 15:08:16 -0700 Subject: [PATCH 2367/2522] Eliminating NOP GatherND or ScatterND if the indices value are from 0 to n-1, n is the first dimension of input tensor. PiperOrigin-RevId: 325517081 Change-Id: I45ec755f0678c913c8be82956b2de0c22474d675 --- .../compiler/mlir/lite/tests/optimize.mlir | 30 +++++++++++++++++++ .../compiler/mlir/lite/transforms/optimize.cc | 25 ++++++++++++++++ .../mlir/lite/transforms/optimize_patterns.td | 16 ++++++++++ 3 files changed, 71 insertions(+) diff --git a/tensorflow/compiler/mlir/lite/tests/optimize.mlir b/tensorflow/compiler/mlir/lite/tests/optimize.mlir index cf7fe07d729..7923c82ba92 100644 --- a/tensorflow/compiler/mlir/lite/tests/optimize.mlir +++ b/tensorflow/compiler/mlir/lite/tests/optimize.mlir @@ -1085,3 +1085,33 @@ func @ConvertPow2ToSquare(%arg0: tensor<2x2xf32>) -> tensor<2x2xf32> { // CHECK: return %[[RESULT]] } +func @ConvertIdentityGatherNdOp(%arg0: tensor<4x3xf32>) -> tensor<4x3xf32> { + %cst = constant dense<[[0], [1], [2], [3]]> : tensor<4x1xi32> + %0 = "tfl.gather_nd"(%arg0, %cst) : (tensor<4x3xf32>, tensor<4x1xi32>) -> tensor<4x3xf32> + return %0 : tensor<4x3xf32> + +// CHECK-LABEL: ConvertIdentityGatherNdOp +// CHECK-SAME: (%[[ARG:.*]]: tensor<4x3xf32>) -> tensor<4x3xf32> +// CHECK-NEXT: return %[[ARG]] : tensor<4x3xf32> +} + +func @ConvertIdentityGatherNdOp3D(%arg0: tensor<4x3x4xf32>) -> tensor<4x3x4xf32> { + %cst = constant dense<[[0], [1], [2], [3]]> : tensor<4x1xi32> + %0 = "tfl.gather_nd"(%arg0, %cst) : (tensor<4x3x4xf32>, tensor<4x1xi32>) -> tensor<4x3x4xf32> + return %0 : tensor<4x3x4xf32> + +// CHECK-LABEL: ConvertIdentityGatherNdOp3D +// CHECK-SAME: (%[[ARG:.*]]: tensor<4x3x4xf32>) -> tensor<4x3x4xf32> +// CHECK-NEXT: return %[[ARG]] : tensor<4x3x4xf32> +} + +func @ConvertIdentityScatterNd(%arg0: tensor<4x3xf32>) -> tensor<4x3xf32> { + %cst = constant dense<[[0], [1], [2], [3]]> : tensor<4x1xi32> + %shape = constant dense<[4, 3]> : tensor<2xi32> + %0 = "tfl.scatter_nd"(%cst, %arg0, %shape) : (tensor<4x1xi32>, tensor<4x3xf32>, tensor<2xi32>) -> tensor<4x3xf32> + return %0 : tensor<4x3xf32> + +// CHECK-LABEL: ConvertIdentityScatterNd +// CHECK-SAME: (%[[ARG:.*]]: tensor<4x3xf32>) -> tensor<4x3xf32> +// CHECK-NEXT: return %[[ARG]] : tensor<4x3xf32> +} diff --git a/tensorflow/compiler/mlir/lite/transforms/optimize.cc b/tensorflow/compiler/mlir/lite/transforms/optimize.cc index 6de6187d81a..eeecfac67cf 100644 --- a/tensorflow/compiler/mlir/lite/transforms/optimize.cc +++ b/tensorflow/compiler/mlir/lite/transforms/optimize.cc @@ -160,6 +160,31 @@ bool CanFuseConvOrDepthwiseConv(Attribute filter, Attribute val, return false; } +// Retuns true if we can eliminate the GatherNdOp or ScatterNdOp. When the value +// of `indices` are from 0 to n-1, the output tensor are identical to the +// `params`. +bool CanOptimizeIdentityGatherNdOrScatterNdOp(Value params, + DenseIntElementsAttr indices) { + auto params_type = params.getType().dyn_cast(); + auto indices_type = indices.getType().dyn_cast(); + // Checks the shape of `params` is [n, ...], shape of `indices` is [n, 1]. 2D + // `indices` means it gets the first row of `params`. As long as indices + // iterate the first row of `params`, the output is identical to input. 
+ if (!params_type || !indices_type || indices_type.getRank() != 2 || + indices_type.getDimSize(0) != params_type.getDimSize(0) || + indices_type.getDimSize(1) != 1) + return false; + + // Checks the value in `indices` is from 0 to n-1. + int cur_value = 0; + for (const auto &v : indices.getValues()) { + if (v.getSExtValue() != cur_value) return false; + ++cur_value; + } + + return true; +} + // Expand Attribute 'a' to 4D with all 1s except 1 dimension. // Which dimension depends on 'is_depthwise' is true or false. ElementsAttr ExpandTo4DForConvImpl(Attribute a, bool is_depthwise) { diff --git a/tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td b/tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td index ef6706875c9..3c5fc7a0c5e 100644 --- a/tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td +++ b/tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td @@ -520,3 +520,19 @@ def OptimizePow2ToSquare : Pat< (TFL_PowOp $input, (ConstantOp ConstantAttr, "2.0f">)), (TFL_MulOp $input, $input, TFL_AF_None)>; + +def CanOptimizeIdentityGatherNdOrScatterNdOp : Constraint())">>; + +def OptimizeIdentityGatherNdOp : Pat< + (TFL_GatherNdOp $params, (ConstantOp I32ElementsAttr: $indices)), + (replaceWithValue $params), + [(CanOptimizeIdentityGatherNdOrScatterNdOp $params, $indices)]>; + +def OptimizeIdentityScatterNdOp : Pat< + (TFL_ScatterNdOp (ConstantOp I32ElementsAttr: $indices), $params, $ignored), + (replaceWithValue $params), + [(CanOptimizeIdentityGatherNdOrScatterNdOp $params, $indices)]>; + + From cf8e65f21206ed48f8c87e5bb17d2326d4414c7d Mon Sep 17 00:00:00 2001 From: Xiao Yu Date: Fri, 7 Aug 2020 15:23:18 -0700 Subject: [PATCH 2368/2522] Update the namespaces of TF-TFRT integration. All TF-TFRT integration code should under namespace tfrt::tf. PiperOrigin-RevId: 325519742 Change-Id: I71a2abd0d209956dfd3ecb6a146769b29048b14f --- tensorflow/c/eager/c_api.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index 76d603694e3..fefa753c608 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -724,7 +724,7 @@ void TFE_DeleteContextOptions(TFE_ContextOptions* options) { delete options; } TFE_Context* TFE_NewContext(const TFE_ContextOptions* opts, TF_Status* status) { if (opts->use_tfrt) { #ifdef PLATFORM_GOOGLE - return tensorflow::wrap(new tfrt::ContextInterface(opts->async)); + return tensorflow::wrap(new tfrt::tf::ContextInterface(opts->async)); #else status->status = tensorflow::errors::Unimplemented("TFRT is not supported"); return nullptr; From 9091c70c38aad8adb1ee434e5b3c9fe61fdfd130 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 7 Aug 2020 15:27:00 -0700 Subject: [PATCH 2369/2522] Added Softmax to the estimator. 
PiperOrigin-RevId: 325520405 Change-Id: I6d783f69cec11d0e8c82ee9fd385f57d6b46eb39 --- .../costs/analytical_cost_estimator_test.cc | 3 +- .../grappler/costs/op_level_cost_estimator.cc | 28 +++++++++++++++++++ .../grappler/costs/op_level_cost_estimator.h | 1 + .../costs/op_level_cost_estimator_test.cc | 9 +++--- 4 files changed, 35 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/grappler/costs/analytical_cost_estimator_test.cc b/tensorflow/core/grappler/costs/analytical_cost_estimator_test.cc index e558558d00a..b23b657308d 100644 --- a/tensorflow/core/grappler/costs/analytical_cost_estimator_test.cc +++ b/tensorflow/core/grappler/costs/analytical_cost_estimator_test.cc @@ -102,14 +102,13 @@ TEST_F(AnalyticalCostEstimatorTest, SimpleTest) { Costs summary; TF_ASSERT_OK(estimator.PredictCosts(item.graph, &run_metadata, &summary)); - EXPECT_EQ(Costs::NanoSeconds(9157), summary.execution_time); + EXPECT_EQ(Costs::NanoSeconds(9158), summary.execution_time); // Note there are totally 17 nodes (RandomUniform creates 2 nodes), but // grappler will not process "label", therefore we have 15 here instead EXPECT_EQ(15, summary.num_ops_total); // Make this estimate accurate: // TODO(http://b/70031255): Accurate estimator for RandomUniform op needed - // TODO(http://b/70031363): Accurate estimator for Softmax needed // // Change to EXPECT_FALSE when the above TODOs are done: EXPECT_TRUE(summary.inaccurate); diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc index d76ff4359c1..e148f6a61c8 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc @@ -95,6 +95,7 @@ constexpr char kFusedBatchNormGrad[] = "FusedBatchNormGrad"; constexpr char kQuantizedMatMul[] = "QuantizedMatMul"; constexpr char kQuantizedMatMulV2[] = "QuantizedMatMulV2"; constexpr char kUnpack[] = "Unpack"; +constexpr char kSoftmax[] = "Softmax"; // Dynamic control flow ops. constexpr char kSwitch[] = "Switch"; constexpr char kMerge[] = "Merge"; @@ -503,6 +504,8 @@ OpLevelCostEstimator::OpLevelCostEstimator() { device_cost_impl_.emplace( kFusedBatchNormGrad, wrap(&OpLevelCostEstimator::PredictFusedBatchNormGrad)); + device_cost_impl_.emplace(kSoftmax, + wrap(&OpLevelCostEstimator::PredictSoftmax)); device_cost_impl_.emplace( kAssignVariableOp, wrap(&OpLevelCostEstimator::PredictAssignVariableOps)); device_cost_impl_.emplace( @@ -2287,5 +2290,30 @@ Costs OpLevelCostEstimator::PredictNaryOp(const OpContext& op_context) const { costs.num_ops_with_unknown_shapes = found_unknown_shapes; return costs; } + +// softmax[i, j] = exp(logits[i, j]) / sum_j(exp(logits[i, j])) +Costs OpLevelCostEstimator::PredictSoftmax(const OpContext& op_context) const { + bool found_unknown_shapes = false; + const int64 logits_size = CalculateTensorElementCount( + op_context.op_info.inputs(0), &found_unknown_shapes); + TensorShapeProto logits_shape = MaybeGetMinimumShape( + op_context.op_info.inputs(0).shape(), 2, &found_unknown_shapes); + +#define EIGEN_COST(X) Eigen::internal::functor_traits::Cost + + // Every element of will be exponentiated, have that result included + // in a sum across j, and also have that result multiplied by the reciprocal + // of the sum_j. In addition, we'll compute 1/sum_j for every i. 
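  // Illustration with a hypothetical [10, 100] logits shape (not from the
  // patch itself): logits_size is 1000 and logits_shape.dim(0).size() is 10,
  // so the estimate computed below comes out to
  //   (Cost(exp) + Cost(sum) + Cost(product)) * 1000 + Cost(inverse) * 10,
  // where each Cost(.) is the per-operation cost reported by Eigen's
  // functor_traits for the float functor.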
+ auto ops = + (EIGEN_COST(scalar_exp_op) + EIGEN_COST(scalar_sum_op) + + EIGEN_COST(scalar_product_op)) * + logits_size + + EIGEN_COST(scalar_inverse_op) * logits_shape.dim(0).size(); + +#undef EIGEN_COST + + return PredictOpCountBasedCost(ops, op_context.op_info); +} + } // end namespace grappler } // end namespace tensorflow diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.h b/tensorflow/core/grappler/costs/op_level_cost_estimator.h index f44f4ee19e5..be0d7f76621 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator.h +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.h @@ -88,6 +88,7 @@ class OpLevelCostEstimator { Costs PredictEinsum(const OpContext& op_context) const; Costs PredictAssignVariableOps(const OpContext& op_context) const; Costs PredictPureMemoryOp(const OpContext& op_context) const; + Costs PredictSoftmax(const OpContext& op_context) const; // Generic cost prediction method for fused operations. Costs PredictFusedOp(const OpContext& op_context, diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc index c5209753a90..5ddefdc9602 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc @@ -966,8 +966,9 @@ TEST_F(OpLevelCostEstimatorTest, SquaredDifferenceExecutionTime) { TEST_F(OpLevelCostEstimatorTest, UnaryOpExecutionTime) { std::vector> unary_ops = { - {"All", 1}, {"ArgMax", 1}, {"Cast", 1}, {"Max", 1}, {"Min", 1}, - {"Prod", 1}, {"Relu", 1}, {"Relu6", 1}, {"Sum", 1}, {"TopKV2", 1}}; + {"All", 1}, {"ArgMax", 1}, {"Cast", 1}, {"Max", 1}, + {"Min", 1}, {"Prod", 1}, {"Relu", 1}, {"Relu6", 1}, + {"Softmax", 43}, {"Sum", 1}, {"TopKV2", 1}}; const int kTensorSize = 1000; for (auto unary_op : unary_ops) { @@ -980,7 +981,8 @@ TEST_F(OpLevelCostEstimatorTest, UnaryOpExecutionTime) { auto cost = PredictCosts(op_context); EXPECT_EQ(cost.memory_time, Costs::Duration(kExpectedMemoryTime)); - EXPECT_EQ(cost.compute_time, Costs::Duration(expected_compute_time)); + EXPECT_EQ(cost.compute_time, Costs::Duration(expected_compute_time)) + << unary_op.first; EXPECT_EQ(cost.execution_time, Costs::Duration(expected_compute_time + kExpectedMemoryTime)); EXPECT_EQ(cost.num_ops_total, 1); @@ -1972,6 +1974,5 @@ TEST_F(OpLevelCostEstimatorTest, PureMemoryOpExecutionTime) { EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); } } - } // end namespace grappler } // end namespace tensorflow From 6848f640974107c40ce0d53a233dffee122b2a85 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Fri, 7 Aug 2020 15:41:10 -0700 Subject: [PATCH 2370/2522] Run DimensionSizeRewriter and ZeroSizedHloElimination as part of the GPU simplification fixed-point pass pipeline. Looks like there was a typo here and we were running these two passes only once, rather than as part of the fixed-point pass pipeline. While we're here, fix a comment in algebraic_simplifier as well. 
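For readers unfamiliar with the pipeline layout in gpu_compiler.cc, a minimal sketch of the idiom this fix restores follows (the identifiers mirror the names used in that file, but the real pass list is much longer). Passes added to the outer pipeline run exactly once, while passes added to the HloPassFix sub-pipeline are re-run until they stop changing the module.

  // Sketch only; assumes hlo_pass_pipeline.h and hlo_pass_fix.h are included.
  HloPassPipeline pipeline("optimization");
  // Anything added directly to `pipeline` runs a single time.
  auto& pass = pipeline.AddPass<HloPassFix<HloPassPipeline>>("simplification");
  // Anything added to `pass` is iterated to a fixed point, which is where
  // DimensionSizeRewriter and ZeroSizedHloElimination are meant to live.
  pass.AddPass<DimensionSizeRewriter>();
  pass.AddPass<ZeroSizedHloElimination>();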
PiperOrigin-RevId: 325522950 Change-Id: Ic716b4831c7ac5b96234d496b0890c3b312528b7 --- tensorflow/compiler/xla/service/algebraic_simplifier.cc | 4 ++-- tensorflow/compiler/xla/service/gpu/gpu_compiler.cc | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index c793c4958a2..fa4d0e47a5d 100755 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -4161,8 +4161,8 @@ Status AlgebraicSimplifierVisitor::HandleDynamicSlice( return ReplaceWithNewInstruction(dynamic_slice, std::move(new_broadcast)); } - // Convert a dynamic slice into a slice if all offsets are constant and the - // operand is not constant. If ev + // Convert a dynamic slice into a slice if all offsets are constant and the + // operand is not constant. if (operand->opcode() != HloOpcode::kConstant && absl::c_all_of(absl::MakeSpan(dynamic_slice->operands().begin() + 1, dynamic_slice->operands().end()), diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index f2d29b5d11f..6d441903b25 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -190,11 +190,11 @@ Status GpuCompiler::OptimizeHloModule( /*layout_sensitive=*/false, /*allow_mixed_precision=*/false); - pipeline.AddPass(); + pass.AddPass(); // BatchNormExpander can create zero-sized ops, so zero-sized HLO // elimination has to come after that pass. - pipeline.AddPass(); + pass.AddPass(); AlgebraicSimplifierOptions options; // When transposes appear in a fusion node, we can easily adjust the From f6cfee3dff8fbc2ab94667cf54545fadab378f0f Mon Sep 17 00:00:00 2001 From: Reed Wanderman-Milne Date: Fri, 7 Aug 2020 15:41:34 -0700 Subject: [PATCH 2371/2522] Fix Windows cwise_ops_test.py bfloat16 failure. 
Test was broken by https://github.com/tensorflow/tensorflow/commit/696a4a76cebdd9150ede235a6edfd223a64e7129 PiperOrigin-RevId: 325523032 Change-Id: Ieabe77e2cf53e4e4e81530ededbb55a4b3120c6a --- tensorflow/python/kernel_tests/cwise_ops_test.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/kernel_tests/cwise_ops_test.py b/tensorflow/python/kernel_tests/cwise_ops_test.py index 8d628d448db..a7d8f841401 100644 --- a/tensorflow/python/kernel_tests/cwise_ops_test.py +++ b/tensorflow/python/kernel_tests/cwise_ops_test.py @@ -840,14 +840,16 @@ class MathOpsOverloadTest(test.TestCase): return self.evaluate(z) def _compareBinary(self, x, y, dtype, np_func, tf_func): - np_ans = np_func(x, y).astype(dtype.as_numpy_dtype) - if dtype == dtypes_lib.bfloat16: - # assertAllClose does not properly handle bfloat16 values - np_ans = np_ans.astype(np.float32) + # astype and assertAllClose do not properly handle bfloat16 values + np_ans = np_func(x, y).astype(np.float32 if dtype == dtypes_lib.bfloat16 + else dtype.as_numpy_dtype) + rtol = 1e-2 if dtype == dtypes_lib.bfloat16 else 1e-6 self.assertAllClose(np_ans, - self._computeTensorAndLiteral(x, y, dtype, tf_func)) + self._computeTensorAndLiteral(x, y, dtype, tf_func), + rtol=rtol) self.assertAllClose(np_ans, - self._computeLiteralAndTensor(x, y, dtype, tf_func)) + self._computeLiteralAndTensor(x, y, dtype, tf_func), + rtol=rtol) def _compareUnary(self, x, dtype, np_func, tf_func): np_ans = np_func(x).astype(dtype.as_numpy_dtype) From 0d2f665129f15e45384dc455afecdaf2ac9b256b Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Fri, 7 Aug 2020 15:58:52 -0700 Subject: [PATCH 2372/2522] Add APIs for Buffer Linearization Support in TPUs PiperOrigin-RevId: 325525811 Change-Id: I964daa0ba93bba5707c4714ac8de3564bba58c06 --- tensorflow/core/tpu/tpu_library_init_fns.inc | 2 ++ tensorflow/stream_executor/tpu/tpu_executor_c_api.h | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/tensorflow/core/tpu/tpu_library_init_fns.inc b/tensorflow/core/tpu/tpu_library_init_fns.inc index bc93b737eb5..be9d594685e 100644 --- a/tensorflow/core/tpu/tpu_library_init_fns.inc +++ b/tensorflow/core/tpu/tpu_library_init_fns.inc @@ -161,6 +161,8 @@ tensorflow::Status SetExecutorStructFn(void* library_handle) { TFTPU_SET_FN(executor_fn, TpuTransferManager_TransferLiteralFromDevice); TFTPU_SET_FN(executor_fn, TpuTransferManager_GetByteSizeRequirement); TFTPU_SET_FN(executor_fn, TpuTransferManager_WriteSingleTupleIndexTable); + TFTPU_SET_FN(executor_fn, TpuTransferManager_LinearizeToBuffers); + TFTPU_SET_FN(executor_fn, TpuTransferManager_FreeBuffers); TFTPU_SET_FN(executor_fn, TpuComputationPlacer_New); TFTPU_SET_FN(executor_fn, TpuComputationPlacer_Free); diff --git a/tensorflow/stream_executor/tpu/tpu_executor_c_api.h b/tensorflow/stream_executor/tpu/tpu_executor_c_api.h index a67fc9ddf61..2b66c2ce4c5 100644 --- a/tensorflow/stream_executor/tpu/tpu_executor_c_api.h +++ b/tensorflow/stream_executor/tpu/tpu_executor_c_api.h @@ -182,6 +182,11 @@ void TpuTransferManager_WriteSingleTupleIndexTable( XLA_TransferManager* manager, SE_Stream* stream, SE_DeviceMemoryBase* elements, size_t elements_len, XLA_Shape* shape, SE_DeviceMemoryBase* region, SE_Status* status); +void TpuTransferManager_LinearizeToBuffers( + XLA_TransferManager* manager, XLA_Literal* c_literal, char*** buffers_array, + int64_t** buffers_size, int64_t* buffers_array_size, SE_Status* status); +void TpuTransferManager_FreeBuffers(char** buffers_array, int64_t* buffers_size, + 
int64_t buffers_array_size); XLA_ComputationPlacer* TpuComputationPlacer_New(); void TpuComputationPlacer_Free(XLA_ComputationPlacer* placer); @@ -336,6 +341,8 @@ struct TfTpu_ExecutorApiFn { TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_TransferLiteralFromDevice); TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_GetByteSizeRequirement); TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_WriteSingleTupleIndexTable); + TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_LinearizeToBuffers); + TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_FreeBuffers); TFTPU_ADD_FN_IN_STRUCT(TpuComputationPlacer_New); TFTPU_ADD_FN_IN_STRUCT(TpuComputationPlacer_Free); From 6e909825ed44655636170c739d43b6030c742201 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Fri, 7 Aug 2020 16:01:44 -0700 Subject: [PATCH 2373/2522] Expand simple gathers into dynamic-slice. Especially for (GPU) fusion, XLA prefers to call a slice a slice. PiperOrigin-RevId: 325526316 Change-Id: I12b98756eca017d520a9a40d03dc291a42b9eaa3 --- tensorflow/compiler/xla/service/BUILD | 1 + tensorflow/compiler/xla/service/cpu/BUILD | 1 + .../compiler/xla/service/cpu/cpu_compiler.cc | 2 + .../compiler/xla/service/gather_expander.cc | 31 +++++++--- .../compiler/xla/service/gather_expander.h | 27 +++++++- .../xla/service/gather_expander_test.cc | 62 ++++++++++++++++++- tensorflow/compiler/xla/service/gpu/BUILD | 1 + .../compiler/xla/service/gpu/gpu_compiler.cc | 3 + 8 files changed, 114 insertions(+), 14 deletions(-) diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 49431b19a69..bfcdf6fae34 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -2259,6 +2259,7 @@ tf_cc_test( srcs = ["gather_expander_test.cc"], deps = [ ":gather_expander", + ":hlo_query", "//tensorflow/compiler/xla:test", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:test_macros_header", diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index 6eaf43902fe..e0317574e59 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -183,6 +183,7 @@ cc_library( "//tensorflow/compiler/xla/service:hlo_verifier", "//tensorflow/compiler/xla/service:indexed_array_analysis", "//tensorflow/compiler/xla/service:llvm_compiler", + "//tensorflow/compiler/xla/service:gather_expander", "//tensorflow/compiler/xla/service:reshape_mover", "//tensorflow/compiler/xla/service:rng_expander", "//tensorflow/compiler/xla/service:sort_simplifier", diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc index 0826d7b8ce1..eb5d9e704f5 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc @@ -77,6 +77,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/dynamic_index_splitter.h" #include "tensorflow/compiler/xla/service/dynamic_padder.h" #include "tensorflow/compiler/xla/service/flatten_call_graph.h" +#include "tensorflow/compiler/xla/service/gather_expander.h" #include "tensorflow/compiler/xla/service/hlo.pb.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_constant_folding.h" @@ -303,6 +304,7 @@ Status CpuCompiler::RunHloPassesThroughLayoutAssn( pass.AddPass(options); pass.AddPass(); pass.AddPass(); + pass.AddPass(GatherExpander::kEliminateSimpleGathers); // BatchNormExpander can create zero-sized ops, so zero-sized HLO // elimination has to come after that pass. diff --git a/tensorflow/compiler/xla/service/gather_expander.cc b/tensorflow/compiler/xla/service/gather_expander.cc index 1838f65e6ea..d38873a501d 100644 --- a/tensorflow/compiler/xla/service/gather_expander.cc +++ b/tensorflow/compiler/xla/service/gather_expander.cc @@ -269,6 +269,22 @@ static StatusOr PermuteBatchAndOffsetDims( return MakeTransposeHlo(accumulator, permutation); } +// Computes how many trips a loop implementing this gather op would take. +static int64 GatherLoopTripCount(HloInstruction* gather_instr) { + HloInstruction* start_indices = gather_instr->mutable_operand(1); + const Shape& start_indices_shape = start_indices->shape(); + const GatherDimensionNumbers& dim_numbers = + gather_instr->gather_dimension_numbers(); + + int64 trip_count = 1; + for (int64 i = 0, e = start_indices_shape.dimensions_size(); i < e; i++) { + if (i != dim_numbers.index_vector_dim()) { + trip_count *= start_indices_shape.dimensions(i); + } + } + return trip_count; +} + // High Level Algorithm // // We follow the following steps in sequence: @@ -311,20 +327,13 @@ StatusOr GatherExpander::ExpandInstruction( HloComputation* computation = gather_instr->parent(); HloInstruction* operand = gather_instr->mutable_operand(0); HloInstruction* start_indices = gather_instr->mutable_operand(1); - const Shape& start_indices_shape = start_indices->shape(); const Shape& output_shape = gather_instr->shape(); int64 output_rank = output_shape.dimensions_size(); const GatherDimensionNumbers& dim_numbers = gather_instr->gather_dimension_numbers(); - int64 gather_loop_trip_count = 1; - for (int64 i = 0, e = start_indices_shape.dimensions_size(); i < e; i++) { - if (i != dim_numbers.index_vector_dim()) { - gather_loop_trip_count *= start_indices_shape.dimensions(i); - } - } - + int64 gather_loop_trip_count = GatherLoopTripCount(gather_instr); if (!IsInt32(gather_loop_trip_count)) { return Unimplemented( "Gather operations with more than 2147483647 gather indices are not " @@ -373,7 +382,11 @@ bool GatherExpander::InstructionMatchesPattern(HloInstruction* inst) { return inst->opcode() == HloOpcode::kGather && // Avoid expanding gather ops that produce zero sized tensors, // instead punt these to ZeroSizedHloElimination. - !ShapeUtil::IsZeroElementArray(inst->shape()); + !ShapeUtil::IsZeroElementArray(inst->shape()) && + // In kEliminateSimpleGathers mode, we only simplify instructions + // which can be represented without a loop -- i.e. we only simplify + // gathers which have a trip count of 1. 
+ (mode_ == kEliminateAllGathers || GatherLoopTripCount(inst) == 1); } } // namespace xla diff --git a/tensorflow/compiler/xla/service/gather_expander.h b/tensorflow/compiler/xla/service/gather_expander.h index 5625a37cb46..e665fcd713c 100644 --- a/tensorflow/compiler/xla/service/gather_expander.h +++ b/tensorflow/compiler/xla/service/gather_expander.h @@ -21,10 +21,30 @@ limitations under the License. namespace xla { // This pass rewrites gather operations into (roughly) while loops of dynamic -// slices. This lets backends that don't support gather directly to -// nevertheless have a minimum level of support. +// slices. +// +// This pass can be used two ways: +// +// - kEliminateAllGathers: For backends that don't support gather, this pass +// can convert every gather to a loop. +// +// - kEliminateSimpleGathers: For backends that *do* support gather, this pass +// can strength-reduce "simple" gathers -- specifically, gathers that can be +// represented without a loop -- to dyanmic-slices. +// +// Note that even in kEliminateSimpleGathers mode, this pass may still expand a +// gather into a loop (with a trip-count of 1). It's up to other simplification +// passes to remove the loop. +// class GatherExpander : public OpExpanderPass { public: + enum Mode { + kEliminateAllGathers, + kEliminateSimpleGathers, + }; + + explicit GatherExpander(Mode m) : mode_(m) {} + absl::string_view name() const override { return "gather_expander"; } protected: @@ -32,6 +52,9 @@ class GatherExpander : public OpExpanderPass { StatusOr ExpandInstruction( HloInstruction* gather_inst) override; + + private: + Mode mode_; }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/gather_expander_test.cc b/tensorflow/compiler/xla/service/gather_expander_test.cc index 706327091d9..4b0808e9aaf 100644 --- a/tensorflow/compiler/xla/service/gather_expander_test.cc +++ b/tensorflow/compiler/xla/service/gather_expander_test.cc @@ -15,6 +15,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/gather_expander.h" +#include "tensorflow/compiler/xla/service/hlo_query.h" #include "tensorflow/compiler/xla/test.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" #include "tensorflow/compiler/xla/tests/test_macros.h" @@ -42,7 +43,9 @@ ENTRY main { TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, ParseAndReturnVerifiedModule(hlo_text)); - Status status = GatherExpander{}.Run(module.get()).status(); + Status status = GatherExpander{GatherExpander::kEliminateAllGathers} + .Run(module.get()) + .status(); EXPECT_EQ(status.code(), tensorflow::error::UNIMPLEMENTED); ASSERT_THAT( @@ -68,7 +71,9 @@ ENTRY main { )"; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, ParseAndReturnVerifiedModule(hlo_text)); - TF_ASSERT_OK_AND_ASSIGN(bool changed, GatherExpander{}.Run(module.get())); + TF_ASSERT_OK_AND_ASSIGN( + bool changed, + GatherExpander{GatherExpander::kEliminateAllGathers}.Run(module.get())); ASSERT_TRUE(changed); HloInstruction* while_instr = nullptr; @@ -129,7 +134,9 @@ ENTRY main { OpMetadata metadata; metadata.set_op_name("Gather"); module->entry_computation()->root_instruction()->set_metadata(metadata); - TF_ASSERT_OK_AND_ASSIGN(bool changed, GatherExpander{}.Run(module.get())); + TF_ASSERT_OK_AND_ASSIGN( + bool changed, + GatherExpander{GatherExpander::kEliminateAllGathers}.Run(module.get())); ASSERT_TRUE(changed); HloInstruction* while_instr = nullptr; @@ -147,5 +154,54 @@ ENTRY main { "after gather expansion"; EXPECT_EQ(while_instr->metadata().op_name(), "Gather"); } + +TEST_F(GatherExpanderTest, EliminateSimpleGathersSkipsNontrivialGather) { + const string hlo_text = R"( +HloModule TensorFlowGatherV1 + +ENTRY main { + operand = s32[3,3] parameter(0) + indices = s32[2] parameter(1) + ROOT gather = s32[2,3] gather(operand, indices), + offset_dims={1}, + collapsed_slice_dims={0}, + start_index_map={0}, + index_vector_dim=1, + slice_sizes={1, 3} +} +)"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + ParseAndReturnVerifiedModule(hlo_text)); + GatherExpander pass(GatherExpander::kEliminateSimpleGathers); + TF_ASSERT_OK_AND_ASSIGN(bool changed, RunHloPass(&pass, module.get())); + ASSERT_FALSE(changed); +} + +TEST_F(GatherExpanderTest, EliminateSimpleGathersRewritesTrivialGather) { + const string hlo_text = R"( +HloModule test + +ENTRY main { + operand = s32[100] parameter(0) + indices = s32[1] parameter(1) + ROOT gather = s32[10] gather(operand, indices), + offset_dims={0}, + collapsed_slice_dims={}, + start_index_map={0}, + index_vector_dim=0, + slice_sizes={10} +} +)"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + ParseAndReturnVerifiedModule(hlo_text)); + GatherExpander pass(GatherExpander::kEliminateAllGathers); + TF_ASSERT_OK_AND_ASSIGN(bool changed, RunHloPass(&pass, module.get())); + ASSERT_TRUE(changed); + ASSERT_FALSE(hlo_query::ContainsInstrWithOpcode(module->entry_computation(), + {HloOpcode::kGather})); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index 8dfd73e9a6a..47af5756f87 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -1177,6 +1177,7 @@ cc_library( "//tensorflow/compiler/xla/service:dynamic_padder", "//tensorflow/compiler/xla/service:executable", "//tensorflow/compiler/xla/service:flatten_call_graph", + "//tensorflow/compiler/xla/service:gather_expander", "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:hlo_constant_folding", 
"//tensorflow/compiler/xla/service:hlo_cse", diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index 6d441903b25..225fa328f3d 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -43,6 +43,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/dynamic_index_splitter.h" #include "tensorflow/compiler/xla/service/dynamic_padder.h" #include "tensorflow/compiler/xla/service/flatten_call_graph.h" +#include "tensorflow/compiler/xla/service/gather_expander.h" #include "tensorflow/compiler/xla/service/gpu/alias_passthrough_params.h" #include "tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_rewriter.h" #include "tensorflow/compiler/xla/service/gpu/fusion_merger.h" @@ -196,6 +197,8 @@ Status GpuCompiler::OptimizeHloModule( // elimination has to come after that pass. pass.AddPass(); + pass.AddPass(GatherExpander::kEliminateSimpleGathers); + AlgebraicSimplifierOptions options; // When transposes appear in a fusion node, we can easily adjust the // multi-dimensional index to create the one needed for the operand. This From a8dad139b6a7ecce56cfad1a2838e31d4870de1e Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Fri, 7 Aug 2020 16:02:09 -0700 Subject: [PATCH 2374/2522] Avoid setting the leading dimension in TensorList slice to dynamic. The first dimension (which is always 1) has to be static. PiperOrigin-RevId: 325526392 Change-Id: I073d01b35267b9c5f3e7fbd4986cd5bdd3151d67 --- tensorflow/compiler/tf2xla/kernels/tensor_list_utils.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/tf2xla/kernels/tensor_list_utils.cc b/tensorflow/compiler/tf2xla/kernels/tensor_list_utils.cc index aa71e4d4364..0e367e10ec4 100644 --- a/tensorflow/compiler/tf2xla/kernels/tensor_list_utils.cc +++ b/tensorflow/compiler/tf2xla/kernels/tensor_list_utils.cc @@ -504,7 +504,9 @@ Status ExecuteTensorListGetItem(xla::XlaOp list, xla::XlaOp index, xla::XlaOp list_part = xla::GetTupleElement(list, 0); xla::XlaOp read = xla::DynamicSlice(list_part, start_indices, slice_shape); - for (int64 i = 0; i < buffer_shape.dimensions_size(); ++i) { + // Propagate dynamic dimensions from buffer to the sliced buffer, except for + // leading dimension (which is always static 1). + for (int64 i = 1; i < buffer_shape.dimensions_size(); ++i) { if (buffer_shape.is_dynamic_dimension(i)) { auto buffer = xla::GetTupleElement(list, 0); auto gds = xla::GetDimensionSize(buffer, i); From d1e617ded2a7b172c89529b65a646eb7c9a86d37 Mon Sep 17 00:00:00 2001 From: Kuangyuan Chen Date: Fri, 7 Aug 2020 16:14:53 -0700 Subject: [PATCH 2375/2522] Pass `upgrade_legacy` cmd line flag to signaturedef importer in tf-mlir-translate. 
PiperOrigin-RevId: 325528613 Change-Id: I43585f35c9a7038bd0c094d3ffb68df8103a3a9f --- tensorflow/compiler/mlir/tf_mlir_translate_main.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/mlir/tf_mlir_translate_main.cc b/tensorflow/compiler/mlir/tf_mlir_translate_main.cc index 8cfdfd01120..caac8ea1eeb 100644 --- a/tensorflow/compiler/mlir/tf_mlir_translate_main.cc +++ b/tensorflow/compiler/mlir/tf_mlir_translate_main.cc @@ -121,7 +121,7 @@ int main(int argc, char** argv) { mlir::MLIRContext context; auto module_or = tensorflow::SavedModelSignatureDefsToMlirImport( - input_filename, tags, exported_names, &context); + input_filename, tags, exported_names, &context, upgrade_legacy); if (!module_or.status().ok()) return 1; module_or.ConsumeValueOrDie()->print(output->os()); From ff457d4d01479627a443c4999b0726dc8bbf9d7f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 7 Aug 2020 16:29:36 -0700 Subject: [PATCH 2376/2522] Add bzl_library rules for .bzl files without one. PiperOrigin-RevId: 325530970 Change-Id: Ia5f631fc056830ec36de176143e1ee58a6284571 --- tensorflow/lite/micro/BUILD | 10 +++++++++- tensorflow/lite/micro/testing/BUILD | 8 ++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/tensorflow/lite/micro/BUILD b/tensorflow/lite/micro/BUILD index 9b3d0d623cc..7cec8584413 100644 --- a/tensorflow/lite/micro/BUILD +++ b/tensorflow/lite/micro/BUILD @@ -1,3 +1,4 @@ +load("@bazel_skylib//:bzl_library.bzl", "bzl_library") load( "//tensorflow/lite/micro/testing:micro_test.bzl", "tflite_micro_cc_test", @@ -43,13 +44,13 @@ cc_library( deps = [ ":memory_helpers", ":micro_compatibility", + ":micro_profiler", ":op_resolvers", "//tensorflow/lite:type_to_tflitetype", "//tensorflow/lite/c:common", "//tensorflow/lite/core/api", "//tensorflow/lite/kernels/internal:compatibility", "//tensorflow/lite/kernels/internal:tensor", - "//tensorflow/lite/micro:micro_profiler", "//tensorflow/lite/micro/memory_planner", "//tensorflow/lite/micro/memory_planner:greedy_memory_planner", "//tensorflow/lite/schema:schema_fbs", @@ -379,3 +380,10 @@ tflite_micro_cc_test( "//tensorflow/lite/micro/testing:test_conv_model", ], ) + +bzl_library( + name = "build_def_bzl", + srcs = ["build_def.bzl"], + visibility = [":micro"], + deps = ["//tensorflow:tensorflow_bzl"], +) diff --git a/tensorflow/lite/micro/testing/BUILD b/tensorflow/lite/micro/testing/BUILD index 6f4b2502f4a..207d500c53d 100644 --- a/tensorflow/lite/micro/testing/BUILD +++ b/tensorflow/lite/micro/testing/BUILD @@ -1,3 +1,4 @@ +load("@bazel_skylib//:bzl_library.bzl", "bzl_library") load( "//tensorflow/lite/micro/testing:micro_test.bzl", "tflite_micro_cc_test", @@ -78,3 +79,10 @@ py_binary( "@absl_py//absl:app", ], ) + +bzl_library( + name = "micro_test_bzl", + srcs = ["micro_test.bzl"], + visibility = ["//visibility:private"], + deps = ["//tensorflow/lite/micro:build_def_bzl"], +) From 2e3e2bb33559bef5b76cb1e5bc745a75488efaff Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Fri, 7 Aug 2020 16:36:02 -0700 Subject: [PATCH 2377/2522] [XLA] Use dynamism inference to infer dynamic dimensions for reshape. - Introduce dynamism inference function in xla builder, which tells if a value is dynamic or static. - Use dynamism inference to infer whether an input to reshape's dimensions is dynamic. - This removes the "-1" hack I made before in the bridge, makes the code cleaner. Plus it can support more complex cases dynamic reshape when the dimension comes from a series of transformations. 
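A minimal sketch of how a kernel consumes the new hook, mirroring the reshape_op.cc change in the diff below (surrounding kernel boilerplate omitted). Constant operands resolve to all-false, i.e. fully static; other values are classified through the dynamism-inference graph built by the XLA builder.

  // Input 1 is the target-shape operand of tf.Reshape.
  std::vector<bool> dynamic_dims;
  OP_REQUIRES_OK(ctx, ctx->ResolveInputDynamismIntoPredVector(1, &dynamic_dims));
  int dynamic_dimension = -1;
  for (int d = 0; d < num_dims; ++d) {
    // dynamic_dims[d] is true when the d-th requested output extent is not
    // known at compile time, e.g. because it was derived from a dynamic
    // input dimension.
    if (dynamic_dims[d]) dynamic_dimension = d;
  }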
PiperOrigin-RevId: 325532056 Change-Id: Icc5bad39a857be77537e4736dd6863b833e2fe9d --- .../compiler/tf2xla/kernels/reshape_op.cc | 38 +-- tensorflow/compiler/tf2xla/xla_expression.cc | 42 ++++ tensorflow/compiler/tf2xla/xla_expression.h | 4 + tensorflow/compiler/tf2xla/xla_op_kernel.cc | 42 ++++ tensorflow/compiler/tf2xla/xla_op_kernel.h | 3 + tensorflow/compiler/xla/client/xla_builder.cc | 238 ++++++++++++++++++ tensorflow/compiler/xla/client/xla_builder.h | 25 ++ .../service/dynamic_dimension_inference.cc | 3 +- tensorflow/compiler/xla/shape_util.cc | 15 +- tensorflow/compiler/xla/tests/BUILD | 25 ++ .../xla/tests/dynamism_inference_test.cc | 215 ++++++++++++++++ 11 files changed, 630 insertions(+), 20 deletions(-) create mode 100644 tensorflow/compiler/xla/tests/dynamism_inference_test.cc diff --git a/tensorflow/compiler/tf2xla/kernels/reshape_op.cc b/tensorflow/compiler/tf2xla/kernels/reshape_op.cc index bf9a9150ea6..a85ba547179 100644 --- a/tensorflow/compiler/tf2xla/kernels/reshape_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/reshape_op.cc @@ -109,27 +109,33 @@ class ReshapeOp : public XlaOpKernel { VLOG(2) << "Reshape from " << input_shape.DebugString() << " to " << shape.DebugString() << ", unknown_index=" << unknown_index; - shape_input.clear(); - // Run get input again, this time with dynamic dimension represented as - // "-1" - ctx->set_dynamic_dimension_is_minus_one(true); - OP_REQUIRES_OK(ctx, ctx->ConstantInputAsIntVector(1, &shape_input)); - int dynamic_dimension = -1; - - for (int d = 0; d < num_dims; ++d) { - const int32 size = shape_input[d]; - if (size == -1) { - if (dynamic_dimension == -1) { + if (ctx->InputXlaShape(0)->is_dynamic()) { + std::vector dynamic_dims; + OP_REQUIRES_OK(ctx, + ctx->ResolveInputDynamismIntoPredVector(1, &dynamic_dims)); + for (int d = 0; d < num_dims; ++d) { + const bool dim_is_dynamic = dynamic_dims[d]; + if (dim_is_dynamic) { dynamic_dimension = d; - } else { - if (unknown_index != d) { - dynamic_dimension = d; - } } } - } + // When reshaping from dynamic dimension, unkwown index is considered + // dynamic. E.g., + // [<=10] + // | + // Reshape + // | + // [2, -1] + // The second dimension is dynamic. + if (dynamic_dimension == -1) { + dynamic_dimension = unknown_index; + } + VLOG(2) << "Reshape from " << ctx->InputXlaShape(0)->ToString() << " to " + << xla::VectorString(shape.dim_sizes()) + << ", dynamic_dim=" << dynamic_dimension; + } // Pass unknown_index to Xla::Reshape as a hint for dynamic shape inference // in XLA to know which output dimension is dynamic. ctx->SetOutput(0, xla::ReshapeWithInferredDimension( diff --git a/tensorflow/compiler/tf2xla/xla_expression.cc b/tensorflow/compiler/tf2xla/xla_expression.cc index 34e108bb6bf..f0cc8d26709 100644 --- a/tensorflow/compiler/tf2xla/xla_expression.cc +++ b/tensorflow/compiler/tf2xla/xla_expression.cc @@ -101,6 +101,48 @@ xla::XlaOp XlaExpression::AsXlaOp(xla::XlaBuilder* builder) const { }); } +xla::StatusOr XlaExpression::ResolveDynamism( + xla::Client* client) const { + switch (kind()) { + case Kind::kConstant: { + // Constant values are considered static. 
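+      // Report an all-false tensor of the same shape: no element of a
+      // constant is dynamic.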
+ Tensor constant_false(DT_BOOL, constant_value().shape()); + auto flat = constant_false.flat(); + for (int64 i = 0; i < flat.size(); ++i) flat(i) = false; + return constant_false; + } + case Kind::kXlaOp: + break; + case Kind::kTensorList: + TF_FALLTHROUGH_INTENDED; + case Kind::kResource: + TF_FALLTHROUGH_INTENDED; + case Kind::kInvalid: + return errors::InvalidArgument( + "ResolveDynamism called on unsupported XlaExpression: ", + HumanString()); + } + + if (!client) + return errors::InvalidArgument("client is required to resolve constant"); + + TF_ASSIGN_OR_RETURN(xla::XlaComputation constant_graph, + handle().builder()->BuildDynamicInferenceGraph(handle())); + + TF_ASSIGN_OR_RETURN(TensorShape shape, GetShape()); + + // The XLA layout is specified minor to major, and TensorFlow uses a major to + // minor order. + std::vector layout_indices(shape.dims()); + std::iota(layout_indices.rbegin(), layout_indices.rend(), 0); + xla::Layout layout = xla::LayoutUtil::MakeLayout(layout_indices); + TF_ASSIGN_OR_RETURN(xla::Literal literal, + client->ComputeConstant(constant_graph, &layout)); + Tensor tensor(DT_BOOL); + TF_RETURN_IF_ERROR(LiteralToHostTensor(literal, DT_BOOL, &tensor)); + return tensor; +} + xla::StatusOr> XlaExpression::ResolveConstant( xla::Client* client, bool dynamic_dimension_is_minus_one) const { switch (kind()) { diff --git a/tensorflow/compiler/tf2xla/xla_expression.h b/tensorflow/compiler/tf2xla/xla_expression.h index 3010964c5b7..3546368ff7b 100644 --- a/tensorflow/compiler/tf2xla/xla_expression.h +++ b/tensorflow/compiler/tf2xla/xla_expression.h @@ -99,6 +99,10 @@ class XlaExpression { xla::StatusOr> ResolveConstant( xla::Client* client, bool dynamic_dimension_is_minus_one = false) const; + // ResolveDynamism computes where a value inside this op is dynamic or can be + // inferred at compile time. + xla::StatusOr ResolveDynamism(xla::Client* client) const; + // Returns the shape of the tensor. // The shape of a resource is the shape of a resource handle (i.e., a scalar), // not the shape of the resource's value. diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.cc b/tensorflow/compiler/tf2xla/xla_op_kernel.cc index 735a6c7291e..07537546d52 100644 --- a/tensorflow/compiler/tf2xla/xla_op_kernel.cc +++ b/tensorflow/compiler/tf2xla/xla_op_kernel.cc @@ -243,6 +243,48 @@ Status XlaOpKernelContext::ConstantInputAsFloatScalar(int index, double* out) { return LiteralToFloat64Scalar(literal, out); } +static Status LiteralToPredVector(const xla::LiteralSlice& literal, + std::vector* out) { + if (literal.shape().rank() != 1) { + return errors::InvalidArgument("value is not 1D, rank: ", + literal.shape().rank()); + } + int64 size = xla::ShapeUtil::ElementsIn(literal.shape()); + if (literal.shape().element_type() != xla::PRED) { + return errors::InvalidArgument("value is not PRED"); + } + for (int64 i = 0; i < size; ++i) { + out->push_back(literal.Get({i})); + } + return Status::OK(); +} + +Status XlaOpKernelContext::ResolveInputDynamismIntoPredVector( + int index, std::vector* out) { + xla::Literal literal; + XlaExpression e = InputExpression(index); + auto* client = compiler() ? 
compiler()->client() : nullptr; + xla::StatusOr dynamism_or_status = e.ResolveDynamism(client); + if (!dynamism_or_status.ok()) { + Status status = dynamism_or_status.status(); + errors::AppendToMessage(&status, "while evaluating input dynamism", index, + " of ", context_->op_kernel().type_string()); + return status; + } + Tensor dynamism = dynamism_or_status.ValueOrDie(); + + Tensor temp(dynamism.dtype()); + TensorShape tensor_shape({InputShape(index).num_elements()}); + if (!temp.CopyFrom(dynamism, tensor_shape)) { + return errors::InvalidArgument( + context_->op_kernel().name(), " input ", index, " has shape ", + dynamism.shape().DebugString(), " which is not a R1 ", tensor_shape); + } + + TF_ASSIGN_OR_RETURN(literal, HostTensorToLiteral(temp)); + return LiteralToPredVector(literal, out); +} + // Converts an int32 or int64 1D literal to an int64 vector. static Status LiteralToInt64Vector(const xla::LiteralSlice& literal, std::vector* out) { diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.h b/tensorflow/compiler/tf2xla/xla_op_kernel.h index 3cf51e6ec6f..75c3e60171a 100644 --- a/tensorflow/compiler/tf2xla/xla_op_kernel.h +++ b/tensorflow/compiler/tf2xla/xla_op_kernel.h @@ -116,6 +116,9 @@ class XlaOpKernelContext { // returns a one-element list. Status InputList(absl::string_view name, std::vector* handles, std::vector* shapes); + // Evaluates input and returns their dynamism vector in a vector of + // predicates. + Status ResolveInputDynamismIntoPredVector(int index, std::vector* out); // Helper methods for constant inputs. diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc index 484fb0aabe7..8de8216c005 100644 --- a/tensorflow/compiler/xla/client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_builder.cc @@ -32,6 +32,7 @@ limitations under the License. #include "tensorflow/compiler/xla/client/xla_computation.h" #include "tensorflow/compiler/xla/comparison_util.h" #include "tensorflow/compiler/xla/execution_options_util.h" +#include "tensorflow/compiler/xla/service/hlo.pb.h" #include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" @@ -39,6 +40,7 @@ limitations under the License. #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/util.h" #include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/errors.h" namespace xla { @@ -71,6 +73,52 @@ void SetProtoIdAndName(T* entry, const string& base_name, char separator, entry->set_id(id); entry->set_name(GetFullName(base_name, separator, id)); } + +ShapeProto ConvertShapeProtoToPred(const ShapeProto& shape_proto) { + return ShapeUtil::ChangeElementType(Shape(shape_proto), PRED).ToProto(); +} + +HloInstructionProto CreateConstantInstruction(int64 id, const Shape& shape, + bool pred) { + HloInstructionProto const_instr; + Literal literal = LiteralUtil::CreateR0(pred); + Literal literal_broadcast = literal.Broadcast(shape, {}).ValueOrDie(); + *const_instr.mutable_shape() = shape.ToProto(); + *const_instr.mutable_literal() = literal_broadcast.ToProto(); + *const_instr.mutable_opcode() = HloOpcodeString(HloOpcode::kConstant); + const_instr.set_id(id); + return const_instr; +} + +// Converts a HloComputation into ReducerOr with predicate types. 
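+// For example, an f32 add-reducer {p0, p1 -> add(p0, p1)} becomes a PRED
+// or-reducer {p0, p1 -> or(p0, p1)}, so reducing the per-element dynamism
+// predicates reports a dynamic result whenever any reduced element is dynamic.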
+HloComputationProto CreateReduceOr(int64 reducer_id, + HloComputationProto* original_reducer) { + HloComputationProto reducer; + SetProtoIdAndName(&reducer, StrCat("reduce_or"), kNameSeparator, reducer_id); + std::vector operands_id; + for (auto& inst : original_reducer->instructions()) { + // Copy params. + if (StringToHloOpcode(inst.opcode()).ValueOrDie() == + HloOpcode::kParameter) { + HloInstructionProto* new_param = reducer.add_instructions(); + *new_param = inst; + *new_param->mutable_shape() = ConvertShapeProtoToPred(inst.shape()); + operands_id.push_back(inst.id()); + } + if (inst.id() == original_reducer->root_id()) { + HloInstructionProto* new_root = reducer.add_instructions(); + *new_root = inst; + *new_root->mutable_shape() = ConvertShapeProtoToPred(inst.shape()); + *new_root->mutable_opcode() = HloOpcodeString(HloOpcode::kOr); + new_root->clear_operand_ids(); + for (int64 operand_id : operands_id) { + new_root->add_operand_ids(operand_id); + } + reducer.set_root_id(inst.id()); + } + } + return reducer; +} } // namespace namespace internal { @@ -2842,6 +2890,196 @@ StatusOr XlaBuilder::IsConstant(XlaOp operand) const { return is_constant; } +StatusOr XlaBuilder::BuildDynamicInferenceGraph(XlaOp root_op) { + TF_ASSIGN_OR_RETURN(const HloInstructionProto* root, + LookUpInstruction(root_op)); + + HloComputationProto entry; + SetProtoIdAndName(&entry, StrCat(name_, "_dynamic_inference"), kNameSeparator, + GetNextId()); + ProgramShapeProto* program_shape = entry.mutable_program_shape(); + *program_shape->mutable_result() = + ShapeUtil::ChangeElementType(Shape(root->shape()), PRED).ToProto(); + + std::set seen; + struct WorkItem { + explicit WorkItem(int64 handle, bool need_rewrite) + : handle(handle), need_rewrite(need_rewrite) {} + int64 handle; + // If need_rewrite is true, the instruction will be copied and rewrite into + // a pred instruction indicating if each value is dynamic. If need_rewrite + // is false, simply copy the instruction to the output graph. + // E.g., + // For select(P, A, B), we need to rewrite A and B into predicates, but + // don't need to rewrite P. + bool need_rewrite; + }; + std::queue worklist; + worklist.push(WorkItem(root->id(), true)); + entry.set_root_id(root->id()); + std::vector called_computatons; + // Rewritre instruction with id "from" into the new graph. + // Returns more work items that need to finish. + auto rewrite_instruction = + [&](int64 from, bool need_rewrite) -> StatusOr> { + // Rewrite the instruction with following rules: + // - Unary ops: Convert into bitcast (identity) with type Pred. + // - Binary ops: Convert into binary or. + // - Select: Convert into binary or with its two data operands. + // - Concat / Tuple/ GTE / Bitcast: Copy. + // - Param: Convert to constant True. + // - GetDimensionSize: Convert to constant True if dimension is dynamic, + // contant False if dimension is static. + // - Reduce: Convert to reduce or. + // - Constant: Convert to constant False. + // - Other ops: Not supported. + // Create the instruction for the new handle. 
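+    // Look up the original instruction, clone it into the new graph, and then
+    // rewrite the clone in place according to the rules above.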
+ TF_ASSIGN_OR_RETURN(const HloInstructionProto* instr_proto, + LookUpInstructionByHandle(from)); + + TF_ASSIGN_OR_RETURN(HloOpcode opcode, + StringToHloOpcode(instr_proto->opcode())); + std::vector operands_todo; + auto* new_instr = entry.add_instructions(); + *new_instr = *instr_proto; + for (auto operand_id : new_instr->operand_ids()) { + operands_todo.emplace_back(operand_id, need_rewrite); + } + + if (!need_rewrite) { + *new_instr->mutable_name() = + GetFullName(instr_proto->opcode(), kNameSeparator, instr_proto->id()); + return operands_todo; + } + *new_instr->mutable_shape() = ConvertShapeProtoToPred(instr_proto->shape()); + Shape new_shape(new_instr->shape()); + switch (opcode) { + case HloOpcode::kAbs: + case HloOpcode::kRoundNearestAfz: + case HloOpcode::kBitcast: + case HloOpcode::kCeil: + case HloOpcode::kCollectivePermuteDone: + case HloOpcode::kCos: + case HloOpcode::kClz: + case HloOpcode::kExp: + case HloOpcode::kExpm1: + case HloOpcode::kFloor: + case HloOpcode::kImag: + case HloOpcode::kIsFinite: + case HloOpcode::kLog: + case HloOpcode::kLog1p: + case HloOpcode::kNot: + case HloOpcode::kNegate: + case HloOpcode::kPopulationCount: + case HloOpcode::kReal: + case HloOpcode::kRsqrt: + case HloOpcode::kLogistic: + case HloOpcode::kSign: + case HloOpcode::kSin: + case HloOpcode::kConvert: + case HloOpcode::kSqrt: + case HloOpcode::kCbrt: + case HloOpcode::kTanh: + CHECK_EQ(instr_proto->operand_ids_size(), 1); + *new_instr->mutable_opcode() = HloOpcodeString(HloOpcode::kBitcast); + break; + case HloOpcode::kAdd: + case HloOpcode::kAtan2: + case HloOpcode::kDivide: + case HloOpcode::kComplex: + case HloOpcode::kMaximum: + case HloOpcode::kMinimum: + case HloOpcode::kMultiply: + case HloOpcode::kPower: + case HloOpcode::kRemainder: + case HloOpcode::kSubtract: + case HloOpcode::kCompare: + case HloOpcode::kAnd: + case HloOpcode::kOr: + case HloOpcode::kXor: + case HloOpcode::kShiftLeft: + case HloOpcode::kShiftRightArithmetic: + case HloOpcode::kShiftRightLogical: + CHECK_EQ(instr_proto->operand_ids_size(), 2); + *new_instr->mutable_opcode() = HloOpcodeString(HloOpcode::kOr); + break; + case HloOpcode::kSelect: + operands_todo[0].need_rewrite = false; + break; + case HloOpcode::kGather: + operands_todo[1].need_rewrite = false; + break; + case HloOpcode::kReduce: { + int64 reducer_id = new_instr->called_computation_ids(0); + called_computatons.push_back( + CreateReduceOr(reducer_id, &embedded_[reducer_id])); + break; + } + case HloOpcode::kTuple: + case HloOpcode::kTranspose: + case HloOpcode::kGetTupleElement: + case HloOpcode::kSlice: + case HloOpcode::kBroadcast: + case HloOpcode::kConcatenate: + case HloOpcode::kReshape: + break; + case HloOpcode::kGetDimensionSize: { + int64 dimension = instr_proto->dimensions(0); + int64 operand_handle = instr_proto->operand_ids(0); + TF_ASSIGN_OR_RETURN(const HloInstructionProto* operand_proto, + LookUpInstructionByHandle(operand_handle)); + + *new_instr = CreateConstantInstruction( + from, new_shape, + operand_proto->shape().is_dynamic_dimension(dimension)); + operands_todo.clear(); + break; + } + case HloOpcode::kConstant: + *new_instr = CreateConstantInstruction(from, new_shape, false); + break; + case HloOpcode::kParameter: + *new_instr = CreateConstantInstruction(from, new_shape, true); + break; + default: + return InvalidArgument("Dynamic inferencing %s is not supported", + instr_proto->DebugString()); + } + *new_instr->mutable_name() = + GetFullName(instr_proto->opcode(), kNameSeparator, instr_proto->id()); + return operands_todo; + 
}; + + while (!worklist.empty()) { + WorkItem item = worklist.front(); + worklist.pop(); + if (!seen.insert(item.handle).second) { + continue; + } + TF_ASSIGN_OR_RETURN(auto todos, + rewrite_instruction(item.handle, item.need_rewrite)); + for (WorkItem& todo : todos) { + worklist.push(todo); + } + } + absl::c_sort(*entry.mutable_instructions(), + [](const HloInstructionProto& p1, + const HloInstructionProto& p2) { return p1.id() < p2.id(); }); + XlaComputation computation(entry.id()); + HloModuleProto* module = computation.mutable_proto(); + module->set_name(entry.name()); + module->set_id(entry.id()); + module->set_entry_computation_name(entry.name()); + module->set_entry_computation_id(entry.id()); + *module->mutable_host_program_shape() = *program_shape; + for (auto& called_comp : called_computatons) { + *module->add_computations() = called_comp; + } + *module->add_computations() = std::move(entry); + XLA_VLOG_LINES(3, module->DebugString()); + return std::move(computation); +} + StatusOr XlaBuilder::BuildConstantSubGraph( XlaOp root_op, bool dynamic_dimension_is_minus_one) { TF_ASSIGN_OR_RETURN(bool is_constant, IsConstant(root_op)); diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h index aa5074d28d9..6753b6dd919 100644 --- a/tensorflow/compiler/xla/client/xla_builder.h +++ b/tensorflow/compiler/xla/client/xla_builder.h @@ -278,6 +278,31 @@ class XlaBuilder { StatusOr BuildConstantSubGraph( XlaOp root_op, bool dynamic_dimension_is_uint_max = false); + // Similar to BuildConstantSubGraph, but with root element type changed to + // boolean. A true value in the root indicates that the value is dynamic while + // false value indicates that the value is a constant. This will copy the + // needed ops/computations to the subgraph. + // + // E.g., + // Compuptation { + // a = 3 + // b = param(0) + // ROOT Tuple(a + b, a + 1, b + 1) + // } + // Calling BuildDynamicInferenceGraph on root will produce the following + // graph: + // + // Compuptation { + // a = False + // b = True + // ROOT Tuple(a | b, a, b) + // } + // + // The result, which is (True, False, True) after evaluation, can be + // interpreted as "First element is dynamic; Second element is static; Third + // element is dynamic". + StatusOr BuildDynamicInferenceGraph(XlaOp root_op); + // Returns the first error that was encountered while building the // computation. 
When an error is encountered, by default we return a vacuous // XlaOp and inform the user of the error that occurred while diff --git a/tensorflow/compiler/xla/service/dynamic_dimension_inference.cc b/tensorflow/compiler/xla/service/dynamic_dimension_inference.cc index 2f2456863e9..36429d3d755 100644 --- a/tensorflow/compiler/xla/service/dynamic_dimension_inference.cc +++ b/tensorflow/compiler/xla/service/dynamic_dimension_inference.cc @@ -805,7 +805,8 @@ Status DynamicDimensionInferenceVisitor::HandleReshape(HloInstruction* hlo) { } if (input_dim_size > output_dim_size) { - TF_RET_CHECK(input_dim_size % output_dim_size == 0); + TF_RET_CHECK(input_dim_size % output_dim_size == 0) + << reshape->ToString(); const int64 divisor = input_dim_size / output_dim_size; HloInstruction* divisor_hlo = hlo->parent()->AddInstruction(HloInstruction::CreateConstant( diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc index 02fcaafd19d..0833919b124 100644 --- a/tensorflow/compiler/xla/shape_util.cc +++ b/tensorflow/compiler/xla/shape_util.cc @@ -783,9 +783,18 @@ ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout( /* static */ Shape ShapeUtil::ChangeElementType(const Shape& original, PrimitiveType type) { - Shape new_shape = original; - new_shape.set_element_type(type); - return new_shape; + if (original.IsTuple()) { + std::vector new_operands; + new_operands.reserve(original.tuple_shapes_size()); + for (const Shape& operand : original.tuple_shapes()) { + new_operands.push_back(ChangeElementType(operand, type)); + } + return MakeTupleShape(new_operands); + } else { + Shape new_shape = original; + new_shape.set_element_type(type); + return new_shape; + } } /* static */ bool ShapeUtil::IndexIsValid(const Shape& shape, diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 927f9d14883..17444c042e7 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -2088,6 +2088,31 @@ xla_test( ], ) +xla_test( + name = "dynamism_inference_test", + srcs = ["dynamism_inference_test.cc"], + deps = [ + ":test_macros_header", + "//tensorflow/compiler/xla:literal", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:test", + "//tensorflow/compiler/xla:xla_data_proto_cc", + "//tensorflow/compiler/xla/client:client_library", + "//tensorflow/compiler/xla/client:global_data", + "//tensorflow/compiler/xla/client:xla_builder", + "//tensorflow/compiler/xla/client:xla_computation", + "//tensorflow/compiler/xla/client/lib:prng", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/compiler/xla/tests:test_utils", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/core:lib", + "//tensorflow/core:test", + "@com_google_absl//absl/strings", + ], +) + xla_test( name = "compute_constant_test", srcs = ["compute_constant_test.cc"], diff --git a/tensorflow/compiler/xla/tests/dynamism_inference_test.cc b/tensorflow/compiler/xla/tests/dynamism_inference_test.cc new file mode 100644 index 00000000000..ba4092def16 --- /dev/null +++ b/tensorflow/compiler/xla/tests/dynamism_inference_test.cc @@ -0,0 +1,215 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include +#include + +#include "absl/strings/match.h" +#include "tensorflow/compiler/xla/client/client_library.h" +#include "tensorflow/compiler/xla/client/global_data.h" +#include "tensorflow/compiler/xla/client/lib/prng.h" +#include "tensorflow/compiler/xla/client/xla_builder.h" +#include "tensorflow/compiler/xla/client/xla_computation.h" +#include "tensorflow/compiler/xla/layout_util.h" +#include "tensorflow/compiler/xla/literal.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/test.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/tests/test_utils.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +// An enumerator for the client types that we want to iterate over in +// the various tests. +enum class ClientType { kLocal, kCompileOnly }; +ClientType client_types[] = {ClientType::kLocal, ClientType::kCompileOnly}; + +class DynamismInferenceTest : public ::testing::Test { + public: + explicit DynamismInferenceTest(se::Platform* platform = nullptr) + : platform_(platform) {} + + string TestName() const { + return ::testing::UnitTest::GetInstance()->current_test_info()->name(); + } + + Client* ClientOrDie(se::Platform* platform, ClientType client_type) { + if (client_type == ClientType::kLocal) { + StatusOr result = + ClientLibrary::GetOrCreateLocalClient(platform); + TF_CHECK_OK(result.status()) + << "could not create LocalClient for testing"; + return result.ValueOrDie(); + } else if (client_type == ClientType::kCompileOnly) { + StatusOr result = + ClientLibrary::GetOrCreateCompileOnlyClient(platform); + TF_CHECK_OK(result.status()) + << "could not create CompileOnlyClient for testing"; + return result.ValueOrDie(); + } + LOG(FATAL) << "invalid client_type value"; + } + + StatusOr ComputeDynamismLiteral(Client* client, XlaOp operand, + XlaBuilder* builder, + Layout* output_layout = nullptr) { + TF_ASSIGN_OR_RETURN(auto subgraph, + builder->BuildDynamicInferenceGraph(operand)); + TF_ASSIGN_OR_RETURN(auto computed, + client->ComputeConstant(subgraph, output_layout)); + return std::move(computed); + } + + StatusOr ComputeDynamismScalar(Client* client, XlaOp operand, + XlaBuilder* builder, + ShapeIndex index = {}) { + TF_ASSIGN_OR_RETURN(auto literal, ComputeDynamismLiteral(client, operand, + builder, nullptr)); + return literal.Get({}, index); + } + + se::Platform* platform_; +}; + +TEST_F(DynamismInferenceTest, ScalarInt32Literal) { + for (ClientType client_type : client_types) { + Client* client = ClientOrDie(platform_, client_type); + XlaBuilder b(TestName()); + auto computation = ConstantR0(&b, 42); + + auto value = ComputeDynamismScalar(client, computation, &b); + ASSERT_TRUE(value.ok()) << value.status(); + // A constant is 
not dynamic. + EXPECT_EQ(value.ValueOrDie(), false); + } +} + +TEST_F(DynamismInferenceTest, TupleGteKeepsDynamism) { + for (ClientType client_type : client_types) { + Client* client = ClientOrDie(platform_, client_type); + XlaBuilder b(TestName()); + auto c = ConstantR0(&b, 42); + auto p = Parameter(&b, 0, ShapeUtil::MakeScalarShape(S32), "0"); + + auto tuple = Tuple(&b, {c, p}); + auto gte0 = GetTupleElement(tuple, 0); + auto gte1 = GetTupleElement(tuple, 1); + auto tuple_2 = Tuple(&b, {gte0, gte1}); + EXPECT_EQ(ComputeDynamismScalar(client, tuple_2, &b, {0}).ValueOrDie(), + false); + EXPECT_EQ(ComputeDynamismScalar(client, tuple_2, &b, {1}).ValueOrDie(), + true); + } +} + +TEST_F(DynamismInferenceTest, ConcatSliceReshapeKeepsDynamism) { + for (ClientType client_type : client_types) { + Client* client = ClientOrDie(platform_, client_type); + XlaBuilder b(TestName()); + auto c = ConstantR0(&b, 42); + auto p = Parameter(&b, 0, ShapeUtil::MakeScalarShape(S32), "0"); + + auto concat = ConcatScalars(&b, {c, p}); + auto slice0 = SliceInDim(concat, 0, 1, 1, 0); + auto reshape0 = Reshape(slice0, {}); + auto slice1 = SliceInDim(concat, 1, 2, 1, 0); + auto reshape1 = Reshape(slice1, {}); + auto tuple_2 = Tuple(&b, {reshape0, reshape1}); + EXPECT_EQ(ComputeDynamismScalar(client, tuple_2, &b, {0}).ValueOrDie(), + false); + EXPECT_EQ(ComputeDynamismScalar(client, tuple_2, &b, {1}).ValueOrDie(), + true); + } +} + +TEST_F(DynamismInferenceTest, ParameterIsDynamic) { + for (ClientType client_type : client_types) { + Client* client = ClientOrDie(platform_, client_type); + XlaBuilder b(TestName()); + auto computation = Parameter(&b, 0, ShapeUtil::MakeScalarShape(S32), "0"); + + auto value = ComputeDynamismScalar(client, computation, &b); + ASSERT_TRUE(value.ok()) << value.status(); + // A parameter is considered dynamic. 
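+    // (BuildDynamicInferenceGraph rewrites parameters to constant True.)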
+ EXPECT_EQ(value.ValueOrDie(), true); + } +} + +TEST_F(DynamismInferenceTest, UnaryOpKeepsDynamism) { + for (ClientType client_type : client_types) { + Client* client = ClientOrDie(platform_, client_type); + XlaBuilder b(TestName()); + auto c = ConstantR0(&b, 42); + auto p = Parameter(&b, 0, ShapeUtil::MakeScalarShape(S32), "0"); + + auto neg0 = Neg(c); + auto neg1 = Neg(p); + auto tuple_2 = Tuple(&b, {neg0, neg1}); + EXPECT_EQ(ComputeDynamismScalar(client, tuple_2, &b, {0}).ValueOrDie(), + false); + EXPECT_EQ(ComputeDynamismScalar(client, tuple_2, &b, {1}).ValueOrDie(), + true); + } +} + +TEST_F(DynamismInferenceTest, BinaryOpsOrsDynamism) { + for (ClientType client_type : client_types) { + Client* client = ClientOrDie(platform_, client_type); + XlaBuilder b(TestName()); + auto c = ConstantR0(&b, 42); + auto p = Parameter(&b, 0, ShapeUtil::MakeScalarShape(S32), "0"); + + // Static value + static value = static + auto add1 = Add(c, c); + // Dynamic value + dynamic value = dynamic + auto add2 = Add(p, c); + auto tuple_2 = Tuple(&b, {add1, add2}); + EXPECT_EQ(ComputeDynamismScalar(client, tuple_2, &b, {0}).ValueOrDie(), + false); + EXPECT_EQ(ComputeDynamismScalar(client, tuple_2, &b, {1}).ValueOrDie(), + true); + } +} + +TEST_F(DynamismInferenceTest, GetDimensionSize) { + for (ClientType client_type : client_types) { + Client* client = ClientOrDie(platform_, client_type); + XlaBuilder b(TestName()); + // param = Param([<=2, 3]) + // get_dimension_size(param, 0) is dynamic + // get_dimension_size(param, 1) is static + auto p = + Parameter(&b, 0, ShapeUtil::MakeShape(S32, {2, 3}, {true, false}), "0"); + + auto gds0 = GetDimensionSize(p, 0); + auto gds1 = GetDimensionSize(p, 1); + auto tuple_2 = Tuple(&b, {gds0, gds1}); + EXPECT_EQ(ComputeDynamismScalar(client, tuple_2, &b, {0}).ValueOrDie(), + true); + EXPECT_EQ(ComputeDynamismScalar(client, tuple_2, &b, {1}).ValueOrDie(), + false); + } +} + +} // namespace +} // namespace xla From 0df7db5b43f19a3ecfb10b740026f215d57d07b7 Mon Sep 17 00:00:00 2001 From: Ken Franko Date: Fri, 7 Aug 2020 16:37:47 -0700 Subject: [PATCH 2378/2522] Only mark ops in the `tf` dialect for outside compilation. There are ops from other dialects like tf_device that will be rewritten in other steps and will never to compiled to HLO. PiperOrigin-RevId: 325532336 Change-Id: Ic763dc7bd2de667935f597f24bd3e043ca94d25b --- .../mark_ops_for_outside_compilation.cc | 31 +++++++++++++------ 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/mark_ops_for_outside_compilation.cc b/tensorflow/compiler/mlir/tensorflow/transforms/mark_ops_for_outside_compilation.cc index c0889affb30..cd34525f2af 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/mark_ops_for_outside_compilation.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/mark_ops_for_outside_compilation.cc @@ -72,14 +72,17 @@ bool MatchesPattern(Operation& op, return (supported_ops.contains(op.getName())); } -// Checks if the op is supported inside of a device cluster. +// Checks if the op is supported inside of a device cluster. Ops not +// in `tf_dialect` are considered supported. bool IsSupportedOp(Operation& op, - const llvm::DenseSet& supported_ops) { - // TODO(b/161726307): Check the allowed ops list in LegalizeTfWithTf2XlaPass - // as well. 
- return !HasStringOperand(op) && !HasStringResult(op) && - (MatchesPattern(op, supported_ops) || - mhlo::IsOpAllowedTf2XlaFallback(&op)); + const llvm::DenseSet& supported_ops, + const Dialect* tf_dialect) { + if (op.getDialect() != tf_dialect) + return true; + else + return !HasStringOperand(op) && !HasStringResult(op) && + (MatchesPattern(op, supported_ops) || + mhlo::IsOpAllowedTf2XlaFallback(&op)); } // Checks all regions of `op` for captured string operands. @@ -96,10 +99,12 @@ bool HasCapturedStringOperand(Operation* op) { return string_operand; } +// Marks uncompilable ops that are in `tf_dialect` for outside compilation. LogicalResult MarkUncompilableOps( - Block* block, llvm::DenseSet& supported_ops) { + const Dialect* tf_dialect, Block* block, + llvm::DenseSet& supported_ops) { block->walk([&](Operation* op) { - if (!IsSupportedOp(*op, supported_ops)) { + if (!IsSupportedOp(*op, supported_ops, tf_dialect)) { op->setAttr(kXlaOutsideCompilationAttr, StringAttr::get("auto", op->getContext())); } @@ -115,6 +120,11 @@ LogicalResult MarkUncompilableOps( void MarkOpsForOutsideCompilation::runOnOperation() { auto module = getOperation(); + const Dialect* tf_dialect = getContext().getRegisteredDialect("tf"); + if (!tf_dialect) { + getOperation().emitError() << "'tf' dialect is not registered"; + return signalPassFailure(); + } OwningRewritePatternList patterns; mhlo::PopulateLegalizeTfPatterns(module.getContext(), &patterns); @@ -129,7 +139,8 @@ void MarkOpsForOutsideCompilation::runOnOperation() { AddSupportedControlFlowOps(module.getContext(), &supported_ops); auto result = module.walk([&](tf_device::ClusterOp cluster) { - if (failed(MarkUncompilableOps(&cluster.GetBody(), supported_ops))) + if (failed( + MarkUncompilableOps(tf_dialect, &cluster.GetBody(), supported_ops))) return WalkResult::interrupt(); return WalkResult::advance(); From 48c12a5abaf802d6a194df46f9ff95cd1557540d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 7 Aug 2020 16:39:26 -0700 Subject: [PATCH 2379/2522] Calculate TensorCore utilization PiperOrigin-RevId: 325532659 Change-Id: I0450e1ec72e22d9d9c31a26d2e16e0950c13276a --- tensorflow/core/profiler/convert/BUILD | 2 + .../profiler/convert/op_stats_to_tf_stats.cc | 35 +++++--- .../convert/op_stats_to_tf_stats_test.cc | 79 ++++++++++++++++--- .../core/profiler/protobuf/tf_stats.proto | 3 + 4 files changed, 98 insertions(+), 21 deletions(-) diff --git a/tensorflow/core/profiler/convert/BUILD b/tensorflow/core/profiler/convert/BUILD index 3261d918e04..66e027ed8ac 100644 --- a/tensorflow/core/profiler/convert/BUILD +++ b/tensorflow/core/profiler/convert/BUILD @@ -153,6 +153,8 @@ cc_library( "//tensorflow/core/profiler/protobuf:op_metrics_proto_cc", "//tensorflow/core/profiler/protobuf:op_stats_proto_cc", "//tensorflow/core/profiler/protobuf:tf_stats_proto_cc", + "//tensorflow/core/profiler/utils:kernel_stats_utils", + "//tensorflow/core/profiler/utils:math_utils", "//tensorflow/core/profiler/utils:op_metrics_db_utils", "//tensorflow/core/profiler/utils:time_utils", ], diff --git a/tensorflow/core/profiler/convert/op_stats_to_tf_stats.cc b/tensorflow/core/profiler/convert/op_stats_to_tf_stats.cc index e23813a5b5d..67024809e61 100644 --- a/tensorflow/core/profiler/convert/op_stats_to_tf_stats.cc +++ b/tensorflow/core/profiler/convert/op_stats_to_tf_stats.cc @@ -20,6 +20,8 @@ limitations under the License. 
#include "tensorflow/core/profiler/protobuf/op_metrics.pb.h" #include "tensorflow/core/profiler/protobuf/op_stats.pb.h" #include "tensorflow/core/profiler/protobuf/tf_stats.pb.h" +#include "tensorflow/core/profiler/utils/kernel_stats_utils.h" +#include "tensorflow/core/profiler/utils/math_utils.h" #include "tensorflow/core/profiler/utils/op_metrics_db_utils.h" #include "tensorflow/core/profiler/utils/time_utils.h" @@ -40,9 +42,11 @@ TfStatsRecord ConvertOpMetricsToTfStatsRecord( return record; } -TfStatsTable GenerateTfStatsTable(const OpMetricsDb& host_tf_metrics_db, - const OpMetricsDb& device_tf_metrics_db, - double ridge_point, bool exclude_idle) { +TfStatsTable GenerateTfStatsTable( + const OpMetricsDb& host_tf_metrics_db, + const OpMetricsDb& device_tf_metrics_db, + const KernelStatsByOpName& kernel_stats_by_op_name, double ridge_point, + bool exclude_idle) { TfStatsTable tf_stats_table; TfStatsRecord sentinel; sentinel.set_rank(0); @@ -61,6 +65,15 @@ TfStatsTable GenerateTfStatsTable(const OpMetricsDb& host_tf_metrics_db, TfStatsRecord* record = tf_stats_table.add_tf_stats_record(); *record = ConvertOpMetricsToTfStatsRecord( /*on_device=*/true, *metrics, ridge_point); + // Compute TensorCore utilization only on device side. + auto iter = kernel_stats_by_op_name.find(record->op_name()); + if (iter != kernel_stats_by_op_name.end()) { + record->set_gpu_tensorcore_utilization( + SafeDivide(iter->second.tensor_core_duration_ns, + iter->second.total_duration_ns)); + } else { + record->set_gpu_tensorcore_utilization(0.0); + } SetRankAndDeviceTimeFractions(total_device_time_us, *prev_record, record); prev_record = record; } @@ -77,6 +90,8 @@ TfStatsTable GenerateTfStatsTable(const OpMetricsDb& host_tf_metrics_db, TfStatsRecord* record = tf_stats_table.add_tf_stats_record(); *record = ConvertOpMetricsToTfStatsRecord( /*on_device=*/false, *metrics, ridge_point); + // Host side TensorCore utilization is always 0.0 + record->set_gpu_tensorcore_utilization(0.0); SetRankAndHostTimeFractions(total_host_time_us, *prev_record, record); prev_record = record; } @@ -90,13 +105,15 @@ TfStatsDatabase ConvertOpStatsToTfStats(const OpStats& op_stats) { OpMetricsDb device_tf_metrics_db = CreateTfMetricsDbFromDeviceOpMetricsDb(op_stats.device_op_metrics_db()); double ridge_point = op_stats.perf_env().ridge_point(); + KernelStatsByOpName kernel_stats_by_op_name = + GroupKernelReportsByOpName(op_stats.kernel_stats_db()); TfStatsDatabase tf_stats_db; - *tf_stats_db.mutable_with_idle() = - GenerateTfStatsTable(host_tf_metrics_db, device_tf_metrics_db, - ridge_point, /*exclude_idle=*/false); - *tf_stats_db.mutable_without_idle() = - GenerateTfStatsTable(host_tf_metrics_db, device_tf_metrics_db, - ridge_point, /*exclude_idle=*/true); + *tf_stats_db.mutable_with_idle() = GenerateTfStatsTable( + host_tf_metrics_db, device_tf_metrics_db, kernel_stats_by_op_name, + ridge_point, /*exclude_idle=*/false); + *tf_stats_db.mutable_without_idle() = GenerateTfStatsTable( + host_tf_metrics_db, device_tf_metrics_db, kernel_stats_by_op_name, + ridge_point, /*exclude_idle=*/true); return tf_stats_db; } diff --git a/tensorflow/core/profiler/convert/op_stats_to_tf_stats_test.cc b/tensorflow/core/profiler/convert/op_stats_to_tf_stats_test.cc index 4abd210705b..5cf2847ea0d 100644 --- a/tensorflow/core/profiler/convert/op_stats_to_tf_stats_test.cc +++ b/tensorflow/core/profiler/convert/op_stats_to_tf_stats_test.cc @@ -32,32 +32,69 @@ namespace tensorflow { namespace profiler { namespace { -void AddTensorFlowOpEvent(absl::string_view 
tf_op_fullname, - int64 start_timestamp_ns, int64 duration_ns, - bool on_device, absl::string_view kernel_name, - XPlaneBuilder* plane, XLineBuilder* line) { +XEventBuilder AddTensorFlowOpEvent(absl::string_view tf_op_fullname, + int64 start_timestamp_ns, int64 duration_ns, + bool on_device, + absl::string_view kernel_name, + XPlaneBuilder* plane, XLineBuilder* line) { absl::string_view name = on_device ? kernel_name : tf_op_fullname; XEventBuilder event = line->AddEvent(*plane->GetOrCreateEventMetadata(name)); event.SetTimestampNs(start_timestamp_ns); event.SetDurationNs(duration_ns); - if (!on_device) return; + if (!on_device) return event; event.ParseAndAddStatValue(*plane->GetOrCreateStatMetadata("level 0"), tf_op_fullname); + return event; +} + +void AddTensorFlowOpEventWithKernelDetails(absl::string_view tf_op_fullname, + int64 start_timestamp_ns, + int64 duration_ns, bool on_device, + absl::string_view kernel_name, + absl::string_view kernel_details, + XPlaneBuilder* plane, + XLineBuilder* line) { + XEventBuilder event = + AddTensorFlowOpEvent(tf_op_fullname, start_timestamp_ns, duration_ns, + on_device, kernel_name, plane, line); + if (!on_device) return; + event.ParseAndAddStatValue(*plane->GetOrCreateStatMetadata("kernel_details"), + kernel_details); } TEST(OpStatsToTfStats, GpuTfStats) { - // TfOp1 has kernel1 and kernel2; TfOp2 has kernel3. + // TfOp1 has kernel1 and kernel2; TfOp2 has kernel3; + // TfOp3 has kernel4 and kernel5 and is TensorCore eligible. static constexpr char kTfOp1[] = "TfOp1"; static constexpr char kTfOp2[] = "TfOp2"; + static constexpr char kTfOp3[] = "Conv2D"; static constexpr char kKernel1[] = "kernel1"; static constexpr char kKernel2[] = "kernel2"; static constexpr char kKernel3[] = "kernel3"; + // Kernel4 is a kernel using TensorCore + static constexpr char kKernel4[] = "volta_fp16_s884gemm"; + static constexpr char kKernel5[] = "kernel5"; constexpr int64 kKernel1StartNs = 100000; constexpr int64 kKernel1DurationNs = 8000; constexpr int64 kKernel2StartNs = 110000; constexpr int64 kKernel2DurationNs = 10000; constexpr int64 kKernel3StartNs = 120000; constexpr int64 kKernel3DurationNs = 10000; + constexpr int64 kKernel4StartNs = 130000; + constexpr int64 kKernel4DurationNs = 10000; + constexpr int64 kKernel5StartNs = 150000; + constexpr int64 kKernel5DurationNs = 10000; + + // Mock kernel details for both kernel4 and kernel5. 
+ const std::string kKernelDetails = R"MULTI(registers_per_thread:32 +static_shared_memory_usage:0 +dynamic_shared_memory_usage:16384 +grid_x:2 +grid_y:1 +grid_z:1 +block_x:32 +block_y:1 +block_z:1)MULTI"; XSpace space; XPlaneBuilder device_plane( @@ -79,12 +116,19 @@ TEST(OpStatsToTfStats, GpuTfStats) { AddTensorFlowOpEvent(absl::StrCat(kTfOp2, ":", kTfOp2), kKernel3StartNs, kKernel3DurationNs, /*on_device=*/true, kKernel3, &device_plane, &stream2); + AddTensorFlowOpEventWithKernelDetails( + absl::StrCat(kTfOp3, ":", kTfOp3), kKernel4StartNs, kKernel4DurationNs, + /*on_device=*/true, kKernel4, kKernelDetails, &device_plane, &stream2); + AddTensorFlowOpEventWithKernelDetails( + absl::StrCat(kTfOp3, ":", kTfOp3), kKernel5StartNs, kKernel5DurationNs, + /*on_device=*/true, kKernel5, kKernelDetails, &device_plane, &stream2); - const OpStats op_stats = ConvertXSpaceToOpStats(space, {OP_METRICS_DB}); + const OpStats op_stats = + ConvertXSpaceToOpStats(space, {OP_METRICS_DB, KERNEL_STATS_DB}); const TfStatsDatabase tf_stats = ConvertOpStatsToTfStats(op_stats); - // TfOp1, TfOp2, Idle - EXPECT_EQ(3, tf_stats.with_idle().tf_stats_record_size()); + // TfOp1, TfOp3, TfOp2, Idle + EXPECT_EQ(4, tf_stats.with_idle().tf_stats_record_size()); const TfStatsRecord& record_0 = tf_stats.with_idle().tf_stats_record(0); EXPECT_EQ(kTfOp1, record_0.op_name()); @@ -95,11 +139,22 @@ TEST(OpStatsToTfStats, GpuTfStats) { record_0.total_self_time_in_us()); const TfStatsRecord& record_1 = tf_stats.with_idle().tf_stats_record(1); - EXPECT_EQ(kTfOp2, record_1.op_name()); - EXPECT_EQ(kTfOp2, record_1.op_type()); + EXPECT_EQ(kTfOp3, record_1.op_name()); + EXPECT_EQ(kTfOp3, record_1.op_type()); EXPECT_EQ(1, record_1.occurrences()); + EXPECT_EQ( + NanosToMicros(kKernel4DurationNs) + NanosToMicros(kKernel5DurationNs), + record_1.total_self_time_in_us()); + // GPU TensorCore utilization is 0.5 because kernel4 is using TensorCore and + // kernel5 is not using TensorCore, and they have the same duration. + EXPECT_DOUBLE_EQ(0.5, record_1.gpu_tensorcore_utilization()); + + const TfStatsRecord& record_2 = tf_stats.with_idle().tf_stats_record(2); + EXPECT_EQ(kTfOp2, record_2.op_name()); + EXPECT_EQ(kTfOp2, record_2.op_type()); + EXPECT_EQ(1, record_2.occurrences()); EXPECT_EQ(NanosToMicros(kKernel3DurationNs), - record_1.total_self_time_in_us()); + record_2.total_self_time_in_us()); } } // namespace diff --git a/tensorflow/core/profiler/protobuf/tf_stats.proto b/tensorflow/core/profiler/protobuf/tf_stats.proto index 2dae6230f50..099d8478831 100644 --- a/tensorflow/core/profiler/protobuf/tf_stats.proto +++ b/tensorflow/core/profiler/protobuf/tf_stats.proto @@ -71,4 +71,7 @@ message TfStatsRecord { string bound_by = 17; // Whether this TF-op is eagerly executed. bool is_eager = 18; + // Fraction of kernel time that utilizes GPU TensorCore. + // It is 0.0 if this op does not run on a GPU device. 
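+  // Computed per op as tensor_core_duration_ns / total_duration_ns over its
+  // GPU kernels.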
+ double gpu_tensorcore_utilization = 19; } From 6b65afa4209a8743d819693ab6c50b5df4db8af9 Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Fri, 7 Aug 2020 16:46:02 -0700 Subject: [PATCH 2380/2522] [TF2XLA] Remove XLA:Interpreter device PiperOrigin-RevId: 325533768 Change-Id: I6ebc60da947a5ba1e9b0782c7e18c996234875ce --- tensorflow/compiler/jit/BUILD | 16 --- .../compiler/jit/xla_interpreter_device.cc | 106 ------------------ 2 files changed, 122 deletions(-) delete mode 100644 tensorflow/compiler/jit/xla_interpreter_device.cc diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index 01b02ad3580..63f985935fb 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -128,22 +128,6 @@ cc_library( alwayslink = 1, ) -cc_library( - name = "xla_interpreter_device", - srcs = ["xla_interpreter_device.cc"], - visibility = [":friends"], - deps = [ - ":jit_compilation_passes", - ":xla_device", - "//tensorflow/compiler/jit/kernels:xla_ops", - "//tensorflow/compiler/tf2xla:xla_compiler", - "//tensorflow/compiler/tf2xla/kernels:xla_ops", - "//tensorflow/compiler/xla/service:interpreter_plugin", # buildcleaner: keep - "@com_google_absl//absl/memory", - ], - alwayslink = 1, -) - cc_library( name = "xla_tensor", srcs = ["xla_tensor.cc"], diff --git a/tensorflow/compiler/jit/xla_interpreter_device.cc b/tensorflow/compiler/jit/xla_interpreter_device.cc deleted file mode 100644 index f720183e196..00000000000 --- a/tensorflow/compiler/jit/xla_interpreter_device.cc +++ /dev/null @@ -1,106 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// Registers the XLA_INTERPRETER device which exposes the XLA Interpreter. 
- -#include "absl/memory/memory.h" -#include "tensorflow/compiler/jit/kernels/xla_ops.h" -#include "tensorflow/compiler/jit/xla_device.h" -#include "tensorflow/compiler/jit/xla_device_ops.h" -#include "tensorflow/compiler/tf2xla/xla_op_registry.h" - -namespace tensorflow { - -const char* const DEVICE_XLA_INTERPRETER = "XLA_INTERPRETER"; -const char* const DEVICE_INTERPRETER_XLA_JIT = "XLA_INTERPRETER_JIT"; - -constexpr std::array kExecAllTypes = { - {DT_INT8, DT_INT32, DT_INT64, DT_HALF, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, - DT_COMPLEX128, DT_BOOL, DT_BFLOAT16}}; - -class XlaInterpreterDeviceFactory : public DeviceFactory { - public: - Status ListPhysicalDevices(std::vector* devices) override; - Status CreateDevices(const SessionOptions& options, const string& name_prefix, - std::vector>* devices) override; -}; - -Status XlaInterpreterDeviceFactory::ListPhysicalDevices( - std::vector* devices) { - devices->push_back( - absl::StrCat("/physical_device:", DEVICE_XLA_INTERPRETER, ":0")); - - return Status::OK(); -} - -Status XlaInterpreterDeviceFactory::CreateDevices( - const SessionOptions& session_options, const string& name_prefix, - std::vector>* devices) { - static XlaDeviceOpRegistrations* registrations = RegisterXlaDeviceKernels( - DEVICE_XLA_INTERPRETER, DEVICE_INTERPRETER_XLA_JIT); - (void)registrations; - - XlaOpRegistry::DeviceRegistration registration; - registration.compilation_device_name = DEVICE_INTERPRETER_XLA_JIT; - registration.autoclustering_policy = - XlaOpRegistry::AutoclusteringPolicy::kAlways; - registration.cluster_resource_variable_ops_unsafely = true; - registration.cluster_stack_ops = false; - registration.cluster_tensor_array_ops = true; - registration.cluster_stateful_rng_ops = true; - registration.cluster_control_trigger = true; - registration.elide_assert_and_checknumerics = true; - registration.cluster_variant_ops = true; - registration.cluster_slow_ops = true; - registration.cluster_inaccurate_ops = true; - XlaOpRegistry::RegisterCompilationDevice(DEVICE_XLA_INTERPRETER, - registration); - - TF_ASSIGN_OR_RETURN( - auto platform, se::MultiPlatformManager::PlatformWithName("Interpreter")); - - XlaDevice::Options options; - options.platform = platform; - options.device_name_prefix = name_prefix; - options.device_name = DEVICE_XLA_INTERPRETER; - options.device_ordinal = 0; - options.compilation_device_name = DEVICE_INTERPRETER_XLA_JIT; - options.use_multiple_streams = false; - devices->push_back(absl::make_unique(session_options, options)); - - return Status::OK(); -} - -// Set priority to be below the default priority (50), so that Interpreter is -// not selected as a high priority device over other default devices. See -// constructor comments for Registrar in -// tensorflow/core/common_runtime/device_factory.h for a list of priority for -// devices. 
-REGISTER_LOCAL_DEVICE_FACTORY(DEVICE_XLA_INTERPRETER, - XlaInterpreterDeviceFactory, 40); - -// Kernel registrations -static bool OpFilter(KernelDef* kdef) { return true; } - -REGISTER_XLA_LAUNCH_KERNEL(DEVICE_XLA_INTERPRETER, XlaLocalLaunchOp, - kExecAllTypes); -REGISTER_XLA_COMPILE_KERNEL(DEVICE_XLA_INTERPRETER, XlaCompileOp, - kExecAllTypes); -REGISTER_XLA_RUN_KERNEL(DEVICE_XLA_INTERPRETER, XlaRunOp, kExecAllTypes); - -REGISTER_XLA_DEVICE_KERNELS(DEVICE_XLA_INTERPRETER, kExecAllTypes); -REGISTER_XLA_BACKEND(DEVICE_INTERPRETER_XLA_JIT, kExecAllTypes, OpFilter); - -} // namespace tensorflow From 00dbf072dbe69521ae2170a9fac4052187d187d6 Mon Sep 17 00:00:00 2001 From: Michael Gester Date: Fri, 7 Aug 2020 17:02:03 -0700 Subject: [PATCH 2381/2522] Add TF2XLA fallback patterns to LegalizeTF pass Now LegalizeTF pass can optionally apply TF2XLA fallback patterns. ConvertMLIRToXlaComputation now uses this instead of a separate TF2XLA fallback pass which has following advantages: - ops which need TF -> TF lowering before a fallback pattern can be applied can now be legalized, previously they couldn't - saves intermediate canonicalization and shape inference passes - more flexible control over order in which patterns should be applied PiperOrigin-RevId: 325536262 Change-Id: I6f5d42fe889e5d9404ac558b23ca2e4d6277226f --- .../tensorflow/utils/compile_mlir_util.cc | 16 ++--- tensorflow/compiler/mlir/xla/BUILD | 1 + .../legalize-tf-include-tf2xla-fallback.mlir | 50 +++++++++++++++ .../mlir/xla/transforms/legalize_tf.cc | 61 ++++++++++++++----- .../xla/transforms/legalize_tf_with_tf2xla.cc | 3 +- .../compiler/mlir/xla/transforms/passes.h | 17 +++++- 6 files changed, 119 insertions(+), 29 deletions(-) create mode 100644 tensorflow/compiler/mlir/xla/tests/legalize-tf-include-tf2xla-fallback.mlir diff --git a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc index f06fe1280f0..78d621cbe75 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc @@ -312,29 +312,25 @@ Status ConvertMLIRToXlaComputation( // inside PromoteResourcesToArgs. tf2xla.addPass(mlir::mhlo::createLegalizeTFControlFlowPass()); - tf2xla.addNestedPass(mlir::mhlo::createLegalizeTFPass(true)); + tf2xla.addNestedPass(mlir::mhlo::createLegalizeTFPass( + /*allow_partial_conversion=*/true, /*legalize_chlo=*/true, + /*tf2xla_fallback_device_type=*/device_type)); for (auto& target_pass : custom_legalization_passes) { tf2xla.addNestedPass(std::move(target_pass)); } tf2xla.addNestedPass(mlir::createCanonicalizerPass()); - tf2xla.addPass(mlir::TF::CreateTFShapeInferencePass()); - - // Leverage tf2xla kernels for ops that didn't get lowered in the previous - // legalization pass. - tf2xla.addPass(mlir::mhlo::createLegalizeTfWithTf2XlaPass(device_type)); - tf2xla.addNestedPass(mlir::createCanonicalizerPass()); - // Run shape inference pass to propagate shapes through tensor_cast operations // from static to dynamic shapes. This could be generated if the shape // inference was originally missing in a TF op but the corresponding HLO op // had static shape after lowering. tf2xla.addPass(mlir::TF::CreateTFShapeInferencePass()); - // Run LegalizeTFPass again because the previous legalization passes can // expose more graph pruning and canonicalization opportunities that are // necessary for the second LegalizeTFPass(allow_partial_conversion=false) // invocation. 
- tf2xla.addNestedPass(mlir::mhlo::createLegalizeTFPass(false)); + tf2xla.addNestedPass(mlir::mhlo::createLegalizeTFPass( + /*allow_partial_conversion=*/false, /*legalize_chlo=*/true, + /*tf2xla_fallback_device_type=*/device_type)); // In order to export to XLA, we must sink constants to control flow regions, // since XLA uses functional control flow. tf2xla.addNestedPass( diff --git a/tensorflow/compiler/mlir/xla/BUILD b/tensorflow/compiler/mlir/xla/BUILD index ada81634567..71e18af498b 100644 --- a/tensorflow/compiler/mlir/xla/BUILD +++ b/tensorflow/compiler/mlir/xla/BUILD @@ -56,6 +56,7 @@ cc_library( ], deps = [ ":type_to_shape", + ":xla_legalize_tf_with_tf2xla", "//tensorflow/compiler/mlir/hlo", "//tensorflow/compiler/mlir/hlo:chlo_legalize_to_hlo", "//tensorflow/compiler/mlir/hlo:convert_op_folder", diff --git a/tensorflow/compiler/mlir/xla/tests/legalize-tf-include-tf2xla-fallback.mlir b/tensorflow/compiler/mlir/xla/tests/legalize-tf-include-tf2xla-fallback.mlir new file mode 100644 index 00000000000..9f72820d15b --- /dev/null +++ b/tensorflow/compiler/mlir/xla/tests/legalize-tf-include-tf2xla-fallback.mlir @@ -0,0 +1,50 @@ +// RUN: tf-opt "-xla-legalize-tf=allow-partial-conversion use-tf2xla-fallback=false" -verify-diagnostics %s | FileCheck --check-prefix NO_FALLBACK %s +// RUN: tf-opt "-xla-legalize-tf=use-tf2xla-fallback=true device-type=XLA_CPU_JIT" -verify-diagnostics %s | FileCheck --check-prefix SUPPORTED_FALLBACK_DEVICE %s +// RUN: tf-opt "-xla-legalize-tf=allow-partial-conversion use-tf2xla-fallback=true" %s | FileCheck --check-prefix UNSPECIFIED_FALLBACK_DEVICE %s +// RUN: tf-opt "-xla-legalize-tf=allow-partial-conversion use-tf2xla-fallback=true device-type=INVALID_DEVICE_TYPE" %s | FileCheck --check-prefix UNSUPPORTED_FALLBACK_DEVICE %s + +// We run this test four times: +// 1) Legalize without using TF2XLA fallback (ops cannot be legalized). +// 2) Use fallback with a device that supports all ops (ops can be legalized). +// 3) Use fallback with unspecified device (ops cannot be legalized). +// 4) Use fallback with specified but unsupported device (ops cannot be legalized). +// +// Note: For 3) and 4) we do not use `-verify-diagnostics` because these cases +// produce remarks that don't occur for 1) and 2) and there is no way to check +// the remarks only for 3) and 4) (except using two files). 
+ +module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, producer = 268 : i32}} { + +// CHECK-LABEL: non_max_suppression_v4 +func @non_max_suppression_v4(%arg0: tensor<3x4xf32>, %arg1: tensor<3xf32>, %arg2: tensor, %arg3: tensor) -> tensor<2xi32> { + %max_size = mhlo.constant dense<2> : tensor + // NO_FALLBACK: tf.NonMaxSuppressionV4 + // SUPPORTED_FALLBACK_DEVICE-NOT: tf.NonMaxSuppressionV4 + // UNSPECIFIED_FALLBACK_DEVICE: tf.NonMaxSuppressionV4 + // UNSUPPORTED_FALLBACK_DEVICE: tf.NonMaxSuppressionV4 + %0:2 = "tf.NonMaxSuppressionV4"(%arg0, %arg1, %max_size, %arg2, %arg3) {pad_to_max_output_size = true}: (tensor<3x4xf32>, tensor<3xf32>, tensor, tensor, tensor) -> (tensor<2xi32>, tensor) + return %0#0 : tensor<2xi32> +} + +// CHECK-LABEL: mirror_pad +func @mirror_pad(%arg0: tensor<2x3xcomplex>) -> tensor<4x7xcomplex> { + %0 = mhlo.constant dense<[[1, 1], [2, 2]]> : tensor<2x2xi32> + // NO_FALLBACK: tf.MirrorPad + // SUPPORTED_FALLBACK_DEVICE-NOT: tf.MirrorPad + // UNSPECIFIED_FALLBACK_DEVICE: tf.MirrorPad + // UNSUPPORTED_FALLBACK_DEVICE: tf.MirrorPad + %1 = "tf.MirrorPad"(%arg0, %0) {mode = "SYMMETRIC"} : (tensor<2x3xcomplex>, tensor<2x2xi32>) -> tensor<4x7xcomplex> + return %1 : tensor<4x7xcomplex> +} + +// CHECK-LABEL: atan2 +func @atan2(%arg0: tensor<4x1xf32>, %arg1: tensor<4x1x4xf32>) -> tensor<4x4x4xf32> { + // NO_FALLBACK: tf.Atan2 + // SUPPORTED_FALLBACK_DEVICE-NOT: tf.Atan2 + // UNSPECIFIED_FALLBACK_DEVICE: tf.Atan2 + // UNSUPPORTED_FALLBACK_DEVICE: tf.Atan2 + %0 = "tf.Atan2"(%arg0, %arg1) : (tensor<4x1xf32>, tensor<4x1x4xf32>) -> tensor<4x4x4xf32> + return %0: tensor<4x4x4xf32> +} + +} \ No newline at end of file diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc index 6d99e714fc2..aa6f25570a1 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc @@ -71,9 +71,14 @@ class LegalizeTF : public PassWrapper { public: LegalizeTF() = default; LegalizeTF(const LegalizeTF &) {} - explicit LegalizeTF(bool allow_partial_conversion, bool legalize_chlo) { + explicit LegalizeTF(bool allow_partial_conversion, bool legalize_chlo, + llvm::Optional tf2xla_fallback_device_type) { allow_partial_conversion_ = allow_partial_conversion; legalize_chlo_ = legalize_chlo; + use_tf2xla_fallback_ = tf2xla_fallback_device_type.hasValue(); + if (tf2xla_fallback_device_type.hasValue()) { + device_type_ = tf2xla_fallback_device_type.getValue().str(); + } } /// Performs the lowering to XLA dialect. @@ -89,6 +94,17 @@ class LegalizeTF : public PassWrapper { llvm::cl::desc( "Also legalizes intermediate chlo ops to hlo (default true)"), llvm::cl::init(true)}; + Option use_tf2xla_fallback_{ + *this, "use-tf2xla-fallback", + llvm::cl::desc( + "Also use TF2XLA fallback for legalization (default false)"), + llvm::cl::init(false)}; + Option device_type_{ + *this, "device-type", + llvm::cl::desc( + "The device type used by TF2XLA fallback. Must be specified if " + "use-tf2xla-fallback is true, otherwise not used."), + llvm::cl::init("INVALID_DEVICE_TYPE")}; }; /// Returns if the given TF data format string is the default format. @@ -5746,9 +5762,14 @@ void EmitLegalizationErrors(Operation *op, // Performs the lowering to XLA dialect. 
void LegalizeTF::runOnFunction() { - if (failed( - legalizeTF(getFunction(), allow_partial_conversion_, legalize_chlo_))) + llvm::Optional tf2xla_fallback_device_type = llvm::None; + if (use_tf2xla_fallback_) { + tf2xla_fallback_device_type = device_type_; + } + if (failed(legalizeTF(getFunction(), allow_partial_conversion_, + legalize_chlo_, tf2xla_fallback_device_type))) { signalPassFailure(); + } } static PassRegistration pass( @@ -5758,14 +5779,29 @@ static PassRegistration pass( #include "tensorflow/compiler/mlir/xla/transforms/generated_legalize_tf.inc" -LogicalResult legalizeTF(Operation *op, bool allow_partial_conversion, - bool legalize_chlo) { +LogicalResult legalizeTF( + Operation *op, bool allow_partial_conversion, bool legalize_chlo, + llvm::Optional tf2xla_fallback_device_type) { MLIRContext *context = op->getContext(); - - // Add lowering patterns to the list. OwningRewritePatternList patterns; + // Note that the `OperationConverter` orders patterns lexicographically by: + // 1) Ascending legalization depth (i.e., minimum number of patterns necessary + // to arrive at conversion target). + // 2) Descending pattern benefit. + // 3) Order of patterns in `OwningRewritePatternList`. + + // Add TF->HLO legalization patterns. PopulateLegalizeTfPatterns(context, &patterns); + // Add TF->HLO legalization patterns via TF2XLA fallback. + if (tf2xla_fallback_device_type.hasValue()) { + PopulateLegalizeTfWithTf2XlaPatterns(tf2xla_fallback_device_type.getValue(), + patterns); + } + + // Add TF->TF lowering patterns. + TF::PopulateLoweringTFPatterns(context, &patterns); + // Populate with CHLO->HLO lowerings to account for TF ops legalized to // CHLO first. if (legalize_chlo) { @@ -5805,11 +5841,6 @@ LogicalResult legalizeTF(Operation *op, bool allow_partial_conversion, void PopulateLegalizeTfPatterns(MLIRContext *context, OwningRewritePatternList *patterns) { populateWithGenerated(context, patterns); - - // Add patterns that lower some of the high level TensorFlow ops to lower - // level TensorFlow ops. So, we don't have to target all the TensorFlow ops - // here for lowering to HLO. - TF::PopulateLoweringTFPatterns(context, patterns); patterns->insert< ConvertAllOp, ConvertAnyOp, ConvertArgMaxOp, ConvertBatchMatMulV2Op, ConvertBiasAddOp, ConvertBroadcastToOp, ConvertBF16FloorDivOp, @@ -5838,8 +5869,10 @@ void PopulateLegalizeTfPatterns(MLIRContext *context, } std::unique_ptr> createLegalizeTFPass( - bool allow_partial_conversion, bool legalize_chlo) { - return std::make_unique(allow_partial_conversion, legalize_chlo); + bool allow_partial_conversion, bool legalize_chlo, + llvm::Optional tf2xla_fallback_device_type) { + return std::make_unique(allow_partial_conversion, legalize_chlo, + tf2xla_fallback_device_type); } } // end namespace mhlo diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc index c63e77f2a47..f04f1653505 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc @@ -528,8 +528,7 @@ class LegalizeTF : public PassWrapper { // global device type for all TensorFlow ops. Option device_type_{ *this, "device-type", - llvm::cl::desc("XLA device type for execution of TensorFlow ops. 
" - "Supports XLA_CPU_JIT and XLA_TPU_JIT for now.")}; + llvm::cl::desc("XLA device type for execution of TensorFlow ops.")}; }; static PassRegistration pass( diff --git a/tensorflow/compiler/mlir/xla/transforms/passes.h b/tensorflow/compiler/mlir/xla/transforms/passes.h index 85bdaaa0e31..45166941620 100644 --- a/tensorflow/compiler/mlir/xla/transforms/passes.h +++ b/tensorflow/compiler/mlir/xla/transforms/passes.h @@ -36,8 +36,13 @@ namespace mhlo { /// Lowers from TF dialect to HLO dialect. When allow_partial_conversion is /// false, emits an error if there is any operation that can't be legalized. +/// When `tf2xla_fallback_device_type` is not `None`, also uses legalization +/// patterns from TF2XLA fallback for provided device type (see +/// legalize_tf_with_tf2xla.cc for details). By default, TF2XLA fallback is not +/// used. std::unique_ptr> createLegalizeTFPass( - bool allow_partial_conversion = false, bool legalize_chlo = true); + bool allow_partial_conversion = false, bool legalize_chlo = true, + llvm::Optional tf2xla_fallback_device_type = llvm::None); /// Lowers from TF dialect to HLO dialect using tf2xla op kernels for the /// specified device type. @@ -63,8 +68,14 @@ std::unique_ptr> createLegalizeTFControlFlowPass(); /// dialect using the conversion patterns registered by the HLO dialect. When /// allow_partial_conversion is false, emits an error if there is any operation /// that can't be legalized. -LogicalResult legalizeTF(Operation* op, bool allow_partial_conversion = false, - bool legalize_chlo = true); +/// When `tf2xla_fallback_device_type` is not `None`, also uses legalization +/// patterns from TF2XLA fallback for provided device type (see +/// legalize_tf_with_tf2xla.cc for details). By default, TF2XLA fallback is not +/// used. +LogicalResult legalizeTF( + Operation* op, bool allow_partial_conversion = false, + bool legalize_chlo = true, + llvm::Optional tf2xla_fallback_device_type = llvm::None); // Legalizes TF/XLA communication ops (TF dialect) to HLO dialect communication // ops. From 4bbdc6ce8e2011e6ca3ae41240b32eef3ed69e9a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 7 Aug 2020 17:05:58 -0700 Subject: [PATCH 2382/2522] Break up core/kernels/BUILD (part 2 of N): Move MKL kernels to subdirectory tensorflow/core/kernels/mkl with its own BUILD file. 
PiperOrigin-RevId: 325536890 Change-Id: Ia98ea1a3b2c0aa52e0c611e6ae38315802b20261 --- tensorflow/core/BUILD | 86 +-- tensorflow/core/common_runtime/BUILD | 46 +- tensorflow/core/kernels/BUILD | 568 +----------------- tensorflow/core/kernels/mkl/BUILD | 428 +++++++++++++ .../kernels/{ => mkl}/mkl_aggregate_ops.cc | 0 .../kernels/{ => mkl}/mkl_avgpooling_op.cc | 2 +- .../kernels/{ => mkl}/mkl_batch_matmul_op.cc | 2 +- .../core/kernels/{ => mkl}/mkl_concat_op.cc | 0 .../{ => mkl}/mkl_conv_grad_filter_ops.cc | 2 +- .../{ => mkl}/mkl_conv_grad_input_ops.cc | 2 +- .../core/kernels/{ => mkl}/mkl_conv_ops.cc | 4 +- .../core/kernels/{ => mkl}/mkl_conv_ops.h | 8 +- .../kernels/{ => mkl}/mkl_conv_ops_test.cc | 0 .../kernels/{ => mkl}/mkl_cwise_ops_common.cc | 0 .../kernels/{ => mkl}/mkl_dequantize_op.cc | 0 .../{ => mkl}/mkl_dequantize_op_test.cc | 20 +- .../{ => mkl}/mkl_fused_batch_norm_op.cc | 0 .../{ => mkl}/mkl_fused_batch_norm_op_test.cc | 0 .../kernels/{ => mkl}/mkl_fused_ops_test.cc | 0 .../core/kernels/{ => mkl}/mkl_identity_op.cc | 0 .../{ => mkl}/mkl_input_conversion_op.cc | 8 +- .../core/kernels/{ => mkl}/mkl_lrn_op.cc | 0 .../core/kernels/{ => mkl}/mkl_matmul_op.cc | 2 +- .../kernels/{ => mkl}/mkl_matmul_op_fused.cc | 2 +- .../kernels/{ => mkl}/mkl_matmul_ops_common.h | 7 +- .../kernels/{ => mkl}/mkl_maxpooling_op.cc | 2 +- .../{ => mkl}/mkl_pooling_ops_common.cc | 2 +- .../{ => mkl}/mkl_pooling_ops_common.h | 6 +- .../core/kernels/{ => mkl}/mkl_qmatmul_op.cc | 4 +- .../kernels/{ => mkl}/mkl_qmatmul_op_test.cc | 0 .../core/kernels/{ => mkl}/mkl_quantize_op.cc | 0 .../kernels/{ => mkl}/mkl_quantize_op_test.cc | 0 .../{ => mkl}/mkl_quantized_concat_op_test.cc | 0 .../{ => mkl}/mkl_quantized_conv_ops.h | 6 +- .../mkl_quantized_conv_ops_perchannel_test.cc | 0 .../{ => mkl}/mkl_quantized_conv_ops_test.cc | 0 .../mkl_quantized_pooling_ops_test.cc | 0 .../core/kernels/{ => mkl}/mkl_relu_op.cc | 0 .../kernels/{ => mkl}/mkl_relu_op_test.cc | 0 ...mkl_requantization_range_per_channel_op.cc | 0 .../{ => mkl}/mkl_requantize_ops_test.cc | 0 .../mkl_requantize_per_channel_op.cc | 0 .../core/kernels/{ => mkl}/mkl_reshape_op.cc | 0 .../core/kernels/{ => mkl}/mkl_slice_op.cc | 0 .../core/kernels/{ => mkl}/mkl_softmax_op.cc | 0 .../core/kernels/{ => mkl}/mkl_tfconv_op.h | 6 +- .../kernels/{ => mkl}/mkl_tmp_bf16_ops.cc | 0 .../kernels/{ => mkl}/mkl_transpose_op.cc | 0 48 files changed, 533 insertions(+), 680 deletions(-) create mode 100644 tensorflow/core/kernels/mkl/BUILD rename tensorflow/core/kernels/{ => mkl}/mkl_aggregate_ops.cc (100%) rename tensorflow/core/kernels/{ => mkl}/mkl_avgpooling_op.cc (99%) rename tensorflow/core/kernels/{ => mkl}/mkl_batch_matmul_op.cc (99%) rename tensorflow/core/kernels/{ => mkl}/mkl_concat_op.cc (100%) rename tensorflow/core/kernels/{ => mkl}/mkl_conv_grad_filter_ops.cc (99%) rename tensorflow/core/kernels/{ => mkl}/mkl_conv_grad_input_ops.cc (99%) rename tensorflow/core/kernels/{ => mkl}/mkl_conv_ops.cc (99%) rename tensorflow/core/kernels/{ => mkl}/mkl_conv_ops.h (99%) rename tensorflow/core/kernels/{ => mkl}/mkl_conv_ops_test.cc (100%) rename tensorflow/core/kernels/{ => mkl}/mkl_cwise_ops_common.cc (100%) rename tensorflow/core/kernels/{ => mkl}/mkl_dequantize_op.cc (100%) rename tensorflow/core/kernels/{ => mkl}/mkl_dequantize_op_test.cc (88%) rename tensorflow/core/kernels/{ => mkl}/mkl_fused_batch_norm_op.cc (100%) rename tensorflow/core/kernels/{ => mkl}/mkl_fused_batch_norm_op_test.cc (100%) rename tensorflow/core/kernels/{ => mkl}/mkl_fused_ops_test.cc (100%) 
rename tensorflow/core/kernels/{ => mkl}/mkl_identity_op.cc (100%) rename tensorflow/core/kernels/{ => mkl}/mkl_input_conversion_op.cc (99%) rename tensorflow/core/kernels/{ => mkl}/mkl_lrn_op.cc (100%) rename tensorflow/core/kernels/{ => mkl}/mkl_matmul_op.cc (99%) rename tensorflow/core/kernels/{ => mkl}/mkl_matmul_op_fused.cc (99%) rename tensorflow/core/kernels/{ => mkl}/mkl_matmul_ops_common.h (99%) rename tensorflow/core/kernels/{ => mkl}/mkl_maxpooling_op.cc (99%) rename tensorflow/core/kernels/{ => mkl}/mkl_pooling_ops_common.cc (99%) rename tensorflow/core/kernels/{ => mkl}/mkl_pooling_ops_common.h (99%) rename tensorflow/core/kernels/{ => mkl}/mkl_qmatmul_op.cc (99%) rename tensorflow/core/kernels/{ => mkl}/mkl_qmatmul_op_test.cc (100%) rename tensorflow/core/kernels/{ => mkl}/mkl_quantize_op.cc (100%) rename tensorflow/core/kernels/{ => mkl}/mkl_quantize_op_test.cc (100%) rename tensorflow/core/kernels/{ => mkl}/mkl_quantized_concat_op_test.cc (100%) rename tensorflow/core/kernels/{ => mkl}/mkl_quantized_conv_ops.h (95%) rename tensorflow/core/kernels/{ => mkl}/mkl_quantized_conv_ops_perchannel_test.cc (100%) rename tensorflow/core/kernels/{ => mkl}/mkl_quantized_conv_ops_test.cc (100%) rename tensorflow/core/kernels/{ => mkl}/mkl_quantized_pooling_ops_test.cc (100%) rename tensorflow/core/kernels/{ => mkl}/mkl_relu_op.cc (100%) rename tensorflow/core/kernels/{ => mkl}/mkl_relu_op_test.cc (100%) rename tensorflow/core/kernels/{ => mkl}/mkl_requantization_range_per_channel_op.cc (100%) rename tensorflow/core/kernels/{ => mkl}/mkl_requantize_ops_test.cc (100%) rename tensorflow/core/kernels/{ => mkl}/mkl_requantize_per_channel_op.cc (100%) rename tensorflow/core/kernels/{ => mkl}/mkl_reshape_op.cc (100%) rename tensorflow/core/kernels/{ => mkl}/mkl_slice_op.cc (100%) rename tensorflow/core/kernels/{ => mkl}/mkl_softmax_op.cc (100%) rename tensorflow/core/kernels/{ => mkl}/mkl_tfconv_op.h (97%) rename tensorflow/core/kernels/{ => mkl}/mkl_tmp_bf16_ops.cc (100%) rename tensorflow/core/kernels/{ => mkl}/mkl_transpose_op.cc (100%) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index b449ae1f484..67e0a160c4f 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1050,28 +1050,28 @@ cc_library( ] + if_not_windows([ "//tensorflow/core/kernels/neon:neon_depthwise_conv_op", ]) + if_mkl([ - "//tensorflow/core/kernels:mkl_aggregate_ops", - "//tensorflow/core/kernels:mkl_concat_op", - "//tensorflow/core/kernels:mkl_dequantize_op", - "//tensorflow/core/kernels:mkl_conv_op", - "//tensorflow/core/kernels:mkl_cwise_ops_common", - "//tensorflow/core/kernels:mkl_fused_batch_norm_op", - "//tensorflow/core/kernels:mkl_identity_op", - "//tensorflow/core/kernels:mkl_input_conversion_op", - "//tensorflow/core/kernels:mkl_lrn_op", - "//tensorflow/core/kernels:mkl_pooling_ops", - "//tensorflow/core/kernels:mkl_qmatmul_op", - "//tensorflow/core/kernels:mkl_requantize_ops", - "//tensorflow/core/kernels:mkl_quantize_op", - "//tensorflow/core/kernels:mkl_relu_op", - "//tensorflow/core/kernels:mkl_reshape_op", - "//tensorflow/core/kernels:mkl_slice_op", - "//tensorflow/core/kernels:mkl_softmax_op", - "//tensorflow/core/kernels:mkl_transpose_op", - "//tensorflow/core/kernels:mkl_batch_matmul_op", - "//tensorflow/core/kernels:mkl_matmul_op", - "//tensorflow/core/kernels:mkl_tfconv_op", - "//tensorflow/core/kernels:mkl_tmp_bf16_ops", + "//tensorflow/core/kernels/mkl:mkl_aggregate_ops", + "//tensorflow/core/kernels/mkl:mkl_concat_op", + "//tensorflow/core/kernels/mkl:mkl_dequantize_op", + 
"//tensorflow/core/kernels/mkl:mkl_conv_op", + "//tensorflow/core/kernels/mkl:mkl_cwise_ops_common", + "//tensorflow/core/kernels/mkl:mkl_fused_batch_norm_op", + "//tensorflow/core/kernels/mkl:mkl_identity_op", + "//tensorflow/core/kernels/mkl:mkl_input_conversion_op", + "//tensorflow/core/kernels/mkl:mkl_lrn_op", + "//tensorflow/core/kernels/mkl:mkl_pooling_ops", + "//tensorflow/core/kernels/mkl:mkl_qmatmul_op", + "//tensorflow/core/kernels/mkl:mkl_requantize_ops", + "//tensorflow/core/kernels/mkl:mkl_quantize_op", + "//tensorflow/core/kernels/mkl:mkl_relu_op", + "//tensorflow/core/kernels/mkl:mkl_reshape_op", + "//tensorflow/core/kernels/mkl:mkl_slice_op", + "//tensorflow/core/kernels/mkl:mkl_softmax_op", + "//tensorflow/core/kernels/mkl:mkl_transpose_op", + "//tensorflow/core/kernels/mkl:mkl_batch_matmul_op", + "//tensorflow/core/kernels/mkl:mkl_matmul_op", + "//tensorflow/core/kernels/mkl:mkl_tfconv_op", + "//tensorflow/core/kernels/mkl:mkl_tmp_bf16_ops", ]) + if_cuda_or_rocm([ "//tensorflow/core/kernels:cudnn_rnn_kernels", ]) + if_cuda([ @@ -2697,27 +2697,27 @@ tf_cc_test_mkl( "//tensorflow/core/kernels:ops_util", "//third_party/eigen3", ] + if_mkl([ - "//tensorflow/core/kernels:mkl_aggregate_ops", - "//tensorflow/core/kernels:mkl_batch_matmul_op", - "//tensorflow/core/kernels:mkl_concat_op", - "//tensorflow/core/kernels:mkl_conv_op", - "//tensorflow/core/kernels:mkl_cwise_ops_common", - "//tensorflow/core/kernels:mkl_dequantize_op", - "//tensorflow/core/kernels:mkl_fused_batch_norm_op", - "//tensorflow/core/kernels:mkl_identity_op", - "//tensorflow/core/kernels:mkl_input_conversion_op", - "//tensorflow/core/kernels:mkl_lrn_op", - "//tensorflow/core/kernels:mkl_matmul_op", - "//tensorflow/core/kernels:mkl_pooling_ops", - "//tensorflow/core/kernels:mkl_qmatmul_op", - "//tensorflow/core/kernels:mkl_quantize_op", - "//tensorflow/core/kernels:mkl_relu_op", - "//tensorflow/core/kernels:mkl_reshape_op", - "//tensorflow/core/kernels:mkl_slice_op", - "//tensorflow/core/kernels:mkl_softmax_op", - "//tensorflow/core/kernels:mkl_tfconv_op", - "//tensorflow/core/kernels:mkl_transpose_op", - "//tensorflow/core/kernels:mkl_tmp_bf16_ops", + "//tensorflow/core/kernels/mkl:mkl_aggregate_ops", + "//tensorflow/core/kernels/mkl:mkl_batch_matmul_op", + "//tensorflow/core/kernels/mkl:mkl_concat_op", + "//tensorflow/core/kernels/mkl:mkl_conv_op", + "//tensorflow/core/kernels/mkl:mkl_cwise_ops_common", + "//tensorflow/core/kernels/mkl:mkl_dequantize_op", + "//tensorflow/core/kernels/mkl:mkl_fused_batch_norm_op", + "//tensorflow/core/kernels/mkl:mkl_identity_op", + "//tensorflow/core/kernels/mkl:mkl_input_conversion_op", + "//tensorflow/core/kernels/mkl:mkl_lrn_op", + "//tensorflow/core/kernels/mkl:mkl_matmul_op", + "//tensorflow/core/kernels/mkl:mkl_pooling_ops", + "//tensorflow/core/kernels/mkl:mkl_qmatmul_op", + "//tensorflow/core/kernels/mkl:mkl_quantize_op", + "//tensorflow/core/kernels/mkl:mkl_relu_op", + "//tensorflow/core/kernels/mkl:mkl_reshape_op", + "//tensorflow/core/kernels/mkl:mkl_slice_op", + "//tensorflow/core/kernels/mkl:mkl_softmax_op", + "//tensorflow/core/kernels/mkl:mkl_tfconv_op", + "//tensorflow/core/kernels/mkl:mkl_transpose_op", + "//tensorflow/core/kernels/mkl:mkl_tmp_bf16_ops", ]), ) diff --git a/tensorflow/core/common_runtime/BUILD b/tensorflow/core/common_runtime/BUILD index 2dbcfdbee38..b46efe01474 100644 --- a/tensorflow/core/common_runtime/BUILD +++ b/tensorflow/core/common_runtime/BUILD @@ -1050,10 +1050,13 @@ cc_library( deps = [ ":function", ":optimization_registry", + 
"@com_google_absl//absl/base", "//tensorflow/core:framework", "//tensorflow/core:framework_internal", "//tensorflow/core:graph", "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:protos_all_cc", ] + mkl_deps(), alwayslink = 1, ) @@ -1073,6 +1076,7 @@ cc_library( "//tensorflow/core:framework_internal", "//tensorflow/core:graph", "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", ] + mkl_deps(), alwayslink = 1, ) @@ -2597,27 +2601,27 @@ tf_cc_test_mkl( "//tensorflow/core/kernels:ops_util", "//third_party/eigen3", ] + if_mkl([ - "//tensorflow/core/kernels:mkl_aggregate_ops", - "//tensorflow/core/kernels:mkl_batch_matmul_op", - "//tensorflow/core/kernels:mkl_concat_op", - "//tensorflow/core/kernels:mkl_conv_op", - "//tensorflow/core/kernels:mkl_cwise_ops_common", - "//tensorflow/core/kernels:mkl_dequantize_op", - "//tensorflow/core/kernels:mkl_fused_batch_norm_op", - "//tensorflow/core/kernels:mkl_identity_op", - "//tensorflow/core/kernels:mkl_input_conversion_op", - "//tensorflow/core/kernels:mkl_lrn_op", - "//tensorflow/core/kernels:mkl_matmul_op", - "//tensorflow/core/kernels:mkl_pooling_ops", - "//tensorflow/core/kernels:mkl_qmatmul_op", - "//tensorflow/core/kernels:mkl_quantize_op", - "//tensorflow/core/kernels:mkl_relu_op", - "//tensorflow/core/kernels:mkl_reshape_op", - "//tensorflow/core/kernels:mkl_slice_op", - "//tensorflow/core/kernels:mkl_softmax_op", - "//tensorflow/core/kernels:mkl_tfconv_op", - "//tensorflow/core/kernels:mkl_transpose_op", - "//tensorflow/core/kernels:mkl_tmp_bf16_ops", + "//tensorflow/core/kernels/mkl:mkl_aggregate_ops", + "//tensorflow/core/kernels/mkl:mkl_batch_matmul_op", + "//tensorflow/core/kernels/mkl:mkl_concat_op", + "//tensorflow/core/kernels/mkl:mkl_conv_op", + "//tensorflow/core/kernels/mkl:mkl_cwise_ops_common", + "//tensorflow/core/kernels/mkl:mkl_dequantize_op", + "//tensorflow/core/kernels/mkl:mkl_fused_batch_norm_op", + "//tensorflow/core/kernels/mkl:mkl_identity_op", + "//tensorflow/core/kernels/mkl:mkl_input_conversion_op", + "//tensorflow/core/kernels/mkl:mkl_lrn_op", + "//tensorflow/core/kernels/mkl:mkl_matmul_op", + "//tensorflow/core/kernels/mkl:mkl_pooling_ops", + "//tensorflow/core/kernels/mkl:mkl_qmatmul_op", + "//tensorflow/core/kernels/mkl:mkl_quantize_op", + "//tensorflow/core/kernels/mkl:mkl_relu_op", + "//tensorflow/core/kernels/mkl:mkl_reshape_op", + "//tensorflow/core/kernels/mkl:mkl_slice_op", + "//tensorflow/core/kernels/mkl:mkl_softmax_op", + "//tensorflow/core/kernels/mkl:mkl_tfconv_op", + "//tensorflow/core/kernels/mkl:mkl_transpose_op", + "//tensorflow/core/kernels/mkl:mkl_tmp_bf16_ops", ]), ) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 99970a9558c..9be043c3907 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -9,12 +9,10 @@ load( "tf_cc_binary", "tf_cc_shared_object", "tf_cc_test", - "tf_cc_test_mkl", "tf_cc_tests", "tf_copts", "tf_cuda_library", "tf_kernel_library", - "tf_mkl_kernel_library", "tf_opts_nortti_if_lite_protos", ) load("@local_config_sycl//sycl:build_defs.bzl", "if_sycl") @@ -654,6 +652,7 @@ cc_library( cc_library( name = "batch_kernels", srcs = ["batch_kernels.cc"], + hdrs = ["batch_matmul_op_impl.h"], deps = [ ":ops_util_hdrs", "//tensorflow/core:framework", @@ -939,7 +938,7 @@ cc_library( cc_library( name = "image_resizer_state", hdrs = ["image_resizer_state.h"], - visibility = ["//visibility:private"], + visibility = ["//tensorflow:__subpackages__"], deps = [ ":bounds_check", 
"//tensorflow/core:framework", @@ -3818,16 +3817,6 @@ tf_kernel_library( ]), ) -tf_mkl_kernel_library( - name = "mkl_batch_matmul_op", - srcs = ["mkl_batch_matmul_op.cc"], - hdrs = [ - "batch_matmul_op_impl.h", - "mkl_matmul_ops_common.h", - ], - deps = MATH_DEPS + mkl_deps(), -) - tf_kernel_library( name = "betainc_op", prefix = "betainc_op", @@ -3909,16 +3898,6 @@ tf_kernel_library( ]) + if_cuda_or_rocm([":gpu_utils"]), ) -tf_mkl_kernel_library( - name = "mkl_matmul_op", - srcs = [ - "mkl_matmul_op.cc", - "mkl_matmul_op_fused.cc", - ], - hdrs = ["mkl_matmul_ops_common.h"], - deps = MATH_DEPS + mkl_deps(), -) - tf_kernel_library( name = "reduction_ops", gpu_srcs = ["reduction_gpu_kernels.cu.h"], @@ -6824,7 +6803,6 @@ filegroup( "unicode_ops.cc", "unicode_script_op.cc", # Ops that are inherently incompatible with Android (e.g. tied to x86 platform). - "mkl_*", "xsmm_*", "cwise_ops_sycl_common.h", "nextafter_op.cc", @@ -7260,50 +7238,6 @@ tf_cc_test( ], ) -tf_cc_test_mkl( - name = "mkl_quantized_conv_ops_perchannel_test", - size = "small", - srcs = ["mkl_quantized_conv_ops_perchannel_test.cc"], - deps = [ - ":mkl_conv_op", - ":mkl_input_conversion_op", - ":ops_testutil", - ":ops_util", - ":quantization_utils", - ":quantized_ops", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:framework", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - ], -) - -tf_cc_test_mkl( - name = "mkl_quantized_conv_ops_test", - size = "small", - srcs = ["mkl_quantized_conv_ops_test.cc"], - deps = [ - ":mkl_conv_op", - ":mkl_input_conversion_op", - ":ops_testutil", - ":ops_util", - ":quantization_utils", - ":quantized_ops", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:framework", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - ], -) - tf_cc_test( name = "quantize_op_test", size = "small", @@ -7344,28 +7278,6 @@ tf_cc_test( ], ) -tf_cc_test_mkl( - name = "mkl_qmatmul_op_test", - size = "small", - srcs = ["mkl_qmatmul_op_test.cc"], - deps = [ - ":mkl_input_conversion_op", - ":mkl_qmatmul_op", - ":ops_testutil", - ":ops_util", - ":quantization_utils", - ":quantized_ops", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:framework", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - ], -) - # Android-only test for quantized multiply. 
cc_binary( name = "quantized_mul_op_test_android_only", @@ -7446,66 +7358,6 @@ tf_cc_test( ], ) -tf_mkl_kernel_library( - name = "mkl_quantize_op", - srcs = ["mkl_quantize_op.cc"], - hdrs = [ - "meta_support.h", - "reference_gemm.h", - ], - deps = [ - ":bounds_check", - ":ops_util", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:lib_internal", - "//tensorflow/core:mkl_graph_util", - "@gemmlowp", - ] + mkl_deps(), -) - -tf_cc_test_mkl( - name = "mkl_quantize_op_test", - size = "small", - srcs = ["mkl_quantize_op_test.cc"], - deps = [ - ":mkl_quantize_op", - ":ops_testutil", - ":ops_util", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:framework", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - ], -) - -tf_cc_test_mkl( - name = "mkl_quantized_pooling_ops_test", - size = "small", - srcs = ["mkl_quantized_pooling_ops_test.cc"], - deps = [ - ":mkl_input_conversion_op", - ":mkl_pooling_ops", - ":ops_testutil", - ":ops_util", - ":quantization_utils", - ":quantized_ops", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:framework", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - ], -) - tf_cc_test( name = "quantized_reshape_op_test", size = "small", @@ -7545,30 +7397,6 @@ tf_cc_test( ], ) -tf_cc_test_mkl( - name = "mkl_quantized_concat_op_test", - size = "small", - srcs = ["mkl_quantized_concat_op_test.cc"], - deps = [ - ":mkl_concat_op", - ":ops_testutil", - ":ops_util", - ":quantization_utils", - ":quantized_ops", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:mkl_array_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - ], -) - tf_cc_test( name = "quantized_batch_norm_op_test", size = "small", @@ -7792,50 +7620,6 @@ tf_cc_test( ], ) -tf_mkl_kernel_library( - name = "mkl_qmatmul_op", - srcs = ["mkl_qmatmul_op.cc"], - hdrs = [ - "mkl_matmul_ops_common.h", - "mkl_quantized_conv_ops.h", - "no_op.h", - ], - deps = [ - ":bounds_check", - ":fill_functor", - ":matmul_op", - ":ops_util", - "//third_party/eigen3", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:lib_internal", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:mkl_nn_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - ] + mkl_deps(), -) - -tf_mkl_kernel_library( - name = "mkl_conv_op", - hdrs = [ - "mkl_quantized_conv_ops.h", - "no_op.h", - ], - prefix = "mkl_conv", - deps = [ - ":bounds_check", - ":conv_ops", - ":ops_util", - "@com_google_absl//absl/strings", - "//third_party/eigen3", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:lib_internal", - ] + mkl_deps(), -) - tf_cc_test( name = "bias_op_test", size = "small", @@ -7850,354 +7634,6 @@ tf_cc_test( ], ) -tf_cc_test_mkl( - name = "mkl_conv_ops_test", - size = "small", - srcs = ["mkl_conv_ops_test.cc"], - linkstatic = 1, # Fixes dyld error on MacOS. 
- deps = [ - ":ops_testutil", - ":ops_util", - "//tensorflow/cc:cc_ops", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:framework_internal", - "//tensorflow/core:lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:tensorflow", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - ], -) - -tf_cc_test_mkl( - name = "mkl_relu_op_test", - size = "small", - srcs = ["mkl_relu_op_test.cc"], - linkstatic = 1, # Fixes dyld error on MacOS. - deps = [ - ":ops_testutil", - ":ops_util", - "//tensorflow/cc:cc_ops", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:framework_internal", - "//tensorflow/core:lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:tensorflow", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - ], -) - -tf_mkl_kernel_library( - name = "mkl_tfconv_op", - prefix = "mkl_tfconv", - deps = [ - ":bounds_check", - ":ops_util", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:lib_internal", - ] + mkl_deps(), -) - -tf_mkl_kernel_library( - name = "mkl_input_conversion_op", - hdrs = ["mkl_tfconv_op.h"], - prefix = "mkl_input_conversion", - deps = [ - ":bounds_check", - ":ops_util", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:lib_internal", - ] + mkl_deps(), -) - -tf_mkl_kernel_library( - name = "mkl_pooling_ops", - srcs = [ - "mkl_avgpooling_op.cc", - "mkl_maxpooling_op.cc", - "mkl_pooling_ops_common.cc", - ], - hdrs = ["mkl_pooling_ops_common.h"], - deps = [ - ":bounds_check", - ":ops_util", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:lib_internal", - ] + mkl_deps(), -) - -tf_mkl_kernel_library( - name = "mkl_dequantize_op", - srcs = ["mkl_dequantize_op.cc"], - hdrs = [ - "meta_support.h", - "reference_gemm.h", - ], - deps = [ - ":concat_lib_hdrs", - ":conv_ops", - ":cwise_op", - ":eigen_helpers", - ":image_resizer_state", - ":ops_util", - ":pooling_ops", - ":quantization_utils", - ":quantized_ops", - ":transpose_functor", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:mkl_graph_util", - "//tensorflow/core:nn_ops_op_lib", - "//third_party/eigen3", - "@gemmlowp", - ] + mkl_deps(), -) - -tf_cc_test_mkl( - name = "mkl_dequantize_op_test", - size = "small", - srcs = ["mkl_dequantize_op_test.cc"], - # TODO(b/149940073): Re-enable. 
- tags = [ - "no_oss", - "notap", - ], - deps = [ - ":mkl_dequantize_op", - ":mkl_tfconv_op", - ":ops_testutil", - ":ops_util", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:framework", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:mkl_array_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:tensorflow", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - ], -) - -tf_mkl_kernel_library( - name = "mkl_relu_op", - prefix = "mkl_relu", - deps = [ - ":bounds_check", - ":ops_util", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:lib_internal", - "//third_party/eigen3", - ] + mkl_deps(), -) - -tf_mkl_kernel_library( - name = "mkl_softmax_op", - prefix = "mkl_softmax", - deps = [ - ":bounds_check", - ":ops_util", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:lib_internal", - "//third_party/eigen3", - ] + mkl_deps(), -) - -tf_mkl_kernel_library( - name = "mkl_tmp_bf16_ops", - prefix = "mkl_tmp_bf16_ops", - deps = [ - ":no_op", - ] + mkl_deps(), -) - -tf_mkl_kernel_library( - name = "mkl_fused_batch_norm_op", - srcs = ["mkl_fused_batch_norm_op.cc"], - deps = NN_DEPS + [ - ":fused_batch_norm_op", - ":no_op", - ] + mkl_deps(), -) - -tf_cc_test_mkl( - name = "mkl_fused_batch_norm_op_test", - size = "small", - srcs = ["mkl_fused_batch_norm_op_test.cc"], - linkstatic = 1, - deps = [ - ":mkl_fused_batch_norm_op", - ":ops_testutil", - ":ops_util", - "//tensorflow/cc:cc_ops", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:framework_internal", - "//tensorflow/core:lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:tensorflow", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - ], -) - -tf_mkl_kernel_library( - name = "mkl_aggregate_ops", - prefix = "mkl_aggregate_ops", - deps = MATH_DEPS + mkl_deps(), -) - -tf_mkl_kernel_library( - name = "mkl_concat_op", - prefix = "mkl_concat_op", - deps = [":quantization_utils"] + ARRAY_DEPS + mkl_deps(), -) - -tf_mkl_kernel_library( - name = "mkl_reshape_op", - prefix = "mkl_reshape_op", - deps = ARRAY_DEPS + mkl_deps(), -) - -tf_mkl_kernel_library( - name = "mkl_slice_op", - prefix = "mkl_slice_op", - deps = ARRAY_DEPS + mkl_deps(), -) - -tf_mkl_kernel_library( - name = "mkl_identity_op", - prefix = "mkl_identity_op", - deps = ARRAY_DEPS + mkl_deps(), -) - -tf_mkl_kernel_library( - name = "mkl_lrn_op", - prefix = "mkl_lrn_op", - deps = NN_DEPS + mkl_deps(), -) - -tf_mkl_kernel_library( - name = "mkl_cwise_ops_common", - hdrs = [ - "cwise_ops.h", - "cwise_ops_common.h", - "cwise_ops_gradients.h", - ], - prefix = "mkl_cwise_ops_common", - deps = NN_DEPS + mkl_deps() + [":cwise_op"], -) - -tf_mkl_kernel_library( - name = "mkl_requantize_ops", - srcs = [ - "mkl_requantization_range_per_channel_op.cc", - "mkl_requantize_per_channel_op.cc", - ], - hdrs = [ - "meta_support.h", - "no_op.h", - "reference_gemm.h", - ], - deps = [ - ":concat_lib_hdrs", - ":conv_ops", - ":cwise_op", - ":eigen_helpers", - ":image_resizer_state", - ":ops_util", - ":pooling_ops", - ":quantization_utils", - ":transpose_functor", - "//third_party/eigen3", - "@gemmlowp", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - ] + mkl_deps(), -) - -tf_cc_test_mkl( - name = "mkl_requantize_ops_test", - size = "small", - 
srcs = ["mkl_requantize_ops_test.cc"], - linkstatic = 1, # Fixes dyld error on MacOS. - deps = [ - ":mkl_requantize_ops", - ":ops_testutil", - ":ops_util", - ":quantization_utils", - ":quantized_ops", - "//tensorflow/cc:cc_ops", - "//tensorflow/core:array_ops_op_lib", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:framework_internal", - "//tensorflow/core:lib", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:tensorflow", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - ], -) - -tf_cc_test_mkl( - name = "mkl_fused_ops_test", - size = "small", - srcs = ["mkl_fused_ops_test.cc"], - linkstatic = 1, - deps = [ - ":conv_ops", - ":image", - ":mkl_conv_op", - ":mkl_matmul_op", - ":mkl_tfconv_op", - ":ops_testutil", - ":ops_util", - "//tensorflow/cc:cc_ops", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:framework_internal", - "//tensorflow/core:lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:tensorflow", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - ], -) - -tf_mkl_kernel_library( - name = "mkl_transpose_op", - srcs = [ - "mkl_transpose_op.cc", - ], - hdrs = ["transpose_op.h"], - deps = ARRAY_DEPS + mkl_deps() + [":transpose_op"], -) - # NOTE(lespeholt): This rule is deprecated, please use: # tensorflow/core/util/batch_util.h cc_library( diff --git a/tensorflow/core/kernels/mkl/BUILD b/tensorflow/core/kernels/mkl/BUILD new file mode 100644 index 00000000000..4abeee20e30 --- /dev/null +++ b/tensorflow/core/kernels/mkl/BUILD @@ -0,0 +1,428 @@ +load( + "//tensorflow:tensorflow.bzl", + "tf_cc_test_mkl", + "tf_mkl_kernel_library", +) +load( + "//third_party/mkl:build_defs.bzl", + "mkl_deps", +) + +package( + default_visibility = [ + "//tensorflow:__subpackages__", + "//tensorflow:internal", + ], + licenses = ["notice"], # Apache 2.0 +) + +# Public support libraries ---------------------------------------------------- +MKL_SHORT_DEPS = [ + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core/kernels:bounds_check", + "//tensorflow/core/kernels:ops_util", +] + mkl_deps() + +MKL_DEPS = MKL_SHORT_DEPS + [ + "//third_party/eigen3", + "//tensorflow/core:array_grad", + "//tensorflow/core:math_grad", + "//tensorflow/core:nn_grad", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core/kernels:concat_lib", + "//tensorflow/core/kernels:conv_2d", + "//tensorflow/core/kernels:eigen_contraction_kernel", + "//tensorflow/core/kernels:fill_functor", + "//tensorflow/core/kernels:gather_functor", + "//tensorflow/core/kernels:transpose_functor", +] + +MKL_TEST_DEPS = [ + ":mkl_input_conversion_op", + "//tensorflow/cc:cc_ops", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/kernels:ops_testutil", + "//tensorflow/core/kernels:ops_util", +] + +tf_mkl_kernel_library( + name = "mkl_batch_matmul_op", + srcs = ["mkl_batch_matmul_op.cc"], + hdrs = [ + "mkl_matmul_ops_common.h", + ], + deps = ["//tensorflow/core/kernels:batch_matmul_op"] + MKL_DEPS, +) + +tf_mkl_kernel_library( + name = "mkl_matmul_op", + srcs = [ + "mkl_matmul_op.cc", + "mkl_matmul_op_fused.cc", + ], + 
hdrs = ["mkl_matmul_ops_common.h"], + deps = MKL_DEPS, +) + +tf_cc_test_mkl( + name = "mkl_quantized_conv_ops_perchannel_test", + size = "small", + srcs = ["mkl_quantized_conv_ops_perchannel_test.cc"], + deps = [ + ":mkl_conv_op", + "//tensorflow/core:array_ops_op_lib", + "//tensorflow/core:math_ops_op_lib", + "//tensorflow/core:nn_ops_op_lib", + "//tensorflow/core/kernels:quantization_utils", + "//tensorflow/core/kernels:quantized_ops", + ] + MKL_TEST_DEPS, +) + +tf_cc_test_mkl( + name = "mkl_quantized_conv_ops_test", + size = "small", + srcs = ["mkl_quantized_conv_ops_test.cc"], + deps = [ + ":mkl_conv_op", + "//tensorflow/core:array_ops_op_lib", + "//tensorflow/core:math_ops_op_lib", + "//tensorflow/core:nn_ops_op_lib", + "//tensorflow/core/kernels:quantization_utils", + "//tensorflow/core/kernels:quantized_ops", + ] + MKL_TEST_DEPS, +) + +tf_cc_test_mkl( + name = "mkl_qmatmul_op_test", + size = "small", + srcs = ["mkl_qmatmul_op_test.cc"], + deps = [ + ":mkl_qmatmul_op", + "//tensorflow/core:array_ops_op_lib", + "//tensorflow/core:math_ops_op_lib", + "//tensorflow/core:nn_ops_op_lib", + "//tensorflow/core/framework:fake_input", + "//tensorflow/core/framework:tensor_testutil", + "//tensorflow/core/kernels:quantization_utils", + "//tensorflow/core/kernels:quantized_ops", + ] + MKL_TEST_DEPS, +) + +tf_mkl_kernel_library( + name = "mkl_quantize_op", + srcs = ["mkl_quantize_op.cc"], + deps = [ + "//tensorflow/core/kernels:quantized_ops", + "//tensorflow/core:mkl_graph_util", + "@gemmlowp", + ] + MKL_DEPS, +) + +tf_cc_test_mkl( + name = "mkl_quantize_op_test", + size = "small", + srcs = ["mkl_quantize_op_test.cc"], + deps = [ + ":mkl_quantize_op", + "//tensorflow/core:array_ops_op_lib", + "//tensorflow/core:math_ops_op_lib", + "//tensorflow/core:nn_ops_op_lib", + ] + MKL_TEST_DEPS, +) + +tf_cc_test_mkl( + name = "mkl_quantized_pooling_ops_test", + size = "small", + srcs = ["mkl_quantized_pooling_ops_test.cc"], + deps = [ + ":mkl_pooling_ops", + "//tensorflow/core:array_ops_op_lib", + "//tensorflow/core:math_ops_op_lib", + "//tensorflow/core:nn_ops_op_lib", + "//tensorflow/core/kernels:quantization_utils", + "//tensorflow/core/kernels:quantized_ops", + ] + MKL_TEST_DEPS, +) + +tf_cc_test_mkl( + name = "mkl_quantized_concat_op_test", + size = "small", + srcs = ["mkl_quantized_concat_op_test.cc"], + deps = [ + ":mkl_concat_op", + "//tensorflow/core:array_ops_op_lib", + "//tensorflow/core:math_ops_op_lib", + "//tensorflow/core:mkl_array_ops_op_lib", + "//tensorflow/core:nn_ops_op_lib", + "//tensorflow/core/kernels:quantization_utils", + "//tensorflow/core/kernels:quantized_ops", + ] + MKL_TEST_DEPS, +) + +tf_mkl_kernel_library( + name = "mkl_qmatmul_op", + srcs = ["mkl_qmatmul_op.cc"], + hdrs = [ + "mkl_matmul_ops_common.h", + "mkl_quantized_conv_ops.h", + ], + deps = [ + "//tensorflow/core/kernels:matmul_op", + "//tensorflow/core/kernels:no_op", + "//tensorflow/core:math_ops_op_lib", + "//tensorflow/core:mkl_nn_ops_op_lib", + "//tensorflow/core:nn_ops_op_lib", + ] + MKL_DEPS, +) + +tf_mkl_kernel_library( + name = "mkl_conv_op", + hdrs = [ + "mkl_quantized_conv_ops.h", + ], + prefix = "mkl_conv", + deps = [ + "@com_google_absl//absl/strings", + "//tensorflow/core/kernels:conv_ops", + "//tensorflow/core/kernels:no_op", + ] + MKL_DEPS, +) + +tf_cc_test_mkl( + name = "mkl_conv_ops_test", + size = "small", + srcs = ["mkl_conv_ops_test.cc"], + linkstatic = 1, # Fixes dyld error on MacOS. 
+ deps = MKL_TEST_DEPS, +) + +tf_cc_test_mkl( + name = "mkl_relu_op_test", + size = "small", + srcs = ["mkl_relu_op_test.cc"], + linkstatic = 1, # Fixes dyld error on MacOS. + deps = MKL_TEST_DEPS, +) + +tf_mkl_kernel_library( + name = "mkl_tfconv_op", + prefix = "mkl_tfconv", + deps = MKL_SHORT_DEPS, +) + +tf_mkl_kernel_library( + name = "mkl_input_conversion_op", + hdrs = ["mkl_tfconv_op.h"], + prefix = "mkl_input_conversion", + deps = MKL_SHORT_DEPS, +) + +tf_mkl_kernel_library( + name = "mkl_pooling_ops", + srcs = [ + "mkl_avgpooling_op.cc", + "mkl_maxpooling_op.cc", + "mkl_pooling_ops_common.cc", + ], + hdrs = ["mkl_pooling_ops_common.h"], + deps = MKL_SHORT_DEPS, +) + +tf_mkl_kernel_library( + name = "mkl_dequantize_op", + srcs = ["mkl_dequantize_op.cc"], + deps = [ + "//tensorflow/core/kernels:concat_lib_hdrs", + "//tensorflow/core/kernels:conv_ops", + "//tensorflow/core/kernels:cwise_op", + "//tensorflow/core/kernels:eigen_helpers", + "//tensorflow/core/kernels:image_resizer_state", + "//tensorflow/core/kernels:ops_util", + "//tensorflow/core/kernels:pooling_ops", + "//tensorflow/core/kernels:quantization_utils", + "//tensorflow/core/kernels:quantized_ops", + "//tensorflow/core/kernels:transpose_functor", + "//tensorflow/core:array_ops_op_lib", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:math_ops_op_lib", + "//tensorflow/core:mkl_graph_util", + "//tensorflow/core:nn_ops_op_lib", + "//third_party/eigen3", + "@gemmlowp", + ] + mkl_deps(), +) + +tf_cc_test_mkl( + name = "mkl_dequantize_op_test", + size = "small", + srcs = ["mkl_dequantize_op_test.cc"], + # TODO(b/149940073): Re-enable. + tags = [ + "no_oss", + "notap", + ], + deps = [ + ":mkl_dequantize_op", + ":mkl_tfconv_op", + "//tensorflow/core:array_ops_op_lib", + "//tensorflow/core:math_ops_op_lib", + "//tensorflow/core:mkl_array_ops_op_lib", + "//tensorflow/core:nn_ops_op_lib", + ] + MKL_TEST_DEPS, +) + +tf_mkl_kernel_library( + name = "mkl_relu_op", + prefix = "mkl_relu", + deps = MKL_DEPS, +) + +tf_mkl_kernel_library( + name = "mkl_softmax_op", + prefix = "mkl_softmax", + deps = MKL_SHORT_DEPS, +) + +tf_mkl_kernel_library( + name = "mkl_tmp_bf16_ops", + prefix = "mkl_tmp_bf16_ops", + deps = MKL_DEPS + [ + "//tensorflow/core/kernels:no_op", + ], +) + +tf_mkl_kernel_library( + name = "mkl_fused_batch_norm_op", + srcs = ["mkl_fused_batch_norm_op.cc"], + deps = [ + "//tensorflow/core/kernels:fused_batch_norm_op", + "//tensorflow/core/kernels:no_op", + ] + mkl_deps(), +) + +tf_cc_test_mkl( + name = "mkl_fused_batch_norm_op_test", + size = "small", + srcs = ["mkl_fused_batch_norm_op_test.cc"], + linkstatic = 1, + deps = [ + ":mkl_fused_batch_norm_op", + "//tensorflow/core:direct_session", + "//tensorflow/core/kernels:conv_ops_gpu_hdrs", + ] + MKL_TEST_DEPS, +) + +tf_mkl_kernel_library( + name = "mkl_aggregate_ops", + prefix = "mkl_aggregate_ops", + deps = MKL_DEPS, +) + +tf_mkl_kernel_library( + name = "mkl_concat_op", + prefix = "mkl_concat_op", + deps = ["//tensorflow/core/kernels:quantization_utils"] + MKL_DEPS, +) + +tf_mkl_kernel_library( + name = "mkl_reshape_op", + prefix = "mkl_reshape_op", + deps = MKL_DEPS, +) + +tf_mkl_kernel_library( + name = "mkl_slice_op", + prefix = "mkl_slice_op", + deps = MKL_DEPS, +) + +tf_mkl_kernel_library( + name = "mkl_identity_op", + prefix = "mkl_identity_op", + deps = MKL_DEPS, +) + +tf_mkl_kernel_library( + name = "mkl_lrn_op", + prefix = "mkl_lrn_op", + deps = MKL_DEPS, +) + +tf_mkl_kernel_library( + name = 
"mkl_cwise_ops_common", + prefix = "mkl_cwise_ops_common", + deps = MKL_DEPS + ["//tensorflow/core/kernels:cwise_op"], +) + +tf_mkl_kernel_library( + name = "mkl_requantize_ops", + srcs = [ + "mkl_requantization_range_per_channel_op.cc", + "mkl_requantize_per_channel_op.cc", + ], + deps = [ + "//tensorflow/core/kernels:concat_lib_hdrs", + "//tensorflow/core/kernels:conv_ops", + "//tensorflow/core/kernels:eigen_helpers", + "//tensorflow/core/kernels:image_resizer_state", + "//tensorflow/core/kernels:meta_support", + "//tensorflow/core/kernels:no_op", + "//tensorflow/core/kernels:pooling_ops", + "//tensorflow/core/kernels:quantization_utils", + "@gemmlowp", + ] + MKL_DEPS, +) + +tf_cc_test_mkl( + name = "mkl_requantize_ops_test", + size = "small", + srcs = ["mkl_requantize_ops_test.cc"], + linkstatic = 1, # Fixes dyld error on MacOS. + deps = [ + ":mkl_requantize_ops", + "//tensorflow/core:array_ops_op_lib", + "//tensorflow/core:math_ops_op_lib", + "//tensorflow/core/kernels:quantization_utils", + "//tensorflow/core/kernels:quantized_ops", + ] + MKL_TEST_DEPS, +) + +tf_cc_test_mkl( + name = "mkl_fused_ops_test", + size = "small", + srcs = ["mkl_fused_ops_test.cc"], + linkstatic = 1, + deps = [ + ":mkl_conv_op", + ":mkl_matmul_op", + ":mkl_tfconv_op", + "//tensorflow/core:direct_session", + "//tensorflow/core/kernels:bias_op", + "//tensorflow/core/kernels:conv_ops", + "//tensorflow/core/kernels:depthwise_conv_op", + "//tensorflow/core/kernels:image", + "//tensorflow/core/kernels:matmul_op", + "//tensorflow/core/kernels:pad_op", + "//tensorflow/core/kernels:relu_op", + ] + MKL_TEST_DEPS, +) + +tf_mkl_kernel_library( + name = "mkl_transpose_op", + srcs = [ + "mkl_transpose_op.cc", + ], + deps = MKL_DEPS + ["//tensorflow/core/kernels:transpose_op"], +) diff --git a/tensorflow/core/kernels/mkl_aggregate_ops.cc b/tensorflow/core/kernels/mkl/mkl_aggregate_ops.cc similarity index 100% rename from tensorflow/core/kernels/mkl_aggregate_ops.cc rename to tensorflow/core/kernels/mkl/mkl_aggregate_ops.cc diff --git a/tensorflow/core/kernels/mkl_avgpooling_op.cc b/tensorflow/core/kernels/mkl/mkl_avgpooling_op.cc similarity index 99% rename from tensorflow/core/kernels/mkl_avgpooling_op.cc rename to tensorflow/core/kernels/mkl/mkl_avgpooling_op.cc index a238f51860b..754156c860a 100644 --- a/tensorflow/core/kernels/mkl_avgpooling_op.cc +++ b/tensorflow/core/kernels/mkl/mkl_avgpooling_op.cc @@ -21,7 +21,7 @@ #include "tensorflow/core/framework/common_shape_fns.h" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/kernels/mkl_pooling_ops_common.h" +#include "tensorflow/core/kernels/mkl/mkl_pooling_ops_common.h" #include "tensorflow/core/util/mkl_types.h" #include "tensorflow/core/util/mkl_util.h" diff --git a/tensorflow/core/kernels/mkl_batch_matmul_op.cc b/tensorflow/core/kernels/mkl/mkl_batch_matmul_op.cc similarity index 99% rename from tensorflow/core/kernels/mkl_batch_matmul_op.cc rename to tensorflow/core/kernels/mkl/mkl_batch_matmul_op.cc index b65c70566b5..da5a239c224 100644 --- a/tensorflow/core/kernels/mkl_batch_matmul_op.cc +++ b/tensorflow/core/kernels/mkl/mkl_batch_matmul_op.cc @@ -45,7 +45,7 @@ limitations under the License. 
#include "tensorflow/core/framework/types.h" #include "tensorflow/core/kernels/batch_matmul_op_impl.h" #include "tensorflow/core/kernels/fill_functor.h" -#include "tensorflow/core/kernels/mkl_matmul_ops_common.h" +#include "tensorflow/core/kernels/mkl/mkl_matmul_ops_common.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/util/matmul_bcast.h" diff --git a/tensorflow/core/kernels/mkl_concat_op.cc b/tensorflow/core/kernels/mkl/mkl_concat_op.cc similarity index 100% rename from tensorflow/core/kernels/mkl_concat_op.cc rename to tensorflow/core/kernels/mkl/mkl_concat_op.cc diff --git a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc b/tensorflow/core/kernels/mkl/mkl_conv_grad_filter_ops.cc similarity index 99% rename from tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc rename to tensorflow/core/kernels/mkl/mkl_conv_grad_filter_ops.cc index 12581d0bfa5..339ab938cca 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc +++ b/tensorflow/core/kernels/mkl/mkl_conv_grad_filter_ops.cc @@ -28,7 +28,7 @@ limitations under the License. #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_slice.h" #include "tensorflow/core/kernels/conv_grad_ops.h" -#include "tensorflow/core/kernels/mkl_conv_ops.h" +#include "tensorflow/core/kernels/mkl/mkl_conv_ops.h" #include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/array_slice.h" diff --git a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc b/tensorflow/core/kernels/mkl/mkl_conv_grad_input_ops.cc similarity index 99% rename from tensorflow/core/kernels/mkl_conv_grad_input_ops.cc rename to tensorflow/core/kernels/mkl/mkl_conv_grad_input_ops.cc index 7177431029a..2e700d0a627 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc +++ b/tensorflow/core/kernels/mkl/mkl_conv_grad_input_ops.cc @@ -35,7 +35,7 @@ limitations under the License. #include "tensorflow/core/framework/tensor_util.h" #include "tensorflow/core/kernels/conv_grad_ops.h" #include "tensorflow/core/kernels/conv_grad_shape_utils.h" -#include "tensorflow/core/kernels/mkl_conv_ops.h" +#include "tensorflow/core/kernels/mkl/mkl_conv_ops.h" #include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/array_slice.h" diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl/mkl_conv_ops.cc similarity index 99% rename from tensorflow/core/kernels/mkl_conv_ops.cc rename to tensorflow/core/kernels/mkl/mkl_conv_ops.cc index 210044436aa..84fa20ed221 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl/mkl_conv_ops.cc @@ -16,7 +16,7 @@ limitations under the License. // See docs in ../ops/nn_ops.cc. #ifdef INTEL_MKL -#include "tensorflow/core/kernels/mkl_conv_ops.h" +#include "tensorflow/core/kernels/mkl/mkl_conv_ops.h" #include #include @@ -33,7 +33,7 @@ limitations under the License. 
#include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_slice.h" -#include "tensorflow/core/kernels/mkl_quantized_conv_ops.h" +#include "tensorflow/core/kernels/mkl/mkl_quantized_conv_ops.h" #include "tensorflow/core/kernels/no_op.h" #include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/lib/core/errors.h" diff --git a/tensorflow/core/kernels/mkl_conv_ops.h b/tensorflow/core/kernels/mkl/mkl_conv_ops.h similarity index 99% rename from tensorflow/core/kernels/mkl_conv_ops.h rename to tensorflow/core/kernels/mkl/mkl_conv_ops.h index 2ee2a621067..c4a4942e877 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.h +++ b/tensorflow/core/kernels/mkl/mkl_conv_ops.h @@ -13,9 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_MKL_CONV_OPS_H_ -#define TENSORFLOW_CORE_KERNELS_MKL_CONV_OPS_H_ +#ifndef TENSORFLOW_CORE_KERNELS_MKL_MKL_CONV_OPS_H_ +#define TENSORFLOW_CORE_KERNELS_MKL_MKL_CONV_OPS_H_ +#ifdef INTEL_MKL #include #include #include @@ -640,4 +641,5 @@ class MklDummyOp : public OpKernel { } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_MKL_CONV_OPS_H_ +#endif // INTEL_MKL +#endif // TENSORFLOW_CORE_KERNELS_MKL_MKL_CONV_OPS_H_ diff --git a/tensorflow/core/kernels/mkl_conv_ops_test.cc b/tensorflow/core/kernels/mkl/mkl_conv_ops_test.cc similarity index 100% rename from tensorflow/core/kernels/mkl_conv_ops_test.cc rename to tensorflow/core/kernels/mkl/mkl_conv_ops_test.cc diff --git a/tensorflow/core/kernels/mkl_cwise_ops_common.cc b/tensorflow/core/kernels/mkl/mkl_cwise_ops_common.cc similarity index 100% rename from tensorflow/core/kernels/mkl_cwise_ops_common.cc rename to tensorflow/core/kernels/mkl/mkl_cwise_ops_common.cc diff --git a/tensorflow/core/kernels/mkl_dequantize_op.cc b/tensorflow/core/kernels/mkl/mkl_dequantize_op.cc similarity index 100% rename from tensorflow/core/kernels/mkl_dequantize_op.cc rename to tensorflow/core/kernels/mkl/mkl_dequantize_op.cc diff --git a/tensorflow/core/kernels/mkl_dequantize_op_test.cc b/tensorflow/core/kernels/mkl/mkl_dequantize_op_test.cc similarity index 88% rename from tensorflow/core/kernels/mkl_dequantize_op_test.cc rename to tensorflow/core/kernels/mkl/mkl_dequantize_op_test.cc index b400fb761cb..564c2829e99 100644 --- a/tensorflow/core/kernels/mkl_dequantize_op_test.cc +++ b/tensorflow/core/kernels/mkl/mkl_dequantize_op_test.cc @@ -62,23 +62,6 @@ TEST_F(MklDequantizeOpTest, small) { test::ExpectTensorNear(expected, output, 0.1); } -Tensor CreateMklInput() { - MklDnnShape mkl_shape; - memory::desc md = - memory::desc({1, 2, 2, 2}, MklDnnType(), memory::format::nhwc); - mkl_shape.SetMklTensor(true); - mkl_shape.SetMklLayout(&md); - mkl_shape.SetElemType(MklDnnType()); - mkl_shape.SetTfLayout(4, {1, 2, 2, 2}, memory::format::nhwc); - - DataType dtype = DataTypeToEnum::v(); - Tensor mkl_tensor(dtype, {mkl_shape.GetSerializeBufferSize()}); - mkl_shape.SerializeMklDnnShape( - mkl_tensor.flat().data(), - mkl_tensor.flat().size() * sizeof(uint8)); - return mkl_tensor; -} - template class CommonTestUtilities : public OpsTestBase { public: @@ -129,8 +112,7 @@ TEST_F(MklDequantizeOpTest, MKLInput) { AddInputFromArray(TensorShape({1}), {0}); // max_range = 200 AddInputFromArray(TensorShape({1}), {200.0f}); - auto mkl_tensor = CreateMklInput(); - 
AddInputFromArray(mkl_tensor.shape(), mkl_tensor.flat()); + AddInputFromArray(dummy_shape, dummy_tensor); AddInputFromArray(dummy_shape, dummy_tensor); AddInputFromArray(dummy_shape, dummy_tensor); TF_ASSERT_OK(RunOpKernel()); diff --git a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc b/tensorflow/core/kernels/mkl/mkl_fused_batch_norm_op.cc similarity index 100% rename from tensorflow/core/kernels/mkl_fused_batch_norm_op.cc rename to tensorflow/core/kernels/mkl/mkl_fused_batch_norm_op.cc diff --git a/tensorflow/core/kernels/mkl_fused_batch_norm_op_test.cc b/tensorflow/core/kernels/mkl/mkl_fused_batch_norm_op_test.cc similarity index 100% rename from tensorflow/core/kernels/mkl_fused_batch_norm_op_test.cc rename to tensorflow/core/kernels/mkl/mkl_fused_batch_norm_op_test.cc diff --git a/tensorflow/core/kernels/mkl_fused_ops_test.cc b/tensorflow/core/kernels/mkl/mkl_fused_ops_test.cc similarity index 100% rename from tensorflow/core/kernels/mkl_fused_ops_test.cc rename to tensorflow/core/kernels/mkl/mkl_fused_ops_test.cc diff --git a/tensorflow/core/kernels/mkl_identity_op.cc b/tensorflow/core/kernels/mkl/mkl_identity_op.cc similarity index 100% rename from tensorflow/core/kernels/mkl_identity_op.cc rename to tensorflow/core/kernels/mkl/mkl_identity_op.cc diff --git a/tensorflow/core/kernels/mkl_input_conversion_op.cc b/tensorflow/core/kernels/mkl/mkl_input_conversion_op.cc similarity index 99% rename from tensorflow/core/kernels/mkl_input_conversion_op.cc rename to tensorflow/core/kernels/mkl/mkl_input_conversion_op.cc index f7866cbcea6..ae130700a8d 100644 --- a/tensorflow/core/kernels/mkl_input_conversion_op.cc +++ b/tensorflow/core/kernels/mkl/mkl_input_conversion_op.cc @@ -17,21 +17,21 @@ limitations under the License. #include #include + +#include "mkldnn.hpp" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/kernels/mkl/mkl_tfconv_op.h" #include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/platform/byte_order.h" #include "tensorflow/core/platform/cpu_info.h" #include "tensorflow/core/platform/macros.h" -#include "tensorflow/core/util/tensor_format.h" - -#include "mkldnn.hpp" -#include "tensorflow/core/kernels/mkl_tfconv_op.h" #include "tensorflow/core/util/mkl_util.h" +#include "tensorflow/core/util/tensor_format.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/mkl_lrn_op.cc b/tensorflow/core/kernels/mkl/mkl_lrn_op.cc similarity index 100% rename from tensorflow/core/kernels/mkl_lrn_op.cc rename to tensorflow/core/kernels/mkl/mkl_lrn_op.cc diff --git a/tensorflow/core/kernels/mkl_matmul_op.cc b/tensorflow/core/kernels/mkl/mkl_matmul_op.cc similarity index 99% rename from tensorflow/core/kernels/mkl_matmul_op.cc rename to tensorflow/core/kernels/mkl/mkl_matmul_op.cc index c92fceb415c..81339489223 100644 --- a/tensorflow/core/kernels/mkl_matmul_op.cc +++ b/tensorflow/core/kernels/mkl/mkl_matmul_op.cc @@ -34,7 +34,7 @@ limitations under the License. 
#include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/kernels/fill_functor.h" -#include "tensorflow/core/kernels/mkl_matmul_ops_common.h" +#include "tensorflow/core/kernels/mkl/mkl_matmul_ops_common.h" #include "tensorflow/core/util/mkl_util.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/mkl_matmul_op_fused.cc b/tensorflow/core/kernels/mkl/mkl_matmul_op_fused.cc similarity index 99% rename from tensorflow/core/kernels/mkl_matmul_op_fused.cc rename to tensorflow/core/kernels/mkl/mkl_matmul_op_fused.cc index 9e05d3c0cfe..4dd7e3f8c6e 100644 --- a/tensorflow/core/kernels/mkl_matmul_op_fused.cc +++ b/tensorflow/core/kernels/mkl/mkl_matmul_op_fused.cc @@ -21,7 +21,7 @@ limitations under the License. #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/kernels/fill_functor.h" -#include "tensorflow/core/kernels/mkl_matmul_ops_common.h" +#include "tensorflow/core/kernels/mkl/mkl_matmul_ops_common.h" #include "tensorflow/core/lib/core/errors.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/mkl_matmul_ops_common.h b/tensorflow/core/kernels/mkl/mkl_matmul_ops_common.h similarity index 99% rename from tensorflow/core/kernels/mkl_matmul_ops_common.h rename to tensorflow/core/kernels/mkl/mkl_matmul_ops_common.h index f8242d06fa6..fc03374a414 100644 --- a/tensorflow/core/kernels/mkl_matmul_ops_common.h +++ b/tensorflow/core/kernels/mkl/mkl_matmul_ops_common.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_MKL_MATMUL_OPS_COMMON_H_ -#define TENSORFLOW_CORE_KERNELS_MKL_MATMUL_OPS_COMMON_H_ +#ifndef TENSORFLOW_CORE_KERNELS_MKL_MKL_MATMUL_OPS_COMMON_H_ +#define TENSORFLOW_CORE_KERNELS_MKL_MKL_MATMUL_OPS_COMMON_H_ #ifdef INTEL_MKL #include @@ -41,6 +41,7 @@ typedef Eigen::ThreadPoolDevice CPUDevice; typedef enum { CblasRowMajor, CblasColumnMajor } CBLAS_LAYOUT; #define MKL_INT int #endif + // This structure aggregates multiple inputs to MklDnnMatMul* methods. struct MklDnnMatMulFwdParams { memory::dims src_dims; @@ -817,4 +818,4 @@ void dnnl_gemm(char transa, char transb, int64_t m, int64_t n, int64_t k, } // namespace tensorflow #endif // INTEL_MKL -#endif // TENSORFLOW_CORE_KERNELS_MKL_MATMUL_OPS_COMMON_H_ +#endif // TENSORFLOW_CORE_KERNELS_MKL_MKL_MATMUL_OPS_COMMON_H_ diff --git a/tensorflow/core/kernels/mkl_maxpooling_op.cc b/tensorflow/core/kernels/mkl/mkl_maxpooling_op.cc similarity index 99% rename from tensorflow/core/kernels/mkl_maxpooling_op.cc rename to tensorflow/core/kernels/mkl/mkl_maxpooling_op.cc index 3ed6b9d02a2..ca7ebd7fd12 100644 --- a/tensorflow/core/kernels/mkl_maxpooling_op.cc +++ b/tensorflow/core/kernels/mkl/mkl_maxpooling_op.cc @@ -22,7 +22,7 @@ limitations under the License. 
#include "mkldnn.hpp" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/kernels/mkl_pooling_ops_common.h" +#include "tensorflow/core/kernels/mkl/mkl_pooling_ops_common.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/util/mkl_types.h" #include "tensorflow/core/util/mkl_util.h" diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.cc b/tensorflow/core/kernels/mkl/mkl_pooling_ops_common.cc similarity index 99% rename from tensorflow/core/kernels/mkl_pooling_ops_common.cc rename to tensorflow/core/kernels/mkl/mkl_pooling_ops_common.cc index c7ad39ddb50..9824fabce0e 100644 --- a/tensorflow/core/kernels/mkl_pooling_ops_common.cc +++ b/tensorflow/core/kernels/mkl/mkl_pooling_ops_common.cc @@ -15,7 +15,7 @@ limitations under the License. #ifdef INTEL_MKL -#include "tensorflow/core/kernels/mkl_pooling_ops_common.h" +#include "tensorflow/core/kernels/mkl/mkl_pooling_ops_common.h" #include #include diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.h b/tensorflow/core/kernels/mkl/mkl_pooling_ops_common.h similarity index 99% rename from tensorflow/core/kernels/mkl_pooling_ops_common.h rename to tensorflow/core/kernels/mkl/mkl_pooling_ops_common.h index 3d5498ed77b..3a608a66c16 100644 --- a/tensorflow/core/kernels/mkl_pooling_ops_common.h +++ b/tensorflow/core/kernels/mkl/mkl_pooling_ops_common.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_MKL_POOLING_OPS_COMMON_H_ -#define TENSORFLOW_CORE_KERNELS_MKL_POOLING_OPS_COMMON_H_ +#ifndef TENSORFLOW_CORE_KERNELS_MKL_MKL_POOLING_OPS_COMMON_H_ +#define TENSORFLOW_CORE_KERNELS_MKL_MKL_POOLING_OPS_COMMON_H_ #ifdef INTEL_MKL @@ -728,4 +728,4 @@ class MklPoolingBackwardOpBase : public MklPoolingOpBase { } // namespace tensorflow #endif // INTEL_MKL -#endif // TENSORFLOW_CORE_KERNELS_MKL_POOLING_OPS_COMMON_H_ +#endif // TENSORFLOW_CORE_KERNELS_MKL_MKL_POOLING_OPS_COMMON_H_ diff --git a/tensorflow/core/kernels/mkl_qmatmul_op.cc b/tensorflow/core/kernels/mkl/mkl_qmatmul_op.cc similarity index 99% rename from tensorflow/core/kernels/mkl_qmatmul_op.cc rename to tensorflow/core/kernels/mkl/mkl_qmatmul_op.cc index b59612433e6..1cc1945dd4b 100644 --- a/tensorflow/core/kernels/mkl_qmatmul_op.cc +++ b/tensorflow/core/kernels/mkl/mkl_qmatmul_op.cc @@ -94,8 +94,8 @@ limitations under the License. 
#include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/kernels/fill_functor.h" -#include "tensorflow/core/kernels/mkl_matmul_ops_common.h" -#include "tensorflow/core/kernels/mkl_quantized_conv_ops.h" +#include "tensorflow/core/kernels/mkl/mkl_matmul_ops_common.h" +#include "tensorflow/core/kernels/mkl/mkl_quantized_conv_ops.h" #include "tensorflow/core/kernels/no_op.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/util/mkl_threadpool.h" diff --git a/tensorflow/core/kernels/mkl_qmatmul_op_test.cc b/tensorflow/core/kernels/mkl/mkl_qmatmul_op_test.cc similarity index 100% rename from tensorflow/core/kernels/mkl_qmatmul_op_test.cc rename to tensorflow/core/kernels/mkl/mkl_qmatmul_op_test.cc diff --git a/tensorflow/core/kernels/mkl_quantize_op.cc b/tensorflow/core/kernels/mkl/mkl_quantize_op.cc similarity index 100% rename from tensorflow/core/kernels/mkl_quantize_op.cc rename to tensorflow/core/kernels/mkl/mkl_quantize_op.cc diff --git a/tensorflow/core/kernels/mkl_quantize_op_test.cc b/tensorflow/core/kernels/mkl/mkl_quantize_op_test.cc similarity index 100% rename from tensorflow/core/kernels/mkl_quantize_op_test.cc rename to tensorflow/core/kernels/mkl/mkl_quantize_op_test.cc diff --git a/tensorflow/core/kernels/mkl_quantized_concat_op_test.cc b/tensorflow/core/kernels/mkl/mkl_quantized_concat_op_test.cc similarity index 100% rename from tensorflow/core/kernels/mkl_quantized_concat_op_test.cc rename to tensorflow/core/kernels/mkl/mkl_quantized_concat_op_test.cc diff --git a/tensorflow/core/kernels/mkl_quantized_conv_ops.h b/tensorflow/core/kernels/mkl/mkl_quantized_conv_ops.h similarity index 95% rename from tensorflow/core/kernels/mkl_quantized_conv_ops.h rename to tensorflow/core/kernels/mkl/mkl_quantized_conv_ops.h index 4121c88fb83..9fd699cf704 100644 --- a/tensorflow/core/kernels/mkl_quantized_conv_ops.h +++ b/tensorflow/core/kernels/mkl/mkl_quantized_conv_ops.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_MKL_QUANTIZED_CONV_OPS_H_ -#define TENSORFLOW_CORE_KERNELS_MKL_QUANTIZED_CONV_OPS_H_ +#ifndef TENSORFLOW_CORE_KERNELS_MKL_MKL_QUANTIZED_CONV_OPS_H_ +#define TENSORFLOW_CORE_KERNELS_MKL_MKL_QUANTIZED_CONV_OPS_H_ #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/tensor.h" @@ -90,4 +90,4 @@ void MklQuantizationRangeForMultiplication(float min_a, float max_a, #endif // INTEL_MKL -#endif // TENSORFLOW_CORE_KERNELS_MKL_QUANTIZED_CONV_OPS_H_ +#endif // TENSORFLOW_CORE_KERNELS_MKL_MKL_QUANTIZED_CONV_OPS_H_ diff --git a/tensorflow/core/kernels/mkl_quantized_conv_ops_perchannel_test.cc b/tensorflow/core/kernels/mkl/mkl_quantized_conv_ops_perchannel_test.cc similarity index 100% rename from tensorflow/core/kernels/mkl_quantized_conv_ops_perchannel_test.cc rename to tensorflow/core/kernels/mkl/mkl_quantized_conv_ops_perchannel_test.cc diff --git a/tensorflow/core/kernels/mkl_quantized_conv_ops_test.cc b/tensorflow/core/kernels/mkl/mkl_quantized_conv_ops_test.cc similarity index 100% rename from tensorflow/core/kernels/mkl_quantized_conv_ops_test.cc rename to tensorflow/core/kernels/mkl/mkl_quantized_conv_ops_test.cc diff --git a/tensorflow/core/kernels/mkl_quantized_pooling_ops_test.cc b/tensorflow/core/kernels/mkl/mkl_quantized_pooling_ops_test.cc similarity index 100% rename from tensorflow/core/kernels/mkl_quantized_pooling_ops_test.cc rename to tensorflow/core/kernels/mkl/mkl_quantized_pooling_ops_test.cc diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl/mkl_relu_op.cc similarity index 100% rename from tensorflow/core/kernels/mkl_relu_op.cc rename to tensorflow/core/kernels/mkl/mkl_relu_op.cc diff --git a/tensorflow/core/kernels/mkl_relu_op_test.cc b/tensorflow/core/kernels/mkl/mkl_relu_op_test.cc similarity index 100% rename from tensorflow/core/kernels/mkl_relu_op_test.cc rename to tensorflow/core/kernels/mkl/mkl_relu_op_test.cc diff --git a/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc b/tensorflow/core/kernels/mkl/mkl_requantization_range_per_channel_op.cc similarity index 100% rename from tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc rename to tensorflow/core/kernels/mkl/mkl_requantization_range_per_channel_op.cc diff --git a/tensorflow/core/kernels/mkl_requantize_ops_test.cc b/tensorflow/core/kernels/mkl/mkl_requantize_ops_test.cc similarity index 100% rename from tensorflow/core/kernels/mkl_requantize_ops_test.cc rename to tensorflow/core/kernels/mkl/mkl_requantize_ops_test.cc diff --git a/tensorflow/core/kernels/mkl_requantize_per_channel_op.cc b/tensorflow/core/kernels/mkl/mkl_requantize_per_channel_op.cc similarity index 100% rename from tensorflow/core/kernels/mkl_requantize_per_channel_op.cc rename to tensorflow/core/kernels/mkl/mkl_requantize_per_channel_op.cc diff --git a/tensorflow/core/kernels/mkl_reshape_op.cc b/tensorflow/core/kernels/mkl/mkl_reshape_op.cc similarity index 100% rename from tensorflow/core/kernels/mkl_reshape_op.cc rename to tensorflow/core/kernels/mkl/mkl_reshape_op.cc diff --git a/tensorflow/core/kernels/mkl_slice_op.cc b/tensorflow/core/kernels/mkl/mkl_slice_op.cc similarity index 100% rename from tensorflow/core/kernels/mkl_slice_op.cc rename to tensorflow/core/kernels/mkl/mkl_slice_op.cc diff --git a/tensorflow/core/kernels/mkl_softmax_op.cc b/tensorflow/core/kernels/mkl/mkl_softmax_op.cc similarity index 100% rename from 
tensorflow/core/kernels/mkl_softmax_op.cc rename to tensorflow/core/kernels/mkl/mkl_softmax_op.cc diff --git a/tensorflow/core/kernels/mkl_tfconv_op.h b/tensorflow/core/kernels/mkl/mkl_tfconv_op.h similarity index 97% rename from tensorflow/core/kernels/mkl_tfconv_op.h rename to tensorflow/core/kernels/mkl/mkl_tfconv_op.h index f7aa4d2bebf..0a603ee2c12 100644 --- a/tensorflow/core/kernels/mkl_tfconv_op.h +++ b/tensorflow/core/kernels/mkl/mkl_tfconv_op.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_MKL_TFCONV_OP_H_ -#define TENSORFLOW_CORE_KERNELS_MKL_TFCONV_OP_H_ +#ifndef TENSORFLOW_CORE_KERNELS_MKL_MKL_TFCONV_OP_H_ +#define TENSORFLOW_CORE_KERNELS_MKL_MKL_TFCONV_OP_H_ #ifdef INTEL_MKL @@ -160,4 +160,4 @@ TF_CALL_QUANTIZED_TYPES(REGISTER_CPU); } // namespace tensorflow #endif // INTEL_MKL -#endif // TENSORFLOW_CORE_KERNELS_MKL_TFCONV_OP_H_ +#endif // TENSORFLOW_CORE_KERNELS_MKL_MKL_TFCONV_OP_H_ diff --git a/tensorflow/core/kernels/mkl_tmp_bf16_ops.cc b/tensorflow/core/kernels/mkl/mkl_tmp_bf16_ops.cc similarity index 100% rename from tensorflow/core/kernels/mkl_tmp_bf16_ops.cc rename to tensorflow/core/kernels/mkl/mkl_tmp_bf16_ops.cc diff --git a/tensorflow/core/kernels/mkl_transpose_op.cc b/tensorflow/core/kernels/mkl/mkl_transpose_op.cc similarity index 100% rename from tensorflow/core/kernels/mkl_transpose_op.cc rename to tensorflow/core/kernels/mkl/mkl_transpose_op.cc From 59affc4d61328c86baf680f2d818b43da13373cc Mon Sep 17 00:00:00 2001 From: Yanhui Liang Date: Fri, 7 Aug 2020 17:18:52 -0700 Subject: [PATCH 2383/2522] Refactor LSTM and GRU layer with `tf.function` and stateless Case op. PiperOrigin-RevId: 325538595 Change-Id: Iecc5789d33f9dbfc221aedc54500745c79230022 --- .../python/keras/layers/recurrent_v2.py | 306 ++++++++++++------ 1 file changed, 205 insertions(+), 101 deletions(-) diff --git a/tensorflow/python/keras/layers/recurrent_v2.py b/tensorflow/python/keras/layers/recurrent_v2.py index 878269dee5e..a2ed7141608 100644 --- a/tensorflow/python/keras/layers/recurrent_v2.py +++ b/tensorflow/python/keras/layers/recurrent_v2.py @@ -20,6 +20,7 @@ from __future__ import print_function import uuid +from tensorflow.python.compat import compat from tensorflow.python.eager import context from tensorflow.python.eager import function from tensorflow.python.framework import constant_op @@ -386,6 +387,20 @@ class GRU(recurrent.DropoutRNNCellMixin, recurrent.GRU): else: logging.warn(_CUDNN_NOT_AVAILABLE_MSG % self.name) + # TODO(b/162616551): Remove all compat statements after 08/20/2020. + # This follows b/161915509 and is mainly to test the stateless Case op. + if compat.forward_compatible(2020, 8, 20): + # The first two attributes are added to support TFLite use case. 
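# Illustrative sketch only, not part of this patch: the forward-compatibility
# gate used above enables the new code path only once the 2020-08-20 window
# has passed, and tests can simulate a later date with
# `forward_compatibility_horizon`. The branch bodies are placeholders.
from tensorflow.python.compat import compat

def pick_gru_path():
  if compat.forward_compatible(2020, 8, 20):
    return 'stateless Case path (execute_fn_for_device)'
  return 'legacy defun + function.register path'

print(pick_gru_path())
with compat.forward_compatibility_horizon(2020, 8, 21):
  print(pick_gru_path())  # reports the new path inside this simulated horizon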
+ supportive_attributes = { + 'time_major': time_major, + 'go_backwards': go_backwards, + _FUNCTION_API_NAME_ATTRIBUTE: 'gru_' + str(uuid.uuid4()) + } + self.defun_gru_with_backend_selection = function.defun_with_attributes( + gru_with_backend_selection, + attributes=supportive_attributes, + autograph=False) + def build(self, input_shape): super(GRU, self).build(input_shape) @@ -468,38 +483,54 @@ class GRU(recurrent.DropoutRNNCellMixin, recurrent.GRU): if dropout_mask is not None: inputs = inputs * dropout_mask[0] - gpu_gru_kwargs = { - 'inputs': inputs, - 'init_h': _read_variable_value(initial_state[0]), - 'kernel': _read_variable_value(self.cell.kernel), - 'recurrent_kernel': _read_variable_value(self.cell.recurrent_kernel), - 'bias': _read_variable_value(self.cell.bias), - 'mask': mask, - 'time_major': self.time_major, - 'go_backwards': self.go_backwards, - 'sequence_lengths': sequence_lengths - } - normal_gru_kwargs = gpu_gru_kwargs.copy() - normal_gru_kwargs.update({ - 'zero_output_for_mask': self.zero_output_for_mask, - }) - - if context.executing_eagerly(): - device_type = _get_context_device_type() - can_use_gpu = ( - # Either user specified GPU or unspecified but GPU is available. - (device_type == _GPU_DEVICE_NAME - or (device_type is None and context.num_gpus() > 0)) - and - (mask is None or is_cudnn_supported_inputs(mask, self.time_major))) - # Under eager context, check the device placement and prefer the - if can_use_gpu: - last_output, outputs, new_h, runtime = gpu_gru(**gpu_gru_kwargs) - else: - last_output, outputs, new_h, runtime = standard_gru(**normal_gru_kwargs) + if compat.forward_compatible(2020, 8, 20): + gru_kwargs = { + 'inputs': inputs, + 'init_h': _read_variable_value(initial_state[0]), + 'kernel': _read_variable_value(self.cell.kernel), + 'recurrent_kernel': _read_variable_value(self.cell.recurrent_kernel), + 'bias': _read_variable_value(self.cell.bias), + 'mask': mask, + 'time_major': self.time_major, + 'go_backwards': self.go_backwards, + 'sequence_lengths': sequence_lengths, + 'zero_output_for_mask': self.zero_output_for_mask + } + (last_output, outputs, new_h, + runtime) = self.defun_gru_with_backend_selection(**gru_kwargs) else: - last_output, outputs, new_h, runtime = gru_with_backend_selection( - **normal_gru_kwargs) + gpu_gru_kwargs = { + 'inputs': inputs, + 'init_h': _read_variable_value(initial_state[0]), + 'kernel': _read_variable_value(self.cell.kernel), + 'recurrent_kernel': _read_variable_value(self.cell.recurrent_kernel), + 'bias': _read_variable_value(self.cell.bias), + 'mask': mask, + 'time_major': self.time_major, + 'go_backwards': self.go_backwards, + 'sequence_lengths': sequence_lengths + } + normal_gru_kwargs = gpu_gru_kwargs.copy() + normal_gru_kwargs.update({ + 'zero_output_for_mask': self.zero_output_for_mask, + }) + + if context.executing_eagerly(): + device_type = _get_context_device_type() + can_use_gpu = ( + # Either user specified GPU or unspecified but GPU is available. 
+ (device_type == _GPU_DEVICE_NAME or + (device_type is None and context.num_gpus() > 0)) and + (mask is None or is_cudnn_supported_inputs(mask, self.time_major))) + # Under eager context, check the device placement and prefer the + if can_use_gpu: + last_output, outputs, new_h, runtime = gpu_gru(**gpu_gru_kwargs) + else: + last_output, outputs, new_h, runtime = standard_gru( + **normal_gru_kwargs) + else: + last_output, outputs, new_h, runtime = gru_with_backend_selection( + **normal_gru_kwargs) states = [new_h] return last_output, outputs, runtime, states @@ -766,24 +797,36 @@ def gru_with_backend_selection(inputs, init_h, kernel, recurrent_kernel, bias, true_fn=cudnn_gru_fn, false_fn=standard_gru_fn) - # Each time a `tf.function` is called, we will give it a unique - # identifiable API name, so that Grappler won't get confused when it - # sees multiple GRU layers added into same graph, and it will be able - # to pair up the different implementations across them. - api_name = 'gru_' + str(uuid.uuid4()) - supportive_attribute = { - 'time_major': time_major, - 'go_backwards': go_backwards, - } - defun_standard_gru = _generate_defun_backend( - api_name, _CPU_DEVICE_NAME, standard_gru, supportive_attribute) - defun_gpu_gru = _generate_defun_backend( - api_name, _GPU_DEVICE_NAME, gpu_gru_with_fallback, supportive_attribute) + if compat.forward_compatible(2020, 8, 20): + # Chooses the implementation dynamicly based on the running device. + (last_output, outputs, new_h, + runtime) = control_flow_ops.execute_fn_for_device( + { + _CPU_DEVICE_NAME: lambda: standard_gru(**params), + _GPU_DEVICE_NAME: lambda: gpu_gru_with_fallback(**params) + }, lambda: standard_gru(**params)) + else: + # Each time a `tf.function` is called, we will give it a unique + # identifiable API name, so that Grappler won't get confused when it + # sees multiple GRU layers added into same graph, and it will be able + # to pair up the different implementations across them. + api_name = 'gru_' + str(uuid.uuid4()) + supportive_attribute = { + 'time_major': time_major, + 'go_backwards': go_backwards, + } + defun_standard_gru = _generate_defun_backend(api_name, _CPU_DEVICE_NAME, + standard_gru, + supportive_attribute) + defun_gpu_gru = _generate_defun_backend(api_name, _GPU_DEVICE_NAME, + gpu_gru_with_fallback, + supportive_attribute) + + # Call the normal GRU impl and register the CuDNN impl function. The + # grappler will kick in during session execution to optimize the graph. + last_output, outputs, new_h, runtime = defun_standard_gru(**params) + function.register(defun_gpu_gru, **params) - # Call the normal GRU impl and register the CuDNN impl function. The - # grappler will kick in during session execution to optimize the graph. - last_output, outputs, new_h, runtime = defun_standard_gru(**params) - function.register(defun_gpu_gru, **params) return last_output, outputs, new_h, runtime @@ -1098,6 +1141,19 @@ class LSTM(recurrent.DropoutRNNCellMixin, recurrent.LSTM): else: logging.warn(_CUDNN_NOT_AVAILABLE_MSG % self.name) + if compat.forward_compatible(2020, 8, 20): + # The first two attributes are added to support TFLite use case. 
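# Minimal, self-contained sketch (not the Keras code itself) of the stateless
# device-dispatch pattern that `execute_fn_for_device` introduces above: the
# branch is selected by a Case op keyed on the device the op is placed on,
# with a default fallback. `cpu_path`/`gpu_path` are placeholders standing in
# for the standard and CuDNN implementations.
import tensorflow as tf
from tensorflow.python.ops import control_flow_ops

def cpu_path(x):
  return x * 2.0  # stand-in for standard_gru / standard_lstm

def gpu_path(x):
  return x + x    # stand-in for gpu_gru / gpu_lstm (same result, CuDNN-backed in the real code)

@tf.function
def dispatch(x):
  return control_flow_ops.execute_fn_for_device(
      {'CPU': lambda: cpu_path(x), 'GPU': lambda: gpu_path(x)},
      lambda: cpu_path(x))  # default branch when neither device matches

print(dispatch(tf.constant([1.0, 2.0])))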
+ supportive_attributes = { + 'time_major': time_major, + 'go_backwards': go_backwards, + _FUNCTION_API_NAME_ATTRIBUTE: 'lstm_' + str(uuid.uuid4()) + } + + self.defun_lstm_with_backend_selection = function.defun_with_attributes( + lstm_with_backend_selection, + attributes=supportive_attributes, + autograph=False) + def call(self, inputs, mask=None, training=None, initial_state=None): # The input should be dense, padded with zeros. If a ragged input is fed # into the layer, it is padded and the row lengths are used for masking. @@ -1146,42 +1202,80 @@ class LSTM(recurrent.DropoutRNNCellMixin, recurrent.LSTM): dropout_mask = self.get_dropout_mask_for_cell(inputs, training, count=4) if dropout_mask is not None: inputs = inputs * dropout_mask[0] - gpu_lstm_kwargs = { - 'inputs': inputs, - 'init_h': _read_variable_value(initial_state[0]), - 'init_c': _read_variable_value(initial_state[1]), - 'kernel': _read_variable_value(self.cell.kernel), - 'recurrent_kernel': _read_variable_value(self.cell.recurrent_kernel), - 'bias': _read_variable_value(self.cell.bias), - 'mask': mask, - 'time_major': self.time_major, - 'go_backwards': self.go_backwards, - 'sequence_lengths': row_lengths - } - normal_lstm_kwargs = gpu_lstm_kwargs.copy() - normal_lstm_kwargs.update({ - 'zero_output_for_mask': self.zero_output_for_mask, - }) - - if context.executing_eagerly(): - device_type = _get_context_device_type() - can_use_gpu = ( - # Either user specified GPU or unspecified but GPU is available. - (device_type == _GPU_DEVICE_NAME - or (device_type is None and context.num_gpus() > 0)) - and - (mask is None or is_cudnn_supported_inputs(mask, self.time_major))) - # Under eager context, check the device placement and prefer the - # GPU implementation when GPU is available. - if can_use_gpu: - last_output, outputs, new_h, new_c, runtime = gpu_lstm( - **gpu_lstm_kwargs) - else: - last_output, outputs, new_h, new_c, runtime = standard_lstm( - **normal_lstm_kwargs) - else: + if compat.forward_compatible(2020, 8, 20): + lstm_kwargs = { + 'inputs': + inputs, + 'init_h': + _read_variable_value(initial_state[0]), + 'init_c': + _read_variable_value(initial_state[1]), + 'kernel': + _read_variable_value(self.cell.kernel), + 'recurrent_kernel': + _read_variable_value(self.cell.recurrent_kernel), + 'bias': + _read_variable_value(self.cell.bias), + 'mask': + mask, + 'time_major': + self.time_major, + 'go_backwards': + self.go_backwards, + 'sequence_lengths': + row_lengths, + 'zero_output_for_mask': + self.zero_output_for_mask, + } (last_output, outputs, new_h, new_c, - runtime) = lstm_with_backend_selection(**normal_lstm_kwargs) + runtime) = self.defun_lstm_with_backend_selection(**lstm_kwargs) + else: + gpu_lstm_kwargs = { + 'inputs': + inputs, + 'init_h': + _read_variable_value(initial_state[0]), + 'init_c': + _read_variable_value(initial_state[1]), + 'kernel': + _read_variable_value(self.cell.kernel), + 'recurrent_kernel': + _read_variable_value(self.cell.recurrent_kernel), + 'bias': + _read_variable_value(self.cell.bias), + 'mask': + mask, + 'time_major': + self.time_major, + 'go_backwards': + self.go_backwards, + 'sequence_lengths': + row_lengths + } + normal_lstm_kwargs = gpu_lstm_kwargs.copy() + normal_lstm_kwargs.update({ + 'zero_output_for_mask': self.zero_output_for_mask, + }) + + if context.executing_eagerly(): + device_type = _get_context_device_type() + can_use_gpu = ( + # Either user specified GPU or unspecified but GPU is available. 
+ (device_type == _GPU_DEVICE_NAME or + (device_type is None and context.num_gpus() > 0)) and + (mask is None or + is_cudnn_supported_inputs(mask, self.time_major))) + # Under eager context, check the device placement and prefer the + # GPU implementation when GPU is available. + if can_use_gpu: + last_output, outputs, new_h, new_c, runtime = gpu_lstm( + **gpu_lstm_kwargs) + else: + last_output, outputs, new_h, new_c, runtime = standard_lstm( + **normal_lstm_kwargs) + else: + (last_output, outputs, new_h, new_c, + runtime) = lstm_with_backend_selection(**normal_lstm_kwargs) states = [new_h, new_c] @@ -1539,25 +1633,35 @@ def lstm_with_backend_selection(inputs, init_h, init_c, kernel, true_fn=cudnn_lstm_fn, false_fn=stardard_lstm_fn) - # Each time a `tf.function` is called, we will give it a unique - # identifiable API name, so that Grappler won't get confused when it - # sees multiple LSTM layers added into same graph, and it will be able - # to pair up the different implementations across them. - api_name = 'lstm_' + str(uuid.uuid4()) - supportive_attribute = { - 'time_major': time_major, - 'go_backwards': go_backwards, - } - defun_standard_lstm = _generate_defun_backend( - api_name, _CPU_DEVICE_NAME, standard_lstm, supportive_attribute) - defun_gpu_lstm = _generate_defun_backend( - api_name, _GPU_DEVICE_NAME, gpu_lstm_with_fallback, supportive_attribute) + if compat.forward_compatible(2020, 8, 20): + # Chooses the implementation dynamicly based on the running device. + (last_output, outputs, new_h, new_c, + runtime) = control_flow_ops.execute_fn_for_device( + { + _CPU_DEVICE_NAME: lambda: standard_lstm(**params), + _GPU_DEVICE_NAME: lambda: gpu_lstm_with_fallback(**params) + }, lambda: standard_lstm(**params)) + else: + # Each time a `tf.function` is called, we will give it a unique + # identifiable API name, so that Grappler won't get confused when it + # sees multiple LSTM layers added into same graph, and it will be able + # to pair up the different implementations across them. + api_name = 'lstm_' + str(uuid.uuid4()) + supportive_attribute = { + 'time_major': time_major, + 'go_backwards': go_backwards, + } + defun_standard_lstm = _generate_defun_backend(api_name, _CPU_DEVICE_NAME, + standard_lstm, + supportive_attribute) + defun_gpu_lstm = _generate_defun_backend(api_name, _GPU_DEVICE_NAME, + gpu_lstm_with_fallback, + supportive_attribute) - # Call the normal LSTM impl and register the CuDNN impl function. The - # grappler will kick in during session execution to optimize the graph. - last_output, outputs, new_h, new_c, runtime = defun_standard_lstm( - **params) - function.register(defun_gpu_lstm, **params) + # Call the normal LSTM impl and register the CuDNN impl function. The + # grappler will kick in during session execution to optimize the graph. + last_output, outputs, new_h, new_c, runtime = defun_standard_lstm(**params) + function.register(defun_gpu_lstm, **params) return last_output, outputs, new_h, new_c, runtime From 769155a21e632b89ffa96a6e0d27523f6b6a94b3 Mon Sep 17 00:00:00 2001 From: Ayush Dubey Date: Fri, 7 Aug 2020 17:46:31 -0700 Subject: [PATCH 2384/2522] Make ScopedAllocatorOptimizer compatible with Const input. In a previous change, we aborted ScopedAllocatorOptimizer when one of the inputs is a Const op. This change instead enables Const op to work with ScopedAllocatorOptimizer by introducing an Identity op after Const. 
Thus we change: Const -> CollectiveReduce to Const -> Identity -> CollectiveReduce The Identity becomes the real input to CollectiveReduce, and it will use the pre-allocated buffer slice for its output tensor when it invokes `set_output` by this logic: https://github.com/tensorflow/tensorflow/blob/6b65afa4209a8743d819693ab6c50b5df4db8af9/tensorflow/core/framework/op_kernel.cc#L903. This is similar to the approach in cl/259138773. PiperOrigin-RevId: 325541732 Change-Id: I6487685089520b73387197a31fef5780217a3a4b --- tensorflow/core/grappler/optimizers/BUILD | 1 + .../optimizers/scoped_allocator_optimizer.cc | 5 +-- .../scoped_allocator_optimizer_test.cc | 33 +++++++++++++++---- 3 files changed, 30 insertions(+), 9 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index d3db2f19596..2ce037178b9 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -993,6 +993,7 @@ tf_cc_test( "//tensorflow/core:test_main", "//tensorflow/core:testlib", "//tensorflow/core/grappler:grappler_item", + "//tensorflow/core/grappler:op_types", "//tensorflow/core/grappler:utils", "//tensorflow/core/grappler/inputs:trivial_test_graph_input_yielder", "//tensorflow/core/grappler/utils:topological_sort", diff --git a/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.cc b/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.cc index 3f33ff50f6c..11f95894ff9 100644 --- a/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer.cc @@ -218,8 +218,9 @@ Status MaybeRewriteInput(ScopedAllocatorOptimizer* sa_opti, NodeDef* input, const string& edge_name, int output_index, NodeDef* op, NodeDef** new_input, int* new_output_index, bool* rewrite) { - *rewrite = IsExit(*input) || (sa_opti->repeated_outputs().find(edge_name) != - sa_opti->repeated_outputs().end()); + *rewrite = IsConstant(*input) || IsExit(*input) || + (sa_opti->repeated_outputs().find(edge_name) != + sa_opti->repeated_outputs().end()); if (!(*rewrite)) { *new_input = input; *new_output_index = output_index; diff --git a/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer_test.cc b/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer_test.cc index 905968b5fcb..4f7f4f582e4 100644 --- a/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/scoped_allocator_optimizer_test.cc @@ -24,6 +24,7 @@ limitations under the License. #include "tensorflow/core/framework/tensor_testutil.h" #include "tensorflow/core/graph/testlib.h" #include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/grappler/op_types.h" #include "tensorflow/core/grappler/utils.h" #include "tensorflow/core/grappler/utils/topological_sort.h" #include "tensorflow/core/lib/core/status_test_util.h" @@ -241,8 +242,9 @@ class ScopedAllocatorOptimizerTest : public ::testing::Test { // Constructs the following graph. // // c1 and c2 are Const ops. a1 and a2 are Abs ops. - // We expect the optimizer to fail, because Const ops do not allocate their - // output on every Compute, and hence are not compatible with ScopedAllocator. + // We expect the optimizer to succeed and insert Identity between ci and ai. + // This will ensure that we will still be able use ScopedAllocator with Const + // inputs. 
/* c1 c2 | | @@ -559,7 +561,8 @@ TEST_F(ScopedAllocatorOptimizerTest, ControlEdgeRewire) { EXPECT_EQ(NumControlInputs(&node_map, "ctl4"), 1); } -// Test that the optimization fails when any input is a Const op. +// Test that the optimization succeeds when any input is a Const op, and that it +// inserts Identity op between Const and Abs. TEST_F(ScopedAllocatorOptimizerTest, ConstInput) { GrapplerItem item; BuildConstGraph(&item.graph, false); @@ -572,10 +575,26 @@ TEST_F(ScopedAllocatorOptimizerTest, ConstInput) { ons.insert("Abs"); GraphDef optimized_graph; - auto status = sao.Optimize(nullptr /*cluster*/, item, &optimized_graph); - EXPECT_EQ(status.code(), tensorflow::error::ABORTED); - EXPECT_TRUE(str_util::StrContains(status.error_message(), - "does not use AllocatorAttributes")); + TF_ASSERT_OK(sao.Optimize(nullptr /*cluster*/, item, &optimized_graph)); + + // Examine the resulting graphdef. + const NodeDef* sa_node = nullptr; + for (const NodeDef& node : optimized_graph.node()) { + if (node.op() == "_ScopedAllocator") { + sa_node = &node; + break; + } + } + ASSERT_NE(sa_node, nullptr); + int num_identity_ops = 0; + NodeMap node_map(&optimized_graph); + for (NodeDef* sa_output : node_map.GetOutputs(sa_node->name())) { + EXPECT_FALSE(IsConstant(*sa_output)); + if (IsIdentity(*sa_output)) { + ++num_identity_ops; + } + } + EXPECT_EQ(num_identity_ops, 2); } } // namespace From 3ba0deba916b52d1a8ee0b13b94352c60203072d Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Fri, 7 Aug 2020 17:51:06 -0700 Subject: [PATCH 2385/2522] Introduce additional TPU infeed and outfeed ops PiperOrigin-RevId: 325542225 Change-Id: Ie972e60d6c5639b71719837c500ecc716eda2ebd --- tensorflow/core/tpu/BUILD | 8 +- tensorflow/core/tpu/kernels/BUILD | 107 ++++ .../core/tpu/kernels/image_resize_ops.cc | 155 +++++ tensorflow/core/tpu/kernels/infeed_ops.cc | 529 ++++++++++++++++++ tensorflow/core/tpu/kernels/infeed_ops.h | 69 +++ tensorflow/core/tpu/kernels/outfeed_ops.cc | 116 ++++ tensorflow/core/tpu/kernels/outfeed_ops.h | 69 +++ .../core/tpu/kernels/replication_ops.cc | 27 + .../core/tpu/kernels/tpu_handle_to_key_op.cc | 62 ++ tensorflow/core/tpu/kernels/transfer_ops.cc | 98 ++++ tensorflow/core/tpu/kernels/transfer_ops.h | 56 ++ tensorflow/core/tpu/tpu_defs.cc | 18 + tensorflow/core/tpu/tpu_defs.h | 6 + tensorflow/core/tpu/tpu_library_init_fns.inc | 1 + tensorflow/stream_executor/tpu/BUILD | 2 + .../stream_executor/tpu/tpu_executor_c_api.h | 3 + .../tpu/tpu_transfer_manager.h | 6 + .../tpu/tpu_transfer_manager_interface.cc | 40 ++ .../tpu/tpu_transfer_manager_interface.h | 7 + 19 files changed, 1378 insertions(+), 1 deletion(-) create mode 100644 tensorflow/core/tpu/kernels/image_resize_ops.cc create mode 100644 tensorflow/core/tpu/kernels/infeed_ops.cc create mode 100644 tensorflow/core/tpu/kernels/infeed_ops.h create mode 100644 tensorflow/core/tpu/kernels/outfeed_ops.cc create mode 100644 tensorflow/core/tpu/kernels/outfeed_ops.h create mode 100644 tensorflow/core/tpu/kernels/replication_ops.cc create mode 100644 tensorflow/core/tpu/kernels/tpu_handle_to_key_op.cc create mode 100644 tensorflow/core/tpu/kernels/transfer_ops.cc create mode 100644 tensorflow/core/tpu/kernels/transfer_ops.h create mode 100644 tensorflow/stream_executor/tpu/tpu_transfer_manager_interface.cc diff --git a/tensorflow/core/tpu/BUILD b/tensorflow/core/tpu/BUILD index 0a17ba3d408..15b2b93e46f 100644 --- a/tensorflow/core/tpu/BUILD +++ b/tensorflow/core/tpu/BUILD @@ -88,7 +88,13 @@ cc_library( name = "tpu_defs", srcs = ["tpu_defs.cc"], hdrs = 
["tpu_defs.h"], - deps = ["//tensorflow/core:protos_all_cc"], + deps = [ + ":tpu_api", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/core:protos_all_cc", + "//tensorflow/stream_executor/tpu:c_api_conversions", + "//tensorflow/stream_executor/tpu:c_api_decl", + ], ) cc_library( diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index 1336f52ed34..157aeb3df58 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -28,10 +28,16 @@ tf_kernel_library( deps = [ ":cross_replica_ops", ":host_compute_ops", + ":image_resize_ops", + ":infeed_ops", + ":outfeed_ops", + ":replication_ops", ":topk_ops", ":tpu_compile_op", ":tpu_configuration_ops", ":tpu_execute_op", + ":tpu_handle_to_key_op", + ":transfer_ops", ], ) @@ -684,3 +690,104 @@ cc_library( ], alwayslink = 1, ) + +cc_library( + name = "infeed_ops", + srcs = ["infeed_ops.cc"], + hdrs = ["infeed_ops.h"], + visibility = ["//visibility:public"], + deps = [ + ":transfer_ops", + "//tensorflow/compiler/jit:xla_device_no_jit_rewrite_registration", + "//tensorflow/compiler/tf2xla:common", + "//tensorflow/compiler/xla:util", + "//tensorflow/core:framework", + "//tensorflow/core/common_runtime:dma_helper", + "//tensorflow/core/framework:protos_all_cc", + "//tensorflow/core/kernels:transpose_functor", + "//tensorflow/core/platform:status", + "//tensorflow/core/profiler/lib:traceme", + "//tensorflow/core/tpu:tpu_defs", + "//tensorflow/stream_executor:multi_platform_manager", + "//tensorflow/stream_executor/tpu:tpu_transfer_manager_base", + "//tensorflow/stream_executor/tpu:tpu_transfer_manager_interface", + ], + alwayslink = True, +) + +cc_library( + name = "transfer_ops", + srcs = ["transfer_ops.cc"], + hdrs = ["transfer_ops.h"], + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/compiler/jit:xla_device_no_jit_rewrite_registration", + "//tensorflow/core:framework", + "//tensorflow/core:lib_internal", + "//tensorflow/core/kernels:ops_util", + "//tensorflow/core/profiler/lib:traceme", + "//tensorflow/stream_executor:multi_platform_manager", + "//tensorflow/stream_executor/tpu:tpu_node_context", + "//tensorflow/stream_executor/tpu:tpu_platform_interface", + "//tensorflow/stream_executor/tpu:tpu_transfer_manager_interface", + ], + alwayslink = True, +) + +cc_library( + name = "outfeed_ops", + srcs = ["outfeed_ops.cc"], + hdrs = ["outfeed_ops.h"], + visibility = ["//visibility:public"], + deps = [ + ":transfer_ops", + "//tensorflow/compiler/jit:xla_device_no_jit_rewrite_registration", + "//tensorflow/compiler/tf2xla:common", + "//tensorflow/core:framework", + "//tensorflow/core/framework:protos_all_cc", + "//tensorflow/core/tpu:tpu_defs", + "//tensorflow/stream_executor:multi_platform_manager", + ], + alwayslink = True, +) + +cc_library( + name = "image_resize_ops", + srcs = ["image_resize_ops.cc"], + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/compiler/tf2xla:common", + "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/xla/client:xla_builder", + "//tensorflow/compiler/xla/client/lib:constants", + "//tensorflow/core:framework", + "//tensorflow/core/tpu:tpu_defs", + "@com_google_absl//absl/strings", + ], + alwayslink = True, +) + +cc_library( + name = "replication_ops", + srcs = ["replication_ops.cc"], + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/compiler/jit:xla_device_no_jit_rewrite_registration", + "//tensorflow/core:framework", + "//tensorflow/core/tpu:tpu_defs", + ], + alwayslink = True, +) + 
+cc_library( + name = "tpu_handle_to_key_op", + srcs = ["tpu_handle_to_key_op.cc"], + visibility = ["//visibility:public"], + deps = [ + ":tpu_compilation_cache_interface", + ":tpu_op_consts", + "//tensorflow/core:framework", + "//tensorflow/core/tpu:tpu_configuration", + ], + alwayslink = True, +) diff --git a/tensorflow/core/tpu/kernels/image_resize_ops.cc b/tensorflow/core/tpu/kernels/image_resize_ops.cc new file mode 100644 index 00000000000..fd0f5e4c7a6 --- /dev/null +++ b/tensorflow/core/tpu/kernels/image_resize_ops.cc @@ -0,0 +1,155 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "absl/strings/match.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" +#include "tensorflow/compiler/tf2xla/shape_util.h" +#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" +#include "tensorflow/compiler/tf2xla/xla_op_registry.h" +#include "tensorflow/compiler/xla/client/lib/constants.h" +#include "tensorflow/compiler/xla/client/xla_builder.h" +#include "tensorflow/core/framework/kernel_def_builder.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/tpu/tpu_defs.h" + +namespace tensorflow { + +class TpuCustomResizeOp : public XlaOpKernel { + public: + explicit TpuCustomResizeOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("align_corners", &align_corners_)); + OP_REQUIRES_OK(ctx, + ctx->GetAttr("half_pixel_centers", &half_pixel_centers_)); + } + + xla::Shape GetOutputShape(XlaOpKernelContext* ctx) const { + std::vector out_size; + auto status = ctx->ConstantInputAsIntVector(1, &out_size); + CHECK_EQ(out_size.size(), 2) << status.ToString(); + xla::Shape output_shape = + TensorShapeToXLAShape(ctx->output_xla_type(0), ctx->InputShape(0)); + output_shape.mutable_dimensions()[1] = out_size[0]; + output_shape.mutable_dimensions()[2] = out_size[1]; + return output_shape; + } + + string OpaqueField() const { + return absl::StrCat("\"", align_corners_, half_pixel_centers_, "\""); + } + + void CompileGrad(XlaOpKernelContext* ctx, const char* target, + const xla::Shape& output_shape) { + auto input_shape = + TensorShapeToXLAShape(ctx->output_xla_type(0), ctx->InputShape(0)); + if (ctx->InputShape(1).dim_sizes() == ctx->InputShape(0).dim_sizes()) { + ctx->SetOutput( + 0, xla::ConvertElementType(ctx->Input(0), ctx->output_xla_type(0))); + return; + } + // The gradient should be done in two phases for large resizes. 
+ auto input = ctx->Input(0); + if (input_shape.dimensions(1) / output_shape.dimensions(1) > 3 && + input_shape.dimensions(2) / output_shape.dimensions(2) > 3) { + auto intermediate_shape = output_shape; + intermediate_shape.mutable_dimensions()[1] = input_shape.dimensions(1); + input = xla::CustomCall(ctx->builder(), target, {ctx->Input(0)}, + intermediate_shape, OpaqueField()); + } + ctx->SetOutput(0, xla::CustomCall(ctx->builder(), target, {input}, + output_shape, OpaqueField())); + } + + void CompileForward(XlaOpKernelContext* ctx, const char* target) { + auto output_shape = GetOutputShape(ctx); + if (ctx->InputShape(0).dim_size(1) == output_shape.dimensions(1) && + ctx->InputShape(0).dim_size(2) == output_shape.dimensions(2)) { + ctx->SetOutput( + 0, xla::ConvertElementType(ctx->Input(0), ctx->output_xla_type(0))); + return; + } + if (ctx->InputShape(0).dim_size(1) == 1 && + ctx->InputShape(0).dim_size(2) == 1) { + ctx->SetOutput(0, + ctx->Input(0) + xla::Zeros(ctx->builder(), output_shape)); + return; + } + ctx->SetOutput(0, xla::CustomCall(ctx->builder(), target, {ctx->Input(0)}, + output_shape, OpaqueField())); + } + + private: + bool align_corners_; + bool half_pixel_centers_; +}; + +class TpuResizeNearestNeighborOp : public TpuCustomResizeOp { + public: + explicit TpuResizeNearestNeighborOp(OpKernelConstruction* ctx) + : TpuCustomResizeOp(ctx) {} + void Compile(XlaOpKernelContext* ctx) override { + CompileForward(ctx, "ResizeNearest"); + } +}; + +class TpuResizeBilinearOp : public TpuCustomResizeOp { + public: + explicit TpuResizeBilinearOp(OpKernelConstruction* ctx) + : TpuCustomResizeOp(ctx) {} + void Compile(XlaOpKernelContext* ctx) override { + CompileForward(ctx, "ResizeBilinear"); + } +}; + +class TpuResizeNearestNeighborGradOp : public TpuCustomResizeOp { + public: + explicit TpuResizeNearestNeighborGradOp(OpKernelConstruction* ctx) + : TpuCustomResizeOp(ctx) {} + void Compile(XlaOpKernelContext* ctx) override { + CompileGrad(ctx, "ResizeNearestGrad", GetOutputShape(ctx)); + } +}; + +class TpuResizeBilinearGradOp : public TpuCustomResizeOp { + public: + explicit TpuResizeBilinearGradOp(OpKernelConstruction* ctx) + : TpuCustomResizeOp(ctx) {} + void Compile(XlaOpKernelContext* ctx) override { + auto output_shape = + TensorShapeToXLAShape(ctx->output_xla_type(0), ctx->InputShape(1)); + CompileGrad(ctx, "ResizeBilinearGrad", output_shape); + } +}; + +REGISTER_XLA_OP(Name("ResizeNearestNeighbor") + .CompileTimeConstantInput("size") + .Device(DEVICE_TPU_XLA_JIT), + TpuResizeNearestNeighborOp); + +REGISTER_XLA_OP(Name("ResizeNearestNeighborGrad") + .CompileTimeConstantInput("size") + .Device(DEVICE_TPU_XLA_JIT), + TpuResizeNearestNeighborGradOp); + +REGISTER_XLA_OP(Name("ResizeBilinear") + .CompileTimeConstantInput("size") + .Device(DEVICE_TPU_XLA_JIT), + TpuResizeBilinearOp); + +REGISTER_XLA_OP(Name("ResizeBilinearGrad").Device(DEVICE_TPU_XLA_JIT), + TpuResizeBilinearGradOp); + +} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/infeed_ops.cc b/tensorflow/core/tpu/kernels/infeed_ops.cc new file mode 100644 index 00000000000..f3fbd16b6cc --- /dev/null +++ b/tensorflow/core/tpu/kernels/infeed_ops.cc @@ -0,0 +1,529 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/tpu/kernels/infeed_ops.h" + +#include +#include + +#include "tensorflow/compiler/jit/xla_device.h" +#include "tensorflow/compiler/tf2xla/literal_util.h" +#include "tensorflow/compiler/tf2xla/shape_util.h" +#include "tensorflow/compiler/xla/util.h" +#include "tensorflow/core/common_runtime/dma_helper.h" +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/dataset.h" +#include "tensorflow/core/framework/function.h" +#include "tensorflow/core/framework/function_handle_cache.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/variant.h" +#include "tensorflow/core/framework/variant_encode_decode.h" +#include "tensorflow/core/framework/variant_tensor_data.h" +#include "tensorflow/core/kernels/transpose_functor.h" +#include "tensorflow/core/profiler/lib/traceme.h" +#include "tensorflow/core/tpu/kernels/transfer_ops.h" +#include "tensorflow/core/tpu/tpu_defs.h" +#include "tensorflow/stream_executor/multi_platform_manager.h" +#include "tensorflow/stream_executor/tpu/tpu_transfer_manager.h" +#include "tensorflow/stream_executor/tpu/tpu_transfer_manager_interface.h" + +namespace tensorflow { +namespace { + +typedef Eigen::ThreadPoolDevice CPUDevice; +typedef tensorflow::tpu::NoncopyableBuffer LinearizerBuffer; +typedef std::deque LinearizerBufferList; + +// Transposes the given tensor using the tensorflow C++ transpose implementation +// to obtain a XLA literal for the host tensor laid out as the given layout. The +// returned tensor is normalized to the dim0major layout -- F32[10,20,30]{2,0,1} +// is returned as F32[20,10,30]{2,1,0}. +xla::StatusOr TransposeTensor(OpKernelContext* ctx, + const Tensor& input_tensor, + const xla::Shape& xla_shape) { + profiler::TraceMe trace_me("TransposeTensor", /*level=*/2); + const int64 rank = xla_shape.rank(); + std::vector permutation(rank); + std::vector transposed_shapes(rank); + for (int64 i = 0; i < rank; ++i) { + permutation[i] = xla_shape.layout().minor_to_major(rank - 1 - i); + transposed_shapes[i] = xla_shape.dimensions(permutation[i]); + } + + Tensor transposed_tensor; + + // If this is a trivial transpose (i.e., bitcast), just create an aliased + // tensor with the transposed shape. + if (xla::LayoutUtil::IsMonotonicWithDim0Major( + xla::ShapeUtil::DropDegenerateDimensions(xla_shape).layout())) { + TensorShape shape; + TF_RETURN_IF_ERROR(TensorShapeUtils::MakeShape(transposed_shapes, &shape)); + TF_RETURN_IF_ERROR(transposed_tensor.BitcastFrom( + input_tensor, input_tensor.dtype(), shape)); + return transposed_tensor; + } + + AllocatorAttributes alloc_attr; + alloc_attr.set_on_host(true); + TF_RETURN_IF_ERROR(ctx->allocate_temp(input_tensor.dtype(), + TensorShape(transposed_shapes), + &transposed_tensor, alloc_attr)); + // Eigen Transpose fails with SIGFPE if there is a dimension of size 0. 
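// Worked example (illustrative comment only) of the permutation computed
// above, using the F32[10,20,30]{2,0,1} case from the function comment: the
// layout's minor_to_major order is {2, 0, 1}, i.e. dimension 2 is most minor
// and dimension 1 most major, so
//   permutation[i]       = minor_to_major(rank - 1 - i)  ->  {1, 0, 2}
//   transposed_shapes[i] = dimensions(permutation[i])    ->  {20, 10, 30}
// which is exactly the dim0-major F32[20,10,30]{2,1,0} result described in
// the function comment.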
+ if (input_tensor.NumElements() > 0) { + TF_RETURN_IF_ERROR(DoTranspose(ctx->eigen_device(), + input_tensor, permutation, + &transposed_tensor)); + } + return transposed_tensor; +} + +xla::StatusOr GetLayoutOverride(OpKernelConstruction* ctx, + const char* attrn_name, + std::vector* minor_to_major) { + if (!ctx->HasAttr(attrn_name)) { + return false; + } + TF_RETURN_IF_ERROR(ctx->GetAttr(attrn_name, minor_to_major)); + return !minor_to_major->empty(); +} + +Status GetInfeedShapeWithLayout(OpKernelConstruction* ctx, + const char* attrn_name, + const xla::Shape& input_shape, + xla::Shape* output_shape) { + std::vector minor_to_major; + TF_ASSIGN_OR_RETURN(bool has_override, + GetLayoutOverride(ctx, attrn_name, &minor_to_major)); + if (!has_override) { + *output_shape = input_shape; + if (output_shape->IsTuple()) { + int64 tuple_elements = xla::ShapeUtil::TupleElementCount(*output_shape); + for (int64 i = 0; i < tuple_elements; ++i) { + xla::Shape* sub_shape = + xla::ShapeUtil::GetMutableSubshape(output_shape, {i}); + *sub_shape->mutable_layout() = GetTPUInfeedLayout(*sub_shape).layout(); + } + } else { + *output_shape->mutable_layout() = + GetTPUInfeedLayout(*output_shape).layout(); + } + return Status::OK(); + } + + auto layout_func = [](const xla::Shape& shape) -> xla::Layout { + return GetTPUInfeedLayout(shape).layout(); + }; + return GetShapeWithLayout(input_shape, minor_to_major, layout_func, + output_shape); +} + +// LinearizedBuffersWrapper is an opaque C++ data structure for the outputs of +// PrelinearizeOp and PrelinearizeTupleOp. It holds the resultant linearized +// buffers and references to input tensors whose underlying storage are shared +// with linearized buffers. +// NOTE: This is not a feature-complete implementation of the DT_VARIANT +// specification. In particular, we cannot currently serialize an arbitrary +// `LinearizerBufferList` (aka `std::deque`) +// object, so the `Encode()` and `Decode()` methods are not implemented. +struct LinearizedBuffersWrapper { + explicit LinearizedBuffersWrapper() {} + explicit LinearizedBuffersWrapper(LinearizerBufferList bufs, + std::vector ts) + : buffers(std::move(bufs)), tensors(std::move(ts)) {} + LinearizedBuffersWrapper(const LinearizedBuffersWrapper& wrapper) { + // tensorflow::Variant requires this copy constructor to compile. + LOG(FATAL) << "LinearizedBuffersWrapper should not copy."; + } + LinearizedBuffersWrapper& operator=(const LinearizedBuffersWrapper& wrapper) = + delete; + LinearizedBuffersWrapper(LinearizedBuffersWrapper&&) = default; + LinearizedBuffersWrapper& operator=(LinearizedBuffersWrapper&&) = default; + ~LinearizedBuffersWrapper() = default; + + // These functions are tensorflow::Variant requirements. + string TypeName() const { return "(anonymous)::LinearizedBuffersWrapper"; } + void Encode(tensorflow::VariantTensorData* data) const { + LOG(ERROR) << "Encode() is not implemented for LinearizedBuffersWrapper " + "objects."; + } + bool Decode(const tensorflow::VariantTensorData& data) { + LOG(ERROR) << "Decode() is not implemented for LinearizedBuffersWrapper " + "objects."; + return false; + } + + LinearizerBufferList buffers; + // Save references on tensors whose underlying storage are shared with + // LiteralLinearizer::Buffer in `buffers`. 
+ std::vector tensors; +}; + +Status AutoTransposeAndLinearize(OpKernelContext* ctx, + const Tensor& input_tensor, + const xla::Shape& shape, + LinearizerBufferList* linearized_buffers, + std::vector* saved_input_tensors) { + const Tensor* tensor = &input_tensor; + // If the given layout is not in dim0major layout, tranposes the tensor. + bool has_transposed = false; + Tensor transposed_tensor; + if (!xla::LayoutUtil::IsMonotonicWithDim0Major(shape.layout())) { + // If the given layout is not in dim0major layout, transpose the tensor. + TF_ASSIGN_OR_RETURN(transposed_tensor, + TransposeTensor(ctx, input_tensor, shape)); + tensor = &transposed_tensor; + has_transposed = true; + } + + xla::BorrowingLiteral literal; + TF_RETURN_IF_ERROR(HostTensorToBorrowingLiteral(*tensor, &literal)); + + TF_RETURN_IF_ERROR( + xla::TpuTransferManagerInterface::GetRegisteredTpuTransferManager() + ->LinearizeToBuffers(literal, linearized_buffers)); + + // The input tensor is ref-counted. Save a handle on the input tensor if + // its underlying storage is shared with linearized buffers to prevent + // input tensor from getting freed. + for (const auto& buffer : *linearized_buffers) { + if (!buffer.owns_data() && !has_transposed) { + // `buffer` is created from zero-copy fast path from the un-transposed + // input tensor so its underlying data is shared with input tensor. + // Save a handle to input tensor to increment its ref-count and avoid + // it getting deallocated after PrelinearizeTupleOp completes. + saved_input_tensors->push_back(*tensor); + // A literal can be linearized to zero to two buffers. If any of the + // linearized buffer shares storage with input tensor. We save exactly + // one handle on the input tensor. + break; + } + } + return Status::OK(); +} + +// PrelinearizeOp is used to linearize one tensor to the device format. +class PrelinearizeOp : public OpKernel { + public: + explicit PrelinearizeOp(OpKernelConstruction* ctx) : OpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("shape", &shape_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("dtype", &dtype_)); + xla::Shape shape; + OP_REQUIRES_OK(ctx, TensorShapeToXLAShape(dtype_, shape_, &shape)); + OP_REQUIRES_OK(ctx, + GetInfeedShapeWithLayout(ctx, "layout", shape, &xla_shape_)); + } + + void Compute(OpKernelContext* ctx) override { + const Tensor& input_tensor = ctx->input(0); + // Validate input. + OP_REQUIRES( + ctx, input_tensor.dtype() == dtype_, + errors::InvalidArgument("Prelinearize dtype mismatch; expected ", + DataType_Name(dtype_), ", got ", + DataType_Name(input_tensor.dtype()))); + OP_REQUIRES( + ctx, input_tensor.shape() == shape_, + errors::InvalidArgument("Prelinearize shape mismatch; expected ", + shape_.DebugString(), ", got ", + input_tensor.shape().DebugString())); + + // Auto-transpose and prelinearize. + LinearizerBufferList linearized_buffers; + std::vector saved_input_tensors; + auto status = + AutoTransposeAndLinearize(ctx, input_tensor, xla_shape_, + &linearized_buffers, &saved_input_tensors); + OP_REQUIRES_OK(ctx, status); + + // Write to output. + tensorflow::Tensor* output; + OP_REQUIRES_OK(ctx, + ctx->allocate_output(0, tensorflow::TensorShape{}, &output)); + output->scalar()() = LinearizedBuffersWrapper{ + std::move(linearized_buffers), std::move(saved_input_tensors)}; + } + + bool IsExpensive() override { return true; } + + private: + TensorShape shape_; + DataType dtype_; + xla::Shape xla_shape_; + + // PrelinearizeOp is neither copyable nor movable. 
+ PrelinearizeOp(const PrelinearizeOp&) = delete; + PrelinearizeOp& operator=(const PrelinearizeOp&) = delete; +}; + +// PrelinearizeTupleOp is used to linearize multiple tensors to the device +// format. +class PrelinearizeTupleOp : public OpKernel { + public: + explicit PrelinearizeTupleOp(OpKernelConstruction* ctx) : OpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("shapes", &shapes_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("dtypes", &dtypes_)); + OP_REQUIRES( + ctx, shapes_.size() == dtypes_.size(), + errors::InvalidArgument( + "shapes and dtypes must be the same length. shapes length = ", + shapes_.size(), ", dtypes length = ", dtypes_.size())); + + std::vector xla_shapes; + for (int i = 0; i < shapes_.size(); i++) { + xla::Shape xla_shape; + OP_REQUIRES_OK(ctx, + TensorShapeToXLAShape(dtypes_[i], shapes_[i], &xla_shape)); + xla_shapes.push_back(xla_shape); + } + OP_REQUIRES_OK( + ctx, GetInfeedShapeWithLayout( + ctx, "layouts", xla::ShapeUtil::MakeTupleShape(xla_shapes), + &tuple_shape_)); + } + + void Compute(OpKernelContext* ctx) override { + OpInputList values; + OP_REQUIRES_OK(ctx, ctx->input_list("inputs", &values)); + OP_REQUIRES(ctx, values.size() == shapes_.size(), + errors::InvalidArgument( + "Wrong number of inputs to PrelinearizeTuple.")); + + LinearizerBufferList all_linearized_buffers; + std::vector all_saved_input_tensors; + for (int i = 0; i < values.size(); i++) { + // Validate input. + const Tensor& input_tensor = values[i]; + OP_REQUIRES(ctx, input_tensor.dtype() == dtypes_[i], + errors::InvalidArgument( + "PrelinearizeTuple dtype mismatch at tuple element ", i, + "; expected ", DataType_Name(dtypes_[i]), ", got ", + DataType_Name(input_tensor.dtype()))); + OP_REQUIRES(ctx, input_tensor.shape() == shapes_[i], + errors::InvalidArgument( + "PrelinearizeTuple shape mismatch at tuple element ", i, + "; expected ", shapes_[i].DebugString(), ", got ", + input_tensor.shape().DebugString())); + + // Auto-transpose and prelinearize. + LinearizerBufferList linearized_buffers; + std::vector saved_input_tensors; + auto status = AutoTransposeAndLinearize( + ctx, input_tensor, tuple_shape_.tuple_shapes(i), &linearized_buffers, + &saved_input_tensors); + OP_REQUIRES_OK(ctx, status); + all_linearized_buffers.insert( + all_linearized_buffers.end(), + std::make_move_iterator(linearized_buffers.begin()), + std::make_move_iterator(linearized_buffers.end())); + all_saved_input_tensors.insert( + all_saved_input_tensors.end(), + std::make_move_iterator(saved_input_tensors.begin()), + std::make_move_iterator(saved_input_tensors.end())); + } + + tensorflow::Tensor* output; + OP_REQUIRES_OK(ctx, + ctx->allocate_output(0, tensorflow::TensorShape{}, &output)); + output->scalar()() = LinearizedBuffersWrapper{ + std::move(all_linearized_buffers), std::move(all_saved_input_tensors)}; + } + + bool IsExpensive() override { return true; } + + private: + std::vector shapes_; + DataTypeVector dtypes_; + xla::Shape tuple_shape_; + + // PrelinearizeTupleOp is neither copyable nor movable. + PrelinearizeTupleOp(const PrelinearizeTupleOp&) = delete; + PrelinearizeTupleOp& operator=(const PrelinearizeTupleOp&) = delete; +}; + +// The InfeedEnqueuePrelinearizedBufferOp op is used to transfer prelinearized +// buffers to the device infeed queue. 
+class InfeedEnqueuePrelinearizedBufferOp : public TpuTransferAsyncOpKernel { + public: + explicit InfeedEnqueuePrelinearizedBufferOp(OpKernelConstruction* ctx) + : TpuTransferAsyncOpKernel(ctx, "prelinearized_buffers_to_infeed", 8) {} + + Status DoWork(OpKernelContext* ctx, + xla::TpuTransferManagerInterface* transfer_manager, + stream_executor::StreamExecutor* stream_executor) override { + const Tensor& input_tensor = ctx->input(0); + const LinearizedBuffersWrapper* wrapper = + input_tensor.scalar()() + .get(); + TF_RETURN_IF_ERROR(transfer_manager->TransferBuffersToInfeed( + stream_executor, wrapper->buffers)); + + return Status::OK(); + } + + private: + // InfeedEnqueuePrelinearizedBufferOp is neither copyable nor movable. + InfeedEnqueuePrelinearizedBufferOp( + const InfeedEnqueuePrelinearizedBufferOp&) = delete; + InfeedEnqueuePrelinearizedBufferOp& operator=( + const InfeedEnqueuePrelinearizedBufferOp&) = delete; +}; + +} // anonymous namespace + +TpuInfeedEnqueueOp::TpuInfeedEnqueueOp(OpKernelConstruction* ctx) + : TpuTransferAsyncOpKernel(ctx, "infeed_enqueue", 8) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("shape", &shape_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("dtype", &dtype_)); + xla::Shape shape; + OP_REQUIRES_OK(ctx, TensorShapeToXLAShape(dtype_, shape_, &shape)); + OP_REQUIRES_OK(ctx, + GetInfeedShapeWithLayout(ctx, "layout", shape, &xla_shape_)); +} + +Status TpuInfeedEnqueueOp::DoWork( + OpKernelContext* ctx, xla::TpuTransferManagerInterface* transfer_manager, + stream_executor::StreamExecutor* stream_executor) { + const Tensor& input_tensor = ctx->input(0); + + // Validate runtime shape and fail if it doesn't match the contract. + if (input_tensor.dtype() != dtype_) { + return errors::InvalidArgument("Infeed dtype mismatch."); + } + if (input_tensor.shape() != shape_) { + return errors::InvalidArgument("Infeed shape mismatch; expected ", + shape_.DebugString(), ", got ", + input_tensor.shape().DebugString()); + } + + const Tensor* tensor = &input_tensor; + Tensor transposed_tensor; + if (!xla::LayoutUtil::IsMonotonicWithDim0Major(xla_shape_.layout())) { + // If the given layout is not in dim0major layout, transpose the tensor. + TF_ASSIGN_OR_RETURN(transposed_tensor, + TransposeTensor(ctx, input_tensor, xla_shape_)); + tensor = &transposed_tensor; + } + + xla::BorrowingLiteral literal; + TF_RETURN_IF_ERROR(HostTensorToBorrowingLiteral(*tensor, &literal)); + + // Transfer the given literal to the Infeed interface of the device. 
+ TF_RETURN_IF_ERROR( + transfer_manager->TransferLiteralToInfeed(stream_executor, literal)); + return Status::OK(); +} + +TpuInfeedEnqueueTupleOp::TpuInfeedEnqueueTupleOp(OpKernelConstruction* ctx) + : TpuTransferAsyncOpKernel(ctx, "infeed_enqueue", 8) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("shapes", &shapes_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("dtypes", &dtypes_)); + OP_REQUIRES( + ctx, shapes_.size() == dtypes_.size(), + errors::InvalidArgument("shapes and dtypes must be the same length.")); + + std::vector xla_shapes; + for (int i = 0; i < shapes_.size(); i++) { + xla::Shape xla_shape; + OP_REQUIRES_OK(ctx, + TensorShapeToXLAShape(dtypes_[i], shapes_[i], &xla_shape)); + xla_shapes.push_back(xla_shape); + } + OP_REQUIRES_OK( + ctx, GetInfeedShapeWithLayout(ctx, "layouts", + xla::ShapeUtil::MakeTupleShape(xla_shapes), + &tuple_shape_)); +} + +Status TpuInfeedEnqueueTupleOp::DoWork( + OpKernelContext* ctx, xla::TpuTransferManagerInterface* transfer_manager, + stream_executor::StreamExecutor* stream_executor) { + OpInputList values; + TF_RETURN_IF_ERROR(ctx->input_list("inputs", &values)); + if (values.size() != shapes_.size()) { + return errors::InvalidArgument( + "Wrong number of inputs to InfeedEnqueueTuple."); + } + + for (const auto& shapes : shapes_) { + VLOG(1) << "TransferLiteralToInfeed " << shapes.DebugString(); + } + + std::vector maybe_transposed_tensors; + maybe_transposed_tensors.reserve(values.size()); + for (int i = 0; i < values.size(); i++) { + // Validate runtime shapes and fail if it doesn't match the contract. + const Tensor* tensor = &values[i]; + if (tensor->shape() != shapes_[i]) { + return errors::InvalidArgument("Infeed shape mismatch for tuple element ", + i, "; expected ", shapes_[i].DebugString(), + ", got ", tensor->shape().DebugString()); + } + if (!xla::LayoutUtil::IsMonotonicWithDim0Major( + tuple_shape_.tuple_shapes(i).layout())) { + // If the given layout is not in dim0major layout, tranposes the given + // tensor. + TF_ASSIGN_OR_RETURN( + Tensor transposed_tensor, + TransposeTensor(ctx, *tensor, tuple_shape_.tuple_shapes(i))); + maybe_transposed_tensors.emplace_back(transposed_tensor); + } else { + maybe_transposed_tensors.emplace_back(*tensor); + } + } + + xla::BorrowingLiteral tuple; + TF_RETURN_IF_ERROR( + HostTensorsToBorrowingLiteralTuple(maybe_transposed_tensors, &tuple)); + + // Transfer the given literal to the Infeed interface of the device. + TF_RETURN_IF_ERROR( + transfer_manager->TransferLiteralToInfeed(stream_executor, tuple)); + + VLOG(1) << "TransferLiteralToInfeed complete."; + + return Status::OK(); +} + +// These ops execute on either the TPU device or the CPU device. When running on +// CPU they must specify a non-negative value for device_ordinal to indicate +// which TPU to send infeed to. +REGISTER_KERNEL_BUILDER( + Name("InfeedEnqueue").Device(DEVICE_TPU_NODE).HostMemory("input"), + TpuInfeedEnqueueOp); +REGISTER_KERNEL_BUILDER(Name("InfeedEnqueue").Device(DEVICE_CPU), + TpuInfeedEnqueueOp); + +REGISTER_KERNEL_BUILDER( + Name("InfeedEnqueueTuple").Device(DEVICE_TPU_NODE).HostMemory("inputs"), + TpuInfeedEnqueueTupleOp); +REGISTER_KERNEL_BUILDER(Name("InfeedEnqueueTuple").Device(DEVICE_CPU), + TpuInfeedEnqueueTupleOp); + +// Prelinearize ops run on CPU as part of tf.data input pipeline. 
+REGISTER_KERNEL_BUILDER(Name("Prelinearize").Device(DEVICE_CPU), + PrelinearizeOp); +REGISTER_KERNEL_BUILDER(Name("PrelinearizeTuple").Device(DEVICE_CPU), + PrelinearizeTupleOp); + +// InfeedEnqueuePrelinearizedBuffer op run on CPU and takes a device_ordinal to +// select the right device to infeed. +REGISTER_KERNEL_BUILDER( + Name("InfeedEnqueuePrelinearizedBuffer").Device(DEVICE_CPU), + InfeedEnqueuePrelinearizedBufferOp); + +} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/infeed_ops.h b/tensorflow/core/tpu/kernels/infeed_ops.h new file mode 100644 index 00000000000..622583b6a73 --- /dev/null +++ b/tensorflow/core/tpu/kernels/infeed_ops.h @@ -0,0 +1,69 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_TPU_KERNELS_INFEED_OPS_H_ +#define TENSORFLOW_CORE_TPU_KERNELS_INFEED_OPS_H_ + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/platform/status.h" +#include "tensorflow/core/tpu/kernels/transfer_ops.h" + +namespace tensorflow { + +// TODO(b/65200690): Rework this when there is a callback based infeed API to +// StreamExecutor. + +// The InfeedEnqueue op is used to deliver data to the device infeed queue. +class TpuInfeedEnqueueOp : public TpuTransferAsyncOpKernel { + public: + explicit TpuInfeedEnqueueOp(OpKernelConstruction* ctx); + Status DoWork(OpKernelContext* ctx, + xla::TpuTransferManagerInterface* transfer_manager, + stream_executor::StreamExecutor* stream_executor) override; + + private: + TensorShape shape_; + DataType dtype_; + xla::Shape xla_shape_; + + // TpuInfeedEnqueueOp is neither copyable nor movable. + TpuInfeedEnqueueOp(const TpuInfeedEnqueueOp&) = delete; + TpuInfeedEnqueueOp& operator=(const TpuInfeedEnqueueOp&) = delete; +}; + +// The InfeedEnqueueTuple op is used on the host to deliver multiple tensors to +// the device infeed queue as an XLA tuple. +class TpuInfeedEnqueueTupleOp : public TpuTransferAsyncOpKernel { + public: + explicit TpuInfeedEnqueueTupleOp(OpKernelConstruction* ctx); + Status DoWork(OpKernelContext* ctx, + xla::TpuTransferManagerInterface* transfer_manager, + stream_executor::StreamExecutor* stream_executor) override; + + private: + std::vector shapes_; + DataTypeVector dtypes_; + xla::Shape tuple_shape_; + + // TpuInfeedEnqueueTupleOp is neither copyable nor movable. 
+ TpuInfeedEnqueueTupleOp(const TpuInfeedEnqueueTupleOp&) = delete; + TpuInfeedEnqueueTupleOp& operator=(const TpuInfeedEnqueueTupleOp&) = delete; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_TPU_KERNELS_INFEED_OPS_H_ diff --git a/tensorflow/core/tpu/kernels/outfeed_ops.cc b/tensorflow/core/tpu/kernels/outfeed_ops.cc new file mode 100644 index 00000000000..51a3a71a297 --- /dev/null +++ b/tensorflow/core/tpu/kernels/outfeed_ops.cc @@ -0,0 +1,116 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/tpu/kernels/outfeed_ops.h" + +#include "tensorflow/compiler/jit/xla_device.h" +#include "tensorflow/compiler/tf2xla/literal_util.h" +#include "tensorflow/compiler/tf2xla/shape_util.h" +#include "tensorflow/compiler/tf2xla/type_util.h" +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/tpu/kernels/transfer_ops.h" +#include "tensorflow/core/tpu/tpu_defs.h" +#include "tensorflow/stream_executor/multi_platform_manager.h" + +namespace tensorflow { + +TpuOutfeedDequeueOp::TpuOutfeedDequeueOp(OpKernelConstruction* ctx) + : TpuTransferAsyncOpKernel(ctx, "outfeed_dequeue", 1) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("shape", &shape_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("dtype", &dtype_)); + OP_REQUIRES_OK(ctx, TensorShapeToXLAShape(dtype_, shape_, &xla_shape_)); +} + +Status TpuOutfeedDequeueOp::DoWork( + OpKernelContext* ctx, xla::TpuTransferManagerInterface* transfer_manager, + stream_executor::StreamExecutor* stream_executor) { + Tensor* output; + TF_RETURN_IF_ERROR(ctx->allocate_output(0, shape_, &output)); + + // Transfer from the outfeed interface of the device. + xla::MutableBorrowingLiteral literal; + TF_RETURN_IF_ERROR( + HostTensorToMutableBorrowingLiteral(xla_shape_, output, &literal)); + + VLOG(1) << "TransferLiteralFromOutfeed " + << xla::ShapeUtil::HumanStringWithLayout(xla_shape_); + + TF_RETURN_IF_ERROR(transfer_manager->TransferLiteralFromOutfeed( + stream_executor, xla_shape_, literal)); + + VLOG(1) << "TransferLiteralFromOutfeed complete."; + + return Status::OK(); +} + +// The OutfeedDequeueTuple op is used to retrieve multiple tensors from the +// device outfeed queue. +TpuOutfeedDequeueTupleOp::TpuOutfeedDequeueTupleOp(OpKernelConstruction* ctx) + : TpuTransferAsyncOpKernel(ctx, "outfeed_dequeue", 1) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("shapes", &shapes_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("dtypes", &dtypes_)); + OP_REQUIRES( + ctx, shapes_.size() == dtypes_.size(), + errors::InvalidArgument("shapes and dtypes must be the same length.")); + // The `dtypes` list is inferred from the supplied inputs, so it + // is always the correct length. 
+ for (int i = 0; i < shapes_.size(); i++) { + xla::Shape xla_shape; + OP_REQUIRES_OK(ctx, + TensorShapeToXLAShape(dtypes_[i], shapes_[i], &xla_shape)); + xla_shapes_.push_back(xla_shape); + } + tuple_shape_ = xla::ShapeUtil::MakeTupleShape(xla_shapes_); +} + +Status TpuOutfeedDequeueTupleOp::DoWork( + OpKernelContext* ctx, xla::TpuTransferManagerInterface* transfer_manager, + stream_executor::StreamExecutor* stream_executor) { + VLOG(1) << "TransferLiteralFromOutfeed " + << xla::ShapeUtil::HumanStringWithLayout(tuple_shape_); + + for (int i = 0; i < shapes_.size(); ++i) { + Tensor* output; + TF_RETURN_IF_ERROR(ctx->allocate_output(i, shapes_[i], &output)); + + xla::MutableBorrowingLiteral literal; + TF_RETURN_IF_ERROR( + HostTensorToMutableBorrowingLiteral(xla_shapes_[i], output, &literal)); + TF_RETURN_IF_ERROR(transfer_manager->TransferLiteralFromOutfeed( + stream_executor, xla_shapes_[i], literal)); + } + return Status::OK(); +} + +// These ops execute on either the TPU device or the CPU device. When +// running on CPU they must specify a non-negative value for +// device_ordinal to indicate which TPU to receive outfeed from. +REGISTER_KERNEL_BUILDER( + Name("OutfeedDequeue").Device(DEVICE_TPU_NODE).HostMemory("output"), + TpuOutfeedDequeueOp); +REGISTER_KERNEL_BUILDER(Name("OutfeedDequeue").Device(DEVICE_CPU), + TpuOutfeedDequeueOp); + +REGISTER_KERNEL_BUILDER( + Name("OutfeedDequeueTuple").Device(DEVICE_TPU_NODE).HostMemory("outputs"), + TpuOutfeedDequeueTupleOp); +REGISTER_KERNEL_BUILDER(Name("OutfeedDequeueTuple").Device(DEVICE_CPU), + TpuOutfeedDequeueTupleOp); + +} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/outfeed_ops.h b/tensorflow/core/tpu/kernels/outfeed_ops.h new file mode 100644 index 00000000000..5e3ed87c04b --- /dev/null +++ b/tensorflow/core/tpu/kernels/outfeed_ops.h @@ -0,0 +1,69 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_TPU_KERNELS_OUTFEED_OPS_H_ +#define TENSORFLOW_CORE_TPU_KERNELS_OUTFEED_OPS_H_ + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/tpu/kernels/transfer_ops.h" + +namespace tensorflow { + +// The OutfeedDequeue op is used to retrieve a single tensor from the device +// outfeed queue. +class TpuOutfeedDequeueOp : public TpuTransferAsyncOpKernel { + public: + explicit TpuOutfeedDequeueOp(OpKernelConstruction* ctx); + + Status DoWork(OpKernelContext* ctx, + xla::TpuTransferManagerInterface* transfer_manager, + stream_executor::StreamExecutor* stream_executor) override; + + private: + TensorShape shape_; + DataType dtype_; + xla::Shape xla_shape_; + + // OutfeedDequeueOp is neither copyable nor movable. 
+ TpuOutfeedDequeueOp(const TpuOutfeedDequeueOp&) = delete; + TpuOutfeedDequeueOp& operator=(const TpuOutfeedDequeueOp&) = delete; +}; + +// The OutfeedDequeueTuple op is used to retrieve multiple tensors from the +// device outfeed queue. +class TpuOutfeedDequeueTupleOp : public TpuTransferAsyncOpKernel { + public: + explicit TpuOutfeedDequeueTupleOp(OpKernelConstruction* ctx); + + Status DoWork(OpKernelContext* ctx, + xla::TpuTransferManagerInterface* transfer_manager, + stream_executor::StreamExecutor* stream_executor) override; + + private: + std::vector shapes_; + DataTypeVector dtypes_; + std::vector xla_shapes_; + xla::Shape tuple_shape_; + + // OutfeedDequeueTupleOp is neither copyable nor movable. + TpuOutfeedDequeueTupleOp(const TpuOutfeedDequeueTupleOp&) = delete; + TpuOutfeedDequeueTupleOp& operator=(const TpuOutfeedDequeueTupleOp&) = delete; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_TPU_KERNELS_OUTFEED_OPS_H_ diff --git a/tensorflow/core/tpu/kernels/replication_ops.cc b/tensorflow/core/tpu/kernels/replication_ops.cc new file mode 100644 index 00000000000..4c986e880e7 --- /dev/null +++ b/tensorflow/core/tpu/kernels/replication_ops.cc @@ -0,0 +1,27 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/jit/xla_device_ops.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/tpu/tpu_defs.h" + +namespace tensorflow { + +REGISTER_KERNEL_BUILDER(Name("_TPUReplicate").Device(DEVICE_TPU_SYSTEM), + XlaDeviceDummyOp); + +} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_handle_to_key_op.cc b/tensorflow/core/tpu/kernels/tpu_handle_to_key_op.cc new file mode 100644 index 00000000000..ec2ae91d3eb --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_handle_to_key_op.cc @@ -0,0 +1,62 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/
+
+#include <string>
+#include <vector>
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h"
+#include "tensorflow/core/tpu/kernels/tpu_op_consts.h"
+#include "tensorflow/core/tpu/tpu_configuration.h"
+
+namespace tensorflow {
+
+class TpuHandleToProtoKeyOp : public OpKernel {
+ public:
+  explicit TpuHandleToProtoKeyOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+  ~TpuHandleToProtoKeyOp() override = default;
+  TpuHandleToProtoKeyOp(const TpuHandleToProtoKeyOp&) = delete;
+  TpuHandleToProtoKeyOp& operator=(const TpuHandleToProtoKeyOp&) = delete;
+
+  void Compute(OpKernelContext* ctx) override {
+    VLOG(1) << "TpuHandleToProtoKeyOp::Compute " << ctx->op_kernel().name()
+            << " on device " << ctx->op_kernel().requested_device();
+    const Tensor& uid = ctx->input(0);
+
+    ResourceMgr* rm = GetTPUConfigResourceMgr();
+    tpu::TpuCompilationCacheInterface* cache;
+    OP_REQUIRES_OK(ctx, rm->Lookup<tpu::TpuCompilationCacheInterface>(
+                            rm->default_container(),
+                            tpu::kCompilationCacheResourceName, &cache));
+    core::ScopedUnref cache_unref(cache);
+
+    std::vector<std::string> keys;
+    OP_REQUIRES_OK(ctx, cache->GetKeysFromUid(uid.scalar<int64>()(), &keys));
+
+    TensorShape output_shape;
+    output_shape.AddDim(keys.size());
+    Tensor* result = nullptr;
+    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, output_shape, &result));
+    for (int i = 0; i < keys.size(); ++i) {
+      result->vec<tstring>()(i) = keys[i];
+    }
+  };
+};
+
+REGISTER_KERNEL_BUILDER(Name("TpuHandleToProtoKey").Device(DEVICE_CPU),
+                        TpuHandleToProtoKeyOp);
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/tpu/kernels/transfer_ops.cc b/tensorflow/core/tpu/kernels/transfer_ops.cc
new file mode 100644
index 00000000000..40b85e2cfbd
--- /dev/null
+++ b/tensorflow/core/tpu/kernels/transfer_ops.cc
@@ -0,0 +1,98 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/ + +#include "tensorflow/core/tpu/kernels/transfer_ops.h" + +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/platform/tracing.h" +#include "tensorflow/core/profiler/lib/traceme.h" +#include "tensorflow/stream_executor/multi_platform_manager.h" +#include "tensorflow/stream_executor/tpu/tpu_node_context.h" +#include "tensorflow/stream_executor/tpu/tpu_platform_interface.h" +#include "tensorflow/stream_executor/tpu/tpu_transfer_manager_interface.h" + +namespace tensorflow { + +TpuTransferAsyncOpKernel::TpuTransferAsyncOpKernel(OpKernelConstruction* ctx, + const string& transfer_type, + int number_of_threads) + : AsyncOpKernel(ctx), + thread_pool_(new thread::ThreadPool( + ctx->env(), + strings::StrCat(transfer_type, "_thread_", + SanitizeThreadSuffix(def().name())), + /*num_threads=*/8)) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("device_ordinal", &device_ordinal_)); + if (ctx->device_type() == DeviceType(DEVICE_CPU)) { + OP_REQUIRES( + ctx, device_ordinal_ >= 0, + errors::InvalidArgument(transfer_type, + " ops must specify a device_ordinal when " + "placed on CPU.")); + } +} + +void TpuTransferAsyncOpKernel::ComputeAsync(OpKernelContext* ctx, + DoneCallback done) { + CancellationToken token = + ctx->cancellation_manager()->get_cancellation_token(); + bool already_cancelled; + { + // Only protect registering the cancellation callback as mu_ cannot be held + // at a point where `done` could be called. + mutex_lock lock(mu_); + already_cancelled = !ctx->cancellation_manager()->RegisterCallback( + token, [this]() { Cancel(); }); + } + OP_REQUIRES_ASYNC(ctx, !already_cancelled, + errors::Cancelled("Infeed was cancelled."), done); + thread_pool_->Schedule([this, ctx, done, token]() { + Status s = RunTransfer(ctx); + ctx->cancellation_manager()->DeregisterCallback(token); + OP_REQUIRES_OK_ASYNC(ctx, s, done); + done(); + }); +} + +Status TpuTransferAsyncOpKernel::RunTransfer(OpKernelContext* ctx) { + auto* tpu_platform = tpu::TpuPlatformInterface::GetRegisteredPlatform(); + + int real_device_ordinal = device_ordinal_; + if (real_device_ordinal < 0) { + const XlaDevice::Metadata* metadata; + TF_RETURN_IF_ERROR(XlaDevice::GetMetadata(ctx, &metadata)); + real_device_ordinal = metadata->device_ordinal(); + } + stream_executor::StreamExecutor* stream_executor = + tpu_platform->ExecutorForDevice(real_device_ordinal).ValueOrDie(); + + // When Xprof profiling is off (which is the default), constructing the + // activity is simple enough that its overhead is negligible. + profiler::TraceMe activity( + [this] { return profiler::TraceMeOp(name(), type_string()); }, + profiler::TraceMeLevel::kInfo); + return DoWork( + ctx, xla::TpuTransferManagerInterface::GetRegisteredTpuTransferManager(), + stream_executor); +} + +void TpuTransferAsyncOpKernel::Cancel() { + mutex_lock lock(mu_); + TF_CHECK_OK(tpu::TpuNodeContext::CloseTpuHost()); +} + +} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/transfer_ops.h b/tensorflow/core/tpu/kernels/transfer_ops.h new file mode 100644 index 00000000000..d98d743f569 --- /dev/null +++ b/tensorflow/core/tpu/kernels/transfer_ops.h @@ -0,0 +1,56 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_TPU_KERNELS_TRANSFER_OPS_H_ +#define TENSORFLOW_CORE_TPU_KERNELS_TRANSFER_OPS_H_ + +#include "tensorflow/compiler/jit/xla_device.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/util/stream_executor_util.h" +#include "tensorflow/stream_executor/tpu/tpu_transfer_manager_interface.h" + +namespace tensorflow { + +// Base class providing common functionality for async ops that transfer from +// host to TPU. +class TpuTransferAsyncOpKernel : public AsyncOpKernel { + public: + explicit TpuTransferAsyncOpKernel(OpKernelConstruction* ctx, + const string& transfer_type, + int number_of_threads); + + void ComputeAsync(OpKernelContext* ctx, DoneCallback done) override; + + protected: + virtual Status DoWork(OpKernelContext* context, + xla::TpuTransferManagerInterface* transfer_manager, + stream_executor::StreamExecutor* stream_executor) = 0; + + private: + Status RunTransfer(OpKernelContext* ctx); + void Cancel(); + + std::unique_ptr thread_pool_; + int device_ordinal_; + mutex mu_; + + // TpuTransferAsyncOpKernel is neither copyable nor movable. + TpuTransferAsyncOpKernel(const TpuTransferAsyncOpKernel&) = delete; + TpuTransferAsyncOpKernel& operator=(const TpuTransferAsyncOpKernel&) = delete; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_TPU_KERNELS_TRANSFER_OPS_H_ diff --git a/tensorflow/core/tpu/tpu_defs.cc b/tensorflow/core/tpu/tpu_defs.cc index 69669bfdb7b..69d4989773a 100644 --- a/tensorflow/core/tpu/tpu_defs.cc +++ b/tensorflow/core/tpu/tpu_defs.cc @@ -15,6 +15,10 @@ limitations under the License. #include "tensorflow/core/tpu/tpu_defs.h" +#include "tensorflow/core/tpu/tpu_api.h" +#include "tensorflow/stream_executor/tpu/c_api_conversions.h" +#include "tensorflow/stream_executor/tpu/c_api_decl.h" + namespace tensorflow { const char* const DEVICE_TPU_NODE = "TPU"; @@ -27,4 +31,18 @@ const char* const TPUREPLICATE_MIRRORED_VAR_INDICES_ATTR = const char* const kTPUReplicateAttr = "_tpu_replicate"; const char* const kOutsideCompilationAttr = "_xla_outside_compilation"; +xla::Shape GetTPUInfeedLayout(const xla::Shape& shape) { + XLA_Shape c_shape; + XLA_Shape c_infeed_shape; + + ApiConverter::ToC(shape, &c_shape); + + tpu::ExecutorApiFn()->TpuTransferManager_GetInfeedLayoutFn(&c_shape, + &c_infeed_shape); + xla::Shape infeed_shape = ApiConverter::FromC(&c_infeed_shape); + ApiConverter::Free(&c_shape); + ApiConverter::Free(&c_infeed_shape); + return infeed_shape; +} + } // namespace tensorflow diff --git a/tensorflow/core/tpu/tpu_defs.h b/tensorflow/core/tpu/tpu_defs.h index 008e386dde6..29954b2289f 100644 --- a/tensorflow/core/tpu/tpu_defs.h +++ b/tensorflow/core/tpu/tpu_defs.h @@ -20,6 +20,7 @@ limitations under the License. 
#include +#include "tensorflow/compiler/xla/shape.h" #include "tensorflow/core/framework/types.pb.h" namespace tensorflow { @@ -56,6 +57,11 @@ static constexpr std::array kTpuAllTypes = { DT_COMPLEX64, DT_INT64, DT_UINT64, DT_QINT8, DT_QUINT8, DT_INT8, DT_UINT8, DT_INT16, DT_UINT16}}; +// For the given shape, chooses a layout for infeed on TPU. The returned shape +// has the same dimensions as the original shape, and only the layout is +// changed. +xla::Shape GetTPUInfeedLayout(const xla::Shape& shape); + } // namespace tensorflow #endif // TENSORFLOW_CORE_TPU_TPU_DEFS_H_ diff --git a/tensorflow/core/tpu/tpu_library_init_fns.inc b/tensorflow/core/tpu/tpu_library_init_fns.inc index be9d594685e..40130bd46dd 100644 --- a/tensorflow/core/tpu/tpu_library_init_fns.inc +++ b/tensorflow/core/tpu/tpu_library_init_fns.inc @@ -161,6 +161,7 @@ tensorflow::Status SetExecutorStructFn(void* library_handle) { TFTPU_SET_FN(executor_fn, TpuTransferManager_TransferLiteralFromDevice); TFTPU_SET_FN(executor_fn, TpuTransferManager_GetByteSizeRequirement); TFTPU_SET_FN(executor_fn, TpuTransferManager_WriteSingleTupleIndexTable); + TFTPU_SET_FN(executor_fn, TpuTransferManager_GetInfeedLayout); TFTPU_SET_FN(executor_fn, TpuTransferManager_LinearizeToBuffers); TFTPU_SET_FN(executor_fn, TpuTransferManager_FreeBuffers); diff --git a/tensorflow/stream_executor/tpu/BUILD b/tensorflow/stream_executor/tpu/BUILD index a52f9919e6e..a8178404dff 100644 --- a/tensorflow/stream_executor/tpu/BUILD +++ b/tensorflow/stream_executor/tpu/BUILD @@ -203,10 +203,12 @@ cc_library( cc_library( name = "tpu_transfer_manager_interface", + srcs = ["tpu_transfer_manager_interface.cc"], hdrs = ["tpu_transfer_manager_interface.h"], visibility = ["//visibility:public"], deps = [ ":noncopyable_buffer", + ":tpu_platform_interface", "//tensorflow/compiler/xla/service:transfer_manager", ], ) diff --git a/tensorflow/stream_executor/tpu/tpu_executor_c_api.h b/tensorflow/stream_executor/tpu/tpu_executor_c_api.h index 2b66c2ce4c5..013e7fe4e0c 100644 --- a/tensorflow/stream_executor/tpu/tpu_executor_c_api.h +++ b/tensorflow/stream_executor/tpu/tpu_executor_c_api.h @@ -182,6 +182,8 @@ void TpuTransferManager_WriteSingleTupleIndexTable( XLA_TransferManager* manager, SE_Stream* stream, SE_DeviceMemoryBase* elements, size_t elements_len, XLA_Shape* shape, SE_DeviceMemoryBase* region, SE_Status* status); +void TpuTransferManager_GetInfeedLayout(XLA_Shape* shape, + XLA_Shape* infeed_shape); void TpuTransferManager_LinearizeToBuffers( XLA_TransferManager* manager, XLA_Literal* c_literal, char*** buffers_array, int64_t** buffers_size, int64_t* buffers_array_size, SE_Status* status); @@ -341,6 +343,7 @@ struct TfTpu_ExecutorApiFn { TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_TransferLiteralFromDevice); TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_GetByteSizeRequirement); TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_WriteSingleTupleIndexTable); + TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_GetInfeedLayout); TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_LinearizeToBuffers); TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_FreeBuffers); diff --git a/tensorflow/stream_executor/tpu/tpu_transfer_manager.h b/tensorflow/stream_executor/tpu/tpu_transfer_manager.h index c201d63d2d5..e758c702204 100644 --- a/tensorflow/stream_executor/tpu/tpu_transfer_manager.h +++ b/tensorflow/stream_executor/tpu/tpu_transfer_manager.h @@ -81,6 +81,12 @@ class TpuTransferManager : public xla::TpuTransferManagerInterface { const xla::Shape& shape, stream_executor::DeviceMemoryBase* region) override; + Status 
LinearizeToBuffers( + const xla::LiteralSlice& literal, + std::deque* buffers) override { + LOG(FATAL) << "Not yet implemented."; + } + private: XLA_TransferManager* manager_; }; diff --git a/tensorflow/stream_executor/tpu/tpu_transfer_manager_interface.cc b/tensorflow/stream_executor/tpu/tpu_transfer_manager_interface.cc new file mode 100644 index 00000000000..746093972a4 --- /dev/null +++ b/tensorflow/stream_executor/tpu/tpu_transfer_manager_interface.cc @@ -0,0 +1,40 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/stream_executor/tpu/tpu_transfer_manager_interface.h" + +#include "tensorflow/stream_executor/tpu/tpu_platform_interface.h" + +namespace xla { + +/*static*/ TpuTransferManagerInterface* +TpuTransferManagerInterface::GetRegisteredTpuTransferManager() { + auto* platform = tensorflow::tpu::TpuPlatformInterface::GetRegisteredPlatform( + /*initialize_platform=*/false); + if (platform == nullptr) { + LOG(ERROR) << "Unable to retrieve registered TPU platform."; + return nullptr; + } + auto tm = xla::TransferManager::GetForPlatform(platform); + if (!tm.ok()) { + LOG(ERROR) << "Unable to retrieve TpuTransferManager. No TPU platform is " + "registered for platform " + << platform->Name() << " and ID " << platform->id(); + return nullptr; + } + return static_cast(tm.ValueOrDie()); +} + +} // namespace xla diff --git a/tensorflow/stream_executor/tpu/tpu_transfer_manager_interface.h b/tensorflow/stream_executor/tpu/tpu_transfer_manager_interface.h index 3f34ed8064d..b7e000b89ac 100644 --- a/tensorflow/stream_executor/tpu/tpu_transfer_manager_interface.h +++ b/tensorflow/stream_executor/tpu/tpu_transfer_manager_interface.h @@ -24,9 +24,16 @@ limitations under the License. namespace xla { class TpuTransferManagerInterface : public xla::TransferManager { + public: virtual Status TransferBuffersToInfeed( se::StreamExecutor* executor, const std::deque& buffers) = 0; + + virtual Status LinearizeToBuffers( + const LiteralSlice& literal, + std::deque* buffers) = 0; + + static TpuTransferManagerInterface* GetRegisteredTpuTransferManager(); }; } // namespace xla From 997eef7812d26d8b13bb53c0d24de9a86f968deb Mon Sep 17 00:00:00 2001 From: Karim Nosir Date: Fri, 7 Aug 2020 18:21:06 -0700 Subject: [PATCH 2386/2522] Add cache for const nodes in hexagon delegate. On few test models this reduces the size of const nodes by half, which will reduce graph preparation time. Bug fix for sometimes wrong casting. Remove some redundant const nodes. 
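For intuition, here is a minimal, self-contained sketch of the const-node cache described above. It is illustrative only and not the delegate code itself: Fnv1a64 stands in for the farmhash Fingerprint64 call the delegate links in, and ConstNodeCache / GetOrAppend are hypothetical names. The real builder keys a map from the combined (data, shape) fingerprint to the OpBuilder* of the const node that was already appended, so an identical constant is appended to the Hexagon graph only once.

#include <cstdint>
#include <cstdio>
#include <map>

namespace {

// Stand-in data hash; the delegate uses farmhash Fingerprint64 instead.
uint64_t Fnv1a64(const char* data, int len) {
  uint64_t h = 1469598103934665603ULL;
  for (int i = 0; i < len; ++i) {
    h ^= static_cast<unsigned char>(data[i]);
    h *= 1099511628211ULL;
  }
  return h;
}

// Murmur-inspired combiner, same shape as the one added in op_builder.cc.
uint64_t CombineFingerprints(uint64_t l, uint64_t h) {
  const uint64_t kMul = 0x9ddfea08eb382d69ULL;
  uint64_t a = (l ^ h) * kMul;
  a ^= (a >> 47);
  uint64_t b = (h ^ a) * kMul;
  b ^= (b >> 44);
  b *= kMul;
  b ^= (b >> 41);
  b *= kMul;
  return b;
}

class ConstNodeCache {
 public:
  // Returns the node id for this constant, "appending" a new node only when
  // the (shape, data) pair has not been seen before.
  int GetOrAppend(const int shape[4], const char* data, int data_len) {
    uint64_t key = CombineFingerprints(
        Fnv1a64(data, data_len),
        Fnv1a64(reinterpret_cast<const char*>(shape),
                4 * static_cast<int>(sizeof(shape[0]))));
    auto it = cache_.find(key);
    if (it != cache_.end()) return it->second;  // hit: reuse existing node
    int id = next_node_id_++;                   // miss: create a new node
    cache_[key] = id;
    return id;
  }

 private:
  std::map<uint64_t, int> cache_;  // fingerprint -> node id
  int next_node_id_ = 0;
};

}  // namespace

int main() {
  ConstNodeCache cache;
  const int shape[4] = {1, 1, 1, 4};
  const char bias[4] = {1, 2, 3, 4};
  // The second call returns the same node id, so the duplicate constant is
  // not added to the graph again.
  std::printf("%d %d\n", cache.GetOrAppend(shape, bias, 4),
              cache.GetOrAppend(shape, bias, 4));
  return 0;
}

Keying on both the raw bytes and the shape keeps two constants with the same contents but different shapes on separate nodes; the actual patch additionally mixes the int8-to-uint8 flag into the hash so a constant and its sign-flipped copy never collide.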
PiperOrigin-RevId: 325545512 Change-Id: I6918ab991b416c9a729fd8e7e303f543b331523e --- .../lite/delegates/hexagon/builders/BUILD | 1 + .../hexagon/builders/conv_2d_builder.cc | 8 +- .../hexagon/builders/conv_2d_builder.h | 2 - .../hexagon/builders/conv_2d_helpers.cc | 19 +-- .../hexagon/builders/min_max_builder.cc | 4 - .../delegates/hexagon/builders/op_builder.cc | 112 +++++++++++++++--- .../delegates/hexagon/builders/op_builder.h | 34 +++++- .../hexagon/builders/transpose_builder.cc | 10 +- .../builders/transpose_conv_2d_builder.cc | 22 +--- .../builders/transpose_conv_2d_builder.h | 2 +- .../hexagon/hexagon_delegate_kernel.cc | 5 +- 11 files changed, 152 insertions(+), 67 deletions(-) diff --git a/tensorflow/lite/delegates/hexagon/builders/BUILD b/tensorflow/lite/delegates/hexagon/builders/BUILD index 63ff274c7b7..ef4b0e957c1 100644 --- a/tensorflow/lite/delegates/hexagon/builders/BUILD +++ b/tensorflow/lite/delegates/hexagon/builders/BUILD @@ -85,6 +85,7 @@ cc_library( "//tensorflow/lite/kernels:padding", "//tensorflow/lite/kernels/internal:optimized_base", "//tensorflow/lite/kernels/internal:tensor", + "@farmhash_archive//:farmhash", "@hexagon_nn//:hexagon_nn_ops", ], ) diff --git a/tensorflow/lite/delegates/hexagon/builders/conv_2d_builder.cc b/tensorflow/lite/delegates/hexagon/builders/conv_2d_builder.cc index cfddd2c2b97..c6d20004227 100644 --- a/tensorflow/lite/delegates/hexagon/builders/conv_2d_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/conv_2d_builder.cc @@ -267,13 +267,13 @@ TfLiteStatus Conv2dOpBuilder::PopulateSubGraph(const TfLiteIntArray* inputs, auto* conv_op = graph_builder_->AddNode(GetTFLiteNodeID()); conv_op->SetOpType(OP_DepthwiseSupernode_8x8p32to8); conv_op->AddInput(space_to_batch_op_out); - conv_op->AddInput(TensorID(weights_data_node_->GetID(), 0)); + conv_op->AddInput(graph_builder_->GetHexagonTensorId(inputs->data[1])); conv_op->AddInput(TensorID(data_min_const->GetID(), 0)); conv_op->AddInput(TensorID(data_max_const->GetID(), 0)); conv_op->AddInput(TensorID(weights_min_node_->GetID(), 0)); conv_op->AddInput(TensorID(weights_max_node_->GetID(), 0)); conv_op->AddInput(TensorID(stride_node->GetID(), 0)); - conv_op->AddInput(TensorID(bias_data_node_->GetID(), 0)); + conv_op->AddInput(graph_builder_->GetHexagonTensorId(inputs->data[2])); conv_op->AddInput(TensorID(bias_min_node_->GetID(), 0)); conv_op->AddInput(TensorID(bias_max_node_->GetID(), 0)); conv_op->AddInput(TensorID(conv_output_min_const->GetID(), 0)); @@ -330,13 +330,13 @@ TfLiteStatus Conv2dOpBuilder::PopulateSubGraph(const TfLiteIntArray* inputs, } // Inputs AddInput(graph_builder_->GetHexagonTensorId(inputs->data[0])); - AddInput(TensorID(weights_data_node_->GetID(), 0)); + AddInput(graph_builder_->GetHexagonTensorId(inputs->data[1])); AddInput(TensorID(data_min_const->GetID(), 0)); AddInput(TensorID(data_max_const->GetID(), 0)); AddInput(TensorID(weights_min_node_->GetID(), 0)); AddInput(TensorID(weights_max_node_->GetID(), 0)); AddInput(TensorID(stride_node->GetID(), 0)); - AddInput(TensorID(bias_data_node_->GetID(), 0)); + AddInput(graph_builder_->GetHexagonTensorId(inputs->data[2])); AddInput(TensorID(bias_min_node_->GetID(), 0)); AddInput(TensorID(bias_max_node_->GetID(), 0)); AddInput(TensorID(conv_output_min_const->GetID(), 0)); diff --git a/tensorflow/lite/delegates/hexagon/builders/conv_2d_builder.h b/tensorflow/lite/delegates/hexagon/builders/conv_2d_builder.h index 4980b294481..1407f06154b 100644 --- a/tensorflow/lite/delegates/hexagon/builders/conv_2d_builder.h +++ 
b/tensorflow/lite/delegates/hexagon/builders/conv_2d_builder.h @@ -62,10 +62,8 @@ class Conv2dOpBuilder : public OpBuilder { std::vector transposed_weights_; std::vector stride_shape_; std::vector weight_shape_; - OpBuilder* weights_data_node_ = nullptr; OpBuilder* weights_min_node_ = nullptr; OpBuilder* weights_max_node_ = nullptr; - OpBuilder* bias_data_node_ = nullptr; OpBuilder* bias_min_node_ = nullptr; OpBuilder* bias_max_node_ = nullptr; diff --git a/tensorflow/lite/delegates/hexagon/builders/conv_2d_helpers.cc b/tensorflow/lite/delegates/hexagon/builders/conv_2d_helpers.cc index bf68bbe5a25..b33e28f4e71 100644 --- a/tensorflow/lite/delegates/hexagon/builders/conv_2d_helpers.cc +++ b/tensorflow/lite/delegates/hexagon/builders/conv_2d_helpers.cc @@ -106,6 +106,7 @@ TfLiteStatus Conv2dOpBuilder::InitializeWeightsNodes( const bool is_per_channel_quant = weights_quant_params->scale->size > 1; // WEIGHTS DATA. + OpBuilder* weights_data_node = nullptr; if (op_node_.op_type == OP_Supernode_8x8p32to8) { // Hexagon lib expects the weight tensor in HWCN, TFLite uses NHWC. // Transpose NHWC -> HWCN @@ -137,7 +138,7 @@ TfLiteStatus Conv2dOpBuilder::InitializeWeightsNodes( weights_tensor.data.uint8, hwcn_shape, hwcn.data()); } - weights_data_node_ = graph_builder_->AddConstNodeWithData( + weights_data_node = graph_builder_->AddConstNodeWithData( weight_shape_.data(), reinterpret_cast(hwcn.data()), hwcn.size() * sizeof(hwcn[0])); } else if (op_node_.op_type == OP_DepthwiseSupernode_8x8p32to8) { @@ -156,17 +157,17 @@ TfLiteStatus Conv2dOpBuilder::InitializeWeightsNodes( for (int i = 0; i < converted_data.size(); ++i) { converted_data[i] = weights_tensor.data.int8[i] ^ k8BitSignFlipConstant; } - weights_data_node_ = graph_builder_->AddConstNodeWithData( + weights_data_node = graph_builder_->AddConstNodeWithData( weight_shape_.data(), reinterpret_cast(converted_data.data()), converted_data.size() * sizeof(converted_data[0])); } else { - weights_data_node_ = graph_builder_->AddConstNodeWithData( + weights_data_node = graph_builder_->AddConstNodeWithData( weight_shape_.data(), weights_tensor.data.raw, NumElements(&weights_tensor) * sizeof(weights_tensor.data.uint8[0])); } } - graph_builder_->AddTensorWithID(inputs->data[1], weights_data_node_->GetID(), - 0); + graph_builder_->AddTensorWithID(inputs->data[1], weights_data_node->GetID(), + 0, /*overwrite=*/true); // WEIGHTS QUANTIZATION. float weights_min = 0; @@ -229,9 +230,11 @@ TfLiteStatus Conv2dOpBuilder::ProcessPerChannelQuantizedBias( } // Add nodes for bias. 
const std::vector bias_shape = {1, 1, 1, bias_size}; - bias_data_node_ = graph_builder_->AddConstNodeWithData( + auto* bias_data_node = graph_builder_->AddConstNodeWithData( bias_shape.data(), reinterpret_cast(preprocessed_bias_data.data()), preprocessed_bias_data.size() * sizeof(preprocessed_bias_data[0])); + graph_builder_->AddTensorWithID(inputs->data[2], bias_data_node->GetID(), 0, + /*overwrite=*/true); return kTfLiteOk; } @@ -248,8 +251,10 @@ TfLiteStatus Conv2dOpBuilder::InitializeBiasNodes(const TfLiteIntArray* inputs, ProcessPerChannelQuantizedBias(inputs, outputs, context, &bias_min, &bias_max); } else { - bias_data_node_ = + auto* bias_data_node = graph_builder_->AddConstNodeWithData(inputs->data[2], bias_tensor); + graph_builder_->AddTensorWithID(inputs->data[2], bias_data_node->GetID(), 0, + /*overwrite=*/true); TF_LITE_ENSURE_STATUS( ComputeMinAndMaxQuantValues(bias_tensor, &bias_min, &bias_max)); } diff --git a/tensorflow/lite/delegates/hexagon/builders/min_max_builder.cc b/tensorflow/lite/delegates/hexagon/builders/min_max_builder.cc index bcfae6032c8..0c6dea2096d 100644 --- a/tensorflow/lite/delegates/hexagon/builders/min_max_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/min_max_builder.cc @@ -27,10 +27,6 @@ TfLiteStatus MinMaxOpBuilder::PopulateSubGraph(const TfLiteIntArray* inputs, int b_tensor_id = inputs->data[1]; const auto& a_tensor = context->tensors[a_tensor_id]; const auto& b_tensor = context->tensors[b_tensor_id]; - if (a_tensor.allocation_type == kTfLiteMmapRo) - graph_builder_->AddConstNodeWithData(a_tensor_id, a_tensor); - if (b_tensor.allocation_type == kTfLiteMmapRo) - graph_builder_->AddConstNodeWithData(b_tensor_id, b_tensor); AddInput(graph_builder_->GetHexagonTensorId(a_tensor_id)); AddInput(graph_builder_->GetHexagonTensorId(b_tensor_id)); diff --git a/tensorflow/lite/delegates/hexagon/builders/op_builder.cc b/tensorflow/lite/delegates/hexagon/builders/op_builder.cc index 0f32a4de6e1..80aa4c8155c 100644 --- a/tensorflow/lite/delegates/hexagon/builders/op_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/op_builder.cc @@ -18,10 +18,59 @@ limitations under the License. #include "tensorflow/lite/builtin_ops.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/delegates/hexagon/builders/op_factory.h" +#include namespace tflite { namespace delegates { namespace hexagon { +namespace { +// Farmhash Fingerprint +inline uint64_t CombineFingerprints(uint64_t l, uint64_t h) { + // Murmur-inspired hashing. 
+ const uint64_t kMul = 0x9ddfea08eb382d69ULL; + uint64_t a = (l ^ h) * kMul; + a ^= (a >> 47); + uint64_t b = (h ^ a) * kMul; + b ^= (b >> 44); + b *= kMul; + b ^= (b >> 41); + b *= kMul; + return b; +} + +inline uint64_t ComputeHash(const int shape[], const char* data, + const int data_len) { + return CombineFingerprints( + ::util::Fingerprint64(data, data_len), + ::util::Fingerprint64(reinterpret_cast(shape), + sizeof(shape[0]) * 4)); +} + +inline uint64_t ComputeHash(const TfLiteTensor& tensor, const int shape[], + int int8_to_uint8) { + auto data_hash = ComputeHash(shape, tensor.data.raw_const, tensor.bytes); + auto int8_to_uint8_hash = ::util::Fingerprint64( + reinterpret_cast(&int8_to_uint8), sizeof(int8_to_uint8)); + return CombineFingerprints(data_hash, int8_to_uint8_hash); +} + +int GetElementSize(TfLiteType type) { + switch (type) { + case kTfLiteFloat32: + return sizeof(float); + case kTfLiteBool: + return sizeof(bool); + case kTfLiteInt32: + return sizeof(int32_t); + case kTfLiteInt8: + return sizeof(int8_t); + case kTfLiteUInt8: + return sizeof(uint8_t); + default: + return sizeof(int8_t); + } +} +} // namespace OpBuilder* GraphBuilder::CreateOpBuilderFromTfLiteOp(int op_type, TfLiteNode* node) { @@ -116,8 +165,20 @@ OpBuilder* GraphBuilder::CreateOpBuilderFromTfLiteOp(int op_type, } } +OpBuilder* GraphBuilder::LookupConstData(uint64_t cache_key) { + auto lookup_result = cache_.find(cache_key); + if (lookup_result != cache_.end()) return lookup_result->second; + return nullptr; +} + +void GraphBuilder::AddToCache(uint64_t cache_key, OpBuilder* value) { + cache_[cache_key] = value; +} + OpBuilder* GraphBuilder::AddConstNodeWithData(const int shape[], char* data, int data_size) { + auto cache_key = ComputeHash(shape, data, data_size); + if (auto lookup_result = LookupConstData(cache_key)) return lookup_result; builders_.emplace_back(new OpBuilder(this, OP_Const)); builders_.back()->SetConstNode(); builders_.back()->SetNodeId(builders_.size()); @@ -125,22 +186,36 @@ OpBuilder* GraphBuilder::AddConstNodeWithData(const int shape[], char* data, graph_id_, builders_.size(), shape[0], shape[1], shape[2], shape[3], reinterpret_cast(data), data_size); if (error != 0) { - context_->ReportError(context_, "Error adding const node with shape id: %d", - (int)builders_.size()); + TF_LITE_KERNEL_LOG(context_, "Error adding const node with shape id: %d", + static_cast(builders_.size())); return nullptr; } + AddToCache(cache_key, builders_.back().get()); return builders_.back().get(); } OpBuilder* GraphBuilder::AddConstNodeWithData(int tensor_id, const TfLiteTensor& tensor, bool int8_to_uint8) { + // Fetch shape of tensor and pad 1's so it is always 4D. + int batch_size, height_size, width_size, depth_size; + GetDims(&batch_size, &height_size, &width_size, &depth_size, tensor.dims); + const int shape[] = {batch_size, height_size, width_size, depth_size}; + + auto cache_key = ComputeHash(tensor, shape, int8_to_uint8 ? 1 : 0); + if (auto lookup_result = LookupConstData(cache_key)) { + // If tensor is cached but with no id, that can happen when the same + // data is added from a constant value (not tensor). We can cache the data + // and reuse it. + // We assign the tensor to this cached const node before returning. 
+ if (!HasTensor(tensor_id)) + AddTensorWithID(tensor_id, lookup_result->GetID(), 0); + return lookup_result; + } builders_.emplace_back(new OpBuilder(this, OP_Const)); const int node_id = builders_.size(); builders_.back()->SetConstNode(); builders_.back()->SetNodeId(node_id); - int batch_size, height_size, width_size, depth_size; - GetDims(&batch_size, &height_size, &width_size, &depth_size, tensor.dims); int error = hexagon_nn_->hexagon_nn_append_const_node( graph_id_, node_id, batch_size, height_size, width_size, depth_size, reinterpret_cast(tensor.data.raw), tensor.bytes); @@ -150,19 +225,26 @@ OpBuilder* GraphBuilder::AddConstNodeWithData(int tensor_id, return nullptr; } AddTensorWithID(tensor_id, node_id, 0); + // We need to return the builder with result, so we can't rely + // on builders_.back() as it can change while casting, so we hold pointer + // and update with value from casting if needed. + OpBuilder* result_builder = builders_.back().get(); // Cast int8 to uint8 if requested. // This will add cast op to uint8 and update tensor map to point // to the casted tensor. if (int8_to_uint8 && tensor.type == kTfLiteInt8) { - AddCastOp(context_, OP_Quantized_CastInt8ToUInt8, tensor_id); + AddCastOp(context_, OP_Quantized_CastInt8ToUInt8, tensor_id, + &result_builder); } - return builders_.back().get(); + AddToCache(cache_key, result_builder); + return result_builder; } // TODO(b/154604279): Support these casting ops in Hexagon op profiling (which // seems to key tensors on a single op, which may not be the case now). TfLiteStatus GraphBuilder::AddCastOp(TfLiteContext* context, int op_type, - int tensor_id) { + int tensor_id, + OpBuilder** cast_op_builder) { // Create a new OpBuilder for casting the tensor. OpBuilder* cast_builder = CreateCastBuilder(this, op_type); builders_.emplace_back(cast_builder); @@ -177,6 +259,7 @@ TfLiteStatus GraphBuilder::AddCastOp(TfLiteContext* context, int op_type, TF_LITE_ENSURE_STATUS(cast_builder->RegisterOutputs(tensor_data, context)); TfLiteIntArrayFree(tensor_data); + if (cast_op_builder != nullptr) *cast_op_builder = cast_builder; return kTfLiteOk; } @@ -192,12 +275,12 @@ TfLiteStatus GraphBuilder::AddInputTensors(const TfLiteIntArray* input_tensors, const int tensor_id = input_tensors->data[i]; const auto& tensor = context->tensors[tensor_id]; if (tensor.allocation_type == kTfLiteMmapRo) continue; - input_op->AddOutput(tensor.dims); + input_op->AddOutput(tensor.dims, GetElementSize(tensor.type)); AddTensorWithID(tensor_id, input_op->GetID(), num_inputs); // If tensor is of type int8, add an op to cast it to uint8. if (tensor.type == kTfLiteInt8) { - TF_LITE_ENSURE_STATUS( - AddCastOp(context, OP_Quantized_CastInt8ToUInt8, tensor_id)); + TF_LITE_ENSURE_STATUS(AddCastOp(context, OP_Quantized_CastInt8ToUInt8, + tensor_id, /*cast_op_builder=*/nullptr)); } ++num_inputs; } @@ -215,8 +298,8 @@ TfLiteStatus GraphBuilder::AddOutputTensors( const auto& tensor = context->tensors[tensor_id]; // If tensor is of type int8, add an op to cast it to uint8. 
if (tensor.type == kTfLiteInt8) { - TF_LITE_ENSURE_STATUS( - AddCastOp(context, OP_Quantized_CastUInt8ToInt8, tensor_id)); + TF_LITE_ENSURE_STATUS(AddCastOp(context, OP_Quantized_CastUInt8ToInt8, + tensor_id, /*cast_op_builder=*/nullptr)); } hexagon_output_ids.push_back(GetHexagonTensorId(tensor_id)); } @@ -231,9 +314,10 @@ TfLiteStatus GraphBuilder::AddOutputTensors( return kTfLiteOk; } -OpBuilder::TensorID OpBuilder::AddOutput(const TfLiteIntArray* dims) { +OpBuilder::TensorID OpBuilder::AddOutput(const TfLiteIntArray* dims, + int element_size) { op_node_.outputs.push_back(hexagon_nn_output()); - op_node_.outputs.back().elementsize = sizeof(uint8_t); + op_node_.outputs.back().elementsize = element_size; op_node_.outputs.back().rank = 4; // TODO(karimnosseir): What is a good to estimate the max size ? int batch_size, height_size, width_size, depth_size; diff --git a/tensorflow/lite/delegates/hexagon/builders/op_builder.h b/tensorflow/lite/delegates/hexagon/builders/op_builder.h index 52b130c756f..c2a2889b142 100644 --- a/tensorflow/lite/delegates/hexagon/builders/op_builder.h +++ b/tensorflow/lite/delegates/hexagon/builders/op_builder.h @@ -16,6 +16,7 @@ limitations under the License. #define TENSORFLOW_LITE_DELEGATES_HEXAGON_BUILDERS_OP_BUILDER_H_ #include +#include #include #include #include @@ -131,9 +132,9 @@ class OpBuilder { void AddInput(const TensorID& tensor_id) { input_ids_.push_back(tensor_id); } // Adds Output to the current node, the output has shape defined in 'dims'. - // This assumes the data type is uint8. + // The size of each element is defined using 'element_size'. // Returns the TensorID identifying this output in the graph. - TensorID AddOutput(const TfLiteIntArray* dims); + TensorID AddOutput(const TfLiteIntArray* dims, int element_size); // Adds Output to the current node, each element in the output has // size 'elementsize' and rank 'rank' and for each dimension in the output @@ -316,11 +317,22 @@ class GraphBuilder { bool AddTensorWithID(int tflite_tensor_id, int hexagon_node_id, int hexagon_node_output_id, bool overwrite = false) { if (!overwrite && HasTensor(tflite_tensor_id)) { + TF_LITE_KERNEL_LOG( + context_, + "Trying to add duplicate tensor without overwrite, tflite_tensor_id " + "%d, hexagon_node_id %d, hexagon_node_output_id %d", + tflite_tensor_id, hexagon_node_id, hexagon_node_output_id); return false; } if (tensors_.size() <= tflite_tensor_id) { tensors_.resize(tflite_tensor_id + 1); } + if (hexagon_node_id == -1 || hexagon_node_output_id == -1) + TF_LITE_KERNEL_LOG(context_, + "Trying to add invalid id, tflite_tensor_id " + "%d, hexagon_node_id %d, hexagon_node_output_id %d", + tflite_tensor_id, hexagon_node_id, + hexagon_node_output_id); tensors_[tflite_tensor_id] = OpBuilder::TensorID(hexagon_node_id, hexagon_node_output_id); return true; @@ -348,6 +360,14 @@ class GraphBuilder { int GetMaxBatchSize() const { return max_size_for_batch_; } private: + // Lookup in cache if data with key 'cache_key' is present. + // Return OpBuilder* for the data if found, nullptr otherwise. + OpBuilder* LookupConstData(uint64_t cache_key); + + // Inserts 'value' in cache, with key equals 'cache_key'. + // If data in cache with same key then it will be overwritten. + void AddToCache(uint64_t cache_key, OpBuilder* value); + // Helper method to fetch dimensions. // TODO(karimnosseir): Move this method to shared place. 
void GetDims(int* batch_size, int* height_size, int* width_size, @@ -360,7 +380,10 @@ class GraphBuilder { } // Adds a Cast op to convert a tensor from int8 to uint8 (or vice versa). - TfLiteStatus AddCastOp(TfLiteContext* context, int op_type, int tensor_id); + // The builder which has the casting operator is filled in 'cast_op_builder' + // if not nullptr. + TfLiteStatus AddCastOp(TfLiteContext* context, int op_type, int tensor_id, + OpBuilder** cast_op_builder); const HexagonNN* hexagon_nn_ = nullptr; TfLiteContext* context_ = nullptr; @@ -373,6 +396,11 @@ class GraphBuilder { // If the graph being built supports dynamic batch, this represents // the maximum value for batch. int max_size_for_batch_ = -1; + + // Cache for const data in the graph. + // Key is hash of the data, value is pointer to the OpBuilder* for the added + // data. + std::map cache_; }; } // namespace hexagon diff --git a/tensorflow/lite/delegates/hexagon/builders/transpose_builder.cc b/tensorflow/lite/delegates/hexagon/builders/transpose_builder.cc index 4a7304d011e..eb0c2668edc 100644 --- a/tensorflow/lite/delegates/hexagon/builders/transpose_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/transpose_builder.cc @@ -29,15 +29,7 @@ TfLiteStatus TransposeOpBuilder::PopulateSubGraph(const TfLiteIntArray* inputs, const auto& input_tensor = context->tensors[tensor_id]; AddInput(graph_builder_->GetHexagonTensorId(tensor_id)); // permutation tensor. - tensor_id = inputs->data[1]; - const auto& control_tensor = context->tensors[tensor_id]; - if (control_tensor.allocation_type == kTfLiteMmapRo) { - auto* const_control_tensor_node = - graph_builder_->AddConstNodeWithData(tensor_id, control_tensor); - AddInput(TensorID(const_control_tensor_node->GetID(), 0)); - } else { - AddInput(graph_builder_->GetHexagonTensorId(tensor_id)); - } + AddInput(graph_builder_->GetHexagonTensorId(inputs->data[1])); TF_LITE_ENSURE_STATUS(ComputeAndAddMinAndMax(context, input_tensor)); diff --git a/tensorflow/lite/delegates/hexagon/builders/transpose_conv_2d_builder.cc b/tensorflow/lite/delegates/hexagon/builders/transpose_conv_2d_builder.cc index d2620f71007..3e852533394 100644 --- a/tensorflow/lite/delegates/hexagon/builders/transpose_conv_2d_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/transpose_conv_2d_builder.cc @@ -97,8 +97,6 @@ TfLiteStatus TransposeConv2dOpBuilder::PopulateSubGraph( filter_depth_size; GetDims(&filter_batch_size, &filter_height_size, &filter_width_size, &filter_depth_size, weights_tensor.dims); - weight_shape_ = {filter_batch_size, filter_height_size, filter_width_size, - filter_depth_size}; // Weights tensor could be int8 even for per-tensor quantization. // Therefore, we look at the number of scale values to check if it is // per-channel quantized. @@ -106,25 +104,7 @@ TfLiteStatus TransposeConv2dOpBuilder::PopulateSubGraph( reinterpret_cast( weights_tensor.quantization.params); const bool is_per_channel_quant = weights_quant_params->scale->size > 1; - - OpBuilder* const_weights_node; - if (weights_tensor.type == kTfLiteInt8) { - std::vector weights_data(NumElements(&weights_tensor)); - const int8_t* original_data = weights_tensor.data.int8; - // Flip bits on the weight values so that the int8 values are treated - // as uint8. 
- for (int i = 0; i < NumElements(&weights_tensor); ++i) { - weights_data[i] = original_data[i] ^ k8BitSignFlipConstant; - } - const_weights_node = graph_builder_->AddConstNodeWithData( - weight_shape_.data(), reinterpret_cast(weights_data.data()), - weights_data.size() * sizeof(weights_data[0])); - } else { - const_weights_node = graph_builder_->AddConstNodeWithData( - weight_shape_.data(), weights_tensor.data.raw, weights_tensor.bytes); - } - graph_builder_->AddTensorWithID(tensor_id, const_weights_node->GetID(), 0); - AddInput(TensorID(const_weights_node->GetID(), 0)); + AddInput(graph_builder_->GetHexagonTensorId(tensor_id)); // Handle weights quantization. float weights_min = 0; diff --git a/tensorflow/lite/delegates/hexagon/builders/transpose_conv_2d_builder.h b/tensorflow/lite/delegates/hexagon/builders/transpose_conv_2d_builder.h index 0a6a90a0297..4afab9894f0 100644 --- a/tensorflow/lite/delegates/hexagon/builders/transpose_conv_2d_builder.h +++ b/tensorflow/lite/delegates/hexagon/builders/transpose_conv_2d_builder.h @@ -47,7 +47,7 @@ class TransposeConv2dOpBuilder : public OpBuilder { TensorID node_output_; std::vector transposed_weights_; std::vector stride_shape_; - std::vector weight_shape_, bias_shape_; + std::vector bias_shape_; std::vector bias_data_; // Non-null only if node has per-channel quantized weights/biases. diff --git a/tensorflow/lite/delegates/hexagon/hexagon_delegate_kernel.cc b/tensorflow/lite/delegates/hexagon/hexagon_delegate_kernel.cc index cdf6b555929..83ebc15510e 100644 --- a/tensorflow/lite/delegates/hexagon/hexagon_delegate_kernel.cc +++ b/tensorflow/lite/delegates/hexagon/hexagon_delegate_kernel.cc @@ -264,8 +264,9 @@ TfLiteStatus HexagonDelegateKernel::BuildGraph( if (tensor_id == -1) continue; const auto& input_tensor = context->tensors[tensor_id]; if (input_tensor.allocation_type == kTfLiteMmapRo) { - builder_->AddConstNodeWithData(tensor_id, input_tensor, - /*int8_to_uint8*/ true); + builder_->AddConstNodeWithData( + tensor_id, input_tensor, + /*int8_to_uint8*/ (input_tensor.type == kTfLiteInt8)); } } auto* op_builder = From e38601bb2d8fde60ad333483ebf44a05dff87624 Mon Sep 17 00:00:00 2001 From: Tzu-Wei Sung Date: Fri, 7 Aug 2020 10:14:51 -0700 Subject: [PATCH 2387/2522] Parallel tf.AddN Clarify range --- .../mlir/tensorflow/tests/lower_tf.mlir | 18 ++++++-- .../mlir/tensorflow/transforms/lower_tf.cc | 45 ++++++++++++++++--- 2 files changed, 52 insertions(+), 11 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir b/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir index e7e07845fcc..23fdddc0eb7 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir @@ -353,15 +353,25 @@ func @ZerosLike_variant(%arg0: tensor>>) -> tensor>> } -// CHECK-LABEL: func @addN -func @addN(%arg0: tensor<*xf32>, %arg1: tensor<*xf32>, %arg2: tensor<*xf32>) -> tensor<*xf32> { - // CHECK: %[[SUM0:.*]] = "tf.AddV2"(%arg0, %arg1) - // CHECK: %[[SUM1:.*]] = "tf.AddV2"(%[[SUM0]], %arg2) +// CHECK-LABEL: func @addN_3 +func @addN_3(%arg0: tensor<*xf32>, %arg1: tensor<*xf32>, %arg2: tensor<*xf32>) -> tensor<*xf32> { + // CHECK: %[[SUM0:.*]] = "tf.AddV2"(%arg1, %arg2) + // CHECK: %[[SUM1:.*]] = "tf.AddV2"(%arg0, %[[SUM0]]) // return %[[SUM1]] %0 = "tf.AddN"(%arg0, %arg1, %arg2) : (tensor<*xf32>, tensor<*xf32>, tensor<*xf32>) -> tensor<*xf32> return %0 : tensor<*xf32> } +// CHECK-LABEL: func @addN_4 +func @addN_4(%arg0: tensor<*xf32>, %arg1: tensor<*xf32>, %arg2: tensor<*xf32>, 
%arg3: tensor<*xf32>) -> tensor<*xf32> { + // CHECK: %[[SUM0:.*]] = "tf.AddV2"(%arg0, %arg1) + // CHECK: %[[SUM1:.*]] = "tf.AddV2"(%arg2, %arg3) + // CHECK: %[[SUM2:.*]] = "tf.AddV2"(%[[SUM0]], %[[SUM1]]) + // return %[[SUM2]] + %0 = "tf.AddN"(%arg0, %arg1, %arg2, %arg3) : (tensor<*xf32>, tensor<*xf32>, tensor<*xf32>, tensor<*xf32>) -> tensor<*xf32> + return %0 : tensor<*xf32> +} + // CHECK-LABEL: func @addN_variant func @addN_variant(%arg0: tensor>>, %arg1: tensor>>, %arg2: tensor>>) -> tensor>> { // CHECK: tf.AddN diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc b/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc index d67739a739b..f853d8bd1fa 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc @@ -113,12 +113,27 @@ Type InferExpandDimsType(Type ty, int64_t axis, Builder *builder) { // Lowers AddN op to a sequence of AddV2 ops to accumulate operands. // +// Note that to improve the parallelism, the operands are split +// into two halves, and are accumulated first. +// +// Example: +// // %result = "tf.AddN"(%0, %1, %2) // // is lowered to: // -// %sum_0 = "tf.AddV2"(%0, %1) -// %result = "tf.AddV2"(%sum_0, %2) +// %sum_right = "tf.AddV2"(%1, %2) +// %result = "tf.AddV2"(%0, %sum_right) +// +// Or +// +// %result = "tf.AddN"(%0, %1, %2, %3) +// +// is lowered to: +// +// %sum_left = "tf.AddV2"(%0, %1) +// %sum_right = "tf.AddV2"(%2, %2) +// %result = "tf.AddV2"(%sum_left, %sum_right) // class LowerAddNOp : public OpRewritePattern { public: @@ -131,13 +146,29 @@ class LowerAddNOp : public OpRewritePattern { // support variant type so variant types require special handling. if (getElementTypeOrSelf(op.getType()).isa()) return failure(); - // TODO(hinsu): Improve parallelism by splitting operands in two halves and - // accumulating them first. - Value result = *op.inputs().begin(); - for (Value operand : llvm::drop_begin(op.inputs(), 1)) { - result = rewriter.create(op.getLoc(), result, operand); + auto begin = op.inputs().begin(); + // Return the only operand directly. + if (op.N() == 1) { + rewriter.replaceOp(op, *begin); + return success(); } + // Helper functor to accumulate from `begin` to `end` (exclusive). + auto accumulate_add = [&rewriter, &op] (auto begin, auto end) -> Value { + Value result = *begin; + ++begin; + for (auto operand = begin; operand != end; ++operand) { + result = rewriter.create(op.getLoc(), result, *operand); + } + return result; + }; + + // Accumulate range `[begin, half)` and `[half, end)`, + // and add the results of two halves. 
+ auto half = begin + op.N() / 2; + Value left = accumulate_add(begin, half); + Value right = accumulate_add(half, op.inputs().end()); + Value result = rewriter.create(op.getLoc(), left, right); rewriter.replaceOp(op, result); return success(); } From be5728d9362a1259d9e5126146782b2d3dbde2ea Mon Sep 17 00:00:00 2001 From: Tzu-Wei Sung Date: Fri, 7 Aug 2020 16:46:32 -0700 Subject: [PATCH 2388/2522] Use tree-based reduction Fix comment Update index Remove trivial word Use only i to index --- .../mlir/tensorflow/tests/lower_tf.mlir | 23 +++++- .../mlir/tensorflow/transforms/lower_tf.cc | 70 +++++++++++-------- 2 files changed, 60 insertions(+), 33 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir b/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir index 23fdddc0eb7..e11474c0755 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir @@ -353,10 +353,18 @@ func @ZerosLike_variant(%arg0: tensor>>) -> tensor>> } +// CHECK-LABEL: func @addN_2 +func @addN_2(%arg0: tensor<*xf32>, %arg1: tensor<*xf32>) -> tensor<*xf32> { + // CHECK: %[[SUM0:.*]] = "tf.AddV2"(%arg0, %arg1) + // return %[[SUM0]] + %0 = "tf.AddN"(%arg0, %arg1) : (tensor<*xf32>, tensor<*xf32>) -> tensor<*xf32> + return %0 : tensor<*xf32> +} + // CHECK-LABEL: func @addN_3 func @addN_3(%arg0: tensor<*xf32>, %arg1: tensor<*xf32>, %arg2: tensor<*xf32>) -> tensor<*xf32> { - // CHECK: %[[SUM0:.*]] = "tf.AddV2"(%arg1, %arg2) - // CHECK: %[[SUM1:.*]] = "tf.AddV2"(%arg0, %[[SUM0]]) + // CHECK: %[[SUM0:.*]] = "tf.AddV2"(%arg0, %arg1) + // CHECK: %[[SUM1:.*]] = "tf.AddV2"(%[[SUM0]], %arg2) // return %[[SUM1]] %0 = "tf.AddN"(%arg0, %arg1, %arg2) : (tensor<*xf32>, tensor<*xf32>, tensor<*xf32>) -> tensor<*xf32> return %0 : tensor<*xf32> @@ -372,6 +380,17 @@ func @addN_4(%arg0: tensor<*xf32>, %arg1: tensor<*xf32>, %arg2: tensor<*xf32>, % return %0 : tensor<*xf32> } +// CHECK-LABEL: func @addN_5 +func @addN_5(%arg0: tensor<*xf32>, %arg1: tensor<*xf32>, %arg2: tensor<*xf32>, %arg3: tensor<*xf32>, %arg4: tensor<*xf32>) -> tensor<*xf32> { + // CHECK: %[[SUM0:.*]] = "tf.AddV2"(%arg0, %arg1) + // CHECK: %[[SUM1:.*]] = "tf.AddV2"(%arg2, %arg3) + // CHECK: %[[SUM2:.*]] = "tf.AddV2"(%[[SUM0]], %[[SUM1]]) + // CHECK: %[[SUM3:.*]] = "tf.AddV2"(%[[SUM2]], %arg4) + // return %[[SUM3]] + %0 = "tf.AddN"(%arg0, %arg1, %arg2, %arg3, %arg4) : (tensor<*xf32>, tensor<*xf32>, tensor<*xf32>, tensor<*xf32>, tensor<*xf32>) -> tensor<*xf32> + return %0 : tensor<*xf32> +} + // CHECK-LABEL: func @addN_variant func @addN_variant(%arg0: tensor>>, %arg1: tensor>>, %arg2: tensor>>) -> tensor>> { // CHECK: tf.AddN diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc b/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc index f853d8bd1fa..483c84b3e80 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc @@ -113,8 +113,22 @@ Type InferExpandDimsType(Type ty, int64_t axis, Builder *builder) { // Lowers AddN op to a sequence of AddV2 ops to accumulate operands. // -// Note that to improve the parallelism, the operands are split -// into two halves, and are accumulated first. +// Note that to improve the parallelism, AddN op uses tree-based reduction. 
+// For example, tf.AddN([0, 1, 2, 3, 4]) behaves as follows: +// +// 0 1 2 3 4 +// | | | | | +// ------- ------- | +// | | | +// 5 6 | +// | | | +// ------------- | +// | | +// 7 | +// | | +// ---------------- +// | +// 8 // // Example: // @@ -122,18 +136,19 @@ Type InferExpandDimsType(Type ty, int64_t axis, Builder *builder) { // // is lowered to: // -// %sum_right = "tf.AddV2"(%1, %2) -// %result = "tf.AddV2"(%0, %sum_right) +// %sum0 = "tf.AddV2"(%0, %1) +// %result = "tf.AddV2"(%sum0, %2) // -// Or +// While // -// %result = "tf.AddN"(%0, %1, %2, %3) +// %result = "tf.AddN"(%0, %1, %2, %3, %4) // // is lowered to: // -// %sum_left = "tf.AddV2"(%0, %1) -// %sum_right = "tf.AddV2"(%2, %2) -// %result = "tf.AddV2"(%sum_left, %sum_right) +// %sum0 = "tf.AddV2"(%0, %1) +// %sum1 = "tf.AddV2"(%2, %3) +// %sum2 = "tf.AddV2"(%sum0, %sum1) +// %result = "tf.AddV2"(%sum2, %4) // class LowerAddNOp : public OpRewritePattern { public: @@ -146,30 +161,23 @@ class LowerAddNOp : public OpRewritePattern { // support variant type so variant types require special handling. if (getElementTypeOrSelf(op.getType()).isa()) return failure(); - auto begin = op.inputs().begin(); - // Return the only operand directly. - if (op.N() == 1) { - rewriter.replaceOp(op, *begin); - return success(); + llvm::SmallVector operands(op.inputs().begin(), + op.inputs().end()); + + int64_t n = operands.size(); + // Keep doing tree-based reduction when there are more than one operand. + while (n > 1) { + for (int64_t i = 0; i < n; i += 2) { + // Add two adjacent operands if applicable. + operands[i / 2] = (i + 1 < n) + ? rewriter.create( + op.getLoc(), operands[i], operands[i + 1]) + : operands[i]; + } + n = (n + 1) / 2; } - // Helper functor to accumulate from `begin` to `end` (exclusive). - auto accumulate_add = [&rewriter, &op] (auto begin, auto end) -> Value { - Value result = *begin; - ++begin; - for (auto operand = begin; operand != end; ++operand) { - result = rewriter.create(op.getLoc(), result, *operand); - } - return result; - }; - - // Accumulate range `[begin, half)` and `[half, end)`, - // and add the results of two halves. - auto half = begin + op.N() / 2; - Value left = accumulate_add(begin, half); - Value right = accumulate_add(half, op.inputs().end()); - Value result = rewriter.create(op.getLoc(), left, right); - rewriter.replaceOp(op, result); + rewriter.replaceOp(op, operands[0]); return success(); } }; From 902abbe41e198d58446896dacc4f00bc5e7bac7d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 7 Aug 2020 20:12:44 -0700 Subject: [PATCH 2389/2522] Add cache for const nodes in hexagon delegate. On few test models this reduces the size of const nodes by half, which will reduce graph preparation time. Bug fix for sometimes wrong casting. Remove some redundant const nodes. 
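As a rough standalone sketch of the pairwise (tree-based) reduction that the LowerAddNOp rewrite above implements, the snippet below replays the same in-place halving loop on plain integers; ReduceTree and the use of int values are stand-ins for the MLIR rewriter and Value handles, not TensorFlow code.

#include <cstdint>
#include <cstdio>
#include <vector>

// Pairwise (tree-based) reduction: every pass combines adjacent elements in
// place and halves the count (rounding up), so k operands finish in
// ceil(log2(k)) passes instead of a length-k chain of additions.
int ReduceTree(std::vector<int> operands) {
  int64_t n = operands.size();
  while (n > 1) {
    for (int64_t i = 0; i < n; i += 2) {
      // Combine two adjacent operands if both exist; otherwise carry the
      // trailing operand forward unchanged, exactly as in the rewrite.
      operands[i / 2] =
          (i + 1 < n) ? operands[i] + operands[i + 1] : operands[i];
    }
    n = (n + 1) / 2;
  }
  return operands[0];
}

int main() {
  // Five operands reduce in three passes and four additions, matching the
  // SUM0..SUM3 chain checked by the @addN_5 test above.
  std::printf("%d\n", ReduceTree({0, 1, 2, 3, 4}));  // prints 10
  return 0;
}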
PiperOrigin-RevId: 325554729 Change-Id: I805afc4151d99a1ed2f739accc7f86d6336b14b7 --- .../lite/delegates/hexagon/builders/BUILD | 1 - .../hexagon/builders/conv_2d_builder.cc | 8 +- .../hexagon/builders/conv_2d_builder.h | 2 + .../hexagon/builders/conv_2d_helpers.cc | 19 ++- .../hexagon/builders/min_max_builder.cc | 4 + .../delegates/hexagon/builders/op_builder.cc | 112 +++--------------- .../delegates/hexagon/builders/op_builder.h | 34 +----- .../hexagon/builders/transpose_builder.cc | 10 +- .../builders/transpose_conv_2d_builder.cc | 22 +++- .../builders/transpose_conv_2d_builder.h | 2 +- .../hexagon/hexagon_delegate_kernel.cc | 5 +- 11 files changed, 67 insertions(+), 152 deletions(-) diff --git a/tensorflow/lite/delegates/hexagon/builders/BUILD b/tensorflow/lite/delegates/hexagon/builders/BUILD index ef4b0e957c1..63ff274c7b7 100644 --- a/tensorflow/lite/delegates/hexagon/builders/BUILD +++ b/tensorflow/lite/delegates/hexagon/builders/BUILD @@ -85,7 +85,6 @@ cc_library( "//tensorflow/lite/kernels:padding", "//tensorflow/lite/kernels/internal:optimized_base", "//tensorflow/lite/kernels/internal:tensor", - "@farmhash_archive//:farmhash", "@hexagon_nn//:hexagon_nn_ops", ], ) diff --git a/tensorflow/lite/delegates/hexagon/builders/conv_2d_builder.cc b/tensorflow/lite/delegates/hexagon/builders/conv_2d_builder.cc index c6d20004227..cfddd2c2b97 100644 --- a/tensorflow/lite/delegates/hexagon/builders/conv_2d_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/conv_2d_builder.cc @@ -267,13 +267,13 @@ TfLiteStatus Conv2dOpBuilder::PopulateSubGraph(const TfLiteIntArray* inputs, auto* conv_op = graph_builder_->AddNode(GetTFLiteNodeID()); conv_op->SetOpType(OP_DepthwiseSupernode_8x8p32to8); conv_op->AddInput(space_to_batch_op_out); - conv_op->AddInput(graph_builder_->GetHexagonTensorId(inputs->data[1])); + conv_op->AddInput(TensorID(weights_data_node_->GetID(), 0)); conv_op->AddInput(TensorID(data_min_const->GetID(), 0)); conv_op->AddInput(TensorID(data_max_const->GetID(), 0)); conv_op->AddInput(TensorID(weights_min_node_->GetID(), 0)); conv_op->AddInput(TensorID(weights_max_node_->GetID(), 0)); conv_op->AddInput(TensorID(stride_node->GetID(), 0)); - conv_op->AddInput(graph_builder_->GetHexagonTensorId(inputs->data[2])); + conv_op->AddInput(TensorID(bias_data_node_->GetID(), 0)); conv_op->AddInput(TensorID(bias_min_node_->GetID(), 0)); conv_op->AddInput(TensorID(bias_max_node_->GetID(), 0)); conv_op->AddInput(TensorID(conv_output_min_const->GetID(), 0)); @@ -330,13 +330,13 @@ TfLiteStatus Conv2dOpBuilder::PopulateSubGraph(const TfLiteIntArray* inputs, } // Inputs AddInput(graph_builder_->GetHexagonTensorId(inputs->data[0])); - AddInput(graph_builder_->GetHexagonTensorId(inputs->data[1])); + AddInput(TensorID(weights_data_node_->GetID(), 0)); AddInput(TensorID(data_min_const->GetID(), 0)); AddInput(TensorID(data_max_const->GetID(), 0)); AddInput(TensorID(weights_min_node_->GetID(), 0)); AddInput(TensorID(weights_max_node_->GetID(), 0)); AddInput(TensorID(stride_node->GetID(), 0)); - AddInput(graph_builder_->GetHexagonTensorId(inputs->data[2])); + AddInput(TensorID(bias_data_node_->GetID(), 0)); AddInput(TensorID(bias_min_node_->GetID(), 0)); AddInput(TensorID(bias_max_node_->GetID(), 0)); AddInput(TensorID(conv_output_min_const->GetID(), 0)); diff --git a/tensorflow/lite/delegates/hexagon/builders/conv_2d_builder.h b/tensorflow/lite/delegates/hexagon/builders/conv_2d_builder.h index 1407f06154b..4980b294481 100644 --- a/tensorflow/lite/delegates/hexagon/builders/conv_2d_builder.h +++ 
b/tensorflow/lite/delegates/hexagon/builders/conv_2d_builder.h @@ -62,8 +62,10 @@ class Conv2dOpBuilder : public OpBuilder { std::vector transposed_weights_; std::vector stride_shape_; std::vector weight_shape_; + OpBuilder* weights_data_node_ = nullptr; OpBuilder* weights_min_node_ = nullptr; OpBuilder* weights_max_node_ = nullptr; + OpBuilder* bias_data_node_ = nullptr; OpBuilder* bias_min_node_ = nullptr; OpBuilder* bias_max_node_ = nullptr; diff --git a/tensorflow/lite/delegates/hexagon/builders/conv_2d_helpers.cc b/tensorflow/lite/delegates/hexagon/builders/conv_2d_helpers.cc index b33e28f4e71..bf68bbe5a25 100644 --- a/tensorflow/lite/delegates/hexagon/builders/conv_2d_helpers.cc +++ b/tensorflow/lite/delegates/hexagon/builders/conv_2d_helpers.cc @@ -106,7 +106,6 @@ TfLiteStatus Conv2dOpBuilder::InitializeWeightsNodes( const bool is_per_channel_quant = weights_quant_params->scale->size > 1; // WEIGHTS DATA. - OpBuilder* weights_data_node = nullptr; if (op_node_.op_type == OP_Supernode_8x8p32to8) { // Hexagon lib expects the weight tensor in HWCN, TFLite uses NHWC. // Transpose NHWC -> HWCN @@ -138,7 +137,7 @@ TfLiteStatus Conv2dOpBuilder::InitializeWeightsNodes( weights_tensor.data.uint8, hwcn_shape, hwcn.data()); } - weights_data_node = graph_builder_->AddConstNodeWithData( + weights_data_node_ = graph_builder_->AddConstNodeWithData( weight_shape_.data(), reinterpret_cast(hwcn.data()), hwcn.size() * sizeof(hwcn[0])); } else if (op_node_.op_type == OP_DepthwiseSupernode_8x8p32to8) { @@ -157,17 +156,17 @@ TfLiteStatus Conv2dOpBuilder::InitializeWeightsNodes( for (int i = 0; i < converted_data.size(); ++i) { converted_data[i] = weights_tensor.data.int8[i] ^ k8BitSignFlipConstant; } - weights_data_node = graph_builder_->AddConstNodeWithData( + weights_data_node_ = graph_builder_->AddConstNodeWithData( weight_shape_.data(), reinterpret_cast(converted_data.data()), converted_data.size() * sizeof(converted_data[0])); } else { - weights_data_node = graph_builder_->AddConstNodeWithData( + weights_data_node_ = graph_builder_->AddConstNodeWithData( weight_shape_.data(), weights_tensor.data.raw, NumElements(&weights_tensor) * sizeof(weights_tensor.data.uint8[0])); } } - graph_builder_->AddTensorWithID(inputs->data[1], weights_data_node->GetID(), - 0, /*overwrite=*/true); + graph_builder_->AddTensorWithID(inputs->data[1], weights_data_node_->GetID(), + 0); // WEIGHTS QUANTIZATION. float weights_min = 0; @@ -230,11 +229,9 @@ TfLiteStatus Conv2dOpBuilder::ProcessPerChannelQuantizedBias( } // Add nodes for bias. 
const std::vector bias_shape = {1, 1, 1, bias_size}; - auto* bias_data_node = graph_builder_->AddConstNodeWithData( + bias_data_node_ = graph_builder_->AddConstNodeWithData( bias_shape.data(), reinterpret_cast(preprocessed_bias_data.data()), preprocessed_bias_data.size() * sizeof(preprocessed_bias_data[0])); - graph_builder_->AddTensorWithID(inputs->data[2], bias_data_node->GetID(), 0, - /*overwrite=*/true); return kTfLiteOk; } @@ -251,10 +248,8 @@ TfLiteStatus Conv2dOpBuilder::InitializeBiasNodes(const TfLiteIntArray* inputs, ProcessPerChannelQuantizedBias(inputs, outputs, context, &bias_min, &bias_max); } else { - auto* bias_data_node = + bias_data_node_ = graph_builder_->AddConstNodeWithData(inputs->data[2], bias_tensor); - graph_builder_->AddTensorWithID(inputs->data[2], bias_data_node->GetID(), 0, - /*overwrite=*/true); TF_LITE_ENSURE_STATUS( ComputeMinAndMaxQuantValues(bias_tensor, &bias_min, &bias_max)); } diff --git a/tensorflow/lite/delegates/hexagon/builders/min_max_builder.cc b/tensorflow/lite/delegates/hexagon/builders/min_max_builder.cc index 0c6dea2096d..bcfae6032c8 100644 --- a/tensorflow/lite/delegates/hexagon/builders/min_max_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/min_max_builder.cc @@ -27,6 +27,10 @@ TfLiteStatus MinMaxOpBuilder::PopulateSubGraph(const TfLiteIntArray* inputs, int b_tensor_id = inputs->data[1]; const auto& a_tensor = context->tensors[a_tensor_id]; const auto& b_tensor = context->tensors[b_tensor_id]; + if (a_tensor.allocation_type == kTfLiteMmapRo) + graph_builder_->AddConstNodeWithData(a_tensor_id, a_tensor); + if (b_tensor.allocation_type == kTfLiteMmapRo) + graph_builder_->AddConstNodeWithData(b_tensor_id, b_tensor); AddInput(graph_builder_->GetHexagonTensorId(a_tensor_id)); AddInput(graph_builder_->GetHexagonTensorId(b_tensor_id)); diff --git a/tensorflow/lite/delegates/hexagon/builders/op_builder.cc b/tensorflow/lite/delegates/hexagon/builders/op_builder.cc index 80aa4c8155c..0f32a4de6e1 100644 --- a/tensorflow/lite/delegates/hexagon/builders/op_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/op_builder.cc @@ -18,59 +18,10 @@ limitations under the License. #include "tensorflow/lite/builtin_ops.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/delegates/hexagon/builders/op_factory.h" -#include namespace tflite { namespace delegates { namespace hexagon { -namespace { -// Farmhash Fingerprint -inline uint64_t CombineFingerprints(uint64_t l, uint64_t h) { - // Murmur-inspired hashing. 
- const uint64_t kMul = 0x9ddfea08eb382d69ULL; - uint64_t a = (l ^ h) * kMul; - a ^= (a >> 47); - uint64_t b = (h ^ a) * kMul; - b ^= (b >> 44); - b *= kMul; - b ^= (b >> 41); - b *= kMul; - return b; -} - -inline uint64_t ComputeHash(const int shape[], const char* data, - const int data_len) { - return CombineFingerprints( - ::util::Fingerprint64(data, data_len), - ::util::Fingerprint64(reinterpret_cast(shape), - sizeof(shape[0]) * 4)); -} - -inline uint64_t ComputeHash(const TfLiteTensor& tensor, const int shape[], - int int8_to_uint8) { - auto data_hash = ComputeHash(shape, tensor.data.raw_const, tensor.bytes); - auto int8_to_uint8_hash = ::util::Fingerprint64( - reinterpret_cast(&int8_to_uint8), sizeof(int8_to_uint8)); - return CombineFingerprints(data_hash, int8_to_uint8_hash); -} - -int GetElementSize(TfLiteType type) { - switch (type) { - case kTfLiteFloat32: - return sizeof(float); - case kTfLiteBool: - return sizeof(bool); - case kTfLiteInt32: - return sizeof(int32_t); - case kTfLiteInt8: - return sizeof(int8_t); - case kTfLiteUInt8: - return sizeof(uint8_t); - default: - return sizeof(int8_t); - } -} -} // namespace OpBuilder* GraphBuilder::CreateOpBuilderFromTfLiteOp(int op_type, TfLiteNode* node) { @@ -165,20 +116,8 @@ OpBuilder* GraphBuilder::CreateOpBuilderFromTfLiteOp(int op_type, } } -OpBuilder* GraphBuilder::LookupConstData(uint64_t cache_key) { - auto lookup_result = cache_.find(cache_key); - if (lookup_result != cache_.end()) return lookup_result->second; - return nullptr; -} - -void GraphBuilder::AddToCache(uint64_t cache_key, OpBuilder* value) { - cache_[cache_key] = value; -} - OpBuilder* GraphBuilder::AddConstNodeWithData(const int shape[], char* data, int data_size) { - auto cache_key = ComputeHash(shape, data, data_size); - if (auto lookup_result = LookupConstData(cache_key)) return lookup_result; builders_.emplace_back(new OpBuilder(this, OP_Const)); builders_.back()->SetConstNode(); builders_.back()->SetNodeId(builders_.size()); @@ -186,36 +125,22 @@ OpBuilder* GraphBuilder::AddConstNodeWithData(const int shape[], char* data, graph_id_, builders_.size(), shape[0], shape[1], shape[2], shape[3], reinterpret_cast(data), data_size); if (error != 0) { - TF_LITE_KERNEL_LOG(context_, "Error adding const node with shape id: %d", - static_cast(builders_.size())); + context_->ReportError(context_, "Error adding const node with shape id: %d", + (int)builders_.size()); return nullptr; } - AddToCache(cache_key, builders_.back().get()); return builders_.back().get(); } OpBuilder* GraphBuilder::AddConstNodeWithData(int tensor_id, const TfLiteTensor& tensor, bool int8_to_uint8) { - // Fetch shape of tensor and pad 1's so it is always 4D. - int batch_size, height_size, width_size, depth_size; - GetDims(&batch_size, &height_size, &width_size, &depth_size, tensor.dims); - const int shape[] = {batch_size, height_size, width_size, depth_size}; - - auto cache_key = ComputeHash(tensor, shape, int8_to_uint8 ? 1 : 0); - if (auto lookup_result = LookupConstData(cache_key)) { - // If tensor is cached but with no id, that can happen when the same - // data is added from a constant value (not tensor). We can cache the data - // and reuse it. - // We assign the tensor to this cached const node before returning. 
- if (!HasTensor(tensor_id)) - AddTensorWithID(tensor_id, lookup_result->GetID(), 0); - return lookup_result; - } builders_.emplace_back(new OpBuilder(this, OP_Const)); const int node_id = builders_.size(); builders_.back()->SetConstNode(); builders_.back()->SetNodeId(node_id); + int batch_size, height_size, width_size, depth_size; + GetDims(&batch_size, &height_size, &width_size, &depth_size, tensor.dims); int error = hexagon_nn_->hexagon_nn_append_const_node( graph_id_, node_id, batch_size, height_size, width_size, depth_size, reinterpret_cast(tensor.data.raw), tensor.bytes); @@ -225,26 +150,19 @@ OpBuilder* GraphBuilder::AddConstNodeWithData(int tensor_id, return nullptr; } AddTensorWithID(tensor_id, node_id, 0); - // We need to return the builder with result, so we can't rely - // on builders_.back() as it can change while casting, so we hold pointer - // and update with value from casting if needed. - OpBuilder* result_builder = builders_.back().get(); // Cast int8 to uint8 if requested. // This will add cast op to uint8 and update tensor map to point // to the casted tensor. if (int8_to_uint8 && tensor.type == kTfLiteInt8) { - AddCastOp(context_, OP_Quantized_CastInt8ToUInt8, tensor_id, - &result_builder); + AddCastOp(context_, OP_Quantized_CastInt8ToUInt8, tensor_id); } - AddToCache(cache_key, result_builder); - return result_builder; + return builders_.back().get(); } // TODO(b/154604279): Support these casting ops in Hexagon op profiling (which // seems to key tensors on a single op, which may not be the case now). TfLiteStatus GraphBuilder::AddCastOp(TfLiteContext* context, int op_type, - int tensor_id, - OpBuilder** cast_op_builder) { + int tensor_id) { // Create a new OpBuilder for casting the tensor. OpBuilder* cast_builder = CreateCastBuilder(this, op_type); builders_.emplace_back(cast_builder); @@ -259,7 +177,6 @@ TfLiteStatus GraphBuilder::AddCastOp(TfLiteContext* context, int op_type, TF_LITE_ENSURE_STATUS(cast_builder->RegisterOutputs(tensor_data, context)); TfLiteIntArrayFree(tensor_data); - if (cast_op_builder != nullptr) *cast_op_builder = cast_builder; return kTfLiteOk; } @@ -275,12 +192,12 @@ TfLiteStatus GraphBuilder::AddInputTensors(const TfLiteIntArray* input_tensors, const int tensor_id = input_tensors->data[i]; const auto& tensor = context->tensors[tensor_id]; if (tensor.allocation_type == kTfLiteMmapRo) continue; - input_op->AddOutput(tensor.dims, GetElementSize(tensor.type)); + input_op->AddOutput(tensor.dims); AddTensorWithID(tensor_id, input_op->GetID(), num_inputs); // If tensor is of type int8, add an op to cast it to uint8. if (tensor.type == kTfLiteInt8) { - TF_LITE_ENSURE_STATUS(AddCastOp(context, OP_Quantized_CastInt8ToUInt8, - tensor_id, /*cast_op_builder=*/nullptr)); + TF_LITE_ENSURE_STATUS( + AddCastOp(context, OP_Quantized_CastInt8ToUInt8, tensor_id)); } ++num_inputs; } @@ -298,8 +215,8 @@ TfLiteStatus GraphBuilder::AddOutputTensors( const auto& tensor = context->tensors[tensor_id]; // If tensor is of type int8, add an op to cast it to uint8. 
if (tensor.type == kTfLiteInt8) { - TF_LITE_ENSURE_STATUS(AddCastOp(context, OP_Quantized_CastUInt8ToInt8, - tensor_id, /*cast_op_builder=*/nullptr)); + TF_LITE_ENSURE_STATUS( + AddCastOp(context, OP_Quantized_CastUInt8ToInt8, tensor_id)); } hexagon_output_ids.push_back(GetHexagonTensorId(tensor_id)); } @@ -314,10 +231,9 @@ TfLiteStatus GraphBuilder::AddOutputTensors( return kTfLiteOk; } -OpBuilder::TensorID OpBuilder::AddOutput(const TfLiteIntArray* dims, - int element_size) { +OpBuilder::TensorID OpBuilder::AddOutput(const TfLiteIntArray* dims) { op_node_.outputs.push_back(hexagon_nn_output()); - op_node_.outputs.back().elementsize = element_size; + op_node_.outputs.back().elementsize = sizeof(uint8_t); op_node_.outputs.back().rank = 4; // TODO(karimnosseir): What is a good to estimate the max size ? int batch_size, height_size, width_size, depth_size; diff --git a/tensorflow/lite/delegates/hexagon/builders/op_builder.h b/tensorflow/lite/delegates/hexagon/builders/op_builder.h index c2a2889b142..52b130c756f 100644 --- a/tensorflow/lite/delegates/hexagon/builders/op_builder.h +++ b/tensorflow/lite/delegates/hexagon/builders/op_builder.h @@ -16,7 +16,6 @@ limitations under the License. #define TENSORFLOW_LITE_DELEGATES_HEXAGON_BUILDERS_OP_BUILDER_H_ #include -#include #include #include #include @@ -132,9 +131,9 @@ class OpBuilder { void AddInput(const TensorID& tensor_id) { input_ids_.push_back(tensor_id); } // Adds Output to the current node, the output has shape defined in 'dims'. - // The size of each element is defined using 'element_size'. + // This assumes the data type is uint8. // Returns the TensorID identifying this output in the graph. - TensorID AddOutput(const TfLiteIntArray* dims, int element_size); + TensorID AddOutput(const TfLiteIntArray* dims); // Adds Output to the current node, each element in the output has // size 'elementsize' and rank 'rank' and for each dimension in the output @@ -317,22 +316,11 @@ class GraphBuilder { bool AddTensorWithID(int tflite_tensor_id, int hexagon_node_id, int hexagon_node_output_id, bool overwrite = false) { if (!overwrite && HasTensor(tflite_tensor_id)) { - TF_LITE_KERNEL_LOG( - context_, - "Trying to add duplicate tensor without overwrite, tflite_tensor_id " - "%d, hexagon_node_id %d, hexagon_node_output_id %d", - tflite_tensor_id, hexagon_node_id, hexagon_node_output_id); return false; } if (tensors_.size() <= tflite_tensor_id) { tensors_.resize(tflite_tensor_id + 1); } - if (hexagon_node_id == -1 || hexagon_node_output_id == -1) - TF_LITE_KERNEL_LOG(context_, - "Trying to add invalid id, tflite_tensor_id " - "%d, hexagon_node_id %d, hexagon_node_output_id %d", - tflite_tensor_id, hexagon_node_id, - hexagon_node_output_id); tensors_[tflite_tensor_id] = OpBuilder::TensorID(hexagon_node_id, hexagon_node_output_id); return true; @@ -360,14 +348,6 @@ class GraphBuilder { int GetMaxBatchSize() const { return max_size_for_batch_; } private: - // Lookup in cache if data with key 'cache_key' is present. - // Return OpBuilder* for the data if found, nullptr otherwise. - OpBuilder* LookupConstData(uint64_t cache_key); - - // Inserts 'value' in cache, with key equals 'cache_key'. - // If data in cache with same key then it will be overwritten. - void AddToCache(uint64_t cache_key, OpBuilder* value); - // Helper method to fetch dimensions. // TODO(karimnosseir): Move this method to shared place. 
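The hash-keyed cache that this hunk touches (LookupConstData, AddToCache and the cache_ map above) boils down to the scheme sketched below: fingerprint the const data together with its 4-D shape and hand back the previously created node on a hit, so identical const tensors share one graph node. ConstNode, the FNV-1a Fingerprint and the XOR combine are illustrative stand-ins, not the real hexagon_nn or farmhash APIs.

#include <cstdint>
#include <map>
#include <memory>
#include <vector>

// Placeholder for the node handle type (OpBuilder* in the hexagon delegate).
struct ConstNode { int id = 0; };

// Stand-in 64-bit hash (FNV-1a); the delegate used farmhash Fingerprint64
// combined with a Murmur-style mixer (CombineFingerprints) instead.
uint64_t Fingerprint(const char* data, int len) {
  uint64_t h = 1469598103934665603ull;
  for (int i = 0; i < len; ++i) {
    h ^= static_cast<uint8_t>(data[i]);
    h *= 1099511628211ull;
  }
  return h;
}

class ConstNodeCache {
 public:
  // Returns the node previously created for an identical (shape, data)
  // payload, or creates and remembers a new one.
  ConstNode* GetOrCreate(const int shape[4], const char* data, int size) {
    // A real implementation should mix the two fingerprints properly; plain
    // XOR is only used here to keep the sketch short.
    uint64_t key = Fingerprint(data, size) ^
                   Fingerprint(reinterpret_cast<const char*>(shape),
                               4 * static_cast<int>(sizeof(int)));
    auto it = cache_.find(key);
    if (it != cache_.end()) return it->second;
    nodes_.push_back(std::make_unique<ConstNode>());
    nodes_.back()->id = static_cast<int>(nodes_.size());
    cache_[key] = nodes_.back().get();
    return nodes_.back().get();
  }

 private:
  std::map<uint64_t, ConstNode*> cache_;           // fingerprint -> node
  std::vector<std::unique_ptr<ConstNode>> nodes_;  // owns created nodes
};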
void GetDims(int* batch_size, int* height_size, int* width_size, @@ -380,10 +360,7 @@ class GraphBuilder { } // Adds a Cast op to convert a tensor from int8 to uint8 (or vice versa). - // The builder which has the casting operator is filled in 'cast_op_builder' - // if not nullptr. - TfLiteStatus AddCastOp(TfLiteContext* context, int op_type, int tensor_id, - OpBuilder** cast_op_builder); + TfLiteStatus AddCastOp(TfLiteContext* context, int op_type, int tensor_id); const HexagonNN* hexagon_nn_ = nullptr; TfLiteContext* context_ = nullptr; @@ -396,11 +373,6 @@ class GraphBuilder { // If the graph being built supports dynamic batch, this represents // the maximum value for batch. int max_size_for_batch_ = -1; - - // Cache for const data in the graph. - // Key is hash of the data, value is pointer to the OpBuilder* for the added - // data. - std::map cache_; }; } // namespace hexagon diff --git a/tensorflow/lite/delegates/hexagon/builders/transpose_builder.cc b/tensorflow/lite/delegates/hexagon/builders/transpose_builder.cc index eb0c2668edc..4a7304d011e 100644 --- a/tensorflow/lite/delegates/hexagon/builders/transpose_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/transpose_builder.cc @@ -29,7 +29,15 @@ TfLiteStatus TransposeOpBuilder::PopulateSubGraph(const TfLiteIntArray* inputs, const auto& input_tensor = context->tensors[tensor_id]; AddInput(graph_builder_->GetHexagonTensorId(tensor_id)); // permutation tensor. - AddInput(graph_builder_->GetHexagonTensorId(inputs->data[1])); + tensor_id = inputs->data[1]; + const auto& control_tensor = context->tensors[tensor_id]; + if (control_tensor.allocation_type == kTfLiteMmapRo) { + auto* const_control_tensor_node = + graph_builder_->AddConstNodeWithData(tensor_id, control_tensor); + AddInput(TensorID(const_control_tensor_node->GetID(), 0)); + } else { + AddInput(graph_builder_->GetHexagonTensorId(tensor_id)); + } TF_LITE_ENSURE_STATUS(ComputeAndAddMinAndMax(context, input_tensor)); diff --git a/tensorflow/lite/delegates/hexagon/builders/transpose_conv_2d_builder.cc b/tensorflow/lite/delegates/hexagon/builders/transpose_conv_2d_builder.cc index 3e852533394..d2620f71007 100644 --- a/tensorflow/lite/delegates/hexagon/builders/transpose_conv_2d_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/transpose_conv_2d_builder.cc @@ -97,6 +97,8 @@ TfLiteStatus TransposeConv2dOpBuilder::PopulateSubGraph( filter_depth_size; GetDims(&filter_batch_size, &filter_height_size, &filter_width_size, &filter_depth_size, weights_tensor.dims); + weight_shape_ = {filter_batch_size, filter_height_size, filter_width_size, + filter_depth_size}; // Weights tensor could be int8 even for per-tensor quantization. // Therefore, we look at the number of scale values to check if it is // per-channel quantized. @@ -104,7 +106,25 @@ TfLiteStatus TransposeConv2dOpBuilder::PopulateSubGraph( reinterpret_cast( weights_tensor.quantization.params); const bool is_per_channel_quant = weights_quant_params->scale->size > 1; - AddInput(graph_builder_->GetHexagonTensorId(tensor_id)); + + OpBuilder* const_weights_node; + if (weights_tensor.type == kTfLiteInt8) { + std::vector weights_data(NumElements(&weights_tensor)); + const int8_t* original_data = weights_tensor.data.int8; + // Flip bits on the weight values so that the int8 values are treated + // as uint8. 
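For context on the sign-bit flip used by the loop that follows (and by the conv_2d_helpers change earlier in this commit): XOR-ing a two's-complement int8 value with 0x80 adds 128 to its unsigned interpretation, mapping [-128, 127] onto [0, 255] without reordering values, which is the int8 to uint8 requantization the Hexagon ops expect. A minimal check, assuming k8BitSignFlipConstant is 0x80 (an assumption inferred from how the constant is used, not quoted from the header):

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t kSignFlip = 0x80;  // assumed value of k8BitSignFlipConstant
  // Flipping the sign bit shifts the range by +128 and keeps the ordering:
  // -128 -> 0, 0 -> 128, 127 -> 255.
  auto flip = [&](int8_t v) {
    return static_cast<uint8_t>(static_cast<uint8_t>(v) ^ kSignFlip);
  };
  assert(flip(-128) == 0);
  assert(flip(0) == 128);
  assert(flip(127) == 255);
  return 0;
}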
+ for (int i = 0; i < NumElements(&weights_tensor); ++i) { + weights_data[i] = original_data[i] ^ k8BitSignFlipConstant; + } + const_weights_node = graph_builder_->AddConstNodeWithData( + weight_shape_.data(), reinterpret_cast(weights_data.data()), + weights_data.size() * sizeof(weights_data[0])); + } else { + const_weights_node = graph_builder_->AddConstNodeWithData( + weight_shape_.data(), weights_tensor.data.raw, weights_tensor.bytes); + } + graph_builder_->AddTensorWithID(tensor_id, const_weights_node->GetID(), 0); + AddInput(TensorID(const_weights_node->GetID(), 0)); // Handle weights quantization. float weights_min = 0; diff --git a/tensorflow/lite/delegates/hexagon/builders/transpose_conv_2d_builder.h b/tensorflow/lite/delegates/hexagon/builders/transpose_conv_2d_builder.h index 4afab9894f0..0a6a90a0297 100644 --- a/tensorflow/lite/delegates/hexagon/builders/transpose_conv_2d_builder.h +++ b/tensorflow/lite/delegates/hexagon/builders/transpose_conv_2d_builder.h @@ -47,7 +47,7 @@ class TransposeConv2dOpBuilder : public OpBuilder { TensorID node_output_; std::vector transposed_weights_; std::vector stride_shape_; - std::vector bias_shape_; + std::vector weight_shape_, bias_shape_; std::vector bias_data_; // Non-null only if node has per-channel quantized weights/biases. diff --git a/tensorflow/lite/delegates/hexagon/hexagon_delegate_kernel.cc b/tensorflow/lite/delegates/hexagon/hexagon_delegate_kernel.cc index 83ebc15510e..cdf6b555929 100644 --- a/tensorflow/lite/delegates/hexagon/hexagon_delegate_kernel.cc +++ b/tensorflow/lite/delegates/hexagon/hexagon_delegate_kernel.cc @@ -264,9 +264,8 @@ TfLiteStatus HexagonDelegateKernel::BuildGraph( if (tensor_id == -1) continue; const auto& input_tensor = context->tensors[tensor_id]; if (input_tensor.allocation_type == kTfLiteMmapRo) { - builder_->AddConstNodeWithData( - tensor_id, input_tensor, - /*int8_to_uint8*/ (input_tensor.type == kTfLiteInt8)); + builder_->AddConstNodeWithData(tensor_id, input_tensor, + /*int8_to_uint8*/ true); } } auto* op_builder = From 702ca66743d70882099d6cc4ac1c81646763b8f7 Mon Sep 17 00:00:00 2001 From: Andy Ly Date: Fri, 7 Aug 2020 20:30:32 -0700 Subject: [PATCH 2390/2522] Add support for replicating functions with replicate variant ops in ReplicateToIslandPass. Certain ops, while stateful, should be rewritten with different attributes depending on which replica they are in. This extends to such ops via function calls. PiperOrigin-RevId: 325555999 Change-Id: I9806184cc3b4dae49b22061ba1ab680c02ad9ba1 --- .../tensorflow/tests/replicate_to_island.mlir | 185 ++++++++++++++- .../transforms/replicate_to_island.cc | 211 +++++++++++++++--- 2 files changed, 370 insertions(+), 26 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/replicate_to_island.mlir b/tensorflow/compiler/mlir/tensorflow/tests/replicate_to_island.mlir index ddcfde5cbcd..487234ce958 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/replicate_to_island.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/replicate_to_island.mlir @@ -1,4 +1,4 @@ -// RUN: tf-opt %s -tf-replicate-to-island | FileCheck %s +// RUN: tf-opt -split-input-file %s -tf-replicate-to-island | FileCheck %s // Tests per replica island has same control operands as island holding // replicate. 
@@ -256,3 +256,186 @@ func @device_ordinals(%arg0: tensor, %arg1: tensor<2x!tf.string>) { // CHECK: "tf._XlaSendFromHost" // CHECK-SAME: device_ordinal = 2 // CHECK: "tf.NoOp" + +// ----- + +// Tests functions with replica variant ops reachable from a replicate region +// is cloned and remapped. + +// CHECK-LABEL: func @call_with_replicate_variant_ops +func @call_with_replicate_variant_ops(%arg0: tensor, %arg1: tensor<2x!tf.string>) { + tf_executor.graph { + tf_executor.island { + tf_device.replicate([%arg0, %arg0] as %arg2: tensor) {n = 2 : i32, devices = {TPU_REPLICATED_CORE_0 = ["/job:worker/replica:0/task:0/device:TPU:1", "/job:worker/replica:0/task:0/device:TPU:2"]}} { + "tf.StatefulPartitionedCall"(%arg1) {config = "", config_proto = "", executor_type = "", f = @send_recv} : (tensor<2x!tf.string>) -> () + tf_device.return + } + tf_executor.yield + } + tf_executor.fetch + } + return +} + +// CHECK: "tf.StatefulPartitionedCall" +// CHECK-SAME: f = [[CALL_REPLICA_0:@[a-z0-9_]+]] +// CHECK: "tf.StatefulPartitionedCall" +// CHECK-SAME: f = [[CALL_REPLICA_1:@[a-z0-9_]+]] + +func @send_recv(%arg0: tensor<2x!tf.string>) { + %0 = "tf._XlaRecvAtHost"(%arg0) {_xla_has_host_transfer = true, device_ordinal = 0 : i64, key = "host_compute_channel_send_0"} : (tensor<2x!tf.string>) -> tensor + "tf._XlaSendFromHost"(%0, %arg0) {_xla_has_host_transfer = true, device_ordinal = 0 : i64, key = "host_compute_channel_recv_0"} : (tensor, tensor<2x!tf.string>) -> () + "tf.NoOp"() : () -> () + return +} + +// CHECK: func [[CALL_REPLICA_0]] +// CHECK: "tf._XlaRecvAtHost" +// CHECK-SAME: device_ordinal = 1 +// CHECK: "tf._XlaSendFromHost" +// CHECK-SAME: device_ordinal = 1 + +// CHECK: func [[CALL_REPLICA_1]] +// CHECK: "tf._XlaRecvAtHost" +// CHECK-SAME: device_ordinal = 2 +// CHECK: "tf._XlaSendFromHost" +// CHECK-SAME: device_ordinal = 2 + +// ----- + +// Tests transitive functions with replica variant ops reachable from a +// replicate region is cloned and remapped. 
+ +// CHECK-LABEL: func @call_with_replicate_variant_ops +func @call_with_replicate_variant_ops(%arg0: tensor, %arg1: tensor<2x!tf.string>) { + tf_executor.graph { + tf_executor.island { + tf_device.replicate([%arg0, %arg0] as %arg2: tensor) {n = 2 : i32, devices = {TPU_REPLICATED_CORE_0 = ["/job:worker/replica:0/task:0/device:TPU:1", "/job:worker/replica:0/task:0/device:TPU:2"]}} { + "tf.StatefulPartitionedCall"(%arg1) {config = "", config_proto = "", executor_type = "", f = @callee} : (tensor<2x!tf.string>) -> () + tf_device.return + } + tf_executor.yield + } + tf_executor.fetch + } + return +} + +// CHECK: "tf.StatefulPartitionedCall" +// CHECK-SAME: f = [[CALLEE_REPLICA_0:@[a-z0-9_]+]] +// CHECK: "tf.StatefulPartitionedCall" +// CHECK-SAME: f = [[CALLEE_REPLICA_1:@[a-z0-9_]+]] + +func @callee(%arg0: tensor<2x!tf.string>) { + "tf.StatefulPartitionedCall"(%arg0) {config = "", config_proto = "", executor_type = "", f = @send_recv} : (tensor<2x!tf.string>) -> () + return +} + +func @send_recv(%arg0: tensor<2x!tf.string>) { + %0 = "tf._XlaRecvAtHost"(%arg0) {_xla_has_host_transfer = true, device_ordinal = 0 : i64, key = "host_compute_channel_send_0"} : (tensor<2x!tf.string>) -> tensor + "tf._XlaSendFromHost"(%0, %arg0) {_xla_has_host_transfer = true, device_ordinal = 0 : i64, key = "host_compute_channel_recv_0"} : (tensor, tensor<2x!tf.string>) -> () + "tf.NoOp"() : () -> () + return +} + +// CHECK: func [[CALLEE_REPLICA_0]] +// CHECK: "tf.StatefulPartitionedCall" +// CHECK-SAME: f = [[TRANSITIVE_CALLEE_REPLICA_0:@[a-z0-9_]+]] + +// CHECK: func [[TRANSITIVE_CALLEE_REPLICA_0]] +// CHECK: "tf._XlaRecvAtHost" +// CHECK-SAME: device_ordinal = 1 +// CHECK: "tf._XlaSendFromHost" +// CHECK-SAME: device_ordinal = 1 + +// CHECK: func [[CALLEE_REPLICA_1]] +// CHECK: "tf.StatefulPartitionedCall" +// CHECK-SAME: f = [[TRANSITIVE_CALLEE_REPLICA_1:@[a-z0-9_]+]] + +// CHECK: func [[TRANSITIVE_CALLEE_REPLICA_1]] +// CHECK: "tf._XlaRecvAtHost" +// CHECK-SAME: device_ordinal = 2 +// CHECK: "tf._XlaSendFromHost" +// CHECK-SAME: device_ordinal = 2 + +// ----- + +// Tests functional control flow functions with replica variant ops reachable +// from a replicate region is cloned and remapped. Only the branches reachable +// with replica variant ops are cloned. 
+ +// CHECK-LABEL: func @control_flow_with_replicate_variant_ops +func @control_flow_with_replicate_variant_ops(%arg0: tensor, %arg1: tensor, %arg2: tensor, %arg3: tensor<2x!tf.string>) { + tf_executor.graph { + tf_executor.island { + tf_device.replicate([%arg0, %arg0] as %arg4: tensor, [%arg1, %arg1] as %arg5: tensor, [%arg2, %arg2] as %arg6: tensor) {n = 2 : i32, devices = {TPU_REPLICATED_CORE_0 = ["/job:worker/replica:0/task:0/device:TPU:1", "/job:worker/replica:0/task:0/device:TPU:2"]}} { + %0 = "tf.If"(%arg4, %arg5, %arg6, %arg3) {else_branch = @cond_false, is_stateless = true, then_branch = @cond_true} : (tensor, tensor, tensor, tensor<2x!tf.string>) -> tensor + tf_device.return + } + tf_executor.yield + } + tf_executor.fetch + } + return +} + +// CHECK: "tf.If" +// CHECK-SAME: else_branch = @cond_false +// CHECK-SAME: then_branch = [[COND_TRUE_REPLICA_0:@[a-z0-9_]+]] +// CHECK: "tf.If" +// CHECK-SAME: else_branch = @cond_false +// CHECK-SAME: then_branch = [[COND_TRUE_REPLICA_1:@[a-z0-9_]+]] + +func @cond_false(%arg0: tensor, %arg1: tensor, %arg2: tensor<2x!tf.string>) -> tensor { + return %arg0 : tensor +} + +// CHECK-NOT: func @cond_false.+( + +func @cond_true(%arg0: tensor, %arg1: tensor, %arg2: tensor<2x!tf.string>) -> tensor { + "tf._XlaSendFromHost"(%arg1, %arg2) {_xla_has_host_transfer = true, device_ordinal = 0 : i64, key = "host_compute_channel_recv_0"} : (tensor, tensor<2x!tf.string>) -> () + %0 = "tf._XlaRecvAtHost"(%arg2) {_xla_has_host_transfer = true, device_ordinal = 0 : i64, key = "host_compute_channel_send_0"} : (tensor<2x!tf.string>) -> tensor + return %0 : tensor +} + +// CHECK: func [[COND_TRUE_REPLICA_0]] +// CHECK: "tf._XlaSendFromHost" +// CHECK-SAME: device_ordinal = 1 +// CHECK: "tf._XlaRecvAtHost" +// CHECK-SAME: device_ordinal = 1 + +// CHECK: func [[COND_TRUE_REPLICA_1]] +// CHECK: "tf._XlaSendFromHost" +// CHECK-SAME: device_ordinal = 2 +// CHECK: "tf._XlaRecvAtHost" +// CHECK-SAME: device_ordinal = 2 + +// ----- + +// Tests function with no replica variant ops reachable from a replicate region +// is not cloned. + +// CHECK-LABEL: func @no_replicate_variant_ops +func @no_replicate_variant_ops(%arg0: tensor, %arg1: tensor<2x!tf.string>) { + tf_executor.graph { + tf_executor.island { + tf_device.replicate([%arg0, %arg0] as %arg2: tensor) {n = 2 : i32, devices = {TPU_REPLICATED_CORE_0 = ["/job:worker/replica:0/task:0/device:TPU:1", "/job:worker/replica:0/task:0/device:TPU:2"]}} { + "tf.StatefulPartitionedCall"(%arg1) {config = "", config_proto = "", executor_type = "", f = @send_recv} : (tensor<2x!tf.string>) -> () + tf_device.return + } + tf_executor.yield + } + tf_executor.fetch + } + return +} + +// CHECK: "tf.StatefulPartitionedCall" +// CHECK-SAME: f = @send_recv + +func @send_recv(%arg0: tensor<2x!tf.string>) { + "tf.NoOp"() : () -> () + return +} + +// CHECK-NOT: @send_recv.+( diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/replicate_to_island.cc b/tensorflow/compiler/mlir/tensorflow/transforms/replicate_to_island.cc index e7f2977dbcd..ef75f90d5c1 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/replicate_to_island.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/replicate_to_island.cc @@ -20,6 +20,7 @@ limitations under the License. #include #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Sequence.h" @@ -32,6 +33,7 @@ limitations under the License. 
#include "mlir/IR/Builders.h" // from @llvm-project #include "mlir/IR/Diagnostics.h" // from @llvm-project #include "mlir/IR/Dialect.h" // from @llvm-project +#include "mlir/IR/SymbolTable.h" // from @llvm-project #include "mlir/IR/Visitors.h" // from @llvm-project #include "mlir/Pass/Pass.h" // from @llvm-project #include "mlir/Support/LogicalResult.h" // from @llvm-project @@ -64,6 +66,151 @@ bool RequiresDeviceOrdinalAttribute(Operation* op) { llvm::isa(op); } +// Checks if a region contains ops that are replica variant. +bool HasReplicaVariantOps(Region& region, + const llvm::Optional& devices) { + auto result = region.walk([&](Operation* op) { + if (RequiresReplicaIDAttribute(op) || + (devices.hasValue() && RequiresDeviceOrdinalAttribute(op))) + return WalkResult::interrupt(); + + if (auto launch = dyn_cast(op)) + if (devices.hasValue() && devices.getValue().get(launch.device())) + return WalkResult::interrupt(); + + return WalkResult::advance(); + }); + return result.wasInterrupted(); +} + +// Collects all functions reachable from a region, including transitive ones. +llvm::SmallPtrSet GetReachableFunctionsFromRegion(ModuleOp module, + Region& region) { + llvm::SmallPtrSet visited_functions; + + SymbolTable symbol_table(module); + auto symbol_uses = symbol_table.getSymbolUses(®ion); + if (!symbol_uses) return {}; + + for (auto& use : *symbol_uses) + if (auto func = + symbol_table.lookup(use.getSymbolRef().getRootReference())) + visited_functions.insert(func); + + llvm::SmallVector functions_to_visit(visited_functions.begin(), + visited_functions.end()); + while (!functions_to_visit.empty()) { + llvm::SmallVector new_functions_to_visit; + + for (FuncOp function_to_visit : functions_to_visit) { + auto func_symbol_uses = + symbol_table.getSymbolUses(function_to_visit.getCallableRegion()); + if (!func_symbol_uses) continue; + + for (auto& use : *func_symbol_uses) + if (auto func = symbol_table.lookup( + use.getSymbolRef().getRootReference())) + if (visited_functions.insert(func).second) + new_functions_to_visit.push_back(func); + } + + functions_to_visit.swap(new_functions_to_visit); + } + + return visited_functions; +} + +// Collects all functions and transitive functions reachable from region that +// contain replicate variant ops. 
+llvm::SmallDenseMap GetReachableFunctionsToClone( + ModuleOp module, Region& region, + const llvm::Optional& devices) { + llvm::SmallPtrSet reachable_functions = + GetReachableFunctionsFromRegion(module, region); + + llvm::SmallDenseMap functions_to_clone; + llvm::SmallVector functions_to_visit; + for (FuncOp func : reachable_functions) { + if (!func.getCallableRegion()) continue; + if (HasReplicaVariantOps(*func.getCallableRegion(), devices)) { + functions_to_clone.insert({func.getName(), func}); + functions_to_visit.push_back(func); + } + } + + while (!functions_to_visit.empty()) { + llvm::SmallVector new_functions_to_visit; + + for (FuncOp func_to_visit : functions_to_visit) { + auto func_uses = func_to_visit.getSymbolUses(module); + if (!func_uses) continue; + for (auto use : *func_uses) { + auto parent_func = use.getUser()->getParentOfType(); + if (!parent_func || !reachable_functions.contains(parent_func) || + !functions_to_clone.insert({parent_func.getName(), parent_func}) + .second) + continue; + new_functions_to_visit.push_back(parent_func); + } + } + + functions_to_visit.swap(new_functions_to_visit); + } + + return functions_to_clone; +} + +struct FuncOldNameAndClone { + StringRef old_name; + FuncOp clone; +}; + +// Replaces all symbol uses with cloned functions, for `region` and across the +// cloned functions themselves. +LogicalResult UpdateSymbolUsesWithClones( + SymbolTable& symbol_table, ModuleOp module, Region& region, + llvm::MutableArrayRef cloned_functions) { + llvm::SmallVector, 4> old_to_new_names; + old_to_new_names.reserve(cloned_functions.size()); + for (auto& cloned_function : cloned_functions) + old_to_new_names.push_back( + {cloned_function.old_name, cloned_function.clone.getName()}); + + for (const auto& old_to_new_name : old_to_new_names) { + if (failed(symbol_table.replaceAllSymbolUses( + old_to_new_name.first, old_to_new_name.second, ®ion))) + return failure(); + + for (auto& cloned_function : cloned_functions) + if (failed(symbol_table.replaceAllSymbolUses( + old_to_new_name.first, old_to_new_name.second, + cloned_function.clone.getCallableRegion()))) + return failure(); + } + return success(); +} + +// Collects TPU device ordinal for outside compilation communication ops. This +// currently assumes outside compilation only uses `TPU_REPLICATED_CORE_0` +// aliased device for the device computation. +llvm::Optional GetDeviceOrdinal( + const llvm::Optional& devices, Location loc, + unsigned replica_id) { + int64_t device_ordinal = 0; + if (devices.hasValue()) { + if (auto tpu_replica_0 = devices.getValue().get("TPU_REPLICATED_CORE_0")) { + llvm::StringRef tpu_device = tpu_replica_0.cast()[replica_id] + .cast() + .getValue(); + if (succeeded(tensorflow::GetDeviceOrdinalFromDeviceString( + loc, tpu_device, &device_ordinal))) { + return llvm::Optional(device_ordinal); + } + } + } + return llvm::None; +} + // Updates replica variant ops in a region based on replica `replica_id`. // TODO(b/157624749): Replace this with better abstraction to differentiate ops // for different replicas. Some ops, such as XlaHostCompute op or TPU Embedding @@ -72,27 +219,17 @@ bool RequiresDeviceOrdinalAttribute(Operation* op) { // represents replica id. 
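The reachability helpers added above (GetReachableFunctionsFromRegion and GetReachableFunctionsToClone) are worklist traversals over the call graph; the sketch below shows the same shape on a plain name-to-callees map, with CallGraph standing in for MLIR's SymbolTable queries (illustrative only, not pass code).

#include <map>
#include <set>
#include <string>
#include <vector>

using CallGraph = std::map<std::string, std::vector<std::string>>;

// Collects every function transitively reachable from `roots`, in the same
// breadth-first worklist style as GetReachableFunctionsFromRegion.
std::set<std::string> Reachable(const CallGraph& graph,
                                const std::vector<std::string>& roots) {
  std::set<std::string> visited(roots.begin(), roots.end());
  std::vector<std::string> worklist(roots.begin(), roots.end());
  while (!worklist.empty()) {
    std::vector<std::string> next;
    for (const std::string& fn : worklist) {
      auto it = graph.find(fn);
      if (it == graph.end()) continue;
      for (const std::string& callee : it->second)
        if (visited.insert(callee).second) next.push_back(callee);
    }
    worklist.swap(next);
  }
  return visited;
}

GetReachableFunctionsToClone then works in the opposite direction over the same structure: every reachable function that contains a replica-variant op, together with its transitive callers inside the reachable set, is cloned once per replica, and the clones' symbol uses are remapped by UpdateSymbolUsesWithClones.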
LogicalResult UpdateRegionReplicateVariantOps( OpBuilder& builder, Location loc, Region& region, int replica_id, + llvm::MutableArrayRef cloned_functions, const llvm::Optional& devices) { - int64_t device_ordinal = -1; - const bool has_devices = devices.hasValue(); - if (has_devices) { - if (auto tpu_replica_0 = devices.getValue().get("TPU_REPLICATED_CORE_0")) { - llvm::StringRef tpu_device = tpu_replica_0.cast()[replica_id] - .cast() - .getValue(); - if (failed(tensorflow::GetDeviceOrdinalFromDeviceString( - loc, tpu_device, &device_ordinal))) { - return failure(); - } - } - } + llvm::Optional device_ordinal = + GetDeviceOrdinal(devices, loc, replica_id); - region.walk([&](Operation* op) { + auto update_replicate_variant_ops = [&](Operation* op) { // Add replica id. if (RequiresReplicaIDAttribute(op)) op->setAttr(kReplicaIdAttr, builder.getI32IntegerAttr(replica_id)); - if (!has_devices) return; + if (!devices.hasValue()) return; // Map aliased devices to explicit devices based on replica. if (auto launch = dyn_cast(op)) @@ -102,10 +239,15 @@ LogicalResult UpdateRegionReplicateVariantOps( device_by_replica.cast()[replica_id].cast()); // Add device ordinal. - if (device_ordinal >= 0 && RequiresDeviceOrdinalAttribute(op)) + if (device_ordinal && RequiresDeviceOrdinalAttribute(op)) op->setAttr(kDeviceOrdinalAttr, - builder.getI64IntegerAttr(device_ordinal)); - }); + builder.getI64IntegerAttr(*device_ordinal)); + }; + + region.walk(update_replicate_variant_ops); + for (auto& cloned_function : cloned_functions) + cloned_function.clone.getCallableRegion()->walk( + update_replicate_variant_ops); return success(); } @@ -115,7 +257,7 @@ LogicalResult UpdateRegionReplicateVariantOps( // `tf_device.replicate`, the device will be remapped to an explicit device // for the associated replica island. LogicalResult ExpandReplicateIntoReplicas( - const Dialect* tf_dialect, OpBuilder& builder, + const Dialect* tf_dialect, OpBuilder& builder, ModuleOp module, tf_executor::IslandOp island_op, tf_device::ReplicateOp replicate_op, int num_replicas, llvm::SmallVectorImpl& replicas) { replicas.reserve(num_replicas); @@ -133,9 +275,23 @@ LogicalResult ExpandReplicateIntoReplicas( terminator.getOperands()); terminator.erase(); + auto funcs_to_clone = + GetReachableFunctionsToClone(module, replicate_op.body(), devices); + SymbolTable symbol_table(module); + builder.setInsertionPoint(island_op); BlockAndValueMapping mapping; for (int i : llvm::seq(0, num_replicas)) { + // Clone reachable functions with replica variant ops. + llvm::SmallVector cloned_functions; + cloned_functions.reserve(funcs_to_clone.size()); + for (auto& func_to_clone : funcs_to_clone) { + auto cloned_function = func_to_clone.getSecond().clone(); + symbol_table.insert(cloned_function, module.end()); + cloned_functions.push_back( + {func_to_clone.getSecond().getName(), cloned_function}); + } + // Create new island for replica. auto replica = builder.create( island_op.getLoc(), output_types, control_type, replica_inputs); @@ -149,9 +305,13 @@ LogicalResult ExpandReplicateIntoReplicas( // Copy over replicate region into replica island. 
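GetDeviceOrdinal above keys off the trailing ordinal of the replica's TPU_REPLICATED_CORE_0 device string; a minimal sketch of that extraction is below (the pass itself goes through tensorflow::GetDeviceOrdinalFromDeviceString and full device-name parsing, so this is only an approximation).

#include <cstdint>
#include <cstdlib>
#include <optional>
#include <string>

// Extracts the trailing ordinal from a device string such as
// "/job:worker/replica:0/task:0/device:TPU:1" -> 1.
std::optional<int64_t> OrdinalFromDeviceString(const std::string& device) {
  auto pos = device.rfind(':');
  if (pos == std::string::npos || pos + 1 == device.size()) return std::nullopt;
  const char* digits = device.c_str() + pos + 1;
  char* end = nullptr;
  int64_t ordinal = std::strtoll(digits, &end, 10);
  if (end == digits) return std::nullopt;  // no digits after the last ':'
  return ordinal;
}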
replicate_op.body().cloneInto(&replica.body(), mapping); - if (failed(UpdateRegionReplicateVariantOps(builder, replicate_op.getLoc(), - replica.body(), /*replica_id=*/i, - devices))) + if (failed(UpdateSymbolUsesWithClones(symbol_table, module, replica.body(), + cloned_functions))) + return failure(); + + if (failed(UpdateRegionReplicateVariantOps( + builder, replicate_op.getLoc(), replica.body(), + /*replica_id=*/i, cloned_functions, devices))) return failure(); replicas.push_back(replica); @@ -211,6 +371,7 @@ LogicalResult ExpandReplicateIntoReplicas( // tf_executor.yield %a1, %b1 : tensor, tensor // } LogicalResult CreateIslandsFromReplicate(const Dialect* tf_dialect, + ModuleOp module, tf_executor::GraphOp graph_op, tf_executor::IslandOp island_op, tf_device::ReplicateOp replicate_op) { @@ -219,7 +380,7 @@ LogicalResult CreateIslandsFromReplicate(const Dialect* tf_dialect, // Create islands per replica. llvm::SmallVector replicas; - if (failed(ExpandReplicateIntoReplicas(tf_dialect, builder, island_op, + if (failed(ExpandReplicateIntoReplicas(tf_dialect, builder, module, island_op, replicate_op, num_replicas, replicas))) return failure(); @@ -299,8 +460,8 @@ void ReplicateToIslandPass::runOnOperation() { auto graph_op = island_op.getParentOfType(); auto replicate_op = cast(island_op.GetBody().front()); - if (failed(CreateIslandsFromReplicate(tf_dialect, graph_op, island_op, - replicate_op))) + if (failed(CreateIslandsFromReplicate(tf_dialect, module, graph_op, + island_op, replicate_op))) return signalPassFailure(); } } From a144b1a24e06a173d700dcfe3d12f804f14acb75 Mon Sep 17 00:00:00 2001 From: Andy Ly Date: Fri, 7 Aug 2020 21:33:04 -0700 Subject: [PATCH 2391/2522] Update mhlo.constant to use a custom assembly format instead of a custom printer and parser (NFC). PiperOrigin-RevId: 325560779 Change-Id: I8ce3350d6af6986cdcb7e4e1ee308b1095fb120b --- .../mlir-hlo/Dialect/mhlo/IR/hlo_ops.td | 3 +- .../mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc | 31 ------------------- tensorflow/compiler/mlir/hlo/tests/ops.mlir | 10 +++--- 3 files changed, 6 insertions(+), 38 deletions(-) diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td index 4c09c209bd1..b8b1926a0c9 100644 --- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td @@ -67,8 +67,7 @@ def HLO_ConstOp : HLO_Op<"constant", "OpBuilder &builder, OperationState &result, Attribute value" >]; - let printer = [{ return Print(*this, &p); }]; - let parser = [{ return ParseConstOp(&parser, &result); }]; + let assemblyFormat = "attr-dict $value"; let hasFolder = 1; diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc index 6f453d1a167..eda10b0f187 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc @@ -112,37 +112,6 @@ DenseIntElementsAttr BuildSliceLimits(DenseIntElementsAttr start_indices, // ConstOp //===----------------------------------------------------------------------===// -static void Print(ConstOp op, OpAsmPrinter* printer) { - // Print op name. - *printer << op.getOperationName(); - - // Elide attribute value while printing the attribute dictionary. 
- SmallVector elided_attrs; - elided_attrs.push_back("value"); - printer->printOptionalAttrDict(op.getAttrs(), elided_attrs); - - *printer << ' ' << op.value(); -} - -static ParseResult ParseConstOp(OpAsmParser* parser, OperationState* result) { - if (parser->parseOptionalAttrDict(result->attributes)) return failure(); - - // If colon is not present after attribute dictionary, it should be short form - // and attribute 'value' is outside the dictionary. - if (failed(parser->parseOptionalColon())) { - Attribute value; - if (parser->parseAttribute(value, "value", result->attributes)) - return failure(); - return parser->addTypeToList(value.getType(), result->types); - } - - // Long form should have type of the result after colon. - Type ty; - if (parser->parseType(ty)) return failure(); - result->types.push_back(ty); - return success(); -} - OpFoldResult ConstOp::fold(ArrayRef operands) { assert(operands.empty() && "constant has no operands"); diff --git a/tensorflow/compiler/mlir/hlo/tests/ops.mlir b/tensorflow/compiler/mlir/hlo/tests/ops.mlir index 25c7d6aee61..a8f16c403ae 100644 --- a/tensorflow/compiler/mlir/hlo/tests/ops.mlir +++ b/tensorflow/compiler/mlir/hlo/tests/ops.mlir @@ -480,7 +480,7 @@ func @map_non_scalar_computation_operand(%arg0: tensor<4x5xf32>, %arg1: tensor<4 // expected-error@+1 {{computation arguments must be 0-rank tensor, but got: arg #1 of type 'tensor<5xf32>'}} %0 = "mhlo.map"(%arg0, %arg1) ( { ^bb0(%arg2: tensor, %arg3: tensor<5xf32>): - %1 = mhlo.constant {value = dense<2.0> : tensor} : tensor + %1 = mhlo.constant dense<2.0> : tensor "mhlo.return"(%1) : (tensor) -> () }) {dimensions = dense<[0, 1]> : tensor<2xi64>} : (tensor<4x5xf32>, tensor<4x5xf32>) -> tensor<4x5xf32> return %0 : tensor<4x5xf32> @@ -492,7 +492,7 @@ func @map_mismatch_operand_and_computation_args(%arg0: tensor<4x5xf32>, %arg1: t // expected-error@+1 {{element type of operands and computation arguments must match, but got: 'f32' and 'i32'}} %0 = "mhlo.map"(%arg0, %arg1) ( { ^bb0(%arg2: tensor, %arg3: tensor): - %1 = mhlo.constant {value = dense<2.0> : tensor} : tensor + %1 = mhlo.constant dense<2.0> : tensor "mhlo.return"(%1) : (tensor) -> () }) {dimensions = dense<[0, 1]> : tensor<2xi64>} : (tensor<4x5xf32>, tensor<4x5xf32>) -> tensor<4x5xf32> return %0 : tensor<4x5xf32> @@ -504,7 +504,7 @@ func @map_invalid_number_of_computation_output(%arg0: tensor<4x5xf32>, %arg1: te // expected-error@+1 {{computation must return single output, but got: 0}} %0 = "mhlo.map"(%arg0, %arg1) ( { ^bb0(%arg2: tensor, %arg3: tensor): - %1 = mhlo.constant {value = dense<2.0> : tensor} : tensor + %1 = mhlo.constant dense<2.0> : tensor "mhlo.return"() : () -> () }) {dimensions = dense<[0, 1]> : tensor<2xi64>} : (tensor<4x5xf32>, tensor<4x5xf32>) -> tensor<4x5xf32> return %0 : tensor<4x5xf32> @@ -516,7 +516,7 @@ func @main_non_scalar_computation_output(%arg0: tensor<4x5xf32>, %arg1: tensor<4 // expected-error@+1 {{computation must return 0-rank tensor, but got: 'tensor<5xf32>'}} %0 = "mhlo.map"(%arg0, %arg1) ( { ^bb0(%arg2: tensor, %arg3: tensor): - %1 = mhlo.constant {value = dense<2.0> : tensor} : tensor<5xf32> + %1 = mhlo.constant dense<2.0> : tensor<5xf32> "mhlo.return"(%1) : (tensor<5xf32>) -> () }) {dimensions = dense<[0, 1]> : tensor<2xi64>} : (tensor<4x5xf32>, tensor<4x5xf32>) -> tensor<4x5xf32> return %0 : tensor<4x5xf32> @@ -528,7 +528,7 @@ func @mismatch_computation_output_type(%arg0: tensor<4x5xf32>, %arg1: tensor<4x5 // expected-error@+1 {{element type of result and computation output must match, but got: 
'f32' and 'i32'}} %0 = "mhlo.map"(%arg0, %arg1) ( { ^bb0(%arg2: tensor, %arg3: tensor): - %1 = mhlo.constant {value = dense<2> : tensor} : tensor + %1 = mhlo.constant dense<2> : tensor "mhlo.return"(%1) : (tensor) -> () }) {dimensions = dense<[0, 1]> : tensor<2xi64>} : (tensor<4x5xf32>, tensor<4x5xf32>) -> tensor<4x5xf32> return %0 : tensor<4x5xf32> From e7c59b4c5164552386f208de8766b31c787ff4f9 Mon Sep 17 00:00:00 2001 From: Andy Ly Date: Fri, 7 Aug 2020 21:46:10 -0700 Subject: [PATCH 2392/2522] Early terminate LaunchToDeviceAttributePass on missing 'tf' dialect registration (NFC). PiperOrigin-RevId: 325561680 Change-Id: Iafeee051275632bdfa7890ca44239e277d55feef --- .../mlir/tensorflow/transforms/launch_to_device_attribute.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/launch_to_device_attribute.cc b/tensorflow/compiler/mlir/tensorflow/transforms/launch_to_device_attribute.cc index bce18c0b4b7..9f67a3e7e71 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/launch_to_device_attribute.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/launch_to_device_attribute.cc @@ -106,8 +106,8 @@ LogicalResult HoistOpsAndAnnotateWithDevice(const Dialect* tf_dialect, void LaunchToDeviceAttributePass::runOnFunction() { const Dialect* tf_dialect = getContext().getRegisteredDialect("tf"); if (!tf_dialect) { - signalPassFailure(); getFunction().emitError() << "'tf' dialect is not registered"; + return signalPassFailure(); } auto result = getFunction().walk([&](tf_device::LaunchOp launch) { From 78686f49f50ec0bc69a864911c6e33adca56937e Mon Sep 17 00:00:00 2001 From: Andy Ly Date: Fri, 7 Aug 2020 21:46:23 -0700 Subject: [PATCH 2393/2522] Update tf_device.return to use assembly format instead of a custom parser and printer (NFC). PiperOrigin-RevId: 325561693 Change-Id: Ie13a8006580c4477a793e745b7e71e06bb19bd1f --- .../compiler/mlir/tensorflow/ir/tf_device.cc | 25 ------------------- .../mlir/tensorflow/ir/tf_device_ops.td | 3 +-- 2 files changed, 1 insertion(+), 27 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_device.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_device.cc index 77008b55672..9aa0a72f475 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_device.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_device.cc @@ -118,31 +118,6 @@ TensorFlowDeviceDialect::TensorFlowDeviceDialect(MLIRContext* context) // operation results are perfectly forwarded to the launch return. 
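// As a rough sketch of the change below: the declarative format
// "attr-dict ($results^ `:` type($results))?" adopted for tf_device.return
// round-trips the terminator either without operands,
//   tf_device.return
// or with operands and their types,
//   tf_device.return %a, %b : tensor<i32>, tensor<f32>
// (illustrative values), matching what the deleted ParseReturnOp/Print pair did.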
bool LaunchOp::WrapsSingleOp() { return BlockWrapsSingleOp(&GetBody()); } -//===----------------------------------------------------------------------===// -// tf_device.return -//===----------------------------------------------------------------------===// - -namespace { -ParseResult ParseReturnOp(OpAsmParser* parser, OperationState* state) { - llvm::SmallVector op_info; - llvm::SmallVector types; - llvm::SMLoc loc = parser->getCurrentLocation(); - return failure(parser->parseOperandList(op_info) || - (!op_info.empty() && parser->parseColonTypeList(types)) || - parser->resolveOperands(op_info, types, loc, state->operands)); -} - -void Print(ReturnOp op, OpAsmPrinter* p) { - *p << op.getOperationName(); - if (op.getNumOperands() > 0) { - *p << ' '; - p->printOperands(op.getOperands()); - *p << " : "; - interleaveComma(op.getOperandTypes(), *p); - } -} -} // anonymous namespace - //===----------------------------------------------------------------------===// // tf_device.parallel_execute //===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_device_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_device_ops.td index 565be63a74f..d94a37d9b02 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_device_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_device_ops.td @@ -104,8 +104,7 @@ The `tf_device.return` operation terminates and returns values from a }]> ]; - let parser = [{ return Parse$cppClass(&parser, &result); }]; - let printer = [{ return Print(*this, &p); }]; + let assemblyFormat = "attr-dict ($results^ `:` type($results))?"; } def TfDevice_LaunchFuncOp : TfDevice_Op<"launch_func", []> { From 3be8012f0cb2003db4c28b33cf9078010ace8cf8 Mon Sep 17 00:00:00 2001 From: Smit Hinsu Date: Fri, 7 Aug 2020 22:20:35 -0700 Subject: [PATCH 2394/2522] Disable shape inference while importing Graph to MLIR in the bridge Currently the shape inference causes a hard error during inference for ops that requires some of the inputs to be ranked. For example, SpaceToBatchND requires block_size to be of rank one. This mode is slated to be the default mode and we run shape inference pass in the bridge early in the pipeline so we don't need to infer shapes during the import. PiperOrigin-RevId: 325565060 Change-Id: Ic8933afd406757d5ca9bb0a1174101f38483f368 --- tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc | 6 ++++++ .../compiler/mlir/tensorflow/utils/compile_mlir_util.cc | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc b/tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc index 8e6d9042987..8be6facce38 100644 --- a/tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc +++ b/tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc @@ -132,6 +132,12 @@ Status MlirFunctionOptimizationPass::Run( import_config.graph_as_function = true; import_config.control_outputs = *control_ret_node_names; import_config.upgrade_legacy = true; + // Disable shape inference during import as some TensorFlow op fails during + // shape inference with dynamic shaped operands. This in turn causes the + // import to fail. Shape inference during import is going to be removed and + // the shape inference pass is run early in the pass pipeline, shape inference + // during import is not necessary. 
+ import_config.enable_shape_inference = false; TF_ASSIGN_OR_RETURN(auto module_ref, ConvertGraphToMlir(**graph, debug_info, *flib_def, import_config, &context)); diff --git a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc index 78d621cbe75..eee2f0a560c 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc @@ -510,6 +510,12 @@ Status CompileGraphToXlaHlo( mlir::MLIRContext context; GraphImportConfig config; config.graph_as_function = true; + // Disable shape inference during import as some TensorFlow op fails during + // shape inference with dynamic shaped operands. This in turn causes the + // import to fail. Shape inference during import is going to be removed and + // the shape inference pass is run early in the pass pipeline, shape inference + // during import is not necessary. + config.enable_shape_inference = false; auto module_or = ConvertGraphToMlir(graph, debug_info, flib_def, config, &context); if (!module_or.ok()) return module_or.status(); From fb9db5d28cc97bb2c05305b671f2e5fe12a251e2 Mon Sep 17 00:00:00 2001 From: Jan Pfeiffer Date: Sat, 8 Aug 2020 01:24:44 -0700 Subject: [PATCH 2395/2522] Adds `with_updates` method to `StructuredTensor`. PiperOrigin-RevId: 325577753 Change-Id: Ie7c462c2ed468c54a68e43905da4885be1041267 --- .../ops/structured/structured_tensor.py | 267 +++++++++++++++++- .../ops/structured/structured_tensor_test.py | 175 +++++++++++- 2 files changed, 435 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/ops/structured/structured_tensor.py b/tensorflow/python/ops/structured/structured_tensor.py index 3c3bd03a06b..c09a38f1d21 100644 --- a/tensorflow/python/ops/structured/structured_tensor.py +++ b/tensorflow/python/ops/structured/structured_tensor.py @@ -1,3 +1,4 @@ +# Lint as python3 # Copyright 2019 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -19,6 +20,7 @@ from __future__ import division from __future__ import print_function import re +from typing import Callable, Dict, List, Sequence, Tuple, Union import numpy as np @@ -85,6 +87,23 @@ class StructuredTensor(composite_tensor.CompositeTensor): field. """ + #============================================================================= + # Common Types + #============================================================================= + # pylint: disable=invalid-name + # Field names work as key, and they can be a sequence to refer to the + # sub-levels (embedded) StructuredTensor's. + FieldName = Union[str, Sequence[str]] + + # Each field may contain one of the following types of Tensors. + FieldValue = Union[ops.Tensor, ragged_tensor.RaggedTensor, 'StructuredTensor'] + + # Function that takes a FieldValue as input and returns the transformed + # FieldValue. 
+ FieldFn = Callable[[FieldValue], FieldValue] + + # pylint: enable=invalid-name + #============================================================================= # Constructor & Factory Methods #============================================================================= @@ -252,6 +271,180 @@ class StructuredTensor(composite_tensor.CompositeTensor): row_partitions, internal=_structured_tensor_factory_key) + def with_updates(self, + updates: Dict[FieldName, Union[FieldValue, FieldFn, None]], + validate: bool = False) -> 'StructuredTensor': # pylint: disable=bad-whitespace + """Creates a new `StructuredTensor` with the updated fields. + + If this `StructuredTensor` is a scalar, and `k` is the `FieldName` being + updated and `v` the new value, then: + + ``` + result[k] = v # If (k, v) is in updates and v is a FieldValue + result[k] = f(self[k]) # If (k, f) is in updates and f is a FieldFn + result[k] = self[k] # If k is in self.field_names but not in updates + ``` + + If this `StructuredTensor` has rank `N` and shape `[D1...DN]`, then each + FieldValue `v` in `updates` must have shape `[D1...DN, ...]`, that is, + prefixed with the same shape as the `StructuredTensor`. Then the resulting + `StructuredTensor` will have: + + ``` + result[i1...iN][k] = v[i1...iN] # (k, v) in updates + result[i1...iN][k] = f(self.field_value(k))[i1...iN] # (k, f) in updates + result[i1...iN][k] = self[i1...iN][k] # k not in updates + ``` + + Note that `result.shape` is always equal to `self.shape` (but the shapes + of nested StructuredTensors may be changed if they are updated with new + values). + + Args: + updates: A dictionary mapping `FieldName` to either a `FieldValue` to be + used to update, or a `FieldFn` that will transform the value for the + given `FieldName`. `FieldName` can be a string for a direct field, or a + sequence of strings to refer to a nested sub-field. `FieldFn` is a + function that takes a `FieldValue` as input and should return a + `FieldValue`. All other fields are copied over to the new + `StructuredTensor`. New `FieldName` can be given (to add new fields), + but only to existing `StructuredTensor`, it won't automatically create + new nested structures -- but one can create a whole `StructureTensor` + sub-structure and set that into an existing structure. If the new value + is set to `None`, it is removed. + validate: If true, then add runtime validation ops that check that the + field values all have compatible shapes in the outer `shape.rank` + dimensions. + + Returns: + A `StructuredTensor`. + + Raises: + `ValueError`: If the any of the `FieldName` keys points to non-existent + sub-structures, if parent and child nodes are updated, if shapes + change, if a delete update is given for a non-existant field, or if a + `FieldFn` transforming function is given for a `FieldName` that doesn't + yet exist. + + Examples: + + >>> shoes_us = StructuredTensor.from_pyval([ + ... {"age": 12, "nicknames": ["Josaphine"], + ... "shoes": {"sizes": [8.0, 7.5, 7.5]}}, + ... {"age": 82, "nicknames": ["Bob", "Bobby"], + ... "shoes": {"sizes": [11.0, 11.5, 12.0]}}, + ... {"age": 42, "nicknames": ["Elmo"], + ... "shoes": {"sizes": [9.0, 9.5, 10.0]}}]) + >>> def us_to_europe(t): + ... return tf.round(t * 2.54 + 17.0) # Rough approximation. 
+ >>> shoe_sizes_key = ("shoes", "sizes") + >>> shoes_eu = shoes_us.with_updates({shoe_sizes_key: us_to_europe}) + >>> shoes_eu.field_value(shoe_sizes_key) + + """ + updates_items = [(_normalize_field_name_to_tuple(name), value) + for name, value in updates.items()] + + # Sort by keys and check for updates of both parent and child nodes. + updates_items = sorted(updates_items) + for i in range(1, len(updates_items)): + # Parent of a node would precede node in the sorted order. + name = updates_items[i][0] # item[0] is the name, item[1] is the value. + prev_name = updates_items[i - 1][0] + if name[:len(prev_name)] == prev_name: + raise ValueError( + '`StructuredTensor.with_updates` does not allow both parent and ' + 'child nodes to be updated: parent={}, child={}. If needed you can ' + 'update child nodes in the parent update value.'.format( + prev_name, name)) + return self._with_updates_impl((), updates_items, validate) + + def _with_updates_impl(self, error_prefix: Tuple[str], # pylint: disable=invalid-sequence-index + updates: List[Tuple[FieldName, Union[FieldValue, # pylint: disable=invalid-sequence-index + FieldFn]]], + validate: bool) -> 'StructuredTensor': + """Recursive part of `with_updates` implementation.""" + # Get current fields. + new_fields = dict(self._fields) + + # Convert field name to string with full path for error messages. + def name_fullpath(name: Sequence[str]) -> str: + return str(error_prefix + (name,)) + + # Apply value if a function or the value itself. + def apply_value(name: str, value: Union['FieldValue', + 'FieldFn']) -> 'FieldValue': + if callable(value): + # `value` is actually a transforming function. + if name not in new_fields: + raise ValueError( + '`StructuredTensor.with_updates` cannot update the field {} ' + 'because a transforming function was given, but that field ' + 'does not already exist.'.format(name_fullpath(name))) + value = value(new_fields[name]) + return value + + # Merge updates. + for name, value in updates: + if not name or not name[0]: + raise ValueError( + '`StructuredTensor.with_updates` does not allow empty names ' + '{}.'.format(name_fullpath(name))) + + if len(name) == 1: + name = name[0] + if value is None: + if name not in new_fields: + raise ValueError( + '`StructuredTensor.with_updates` cannot delete field ' + '{} because it is not present.'.format(name_fullpath(name))) + new_fields.pop(name) + else: + new_fields[name] = apply_value(name, value) + else: + # Recursive + prefix = name[0] + suffix = name[1:] + if prefix not in new_fields: + raise ValueError( + '`StructuredTensor.with_updates` cannot create new sub-field ' + '{} if parent field {} is not set.'.format( + error_prefix + tuple(name), name_fullpath(prefix))) + current_value = new_fields[prefix] + if not isinstance(current_value, StructuredTensor): + raise ValueError( + '`StructuredTensor.with_updates` cannot create new sub-field ' + '{} if parent structure {} is not a `StructuredTensor` that ' + 'can contain sub-structures -- it is a `{}`.'.format( + error_prefix + tuple(name), name_fullpath(prefix), + type(current_value))) + one_update = [(suffix, value)] + + # Accessing protected member in recursion. + # FutureWork: optimize by aggregating the recursions, instead of + # calling one at a time. + # pylint: disable=protected-access + value = current_value._with_updates_impl(error_prefix + (prefix,), + one_update, validate) + # pylint: enable=protected-access + new_fields[prefix] = value + + # TODO(edloper): When validate=True, only validate the modified fields. 
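    # A minimal usage sketch of the public `with_updates` API that lands here
    # (illustrative values only):
    #   st = StructuredTensor.from_pyval({"a": 1, "b": {"c": 2}})
    #   st.with_updates({"a": 7})                       # replace a field
    #   st.with_updates({("b", "c"): lambda x: x + 1})  # transform a nested field
    #   st.with_updates({("b", "c"): None})             # delete a nested field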
+ try: + return StructuredTensor.from_fields( + new_fields, + shape=self.shape, + row_partitions=self._row_partitions, + nrows=self._nrows, + validate=validate) + + except ValueError as e: + msg = '`StructuredTensor.with_updates` failed' + if error_prefix: + msg = '{} for field {}'.format(msg, error_prefix) + raise ValueError('{}: {}'.format(msg, e)) + #============================================================================= # Properties #============================================================================= @@ -279,22 +472,74 @@ class StructuredTensor(composite_tensor.CompositeTensor): def row_partitions(self): """A tuple of `RowPartition`s defining the shape of this `StructuredTensor`. - If this `StructuredTensor` has a ragged shape, then all fields will be - encoded as either `RaggedTensor`s or `StructuredTensor`s with these - `RowPartition`s used to define their outermost `self.rank` dimensions. + When `self.rank <= 1`, this tuple will be empty. - If this `StructuredTensor` has a uniform (non-ragged) shape, then these - row partitions will all be defined using `uniform_row_length`. + When `self.rank > 1`, these `RowPartitions` define the shape of the + `StructuredTensor` by describing how a flat (1D) list of structures can be + repeatedly partitioned to form a higher-dimensional object. In particular, + the flat list is first partitioned into sublists using `row_partitions[-1]`, + and then those sublists are further partitioned using `row_partitions[-2]`, + etc. The following examples show the row partitions used to describe + several different `StructuredTensor`, each of which contains 8 copies of + the same structure (`x`): + + >>> x = {'a': 1, 'b': ['foo', 'bar', 'baz']} # shape = [] (scalar) + + >>> s1 = [[x, x, x, x], [x, x, x, x]] # shape = [2, 4] + >>> StructuredTensor.from_pyval(s1).row_partitions + (tf.RowPartition(row_splits=tf.Tensor([0 4 8], shape=(3,), + dtype=int64)),) + + >>> s2 = [[x, x], [x, x], [x, x], [x, x]] # shape = [4, 2] + >>> StructuredTensor.from_pyval(s2).row_partitions + (tf.RowPartition(row_splits=tf.Tensor([0 2 4 6 8], shape=(5,), + dtype=int64)),) + + >>> s3 = [[x, x, x], [], [x, x, x, x], [x]] # shape = [2, None] + >>> StructuredTensor.from_pyval(s3).row_partitions + (tf.RowPartition(row_splits=tf.Tensor([0 3 3 7 8], shape=(5,), + dtype=int64)),) + + >>> s4 = [[[x, x], [x, x]], [[x, x], [x, x]]] # shape = [2, 2, 2] + >>> StructuredTensor.from_pyval(s4).row_partitions + (tf.RowPartition(row_splits=tf.Tensor([0 2 4], shape=(3,), dtype=int64)), + tf.RowPartition(row_splits=tf.Tensor([0 2 4 6 8], shape=(5,), + dtype=int64))) + + + >>> s5 = [[[x, x], [x]], [[x, x]], [[x, x], [x]]] # shape = [3, None, None] + >>> StructuredTensor.from_pyval(s5).row_partitions + (tf.RowPartition(row_splits=tf.Tensor([0 2 3 5], shape=(4,), dtype=int64)), + tf.RowPartition(row_splits=tf.Tensor([0 2 3 5 7 8], shape=(6,), + dtype=int64))) + + Note that shapes for nested fields (such as `x['b']` in the above example) + are not considered part of the shape of a `StructuredTensor`, and are not + included in `row_partitions`. + + If this `StructuredTensor` has a ragged shape (i.e., if any of the + `row_partitions` is not uniform in size), then all fields will be encoded + as either `RaggedTensor`s or `StructuredTensor`s with these `RowPartition`s + used to define their outermost `self.rank` dimensions. Returns: A `tuple` of `RowPartition` objects with length `self.rank - 1` - (or `0` if `self.rank < 2`). 
+ (or `0` if `self.rank < 2`) + """ return self._row_partitions def nrows(self): """The number of rows in this StructuredTensor (if rank>0). + This means the length of the outer-most dimension of the StructuredTensor. + + Notice that if `self.rank > 1`, then this equals the number of rows + of the first row partition. That is, + `self.nrows() == self.row_partitions[0].nrows()`. + + Otherwise `self.nrows()` will be the first dimension of the field values. + Returns: A scalar integer `Tensor` (or `None` if `self.rank == 0`). """ @@ -1175,3 +1420,13 @@ def _merge_dims(value, outer_axis, inner_axis): _structured_tensor_factory_key = object() # unique private object + + +def _normalize_field_name_to_tuple(name: 'FieldName') -> Sequence[str]: + """FieldName can be given also as string, this normalizes it to a tuple.""" + if isinstance(name, str): + return (name,) + if isinstance(name, list): + return tuple(name) + assert isinstance(name, tuple) + return name diff --git a/tensorflow/python/ops/structured/structured_tensor_test.py b/tensorflow/python/ops/structured/structured_tensor_test.py index 75aa5a872a6..f4218042cc2 100644 --- a/tensorflow/python/ops/structured/structured_tensor_test.py +++ b/tensorflow/python/ops/structured/structured_tensor_test.py @@ -924,7 +924,7 @@ class StructuredTensorTest(test_util.TensorFlowTestCase, st = StructuredTensor.from_pyval({"a": 5, "b": {"c": [1, 2, 3]}}) self.assertAllEqual(st.field_value(("a",)), 5) self.assertAllEqual(st.field_value(("b", "c")), [1, 2, 3]) - expected = "Field path \(.*a.*,.*b.*\) not found in .*" + expected = r"Field path \(.*a.*,.*b.*\) not found in .*" with self.assertRaisesRegex(KeyError, expected): st.field_value(("a", "b")) @@ -961,6 +961,179 @@ class StructuredTensorTest(test_util.TensorFlowTestCase, r = result.field_value("r") self.assertAllEqual(r, [[[1, 2], [3, 4]]]) + @parameterized.parameters([ + # Simple example. + ( + {"a": 12, "b": 23}, + {"a": 7}, + ), + # New field. + ( + {"a": 12}, + {("b",): 13}, + ), + # Nested example. + ( + {"a": 12, "b": {"c": 23}}, + {("b", "c"): 7}, + ), + # Multipe updates. + ( + {"a": 12, "b": {"c": 23}}, + {"a": 3, ("b", "c"): 7}, + ), + # Deep updates. + ( + {"a": 12, "b": {"c": 23, "d": {"e": 11}}}, + {("b", "c"): 7, ("b", "d", "e"): 13}, + ), + # Multiple updates to the same substructure. + ( + {"a": 12, "b": {"c": 23, "d": {"e": 11}}}, + {("b", "c"): 7, ("b", "f"): 13}, + ), + # Scalar to non-scalar elements. Shape remains unchanged. + ( + {"a": 5}, + {"a": ragged_factory_ops.constant_value([[51, 52], [61, 62, 63]])}, + ), + # Non-scalar element to scalar. + ( + {"c": {"a": [5, 3], "b": 2}}, + {("c", "a"): 5}, + ), + # Rank-1 StructuredTensor: shape is preserved and an item is added. + ( + [{"a": 5}, {"a": 6}], + {"a": [15, 16], "b": np.array([0.9, 1.1])}, + ), + # Non-scalar ragged elements, within a rank-2 StructuredTensor: elements + # rows (inner dimensions) are changed, but StructuredTensor shape + # (outer dimensions) are preserved. 
+ ( + [[{"a": [5]}], [{"a": [3, 4]}, {"a": [8]}]], + {"a": ragged_factory_ops.constant_value([[[50, 60]], [[30], []]])}, + ), + ]) # pyformat: disable + def testWithUpdatesValues(self, pyval, updates): + st = StructuredTensor.from_pyval(pyval) + updated_st = st.with_updates(updates, validate=False) + for key, value in updates.items(): + got = updated_st.field_value(key) + self.assertAllEqual( + value, got, "Update failed: key={}, value={}, got={}".format( + key, value, got)) + + def testWithUpdatesFunctions(self): + pyval = {"a": 12, "b": {"c": 23, "d": {"e": 11}}} + st = StructuredTensor.from_pyval(pyval) + st_updated = st.with_updates( + { + "a": lambda x: x + 1, + ("b", "d", "e"): lambda x: x + 7 + }, validate=True) + # Updated values. + self.assertAllEqual(st_updated.field_value("a"), 13) + self.assertAllEqual(st_updated.field_value(("b", "d", "e")), 18) + # Unchanged value. + self.assertAllEqual(st_updated.field_value(("b", "c")), 23) + + def testWithUpdatesChecks(self): + pyval = {"a": 12, "b": {"c": 23, "d": {"e": 11}}} + st = StructuredTensor.from_pyval(pyval) + + # Try to set non-existant sub-structure. + with self.assertRaisesRegex( + ValueError, r"cannot create new sub-field.*\('b', 'x'\).*is not set"): + st.with_updates({("b", "x", "e"): 5}) + + # Try to set with path to a non-sub-structure. + with self.assertRaisesRegex( + ValueError, r"cannot create new sub-field.*\('b', 'c'\).*is not a " + r"`StructuredTensor`"): + st.with_updates({("b", "c", "e"): 5}) + + # Try to apply function to non-existing value. + with self.assertRaisesRegex( + ValueError, r"cannot update.*\('b', 'd', 'x'\).*does not already " + r"exist"): + st.with_updates({("b", "d", "x"): lambda x: x + 1}) + + # Empty names not allowed. + with self.assertRaisesRegex(ValueError, r"does not allow empty names"): + st.with_updates({(): lambda x: x + 1}) + with self.assertRaisesRegex(ValueError, r"does not allow empty names"): + st.with_updates({("b", ""): lambda x: x + 1}) + + # Parent and child nodes cannot be updated simultaneously. + with self.assertRaisesRegex( + ValueError, r"does not allow both parent and child nodes.*" + r"parent=\('b'.*child=\('b', 'd'"): + st.with_updates({("b", "d"): lambda x: x + 1, "a": 3, "b": 10}) + + # Invalid shape change. + with self.assertRaisesRegex( + ValueError, r"\('c'.*incompatible with the shape that was specified"): + st_with_shape = StructuredTensor.from_pyval([[{ + "c": { + "a": 5, + "b": 2 + } + }], [{ + "c": { + "a": 3, + "b": 1 + } + }, { + "c": { + "a": 8, + "b": 18 + } + }]]) + st_with_shape.with_updates({("c", "a"): 3}) + + def testWithUpdatesDelete(self): + pyval = {"a": 12, "b": {"c": 23, "d": {"e": 11}}} + st = StructuredTensor.from_pyval(pyval) + updated_st = st.with_updates({("b", "c"): None}, validate=True) + self.assertNotIn("c", updated_st.field_value("b").field_names()) + with self.assertRaisesRegex(ValueError, + r"cannot delete.*\('b', 'x'\).*not present"): + st.with_updates({("b", "x"): None}, validate=True) + with self.assertRaisesRegex(ValueError, + r"cannot delete.*\'x'.*not present"): + st.with_updates({"x": None}, validate=False) + + # Test that nrows() and rowpartitions() is preserved after removal. 
+ pyval = [[{"a": 1}, {"a": 2}], [{"a": 3}]] + st = StructuredTensor.from_pyval(pyval) + self.assertLen(st.row_partitions, 1) + self.assertAllEqual(st.nrows(), 2) + self.assertAllEqual(st.row_partitions[0].row_lengths(), [2, 1]) + updated_st = st.with_updates({("a",): None}, validate=True) + self.assertLen(updated_st.row_partitions, 1) + self.assertAllEqual(updated_st.nrows(), 2) + self.assertAllEqual(updated_st.row_partitions[0].row_lengths(), [2, 1]) + + # Test that it works also for rank-1 and rank-0 empty results. + pyval = [{"a": 1}, {"a": 2}] + st = StructuredTensor.from_pyval(pyval) + self.assertEqual(st.rank, 1) + updated_st = st.with_updates({("a",): None}, validate=True) + self.assertEqual(updated_st.rank, 1) + + # assertEqual won't work because nrows() returns a tensor, and + # assertEqual doesn't do the magic to convert them to numbers in a + # way that works in eager/non-eager mode. + self.assertAllEqual(updated_st.nrows(), 2) + pyval = {"a": [0, 1]} + st = StructuredTensor.from_pyval(pyval) + self.assertEqual(st.rank, 0) + updated_st = st.with_updates({("a",): None}, validate=True) + self.assertEqual(updated_st.rank, 0) + self.assertFalse(updated_st.row_partitions) + self.assertIsNone(updated_st.nrows()) + if __name__ == "__main__": googletest.main() From cdfd1114fd309abc86f4c2b5507abdd3578457fb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sat, 8 Aug 2020 02:01:43 -0700 Subject: [PATCH 2396/2522] compat: Update forward compatibility horizon to 2020-08-08 PiperOrigin-RevId: 325579714 Change-Id: Ia49ab74cdbebbc59dc4f919fb2b6a68c3507cc42 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index bef47619972..01ea900fd11 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 8, 7) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 8, 8) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From 1b8087726a5667d9e88e41dda310f25e1593b0f2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sat, 8 Aug 2020 02:01:44 -0700 Subject: [PATCH 2397/2522] Update GraphDef version to 487. PiperOrigin-RevId: 325579715 Change-Id: Ie4b2d7cdd7672486494b057c5a1cdd35ab2b6f81 --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index ee9be29958d..3512cb4c5b9 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 486 // Updated: 2020/8/7 +#define TF_GRAPH_DEF_VERSION 487 // Updated: 2020/8/8 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From c1a176930a9d5895bdd87b91ba0dde53a0aa1a35 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Sat, 8 Aug 2020 04:34:20 -0700 Subject: [PATCH 2398/2522] Integrate LLVM at llvm/llvm-project@b6d9add71b1a Updates LLVM usage to match [b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a) PiperOrigin-RevId: 325589103 Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72 --- .../hlo/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.h | 8 +++++++- .../mlir/hlo/lib/Dialect/mhlo/IR/chlo_ops.cc | 3 +-- .../mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc | 2 +- .../mlir/hlo/lib/Dialect/mhlo/IR/lhlo_ops.cc | 2 +- tensorflow/compiler/mlir/lite/ir/tfl_ops.cc | 2 +- tensorflow/compiler/mlir/tensorflow/ir/tf_device.cc | 3 ++- .../compiler/mlir/tensorflow/ir/tf_executor.cc | 3 ++- tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc | 2 +- .../compiler/mlir/tensorflow/ir/tf_saved_model.cc | 3 ++- tensorflow/compiler/mlir/tfjs/ir/tfjs_ops.cc | 3 +-- .../compiler/mlir/tools/kernel_gen/cubin_creator.cc | 3 ++- .../mlir/tools/kernel_gen/ir/tf_framework_ops.cc | 3 +-- tensorflow/compiler/xla/service/cpu/cpu_compiler.cc | 13 +++++-------- tensorflow/compiler/xla/service/cpu/mlir_emitter.cc | 8 +++++--- .../compiler/xla/service/gpu/ir/xla_thunks_ops.cc | 2 +- tensorflow/compiler/xla/service/mlir_gpu/BUILD | 1 + .../xla/service/mlir_gpu/lhlo_dialect_emitter.cc | 2 +- .../compiler/xla/service/mlir_gpu/mlir_compiler.cc | 10 +++------- .../xla/service/mlir_gpu/mlir_compiler_impl.cc | 7 ++++++- tensorflow/workspace.bzl | 4 ++-- third_party/llvm/llvm.autogenerated.BUILD | 3 +++ 21 files changed, 49 insertions(+), 38 deletions(-) diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.h b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.h index 14a22e92a74..4c92ef3de85 100644 --- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.h +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.h @@ -32,8 +32,14 @@ namespace mlir { namespace chlo { class HloClientDialect : public Dialect { + void initialize(); + public: - explicit HloClientDialect(MLIRContext *context); + explicit HloClientDialect(MLIRContext *context) + : Dialect(getDialectNamespace(), context, + TypeID::get()) { + initialize(); + } static StringRef getDialectNamespace() { return "chlo"; } }; diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/chlo_ops.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/chlo_ops.cc index 81389c3be89..d43dd71e94b 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/chlo_ops.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/chlo_ops.cc @@ -266,8 +266,7 @@ BROADCAST_BINARY_OP_DEFS(BroadcastXorOp); // chlo Dialect Constructor //===----------------------------------------------------------------------===// -HloClientDialect::HloClientDialect(MLIRContext* context) - : Dialect(getDialectNamespace(), context) { +void HloClientDialect::initialize() { addOperations< #define GET_OP_LIST #include "mlir-hlo/Dialect/mhlo/IR/chlo_ops.cc.inc" diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc index eda10b0f187..f5deb94e3a4 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc @@ -2188,7 +2188,7 @@ struct HLOInlinerInterface : public DialectInlinerInterface { //===----------------------------------------------------------------------===// MhloDialect::MhloDialect(MLIRContext* context) - : Dialect(getDialectNamespace(), context) 
{ + : Dialect(getDialectNamespace(), context, TypeID::get()) { addOperations< #define GET_OP_LIST #include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.cc.inc" diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/lhlo_ops.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/lhlo_ops.cc index bbb463cd1a9..f61a66397e7 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/lhlo_ops.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/lhlo_ops.cc @@ -49,7 +49,7 @@ namespace mlir { namespace lmhlo { LmhloDialect::LmhloDialect(MLIRContext *context) - : Dialect(getDialectNamespace(), context) { + : Dialect(getDialectNamespace(), context, TypeID::get()) { addOperations< #define GET_OP_LIST #include "mlir-hlo/Dialect/mhlo/IR/lhlo_ops.cc.inc" diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc b/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc index ae1e3ebe5e6..b5fcd5e82e2 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc +++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc @@ -269,7 +269,7 @@ struct TensorFlowLiteOpFolderDialectInterface }; TensorFlowLiteDialect::TensorFlowLiteDialect(mlir::MLIRContext *context) - : Dialect(/*name=*/"tfl", context) { + : Dialect(/*name=*/"tfl", context, TypeID::get()) { addOperations< #define GET_OP_LIST #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.cc.inc" diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_device.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_device.cc index 9aa0a72f475..5345000b4bd 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_device.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_device.cc @@ -101,7 +101,8 @@ bool BlockWrapsSingleOp(Block* block) { } // end anonymous namespace TensorFlowDeviceDialect::TensorFlowDeviceDialect(MLIRContext* context) - : Dialect(/*name=*/"tf_device", context) { + : Dialect(/*name=*/"tf_device", context, + TypeID::get()) { addOperations< #define GET_OP_LIST #include "tensorflow/compiler/mlir/tensorflow/ir/tf_device.cc.inc" diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc index c18723b0982..9c2968fab37 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc @@ -92,7 +92,8 @@ struct TensorFlowExecutorOpFolderDialectInterface } // namespace TensorFlowExecutorDialect::TensorFlowExecutorDialect(MLIRContext *context) - : Dialect(/*name=*/"tf_executor", context) { + : Dialect(/*name=*/"tf_executor", context, + TypeID::get()) { addOperations< #define GET_OP_LIST #include "tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc.inc" diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc index abff4c21cf1..dbad613d909 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc @@ -188,7 +188,7 @@ std::vector new std::vector(); TensorFlowDialect::TensorFlowDialect(MLIRContext *context) - : Dialect(/*name=*/"tf", context) { + : Dialect(/*name=*/"tf", context, TypeID::get()) { addOperations< #define GET_OP_LIST #include "tensorflow/compiler/mlir/tensorflow/ir/tf_all_ops.cc.inc" diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc index 94a792ec3db..6883d0358ec 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc @@ -113,7 +113,8 @@ static LogicalResult Verify(SessionInitializerOp 
session_initializer) { //===----------------------------------------------------------------------===// TensorFlowSavedModelDialect::TensorFlowSavedModelDialect(MLIRContext *context) - : Dialect(/*name=*/"tf_saved_model", context) { + : Dialect(/*name=*/"tf_saved_model", context, + TypeID::get()) { addOperations< #define GET_OP_LIST #include "tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc.inc" diff --git a/tensorflow/compiler/mlir/tfjs/ir/tfjs_ops.cc b/tensorflow/compiler/mlir/tfjs/ir/tfjs_ops.cc index 9ba875cdce4..331bed09dce 100644 --- a/tensorflow/compiler/mlir/tfjs/ir/tfjs_ops.cc +++ b/tensorflow/compiler/mlir/tfjs/ir/tfjs_ops.cc @@ -25,8 +25,7 @@ namespace tfjs { // TFJSDialect //===----------------------------------------------------------------------===// -TFJSDialect::TFJSDialect(MLIRContext *context) - : Dialect(getDialectNamespace(), context) { +void TFJSDialect::initialize() { addOperations< #define GET_OP_LIST #include "tensorflow/compiler/mlir/tfjs/ir/tfjs_ops.cc.inc" diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.cc b/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.cc index 1f511e27d9e..82b0e613f90 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/cubin_creator.cc @@ -278,7 +278,8 @@ StatusOr> tensorflow::kernel_gen::GenerateCubinForTfCode( mlir::OwningModuleRef kernel_module = xla::mlir_gpu::ExtractKernelModule(*module).ValueOrDie(); - auto llvmModule = mlir::translateModuleToNVVMIR(*kernel_module); + llvm::LLVMContext llvmContext; + auto llvmModule = mlir::translateModuleToNVVMIR(*kernel_module, llvmContext); if (!llvmModule) { return InternalError("Could not translate MLIR module to NVVM"); } diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.cc b/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.cc index f85f1229fe8..5b7a19a3eac 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.cc @@ -24,8 +24,7 @@ namespace mlir { namespace kernel_gen { namespace tf_framework { -TFFrameworkDialect::TFFrameworkDialect(MLIRContext *context) - : Dialect(getDialectNamespace(), context) { +void TFFrameworkDialect::initialize() { addOperations< #define GET_OP_LIST #include "tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.cc.inc" diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc index eb5d9e704f5..aab13f6e8dd 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc @@ -622,10 +622,9 @@ StatusOr> CpuCompiler::RunBackend( // Compile must be thread-safe so create a new LLVM context for the module. mlir::MLIRContext mlir_context; - auto llvm_module = absl::make_unique( - "__compute_module", - mlir_context.getRegisteredDialect() - ->getLLVMContext()); + llvm::LLVMContext llvm_context; + auto llvm_module = + absl::make_unique("__compute_module", llvm_context); auto jit = absl::make_unique( CompilerTargetOptions(module->config()), @@ -834,10 +833,8 @@ CpuCompiler::CompileAheadOfTime(std::unique_ptr module_group, // Compile must be thread-safe so create a new LLVM context for the module. 
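  // Sketch of the pattern this integrate switches to (the LLVMContext is no
  // longer owned by the MLIR LLVM dialect, so callers create one locally and
  // pass it through explicitly; illustrative only):
  //   llvm::LLVMContext llvm_context;
  //   llvm::Module llvm_module("__compute_module", llvm_context);
  //   // or, when translating an MLIR module:
  //   auto translated = mlir::translateModuleToLLVMIR(*mlir_module, llvm_context);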
mlir::MLIRContext mlir_context; - llvm::Module llvm_module( - "__compute_module", - mlir_context.getRegisteredDialect() - ->getLLVMContext()); + llvm::LLVMContext llvm_context; + llvm::Module llvm_module("__compute_module", llvm_context); llvm_module.setDataLayout(target_machine->createDataLayout()); llvm_module.setTargetTriple(triple.getTriple()); if (pic_level != llvm::PICLevel::NotPIC) { diff --git a/tensorflow/compiler/xla/service/cpu/mlir_emitter.cc b/tensorflow/compiler/xla/service/cpu/mlir_emitter.cc index ff48f554ce6..ae23f224207 100644 --- a/tensorflow/compiler/xla/service/cpu/mlir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/mlir_emitter.cc @@ -32,7 +32,8 @@ namespace cpu { namespace { // Lower an MLIR module to an LLVM module. -std::unique_ptr MakeLLVMModule(mlir::OwningModuleRef module) { +std::unique_ptr MakeLLVMModule(mlir::OwningModuleRef module, + llvm::LLVMContext *context) { // When set, the LLVM backend will be allowed to reassociate floating-point // reductions, which enables much more efficient "horizontal" SIMD // implementations. @@ -47,7 +48,7 @@ std::unique_ptr MakeLLVMModule(mlir::OwningModuleRef module) { mlir::LowerVectorToLLVMOptions().setReassociateFPReductions( kReassociateFPReductions))); CHECK(succeeded(manager.run(*module))); - return mlir::translateModuleToLLVMIR(*module); + return mlir::translateModuleToLLVMIR(*module, *context); } // Get arguments to pass a memref to an mlir function. @@ -114,7 +115,8 @@ Status EmitMlirFuncAndCall( emitter(&op_builder, function); // Now link it all into the main LLVM module. - auto mlir_llvm_module = MakeLLVMModule(std::move(mlir_module)); + auto mlir_llvm_module = + MakeLLVMModule(std::move(mlir_module), &b->getContext()); mlir_llvm_module->setDataLayout(llvm_module->getDataLayout()); llvm::Linker::linkModules( *llvm_module, std::move(mlir_llvm_module), llvm::Linker::None, diff --git a/tensorflow/compiler/xla/service/gpu/ir/xla_thunks_ops.cc b/tensorflow/compiler/xla/service/gpu/ir/xla_thunks_ops.cc index 4dbd3196ae6..154612824ef 100644 --- a/tensorflow/compiler/xla/service/gpu/ir/xla_thunks_ops.cc +++ b/tensorflow/compiler/xla/service/gpu/ir/xla_thunks_ops.cc @@ -28,7 +28,7 @@ namespace mlir { namespace xla_thunks { XLAThunksDialect::XLAThunksDialect(MLIRContext *context) - : Dialect(getDialectNamespace(), context) { + : Dialect(getDialectNamespace(), context, TypeID::get()) { addOperations< #define GET_OP_LIST #include "tensorflow/compiler/xla/service/gpu/ir/xla_thunks_ops.cc.inc" diff --git a/tensorflow/compiler/xla/service/mlir_gpu/BUILD b/tensorflow/compiler/xla/service/mlir_gpu/BUILD index 2bcf5fa7dae..43a6efe9e90 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/BUILD +++ b/tensorflow/compiler/xla/service/mlir_gpu/BUILD @@ -82,6 +82,7 @@ cc_library( ":kernel_lowering", ":lhlo_dialect_emitter", "@com_google_absl//absl/container:flat_hash_map", + "@llvm-project//llvm:Core", "@llvm-project//mlir:GPUDialect", "@llvm-project//mlir:AllPassesAndDialects", "@llvm-project//mlir:IR", diff --git a/tensorflow/compiler/xla/service/mlir_gpu/lhlo_dialect_emitter.cc b/tensorflow/compiler/xla/service/mlir_gpu/lhlo_dialect_emitter.cc index 194eb4618d3..e0d7456fbb8 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/lhlo_dialect_emitter.cc +++ b/tensorflow/compiler/xla/service/mlir_gpu/lhlo_dialect_emitter.cc @@ -205,7 +205,7 @@ LhloDialectEmitter::LhloDialectEmitter( platform_(platform) { LLVMDialect* llvmDialect = mlir_module.getContext()->getRegisteredDialect(); - pointer_size_ = 
llvmDialect->getLLVMModule().getDataLayout().getPointerSize(); + pointer_size_ = llvmDialect->getDataLayout().getPointerSize(); } void LhloDialectEmitter::AddThunkToThunkSequence(std::unique_ptr thunk) { diff --git a/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler.cc b/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler.cc index 458522f89e6..df2bd2e4c23 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler.cc +++ b/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler.cc @@ -30,18 +30,14 @@ namespace { using ::mlir::MLIRContext; using ::mlir::LLVM::LLVMDialect; -int64 ConfigureLLVMModuleAndGetPointerSize(MLIRContext* context) { +int64 GetPointerSize(MLIRContext* context) { LLVMDialect* dialect = context->getRegisteredDialect(); - llvm::Module& module = dialect->getLLVMModule(); - module.setTargetTriple(gpu::nvptx::kTargetTriple); - module.setDataLayout(gpu::nvptx::kDataLayout); - return module.getDataLayout().getPointerSize(); + return dialect->getDataLayout().getPointerSize(); } } // namespace -MlirCompiler::MlirCompiler() - : pointer_size_(ConfigureLLVMModuleAndGetPointerSize(&context_)) {} +MlirCompiler::MlirCompiler() : pointer_size_(GetPointerSize(&context_)) {} se::Platform::Id MlirCompiler::PlatformId() const { return stream_executor::cuda::kCudaPlatformId; diff --git a/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler_impl.cc b/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler_impl.cc index 25a35a89cb4..4879c6b5099 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler_impl.cc +++ b/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler_impl.cc @@ -18,6 +18,7 @@ limitations under the License. #include #include "absl/container/flat_hash_map.h" +#include "llvm/IR/LLVMContext.h" #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h" // from @llvm-project #include "mlir/Dialect/GPU/GPUDialect.h" // from @llvm-project #include "mlir/Dialect/LLVMIR/LLVMDialect.h" // from @llvm-project @@ -543,7 +544,11 @@ StatusOr> MlirCompilerImpl::RunBackend( TF_RETURN_IF_ERROR( module_hook_.invoke(IRHook::LoweringStage::KERNEL, *kernel_module)); - auto llvmModule = mlir::translateModuleToNVVMIR(*kernel_module); + // Translate to LLVM IR in a fresh context. The module is further translated + // to textual PTX and a CUBIN blob so there is no need for the context to live + // longer than this function. + llvm::LLVMContext llvmContext; + auto llvmModule = mlir::translateModuleToNVVMIR(*kernel_module, llvmContext); if (!llvmModule) { return InternalError("Translation to LLVM failed"); diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 07b9950bca2..0b4898b2c35 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -712,8 +712,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. 
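    # As a rough guide (assumed workflow): a bump like this only edits the two
    # pinned values below, and since LLVM_URLS is derived from LLVM_COMMIT, the
    # LLVM_SHA256 has to be recomputed for the new archive, e.g.
    #   curl -L https://github.com/llvm/llvm-project/archive/<LLVM_COMMIT>.tar.gz | sha256sum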
- LLVM_COMMIT = "9dbdaea9a0e6f58417b5bd8980e7ea6723fd1783" - LLVM_SHA256 = "1ae491e33bb35777cf5f38acd183ce3ca2aff255c15254ae97084bcbd2e4aa56" + LLVM_COMMIT = "b6d9add71b1a7bc77ce504ed09a43288ca67c0cd" + LLVM_SHA256 = "60160d35f22445819da148af4ac1119d9275f7c363a841748593e276c284fa20" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), diff --git a/third_party/llvm/llvm.autogenerated.BUILD b/third_party/llvm/llvm.autogenerated.BUILD index befc20c4fab..e3ae2c9e889 100644 --- a/third_party/llvm/llvm.autogenerated.BUILD +++ b/third_party/llvm/llvm.autogenerated.BUILD @@ -1556,7 +1556,9 @@ cc_library( ":BPFInfo", ":CodeGen", ":Core", + ":IPO", ":MC", + ":Scalar", ":SelectionDAG", ":Support", ":Target", @@ -1763,6 +1765,7 @@ cc_library( ":Core", ":Instrumentation", ":MC", + ":Passes", ":ProfileData", ":Scalar", ":Support", From 24bbe30d1ee97f7ae051b5c52d70c2a2b94a1bb4 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 8 Aug 2020 05:03:25 -0700 Subject: [PATCH 2399/2522] Integrate LLVM at llvm/llvm-project@38537307e502 Updates LLVM usage to match [38537307e502](https://github.com/llvm/llvm-project/commit/38537307e502) PiperOrigin-RevId: 325590988 Change-Id: I8ee6b700411d243d6df8e044a54b72be8323a6c4 --- tensorflow/workspace.bzl | 4 ++-- third_party/llvm/llvm.autogenerated.BUILD | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 0b4898b2c35..a726bf642d1 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -712,8 +712,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "b6d9add71b1a7bc77ce504ed09a43288ca67c0cd" - LLVM_SHA256 = "60160d35f22445819da148af4ac1119d9275f7c363a841748593e276c284fa20" + LLVM_COMMIT = "38537307e502c1ac9a09e6f75f9208db1327a0bf" + LLVM_SHA256 = "c801bf0f2ebfce86dbf7ad39c40ee371f422e8d07213f4ca67e5e46c7cb200ed" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), diff --git a/third_party/llvm/llvm.autogenerated.BUILD b/third_party/llvm/llvm.autogenerated.BUILD index e3ae2c9e889..13bc7bf2902 100644 --- a/third_party/llvm/llvm.autogenerated.BUILD +++ b/third_party/llvm/llvm.autogenerated.BUILD @@ -1765,7 +1765,6 @@ cc_library( ":Core", ":Instrumentation", ":MC", - ":Passes", ":ProfileData", ":Scalar", ":Support", From 11305e9e40f3a992fe4f9edc8f1acfcc024e4ca1 Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Sat, 8 Aug 2020 07:43:57 -0700 Subject: [PATCH 2400/2522] Removes most run_deprecated_v1 and run_v1_only from scatter_nd_ops_test. 
The others will be removed in another cl PiperOrigin-RevId: 325599122 Change-Id: Ic0d8827514b2feb79abf7b05cdbecfa901cad7cb --- tensorflow/python/framework/test_util.py | 4 +- .../kernel_tests/scatter_nd_ops_test.py | 331 +++++++++--------- .../python/kernel_tests/v1_compat_tests/BUILD | 12 + .../v1_compat_tests/scatter_nd_ops_test.py | 159 +++++++++ 4 files changed, 330 insertions(+), 176 deletions(-) create mode 100644 tensorflow/python/kernel_tests/v1_compat_tests/scatter_nd_ops_test.py diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index 15f4507b5e2..4d7b7746b9c 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -3295,8 +3295,8 @@ def _fake_gradient_tape_context_manager(): def watch(self, x): pass - def gradient(self, y, x): - result = gradients_impl.gradients(y, x) + def gradient(self, y, x, grad_ys=None): + result = gradients_impl.gradients(y, x, grad_ys) # Unlike `tape.gradient()`, `tf.gradients()` returns a list for a single # element. So unpack if needed to match `tape.gradient()` behavior. diff --git a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py index d6768712d65..c5e5e549ee7 100644 --- a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py +++ b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py @@ -20,18 +20,18 @@ from __future__ import print_function import functools +from absl.testing import parameterized import numpy as np -from tensorflow.python.client import session from tensorflow.python.eager import context from tensorflow.python.eager import def_function from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors +from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gradient_checker -from tensorflow.python.ops import gradients_impl +from tensorflow.python.ops import gradient_checker_v2 from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import state_ops from tensorflow.python.ops import variables @@ -115,7 +115,7 @@ class StatefulScatterNdTest(test.TestCase): np.random.seed(8) ref_shapes = [(3, 6), (3, 6), (3, 6, 9), (3, 6, 9), (3, 6, 9), (3, 6, 9)] indices_shapes = [(2,), (2, 2), (2,), (2, 2), (2, 3), (2, 3, 3)] - with self.cached_session(use_gpu=True): + with test_util.device(use_gpu=True): for ref_shape, indices_shape in zip(ref_shapes, indices_shapes): num_updates = indices_shape[0] ixdim = indices_shape[-1] @@ -151,7 +151,7 @@ class StatefulScatterNdTest(test.TestCase): # Scatter via tensorflow ref_var = variables.VariableV1(ref) self.evaluate(ref_var.initializer) - tf_scatter(ref_var, indices, updates).eval() + self.evaluate(tf_scatter(ref_var, indices, updates)) # Compare self.assertAllClose(new, self.evaluate(ref_var)) @@ -187,7 +187,6 @@ class StatefulScatterNdTest(test.TestCase): self.assertAllEqual(self.evaluate(update), [b"qq", b"cc", b"ee", b"dd", b"aa", b"", b"", b"bb"]) - @test_util.run_deprecated_v1 def testSimpleResource(self): indices = constant_op.constant([[4], [3], [1], [7]], dtype=dtypes.int32) updates = constant_op.constant([9, 10, 11, 12], dtype=dtypes.float32) @@ -195,10 +194,9 @@ class StatefulScatterNdTest(test.TestCase): [0, 0, 0, 0, 0, 0, 0, 0], dtype=dtypes.float32) expected = np.array([0, 11, 0, 10, 9, 0, 0, 12]) scatter = 
state_ops.scatter_nd_update(ref, indices, updates) - init = variables.global_variables_initializer() - with self.session(use_gpu=True) as sess: - self.evaluate(init) + with test_util.device(use_gpu=True): + self.evaluate(ref.initializer) self.evaluate(scatter) self.assertAllClose(ref, expected) @@ -230,15 +228,12 @@ class StatefulScatterNdTest(test.TestCase): result = self.evaluate(scatter) self.assertAllClose(result, expected) - @test_util.run_deprecated_v1 def testVariableRankUpdate(self): self._VariableRankTests(_NumpyUpdate, state_ops.scatter_nd_update) - @test_util.run_deprecated_v1 def testVariableRankAdd(self): self._VariableRankTests(_NumpyAdd, state_ops.scatter_nd_add) - @test_util.run_deprecated_v1 def testVariableRankSub(self): self._VariableRankTests(_NumpySub, state_ops.scatter_nd_sub) @@ -256,13 +251,10 @@ class StatefulScatterNdTest(test.TestCase): self._VariableRankTest( np_scatter, tf_scatter, vtype, itype, repeat_indices=True) - @test_util.run_v1_only("b/120545219") def testScatterRepeatIndices(self): """This tests scatter_add using indices that repeat.""" self._ScatterRepeatIndicesTest(_NumpyAdd, state_ops.scatter_nd_add) self._ScatterRepeatIndicesTest(_NumpySub, state_ops.scatter_nd_sub) - self._ScatterRepeatIndicesTest(_NumpyMin, state_ops.scatter_nd_min) - self._ScatterRepeatIndicesTest(_NumpyMax, state_ops.scatter_nd_max) # TODO(ebrevdo): Re-enable when we need ScatterNdMul and ScatterNdDiv. # self._ScatterRepeatIndicesTest(_NumpyMul, state_ops.scatter_nd_mul) # self._ScatterRepeatIndicesTest(_NumpyDiv, state_ops.scatter_nd_div) @@ -280,34 +272,32 @@ class StatefulScatterNdTest(test.TestCase): # session.run([update0, update1]) # self.assertAllEqual([False, True], self.evaluate(var)) - @test_util.run_v1_only("b/120545219") def testScatterOutOfRangeCpu(self): # TODO(simister): Re-enable once binary size increase due to # scatter_nd ops is under control. # tf.scatter_nd_mul, tf.scatter_nd_div, for op in (state_ops.scatter_nd_add, state_ops.scatter_nd_sub, - state_ops.scatter_nd_min, state_ops.scatter_nd_max, state_ops.scatter_nd_update): params = np.array([1, 2, 3, 4, 5, 6]).astype(np.float32) updates = np.array([-3, -4, -5]).astype(np.float32) - with self.cached_session(use_gpu=False): + with test_util.device(use_gpu=False): ref = variables.VariableV1(params) self.evaluate(ref.initializer) # Indices all in range, no problem. indices = np.array([[2], [0], [5]]) - op(ref, indices, updates).eval() + self.evaluate(op(ref, indices, updates)) # Test some out of range errors. 
indices = np.array([[-1], [0], [5]]) with self.assertRaisesOpError( r"indices\[0\] = \[-1\] does not index into shape \[6\]"): - op(ref, indices, updates).eval() + self.evaluate(op(ref, indices, updates)) indices = np.array([[2], [0], [6]]) with self.assertRaisesOpError( r"indices\[2\] = \[6\] does not index into shape \[6\]"): - op(ref, indices, updates).eval() + self.evaluate(op(ref, indices, updates)) def testRank3ValidShape(self): indices = array_ops.zeros([2, 2, 2], dtypes.int32) @@ -318,7 +308,6 @@ class StatefulScatterNdTest(test.TestCase): state_ops.scatter_nd_update(ref, indices, updates).get_shape().as_list(), shape) - @test_util.run_v1_only("b/120545219") @test_util.disable_xla("b/123337890") # Error messages differ def testResVarInvalidOutputShape(self): res = variables.Variable( @@ -329,7 +318,6 @@ class StatefulScatterNdTest(test.TestCase): with self.assertRaisesOpError("Output must be at least 1-D"): state_ops.scatter_nd_update(res, [[0]], [0.22]).eval() - @test_util.run_deprecated_v1 def testExtraIndicesDimensions(self): indices = array_ops.zeros([1, 1, 2], dtypes.int32) updates = array_ops.zeros([1, 1], dtypes.int32) @@ -363,7 +351,6 @@ class StatefulScatterNdTest(test.TestCase): ValueError, r"The inner \d+ dimensions of input\.shape="): state_ops.scatter_nd_update(ref, indices, updates) - @test_util.run_deprecated_v1 def testConcurrentUpdates(self): num_updates = 10000 update_values = np.random.rand(num_updates) @@ -377,10 +364,9 @@ class StatefulScatterNdTest(test.TestCase): scatter = state_ops.scatter_nd_add(ref, indices, updates) init = variables.global_variables_initializer() - with session.Session() as sess: - self.evaluate(init) - result = self.evaluate(scatter) - assert np.allclose(result, expected_result) + self.evaluate(init) + result = self.evaluate(scatter) + assert np.allclose(result, expected_result) # TODO(fpmc): Re-enable this test when gpu_pip test actually runs on a GPU. def _disabledTestScatterOutOfRangeGpu(self): @@ -410,7 +396,7 @@ class StatefulScatterNdTest(test.TestCase): op(ref, indices, updates).eval() -class ScatterNdTest(test.TestCase): +class ScatterNdTest(test.TestCase, parameterized.TestCase): non_aliasing_add_test = False def scatter_nd(self, indices, updates, shape, input_=None): @@ -492,7 +478,6 @@ class ScatterNdTest(test.TestCase): self.assertAllEqual( self.scatter_nd(indices, updates, shape).get_shape().as_list(), shape) - @test_util.run_deprecated_v1 def testExtraIndicesDimensions(self): indices = array_ops.zeros([1, 1, 2], dtypes.int32) updates = array_ops.zeros([1, 1], dtypes.int32) @@ -500,29 +485,31 @@ class ScatterNdTest(test.TestCase): scatter = self.scatter_nd(indices, updates, shape) self.assertAllEqual(scatter.get_shape().as_list(), shape) expected_result = np.zeros([2, 2], dtype=np.int32) - with self.cached_session(): - self.assertAllEqual(expected_result, self.evaluate(scatter)) + self.assertAllEqual(expected_result, self.evaluate(scatter)) - @test_util.run_deprecated_v1 def testUndefinedIndicesShape(self): - indices = array_ops.placeholder(dtypes.int32, shape=None) - updates = array_ops.placeholder(dtypes.int32, shape=[2, 2, 2]) - shape = constant_op.constant([2, 2, 2], dtypes.int32) - self.scatter_nd(indices, updates, shape) + # Placeholders are only valid in Graph. 
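    # Graph-only constructs like placeholders are wrapped explicitly here instead
    # of relying on @test_util.run_deprecated_v1; the gradient tests below do the
    # analogous migration via test_util.AbstractGradientTape, roughly
    #   with test_util.AbstractGradientTape(use_tape=use_tape) as tape:
    #     tape.watch(x)
    #     y = some_op(x)  # hypothetical op under test
    #   dy_dx = tape.gradient(y, x)
    # where use_tape=True exercises tf.GradientTape and use_tape=False falls back
    # to tf.gradients through the fake tape updated in test_util.py above.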
+ with ops.Graph().as_default(): + indices = array_ops.placeholder(dtypes.int32, shape=None) + updates = array_ops.placeholder(dtypes.int32, shape=[2, 2, 2]) + shape = constant_op.constant([2, 2, 2], dtypes.int32) + self.scatter_nd(indices, updates, shape) - @test_util.run_deprecated_v1 def testUndefinedUpdatesShape(self): - indices = array_ops.placeholder(dtypes.int32, shape=[2, 2, 2]) - updates = array_ops.placeholder(dtypes.int32, shape=None) - shape = constant_op.constant([2, 2, 2], dtypes.int32) - self.scatter_nd(indices, updates, shape) + # Placeholders are only valid in Graph. + with ops.Graph().as_default(): + indices = array_ops.placeholder(dtypes.int32, shape=[2, 2, 2]) + updates = array_ops.placeholder(dtypes.int32, shape=None) + shape = constant_op.constant([2, 2, 2], dtypes.int32) + self.scatter_nd(indices, updates, shape) - @test_util.run_deprecated_v1 def testUndefinedOutputShape(self): - indices = array_ops.placeholder(dtypes.int32, shape=[2, 2, 2]) - updates = array_ops.placeholder(dtypes.int32, shape=[2, 2, 2]) - shape = array_ops.placeholder(dtypes.int32, shape=[None]) - self.scatter_nd(indices, updates, shape) + # Placeholders are only valid in Graph. + with ops.Graph().as_default(): + indices = array_ops.placeholder(dtypes.int32, shape=[2, 2, 2]) + updates = array_ops.placeholder(dtypes.int32, shape=[2, 2, 2]) + shape = array_ops.placeholder(dtypes.int32, shape=[None]) + self.scatter_nd(indices, updates, shape) @test_util.run_deprecated_v1 def testEmptyOutputShape1(self): @@ -534,21 +521,21 @@ class ScatterNdTest(test.TestCase): ValueError, "Indices and updates specified for empty output shape"): self.scatter_nd(indices, updates, shape) - @test_util.run_v1_only("b/120545219") def testEmptyOutputShape2(self): - indices = array_ops.placeholder(dtypes.int32, shape=None) - updates = array_ops.placeholder(dtypes.int32, shape=None) - shape = constant_op.constant([0, 3, 2], dtypes.int32) + with ops.Graph().as_default(): + indices = array_ops.placeholder(dtypes.int32, shape=None) + updates = array_ops.placeholder(dtypes.int32, shape=None) + shape = constant_op.constant([0, 3, 2], dtypes.int32) - with self.cached_session(): - with self.assertRaisesOpError( - "Indices and updates specified for empty output"): - self.scatter_nd(indices, updates, shape).eval(feed_dict={ - indices: np.zeros([2, 2, 2], dtype=np.int32), - updates: np.zeros([2, 2, 2], dtype=np.int32) - }) + with self.cached_session(): + with self.assertRaisesOpError( + "Indices and updates specified for empty output"): + self.scatter_nd(indices, updates, shape).eval( + feed_dict={ + indices: np.zeros([2, 2, 2], dtype=np.int32), + updates: np.zeros([2, 2, 2], dtype=np.int32) + }) - @test_util.run_deprecated_v1 def testEmptyOutputShape3(self): indices = array_ops.zeros([0], dtypes.int32) updates = array_ops.zeros([0], dtypes.int32) @@ -576,139 +563,138 @@ class ScatterNdTest(test.TestCase): ValueError, r"The inner \d+ dimensions of (input|output)\.shape="): self.scatter_nd(indices, updates, shape) - @test_util.run_deprecated_v1 - def testGradientsRank2ElementUpdate(self): + @parameterized.parameters(set((True, context.executing_eagerly()))) + def testGradientsRank2ElementUpdate(self, use_tape): for dtype in GRADIENT_TESTS_DTYPES: - indices = constant_op.constant([[0, 0], [1, 1]], dtype=dtypes.int32) - updates = constant_op.constant([1, 4], dtype=dtype) - shape = constant_op.constant([2, 2], dtype=dtypes.int32) - input_ = array_ops.zeros(shape, dtype=dtype) - outputs = self.scatter_nd(indices, updates, shape, input_) + 
with test_util.AbstractGradientTape(use_tape=use_tape) as tape: + indices = constant_op.constant([[0, 0], [1, 1]], dtype=dtypes.int32) + updates = constant_op.constant([1, 4], dtype=dtype) + tape.watch(updates) + shape = constant_op.constant([2, 2], dtype=dtypes.int32) + input_ = array_ops.zeros(shape, dtype=dtype) + tape.watch(input_) + outputs = self.scatter_nd(indices, updates, shape, input_) - grad_vals = constant_op.constant([[1, 2], [3, 4]], dtype=dtype) - updates_grad, input_grad = gradients_impl.gradients( - [outputs], [updates, input_], [grad_vals]) + grad_vals = constant_op.constant([[1, 2], [3, 4]], dtype=dtype) + + updates_grad, input_grad = tape.gradient([outputs], [updates, input_], + [grad_vals]) expected_updates_grad = np.array([1, 4], dtype=dtype.as_numpy_dtype()) expected_input_grad = np.array([[1, 2], [3, 4]], dtype=dtype.as_numpy_dtype()) - with self.cached_session(): - self.assertAllEqual(expected_updates_grad, self.evaluate(updates_grad)) - if self.non_aliasing_add_test: - self.assertAllEqual(expected_input_grad, self.evaluate(input_grad)) + self.assertAllEqual(expected_updates_grad, self.evaluate(updates_grad)) + if self.non_aliasing_add_test: + self.assertAllEqual(expected_input_grad, self.evaluate(input_grad)) - @test_util.run_deprecated_v1 - def testGradientsRank2SliceUpdate(self): + @parameterized.parameters(set((True, context.executing_eagerly()))) + def testGradientsRank2SliceUpdate(self, use_tape): for dtype in GRADIENT_TESTS_DTYPES: - indices = constant_op.constant([[1], [0]], dtype=dtypes.int32) - updates = constant_op.constant([[3, 4], [1, 2]], dtype=dtype) - shape = constant_op.constant([2, 2], dtype=dtypes.int32) - input_ = array_ops.zeros(shape, dtype=dtype) - outputs = self.scatter_nd(indices, updates, shape, input_) + with test_util.AbstractGradientTape(use_tape=use_tape) as tape: + indices = constant_op.constant([[1], [0]], dtype=dtypes.int32) + updates = constant_op.constant([[3, 4], [1, 2]], dtype=dtype) + tape.watch(updates) + shape = constant_op.constant([2, 2], dtype=dtypes.int32) + input_ = array_ops.zeros(shape, dtype=dtype) + tape.watch(input_) + outputs = self.scatter_nd(indices, updates, shape, input_) - grad_vals = constant_op.constant([[3, 4], [1, 2]], dtype=dtype) - updates_grad, input_grad = gradients_impl.gradients( - [outputs], [updates, input_], [grad_vals]) + grad_vals = constant_op.constant([[3, 4], [1, 2]], dtype=dtype) + updates_grad, input_grad = tape.gradient([outputs], [updates, input_], + [grad_vals]) expected_updates_grad = np.array([[1, 2], [3, 4]], dtype=dtype.as_numpy_dtype()) expected_input_grad = np.array([[3, 4], [1, 2]], dtype=dtype.as_numpy_dtype()) - with self.cached_session(): - self.assertAllEqual(expected_updates_grad, self.evaluate(updates_grad)) - if self.non_aliasing_add_test: - self.assertAllEqual(expected_input_grad, self.evaluate(input_grad)) + self.assertAllEqual(expected_updates_grad, self.evaluate(updates_grad)) + if self.non_aliasing_add_test: + self.assertAllEqual(expected_input_grad, self.evaluate(input_grad)) - @test_util.run_deprecated_v1 - def testGradientsRank3SliceUpdate(self): + @parameterized.parameters(set((True, context.executing_eagerly()))) + def testGradientsRank3SliceUpdate(self, use_tape): for dtype in GRADIENT_TESTS_DTYPES: - indices = constant_op.constant([[[0, 1], [1, 0]], [[0, 0], [1, 1]]], - dtype=dtypes.int32) - updates = constant_op.constant([[[5, 7], [2, 4]], [[1, 3], [6, 8]]], - dtype=dtype) - shape = constant_op.constant([2, 2, 2], dtype=dtypes.int32) - input_ = 
array_ops.zeros(shape, dtype=dtype) - outputs = self.scatter_nd(indices, updates, shape, input_) - - grad_vals = constant_op.constant([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], + with test_util.AbstractGradientTape(use_tape=use_tape) as tape: + indices = constant_op.constant([[[0, 1], [1, 0]], [[0, 0], [1, 1]]], + dtype=dtypes.int32) + updates = constant_op.constant([[[5, 7], [2, 4]], [[1, 3], [6, 8]]], dtype=dtype) - updates_grad, input_grad = gradients_impl.gradients( - [outputs], [updates, input_], [grad_vals]) + tape.watch(updates) + shape = constant_op.constant([2, 2, 2], dtype=dtypes.int32) + input_ = array_ops.zeros(shape, dtype=dtype) + tape.watch(input_) + outputs = self.scatter_nd(indices, updates, shape, input_) + + grad_vals = constant_op.constant([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], + dtype=dtype) + updates_grad, input_grad = tape.gradient([outputs], [updates, input_], + [grad_vals]) expected_updates_grad = np.array([[[3, 4], [5, 6]], [[1, 2], [7, 8]]], dtype=dtype.as_numpy_dtype()) expected_input_grad = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], dtype=dtype.as_numpy_dtype()) - with self.cached_session(): - self.assertAllEqual(expected_updates_grad, self.evaluate(updates_grad)) - if self.non_aliasing_add_test: - self.assertAllEqual(expected_input_grad, self.evaluate(input_grad)) + self.assertAllEqual(expected_updates_grad, self.evaluate(updates_grad)) + if self.non_aliasing_add_test: + self.assertAllEqual(expected_input_grad, self.evaluate(input_grad)) - @test_util.run_deprecated_v1 - def testGradientsRank7SliceUpdate(self): + @parameterized.parameters(set((True, context.executing_eagerly()))) + def testGradientsRank7SliceUpdate(self, use_tape): for dtype in GRADIENT_TESTS_DTYPES: - indices = constant_op.constant( - [[[[[[[0, 0, 0, 0, 0, 1], [0, 0, 1, 0, 0, 0]]]], - [[[[0, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 1]]]]]]], - dtype=dtypes.int32) - updates = constant_op.constant( - [[[[[[[5, 6], [2, 4]]]], [[[[1, 3], [6, 8]]]]]]], dtype=dtype) - shape = constant_op.constant([1, 1, 2, 1, 1, 2, 2], dtype=dtypes.int32) - input_ = array_ops.zeros(shape, dtype=dtype) - outputs = self.scatter_nd(indices, updates, shape, input_) + with test_util.AbstractGradientTape(use_tape=use_tape) as tape: + indices = constant_op.constant( + [[[[[[[0, 0, 0, 0, 0, 1], [0, 0, 1, 0, 0, 0]]]], + [[[[0, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 1]]]]]]], + dtype=dtypes.int32) + updates = constant_op.constant( + [[[[[[[5, 6], [2, 4]]]], [[[[1, 3], [6, 8]]]]]]], dtype=dtype) + tape.watch(updates) + shape = constant_op.constant([1, 1, 2, 1, 1, 2, 2], dtype=dtypes.int32) + input_ = array_ops.zeros(shape, dtype=dtype) + tape.watch(input_) + outputs = self.scatter_nd(indices, updates, shape, input_) - grad_vals = constant_op.constant( - [[[[[[[1, 2], [3, 4]]]], [[[[5, 6], [7, 8]]]]]]], dtype=dtype) - updates_grad, input_grad = gradients_impl.gradients( - [outputs], [updates, input_], [grad_vals]) + grad_vals = constant_op.constant( + [[[[[[[1, 2], [3, 4]]]], [[[[5, 6], [7, 8]]]]]]], dtype=dtype) + updates_grad, input_grad = tape.gradient([outputs], [updates, input_], + [grad_vals]) expected_updates_grad = np.array( [[[[[[[3, 4], [5, 6]]]], [[[[1, 2], [7, 8]]]]]]], dtype=dtype.as_numpy_dtype()) expected_input_grad = np.array( [[[[[[[1, 2], [3, 4]]]], [[[[5, 6], [7, 8]]]]]]], dtype=dtype.as_numpy_dtype()) - with self.cached_session(): - self.assertAllEqual(expected_updates_grad, self.evaluate(updates_grad)) - if self.non_aliasing_add_test: - self.assertAllEqual(expected_input_grad, self.evaluate(input_grad)) + 
self.assertAllEqual(expected_updates_grad, self.evaluate(updates_grad)) + if self.non_aliasing_add_test: + self.assertAllEqual(expected_input_grad, self.evaluate(input_grad)) - @test_util.run_deprecated_v1 def testScatterNdRepeatedIndicesAdd(self): indices = array_ops.zeros([100000, 1], dtypes.int32) values = np.random.randn(100000) shape = [1] - with self.cached_session(): - val = self.scatter_nd(indices, values, shape).eval() + val = self.evaluate(self.scatter_nd(indices, values, shape)) self.assertAllClose([np.sum(values)], val) - @test_util.run_deprecated_v1 def testSmokeScatterNdBatch2DSliceDim2(self): - with self.cached_session(): - indices = array_ops.zeros([3, 5, 2], dtype=dtypes.int32) - values = array_ops.zeros([3, 5, 7]) - shape = [4, 6, 7] - self.scatter_nd(indices, values, shape).eval() + indices = array_ops.zeros([3, 5, 2], dtype=dtypes.int32) + values = array_ops.zeros([3, 5, 7]) + shape = [4, 6, 7] + self.evaluate(self.scatter_nd(indices, values, shape)) - @test_util.run_deprecated_v1 def testSmokeScatterNdBatch1DSliceDim2(self): - with self.cached_session(): - indices = array_ops.zeros([0, 2], dtype=dtypes.int32) - values = array_ops.zeros([0, 7]) - shape = [4, 6, 7] - self.scatter_nd(indices, values, shape).eval() + indices = array_ops.zeros([0, 2], dtype=dtypes.int32) + values = array_ops.zeros([0, 7]) + shape = [4, 6, 7] + self.evaluate(self.scatter_nd(indices, values, shape)) - @test_util.run_deprecated_v1 def testSmokeScatterNdBatch1DSliceDim3ShapeRank7(self): - with self.cached_session(): - indices = array_ops.zeros([1, 3], dtype=dtypes.int32) - values = array_ops.zeros([1, 6, 7, 8, 9]) - shape = [3, 4, 5, 6, 7, 8, 9] - self.scatter_nd(indices, values, shape).eval() + indices = array_ops.zeros([1, 3], dtype=dtypes.int32) + values = array_ops.zeros([1, 6, 7, 8, 9]) + shape = [3, 4, 5, 6, 7, 8, 9] + self.evaluate(self.scatter_nd(indices, values, shape)) - @test_util.run_deprecated_v1 def testSmokeScatterNdBatch2DSliceDim3ShapeRank7(self): - with self.cached_session(): - indices = array_ops.zeros([1, 2, 3], dtype=dtypes.int32) - values = array_ops.zeros([1, 2, 6, 7, 8, 9]) - shape = [3, 4, 5, 6, 7, 8, 9] - self.scatter_nd(indices, values, shape).eval() + indices = array_ops.zeros([1, 2, 3], dtype=dtypes.int32) + values = array_ops.zeros([1, 2, 6, 7, 8, 9]) + shape = [3, 4, 5, 6, 7, 8, 9] + self.evaluate(self.scatter_nd(indices, values, shape)) class ScatterNdNonAliasingAddTest(ScatterNdTest): @@ -742,37 +728,34 @@ class ScatterNdTensorTest(test.TestCase): self.assertAllEqual(subbed, constant_op.constant([1, -10, 1, -9, -8, 1, 1, -11])) - @test_util.run_v1_only("b/120545219") def testUpdateAddSubGradients(self): - with self.cached_session(): indices = constant_op.constant([[3], [1]]) updates = constant_op.constant([9, 10], dtype=dtypes.float32) x = array_ops.ones([4], dtype=dtypes.float32) - assigned = array_ops.tensor_scatter_update(x, indices, updates) - added = array_ops.tensor_scatter_add(x, indices, updates) - subbed = array_ops.tensor_scatter_sub(x, indices, updates) + theoretical, numerical = gradient_checker_v2.compute_gradient( + lambda x: array_ops.tensor_scatter_update(x, indices, updates), [x]) + self.assertAllClose(theoretical, numerical, 5e-4, 5e-4) + theoretical, numerical = gradient_checker_v2.compute_gradient( + lambda x: array_ops.tensor_scatter_add(x, indices, updates), [x]) + self.assertAllClose(theoretical, numerical, 5e-4, 5e-4) + theoretical, numerical = gradient_checker_v2.compute_gradient( + lambda x: array_ops.tensor_scatter_sub(x, indices, 
updates), [x]) + self.assertAllClose(theoretical, numerical, 5e-4, 5e-4) - err_assigned = gradient_checker.compute_gradient_error( - x, [4], assigned, [4]) - err_added = gradient_checker.compute_gradient_error(x, [4], added, [4]) - err_subbed = gradient_checker.compute_gradient_error(x, [4], subbed, [4]) - - self.assertLess(err_assigned, 2e-4) - self.assertLess(err_added, 2e-4) - self.assertLess(err_subbed, 2e-4) - - err_assigned_wrt_updates = gradient_checker.compute_gradient_error( - updates, [2], assigned, [4]) - err_added_wrt_updates = gradient_checker.compute_gradient_error( - updates, [2], added, [4]) - err_subbed_wrt_updates = gradient_checker.compute_gradient_error( - updates, [2], subbed, [4]) - - self.assertLess(err_assigned_wrt_updates, 2e-4) - self.assertLess(err_added_wrt_updates, 2e-4) - self.assertLess(err_subbed_wrt_updates, 2e-4) + theoretical, numerical = gradient_checker_v2.compute_gradient( + lambda updates: array_ops.tensor_scatter_update(x, indices, updates), + [updates]) + self.assertAllClose(theoretical, numerical, 5e-4, 5e-4) + theoretical, numerical = gradient_checker_v2.compute_gradient( + lambda updates: array_ops.tensor_scatter_add(x, indices, updates), + [updates]) + self.assertAllClose(theoretical, numerical, 5e-4, 5e-4) + theoretical, numerical = gradient_checker_v2.compute_gradient( + lambda updates: array_ops.tensor_scatter_sub(x, indices, updates), + [updates]) + self.assertAllClose(theoretical, numerical, 5e-4, 5e-4) @test_util.run_in_graph_and_eager_modes def testUpdateMinMax(self): diff --git a/tensorflow/python/kernel_tests/v1_compat_tests/BUILD b/tensorflow/python/kernel_tests/v1_compat_tests/BUILD index 9cd0f4df101..bd9c02d8101 100644 --- a/tensorflow/python/kernel_tests/v1_compat_tests/BUILD +++ b/tensorflow/python/kernel_tests/v1_compat_tests/BUILD @@ -19,6 +19,18 @@ tf_py_test( ], ) +cuda_py_test( + name = "scatter_nd_ops_test", + size = "small", + srcs = ["scatter_nd_ops_test.py"], + deps = [ + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:state_ops", + "//tensorflow/python:variables", + "//third_party/py/numpy", + ], +) + cuda_py_test( name = "session_ops_test", size = "small", diff --git a/tensorflow/python/kernel_tests/v1_compat_tests/scatter_nd_ops_test.py b/tensorflow/python/kernel_tests/v1_compat_tests/scatter_nd_ops_test.py new file mode 100644 index 00000000000..6ee75649867 --- /dev/null +++ b/tensorflow/python/kernel_tests/v1_compat_tests/scatter_nd_ops_test.py @@ -0,0 +1,159 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for scatter_nd_ops that only work in V1.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import functools + +import numpy as np + +from tensorflow.python.framework import test_util +from tensorflow.python.ops import state_ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import test + + +def _AsType(v, vtype): + return v.astype(vtype) if isinstance(v, np.ndarray) else vtype(v) + + +def _FlatInnerDims(tensor, ndims=2): + shape = list(tensor.shape) + return tensor.reshape( + [functools.reduce(lambda x, y: x * y, shape[:-ndims + 1], 1)] + + shape[-ndims + 1:]) + + +def _FlatOuterDims(tensor, ndims=2): + shape = list(tensor.shape) + return tensor.reshape( + shape[:ndims - 1] + + [functools.reduce(lambda x, y: x * y, shape[ndims - 1:], 1)]) + + +def _NumpyScatterNd(ref, indices, updates, op): + ixdim = indices.shape[-1] + num_updates = indices.size // ixdim + total_nd = len(ref.shape) + slice_size = 1 + for i in range(ixdim, total_nd): + slice_size *= ref.shape[i] + flat_indices = _FlatInnerDims(indices) + flat_updates = updates.reshape((num_updates, slice_size)) + output_flat = _FlatOuterDims(ref, ixdim + 1) + for ix_updates, ix_output in enumerate(flat_indices): + ix_output = tuple(ix_output) + output_flat[ix_output] = op(output_flat[ix_output], + flat_updates[ix_updates]) + return output_flat.reshape(ref.shape) + + +def _NumpyMin(ref, indices, updates): + return _NumpyScatterNd(ref, indices, updates, np.minimum) + + +def _NumpyMax(ref, indices, updates): + return _NumpyScatterNd(ref, indices, updates, np.maximum) + + +class StatefulScatterNdTest(test.TestCase): + + def _VariableRankTest(self, + np_scatter, + tf_scatter, + vtype, + itype, + repeat_indices=False): + np.random.seed(8) + ref_shapes = [(3, 6), (3, 6), (3, 6, 9), (3, 6, 9), (3, 6, 9), (3, 6, 9)] + indices_shapes = [(2,), (2, 2), (2,), (2, 2), (2, 3), (2, 3, 3)] + with test_util.device(use_gpu=True): + for ref_shape, indices_shape in zip(ref_shapes, indices_shapes): + num_updates = indices_shape[0] + ixdim = indices_shape[-1] + + indexable_area_shape = () + for i in range(ixdim): + indexable_area_shape += (ref_shape[i],) + all_indices = [ + list(coord) for coord, _ in np.ndenumerate( + np.empty(indexable_area_shape, vtype)) + ] + np.random.shuffle(all_indices) + indices = np.array(all_indices[:num_updates]) + + if num_updates > 1 and repeat_indices: + indices = indices[:num_updates // 2] + for _ in range(num_updates - num_updates // 2): + indices = np.append( + indices, [indices[np.random.randint(num_updates // 2)]], axis=0) + np.random.shuffle(indices) + indices = _AsType(indices[:num_updates], itype) + + updates_shape = (num_updates,) + for i in range(ixdim, len(ref_shape)): + updates_shape += (ref_shape[i],) + updates = _AsType(np.random.randn(*(updates_shape)), vtype) + ref = _AsType(np.random.randn(*(ref_shape)), vtype) + + # Scatter via numpy + new = ref.copy() + np_scatter(new, indices, updates) + # Scatter via tensorflow + ref_var = variables.VariableV1(ref) + self.evaluate(ref_var.initializer) + self.evaluate(tf_scatter(ref_var, indices, updates)) + + # Compare + self.assertAllClose(new, self.evaluate(ref_var)) + + def _ScatterRepeatIndicesTest(self, np_scatter, tf_scatter): + for vtype in (np.int32, np.float16, np.float32, np.float64): + for itype in (np.int32, np.int64): + self._VariableRankTest( + np_scatter, tf_scatter, vtype, 
itype, repeat_indices=True) + + @test_util.run_v1_only("Don't need to test VariableV1 in TF2") + def testScatterRepeatIndicesMinMax(self): + """This tests scatter_add using indices that repeat.""" + self._ScatterRepeatIndicesTest(_NumpyMin, state_ops.scatter_nd_min) + self._ScatterRepeatIndicesTest(_NumpyMax, state_ops.scatter_nd_max) + + @test_util.run_v1_only("Don't need to test VariableV1 in TF2") + def testScatterOutOfRangeCpu(self): + for op in (state_ops.scatter_nd_min, state_ops.scatter_nd_max): + params = np.array([1, 2, 3, 4, 5, 6]).astype(np.float32) + updates = np.array([-3, -4, -5]).astype(np.float32) + with self.cached_session(use_gpu=False): + ref = variables.VariableV1(params) + self.evaluate(ref.initializer) + + # Indices all in range, no problem. + indices = np.array([[2], [0], [5]]) + self.evaluate(op(ref, indices, updates)) + + # Test some out of range errors. + indices = np.array([[-1], [0], [5]]) + with self.assertRaisesOpError( + r"indices\[0\] = \[-1\] does not index into shape \[6\]"): + op(ref, indices, updates).eval() + + indices = np.array([[2], [0], [6]]) + with self.assertRaisesOpError( + r"indices\[2\] = \[6\] does not index into shape \[6\]"): + op(ref, indices, updates).eval() From 2f93ec916b844309b3544e278995fd57a1e049be Mon Sep 17 00:00:00 2001 From: Jose Baiocchi Date: Sat, 8 Aug 2020 08:17:04 -0700 Subject: [PATCH 2401/2522] Mark TraceMe(string&&) as deleted PiperOrigin-RevId: 325600691 Change-Id: Icde5cd4515e4feb31ab90465c8da15a370b6ad28 --- tensorflow/core/profiler/lib/traceme.h | 27 +++++++++----------------- 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/tensorflow/core/profiler/lib/traceme.h b/tensorflow/core/profiler/lib/traceme.h index 64103d95215..526f6d5104d 100644 --- a/tensorflow/core/profiler/lib/traceme.h +++ b/tensorflow/core/profiler/lib/traceme.h @@ -97,30 +97,21 @@ class TraceMe { #endif } - // string&& constructor to prevent an unnecessary string copy, e.g. when a - // TraceMe is constructed based on the result of a StrCat operation. - // Note: We can't take the string by value because a) it would make the - // overloads ambiguous, and b) we want lvalue strings to use the string_view - // constructor so we avoid copying them when tracing is disabled. - explicit TraceMe(std::string&& name, int level = 1) { - DCHECK_GE(level, 1); -#if !defined(IS_MOBILE_PLATFORM) - if (TF_PREDICT_FALSE(TraceMeRecorder::Active(level))) { - new (&no_init_.name) std::string(std::move(name)); - start_time_ = EnvTime::NowNanos(); - } -#endif - } + // Do not allow passing a temporary string as the overhead of generating that + // string should only be incurred when tracing is enabled. Wrap the temporary + // string generation (e.g., StrCat) in a lambda and use the name_generator + // template instead. + explicit TraceMe(std::string&& name, int level = 1) = delete; // Do not allow passing strings by reference or value since the caller // may unintentionally maintain ownership of the name. - // Explicitly std::move the name or wrap it in a string_view if - // you really wish to maintain ownership. + // Explicitly wrap the name in a string_view if you really wish to maintain + // ownership of a string already generated for other purposes. For temporary + // strings (e.g., result of StrCat) use the name_generator template. explicit TraceMe(const std::string& name, int level = 1) = delete; // This overload is necessary to make TraceMe's with string literals work. 
- // Otherwise, the string&& and the string_view constructor would be equally - // good overload candidates. + // Otherwise, the name_generator template would be used. explicit TraceMe(const char* raw, int level = 1) : TraceMe(absl::string_view(raw), level) {} From 79b77e8bfd95d614f5fabea72ad06d18afda284c Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Sat, 8 Aug 2020 13:07:42 -0700 Subject: [PATCH 2402/2522] Fix C++14 build. PiperOrigin-RevId: 325618095 Change-Id: Ib9d47e20a8ab0e432948cf275211725936cec0ce --- tensorflow/compiler/xla/array.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/array.h b/tensorflow/compiler/xla/array.h index 0f31d4c27f5..a85d551769c 100644 --- a/tensorflow/compiler/xla/array.h +++ b/tensorflow/compiler/xla/array.h @@ -297,8 +297,8 @@ class Array { std::mt19937 g(seed); std::normal_distribution distribution(mean, stddev); for (int64 i = 0; i < num_elements(); ++i) { - if constexpr (std::is_same()) { - values_[i] = distribution(g) > 0.0; + if (std::is_same()) { + values_[i] = static_cast(distribution(g) > 0.0); } else { values_[i] = static_cast(distribution(g)); } From 450ce2e30577daec1789d6aee944c8c7799a86ac Mon Sep 17 00:00:00 2001 From: Xingyu Long Date: Sat, 8 Aug 2020 16:51:30 -0400 Subject: [PATCH 2403/2522] Update the GPU description in setup part --- .../python/keras/benchmarks/keras_examples_benchmarks/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/README.md b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/README.md index 3c34dbc68ab..b7a16c516c0 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/README.md +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/README.md @@ -49,7 +49,7 @@ These examples are implemented by Functional API and Sequential API. The listed benchmark results are obtained by running on Google Cloud Platform (GCP) with the following setup:
    -- GPU: 2 x Tesla V100 (only for GPU test)
    +- GPU: 2 x Tesla V100
    - OS: Ubuntu 18.04
    - CPU: 8 x vCPUs, 30 GB memory
    - CUDA: 10.1
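Stepping back to the TraceMe change in patch 2401 above: the new header comments tell callers to defer temporary-string construction by handing TraceMe a lambda (the name_generator template) rather than a std::string&&. Below is a minimal call-site sketch of that pattern; the RunStep wrapper, the includes, and the label text are illustrative assumptions and are not part of the patch itself.

    #include "absl/strings/str_cat.h"
    #include "tensorflow/core/profiler/lib/traceme.h"

    void RunStep(int step_id) {
      // Previously allowed (now a deleted overload): the temporary string is
      // built by StrCat before the constructor runs, so its cost is paid even
      // when tracing is disabled.
      //   tensorflow::profiler::TraceMe trace(absl::StrCat("RunStep#", step_id));

      // Recommended: pass a lambda. TraceMe only invokes it when tracing is
      // active at the requested level, so the StrCat happens lazily.
      tensorflow::profiler::TraceMe trace(
          [&] { return absl::StrCat("RunStep#", step_id); });

      // ... traced work ...
    }

String literals and names already held as absl::string_view keep using the string_view constructor, so they are unaffected by the deleted overload.
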
    From ddba76d1f27f383bfc05c5ecea03a8ebea2f87d8 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Sat, 8 Aug 2020 18:02:41 -0700 Subject: [PATCH 2404/2522] C++17 build without linking libc++ Based on #23561 and #41710, trying to see if this would enable building on C++17 without also linking in `libc++` (which is a Clang lib, does not come from a default GCC install) --- .bazelrc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.bazelrc b/.bazelrc index ddeb2515d70..c15ce04fdfb 100644 --- a/.bazelrc +++ b/.bazelrc @@ -278,6 +278,8 @@ build:dynamic_kernels --copt=-DAUTOLOAD_DYNAMIC_KERNELS build:c++17 --cxxopt=-std=c++1z build:c++17 --cxxopt=-stdlib=libc++ build:c++1z --config=c++17 +build:c++17_gcc --cxxopt=-std=c++1z +build:c++1z_gcc --config=c++17_gcc # Enable using platform specific build settings, except when cross-compiling for # mobile platforms. From 9f62efeba92a72d6696fba46b1a55aa983f8338b Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Sat, 8 Aug 2020 18:06:25 -0700 Subject: [PATCH 2405/2522] Add documentation of the new options --- .bazelrc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.bazelrc b/.bazelrc index c15ce04fdfb..1b9f5e87c6b 100644 --- a/.bazelrc +++ b/.bazelrc @@ -18,8 +18,10 @@ # # Compiler options: # cuda_clang: Use clang when building CUDA code. -# c++17: Build with C++17 options -# c++1z: Build with C++17 options +# c++17: Build with C++17 options (links with libc++) +# c++1z: Build with C++17 options (links with libc++) +# c++17_gcc: Build with C++17 options (links with stdlibc++) +# c++1z_gcc: Build with C++17 options (links with stdlibc++) # avx_linux: Build with avx instruction set on linux. # avx2_linux: Build with avx2 instruction set on linux. # native_arch_linux: Build with instruction sets available to the host machine on linux From cbe30de1d255ad46932aca35f51af30dfb3dd7ee Mon Sep 17 00:00:00 2001 From: Kibeom Kim Date: Sat, 8 Aug 2020 18:34:51 -0700 Subject: [PATCH 2406/2522] Enable more tfrt tests(311) that are passing. 
PiperOrigin-RevId: 325637415 Change-Id: Ied4cb5e4149f6ad60b5dd0eecdb4c34f87e70ca0 --- tensorflow/python/BUILD | 128 ++++++++++++++++++++ tensorflow/python/eager/BUILD | 8 ++ tensorflow/python/kernel_tests/BUILD | 175 +++++++++++++++++++++++++++ 3 files changed, 311 insertions(+) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 039fc945eca..4efe769c59d 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -347,6 +347,7 @@ tf_py_test( "no_pip", "no_windows", ], + tfrt_enabled = True, deps = [ ":platform", ":platform_test", @@ -365,6 +366,7 @@ tf_py_test( "no_pip", "no_windows", ], + tfrt_enabled = True, deps = [ ":platform", ":platform_test", @@ -376,6 +378,7 @@ tf_py_test( size = "small", srcs = ["platform/flags_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":client_testlib", ":platform", @@ -391,6 +394,7 @@ tf_py_test( "no_windows", "nomac", ], + tfrt_enabled = True, deps = [ ":client_testlib", ":platform", @@ -1148,6 +1152,7 @@ tf_py_test( name = "decorator_utils_test", srcs = ["util/decorator_utils_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":client_testlib", ":platform", @@ -1159,6 +1164,7 @@ tf_py_test( name = "deprecation_test", srcs = ["util/deprecation_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":client_testlib", ":platform", @@ -1170,6 +1176,7 @@ tf_py_test( name = "dispatch_test", srcs = ["util/dispatch_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":client_testlib", ":platform", @@ -1181,6 +1188,7 @@ tf_py_test( name = "keyword_args_test", srcs = ["util/keyword_args_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":client_testlib", ":util", @@ -1518,6 +1526,7 @@ tf_py_test( srcs = ["framework/function_def_to_graph_test.py"], python_version = "PY3", tags = ["no_pip"], + tfrt_enabled = True, deps = [ ":array_ops", ":client_testlib", @@ -1663,6 +1672,7 @@ tf_py_test( srcs = ["framework/op_def_util_test.py"], python_version = "PY3", tags = ["no_pip"], + tfrt_enabled = True, ) py_library( @@ -1875,6 +1885,7 @@ tf_py_test( size = "small", srcs = ["framework/smart_cond_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":client_testlib", ":constant_op", @@ -1947,6 +1958,7 @@ tf_py_test( srcs = ["framework/composite_tensor_utils_test.py"], main = "framework/composite_tensor_utils_test.py", python_version = "PY3", + tfrt_enabled = True, deps = [ ":array_ops", ":composite_tensor", @@ -2204,6 +2216,7 @@ tf_py_test( srcs = ["framework/registry_test.py"], main = "framework/registry_test.py", python_version = "PY3", + tfrt_enabled = True, deps = [ ":client_testlib", ":framework_for_generated_wrappers", @@ -2217,6 +2230,7 @@ tf_py_test( srcs = ["framework/errors_test.py"], main = "framework/errors_test.py", python_version = "PY3", + tfrt_enabled = True, deps = [ ":client_testlib", ":errors", @@ -2230,6 +2244,7 @@ tf_py_test( srcs = ["framework/error_interpolation_test.py"], main = "framework/error_interpolation_test.py", python_version = "PY3", + tfrt_enabled = True, deps = [ ":client_testlib", ":constant_op", @@ -2244,6 +2259,7 @@ tf_py_test( srcs = ["framework/subscribe_test.py"], main = "framework/subscribe_test.py", python_version = "PY3", + tfrt_enabled = True, deps = [ ":framework", ":framework_for_generated_wrappers", @@ -2286,6 +2302,7 @@ tf_py_test( tags = [ "no_pip", ], + tfrt_enabled = True, deps = [ ":client_testlib", ":platform", @@ -2298,6 +2315,7 @@ tf_py_test( srcs = ["framework/proto_test.py"], main = 
"framework/proto_test.py", python_version = "PY3", + tfrt_enabled = True, deps = [ ":client_testlib", ":framework_for_generated_wrappers", @@ -2386,6 +2404,7 @@ tf_py_test( srcs = ["framework/versions_test.py"], main = "framework/versions_test.py", python_version = "PY3", + tfrt_enabled = True, deps = [ ":client_testlib", ":framework_for_generated_wrappers", @@ -2398,6 +2417,7 @@ tf_py_test( srcs = ["framework/importer_test.py"], main = "framework/importer_test.py", python_version = "PY3", + tfrt_enabled = True, deps = [ ":array_ops", ":client_testlib", @@ -2434,6 +2454,7 @@ tf_py_test( "no_pip", "no_windows", ], + tfrt_enabled = True, deps = [ ":array_ops", ":client_testlib", @@ -2457,6 +2478,7 @@ tf_py_test( srcs = ["framework/traceable_stack_test.py"], main = "framework/traceable_stack_test.py", python_version = "PY3", + tfrt_enabled = True, deps = [ ":framework_test_lib", ":platform_test", @@ -2511,6 +2533,7 @@ tf_py_test( srcs = ["framework/common_shapes_test.py"], main = "framework/common_shapes_test.py", python_version = "PY3", + tfrt_enabled = True, deps = [ ":framework", ":framework_for_generated_wrappers", @@ -2557,6 +2580,7 @@ tf_py_test( srcs = ["framework/ops_enable_eager_test.py"], main = "framework/ops_enable_eager_test.py", python_version = "PY3", + tfrt_enabled = True, deps = [ ":framework", ":platform_test", @@ -2570,6 +2594,7 @@ tf_py_test( srcs = ["framework/tensor_shape_test.py"], main = "framework/tensor_shape_test.py", python_version = "PY3", + tfrt_enabled = True, deps = [ ":framework_for_generated_wrappers", ":framework_test_lib", @@ -2585,6 +2610,7 @@ tf_py_test( srcs = ["framework/type_spec_test.py"], main = "framework/type_spec_test.py", python_version = "PY3", + tfrt_enabled = True, deps = [ ":framework_for_generated_wrappers", ":framework_test_lib", @@ -2600,6 +2626,7 @@ tf_py_test( srcs = ["framework/tensor_spec_test.py"], main = "framework/tensor_spec_test.py", python_version = "PY3", + tfrt_enabled = True, deps = [ ":framework_for_generated_wrappers", ":framework_test_lib", @@ -2632,6 +2659,7 @@ tf_py_test( srcs = ["framework/device_spec_test.py"], main = "framework/device_spec_test.py", python_version = "PY3", + tfrt_enabled = True, deps = [ ":framework_for_generated_wrappers", ":framework_test_lib", @@ -2646,6 +2674,7 @@ tf_py_test( srcs = ["framework/device_test.py"], main = "framework/device_test.py", python_version = "PY3", + tfrt_enabled = True, deps = [ ":framework_for_generated_wrappers", ":framework_test_lib", @@ -2660,6 +2689,7 @@ tf_py_test( srcs = ["framework/random_seed_test.py"], main = "framework/random_seed_test.py", python_version = "PY3", + tfrt_enabled = True, deps = [ ":client_testlib", ":framework", @@ -2672,6 +2702,7 @@ tf_py_test( srcs = ["framework/tensor_shape_div_test.py"], main = "framework/tensor_shape_div_test.py", python_version = "PY3", + tfrt_enabled = True, deps = [ ":framework_for_generated_wrappers", ":framework_test_lib", @@ -2688,6 +2719,7 @@ tf_py_test( main = "framework/tensor_util_test.py", python_version = "PY3", tags = ["no_windows"], + tfrt_enabled = True, deps = [ ":array_ops", ":client_testlib", @@ -2707,6 +2739,7 @@ tf_py_test( main = "framework/test_util_test.py", python_version = "PY3", tags = ["no_windows"], + tfrt_enabled = True, deps = [ ":control_flow_ops", ":errors", @@ -2741,6 +2774,7 @@ tf_py_test( "nomsan", # TODO(b/149948895): Re-enable. "notsan", # TODO(b/149948895): Re-enable. 
], + tfrt_enabled = True, deps = [ ":framework_test_lib", # TODO(kkb): Find more appropriate place to add `memory_checker` as deps @@ -2766,6 +2800,7 @@ tf_py_test( srcs = ["framework/dtypes_test.py"], main = "framework/dtypes_test.py", python_version = "PY3", + tfrt_enabled = True, deps = [ ":framework_for_generated_wrappers", ":framework_test_lib", @@ -2781,6 +2816,7 @@ tf_py_test( size = "small", srcs = ["framework/op_def_library_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":framework_for_generated_wrappers", ":framework_test_lib", @@ -2794,6 +2830,7 @@ tf_py_test( srcs = ["framework/kernels_test.py"], main = "framework/kernels_test.py", python_version = "PY3", + tfrt_enabled = True, deps = [ ":framework_test_lib", ":kernels", @@ -3319,6 +3356,7 @@ tf_py_test( size = "small", srcs = ["ops/clip_ops_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":client_testlib", ":clip_ops", @@ -3344,6 +3382,7 @@ tf_py_test( size = "medium", srcs = ["ops/clustering_ops_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":client_testlib", ":clustering_ops", @@ -3368,6 +3407,7 @@ tf_py_test( srcs = ["ops/collective_ops_test.py"], python_version = "PY3", tags = ["no_rocm"], + tfrt_enabled = True, deps = [ ":client_testlib", ":collective_ops", @@ -3388,6 +3428,7 @@ tf_py_test( "no_windows", "nomac", ], + tfrt_enabled = True, xla_enable_strict_auto_jit = True, deps = [ ":client_testlib", @@ -3409,6 +3450,7 @@ cuda_py_test( "no_rocm", "no_windows", ], + tfrt_enabled = True, deps = [ ":client_testlib", ":collective_ops", @@ -3512,6 +3554,7 @@ tf_py_test( size = "small", srcs = ["ops/control_flow_v2_toggles_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":client_testlib", ":control_flow_util_v2", @@ -3525,6 +3568,7 @@ tf_py_test( size = "small", srcs = ["ops/control_flow_v2_enable_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":client_testlib", ":control_flow_util", @@ -3546,6 +3590,7 @@ tf_py_test( "no_oss", "no_pip", ], + tfrt_enabled = True, deps = [ ":client_testlib", ":control_flow_util", @@ -3627,6 +3672,7 @@ tf_py_test( size = "small", srcs = ["ops/bincount_ops_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":bincount_ops", ":platform_test", @@ -4108,6 +4154,7 @@ cuda_py_test( size = "small", srcs = ["training/experimental/mixed_precision_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":client_testlib", ":mixed_precision", @@ -4426,6 +4473,7 @@ cuda_py_test( size = "medium", srcs = ["ops/stateful_random_ops_test.py"], python_version = "PY3", + tfrt_enabled = True, xla_enable_strict_auto_jit = False, xla_enabled = True, deps = [ @@ -4597,6 +4645,7 @@ tf_py_test( name = "sort_ops_test", srcs = ["ops/sort_ops_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":array_ops", ":client_testlib", @@ -4696,6 +4745,7 @@ cuda_py_test( name = "rnn_grad_test", srcs = ["ops/rnn_grad_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":array_ops", ":client_testlib", @@ -4978,6 +5028,7 @@ cuda_py_test( srcs = ["ops/bitwise_ops_test.py"], python_version = "PY3", tags = ["no_windows"], + tfrt_enabled = True, deps = [ ":bitwise_ops", ":constant_op", @@ -5089,6 +5140,7 @@ cuda_py_test( size = "small", srcs = ["ops/histogram_ops_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":array_ops", ":client_testlib", @@ -5105,6 +5157,7 @@ cuda_py_test( size = "medium", srcs = ["ops/image_grad_test.py"], python_version = "PY3", + tfrt_enabled = 
True, deps = [ ":client_testlib", ":framework_for_generated_wrappers", @@ -5146,6 +5199,7 @@ cuda_py_test( size = "small", srcs = ["ops/init_ops_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":client_testlib", ":framework_ops", @@ -5161,6 +5215,7 @@ cuda_py_test( size = "medium", srcs = ["ops/init_ops_v2_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":array_ops", ":client_testlib", @@ -5231,6 +5286,7 @@ cuda_py_test( python_version = "PY3", shard_count = 4, tags = ["no_windows"], + tfrt_enabled = True, deps = [ ":array_ops", ":client_testlib", @@ -5250,6 +5306,7 @@ cuda_py_test( srcs = ["ops/nn_fused_batchnorm_test.py"], python_version = "PY3", shard_count = 24, + tfrt_enabled = True, deps = [ ":array_ops", ":client_testlib", @@ -5300,6 +5357,7 @@ cuda_py_test( size = "medium", srcs = ["ops/nn_xent_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":client_testlib", ":framework_for_generated_wrappers", @@ -5333,6 +5391,7 @@ cuda_py_test( "no_oss", # TODO(b/149565560) "no_windows_gpu", ], + tfrt_enabled = True, deps = [ ":framework_for_generated_wrappers", ":framework_test_lib", @@ -5369,6 +5428,7 @@ tf_py_test( size = "small", srcs = ["ops/variable_spec_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":framework_for_generated_wrappers", ":framework_test_lib", @@ -5639,6 +5699,7 @@ tf_py_test( name = "tf_export_test", srcs = ["util/tf_export_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":client_testlib", ":platform", @@ -5696,6 +5757,7 @@ tf_py_test( name = "tf_stack_test", srcs = ["util/tf_stack_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":client_testlib", ":tf_export", @@ -5768,6 +5830,7 @@ tf_py_test( size = "small", srcs = ["util/object_identity_test.py"], python_version = "PY3", + tfrt_enabled = True, ) # Placeholder for intenal nest_test comments. 
@@ -5777,6 +5840,7 @@ tf_py_test( srcs = ["util/nest_test.py"], main = "util/nest_test.py", python_version = "PY3", + tfrt_enabled = True, deps = [":util_nest_test_main_lib"], ) @@ -5802,6 +5866,7 @@ tf_py_test( srcs = ["util/serialization_test.py"], main = "util/serialization_test.py", python_version = "PY3", + tfrt_enabled = True, deps = [ ":client_testlib", ":util", @@ -5812,6 +5877,7 @@ tf_py_test( name = "function_utils_test", srcs = ["util/function_utils_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":client_testlib", ":util", @@ -5823,6 +5889,7 @@ tf_py_test( size = "small", srcs = ["util/tf_contextlib_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":client_testlib", ":util", @@ -5834,6 +5901,7 @@ tf_py_test( size = "small", srcs = ["util/tf_decorator_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":client_testlib", ":util", @@ -5857,6 +5925,7 @@ tf_py_test( size = "small", srcs = ["util/tf_should_use_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":client_testlib", ":tf_should_use", @@ -5868,6 +5937,7 @@ tf_py_test( size = "small", srcs = ["util/tf_inspect_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":client_testlib", ":util", @@ -5892,6 +5962,7 @@ tf_py_test( srcs = ["util/lock_util_test.py"], main = "util/lock_util_test.py", python_version = "PY3", + tfrt_enabled = True, deps = [ ":client_testlib", ":util", @@ -5904,6 +5975,7 @@ tf_py_test( size = "small", srcs = ["util/module_wrapper_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":client_testlib", ":util", @@ -5946,6 +6018,7 @@ tf_py_test( main = "util/protobuf/compare_test.py", python_version = "PY3", tags = ["no_pip"], # compare_test_pb2 proto is not available in pip. + tfrt_enabled = True, deps = [ ":compare_test_proto_py", ":platform_test", @@ -5960,6 +6033,7 @@ tf_py_test( srcs = ["util/example_parser_configuration_test.py"], main = "util/example_parser_configuration_test.py", python_version = "PY3", + tfrt_enabled = True, deps = [ ":array_ops", ":client", @@ -5975,6 +6049,7 @@ tf_py_test( size = "small", srcs = ["client/events_writer_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":errors", ":framework_test_lib", @@ -6396,6 +6471,7 @@ tf_py_test( tags = [ "noasan", # TODO(b/161236904): flaky timeout in trying to start gRPC server ], + tfrt_enabled = True, deps = [ ":array_ops", ":client", @@ -6417,6 +6493,7 @@ tf_py_test( srcs = ["training/server_lib_multiple_containers_test.py"], grpc_enabled = True, python_version = "PY3", + tfrt_enabled = True, deps = [ ":array_ops", ":client", @@ -6438,6 +6515,7 @@ tf_py_test( srcs = ["training/server_lib_same_variables_clear_container_test.py"], grpc_enabled = True, python_version = "PY3", + tfrt_enabled = True, deps = [ ":array_ops", ":client", @@ -6459,6 +6537,7 @@ tf_py_test( srcs = ["training/server_lib_same_variables_clear_test.py"], grpc_enabled = True, python_version = "PY3", + tfrt_enabled = True, deps = [ ":array_ops", ":client", @@ -6480,6 +6559,7 @@ tf_py_test( srcs = ["training/server_lib_same_variables_no_clear_test.py"], grpc_enabled = True, python_version = "PY3", + tfrt_enabled = True, deps = [ ":array_ops", ":client", @@ -6501,6 +6581,7 @@ tf_py_test( srcs = ["training/server_lib_sparse_job_test.py"], grpc_enabled = True, python_version = "PY3", + tfrt_enabled = True, deps = [ ":array_ops", ":client", @@ -6528,6 +6609,7 @@ cuda_py_test( "no_oss", # Test flaky due to port collisions. 
"oss_serial", ], + tfrt_enabled = True, deps = [ ":client", ":client_testlib", @@ -6554,6 +6636,7 @@ tf_py_test( "notsan", # data race due to b/62910646 "oss_serial", ], + tfrt_enabled = True, deps = [ ":client_testlib", ":framework_for_generated_wrappers", @@ -6596,6 +6679,7 @@ tf_py_test( "no_pip_gpu", # testInteractivePlacePrunedGraph fails on invalid assumption about GPU ops. "no_windows", ], + tfrt_enabled = True, deps = [ ":array_ops", ":client", @@ -6658,6 +6742,7 @@ tf_py_test( "no_pip_gpu", "notsan", # data race due to b/62910646 ], + tfrt_enabled = True, deps = [ ":client", ":framework", @@ -6677,6 +6762,7 @@ tf_py_test( "no_gpu", "no_windows", ], + tfrt_enabled = True, deps = [ ":array_ops", ":client", @@ -6701,6 +6787,7 @@ cuda_py_test( "gpu_cupti", "no_gpu", # b/154742661 ], + tfrt_enabled = True, xla_enable_strict_auto_jit = False, # Graph structure is different with autojit deps = [ ":client", @@ -6720,6 +6807,7 @@ cuda_py_test( "no_gpu", # b/127386241 "no_windows_gpu", ], + tfrt_enabled = True, deps = [ ":client", ":client_testlib", @@ -6734,6 +6822,7 @@ tf_py_test( size = "small", srcs = ["framework/c_api_util_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":c_api_util", ":framework_test_lib", @@ -6746,6 +6835,7 @@ tf_py_test( size = "small", srcs = ["framework/graph_util_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":client", ":client_testlib", @@ -6766,6 +6856,7 @@ tf_py_test( srcs = ["framework/convert_to_constants_test.py"], python_version = "PY3", tags = ["no_rocm"], + tfrt_enabled = True, deps = [ ":client_testlib", ":control_flow_v2_toggles", @@ -6780,6 +6871,7 @@ tf_py_test( size = "small", srcs = ["lib/core/bfloat16_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":client_testlib", ":lib", @@ -6796,6 +6888,7 @@ tf_py_test( "no_rocm", "no_windows", ], + tfrt_enabled = True, deps = [ ":client_testlib", ":errors", @@ -6808,6 +6901,7 @@ tf_py_test( size = "small", srcs = ["lib/io/tf_record_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":client_testlib", ":errors", @@ -6844,6 +6938,7 @@ cuda_py_test( "no_windows", # b/139083295: bfloat16 tests fail on Windows "notsan", ], + tfrt_enabled = True, deps = [ ":array_ops", ":client_testlib", @@ -7014,6 +7109,7 @@ tf_py_test( "noasan", # http://b/30379628 "notsan", # http://b/30379628 ], + tfrt_enabled = True, deps = [ ":client", ":client_testlib", @@ -7034,6 +7130,7 @@ tf_py_test( "noasan", # http://b/30782289 "notsan", # http://b/30782289 ], + tfrt_enabled = True, deps = [ ":client", ":client_testlib", @@ -7051,6 +7148,7 @@ cuda_py_test( grpc_enabled = True, main = "training/session_manager_test.py", python_version = "PY3", + tfrt_enabled = True, deps = [ ":array_ops", ":client", @@ -7071,6 +7169,7 @@ tf_py_test( grpc_enabled = True, python_version = "PY3", tags = ["no_windows"], + tfrt_enabled = True, deps = [ ":array_ops", ":checkpoint_management", @@ -7099,6 +7198,7 @@ tf_py_test( "no_windows", "notsan", # intermittent races on a few percent of runs ], + tfrt_enabled = True, deps = [ ":client", ":client_testlib", @@ -7149,6 +7249,7 @@ tf_py_test( size = "small", srcs = ["training/checkpoint_ops_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":checkpoint_ops_gen", ":client", @@ -7170,6 +7271,7 @@ tf_py_test( size = "medium", srcs = ["training/warm_starting_util_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":array_ops", ":client_testlib", @@ -7191,6 +7293,7 @@ tf_py_test( "no_pip", "notsan", # 
b/67945581 ], + tfrt_enabled = True, deps = [ ":array_ops", ":checkpoint_management", @@ -7235,6 +7338,7 @@ tf_py_test( size = "small", srcs = ["training/training_util_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":client_testlib", ":framework", @@ -7250,6 +7354,7 @@ tf_py_test( size = "medium", srcs = ["training/input_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":array_ops", ":client_testlib", @@ -7400,6 +7505,7 @@ tf_py_test( srcs = ["ops/dequantize_op_test.py"], python_version = "PY3", tags = ["no_windows"], + tfrt_enabled = True, deps = [ ":array_ops", ":client_testlib", @@ -7414,6 +7520,7 @@ tf_py_test( srcs = ["ops/quantized_ops_test.py"], python_version = "PY3", tags = ["no_windows"], + tfrt_enabled = True, deps = [ ":array_ops", ":client_testlib", @@ -7428,6 +7535,7 @@ tf_py_test( srcs = ["ops/quantized_conv_ops_test.py"], python_version = "PY3", tags = ["no_windows"], + tfrt_enabled = True, deps = [ ":client_testlib", ":framework_for_generated_wrappers", @@ -7461,6 +7569,7 @@ cuda_py_test( main = "ops/accumulate_n_benchmark.py", python_version = "PY3", shard_count = 6, + tfrt_enabled = True, deps = [ ":array_ops", ":client", @@ -7480,6 +7589,7 @@ cuda_py_test( srcs = ["ops/batch_norm_benchmark.py"], main = "ops/batch_norm_benchmark.py", python_version = "PY3", + tfrt_enabled = True, deps = [ ":array_ops", ":client", @@ -7501,6 +7611,7 @@ cuda_py_test( srcs = ["ops/collective_ops_benchmark.py"], main = "ops/collective_ops_benchmark.py", python_version = "PY3", + tfrt_enabled = True, deps = [ ":array_ops", ":client", @@ -7518,6 +7629,7 @@ cuda_py_test( srcs = ["ops/concat_benchmark.py"], main = "ops/concat_benchmark.py", python_version = "PY3", + tfrt_enabled = True, deps = [ ":array_ops", ":client", @@ -7536,6 +7648,7 @@ cuda_py_test( srcs = ["ops/control_flow_ops_benchmark.py"], main = "ops/control_flow_ops_benchmark.py", python_version = "PY3", + tfrt_enabled = True, deps = [ ":client_testlib", ":constant_op", @@ -7551,6 +7664,7 @@ cuda_py_test( srcs = ["ops/conv2d_benchmark.py"], main = "ops/conv2d_benchmark.py", python_version = "PY3", + tfrt_enabled = True, deps = [ ":client", ":client_testlib", @@ -7571,6 +7685,7 @@ cuda_py_test( srcs = ["ops/split_benchmark.py"], main = "ops/split_benchmark.py", python_version = "PY3", + tfrt_enabled = True, deps = [ ":array_ops", ":client", @@ -7591,6 +7706,7 @@ cuda_py_test( srcs = ["ops/transpose_benchmark.py"], main = "ops/transpose_benchmark.py", python_version = "PY3", + tfrt_enabled = True, deps = [ ":array_ops", ":client", @@ -7611,6 +7727,7 @@ cuda_py_test( srcs = ["ops/matmul_benchmark.py"], main = "ops/matmul_benchmark.py", python_version = "PY3", + tfrt_enabled = True, deps = [":matmul_benchmark_main_lib"], ) @@ -7640,6 +7757,7 @@ cuda_py_test( grpc_enabled = True, main = "client/session_benchmark.py", python_version = "PY3", + tfrt_enabled = True, deps = [ ":array_ops", ":client", @@ -7658,6 +7776,7 @@ cuda_py_test( srcs = ["framework/graph_building_benchmark.py"], main = "framework/graph_building_benchmark.py", python_version = "PY3", + tfrt_enabled = True, deps = [ ":array_ops", ":client_testlib", @@ -7673,6 +7792,7 @@ cuda_py_test( size = "medium", srcs = ["ops/nn_grad_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":client_testlib", ":framework_for_generated_wrappers", @@ -7727,6 +7847,7 @@ tf_py_test( "grappler", "no_pip", # tf_optimizer is not available in pip. 
], + tfrt_enabled = True, deps = [ ":client_testlib", ":framework_for_generated_wrappers", @@ -7747,6 +7868,7 @@ tf_py_test( "grappler", "no_pip", # tf_optimizer is not available in pip. ], + tfrt_enabled = True, deps = [ ":array_ops", ":client_testlib", @@ -7865,6 +7987,7 @@ tf_py_test( "grappler", "no_pip", # tf_optimizer is not available in pip. ], + tfrt_enabled = True, deps = [ ":client_testlib", ":framework_for_generated_wrappers", @@ -7886,6 +8009,7 @@ tf_py_test( tags = [ "grappler", ], + tfrt_enabled = True, deps = [ ":client_testlib", ":framework_for_generated_wrappers", @@ -8021,6 +8145,7 @@ tf_py_test( "no_pip", "no_windows", # TODO(b/151942037) ], + tfrt_enabled = True, deps = [ ":array_ops", ":client_testlib", @@ -8055,6 +8180,7 @@ tf_py_test( "grappler", "no_pip", ], + tfrt_enabled = True, deps = [ ":array_ops", ":client_testlib", @@ -8075,6 +8201,7 @@ cuda_py_test( ], python_version = "PY3", tags = ["grappler"], + tfrt_enabled = True, # This test analyzes the graph, but XLA changes the names of nodes. xla_enable_strict_auto_jit = False, deps = [ @@ -8330,6 +8457,7 @@ cuda_py_test( name = "raw_ops_test", srcs = ["ops/raw_ops_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":client_testlib", ], diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD index 3c0c3894a64..358929dc870 100644 --- a/tensorflow/python/eager/BUILD +++ b/tensorflow/python/eager/BUILD @@ -144,6 +144,7 @@ cuda_py_test( size = "small", srcs = ["cancellation_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":cancellation", ":test", @@ -250,6 +251,7 @@ cuda_py_test( name = "monitoring_test", srcs = ["monitoring_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":monitoring", ":test", @@ -393,6 +395,7 @@ cuda_py_test( size = "medium", srcs = ["function_argument_naming_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":backprop", ":def_function", @@ -408,6 +411,7 @@ cuda_py_test( size = "medium", srcs = ["function_defun_collection_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":backprop", ":def_function", @@ -524,6 +528,7 @@ cuda_py_test( name = "graph_only_ops_test", srcs = ["graph_only_ops_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ "graph_only_ops", "//tensorflow/python:client_testlib", @@ -670,6 +675,7 @@ cuda_py_test( name = "remote_benchmarks_test", srcs = ["remote_benchmarks_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":backprop", ":benchmarks_test_base", @@ -695,6 +701,7 @@ tf_py_test( name = "tape_test", srcs = ["tape_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ ":backprop", ":context", @@ -803,6 +810,7 @@ tf_py_test( size = "medium", srcs = ["lift_to_graph_test.py"], python_version = "PY3", + tfrt_enabled = True, deps = [ "lift_to_graph", "//tensorflow/python:framework_ops", diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 91d1cd4c4c9..e73f2ea29fc 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -20,6 +20,7 @@ tf_py_test( size = "small", srcs = ["as_string_op_test.py"], tags = ["no_windows"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -33,6 +34,7 @@ tf_py_test( name = "attention_ops_test", size = "small", srcs = ["attention_ops_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -52,6 +54,7 @@ tf_py_test( 
"nomsan", # TODO(b/161902335): Re-enable. "notsan", # TODO(b/161829717): Re-enable. ], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:data_flow_ops", @@ -66,6 +69,7 @@ tf_py_test( size = "small", srcs = ["base64_ops_test.py"], tags = ["nomac"], # b/35468214 + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -80,6 +84,7 @@ tf_py_test( tf_py_test( name = "batch_scatter_ops_test", srcs = ["batch_scatter_ops_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -100,6 +105,7 @@ tf_py_test( name = "bcast_ops_test", size = "small", srcs = ["bcast_ops_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops_gen", "//tensorflow/python:client_testlib", @@ -157,6 +163,7 @@ cuda_py_test( size = "small", srcs = ["benchmark_test.py"], tags = ["no_windows"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client", "//tensorflow/python:client_testlib", @@ -169,6 +176,7 @@ cuda_py_test( cuda_py_test( name = "reduce_benchmark_test", srcs = ["reduce_benchmark_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -187,6 +195,7 @@ cuda_py_test( size = "small", srcs = ["bincount_op_test.py"], tags = ["no_windows_gpu"], + tfrt_enabled = True, deps = [ "//tensorflow/python:bincount_ops", "//tensorflow/python:client_testlib", @@ -198,6 +207,7 @@ tf_py_test( name = "candidate_sampler_ops_test", size = "small", srcs = ["candidate_sampler_ops_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:candidate_sampling_ops", @@ -212,6 +222,7 @@ tf_py_test( name = "checkpoint_ops_test", size = "medium", srcs = ["checkpoint_ops_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:checkpoint_ops_gen", @@ -259,6 +270,7 @@ tf_py_test( "no_gpu", # b/127001953 "no_windows", ], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:clip_ops", @@ -270,6 +282,7 @@ tf_py_test( name = "collective_ops_test", size = "small", srcs = ["collective_ops_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:collective_ops_gen", @@ -281,6 +294,7 @@ tf_py_test( name = "conditional_accumulator_test", size = "small", srcs = ["conditional_accumulator_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -298,6 +312,7 @@ tf_py_test( name = "ctc_decoder_ops_test", size = "small", srcs = ["ctc_decoder_ops_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -338,6 +353,7 @@ cuda_py_test( name = "cudnn_deterministic_ops_test", size = "small", srcs = ["cudnn_deterministic_ops_test.py"], + tfrt_enabled = True, xla_enable_strict_auto_jit = True, deps = [ ":cudnn_deterministic_base", @@ -348,6 +364,7 @@ cuda_py_test( name = "cudnn_deterministic_test", size = "small", srcs = ["cudnn_deterministic_test.py"], + tfrt_enabled = True, deps = [ ":cudnn_deterministic_base", ], @@ -357,6 +374,7 @@ cuda_py_test( name = "cumulative_logsumexp_test", size = "medium", srcs = ["cumulative_logsumexp_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -372,6 +390,7 @@ tf_py_test( name = "decode_csv_op_test", size = "small", srcs = ["decode_csv_op_test.py"], + tfrt_enabled 
= True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:errors", @@ -386,6 +405,7 @@ tf_py_test( name = "decode_png_op_test", size = "small", srcs = ["decode_png_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -399,6 +419,7 @@ tf_py_test( name = "decode_bmp_op_test", size = "small", srcs = ["decode_bmp_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -412,6 +433,7 @@ tf_py_test( name = "decode_jpeg_op_test", srcs = ["decode_jpeg_op_test.py"], data = ["//tensorflow/core:image_testdata"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", @@ -425,6 +447,7 @@ tf_py_test( size = "small", srcs = ["decode_image_op_test.py"], data = ["//tensorflow/core:image_testdata"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:errors", @@ -439,6 +462,7 @@ tf_py_test( name = "decode_raw_op_test", size = "small", srcs = ["decode_raw_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -452,6 +476,7 @@ tf_py_test( name = "decode_compressed_op_test", size = "small", srcs = ["decode_compressed_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -465,6 +490,7 @@ cuda_py_test( name = "determinant_op_test", size = "medium", srcs = ["determinant_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", @@ -477,6 +503,7 @@ tf_py_test( name = "draw_bounding_box_op_test", size = "small", srcs = ["draw_bounding_box_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -491,6 +518,7 @@ tf_py_test( name = "edit_distance_op_test", size = "small", srcs = ["edit_distance_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -522,6 +550,7 @@ tf_py_test( name = "fingerprint_op_test", size = "small", srcs = ["fingerprint_op_test.py"], + tfrt_enabled = True, deps = [ "//third_party/py/numpy", ], @@ -532,6 +561,7 @@ tf_py_test( size = "small", srcs = ["fractional_avg_pool_op_test.py"], shard_count = 5, + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -548,6 +578,7 @@ tf_py_test( size = "small", srcs = ["fractional_max_pool_op_test.py"], shard_count = 5, + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -563,6 +594,7 @@ tf_py_test( name = "identity_op_py_test", size = "small", srcs = ["identity_op_py_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:array_ops_gen", @@ -577,6 +609,7 @@ tf_py_test( name = "identity_n_op_py_test", size = "small", srcs = ["identity_n_op_py_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:array_ops_gen", @@ -591,6 +624,7 @@ cuda_py_test( name = "in_topk_op_test", size = "small", srcs = ["in_topk_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:errors", @@ -603,6 +637,7 @@ tf_py_test( name = "record_input_test", size = "medium", srcs = ["record_input_test.py"], + tfrt_enabled = True, deps = [ 
"//tensorflow/python:client_testlib", "//tensorflow/python:data_flow_ops", @@ -615,6 +650,7 @@ tf_py_test( name = "io_ops_test", size = "small", srcs = ["io_ops_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:io_ops", @@ -626,6 +662,7 @@ tf_py_test( name = "listdiff_op_test", size = "small", srcs = ["listdiff_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -642,6 +679,7 @@ tf_py_test( tags = [ "no_windows", ], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", @@ -688,6 +726,7 @@ tf_py_test( name = "losses_test", size = "medium", srcs = ["losses_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -710,6 +749,7 @@ tf_py_test( srcs = ["matrix_exponential_op_test.py"], shard_count = 16, tags = ["no_windows_gpu"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", @@ -787,6 +827,7 @@ cuda_py_test( name = "banded_triangular_solve_op_test", size = "small", srcs = ["banded_triangular_solve_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:linalg_ops", @@ -799,6 +840,7 @@ cuda_py_test( size = "medium", srcs = ["matrix_triangular_solve_op_test.py"], shard_count = 3, + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:linalg_ops", @@ -827,6 +869,7 @@ tf_py_test( name = "parse_single_example_op_test", size = "small", srcs = ["parse_single_example_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/core:protos_all_py", "//tensorflow/python:array_ops", @@ -844,6 +887,7 @@ tf_py_test( name = "partitioned_variables_test", size = "small", srcs = ["partitioned_variables_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -861,6 +905,7 @@ tf_py_test( name = "priority_queue_test", size = "medium", srcs = ["priority_queue_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -896,6 +941,7 @@ tf_py_test( name = "regex_replace_op_test", size = "small", srcs = ["regex_replace_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:constant_op", @@ -909,6 +955,7 @@ tf_py_test( name = "regex_full_match_op_test", size = "small", srcs = ["regex_full_match_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:constant_op", @@ -970,6 +1017,7 @@ tf_py_test( name = "sparse_add_op_test", size = "small", srcs = ["sparse_add_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client", "//tensorflow/python:client_testlib", @@ -986,6 +1034,7 @@ tf_py_test( name = "sparse_concat_op_test", size = "small", srcs = ["sparse_concat_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -1000,6 +1049,7 @@ tf_py_test( name = "sparse_conditional_accumulator_test", size = "small", srcs = ["sparse_conditional_accumulator_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -1014,6 +1064,7 @@ tf_py_test( name = "sparse_reorder_op_test", size = "small", srcs = ["sparse_reorder_op_test.py"], + tfrt_enabled = True, deps = [ 
"//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -1029,6 +1080,7 @@ tf_py_test( name = "sparse_reshape_op_test", size = "small", srcs = ["sparse_reshape_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -1044,6 +1096,7 @@ tf_py_test( name = "sparse_split_op_test", size = "small", srcs = ["sparse_split_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:framework", @@ -1056,6 +1109,7 @@ tf_py_test( name = "sparse_slice_op_test", size = "small", srcs = ["sparse_slice_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:framework", @@ -1069,6 +1123,7 @@ tf_py_test( name = "sparse_to_dense_op_py_test", size = "small", srcs = ["sparse_to_dense_op_py_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -1082,6 +1137,7 @@ tf_py_test( name = "sparsemask_op_test", size = "small", srcs = ["sparsemask_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -1107,6 +1163,7 @@ tf_py_test( name = "string_join_op_test", size = "small", srcs = ["string_join_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:string_ops", @@ -1117,6 +1174,7 @@ tf_py_test( name = "string_split_op_test", size = "small", srcs = ["string_split_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -1136,6 +1194,7 @@ tf_py_test( name = "string_bytes_split_op_test", size = "small", srcs = ["string_bytes_split_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -1155,6 +1214,7 @@ tf_py_test( name = "string_length_op_test", size = "small", srcs = ["string_length_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", @@ -1166,6 +1226,7 @@ tf_py_test( name = "string_strip_op_test", size = "small", srcs = ["string_strip_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -1180,6 +1241,7 @@ tf_py_test( name = "string_lower_op_test", size = "small", srcs = ["string_lower_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -1194,6 +1256,7 @@ tf_py_test( name = "string_upper_op_test", size = "small", srcs = ["string_upper_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -1208,6 +1271,7 @@ tf_py_test( name = "substr_op_test", size = "small", srcs = ["substr_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:errors", @@ -1246,6 +1310,7 @@ tf_py_test( name = "summary_v1_ops_test", size = "small", srcs = ["summary_v1_ops_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/core:protos_all_py", "//tensorflow/python:client_testlib", @@ -1259,6 +1324,7 @@ tf_py_test( name = "summary_v1_tensor_op_test", size = "small", srcs = ["summary_v1_tensor_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/core:protos_all_py", "//tensorflow/python:array_ops", @@ -1293,6 +1359,7 @@ cuda_py_test( name = "template_mirrored_strategy_test", size = "small", srcs = ["template_mirrored_strategy_test.py"], + 
tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:init_ops", @@ -1312,6 +1379,7 @@ cuda_py_test( tags = [ "no_oss", # TODO(b/142818120): Re-enable. ], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", @@ -1324,6 +1392,7 @@ tf_py_test( name = "unicode_script_op_test", size = "small", srcs = ["unicode_script_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:constant_op", @@ -1336,6 +1405,7 @@ cuda_py_test( name = "topk_op_test", size = "medium", srcs = ["topk_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -1351,6 +1421,7 @@ cuda_py_test( name = "nth_element_op_test", size = "small", srcs = ["nth_element_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -1366,6 +1437,7 @@ tf_py_test( name = "unicode_encode_op_test", size = "small", srcs = ["unicode_encode_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:constant_op", @@ -1384,6 +1456,7 @@ tf_py_test( name = "unicode_transcode_op_test", size = "small", srcs = ["unicode_transcode_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", @@ -1416,6 +1489,7 @@ tf_py_test( name = "unique_op_test", size = "small", srcs = ["unique_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -1477,6 +1551,7 @@ cuda_py_test( name = "where_op_test", size = "medium", srcs = ["where_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -1489,6 +1564,7 @@ cuda_py_test( name = "cast_op_test", size = "small", srcs = ["cast_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -1505,6 +1581,7 @@ cuda_py_test( size = "small", srcs = ["dense_update_ops_no_tsan_test.py"], tags = ["notsan"], + tfrt_enabled = True, # TODO (b/140294007): the test fails with XLA. 
xla_enable_strict_auto_jit = False, deps = [ @@ -1523,6 +1600,7 @@ cuda_py_test( srcs = ["diag_op_test.py"], shard_count = 6, tags = ["no_windows_gpu"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -1538,6 +1616,7 @@ tf_py_test( size = "small", srcs = ["reader_ops_test.py"], data = ["//tensorflow/core:lmdb_testdata"], + tfrt_enabled = True, deps = [ "//tensorflow/core:protos_all_py", "//tensorflow/python:client_testlib", @@ -1556,6 +1635,7 @@ cuda_py_test( name = "aggregate_ops_test", size = "small", srcs = ["aggregate_ops_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -1569,6 +1649,7 @@ cuda_py_test( name = "argmax_op_test", size = "small", srcs = ["argmax_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:math_ops", @@ -1622,6 +1703,7 @@ cuda_py_test( size = "small", srcs = ["inplace_ops_test.py"], shard_count = 10, + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -1638,6 +1720,7 @@ cuda_py_test( size = "medium", srcs = ["batch_matmul_op_test.py"], shard_count = 20, + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -1651,6 +1734,7 @@ cuda_py_test( name = "batchtospace_op_test", size = "small", srcs = ["batchtospace_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:array_ops_gen", @@ -1664,6 +1748,7 @@ cuda_py_test( name = "betainc_op_test", size = "small", srcs = ["betainc_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -1692,6 +1777,7 @@ cuda_py_test( name = "bias_op_deterministic_test", size = "medium", srcs = ["bias_op_deterministic_test.py"], + tfrt_enabled = True, deps = [ ":bias_op_base", ], @@ -1710,6 +1796,7 @@ cuda_py_test( name = "bitcast_op_test", size = "small", srcs = ["bitcast_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -1740,6 +1827,7 @@ cuda_py_test( name = "constant_op_test", size = "small", srcs = ["constant_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -1816,6 +1904,7 @@ tf_py_test( name = "control_flow_util_test", size = "small", srcs = ["control_flow_util_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:control_flow_ops", @@ -1843,6 +1932,7 @@ cuda_py_test( name = "conv1d_test", size = "small", srcs = ["conv1d_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -1855,6 +1945,7 @@ cuda_py_test( name = "conv1d_transpose_test", size = "small", srcs = ["conv1d_transpose_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client", "//tensorflow/python:client_testlib", @@ -1868,6 +1959,7 @@ cuda_py_test( name = "conv2d_transpose_test", size = "small", srcs = ["conv2d_transpose_test.py"], + tfrt_enabled = True, # TODO(b/144432983): S32 convolutions should not be auto-clustered, only # crashes tests. 
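# --- Illustration only; not part of the diff above or below. Every hunk in
# --- this BUILD file makes the same one-line change: adding
# --- `tfrt_enabled = True` to an existing `tf_py_test` / `cuda_py_test`
# --- target, so the macro can also generate a TFRT-enabled variant of the
# --- test. A representative target after the change would look roughly like
# --- this; the target name and deps below are invented for illustration.
tf_py_test(
    name = "example_op_test",
    size = "small",
    srcs = ["example_op_test.py"],
    tfrt_enabled = True,  # the attribute added throughout this patch
    deps = [
        "//tensorflow/python:client_testlib",
    ],
)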
@@ -1886,6 +1978,7 @@ cuda_py_test( name = "conv3d_backprop_filter_v2_grad_test", size = "small", srcs = ["conv3d_backprop_filter_v2_grad_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -1900,6 +1993,7 @@ cuda_py_test( name = "cross_grad_test", size = "small", srcs = ["cross_grad_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -1924,6 +2018,7 @@ cuda_py_test( name = "dense_update_ops_test", size = "small", srcs = ["dense_update_ops_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -1940,6 +2035,7 @@ cuda_py_test( size = "medium", srcs = ["depthtospace_op_test.py"], tags = ["no_windows_gpu"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -1954,6 +2050,7 @@ cuda_py_test( size = "medium", srcs = ["division_past_test.py"], tags = ["manual"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", @@ -1965,6 +2062,7 @@ cuda_py_test( name = "dynamic_partition_op_test", size = "medium", srcs = ["dynamic_partition_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -1980,6 +2078,7 @@ cuda_py_test( name = "dynamic_stitch_op_test", size = "small", srcs = ["dynamic_stitch_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:data_flow_grad", @@ -1994,6 +2093,7 @@ cuda_py_test( name = "extract_image_patches_op_test", size = "small", srcs = ["extract_image_patches_op_test.py"], + tfrt_enabled = True, # TODO(b/144432983): S32 convolutions should not be auto-clustered. 
xla_enable_strict_auto_jit = False, deps = [ @@ -2008,6 +2108,7 @@ cuda_py_test( name = "extract_volume_patches_op_test", size = "small", srcs = ["extract_volume_patches_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -2046,6 +2147,7 @@ cuda_py_test( name = "gather_nd_op_test", size = "small", srcs = ["gather_nd_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client", @@ -2061,6 +2163,7 @@ cuda_py_test( name = "gradient_correctness_test", size = "small", srcs = ["gradient_correctness_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", @@ -2118,6 +2221,7 @@ cuda_py_test( name = "lrn_op_test", size = "medium", srcs = ["lrn_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -2133,6 +2237,7 @@ cuda_py_test( name = "lu_op_test", size = "small", srcs = ["lu_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -2167,6 +2272,7 @@ cuda_py_test( size = "small", srcs = ["manip_ops_test.py"], tags = ["no_windows_gpu"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", @@ -2180,6 +2286,7 @@ cuda_py_test( size = "medium", srcs = ["matmul_op_test.py"], shard_count = 20, + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -2196,6 +2303,7 @@ cuda_py_test( name = "morphological_ops_test", size = "small", srcs = ["morphological_ops_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", @@ -2209,6 +2317,7 @@ cuda_py_test( name = "numerics_test", size = "small", srcs = ["numerics_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -2225,6 +2334,7 @@ cuda_py_test( size = "small", srcs = ["one_hot_op_test.py"], tags = ["no_windows_gpu"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -2237,6 +2347,7 @@ cuda_py_test( name = "stack_op_test", size = "small", srcs = ["stack_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -2273,6 +2384,7 @@ cuda_py_test( name = "pad_op_test", size = "small", srcs = ["pad_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -2285,6 +2397,7 @@ cuda_py_test( name = "padding_fifo_queue_test", size = "small", srcs = ["padding_fifo_queue_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -2318,6 +2431,7 @@ cuda_py_test( name = "reduce_join_op_test", size = "small", srcs = ["reduce_join_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -2348,6 +2462,7 @@ cuda_py_test( tags = [ "no_windows_gpu", ], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -2367,6 +2482,7 @@ cuda_py_test( "no_gpu", "noguitar", ], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -2380,6 +2496,7 @@ cuda_py_test( name = "relu_op_test", size = "small", srcs = 
["relu_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", @@ -2398,6 +2515,7 @@ cuda_py_test( name = "reshape_op_test", size = "small", srcs = ["reshape_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -2410,6 +2528,7 @@ cuda_py_test( name = "reverse_sequence_op_test", size = "small", srcs = ["reverse_sequence_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -2422,6 +2541,7 @@ cuda_py_test( name = "compare_and_bitpack_op_test", size = "small", srcs = ["compare_and_bitpack_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", @@ -2434,6 +2554,7 @@ cuda_py_test( name = "scalar_test", size = "small", srcs = ["scalar_test.py"], + tfrt_enabled = True, # b/140221961: Invalid dims for operations xla_enable_strict_auto_jit = False, deps = [ @@ -2454,6 +2575,7 @@ cuda_py_test( name = "scan_ops_test", size = "medium", srcs = ["scan_ops_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:errors", @@ -2467,6 +2589,7 @@ cuda_py_test( name = "shape_ops_test", size = "medium", srcs = ["shape_ops_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/core:protos_all_py", "//tensorflow/python:array_ops", @@ -2484,6 +2607,7 @@ cuda_py_test( name = "softmax_op_test", size = "medium", srcs = ["softmax_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -2498,6 +2622,7 @@ cuda_py_test( name = "softplus_op_test", size = "small", srcs = ["softplus_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", @@ -2511,6 +2636,7 @@ cuda_py_test( name = "softsign_op_test", size = "small", srcs = ["softsign_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", @@ -2524,6 +2650,7 @@ cuda_py_test( name = "spacetobatch_op_test", size = "small", srcs = ["spacetobatch_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:array_ops_gen", @@ -2543,6 +2670,7 @@ cuda_py_test( "no_windows", "no_windows_gpu", ], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -2556,6 +2684,7 @@ tf_py_test( name = "sparse_serialization_ops_test", size = "small", srcs = ["sparse_serialization_ops_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -2570,6 +2699,7 @@ tf_py_test( name = "sparse_tensors_map_ops_test", size = "small", srcs = ["sparse_tensors_map_ops_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client", @@ -2586,6 +2716,7 @@ cuda_py_test( name = "sparse_tensor_dense_matmul_grad_test", size = "small", srcs = ["sparse_tensor_dense_matmul_grad_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:framework", @@ -2600,6 +2731,7 @@ cuda_py_test( name = "sparse_xent_op_test", size = "small", srcs = ["sparse_xent_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/core:protos_all_py", "//tensorflow/python:array_ops", @@ -2638,6 +2770,7 @@ cuda_py_test( name = "stack_ops_test", 
size = "small", srcs = ["stack_ops_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:control_flow_ops", @@ -2653,6 +2786,7 @@ cuda_py_test( name = "string_to_hash_bucket_op_test", size = "small", srcs = ["string_to_hash_bucket_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -2665,6 +2799,7 @@ cuda_py_test( name = "string_to_number_op_test", size = "small", srcs = ["string_to_number_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -2677,6 +2812,7 @@ cuda_py_test( name = "summary_v1_audio_op_test", size = "small", srcs = ["summary_v1_audio_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/core:protos_all_py", "//tensorflow/python:client_testlib", @@ -2690,6 +2826,7 @@ cuda_py_test( name = "summary_v1_image_op_test", size = "small", srcs = ["summary_v1_image_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/core:protos_all_py", "//tensorflow/python:client_testlib", @@ -2740,6 +2877,7 @@ cuda_py_test( size = "small", srcs = ["trace_op_test.py"], tags = ["no_windows_gpu"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:math_ops", @@ -2770,6 +2908,7 @@ cuda_py_test( name = "variable_ops_test", size = "small", srcs = ["variable_ops_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -2787,6 +2926,7 @@ cuda_py_test( name = "xent_op_test", size = "small", srcs = ["xent_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", @@ -2803,6 +2943,7 @@ cuda_py_test( name = "zero_division_test", size = "medium", srcs = ["zero_division_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:errors", @@ -2818,6 +2959,7 @@ cuda_py_test( tags = [ "no_gpu", # Flaky: b/80127739, b/127001953 ], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -2834,6 +2976,7 @@ cuda_py_test( size = "medium", srcs = ["atrous_convolution_test.py"], tags = ["manual"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -2848,6 +2991,7 @@ cuda_py_test( name = "pool_test", size = "medium", srcs = ["pool_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", @@ -2879,6 +3023,7 @@ cuda_py_test( name = "conv3d_transpose_test", size = "medium", srcs = ["conv3d_transpose_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", @@ -2922,6 +3067,7 @@ cuda_py_test( shard_count = 3, # TODO(b/118842098): Re-enable this test in Kokoro. 
tags = ["no_oss"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -2938,6 +3084,7 @@ tf_py_test( size = "medium", srcs = ["neon_depthwise_conv_op_test.py"], tags = ["no_windows"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -2954,6 +3101,7 @@ cuda_py_test( size = "medium", srcs = ["division_future_test.py"], tags = ["manual"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", @@ -2965,6 +3113,7 @@ cuda_py_test( name = "pooling_ops_3d_test", size = "medium", srcs = ["pooling_ops_3d_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", @@ -2979,6 +3128,7 @@ cuda_py_test( size = "medium", srcs = ["pooling_ops_test.py"], shard_count = 4, + tfrt_enabled = True, xla_enable_strict_auto_jit = False, # Flaky in XLA b/149568654 deps = [ "//tensorflow/python:array_ops", @@ -2999,6 +3149,7 @@ cuda_py_test( timeout = "long", srcs = ["rnn_test.py"], shard_count = 10, + tfrt_enabled = True, deps = [ "//tensorflow/core:protos_all_py", "//tensorflow/python:array_ops", @@ -3073,6 +3224,7 @@ cuda_py_test( tags = [ "no_oss", # Requires 4GB+ RAM ], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -3087,6 +3239,7 @@ cuda_py_test( size = "medium", srcs = ["sparse_matmul_op_test.py"], tags = ["no_windows"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", @@ -3122,6 +3275,7 @@ cuda_py_test( name = "sparse_tensor_dense_matmul_op_test", size = "medium", srcs = ["sparse_tensor_dense_matmul_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/core:protos_all_py", "//tensorflow/python:array_ops", @@ -3176,6 +3330,7 @@ cuda_py_test( name = "stage_op_test", size = "medium", srcs = ["stage_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -3191,6 +3346,7 @@ cuda_py_test( size = "medium", srcs = ["map_stage_op_test.py"], tags = ["no_oss"], # b/124474135 + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -3206,6 +3362,7 @@ cuda_py_test( size = "medium", srcs = ["concat_op_test.py"], tags = ["no_windows"], # b/126916429 + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:array_ops_gen", @@ -3227,6 +3384,7 @@ cuda_py_test( "nomsan", "notsan", ], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -3239,6 +3397,7 @@ cuda_py_test( size = "medium", srcs = ["conv_ops_3d_test.py"], shard_count = 30, + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", @@ -3252,6 +3411,7 @@ cuda_py_test( size = "medium", srcs = ["cwise_ops_test.py"], shard_count = 50, + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -3272,6 +3432,7 @@ cuda_py_test( size = "medium", srcs = ["cwise_ops_binary_test.py"], shard_count = 50, + tfrt_enabled = True, # b/140155647: Error just outside of tolerance xla_enable_strict_auto_jit = False, deps = [ @@ -3316,6 +3477,7 @@ cuda_py_test( size = "medium", srcs = ["embedding_ops_test.py"], shard_count = 20, + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", 
"//tensorflow/python:client_testlib", @@ -3358,6 +3520,7 @@ cuda_py_test( size = "medium", srcs = ["matrix_band_part_op_test.py"], shard_count = 20, + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -3375,6 +3538,7 @@ tf_py_test( tags = [ "no_windows", ], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -3395,6 +3559,7 @@ cuda_py_test( tags = [ "no_windows", ], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -3454,6 +3619,7 @@ cuda_py_test( "no_windows_gpu", "nomsan", ], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -3515,6 +3681,7 @@ tf_py_test( name = "sets_test", size = "medium", srcs = ["sets_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:errors", "//tensorflow/python:framework", @@ -3533,6 +3700,7 @@ tf_py_test( size = "small", srcs = ["weights_broadcast_test.py"], shard_count = 3, + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -3552,6 +3720,7 @@ tf_py_test( srcs = ["metrics_test.py"], shard_count = 20, tags = ["no_windows_gpu"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -3572,6 +3741,7 @@ tf_py_test( name = "confusion_matrix_test", size = "small", srcs = ["confusion_matrix_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -3587,6 +3757,7 @@ cuda_py_test( name = "bucketize_op_test", size = "medium", srcs = ["bucketize_op_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", @@ -3600,6 +3771,7 @@ tf_py_test( size = "small", srcs = ["sparse_cross_op_test.py"], tags = ["no_windows"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", @@ -3612,6 +3784,7 @@ tf_py_test( name = "garbage_collection_test", size = "small", srcs = ["garbage_collection_test.py"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:dtypes", @@ -3693,6 +3866,7 @@ cuda_py_test( size = "medium", srcs = ["cond_v2_test.py"], grpc_enabled = True, + tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -3768,6 +3942,7 @@ cuda_py_test( srcs = ["tridiagonal_matmul_op_test.py"], shard_count = 10, tags = ["no_rocm"], + tfrt_enabled = True, deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", From 12a806e96866296b154134b27ef4228f39f403cc Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Sat, 8 Aug 2020 19:12:31 -0700 Subject: [PATCH 2407/2522] PR #42109: Check input and axis param in quantize and dequantize Imported from GitHub PR https://github.com/tensorflow/tensorflow/pull/42109 Try to fix https://github.com/tensorflow/tensorflow/issues/42105 Copybara import of the project: -- 0573f8cb6976c592eee660da9e4ce58e0c1eb0c0 by bhack : Check input and axis params -- 3e5a78fa1b3854e536587a94514ac42b8b621225 by bhack : Else fix PiperOrigin-RevId: 325639857 Change-Id: Ifc18bc9686e9ff38839bb0c45fb3ef1d2ad9c208 --- tensorflow/python/ops/array_ops.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 9875342730c..5d68deb7ac1 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -5316,13 +5316,12 @@ def quantize_and_dequantize( A `Tensor`. Each element is the result of quantizing and dequantizing the corresponding element of `input`. """ - with ops.name_scope(name, "quantize_and_dequantize", [input]) as name: - if not tensor_util.is_tensor(input): - input = ops.convert_to_tensor(input) if axis is None: axis = -1 - else: - axis = get_positive_axis(axis, input.shape.ndims) + elif axis < 0: + if input.shape.ndims is None: + raise ValueError("input should have known rank to use negative axis.") + axis %= input.shape.ndims return gen_array_ops.quantize_and_dequantize_v2( input, From 8f35f8fd6be8e7dcba038fd87065f88035a5aa20 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 9 Aug 2020 02:01:33 -0700 Subject: [PATCH 2408/2522] Update GraphDef version to 488. PiperOrigin-RevId: 325663031 Change-Id: I5dbbba32c099c84f19dc20174929489b13b353ff --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 3512cb4c5b9..1813717e87a 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 487 // Updated: 2020/8/8 +#define TF_GRAPH_DEF_VERSION 488 // Updated: 2020/8/9 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From 4308605c8926e5f53550d22fde86d22573507d2c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 9 Aug 2020 02:01:47 -0700 Subject: [PATCH 2409/2522] compat: Update forward compatibility horizon to 2020-08-09 PiperOrigin-RevId: 325663050 Change-Id: I312308efc62cc443638895aaa32ee33b23f4ea05 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 01ea900fd11..fefbf667704 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 8, 8) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 8, 9) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From 947b6c3a4bc8d51565e73e2f3d977b3298b9f64c Mon Sep 17 00:00:00 2001 From: Alexander Belyaev Date: Sun, 9 Aug 2020 02:36:32 -0700 Subject: [PATCH 2410/2522] [MLIR] Add e2e test for unranked unary TF op, lowered and run with CPU runner. PiperOrigin-RevId: 325665428 Change-Id: I3e8a1a3a9551ba470e858fb775a31ca894f47359 --- .../mhlo/transforms/transform_unranked_hlo.cc | 2 +- .../kernel_gen/transforms/bufferize_pass.cc | 30 +++++++++++++++++-- 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/transform_unranked_hlo.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/transform_unranked_hlo.cc index b6e55a9322f..7c985ea7535 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/transform_unranked_hlo.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/transform_unranked_hlo.cc @@ -170,7 +170,7 @@ struct TransformUnrankedHloPass PopulateTransformUnrankedHloPatterns(&ctx, &patterns); // Apply transformation. - if (failed(applyFullConversion(getFunction(), target, patterns))) + if (failed(applyPartialConversion(getFunction(), target, patterns))) return signalPassFailure(); } }; diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/bufferize_pass.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/bufferize_pass.cc index 7d195c69c37..ef07c801bc4 100644 --- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/bufferize_pass.cc +++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/bufferize_pass.cc @@ -44,6 +44,28 @@ namespace { #define GEN_PASS_CLASSES #include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/kernel_gen_passes.h.inc" +// TODO(herhut) : This could become a real pattern in bufferize pass. What we +// would need to do is insert a copy to model the semantics correctly. The same +// is true for the TensorLoad pattern that is already in there. Then buffer +// assignment free insertion and copy removal should clean this up for us. +// +// This patten erases `tensor_store(src_unranked_tensor, dst_unranked_memref)` +// op and replaces the result of the defining op produced `dst_unranked_memref` +// with the rewritten `src_unranked_tensor`. 
+class UnrankedTensorStoreTestOnlyPattern + : public OpConversionPattern { + public: + using OpConversionPattern::OpConversionPattern; + + LogicalResult matchAndRewrite( + mlir::TensorStoreOp op, ArrayRef operands, + ConversionPatternRewriter& rewriter) const final { + rewriter.replaceOp(op.memref().getDefiningOp(), op.tensor()); + rewriter.replaceOp(op, {}); + return success(); + } +}; + struct BufferizePass : public BufferizePassBase { public: void runOnOperation() override { @@ -57,8 +79,11 @@ struct BufferizePass : public BufferizePassBase { target.addLegalOp(); target.addIllegalDialect(); target.addIllegalOp(); - target.addIllegalOp(); target.addIllegalOp(); + target.addIllegalOp(); + target.addDynamicallyLegalOp([&](TensorStoreOp op) { + return !op.tensor().getType().isa(); + }); BufferAssignmentTypeConverter converter; auto typesAreLegal = [&converter](Operation* op) { @@ -86,8 +111,9 @@ struct BufferizePass : public BufferizePassBase { &converter, &patterns); populateStandardBufferizePattern(func.getContext(), &bufferAssignment, &converter, &patterns); + patterns.insert(func.getContext()); - return applyFullConversion(func, target, patterns); + return applyPartialConversion(func, target, patterns); }); if (result.wasInterrupted()) { signalPassFailure(); From 5676c66e9108dd6bb609b6e032860ba9c2e80078 Mon Sep 17 00:00:00 2001 From: Kibeom Kim Date: Sun, 9 Aug 2020 09:48:49 -0700 Subject: [PATCH 2411/2522] Add convert_to_tensor KPI and introduce @trace.trace_wrapper(...) API Note that this is a superset of tf.convert_to_tensor, which includes internal convert_to_tensor calls. Also, introduced @trace.trace_wrapper(...) API that's faster than `with trace.Trace(...):` API. Benchmark: `with trace.Trace(...):` time : 0.67 us `@trace.trace_wrapper(...)` time : 0.21 us Direct `if trace.enabled:` inlining time : 0.17 us PiperOrigin-RevId: 325690563 Change-Id: I3251e38b7543e3121be6fad0266706a1b3b5c389 --- tensorflow/python/BUILD | 1 + tensorflow/python/framework/ops.py | 2 ++ tensorflow/python/profiler/trace.py | 42 +++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 4efe769c59d..a8a70566ab7 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -1702,6 +1702,7 @@ py_library( "//tensorflow/python/eager:core", "//tensorflow/python/eager:monitoring", "//tensorflow/python/eager:tape", + "//tensorflow/python/profiler:traceme", "@six_archive//:six", ], ) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 75a36f83fc5..f07bca17061 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -62,6 +62,7 @@ from tensorflow.python.framework import versions from tensorflow.python.ops import control_flow_util from tensorflow.python.platform import app from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.profiler import trace from tensorflow.python.types import core as core_tf_types from tensorflow.python.types import internal from tensorflow.python.util import compat @@ -1472,6 +1473,7 @@ def pack_eager_tensors(tensors, ctx=None): return packed_tensor +@trace.trace_wrapper("convert_to_tensor") def convert_to_tensor(value, dtype=None, name=None, diff --git a/tensorflow/python/profiler/trace.py b/tensorflow/python/profiler/trace.py index 0591d90fa43..e4cf581bd25 100644 --- a/tensorflow/python/profiler/trace.py +++ b/tensorflow/python/profiler/trace.py @@ -18,6 +18,8 @@ from __future__ import absolute_import from 
__future__ import division from __future__ import print_function +import functools + from tensorflow.python.profiler.internal import _pywrap_traceme from tensorflow.python.util.tf_export import tf_export @@ -123,3 +125,43 @@ class Trace(object): def __exit__(self, exc_type, exc_val, exc_tb): if self._traceme: self._traceme.Stop() + + +def trace_wrapper(trace_name, **trace_kwargs): + """Decorator alternative to `with Trace(): ...`. It's faster. + + Args: + trace_name: The name of the trace event. + **trace_kwargs: Keyword arguments added to the trace event. Both the key and + value are of types that can be converted to strings, which will be + interpreted by the profiler according to the traceme name. + + Returns: + A decorator that can wrap a function and apply `Trace` scope if needed. + + Example usage: + ```python + + @trace_wrapper('trace_name') + def func(x, y, z): + pass # code to execute and apply `Trace` if needed. + + # Equivalent to + # with Trace('trace_name'): + # func(1, 2, 3) + func(1, 2, 3) + ``` + """ + + def inner_wrapper(func): + + @functools.wraps(func) + def wrapped(*args, **kwargs): + if enabled: + with Trace(trace_name, **trace_kwargs): + return func(*args, **kwargs) + return func(*args, **kwargs) + + return wrapped + + return inner_wrapper From 11f952a84a07771e43c068c7094f1ee3674f7b20 Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Sun, 9 Aug 2020 10:41:51 -0700 Subject: [PATCH 2412/2522] Disable tensor_map_test for now since it failed msan test. PiperOrigin-RevId: 325693925 Change-Id: I905ac70c5a6717d5da69b914a2093d357f82f67a --- tensorflow/core/kernels/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 9be043c3907..ccb12d9b09d 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -2932,6 +2932,7 @@ tf_cc_tests( srcs = [ "tensor_map_test.cc", ], + tags = ["nomsan"], # b/163222155 deps = [ ":tensor_map", "//tensorflow/core:framework", From 11f5967d2ca3f73ecff4ac5c32bfc53a744a9871 Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Sun, 9 Aug 2020 10:43:12 -0700 Subject: [PATCH 2413/2522] Disable op_level_cost_estimator_test for the moment. PiperOrigin-RevId: 325694016 Change-Id: I4eac77ca6bc9c3cd68418f8afdd72679b63827d3 --- tensorflow/core/grappler/costs/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/core/grappler/costs/BUILD b/tensorflow/core/grappler/costs/BUILD index edbdaffa1c8..02c69920b84 100644 --- a/tensorflow/core/grappler/costs/BUILD +++ b/tensorflow/core/grappler/costs/BUILD @@ -337,6 +337,7 @@ cc_library( tf_cc_test( name = "op_level_cost_estimator_test", srcs = ["op_level_cost_estimator_test.cc"], + tags = ["no_oss"], # b/163222310 deps = [ ":op_level_cost_estimator", "//tensorflow/core:framework", From e69bf2f9e30cefee81c7fdcee1ae71962b09d19c Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Sun, 9 Aug 2020 10:44:32 -0700 Subject: [PATCH 2414/2522] Disable cwise_ops_test on windows for now. 
PiperOrigin-RevId: 325694097 Change-Id: I7566db6043b246efd67f947ed36977af319e921d --- tensorflow/python/kernel_tests/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index e73f2ea29fc..2888730e2bb 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -3411,6 +3411,7 @@ cuda_py_test( size = "medium", srcs = ["cwise_ops_test.py"], shard_count = 50, + tags = ["no_windows"], # b/163222163 tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", From 1f3b5f79383f3f84957c5f98710ed9d24bdeba2c Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Sun, 9 Aug 2020 17:55:57 -0700 Subject: [PATCH 2415/2522] [XLA:SPMD] Fix replicate to partial sharding PiperOrigin-RevId: 325721330 Change-Id: I53448aa275c4c3ef8790485994c0ebb6cc4394a7 --- tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc index 9db76a65486..813ccc46f32 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc @@ -302,14 +302,11 @@ PartitionedHlo PartitionedHlo::ReshardNoCache(const HloSharding& target) { 1); std::iota(group_dims.begin(), group_dims.end(), 0); auto target_grouped = GroupShardingOnDims(target, group_dims); - auto per_group_partitioner_state = CreatePerGroupPartitioningState( - state_, target_grouped.device_groups, state_.b); auto partially_sharded = PerGroupSliceFromReplicated( hlo_, state_.partition_id, target_grouped.device_groups, group_dims, target_grouped.group_dim_sizes, state_.b); partially_sharded->set_sharding(target); - return PartitionedHlo(partially_sharded, base_shape(), - per_group_partitioner_state); + return PartitionedHlo(partially_sharded, base_shape(), state_); } // 'Replicated' to 'Tiled'. From f4c0fae59291f9d5f92bdadc166cb616315a551e Mon Sep 17 00:00:00 2001 From: Thai Nguyen Date: Sun, 9 Aug 2020 18:01:47 -0700 Subject: [PATCH 2416/2522] Fix linking error of android_tensorflow_image_op The errors occurs due to duplicated definition in portable_tensorflow_lib_lite and tensorflow/core/platform:strcat (a dependency listed in android_gif_internal). 
PiperOrigin-RevId: 325721659 Change-Id: Ic5c0accd67999f901d0cb284467b808f293d4771 --- tensorflow/core/BUILD | 5 +---- tensorflow/core/lib/gif/gif_io.cc | 25 +++++++++++++------------ 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 67e0a160c4f..41eba6b5e28 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1977,6 +1977,7 @@ cc_library( ":lib", ":lib_internal", "//tensorflow/core/platform:gif", + "@com_google_absl//absl/strings", ], ) @@ -2084,13 +2085,9 @@ cc_library( copts = tf_copts(), linkopts = ["-ldl"], deps = [ - "//tensorflow/core/lib/strings:numbers", - "//tensorflow/core/lib/strings:strcat", "//tensorflow/core/platform:dynamic_annotations", "//tensorflow/core/platform:gif", "//tensorflow/core/platform:logging", - "//tensorflow/core/platform:numbers", - "//tensorflow/core/platform:strcat", "//tensorflow/core/platform:stringpiece", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/strings", diff --git a/tensorflow/core/lib/gif/gif_io.cc b/tensorflow/core/lib/gif/gif_io.cc index 32e2f6dfa52..659513d05ed 100644 --- a/tensorflow/core/lib/gif/gif_io.cc +++ b/tensorflow/core/lib/gif/gif_io.cc @@ -16,9 +16,11 @@ limitations under the License. // Functions to read images in GIF format. #include "tensorflow/core/lib/gif/gif_io.h" + #include + +#include "absl/strings/str_cat.h" #include "tensorflow/core/lib/gtl/cleanup.h" -#include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/gif.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/mem.h" @@ -68,17 +70,17 @@ uint8* Decode(const void* srcdata, int datasize, } }); if (error_code != D_GIF_SUCCEEDED) { - *error_string = strings::StrCat("failed to open gif file: ", - GifErrorStringNonNull(error_code)); + *error_string = absl::StrCat("failed to open gif file: ", + GifErrorStringNonNull(error_code)); return nullptr; } if (DGifSlurp(gif_file) != GIF_OK) { - *error_string = strings::StrCat("failed to slurp gif file: ", - GifErrorStringNonNull(gif_file->Error)); + *error_string = absl::StrCat("failed to slurp gif file: ", + GifErrorStringNonNull(gif_file->Error)); return nullptr; } if (gif_file->ImageCount <= 0) { - *error_string = strings::StrCat("gif file does not contain any image"); + *error_string = "gif file does not contain any image"; return nullptr; } @@ -118,8 +120,7 @@ uint8* Decode(const void* srcdata, int datasize, img_desc->Height != height) { // If the first frame does not fill the entire canvas then return error. if (k == 0) { - *error_string = - strings::StrCat("the first frame does not fill the canvas"); + *error_string = "the first frame does not fill the canvas"; return nullptr; } // Otherwise previous frame will be reused to fill the unoccupied canvas. @@ -144,7 +145,7 @@ uint8* Decode(const void* srcdata, int datasize, ? 
this_image->ImageDesc.ColorMap : gif_file->SColorMap; if (color_map == nullptr) { - *error_string = strings::StrCat("missing color map for frame ", k); + *error_string = absl::StrCat("missing color map for frame ", k); return nullptr; } @@ -156,9 +157,9 @@ uint8* Decode(const void* srcdata, int datasize, (j - img_desc->Left)]; if (color_index >= color_map->ColorCount) { - *error_string = strings::StrCat("found color index ", color_index, - " outside of color map range ", - color_map->ColorCount); + *error_string = absl::StrCat("found color index ", color_index, + " outside of color map range ", + color_map->ColorCount); return nullptr; } From 5f75a84642b6032c553362f5e48254c4bf0aa86c Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Sun, 9 Aug 2020 19:18:29 -0700 Subject: [PATCH 2417/2522] Disable gpu_compatibility_test on mac for now. PiperOrigin-RevId: 325727277 Change-Id: I55338ecf787c010d16410e37c804f0391b5dad75 --- tensorflow/lite/experimental/acceleration/compatibility/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/lite/experimental/acceleration/compatibility/BUILD b/tensorflow/lite/experimental/acceleration/compatibility/BUILD index 387f475fa17..97c903d561f 100644 --- a/tensorflow/lite/experimental/acceleration/compatibility/BUILD +++ b/tensorflow/lite/experimental/acceleration/compatibility/BUILD @@ -169,6 +169,7 @@ cc_library( cc_test( name = "gpu_compatibility_test", srcs = ["gpu_compatibility_test.cc"], + tags = ["no_mac"], # b/163222453 deps = [ ":devicedb_sample", ":gpu_compatibility", From 4e7127d73f2ddf14b85ef55e3c4c0717dbbcde1d Mon Sep 17 00:00:00 2001 From: Chao Mei Date: Sun, 9 Aug 2020 19:33:03 -0700 Subject: [PATCH 2418/2522] Add a method to add metric to the benchmark entry. PiperOrigin-RevId: 325728292 Change-Id: I1829091880d54a4e6692c0fd3d39b3f128a3a1a6 --- tensorflow/core/util/reporter.cc | 8 ++++++++ tensorflow/core/util/reporter.h | 3 +++ tensorflow/core/util/reporter_test.cc | 25 +++++++++++++++++++++++++ 3 files changed, 36 insertions(+) diff --git a/tensorflow/core/util/reporter.cc b/tensorflow/core/util/reporter.cc index 8e9d863b4c2..44465a58329 100644 --- a/tensorflow/core/util/reporter.cc +++ b/tensorflow/core/util/reporter.cc @@ -91,6 +91,14 @@ Status TestReporter::SetProperty(const string& name, double value) { return Status::OK(); } +Status TestReporter::AddMetric(const string& name, double value) { + if (report_file_.IsClosed()) return Status::OK(); + auto* metric = benchmark_entry_.add_metrics(); + metric->set_name(name); + metric->set_value(value); + return Status::OK(); +} + Status TestReporter::Initialize() { return report_file_.Initialize(); } } // namespace tensorflow diff --git a/tensorflow/core/util/reporter.h b/tensorflow/core/util/reporter.h index 51d7502701c..900fe40353e 100644 --- a/tensorflow/core/util/reporter.h +++ b/tensorflow/core/util/reporter.h @@ -111,6 +111,9 @@ class TestReporter { // Set property on Benchmark to the given value. Status SetProperty(const string& name, const string& value); + // Add the given value to the metrics on the Benchmark. + Status AddMetric(const string& name, double value); + // TODO(b/32704451): Don't just ignore the ::tensorflow::Status object! ~TestReporter() { Close().IgnoreError(); } // Autoclose in destructor. 
diff --git a/tensorflow/core/util/reporter_test.cc b/tensorflow/core/util/reporter_test.cc index 4c06560b852..77e7ed6467e 100644 --- a/tensorflow/core/util/reporter_test.cc +++ b/tensorflow/core/util/reporter_test.cc @@ -138,5 +138,30 @@ TEST(TestReporter, SetProperties) { EXPECT_EQ(4.0, extras.at("double_prop").double_value()); } +TEST(TestReporter, AddMetrics) { + string fname = + strings::StrCat(testing::TmpDir(), "/test_reporter_benchmarks_"); + TestReporter test_reporter(fname, "b3/4/5"); + TF_EXPECT_OK(test_reporter.Initialize()); + TF_EXPECT_OK(test_reporter.AddMetric("metric1", 2.0)); + TF_EXPECT_OK(test_reporter.AddMetric("metric2", 3.0)); + + TF_EXPECT_OK(test_reporter.Close()); + string expected_fname = strings::StrCat(fname, "b3__4__5"); + string read; + TF_EXPECT_OK(ReadFileToString(Env::Default(), expected_fname, &read)); + + BenchmarkEntries benchmark_entries; + ASSERT_TRUE(benchmark_entries.ParseFromString(read)); + ASSERT_EQ(1, benchmark_entries.entry_size()); + const BenchmarkEntry& benchmark_entry = benchmark_entries.entry(0); + const auto& metrics = benchmark_entry.metrics(); + ASSERT_EQ(2, metrics.size()); + EXPECT_EQ("metric1", metrics.at(0).name()); + EXPECT_EQ(2.0, metrics.at(0).value()); + EXPECT_EQ("metric2", metrics.at(1).name()); + EXPECT_EQ(3.0, metrics.at(1).value()); +} + } // namespace } // namespace tensorflow From 0720cbfdd3b1147f6c88343ee04997fc3085c5dd Mon Sep 17 00:00:00 2001 From: Chao Mei Date: Sun, 9 Aug 2020 20:09:37 -0700 Subject: [PATCH 2419/2522] Make it more clear about applying xnnpack delegate by default in TFLite's interpreter when a delegate is explicitly applied via ModifyGraphWithDelegate. This is to address https://github.com/tensorflow/tensorflow/issues/41977 PiperOrigin-RevId: 325731102 Change-Id: I6702088e5b10cae23f8de5b7b30d3fc53243fb86 --- tensorflow/lite/interpreter.cc | 11 ++++++++++- .../lite/tools/benchmark/benchmark_tflite_model.cc | 13 ++++++++----- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/tensorflow/lite/interpreter.cc b/tensorflow/lite/interpreter.cc index 7a5f4df5155..307ede187b2 100644 --- a/tensorflow/lite/interpreter.cc +++ b/tensorflow/lite/interpreter.cc @@ -188,7 +188,16 @@ TfLiteStatus Interpreter::AllocateTensors() { // The execution will fall back to default implementation if the XNNPACK // delegate fails to be applied. Therefore, we ignore the return status // here and let it fall through the rest of the code. 
- ModifyGraphWithDelegate(std::move(lazy_delegate_provider_)); + auto status = ModifyGraphWithDelegate(std::move(lazy_delegate_provider_)); + if (status != kTfLiteOk) { + TF_LITE_REPORT_ERROR( + error_reporter_, + "Ignoring failed application of the default TensorFlow Lite " + "delegate."); + } else { + TFLITE_LOG(TFLITE_LOG_INFO, + "Successfully applied the default TensorFlow Lite delegate."); + } lazy_delegate_provider_.reset(); } diff --git a/tensorflow/lite/tools/benchmark/benchmark_tflite_model.cc b/tensorflow/lite/tools/benchmark/benchmark_tflite_model.cc index 9da48badfbc..39ecded5484 100644 --- a/tensorflow/lite/tools/benchmark/benchmark_tflite_model.cc +++ b/tensorflow/lite/tools/benchmark/benchmark_tflite_model.cc @@ -669,18 +669,21 @@ TfLiteStatus BenchmarkTfLiteModel::Init() { return kTfLiteError; } if (fully_delegated) { - TFLITE_LOG(INFO) << "Applied " << delegate_provider->GetName() + TFLITE_LOG(INFO) << "Explicitly applied " + << delegate_provider->GetName() << " delegate, and the model graph will be completely" << " executed by the delegate."; } else if (num_delegated_kernels > 0) { - TFLITE_LOG(INFO) << "Applied " << delegate_provider->GetName() + TFLITE_LOG(INFO) << "Explicitly applied " + << delegate_provider->GetName() << " delegate, and the model graph will be partially" << " executed by the delegate w/ " << num_delegated_kernels << " delegate kernels."; } else { - TFLITE_LOG(INFO) << "Though " << delegate_provider->GetName() - << " delegate is applied, the model graph will not be" - << " executed by the delegate."; + TFLITE_LOG(INFO) + << "Though " << delegate_provider->GetName() + << " delegate is explicitly applied, the model graph will not be" + << " executed by the delegate."; } } owned_delegates_.emplace_back(std::move(delegate)); From ef88a7aad44756a99aa57deddc59dc756ac2b469 Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Sun, 9 Aug 2020 20:54:59 -0700 Subject: [PATCH 2420/2522] Port the Mobilenet_v3 to keras/application. Fix https://github.com/tensorflow/tensorflow/issues/40217. The implementation is based on https://github.com/keras-team/keras-applications/blob/master/keras_applications/mobilenet_v3.py with a few modifications. 1. Updated to use TF backend only (theano related code is removed). 2. Remove all the *kwargs, and directly use tf.keras packages (disallow package injection). 3. Add 'classifier_activation' which is used by 'top' layer. This is aligned with v1/v2 implementation. 4. [Major] Changed the include_top implementation. The Conv2D layer with name "Conv_2" and its activation is moved to be base model structure, which means they are in the model even the include_top is False. This is based on comparing the implementation detail in original slim implementation in https://github.com/tensorflow/models/blob/a811a3b7e640722318ad868c99feddf3f3063e36/research/slim/nets/mobilenet/mobilenet_v3.py. If we can confirm this change is correct, then we should also fix it on the OSS keras_application as well. 5. [Major] Remove the first ZeroPadding2D layer right after the model input, and change the first conv2D layer to use "same" padding. This is aligned with original implementation in https://github.com/tensorflow/models/blob/692215511a27c49dadabd4fac1d83aef25bc840f/research/slim/nets/mobilenet/mobilenet.py#L155, where use_explicit_padding is False. 6. Added API for preprocess_input and decode_predictions, which aligns with v1 and v2 implementation. 
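For reference, a minimal usage sketch of the ported models (not part of this change; it assumes the pretrained `imagenet` weights referenced by `BASE_WEIGHT_PATH` are downloadable, and uses random pixels purely as placeholder input):

```python
import numpy as np
from tensorflow.python.keras.applications import mobilenet_v3

# Small variant with the classification head (include_top=True).
model = mobilenet_v3.MobileNetV3Small(weights='imagenet')

# preprocess_input is a pass-through for v3 (the model rescales inputs itself
# via its Rescaling layer); it exists for API parity with v1/v2 (point 6).
images = np.random.uniform(0, 255, size=(1, 224, 224, 3)).astype('float32')
preds = model.predict(mobilenet_v3.preprocess_input(images))
print(mobilenet_v3.decode_predictions(preds, top=3))
```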
PiperOrigin-RevId: 325734579 Change-Id: I2ba6a9aa695baaa145d1a7cd3aeae86d48b823a2 --- RELEASE.md | 1 + tensorflow/python/keras/api/BUILD | 1 + tensorflow/python/keras/applications/BUILD | 17 + .../applications_load_weight_test.py | 3 + .../keras/applications/applications_test.py | 3 + .../python/keras/applications/mobilenet_v3.py | 567 ++++++++++++++++++ .../tools/api/generator/api_init_files.bzl | 1 + .../tools/api/generator/api_init_files_v1.bzl | 1 + ...flow.keras.applications.mobilenet_v3.pbtxt | 11 + .../v1/tensorflow.keras.applications.pbtxt | 12 + ...flow.keras.applications.mobilenet_v3.pbtxt | 11 + .../v2/tensorflow.keras.applications.pbtxt | 12 + 12 files changed, 640 insertions(+) create mode 100644 tensorflow/python/keras/applications/mobilenet_v3.py create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.keras.applications.mobilenet_v3.pbtxt create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.keras.applications.mobilenet_v3.pbtxt diff --git a/RELEASE.md b/RELEASE.md index 5f0553c2a94..191c18e5ddb 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -101,6 +101,7 @@ * `Optimizer.minimize` can now accept a loss `Tensor` and a `GradientTape` as an alternative to accepting a `callable` loss. * Added `beta` parameter to FTRL optimizer to match paper. + * Added `mobilenet_v3` to keras application model. * `tf.function` / AutoGraph: * Added `experimental_follow_type_hints` argument for `tf.function`. When True, the function may use type annotations to optimize the tracing diff --git a/tensorflow/python/keras/api/BUILD b/tensorflow/python/keras/api/BUILD index ff54400ae15..d69930b7455 100644 --- a/tensorflow/python/keras/api/BUILD +++ b/tensorflow/python/keras/api/BUILD @@ -23,6 +23,7 @@ keras_packages = [ "tensorflow.python.keras.applications.inception_v3", "tensorflow.python.keras.applications.mobilenet", "tensorflow.python.keras.applications.mobilenet_v2", + "tensorflow.python.keras.applications.mobilenet_v3", "tensorflow.python.keras.applications.nasnet", "tensorflow.python.keras.applications.resnet", "tensorflow.python.keras.applications.resnet_v2", diff --git a/tensorflow/python/keras/applications/BUILD b/tensorflow/python/keras/applications/BUILD index 0c566c6e6d5..a2c41dbe501 100644 --- a/tensorflow/python/keras/applications/BUILD +++ b/tensorflow/python/keras/applications/BUILD @@ -25,6 +25,7 @@ py_library( "inception_v3.py", "mobilenet.py", "mobilenet_v2.py", + "mobilenet_v3.py", "nasnet.py", "resnet.py", "resnet_v2.py", @@ -209,6 +210,22 @@ tf_py_test( ], ) +tf_py_test( + name = "applications_load_weight_test_mobilenet_v3", + srcs = ["applications_load_weight_test.py"], + args = ["--module=mobilenet_v3"], + main = "applications_load_weight_test.py", + tags = [ + "no_oss", + "no_pip", + ], + deps = [ + ":applications", + "//tensorflow/python:client_testlib", + "@absl_py//absl/testing:parameterized", + ], +) + tf_py_test( name = "applications_load_weight_test_densenet", size = "large", diff --git a/tensorflow/python/keras/applications/applications_load_weight_test.py b/tensorflow/python/keras/applications/applications_load_weight_test.py index 42146c66f97..aaafe9f984a 100644 --- a/tensorflow/python/keras/applications/applications_load_weight_test.py +++ b/tensorflow/python/keras/applications/applications_load_weight_test.py @@ -28,6 +28,7 @@ from tensorflow.python.keras.applications import inception_resnet_v2 from tensorflow.python.keras.applications import inception_v3 from tensorflow.python.keras.applications import mobilenet from tensorflow.python.keras.applications 
import mobilenet_v2 +from tensorflow.python.keras.applications import mobilenet_v3 from tensorflow.python.keras.applications import nasnet from tensorflow.python.keras.applications import resnet from tensorflow.python.keras.applications import resnet_v2 @@ -51,6 +52,8 @@ ARG_TO_MODEL = { [inception_resnet_v2.InceptionResNetV2]), 'mobilenet': (mobilenet, [mobilenet.MobileNet]), 'mobilenet_v2': (mobilenet_v2, [mobilenet_v2.MobileNetV2]), + 'mobilenet_v3': (mobilenet_v3, [mobilenet_v3.MobileNetV3Small, + mobilenet_v3.MobileNetV3Large]), 'densenet': (densenet, [densenet.DenseNet121, densenet.DenseNet169, densenet.DenseNet201]), 'nasnet_mobile': (nasnet, [nasnet.NASNetMobile]), diff --git a/tensorflow/python/keras/applications/applications_test.py b/tensorflow/python/keras/applications/applications_test.py index 198bebd904c..d92a2aaee7f 100644 --- a/tensorflow/python/keras/applications/applications_test.py +++ b/tensorflow/python/keras/applications/applications_test.py @@ -27,6 +27,7 @@ from tensorflow.python.keras.applications import inception_resnet_v2 from tensorflow.python.keras.applications import inception_v3 from tensorflow.python.keras.applications import mobilenet from tensorflow.python.keras.applications import mobilenet_v2 +from tensorflow.python.keras.applications import mobilenet_v3 from tensorflow.python.keras.applications import nasnet from tensorflow.python.keras.applications import resnet from tensorflow.python.keras.applications import resnet_v2 @@ -50,6 +51,8 @@ MODEL_LIST_NO_NASNET = [ (inception_resnet_v2.InceptionResNetV2, 1536), (mobilenet.MobileNet, 1024), (mobilenet_v2.MobileNetV2, 1280), + (mobilenet_v3.MobileNetV3Small, 1024), + (mobilenet_v3.MobileNetV3Large, 1280), (densenet.DenseNet121, 1024), (densenet.DenseNet169, 1664), (densenet.DenseNet201, 1920), diff --git a/tensorflow/python/keras/applications/mobilenet_v3.py b/tensorflow/python/keras/applications/mobilenet_v3.py new file mode 100644 index 00000000000..bdf2ca40142 --- /dev/null +++ b/tensorflow/python/keras/applications/mobilenet_v3.py @@ -0,0 +1,567 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# pylint: disable=invalid-name +# pylint: disable=missing-function-docstring +"""MobileNet v3 models for Keras.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + + +from tensorflow.python.keras import backend +from tensorflow.python.keras import models +from tensorflow.python.keras.applications import imagenet_utils +from tensorflow.python.keras.layers import VersionAwareLayers +from tensorflow.python.keras.utils import data_utils +from tensorflow.python.keras.utils import layer_utils +from tensorflow.python.lib.io import file_io +from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.util.tf_export import keras_export + + +# TODO(scottzhu): Change this to the GCS path. 
+BASE_WEIGHT_PATH = ('https://storage.googleapis.com/tensorflow/' + 'keras-applications/mobilenet_v3/') +WEIGHTS_HASHES = { + 'large_224_0.75_float': ('765b44a33ad4005b3ac83185abf1d0eb', + 'e7b4d1071996dd51a2c2ca2424570e20'), + 'large_224_1.0_float': ('59e551e166be033d707958cf9e29a6a7', + '037116398e07f018c0005ffcb0406831'), + 'large_minimalistic_224_1.0_float': ('675e7b876c45c57e9e63e6d90a36599c', + 'a2c33aed672524d1d0b4431808177695'), + 'small_224_0.75_float': ('cb65d4e5be93758266aa0a7f2c6708b7', + '4d2fe46f1c1f38057392514b0df1d673'), + 'small_224_1.0_float': ('8768d4c2e7dee89b9d02b2d03d65d862', + 'be7100780f875c06bcab93d76641aa26'), + 'small_minimalistic_224_1.0_float': ('99cd97fb2fcdad2bf028eb838de69e37', + '20d4e357df3f7a6361f3a288857b1051'), +} + +layers = VersionAwareLayers() + + +BASE_DOCSTRING = """Instantiates the {name} architecture. + + Reference: + - [Searching for MobileNetV3]( + https://arxiv.org/pdf/1905.02244.pdf) (ICCV 2019) + + The following table describes the performance of MobileNets: + ------------------------------------------------------------------------ + MACs stands for Multiply Adds + + |Classification Checkpoint|MACs(M)|Parameters(M)|Top1 Accuracy|Pixel1|CPU(ms)| + | [mobilenet_v3_large_1.0_224] | 217 | 5.4 | 75.6 | 51.2 | + | [mobilenet_v3_large_0.75_224] | 155 | 4.0 | 73.3 | 39.8 | + | [mobilenet_v3_large_minimalistic_1.0_224] | 209 | 3.9 | 72.3 | 44.1 | + | [mobilenet_v3_small_1.0_224] | 66 | 2.9 | 68.1 | 15.8 | + | [mobilenet_v3_small_0.75_224] | 44 | 2.4 | 65.4 | 12.8 | + | [mobilenet_v3_small_minimalistic_1.0_224] | 65 | 2.0 | 61.9 | 12.2 | + + The weights for all 6 models are obtained and translated from the Tensorflow + checkpoints from TensorFlow checkpoints found [here] + (https://github.com/tensorflow/models/tree/master/research/slim/nets/mobilenet/README.md). + + Optionally loads weights pre-trained on ImageNet. + + Caution: Be sure to properly pre-process your inputs to the application. + Please see `applications.mobilenet_v3.preprocess_input` for an example. + + Arguments: + input_shape: Optional shape tuple, to be specified if you would + like to use a model with an input image resolution that is not + (224, 224, 3). + It should have exactly 3 inputs channels (224, 224, 3). + You can also omit this option if you would like + to infer input_shape from an input_tensor. + If you choose to include both input_tensor and input_shape then + input_shape will be used if they match, if the shapes + do not match then we will throw an error. + E.g. `(160, 160, 3)` would be one valid value. + alpha: controls the width of the network. This is known as the + depth multiplier in the MobileNetV3 paper, but the name is kept for + consistency with MobileNetV1 in Keras. + - If `alpha` < 1.0, proportionally decreases the number + of filters in each layer. + - If `alpha` > 1.0, proportionally increases the number + of filters in each layer. + - If `alpha` = 1, default number of filters from the paper + are used at each layer. + minimalistic: In addition to large and small models this module also + contains so-called minimalistic models, these models have the same + per-layer dimensions characteristic as MobilenetV3 however, they don't + utilize any of the advanced blocks (squeeze-and-excite units, hard-swish, + and 5x5 convolutions). While these models are less efficient on CPU, they + are much more performant on GPU/DSP. + include_top: Boolean, whether to include the fully-connected + layer at the top of the network. Defaults to `True`. 
+ weights: String, one of `None` (random initialization), + 'imagenet' (pre-training on ImageNet), + or the path to the weights file to be loaded. + input_tensor: Optional Keras tensor (i.e. output of + `layers.Input()`) + to use as image input for the model. + pooling: String, optional pooling mode for feature extraction + when `include_top` is `False`. + - `None` means that the output of the model + will be the 4D tensor output of the + last convolutional block. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional block, and thus + the output of the model will be a + 2D tensor. + - `max` means that global max pooling will + be applied. + classes: Integer, optional number of classes to classify images + into, only to be specified if `include_top` is True, and + if no `weights` argument is specified. + dropout_rate: fraction of the input units to drop on the last layer. + classifier_activation: A `str` or callable. The activation function to use + on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + + Returns: + A `keras.Model` instance. + + Raises: + ValueError: in case of invalid argument for `weights`, + or invalid input shape or invalid alpha, rows when + weights='imagenet' + ValueError: if `classifier_activation` is not `softmax` or `None` when + using a pretrained top layer. +""" + + +def MobileNetV3(stack_fn, + last_point_ch, + input_shape=None, + alpha=1.0, + model_type='large', + minimalistic=False, + include_top=True, + weights='imagenet', + input_tensor=None, + classes=1000, + pooling=None, + dropout_rate=0.2, + classifier_activation='softmax'): + if not (weights in {'imagenet', None} or file_io.file_exists(weights)): + raise ValueError('The `weights` argument should be either ' + '`None` (random initialization), `imagenet` ' + '(pre-training on ImageNet), ' + 'or the path to the weights file to be loaded.') + + if weights == 'imagenet' and include_top and classes != 1000: + raise ValueError('If using `weights` as `"imagenet"` with `include_top` ' + 'as true, `classes` should be 1000') + + # Determine proper input shape and default size. 
+ # If both input_shape and input_tensor are used, they should match + if input_shape is not None and input_tensor is not None: + try: + is_input_t_tensor = backend.is_keras_tensor(input_tensor) + except ValueError: + try: + is_input_t_tensor = backend.is_keras_tensor( + layer_utils.get_source_inputs(input_tensor)) + except ValueError: + raise ValueError('input_tensor: ', input_tensor, + 'is not type input_tensor') + if is_input_t_tensor: + if backend.image_data_format == 'channels_first': + if backend.int_shape(input_tensor)[1] != input_shape[1]: + raise ValueError('input_shape: ', input_shape, 'and input_tensor: ', + input_tensor, + 'do not meet the same shape requirements') + else: + if backend.int_shape(input_tensor)[2] != input_shape[1]: + raise ValueError('input_shape: ', input_shape, 'and input_tensor: ', + input_tensor, + 'do not meet the same shape requirements') + else: + raise ValueError('input_tensor specified: ', input_tensor, + 'is not a keras tensor') + + # If input_shape is None, infer shape from input_tensor + if input_shape is None and input_tensor is not None: + + try: + backend.is_keras_tensor(input_tensor) + except ValueError: + raise ValueError('input_tensor: ', input_tensor, 'is type: ', + type(input_tensor), 'which is not a valid type') + + if backend.is_keras_tensor(input_tensor): + if backend.image_data_format() == 'channels_first': + rows = backend.int_shape(input_tensor)[2] + cols = backend.int_shape(input_tensor)[3] + input_shape = (3, cols, rows) + else: + rows = backend.int_shape(input_tensor)[1] + cols = backend.int_shape(input_tensor)[2] + input_shape = (cols, rows, 3) + # If input_shape is None and input_tensor is None using standart shape + if input_shape is None and input_tensor is None: + input_shape = (None, None, 3) + + if backend.image_data_format() == 'channels_last': + row_axis, col_axis = (0, 1) + else: + row_axis, col_axis = (1, 2) + rows = input_shape[row_axis] + cols = input_shape[col_axis] + if rows and cols and (rows < 32 or cols < 32): + raise ValueError('Input size must be at least 32x32; got `input_shape=' + + str(input_shape) + '`') + if weights == 'imagenet': + if (not minimalistic and alpha not in [0.75, 1.0] + or minimalistic and alpha != 1.0): + raise ValueError('If imagenet weights are being loaded, ' + 'alpha can be one of `0.75`, `1.0` for non minimalistic' + ' or `1.0` for minimalistic only.') + + if rows != cols or rows != 224: + logging.warning('`input_shape` is undefined or non-square, ' + 'or `rows` is not 224.' + ' Weights for input shape (224, 224) will be' + ' loaded as the default.') + + if input_tensor is None: + img_input = layers.Input(shape=input_shape) + else: + if not backend.is_keras_tensor(input_tensor): + img_input = layers.Input(tensor=input_tensor, shape=input_shape) + else: + img_input = input_tensor + + channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 + + if minimalistic: + kernel = 3 + activation = relu + se_ratio = None + else: + kernel = 5 + activation = hard_swish + se_ratio = 0.25 + + x = img_input + x = layers.Rescaling(1. 
/ 255.)(x) + x = layers.Conv2D( + 16, + kernel_size=3, + strides=(2, 2), + padding='same', + use_bias=False, + name='Conv')(x) + x = layers.BatchNormalization( + axis=channel_axis, epsilon=1e-3, + momentum=0.999, name='Conv/BatchNorm')(x) + x = activation(x) + + x = stack_fn(x, kernel, activation, se_ratio) + + last_conv_ch = _depth(backend.int_shape(x)[channel_axis] * 6) + + # if the width multiplier is greater than 1 we + # increase the number of output channels + if alpha > 1.0: + last_point_ch = _depth(last_point_ch * alpha) + x = layers.Conv2D( + last_conv_ch, + kernel_size=1, + padding='same', + use_bias=False, + name='Conv_1')(x) + x = layers.BatchNormalization( + axis=channel_axis, epsilon=1e-3, + momentum=0.999, name='Conv_1/BatchNorm')(x) + x = activation(x) + x = layers.Conv2D( + last_point_ch, + kernel_size=1, + padding='same', + use_bias=True, + name='Conv_2')(x) + x = activation(x) + + if include_top: + x = layers.GlobalAveragePooling2D()(x) + if channel_axis == 1: + x = layers.Reshape((last_point_ch, 1, 1))(x) + else: + x = layers.Reshape((1, 1, last_point_ch))(x) + if dropout_rate > 0: + x = layers.Dropout(dropout_rate)(x) + x = layers.Conv2D(classes, kernel_size=1, padding='same', name='Logits')(x) + x = layers.Flatten()(x) + imagenet_utils.validate_activation(classifier_activation, weights) + x = layers.Activation(activation=classifier_activation, + name='Predictions')(x) + else: + if pooling == 'avg': + x = layers.GlobalAveragePooling2D(name='avg_pool')(x) + elif pooling == 'max': + x = layers.GlobalMaxPooling2D(name='max_pool')(x) + # Ensure that the model takes into account + # any potential predecessors of `input_tensor`. + if input_tensor is not None: + inputs = layer_utils.get_source_inputs(input_tensor) + else: + inputs = img_input + + # Create model. + model = models.Model(inputs, x, name='MobilenetV3' + model_type) + + # Load weights. + if weights == 'imagenet': + model_name = '{}{}_224_{}_float'.format( + model_type, '_minimalistic' if minimalistic else '', str(alpha)) + if include_top: + file_name = 'weights_mobilenet_v3_' + model_name + '.h5' + file_hash = WEIGHTS_HASHES[model_name][0] + else: + file_name = 'weights_mobilenet_v3_' + model_name + '_no_top.h5' + file_hash = WEIGHTS_HASHES[model_name][1] + weights_path = data_utils.get_file( + file_name, + BASE_WEIGHT_PATH + file_name, + cache_subdir='models', + file_hash=file_hash) + model.load_weights(weights_path) + elif weights is not None: + model.load_weights(weights) + + return model + + +@keras_export('keras.applications.MobileNetV3Samll') +def MobileNetV3Small(input_shape=None, + alpha=1.0, + minimalistic=False, + include_top=True, + weights='imagenet', + input_tensor=None, + classes=1000, + pooling=None, + dropout_rate=0.2, + classifier_activation='softmax'): + + def stack_fn(x, kernel, activation, se_ratio): + + def depth(d): + return _depth(d * alpha) + + x = _inverted_res_block(x, 1, depth(16), 3, 2, se_ratio, relu, 0) + x = _inverted_res_block(x, 72. / 16, depth(24), 3, 2, None, relu, 1) + x = _inverted_res_block(x, 88. 
/ 24, depth(24), 3, 1, None, relu, 2) + x = _inverted_res_block(x, 4, depth(40), kernel, 2, se_ratio, activation, 3) + x = _inverted_res_block(x, 6, depth(40), kernel, 1, se_ratio, activation, 4) + x = _inverted_res_block(x, 6, depth(40), kernel, 1, se_ratio, activation, 5) + x = _inverted_res_block(x, 3, depth(48), kernel, 1, se_ratio, activation, 6) + x = _inverted_res_block(x, 3, depth(48), kernel, 1, se_ratio, activation, 7) + x = _inverted_res_block(x, 6, depth(96), kernel, 2, se_ratio, activation, 8) + x = _inverted_res_block(x, 6, depth(96), kernel, 1, se_ratio, activation, 9) + x = _inverted_res_block(x, 6, depth(96), kernel, 1, se_ratio, activation, + 10) + return x + + return MobileNetV3(stack_fn, 1024, input_shape, alpha, 'small', minimalistic, + include_top, weights, input_tensor, classes, pooling, + dropout_rate, classifier_activation) + + +@keras_export('keras.applications.MobileNetV3Large') +def MobileNetV3Large(input_shape=None, + alpha=1.0, + minimalistic=False, + include_top=True, + weights='imagenet', + input_tensor=None, + classes=1000, + pooling=None, + dropout_rate=0.2, + classifier_activation='softmax'): + + def stack_fn(x, kernel, activation, se_ratio): + + def depth(d): + return _depth(d * alpha) + + x = _inverted_res_block(x, 1, depth(16), 3, 1, None, relu, 0) + x = _inverted_res_block(x, 4, depth(24), 3, 2, None, relu, 1) + x = _inverted_res_block(x, 3, depth(24), 3, 1, None, relu, 2) + x = _inverted_res_block(x, 3, depth(40), kernel, 2, se_ratio, relu, 3) + x = _inverted_res_block(x, 3, depth(40), kernel, 1, se_ratio, relu, 4) + x = _inverted_res_block(x, 3, depth(40), kernel, 1, se_ratio, relu, 5) + x = _inverted_res_block(x, 6, depth(80), 3, 2, None, activation, 6) + x = _inverted_res_block(x, 2.5, depth(80), 3, 1, None, activation, 7) + x = _inverted_res_block(x, 2.3, depth(80), 3, 1, None, activation, 8) + x = _inverted_res_block(x, 2.3, depth(80), 3, 1, None, activation, 9) + x = _inverted_res_block(x, 6, depth(112), 3, 1, se_ratio, activation, 10) + x = _inverted_res_block(x, 6, depth(112), 3, 1, se_ratio, activation, 11) + x = _inverted_res_block(x, 6, depth(160), kernel, 2, se_ratio, activation, + 12) + x = _inverted_res_block(x, 6, depth(160), kernel, 1, se_ratio, activation, + 13) + x = _inverted_res_block(x, 6, depth(160), kernel, 1, se_ratio, activation, + 14) + return x + + return MobileNetV3(stack_fn, 1280, input_shape, alpha, 'large', minimalistic, + include_top, weights, input_tensor, classes, pooling, + dropout_rate, classifier_activation) + + +MobileNetV3Small.__doc__ = BASE_DOCSTRING.format(name='MobileNetV3Small') +MobileNetV3Large.__doc__ = BASE_DOCSTRING.format(name='MobileNetV3Large') + + +def relu(x): + return layers.ReLU()(x) + + +def hard_sigmoid(x): + return layers.ReLU(6.)(x + 3.) * (1. / 6.) + + +def hard_swish(x): + return layers.Multiply()([hard_sigmoid(x), x]) + + +# This function is taken from the original tf repo. +# It ensures that all layers have a channel number that is divisible by 8 +# It can be seen here: +# https://github.com/tensorflow/models/blob/master/research/ +# slim/nets/mobilenet/mobilenet.py + + +def _depth(v, divisor=8, min_value=None): + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. 
+ if new_v < 0.9 * v: + new_v += divisor + return new_v + + +def _se_block(inputs, filters, se_ratio, prefix): + x = layers.GlobalAveragePooling2D(name=prefix + 'squeeze_excite/AvgPool')( + inputs) + if backend.image_data_format() == 'channels_first': + x = layers.Reshape((filters, 1, 1))(x) + else: + x = layers.Reshape((1, 1, filters))(x) + x = layers.Conv2D( + _depth(filters * se_ratio), + kernel_size=1, + padding='same', + name=prefix + 'squeeze_excite/Conv')( + x) + x = layers.ReLU(name=prefix + 'squeeze_excite/Relu')(x) + x = layers.Conv2D( + filters, + kernel_size=1, + padding='same', + name=prefix + 'squeeze_excite/Conv_1')( + x) + x = hard_sigmoid(x) + x = layers.Multiply(name=prefix + 'squeeze_excite/Mul')([inputs, x]) + return x + + +def _inverted_res_block(x, expansion, filters, kernel_size, stride, se_ratio, + activation, block_id): + channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 + shortcut = x + prefix = 'expanded_conv/' + infilters = backend.int_shape(x)[channel_axis] + if block_id: + # Expand + prefix = 'expanded_conv_{}/'.format(block_id) + x = layers.Conv2D( + _depth(infilters * expansion), + kernel_size=1, + padding='same', + use_bias=False, + name=prefix + 'expand')( + x) + x = layers.BatchNormalization( + axis=channel_axis, + epsilon=1e-3, + momentum=0.999, + name=prefix + 'expand/BatchNorm')( + x) + x = activation(x) + + if stride == 2: + x = layers.ZeroPadding2D( + padding=imagenet_utils.correct_pad(x, kernel_size), + name=prefix + 'depthwise/pad')( + x) + x = layers.DepthwiseConv2D( + kernel_size, + strides=stride, + padding='same' if stride == 1 else 'valid', + use_bias=False, + name=prefix + 'depthwise')( + x) + x = layers.BatchNormalization( + axis=channel_axis, + epsilon=1e-3, + momentum=0.999, + name=prefix + 'depthwise/BatchNorm')( + x) + x = activation(x) + + if se_ratio: + x = _se_block(x, _depth(infilters * expansion), se_ratio, prefix) + + x = layers.Conv2D( + filters, + kernel_size=1, + padding='same', + use_bias=False, + name=prefix + 'project')( + x) + x = layers.BatchNormalization( + axis=channel_axis, + epsilon=1e-3, + momentum=0.999, + name=prefix + 'project/BatchNorm')( + x) + + if stride == 1 and infilters == filters: + x = layers.Add(name=prefix + 'Add')([shortcut, x]) + return x + + +@keras_export('keras.applications.mobilenet_v3.preprocess_input') +def preprocess_input(x, data_format=None): # pylint: disable=unused-argument + return x + + +@keras_export('keras.applications.mobilenet_v3.decode_predictions') +def decode_predictions(preds, top=5): + return imagenet_utils.decode_predictions(preds, top=top) + + +preprocess_input.__doc__ = imagenet_utils.PREPROCESS_INPUT_DOC.format( + mode='', + ret=imagenet_utils.PREPROCESS_INPUT_RET_DOC_TF, + error=imagenet_utils.PREPROCESS_INPUT_ERROR_DOC) +decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__ diff --git a/tensorflow/python/tools/api/generator/api_init_files.bzl b/tensorflow/python/tools/api/generator/api_init_files.bzl index d2770e92b2e..c7a788450dc 100644 --- a/tensorflow/python/tools/api/generator/api_init_files.bzl +++ b/tensorflow/python/tools/api/generator/api_init_files.bzl @@ -97,6 +97,7 @@ KERAS_API_INIT_FILES = [ "keras/applications/inception_v3/__init__.py", "keras/applications/mobilenet/__init__.py", "keras/applications/mobilenet_v2/__init__.py", + "keras/applications/mobilenet_v3/__init__.py", "keras/applications/nasnet/__init__.py", "keras/applications/resnet/__init__.py", "keras/applications/resnet_v2/__init__.py", diff --git 
a/tensorflow/python/tools/api/generator/api_init_files_v1.bzl b/tensorflow/python/tools/api/generator/api_init_files_v1.bzl index d1761b4d2bc..36593eff901 100644 --- a/tensorflow/python/tools/api/generator/api_init_files_v1.bzl +++ b/tensorflow/python/tools/api/generator/api_init_files_v1.bzl @@ -113,6 +113,7 @@ KERAS_API_INIT_FILES_V1 = [ "keras/applications/inception_v3/__init__.py", "keras/applications/mobilenet/__init__.py", "keras/applications/mobilenet_v2/__init__.py", + "keras/applications/mobilenet_v3/__init__.py", "keras/applications/nasnet/__init__.py", "keras/applications/resnet/__init__.py", "keras/applications/resnet_v2/__init__.py", diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.applications.mobilenet_v3.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.applications.mobilenet_v3.pbtxt new file mode 100644 index 00000000000..418ace0882f --- /dev/null +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.applications.mobilenet_v3.pbtxt @@ -0,0 +1,11 @@ +path: "tensorflow.keras.applications.mobilenet_v3" +tf_module { + member_method { + name: "decode_predictions" + argspec: "args=[\'preds\', \'top\'], varargs=None, keywords=None, defaults=[\'5\'], " + } + member_method { + name: "preprocess_input" + argspec: "args=[\'x\', \'data_format\'], varargs=None, keywords=None, defaults=[\'None\'], " + } +} diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.applications.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.applications.pbtxt index 900df849f45..9f367742398 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.applications.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.applications.pbtxt @@ -28,6 +28,10 @@ tf_module { name: "mobilenet_v2" mtype: "" } + member { + name: "mobilenet_v3" + mtype: "" + } member { name: "nasnet" mtype: "" @@ -116,6 +120,14 @@ tf_module { name: "MobileNetV2" argspec: "args=[\'input_shape\', \'alpha\', \'include_top\', \'weights\', \'input_tensor\', \'pooling\', \'classes\', \'classifier_activation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'1.0\', \'True\', \'imagenet\', \'None\', \'None\', \'1000\', \'softmax\'], " } + member_method { + name: "MobileNetV3Large" + argspec: "args=[\'input_shape\', \'alpha\', \'minimalistic\', \'include_top\', \'weights\', \'input_tensor\', \'classes\', \'pooling\', \'dropout_rate\', \'classifier_activation\'], varargs=None, keywords=None, defaults=[\'None\', \'1.0\', \'False\', \'True\', \'imagenet\', \'None\', \'1000\', \'None\', \'0.2\', \'softmax\'], " + } + member_method { + name: "MobileNetV3Samll" + argspec: "args=[\'input_shape\', \'alpha\', \'minimalistic\', \'include_top\', \'weights\', \'input_tensor\', \'classes\', \'pooling\', \'dropout_rate\', \'classifier_activation\'], varargs=None, keywords=None, defaults=[\'None\', \'1.0\', \'False\', \'True\', \'imagenet\', \'None\', \'1000\', \'None\', \'0.2\', \'softmax\'], " + } member_method { name: "NASNetLarge" argspec: "args=[\'input_shape\', \'include_top\', \'weights\', \'input_tensor\', \'pooling\', \'classes\'], varargs=None, keywords=None, defaults=[\'None\', \'True\', \'imagenet\', \'None\', \'None\', \'1000\'], " diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.applications.mobilenet_v3.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.applications.mobilenet_v3.pbtxt new file mode 100644 index 00000000000..418ace0882f --- /dev/null +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.applications.mobilenet_v3.pbtxt @@ -0,0 +1,11 @@ +path: 
"tensorflow.keras.applications.mobilenet_v3" +tf_module { + member_method { + name: "decode_predictions" + argspec: "args=[\'preds\', \'top\'], varargs=None, keywords=None, defaults=[\'5\'], " + } + member_method { + name: "preprocess_input" + argspec: "args=[\'x\', \'data_format\'], varargs=None, keywords=None, defaults=[\'None\'], " + } +} diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.applications.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.applications.pbtxt index 900df849f45..9f367742398 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.applications.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.applications.pbtxt @@ -28,6 +28,10 @@ tf_module { name: "mobilenet_v2" mtype: "" } + member { + name: "mobilenet_v3" + mtype: "" + } member { name: "nasnet" mtype: "" @@ -116,6 +120,14 @@ tf_module { name: "MobileNetV2" argspec: "args=[\'input_shape\', \'alpha\', \'include_top\', \'weights\', \'input_tensor\', \'pooling\', \'classes\', \'classifier_activation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'1.0\', \'True\', \'imagenet\', \'None\', \'None\', \'1000\', \'softmax\'], " } + member_method { + name: "MobileNetV3Large" + argspec: "args=[\'input_shape\', \'alpha\', \'minimalistic\', \'include_top\', \'weights\', \'input_tensor\', \'classes\', \'pooling\', \'dropout_rate\', \'classifier_activation\'], varargs=None, keywords=None, defaults=[\'None\', \'1.0\', \'False\', \'True\', \'imagenet\', \'None\', \'1000\', \'None\', \'0.2\', \'softmax\'], " + } + member_method { + name: "MobileNetV3Samll" + argspec: "args=[\'input_shape\', \'alpha\', \'minimalistic\', \'include_top\', \'weights\', \'input_tensor\', \'classes\', \'pooling\', \'dropout_rate\', \'classifier_activation\'], varargs=None, keywords=None, defaults=[\'None\', \'1.0\', \'False\', \'True\', \'imagenet\', \'None\', \'1000\', \'None\', \'0.2\', \'softmax\'], " + } member_method { name: "NASNetLarge" argspec: "args=[\'input_shape\', \'include_top\', \'weights\', \'input_tensor\', \'pooling\', \'classes\'], varargs=None, keywords=None, defaults=[\'None\', \'True\', \'imagenet\', \'None\', \'None\', \'1000\'], " From 83d4de42ff4339f22cf08c9a707872e69415f85e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 9 Aug 2020 22:46:00 -0700 Subject: [PATCH 2421/2522] Add CompileTimeConstantInput to XlaGather. 
PiperOrigin-RevId: 325743288 Change-Id: Ia2629ba3562a17937decd381ec2cb28695422b86 --- tensorflow/compiler/tests/xla_ops_test.py | 19 +++++++++++++++++++ .../tf2xla/kernels/gather_scatter_ops.cc | 3 ++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/tests/xla_ops_test.py b/tensorflow/compiler/tests/xla_ops_test.py index 0d6ae81ef6e..3e9f5e8c5dd 100644 --- a/tensorflow/compiler/tests/xla_ops_test.py +++ b/tensorflow/compiler/tests/xla_ops_test.py @@ -79,6 +79,25 @@ class XlaOpsNumericalTest(xla_test.XLATestCase, parameterized.TestCase): args=(v,), expected=np.tile(v, (7, 42, 1, 1))) + @test_util.disable_mlir_bridge('Not supported yet') + def testGather(self): + operand = np.arange(10, dtype=np.int32).reshape([2, 5]) + start_indices = np.array([2], np.int32) + slice_sizes = np.array([1, 3], np.int32) + + def gather(operand, start_indices): + dimension_numbers = xla_data_pb2.GatherDimensionNumbers() + dimension_numbers.offset_dims.extend([1]) + dimension_numbers.collapsed_slice_dims.extend([0]) + dimension_numbers.start_index_map.extend([0]) + dimension_numbers.index_vector_dim = 1 + return xla.gather(operand, start_indices, dimension_numbers, slice_sizes) + + self._assertOpOutputMatchesExpected( + gather, + args=(operand, start_indices), + expected=np.array([[5, 6, 7]])) + @test_util.disable_mlir_bridge('Dynamic result types not supported') def testShiftRightLogical(self): self._assertOpOutputMatchesExpected( diff --git a/tensorflow/compiler/tf2xla/kernels/gather_scatter_ops.cc b/tensorflow/compiler/tf2xla/kernels/gather_scatter_ops.cc index 19aa85f9d42..b4b18dd2b36 100644 --- a/tensorflow/compiler/tf2xla/kernels/gather_scatter_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/gather_scatter_ops.cc @@ -49,7 +49,8 @@ class GatherOp : public XlaOpKernel { bool indices_are_sorted_; }; -REGISTER_XLA_OP(Name("XlaGather"), GatherOp); +REGISTER_XLA_OP(Name("XlaGather").CompileTimeConstantInput("slice_sizes"), + GatherOp); class ScatterOp : public XlaOpKernel { public: From 24e203fa08feee48c766b15eaa3afcc912324437 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 9 Aug 2020 23:04:38 -0700 Subject: [PATCH 2422/2522] Add CompileTimeConstantInput to XlaGather. 
PiperOrigin-RevId: 325744813 Change-Id: I26a4f02376493e0642a251a8a504bfcefc93c6ce --- tensorflow/compiler/tests/xla_ops_test.py | 19 ------------------- .../tf2xla/kernels/gather_scatter_ops.cc | 3 +-- 2 files changed, 1 insertion(+), 21 deletions(-) diff --git a/tensorflow/compiler/tests/xla_ops_test.py b/tensorflow/compiler/tests/xla_ops_test.py index 3e9f5e8c5dd..0d6ae81ef6e 100644 --- a/tensorflow/compiler/tests/xla_ops_test.py +++ b/tensorflow/compiler/tests/xla_ops_test.py @@ -79,25 +79,6 @@ class XlaOpsNumericalTest(xla_test.XLATestCase, parameterized.TestCase): args=(v,), expected=np.tile(v, (7, 42, 1, 1))) - @test_util.disable_mlir_bridge('Not supported yet') - def testGather(self): - operand = np.arange(10, dtype=np.int32).reshape([2, 5]) - start_indices = np.array([2], np.int32) - slice_sizes = np.array([1, 3], np.int32) - - def gather(operand, start_indices): - dimension_numbers = xla_data_pb2.GatherDimensionNumbers() - dimension_numbers.offset_dims.extend([1]) - dimension_numbers.collapsed_slice_dims.extend([0]) - dimension_numbers.start_index_map.extend([0]) - dimension_numbers.index_vector_dim = 1 - return xla.gather(operand, start_indices, dimension_numbers, slice_sizes) - - self._assertOpOutputMatchesExpected( - gather, - args=(operand, start_indices), - expected=np.array([[5, 6, 7]])) - @test_util.disable_mlir_bridge('Dynamic result types not supported') def testShiftRightLogical(self): self._assertOpOutputMatchesExpected( diff --git a/tensorflow/compiler/tf2xla/kernels/gather_scatter_ops.cc b/tensorflow/compiler/tf2xla/kernels/gather_scatter_ops.cc index b4b18dd2b36..19aa85f9d42 100644 --- a/tensorflow/compiler/tf2xla/kernels/gather_scatter_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/gather_scatter_ops.cc @@ -49,8 +49,7 @@ class GatherOp : public XlaOpKernel { bool indices_are_sorted_; }; -REGISTER_XLA_OP(Name("XlaGather").CompileTimeConstantInput("slice_sizes"), - GatherOp); +REGISTER_XLA_OP(Name("XlaGather"), GatherOp); class ScatterOp : public XlaOpKernel { public: From e800ce58624851b0c6cca284e2ab8a97e028bc69 Mon Sep 17 00:00:00 2001 From: Yuqi Li Date: Sun, 9 Aug 2020 23:35:43 -0700 Subject: [PATCH 2423/2522] add installation from github source. PiperOrigin-RevId: 325746904 Change-Id: I093322bb6fa72489864e23604c3911aad7744593 --- tensorflow/lite/g3doc/guide/model_maker.md | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tensorflow/lite/g3doc/guide/model_maker.md b/tensorflow/lite/g3doc/guide/model_maker.md index 76b32eac75e..3746dbd6c9f 100644 --- a/tensorflow/lite/g3doc/guide/model_maker.md +++ b/tensorflow/lite/g3doc/guide/model_maker.md @@ -43,8 +43,18 @@ For more details, see the ## Installation -Install a prebuilt pip package. +There are two ways to install Model Maker. + +* Install a prebuilt pip package. ```shell pip install tflite-model-maker ``` + +* Clone the source code from GitHub and install. + +```shell +git clone https://github.com/tensorflow/examples +cd examples/tensorflow_examples/lite/model_maker/pip_package +pip install -e . 
+``` From fc11ea4eec3a74dda5f7fd302ec5da07a12cdb82 Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Sun, 9 Aug 2020 23:48:22 -0700 Subject: [PATCH 2424/2522] [XLA] Partial sharding and propagation for broadcast/reduce PiperOrigin-RevId: 325747712 Change-Id: I31b795ec0a2138b427a0818939331d7fe30bd457 --- .../compiler/xla/service/hlo_sharding.cc | 27 +++- .../compiler/xla/service/hlo_sharding.h | 4 +- .../compiler/xla/service/hlo_sharding_util.cc | 64 +++++++++ .../compiler/xla/service/hlo_sharding_util.h | 11 ++ .../xla/service/sharding_propagation.cc | 99 ++++++------- .../xla/service/sharding_propagation_test.cc | 110 +++++++++++++++ .../xla/service/spmd/spmd_partitioner.cc | 132 +++++------------- .../xla/service/spmd/spmd_partitioner_test.cc | 50 +++++++ .../xla/service/spmd/spmd_partitioner_util.cc | 4 +- 9 files changed, 350 insertions(+), 151 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_sharding.cc b/tensorflow/compiler/xla/service/hlo_sharding.cc index ba1fc0d0450..07444aca82b 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding.cc +++ b/tensorflow/compiler/xla/service/hlo_sharding.cc @@ -56,8 +56,28 @@ HloSharding HloSharding::PartialTile( HloSharding HloSharding::PartialTile( const Array& tile_assignment_last_dim_replicate) { - return HloSharding(tile_assignment_last_dim_replicate, - /*replicate_on_last_tile_dim=*/true); + std::vector> sorted_groups( + tile_assignment_last_dim_replicate.num_elements() / + tile_assignment_last_dim_replicate.dimensions().back()); + auto get_group_id = [&](absl::Span indices) { + int64 group_id = 0; + for (int64 i = 0; i < indices.size() - 1; ++i) { + group_id *= tile_assignment_last_dim_replicate.dim(i); + group_id += indices[i]; + } + return group_id; + }; + tile_assignment_last_dim_replicate.Each( + [&](absl::Span indices, const int64 device) { + sorted_groups[get_group_id(indices)].insert(device); + }); + Array sorted_tile(tile_assignment_last_dim_replicate.dimensions()); + sorted_tile.Each([&](absl::Span indices, int64* device) { + auto begin = sorted_groups[get_group_id(indices)].begin(); + *device = *begin; + sorted_groups[get_group_id(indices)].erase(begin); + }); + return HloSharding(sorted_tile, /*replicate_on_last_tile_dim=*/true); } HloSharding HloSharding::Tuple(const ShapeTree& sub_shardings) { @@ -437,7 +457,8 @@ Status HloSharding::ValidateNonTuple(const Shape& shape, proto.tile_assignment_dimensions().end())); std::copy(proto.tile_assignment_devices().begin(), proto.tile_assignment_devices().end(), tile_assignment.begin()); - return HloSharding(tile_assignment, proto.replicate_on_last_tile_dim()); + return proto.replicate_on_last_tile_dim() ? PartialTile(tile_assignment) + : HloSharding(tile_assignment); } OpSharding HloSharding::ToProto() const { diff --git a/tensorflow/compiler/xla/service/hlo_sharding.h b/tensorflow/compiler/xla/service/hlo_sharding.h index 1b827efff2d..e7ba2bc0680 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding.h +++ b/tensorflow/compiler/xla/service/hlo_sharding.h @@ -56,14 +56,14 @@ class HloSharding { // Creates a new sharding where data is replicated within each replication // group, and sharded across replication groups according to - // group_tile_assignment. + // group_tile_assignment. Replication group members will be sorted. 
static HloSharding PartialTile( const Array& group_tile_assignment, absl::Span> replication_groups); // Creates a partially replicated tiled sharding with device-level tile // assignment, where the last dimension is the additional replication - // dimension. + // dimension. Replication group members will be sorted. static HloSharding PartialTile( const Array& tile_assignment_last_dim_replicate); diff --git a/tensorflow/compiler/xla/service/hlo_sharding_util.cc b/tensorflow/compiler/xla/service/hlo_sharding_util.cc index 94c348cdeaa..65295a8e620 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding_util.cc +++ b/tensorflow/compiler/xla/service/hlo_sharding_util.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_sharding_util.h" #include +#include #include "absl/algorithm/container.h" #include "tensorflow/compiler/xla/array.h" @@ -777,5 +778,68 @@ std::vector DevicesForSharding( return devices; } +HloSharding PartiallyReplicateTiledShardingOnDims( + const HloSharding& sharding, const std::vector& dims_to_replicate) { + if (sharding.IsTileMaximal()) { + return sharding; + } + int64 group_count = 1; + for (int64 dim : dims_to_replicate) { + if (sharding.ReplicateOnLastTileDim()) { + CHECK_LT(dim, sharding.tile_assignment().num_dimensions()); + } + group_count *= sharding.tile_assignment().dim(dim); + } + if (group_count == 1) { + return sharding; + } + if (group_count == sharding.NumTiles()) { + return HloSharding::Replicate(); + } + std::vector dim_permutation( + sharding.tile_assignment().num_dimensions()); + std::iota(dim_permutation.begin(), dim_permutation.end(), 0); + absl::c_sort(dim_permutation, [&](const int64 a, const int64 b) { + return absl::c_linear_search(dims_to_replicate, a) < + absl::c_linear_search(dims_to_replicate, b); + }); + auto transposed = TransposeSharding(sharding, dim_permutation); + auto new_tile = transposed.tile_assignment(); + std::vector new_tile_shape( + sharding.tile_assignment().dimensions().begin(), + sharding.tile_assignment().dimensions().end()); + for (int64 dim : dims_to_replicate) { + new_tile_shape[dim] = 1; + } + if (sharding.ReplicateOnLastTileDim()) { + new_tile_shape.back() *= group_count; + } else { + new_tile_shape.push_back(group_count); + } + new_tile.Reshape(new_tile_shape); + return HloSharding::PartialTile(new_tile); +} + +HloSharding RemoveShapeDimensions(const HloSharding& sharding, + const std::vector& dims_to_remove) { + if (sharding.IsTileMaximal() || dims_to_remove.empty()) { + return sharding; + } + std::vector new_tile_shape; + new_tile_shape.reserve(sharding.tile_assignment().num_dimensions() - + dims_to_remove.size()); + for (int64 i = 0; i < sharding.tile_assignment().num_dimensions(); ++i) { + if (absl::c_linear_search(dims_to_remove, i)) { + CHECK_EQ(sharding.tile_assignment().dim(i), 1); + } else { + new_tile_shape.push_back(sharding.tile_assignment().dim(i)); + } + } + auto new_tile = sharding.tile_assignment(); + new_tile.Reshape(new_tile_shape); + return sharding.ReplicateOnLastTileDim() ? 
HloSharding::PartialTile(new_tile) + : HloSharding::Tile(new_tile); +} + } // namespace hlo_sharding_util } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_sharding_util.h b/tensorflow/compiler/xla/service/hlo_sharding_util.h index cc4068121ae..ce19d8c7a19 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding_util.h +++ b/tensorflow/compiler/xla/service/hlo_sharding_util.h @@ -163,6 +163,17 @@ IdentityValueAndHloOpcodeForScatterReduceComputation( std::vector DevicesForSharding( const HloSharding& sharding, const std::vector& available_devices); +// Returns a sharding that replicates data across devices along the given +// dimensions in the original sharding. +HloSharding PartiallyReplicateTiledShardingOnDims( + const HloSharding& sharding, const std::vector& dims_to_replicate); + +// Returns a sharding the removes given tile dimensions. +// +// Precondition: if not tile maximal, the size of each tile dimension must be 1. +HloSharding RemoveShapeDimensions(const HloSharding& sharding, + const std::vector& dims_to_remove); + } // namespace hlo_sharding_util } // namespace xla diff --git a/tensorflow/compiler/xla/service/sharding_propagation.cc b/tensorflow/compiler/xla/service/sharding_propagation.cc index 0e4b0568134..4ff492047a3 100644 --- a/tensorflow/compiler/xla/service/sharding_propagation.cc +++ b/tensorflow/compiler/xla/service/sharding_propagation.cc @@ -706,44 +706,37 @@ bool InferShardingFromOperands(HloInstruction* instruction, sharding); return HloSharding::Tuple(instruction->shape(), tuple); }; - if (operand->sharding().IsReplicated()) { + if (operand->sharding().IsReplicated() || + (!is_spmd && + absl::c_any_of(instruction->dimensions(), [operand](int64 dim) { + return operand->sharding().tile_assignment().dim(dim) > 1; + }))) { + // We are reducing along one of the sharded dimensions. We only + // support this in SPMD. changed |= MaybeImproveInstructionSharding( get_maybe_tuple_sharding(HloSharding::Replicate()), instruction, /*may_combine_partial_sharding=*/is_spmd); continue; } - if (absl::c_any_of(instruction->dimensions(), [operand](int64 dim) { - return operand->sharding().tile_assignment().dim(dim) > 1; - })) { - // We are reducing along one of the sharded dimensions. We don't - // support tiled sharding in this case. + auto after_partial_replication = + operand->sharding().IsReplicated() + ? operand->sharding() + : hlo_sharding_util::PartiallyReplicateTiledShardingOnDims( + operand->sharding(), instruction->dimensions()); + if (after_partial_replication.IsReplicated()) { changed |= MaybeImproveInstructionSharding( get_maybe_tuple_sharding(HloSharding::Replicate()), instruction, /*may_combine_partial_sharding=*/is_spmd); - } else { - // We are reducing along some of the non-sharded dimensions. The - // result sharding should be the same as the operand sharding with the - // reduction dimensions removed as they are removed from the result - // shape. - std::vector target_tile_assignment_dimensions; - const auto& dimensions = instruction->dimensions(); - for (int64 i = 0; i < operand->shape().rank(); ++i) { - if (absl::c_find(dimensions, i) == dimensions.end()) { - target_tile_assignment_dimensions.push_back( - operand->sharding().tile_assignment().dim(i)); - } - } - Array new_tile_assignment = - operand->sharding().tile_assignment(); - new_tile_assignment.Reshape(target_tile_assignment_dimensions); - // Use the same sharding for all tuple elements, because they are part - // of the same reduce instruction. 
- HloSharding new_sharding = - get_maybe_tuple_sharding(HloSharding::Tile(new_tile_assignment)); - changed |= MaybeImproveInstructionSharding( - new_sharding, instruction, - /*may_combine_partial_sharding=*/is_spmd); + continue; } + // Use the same sharding for all tuple elements, because they are part + // of the same reduce instruction. + HloSharding new_sharding = + get_maybe_tuple_sharding(hlo_sharding_util::RemoveShapeDimensions( + after_partial_replication, instruction->dimensions())); + changed |= MaybeImproveInstructionSharding( + new_sharding, instruction, + /*may_combine_partial_sharding=*/is_spmd); } return changed; } @@ -773,9 +766,16 @@ bool InferShardingFromOperands(HloInstruction* instruction, op->sharding().tile_assignment().dim(source_dim)); } } + if (op->sharding().ReplicateOnLastTileDim()) { + target_tile_assignment_dimensions.push_back( + op->sharding().tile_assignment().dimensions().back()); + } Array new_tile_assignment = op->sharding().tile_assignment(); new_tile_assignment.Reshape(target_tile_assignment_dimensions); - HloSharding new_sharding = HloSharding::Tile(new_tile_assignment); + HloSharding new_sharding = + op->sharding().ReplicateOnLastTileDim() + ? HloSharding::PartialTile(new_tile_assignment) + : HloSharding::Tile(new_tile_assignment); return MaybeImproveInstructionSharding( new_sharding, instruction, /*may_combine_partial_sharding=*/is_spmd); } @@ -1121,25 +1121,25 @@ absl::optional GetShardingFromUser( if (user.sharding().IsReplicated()) { return user.sharding(); } - // Only support when none of the partitioned dimensions in the broadcast - // output belong to new dimensions. + std::vector dims_to_replicate; + bool needs_replication = false; for (int64 i = 0; i < user.shape().rank(); ++i) { - if (user.sharding().tile_assignment().dim(i) > 1 && - absl::c_count(user.dimensions(), i) == 0) { - return absl::nullopt; + if (absl::c_count(user.dimensions(), i) == 0) { + dims_to_replicate.push_back(i); + if (user.sharding().tile_assignment().dim(i) > 1) { + needs_replication = true; + } } } - - // The instruction (operand of broadcast) will be tiled the same way - // as the output. - std::vector target_tile_assignment_dimensions; - for (int64 output_dim : user.dimensions()) { - target_tile_assignment_dimensions.push_back( - user.sharding().tile_assignment().dim(output_dim)); + // If not SPMD, only support when none of the partitioned dimensions in + // the broadcast output belong to new dimensions. + if (!is_spmd && needs_replication) { + return absl::nullopt; } - Array new_tile_assignment = user.sharding().tile_assignment(); - new_tile_assignment.Reshape(target_tile_assignment_dimensions); - return HloSharding::Tile(new_tile_assignment); + return hlo_sharding_util::RemoveShapeDimensions( + hlo_sharding_util::PartiallyReplicateTiledShardingOnDims( + user.sharding(), dims_to_replicate), + dims_to_replicate); } case HloOpcode::kConcatenate: { if (user.sharding().IsReplicated()) { @@ -1364,10 +1364,11 @@ absl::optional GetShardingFromUser( return user_sharding; } std::vector target_tile_assignment_dimensions( - instruction.shape().rank()); + instruction.shape().rank() + + (user_sharding.ReplicateOnLastTileDim() ? 
1 : 0)); const auto& dimensions = user.dimensions(); int64 next_output_dim = 0; - for (int64 i = 0; i < instruction.shape().rank(); ++i) { + for (int64 i = 0; i < target_tile_assignment_dimensions.size(); ++i) { if (absl::c_find(dimensions, i) == dimensions.end()) { target_tile_assignment_dimensions[i] = user_sharding.tile_assignment().dim(next_output_dim++); @@ -1377,7 +1378,9 @@ absl::optional GetShardingFromUser( } auto tile_assignment = user_sharding.tile_assignment(); tile_assignment.Reshape(target_tile_assignment_dimensions); - return HloSharding::Tile(tile_assignment); + return user_sharding.ReplicateOnLastTileDim() + ? HloSharding::PartialTile(tile_assignment) + : HloSharding::Tile(tile_assignment); } case HloOpcode::kSort: { if (user.sharding().IsTuple()) { diff --git a/tensorflow/compiler/xla/service/sharding_propagation_test.cc b/tensorflow/compiler/xla/service/sharding_propagation_test.cc index 8aa10b67ed8..a182af001c2 100644 --- a/tensorflow/compiler/xla/service/sharding_propagation_test.cc +++ b/tensorflow/compiler/xla/service/sharding_propagation_test.cc @@ -118,6 +118,25 @@ ENTRY %broadcast { op::Sharding("{devices=[1,2,2,1]0,1,2,3}")); } +TEST_F(ShardingPropagationTest, BroadcastForwardPartial) { + const char* const hlo_string = R"( +HloModule module +ENTRY %broadcast { + %param0 = f32[3,2048]parameter(0), + sharding={devices=[1,2,2]0,1,2,3 last_tile_dim_replicate} + %broadcast = f32[3,2048,3] broadcast(%param0), dimensions={0,1} + ROOT %copy = f32[3,2048,3] copy(%broadcast) +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + TF_ASSERT_OK_AND_ASSIGN( + bool changed, ShardingPropagation(/*is_spmd=*/true).Run(module.get())); + EXPECT_TRUE(changed); + EXPECT_THAT( + FindInstruction(module.get(), "broadcast"), + op::Sharding("{devices=[1,2,1,2]0,1,2,3 last_tile_dim_replicate}")); +} + TEST_F(ShardingPropagationTest, BroadcastUser) { const char* const hlo_string = R"( HloModule module @@ -136,6 +155,25 @@ ENTRY %broadcast { op::Sharding("{devices=[2,4]0,1,2,3,4,5,6,7}")); } +TEST_F(ShardingPropagationTest, BroadcastUserPartial) { + const char* const hlo_string = R"( +HloModule module +ENTRY %broadcast { + %param0 = f32[24,8]{0,1} parameter(0) + %copy = f32[24,8]{0,1} copy(%param0) + ROOT %broadcast = f32[4,24,6,8] broadcast(%copy), dimensions={1,3}, + sharding={devices=[4,2,1,1]0,1,2,3,4,5,6,7} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + TF_ASSERT_OK_AND_ASSIGN( + bool changed, ShardingPropagation(/*is_spmd=*/true).Run(module.get())); + EXPECT_TRUE(changed); + EXPECT_THAT( + FindInstruction(module.get(), "copy"), + op::Sharding("{devices=[2,1,4]0,2,4,6,1,3,5,7 last_tile_dim_replicate}")); +} + TEST_F(ShardingPropagationTest, MaximalReduceForwardPass) { const char* const hlo_string = R"( HloModule module @@ -184,6 +222,78 @@ ENTRY %reduce { op::Sharding("{devices=[2,2]0,1,2,3}")); } +TEST_F(ShardingPropagationTest, ReducePartiallyOnTiledDims) { + const char* const hlo_string = R"( +HloModule module +%add { + %lhs = f32[] parameter(0) + %rhs = f32[] parameter(1) + ROOT %add = f32[] add(%lhs, %rhs) +} +ENTRY %reduce { + %param0 = f32[8,8] parameter(0), sharding={devices=[2,2]0,1,2,3} + %init = f32[] parameter(1) + %reduce = f32[8] reduce(%param0, %init), dimensions={0}, to_apply=%add + ROOT %copy = f32[8] copy(%reduce) +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + TF_ASSERT_OK_AND_ASSIGN( + bool changed, 
ShardingPropagation(/*is_spmd=*/true).Run(module.get())); + EXPECT_TRUE(changed); + EXPECT_THAT(FindInstruction(module.get(), "reduce"), + op::Sharding("{devices=[2,2]0,2,1,3 last_tile_dim_replicate}")); +} + +TEST_F(ShardingPropagationTest, ReducePartiallyOnTiledDims2) { + const char* const hlo_string = R"( +HloModule module +%add { + %lhs = f32[] parameter(0) + %rhs = f32[] parameter(1) + ROOT %add = f32[] add(%lhs, %rhs) +} +ENTRY %reduce { + %param0 = f32[8,8] parameter(0), sharding={devices=[2,2,2]0,1,2,3,4,5,6,7 last_tile_dim_replicate} + %init = f32[] parameter(1) + %reduce = f32[8] reduce(%param0, %init), dimensions={0}, to_apply=%add + ROOT %copy = f32[8] copy(%reduce) +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + TF_ASSERT_OK_AND_ASSIGN( + bool changed, ShardingPropagation(/*is_spmd=*/true).Run(module.get())); + EXPECT_TRUE(changed); + EXPECT_THAT( + FindInstruction(module.get(), "reduce"), + op::Sharding("{devices=[2,4]0,1,4,5,2,3,6,7 last_tile_dim_replicate}")); +} + +TEST_F(ShardingPropagationTest, ReducePartiallyBackward) { + const char* const hlo_string = R"( +HloModule module +%add { + %lhs = f32[] parameter(0) + %rhs = f32[] parameter(1) + ROOT %add = f32[] add(%lhs, %rhs) +} +ENTRY %reduce { + %param0 = f32[8,8] parameter(0) + %input = f32[8,8] copy(%param0) + %init = f32[] parameter(1) + %reduce = f32[8] reduce(%input, %init), dimensions={0}, to_apply=%add, + sharding={devices=[2,2]0,1,2,3 last_tile_dim_replicate} + ROOT %copy = f32[8] copy(%reduce) +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + TF_ASSERT_OK_AND_ASSIGN(bool changed, + ShardingPropagation().Run(module.get())); + EXPECT_TRUE(changed); + EXPECT_THAT(FindInstruction(module.get(), "input"), + op::Sharding("{devices=[1,2,2]0,1,2,3 last_tile_dim_replicate}")); +} + TEST_F(ShardingPropagationTest, ShardedTupleReduceForwardAndBackwardPass) { const char* const hlo_string = R"( HloModule module diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc index 813ccc46f32..fc065bcdd72 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc @@ -826,8 +826,9 @@ PartitionedHlo PartitionedHlo::ReshardWithAllToAll( sharding().tile_assignment().dim(source_dim); temp_target_tile.Reshape(temp_target_tile_dims); } - auto temp_target = HloSharding::Tile(temp_target_tile); - + auto temp_target = target.ReplicateOnLastTileDim() + ? HloSharding::PartialTile(temp_target_tile) + : HloSharding::Tile(temp_target_tile); auto padded_shape = hlo_->shape(); padded_shape.set_dimensions( target_dim, @@ -1974,67 +1975,22 @@ Status SpmdPartitioningVisitor::HandleBroadcast(HloInstruction* hlo) { auto& operand = GetPartitionedHlo(hlo->operand(0)); // Tiled output. 
- std::vector wanted_input_tile_size(operand.base_shape().rank()); - std::vector sharded_new_dims; - for (int64 i = 0; i < operand.base_shape().rank(); ++i) { - wanted_input_tile_size[i] = - hlo->sharding().tile_assignment().dim(hlo->dimensions(i)); - } + std::vector new_dims; for (int64 i = 0; i < hlo->shape().rank(); ++i) { - if (!absl::c_linear_search(hlo->dimensions(), i) && - hlo->sharding().tile_assignment().dim(i) > 1) { - sharded_new_dims.push_back(i); + if (!absl::c_linear_search(hlo->dimensions(), i)) { + new_dims.push_back(i); } } - if (sharded_new_dims.empty()) { - // The new dimensions are replicated, so that we can do the adjustment on - // the input. - Array wanted_input_tile_assignment(wanted_input_tile_size); - wanted_input_tile_assignment.Each( - [&](absl::Span indices, int64* val) { - std::vector indices_in_broadcast(hlo->shape().rank(), 0); - for (int64 i = 0; i < operand.base_shape().rank(); ++i) { - indices_in_broadcast[hlo->dimensions(i)] = indices[i]; - } - *val = hlo->sharding().tile_assignment()(indices_in_broadcast); - }); - SetPartitionedHlo(hlo, [&] { - return b_.AddInstruction(hlo->CloneWithNewOperands( - MakePartitionedShape(hlo->shape(), hlo->sharding()), - {operand.Reshard(HloSharding::Tile(wanted_input_tile_assignment)) - .hlo()})); - }); - } else { - auto input = operand.Reshard(HloSharding::Replicate()).hlo(); - // We pad and shard the input first, then broadcast to the final shard - // shape. - auto output_offsets = - MakePartitionOffsets(hlo->shape(), hlo->sharding(), partition_id_, &b_); - std::vector input_offsets(operand.base_shape().rank()); - auto output_shard_shape = - MakePartitionedShape(hlo->shape(), hlo->sharding()); - auto input_shard_shape = input->shape(); - auto padded_input_shape = input->shape(); - for (int64 i = 0; i < input_offsets.size(); ++i) { - input_offsets[i] = output_offsets[hlo->dimensions(i)]; - input_shard_shape.set_dimensions( - i, output_shard_shape.dimensions(hlo->dimensions(i))); - padded_input_shape.set_dimensions( - i, hlo->sharding().tile_assignment().dim(hlo->dimensions(i)) * - input_shard_shape.dimensions(i)); - } - auto padded_input = PadToShape(input, padded_input_shape, &b_); - auto input_shard = - ShapeUtil::Compatible(input_shard_shape, padded_input->shape()) - ? padded_input - : b_.AddInstruction(HloInstruction::CreateDynamicSlice( - input_shard_shape, padded_input, input_offsets, - input_shard_shape.dimensions())); - SetPartitionedHlo(hlo, [&] { - return b_.AddInstruction( - hlo->CloneWithNewOperands(output_shard_shape, {input_shard})); - }); - } + auto desired_input_sharding = hlo_sharding_util::RemoveShapeDimensions( + hlo_sharding_util::PartiallyReplicateTiledShardingOnDims(hlo->sharding(), + new_dims), + new_dims); + auto input = operand.Reshard(desired_input_sharding).hlo(); + auto output_shard_shape = MakePartitionedShape(hlo->shape(), hlo->sharding()); + SetPartitionedHlo(hlo, [&] { + return b_.AddInstruction( + hlo->CloneWithNewOperands(output_shard_shape, {input})); + }); return Status::OK(); } @@ -2533,17 +2489,6 @@ Status SpmdPartitioningVisitor::HandleReduce(HloInstruction* hlo) { if (reduce_sharded_dimension && input_count > 1) { return DefaultAction(hlo); } - - // Currently we only support reducing all or none of the sharded - // dimensions. 
- if (reduce_sharded_dimension) { - for (int64 i = 0; i < inputs[0].base_shape().rank(); ++i) { - if (inputs[0].sharding().tile_assignment().dim(i) > 1 && - absl::c_count(hlo->dimensions(), i) == 0) { - return DefaultAction(hlo); - } - } - } } std::vector new_operand_shapes(input_count * 2); @@ -2556,7 +2501,6 @@ Status SpmdPartitioningVisitor::HandleReduce(HloInstruction* hlo) { auto reduce_shape, ShapeInference::InferReduceShape(new_operand_shapes, hlo->dimensions(), hlo->to_apply()->ComputeProgramShape())); - *reduce_shape.mutable_layout() = hlo->shape().layout(); std::vector input_hlos(input_count); for (int64 i = 0; i < input_count; ++i) { @@ -2567,36 +2511,30 @@ Status SpmdPartitioningVisitor::HandleReduce(HloInstruction* hlo) { local_reduce->set_metadata(hlo->metadata()); SetPartitionedHlo(hlo, [&]() { - HloInstruction* reduce; + HloInstruction* reduce = local_reduce; if (reduce_sharded_dimension) { CHECK(local_reduce->shape().IsArray()); - reduce = collective_ops_creator_.create_cross_partition_all_reduce( - &b_, local_reduce, hlo->to_apply(), {}, NewChannel()); - reduce->set_sharding(HloSharding::Replicate()); - } else { - reduce = local_reduce; - if (inputs[0].sharding().IsTileMaximal()) { - reduce->set_sharding(inputs[0].sharding()); - } else { - // Remove tile assignment dimensions that are reduced. - std::vector tile_dimensions; - for (int64 i = 0; i < input_hlos[0]->shape().rank(); ++i) { - if (absl::c_count(hlo->dimensions(), i) == 0) { - tile_dimensions.push_back( - inputs[0].sharding().tile_assignment().dim(i)); - } + std::vector preserved_dims; + for (int64 i = 0; i < inputs[0].base_shape().rank(); ++i) { + if (!absl::c_linear_search(hlo->dimensions(), i)) { + preserved_dims.push_back(i); } - Array new_tile = inputs[0].sharding().tile_assignment(); - new_tile.Reshape(tile_dimensions); - auto sharding = HloSharding::Tile(new_tile); - if (input_count > 1) { - std::vector tuple(input_count, sharding); - sharding = HloSharding::Tuple(hlo->shape(), tuple); - } - reduce->set_sharding(sharding); } + if (inputs[0].sharding().ReplicateOnLastTileDim()) { + preserved_dims.push_back(inputs[0].base_shape().rank()); + } + auto grouped = GroupShardingOnDims(inputs[0].sharding(), preserved_dims); + auto grouped_state = CreatePerGroupPartitioningState( + inputs[0].state(), grouped.device_groups, &b_); + reduce = grouped_state.collective_ops_creator + .create_cross_partition_all_reduce( + &b_, local_reduce, hlo->to_apply(), {}, NewChannel()); } - + auto sharding = hlo_sharding_util::RemoveShapeDimensions( + hlo_sharding_util::PartiallyReplicateTiledShardingOnDims( + inputs[0].sharding(), hlo->dimensions()), + hlo->dimensions()); + reduce->set_sharding(sharding); return PartitionedHlo(reduce, hlo->shape(), MakePartitioningState()) .Reshard(hlo->sharding()) .hlo(); diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc index 04641f2f463..386d634779b 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc @@ -564,6 +564,27 @@ ENTRY entry { op::Constant()))))); } +TEST_F(SpmdPartitioningTest, + BroadcastBothOldAndNewDimsShardedPartiallySharded) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + param = f32[4,3] parameter(0), + sharding={devices=[1,2,4]0,1,4,5,2,3,6,7 last_tile_dim_replicate} + ROOT broadcast = f32[4,4,3] broadcast(param), dimensions={1,2}, + sharding={devices=[2,1,2,2]0,1,2,3,4,5,6,7 
last_tile_dim_replicate} +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/8)); + VLOG(1) << module->ToString(); + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT( + root, + AllOf(op::Shape("f32[2,4,2]"), + op::Broadcast(AllOf(op::Shape("f32[4,2]"), op::Parameter(0))))); +} + TEST_F(SpmdPartitioningTest, ConvWithParallelDimAndNonParallelSpatialDimPartitioned) { const char* const hlo_string = R"( @@ -2746,6 +2767,35 @@ ENTRY entry { AllOf(op::Reduce(param0, op::Constant()), op::Shape("f32[64]"))); } +TEST_F(SpmdPartitioningTest, PartialTiledToPartialTiledReduce) { + const char* const hlo_string = R"( +HloModule module + +sum { + a = f32[] parameter(0) + b = f32[] parameter(1) + ROOT add = f32[] add(a, b) +} + +ENTRY entry { + %param0 = f32[4,4] parameter(0), + sharding={devices=[2,2,2]0,1,2,3,4,5,6,7 last_tile_dim_replicate} + %constant.1 = f32[] constant(0), sharding={replicated} + ROOT %reduce = f32[4] reduce(%param0, %constant.1), dimensions={0}, + to_apply=%sum, + sharding={devices=[2,4]0,1,4,5,2,3,6,7 last_tile_dim_replicate} +})"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/8)); + VLOG(1) << module->ToString(); + + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, + AllOf(op::AllReduce(op::Reduce(op::Parameter(0), op::Constant())), + op::Shape("f32[2]"))); +} + TEST_F(SpmdPartitioningTest, TiledToTiledTupleReduce) { const char* const hlo_string = R"( HloModule module diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc index 8f94a90de8e..767bed2a21a 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc @@ -942,7 +942,8 @@ GetReshardAllToAllSourceTargetDims(const HloSharding& source, const HloSharding& target) { if (source.IsTileMaximal() || target.IsTileMaximal() || source.tile_assignment().num_dimensions() != - target.tile_assignment().num_dimensions()) { + target.tile_assignment().num_dimensions() || + source.NumTiles() != target.NumTiles()) { return absl::nullopt; } // Record partition count to index for indices that have different partition @@ -1027,6 +1028,7 @@ bool CanReshardWithCollectivePermute(const HloSharding& source, return !source.IsTileMaximal() && !target.IsTileMaximal() && source.tile_assignment().dimensions() == target.tile_assignment().dimensions() && + source.ReplicateOnLastTileDim() == target.ReplicateOnLastTileDim() && source.tile_assignment() != target.tile_assignment(); } From fe968502a9835afec951a669d64224e411746605 Mon Sep 17 00:00:00 2001 From: Chao Mei Date: Sun, 9 Aug 2020 23:59:01 -0700 Subject: [PATCH 2425/2522] Stop support of --use_legacy_nnapi in the benchmark tool as Interpreter:UseNNAPI(bool) is marked deprecated now. Instead, use "--use_nnapi" and other NNAPI options. 
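As a point of reference (illustrative only; the model and binary paths below are placeholders, not part of this change), a benchmark run that previously passed --use_legacy_nnapi=true would now request NNAPI through the delegate path instead:

  adb shell /data/local/tmp/benchmark_model \
    --graph=/data/local/tmp/model.tflite \
    --use_nnapi=true

Further NNAPI delegate options (e.g. which accelerator to target) are covered by the delegate parameters documented in tensorflow/lite/tools/delegates/README.md, referenced from the benchmark README below.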
PiperOrigin-RevId: 325748539 Change-Id: Ie2c1c4ac3054239e9355b85092020ed36f06f0ff --- tensorflow/lite/interpreter.cc | 2 +- tensorflow/lite/tools/benchmark/README.md | 10 +--------- .../lite/tools/benchmark/benchmark_tflite_model.cc | 7 ------- 3 files changed, 2 insertions(+), 17 deletions(-) diff --git a/tensorflow/lite/interpreter.cc b/tensorflow/lite/interpreter.cc index 307ede187b2..1d702dd8397 100644 --- a/tensorflow/lite/interpreter.cc +++ b/tensorflow/lite/interpreter.cc @@ -113,7 +113,7 @@ Interpreter::Interpreter(ErrorReporter* error_reporter) external_contexts_[kTfLiteCpuBackendContext] = own_external_cpu_backend_context_.get(); - UseNNAPI(false); + primary_subgraph().UseNNAPI(false); } Interpreter::~Interpreter() { diff --git a/tensorflow/lite/tools/benchmark/README.md b/tensorflow/lite/tools/benchmark/README.md index 8d7e6643d79..68cc59dd371 100644 --- a/tensorflow/lite/tools/benchmark/README.md +++ b/tensorflow/lite/tools/benchmark/README.md @@ -34,13 +34,6 @@ and the following optional parameters: * `run_delay`: `float` (default=-1.0) \ The delay in seconds between subsequent benchmark runs. Non-positive values mean use no delay. -* `use_legacy_nnapi`: `bool` (default=false) \ - Whether to use the legacy - [Android NNAPI](https://developer.android.com/ndk/guides/neuralnetworks/) - TFLite path, which requires the graph to be fully compatible with NNAPI. - This is available on recent Android devices. Note that some Android P - devices will fail to use NNAPI for models in `/data/local/tmp/` and this - benchmark tool will not correctly use NNAPI. * `enable_op_profiling`: `bool` (default=false) \ Whether to enable per-operator profiling measurement. * `enable_platform_tracing`: `bool` (default=false) \ @@ -65,8 +58,7 @@ The following simply lists the names of these parameters and additional notes where applicable. For details about each parameter, please refer to [this page](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/tools/delegates/README.md#tflite-delegate-registrar). #### Common parameters -* `max_delegated_partitions`: `int` (default=0) \ -Note when `use_legacy_nnapi` is selected, this parameter won't work. +* `max_delegated_partitions`: `int` (default=0) * `min_nodes_per_partition`:`int` (default=0) #### GPU delegate diff --git a/tensorflow/lite/tools/benchmark/benchmark_tflite_model.cc b/tensorflow/lite/tools/benchmark/benchmark_tflite_model.cc index 39ecded5484..ef9742eaac7 100644 --- a/tensorflow/lite/tools/benchmark/benchmark_tflite_model.cc +++ b/tensorflow/lite/tools/benchmark/benchmark_tflite_model.cc @@ -259,8 +259,6 @@ BenchmarkParams BenchmarkTfLiteModel::DefaultParams() { BenchmarkParam::Create("")); default_params.AddParam("input_layer_value_files", BenchmarkParam::Create("")); - default_params.AddParam("use_legacy_nnapi", - BenchmarkParam::Create(false)); default_params.AddParam("allow_fp16", BenchmarkParam::Create(false)); default_params.AddParam("require_full_delegation", BenchmarkParam::Create(false)); @@ -324,7 +322,6 @@ std::vector BenchmarkTfLiteModel::GetFlags() { "input_layer_value_range of the input_name will be ignored. 
The file " "format is binary and it should be array format or null separated " "strings format."), - CreateFlag("use_legacy_nnapi", ¶ms_, "use legacy nnapi api"), CreateFlag("allow_fp16", ¶ms_, "allow fp16"), CreateFlag("require_full_delegation", ¶ms_, "require delegate to run the entire graph"), @@ -363,9 +360,6 @@ void BenchmarkTfLiteModel::LogParams() { LOG_BENCHMARK_PARAM(std::string, "input_layer_value_files", "Input value files", verbose); -#if defined(__ANDROID__) - LOG_BENCHMARK_PARAM(bool, "use_legacy_nnapi", "Use legacy nnapi", verbose); -#endif LOG_BENCHMARK_PARAM(bool, "allow_fp16", "Allow fp16", verbose); LOG_BENCHMARK_PARAM(bool, "require_full_delegation", "Require full delegation", verbose); @@ -635,7 +629,6 @@ TfLiteStatus BenchmarkTfLiteModel::Init() { profiling_listener_ = MayCreateProfilingListener(); if (profiling_listener_) AddListener(profiling_listener_.get()); - interpreter_->UseNNAPI(params_.Get("use_legacy_nnapi")); interpreter_->SetAllowFp16PrecisionForFp32(params_.Get("allow_fp16")); owned_delegates_.clear(); From 502b61114452d6bd6ceaba5d3b3578dab88af71c Mon Sep 17 00:00:00 2001 From: Balint Cristian Date: Mon, 10 Aug 2020 10:30:22 +0300 Subject: [PATCH 2426/2522] [EXT-SYSLIB] Add runtime_py for external flatbuffer. --- third_party/flatbuffers/BUILD.system | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/third_party/flatbuffers/BUILD.system b/third_party/flatbuffers/BUILD.system index 14fceada826..8fe4d7a5907 100644 --- a/third_party/flatbuffers/BUILD.system +++ b/third_party/flatbuffers/BUILD.system @@ -36,3 +36,8 @@ cc_library( name = "runtime_cc", visibility = ["//visibility:public"], ) + +py_library( + name = "runtime_py", + visibility = ["//visibility:public"], +) From c25b9687614443c27a6425a25276133bbf4ac06d Mon Sep 17 00:00:00 2001 From: Ben Barsdell Date: Mon, 10 Aug 2020 17:34:11 +1000 Subject: [PATCH 2427/2522] Skip fp16 depthwise conv test when cudnn < v8 --- tensorflow/python/grappler/auto_mixed_precision_test.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tensorflow/python/grappler/auto_mixed_precision_test.py b/tensorflow/python/grappler/auto_mixed_precision_test.py index f7f3777f7a9..615e01ed668 100644 --- a/tensorflow/python/grappler/auto_mixed_precision_test.py +++ b/tensorflow/python/grappler/auto_mixed_precision_test.py @@ -46,6 +46,7 @@ from tensorflow.python.ops import random_ops from tensorflow.python.ops import tensor_array_ops from tensorflow.python.ops import variables from tensorflow.python.ops.losses import losses +from tensorflow.python.platform import sysconfig from tensorflow.python.platform import test from tensorflow.python.training import adam from tensorflow.python.training import gradient_descent @@ -578,6 +579,12 @@ class AutoMixedPrecisionTest(test.TestCase, parameterized.TestCase): def test_depthwise_conv2d(self, mode): """Test grad ops with depthwise convolution2d graph.""" self._maybe_skip(mode) + cudnn_version = tuple([ + int(x) for x in sysconfig.get_build_info()['cudnn_version'].split('.')]) + if cudnn_version < (8,): + # Depthwise conv2d ops are only enabled in auto_mixed_precision as of + # cuDNN v8. 
+ self.skipTest('cuDNN version >= 8 required') random_seed.set_random_seed(0) x = _input([2, 8, 8, 1]) f = _weight([3, 3, 1, 4]) From d97f769ce41968108dccbac00309809a08b61e29 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Mon, 10 Aug 2020 01:01:23 -0700 Subject: [PATCH 2428/2522] Introduce additional TPU infeed and outfeed ops PiperOrigin-RevId: 325755345 Change-Id: Ia6ebc88985f3a0859e044f1145cf8689cfcfd387 --- tensorflow/core/tpu/BUILD | 8 +- tensorflow/core/tpu/kernels/BUILD | 107 ---- .../core/tpu/kernels/image_resize_ops.cc | 155 ----- tensorflow/core/tpu/kernels/infeed_ops.cc | 529 ------------------ tensorflow/core/tpu/kernels/infeed_ops.h | 69 --- tensorflow/core/tpu/kernels/outfeed_ops.cc | 116 ---- tensorflow/core/tpu/kernels/outfeed_ops.h | 69 --- .../core/tpu/kernels/replication_ops.cc | 27 - .../core/tpu/kernels/tpu_handle_to_key_op.cc | 62 -- tensorflow/core/tpu/kernels/transfer_ops.cc | 98 ---- tensorflow/core/tpu/kernels/transfer_ops.h | 56 -- tensorflow/core/tpu/tpu_defs.cc | 18 - tensorflow/core/tpu/tpu_defs.h | 6 - tensorflow/core/tpu/tpu_library_init_fns.inc | 1 - 14 files changed, 1 insertion(+), 1320 deletions(-) delete mode 100644 tensorflow/core/tpu/kernels/image_resize_ops.cc delete mode 100644 tensorflow/core/tpu/kernels/infeed_ops.cc delete mode 100644 tensorflow/core/tpu/kernels/infeed_ops.h delete mode 100644 tensorflow/core/tpu/kernels/outfeed_ops.cc delete mode 100644 tensorflow/core/tpu/kernels/outfeed_ops.h delete mode 100644 tensorflow/core/tpu/kernels/replication_ops.cc delete mode 100644 tensorflow/core/tpu/kernels/tpu_handle_to_key_op.cc delete mode 100644 tensorflow/core/tpu/kernels/transfer_ops.cc delete mode 100644 tensorflow/core/tpu/kernels/transfer_ops.h diff --git a/tensorflow/core/tpu/BUILD b/tensorflow/core/tpu/BUILD index 15b2b93e46f..0a17ba3d408 100644 --- a/tensorflow/core/tpu/BUILD +++ b/tensorflow/core/tpu/BUILD @@ -88,13 +88,7 @@ cc_library( name = "tpu_defs", srcs = ["tpu_defs.cc"], hdrs = ["tpu_defs.h"], - deps = [ - ":tpu_api", - "//tensorflow/compiler/xla:shape_util", - "//tensorflow/core:protos_all_cc", - "//tensorflow/stream_executor/tpu:c_api_conversions", - "//tensorflow/stream_executor/tpu:c_api_decl", - ], + deps = ["//tensorflow/core:protos_all_cc"], ) cc_library( diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index 157aeb3df58..1336f52ed34 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -28,16 +28,10 @@ tf_kernel_library( deps = [ ":cross_replica_ops", ":host_compute_ops", - ":image_resize_ops", - ":infeed_ops", - ":outfeed_ops", - ":replication_ops", ":topk_ops", ":tpu_compile_op", ":tpu_configuration_ops", ":tpu_execute_op", - ":tpu_handle_to_key_op", - ":transfer_ops", ], ) @@ -690,104 +684,3 @@ cc_library( ], alwayslink = 1, ) - -cc_library( - name = "infeed_ops", - srcs = ["infeed_ops.cc"], - hdrs = ["infeed_ops.h"], - visibility = ["//visibility:public"], - deps = [ - ":transfer_ops", - "//tensorflow/compiler/jit:xla_device_no_jit_rewrite_registration", - "//tensorflow/compiler/tf2xla:common", - "//tensorflow/compiler/xla:util", - "//tensorflow/core:framework", - "//tensorflow/core/common_runtime:dma_helper", - "//tensorflow/core/framework:protos_all_cc", - "//tensorflow/core/kernels:transpose_functor", - "//tensorflow/core/platform:status", - "//tensorflow/core/profiler/lib:traceme", - "//tensorflow/core/tpu:tpu_defs", - "//tensorflow/stream_executor:multi_platform_manager", - "//tensorflow/stream_executor/tpu:tpu_transfer_manager_base", - 
"//tensorflow/stream_executor/tpu:tpu_transfer_manager_interface", - ], - alwayslink = True, -) - -cc_library( - name = "transfer_ops", - srcs = ["transfer_ops.cc"], - hdrs = ["transfer_ops.h"], - visibility = ["//visibility:public"], - deps = [ - "//tensorflow/compiler/jit:xla_device_no_jit_rewrite_registration", - "//tensorflow/core:framework", - "//tensorflow/core:lib_internal", - "//tensorflow/core/kernels:ops_util", - "//tensorflow/core/profiler/lib:traceme", - "//tensorflow/stream_executor:multi_platform_manager", - "//tensorflow/stream_executor/tpu:tpu_node_context", - "//tensorflow/stream_executor/tpu:tpu_platform_interface", - "//tensorflow/stream_executor/tpu:tpu_transfer_manager_interface", - ], - alwayslink = True, -) - -cc_library( - name = "outfeed_ops", - srcs = ["outfeed_ops.cc"], - hdrs = ["outfeed_ops.h"], - visibility = ["//visibility:public"], - deps = [ - ":transfer_ops", - "//tensorflow/compiler/jit:xla_device_no_jit_rewrite_registration", - "//tensorflow/compiler/tf2xla:common", - "//tensorflow/core:framework", - "//tensorflow/core/framework:protos_all_cc", - "//tensorflow/core/tpu:tpu_defs", - "//tensorflow/stream_executor:multi_platform_manager", - ], - alwayslink = True, -) - -cc_library( - name = "image_resize_ops", - srcs = ["image_resize_ops.cc"], - visibility = ["//visibility:public"], - deps = [ - "//tensorflow/compiler/tf2xla:common", - "//tensorflow/compiler/tf2xla:xla_compiler", - "//tensorflow/compiler/xla/client:xla_builder", - "//tensorflow/compiler/xla/client/lib:constants", - "//tensorflow/core:framework", - "//tensorflow/core/tpu:tpu_defs", - "@com_google_absl//absl/strings", - ], - alwayslink = True, -) - -cc_library( - name = "replication_ops", - srcs = ["replication_ops.cc"], - visibility = ["//visibility:public"], - deps = [ - "//tensorflow/compiler/jit:xla_device_no_jit_rewrite_registration", - "//tensorflow/core:framework", - "//tensorflow/core/tpu:tpu_defs", - ], - alwayslink = True, -) - -cc_library( - name = "tpu_handle_to_key_op", - srcs = ["tpu_handle_to_key_op.cc"], - visibility = ["//visibility:public"], - deps = [ - ":tpu_compilation_cache_interface", - ":tpu_op_consts", - "//tensorflow/core:framework", - "//tensorflow/core/tpu:tpu_configuration", - ], - alwayslink = True, -) diff --git a/tensorflow/core/tpu/kernels/image_resize_ops.cc b/tensorflow/core/tpu/kernels/image_resize_ops.cc deleted file mode 100644 index fd0f5e4c7a6..00000000000 --- a/tensorflow/core/tpu/kernels/image_resize_ops.cc +++ /dev/null @@ -1,155 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "absl/strings/match.h" -#include "absl/strings/str_cat.h" -#include "absl/strings/string_view.h" -#include "tensorflow/compiler/tf2xla/shape_util.h" -#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" -#include "tensorflow/compiler/tf2xla/xla_op_registry.h" -#include "tensorflow/compiler/xla/client/lib/constants.h" -#include "tensorflow/compiler/xla/client/xla_builder.h" -#include "tensorflow/core/framework/kernel_def_builder.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/tpu/tpu_defs.h" - -namespace tensorflow { - -class TpuCustomResizeOp : public XlaOpKernel { - public: - explicit TpuCustomResizeOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { - OP_REQUIRES_OK(ctx, ctx->GetAttr("align_corners", &align_corners_)); - OP_REQUIRES_OK(ctx, - ctx->GetAttr("half_pixel_centers", &half_pixel_centers_)); - } - - xla::Shape GetOutputShape(XlaOpKernelContext* ctx) const { - std::vector out_size; - auto status = ctx->ConstantInputAsIntVector(1, &out_size); - CHECK_EQ(out_size.size(), 2) << status.ToString(); - xla::Shape output_shape = - TensorShapeToXLAShape(ctx->output_xla_type(0), ctx->InputShape(0)); - output_shape.mutable_dimensions()[1] = out_size[0]; - output_shape.mutable_dimensions()[2] = out_size[1]; - return output_shape; - } - - string OpaqueField() const { - return absl::StrCat("\"", align_corners_, half_pixel_centers_, "\""); - } - - void CompileGrad(XlaOpKernelContext* ctx, const char* target, - const xla::Shape& output_shape) { - auto input_shape = - TensorShapeToXLAShape(ctx->output_xla_type(0), ctx->InputShape(0)); - if (ctx->InputShape(1).dim_sizes() == ctx->InputShape(0).dim_sizes()) { - ctx->SetOutput( - 0, xla::ConvertElementType(ctx->Input(0), ctx->output_xla_type(0))); - return; - } - // The gradient should be done in two phases for large resizes. 
- auto input = ctx->Input(0); - if (input_shape.dimensions(1) / output_shape.dimensions(1) > 3 && - input_shape.dimensions(2) / output_shape.dimensions(2) > 3) { - auto intermediate_shape = output_shape; - intermediate_shape.mutable_dimensions()[1] = input_shape.dimensions(1); - input = xla::CustomCall(ctx->builder(), target, {ctx->Input(0)}, - intermediate_shape, OpaqueField()); - } - ctx->SetOutput(0, xla::CustomCall(ctx->builder(), target, {input}, - output_shape, OpaqueField())); - } - - void CompileForward(XlaOpKernelContext* ctx, const char* target) { - auto output_shape = GetOutputShape(ctx); - if (ctx->InputShape(0).dim_size(1) == output_shape.dimensions(1) && - ctx->InputShape(0).dim_size(2) == output_shape.dimensions(2)) { - ctx->SetOutput( - 0, xla::ConvertElementType(ctx->Input(0), ctx->output_xla_type(0))); - return; - } - if (ctx->InputShape(0).dim_size(1) == 1 && - ctx->InputShape(0).dim_size(2) == 1) { - ctx->SetOutput(0, - ctx->Input(0) + xla::Zeros(ctx->builder(), output_shape)); - return; - } - ctx->SetOutput(0, xla::CustomCall(ctx->builder(), target, {ctx->Input(0)}, - output_shape, OpaqueField())); - } - - private: - bool align_corners_; - bool half_pixel_centers_; -}; - -class TpuResizeNearestNeighborOp : public TpuCustomResizeOp { - public: - explicit TpuResizeNearestNeighborOp(OpKernelConstruction* ctx) - : TpuCustomResizeOp(ctx) {} - void Compile(XlaOpKernelContext* ctx) override { - CompileForward(ctx, "ResizeNearest"); - } -}; - -class TpuResizeBilinearOp : public TpuCustomResizeOp { - public: - explicit TpuResizeBilinearOp(OpKernelConstruction* ctx) - : TpuCustomResizeOp(ctx) {} - void Compile(XlaOpKernelContext* ctx) override { - CompileForward(ctx, "ResizeBilinear"); - } -}; - -class TpuResizeNearestNeighborGradOp : public TpuCustomResizeOp { - public: - explicit TpuResizeNearestNeighborGradOp(OpKernelConstruction* ctx) - : TpuCustomResizeOp(ctx) {} - void Compile(XlaOpKernelContext* ctx) override { - CompileGrad(ctx, "ResizeNearestGrad", GetOutputShape(ctx)); - } -}; - -class TpuResizeBilinearGradOp : public TpuCustomResizeOp { - public: - explicit TpuResizeBilinearGradOp(OpKernelConstruction* ctx) - : TpuCustomResizeOp(ctx) {} - void Compile(XlaOpKernelContext* ctx) override { - auto output_shape = - TensorShapeToXLAShape(ctx->output_xla_type(0), ctx->InputShape(1)); - CompileGrad(ctx, "ResizeBilinearGrad", output_shape); - } -}; - -REGISTER_XLA_OP(Name("ResizeNearestNeighbor") - .CompileTimeConstantInput("size") - .Device(DEVICE_TPU_XLA_JIT), - TpuResizeNearestNeighborOp); - -REGISTER_XLA_OP(Name("ResizeNearestNeighborGrad") - .CompileTimeConstantInput("size") - .Device(DEVICE_TPU_XLA_JIT), - TpuResizeNearestNeighborGradOp); - -REGISTER_XLA_OP(Name("ResizeBilinear") - .CompileTimeConstantInput("size") - .Device(DEVICE_TPU_XLA_JIT), - TpuResizeBilinearOp); - -REGISTER_XLA_OP(Name("ResizeBilinearGrad").Device(DEVICE_TPU_XLA_JIT), - TpuResizeBilinearGradOp); - -} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/infeed_ops.cc b/tensorflow/core/tpu/kernels/infeed_ops.cc deleted file mode 100644 index f3fbd16b6cc..00000000000 --- a/tensorflow/core/tpu/kernels/infeed_ops.cc +++ /dev/null @@ -1,529 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/core/tpu/kernels/infeed_ops.h" - -#include -#include - -#include "tensorflow/compiler/jit/xla_device.h" -#include "tensorflow/compiler/tf2xla/literal_util.h" -#include "tensorflow/compiler/tf2xla/shape_util.h" -#include "tensorflow/compiler/xla/util.h" -#include "tensorflow/core/common_runtime/dma_helper.h" -#include "tensorflow/core/framework/allocator.h" -#include "tensorflow/core/framework/dataset.h" -#include "tensorflow/core/framework/function.h" -#include "tensorflow/core/framework/function_handle_cache.h" -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/shape_inference.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/variant.h" -#include "tensorflow/core/framework/variant_encode_decode.h" -#include "tensorflow/core/framework/variant_tensor_data.h" -#include "tensorflow/core/kernels/transpose_functor.h" -#include "tensorflow/core/profiler/lib/traceme.h" -#include "tensorflow/core/tpu/kernels/transfer_ops.h" -#include "tensorflow/core/tpu/tpu_defs.h" -#include "tensorflow/stream_executor/multi_platform_manager.h" -#include "tensorflow/stream_executor/tpu/tpu_transfer_manager.h" -#include "tensorflow/stream_executor/tpu/tpu_transfer_manager_interface.h" - -namespace tensorflow { -namespace { - -typedef Eigen::ThreadPoolDevice CPUDevice; -typedef tensorflow::tpu::NoncopyableBuffer LinearizerBuffer; -typedef std::deque LinearizerBufferList; - -// Transposes the given tensor using the tensorflow C++ transpose implementation -// to obtain a XLA literal for the host tensor laid out as the given layout. The -// returned tensor is normalized to the dim0major layout -- F32[10,20,30]{2,0,1} -// is returned as F32[20,10,30]{2,1,0}. -xla::StatusOr TransposeTensor(OpKernelContext* ctx, - const Tensor& input_tensor, - const xla::Shape& xla_shape) { - profiler::TraceMe trace_me("TransposeTensor", /*level=*/2); - const int64 rank = xla_shape.rank(); - std::vector permutation(rank); - std::vector transposed_shapes(rank); - for (int64 i = 0; i < rank; ++i) { - permutation[i] = xla_shape.layout().minor_to_major(rank - 1 - i); - transposed_shapes[i] = xla_shape.dimensions(permutation[i]); - } - - Tensor transposed_tensor; - - // If this is a trivial transpose (i.e., bitcast), just create an aliased - // tensor with the transposed shape. - if (xla::LayoutUtil::IsMonotonicWithDim0Major( - xla::ShapeUtil::DropDegenerateDimensions(xla_shape).layout())) { - TensorShape shape; - TF_RETURN_IF_ERROR(TensorShapeUtils::MakeShape(transposed_shapes, &shape)); - TF_RETURN_IF_ERROR(transposed_tensor.BitcastFrom( - input_tensor, input_tensor.dtype(), shape)); - return transposed_tensor; - } - - AllocatorAttributes alloc_attr; - alloc_attr.set_on_host(true); - TF_RETURN_IF_ERROR(ctx->allocate_temp(input_tensor.dtype(), - TensorShape(transposed_shapes), - &transposed_tensor, alloc_attr)); - // Eigen Transpose fails with SIGFPE if there is a dimension of size 0. 
- if (input_tensor.NumElements() > 0) { - TF_RETURN_IF_ERROR(DoTranspose(ctx->eigen_device(), - input_tensor, permutation, - &transposed_tensor)); - } - return transposed_tensor; -} - -xla::StatusOr GetLayoutOverride(OpKernelConstruction* ctx, - const char* attrn_name, - std::vector* minor_to_major) { - if (!ctx->HasAttr(attrn_name)) { - return false; - } - TF_RETURN_IF_ERROR(ctx->GetAttr(attrn_name, minor_to_major)); - return !minor_to_major->empty(); -} - -Status GetInfeedShapeWithLayout(OpKernelConstruction* ctx, - const char* attrn_name, - const xla::Shape& input_shape, - xla::Shape* output_shape) { - std::vector minor_to_major; - TF_ASSIGN_OR_RETURN(bool has_override, - GetLayoutOverride(ctx, attrn_name, &minor_to_major)); - if (!has_override) { - *output_shape = input_shape; - if (output_shape->IsTuple()) { - int64 tuple_elements = xla::ShapeUtil::TupleElementCount(*output_shape); - for (int64 i = 0; i < tuple_elements; ++i) { - xla::Shape* sub_shape = - xla::ShapeUtil::GetMutableSubshape(output_shape, {i}); - *sub_shape->mutable_layout() = GetTPUInfeedLayout(*sub_shape).layout(); - } - } else { - *output_shape->mutable_layout() = - GetTPUInfeedLayout(*output_shape).layout(); - } - return Status::OK(); - } - - auto layout_func = [](const xla::Shape& shape) -> xla::Layout { - return GetTPUInfeedLayout(shape).layout(); - }; - return GetShapeWithLayout(input_shape, minor_to_major, layout_func, - output_shape); -} - -// LinearizedBuffersWrapper is an opaque C++ data structure for the outputs of -// PrelinearizeOp and PrelinearizeTupleOp. It holds the resultant linearized -// buffers and references to input tensors whose underlying storage are shared -// with linearized buffers. -// NOTE: This is not a feature-complete implementation of the DT_VARIANT -// specification. In particular, we cannot currently serialize an arbitrary -// `LinearizerBufferList` (aka `std::deque`) -// object, so the `Encode()` and `Decode()` methods are not implemented. -struct LinearizedBuffersWrapper { - explicit LinearizedBuffersWrapper() {} - explicit LinearizedBuffersWrapper(LinearizerBufferList bufs, - std::vector ts) - : buffers(std::move(bufs)), tensors(std::move(ts)) {} - LinearizedBuffersWrapper(const LinearizedBuffersWrapper& wrapper) { - // tensorflow::Variant requires this copy constructor to compile. - LOG(FATAL) << "LinearizedBuffersWrapper should not copy."; - } - LinearizedBuffersWrapper& operator=(const LinearizedBuffersWrapper& wrapper) = - delete; - LinearizedBuffersWrapper(LinearizedBuffersWrapper&&) = default; - LinearizedBuffersWrapper& operator=(LinearizedBuffersWrapper&&) = default; - ~LinearizedBuffersWrapper() = default; - - // These functions are tensorflow::Variant requirements. - string TypeName() const { return "(anonymous)::LinearizedBuffersWrapper"; } - void Encode(tensorflow::VariantTensorData* data) const { - LOG(ERROR) << "Encode() is not implemented for LinearizedBuffersWrapper " - "objects."; - } - bool Decode(const tensorflow::VariantTensorData& data) { - LOG(ERROR) << "Decode() is not implemented for LinearizedBuffersWrapper " - "objects."; - return false; - } - - LinearizerBufferList buffers; - // Save references on tensors whose underlying storage are shared with - // LiteralLinearizer::Buffer in `buffers`. 
- std::vector tensors; -}; - -Status AutoTransposeAndLinearize(OpKernelContext* ctx, - const Tensor& input_tensor, - const xla::Shape& shape, - LinearizerBufferList* linearized_buffers, - std::vector* saved_input_tensors) { - const Tensor* tensor = &input_tensor; - // If the given layout is not in dim0major layout, tranposes the tensor. - bool has_transposed = false; - Tensor transposed_tensor; - if (!xla::LayoutUtil::IsMonotonicWithDim0Major(shape.layout())) { - // If the given layout is not in dim0major layout, transpose the tensor. - TF_ASSIGN_OR_RETURN(transposed_tensor, - TransposeTensor(ctx, input_tensor, shape)); - tensor = &transposed_tensor; - has_transposed = true; - } - - xla::BorrowingLiteral literal; - TF_RETURN_IF_ERROR(HostTensorToBorrowingLiteral(*tensor, &literal)); - - TF_RETURN_IF_ERROR( - xla::TpuTransferManagerInterface::GetRegisteredTpuTransferManager() - ->LinearizeToBuffers(literal, linearized_buffers)); - - // The input tensor is ref-counted. Save a handle on the input tensor if - // its underlying storage is shared with linearized buffers to prevent - // input tensor from getting freed. - for (const auto& buffer : *linearized_buffers) { - if (!buffer.owns_data() && !has_transposed) { - // `buffer` is created from zero-copy fast path from the un-transposed - // input tensor so its underlying data is shared with input tensor. - // Save a handle to input tensor to increment its ref-count and avoid - // it getting deallocated after PrelinearizeTupleOp completes. - saved_input_tensors->push_back(*tensor); - // A literal can be linearized to zero to two buffers. If any of the - // linearized buffer shares storage with input tensor. We save exactly - // one handle on the input tensor. - break; - } - } - return Status::OK(); -} - -// PrelinearizeOp is used to linearize one tensor to the device format. -class PrelinearizeOp : public OpKernel { - public: - explicit PrelinearizeOp(OpKernelConstruction* ctx) : OpKernel(ctx) { - OP_REQUIRES_OK(ctx, ctx->GetAttr("shape", &shape_)); - OP_REQUIRES_OK(ctx, ctx->GetAttr("dtype", &dtype_)); - xla::Shape shape; - OP_REQUIRES_OK(ctx, TensorShapeToXLAShape(dtype_, shape_, &shape)); - OP_REQUIRES_OK(ctx, - GetInfeedShapeWithLayout(ctx, "layout", shape, &xla_shape_)); - } - - void Compute(OpKernelContext* ctx) override { - const Tensor& input_tensor = ctx->input(0); - // Validate input. - OP_REQUIRES( - ctx, input_tensor.dtype() == dtype_, - errors::InvalidArgument("Prelinearize dtype mismatch; expected ", - DataType_Name(dtype_), ", got ", - DataType_Name(input_tensor.dtype()))); - OP_REQUIRES( - ctx, input_tensor.shape() == shape_, - errors::InvalidArgument("Prelinearize shape mismatch; expected ", - shape_.DebugString(), ", got ", - input_tensor.shape().DebugString())); - - // Auto-transpose and prelinearize. - LinearizerBufferList linearized_buffers; - std::vector saved_input_tensors; - auto status = - AutoTransposeAndLinearize(ctx, input_tensor, xla_shape_, - &linearized_buffers, &saved_input_tensors); - OP_REQUIRES_OK(ctx, status); - - // Write to output. - tensorflow::Tensor* output; - OP_REQUIRES_OK(ctx, - ctx->allocate_output(0, tensorflow::TensorShape{}, &output)); - output->scalar()() = LinearizedBuffersWrapper{ - std::move(linearized_buffers), std::move(saved_input_tensors)}; - } - - bool IsExpensive() override { return true; } - - private: - TensorShape shape_; - DataType dtype_; - xla::Shape xla_shape_; - - // PrelinearizeOp is neither copyable nor movable. 
- PrelinearizeOp(const PrelinearizeOp&) = delete; - PrelinearizeOp& operator=(const PrelinearizeOp&) = delete; -}; - -// PrelinearizeTupleOp is used to linearize multiple tensors to the device -// format. -class PrelinearizeTupleOp : public OpKernel { - public: - explicit PrelinearizeTupleOp(OpKernelConstruction* ctx) : OpKernel(ctx) { - OP_REQUIRES_OK(ctx, ctx->GetAttr("shapes", &shapes_)); - OP_REQUIRES_OK(ctx, ctx->GetAttr("dtypes", &dtypes_)); - OP_REQUIRES( - ctx, shapes_.size() == dtypes_.size(), - errors::InvalidArgument( - "shapes and dtypes must be the same length. shapes length = ", - shapes_.size(), ", dtypes length = ", dtypes_.size())); - - std::vector xla_shapes; - for (int i = 0; i < shapes_.size(); i++) { - xla::Shape xla_shape; - OP_REQUIRES_OK(ctx, - TensorShapeToXLAShape(dtypes_[i], shapes_[i], &xla_shape)); - xla_shapes.push_back(xla_shape); - } - OP_REQUIRES_OK( - ctx, GetInfeedShapeWithLayout( - ctx, "layouts", xla::ShapeUtil::MakeTupleShape(xla_shapes), - &tuple_shape_)); - } - - void Compute(OpKernelContext* ctx) override { - OpInputList values; - OP_REQUIRES_OK(ctx, ctx->input_list("inputs", &values)); - OP_REQUIRES(ctx, values.size() == shapes_.size(), - errors::InvalidArgument( - "Wrong number of inputs to PrelinearizeTuple.")); - - LinearizerBufferList all_linearized_buffers; - std::vector all_saved_input_tensors; - for (int i = 0; i < values.size(); i++) { - // Validate input. - const Tensor& input_tensor = values[i]; - OP_REQUIRES(ctx, input_tensor.dtype() == dtypes_[i], - errors::InvalidArgument( - "PrelinearizeTuple dtype mismatch at tuple element ", i, - "; expected ", DataType_Name(dtypes_[i]), ", got ", - DataType_Name(input_tensor.dtype()))); - OP_REQUIRES(ctx, input_tensor.shape() == shapes_[i], - errors::InvalidArgument( - "PrelinearizeTuple shape mismatch at tuple element ", i, - "; expected ", shapes_[i].DebugString(), ", got ", - input_tensor.shape().DebugString())); - - // Auto-transpose and prelinearize. - LinearizerBufferList linearized_buffers; - std::vector saved_input_tensors; - auto status = AutoTransposeAndLinearize( - ctx, input_tensor, tuple_shape_.tuple_shapes(i), &linearized_buffers, - &saved_input_tensors); - OP_REQUIRES_OK(ctx, status); - all_linearized_buffers.insert( - all_linearized_buffers.end(), - std::make_move_iterator(linearized_buffers.begin()), - std::make_move_iterator(linearized_buffers.end())); - all_saved_input_tensors.insert( - all_saved_input_tensors.end(), - std::make_move_iterator(saved_input_tensors.begin()), - std::make_move_iterator(saved_input_tensors.end())); - } - - tensorflow::Tensor* output; - OP_REQUIRES_OK(ctx, - ctx->allocate_output(0, tensorflow::TensorShape{}, &output)); - output->scalar()() = LinearizedBuffersWrapper{ - std::move(all_linearized_buffers), std::move(all_saved_input_tensors)}; - } - - bool IsExpensive() override { return true; } - - private: - std::vector shapes_; - DataTypeVector dtypes_; - xla::Shape tuple_shape_; - - // PrelinearizeTupleOp is neither copyable nor movable. - PrelinearizeTupleOp(const PrelinearizeTupleOp&) = delete; - PrelinearizeTupleOp& operator=(const PrelinearizeTupleOp&) = delete; -}; - -// The InfeedEnqueuePrelinearizedBufferOp op is used to transfer prelinearized -// buffers to the device infeed queue. 
-class InfeedEnqueuePrelinearizedBufferOp : public TpuTransferAsyncOpKernel { - public: - explicit InfeedEnqueuePrelinearizedBufferOp(OpKernelConstruction* ctx) - : TpuTransferAsyncOpKernel(ctx, "prelinearized_buffers_to_infeed", 8) {} - - Status DoWork(OpKernelContext* ctx, - xla::TpuTransferManagerInterface* transfer_manager, - stream_executor::StreamExecutor* stream_executor) override { - const Tensor& input_tensor = ctx->input(0); - const LinearizedBuffersWrapper* wrapper = - input_tensor.scalar()() - .get(); - TF_RETURN_IF_ERROR(transfer_manager->TransferBuffersToInfeed( - stream_executor, wrapper->buffers)); - - return Status::OK(); - } - - private: - // InfeedEnqueuePrelinearizedBufferOp is neither copyable nor movable. - InfeedEnqueuePrelinearizedBufferOp( - const InfeedEnqueuePrelinearizedBufferOp&) = delete; - InfeedEnqueuePrelinearizedBufferOp& operator=( - const InfeedEnqueuePrelinearizedBufferOp&) = delete; -}; - -} // anonymous namespace - -TpuInfeedEnqueueOp::TpuInfeedEnqueueOp(OpKernelConstruction* ctx) - : TpuTransferAsyncOpKernel(ctx, "infeed_enqueue", 8) { - OP_REQUIRES_OK(ctx, ctx->GetAttr("shape", &shape_)); - OP_REQUIRES_OK(ctx, ctx->GetAttr("dtype", &dtype_)); - xla::Shape shape; - OP_REQUIRES_OK(ctx, TensorShapeToXLAShape(dtype_, shape_, &shape)); - OP_REQUIRES_OK(ctx, - GetInfeedShapeWithLayout(ctx, "layout", shape, &xla_shape_)); -} - -Status TpuInfeedEnqueueOp::DoWork( - OpKernelContext* ctx, xla::TpuTransferManagerInterface* transfer_manager, - stream_executor::StreamExecutor* stream_executor) { - const Tensor& input_tensor = ctx->input(0); - - // Validate runtime shape and fail if it doesn't match the contract. - if (input_tensor.dtype() != dtype_) { - return errors::InvalidArgument("Infeed dtype mismatch."); - } - if (input_tensor.shape() != shape_) { - return errors::InvalidArgument("Infeed shape mismatch; expected ", - shape_.DebugString(), ", got ", - input_tensor.shape().DebugString()); - } - - const Tensor* tensor = &input_tensor; - Tensor transposed_tensor; - if (!xla::LayoutUtil::IsMonotonicWithDim0Major(xla_shape_.layout())) { - // If the given layout is not in dim0major layout, transpose the tensor. - TF_ASSIGN_OR_RETURN(transposed_tensor, - TransposeTensor(ctx, input_tensor, xla_shape_)); - tensor = &transposed_tensor; - } - - xla::BorrowingLiteral literal; - TF_RETURN_IF_ERROR(HostTensorToBorrowingLiteral(*tensor, &literal)); - - // Transfer the given literal to the Infeed interface of the device. 
- TF_RETURN_IF_ERROR( - transfer_manager->TransferLiteralToInfeed(stream_executor, literal)); - return Status::OK(); -} - -TpuInfeedEnqueueTupleOp::TpuInfeedEnqueueTupleOp(OpKernelConstruction* ctx) - : TpuTransferAsyncOpKernel(ctx, "infeed_enqueue", 8) { - OP_REQUIRES_OK(ctx, ctx->GetAttr("shapes", &shapes_)); - OP_REQUIRES_OK(ctx, ctx->GetAttr("dtypes", &dtypes_)); - OP_REQUIRES( - ctx, shapes_.size() == dtypes_.size(), - errors::InvalidArgument("shapes and dtypes must be the same length.")); - - std::vector xla_shapes; - for (int i = 0; i < shapes_.size(); i++) { - xla::Shape xla_shape; - OP_REQUIRES_OK(ctx, - TensorShapeToXLAShape(dtypes_[i], shapes_[i], &xla_shape)); - xla_shapes.push_back(xla_shape); - } - OP_REQUIRES_OK( - ctx, GetInfeedShapeWithLayout(ctx, "layouts", - xla::ShapeUtil::MakeTupleShape(xla_shapes), - &tuple_shape_)); -} - -Status TpuInfeedEnqueueTupleOp::DoWork( - OpKernelContext* ctx, xla::TpuTransferManagerInterface* transfer_manager, - stream_executor::StreamExecutor* stream_executor) { - OpInputList values; - TF_RETURN_IF_ERROR(ctx->input_list("inputs", &values)); - if (values.size() != shapes_.size()) { - return errors::InvalidArgument( - "Wrong number of inputs to InfeedEnqueueTuple."); - } - - for (const auto& shapes : shapes_) { - VLOG(1) << "TransferLiteralToInfeed " << shapes.DebugString(); - } - - std::vector maybe_transposed_tensors; - maybe_transposed_tensors.reserve(values.size()); - for (int i = 0; i < values.size(); i++) { - // Validate runtime shapes and fail if it doesn't match the contract. - const Tensor* tensor = &values[i]; - if (tensor->shape() != shapes_[i]) { - return errors::InvalidArgument("Infeed shape mismatch for tuple element ", - i, "; expected ", shapes_[i].DebugString(), - ", got ", tensor->shape().DebugString()); - } - if (!xla::LayoutUtil::IsMonotonicWithDim0Major( - tuple_shape_.tuple_shapes(i).layout())) { - // If the given layout is not in dim0major layout, tranposes the given - // tensor. - TF_ASSIGN_OR_RETURN( - Tensor transposed_tensor, - TransposeTensor(ctx, *tensor, tuple_shape_.tuple_shapes(i))); - maybe_transposed_tensors.emplace_back(transposed_tensor); - } else { - maybe_transposed_tensors.emplace_back(*tensor); - } - } - - xla::BorrowingLiteral tuple; - TF_RETURN_IF_ERROR( - HostTensorsToBorrowingLiteralTuple(maybe_transposed_tensors, &tuple)); - - // Transfer the given literal to the Infeed interface of the device. - TF_RETURN_IF_ERROR( - transfer_manager->TransferLiteralToInfeed(stream_executor, tuple)); - - VLOG(1) << "TransferLiteralToInfeed complete."; - - return Status::OK(); -} - -// These ops execute on either the TPU device or the CPU device. When running on -// CPU they must specify a non-negative value for device_ordinal to indicate -// which TPU to send infeed to. -REGISTER_KERNEL_BUILDER( - Name("InfeedEnqueue").Device(DEVICE_TPU_NODE).HostMemory("input"), - TpuInfeedEnqueueOp); -REGISTER_KERNEL_BUILDER(Name("InfeedEnqueue").Device(DEVICE_CPU), - TpuInfeedEnqueueOp); - -REGISTER_KERNEL_BUILDER( - Name("InfeedEnqueueTuple").Device(DEVICE_TPU_NODE).HostMemory("inputs"), - TpuInfeedEnqueueTupleOp); -REGISTER_KERNEL_BUILDER(Name("InfeedEnqueueTuple").Device(DEVICE_CPU), - TpuInfeedEnqueueTupleOp); - -// Prelinearize ops run on CPU as part of tf.data input pipeline. 
-REGISTER_KERNEL_BUILDER(Name("Prelinearize").Device(DEVICE_CPU), - PrelinearizeOp); -REGISTER_KERNEL_BUILDER(Name("PrelinearizeTuple").Device(DEVICE_CPU), - PrelinearizeTupleOp); - -// InfeedEnqueuePrelinearizedBuffer op run on CPU and takes a device_ordinal to -// select the right device to infeed. -REGISTER_KERNEL_BUILDER( - Name("InfeedEnqueuePrelinearizedBuffer").Device(DEVICE_CPU), - InfeedEnqueuePrelinearizedBufferOp); - -} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/infeed_ops.h b/tensorflow/core/tpu/kernels/infeed_ops.h deleted file mode 100644 index 622583b6a73..00000000000 --- a/tensorflow/core/tpu/kernels/infeed_ops.h +++ /dev/null @@ -1,69 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_CORE_TPU_KERNELS_INFEED_OPS_H_ -#define TENSORFLOW_CORE_TPU_KERNELS_INFEED_OPS_H_ - -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/framework/types.pb.h" -#include "tensorflow/core/platform/status.h" -#include "tensorflow/core/tpu/kernels/transfer_ops.h" - -namespace tensorflow { - -// TODO(b/65200690): Rework this when there is a callback based infeed API to -// StreamExecutor. - -// The InfeedEnqueue op is used to deliver data to the device infeed queue. -class TpuInfeedEnqueueOp : public TpuTransferAsyncOpKernel { - public: - explicit TpuInfeedEnqueueOp(OpKernelConstruction* ctx); - Status DoWork(OpKernelContext* ctx, - xla::TpuTransferManagerInterface* transfer_manager, - stream_executor::StreamExecutor* stream_executor) override; - - private: - TensorShape shape_; - DataType dtype_; - xla::Shape xla_shape_; - - // TpuInfeedEnqueueOp is neither copyable nor movable. - TpuInfeedEnqueueOp(const TpuInfeedEnqueueOp&) = delete; - TpuInfeedEnqueueOp& operator=(const TpuInfeedEnqueueOp&) = delete; -}; - -// The InfeedEnqueueTuple op is used on the host to deliver multiple tensors to -// the device infeed queue as an XLA tuple. -class TpuInfeedEnqueueTupleOp : public TpuTransferAsyncOpKernel { - public: - explicit TpuInfeedEnqueueTupleOp(OpKernelConstruction* ctx); - Status DoWork(OpKernelContext* ctx, - xla::TpuTransferManagerInterface* transfer_manager, - stream_executor::StreamExecutor* stream_executor) override; - - private: - std::vector shapes_; - DataTypeVector dtypes_; - xla::Shape tuple_shape_; - - // TpuInfeedEnqueueTupleOp is neither copyable nor movable. 
- TpuInfeedEnqueueTupleOp(const TpuInfeedEnqueueTupleOp&) = delete; - TpuInfeedEnqueueTupleOp& operator=(const TpuInfeedEnqueueTupleOp&) = delete; -}; - -} // namespace tensorflow - -#endif // TENSORFLOW_CORE_TPU_KERNELS_INFEED_OPS_H_ diff --git a/tensorflow/core/tpu/kernels/outfeed_ops.cc b/tensorflow/core/tpu/kernels/outfeed_ops.cc deleted file mode 100644 index 51a3a71a297..00000000000 --- a/tensorflow/core/tpu/kernels/outfeed_ops.cc +++ /dev/null @@ -1,116 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/core/tpu/kernels/outfeed_ops.h" - -#include "tensorflow/compiler/jit/xla_device.h" -#include "tensorflow/compiler/tf2xla/literal_util.h" -#include "tensorflow/compiler/tf2xla/shape_util.h" -#include "tensorflow/compiler/tf2xla/type_util.h" -#include "tensorflow/core/framework/allocator.h" -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/shape_inference.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/tpu/kernels/transfer_ops.h" -#include "tensorflow/core/tpu/tpu_defs.h" -#include "tensorflow/stream_executor/multi_platform_manager.h" - -namespace tensorflow { - -TpuOutfeedDequeueOp::TpuOutfeedDequeueOp(OpKernelConstruction* ctx) - : TpuTransferAsyncOpKernel(ctx, "outfeed_dequeue", 1) { - OP_REQUIRES_OK(ctx, ctx->GetAttr("shape", &shape_)); - OP_REQUIRES_OK(ctx, ctx->GetAttr("dtype", &dtype_)); - OP_REQUIRES_OK(ctx, TensorShapeToXLAShape(dtype_, shape_, &xla_shape_)); -} - -Status TpuOutfeedDequeueOp::DoWork( - OpKernelContext* ctx, xla::TpuTransferManagerInterface* transfer_manager, - stream_executor::StreamExecutor* stream_executor) { - Tensor* output; - TF_RETURN_IF_ERROR(ctx->allocate_output(0, shape_, &output)); - - // Transfer from the outfeed interface of the device. - xla::MutableBorrowingLiteral literal; - TF_RETURN_IF_ERROR( - HostTensorToMutableBorrowingLiteral(xla_shape_, output, &literal)); - - VLOG(1) << "TransferLiteralFromOutfeed " - << xla::ShapeUtil::HumanStringWithLayout(xla_shape_); - - TF_RETURN_IF_ERROR(transfer_manager->TransferLiteralFromOutfeed( - stream_executor, xla_shape_, literal)); - - VLOG(1) << "TransferLiteralFromOutfeed complete."; - - return Status::OK(); -} - -// The OutfeedDequeueTuple op is used to retrieve multiple tensors from the -// device outfeed queue. -TpuOutfeedDequeueTupleOp::TpuOutfeedDequeueTupleOp(OpKernelConstruction* ctx) - : TpuTransferAsyncOpKernel(ctx, "outfeed_dequeue", 1) { - OP_REQUIRES_OK(ctx, ctx->GetAttr("shapes", &shapes_)); - OP_REQUIRES_OK(ctx, ctx->GetAttr("dtypes", &dtypes_)); - OP_REQUIRES( - ctx, shapes_.size() == dtypes_.size(), - errors::InvalidArgument("shapes and dtypes must be the same length.")); - // The `dtypes` list is inferred from the supplied inputs, so it - // is always the correct length. 
- for (int i = 0; i < shapes_.size(); i++) { - xla::Shape xla_shape; - OP_REQUIRES_OK(ctx, - TensorShapeToXLAShape(dtypes_[i], shapes_[i], &xla_shape)); - xla_shapes_.push_back(xla_shape); - } - tuple_shape_ = xla::ShapeUtil::MakeTupleShape(xla_shapes_); -} - -Status TpuOutfeedDequeueTupleOp::DoWork( - OpKernelContext* ctx, xla::TpuTransferManagerInterface* transfer_manager, - stream_executor::StreamExecutor* stream_executor) { - VLOG(1) << "TransferLiteralFromOutfeed " - << xla::ShapeUtil::HumanStringWithLayout(tuple_shape_); - - for (int i = 0; i < shapes_.size(); ++i) { - Tensor* output; - TF_RETURN_IF_ERROR(ctx->allocate_output(i, shapes_[i], &output)); - - xla::MutableBorrowingLiteral literal; - TF_RETURN_IF_ERROR( - HostTensorToMutableBorrowingLiteral(xla_shapes_[i], output, &literal)); - TF_RETURN_IF_ERROR(transfer_manager->TransferLiteralFromOutfeed( - stream_executor, xla_shapes_[i], literal)); - } - return Status::OK(); -} - -// These ops execute on either the TPU device or the CPU device. When -// running on CPU they must specify a non-negative value for -// device_ordinal to indicate which TPU to receive outfeed from. -REGISTER_KERNEL_BUILDER( - Name("OutfeedDequeue").Device(DEVICE_TPU_NODE).HostMemory("output"), - TpuOutfeedDequeueOp); -REGISTER_KERNEL_BUILDER(Name("OutfeedDequeue").Device(DEVICE_CPU), - TpuOutfeedDequeueOp); - -REGISTER_KERNEL_BUILDER( - Name("OutfeedDequeueTuple").Device(DEVICE_TPU_NODE).HostMemory("outputs"), - TpuOutfeedDequeueTupleOp); -REGISTER_KERNEL_BUILDER(Name("OutfeedDequeueTuple").Device(DEVICE_CPU), - TpuOutfeedDequeueTupleOp); - -} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/outfeed_ops.h b/tensorflow/core/tpu/kernels/outfeed_ops.h deleted file mode 100644 index 5e3ed87c04b..00000000000 --- a/tensorflow/core/tpu/kernels/outfeed_ops.h +++ /dev/null @@ -1,69 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_CORE_TPU_KERNELS_OUTFEED_OPS_H_ -#define TENSORFLOW_CORE_TPU_KERNELS_OUTFEED_OPS_H_ - -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/framework/types.pb.h" -#include "tensorflow/core/tpu/kernels/transfer_ops.h" - -namespace tensorflow { - -// The OutfeedDequeue op is used to retrieve a single tensor from the device -// outfeed queue. -class TpuOutfeedDequeueOp : public TpuTransferAsyncOpKernel { - public: - explicit TpuOutfeedDequeueOp(OpKernelConstruction* ctx); - - Status DoWork(OpKernelContext* ctx, - xla::TpuTransferManagerInterface* transfer_manager, - stream_executor::StreamExecutor* stream_executor) override; - - private: - TensorShape shape_; - DataType dtype_; - xla::Shape xla_shape_; - - // OutfeedDequeueOp is neither copyable nor movable. 
- TpuOutfeedDequeueOp(const TpuOutfeedDequeueOp&) = delete; - TpuOutfeedDequeueOp& operator=(const TpuOutfeedDequeueOp&) = delete; -}; - -// The OutfeedDequeueTuple op is used to retrieve multiple tensors from the -// device outfeed queue. -class TpuOutfeedDequeueTupleOp : public TpuTransferAsyncOpKernel { - public: - explicit TpuOutfeedDequeueTupleOp(OpKernelConstruction* ctx); - - Status DoWork(OpKernelContext* ctx, - xla::TpuTransferManagerInterface* transfer_manager, - stream_executor::StreamExecutor* stream_executor) override; - - private: - std::vector shapes_; - DataTypeVector dtypes_; - std::vector xla_shapes_; - xla::Shape tuple_shape_; - - // OutfeedDequeueTupleOp is neither copyable nor movable. - TpuOutfeedDequeueTupleOp(const TpuOutfeedDequeueTupleOp&) = delete; - TpuOutfeedDequeueTupleOp& operator=(const TpuOutfeedDequeueTupleOp&) = delete; -}; - -} // namespace tensorflow - -#endif // TENSORFLOW_CORE_TPU_KERNELS_OUTFEED_OPS_H_ diff --git a/tensorflow/core/tpu/kernels/replication_ops.cc b/tensorflow/core/tpu/kernels/replication_ops.cc deleted file mode 100644 index 4c986e880e7..00000000000 --- a/tensorflow/core/tpu/kernels/replication_ops.cc +++ /dev/null @@ -1,27 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/compiler/jit/xla_device_ops.h" -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/shape_inference.h" -#include "tensorflow/core/tpu/tpu_defs.h" - -namespace tensorflow { - -REGISTER_KERNEL_BUILDER(Name("_TPUReplicate").Device(DEVICE_TPU_SYSTEM), - XlaDeviceDummyOp); - -} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_handle_to_key_op.cc b/tensorflow/core/tpu/kernels/tpu_handle_to_key_op.cc deleted file mode 100644 index ec2ae91d3eb..00000000000 --- a/tensorflow/core/tpu/kernels/tpu_handle_to_key_op.cc +++ /dev/null @@ -1,62 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include -#include - -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h" -#include "tensorflow/core/tpu/kernels/tpu_op_consts.h" -#include "tensorflow/core/tpu/tpu_configuration.h" - -namespace tensorflow { - -class TpuHandleToProtoKeyOp : public OpKernel { - public: - explicit TpuHandleToProtoKeyOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} - ~TpuHandleToProtoKeyOp() override = default; - TpuHandleToProtoKeyOp(const TpuHandleToProtoKeyOp&) = delete; - TpuHandleToProtoKeyOp& operator=(const TpuHandleToProtoKeyOp&) = delete; - - void Compute(OpKernelContext* ctx) override { - VLOG(1) << "TpuHandleToProtoKeyOp::Compute " << ctx->op_kernel().name() - << " on device " << ctx->op_kernel().requested_device(); - const Tensor& uid = ctx->input(0); - - ResourceMgr* rm = GetTPUConfigResourceMgr(); - tpu::TpuCompilationCacheInterface* cache; - OP_REQUIRES_OK(ctx, rm->Lookup( - rm->default_container(), - tpu::kCompilationCacheResourceName, &cache)); - core::ScopedUnref cache_unref(cache); - - std::vector keys; - OP_REQUIRES_OK(ctx, cache->GetKeysFromUid(uid.scalar()(), &keys)); - - TensorShape output_shape; - output_shape.AddDim(keys.size()); - Tensor* result = nullptr; - OP_REQUIRES_OK(ctx, ctx->allocate_output(0, output_shape, &result)); - for (int i = 0; i < keys.size(); ++i) { - result->vec()(i) = keys[i]; - } - }; -}; - -REGISTER_KERNEL_BUILDER(Name("TpuHandleToProtoKey").Device(DEVICE_CPU), - TpuHandleToProtoKeyOp); - -} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/transfer_ops.cc b/tensorflow/core/tpu/kernels/transfer_ops.cc deleted file mode 100644 index 40b85e2cfbd..00000000000 --- a/tensorflow/core/tpu/kernels/transfer_ops.cc +++ /dev/null @@ -1,98 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/core/tpu/kernels/transfer_ops.h" - -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/kernels/ops_util.h" -#include "tensorflow/core/platform/tracing.h" -#include "tensorflow/core/profiler/lib/traceme.h" -#include "tensorflow/stream_executor/multi_platform_manager.h" -#include "tensorflow/stream_executor/tpu/tpu_node_context.h" -#include "tensorflow/stream_executor/tpu/tpu_platform_interface.h" -#include "tensorflow/stream_executor/tpu/tpu_transfer_manager_interface.h" - -namespace tensorflow { - -TpuTransferAsyncOpKernel::TpuTransferAsyncOpKernel(OpKernelConstruction* ctx, - const string& transfer_type, - int number_of_threads) - : AsyncOpKernel(ctx), - thread_pool_(new thread::ThreadPool( - ctx->env(), - strings::StrCat(transfer_type, "_thread_", - SanitizeThreadSuffix(def().name())), - /*num_threads=*/8)) { - OP_REQUIRES_OK(ctx, ctx->GetAttr("device_ordinal", &device_ordinal_)); - if (ctx->device_type() == DeviceType(DEVICE_CPU)) { - OP_REQUIRES( - ctx, device_ordinal_ >= 0, - errors::InvalidArgument(transfer_type, - " ops must specify a device_ordinal when " - "placed on CPU.")); - } -} - -void TpuTransferAsyncOpKernel::ComputeAsync(OpKernelContext* ctx, - DoneCallback done) { - CancellationToken token = - ctx->cancellation_manager()->get_cancellation_token(); - bool already_cancelled; - { - // Only protect registering the cancellation callback as mu_ cannot be held - // at a point where `done` could be called. - mutex_lock lock(mu_); - already_cancelled = !ctx->cancellation_manager()->RegisterCallback( - token, [this]() { Cancel(); }); - } - OP_REQUIRES_ASYNC(ctx, !already_cancelled, - errors::Cancelled("Infeed was cancelled."), done); - thread_pool_->Schedule([this, ctx, done, token]() { - Status s = RunTransfer(ctx); - ctx->cancellation_manager()->DeregisterCallback(token); - OP_REQUIRES_OK_ASYNC(ctx, s, done); - done(); - }); -} - -Status TpuTransferAsyncOpKernel::RunTransfer(OpKernelContext* ctx) { - auto* tpu_platform = tpu::TpuPlatformInterface::GetRegisteredPlatform(); - - int real_device_ordinal = device_ordinal_; - if (real_device_ordinal < 0) { - const XlaDevice::Metadata* metadata; - TF_RETURN_IF_ERROR(XlaDevice::GetMetadata(ctx, &metadata)); - real_device_ordinal = metadata->device_ordinal(); - } - stream_executor::StreamExecutor* stream_executor = - tpu_platform->ExecutorForDevice(real_device_ordinal).ValueOrDie(); - - // When Xprof profiling is off (which is the default), constructing the - // activity is simple enough that its overhead is negligible. - profiler::TraceMe activity( - [this] { return profiler::TraceMeOp(name(), type_string()); }, - profiler::TraceMeLevel::kInfo); - return DoWork( - ctx, xla::TpuTransferManagerInterface::GetRegisteredTpuTransferManager(), - stream_executor); -} - -void TpuTransferAsyncOpKernel::Cancel() { - mutex_lock lock(mu_); - TF_CHECK_OK(tpu::TpuNodeContext::CloseTpuHost()); -} - -} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/transfer_ops.h b/tensorflow/core/tpu/kernels/transfer_ops.h deleted file mode 100644 index d98d743f569..00000000000 --- a/tensorflow/core/tpu/kernels/transfer_ops.h +++ /dev/null @@ -1,56 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_CORE_TPU_KERNELS_TRANSFER_OPS_H_ -#define TENSORFLOW_CORE_TPU_KERNELS_TRANSFER_OPS_H_ - -#include "tensorflow/compiler/jit/xla_device.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/util/stream_executor_util.h" -#include "tensorflow/stream_executor/tpu/tpu_transfer_manager_interface.h" - -namespace tensorflow { - -// Base class providing common functionality for async ops that transfer from -// host to TPU. -class TpuTransferAsyncOpKernel : public AsyncOpKernel { - public: - explicit TpuTransferAsyncOpKernel(OpKernelConstruction* ctx, - const string& transfer_type, - int number_of_threads); - - void ComputeAsync(OpKernelContext* ctx, DoneCallback done) override; - - protected: - virtual Status DoWork(OpKernelContext* context, - xla::TpuTransferManagerInterface* transfer_manager, - stream_executor::StreamExecutor* stream_executor) = 0; - - private: - Status RunTransfer(OpKernelContext* ctx); - void Cancel(); - - std::unique_ptr thread_pool_; - int device_ordinal_; - mutex mu_; - - // TpuTransferAsyncOpKernel is neither copyable nor movable. - TpuTransferAsyncOpKernel(const TpuTransferAsyncOpKernel&) = delete; - TpuTransferAsyncOpKernel& operator=(const TpuTransferAsyncOpKernel&) = delete; -}; - -} // namespace tensorflow - -#endif // TENSORFLOW_CORE_TPU_KERNELS_TRANSFER_OPS_H_ diff --git a/tensorflow/core/tpu/tpu_defs.cc b/tensorflow/core/tpu/tpu_defs.cc index 69d4989773a..69669bfdb7b 100644 --- a/tensorflow/core/tpu/tpu_defs.cc +++ b/tensorflow/core/tpu/tpu_defs.cc @@ -15,10 +15,6 @@ limitations under the License. #include "tensorflow/core/tpu/tpu_defs.h" -#include "tensorflow/core/tpu/tpu_api.h" -#include "tensorflow/stream_executor/tpu/c_api_conversions.h" -#include "tensorflow/stream_executor/tpu/c_api_decl.h" - namespace tensorflow { const char* const DEVICE_TPU_NODE = "TPU"; @@ -31,18 +27,4 @@ const char* const TPUREPLICATE_MIRRORED_VAR_INDICES_ATTR = const char* const kTPUReplicateAttr = "_tpu_replicate"; const char* const kOutsideCompilationAttr = "_xla_outside_compilation"; -xla::Shape GetTPUInfeedLayout(const xla::Shape& shape) { - XLA_Shape c_shape; - XLA_Shape c_infeed_shape; - - ApiConverter::ToC(shape, &c_shape); - - tpu::ExecutorApiFn()->TpuTransferManager_GetInfeedLayoutFn(&c_shape, - &c_infeed_shape); - xla::Shape infeed_shape = ApiConverter::FromC(&c_infeed_shape); - ApiConverter::Free(&c_shape); - ApiConverter::Free(&c_infeed_shape); - return infeed_shape; -} - } // namespace tensorflow diff --git a/tensorflow/core/tpu/tpu_defs.h b/tensorflow/core/tpu/tpu_defs.h index 29954b2289f..008e386dde6 100644 --- a/tensorflow/core/tpu/tpu_defs.h +++ b/tensorflow/core/tpu/tpu_defs.h @@ -20,7 +20,6 @@ limitations under the License. 
#include -#include "tensorflow/compiler/xla/shape.h" #include "tensorflow/core/framework/types.pb.h" namespace tensorflow { @@ -57,11 +56,6 @@ static constexpr std::array kTpuAllTypes = { DT_COMPLEX64, DT_INT64, DT_UINT64, DT_QINT8, DT_QUINT8, DT_INT8, DT_UINT8, DT_INT16, DT_UINT16}}; -// For the given shape, chooses a layout for infeed on TPU. The returned shape -// has the same dimensions as the original shape, and only the layout is -// changed. -xla::Shape GetTPUInfeedLayout(const xla::Shape& shape); - } // namespace tensorflow #endif // TENSORFLOW_CORE_TPU_TPU_DEFS_H_ diff --git a/tensorflow/core/tpu/tpu_library_init_fns.inc b/tensorflow/core/tpu/tpu_library_init_fns.inc index 40130bd46dd..be9d594685e 100644 --- a/tensorflow/core/tpu/tpu_library_init_fns.inc +++ b/tensorflow/core/tpu/tpu_library_init_fns.inc @@ -161,7 +161,6 @@ tensorflow::Status SetExecutorStructFn(void* library_handle) { TFTPU_SET_FN(executor_fn, TpuTransferManager_TransferLiteralFromDevice); TFTPU_SET_FN(executor_fn, TpuTransferManager_GetByteSizeRequirement); TFTPU_SET_FN(executor_fn, TpuTransferManager_WriteSingleTupleIndexTable); - TFTPU_SET_FN(executor_fn, TpuTransferManager_GetInfeedLayout); TFTPU_SET_FN(executor_fn, TpuTransferManager_LinearizeToBuffers); TFTPU_SET_FN(executor_fn, TpuTransferManager_FreeBuffers); From 53c817161456f7922c832db1a3dbd84d994337ac Mon Sep 17 00:00:00 2001 From: Yuqi Li Date: Mon, 10 Aug 2020 01:07:34 -0700 Subject: [PATCH 2429/2522] update pip installation and import path in colabs for tflite model maker. PiperOrigin-RevId: 325756233 Change-Id: Ifb629e75408c620e5c11e00fa5eaef79633c858b --- .../model_maker_image_classification.ipynb | 61 +++---------------- .../model_maker_question_answer.ipynb | 12 ++-- .../model_maker_text_classification.ipynb | 12 ++-- 3 files changed, 22 insertions(+), 63 deletions(-) diff --git a/tensorflow/lite/g3doc/tutorials/model_maker_image_classification.ipynb b/tensorflow/lite/g3doc/tutorials/model_maker_image_classification.ipynb index 99ebb7087f2..e88c2e93519 100644 --- a/tensorflow/lite/g3doc/tutorials/model_maker_image_classification.ipynb +++ b/tensorflow/lite/g3doc/tutorials/model_maker_image_classification.ipynb @@ -101,7 +101,7 @@ }, "outputs": [], "source": [ - "!pip install git+https://github.com/tensorflow/examples.git#egg=tensorflow-examples[model_maker]" + "!pip install tflite-model-maker" ] }, { @@ -129,11 +129,10 @@ "import tensorflow as tf\n", "assert tf.__version__.startswith('2')\n", "\n", - "from tensorflow_examples.lite.model_maker.core.data_util.image_dataloader import ImageClassifierDataLoader\n", - "from tensorflow_examples.lite.model_maker.core.task import image_classifier\n", - "from tensorflow_examples.lite.model_maker.core.task.configs import QuantizationConfig\n", - "from tensorflow_examples.lite.model_maker.core.task.model_spec import mobilenet_v2_spec\n", - "from tensorflow_examples.lite.model_maker.core.task.model_spec import ImageModelSpec\n", + "from tflite_model_maker import configs\n", + "from tflite_model_maker import image_classifier\n", + "from tflite_model_maker import ImageClassifierDataLoader\n", + "from tflite_model_maker import model_spec\n", "\n", "import matplotlib.pyplot as plt" ] @@ -640,7 +639,7 @@ "id": "-4jQaxyT5_KV" }, "source": [ - "Here, we also demonstrate how to use the above files to run and evaluate the TensorFlow Lite model." + "You can also evalute the tflite model with the `evaluate_tflite` method." 
] }, { @@ -653,47 +652,7 @@ }, "outputs": [], "source": [ - "# Read TensorFlow Lite model from TensorFlow Lite file.\n", - "with tf.io.gfile.GFile('model.tflite', 'rb') as f:\n", - " model_content = f.read()\n", - "\n", - "# Initialze TensorFlow Lite inpterpreter.\n", - "interpreter = tf.lite.Interpreter(model_content=model_content)\n", - "interpreter.allocate_tensors()\n", - "input_index = interpreter.get_input_details()[0]['index']\n", - "output = interpreter.tensor(interpreter.get_output_details()[0][\"index\"])\n", - "\n", - "# Run predictions on each test image data and calculate accuracy.\n", - "accurate_count = 0\n", - "for i, (image, label) in enumerate(test_data.dataset):\n", - " # Pre-processing should remain the same. Currently, just normalize each pixel value and resize image according to the model's specification.\n", - " image, _ = model.preprocess(image, label)\n", - " # Add batch dimension and convert to float32 to match with the model's input\n", - " # data format.\n", - " image = tf.expand_dims(image, 0).numpy()\n", - "\n", - " # Run inference.\n", - " interpreter.set_tensor(input_index, image)\n", - " interpreter.invoke()\n", - "\n", - " # Post-processing: remove batch dimension and find the label with highest\n", - " # probability.\n", - " predict_label = np.argmax(output()[0])\n", - "\n", - " accurate_count += (predict_label == label.numpy())\n", - "\n", - "accuracy = accurate_count * 1.0 / test_data.size\n", - "print('TensorFlow Lite model accuracy = %.4f' % accuracy)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "fuHB-NFqpKTD" - }, - "source": [ - "Note that preprocessing for inference should be the same as training. Currently, preprocessing contains normalizing each pixel value and resizing the image according to the model's specification. For EfficientNet-Lite0, input image should be normalized to `[0, 1]` and resized to `[224, 224, 3]`." 
+ "model.evaluate_tflite('model.tflite', test_data)" ] }, { @@ -760,7 +719,7 @@ }, "outputs": [], "source": [ - "config = QuantizationConfig.create_full_integer_quantization(representative_data=test_data, is_integer_only=True)" + "config = configs.QuantizationConfig.create_full_integer_quantization(representative_data=test_data, is_integer_only=True)" ] }, { @@ -830,7 +789,7 @@ }, "outputs": [], "source": [ - "model = image_classifier.create(train_data, model_spec=mobilenet_v2_spec, validation_data=validation_data)" + "model = image_classifier.create(train_data, model_spec=model_spec.mobilenet_v2_spec, validation_data=validation_data)" ] }, { @@ -882,7 +841,7 @@ }, "outputs": [], "source": [ - "inception_v3_spec = ImageModelSpec(\n", + "inception_v3_spec = model_spec.ImageModelSpec(\n", " uri='https://tfhub.dev/google/imagenet/inception_v3/feature_vector/1')\n", "inception_v3_spec.input_image_shape = [299, 299]" ] diff --git a/tensorflow/lite/g3doc/tutorials/model_maker_question_answer.ipynb b/tensorflow/lite/g3doc/tutorials/model_maker_question_answer.ipynb index a1d11115a75..645be959d0e 100644 --- a/tensorflow/lite/g3doc/tutorials/model_maker_question_answer.ipynb +++ b/tensorflow/lite/g3doc/tutorials/model_maker_question_answer.ipynb @@ -188,7 +188,7 @@ }, "outputs": [], "source": [ - "!pip install git+https://github.com/tensorflow/examples.git#egg=tensorflow-examples[model_maker]" + "!pip install tflite-model-maker" ] }, { @@ -217,10 +217,10 @@ "import tensorflow as tf\n", "assert tf.__version__.startswith('2')\n", "\n", - "from tensorflow_examples.lite.model_maker.core.data_util.text_dataloader import QuestionAnswerDataLoader\n", - "from tensorflow_examples.lite.model_maker.core.task import model_spec\n", - "from tensorflow_examples.lite.model_maker.core.task import question_answer\n", - "from tensorflow_examples.lite.model_maker.core.task.configs import QuantizationConfig" + "from tflite_model_maker import configs\n", + "from tflite_model_maker import model_spec\n", + "from tflite_model_maker import question_answer\n", + "from tflite_model_maker import QuestionAnswerDataLoader" ] }, { @@ -448,7 +448,7 @@ }, "outputs": [], "source": [ - "config = QuantizationConfig.create_dynamic_range_quantization(optimizations=[tf.lite.Optimize.OPTIMIZE_FOR_LATENCY])\n", + "config = configs.QuantizationConfig.create_dynamic_range_quantization(optimizations=[tf.lite.Optimize.OPTIMIZE_FOR_LATENCY])\n", "config._experimental_new_quantizer = True" ] }, diff --git a/tensorflow/lite/g3doc/tutorials/model_maker_text_classification.ipynb b/tensorflow/lite/g3doc/tutorials/model_maker_text_classification.ipynb index 1a839d70e38..88cef93e761 100644 --- a/tensorflow/lite/g3doc/tutorials/model_maker_text_classification.ipynb +++ b/tensorflow/lite/g3doc/tutorials/model_maker_text_classification.ipynb @@ -110,7 +110,7 @@ }, "outputs": [], "source": [ - "!pip install git+https://github.com/tensorflow/examples.git#egg=tensorflow-examples[model_maker]" + "!pip install tflite-model-maker" ] }, { @@ -139,10 +139,10 @@ "import tensorflow as tf\n", "assert tf.__version__.startswith('2')\n", "\n", - "from tensorflow_examples.lite.model_maker.core.data_util.text_dataloader import TextClassifierDataLoader\n", - "from tensorflow_examples.lite.model_maker.core.task import model_spec\n", - "from tensorflow_examples.lite.model_maker.core.task import text_classifier\n", - "from tensorflow_examples.lite.model_maker.core.task.configs import QuantizationConfig" + "from tflite_model_maker import configs\n", + "from 
tflite_model_maker import model_spec\n", + "from tflite_model_maker import text_classifier\n", + "from tflite_model_maker import TextClassifierDataLoader" ] }, { @@ -344,7 +344,7 @@ }, "outputs": [], "source": [ - "config = QuantizationConfig.create_dynamic_range_quantization(optimizations=[tf.lite.Optimize.OPTIMIZE_FOR_LATENCY])\n", + "config = configs.QuantizationConfig.create_dynamic_range_quantization(optimizations=[tf.lite.Optimize.OPTIMIZE_FOR_LATENCY])\n", "config._experimental_new_quantizer = True" ] }, From c09f5c4f1aa857a2f6bd311d30695fe1b0b66f1b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 10 Aug 2020 02:01:29 -0700 Subject: [PATCH 2430/2522] compat: Update forward compatibility horizon to 2020-08-10 PiperOrigin-RevId: 325761194 Change-Id: I5cb5e65aef07cd70a0331aa162c92b5f8d828ce8 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index fefbf667704..3d8f9c5490e 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 8, 9) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 8, 10) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From 8a4ffe2e1ae722cff5306778df0cfca8b7f503fe Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 10 Aug 2020 02:01:30 -0700 Subject: [PATCH 2431/2522] Update GraphDef version to 489. PiperOrigin-RevId: 325761195 Change-Id: I1eb19d20d43ee9189b563472dbe0b3a65a5ef9f8 --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 1813717e87a..fa0df24a7e7 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 488 // Updated: 2020/8/9 +#define TF_GRAPH_DEF_VERSION 489 // Updated: 2020/8/10 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From d5eaf2316406e774a7f38a6fac7826783cbf4e8c Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Mon, 10 Aug 2020 06:03:37 -0700 Subject: [PATCH 2432/2522] Disable logical_expressions_test for now since it failed tsan test. PiperOrigin-RevId: 325788797 Change-Id: I92a0141f570fc7b4c2d5ff182278f8bbc9ad7436 --- tensorflow/python/autograph/converters/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/autograph/converters/BUILD b/tensorflow/python/autograph/converters/BUILD index f584038978f..fd8ec1dbaa3 100644 --- a/tensorflow/python/autograph/converters/BUILD +++ b/tensorflow/python/autograph/converters/BUILD @@ -176,6 +176,7 @@ py_test( srcs = ["logical_expressions_test.py"], python_version = "PY3", srcs_version = "PY2AND3", + tags = ["notsan"], # b/163218460 deps = [ ":converters", "//tensorflow/python:client_testlib", From 7f5e4f3361f178f5c1109b4ff0b69185daf63aa4 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 10 Aug 2020 06:44:31 -0700 Subject: [PATCH 2433/2522] Return shared_ptr to allow passing them back to C++. PiperOrigin-RevId: 325793565 Change-Id: I71958cdb27da33471ea795d7e8a41085983bfb2b --- tensorflow/compiler/xla/python/py_client.cc | 6 ++++-- tensorflow/compiler/xla/python/py_client.h | 2 +- tensorflow/compiler/xla/python/xla.cc | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/xla/python/py_client.cc b/tensorflow/compiler/xla/python/py_client.cc index 4224d69dc8f..9b95f8e03de 100644 --- a/tensorflow/compiler/xla/python/py_client.cc +++ b/tensorflow/compiler/xla/python/py_client.cc @@ -15,6 +15,8 @@ limitations under the License. #include "tensorflow/compiler/xla/python/py_client.h" +#include + #include "absl/container/flat_hash_map.h" #include "tensorflow/compiler/xla/python/py_buffer.h" #include "tensorflow/compiler/xla/python/py_executable.h" @@ -120,7 +122,7 @@ StatusOr> PyClient::BufferFromPyval( std::move(traceback)); } -StatusOr> PyClient::Compile( +StatusOr> PyClient::Compile( const XlaComputation& computation, CompileOptions options) { std::unique_ptr executable; absl::optional fingerprint; @@ -133,7 +135,7 @@ StatusOr> PyClient::Compile( pjrt_client_->ExecutableFingerprint(*executable)); } auto traceback = Traceback::Get(); - return std::make_unique( + return std::make_shared( shared_from_this(), std::move(executable), std::move(traceback), std::move(fingerprint)); } diff --git a/tensorflow/compiler/xla/python/py_client.h b/tensorflow/compiler/xla/python/py_client.h index d33f3dadd7d..e41415c42f2 100644 --- a/tensorflow/compiler/xla/python/py_client.h +++ b/tensorflow/compiler/xla/python/py_client.h @@ -124,7 +124,7 @@ class PyClient : public std::enable_shared_from_this { const pybind11::object& argument, Device* device, bool force_copy, PjRtBuffer::HostBufferSemantics host_buffer_semantics); - StatusOr> Compile( + StatusOr> Compile( const XlaComputation& computation, CompileOptions options); pybind11::bytes HeapProfile(); diff --git a/tensorflow/compiler/xla/python/xla.cc b/tensorflow/compiler/xla/python/xla.cc index 9590c5d57c3..510175cebf6 100644 --- a/tensorflow/compiler/xla/python/xla.cc +++ b/tensorflow/compiler/xla/python/xla.cc @@ -654,7 +654,7 @@ PYBIND11_MODULE(xla_extension, m) { PyTypeObject* buffer_type = reinterpret_cast(buffer.ptr()); buffer_type->tp_as_buffer = PyBuffer::BufferProtocol(); - py::class_> executable( + py::class_> executable( m, "Executable"); executable.def_property_readonly("client", &PyExecutable::client) .def("local_logical_device_ids", &PyExecutable::local_logical_device_ids) From 2ec0a4987e807b18c52a6ec93e5aed74ea22117c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 10 Aug 2020 07:05:41 -0700 Subject: [PATCH 2434/2522] Add a `PjRtExecute` function. 
PiperOrigin-RevId: 325796542 Change-Id: Iba3235bd650316f61b3ce37013b4437cafb205fa --- .../compiler/xla/python/py_executable.cc | 33 ++++++++++++------- .../compiler/xla/python/py_executable.h | 9 +++++ 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/tensorflow/compiler/xla/python/py_executable.cc b/tensorflow/compiler/xla/python/py_executable.cc index b2cd2af56ea..ed524f1cb33 100644 --- a/tensorflow/compiler/xla/python/py_executable.cc +++ b/tensorflow/compiler/xla/python/py_executable.cc @@ -37,7 +37,9 @@ PyExecutable::PyExecutable(std::shared_ptr client, if (next_) { next_->prev_ = this; } + options_.untuple_result = true; if (fingerprint_) { + options_.launch_id = tensorflow::Fingerprint32(*fingerprint_); VLOG(1) << "Fingerprint for executable " << executable_->name() << ": " << *fingerprint_; } @@ -65,21 +67,33 @@ std::vector> PyExecutable::LocalDevices() const { return devices; } +StatusOr>> PyExecutable::PjRtExecute( + absl::Span args) { + std::vector> output_buffers; + { + py::gil_scoped_release gil_release; + TF_ASSIGN_OR_RETURN(output_buffers, executable_->Execute(args, options_)); + } + auto traceback = Traceback::Get(); + std::vector> outputs; + outputs.reserve(output_buffers.size()); + for (auto& buffer : output_buffers) { + outputs.push_back( + std::make_unique(client_, std::move(buffer), traceback)); + } + return outputs; +} + StatusOr>> PyExecutable::Execute( absl::Span args) { std::vector> output_buffers; { py::gil_scoped_release gil_release; - ExecuteOptions options; - options.untuple_result = true; - if (fingerprint_) { - options.launch_id = tensorflow::Fingerprint32(*fingerprint_); - } std::vector arg_buffers(args.size()); absl::c_transform(args, arg_buffers.begin(), [](PyBuffer* buf) { return buf->buffer(); }); TF_ASSIGN_OR_RETURN(output_buffers, - executable_->Execute(arg_buffers, options)); + executable_->Execute(arg_buffers, options_)); } auto traceback = Traceback::Get(); std::vector> outputs; @@ -97,11 +111,6 @@ PyExecutable::ExecuteOnLocalDevices( std::vector>> output_buffers; { py::gil_scoped_release gil_release; - ExecuteOptions options; - options.untuple_result = true; - if (fingerprint_) { - options.launch_id = tensorflow::Fingerprint32(*fingerprint_); - } std::vector> arg_buffers(args.size()); for (int computation = 0; computation < args.size(); ++computation) { arg_buffers[computation].resize(args[computation].size()); @@ -109,7 +118,7 @@ PyExecutable::ExecuteOnLocalDevices( [](PyBuffer* buf) { return buf->buffer(); }); } TF_ASSIGN_OR_RETURN(output_buffers, executable_->ExecuteOnLocalDevices( - arg_buffers, options)); + arg_buffers, options_)); } auto traceback = Traceback::Get(); std::vector>> outputs; diff --git a/tensorflow/compiler/xla/python/py_executable.h b/tensorflow/compiler/xla/python/py_executable.h index 1051d065335..24f177261e7 100644 --- a/tensorflow/compiler/xla/python/py_executable.h +++ b/tensorflow/compiler/xla/python/py_executable.h @@ -58,6 +58,10 @@ class PyExecutable { StatusOr>> Execute( absl::Span args); + // Same as above, but take as inputs `PjRtBuffer*`. Only targets C++ code. + StatusOr>> PjRtExecute( + absl::Span args); + StatusOr>>> ExecuteOnLocalDevices(absl::Span> args); @@ -65,6 +69,8 @@ class PyExecutable { Traceback* traceback() { return traceback_.get(); } + const PjRtExecutable& pjrt_executable() const { return *executable_; } + private: friend class PyClient; @@ -77,6 +83,9 @@ class PyExecutable { // aren't implemented. absl::optional fingerprint_; + // The options to pass to `executable_.Execute`. 
+ ExecuteOptions options_; + // Doubly-linked list of all executables known to the client. Protected by the // GIL. PyExecutable* next_; From 21240f6f4086038a94c65c6cb527786cef17cc76 Mon Sep 17 00:00:00 2001 From: Tom Hennigan Date: Mon, 10 Aug 2020 08:09:30 -0700 Subject: [PATCH 2435/2522] Set connect timeout based on client RPC deadline. PiperOrigin-RevId: 325805342 Change-Id: I22ec18729cf027560a479ef27596fce54bc90606 --- tensorflow/compiler/xla/pjrt/distributed/client.cc | 2 ++ tensorflow/compiler/xla/pjrt/distributed/protocol.h | 2 +- tensorflow/compiler/xla/pjrt/distributed/protocol.proto | 1 + tensorflow/compiler/xla/pjrt/distributed/service.cc | 6 ++++-- tensorflow/compiler/xla/pjrt/distributed/service.h | 2 -- 5 files changed, 8 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/xla/pjrt/distributed/client.cc b/tensorflow/compiler/xla/pjrt/distributed/client.cc index 55b02c6a09e..43c0c7b277d 100644 --- a/tensorflow/compiler/xla/pjrt/distributed/client.cc +++ b/tensorflow/compiler/xla/pjrt/distributed/client.cc @@ -17,6 +17,7 @@ limitations under the License. #include // NOLINT +#include "absl/time/time.h" #include "tensorflow/compiler/xla/pjrt/distributed/protocol.h" #include "tensorflow/compiler/xla/pjrt/distributed/util.h" @@ -36,6 +37,7 @@ xla::Status DistributedRuntimeClient::Connect( ctx.set_deadline(absl::ToChronoTime(absl::Now() + rpc_timeout_)); ConnectRequest request; request.set_protocol_version(kDistributedRuntimeProtocolVersion); + request.set_timeout_milliseconds(absl::ToInt64Milliseconds(rpc_timeout_)); *request.mutable_local_topology() = local_topology; VLOG(10) << "Connect: " << request.DebugString(); ConnectResponse response; diff --git a/tensorflow/compiler/xla/pjrt/distributed/protocol.h b/tensorflow/compiler/xla/pjrt/distributed/protocol.h index 4daa939ac8d..e8be43006f7 100644 --- a/tensorflow/compiler/xla/pjrt/distributed/protocol.h +++ b/tensorflow/compiler/xla/pjrt/distributed/protocol.h @@ -18,7 +18,7 @@ limitations under the License. namespace xla { -static constexpr int kDistributedRuntimeProtocolVersion = 1; +static constexpr int kDistributedRuntimeProtocolVersion = 2; } // namespace xla diff --git a/tensorflow/compiler/xla/pjrt/distributed/protocol.proto b/tensorflow/compiler/xla/pjrt/distributed/protocol.proto index 18bfa221110..c3bbb3a7f5d 100644 --- a/tensorflow/compiler/xla/pjrt/distributed/protocol.proto +++ b/tensorflow/compiler/xla/pjrt/distributed/protocol.proto @@ -61,6 +61,7 @@ message ConnectRequest { int32 protocol_version = 1; // Always 1 at present. LocalTopologyProto local_topology = 2; + int32 timeout_milliseconds = 3; } message ConnectResponse { diff --git a/tensorflow/compiler/xla/pjrt/distributed/service.cc b/tensorflow/compiler/xla/pjrt/distributed/service.cc index 3325fcd8319..868529637de 100644 --- a/tensorflow/compiler/xla/pjrt/distributed/service.cc +++ b/tensorflow/compiler/xla/pjrt/distributed/service.cc @@ -15,6 +15,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/pjrt/distributed/service.h" +#include "absl/time/time.h" #include "tensorflow/compiler/xla/pjrt/distributed/protocol.h" #include "tensorflow/compiler/xla/pjrt/distributed/util.h" #include "tensorflow/compiler/xla/status.h" @@ -69,11 +70,12 @@ void BuildGlobalTopology(absl::Span local_topologies, mu_.AssertHeld(); return num_nodes_present_ == nodes_.size(); }; + auto connect_timeout = absl::Milliseconds(request->timeout_milliseconds()); if (!mu_.AwaitWithTimeout(absl::Condition(&all_nodes_present), - kConnectTimeout)) { + connect_timeout)) { return ToGrpcStatus(tensorflow::errors::DeadlineExceeded( "Timed out after %s waiting for all nodes to call Connect()", - absl::FormatDuration(kConnectTimeout))); + absl::FormatDuration(connect_timeout))); } if (node_id == 0) { diff --git a/tensorflow/compiler/xla/pjrt/distributed/service.h b/tensorflow/compiler/xla/pjrt/distributed/service.h index 9ecbdb3cc7c..fe323d9f3b2 100644 --- a/tensorflow/compiler/xla/pjrt/distributed/service.h +++ b/tensorflow/compiler/xla/pjrt/distributed/service.h @@ -50,8 +50,6 @@ class DistributedRuntimeServiceImpl final KeyValueSetResponse* response) override; private: - const absl::Duration kConnectTimeout = absl::Seconds(120); - absl::Mutex mu_; enum class State { kInitializing, kRunning }; State state_ ABSL_GUARDED_BY(mu_) = State::kInitializing; From 0e368fa3998ce7213159751f74bde3c98d72edc9 Mon Sep 17 00:00:00 2001 From: Pankaj Kanwar Date: Mon, 10 Aug 2020 08:40:33 -0700 Subject: [PATCH 2436/2522] Test build for testing cuda11 changes. PiperOrigin-RevId: 325810725 Change-Id: Id018e4512c074fba2af19b8d8919c3dbb6c7768b --- tensorflow/opensource_only.files | 2 + .../rel/ubuntu_cuda11/gpu_py35_nonpip.sh | 6 +- .../rel/ubuntu_cuda11/gpu_py37_pip.sh | 2 +- .../gcc7_manylinux2010-nvcc-cuda11/BUILD | 175 ++ .../cc_toolchain_config.bzl | 1516 +++++++++++++++++ .../bin/crosstool_wrapper_driver_is_not_gcc | 289 ++++ 6 files changed, 1986 insertions(+), 4 deletions(-) create mode 100755 third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda11/BUILD create mode 100755 third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda11/cc_toolchain_config.bzl create mode 100755 third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda11/clang/bin/crosstool_wrapper_driver_is_not_gcc diff --git a/tensorflow/opensource_only.files b/tensorflow/opensource_only.files index d46d9a27b24..faf097e85f9 100644 --- a/tensorflow/opensource_only.files +++ b/tensorflow/opensource_only.files @@ -245,6 +245,8 @@ tensorflow/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc- tensorflow/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.0/cc_toolchain_config.bzl tensorflow/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1/BUILD tensorflow/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1/cc_toolchain_config.bzl +tensorflow/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda11/BUILD +tensorflow/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda11/cc_toolchain_config.bzl tensorflow/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010/BUILD tensorflow/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010/cc_toolchain_config.bzl tensorflow/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010/dummy_toolchain.bzl diff --git a/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py35_nonpip.sh 
b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py35_nonpip.sh index 47ed3c4fd2a..8a0796723b2 100644 --- a/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py35_nonpip.sh +++ b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py35_nonpip.sh @@ -27,8 +27,8 @@ export TF_NEED_GCP=1 export TF_NEED_HDFS=1 export TF_NEED_S3=1 export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 -export TF_CUDNN_VERSION=7 +export TF_CUDA_VERSION=11 +export TF_CUDNN_VERSION=8 export TF_NEED_TENSORRT=1 export TENSORRT_INSTALL_PATH=/usr/local/tensorrt export CC_OPT_FLAGS='-mavx' @@ -47,7 +47,7 @@ tag_filters="gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py35" set +e bazel test --config=cuda --config=opt \ - --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ + --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda11:toolchain \ --linkopt=-lrt \ --action_env=TF2_BEHAVIOR="${TF2_BEHAVIOR}" \ --test_lang_filters=py \ diff --git a/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py37_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py37_pip.sh index 9bfc6608a0b..71d6f3e6401 100644 --- a/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py37_pip.sh +++ b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py37_pip.sh @@ -39,7 +39,7 @@ export TF_TEST_FILTER_TAGS='gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss export TF_BUILD_FLAGS="--config=release_gpu_linux " export TF_TEST_FLAGS="--test_tag_filters=${TF_TEST_FILTER_TAGS} --build_tag_filters=${TF_TEST_FILTER_TAGS} \ --distinct_host_configuration=false \ ---action_env=TF_CUDA_VERSION=10 --action_env=TF_CUDNN_VERSION=7 --test_env=TF2_BEHAVIOR=1 \ +--action_env=TF_CUDA_VERSION=11 --action_env=TF_CUDNN_VERSION=8 --test_env=TF2_BEHAVIOR=1 \ --config=cuda --test_output=errors --local_test_jobs=4 --test_lang_filters=py \ --verbose_failures=true --keep_going --define=no_tensorflow_py_deps=true \ --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute " diff --git a/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda11/BUILD b/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda11/BUILD new file mode 100755 index 00000000000..92305526c5c --- /dev/null +++ b/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda11/BUILD @@ -0,0 +1,175 @@ +# This file is expanded from a template by cuda_configure.bzl +# Update cuda_configure.bzl#verify_build_defines when adding new variables. 
+ +load(":cc_toolchain_config.bzl", "cc_toolchain_config") + +licenses(["restricted"]) + +package(default_visibility = ["//visibility:public"]) + +toolchain( + name = "toolchain-linux-x86_64", + exec_compatible_with = [ + "@bazel_tools//platforms:linux", + "@bazel_tools//platforms:x86_64", + ], + target_compatible_with = [ + "@bazel_tools//platforms:linux", + "@bazel_tools//platforms:x86_64", + ], + toolchain = ":cc-compiler-local", + toolchain_type = "@bazel_tools//tools/cpp:toolchain_type", +) + +cc_toolchain_suite( + name = "toolchain", + toolchains = { + "local|compiler": ":cc-compiler-local", + "darwin|compiler": ":cc-compiler-darwin", + "x64_windows|msvc-cl": ":cc-compiler-windows", + "x64_windows": ":cc-compiler-windows", + "arm": ":cc-compiler-local", + "aarch64": ":cc-compiler-local", + "k8": ":cc-compiler-local", + "piii": ":cc-compiler-local", + "ppc": ":cc-compiler-local", + "darwin": ":cc-compiler-darwin", + }, +) + +cc_toolchain( + name = "cc-compiler-local", + all_files = ":crosstool_wrapper_driver_is_not_gcc", + ar_files = ":crosstool_wrapper_driver_is_not_gcc", + as_files = ":crosstool_wrapper_driver_is_not_gcc", + compiler_files = ":crosstool_wrapper_driver_is_not_gcc", + dwp_files = ":empty", + linker_files = ":crosstool_wrapper_driver_is_not_gcc", + objcopy_files = ":empty", + strip_files = ":empty", + # To support linker flags that need to go to the start of command line + # we need the toolchain to support parameter files. Parameter files are + # last on the command line and contain all shared libraries to link, so all + # regular options will be left of them. + supports_param_files = 1, + toolchain_config = ":cc-compiler-local-config", + toolchain_identifier = "local_linux", +) + +cc_toolchain_config( + name = "cc-compiler-local-config", + builtin_include_directories = [ + "/dt7/usr/include/c++/7", + "/dt7/usr/include/c++/7/x86_64-pc-linux-gnu", + "/dt7/usr/include/c++/7/backward", + "/dt7/usr/lib/gcc/x86_64-pc-linux-gnu/7/include", + "/dt7/usr/lib/gcc/x86_64-pc-linux-gnu/7/include-fixed", + "/dt7/usr/include", + "/usr/local/cuda11/targets/x86_64-linux/include", + "/usr/local/cuda11/include", + "/usr/local/cuda11/extras/CUPTI/include", + "/usr/include", + ], + builtin_sysroot = "", + cpu = "local", + cuda_path = "", + extra_no_canonical_prefixes_flags = ["-fno-canonical-system-headers"], + host_compiler_path = "clang/bin/crosstool_wrapper_driver_is_not_gcc", + host_compiler_prefix = "/usr/bin", + host_compiler_warnings = [], + host_unfiltered_compile_flags = [], + linker_bin_path = "/usr/bin", +) + +cc_toolchain( + name = "cc-compiler-darwin", + all_files = ":crosstool_wrapper_driver_is_not_gcc", + ar_files = ":crosstool_wrapper_driver_is_not_gcc", + as_files = ":crosstool_wrapper_driver_is_not_gcc", + compiler_files = ":crosstool_wrapper_driver_is_not_gcc", + dwp_files = ":empty", + linker_files = ":crosstool_wrapper_driver_is_not_gcc", + objcopy_files = ":empty", + strip_files = ":empty", + supports_param_files = 0, + toolchain_config = ":cc-compiler-local-darwin", + toolchain_identifier = "local_darwin", +) + +cc_toolchain_config( + name = "cc-compiler-local-darwin", + builtin_include_directories = [ + "/dt7/usr/include/c++/7", + "/dt7/usr/include/c++/7/x86_64-pc-linux-gnu", + "/dt7/usr/include/c++/7/backward", + "/dt7/usr/lib/gcc/x86_64-pc-linux-gnu/7/include", + "/dt7/usr/lib/gcc/x86_64-pc-linux-gnu/7/include-fixed", + "/dt7/usr/include", + "/usr/local/cuda-11/targets/x86_64-linux/include", + "/usr/local/cuda-11/include", + 
"/usr/local/cuda-11/extras/CUPTI/include", + "/usr/include", + ], + cpu = "darwin", + extra_no_canonical_prefixes_flags = ["-fno-canonical-system-headers"], + host_compiler_path = "clang/bin/crosstool_wrapper_driver_is_not_gcc", + host_compiler_prefix = "/usr/bin", + host_compiler_warnings = [], + host_unfiltered_compile_flags = [], + linker_bin_path = "/usr/bin", +) + +cc_toolchain( + name = "cc-compiler-windows", + all_files = ":windows_msvc_wrapper_files", + ar_files = ":windows_msvc_wrapper_files", + as_files = ":windows_msvc_wrapper_files", + compiler_files = ":windows_msvc_wrapper_files", + dwp_files = ":empty", + linker_files = ":windows_msvc_wrapper_files", + objcopy_files = ":empty", + strip_files = ":empty", + supports_param_files = 1, + toolchain_config = ":cc-compiler-windows-config", + toolchain_identifier = "local_windows", +) + +cc_toolchain_config( + name = "cc-compiler-windows-config", + builtin_include_directories = [ + "/dt7/usr/include/c++/7", + "/dt7/usr/include/c++/7/x86_64-pc-linux-gnu", + "/dt7/usr/include/c++/7/backward", + "/dt7/usr/lib/gcc/x86_64-pc-linux-gnu/7/include", + "/dt7/usr/lib/gcc/x86_64-pc-linux-gnu/7/include-fixed", + "/dt7/usr/include", + "/usr/local/cuda-11/targets/x86_64-linux/include", + "/usr/local/cuda-11/include", + "/usr/local/cuda-11/extras/CUPTI/include", + "/usr/include", + ], + cpu = "x64_windows", + msvc_cl_path = "msvc_not_used", + msvc_env_include = "msvc_not_used", + msvc_env_lib = "msvc_not_used", + msvc_env_path = "msvc_not_used", + msvc_env_tmp = "msvc_not_used", + msvc_lib_path = "msvc_not_used", + msvc_link_path = "msvc_not_used", + msvc_ml_path = "msvc_not_used", +) + +filegroup( + name = "empty", + srcs = [], +) + +filegroup( + name = "crosstool_wrapper_driver_is_not_gcc", + srcs = ["clang/bin/crosstool_wrapper_driver_is_not_gcc"], +) + +filegroup( + name = "windows_msvc_wrapper_files", + srcs = glob(["windows/msvc_*"]), +) diff --git a/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda11/cc_toolchain_config.bzl b/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda11/cc_toolchain_config.bzl new file mode 100755 index 00000000000..70197628811 --- /dev/null +++ b/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda11/cc_toolchain_config.bzl @@ -0,0 +1,1516 @@ +"""cc_toolchain_config rule for configuring CUDA toolchains on Linux, Mac, and Windows.""" + +load( + "@bazel_tools//tools/cpp:cc_toolchain_config_lib.bzl", + "action_config", + "env_entry", + "env_set", + "feature", + "feature_set", + "flag_group", + "flag_set", + "tool", + "tool_path", + "variable_with_value", +) +load( + "@bazel_tools//tools/build_defs/cc:action_names.bzl", + "ASSEMBLE_ACTION_NAME", + "CC_FLAGS_MAKE_VARIABLE_ACTION_NAME", + "CLIF_MATCH_ACTION_NAME", + "CPP_COMPILE_ACTION_NAME", + "CPP_HEADER_PARSING_ACTION_NAME", + "CPP_LINK_DYNAMIC_LIBRARY_ACTION_NAME", + "CPP_LINK_EXECUTABLE_ACTION_NAME", + "CPP_LINK_NODEPS_DYNAMIC_LIBRARY_ACTION_NAME", + "CPP_LINK_STATIC_LIBRARY_ACTION_NAME", + "CPP_MODULE_CODEGEN_ACTION_NAME", + "CPP_MODULE_COMPILE_ACTION_NAME", + "C_COMPILE_ACTION_NAME", + "LINKSTAMP_COMPILE_ACTION_NAME", + "LTO_BACKEND_ACTION_NAME", + "LTO_INDEXING_ACTION_NAME", + "OBJCPP_COMPILE_ACTION_NAME", + "OBJCPP_EXECUTABLE_ACTION_NAME", + "OBJC_ARCHIVE_ACTION_NAME", + "OBJC_COMPILE_ACTION_NAME", + "OBJC_EXECUTABLE_ACTION_NAME", + "OBJC_FULLY_LINK_ACTION_NAME", + "PREPROCESS_ASSEMBLE_ACTION_NAME", + "STRIP_ACTION_NAME", +) + +ACTION_NAMES = struct( + c_compile = C_COMPILE_ACTION_NAME, + 
cpp_compile = CPP_COMPILE_ACTION_NAME, + linkstamp_compile = LINKSTAMP_COMPILE_ACTION_NAME, + cc_flags_make_variable = CC_FLAGS_MAKE_VARIABLE_ACTION_NAME, + cpp_module_codegen = CPP_MODULE_CODEGEN_ACTION_NAME, + cpp_header_parsing = CPP_HEADER_PARSING_ACTION_NAME, + cpp_module_compile = CPP_MODULE_COMPILE_ACTION_NAME, + assemble = ASSEMBLE_ACTION_NAME, + preprocess_assemble = PREPROCESS_ASSEMBLE_ACTION_NAME, + lto_indexing = LTO_INDEXING_ACTION_NAME, + lto_backend = LTO_BACKEND_ACTION_NAME, + cpp_link_executable = CPP_LINK_EXECUTABLE_ACTION_NAME, + cpp_link_dynamic_library = CPP_LINK_DYNAMIC_LIBRARY_ACTION_NAME, + cpp_link_nodeps_dynamic_library = CPP_LINK_NODEPS_DYNAMIC_LIBRARY_ACTION_NAME, + cpp_link_static_library = CPP_LINK_STATIC_LIBRARY_ACTION_NAME, + strip = STRIP_ACTION_NAME, + objc_archive = OBJC_ARCHIVE_ACTION_NAME, + objc_compile = OBJC_COMPILE_ACTION_NAME, + objc_executable = OBJC_EXECUTABLE_ACTION_NAME, + objc_fully_link = OBJC_FULLY_LINK_ACTION_NAME, + objcpp_compile = OBJCPP_COMPILE_ACTION_NAME, + objcpp_executable = OBJCPP_EXECUTABLE_ACTION_NAME, + clif_match = CLIF_MATCH_ACTION_NAME, + objcopy_embed_data = "objcopy_embed_data", + ld_embed_data = "ld_embed_data", +) + +def _impl(ctx): + if (ctx.attr.cpu == "darwin"): + toolchain_identifier = "local_darwin" + elif (ctx.attr.cpu == "local"): + toolchain_identifier = "local_linux" + elif (ctx.attr.cpu == "x64_windows"): + toolchain_identifier = "local_windows" + else: + fail("Unreachable") + + host_system_name = "local" + + target_system_name = "local" + + if (ctx.attr.cpu == "darwin"): + target_cpu = "darwin" + elif (ctx.attr.cpu == "local"): + target_cpu = "local" + elif (ctx.attr.cpu == "x64_windows"): + target_cpu = "x64_windows" + else: + fail("Unreachable") + + if (ctx.attr.cpu == "local"): + target_libc = "local" + elif (ctx.attr.cpu == "darwin"): + target_libc = "macosx" + elif (ctx.attr.cpu == "x64_windows"): + target_libc = "msvcrt" + else: + fail("Unreachable") + + if (ctx.attr.cpu == "darwin" or + ctx.attr.cpu == "local"): + compiler = "compiler" + elif (ctx.attr.cpu == "x64_windows"): + compiler = "msvc-cl" + else: + fail("Unreachable") + + abi_version = "local" + + abi_libc_version = "local" + + cc_target_os = None + + builtin_sysroot = ctx.attr.builtin_sysroot + + all_link_actions = [ + ACTION_NAMES.cpp_link_executable, + ACTION_NAMES.cpp_link_dynamic_library, + ACTION_NAMES.cpp_link_nodeps_dynamic_library, + ] + + cpp_link_dynamic_library_action = action_config( + action_name = ACTION_NAMES.cpp_link_dynamic_library, + implies = [ + "nologo", + "shared_flag", + "linkstamps", + "output_execpath_flags", + "input_param_flags", + "user_link_flags", + "linker_subsystem_flag", + "linker_param_file", + "msvc_env", + "no_stripping", + "has_configured_linker_path", + "def_file", + ], + tools = [tool(path = ctx.attr.msvc_link_path)], + ) + + cpp_link_nodeps_dynamic_library_action = action_config( + action_name = ACTION_NAMES.cpp_link_nodeps_dynamic_library, + implies = [ + "nologo", + "shared_flag", + "linkstamps", + "output_execpath_flags", + "input_param_flags", + "user_link_flags", + "linker_subsystem_flag", + "linker_param_file", + "msvc_env", + "no_stripping", + "has_configured_linker_path", + "def_file", + ], + tools = [tool(path = ctx.attr.msvc_link_path)], + ) + + cpp_link_static_library_action = action_config( + action_name = ACTION_NAMES.cpp_link_static_library, + implies = [ + "nologo", + "archiver_flags", + "input_param_flags", + "linker_param_file", + "msvc_env", + ], + tools = [tool(path = 
ctx.attr.msvc_lib_path)], + ) + + assemble_action = action_config( + action_name = ACTION_NAMES.assemble, + implies = [ + "compiler_input_flags", + "compiler_output_flags", + "nologo", + "msvc_env", + "sysroot", + ], + tools = [tool(path = ctx.attr.msvc_ml_path)], + ) + + preprocess_assemble_action = action_config( + action_name = ACTION_NAMES.preprocess_assemble, + implies = [ + "compiler_input_flags", + "compiler_output_flags", + "nologo", + "msvc_env", + "sysroot", + ], + tools = [tool(path = ctx.attr.msvc_ml_path)], + ) + + c_compile_action = action_config( + action_name = ACTION_NAMES.c_compile, + implies = [ + "compiler_input_flags", + "compiler_output_flags", + "nologo", + "msvc_env", + "parse_showincludes", + "user_compile_flags", + "sysroot", + "unfiltered_compile_flags", + ], + tools = [tool(path = ctx.attr.msvc_cl_path)], + ) + + cpp_compile_action = action_config( + action_name = ACTION_NAMES.cpp_compile, + implies = [ + "compiler_input_flags", + "compiler_output_flags", + "nologo", + "msvc_env", + "parse_showincludes", + "user_compile_flags", + "sysroot", + "unfiltered_compile_flags", + ], + tools = [tool(path = ctx.attr.msvc_cl_path)], + ) + + cpp_link_executable_action = action_config( + action_name = ACTION_NAMES.cpp_link_executable, + implies = [ + "nologo", + "linkstamps", + "output_execpath_flags", + "input_param_flags", + "user_link_flags", + "linker_subsystem_flag", + "linker_param_file", + "msvc_env", + "no_stripping", + ], + tools = [tool(path = ctx.attr.msvc_link_path)], + ) + + if (ctx.attr.cpu == "darwin" or + ctx.attr.cpu == "local"): + action_configs = [] + elif (ctx.attr.cpu == "x64_windows"): + action_configs = [ + assemble_action, + preprocess_assemble_action, + c_compile_action, + cpp_compile_action, + cpp_link_executable_action, + cpp_link_dynamic_library_action, + cpp_link_nodeps_dynamic_library_action, + cpp_link_static_library_action, + ] + else: + fail("Unreachable") + + no_windows_export_all_symbols_feature = feature(name = "no_windows_export_all_symbols") + + pic_feature = feature( + name = "pic", + enabled = True, + flag_sets = [ + flag_set( + actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], + flag_groups = [ + flag_group(flags = ["-fPIC"], expand_if_available = "pic"), + flag_group( + flags = ["-fPIE"], + expand_if_not_available = "pic", + ), + ], + ), + ], + ) + + preprocessor_defines_feature = feature( + name = "preprocessor_defines", + enabled = True, + flag_sets = [ + flag_set( + actions = [ + ACTION_NAMES.assemble, + ACTION_NAMES.preprocess_assemble, + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ACTION_NAMES.cpp_header_parsing, + ACTION_NAMES.cpp_module_compile, + ], + flag_groups = [ + flag_group( + flags = ["/D%{preprocessor_defines}"], + iterate_over = "preprocessor_defines", + ), + ], + ), + ], + ) + + generate_pdb_file_feature = feature( + name = "generate_pdb_file", + requires = [ + feature_set(features = ["dbg"]), + feature_set(features = ["fastbuild"]), + ], + ) + + linkstamps_feature = feature( + name = "linkstamps", + flag_sets = [ + flag_set( + actions = all_link_actions, + flag_groups = [ + flag_group( + flags = ["%{linkstamp_paths}"], + iterate_over = "linkstamp_paths", + expand_if_available = "linkstamp_paths", + ), + ], + ), + ], + ) + + unfiltered_compile_flags_feature = feature( + name = "unfiltered_compile_flags", + flag_sets = ([ + flag_set( + actions = [ + ACTION_NAMES.preprocess_assemble, + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ACTION_NAMES.cpp_header_parsing, + 
ACTION_NAMES.cpp_module_compile, + ACTION_NAMES.cpp_module_codegen, + ], + flag_groups = [ + flag_group( + flags = ctx.attr.host_unfiltered_compile_flags, + ), + ], + ), + ] if ctx.attr.host_unfiltered_compile_flags else []), + ) + + determinism_feature = feature( + name = "determinism", + flag_sets = [ + flag_set( + actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], + flag_groups = [ + flag_group( + flags = [ + "-Wno-builtin-macro-redefined", + "-D__DATE__=\"redacted\"", + "-D__TIMESTAMP__=\"redacted\"", + "-D__TIME__=\"redacted\"", + ], + ), + ], + ), + ], + ) + + nologo_feature = feature( + name = "nologo", + flag_sets = [ + flag_set( + actions = [ + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ACTION_NAMES.cpp_module_compile, + ACTION_NAMES.cpp_module_codegen, + ACTION_NAMES.cpp_header_parsing, + ACTION_NAMES.assemble, + ACTION_NAMES.preprocess_assemble, + ACTION_NAMES.cpp_link_executable, + ACTION_NAMES.cpp_link_dynamic_library, + ACTION_NAMES.cpp_link_nodeps_dynamic_library, + ACTION_NAMES.cpp_link_static_library, + ], + flag_groups = [flag_group(flags = ["/nologo"])], + ), + ], + ) + + supports_pic_feature = feature(name = "supports_pic", enabled = True) + + output_execpath_flags_feature = feature( + name = "output_execpath_flags", + flag_sets = [ + flag_set( + actions = all_link_actions, + flag_groups = [ + flag_group( + flags = ["/OUT:%{output_execpath}"], + expand_if_available = "output_execpath", + ), + ], + ), + ], + ) + + default_link_flags_feature = feature( + name = "default_link_flags", + enabled = True, + flag_sets = [ + flag_set( + actions = all_link_actions, + flag_groups = [flag_group(flags = ["/MACHINE:X64"])], + ), + ], + ) + + if (ctx.attr.cpu == "local"): + hardening_feature = feature( + name = "hardening", + flag_sets = [ + flag_set( + actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], + flag_groups = [ + flag_group( + flags = [ + "-U_FORTIFY_SOURCE", + "-D_FORTIFY_SOURCE=1", + "-fstack-protector", + ], + ), + ], + ), + flag_set( + actions = [ + ACTION_NAMES.cpp_link_dynamic_library, + ACTION_NAMES.cpp_link_nodeps_dynamic_library, + ], + flag_groups = [flag_group(flags = ["-Wl,-z,relro,-z,now"])], + ), + flag_set( + actions = [ACTION_NAMES.cpp_link_executable], + flag_groups = [flag_group(flags = ["-pie", "-Wl,-z,relro,-z,now"])], + ), + ], + ) + elif (ctx.attr.cpu == "darwin"): + hardening_feature = feature( + name = "hardening", + flag_sets = [ + flag_set( + actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], + flag_groups = [ + flag_group( + flags = [ + "-U_FORTIFY_SOURCE", + "-D_FORTIFY_SOURCE=1", + "-fstack-protector", + ], + ), + ], + ), + flag_set( + actions = [ACTION_NAMES.cpp_link_executable], + flag_groups = [flag_group(flags = ["-pie"])], + ), + ], + ) + else: + hardening_feature = None + + supports_dynamic_linker_feature = feature(name = "supports_dynamic_linker", enabled = True) + + targets_windows_feature = feature( + name = "targets_windows", + enabled = True, + implies = ["copy_dynamic_libraries_to_binary"], + ) + + msvc_env_feature = feature( + name = "msvc_env", + env_sets = [ + env_set( + actions = [ + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ACTION_NAMES.cpp_module_compile, + ACTION_NAMES.cpp_module_codegen, + ACTION_NAMES.cpp_header_parsing, + ACTION_NAMES.assemble, + ACTION_NAMES.preprocess_assemble, + ACTION_NAMES.cpp_link_executable, + ACTION_NAMES.cpp_link_dynamic_library, + ACTION_NAMES.cpp_link_nodeps_dynamic_library, + ACTION_NAMES.cpp_link_static_library, + ], + env_entries = [ + 
env_entry(key = "PATH", value = ctx.attr.msvc_env_path), + env_entry( + key = "INCLUDE", + value = ctx.attr.msvc_env_include, + ), + env_entry(key = "LIB", value = ctx.attr.msvc_env_lib), + env_entry(key = "TMP", value = ctx.attr.msvc_env_tmp), + env_entry(key = "TEMP", value = ctx.attr.msvc_env_tmp), + ], + ), + ], + ) + + linker_subsystem_flag_feature = feature( + name = "linker_subsystem_flag", + flag_sets = [ + flag_set( + actions = all_link_actions, + flag_groups = [flag_group(flags = ["/SUBSYSTEM:CONSOLE"])], + ), + ], + ) + + dynamic_link_msvcrt_no_debug_feature = feature( + name = "dynamic_link_msvcrt_no_debug", + flag_sets = [ + flag_set( + actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], + flag_groups = [flag_group(flags = ["/MD"])], + ), + flag_set( + actions = all_link_actions, + flag_groups = [flag_group(flags = ["/DEFAULTLIB:msvcrt.lib"])], + ), + ], + requires = [ + feature_set(features = ["fastbuild"]), + feature_set(features = ["opt"]), + ], + ) + + warnings_feature = feature( + name = "warnings", + flag_sets = [ + flag_set( + actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], + flag_groups = [ + flag_group( + flags = ["-Wall"] + ctx.attr.host_compiler_warnings, + ), + ], + ), + ], + ) + + dynamic_link_msvcrt_debug_feature = feature( + name = "dynamic_link_msvcrt_debug", + flag_sets = [ + flag_set( + actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], + flag_groups = [flag_group(flags = ["/MDd"])], + ), + flag_set( + actions = all_link_actions, + flag_groups = [flag_group(flags = ["/DEFAULTLIB:msvcrtd.lib"])], + ), + ], + requires = [feature_set(features = ["dbg"])], + ) + + compiler_output_flags_feature = feature( + name = "compiler_output_flags", + flag_sets = [ + flag_set( + actions = [ACTION_NAMES.assemble], + flag_groups = [ + flag_group( + flag_groups = [ + flag_group( + flags = ["/Fo%{output_file}", "/Zi"], + expand_if_not_available = "output_preprocess_file", + ), + ], + expand_if_available = "output_file", + expand_if_not_available = "output_assembly_file", + ), + ], + ), + flag_set( + actions = [ + ACTION_NAMES.preprocess_assemble, + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ACTION_NAMES.cpp_header_parsing, + ACTION_NAMES.cpp_module_compile, + ACTION_NAMES.cpp_module_codegen, + ], + flag_groups = [ + flag_group( + flag_groups = [ + flag_group( + flags = ["/Fo%{output_file}"], + expand_if_not_available = "output_preprocess_file", + ), + ], + expand_if_available = "output_file", + expand_if_not_available = "output_assembly_file", + ), + flag_group( + flag_groups = [ + flag_group( + flags = ["/Fa%{output_file}"], + expand_if_available = "output_assembly_file", + ), + ], + expand_if_available = "output_file", + ), + flag_group( + flag_groups = [ + flag_group( + flags = ["/P", "/Fi%{output_file}"], + expand_if_available = "output_preprocess_file", + ), + ], + expand_if_available = "output_file", + ), + ], + ), + ], + ) + + default_compile_flags_feature = feature( + name = "default_compile_flags", + enabled = True, + flag_sets = [ + flag_set( + actions = [ + ACTION_NAMES.assemble, + ACTION_NAMES.preprocess_assemble, + ACTION_NAMES.linkstamp_compile, + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ACTION_NAMES.cpp_header_parsing, + ACTION_NAMES.cpp_module_compile, + ACTION_NAMES.cpp_module_codegen, + ACTION_NAMES.lto_backend, + ACTION_NAMES.clif_match, + ], + flag_groups = [ + flag_group( + flags = [ + "/DCOMPILER_MSVC", + "/DNOMINMAX", + "/D_WIN32_WINNT=0x0600", + "/D_CRT_SECURE_NO_DEPRECATE", + 
"/D_CRT_SECURE_NO_WARNINGS", + "/D_SILENCE_STDEXT_HASH_DEPRECATION_WARNINGS", + "/bigobj", + "/Zm500", + "/J", + "/Gy", + "/GF", + "/EHsc", + "/wd4351", + "/wd4291", + "/wd4250", + "/wd4996", + ], + ), + ], + ), + ], + ) + + static_link_msvcrt_debug_feature = feature( + name = "static_link_msvcrt_debug", + flag_sets = [ + flag_set( + actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], + flag_groups = [flag_group(flags = ["/MTd"])], + ), + flag_set( + actions = all_link_actions, + flag_groups = [flag_group(flags = ["/DEFAULTLIB:libcmtd.lib"])], + ), + ], + requires = [feature_set(features = ["dbg"])], + ) + + static_link_msvcrt_feature = feature(name = "static_link_msvcrt") + + if (ctx.attr.cpu == "darwin" or + ctx.attr.cpu == "local"): + dbg_feature = feature( + name = "dbg", + flag_sets = [ + flag_set( + actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], + flag_groups = [flag_group(flags = ["-g"])], + ), + ], + implies = ["common"], + ) + elif (ctx.attr.cpu == "x64_windows"): + dbg_feature = feature( + name = "dbg", + flag_sets = [ + flag_set( + actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], + flag_groups = [flag_group(flags = ["/Od", "/Z7", "/DDEBUG"])], + ), + flag_set( + actions = all_link_actions, + flag_groups = [flag_group(flags = ["/DEBUG:FULL", "/INCREMENTAL:NO"])], + ), + ], + implies = ["generate_pdb_file"], + ) + else: + dbg_feature = None + + undefined_dynamic_feature = feature( + name = "undefined-dynamic", + flag_sets = [ + flag_set( + actions = [ + ACTION_NAMES.cpp_link_dynamic_library, + ACTION_NAMES.cpp_link_nodeps_dynamic_library, + ACTION_NAMES.cpp_link_executable, + ], + flag_groups = [flag_group(flags = ["-undefined", "dynamic_lookup"])], + ), + ], + ) + + parse_showincludes_feature = feature( + name = "parse_showincludes", + flag_sets = [ + flag_set( + actions = [ + ACTION_NAMES.preprocess_assemble, + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ACTION_NAMES.cpp_module_compile, + ACTION_NAMES.cpp_header_parsing, + ], + flag_groups = [flag_group(flags = ["/showIncludes"])], + ), + ], + ) + + linker_param_file_feature = feature( + name = "linker_param_file", + flag_sets = [ + flag_set( + actions = all_link_actions + + [ACTION_NAMES.cpp_link_static_library], + flag_groups = [ + flag_group( + flags = ["@%{linker_param_file}"], + expand_if_available = "linker_param_file", + ), + ], + ), + ], + ) + + static_link_msvcrt_no_debug_feature = feature( + name = "static_link_msvcrt_no_debug", + flag_sets = [ + flag_set( + actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], + flag_groups = [flag_group(flags = ["/MT"])], + ), + flag_set( + actions = all_link_actions, + flag_groups = [flag_group(flags = ["/DEFAULTLIB:libcmt.lib"])], + ), + ], + requires = [ + feature_set(features = ["fastbuild"]), + feature_set(features = ["opt"]), + ], + ) + + supports_interface_shared_libraries_feature = feature( + name = "supports_interface_shared_libraries", + enabled = True, + ) + + disable_assertions_feature = feature( + name = "disable-assertions", + flag_sets = [ + flag_set( + actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], + flag_groups = [flag_group(flags = ["-DNDEBUG"])], + ), + ], + ) + + if (ctx.attr.cpu == "x64_windows"): + fastbuild_feature = feature( + name = "fastbuild", + flag_sets = [ + flag_set( + actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], + flag_groups = [flag_group(flags = ["/Od", "/Z7", "/DDEBUG"])], + ), + flag_set( + actions = all_link_actions, + flag_groups = [ + flag_group(flags = 
["/DEBUG:FASTLINK", "/INCREMENTAL:NO"]), + ], + ), + ], + implies = ["generate_pdb_file"], + ) + elif (ctx.attr.cpu == "darwin" or + ctx.attr.cpu == "local"): + fastbuild_feature = feature(name = "fastbuild", implies = ["common"]) + else: + fastbuild_feature = None + + user_compile_flags_feature = feature( + name = "user_compile_flags", + flag_sets = [ + flag_set( + actions = [ + ACTION_NAMES.preprocess_assemble, + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ACTION_NAMES.cpp_header_parsing, + ACTION_NAMES.cpp_module_compile, + ACTION_NAMES.cpp_module_codegen, + ], + flag_groups = [ + flag_group( + flags = ["%{user_compile_flags}"], + iterate_over = "user_compile_flags", + expand_if_available = "user_compile_flags", + ), + ], + ), + ], + ) + + compiler_input_flags_feature = feature( + name = "compiler_input_flags", + flag_sets = [ + flag_set( + actions = [ + ACTION_NAMES.assemble, + ACTION_NAMES.preprocess_assemble, + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ACTION_NAMES.cpp_header_parsing, + ACTION_NAMES.cpp_module_compile, + ACTION_NAMES.cpp_module_codegen, + ], + flag_groups = [ + flag_group( + flags = ["/c", "%{source_file}"], + expand_if_available = "source_file", + ), + ], + ), + ], + ) + + no_legacy_features_feature = feature(name = "no_legacy_features") + + archiver_flags_feature = feature( + name = "archiver_flags", + flag_sets = [ + flag_set( + actions = [ACTION_NAMES.cpp_link_static_library], + flag_groups = [ + flag_group( + flags = ["/OUT:%{output_execpath}"], + expand_if_available = "output_execpath", + ), + ], + ), + ], + ) + + redirector_feature = feature( + name = "redirector", + enabled = True, + flag_sets = [ + flag_set( + actions = [ + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ACTION_NAMES.cpp_module_compile, + ACTION_NAMES.cpp_module_codegen, + ACTION_NAMES.cpp_header_parsing, + ACTION_NAMES.assemble, + ACTION_NAMES.preprocess_assemble, + ], + flag_groups = [ + flag_group( + flags = [ + "-B", + "external/local_config_cuda/crosstool/windows/msvc_wrapper_for_nvcc.py", + ], + ), + ], + ), + ], + ) + + linker_bin_path_feature = feature( + name = "linker-bin-path", + flag_sets = [ + flag_set( + actions = all_link_actions, + flag_groups = [flag_group(flags = ["-B" + ctx.attr.linker_bin_path])], + ), + ], + ) + + if (ctx.attr.cpu == "local"): + opt_feature = feature( + name = "opt", + flag_sets = [ + flag_set( + actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], + flag_groups = [ + flag_group( + flags = ["-g0", "-O2", "-ffunction-sections", "-fdata-sections"], + ), + ], + ), + flag_set( + actions = [ + ACTION_NAMES.cpp_link_dynamic_library, + ACTION_NAMES.cpp_link_nodeps_dynamic_library, + ACTION_NAMES.cpp_link_executable, + ], + flag_groups = [flag_group(flags = ["-Wl,--gc-sections"])], + ), + ], + implies = ["common", "disable-assertions"], + ) + elif (ctx.attr.cpu == "darwin"): + opt_feature = feature( + name = "opt", + flag_sets = [ + flag_set( + actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], + flag_groups = [ + flag_group( + flags = ["-g0", "-O2", "-ffunction-sections", "-fdata-sections"], + ), + ], + ), + ], + implies = ["common", "disable-assertions"], + ) + elif (ctx.attr.cpu == "x64_windows"): + opt_feature = feature( + name = "opt", + flag_sets = [ + flag_set( + actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], + flag_groups = [flag_group(flags = ["/O2", "/DNDEBUG"])], + ), + ], + ) + else: + opt_feature = None + + include_paths_feature = feature( + name = "include_paths", + enabled = True, + 
flag_sets = [ + flag_set( + actions = [ + ACTION_NAMES.assemble, + ACTION_NAMES.preprocess_assemble, + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ACTION_NAMES.cpp_header_parsing, + ACTION_NAMES.cpp_module_compile, + ], + flag_groups = [ + flag_group( + flags = ["/I%{quote_include_paths}"], + iterate_over = "quote_include_paths", + ), + flag_group( + flags = ["/I%{include_paths}"], + iterate_over = "include_paths", + ), + flag_group( + flags = ["/I%{system_include_paths}"], + iterate_over = "system_include_paths", + ), + ], + ), + ], + ) + + shared_flag_feature = feature( + name = "shared_flag", + flag_sets = [ + flag_set( + actions = [ + ACTION_NAMES.cpp_link_dynamic_library, + ACTION_NAMES.cpp_link_nodeps_dynamic_library, + ], + flag_groups = [flag_group(flags = ["/DLL"])], + ), + ], + ) + + windows_export_all_symbols_feature = feature(name = "windows_export_all_symbols") + + frame_pointer_feature = feature( + name = "frame-pointer", + flag_sets = [ + flag_set( + actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], + flag_groups = [flag_group(flags = ["-fno-omit-frame-pointer"])], + ), + ], + ) + + build_id_feature = feature( + name = "build-id", + flag_sets = [ + flag_set( + actions = all_link_actions, + flag_groups = [ + flag_group( + flags = ["-Wl,--build-id=md5", "-Wl,--hash-style=gnu"], + ), + ], + ), + ], + ) + + sysroot_feature = feature( + name = "sysroot", + flag_sets = [ + flag_set( + actions = [ + ACTION_NAMES.assemble, + ACTION_NAMES.preprocess_assemble, + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ACTION_NAMES.cpp_header_parsing, + ACTION_NAMES.cpp_module_compile, + ACTION_NAMES.cpp_module_codegen, + ACTION_NAMES.cpp_link_executable, + ACTION_NAMES.cpp_link_dynamic_library, + ACTION_NAMES.cpp_link_nodeps_dynamic_library, + ], + flag_groups = [ + flag_group( + flags = ["--sysroot=%{sysroot}"], + iterate_over = "sysroot", + expand_if_available = "sysroot", + ), + ], + ), + ], + ) + + cuda_path_feature = feature( + name = "cuda_path", + enabled = True, + flag_sets = [ + flag_set( + actions = [ + ACTION_NAMES.assemble, + ACTION_NAMES.preprocess_assemble, + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ACTION_NAMES.cpp_header_parsing, + ACTION_NAMES.cpp_module_compile, + ACTION_NAMES.cpp_module_codegen, + ACTION_NAMES.cpp_link_executable, + ACTION_NAMES.cpp_link_dynamic_library, + ACTION_NAMES.cpp_link_nodeps_dynamic_library, + ], + flag_groups = [ + flag_group( + flags = ["--cuda-path=" + ctx.attr.cuda_path], + ), + ], + ), + ], + ) + + def_file_feature = feature( + name = "def_file", + flag_sets = [ + flag_set( + actions = all_link_actions, + flag_groups = [ + flag_group( + flags = ["/DEF:%{def_file_path}", "/ignore:4070"], + expand_if_available = "def_file_path", + ), + ], + ), + ], + ) + + if (ctx.attr.cpu == "darwin"): + stdlib_feature = feature( + name = "stdlib", + flag_sets = [ + flag_set( + actions = all_link_actions, + flag_groups = [flag_group(flags = ["-lc++"])], + ), + ], + ) + elif (ctx.attr.cpu == "local"): + stdlib_feature = feature( + name = "stdlib", + flag_sets = [ + flag_set( + actions = all_link_actions, + flag_groups = [flag_group(flags = ["-lstdc++"])], + ), + ], + ) + else: + stdlib_feature = None + + no_stripping_feature = feature(name = "no_stripping") + + alwayslink_feature = feature( + name = "alwayslink", + flag_sets = [ + flag_set( + actions = [ + ACTION_NAMES.cpp_link_dynamic_library, + ACTION_NAMES.cpp_link_nodeps_dynamic_library, + ACTION_NAMES.cpp_link_executable, + ], + flag_groups = [flag_group(flags = 
["-Wl,-no-as-needed"])], + ), + ], + ) + + input_param_flags_feature = feature( + name = "input_param_flags", + flag_sets = [ + flag_set( + actions = [ + ACTION_NAMES.cpp_link_dynamic_library, + ACTION_NAMES.cpp_link_nodeps_dynamic_library, + ], + flag_groups = [ + flag_group( + flags = ["/IMPLIB:%{interface_library_output_path}"], + expand_if_available = "interface_library_output_path", + ), + ], + ), + flag_set( + actions = all_link_actions + + [ACTION_NAMES.cpp_link_static_library], + flag_groups = [ + flag_group( + iterate_over = "libraries_to_link", + flag_groups = [ + flag_group( + iterate_over = "libraries_to_link.object_files", + flag_groups = [flag_group(flags = ["%{libraries_to_link.object_files}"])], + expand_if_equal = variable_with_value( + name = "libraries_to_link.type", + value = "object_file_group", + ), + ), + flag_group( + flag_groups = [flag_group(flags = ["%{libraries_to_link.name}"])], + expand_if_equal = variable_with_value( + name = "libraries_to_link.type", + value = "object_file", + ), + ), + flag_group( + flag_groups = [flag_group(flags = ["%{libraries_to_link.name}"])], + expand_if_equal = variable_with_value( + name = "libraries_to_link.type", + value = "interface_library", + ), + ), + flag_group( + flag_groups = [ + flag_group( + flags = ["%{libraries_to_link.name}"], + expand_if_false = "libraries_to_link.is_whole_archive", + ), + flag_group( + flags = ["/WHOLEARCHIVE:%{libraries_to_link.name}"], + expand_if_true = "libraries_to_link.is_whole_archive", + ), + ], + expand_if_equal = variable_with_value( + name = "libraries_to_link.type", + value = "static_library", + ), + ), + ], + expand_if_available = "libraries_to_link", + ), + ], + ), + ], + ) + + if (ctx.attr.cpu == "local"): + no_canonical_prefixes_feature = feature( + name = "no-canonical-prefixes", + flag_sets = [ + flag_set( + actions = [ + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ACTION_NAMES.cpp_link_executable, + ACTION_NAMES.cpp_link_dynamic_library, + ACTION_NAMES.cpp_link_nodeps_dynamic_library, + ], + flag_groups = [ + flag_group( + flags = [ + "-no-canonical-prefixes", + ] + ctx.attr.extra_no_canonical_prefixes_flags, + ), + ], + ), + ], + ) + elif (ctx.attr.cpu == "darwin"): + no_canonical_prefixes_feature = feature( + name = "no-canonical-prefixes", + flag_sets = [ + flag_set( + actions = [ + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ACTION_NAMES.cpp_link_executable, + ACTION_NAMES.cpp_link_dynamic_library, + ACTION_NAMES.cpp_link_nodeps_dynamic_library, + ], + flag_groups = [flag_group(flags = ["-no-canonical-prefixes"])], + ), + ], + ) + else: + no_canonical_prefixes_feature = None + + has_configured_linker_path_feature = feature(name = "has_configured_linker_path") + + copy_dynamic_libraries_to_binary_feature = feature(name = "copy_dynamic_libraries_to_binary") + + user_link_flags_feature = feature( + name = "user_link_flags", + flag_sets = [ + flag_set( + actions = all_link_actions, + flag_groups = [ + flag_group( + flags = ["%{user_link_flags}"], + iterate_over = "user_link_flags", + expand_if_available = "user_link_flags", + ), + ], + ), + ], + ) + + cpp11_feature = feature( + name = "c++11", + flag_sets = [ + flag_set( + actions = [ACTION_NAMES.cpp_compile], + flag_groups = [flag_group(flags = ["-std=c++11"])], + ), + ], + ) + + if (ctx.attr.cpu == "local"): + common_feature = feature( + name = "common", + implies = [ + "stdlib", + "c++11", + "determinism", + "alwayslink", + "hardening", + "warnings", + "frame-pointer", + "build-id", + "no-canonical-prefixes", 
+ "linker-bin-path", + ], + ) + elif (ctx.attr.cpu == "darwin"): + common_feature = feature( + name = "common", + implies = [ + "stdlib", + "c++11", + "determinism", + "hardening", + "warnings", + "frame-pointer", + "no-canonical-prefixes", + "linker-bin-path", + "undefined-dynamic", + ], + ) + else: + common_feature = None + + if (ctx.attr.cpu == "local"): + features = [ + cpp11_feature, + stdlib_feature, + determinism_feature, + alwayslink_feature, + pic_feature, + hardening_feature, + warnings_feature, + frame_pointer_feature, + build_id_feature, + no_canonical_prefixes_feature, + disable_assertions_feature, + linker_bin_path_feature, + common_feature, + opt_feature, + fastbuild_feature, + dbg_feature, + supports_dynamic_linker_feature, + supports_pic_feature, + ] + if ctx.attr.cuda_path: + features.append(cuda_path_feature) + elif (ctx.attr.cpu == "darwin"): + features = [ + cpp11_feature, + stdlib_feature, + determinism_feature, + pic_feature, + hardening_feature, + warnings_feature, + frame_pointer_feature, + no_canonical_prefixes_feature, + disable_assertions_feature, + linker_bin_path_feature, + undefined_dynamic_feature, + common_feature, + opt_feature, + fastbuild_feature, + dbg_feature, + supports_dynamic_linker_feature, + supports_pic_feature, + ] + elif (ctx.attr.cpu == "x64_windows"): + features = [ + no_legacy_features_feature, + redirector_feature, + nologo_feature, + has_configured_linker_path_feature, + no_stripping_feature, + targets_windows_feature, + copy_dynamic_libraries_to_binary_feature, + default_compile_flags_feature, + msvc_env_feature, + include_paths_feature, + preprocessor_defines_feature, + parse_showincludes_feature, + generate_pdb_file_feature, + shared_flag_feature, + linkstamps_feature, + output_execpath_flags_feature, + archiver_flags_feature, + input_param_flags_feature, + linker_subsystem_flag_feature, + user_link_flags_feature, + default_link_flags_feature, + linker_param_file_feature, + static_link_msvcrt_feature, + static_link_msvcrt_no_debug_feature, + dynamic_link_msvcrt_no_debug_feature, + static_link_msvcrt_debug_feature, + dynamic_link_msvcrt_debug_feature, + dbg_feature, + fastbuild_feature, + opt_feature, + user_compile_flags_feature, + sysroot_feature, + unfiltered_compile_flags_feature, + compiler_output_flags_feature, + compiler_input_flags_feature, + def_file_feature, + windows_export_all_symbols_feature, + no_windows_export_all_symbols_feature, + supports_dynamic_linker_feature, + supports_interface_shared_libraries_feature, + ] + else: + fail("Unreachable") + + cxx_builtin_include_directories = ctx.attr.builtin_include_directories + + if (ctx.attr.cpu == "x64_windows"): + tool_paths = [ + tool_path(name = "ar", path = ctx.attr.msvc_lib_path), + tool_path(name = "ml", path = ctx.attr.msvc_ml_path), + tool_path(name = "cpp", path = ctx.attr.msvc_cl_path), + tool_path(name = "gcc", path = ctx.attr.msvc_cl_path), + tool_path(name = "gcov", path = "wrapper/bin/msvc_nop.bat"), + tool_path(name = "ld", path = ctx.attr.msvc_link_path), + tool_path(name = "nm", path = "wrapper/bin/msvc_nop.bat"), + tool_path( + name = "objcopy", + path = "wrapper/bin/msvc_nop.bat", + ), + tool_path( + name = "objdump", + path = "wrapper/bin/msvc_nop.bat", + ), + tool_path( + name = "strip", + path = "wrapper/bin/msvc_nop.bat", + ), + ] + elif (ctx.attr.cpu == "local"): + tool_paths = [ + tool_path(name = "gcc", path = ctx.attr.host_compiler_path), + tool_path(name = "ar", path = ctx.attr.host_compiler_prefix + "/ar"), + tool_path(name = "compat-ld", path = 
ctx.attr.host_compiler_prefix + "/ld"), + tool_path(name = "cpp", path = ctx.attr.host_compiler_prefix + "/cpp"), + tool_path(name = "dwp", path = ctx.attr.host_compiler_prefix + "/dwp"), + tool_path(name = "gcov", path = ctx.attr.host_compiler_prefix + "/gcov"), + tool_path(name = "ld", path = ctx.attr.host_compiler_prefix + "/ld"), + tool_path(name = "nm", path = ctx.attr.host_compiler_prefix + "/nm"), + tool_path(name = "objcopy", path = ctx.attr.host_compiler_prefix + "/objcopy"), + tool_path(name = "objdump", path = ctx.attr.host_compiler_prefix + "/objdump"), + tool_path(name = "strip", path = ctx.attr.host_compiler_prefix + "/strip"), + ] + elif (ctx.attr.cpu == "darwin"): + tool_paths = [ + tool_path(name = "gcc", path = ctx.attr.host_compiler_path), + tool_path(name = "ar", path = ctx.attr.host_compiler_prefix + "/libtool"), + tool_path(name = "compat-ld", path = ctx.attr.host_compiler_prefix + "/ld"), + tool_path(name = "cpp", path = ctx.attr.host_compiler_prefix + "/cpp"), + tool_path(name = "dwp", path = ctx.attr.host_compiler_prefix + "/dwp"), + tool_path(name = "gcov", path = ctx.attr.host_compiler_prefix + "/gcov"), + tool_path(name = "ld", path = ctx.attr.host_compiler_prefix + "/ld"), + tool_path(name = "nm", path = ctx.attr.host_compiler_prefix + "/nm"), + tool_path(name = "objcopy", path = ctx.attr.host_compiler_prefix + "/objcopy"), + tool_path(name = "objdump", path = ctx.attr.host_compiler_prefix + "/objdump"), + tool_path(name = "strip", path = ctx.attr.host_compiler_prefix + "/strip"), + ] + else: + fail("Unreachable") + + out = ctx.actions.declare_file(ctx.label.name) + ctx.actions.write(out, "Fake executable") + return [ + cc_common.create_cc_toolchain_config_info( + ctx = ctx, + features = features, + action_configs = action_configs, + artifact_name_patterns = [], + cxx_builtin_include_directories = cxx_builtin_include_directories, + toolchain_identifier = toolchain_identifier, + host_system_name = host_system_name, + target_system_name = target_system_name, + target_cpu = target_cpu, + target_libc = target_libc, + compiler = compiler, + abi_version = abi_version, + abi_libc_version = abi_libc_version, + tool_paths = tool_paths, + make_variables = [], + builtin_sysroot = builtin_sysroot, + cc_target_os = cc_target_os, + ), + DefaultInfo( + executable = out, + ), + ] + +cc_toolchain_config = rule( + implementation = _impl, + attrs = { + "cpu": attr.string(mandatory = True, values = ["darwin", "local", "x64_windows"]), + "builtin_include_directories": attr.string_list(), + "extra_no_canonical_prefixes_flags": attr.string_list(), + "host_compiler_path": attr.string(), + "host_compiler_prefix": attr.string(), + "host_compiler_warnings": attr.string_list(), + "host_unfiltered_compile_flags": attr.string_list(), + "linker_bin_path": attr.string(), + "builtin_sysroot": attr.string(), + "cuda_path": attr.string(), + "msvc_cl_path": attr.string(default = "msvc_not_used"), + "msvc_env_include": attr.string(default = "msvc_not_used"), + "msvc_env_lib": attr.string(default = "msvc_not_used"), + "msvc_env_path": attr.string(default = "msvc_not_used"), + "msvc_env_tmp": attr.string(default = "msvc_not_used"), + "msvc_lib_path": attr.string(default = "msvc_not_used"), + "msvc_link_path": attr.string(default = "msvc_not_used"), + "msvc_ml_path": attr.string(default = "msvc_not_used"), + }, + provides = [CcToolchainConfigInfo], + executable = True, +) diff --git 
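The rule above resolves toolchain identity from the single `cpu` attribute through repeated `if ctx.attr.cpu == ...` chains in `_impl`. As a minimal Python sketch of that same selection (the dict and the helper name `describe_toolchain` are illustrative only, not part of the Starlark rule):

# Condensed view of the cpu -> toolchain mapping spelled out in _impl.
TOOLCHAIN_TABLE = {
    "darwin":      {"identifier": "local_darwin",  "libc": "macosx", "compiler": "compiler"},
    "local":       {"identifier": "local_linux",   "libc": "local",  "compiler": "compiler"},
    "x64_windows": {"identifier": "local_windows", "libc": "msvcrt", "compiler": "msvc-cl"},
}

def describe_toolchain(cpu):
    if cpu not in TOOLCHAIN_TABLE:
        raise ValueError("Unreachable")  # mirrors fail("Unreachable") in _impl
    return TOOLCHAIN_TABLE[cpu]

print(describe_toolchain("x64_windows"))
# {'identifier': 'local_windows', 'libc': 'msvcrt', 'compiler': 'msvc-cl'}

Any cpu value outside these three is rejected, which matches the rule's `values = ["darwin", "local", "x64_windows"]` attribute constraint.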
a/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda11/clang/bin/crosstool_wrapper_driver_is_not_gcc b/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda11/clang/bin/crosstool_wrapper_driver_is_not_gcc new file mode 100755 index 00000000000..07c85a38229 --- /dev/null +++ b/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda11/clang/bin/crosstool_wrapper_driver_is_not_gcc @@ -0,0 +1,289 @@ +#!/usr/bin/env python +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Crosstool wrapper for compiling CUDA programs. + +SYNOPSIS: + crosstool_wrapper_is_not_gcc [options passed in by cc_library() + or cc_binary() rule] + +DESCRIPTION: + This script is expected to be called by the cc_library() or cc_binary() bazel + rules. When the option "-x cuda" is present in the list of arguments passed + to this script, it invokes the nvcc CUDA compiler. Most arguments are passed + as is as a string to --compiler-options of nvcc. When "-x cuda" is not + present, this wrapper invokes hybrid_driver_is_not_gcc with the input + arguments as is. + +NOTES: + Changes to the contents of this file must be propagated from + //third_party/gpus/crosstool/crosstool_wrapper_is_not_gcc to + //third_party/gpus/crosstool/v*/*/clang/bin/crosstool_wrapper_is_not_gcc +""" + +from __future__ import print_function + +__author__ = 'keveman@google.com (Manjunath Kudlur)' + +from argparse import ArgumentParser +import os +import subprocess +import re +import sys +import pipes + +# Template values set by cuda_autoconf. +CPU_COMPILER = ('/dt7/usr/bin/gcc') +GCC_HOST_COMPILER_PATH = ('/dt7/usr/bin/gcc') + +NVCC_PATH = '/usr/local/cuda-11.0/bin/nvcc' +PREFIX_DIR = os.path.dirname(GCC_HOST_COMPILER_PATH) +NVCC_VERSION = '10.1' + +def Log(s): + print('gpus/crosstool: {0}'.format(s)) + + +def GetOptionValue(argv, option): + """Extract the list of values for option from the argv list. + + Args: + argv: A list of strings, possibly the argv passed to main(). + option: The option whose value to extract, with the leading '-'. + + Returns: + A list of values, either directly following the option, + (eg., -opt val1 val2) or values collected from multiple occurrences of + the option (eg., -opt val1 -opt val2). + """ + + parser = ArgumentParser() + parser.add_argument(option, nargs='*', action='append') + option = option.lstrip('-').replace('-', '_') + args, _ = parser.parse_known_args(argv) + if not args or not vars(args)[option]: + return [] + else: + return sum(vars(args)[option], []) + + +def GetHostCompilerOptions(argv): + """Collect the -isystem, -iquote, and --sysroot option values from argv. + + Args: + argv: A list of strings, possibly the argv passed to main(). + + Returns: + The string that can be used as the --compiler-options to nvcc. 
+ """ + + parser = ArgumentParser() + parser.add_argument('-isystem', nargs='*', action='append') + parser.add_argument('-iquote', nargs='*', action='append') + parser.add_argument('--sysroot', nargs=1) + parser.add_argument('-g', nargs='*', action='append') + parser.add_argument('-fno-canonical-system-headers', action='store_true') + parser.add_argument('-no-canonical-prefixes', action='store_true') + + args, _ = parser.parse_known_args(argv) + + opts = '' + + if args.isystem: + opts += ' -isystem ' + ' -isystem '.join(sum(args.isystem, [])) + if args.iquote: + opts += ' -iquote ' + ' -iquote '.join(sum(args.iquote, [])) + if args.g: + opts += ' -g' + ' -g'.join(sum(args.g, [])) + if args.fno_canonical_system_headers: + opts += ' -fno-canonical-system-headers' + if args.no_canonical_prefixes: + opts += ' -no-canonical-prefixes' + if args.sysroot: + opts += ' --sysroot ' + args.sysroot[0] + + return opts + +def _update_options(nvcc_options): + if NVCC_VERSION in ("7.0",): + return nvcc_options + + update_options = { "relaxed-constexpr" : "expt-relaxed-constexpr" } + return [ update_options[opt] if opt in update_options else opt + for opt in nvcc_options ] + +def GetNvccOptions(argv): + """Collect the -nvcc_options values from argv. + + Args: + argv: A list of strings, possibly the argv passed to main(). + + Returns: + The string that can be passed directly to nvcc. + """ + + parser = ArgumentParser() + parser.add_argument('-nvcc_options', nargs='*', action='append') + + args, _ = parser.parse_known_args(argv) + + if args.nvcc_options: + options = _update_options(sum(args.nvcc_options, [])) + return ' '.join(['--'+a for a in options]) + return '' + +def system(cmd): + """Invokes cmd with os.system(). + + Args: + cmd: The command. + + Returns: + The exit code if the process exited with exit() or -signal + if the process was terminated by a signal. + """ + retv = os.system(cmd) + if os.WIFEXITED(retv): + return os.WEXITSTATUS(retv) + else: + return -os.WTERMSIG(retv) + +def InvokeNvcc(argv, log=False): + """Call nvcc with arguments assembled from argv. + + Args: + argv: A list of strings, possibly the argv passed to main(). + log: True if logging is requested. + + Returns: + The return value of calling system('nvcc ' + args) + """ + + host_compiler_options = GetHostCompilerOptions(argv) + nvcc_compiler_options = GetNvccOptions(argv) + opt_option = GetOptionValue(argv, '-O') + m_options = GetOptionValue(argv, '-m') + m_options = ''.join([' -m' + m for m in m_options if m in ['32', '64']]) + include_options = GetOptionValue(argv, '-I') + out_file = GetOptionValue(argv, '-o') + depfiles = GetOptionValue(argv, '-MF') + defines = GetOptionValue(argv, '-D') + defines = ''.join([' -D' + define for define in defines]) + undefines = GetOptionValue(argv, '-U') + undefines = ''.join([' -U' + define for define in undefines]) + std_options = GetOptionValue(argv, '-std') + # Supported -std flags as of CUDA 9.0. Only keep last to mimic gcc/clang. + nvcc_allowed_std_options = ["c++03", "c++11", "c++14"] + std_options = ''.join([' -std=' + define + for define in std_options if define in nvcc_allowed_std_options][-1:]) + fatbin_options = ''.join([' --fatbin-options=' + option + for option in GetOptionValue(argv, '-Xcuda-fatbinary')]) + + # The list of source files get passed after the -c option. I don't know of + # any other reliable way to just get the list of source files to be compiled. 
+ src_files = GetOptionValue(argv, '-c') + + # Pass -w through from host to nvcc, but don't do anything fancier with + # warnings-related flags, since they're not necessarily the same across + # compilers. + warning_options = ' -w' if '-w' in argv else '' + + if len(src_files) == 0: + return 1 + if len(out_file) != 1: + return 1 + + opt = (' -O2' if (len(opt_option) > 0 and int(opt_option[0]) > 0) + else ' -g') + + includes = (' -I ' + ' -I '.join(include_options) + if len(include_options) > 0 + else '') + + # Unfortunately, there are other options that have -c prefix too. + # So allowing only those look like C/C++ files. + src_files = [f for f in src_files if + re.search('\.cpp$|\.cc$|\.c$|\.cxx$|\.C$', f)] + srcs = ' '.join(src_files) + out = ' -o ' + out_file[0] + + nvccopts = '-D_FORCE_INLINES ' + for capability in GetOptionValue(argv, "--cuda-gpu-arch"): + capability = capability[len('sm_'):] + nvccopts += r'-gencode=arch=compute_%s,\"code=sm_%s\" ' % (capability, + capability) + for capability in GetOptionValue(argv, '--cuda-include-ptx'): + capability = capability[len('sm_'):] + nvccopts += r'-gencode=arch=compute_%s,\"code=compute_%s\" ' % (capability, + capability) + nvccopts += nvcc_compiler_options + nvccopts += undefines + nvccopts += defines + nvccopts += std_options + nvccopts += m_options + nvccopts += warning_options + nvccopts += fatbin_options + + if depfiles: + # Generate the dependency file + depfile = depfiles[0] + cmd = (NVCC_PATH + ' ' + nvccopts + + ' --compiler-options "' + host_compiler_options + '"' + + ' --compiler-bindir=' + GCC_HOST_COMPILER_PATH + + ' -I .' + + ' -x cu ' + opt + includes + ' ' + srcs + ' -M -o ' + depfile) + if log: Log(cmd) + exit_status = system(cmd) + if exit_status != 0: + return exit_status + + cmd = (NVCC_PATH + ' ' + nvccopts + + ' --compiler-options "' + host_compiler_options + ' -fPIC"' + + ' --compiler-bindir=' + GCC_HOST_COMPILER_PATH + + ' -I .' + + ' -x cu ' + opt + includes + ' -c ' + srcs + out) + + # TODO(zhengxq): for some reason, 'gcc' needs this help to find 'as'. + # Need to investigate and fix. + cmd = 'PATH=' + PREFIX_DIR + ':$PATH ' + cmd + if log: Log(cmd) + return system(cmd) + + +def main(): + parser = ArgumentParser() + parser.add_argument('-x', nargs=1) + parser.add_argument('--cuda_log', action='store_true') + args, leftover = parser.parse_known_args(sys.argv[1:]) + + if args.x and args.x[0] == 'cuda': + if args.cuda_log: Log('-x cuda') + leftover = [pipes.quote(s) for s in leftover] + if args.cuda_log: Log('using nvcc') + return InvokeNvcc(leftover, log=args.cuda_log) + + # Strip our flags before passing through to the CPU compiler for files which + # are not -x cuda. We can't just pass 'leftover' because it also strips -x. + # We not only want to pass -x to the CPU compiler, but also keep it in its + # relative location in the argv list (the compiler is actually sensitive to + # this). 
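To make the comment above concrete, here is a small self-contained sketch (argv values invented) of why main() rebuilds the flag list from sys.argv instead of reusing argparse's leftover list:

from argparse import ArgumentParser

argv = ['-x', 'cuda', '--cuda_log', '-O2', 'foo.cc']
parser = ArgumentParser()
parser.add_argument('-x', nargs=1)
parser.add_argument('--cuda_log', action='store_true')
args, leftover = parser.parse_known_args(argv)
print(leftover)  # ['-O2', 'foo.cc'] -- '-x cuda' has been consumed
# Filtering sys.argv instead keeps -x, and its position, for the CPU compiler:
print([f for f in argv if not f.startswith('--cuda_log')])
# ['-x', 'cuda', '-O2', 'foo.cc']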
+ cpu_compiler_flags = [flag for flag in sys.argv[1:] + if not flag.startswith(('--cuda_log'))] + + return subprocess.call([CPU_COMPILER] + cpu_compiler_flags) + +if __name__ == '__main__': + sys.exit(main()) From 14ca6184d13822192c2126fbc2ebeaf9d87dcec7 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Mon, 10 Aug 2020 22:48:53 +0700 Subject: [PATCH 2437/2522] Gcs refactor part 2 --- .../filesystem/plugins/gcs/gcs_filesystem.cc | 152 +++++++++++++++--- 1 file changed, 126 insertions(+), 26 deletions(-) diff --git a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc index d170e51e3b1..f16f55f251f 100644 --- a/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/gcs/gcs_filesystem.cc @@ -877,32 +877,6 @@ void DeleteDir(const TF_Filesystem* filesystem, const char* path, TF_SetStatus(status, TF_OK, ""); } -// TODO(vnvo2409): `RewriteObjectBlocking` will set `status` to `TF_NOT_FOUND` -// if the object does not exist. In that case, we will have to check if the -// `src` is a directory or not to set the correspondent `status` (i.e -// `TF_NOT_FOUND` if path `src` does not exist, `TF_FAILED_PRECONDITION` if -// path `src` is a directory). -void RenameFile(const TF_Filesystem* filesystem, const char* src, - const char* dst, TF_Status* status) { - std::string bucket_src, object_src; - ParseGCSPath(src, false, &bucket_src, &object_src, status); - if (TF_GetCode(status) != TF_OK) return; - - std::string bucket_dst, object_dst; - ParseGCSPath(dst, false, &bucket_dst, &object_dst, status); - if (TF_GetCode(status) != TF_OK) return; - - auto gcs_file = static_cast(filesystem->plugin_filesystem); - auto metadata = gcs_file->gcs_client.RewriteObjectBlocking( - bucket_src, object_src, bucket_dst, object_dst); - if (!metadata) { - TF_SetStatusFromGCSStatus(metadata.status(), status); - return; - } - auto gcs_status = gcs_file->gcs_client.DeleteObject(bucket_src, object_src); - TF_SetStatusFromGCSStatus(gcs_status, status); -} - void CopyFile(const TF_Filesystem* filesystem, const char* src, const char* dst, TF_Status* status) { std::string bucket_src, object_src; @@ -954,6 +928,100 @@ bool IsDirectory(const TF_Filesystem* filesystem, const char* path, return false; } +static void RenameObject(const TF_Filesystem* filesystem, + const std::string& src, const std::string& dst, + TF_Status* status) { + std::string bucket_src, object_src; + ParseGCSPath(src, false, &bucket_src, &object_src, status); + if (TF_GetCode(status) != TF_OK) return; + + std::string bucket_dst, object_dst; + ParseGCSPath(dst, false, &bucket_dst, &object_dst, status); + if (TF_GetCode(status) != TF_OK) return; + + auto gcs_file = static_cast(filesystem->plugin_filesystem); + auto metadata = gcs_file->gcs_client.RewriteObjectBlocking( + bucket_src, object_src, bucket_dst, object_dst); + TF_SetStatusFromGCSStatus(metadata.status(), status); + if (TF_GetCode(status) != TF_OK) return; + + ClearFileCaches(gcs_file, dst); + DeleteFile(filesystem, src.c_str(), status); +} + +void RenameFile(const TF_Filesystem* filesystem, const char* src, + const char* dst, TF_Status* status) { + if (!IsDirectory(filesystem, src, status)) { + if (TF_GetCode(status) == TF_FAILED_PRECONDITION) + RenameObject(filesystem, src, dst, status); + return; + } + + auto gcs_file = static_cast(filesystem->plugin_filesystem); + std::vector childrens = + GetChildrenBounded(gcs_file, src, UINT64_MAX, true, true, status); + if 
(TF_GetCode(status) != TF_OK) return; + + std::string src_dir = src; + std::string dst_dir = dst; + MaybeAppendSlash(&src_dir); + MaybeAppendSlash(&dst_dir); + for (const std::string& children : childrens) { + RenameObject(filesystem, src_dir + children, dst_dir + children, status); + if (TF_GetCode(status) != TF_OK) return; + } + TF_SetStatus(status, TF_OK, ""); +} + +void DeleteRecursively(const TF_Filesystem* filesystem, const char* path, + uint64_t* undeleted_files, uint64_t* undeleted_dirs, + TF_Status* status) { + if (!undeleted_files || !undeleted_dirs) + return TF_SetStatus( + status, TF_INTERNAL, + "'undeleted_files' and 'undeleted_dirs' cannot be nullptr."); + *undeleted_files = 0; + *undeleted_dirs = 0; + if (!IsDirectory(filesystem, path, status)) { + *undeleted_dirs = 1; + return; + } + auto gcs_file = static_cast(filesystem->plugin_filesystem); + std::vector childrens = + GetChildrenBounded(gcs_file, path, UINT64_MAX, true, true, status); + if (TF_GetCode(status) != TF_OK) return; + + std::string dir = path; + MaybeAppendSlash(&dir); + for (const std::string& children : childrens) { + const std::string& full_path = dir + children; + DeleteFile(filesystem, full_path.c_str(), status); + if (TF_GetCode(status) != TF_OK) { + if (IsDirectory(filesystem, full_path.c_str(), status)) + // The object is a directory marker. + (*undeleted_dirs)++; + else + (*undeleted_files)++; + } + } +} + +int GetChildren(const TF_Filesystem* filesystem, const char* path, + char*** entries, TF_Status* status) { + auto gcs_file = static_cast(filesystem->plugin_filesystem); + std::vector childrens = + GetChildrenBounded(gcs_file, path, UINT64_MAX, false, false, status); + if (TF_GetCode(status) != TF_OK) return -1; + + int num_entries = childrens.size(); + *entries = static_cast( + plugin_memory_allocate(num_entries * sizeof((*entries)[0]))); + for (int i = 0; i < num_entries; i++) + (*entries)[i] = strdup(childrens[i].c_str()); + TF_SetStatus(status, TF_OK, ""); + return num_entries; +} + void Stat(const TF_Filesystem* filesystem, const char* path, TF_FileStatistics* stats, TF_Status* status) { std::string bucket, object; @@ -991,6 +1059,17 @@ void Stat(const TF_Filesystem* filesystem, const char* path, } } +static char* TranslateName(const TF_Filesystem* filesystem, const char* uri) { + return strdup(uri); +} + +static void FlushCaches(const TF_Filesystem* filesystem) { + auto gcs_file = static_cast(filesystem->plugin_filesystem); + absl::ReaderMutexLock l(&gcs_file->block_cache_lock); + gcs_file->file_block_cache->Flush(); + gcs_file->stat_cache->Clear(); +} + } // namespace tf_gcs_filesystem static void ProvideFilesystemSupportFor(TF_FilesystemPluginOps* ops, @@ -1007,6 +1086,13 @@ static void ProvideFilesystemSupportFor(TF_FilesystemPluginOps* ops, plugin_memory_allocate(TF_WRITABLE_FILE_OPS_SIZE)); ops->writable_file_ops->cleanup = tf_writable_file::Cleanup; + ops->read_only_memory_region_ops = static_cast( + plugin_memory_allocate(TF_READ_ONLY_MEMORY_REGION_OPS_SIZE)); + ops->read_only_memory_region_ops->cleanup = + tf_read_only_memory_region::Cleanup; + ops->read_only_memory_region_ops->data = tf_read_only_memory_region::Data; + ops->read_only_memory_region_ops->length = tf_read_only_memory_region::Length; + ops->filesystem_ops = static_cast( plugin_memory_allocate(TF_FILESYSTEM_OPS_SIZE)); ops->filesystem_ops->init = tf_gcs_filesystem::Init; @@ -1016,6 +1102,20 @@ static void ProvideFilesystemSupportFor(TF_FilesystemPluginOps* ops, ops->filesystem_ops->new_writable_file = 
tf_gcs_filesystem::NewWritableFile; ops->filesystem_ops->new_appendable_file = tf_gcs_filesystem::NewAppendableFile; + ops->filesystem_ops->new_read_only_memory_region_from_file = + tf_gcs_filesystem::NewReadOnlyMemoryRegionFromFile; + ops->filesystem_ops->create_dir = tf_gcs_filesystem::CreateDir; + ops->filesystem_ops->delete_file = tf_gcs_filesystem::DeleteFile; + ops->filesystem_ops->delete_dir = tf_gcs_filesystem::DeleteDir; + ops->filesystem_ops->delete_recursively = + tf_gcs_filesystem::DeleteRecursively; + ops->filesystem_ops->copy_file = tf_gcs_filesystem::CopyFile; + ops->filesystem_ops->path_exists = tf_gcs_filesystem::PathExists; + ops->filesystem_ops->is_directory = tf_gcs_filesystem::IsDirectory; + ops->filesystem_ops->stat = tf_gcs_filesystem::Stat; + ops->filesystem_ops->get_children = tf_gcs_filesystem::GetChildren; + ops->filesystem_ops->translate_name = tf_gcs_filesystem::TranslateName; + ops->filesystem_ops->flush_caches = tf_gcs_filesystem::FlushCaches; } void TF_InitPlugin(TF_FilesystemPluginInfo* info) { From 91736566223baa1a74cd729a8b869f95f895da61 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Mon, 10 Aug 2020 08:59:35 -0700 Subject: [PATCH 2438/2522] Add HLO RngBitGenerator This adds the XlaBuilder RngBitGenerator to the MHLO dialect. The op is currently represented very directly using int attribute for random algorithm and direct import/export. PiperOrigin-RevId: 325814134 Change-Id: I640e3141b8e16d1e186cd99848273b245d1c8b3e --- .../include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td | 16 +++++++++++++++- .../mlir-hlo/Dialect/mhlo/IR/hlo_ops_base.td | 13 +++++++++++++ .../compiler/mlir/xla/hlo_function_importer.cc | 7 +++++++ tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc | 11 +++++++++++ .../mlir/xla/tests/translate/export.mlir | 12 ++++++++++++ .../mlir/xla/tests/translate/import.hlotxt | 9 +++++++++ 6 files changed, 67 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td index b8b1926a0c9..d0abbe043ea 100644 --- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td @@ -1329,8 +1329,9 @@ def HLO_TorchIndexSelectOp : HLO_Op<"torch_index_select", [NoSideEffect]> { } //===----------------------------------------------------------------------===// -// MHLO RngUniform Operator. +// MHLO RNG Operators. //===----------------------------------------------------------------------===// + def HLO_RngUniformOp : HLO_Op<"rng_uniform", []>, BASE_HLO_RngUniformOp { let arguments = (ins HLO_PredIntOrFpTensor:$a, @@ -1355,6 +1356,19 @@ def HLO_RngNormalOp : HLO_Op<"rng_normal", []>, BASE_HLO_RngNormalOp { let hasCustomHLOConverter = 1; } +def HLO_RngBitGeneratorOp : HLO_Op<"rng_bit_generator", [NoSideEffect]>, BASE_HLO_RngBitGeneratorOp { + let arguments = (ins + // TODO(jpienaar): This could be an enum instead. + I32Attr:$rng_algorithm, + HLO_IntOrFpTensor:$initial_state + ); + + let results = (outs HLO_TensorOrTuple:$result); + + // TODO(jpienaar): This should not be needed. + let hasCustomHLOConverter = 1; +} + //===----------------------------------------------------------------------===// // MHLO Quantize Operator. 
//===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops_base.td b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops_base.td index 7f9784d7f11..2f80545ad19 100644 --- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops_base.td +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops_base.td @@ -316,6 +316,19 @@ class BASE_HLO_RealOp { }]; } +class BASE_HLO_RngBitGeneratorOp { + string summary = "Uniform random number generator operator"; + + string description = [{ + Returns an output with a given shape filled with uniform random bits using + the specified algorithm (or backend default) and returns an updated state + (with the same shape as initial state) and the generated random data. + + See + https://www.tensorflow.org/xla/operation_semantics#rngbitgenerator. + }]; +} + class BASE_HLO_RoundOp { string summary = "Round operator"; diff --git a/tensorflow/compiler/mlir/xla/hlo_function_importer.cc b/tensorflow/compiler/mlir/xla/hlo_function_importer.cc index d366a36c212..a63fc12c285 100644 --- a/tensorflow/compiler/mlir/xla/hlo_function_importer.cc +++ b/tensorflow/compiler/mlir/xla/hlo_function_importer.cc @@ -521,6 +521,13 @@ StatusOr HloFunctionImporter::ImportInstruction( RandomDistributionToString(instruction->random_distribution()))); } } + case HloOpcode::kRngBitGenerator: { + auto rng_op = Cast(instruction); + auto op = func_builder->create( + loc, result_type, + func_builder->getI32IntegerAttr(rng_op->algorithm()), operands[0]); + return op.getOperation(); + } case HloOpcode::kWhile: { auto op = func_builder->create( loc, operands[0].getType(), operands[0]); diff --git a/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc b/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc index e6d0b8f8dd8..5398cd70777 100644 --- a/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc +++ b/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc @@ -882,6 +882,17 @@ LogicalResult ExportXlaOp(ReturnOp op, OpLoweringContext ctx) { return failure(); } +LogicalResult ExportXlaOp(RngBitGeneratorOp op, OpLoweringContext ctx) { + auto& value_map = *ctx.values; + auto result = op.getResult(); + auto xla_arg_1 = value_map[*op.getODSOperands(0).begin()]; + auto xla_result = xla::RngBitGenerator( + static_cast(op.rng_algorithm().getSExtValue()), + Unwrap(xla_arg_1), xla::TypeToShape(result.getType()).tuple_shapes(1)); + value_map[result] = xla_result; + return mlir::success(); +} + LogicalResult ExportXlaOp(RngNormalOp op, OpLoweringContext ctx) { auto& value_map = *ctx.values; xla::XlaOp mu, sigma; diff --git a/tensorflow/compiler/mlir/xla/tests/translate/export.mlir b/tensorflow/compiler/mlir/xla/tests/translate/export.mlir index 9929bd85b43..316eda4c4aa 100644 --- a/tensorflow/compiler/mlir/xla/tests/translate/export.mlir +++ b/tensorflow/compiler/mlir/xla/tests/translate/export.mlir @@ -1087,3 +1087,15 @@ func @main(%arg: tensor<3x4xf32>, %token: !mhlo.token) -> !mhlo.token { } // CHECK-NOT: frontend_attributes + +// ----- + +// Checks exporting rng-bit-generator. 
+ +// CHECK: HloModule +func @main(%arg: tensor<3xui64>) -> tuple, tensor<2x2xui32>> { +// CHECK: %[[ARG0:.*]] = u64[3] parameter(0) +// CHECK: ROOT %[[RESULT:.*]] = (u64[3], u32[2,2]) rng-bit-generator(u64[3] %[[ARG0]]), algorithm=rng_philox + %0 = "mhlo.rng_bit_generator"(%arg) {rng_algorithm = 2 : i32} : (tensor<3xui64>) -> tuple, tensor<2x2xui32>> + return %0 : tuple, tensor<2x2xui32>> +} diff --git a/tensorflow/compiler/mlir/xla/tests/translate/import.hlotxt b/tensorflow/compiler/mlir/xla/tests/translate/import.hlotxt index d89b1fa44e1..4d4e0213da8 100644 --- a/tensorflow/compiler/mlir/xla/tests/translate/import.hlotxt +++ b/tensorflow/compiler/mlir/xla/tests/translate/import.hlotxt @@ -1005,3 +1005,12 @@ add { // CHECK: "mhlo.not"(%[[ARG0]]) {name = "{{.*}}"} : (tensor<4xui16>) -> tensor<4xui16> ROOT %not.2 = u16[4] not(u16[4] %Arg_0.1) } + +// CHECK-LABEL: func @rngbitgen +// CHECK-SAME: (%[[ARG0:.*]]: tensor<3xui64>) +%rngbitgen (Arg_0.1: u64[3]) -> (u64[3], u32[2,2]) { + %Arg_0.1 = u64[3] parameter(0) + // CHECK: "mhlo.rng_bit_generator"(%[[ARG0]]) {rng_algorithm = 2 : i32} : (tensor<3xui64>) -> tuple, tensor<2x2xui32>> + ROOT %rng-bit-generator.2 = (u64[3], u32[2,2]) rng-bit-generator(u64[3] %Arg_0.1), algorithm=rng_philox +} + From 2c6f7e24dd003aaf2c66747ea40f1280b011ffa7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 10 Aug 2020 09:05:54 -0700 Subject: [PATCH 2439/2522] Get namedtuple _make method from instance instead of class. It is possible that v0 is a _TupleWrapper object wrapping a namedtuple instead of a namedtuple itself. The class _TupleWrapper does not have the class method _make, but it should have the instance method _make which calls the namedtuple class method. Instances of a namedtuple also have the instance method _make which calls the relevant class method. This will allow the code to work for both cases. PiperOrigin-RevId: 325815594 Change-Id: I209f8ec5a8617f72183e4c12937b4429321e7b4f --- tensorflow/python/distribute/BUILD | 1 + tensorflow/python/distribute/distribute_utils.py | 4 ++-- tensorflow/python/distribute/distribute_utils_test.py | 10 ++++++++++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/distribute/BUILD b/tensorflow/python/distribute/BUILD index f67f306706f..8497c4da8a7 100644 --- a/tensorflow/python/distribute/BUILD +++ b/tensorflow/python/distribute/BUILD @@ -1197,6 +1197,7 @@ distribute_py_test( "//tensorflow/python/eager:test", "//tensorflow/python/saved_model/model_utils:mode_keys", "@absl_py//absl/testing:parameterized", + "@wrapt", ], ) diff --git a/tensorflow/python/distribute/distribute_utils.py b/tensorflow/python/distribute/distribute_utils.py index 916ebafd8ac..62f03c60224 100644 --- a/tensorflow/python/distribute/distribute_utils.py +++ b/tensorflow/python/distribute/distribute_utils.py @@ -63,8 +63,8 @@ def regroup(values, wrap_class=values_lib.PerReplica, always_wrap=False): if hasattr(v0, "_fields"): # This tuple is in fact a namedtuple! 
Create a new namedtuple instance # and initialize it with the regrouped values: - assert hasattr(type(v0), "_make") - return type(v0)._make(regrouped_tuple) + assert hasattr(v0, "_make") + return v0._make(regrouped_tuple) else: return regrouped_tuple diff --git a/tensorflow/python/distribute/distribute_utils_test.py b/tensorflow/python/distribute/distribute_utils_test.py index f91cad2db47..22ea6264d07 100644 --- a/tensorflow/python/distribute/distribute_utils_test.py +++ b/tensorflow/python/distribute/distribute_utils_test.py @@ -21,6 +21,7 @@ from __future__ import print_function import collections from absl.testing import parameterized +import wrapt from tensorflow.python.distribute import combinations from tensorflow.python.distribute import distribute_utils @@ -211,6 +212,15 @@ class RegroupAndSelectDeviceTest(test.TestCase, parameterized.TestCase): distribute_utils.select_replica( device_id, merged_estimator_spec)) + def testWrappedNamedTuple(self): + Point = collections.namedtuple("Point", ["x", "y"]) + point1 = Point(x=0, y=2) + point2 = Point(x=1, y=3) + wrapped1 = wrapt.ObjectProxy(point1) + wrapped2 = wrapt.ObjectProxy(point2) + result = distribute_utils.regroup([wrapped1, wrapped2]) + self.assertEqual(result.x.values, (0, 1)) + self.assertEqual(result.y.values, (2, 3)) if __name__ == "__main__": test.main() From 4ed4c14e4cbb2a6439cce8a62bd20957a00a75d0 Mon Sep 17 00:00:00 2001 From: Stefano Galarraga Date: Mon, 10 Aug 2020 09:09:59 -0700 Subject: [PATCH 2440/2522] Add NNAPI Delegation support for requantization use cases (transforming a quantized tensor into another quantized one with different quantization parameters) by converting the operation into a dequantize-quantize pair. PiperOrigin-RevId: 325816400 Change-Id: I55f8726f0478e9795c667a9cf4eddda084ed95a7 --- RELEASE.md | 1 + .../delegates/nnapi/acceleration_test_list.cc | 2 ++ .../lite/delegates/nnapi/nnapi_delegate.cc | 24 +++++++++++++++---- .../delegates/nnapi/nnapi_delegate_kernel.h | 1 + 4 files changed, 24 insertions(+), 4 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 191c18e5ddb..525db3cade8 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -128,6 +128,7 @@ * Support optional flags `inference_input_type` and `inference_output_type` for full integer quantized models. This allows users to modify the model input and output type to integer types (`tf.int8`, `tf.uint8`) instead of defaulting to float type (`tf.float32`). * Deprecate `Interpreter::UseNNAPI(bool)` C++ API * Prefer using `NnApiDelegate()` and related delegate configuration methods directly. + * Add NNAPI Delegation support for requantization use cases by converting the operation into a dequantize-quantize pair. 
* * `tf.random`: * diff --git a/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc b/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc index 5183ab4b062..43f9c1b0953 100644 --- a/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc +++ b/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc @@ -309,6 +309,8 @@ QuantizedLstmTest/BasicQuantizedLstmTest/29 # quantize_test QuantizeOpTest/UINT8,29 +QuantizeOpTest/UInt8UInt8.+,29 +QuantizeOpTest/Int8Int8.+,30 QuantizeOpTest/INT8,30 # rank diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc index 2a33d764949..122ddc043b2 100644 --- a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc +++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc @@ -2436,13 +2436,20 @@ bool NNAPIDelegateKernel::Validate( "Input should be Float32.", &val_ctx); } break; case kTfLiteBuiltinQuantize: { - ExpectOpVersion(version, 1, &val_ctx); + ExpectMaxOpVersion(version, 2, &val_ctx); ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12, &val_ctx); const auto value_type = context->tensors[node->inputs->data[0]].type; - Expect(value_type == kTfLiteFloat32, + Expect(value_type == kTfLiteFloat32 || IsQuantized(value_type), NNAPIValidationFailureType::kUnsupportedInputType, - "Value should be Float32.", &val_ctx); + "Value should be quantized or Float32.", &val_ctx); + if (IsQuantized(value_type)) { + const auto quantization_params = + context->tensors[node->inputs->data[0]].params; + Expect(quantization_params.scale > 0.f, + NNAPIValidationFailureType::kUnsupportedQuantizationParameters, + "Quantization scale should be > 0.", &val_ctx); + } const auto output_type = context->tensors[node->outputs->data[0]].type; if (android_sdk_version < kMinSdkVersionForNNAPI13) { Expect(output_type == kTfLiteUInt8, @@ -3284,6 +3291,15 @@ TfLiteStatus NNAPIDelegateKernel::Map( *nn_op_type = ANEURALNETWORKS_LOG_SOFTMAX; } break; case kTfLiteBuiltinQuantize: { + auto input_index = mapping_args.node->inputs->data[0]; + // NNAPI doesn't support requantization cases but only quantizations + // from float. Dequantizing our input adding a Dequantize node before + // this one. 
+ if (IsQuantized(mapping_args.context->tensors[input_index].type)) { + mapping_args.builder->AddDequantize(0, input_index, kTfLiteFloat32, + mapping_args.node_index); + } + *nn_op_type = ANEURALNETWORKS_QUANTIZE; } break; case kTfLiteBuiltinReduceAny: { @@ -4254,7 +4270,7 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors( int nn_op_type; TF_LITE_ENSURE_STATUS( Map(context, reg->builtin_code, reg->version, target_sdk_version_, - {context, &builder, node, &model_state_outputs_, + {context, &builder, node, node_index, &model_state_outputs_, &model_state_tfl_inputs_, &feedback_loops_, nnapi_errno}, &nn_op_type)); diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h b/tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h index 72a64d2404a..36c1dd32efb 100644 --- a/tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h +++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h @@ -111,6 +111,7 @@ struct NNAPIOpMappingArgs { TfLiteContext* context; NNAPIOpBuilder* builder; TfLiteNode* node; + int node_index; std::vector* model_state_outputs; std::vector* model_state_tfl_inputs; std::vector>* feedback_loops; From 04d770b603809a9c89974e77576af238d422fde5 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Mon, 10 Aug 2020 09:27:19 -0700 Subject: [PATCH 2441/2522] Causes some windows tests to fail with ``` FAILED: //tensorflow/python/kernel_tests:cwise_ops_test (Summary) C:/tmp/hgi62d3r/execroot/org_tensorflow/bazel-out/x64_windows-opt/testlogs/tensorflow/python/kernel_tests/cwise_ops_test/shard_41_of_50/test.log C:/tmp/hgi62d3r/execroot/org_tensorflow/bazel-out/x64_windows-opt/testlogs/tensorflow/python/kernel_tests/cwise_ops_test/shard_41_of_50/test_attempts/attempt_1.log C:/tmp/hgi62d3r/execroot/org_tensorflow/bazel-out/x64_windows-opt/testlogs/tensorflow/python/kernel_tests/cwise_ops_test/shard_41_of_50/test_attempts/attempt_2.log INFO: From Testing //tensorflow/python/kernel_tests:cwise_ops_test (shard 41 of 50): ==================== Test output for //tensorflow/python/kernel_tests:cwise_ops_test (shard 41 of 50): Running tests under Python 3.8.3: c:\Python38\python.exe [ RUN ] MathOpsOverloadTest.testOverload 2020-08-07 18:06:36.702914: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX AVX2 To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 2020-08-07 18:06:36.731707: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x1f14f681f00 initialized for platform Host (this does not guarantee that XLA will be used). 
Devices: 2020-08-07 18:06:36.732038: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version INFO:tensorflow:time(__main__.MathOpsOverloadTest.testOverload): 0.09s I0807 18:06:36.788563 9040 test_util.py:1974] time(__main__.MathOpsOverloadTest.testOverload): 0.09s No pending test case: __main__.MathOpsOverloadTest.testOverload ====================================================================== ERROR: testOverload (__main__.MathOpsOverloadTest) (dtype=tf.bfloat16, np_func=, tf_func= at 0x000001F152FA13A0>) testOverload (__main__.MathOpsOverloadTest) ---------------------------------------------------------------------- Traceback (most recent call last): File "\\?\C:\Users\ContainerAdministrator\AppData\Local\Temp\Bazel.runfiles_6r__ku2y\runfiles\org_tensorflow\tensorflow\python\kernel_tests\cwise_ops_test.py", line 883, in testOverload self._compareBinary(10, 5, dtype, np_func, tf_func) File "\\?\C:\Users\ContainerAdministrator\AppData\Local\Temp\Bazel.runfiles_6r__ku2y\runfiles\org_tensorflow\tensorflow\python\kernel_tests\cwise_ops_test.py", line 843, in _compareBinary np_ans = np_func(x, y).astype(dtype.as_numpy_dtype) ValueError: No cast function available. ====================================================================== ERROR: testOverload (__main__.MathOpsOverloadTest) (dtype=tf.bfloat16, np_func=, tf_func= at 0x000001F152FA14C0>) testOverload (__main__.MathOpsOverloadTest) ---------------------------------------------------------------------- Traceback (most recent call last): File "\\?\C:\Users\ContainerAdministrator\AppData\Local\Temp\Bazel.runfiles_6r__ku2y\runfiles\org_tensorflow\tensorflow\python\kernel_tests\cwise_ops_test.py", line 883, in testOverload self._compareBinary(10, 5, dtype, np_func, tf_func) File "\\?\C:\Users\ContainerAdministrator\AppData\Local\Temp\Bazel.runfiles_6r__ku2y\runfiles\org_tensorflow\tensorflow\python\kernel_tests\cwise_ops_test.py", line 843, in _compareBinary np_ans = np_func(x, y).astype(dtype.as_numpy_dtype) ValueError: No cast function available. ---------------------------------------------------------------------- Ran 1 test in 0.093s FAILED (errors=2) ================================================================================ ==================== Test output for //tensorflow/python/kernel_tests:cwise_ops_test (shard 41 of 50): Running tests under Python 3.8.3: c:\Python38\python.exe [ RUN ] MathOpsOverloadTest.testOverload 2020-08-07 18:07:02.741015: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX AVX2 To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 2020-08-07 18:07:02.768942: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x177405cd170 initialized for platform Host (this does not guarantee that XLA will be used). 
Devices: 2020-08-07 18:07:02.769253: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version INFO:tensorflow:time(__main__.MathOpsOverloadTest.testOverload): 0.08s I0807 18:07:02.821412 3188 test_util.py:1974] time(__main__.MathOpsOverloadTest.testOverload): 0.08s No pending test case: __main__.MathOpsOverloadTest.testOverload ====================================================================== ERROR: testOverload (__main__.MathOpsOverloadTest) (dtype=tf.bfloat16, np_func=, tf_func= at 0x0000017741FFF3A0>) testOverload (__main__.MathOpsOverloadTest) ---------------------------------------------------------------------- Traceback (most recent call last): File "\\?\C:\Users\ContainerAdministrator\AppData\Local\Temp\Bazel.runfiles_1vkgaz40\runfiles\org_tensorflow\tensorflow\python\kernel_tests\cwise_ops_test.py", line 883, in testOverload self._compareBinary(10, 5, dtype, np_func, tf_func) File "\\?\C:\Users\ContainerAdministrator\AppData\Local\Temp\Bazel.runfiles_1vkgaz40\runfiles\org_tensorflow\tensorflow\python\kernel_tests\cwise_ops_test.py", line 843, in _compareBinary np_ans = np_func(x, y).astype(dtype.as_numpy_dtype) ValueError: No cast function available. ====================================================================== ERROR: testOverload (__main__.MathOpsOverloadTest) (dtype=tf.bfloat16, np_func=, tf_func= at 0x0000017741FFF4C0>) testOverload (__main__.MathOpsOverloadTest) ---------------------------------------------------------------------- Traceback (most recent call last): File "\\?\C:\Users\ContainerAdministrator\AppData\Local\Temp\Bazel.runfiles_1vkgaz40\runfiles\org_tensorflow\tensorflow\python\kernel_tests\cwise_ops_test.py", line 883, in testOverload self._compareBinary(10, 5, dtype, np_func, tf_func) File "\\?\C:\Users\ContainerAdministrator\AppData\Local\Temp\Bazel.runfiles_1vkgaz40\runfiles\org_tensorflow\tensorflow\python\kernel_tests\cwise_ops_test.py", line 843, in _compareBinary np_ans = np_func(x, y).astype(dtype.as_numpy_dtype) ValueError: No cast function available. ---------------------------------------------------------------------- Ran 1 test in 0.087s FAILED (errors=2) ================================================================================ ==================== Test output for //tensorflow/python/kernel_tests:cwise_ops_test (shard 41 of 50): Running tests under Python 3.8.3: c:\Python38\python.exe [ RUN ] MathOpsOverloadTest.testOverload 2020-08-07 18:07:22.498277: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX AVX2 To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 2020-08-07 18:07:22.528039: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x18eee98a7e0 initialized for platform Host (this does not guarantee that XLA will be used). 
Devices: 2020-08-07 18:07:22.528627: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version INFO:tensorflow:time(__main__.MathOpsOverloadTest.testOverload): 0.07s I0807 18:07:22.565276 5312 test_util.py:1974] time(__main__.MathOpsOverloadTest.testOverload): 0.07s No pending test case: __main__.MathOpsOverloadTest.testOverload ====================================================================== ERROR: testOverload (__main__.MathOpsOverloadTest) (dtype=tf.bfloat16, np_func=, tf_func= at 0x0000018EED9D13A0>) testOverload (__main__.MathOpsOverloadTest) ---------------------------------------------------------------------- Traceback (most recent call last): File "\\?\C:\Users\ContainerAdministrator\AppData\Local\Temp\Bazel.runfiles_ruka25hh\runfiles\org_tensorflow\tensorflow\python\kernel_tests\cwise_ops_test.py", line 883, in testOverload self._compareBinary(10, 5, dtype, np_func, tf_func) File "\\?\C:\Users\ContainerAdministrator\AppData\Local\Temp\Bazel.runfiles_ruka25hh\runfiles\org_tensorflow\tensorflow\python\kernel_tests\cwise_ops_test.py", line 843, in _compareBinary np_ans = np_func(x, y).astype(dtype.as_numpy_dtype) ValueError: No cast function available. ====================================================================== ERROR: testOverload (__main__.MathOpsOverloadTest) (dtype=tf.bfloat16, np_func=, tf_func= at 0x0000018EED9D14C0>) testOverload (__main__.MathOpsOverloadTest) ---------------------------------------------------------------------- Traceback (most recent call last): File "\\?\C:\Users\ContainerAdministrator\AppData\Local\Temp\Bazel.runfiles_ruka25hh\runfiles\org_tensorflow\tensorflow\python\kernel_tests\cwise_ops_test.py", line 883, in testOverload self._compareBinary(10, 5, dtype, np_func, tf_func) File "\\?\C:\Users\ContainerAdministrator\AppData\Local\Temp\Bazel.runfiles_ruka25hh\runfiles\org_tensorflow\tensorflow\python\kernel_tests\cwise_ops_test.py", line 843, in _compareBinary np_ans = np_func(x, y).astype(dtype.as_numpy_dtype) ValueError: No cast function available. ---------------------------------------------------------------------- Ran 1 test in 0.074s ``` PiperOrigin-RevId: 325819709 Change-Id: I919270e31978573c652f88224a32beb55592aa84 --- tensorflow/BUILD | 24 ----------- .../core/util/tensor_bundle/tensor_bundle.cc | 29 ++++++------- .../core/util/tensor_bundle/tensor_bundle.h | 41 +++++++++---------- .../def_file_filter/def_file_filter.py.tpl | 3 -- 4 files changed, 33 insertions(+), 64 deletions(-) diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 6745b3e54fa..d1c1d7dcdef 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -882,30 +882,6 @@ genrule( visibility = ["//visibility:public"], ) -# The interface library (tensorflow_framework.dll.if.lib) for linking tensorflow DLL -# library (tensorflow_framework.dll) on Windows. 
-# To learn more about import library (called interface library in Bazel): -# https://docs.microsoft.com/en-us/cpp/build/linking-an-executable-to-a-dll?view=vs-2017#linking-implicitly -filegroup( - name = "get_tensorflow_framework_dll_import_lib", - srcs = ["//tensorflow:tensorflow_framework.dll"], - output_group = "interface_library", - visibility = ["//visibility:public"], -) - -# Rename the import library for tensorflow_framework.dll from -# tensorflow_framework.dll.if.lib to tensorflow_framework.lib -genrule( - name = "tensorflow_framework_dll_import_lib", - srcs = [":get_tensorflow_framework_dll_import_lib"], - outs = ["tensorflow_framework.lib"], - cmd = select({ - "//tensorflow:windows": "cp -f $< $@", - "//conditions:default": "touch $@", # Just a placeholder for Unix platforms - }), - visibility = ["//visibility:public"], -) - # The interface library (tensorflow_cc.dll.if.lib) for linking tensorflow DLL library (tensorflow_cc.dll) on Windows. # To learn more about import library (called interface library in Bazel): # https://docs.microsoft.com/en-us/cpp/build/linking-an-executable-to-a-dll?view=vs-2017#linking-implicitly diff --git a/tensorflow/core/util/tensor_bundle/tensor_bundle.cc b/tensorflow/core/util/tensor_bundle/tensor_bundle.cc index ae9e10b3f67..bb18000fcfe 100644 --- a/tensorflow/core/util/tensor_bundle/tensor_bundle.cc +++ b/tensorflow/core/util/tensor_bundle/tensor_bundle.cc @@ -741,7 +741,7 @@ Status MergeBundles(Env* env, gtl::ArraySlice prefixes, // Interface for reading a tensor bundle. -TF_EXPORT BundleReader::BundleReader(Env* env, StringPiece prefix) +BundleReader::BundleReader(Env* env, StringPiece prefix) : env_(env), prefix_(prefix), metadata_(nullptr), @@ -796,7 +796,7 @@ TF_EXPORT BundleReader::BundleReader(Env* env, StringPiece prefix) kTensorBundleMinProducer, "Checkpoint", "checkpoint"); } -TF_EXPORT BundleReader::~BundleReader() { +BundleReader::~BundleReader() { delete metadata_; delete iter_; delete table_; @@ -936,7 +936,7 @@ Status BundleReader::GetValue(const BundleEntryProto& entry, Tensor* val) { return Status::OK(); } -TF_EXPORT Status BundleReader::Lookup(StringPiece key, Tensor* val) { +Status BundleReader::Lookup(StringPiece key, Tensor* val) { CHECK(val != nullptr); BundleEntryProto entry; TF_RETURN_IF_ERROR(GetBundleEntryProto(key, &entry)); @@ -950,7 +950,7 @@ TF_EXPORT Status BundleReader::Lookup(StringPiece key, Tensor* val) { } } -TF_EXPORT Status BundleReader::ReadCurrent(Tensor* val) { +Status BundleReader::ReadCurrent(Tensor* val) { CHECK(val != nullptr); BundleEntryProto entry; TF_RETURN_IF_ERROR(ParseEntryProto(iter_->key(), iter_->value(), &entry)); @@ -968,8 +968,8 @@ TF_EXPORT Status BundleReader::ReadCurrent(Tensor* val) { } } -TF_EXPORT Status BundleReader::LookupTensorSlices( - StringPiece key, std::vector* slices) { +Status BundleReader::LookupTensorSlices(StringPiece key, + std::vector* slices) { slices->clear(); BundleEntryProto entry; TF_RETURN_IF_ERROR(GetBundleEntryProto(key, &entry)); @@ -980,9 +980,8 @@ TF_EXPORT Status BundleReader::LookupTensorSlices( return Status::OK(); } -TF_EXPORT Status BundleReader::LookupSlice(StringPiece full_tensor_key, - const TensorSlice& slice_spec, - Tensor* val) { +Status BundleReader::LookupSlice(StringPiece full_tensor_key, + const TensorSlice& slice_spec, Tensor* val) { CHECK(val != nullptr); BundleEntryProto entry; TF_RETURN_IF_ERROR(GetBundleEntryProto(full_tensor_key, &entry)); @@ -1104,14 +1103,13 @@ Status BundleReader::GetSliceValue(StringPiece full_tensor_key, return 
Status::OK(); } -TF_EXPORT bool BundleReader::Contains(StringPiece key) { +bool BundleReader::Contains(StringPiece key) { Seek(key); return Valid() && (this->key() == key); } -TF_EXPORT Status BundleReader::LookupDtypeAndShape(StringPiece key, - DataType* dtype, - TensorShape* shape) { +Status BundleReader::LookupDtypeAndShape(StringPiece key, DataType* dtype, + TensorShape* shape) { BundleEntryProto entry; TF_RETURN_IF_ERROR(GetBundleEntryProto(key, &entry)); *dtype = entry.dtype(); @@ -1119,13 +1117,12 @@ TF_EXPORT Status BundleReader::LookupDtypeAndShape(StringPiece key, return Status::OK(); } -TF_EXPORT Status BundleReader::LookupTensorShape(StringPiece key, - TensorShape* shape) { +Status BundleReader::LookupTensorShape(StringPiece key, TensorShape* shape) { DataType ignored; return LookupDtypeAndShape(key, &ignored, shape); } -TF_EXPORT string BundleReader::DebugString() { +string BundleReader::DebugString() { // Format used below emulates that of TensorSliceReader::DebugString(). string shape_str; BundleEntryProto entry; diff --git a/tensorflow/core/util/tensor_bundle/tensor_bundle.h b/tensorflow/core/util/tensor_bundle/tensor_bundle.h index 0ff0b2d8939..c441000e47d 100644 --- a/tensorflow/core/util/tensor_bundle/tensor_bundle.h +++ b/tensorflow/core/util/tensor_bundle/tensor_bundle.h @@ -182,28 +182,28 @@ Status MergeBundles(Env* env, gtl::ArraySlice prefixes, // All threads accessing the same BundleReader must synchronize. class BundleReader { public: - TF_EXPORT BundleReader(Env* const env, StringPiece prefix); - TF_EXPORT ~BundleReader(); + BundleReader(Env* const env, StringPiece prefix); + ~BundleReader(); // Is ok() iff the reader construction is successful (completed the read of // the metadata). - TF_EXPORT Status status() const { return status_; } + Status status() const { return status_; } // Queries whether the bundle contains an entry keyed by "key". Calls Seek() // internally, so this call invalidates the reader's current position. // REQUIRES: status().ok() - TF_EXPORT bool Contains(StringPiece key); + bool Contains(StringPiece key); // Looks up the dtype and the shape of the tensor keyed by "key". // REQUIRES: status().ok() - TF_EXPORT Status LookupDtypeAndShape(StringPiece key, DataType* dtype, - TensorShape* shape) TF_MUST_USE_RESULT; + Status LookupDtypeAndShape(StringPiece key, DataType* dtype, + TensorShape* shape) TF_MUST_USE_RESULT; // Looks up the shape of the tensor keyed by "key". // Clears "shape" if not found. // REQUIRES: status().ok() - TF_EXPORT Status LookupTensorShape(StringPiece key, - TensorShape* shape) TF_MUST_USE_RESULT; + Status LookupTensorShape(StringPiece key, + TensorShape* shape) TF_MUST_USE_RESULT; // Looks up the tensor keyed by "key". If "key" refers to a partitioned // tensor, attempts to look up the full contents using all stored slices. @@ -217,7 +217,7 @@ class BundleReader { // // Validates the stored crc32c checksum against the restored bytes. // REQUIRES: status().ok() - TF_EXPORT Status Lookup(StringPiece key, Tensor* val) TF_MUST_USE_RESULT; + Status Lookup(StringPiece key, Tensor* val) TF_MUST_USE_RESULT; // Looks up the tensor pointed to by the internal iterator. // @@ -225,7 +225,7 @@ class BundleReader { // // Validates the stored crc32c checksum against the restored bytes. // REQUIRES: status().ok() && Valid() - TF_EXPORT Status ReadCurrent(Tensor* val) TF_MUST_USE_RESULT; + Status ReadCurrent(Tensor* val) TF_MUST_USE_RESULT; // Looks up the slices of the tensor keyed by "key". 
On OK, "slices" // is non-empty if and only if the tensor is a partitioned tensor. @@ -234,35 +234,34 @@ class BundleReader { // a slice with a larger start index in some dimension could come before // another slice with a smaller start index in the same dimension. // REQUIRES: status().ok() - TF_EXPORT Status LookupTensorSlices( - StringPiece key, std::vector* slices) TF_MUST_USE_RESULT; + Status LookupTensorSlices(StringPiece key, std::vector* slices) + TF_MUST_USE_RESULT; // Looks up a specific slice of a partitioned tensor. // It is only required that the stored slices cover the requested slice, // namely "slice_spec" is a subset of the union of the stored slices. // REQUIRES: status().ok() - TF_EXPORT Status LookupSlice(StringPiece full_tensor_key, - const TensorSlice& slice_spec, - Tensor* val) TF_MUST_USE_RESULT; + Status LookupSlice(StringPiece full_tensor_key, const TensorSlice& slice_spec, + Tensor* val) TF_MUST_USE_RESULT; // Seeks to the first position in the bundle whose key is no less than "key". // REQUIRES: status().ok() - TF_EXPORT void Seek(StringPiece key) { return iter_->Seek(key); } + void Seek(StringPiece key) { return iter_->Seek(key); } // Moves to the next position in the bundle. // REQUIRES: status().ok() - TF_EXPORT void Next() const { iter_->Next(); } + void Next() const { iter_->Next(); } // Returns true iff the reader is positioned to a key/val pair. // REQUIRES: status().ok() - TF_EXPORT bool Valid() const { return iter_->Valid(); } + bool Valid() const { return iter_->Valid(); } // Returns the key at the current position. // REQUIRES: status().ok() && Valid() - TF_EXPORT StringPiece key() const { return iter_->key(); } + StringPiece key() const { return iter_->key(); } // Returns the raw value at the current position. // REQUIRES: status().ok() && Valid() - TF_EXPORT StringPiece value() const { return iter_->value(); } + StringPiece value() const { return iter_->value(); } - TF_EXPORT string DebugString(); + string DebugString(); private: // Seeks for "key" and reads the metadata proto. diff --git a/tensorflow/tools/def_file_filter/def_file_filter.py.tpl b/tensorflow/tools/def_file_filter/def_file_filter.py.tpl index 8642a6d2e24..1049939c94b 100644 --- a/tensorflow/tools/def_file_filter/def_file_filter.py.tpl +++ b/tensorflow/tools/def_file_filter/def_file_filter.py.tpl @@ -51,11 +51,8 @@ INCLUDEPRE_RE = re.compile(r"google::protobuf::internal::ExplicitlyConstructed|" r"google::protobuf::internal::ArenaImpl::AddCleanup|" # for contrib/data/_prefetching_ops r"google::protobuf::internal::LogMessage|" # for contrib/data/_prefetching_ops r"google::protobuf::Arena::OnArenaAllocation|" # for contrib/data/_prefetching_ops - r"google::protobuf::Message::InitializationErrorString|" - r"google::protobuf::MessageLite::ParseFromArray|" r"absl::Mutex::ReaderLock|" # for //tensorflow/contrib/rnn:python/ops/_gru_ops.so and more ops r"absl::Mutex::ReaderUnlock|" # for //tensorflow/contrib/rnn:python/ops/_gru_ops.so and more ops - r"tensorflow::TensorShape|" r"tensorflow::internal::LogMessage|" r"tensorflow::internal::LogString|" r"tensorflow::internal::CheckOpMessageBuilder|" From fd87e24980083e179bf93430cb6a1f920b9839d2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 10 Aug 2020 09:32:46 -0700 Subject: [PATCH 2442/2522] Adding total-order comparison support in proto and HloInstruction. Specifically a comparison type attribute is added to Hlo proto so that total order comparison can be explicitly specified. 
A comparison expander pass is added to all compilers to expand total order comparison into equivalent implementations through type conversion. PiperOrigin-RevId: 325820826 Change-Id: I7beceb2f751ddc0be7c6b7a74037e562e7580b62 --- tensorflow/compiler/xla/client/lib/BUILD | 6 +- .../compiler/xla/client/lib/comparators.cc | 90 ++---------- .../compiler/xla/client/lib/comparators.h | 7 +- tensorflow/compiler/xla/client/xla_builder.cc | 64 ++++++++- tensorflow/compiler/xla/client/xla_builder.h | 45 +++--- tensorflow/compiler/xla/comparison_util.cc | 57 +++++--- tensorflow/compiler/xla/comparison_util.h | 6 +- .../compiler/xla/g3doc/operation_semantics.md | 5 +- tensorflow/compiler/xla/primitive_util.cc | 15 ++ tensorflow/compiler/xla/primitive_util.h | 2 + tensorflow/compiler/xla/service/BUILD | 20 ++- .../xla/service/comparison_expander.cc | 133 ++++++++++++++++++ .../xla/service/comparison_expander.h | 47 +++++++ tensorflow/compiler/xla/service/cpu/BUILD | 1 + .../compiler/xla/service/cpu/cpu_compiler.cc | 2 + tensorflow/compiler/xla/service/gpu/BUILD | 1 + .../compiler/xla/service/gpu/gpu_compiler.cc | 4 + tensorflow/compiler/xla/service/hlo.proto | 5 +- .../compiler/xla/service/hlo_instruction.cc | 20 ++- .../compiler/xla/service/hlo_instruction.h | 3 +- .../compiler/xla/service/hlo_instructions.cc | 26 ++-- .../compiler/xla/service/hlo_instructions.h | 4 +- tensorflow/compiler/xla/service/hlo_parser.cc | 29 +++- .../compiler/xla/service/hlo_parser_test.cc | 4 +- .../compiler/xla/service/interpreter/BUILD | 1 + .../xla/service/interpreter/compiler.cc | 2 + .../xla/tests/array_elementwise_ops_test.cc | 22 +++ 27 files changed, 472 insertions(+), 149 deletions(-) create mode 100644 tensorflow/compiler/xla/service/comparison_expander.cc create mode 100644 tensorflow/compiler/xla/service/comparison_expander.h diff --git a/tensorflow/compiler/xla/client/lib/BUILD b/tensorflow/compiler/xla/client/lib/BUILD index 06fd8ceeb2b..a3c7c39e3ff 100644 --- a/tensorflow/compiler/xla/client/lib/BUILD +++ b/tensorflow/compiler/xla/client/lib/BUILD @@ -55,9 +55,13 @@ xla_test( cc_library( name = "comparators", srcs = ["comparators.cc"], - hdrs = ["comparators.h"], + hdrs = [ + "comparators.h", + "//tensorflow/compiler/xla:literal_util", + ], deps = [ ":constants", + "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla:xla_data_proto_cc", diff --git a/tensorflow/compiler/xla/client/lib/comparators.cc b/tensorflow/compiler/xla/client/lib/comparators.cc index 74e89b767cf..cd594a5cf39 100644 --- a/tensorflow/compiler/xla/client/lib/comparators.cc +++ b/tensorflow/compiler/xla/client/lib/comparators.cc @@ -32,85 +32,13 @@ limitations under the License. 
namespace xla { namespace { -using XlaOpGenerator = XlaOp (*)(XlaOp, XlaOp, absl::Span); - -XlaOp BitcastConvertFloatingPointToIntegral(const XlaOp& value, - int64 bit_width) { - PrimitiveType signed_type; - PrimitiveType unsigned_type; - XlaOp max_value; - switch (bit_width) { - case 16: - max_value = - ConstantR0(value.builder(), - static_cast(std::numeric_limits::max())); - signed_type = S16; - unsigned_type = U16; - break; - case 32: - max_value = - ConstantR0(value.builder(), - static_cast(std::numeric_limits::max())); - signed_type = S32; - unsigned_type = U32; - break; - case 64: - max_value = - ConstantR0(value.builder(), - static_cast(std::numeric_limits::max())); - signed_type = S64; - unsigned_type = U64; - break; - default: - return value.builder()->ReportError( - InvalidArgument("Invalid bit width %lld for Comparator floating " - "point parameter.", - bit_width)); - } - // Switch from a floating point value to a integer value in such a way that - // when using the integer value to compare, we get the same result for normal - // values, and -Nan is treated as the smallest value, and Nan is treated as - // the largest value. - // If f is a float, and - // x = bit_cast(f); - // y = x < 0 ? numeric_limits::max() - x : x; - // then y is ordered as an int32 such that finite values have the obvious - // order, -0 is ordered before 0, and -NaN and NaN appear at the beginning - // and end of the ordering. - // Note that in order to avoid -x to overflow, we calculate - // numeric_limits::max() - x as unsigned, and then convert back to - // signed. - auto signed_value = BitcastConvertType(value, signed_type); - auto unsigned_value = BitcastConvertType(value, unsigned_type); - auto flipped_value = - BitcastConvertType(Sub(max_value, unsigned_value), signed_type); - auto is_negative = Lt(signed_value, Zero(value.builder(), signed_type)); - return Select(is_negative, flipped_value, signed_value); -} - -void ConvertFloatingPoint(const PrimitiveType& operand_type, XlaOp* lhs_param, - XlaOp* rhs_param) { - if (primitive_util::IsFloatingPointType(operand_type)) { - PrimitiveType compare_type = operand_type; - // Special-case handling for BF16. We currently do not support direct - // comparisons with BF16, so we convert to F32 and then use the F32 - // comparison logic. 
- if (compare_type == BF16) { - compare_type = F32; - *lhs_param = ConvertElementType(*lhs_param, F32); - *rhs_param = ConvertElementType(*rhs_param, F32); - } - int64 bit_width = primitive_util::BitWidth(compare_type); - *lhs_param = BitcastConvertFloatingPointToIntegral(*lhs_param, bit_width); - *rhs_param = BitcastConvertFloatingPointToIntegral(*rhs_param, bit_width); - } -} +using XlaCompareOp = XlaOp (*)(XlaOp, XlaOp, absl::Span); XlaComputation CreateScalarComparisonComputation( const string& name, const std::vector& operand_types, - XlaBuilder* builder, XlaOpGenerator generator) { + XlaBuilder* builder, XlaCompareOp generator) { CHECK_NE(operand_types.size(), 0); - std::vector> generators(operand_types.size()); + std::vector> generators(operand_types.size()); generators[0] = generator; return CreateScalarComparisonComputation(name, operand_types, generators, builder); @@ -119,7 +47,7 @@ XlaComputation CreateScalarComparisonComputation( XlaComputation CreateScalarComparisonComputation( const string& name, const std::vector& operand_types, - const std::vector>& generators, + const std::vector>& generators, XlaBuilder* builder) { // Create a default computation where we compare only the first two // parameters of type 'operand_types[0]'. @@ -146,7 +74,6 @@ XlaComputation CreateScalarComparisonComputation( absl::StrCat("p.", parameter_count, ".lhs")); auto rhs_param = Parameter(b.get(), parameter_count * 2 + 1, scalar_shape, absl::StrCat("p.", parameter_count, ".rhs")); - ConvertFloatingPoint(operand_type, &lhs_param, &rhs_param); lhs_params.emplace_back(lhs_param); rhs_params.emplace_back(rhs_param); if (generators[parameter_count].has_value()) { @@ -169,7 +96,8 @@ XlaComputation CreateScalarComparisonComputation( generators[i].value()(lhs_params[i], rhs_params[i], {}), result); if (i != last_generator_index) { - param_equal = And(param_equal, Eq(lhs_params[i], rhs_params[i])); + param_equal = + And(param_equal, EqTotalOrder(lhs_params[i], rhs_params[i])); } } } @@ -181,14 +109,14 @@ XlaComputation CreateScalarComparisonComputation( XlaComputation CreateScalarLtComputation( const std::vector& operand_types, XlaBuilder* builder) { return CreateScalarComparisonComputation("compare-less-than", operand_types, - builder, Lt); + builder, LtTotalOrder); } // Creates a scalar greater-than computation and returns it. XlaComputation CreateScalarGtComputation( const std::vector& operand_types, XlaBuilder* builder) { - return CreateScalarComparisonComputation("compare-greater-than", - operand_types, builder, Gt); + return CreateScalarComparisonComputation( + "compare-greater-than", operand_types, builder, GtTotalOrder); } } // namespace xla diff --git a/tensorflow/compiler/xla/client/lib/comparators.h b/tensorflow/compiler/xla/client/lib/comparators.h index 25924d4a4f4..a82a84799aa 100644 --- a/tensorflow/compiler/xla/client/lib/comparators.h +++ b/tensorflow/compiler/xla/client/lib/comparators.h @@ -43,14 +43,13 @@ XlaComputation CreateScalarGtComputation( const std::vector& operand_types, XlaBuilder* builder); // Creates a scalar comparison computation and returns it. This function takes -// an std::vector> and compare the operands -// where the generator isn't nullopt with the specified comparator -// at that location. +// a vector of comparator functions to compare the operands where the function +// isn't nullopt with the specified comparator at that location. 
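As a rough illustration of how per-operand comparators combine, here is a plain Python sketch (not the XLA builder API, and it glosses over the total-order equality used in the real computation): a later operand is only consulted when every earlier compared pair ties.

```python
# Hypothetical sketch of a lexicographic comparator assembled from per-field
# "less than"-style comparators; not XLA code.

def make_lexicographic(comparators):
    def compare(lhs, rhs):
        result = False
        equal_so_far = True
        for cmp, l, r in zip(comparators, lhs, rhs):
            result = result or (equal_so_far and cmp(l, r))
            # Fall through to the next field only when this pair ties.
            equal_so_far = equal_so_far and not (cmp(l, r) or cmp(r, l))
        return result
    return compare

# Sort ascending on the first key and descending on the second.
less = make_lexicographic([lambda a, b: a < b, lambda a, b: a > b])
print(less((1, 9), (1, 5)))   # True: first keys tie, second is compared descending
print(less((0, 9), (1, 5)))   # True: decided by the first key alone
```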
XlaComputation CreateScalarComparisonComputation( const string& name, const std::vector& operand_types, const std::vector< absl::optional)>>& - generators, + comparators, XlaBuilder* builder); } // namespace xla diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc index 8de8216c005..2b69c71042d 100644 --- a/tensorflow/compiler/xla/client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_builder.cc @@ -577,7 +577,8 @@ XlaOp XlaBuilder::UnaryOp(HloOpcode unop, XlaOp operand) { XlaOp XlaBuilder::BinaryOp(HloOpcode binop, XlaOp lhs, XlaOp rhs, absl::Span broadcast_dimensions, - absl::optional direction) { + absl::optional direction, + absl::optional type) { return ReportErrorOrReturn([&]() -> StatusOr { TF_ASSIGN_OR_RETURN(const Shape* lhs_shape, GetShapePtr(lhs)); TF_ASSIGN_OR_RETURN(const Shape* rhs_shape, GetShapePtr(rhs)); @@ -635,7 +636,11 @@ XlaOp XlaBuilder::BinaryOp(HloOpcode binop, XlaOp lhs, XlaOp rhs, return InvalidArgument( "kCompare expects a ComparisonDirection, but none provided."); } - return Compare(shape, updated_lhs, updated_rhs, *direction); + if (type == absl::nullopt) { + return Compare(shape, updated_lhs, updated_rhs, *direction); + } else { + return Compare(shape, updated_lhs, updated_rhs, *direction, *type); + } } if (direction.has_value()) { @@ -658,8 +663,16 @@ XlaOp XlaBuilder::BinaryOpNoBroadcast(HloOpcode binop, const Shape& shape, StatusOr XlaBuilder::Compare(const Shape& shape, XlaOp lhs, XlaOp rhs, ComparisonDirection direction) { + return Compare(shape, lhs, rhs, direction, + Comparison::DefaultComparisonType(shape.element_type())); +} + +StatusOr XlaBuilder::Compare(const Shape& shape, XlaOp lhs, XlaOp rhs, + ComparisonDirection direction, + Comparison::Type type) { HloInstructionProto instr; instr.set_comparison_direction(ComparisonDirectionToString(direction)); + instr.set_comparison_type(ComparisonTypeToString(type)); *instr.mutable_shape() = shape.ToProto(); return AddInstruction(std::move(instr), HloOpcode::kCompare, {lhs, rhs}); } @@ -3512,31 +3525,71 @@ XlaOp Eq(const XlaOp lhs, const XlaOp rhs, return Compare(lhs, rhs, broadcast_dimensions, ComparisonDirection::kEq); } +XlaOp EqTotalOrder(const XlaOp lhs, const XlaOp rhs, + absl::Span broadcast_dimensions) { + auto compare_type = Comparison::Type::kFloatTotalOrder; + return Compare(lhs, rhs, broadcast_dimensions, ComparisonDirection::kEq, + compare_type); +} + XlaOp Ne(const XlaOp lhs, const XlaOp rhs, absl::Span broadcast_dimensions) { return Compare(lhs, rhs, broadcast_dimensions, ComparisonDirection::kNe); } +XlaOp NeTotalOrder(const XlaOp lhs, const XlaOp rhs, + absl::Span broadcast_dimensions) { + auto compare_type = Comparison::Type::kFloatTotalOrder; + return Compare(lhs, rhs, broadcast_dimensions, ComparisonDirection::kNe, + compare_type); +} + XlaOp Ge(const XlaOp lhs, const XlaOp rhs, absl::Span broadcast_dimensions) { return Compare(lhs, rhs, broadcast_dimensions, ComparisonDirection::kGe); } +XlaOp GeTotalOrder(const XlaOp lhs, const XlaOp rhs, + absl::Span broadcast_dimensions) { + auto compare_type = Comparison::Type::kFloatTotalOrder; + return Compare(lhs, rhs, broadcast_dimensions, ComparisonDirection::kGe, + compare_type); +} + XlaOp Gt(const XlaOp lhs, const XlaOp rhs, absl::Span broadcast_dimensions) { return Compare(lhs, rhs, broadcast_dimensions, ComparisonDirection::kGt); } +XlaOp GtTotalOrder(const XlaOp lhs, const XlaOp rhs, + absl::Span broadcast_dimensions) { + auto compare_type = Comparison::Type::kFloatTotalOrder; 
+ return Compare(lhs, rhs, broadcast_dimensions, ComparisonDirection::kGt, + compare_type); +} + XlaOp Le(const XlaOp lhs, const XlaOp rhs, absl::Span broadcast_dimensions) { return Compare(lhs, rhs, broadcast_dimensions, ComparisonDirection::kLe); } +XlaOp LeTotalOrder(const XlaOp lhs, const XlaOp rhs, + absl::Span broadcast_dimensions) { + auto compare_type = Comparison::Type::kFloatTotalOrder; + return Compare(lhs, rhs, broadcast_dimensions, ComparisonDirection::kLe, + compare_type); +} XlaOp Lt(const XlaOp lhs, const XlaOp rhs, absl::Span broadcast_dimensions) { return Compare(lhs, rhs, broadcast_dimensions, ComparisonDirection::kLt); } +XlaOp LtTotalOrder(const XlaOp lhs, const XlaOp rhs, + absl::Span broadcast_dimensions) { + return Compare(lhs, rhs, broadcast_dimensions, ComparisonDirection::kLt, + Comparison::Type::kFloatTotalOrder); +} + XlaOp Compare(const XlaOp lhs, const XlaOp rhs, absl::Span broadcast_dimensions, ComparisonDirection direction) { @@ -3544,6 +3597,13 @@ XlaOp Compare(const XlaOp lhs, const XlaOp rhs, broadcast_dimensions, direction); } +XlaOp Compare(const XlaOp lhs, const XlaOp rhs, + absl::Span broadcast_dimensions, + ComparisonDirection direction, Comparison::Type compare_type) { + return lhs.builder()->BinaryOp(HloOpcode::kCompare, lhs, rhs, + broadcast_dimensions, direction, compare_type); +} + XlaOp Compare(const XlaOp lhs, const XlaOp rhs, ComparisonDirection direction) { return Compare(lhs, rhs, {}, direction); } diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h index 6753b6dd919..6d30195d3d0 100644 --- a/tensorflow/compiler/xla/client/xla_builder.h +++ b/tensorflow/compiler/xla/client/xla_builder.h @@ -792,14 +792,17 @@ class XlaBuilder { // broadcast_dimensions specifies which dimensions to use for broadcasting // when the operation is between tensors of different ranks. The direction is // only used if opcode is kCompare. - XlaOp BinaryOp( - HloOpcode binop, XlaOp lhs, XlaOp rhs, - absl::Span broadcast_dimensions, - absl::optional direction = absl::nullopt); + XlaOp BinaryOp(HloOpcode binop, XlaOp lhs, XlaOp rhs, + absl::Span broadcast_dimensions, + absl::optional direction = absl::nullopt, + absl::optional type = absl::nullopt); // Internal helper method for binary op compare without broadcast dimensions. virtual StatusOr Compare(const Shape& shape, XlaOp lhs, XlaOp rhs, - Comparison::Direction direction); + ComparisonDirection direction); + virtual StatusOr Compare(const Shape& shape, XlaOp lhs, XlaOp rhs, + ComparisonDirection direction, + Comparison::Type type); // Internal helper method that does the building for an arbitrary binary op // with same ranked operands that doesn't broadcast. 
@@ -965,22 +968,13 @@ class XlaBuilder { friend XlaOp Select(XlaOp pred, XlaOp on_true, XlaOp on_false); friend XlaOp Tuple(XlaBuilder* builder, absl::Span elements); friend XlaOp GetTupleElement(XlaOp tuple_data, int64 index); - friend XlaOp Eq(XlaOp lhs, XlaOp rhs, - absl::Span broadcast_dimensions); - friend XlaOp Ne(XlaOp lhs, XlaOp rhs, - absl::Span broadcast_dimensions); - friend XlaOp Ge(XlaOp lhs, XlaOp rhs, - absl::Span broadcast_dimensions); - friend XlaOp Gt(XlaOp lhs, XlaOp rhs, - absl::Span broadcast_dimensions); - friend XlaOp Lt(XlaOp lhs, XlaOp rhs, - absl::Span broadcast_dimensions); - friend XlaOp Le(XlaOp lhs, XlaOp rhs, - absl::Span broadcast_dimensions); friend XlaOp Compare(XlaOp lhs, XlaOp rhs, absl::Span broadcast_dimensions, ComparisonDirection direction); - friend XlaOp Compare(XlaOp lhs, XlaOp rhs, ComparisonDirection direction); + friend XlaOp Compare(XlaOp lhs, XlaOp rhs, + absl::Span broadcast_dimensions, + ComparisonDirection direction, + Comparison::Type compare_type); friend XlaOp Dot(XlaOp lhs, XlaOp rhs, const PrecisionConfig* precision_config); friend XlaOp DotGeneral(XlaOp lhs, XlaOp rhs, @@ -1574,29 +1568,44 @@ XlaOp GetTupleElement(XlaOp tuple_data, int64 index); // Enqueues an equal-to comparison instruction onto the computation. XlaOp Eq(XlaOp lhs, XlaOp rhs, absl::Span broadcast_dimensions = {}); +XlaOp EqTotalOrder(XlaOp lhs, XlaOp rhs, + absl::Span broadcast_dimensions = {}); // Enqueues a not-equal comparison instruction onto the computation. XlaOp Ne(XlaOp lhs, XlaOp rhs, absl::Span broadcast_dimensions = {}); +XlaOp NeTotalOrder(XlaOp lhs, XlaOp rhs, + absl::Span broadcast_dimensions = {}); // Enqueues a greater-or-equal comparison instruction onto the computation. XlaOp Ge(XlaOp lhs, XlaOp rhs, absl::Span broadcast_dimensions = {}); +XlaOp GeTotalOrder(XlaOp lhs, XlaOp rhs, + absl::Span broadcast_dimensions = {}); // Enqueues a greater-than comparison instruction onto the computation. XlaOp Gt(XlaOp lhs, XlaOp rhs, absl::Span broadcast_dimensions = {}); +XlaOp GtTotalOrder(XlaOp lhs, XlaOp rhs, + absl::Span broadcast_dimensions = {}); // Enqueues a less-than comparison instruction onto the computation. XlaOp Lt(XlaOp lhs, XlaOp rhs, absl::Span broadcast_dimensions = {}); +XlaOp LtTotalOrder(XlaOp lhs, XlaOp rhs, + absl::Span broadcast_dimensions = {}); // Enqueues a less-or-equal comparison instruction onto the computation. XlaOp Le(XlaOp lhs, XlaOp rhs, absl::Span broadcast_dimensions = {}); +XlaOp LeTotalOrder(XlaOp lhs, XlaOp rhs, + absl::Span broadcast_dimensions = {}); // Enqueues a comparison instruction onto the computation (optionally without // broadcast_dimensions for consistency with others). 
+XlaOp Compare(XlaOp lhs, XlaOp rhs, + absl::Span broadcast_dimensions, + ComparisonDirection direction, Comparison::Type compare_type); XlaOp Compare(XlaOp lhs, XlaOp rhs, absl::Span broadcast_dimensions, ComparisonDirection direction); diff --git a/tensorflow/compiler/xla/comparison_util.cc b/tensorflow/compiler/xla/comparison_util.cc index 47fb69e3bce..06dd9642cac 100644 --- a/tensorflow/compiler/xla/comparison_util.cc +++ b/tensorflow/compiler/xla/comparison_util.cc @@ -54,32 +54,59 @@ StatusOr StringToComparisonDirection( return it->second; } -Comparison::Comparison(Direction dir, PrimitiveType type) : dir_(dir) { +StatusOr StringToComparisonType( + absl::string_view compare_type_name) { + static auto* type_map = new absl::flat_hash_map({ + {"FLOAT", Comparison::Type::kFloat}, + {"TOTALORDER", Comparison::Type::kFloatTotalOrder}, + {"SIGNED", Comparison::Type::kSigned}, + {"UNSIGNED", Comparison::Type::kUnsigned}, + }); + auto it = type_map->find(compare_type_name); + if (it == type_map->end()) { + return InvalidArgument("Unknown comparison type: %s", compare_type_name); + } + return it->second; +} + +std::string ComparisonTypeToString(Comparison::Type type) { + switch (type) { + case Comparison::Type::kFloat: + return "FLOAT"; + case Comparison::Type::kFloatTotalOrder: + return "TOTALORDER"; + case Comparison::Type::kSigned: + return "SIGNED"; + case Comparison::Type::kUnsigned: + return "UNSIGNED"; + } +} + +Comparison::Comparison(Direction dir, PrimitiveType type) + : dir_(dir), type_(DefaultComparisonType(type)) {} + +Comparison::Type Comparison::DefaultComparisonType(PrimitiveType type) { switch (type) { case S8: case S16: case S32: case S64: - type_ = Type::kSigned; - break; + return Type::kSigned; case PRED: case U8: case U16: case U32: case U64: - type_ = Type::kUnsigned; - break; + return Type::kUnsigned; case F16: case F32: case BF16: case F64: case C64: case C128: - type_ = Type::kFloat; - break; + return Type::kFloat; default: LOG(FATAL) << "Unsupported comparison mode." 
- << ComparisonDirectionToString(dir) << ":" << PrimitiveType_Name(type) << "\n"; } } @@ -164,20 +191,6 @@ bool Comparison::IsAntireflexive() const { } } -/* static */ const char* Comparison::ComparisonTypeToString( - Comparison::Type type) { - switch (type) { - case Type::kFloat: - return "f"; - case Type::kFloatTotalOrder: - return "ft"; - case Type::kSigned: - return "s"; - case Type::kUnsigned: - return "u"; - } -} - std::string Comparison::ToString(std::string prefix1, std::string prefix2) const { return prefix1 + std::string(ComparisonDirectionToString(dir_)) + prefix2 + diff --git a/tensorflow/compiler/xla/comparison_util.h b/tensorflow/compiler/xla/comparison_util.h index 11335c6b5ba..33ae2c67106 100644 --- a/tensorflow/compiler/xla/comparison_util.h +++ b/tensorflow/compiler/xla/comparison_util.h @@ -103,11 +103,11 @@ class Comparison { bool Compare(const T a, const T b) const { return GetComparator()(a, b); } + static Type DefaultComparisonType(PrimitiveType t); private: static Direction Converse(Direction dir); static Direction Inverse(Direction dir); - static const char* ComparisonTypeToString(Type type); const Direction dir_; Type type_; @@ -117,10 +117,14 @@ inline std::ostream& operator<<(std::ostream& os, const Comparison& cmp) { return os << cmp.ToString(); } string ComparisonDirectionToString(Comparison::Direction direction); +std::string ComparisonTypeToString(Comparison::Type type); StatusOr StringToComparisonDirection( absl::string_view direction_name); +StatusOr StringToComparisonType( + absl::string_view compare_type_name); + using ComparisonDirection = Comparison::Direction; } // namespace xla diff --git a/tensorflow/compiler/xla/g3doc/operation_semantics.md b/tensorflow/compiler/xla/g3doc/operation_semantics.md index 3031bfbf2e2..051c1539f6b 100644 --- a/tensorflow/compiler/xla/g3doc/operation_semantics.md +++ b/tensorflow/compiler/xla/g3doc/operation_semantics.md @@ -1235,7 +1235,10 @@ floating-point types. Where `Op` is one of `Eq` (equal-to), `Ne` (not equal-to), `Ge` (greater-or-equal-than), `Gt` (greater-than), `Le` (less-or-equal-than), `Lt` -(less-than). +(less-than). Another set of operators, EqTotalOrder, NeTotalOrder, GeTotalOrder, +GtTotalOrder, LeTotalOrder, and LtTotalOrder, provide the same functionalities, +except that they additionally support a total order over the floating point +numbers, by enforcing -NaN < -Inf < -Finite < -0 < +0 < +Finite < +Inf < +NaN. 
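The ordering described above comes from a bit-level trick rather than a new hardware comparison. Below is a minimal Python sketch (plain Python, not XLA or the ComparisonExpander code) of the key construction: bit-cast the float to an integer and flip values whose sign bit is set, so that ordinary signed integer comparison yields -NaN < -Inf < -Finite < -0 < +0 < +Finite < +Inf < +NaN.

```python
# Illustrative sketch of mapping a float32 to an int32 key whose signed ordering
# is the total order over floats (including NaNs). Not XLA code.
import struct

def total_order_key(f):
    (u,) = struct.unpack("<I", struct.pack("<f", f))   # bit-cast float32 -> uint32
    if u & 0x80000000:                                  # sign bit set (negative or -NaN)
        u = (0x7FFFFFFF - u) & 0xFFFFFFFF               # int32 max minus u, mod 2**32
    (s,) = struct.unpack("<i", struct.pack("<I", u))    # reinterpret the bits as int32
    return s

vals = [float("nan"), 1.0, -0.0, 0.0, float("-inf"), -1.0, float("inf")]
print(sorted(vals, key=total_order_key))
# [-inf, -1.0, -0.0, 0.0, 1.0, inf, nan]
```

In the pass itself the same flip is built from an unsigned subtraction and a select before a single signed compare, and a BF16 input is first widened to F32 because direct BF16 comparisons are not supported.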
Arguments | Type | Semantics --------- | ------- | ---------------------------------------- diff --git a/tensorflow/compiler/xla/primitive_util.cc b/tensorflow/compiler/xla/primitive_util.cc index 2143d1dfbe7..c932469c56a 100644 --- a/tensorflow/compiler/xla/primitive_util.cc +++ b/tensorflow/compiler/xla/primitive_util.cc @@ -112,6 +112,21 @@ xla::PrimitiveType UnsignedIntegralTypeForBitWidth(int64 src_bitwidth) { } } +xla::PrimitiveType SignedIntegralTypeForBitWidth(int64 src_bitwidth) { + switch (src_bitwidth) { + case 8: + return xla::S8; + case 16: + return xla::S16; + case 32: + return xla::S32; + case 64: + return xla::S64; + default: + return xla::PRIMITIVE_TYPE_INVALID; + } +} + PrimitiveType ComplexComponentType(PrimitiveType complex_type) { switch (complex_type) { case C64: diff --git a/tensorflow/compiler/xla/primitive_util.h b/tensorflow/compiler/xla/primitive_util.h index 034c14e8930..1228b4f9a32 100644 --- a/tensorflow/compiler/xla/primitive_util.h +++ b/tensorflow/compiler/xla/primitive_util.h @@ -153,6 +153,8 @@ int BitWidth(PrimitiveType type); PrimitiveType UnsignedIntegralTypeForBitWidth(int64 src_bitwidth); +PrimitiveType SignedIntegralTypeForBitWidth(int64 src_bitwidth); + // Returns the real, imag component type underlying the given complex type. // LOG(FATAL)'s if complex_type is not complex. PrimitiveType ComplexComponentType(PrimitiveType complex_type); diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index bfcdf6fae34..fa7b480cab6 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -1700,7 +1700,10 @@ cc_library( cc_library( name = "hlo_creation_utils", srcs = ["hlo_creation_utils.cc"], - hdrs = ["hlo_creation_utils.h"], + hdrs = [ + "hlo_creation_utils.h", + "//tensorflow/compiler/xla:literal_util", + ], deps = [ ":hlo", ":hlo_module_config", @@ -1816,6 +1819,21 @@ cc_library( ], ) +cc_library( + name = "comparison_expander", + srcs = ["comparison_expander.cc"], + hdrs = ["comparison_expander.h"], + deps = [ + ":hlo", + ":hlo_creation_utils", + ":hlo_pass", + ":op_expander_pass", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla/client/lib:comparators", + ], +) + cc_library( name = "scatter_expander", srcs = ["scatter_expander.cc"], diff --git a/tensorflow/compiler/xla/service/comparison_expander.cc b/tensorflow/compiler/xla/service/comparison_expander.cc new file mode 100644 index 00000000000..5c88ff8cae2 --- /dev/null +++ b/tensorflow/compiler/xla/service/comparison_expander.cc @@ -0,0 +1,133 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/service/comparison_expander.h" + +#include "tensorflow/compiler/xla/client/lib/comparators.h" +#include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_creation_utils.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_instructions.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/util.h" + +namespace xla { + +HloInstruction* BitcastConvertFloatingPointToIntegral( + HloComputation* computation, HloInstruction* value, + const Shape& signed_shape, const Shape& unsigned_shape, + HloInstruction* zero, HloInstruction* max_value) { + // Switch from a floating point value to a integer value in such a way that + // when using the integer value to compare, we get the same result for normal + // values, and -Nan is treated as the smallest value, and Nan is treated as + // the largest value. + // If f is a float, and + // x = bit_cast(f); + // y = x < 0 ? numeric_limits::max() - x : x; + // then y is ordered as an int32 such that finite values have the obvious + // order, -0 is ordered before 0, and -NaN and NaN appear at the beginning + // and end of the ordering. + // Note that in order to avoid -x to overflow, we calculate + // numeric_limits::max() - x as unsigned, and then convert back to + // signed. + auto signed_value = computation->AddInstruction( + HloInstruction::CreateBitcastConvert(signed_shape, value)); + auto unsigned_value = computation->AddInstruction( + HloInstruction::CreateBitcastConvert(unsigned_shape, value)); + auto flipped_value = computation->AddInstruction(HloInstruction::CreateBinary( + unsigned_shape, HloOpcode::kSubtract, max_value, unsigned_value)); + flipped_value = computation->AddInstruction( + HloInstruction::CreateBitcastConvert(signed_shape, flipped_value)); + auto compare_shape = signed_shape; + compare_shape.set_element_type(PRED); + auto is_negative = computation->AddInstruction(HloInstruction::CreateCompare( + compare_shape, signed_value, zero, ComparisonDirection::kLt)); + return computation->AddInstruction( + HloInstruction::CreateTernary(signed_shape, HloOpcode::kSelect, + is_negative, flipped_value, signed_value)); +} + +bool ComparisonExpander::InstructionMatchesPattern( + HloInstruction* instruction) { + if (HloCompareInstruction* compare = + dynamic_cast(instruction)) { + HloInstruction* lhs = instruction->operands()[0]; + if (compare->type() == Comparison::Type::kFloatTotalOrder && + primitive_util::IsFloatingPointType(lhs->shape().element_type())) { + return true; + } + } + return false; +} + +StatusOr ComparisonExpander::ExpandInstruction( + HloInstruction* instruction) { + CHECK(instruction->opcode() == HloOpcode::kCompare); + HloCompareInstruction* compare = + static_cast(instruction); + CHECK(compare->type() == Comparison::Type::kFloatTotalOrder); + HloComputation* computation = instruction->parent(); + HloInstruction* lhs = instruction->operands()[0]; + HloInstruction* rhs = instruction->operands()[1]; + Shape compare_shape = lhs->shape(); + PrimitiveType compare_type = compare_shape.element_type(); + CHECK(primitive_util::IsFloatingPointType(compare_type)); + // Special-case handling for BF16. 
We currently do not support direct + // comparisons with BF16, so we convert to F32 and then use the F32 + // comparison logic. + if (compare_type == BF16) { + compare_type = F32; + compare_shape.set_element_type(compare_type); + lhs = computation->AddInstruction( + HloInstruction::CreateConvert(compare_shape, lhs)); + rhs = computation->AddInstruction( + HloInstruction::CreateConvert(compare_shape, rhs)); + } + + int64 bit_width = primitive_util::BitWidth(compare_type); + PrimitiveType signed_type = + primitive_util::SignedIntegralTypeForBitWidth(bit_width); + PrimitiveType unsigned_type = + primitive_util::UnsignedIntegralTypeForBitWidth(bit_width); + auto signed_shape = compare_shape; + signed_shape.set_element_type(signed_type); + auto unsigned_shape = compare_shape; + unsigned_shape.set_element_type(unsigned_type); + auto zero_value = computation->AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::Zero(signed_type))); + zero_value = computation->AddInstruction(HloInstruction::CreateBroadcast( + signed_shape, zero_value, zero_value->shape().dimensions())); + auto max_signed = computation->AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::MaxValue(signed_type))); + auto max_shape = max_signed->shape(); + max_shape.set_element_type(unsigned_type); + auto max_unsigned = computation->AddInstruction( + HloInstruction::CreateConvert(max_shape, max_signed)); + auto max_value = computation->AddInstruction(HloInstruction::CreateBroadcast( + unsigned_shape, max_unsigned, max_shape.dimensions())); + lhs = BitcastConvertFloatingPointToIntegral( + computation, lhs, signed_shape, unsigned_shape, zero_value, max_value); + rhs = BitcastConvertFloatingPointToIntegral( + computation, rhs, signed_shape, unsigned_shape, zero_value, max_value); + auto new_compare = computation->AddInstruction(HloInstruction::CreateCompare( + instruction->shape(), lhs, rhs, compare->direction(), + Comparison::Type::kSigned)); + VLOG(2) << "New comparison instruction for total order:" + << new_compare->ToString() << "\n"; + return new_compare; +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/service/comparison_expander.h b/tensorflow/compiler/xla/service/comparison_expander.h new file mode 100644 index 00000000000..df8b5dc0137 --- /dev/null +++ b/tensorflow/compiler/xla/service/comparison_expander.h @@ -0,0 +1,47 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_COMPARISON_EXPANDER_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_COMPARISON_EXPANDER_H_ + +#include + +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_pass_interface.h" +#include "tensorflow/compiler/xla/service/op_expander_pass.h" + +namespace xla { + +// A pass which performs expansion of the comparison operator to support total +// order comparison of floating point numbers. 
+class ComparisonExpander : public OpExpanderPass { + public: + explicit ComparisonExpander() = default; + ~ComparisonExpander() override = default; + absl::string_view name() const override { return "comparison-expander"; } + + private: + // Returns `true` if `instruction` should be expanded by this pass. + bool InstructionMatchesPattern(HloInstruction* instruction) override; + // Returns a replacement for `instruction`, or nullptr if no replacement is + // needed (e.g. only the to_apply subcomputation of the instruction was + // modified). + StatusOr ExpandInstruction( + HloInstruction* instruction) override; +}; + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_COMPARISON_EXPANDER_H_ diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index e0317574e59..7c362b2da44 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -145,6 +145,7 @@ cc_library( "//tensorflow/compiler/xla/service:conditional_to_select", "//tensorflow/compiler/xla/service:slow_operation_alarm", "//tensorflow/compiler/xla/service:scatter_expander", + "//tensorflow/compiler/xla/service:comparison_expander", "//tensorflow/compiler/xla/service:slice_sinker", "//tensorflow/compiler/xla:cpu_function_runtime", "//tensorflow/compiler/xla:literal", diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc index aab13f6e8dd..39d2b11ad37 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc @@ -54,6 +54,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/buffer_assignment.h" #include "tensorflow/compiler/xla/service/call_inliner.h" #include "tensorflow/compiler/xla/service/cholesky_expander.h" +#include "tensorflow/compiler/xla/service/comparison_expander.h" #include "tensorflow/compiler/xla/service/conditional_canonicalizer.h" #include "tensorflow/compiler/xla/service/conditional_simplifier.h" #include "tensorflow/compiler/xla/service/conditional_to_select.h" @@ -261,6 +262,7 @@ Status CpuCompiler::RunHloPassesThroughLayoutAssn( pipeline.AddPass(); pipeline.AddPass(); + pipeline.AddPass(); pipeline.AddPass(); pipeline.AddPass(); diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index 47af5756f87..b9ba2100293 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -1168,6 +1168,7 @@ cc_library( "//tensorflow/compiler/xla/service:batchnorm_expander", "//tensorflow/compiler/xla/service:buffer_assignment", "//tensorflow/compiler/xla/service:call_inliner", + "//tensorflow/compiler/xla/service:comparison_expander", "//tensorflow/compiler/xla/service:conditional_canonicalizer", "//tensorflow/compiler/xla/service:conditional_simplifier", "//tensorflow/compiler/xla/service:convolution_4d_expander", diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index 225fa328f3d..f5bf7476059 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -35,6 +35,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/batchnorm_expander.h" #include "tensorflow/compiler/xla/service/buffer_assignment.h" #include "tensorflow/compiler/xla/service/call_inliner.h" +#include "tensorflow/compiler/xla/service/comparison_expander.h" #include "tensorflow/compiler/xla/service/conditional_canonicalizer.h" #include "tensorflow/compiler/xla/service/conditional_simplifier.h" #include "tensorflow/compiler/xla/service/convolution_4d_expander.h" @@ -140,6 +141,9 @@ Status GpuCompiler::OptimizeHloModule( pipeline.AddPass(); pipeline.AddPass(RandomAlgorithm::RNG_PHILOX); + // Comparison total order expander + pipeline.AddPass(); + // Remove zero-sized HLO from the input so that other passes don't have to // handle it. pipeline.AddPass(); diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto index e043216c17e..17a7b18c84b 100644 --- a/tensorflow/compiler/xla/service/hlo.proto +++ b/tensorflow/compiler/xla/service/hlo.proto @@ -35,7 +35,7 @@ import "tensorflow/compiler/xla/xla_data.proto"; option cc_enable_arenas = true; // Serialization of HloInstruction. -// Next ID: 72 +// Next ID: 73 message HloInstructionProto { reserved 10; reserved "parameter_name"; @@ -248,6 +248,9 @@ message HloInstructionProto { // RNG algorithm used by kRngBitGenerator. xla.RandomAlgorithm rng_algorithm = 70; + + // The comparison type used for kCompare. + string comparison_type = 72; } // Serialization of HloComputation. diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 94d53ebe0b1..2ce3c12b4e9 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -174,8 +174,19 @@ StatusOr> HloInstruction::CreateFromProto( comparison_direction, StringToComparisonDirection(proto.comparison_direction())); } - instruction = - CreateCompare(shape, operands(0), operands(1), *comparison_direction); + auto comparison_type_str = proto.comparison_type(); + if (!comparison_type_str.empty()) { + // If a comparison type is specified, it *must* be valid. + TF_ASSIGN_OR_RETURN(auto comparison_type, + StringToComparisonType(comparison_type_str)); + instruction = CreateCompare(shape, operands(0), operands(1), + *comparison_direction, comparison_type); + } else { + // Allow the specify of comparison type to be optional. + // The comparison type will be determined by the types of the operands. + instruction = CreateCompare(shape, operands(0), operands(1), + *comparison_direction); + } break; } case HloOpcode::kTriangularSolve: { @@ -926,8 +937,9 @@ HloInstruction::CreateRngBitGenerator(const Shape& shape, HloInstruction* state, /* static */ std::unique_ptr HloInstruction::CreateCompare( const Shape& shape, HloInstruction* lhs, HloInstruction* rhs, - ComparisonDirection direction) { - return absl::make_unique(shape, lhs, rhs, direction); + ComparisonDirection direction, absl::optional type) { + return absl::make_unique(shape, lhs, rhs, direction, + type); } /* static */ std::unique_ptr diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index e29323c25b4..bdd64c908f0 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -595,7 +595,8 @@ class HloInstruction { // Creates a compare op, performing the comparison specified in direction. 
static std::unique_ptr CreateCompare( const Shape& shape, HloInstruction* lhs, HloInstruction* rhs, - Comparison::Direction direction); + Comparison::Direction direction, + absl::optional type = absl::nullopt); static std::unique_ptr CreateTriangularSolve( const Shape& shape, HloInstruction* a, HloInstruction* b, diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc index 3d34fa03a80..dbc1d85d1bb 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.cc +++ b/tensorflow/compiler/xla/service/hlo_instructions.cc @@ -204,12 +204,13 @@ std::unique_ptr HloFftInstruction::CloneWithNewOperandsImpl( fft_length_); } -HloCompareInstruction::HloCompareInstruction(const Shape& shape, - HloInstruction* lhs, - HloInstruction* rhs, - ComparisonDirection direction) +HloCompareInstruction::HloCompareInstruction( + const Shape& shape, HloInstruction* lhs, HloInstruction* rhs, + ComparisonDirection direction, absl::optional type) : HloInstruction(HloOpcode::kCompare, shape), - compare_(direction, lhs->shape().element_type()) { + compare_(direction, type ? (*type) + : Comparison::DefaultComparisonType( + lhs->shape().element_type())) { AppendOperand(lhs); AppendOperand(rhs); } @@ -218,12 +219,21 @@ HloInstructionProto HloCompareInstruction::ToProto() const { HloInstructionProto proto = HloInstruction::ToProto(); proto.set_comparison_direction( ComparisonDirectionToString(compare_.GetDirection())); + proto.set_comparison_type(ComparisonTypeToString(compare_.GetType())); return proto; } std::vector HloCompareInstruction::ExtraAttributesToStringImpl( const HloPrintOptions& options) const { - return {StrCat("direction=", ComparisonDirectionToString(direction()))}; + std::vector result; + result.push_back( + StrCat("direction=", ComparisonDirectionToString(direction()))); + if (compare_.GetType() != + Comparison::DefaultComparisonType(operand(0)->shape().element_type())) { + result.push_back( + StrCat("type=", ComparisonTypeToString(compare_.GetType()))); + } + return result; } bool HloCompareInstruction::IdenticalSlowPath( @@ -238,8 +248,8 @@ std::unique_ptr HloCompareInstruction::CloneWithNewOperandsImpl( const Shape& shape, absl::Span new_operands, HloCloneContext* context) const { CHECK_EQ(new_operands.size(), 2); - return absl::make_unique(shape, new_operands[0], - new_operands[1], direction()); + return absl::make_unique( + shape, new_operands[0], new_operands[1], direction(), type()); } namespace { diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h index 51317b32bd0..3f92bb92f02 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.h +++ b/tensorflow/compiler/xla/service/hlo_instructions.h @@ -136,8 +136,10 @@ class HloCompareInstruction : public HloInstruction { public: explicit HloCompareInstruction(const Shape& shape, HloInstruction* lhs, HloInstruction* rhs, - ComparisonDirection direction); + ComparisonDirection direction, + absl::optional type); ComparisonDirection direction() const { return compare_.GetDirection(); } + Comparison::Type type() const { return compare_.GetType(); } HloInstructionProto ToProto() const override; private: diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc index a093a9d0f52..2afa06a5df4 100644 --- a/tensorflow/compiler/xla/service/hlo_parser.cc +++ b/tensorflow/compiler/xla/service/hlo_parser.cc @@ -194,6 +194,7 @@ class HloParserImpl : public HloParser { 
kBracedHloComputationList, kFftType, kComparisonDirection, + kComparisonType, kWindow, kConvolutionDimensionNumbers, kSharding, @@ -327,6 +328,7 @@ class HloParserImpl : public HloParser { bool ParseOpcode(HloOpcode* result); bool ParseFftType(FftType* result); bool ParseComparisonDirection(ComparisonDirection* result); + bool ParseComparisonType(Comparison::Type* result); bool ParseFusionKind(HloInstruction::FusionKind* result); bool ParseRandomDistribution(RandomDistribution* result); bool ParseRandomAlgorithm(RandomAlgorithm* result); @@ -1362,14 +1364,16 @@ bool HloParserImpl::ParseInstructionRhs(HloComputation::Builder* builder, } case HloOpcode::kCompare: { optional direction; + optional type; attrs["direction"] = {/*required=*/true, AttrTy::kComparisonDirection, &direction}; + attrs["type"] = {/*required=*/false, AttrTy::kComparisonType, &type}; if (!ParseOperands(&operands, /*expected_size=*/2) || !ParseAttributes(attrs)) { return false; } instruction = builder->AddInstruction(HloInstruction::CreateCompare( - shape, operands[0], operands[1], *direction)); + shape, operands[0], operands[1], *direction, type)); break; } case HloOpcode::kCholesky: { @@ -3018,6 +3022,14 @@ bool HloParserImpl::ParseAttributeHelper( ->emplace(result); return true; } + case AttrTy::kComparisonType: { + Comparison::Type result; + if (!ParseComparisonType(&result)) { + return false; + } + static_cast*>(attr_out_ptr)->emplace(result); + return true; + } case AttrTy::kEnum: { if (lexer_.GetKind() != TokKind::kIdent) { return TokenError("expects an enumeration value"); @@ -4145,6 +4157,21 @@ bool HloParserImpl::ParseComparisonDirection(ComparisonDirection* result) { return true; } +bool HloParserImpl::ParseComparisonType(Comparison::Type* result) { + VLOG(1) << "ParseComparisonType"; + if (lexer_.GetKind() != TokKind::kIdent) { + return TokenError("expects comparison type"); + } + std::string val = lexer_.GetStrVal(); + auto status_or_result = StringToComparisonType(val); + if (!status_or_result.ok()) { + return TokenError(StrFormat("expects comparison type but sees: %s", val)); + } + *result = status_or_result.ValueOrDie(); + lexer_.Lex(); + return true; +} + bool HloParserImpl::ParseFusionKind(HloInstruction::FusionKind* result) { VLOG(3) << "ParseFusionKind"; if (lexer_.GetKind() != TokKind::kIdent) { diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc index 7880075dcbe..aba6aeff999 100644 --- a/tensorflow/compiler/xla/service/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc @@ -230,7 +230,7 @@ R"(HloModule SelectR1F32WithCmpR1F32sFromParamsSmall_module ENTRY %SelectR1F32WithCmpR1F32sFromParamsSmall.v4 (v1: f32[4], v2: f32[4]) -> f32[4] { %v1 = f32[4]{0} parameter(0), sharding={maximal device=1} %v2 = f32[4]{0} parameter(1), sharding={maximal device=1} - %greater-than = pred[4]{0} compare(f32[4]{0} %v1, f32[4]{0} %v2), direction=GT, sharding={replicated} + %greater-than = pred[4]{0} compare(f32[4]{0} %v1, f32[4]{0} %v2), direction=GT, type=TOTALORDER, sharding={replicated} ROOT %select = f32[4]{0} select(pred[4]{0} %greater-than, f32[4]{0} %v1, f32[4]{0} %v2), sharding={} } @@ -512,7 +512,7 @@ R"(HloModule R4F32OverlapSmall_module %ge_F32.v3 (lhs: f32[], rhs: f32[]) -> pred[] { %lhs = f32[] parameter(0) %rhs = f32[] parameter(1) - ROOT %greater-than-or-equal-to = pred[] compare(f32[] %lhs, f32[] %rhs), direction=GE + ROOT %greater-than-or-equal-to = pred[] compare(f32[] %lhs, f32[] %rhs), direction=GE, 
type=TOTALORDER } %add_F32.v3 (lhs.1: f32[], rhs.1: f32[]) -> f32[] { diff --git a/tensorflow/compiler/xla/service/interpreter/BUILD b/tensorflow/compiler/xla/service/interpreter/BUILD index 7a4eefc1ab6..3444d4cae42 100644 --- a/tensorflow/compiler/xla/service/interpreter/BUILD +++ b/tensorflow/compiler/xla/service/interpreter/BUILD @@ -34,6 +34,7 @@ cc_library( "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla/service:algebraic_simplifier", "//tensorflow/compiler/xla/service:cholesky_expander", + "//tensorflow/compiler/xla/service:comparison_expander", "//tensorflow/compiler/xla/service:compiler", "//tensorflow/compiler/xla/service:computation_placer", "//tensorflow/compiler/xla/service:custom_call_target_registry", diff --git a/tensorflow/compiler/xla/service/interpreter/compiler.cc b/tensorflow/compiler/xla/service/interpreter/compiler.cc index 1649be2ca8f..a059482d832 100644 --- a/tensorflow/compiler/xla/service/interpreter/compiler.cc +++ b/tensorflow/compiler/xla/service/interpreter/compiler.cc @@ -21,6 +21,7 @@ limitations under the License. #include "absl/memory/memory.h" #include "tensorflow/compiler/xla/service/algebraic_simplifier.h" #include "tensorflow/compiler/xla/service/cholesky_expander.h" +#include "tensorflow/compiler/xla/service/comparison_expander.h" #include "tensorflow/compiler/xla/service/computation_placer.h" #include "tensorflow/compiler/xla/service/custom_call_target_registry.h" #include "tensorflow/compiler/xla/service/dynamic_index_splitter.h" @@ -81,6 +82,7 @@ Status InterpreterCompiler::RunHloOptimization(HloModule* hlo_module) { pipeline.AddPass(); pipeline.AddPass(); + pipeline.AddPass(); pipeline.AddPass(); pipeline.AddPass( hlo_module->mutable_entry_computation_layout(), diff --git a/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc b/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc index a956b85a940..fdc679a61c6 100644 --- a/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc +++ b/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc @@ -1203,6 +1203,16 @@ XLA_TEST_F(ArrayElementwiseOpTest, CompareEqF32s) { ComputeAndCompareR1(&builder, {false, false, true, false, false}, {}); } +XLA_TEST_F(ArrayElementwiseOpTest, CompareEqF32sTO) { + SetFastMathDisabled(true); + XlaBuilder builder(TestName()); + auto lhs = ConstantR1(&builder, {-2.5f, 25.5f, 2.25f, NAN, 6.0f}); + auto rhs = ConstantR1(&builder, {10.0f, 5.0f, 2.25f, NAN, NAN}); + EqTotalOrder(lhs, rhs); + + ComputeAndCompareR1(&builder, {false, false, true, true, false}, {}); +} + XLA_TEST_F(ArrayElementwiseOpTest, CompareEqZeroElementF32s) { XlaBuilder builder(TestName()); auto lhs = ConstantR1(&builder, {}); @@ -1222,6 +1232,18 @@ XLA_TEST_F(ArrayElementwiseOpTest, CompareGeF32s) { ComputeAndCompareR1(&builder, {false, true, true, false, false}, {}); } +XLA_TEST_F(ArrayElementwiseOpTest, CompareGeF32sTO) { + SetFastMathDisabled(true); + XlaBuilder builder(TestName()); + auto lhs = + ConstantR1(&builder, {-2.5f, 25.5f, 2.25f, NAN, 6.0f, 6.0f}); + auto rhs = ConstantR1(&builder, {10.0f, 5.0f, 1.0f, 10.0f, NAN, -NAN}); + GeTotalOrder(lhs, rhs); + + ComputeAndCompareR1(&builder, {false, true, true, true, false, true}, + {}); +} + XLA_TEST_F(ArrayElementwiseOpTest, CompareGtF32s) { SetFastMathDisabled(true); XlaBuilder builder(TestName()); From 5161a45676dc8959a2d4aaeca5b8efa5335461f7 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Mon, 10 Aug 2020 09:52:40 -0700 Subject: [PATCH 2443/2522] Added virtual destructors. 
PiperOrigin-RevId: 325824783 Change-Id: I06f8add05dc77dd93310738157c4cb83adf1e795 --- tensorflow/lite/delegates/gpu/cl/buffer.cc | 2 -- tensorflow/lite/delegates/gpu/cl/buffer.h | 2 +- tensorflow/lite/delegates/gpu/cl/linear_storage.h | 2 ++ tensorflow/lite/delegates/gpu/cl/texture2d.cc | 2 -- tensorflow/lite/delegates/gpu/cl/texture2d.h | 2 +- 5 files changed, 4 insertions(+), 6 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/buffer.cc b/tensorflow/lite/delegates/gpu/cl/buffer.cc index 8639e8bbf18..31770fca47e 100644 --- a/tensorflow/lite/delegates/gpu/cl/buffer.cc +++ b/tensorflow/lite/delegates/gpu/cl/buffer.cc @@ -132,8 +132,6 @@ Buffer& Buffer::operator=(Buffer&& buffer) { return *this; } -Buffer::~Buffer() { Release(); } - void Buffer::Release() { if (buffer_) { clReleaseMemObject(buffer_); diff --git a/tensorflow/lite/delegates/gpu/cl/buffer.h b/tensorflow/lite/delegates/gpu/cl/buffer.h index dc5befebea2..dbc43463bc7 100644 --- a/tensorflow/lite/delegates/gpu/cl/buffer.h +++ b/tensorflow/lite/delegates/gpu/cl/buffer.h @@ -61,7 +61,7 @@ class Buffer : public GPUObject { Buffer(const Buffer&) = delete; Buffer& operator=(const Buffer&) = delete; - ~Buffer(); + virtual ~Buffer() { Release(); } // for profiling and memory statistics uint64_t GetMemorySizeInBytes() const { return size_; } diff --git a/tensorflow/lite/delegates/gpu/cl/linear_storage.h b/tensorflow/lite/delegates/gpu/cl/linear_storage.h index 1bc855f4205..b69f76b9c1a 100644 --- a/tensorflow/lite/delegates/gpu/cl/linear_storage.h +++ b/tensorflow/lite/delegates/gpu/cl/linear_storage.h @@ -61,6 +61,8 @@ class LinearStorage : public GPUObject { public: LinearStorage() {} + virtual ~LinearStorage() {} + // Move only LinearStorage(LinearStorage&& storage); LinearStorage& operator=(LinearStorage&& storage); diff --git a/tensorflow/lite/delegates/gpu/cl/texture2d.cc b/tensorflow/lite/delegates/gpu/cl/texture2d.cc index cbeafe04c05..5edf64e83e7 100644 --- a/tensorflow/lite/delegates/gpu/cl/texture2d.cc +++ b/tensorflow/lite/delegates/gpu/cl/texture2d.cc @@ -118,8 +118,6 @@ Texture2D& Texture2D::operator=(Texture2D&& texture) { return *this; } -Texture2D::~Texture2D() { Release(); } - void Texture2D::Release() { if (texture_) { clReleaseMemObject(texture_); diff --git a/tensorflow/lite/delegates/gpu/cl/texture2d.h b/tensorflow/lite/delegates/gpu/cl/texture2d.h index 54a2732fc90..0e972de8cd3 100644 --- a/tensorflow/lite/delegates/gpu/cl/texture2d.h +++ b/tensorflow/lite/delegates/gpu/cl/texture2d.h @@ -57,7 +57,7 @@ class Texture2D : public GPUObject { Texture2D(const Texture2D&) = delete; Texture2D& operator=(const Texture2D&) = delete; - ~Texture2D(); + virtual ~Texture2D() { Release(); } cl_mem GetMemoryPtr() const { return texture_; } From 662291071f6c6271138e52c73a61a31308d5bc6e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 10 Aug 2020 10:12:27 -0700 Subject: [PATCH 2444/2522] Integrate LLVM at llvm/llvm-project@54cb552b9620 Updates LLVM usage to match [54cb552b9620](https://github.com/llvm/llvm-project/commit/54cb552b9620) PiperOrigin-RevId: 325829818 Change-Id: If43fd8a2f7453600e07781bcf288b09d1cd8ffc1 --- tensorflow/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index a726bf642d1..73b3fb42a96 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -712,8 +712,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. 
- LLVM_COMMIT = "38537307e502c1ac9a09e6f75f9208db1327a0bf" - LLVM_SHA256 = "c801bf0f2ebfce86dbf7ad39c40ee371f422e8d07213f4ca67e5e46c7cb200ed" + LLVM_COMMIT = "54cb552b962097d0e3ef7306b69a3c82cc7fff37" + LLVM_SHA256 = "42a65541c62e8349cac068e7f44cb1d9d41addf0dbab0002cc7a7d7203bcb35b" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From e1654c78feae0663e6cbf1b13d8a54832deb3129 Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Mon, 10 Aug 2020 10:18:01 -0700 Subject: [PATCH 2445/2522] Retire temporary release toggle. PiperOrigin-RevId: 325831070 Change-Id: If723c0bd89255522c8cbfd02156c30db23ea53b8 --- tensorflow/python/autograph/core/converter_testing.py | 2 -- tensorflow/python/autograph/operators/control_flow.py | 6 ------ tensorflow/python/autograph/utils/testing.py | 2 -- 3 files changed, 10 deletions(-) diff --git a/tensorflow/python/autograph/core/converter_testing.py b/tensorflow/python/autograph/core/converter_testing.py index 2909cf3f8bc..9f2604dec94 100644 --- a/tensorflow/python/autograph/core/converter_testing.py +++ b/tensorflow/python/autograph/core/converter_testing.py @@ -21,7 +21,6 @@ from __future__ import print_function import contextlib import imp import inspect -import os import sys import six @@ -101,7 +100,6 @@ class TestCase(test.TestCase): def setUp(self): # AutoGraph tests must run in graph mode to properly test control flow. - os.environ['AUTOGRAPH_CREATE_SYMBOLS_IN_LOOPS'] = '1' self.graph = ops.Graph().as_default() self.graph.__enter__() diff --git a/tensorflow/python/autograph/operators/control_flow.py b/tensorflow/python/autograph/operators/control_flow.py index 0106efda5dd..7b307ed5020 100644 --- a/tensorflow/python/autograph/operators/control_flow.py +++ b/tensorflow/python/autograph/operators/control_flow.py @@ -60,7 +60,6 @@ from __future__ import division from __future__ import print_function import functools -import os import traceback import numpy as np @@ -973,11 +972,6 @@ def _try_handling_undefineds( """ state_modified = False - # TODO(mdan): Remove once the default option is stable. - if os.getenv('AUTOGRAPH_CREATE_SYMBOLS_IN_LOOPS', '1') == '0': - _verify_loop_init_vars(init_vars, symbol_names) - return False, init_vars - try: # Stage an iteration of the loop body in a temporary graph. with func_graph.FuncGraph('tmp').as_default(): diff --git a/tensorflow/python/autograph/utils/testing.py b/tensorflow/python/autograph/utils/testing.py index 1da82db66c8..df60583bf85 100644 --- a/tensorflow/python/autograph/utils/testing.py +++ b/tensorflow/python/autograph/utils/testing.py @@ -18,7 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import os import re import types import unittest @@ -108,7 +107,6 @@ class AutoGraphTestCase(test.TestCase): def setUp(self): super().setUp() - os.environ['AUTOGRAPH_CREATE_SYMBOLS_IN_LOOPS'] = '1' self.variables = {} self.trace_log = [] op_callbacks.add_op_callback(self._op_callback) From 96e80d355547079f2bb8db0fddd68a924883c5d9 Mon Sep 17 00:00:00 2001 From: Yujing Zhang Date: Mon, 10 Aug 2020 10:29:54 -0700 Subject: [PATCH 2446/2522] Support local multi-device functions with outputs on remote devices. 
PiperOrigin-RevId: 325834034 Change-Id: Iee8c0c6694dfcee108b73ee9dec41658907cb187 --- tensorflow/c/eager/c_api_remote_test.cc | 88 +++++--- .../core/common_runtime/eager/execute.cc | 207 +++++++++++++----- .../common_runtime/eager/kernel_and_device.cc | 27 +-- .../common_runtime/eager/kernel_and_device.h | 22 +- .../eager/kernel_and_device_test.cc | 2 +- .../process_function_library_runtime.cc | 32 +-- .../eager/eager_service_impl_test.cc | 21 +- .../eager/remote_copy_node.cc | 17 +- 8 files changed, 268 insertions(+), 148 deletions(-) diff --git a/tensorflow/c/eager/c_api_remote_test.cc b/tensorflow/c/eager/c_api_remote_test.cc index 94c32cf3f30..e99f6d6e170 100644 --- a/tensorflow/c/eager/c_api_remote_test.cc +++ b/tensorflow/c/eager/c_api_remote_test.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include "absl/strings/str_cat.h" #include "tensorflow/c/eager/c_api.h" #include "tensorflow/c/eager/c_api_experimental.h" #include "tensorflow/c/eager/c_api_internal.h" @@ -115,40 +116,42 @@ void TestRemoteExecute(bool async) { TEST(CAPI, RemoteExecute) { TestRemoteExecute(false); } TEST(CAPI, RemoteExecuteAsync) { TestRemoteExecute(true); } -string MatMulFunction() { +string MatMulFunction(const string& matmul_device) { tensorflow::FunctionDef def; CHECK(tensorflow::protobuf::TextFormat::ParseFromString( - " signature {" - " name: 'MatMulFunction'" - " input_arg {" - " name: 'a'" - " type: DT_FLOAT" - " }" - " input_arg {" - " name: 'b'" - " type: DT_FLOAT" - " }" - " output_arg {" - " name: 'm'" - " type: DT_FLOAT" - " }" - " }" - " node_def {" - " name: 'matmul'" - " op: 'MatMul'" - " input: 'a'" - " input: 'b'" - " attr {" - " key: 'T'" - " value {" - " type: DT_FLOAT" - " }" - " }" - " }" - " ret {" - " key: 'm'" - " value: 'matmul:product'" - " }", + absl::StrCat(" signature {" + " name: 'MatMulFunction'" + " input_arg {" + " name: 'a'" + " type: DT_FLOAT" + " }" + " input_arg {" + " name: 'b'" + " type: DT_FLOAT" + " }" + " output_arg {" + " name: 'm'" + " type: DT_FLOAT" + " }" + " }" + " node_def {" + " name: 'matmul'" + " op: 'MatMul'" + " input: 'a'" + " input: 'b'" + " device: '", + matmul_device, "'", + " attr {" + " key: 'T'" + " value {" + " type: DT_FLOAT" + " }" + " }" + " }" + " ret {" + " key: 'm'" + " value: 'matmul:product'" + " }"), &def)); return def.SerializeAsString(); } @@ -157,7 +160,8 @@ string MatMulFunction() { // which creates a remote remote input, to simulate a scenario that the remote // input is not ready when we start running an op or a function. void TestRemoteExecuteSilentCopies(bool async, bool remote, bool func, - bool heavy_load_on_streaming_rpc) { + bool heavy_load_on_streaming_rpc, + bool remote_func_outputs = false) { tensorflow::ServerDef server_def = GetServerDef(3); // This server def has the task index set to 0. @@ -214,7 +218,8 @@ void TestRemoteExecuteSilentCopies(bool async, bool remote, bool func, TFE_Op* matmul = nullptr; if (func) { - string function_def = MatMulFunction(); + const string matmul_device = remote_func_outputs ? 
task2_name : ""; + string function_def = MatMulFunction(matmul_device); TFE_ContextAddFunctionDef(ctx, function_def.data(), function_def.size(), status); CHECK_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); @@ -250,7 +255,7 @@ void TestRemoteExecuteSilentCopies(bool async, bool remote, bool func, EXPECT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); // TODO(gjn): Add support for waiting on async local mirrors - if (!remote && !async) { + if (!remote && !async && !remote_func_outputs) { auto remote_arg = tensorflow::TensorHandleFromInterface(tensorflow::unwrap(h1_task2)); // The input handles should never change since they have been mirrored. @@ -329,6 +334,19 @@ TEST(CAPI, RemoteExecuteSilentCopiesLocalAsyncFunc) { TestRemoteExecuteSilentCopies(/*async=*/true, /*remote=*/false, /*func=*/true, /*heavy_load_on_streaming_rpc=*/false); } +// TODO(b/162618595): Enable this test once we remove the check of remote +// outputs in ProcessFunctionLibraryRuntime. +TEST(CAPI, DISABLED_RemoteExecuteSilentCopiesLocalFuncRemoteOutputs) { + TestRemoteExecuteSilentCopies(/*async=*/false, /*remote=*/false, + /*func=*/true, + /*heavy_load_on_streaming_rpc=*/false, + /*remote_func_outputs=*/true); +} +TEST(CAPI, DISABLED_RemoteExecuteSilentCopiesLocalAsyncFuncRemoteOutputs) { + TestRemoteExecuteSilentCopies(/*async=*/true, /*remote=*/false, /*func=*/true, + /*heavy_load_on_streaming_rpc=*/false, + /*remote_func_outputs=*/true); +} TEST(CAPI, RemoteExecuteSilentCopiesLocalAsyncFuncOrdering) { // A remote input may be not ready when we start running a function. Test that // the function execution should wait until the remote input is ready. diff --git a/tensorflow/core/common_runtime/eager/execute.cc b/tensorflow/core/common_runtime/eager/execute.cc index e51456eaa27..4bffd887750 100644 --- a/tensorflow/core/common_runtime/eager/execute.cc +++ b/tensorflow/core/common_runtime/eager/execute.cc @@ -584,6 +584,101 @@ Status GetOrCreateKernelAndDevice( return Status::OK(); } +Status CreateUnshapedOutput( + const KernelAndDevice& kernel, const int output_num, Device* output_device, + const DataType& output_dtype, + const absl::optional& remote_func_params, + EagerContext* ctx, TensorHandle** output) { +#if defined(IS_MOBILE_PLATFORM) + return errors::Unimplemented( + "Remote outputs are not available on mobile devices."); +#else // !IS_MOBILE_PLATFORM + int64 op_id; + if (remote_func_params.has_value()) { + op_id = remote_func_params.value().op_id; + } else { + return errors::InvalidArgument( + "Unable to find a remote op id for a remote output of ", kernel.name()); + } + string remote_task; + if (!DeviceNameUtils::GetTaskName(output_device->parsed_name(), + &remote_task)) { + return errors::InvalidArgument( + "Unable to find remote task corresponding to device ", + output_device->name()); + } + *output = TensorHandle::CreateUnshapedRemoteHandle( + op_id, output_num, remote_task, output_dtype, output_device, ctx); + return Status::OK(); +#endif // !IS_MOBILE_PLATFORM +} + +Status AddOrExecuteNode(core::RefCountPtr kernel, + EagerOperation* op, TensorHandle** retvals) { + EagerExecutor& executor = op->Executor(); + EagerContext& ctx = op->EagerContext(); + GraphCollector* graph_collector = nullptr; + if (ctx.ShouldStoreGraphs()) { + graph_collector = ctx.GetGraphCollector(); + } + const int num_outputs = kernel->num_outputs(); + absl::optional remote_func_params = + op->remote_func_params(); + if (kernel->IsCrossProcess() && !remote_func_params.has_value()) { + // Create an eager op id for a cross-process 
function if not exist. +#if defined(IS_MOBILE_PLATFORM) + return errors::Unimplemented( + "Cross-process functions are not supported on mobile devices."); +#else // !IS_MOBILE_PLATFORM + const int64 op_id = ctx.RemoteMgr()->NextOpId(); + remote_func_params = + EagerRemoteFunctionParams{op_id, /*step_id=*/absl::nullopt}; +#endif // !IS_MOBILE_PLATFORM + } + if (executor.Async()) { + const DataTypeVector& output_dtypes = kernel->output_dtypes(); + for (int i = 0, end = num_outputs; i < end; ++i) { + Device* output_device = ctx.CanonicalDevice(kernel->OutputDevice(i)); + if (output_device == nullptr || output_device->IsLocal()) { + retvals[i] = TensorHandle::CreateEmptyLocalHandle( + /* d= */ output_device, /* op_device= */ kernel->device(), + /* resource_device= */ kernel->OutputResourceDevice(i), + output_dtypes[i], &ctx); + } else { + TF_RETURN_IF_ERROR( + CreateUnshapedOutput(*kernel, i, output_device, output_dtypes[i], + remote_func_params, &ctx, &retvals[i])); + } + } + auto node = absl::make_unique( + &ctx, op->Inputs(), remote_func_params, std::move(kernel), + graph_collector, op->GetCancellationManager(), + absl::Span(retvals, num_outputs), op->GetStackTrace()); + // Release the inputs from the eager operation since the AsyncExecuteNode + // would have taken ownership. This allows the inputs to be forwarded if + // possible. + op->Clear(); + // For async mode, execution order will make sure that all + // input handles are ready before executing them. + // TODO(b/137118203): Consider executing "cheap" kernels inline for + // performance. + return executor.AddOrExecute(std::move(node)); + } else { + for (int i = 0, end = num_outputs; i < end; ++i) { + retvals[i] = nullptr; + } + ExecuteNode node(&ctx, op->Inputs(), remote_func_params, kernel, + graph_collector, op->GetCancellationManager(), + {retvals, static_cast(num_outputs)}); + Status s = executor.SyncExecute(&node); + // We release the inputs AFTER executing the operation in sync mode since + // ExecuteNode does not increment the reference count and thus does not have + // ownership of the inputs while executing. + op->Clear(); + return s; + } +} + // There are a lot of references to devices in this function and around. // Here is what they mean: // EagerOperation::Device(): The device on which the user requested the op @@ -626,47 +721,7 @@ Status EagerLocalExecute(EagerOperation* op, TensorHandle** retvals, } } - GraphCollector* graph_collector = nullptr; - if (ctx.ShouldStoreGraphs()) { - graph_collector = ctx.GetGraphCollector(); - } - - Status s; - if (executor.Async()) { - const DataTypeVector& output_dtypes = kernel->output_dtypes(); - for (int i = 0, end = num_outputs; i < end; ++i) { - retvals[i] = TensorHandle::CreateEmptyLocalHandle( - /* d= */ ctx.CanonicalDevice(kernel->OutputDevice(i)), - /* op_device= */ kernel->device(), - /* resource_device= */ kernel->OutputResourceDevice(i), - output_dtypes[i], &ctx); - } - auto node = absl::make_unique( - &ctx, op->Inputs(), op->remote_func_params(), std::move(kernel), - graph_collector, op->GetCancellationManager(), - absl::Span(retvals, num_outputs), op->GetStackTrace()); - // Release the inputs from the eager operation since the AsyncExecuteNode - // would have taken ownership. This allows the inputs to be forwarded if - // possible. - op->Clear(); - // For async mode, execution order will make sure that all - // input handles are ready before executing them. - // TODO(b/137118203): Consider executing "cheap" kernels inline for - // performance. 
- s = executor.AddOrExecute(std::move(node)); - } else { - for (int i = 0, end = num_outputs; i < end; ++i) { - retvals[i] = nullptr; - } - ExecuteNode node(&ctx, op->Inputs(), op->remote_func_params(), kernel, - graph_collector, op->GetCancellationManager(), - {retvals, static_cast(num_outputs)}); - s = executor.SyncExecute(&node); - // We release the inputs AFTER executing the operation in sync mode since - // ExecuteNode does not increment the reference count and thus does not have - // ownership of the inputs while executing. - op->Clear(); - } + Status s = AddOrExecuteNode(std::move(kernel), op, retvals); // Since the operation failed, we need to Unref any outputs if they were // allocated. if (!s.ok()) { @@ -917,18 +972,34 @@ Status EagerRemoteExecute(EagerOperation* op, TensorHandle** retvals, } #endif // IS_MOBILE_PLATFORM -Status GetKernelOutputs(std::vector* outputs, int num_outputs, - TensorHandle** retvals, EagerContext* ctx, - KernelAndDevice* kernel) { +Status GetKernelOutputs( + std::vector* outputs, int num_outputs, + TensorHandle** retvals, EagerContext* ctx, KernelAndDevice* kernel, + const absl::optional& remote_func_params) { for (int i = 0, end = num_outputs; i < end; ++i) { if (retvals[i] == nullptr) { - retvals[i] = TensorHandle::CreateLocalHandle( - std::move((*outputs)[i]), - /* d= */ ctx->CanonicalDevice(kernel->OutputDevice(i)), - /* op_device= */ kernel->device(), - /* resource_device= */ kernel->OutputResourceDevice(i), ctx); + EagerKernelRet& ret = (*outputs)[i]; + Device* output_device = ctx->CanonicalDevice(kernel->OutputDevice(i)); + if (ret.index() == 0) { + retvals[i] = TensorHandle::CreateLocalHandle( + std::move(absl::get(ret)), + /* d= */ output_device, + /* op_device= */ kernel->device(), + /* resource_device= */ kernel->OutputResourceDevice(i), ctx); + } else { + const DataTypeVector& output_dtypes = kernel->output_dtypes(); + TF_RETURN_IF_ERROR( + CreateUnshapedOutput(*kernel, i, output_device, output_dtypes[i], + remote_func_params, ctx, &retvals[i])); +#if !defined(IS_MOBILE_PLATFORM) + TF_RETURN_IF_ERROR( + retvals[i]->SetRemoteShape(absl::get(ret), + output_device, ctx->GetContextViewId())); +#endif // IS_MOBILE_PLATFORM + } } else { - if (TF_PREDICT_FALSE(kernel->device() != retvals[i]->op_device())) { + if (!kernel->IsFunction() && + TF_PREDICT_FALSE(kernel->device() != retvals[i]->op_device())) { return errors::Internal( "Kernel output tensor handle has a different op device than the " "kernel. This should never happen."); @@ -940,9 +1011,21 @@ Status GetKernelOutputs(std::vector* outputs, int num_outputs, "the specified kernel output device. 
This should never happen."); } - TF_RETURN_IF_ERROR( - retvals[i]->SetTensor(std::move((*outputs)[i]), - ctx->CanonicalDevice(kernel->OutputDevice(i)))); + EagerKernelRet& ret = (*outputs)[i]; + if (ret.index() == 0) { + TF_RETURN_IF_ERROR(retvals[i]->SetTensor( + std::move(absl::get(ret)), + ctx->CanonicalDevice(kernel->OutputDevice(i)))); + } else { +#if defined(IS_MOBILE_PLATFORM) + return errors::Unimplemented( + "Remote outputs are not available on mobile devices."); +#else // !IS_MOBILE_PLATFORM + TF_RETURN_IF_ERROR(retvals[i]->SetRemoteShape( + absl::get(ret), + absl::get(retvals[i]->device()), ctx->GetContextViewId())); +#endif // !IS_MOBILE_PLATFORM + } } } return Status::OK(); @@ -1022,7 +1105,7 @@ Status EagerKernelExecute( absl::Span retvals) { profiler::TraceMe activity("EagerKernelExecute", profiler::TraceMeLevel::kInfo); - std::vector outputs(1); + std::vector outputs(1); ExecuteNodeArgs inputs(op_inputs.size()); TF_RETURN_IF_ERROR(inputs.Init(ctx, op_inputs, kernel)); @@ -1047,7 +1130,7 @@ Status EagerKernelExecute( "happen. Please file a bug with the TensorFlow team."); } return GetKernelOutputs(&outputs, retvals.size(), retvals.data(), ctx, - kernel.get()); + kernel.get(), remote_func_params); } namespace { @@ -1229,7 +1312,7 @@ void EagerKernelExecuteAsync( GraphCollector* graph_collector, CancellationManager* cancellation_manager, TensorHandle** retvals, int num_outputs, StatusCallback done) { auto inputs = std::make_shared(op_inputs.size()); - auto outputs = std::make_shared>(1); + auto outputs = std::make_shared>(1); Status s = inputs->Init(ctx, op_inputs, kernel); if (!s.ok()) { @@ -1242,7 +1325,8 @@ void EagerKernelExecuteAsync( ctx->StepContainer(), *inputs, outputs.get(), cancellation_manager, remote_func_params, [retvals, inputs, outputs, num_outputs, ctx, graph_collector, - kernel_raw = kernel.get(), done = std::move(done)](const Status& s) { + remote_func_params, kernel_raw = kernel.get(), + done = std::move(done)](const Status& s) { auto wrapped_done = [&](const Status& s) { kernel_raw->Unref(); done(s); @@ -1256,7 +1340,7 @@ void EagerKernelExecuteAsync( } DCHECK_EQ(num_outputs, outputs->size()); wrapped_done(GetKernelOutputs(outputs.get(), num_outputs, retvals, ctx, - kernel_raw)); + kernel_raw, remote_func_params)); }); } } // namespace @@ -1316,7 +1400,12 @@ void EagerLocalExecuteAsync(EagerOperation* op, TensorHandle** retvals, } for (int i = 0, end = num_outputs; i < end; ++i) { - retvals[i] = nullptr; + const DataTypeVector& output_dtypes = kernel->output_dtypes(); + retvals[i] = TensorHandle::CreateEmptyLocalHandle( + /* d= */ ctx.CanonicalDevice(kernel->OutputDevice(i)), + /* op_device= */ kernel->device(), + /* resource_device= */ kernel->OutputResourceDevice(i), + output_dtypes[i], &ctx); } EagerKernelExecuteAsync( diff --git a/tensorflow/core/common_runtime/eager/kernel_and_device.cc b/tensorflow/core/common_runtime/eager/kernel_and_device.cc index 46aea040295..00d832365e9 100644 --- a/tensorflow/core/common_runtime/eager/kernel_and_device.cc +++ b/tensorflow/core/common_runtime/eager/kernel_and_device.cc @@ -239,7 +239,8 @@ struct OpExecutionState : public core::RefCounted { Status KernelAndDeviceOp::Run( ScopedStepContainer* step_container, const EagerKernelArgs& inputs, - std::vector* outputs, CancellationManager* cancellation_manager, + std::vector* outputs, + CancellationManager* cancellation_manager, const absl::optional& remote_func_params) { OpKernelContext::Params params; params.device = device_; @@ -316,7 +317,8 @@ Status 
KernelAndDeviceOp::Run( Status KernelAndDeviceFunc::Run( ScopedStepContainer* step_container, const EagerKernelArgs& inputs, - std::vector* outputs, CancellationManager* cancellation_manager, + std::vector* outputs, + CancellationManager* cancellation_manager, const absl::optional& remote_func_params) { Notification n; Status status; @@ -331,7 +333,8 @@ Status KernelAndDeviceFunc::Run( void KernelAndDeviceFunc::RunAsync( ScopedStepContainer* step_container, const EagerKernelArgs& inputs, - std::vector* outputs, CancellationManager* cancellation_manager, + std::vector* outputs, + CancellationManager* cancellation_manager, const absl::optional& remote_func_params, std::function done) { std::shared_ptr opts = nullptr; @@ -392,25 +395,13 @@ void KernelAndDeviceFunc::RunAsync( }, profiler::ContextType::kTfExecutor, opts->step_id, profiler::TraceMeLevel::kInfo); - std::vector* function_rets = new std::vector; - pflr_->Run(*opts, handle_, inputs, function_rets, - [opts, outputs, function_rets, rendezvous, local_cm, - step_container, this, done = std::move(done)](const Status& s) { + pflr_->Run(*opts, handle_, inputs, outputs, + [opts, rendezvous, local_cm, step_container, this, + done = std::move(done)](const Status& s) { rendezvous->Unref(); if (step_container == nullptr) { this->step_container_.CleanUp(); } - if (s.ok()) { - // TODO(b/162618595): Change the type of `outputs` to - // support TensorShapes for remote outputs and remove the - // FunctionRet to Tensor conversion here. - for (const auto& ret : *function_rets) { - if (ret.index() == 0) { - outputs->push_back(absl::get(ret)); - } - } - } - delete function_rets; done(s); }); } diff --git a/tensorflow/core/common_runtime/eager/kernel_and_device.h b/tensorflow/core/common_runtime/eager/kernel_and_device.h index 87c2d7a5510..7bf4afbaf24 100644 --- a/tensorflow/core/common_runtime/eager/kernel_and_device.h +++ b/tensorflow/core/common_runtime/eager/kernel_and_device.h @@ -82,6 +82,8 @@ class EagerKernelArgs : public FunctionArgsInterface { gtl::InlinedVector tensor_args_; }; +typedef absl::variant EagerKernelRet; + // KernelAndDevice encapsulates the logic needed to run a computation eagerly. // The computation can be a single instantiated kernel (implemented by // KernelAndDeviceOp below) or a multi-device function (implemented by @@ -124,10 +126,13 @@ class KernelAndDevice : public core::RefCounted { virtual bool IsFunction() { return false; } + virtual bool IsCrossProcess() { return false; } + // TODO(ashankar): Handle list-valued inputs. virtual Status Run( ScopedStepContainer* step_container, const EagerKernelArgs& inputs, - std::vector* outputs, CancellationManager* cancellation_manager, + std::vector* outputs, + CancellationManager* cancellation_manager, const absl::optional& remote_func_params) = 0; // Execute kernel asynchronously when applicable. Different from `Run` which @@ -140,7 +145,8 @@ class KernelAndDevice : public core::RefCounted { // from sync execution. 
virtual void RunAsync( ScopedStepContainer* step_container, const EagerKernelArgs& inputs, - std::vector* outputs, CancellationManager* cancellation_manager, + std::vector* outputs, + CancellationManager* cancellation_manager, const absl::optional& remote_func_params, StatusCallback done) = 0; @@ -203,14 +209,15 @@ class KernelAndDeviceOp final : public KernelAndDevice { GraphCollector* graph_collector) override; Status Run(ScopedStepContainer* step_container, const EagerKernelArgs& inputs, - std::vector* outputs, + std::vector* outputs, CancellationManager* cancellation_manager, const absl::optional& remote_func_params) override; void RunAsync( ScopedStepContainer* step_container, const EagerKernelArgs& inputs, - std::vector* outputs, CancellationManager* cancellation_manager, + std::vector* outputs, + CancellationManager* cancellation_manager, const absl::optional& remote_func_params, StatusCallback done) override { // Trivial async implementation on top of the sync version @@ -288,6 +295,8 @@ class KernelAndDeviceFunc : public KernelAndDevice { bool IsFunction() override { return true; }; + bool IsCrossProcess() override { return is_cross_process_; } + Status InstantiateFunc(const Context& ctx, const NodeDef& ndef, GraphCollector* graph_collector); @@ -295,14 +304,15 @@ class KernelAndDeviceFunc : public KernelAndDevice { GraphCollector* graph_collector) override; Status Run(ScopedStepContainer* step_container, const EagerKernelArgs& inputs, - std::vector* outputs, + std::vector* outputs, CancellationManager* cancellation_manager, const absl::optional& remote_func_params) override; void RunAsync( ScopedStepContainer* step_container, const EagerKernelArgs& inputs, - std::vector* outputs, CancellationManager* cancellation_manager, + std::vector* outputs, + CancellationManager* cancellation_manager, const absl::optional& remote_func_params, StatusCallback done) override; diff --git a/tensorflow/core/common_runtime/eager/kernel_and_device_test.cc b/tensorflow/core/common_runtime/eager/kernel_and_device_test.cc index a7aac4a8f6d..33e85b25fb4 100644 --- a/tensorflow/core/common_runtime/eager/kernel_and_device_test.cc +++ b/tensorflow/core/common_runtime/eager/kernel_and_device_test.cc @@ -133,7 +133,7 @@ void BM_KernelAndDeviceRun(int iters) { gtl::InlinedVector inputs; inputs.push_back(TensorValue(&t)); inputs.push_back(TensorValue(&t)); - std::vector outputs; + std::vector outputs; NodeDef ndef(AttrBuilder("MatMul") .Set("T", DT_FLOAT) .Set("transpose_a", false) diff --git a/tensorflow/core/common_runtime/process_function_library_runtime.cc b/tensorflow/core/common_runtime/process_function_library_runtime.cc index b31b2b78bf0..3248d3f10a7 100644 --- a/tensorflow/core/common_runtime/process_function_library_runtime.cc +++ b/tensorflow/core/common_runtime/process_function_library_runtime.cc @@ -999,30 +999,30 @@ Status ProcessFunctionLibraryRuntime::GetOutputDevices( for (const auto& pair : data->glue_) { const ComponentFunctionData& comp_data = pair.second; DCHECK(comp_data.ret_alloc_attrs.size() == comp_data.ret_indices.size()); + if (comp_data.ret_indices.empty()) { + continue; + } const string& target = pair.first; FunctionLibraryRuntime* target_flr = GetFLR(target); + Device* target_device = nullptr; if (target_flr == nullptr) { - if (!comp_data.ret_indices.empty()) { - return errors::Unimplemented( - "Currently, outputting tensors on remote devices is not supported. 
" - "The ", - comp_data.ret_indices[0], - "-th return value of the function outputs to target_device: ", - target, - " Please copy the tensor to local device explicitly using " - "tf.identity and return the new Tensor instead."); - } - continue; + // TODO(b/162618595): Remove this error once we support a remote + // multi-device function with remote outputs. + return errors::Unimplemented( + "Currently, outputting tensors on remote devices is not supported." + "The ", + comp_data.ret_indices[0], + "-th return value of the function outputs to target_device: ", target, + " Please copy the tensor to local device explicitly using " + "tf.identity and return the new Tensor instead."); + } else { + target_device = target_flr->device(); } - Device* target_device = target_flr->device(); - const FunctionBody* fbody = target_flr->GetFunctionBody(comp_data.handle); - DCHECK(fbody != nullptr); - output_devices->resize(data->num_outputs_); for (int j = 0; j < comp_data.ret_indices.size(); ++j) { int ret_index = comp_data.ret_indices[j]; - if (fbody->ret_types[j] == DT_RESOURCE) { + if (data->ret_types_[ret_index] == DT_RESOURCE) { (*output_devices)[ret_index] = target_device; } else { (*output_devices)[ret_index] = diff --git a/tensorflow/core/distributed_runtime/eager/eager_service_impl_test.cc b/tensorflow/core/distributed_runtime/eager/eager_service_impl_test.cc index be81355cbc8..76fc12d1adc 100644 --- a/tensorflow/core/distributed_runtime/eager/eager_service_impl_test.cc +++ b/tensorflow/core/distributed_runtime/eager/eager_service_impl_test.cc @@ -771,12 +771,17 @@ class FunctionWithRemoteInputsTest : public EagerServiceImplTest { &close_context_response)); } - void CheckOutputsAndClose(const int64 op_id) { + void CheckOutputsAndClose(const std::vector& outputs, + const int64 op_id) { const tensorflow::Tensor* t = nullptr; tensorflow::TensorHandle* tensor_handle; TF_ASSERT_OK(eager_service_impl_.GetTensorHandle( context_id_, RemoteTensorHandleInternal(2, 0), &tensor_handle)); TF_ASSERT_OK(tensor_handle->Tensor(&t)); + EXPECT_EQ(outputs.size(), 1); + EXPECT_EQ(outputs.at(0).index(), 1); + const TensorShape& shape = absl::get(outputs.at(0)); + EXPECT_EQ(shape, t->shape()); CheckOutputTensorAndClose(*t); } @@ -845,11 +850,7 @@ TEST_F(FunctionWithRemoteInputsTest, EagerPFLRTest) { }); done.WaitForNotification(); TF_ASSERT_OK(status); - EXPECT_EQ(outputs.size(), 1); - EXPECT_EQ(outputs.at(0).index(), 1); - const TensorShape& shape = absl::get(outputs.at(0)); - EXPECT_EQ(shape, TensorShape({2, 2})); - CheckOutputsAndClose(op_id); + CheckOutputsAndClose(outputs, op_id); } // Test executes a remote function with local input and output tensors. @@ -940,13 +941,13 @@ TEST_F(FunctionWithRemoteInputsTest, KernelAndDeviceFuncTest) { *handle = remote_handles.at(index); return Status::OK(); }); - std::vector outputs; + std::vector outputs; TF_ASSERT_OK(kernel->Run(/*step_container=*/nullptr, inputs, &outputs, /*cancellation_manager=*/nullptr, /*remote_func_params=*/absl::nullopt)); - CheckOutputsAndClose(op_id); + CheckOutputsAndClose(outputs, op_id); } // Test executes a remote function through KernelAndDeviceFunc::RunAsync. 
@@ -987,7 +988,7 @@ TEST_F(FunctionWithRemoteInputsTest, KernelAndDeviceFuncAsyncTest) { *handle = remote_handles.at(index); return Status::OK(); }); - std::vector outputs; + std::vector outputs; Status status; Notification n; @@ -1000,7 +1001,7 @@ TEST_F(FunctionWithRemoteInputsTest, KernelAndDeviceFuncAsyncTest) { }); n.WaitForNotification(); TF_ASSERT_OK(status); - CheckOutputsAndClose(op_id); + CheckOutputsAndClose(outputs, op_id); } // Test creates a context and attempts to send a tensor (using the RPC), and diff --git a/tensorflow/core/distributed_runtime/eager/remote_copy_node.cc b/tensorflow/core/distributed_runtime/eager/remote_copy_node.cc index d4b5fe38964..a1d0e09faf9 100644 --- a/tensorflow/core/distributed_runtime/eager/remote_copy_node.cc +++ b/tensorflow/core/distributed_runtime/eager/remote_copy_node.cc @@ -192,9 +192,20 @@ Status RemoteCopyNode::RunLocalRecv(EagerOperation* op, TF_RETURN_IF_ERROR(CreateUncachedKernelAndDeviceOp(op, &kernel)); EagerKernelArgs args; - return kernel->Run(/*step_container*/ nullptr, args, outputs, - captured_state_->recv_cancellation(), - /*remote_func_params=*/absl::nullopt); + std::vector rets; + TF_RETURN_IF_ERROR(kernel->Run(/*step_container*/ nullptr, args, &rets, + captured_state_->recv_cancellation(), + /*remote_func_params=*/absl::nullopt)); + outputs->clear(); + for (const auto& ret : rets) { + if (ret.index() == 0) { + outputs->push_back(absl::get(ret)); + } else { + return errors::Internal( + "Expect to receive a Tensor but got a TensorShape."); + } + } + return Status::OK(); } void RemoteCopyNode::RunRemoteRecv(EagerOperation* op, StatusCallback done) { From 79448c479679170b84354ff452ca31a5f34007f5 Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Mon, 10 Aug 2020 10:53:55 -0700 Subject: [PATCH 2447/2522] Touch ~/.bigqueryrc when running bq commands "bq load" corrupts output on its first invocation if ~/.bigqueryrc doesn't exist. AFAIK this cannot be turned off. 
PiperOrigin-RevId: 325840422 Change-Id: Iefe81ac14b1b4679530fca152e7f336dc9e76f68 --- tensorflow/tools/ci_build/sizetrack_helper.py | 24 ++++++++++++------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/tensorflow/tools/ci_build/sizetrack_helper.py b/tensorflow/tools/ci_build/sizetrack_helper.py index ff5ff1bf60d..8377d733c56 100755 --- a/tensorflow/tools/ci_build/sizetrack_helper.py +++ b/tensorflow/tools/ci_build/sizetrack_helper.py @@ -54,9 +54,11 @@ import csv import datetime import os import os.path +import pathlib import platform import subprocess + parser = argparse.ArgumentParser( usage=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument( @@ -231,6 +233,15 @@ def gcloud(tool, args, stdin=None): return ret.stdout.strip() +def bq(args, stdin=None): + """Helper for running bq, the BigQuery tool.""" + # bq prints extra messages to stdout if ~/.bigqueryrc doesn't exist + pathlib.Path(pathlib.Path.home() / ".bigqueryrc").touch() + return gcloud( + "bq", ["--project_id", FLAGS.project, "--headless", *args], + stdin=stdin) + + def get_all_tested_commits(): """Get details about the full commit range tested by this invocation.""" head_info = git_pretty("HEAD", PRETTY_HEAD_INFO, n=1) @@ -245,12 +256,8 @@ def get_all_tested_commits(): # --format=csv returns an empty string if no results, or else two lines: # commit # COMMIT_HASH - earliest_commit = gcloud( - "bq", [ - "--project_id", FLAGS.project, "--headless", "-q", "query", - "--format", "csv", "--nouse_legacy_sql" - ], - stdin=query_earliest_included_commit) + earliest_commit = bq(["query", "--format", "csv", "--nouse_legacy_sql"], + stdin=query_earliest_included_commit) # Compute the commit/CL range since the last test if earliest_commit: @@ -359,9 +366,8 @@ def main(): with open("data.tsv", "w") as tsvfile: writer = csv.writer(tsvfile, delimiter="\t", quoting=csv.QUOTE_MINIMAL) writer.writerow(next_tsv_row) - gcloud("bq", [ - "--project_id", FLAGS.project, "--headless", "-q", "load", - "--source_format", "CSV", "--field_delimiter", "tab", + bq([ + "load", "--source_format", "CSV", "--field_delimiter", "tab", PROJECT_LEVEL_TABLE_NAME, "data.tsv", SCHEMA ]) From 7c991932387ac05f727dec82c88329ae5f97b3f5 Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Mon, 10 Aug 2020 10:57:19 -0700 Subject: [PATCH 2448/2522] Fix the typo in the API name for "MobileNetV3Samll" PiperOrigin-RevId: 325841242 Change-Id: Ie05542d29674c5e3d8ea8e51799094256e90ee52 --- tensorflow/python/keras/applications/mobilenet_v3.py | 2 +- .../tools/api/golden/v1/tensorflow.keras.applications.pbtxt | 2 +- .../tools/api/golden/v2/tensorflow.keras.applications.pbtxt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/keras/applications/mobilenet_v3.py b/tensorflow/python/keras/applications/mobilenet_v3.py index bdf2ca40142..44ba6fd3a39 100644 --- a/tensorflow/python/keras/applications/mobilenet_v3.py +++ b/tensorflow/python/keras/applications/mobilenet_v3.py @@ -348,7 +348,7 @@ def MobileNetV3(stack_fn, return model -@keras_export('keras.applications.MobileNetV3Samll') +@keras_export('keras.applications.MobileNetV3Small') def MobileNetV3Small(input_shape=None, alpha=1.0, minimalistic=False, diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.applications.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.applications.pbtxt index 9f367742398..60c9b6d2909 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.applications.pbtxt +++ 
b/tensorflow/tools/api/golden/v1/tensorflow.keras.applications.pbtxt @@ -125,7 +125,7 @@ tf_module { argspec: "args=[\'input_shape\', \'alpha\', \'minimalistic\', \'include_top\', \'weights\', \'input_tensor\', \'classes\', \'pooling\', \'dropout_rate\', \'classifier_activation\'], varargs=None, keywords=None, defaults=[\'None\', \'1.0\', \'False\', \'True\', \'imagenet\', \'None\', \'1000\', \'None\', \'0.2\', \'softmax\'], " } member_method { - name: "MobileNetV3Samll" + name: "MobileNetV3Small" argspec: "args=[\'input_shape\', \'alpha\', \'minimalistic\', \'include_top\', \'weights\', \'input_tensor\', \'classes\', \'pooling\', \'dropout_rate\', \'classifier_activation\'], varargs=None, keywords=None, defaults=[\'None\', \'1.0\', \'False\', \'True\', \'imagenet\', \'None\', \'1000\', \'None\', \'0.2\', \'softmax\'], " } member_method { diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.applications.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.applications.pbtxt index 9f367742398..60c9b6d2909 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.applications.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.applications.pbtxt @@ -125,7 +125,7 @@ tf_module { argspec: "args=[\'input_shape\', \'alpha\', \'minimalistic\', \'include_top\', \'weights\', \'input_tensor\', \'classes\', \'pooling\', \'dropout_rate\', \'classifier_activation\'], varargs=None, keywords=None, defaults=[\'None\', \'1.0\', \'False\', \'True\', \'imagenet\', \'None\', \'1000\', \'None\', \'0.2\', \'softmax\'], " } member_method { - name: "MobileNetV3Samll" + name: "MobileNetV3Small" argspec: "args=[\'input_shape\', \'alpha\', \'minimalistic\', \'include_top\', \'weights\', \'input_tensor\', \'classes\', \'pooling\', \'dropout_rate\', \'classifier_activation\'], varargs=None, keywords=None, defaults=[\'None\', \'1.0\', \'False\', \'True\', \'imagenet\', \'None\', \'1000\', \'None\', \'0.2\', \'softmax\'], " } member_method { From 4fcaeef9048077099bb20d5b40ced6edf68eaa2b Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Mon, 10 Aug 2020 11:04:33 -0700 Subject: [PATCH 2449/2522] Re-introduce some infeed and outfeed ops after a revert caused by failing tests. 
PiperOrigin-RevId: 325843303 Change-Id: I081e2ba0437fe9812ea61cf90d6bef62c50b3908 --- tensorflow/core/tpu/kernels/BUILD | 109 ++++ .../core/tpu/kernels/image_resize_ops.cc | 155 +++++ tensorflow/core/tpu/kernels/infeed_ops.cc | 548 ++++++++++++++++++ tensorflow/core/tpu/kernels/infeed_ops.h | 69 +++ tensorflow/core/tpu/kernels/outfeed_ops.cc | 116 ++++ tensorflow/core/tpu/kernels/outfeed_ops.h | 69 +++ .../core/tpu/kernels/replication_ops.cc | 27 + .../core/tpu/kernels/tpu_handle_to_key_op.cc | 62 ++ tensorflow/core/tpu/kernels/transfer_ops.cc | 98 ++++ tensorflow/core/tpu/kernels/transfer_ops.h | 56 ++ tensorflow/core/tpu/tpu_library_init_fns.inc | 1 + 11 files changed, 1310 insertions(+) create mode 100644 tensorflow/core/tpu/kernels/image_resize_ops.cc create mode 100644 tensorflow/core/tpu/kernels/infeed_ops.cc create mode 100644 tensorflow/core/tpu/kernels/infeed_ops.h create mode 100644 tensorflow/core/tpu/kernels/outfeed_ops.cc create mode 100644 tensorflow/core/tpu/kernels/outfeed_ops.h create mode 100644 tensorflow/core/tpu/kernels/replication_ops.cc create mode 100644 tensorflow/core/tpu/kernels/tpu_handle_to_key_op.cc create mode 100644 tensorflow/core/tpu/kernels/transfer_ops.cc create mode 100644 tensorflow/core/tpu/kernels/transfer_ops.h diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index 1336f52ed34..6d3369022ad 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -28,10 +28,16 @@ tf_kernel_library( deps = [ ":cross_replica_ops", ":host_compute_ops", + ":image_resize_ops", + ":infeed_ops", + ":outfeed_ops", + ":replication_ops", ":topk_ops", ":tpu_compile_op", ":tpu_configuration_ops", ":tpu_execute_op", + ":tpu_handle_to_key_op", + ":transfer_ops", ], ) @@ -684,3 +690,106 @@ cc_library( ], alwayslink = 1, ) + +cc_library( + name = "infeed_ops", + srcs = ["infeed_ops.cc"], + hdrs = ["infeed_ops.h"], + visibility = ["//visibility:public"], + deps = [ + ":transfer_ops", + "//tensorflow/compiler/jit:xla_device_no_jit_rewrite_registration", + "//tensorflow/compiler/tf2xla:common", + "//tensorflow/compiler/xla:util", + "//tensorflow/core:framework", + "//tensorflow/core/common_runtime:dma_helper", + "//tensorflow/core/framework:protos_all_cc", + "//tensorflow/core/kernels:transpose_functor", + "//tensorflow/core/platform:status", + "//tensorflow/core/profiler/lib:traceme", + "//tensorflow/core/tpu:tpu_api", + "//tensorflow/core/tpu:tpu_defs", + "//tensorflow/stream_executor:multi_platform_manager", + "//tensorflow/stream_executor/tpu:c_api_conversions", + "//tensorflow/stream_executor/tpu:tpu_transfer_manager_base", + "//tensorflow/stream_executor/tpu:tpu_transfer_manager_interface", + ], + alwayslink = True, +) + +cc_library( + name = "transfer_ops", + srcs = ["transfer_ops.cc"], + hdrs = ["transfer_ops.h"], + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/compiler/jit:xla_device_no_jit_rewrite_registration", + "//tensorflow/core:framework", + "//tensorflow/core:lib_internal", + "//tensorflow/core/kernels:ops_util", + "//tensorflow/core/profiler/lib:traceme", + "//tensorflow/stream_executor:multi_platform_manager", + "//tensorflow/stream_executor/tpu:tpu_node_context", + "//tensorflow/stream_executor/tpu:tpu_platform_interface", + "//tensorflow/stream_executor/tpu:tpu_transfer_manager_interface", + ], + alwayslink = True, +) + +cc_library( + name = "outfeed_ops", + srcs = ["outfeed_ops.cc"], + hdrs = ["outfeed_ops.h"], + visibility = ["//visibility:public"], + deps = [ + ":transfer_ops", 
+ "//tensorflow/compiler/jit:xla_device_no_jit_rewrite_registration", + "//tensorflow/compiler/tf2xla:common", + "//tensorflow/core:framework", + "//tensorflow/core/framework:protos_all_cc", + "//tensorflow/core/tpu:tpu_defs", + "//tensorflow/stream_executor:multi_platform_manager", + ], + alwayslink = True, +) + +cc_library( + name = "image_resize_ops", + srcs = ["image_resize_ops.cc"], + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/compiler/tf2xla:common", + "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/xla/client:xla_builder", + "//tensorflow/compiler/xla/client/lib:constants", + "//tensorflow/core:framework", + "//tensorflow/core/tpu:tpu_defs", + "@com_google_absl//absl/strings", + ], + alwayslink = True, +) + +cc_library( + name = "replication_ops", + srcs = ["replication_ops.cc"], + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/compiler/jit:xla_device_no_jit_rewrite_registration", + "//tensorflow/core:framework", + "//tensorflow/core/tpu:tpu_defs", + ], + alwayslink = True, +) + +cc_library( + name = "tpu_handle_to_key_op", + srcs = ["tpu_handle_to_key_op.cc"], + visibility = ["//visibility:public"], + deps = [ + ":tpu_compilation_cache_interface", + ":tpu_op_consts", + "//tensorflow/core:framework", + "//tensorflow/core/tpu:tpu_configuration", + ], + alwayslink = True, +) diff --git a/tensorflow/core/tpu/kernels/image_resize_ops.cc b/tensorflow/core/tpu/kernels/image_resize_ops.cc new file mode 100644 index 00000000000..fd0f5e4c7a6 --- /dev/null +++ b/tensorflow/core/tpu/kernels/image_resize_ops.cc @@ -0,0 +1,155 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "absl/strings/match.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" +#include "tensorflow/compiler/tf2xla/shape_util.h" +#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" +#include "tensorflow/compiler/tf2xla/xla_op_registry.h" +#include "tensorflow/compiler/xla/client/lib/constants.h" +#include "tensorflow/compiler/xla/client/xla_builder.h" +#include "tensorflow/core/framework/kernel_def_builder.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/tpu/tpu_defs.h" + +namespace tensorflow { + +class TpuCustomResizeOp : public XlaOpKernel { + public: + explicit TpuCustomResizeOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("align_corners", &align_corners_)); + OP_REQUIRES_OK(ctx, + ctx->GetAttr("half_pixel_centers", &half_pixel_centers_)); + } + + xla::Shape GetOutputShape(XlaOpKernelContext* ctx) const { + std::vector out_size; + auto status = ctx->ConstantInputAsIntVector(1, &out_size); + CHECK_EQ(out_size.size(), 2) << status.ToString(); + xla::Shape output_shape = + TensorShapeToXLAShape(ctx->output_xla_type(0), ctx->InputShape(0)); + output_shape.mutable_dimensions()[1] = out_size[0]; + output_shape.mutable_dimensions()[2] = out_size[1]; + return output_shape; + } + + string OpaqueField() const { + return absl::StrCat("\"", align_corners_, half_pixel_centers_, "\""); + } + + void CompileGrad(XlaOpKernelContext* ctx, const char* target, + const xla::Shape& output_shape) { + auto input_shape = + TensorShapeToXLAShape(ctx->output_xla_type(0), ctx->InputShape(0)); + if (ctx->InputShape(1).dim_sizes() == ctx->InputShape(0).dim_sizes()) { + ctx->SetOutput( + 0, xla::ConvertElementType(ctx->Input(0), ctx->output_xla_type(0))); + return; + } + // The gradient should be done in two phases for large resizes. 
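+  // A note on the heuristic below: when the integer ratio of input to output
+  // size is greater than three in both spatial dimensions, the resize is
+  // split in two. The first custom call keeps dimension 1 at the input size
+  // and resizes only dimension 2 to the target size; the second custom call
+  // then resizes dimension 1, producing `output_shape`.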
+ auto input = ctx->Input(0); + if (input_shape.dimensions(1) / output_shape.dimensions(1) > 3 && + input_shape.dimensions(2) / output_shape.dimensions(2) > 3) { + auto intermediate_shape = output_shape; + intermediate_shape.mutable_dimensions()[1] = input_shape.dimensions(1); + input = xla::CustomCall(ctx->builder(), target, {ctx->Input(0)}, + intermediate_shape, OpaqueField()); + } + ctx->SetOutput(0, xla::CustomCall(ctx->builder(), target, {input}, + output_shape, OpaqueField())); + } + + void CompileForward(XlaOpKernelContext* ctx, const char* target) { + auto output_shape = GetOutputShape(ctx); + if (ctx->InputShape(0).dim_size(1) == output_shape.dimensions(1) && + ctx->InputShape(0).dim_size(2) == output_shape.dimensions(2)) { + ctx->SetOutput( + 0, xla::ConvertElementType(ctx->Input(0), ctx->output_xla_type(0))); + return; + } + if (ctx->InputShape(0).dim_size(1) == 1 && + ctx->InputShape(0).dim_size(2) == 1) { + ctx->SetOutput(0, + ctx->Input(0) + xla::Zeros(ctx->builder(), output_shape)); + return; + } + ctx->SetOutput(0, xla::CustomCall(ctx->builder(), target, {ctx->Input(0)}, + output_shape, OpaqueField())); + } + + private: + bool align_corners_; + bool half_pixel_centers_; +}; + +class TpuResizeNearestNeighborOp : public TpuCustomResizeOp { + public: + explicit TpuResizeNearestNeighborOp(OpKernelConstruction* ctx) + : TpuCustomResizeOp(ctx) {} + void Compile(XlaOpKernelContext* ctx) override { + CompileForward(ctx, "ResizeNearest"); + } +}; + +class TpuResizeBilinearOp : public TpuCustomResizeOp { + public: + explicit TpuResizeBilinearOp(OpKernelConstruction* ctx) + : TpuCustomResizeOp(ctx) {} + void Compile(XlaOpKernelContext* ctx) override { + CompileForward(ctx, "ResizeBilinear"); + } +}; + +class TpuResizeNearestNeighborGradOp : public TpuCustomResizeOp { + public: + explicit TpuResizeNearestNeighborGradOp(OpKernelConstruction* ctx) + : TpuCustomResizeOp(ctx) {} + void Compile(XlaOpKernelContext* ctx) override { + CompileGrad(ctx, "ResizeNearestGrad", GetOutputShape(ctx)); + } +}; + +class TpuResizeBilinearGradOp : public TpuCustomResizeOp { + public: + explicit TpuResizeBilinearGradOp(OpKernelConstruction* ctx) + : TpuCustomResizeOp(ctx) {} + void Compile(XlaOpKernelContext* ctx) override { + auto output_shape = + TensorShapeToXLAShape(ctx->output_xla_type(0), ctx->InputShape(1)); + CompileGrad(ctx, "ResizeBilinearGrad", output_shape); + } +}; + +REGISTER_XLA_OP(Name("ResizeNearestNeighbor") + .CompileTimeConstantInput("size") + .Device(DEVICE_TPU_XLA_JIT), + TpuResizeNearestNeighborOp); + +REGISTER_XLA_OP(Name("ResizeNearestNeighborGrad") + .CompileTimeConstantInput("size") + .Device(DEVICE_TPU_XLA_JIT), + TpuResizeNearestNeighborGradOp); + +REGISTER_XLA_OP(Name("ResizeBilinear") + .CompileTimeConstantInput("size") + .Device(DEVICE_TPU_XLA_JIT), + TpuResizeBilinearOp); + +REGISTER_XLA_OP(Name("ResizeBilinearGrad").Device(DEVICE_TPU_XLA_JIT), + TpuResizeBilinearGradOp); + +} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/infeed_ops.cc b/tensorflow/core/tpu/kernels/infeed_ops.cc new file mode 100644 index 00000000000..1d10667f2c2 --- /dev/null +++ b/tensorflow/core/tpu/kernels/infeed_ops.cc @@ -0,0 +1,548 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/tpu/kernels/infeed_ops.h" + +#include +#include + +#include "tensorflow/compiler/jit/xla_device.h" +#include "tensorflow/compiler/tf2xla/literal_util.h" +#include "tensorflow/compiler/tf2xla/shape_util.h" +#include "tensorflow/compiler/xla/util.h" +#include "tensorflow/core/common_runtime/dma_helper.h" +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/dataset.h" +#include "tensorflow/core/framework/function.h" +#include "tensorflow/core/framework/function_handle_cache.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/variant.h" +#include "tensorflow/core/framework/variant_encode_decode.h" +#include "tensorflow/core/framework/variant_tensor_data.h" +#include "tensorflow/core/kernels/transpose_functor.h" +#include "tensorflow/core/profiler/lib/traceme.h" +#include "tensorflow/core/tpu/kernels/transfer_ops.h" +#include "tensorflow/core/tpu/tpu_api.h" +#include "tensorflow/core/tpu/tpu_defs.h" +#include "tensorflow/stream_executor/tpu/c_api_conversions.h" +#include "tensorflow/stream_executor/multi_platform_manager.h" +#include "tensorflow/stream_executor/tpu/tpu_transfer_manager.h" +#include "tensorflow/stream_executor/tpu/tpu_transfer_manager_interface.h" + +namespace tensorflow { +namespace { + +typedef Eigen::ThreadPoolDevice CPUDevice; +typedef tensorflow::tpu::NoncopyableBuffer LinearizerBuffer; +typedef std::deque LinearizerBufferList; + +// For the given shape, chooses a layout for infeed on TPU. The returned shape +// has the same dimensions as the original shape, and only the layout is +// changed. +xla::Shape GetTPUInfeedLayout(const xla::Shape& shape) { + XLA_Shape c_shape; + XLA_Shape c_infeed_shape; + + ApiConverter::ToC(shape, &c_shape); + + tpu::ExecutorApiFn()->TpuTransferManager_GetInfeedLayoutFn(&c_shape, + &c_infeed_shape); + xla::Shape infeed_shape = ApiConverter::FromC(&c_infeed_shape); + ApiConverter::Free(&c_shape); + ApiConverter::Free(&c_infeed_shape); + return infeed_shape; +} + +// Transposes the given tensor using the tensorflow C++ transpose implementation +// to obtain a XLA literal for the host tensor laid out as the given layout. The +// returned tensor is normalized to the dim0major layout -- F32[10,20,30]{2,0,1} +// is returned as F32[20,10,30]{2,1,0}. 
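+// For the example above: rank = 3 and minor_to_major = {2, 0, 1}, so
+// permutation[i] = minor_to_major(rank - 1 - i) yields {1, 0, 2} and the
+// transposed dimensions become {20, 10, 30}, i.e. the dim0major ({2, 1, 0})
+// form of the requested layout.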
+xla::StatusOr TransposeTensor(OpKernelContext* ctx, + const Tensor& input_tensor, + const xla::Shape& xla_shape) { + profiler::TraceMe trace_me("TransposeTensor", /*level=*/2); + const int64 rank = xla_shape.rank(); + std::vector permutation(rank); + std::vector transposed_shapes(rank); + for (int64 i = 0; i < rank; ++i) { + permutation[i] = xla_shape.layout().minor_to_major(rank - 1 - i); + transposed_shapes[i] = xla_shape.dimensions(permutation[i]); + } + + Tensor transposed_tensor; + + // If this is a trivial transpose (i.e., bitcast), just create an aliased + // tensor with the transposed shape. + if (xla::LayoutUtil::IsMonotonicWithDim0Major( + xla::ShapeUtil::DropDegenerateDimensions(xla_shape).layout())) { + TensorShape shape; + TF_RETURN_IF_ERROR(TensorShapeUtils::MakeShape(transposed_shapes, &shape)); + TF_RETURN_IF_ERROR(transposed_tensor.BitcastFrom( + input_tensor, input_tensor.dtype(), shape)); + return transposed_tensor; + } + + AllocatorAttributes alloc_attr; + alloc_attr.set_on_host(true); + TF_RETURN_IF_ERROR(ctx->allocate_temp(input_tensor.dtype(), + TensorShape(transposed_shapes), + &transposed_tensor, alloc_attr)); + // Eigen Transpose fails with SIGFPE if there is a dimension of size 0. + if (input_tensor.NumElements() > 0) { + TF_RETURN_IF_ERROR(DoTranspose(ctx->eigen_device(), + input_tensor, permutation, + &transposed_tensor)); + } + return transposed_tensor; +} + +xla::StatusOr GetLayoutOverride(OpKernelConstruction* ctx, + const char* attrn_name, + std::vector* minor_to_major) { + if (!ctx->HasAttr(attrn_name)) { + return false; + } + TF_RETURN_IF_ERROR(ctx->GetAttr(attrn_name, minor_to_major)); + return !minor_to_major->empty(); +} + +Status GetInfeedShapeWithLayout(OpKernelConstruction* ctx, + const char* attrn_name, + const xla::Shape& input_shape, + xla::Shape* output_shape) { + std::vector minor_to_major; + TF_ASSIGN_OR_RETURN(bool has_override, + GetLayoutOverride(ctx, attrn_name, &minor_to_major)); + if (!has_override) { + *output_shape = input_shape; + if (output_shape->IsTuple()) { + int64 tuple_elements = xla::ShapeUtil::TupleElementCount(*output_shape); + for (int64 i = 0; i < tuple_elements; ++i) { + xla::Shape* sub_shape = + xla::ShapeUtil::GetMutableSubshape(output_shape, {i}); + *sub_shape->mutable_layout() = GetTPUInfeedLayout(*sub_shape).layout(); + } + } else { + *output_shape->mutable_layout() = + GetTPUInfeedLayout(*output_shape).layout(); + } + return Status::OK(); + } + + auto layout_func = [](const xla::Shape& shape) -> xla::Layout { + return GetTPUInfeedLayout(shape).layout(); + }; + return GetShapeWithLayout(input_shape, minor_to_major, layout_func, + output_shape); +} + +// LinearizedBuffersWrapper is an opaque C++ data structure for the outputs of +// PrelinearizeOp and PrelinearizeTupleOp. It holds the resultant linearized +// buffers and references to input tensors whose underlying storage are shared +// with linearized buffers. +// NOTE: This is not a feature-complete implementation of the DT_VARIANT +// specification. In particular, we cannot currently serialize an arbitrary +// `LinearizerBufferList` (aka `std::deque`) +// object, so the `Encode()` and `Decode()` methods are not implemented. 
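+// In practice this means a prelinearized variant cannot usefully cross a
+// serialization boundary (e.g. be checkpointed or sent to another process);
+// it is expected to be produced and consumed within the same host process.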
+struct LinearizedBuffersWrapper { + explicit LinearizedBuffersWrapper() {} + explicit LinearizedBuffersWrapper(LinearizerBufferList bufs, + std::vector ts) + : buffers(std::move(bufs)), tensors(std::move(ts)) {} + LinearizedBuffersWrapper(const LinearizedBuffersWrapper& wrapper) { + // tensorflow::Variant requires this copy constructor to compile. + LOG(FATAL) << "LinearizedBuffersWrapper should not copy."; + } + LinearizedBuffersWrapper& operator=(const LinearizedBuffersWrapper& wrapper) = + delete; + LinearizedBuffersWrapper(LinearizedBuffersWrapper&&) = default; + LinearizedBuffersWrapper& operator=(LinearizedBuffersWrapper&&) = default; + ~LinearizedBuffersWrapper() = default; + + // These functions are tensorflow::Variant requirements. + string TypeName() const { return "(anonymous)::LinearizedBuffersWrapper"; } + void Encode(tensorflow::VariantTensorData* data) const { + LOG(ERROR) << "Encode() is not implemented for LinearizedBuffersWrapper " + "objects."; + } + bool Decode(const tensorflow::VariantTensorData& data) { + LOG(ERROR) << "Decode() is not implemented for LinearizedBuffersWrapper " + "objects."; + return false; + } + + LinearizerBufferList buffers; + // Save references on tensors whose underlying storage are shared with + // LiteralLinearizer::Buffer in `buffers`. + std::vector tensors; +}; + +Status AutoTransposeAndLinearize(OpKernelContext* ctx, + const Tensor& input_tensor, + const xla::Shape& shape, + LinearizerBufferList* linearized_buffers, + std::vector* saved_input_tensors) { + const Tensor* tensor = &input_tensor; + // If the given layout is not in dim0major layout, tranposes the tensor. + bool has_transposed = false; + Tensor transposed_tensor; + if (!xla::LayoutUtil::IsMonotonicWithDim0Major(shape.layout())) { + // If the given layout is not in dim0major layout, transpose the tensor. + TF_ASSIGN_OR_RETURN(transposed_tensor, + TransposeTensor(ctx, input_tensor, shape)); + tensor = &transposed_tensor; + has_transposed = true; + } + + xla::BorrowingLiteral literal; + TF_RETURN_IF_ERROR(HostTensorToBorrowingLiteral(*tensor, &literal)); + + TF_RETURN_IF_ERROR( + xla::TpuTransferManagerInterface::GetRegisteredTpuTransferManager() + ->LinearizeToBuffers(literal, linearized_buffers)); + + // The input tensor is ref-counted. Save a handle on the input tensor if + // its underlying storage is shared with linearized buffers to prevent + // input tensor from getting freed. + for (const auto& buffer : *linearized_buffers) { + if (!buffer.owns_data() && !has_transposed) { + // `buffer` is created from zero-copy fast path from the un-transposed + // input tensor so its underlying data is shared with input tensor. + // Save a handle to input tensor to increment its ref-count and avoid + // it getting deallocated after PrelinearizeTupleOp completes. + saved_input_tensors->push_back(*tensor); + // A literal can be linearized to zero to two buffers. If any of the + // linearized buffer shares storage with input tensor. We save exactly + // one handle on the input tensor. + break; + } + } + return Status::OK(); +} + +// PrelinearizeOp is used to linearize one tensor to the device format. 
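+// As registered at the bottom of this file, Prelinearize and
+// PrelinearizeTuple run on the CPU (typically as part of a tf.data input
+// pipeline) and emit a scalar DT_VARIANT tensor wrapping a
+// LinearizedBuffersWrapper; InfeedEnqueuePrelinearizedBuffer later unwraps
+// that variant and transfers the buffers to the device infeed queue.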
+class PrelinearizeOp : public OpKernel { + public: + explicit PrelinearizeOp(OpKernelConstruction* ctx) : OpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("shape", &shape_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("dtype", &dtype_)); + xla::Shape shape; + OP_REQUIRES_OK(ctx, TensorShapeToXLAShape(dtype_, shape_, &shape)); + OP_REQUIRES_OK(ctx, + GetInfeedShapeWithLayout(ctx, "layout", shape, &xla_shape_)); + } + + void Compute(OpKernelContext* ctx) override { + const Tensor& input_tensor = ctx->input(0); + // Validate input. + OP_REQUIRES( + ctx, input_tensor.dtype() == dtype_, + errors::InvalidArgument("Prelinearize dtype mismatch; expected ", + DataType_Name(dtype_), ", got ", + DataType_Name(input_tensor.dtype()))); + OP_REQUIRES( + ctx, input_tensor.shape() == shape_, + errors::InvalidArgument("Prelinearize shape mismatch; expected ", + shape_.DebugString(), ", got ", + input_tensor.shape().DebugString())); + + // Auto-transpose and prelinearize. + LinearizerBufferList linearized_buffers; + std::vector saved_input_tensors; + auto status = + AutoTransposeAndLinearize(ctx, input_tensor, xla_shape_, + &linearized_buffers, &saved_input_tensors); + OP_REQUIRES_OK(ctx, status); + + // Write to output. + tensorflow::Tensor* output; + OP_REQUIRES_OK(ctx, + ctx->allocate_output(0, tensorflow::TensorShape{}, &output)); + output->scalar()() = LinearizedBuffersWrapper{ + std::move(linearized_buffers), std::move(saved_input_tensors)}; + } + + bool IsExpensive() override { return true; } + + private: + TensorShape shape_; + DataType dtype_; + xla::Shape xla_shape_; + + // PrelinearizeOp is neither copyable nor movable. + PrelinearizeOp(const PrelinearizeOp&) = delete; + PrelinearizeOp& operator=(const PrelinearizeOp&) = delete; +}; + +// PrelinearizeTupleOp is used to linearize multiple tensors to the device +// format. +class PrelinearizeTupleOp : public OpKernel { + public: + explicit PrelinearizeTupleOp(OpKernelConstruction* ctx) : OpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("shapes", &shapes_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("dtypes", &dtypes_)); + OP_REQUIRES( + ctx, shapes_.size() == dtypes_.size(), + errors::InvalidArgument( + "shapes and dtypes must be the same length. shapes length = ", + shapes_.size(), ", dtypes length = ", dtypes_.size())); + + std::vector xla_shapes; + for (int i = 0; i < shapes_.size(); i++) { + xla::Shape xla_shape; + OP_REQUIRES_OK(ctx, + TensorShapeToXLAShape(dtypes_[i], shapes_[i], &xla_shape)); + xla_shapes.push_back(xla_shape); + } + OP_REQUIRES_OK( + ctx, GetInfeedShapeWithLayout( + ctx, "layouts", xla::ShapeUtil::MakeTupleShape(xla_shapes), + &tuple_shape_)); + } + + void Compute(OpKernelContext* ctx) override { + OpInputList values; + OP_REQUIRES_OK(ctx, ctx->input_list("inputs", &values)); + OP_REQUIRES(ctx, values.size() == shapes_.size(), + errors::InvalidArgument( + "Wrong number of inputs to PrelinearizeTuple.")); + + LinearizerBufferList all_linearized_buffers; + std::vector all_saved_input_tensors; + for (int i = 0; i < values.size(); i++) { + // Validate input. 
+ const Tensor& input_tensor = values[i]; + OP_REQUIRES(ctx, input_tensor.dtype() == dtypes_[i], + errors::InvalidArgument( + "PrelinearizeTuple dtype mismatch at tuple element ", i, + "; expected ", DataType_Name(dtypes_[i]), ", got ", + DataType_Name(input_tensor.dtype()))); + OP_REQUIRES(ctx, input_tensor.shape() == shapes_[i], + errors::InvalidArgument( + "PrelinearizeTuple shape mismatch at tuple element ", i, + "; expected ", shapes_[i].DebugString(), ", got ", + input_tensor.shape().DebugString())); + + // Auto-transpose and prelinearize. + LinearizerBufferList linearized_buffers; + std::vector saved_input_tensors; + auto status = AutoTransposeAndLinearize( + ctx, input_tensor, tuple_shape_.tuple_shapes(i), &linearized_buffers, + &saved_input_tensors); + OP_REQUIRES_OK(ctx, status); + all_linearized_buffers.insert( + all_linearized_buffers.end(), + std::make_move_iterator(linearized_buffers.begin()), + std::make_move_iterator(linearized_buffers.end())); + all_saved_input_tensors.insert( + all_saved_input_tensors.end(), + std::make_move_iterator(saved_input_tensors.begin()), + std::make_move_iterator(saved_input_tensors.end())); + } + + tensorflow::Tensor* output; + OP_REQUIRES_OK(ctx, + ctx->allocate_output(0, tensorflow::TensorShape{}, &output)); + output->scalar()() = LinearizedBuffersWrapper{ + std::move(all_linearized_buffers), std::move(all_saved_input_tensors)}; + } + + bool IsExpensive() override { return true; } + + private: + std::vector shapes_; + DataTypeVector dtypes_; + xla::Shape tuple_shape_; + + // PrelinearizeTupleOp is neither copyable nor movable. + PrelinearizeTupleOp(const PrelinearizeTupleOp&) = delete; + PrelinearizeTupleOp& operator=(const PrelinearizeTupleOp&) = delete; +}; + +// The InfeedEnqueuePrelinearizedBufferOp op is used to transfer prelinearized +// buffers to the device infeed queue. +class InfeedEnqueuePrelinearizedBufferOp : public TpuTransferAsyncOpKernel { + public: + explicit InfeedEnqueuePrelinearizedBufferOp(OpKernelConstruction* ctx) + : TpuTransferAsyncOpKernel(ctx, "prelinearized_buffers_to_infeed", 8) {} + + Status DoWork(OpKernelContext* ctx, + xla::TpuTransferManagerInterface* transfer_manager, + stream_executor::StreamExecutor* stream_executor) override { + const Tensor& input_tensor = ctx->input(0); + const LinearizedBuffersWrapper* wrapper = + input_tensor.scalar()() + .get(); + TF_RETURN_IF_ERROR(transfer_manager->TransferBuffersToInfeed( + stream_executor, wrapper->buffers)); + + return Status::OK(); + } + + private: + // InfeedEnqueuePrelinearizedBufferOp is neither copyable nor movable. + InfeedEnqueuePrelinearizedBufferOp( + const InfeedEnqueuePrelinearizedBufferOp&) = delete; + InfeedEnqueuePrelinearizedBufferOp& operator=( + const InfeedEnqueuePrelinearizedBufferOp&) = delete; +}; + +} // anonymous namespace + +TpuInfeedEnqueueOp::TpuInfeedEnqueueOp(OpKernelConstruction* ctx) + : TpuTransferAsyncOpKernel(ctx, "infeed_enqueue", 8) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("shape", &shape_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("dtype", &dtype_)); + xla::Shape shape; + OP_REQUIRES_OK(ctx, TensorShapeToXLAShape(dtype_, shape_, &shape)); + OP_REQUIRES_OK(ctx, + GetInfeedShapeWithLayout(ctx, "layout", shape, &xla_shape_)); +} + +Status TpuInfeedEnqueueOp::DoWork( + OpKernelContext* ctx, xla::TpuTransferManagerInterface* transfer_manager, + stream_executor::StreamExecutor* stream_executor) { + const Tensor& input_tensor = ctx->input(0); + + // Validate runtime shape and fail if it doesn't match the contract. 
+ if (input_tensor.dtype() != dtype_) { + return errors::InvalidArgument("Infeed dtype mismatch."); + } + if (input_tensor.shape() != shape_) { + return errors::InvalidArgument("Infeed shape mismatch; expected ", + shape_.DebugString(), ", got ", + input_tensor.shape().DebugString()); + } + + const Tensor* tensor = &input_tensor; + Tensor transposed_tensor; + if (!xla::LayoutUtil::IsMonotonicWithDim0Major(xla_shape_.layout())) { + // If the given layout is not in dim0major layout, transpose the tensor. + TF_ASSIGN_OR_RETURN(transposed_tensor, + TransposeTensor(ctx, input_tensor, xla_shape_)); + tensor = &transposed_tensor; + } + + xla::BorrowingLiteral literal; + TF_RETURN_IF_ERROR(HostTensorToBorrowingLiteral(*tensor, &literal)); + + // Transfer the given literal to the Infeed interface of the device. + TF_RETURN_IF_ERROR( + transfer_manager->TransferLiteralToInfeed(stream_executor, literal)); + return Status::OK(); +} + +TpuInfeedEnqueueTupleOp::TpuInfeedEnqueueTupleOp(OpKernelConstruction* ctx) + : TpuTransferAsyncOpKernel(ctx, "infeed_enqueue", 8) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("shapes", &shapes_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("dtypes", &dtypes_)); + OP_REQUIRES( + ctx, shapes_.size() == dtypes_.size(), + errors::InvalidArgument("shapes and dtypes must be the same length.")); + + std::vector xla_shapes; + for (int i = 0; i < shapes_.size(); i++) { + xla::Shape xla_shape; + OP_REQUIRES_OK(ctx, + TensorShapeToXLAShape(dtypes_[i], shapes_[i], &xla_shape)); + xla_shapes.push_back(xla_shape); + } + OP_REQUIRES_OK( + ctx, GetInfeedShapeWithLayout(ctx, "layouts", + xla::ShapeUtil::MakeTupleShape(xla_shapes), + &tuple_shape_)); +} + +Status TpuInfeedEnqueueTupleOp::DoWork( + OpKernelContext* ctx, xla::TpuTransferManagerInterface* transfer_manager, + stream_executor::StreamExecutor* stream_executor) { + OpInputList values; + TF_RETURN_IF_ERROR(ctx->input_list("inputs", &values)); + if (values.size() != shapes_.size()) { + return errors::InvalidArgument( + "Wrong number of inputs to InfeedEnqueueTuple."); + } + + for (const auto& shapes : shapes_) { + VLOG(1) << "TransferLiteralToInfeed " << shapes.DebugString(); + } + + std::vector maybe_transposed_tensors; + maybe_transposed_tensors.reserve(values.size()); + for (int i = 0; i < values.size(); i++) { + // Validate runtime shapes and fail if it doesn't match the contract. + const Tensor* tensor = &values[i]; + if (tensor->shape() != shapes_[i]) { + return errors::InvalidArgument("Infeed shape mismatch for tuple element ", + i, "; expected ", shapes_[i].DebugString(), + ", got ", tensor->shape().DebugString()); + } + if (!xla::LayoutUtil::IsMonotonicWithDim0Major( + tuple_shape_.tuple_shapes(i).layout())) { + // If the given layout is not in dim0major layout, tranposes the given + // tensor. + TF_ASSIGN_OR_RETURN( + Tensor transposed_tensor, + TransposeTensor(ctx, *tensor, tuple_shape_.tuple_shapes(i))); + maybe_transposed_tensors.emplace_back(transposed_tensor); + } else { + maybe_transposed_tensors.emplace_back(*tensor); + } + } + + xla::BorrowingLiteral tuple; + TF_RETURN_IF_ERROR( + HostTensorsToBorrowingLiteralTuple(maybe_transposed_tensors, &tuple)); + + // Transfer the given literal to the Infeed interface of the device. + TF_RETURN_IF_ERROR( + transfer_manager->TransferLiteralToInfeed(stream_executor, tuple)); + + VLOG(1) << "TransferLiteralToInfeed complete."; + + return Status::OK(); +} + +// These ops execute on either the TPU device or the CPU device. 
When running on +// CPU they must specify a non-negative value for device_ordinal to indicate +// which TPU to send infeed to. +REGISTER_KERNEL_BUILDER( + Name("InfeedEnqueue").Device(DEVICE_TPU_NODE).HostMemory("input"), + TpuInfeedEnqueueOp); +REGISTER_KERNEL_BUILDER(Name("InfeedEnqueue").Device(DEVICE_CPU), + TpuInfeedEnqueueOp); + +REGISTER_KERNEL_BUILDER( + Name("InfeedEnqueueTuple").Device(DEVICE_TPU_NODE).HostMemory("inputs"), + TpuInfeedEnqueueTupleOp); +REGISTER_KERNEL_BUILDER(Name("InfeedEnqueueTuple").Device(DEVICE_CPU), + TpuInfeedEnqueueTupleOp); + +// Prelinearize ops run on CPU as part of tf.data input pipeline. +REGISTER_KERNEL_BUILDER(Name("Prelinearize").Device(DEVICE_CPU), + PrelinearizeOp); +REGISTER_KERNEL_BUILDER(Name("PrelinearizeTuple").Device(DEVICE_CPU), + PrelinearizeTupleOp); + +// InfeedEnqueuePrelinearizedBuffer op run on CPU and takes a device_ordinal to +// select the right device to infeed. +REGISTER_KERNEL_BUILDER( + Name("InfeedEnqueuePrelinearizedBuffer").Device(DEVICE_CPU), + InfeedEnqueuePrelinearizedBufferOp); + +} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/infeed_ops.h b/tensorflow/core/tpu/kernels/infeed_ops.h new file mode 100644 index 00000000000..622583b6a73 --- /dev/null +++ b/tensorflow/core/tpu/kernels/infeed_ops.h @@ -0,0 +1,69 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_TPU_KERNELS_INFEED_OPS_H_ +#define TENSORFLOW_CORE_TPU_KERNELS_INFEED_OPS_H_ + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/platform/status.h" +#include "tensorflow/core/tpu/kernels/transfer_ops.h" + +namespace tensorflow { + +// TODO(b/65200690): Rework this when there is a callback based infeed API to +// StreamExecutor. + +// The InfeedEnqueue op is used to deliver data to the device infeed queue. +class TpuInfeedEnqueueOp : public TpuTransferAsyncOpKernel { + public: + explicit TpuInfeedEnqueueOp(OpKernelConstruction* ctx); + Status DoWork(OpKernelContext* ctx, + xla::TpuTransferManagerInterface* transfer_manager, + stream_executor::StreamExecutor* stream_executor) override; + + private: + TensorShape shape_; + DataType dtype_; + xla::Shape xla_shape_; + + // TpuInfeedEnqueueOp is neither copyable nor movable. + TpuInfeedEnqueueOp(const TpuInfeedEnqueueOp&) = delete; + TpuInfeedEnqueueOp& operator=(const TpuInfeedEnqueueOp&) = delete; +}; + +// The InfeedEnqueueTuple op is used on the host to deliver multiple tensors to +// the device infeed queue as an XLA tuple. 
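+// The `shapes` and `dtypes` attributes must have the same length and order,
+// and per-element infeed layouts may be overridden through the `layouts`
+// attribute when it is present and non-empty (see GetInfeedShapeWithLayout
+// in infeed_ops.cc).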
+class TpuInfeedEnqueueTupleOp : public TpuTransferAsyncOpKernel { + public: + explicit TpuInfeedEnqueueTupleOp(OpKernelConstruction* ctx); + Status DoWork(OpKernelContext* ctx, + xla::TpuTransferManagerInterface* transfer_manager, + stream_executor::StreamExecutor* stream_executor) override; + + private: + std::vector shapes_; + DataTypeVector dtypes_; + xla::Shape tuple_shape_; + + // TpuInfeedEnqueueTupleOp is neither copyable nor movable. + TpuInfeedEnqueueTupleOp(const TpuInfeedEnqueueTupleOp&) = delete; + TpuInfeedEnqueueTupleOp& operator=(const TpuInfeedEnqueueTupleOp&) = delete; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_TPU_KERNELS_INFEED_OPS_H_ diff --git a/tensorflow/core/tpu/kernels/outfeed_ops.cc b/tensorflow/core/tpu/kernels/outfeed_ops.cc new file mode 100644 index 00000000000..51a3a71a297 --- /dev/null +++ b/tensorflow/core/tpu/kernels/outfeed_ops.cc @@ -0,0 +1,116 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/tpu/kernels/outfeed_ops.h" + +#include "tensorflow/compiler/jit/xla_device.h" +#include "tensorflow/compiler/tf2xla/literal_util.h" +#include "tensorflow/compiler/tf2xla/shape_util.h" +#include "tensorflow/compiler/tf2xla/type_util.h" +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/tpu/kernels/transfer_ops.h" +#include "tensorflow/core/tpu/tpu_defs.h" +#include "tensorflow/stream_executor/multi_platform_manager.h" + +namespace tensorflow { + +TpuOutfeedDequeueOp::TpuOutfeedDequeueOp(OpKernelConstruction* ctx) + : TpuTransferAsyncOpKernel(ctx, "outfeed_dequeue", 1) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("shape", &shape_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("dtype", &dtype_)); + OP_REQUIRES_OK(ctx, TensorShapeToXLAShape(dtype_, shape_, &xla_shape_)); +} + +Status TpuOutfeedDequeueOp::DoWork( + OpKernelContext* ctx, xla::TpuTransferManagerInterface* transfer_manager, + stream_executor::StreamExecutor* stream_executor) { + Tensor* output; + TF_RETURN_IF_ERROR(ctx->allocate_output(0, shape_, &output)); + + // Transfer from the outfeed interface of the device. + xla::MutableBorrowingLiteral literal; + TF_RETURN_IF_ERROR( + HostTensorToMutableBorrowingLiteral(xla_shape_, output, &literal)); + + VLOG(1) << "TransferLiteralFromOutfeed " + << xla::ShapeUtil::HumanStringWithLayout(xla_shape_); + + TF_RETURN_IF_ERROR(transfer_manager->TransferLiteralFromOutfeed( + stream_executor, xla_shape_, literal)); + + VLOG(1) << "TransferLiteralFromOutfeed complete."; + + return Status::OK(); +} + +// The OutfeedDequeueTuple op is used to retrieve multiple tensors from the +// device outfeed queue. 
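+// Unlike the single-tensor case above, DoWork allocates one output tensor per
+// entry in `shapes_` and issues a separate TransferLiteralFromOutfeed call
+// for each tuple element rather than transferring the whole tuple at once.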
+TpuOutfeedDequeueTupleOp::TpuOutfeedDequeueTupleOp(OpKernelConstruction* ctx) + : TpuTransferAsyncOpKernel(ctx, "outfeed_dequeue", 1) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("shapes", &shapes_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("dtypes", &dtypes_)); + OP_REQUIRES( + ctx, shapes_.size() == dtypes_.size(), + errors::InvalidArgument("shapes and dtypes must be the same length.")); + // The `dtypes` list is inferred from the supplied inputs, so it + // is always the correct length. + for (int i = 0; i < shapes_.size(); i++) { + xla::Shape xla_shape; + OP_REQUIRES_OK(ctx, + TensorShapeToXLAShape(dtypes_[i], shapes_[i], &xla_shape)); + xla_shapes_.push_back(xla_shape); + } + tuple_shape_ = xla::ShapeUtil::MakeTupleShape(xla_shapes_); +} + +Status TpuOutfeedDequeueTupleOp::DoWork( + OpKernelContext* ctx, xla::TpuTransferManagerInterface* transfer_manager, + stream_executor::StreamExecutor* stream_executor) { + VLOG(1) << "TransferLiteralFromOutfeed " + << xla::ShapeUtil::HumanStringWithLayout(tuple_shape_); + + for (int i = 0; i < shapes_.size(); ++i) { + Tensor* output; + TF_RETURN_IF_ERROR(ctx->allocate_output(i, shapes_[i], &output)); + + xla::MutableBorrowingLiteral literal; + TF_RETURN_IF_ERROR( + HostTensorToMutableBorrowingLiteral(xla_shapes_[i], output, &literal)); + TF_RETURN_IF_ERROR(transfer_manager->TransferLiteralFromOutfeed( + stream_executor, xla_shapes_[i], literal)); + } + return Status::OK(); +} + +// These ops execute on either the TPU device or the CPU device. When +// running on CPU they must specify a non-negative value for +// device_ordinal to indicate which TPU to receive outfeed from. +REGISTER_KERNEL_BUILDER( + Name("OutfeedDequeue").Device(DEVICE_TPU_NODE).HostMemory("output"), + TpuOutfeedDequeueOp); +REGISTER_KERNEL_BUILDER(Name("OutfeedDequeue").Device(DEVICE_CPU), + TpuOutfeedDequeueOp); + +REGISTER_KERNEL_BUILDER( + Name("OutfeedDequeueTuple").Device(DEVICE_TPU_NODE).HostMemory("outputs"), + TpuOutfeedDequeueTupleOp); +REGISTER_KERNEL_BUILDER(Name("OutfeedDequeueTuple").Device(DEVICE_CPU), + TpuOutfeedDequeueTupleOp); + +} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/outfeed_ops.h b/tensorflow/core/tpu/kernels/outfeed_ops.h new file mode 100644 index 00000000000..5e3ed87c04b --- /dev/null +++ b/tensorflow/core/tpu/kernels/outfeed_ops.h @@ -0,0 +1,69 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_TPU_KERNELS_OUTFEED_OPS_H_ +#define TENSORFLOW_CORE_TPU_KERNELS_OUTFEED_OPS_H_ + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/tpu/kernels/transfer_ops.h" + +namespace tensorflow { + +// The OutfeedDequeue op is used to retrieve a single tensor from the device +// outfeed queue. 
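+// When placed on the CPU device, a non-negative `device_ordinal` attribute
+// selects which TPU to dequeue from; a negative ordinal falls back to the
+// ordinal of the XLA device the kernel is running on (see the registrations
+// in outfeed_ops.cc and the check in TpuTransferAsyncOpKernel).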
+class TpuOutfeedDequeueOp : public TpuTransferAsyncOpKernel { + public: + explicit TpuOutfeedDequeueOp(OpKernelConstruction* ctx); + + Status DoWork(OpKernelContext* ctx, + xla::TpuTransferManagerInterface* transfer_manager, + stream_executor::StreamExecutor* stream_executor) override; + + private: + TensorShape shape_; + DataType dtype_; + xla::Shape xla_shape_; + + // OutfeedDequeueOp is neither copyable nor movable. + TpuOutfeedDequeueOp(const TpuOutfeedDequeueOp&) = delete; + TpuOutfeedDequeueOp& operator=(const TpuOutfeedDequeueOp&) = delete; +}; + +// The OutfeedDequeueTuple op is used to retrieve multiple tensors from the +// device outfeed queue. +class TpuOutfeedDequeueTupleOp : public TpuTransferAsyncOpKernel { + public: + explicit TpuOutfeedDequeueTupleOp(OpKernelConstruction* ctx); + + Status DoWork(OpKernelContext* ctx, + xla::TpuTransferManagerInterface* transfer_manager, + stream_executor::StreamExecutor* stream_executor) override; + + private: + std::vector shapes_; + DataTypeVector dtypes_; + std::vector xla_shapes_; + xla::Shape tuple_shape_; + + // OutfeedDequeueTupleOp is neither copyable nor movable. + TpuOutfeedDequeueTupleOp(const TpuOutfeedDequeueTupleOp&) = delete; + TpuOutfeedDequeueTupleOp& operator=(const TpuOutfeedDequeueTupleOp&) = delete; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_TPU_KERNELS_OUTFEED_OPS_H_ diff --git a/tensorflow/core/tpu/kernels/replication_ops.cc b/tensorflow/core/tpu/kernels/replication_ops.cc new file mode 100644 index 00000000000..4c986e880e7 --- /dev/null +++ b/tensorflow/core/tpu/kernels/replication_ops.cc @@ -0,0 +1,27 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/jit/xla_device_ops.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/tpu/tpu_defs.h" + +namespace tensorflow { + +REGISTER_KERNEL_BUILDER(Name("_TPUReplicate").Device(DEVICE_TPU_SYSTEM), + XlaDeviceDummyOp); + +} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_handle_to_key_op.cc b/tensorflow/core/tpu/kernels/tpu_handle_to_key_op.cc new file mode 100644 index 00000000000..ec2ae91d3eb --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_handle_to_key_op.cc @@ -0,0 +1,62 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h" +#include "tensorflow/core/tpu/kernels/tpu_op_consts.h" +#include "tensorflow/core/tpu/tpu_configuration.h" + +namespace tensorflow { + +class TpuHandleToProtoKeyOp : public OpKernel { + public: + explicit TpuHandleToProtoKeyOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + ~TpuHandleToProtoKeyOp() override = default; + TpuHandleToProtoKeyOp(const TpuHandleToProtoKeyOp&) = delete; + TpuHandleToProtoKeyOp& operator=(const TpuHandleToProtoKeyOp&) = delete; + + void Compute(OpKernelContext* ctx) override { + VLOG(1) << "TpuHandleToProtoKeyOp::Compute " << ctx->op_kernel().name() + << " on device " << ctx->op_kernel().requested_device(); + const Tensor& uid = ctx->input(0); + + ResourceMgr* rm = GetTPUConfigResourceMgr(); + tpu::TpuCompilationCacheInterface* cache; + OP_REQUIRES_OK(ctx, rm->Lookup( + rm->default_container(), + tpu::kCompilationCacheResourceName, &cache)); + core::ScopedUnref cache_unref(cache); + + std::vector keys; + OP_REQUIRES_OK(ctx, cache->GetKeysFromUid(uid.scalar()(), &keys)); + + TensorShape output_shape; + output_shape.AddDim(keys.size()); + Tensor* result = nullptr; + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, output_shape, &result)); + for (int i = 0; i < keys.size(); ++i) { + result->vec()(i) = keys[i]; + } + }; +}; + +REGISTER_KERNEL_BUILDER(Name("TpuHandleToProtoKey").Device(DEVICE_CPU), + TpuHandleToProtoKeyOp); + +} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/transfer_ops.cc b/tensorflow/core/tpu/kernels/transfer_ops.cc new file mode 100644 index 00000000000..40b85e2cfbd --- /dev/null +++ b/tensorflow/core/tpu/kernels/transfer_ops.cc @@ -0,0 +1,98 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/tpu/kernels/transfer_ops.h" + +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/platform/tracing.h" +#include "tensorflow/core/profiler/lib/traceme.h" +#include "tensorflow/stream_executor/multi_platform_manager.h" +#include "tensorflow/stream_executor/tpu/tpu_node_context.h" +#include "tensorflow/stream_executor/tpu/tpu_platform_interface.h" +#include "tensorflow/stream_executor/tpu/tpu_transfer_manager_interface.h" + +namespace tensorflow { + +TpuTransferAsyncOpKernel::TpuTransferAsyncOpKernel(OpKernelConstruction* ctx, + const string& transfer_type, + int number_of_threads) + : AsyncOpKernel(ctx), + thread_pool_(new thread::ThreadPool( + ctx->env(), + strings::StrCat(transfer_type, "_thread_", + SanitizeThreadSuffix(def().name())), + /*num_threads=*/8)) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("device_ordinal", &device_ordinal_)); + if (ctx->device_type() == DeviceType(DEVICE_CPU)) { + OP_REQUIRES( + ctx, device_ordinal_ >= 0, + errors::InvalidArgument(transfer_type, + " ops must specify a device_ordinal when " + "placed on CPU.")); + } +} + +void TpuTransferAsyncOpKernel::ComputeAsync(OpKernelContext* ctx, + DoneCallback done) { + CancellationToken token = + ctx->cancellation_manager()->get_cancellation_token(); + bool already_cancelled; + { + // Only protect registering the cancellation callback as mu_ cannot be held + // at a point where `done` could be called. + mutex_lock lock(mu_); + already_cancelled = !ctx->cancellation_manager()->RegisterCallback( + token, [this]() { Cancel(); }); + } + OP_REQUIRES_ASYNC(ctx, !already_cancelled, + errors::Cancelled("Infeed was cancelled."), done); + thread_pool_->Schedule([this, ctx, done, token]() { + Status s = RunTransfer(ctx); + ctx->cancellation_manager()->DeregisterCallback(token); + OP_REQUIRES_OK_ASYNC(ctx, s, done); + done(); + }); +} + +Status TpuTransferAsyncOpKernel::RunTransfer(OpKernelContext* ctx) { + auto* tpu_platform = tpu::TpuPlatformInterface::GetRegisteredPlatform(); + + int real_device_ordinal = device_ordinal_; + if (real_device_ordinal < 0) { + const XlaDevice::Metadata* metadata; + TF_RETURN_IF_ERROR(XlaDevice::GetMetadata(ctx, &metadata)); + real_device_ordinal = metadata->device_ordinal(); + } + stream_executor::StreamExecutor* stream_executor = + tpu_platform->ExecutorForDevice(real_device_ordinal).ValueOrDie(); + + // When Xprof profiling is off (which is the default), constructing the + // activity is simple enough that its overhead is negligible. + profiler::TraceMe activity( + [this] { return profiler::TraceMeOp(name(), type_string()); }, + profiler::TraceMeLevel::kInfo); + return DoWork( + ctx, xla::TpuTransferManagerInterface::GetRegisteredTpuTransferManager(), + stream_executor); +} + +void TpuTransferAsyncOpKernel::Cancel() { + mutex_lock lock(mu_); + TF_CHECK_OK(tpu::TpuNodeContext::CloseTpuHost()); +} + +} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/transfer_ops.h b/tensorflow/core/tpu/kernels/transfer_ops.h new file mode 100644 index 00000000000..d98d743f569 --- /dev/null +++ b/tensorflow/core/tpu/kernels/transfer_ops.h @@ -0,0 +1,56 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_TPU_KERNELS_TRANSFER_OPS_H_ +#define TENSORFLOW_CORE_TPU_KERNELS_TRANSFER_OPS_H_ + +#include "tensorflow/compiler/jit/xla_device.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/util/stream_executor_util.h" +#include "tensorflow/stream_executor/tpu/tpu_transfer_manager_interface.h" + +namespace tensorflow { + +// Base class providing common functionality for async ops that transfer from +// host to TPU. +class TpuTransferAsyncOpKernel : public AsyncOpKernel { + public: + explicit TpuTransferAsyncOpKernel(OpKernelConstruction* ctx, + const string& transfer_type, + int number_of_threads); + + void ComputeAsync(OpKernelContext* ctx, DoneCallback done) override; + + protected: + virtual Status DoWork(OpKernelContext* context, + xla::TpuTransferManagerInterface* transfer_manager, + stream_executor::StreamExecutor* stream_executor) = 0; + + private: + Status RunTransfer(OpKernelContext* ctx); + void Cancel(); + + std::unique_ptr thread_pool_; + int device_ordinal_; + mutex mu_; + + // TpuTransferAsyncOpKernel is neither copyable nor movable. + TpuTransferAsyncOpKernel(const TpuTransferAsyncOpKernel&) = delete; + TpuTransferAsyncOpKernel& operator=(const TpuTransferAsyncOpKernel&) = delete; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_TPU_KERNELS_TRANSFER_OPS_H_ diff --git a/tensorflow/core/tpu/tpu_library_init_fns.inc b/tensorflow/core/tpu/tpu_library_init_fns.inc index be9d594685e..40130bd46dd 100644 --- a/tensorflow/core/tpu/tpu_library_init_fns.inc +++ b/tensorflow/core/tpu/tpu_library_init_fns.inc @@ -161,6 +161,7 @@ tensorflow::Status SetExecutorStructFn(void* library_handle) { TFTPU_SET_FN(executor_fn, TpuTransferManager_TransferLiteralFromDevice); TFTPU_SET_FN(executor_fn, TpuTransferManager_GetByteSizeRequirement); TFTPU_SET_FN(executor_fn, TpuTransferManager_WriteSingleTupleIndexTable); + TFTPU_SET_FN(executor_fn, TpuTransferManager_GetInfeedLayout); TFTPU_SET_FN(executor_fn, TpuTransferManager_LinearizeToBuffers); TFTPU_SET_FN(executor_fn, TpuTransferManager_FreeBuffers); From fb05951e41e6bcdaddf183dffb8cc91c434f5ff4 Mon Sep 17 00:00:00 2001 From: Andy Ly Date: Mon, 10 Aug 2020 11:04:50 -0700 Subject: [PATCH 2450/2522] Extend LegalizeTFCommunication to support legalizing TF/XLA communication ops in `mhlo.while` op regions. This supports nesting of `mhlo.while` and other control flow ops. `mhlo.while` ops are rewritten to propagate !mhlo.token values across. 
PiperOrigin-RevId: 325843393 Change-Id: I0e1af045f7c78256b1b0598094ade46c027e5f11 --- .../xla/tests/legalize-tf-communication.mlir | 192 ++++++++++++++++++ .../transforms/legalize_tf_communication.cc | 131 +++++++++--- 2 files changed, 296 insertions(+), 27 deletions(-) diff --git a/tensorflow/compiler/mlir/xla/tests/legalize-tf-communication.mlir b/tensorflow/compiler/mlir/xla/tests/legalize-tf-communication.mlir index d01ab38bd6b..550b2ba4da3 100644 --- a/tensorflow/compiler/mlir/xla/tests/legalize-tf-communication.mlir +++ b/tensorflow/compiler/mlir/xla/tests/legalize-tf-communication.mlir @@ -848,6 +848,198 @@ func @if_followed_by_communication_op(%arg0: tensor, %arg1: tensor) { // ----- +// Tests `mhlo.while` with cond and body populated with TF/XLA communication +// ops. + +// CHECK-LABEL: func @while_cond_body +// CHECK-SAME: ([[ARG0:%.*]]: tensor) +func @while_cond_body(%arg0: tensor) -> tensor { + // CHECK: [[INIT_TOKEN:%.*]] = "mhlo.create_token" + // CHECK: [[ARG_TUPLE:%.*]] = "mhlo.tuple"([[ARG0]], [[INIT_TOKEN]]) + + // CHECK: [[WHILE_TUPLE:%.*]] = "mhlo.while"([[ARG_TUPLE]]) + %0 = "mhlo.while"(%arg0) ( { + // CHECK: ^bb0([[COND_REGION_ARG:%.*]]: tuple, !mhlo.token>): + ^bb0(%arg1: tensor): + // CHECK-DAG: [[COND_REGION_ARG_VALUE:%.*]] = "mhlo.get_tuple_element"([[COND_REGION_ARG]]) {index = 0 + // CHECK-DAG: [[COND_REGION_ARG_TOKEN:%.*]] = "mhlo.get_tuple_element"([[COND_REGION_ARG]]) {index = 1 + + // CHECK: [[COND_SEND_TOKEN:%.*]] = "mhlo.send"([[COND_REGION_ARG_VALUE]], [[COND_REGION_ARG_TOKEN]]) + // CHECK-SAME: channel_id = {handle = 1 : i64, type = 2 : i64} + // CHECK-SAME: mhlo.frontend_attributes = {_xla_host_transfer_original_type = "f32", _xla_host_transfer_rendezvous = "send_while_cond_dtoh_0"} + + // CHECK: [[COND_RECV_TUPLE:%.*]] = "mhlo.recv"([[COND_SEND_TOKEN]]) + // CHECK-SAME: channel_id = {handle = 2 : i64, type = 3 : i64} + // CHECK-SAME: mhlo.frontend_attributes = {_xla_host_transfer_original_type = "f32", _xla_host_transfer_rendezvous = "recv_while_cond_htod_0"} + %1 = "tf._XlaHostComputeMlir"(%arg1) {recv_key = "recv_while_cond", send_key = "send_while_cond", tpu_core = 0 : i64} : (tensor) -> tensor + + // CHECK-DAG: [[COND_GET_TUPLE_ELEMENT0:%.*]] = "mhlo.get_tuple_element"([[COND_RECV_TUPLE]]) {index = 0 + // CHECK-DAG: [[COND_GET_TUPLE_ELEMENT1:%.*]] = "mhlo.get_tuple_element"([[COND_RECV_TUPLE]]) {index = 1 + + // CHECK: [[COND_COMPARE:%.*]] = "mhlo.compare"([[COND_GET_TUPLE_ELEMENT0]], [[COND_GET_TUPLE_ELEMENT0]]) + %2 = "mhlo.compare"(%1, %1) {comparison_direction = "LT"} : (tensor, tensor) -> tensor + + // CHECK: "mhlo.return"([[COND_COMPARE]]) + "mhlo.return"(%2) : (tensor) -> () + }, { + // CHECK: ^bb0([[BODY_REGION_ARG:%.*]]: tuple, !mhlo.token>): + ^bb0(%arg1: tensor): + // CHECK-DAG: [[BODY_REGION_ARG_VALUE:%.*]] = "mhlo.get_tuple_element"([[BODY_REGION_ARG]]) {index = 0 + // CHECK-DAG: [[BODY_REGION_ARG_TOKEN:%.*]] = "mhlo.get_tuple_element"([[BODY_REGION_ARG]]) {index = 1 + + // CHECK: [[BODY_SEND_TOKEN:%.*]] = "mhlo.send"([[BODY_REGION_ARG_VALUE]], [[BODY_REGION_ARG_TOKEN]]) + // CHECK-SAME: channel_id = {handle = 3 : i64, type = 2 : i64} + // CHECK-SAME: mhlo.frontend_attributes = {_xla_host_transfer_original_type = "f32", _xla_host_transfer_rendezvous = "send_while_body_dtoh_0"} + + // CHECK: [[BODY_RECV_TUPLE:%.*]] = "mhlo.recv"([[BODY_SEND_TOKEN]]) + // CHECK-SAME: channel_id = {handle = 4 : i64, type = 3 : i64} + // CHECK-SAME: mhlo.frontend_attributes = {_xla_host_transfer_original_type = "f32", _xla_host_transfer_rendezvous = 
"recv_while_body_htod_0"} + %1 = "tf._XlaHostComputeMlir"(%arg1) {recv_key = "recv_while_body", send_key = "send_while_body", tpu_core = 0 : i64} : (tensor) -> tensor + + // CHECK-DAG: [[BODY_GET_TUPLE_ELEMENT0:%.*]] = "mhlo.get_tuple_element"([[BODY_RECV_TUPLE]]) {index = 0 + // CHECK-DAG: [[BODY_GET_TUPLE_ELEMENT1:%.*]] = "mhlo.get_tuple_element"([[BODY_RECV_TUPLE]]) {index = 1 + // CHECK: [[BODY_RETURN_TUPLE:%.*]] = "mhlo.tuple"([[BODY_GET_TUPLE_ELEMENT0]], [[BODY_GET_TUPLE_ELEMENT1]]) + // CHECK: "mhlo.return"([[BODY_RETURN_TUPLE]]) + "mhlo.return"(%1) : (tensor) -> () + // CHECK: (tuple, !mhlo.token>) -> tuple, !mhlo.token> + }) : (tensor) -> tensor + + // CHECK: [[WHILE_TUPLE_ELEMENT0:%.*]] = "mhlo.get_tuple_element"([[WHILE_TUPLE]]) + // CHECK-SAME: index = 0 + // CHECK: return [[WHILE_TUPLE_ELEMENT0]] + return %0 : tensor +} + +// ----- + +// Tests `mhlo.while` with only the `cond` region populated with TF/XLA +// communication ops. + +// CHECK-LABEL: func @while_cond +// CHECK-SAME: ([[ARG0:%.*]]: tensor) +func @while_cond(%arg0: tensor) -> tensor { + // CHECK: [[INIT_TOKEN:%.*]] = "mhlo.create_token" + // CHECK: [[ARG_TUPLE:%.*]] = "mhlo.tuple"([[ARG0]], [[INIT_TOKEN]]) + + // CHECK: [[WHILE_TUPLE:%.*]] = "mhlo.while"([[ARG_TUPLE]]) + %0 = "mhlo.while"(%arg0) ( { + // CHECK: ^bb0([[COND_REGION_ARG:%.*]]: tuple, !mhlo.token>): + ^bb0(%arg1: tensor): + // CHECK-DAG: [[COND_REGION_ARG_VALUE:%.*]] = "mhlo.get_tuple_element"([[COND_REGION_ARG]]) {index = 0 + // CHECK-DAG: [[COND_REGION_ARG_TOKEN:%.*]] = "mhlo.get_tuple_element"([[COND_REGION_ARG]]) {index = 1 + + // CHECK: [[COND_SEND_TOKEN:%.*]] = "mhlo.send"([[COND_REGION_ARG_VALUE]], [[COND_REGION_ARG_TOKEN]]) + // CHECK-SAME: channel_id = {handle = 1 : i64, type = 2 : i64} + // CHECK-SAME: mhlo.frontend_attributes = {_xla_host_transfer_original_type = "f32", _xla_host_transfer_rendezvous = "send_while_cond_dtoh_0"} + + // CHECK: [[COND_RECV_TUPLE:%.*]] = "mhlo.recv"([[COND_SEND_TOKEN]]) + // CHECK-SAME: channel_id = {handle = 2 : i64, type = 3 : i64} + // CHECK-SAME: mhlo.frontend_attributes = {_xla_host_transfer_original_type = "f32", _xla_host_transfer_rendezvous = "recv_while_cond_htod_0"} + %1 = "tf._XlaHostComputeMlir"(%arg1) {recv_key = "recv_while_cond", send_key = "send_while_cond", tpu_core = 0 : i64} : (tensor) -> tensor + + // CHECK-DAG: [[COND_GET_TUPLE_ELEMENT0:%.*]] = "mhlo.get_tuple_element"([[COND_RECV_TUPLE]]) {index = 0 + // CHECK-DAG: [[COND_GET_TUPLE_ELEMENT1:%.*]] = "mhlo.get_tuple_element"([[COND_RECV_TUPLE]]) {index = 1 + + // CHECK: [[COND_COMPARE:%.*]] = "mhlo.compare"([[COND_GET_TUPLE_ELEMENT0]], [[COND_GET_TUPLE_ELEMENT0]]) + %2 = "mhlo.compare"(%1, %1) {comparison_direction = "LT"} : (tensor, tensor) -> tensor + + // CHECK: "mhlo.return"([[COND_COMPARE]]) + "mhlo.return"(%2) : (tensor) -> () + }, { + // CHECK: ^bb0([[BODY_REGION_ARG:%.*]]: tuple, !mhlo.token>): + ^bb0(%arg1: tensor): + // CHECK-DAG: [[BODY_GET_TUPLE_ELEMENT0:%.*]] = "mhlo.get_tuple_element"([[BODY_REGION_ARG]]) {index = 0 + // CHECK-DAG: [[BODY_GET_TUPLE_ELEMENT1:%.*]] = "mhlo.get_tuple_element"([[BODY_REGION_ARG]]) {index = 1 + // CHECK: [[BODY_RETURN_TUPLE:%.*]] = "mhlo.tuple"([[BODY_GET_TUPLE_ELEMENT0]], [[BODY_GET_TUPLE_ELEMENT1]]) + // CHECK: "mhlo.return"([[BODY_RETURN_TUPLE]]) + "mhlo.return"(%arg1) : (tensor) -> () + // CHECK: (tuple, !mhlo.token>) -> tuple, !mhlo.token> + }) : (tensor) -> tensor + + // CHECK: [[WHILE_TUPLE_ELEMENT0:%.*]] = "mhlo.get_tuple_element"([[WHILE_TUPLE]]) + // CHECK-SAME: index = 0 + // CHECK: return 
[[WHILE_TUPLE_ELEMENT0]] + return %0 : tensor +} + +// ----- + +// Tests `mhlo.while` with only the `body` region populated with TF/XLA +// communication ops. + +// CHECK-LABEL: func @while_body +// CHECK-SAME: ([[ARG0:%.*]]: tensor) +func @while_body(%arg0: tensor) -> tensor { + // CHECK: [[INIT_TOKEN:%.*]] = "mhlo.create_token" + // CHECK: [[ARG_TUPLE:%.*]] = "mhlo.tuple"([[ARG0]], [[INIT_TOKEN]]) + + // CHECK: [[WHILE_TUPLE:%.*]] = "mhlo.while"([[ARG_TUPLE]]) + %0 = "mhlo.while"(%arg0) ( { + // CHECK: ^bb0([[COND_REGION_ARG:%.*]]: tuple, !mhlo.token>): + ^bb0(%arg1: tensor): + // CHECK-DAG: [[COND_GET_TUPLE_ELEMENT0:%.*]] = "mhlo.get_tuple_element"([[COND_REGION_ARG]]) {index = 0 + // CHECK-DAG: [[COND_GET_TUPLE_ELEMENT1:%.*]] = "mhlo.get_tuple_element"([[COND_REGION_ARG]]) {index = 1 + + // CHECK: [[COND_COMPARE:%.*]] = "mhlo.compare"([[COND_GET_TUPLE_ELEMENT0]], [[COND_GET_TUPLE_ELEMENT0]]) + %2 = "mhlo.compare"(%arg1, %arg1) {comparison_direction = "LT"} : (tensor, tensor) -> tensor + + // CHECK: "mhlo.return"([[COND_COMPARE]]) + "mhlo.return"(%2) : (tensor) -> () + }, { + // CHECK: ^bb0([[BODY_REGION_ARG:%.*]]: tuple, !mhlo.token>): + ^bb0(%arg1: tensor): + // CHECK-DAG: [[BODY_REGION_ARG_VALUE:%.*]] = "mhlo.get_tuple_element"([[BODY_REGION_ARG]]) {index = 0 + // CHECK-DAG: [[BODY_REGION_ARG_TOKEN:%.*]] = "mhlo.get_tuple_element"([[BODY_REGION_ARG]]) {index = 1 + + // CHECK: [[BODY_SEND_TOKEN:%.*]] = "mhlo.send"([[BODY_REGION_ARG_VALUE]], [[BODY_REGION_ARG_TOKEN]]) + // CHECK-SAME: channel_id = {handle = 1 : i64, type = 2 : i64} + // CHECK-SAME: mhlo.frontend_attributes = {_xla_host_transfer_original_type = "f32", _xla_host_transfer_rendezvous = "send_while_body_dtoh_0"} + + // CHECK: [[BODY_RECV_TUPLE:%.*]] = "mhlo.recv"([[BODY_SEND_TOKEN]]) + // CHECK-SAME: channel_id = {handle = 2 : i64, type = 3 : i64} + // CHECK-SAME: mhlo.frontend_attributes = {_xla_host_transfer_original_type = "f32", _xla_host_transfer_rendezvous = "recv_while_body_htod_0"} + %1 = "tf._XlaHostComputeMlir"(%arg1) {recv_key = "recv_while_body", send_key = "send_while_body", tpu_core = 0 : i64} : (tensor) -> tensor + + // CHECK-DAG: [[BODY_GET_TUPLE_ELEMENT0:%.*]] = "mhlo.get_tuple_element"([[BODY_RECV_TUPLE]]) {index = 0 + // CHECK-DAG: [[BODY_GET_TUPLE_ELEMENT1:%.*]] = "mhlo.get_tuple_element"([[BODY_RECV_TUPLE]]) {index = 1 + // CHECK: [[BODY_RETURN_TUPLE:%.*]] = "mhlo.tuple"([[BODY_GET_TUPLE_ELEMENT0]], [[BODY_GET_TUPLE_ELEMENT1]]) + // CHECK: "mhlo.return"([[BODY_RETURN_TUPLE]]) + "mhlo.return"(%1) : (tensor) -> () + // CHECK: (tuple, !mhlo.token>) -> tuple, !mhlo.token> + }) : (tensor) -> tensor + + // CHECK: [[WHILE_TUPLE_ELEMENT0:%.*]] = "mhlo.get_tuple_element"([[WHILE_TUPLE]]) + // CHECK-SAME: index = 0 + // CHECK: return [[WHILE_TUPLE_ELEMENT0]] + return %0 : tensor +} + +// ----- + +// Tests `mhlo.while` containing TF/XLA communication ops followed by other +// TF/XLA communication ops. 
+ +func @while_followed_by_communication_op(%arg0: tensor) { + // CHECK: [[WHILE_TUPLE:%.*]] = "mhlo.while" + %0 = "mhlo.while"(%arg0) ( { + ^bb0(%arg1: tensor): + "tf.XlaSendToHost"(%arg1) {key = "send_key0"} : (tensor) -> () + %1 = "mhlo.compare"(%arg1, %arg1) {comparison_direction = "LT"} : (tensor, tensor) -> tensor + "mhlo.return"(%1) : (tensor) -> () + }, { + ^bb0(%arg1: tensor): + "mhlo.return"(%arg1) : (tensor) -> () + }) : (tensor) -> tensor + + // CHECK: [[WHILE_TUPLE_ELEMENT1:%.*]] = "mhlo.get_tuple_element"([[WHILE_TUPLE]]) {index = 1 + + // CHECK: "mhlo.send"({{.*}}, [[WHILE_TUPLE_ELEMENT1]]) + "tf.XlaSendToHost"(%arg0) {key = "send_key1"} : (tensor) -> () + return +} + +// ----- + // Tests unsupported parent of TF/XLA communication op. func @unsupported_ancestor(%arg0: tensor, %arg1: tensor) { diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_communication.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_communication.cc index b4e4f5c4f5c..1d6ce36300f 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_communication.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_communication.cc @@ -57,7 +57,7 @@ const char kXlaHostTransferOriginalTypeAttr[] = // tokens (for ordering), and rewrite their respective functions and control // flow ops when necessary. // Note, this currently does not handle nested modules/functions or region based -// ops other than certain control flow ops (`mhlo.if`). +// ops other than certain control flow ops (`mhlo.if`, `mhlo.while`). class LegalizeTFCommunication : public PassWrapper> { public: @@ -71,7 +71,7 @@ bool IsCommunicationOp(Operation* op) { } // Checks if an op is a supported HLO control flow op. -bool IsControlFlowOp(Operation* op) { return isa(op); } +bool IsControlFlowOp(Operation* op) { return isa(op); } // Collects control flow op ancestors of a given op, up until FuncOp. If any // ancestor is not a control flow op or a FuncOp, or of a single block region, @@ -541,6 +541,10 @@ void RewriteControlFlowTerminator(OpBuilder& builder, Operation* terminator, Value token) { assert(terminator->getNumOperands() == 1); assert(terminator->getBlock()->getNumArguments() == 1); + // `mhlo.while` cond terminator does not need to be rewritten as it always + // returns a tensor predicate value. + if (auto while_parent = dyn_cast_or_null(terminator->getParentOp())) + if (terminator->getParentRegion() == &while_parent.cond()) return; builder.setInsertionPoint(terminator); llvm::SmallDenseMap rewritten_operands; @@ -554,25 +558,24 @@ void RewriteControlFlowTerminator(OpBuilder& builder, Operation* terminator, void RewriteRegionIfOp(OpBuilder& builder, IfOp region_if, SmallVectorImpl& ops_to_visit, Value token) { - SmallVector new_branch_operands; llvm::SmallDenseMap rewritten_operands; - auto old_branch_operands = llvm::drop_begin(region_if.getOperands(), 1); // Rewrite all region operands to have an extra operand `token`. - for (Value operand : old_branch_operands) - new_branch_operands.push_back( - GetValueWithToken(builder, operand, token, rewritten_operands)); + Value new_true_operand = GetValueWithToken(builder, region_if.true_arg(), + token, rewritten_operands); + Value new_false_operand = GetValueWithToken(builder, region_if.false_arg(), + token, rewritten_operands); auto new_result_type = GetTypeWithToken(builder, region_if.getType()); // Create new `mhlo.if` op with extra token operands and result. 
auto new_if = builder.create(region_if.getLoc(), new_result_type, - region_if.pred(), new_branch_operands[0], - new_branch_operands[1]); + region_if.pred(), new_true_operand, + new_false_operand); // Move all regions from the old `mhlo.if` op to its replacement. - for (auto& region_and_idx : llvm::enumerate(region_if.getRegions())) - new_if.getRegion(region_and_idx.index()).takeBody(*region_and_idx.value()); + new_if.true_branch().takeBody(region_if.true_branch()); + new_if.false_branch().takeBody(region_if.false_branch()); // Forward result from old `mhlo.if` with replacement, and unpack result when // necessary. @@ -594,21 +597,22 @@ void RewriteRegionIfOp(OpBuilder& builder, IfOp region_if, ops_to_visit.push_back({/*region_idx=*/0, new_token, new_if}); } -// Rewrites a `mhlo.if` region to receive and forward a `mhlo.token`. The block -// argument is updated to have an extra `mhlo.token` element. If the region -// block is to be rewritten, the next op to visit is set to the first op in the -// block. Otherwise the terminator is updated to forward `token`. -void RewriteRegionIfRegion( - OpBuilder& builder, IfOp region_if, unsigned region_idx, - SmallVectorImpl& ops_to_visit, +// Rewrites a `mhlo.if`/`mhlo.while` region to receive and forward a +// `mhlo.token`. The block argument is updated to have an extra `mhlo.token` +// element. If the region block is to be rewritten, the next op to visit is set +// to the first op in the block. Otherwise the terminator is updated to forward +// `token`. +void RewriteControlFlowOpRegion( + OpBuilder& builder, Operation* region_op, unsigned region_idx, + Type block_arg_type, SmallVectorImpl& ops_to_visit, const llvm::SmallPtrSetImpl& control_flow_blocks, Value token) { - ops_to_visit.push_back({region_idx + 1, token, region_if}); + ops_to_visit.push_back({region_idx + 1, token, region_op}); - Region& region = region_if.getRegion(region_idx); + Region& region = region_op->getRegion(region_idx); assert(llvm::hasSingleElement(region)); - auto block_token = UpdateControlFlowBlockArgWithToken( - builder, region.front(), region_if.getOperand(region_idx + 1).getType()); + auto block_token = UpdateControlFlowBlockArgWithToken(builder, region.front(), + block_arg_type); if (control_flow_blocks.contains(®ion.front())) { ops_to_visit.push_back({/*region_idx=*/llvm::None, block_token, @@ -621,9 +625,9 @@ void RewriteRegionIfRegion( } // Rewrites an `mhlo.if` op or its region. If `region_idx` is not set, the op -// operands and results rewritten. If `region_idx` is set, region `region_idx` -// is rewritten to take in and return an additional token. Returns true if op -// is still being rewritten. +// operands and results are rewritten. If `region_idx` is set, region +// `region_idx` is rewritten to take in and return an additional token. Returns +// true if the op or its region was rewritten. bool ProcessRegionIfOp(OpBuilder& builder, IfOp region_if, Optional region_idx, SmallVectorImpl& ops_to_visit, @@ -637,8 +641,76 @@ bool ProcessRegionIfOp(OpBuilder& builder, IfOp region_if, } if (*region_idx < region_if.getNumRegions()) { - RewriteRegionIfRegion(builder, region_if, *region_idx, ops_to_visit, - control_flow_blocks, token); + RewriteControlFlowOpRegion(builder, region_if, *region_idx, + region_if.getOperand(*region_idx + 1).getType(), + ops_to_visit, control_flow_blocks, token); + return true; + } + + return false; +} + +// Rewrites a `mhlo.while` op to receive and forward a `mhlo.token`. 
Operands to
+// the op for all of its regions are extended to have an extra operand `token`.
+void RewriteRegionWhileOp(OpBuilder& builder, WhileOp region_while,
+                          SmallVectorImpl<OpVisitorState>& ops_to_visit,
+                          Value token) {
+  llvm::SmallDenseMap<Value, Value> rewritten_operands;
+
+  // Rewrite region operand to have an extra operand `token`.
+  Value new_val_operand =
+      GetValueWithToken(builder, region_while.val(), token, rewritten_operands);
+
+  auto new_result_type = GetTypeWithToken(builder, region_while.getType());
+
+  // Create new `mhlo.while` op with extra token operand and result.
+  auto new_while = builder.create<WhileOp>(region_while.getLoc(),
+                                           new_result_type, new_val_operand);
+
+  // Move all regions from the old `mhlo.while` op to its replacement.
+  new_while.cond().takeBody(region_while.cond());
+  new_while.body().takeBody(region_while.body());
+
+  // Forward result from old `mhlo.while` with replacement, and unpack result
+  // when necessary.
+  ReplaceWithTupleResult(builder, region_while.getResult(),
+                         new_while.getResult());
+
+  auto new_token = builder.create<GetTupleElementOp>(
+      new_while.getLoc(), new_while.getResult(),
+      new_while.getResult().getType().cast<TupleType>().size() - 1);
+
+  region_while.erase();
+
+  // Remove leftover operands to old `mhlo.while` if they have no uses.
+  for (auto& rewritten_operand : rewritten_operands)
+    if (auto tuple_op = rewritten_operand.getFirst().getDefiningOp<TupleOp>())
+      if (tuple_op.use_empty()) tuple_op.erase();
+
+  // Next op to visit. The replacement is visited but at its first region. The
+  // token result of the new `mhlo.while` is propagated.
+  ops_to_visit.push_back({/*region_idx=*/0, new_token, new_while});
+}
+
+// Rewrites an `mhlo.while` op or its region. If `region_idx` is not set, the op
+// operands and results are rewritten. If `region_idx` is set, region
+// `region_idx` is rewritten to take in and return an additional token. Returns
+// true if the op or its region was rewritten.
+bool ProcessRegionWhileOp(
+    OpBuilder& builder, WhileOp region_while, Optional<unsigned> region_idx,
+    SmallVectorImpl<OpVisitorState>& ops_to_visit,
+    const llvm::SmallPtrSetImpl<Block*>& control_flow_blocks, Value token) {
+  builder.setInsertionPoint(region_while);
+
+  if (!region_idx) {
+    RewriteRegionWhileOp(builder, region_while, ops_to_visit, token);
+    return true;
+  }
+
+  if (*region_idx < region_while.getNumRegions()) {
+    RewriteControlFlowOpRegion(builder, region_while, *region_idx,
+                               region_while.val().getType(), ops_to_visit,
+                               control_flow_blocks, token);
+    return true;
+  }
+
+  return false;
+}
+
@@ -730,6 +802,11 @@ LogicalResult RewriteFunction(
       if (ProcessRegionIfOp(builder, region_if, op_to_visit.region_idx,
                             ops_to_visit, control_flow_blocks, token))
         continue;
+    } else if (auto region_while = dyn_cast<WhileOp>(curr_op)) {
+      if (op_to_visit.region_idx || control_flow_ops.contains(region_while))
+        if (ProcessRegionWhileOp(builder, region_while, op_to_visit.region_idx,
+                                 ops_to_visit, control_flow_blocks, token))
+          continue;
     } else if (auto region_terminator = dyn_cast(curr_op)) {
       RewriteControlFlowTerminator(builder, region_terminator, token);
       // There is no next op after the control flow op terminator, simply let

From 525e240a17eff24c1901b1468554c846028b665e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20K=C3=B6ppe?=
Date: Mon, 10 Aug 2020 11:13:41 -0700
Subject: [PATCH 2451/2522] Remove using-declaration for absl::bit_cast.

Using-declarations into absl:: are discouraged, and this removal
disambiguates which overload of absl::bit_cast is actually selected, and
thus helps us trace the remaining uses of the "bad" overload.
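As a minimal sketch of the fully-qualified style this cleanup moves toward
(illustrative only, not part of this change; the value-to-value overload shown
here is the well-defined one, whereas the pointer-reinterpreting calls in the
jpeg test go through the legacy overload being traced):

    // Hypothetical example, not from the TensorFlow tree.
    #include <cstdint>
    #include <cstdio>
    #include "absl/base/casts.h"

    int main() {
      const float f = 1.0f;
      // Qualified call site: no `using absl::bit_cast;`, so the overload that
      // gets selected is unambiguous where it is used.
      const uint32_t bits = absl::bit_cast<uint32_t>(f);
      std::printf("%08x\n", bits);  // prints 3f800000 for 1.0f
      return 0;
    }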
PiperOrigin-RevId: 325845404 Change-Id: I6441a0be8ef048ca8af544047b36b1eefac94b85 --- tensorflow/core/lib/jpeg/jpeg_mem_unittest.cc | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/lib/jpeg/jpeg_mem_unittest.cc b/tensorflow/core/lib/jpeg/jpeg_mem_unittest.cc index ac8f657d20d..4bbe74c423a 100644 --- a/tensorflow/core/lib/jpeg/jpeg_mem_unittest.cc +++ b/tensorflow/core/lib/jpeg/jpeg_mem_unittest.cc @@ -34,7 +34,6 @@ namespace tensorflow { namespace jpeg { namespace { -using absl::bit_cast; const char kTestData[] = "tensorflow/core/lib/jpeg/testdata/"; int ComputeSumAbsoluteDifference(const uint8* a, const uint8* b, int width, @@ -60,7 +59,7 @@ void TestJPEG(Env* env, const string& jpegfile) { string jpeg; ReadFileToStringOrDie(env, jpegfile, &jpeg); const int fsize = jpeg.size(); - const uint8* const temp = bit_cast(jpeg.data()); + const uint8* const temp = absl::bit_cast(jpeg.data()); // Try partial decoding (half of the data) int w, h, c; @@ -102,7 +101,7 @@ void TestCropAndDecodeJpeg(Env* env, const string& jpegfile, string jpeg; ReadFileToStringOrDie(env, jpegfile, &jpeg); const int fsize = jpeg.size(); - auto temp = bit_cast(jpeg.data()); + const auto* temp = absl::bit_cast(jpeg.data()); // Decode the whole image. std::unique_ptr imgdata1; @@ -225,7 +224,7 @@ TEST(JpegMemTest, CropAndDecodeJpegWithStride) { string jpeg; ReadFileToStringOrDie(env, data_path + "jpeg_merge_test1.jpg", &jpeg); const int fsize = jpeg.size(); - auto temp = bit_cast(jpeg.data()); + const auto* temp = absl::bit_cast(jpeg.data()); int w, h, c; ASSERT_TRUE(GetImageInfo(temp, fsize, &w, &h, &c)); @@ -263,7 +262,7 @@ TEST(JpegMemTest, CropAndDecodeJpegWithInvalidCropWindow) { string jpeg; ReadFileToStringOrDie(env, data_path + "jpeg_merge_test1.jpg", &jpeg); const int fsize = jpeg.size(); - auto temp = bit_cast(jpeg.data()); + const auto* temp = absl::bit_cast(jpeg.data()); int w, h, c; ASSERT_TRUE(GetImageInfo(temp, fsize, &w, &h, &c)); From 52b64bc279923596d4f2d8b4c6355eb926f1b76d Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Mon, 10 Aug 2020 11:38:09 -0700 Subject: [PATCH 2452/2522] Make global NOW timestamp for upload times I forgot that most scripts don't execute within one nanosecond. 
PiperOrigin-RevId: 325850999 Change-Id: Ib05e4debd598299a4d17c603908a5625082e9331 --- tensorflow/tools/ci_build/sizetrack_helper.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/tools/ci_build/sizetrack_helper.py b/tensorflow/tools/ci_build/sizetrack_helper.py index 8377d733c56..eb3a6afda5e 100755 --- a/tensorflow/tools/ci_build/sizetrack_helper.py +++ b/tensorflow/tools/ci_build/sizetrack_helper.py @@ -114,6 +114,9 @@ size.add_argument( help="Manually set the recorded size instead of providing an artifact.") FLAGS = parser.parse_args() + +NOW = datetime.datetime.now( + datetime.timezone.utc).replace(microsecond=0).isoformat() TABLE_NAME = "{}.{}".format(FLAGS.dataset, FLAGS.table) PROJECT_LEVEL_TABLE_NAME = "{}:{}".format(FLAGS.project, TABLE_NAME) CL_TRAILER = "PiperOrigin-RevId" @@ -285,15 +288,13 @@ def get_upload_path(): """Generate URL for 'gsutil cp'.""" if FLAGS.upload and FLAGS.artifact: artifact_filename = os.path.basename(FLAGS.artifact.name) - ts = datetime.datetime.now( - datetime.timezone.utc).replace(microsecond=0).isoformat() # note: not os.path.join here, because gsutil is always linux-style # Using a timestamp prevents duplicate entries path = "{bucket}/{team}/{artifact_id}/{now}.{artifact_filename}".format( bucket=FLAGS.bucket, team=FLAGS.team, artifact_id=FLAGS.artifact_id, - now=ts, + now=NOW, artifact_filename=artifact_filename) return path else: From f7ca4c921e71d28257c88c6787dae911e3411f25 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Mon, 10 Aug 2020 11:58:28 -0700 Subject: [PATCH 2453/2522] Add TpuTopologyExternal::cores(). Also expose the underlying TpuCoreLocationExternal pointer so internal users can get at not-yet-exposed functionality. PiperOrigin-RevId: 325855758 Change-Id: I640e4d4996f60689cd533321434a5fec5f612c60 --- tensorflow/core/tpu/tpu_library_init_fns.inc | 2 ++ .../stream_executor/tpu/tpu_executor_c_api.h | 6 ++++++ tensorflow/stream_executor/tpu/tpu_topology.cc | 15 +++++++++++++++ tensorflow/stream_executor/tpu/tpu_topology.h | 5 +++++ 4 files changed, 28 insertions(+) diff --git a/tensorflow/core/tpu/tpu_library_init_fns.inc b/tensorflow/core/tpu/tpu_library_init_fns.inc index 40130bd46dd..a27dbea2388 100644 --- a/tensorflow/core/tpu/tpu_library_init_fns.inc +++ b/tensorflow/core/tpu/tpu_library_init_fns.inc @@ -175,6 +175,8 @@ tensorflow::Status SetExecutorStructFn(void* library_handle) { TFTPU_SET_FN(executor_fn, TpuTopology_ChipBounds_Z); TFTPU_SET_FN(executor_fn, TpuTopology_HasChip); TFTPU_SET_FN(executor_fn, TpuTopology_Core); + TFTPU_SET_FN(executor_fn, TpuTopology_NumCores); + TFTPU_SET_FN(executor_fn, TpuTopology_Cores); TFTPU_SET_FN(executor_fn, TpuTopology_IdForHost); TFTPU_SET_FN(executor_fn, TpuCoreLocation_ChipCoordinates); TFTPU_SET_FN(executor_fn, TpuCoreLocation_HostCoordinates); diff --git a/tensorflow/stream_executor/tpu/tpu_executor_c_api.h b/tensorflow/stream_executor/tpu/tpu_executor_c_api.h index 013e7fe4e0c..1dcb3eaf244 100644 --- a/tensorflow/stream_executor/tpu/tpu_executor_c_api.h +++ b/tensorflow/stream_executor/tpu/tpu_executor_c_api.h @@ -203,6 +203,10 @@ int TpuTopology_ChipBounds_Z(void* tpu_topology); bool TpuTopology_HasChip(void* tpu_topology, int x, int y, int z); void* TpuTopology_Core(void* tpu_topology, int x, int y, int z, TpuCoreTypeEnum tpu_core_type, int index); +int TpuTopology_NumCores(void* tpu_topology, TpuCoreTypeEnum tpu_core_type); +// 'cores' should be a preallocated array of size TpuTopology_NumCores. 
+void TpuTopology_Cores(void* tpu_topology, TpuCoreTypeEnum tpu_core_type, + void** cores); int TpuTopology_IdForHost(void* tpu_topology, int x, int y, int z); void TpuCoreLocation_ChipCoordinates(void* tpu_core_location, int* x, int* y, int* z); @@ -357,6 +361,8 @@ struct TfTpu_ExecutorApiFn { TFTPU_ADD_FN_IN_STRUCT(TpuTopology_ChipBounds_Z); TFTPU_ADD_FN_IN_STRUCT(TpuTopology_HasChip); TFTPU_ADD_FN_IN_STRUCT(TpuTopology_Core); + TFTPU_ADD_FN_IN_STRUCT(TpuTopology_NumCores); + TFTPU_ADD_FN_IN_STRUCT(TpuTopology_Cores); TFTPU_ADD_FN_IN_STRUCT(TpuTopology_IdForHost); TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_ChipCoordinates); diff --git a/tensorflow/stream_executor/tpu/tpu_topology.cc b/tensorflow/stream_executor/tpu/tpu_topology.cc index 74eb0aaf607..cfcea2dc944 100644 --- a/tensorflow/stream_executor/tpu/tpu_topology.cc +++ b/tensorflow/stream_executor/tpu/tpu_topology.cc @@ -75,6 +75,21 @@ TpuCoreLocationExternal TpuTopologyExternal::Core(int x, int y, int z, topology_, x, y, z, core_type, index)); } +std::vector TpuTopologyExternal::cores( + TpuCoreTypeEnum core_type) const { + int num_cores = + tpu::ExecutorApiFn()->TpuTopology_NumCoresFn(topology_, core_type); + std::vector core_ptrs(num_cores); + tpu::ExecutorApiFn()->TpuTopology_CoresFn(topology_, core_type, + core_ptrs.data()); + std::vector result; + result.reserve(num_cores); + for (void* ptr : core_ptrs) { + result.emplace_back(ptr); + } + return result; +} + int TpuTopologyExternal::IdForHost(TpuDimensionsExternal host) const { return tpu::ExecutorApiFn()->TpuTopology_IdForHostFn(topology_, host.x, host.y, host.z); diff --git a/tensorflow/stream_executor/tpu/tpu_topology.h b/tensorflow/stream_executor/tpu/tpu_topology.h index 6b64fb64985..3b0c4c5aa20 100644 --- a/tensorflow/stream_executor/tpu/tpu_topology.h +++ b/tensorflow/stream_executor/tpu/tpu_topology.h @@ -16,6 +16,8 @@ limitations under the License. #ifndef TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_TOPOLOGY_H_ #define TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_TOPOLOGY_H_ +#include + #include "tensorflow/core/platform/types.h" #include "tensorflow/stream_executor/tpu/c_api_decl.h" @@ -38,6 +40,8 @@ class TpuCoreLocationExternal { int32 index() const; int32 Id() const; + void* impl() const { return core_location_; } + private: void* core_location_; }; @@ -67,6 +71,7 @@ class TpuTopologyExternal { bool HasChip(int x, int y, int z) const; TpuCoreLocationExternal Core(int x, int y, int z, TpuCoreTypeEnum core_type, int index) const; + std::vector cores(TpuCoreTypeEnum core_type) const; int IdForHost(TpuDimensionsExternal host) const; private: From 9908d143206dffc4b24b9b028ebfa0dda392e05d Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Mon, 10 Aug 2020 12:19:15 -0700 Subject: [PATCH 2454/2522] Add chlo.constant_like op which splats a constant to shape of operand This allows specifying a constant whose shape is only known when operand shape is. Also use it to update tf.Acos legalization. 
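A minimal sketch of how a lowering pattern could create the new op through the
getConstantLike helper this change adds to chlo_ops.h (illustrative only, not
part of the change; EmitSplatOne is a hypothetical name, and the builder and
value are assumed to come from an enclosing rewrite):

    #include "mlir-hlo/Dialect/mhlo/IR/chlo_ops.h"
    #include "mlir/IR/Builders.h"

    // Hypothetical helper: emits chlo.constant_like {value = 1.0 : f32} shaped
    // like `x`, which works even when `x` has a dynamic or unranked shape. The
    // canonicalization pattern added in this change folds the op into a plain
    // mhlo.constant once the operand shape is static.
    mlir::Value EmitSplatOne(mlir::OpBuilder &b, mlir::Value x) {
      return mlir::chlo::getConstantLike(b, 1.0f, x);
    }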
PiperOrigin-RevId: 325860604 Change-Id: I93317bd2c9d6935d527712b10b6ef312c4f8548f --- .../mlir-hlo/Dialect/mhlo/IR/chlo_ops.h | 14 ++++++ .../mlir-hlo/Dialect/mhlo/IR/chlo_ops.td | 18 ++++++++ .../mlir-hlo/Dialect/mhlo/IR/hlo_utils.td | 3 ++ .../lib/Dialect/mhlo/IR/chlo_canonicalize.td | 30 +++++++++++++ .../mlir/hlo/lib/Dialect/mhlo/IR/chlo_ops.cc | 44 +++++++++++++++++++ .../compiler/mlir/hlo/tests/canonicalize.mlir | 14 ++++++ .../compiler/mlir/xla/tests/legalize-tf.mlir | 27 ++++++++++-- .../mlir/xla/transforms/legalize_tf.cc | 10 +++++ .../xla/transforms/legalize_tf_patterns.td | 23 +++++----- 9 files changed, 170 insertions(+), 13 deletions(-) create mode 100644 tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/chlo_canonicalize.td diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.h b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.h index 4c92ef3de85..9704f34a4d6 100644 --- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.h +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.h @@ -24,6 +24,7 @@ limitations under the License. #include "mlir/IR/OpDefinition.h" #include "mlir/IR/Operation.h" #include "mlir/IR/StandardTypes.h" +#include "mlir/IR/TypeUtilities.h" #include "mlir/IR/Types.h" #include "mlir/Interfaces/InferTypeOpInterface.h" #include "mlir/Interfaces/SideEffectInterfaces.h" @@ -46,6 +47,19 @@ class HloClientDialect : public Dialect { #define GET_OP_CLASSES #include "mlir-hlo/Dialect/mhlo/IR/chlo_ops.h.inc" +template +static Value getConstantLike(OpBuilder& b, T constant, Value val) { + Type ty = getElementTypeOrSelf(val.getType()); + + auto getAttr = [&]() -> Attribute { + if (ty.isa()) return b.getIntegerAttr(ty, constant); + if (ty.isa()) return b.getFloatAttr(ty, constant); + llvm_unreachable("unhandled element type"); + }; + // TODO(jpienaar): Add ability to pass loc via native call and update. + return b.create(b.getUnknownLoc(), getAttr(), val); +} + } // namespace chlo } // namespace mlir diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.td b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.td index d7cdd12d351..2f3bbefb5ab 100644 --- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.td +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.td @@ -364,6 +364,24 @@ def HLOClient_AcosOp: HLOClient_UnaryElementwiseOp<"acos", }]; } +def HLOClient_ConstantLikeOp: HLOClient_Op<"constant_like", + [NoSideEffect, SameOperandsAndResultShape, + InferTypeOpInterface, + DeclareOpInterfaceMethods, + NativeOpTrait<"InferTensorType">]> { + let summary = "Constant like operator"; + + let description = [{ + Returns a splat constant of the same shape as the operand. + }]; + + // TODO(jpienaar): value's type could be tightened. 
+ let arguments = (ins AnyAttr:$value, HLO_Tensor:$operand); + let results = (outs HLO_Tensor); + + let hasCanonicalizer = 1; +} + //===----------------------------------------------------------------------===// // Broadcasting compare op //===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_utils.td b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_utils.td index e1ae9e1fb89..c201aeff8ec 100644 --- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_utils.td +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_utils.td @@ -27,6 +27,9 @@ def CastIntElementsAttr : NativeCodeCall<"$0.cast()">; class ConstantSplat : NativeCodeCall< "hlo::getSplat(&$_builder, $0, " # value # ")">; +class HLO_ConstantLike : NativeCodeCall< + "chlo::getConstantLike($_builder, " # value # ", $0)">; + def NullDenseIntElementsAttr : NativeCodeCall<"DenseIntElementsAttr()">; def BinBroadcastDimensions : NativeCodeCall< diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/chlo_canonicalize.td b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/chlo_canonicalize.td new file mode 100644 index 00000000000..eb92d9e0e46 --- /dev/null +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/chlo_canonicalize.td @@ -0,0 +1,30 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// This is the canonicalize pattern definition file. + +include "mlir/IR/OpBase.td" +include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.td" +include "mlir-hlo/Dialect/mhlo/IR/hlo_utils.td" + +def UnaryToBinaryEinsumEq : NativeCodeCall< + "$_builder.getStringAttr(\",\" + $0.getValue().str())">; + +// Convert UnaryEinsumOp to EinsumOp with two operands with redundant first +// operand. +def UnaryEinsumToEinsum : Pat< + (HLO_UnaryEinsumOp $operand, $equation), + (HLO_EinsumOp (HLO_ConstOp (GetScalarOfType<1> $operand)), + $operand, (UnaryToBinaryEinsumEq $equation))>; diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/chlo_ops.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/chlo_ops.cc index d43dd71e94b..b5eacd686bd 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/chlo_ops.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/chlo_ops.cc @@ -15,10 +15,12 @@ limitations under the License. 
#include "mlir-hlo/Dialect/mhlo/IR/chlo_ops.h" +#include "mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" #include "mlir-hlo/utils/broadcast_utils.h" #include "mlir/IR/Attributes.h" #include "mlir/IR/Builders.h" #include "mlir/IR/Diagnostics.h" +#include "mlir/IR/PatternMatch.h" #include "mlir/IR/StandardTypes.h" #include "mlir/IR/TypeUtilities.h" @@ -259,6 +261,48 @@ BROADCAST_BINARY_OP_DEFS(BroadcastXorOp); #undef BROADCAST_INFER_SHAPE_TYPE_OP_DEFS #undef BROADCAST_BINARY_OP_DEFS +static LogicalResult Verify(ConstantLikeOp op) { + if (op.value().getType() != op.getType().cast().getElementType()) + return op.emitOpError() << "value's type doesn't match element return type"; + return success(); +} + +LogicalResult ConstantLikeOp::inferReturnTypeComponents( + MLIRContext* context, Optional location, ValueRange operands, + DictionaryAttr attributes, RegionRange regions, + SmallVectorImpl& inferedReturnShapes) { + ConstantLikeOp::Adaptor op(operands, attributes); + if (failed(op.verify(location.getValue()))) return failure(); + Type element_type = op.value().getType(); + Type operand_type = op.operand().getType(); + if (operand_type.isa()) { + inferedReturnShapes.emplace_back(element_type); + } else { + const auto& shape = operand_type.cast().getShape(); + inferedReturnShapes.emplace_back(shape, element_type); + } + return success(); +} + +struct ConstantLikeToConstant : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(ConstantLikeOp op, + PatternRewriter& rewriter) const override { + auto op_type = op.operand().getType().cast(); + if (!op_type.hasStaticShape()) return failure(); + auto type = RankedTensorType::get(op_type.getShape(), op.value().getType()); + ElementsAttr attr = DenseElementsAttr::get(type, op.value()); + rewriter.replaceOpWithNewOp(op.getOperation(), attr); + return success(); + } +}; + +void ConstantLikeOp::getCanonicalizationPatterns( + OwningRewritePatternList& results, MLIRContext* context) { + results.insert(context); +} + #define GET_OP_CLASSES #include "mlir-hlo/Dialect/mhlo/IR/chlo_ops.cc.inc" diff --git a/tensorflow/compiler/mlir/hlo/tests/canonicalize.mlir b/tensorflow/compiler/mlir/hlo/tests/canonicalize.mlir index e793e213e50..15b1a150fdd 100644 --- a/tensorflow/compiler/mlir/hlo/tests/canonicalize.mlir +++ b/tensorflow/compiler/mlir/hlo/tests/canonicalize.mlir @@ -191,6 +191,20 @@ func @concatenate_const_2D_horizontal() -> tensor<2x2xi32> { return %2 : tensor<2x2xi32> } +// CHECK-LABEL: constant_like_constant +func @constant_like_constant(%arg0: tensor<3x4xi32>) -> tensor<3x4xf32> { + // CHECK: mhlo.constant dense<3.200000e+00> + %0 = "chlo.constant_like"(%arg0) { value = 3.2 : f32 } : (tensor<3x4xi32>) -> tensor<3x4xf32> + return %0 : tensor<3x4xf32> +} + +// CHECK-LABEL: constant_like_constant_dynamic +func @constant_like_constant_dynamic(%arg0: tensor<*xi32>) -> tensor<*xf32> { + // CHECK: chlo.constant_like + %0 = "chlo.constant_like"(%arg0) { value = 3.2 : f32 } : (tensor<*xi32>) -> tensor<*xf32> + return %0 : tensor<*xf32> +} + // CHECK-LABEL: dynamic_slice_variable_start func @dynamic_slice_variable_start(%arg0: tensor<3x4xi32>, %arg1: tensor, %arg2: tensor) -> tensor<1x4xi32> { // CHECK: "mhlo.dynamic-slice" diff --git a/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir b/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir index bad9c1ef279..9b32fb97260 100644 --- a/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir +++ b/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir @@ -1,5 +1,5 @@ // RUN: tf-opt 
"-xla-legalize-tf=allow-partial-conversion legalize-chlo=false" %s | FILECHECK_OPTS="" FileCheck %s -// RUN: tf-opt "-xla-legalize-tf=allow-partial-conversion legalize-chlo=true" -verify-diagnostics %s | FileCheck %s --check-prefix CHLO +// RUN: tf-opt "-xla-legalize-tf=allow-partial-conversion legalize-chlo=true" -verify-diagnostics %s | FileCheck %s --check-prefix CHLO --dump-input-filter=all // This test runs twice: // 1. Through FILECHECK_OPTS="" FileCheck with chlo legalization disabled since verifying // that the chlo ops emit produces more useful tests. @@ -1854,14 +1854,14 @@ func @abs_unranked(%arg0: tensor<*xf32>) -> tensor<*xf32> { func @acos(%arg0: tensor<2xf32>) -> tensor<2xf32> { // CHECK: "chlo.acos"(%arg0) : (tensor<2xf32>) -> tensor<2xf32> // CHLO: %[[VAL_1:.*]] = "mhlo.compare"({{.*}}) {comparison_direction = "NE"} -// CHLO: %[[VAL_3:.*]] = mhlo.constant dense<2.000000e+00> -// CHLO: %[[VAL_4:.*]] = mhlo.constant dense<1.000000e+00> // CHLO: %[[VAL_5:.*]] = mhlo.multiply %arg0, %arg0 +// CHLO: %[[VAL_4:.*]] = mhlo.constant dense<1.000000e+00> // CHLO: %[[VAL_6:.*]] = mhlo.subtract %[[VAL_4]], %[[VAL_5]] // CHLO: %[[VAL_7:.*]] = "mhlo.sqrt"(%[[VAL_6]]) // CHLO: %[[VAL_8:.*]] = mhlo.constant dense<1.000000e+00> // CHLO: %[[VAL_9:.*]] = mhlo.add %[[VAL_8]], %arg0 // CHLO: %[[VAL_10:.*]] = mhlo.atan2 %[[VAL_7]], %[[VAL_9]] +// CHLO: %[[VAL_3:.*]] = mhlo.constant dense<2.000000e+00> // CHLO: %[[VAL_11:.*]] = mhlo.multiply %[[VAL_3]], %[[VAL_10]] // CHLO: %[[VAL_12:.*]] = mhlo.constant dense<3.14159274> // CHLO: %[[VAL_13:.*]] = "mhlo.select"(%[[VAL_1]], %[[VAL_11]], %[[VAL_12]]) @@ -1870,6 +1870,27 @@ func @acos(%arg0: tensor<2xf32>) -> tensor<2xf32> { return %0 : tensor<2xf32> } +// CHECK-LABEL: @acos_dynamic +// CHLO-LABEL: @acos_dynamic +func @acos_dynamic(%arg0: tensor<*xf32>) -> tensor<*xf32> { + // CHECK: "chlo.acos"(%arg0) : (tensor<*xf32>) -> tensor<*xf32> +// CHLO: %[[VAL_1:.*]] = "mhlo.compare"({{.*}}) {comparison_direction = "NE"} +// CHLO: %[[VAL_5:.*]] = mhlo.multiply %arg0, %arg0 +// CHLO: %[[VAL_4:.*]] = "chlo.constant_like"(%arg0) {value = 1.000000e+00 : f32} +// CHLO: %[[VAL_6:.*]] = mhlo.subtract %[[VAL_4]], %[[VAL_5]] +// CHLO: %[[VAL_7:.*]] = "mhlo.sqrt"(%[[VAL_6]]) +// CHLO: %[[VAL_8:.*]] = "chlo.constant_like"(%arg0) {value = 1.000000e+00 : f32} +// CHLO: %[[VAL_9:.*]] = mhlo.add %[[VAL_8]], %arg0 +// CHLO: %[[VAL_10:.*]] = mhlo.atan2 %[[VAL_7]], %[[VAL_9]] +// CHLO: %[[VAL_3:.*]] = "chlo.constant_like"(%arg0) {value = 2.000000e+00 : f32} +// CHLO: %[[VAL_11:.*]] = mhlo.multiply %[[VAL_3]], %[[VAL_10]] +// CHLO: %[[VAL_12:.*]] = "chlo.constant_like"(%arg0) {value = 3.14159274 : f32} +// CHLO: %[[VAL_13:.*]] = "mhlo.select"(%[[VAL_1]], %[[VAL_11]], %[[VAL_12]]) +// CHLO: return %[[VAL_13]] + %0 = "tf.Acos"(%arg0) : (tensor<*xf32>) -> tensor<*xf32> + return %0 : tensor<*xf32> +} + // CHECK-LABEL: func @cast_dynamic_i2f func @cast_dynamic_i2f(%arg0: tensor) -> tensor { // CHECK: "mhlo.convert"(%arg0) : (tensor) -> tensor diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc index aa6f25570a1..1f63f2a9396 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc @@ -5807,10 +5807,20 @@ LogicalResult legalizeTF( if (legalize_chlo) { chlo::PopulateLegalizeChloToHloPatterns(context, &patterns); } + // ConstantLike op is convenient to create splat constants, but is + // canonicalized to plain HLO constant if statically 
shaped. Add the + // canonicalization pattern to pattern list to enable multi-hop lowering. + chlo::ConstantLikeOp::getCanonicalizationPatterns(patterns, context); ConversionTarget target(*context); if (legalize_chlo) { target.addIllegalDialect(); + + // Mark ConstantLikeOp as dynamically legal only when it doesn't have a + // static result type so that it gets canonicalized to MHLO constant. + target.addDynamicallyLegalOp([](Operation *op) { + return !op->getResultTypes().front().cast().hasStaticShape(); + }); } else { target.addLegalDialect(); } diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_patterns.td b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_patterns.td index f0ad04c8246..1d4c9503afa 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_patterns.td +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_patterns.td @@ -548,17 +548,17 @@ foreach Mapping = [ } // Expand acos to MHLO dialect as follows: -// acos(x) = 2 * atan(sqrt(1 - x^2) / (1 + x)) if x != -1 -// = pi if x == -1 +// acos(x) = 2 * atan(sqrt(1 - x^2) / (1 + x)) if x != -1 +// = pi if x == -1 def : Pat<(HLOClient_AcosOp $input), (HLO_SelectOp - (HLO_CompareOp $input, (HLO_ConstOp (ConstantSplat<"0"> $input)), - HLO_COMPARISON_DIRECTION_NE), - (HLO_MulOp (HLO_ConstOp (ConstantSplat<"2"> $input)), - (HLO_Atan2Op (HLO_SqrtOp (HLO_SubOp - (HLO_ConstOp (ConstantSplat<"1"> $input)), - (HLO_MulOp $input, $input))), - (HLO_AddOp (HLO_ConstOp (ConstantSplat<"1"> $input)), $input))), - (HLO_ConstOp (ConstantSplat<"M_PI"> $input)))>; + (HLO_CompareOp $input, (HLO_ConstantLike<"0"> $input), + HLO_COMPARISON_DIRECTION_NE), + (HLO_MulOp (HLO_ConstantLike<"2.0f"> $input), + (HLO_Atan2Op + (HLO_SqrtOp (HLO_SubOp + (HLO_ConstantLike<"1"> $input), (HLO_MulOp $input, $input))), + (HLO_AddOp (HLO_ConstantLike<"1"> $input), $input))), + (HLO_ConstantLike<"M_PI"> $input))>; // TODO(bixia): Lower Cast with a Complex type source operand or with // Truncate=True for floating point value conversions. @@ -594,6 +594,9 @@ def : Pat<(TF_BitcastOp:$res HLO_Tensor:$arg), (HLO_BitcastConvertOp $arg), [(BothElementTypesSameWidthIntOrFloat $res, $arg)]>; +// TODO(jpienaar): Lower constant like to constant to broadcast if dynamic +// and going to MHLO. + //===----------------------------------------------------------------------===// // Random ops. //===----------------------------------------------------------------------===// From 672d92ce3e7eb45762d1de36bf34ea635ff0fef9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 10 Aug 2020 12:19:58 -0700 Subject: [PATCH 2455/2522] Implement AllPermute collective in TF2 AllPermute takes - a list of devices participating in the collective - a permutation as a list of integers. - a tensor The list of devices replaces the need for group_key and group_size. The number of inputs only scales with the number of devices within one group. The integers in the permutation are based on indices of the list of devices. E.g. devices = {"GPU:0", "GPU:1"} and permutation = {1,0} means - devices[0] sends to devices[permutation[0]] and - devices[1] sends to devices[permutation[1]]. 
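A standalone sketch of how the device list and permutation pair up senders and
receivers (illustrative only, not part of this change):

    // Hypothetical example, not from the TensorFlow tree.
    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
      const std::vector<std::string> devices = {"GPU:0", "GPU:1"};
      const std::vector<int> permutation = {1, 0};
      // devices[i] sends its tensor to devices[permutation[i]]; every device
      // sends exactly one tensor and receives exactly one tensor.
      for (int i = 0; i < static_cast<int>(devices.size()); ++i) {
        std::cout << devices[i] << " sends to " << devices[permutation[i]] << "\n";
      }
      return 0;  // prints: GPU:0 sends to GPU:1, then GPU:1 sends to GPU:0
    }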
PiperOrigin-RevId: 325860722 Change-Id: I919438fa1cd75d684e47f927289de389e8dcb3f0 --- tensorflow/core/common_runtime/BUILD | 51 ++ .../base_collective_executor.cc | 1 + tensorflow/core/common_runtime/permuter.cc | 118 ++++ tensorflow/core/common_runtime/permuter.h | 89 +++ .../core/common_runtime/permuter_test.cc | 507 ++++++++++++++++++ tensorflow/core/framework/collective.h | 4 + 6 files changed, 770 insertions(+) create mode 100644 tensorflow/core/common_runtime/permuter.cc create mode 100644 tensorflow/core/common_runtime/permuter.h create mode 100644 tensorflow/core/common_runtime/permuter_test.cc diff --git a/tensorflow/core/common_runtime/BUILD b/tensorflow/core/common_runtime/BUILD index b46efe01474..4978a613707 100644 --- a/tensorflow/core/common_runtime/BUILD +++ b/tensorflow/core/common_runtime/BUILD @@ -269,6 +269,7 @@ filegroup( "threadpool_device.h", "process_state.h", "pool_allocator.h", + "permuter.h", ] + if_mkl(["//tensorflow/core/graph:mkl_graph_util_header"]), ) @@ -1129,6 +1130,27 @@ cc_library( ], ) +cc_library( + name = "permuter", + srcs = ["permuter.cc"], + hdrs = ["permuter.h"], + copts = tf_copts(), + deps = [ + ":base_collective_executor", + ":collective_rma_local", + ":collective_util", + ":copy_tensor", + ":device", + ":device_mgr", + ":dma_helper", + ":process_util", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core/profiler/lib:traceme", + ], + alwayslink = 1, +) + cc_library( name = "pool_allocator", srcs = ["pool_allocator.cc"], @@ -1585,6 +1607,7 @@ tf_cuda_library( ":parallel_concat_optimizer", ":partitioning_utils", ":pending_counts", + ":permuter", ":placer", ":pool_allocator", ":process_state", @@ -1973,6 +1996,34 @@ tf_cc_tests_gpu( ], ) +tf_cc_tests_gpu( + name = "permuter_test", + size = "medium", + srcs = [ + "permuter_test.cc", + ], + linkstatic = tf_kernel_tests_linkstatic(), + tags = ["no_cuda_on_cpu_tap"], + deps = [ + ":core", + ":core_cpu", + ":core_cpu_internal", + "//tensorflow/core:all_kernels", + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:ops", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/common_runtime/gpu:gpu_runtime", + "//tensorflow/core/util:protos_test_cc", + "@com_google_absl//absl/memory", + ], +) + tf_cc_test_mkl( name = "mkl_runtime_tests", size = "small", diff --git a/tensorflow/core/common_runtime/base_collective_executor.cc b/tensorflow/core/common_runtime/base_collective_executor.cc index 80820c9022c..754f8196d29 100644 --- a/tensorflow/core/common_runtime/base_collective_executor.cc +++ b/tensorflow/core/common_runtime/base_collective_executor.cc @@ -255,6 +255,7 @@ void BaseCollectiveExecutor::ExecuteAsync(OpKernelContext* ctx, Tensor* output = ctx->mutable_output(0); const Tensor* input = (col_params.instance.type == REDUCTION_COLLECTIVE || col_params.instance.type == GATHER_COLLECTIVE || + col_params.instance.type == PERMUTE_COLLECTIVE || (col_params.instance.type == BROADCAST_COLLECTIVE && col_params.is_source)) ? &ctx->input(0) diff --git a/tensorflow/core/common_runtime/permuter.cc b/tensorflow/core/common_runtime/permuter.cc new file mode 100644 index 00000000000..c3081d6bc61 --- /dev/null +++ b/tensorflow/core/common_runtime/permuter.cc @@ -0,0 +1,118 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/common_runtime/permuter.h" + +#include "tensorflow/core/common_runtime/collective_rma_local.h" +#include "tensorflow/core/common_runtime/collective_util.h" +#include "tensorflow/core/common_runtime/copy_tensor.h" +#include "tensorflow/core/common_runtime/device.h" +#include "tensorflow/core/common_runtime/device_mgr.h" +#include "tensorflow/core/common_runtime/dma_helper.h" +#include "tensorflow/core/common_runtime/process_util.h" +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/device_base.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/core/notification.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { + +Permuter::Permuter() + : col_ctx_(nullptr), col_params_(nullptr), done_(nullptr), counter_(0) {} + +bool Permuter::CheckCounter() { + mutex_lock lock(mu_counter_); + ++counter_; + if (counter_ == 2) return true; + return false; +} + +StatusCallback Permuter::HalfDone() { + return [this](const Status& s) { + status_.Update(s); + if (CheckCounter()) done_(status_); + }; +} + +Status Permuter::InitializeCollectiveContext( + std::shared_ptr col_ctx) { + DCHECK(col_ctx->dev_mgr); + col_ctx_ = col_ctx; + col_params_ = &col_ctx->col_params; + return collective_util::InitializeDeviceAndLocality( + col_ctx->dev_mgr, col_ctx->device_name, &col_ctx->device, + &col_ctx->device_locality); +} + +void Permuter::Run(StatusCallback done) { + done_ = std::move(done); + for (int i = 0; i < col_params_->instance.devices.size(); ++i) { + if (col_ctx_->device_name == col_params_->instance.devices[i]) { + DispatchSend(i, col_params_->instance.permutation[i], col_ctx_->input, + HalfDone()); + continue; + } + if (col_ctx_->device_name == + col_params_->instance.devices[col_params_->instance.permutation[i]]) { + DispatchRecv(i, col_params_->instance.permutation[i], col_ctx_->output, + HalfDone()); + } + } +} + +void Permuter::DispatchSend(int src_rank, int target_rank, const Tensor* tensor, + const StatusCallback& done) { + string send_buf_key = + strings::StrCat(col_ctx_->exec_key, src_rank, target_rank); + VLOG(1) << "DispatchSend " << send_buf_key << " from_device " + << col_ctx_->device_name << " to_device " + << col_params_->instance.devices[target_rank] + << " target_rank=" << target_rank << " src_rank=" << src_rank; + col_ctx_->col_exec->PostToPeer(col_params_->instance.devices[target_rank], + col_params_->instance.task_names[target_rank], + send_buf_key, col_ctx_->device, + col_ctx_->op_ctx->op_device_context(), + col_ctx_->op_ctx->output_alloc_attr(0), 
tensor, + col_ctx_->device_locality, done); +} + +void Permuter::DispatchRecv(int src_rank, int target_rank, Tensor* tensor, + const StatusCallback& done) { + string recv_buf_key = + strings::StrCat(col_ctx_->exec_key, src_rank, target_rank); + VLOG(1) << "DispatchRecv " << recv_buf_key << " to_device " + << col_ctx_->device_name << " from_device " + << col_params_->instance.devices[src_rank] + << " target_rank=" << target_rank << " src_rank=" << src_rank; + col_ctx_->col_exec->RecvFromPeer(col_params_->instance.devices[src_rank], + col_params_->instance.task_names[src_rank], + col_params_->task.is_local[src_rank], + recv_buf_key, col_ctx_->device, + col_ctx_->op_ctx->op_device_context(), + col_ctx_->op_ctx->output_alloc_attr(0), + tensor, col_ctx_->device_locality, 0, done); +} +namespace { +REGISTER_COLLECTIVE(Permute, Permuter); +} // namespace + +} // namespace tensorflow diff --git a/tensorflow/core/common_runtime/permuter.h b/tensorflow/core/common_runtime/permuter.h new file mode 100644 index 00000000000..245168b4b0d --- /dev/null +++ b/tensorflow/core/common_runtime/permuter.h @@ -0,0 +1,89 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_PERMUTER_H_ +#define TENSORFLOW_CORE_COMMON_RUNTIME_PERMUTER_H_ + +#include +#include +#include +#include + +#include "tensorflow/core/common_runtime/base_collective_executor.h" +#include "tensorflow/core/framework/collective.h" + +namespace tensorflow { +class Device; + +// Implementation of collective permute. +// +// Permute takes +// - a list of devices participating in the collective +// - a permutation as a list of integers. +// - a tensor +// +// The list of devices replaces the need for group_key and group_size. The +// number of inputs only scales with the number of devices within one group. +// +// The integers in the permutation are based on indices of the list of devices. +// E.g. devices = {"GPU:0", "GPU:1"} and permutation = {1,0} means +// - devices[0] sends to devices[permutation[0]] and +// - devices[1] sends to devices[permutation[1]]. +// +// Each device sends exactly one tensor and receives exactly one tensor. +class Permuter : public CollectiveImplementationInterface { + public: + Permuter(); + ~Permuter() override = default; + + void Run(StatusCallback done) override; + + Status InitializeCollectiveParams(CollectiveParams* col_params) override { + return Status::OK(); + } + + // Initializes members of CollectiveContext not yet initialized, i.e. device + // and device_locality. Also saves the CollectiveContext in this object. 
+ Status InitializeCollectiveContext( + std::shared_ptr col_ctx) override; + + Status InitializeCollectiveGroupRuntimeDetails( + CollGroupRuntimeDetails*) override { + return Status::OK(); + } + + private: + std::shared_ptr col_ctx_; + const CollectiveParams* col_params_; // Not owned + StatusCallback done_; + Status status_; + mutex mu_counter_; + int counter_ TF_GUARDED_BY(mu_counter_); + + void DispatchSend(int src_rank, int target_rank, const Tensor* tensor, + const StatusCallback& done); + + void DispatchRecv(int src_rank, int target_rank, Tensor* tensor, + const StatusCallback& done); + + // Checks if counter_ reaches 2. + // Atomically increments counter_ by one for sending, one for receiving. + // The purpose of this check is to ensure that done_ is called only once. + bool CheckCounter(); + + StatusCallback HalfDone(); +}; + +} // namespace tensorflow +#endif // TENSORFLOW_CORE_COMMON_RUNTIME_PERMUTER_H_ diff --git a/tensorflow/core/common_runtime/permuter_test.cc b/tensorflow/core/common_runtime/permuter_test.cc new file mode 100644 index 00000000000..a5117322ffa --- /dev/null +++ b/tensorflow/core/common_runtime/permuter_test.cc @@ -0,0 +1,507 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/common_runtime/permuter.h" + +#include + +#include "absl/memory/memory.h" +#include "tensorflow/core/common_runtime/base_collective_executor.h" +#include "tensorflow/core/common_runtime/collective_rma_local.h" +#include "tensorflow/core/common_runtime/device_mgr.h" +#include "tensorflow/core/common_runtime/device_resolver_local.h" +#include "tensorflow/core/common_runtime/process_util.h" +#include "tensorflow/core/common_runtime/test_collective_executor_mgr.h" +#include "tensorflow/core/common_runtime/threadpool_device.h" +#include "tensorflow/core/framework/collective.h" +#include "tensorflow/core/framework/fake_input.h" +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/lib/core/notification.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/unbounded_work_queue.h" +#include "tensorflow/core/public/session_options.h" +#include "tensorflow/core/public/version.h" + +namespace tensorflow { +namespace { + +static int64 kStepId = 123; + +// Wraps CollectiveRemoteAccessLocal with the ability to return an +// error status to the N'th action. +// TODO(b/113171733): factor out of this file and ring_reducer_test.cc +// into a single common source. 
+class FailTestRMA : public CollectiveRemoteAccessLocal { + public: + FailTestRMA(const DeviceMgr* dev_mgr, DeviceResolverInterface* dev_resolver, + std::shared_ptr work_queue, int64 step_id, + int fail_after) + : CollectiveRemoteAccessLocal(dev_mgr, dev_resolver, work_queue, step_id), + fail_after_(fail_after) {} + + bool MaybeFail(const StatusCallback& done) { + bool fail_now = false; + { + mutex_lock l(mu_); + if (fail_after_ > 0) { + fail_now = (--fail_after_ == 0); + } + } + if (fail_now) { + auto error = errors::Internal("Deliberate failure"); + LOG(INFO) << "triggering failure " << error; + SchedNonBlockingClosureAfter( + 1000, [this, error] { buf_rendezvous()->StartAbort(error); }); + done(error); + return true; + } + return false; + } + + void RecvFromPeer(const string& peer_device, const string& peer_task, + bool peer_is_local, const string& key, Device* to_device, + DeviceContext* to_device_ctx, + const AllocatorAttributes& to_alloc_attr, Tensor* to_tensor, + const DeviceLocality& client_locality, int stream_index, + const StatusCallback& done) override { + if (MaybeFail(done)) return; + CollectiveRemoteAccessLocal::RecvFromPeer( + peer_device, peer_task, peer_is_local, key, to_device, to_device_ctx, + to_alloc_attr, to_tensor, client_locality, stream_index, done); + } + + void PostToPeer(const string& peer_device, const string& peer_task, + const string& key, Device* from_device, + DeviceContext* from_device_ctx, + const AllocatorAttributes& from_alloc_attr, + const Tensor* from_tensor, + const DeviceLocality& client_locality, + const StatusCallback& done) override { + if (MaybeFail(done)) return; + CollectiveRemoteAccessLocal::PostToPeer( + peer_device, peer_task, key, from_device, from_device_ctx, + from_alloc_attr, from_tensor, client_locality, done); + } + + mutex mu_; + int fail_after_ TF_GUARDED_BY(mu_); +}; + +class PermuterTest : public ::testing::Test { + protected: + PermuterTest() : device_type_(DEVICE_CPU) {} + + ~PermuterTest() override { + stop_ = true; + for (auto i : instances_) delete i; + if (col_exec_) col_exec_->Unref(); + } + +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM + void InitGPUDevices() { + auto device_factory = DeviceFactory::GetFactory("GPU"); + CHECK(device_factory); + SessionOptions options; + Status s = device_factory->CreateDevices( + options, "/job:worker/replica:0/task:0", &gpu_devices_); + CHECK(s.ok()); + } +#endif + + void Init(int num_workers, int num_devices_per_worker, DataType dtype, + const DeviceType& device_type, int fail_after) { +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM + InitGPUDevices(); +#endif + device_type_ = device_type; + std::vector> local_devices; + SessionOptions sess_opts; + sess_opts.env = Env::Default(); + Bytes mem_limit(4 << 20); + DeviceLocality dev_locality; + for (int wi = 0; wi < num_workers; ++wi) { + for (int di = 0; di < num_devices_per_worker; ++di) { + if (device_type == DEVICE_CPU) { + string dev_name = strings::StrCat("/job:worker/replica:0/task:", wi, + "/device:CPU:", di); + local_devices.push_back(absl::make_unique( + sess_opts, dev_name, mem_limit, dev_locality, cpu_allocator())); + } else if (device_type == DEVICE_GPU && !gpu_devices_.empty()) { + int dev_idx = (wi * num_devices_per_worker) + di; + if (dev_idx >= static_cast(gpu_devices_.size())) { + LOG(INFO) << "dev_mgr has access to limited GPUs, reusing for more " + "than one ring node."; + } else { + local_devices.push_back(std::move(gpu_devices_[dev_idx])); + } + } else { + LOG(FATAL) << "Unsupported device_type " << device_type; + } + } + } + if 
(!dev_mgr_ || device_type == DEVICE_CPU) { + dev_mgr_ = absl::make_unique(std::move(local_devices)); + } + if (!gpu_ring_order_) { + gpu_ring_order_ = absl::make_unique(); + } + dev_resolver_ = absl::make_unique(dev_mgr_.get()); + work_queue_ = std::make_shared(Env::Default(), "test"); + rma_ = new FailTestRMA(dev_mgr_.get(), dev_resolver_.get(), work_queue_, + kStepId, fail_after); + col_exec_ = new BaseCollectiveExecutor( + &col_exec_mgr_, rma_, kStepId, dev_mgr_.get(), gpu_ring_order_.get()); + col_params_.name = "test_collective"; + col_params_.instance.data_type = dtype; + static const int kInstanceKey = 18; + col_params_.instance.instance_key = kInstanceKey; + col_params_.group.device_type = device_type; + col_params_.instance.type = PERMUTE_COLLECTIVE; + + // Set up all the fake device contexts. + for (int wi = 0; wi < num_workers; wi++) { + for (int di = 0; di < num_devices_per_worker; di++) { + string task_name = strings::StrCat("/job:worker/replica:0/task:", wi); + string dev_name; + if (device_type == DEVICE_GPU) { + dev_name = strings::StrCat(task_name, "/device:GPU:0"); + } else { + dev_name = strings::StrCat(task_name, "/device:CPU:", di); + } + col_params_.instance.device_names.push_back(dev_name); + col_params_.instance.devices.push_back(dev_name); + int default_rank = wi * num_devices_per_worker + di; + permutation_.push_back(default_rank); + col_params_.instance.task_names.push_back(task_name); + col_params_.task.is_local.push_back(true); + } + } + + // Generate a permutation by permuting every two instances. + // E.g. [0,1] becomes [1,0] + // [0,1,2,3] becomes [1,0,3,2] + for (int i = 0; i < permutation_.size(); i += 2) { + // If the total number of instances is odd, + // swap the last instance with the first. + // E.g. [0,1,2] becomes [2,0,1] + if (permutation_.size() == i + 1) { + std::swap(permutation_[i], permutation_[0]); + continue; + } + std::next_permutation(permutation_.begin() + i, + permutation_.begin() + i + 2); + } + col_params_.instance.permutation = permutation_; + + for (int wi = 0; wi < num_workers; wi++) { + for (int di = 0; di < num_devices_per_worker; di++) { + int default_rank = wi * num_devices_per_worker + di; + instances_.push_back(new DeviceInstance( + default_rank, col_params_.instance.device_names[default_rank], + device_type, this)); + } + } + } + + typedef std::function InitFunc; + + void Permute(int fail_after) { + std::atomic done(0); + for (auto di : instances_) { + SchedClosure([di, &done] { + di->DoPermute(); + ++done; + }); + if (fail_after > 0) { + // Stagger the op execution starts. + Env::Default()->SleepForMicroseconds(100); + } + } + while (done < instances_.size()) { + if (stop_) break; + Env::Default()->SleepForMicroseconds(1000); + } + } + + template + void RunTest(DataType dtype, const DeviceType& device_type, int num_workers, + int num_devices, int tensor_len, int fail_after) { + Init(num_workers, num_devices, dtype, device_type, fail_after); + std::vector expected(tensor_len * num_devices * num_workers, 0.0); + // Initialize each instance tensor with distinct values. + for (int di = 0; di < instances_.size(); ++di) { + DeviceInstance* instance = instances_[di]; + instance->InitTensor( + dtype, TensorShape({tensor_len}), + [this, &expected, di, tensor_len](Tensor* t) { + for (size_t i = 0; i < t->NumElements(); ++i) { + // The cast is necessary to prevent clang-tidy from insisting + // that a faster non-open source function be substituted. 
+ float value = pow(10, static_cast(di)) * i; + t->flat()(i) = value; + expected[permutation_[di] * tensor_len + i] = value; + } + }); + } + + Permute(fail_after); + + // At this point all of the ops have terminated. + for (int di = 0; di < instances_.size(); ++di) { + if (!instances_[di]->status_.ok()) { + ASSERT_GT(fail_after, 0); + ASSERT_NE( + instances_[di]->status_.error_message().find("Deliberate failure"), + string::npos); + continue; + } + TF_EXPECT_OK(instances_[di]->status_); + Tensor* inst = &instances_[di]->tensor_output_; + Tensor actual(dtype, TensorShape({tensor_len})); + if (device_type_ == DEVICE_CPU) { + CHECK(actual.CopyFrom(*inst, inst->shape())); + } else if (device_type_ == DEVICE_GPU) { + Device* dev = instances_[di]->device_; + auto* dev_info = dev->tensorflow_gpu_device_info(); + CHECK(dev_info); + TF_CHECK_OK(dev_info->default_context->CopyDeviceTensorToCPUSync( + inst, "" /*tensor_name*/, dev, &actual)); + } + for (int i = 0; i < tensor_len; ++i) { + switch (dtype) { + case DT_FLOAT: + EXPECT_FLOAT_EQ(expected[(di * tensor_len) + i], + actual.template flat()(i)) + << "Mismatch at device " << di << " index " << i; + break; + case DT_DOUBLE: + EXPECT_DOUBLE_EQ(expected[(di * tensor_len) + i], + actual.template flat()(i)) + << "Mismatch at device " << di << " index " << i; + break; + case DT_INT32: + case DT_INT64: + EXPECT_EQ(expected[(di * tensor_len) + i], + actual.template flat()(i)) + << "Mismatch at device " << di << " index " << i; + break; + default: + LOG(FATAL) << "unimplemented"; + } + } + // } + } + } + + class DeviceInstance { + public: + DeviceInstance(int rank, const string& dev_name, + const DeviceType& device_type, PermuterTest* parent) + : parent_(parent), + dev_name_(dev_name), + device_type_(device_type), + rank_(rank) { + TF_CHECK_OK(parent_->dev_mgr_->LookupDevice(dev_name, &device_)); + col_params_.name = parent_->col_params_.name; + col_params_.instance.data_type = parent_->col_params_.instance.data_type; + col_params_.instance.instance_key = + parent_->col_params_.instance.instance_key; + col_params_.group.device_type = parent_->col_params_.group.device_type; + col_params_.instance.device_names = + parent_->col_params_.instance.device_names; + col_params_.instance.devices = parent_->col_params_.instance.devices; + col_params_.instance.permutation = + parent->col_params_.instance.permutation; + col_params_.instance.task_names = + parent_->col_params_.instance.task_names; + col_params_.task.is_local = parent_->col_params_.task.is_local; + CHECK_EQ(col_params_.instance.devices.size(), + col_params_.instance.device_names.size()); + // Default rank is order in device_names. + col_params_.default_rank = rank; + } + + void InitTensor(DataType dtype, const TensorShape& shape, + const InitFunc& f) { + tensor_input_ = + Tensor(device_->GetAllocator(AllocatorAttributes()), dtype, shape); + tensor_output_ = + Tensor(device_->GetAllocator(AllocatorAttributes()), dtype, shape); + if (device_type_ == DEVICE_CPU) { + f(&tensor_input_); + } else if (device_type_ == DEVICE_GPU) { + Tensor cpu_tensor(dtype, shape); + f(&cpu_tensor); + // Notification notification; + auto* dev_info = device_->tensorflow_gpu_device_info(); + CHECK(dev_info); + TF_CHECK_OK(dev_info->default_context->CopyCPUTensorToDeviceSync( + &cpu_tensor, device_, &tensor_input_)); + } else { + LOG(FATAL) << "Unsupported device_type " << device_type_; + } + } + + void DoPermute() { + // Prepare an OpKernelContext. 
+ OpKernelContext::Params op_params; + op_params.step_id = parent_->step_id_; + op_params.device = device_; + gtl::InlinedVector inputs; + inputs.push_back(TensorValue(&tensor_input_)); + op_params.inputs = &inputs; + gtl::InlinedVector input_aa( + {AllocatorAttributes()}); + op_params.input_alloc_attrs = &input_aa; + DeviceContext* dev_ctx = nullptr; + auto* dev_info = device_->tensorflow_gpu_device_info(); + if (dev_info) { + dev_ctx = dev_info->default_context; + dev_ctx->Ref(); + } else { + dev_ctx = new DeviceContext; + } + op_params.op_device_context = dev_ctx; + AllocatorAttributes generic_alloc_attr; + op_params.output_attr_array = &generic_alloc_attr; + OpKernelContext ctx(&op_params, 1); + + // Prepare a Permuter instance. + string exec_key = + strings::StrCat(col_params_.instance.instance_key, ":0:0"); + Permuter* permuter = new Permuter; + core::ScopedUnref unref(permuter); + auto col_ctx = std::make_shared( + parent_->col_exec_, parent_->dev_mgr_.get(), &ctx, &op_params, + col_params_, exec_key, kStepId, &tensor_input_, &tensor_output_); + TF_CHECK_OK(permuter->InitializeCollectiveContext(col_ctx)); + Notification note; + // Run the permute. + permuter->Run([this, ¬e](Status s) { + status_ = s; + note.Notify(); + }); + note.WaitForNotification(); + dev_ctx->Unref(); + } + + PermuterTest* parent_; + string dev_name_; + DeviceType device_type_ = DEVICE_CPU; + int rank_; + Tensor tensor_input_; + Tensor tensor_output_; + Device* device_; + CollectiveParams col_params_; + Status status_; + }; // class DeviceInstance + + bool stop_ = false; + int64 step_id_ = kStepId; + DeviceType device_type_; + TestCollectiveExecutorMgr col_exec_mgr_; + CollectiveExecutor* col_exec_ = nullptr; + CollectiveRemoteAccessLocal* rma_; + std::unique_ptr dev_resolver_; + std::shared_ptr work_queue_; + std::vector instances_; + CollectiveParams col_params_; + std::vector> gpu_devices_; + std::unique_ptr dev_mgr_; + std::unique_ptr gpu_ring_order_; + mutex mu_; + int permute_counter_ TF_GUARDED_BY(mu_) = 0; + std::vector permutation_; +}; + +// TODO(b/113171733): change to use TEST_P. +// Tests of full permute algorithm, with different device and +// data types. 
+// B = data element type +// T = device type +// W = number of workers +// D = number of devices per worker +// L = tensor length +// A = abort after count +#define DEF_TEST(B, T, W, D, L, A) \ + TEST_F(PermuterTest, \ + DaTy##B##_DevTy##T##_Wkr##W##_Dev##D##_Sdiv##S##_Len##L##_Abrt##A) { \ + DataType dtype = DT_##B; \ + switch (dtype) { \ + case DT_FLOAT: { \ + RunTest(dtype, DEVICE_##T, W, D, L, A); \ + } break; \ + case DT_DOUBLE: { \ + RunTest(dtype, DEVICE_##T, W, D, L, A); \ + } break; \ + case DT_INT32: { \ + RunTest(dtype, DEVICE_##T, W, D, L, A); \ + } break; \ + case DT_INT64: { \ + RunTest(dtype, DEVICE_##T, W, D, L, A); \ + } break; \ + default: \ + LOG(FATAL) << "Unimplemented"; \ + } \ + } + +#if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM) +// B T W D L A +DEF_TEST(FLOAT, CPU, 1, 2, 1, 0) +DEF_TEST(FLOAT, CPU, 1, 3, 3, 0) +DEF_TEST(FLOAT, CPU, 1, 7, 3, 0) +DEF_TEST(FLOAT, CPU, 1, 2, 1001, 0) +DEF_TEST(FLOAT, CPU, 2, 2, 3, 0) +DEF_TEST(FLOAT, CPU, 2, 1, 128, 0) +DEF_TEST(FLOAT, CPU, 2, 4, 128, 0) +DEF_TEST(FLOAT, CPU, 2, 8, 4095, 0) +DEF_TEST(FLOAT, CPU, 4, 4, 1045991, 0) + +DEF_TEST(DOUBLE, CPU, 2, 4, 128, 0) +DEF_TEST(INT32, CPU, 2, 4, 128, 0) +DEF_TEST(INT64, CPU, 2, 4, 128, 0) + +// Failure cases +DEF_TEST(FLOAT, CPU, 1, 2, 1, 1) +DEF_TEST(FLOAT, CPU, 2, 4, 128, 1) +DEF_TEST(FLOAT, CPU, 2, 4, 128, 5) +#endif + +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +// Can only set W=1 for GPU tests. +// B T W D L A +DEF_TEST(FLOAT, GPU, 1, 2, 1, 0) +DEF_TEST(FLOAT, GPU, 1, 7, 3, 0) +DEF_TEST(FLOAT, GPU, 1, 2, 33, 0) +DEF_TEST(FLOAT, GPU, 1, 3, 64, 0) +DEF_TEST(FLOAT, GPU, 1, 8, 1001, 0) +DEF_TEST(FLOAT, GPU, 1, 8, 4095, 0) +DEF_TEST(FLOAT, GPU, 1, 8, 1045991, 0) + +DEF_TEST(BOOL, GPU, 1, 4, 1, 0) +DEF_TEST(BOOL, GPU, 1, 4, 1001, 0) + +DEF_TEST(DOUBLE, GPU, 1, 8, 1001, 0) +DEF_TEST(INT64, GPU, 1, 8, 1001, 0) + +// Failure cases +DEF_TEST(FLOAT, GPU, 1, 8, 128, 6) +#endif + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/core/framework/collective.h b/tensorflow/core/framework/collective.h index 94e83fa2f08..e7110d9512c 100644 --- a/tensorflow/core/framework/collective.h +++ b/tensorflow/core/framework/collective.h @@ -43,6 +43,7 @@ enum CollectiveType { REDUCTION_COLLECTIVE = 0, BROADCAST_COLLECTIVE, GATHER_COLLECTIVE, + PERMUTE_COLLECTIVE, UNDEFINED_COLLECTIVE, }; @@ -89,6 +90,7 @@ struct CollImplDetails { }; // Data common to all members of a collective instance. +// TODO(b/163171014) Refactor this struct to not be a union of all fields. struct CollInstanceParams { // Identifies all participating graph nodes. int32 instance_key = -1; @@ -109,6 +111,8 @@ struct CollInstanceParams { CollImplDetails impl_details; string ToString() const; CollInstanceParams& operator=(const struct CollInstanceParams& other); + std::vector devices; // all_permute only + std::vector permutation; // all_permute only }; // Data common to all instance members in the same task. 
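A note on the permuter test above: the permutation it exercises is built by swapping every adjacent pair of default ranks and, when the number of ranks is odd, swapping the final rank with rank 0 (so {0,1} becomes {1,0}, {0,1,2,3} becomes {1,0,3,2}, and {0,1,2} becomes {2,0,1}). The standalone sketch below mirrors that construction for reference only; the helper name MakePairwisePermutation is illustrative and is not part of the patch.

#include <algorithm>
#include <numeric>
#include <vector>

// Mirrors the permutation-building loop in PermuterTest::Init above.
std::vector<int> MakePairwisePermutation(int num_ranks) {
  std::vector<int> perm(num_ranks);
  std::iota(perm.begin(), perm.end(), 0);
  for (int i = 0; i < num_ranks; i += 2) {
    if (i + 1 == num_ranks) {
      // Odd rank count: the last rank trades places with rank 0.
      std::swap(perm[i], perm[0]);
      continue;
    }
    // On a sorted two-element range this is simply a swap.
    std::next_permutation(perm.begin() + i, perm.begin() + i + 2);
  }
  return perm;
}

For two workers with two devices each this yields {1,0,3,2}, which is why RunTest checks expected[permutation_[di] * tensor_len + i] against each device's output tensor.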
From 413823fcb11f2658ba16219d0dce71ed09082557 Mon Sep 17 00:00:00 2001 From: Deven Desai Date: Sat, 8 Aug 2020 13:15:33 +0000 Subject: [PATCH 2456/2522] Enabling XLA specific subtests (for ROCm) within the test `def_function_xla_jit_test` --- .../python/eager/def_function_xla_jit_test.py | 31 +++++-------------- 1 file changed, 7 insertions(+), 24 deletions(-) diff --git a/tensorflow/python/eager/def_function_xla_jit_test.py b/tensorflow/python/eager/def_function_xla_jit_test.py index 813c1377cd9..ba75aed5f1c 100644 --- a/tensorflow/python/eager/def_function_xla_jit_test.py +++ b/tensorflow/python/eager/def_function_xla_jit_test.py @@ -76,9 +76,7 @@ class DefFunctionTest(xla_test.XLATestCase): inputs = constant_op.constant([1, 2, 2, 3, 3]) self.assertAllClose([2, 3, 3, 4, 4], func(inputs, 1)) - if not test.is_built_with_rocm(): - # XLA support is not yet enabled for TF ROCm - self.assertAllClose([2, 3, 3, 4, 4], xla_func(inputs, 1)) + self.assertAllClose([2, 3, 3, 4, 4], xla_func(inputs, 1)) def testBasicInt32(self): with ops.device('device:{}:0'.format(self.device)): @@ -88,14 +86,10 @@ class DefFunctionTest(xla_test.XLATestCase): return x + a inputs = constant_op.constant([1, 2, 2, 3, 3], dtype=dtypes.int32) - if not test.is_built_with_rocm(): - # XLA support is not yet enabled for TF ROCm - self.assertAllClose([2, 3, 3, 4, 4], fn(inputs, 1)) + self.assertAllClose([2, 3, 3, 4, 4], fn(inputs, 1)) def testDerivative(self): with ops.device('device:{}:0'.format(self.device)): - if test.is_built_with_rocm(): - return def fn(x, a): return 2 * x + a @@ -135,9 +129,7 @@ class DefFunctionTest(xla_test.XLATestCase): return fn(x, a) inputs = constant_op.constant([1, 2, 2, 3, 3]) - if not test.is_built_with_rocm(): - # XLA support is not yet enabled for TF ROCm - self.assertAllClose([2, 3, 3, 4, 4], fn2(inputs, 1)) + self.assertAllClose([2, 3, 3, 4, 4], fn2(inputs, 1)) @test_util.disable_mlir_bridge('TODO(b/162272821): MLIR bridge returns' ' wrong status type') @@ -154,10 +146,9 @@ class DefFunctionTest(xla_test.XLATestCase): func = def_function.function(fn2, experimental_compile=False) inputs = constant_op.constant([1, 2, 2, 3, 3]) - if not test.is_built_with_rocm(): - with self.assertRaisesRegex(errors.InvalidArgumentError, - 'not compilable'): - func(inputs) + with self.assertRaisesRegex(errors.InvalidArgumentError, + 'not compilable'): + func(inputs) @test_util.disable_mlir_bridge('TODO(b/162272821): MLIR bridge returns' ' wrong status type') @@ -196,9 +187,7 @@ class DefFunctionTest(xla_test.XLATestCase): self.assertAllClose(3.0, dy) run_and_check(func) - if not test.is_built_with_rocm(): - # XLA support is not yet enabled for TF ROCm - run_and_check(xla_func) + run_and_check(xla_func) @test_util.disable_mlir_bridge('TODO(b/162521846): MLIR bridge fails' ' msan, function library not found') @@ -233,8 +222,6 @@ class DefFunctionTest(xla_test.XLATestCase): self.assertAllClose([40.0, 28.0], g(2.0)) def testMethodCompilation(self): - if test.is_built_with_rocm(): - return with ops.device('device:{}:0'.format(self.device)): @@ -251,8 +238,6 @@ class DefFunctionTest(xla_test.XLATestCase): @test_util.disable_mlir_bridge('TODO(b/162272821): MLIR bridge returns ' ' wrong status type') def testMethodCompilationUnsupportedFunc(self): - if test.is_built_with_rocm(): - return with ops.device('device:{}:0'.format(self.device)): @@ -273,8 +258,6 @@ class DefFunctionTest(xla_test.XLATestCase): self.skipTest('b/162799319: Cannot resolve constant on TPU') with ops.device('device:{}:0'.format(self.device)): - if 
test.is_built_with_rocm():
-        return
 
       @def_function.function(experimental_compile=True)
       def f():

From 35475aae9b38d00129652a2f316730c8c17398fd Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Mon, 10 Aug 2020 13:30:12 -0700
Subject: [PATCH 2457/2522] [XLA:SPMD] Support partial replicate to tile resharding by dynamic slice.

PiperOrigin-RevId: 325876094
Change-Id: I6a085904de8e3ab21f3e5107ce70c22c6a892cea
---
 .../compiler/xla/service/hlo_sharding.cc      |   7 +-
 .../xla/service/spmd/spmd_partitioner.cc      |  71 +++++++
 .../xla/service/spmd/spmd_partitioner_test.cc |  47 +++++
 .../xla/service/spmd/spmd_partitioner_util.cc | 198 +++++++++++++++++-
 .../xla/service/spmd/spmd_partitioner_util.h  |  24 +++
 5 files changed, 343 insertions(+), 4 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_sharding.cc b/tensorflow/compiler/xla/service/hlo_sharding.cc
index 07444aca82b..92270005ffd 100644
--- a/tensorflow/compiler/xla/service/hlo_sharding.cc
+++ b/tensorflow/compiler/xla/service/hlo_sharding.cc
@@ -219,8 +219,11 @@ std::vector<int64> HloSharding::TileOffsetForDevice(const Shape& shape,
   if (maximal_) {
     return std::vector<int64>(shape.dimensions_size(), 0);
   }
-
-  CHECK_EQ(shape.dimensions_size(), tile_assignment_.num_dimensions());
+  if (replicate_on_last_tile_dim_) {
+    CHECK_EQ(shape.dimensions_size(), tile_assignment_.num_dimensions() - 1);
+  } else {
+    CHECK_EQ(shape.dimensions_size(), tile_assignment_.num_dimensions());
+  }
   std::vector<int64> index = TileIndexForDevice(device);
   for (int64 i = 0; i < index.size(); ++i) {
     const int64 shape_dim = shape.dimensions(i);
diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc
index fc065bcdd72..8006e47d90d 100644
--- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc
+++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc
@@ -282,6 +282,77 @@ PartitionedHlo PartitionedHlo::ReshardNoCache(const HloSharding& target) {
     return ReshardWithAllToAll(target, *src_tgt_dims);
   }
 
+  // Partially replicated to tiled.
+  if (sharding().ReplicateOnLastTileDim() &&
+      !target.ReplicateOnLastTileDim() && !target.IsTileMaximal()) {
+    // Get the temp sharding target from partial replicate to target tile dims.
+    // target_compatible_sharding has the same tile_assignment dimensions
+    // as the target and can reshard to target by collective permute.
+    // target_compatible_sharding could have a different device assignment from
+    // target. sharding() can reshard to target_compatible_sharding by
+    // dynamic slice.
+    auto target_compatible_sharding = PartialReplicateToTileCompatibleSharding(
+        sharding(), target.tile_assignment().dimensions());
+    // Reshard to target_compatible_sharding by dynamic slice.
+    if (target_compatible_sharding.has_value()) {
+      std::vector<int64> expand_tile_dims;
+      std::vector<int64> tiling_dim_factors;
+      int64 rank = shape.rank();
+      tiling_dim_factors.reserve(rank);
+      auto temp_target_sharding = target_compatible_sharding.value();
+      for (int64 dim = 0; dim < rank; dim++) {
+        if (temp_target_sharding.tile_assignment().dim(dim) >
+            sharding().tile_assignment().dim(dim)) {
+          expand_tile_dims.push_back(dim);
+        }
+        tiling_dim_factors.emplace_back(
+            temp_target_sharding.tile_assignment().dim(dim) /
+            sharding().tile_assignment().dim(dim));
+      }
+
+      // Get per_group partitioner state.
+      std::vector<int64> group_dims(
+          sharding().tile_assignment().num_dimensions() - 1);
+      std::iota(group_dims.begin(), group_dims.end(), 0);
+      auto sharding_grouped = GroupShardingOnDims(sharding(), group_dims);
+      auto per_group_partitioner_state = CreatePerGroupPartitioningState(
+          state_, sharding_grouped.device_groups, state_.b);
+      // 2. Get the padded_hlo, do right halo exchange if needed.
+      auto padded_hlo = PadFromPartialReplicateShape(
+          hlo_, base_shape_, sharding(), temp_target_sharding, expand_tile_dims,
+          state_.collective_ops_creator, state_.next_channel_id,
+          state_.partition_id, state_.b);
+      if (padded_hlo.has_value()) {
+        // 3. Slice out the tile from the replicated ones.
+        auto shard_shape =
+            MakePartitionedShape(base_shape_, temp_target_sharding);
+        // Device assignment within each group is sorted in
+        // HloSharding::PartialTile, thus partition_id within each group can be
+        // matched with the order in tile_assignment.
+        Array<int64> tiling_assignment(tiling_dim_factors);
+        tiling_assignment.FillIota(0);
+        auto slice =
+            state_.b->AddInstruction(HloInstruction::CreateDynamicSlice(
+                shard_shape, padded_hlo.value(),
+                MakePartitionOffsets(padded_hlo.value()->shape(),
+                                     HloSharding::Tile(tiling_assignment),
+                                     per_group_partitioner_state.partition_id,
+                                     per_group_partitioner_state.b),
+                shard_shape.dimensions()));
+        slice->set_sharding(temp_target_sharding);
+        auto result = PartitionedHlo(slice, base_shape_, state_);
+        // If temp_target_sharding's device assignment is different from
+        // target, use collective permute to reshard.
+        if (CanReshardWithCollectivePermute(temp_target_sharding, target)) {
+          return result.ReshardWithCollectivePermute(target);
+        }
+        // If the device assignments in temp_target_sharding and target are the
+        // same, return the result directly.
+        return result;
+      }
+    }
+  }
+
   // If not replicated yet, first replicate and then reshard to use one of the
   // two implementations below.
if (!sharding().IsReplicated()) { diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc index 386d634779b..3ffe2954d61 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_test.cc @@ -4348,6 +4348,53 @@ ENTRY entry { EXPECT_THAT(root, AllOf(op::Shape("f32[4,4,12,32]"), op::Reshape(xpose))); } +TEST_F(SpmdPartitioningTest, + ElementwiseTest_PartialReplicateToTiledHaloExchange) { + const char* const hlo_string = R"( +HloModule module + +ENTRY entry { + constant = f32[6,3]{1,0} + constant({{1,3,7},{5,1,4},{1,2,8},{2,3,7},{5,2,4},{2,2,8}}), + sharding={replicated} + constant.1 = f32[6,3]{1,0} + constant({{2,7,2},{2,9,2},{2,6,2},{3,7,2},{2,9,3},{2,3,2}}), + sharding={replicated} + multiply = f32[6,3]{1,0} multiply(constant, constant.1), + sharding={devices=[2,1,2]0,1,2,3 last_tile_dim_replicate} + ROOT add = f32[6,3]{1,0} add(multiply, constant.1), + sharding={devices=[4,1]0,1,2,3} +} +)"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + PartitionComputation(hlo_string, /*num_devices=*/4)); + VLOG(1) << module->ToString(); + auto partial_replicate_lhs = + AllOf(op::Shape("f32[3,3]"), + op::DynamicSlice(op::Constant(), op::Reshape(), op::Constant())); + auto partial_replicate_rhs = + AllOf(op::Shape("f32[3,3]"), + op::DynamicSlice(op::Constant(), op::Reshape(), op::Constant())); + auto multiply = + AllOf(op::Shape("f32[3,3]"), + op::Multiply(partial_replicate_lhs, partial_replicate_rhs)); + auto right_halo = + AllOf(op::Shape("f32[1,3]"), op::CollectivePermute(op::Slice(multiply))); + auto add_lhs = AllOf( + op::Shape("f32[2,3]"), + op::DynamicSlice( + op::DynamicSlice( + op::Pad(op::Concatenate(multiply, right_halo), op::Constant()), + op::Reshape(), op::Constant()), + op::Reshape(), op::Constant())); + auto add_rhs = AllOf(op::Shape("f32[2,3]"), + op::DynamicSlice(op::Pad(op::Constant(), op::Constant()), + op::Reshape(), op::Constant())); + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, AllOf(op::Shape("f32[2,3]"), op::Add(add_lhs, add_rhs))); +} + } // namespace } // namespace spmd } // namespace xla diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc index 767bed2a21a..3443c6e013d 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc @@ -32,9 +32,11 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/hlo_sharding.h" #include "tensorflow/compiler/xla/service/hlo_sharding_util.h" #include "tensorflow/compiler/xla/service/pattern_matcher.h" +#include "tensorflow/compiler/xla/service/shape_inference.h" #include "tensorflow/compiler/xla/service/spmd/spmd_partitioner.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/util.h" +#include "tensorflow/compiler/xla/window_util.h" #include "tensorflow/compiler/xla/xla_data.pb.h" namespace xla { @@ -229,8 +231,11 @@ std::vector MakePartitionOffsets( std::vector MakeTiledPartitionOrdinals( const HloSharding& sharding, HloInstruction* partition_id, SpmdBuilder* b) { CHECK(!sharding.IsTileMaximal()); - auto table_shape = - ShapeUtil::MakeShape(S32, sharding.tile_assignment().dimensions()); + auto dimensions = sharding.tile_assignment().dimensions(); + if (sharding.ReplicateOnLastTileDim()) { + dimensions.pop_back(); + } + auto table_shape = ShapeUtil::MakeShape(S32, dimensions); return MakePartitionOffsets(table_shape, sharding, partition_id, b); } @@ -287,6 +292,195 @@ HloInstruction* PadBaseShapeBeforeUnevenTiledSharding( return PadToShape(hlo, padded_base_shape, b); } +// TODO(wangtao): generize this function when target is partial replicate. +absl::optional PartialReplicateToTileCompatibleSharding( + const HloSharding& partial_sharding, + const std::vector& target_tile_dims) { + if (!partial_sharding.ReplicateOnLastTileDim()) { + return absl::nullopt; + } + int64 rank = partial_sharding.tile_assignment().num_dimensions() - 1; + if (target_tile_dims.size() < rank) { + return absl::nullopt; + } + // A dimension is expanded when target_tile_size > partial_tile_size and + // target_tile_size % partial_tile_size == 0. + // expand_tile_dims_positions is the index of the expand_dim. + std::vector expand_tile_dims_indices(rank, -1); + // expand_tile_size = target_tile_size / partial_tile_size. + std::vector expand_tile_sizes; + int num_expand_dims = 0; + for (int64 dim = 0; dim < rank; dim++) { + int64 partial_tile_size = partial_sharding.tile_assignment().dim(dim); + int64 target_tile_size = target_tile_dims[dim]; + if (target_tile_size % partial_tile_size != 0 || + target_tile_size < partial_tile_size) { + return absl::nullopt; + } + + if (target_tile_size > partial_tile_size) { + expand_tile_dims_indices[dim] = num_expand_dims++; + expand_tile_sizes.emplace_back(target_tile_size / partial_tile_size); + } + } + + // Reshape the partial replicate tile_dimensions. + auto reshape_dimensions = partial_sharding.tile_assignment().dimensions(); + int64 num_replication = reshape_dimensions.back(); + if (num_replication != Product(expand_tile_sizes)) { + return absl::nullopt; + } + reshape_dimensions.pop_back(); + reshape_dimensions.insert(reshape_dimensions.end(), expand_tile_sizes.begin(), + expand_tile_sizes.end()); + auto reshape_tile_assignment = partial_sharding.tile_assignment(); + + // Transpose. 
+ std::vector perm; + perm.reserve(rank); + for (int64 dim = 0; dim < rank; dim++) { + perm.emplace_back(dim); + if (expand_tile_dims_indices[dim] > -1) { + perm.emplace_back(expand_tile_dims_indices[dim] + rank); + } + } + auto transpose_sharding = hlo_sharding_util::TransposeSharding( + HloSharding::Tile(reshape_tile_assignment), perm); + + // Reshape to target shape + auto transpose_tile_assignment = transpose_sharding.tile_assignment(); + transpose_tile_assignment.Reshape(target_tile_dims); + + return HloSharding::Tile(transpose_tile_assignment); +} + +absl::optional PadFromPartialReplicateShape( + HloInstruction* hlo, const Shape& base_shape, + const HloSharding& src_sharding, const HloSharding& dst_sharding, + const std::vector& expand_tile_dims, + const SPMDCollectiveOpsCreator& collective_ops_creator, + int64* next_channel_id, HloInstruction* partition_id, SpmdBuilder* b) { + auto padded_src_shape = + GetPaddedShapeForUnevenPartitioning(base_shape, src_sharding); + auto padded_dst_shape = + GetPaddedShapeForUnevenPartitioning(base_shape, dst_sharding); + if (ShapeUtil::Compatible(padded_dst_shape, hlo->shape())) { + return hlo; + } + + auto partition_ordinals = + MakeTiledPartitionOrdinals(src_sharding, partition_id, b); + + HloInstruction* result = hlo; + auto zero = b->AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::Zero(hlo->shape().element_type()))); + std::vector expand_dims_without_halo_exchange; + // Pad the dimensions needs halo exchange and record the padded dims that + // won't need halo exchange. + for (auto dim : expand_tile_dims) { + int64 src_shard_count = src_sharding.tile_assignment().dim(dim); + int64 src_per_shard_size = + padded_src_shape.dimensions(dim) / src_shard_count; + // Calculate per shard size using the sharding to compare if dst_sharding + // needs more padding at the end. + int64 dst_per_shard_size = + padded_dst_shape.dimensions(dim) / src_shard_count; + + // If dst_sharding doesn't need more padding at the end. + if (src_per_shard_size >= dst_per_shard_size) { + continue; + } + // If src sharding at this dimension is not partitoned, simply pad to + // the desired shape. + if (src_shard_count == 1) { + expand_dims_without_halo_exchange.emplace_back(dim); + continue; + } + + // If dst_padding needs more padding at the end, need to re-distribute the + // data between each shard using collective permute. + // For example, if dimension size is 6 and shard 2 ways in the src but + // needs to shard 4 ways in the dst. 4 ways needs padding 2 0s at the end + // and has 2 elements at each shard, while 2 way sharding has 3 elements + // in each shard, re-distribution is needed. + // + // 1. Calculate left_halo size. + // left-halo size is 0 + OffsetCalculation left_halo_size_function = + OffsetCalculation(MultiplyAddDivideOffsetCalculation(0, 0, 1)); + + // 2. Calculate right_halo size. + // right-halo size is D * (i + 1) - S * (i + 1) = (D - S) * i + (D - S) + OffsetCalculation right_halo_size_function = + OffsetCalculation(MultiplyAddDivideOffsetCalculation( + dst_per_shard_size - src_per_shard_size, + dst_per_shard_size - src_per_shard_size, 1)); + + auto concat = result; + // 3. Halo exchange. + auto halo_exchange_result = ExchangeHalo( + result, left_halo_size_function, right_halo_size_function, dim, + src_sharding, collective_ops_creator, next_channel_id, b); + + if (halo_exchange_result.has_value()) { + concat = halo_exchange_result.value(); + } else { + return absl::nullopt; + } + + // 4. Pad. 
+ std::vector zero_padding(concat->shape().rank()); + PaddingConfig pad_config = window_util::MakeSymmetricPadding(zero_padding); + pad_config.mutable_dimensions(dim)->set_edge_padding_low(0); + int64 max_right_halo_size = + right_halo_size_function.MaxInRange(0, src_shard_count - 1); + pad_config.mutable_dimensions(dim)->set_edge_padding_high(std::max( + 0LL, padded_dst_shape.dimensions(dim) - + padded_src_shape.dimensions(dim) - max_right_halo_size)); + auto padded_concat_shape = ShapeInference::InferPadShape( + concat->shape(), zero->shape(), pad_config) + .ValueOrDie(); + concat = b->AddInstruction(HloInstruction::CreatePad( + padded_concat_shape, concat, zero, pad_config)); + + // 5. Slice the valid result. + // Slice offset is (D-S) * i + auto zero_s32 = b->AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::Zero(S32))); + OffsetCalculation start_offset_on_padded_concat_calculation = + OffsetCalculation(MultiplyAddDivideOffsetCalculation( + dst_per_shard_size - src_per_shard_size, 0, 1)); + auto slice_shape = concat->shape(); + slice_shape.set_dimensions(dim, dst_per_shard_size); + std::vector slice_offsets(concat->shape().rank(), + zero_s32); + slice_offsets[dim] = start_offset_on_padded_concat_calculation.Calculate( + partition_ordinals[dim], b); + result = b->AddInstruction(HloInstruction::CreateDynamicSlice( + slice_shape, concat, slice_offsets, slice_shape.dimensions())); + } + + // Pad other dimensions that won't need halo exchange with a single pad. + if (!expand_dims_without_halo_exchange.empty()) { + std::vector zero_padding(result->shape().rank()); + PaddingConfig pad_config = window_util::MakeSymmetricPadding(zero_padding); + + auto padded_shape = result->shape(); + for (auto dim : expand_dims_without_halo_exchange) { + pad_config.mutable_dimensions(dim)->set_edge_padding_low(0); + pad_config.mutable_dimensions(dim)->set_edge_padding_high( + padded_dst_shape.dimensions(dim) - padded_src_shape.dimensions(dim)); + padded_shape.set_dimensions(dim, result->shape().dimensions(dim) + + padded_dst_shape.dimensions(dim) - + padded_src_shape.dimensions(dim)); + } + result = b->AddInstruction( + HloInstruction::CreatePad(padded_shape, result, zero, pad_config)); + } + + return result; +} + absl::optional UniqueTiledDim(const HloSharding& sharding) { if (sharding.IsTileMaximal()) { return absl::nullopt; diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h index e8b51567359..6906b52ca79 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h @@ -347,6 +347,30 @@ absl::optional TransposeShardingWithCollapsedDims( absl::optional ParseReductionComputation( const HloComputation* reduction_comp); +// Pad the shape from partial replicate shape for `dst_sharding`. +// If dst_sharding needs more padding and per_shard_size increased in +// dst_sharding, halo exchange on the right side is needed. +absl::optional PadFromPartialReplicateShape( + HloInstruction* hlo, const Shape& base_shape, + const HloSharding& src_sharding, const HloSharding& dst_sharding, + const std::vector& expand_tile_dims, + const SPMDCollectiveOpsCreator& collective_ops_creator, + int64* next_channel_id, HloInstruction* partition_id, SpmdBuilder* b); + +// Get the compatible sharding from a partial replicate sharding to a given +// target tile dimensions. 
+// Compatible means replicate sharding can transform to the target tile +// dimensions by dynamic slice. +// For example, if partial_sharding is +// {devices=[1,2,2]0,1,2,3 last_tile_dim_replicate} +// Target tile dims is {2, 2}, the returned compatible sharding will be +// sharding={devices=[1,2,2]0,2,1,3 last_tile_dim_replicate}. +// If patial replicate sharding is not partial replicate or can't reshard to +// target_tile_dims by dynamic slice, return absl::nullopt. +absl::optional PartialReplicateToTileCompatibleSharding( + const HloSharding& partial_sharding, + const std::vector& target_tile_dims); + } // namespace spmd } // namespace xla From 7ffd29e060f005a92f089dca9cec1eccf76a79af Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: Mon, 10 Aug 2020 13:34:30 -0700 Subject: [PATCH 2458/2522] [tf.data service] Write new journal files instead of appending. Some filesystems don't support appending to an existing file. Better to begin a new journal file each time the dispatcher is restarted. PiperOrigin-RevId: 325876992 Change-Id: I0e1046a67e85241d7542b368c8b5b7a5b8955556 --- tensorflow/core/data/service/BUILD | 1 + .../core/data/service/dispatcher_impl.cc | 20 +++-- tensorflow/core/data/service/journal.cc | 80 ++++++++++++++----- tensorflow/core/data/service/journal.h | 32 ++++++-- tensorflow/core/data/service/journal_test.cc | 6 +- tensorflow/core/data/service/server_lib.cc | 17 ++-- tensorflow/core/data/service/server_lib.h | 8 +- .../kernel_tests/data_service_ops_test.py | 43 ++++++++-- 8 files changed, 159 insertions(+), 48 deletions(-) diff --git a/tensorflow/core/data/service/BUILD b/tensorflow/core/data/service/BUILD index aed402fb3b9..13034eb4354 100644 --- a/tensorflow/core/data/service/BUILD +++ b/tensorflow/core/data/service/BUILD @@ -173,6 +173,7 @@ cc_library( deps = [ ":journal_proto_cc", "//tensorflow/core:lib", + "//tensorflow/core/platform:regexp", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", ], diff --git a/tensorflow/core/data/service/dispatcher_impl.cc b/tensorflow/core/data/service/dispatcher_impl.cc index b440e9be905..5dbcece7b49 100644 --- a/tensorflow/core/data/service/dispatcher_impl.cc +++ b/tensorflow/core/data/service/dispatcher_impl.cc @@ -87,21 +87,25 @@ Status DataServiceDispatcherImpl::Start() { } journal_writer_ = absl::make_unique( Env::Default(), JournalDir(config_.work_dir())); + LOG(INFO) << "Restoring dispatcher state from journal in " + << JournalDir(config_.work_dir()); Update update; bool end_of_journal = false; FileJournalReader reader(Env::Default(), JournalDir(config_.work_dir())); Status s = reader.Read(&update, &end_of_journal); if (errors::IsNotFound(s)) { LOG(INFO) << "No journal found. Starting dispatcher from new state."; - return Status::OK(); - } - TF_RETURN_IF_ERROR(s); - LOG(INFO) << "Restoring dispatcher state from journal in " - << JournalDir(config_.work_dir()); - while (!end_of_journal) { - TF_RETURN_IF_ERROR(ApplyWithoutJournaling(update)); - TF_RETURN_IF_ERROR(reader.Read(&update, &end_of_journal)); + } else if (!s.ok()) { + return s; + } else { + while (!end_of_journal) { + TF_RETURN_IF_ERROR(ApplyWithoutJournaling(update)); + TF_RETURN_IF_ERROR(reader.Read(&update, &end_of_journal)); + } } + // Initialize the journal writer in `Start` so that we fail fast in case it + // can't be initialized. 
+ TF_RETURN_IF_ERROR(journal_writer_.value()->EnsureInitialized()); return Status::OK(); } diff --git a/tensorflow/core/data/service/journal.cc b/tensorflow/core/data/service/journal.cc index 11952b0dfd9..b0ce0876c69 100644 --- a/tensorflow/core/data/service/journal.cc +++ b/tensorflow/core/data/service/journal.cc @@ -22,29 +22,51 @@ limitations under the License. #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/path.h" +#include "tensorflow/core/platform/regexp.h" namespace tensorflow { namespace data { namespace { constexpr StringPiece kJournal = "journal"; + +Status ParseSequenceNumber(const std::string& journal_file, + int64* sequence_number) { + if (!RE2::FullMatch(journal_file, ".*_(\\d+)", sequence_number)) { + return errors::InvalidArgument("Failed to parse journal file name: ", + journal_file); + } + return Status::OK(); +} } // namespace -std::string DataServiceJournalFile(StringPiece journal_dir) { - return io::JoinPath(journal_dir, kJournal); +std::string DataServiceJournalFile(const std::string& journal_dir, + int64 sequence_number) { + return io::JoinPath(journal_dir, + absl::StrCat(kJournal, "_", sequence_number)); } -FileJournalWriter::FileJournalWriter(Env* env, StringPiece journal_dir) +FileJournalWriter::FileJournalWriter(Env* env, const std::string& journal_dir) : env_(env), journal_dir_(journal_dir) {} Status FileJournalWriter::EnsureInitialized() { if (writer_) { return Status::OK(); } + std::vector journal_files; TF_RETURN_IF_ERROR(env_->RecursivelyCreateDir(journal_dir_)); - TF_RETURN_IF_ERROR( - env_->NewAppendableFile(DataServiceJournalFile(journal_dir_), &file_)); + TF_RETURN_IF_ERROR(env_->GetChildren(journal_dir_, &journal_files)); + int64 latest_sequence_number = -1; + for (const auto& file : journal_files) { + int64 sequence_number; + TF_RETURN_IF_ERROR(ParseSequenceNumber(file, &sequence_number)); + latest_sequence_number = std::max(latest_sequence_number, sequence_number); + } + std::string journal_file = + DataServiceJournalFile(journal_dir_, latest_sequence_number + 1); + TF_RETURN_IF_ERROR(env_->NewAppendableFile(journal_file, &file_)); writer_ = absl::make_unique(file_.get()); + VLOG(1) << "Created journal writer to write to " << journal_file; return Status::OK(); } @@ -58,6 +80,9 @@ Status FileJournalWriter::Write(const Update& update) { TF_RETURN_IF_ERROR(writer_->WriteRecord(s)); TF_RETURN_IF_ERROR(writer_->Flush()); TF_RETURN_IF_ERROR(file_->Sync()); + if (VLOG_IS_ON(4)) { + VLOG(4) << "Wrote journal entry: " << update.DebugString(); + } return Status::OK(); } @@ -68,25 +93,44 @@ Status FileJournalReader::EnsureInitialized() { if (reader_) { return Status::OK(); } - TF_RETURN_IF_ERROR( - env_->NewRandomAccessFile(DataServiceJournalFile(journal_dir_), &file_)); - reader_ = absl::make_unique(file_.get()); - return Status::OK(); + return UpdateFile(DataServiceJournalFile(journal_dir_, 0)); } Status FileJournalReader::Read(Update* update, bool* end_of_journal) { TF_RETURN_IF_ERROR(EnsureInitialized()); - tstring record; - Status s = reader_->ReadRecord(&offset_, &record); - if (errors::IsOutOfRange(s)) { - *end_of_journal = true; + while (true) { + tstring record; + Status s = reader_->ReadRecord(&offset_, &record); + if (errors::IsOutOfRange(s)) { + sequence_number_++; + std::string next_journal_file = + DataServiceJournalFile(journal_dir_, sequence_number_); + if (errors::IsNotFound(env_->FileExists(next_journal_file))) { + VLOG(3) << "Next journal file " << next_journal_file + 
<< " does not exist. End of journal reached."; + *end_of_journal = true; + return Status::OK(); + } + TF_RETURN_IF_ERROR(UpdateFile(next_journal_file)); + continue; + } + TF_RETURN_IF_ERROR(s); + if (!update->ParseFromString(record)) { + return errors::DataLoss("Failed to parse journal record."); + } + if (VLOG_IS_ON(4)) { + VLOG(4) << "Read journal entry: " << update->DebugString(); + } + *end_of_journal = false; return Status::OK(); } - TF_RETURN_IF_ERROR(s); - if (!update->ParseFromString(record)) { - return errors::DataLoss("Failed to parse journal record."); - } - *end_of_journal = false; +} + +Status FileJournalReader::UpdateFile(const std::string& filename) { + VLOG(1) << "Reading from journal file " << filename; + TF_RETURN_IF_ERROR(env_->NewRandomAccessFile(filename, &file_)); + reader_ = absl::make_unique(file_.get()); + offset_ = 0; return Status::OK(); } diff --git a/tensorflow/core/data/service/journal.h b/tensorflow/core/data/service/journal.h index c627c21756c..3483497705e 100644 --- a/tensorflow/core/data/service/journal.h +++ b/tensorflow/core/data/service/journal.h @@ -25,7 +25,8 @@ namespace tensorflow { namespace data { // Returns the location of the journal file within the journal directory. -std::string DataServiceJournalFile(StringPiece journal_dir); +std::string DataServiceJournalFile(const std::string& journal_dir, + int64 sequence_number); // Interface for writing to a journal. class JournalWriter { @@ -33,25 +34,39 @@ class JournalWriter { virtual ~JournalWriter() = default; // Writes and syncs an update to the journal. virtual Status Write(const Update& update) = 0; + // Initializes the writer if it is not yet initialized. + virtual Status EnsureInitialized() = 0; }; // FileJournalWriter is not thread-safe, requiring external synchronization when // used by multiple threads. +// +// FileJournalWriter writes journal files to a configured journal directory. The +// directory is laid out in the following format: +// +// journal_dir/ +// journal_0 +// journal_1 +// ... +// +// When the writer is created, it lists the directory to find the next available +// journal file name. For example, if the journal directory contains +// "journal_0", "journal_1", and "journal_2", the writer will write to +// "journal_3". The writer will flush updates as they are written, so that they +// can be stored durably in case of machine failure. class FileJournalWriter : public JournalWriter { public: // Creates a journal writer to write to the given journal directory. // If there is already journal data there, the journal writer will append to // the existing journal. - explicit FileJournalWriter(Env* env, StringPiece journal_dir); + explicit FileJournalWriter(Env* env, const std::string& journal_dir); FileJournalWriter(const FileJournalWriter&) = delete; FileJournalWriter& operator=(const FileJournalWriter&) = delete; Status Write(const Update& update) override; + Status EnsureInitialized() override; private: - // Initializes the writer if it is not yet initialized. - Status EnsureInitialized(); - Env* env_; const std::string journal_dir_; std::unique_ptr file_; @@ -69,6 +84,9 @@ class JournalReader { // JournalReader is not thread-safe, requiring external synchronization when // used by multiple threads. +// +// The journal reader reads through all journal files in the configured journal +// directory, in order of their sequence numbers. See FileJournalWriter above. 
class FileJournalReader : public JournalReader { public: explicit FileJournalReader(Env* env, StringPiece journal_dir); @@ -80,9 +98,13 @@ class FileJournalReader : public JournalReader { private: // Initializes the reader if it is not yet initialized. Status EnsureInitialized(); + // Updates the `FileJournalReader` to read from a new file. + Status UpdateFile(const std::string& filename); Env* env_; const std::string journal_dir_; + // Sequence number of current journal file. + int64 sequence_number_ = 0; // Current offset into `file_`. uint64 offset_ = 0; std::unique_ptr file_; diff --git a/tensorflow/core/data/service/journal_test.cc b/tensorflow/core/data/service/journal_test.cc index 169e58ed048..313b216fe76 100644 --- a/tensorflow/core/data/service/journal_test.cc +++ b/tensorflow/core/data/service/journal_test.cc @@ -95,7 +95,7 @@ TEST(Journal, RoundTripMultiple) { TF_EXPECT_OK(CheckJournalContent(journal_dir, updates)); } -TEST(Journal, AppendExistingFile) { +TEST(Journal, AppendExistingJournal) { std::string journal_dir; EXPECT_TRUE(NewJournalDir(&journal_dir)); std::vector updates = {MakeCreateJobUpdate(), @@ -127,7 +127,7 @@ TEST(Journal, NonRecordData) { { std::unique_ptr file; TF_ASSERT_OK(Env::Default()->NewAppendableFile( - DataServiceJournalFile(journal_dir), &file)); + DataServiceJournalFile(journal_dir, /*sequence_number=*/0), &file)); TF_ASSERT_OK(file->Append("not record data")); } @@ -147,7 +147,7 @@ TEST(Journal, InvalidRecordData) { { std::unique_ptr file; TF_ASSERT_OK(Env::Default()->NewAppendableFile( - DataServiceJournalFile(journal_dir), &file)); + DataServiceJournalFile(journal_dir, /*sequence_number=*/0), &file)); auto writer = absl::make_unique(file.get()); TF_ASSERT_OK(writer->WriteRecord("not serializd proto")); } diff --git a/tensorflow/core/data/service/server_lib.cc b/tensorflow/core/data/service/server_lib.cc index 98157f6b232..fb33319db29 100644 --- a/tensorflow/core/data/service/server_lib.cc +++ b/tensorflow/core/data/service/server_lib.cc @@ -28,8 +28,12 @@ namespace { constexpr char kPortPlaceholder[] = "%port%"; } -GrpcDataServerBase::GrpcDataServerBase(int port, const std::string& protocol) - : requested_port_(port), protocol_(protocol), bound_port_(port) {} +GrpcDataServerBase::GrpcDataServerBase(int port, const std::string& protocol, + const std::string server_type) + : requested_port_(port), + protocol_(protocol), + server_type_(server_type), + bound_port_(port) {} Status GrpcDataServerBase::Start() { if (stopped_) { @@ -56,7 +60,8 @@ Status GrpcDataServerBase::Start() { TF_RETURN_IF_ERROR(StartServiceInternal()); started_ = true; - VLOG(1) << "Started tf.data service running at 0.0.0.0:" << BoundPort(); + LOG(INFO) << "Started tf.data " << server_type_ + << " running at 0.0.0.0:" << BoundPort(); return Status::OK(); } @@ -74,7 +79,8 @@ int GrpcDataServerBase::BoundPort() { return bound_port(); } DispatchGrpcDataServer::DispatchGrpcDataServer( const experimental::DispatcherConfig& config) - : GrpcDataServerBase(config.port(), config.protocol()), config_(config) {} + : GrpcDataServerBase(config.port(), config.protocol(), "DispatchServer"), + config_(config) {} DispatchGrpcDataServer::~DispatchGrpcDataServer() { delete service_; } @@ -100,7 +106,8 @@ Status DispatchGrpcDataServer::NumWorkers(int* num_workers) { WorkerGrpcDataServer::WorkerGrpcDataServer( const experimental::WorkerConfig& config) - : GrpcDataServerBase(config.port(), config.protocol()), config_(config) {} + : GrpcDataServerBase(config.port(), config.protocol(), "WorkerServer"), + 
config_(config) {} WorkerGrpcDataServer::~WorkerGrpcDataServer() { delete service_; } diff --git a/tensorflow/core/data/service/server_lib.h b/tensorflow/core/data/service/server_lib.h index 2c300947f63..62662e61c8a 100644 --- a/tensorflow/core/data/service/server_lib.h +++ b/tensorflow/core/data/service/server_lib.h @@ -34,10 +34,9 @@ class GrpcDataServerBase { public: // Constructs a tf.data server with the specified port. If the port is 0, the // server will find an available port in `Start()`. The chosen port can be - // found in the output of `Target()`. - // - // dispatcher_address is only needed for worker data servers. - GrpcDataServerBase(int requested_port, const std::string& protocol); + // found by calling `BoundPort()`. + GrpcDataServerBase(int requested_port, const std::string& protocol, + const std::string server_type); virtual ~GrpcDataServerBase() {} // Starts the server running asynchronously. @@ -62,6 +61,7 @@ class GrpcDataServerBase { const int requested_port_; const std::string protocol_; + const std::string server_type_; private: int bound_port_; diff --git a/tensorflow/python/data/kernel_tests/data_service_ops_test.py b/tensorflow/python/data/kernel_tests/data_service_ops_test.py index 49cf1772661..6ef9293ddd7 100644 --- a/tensorflow/python/data/kernel_tests/data_service_ops_test.py +++ b/tensorflow/python/data/kernel_tests/data_service_ops_test.py @@ -107,6 +107,14 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): dispatcher._stop() return self.start_dispatch_server(port=port) + def restart_worker(self, worker, dispatcher, use_same_port=True): + """Stops `worker` and returns a new worker.""" + port = 0 + if use_same_port: + port = int(worker._address.split(":")[1]) + worker._stop() + return self.start_worker_server(dispatcher, port) + def start_cluster(self, num_workers): """Creates a cluster of tf.data service servers. 
@@ -176,6 +184,35 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): dispatcher = self.restart_dispatcher(dispatcher) self.assertDatasetProduces(ds, list(range(num_elements))) + @combinations.generate(test_base.eager_only_combinations()) + def testDispatcherManyRestarts(self): + dispatcher, workers = self.start_cluster(1) # to avoid gcing workers, pylint: disable=unused-variable + num_elements_start = 10 + num_elements_end = 15 + datasets = [] + for num_elements in range(num_elements_start, num_elements_end): + datasets.append(_make_distributed_range_dataset(num_elements, dispatcher)) + dispatcher = self.restart_dispatcher(dispatcher) + for ds, num_elements in zip(datasets, + range(num_elements_start, num_elements_end)): + self.assertDatasetProduces(ds, list(range(num_elements))) + + @combinations.generate(test_base.eager_only_combinations()) + def testDispatcherAndWorkerRestart(self): + dispatcher, [worker] = self.start_cluster(1) # to avoid gcing workers, pylint: disable=unused-variable + num_elements = 100 + ds = dataset_ops.Dataset.range(num_elements) + + def restart(): + return (self.restart_dispatcher(dispatcher), + self.restart_worker(worker, dispatcher)) + + ds = _make_distributed_dataset(ds, dispatcher) + dispatcher, worker = restart() + self.assertDatasetProduces(ds, list(range(num_elements))) + dispatcher, worker = restart() + self.assertDatasetProduces(ds, list(range(num_elements))) + @combinations.generate(test_base.eager_only_combinations()) def testDistributeSparse(self): dispatcher, workers = self.start_cluster(1) # to avoid gcing workers, pylint: disable=unused-variable @@ -357,11 +394,7 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): self.assertEqual(i, next(iterator).numpy()) # Stop the original worker and start a new one. - port = 0 - if use_same_port: - port = int(worker._address.split(":")[1]) - worker._stop() - new_worker = self.start_worker_server(dispatcher, port=port) # to avoid gcing workers, pylint: disable=unused-variable + worker = self.restart_worker(worker, dispatcher, use_same_port) # There may have been some elements prefetched from the first worker # before it was stopped. From 72aa724a2860b34945698dc84e199bce3f05084a Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Mon, 10 Aug 2020 13:46:08 -0700 Subject: [PATCH 2459/2522] Add VLOGs to XLA:GPU sort emitter. These give us a sense of what it's doing, which is particularly important because all of this is hidden within --xla_hlo_profile (as it's just one HLO). 
PiperOrigin-RevId: 325879218 Change-Id: I7b177ac6c636e35f0a178502098d9c1317a68448 --- tensorflow/compiler/xla/service/gpu/BUILD | 1 + .../xla/service/gpu/ir_emitter_unnested.cc | 22 +++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index b9ba2100293..074fbd92b27 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -286,6 +286,7 @@ cc_library( "@com_google_absl//absl/container:inlined_vector", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/types:optional", "@com_google_absl//absl/types:span", "@llvm-project//llvm:Core", diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index 34cdfb4ecf0..61b78b6004d 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -27,6 +27,7 @@ limitations under the License. #include "absl/container/inlined_vector.h" #include "absl/memory/memory.h" #include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" #include "absl/types/optional.h" #include "absl/types/span.h" #include "llvm/ADT/StringRef.h" @@ -1284,6 +1285,7 @@ Status IrEmitterUnnested::HandleSort(HloInstruction* sort) { if (destination_buffer != source_address) { // TODO(b/26783907): Figure out why we never seem to share buffers for // key/value sort. + VLOG(2) << sort->name() << " requires initial D2D copy for operand " << i; thunks.push_back(absl::make_unique( Thunk::ThunkInfo(), /*source_address=*/source_address, @@ -1294,6 +1296,7 @@ Status IrEmitterUnnested::HandleSort(HloInstruction* sort) { uint64 dimension_to_sort_bound = keys_shape.dimensions(dimension_to_sort); int64 num_stages = tensorflow::Log2Ceiling(dimension_to_sort_bound); + VLOG(2) << sort->name() << " requires " << num_stages << " stages."; CHECK_GE(1ULL << num_stages, dimension_to_sort_bound); CHECK_LT(1ULL << (num_stages - 1), dimension_to_sort_bound); @@ -1368,11 +1371,27 @@ Status IrEmitterUnnested::HandleSort(HloInstruction* sort) { ir_emitter_context_->gpu_device_info().threads_per_block_limit || total_shared_memory_needed > ir_emitter_context_->gpu_device_info().shared_memory_per_block; + VLOG(2) << absl::StreamFormat( + "%s %s use tiling. No tiling if any of the following is true: " + "kTileSize=%d < 128, " + "kThreadsPerBlock=%d > threads_per_block_limit=%d, " + "total_shared_memory_needed=%d > shared_memory_per_block=%d", + sort->name(), (no_tiling ? 
"won't" : "will"), kTileSize, kThreadsPerBlock, + ir_emitter_context_->gpu_device_info().threads_per_block_limit, + total_shared_memory_needed, + ir_emitter_context_->gpu_device_info().shared_memory_per_block); uint64 num_blocks = CeilOfRatio(num_iterations, kThreadsPerBlock); LaunchDimensions tiled_launch_dimensions(num_blocks, kThreadsPerBlock); + VLOG(2) << absl::StreamFormat("%s launch dims: %d blocks, %d threads/block", + sort->name(), num_blocks, kThreadsPerBlock); auto emit_kernel = [&](absl::Span xor_masks) { + VLOG(2) << absl::StreamFormat( + "%s uses kernel for xor masks [%s]", sort->name(), + absl::StrJoin(xor_masks, ", ", [](std::string* out, int64 xor_mask) { + absl::StrAppendFormat(out, "0x%x", xor_mask); + })); thunks.push_back( BuildKernelThunk(sort, /*implements_whole_instruction=*/false)); LaunchDimensions launch_dimensions = xor_masks.size() > 1 @@ -1421,6 +1440,9 @@ Status IrEmitterUnnested::HandleSort(HloInstruction* sort) { if (!xor_masks.empty()) { TF_RETURN_IF_ERROR(emit_kernel(xor_masks)); } + VLOG(2) << absl::StreamFormat( + "%s requires %d thunks (including any D2D copies)", sort->name(), + thunks.size()); AddThunkToThunkSequence(absl::make_unique( GetThunkInfo(sort), std::move(thunks))); From 83b86b6333a2e1a8cdb7b399378b91927caded00 Mon Sep 17 00:00:00 2001 From: Berkin Ilbeyi Date: Mon, 10 Aug 2020 13:54:37 -0700 Subject: [PATCH 2460/2522] [XLA] NFC: Refactor allocation success/failure logic in memory space assignment. This CL is in preparation for repacking the allocations in case we run out of alternate memory due to fragmentation. This CL introduces a Result enum to capture success of an allocation or failure due to multiple different reasons. Moved the logic around to perform the decision to Finalize or Uncommit (and possibly retry) the allocations based on the Result. We can further use this Result to repack the assigned offsets to help with fragmentation. I also modified the function signatures that I touched to use non-const references instead of non-const pointers due to go/totw/178. PiperOrigin-RevId: 325881001 Change-Id: Icdbf1892d75208ec3eba8d2e084097106fb07dde --- .../xla/service/memory_space_assignment.cc | 208 ++++++++++-------- .../xla/service/memory_space_assignment.h | 130 ++++++++--- 2 files changed, 219 insertions(+), 119 deletions(-) diff --git a/tensorflow/compiler/xla/service/memory_space_assignment.cc b/tensorflow/compiler/xla/service/memory_space_assignment.cc index 377c84eaf6b..4131a0199bf 100644 --- a/tensorflow/compiler/xla/service/memory_space_assignment.cc +++ b/tensorflow/compiler/xla/service/memory_space_assignment.cc @@ -610,7 +610,9 @@ std::string MemorySpaceAssignment::AllocationValue::ToShortString() const { } void AlternateMemoryBestFitHeap::CreateAllocationValues( - const HloValue* value, std::vector* allocation_values) { + const AlternateMemoryBestFitHeap::BufferInterval& buffer_interval, + std::vector& allocation_values) const { + const HloValue* value = buffer_interval.buffer; VLOG(3) << "Creating AllocationValues for: " << value->ToString(); // Find and sort all non-trivial (excluding GTE, Tuple, and bitcast) @@ -638,10 +640,10 @@ void AlternateMemoryBestFitHeap::CreateAllocationValues( // Create an AllocationValue for each non-trivial position. 
absl::flat_hash_set computations; - int beginning_idx = allocation_values->size(); + int beginning_idx = allocation_values.size(); for (int i = 0; i < positions.size(); ++i) { const HloPosition& position = positions.at(i); - allocation_values->emplace_back(value, position); + allocation_values.emplace_back(value, position, buffer_interval.size); } std::vector uses(value->uses()); @@ -662,8 +664,8 @@ void AlternateMemoryBestFitHeap::CreateAllocationValues( HloComputation* use_computation = use.instruction->parent(); AllocationValue* last_allocation_value = nullptr; - for (int i = beginning_idx; i < allocation_values->size(); ++i) { - AllocationValue* allocation_value = &allocation_values->at(i); + for (int i = beginning_idx; i < allocation_values.size(); ++i) { + AllocationValue* allocation_value = &allocation_values.at(i); if (allocation_value->computation() == use_computation && instruction_schedule.at( allocation_value->defining_position().instruction) < use_time) { @@ -674,9 +676,9 @@ void AlternateMemoryBestFitHeap::CreateAllocationValues( last_allocation_value->AddUse(use, use_time); } - for (int i = beginning_idx; i < allocation_values->size(); ++i) { + for (int i = beginning_idx; i < allocation_values.size(); ++i) { VLOG(3) << "Created allocation value: " - << allocation_values->at(i).ToString(); + << allocation_values.at(i).ToString(); } } @@ -920,27 +922,27 @@ void AlternateMemoryBestFitHeap::AppendBufferInfoDebugString( } void AlternateMemoryBestFitHeap::AppendAllocationInfoDebugString( - const GlobalDecreasingSizeBestFitHeap::BufferInterval& interval, + const AllocationValue& value, const MemorySpaceAssignment::Allocation& allocation, - std::string* debug_str) const { + std::string& debug_str) const { // Columns in allocation information: // buffer_id: int. This value can be used the match with buffer info. // size: int. In bytes. // offset: int. In bytes. // start_time: int. Logical start time of the allocation. // end_time: int. Logical end time of the allocation. - if (debug_str->empty()) { + if (debug_str.empty()) { // Append the column names. - absl::StrAppend(debug_str, "buffer_id,size,offset,start_time,end_time\n"); + absl::StrAppend(&debug_str, "buffer_id,size,offset,start_time,end_time\n"); } if (allocation.memory_space() == MemorySpace::kAlternate) { const HloBuffer& buffer = - alias_analysis_.GetBufferContainingValue(*interval.buffer); - absl::StrAppend(debug_str, buffer.id(), ","); - absl::StrAppend(debug_str, interval.size, ","); - absl::StrAppend(debug_str, allocation.chunk().offset, ","); - absl::StrAppend(debug_str, allocation.start_time(), ","); - absl::StrAppend(debug_str, allocation.end_time(), "\n"); + alias_analysis_.GetBufferContainingValue(*value.value()); + absl::StrAppend(&debug_str, buffer.id(), ","); + absl::StrAppend(&debug_str, value.size(), ","); + absl::StrAppend(&debug_str, allocation.chunk().offset, ","); + absl::StrAppend(&debug_str, allocation.start_time(), ","); + absl::StrAppend(&debug_str, allocation.end_time(), "\n"); } } @@ -1044,16 +1046,25 @@ HeapSimulator::Result AlternateMemoryBestFitHeap::Finish() { AppendBufferInfoDebugString(interval, &buffer_info_str_); + std::vector allocation_values; + CreateAllocationValuesFromColocatedIntervals(colocated_intervals, + allocation_values); + // Retry allocating this value with larger limits if allocation fails. 
for (int retry_number = 0; retry_number < options_.max_retries; retry_number++) { - final_retry_ = (retry_number == options_.max_retries - 1); + bool final_retry = (retry_number == options_.max_retries - 1); options_.prefetch_interval_picker->SetRetryNumber(retry_number); - bool success = AllocateColocatedIntervals(colocated_intervals); - if (success) { + Result result = + AllocateAllocationValues(absl::MakeSpan(allocation_values)); + if (result_requires_uncommit(result) || + (!final_retry && result_failed_because_of_async_copy(result))) { + UncommitPendingChunks(absl::MakeSpan(allocation_values)); + VLOG(2) << "Couldn't allocate. Retry number " << retry_number; + } else { + FinalizeAllocations(absl::MakeSpan(allocation_values)); break; } - VLOG(2) << "Couldn't allocate. Retry number " << retry_number; } } @@ -1066,9 +1077,10 @@ HeapSimulator::Result AlternateMemoryBestFitHeap::Finish() { return result_; } -bool AlternateMemoryBestFitHeap::AllocateColocatedIntervals( - const std::vector& - colocated_intervals) { +void AlternateMemoryBestFitHeap::CreateAllocationValuesFromColocatedIntervals( + absl::Span + colocated_intervals, + std::vector& allocation_values) { // TODO(berkin): For now, place the phi values due to conditionals in // default memory. for (const BufferInterval* colocated_interval : colocated_intervals) { @@ -1089,11 +1101,15 @@ bool AlternateMemoryBestFitHeap::AllocateColocatedIntervals( } // Create AllocationValues for all the colocated intervals. - std::vector allocation_values; for (const auto& colocated_interval : colocated_intervals) { - CreateAllocationValues(colocated_interval->buffer, &allocation_values); + CreateAllocationValues(*colocated_interval, allocation_values); } FindAliases(&allocation_values); +} + +AlternateMemoryBestFitHeap::Result +AlternateMemoryBestFitHeap::AllocateAllocationValues( + absl::Span allocation_values) { const auto& instruction_schedule = hlo_live_range_.instruction_schedule(); // Data structure to contain the preferred offset for a given computation. @@ -1102,8 +1118,8 @@ bool AlternateMemoryBestFitHeap::AllocateColocatedIntervals( absl::flat_hash_map preferred_offset_for_computation; - bool allocation_success = true; - for (auto& allocation_value : allocation_values) { + Result result = Result::kSuccess; + for (AllocationValue& allocation_value : allocation_values) { int64 definition_time = instruction_schedule.at(allocation_value.defining_instruction()); @@ -1217,20 +1233,19 @@ bool AlternateMemoryBestFitHeap::AllocateColocatedIntervals( request.start_time = std::min(definition_time, use_time); request.end_time = use_time; request.latest_prefetch_time = latest_prefetch_time; - request.size = colocated_intervals[0]->size; + request.size = allocation_value.size(); request.allow_no_copy_alternate_mem_allocation = allow_no_copy_alternate_mem_allocation; request.earliest_prefetch_time = earliest_prefetch_time; request.preferred_offset = preferred_offset; request.use = &use; request.allocation_value = &allocation_value; - if (!AllocateSegment(request)) { + result_mark(AllocateSegment(request), result); + if (result_requires_uncommit(result)) { // If the allocation finding failed (e.g., due to running out of // asynchronous copies), then fall back to allocating the buffer // entirely in the default memory. 
- UncommitPendingChunks(); - allocation_success = false; - break; + return result; } // If there are multiple uses, they can try using the memory allocation @@ -1256,24 +1271,8 @@ bool AlternateMemoryBestFitHeap::AllocateColocatedIntervals( aliased_allocation->chunk().offset; } } - if (!allocation_success) { - break; - } } - if (allocation_success) { - for (AllocationValue& allocation_value : allocation_values) { - for (auto& allocation : *allocation_value.allocation_sequence()) { - AppendAllocationInfoDebugString(*colocated_intervals[0], *allocation, - &allocation_info_str_); - allocations_->push_back(std::move(allocation)); - } - } - } - - pending_chunks_.clear(); - pending_async_copies_.clear(); - pending_required_assignments_.clear(); - return allocation_success; + return result; } bool operator<(const AsynchronousCopy& a, const AsynchronousCopy& b) { @@ -1365,9 +1364,7 @@ void AlternateMemoryBestFitHeap::AllocateCrossProgramPrefetchBuffer( allocations_->push_back(std::move(allocation)); } - pending_chunks_.clear(); - pending_async_copies_.clear(); - pending_required_assignments_.clear(); + ClearPendingChunks(); } absl::optional @@ -1544,7 +1541,13 @@ bool AlternateMemoryBestFitHeap::AreIntervalsReservedInAlternateMemory( return false; } -void AlternateMemoryBestFitHeap::UncommitPendingChunks() { +void AlternateMemoryBestFitHeap::UncommitPendingChunks( + absl::Span allocation_values) { + // Clear the allocation sequence of the allocation values so that in case we + // retry allocation after uncommitting. + for (AllocationValue& allocation_value : allocation_values) { + allocation_value.allocation_sequence()->clear(); + } for (const auto& interval_and_chunk : pending_chunks_) { const BufferInterval& interval = interval_and_chunk.first; const Chunk& chunk = interval_and_chunk.second.chunk; @@ -1583,6 +1586,22 @@ void AlternateMemoryBestFitHeap::UncommitPendingChunks() { } } } + ClearPendingChunks(); +} + +void AlternateMemoryBestFitHeap::FinalizeAllocations( + absl::Span allocation_values) { + for (AllocationValue& allocation_value : allocation_values) { + for (auto& allocation : *allocation_value.allocation_sequence()) { + AppendAllocationInfoDebugString(allocation_value, *allocation, + allocation_info_str_); + allocations_->push_back(std::move(allocation)); + } + } + ClearPendingChunks(); +} + +void AlternateMemoryBestFitHeap::ClearPendingChunks() { pending_chunks_.clear(); pending_async_copies_.clear(); pending_required_assignments_.clear(); @@ -1598,7 +1617,7 @@ void AlternateMemoryBestFitHeap::AddToPendingChunks( CommitChunk(buffer_interval, chunk_candidate); } -bool AlternateMemoryBestFitHeap::AllocateSegment( +AlternateMemoryBestFitHeap::Result AlternateMemoryBestFitHeap::AllocateSegment( const AllocationRequest& request) { auto allocation_sequence = request.allocation_value->allocation_sequence(); // start_time == end_time is a special case where the value is consumed @@ -1609,7 +1628,7 @@ bool AlternateMemoryBestFitHeap::AllocateSegment( GetLiveAllocationAt(*allocation_sequence, request.end_time); CHECK_NE(allocation, nullptr); allocation->AddUse(request.use->hlo_use); - return true; + return Result::kSuccess; } const HloPosition& defining_position = @@ -1673,12 +1692,15 @@ bool AlternateMemoryBestFitHeap::AllocateSegment( } } + Result allocation_result = Result::kSuccess; // First try keeping the allocation entirely in the alternate memory. 
if (required_memory_space_at_start != MemorySpace::kDefault && required_memory_space_at_end != MemorySpace::kDefault && - request.allow_no_copy_alternate_mem_allocation && - AllocateInAlternateMemoryNoCopy(request)) { - return true; + request.allow_no_copy_alternate_mem_allocation) { + allocation_result = AllocateInAlternateMemoryNoCopy(request); + if (allocation_result == Result::kSuccess) { + return Result::kSuccess; + } } auto prev_allocation_it = allocation_sequence->rbegin(); @@ -1697,8 +1719,10 @@ bool AlternateMemoryBestFitHeap::AllocateSegment( (*prev_allocation_it)->defining_position() == defining_position) { // If there was an allocation for this HloValue that was in the alternate // memory space, we also need to perform an eviction. - if (!Evict(request)) { - return false; + Result eviction_result = Evict(request); + if (eviction_result != Result::kSuccess) { + // A non-success eviction requires us to uncommit previous allocations. + return result_mark(Result::kFailRequiresUncommit, eviction_result); } prev_allocation_in_default_mem_it = allocation_sequence->rbegin(); } else if (prev_allocation_in_default_mem_it == allocation_sequence->rend()) { @@ -1719,31 +1743,28 @@ bool AlternateMemoryBestFitHeap::AllocateSegment( << "Not trying to prefetch because use requires buffer in default mem."; (*prev_allocation_in_default_mem_it)->Extend(request.end_time); (*prev_allocation_in_default_mem_it)->AddUse(request.use->hlo_use); - return true; + return Result::kSuccess; } // Finally, try to prefetch the buffer into alternate memory. - if (Prefetch(request, **prev_allocation_in_default_mem_it)) { - return true; - } - if (!final_retry_ && prefetch_failed_due_to_async_copy_) { - // If prefetching failed due to asynchronous copy and we're not in our final - // try, return false (failure) so that we can retry this interval with - // larger limits. - return false; + Result prefetch_result = + Prefetch(request, **prev_allocation_in_default_mem_it); + if (prefetch_result == Result::kSuccess) { + return Result::kSuccess; } + result_mark(prefetch_result, allocation_result); // If the end assignment was required to be in alternate memory but that // wasn't possible, then this allocation is invalid. if (required_memory_space_at_end == MemorySpace::kAlternate) { - return false; + return result_mark(Result::kFailRequiresUncommit, allocation_result); } // If a copy wasn't inserted, then add this use to the latest allocation in // default memory. 
(*prev_allocation_in_default_mem_it)->Extend(request.end_time); (*prev_allocation_in_default_mem_it)->AddUse(request.use->hlo_use); - return true; + return allocation_result; } void AlternateMemoryBestFitHeap::AddAsyncCopy( @@ -1810,7 +1831,8 @@ AlternateMemoryBestFitHeap::ViolatesAsyncCopyOrdering(int64 start_time, return async_copy_ordering_.ViolatesOrdering(start_time, end_time); } -bool AlternateMemoryBestFitHeap::AllocateInAlternateMemoryNoCopy( +AlternateMemoryBestFitHeap::Result +AlternateMemoryBestFitHeap::AllocateInAlternateMemoryNoCopy( const AllocationRequest& request) { MemorySpaceAssignment::Allocation* prev_allocation = nullptr; bool can_eliminate_copy = false; @@ -1829,7 +1851,7 @@ bool AlternateMemoryBestFitHeap::AllocateInAlternateMemoryNoCopy( } if (!can_eliminate_copy) { - return false; + return Result::kFailPrevAllocationNotInAlternateMem; } const HloPosition& defining_position = @@ -1837,7 +1859,7 @@ bool AlternateMemoryBestFitHeap::AllocateInAlternateMemoryNoCopy( if (!options_.prefetch_interval_picker->CanAllocateInAlternateMemoryNoCopy( defining_position.shape(), request.start_time + 1, request.end_time)) { - return false; + return Result::kFailLiveRangeTooLong; } BufferInterval alternate_mem_interval; @@ -1916,12 +1938,13 @@ bool AlternateMemoryBestFitHeap::AllocateInAlternateMemoryNoCopy( } request.allocation_value->allocation_sequence()->back()->AddUse( request.use->hlo_use); - return true; + return Result::kSuccess; } - return false; + return Result::kFailOutOfMemory; } -bool AlternateMemoryBestFitHeap::Evict(const AllocationRequest& request) { +AlternateMemoryBestFitHeap::Result AlternateMemoryBestFitHeap::Evict( + const AllocationRequest& request) { CHECK_GT(request.allocation_value->allocation_sequence()->size(), 0); MemorySpaceAssignment::Allocation* prev_allocation = request.allocation_value->allocation_sequence()->back().get(); @@ -2010,10 +2033,12 @@ bool AlternateMemoryBestFitHeap::Evict(const AllocationRequest& request) { << " and " << hlo_live_range_.flattened_instruction_sequence() .instructions()[eviction_end_time]; - return false; + // return false; + return Result::kFailOutOfAsyncCopies; } } - return true; + // return true; + return Result::kSuccess; } int64 AlternateMemoryBestFitHeap::FindPrefetchEndTime( @@ -2063,7 +2088,7 @@ int64 AlternateMemoryBestFitHeap::FindPrefetchEndTime( return prefetch_end_time; } -bool AlternateMemoryBestFitHeap::Prefetch( +AlternateMemoryBestFitHeap::Result AlternateMemoryBestFitHeap::Prefetch( const AllocationRequest& request, const MemorySpaceAssignment::Allocation& prev_allocation_in_default_mem) { // Try partially placing the buffer in the alternate space. The time that is @@ -2097,15 +2122,12 @@ bool AlternateMemoryBestFitHeap::Prefetch( BufferInterval alternate_mem_interval; alternate_mem_interval.buffer = request.allocation_value->value(); alternate_mem_interval.size = request.size; - // If any of the prefetch intervals couldn't be used due to number of - // outstanding async copy limit or async copy ordering, set - // prefetch_failed_due_to_async_copy_. - prefetch_failed_due_to_async_copy_ = false; // While uses might be allowed to have additional outstanding prefetches. int64 extra_async_copy_limit = request.use->hlo_use.instruction->opcode() == HloOpcode::kWhile ? 
options_.while_use_extra_outstanding_prefetch_limit : 0; + Result result = Result::kSuccess; while (!options_.prefetch_interval_picker->Done()) { alternate_mem_interval.start = options_.prefetch_interval_picker->Next(); CHECK_LT(alternate_mem_interval.start, prefetch_end_time); @@ -2116,14 +2138,14 @@ bool AlternateMemoryBestFitHeap::Prefetch( if (ViolatesAsyncCopyOrdering(alternate_mem_interval.start, prefetch_end_time)) { VLOG(4) << "This would violate asynchronous copy ordering."; - prefetch_failed_due_to_async_copy_ = true; + result_mark(Result::kFailViolatesAsyncCopyOrdering, result); continue; } if (ViolatesMaximumOutstandingAsyncCopies( alternate_mem_interval.start, prefetch_end_time, /*is_prefetch=*/true, extra_async_copy_limit)) { VLOG(4) << "This would violate the outstanding async copy limit."; - prefetch_failed_due_to_async_copy_ = true; + result_mark(Result::kFailOutOfAsyncCopies, result); continue; } @@ -2147,11 +2169,17 @@ bool AlternateMemoryBestFitHeap::Prefetch( request.allocation_value->allocation_sequence()->back()->AddUse( request.use->hlo_use); - prefetch_failed_due_to_async_copy_ = false; - return true; + return Result::kSuccess; } + result_mark(Result::kFailOutOfMemory, result); + } + // If we didn't consider any prefetch intervals, then the live range was too + // short. + if (result == Result::kSuccess) { + return Result::kFailLiveRangeTooShort; + } else { + return result; } - return false; } absl::optional diff --git a/tensorflow/compiler/xla/service/memory_space_assignment.h b/tensorflow/compiler/xla/service/memory_space_assignment.h index 87f7dd2ddae..d530a57d257 100644 --- a/tensorflow/compiler/xla/service/memory_space_assignment.h +++ b/tensorflow/compiler/xla/service/memory_space_assignment.h @@ -687,13 +687,15 @@ class MemorySpaceAssignment { std::vector aliases; }; - AllocationValue(const HloValue* value, const HloPosition& position) - : value_(value), defining_position_(position) {} + AllocationValue(const HloValue* value, const HloPosition& position, + int64 size) + : value_(value), defining_position_(position), size_(size) {} const HloPosition& defining_position() const { return defining_position_; } const HloInstruction* defining_instruction() const { return defining_position().instruction; } + int64 size() const { return size_; } const std::vector& uses() const { return uses_; } std::vector& uses() { return uses_; } const HloValue* value() const { return value_; } @@ -712,6 +714,7 @@ class MemorySpaceAssignment { private: const HloValue* value_; HloPosition defining_position_; + int64 size_; std::vector uses_; AllocationSequence allocation_sequence_; }; @@ -958,6 +961,62 @@ class AlternateMemoryBestFitHeap : public GlobalDecreasingSizeBestFitHeap { MemorySpaceAssignment::AllocationValue* allocation_value; }; + // Result of an allocation, prefetch, eviction etc. request. The result is + // either kSuccess or a bitwise OR of one or more failures. The values are + // unique powers of two. To check if a result contains a particular failure, + // use the result_is method. To add a new failure to a result, use the + // result_mark method. + enum class Result { + // Successful allocation. + kSuccess = 0, + // Allocation failed because we ran out of alternate memory. + kFailOutOfMemory = 1, + // A no-copy allocation couldn't be performed because the previous + // allocation wasn't in the alternate memory space. + kFailPrevAllocationNotInAlternateMem = 2, + // A no-copy allocation couldn't be performed because the live range was too + // long. 
+ kFailLiveRangeTooLong = 4, + // A prefetching couldn't be performed because the live range was too short. + kFailLiveRangeTooShort = 8, + // Ran out of outstanding asynchronous copy limit either during prefetching + // or eviction. + kFailOutOfAsyncCopies = 16, + // A prefetching couldn't be performed because the asynchronous copy + // ordering was violated. + kFailViolatesAsyncCopyOrdering = 32, + // An allocation failure happened that requires uncommitting all the pending + // allocations. Usually this is due to a situation requiring an eviction but + // the eviction couldn't be performed. + kFailRequiresUncommit = 64 + }; + + // Return true if the result belongs to a failure. + static bool result_is(Result result, Result failure) { + return static_cast(result) & static_cast(failure); + } + + // Mark (bitwise OR) a failure to the result. + static Result result_mark(Result failure, Result& result) { + result = static_cast(static_cast(result) | + static_cast(failure)); + return result; + } + + // Return true if the result is a failure that requires us to uncommit pending + // chunks. + static bool result_requires_uncommit(Result result) { + return result_is(result, Result::kFailRequiresUncommit); + } + + // Return true if the result is a failure either due to running out of + // outstanding asynchronous copies or due to violating asynchronous copy + // ordering. + static bool result_failed_because_of_async_copy(Result result) { + return result_is(result, Result::kFailOutOfAsyncCopies) || + result_is(result, Result::kFailViolatesAsyncCopyOrdering); + } + // Given an allocation sequence, returns the live allocation at time with a // preference towards allocations in alternate memory. Returns nullptr if no // allocation is alive at that time. @@ -968,17 +1027,24 @@ class AlternateMemoryBestFitHeap : public GlobalDecreasingSizeBestFitHeap { bool IsUseAllowedInAlternateMemory(const AllocationValue& value, const HloUse& use) const; - // Given an HloValue, creates AllocationValue objects and corresponding + // Given a BufferInterval, creates AllocationValue objects and corresponding // AllocationSequences and appends them into allocation_sequence_list_. - void CreateAllocationValues(const HloValue* value, - std::vector* allocation_values); + void CreateAllocationValues( + const BufferInterval& buffer_interval, + std::vector& allocation_values) const; - // Finds allocations for colocated intervals. Colocated intervals consist of - // one or more BufferIntervals, each with a different HloValue. All of the - // intervals within colocated intervals have a must-alias relationship with - // each other. Returns true if allocation succeeded. - bool AllocateColocatedIntervals( - const std::vector& colocated_intervals); + // Given colocated intervals, populates allocation_values with the + // corresponding AllocationValue objects. + void CreateAllocationValuesFromColocatedIntervals( + absl::Span colocated_intervals, + std::vector& allocation_values); + + // Finds allocations for allocation values generated from colocated intervals. + // All of the allocation values have a must-alias relationship with each + // other. Returns either kSuccess if all of the sites could be placed in the + // alternate memory or a bitwise OR of failure reasons why they couldn't + Result AllocateAllocationValues( + absl::Span allocation_values); // Go through all the uses in the AllocationValues and find the aliasing // positions. 
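For reference, the bit-flag scheme above can be exercised in isolation. The following is a minimal, self-contained sketch — a simplified copy of the Result enum and the result_is/result_mark helpers, written for illustration only and not part of this patch — showing how independent failure reasons accumulate into one Result and how the retry decision reads them back:

#include <iostream>

// Simplified copy of the Result flags above, for illustration only.
enum class Result : int {
  kSuccess = 0,
  kFailOutOfMemory = 1,
  kFailOutOfAsyncCopies = 16,
  kFailViolatesAsyncCopyOrdering = 32,
  kFailRequiresUncommit = 64,
};

bool result_is(Result result, Result failure) {
  return static_cast<int>(result) & static_cast<int>(failure);
}

Result result_mark(Result failure, Result& result) {
  result = static_cast<Result>(static_cast<int>(result) |
                               static_cast<int>(failure));
  return result;
}

int main() {
  Result result = Result::kSuccess;
  // Two different async-copy problems were hit while trying prefetch candidates.
  result_mark(Result::kFailOutOfAsyncCopies, result);
  result_mark(Result::kFailViolatesAsyncCopyOrdering, result);
  // Retrying with larger limits is worthwhile if an async-copy bit is set and
  // nothing demanded an uncommit.
  bool async_copy_failure =
      result_is(result, Result::kFailOutOfAsyncCopies) ||
      result_is(result, Result::kFailViolatesAsyncCopyOrdering);
  bool requires_uncommit = result_is(result, Result::kFailRequiresUncommit);
  std::cout << "async_copy_failure=" << async_copy_failure
            << " requires_uncommit=" << requires_uncommit << "\n";
  // Prints: async_copy_failure=1 requires_uncommit=0
  return 0;
}

Keeping each failure reason as a distinct power of two is what lets AllocateSegment OR together the outcomes of the no-copy, eviction and prefetch attempts and still hand a single Result back to the retry loop in Finish(), which either uncommits and retries with larger limits or finalizes the allocations.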
@@ -996,24 +1062,26 @@ class AlternateMemoryBestFitHeap : public GlobalDecreasingSizeBestFitHeap { // if there is enough space and if the prefetch interval picker allows. // // If an eviction (2) was requested and was unsuccessful, this method returns - // false. This means we could not find a suitable allocation, so all previous - // allocations for this buffer must be removed and allocated in the default - // memory. Otherwise, this method returns true. - bool AllocateSegment(const AllocationRequest& request); + // Result::kFailRequiresUncommit. This means we could not find a suitable + // allocation, so all previous allocations for this buffer must be removed and + // allocated in the default memory. Otherwise, this method may return + // Result::kSuccess if the buffer could be placed in alternate memory or some + // other Result with an OR of reasons why the buffer couldn't be placed in + // alternate memory. + Result AllocateSegment(const AllocationRequest& request); - // Try allocating in alternate memory without any copies. Returns true if - // successful. - bool AllocateInAlternateMemoryNoCopy(const AllocationRequest& request); + // Try allocating in alternate memory without any copies. + Result AllocateInAlternateMemoryNoCopy(const AllocationRequest& request); - // Try evicting to default memory space. Returns true if successful. - bool Evict(const AllocationRequest& request); + // Try evicting to default memory space. + Result Evict(const AllocationRequest& request); // Returns the time a copy done of a prefetch should be scheduled. int64 FindPrefetchEndTime(const AllocationRequest& request, int64 earliest_prefetch_time) const; - // Try prefetching to alternate memory space. Returns true if successful. - bool Prefetch( + // Try prefetching to alternate memory space. + Result Prefetch( const AllocationRequest& request, const MemorySpaceAssignment::Allocation& prev_allocation_in_default_mem); @@ -1095,17 +1163,24 @@ class AlternateMemoryBestFitHeap : public GlobalDecreasingSizeBestFitHeap { const ChunkCandidate& chunk_candidate); // If we need to remove the allocations for this allocation sequence, this // removes pending chunks and asynchronous copies in the respective pending - // buffers from the interval trees. - void UncommitPendingChunks(); + // buffers from the interval trees. If an allocation request returns + // kFailRequiresUncommit, this method must be called. + void UncommitPendingChunks(absl::Span allocation_values); + + // Finalizes the allocations where they can no longer be uncommitted. + void FinalizeAllocations(absl::Span allocation_values); + + // Clears all pending chunks and asynchronous copies. + void ClearPendingChunks(); // Append buffer and allocation infos for debugging and dump it into a file, // if enabled. void AppendBufferInfoDebugString(const BufferInterval& interval, std::string* debug_str) const; void AppendAllocationInfoDebugString( - const BufferInterval& interval, + const AllocationValue& value, const MemorySpaceAssignment::Allocation& allocation, - std::string* debug_str) const; + std::string& debug_str) const; void DumpDebugStringsIfEnabled() const; // Returns the available heap size in the alternate memory. @@ -1132,9 +1207,6 @@ class AlternateMemoryBestFitHeap : public GlobalDecreasingSizeBestFitHeap { required_assignments_; // Number of bytes reserved in alternate memory space. int64 reserved_in_bytes_ = 0; - // Variables to control allocation retries. - bool final_retry_; - bool prefetch_failed_due_to_async_copy_; // Debug strings. 
std::string buffer_info_str_; std::string allocation_info_str_; From 45d4e5624be1ec842299216833fd91717db34e21 Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: Mon, 10 Aug 2020 14:03:39 -0700 Subject: [PATCH 2461/2522] [tf.data service] Rename dispatcher journal directory. PiperOrigin-RevId: 325882898 Change-Id: I4ce77ee6f7cbeb5ed8cfd7caed96a505aea327aa --- tensorflow/core/data/service/dispatcher_impl.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/data/service/dispatcher_impl.cc b/tensorflow/core/data/service/dispatcher_impl.cc index 5dbcece7b49..a30de89ccea 100644 --- a/tensorflow/core/data/service/dispatcher_impl.cc +++ b/tensorflow/core/data/service/dispatcher_impl.cc @@ -44,7 +44,7 @@ namespace data { namespace { // The name of the journal directory inside the dispatcher's working directory. -constexpr char kJournalDir[] = "journal"; +constexpr char kJournalDir[] = "tf_data_dispatcher_journal"; using Dataset = DispatcherState::Dataset; using Worker = DispatcherState::Worker; From 642360f24eaa774abd8a48a2e94e08265da07038 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 10 Aug 2020 14:22:32 -0700 Subject: [PATCH 2462/2522] Let CreateNewUploadSession support non-resumable writes by returning whether a resumable session was created via the out param "bool* resumable". PiperOrigin-RevId: 325886630 Change-Id: I97d81cb636d75c8593370676e052f40ab10d9a0c --- .../core/platform/cloud/gcs_file_system.cc | 44 +++++++++++-------- .../core/platform/cloud/gcs_file_system.h | 13 ++++-- 2 files changed, 34 insertions(+), 23 deletions(-) diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc index 31b0c790f50..f0d2138b379 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system.cc @@ -389,7 +389,7 @@ class BufferedGcsRandomAccessFile : public RandomAccessFile { typedef std::function + UploadSessionHandle* session_handle)> SessionCreator; // Function object declaration with params needed to upload objects. @@ -542,7 +542,7 @@ class GcsWritableFile : public WritableFile { return errors::Internal( "Could not write to the internal temporary file."); } - string session_uri; + UploadSessionHandle session_handle; uint64 start_offset = 0; string object_to_upload = object_; bool should_compose = false; @@ -556,17 +556,21 @@ class GcsWritableFile : public WritableFile { io::Basename(object_), ".", start_offset_); } } - TF_RETURN_IF_ERROR( - CreateNewUploadSession(start_offset, object_to_upload, &session_uri)); + TF_RETURN_IF_ERROR(CreateNewUploadSession(start_offset, object_to_upload, + &session_handle)); uint64 already_uploaded = 0; bool first_attempt = true; const Status upload_status = RetryingUtils::CallWithRetries( - [&first_attempt, &already_uploaded, &session_uri, &start_offset, + [&first_attempt, &already_uploaded, &session_handle, &start_offset, this]() { - if (!first_attempt) { + if (session_handle.resumable && !first_attempt) { bool completed; TF_RETURN_IF_ERROR(RequestUploadSessionStatus( - session_uri, &completed, &already_uploaded)); + session_handle.session_uri, &completed, &already_uploaded)); + LOG(INFO) << "### RequestUploadSessionStatus: completed = " + << completed + << ", already_uploaded = " << already_uploaded + << ", file = " << GetGcsPath(); if (completed) { // Erase the file from the file cache on every successful write. 
file_cache_erase_(); @@ -577,7 +581,8 @@ class GcsWritableFile : public WritableFile { } } first_attempt = false; - return UploadToSession(session_uri, start_offset, already_uploaded); + return UploadToSession(session_handle.session_uri, start_offset, + already_uploaded); }, retry_config_); if (upload_status.code() == errors::Code::NOT_FOUND) { @@ -617,11 +622,11 @@ class GcsWritableFile : public WritableFile { /// Initiates a new resumable upload session. Status CreateNewUploadSession(uint64 start_offset, std::string object_to_upload, - std::string* session_uri) { + UploadSessionHandle* session_handle) { uint64 file_size; TF_RETURN_IF_ERROR(GetCurrentFileSize(&file_size)); return session_creator_(start_offset, object_to_upload, bucket_, file_size, - GetGcsPath(), session_uri); + GetGcsPath(), session_handle); } /// Appends the data of append_object to the original object and deletes @@ -913,6 +918,7 @@ GcsFileSystem::GcsFileSystem( std::pair* additional_header, bool compose_append) : timeouts_(timeouts), + retry_config_(retry_config), auth_provider_(std::move(auth_provider)), http_request_factory_(std::move(http_request_factory)), zone_provider_(std::move(zone_provider)), @@ -926,7 +932,6 @@ GcsFileSystem::GcsFileSystem( kCacheNeverExpire, kBucketLocationCacheMaxEntries)), allowed_locations_(allowed_locations), compose_append_(compose_append), - retry_config_(retry_config), additional_header_(additional_header) {} Status GcsFileSystem::NewRandomAccessFile( @@ -1080,7 +1085,7 @@ Status GcsFileSystem::LoadBufferFromGCS(const string& fname, size_t offset, Status GcsFileSystem::CreateNewUploadSession( uint64 start_offset, const std::string& object_to_upload, const std::string& bucket, uint64 file_size, const std::string& gcs_path, - std::string* session_uri) { + UploadSessionHandle* session_handle) { std::vector output_buffer; std::unique_ptr request; TF_RETURN_IF_ERROR(CreateHttpRequest(&request)); @@ -1096,9 +1101,10 @@ Status GcsFileSystem::CreateNewUploadSession( request->SetTimeouts(timeouts_.connect, timeouts_.idle, timeouts_.metadata); TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(), " when initiating an upload to ", gcs_path); - if (session_uri != nullptr) { - *session_uri = request->GetResponseHeader("Location"); - if (session_uri->empty()) { + if (session_handle != nullptr) { + session_handle->resumable = true; + session_handle->session_uri = request->GetResponseHeader("Location"); + if (session_handle->session_uri.empty()) { return errors::Internal("Unexpected response from GCS when writing to ", gcs_path, ": 'Location' header not returned."); } @@ -1241,9 +1247,9 @@ Status GcsFileSystem::NewWritableFile(const string& fname, auto session_creator = [this](uint64 start_offset, const std::string& object_to_upload, const std::string& bucket, uint64 file_size, - const std::string& gcs_path, std::string* session_uri) { + const std::string& gcs_path, UploadSessionHandle* session_handle) { return CreateNewUploadSession(start_offset, object_to_upload, bucket, - file_size, gcs_path, session_uri); + file_size, gcs_path, session_handle); }; auto object_uploader = [this](const std::string& session_uri, uint64 start_offset, @@ -1301,9 +1307,9 @@ Status GcsFileSystem::NewAppendableFile(const string& fname, auto session_creator = [this](uint64 start_offset, const std::string& object_to_upload, const std::string& bucket, uint64 file_size, - const std::string& gcs_path, std::string* session_uri) { + const std::string& gcs_path, UploadSessionHandle* session_handle) { return 
CreateNewUploadSession(start_offset, object_to_upload, bucket, - file_size, gcs_path, session_uri); + file_size, gcs_path, session_handle); }; auto object_uploader = [this](const std::string& session_uri, uint64 start_offset, diff --git a/tensorflow/core/platform/cloud/gcs_file_system.h b/tensorflow/core/platform/cloud/gcs_file_system.h index 203c501ff4c..eceb76970fb 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system.h +++ b/tensorflow/core/platform/cloud/gcs_file_system.h @@ -101,6 +101,11 @@ class GcsStatsInterface { virtual ~GcsStatsInterface() = default; }; +struct UploadSessionHandle { + std::string session_uri; + bool resumable; +}; + /// Google Cloud Storage implementation of a file system. /// /// The clients should use RetryingGcsFileSystem defined below, @@ -281,7 +286,7 @@ class GcsFileSystem : public FileSystem { const std::string& bucket, uint64 file_size, const std::string& gcs_path, - std::string* session_uri); + UploadSessionHandle* session_handle); // Uploads object data to session. virtual Status UploadToSession(const std::string& session_uri, @@ -318,6 +323,9 @@ class GcsFileSystem : public FileSystem { // Used by a subclass. TimeoutConfig timeouts_; + /// The retry configuration used for retrying failed calls. + RetryConfig retry_config_; + private: // GCS file statistics. struct GcsFileStat { @@ -416,9 +424,6 @@ class GcsFileSystem : public FileSystem { GcsStatsInterface* stats_ = nullptr; // Not owned. - /// The initial delay for exponential backoffs when retrying failed calls. - RetryConfig retry_config_; - // Additional header material to be transmitted with all GCS requests std::unique_ptr> additional_header_; From 9b4f994681a00f741bf6dc1f033b588099064fc0 Mon Sep 17 00:00:00 2001 From: Edward Loper Date: Mon, 10 Aug 2020 14:25:56 -0700 Subject: [PATCH 2463/2522] Improve error message when tf.ragged.map_flat_values is called with a function that doesn't preserve the outer dimension size of ragged inputs. PiperOrigin-RevId: 325887317 Change-Id: Ibdc85269a9ff8b842844b1f22c4bd005e554bf52 --- .../ops/ragged/ragged_functional_ops.py | 44 ++++++++++++++++--- .../ragged/ragged_map_flat_values_op_test.py | 31 +++++++++---- 2 files changed, 60 insertions(+), 15 deletions(-) diff --git a/tensorflow/python/ops/ragged/ragged_functional_ops.py b/tensorflow/python/ops/ragged/ragged_functional_ops.py index 00b5ced6170..22625077e56 100644 --- a/tensorflow/python/ops/ragged/ragged_functional_ops.py +++ b/tensorflow/python/ops/ragged/ragged_functional_ops.py @@ -20,6 +20,7 @@ from __future__ import print_function from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import math_ops from tensorflow.python.ops.ragged import ragged_config from tensorflow.python.ops.ragged import ragged_tensor @@ -70,10 +71,22 @@ def map_flat_values(op, *args, **kwargs): # Replace RaggedTensors with their values; and collect the splits tensors # from each RaggedTensor. 
nested_splits_lists = [] - inner_args = _replace_ragged_with_flat_values(args, nested_splits_lists) - inner_kwargs = _replace_ragged_with_flat_values(kwargs, nested_splits_lists) + flat_values_nrows = [] + inner_args = _replace_ragged_with_flat_values(args, nested_splits_lists, + flat_values_nrows) + inner_kwargs = _replace_ragged_with_flat_values(kwargs, nested_splits_lists, + flat_values_nrows) if not nested_splits_lists: return op(*args, **kwargs) + if flat_values_nrows: + flat_values_nrows = set(flat_values_nrows) + if len(flat_values_nrows) != 1: + raise ValueError("Input RaggedTensors' flat_values must all have the " + "same outer-dimension size. Got sizes: %s" % + flat_values_nrows) + flat_values_nrows = flat_values_nrows.pop() # Get the single element + else: + flat_values_nrows = None split_dtypes = set(splits[0].dtype for splits in nested_splits_lists) if len(split_dtypes) > 1: @@ -88,13 +101,23 @@ def map_flat_values(op, *args, **kwargs): with ops.control_dependencies( ragged_util.assert_splits_match(nested_splits_lists)): - # Delegate to op, and then compose the result from the transformed values - # and the splits. + # Delegate to `op` + op_output = op(*inner_args, **inner_kwargs) + # Check that the result has the expected shape (if known). + if flat_values_nrows is not None: + if not op_output.shape[:1].is_compatible_with([flat_values_nrows]): + raise ValueError( + "tf.ragged.map_flat_values requires that the output of `op` have " + "the same outer-dimension size as flat_values of any ragged " + "inputs. (output shape: %s; expected outer dimension size: %s)" % + (op_output.shape, flat_values_nrows)) + # Compose the result from the transformed values and the splits. return ragged_tensor.RaggedTensor.from_nested_row_splits( - op(*inner_args, **inner_kwargs), nested_splits_lists[0], validate=False) + op_output, nested_splits_lists[0], validate=False) -def _replace_ragged_with_flat_values(value, nested_splits_lists): +def _replace_ragged_with_flat_values(value, nested_splits_lists, + flat_values_nrows): """Replace RaggedTensors with their flat_values, and record their splits. Returns a copy of `value`, with any nested `RaggedTensor`s replaced by their @@ -106,6 +129,9 @@ def _replace_ragged_with_flat_values(value, nested_splits_lists): value: The value that should be transformed by replacing `RaggedTensors`. nested_splits_lists: An output parameter used to record the `nested_splits` for any `RaggedTensors` that were replaced. + flat_values_nrows: An output parameter used to record the outer dimension + size for each replacement `flat_values` (when known). Contains a list of + int. Returns: A copy of `value` with nested `RaggedTensors` replaced by their `values`. 
@@ -114,11 +140,15 @@ def _replace_ragged_with_flat_values(value, nested_splits_lists): if ragged_tensor.is_ragged(value): value = ragged_tensor.convert_to_tensor_or_ragged_tensor(value) nested_splits_lists.append(value.nested_row_splits) + nrows = tensor_shape.dimension_at_index(value.flat_values.shape, 0).value + if nrows is not None: + flat_values_nrows.append(nrows) return value.flat_values # Recursion cases def recurse(v): - return _replace_ragged_with_flat_values(v, nested_splits_lists) + return _replace_ragged_with_flat_values(v, nested_splits_lists, + flat_values_nrows) if isinstance(value, list): return [recurse(v) for v in value] diff --git a/tensorflow/python/ops/ragged/ragged_map_flat_values_op_test.py b/tensorflow/python/ops/ragged/ragged_map_flat_values_op_test.py index 588a5473741..e65c877aa68 100644 --- a/tensorflow/python/ops/ragged/ragged_map_flat_values_op_test.py +++ b/tensorflow/python/ops/ragged/ragged_map_flat_values_op_test.py @@ -178,18 +178,33 @@ class RaggedMapInnerValuesOpTest(test_util.TensorFlowTestCase): def testRaggedTensorSplitsRaggedRankMismatchError(self): x = ragged_factory_ops.constant([[3, 1, 4], [], [1, 5]]) y = ragged_factory_ops.constant([[[3, 1, 4], []], [], [[1, 5]]]) - self.assertRaisesRegex(ValueError, - r'Inputs must have identical ragged splits.*', - ragged_functional_ops.map_flat_values, math_ops.add, - x, y) + with self.assertRaisesRegex(ValueError, + r'Inputs must have identical ragged splits.*'): + ragged_functional_ops.map_flat_values(math_ops.add, x, y) def testRaggedTensorSplitsValueMismatchError(self): x = ragged_factory_ops.constant([[3, 1, 4], [], [1, 5]]) y = ragged_factory_ops.constant([[1], [2, 3], [4, 5]]) - self.assertRaisesRegex(errors.InvalidArgumentError, - r'Inputs must have identical ragged splits.*', - ragged_functional_ops.map_flat_values, math_ops.add, - x, y) + with self.assertRaisesRegex(errors.InvalidArgumentError, + r'Inputs must have identical ragged splits.*'): + ragged_functional_ops.map_flat_values(math_ops.add, x, y) + + z_splits = array_ops.placeholder_with_default( + constant_op.constant([0, 3], dtypes.int64), None) + z = ragged_tensor.RaggedTensor.from_row_splits([0, 1, 2], z_splits) + with self.assertRaisesRegex( + ValueError, + r"Input RaggedTensors' flat_values must all have the same " + r'outer-dimension size. Got sizes: \{3, 5\}'): + ragged_functional_ops.map_flat_values(math_ops.add, x, z) + + def testRaggedTensorShapeMismatchError(self): + x = ragged_factory_ops.constant([[1, 2, 3], [4, 5]]) + with self.assertRaisesRegex( + ValueError, r'tf.ragged.map_flat_values requires that the output of ' + '`op` have the same outer-dimension size as flat_values of any ragged ' + r'inputs. \(output shape: \(\); expected outer dimension size: 5\)'): + ragged_functional_ops.map_flat_values(math_ops.argmax, x) def testRaggedTensorSplitsMismatchErrorAtRuntime(self): splits1 = array_ops.placeholder_with_default( From c69cd441212e35a3157d067b263314bcb9b34c0e Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Mon, 10 Aug 2020 14:26:29 -0700 Subject: [PATCH 2464/2522] Replace CLDevice with DeviceInfo in util.h. 
PiperOrigin-RevId: 325887446 Change-Id: Icbe7decfd7e1f3c851a088a37af92d8e2d30c4b2 --- tensorflow/lite/delegates/gpu/cl/kernels/BUILD | 2 +- .../gpu/cl/kernels/conv_buffer_1x1.cc | 4 ++-- .../delegates/gpu/cl/kernels/conv_powervr.cc | 4 ++-- .../lite/delegates/gpu/cl/kernels/util.cc | 18 ++++-------------- .../lite/delegates/gpu/cl/kernels/util.h | 4 ++-- 5 files changed, 11 insertions(+), 21 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/BUILD b/tensorflow/lite/delegates/gpu/cl/kernels/BUILD index b89e7d7252a..27e12b5981f 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/BUILD +++ b/tensorflow/lite/delegates/gpu/cl/kernels/BUILD @@ -1296,7 +1296,7 @@ cc_library( srcs = ["util.cc"], hdrs = ["util.h"], deps = [ - "//tensorflow/lite/delegates/gpu/cl:cl_device", + "//tensorflow/lite/delegates/gpu/cl:device_info", "//tensorflow/lite/delegates/gpu/cl:precision", "//tensorflow/lite/delegates/gpu/cl:tensor_type", "//tensorflow/lite/delegates/gpu/common:access_type", diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc index 3216e2ef246..38e04e221f2 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc @@ -105,8 +105,8 @@ ConvBuffer1x1::ConvParams GetBestParams(const CLDevice& device, } int task_size = shape.w * shape.b * shape.h * dst_depth; - int block_size = - GetRecommendedBlockSizeForConv(device, definition.precision, task_size); + int block_size = GetRecommendedBlockSizeForConv( + device.info_, definition.precision, task_size); if (!can_use_flt8 && block_size > 4) { block_size = 4; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc index d65595d068c..f04102d25d6 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc @@ -820,8 +820,8 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( int block_size = 2; if (dst_shape) { int task_size = dst_shape->w * dst_shape->b * dst_shape->h * dst_depth; - block_size = GetRecommendedBlockSizeForConv(device, definition.precision, - task_size); + block_size = GetRecommendedBlockSizeForConv( + device.info_, definition.precision, task_size); } if (!x_kernel_is_1 || !y_kernel_is_1) { block_size = std::min(block_size, 4); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/util.cc b/tensorflow/lite/delegates/gpu/cl/kernels/util.cc index d907c0210b7..b7cfa5f013e 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/util.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/util.cc @@ -84,16 +84,6 @@ std::string GetXStrideCorrected(const std::string& src_x, batch_size, stride_x, padding_x); } -TextureAddressMode GetFastestZeroMode(const CLDevice& device) { - return device.IsAdreno3xx() ? TextureAddressMode::DONT_CARE - : TextureAddressMode::ZERO; -} - -TextureAddressMode GetFastestZeroMode(const DeviceInfo& device_info) { - return device_info.IsAdreno3xx() ? TextureAddressMode::DONT_CARE - : TextureAddressMode::ZERO; -} - float4 GetMaskForLastPlane(int channels) { float4 mask = float4(0.0f); const int reminder = channels % 4 == 0 ? 
4 : channels % 4; @@ -113,19 +103,19 @@ int3 GetFirstSuitableWorkGroup(const std::vector& wgs, int max_wg_size) { return {1, 1, 1}; } -int GetRecommendedBlockSizeForConv(const CLDevice& device, +int GetRecommendedBlockSizeForConv(const DeviceInfo& device_info, CalculationsPrecision precision, int task_size) { const float task_size_per_cu = - task_size / static_cast(device.info_.compute_units_count); + task_size / static_cast(device_info.compute_units_count); int block_size = 1; float threshold_1 = FLT_MAX; float threshold_2 = FLT_MAX; float threshold_4 = FLT_MAX; - if (!device.IsMali()) { + if (!device_info.IsMali()) { return 1; } - MaliInfo mali_info = device.info_.mali_info; + MaliInfo mali_info = device_info.mali_info; switch (precision) { case CalculationsPrecision::F16: if (mali_info.IsBifrostGen1()) { diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/util.h b/tensorflow/lite/delegates/gpu/cl/kernels/util.h index 173a4d43072..b1dd4fe8c57 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/util.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/util.h @@ -19,7 +19,7 @@ limitations under the License. #include #include "absl/types/span.h" -#include "tensorflow/lite/delegates/gpu/cl/cl_device.h" +#include "tensorflow/lite/delegates/gpu/cl/device_info.h" #include "tensorflow/lite/delegates/gpu/cl/precision.h" #include "tensorflow/lite/delegates/gpu/cl/tensor_type.h" #include "tensorflow/lite/delegates/gpu/common/access_type.h" @@ -95,7 +95,7 @@ float4 GetMaskForLastPlane(int channels); int3 GetFirstSuitableWorkGroup(const std::vector& wgs, int max_wg_size); // task_size as amount of FLT4 processed elements. -int GetRecommendedBlockSizeForConv(const CLDevice& device, +int GetRecommendedBlockSizeForConv(const DeviceInfo& device, CalculationsPrecision precision, int task_size); } // namespace cl From 7a9511547798bff3d48ecc8598ec3acc3b7269f7 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Mon, 10 Aug 2020 14:29:41 -0700 Subject: [PATCH 2465/2522] Introduce local TPU support for TPUClusterResolver PiperOrigin-RevId: 325888141 Change-Id: Iea19b39a6c5a6a56dec7cd32eae8b5b12ce849a7 --- .../tpu/tpu_cluster_resolver.py | 98 ++++++++++++------- .../tpu/tpu_cluster_resolver_test.py | 4 + 2 files changed, 66 insertions(+), 36 deletions(-) diff --git a/tensorflow/python/distribute/cluster_resolver/tpu/tpu_cluster_resolver.py b/tensorflow/python/distribute/cluster_resolver/tpu/tpu_cluster_resolver.py index e42420ec644..d400e7aeed4 100644 --- a/tensorflow/python/distribute/cluster_resolver/tpu/tpu_cluster_resolver.py +++ b/tensorflow/python/distribute/cluster_resolver/tpu/tpu_cluster_resolver.py @@ -22,6 +22,7 @@ import collections import re from tensorflow.python.distribute.cluster_resolver import cluster_resolver +from tensorflow.python.framework import config as framework_config from tensorflow.python.framework import errors from tensorflow.python.platform import tf_logging as logging from tensorflow.python.tpu import tpu_system_metadata as tpu_system_metadata_lib @@ -155,7 +156,8 @@ class TPUClusterResolver(cluster_resolver.ClusterResolver): Args: tpu: A string corresponding to the TPU to use. It can be the TPU name or TPU worker gRPC address. If not set, it will try automatically resolve - the TPU address on Cloud TPUs. + the TPU address on Cloud TPUs. If set to "local", it will assume that + the TPU is directly connected to the VM instead of over the network. zone: Zone where the TPUs are located. 
If omitted or empty, we will assume that the zone of the TPU is the same as the zone of the GCE VM, which we will try to discover from the GCE metadata service. @@ -187,15 +189,21 @@ class TPUClusterResolver(cluster_resolver.ClusterResolver): Google Cloud environment. """ - self._cloud_tpu_client = client.Client( - tpu=tpu, - zone=zone, - project=project, - credentials=credentials, - service=service, - discovery_url=discovery_url) + if tpu != 'local': + # Default Cloud environment + self._cloud_tpu_client = client.Client( + tpu=tpu, + zone=zone, + project=project, + credentials=credentials, + service=service, + discovery_url=discovery_url) + self._tpu = self._cloud_tpu_client.name() + else: + # Directly connected TPU environment + self._cloud_tpu_client = None + self._tpu = 'local' - self._tpu = self._cloud_tpu_client.name() # By default the task_type is 'worker` and the task_id is 0 (which is the # first worker in the task). self.task_type = job_name @@ -238,20 +246,23 @@ class TPUClusterResolver(cluster_resolver.ClusterResolver): ValueError: If none of the TPUs specified exists. """ - cluster_spec = self.cluster_spec() - if task_type is not None and task_id is not None: - # task_type and task_id is from the function parameter - master = cluster_spec.task_address(task_type, task_id) - elif self.task_type is not None and self.task_id is not None: - # task_type and task_id is from the object - master = cluster_spec.task_address(self.task_type, self.task_id) + if self._tpu != 'local': + cluster_spec = self.cluster_spec() + if task_type is not None and task_id is not None: + # task_type and task_id is from the function parameter + master = cluster_spec.task_address(task_type, task_id) + elif self.task_type is not None and self.task_id is not None: + # task_type and task_id is from the object + master = cluster_spec.task_address(self.task_type, self.task_id) + else: + # by default we take the first item in the cluster with the right name + job_tasks = cluster_spec.job_tasks(self.task_type) + if not job_tasks: + raise ValueError('No TPUs with the specified names exist.') + master = job_tasks[0] + return cluster_resolver.format_master_url(master, 'grpc') else: - # by default we take the first item in the cluster with the right name - job_tasks = cluster_spec.job_tasks(self.task_type) - if not job_tasks: - raise ValueError('No TPUs with the specified names exist.') - master = job_tasks[0] - return cluster_resolver.format_master_url(master, 'grpc') + return '' def get_master(self): return self.master() @@ -298,7 +309,8 @@ class TPUClusterResolver(cluster_resolver.ClusterResolver): RuntimeError: If the provided TPU is not healthy. """ ############################################################################ - # There are 5 potential cases this code must handle: + # There are 6 potential cases this code must handle: + # 0. [Local case.] When a TPU is connected directly to the VM. # 1. [Normal case.] We should resolve the TPU name to a set of tasks, and # a. Create a ClusterSpec that includes the coordinator job # b. Create a ClusterSpec without the coordinator job. @@ -308,17 +320,19 @@ class TPUClusterResolver(cluster_resolver.ClusterResolver): # b. 
Create a ClusterSpec without the coordinator ############################################################################ - network_endpoints = self._cloud_tpu_client.network_endpoints() - worker_list = [ - '%s:%s' % (endpoint['ipAddress'], endpoint['port']) - for endpoint in network_endpoints - ] - cluster_spec = {self.task_type: worker_list} - if self._coordinator_address: - # {1, 2}.a - cluster_spec[self._coordinator_name] = [self._coordinator_address] - - return server_lib.ClusterSpec(cluster_spec) + if self._tpu != 'local': + network_endpoints = self._cloud_tpu_client.network_endpoints() + worker_list = [ + '%s:%s' % (endpoint['ipAddress'], endpoint['port']) + for endpoint in network_endpoints + ] + cluster_spec = {self.task_type: worker_list} + if self._coordinator_address: + # {1, 2}.a + cluster_spec[self._coordinator_name] = [self._coordinator_address] + return server_lib.ClusterSpec(cluster_spec) + else: + return server_lib.ClusterSpec({}) def num_accelerators(self, task_type=None, @@ -340,6 +354,15 @@ class TPUClusterResolver(cluster_resolver.ClusterResolver): RuntimeError: If we cannot talk to a TPU worker after retrying or if the number of TPU devices per host is different. """ + if self._tpu == 'local': + return { + 'TPU': + len([ + d for d in framework_config.list_logical_devices() + if d.device_type == 'TPU' + ]) + } + retry_count = 1 # TODO(b/120564445): Replace with standard library for retries. while True: @@ -360,8 +383,11 @@ class TPUClusterResolver(cluster_resolver.ClusterResolver): raise RuntimeError(error_message) if device_details.total_cores: - return {'TPU': TPUClusterResolver._verify_and_return_same_core_count( - device_details.device_map)} + return { + 'TPU': + TPUClusterResolver._verify_and_return_same_core_count( + device_details.device_map) + } return {'TPU': 0} @property diff --git a/tensorflow/python/distribute/cluster_resolver/tpu/tpu_cluster_resolver_test.py b/tensorflow/python/distribute/cluster_resolver/tpu/tpu_cluster_resolver_test.py index 51abc850bb2..155410f9668 100644 --- a/tensorflow/python/distribute/cluster_resolver/tpu/tpu_cluster_resolver_test.py +++ b/tensorflow/python/distribute/cluster_resolver/tpu/tpu_cluster_resolver_test.py @@ -706,6 +706,10 @@ class TPUClusterResolverTest(test.TestCase): with self.assertRaises(RuntimeError): cluster_resolver.num_accelerators() + def testLocalTpuResolver(self): + cr = resolver.TPUClusterResolver(tpu='local') + self.assertEqual(cr.get_master(), '') + if __name__ == '__main__': test.main() From 31ea2b852ae121ffc1b1958ea7ed0f0edab10ae7 Mon Sep 17 00:00:00 2001 From: Smit Hinsu Date: Mon, 10 Aug 2020 14:29:48 -0700 Subject: [PATCH 2466/2522] Legalize TensorFlow ops related to Matrix diag, space and batch conversion in the fallback path These ops are: BatchToSpaceNDOp BatchToSpaceOp SpaceToBatchNDOp SpaceToBatchOp MatrixDiagV3Op MatrixSetDiagV3Op PiperOrigin-RevId: 325888162 Change-Id: Ifab8fa68c87f745f91a8d6faeb765e40c2d07d94 --- .../compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc | 6 ++++++ tensorflow/compiler/tests/BUILD | 2 ++ 2 files changed, 8 insertions(+) diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc index f04f1653505..6e651df5075 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc @@ -100,6 +100,8 @@ bool IsOpAllowedTf2XlaFallback(Operation* op) { TypeID::get(), TypeID::get(), TypeID::get(), 
+ TypeID::get(), + TypeID::get(), TypeID::get(), TypeID::get(), TypeID::get(), @@ -152,6 +154,8 @@ bool IsOpAllowedTf2XlaFallback(Operation* op) { TypeID::get(), TypeID::get(), TypeID::get(), + TypeID::get(), + TypeID::get(), TypeID::get(), TypeID::get(), TypeID::get(), @@ -184,6 +188,8 @@ bool IsOpAllowedTf2XlaFallback(Operation* op) { TypeID::get(), TypeID::get(), TypeID::get(), + TypeID::get(), + TypeID::get(), TypeID::get(), TypeID::get(), TypeID::get(), diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index a3134fc1c94..924834fc0fc 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -858,6 +858,7 @@ tf_xla_py_test( size = "medium", timeout = "long", srcs = ["matrix_diag_ops_test.py"], + enable_mlir_bridge = True, python_version = "PY3", tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip @@ -1204,6 +1205,7 @@ tf_xla_py_test( name = "spacetobatch_op_test", size = "medium", srcs = ["spacetobatch_op_test.py"], + enable_mlir_bridge = True, python_version = "PY3", shard_count = 3, tags = [ From c042a0c82aa3949745eb92e6b9c82350ab710625 Mon Sep 17 00:00:00 2001 From: Bruce Fontaine Date: Mon, 10 Aug 2020 14:33:35 -0700 Subject: [PATCH 2467/2522] Add support for the multiply_linear_by_lr, beta and allow_zero_accumulator options to the FTRL optimizer for TPUEmbedding. Increase TPU initialization timeout in TPUEstimator. PiperOrigin-RevId: 325889098 Change-Id: I826f3bd8a7fbfb9b74ece7359b778d7fee604b10 --- tensorflow/python/tpu/tpu_embedding.py | 25 ++++++++++++++++++- ...ow.tpu.experimental.-ftrl-parameters.pbtxt | 2 +- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/tpu/tpu_embedding.py b/tensorflow/python/tpu/tpu_embedding.py index 13afe1a2147..e9d1a3be6b6 100644 --- a/tensorflow/python/tpu/tpu_embedding.py +++ b/tensorflow/python/tpu/tpu_embedding.py @@ -577,9 +577,15 @@ class FtrlParameters(_OptimizationParameters): clip_weight_min=None, clip_weight_max=None, weight_decay_factor=None, - multiply_weight_decay_factor_by_learning_rate=None): + multiply_weight_decay_factor_by_learning_rate=None, + multiply_linear_by_learning_rate=False, + beta=0, + allow_zero_accumulator=False): """Optimization parameters for Ftrl. + Implements FTRL as described in the following [paper]( + https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/41159.pdf) + Args: learning_rate: a floating point value. The learning rate. learning_rate_power: A float value, must be less or equal to zero. @@ -602,6 +608,14 @@ class FtrlParameters(_OptimizationParameters): weights are not decayed. multiply_weight_decay_factor_by_learning_rate: if true, `weight_decay_factor` is multiplied by the current learning rate. + multiply_linear_by_learning_rate: When true, multiplies the usages of the + linear slot in the weight update by the learning rate. This is useful + when ramping up learning rate from 0 (which would normally produce + NaNs). + beta: The beta parameter for FTRL. + allow_zero_accumulator: Changes the implementation of the square root to + allow for the case of initial_accumulator_value being zero. This will + cause a slight performance drop. 
""" super(FtrlParameters, self).__init__(learning_rate, use_gradient_accumulation, @@ -628,6 +642,9 @@ class FtrlParameters(_OptimizationParameters): self.initial_linear_value = 0.0 self.l1_regularization_strength = l1_regularization_strength self.l2_regularization_strength = l2_regularization_strength + self.multiply_linear_by_learning_rate = multiply_linear_by_learning_rate + self.beta = beta + self.allow_zero_accumulator = allow_zero_accumulator class ProximalYogiParameters(_OptimizationParameters): @@ -1896,6 +1913,12 @@ class _FtrlHandler(_OptimizerHandler): self._optimization_parameters.l1_regularization_strength) table_descriptor.optimization_parameters.ftrl.l2 = ( self._optimization_parameters.l2_regularization_strength) + table_descriptor.optimization_parameters.ftrl.multiply_linear_by_lr = ( + self._optimization_parameters.multiply_linear_by_learning_rate) + table_descriptor.optimization_parameters.ftrl.beta = ( + self._optimization_parameters.beta) + table_descriptor.optimization_parameters.ftrl.allow_zero_accumulator = ( + self._optimization_parameters.allow_zero_accumulator) def get_default_slot_variable_names(self, table): # These match the default slot variable names created by diff --git a/tensorflow/tools/api/golden/v1/tensorflow.tpu.experimental.-ftrl-parameters.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.tpu.experimental.-ftrl-parameters.pbtxt index 9e435cc0e8f..450015c3695 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.tpu.experimental.-ftrl-parameters.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.tpu.experimental.-ftrl-parameters.pbtxt @@ -5,6 +5,6 @@ tf_class { is_instance: "" member_method { name: "__init__" - argspec: "args=[\'self\', \'learning_rate\', \'learning_rate_power\', \'initial_accumulator_value\', \'l1_regularization_strength\', \'l2_regularization_strength\', \'use_gradient_accumulation\', \'clip_weight_min\', \'clip_weight_max\', \'weight_decay_factor\', \'multiply_weight_decay_factor_by_learning_rate\'], varargs=None, keywords=None, defaults=[\'-0.5\', \'0.1\', \'0.0\', \'0.0\', \'True\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'learning_rate\', \'learning_rate_power\', \'initial_accumulator_value\', \'l1_regularization_strength\', \'l2_regularization_strength\', \'use_gradient_accumulation\', \'clip_weight_min\', \'clip_weight_max\', \'weight_decay_factor\', \'multiply_weight_decay_factor_by_learning_rate\', \'multiply_linear_by_learning_rate\', \'beta\', \'allow_zero_accumulator\'], varargs=None, keywords=None, defaults=[\'-0.5\', \'0.1\', \'0.0\', \'0.0\', \'True\', \'None\', \'None\', \'None\', \'None\', \'False\', \'0\', \'False\'], " } } From 6411114cbd94777be68d78b6005cb44fad8e4983 Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Mon, 10 Aug 2020 14:47:36 -0700 Subject: [PATCH 2468/2522] Fix CSV writer on Windows PiperOrigin-RevId: 325892185 Change-Id: I96053328785323a40450e4b33704d873a6c6f38b --- tensorflow/tools/ci_build/sizetrack_helper.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/tools/ci_build/sizetrack_helper.py b/tensorflow/tools/ci_build/sizetrack_helper.py index eb3a6afda5e..03a04e36588 100755 --- a/tensorflow/tools/ci_build/sizetrack_helper.py +++ b/tensorflow/tools/ci_build/sizetrack_helper.py @@ -364,8 +364,9 @@ def main(): print("DRY RUN: Generated this TSV row:") print("\t".join(map(str, next_tsv_row))) else: - with open("data.tsv", "w") as tsvfile: - writer = csv.writer(tsvfile, delimiter="\t", quoting=csv.QUOTE_MINIMAL) + with 
open("data.tsv", "w", newline="") as tsvfile: + writer = csv.writer(tsvfile, delimiter="\t", quoting=csv.QUOTE_MINIMAL, + lineterminator=os.linesep) writer.writerow(next_tsv_row) bq([ "load", "--source_format", "CSV", "--field_delimiter", "tab", From dd2ee4e8cc917e878d566b7f5ca28d8866247492 Mon Sep 17 00:00:00 2001 From: Edward Loper Date: Mon, 10 Aug 2020 14:48:44 -0700 Subject: [PATCH 2469/2522] Fix bug in ConvertPyObjectToAttributeType when type="tensor" and value is a string that's not a Tensor textproto. PiperOrigin-RevId: 325892441 Change-Id: I4a29954ec32b84fef75859ac36f4e694a0317688 --- tensorflow/python/framework/op_def_util.cc | 6 ++++-- tensorflow/python/framework/op_def_util_test.py | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/framework/op_def_util.cc b/tensorflow/python/framework/op_def_util.cc index 4f56c62317c..c915c494be9 100644 --- a/tensorflow/python/framework/op_def_util.cc +++ b/tensorflow/python/framework/op_def_util.cc @@ -167,8 +167,10 @@ struct ConvertTensorProtoFunctor { } else if (PY_STRING_CHECK(value)) { result.reset(PyObject_CallObject(tensor_proto, nullptr)); if (result) { - PyObject_CallFunctionObjArgs(text_format_parse, value, result.get(), - nullptr); + if (!PyObject_CallFunctionObjArgs(text_format_parse, value, + result.get(), nullptr)) { + return nullptr; + } } } return result; diff --git a/tensorflow/python/framework/op_def_util_test.py b/tensorflow/python/framework/op_def_util_test.py index 74cd6046f68..69aaffbf19f 100644 --- a/tensorflow/python/framework/op_def_util_test.py +++ b/tensorflow/python/framework/op_def_util_test.py @@ -87,6 +87,7 @@ class OpDefUtilTest(test_util.TensorFlowTestCase, parameterized.TestCase): ("list(any)", 12), ("list(int)", [1, "two"]), ("list(string)", [1, "two"]), + ("tensor", "string that is not a text-formatted TensorProto"), ]) def testConvertError(self, attr_type, value): with self.assertRaisesRegex(TypeError, "Failed to convert value"): From 1e336d3ef0706792772b605a1fed0135f5cc5cfe Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Mon, 10 Aug 2020 15:19:24 -0700 Subject: [PATCH 2470/2522] [SE] Don't assume that the CUDA context has not changed in the outermost ScopedActivationContext. Will fix https://github.com/google/jax/issues/3802 when incorporated into JAX. 
PiperOrigin-RevId: 325899237 Change-Id: I1f2bf59d982da16db138229d8fa155f41a7e094a --- tensorflow/stream_executor/cuda/BUILD | 12 +++ .../stream_executor/cuda/cuda_driver.cc | 18 ++++- .../stream_executor/cuda/cuda_driver_test.cc | 76 +++++++++++++++++++ 3 files changed, 104 insertions(+), 2 deletions(-) create mode 100644 tensorflow/stream_executor/cuda/cuda_driver_test.cc diff --git a/tensorflow/stream_executor/cuda/BUILD b/tensorflow/stream_executor/cuda/BUILD index f3cffc04465..bd545f097cf 100644 --- a/tensorflow/stream_executor/cuda/BUILD +++ b/tensorflow/stream_executor/cuda/BUILD @@ -130,6 +130,18 @@ cc_library( ], ) +tf_cuda_cc_test( + name = "cuda_driver_test", + srcs = ["cuda_driver_test.cc"], + tags = tf_cuda_tests_tags(), + deps = [ + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/stream_executor/lib", + "@local_config_cuda//cuda:cuda_headers", + ], +) + tf_cuda_cc_test( name = "memcpy_test", srcs = ["memcpy_test.cc"], diff --git a/tensorflow/stream_executor/cuda/cuda_driver.cc b/tensorflow/stream_executor/cuda/cuda_driver.cc index e30eb549a9c..67fd72d52f3 100644 --- a/tensorflow/stream_executor/cuda/cuda_driver.cc +++ b/tensorflow/stream_executor/cuda/cuda_driver.cc @@ -200,6 +200,21 @@ ScopedActivateContext::ScopedActivateContext(GpuContext* cuda_context) { if (FLAGS_gpuexec_cuda_sync_around_driver_calls) SynchronizeOrDie(); auto* tls = &tls_data.get(); + + // If this is an outermost scope, we must not assume that the CUDA context has + // been left in the same state we left it. Other code may have run on this + // thread and altered the context. + if (tls->depth == 0) { + VLOG(3) << "ScopedActivateContext switching to " << cuda_context->id(); + FAIL_IF_CUDA_RES_ERROR(cuCtxSetCurrent(cuda_context->context()), + "Failed setting context"); + tls->depth = 1; + tls->id = cuda_context->id(); + tls->context = cuda_context; + to_restore_ = nullptr; + return; + } + tls->depth++; if (tls->id == cuda_context->id()) { if (kVerifyGpuContext) { @@ -212,8 +227,7 @@ ScopedActivateContext::ScopedActivateContext(GpuContext* cuda_context) { VLOG(3) << "ScopedActivateContext switching context from " << tls->id << " to " << cuda_context->id(); - to_restore_ = (tls->depth == 1 ? nullptr : tls->context); - + to_restore_ = tls->context; // Set the context and update thread local. FAIL_IF_CUDA_RES_ERROR(cuCtxSetCurrent(cuda_context->context()), "Failed setting context"); diff --git a/tensorflow/stream_executor/cuda/cuda_driver_test.cc b/tensorflow/stream_executor/cuda/cuda_driver_test.cc new file mode 100644 index 00000000000..5b173f96d85 --- /dev/null +++ b/tensorflow/stream_executor/cuda/cuda_driver_test.cc @@ -0,0 +1,76 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#if GOOGLE_CUDA +#include "tensorflow/stream_executor/cuda/cuda_driver.h" + +#include "absl/memory/memory.h" +#include "third_party/gpus/cuda/include/cuda_runtime_api.h" +#include "tensorflow/core/platform/test.h" + +namespace stream_executor { +namespace gpu { + +void CheckCuda(CUresult result, const char* file, int line) { + if (result == CUDA_SUCCESS) { + return; + } + const char* name; + cuGetErrorName(result, &name); + const char* message; + cuGetErrorString(result, &message); + LOG(FATAL) << file << "(" << line << "): " << name << ", " << message; +} + +void CheckCuda(cudaError_t result, const char* file, int line) { + if (result == cudaSuccess) { + return; + } + const char* name = cudaGetErrorName(result); + const char* message = cudaGetErrorString(result); + LOG(FATAL) << file << "(" << line << "): " << name << ", " << message; +} + +#define CHECK_CUDA(result) CheckCuda(result, __FILE__, __LINE__) + +TEST(CudaDriverTest, ScopedActivateContextTest) { + CHECK_CUDA(cuInit(0)); + CUdevice device; + CHECK_CUDA(cuDeviceGet(&device, 0)); + CUcontext context0, context1; + CHECK_CUDA(cuCtxCreate(&context0, 0, device)); + CHECK_CUDA(cuCtxCreate(&context1, 0, device)); + GpuContext se_context1(context1, /*id=*/101); + { + ScopedActivateContext scope(&se_context1); + CUcontext c; + CHECK_CUDA(cuCtxGetCurrent(&c)); + EXPECT_EQ(c, context1); + } + CHECK_CUDA(cuCtxSetCurrent(context0)); + // ScopedActivateContext must correctly set the CUDA context even if some + // other code changes the context between the two scopes. + { + ScopedActivateContext scope(&se_context1); + CUcontext c; + CHECK_CUDA(cuCtxGetCurrent(&c)); + EXPECT_EQ(c, context1); + } +} + +} // namespace gpu +} // namespace stream_executor + +#endif // GOOGLE_CUDA From ba06a75f1661be297f741feba705808d97612717 Mon Sep 17 00:00:00 2001 From: Jay Shi Date: Mon, 10 Aug 2020 15:40:19 -0700 Subject: [PATCH 2471/2522] [tf.data] Add unit test to test the environment variable settings in `ObtainOptimizations` function. PiperOrigin-RevId: 325903283 Change-Id: I45742135cf4afe7c45c29afd9585b0445c07dd0a --- tensorflow/core/kernels/data/dataset_utils.cc | 30 +++- tensorflow/core/kernels/data/dataset_utils.h | 7 +- .../core/kernels/data/dataset_utils_test.cc | 141 ++++++++++++++---- .../core/kernels/data/optimize_dataset_op.cc | 59 +++----- 4 files changed, 160 insertions(+), 77 deletions(-) diff --git a/tensorflow/core/kernels/data/dataset_utils.cc b/tensorflow/core/kernels/data/dataset_utils.cc index 66de482467d..d79288b86d3 100644 --- a/tensorflow/core/kernels/data/dataset_utils.cc +++ b/tensorflow/core/kernels/data/dataset_utils.cc @@ -906,13 +906,38 @@ bool MatchesAnyVersionRE(StringPiece op_prefix, StringPiece op_to_match) { } std::vector SelectOptimizations( - const string& job_name, const string& opt_ins_raw, - const string& opt_outs_raw, + const string& job_name, const absl::flat_hash_map& live_experiments, const std::vector& optimizations_enabled, const std::vector& optimizations_disabled, const std::vector& optimizations_default, std::function hash_func) { + std::vector optimizations; + if (job_name.empty()) { + // If `job_name` is empty, apply the enabled and default optimizations + // directly. 
+ optimizations.insert(optimizations.end(), optimizations_enabled.begin(), + optimizations_enabled.end()); + optimizations.insert(optimizations.end(), optimizations_default.begin(), + optimizations_default.end()); + return optimizations; + } + + // If `job_name` is non-empty, we determine which optimizations to apply to + // this job based on the enable/disable settings from tf.data.Options, the + // opt in/out settings from environment variables, and rollout condition from + // `live_experiments`. + const char* opt_ins_raw_cs = std::getenv("TF_DATA_EXPERIMENT_OPT_IN"); + const char* opt_outs_raw_cs = std::getenv("TF_DATA_EXPERIMENT_OPT_OUT"); + string opt_ins_raw; + if (opt_ins_raw_cs != nullptr) { + opt_ins_raw = string(opt_ins_raw_cs); + } + string opt_outs_raw; + if (opt_outs_raw_cs != nullptr) { + opt_outs_raw = string(opt_outs_raw_cs); + } + // Creates a set of optimizations. absl::flat_hash_set optimizations_set; @@ -1018,7 +1043,6 @@ std::vector SelectOptimizations( } } - std::vector optimizations; optimizations.insert(optimizations.end(), optimizations_set.begin(), optimizations_set.end()); return optimizations; diff --git a/tensorflow/core/kernels/data/dataset_utils.h b/tensorflow/core/kernels/data/dataset_utils.h index 0fe3618f34b..7f9ea923b98 100644 --- a/tensorflow/core/kernels/data/dataset_utils.h +++ b/tensorflow/core/kernels/data/dataset_utils.h @@ -304,12 +304,11 @@ class DummyResourceOp : public OpKernel { // MatchesAnyVersionRE("PaddedBatchDataset", "BatchDataset") == false bool MatchesAnyVersionRE(StringPiece op_prefix, StringPiece op_to_match); -// Based on `optimizations_enabled`, `optimizations_disabled`, and -// `optimizations_disabled`, returns the list of optimizations that will be +// Based on `job_name`, `optimizations_enabled`, `optimizations_disabled` and +// `optimizations_default`, returns the list of optimizations that will be // applied. 
std::vector SelectOptimizations( - const string& job_name, const string& opt_ins_raw, - const string& opt_outs_raw, + const string& job_name, const absl::flat_hash_map& live_experiments, const std::vector& optimizations_enabled, const std::vector& optimizations_disabled, diff --git a/tensorflow/core/kernels/data/dataset_utils_test.cc b/tensorflow/core/kernels/data/dataset_utils_test.cc index a1f624faeb6..85019e3f8da 100644 --- a/tensorflow/core/kernels/data/dataset_utils_test.cc +++ b/tensorflow/core/kernels/data/dataset_utils_test.cc @@ -1138,18 +1138,15 @@ class SelectOptimizationsHashTest : public ::testing::TestWithParam {}; TEST_P(SelectOptimizationsHashTest, DatasetUtils) { const uint64 hash_result = GetParam(); string job_name = "job"; - const string opt_ins_raw = ""; - const string opt_outs_raw = ""; auto hash_func = [hash_result](const string& str) { return hash_result; }; absl::flat_hash_map live_experiments = { {"exp1", 0}, {"exp2", 20}, {"exp3", 33}, {"exp4", 45}, {"exp5", 67}, {"exp6", 88}, {"exp7", 100}}; std::vector optimizations_enabled, optimizations_disabled, optimizations_default; - std::vector optimizations = - SelectOptimizations(job_name, opt_ins_raw, opt_outs_raw, live_experiments, - optimizations_enabled, optimizations_disabled, - optimizations_default, hash_func); + std::vector optimizations = SelectOptimizations( + job_name, live_experiments, optimizations_enabled, optimizations_disabled, + optimizations_default, hash_func); int tested_times = 0; switch (hash_result) { @@ -1182,48 +1179,60 @@ class SelectOptimizationsOptTest : public ::testing::TestWithParam> {}; TEST_P(SelectOptimizationsOptTest, DatasetUtils) { + const string opt_ins = std::get<0>(GetParam()); + const string opt_outs = std::get<1>(GetParam()); + if (!opt_ins.empty()) { + setenv("TF_DATA_EXPERIMENT_OPT_IN", opt_ins.c_str(), 1); + } + if (!opt_outs.empty()) { + setenv("TF_DATA_EXPERIMENT_OPT_OUT", opt_outs.c_str(), 1); + } string job_name = "job"; - const string opt_ins_raw = std::get<0>(GetParam()); - const string opt_outs_raw = std::get<1>(GetParam()); auto hash_func = [](const string& str) { return 50; }; absl::flat_hash_map live_experiments = { {"exp1", 0}, {"exp2", 25}, {"exp3", 50}, {"exp4", 75}, {"exp5", 100}}; std::vector optimizations_enabled, optimizations_disabled, optimizations_default; - std::vector optimizations = - SelectOptimizations(job_name, opt_ins_raw, opt_outs_raw, live_experiments, - optimizations_enabled, optimizations_disabled, - optimizations_default, hash_func); + std::vector optimizations = SelectOptimizations( + job_name, live_experiments, optimizations_enabled, optimizations_disabled, + optimizations_default, hash_func); int tested_times = 0; - if (opt_outs_raw == "all") { + if (opt_outs == "all") { EXPECT_THAT(optimizations, UnorderedElementsAre()); tested_times++; - } else if (opt_outs_raw.empty()) { - if (opt_ins_raw == "all") { + } else if (opt_outs.empty()) { + if (opt_ins == "all") { EXPECT_THAT(optimizations, UnorderedElementsAre("exp1", "exp2", "exp3", "exp4", "exp5")); tested_times++; - } else if (opt_ins_raw.empty()) { + } else if (opt_ins.empty()) { EXPECT_THAT(optimizations, UnorderedElementsAre("exp4", "exp5")); tested_times++; - } else if (opt_ins_raw == "exp2,exp4") { + } else if (opt_ins == "exp2,exp4") { EXPECT_THAT(optimizations, UnorderedElementsAre("exp2", "exp4", "exp5")); tested_times++; } - } else if (opt_outs_raw == "exp1,exp5") { - if (opt_ins_raw == "all") { + } else if (opt_outs == "exp1,exp5") { + if (opt_ins == "all") { 
EXPECT_THAT(optimizations, UnorderedElementsAre("exp2", "exp3", "exp4")); tested_times++; - } else if (opt_ins_raw.empty()) { + } else if (opt_ins.empty()) { EXPECT_THAT(optimizations, UnorderedElementsAre("exp4")); tested_times++; - } else if (opt_ins_raw == "exp2,exp4") { + } else if (opt_ins == "exp2,exp4") { EXPECT_THAT(optimizations, UnorderedElementsAre("exp2", "exp4")); tested_times++; } } EXPECT_EQ(tested_times, 1); + + if (!opt_ins.empty()) { + unsetenv("TF_DATA_EXPERIMENT_OPT_IN"); + } + if (!opt_outs.empty()) { + unsetenv("TF_DATA_EXPERIMENT_OPT_OUT"); + } } INSTANTIATE_TEST_SUITE_P( @@ -1235,10 +1244,16 @@ class SelectOptimizationsConflictTest : public ::testing::TestWithParam> {}; TEST_P(SelectOptimizationsConflictTest, DatasetUtils) { - string job_name = "job"; - const string opt_ins_raw = std::get<0>(GetParam()); - const string opt_outs_raw = std::get<1>(GetParam()); + const string opt_ins = std::get<0>(GetParam()); + const string opt_outs = std::get<1>(GetParam()); const uint64 hash_result = std::get<2>(GetParam()); + if (!opt_ins.empty()) { + setenv("TF_DATA_EXPERIMENT_OPT_IN", opt_ins.c_str(), 1); + } + if (!opt_outs.empty()) { + setenv("TF_DATA_EXPERIMENT_OPT_OUT", opt_outs.c_str(), 1); + } + string job_name = "job"; auto hash_func = [hash_result](const string& str) { return hash_result; }; absl::flat_hash_map live_experiments = { {"exp1", 20}, {"exp2", 30}, {"exp3", 40}, @@ -1246,21 +1261,27 @@ TEST_P(SelectOptimizationsConflictTest, DatasetUtils) { std::vector optimizations_enabled = {"exp1", "exp4"}, optimizations_disabled = {"exp2", "exp5"}, optimizations_default = {"exp3", "exp6"}; - std::vector optimizations = - SelectOptimizations(job_name, opt_ins_raw, opt_outs_raw, live_experiments, - optimizations_enabled, optimizations_disabled, - optimizations_default, hash_func); + std::vector optimizations = SelectOptimizations( + job_name, live_experiments, optimizations_enabled, optimizations_disabled, + optimizations_default, hash_func); int tested_times = 0; - if (opt_outs_raw.empty()) { + if (opt_outs.empty()) { EXPECT_THAT(optimizations, UnorderedElementsAre("exp1", "exp3", "exp4", "exp6")); tested_times++; - } else if (opt_outs_raw == "exp1,exp3") { + } else if (opt_outs == "exp1,exp3") { EXPECT_THAT(optimizations, UnorderedElementsAre("exp1", "exp4", "exp6")); tested_times++; } EXPECT_EQ(tested_times, 1); + + if (!opt_ins.empty()) { + unsetenv("TF_DATA_EXPERIMENT_OPT_IN"); + } + if (!opt_outs.empty()) { + unsetenv("TF_DATA_EXPERIMENT_OPT_OUT"); + } } INSTANTIATE_TEST_SUITE_P(Test, SelectOptimizationsConflictTest, @@ -1268,6 +1289,66 @@ INSTANTIATE_TEST_SUITE_P(Test, SelectOptimizationsConflictTest, ::testing::Values("", "exp1,exp3"), ::testing::Values(10, 50, 90))); +class SelectOptimizationsJobTest + : public ::testing::TestWithParam> {}; + +TEST_P(SelectOptimizationsJobTest, DatasetUtils) { + const string job_name = std::get<0>(GetParam()); + const string opt_ins = std::get<1>(GetParam()); + const string opt_outs = std::get<2>(GetParam()); + if (!opt_ins.empty()) { + setenv("TF_DATA_EXPERIMENT_OPT_IN", opt_ins.c_str(), 1); + } + if (!opt_outs.empty()) { + setenv("TF_DATA_EXPERIMENT_OPT_OUT", opt_outs.c_str(), 1); + } + std::vector optimizations_enabled = {"exp4"}, optimizations_disabled, + optimizations_default = {"exp2"}; + absl::flat_hash_map live_experiments = { + {"exp1", 0}, {"exp2", 100}, {"exp3", 100}}; + auto hash_func = [](const string& str) { return Hash64(str); }; + std::vector optimizations = SelectOptimizations( + job_name, live_experiments, 
optimizations_enabled, optimizations_disabled, + optimizations_default, hash_func); + + int tested_times = 0; + if (job_name.empty()) { + EXPECT_THAT(optimizations, UnorderedElementsAre("exp2", "exp4")); + tested_times++; + } else if (opt_ins.empty()) { + if (opt_outs.empty()) { + EXPECT_THAT(optimizations, UnorderedElementsAre("exp2", "exp3", "exp4")); + tested_times++; + } else if (opt_outs == "exp2,exp3") { + EXPECT_THAT(optimizations, UnorderedElementsAre("exp4")); + tested_times++; + } + } else if (opt_ins == "exp1") { + if (opt_outs.empty()) { + EXPECT_THAT(optimizations, + UnorderedElementsAre("exp1", "exp2", "exp3", "exp4")); + tested_times++; + } else if (opt_outs == "exp2,exp3") { + EXPECT_THAT(optimizations, UnorderedElementsAre("exp1", "exp4")); + tested_times++; + } + } + EXPECT_EQ(tested_times, 1); + + if (!opt_ins.empty()) { + unsetenv("TF_DATA_EXPERIMENT_OPT_IN"); + } + if (!opt_outs.empty()) { + unsetenv("TF_DATA_EXPERIMENT_OPT_OUT"); + } +} + +INSTANTIATE_TEST_SUITE_P(Test, SelectOptimizationsJobTest, + ::testing::Combine(::testing::Values("", "job"), + ::testing::Values("", "exp1"), + ::testing::Values("", + "exp2,exp3"))); + } // namespace } // namespace data } // namespace tensorflow diff --git a/tensorflow/core/kernels/data/optimize_dataset_op.cc b/tensorflow/core/kernels/data/optimize_dataset_op.cc index 74468e71241..f1fa96d9ac3 100644 --- a/tensorflow/core/kernels/data/optimize_dataset_op.cc +++ b/tensorflow/core/kernels/data/optimize_dataset_op.cc @@ -80,48 +80,27 @@ void OptimizeDatasetOp::MakeDataset(OpKernelContext* ctx, DatasetBase* input, &optimizations_default)); string job_name = port::JobName(); - if (job_name.empty()) { - // If `job_name` is empty, apply the enabled and default optimizations - // directly. - optimizations.insert(optimizations.end(), optimizations_enabled.begin(), - optimizations_enabled.end()); - optimizations.insert(optimizations.end(), optimizations_default.begin(), - optimizations_default.end()); - } else { - // The map that stores the experiment names and for how much percentage - // of the jobs, the experiments will be randomly turned on. - // - // This is currently empty; we have no live experiments yet. - absl::flat_hash_map live_experiments; + // The map that stores the experiment names and for how much percentage + // of the jobs, the experiments will be randomly turned on. + // + // This is currently empty; we have no live experiments yet. + absl::flat_hash_map live_experiments; + auto hash_func = [](const string& str) { return Hash64(str); }; + optimizations = SelectOptimizations( + job_name, live_experiments, optimizations_enabled, + optimizations_disabled, optimizations_default, hash_func); - const char* opt_ins_raw_cs = std::getenv("TF_DATA_EXPERIMENT_OPT_IN"); - const char* opt_outs_raw_cs = std::getenv("TF_DATA_EXPERIMENT_OPT_OUT"); - string opt_ins_raw; - if (opt_ins_raw_cs != nullptr) { - opt_ins_raw = string(opt_ins_raw_cs); - } - string opt_outs_raw; - if (opt_outs_raw_cs != nullptr) { - opt_outs_raw = string(opt_outs_raw_cs); - } - auto hash_func = [](const string& str) { return Hash64(str); }; - optimizations = SelectOptimizations( - job_name, opt_ins_raw, opt_outs_raw, live_experiments, - optimizations_enabled, optimizations_disabled, optimizations_default, - hash_func); + // Log and record the experiments that will be applied. + if (!job_name.empty() && !live_experiments.empty()) { + VLOG(1) << "The input pipeline is subject to tf.data experiment. 
" + "Please see `go/tf-data-experiments` for more details."; - // Log the experiments that will be applied. - if (!live_experiments.empty() && VLOG_IS_ON(1)) { - VLOG(1) << "The input pipeline is subject to tf.data experiment. " - "Please see `go/tf-data-experiments` for more details."; - - for (auto& pair : live_experiments) { - string experiment = pair.first; - if (std::find(optimizations.begin(), optimizations.end(), - experiment) != optimizations.end()) { - VLOG(1) << "The experiment \"" << experiment << "\" is applied."; - metrics::RecordTFDataExperiment(experiment); - } + for (auto& pair : live_experiments) { + string experiment = pair.first; + if (std::find(optimizations.begin(), optimizations.end(), experiment) != + optimizations.end()) { + VLOG(1) << "The experiment \"" << experiment << "\" is applied."; + metrics::RecordTFDataExperiment(experiment); } } } From 8daae0f3dab01119fe8dbf994852d20a80bace39 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Mon, 10 Aug 2020 16:04:30 -0700 Subject: [PATCH 2472/2522] TEsting CUDA 11 nightly build PiperOrigin-RevId: 325907860 Change-Id: Ieff7d7964818490300c019143afde201b6491156 --- .../tools/ci_build/rel/ubuntu_cuda11/gpu_py35_nonpip.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py35_nonpip.sh b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py35_nonpip.sh index 8a0796723b2..3e91bf787a9 100644 --- a/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py35_nonpip.sh +++ b/tensorflow/tools/ci_build/rel/ubuntu_cuda11/gpu_py35_nonpip.sh @@ -46,7 +46,8 @@ source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh tag_filters="gpu,requires-gpu,-no_gpu,-no_oss,-oss_serial,-no_oss_py35" set +e -bazel test --config=cuda --config=opt \ +ls /usr/include/cud* +bazel test --config=cuda --config=opt -s \ --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda11:toolchain \ --linkopt=-lrt \ --action_env=TF2_BEHAVIOR="${TF2_BEHAVIOR}" \ @@ -54,7 +55,7 @@ bazel test --config=cuda --config=opt \ --test_tag_filters=${tag_filters} \ --build_tag_filters=${tag_filters} \ --test_timeout="300,450,1200,3600" --local_test_jobs=4 \ - --test_output=errors --verbose_failures=true --keep_going \ + --test_output=errors --verbose_failures=true \ --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute \ -- ${DEFAULT_BAZEL_TARGETS} -//tensorflow/lite/... test_xml_summary_exit From 14f6926300639fb589a10a750b44b03d32997765 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 10 Aug 2020 16:05:20 -0700 Subject: [PATCH 2473/2522] Implement extraction of outside compilation of ops nested inside tf.IfRegion op. 
PiperOrigin-RevId: 325908017 Change-Id: Ib45dce2d7e9c61a9f1ca9e9d90663f9b902d44c4 --- .../tpu_extract_outside_compilation.mlir | 232 ++++++++++++++ .../tpu_extract_outside_compilation.cc | 289 +++++++++++++++--- 2 files changed, 474 insertions(+), 47 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tpu_extract_outside_compilation.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tpu_extract_outside_compilation.mlir index 732e34fce90..1f516a25824 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tpu_extract_outside_compilation.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tpu_extract_outside_compilation.mlir @@ -456,4 +456,236 @@ module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:wor } return %1 : tensor } + + // Tests extraction of a single outside compiled cluster inside a tf.IfRegion op. + + // CHECK-LABEL: func @outside_compiled_ops_inside_tf_if + func @outside_compiled_ops_inside_tf_if(%arg0: tensor) -> tensor { + %0 = "tf.A"(%arg0) : (tensor) -> tensor + + // CHECK: %[[REPLICATE:[0-9]*]]:2 = tf_device.replicate + // CHECK: %[[PARALLEL_EXECUTE_OUTPUT:[0-9]*]] = "tf_device.parallel_execute" + // CHECK-NEXT: "tf_device.launch" + // CHECK-NEXT: %[[PLACEHOLDER_KEY:[0-9]*]] = "tf._TPUCompileMlirPlaceholderProgramKey"() + // CHECK-NEXT: %[[PREDICATE_RECV_OUTPUT:[0-9]*]] = "tf._XlaRecvAtHost"(%[[PLACEHOLDER_KEY]]) + // CHECK-SAME: device_ordinal = 0 + // CHECK-SAME: key = "if_predicate_channel_cluster1_0" + // CHECK-NEXT: tf.IfRegion"(%[[PREDICATE_RECV_OUTPUT]]) + // CHECK-NEXT: %[[ARG_RECV_OUTPUT:[0-9]*]]:2 = "tf._XlaRecvAtHost"(%[[PLACEHOLDER_KEY]]) + // CHECK-SAME: device_ordinal = 0 + // CHECK-SAME: key = "host_compute_channel_cluster1_args" + // CHECK: "tf.D"(%[[ARG_RECV_OUTPUT]]#0, %[[ARG_RECV_OUTPUT]]#1) + // CHECK: "tf._XlaSendFromHost"(%[[PLACEHOLDER_KEY]]) + // CHECK-SAME: device_ordinal = 0 + // CHECK-SAME: key = "host_compute_channel_cluster1_retvals" + // CHECK-NEXT: "tf.Yield"() : () -> () + // CHECK: "tf_device.cluster" + // CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A" + // CHECK: %[[B_OUTPUT:[0-9]*]] = "tf.B" + // CHECK: %[[G_OUTPUT:[0-9]*]] = "tf.G" + // CHECK: "tf.XlaSendToHost"(%6) {key = "if_predicate_channel_cluster1_0"} + // CHECK-NEXT: tf.IfRegion"(%[[G_OUTPUT]]) + // CHECK: "tf._XlaHostComputeMlir"(%[[B_OUTPUT]], %[[A_OUTPUT]]) + // CHECK-SAME: recv_key = "host_compute_channel_cluster1_retvals" + // CHECK-SAME: send_key = "host_compute_channel_cluster1_args" + // CHECK-SAME: tpu_core = 0 + // CHECK-NEXT: "tf.Yield"() : () -> () + %1:2 = tf_device.replicate([%0, %arg0] as %ri_0: tensor) {n = 2 : i32} { + %2 = "tf_device.cluster"() ( { + %3 = "tf.A"() : () -> (tensor) + %4 = "tf.B"() : () -> (tensor) + %6 = "tf.G"() : () -> (tensor) + + "tf.IfRegion"(%6) ({ + "tf.D"(%4, %3) {_xla_outside_compilation = "cluster1"} : (tensor, tensor) -> () + "tf.Yield"() : () -> () + }, { + "tf.Yield"() : () -> () + }) { is_stateless = false} : (tensor) -> () + + %5 = "tf.E"() : () -> tensor + tf_device.return %5 : tensor + }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor + tf_device.return %2 : tensor + } + + return %1 : tensor + } + + // Tests extraction of a single outside compiled cluster inside a tf.IfRegion + // op with return values. 
+ + // CHECK-LABEL: func @outside_compiled_ops_inside_tf_if_with_return_values + func @outside_compiled_ops_inside_tf_if_with_return_values( + %arg0: tensor) -> tensor { + %0 = "tf.A"(%arg0) : (tensor) -> tensor + + // CHECK: %[[REPLICATE:[0-9]*]]:2 = tf_device.replicate + // CHECK: %[[PARALLEL_EXECUTE_OUTPUT:[0-9]*]] = "tf_device.parallel_execute" + // CHECK-NEXT: "tf_device.launch" + // CHECK-NEXT: %[[PLACEHOLDER_KEY:[0-9]*]] = "tf._TPUCompileMlirPlaceholderProgramKey"() + // CHECK-NEXT: %[[PREDICATE_RECV_OUTPUT:[0-9]*]] = "tf._XlaRecvAtHost"(%[[PLACEHOLDER_KEY]]) + // CHECK-SAME: device_ordinal = 0 + // CHECK-SAME: key = "if_predicate_channel_cluster1_0" + // CHECK-NEXT: tf.IfRegion"(%[[PREDICATE_RECV_OUTPUT]]) + // CHECK-NEXT: %[[ARG_RECV_OUTPUT:[0-9]*]]:2 = "tf._XlaRecvAtHost"(%[[PLACEHOLDER_KEY]]) + // CHECK-SAME: device_ordinal = 0 + // CHECK-SAME: key = "host_compute_channel_cluster1_args" + // CHECK: %[[D_OUTPUT:[0-9]*]] = "tf.D"(%[[ARG_RECV_OUTPUT]]#0, %[[ARG_RECV_OUTPUT]]#1) + // CHECK: "tf._XlaSendFromHost"(%[[D_OUTPUT]], %[[PLACEHOLDER_KEY]]) + // CHECK-SAME: device_ordinal = 0 + // CHECK-SAME: key = "host_compute_channel_cluster1_retvals" + // CHECK-NEXT: "tf.Yield"() : () -> () + // CHECK: "tf_device.cluster" + // CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A" + // CHECK: %[[B_OUTPUT:[0-9]*]] = "tf.B" + // CHECK: %[[G_OUTPUT:[0-9]*]] = "tf.G" + // CHECK: "tf.XlaSendToHost"(%6) {key = "if_predicate_channel_cluster1_0"} + // CHECK-NEXT: tf.IfRegion"(%[[G_OUTPUT]]) + // CHECK: %[[HOST_COMPUTE_OUT:[0-9]*]] = "tf._XlaHostComputeMlir"(%[[B_OUTPUT]], %[[A_OUTPUT]]) + // CHECK-SAME: recv_key = "host_compute_channel_cluster1_retvals" + // CHECK-SAME: send_key = "host_compute_channel_cluster1_args" + // CHECK-SAME: tpu_core = 0 + // CHECK-NEXT: "tf.Yield"(%[[HOST_COMPUTE_OUT]]) + %1:2 = tf_device.replicate([%0, %arg0] as %ri_0: tensor) {n = 2 : i32} { + %2 = "tf_device.cluster"() ( { + %3 = "tf.A"() : () -> (tensor) + %4 = "tf.B"() : () -> (tensor) + %6 = "tf.G"() : () -> (tensor) + + "tf.IfRegion"(%6) ({ + %7 = "tf.D"(%4, %3) {_xla_outside_compilation = "cluster1"} : (tensor, tensor) -> (tensor) + "tf.Yield"(%7) : (tensor) -> () + }, { + + %8 = "tf.F"() : () -> (tensor) + "tf.Yield"(%8) : (tensor) -> () + }) { is_stateless = false} : (tensor) -> (tensor) + + %5 = "tf.E"() : () -> tensor + tf_device.return %5 : tensor + }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor + tf_device.return %2 : tensor + } + + return %1 : tensor + } + + // Tests extraction of a single outside compiled cluster inside a tf.IfRegion op without external inputs/outputs + + // CHECK-LABEL: func @outside_compiled_ops_inside_tf_if_without_input_outputs + func @outside_compiled_ops_inside_tf_if_without_input_outputs( + %arg0: tensor) -> tensor { + %0 = "tf.A"(%arg0) : (tensor) -> tensor + // CHECK: %[[REPLICATE:[0-9]*]]:2 = tf_device.replicate + // CHECK: %[[PARALLEL_EXECUTE_OUTPUT:[0-9]*]] = "tf_device.parallel_execute" + // CHECK-NEXT: "tf_device.launch" + // CHECK-NEXT: %[[PLACEHOLDER_KEY:[0-9]*]] = "tf._TPUCompileMlirPlaceholderProgramKey"() + // CHECK-NEXT: %[[PREDICATE_RECV_OUTPUT:[0-9]*]] = "tf._XlaRecvAtHost"(%[[PLACEHOLDER_KEY]]) + // CHECK-SAME: device_ordinal = 0 + // CHECK-SAME: key = "if_predicate_channel_cluster1_0" + // CHECK-NEXT: tf.IfRegion"(%[[PREDICATE_RECV_OUTPUT]]) + // CHECK: "tf.D" + // CHECK-NEXT: "tf.Yield"() : () -> () + // CHECK: "tf_device.cluster" + // CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A" + // CHECK: %[[B_OUTPUT:[0-9]*]] = "tf.B" + // CHECK: 
%[[G_OUTPUT:[0-9]*]] = "tf.G" + // CHECK: "tf.XlaSendToHost"(%6) {key = "if_predicate_channel_cluster1_0"} + // CHECK-NEXT: tf.IfRegion"(%[[G_OUTPUT]]) + // CHECK-NEXT: "tf.Yield"() : () -> () + %1:2 = tf_device.replicate([%0, %arg0] as %ri_0: tensor) {n = 2 : i32} { + %2 = "tf_device.cluster"() ( { + %3 = "tf.A"() : () -> (tensor) + %4 = "tf.B"() : () -> (tensor) + %6 = "tf.G"() : () -> (tensor) + + "tf.IfRegion"(%6) ({ + "tf.D"() {_xla_outside_compilation = "cluster1"} : () -> () + "tf.Yield"() : () -> () + }, { + "tf.Yield"() : () -> () + }) { is_stateless = false} : (tensor) -> () + + %5 = "tf.E"() : () -> tensor + tf_device.return %5 : tensor + }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor + tf_device.return %2 : tensor + } + + return %1 : tensor + } + + // Tests extraction of a single outside compiled cluster inside a nested + // tf.IfRegion op. + + // CHECK-LABEL: func @outside_compiled_ops_inside_nested_if + func @outside_compiled_ops_inside_nested_if(%arg0: tensor) -> tensor { + %0 = "tf.A"(%arg0) : (tensor) -> tensor + // CHECK: %[[REPLICATE:[0-9]*]]:2 = tf_device.replicate + // CHECK: %[[PARALLEL_EXECUTE_OUTPUT:[0-9]*]] = "tf_device.parallel_execute" + // CHECK-NEXT: "tf_device.launch" + // CHECK-NEXT: %[[PLACEHOLDER_KEY:[0-9]*]] = "tf._TPUCompileMlirPlaceholderProgramKey"() + // CHECK-NEXT: %[[PREDICATE_RECV_OUTPUT:[0-9]*]] = "tf._XlaRecvAtHost"(%[[PLACEHOLDER_KEY]]) + // CHECK-SAME: device_ordinal = 0 + // CHECK-SAME: key = "if_predicate_channel_cluster1_0" + // CHECK-NEXT: tf.IfRegion"(%[[PREDICATE_RECV_OUTPUT]]) + // CHECK-NEXT: %[[PREDICATE2_RECV_OUTPUT:[0-9]*]] = "tf._XlaRecvAtHost"(%[[PLACEHOLDER_KEY]]) + // CHECK-SAME: device_ordinal = 0 + // CHECK-SAME: key = "if_predicate_channel_cluster1_1" + // CHECK-NEXT: tf.IfRegion"(%[[PREDICATE2_RECV_OUTPUT]]) + // CHECK-NEXT: "tf.Yield"() : () -> () + // CHECK: %[[ARG_RECV_OUTPUT:[0-9]*]] = "tf._XlaRecvAtHost"(%[[PLACEHOLDER_KEY]]) + // CHECK-SAME: device_ordinal = 0 + // CHECK-SAME: key = "host_compute_channel_cluster1_args" + // CHECK: "tf.D"(%[[ARG_RECV_OUTPUT]]) + // CHECK: "tf._XlaSendFromHost"(%[[PLACEHOLDER_KEY]]) + // CHECK-SAME: device_ordinal = 0 + // CHECK-SAME: key = "host_compute_channel_cluster1_retvals" + // CHECK-NEXT: "tf.Yield"() : () -> () + + // CHECK: "tf_device.cluster" + // CHECK: %[[A_OUTPUT:[0-9]*]] = "tf.A" + // CHECK: %[[B_OUTPUT:[0-9]*]] = "tf.B" + // CHECK: %[[G_OUTPUT:[0-9]*]] = "tf.G" + // CHECK: "tf.XlaSendToHost"(%[[G_OUTPUT]]) {key = "if_predicate_channel_cluster1_0"} + // CHECK-NEXT: tf.IfRegion"(%[[G_OUTPUT]]) + // CHECK: %[[H_OUTPUT:[0-9]*]] = "tf.H"(%[[B_OUTPUT]]) + // CHECK: "tf.XlaSendToHost"(%[[H_OUTPUT]]) {key = "if_predicate_channel_cluster1_1"} + // CHECK-NEXT: tf.IfRegion"(%[[H_OUTPUT]]) + // CHECK-NEXT: "tf.Yield"() : () -> () + // CHECK: %[[I_OUTPUT:[0-9]*]] = "tf.I"(%[[H_OUTPUT]]) + // CHECK: "tf._XlaHostComputeMlir"(%[[I_OUTPUT]]) + // CHECK-NEXT: "tf.Yield"() : () -> () + %1:2 = tf_device.replicate([%0, %arg0] as %ri_0: tensor) {n = 2 : i32} { + %2 = "tf_device.cluster"() ( { + %3 = "tf.A"() : () -> (tensor) + %4 = "tf.B"() : () -> (tensor) + %6 = "tf.G"() : () -> (tensor) + + "tf.IfRegion"(%6) ({ + %7 = "tf.H"(%4) : (tensor) -> (tensor) + + "tf.IfRegion"(%7)({ + "tf.Yield"() : () -> () + }, + { + %8 = "tf.I"(%7) : (tensor) -> (tensor) + "tf.D"(%8) {_xla_outside_compilation = "cluster1"} : (tensor) -> () + "tf.Yield"() : () -> () + }) { is_stateless = false} : (tensor) -> () + + "tf.Yield"() : () -> () + }, { + "tf.Yield"() : () -> () + }) { 
is_stateless = false} : (tensor) -> () + + %5 = "tf.E"() : () -> tensor + tf_device.return %5 : tensor + }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> tensor + tf_device.return %2 : tensor + } + + return %1 : tensor + } } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_outside_compilation.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_outside_compilation.cc index cbea4ae6544..9365807663a 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_outside_compilation.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_outside_compilation.cc @@ -17,11 +17,21 @@ limitations under the License. #include #include +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/FormatVariadic.h" +#include "mlir/IR/Builders.h" // from @llvm-project +#include "mlir/IR/Function.h" // from @llvm-project +#include "mlir/IR/MLIRContext.h" // from @llvm-project +#include "mlir/IR/Module.h" // from @llvm-project +#include "mlir/IR/Operation.h" // from @llvm-project +#include "mlir/IR/StandardTypes.h" // from @llvm-project +#include "mlir/IR/Visitors.h" // from @llvm-project #include "mlir/Pass/Pass.h" // from @llvm-project #include "mlir/Pass/PassRegistry.h" // from @llvm-project +#include "mlir/Support/LogicalResult.h" // from @llvm-project #include "mlir/Transforms/RegionUtils.h" // from @llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_device.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" @@ -77,31 +87,203 @@ struct TPUExtractOutsideCompilation void runOnOperation() override; }; -// Collects and clusters ops in `block` with the same `_xla_outside_compilation` -// attribute into `clusters` This returns an error if a -// `_xla_outside_compilation` attribute of an op is empty. -LogicalResult CollectAndGroupOutsideClusterOps(Block* block, - OutsideClusterMap* clusters) { - for (Operation& op : *block) { - if (auto attr = op.getAttrOfType(kXlaOutsideCompilationAttr)) { - if (attr.getValue().empty()) - return op.emitError() - << "attribute '" << kXlaOutsideCompilationAttr << "' is empty"; +// Holds information about control flow operations that wrap outside compiled +// op. Currently only tf.If op is supported. +class ControlFlowStackInfo { + public: + enum ControlFlowBranchType { kIfThen, kIfElse }; - auto it = clusters->try_emplace(attr.getValue()); - it.first->getSecond().push_back(&op); + explicit ControlFlowStackInfo(Operation* wrapping_op, Operation* nested_op) + : callsite_op_(wrapping_op) { + // Only tf.IfRegion op is supported for now. + auto control_flow_op = llvm::cast(callsite_op_); + assert(control_flow_op); + + auto parent_region = nested_op->getParentRegion(); + if (&control_flow_op.then_branch() == parent_region) { + type_ = ControlFlowBranchType::kIfThen; + } else { + type_ = ControlFlowBranchType::kIfElse; } } - return success(); + Value GetIfPredicateValue() { + auto if_op = llvm::cast(callsite_op_); + return if_op.cond(); + } + + ControlFlowBranchType GetBranchType() const { return type_; } + + Operation* GetCallSiteOp() const { return callsite_op_; } + + private: + ControlFlowBranchType type_; + + // `this` does not hold ownership of `callsite_op_`. + Operation* callsite_op_; +}; + +// Returns a list of ControlFlowStackInfo that represents a stack of control +// flow operations that wraps `op`. 
+llvm::SmallVector GetControlFlowStackForOp( + tf_device::ClusterOp tpu_cluster, Operation* op) { + assert(tpu_cluster.getOperation()->isProperAncestor(op)); + + llvm::SmallVector controlflow_stack; + Operation* op_in_stack = op; + while (op_in_stack != tpu_cluster.getOperation()) { + auto parent_op = op_in_stack->getParentOp(); + if (llvm::isa(parent_op)) { + controlflow_stack.insert(controlflow_stack.begin(), + ControlFlowStackInfo(parent_op, op_in_stack)); + } + op_in_stack = parent_op; + } + + return controlflow_stack; } -// Moves `cluster_ops` to associated `launch_op` body. -void MoveOutsideClusterOpsToLaunchOp(tf_device::LaunchOp launch_op, - llvm::ArrayRef cluster_ops) { - MLIRContext* context = launch_op.getContext(); - Operation* terminator = launch_op.GetBody().getTerminator(); +// Creates a IfRegionOp with `predicate` and then/else region with yield op and +// an empty block. +TF::IfRegionOp CloneEmptyIfWithPredicate(Value predicate, bool is_stateless, + Location loc, OpBuilder* builder) { + auto host_side_if = builder->create( + loc, llvm::SmallVector{}, predicate, is_stateless); + // Create empty then branch region. + auto& then_branch = host_side_if.then_branch(); + builder->setInsertionPoint(&then_branch.front(), then_branch.front().begin()); + builder->createBlock(&then_branch); + builder->create(loc, llvm::SmallVector({})); + + // Create empty else branch region. + auto& else_branch = host_side_if.else_branch(); + builder->setInsertionPoint(&else_branch.front(), else_branch.front().begin()); + builder->createBlock(&else_branch); + builder->create(loc, llvm::SmallVector({})); + return host_side_if; +} + +// Replicates tf.IfRegion op to host side computation. +Operation* ReplicateIf(const ControlFlowStackInfo& controlflow_info, + llvm::StringRef outside_cluster_name, ModuleOp module, + Value compilation_key, OpBuilder* builder, + int* send_recv_counter) { + // Create XlaSendToHostOp to send predicate value from device to host. + OpBuilder::InsertPoint insert_point = builder->saveInsertionPoint(); + auto if_callsite_op = + llvm::cast(controlflow_info.GetCallSiteOp()); + builder->setInsertionPoint(if_callsite_op); + + const auto predicate_send_recv_key = + llvm::formatv("if_predicate_channel_{0}_{1}", outside_cluster_name, + *send_recv_counter) + .str(); + *send_recv_counter += 1; + + auto predicate = if_callsite_op.cond(); + auto predicate_shape = predicate.getType(); + builder->create(if_callsite_op.getLoc(), predicate, + predicate_send_recv_key); + + // Create XlaRecvAtHostOp to receive predicate value from host. + builder->restoreInsertionPoint(insert_point); + auto recv_predicate_at_host = builder->create( + if_callsite_op.getLoc(), llvm::ArrayRef{predicate_shape}, + /*dynamic_key=*/compilation_key, + builder->getStringAttr(predicate_send_recv_key), + /*device_ordinal=*/builder->getI64IntegerAttr(0)); + + // Create host side if op. + return CloneEmptyIfWithPredicate(recv_predicate_at_host.getResult(0), + if_callsite_op.is_stateless(), + if_callsite_op.getLoc(), builder); +} + +// TODO(b/157054714): Use a better abstraction instead of +// _TPUCompileMlirOp and _XlaRecvAtHostOp and _XlaSendFromHostOp. +// Creates a compilation key as placeholder. A placeholder compilation cache key +// is created because it is a required input to _XlaRecvAtHost and +// _XlaSendFromHost but the _TPUCompileMlir has not yet been created for the TPU +// cluster that contains the outside compiled ops. 
This placeholder should be +// replaced by the TPU cluster _TPUCompileMlir in a subsequent pass. +Value CreateCompilationKeyPlaceholder(Location loc, OpBuilder* builder) { + auto result_type = + RankedTensorType::get({2}, builder->getType()); + return builder->create( + loc, /*program=*/result_type, llvm::ArrayRef{}); +} + +// Replicates the control flow operations that wraps outside compiled ops to +// `destination_block`. +Block* ReplicateControlFlowStack( + llvm::StringRef outside_cluster_name, + const llvm::SmallVectorImpl& stack_info, + tf_device::ClusterOp tpu_cluster, ModuleOp module, Value compilation_key, + Block* destination_block, int* send_recv_counter) { + assert(stack_info.size()); + OpBuilder builder = OpBuilder::atBlockTerminator(destination_block); + Operation* previous_replicated_controlflow_op = nullptr; + for (const auto& controlflow_stack_info : stack_info) { + // Create control flow op given provided insertion point and + // ControlFlowStackInfo. + previous_replicated_controlflow_op = + ReplicateIf(controlflow_stack_info, outside_cluster_name, module, + compilation_key, &builder, send_recv_counter); + auto if_op = llvm::cast(previous_replicated_controlflow_op); + auto type = controlflow_stack_info.GetBranchType(); + + // Update the insertion point to proper region inside the newly created + // control flow op. + if (type == ControlFlowStackInfo::kIfThen) { + builder.setInsertionPoint(&if_op.then_branch().front().front()); + } else { + builder.setInsertionPoint(&if_op.else_branch().front().front()); + } + } + + // Return the inner most branch at which outside compiled op is located. + // This block will later be used as insertion point to create send/recv ops. + auto inner_most_controlflow_stack = stack_info.back(); + auto inner_most_if = + llvm::cast(previous_replicated_controlflow_op); + if (inner_most_controlflow_stack.GetBranchType() == + ControlFlowStackInfo::kIfThen) { + return &inner_most_if.then_branch().front(); + } else { + return &inner_most_if.else_branch().front(); + } +} + +// Collects and clusters ops in `block` with the same `_xla_outside_compilation` +// attribute into `clusters` This returns an error if a +// `_xla_outside_compilation` attribute of an op is empty. +// TODO(b/163141763): Make sure ops inside control flow regions are not outside +// compiled if the entire control flow op is marked as outside compiled. +LogicalResult CollectAndGroupOutsideClusterOps(Block* block, + OutsideClusterMap* clusters) { + auto walk_result = block->walk([&](Operation* op) { + if (auto attr = op->getAttrOfType(kXlaOutsideCompilationAttr)) { + if (attr.getValue().empty()) { + op->emitError() << "attribute '" << kXlaOutsideCompilationAttr + << "' is empty"; + return WalkResult::interrupt(); + } + + auto it = clusters->try_emplace(attr.getValue()); + it.first->getSecond().push_back(op); + } + return WalkResult::advance(); + }); + + return failure(walk_result.wasInterrupted()); +} + +// Moves `cluster_ops` to associated `block`. +void MoveOutsideClusterOpsToBlock(Block& block, + llvm::ArrayRef cluster_ops, + MLIRContext* context) { + Operation* terminator = block.getTerminator(); for (Operation* cluster_op : cluster_ops) { // Remove `_xla_outside_compilation` and `device` attribute from ops in the // cluster as that information will be present in the `launch_op`. @@ -112,7 +294,7 @@ void MoveOutsideClusterOpsToLaunchOp(tf_device::LaunchOp launch_op, } } -// Creates a `tf_device::LaunchOp` to wrap cluster ops. +// Creates a `tf_device.launch` to wrap cluster ops. 
tf_device::LaunchOp CreateLaunchOpForOutsideCluster( OpBuilder* builder, Operation* last_cluster_op, llvm::StringRef host_device) { @@ -212,34 +394,43 @@ TF::_XlaHostComputeMlirOp CreateHostCompute( } void MoveOutsideCompiledOps( - tf_device::ClusterOp tpu_cluster, llvm::StringRef outside_cluster_name, - tf_device::LaunchOp host_launch_op, llvm::ArrayRef cluster_ops, + ModuleOp module, tf_device::ClusterOp tpu_cluster, + llvm::StringRef outside_cluster_name, tf_device::LaunchOp host_launch_op, + llvm::ArrayRef cluster_ops, const llvm::SmallSetVector& external_inputs, llvm::ArrayRef external_outputs) { + // Since ops in `cluster_ops` do not cross function/control flow boundary, it + // is sufficient to identify the control flow that wraps `cluster_ops` by + // looking at any arbitary op inside `cluster_ops`. + auto controlflow_stack = + GetControlFlowStackForOp(tpu_cluster, cluster_ops.front()); + + Value compilation_key; + if (!controlflow_stack.empty() || !external_inputs.empty() || + !external_outputs.empty()) { + OpBuilder builder(&host_launch_op.GetBody().front()); + compilation_key = + CreateCompilationKeyPlaceholder(tpu_cluster.getLoc(), &builder); + } + + Block* block_to_move_host_cluster = nullptr; + if (controlflow_stack.empty()) { + block_to_move_host_cluster = &host_launch_op.GetBody(); + } else { + int send_recv_counter = 0; + block_to_move_host_cluster = ReplicateControlFlowStack( + outside_cluster_name, controlflow_stack, tpu_cluster, module, + compilation_key, &host_launch_op.GetBody(), &send_recv_counter); + } + + MLIRContext* context = host_launch_op.getContext(); if (external_inputs.empty() && external_outputs.empty()) { - MoveOutsideClusterOpsToLaunchOp(host_launch_op, cluster_ops); + MoveOutsideClusterOpsToBlock(*block_to_move_host_cluster, cluster_ops, + context); return; } - OpBuilder builder(host_launch_op.GetBody().getTerminator()); - auto result_type = - RankedTensorType::get({}, builder.getType()); - - std::string txt_metadata; - std::string txt_module; - // TODO(b/157054714): Use a better abstraction instead of _TPUCompileMlirOp - // and _XlaRecvAtHostOp and _XlaSendFromHostOp. - - // A placeholder compilation cache key is created because it is a required - // input to _XlaRecvAtHost and _XlaSendFromHost but the _TPUCompileMlir has - // not yet been created for the TPU cluster that contains the outside compiled - // ops. This placeholder should be replaced by the TPU cluster _TPUCompileMlir - // in a subsequent pass. 
- auto compilation_key = - builder.create( - tpu_cluster.getLoc(), /*program=*/result_type, - llvm::ArrayRef{}); - + OpBuilder builder(block_to_move_host_cluster->getTerminator()); llvm::SmallVector host_output_types; for (const auto& external_input : external_inputs) host_output_types.push_back(external_input.getType()); @@ -250,6 +441,7 @@ void MoveOutsideCompiledOps( std::string retvals_communication_key = llvm::formatv("host_compute_channel_{0}_retvals", outside_cluster_name) .str(); + auto recv_at_host = builder.create( tpu_cluster.getLoc(), host_output_types, /*dynamic_key=*/compilation_key, @@ -259,9 +451,10 @@ void MoveOutsideCompiledOps( auto host_compute = CreateHostCompute( &builder, tpu_cluster, cluster_ops, external_inputs, external_outputs, args_communication_key, retvals_communication_key); - MoveOutsideClusterOpsToLaunchOp(host_launch_op, cluster_ops); + MoveOutsideClusterOpsToBlock(*block_to_move_host_cluster, cluster_ops, + context); - builder.setInsertionPoint(host_launch_op.GetBody().getTerminator()); + builder.setInsertionPoint(block_to_move_host_cluster->getTerminator()); builder.create( tpu_cluster.getLoc(), external_outputs, /*dynamic_key=*/compilation_key, @@ -279,7 +472,8 @@ void MoveOutsideCompiledOps( // Creates a `parallel_execute` op in place of launch with 'clusters` and // 'launch` as regions. -void CreateParallelExecuteFromOutsideClusters(tf_device::ClusterOp tpu_cluster, +void CreateParallelExecuteFromOutsideClusters(ModuleOp module, + tf_device::ClusterOp tpu_cluster, const OutsideClusterMap& clusters, llvm::StringRef host_device) { OpBuilder builder(tpu_cluster); @@ -295,6 +489,7 @@ void CreateParallelExecuteFromOutsideClusters(tf_device::ClusterOp tpu_cluster, Block& outside_block = parallel_execute_op.GetRegionBlockWithIndex(cluster.index()); + builder.setInsertionPointToEnd(&outside_block); tf_device::LaunchOp host_launch_op = CreateLaunchOpForOutsideCluster( &builder, cluster_ops.back(), host_device); @@ -303,10 +498,9 @@ void CreateParallelExecuteFromOutsideClusters(tf_device::ClusterOp tpu_cluster, auto external_inputs = GetExternalOperands(cluster_ops); auto external_outputs = GetExternalOutputs(cluster_ops); - MoveOutsideCompiledOps(tpu_cluster, cluster.value().getFirst(), + MoveOutsideCompiledOps(module, tpu_cluster, cluster.value().getFirst(), host_launch_op, cluster_ops, external_inputs, external_outputs); - builder.setInsertionPointToEnd(&outside_block); builder.create(tpu_cluster.getLoc(), ArrayRef{}); @@ -352,7 +546,8 @@ void TPUExtractOutsideCompilation::runOnOperation() { std::string host_device; tensorflow::GetHostDeviceOutsideComputation(devices, tpu_cluster, &host_device); - CreateParallelExecuteFromOutsideClusters(tpu_cluster, clusters, + + CreateParallelExecuteFromOutsideClusters(module, tpu_cluster, clusters, host_device); return WalkResult::advance(); From 53f24479118a9a0a358a86c2bd93654b776f0870 Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Mon, 10 Aug 2020 16:17:30 -0700 Subject: [PATCH 2474/2522] [TF2XLA] [NFC] Allow using XlaCompileOnDemandOp without XlaDeviceMetadata PiperOrigin-RevId: 325910208 Change-Id: I24f6b14fa24c614b0994ee2efdd077e5ef2fe55e --- tensorflow/compiler/jit/BUILD | 5 +- tensorflow/compiler/jit/kernels/xla_ops.cc | 146 +--------------- tensorflow/compiler/jit/kernels/xla_ops.h | 56 +------ .../compiler/jit/xla_compile_on_demand_op.cc | 57 +++---- .../compiler/jit/xla_compile_on_demand_op.h | 12 +- tensorflow/compiler/jit/xla_platform_info.cc | 158 ++++++++++++++++++ 
tensorflow/compiler/jit/xla_platform_info.h | 108 ++++++++++++ 7 files changed, 315 insertions(+), 227 deletions(-) create mode 100644 tensorflow/compiler/jit/xla_platform_info.cc create mode 100644 tensorflow/compiler/jit/xla_platform_info.h diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index 63f985935fb..d05bb8264c3 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -195,6 +195,7 @@ XLA_DEVICE_DEPS = [ "//tensorflow/core/kernels/data:optional_ops", "//tensorflow/core/kernels/data:prefetch_dataset_op", "//tensorflow/core/profiler/lib:traceme", + "//tensorflow/stream_executor:tf_allocator_adapter", "//tensorflow/stream_executor/platform", ] @@ -205,16 +206,18 @@ cc_library( "xla_device.cc", "xla_device_context.cc", "xla_device_ops.cc", + "xla_platform_info.cc", ], hdrs = [ "xla_compile_on_demand_op.h", "xla_device.h", "xla_device_context.h", "xla_device_ops.h", + "xla_platform_info.h", ], # Public visibility is needed for external TF/XLA backends. visibility = ["//visibility:public"], - deps = XLA_DEVICE_DEPS, + deps = XLA_DEVICE_DEPS + [":xla_compilation_cache"], ) cc_library( diff --git a/tensorflow/compiler/jit/kernels/xla_ops.cc b/tensorflow/compiler/jit/kernels/xla_ops.cc index 38e33a60657..9cee4b9af28 100644 --- a/tensorflow/compiler/jit/kernels/xla_ops.cc +++ b/tensorflow/compiler/jit/kernels/xla_ops.cc @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/compiler/jit/flags.h" #include "tensorflow/compiler/jit/xla_activity_listener.h" #include "tensorflow/compiler/jit/xla_cluster_util.h" +#include "tensorflow/compiler/jit/xla_platform_info.h" #include "tensorflow/compiler/tf2xla/shape_util.h" #include "tensorflow/compiler/tf2xla/tf2xla_util.h" #include "tensorflow/compiler/tf2xla/xla_compiler.h" @@ -63,38 +64,6 @@ namespace tensorflow { namespace { -XlaPlatformInfo PlatformInfoFromContext(OpKernelConstruction* ctx) { - DeviceType device_type = ctx->device_type(); - se::Platform::Id platform_id = nullptr; - const XlaDevice::Metadata* xla_device_metadata = nullptr; - se::DeviceMemoryAllocator* custom_allocator = nullptr; - - if (ctx->device_type() == DeviceType(DEVICE_CPU)) { - platform_id = se::host::kHostPlatformId; - } else if (ctx->device_type() == DeviceType(DEVICE_GPU)) { - platform_id = ctx->device() - ->tensorflow_gpu_device_info() - ->stream->parent() - ->platform() - ->id(); - } else if (XlaDevice::GetMetadata(ctx, &xla_device_metadata).ok()) { - // If we are on an XlaDevice, use the underlying XLA platform's allocator - // directly. We could use the StreamExecutor's allocator which may - // theoretically be more correct, but XLA returns a nice OOM message in a - // Status and StreamExecutor does not. - // - // Importantly we can't use ctx->device()->GetAllocator() as the allocator - // (which xla_allocator above uses) as on an XlaDevice, this is a dummy - // allocator that returns XlaTensor objects. The XlaCompiler needs a real - // allocator to allocate real buffers. - platform_id = xla_device_metadata->platform()->id(); - custom_allocator = - xla_device_metadata->client()->backend().memory_allocator(); - } - - return XlaPlatformInfo(device_type, platform_id, xla_device_metadata, - custom_allocator); -} // A closure describing how to run a compiled version of a TensorFlow function. 
// @@ -178,31 +147,6 @@ class XlaExecutableClosureStore { TF_DISALLOW_COPY_AND_ASSIGN(XlaExecutableClosureStore); }; -// Return allocator from platform info if non-null, or populate and return a -// pointer to the allocator adapter with allocator from context. -// -// This is necessary because for XLA devices the underlying TF allocator returns -// dummy tensors. -se::DeviceMemoryAllocator* GetAllocator( - absl::optional* tf_allocator_adapter, - OpKernelContext* ctx, const XlaPlatformInfo& platform_info) { - if (platform_info.custom_allocator()) { - return platform_info.custom_allocator(); - } - if (!ctx->op_device_context()) { - // Stream is not set for the host platform. - se::Platform* platform = - se::MultiPlatformManager::PlatformWithId(platform_info.platform_id()) - .ValueOrDie(); - tf_allocator_adapter->emplace(ctx->device()->GetAllocator({}), platform); - return &tf_allocator_adapter->value(); - } - // platform_info. - tf_allocator_adapter->emplace(ctx->device()->GetAllocator({}), - ctx->op_device_context()->stream()); - return &tf_allocator_adapter->value(); -} - } // namespace XlaLocalLaunchBase::XlaLocalLaunchBase(OpKernelConstruction* ctx, @@ -214,65 +158,9 @@ XlaLocalLaunchBase::XlaLocalLaunchBase(OpKernelConstruction* ctx, constants_(constants), resources_(resources), function_(function), - platform_info_(PlatformInfoFromContext(ctx)), + platform_info_(XlaPlatformInfoFromContext(ctx)), has_ref_vars_(has_ref_vars) {} -static Status BuildCompilationCache(OpKernelContext* ctx, - const XlaPlatformInfo& platform_info, - XlaCompilationCache** cache) { - if (platform_info.xla_device_metadata()) { - *cache = new XlaCompilationCache( - platform_info.xla_device_metadata()->client(), - platform_info.xla_device_metadata()->jit_device_type()); - return Status::OK(); - } - - auto platform = - se::MultiPlatformManager::PlatformWithId(platform_info.platform_id()); - if (!platform.ok()) { - return platform.status(); - } - - xla::StatusOr compiler_for_platform = - xla::Compiler::GetForPlatform(platform.ValueOrDie()); - if (!compiler_for_platform.ok()) { - // In some rare cases (usually in unit tests with very small clusters) we - // may end up transforming an XLA cluster with at least one GPU operation - // (which would normally force the cluster to be compiled using XLA:GPU) - // into an XLA cluster with no GPU operations (i.e. containing only CPU - // operations). Such a cluster can fail compilation (in way that - // MarkForCompilation could not have detected) if the CPU JIT is not linked - // in. - // - // So bail out of _XlaCompile in this case, and let the executor handle the - // situation for us. 
- const Status& status = compiler_for_platform.status(); - if (status.code() == error::NOT_FOUND) { - return errors::Unimplemented("Could not find compiler for platform ", - platform.ValueOrDie()->Name(), ": ", - status.ToString()); - } - } - - xla::LocalClientOptions client_options; - client_options.set_platform(platform.ValueOrDie()); - client_options.set_intra_op_parallelism_threads( - ctx->device()->tensorflow_cpu_worker_threads()->num_threads); - auto client = xla::ClientLibrary::GetOrCreateLocalClient(client_options); - if (!client.ok()) { - return client.status(); - } - const XlaOpRegistry::DeviceRegistration* registration; - if (!XlaOpRegistry::GetCompilationDevice(platform_info.device_type().type(), - ®istration)) { - return errors::InvalidArgument("No JIT device registered for ", - platform_info.device_type().type()); - } - *cache = new XlaCompilationCache( - client.ValueOrDie(), DeviceType(registration->compilation_device_name)); - return Status::OK(); -} - static Status CompileToLocalExecutable( OpKernelContext* ctx, const NameAttrList& function, bool has_ref_vars, const XlaPlatformInfo& platform_info, @@ -292,7 +180,7 @@ static Status CompileToLocalExecutable( TF_RETURN_IF_ERROR(rm->LookupOrCreate( rm->default_container(), "xla_cache", &cache, [&](XlaCompilationCache** cache) { - return BuildCompilationCache(ctx, platform_info, cache); + return BuildXlaCompilationCache(ctx, platform_info, cache); })); // Hold the reference to the JIT during evaluation. (We could probably // free it sooner because the ResourceMgr will retain a reference, but @@ -302,32 +190,14 @@ static Status CompileToLocalExecutable( *client = static_cast(cache->client()); absl::optional tf_allocator_adapter; - XlaCompiler::Options options; - options.client = *client; - if (ctx->op_device_context() != nullptr) { - options.device_ordinal = - ctx->op_device_context()->stream()->parent()->device_ordinal(); - } - options.device_type = cache->device_type(); - options.flib_def = ctx->function_library()->GetFunctionLibraryDefinition(); - options.graph_def_version = ctx->function_library()->graph_def_version(); - options.allow_cpu_custom_calls = - (platform_info.platform_id() == se::host::kHostPlatformId); - options.device_allocator = - GetAllocator(&tf_allocator_adapter, ctx, platform_info); - if (platform_info.xla_device_metadata()) { - options.shape_representation_fn = - platform_info.xla_device_metadata()->shape_representation_fn(); - } - // If reference variables are not present in the graph, we can safely alias - // passthrough parameters without performing a copy. 
- options.alias_passthrough_params = - !has_ref_vars && !platform_info.is_on_xla_device(); + XlaCompiler::Options options = GenerateCompilerOptions( + cache, ctx, platform_info, has_ref_vars, &tf_allocator_adapter); std::map constant_args; for (int i : constants) { constant_args.insert({i, ctx->input(i)}); } + XlaCompiler::CompileOptions compile_options; compile_options.is_entry_computation = true; // Optimization: where possible, have the computation return a naked array @@ -503,7 +373,7 @@ XlaCompileOp::XlaCompileOp(OpKernelConstruction* ctx) constants_(ConstantsVector(ctx)), resources_(ResourcesVector(ctx)), function_(FunctionAttr(ctx)), - platform_info_(PlatformInfoFromContext(ctx)), + platform_info_(XlaPlatformInfoFromContext(ctx)), must_compile_(MustCompileAttr(ctx)), has_ref_vars_(HasRefVars(ctx)) {} @@ -591,7 +461,7 @@ void XlaCompileOp::Compute(OpKernelContext* ctx) { } XlaRunOp::XlaRunOp(OpKernelConstruction* ctx) - : OpKernel(ctx), platform_info_(PlatformInfoFromContext(ctx)) {} + : OpKernel(ctx), platform_info_(XlaPlatformInfoFromContext(ctx)) {} void XlaRunOp::Compute(OpKernelContext* ctx) { VLOG(3) << "XlaRunOp " << def().name(); diff --git a/tensorflow/compiler/jit/kernels/xla_ops.h b/tensorflow/compiler/jit/kernels/xla_ops.h index 112408226a8..78707c8126d 100644 --- a/tensorflow/compiler/jit/kernels/xla_ops.h +++ b/tensorflow/compiler/jit/kernels/xla_ops.h @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/compiler/jit/xla_compilation_cache.h" #include "tensorflow/compiler/jit/xla_device.h" #include "tensorflow/compiler/jit/xla_launch_util.h" +#include "tensorflow/compiler/jit/xla_platform_info.h" #include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" @@ -31,61 +32,6 @@ limitations under the License. namespace tensorflow { -// Holds some information about the platform on which an -// XlaLaunch/_XlaCompile/_XlaRun op must run on. -class XlaPlatformInfo { - public: - XlaPlatformInfo() : device_type_("") {} - XlaPlatformInfo(XlaPlatformInfo&&) = default; - explicit XlaPlatformInfo(const DeviceType device_type, - se::Platform::Id platform_id, - const XlaDevice::Metadata* xla_device_metadata, - se::DeviceMemoryAllocator* device_allocator) - : device_type_(device_type), - platform_id_(platform_id), - xla_device_metadata_(xla_device_metadata), - device_allocator_(device_allocator) {} - - XlaPlatformInfo& operator=(XlaPlatformInfo&& other) = default; - - bool UseMultipleStreams() const { - return xla_device_metadata_ && xla_device_metadata_->UseMultipleStreams(); - } - - // Non-null only when run on an XLA device. - se::DeviceMemoryAllocator* custom_allocator() const { - return device_allocator_; - } - - DeviceType device_type() const { return device_type_; } - - // This is equal to xla_device_metadata()->platform()->id() if - // xla_device_metadata() is not nullptr. - se::Platform::Id platform_id() const { return platform_id_; } - - // This may be null if the op this XlaPlatformInfo is for was not placed on an - // XLA device. - const XlaDevice::Metadata* xla_device_metadata() const { - return xla_device_metadata_; - } - bool is_on_xla_device() const { return xla_device_metadata() != nullptr; } - - private: - DeviceType device_type_; - se::Platform::Id platform_id_; - - // xla_device_metadata_ lives in the tensorflow::DeviceBase in which the - // XlaLaunch/_XlaCompile/_XlaRun op is placed and thus does not die before the - // XlaLaunch/_XlaCompile/_XlaRun OpKernel. 
- const XlaDevice::Metadata* xla_device_metadata_; - - // If the op associated with this XlaPlatformInfo is placed on an XLA device - // then device_allocator_ is the xla::Backend's memory allocator. If the op - // is placed on a regular CPU or GPU device then device_allocator_ is null. - se::DeviceMemoryAllocator* device_allocator_; - - TF_DISALLOW_COPY_AND_ASSIGN(XlaPlatformInfo); -}; // XlaLocalLaunchBase is almost the same as XlaLocalLaunchOp. // The only difference is that it does not require arguments to follow diff --git a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc index 50813859603..d1ea9083796 100644 --- a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc +++ b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc @@ -20,6 +20,7 @@ limitations under the License. #include "absl/memory/memory.h" #include "tensorflow/compiler/jit/xla_device.h" #include "tensorflow/compiler/jit/xla_launch_util.h" +#include "tensorflow/compiler/jit/xla_platform_info.h" #include "tensorflow/compiler/tf2xla/const_analysis.h" #include "tensorflow/compiler/tf2xla/tf2xla_util.h" #include "tensorflow/compiler/tf2xla/xla_compiler.h" @@ -41,18 +42,19 @@ static std::vector GetResourceVariableIndices(OpKernelContext* ctx) { } Status XlaCompileOnDemandOp::Run(OpKernelContext* ctx, - const XlaDevice::Metadata& metadata, + XlaCompilationCache* cache, const XlaCompiler::CompilationResult* result, xla::LocalExecutable* executable, const ResourceVarsSnapshot& variable_args) { - xla::LocalClient* client = metadata.client(); + xla::LocalClient* client = static_cast(cache->client()); - // Builds an XLA allocator for the device. XlaComputationLaunchContext launch_context( client, client->backend().memory_allocator(), client->default_device_ordinal(), - /*allocate_xla_tensors=*/true, - /*use_multiple_streams=*/metadata.UseMultipleStreams()); + /*allocate_xla_tensors=*/platform_info_.xla_device_metadata() != nullptr, + platform_info_.xla_device_metadata() + ? platform_info_.xla_device_metadata()->UseMultipleStreams() + : false); std::map snapshot_ptrs; for (auto& p : variable_args) { @@ -70,7 +72,6 @@ Status XlaCompileOnDemandOp::Run(OpKernelContext* ctx, se::Stream* stream = ctx->op_device_context() ? 
ctx->op_device_context()->stream() : nullptr; - TF_RET_CHECK(stream); VLOG(2) << "Executing computation: " << name(); xla::ExecutableRunOptions run_options; @@ -116,9 +117,9 @@ Status XlaCompileOnDemandOp::ShouldArgumentBeConstant( } Status XlaCompileOnDemandOp::Compile( - OpKernelContext* ctx, const XlaDevice::Metadata& metadata, - const XlaCompiler::CompilationResult** result, - ResourceVarsSnapshot* variable_args, xla::LocalExecutable** executable) { + OpKernelContext* ctx, const XlaCompiler::CompilationResult** result, + XlaCompilationCache** cache, ResourceVarsSnapshot* variable_args, + xla::LocalExecutable** executable) { std::map constant_arguments; for (int64 i = 0; i < ctx->num_inputs(); ++i) { const Tensor& device_tensor = ctx->input(i); @@ -168,24 +169,16 @@ Status XlaCompileOnDemandOp::Compile( ResourceMgr* rm = ctx->resource_manager(); CHECK(rm); - XlaCompilationCache* cache; TF_RETURN_IF_ERROR(rm->LookupOrCreate( - rm->default_container(), "xla_cache", &cache, - [&](XlaCompilationCache** cache) { - *cache = new XlaCompilationCache(metadata.client(), - metadata.jit_device_type()); - return Status::OK(); + rm->default_container(), "xla_cache", cache, + [&](XlaCompilationCache** write_into_cache) { + return BuildXlaCompilationCache(ctx, platform_info_, write_into_cache); })); - // Hold the reference to the JIT during evaluation. (We could probably - // free it sooner because the ResourceMgr will retain a reference, but - // this is more obviously correct.) - core::ScopedUnref cache_ref(cache); - XlaCompiler::Options options; - options.device_type = metadata.jit_device_type(); - options.client = metadata.client(); - options.flib_def = ctx->function_library()->GetFunctionLibraryDefinition(); - options.shape_representation_fn = metadata.shape_representation_fn(); + absl::optional tf_allocator_adapter; + XlaCompiler::Options options = + GenerateCompilerOptions(*cache, ctx, platform_info_, + /*has_ref_vars=*/true, &tf_allocator_adapter); XlaCompiler::CompileOptions compile_options; compile_options.is_entry_computation = true; @@ -206,19 +199,23 @@ Status XlaCompileOnDemandOp::Compile( constant_arguments, variable_infos, ctx, &args)); } - return cache->CompileSingleOp(options, args, ctx, compile_options, result, - executable); + return (*cache)->CompileSingleOp(options, args, ctx, compile_options, result, + executable); } void XlaCompileOnDemandOp::Compute(OpKernelContext* ctx) { const XlaCompiler::CompilationResult* result; xla::LocalExecutable* executable; - const XlaDevice::Metadata* metadata; - OP_REQUIRES_OK(ctx, XlaDevice::GetMetadata(ctx, &metadata)); ResourceVarsSnapshot variable_args; + XlaCompilationCache* cache; OP_REQUIRES_OK(ctx, - Compile(ctx, *metadata, &result, &variable_args, &executable)); - OP_REQUIRES_OK(ctx, Run(ctx, *metadata, result, executable, variable_args)); + Compile(ctx, &result, &cache, &variable_args, &executable)); + + // Hold the reference to the JIT during evaluation. (We could probably + // free it sooner because the ResourceMgr will retain a reference, but + // this is more obviously correct.) + core::ScopedUnref cache_ref(cache); + OP_REQUIRES_OK(ctx, Run(ctx, cache, result, executable, variable_args)); } } // namespace tensorflow diff --git a/tensorflow/compiler/jit/xla_compile_on_demand_op.h b/tensorflow/compiler/jit/xla_compile_on_demand_op.h index cc5f2f1e42f..a3fb60febd7 100644 --- a/tensorflow/compiler/jit/xla_compile_on_demand_op.h +++ b/tensorflow/compiler/jit/xla_compile_on_demand_op.h @@ -21,6 +21,7 @@ limitations under the License. 
#include "tensorflow/compiler/jit/xla_device.h" #include "tensorflow/compiler/jit/xla_launch_util.h" +#include "tensorflow/compiler/jit/xla_platform_info.h" #include "tensorflow/compiler/tf2xla/xla_compiler.h" #include "tensorflow/compiler/xla/client/local_client.h" #include "tensorflow/core/framework/function.h" @@ -35,7 +36,8 @@ namespace tensorflow { // vanilla TensorFlow op as long as the bridge supports it. class XlaCompileOnDemandOp : public OpKernel { public: - explicit XlaCompileOnDemandOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + explicit XlaCompileOnDemandOp(OpKernelConstruction* ctx) + : OpKernel(ctx), platform_info_(XlaPlatformInfoFromContext(ctx)) {} void Compute(OpKernelContext* ctx) override; private: @@ -46,14 +48,18 @@ class XlaCompileOnDemandOp : public OpKernel { Status MustArgumentBeConstant(const OpKernel* op_kernel, int64 argument_idx, FunctionLibraryRuntime* flib_runtime, bool* result); - Status Compile(OpKernelContext* ctx, const XlaDevice::Metadata& metadata, + Status Compile(OpKernelContext* ctx, const XlaCompiler::CompilationResult** result, + XlaCompilationCache** cache, ResourceVarsSnapshot* variable_args, xla::LocalExecutable** executable); - Status Run(OpKernelContext* ctx, const XlaDevice::Metadata& metadata, + + Status Run(OpKernelContext* ctx, XlaCompilationCache* cache, const XlaCompiler::CompilationResult* result, xla::LocalExecutable* executable, const ResourceVarsSnapshot& variable_args); + + const XlaPlatformInfo platform_info_; }; } // namespace tensorflow diff --git a/tensorflow/compiler/jit/xla_platform_info.cc b/tensorflow/compiler/jit/xla_platform_info.cc new file mode 100644 index 00000000000..e2a89353055 --- /dev/null +++ b/tensorflow/compiler/jit/xla_platform_info.cc @@ -0,0 +1,158 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/jit/xla_platform_info.h" + +#include "tensorflow/compiler/xla/client/client_library.h" + +namespace tensorflow { + +Status BuildXlaCompilationCache(OpKernelContext* ctx, + const XlaPlatformInfo& platform_info, + XlaCompilationCache** cache) { + if (platform_info.xla_device_metadata()) { + *cache = new XlaCompilationCache( + platform_info.xla_device_metadata()->client(), + platform_info.xla_device_metadata()->jit_device_type()); + return Status::OK(); + } + + auto platform = + se::MultiPlatformManager::PlatformWithId(platform_info.platform_id()); + if (!platform.ok()) { + return platform.status(); + } + + xla::StatusOr compiler_for_platform = + xla::Compiler::GetForPlatform(platform.ValueOrDie()); + if (!compiler_for_platform.ok()) { + // In some rare cases (usually in unit tests with very small clusters) we + // may end up transforming an XLA cluster with at least one GPU operation + // (which would normally force the cluster to be compiled using XLA:GPU) + // into an XLA cluster with no GPU operations (i.e. containing only CPU + // operations). 
Such a cluster can fail compilation (in way that + // MarkForCompilation could not have detected) if the CPU JIT is not linked + // in. + // + // So bail out of _XlaCompile in this case, and let the executor handle the + // situation for us. + const Status& status = compiler_for_platform.status(); + if (status.code() == error::NOT_FOUND) { + return errors::Unimplemented("Could not find compiler for platform ", + platform.ValueOrDie()->Name(), ": ", + status.ToString()); + } + } + + xla::LocalClientOptions client_options; + client_options.set_platform(platform.ValueOrDie()); + client_options.set_intra_op_parallelism_threads( + ctx->device()->tensorflow_cpu_worker_threads()->num_threads); + auto client = xla::ClientLibrary::GetOrCreateLocalClient(client_options); + if (!client.ok()) { + return client.status(); + } + const XlaOpRegistry::DeviceRegistration* registration; + if (!XlaOpRegistry::GetCompilationDevice(platform_info.device_type().type(), + ®istration)) { + return errors::InvalidArgument("No JIT device registered for ", + platform_info.device_type().type()); + } + *cache = new XlaCompilationCache( + client.ValueOrDie(), DeviceType(registration->compilation_device_name)); + return Status::OK(); +} + +XlaPlatformInfo XlaPlatformInfoFromContext(OpKernelConstruction* ctx) { + DeviceType device_type = ctx->device_type(); + se::Platform::Id platform_id = nullptr; + const XlaDevice::Metadata* xla_device_metadata = nullptr; + se::DeviceMemoryAllocator* custom_allocator = nullptr; + + if (ctx->device_type() == DeviceType(DEVICE_CPU)) { + platform_id = se::host::kHostPlatformId; + } else if (ctx->device_type() == DeviceType(DEVICE_GPU)) { + platform_id = ctx->device() + ->tensorflow_gpu_device_info() + ->stream->parent() + ->platform() + ->id(); + } else if (XlaDevice::GetMetadata(ctx, &xla_device_metadata).ok()) { + // If we are on an XlaDevice, use the underlying XLA platform's allocator + // directly. We could use the StreamExecutor's allocator which may + // theoretically be more correct, but XLA returns a nice OOM message in a + // Status and StreamExecutor does not. + // + // Importantly we can't use ctx->device()->GetAllocator() as the allocator + // (which xla_allocator above uses) as on an XlaDevice, this is a dummy + // allocator that returns XlaTensor objects. The XlaCompiler needs a real + // allocator to allocate real buffers. + platform_id = xla_device_metadata->platform()->id(); + custom_allocator = + xla_device_metadata->client()->backend().memory_allocator(); + } + + return XlaPlatformInfo(device_type, platform_id, xla_device_metadata, + custom_allocator); +} + +se::DeviceMemoryAllocator* GetAllocator( + absl::optional* tf_allocator_adapter, + OpKernelContext* ctx, const XlaPlatformInfo& platform_info) { + if (platform_info.custom_allocator()) { + return platform_info.custom_allocator(); + } + if (!ctx->op_device_context()) { + // Stream is not set for the host platform. 
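    // The adapter constructed on this host path wraps the device's TF
    // allocator together with the platform resolved from
    // platform_info.platform_id(); ValueOrDie() will CHECK-fail if that
    // platform is not registered. The non-host path below pairs the allocator
    // with the op's stream instead. Either way the returned pointer points
    // into *tf_allocator_adapter, so it is only valid while that optional
    // stays alive in the caller.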
+ se::Platform* platform = + se::MultiPlatformManager::PlatformWithId(platform_info.platform_id()) + .ValueOrDie(); + tf_allocator_adapter->emplace(ctx->device()->GetAllocator({}), platform); + return &tf_allocator_adapter->value(); + } + tf_allocator_adapter->emplace(ctx->device()->GetAllocator({}), + ctx->op_device_context()->stream()); + return &tf_allocator_adapter->value(); +} + +XlaCompiler::Options GenerateCompilerOptions( + XlaCompilationCache* cache, OpKernelContext* ctx, + const XlaPlatformInfo& platform_info, bool has_ref_vars, + absl::optional* tf_allocator_adapter) { + XlaCompiler::Options options; + options.client = static_cast(cache->client()); + if (ctx->op_device_context() != nullptr) { + options.device_ordinal = + ctx->op_device_context()->stream()->parent()->device_ordinal(); + } + options.device_type = cache->device_type(); + options.flib_def = ctx->function_library()->GetFunctionLibraryDefinition(); + options.graph_def_version = ctx->function_library()->graph_def_version(); + options.allow_cpu_custom_calls = + (platform_info.platform_id() == se::host::kHostPlatformId); + options.device_allocator = + GetAllocator(tf_allocator_adapter, ctx, platform_info); + if (platform_info.xla_device_metadata()) { + options.shape_representation_fn = + platform_info.xla_device_metadata()->shape_representation_fn(); + } + // If reference variables are not present in the graph, we can safely alias + // passthrough parameters without performing a copy. + options.alias_passthrough_params = + !has_ref_vars && !platform_info.is_on_xla_device(); + return options; +} + +} // namespace tensorflow diff --git a/tensorflow/compiler/jit/xla_platform_info.h b/tensorflow/compiler/jit/xla_platform_info.h new file mode 100644 index 00000000000..dac45529ac9 --- /dev/null +++ b/tensorflow/compiler/jit/xla_platform_info.h @@ -0,0 +1,108 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_JIT_XLA_PLATFORM_INFO_H_ +#define TENSORFLOW_COMPILER_JIT_XLA_PLATFORM_INFO_H_ + +#include "tensorflow/compiler/jit/xla_compilation_cache.h" +#include "tensorflow/compiler/jit/xla_device.h" +#include "tensorflow/stream_executor/tf_allocator_adapter.h" + +namespace tensorflow { + +// Holds some information about the platform on which an +// XlaLaunch/_XlaCompile/_XlaRun op must run on. Provides a common layer of +// abstraction for normal and XLA devices. 
+class XlaPlatformInfo { + public: + XlaPlatformInfo() : device_type_("") {} + XlaPlatformInfo(XlaPlatformInfo&&) = default; + explicit XlaPlatformInfo(const DeviceType device_type, + se::Platform::Id platform_id, + const XlaDevice::Metadata* xla_device_metadata, + se::DeviceMemoryAllocator* device_allocator) + : device_type_(device_type), + platform_id_(platform_id), + xla_device_metadata_(xla_device_metadata), + device_allocator_(device_allocator) {} + + XlaPlatformInfo& operator=(XlaPlatformInfo&& other) = default; + + bool UseMultipleStreams() const { + return xla_device_metadata_ && xla_device_metadata_->UseMultipleStreams(); + } + + // Non-null only when run on an XLA device. + se::DeviceMemoryAllocator* custom_allocator() const { + return device_allocator_; + } + + DeviceType device_type() const { return device_type_; } + + // This is equal to xla_device_metadata()->platform()->id() if + // xla_device_metadata() is not nullptr. + se::Platform::Id platform_id() const { return platform_id_; } + + // This may be null if the op this XlaPlatformInfo is for was not placed on an + // XLA device. + const XlaDevice::Metadata* xla_device_metadata() const { + return xla_device_metadata_; + } + bool is_on_xla_device() const { return xla_device_metadata() != nullptr; } + + private: + DeviceType device_type_; + se::Platform::Id platform_id_; + + // xla_device_metadata_ lives in the tensorflow::DeviceBase in which the + // XlaLaunch/_XlaCompile/_XlaRun op is placed and thus does not die before the + // XlaLaunch/_XlaCompile/_XlaRun OpKernel. + const XlaDevice::Metadata* xla_device_metadata_; + + // If the op associated with this XlaPlatformInfo is placed on an XLA device + // then device_allocator_ is the xla::Backend's memory allocator. If the op + // is placed on a regular CPU or GPU device then device_allocator_ is null. + se::DeviceMemoryAllocator* device_allocator_; + + TF_DISALLOW_COPY_AND_ASSIGN(XlaPlatformInfo); +}; + +// Returns created XLA compilation cache. +Status BuildXlaCompilationCache(OpKernelContext* ctx, + const XlaPlatformInfo& platform_info, + XlaCompilationCache** cache); + +// Returns information about the platform from kernel context. +XlaPlatformInfo XlaPlatformInfoFromContext(OpKernelConstruction* ctx); + +// Returns allocator from platform info if non-null, or populate and return a +// pointer to the allocator adapter with allocator from context. +// +// This is necessary because for XLA devices the underlying TF allocator returns +// dummy tensors. +se::DeviceMemoryAllocator* GetAllocator( + absl::optional* tf_allocator_adapter, + OpKernelContext* ctx, const XlaPlatformInfo& platform_info); + +// Returns created options for the XLA compiler, and writes the used allocator +// into `tf_allocator_adapter`. 
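// A minimal usage sketch, mirroring the call sites in xla_ops.cc and
// xla_compile_on_demand_op.cc (the surrounding `cache`, `ctx` and
// `platform_info` values are whatever the calling kernel already holds):
//
//   absl::optional<se::TfAllocatorAdapter> tf_allocator_adapter;
//   XlaCompiler::Options options = GenerateCompilerOptions(
//       cache, ctx, platform_info, /*has_ref_vars=*/true,
//       &tf_allocator_adapter);
//
// The adapter must outlive any use of options.device_allocator, which points
// into it.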
+XlaCompiler::Options GenerateCompilerOptions( + XlaCompilationCache* cache, OpKernelContext* ctx, + const XlaPlatformInfo& platform_info, bool has_ref_vars, + absl::optional* tf_allocator_adapter); + +} // namespace tensorflow + +#endif // TENSORFLOW_COMPILER_JIT_XLA_PLATFORM_INFO_H_ From dda5175d39285f53379ebb0627bdc1d575fea69a Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Mon, 10 Aug 2020 16:22:41 -0700 Subject: [PATCH 2475/2522] [TF2XLA] Simplify logic for finding constant arguments in XlaCompileOnDemandOp PiperOrigin-RevId: 325911135 Change-Id: I7664ed7d19edb5aba52b2c07443e9786761823bd --- .../compiler/jit/xla_compile_on_demand_op.cc | 42 ++++--------------- .../compiler/jit/xla_compile_on_demand_op.h | 6 --- 2 files changed, 9 insertions(+), 39 deletions(-) diff --git a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc index d1ea9083796..73c512bfa6f 100644 --- a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc +++ b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc @@ -95,53 +95,29 @@ Status XlaCompileOnDemandOp::Run(OpKernelContext* ctx, return Status::OK(); } -Status XlaCompileOnDemandOp::MustArgumentBeConstant( - const OpKernel* op_kernel, int64 argument_idx, - FunctionLibraryRuntime* flib_runtime, bool* result) { - *result = false; - - // TODO(jmolloy): This could be expensive, so memoize. - std::vector constant_input_indices; - TF_RETURN_IF_ERROR(GetCompileTimeConstInputs( - op_kernel, &constant_input_indices, flib_runtime)); - *result = absl::c_binary_search(constant_input_indices, argument_idx); - return Status::OK(); -} - -// TODO(ycao): Remove the need to call ShouldArgumentBeConstant. Its benefit is -// not clear yet and it causes heavy constant analysis to run twice. -Status XlaCompileOnDemandOp::ShouldArgumentBeConstant( - const OpKernel* op_kernel, int64 argument_idx, - FunctionLibraryRuntime* flib_runtime, bool* result) { - return MustArgumentBeConstant(op_kernel, argument_idx, flib_runtime, result); -} - Status XlaCompileOnDemandOp::Compile( OpKernelContext* ctx, const XlaCompiler::CompilationResult** result, XlaCompilationCache** cache, ResourceVarsSnapshot* variable_args, xla::LocalExecutable** executable) { std::map constant_arguments; + + std::vector constant_input_indices; + TF_RETURN_IF_ERROR(GetCompileTimeConstInputs( + &ctx->op_kernel(), &constant_input_indices, ctx->function_library())); + CHECK(absl::c_is_sorted(constant_input_indices)); + for (int64 i = 0; i < ctx->num_inputs(); ++i) { const Tensor& device_tensor = ctx->input(i); if (const XlaTensor* xla_tensor = XlaTensor::FromTensor(&device_tensor)) { if (xla_tensor->has_host_tensor()) { - bool should_arg_be_const; - TF_RETURN_IF_ERROR(ShouldArgumentBeConstant(&ctx->op_kernel(), i, - ctx->function_library(), - &should_arg_be_const)); - if (should_arg_be_const) { + if (absl::c_binary_search(constant_input_indices, i)) { constant_arguments[i] = xla_tensor->host_tensor(); } } } - if (constant_arguments.count(i) == 0) { - bool must_argument_be_const; - TF_RETURN_IF_ERROR(MustArgumentBeConstant(&ctx->op_kernel(), i, - ctx->function_library(), - &must_argument_be_const)); - - if (must_argument_be_const) { + if (!constant_arguments.count(i)) { + if (absl::c_binary_search(constant_input_indices, i)) { // Slow path; the argument is not available as a host constant so we // must fetch it synchronously. 
Tensor host_tensor; diff --git a/tensorflow/compiler/jit/xla_compile_on_demand_op.h b/tensorflow/compiler/jit/xla_compile_on_demand_op.h index a3fb60febd7..095d3427d41 100644 --- a/tensorflow/compiler/jit/xla_compile_on_demand_op.h +++ b/tensorflow/compiler/jit/xla_compile_on_demand_op.h @@ -42,12 +42,6 @@ class XlaCompileOnDemandOp : public OpKernel { private: XlaCompiler::Argument CreateCompilerArgument(OpKernelContext* ctx, int64 i); - Status ShouldArgumentBeConstant(const OpKernel* op_kernel, int64 argument_idx, - FunctionLibraryRuntime* flib_runtime, - bool* result); - Status MustArgumentBeConstant(const OpKernel* op_kernel, int64 argument_idx, - FunctionLibraryRuntime* flib_runtime, - bool* result); Status Compile(OpKernelContext* ctx, const XlaCompiler::CompilationResult** result, XlaCompilationCache** cache, From 5db822426c82d63f88e7423bd6d0e5734a70e41a Mon Sep 17 00:00:00 2001 From: Tim Shen Date: Mon, 10 Aug 2020 16:22:44 -0700 Subject: [PATCH 2476/2522] Roll forward XLA/GPU LHLO sort emitter PiperOrigin-RevId: 325911150 Change-Id: Idf8f73f2840377592c4f2eaa439a07c1700236fd --- tensorflow/compiler/mlir/xla/hlo_utils.cc | 3 + .../non_identity_layouts.hlotxt | 2 +- .../xla/transforms/mhlo_to_lhlo_with_xla.cc | 11 +- .../xla/transforms/mhlo_to_lhlo_with_xla.h | 3 +- tensorflow/compiler/xla/service/gpu/BUILD | 10 + .../compiler/xla/service/gpu/gpu_compiler.cc | 24 +- .../xla/service/gpu/hlo_to_ir_bindings.cc | 20 +- .../xla/service/gpu/hlo_to_ir_bindings.h | 4 + .../xla/service/gpu/ir_emitter_context.h | 7 +- .../xla/service/gpu/ir_emitter_unnested.cc | 416 +++++++++++---- .../xla/service/gpu/ir_emitter_unnested.h | 82 ++- .../compiler/xla/service/gpu/tests/BUILD | 29 + .../xla/service/gpu/tests/sorting.hlo | 504 +++++++++--------- .../xla/service/gpu/tests/sorting_test.cc | 71 +++ .../compiler/xla/service/llvm_ir/llvm_util.cc | 7 +- .../compiler/xla/service/llvm_ir/llvm_util.h | 2 +- 16 files changed, 792 insertions(+), 403 deletions(-) create mode 100644 tensorflow/compiler/xla/service/gpu/tests/sorting_test.cc diff --git a/tensorflow/compiler/mlir/xla/hlo_utils.cc b/tensorflow/compiler/mlir/xla/hlo_utils.cc index cf78c81908d..18b4265d786 100644 --- a/tensorflow/compiler/mlir/xla/hlo_utils.cc +++ b/tensorflow/compiler/mlir/xla/hlo_utils.cc @@ -83,6 +83,9 @@ StatusOr> GetPermutationIfAvailable( strides[dim] = accumulated_stride; accumulated_stride *= shape.dimensions(dim); } + if (accumulated_stride == 0) { + return llvm::SmallVector{}; + } return llvm::SmallVector{ makeStridedLinearLayoutMap(strides, /*offset=*/0, builder.getContext())}; } diff --git a/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/non_identity_layouts.hlotxt b/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/non_identity_layouts.hlotxt index 3630d2d45e4..a83e36cff64 100644 --- a/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/non_identity_layouts.hlotxt +++ b/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/non_identity_layouts.hlotxt @@ -8,6 +8,6 @@ HloModule TestModule ENTRY TestComputation { x = f32[3, 2]{1,0} parameter(0) - // CHECK: "lmhlo.copy"(%{{.*}}, %{{.*}}) : (memref<3x2xf32>, memref<3x2xf32, #[[MAP]]>) -> () + // CHECK: "lmhlo.copy"(%{{.*}}, %{{.*}}) {name = "copy.1"} : (memref<3x2xf32>, memref<3x2xf32, #[[MAP]]>) -> () ROOT x.copy = f32[3, 2]{0,1} copy(x) } diff --git a/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.cc b/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.cc index 832bad2dcc8..6ce91599fb1 100644 --- 
a/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.cc +++ b/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.cc @@ -34,7 +34,6 @@ limitations under the License. #include "mlir/Pass/Pass.h" // from @llvm-project #include "mlir/Pass/PassOptions.h" // from @llvm-project #include "mlir/Translation.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" #include "tensorflow/compiler/mlir/xla/hlo_function_importer.h" #include "tensorflow/compiler/mlir/xla/hlo_utils.h" #include "tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.h" @@ -182,7 +181,10 @@ template StatusOr LhloDialectEmitter::CreateOpWithoutAttrs( HloInstruction* instr) { Location loc = getLocation(instr); - ArrayRef> attrs; + std::pair attrs[] = { + {Identifier::get("name", builder_.getContext()), + builder_.getStringAttr(instr->name())}, + }; ArrayRef rets{}; llvm::SmallVector operands; @@ -252,15 +254,14 @@ Status LhloDialectEmitter::DefaultAction(HloInstruction* instr) { return Status::OK(); } -StatusOr LhloDialectEmitter::EmitSortOp( - HloInstruction* instr) { +StatusOr LhloDialectEmitter::EmitSortOp(HloInstruction* instr) { TF_ASSIGN_OR_RETURN(auto sort, CreateOpWithoutAttrs(instr)); auto* sort_instr = ::xla::Cast<::xla::HloSortInstruction>(instr); sort.dimensionAttr(builder_.getI64IntegerAttr(sort_instr->sort_dimension())); sort.is_stableAttr(builder_.getBoolAttr(sort_instr->is_stable())); TF_RETURN_IF_ERROR(::xla::HloFunctionImporter::ImportAsRegion( *sort_instr->called_computations()[0], &sort.comparator(), &builder_)); - return sort.getOperation(); + return sort; } Status LhloDialectEmitter::HandleSort(HloInstruction* instr) { diff --git a/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.h b/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.h index bdc977616b1..4000fa01970 100644 --- a/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.h +++ b/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.h @@ -19,6 +19,7 @@ limitations under the License. 
#include "mlir/IR/Builders.h" // from @llvm-project #include "mlir/IR/Module.h" // from @llvm-project #include "mlir/IR/StandardTypes.h" // from @llvm-project +#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" #include "tensorflow/compiler/xla/service/buffer_assignment.h" #include "tensorflow/compiler/xla/service/hlo_module.h" @@ -41,7 +42,7 @@ class LhloDialectEmitter : public ::xla::DfsHloVisitorWithDefault { builder_(module.getContext()), i8_type_(builder_.getIntegerType(8)) {} - ::xla::StatusOr EmitSortOp(::xla::HloInstruction* instr); + ::xla::StatusOr EmitSortOp(::xla::HloInstruction* instr); private: template diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index 074fbd92b27..a19f9965fc7 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -254,6 +254,11 @@ cc_library( ":target_util", ":thunk", ":thunk_emitter", + "//tensorflow/compiler/mlir/hlo:lhlo", + "//tensorflow/compiler/mlir/xla:hlo_utils", + "//tensorflow/compiler/mlir/xla:mhlo_to_lhlo_with_xla", + "//tensorflow/compiler/mlir/xla:mlir_hlo_to_hlo", + "//tensorflow/compiler/mlir/xla:type_to_shape", "//tensorflow/compiler/xla:literal", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:status_macros", @@ -291,6 +296,8 @@ cc_library( "@com_google_absl//absl/types:span", "@llvm-project//llvm:Core", "@llvm-project//llvm:Support", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:StandardOps", ], ) @@ -1159,6 +1166,7 @@ cc_library( ":target_constants", ":tree_reduction_rewriter", ":variadic_op_splitter", + "//tensorflow/compiler/mlir/xla:mhlo_to_lhlo_with_xla", "//tensorflow/compiler/xla:protobuf_util", "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:statusor", @@ -1217,6 +1225,8 @@ cc_library( "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@llvm-project//llvm:Core", + "@llvm-project//mlir:AllPassesAndDialectsNoRegistration", + "@llvm-project//mlir:IR", ], ) diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index f5bf7476059..b796737e601 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -29,6 +29,8 @@ limitations under the License. 
#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Verifier.h" +#include "mlir/IR/Module.h" // from @llvm-project +#include "mlir/InitAllDialects.h" // from @llvm-project #include "tensorflow/compiler/xla/protobuf_util.h" #include "tensorflow/compiler/xla/service/algebraic_simplifier.h" #include "tensorflow/compiler/xla/service/all_reduce_combiner.h" @@ -516,15 +518,22 @@ static Status CompileModuleToLlvmIrImpl( DumpHloModuleIfEnabled(*hlo_module, **buffer_assignment, "after_optimizations"); + mlir::registerAllDialects(); + mlir::MLIRContext mlir_context; + IrEmitterContext ir_emitter_context( hlo_module, buffer_assignment->get(), platform_name, gpu_device_info, - cuda_compute_capability, profile_index_map, llvm_module->get()); + cuda_compute_capability, profile_index_map, &mlir_context, + llvm_module->get()); HloComputation* entry_computation = hlo_module->entry_computation(); - IrEmitterUnnested ir_emitter(hlo_module->config(), entry_computation, - &ir_emitter_context); - TF_RETURN_IF_ERROR(ir_emitter.EmitConstantGlobals()); + TF_ASSIGN_OR_RETURN( + auto ir_emitter, + IrEmitterUnnested::Create(hlo_module->config(), entry_computation, + &ir_emitter_context)); + + TF_RETURN_IF_ERROR(ir_emitter->EmitConstantGlobals()); { XLA_SCOPED_LOGGING_TIMER("GpuCompiler::RunBackend - IR emission"); @@ -533,9 +542,10 @@ static Status CompileModuleToLlvmIrImpl( ThunkSequence thunk_sequence; absl::Span order = hlo_schedule->ThunkLaunchOrder(); for (HloInstruction* instruction : order) { - TF_RETURN_IF_ERROR(instruction->Visit(&ir_emitter)); - TF_RETURN_IF_ERROR(ir_emitter.Postprocess(instruction)); - std::unique_ptr thunks = ir_emitter.ConsumeThunkSequence(); + TF_RETURN_IF_ERROR(instruction->Visit(ir_emitter.get())); + TF_RETURN_IF_ERROR(ir_emitter->Postprocess(instruction)); + std::unique_ptr thunks = + ir_emitter->ConsumeThunkSequence(); // The invariants between each input HloInstruction* and output Thunk* are // not all explicitly checked, but at least we can document them here: diff --git a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc index 5d38d1b727c..332db83b6ad 100644 --- a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc +++ b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc @@ -117,11 +117,11 @@ static bool HasMeaningfulName(llvm::Value* value) { return false; } -llvm::Value* HloToIrBindings::GetTypedIrValue(const HloInstruction& hlo, - ShapeIndexView shape_index, - llvm::Value* ir_value) { - llvm::Type* pointee_type = llvm_ir::ShapeToIrType( - ShapeUtil::GetSubshape(hlo.shape(), shape_index), module_); +llvm::Value* CastToTypedValue(const Shape& shape, llvm::Value* ir_value, + llvm::IRBuilder<>* b) { + llvm::Type* pointee_type = + llvm_ir::ShapeToIrType(shape, b->GetInsertBlock()->getModule()); + llvm::Type* dest_type = pointee_type->getPointerTo(); llvm::Value* typed_ir_value; @@ -129,9 +129,17 @@ llvm::Value* HloToIrBindings::GetTypedIrValue(const HloInstruction& hlo, typed_ir_value = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( llvm::cast(ir_value), dest_type); } else { - typed_ir_value = b_->CreatePointerBitCastOrAddrSpaceCast( + typed_ir_value = b->CreatePointerBitCastOrAddrSpaceCast( ir_value, pointee_type->getPointerTo()); } + return typed_ir_value; +} + +llvm::Value* HloToIrBindings::GetTypedIrValue(const HloInstruction& hlo, + ShapeIndexView shape_index, + llvm::Value* ir_value) { + auto typed_ir_value = CastToTypedValue( + 
ShapeUtil::GetSubshape(hlo.shape(), shape_index), ir_value, b_); if (!HasMeaningfulName(ir_value)) { ir_value->setName(llvm_ir::IrName(&hlo, "raw")); } diff --git a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h index 5eef6727801..3813ec6c949 100644 --- a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h +++ b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h @@ -116,6 +116,10 @@ class HloToIrBindings { llvm::Value* temp_buffer_base_ = nullptr; }; +// Converts `ir_value` with type i8* to a typed LLVM Value* based on `shape`. +llvm::Value* CastToTypedValue(const Shape& shape, llvm::Value* ir_value, + llvm::IRBuilder<>* b); + } // namespace gpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_context.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_context.h index 9c43f80dc60..7d5a8d032e6 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_context.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_context.h @@ -17,6 +17,7 @@ limitations under the License. #define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_IR_EMITTER_CONTEXT_H_ #include "llvm/IR/Module.h" +#include "mlir/IR/MLIRContext.h" // from @llvm-project #include "tensorflow/compiler/xla/service/buffer_assignment.h" #include "tensorflow/compiler/xla/service/gpu/launch_dimensions.h" #include "tensorflow/compiler/xla/service/hlo_execution_profile.h" @@ -34,13 +35,15 @@ class IrEmitterContext { const HloModule* hlo_module, const BufferAssignment* buffer_assignment, std::string platform_name, GpuDeviceInfo gpu_device_info, absl::optional cuda_compute_capability, - const HloProfileIndexMap* profile_index_map, llvm::Module* llvm_module) + const HloProfileIndexMap* profile_index_map, + mlir::MLIRContext* mlir_context, llvm::Module* llvm_module) : hlo_module_(hlo_module), buffer_assignment_(buffer_assignment), platform_name_(std::move(platform_name)), gpu_device_info_(gpu_device_info), cuda_compute_capability_(cuda_compute_capability), profile_index_map_(profile_index_map), + mlir_context_(mlir_context), llvm_module_(llvm_module) {} // Disallow copy and assign. IrEmitterContext(const IrEmitterContext&) = delete; @@ -57,6 +60,7 @@ class IrEmitterContext { return cuda_compute_capability_; } const HloProfileIndexMap* profile_index_map() { return profile_index_map_; } + mlir::MLIRContext* mlir_context() { return mlir_context_; } llvm::Module* llvm_module() { return llvm_module_; } NameUniquer* name_uniquer() { return &name_uniquer_; } @@ -67,6 +71,7 @@ class IrEmitterContext { GpuDeviceInfo gpu_device_info_; absl::optional cuda_compute_capability_; const HloProfileIndexMap* profile_index_map_; + mlir::MLIRContext* mlir_context_; llvm::Module* llvm_module_; NameUniquer name_uniquer_; }; diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index 61b78b6004d..f88c70b1a33 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -37,6 +37,13 @@ limitations under the License. 
#include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project +#include "mlir/IR/Builders.h" // from @llvm-project +#include "mlir/IR/Function.h" // from @llvm-project +#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" +#include "tensorflow/compiler/mlir/xla/hlo_utils.h" +#include "tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.h" +#include "tensorflow/compiler/mlir/xla/type_to_shape.h" #include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/literal.h" #include "tensorflow/compiler/xla/service/buffer_assignment.h" @@ -144,13 +151,86 @@ void UpdateLaunchDimensions(const LaunchDimensions& launch_dims, Thunk* thunk, llvm::ConstantAsMetadata::get(threads_per_block_ir_value)})); } +const BufferAllocation* GetAllocation( + mlir::BlockArgument func_arg, const BufferAssignment& buffer_assignment) { + auto func_op = + mlir::cast(func_arg.getParentRegion()->getParentOp()); + int64 allocation_index = func_op + .getArgAttrOfType( + func_arg.getArgNumber(), "lmhlo.alloc") + .getValue() + .getSExtValue(); + return &buffer_assignment.GetAllocation(allocation_index); +} + +StatusOr GetAllocationSliceForMlir( + mlir::Value v, const BufferAssignment& buffer_assignment) { + int64 size = v.getType().cast().getSizeInBits() / 8; + + if (auto arg = v.dyn_cast()) { + return BufferAllocation::Slice(GetAllocation(arg, buffer_assignment), 0, + size); + } + + // We match two patterns here: + // * v = ViewOp(arg); + // * v = StaticMemRefCastOp(ViewOp(arg)); + if (mlir::Operation* op = v.getDefiningOp()) { + if (auto cast = mlir::dyn_cast(op)) { + mlir::Value source = cast.getViewSource(); + op = source.getDefiningOp(); + if (!op) { + return Unimplemented("StaticMemRefCastOp has to wrap an op"); + } + } + if (auto view = mlir::dyn_cast(op)) { + return BufferAllocation::Slice( + GetAllocation(view.source().cast(), + buffer_assignment), + mlir::cast(view.byte_shift().getDefiningOp()) + .value() + .cast() + .getValue() + .getSExtValue(), + size); + } + return Unimplemented("StaticMemRefCastOp has to wrap a ViewOp"); + } + + return Unimplemented( + "Operand has to be in the form of ViewOp(arg) or " + "StaticMemRefCastOp(ViewOp(arg))"); +} + +absl::string_view GetHloName(mlir::Operation* op) { + if (auto attr = op->getAttrOfType("name")) { + auto ref = attr.getValue(); + return absl::string_view(ref.data(), ref.size()); + } + return ""; +} + } // namespace IrEmitterUnnested::IrEmitterUnnested(const HloModuleConfig& hlo_module_config, const HloComputation* hlo_computation, IrEmitterContext* ir_emitter_context) : IrEmitter(hlo_module_config, ir_emitter_context, /*is_nested=*/false), - hlo_computation_(hlo_computation) {} + hlo_computation_(hlo_computation), + mlir_scratch_module_(mlir::ModuleOp::create( + mlir::Builder(ir_emitter_context->mlir_context()).getUnknownLoc())), + lhlo_scratch_emitter_(ir_emitter_context_->buffer_assignment(), + *hlo_computation, mlir_scratch_module_.get()) {} + +StatusOr> IrEmitterUnnested::Create( + const HloModuleConfig& hlo_module_config, + const HloComputation* hlo_computation, + IrEmitterContext* ir_emitter_context) { + auto emitter = std::unique_ptr(new IrEmitterUnnested( + hlo_module_config, hlo_computation, ir_emitter_context)); + TF_RETURN_IF_ERROR(emitter->lhlo_scratch_emitter_.Initialize()); + return std::move(emitter); +} Status IrEmitterUnnested::Postprocess(HloInstruction* hlo) { bindings_.UnbindAllLocalIrValues(); @@ -158,12 
+238,11 @@ Status IrEmitterUnnested::Postprocess(HloInstruction* hlo) { } llvm::Function* IrEmitterUnnested::BuildKernelPrototype( - const HloInstruction& inst, - absl::Span args) { + absl::string_view name, absl::Span args) { // Compute the kernel name. The opcode string may contain "-" which cannot be // in a PTX function name, so sanitize the name before uniquifying it. string kernel_name = ir_emitter_context_->name_uniquer()->GetUniqueName( - llvm_ir::SanitizeFunctionName(inst.name())); + llvm_ir::SanitizeFunctionName(std::string(name))); // Create the kernel and add it to the module. llvm::Module* module = ir_emitter_context_->llvm_module(); @@ -359,7 +438,8 @@ Status IrEmitterUnnested::HandleDot(HloInstruction* dot) { } Status IrEmitterUnnested::HandleConditional(HloInstruction* conditional) { - AddThunkToThunkSequence(BuildConditionalThunk(conditional)); + TF_ASSIGN_OR_RETURN(auto thunk, BuildConditionalThunk(conditional)); + AddThunkToThunkSequence(std::move(thunk)); return Status::OK(); } @@ -1038,10 +1118,13 @@ Status IrEmitterUnnested::HandleWhile(HloInstruction* xla_while) { // Build ForThunk for conformant while loops, otherwise build WhileThunk. auto config = xla_while->backend_config(); if (config.ok() && config.ValueOrDie().has_known_trip_count()) { - AddThunkToThunkSequence( + TF_ASSIGN_OR_RETURN( + auto thunk, BuildForThunk(xla_while, config.ValueOrDie().known_trip_count().n())); + AddThunkToThunkSequence(std::move(thunk)); } else { - AddThunkToThunkSequence(BuildWhileThunk(xla_while)); + TF_ASSIGN_OR_RETURN(auto thunk, BuildWhileThunk(xla_while)); + AddThunkToThunkSequence(std::move(thunk)); } return Status::OK(); } @@ -1264,39 +1347,109 @@ Status IrEmitterUnnested::HandleSelect(HloInstruction* select) { return IrEmitter::HandleSelect(select); } +StatusOr +IrEmitterUnnested::GetOrCreateSubComputationFromRegion(mlir::Region* region) { + std::unique_ptr& module = scratch_nested_computations_[region]; + if (module == nullptr) { + xla::XlaComputation xla_computation; + TF_RETURN_IF_ERROR(ConvertRegionToComputation(region, &xla_computation)); + TF_ASSIGN_OR_RETURN(auto program_shape, xla_computation.GetProgramShape()); + TF_ASSIGN_OR_RETURN( + module, HloModule::CreateFromProto(xla_computation.proto(), + HloModuleConfig(program_shape))); + } + return module->entry_computation(); +} + Status IrEmitterUnnested::HandleSort(HloInstruction* sort) { + MlirEmitterInput result; + + TF_ASSIGN_OR_RETURN(auto sort_op, lhlo_scratch_emitter_.EmitSortOp(sort)); + result.op = sort_op; + result.name = GetHloName(sort_op); + // The name in sort op has no semantics, and it's for debug only. If the name + // doesn't exist, we should use a namer (e.g. count-based). + // TODO(timshen): use a namer instead of relying on the HloInstruction names. 
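  // Until such a namer exists, fall back to the HLO instruction's own name so
  // that the VLOG messages and the generated kernel names below remain
  // traceable.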
+ if (result.name.empty()) { + result.name = sort->name(); + } + const auto& buffer_assignment = ir_emitter_context_->buffer_assignment(); + auto& slice = result.extra_slice; + TF_ASSIGN_OR_RETURN(slice.buffer_slice, + buffer_assignment.GetUniqueSlice(sort, {})); + slice.written = true; + slice.shape = sort->shape(); + + result.thunk_info = GetThunkInfo(sort); + + return EmitMlirSort(result); +} + +Status IrEmitterUnnested::EmitMlirSort(MlirEmitterInput input) { + const auto& buffer_assignment = ir_emitter_context_->buffer_assignment(); + auto sort_op = mlir::cast(input.op); + + int operand_count = sort_op.operands().size(); + std::vector operand_shapes(operand_count); + std::vector slices; + std::vector output_shapes(sort_op.output().size()); + + for (int i = 0; i < operand_count; i++) { + operand_shapes[i] = + TypeToShape(sort_op.operands()[i].getType().cast()); + } + + // Craft n + 1 slices, where the first n are output parameters, and the last + // is the on-device tuple storage. We don't need n operands because sorting + // kernels are always in-place. + for (int i = 0; i < operand_count; i++) { + output_shapes[i] = + TypeToShape(sort_op.output()[i].getType().cast()); + MlirBufferSlice slice; + TF_ASSIGN_OR_RETURN( + slice.buffer_slice, + GetAllocationSliceForMlir(sort_op.output()[i], buffer_assignment)); + slice.written = true; + slice.shape = operand_shapes[i]; + slices.push_back(slice); + } + slices.push_back(input.extra_slice); + std::vector> thunks; - Shape keys_shape = sort->operand(0)->shape(); - int64 dimension_to_sort = sort->dimensions(0); - for (int64 i = 0; i < sort->operand_count(); ++i) { - ShapeIndex shape_index = - sort->operand_count() > 1 ? ShapeIndex({i}) : ShapeIndex({}); + + Shape keys_shape = operand_shapes[0]; + int64 dimension_to_sort = sort_op.dimension().getSExtValue(); + for (int64 i = 0; i < operand_count; ++i) { // We assume that the layout of all involved operands and outputs is the // same. - TF_RET_CHECK(LayoutUtil::LayoutsInShapesEqual(keys_shape, - sort->operand(i)->shape())); - TF_RET_CHECK(LayoutUtil::LayoutsInShapesEqual( - keys_shape, ShapeUtil::GetSubshape(sort->shape(), shape_index))); + TF_RET_CHECK( + LayoutUtil::LayoutsInShapesEqual(keys_shape, operand_shapes[i])); + TF_RET_CHECK( + LayoutUtil::LayoutsInShapesEqual(keys_shape, output_shapes[i])); // If possible, we share buffers. If that is not possible, we need to copy // the values, because the emitter does the sorting in-place. - auto destination_buffer = GetAllocationSlice(*sort, shape_index); - auto source_address = GetAllocationSlice(*sort->operand(i)); + TF_ASSIGN_OR_RETURN( + auto destination_buffer, + GetAllocationSliceForMlir(sort_op.output()[i], buffer_assignment)); + TF_ASSIGN_OR_RETURN( + auto source_address, + GetAllocationSliceForMlir(sort_op.operands()[i], buffer_assignment)); if (destination_buffer != source_address) { // TODO(b/26783907): Figure out why we never seem to share buffers for // key/value sort. 
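      // Because the bitonic emitter sorts in place in the output buffers, any
      // operand whose slice differs from its output slice is first staged into
      // that output with an explicit device-to-device copy thunk.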
- VLOG(2) << sort->name() << " requires initial D2D copy for operand " << i; + VLOG(2) << input.name << " requires initial D2D copy for operand " << i; thunks.push_back(absl::make_unique( Thunk::ThunkInfo(), /*source_address=*/source_address, /*destination_buffer=*/destination_buffer, - /*mem_size=*/ShapeUtil::ByteSizeOf(sort->operand(i)->shape()))); + /*mem_size=*/ShapeUtil::ByteSizeOf(operand_shapes[i]))); } } uint64 dimension_to_sort_bound = keys_shape.dimensions(dimension_to_sort); int64 num_stages = tensorflow::Log2Ceiling(dimension_to_sort_bound); - VLOG(2) << sort->name() << " requires " << num_stages << " stages."; + VLOG(2) << input.name << " requires " << num_stages << " stages."; CHECK_GE(1ULL << num_stages, dimension_to_sort_bound); CHECK_LT(1ULL << (num_stages - 1), dimension_to_sort_bound); @@ -1360,10 +1513,10 @@ Status IrEmitterUnnested::HandleSort(HloInstruction* sort) { // we have not enough threads, or not enough shared memory. Also it does not // give a speedup if the tile size is < 128. int64 total_shared_memory_needed = 0; - for (int64 i = 0; i < sort->operand_count(); ++i) { + for (int64 i = 0; i < operand_count; ++i) { total_shared_memory_needed += - kTileSize * ShapeUtil::ByteSizeOfPrimitiveType( - sort->operand(i)->shape().element_type()); + kTileSize * + ShapeUtil::ByteSizeOfPrimitiveType(operand_shapes[i].element_type()); } bool no_tiling = kTileSize < 128 || @@ -1376,7 +1529,7 @@ Status IrEmitterUnnested::HandleSort(HloInstruction* sort) { "kTileSize=%d < 128, " "kThreadsPerBlock=%d > threads_per_block_limit=%d, " "total_shared_memory_needed=%d > shared_memory_per_block=%d", - sort->name(), (no_tiling ? "won't" : "will"), kTileSize, kThreadsPerBlock, + input.name, (no_tiling ? "won't" : "will"), kTileSize, kThreadsPerBlock, ir_emitter_context_->gpu_device_info().threads_per_block_limit, total_shared_memory_needed, ir_emitter_context_->gpu_device_info().shared_memory_per_block); @@ -1384,37 +1537,38 @@ Status IrEmitterUnnested::HandleSort(HloInstruction* sort) { uint64 num_blocks = CeilOfRatio(num_iterations, kThreadsPerBlock); LaunchDimensions tiled_launch_dimensions(num_blocks, kThreadsPerBlock); VLOG(2) << absl::StreamFormat("%s launch dims: %d blocks, %d threads/block", - sort->name(), num_blocks, kThreadsPerBlock); + input.name, num_blocks, kThreadsPerBlock); + std::vector ir_arrays; auto emit_kernel = [&](absl::Span xor_masks) { VLOG(2) << absl::StreamFormat( - "%s uses kernel for xor masks [%s]", sort->name(), + "%s uses kernel for xor masks [%s]", input.name, absl::StrJoin(xor_masks, ", ", [](std::string* out, int64 xor_mask) { absl::StrAppendFormat(out, "0x%x", xor_mask); })); - thunks.push_back( - BuildKernelThunk(sort, /*implements_whole_instruction=*/false)); + thunks.push_back(BuildKernelThunkForMlir(input.name, Thunk::ThunkInfo(), + slices, &ir_arrays)); LaunchDimensions launch_dimensions = xor_masks.size() > 1 ? tiled_launch_dimensions : standard_launch_dimensions; UpdateLaunchDimensions(launch_dimensions, thunks.back().get(), ir_emitter_context_->llvm_module()); std::vector values_arrays; - values_arrays.reserve(sort->operand_count()); - for (int64 i = 0; i < sort->operand_count(); ++i) { - ShapeIndex shape_index = - sort->operand_count() > 1 ? 
ShapeIndex({i}) : ShapeIndex({}); - values_arrays.push_back(GetIrArray(*sort, *sort, shape_index)); + values_arrays.reserve(operand_count); + for (int64 i = 0; i < operand_count; ++i) { + values_arrays.push_back(ir_arrays[i]); } + TF_ASSIGN_OR_RETURN( + const HloComputation* comparator, + GetOrCreateSubComputationFromRegion(&sort_op.comparator())); return llvm_ir::EmitSortInPlace( - dimension_to_sort, values_arrays, IrName(sort), xor_masks, &b_, + dimension_to_sort, values_arrays, IrName(input.name), xor_masks, &b_, launch_dimensions, xor_masks.size() > 1 ? num_iterations_in_sort_dim : standard_num_iterations_in_sort_dim, kTileSize, [&](absl::Span operands, llvm::Value* output) { - return EmitCallToNestedComputation(*sort->to_apply(), operands, - output); + return EmitCallToNestedComputation(*comparator, operands, output); }); }; std::vector xor_masks; @@ -1441,17 +1595,18 @@ Status IrEmitterUnnested::HandleSort(HloInstruction* sort) { TF_RETURN_IF_ERROR(emit_kernel(xor_masks)); } VLOG(2) << absl::StreamFormat( - "%s requires %d thunks (including any D2D copies)", sort->name(), + "%s requires %d thunks (including any D2D copies)", input.name, thunks.size()); - AddThunkToThunkSequence(absl::make_unique( - GetThunkInfo(sort), std::move(thunks))); - if (sort->operand_count() > 1) { + AddThunkToThunkSequence( + absl::make_unique(input.thunk_info, std::move(thunks))); + if (operand_count > 1) { // Emit the tuple as part of the last stage of sorting. // We are currently in the block sorted.in_bounds.after. b_.SetInsertPoint(b_.GetInsertBlock()->getTerminator()); - llvm_ir::EmitTuple(GetIrArray(*sort, *sort), - ConstructIrArrayForOutputs(*sort), &b_); + llvm_ir::EmitTuple( + ir_arrays[operand_count], + absl::MakeSpan(ir_arrays).subspan(0, ir_arrays.size() - 1), &b_); } return Status::OK(); } @@ -1589,24 +1744,6 @@ Status IrEmitterUnnested::HandleAfterAll(HloInstruction* after_all) { return Status::OK(); } -// Describes how to access a particular subshape for an HLO. For instance if -// `.hlo_index` is {1} and `.gte_index` is {3, 4} then buffer for `.instr` at -// ShapeIndex {1} (i.e. the buffer for the second tuple element of hlo) is found -// at `.buffer_slice`[3][4]. That is, `.slice` is a void***, which we -// dereference twice -- first at index 3, and then at index 4 -- to get the -// address of our buffer. -struct HloBufferSlice { - const HloInstruction* instr; - ShapeIndex hlo_index; - - // The root buffer to look at. - BufferAllocation::Slice buffer_slice; - - // Describes how to dereference starting at that buffer to get to the buffer - // in question. - ShapeIndex gte_index; -}; - // Figures out how to access the buffers for all subshapes of hlo's operands and // for hlo itself (i.e. all the buffers produced by HLO). // @@ -1715,22 +1852,22 @@ static std::vector GetHloBufferSlices( return result; } -std::unique_ptr IrEmitterUnnested::BuildKernelThunk( - const HloInstruction* inst, bool implements_whole_instruction) { - const BufferAssignment& buffer_assn = - ir_emitter_context_->buffer_assignment(); - - std::vector hlo_slices = - GetHloBufferSlices(inst, buffer_assn); +std::unique_ptr +IrEmitterUnnested::BuildKernelThunkFromBufferSlices( + absl::string_view name, Thunk::ThunkInfo thunk_info, + absl::Span slices, + std::function + bind_slice_to_ir_value) { + const auto& buffer_assn = ir_emitter_context_->buffer_assignment(); // Figure out which buffer allocations need to be passed as arguments to our - // kernel. This is simply all of the allocations referenced in hlo_slices, + // kernel. 
This is simply all of the allocations referenced in slices, // plus the XLA temp buffer (if we have it). We always include the temp // buffer because even if the kernel itself doesn't use it, a nested // subcomputation within the kernel (e.g. a kMap's computation) might. std::unordered_set buffers_needed; - for (const auto& hlo_buffer_slice : hlo_slices) { - buffers_needed.insert(hlo_buffer_slice.buffer_slice.allocation()); + for (auto* slice : slices) { + buffers_needed.insert(slice->buffer_slice.allocation()); } absl::optional temp_buffer; for (const BufferAllocation& alloc : buffer_assn.Allocations()) { @@ -1759,7 +1896,7 @@ std::unique_ptr IrEmitterUnnested::BuildKernelThunk( return a->index() < b->index(); }); - llvm::Function* kernel = BuildKernelPrototype(*inst, non_constant_buffers); + llvm::Function* kernel = BuildKernelPrototype(name, non_constant_buffers); // Build a map from a BufferAllocation to the corresponding argument in our // kernel. @@ -1793,24 +1930,19 @@ std::unique_ptr IrEmitterUnnested::BuildKernelThunk( // For each buffer our kernel might want to touch, bind it to a value derived // from our kernel args. - for (const auto& hlo_buffer_slice : hlo_slices) { - const HloInstruction* instr = hlo_buffer_slice.instr; - const ShapeIndex& index = hlo_buffer_slice.hlo_index; - const BufferAllocation::Slice& slice = hlo_buffer_slice.buffer_slice; - const ShapeIndex& gte_index = hlo_buffer_slice.gte_index; - - VLOG(3) << "Buffer for " << instr->ToString() << " at " << index.ToString() - << " is found in slice " << slice.ToString() << " at GTE index " - << gte_index.ToString(); + for (auto* slice : slices) { + const BufferAllocation::Slice& buffer_slice = slice->buffer_slice; + const ShapeIndex& gte_index = slice->gte_index; llvm::Value* loc; - if (slice.allocation()->is_constant()) { + if (buffer_slice.allocation()->is_constant()) { loc = ir_emitter_context_->llvm_module()->getGlobalVariable( - llvm_ir::ConstantBufferAllocationToGlobalName(*slice.allocation())); + llvm_ir::ConstantBufferAllocationToGlobalName( + *buffer_slice.allocation())); CHECK_NE(loc, nullptr); } else { - loc = InBoundsGEP(kernel_args.at(slice.allocation()), - {b_.getInt64(slice.offset())}); + loc = InBoundsGEP(kernel_args.at(buffer_slice.allocation()), + {b_.getInt64(buffer_slice.offset())}); } // If gte_index is nonempty, we have to dereference `loc` to get to the @@ -1822,7 +1954,7 @@ std::unique_ptr IrEmitterUnnested::BuildKernelThunk( loc = Load(InBoundsGEP(loc, {b_.getInt64(idx)})); } - bindings_.BindHloToIrValue(*instr, loc, index); + bind_slice_to_ir_value(slice, loc); } // Bind the temp buffer so that nested subcomputations can find it if they @@ -1834,9 +1966,66 @@ std::unique_ptr IrEmitterUnnested::BuildKernelThunk( llvm::ConstantPointerNull::get(b_.getInt8PtrTy())); } - return absl::make_unique( + return absl::make_unique(thunk_info, non_constant_buffers, + std::string(kernel->getName())); +} + +std::unique_ptr IrEmitterUnnested::BuildKernelThunk( + const HloInstruction* inst, bool implements_whole_instruction) { + std::vector hlo_slices = + GetHloBufferSlices(inst, ir_emitter_context_->buffer_assignment()); + + std::vector slice_ptrs; + slice_ptrs.reserve(hlo_slices.size()); + for (auto& slice : hlo_slices) { + slice_ptrs.push_back(&slice); + } + + return BuildKernelThunkFromBufferSlices( + inst->name(), implements_whole_instruction ? 
GetThunkInfo(inst) : Thunk::ThunkInfo(), - non_constant_buffers, std::string(kernel->getName())); + slice_ptrs, [this](const BufferSlice* slice, llvm::Value* value) { + const HloBufferSlice* hlo_buffer_slice = + static_cast<const HloBufferSlice*>(slice); + const HloInstruction* instr = hlo_buffer_slice->instr; + const ShapeIndex& index = hlo_buffer_slice->hlo_index; + VLOG(3) << "Buffer for " << instr->ToString() << " at " + << index.ToString() << " is found in slice " + << hlo_buffer_slice->buffer_slice.ToString() << " at GTE index " + << hlo_buffer_slice->gte_index.ToString(); + + bindings_.BindHloToIrValue(*instr, value, index); + }); +} + +std::unique_ptr<KernelThunk> IrEmitterUnnested::BuildKernelThunkForMlir( + absl::string_view name, Thunk::ThunkInfo thunk_info, + absl::Span<const MlirBufferSlice> slices, + std::vector<llvm_ir::IrArray>* ir_arrays) { + absl::flat_hash_set<BufferAllocation::Slice> buffers_written; + std::vector<const BufferSlice*> slice_ptrs; + slice_ptrs.reserve(slices.size()); + for (auto& slice : slices) { + slice_ptrs.push_back(&slice); + if (slice.written) { + buffers_written.insert(slice.buffer_slice); + } + } + + ir_arrays->clear(); + return BuildKernelThunkFromBufferSlices( + name, thunk_info, slice_ptrs, + [&](const BufferSlice* slice, llvm::Value* value) { + const auto& mlir_slice = static_cast<const MlirBufferSlice&>(*slice); + + llvm_ir::IrArray ir_array( + CastToTypedValue(mlir_slice.shape, value, &b_), mlir_slice.shape); + if (!buffers_written.contains(slice->buffer_slice)) { + ir_array.MarkInvariantOverWholeProgram(&value->getContext()); + } + + ir_arrays->push_back(ir_array); + }); } StatusOr<std::unique_ptr<Thunk>> IrEmitterUnnested::BuildInitializerThunk( @@ -2043,7 +2232,7 @@ Status CheckConditionalBuffersShareAllocation( } // namespace -std::unique_ptr<Thunk> IrEmitterUnnested::BuildWhileThunk( +StatusOr<std::unique_ptr<Thunk>> IrEmitterUnnested::BuildWhileThunk( const HloInstruction* hlo) { // Check that all while-related buffers share an allocation. TF_CHECK_OK(CheckWhileBuffersShareAllocation( @@ -2051,24 +2240,26 @@ std::unique_ptr<Thunk> IrEmitterUnnested::BuildWhileThunk( // Generate thunk sequence for while 'condition'. HloComputation* condition = hlo->while_condition(); - IrEmitterUnnested ir_emitter_condition(hlo_module_config_, condition, - ir_emitter_context_); - TF_CHECK_OK(condition->Accept(&ir_emitter_condition)); + TF_ASSIGN_OR_RETURN(auto ir_emitter_condition, + IrEmitterUnnested::Create(hlo_module_config_, condition, + ir_emitter_context_)); + TF_RETURN_IF_ERROR(condition->Accept(ir_emitter_condition.get())); // Generate thunk sequence for while 'body'. HloComputation* body = hlo->while_body(); - IrEmitterUnnested ir_emitter_body(hlo_module_config_, body, - ir_emitter_context_); - TF_CHECK_OK(body->Accept(&ir_emitter_body)); + TF_ASSIGN_OR_RETURN( + auto ir_emitter_body, + IrEmitterUnnested::Create(hlo_module_config_, body, ir_emitter_context_)); + TF_RETURN_IF_ERROR(body->Accept(ir_emitter_body.get())); - return absl::make_unique<WhileThunk>( + return std::unique_ptr<Thunk>(new WhileThunk( GetThunkInfo(hlo), GetAllocationSlice(*condition->root_instruction()), // cond result - ir_emitter_condition.ConsumeThunkSequence(), - ir_emitter_body.ConsumeThunkSequence()); + ir_emitter_condition->ConsumeThunkSequence(), + ir_emitter_body->ConsumeThunkSequence())); } -std::unique_ptr<Thunk> IrEmitterUnnested::BuildForThunk( +StatusOr<std::unique_ptr<Thunk>> IrEmitterUnnested::BuildForThunk( const HloInstruction* hlo, const int64 loop_limit) { // Check that all while-related buffers share an allocation. 
TF_CHECK_OK(CheckWhileBuffersShareAllocation( @@ -2076,15 +2267,16 @@ std::unique_ptr<Thunk> IrEmitterUnnested::BuildForThunk( // Generate thunk sequence for while 'body' (will be used as a For loop body). HloComputation* body = hlo->while_body(); - IrEmitterUnnested ir_emitter_body(hlo_module_config_, body, - ir_emitter_context_); - TF_CHECK_OK(body->Accept(&ir_emitter_body)); + TF_ASSIGN_OR_RETURN( + auto ir_emitter_body, + IrEmitterUnnested::Create(hlo_module_config_, body, ir_emitter_context_)); + TF_RETURN_IF_ERROR(body->Accept(ir_emitter_body.get())); - return absl::make_unique<ForThunk>(GetThunkInfo(hlo), loop_limit, - ir_emitter_body.ConsumeThunkSequence()); + return std::unique_ptr<Thunk>(new ForThunk( + GetThunkInfo(hlo), loop_limit, ir_emitter_body->ConsumeThunkSequence())); } -std::unique_ptr<Thunk> IrEmitterUnnested::BuildConditionalThunk( +StatusOr<std::unique_ptr<Thunk>> IrEmitterUnnested::BuildConditionalThunk( const HloInstruction* hlo) { // Check that the buffers used in conditional are shared with the operands and // result appropriately. @@ -2096,15 +2288,17 @@ std::unique_ptr<Thunk> IrEmitterUnnested::BuildConditionalThunk( for (int j = 0; j < hlo->branch_count(); ++j) { branch_operands.emplace_back(GetAllocationSlice(*hlo->operand(j + 1))); HloComputation* branch_computation = hlo->branch_computation(j); - IrEmitterUnnested ir_emitter(hlo_module_config_, branch_computation, - ir_emitter_context_); - TF_CHECK_OK(branch_computation->Accept(&ir_emitter)); - branch_thunks.push_back(std::move(*ir_emitter.ConsumeThunkSequence())); + TF_ASSIGN_OR_RETURN( + auto ir_emitter, + IrEmitterUnnested::Create(hlo_module_config_, branch_computation, + ir_emitter_context_)); + TF_CHECK_OK(branch_computation->Accept(ir_emitter.get())); + branch_thunks.push_back(std::move(*ir_emitter->ConsumeThunkSequence())); } - return absl::make_unique<ConditionalThunk>( + return std::unique_ptr<Thunk>(new ConditionalThunk( GetThunkInfo(hlo), GetAllocationSlice(*hlo->operand(0)), branch_operands, - std::move(branch_thunks)); + std::move(branch_thunks))); } Status IrEmitterUnnested::EmitTargetElementLoopInThunk( diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h index 019fcdf21db..b9146dd8fae 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h @@ -17,6 +17,7 @@ limitations under the License. #define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_IR_EMITTER_UNNESTED_H_ #include "absl/container/inlined_vector.h" +#include "tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.h" #include "tensorflow/compiler/xla/service/gpu/ir_emitter.h" #include "tensorflow/compiler/xla/service/gpu/kernel_mapping_scheme.h" #include "tensorflow/compiler/xla/service/gpu/sequential_thunk.h" @@ -28,6 +29,40 @@ limitations under the License. namespace xla { namespace gpu { +struct BufferSlice { + // The root buffer to look at. + BufferAllocation::Slice buffer_slice; + + // Describes how to dereference starting at that buffer to get to the buffer + // in question. + ShapeIndex gte_index; +}; + +// Describes how to access a particular subshape for an HLO. For instance if +// `.hlo_index` is {1} and `.gte_index` is {3, 4} then the buffer for `.instr` at +// ShapeIndex {1} (i.e. the buffer for the second tuple element of hlo) is +// found at `.buffer_slice`[3][4]. That is, `.buffer_slice` is a void***, which we +// dereference twice -- first at index 3, and then at index 4 -- to get the +// address of our buffer. 
+struct HloBufferSlice : public BufferSlice { + const HloInstruction* instr; + ShapeIndex hlo_index; +}; + +struct MlirBufferSlice : public BufferSlice { + // The buffer is modified by the kernel. + bool written; + + Shape shape; +}; + +struct MlirEmitterInput { + mlir::Operation* op; + absl::string_view name; + Thunk::ThunkInfo thunk_info; + MlirBufferSlice extra_slice; +}; + // Emits LLVM IR for an "unnested computation". // // An unnested computation is an HloComputation which you run by executing one @@ -89,12 +124,14 @@ class IrEmitterUnnested : public IrEmitter, const string& loop_name, llvm::Value* tile_height, llvm::Value* tile_width, KernelSupportLibrary* ksl)>; - IrEmitterUnnested(const HloModuleConfig& hlo_module_config, - const HloComputation* hlo_computation, - IrEmitterContext* ir_emitter_context); IrEmitterUnnested(const IrEmitterUnnested&) = delete; IrEmitterUnnested& operator=(const IrEmitterUnnested&) = delete; + static StatusOr<std::unique_ptr<IrEmitterUnnested>> Create( + const HloModuleConfig& hlo_module_config, + const HloComputation* hlo_computation, + IrEmitterContext* ir_emitter_context); + // Transfers the ownership of thunk_sequence_ out. std::unique_ptr<ThunkSequence> ConsumeThunkSequence() { return std::make_unique<ThunkSequence>(std::move(thunk_sequence_)); } @@ -124,6 +161,7 @@ class IrEmitterUnnested : public IrEmitter, Status HandleScatter(HloInstruction* scatter) override; Status HandleSelect(HloInstruction* select) override; Status HandleSort(HloInstruction* sort) override; + Status EmitMlirSort(MlirEmitterInput input); Status HandleTriangularSolve(HloInstruction* hlo) override; Status HandleTupleSelect(HloInstruction* tuple_select) override; Status HandleAllReduce(HloInstruction* crs) override; @@ -148,6 +186,10 @@ class IrEmitterUnnested : public IrEmitter, Status Postprocess(HloInstruction* hlo) override; private: + IrEmitterUnnested(const HloModuleConfig& hlo_module_config, + const HloComputation* hlo_computation, + IrEmitterContext* ir_emitter_context); + // Add an owning Thunk object to the thunk sequence. void AddThunkToThunkSequence(std::unique_ptr<Thunk> thunk) override { thunk_sequence_.emplace_back(std::move(thunk)); } @@ -264,8 +306,7 @@ class IrEmitterUnnested : public IrEmitter, // Builds the prototype of the IR kernel for `inst` and adds it to the module. // This kernel takes as arguments pointers to the given buffer allocations. llvm::Function* BuildKernelPrototype( - const HloInstruction& inst, - absl::Span<const BufferAllocation* const> args); + absl::string_view name, absl::Span<const BufferAllocation* const> args); // Helper for writing extra outputs from inside a reduce kernel. Status EmitExtraOutputsForReduce( @@ -490,6 +531,12 @@ class IrEmitterUnnested : public IrEmitter, HloComputation* reducer, llvm::Type* element_type, llvm::Value* partial_result_address); + std::unique_ptr<KernelThunk> BuildKernelThunkFromBufferSlices( + absl::string_view name, Thunk::ThunkInfo thunk_info, + absl::Span<const BufferSlice* const> slices, + std::function<void(const BufferSlice*, llvm::Value*)> + bind_slice_to_ir_value); + // Returns a KernelThunk that invokes the kernel emitted for `inst`. The // caller needs to make sure `inst` outlives the lifetime of the returned // Thunk object. 
'implements_whole_instruction' specifies whether this @@ -498,6 +545,11 @@ class IrEmitterUnnested : public IrEmitter, std::unique_ptr<KernelThunk> BuildKernelThunk( const HloInstruction* inst, bool implements_whole_instruction); + std::unique_ptr<KernelThunk> BuildKernelThunkForMlir( + absl::string_view name, Thunk::ThunkInfo thunk_info, + absl::Span<const MlirBufferSlice> slices, + std::vector<llvm_ir::IrArray>* ir_arrays); + // Returns a thunk that, given a reduce or select-and-scatter op, // initializes its memory to the appropriate initial value. StatusOr<std::unique_ptr<Thunk>> BuildInitializerThunk( @@ -505,17 +557,18 @@ class IrEmitterUnnested : public IrEmitter, // Returns a WhileThunk that invokes thunk sequences for 'condition' and // 'body' sub-computations of while instruction 'hlo'. - std::unique_ptr<Thunk> BuildWhileThunk(const HloInstruction* hlo); + StatusOr<std::unique_ptr<Thunk>> BuildWhileThunk(const HloInstruction* hlo); // Returns a ForThunk which executes 'loop_limit' invocations of a thunk // sequence from the 'body' sub-computation of the while instruction 'hlo'. - std::unique_ptr<Thunk> BuildForThunk(const HloInstruction* hlo, - const int64 loop_limit); + StatusOr<std::unique_ptr<Thunk>> BuildForThunk(const HloInstruction* hlo, + const int64 loop_limit); // Returns a ConditionalThunk which executes the thunk sequence for the // 'branch_computation' corresponding to the predicate/branch_index of the // given conditional instruction. - std::unique_ptr<Thunk> BuildConditionalThunk(const HloInstruction* hlo); + StatusOr<std::unique_ptr<Thunk>> BuildConditionalThunk( + const HloInstruction* hlo); // Emits current thread id with the given type. // @@ -545,6 +598,9 @@ class IrEmitterUnnested : public IrEmitter, absl::optional<int64> thread_id_filter = absl::nullopt, absl::optional<int64> block_id_filter = absl::nullopt); + StatusOr<const HloComputation*> GetOrCreateSubComputationFromRegion( + mlir::Region* region); + // Returns the last generated thunk. Thunk* LastThunk() const { return thunk_sequence_.back().get(); } @@ -555,6 +611,14 @@ class IrEmitterUnnested : public IrEmitter, // The HloComputation that this IrEmitter emits code for. const HloComputation* hlo_computation_; + + mlir::OwningModuleRef mlir_scratch_module_; + + // This is for caching purposes only; it has no semantic significance. 
+ mlir::LhloDialectEmitter lhlo_scratch_emitter_; + + absl::flat_hash_map> + scratch_nested_computations_; }; } // namespace gpu diff --git a/tensorflow/compiler/xla/service/gpu/tests/BUILD b/tensorflow/compiler/xla/service/gpu/tests/BUILD index a2bddd2d0d7..809b277317f 100644 --- a/tensorflow/compiler/xla/service/gpu/tests/BUILD +++ b/tensorflow/compiler/xla/service/gpu/tests/BUILD @@ -458,6 +458,35 @@ xla_test( ], ) +tf_cc_test( + name = "sorting_test", + srcs = [ + "sorting_test.cc", + ], + tags = tf_cuda_tests_tags() + [ + "no_rocm", + ], + deps = [ + ":gpu_codegen_test", + "//tensorflow/compiler/xla:debug_options_flags", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:xla_proto_cc", + "//tensorflow/compiler/xla/service:gpu_plugin", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_module_config", + "//tensorflow/compiler/xla/service:hlo_parser", + "//tensorflow/compiler/xla/service/gpu:gpu_executable", + "//tensorflow/compiler/xla/tests:filecheck", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:llvm_irgen_test_base", + "//tensorflow/core:lib", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/stream_executor/lib", + "@com_google_absl//absl/memory", + ], +) + tf_cc_binary( name = "hlo_to_llvm_ir", srcs = ["hlo_to_llvm_ir.cc"], diff --git a/tensorflow/compiler/xla/service/gpu/tests/sorting.hlo b/tensorflow/compiler/xla/service/gpu/tests/sorting.hlo index 272c9a25769..4d29a8df116 100644 --- a/tensorflow/compiler/xla/service/gpu/tests/sorting.hlo +++ b/tensorflow/compiler/xla/service/gpu/tests/sorting.hlo @@ -8,162 +8,162 @@ compare { ROOT lt = pred[] compare(p.0.lhs, p.0.rhs), direction=LT } -// CHECK: define void @sort(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC1:%.*]]) +// CHECK: define void @sort(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]]) // CHECK-NEXT: entry: // CHECK-NEXT: [[COMPARE_RETURN_BUFFER:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[SORT_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0]], i64 0 -// CHECK-NEXT: [[SORT_TYPED:%.*]] = bitcast i8* [[SORT_RAW]] to [2 x [3 x float]]* -// CHECK-NEXT: [[X_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1]], i64 0 -// CHECK-NEXT: [[X_TYPED:%.*]] = bitcast i8* [[X_RAW]] to [2 x [3 x float]]* -// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 -// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP0]] to i64 -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 -// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP1]] to i64 -// CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 -// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP2]], [[THREAD_ID]] +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to [2 x [3 x float]]* +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0]], i64 0 +// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [2 x [3 x float]]* +// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 +// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP4]] to i64 +// CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 +// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP5]] to i64 +// CHECK-NEXT: [[TMP6:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 +// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP6]], 
[[THREAD_ID]] // CHECK-NEXT: [[LINEAR_INDEX_IN_RANGE:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 // CHECK-NEXT: call void @llvm.assume(i1 [[LINEAR_INDEX_IN_RANGE]]) -// CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = urem i64 [[TMP3]], 2 -// CHECK-NEXT: [[TMP5:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 -// CHECK-NEXT: br i1 [[TMP6]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] +// CHECK-NEXT: [[TMP7:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = urem i64 [[TMP7]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 +// CHECK-NEXT: [[TMP10:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 +// CHECK-NEXT: br i1 [[TMP10]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] // CHECK: sort.in_bounds-after: // CHECK-NEXT: ret void // CHECK: sort.in_bounds-true: -// CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP4]], 2 -// CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 1 -// CHECK-NEXT: [[TMP9:%.*]] = icmp slt i64 [[TMP7]], [[TMP8]] -// CHECK-NEXT: [[TMP10:%.*]] = icmp slt i64 [[TMP8]], 3 -// CHECK-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]] -// CHECK-NEXT: br i1 [[TMP11]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] +// CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP8]], 2 +// CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 1 +// CHECK-NEXT: [[TMP13:%.*]] = icmp slt i64 [[TMP11]], [[TMP12]] +// CHECK-NEXT: [[TMP14:%.*]] = icmp slt i64 [[TMP12]], 3 +// CHECK-NEXT: [[TMP15:%.*]] = and i1 [[TMP13]], [[TMP14]] +// CHECK-NEXT: br i1 [[TMP15]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] // CHECK: smaller_comparison_index-after: // CHECK-NEXT: br label [[SORT_IN_BOUNDS_AFTER]] // CHECK: smaller_comparison_index-true: -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP8]] -// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP7]] -// CHECK-NEXT: call void @compare(float* [[TMP12]], float* [[TMP13]], i8* [[COMPARE_RETURN_BUFFER]]) -// CHECK-NEXT: [[TMP14:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 -// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP14]], 0 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP12]] +// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP11]] +// CHECK-NEXT: call void @region_0_4(float* [[TMP16]], float* [[TMP17]], i8* [[COMPARE_RETURN_BUFFER]]) +// CHECK-NEXT: [[TMP18:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 +// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP18]], 0 // CHECK-NEXT: br i1 [[BOOLEAN_PREDICATE]], label [[IS_SMALLER_THAN_TRUE:%.*]], label [[IS_SMALLER_THAN_AFTER:%.*]] // CHECK: is_smaller_than-after: // CHECK-NEXT: br label [[SMALLER_COMPARISON_INDEX_AFTER]] // CHECK: is_smaller_than-true: -// CHECK-NEXT: [[TMP15:%.*]] = load float, float* [[TMP12]], align 4 -// CHECK-NEXT: [[TMP16:%.*]] = load float, float* [[TMP13]], align 4 -// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP7]] -// CHECK-NEXT: store float [[TMP15]], float* [[TMP17]], align 4 -// CHECK-NEXT: 
[[TMP18:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP8]] -// CHECK-NEXT: store float [[TMP16]], float* [[TMP18]], align 4 +// CHECK-NEXT: [[TMP19:%.*]] = load float, float* [[TMP16]], align 4 +// CHECK-NEXT: [[TMP20:%.*]] = load float, float* [[TMP17]], align 4 +// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP11]] +// CHECK-NEXT: store float [[TMP19]], float* [[TMP21]], align 4 +// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP12]] +// CHECK-NEXT: store float [[TMP20]], float* [[TMP22]], align 4 // CHECK-NEXT: br label [[IS_SMALLER_THAN_AFTER]] -// CHECK: define internal void @compare(float* dereferenceable(4) [[P_0_LHS_TYPED:%.*]], float* dereferenceable(4) [[P_0_RHS_TYPED:%.*]], i8* dereferenceable(1) [[OUTPUT_ARG:%.*]]) +// CHECK: define internal void @region_0_4(float* dereferenceable(4) [[P_0_LHS_TYPED:%.*]], float* dereferenceable(4) [[P_0_RHS_TYPED:%.*]], i8* dereferenceable(1) [[OUTPUT_ARG:%.*]]) // CHECK-NEXT: entry: -// CHECK-NEXT: [[LT_TYPED:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[P_0_LHS_TYPED]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[P_0_RHS_TYPED]], align 4 +// CHECK-NEXT: [[COMPARE_3_TYPED:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[ARG_0_1_TYPED:%.*]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARG_1_2_TYPED:%.*]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = fcmp olt float [[TMP0]], [[TMP1]] // CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i8 -// CHECK-NEXT: store i8 [[TMP3]], i8* [[LT_TYPED]], align 1 -// CHECK-NEXT: [[LOAD_RET_VALUE:%.*]] = load i8, i8* [[LT_TYPED]], align 1 -// CHECK-NEXT: store i8 [[LOAD_RET_VALUE]], i8* [[OUTPUT_ARG]], align 1 +// CHECK-NEXT: store i8 [[TMP3]], i8* [[COMPARE_3_TYPED]], align 1 +// CHECK-NEXT: [[LOAD_RET_VALUE:%.*]] = load i8, i8* [[COMPARE_3_TYPED]], align 1 +// CHECK-NEXT: store i8 [[LOAD_RET_VALUE]], i8* [[OUTPUT_ARG:%.*]], align 1 // CHECK-NEXT: ret void -// CHECK: define void @sort__1(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC1:%.*]]) { +// CHECK: define void @sort__1(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]]) { // CHECK-NEXT: entry: // CHECK-NEXT: [[COMPARE_RETURN_BUFFER:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[SORT_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0]], i64 0 -// CHECK-NEXT: [[SORT_TYPED:%.*]] = bitcast i8* [[SORT_RAW]] to [2 x [3 x float]]* -// CHECK-NEXT: [[X_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1]], i64 0 -// CHECK-NEXT: [[X_TYPED:%.*]] = bitcast i8* [[X_RAW]] to [2 x [3 x float]]* -// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 -// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP0]] to i64 -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 -// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP1]] to i64 -// CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 -// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP2]], [[THREAD_ID]] +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to [2 x [3 x float]]* +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0]], i64 0 +// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* 
[[TMP2]] to [2 x [3 x float]]* +// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 +// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP4]] to i64 +// CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 +// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP5]] to i64 +// CHECK-NEXT: [[TMP6:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 +// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP6]], [[THREAD_ID]] // CHECK-NEXT: [[LINEAR_INDEX_IN_RANGE:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 // CHECK-NEXT: call void @llvm.assume(i1 [[LINEAR_INDEX_IN_RANGE]]) -// CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = urem i64 [[TMP3]], 2 -// CHECK-NEXT: [[TMP5:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 -// CHECK-NEXT: br i1 [[TMP6]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] +// CHECK-NEXT: [[TMP7:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = urem i64 [[TMP7]], 2 +// CHECK-NEXT: [[TMP9:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 +// CHECK-NEXT: [[TMP10:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 +// CHECK-NEXT: br i1 [[TMP10]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] // CHECK: sort.in_bounds-after: // CHECK-NEXT: ret void // CHECK: sort.in_bounds-true: -// CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP4]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = icmp slt i64 [[TMP4]], [[TMP7]] -// CHECK-NEXT: [[TMP9:%.*]] = icmp slt i64 [[TMP7]], 3 -// CHECK-NEXT: [[TMP10:%.*]] = and i1 [[TMP8]], [[TMP9]] -// CHECK-NEXT: br i1 [[TMP10]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] +// CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP8]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = icmp slt i64 [[TMP8]], [[TMP11]] +// CHECK-NEXT: [[TMP13:%.*]] = icmp slt i64 [[TMP11]], 3 +// CHECK-NEXT: [[TMP14:%.*]] = and i1 [[TMP12]], [[TMP13]] +// CHECK-NEXT: br i1 [[TMP14]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] // CHECK: smaller_comparison_index-after: // CHECK-NEXT: br label [[SORT_IN_BOUNDS_AFTER]] // CHECK: smaller_comparison_index-true: -// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP7]] -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP4]] -// CHECK-NEXT: call void @compare(float* [[TMP11]], float* [[TMP12]], i8* [[COMPARE_RETURN_BUFFER]]) -// CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 -// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP13]], 0 +// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP11]] +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP8]] +// CHECK-NEXT: call void @region_0_4(float* [[TMP15]], float* [[TMP16]], i8* [[COMPARE_RETURN_BUFFER]]) +// CHECK-NEXT: [[TMP17:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 +// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP17]], 0 // CHECK-NEXT: br i1 [[BOOLEAN_PREDICATE]], label [[IS_SMALLER_THAN_TRUE:%.*]], label [[IS_SMALLER_THAN_AFTER:%.*]] // CHECK: is_smaller_than-after: // CHECK-NEXT: br label [[SMALLER_COMPARISON_INDEX_AFTER]] // CHECK: is_smaller_than-true: -// CHECK-NEXT: [[TMP14:%.*]] = 
load float, float* [[TMP11]], align 4 -// CHECK-NEXT: [[TMP15:%.*]] = load float, float* [[TMP12]], align 4 -// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP4]] -// CHECK-NEXT: store float [[TMP14]], float* [[TMP16]], align 4 -// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP7]] -// CHECK-NEXT: store float [[TMP15]], float* [[TMP17]], align 4 +// CHECK-NEXT: [[TMP18:%.*]] = load float, float* [[TMP15]], align 4 +// CHECK-NEXT: [[TMP19:%.*]] = load float, float* [[TMP16]], align 4 +// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP8]] +// CHECK-NEXT: store float [[TMP18]], float* [[TMP20]], align 4 +// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP11]] +// CHECK-NEXT: store float [[TMP19]], float* [[TMP21]], align 4 // CHECK-NEXT: br label [[IS_SMALLER_THAN_AFTER]] -// CHECK: define void @sort__2(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC1:%.*]]) { +// CHECK: define void @sort__2(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]]) { // CHECK-NEXT: entry: // CHECK-NEXT: [[COMPARE_RETURN_BUFFER:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[SORT_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 -// CHECK-NEXT: [[SORT_TYPED:%.*]] = bitcast i8* [[SORT_RAW]] to [2 x [3 x float]]* -// CHECK-NEXT: [[X_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1:%.*]], i64 0 -// CHECK-NEXT: [[X_TYPED:%.*]] = bitcast i8* [[X_RAW]] to [2 x [3 x float]]* -// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 -// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP0]] to i64 -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 -// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP1]] to i64 -// CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 -// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP2]], [[THREAD_ID]] +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to [2 x [3 x float]]* +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0]], i64 0 +// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [2 x [3 x float]]* +// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 +// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP4]] to i64 +// CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 +// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP5]] to i64 +// CHECK-NEXT: [[TMP6:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 +// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP6]], [[THREAD_ID]] // CHECK-NEXT: [[LINEAR_INDEX_IN_RANGE:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 // CHECK-NEXT: call void @llvm.assume(i1 [[LINEAR_INDEX_IN_RANGE]]) -// CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = urem i64 [[TMP3]], 2 -// CHECK-NEXT: [[TMP5:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 -// CHECK-NEXT: br i1 [[TMP6]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] +// CHECK-NEXT: [[TMP7:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = urem i64 [[TMP7]], 
2 +// CHECK-NEXT: [[TMP9:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 +// CHECK-NEXT: [[TMP10:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 +// CHECK-NEXT: br i1 [[TMP10]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] // CHECK: sort.in_bounds-after: // CHECK-NEXT: ret void // CHECK: sort.in_bounds-true: -// CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP4]], 2 -// CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 1 -// CHECK-NEXT: [[TMP9:%.*]] = icmp slt i64 [[TMP7]], [[TMP8]] -// CHECK-NEXT: [[TMP10:%.*]] = icmp slt i64 [[TMP8]], 3 -// CHECK-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]] -// CHECK-NEXT: br i1 [[TMP11]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] +// CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP8]], 2 +// CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 1 +// CHECK-NEXT: [[TMP13:%.*]] = icmp slt i64 [[TMP11]], [[TMP12]] +// CHECK-NEXT: [[TMP14:%.*]] = icmp slt i64 [[TMP12]], 3 +// CHECK-NEXT: [[TMP15:%.*]] = and i1 [[TMP13]], [[TMP14]] +// CHECK-NEXT: br i1 [[TMP15]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] // CHECK: smaller_comparison_index-after: // CHECK-NEXT: br label [[SORT_IN_BOUNDS_AFTER]] // CHECK: smaller_comparison_index-true: -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP8]] -// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP7]] -// CHECK-NEXT: call void @compare(float* [[TMP12]], float* [[TMP13]], i8* [[COMPARE_RETURN_BUFFER]]) -// CHECK-NEXT: [[TMP14:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 -// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP14]], 0 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP12]] +// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP11]] +// CHECK-NEXT: call void @region_0_4(float* [[TMP16]], float* [[TMP17]], i8* [[COMPARE_RETURN_BUFFER]]) +// CHECK-NEXT: [[TMP18:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 +// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP18]], 0 // CHECK-NEXT: br i1 [[BOOLEAN_PREDICATE]], label [[IS_SMALLER_THAN_TRUE:%.*]], label [[IS_SMALLER_THAN_AFTER:%.*]] // CHECK: is_smaller_than-after: // CHECK-NEXT: br label [[SMALLER_COMPARISON_INDEX_AFTER]] // CHECK: is_smaller_than-true: -// CHECK-NEXT: [[TMP15:%.*]] = load float, float* [[TMP12]], align 4 -// CHECK-NEXT: [[TMP16:%.*]] = load float, float* [[TMP13]], align 4 -// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP7]] -// CHECK-NEXT: store float [[TMP15]], float* [[TMP17]], align 4 -// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP8]] -// CHECK-NEXT: store float [[TMP16]], float* [[TMP18]], align 4 +// CHECK-NEXT: [[TMP19:%.*]] = load float, float* [[TMP16]], align 4 +// CHECK-NEXT: [[TMP20:%.*]] = load float, float* [[TMP17]], align 4 +// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP11]] +// CHECK-NEXT: store float [[TMP19]], float* [[TMP21]], align 4 +// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x 
float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP12]] +// CHECK-NEXT: store float [[TMP20]], float* [[TMP22]], align 4 // CHECK-NEXT: br label [[IS_SMALLER_THAN_AFTER]] ENTRY main { x = f32[2, 3] parameter(0) @@ -182,210 +182,198 @@ compare { ROOT lt = pred[] compare(p.1.lhs, p.1.rhs), direction=LT } -// CHECK: define void @sort(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 64 dereferenceable(24) [[ALLOC1:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC2:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC3:%.*]], i8* noalias align 64 dereferenceable(16) [[ALLOC4:%.*]]) +// CHECK: define void @sort(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 64 dereferenceable(24) [[ALLOC1:%.*]], i8* noalias align 64 dereferenceable(16) [[ALLOC4:%.*]]) // CHECK-NEXT: entry: // CHECK-NEXT: [[COMPARE_RETURN_BUFFER:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[SORT_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC4]], i64 0 -// CHECK-NEXT: [[SORT_TYPED:%.*]] = bitcast i8* [[SORT_RAW]] to [2 x i8*]* -// CHECK-NEXT: [[SORT_RAW1:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0]], i64 0 -// CHECK-NEXT: [[SORT_TYPED2:%.*]] = bitcast i8* [[SORT_RAW1]] to [2 x [3 x i32]]* -// CHECK-NEXT: [[SORT_RAW3:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1]], i64 0 -// CHECK-NEXT: [[SORT_TYPED4:%.*]] = bitcast i8* [[SORT_RAW3]] to [2 x [3 x float]]* -// CHECK-NEXT: [[X_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC2]], i64 0 -// CHECK-NEXT: [[X_TYPED:%.*]] = bitcast i8* [[X_RAW]] to [2 x [3 x i32]]* -// CHECK-NEXT: [[Y_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC3]], i64 0 -// CHECK-NEXT: [[Y_TYPED:%.*]] = bitcast i8* [[Y_RAW]] to [2 x [3 x float]]* -// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 -// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP0]] to i64 -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 -// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP1]] to i64 -// CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 -// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP2]], [[THREAD_ID]] +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to [2 x [3 x i32]]* +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1:%.*]], i64 0 +// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [2 x [3 x float]]* +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[ALLOC4:%.*]], i64 0 +// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* +// CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 +// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 +// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-NEXT: [[TMP8:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 +// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP8]], [[THREAD_ID]] // CHECK-NEXT: [[LINEAR_INDEX_IN_RANGE:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 // CHECK-NEXT: call void @llvm.assume(i1 [[LINEAR_INDEX_IN_RANGE]]) -// CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = urem i64 [[TMP3]], 2 -// CHECK-NEXT: [[TMP5:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 -// CHECK-NEXT: br i1 [[TMP6]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] +// CHECK-NEXT: 
[[TMP9:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 +// CHECK-NEXT: [[TMP10:%.*]] = urem i64 [[TMP9]], 2 +// CHECK-NEXT: [[TMP11:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 +// CHECK-NEXT: [[TMP12:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 +// CHECK-NEXT: br i1 [[TMP12]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] // CHECK: sort.in_bounds-after: // CHECK-NEXT: ret void // CHECK: sort.in_bounds-true: -// CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP4]], 2 -// CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 1 -// CHECK-NEXT: [[TMP9:%.*]] = icmp slt i64 [[TMP7]], [[TMP8]] -// CHECK-NEXT: [[TMP10:%.*]] = icmp slt i64 [[TMP8]], 3 -// CHECK-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]] -// CHECK-NEXT: br i1 [[TMP11]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] +// CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP10]], 2 +// CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 1 +// CHECK-NEXT: [[TMP15:%.*]] = icmp slt i64 [[TMP13]], [[TMP14]] +// CHECK-NEXT: [[TMP16:%.*]] = icmp slt i64 [[TMP14]], 3 +// CHECK-NEXT: [[TMP17:%.*]] = and i1 [[TMP15]], [[TMP16]] +// CHECK-NEXT: br i1 [[TMP17]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] // CHECK: smaller_comparison_index-after: // CHECK-NEXT: br label [[SORT_IN_BOUNDS_AFTER]] // CHECK: smaller_comparison_index-true: -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP8]] -// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP7]] -// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP8]] -// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP7]] -// CHECK-NEXT: call void @compare(i32* [[TMP12]], i32* [[TMP13]], float* [[TMP14]], float* [[TMP15]], i8* [[COMPARE_RETURN_BUFFER]]) -// CHECK-NEXT: [[TMP16:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 -// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP16]], 0 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP14]] +// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP13]] +// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP14]] +// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP13]] +// CHECK-NEXT: call void @region_0_6(i32* [[TMP18]], i32* [[TMP19]], float* [[TMP20]], float* [[TMP21]], i8* [[COMPARE_RETURN_BUFFER]]) +// CHECK-NEXT: [[TMP22:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 +// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP22]], 0 // CHECK-NEXT: br i1 [[BOOLEAN_PREDICATE]], label [[IS_SMALLER_THAN_TRUE:%.*]], label [[IS_SMALLER_THAN_AFTER:%.*]] // CHECK: is_smaller_than-after: // CHECK-NEXT: br label [[SMALLER_COMPARISON_INDEX_AFTER]] // CHECK: is_smaller_than-true: -// CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 
[[TMP7]] -// CHECK-NEXT: store i32 [[TMP17]], i32* [[TMP19]], align 4 -// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP8]] -// CHECK-NEXT: store i32 [[TMP18]], i32* [[TMP20]], align 4 -// CHECK-NEXT: [[TMP21:%.*]] = load float, float* [[TMP14]], align 4 -// CHECK-NEXT: [[TMP22:%.*]] = load float, float* [[TMP15]], align 4 -// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP7]] -// CHECK-NEXT: store float [[TMP21]], float* [[TMP23]], align 4 -// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP8]] -// CHECK-NEXT: store float [[TMP22]], float* [[TMP24]], align 4 +// CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP13]] +// CHECK-NEXT: store i32 [[TMP23]], i32* [[TMP25]], align 4 +// CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP14]] +// CHECK-NEXT: store i32 [[TMP24]], i32* [[TMP26]], align 4 +// CHECK-NEXT: [[TMP27:%.*]] = load float, float* [[TMP20]], align 4 +// CHECK-NEXT: [[TMP28:%.*]] = load float, float* [[TMP21]], align 4 +// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP13]] +// CHECK-NEXT: store float [[TMP27]], float* [[TMP29]], align 4 +// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP14]] +// CHECK-NEXT: store float [[TMP28]], float* [[TMP30]], align 4 // CHECK-NEXT: br label [[IS_SMALLER_THAN_AFTER]] -// CHECK: define internal void @compare(i32* dereferenceable(4) [[P_0_LHS_TYPED:%.*]], i32* dereferenceable(4) [[P_0_RHS_TYPED:%.*]], float* dereferenceable(4) [[P_1_LHS_TYPED:%.*]], float* dereferenceable(4) [[P_1_RHS_TYPED:%.*]], i8* dereferenceable(1) [[OUTPUT_ARG:%.*]]) +// CHECK: define internal void @region_0_6(i32* dereferenceable(4) [[P_0_LHS_TYPED:%.*]], i32* dereferenceable(4) [[P_0_RHS_TYPED:%.*]], float* dereferenceable(4) [[P_1_LHS_TYPED:%.*]], float* dereferenceable(4) [[P_1_RHS_TYPED:%.*]], i8* dereferenceable(1) [[OUTPUT_ARG:%.*]]) // CHECK-NEXT: entry: -// CHECK-NEXT: [[LT_TYPED:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[P_1_LHS_TYPED]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[P_1_RHS_TYPED]], align 4 +// CHECK-NEXT: [[COMPARE_5_TYPED:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[ARG_2_3_TYPED:%.*]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARG_3_4_TYPED:%.*]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = fcmp olt float [[TMP0]], [[TMP1]] // CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i8 -// CHECK-NEXT: store i8 [[TMP3]], i8* [[LT_TYPED]], align 1 -// CHECK-NEXT: [[LOAD_RET_VALUE:%.*]] = load i8, i8* [[LT_TYPED]], align 1 -// CHECK-NEXT: store i8 [[LOAD_RET_VALUE]], i8* [[OUTPUT_ARG]], align 1 +// CHECK-NEXT: store i8 [[TMP3]], i8* [[COMPARE_5_TYPED]], align 1 +// CHECK-NEXT: [[LOAD_RET_VALUE:%.*]] = load i8, i8* [[COMPARE_5_TYPED]], align 1 +// CHECK-NEXT: store i8 [[LOAD_RET_VALUE]], i8* [[OUTPUT_ARG:%.*]], align 1 // CHECK-NEXT: ret void -// CHECK: 
define void @sort__1(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 64 dereferenceable(24) [[ALLOC1:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC2:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC3:%.*]], i8* noalias align 64 dereferenceable(16) [[ALLOC4:%.*]]) +// CHECK: define void @sort__1(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 64 dereferenceable(24) [[ALLOC1:%.*]], i8* noalias align 64 dereferenceable(16) [[ALLOC4:%.*]]) // CHECK-NEXT: entry: // CHECK-NEXT: [[COMPARE_RETURN_BUFFER:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[SORT_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC4:%.*]], i64 0 -// CHECK-NEXT: [[SORT_TYPED:%.*]] = bitcast i8* [[SORT_RAW]] to [2 x i8*]* -// CHECK-NEXT: [[SORT_RAW1:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 -// CHECK-NEXT: [[SORT_TYPED2:%.*]] = bitcast i8* [[SORT_RAW1]] to [2 x [3 x i32]]* -// CHECK-NEXT: [[SORT_RAW3:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1:%.*]], i64 0 -// CHECK-NEXT: [[SORT_TYPED4:%.*]] = bitcast i8* [[SORT_RAW3]] to [2 x [3 x float]]* -// CHECK-NEXT: [[X_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC2:%.*]], i64 0 -// CHECK-NEXT: [[X_TYPED:%.*]] = bitcast i8* [[X_RAW]] to [2 x [3 x i32]]* -// CHECK-NEXT: [[Y_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC3:%.*]], i64 0 -// CHECK-NEXT: [[Y_TYPED:%.*]] = bitcast i8* [[Y_RAW]] to [2 x [3 x float]]* -// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 -// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP0]] to i64 -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 -// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP1]] to i64 -// CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 -// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP2]], [[THREAD_ID]] +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to [2 x [3 x i32]]* +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1:%.*]], i64 0 +// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [2 x [3 x float]]* +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[ALLOC4:%.*]], i64 0 +// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* +// CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 +// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 +// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-NEXT: [[TMP8:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 +// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP8]], [[THREAD_ID]] // CHECK-NEXT: [[LINEAR_INDEX_IN_RANGE:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 // CHECK-NEXT: call void @llvm.assume(i1 [[LINEAR_INDEX_IN_RANGE]]) -// CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = urem i64 [[TMP3]], 2 -// CHECK-NEXT: [[TMP5:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 -// CHECK-NEXT: br i1 [[TMP6]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] +// CHECK-NEXT: [[TMP9:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 +// CHECK-NEXT: [[TMP10:%.*]] = urem i64 [[TMP9]], 2 +// CHECK-NEXT: [[TMP11:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 +// CHECK-NEXT: [[TMP12:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 +// CHECK-NEXT: br i1 [[TMP12]], label 
[[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] // CHECK: sort.in_bounds-after: // CHECK-NEXT: ret void // CHECK: sort.in_bounds-true: -// CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP4]], 3 -// CHECK-NEXT: [[TMP8:%.*]] = icmp slt i64 [[TMP4]], [[TMP7]] -// CHECK-NEXT: [[TMP9:%.*]] = icmp slt i64 [[TMP7]], 3 -// CHECK-NEXT: [[TMP10:%.*]] = and i1 [[TMP8]], [[TMP9]] -// CHECK-NEXT: br i1 [[TMP10]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] +// CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP10]], 3 +// CHECK-NEXT: [[TMP14:%.*]] = icmp slt i64 [[TMP10]], [[TMP13]] +// CHECK-NEXT: [[TMP15:%.*]] = icmp slt i64 [[TMP13]], 3 +// CHECK-NEXT: [[TMP16:%.*]] = and i1 [[TMP14]], [[TMP15]] +// CHECK-NEXT: br i1 [[TMP16]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] // CHECK: smaller_comparison_index-after: // CHECK-NEXT: br label [[SORT_IN_BOUNDS_AFTER]] // CHECK: smaller_comparison_index-true: -// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP7]] -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP4]] -// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP7]] -// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP4]] -// CHECK-NEXT: call void @compare(i32* [[TMP11]], i32* [[TMP12]], float* [[TMP13]], float* [[TMP14]], i8* [[COMPARE_RETURN_BUFFER]]) -// CHECK-NEXT: [[TMP15:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 -// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP15]], 0 +// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP13]] +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP10]] +// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP13]] +// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP10]] +// CHECK-NEXT: call void @region_0_6(i32* [[TMP17]], i32* [[TMP18]], float* [[TMP19]], float* [[TMP20]], i8* [[COMPARE_RETURN_BUFFER]]) +// CHECK-NEXT: [[TMP21:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 +// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP21]], 0 // CHECK-NEXT: br i1 [[BOOLEAN_PREDICATE]], label [[IS_SMALLER_THAN_TRUE:%.*]], label [[IS_SMALLER_THAN_AFTER:%.*]] // CHECK: is_smaller_than-after: // CHECK-NEXT: br label [[SMALLER_COMPARISON_INDEX_AFTER]] // CHECK: is_smaller_than-true: -// CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP4]] -// CHECK-NEXT: store i32 [[TMP16]], i32* [[TMP18]], align 4 -// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP7]] -// CHECK-NEXT: store i32 [[TMP17]], i32* [[TMP19]], align 4 -// CHECK-NEXT: [[TMP20:%.*]] = load float, float* [[TMP13]], align 4 -// CHECK-NEXT: [[TMP21:%.*]] = 
load float, float* [[TMP14]], align 4 -// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP4]] -// CHECK-NEXT: store float [[TMP20]], float* [[TMP22]], align 4 -// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP7]] -// CHECK-NEXT: store float [[TMP21]], float* [[TMP23]], align 4 +// CHECK-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP10]] +// CHECK-NEXT: store i32 [[TMP22]], i32* [[TMP24]], align 4 +// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP13]] +// CHECK-NEXT: store i32 [[TMP23]], i32* [[TMP25]], align 4 +// CHECK-NEXT: [[TMP26:%.*]] = load float, float* [[TMP19]], align 4 +// CHECK-NEXT: [[TMP27:%.*]] = load float, float* [[TMP20]], align 4 +// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP10]] +// CHECK-NEXT: store float [[TMP26]], float* [[TMP28]], align 4 +// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP13]] +// CHECK-NEXT: store float [[TMP27]], float* [[TMP29]], align 4 // CHECK-NEXT: br label [[IS_SMALLER_THAN_AFTER]] -// CHECK: define void @sort__2(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 64 dereferenceable(24) [[ALLOC1:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC2:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC3:%.*]], i8* noalias align 64 dereferenceable(16) [[ALLOC4:%.*]]) +// CHECK: define void @sort__2(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 64 dereferenceable(24) [[ALLOC1:%.*]], i8* noalias align 64 dereferenceable(16) [[ALLOC4:%.*]]) // CHECK-NEXT: entry: // CHECK-NEXT: [[COMPARE_RETURN_BUFFER:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[SORT_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC4:%.*]], i64 0 -// CHECK-NEXT: [[SORT_TYPED:%.*]] = bitcast i8* [[SORT_RAW]] to [2 x i8*]* -// CHECK-NEXT: [[SORT_RAW1:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 -// CHECK-NEXT: [[SORT_TYPED2:%.*]] = bitcast i8* [[SORT_RAW1]] to [2 x [3 x i32]]* -// CHECK-NEXT: [[SORT_RAW3:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1:%.*]], i64 0 -// CHECK-NEXT: [[SORT_TYPED4:%.*]] = bitcast i8* [[SORT_RAW3]] to [2 x [3 x float]]* -// CHECK-NEXT: [[X_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC2:%.*]], i64 0 -// CHECK-NEXT: [[X_TYPED:%.*]] = bitcast i8* [[X_RAW]] to [2 x [3 x i32]]* -// CHECK-NEXT: [[Y_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC3:%.*]], i64 0 -// CHECK-NEXT: [[Y_TYPED:%.*]] = bitcast i8* [[Y_RAW]] to [2 x [3 x float]]* -// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 -// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP0]] to i64 -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 -// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP1]] to i64 -// CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 -// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP2]], [[THREAD_ID]] +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 +// CHECK-NEXT: 
[[TMP1:%.*]] = bitcast i8* [[TMP0]] to [2 x [3 x i32]]* +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1:%.*]], i64 0 +// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [2 x [3 x float]]* +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[ALLOC4:%.*]], i64 0 +// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* +// CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 +// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP6]] to i64 +// CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 +// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-NEXT: [[TMP8:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 +// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP8]], [[THREAD_ID]] // CHECK-NEXT: [[LINEAR_INDEX_IN_RANGE:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 // CHECK-NEXT: call void @llvm.assume(i1 [[LINEAR_INDEX_IN_RANGE]]) -// CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 -// CHECK-NEXT: [[TMP4:%.*]] = urem i64 [[TMP3]], 2 -// CHECK-NEXT: [[TMP5:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 -// CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 -// CHECK-NEXT: br i1 [[TMP6]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] +// CHECK-NEXT: [[TMP9:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 +// CHECK-NEXT: [[TMP10:%.*]] = urem i64 [[TMP9]], 2 +// CHECK-NEXT: [[TMP11:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 +// CHECK-NEXT: [[TMP12:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 +// CHECK-NEXT: br i1 [[TMP12]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] // CHECK: sort.in_bounds-after: -// CHECK-NEXT: [[TMP7:%.*]] = bitcast [2 x [3 x i32]]* [[SORT_TYPED2]] to i8* -// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[SORT_TYPED]], i64 0, i64 0 -// CHECK-NEXT: store i8* [[TMP7]], i8** [[TMP8]], align 8 -// CHECK-NEXT: [[TMP9:%.*]] = bitcast [2 x [3 x float]]* [[SORT_TYPED4]] to i8* -// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[SORT_TYPED]], i64 0, i64 1 -// CHECK-NEXT: store i8* [[TMP9]], i8** [[TMP10]], align 8 +// CHECK-NEXT: [[TMP13:%.*]] = bitcast [2 x [3 x i32]]* [[TMP1]] to i8* +// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0 +// CHECK-NEXT: store i8* [[TMP13]], i8** [[TMP14]], align 8 +// CHECK-NEXT: [[TMP15:%.*]] = bitcast [2 x [3 x float]]* [[TMP3]] to i8* +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 +// CHECK-NEXT: store i8* [[TMP15]], i8** [[TMP16]], align 8 // CHECK-NEXT: ret void // CHECK: sort.in_bounds-true: -// CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP4]], 2 -// CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 1 -// CHECK-NEXT: [[TMP13:%.*]] = icmp slt i64 [[TMP11]], [[TMP12]] -// CHECK-NEXT: [[TMP14:%.*]] = icmp slt i64 [[TMP12]], 3 -// CHECK-NEXT: [[TMP15:%.*]] = and i1 [[TMP13]], [[TMP14]] -// CHECK-NEXT: br i1 [[TMP15]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] +// CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP10]], 2 +// CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP17]], 1 +// CHECK-NEXT: [[TMP19:%.*]] = icmp slt i64 [[TMP17]], [[TMP18]] +// CHECK-NEXT: [[TMP20:%.*]] = icmp slt i64 [[TMP18]], 3 +// CHECK-NEXT: [[TMP21:%.*]] = and i1 [[TMP19]], [[TMP20]] +// CHECK-NEXT: br i1 [[TMP21]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] // CHECK: smaller_comparison_index-after: // CHECK-NEXT: 
br label [[SORT_IN_BOUNDS_AFTER]] // CHECK: smaller_comparison_index-true: -// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP12]] -// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP11]] -// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP12]] -// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP11]] -// CHECK-NEXT: call void @compare(i32* [[TMP16]], i32* [[TMP17]], float* [[TMP18]], float* [[TMP19]], i8* [[COMPARE_RETURN_BUFFER]]) -// CHECK-NEXT: [[TMP20:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 -// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP20]], 0 +// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP18]] +// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP17]] +// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP18]] +// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP17]] +// CHECK-NEXT: call void @region_0_6(i32* [[TMP22]], i32* [[TMP23]], float* [[TMP24]], float* [[TMP25]], i8* [[COMPARE_RETURN_BUFFER]]) +// CHECK-NEXT: [[TMP26:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 +// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP26]], 0 // CHECK-NEXT: br i1 [[BOOLEAN_PREDICATE]], label [[IS_SMALLER_THAN_TRUE:%.*]], label [[IS_SMALLER_THAN_AFTER:%.*]] // CHECK: is_smaller_than-after: // CHECK-NEXT: br label [[SMALLER_COMPARISON_INDEX_AFTER]] // CHECK: is_smaller_than-true: -// CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP16]], align 4 -// CHECK-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP11]] -// CHECK-NEXT: store i32 [[TMP21]], i32* [[TMP23]], align 4 -// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP12]] -// CHECK-NEXT: store i32 [[TMP22]], i32* [[TMP24]], align 4 -// CHECK-NEXT: [[TMP25:%.*]] = load float, float* [[TMP18]], align 4 -// CHECK-NEXT: [[TMP26:%.*]] = load float, float* [[TMP19]], align 4 -// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP11]] -// CHECK-NEXT: store float [[TMP25]], float* [[TMP27]], align 4 -// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP12]] -// CHECK-NEXT: store float [[TMP26]], float* [[TMP28]], align 4 +// CHECK-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP17]] +// CHECK-NEXT: store i32 [[TMP27]], i32* [[TMP29]], align 4 +// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 
[[TMP18]] +// CHECK-NEXT: store i32 [[TMP28]], i32* [[TMP30]], align 4 +// CHECK-NEXT: [[TMP31:%.*]] = load float, float* [[TMP24]], align 4 +// CHECK-NEXT: [[TMP32:%.*]] = load float, float* [[TMP25]], align 4 +// CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP17]] +// CHECK-NEXT: store float [[TMP31]], float* [[TMP33]], align 4 +// CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP18]] +// CHECK-NEXT: store float [[TMP32]], float* [[TMP34]], align 4 // CHECK-NEXT: br label [[IS_SMALLER_THAN_AFTER]] ENTRY main { x = s32[2, 3] parameter(0) diff --git a/tensorflow/compiler/xla/service/gpu/tests/sorting_test.cc b/tensorflow/compiler/xla/service/gpu/tests/sorting_test.cc new file mode 100644 index 00000000000..197a0c6cfeb --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/tests/sorting_test.cc @@ -0,0 +1,71 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include + +#include "tensorflow/compiler/xla/service/gpu/gpu_executable.h" +#include "tensorflow/compiler/xla/service/gpu/tests/gpu_codegen_test.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_module_config.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/tests/filecheck.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/xla.pb.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/stream_executor/lib/statusor.h" + +namespace xla { +namespace gpu { + +namespace { + +class SortingTest : public GpuCodegenTest { + protected: + HloModuleConfig ConfigWithoutLayoutAssignment() { + HloModuleConfig config; + auto debug_options = HloTestBase::GetDebugOptionsForTest(); + // Disable layout_assignment to use the preassigned layouts. 
+ debug_options.add_xla_disable_hlo_passes("layout-assignment"); + config.set_debug_options(debug_options); + return config; + } +}; + +TEST_F(SortingTest, Regression1) { + const char* hlo_text = R"( +HloModule TestModule + +compare { + p.0.lhs = f32[] parameter(0) + p.0.rhs = f32[] parameter(1) + ROOT lt = pred[] compare(p.0.lhs, p.0.rhs), direction=LT +} + +ENTRY TestComputation { + x = f32[3, 2]{1, 0} parameter(0) + x.copy = f32[3, 2]{0, 1} copy(x) + ROOT sort = f32[3, 2]{0, 1} sort(x.copy), dimensions={1}, to_apply=compare +} + +)"; + + EXPECT_TRUE(RunAndCompareNoHloPasses(hlo_text, ErrorSpec{1e-5, 1e-5})); +} + +} // namespace +} // namespace gpu +} // namespace xla diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc index b01ae2efe43..2963d546380 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc @@ -415,9 +415,10 @@ llvm::Instruction* AddRangeMetadata(int64 lower, int64 upper, return inst; } -string IrName(string a) { - a.erase(std::remove(a.begin(), a.end(), '%'), a.end()); - return a; +string IrName(absl::string_view a) { + std::string s(a); + s.erase(std::remove(s.begin(), s.end(), '%'), s.end()); + return s; } string IrName(absl::string_view a, absl::string_view b) { diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h index 642965b6470..c0a55e4da33 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h @@ -87,7 +87,7 @@ string DumpModuleToString(const llvm::Module& module); // - joining all of the nonempty inputs by '.', and then // - removing all '%'s. // -string IrName(string a); +string IrName(absl::string_view a); string IrName(absl::string_view a, absl::string_view b); string IrName(const HloInstruction* a, absl::string_view b = ""); From d10c814ce1e58831c8c5a9869555eb326d67fd2f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 10 Aug 2020 16:25:19 -0700 Subject: [PATCH 2477/2522] TF NumPy: point to guide from API documentation. PiperOrigin-RevId: 325911592 Change-Id: I64f9c34badb5b5484be4527a86df218bf910a79f --- tensorflow/python/ops/numpy_ops/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/python/ops/numpy_ops/__init__.py b/tensorflow/python/ops/numpy_ops/__init__.py index 5cc5cf5ac85..633b74b4a78 100644 --- a/tensorflow/python/ops/numpy_ops/__init__.py +++ b/tensorflow/python/ops/numpy_ops/__init__.py @@ -24,6 +24,9 @@ NumPy" section. ## Getting Started +Please also see [TensorFlow NumPy Guide]( +https://www.tensorflow.org/guide/tf_numpy). + In the code snippets below, we will assume that `tf.experimental.numpy` is imported as `tnp` and NumPy is imported as `np` From 82c043fee561dccc4ede0e44e2a09c9c744a5b9d Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Mon, 10 Aug 2020 17:06:51 -0700 Subject: [PATCH 2478/2522] Create dummy topology types for libtpu C API. This gives us better typechecking than just using void*. 
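As a rough illustration of why the dummy struct types help (a self-contained sketch, not the actual libtpu headers: the real SE_TpuTopology/SE_TpuTopology_Core definitions in c_api_defn.h are empty marker structs, and the payload fields below exist only so the snippet compiles on its own):

  // Public C API view: forward-declared ("dummy") handle types.
  typedef struct SE_TpuTopology SE_TpuTopology;
  typedef struct SE_TpuTopology_Core SE_TpuTopology_Core;

  int TpuTopology_NumCores(SE_TpuTopology* topology);
  int TpuCoreLocation_Id(SE_TpuTopology_Core* core);

  // Implementation view: only the library defines the structs.
  struct SE_TpuTopology { int num_cores; };       // hypothetical payload for the sketch
  struct SE_TpuTopology_Core { int id; };         // hypothetical payload for the sketch

  int TpuTopology_NumCores(SE_TpuTopology* topology) { return topology->num_cores; }
  int TpuCoreLocation_Id(SE_TpuTopology_Core* core) { return core->id; }

  int main() {
    SE_TpuTopology topology{2};
    SE_TpuTopology_Core core{7};
    int ok = TpuTopology_NumCores(&topology) + TpuCoreLocation_Id(&core);
    // TpuCoreLocation_Id(&topology);  // now a compile error; with void* it built silently.
    return ok == 9 ? 0 : 1;
  }

With void* parameters every handle is interchangeable, so passing a core where a topology is expected can only fail at run time; with distinct opaque types the compiler rejects it.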
PiperOrigin-RevId: 325919251 Change-Id: Idc375ed151a2b0b2cf7d62b7be9cf2fafe48e934 --- tensorflow/stream_executor/tpu/BUILD | 1 + tensorflow/stream_executor/tpu/c_api_decl.h | 4 ++ tensorflow/stream_executor/tpu/c_api_defn.h | 4 +- .../stream_executor/tpu/tpu_executor_c_api.h | 45 ++++++++++--------- .../tpu/tpu_platform_interface.h | 5 ++- .../stream_executor/tpu/tpu_topology.cc | 4 +- tensorflow/stream_executor/tpu/tpu_topology.h | 15 ++++--- 7 files changed, 46 insertions(+), 32 deletions(-) diff --git a/tensorflow/stream_executor/tpu/BUILD b/tensorflow/stream_executor/tpu/BUILD index a8178404dff..207984e0c89 100644 --- a/tensorflow/stream_executor/tpu/BUILD +++ b/tensorflow/stream_executor/tpu/BUILD @@ -294,6 +294,7 @@ cc_library( hdrs = ["tpu_platform_interface.h"], visibility = ["//visibility:public"], deps = [ + ":c_api_decl", ":tpu_topology_external", "//tensorflow/core:lib", "//tensorflow/stream_executor", diff --git a/tensorflow/stream_executor/tpu/c_api_decl.h b/tensorflow/stream_executor/tpu/c_api_decl.h index c42423c232f..bca5f254ad1 100644 --- a/tensorflow/stream_executor/tpu/c_api_decl.h +++ b/tensorflow/stream_executor/tpu/c_api_decl.h @@ -253,6 +253,10 @@ typedef struct XLA_ComputationPlacer XLA_ComputationPlacer; typedef void (*XLA_CallbackFn)(void*); typedef void (*XLA_StatusCallbackFn)(void*, SE_Status*); + +typedef struct SE_TpuTopology SE_TpuTopology; +typedef struct SE_TpuTopology_Core SE_TpuTopology_Core; +typedef struct SE_TpuTopology_Core SE_TpuTopology_Host; } #endif // TENSORFLOW_STREAM_EXECUTOR_TPU_C_API_DECL_H_ diff --git a/tensorflow/stream_executor/tpu/c_api_defn.h b/tensorflow/stream_executor/tpu/c_api_defn.h index 1599f1f266a..62c02e2de48 100644 --- a/tensorflow/stream_executor/tpu/c_api_defn.h +++ b/tensorflow/stream_executor/tpu/c_api_defn.h @@ -63,8 +63,10 @@ struct SE_DeviceOptions { stream_executor::DeviceOptions options; }; +// Ignored -- these are just used to enforce the interface types struct XLA_TransferManager {}; - struct XLA_ComputationPlacer {}; +struct SE_TpuTopology {}; +struct SE_TpuTopology_Core {}; #endif // TENSORFLOW_STREAM_EXECUTOR_TPU_C_API_DEFN_H_ diff --git a/tensorflow/stream_executor/tpu/tpu_executor_c_api.h b/tensorflow/stream_executor/tpu/tpu_executor_c_api.h index 1dcb3eaf244..c498244cc6e 100644 --- a/tensorflow/stream_executor/tpu/tpu_executor_c_api.h +++ b/tensorflow/stream_executor/tpu/tpu_executor_c_api.h @@ -39,8 +39,8 @@ SE_PlatformId TpuPlatform_Id(SE_Platform* platform); int64_t TpuPlatform_VisibleDeviceCount(SE_Platform* platform); int64_t TpuPlatform_TpuMemoryLimit(SE_Platform* platform); bool TpuPlatform_ShouldRegisterTpuDeviceToDeviceCopy(SE_Platform* platform); -void* TpuPlatform_GetTopologyPtr(SE_Platform* platform); -void* TpuPlatform_GetHostLocation(SE_Platform* platform); +SE_TpuTopology* TpuPlatform_GetTopologyPtr(SE_Platform* platform); +SE_TpuTopology_Host* TpuPlatform_GetHostLocation(SE_Platform* platform); void TpuExecutor_Init(SE_StreamExecutor* executor, int device_ordinal, SE_DeviceOptions* device_options, SE_Status* status); @@ -193,29 +193,32 @@ void TpuTransferManager_FreeBuffers(char** buffers_array, int64_t* buffers_size, XLA_ComputationPlacer* TpuComputationPlacer_New(); void TpuComputationPlacer_Free(XLA_ComputationPlacer* placer); -int TpuTopology_LogicalDevicesPerHost(void* tpu_topology, +int TpuTopology_LogicalDevicesPerHost(SE_TpuTopology* tpu_topology, TpuCoreTypeEnum tpu_core_type); -int TpuTopology_LogicalDevicesPerChip(void* tpu_topology, +int 
TpuTopology_LogicalDevicesPerChip(SE_TpuTopology* tpu_topology, TpuCoreTypeEnum tpu_core_type); -int TpuTopology_ChipBounds_X(void* tpu_topology); -int TpuTopology_ChipBounds_Y(void* tpu_topology); -int TpuTopology_ChipBounds_Z(void* tpu_topology); -bool TpuTopology_HasChip(void* tpu_topology, int x, int y, int z); -void* TpuTopology_Core(void* tpu_topology, int x, int y, int z, - TpuCoreTypeEnum tpu_core_type, int index); -int TpuTopology_NumCores(void* tpu_topology, TpuCoreTypeEnum tpu_core_type); +int TpuTopology_ChipBounds_X(SE_TpuTopology* tpu_topology); +int TpuTopology_ChipBounds_Y(SE_TpuTopology* tpu_topology); +int TpuTopology_ChipBounds_Z(SE_TpuTopology* tpu_topology); +bool TpuTopology_HasChip(SE_TpuTopology* tpu_topology, int x, int y, int z); +SE_TpuTopology_Core* TpuTopology_Core(SE_TpuTopology* tpu_topology, int x, + int y, int z, + TpuCoreTypeEnum tpu_core_type, int index); +int TpuTopology_NumCores(SE_TpuTopology* tpu_topology, + TpuCoreTypeEnum tpu_core_type); // 'cores' should be a preallocated array of size TpuTopology_NumCores. -void TpuTopology_Cores(void* tpu_topology, TpuCoreTypeEnum tpu_core_type, - void** cores); -int TpuTopology_IdForHost(void* tpu_topology, int x, int y, int z); -void TpuCoreLocation_ChipCoordinates(void* tpu_core_location, int* x, int* y, - int* z); -void TpuCoreLocation_HostCoordinates(void* tpu_core_location, int* x, int* y, - int* z); -int TpuCoreLocation_Index(void* tpu_core_location); -int TpuCoreLocation_Id(void* tpu_core_location); +void TpuTopology_Cores(SE_TpuTopology* tpu_topology, + TpuCoreTypeEnum tpu_core_type, + SE_TpuTopology_Core** cores); +int TpuTopology_IdForHost(SE_TpuTopology* tpu_topology, int x, int y, int z); +void TpuCoreLocation_ChipCoordinates(SE_TpuTopology_Core* tpu_core_location, + int* x, int* y, int* z); +void TpuCoreLocation_HostCoordinates(SE_TpuTopology_Core* tpu_core_location, + int* x, int* y, int* z); +int TpuCoreLocation_Index(SE_TpuTopology_Core* tpu_core_location); +int TpuCoreLocation_Id(SE_TpuTopology_Core* tpu_core_location); -int TpuHostLocation_Id(void* tpu_host_location); +int TpuHostLocation_Id(SE_TpuTopology_Host* tpu_host_location); // C API for XLA::Compiler interface diff --git a/tensorflow/stream_executor/tpu/tpu_platform_interface.h b/tensorflow/stream_executor/tpu/tpu_platform_interface.h index a0a3b444550..936de8d5c34 100644 --- a/tensorflow/stream_executor/tpu/tpu_platform_interface.h +++ b/tensorflow/stream_executor/tpu/tpu_platform_interface.h @@ -18,12 +18,15 @@ limitations under the License. #include "tensorflow/core/platform/types.h" #include "tensorflow/stream_executor/platform.h" +#include "tensorflow/stream_executor/tpu/c_api_decl.h" #include "tensorflow/stream_executor/tpu/tpu_topology.h" namespace tensorflow { namespace tpu { -typedef void* TpuTopologyPtr; +// TODO(skyewm): get rid of TpuTopologyPtr and either use SE_TpuTopology* or +// return a TpuTopologyExternal. 
+typedef SE_TpuTopology* TpuTopologyPtr; class TpuPlatformInterface : public stream_executor::Platform { public: diff --git a/tensorflow/stream_executor/tpu/tpu_topology.cc b/tensorflow/stream_executor/tpu/tpu_topology.cc index cfcea2dc944..6c885b229ec 100644 --- a/tensorflow/stream_executor/tpu/tpu_topology.cc +++ b/tensorflow/stream_executor/tpu/tpu_topology.cc @@ -79,12 +79,12 @@ std::vector TpuTopologyExternal::cores( TpuCoreTypeEnum core_type) const { int num_cores = tpu::ExecutorApiFn()->TpuTopology_NumCoresFn(topology_, core_type); - std::vector core_ptrs(num_cores); + std::vector core_ptrs(num_cores); tpu::ExecutorApiFn()->TpuTopology_CoresFn(topology_, core_type, core_ptrs.data()); std::vector result; result.reserve(num_cores); - for (void* ptr : core_ptrs) { + for (SE_TpuTopology_Core* ptr : core_ptrs) { result.emplace_back(ptr); } return result; diff --git a/tensorflow/stream_executor/tpu/tpu_topology.h b/tensorflow/stream_executor/tpu/tpu_topology.h index 3b0c4c5aa20..07e9afc7d81 100644 --- a/tensorflow/stream_executor/tpu/tpu_topology.h +++ b/tensorflow/stream_executor/tpu/tpu_topology.h @@ -33,27 +33,27 @@ struct TpuDimensionsExternal { class TpuCoreLocationExternal { public: TpuCoreLocationExternal() : core_location_(nullptr) {} - explicit TpuCoreLocationExternal(void* core_location) + explicit TpuCoreLocationExternal(SE_TpuTopology_Core* core_location) : core_location_(core_location) {} TpuDimensionsExternal chip_coordinates() const; TpuDimensionsExternal host_coordinates() const; int32 index() const; int32 Id() const; - void* impl() const { return core_location_; } + SE_TpuTopology_Core* impl() const { return core_location_; } private: - void* core_location_; + SE_TpuTopology_Core* core_location_; }; class TpuHostLocationExternal { public: - explicit TpuHostLocationExternal(void* host_location) + explicit TpuHostLocationExternal(SE_TpuTopology_Host* host_location) : host_location_(host_location) {} int32 Id() const; private: - void* host_location_; + SE_TpuTopology_Host* host_location_; }; struct TpuTopologyChipBoundsExternal { @@ -64,7 +64,8 @@ struct TpuTopologyChipBoundsExternal { class TpuTopologyExternal { public: - explicit TpuTopologyExternal(void* topology) : topology_(topology) {} + explicit TpuTopologyExternal(SE_TpuTopology* topology) + : topology_(topology) {} int32 LogicalDevicesPerHost(TpuCoreTypeEnum core_type) const; int32 LogicalDevicesPerChip(TpuCoreTypeEnum core_type) const; TpuTopologyChipBoundsExternal chip_bounds() const; @@ -75,7 +76,7 @@ class TpuTopologyExternal { int IdForHost(TpuDimensionsExternal host) const; private: - void* topology_; + SE_TpuTopology* topology_; }; } // namespace tpu From 956c8578c338f85ad0ababee2274b3d7db1a777c Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Mon, 10 Aug 2020 17:14:19 -0700 Subject: [PATCH 2479/2522] Clone Windows job nightly job suite for CUDA11 testing PiperOrigin-RevId: 325920384 Change-Id: I136dfc361c01a9efa3e4ef2f87fca2d9c041314a --- .../rel/windows_cuda11/cpu_libtensorflow.bat | 20 ++++++++++++++++ .../ci_build/rel/windows_cuda11/cpu_py35.bat | 20 ++++++++++++++++ .../ci_build/rel/windows_cuda11/cpu_py36.bat | 20 ++++++++++++++++ .../ci_build/rel/windows_cuda11/cpu_py37.bat | 20 ++++++++++++++++ .../ci_build/rel/windows_cuda11/cpu_py38.bat | 21 +++++++++++++++++ .../rel/windows_cuda11/gpu_libtensorflow.bat | 20 ++++++++++++++++ .../rel/windows_cuda11/gpu_pip_on_cpu.bat | 21 +++++++++++++++++ .../ci_build/rel/windows_cuda11/gpu_py35.bat | 23 +++++++++++++++++++ 
.../ci_build/rel/windows_cuda11/gpu_py36.bat | 23 +++++++++++++++++++ .../ci_build/rel/windows_cuda11/gpu_py37.bat | 23 +++++++++++++++++++ .../ci_build/rel/windows_cuda11/gpu_py38.bat | 23 +++++++++++++++++++ 11 files changed, 234 insertions(+) create mode 100644 tensorflow/tools/ci_build/rel/windows_cuda11/cpu_libtensorflow.bat create mode 100644 tensorflow/tools/ci_build/rel/windows_cuda11/cpu_py35.bat create mode 100644 tensorflow/tools/ci_build/rel/windows_cuda11/cpu_py36.bat create mode 100644 tensorflow/tools/ci_build/rel/windows_cuda11/cpu_py37.bat create mode 100644 tensorflow/tools/ci_build/rel/windows_cuda11/cpu_py38.bat create mode 100644 tensorflow/tools/ci_build/rel/windows_cuda11/gpu_libtensorflow.bat create mode 100644 tensorflow/tools/ci_build/rel/windows_cuda11/gpu_pip_on_cpu.bat create mode 100644 tensorflow/tools/ci_build/rel/windows_cuda11/gpu_py35.bat create mode 100644 tensorflow/tools/ci_build/rel/windows_cuda11/gpu_py36.bat create mode 100644 tensorflow/tools/ci_build/rel/windows_cuda11/gpu_py37.bat create mode 100644 tensorflow/tools/ci_build/rel/windows_cuda11/gpu_py38.bat diff --git a/tensorflow/tools/ci_build/rel/windows_cuda11/cpu_libtensorflow.bat b/tensorflow/tools/ci_build/rel/windows_cuda11/cpu_libtensorflow.bat new file mode 100644 index 00000000000..67941234b15 --- /dev/null +++ b/tensorflow/tools/ci_build/rel/windows_cuda11/cpu_libtensorflow.bat @@ -0,0 +1,20 @@ +:: Copyright 2019 The TensorFlow Authors. All Rights Reserved. +:: +:: Licensed under the Apache License, Version 2.0 (the "License"); +:: you may not use this file except in compliance with the License. +:: You may obtain a copy of the License at +:: +:: http://www.apache.org/licenses/LICENSE-2.0 +:: +:: Unless required by applicable law or agreed to in writing, software +:: distributed under the License is distributed on an "AS IS" BASIS, +:: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +:: See the License for the specific language governing permissions and +:: limitations under the License. +:: ============================================================================= + +CALL tensorflow\tools\ci_build\release\common_win.bat + +call tensorflow\tools\ci_build\windows\cpu\bazel\run_libtensorflow.bat || exit /b 1 + +copy lib_package %TF_ARTIFACTS_DIR%\lib_package diff --git a/tensorflow/tools/ci_build/rel/windows_cuda11/cpu_py35.bat b/tensorflow/tools/ci_build/rel/windows_cuda11/cpu_py35.bat new file mode 100644 index 00000000000..175917d7cad --- /dev/null +++ b/tensorflow/tools/ci_build/rel/windows_cuda11/cpu_py35.bat @@ -0,0 +1,20 @@ +:: Copyright 2019 The TensorFlow Authors. All Rights Reserved. +:: +:: Licensed under the Apache License, Version 2.0 (the "License"); +:: you may not use this file except in compliance with the License. +:: You may obtain a copy of the License at +:: +:: http://www.apache.org/licenses/LICENSE-2.0 +:: +:: Unless required by applicable law or agreed to in writing, software +:: distributed under the License is distributed on an "AS IS" BASIS, +:: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +:: See the License for the specific language governing permissions and +:: limitations under the License. 
+:: ============================================================================= + +SET PYTHON_DIRECTORY=Python35 + +CALL tensorflow\tools\ci_build\release\common_win.bat + +call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --release_build --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow_cpu" diff --git a/tensorflow/tools/ci_build/rel/windows_cuda11/cpu_py36.bat b/tensorflow/tools/ci_build/rel/windows_cuda11/cpu_py36.bat new file mode 100644 index 00000000000..85b75053eff --- /dev/null +++ b/tensorflow/tools/ci_build/rel/windows_cuda11/cpu_py36.bat @@ -0,0 +1,20 @@ +:: Copyright 2019 The TensorFlow Authors. All Rights Reserved. +:: +:: Licensed under the Apache License, Version 2.0 (the "License"); +:: you may not use this file except in compliance with the License. +:: You may obtain a copy of the License at +:: +:: http://www.apache.org/licenses/LICENSE-2.0 +:: +:: Unless required by applicable law or agreed to in writing, software +:: distributed under the License is distributed on an "AS IS" BASIS, +:: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +:: See the License for the specific language governing permissions and +:: limitations under the License. +:: ============================================================================= + +SET PYTHON_DIRECTORY=Python36 + +CALL tensorflow\tools\ci_build\release\common_win.bat + +call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --release_build --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow_cpu" diff --git a/tensorflow/tools/ci_build/rel/windows_cuda11/cpu_py37.bat b/tensorflow/tools/ci_build/rel/windows_cuda11/cpu_py37.bat new file mode 100644 index 00000000000..d8a6673ba4c --- /dev/null +++ b/tensorflow/tools/ci_build/rel/windows_cuda11/cpu_py37.bat @@ -0,0 +1,20 @@ +:: Copyright 2019 The TensorFlow Authors. All Rights Reserved. +:: +:: Licensed under the Apache License, Version 2.0 (the "License"); +:: you may not use this file except in compliance with the License. +:: You may obtain a copy of the License at +:: +:: http://www.apache.org/licenses/LICENSE-2.0 +:: +:: Unless required by applicable law or agreed to in writing, software +:: distributed under the License is distributed on an "AS IS" BASIS, +:: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +:: See the License for the specific language governing permissions and +:: limitations under the License. +:: ============================================================================= + +SET PYTHON_DIRECTORY=Python37 + +CALL tensorflow\tools\ci_build\release\common_win.bat + +call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --release_build --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow_cpu" diff --git a/tensorflow/tools/ci_build/rel/windows_cuda11/cpu_py38.bat b/tensorflow/tools/ci_build/rel/windows_cuda11/cpu_py38.bat new file mode 100644 index 00000000000..86adcda0bb9 --- /dev/null +++ b/tensorflow/tools/ci_build/rel/windows_cuda11/cpu_py38.bat @@ -0,0 +1,21 @@ +:: Copyright 2019 The TensorFlow Authors. All Rights Reserved. +:: +:: Licensed under the Apache License, Version 2.0 (the "License"); +:: you may not use this file except in compliance with the License. 
+:: You may obtain a copy of the License at +:: +:: http://www.apache.org/licenses/LICENSE-2.0 +:: +:: Unless required by applicable law or agreed to in writing, software +:: distributed under the License is distributed on an "AS IS" BASIS, +:: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +:: See the License for the specific language governing permissions and +:: limitations under the License. +:: ============================================================================= + +SET PYTHON_DIRECTORY=Python38 + +CALL tensorflow\tools\ci_build\release\common_win.bat + +call tensorflow\tools\ci_build\windows\cpu\pip\run.bat --release_build --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow_cpu" + diff --git a/tensorflow/tools/ci_build/rel/windows_cuda11/gpu_libtensorflow.bat b/tensorflow/tools/ci_build/rel/windows_cuda11/gpu_libtensorflow.bat new file mode 100644 index 00000000000..8ab78bef3ca --- /dev/null +++ b/tensorflow/tools/ci_build/rel/windows_cuda11/gpu_libtensorflow.bat @@ -0,0 +1,20 @@ +:: Copyright 2019 The TensorFlow Authors. All Rights Reserved. +:: +:: Licensed under the Apache License, Version 2.0 (the "License"); +:: you may not use this file except in compliance with the License. +:: You may obtain a copy of the License at +:: +:: http://www.apache.org/licenses/LICENSE-2.0 +:: +:: Unless required by applicable law or agreed to in writing, software +:: distributed under the License is distributed on an "AS IS" BASIS, +:: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +:: See the License for the specific language governing permissions and +:: limitations under the License. +:: ============================================================================= + +CALL tensorflow\tools\ci_build\release\common_win.bat + +call tensorflow\tools\ci_build\windows\gpu\bazel\run_libtensorflow.bat || exit /b + +copy lib_package %TF_ARTIFACTS_DIR%\lib_package diff --git a/tensorflow/tools/ci_build/rel/windows_cuda11/gpu_pip_on_cpu.bat b/tensorflow/tools/ci_build/rel/windows_cuda11/gpu_pip_on_cpu.bat new file mode 100644 index 00000000000..213de532069 --- /dev/null +++ b/tensorflow/tools/ci_build/rel/windows_cuda11/gpu_pip_on_cpu.bat @@ -0,0 +1,21 @@ +:: Copyright 2019 The TensorFlow Authors. All Rights Reserved. +:: +:: Licensed under the Apache License, Version 2.0 (the "License"); +:: you may not use this file except in compliance with the License. +:: You may obtain a copy of the License at +:: +:: http://www.apache.org/licenses/LICENSE-2.0 +:: +:: Unless required by applicable law or agreed to in writing, software +:: distributed under the License is distributed on an "AS IS" BASIS, +:: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +:: See the License for the specific language governing permissions and +:: limitations under the License. +:: ============================================================================= + +SET PYTHON_DIRECTORY=Python36 + +CALL tensorflow\tools\ci_build\release\common_win.bat + +call tensorflow\tools\ci_build\windows\integration\gpu_pip_on_cpu\run.bat + diff --git a/tensorflow/tools/ci_build/rel/windows_cuda11/gpu_py35.bat b/tensorflow/tools/ci_build/rel/windows_cuda11/gpu_py35.bat new file mode 100644 index 00000000000..86c118b2f83 --- /dev/null +++ b/tensorflow/tools/ci_build/rel/windows_cuda11/gpu_py35.bat @@ -0,0 +1,23 @@ +:: Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
+:: +:: Licensed under the Apache License, Version 2.0 (the "License"); +:: you may not use this file except in compliance with the License. +:: You may obtain a copy of the License at +:: +:: http://www.apache.org/licenses/LICENSE-2.0 +:: +:: Unless required by applicable law or agreed to in writing, software +:: distributed under the License is distributed on an "AS IS" BASIS, +:: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +:: See the License for the specific language governing permissions and +:: limitations under the License. +:: ============================================================================= + +SET PYTHON_DIRECTORY=Python35 + +CALL tensorflow\tools\ci_build\release\common_win.bat + +call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --release_build --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow" + +for %%a in ("%~dp0\.") do set "PARENT_DIR=%%~nxa" +bash -l tensorflow\tools\ci_build\release\windows\%PARENT_DIR%\release_pip_rename.sh diff --git a/tensorflow/tools/ci_build/rel/windows_cuda11/gpu_py36.bat b/tensorflow/tools/ci_build/rel/windows_cuda11/gpu_py36.bat new file mode 100644 index 00000000000..cc4f84afbee --- /dev/null +++ b/tensorflow/tools/ci_build/rel/windows_cuda11/gpu_py36.bat @@ -0,0 +1,23 @@ +:: Copyright 2019 The TensorFlow Authors. All Rights Reserved. +:: +:: Licensed under the Apache License, Version 2.0 (the "License"); +:: you may not use this file except in compliance with the License. +:: You may obtain a copy of the License at +:: +:: http://www.apache.org/licenses/LICENSE-2.0 +:: +:: Unless required by applicable law or agreed to in writing, software +:: distributed under the License is distributed on an "AS IS" BASIS, +:: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +:: See the License for the specific language governing permissions and +:: limitations under the License. +:: ============================================================================= + +SET PYTHON_DIRECTORY=Python36 + +CALL tensorflow\tools\ci_build\release\common_win.bat + +call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --release_build --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow" + +for %%a in ("%~dp0\.") do set "PARENT_DIR=%%~nxa" +bash -l tensorflow\tools\ci_build\release\windows\%PARENT_DIR%\release_pip_rename.sh \ No newline at end of file diff --git a/tensorflow/tools/ci_build/rel/windows_cuda11/gpu_py37.bat b/tensorflow/tools/ci_build/rel/windows_cuda11/gpu_py37.bat new file mode 100644 index 00000000000..5fa798e3eb8 --- /dev/null +++ b/tensorflow/tools/ci_build/rel/windows_cuda11/gpu_py37.bat @@ -0,0 +1,23 @@ +:: Copyright 2019 The TensorFlow Authors. All Rights Reserved. +:: +:: Licensed under the Apache License, Version 2.0 (the "License"); +:: you may not use this file except in compliance with the License. +:: You may obtain a copy of the License at +:: +:: http://www.apache.org/licenses/LICENSE-2.0 +:: +:: Unless required by applicable law or agreed to in writing, software +:: distributed under the License is distributed on an "AS IS" BASIS, +:: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +:: See the License for the specific language governing permissions and +:: limitations under the License. 
+:: ============================================================================= + +SET PYTHON_DIRECTORY=Python37 + +CALL tensorflow\tools\ci_build\release\common_win.bat + +call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --release_build --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow" + +for %%a in ("%~dp0\.") do set "PARENT_DIR=%%~nxa" +bash -l tensorflow\tools\ci_build\release\windows\%PARENT_DIR%\release_pip_rename.sh \ No newline at end of file diff --git a/tensorflow/tools/ci_build/rel/windows_cuda11/gpu_py38.bat b/tensorflow/tools/ci_build/rel/windows_cuda11/gpu_py38.bat new file mode 100644 index 00000000000..fa1fc131145 --- /dev/null +++ b/tensorflow/tools/ci_build/rel/windows_cuda11/gpu_py38.bat @@ -0,0 +1,23 @@ +:: Copyright 2019 The TensorFlow Authors. All Rights Reserved. +:: +:: Licensed under the Apache License, Version 2.0 (the "License"); +:: you may not use this file except in compliance with the License. +:: You may obtain a copy of the License at +:: +:: http://www.apache.org/licenses/LICENSE-2.0 +:: +:: Unless required by applicable law or agreed to in writing, software +:: distributed under the License is distributed on an "AS IS" BASIS, +:: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +:: See the License for the specific language governing permissions and +:: limitations under the License. +:: ============================================================================= + +SET PYTHON_DIRECTORY=Python38 + +CALL tensorflow\tools\ci_build\release\common_win.bat + +call tensorflow\tools\ci_build\windows\gpu\pip\run.bat --release_build --extra_test_flags "--test_env=TF2_BEHAVIOR=1" --project_name "tensorflow" + +for %%a in ("%~dp0\.") do set "PARENT_DIR=%%~nxa" +bash -l tensorflow\tools\ci_build\release\windows\%PARENT_DIR%\release_pip_rename.sh From f621eebcfff1b506ac48716c763988196ef7d881 Mon Sep 17 00:00:00 2001 From: Steve Chien Date: Mon, 10 Aug 2020 17:19:05 -0700 Subject: [PATCH 2480/2522] Add DP-enabled binary-class head and multi-class heads for Estimator. PiperOrigin-RevId: 325921076 Change-Id: I958d492afb9b0d53300559d1880372b27400154e --- tensorflow/python/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index a8a70566ab7..745fed375b8 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -102,6 +102,7 @@ py_library( "//tensorflow/tools/api/tests:__pkg__", "//tensorflow/tools/compatibility/update:__pkg__", "//tensorflow_estimator:__subpackages__", + "//third_party/py/tensorflow_privacy:__subpackages__", # TODO(b/163395075): remove when fixed ], deps = [ ":layers", From 36d55f1c562e89d0e1eddc0a7d4d79049cabb466 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 10 Aug 2020 17:23:57 -0700 Subject: [PATCH 2481/2522] [Profiler] Fix a bug in the memory profiler that caused the disappearance of the TF ops in the memory breakdown table. 
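In outline, down-sampling the snapshot timeline could drop or reorder the snapshots that the active-allocation table referenced by index, so those rows (and their TF ops) no longer lined up with anything. A heavily simplified sketch of the keep-then-remap strategy, not the actual profiler code (the types and names here are invented for illustration; the real change follows below):

  #include <cstdint>
  #include <set>
  #include <unordered_map>
  #include <utility>
  #include <vector>

  struct Allocation { int64_t snapshot_index; };  // index into `snapshots`, -1 if none

  void SampleAndRemap(size_t max_keep, std::vector<int>* snapshots,
                      std::vector<Allocation>* allocations) {
    if (snapshots->size() <= max_keep) return;
    std::set<int64_t> kept;  // original indices that survive sampling
    for (const Allocation& a : *allocations) {    // referenced snapshots are kept first
      if (a.snapshot_index >= 0 && kept.size() < max_keep) kept.insert(a.snapshot_index);
    }
    for (size_t i = 0; i < snapshots->size() && kept.size() < max_keep; ++i) {
      kept.insert(static_cast<int64_t>(i));       // the real code picks these by free memory
    }
    std::unordered_map<int64_t, int64_t> new_index;
    std::vector<int> sampled;
    for (int64_t orig : kept) {
      new_index[orig] = static_cast<int64_t>(sampled.size());
      sampled.push_back((*snapshots)[orig]);
    }
    for (Allocation& a : *allocations) {          // remap every stored index to its new slot
      auto it = new_index.find(a.snapshot_index);
      a.snapshot_index = (it == new_index.end()) ? -1 : it->second;
    }
    *snapshots = std::move(sampled);
  }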
PiperOrigin-RevId: 325921811 Change-Id: I22877b935fa9edcbd66be5489c0b7da29d0276fc --- tensorflow/core/profiler/convert/BUILD | 1 + .../convert/xplane_to_memory_profile.cc | 94 ++++++++++++++++--- 2 files changed, 81 insertions(+), 14 deletions(-) diff --git a/tensorflow/core/profiler/convert/BUILD b/tensorflow/core/profiler/convert/BUILD index 66e027ed8ac..2274a227f4d 100644 --- a/tensorflow/core/profiler/convert/BUILD +++ b/tensorflow/core/profiler/convert/BUILD @@ -514,6 +514,7 @@ cc_library( "//tensorflow/core/profiler/utils:xplane_visitor", "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/types:optional", diff --git a/tensorflow/core/profiler/convert/xplane_to_memory_profile.cc b/tensorflow/core/profiler/convert/xplane_to_memory_profile.cc index 9a5130f63be..3b67124ef27 100644 --- a/tensorflow/core/profiler/convert/xplane_to_memory_profile.cc +++ b/tensorflow/core/profiler/convert/xplane_to_memory_profile.cc @@ -24,11 +24,13 @@ limitations under the License. #include "absl/algorithm/container.h" #include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "absl/strings/str_format.h" #include "absl/strings/string_view.h" #include "absl/types/optional.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/protobuf.h" #include "tensorflow/core/profiler/protobuf/memory_profile.pb.h" @@ -424,23 +426,86 @@ void ProcessActiveAllocations(int64 peak_bytes_profile_step_id, << memory_profile->active_allocations_size(); } +struct Sample { + int64 orig_index; // original index to the snapshot. + MemoryProfileSnapshot* snapshot; +}; + +// This function samples max_num_snapshots from snapshots. We first keep the +// snapshots referenced by active_allocations in the samples. After this, if +// there is still room for more samples, we pick more from snapshots into the +// samples. Then, we sort the samples in time (so that they can be correctly +// displayed on the timeline). Finally, we need to adjust the original indices +// (to snapshots) in active_allocations to the new indices in the samples. void SampleSnapshots( int64 max_num_snapshots, - protobuf::RepeatedPtrField* snapshots) { + protobuf::RepeatedPtrField* snapshots, + protobuf::RepeatedPtrField* active_allocations) { if (snapshots->size() <= max_num_snapshots) return; - absl::c_partial_sort( - *snapshots, snapshots->begin() + max_num_snapshots, - [](const MemoryProfileSnapshot& a, const MemoryProfileSnapshot& b) { - return a.aggregation_stats().free_memory_bytes() < - b.aggregation_stats().free_memory_bytes(); - }); - snapshots->erase(snapshots->begin() + max_num_snapshots, snapshots->end()); - // Sort the memory_profile_snapshots by time_offset_ps (ascending) after - // sampling. - absl::c_sort(*snapshots, [](const MemoryProfileSnapshot& a, - const MemoryProfileSnapshot& b) { - return a.time_offset_ps() < b.time_offset_ps(); + + std::vector samples; + + // First, puts the snapshots referenced by active_allocations in samples[]. 
+ absl::flat_hash_set allocation_snapshot_indices; + for (const auto& allocation : *active_allocations) { + auto orig_index = allocation.snapshot_index(); + if (orig_index < 0) continue; + allocation_snapshot_indices.insert(orig_index); + samples.push_back({orig_index, &(*snapshots)[orig_index]}); + if (allocation_snapshot_indices.size() >= max_num_snapshots) break; + } + + // Second, extracts remaining samples from snapshots. + int64 num_samples_remained = + max_num_snapshots - allocation_snapshot_indices.size(); + if (num_samples_remained > 0) { + std::vector remaining; + for (int64 i = 0; i < snapshots->size(); i++) { + if (allocation_snapshot_indices.contains(i)) continue; + // snapshots[i] is not yet sampled; put it in remaining[] for further + // consideration. + remaining.push_back({i, &(*snapshots)[i]}); + } + // Moves the num_samples_remained snapshots with least free bytes to the + // beginning of remaining[]. + absl::c_partial_sort( + remaining, remaining.begin() + num_samples_remained, + [](const Sample& a, const Sample& b) { + return a.snapshot->aggregation_stats().free_memory_bytes() < + b.snapshot->aggregation_stats().free_memory_bytes(); + }); + // Copies the first num_samples_remained in remaining[] to samples[]. + for (int64 i = 0; i < num_samples_remained; i++) + samples.push_back(remaining[i]); + } + + // Third, sorts samples[] in ascending order of time_offset_ps. + absl::c_sort(samples, [](const Sample& a, const Sample& b) { + return a.snapshot->time_offset_ps() < b.snapshot->time_offset_ps(); }); + + // Fourth, constructs a map from the original snapshot index to samples index. + absl::flat_hash_map index_map; + for (int64 i = 0; i < samples.size(); i++) { + index_map[samples[i].orig_index] = i; + } + + // Fifth, changes the original snapshot indices in active_allocations to the + // sample indices. + for (auto& allocation : *active_allocations) { + auto orig_index = allocation.snapshot_index(); + if (orig_index < 0) continue; + auto new_index = gtl::FindWithDefault(index_map, orig_index, -1); + allocation.set_snapshot_index(new_index); + } + + // Sixth, replaces *snapshot by samples[] + protobuf::RepeatedPtrField new_snapshots; + new_snapshots.Reserve(samples.size()); + for (const auto& sample : samples) { + *new_snapshots.Add() = std::move(*sample.snapshot); + } + *snapshots = std::move(new_snapshots); } // Post-process the memory profile to correctly update proto fields, and break @@ -478,7 +543,8 @@ void ProcessMemoryProfileProto(int64 max_num_snapshots, .peak_bytes_in_use(), allocator_memory_profile); ProcessActiveAllocations(peak_step_id, allocator_memory_profile); - SampleSnapshots(max_num_snapshots, snapshots); + SampleSnapshots(max_num_snapshots, snapshots, + allocator_memory_profile->mutable_active_allocations()); } } From 6fb229b3e7ab5f0f45397fccabf5105266b74f68 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 10 Aug 2020 17:24:25 -0700 Subject: [PATCH 2482/2522] Re-apply github tensorflow/pull/22264/commits/51d72a7d7f74784b68916819edd04e890b36f957 PiperOrigin-RevId: 325921879 Change-Id: I703edc9e0f381d64784027eb9457bc10f5e5aef8 --- tensorflow/python/saved_model/model_utils/BUILD | 2 ++ .../python/saved_model/model_utils/export_output.py | 11 ++++++----- .../saved_model/model_utils/export_output_test.py | 9 ++++++++- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/saved_model/model_utils/BUILD b/tensorflow/python/saved_model/model_utils/BUILD index 775d81a86bc..8e41a613b64 100644 --- a/tensorflow/python/saved_model/model_utils/BUILD +++ b/tensorflow/python/saved_model/model_utils/BUILD @@ -48,6 +48,7 @@ py_strict_library( "//tensorflow/python:constant_op", "//tensorflow/python:dtypes", "//tensorflow/python:framework_ops", + "//tensorflow/python:tensor_util", "//tensorflow/python/saved_model:signature_def_utils", "@six_archive//:six", ], @@ -69,6 +70,7 @@ py_strict_test( "//tensorflow/python:framework_ops", "//tensorflow/python:metrics", "//tensorflow/python:sparse_tensor", + "//tensorflow/python:variables", "//tensorflow/python/eager:context", "//tensorflow/python/saved_model:signature_constants", ], diff --git a/tensorflow/python/saved_model/model_utils/export_output.py b/tensorflow/python/saved_model/model_utils/export_output.py index b571bad067e..9b3ce04e071 100644 --- a/tensorflow/python/saved_model/model_utils/export_output.py +++ b/tensorflow/python/saved_model/model_utils/export_output.py @@ -26,6 +26,7 @@ import six from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_util from tensorflow.python.saved_model import signature_def_utils @@ -342,16 +343,16 @@ class _SupervisedOutput(ExportOutput): raise ValueError( '{} output value must be a Tensor; got {}.'.format( key, metric_val)) - if (not isinstance(metric_op, ops.Tensor) and - not isinstance(metric_op, ops.Operation)): + if not (tensor_util.is_tensor(metric_op) or + isinstance(metric_op, ops.Operation)): raise ValueError( '{} update_op must be a Tensor or Operation; got {}.'.format( key, metric_op)) - # We must wrap any ops in a Tensor before export, as the SignatureDef - # proto expects tensors only. See b/109740581 + # We must wrap any ops (or variables) in a Tensor before export, as the + # SignatureDef proto expects tensors only. 
See b/109740581 metric_op_tensor = metric_op - if isinstance(metric_op, ops.Operation): + if not isinstance(metric_op, ops.Tensor): with ops.control_dependencies([metric_op]): metric_op_tensor = constant_op.constant([], name='metric_op_wrapper') diff --git a/tensorflow/python/saved_model/model_utils/export_output_test.py b/tensorflow/python/saved_model/model_utils/export_output_test.py index 8a3f107ce6c..8fd13b3d72e 100644 --- a/tensorflow/python/saved_model/model_utils/export_output_test.py +++ b/tensorflow/python/saved_model/model_utils/export_output_test.py @@ -29,6 +29,7 @@ from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import metrics as metrics_module +from tensorflow.python.ops import variables from tensorflow.python.platform import test from tensorflow.python.saved_model import signature_constants from tensorflow.python.saved_model.model_utils import export_output as export_output_lib @@ -373,10 +374,16 @@ class SupervisedOutputTest(test.TestCase): mean, update_op = metrics_module.mean_tensor(constant_op.constant([0])) metrics = { 'metrics_1': (mean, update_op), - 'metrics_2': (constant_op.constant([0]), control_flow_ops.no_op()) + 'metrics_2': (constant_op.constant([0]), control_flow_ops.no_op()), + # Keras metric's update_state() could return a Variable, rather than + # an Operation or Tensor. + 'keras_1': (constant_op.constant([0.5]), + variables.Variable(1.0, name='AssignAddVariableOp_3')) } outputter = MockSupervisedOutput(loss, predictions, metrics) + # If we get there, it means constructor succeeded; which is sufficient + # for testing the constructor. self.assertTrue(outputter.metrics['metrics_1/update_op'].name.startswith( 'mean/update_op')) From da05b9c999fad870bfd4107de0d96e6f0733e8bb Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Mon, 10 Aug 2020 17:33:01 -0700 Subject: [PATCH 2483/2522] Disable failed tests. 
PiperOrigin-RevId: 325923288 Change-Id: I1b9ece631c3bfec26e234eb26ee1f2fd74930373 --- tensorflow/core/common_runtime/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/common_runtime/BUILD b/tensorflow/core/common_runtime/BUILD index 4978a613707..a2b9867f132 100644 --- a/tensorflow/core/common_runtime/BUILD +++ b/tensorflow/core/common_runtime/BUILD @@ -2003,7 +2003,7 @@ tf_cc_tests_gpu( "permuter_test.cc", ], linkstatic = tf_kernel_tests_linkstatic(), - tags = ["no_cuda_on_cpu_tap"], + tags = ["notap"], # b/163417734 deps = [ ":core", ":core_cpu", From 4ef3bf9474e8b13c277d8fe2651ba46c6df23500 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Mon, 10 Aug 2020 17:52:52 -0700 Subject: [PATCH 2484/2522] Return None, not ndarray wrapping a None from tape gradient PiperOrigin-RevId: 325926529 Change-Id: Ic0326b07b9d8d396c48aa2c16d5c5e3f8889c434 --- tensorflow/python/eager/backprop.py | 6 +++++- tensorflow/python/ops/numpy_ops/np_interop_test.py | 12 ++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index 7cb3abf4e07..71b1303ecf4 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -1091,7 +1091,11 @@ class GradientTape(object): self._tape = None if rewrap_as_ndarray: - flat_grad = nest.map_structure(np_arrays.tensor_to_ndarray, flat_grad) + def _tensor_to_ndarray(x): + if x is not None: + return np_arrays.tensor_to_ndarray(x) + return None + flat_grad = nest.map_structure(_tensor_to_ndarray, flat_grad) grad = nest.pack_sequence_as(sources, flat_grad) return grad diff --git a/tensorflow/python/ops/numpy_ops/np_interop_test.py b/tensorflow/python/ops/numpy_ops/np_interop_test.py index 3eb7bebd767..0b474035edd 100644 --- a/tensorflow/python/ops/numpy_ops/np_interop_test.py +++ b/tensorflow/python/ops/numpy_ops/np_interop_test.py @@ -98,6 +98,18 @@ class InteropTest(tf.test.TestCase): self.assertAllClose(dx, 2.0) self.assertAllClose(dy, 3.0) + def testGradientTapeNoneGradients(self): + y = np.asarray(2.0) + + with tf.GradientTape() as t: + x = np.asarray(3.0) + t.watch([x]) + z = 2 * x + + dz = t.gradient(z, y) + + self.assertIsNone(dz) + def testCondInterop(self): x = np.asarray(3.0) From 11e82b2a6cd87f4fa47bdd8baa4032e2d29d898b Mon Sep 17 00:00:00 2001 From: Jose Baiocchi Date: Mon, 10 Aug 2020 17:57:55 -0700 Subject: [PATCH 2485/2522] Expose CopyOpMetricsMetadata via header file PiperOrigin-RevId: 325927125 Change-Id: I69b70e48ac41c275ac9ad131a29a8f27cb29efac --- .../convert/op_metrics_db_combiner.cc | 19 +++++++++---------- .../profiler/convert/op_metrics_db_combiner.h | 5 ++++- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/tensorflow/core/profiler/convert/op_metrics_db_combiner.cc b/tensorflow/core/profiler/convert/op_metrics_db_combiner.cc index ad1d4bf380a..425bf0077c3 100644 --- a/tensorflow/core/profiler/convert/op_metrics_db_combiner.cc +++ b/tensorflow/core/profiler/convert/op_metrics_db_combiner.cc @@ -25,8 +25,14 @@ namespace { using OperationType = OpMetrics::MemoryAccessed::OperationType; -// Copies OpMetrics symbol data from src to dst. 
-void CopyOpMetricsSymbolData(const OpMetrics& src, OpMetrics* dst) { +void CombinePrecisionStats(const PrecisionStats& src, PrecisionStats* dst) { + dst->set_compute_16bit_ps(src.compute_16bit_ps() + dst->compute_16bit_ps()); + dst->set_compute_32bit_ps(src.compute_32bit_ps() + dst->compute_32bit_ps()); +} + +} // namespace + +void CopyOpMetricsMetadata(const OpMetrics& src, OpMetrics* dst) { DCHECK(dst != nullptr); DCHECK_EQ(src.hlo_module_id(), dst->hlo_module_id()); DCHECK_EQ(src.name(), dst->name()); @@ -47,13 +53,6 @@ void CopyOpMetricsSymbolData(const OpMetrics& src, OpMetrics* dst) { } } -void CombinePrecisionStats(const PrecisionStats& src, PrecisionStats* dst) { - dst->set_compute_16bit_ps(src.compute_16bit_ps() + dst->compute_16bit_ps()); - dst->set_compute_32bit_ps(src.compute_32bit_ps() + dst->compute_32bit_ps()); -} - -} // namespace - void CombineOpMetrics(const OpMetrics& src, OpMetrics* dst) { DCHECK(dst != nullptr); if (dst->occurrences() == 0) { @@ -115,7 +114,7 @@ void OpMetricsDbCombiner::Combine(const OpMetricsDb& src) { for (const auto& src_metrics : src.metrics_db()) { auto* dst_metrics = LookupOrInsertNewOpMetrics(src_metrics.hlo_module_id(), src_metrics.name()); - CopyOpMetricsSymbolData(src_metrics, dst_metrics); + CopyOpMetricsMetadata(src_metrics, dst_metrics); CombineOpMetrics(src_metrics, dst_metrics); } } diff --git a/tensorflow/core/profiler/convert/op_metrics_db_combiner.h b/tensorflow/core/profiler/convert/op_metrics_db_combiner.h index a87a2b53500..5c1490d2e8b 100644 --- a/tensorflow/core/profiler/convert/op_metrics_db_combiner.h +++ b/tensorflow/core/profiler/convert/op_metrics_db_combiner.h @@ -23,7 +23,10 @@ limitations under the License. namespace tensorflow { namespace profiler { -// Combines the src OpMetrics into the dst OpMetrics. +// Copies OpMetrics metadata (e.g., category, provenance) from src to dst. +void CopyOpMetricsMetadata(const OpMetrics& src, OpMetrics* dst); + +// Combines OpMetrics data (e.g., occurrences, time) from src into dst. void CombineOpMetrics(const OpMetrics& src, OpMetrics* dst); // Combines the memory access breakdown. From 6ea0d3d925a6588f9b48283c6043b14752593cc6 Mon Sep 17 00:00:00 2001 From: Jian Li Date: Mon, 10 Aug 2020 18:54:05 -0700 Subject: [PATCH 2486/2522] Remove duplicated comments. PiperOrigin-RevId: 325934154 Change-Id: I26296779d852b5a6a7b9bc17f06ecc9adc6b3dc8 --- tensorflow/lite/kernels/lstm_eval.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/lite/kernels/lstm_eval.cc b/tensorflow/lite/kernels/lstm_eval.cc index 9087bbeada9..e11a7c5a026 100644 --- a/tensorflow/lite/kernels/lstm_eval.cc +++ b/tensorflow/lite/kernels/lstm_eval.cc @@ -673,7 +673,7 @@ void CalculateLstmGateInteger8x8_8( tensor_utils::ApplyLayerNormFloat( gate, layer_norm_gate_weight, layer_norm_gate_scale_a, layer_norm_gate_scale_b, gate_bias, n_batch, n_cell, gate); - // Apply activation. // Apply activation + // Apply activation. switch (activation) { case kTfLiteActSigmoid: tensor_utils::ApplySigmoidFloat(gate, n_batch, n_cell, gate); From ef20eb2110fc3e1584849b8641aacd7846576b28 Mon Sep 17 00:00:00 2001 From: Hye Soo Yang Date: Mon, 10 Aug 2020 19:26:24 -0700 Subject: [PATCH 2487/2522] Add `stateless_random_crop` to tf.image API; it is a deterministic version of `tf.image.random_crop`. Given the same seed, `stateless_random_crop` guarantees the same results independent of how many times it is called, and independent of global seed settings. 
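For illustration, the determinism guarantee looks like this in use (a usage sketch only, assuming a build that already contains the new op):

  import tensorflow as tf

  image = tf.reshape(tf.range(3 * 5 * 3), (3, 5, 3))
  seed = (1, 2)  # shape-[2] seed, unlike the scalar seed of tf.image.random_crop

  crop_a = tf.image.stateless_random_crop(image, size=(2, 3, 3), seed=seed)
  crop_b = tf.image.stateless_random_crop(image, size=(2, 3, 3), seed=seed)

  # Same seed => identical crop, regardless of how many times the op runs or
  # what tf.random.set_seed() was set to. A different seed gives a different crop.
  assert bool(tf.reduce_all(crop_a == crop_b))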
PiperOrigin-RevId: 325938094 Change-Id: Iad3132e097d71513193304d8aad45a5585656c53 --- tensorflow/core/kernels/random_crop_op.cc | 5 -- .../kernel_tests/random/random_crop_test.py | 85 ++++++++++++++++++- tensorflow/python/ops/random_ops.py | 57 ++++++++++++- .../api/golden/v2/tensorflow.image.pbtxt | 4 + 4 files changed, 142 insertions(+), 9 deletions(-) diff --git a/tensorflow/core/kernels/random_crop_op.cc b/tensorflow/core/kernels/random_crop_op.cc index b89bda4769d..eb7980fa58e 100644 --- a/tensorflow/core/kernels/random_crop_op.cc +++ b/tensorflow/core/kernels/random_crop_op.cc @@ -63,11 +63,6 @@ class RandomCropOp : public OpKernel { if ((target_height == height) && (target_width == width)) { *output = context->input(0); } - - // TODO(shlens): Implement edge case to guarantee output size dimensions. - // Edge case. The target dimensions are larger then the image, so - // zero-pad the image. This guarantees that the image will *always* - // be [target_height, target_width] in size. OP_REQUIRES(context, width >= target_width, errors::FailedPrecondition( "width must be >= target_width: width = ", width, diff --git a/tensorflow/python/kernel_tests/random/random_crop_test.py b/tensorflow/python/kernel_tests/random/random_crop_test.py index 724bee07157..f8effa0ee7b 100644 --- a/tensorflow/python/kernel_tests/random/random_crop_test.py +++ b/tensorflow/python/kernel_tests/random/random_crop_test.py @@ -77,5 +77,88 @@ class RandomCropTest(test.TestCase): self.assertAllClose(counts, mean, atol=four_stddev) -if __name__ == '__main__': +class StatelessRandomCropTest(test.TestCase): + + def testNoOp(self): + # No random cropping is performed since the size is value.shape. + for shape in (2, 1, 1), (2, 1, 3), (4, 5, 3): + value = np.arange(0, np.prod(shape), dtype=np.int32).reshape(shape) + crop = random_ops.stateless_random_crop(value, shape, seed=(1, 2)) + self.evaluate(crop) + self.assertAllEqual(crop, value) + + def testContains(self): + with test_util.use_gpu(): + shape = (3, 5, 7) + target = (2, 3, 4) + value = np.random.randint(1000000, size=shape) + iterations = 10 + value_set = set( + tuple(value[i:i + 2, j:j + 3, k:k + 4].ravel()) # pylint: disable=g-complex-comprehension + for i in range(2) for j in range(3) for k in range(4)) + test_seeds = [ + tuple(map(lambda x, i=i: x + 1 * i, t)) + for (i, t) in enumerate((1, 2) for _ in range(iterations)) + ] + + # Check that the result is valid by making sure that it is one of all + # possible values for randomly cropping `value` with `target` shape. + for seed in test_seeds: + crop = random_ops.stateless_random_crop(value, size=target, seed=seed) + y = self.evaluate(crop) + self.assertAllEqual(y.shape, target) + self.assertIn(tuple(y.ravel()), value_set) + + # TODO(b/162345082): stateless random op generates different random number + # with xla_gpu. Update tests such that there is a single ground truth result + # to test against. + def testRandomization(self): + with test_util.use_gpu(): + shape = [5, 4, 1] + size = np.prod(shape) + single = [1, 1, 1] + value = np.arange(size).reshape(shape) + iterations = 5 + num_samples = 5 + + # Test that the same result is returned given the same seed is provided + # for each round. 
+ test_seed = (1, 2) + observations = [[] for _ in range(iterations)] + for observation in observations: + crop = random_ops.stateless_random_crop(value, single, seed=test_seed) + counts = np.zeros(size, dtype=np.int32) + for _ in range(num_samples): + y = self.evaluate(crop) + self.assertAllEqual(y.shape, single) + counts[y] += 1 + + observation.append(counts) + + for i in range(1, iterations): + self.assertAllEqual(observations[0], observations[i]) + + # Test that the same sequence of results are returned given the same + # sequence of seeds provided. + test_seeds = [ + tuple(map(lambda x, i=i: x + 1 * i, t)) + for (i, t) in enumerate((1, 2) for _ in range(iterations)) + ] + observations = [[] for _ in range(iterations)] + for observation in observations: + counts = np.zeros(size, dtype=np.int32) + for seed in test_seeds: + crop = random_ops.stateless_random_crop( + value, single, seed=seed) + y = self.evaluate(crop) + self.assertAllEqual(y.shape, single) + counts[y] += 1 + + observation.append(counts) + + for i in range(1, iterations): + self.assertAllEqual(observations[0], observations[i]) + + +if __name__ == "__main__": test.main() diff --git a/tensorflow/python/ops/random_ops.py b/tensorflow/python/ops/random_ops.py index 0bb4b78c29f..46a1e321093 100644 --- a/tensorflow/python/ops/random_ops.py +++ b/tensorflow/python/ops/random_ops.py @@ -29,6 +29,7 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gen_random_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import stateless_random_ops # go/tf-wildcard-import # pylint: disable=wildcard-import @@ -373,9 +374,6 @@ def random_crop(value, size, seed=None, name=None): Returns: A cropped tensor of the same rank as `value` and shape `size`. """ - # TODO(shlens): Implement edge case to guarantee output size dimensions. - # If size > value.shape, zero pad the result so that it always has shape - # exactly size. with ops.name_scope(name, "random_crop", [value, size]) as name: value = ops.convert_to_tensor(value, name="value") size = ops.convert_to_tensor(size, dtype=dtypes.int32, name="size") @@ -394,6 +392,59 @@ def random_crop(value, size, seed=None, name=None): return array_ops.slice(value, offset, size, name=name) +@tf_export("image.stateless_random_crop", v1=[]) +@dispatch.add_dispatch_support +def stateless_random_crop(value, size, seed, name=None): + """Randomly crops a tensor to a given size in a deterministic manner. + + Slices a shape `size` portion out of `value` at a uniformly chosen offset. + Requires `value.shape >= size`. + + If a dimension should not be cropped, pass the full size of that dimension. + For example, RGB images can be cropped with + `size = [crop_height, crop_width, 3]`. + + Guarantees the same results given the same `seed` independent of how many + times the function is called, and independent of global seed settings (e.g. + `tf.random.set_seed`). + + Usage Example: + + >>> image = [[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]] + >>> seed = (1, 2) + >>> tf.image.stateless_random_crop(value=image, size=(1, 2, 3), seed=seed) + + + Args: + value: Input tensor to crop. + size: 1-D tensor with size the rank of `value`. + seed: A shape [2] Tensor, the seed to the random number generator. Must have + dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.) + name: A name for this operation (optional). + + Returns: + A cropped tensor of the same rank as `value` and shape `size`. 
+ """ + with ops.name_scope(name, "random_crop", [value, size]) as name: + value = ops.convert_to_tensor(value, name="value") + size = ops.convert_to_tensor(size, dtype=dtypes.int32, name="size") + shape = array_ops.shape(value) + check = control_flow_ops.Assert( + math_ops.reduce_all(shape >= size), + ["Need value.shape >= size, got ", shape, size], + summarize=1000) + shape = control_flow_ops.with_dependencies([check], shape) + limit = shape - size + 1 + offset = stateless_random_ops.stateless_random_uniform( + array_ops.shape(shape), + dtype=size.dtype, + maxval=size.dtype.max, + seed=seed) % limit + return array_ops.slice(value, offset, size, name=name) + + @tf_export(v1=["random.multinomial", "multinomial"]) @dispatch.add_dispatch_support @deprecation.deprecated( diff --git a/tensorflow/tools/api/golden/v2/tensorflow.image.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.image.pbtxt index 8bca192e1c1..fd3cf2988da 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.image.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.image.pbtxt @@ -240,6 +240,10 @@ tf_module { name: "stateless_random_contrast" argspec: "args=[\'image\', \'lower\', \'upper\', \'seed\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "stateless_random_crop" + argspec: "args=[\'value\', \'size\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "stateless_random_flip_left_right" argspec: "args=[\'image\', \'seed\'], varargs=None, keywords=None, defaults=None" From f3e8f1f3f1792796b2e369d6d3b69fbfcaa92968 Mon Sep 17 00:00:00 2001 From: Hye Soo Yang Date: Mon, 10 Aug 2020 20:23:11 -0700 Subject: [PATCH 2488/2522] Add `stateless_sample_distorted_bounding_box` op which is deterministic; it guarantees the same results independent of how many times the they are called, and independent of global seed settings. PiperOrigin-RevId: 325943656 Change-Id: Ia1c3e6f95862c175dcc55be672fd878a4130f3c4 --- RELEASE.md | 8 +- ..._StatelessSampleDistortedBoundingBox.pbtxt | 144 ++++++++++++++++++ ..._StatelessSampleDistortedBoundingBox.pbtxt | 4 + tensorflow/core/kernels/BUILD | 2 +- .../sample_distorted_bounding_box_op.cc | 90 ++++++++--- tensorflow/core/ops/image_ops.cc | 38 +++++ tensorflow/python/ops/image_ops_impl.py | 124 +++++++++++++++ tensorflow/python/ops/image_ops_test.py | 143 +++++++++++++++++ .../api/golden/v1/tensorflow.raw_ops.pbtxt | 4 + .../api/golden/v2/tensorflow.image.pbtxt | 4 + .../api/golden/v2/tensorflow.raw_ops.pbtxt | 4 + 11 files changed, 542 insertions(+), 23 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_StatelessSampleDistortedBoundingBox.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_StatelessSampleDistortedBoundingBox.pbtxt diff --git a/RELEASE.md b/RELEASE.md index 525db3cade8..430e1b83885 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -86,9 +86,11 @@ option. * `tf.image`: * Added deterministic `tf.image.stateless_random_*` functions for each - `tf.image.random_*` function. Given the same seed, the stateless functions - produce the same results independent of how many times the function is - called, and independent of global seed settings. + `tf.image.random_*` function. Added a new op + `stateless_sample_distorted_bounding_box` which is a determinstic + version of `sample_distorted_bounding_box` op. Given the same seed, these + stateless functions/ops produce the same results independent of how many + times the function is called, and independent of global seed settings. 
* `tf.distribute`: * * `tf.keras`: diff --git a/tensorflow/core/api_def/base_api/api_def_StatelessSampleDistortedBoundingBox.pbtxt b/tensorflow/core/api_def/base_api/api_def_StatelessSampleDistortedBoundingBox.pbtxt new file mode 100644 index 00000000000..2c5e32a0c1e --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_StatelessSampleDistortedBoundingBox.pbtxt @@ -0,0 +1,144 @@ +op { + graph_op_name: "StatelessSampleDistortedBoundingBox" + in_arg { + name: "image_size" + description: <>> image = np.array([[[1], [2], [3]], [[4], [5], [6]], [[7], [8], [9]]]) +>>> bbox = tf.constant( +... [0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4]) +>>> seed = (1, 2) +>>> # Generate a single distorted bounding box. +>>> bbox_begin, bbox_size, bbox_draw = ( +... tf.image.stateless_sample_distorted_bounding_box( +... tf.shape(image), bounding_boxes=bbox, seed=seed)) +>>> # Employ the bounding box to distort the image. +>>> tf.slice(image, bbox_begin, bbox_size) + +>>> # Draw the bounding box in an image summary. +>>> colors = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]]) +>>> tf.image.draw_bounding_boxes( +... tf.expand_dims(tf.cast(image, tf.float32),0), bbox_draw, colors) + + +Note that if no bounding box information is available, setting +`use_image_if_no_bounding_boxes = true` will assume there is a single implicit +bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is +false and no bounding boxes are supplied, an error is raised. +END +} diff --git a/tensorflow/core/api_def/python_api/api_def_StatelessSampleDistortedBoundingBox.pbtxt b/tensorflow/core/api_def/python_api/api_def_StatelessSampleDistortedBoundingBox.pbtxt new file mode 100644 index 00000000000..2ee453ee2f5 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_StatelessSampleDistortedBoundingBox.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "StatelessSampleDistortedBoundingBox" + visibility: HIDDEN +} diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index ccb12d9b09d..bfb192023a1 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -3166,7 +3166,7 @@ tf_kernel_library( tf_kernel_library( name = "sample_distorted_bounding_box_op", prefix = "sample_distorted_bounding_box_op", - deps = IMAGE_DEPS, + deps = IMAGE_DEPS + [":stateless_random_ops"], ) tf_kernel_library( diff --git a/tensorflow/core/kernels/sample_distorted_bounding_box_op.cc b/tensorflow/core/kernels/sample_distorted_bounding_box_op.cc index 2936856ec29..3b1cc3d27f0 100644 --- a/tensorflow/core/kernels/sample_distorted_bounding_box_op.cc +++ b/tensorflow/core/kernels/sample_distorted_bounding_box_op.cc @@ -14,12 +14,16 @@ limitations under the License. ==============================================================================*/ // See docs in ../ops/image_ops.cc. 
#include + #include + #include "tensorflow/core/framework/bounds_check.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/stateless_random_ops.h" +#include "tensorflow/core/lib/random/philox_random.h" #include "tensorflow/core/lib/random/simple_philox.h" #include "tensorflow/core/util/guarded_philox_random.h" @@ -201,12 +205,10 @@ bool GenerateRandomCrop(int original_width, int original_height, } // namespace template -class SampleDistortedBoundingBoxV2Op : public OpKernel { +class SampleDistortedBoundingBoxBaseOp : public OpKernel { public: - explicit SampleDistortedBoundingBoxV2Op(OpKernelConstruction* context) + explicit SampleDistortedBoundingBoxBaseOp(OpKernelConstruction* context) : OpKernel(context) { - OP_REQUIRES_OK(context, generator_.Init(context)); - if (context->num_inputs() == 2) { OP_REQUIRES_OK(context, context->GetAttr("min_object_covered", &min_object_covered_)); @@ -252,7 +254,7 @@ class SampleDistortedBoundingBoxV2Op : public OpKernel { max_attempts_)); } - void Compute(OpKernelContext* context) override { + void DoCompute(OpKernelContext* context, const random::PhiloxRandom& rng) { const Tensor& image_size = context->input(0); OP_REQUIRES(context, image_size.dims() == 1, @@ -287,7 +289,11 @@ class SampleDistortedBoundingBoxV2Op : public OpKernel { input_boxes.shape().DebugString())); float min_object_covered_val = 0.0; - if (context->num_inputs() == 3) { + // `SampleDistortedBoundingBox` op accepts 2 inputs and has + // `min_object_covered` as an attribute (handled in the constructor). + // `SampleDistortedBoundingBoxV2` and `StatelessSampleDistortedBoundingBox` + // ops accept 3+ inputs, including `min_object_covered`. + if (context->num_inputs() >= 3) { const Tensor& min_object_covered = context->input(2); OP_REQUIRES( @@ -342,8 +348,8 @@ class SampleDistortedBoundingBoxV2Op : public OpKernel { const float min_sample_aspect_ratio = aspect_ratio_range_[0]; const float max_sample_aspect_ratio = aspect_ratio_range_[1]; - auto local_gen = generator_.ReserveSamples32(4 * max_attempts_); - random::SimplePhilox random(&local_gen); + auto local_rng = rng; + random::SimplePhilox random(&local_rng); Rectangle crop_rect; bool sample_generated = false; @@ -420,8 +426,7 @@ class SampleDistortedBoundingBoxV2Op : public OpKernel { size_data(2) = T(-1); } - private: - GuardedPhiloxRandom generator_; + protected: int32 max_attempts_; std::vector area_range_; std::vector aspect_ratio_range_; @@ -429,15 +434,62 @@ class SampleDistortedBoundingBoxV2Op : public OpKernel { bool use_image_if_no_bounding_boxes_; }; -#define REGISTER_KERNELS(type) \ - REGISTER_KERNEL_BUILDER(Name("SampleDistortedBoundingBox") \ - .Device(DEVICE_CPU) \ - .TypeConstraint("T"), \ - SampleDistortedBoundingBoxV2Op) \ - REGISTER_KERNEL_BUILDER(Name("SampleDistortedBoundingBoxV2") \ - .Device(DEVICE_CPU) \ - .TypeConstraint("T"), \ - SampleDistortedBoundingBoxV2Op) +template +class StatefulSampleDistortedBoundingBoxOp + : public SampleDistortedBoundingBoxBaseOp { + public: + explicit StatefulSampleDistortedBoundingBoxOp(OpKernelConstruction* context) + : SampleDistortedBoundingBoxBaseOp(context) { + OP_REQUIRES_OK(context, generator_.Init(context)); + } + + void Compute(OpKernelContext* context) override { + // Need to reserve samples since `generator_` is shared. 
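+    // Reserving carves a private block of 4 * max_attempts_ 32-bit samples out
+    // of the shared generator for this invocation, so concurrent kernels do
+    // not draw from the same region of the Philox stream. The stateless op
+    // below builds a local PhiloxRandom from the seed instead and therefore
+    // needs no reservation.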
+ this->DoCompute(context, + generator_.ReserveSamples32(4 * this->max_attempts_)); + } + + private: + GuardedPhiloxRandom generator_; +}; + +template +class StatelessSampleDistortedBoundingBoxOp + : public SampleDistortedBoundingBoxBaseOp { + public: + explicit StatelessSampleDistortedBoundingBoxOp(OpKernelConstruction* context) + : SampleDistortedBoundingBoxBaseOp(context) {} + + void Compute(OpKernelContext* context) override { + const Tensor& seed_t = context->input(3); + OP_REQUIRES(context, seed_t.dims() == 1 && seed_t.dim_size(0) == 2, + errors::InvalidArgument("seed must have shape [2], not ", + seed_t.shape().DebugString())); + + // Create and initialize stateless random number generator (rng). + // There is no need to `Skip` (or reserve) samples since the scope of this + // rng is local. + random::PhiloxRandom::Key key; + random::PhiloxRandom::ResultType counter; + OP_REQUIRES_OK(context, GenerateKey(seed_t, &key, &counter)); + + this->DoCompute(context, random::PhiloxRandom(counter, key)); + } +}; + +#define REGISTER_KERNELS(type) \ + REGISTER_KERNEL_BUILDER(Name("SampleDistortedBoundingBox") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T"), \ + StatefulSampleDistortedBoundingBoxOp) \ + REGISTER_KERNEL_BUILDER(Name("SampleDistortedBoundingBoxV2") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T"), \ + StatefulSampleDistortedBoundingBoxOp) \ + REGISTER_KERNEL_BUILDER(Name("StatelessSampleDistortedBoundingBox") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T"), \ + StatelessSampleDistortedBoundingBoxOp) TF_CALL_INTEGRAL_TYPES(REGISTER_KERNELS); #undef REGISTER_KERNELS diff --git a/tensorflow/core/ops/image_ops.cc b/tensorflow/core/ops/image_ops.cc index 43ee65c4ab4..8dfc67f22d3 100644 --- a/tensorflow/core/ops/image_ops.cc +++ b/tensorflow/core/ops/image_ops.cc @@ -758,6 +758,44 @@ REGISTER_OP("SampleDistortedBoundingBoxV2") return Status::OK(); }); +REGISTER_OP("StatelessSampleDistortedBoundingBox") + .Input("image_size: T") + .Input("bounding_boxes: float") + .Input("min_object_covered: float") + .Input("seed: Tseed") + .Output("begin: T") + .Output("size: T") + .Output("bboxes: float") + .Attr("T: {uint8, int8, int16, int32, int64}") + .Attr("Tseed: {int32, int64}") + .Attr("aspect_ratio_range: list(float) = [0.75, 1.33]") + .Attr("area_range: list(float) = [0.05, 1.0]") + .Attr("max_attempts: int = 100") + .Attr("use_image_if_no_bounding_boxes: bool = false") + .SetShapeFn([](InferenceContext* c) { + // Get inputs and validate ranks. 
+ ShapeHandle image_size; + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &image_size)); + ShapeHandle bounding_boxes; + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 3, &bounding_boxes)); + ShapeHandle min_object_covered; + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &min_object_covered)); + ShapeHandle seed; + TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 1, &seed)); + // image_size: 1-D with [height, width, channels] + // bounding_boxes: 3-D with shape [batch, N, 4] + DimensionHandle unused; + TF_RETURN_IF_ERROR(c->WithValue(c->Dim(image_size, 0), 3, &unused)); + TF_RETURN_IF_ERROR(c->WithValue(c->Dim(bounding_boxes, 2), 4, &unused)); + TF_RETURN_IF_ERROR(c->WithValue(c->Dim(seed, 0), 2, &unused)); + + c->set_output(0, c->Vector(3)); + c->set_output(1, c->Vector(3)); + c->set_output(2, c->MakeShape({1, 1, 4})); + + return Status::OK(); + }); + // -------------------------------------------------------------------------- // glimpse = extract_glimpse(input, size, offsets) extract the glimpse diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index 8d542b4eaaa..e728da34117 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -3256,6 +3256,130 @@ def sample_distorted_bounding_box_v2(image_size, name=name) +@tf_export('image.stateless_sample_distorted_bounding_box', v1=[]) +@dispatch.add_dispatch_support +def stateless_sample_distorted_bounding_box(image_size, + bounding_boxes, + seed, + min_object_covered=0.1, + aspect_ratio_range=None, + area_range=None, + max_attempts=None, + use_image_if_no_bounding_boxes=None, + name=None): + """Generate a randomly distorted bounding box for an image deterministically. + + Bounding box annotations are often supplied in addition to ground-truth labels + in image recognition or object localization tasks. A common technique for + training such a system is to randomly distort an image while preserving + its content, i.e. *data augmentation*. This Op, given the same `seed`, + deterministically outputs a randomly distorted localization of an object, i.e. + bounding box, given an `image_size`, `bounding_boxes` and a series of + constraints. + + The output of this Op is a single bounding box that may be used to crop the + original image. The output is returned as 3 tensors: `begin`, `size` and + `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the + image. The latter may be supplied to `tf.image.draw_bounding_boxes` to + visualize what the bounding box looks like. + + Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. + The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width + and the height of the underlying image. + + The output of this Op is guaranteed to be the same given the same `seed` and + is independent of how many times the function is called, and independent of + global seed settings (e.g. `tf.random.set_seed`). + + Example usage: + + >>> image = np.array([[[1], [2], [3]], [[4], [5], [6]], [[7], [8], [9]]]) + >>> bbox = tf.constant( + ... [0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4]) + >>> seed = (1, 2) + >>> # Generate a single distorted bounding box. + >>> bbox_begin, bbox_size, bbox_draw = ( + ... tf.image.stateless_sample_distorted_bounding_box( + ... tf.shape(image), bounding_boxes=bbox, seed=seed)) + >>> # Employ the bounding box to distort the image. + >>> tf.slice(image, bbox_begin, bbox_size) + + >>> # Draw the bounding box in an image summary. 
+ >>> colors = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]]) + >>> tf.image.draw_bounding_boxes( + ... tf.expand_dims(tf.cast(image, tf.float32),0), bbox_draw, colors) + + + Note that if no bounding box information is available, setting + `use_image_if_no_bounding_boxes = true` will assume there is a single implicit + bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is + false and no bounding boxes are supplied, an error is raised. + + Args: + image_size: A `Tensor`. Must be one of the following types: `uint8`, `int8`, + `int16`, `int32`, `int64`. 1-D, containing `[height, width, channels]`. + bounding_boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, N, 4]` + describing the N bounding boxes associated with the image. + seed: A shape [2] Tensor, the seed to the random number generator. Must have + dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.) + min_object_covered: A Tensor of type `float32`. Defaults to `0.1`. The + cropped area of the image must contain at least this fraction of any + bounding box supplied. The value of this parameter should be non-negative. + In the case of 0, the cropped area does not need to overlap any of the + bounding boxes supplied. + aspect_ratio_range: An optional list of `floats`. Defaults to `[0.75, + 1.33]`. The cropped area of the image must have an aspect `ratio = width / + height` within this range. + area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The + cropped area of the image must contain a fraction of the supplied image + within this range. + max_attempts: An optional `int`. Defaults to `100`. Number of attempts at + generating a cropped region of the image of the specified constraints. + After `max_attempts` failures, return the entire image. + use_image_if_no_bounding_boxes: An optional `bool`. Defaults to `False`. + Controls behavior if no bounding boxes supplied. If true, assume an + implicit bounding box covering the whole input. If false, raise an error. + name: A name for the operation (optional). + + Returns: + A tuple of `Tensor` objects (begin, size, bboxes). + + begin: A `Tensor`. Has the same type as `image_size`. 1-D, containing + `[offset_height, offset_width, 0]`. Provide as input to + `tf.slice`. + size: A `Tensor`. Has the same type as `image_size`. 1-D, containing + `[target_height, target_width, -1]`. Provide as input to + `tf.slice`. + bboxes: A `Tensor` of type `float32`. 3-D with shape `[1, 1, 4]` containing + the distorted bounding box. + Provide as input to `tf.image.draw_bounding_boxes`. 
+ """ + with ops.name_scope(name, 'stateless_sample_distorted_bounding_box'): + return gen_image_ops.stateless_sample_distorted_bounding_box( + image_size=image_size, + bounding_boxes=bounding_boxes, + seed=seed, + min_object_covered=min_object_covered, + aspect_ratio_range=aspect_ratio_range, + area_range=area_range, + max_attempts=max_attempts, + use_image_if_no_bounding_boxes=use_image_if_no_bounding_boxes, + name=name) + + @tf_export(v1=['image.sample_distorted_bounding_box']) @dispatch.add_dispatch_support @deprecation.deprecated( diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py index 1adece3474b..210b6c6e65d 100644 --- a/tensorflow/python/ops/image_ops_test.py +++ b/tensorflow/python/ops/image_ops_test.py @@ -2418,6 +2418,149 @@ class SelectDistortedCropBoxTest(test_util.TensorFlowTestCase): end = self.evaluate(end) bbox_for_drawing = self.evaluate(bbox_for_drawing) + def _testStatelessSampleDistortedBoundingBox(self, image, bounding_box, + min_object_covered, + aspect_ratio_range, area_range): + with test_util.use_gpu(): + original_area = float(np.prod(image.shape)) + bounding_box_area = float((bounding_box[3] - bounding_box[1]) * + (bounding_box[2] - bounding_box[0])) + + image_size_np = np.array(image.shape, dtype=np.int32) + bounding_box_np = ( + np.array(bounding_box, dtype=np.float32).reshape([1, 1, 4])) + + iterations = 2 + test_seeds = [(1, 2), (3, 4), (5, 6)] + + for seed in test_seeds: + aspect_ratios = [] + area_ratios = [] + fraction_object_covered = [] + for _ in range(iterations): + image_tf = constant_op.constant(image, shape=image.shape) + image_size_tf = constant_op.constant( + image_size_np, shape=image_size_np.shape) + bounding_box_tf = constant_op.constant(bounding_box_np, + dtype=dtypes.float32, + shape=bounding_box_np.shape) + begin, size, _ = image_ops.stateless_sample_distorted_bounding_box( + image_size=image_size_tf, + bounding_boxes=bounding_box_tf, + seed=seed, + min_object_covered=min_object_covered, + aspect_ratio_range=aspect_ratio_range, + area_range=area_range) + y = array_ops.strided_slice(image_tf, begin, begin + size) + y_tf = self.evaluate(y) + crop_height = y_tf.shape[0] + crop_width = y_tf.shape[1] + aspect_ratio = float(crop_width) / float(crop_height) + area = float(crop_width * crop_height) + aspect_ratios.append(aspect_ratio) + area_ratio = area / original_area + area_ratios.append(area_ratio) + fraction_object_covered.append( + float(np.sum(y_tf)) / bounding_box_area) + + # Check that `area_ratio` is within valid range. + self.assertLessEqual(area_ratio, area_range[1]) + self.assertGreaterEqual(area_ratio, area_range[0]) + + # Each array should consist of one value just repeated `iteration` times + # because the same seed is used. + self.assertEqual(len(set(aspect_ratios)), 1) + self.assertEqual(len(set(area_ratios)), 1) + self.assertEqual(len(set(fraction_object_covered)), 1) + + # TODO(b/162345082): stateless random op generates different random number + # with xla_gpu. Update tests such that there is a single ground truth result + # to test against. 
+ def testWholeImageBoundingBoxStateless(self): + height = 40 + width = 50 + image_size = [height, width, 1] + bounding_box = [0.0, 0.0, 1.0, 1.0] + image = np.arange( + 0, np.prod(image_size), dtype=np.int32).reshape(image_size) + for min_obj_covered in [0.1, constant_op.constant(0.1)]: + self._testStatelessSampleDistortedBoundingBox( + image, + bounding_box, + min_object_covered=min_obj_covered, + aspect_ratio_range=(0.75, 1.33), + area_range=(0.05, 1.0)) + + # TODO(b/162345082): stateless random op generates different random number + # with xla_gpu. Update tests such that there is a single ground truth result + # to test against. + def testWithBoundingBoxStateless(self): + height = 40 + width = 50 + x_shape = [height, width, 1] + image = np.zeros(x_shape, dtype=np.int32) + + xmin = 2 + ymin = 3 + xmax = 12 + ymax = 13 + for x in np.arange(xmin, xmax + 1, 1): + for y in np.arange(ymin, ymax + 1, 1): + image[x, y] = 1 + + # Bounding box is specified as (ymin, xmin, ymax, xmax) in + # relative coordinates. + bounding_box = (float(ymin) / height, float(xmin) / width, + float(ymax) / height, float(xmax) / width) + + # Test both scalar and tensor input for `min_object_covered`. + for min_obj_covered in [0.1, constant_op.constant(0.1)]: + self._testStatelessSampleDistortedBoundingBox( + image, + bounding_box=bounding_box, + min_object_covered=min_obj_covered, + aspect_ratio_range=(0.75, 1.33), + area_range=(0.05, 1.0)) + + def testSampleDistortedBoundingBoxShapeStateless(self): + with test_util.use_gpu(): + image_size = constant_op.constant( + [40, 50, 1], shape=[3], dtype=dtypes.int32) + bounding_box = constant_op.constant( + [[[0.0, 0.0, 1.0, 1.0]]], + shape=[1, 1, 4], + dtype=dtypes.float32, + ) + + bbox_func = functools.partial( + image_ops.stateless_sample_distorted_bounding_box, + image_size=image_size, + bounding_boxes=bounding_box, + min_object_covered=0.1, + aspect_ratio_range=(0.75, 1.33), + area_range=(0.05, 1.0)) + + # Check error is raised with wrong seed shapes. + for seed in [1, (1, 2, 3)]: + with self.assertRaises((ValueError, errors.InvalidArgumentError)): + begin, end, bbox_for_drawing = bbox_func(seed=seed) + + test_seed = (1, 2) + begin, end, bbox_for_drawing = bbox_func(seed=test_seed) + + # Test that the shapes are correct. + self.assertAllEqual([3], begin.get_shape().as_list()) + self.assertAllEqual([3], end.get_shape().as_list()) + self.assertAllEqual([1, 1, 4], bbox_for_drawing.get_shape().as_list()) + + # Actual run to make sure shape is correct inside Compute(). 
+ begin = self.evaluate(begin) + end = self.evaluate(end) + bbox_for_drawing = self.evaluate(bbox_for_drawing) + self.assertAllEqual([3], begin.shape) + self.assertAllEqual([3], end.shape) + self.assertAllEqual([1, 1, 4], bbox_for_drawing.shape) + class ResizeImagesV2Test(test_util.TensorFlowTestCase): diff --git a/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt index 1d74f47508a..0a2843431f2 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt @@ -4552,6 +4552,10 @@ tf_module { name: "StatelessRandomUniformInt" argspec: "args=[\'shape\', \'seed\', \'minval\', \'maxval\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "StatelessSampleDistortedBoundingBox" + argspec: "args=[\'image_size\', \'bounding_boxes\', \'min_object_covered\', \'seed\', \'aspect_ratio_range\', \'area_range\', \'max_attempts\', \'use_image_if_no_bounding_boxes\', \'name\'], varargs=None, keywords=None, defaults=[\'[0.75, 1.33]\', \'[0.05, 1]\', \'100\', \'False\', \'None\'], " + } member_method { name: "StatelessTruncatedNormal" argspec: "args=[\'shape\', \'seed\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\"\", \'None\'], " diff --git a/tensorflow/tools/api/golden/v2/tensorflow.image.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.image.pbtxt index fd3cf2988da..941d811f435 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.image.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.image.pbtxt @@ -264,6 +264,10 @@ tf_module { name: "stateless_random_saturation" argspec: "args=[\'image\', \'lower\', \'upper\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "stateless_sample_distorted_bounding_box" + argspec: "args=[\'image_size\', \'bounding_boxes\', \'seed\', \'min_object_covered\', \'aspect_ratio_range\', \'area_range\', \'max_attempts\', \'use_image_if_no_bounding_boxes\', \'name\'], varargs=None, keywords=None, defaults=[\'0.1\', \'None\', \'None\', \'None\', \'None\', \'None\'], " + } member_method { name: "total_variation" argspec: "args=[\'images\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt index 1d74f47508a..0a2843431f2 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt @@ -4552,6 +4552,10 @@ tf_module { name: "StatelessRandomUniformInt" argspec: "args=[\'shape\', \'seed\', \'minval\', \'maxval\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "StatelessSampleDistortedBoundingBox" + argspec: "args=[\'image_size\', \'bounding_boxes\', \'min_object_covered\', \'seed\', \'aspect_ratio_range\', \'area_range\', \'max_attempts\', \'use_image_if_no_bounding_boxes\', \'name\'], varargs=None, keywords=None, defaults=[\'[0.75, 1.33]\', \'[0.05, 1]\', \'100\', \'False\', \'None\'], " + } member_method { name: "StatelessTruncatedNormal" argspec: "args=[\'shape\', \'seed\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\"\", \'None\'], " From 4b901e2a7ea0b849ad1d1cea311cd131bc089ebe Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Mon, 10 Aug 2020 20:33:26 -0700 Subject: [PATCH 2489/2522] Disable the failed array_elementwise_ops_test for OSS build PiperOrigin-RevId: 325944589 Change-Id: 
Id94df83d6f35acbe912f09bae9f36763f1b90290 --- tensorflow/compiler/xla/tests/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 17444c042e7..3dac381ae7d 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -728,6 +728,7 @@ xla_test( name = "array_elementwise_ops_test", srcs = ["array_elementwise_ops_test.cc"], shard_count = 25, + tags = ["no_oss"], # b/163416869 deps = [ ":test_macros_header", "//tensorflow/compiler/xla:array2d", From 3812bce0ef02a3fc23f2beb55e633da74b2958d3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 10 Aug 2020 21:18:02 -0700 Subject: [PATCH 2490/2522] Update ops-related pbtxt files. PiperOrigin-RevId: 325949809 Change-Id: I7ce980f2d0b6a8f392d05b3aa54777bfea7c9d3f --- .../StatelessSampleDistortedBoundingBox.pbtxt | 88 +++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 88 +++++++++++++++++++ 2 files changed, 176 insertions(+) create mode 100644 tensorflow/core/ops/compat/ops_history_v2/StatelessSampleDistortedBoundingBox.pbtxt diff --git a/tensorflow/core/ops/compat/ops_history_v2/StatelessSampleDistortedBoundingBox.pbtxt b/tensorflow/core/ops/compat/ops_history_v2/StatelessSampleDistortedBoundingBox.pbtxt new file mode 100644 index 00000000000..6858a110cf4 --- /dev/null +++ b/tensorflow/core/ops/compat/ops_history_v2/StatelessSampleDistortedBoundingBox.pbtxt @@ -0,0 +1,88 @@ +op { + name: "StatelessSampleDistortedBoundingBox" + input_arg { + name: "image_size" + type_attr: "T" + } + input_arg { + name: "bounding_boxes" + type: DT_FLOAT + } + input_arg { + name: "min_object_covered" + type: DT_FLOAT + } + input_arg { + name: "seed" + type_attr: "Tseed" + } + output_arg { + name: "begin" + type_attr: "T" + } + output_arg { + name: "size" + type_attr: "T" + } + output_arg { + name: "bboxes" + type: DT_FLOAT + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_UINT8 + type: DT_INT8 + type: DT_INT16 + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "Tseed" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "aspect_ratio_range" + type: "list(float)" + default_value { + list { + f: 0.75 + f: 1.33 + } + } + } + attr { + name: "area_range" + type: "list(float)" + default_value { + list { + f: 0.05 + f: 1 + } + } + } + attr { + name: "max_attempts" + type: "int" + default_value { + i: 100 + } + } + attr { + name: "use_image_if_no_bounding_boxes" + type: "bool" + default_value { + b: false + } + } +} diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 7e138923a8d..7610619019d 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -49730,6 +49730,94 @@ op { } } } +op { + name: "StatelessSampleDistortedBoundingBox" + input_arg { + name: "image_size" + type_attr: "T" + } + input_arg { + name: "bounding_boxes" + type: DT_FLOAT + } + input_arg { + name: "min_object_covered" + type: DT_FLOAT + } + input_arg { + name: "seed" + type_attr: "Tseed" + } + output_arg { + name: "begin" + type_attr: "T" + } + output_arg { + name: "size" + type_attr: "T" + } + output_arg { + name: "bboxes" + type: DT_FLOAT + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_UINT8 + type: DT_INT8 + type: DT_INT16 + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "Tseed" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: 
"aspect_ratio_range" + type: "list(float)" + default_value { + list { + f: 0.75 + f: 1.33 + } + } + } + attr { + name: "area_range" + type: "list(float)" + default_value { + list { + f: 0.05 + f: 1 + } + } + } + attr { + name: "max_attempts" + type: "int" + default_value { + i: 100 + } + } + attr { + name: "use_image_if_no_bounding_boxes" + type: "bool" + default_value { + b: false + } + } +} op { name: "StatelessTruncatedNormal" input_arg { From 1731f0719a3c11e1f5122d8f81534a580b298fe0 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Mon, 10 Aug 2020 21:24:51 -0700 Subject: [PATCH 2491/2522] Provide an implementation for LinearizeToBuffers in TpuTransferManager PiperOrigin-RevId: 325950544 Change-Id: I600856e01f357bcfe3c8f2071ebc42f18a809678 --- tensorflow/stream_executor/tpu/BUILD | 1 + .../tpu/tpu_transfer_manager.cc | 28 +++++++++++++++++++ .../tpu/tpu_transfer_manager.h | 4 +-- 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/tensorflow/stream_executor/tpu/BUILD b/tensorflow/stream_executor/tpu/BUILD index 207984e0c89..93998a4aefc 100644 --- a/tensorflow/stream_executor/tpu/BUILD +++ b/tensorflow/stream_executor/tpu/BUILD @@ -230,6 +230,7 @@ cc_library( hdrs = ["tpu_transfer_manager.h"], deps = [ ":c_api_conversions", + ":noncopyable_buffer", ":proto_helper", ":status_helper", ":tpu_executor_base", diff --git a/tensorflow/stream_executor/tpu/tpu_transfer_manager.cc b/tensorflow/stream_executor/tpu/tpu_transfer_manager.cc index a7288003f8d..9b268a6d8c9 100644 --- a/tensorflow/stream_executor/tpu/tpu_transfer_manager.cc +++ b/tensorflow/stream_executor/tpu/tpu_transfer_manager.cc @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/core/tpu/tpu_api.h" #include "tensorflow/stream_executor/device_memory.h" #include "tensorflow/stream_executor/tpu/c_api_conversions.h" +#include "tensorflow/stream_executor/tpu/noncopyable_buffer.h" #include "tensorflow/stream_executor/tpu/proto_helper.h" #include "tensorflow/stream_executor/tpu/status_helper.h" #include "tensorflow/stream_executor/tpu/tpu_executor_c_api.h" @@ -168,4 +169,31 @@ Status TpuTransferManager::WriteSingleTupleIndexTable( return status.status(); } +Status TpuTransferManager::LinearizeToBuffers( + const xla::LiteralSlice& literal, + std::deque* buffers) { + XLA_Literal c_literal; + ApiConverter::ToC(literal, &c_literal); + + char** buffers_array; + int64_t* buffers_size; + int64_t buffers_array_size; + StatusHelper status; + + tpu::ExecutorApiFn()->TpuTransferManager_LinearizeToBuffersFn( + manager_, &c_literal, &buffers_array, &buffers_size, &buffers_array_size, + status.c_status); + + for (int64_t i = 0; i < buffers_array_size; ++i) { + tpu::NoncopyableBuffer buf(buffers_size[i]); + memcpy(buf.mutable_data().data(), buffers_array[i], buffers_size[i]); + buffers->push_back(std::move(buf)); + } + + tpu::ExecutorApiFn()->TpuTransferManager_FreeBuffersFn( + buffers_array, buffers_size, buffers_array_size); + + return status.status(); +} + } // namespace tensorflow diff --git a/tensorflow/stream_executor/tpu/tpu_transfer_manager.h b/tensorflow/stream_executor/tpu/tpu_transfer_manager.h index e758c702204..558a5106d86 100644 --- a/tensorflow/stream_executor/tpu/tpu_transfer_manager.h +++ b/tensorflow/stream_executor/tpu/tpu_transfer_manager.h @@ -83,9 +83,7 @@ class TpuTransferManager : public xla::TpuTransferManagerInterface { Status LinearizeToBuffers( const xla::LiteralSlice& literal, - std::deque* buffers) override { - LOG(FATAL) << "Not yet implemented."; - } + std::deque* buffers) override; private: 
XLA_TransferManager* manager_; From acbfab2b0191ec7a845eb967b39f1cb08d3a3c3a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 10 Aug 2020 21:47:22 -0700 Subject: [PATCH 2492/2522] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 325952753 Change-Id: Ib8753517fc99d292c911071948e591a8ba0d4b05 --- tensorflow/go/op/wrappers.go | 147 +++++++++++++++++++++++++++++++++++ 1 file changed, 147 insertions(+) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index cd6284aab05..4d39ab20deb 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -12077,6 +12077,153 @@ func ExtractGlimpse(scope *Scope, input tf.Output, size tf.Output, offsets tf.Ou return op.Output(0) } +// StatelessSampleDistortedBoundingBoxAttr is an optional argument to StatelessSampleDistortedBoundingBox. +type StatelessSampleDistortedBoundingBoxAttr func(optionalAttr) + +// StatelessSampleDistortedBoundingBoxAspectRatioRange sets the optional aspect_ratio_range attribute to value. +// +// value: The cropped area of the image must have an aspect ratio = +// width / height within this range. +// If not specified, defaults to +func StatelessSampleDistortedBoundingBoxAspectRatioRange(value []float32) StatelessSampleDistortedBoundingBoxAttr { + return func(m optionalAttr) { + m["aspect_ratio_range"] = value + } +} + +// StatelessSampleDistortedBoundingBoxAreaRange sets the optional area_range attribute to value. +// +// value: The cropped area of the image must contain a fraction of the +// supplied image within this range. +// If not specified, defaults to +func StatelessSampleDistortedBoundingBoxAreaRange(value []float32) StatelessSampleDistortedBoundingBoxAttr { + return func(m optionalAttr) { + m["area_range"] = value + } +} + +// StatelessSampleDistortedBoundingBoxMaxAttempts sets the optional max_attempts attribute to value. +// +// value: Number of attempts at generating a cropped region of the image +// of the specified constraints. After `max_attempts` failures, return the entire +// image. +// If not specified, defaults to 100 +func StatelessSampleDistortedBoundingBoxMaxAttempts(value int64) StatelessSampleDistortedBoundingBoxAttr { + return func(m optionalAttr) { + m["max_attempts"] = value + } +} + +// StatelessSampleDistortedBoundingBoxUseImageIfNoBoundingBoxes sets the optional use_image_if_no_bounding_boxes attribute to value. +// +// value: Controls behavior if no bounding boxes supplied. +// If true, assume an implicit bounding box covering the whole input. If false, +// raise an error. +// If not specified, defaults to false +func StatelessSampleDistortedBoundingBoxUseImageIfNoBoundingBoxes(value bool) StatelessSampleDistortedBoundingBoxAttr { + return func(m optionalAttr) { + m["use_image_if_no_bounding_boxes"] = value + } +} + +// Generate a randomly distorted bounding box for an image deterministically. +// +// Bounding box annotations are often supplied in addition to ground-truth labels +// in image recognition or object localization tasks. A common technique for +// training such a system is to randomly distort an image while preserving its +// content, i.e. *data augmentation*. This Op, given the same `seed`, +// deterministically outputs a randomly distorted localization of an object, i.e. +// bounding box, given an `image_size`, `bounding_boxes` and a series of +// constraints. +// +// The output of this Op is a single bounding box that may be used to crop the +// original image. 
The output is returned as 3 tensors: `begin`, `size` and +// `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the +// image. The latter may be supplied to `tf.image.draw_bounding_boxes` to visualize +// what the bounding box looks like. +// +// Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. The +// bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and +// the height of the underlying image. +// +// The output of this Op is guaranteed to be the same given the same `seed` and is +// independent of how many times the function is called, and independent of global +// seed settings (e.g. `tf.random.set_seed`). +// +// Example usage: +// +// >>> image = np.array([[[1], [2], [3]], [[4], [5], [6]], [[7], [8], [9]]]) +// >>> bbox = tf.constant( +// ... [0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4]) +// >>> seed = (1, 2) +// >>> # Generate a single distorted bounding box. +// >>> bbox_begin, bbox_size, bbox_draw = ( +// ... tf.image.stateless_sample_distorted_bounding_box( +// ... tf.shape(image), bounding_boxes=bbox, seed=seed)) +// >>> # Employ the bounding box to distort the image. +// >>> tf.slice(image, bbox_begin, bbox_size) +// +// >>> # Draw the bounding box in an image summary. +// >>> colors = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]]) +// >>> tf.image.draw_bounding_boxes( +// ... tf.expand_dims(tf.cast(image, tf.float32),0), bbox_draw, colors) +// +// +// Note that if no bounding box information is available, setting +// `use_image_if_no_bounding_boxes = true` will assume there is a single implicit +// bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is +// false and no bounding boxes are supplied, an error is raised. +// +// Arguments: +// image_size: 1-D, containing `[height, width, channels]`. +// bounding_boxes: 3-D with shape `[batch, N, 4]` describing the N bounding boxes +// associated with the image. +// min_object_covered: The cropped area of the image must contain at least this +// fraction of any bounding box supplied. The value of this parameter should be +// non-negative. In the case of 0, the cropped area does not need to overlap +// any of the bounding boxes supplied. +// seed: 1-D with shape `[2]`. The seed to the random number generator. Must have dtype +// `int32` or `int64`. (When using XLA, only `int32` is allowed.) +// +// Returns: +// begin: 1-D, containing `[offset_height, offset_width, 0]`. Provide as input to +// `tf.slice`. +// size: 1-D, containing `[target_height, target_width, -1]`. Provide as input to +// `tf.slice`. +// bboxes: 3-D with shape `[1, 1, 4]` containing the distorted bounding box. +// Provide as input to `tf.image.draw_bounding_boxes`. +func StatelessSampleDistortedBoundingBox(scope *Scope, image_size tf.Output, bounding_boxes tf.Output, min_object_covered tf.Output, seed tf.Output, optional ...StatelessSampleDistortedBoundingBoxAttr) (begin tf.Output, size tf.Output, bboxes tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "StatelessSampleDistortedBoundingBox", + Input: []tf.Input{ + image_size, bounding_boxes, min_object_covered, seed, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + // SampleDistortedBoundingBoxAttr is an optional argument to SampleDistortedBoundingBox. 
type SampleDistortedBoundingBoxAttr func(optionalAttr) From 403258a15c64b0a6fb8d0d40996702bd304446af Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Mon, 10 Aug 2020 22:26:41 -0700 Subject: [PATCH 2493/2522] [TF2XLA] Do not enable XLA devices by default XLA:CPU and XLA:GPU devices are a perennial source of confusion and hard-to-trace bugs due to: 1) Unexpected appearence in the device list 2) Soft placement on these devices Unfortunately, we can't permanently remove them because of a) Test coverage we need for tf2xla bridge when running on XLA devices in compiler/tests b) XRT using XLA:CPU/XLA:GPU devices to insert their hooks Moreover, long term XLA:TPU device is still useful so we can't remove the corresponding codepath either. This patch disables registration of XLA:CPU and XLA:GPU devices by default, unless the option `tf_xla_enable_xla_devices` is set to `True` before the device initialization. This option can be set using one of these ways: i) When running the process: run inside the environment `TF_XLA_FLAGS=--tf_xla_enable_xla_devices` ii) In Python: call `context.context().enable_xla_devices()` (should not be called outside a very specific number of tests) iii) In C++: set `GetXlaDeviceFlags()->tf_xla_enable_xla_devices` to `true` TBD: I. Good testing story: what is a good place to check that XLA devices are not registered? II. Removing special casing XLA devices inside TF: do we still need it for those tests which do actually use XLA devices? Probably most of it can be still removed. PiperOrigin-RevId: 325956767 Change-Id: Ic1bb050d1fe10b9af5a32071b8da9e39d35d9104 --- RELEASE.md | 3 + tensorflow/compiler/jit/BUILD | 1 + tensorflow/compiler/jit/flags.cc | 2 +- tensorflow/compiler/jit/kernels/xla_ops.cc | 2 +- .../jit/mark_for_compilation_pass_test.cc | 5 ++ .../jit/partially_decluster_pass_test.cc | 31 ------- .../compiler/jit/xla_compile_on_demand_op.cc | 53 ++++++----- tensorflow/compiler/jit/xla_device.cc | 29 +++--- tensorflow/compiler/jit/xla_device.h | 2 + .../compiler/jit/xla_ops_regular_devices.cc | 89 +++++++++++++++++++ tensorflow/compiler/jit/xla_platform_info.cc | 7 +- tensorflow/compiler/jit/xla_platform_info.h | 2 +- tensorflow/compiler/tests/BUILD | 1 + .../tests/unary_ops_composition_test.cc | 6 ++ .../compiler/tests/xla_device_gpu_test.py | 5 ++ tensorflow/compiler/tests/xla_test.py | 2 + tensorflow/compiler/tf2xla/BUILD | 2 + .../compiler/tf2xla/const_analysis_test.cc | 6 ++ .../fused_batchnorm_reserve_space_test.cc | 7 ++ tensorflow/compiler/xrt/BUILD | 1 + tensorflow/compiler/xrt/ops/xrt_state_ops.cc | 6 ++ .../optimizers/pin_to_host_optimizer_test.cc | 20 ----- tensorflow/python/eager/context.py | 14 +-- tensorflow/python/framework/config_test.py | 9 -- .../parallel_for/xla_control_flow_ops_test.py | 5 ++ tensorflow/python/tfe_wrapper.cc | 3 + 26 files changed, 206 insertions(+), 107 deletions(-) create mode 100644 tensorflow/compiler/jit/xla_ops_regular_devices.cc diff --git a/RELEASE.md b/RELEASE.md index 430e1b83885..241a5077251 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -33,6 +33,9 @@ shape assumptions (note that you can pass shapes with `None` entries for axes that are meant to be dynamic). You can also disable the input checking entirely by setting `model.input_spec = None`. +* XLA:CPU and XLA:GPU devices are no longer registered by default. Use + `TF_XLA_FLAGS=--tf_xla_enable_xla_devices` if you really need them (to be + removed). 
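To make the opt-in above concrete, a minimal sketch (assuming a build that contains this change; the flag must be set before TensorFlow initializes its devices, so it is exported before the import here):

    import os

    # Re-register the XLA:CPU / XLA:GPU devices that are now off by default.
    os.environ["TF_XLA_FLAGS"] = "--tf_xla_enable_xla_devices"

    import tensorflow as tf

    # With the flag set, XLA_CPU (and XLA_GPU on a GPU build) show up again.
    print([d.name for d in tf.config.list_logical_devices()])

The commit message above also lists a Python-level hook, `context.context().enable_xla_devices()`, but per the message it is meant only for a small set of TensorFlow's own tests.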
## Known Caveats diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index d05bb8264c3..55400c7d3b7 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -206,6 +206,7 @@ cc_library( "xla_device.cc", "xla_device_context.cc", "xla_device_ops.cc", + "xla_ops_regular_devices.cc", "xla_platform_info.cc", ], hdrs = [ diff --git a/tensorflow/compiler/jit/flags.cc b/tensorflow/compiler/jit/flags.cc index ff085c854c6..a4a750bae0d 100644 --- a/tensorflow/compiler/jit/flags.cc +++ b/tensorflow/compiler/jit/flags.cc @@ -159,7 +159,7 @@ void AllocateAndParseFlags() { device_flags = new XlaDeviceFlags; device_flags->tf_xla_compile_on_demand = false; - device_flags->tf_xla_enable_xla_devices = true; + device_flags->tf_xla_enable_xla_devices = false; ops_flags = new XlaOpsCommonFlags; ops_flags->tf_xla_always_defer_compilation = false; diff --git a/tensorflow/compiler/jit/kernels/xla_ops.cc b/tensorflow/compiler/jit/kernels/xla_ops.cc index 9cee4b9af28..de462928c46 100644 --- a/tensorflow/compiler/jit/kernels/xla_ops.cc +++ b/tensorflow/compiler/jit/kernels/xla_ops.cc @@ -191,7 +191,7 @@ static Status CompileToLocalExecutable( absl::optional tf_allocator_adapter; XlaCompiler::Options options = GenerateCompilerOptions( - cache, ctx, platform_info, has_ref_vars, &tf_allocator_adapter); + *cache, ctx, platform_info, has_ref_vars, &tf_allocator_adapter); std::map constant_args; for (int i : constants) { diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc b/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc index e88319bb732..1be3e5ba9e7 100644 --- a/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc +++ b/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc @@ -44,6 +44,11 @@ using ::tensorflow::testing::FindNodeByName; namespace tensorflow { namespace { +static bool Initialized = [] { + tensorflow::GetXlaDeviceFlags()->tf_xla_enable_xla_devices = true; + return true; +}(); + REGISTER_OP("UncompilableNullary").Output("o: float"); REGISTER_OP("UncompilableUnary").Input("a: float").Output("o: float"); diff --git a/tensorflow/compiler/jit/partially_decluster_pass_test.cc b/tensorflow/compiler/jit/partially_decluster_pass_test.cc index 7378d17f88d..87c9fbf0af7 100644 --- a/tensorflow/compiler/jit/partially_decluster_pass_test.cc +++ b/tensorflow/compiler/jit/partially_decluster_pass_test.cc @@ -406,37 +406,6 @@ TEST(PartiallyDeclusterPassTest, DontDeclusterXlaDeviceOps) { EXPECT_EQ(GetXlaClusterForNode(*n), "cluster_0"); } -TEST(PartiallyDeclusterPassTest, DontDeclusterNonTensorFlowOps) { - tensorflow::Scope s = tensorflow::Scope::NewRootScope(); - Output dynamic_slice_operand = - ops::Placeholder(s.WithOpName("dynamic_slice_operand"), DT_INT32, - ops::Placeholder::Attrs{}); - Output dynamic_slice_begin = ops::Placeholder( - s.WithOpName("dynamic_slice_begin"), DT_INT32, ops::Placeholder::Attrs{}); - Output dynamic_slice_size = ops::Placeholder( - s.WithOpName("dynamic_slice_size"), DT_INT32, ops::Placeholder::Attrs{}); - Output dynamic_slice = - ops::XlaDynamicSlice(s.WithOpName("dynamic_slice"), dynamic_slice_operand, - dynamic_slice_begin, dynamic_slice_size); - - Output reshape_input = ops::Placeholder(s.WithOpName("reshape_input"), - DT_FLOAT, ops::Placeholder::Attrs{}); - Output reshape = - ops::Reshape(s.WithOpName("reshape"), reshape_input, dynamic_slice); - - AddToCluster({dynamic_slice.node(), reshape.node()}, "cluster_0"); - - std::unique_ptr graph = absl::make_unique(OpRegistry::Global()); - 
TF_ASSERT_OK(s.ToGraph(graph.get())); - - Node* n = FindNodeByName(*graph, "dynamic_slice"); - ASSERT_NE(n, nullptr); - - TF_ASSERT_OK(PartiallyDecluster(&graph)); - - EXPECT_EQ(GetXlaClusterForNode(*n), "cluster_0"); -} - TEST(PartiallyDeclusterPassTest, EliminatedUnusedNodes) { const char* const kClusteredProducer0Name = "ClusteredProducer0"; const char* const kClusteredProducer1Name = "ClusteredProducer1"; diff --git a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc index 73c512bfa6f..da251c2c8f3 100644 --- a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc +++ b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc @@ -48,9 +48,11 @@ Status XlaCompileOnDemandOp::Run(OpKernelContext* ctx, const ResourceVarsSnapshot& variable_args) { xla::LocalClient* client = static_cast(cache->client()); + absl::optional tf_allocator_adapter; + se::DeviceMemoryAllocator* allocator = + GetAllocator(&tf_allocator_adapter, ctx, platform_info_); XlaComputationLaunchContext launch_context( - client, client->backend().memory_allocator(), - client->default_device_ordinal(), + client, allocator, client->default_device_ordinal(), /*allocate_xla_tensors=*/platform_info_.xla_device_metadata() != nullptr, platform_info_.xla_device_metadata() ? platform_info_.xla_device_metadata()->UseMultipleStreams() @@ -76,7 +78,7 @@ Status XlaCompileOnDemandOp::Run(OpKernelContext* ctx, VLOG(2) << "Executing computation: " << name(); xla::ExecutableRunOptions run_options; run_options.set_stream(stream); - run_options.set_allocator(client->backend().memory_allocator()); + run_options.set_allocator(allocator); run_options.set_intra_op_thread_pool(&ctx->eigen_cpu_device()); run_options.set_rng_seed(GetXLARandomSeed()); @@ -108,6 +110,7 @@ Status XlaCompileOnDemandOp::Compile( for (int64 i = 0; i < ctx->num_inputs(); ++i) { const Tensor& device_tensor = ctx->input(i); + if (const XlaTensor* xla_tensor = XlaTensor::FromTensor(&device_tensor)) { if (xla_tensor->has_host_tensor()) { if (absl::c_binary_search(constant_input_indices, i)) { @@ -118,24 +121,30 @@ Status XlaCompileOnDemandOp::Compile( if (!constant_arguments.count(i)) { if (absl::c_binary_search(constant_input_indices, i)) { - // Slow path; the argument is not available as a host constant so we - // must fetch it synchronously. - Tensor host_tensor; - AllocatorAttributes attrs; - attrs.set_on_host(true); - TF_RETURN_IF_ERROR(ctx->allocate_temp( - device_tensor.dtype(), device_tensor.shape(), &host_tensor, attrs)); - Status status = ctx->op_device_context()->CopyDeviceTensorToCPUSync( - &device_tensor, "ConstantArgument", - reinterpret_cast(ctx->device()), &host_tensor); - if (!status.ok()) { - LOG(ERROR) << "Copying tensor of shape " - << device_tensor.shape().DebugString() << " from " - << ctx->device()->name() << "to CPU failed with " - << status.ToString(); - return status; + if (ctx->input_memory_type(i) != HOST_MEMORY && + ctx->op_device_context()) { + // Slow path; the argument is not available as a host constant so we + // must fetch it synchronously. 
+ Tensor host_tensor; + AllocatorAttributes attrs; + attrs.set_on_host(true); + TF_RETURN_IF_ERROR(ctx->allocate_temp(device_tensor.dtype(), + device_tensor.shape(), + &host_tensor, attrs)); + Status status = ctx->op_device_context()->CopyDeviceTensorToCPUSync( + &device_tensor, "ConstantArgument", + reinterpret_cast(ctx->device()), &host_tensor); + if (!status.ok()) { + LOG(ERROR) << "Copying tensor of shape " + << device_tensor.shape().DebugString() << " from " + << ctx->device()->name() << "to CPU failed with " + << status.ToString(); + return status; + } + constant_arguments[i] = host_tensor; + } else { + constant_arguments[i] = device_tensor; } - constant_arguments[i] = host_tensor; } } } @@ -153,7 +162,7 @@ Status XlaCompileOnDemandOp::Compile( absl::optional tf_allocator_adapter; XlaCompiler::Options options = - GenerateCompilerOptions(*cache, ctx, platform_info_, + GenerateCompilerOptions(**cache, ctx, platform_info_, /*has_ref_vars=*/true, &tf_allocator_adapter); XlaCompiler::CompileOptions compile_options; @@ -184,6 +193,8 @@ void XlaCompileOnDemandOp::Compute(OpKernelContext* ctx) { xla::LocalExecutable* executable; ResourceVarsSnapshot variable_args; XlaCompilationCache* cache; + OP_REQUIRES(ctx, ctx->function_library(), + errors::Internal("Function library missing")); OP_REQUIRES_OK(ctx, Compile(ctx, &result, &cache, &variable_args, &executable)); diff --git a/tensorflow/compiler/jit/xla_device.cc b/tensorflow/compiler/jit/xla_device.cc index 7842513331d..c47c9a29c1a 100644 --- a/tensorflow/compiler/jit/xla_device.cc +++ b/tensorflow/compiler/jit/xla_device.cc @@ -61,6 +61,21 @@ limitations under the License. namespace tensorflow { +// Default PaddedShapeFn implementation that simply returns the unpadded +// on-device shape. This is accurate for CPU and GPU devices that neither +// transpose nor pad tensors. +Status DefaultPaddedShapeFn(const Tensor& tensor, xla::Shape* shape) { + const tensorflow::XlaTensor* xla_tensor = + tensorflow::XlaTensor::FromTensor(&tensor); + if (xla_tensor == nullptr) { + return TensorShapeToXLAShape(tensor.dtype(), tensor.shape(), shape); + } + + const xla::ShapedBuffer& shaped_buffer = xla_tensor->shaped_buffer(); + *shape = shaped_buffer.on_device_shape(); + return Status::OK(); +} + // Caches a XlaDeviceAllocator per pair. A // XlaDeviceAllocator is created on demand and is associated with a // XlaDevice. It outlives the device itself (for instance, the buffer @@ -116,20 +131,6 @@ XlaDeviceAllocator* XlaDeviceAllocatorState::GetOrCreateXlaDeviceAllocator( namespace { -// Default PaddedShapeFn implementation that simply returns the unpadded -// on-device shape. This is accurate for CPU and GPU devices that neither -// transpose nor pad tensors. 
-Status DefaultPaddedShapeFn(const Tensor& tensor, xla::Shape* shape) { - const tensorflow::XlaTensor* xla_tensor = - tensorflow::XlaTensor::FromTensor(&tensor); - if (xla_tensor == nullptr) { - return TensorShapeToXLAShape(tensor.dtype(), tensor.shape(), shape); - } - - const xla::ShapedBuffer& shaped_buffer = xla_tensor->shaped_buffer(); - *shape = shaped_buffer.on_device_shape(); - return Status::OK(); -} static DeviceAttributes BuildXlaDeviceAttributes(const string& name_prefix, const string& device_name, diff --git a/tensorflow/compiler/jit/xla_device.h b/tensorflow/compiler/jit/xla_device.h index 30f9a99e36a..f7e7ee9cf95 100644 --- a/tensorflow/compiler/jit/xla_device.h +++ b/tensorflow/compiler/jit/xla_device.h @@ -280,6 +280,8 @@ struct XlaDeviceOpRegistrations { XlaDeviceOpRegistrations* RegisterXlaDeviceKernels(const char* device, const char* jit_device); +Status DefaultPaddedShapeFn(const Tensor& tensor, xla::Shape* shape); + } // namespace tensorflow #endif // TENSORFLOW_COMPILER_JIT_XLA_DEVICE_H_ diff --git a/tensorflow/compiler/jit/xla_ops_regular_devices.cc b/tensorflow/compiler/jit/xla_ops_regular_devices.cc new file mode 100644 index 00000000000..82510a4926b --- /dev/null +++ b/tensorflow/compiler/jit/xla_ops_regular_devices.cc @@ -0,0 +1,89 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Register XlaXXX operations on regular CPU/GPU devices using +// `XlaCompileOnDemandOp`. 
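+// XlaCompileOnDemandOp JIT-compiles the single op through XLA at execution
+// time, which is how these XlaXXX ops keep working on plain CPU/GPU kernels
+// now that the dedicated XLA devices are no longer registered by default.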
+#include "tensorflow/compiler/jit/xla_compile_on_demand_op.h" +#include "tensorflow/core/framework/op_kernel.h" + +namespace tensorflow { + +#define REGISTER_XLA_OPS_ON_DEVICE(DEVICE) \ + REGISTER_KERNEL_BUILDER(Name("XlaConv") \ + .HostMemory("window_strides") \ + .HostMemory("padding") \ + .HostMemory("lhs_dilation") \ + .HostMemory("rhs_dilation") \ + .HostMemory("feature_group_count") \ + .Device(DEVICE), \ + XlaCompileOnDemandOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("XlaBroadcastHelper").HostMemory("broadcast_dims").Device(DEVICE), \ + XlaCompileOnDemandOp); \ + REGISTER_KERNEL_BUILDER(Name("XlaSelfAdjointEig").Device(DEVICE), \ + XlaCompileOnDemandOp); \ + REGISTER_KERNEL_BUILDER(Name("XlaSvd").Device(DEVICE), \ + XlaCompileOnDemandOp); \ + REGISTER_KERNEL_BUILDER(Name("XlaDot").Device(DEVICE), \ + XlaCompileOnDemandOp); \ + REGISTER_KERNEL_BUILDER(Name("XlaDynamicSlice").Device(DEVICE), \ + XlaCompileOnDemandOp); \ + REGISTER_KERNEL_BUILDER(Name("XlaDynamicUpdateSlice").Device(DEVICE), \ + XlaCompileOnDemandOp); \ + REGISTER_KERNEL_BUILDER(Name("XlaIf").Device(DEVICE), XlaCompileOnDemandOp); \ + REGISTER_KERNEL_BUILDER(Name("XlaPad").Device(DEVICE), \ + XlaCompileOnDemandOp); \ + REGISTER_KERNEL_BUILDER(Name("XlaRecv").Device(DEVICE), \ + XlaCompileOnDemandOp); \ + REGISTER_KERNEL_BUILDER(Name("XlaReduce").Device(DEVICE), \ + XlaCompileOnDemandOp); \ + REGISTER_KERNEL_BUILDER(Name("XlaReduceWindow").Device(DEVICE), \ + XlaCompileOnDemandOp); \ + REGISTER_KERNEL_BUILDER(Name("XlaSelectAndScatter") \ + .HostMemory("window_dimensions") \ + .HostMemory("window_strides") \ + .HostMemory("padding") \ + .Device(DEVICE), \ + XlaCompileOnDemandOp); \ + REGISTER_KERNEL_BUILDER(Name("XlaSend").Device(DEVICE), \ + XlaCompileOnDemandOp); \ + REGISTER_KERNEL_BUILDER(Name("XlaSort").Device(DEVICE), \ + XlaCompileOnDemandOp); \ + REGISTER_KERNEL_BUILDER(Name("XlaKeyValueSort").Device(DEVICE), \ + XlaCompileOnDemandOp); \ + REGISTER_KERNEL_BUILDER(Name("XlaWhile").Device(DEVICE), \ + XlaCompileOnDemandOp); \ + REGISTER_KERNEL_BUILDER(Name("XlaDequantize").Device(DEVICE), \ + XlaCompileOnDemandOp); \ + REGISTER_KERNEL_BUILDER(Name("XlaEinsum").Device(DEVICE), \ + XlaCompileOnDemandOp); \ + REGISTER_KERNEL_BUILDER(Name("XlaSpmdShardToFullShape").Device(DEVICE), \ + XlaCompileOnDemandOp); \ + REGISTER_KERNEL_BUILDER(Name("XlaSharding").Device(DEVICE), \ + XlaCompileOnDemandOp); \ + REGISTER_KERNEL_BUILDER(Name("XlaReplicaId").Device(DEVICE), \ + XlaCompileOnDemandOp); \ + REGISTER_KERNEL_BUILDER(Name("XlaGather") \ + .HostMemory("start_indices") \ + .HostMemory("slice_sizes") \ + .Device(DEVICE), \ + XlaCompileOnDemandOp); \ + REGISTER_KERNEL_BUILDER(Name("XlaScatter").Device(DEVICE), \ + XlaCompileOnDemandOp); + +REGISTER_XLA_OPS_ON_DEVICE(DEVICE_CPU); +REGISTER_XLA_OPS_ON_DEVICE(DEVICE_GPU); + +} // namespace tensorflow diff --git a/tensorflow/compiler/jit/xla_platform_info.cc b/tensorflow/compiler/jit/xla_platform_info.cc index e2a89353055..a5e12b37563 100644 --- a/tensorflow/compiler/jit/xla_platform_info.cc +++ b/tensorflow/compiler/jit/xla_platform_info.cc @@ -128,16 +128,17 @@ se::DeviceMemoryAllocator* GetAllocator( } XlaCompiler::Options GenerateCompilerOptions( - XlaCompilationCache* cache, OpKernelContext* ctx, + const XlaCompilationCache& cache, OpKernelContext* ctx, const XlaPlatformInfo& platform_info, bool has_ref_vars, absl::optional* tf_allocator_adapter) { + CHECK(ctx->function_library()); XlaCompiler::Options options; - options.client = static_cast(cache->client()); + 
options.client = static_cast(cache.client()); if (ctx->op_device_context() != nullptr) { options.device_ordinal = ctx->op_device_context()->stream()->parent()->device_ordinal(); } - options.device_type = cache->device_type(); + options.device_type = cache.device_type(); options.flib_def = ctx->function_library()->GetFunctionLibraryDefinition(); options.graph_def_version = ctx->function_library()->graph_def_version(); options.allow_cpu_custom_calls = diff --git a/tensorflow/compiler/jit/xla_platform_info.h b/tensorflow/compiler/jit/xla_platform_info.h index dac45529ac9..d58b32a996f 100644 --- a/tensorflow/compiler/jit/xla_platform_info.h +++ b/tensorflow/compiler/jit/xla_platform_info.h @@ -99,7 +99,7 @@ se::DeviceMemoryAllocator* GetAllocator( // Returns created options for the XLA compiler, and writes the used allocator // into `tf_allocator_adapter`. XlaCompiler::Options GenerateCompilerOptions( - XlaCompilationCache* cache, OpKernelContext* ctx, + const XlaCompilationCache& cache, OpKernelContext* ctx, const XlaPlatformInfo& platform_info, bool has_ref_vars, absl::optional* tf_allocator_adapter); diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index 924834fc0fc..7f099540f39 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -1687,6 +1687,7 @@ tf_cuda_cc_test( deps = [ "//tensorflow/cc:cc_ops", "//tensorflow/compiler/jit", + "//tensorflow/compiler/jit:flags", "//tensorflow/compiler/jit:xla_kernel_creator", "//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/core:core_cpu", diff --git a/tensorflow/compiler/tests/unary_ops_composition_test.cc b/tensorflow/compiler/tests/unary_ops_composition_test.cc index 569261de094..0e40c497c24 100644 --- a/tensorflow/compiler/tests/unary_ops_composition_test.cc +++ b/tensorflow/compiler/tests/unary_ops_composition_test.cc @@ -20,6 +20,7 @@ limitations under the License. #include #include "absl/synchronization/notification.h" +#include "tensorflow/compiler/jit/flags.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/common_runtime/device_factory.h" @@ -43,6 +44,11 @@ limitations under the License. 
namespace tensorflow { namespace { +static bool Initialized = [] { + tensorflow::GetXlaDeviceFlags()->tf_xla_enable_xla_devices = true; + return true; +}(); + class UnaryOpsCompositionTest : public OpsTestBase { protected: template diff --git a/tensorflow/compiler/tests/xla_device_gpu_test.py b/tensorflow/compiler/tests/xla_device_gpu_test.py index 1e30ebd55d0..304405c82ce 100644 --- a/tensorflow/compiler/tests/xla_device_gpu_test.py +++ b/tensorflow/compiler/tests/xla_device_gpu_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function from tensorflow.python.client import session as session_lib +from tensorflow.python.eager import context from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops @@ -27,6 +28,10 @@ from tensorflow.python.platform import test class XlaDeviceGpuTest(test.TestCase): + def __init__(self, method_name="runTest"): + super(XlaDeviceGpuTest, self).__init__(method_name) + context.context().enable_xla_devices() + def testCopiesToAndFromGpuWork(self): """Tests that copies between GPU and XLA devices work.""" if not test.is_gpu_available(): diff --git a/tensorflow/compiler/tests/xla_test.py b/tensorflow/compiler/tests/xla_test.py index 3b057ed8b17..8c31629c234 100644 --- a/tensorflow/compiler/tests/xla_test.py +++ b/tensorflow/compiler/tests/xla_test.py @@ -83,6 +83,8 @@ class XLATestCase(test.TestCase): def __init__(self, method_name='runTest'): super(XLATestCase, self).__init__(method_name) + if 'XLA' in FLAGS.test_device: + context.context().enable_xla_devices() context.context().enable_mlir_bridge = test_util.is_mlir_bridge_enabled() self.device = FLAGS.test_device diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD index 1e57c11b2cf..ac999d875de 100644 --- a/tensorflow/compiler/tf2xla/BUILD +++ b/tensorflow/compiler/tf2xla/BUILD @@ -787,6 +787,7 @@ tf_cc_test( "//tensorflow/cc:function_ops", "//tensorflow/cc:functional_ops", "//tensorflow/cc:ops", + "//tensorflow/compiler/jit:flags", "//tensorflow/compiler/jit:xla_cluster_util", "//tensorflow/compiler/tf2xla/kernels:xla_ops", "//tensorflow/core:core_cpu_internal", @@ -1087,6 +1088,7 @@ tf_cuda_cc_test( "//tensorflow/cc:ops", "//tensorflow/cc:scope", "//tensorflow/compiler/jit", + "//tensorflow/compiler/jit:flags", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", "//tensorflow/core:framework_internal", diff --git a/tensorflow/compiler/tf2xla/const_analysis_test.cc b/tensorflow/compiler/tf2xla/const_analysis_test.cc index 936b74f7b33..c7c8702b49b 100644 --- a/tensorflow/compiler/tf2xla/const_analysis_test.cc +++ b/tensorflow/compiler/tf2xla/const_analysis_test.cc @@ -21,6 +21,7 @@ limitations under the License. 
#include "tensorflow/cc/ops/function_ops.h" #include "tensorflow/cc/ops/functional_ops.h" #include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/compiler/jit/flags.h" #include "tensorflow/compiler/jit/xla_cluster_util.h" #include "tensorflow/core/common_runtime/process_function_library_runtime.h" #include "tensorflow/core/graph/algorithm.h" @@ -217,5 +218,10 @@ TEST(ConstAnalysisTest, RespectExplicitAttr_1) { EXPECT_EQ(const_args, std::vector({true})); } +static bool Initialized = [] { + tensorflow::GetXlaDeviceFlags()->tf_xla_enable_xla_devices = true; + return true; +}(); + } // namespace } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/fused_batchnorm_reserve_space_test.cc b/tensorflow/compiler/tf2xla/fused_batchnorm_reserve_space_test.cc index 1a26f974989..02f178f9acf 100644 --- a/tensorflow/compiler/tf2xla/fused_batchnorm_reserve_space_test.cc +++ b/tensorflow/compiler/tf2xla/fused_batchnorm_reserve_space_test.cc @@ -26,6 +26,7 @@ limitations under the License. #include "tensorflow/cc/ops/array_ops.h" #include "tensorflow/cc/ops/const_op.h" #include "tensorflow/cc/ops/nn_ops.h" +#include "tensorflow/compiler/jit/flags.h" #include "tensorflow/core/framework/device_attributes.pb.h" #include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/framework/tensor.h" @@ -139,5 +140,11 @@ TEST(FusedBatchnormReserveSpaceTest, Test) { test::ExpectClose(results[0], results[1], /*atol=*/1e-4); test::ExpectClose(results[2], results[3], /*atol=*/1e-4); } + +static bool Initialized = [] { + tensorflow::GetXlaDeviceFlags()->tf_xla_enable_xla_devices = true; + return true; +}(); + } // namespace } // namespace tensorflow diff --git a/tensorflow/compiler/xrt/BUILD b/tensorflow/compiler/xrt/BUILD index 6a704be4adb..172a970d207 100644 --- a/tensorflow/compiler/xrt/BUILD +++ b/tensorflow/compiler/xrt/BUILD @@ -96,6 +96,7 @@ tf_gen_op_libs( "xrt_execute_op", ], deps = [ + "//tensorflow/compiler/jit:flags", "//tensorflow/core:lib", ], ) diff --git a/tensorflow/compiler/xrt/ops/xrt_state_ops.cc b/tensorflow/compiler/xrt/ops/xrt_state_ops.cc index a4be39b96c6..321d7409103 100644 --- a/tensorflow/compiler/xrt/ops/xrt_state_ops.cc +++ b/tensorflow/compiler/xrt/ops/xrt_state_ops.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include "tensorflow/compiler/jit/flags.h" #include "tensorflow/core/framework/common_shape_fns.h" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/shape_inference.h" @@ -20,6 +21,11 @@ limitations under the License. 
namespace tensorflow { +static bool Initialized = [] { + tensorflow::GetXlaDeviceFlags()->tf_xla_enable_xla_devices = true; + return true; +}(); + REGISTER_OP("XRTAllocate") .Input("allocation: string") .Output("handle: int64") diff --git a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc index 641b5b4ef31..4fe8e6a6c3f 100644 --- a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc @@ -44,26 +44,6 @@ TEST_F(PinToHostOptimizerTest, TryFindHostDeviceCpuXlaGpu) { "/device:CPU:0"); } -TEST_F(PinToHostOptimizerTest, TryFindHostDeviceXlaCpuXlaGpu) { - gtl::FlatSet devices = {"/device:XLA_CPU:0", "/device:XLA_GPU:0"}; - - EXPECT_EQ(internal::TryFindHostDevice(devices, false, ""), ""); - EXPECT_EQ(internal::TryFindHostDevice(devices, false, "/device:XLA_GPU:0"), - "/device:XLA_CPU:0"); - EXPECT_EQ(internal::TryFindHostDevice(devices, false, "/device:XLA_GPU:*"), - "/device:XLA_CPU:0"); -} - -TEST_F(PinToHostOptimizerTest, TryFindHostDeviceXlaGpu) { - gtl::FlatSet devices = {"/device:XLA_GPU:0"}; - - EXPECT_EQ(internal::TryFindHostDevice(devices, false, ""), ""); - EXPECT_EQ(internal::TryFindHostDevice(devices, false, "/device:XLA_GPU:0"), - ""); - EXPECT_EQ(internal::TryFindHostDevice(devices, false, "/device:XLA_GPU:*"), - ""); -} - TEST_F(PinToHostOptimizerTest, OptimizeSmallOpsToHost) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); Output a = ops::Const(s.WithOpName("a"), 1, {1024, 1024}); diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py index 765c77af7cd..9c939fe0a76 100644 --- a/tensorflow/python/eager/context.py +++ b/tensorflow/python/eager/context.py @@ -42,6 +42,7 @@ from tensorflow.python.framework import device as pydev from tensorflow.python.util import compat from tensorflow.python.util import is_in_graph_mode from tensorflow.python.util import tf_contextlib +from tensorflow.python.util.deprecation import deprecated from tensorflow.python.util.tf_export import tf_export GRAPH_MODE = 0 @@ -1254,12 +1255,7 @@ class Context(object): p: i for i, p in enumerate(self._physical_devices) } - # Construct the visible device list from all physical devices but ignore - # XLA devices - self._visible_device_list = [ - d for d in self._physical_devices - if not d.device_type.startswith("XLA") - ] + self._visible_device_list = list(self._physical_devices) self._memory_growth_map = { d: None for d in self._physical_devices if d.device_type == "GPU" } @@ -1493,6 +1489,12 @@ class Context(object): self._virtual_device_map[dev] = virtual_devices + @deprecated( + None, "XLA:CPU and XLA:GPU devices are deprecated", warn_once=True) + def enable_xla_devices(self): + """Enables XLA:CPU and XLA:GPU devices registration.""" + pywrap_tfe.TF_EnableXlaDevices() + @property def enable_mlir_bridge(self): return pywrap_tfe.TF_IsMlirBridgeEnabled() diff --git a/tensorflow/python/framework/config_test.py b/tensorflow/python/framework/config_test.py index 70857ef4b83..ee7e111f6b0 100644 --- a/tensorflow/python/framework/config_test.py +++ b/tensorflow/python/framework/config_test.py @@ -435,9 +435,6 @@ class DeviceTest(test.TestCase): self.assertEqual(len(config.get_visible_devices('CPU')), 1) self.assertGreater(len(config.get_visible_devices('GPU')), 0) - # get_visible_devices filters out XLA_* devices. 
list_logical_devices does - # not, but we can't call it here because it initializes the devices and - # calling set_visible_devices after that is disallowed. self.assertEqual(len(config.get_visible_devices('XLA_GPU')), 0) config.set_visible_devices(cpus[0]) @@ -451,12 +448,6 @@ class DeviceTest(test.TestCase): a = array_ops.identity(1.0) self.evaluate(a) - with self.assertRaisesRegex(errors.InvalidArgumentError, - 'Could not satisfy'): - with ops.device('/device:XLA_GPU:0'): - a = array_ops.identity(1.0) - self.evaluate(a) - # Modifying the visible devices is not supported with self.assertRaisesRegex(RuntimeError, 'cannot be modified'): config.set_visible_devices(gpus) diff --git a/tensorflow/python/ops/parallel_for/xla_control_flow_ops_test.py b/tensorflow/python/ops/parallel_for/xla_control_flow_ops_test.py index 33f0d7b76ae..188df3f9b87 100644 --- a/tensorflow/python/ops/parallel_for/xla_control_flow_ops_test.py +++ b/tensorflow/python/ops/parallel_for/xla_control_flow_ops_test.py @@ -22,6 +22,7 @@ from __future__ import print_function from tensorflow.compiler.tf2xla.python import xla as xla_ops from tensorflow.python.compiler.xla import jit from tensorflow.python.compiler.xla import xla +from tensorflow.python.eager import context from tensorflow.python.eager import def_function from tensorflow.python.framework import constant_op from tensorflow.python.framework import test_util @@ -39,6 +40,10 @@ from tensorflow.python.platform import test @test_util.run_all_in_graph_and_eager_modes class PForTest(PForTestCase): + def __init__(self, method_name="runTest"): + super(PForTest, self).__init__(method_name) + context.context().enable_xla_devices() + def test_xla_einsum(self): num_loop = 10 x_series = random_ops.random_uniform([num_loop, 9, 9]) diff --git a/tensorflow/python/tfe_wrapper.cc b/tensorflow/python/tfe_wrapper.cc index c66397036c0..0afd05e94cb 100644 --- a/tensorflow/python/tfe_wrapper.cc +++ b/tensorflow/python/tfe_wrapper.cc @@ -444,6 +444,9 @@ PYBIND11_MODULE(_pywrap_tfe, m) { m.def("TF_EnableMlirBridge", [](bool enabled) { tensorflow::GetMlirCommonFlags()->tf_mlir_enable_mlir_bridge = enabled; }); + m.def("TF_EnableXlaDevices", [] { + tensorflow::GetXlaDeviceFlags()->tf_xla_enable_xla_devices = true; + }); // // TFE_Context Logic m.def( From 61173a23ce65e9d9541d6a1bc84bfd17e3dcfd23 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 11 Aug 2020 00:16:38 -0700 Subject: [PATCH 2494/2522] Internal change PiperOrigin-RevId: 325968539 Change-Id: Ie240a4ab7389e5b54923738ce290ca1a3b0bafa4 --- RELEASE.md | 3 - tensorflow/compiler/jit/BUILD | 1 - tensorflow/compiler/jit/flags.cc | 2 +- tensorflow/compiler/jit/kernels/xla_ops.cc | 2 +- .../jit/mark_for_compilation_pass_test.cc | 5 -- .../jit/partially_decluster_pass_test.cc | 31 +++++++ .../compiler/jit/xla_compile_on_demand_op.cc | 53 +++++------ tensorflow/compiler/jit/xla_device.cc | 29 +++--- tensorflow/compiler/jit/xla_device.h | 2 - .../compiler/jit/xla_ops_regular_devices.cc | 89 ------------------- tensorflow/compiler/jit/xla_platform_info.cc | 7 +- tensorflow/compiler/jit/xla_platform_info.h | 2 +- tensorflow/compiler/tests/BUILD | 1 - .../tests/unary_ops_composition_test.cc | 6 -- .../compiler/tests/xla_device_gpu_test.py | 5 -- tensorflow/compiler/tests/xla_test.py | 2 - tensorflow/compiler/tf2xla/BUILD | 2 - .../compiler/tf2xla/const_analysis_test.cc | 6 -- .../fused_batchnorm_reserve_space_test.cc | 7 -- tensorflow/compiler/xrt/BUILD | 1 - tensorflow/compiler/xrt/ops/xrt_state_ops.cc | 6 -- .../optimizers/pin_to_host_optimizer_test.cc | 20 +++++ tensorflow/python/eager/context.py | 14 ++- tensorflow/python/framework/config_test.py | 9 ++ .../parallel_for/xla_control_flow_ops_test.py | 5 -- tensorflow/python/tfe_wrapper.cc | 3 - 26 files changed, 107 insertions(+), 206 deletions(-) delete mode 100644 tensorflow/compiler/jit/xla_ops_regular_devices.cc diff --git a/RELEASE.md b/RELEASE.md index 241a5077251..430e1b83885 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -33,9 +33,6 @@ shape assumptions (note that you can pass shapes with `None` entries for axes that are meant to be dynamic). You can also disable the input checking entirely by setting `model.input_spec = None`. -* XLA:CPU and XLA:GPU devices are no longer registered by default. Use - `TF_XLA_FLAGS=--tf_xla_enable_xla_devices` if you really need them (to be - removed). 
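The release note removed above documents the opt-in path for the XLA devices: setting the `TF_XLA_FLAGS` environment variable. A minimal, illustrative invocation (the script name is a placeholder, not part of this patch) would be:

```sh
# Re-enable registration of the XLA:CPU and XLA:GPU devices for a single run.
# `my_script.py` is a hypothetical user script, used only for illustration.
TF_XLA_FLAGS=--tf_xla_enable_xla_devices python my_script.py
```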
## Known Caveats diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index 55400c7d3b7..d05bb8264c3 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -206,7 +206,6 @@ cc_library( "xla_device.cc", "xla_device_context.cc", "xla_device_ops.cc", - "xla_ops_regular_devices.cc", "xla_platform_info.cc", ], hdrs = [ diff --git a/tensorflow/compiler/jit/flags.cc b/tensorflow/compiler/jit/flags.cc index a4a750bae0d..ff085c854c6 100644 --- a/tensorflow/compiler/jit/flags.cc +++ b/tensorflow/compiler/jit/flags.cc @@ -159,7 +159,7 @@ void AllocateAndParseFlags() { device_flags = new XlaDeviceFlags; device_flags->tf_xla_compile_on_demand = false; - device_flags->tf_xla_enable_xla_devices = false; + device_flags->tf_xla_enable_xla_devices = true; ops_flags = new XlaOpsCommonFlags; ops_flags->tf_xla_always_defer_compilation = false; diff --git a/tensorflow/compiler/jit/kernels/xla_ops.cc b/tensorflow/compiler/jit/kernels/xla_ops.cc index de462928c46..9cee4b9af28 100644 --- a/tensorflow/compiler/jit/kernels/xla_ops.cc +++ b/tensorflow/compiler/jit/kernels/xla_ops.cc @@ -191,7 +191,7 @@ static Status CompileToLocalExecutable( absl::optional tf_allocator_adapter; XlaCompiler::Options options = GenerateCompilerOptions( - *cache, ctx, platform_info, has_ref_vars, &tf_allocator_adapter); + cache, ctx, platform_info, has_ref_vars, &tf_allocator_adapter); std::map constant_args; for (int i : constants) { diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc b/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc index 1be3e5ba9e7..e88319bb732 100644 --- a/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc +++ b/tensorflow/compiler/jit/mark_for_compilation_pass_test.cc @@ -44,11 +44,6 @@ using ::tensorflow::testing::FindNodeByName; namespace tensorflow { namespace { -static bool Initialized = [] { - tensorflow::GetXlaDeviceFlags()->tf_xla_enable_xla_devices = true; - return true; -}(); - REGISTER_OP("UncompilableNullary").Output("o: float"); REGISTER_OP("UncompilableUnary").Input("a: float").Output("o: float"); diff --git a/tensorflow/compiler/jit/partially_decluster_pass_test.cc b/tensorflow/compiler/jit/partially_decluster_pass_test.cc index 87c9fbf0af7..7378d17f88d 100644 --- a/tensorflow/compiler/jit/partially_decluster_pass_test.cc +++ b/tensorflow/compiler/jit/partially_decluster_pass_test.cc @@ -406,6 +406,37 @@ TEST(PartiallyDeclusterPassTest, DontDeclusterXlaDeviceOps) { EXPECT_EQ(GetXlaClusterForNode(*n), "cluster_0"); } +TEST(PartiallyDeclusterPassTest, DontDeclusterNonTensorFlowOps) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output dynamic_slice_operand = + ops::Placeholder(s.WithOpName("dynamic_slice_operand"), DT_INT32, + ops::Placeholder::Attrs{}); + Output dynamic_slice_begin = ops::Placeholder( + s.WithOpName("dynamic_slice_begin"), DT_INT32, ops::Placeholder::Attrs{}); + Output dynamic_slice_size = ops::Placeholder( + s.WithOpName("dynamic_slice_size"), DT_INT32, ops::Placeholder::Attrs{}); + Output dynamic_slice = + ops::XlaDynamicSlice(s.WithOpName("dynamic_slice"), dynamic_slice_operand, + dynamic_slice_begin, dynamic_slice_size); + + Output reshape_input = ops::Placeholder(s.WithOpName("reshape_input"), + DT_FLOAT, ops::Placeholder::Attrs{}); + Output reshape = + ops::Reshape(s.WithOpName("reshape"), reshape_input, dynamic_slice); + + AddToCluster({dynamic_slice.node(), reshape.node()}, "cluster_0"); + + std::unique_ptr graph = absl::make_unique(OpRegistry::Global()); + 
TF_ASSERT_OK(s.ToGraph(graph.get())); + + Node* n = FindNodeByName(*graph, "dynamic_slice"); + ASSERT_NE(n, nullptr); + + TF_ASSERT_OK(PartiallyDecluster(&graph)); + + EXPECT_EQ(GetXlaClusterForNode(*n), "cluster_0"); +} + TEST(PartiallyDeclusterPassTest, EliminatedUnusedNodes) { const char* const kClusteredProducer0Name = "ClusteredProducer0"; const char* const kClusteredProducer1Name = "ClusteredProducer1"; diff --git a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc index da251c2c8f3..73c512bfa6f 100644 --- a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc +++ b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc @@ -48,11 +48,9 @@ Status XlaCompileOnDemandOp::Run(OpKernelContext* ctx, const ResourceVarsSnapshot& variable_args) { xla::LocalClient* client = static_cast(cache->client()); - absl::optional tf_allocator_adapter; - se::DeviceMemoryAllocator* allocator = - GetAllocator(&tf_allocator_adapter, ctx, platform_info_); XlaComputationLaunchContext launch_context( - client, allocator, client->default_device_ordinal(), + client, client->backend().memory_allocator(), + client->default_device_ordinal(), /*allocate_xla_tensors=*/platform_info_.xla_device_metadata() != nullptr, platform_info_.xla_device_metadata() ? platform_info_.xla_device_metadata()->UseMultipleStreams() @@ -78,7 +76,7 @@ Status XlaCompileOnDemandOp::Run(OpKernelContext* ctx, VLOG(2) << "Executing computation: " << name(); xla::ExecutableRunOptions run_options; run_options.set_stream(stream); - run_options.set_allocator(allocator); + run_options.set_allocator(client->backend().memory_allocator()); run_options.set_intra_op_thread_pool(&ctx->eigen_cpu_device()); run_options.set_rng_seed(GetXLARandomSeed()); @@ -110,7 +108,6 @@ Status XlaCompileOnDemandOp::Compile( for (int64 i = 0; i < ctx->num_inputs(); ++i) { const Tensor& device_tensor = ctx->input(i); - if (const XlaTensor* xla_tensor = XlaTensor::FromTensor(&device_tensor)) { if (xla_tensor->has_host_tensor()) { if (absl::c_binary_search(constant_input_indices, i)) { @@ -121,30 +118,24 @@ Status XlaCompileOnDemandOp::Compile( if (!constant_arguments.count(i)) { if (absl::c_binary_search(constant_input_indices, i)) { - if (ctx->input_memory_type(i) != HOST_MEMORY && - ctx->op_device_context()) { - // Slow path; the argument is not available as a host constant so we - // must fetch it synchronously. - Tensor host_tensor; - AllocatorAttributes attrs; - attrs.set_on_host(true); - TF_RETURN_IF_ERROR(ctx->allocate_temp(device_tensor.dtype(), - device_tensor.shape(), - &host_tensor, attrs)); - Status status = ctx->op_device_context()->CopyDeviceTensorToCPUSync( - &device_tensor, "ConstantArgument", - reinterpret_cast(ctx->device()), &host_tensor); - if (!status.ok()) { - LOG(ERROR) << "Copying tensor of shape " - << device_tensor.shape().DebugString() << " from " - << ctx->device()->name() << "to CPU failed with " - << status.ToString(); - return status; - } - constant_arguments[i] = host_tensor; - } else { - constant_arguments[i] = device_tensor; + // Slow path; the argument is not available as a host constant so we + // must fetch it synchronously. 
+ Tensor host_tensor; + AllocatorAttributes attrs; + attrs.set_on_host(true); + TF_RETURN_IF_ERROR(ctx->allocate_temp( + device_tensor.dtype(), device_tensor.shape(), &host_tensor, attrs)); + Status status = ctx->op_device_context()->CopyDeviceTensorToCPUSync( + &device_tensor, "ConstantArgument", + reinterpret_cast(ctx->device()), &host_tensor); + if (!status.ok()) { + LOG(ERROR) << "Copying tensor of shape " + << device_tensor.shape().DebugString() << " from " + << ctx->device()->name() << "to CPU failed with " + << status.ToString(); + return status; } + constant_arguments[i] = host_tensor; } } } @@ -162,7 +153,7 @@ Status XlaCompileOnDemandOp::Compile( absl::optional tf_allocator_adapter; XlaCompiler::Options options = - GenerateCompilerOptions(**cache, ctx, platform_info_, + GenerateCompilerOptions(*cache, ctx, platform_info_, /*has_ref_vars=*/true, &tf_allocator_adapter); XlaCompiler::CompileOptions compile_options; @@ -193,8 +184,6 @@ void XlaCompileOnDemandOp::Compute(OpKernelContext* ctx) { xla::LocalExecutable* executable; ResourceVarsSnapshot variable_args; XlaCompilationCache* cache; - OP_REQUIRES(ctx, ctx->function_library(), - errors::Internal("Function library missing")); OP_REQUIRES_OK(ctx, Compile(ctx, &result, &cache, &variable_args, &executable)); diff --git a/tensorflow/compiler/jit/xla_device.cc b/tensorflow/compiler/jit/xla_device.cc index c47c9a29c1a..7842513331d 100644 --- a/tensorflow/compiler/jit/xla_device.cc +++ b/tensorflow/compiler/jit/xla_device.cc @@ -61,21 +61,6 @@ limitations under the License. namespace tensorflow { -// Default PaddedShapeFn implementation that simply returns the unpadded -// on-device shape. This is accurate for CPU and GPU devices that neither -// transpose nor pad tensors. -Status DefaultPaddedShapeFn(const Tensor& tensor, xla::Shape* shape) { - const tensorflow::XlaTensor* xla_tensor = - tensorflow::XlaTensor::FromTensor(&tensor); - if (xla_tensor == nullptr) { - return TensorShapeToXLAShape(tensor.dtype(), tensor.shape(), shape); - } - - const xla::ShapedBuffer& shaped_buffer = xla_tensor->shaped_buffer(); - *shape = shaped_buffer.on_device_shape(); - return Status::OK(); -} - // Caches a XlaDeviceAllocator per pair. A // XlaDeviceAllocator is created on demand and is associated with a // XlaDevice. It outlives the device itself (for instance, the buffer @@ -131,6 +116,20 @@ XlaDeviceAllocator* XlaDeviceAllocatorState::GetOrCreateXlaDeviceAllocator( namespace { +// Default PaddedShapeFn implementation that simply returns the unpadded +// on-device shape. This is accurate for CPU and GPU devices that neither +// transpose nor pad tensors. 
+Status DefaultPaddedShapeFn(const Tensor& tensor, xla::Shape* shape) { + const tensorflow::XlaTensor* xla_tensor = + tensorflow::XlaTensor::FromTensor(&tensor); + if (xla_tensor == nullptr) { + return TensorShapeToXLAShape(tensor.dtype(), tensor.shape(), shape); + } + + const xla::ShapedBuffer& shaped_buffer = xla_tensor->shaped_buffer(); + *shape = shaped_buffer.on_device_shape(); + return Status::OK(); +} static DeviceAttributes BuildXlaDeviceAttributes(const string& name_prefix, const string& device_name, diff --git a/tensorflow/compiler/jit/xla_device.h b/tensorflow/compiler/jit/xla_device.h index f7e7ee9cf95..30f9a99e36a 100644 --- a/tensorflow/compiler/jit/xla_device.h +++ b/tensorflow/compiler/jit/xla_device.h @@ -280,8 +280,6 @@ struct XlaDeviceOpRegistrations { XlaDeviceOpRegistrations* RegisterXlaDeviceKernels(const char* device, const char* jit_device); -Status DefaultPaddedShapeFn(const Tensor& tensor, xla::Shape* shape); - } // namespace tensorflow #endif // TENSORFLOW_COMPILER_JIT_XLA_DEVICE_H_ diff --git a/tensorflow/compiler/jit/xla_ops_regular_devices.cc b/tensorflow/compiler/jit/xla_ops_regular_devices.cc deleted file mode 100644 index 82510a4926b..00000000000 --- a/tensorflow/compiler/jit/xla_ops_regular_devices.cc +++ /dev/null @@ -1,89 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// Register XlaXXX operations on regular CPU/GPU devices using -// `XlaCompileOnDemandOp`. 
-#include "tensorflow/compiler/jit/xla_compile_on_demand_op.h" -#include "tensorflow/core/framework/op_kernel.h" - -namespace tensorflow { - -#define REGISTER_XLA_OPS_ON_DEVICE(DEVICE) \ - REGISTER_KERNEL_BUILDER(Name("XlaConv") \ - .HostMemory("window_strides") \ - .HostMemory("padding") \ - .HostMemory("lhs_dilation") \ - .HostMemory("rhs_dilation") \ - .HostMemory("feature_group_count") \ - .Device(DEVICE), \ - XlaCompileOnDemandOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("XlaBroadcastHelper").HostMemory("broadcast_dims").Device(DEVICE), \ - XlaCompileOnDemandOp); \ - REGISTER_KERNEL_BUILDER(Name("XlaSelfAdjointEig").Device(DEVICE), \ - XlaCompileOnDemandOp); \ - REGISTER_KERNEL_BUILDER(Name("XlaSvd").Device(DEVICE), \ - XlaCompileOnDemandOp); \ - REGISTER_KERNEL_BUILDER(Name("XlaDot").Device(DEVICE), \ - XlaCompileOnDemandOp); \ - REGISTER_KERNEL_BUILDER(Name("XlaDynamicSlice").Device(DEVICE), \ - XlaCompileOnDemandOp); \ - REGISTER_KERNEL_BUILDER(Name("XlaDynamicUpdateSlice").Device(DEVICE), \ - XlaCompileOnDemandOp); \ - REGISTER_KERNEL_BUILDER(Name("XlaIf").Device(DEVICE), XlaCompileOnDemandOp); \ - REGISTER_KERNEL_BUILDER(Name("XlaPad").Device(DEVICE), \ - XlaCompileOnDemandOp); \ - REGISTER_KERNEL_BUILDER(Name("XlaRecv").Device(DEVICE), \ - XlaCompileOnDemandOp); \ - REGISTER_KERNEL_BUILDER(Name("XlaReduce").Device(DEVICE), \ - XlaCompileOnDemandOp); \ - REGISTER_KERNEL_BUILDER(Name("XlaReduceWindow").Device(DEVICE), \ - XlaCompileOnDemandOp); \ - REGISTER_KERNEL_BUILDER(Name("XlaSelectAndScatter") \ - .HostMemory("window_dimensions") \ - .HostMemory("window_strides") \ - .HostMemory("padding") \ - .Device(DEVICE), \ - XlaCompileOnDemandOp); \ - REGISTER_KERNEL_BUILDER(Name("XlaSend").Device(DEVICE), \ - XlaCompileOnDemandOp); \ - REGISTER_KERNEL_BUILDER(Name("XlaSort").Device(DEVICE), \ - XlaCompileOnDemandOp); \ - REGISTER_KERNEL_BUILDER(Name("XlaKeyValueSort").Device(DEVICE), \ - XlaCompileOnDemandOp); \ - REGISTER_KERNEL_BUILDER(Name("XlaWhile").Device(DEVICE), \ - XlaCompileOnDemandOp); \ - REGISTER_KERNEL_BUILDER(Name("XlaDequantize").Device(DEVICE), \ - XlaCompileOnDemandOp); \ - REGISTER_KERNEL_BUILDER(Name("XlaEinsum").Device(DEVICE), \ - XlaCompileOnDemandOp); \ - REGISTER_KERNEL_BUILDER(Name("XlaSpmdShardToFullShape").Device(DEVICE), \ - XlaCompileOnDemandOp); \ - REGISTER_KERNEL_BUILDER(Name("XlaSharding").Device(DEVICE), \ - XlaCompileOnDemandOp); \ - REGISTER_KERNEL_BUILDER(Name("XlaReplicaId").Device(DEVICE), \ - XlaCompileOnDemandOp); \ - REGISTER_KERNEL_BUILDER(Name("XlaGather") \ - .HostMemory("start_indices") \ - .HostMemory("slice_sizes") \ - .Device(DEVICE), \ - XlaCompileOnDemandOp); \ - REGISTER_KERNEL_BUILDER(Name("XlaScatter").Device(DEVICE), \ - XlaCompileOnDemandOp); - -REGISTER_XLA_OPS_ON_DEVICE(DEVICE_CPU); -REGISTER_XLA_OPS_ON_DEVICE(DEVICE_GPU); - -} // namespace tensorflow diff --git a/tensorflow/compiler/jit/xla_platform_info.cc b/tensorflow/compiler/jit/xla_platform_info.cc index a5e12b37563..e2a89353055 100644 --- a/tensorflow/compiler/jit/xla_platform_info.cc +++ b/tensorflow/compiler/jit/xla_platform_info.cc @@ -128,17 +128,16 @@ se::DeviceMemoryAllocator* GetAllocator( } XlaCompiler::Options GenerateCompilerOptions( - const XlaCompilationCache& cache, OpKernelContext* ctx, + XlaCompilationCache* cache, OpKernelContext* ctx, const XlaPlatformInfo& platform_info, bool has_ref_vars, absl::optional* tf_allocator_adapter) { - CHECK(ctx->function_library()); XlaCompiler::Options options; - options.client = static_cast(cache.client()); + 
options.client = static_cast(cache->client()); if (ctx->op_device_context() != nullptr) { options.device_ordinal = ctx->op_device_context()->stream()->parent()->device_ordinal(); } - options.device_type = cache.device_type(); + options.device_type = cache->device_type(); options.flib_def = ctx->function_library()->GetFunctionLibraryDefinition(); options.graph_def_version = ctx->function_library()->graph_def_version(); options.allow_cpu_custom_calls = diff --git a/tensorflow/compiler/jit/xla_platform_info.h b/tensorflow/compiler/jit/xla_platform_info.h index d58b32a996f..dac45529ac9 100644 --- a/tensorflow/compiler/jit/xla_platform_info.h +++ b/tensorflow/compiler/jit/xla_platform_info.h @@ -99,7 +99,7 @@ se::DeviceMemoryAllocator* GetAllocator( // Returns created options for the XLA compiler, and writes the used allocator // into `tf_allocator_adapter`. XlaCompiler::Options GenerateCompilerOptions( - const XlaCompilationCache& cache, OpKernelContext* ctx, + XlaCompilationCache* cache, OpKernelContext* ctx, const XlaPlatformInfo& platform_info, bool has_ref_vars, absl::optional* tf_allocator_adapter); diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index 7f099540f39..924834fc0fc 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -1687,7 +1687,6 @@ tf_cuda_cc_test( deps = [ "//tensorflow/cc:cc_ops", "//tensorflow/compiler/jit", - "//tensorflow/compiler/jit:flags", "//tensorflow/compiler/jit:xla_kernel_creator", "//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/core:core_cpu", diff --git a/tensorflow/compiler/tests/unary_ops_composition_test.cc b/tensorflow/compiler/tests/unary_ops_composition_test.cc index 0e40c497c24..569261de094 100644 --- a/tensorflow/compiler/tests/unary_ops_composition_test.cc +++ b/tensorflow/compiler/tests/unary_ops_composition_test.cc @@ -20,7 +20,6 @@ limitations under the License. #include #include "absl/synchronization/notification.h" -#include "tensorflow/compiler/jit/flags.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/common_runtime/device_factory.h" @@ -44,11 +43,6 @@ limitations under the License. 
namespace tensorflow { namespace { -static bool Initialized = [] { - tensorflow::GetXlaDeviceFlags()->tf_xla_enable_xla_devices = true; - return true; -}(); - class UnaryOpsCompositionTest : public OpsTestBase { protected: template diff --git a/tensorflow/compiler/tests/xla_device_gpu_test.py b/tensorflow/compiler/tests/xla_device_gpu_test.py index 304405c82ce..1e30ebd55d0 100644 --- a/tensorflow/compiler/tests/xla_device_gpu_test.py +++ b/tensorflow/compiler/tests/xla_device_gpu_test.py @@ -19,7 +19,6 @@ from __future__ import division from __future__ import print_function from tensorflow.python.client import session as session_lib -from tensorflow.python.eager import context from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops @@ -28,10 +27,6 @@ from tensorflow.python.platform import test class XlaDeviceGpuTest(test.TestCase): - def __init__(self, method_name="runTest"): - super(XlaDeviceGpuTest, self).__init__(method_name) - context.context().enable_xla_devices() - def testCopiesToAndFromGpuWork(self): """Tests that copies between GPU and XLA devices work.""" if not test.is_gpu_available(): diff --git a/tensorflow/compiler/tests/xla_test.py b/tensorflow/compiler/tests/xla_test.py index 8c31629c234..3b057ed8b17 100644 --- a/tensorflow/compiler/tests/xla_test.py +++ b/tensorflow/compiler/tests/xla_test.py @@ -83,8 +83,6 @@ class XLATestCase(test.TestCase): def __init__(self, method_name='runTest'): super(XLATestCase, self).__init__(method_name) - if 'XLA' in FLAGS.test_device: - context.context().enable_xla_devices() context.context().enable_mlir_bridge = test_util.is_mlir_bridge_enabled() self.device = FLAGS.test_device diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD index ac999d875de..1e57c11b2cf 100644 --- a/tensorflow/compiler/tf2xla/BUILD +++ b/tensorflow/compiler/tf2xla/BUILD @@ -787,7 +787,6 @@ tf_cc_test( "//tensorflow/cc:function_ops", "//tensorflow/cc:functional_ops", "//tensorflow/cc:ops", - "//tensorflow/compiler/jit:flags", "//tensorflow/compiler/jit:xla_cluster_util", "//tensorflow/compiler/tf2xla/kernels:xla_ops", "//tensorflow/core:core_cpu_internal", @@ -1088,7 +1087,6 @@ tf_cuda_cc_test( "//tensorflow/cc:ops", "//tensorflow/cc:scope", "//tensorflow/compiler/jit", - "//tensorflow/compiler/jit:flags", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", "//tensorflow/core:framework_internal", diff --git a/tensorflow/compiler/tf2xla/const_analysis_test.cc b/tensorflow/compiler/tf2xla/const_analysis_test.cc index c7c8702b49b..936b74f7b33 100644 --- a/tensorflow/compiler/tf2xla/const_analysis_test.cc +++ b/tensorflow/compiler/tf2xla/const_analysis_test.cc @@ -21,7 +21,6 @@ limitations under the License. 
#include "tensorflow/cc/ops/function_ops.h" #include "tensorflow/cc/ops/functional_ops.h" #include "tensorflow/cc/ops/standard_ops.h" -#include "tensorflow/compiler/jit/flags.h" #include "tensorflow/compiler/jit/xla_cluster_util.h" #include "tensorflow/core/common_runtime/process_function_library_runtime.h" #include "tensorflow/core/graph/algorithm.h" @@ -218,10 +217,5 @@ TEST(ConstAnalysisTest, RespectExplicitAttr_1) { EXPECT_EQ(const_args, std::vector({true})); } -static bool Initialized = [] { - tensorflow::GetXlaDeviceFlags()->tf_xla_enable_xla_devices = true; - return true; -}(); - } // namespace } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/fused_batchnorm_reserve_space_test.cc b/tensorflow/compiler/tf2xla/fused_batchnorm_reserve_space_test.cc index 02f178f9acf..1a26f974989 100644 --- a/tensorflow/compiler/tf2xla/fused_batchnorm_reserve_space_test.cc +++ b/tensorflow/compiler/tf2xla/fused_batchnorm_reserve_space_test.cc @@ -26,7 +26,6 @@ limitations under the License. #include "tensorflow/cc/ops/array_ops.h" #include "tensorflow/cc/ops/const_op.h" #include "tensorflow/cc/ops/nn_ops.h" -#include "tensorflow/compiler/jit/flags.h" #include "tensorflow/core/framework/device_attributes.pb.h" #include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/framework/tensor.h" @@ -140,11 +139,5 @@ TEST(FusedBatchnormReserveSpaceTest, Test) { test::ExpectClose(results[0], results[1], /*atol=*/1e-4); test::ExpectClose(results[2], results[3], /*atol=*/1e-4); } - -static bool Initialized = [] { - tensorflow::GetXlaDeviceFlags()->tf_xla_enable_xla_devices = true; - return true; -}(); - } // namespace } // namespace tensorflow diff --git a/tensorflow/compiler/xrt/BUILD b/tensorflow/compiler/xrt/BUILD index 172a970d207..6a704be4adb 100644 --- a/tensorflow/compiler/xrt/BUILD +++ b/tensorflow/compiler/xrt/BUILD @@ -96,7 +96,6 @@ tf_gen_op_libs( "xrt_execute_op", ], deps = [ - "//tensorflow/compiler/jit:flags", "//tensorflow/core:lib", ], ) diff --git a/tensorflow/compiler/xrt/ops/xrt_state_ops.cc b/tensorflow/compiler/xrt/ops/xrt_state_ops.cc index 321d7409103..a4be39b96c6 100644 --- a/tensorflow/compiler/xrt/ops/xrt_state_ops.cc +++ b/tensorflow/compiler/xrt/ops/xrt_state_ops.cc @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/compiler/jit/flags.h" #include "tensorflow/core/framework/common_shape_fns.h" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/shape_inference.h" @@ -21,11 +20,6 @@ limitations under the License. 
namespace tensorflow { -static bool Initialized = [] { - tensorflow::GetXlaDeviceFlags()->tf_xla_enable_xla_devices = true; - return true; -}(); - REGISTER_OP("XRTAllocate") .Input("allocation: string") .Output("handle: int64") diff --git a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc index 4fe8e6a6c3f..641b5b4ef31 100644 --- a/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/pin_to_host_optimizer_test.cc @@ -44,6 +44,26 @@ TEST_F(PinToHostOptimizerTest, TryFindHostDeviceCpuXlaGpu) { "/device:CPU:0"); } +TEST_F(PinToHostOptimizerTest, TryFindHostDeviceXlaCpuXlaGpu) { + gtl::FlatSet devices = {"/device:XLA_CPU:0", "/device:XLA_GPU:0"}; + + EXPECT_EQ(internal::TryFindHostDevice(devices, false, ""), ""); + EXPECT_EQ(internal::TryFindHostDevice(devices, false, "/device:XLA_GPU:0"), + "/device:XLA_CPU:0"); + EXPECT_EQ(internal::TryFindHostDevice(devices, false, "/device:XLA_GPU:*"), + "/device:XLA_CPU:0"); +} + +TEST_F(PinToHostOptimizerTest, TryFindHostDeviceXlaGpu) { + gtl::FlatSet devices = {"/device:XLA_GPU:0"}; + + EXPECT_EQ(internal::TryFindHostDevice(devices, false, ""), ""); + EXPECT_EQ(internal::TryFindHostDevice(devices, false, "/device:XLA_GPU:0"), + ""); + EXPECT_EQ(internal::TryFindHostDevice(devices, false, "/device:XLA_GPU:*"), + ""); +} + TEST_F(PinToHostOptimizerTest, OptimizeSmallOpsToHost) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); Output a = ops::Const(s.WithOpName("a"), 1, {1024, 1024}); diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py index 9c939fe0a76..765c77af7cd 100644 --- a/tensorflow/python/eager/context.py +++ b/tensorflow/python/eager/context.py @@ -42,7 +42,6 @@ from tensorflow.python.framework import device as pydev from tensorflow.python.util import compat from tensorflow.python.util import is_in_graph_mode from tensorflow.python.util import tf_contextlib -from tensorflow.python.util.deprecation import deprecated from tensorflow.python.util.tf_export import tf_export GRAPH_MODE = 0 @@ -1255,7 +1254,12 @@ class Context(object): p: i for i, p in enumerate(self._physical_devices) } - self._visible_device_list = list(self._physical_devices) + # Construct the visible device list from all physical devices but ignore + # XLA devices + self._visible_device_list = [ + d for d in self._physical_devices + if not d.device_type.startswith("XLA") + ] self._memory_growth_map = { d: None for d in self._physical_devices if d.device_type == "GPU" } @@ -1489,12 +1493,6 @@ class Context(object): self._virtual_device_map[dev] = virtual_devices - @deprecated( - None, "XLA:CPU and XLA:GPU devices are deprecated", warn_once=True) - def enable_xla_devices(self): - """Enables XLA:CPU and XLA:GPU devices registration.""" - pywrap_tfe.TF_EnableXlaDevices() - @property def enable_mlir_bridge(self): return pywrap_tfe.TF_IsMlirBridgeEnabled() diff --git a/tensorflow/python/framework/config_test.py b/tensorflow/python/framework/config_test.py index ee7e111f6b0..70857ef4b83 100644 --- a/tensorflow/python/framework/config_test.py +++ b/tensorflow/python/framework/config_test.py @@ -435,6 +435,9 @@ class DeviceTest(test.TestCase): self.assertEqual(len(config.get_visible_devices('CPU')), 1) self.assertGreater(len(config.get_visible_devices('GPU')), 0) + # get_visible_devices filters out XLA_* devices. 
list_logical_devices does + # not, but we can't call it here because it initializes the devices and + # calling set_visible_devices after that is disallowed. self.assertEqual(len(config.get_visible_devices('XLA_GPU')), 0) config.set_visible_devices(cpus[0]) @@ -448,6 +451,12 @@ class DeviceTest(test.TestCase): a = array_ops.identity(1.0) self.evaluate(a) + with self.assertRaisesRegex(errors.InvalidArgumentError, + 'Could not satisfy'): + with ops.device('/device:XLA_GPU:0'): + a = array_ops.identity(1.0) + self.evaluate(a) + # Modifying the visible devices is not supported with self.assertRaisesRegex(RuntimeError, 'cannot be modified'): config.set_visible_devices(gpus) diff --git a/tensorflow/python/ops/parallel_for/xla_control_flow_ops_test.py b/tensorflow/python/ops/parallel_for/xla_control_flow_ops_test.py index 188df3f9b87..33f0d7b76ae 100644 --- a/tensorflow/python/ops/parallel_for/xla_control_flow_ops_test.py +++ b/tensorflow/python/ops/parallel_for/xla_control_flow_ops_test.py @@ -22,7 +22,6 @@ from __future__ import print_function from tensorflow.compiler.tf2xla.python import xla as xla_ops from tensorflow.python.compiler.xla import jit from tensorflow.python.compiler.xla import xla -from tensorflow.python.eager import context from tensorflow.python.eager import def_function from tensorflow.python.framework import constant_op from tensorflow.python.framework import test_util @@ -40,10 +39,6 @@ from tensorflow.python.platform import test @test_util.run_all_in_graph_and_eager_modes class PForTest(PForTestCase): - def __init__(self, method_name="runTest"): - super(PForTest, self).__init__(method_name) - context.context().enable_xla_devices() - def test_xla_einsum(self): num_loop = 10 x_series = random_ops.random_uniform([num_loop, 9, 9]) diff --git a/tensorflow/python/tfe_wrapper.cc b/tensorflow/python/tfe_wrapper.cc index 0afd05e94cb..c66397036c0 100644 --- a/tensorflow/python/tfe_wrapper.cc +++ b/tensorflow/python/tfe_wrapper.cc @@ -444,9 +444,6 @@ PYBIND11_MODULE(_pywrap_tfe, m) { m.def("TF_EnableMlirBridge", [](bool enabled) { tensorflow::GetMlirCommonFlags()->tf_mlir_enable_mlir_bridge = enabled; }); - m.def("TF_EnableXlaDevices", [] { - tensorflow::GetXlaDeviceFlags()->tf_xla_enable_xla_devices = true; - }); // // TFE_Context Logic m.def( From 56548c2425ffb735cf85c4a147ccd71aadf79f4d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 11 Aug 2020 00:34:59 -0700 Subject: [PATCH 2495/2522] Add std:: qualifications to all references to std::string and std::basic_string. PiperOrigin-RevId: 325970529 Change-Id: Id596be41afa3a19116d00197773b8a89dd98ebdc --- tensorflow/core/platform/file_system.h | 34 ++++++++++++++------------ 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/tensorflow/core/platform/file_system.h b/tensorflow/core/platform/file_system.h index 28d09c39db1..ca9f0fd5145 100644 --- a/tensorflow/core/platform/file_system.h +++ b/tensorflow/core/platform/file_system.h @@ -68,7 +68,7 @@ class FileSystem { /// The ownership of the returned RandomAccessFile is passed to the caller /// and the object should be deleted when is not used. virtual tensorflow::Status NewRandomAccessFile( - const string& fname, std::unique_ptr* result) { + const std::string& fname, std::unique_ptr* result) { return NewRandomAccessFile(fname, nullptr, result); }; @@ -93,7 +93,7 @@ class FileSystem { /// The ownership of the returned WritableFile is passed to the caller /// and the object should be deleted when is not used. 
virtual tensorflow::Status NewWritableFile( - const string& fname, std::unique_ptr* result) { + const std::string& fname, std::unique_ptr* result) { return NewWritableFile(fname, nullptr, result); }; @@ -115,7 +115,7 @@ class FileSystem { /// The ownership of the returned WritableFile is passed to the caller /// and the object should be deleted when is not used. virtual tensorflow::Status NewAppendableFile( - const string& fname, std::unique_ptr* result) { + const std::string& fname, std::unique_ptr* result) { return NewAppendableFile(fname, nullptr, result); }; @@ -136,7 +136,7 @@ class FileSystem { /// The ownership of the returned ReadOnlyMemoryRegion is passed to the caller /// and the object should be deleted when is not used. virtual tensorflow::Status NewReadOnlyMemoryRegionFromFile( - const string& fname, std::unique_ptr* result) { + const std::string& fname, std::unique_ptr* result) { return NewReadOnlyMemoryRegionFromFile(fname, nullptr, result); } @@ -147,7 +147,7 @@ class FileSystem { } /// Returns OK if the named path exists and NOT_FOUND otherwise. - virtual tensorflow::Status FileExists(const string& fname) { + virtual tensorflow::Status FileExists(const std::string& fname) { return FileExists(fname, nullptr); }; @@ -222,7 +222,8 @@ class FileSystem { virtual bool Match(const std::string& filename, const std::string& pattern); /// \brief Obtains statistics for the given path. - virtual tensorflow::Status Stat(const string& fname, FileStatistics* stat) { + virtual tensorflow::Status Stat(const std::string& fname, + FileStatistics* stat) { return Stat(fname, nullptr, stat); } @@ -233,7 +234,7 @@ class FileSystem { } /// \brief Deletes the named file. - virtual tensorflow::Status DeleteFile(const string& fname) { + virtual tensorflow::Status DeleteFile(const std::string& fname) { return DeleteFile(fname, nullptr); } @@ -247,7 +248,7 @@ class FileSystem { /// * OK - successfully created the directory. /// * ALREADY_EXISTS - directory with name dirname already exists. /// * PERMISSION_DENIED - dirname is not writable. - virtual tensorflow::Status CreateDir(const string& dirname) { + virtual tensorflow::Status CreateDir(const std::string& dirname) { return CreateDir(dirname, nullptr); } @@ -262,7 +263,7 @@ class FileSystem { /// * OK - successfully created the directory and sub directories, even if /// they were already created. /// * PERMISSION_DENIED - dirname or some subdirectory is not writable. - virtual tensorflow::Status RecursivelyCreateDir(const string& dirname) { + virtual tensorflow::Status RecursivelyCreateDir(const std::string& dirname) { return RecursivelyCreateDir(dirname, nullptr); } @@ -270,7 +271,7 @@ class FileSystem { TransactionToken* token); /// \brief Deletes the specified directory. - virtual tensorflow::Status DeleteDir(const string& dirname) { + virtual tensorflow::Status DeleteDir(const std::string& dirname) { return DeleteDir(dirname, nullptr); }; @@ -309,7 +310,7 @@ class FileSystem { return DeleteRecursively(dirname, nullptr, undeleted_files, undeleted_dirs); } - virtual tensorflow::Status DeleteRecursively(const string& dirname, + virtual tensorflow::Status DeleteRecursively(const std::string& dirname, TransactionToken* token, int64* undeleted_files, int64* undeleted_dirs); @@ -327,8 +328,8 @@ class FileSystem { } /// \brief Overwrites the target if it exists. 
- virtual tensorflow::Status RenameFile(const string& src, - const string& target) { + virtual tensorflow::Status RenameFile(const std::string& src, + const std::string& target) { return RenameFile(src, target, nullptr); } @@ -339,7 +340,8 @@ class FileSystem { } /// \brief Copy the src to target. - virtual tensorflow::Status CopyFile(const string& src, const string& target) { + virtual tensorflow::Status CopyFile(const std::string& src, + const std::string& target) { return CopyFile(src, target, nullptr); } @@ -365,7 +367,7 @@ class FileSystem { /// * NOT_FOUND - The path entry does not exist. /// * PERMISSION_DENIED - Insufficient permissions. /// * UNIMPLEMENTED - The file factory doesn't support directories. - virtual tensorflow::Status IsDirectory(const string& fname) { + virtual tensorflow::Status IsDirectory(const std::string& fname) { return IsDirectory(fname, nullptr); } @@ -631,7 +633,7 @@ class WrappedFileSystem : public FileSystem { return fs_->DeleteDir(dirname, (token ? token : token_)); } - tensorflow::Status DeleteRecursively(const string& dirname, + tensorflow::Status DeleteRecursively(const std::string& dirname, TransactionToken* token, int64* undeleted_files, int64* undeleted_dirs) override { From b4ab032959b2fb68022b2cf70de46e9564a50d6f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 11 Aug 2020 01:40:26 -0700 Subject: [PATCH 2496/2522] Integrate LLVM at llvm/llvm-project@0de60b550b72 Updates LLVM usage to match [0de60b550b72](https://github.com/llvm/llvm-project/commit/0de60b550b72) PiperOrigin-RevId: 325977375 Change-Id: I0f24ce1139fd51196e24adcbc5e7b8027cf3bb53 --- tensorflow/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 73b3fb42a96..6493fe87836 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -712,8 +712,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "54cb552b962097d0e3ef7306b69a3c82cc7fff37" - LLVM_SHA256 = "42a65541c62e8349cac068e7f44cb1d9d41addf0dbab0002cc7a7d7203bcb35b" + LLVM_COMMIT = "0de60b550b727fa3a0202a9ab5ca30520e291dd5" + LLVM_SHA256 = "d23a64cca502c32aa3990b5252f19cb2ad59534084384df6d7a355e6f23fac62" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From 59bcb3ab288a303fbd7225d6b9622b6945b038e4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 11 Aug 2020 02:01:47 -0700 Subject: [PATCH 2497/2522] Update GraphDef version to 490. PiperOrigin-RevId: 325979480 Change-Id: Id7dc33982c8f45b76f8b1d53a2d1f7f0a446ad8d --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index fa0df24a7e7..34b358612ad 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 489 // Updated: 2020/8/10 +#define TF_GRAPH_DEF_VERSION 490 // Updated: 2020/8/11 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From 7169ab7935248f4cd2ceb8cbe1b874849b07d346 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 11 Aug 2020 02:01:48 -0700 Subject: [PATCH 2498/2522] compat: Update forward compatibility horizon to 2020-08-11 PiperOrigin-RevId: 325979481 Change-Id: If52dfd419928bf71c132207368e54f07a96348eb --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 3d8f9c5490e..3e91a8b3f45 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 8, 10) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 8, 11) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From cb09f04f33362e40147643a151005a731798f77a Mon Sep 17 00:00:00 2001 From: Thai Nguyen Date: Tue, 11 Aug 2020 02:08:22 -0700 Subject: [PATCH 2499/2522] Add script to generate trimmed tflite aar files The script can be used as follows: bash tensorflow/lite/java/build_customized_aar_for_models.sh \ --customize_for_models=model1,model2 \ --target_archs=x86,x86_64,arm64-v8a,armeabi-v7a it will generate the tensorflow-lite.aar and tensorflow-lite-select-tf-ops.aar if needed. PiperOrigin-RevId: 325980230 Change-Id: I9c5216108ae87ac22a69cbb88b37bb473bd4d37f --- tensorflow/lite/g3doc/guide/android.md | 15 +- tensorflow/lite/tools/BUILD | 21 ++ tensorflow/lite/tools/build_aar.sh | 214 ++++++++++++++++++ .../lite/tools/build_aar_with_docker.sh | 115 ++++++++++ tensorflow/lite/tools/list_flex_ops.h | 2 +- .../lite/tools/list_flex_ops_no_kernel.cc | 61 +++++ 6 files changed, 425 insertions(+), 3 deletions(-) create mode 100755 tensorflow/lite/tools/build_aar.sh create mode 100755 tensorflow/lite/tools/build_aar_with_docker.sh create mode 100644 tensorflow/lite/tools/list_flex_ops_no_kernel.cc diff --git a/tensorflow/lite/g3doc/guide/android.md b/tensorflow/lite/g3doc/guide/android.md index a1493090588..72eb07aa34b 100644 --- a/tensorflow/lite/g3doc/guide/android.md +++ b/tensorflow/lite/g3doc/guide/android.md @@ -205,8 +205,19 @@ bazel build -c opt --fat_apk_cpu=x86,x86_64,arm64-v8a,armeabi-v7a \ This will generate an AAR file in `bazel-bin/tensorflow/lite/java/`. Note that this builds a "fat" AAR with several different architectures; if you don't need all of them, use the subset appropriate for your deployment environment. -From there, there are several approaches you can take to use the .aar in your -Android Studio project. + +Caution: Following feature is experimental and only available at HEAD. You can +build smaller AAR files targeting only a set of models as follows: + +```sh +bash tensorflow/lite/tools/build_aar.sh \ + --input_models=model1,model2 \ + --target_archs=x86,x86_64,arm64-v8a,armeabi-v7a +``` + +Above script will generate the `tensorflow-lite.aar` file and optionally the +`tensorflow-lite-select-tf-ops.aar` file if one of the models is using +Tensorflow ops. 
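For example, to build AARs trimmed to two local models for 64-bit ARM only, an invocation along these lines should work (the model paths below are placeholders):

```sh
# Hypothetical model paths; point these at your own .tflite files.
bash tensorflow/lite/tools/build_aar.sh \
  --input_models=/path/to/detector.tflite,/path/to/classifier.tflite \
  --target_archs=arm64-v8a
```

The resulting files are written under `bazel-bin/tmp/` (`tensorflow-lite.aar`, plus `tensorflow-lite-select-tf-ops.aar` when TensorFlow ops are required) and can be consumed with any of the approaches described below.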
##### Add AAR directly to project diff --git a/tensorflow/lite/tools/BUILD b/tensorflow/lite/tools/BUILD index 1f57cad7f7a..ad19cd2b519 100644 --- a/tensorflow/lite/tools/BUILD +++ b/tensorflow/lite/tools/BUILD @@ -296,6 +296,27 @@ cc_library( ], ) +cc_library( + name = "list_flex_ops_no_kernel", + srcs = ["list_flex_ops_no_kernel.cc"], + hdrs = ["list_flex_ops.h"], + deps = [ + "//tensorflow/lite:framework", + "@com_google_absl//absl/strings", + ], +) + +tf_cc_binary( + name = "list_flex_ops_no_kernel_main", + srcs = ["list_flex_ops_main.cc"], + visibility = ["//visibility:public"], + deps = [ + ":list_flex_ops_no_kernel", + "//tensorflow/lite/tools:command_line_flags", + "@com_google_absl//absl/strings", + ], +) + tf_cc_test( name = "list_flex_ops_test", srcs = ["list_flex_ops_test.cc"], diff --git a/tensorflow/lite/tools/build_aar.sh b/tensorflow/lite/tools/build_aar.sh new file mode 100755 index 00000000000..6d84d5b35b1 --- /dev/null +++ b/tensorflow/lite/tools/build_aar.sh @@ -0,0 +1,214 @@ +#!/bin/bash +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ROOT_DIR="$(cd "${SCRIPT_DIR}/../../../" && pwd)" + +function print_usage { + echo "Usage:" + echo " $(basename ${BASH_SOURCE}) \\" + echo " --input_models=model1.tflite,model2.tflite \\" + echo " --target_archs=x86,x86_64,arm64-v8a,armeabi-v7a \\" + echo " --tflite_custom_ops_srcs=file1.cc,file2.h \\" + echo " --tflite_custom_ops_deps=dep1,dep2" + echo "" + echo "Where: " + echo " --input_models: Supported TFLite models. " + echo " --target_archs: Supported arches included in the aar file." + echo " --tflite_custom_ops_srcs: The src files for building additional TFLite custom ops if any." + echo " --tflite_custom_ops_deps: Dependencies for building additional TFLite custom ops if any." + echo "" + exit 1 +} + +function generate_list_field { + local name="$1" + local list_string="$2" + local list=(${list_string//,/ }) + + local message+=("$name=[") + for item in "${list[@]}" + do + message+=("\"$item\",") + done + message+=('],') + printf '%s' "${message[@]}" +} + +function print_output { + echo "Output can be found here:" + for i in "$@" + do + # Check if the file exist. + ls -1a ${ROOT_DIR}/$i + done +} + +function generate_tflite_aar { + pushd ${TMP_DIR} > /dev/null + # Generate the BUILD file. 
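  # Illustrative sketch only (these comment lines are not emitted by this
  # script): for a hypothetical invocation with --input_models=foo.tflite and
  # no custom ops, the generated tmp/BUILD is expected to contain, after the
  # two load() statements, roughly:
  #
  #   tflite_custom_android_library(
  #       name = "custom_tensorflowlite",
  #       models = ["foo.tflite"],
  #       srcs = [],
  #       deps = [],
  #   )
  #
  #   aar_with_jni(
  #       name = "tensorflow-lite",
  #       android_library = ":custom_tensorflowlite",
  #   )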
+ message=( + 'load("//tensorflow/lite:build_def.bzl", "tflite_custom_android_library")' + 'load("//tensorflow/lite/java:aar_with_jni.bzl", "aar_with_jni")' + '' + 'tflite_custom_android_library(' + ' name = "custom_tensorflowlite",' + ) + message+=(' '$(generate_list_field "models" $MODEL_NAMES)) + message+=(' '$(generate_list_field "srcs" $TFLITE_OPS_SRCS)) + message+=(' '$(generate_list_field "deps" $FLAG_TFLITE_OPS_DEPS)) + message+=( + ')' + '' + 'aar_with_jni(' + ' name = "tensorflow-lite",' + ' android_library = ":custom_tensorflowlite",' + ')' + '' + ) + printf '%s\n' "${message[@]}" >> BUILD + + # Build the aar package. + popd > /dev/null + bazel build -c opt --cxxopt='--std=c++14' \ + --fat_apk_cpu=${TARGET_ARCHS} \ + --host_crosstool_top=@bazel_tools//tools/cpp:toolchain \ + //tmp:tensorflow-lite + + OUT_FILES="${OUT_FILES} bazel-bin/tmp/tensorflow-lite.aar" +} + +function generate_flex_aar { + pushd ${TMP_DIR} + # Generating the BUILD file. + message=( + 'load("//tensorflow/lite/delegates/flex:build_def.bzl", "tflite_flex_android_library")' + 'load("//tensorflow/lite/java:aar_with_jni.bzl", "aar_with_jni")' + '' + 'tflite_flex_android_library(' + ' name = "custom_tensorflowlite_flex",' + ) + message+=(' '$(generate_list_field "models" $MODEL_NAMES)) + message+=( + ')' + '' + 'aar_with_jni(' + ' name = "tensorflow-lite-select-tf-ops",' + ' android_library = ":custom_tensorflowlite_flex",' + ')' + ) + printf '%s\n' "${message[@]}" >> BUILD + + cp ${ROOT_DIR}/tensorflow/lite/java/AndroidManifest.xml . + cp ${ROOT_DIR}/tensorflow/lite/java/proguard.flags . + popd + + # Build the aar package. + bazel build -c opt --cxxopt='--std=c++14' \ + --fat_apk_cpu=${TARGET_ARCHS} \ + --host_crosstool_top=@bazel_tools//tools/cpp:toolchain \ + //tmp:tensorflow-lite-select-tf-ops + + OUT_FILES="${OUT_FILES} bazel-bin/tmp/tensorflow-lite-select-tf-ops.aar" +} + +# Check command line flags. +TARGET_ARCHS=x86,x86_64,arm64-v8a,armeabi-v7a + +if [ "$#" -gt 4 ]; then + echo "ERROR: Too many arguments." + print_usage +fi + +for i in "$@" +do +case $i in + --input_models=*) + FLAG_MODELS="${i#*=}" + shift;; + --target_archs=*) + TARGET_ARCHS="${i#*=}" + shift;; + --tflite_custom_ops_srcs=*) + FLAG_TFLITE_OPS_SRCS="${i#*=}" + shift;; + --tflite_custom_ops_deps=*) + FLAG_TFLITE_OPS_DEPS="${i#*=}" + shift;; + *) + echo "ERROR: Unrecognized argument: ${i}" + print_usage;; +esac +done + +# Check if users already run configure +cd $ROOT_DIR +if [ ! -f "$ROOT_DIR/.tf_configure.bazelrc" ]; then + echo "ERROR: Please run ./configure first." + exit 1 +else + if ! grep -q ANDROID_SDK_HOME "$ROOT_DIR/.tf_configure.bazelrc"; then + echo "ERROR: Please run ./configure with Android config." + exit 1 + fi +fi + +# Build the standard aar package of no models provided. +if [ -z ${FLAG_MODELS} ]; then + bazel build -c opt --cxxopt='--std=c++14' \ + --fat_apk_cpu=${TARGET_ARCHS} \ + --host_crosstool_top=@bazel_tools//tools/cpp:toolchain \ + //tensorflow/lite/java:tensorflow-lite + + print_output bazel-bin/tensorflow/lite/java/tensorflow-lite.aar + exit 0 +fi + +# Prepare the tmp directory. +TMP_DIR="${ROOT_DIR}/tmp/" +rm -rf ${TMP_DIR} && mkdir -p ${TMP_DIR} + +# Copy models to tmp directory. +MODEL_NAMES="" +for model in $(echo ${FLAG_MODELS} | sed "s/,/ /g") +do + cp ${model} ${TMP_DIR} + MODEL_NAMES="${MODEL_NAMES},$(basename ${model})" +done + +# Copy srcs of additional tflite ops to tmp directory. 
+TFLITE_OPS_SRCS="" +for src_file in $(echo ${FLAG_TFLITE_OPS_SRCS} | sed "s/,/ /g") +do + cp ${src_file} ${TMP_DIR} + TFLITE_OPS_SRCS="${TFLITE_OPS_SRCS},$(basename ${src_file})" +done + +# Build the custom aar package. +generate_tflite_aar + +# Build flex aar if one of the models contain flex ops. +bazel build -c opt --config=monolithic //tensorflow/lite/tools:list_flex_ops_no_kernel_main +bazel-bin/tensorflow/lite/tools/list_flex_ops_no_kernel_main --graphs=${FLAG_MODELS} > ${TMP_DIR}/ops_list.txt +if [[ `cat ${TMP_DIR}/ops_list.txt` != "[]" ]]; then + generate_flex_aar +fi + +# List the output files. +rm -rf ${TMP_DIR} +print_output ${OUT_FILES} diff --git a/tensorflow/lite/tools/build_aar_with_docker.sh b/tensorflow/lite/tools/build_aar_with_docker.sh new file mode 100755 index 00000000000..2af4787c35c --- /dev/null +++ b/tensorflow/lite/tools/build_aar_with_docker.sh @@ -0,0 +1,115 @@ +#!/bin/bash +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +function print_usage { + echo "Usage:" + echo " $(basename ${BASH_SOURCE}) \\" + echo " --input_models=model1.tflite,model2.tflite \\" + echo " --target_archs=x86,x86_64,arm64-v8a,armeabi-v7a \\" + echo " --checkpoint=master" + echo "" + echo "Where: " + echo " --input_models: Supported TFLite models. " + echo " --target_archs: Supported arches included in the aar file." + echo " --checkpoint: Checkpoint of the github repo, could be a branch, a commit or a tag. Default: master" + echo "" + exit 1 +} + +# Check command line flags. +ARGUMENTS=$@ +TARGET_ARCHS=x86,x86_64,arm64-v8a,armeabi-v7a +FLAG_CHECKPOINT="master" + +if [ "$#" -gt 3 ]; then + echo "ERROR: Too many arguments." + print_usage +fi + +for i in "$@" +do +case $i in + --input_models=*) + FLAG_MODELS="${i#*=}" + shift;; + --target_archs=*) + TARGET_ARCHS="${i#*=}" + shift;; + --checkpoint=*) + FLAG_CHECKPOINT="${i#*=}" + shift;; + *) + echo "ERROR: Unrecognized argument: ${i}" + print_usage;; +esac +done + +if [ ! -d /tensorflow_src ]; then + # Running on host. + for model in $(echo ${FLAG_MODELS} | sed "s/,/ /g") + do + FLAG_DIR="${FLAG_DIR} -v ${model}:${model}" + done + docker run --rm -it -v $PWD:/tmp -v ${SCRIPT_DIR}:/script_dir ${FLAG_DIR} \ + --entrypoint /script_dir/build_aar_with_docker.sh tflite-builder \ + ${ARGUMENTS} + exit 0 +else + # Running inside docker container, download the SDK first. + android update sdk --no-ui -a \ + --filter tools,platform-tools,android-${ANDROID_API_LEVEL},build-tools-${ANDROID_BUILD_TOOLS_VERSION} + + cd /tensorflow_src + + # Run configure. + configs=( + '/usr/bin/python3' + '/usr/lib/python3/dist-packages' + 'N' + 'N' + 'N' + 'N' + '-march=native -Wno-sign-compare' + 'y' + '/android/sdk' + ) + printf '%s\n' "${configs[@]}" | ./configure + + # Pull the latest code from tensorflow. 
+ git pull -a + git checkout ${FLAG_CHECKPOINT} + + # Building with bazel. + bash /tensorflow_src/tensorflow/lite/tools/build_aar.sh ${ARGUMENTS} + + # Copy the output files from docker container. + clear + OUT_FILES="/tensorflow_src/bazel-bin/tmp/tensorflow-lite.aar" + OUT_FILES="${OUT_FILES} /tensorflow_src/bazel-bin/tmp/tensorflow-lite-select-tf-ops.aar" + echo "Output can be found here:" + for i in ${OUT_FILES} + do + if [ -f $i ]; then + cp $i /tmp + basename $i + fi + done +fi + diff --git a/tensorflow/lite/tools/list_flex_ops.h b/tensorflow/lite/tools/list_flex_ops.h index 070da2d9b3d..f9bc7b952df 100644 --- a/tensorflow/lite/tools/list_flex_ops.h +++ b/tensorflow/lite/tools/list_flex_ops.h @@ -42,7 +42,7 @@ struct OpKernelCompare { using OpKernelSet = std::set; // Find flex ops and its kernel classes inside a TFLite model and add them to -// the map flex_ops. The map stores +// the map flex_ops. void AddFlexOpsFromModel(const tflite::Model* model, OpKernelSet* flex_ops); // Serialize the list op of to a json string. If flex_ops is empty, return an diff --git a/tensorflow/lite/tools/list_flex_ops_no_kernel.cc b/tensorflow/lite/tools/list_flex_ops_no_kernel.cc new file mode 100644 index 00000000000..11a9f39dbfd --- /dev/null +++ b/tensorflow/lite/tools/list_flex_ops_no_kernel.cc @@ -0,0 +1,61 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" +#include "tensorflow/lite/tools/list_flex_ops.h" + +namespace tflite { +namespace flex { + +std::string OpListToJSONString(const OpKernelSet& flex_ops) { + return absl::StrCat("[", + absl::StrJoin(flex_ops, ",\n", + [](std::string* out, const OpKernel& op) { + absl::StrAppend(out, "\"", op.op_name, + "\""); + }), + "]"); +} + +void AddFlexOpsFromModel(const tflite::Model* model, OpKernelSet* flex_ops) { + auto* subgraphs = model->subgraphs(); + if (!subgraphs) return; + + for (int subgraph_index = 0; subgraph_index < subgraphs->size(); + ++subgraph_index) { + const tflite::SubGraph* subgraph = subgraphs->Get(subgraph_index); + auto* operators = subgraph->operators(); + auto* opcodes = model->operator_codes(); + if (!operators || !opcodes) continue; + + for (int i = 0; i < operators->size(); ++i) { + const tflite::Operator* op = operators->Get(i); + const tflite::OperatorCode* opcode = opcodes->Get(op->opcode_index()); + if (opcode->builtin_code() != tflite::BuiltinOperator_CUSTOM || + !tflite::IsFlexOp(opcode->custom_code()->c_str())) { + continue; + } + + // Remove the "Flex" prefix from op name. 
+ std::string flex_op_name(opcode->custom_code()->c_str()); + std::string tf_op_name = + flex_op_name.substr(strlen(tflite::kFlexCustomCodePrefix)); + + flex_ops->insert({tf_op_name, ""}); + } + } +} +} // namespace flex +} // namespace tflite From 78a451a5bc954891f6db9abc2a68508c21781e4a Mon Sep 17 00:00:00 2001 From: Renjie Liu Date: Tue, 11 Aug 2020 02:47:42 -0700 Subject: [PATCH 2500/2522] Add a warning handler for StatusScopedDiagnosticHandler. PiperOrigin-RevId: 325984749 Change-Id: I3a6b2a7dd043148b8e2dc5cdf2a2929341e699bc --- .../compiler/mlir/lite/tf_to_tfl_flatbuffer.cc | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.cc b/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.cc index 414a0de0118..c158f3a8e21 100644 --- a/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.cc +++ b/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.cc @@ -129,6 +129,18 @@ Status ConvertTFExecutorToTFLOrFlatbuffer( bool emit_select_tf_ops, bool emit_custom_ops, const mlir::TFL::QuantizationSpecs& quant_specs, std::string* result, mlir::PassManager* pass_manager) { + // Register a warning handler only log to std out. + mlir::ScopedDiagnosticHandler s( + module.getContext(), [](mlir::Diagnostic& diag) { + if (diag.getSeverity() == mlir::DiagnosticSeverity::Warning) { + for (auto& note : diag.getNotes()) { + std::cout << note.str() << "\n"; + LOG(WARNING) << note.str() << "\n"; + } + } + return mlir::failure(); + }); + mlir::StatusScopedDiagnosticHandler statusHandler(module.getContext(), /*propagate=*/true); From af5df07df41a2a9dcb9c73b594b5e7ffc853e88a Mon Sep 17 00:00:00 2001 From: Alexander Belyaev Date: Tue, 11 Aug 2020 04:00:17 -0700 Subject: [PATCH 2501/2522] [MLIR:GPU] Expose aux passes in kernel_lowering.cc. Also clean up dependencies in kernel_lowering. 
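For readers skimming the large diff that follows: the pass implementations themselves are unchanged by this refactoring; they move from an anonymous namespace inside kernel_lowering.cc into the new passes.cc, and only small factory functions are exported through passes.h. The snippet below is an illustrative sketch of that shape only — `ExamplePass` and `createExamplePass` are placeholder names, not part of the actual TensorFlow sources — using the same PassWrapper/absl::make_unique constructs that appear in the diff.

```cpp
// Sketch of the "hidden pass struct + exported factory" pattern (placeholder
// names; not the real TensorFlow code).
#include <memory>

#include "absl/memory/memory.h"
#include "mlir/Pass/Pass.h"  // from @llvm-project

namespace xla {
namespace mlir_gpu {
namespace {

// The concrete pass stays in an anonymous namespace in the .cc file, so the
// header never has to expose its type or its dependencies.
struct ExamplePass : public mlir::PassWrapper<ExamplePass, mlir::FunctionPass> {
  void runOnFunction() override {
    // Transformation logic lives here, invisible to users of the header.
  }
};

}  // namespace

// Only this factory is declared in the header; callers build pipelines
// against it without seeing the pass class.
std::unique_ptr<mlir::Pass> createExamplePass() {
  return absl::make_unique<ExamplePass>();
}

}  // namespace mlir_gpu
}  // namespace xla
```

A pipeline then depends only on the header, e.g. `pm.addPass(createFusionOpRemoverPass());`, which is exactly how the calls in kernel_lowering.cc read after this change.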
PiperOrigin-RevId: 325992384 Change-Id: I420b4ef7c8058bc573cc2b525961283df4927f99 --- .../compiler/xla/service/mlir_gpu/BUILD | 24 +- .../xla/service/mlir_gpu/kernel_lowering.cc | 412 +---------------- .../compiler/xla/service/mlir_gpu/passes.cc | 423 ++++++++++++++++++ .../compiler/xla/service/mlir_gpu/passes.h | 66 +++ 4 files changed, 519 insertions(+), 406 deletions(-) create mode 100644 tensorflow/compiler/xla/service/mlir_gpu/passes.cc create mode 100644 tensorflow/compiler/xla/service/mlir_gpu/passes.h diff --git a/tensorflow/compiler/xla/service/mlir_gpu/BUILD b/tensorflow/compiler/xla/service/mlir_gpu/BUILD index 43a6efe9e90..31cf36dee85 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/BUILD +++ b/tensorflow/compiler/xla/service/mlir_gpu/BUILD @@ -155,11 +155,31 @@ cc_library( ], ) +cc_library( + name = "passes", + srcs = ["passes.cc"], + hdrs = ["passes.h"], + deps = [ + "//tensorflow/compiler/mlir/hlo:lhlo", + "@com_google_absl//absl/memory", + "@llvm-project//llvm:Support", + "@llvm-project//mlir:GPUDialect", + "@llvm-project//mlir:GPUTransforms", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:SCFDialect", + "@llvm-project//mlir:SCFTransforms", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:Transforms", + ], +) + cc_library( name = "kernel_lowering", srcs = ["kernel_lowering.cc"], hdrs = ["kernel_lowering.h"], deps = [ + ":passes", "//tensorflow/compiler/mlir/hlo", "//tensorflow/compiler/mlir/hlo:hlo_dialect_force_registration", "//tensorflow/compiler/mlir/hlo:hlo_legalize_to_lhlo", @@ -173,9 +193,7 @@ cc_library( "//tensorflow/compiler/xla:status", "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:util", - "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/memory", - "@llvm-project//mlir:Affine", "@llvm-project//mlir:AffineToStandardTransforms", "@llvm-project//mlir:CFGTransforms", "@llvm-project//mlir:GPUDialect", @@ -184,7 +202,6 @@ cc_library( "@llvm-project//mlir:IR", "@llvm-project//mlir:LLVMDialect", "@llvm-project//mlir:LLVMTransforms", - "@llvm-project//mlir:LinalgOps", "@llvm-project//mlir:LinalgToLLVM", "@llvm-project//mlir:LinalgTransforms", "@llvm-project//mlir:NVVMDialect", @@ -193,7 +210,6 @@ cc_library( "@llvm-project//mlir:SCFToGPUPass", "@llvm-project//mlir:SCFTransforms", "@llvm-project//mlir:StandardOps", - "@llvm-project//mlir:Support", "@llvm-project//mlir:Transforms", ], ) diff --git a/tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.cc b/tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.cc index 2e3fa00ca86..ae99cc9ba63 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.cc +++ b/tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.cc @@ -22,419 +22,26 @@ limitations under the License. 
#include "mlir/Conversion/SCFToGPU/SCFToGPUPass.h" // from @llvm-project #include "mlir/Conversion/SCFToStandard/SCFToStandard.h" // from @llvm-project #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h" // from @llvm-project -#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h" // from @llvm-project -#include "mlir/Dialect/Affine/IR/AffineOps.h" // from @llvm-project #include "mlir/Dialect/GPU/GPUDialect.h" // from @llvm-project -#include "mlir/Dialect/GPU/ParallelLoopMapper.h" // from @llvm-project #include "mlir/Dialect/GPU/Passes.h" // from @llvm-project #include "mlir/Dialect/LLVMIR/LLVMDialect.h" // from @llvm-project #include "mlir/Dialect/LLVMIR/NVVMDialect.h" // from @llvm-project -#include "mlir/Dialect/Linalg/IR/LinalgOps.h" // from @llvm-project #include "mlir/Dialect/Linalg/Passes.h" // from @llvm-project #include "mlir/Dialect/SCF/Passes.h" // from @llvm-project -#include "mlir/Dialect/SCF/SCF.h" // from @llvm-project #include "mlir/Dialect/SCF/Transforms.h" // from @llvm-project -#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project -#include "mlir/IR/Attributes.h" // from @llvm-project -#include "mlir/IR/BlockAndValueMapping.h" // from @llvm-project #include "mlir/IR/Builders.h" // from @llvm-project -#include "mlir/IR/Function.h" // from @llvm-project -#include "mlir/IR/Module.h" // from @llvm-project -#include "mlir/IR/OperationSupport.h" // from @llvm-project -#include "mlir/IR/PatternMatch.h" // from @llvm-project -#include "mlir/IR/Region.h" // from @llvm-project -#include "mlir/IR/Value.h" // from @llvm-project #include "mlir/Pass/Pass.h" // from @llvm-project #include "mlir/Pass/PassManager.h" // from @llvm-project #include "mlir/Transforms/BufferPlacement.h" // from @llvm-project -#include "mlir/Transforms/DialectConversion.h" // from @llvm-project -#include "mlir/Transforms/LoopUtils.h" // from @llvm-project #include "mlir/Transforms/Passes.h" // from @llvm-project -#include "mlir/Transforms/RegionUtils.h" // from @llvm-project #include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" #include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/passes.h" #include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h" +#include "tensorflow/compiler/xla/service/mlir_gpu/passes.h" #include "tensorflow/compiler/xla/util.h" namespace xla { namespace mlir_gpu { -namespace { - -using ::mlir::lmhlo::FusionOp; - -// Replaces a FusionOp by the operations contained in its region. -struct FusionOpRemover - : public mlir::PassWrapper { - void runOnFunction() override { - getFunction().walk([&](FusionOp op) { - mlir::OpBuilder builder(op); - // FusionOp has a single region with a single block, so we can just walk - // over it and clone operations to the outside. - mlir::BlockAndValueMapping mapping; - for (auto& nested_op : op.region().front().without_terminator()) { - auto clone = builder.clone(nested_op, mapping); - for (auto pair : - llvm::zip(nested_op.getResults(), clone->getResults())) { - mapping.map(std::get<0>(pair), std::get<1>(pair)); - } - } - op.erase(); - }); - } -}; - -// Simple pass that replaces a load that immediately follows a store to the -// same address with the stored value. This needs generalization. -struct StoreForwardingPass - : mlir::PassWrapper { - mlir::StoreOp findStore(mlir::Operation* op, - std::function matches) { - // Search from op upwards in the current block. 
- mlir::Block* block = op->getBlock(); - auto startFromIt = - std::find_if(block->rbegin(), block->rend(), - [op](mlir::Operation& other) { return &other == op; }); - for (auto storeOpIt = startFromIt; storeOpIt != block->rend(); - ++storeOpIt) { - auto storeOp = llvm::dyn_cast(&*(storeOpIt)); - if (!storeOp || !matches(storeOp)) { - continue; - } - - return storeOp; - } - // No store operation found. Continue search outside of the parallel - // loop if block is in a parallel loop. - if (auto parallelOp = - llvm::dyn_cast(block->getParentOp())) { - return findStore(parallelOp.getOperation(), matches); - } - return {}; - } - - // Recursively search defining ops for AllocOp. Return either AllocOp if it is - // found or nullptr. - mlir::Operation* SearchAllocOp(mlir::Value memref) { - mlir::Operation* defOp = memref.getDefiningOp(); - while (auto subviewOp = mlir::dyn_cast_or_null(defOp)) { - defOp = subviewOp.source().getDefiningOp(); - } - if (auto allocOp = mlir::dyn_cast_or_null(defOp)) { - return allocOp.getOperation(); - } - return nullptr; - } - - // Retrieves AllocOp from the cache or actually looks for it. - mlir::Operation* GetAllocOp( - mlir::Value memref, - llvm::DenseMap* memrefToAllocOp) { - auto allocOpIt = memrefToAllocOp->find(memref); - if (allocOpIt != memrefToAllocOp->end()) { - return allocOpIt->second; - } - auto allocOp = SearchAllocOp(memref); - memrefToAllocOp->insert({memref, allocOp}); - return allocOp; - } - - void runOnFunction() override { - llvm::DenseMap memrefToAllocOp; - - getFunction().walk([&](mlir::LoadOp loadOp) { - auto storeOp = findStore(loadOp, [&](mlir::StoreOp storeOp) { - mlir::Operation* storeOpAlloc = - GetAllocOp(storeOp.memref(), &memrefToAllocOp); - mlir::Operation* loadOpAlloc = - GetAllocOp(loadOp.memref(), &memrefToAllocOp); - return storeOpAlloc && loadOpAlloc && (storeOpAlloc == loadOpAlloc); - }); - if (!storeOp) { - return; - } - auto storeIndices = storeOp.getIndices(); - auto loadIndices = loadOp.getIndices(); - if (!std::equal(storeIndices.begin(), storeIndices.end(), - loadIndices.begin(), loadIndices.end())) { - return; - } - loadOp.replaceAllUsesWith(storeOp.getValueToStore()); - loadOp.erase(); - }); - } -}; - -// Simple pass that removes temporary buffers that are only written to but -// never read from or that are read but the read value is not used. -// Needs an analysis that proves that loads and stores are side-effect free -// (in bounds, no aliasing, etc.). -struct DeadTempBufferRemoval - : mlir::PassWrapper { - bool operationConsideredDead(mlir::Operation* op) { - for (auto result : op->getResults()) { - if (!llvm::all_of(result.getUsers(), [&](mlir::Operation* op) { - // Store and Dealloc is OK. - if (llvm::isa(op)) { - return true; - } - // Load without uses is also ok. - if (auto loadOp = llvm::dyn_cast(op)) { - return loadOp.use_empty(); - } - // Subview is ok if it is dead itself. - if (llvm::isa(op)) { - return operationConsideredDead(op); - } - return false; - })) { - return false; - } - } - return true; - } - - void recursiveErase(mlir::Operation* op, - llvm::SmallVectorImpl* erase_list) { - for (auto result : op->getResults()) { - for (auto user : llvm::make_early_inc_range(result.getUsers())) { - recursiveErase(user, erase_list); - } - } - erase_list->push_back(op); - } - - void runOnFunction() override { - llvm::SmallVector dead_ops; - getFunction().walk([&](mlir::AllocOp allocOp) { - if (!operationConsideredDead(allocOp)) { - return; - } - - // TODO(herhut): There should be a generic helper for this. 
- recursiveErase(allocOp, &dead_ops); - }); - for (auto op : dead_ops) { - op->erase(); - } - } -}; - -// TODO(herhut): Move this to MLIR core. -struct MoveScalarComputationsIntoGpuLaunch - : mlir::PassWrapper { - static bool isInliningBeneficiary(mlir::Operation* op) { - return llvm::isa(op); - } - - static bool extractBeneficiaryOps( - mlir::Operation* op, llvm::SmallVectorImpl* ops, - llvm::SetVector args) { - if (!isInliningBeneficiary(op)) { - return false; - } - - ops->push_back(op); - for (auto operand : op->getOperands()) { - // It is an existing arg, keep going. - if (args.count(operand)) { - continue; - } - mlir::Operation* definingOp = operand.getDefiningOp(); - if (!definingOp || !extractBeneficiaryOps(definingOp, ops, args)) { - return false; - } - } - return true; - } - - static void inlineOperationsIntoLaunch(mlir::gpu::LaunchOp launch) { - llvm::SetVector used_above; - mlir::getUsedValuesDefinedAbove(launch.body(), used_above); - mlir::BlockAndValueMapping inlined_map; - for (mlir::Value v : used_above) { - llvm::SmallVector ops_to_move; - mlir::Operation* definingOp = v.getDefiningOp(); - if (definingOp && - extractBeneficiaryOps(definingOp, &ops_to_move, used_above)) { - mlir::OpBuilder b(launch.body()); - for (mlir::Operation* op : llvm::reverse(ops_to_move)) { - auto result = b.clone(*op, inlined_map); - for (auto pair : llvm::zip(op->getResults(), result->getResults())) { - mlir::replaceAllUsesInRegionWith(std::get<0>(pair), - std::get<1>(pair), launch.body()); - } - inlined_map.map(op->getResults(), result->getResults()); - } - } - } - } - - void runOnFunction() override { - mlir::FuncOp fun = getFunction(); - fun.walk( - [](mlir::gpu::LaunchOp launch) { inlineOperationsIntoLaunch(launch); }); - } -}; - -// Sort the operands to the kernel for a deterministic order. First operands -// that are defined by function arguments, followed by operands that are -// returned from the function. This only works for simple functions without -// control flow and can be used in cases where the kernel is extracted and used -// independently of the host-side code. -struct RewriteKernelSignature - : mlir::PassWrapper { - void runOnFunction() override { - mlir::FuncOp func = getFunction(); - mlir::ModuleOp module = func.getParentOfType(); - getFunction().walk([&](mlir::gpu::LaunchFuncOp launchOp) { - mlir::gpu::GPUFuncOp kernel = - module.lookupSymbol(launchOp.kernel()); - - if (kernel.getNumFuncArguments() != - func.getNumArguments() + func.getNumResults()) { - kernel.emitError() - << "number of kernel arguments does not match number" - << "of arguments and results of surrounding function"; - signalPassFailure(); - return; - } - if (!llvm::hasSingleElement(func)) { - func.emitError() << "surrounding function has more than one block"; - signalPassFailure(); - return; - } - - // Compute a map from function arguments to kernel function operands. - mlir::BlockAndValueMapping func_to_kernel; - for (mlir::BlockArgument arg : func.getArguments()) { - for (int i = 0, e = launchOp.getNumKernelOperands(); i < e; ++i) { - if (launchOp.getKernelOperand(i) == arg) { - func_to_kernel.map(arg, kernel.getArgument(i)); - break; - } - } - } - // Also add function results that are computed by the launch. 
- mlir::Operation* returnOp = func.getBody().back().getTerminator(); - for (mlir::Value result : returnOp->getOperands()) { - for (int i = 0, e = launchOp.getNumKernelOperands(); i < e; ++i) { - if (launchOp.getKernelOperand(i) == result) { - func_to_kernel.map(result, kernel.getArgument(i)); - break; - } - } - } - - // Create a new kernel function with modified signature. It will have the - // parameters and result types of the original funcion as its parameter - // type and otherwise will be void. - auto gpu_module = kernel.getParentOfType(); - mlir::OpBuilder kernel_builder(gpu_module.body()); - auto operand_types = llvm::to_vector<4>(llvm::concat( - func.getType().getInputs(), func.getType().getResults())); - auto new_kernel = kernel_builder.create( - kernel.getLoc(), kernel.getName(), - kernel_builder.getFunctionType(operand_types, {})); - new_kernel.setAttr(mlir::gpu::GPUDialect::getKernelFuncAttrName(), - kernel_builder.getUnitAttr()); - - // Create a map from old kernel argument to new one. - mlir::BlockAndValueMapping old_kernel_to_new; - for (int i = 0, e = func.getNumArguments(); i < e; ++i) { - mlir::Value func_arg = func.getArgument(i); - mlir::Value new_kernel_arg = new_kernel.getArgument(i); - mlir::Value old_kernel_arg = func_to_kernel.lookupOrNull(func_arg); - if (!old_kernel_arg) { - kernel.emitOpError() - << "argument " << i - << " to containing function is not an argument to the kernel"; - signalPassFailure(); - return; - } - old_kernel_to_new.map(old_kernel_arg, new_kernel_arg); - } - for (int i = 0, e = returnOp->getNumOperands(); i < e; ++i) { - mlir::Value ret_op = returnOp->getOperand(i); - mlir::Value new_kernel_arg = - new_kernel.getArgument(func.getNumArguments() + i); - mlir::Value old_kernel_arg = func_to_kernel.lookupOrNull(ret_op); - if (!old_kernel_arg) { - kernel.emitOpError() - << "result " << i - << " of containing function is not an argument to the kernel"; - signalPassFailure(); - return; - } - old_kernel_to_new.map(old_kernel_arg, new_kernel_arg); - } - // Steal the body by appending the blocks and inserting a branch. - kernel.body().cloneInto(&new_kernel.getBody(), old_kernel_to_new); - kernel_builder.setInsertionPointToEnd(&new_kernel.body().front()); - kernel_builder.create( - new_kernel.getLoc(), &*std::next(new_kernel.body().begin())); - // Now create a new launchOp calling the new kernel. We need to forward - // the arguments of the surrounding function and operands to the return. - mlir::SmallVector new_operands; - new_operands.reserve(new_kernel.getNumFuncArguments()); - new_operands.append(func.args_begin(), func.args_end()); - new_operands.append(returnOp->operand_begin(), returnOp->operand_end()); - mlir::OpBuilder launch_builder(launchOp); - launch_builder.create( - launchOp.getLoc(), new_kernel, launchOp.getGridSizeOperandValues(), - launchOp.getBlockSizeOperandValues(), new_operands); - // Launch does not have results, so we can just erase it. And the kernel - // also needs to go. - launchOp.erase(); - kernel.erase(); - }); - } -}; - -// Extract_element(mhlo_scalars_to_dimension_tensor(v_i), i) -> v_i -// -// We need to direct fusion to the inner loops. This cannot be done with -// a passmanager alone ATM, as nested pass managers require operations to -// be closed from above. -struct MapParallelLoops - : public mlir::PassWrapper { - void runOnFunction() override { - mlir::greedilyMapParallelSCFToGPU(getFunction().getBody()); - } -}; - -// We need to direct fusion to the inner loops. 
This cannot be done with -// a passmanager alone ATM, as nested pass managers require operations to -// be closed from above. -struct FuseInnerParallelLoops - : public mlir::PassWrapper { - void runOnFunction() override { - getFunction().walk([](mlir::scf::ParallelOp op) { - mlir::scf::naivelyFuseParallelOps(op.region()); - }); - } -}; - -// Collapse all loop dimension into the first one. -struct ParallelLoopCollapsingToFirstDim - : public mlir::PassWrapper> { - void runOnOperation() override { - mlir::Operation* module = getOperation(); - - module->walk([&](mlir::scf::ParallelOp op) { - unsigned num_loops = op.getNumLoops(); - std::vector combinedLoops; - combinedLoops.reserve(num_loops); - for (unsigned i = 0; i < num_loops; ++i) { - combinedLoops.push_back(i); - } - mlir::collapseParallelLoops(op, {combinedLoops}); - }); - } -}; -} // namespace Status LowerLHLOToGPU(mlir::ModuleOp module, LowerLHLOToGPUOptions options) { mlir::PassManager pm(module.getContext()); @@ -461,7 +68,7 @@ Status LowerLHLOToGPU(mlir::ModuleOp module, LowerLHLOToGPUOptions options) { // Moving `AllocOp`s and inserting missing `DeallocOp`s pm.addPass(::mlir::createBufferPlacementPass()); // Next, we can strip the outer fusion operation. - pm.addPass(absl::make_unique()); + pm.addPass(createFusionOpRemoverPass()); // Remove unnecessary LHLO copies. pm.addPass(::mlir::lmhlo::createLhloCopyRemovalPass()); // Transform LHLO operations to LinAlg. @@ -479,26 +86,26 @@ Status LowerLHLOToGPU(mlir::ModuleOp module, LowerLHLOToGPUOptions options) { pm.addNestedPass<::mlir::FuncOp>(::mlir::createCanonicalizerPass()); pm.addNestedPass<::mlir::FuncOp>(::mlir::createCSEPass()); // Fuse the inner-most loops. - pm.addPass(absl::make_unique()); + pm.addPass(createFuseInnerParallelLoopsPass()); // Run CSE to ensure that loads and stores to the same subview get // recognized as such. pm.addNestedPass<::mlir::FuncOp>(::mlir::createCSEPass()); // Forward stores to buffers to loads. - pm.addPass(absl::make_unique()); + pm.addPass(createStoreForwardingPass()); // Remove now unused temporary buffers. - pm.addPass(absl::make_unique()); + pm.addPass(createDeadTempBufferRemovalPass()); if (!options.unroll_factors.empty()) { pm.addPass(::mlir::createParallelLoopTilingPass(as_int64)); } // Project all loop dimensions to X if necessary. if (options.collapse_parallel_loops) { - pm.addPass(absl::make_unique()); + pm.addPass(createParallelLoopCollapsingToFirstDimPass()); } // Some basic cleanup. pm.addNestedPass<::mlir::FuncOp>(::mlir::createCanonicalizerPass()); pm.addNestedPass<::mlir::FuncOp>(::mlir::createCSEPass()); // Greedily map the remaining loop to GPU hardware dimensions. - pm.addPass(absl::make_unique()); + pm.addPass(createMapParallelLoopsPass()); // Apply the mapping. pm.addPass(mlir::createParallelLoopToGpuPass()); // Some basic cleanup. @@ -515,13 +122,13 @@ Status LowerLHLOToGPU(mlir::ModuleOp module, LowerLHLOToGPUOptions options) { ::mlir::mhlo::createLegalizeTanhToApproximationPass()); } // Move scalar operations into the launch to ensure smaller signatures. - pm.addPass(absl::make_unique()); + pm.addPass(createMoveScalarComputationsIntoGpuLaunchPass()); // Take launches to launches with kernels. 
pm.addPass(::mlir::createGpuKernelOutliningPass()); // Make sure the kernel signature resembled the original function's // signature if (options.rewrite_signature) { - pm.addPass(absl::make_unique()); + pm.addPass(createRewriteKernelSignaturePass()); } if (failed(pm.run(module))) { return InternalError("Lowering to GPU kernels failed."); @@ -595,5 +202,6 @@ StatusOr ExtractKernelModule(mlir::ModuleOp module) { }); return kernelModule; } + } // namespace mlir_gpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/mlir_gpu/passes.cc b/tensorflow/compiler/xla/service/mlir_gpu/passes.cc new file mode 100644 index 00000000000..887f14e90d9 --- /dev/null +++ b/tensorflow/compiler/xla/service/mlir_gpu/passes.cc @@ -0,0 +1,423 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/mlir_gpu/passes.h" + +#include "absl/memory/memory.h" +#include "llvm/ADT/SetVector.h" +#include "mlir/Dialect/GPU/GPUDialect.h" // from @llvm-project +#include "mlir/Dialect/GPU/ParallelLoopMapper.h" // from @llvm-project +#include "mlir/Dialect/SCF/SCF.h" // from @llvm-project +#include "mlir/Dialect/SCF/Transforms.h" // from @llvm-project +#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project +#include "mlir/IR/BlockAndValueMapping.h" // from @llvm-project +#include "mlir/IR/Builders.h" // from @llvm-project +#include "mlir/Transforms/LoopUtils.h" // from @llvm-project +#include "mlir/Transforms/RegionUtils.h" // from @llvm-project +#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" + +namespace xla { +namespace mlir_gpu { +namespace { + +struct FusionOpRemoverPass + : public mlir::PassWrapper { + void runOnFunction() override { + getFunction().walk([&](mlir::lmhlo::FusionOp op) { + mlir::OpBuilder builder(op); + // FusionOp has a single region with a single block, so we can just walk + // over it and clone operations to the outside. + mlir::BlockAndValueMapping mapping; + for (auto& nested_op : op.region().front().without_terminator()) { + auto clone = builder.clone(nested_op, mapping); + for (auto pair : + llvm::zip(nested_op.getResults(), clone->getResults())) { + mapping.map(std::get<0>(pair), std::get<1>(pair)); + } + } + op.erase(); + }); + } +}; + +struct StoreForwardingPass + : mlir::PassWrapper { + mlir::StoreOp findStore(mlir::Operation* op, + std::function matches) { + // Search from op upwards in the current block. + mlir::Block* block = op->getBlock(); + auto startFromIt = + std::find_if(block->rbegin(), block->rend(), + [op](mlir::Operation& other) { return &other == op; }); + for (auto storeOpIt = startFromIt; storeOpIt != block->rend(); + ++storeOpIt) { + auto storeOp = llvm::dyn_cast(&*(storeOpIt)); + if (!storeOp || !matches(storeOp)) { + continue; + } + + return storeOp; + } + // No store operation found. 
Continue search outside of the parallel + // loop if block is in a parallel loop. + if (auto parallelOp = + llvm::dyn_cast(block->getParentOp())) { + return findStore(parallelOp.getOperation(), matches); + } + return {}; + } + + // Recursively search defining ops for AllocOp. Return either AllocOp if it is + // found or nullptr. + mlir::Operation* SearchAllocOp(mlir::Value memref) { + mlir::Operation* defOp = memref.getDefiningOp(); + while (auto subviewOp = mlir::dyn_cast_or_null(defOp)) { + defOp = subviewOp.source().getDefiningOp(); + } + if (auto allocOp = mlir::dyn_cast_or_null(defOp)) { + return allocOp.getOperation(); + } + return nullptr; + } + + // Retrieves AllocOp from the cache or actually looks for it. + mlir::Operation* GetAllocOp( + mlir::Value memref, + llvm::DenseMap* memrefToAllocOp) { + auto allocOpIt = memrefToAllocOp->find(memref); + if (allocOpIt != memrefToAllocOp->end()) { + return allocOpIt->second; + } + auto allocOp = SearchAllocOp(memref); + memrefToAllocOp->insert({memref, allocOp}); + return allocOp; + } + + void runOnFunction() override { + llvm::DenseMap memrefToAllocOp; + + getFunction().walk([&](mlir::LoadOp loadOp) { + auto storeOp = findStore(loadOp, [&](mlir::StoreOp storeOp) { + mlir::Operation* storeOpAlloc = + GetAllocOp(storeOp.memref(), &memrefToAllocOp); + mlir::Operation* loadOpAlloc = + GetAllocOp(loadOp.memref(), &memrefToAllocOp); + return storeOpAlloc && loadOpAlloc && (storeOpAlloc == loadOpAlloc); + }); + if (!storeOp) { + return; + } + auto storeIndices = storeOp.getIndices(); + auto loadIndices = loadOp.getIndices(); + if (!std::equal(storeIndices.begin(), storeIndices.end(), + loadIndices.begin(), loadIndices.end())) { + return; + } + loadOp.replaceAllUsesWith(storeOp.getValueToStore()); + loadOp.erase(); + }); + } +}; + +struct DeadTempBufferRemovalPass + : mlir::PassWrapper { + bool operationConsideredDead(mlir::Operation* op) { + for (auto result : op->getResults()) { + if (!llvm::all_of(result.getUsers(), [&](mlir::Operation* op) { + // Store and Dealloc is OK. + if (llvm::isa(op)) { + return true; + } + // Load without uses is also ok. + if (auto loadOp = llvm::dyn_cast(op)) { + return loadOp.use_empty(); + } + // Subview is ok if it is dead itself. + if (llvm::isa(op)) { + return operationConsideredDead(op); + } + return false; + })) { + return false; + } + } + return true; + } + + void recursiveErase(mlir::Operation* op, + llvm::SmallVectorImpl* erase_list) { + for (auto result : op->getResults()) { + for (auto user : llvm::make_early_inc_range(result.getUsers())) { + recursiveErase(user, erase_list); + } + } + erase_list->push_back(op); + } + + void runOnFunction() override { + llvm::SmallVector dead_ops; + getFunction().walk([&](mlir::AllocOp allocOp) { + if (!operationConsideredDead(allocOp)) { + return; + } + + // TODO(herhut): There should be a generic helper for this. + recursiveErase(allocOp, &dead_ops); + }); + for (auto op : dead_ops) { + op->erase(); + } + } +}; + +struct MoveScalarComputationsIntoGpuLaunchPass + : mlir::PassWrapper { + static bool isInliningBeneficiary(mlir::Operation* op) { + return llvm::isa(op); + } + + static bool extractBeneficiaryOps( + mlir::Operation* op, llvm::SmallVectorImpl* ops, + llvm::SetVector args) { + if (!isInliningBeneficiary(op)) { + return false; + } + + ops->push_back(op); + for (auto operand : op->getOperands()) { + // It is an existing arg, keep going. 
+ if (args.count(operand)) { + continue; + } + mlir::Operation* definingOp = operand.getDefiningOp(); + if (!definingOp || !extractBeneficiaryOps(definingOp, ops, args)) { + return false; + } + } + return true; + } + + static void inlineOperationsIntoLaunch(mlir::gpu::LaunchOp launch) { + llvm::SetVector used_above; + mlir::getUsedValuesDefinedAbove(launch.body(), used_above); + mlir::BlockAndValueMapping inlined_map; + for (mlir::Value v : used_above) { + llvm::SmallVector ops_to_move; + mlir::Operation* definingOp = v.getDefiningOp(); + if (definingOp && + extractBeneficiaryOps(definingOp, &ops_to_move, used_above)) { + mlir::OpBuilder b(launch.body()); + for (mlir::Operation* op : llvm::reverse(ops_to_move)) { + auto result = b.clone(*op, inlined_map); + for (auto pair : llvm::zip(op->getResults(), result->getResults())) { + mlir::replaceAllUsesInRegionWith(std::get<0>(pair), + std::get<1>(pair), launch.body()); + } + inlined_map.map(op->getResults(), result->getResults()); + } + } + } + } + + void runOnFunction() override { + mlir::FuncOp fun = getFunction(); + fun.walk( + [](mlir::gpu::LaunchOp launch) { inlineOperationsIntoLaunch(launch); }); + } +}; + +struct RewriteKernelSignaturePass + : mlir::PassWrapper { + void runOnFunction() override { + mlir::FuncOp func = getFunction(); + mlir::ModuleOp module = func.getParentOfType(); + getFunction().walk([&](mlir::gpu::LaunchFuncOp launchOp) { + mlir::gpu::GPUFuncOp kernel = + module.lookupSymbol(launchOp.kernel()); + + if (kernel.getNumFuncArguments() != + func.getNumArguments() + func.getNumResults()) { + kernel.emitError() + << "number of kernel arguments does not match number" + << "of arguments and results of surrounding function"; + signalPassFailure(); + return; + } + if (!llvm::hasSingleElement(func)) { + func.emitError() << "surrounding function has more than one block"; + signalPassFailure(); + return; + } + + // Compute a map from function arguments to kernel function operands. + mlir::BlockAndValueMapping func_to_kernel; + for (mlir::BlockArgument arg : func.getArguments()) { + for (int i = 0, e = launchOp.getNumKernelOperands(); i < e; ++i) { + if (launchOp.getKernelOperand(i) == arg) { + func_to_kernel.map(arg, kernel.getArgument(i)); + break; + } + } + } + // Also add function results that are computed by the launch. + mlir::Operation* returnOp = func.getBody().back().getTerminator(); + for (mlir::Value result : returnOp->getOperands()) { + for (int i = 0, e = launchOp.getNumKernelOperands(); i < e; ++i) { + if (launchOp.getKernelOperand(i) == result) { + func_to_kernel.map(result, kernel.getArgument(i)); + break; + } + } + } + + // Create a new kernel function with modified signature. It will have the + // parameters and result types of the original funcion as its parameter + // type and otherwise will be void. + auto gpu_module = kernel.getParentOfType(); + mlir::OpBuilder kernel_builder(gpu_module.body()); + auto operand_types = llvm::to_vector<4>(llvm::concat( + func.getType().getInputs(), func.getType().getResults())); + auto new_kernel = kernel_builder.create( + kernel.getLoc(), kernel.getName(), + kernel_builder.getFunctionType(operand_types, {})); + new_kernel.setAttr(mlir::gpu::GPUDialect::getKernelFuncAttrName(), + kernel_builder.getUnitAttr()); + + // Create a map from old kernel argument to new one. 
+ mlir::BlockAndValueMapping old_kernel_to_new; + for (int i = 0, e = func.getNumArguments(); i < e; ++i) { + mlir::Value func_arg = func.getArgument(i); + mlir::Value new_kernel_arg = new_kernel.getArgument(i); + mlir::Value old_kernel_arg = func_to_kernel.lookupOrNull(func_arg); + if (!old_kernel_arg) { + kernel.emitOpError() + << "argument " << i + << " to containing function is not an argument to the kernel"; + signalPassFailure(); + return; + } + old_kernel_to_new.map(old_kernel_arg, new_kernel_arg); + } + for (int i = 0, e = returnOp->getNumOperands(); i < e; ++i) { + mlir::Value ret_op = returnOp->getOperand(i); + mlir::Value new_kernel_arg = + new_kernel.getArgument(func.getNumArguments() + i); + mlir::Value old_kernel_arg = func_to_kernel.lookupOrNull(ret_op); + if (!old_kernel_arg) { + kernel.emitOpError() + << "result " << i + << " of containing function is not an argument to the kernel"; + signalPassFailure(); + return; + } + old_kernel_to_new.map(old_kernel_arg, new_kernel_arg); + } + // Steal the body by appending the blocks and inserting a branch. + kernel.body().cloneInto(&new_kernel.getBody(), old_kernel_to_new); + kernel_builder.setInsertionPointToEnd(&new_kernel.body().front()); + kernel_builder.create( + new_kernel.getLoc(), &*std::next(new_kernel.body().begin())); + // Now create a new launchOp calling the new kernel. We need to forward + // the arguments of the surrounding function and operands to the return. + mlir::SmallVector new_operands; + new_operands.reserve(new_kernel.getNumFuncArguments()); + new_operands.append(func.args_begin(), func.args_end()); + new_operands.append(returnOp->operand_begin(), returnOp->operand_end()); + mlir::OpBuilder launch_builder(launchOp); + launch_builder.create( + launchOp.getLoc(), new_kernel, launchOp.getGridSizeOperandValues(), + launchOp.getBlockSizeOperandValues(), new_operands); + // Launch does not have results, so we can just erase it. And the kernel + // also needs to go. 
+ launchOp.erase(); + kernel.erase(); + }); + } +}; + +struct MapParallelLoopsPass + : public mlir::PassWrapper { + void runOnFunction() override { + mlir::greedilyMapParallelSCFToGPU(getFunction().getBody()); + } +}; + +struct FuseInnerParallelLoopsPass + : public mlir::PassWrapper { + void runOnFunction() override { + getFunction().walk([](mlir::scf::ParallelOp op) { + mlir::scf::naivelyFuseParallelOps(op.region()); + }); + } +}; + +struct ParallelLoopCollapsingToFirstDimPass + : public mlir::PassWrapper> { + void runOnOperation() override { + mlir::Operation* module = getOperation(); + + module->walk([&](mlir::scf::ParallelOp op) { + unsigned num_loops = op.getNumLoops(); + std::vector combinedLoops; + combinedLoops.reserve(num_loops); + for (unsigned i = 0; i < num_loops; ++i) { + combinedLoops.push_back(i); + } + mlir::collapseParallelLoops(op, {combinedLoops}); + }); + } +}; + +} // namespace + +std::unique_ptr createFusionOpRemoverPass() { + return absl::make_unique(); +} + +std::unique_ptr createStoreForwardingPass() { + return absl::make_unique(); +} + +std::unique_ptr createDeadTempBufferRemovalPass() { + return absl::make_unique(); +} + +std::unique_ptr +createMoveScalarComputationsIntoGpuLaunchPass() { + return absl::make_unique(); +} + +std::unique_ptr createRewriteKernelSignaturePass() { + return absl::make_unique(); +} + +std::unique_ptr createFuseInnerParallelLoopsPass() { + return absl::make_unique(); +} + +std::unique_ptr createMapParallelLoopsPass() { + return absl::make_unique(); +} + +std::unique_ptr> +createParallelLoopCollapsingToFirstDimPass() { + return absl::make_unique(); +} + +} // namespace mlir_gpu +} // namespace xla diff --git a/tensorflow/compiler/xla/service/mlir_gpu/passes.h b/tensorflow/compiler/xla/service/mlir_gpu/passes.h new file mode 100644 index 00000000000..e3840628a2e --- /dev/null +++ b/tensorflow/compiler/xla/service/mlir_gpu/passes.h @@ -0,0 +1,66 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_PASSES_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_PASSES_H_ + +#include + +#include "mlir/Pass/Pass.h" // from @llvm-project + +namespace xla { +namespace mlir_gpu { + +// TODO(herhut, pifon): Move these passes to MLIR Core. + +/// Replaces a FusionOp by the operations contained in its region. +std::unique_ptr createFusionOpRemoverPass(); + +/// Replaces a load that immediately follows a store to the same address with +/// the stored value. This needs generalization. +std::unique_ptr createStoreForwardingPass(); + +/// Removes temporary buffers that are only written to but never read from or +/// that are read but the read value is not used. Needs an analysis that proves +/// that loads and stores are side-effect free (in bounds, no aliasing, etc.). +std::unique_ptr createDeadTempBufferRemovalPass(); + +/// Moves scalar computations to the GPULaunchOp body. 
+std::unique_ptr +createMoveScalarComputationsIntoGpuLaunchPass(); + +/// Sorts the operands to the kernel for a deterministic order. First operands +/// that are defined by function arguments, followed by operands that are +/// returned from the function. This only works for simple functions without +/// control flow and can be used in cases where the kernel is extracted and used +/// independently of the host-side code. +std::unique_ptr createRewriteKernelSignaturePass(); + +/// We need to direct fusion to the inner loops. This cannot be done with +/// a passmanager alone ATM, as nested pass managers require operations to +/// be closed from above. +std::unique_ptr createFuseInnerParallelLoopsPass(); + +/// Greedily maps loops to GPU hardware dimensions. +std::unique_ptr createMapParallelLoopsPass(); + +/// Collapses all loop dimension into the first one. +std::unique_ptr> +createParallelLoopCollapsingToFirstDimPass(); + +} // namespace mlir_gpu +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_MLIR_GPU_PASSES_H_ From d9f7377784a85bc619e66a74138f9d8fe300446a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 11 Aug 2020 05:38:25 -0700 Subject: [PATCH 2502/2522] Integrate LLVM at llvm/llvm-project@b2b7dbb47aa9 Updates LLVM usage to match [b2b7dbb47aa9](https://github.com/llvm/llvm-project/commit/b2b7dbb47aa9) PiperOrigin-RevId: 326003266 Change-Id: Id69725d2af6a92b2f55046438a6ec8e7c025581e --- tensorflow/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 6493fe87836..7552eb2dd5a 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -712,8 +712,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "0de60b550b727fa3a0202a9ab5ca30520e291dd5" - LLVM_SHA256 = "d23a64cca502c32aa3990b5252f19cb2ad59534084384df6d7a355e6f23fac62" + LLVM_COMMIT = "b2b7dbb47aa9aff1252d4440bb9986df5a7e67cb" + LLVM_SHA256 = "1e26635eb360b81f75304d172a6eea50ea5a55a42fd21a35b973321b32df69e9" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From 4507fa1347f0671c50123ba2fa0a100776562efe Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Tue, 11 Aug 2020 05:48:36 -0700 Subject: [PATCH 2503/2522] Fix bug in occurring handling when composite symbols enter a undefined into a TF loop. 
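For context on the fix in the diff below: the while-loop epilogue previously wrapped every final loop variable in `array_ops.identity`, which breaks when a symbol that entered the loop undefined resolves to a composite value (e.g. a dict of tensors) or a plain Python value. The change maps identity only over tensor leaves of the (possibly nested) loop variables. The following is a minimal, self-contained sketch of that behavior using the public TensorFlow API — `tf.nest.map_structure` and `tf.is_tensor` stand in for the internal `nest` and `tensor_util` helpers used in the patch.

```python
import tensorflow as tf

def identity_tensors_only(loop_vars):
  # Apply tf.identity only to tensor leaves; leave plain Python values
  # (ints, non-tensor dict entries, etc.) untouched.
  return tf.nest.map_structure(
      lambda v: tf.identity(v) if tf.is_tensor(v) else v, loop_vars)

# A composite loop variable shaped like the one in the new test case.
final_vars = ({'a': tf.constant(2), 'b': {'c': tf.constant(1)}},
              tf.constant(32))
print(identity_tensors_only(final_vars))
```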
PiperOrigin-RevId: 326004271 Change-Id: I1a8641ca4d71bd9396fd2b5d73dc49c0c8fb9612 --- .../autograph/operators/control_flow.py | 8 ++++-- .../autograph/operators/control_flow_test.py | 28 +++++++++++++++++++ tensorflow/python/autograph/utils/testing.py | 3 ++ 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/autograph/operators/control_flow.py b/tensorflow/python/autograph/operators/control_flow.py index 7b307ed5020..ef9c3ae6427 100644 --- a/tensorflow/python/autograph/operators/control_flow.py +++ b/tensorflow/python/autograph/operators/control_flow.py @@ -934,7 +934,7 @@ def _shape_invariants_mapping_to_positional_list(mapping, keys): LEGAL_LOOP_TYPES = 'Tensor, int, float, bool or a list, tuple or dict thereof' -def _placeholder_value(like, original): +def _placeholder_value(like, original=None): if isinstance(like, (variables.Undefined, variables.UndefinedReturnValue)): return original if isinstance(like, (int, float, bool)): @@ -1079,8 +1079,10 @@ def _tf_while_stmt(test, body, get_state, set_state, symbol_names, opts): _runtime_zero_iterations_errmsg(symbol_names, nulls, orig_init_vars) ]) ]): - final_loop_vars = tuple( - array_ops.identity(v) for v in final_loop_vars[1:]) + final_loop_vars = nest.map_structure( + lambda v: (array_ops.identity(v) if tensor_util.is_tensor(v) else v), + final_loop_vars[1:], + ) set_state(final_loop_vars) diff --git a/tensorflow/python/autograph/operators/control_flow_test.py b/tensorflow/python/autograph/operators/control_flow_test.py index 553643956f6..32b36a29797 100644 --- a/tensorflow/python/autograph/operators/control_flow_test.py +++ b/tensorflow/python/autograph/operators/control_flow_test.py @@ -626,6 +626,34 @@ class WhileLoopTest(testing.AutoGraphTestCase): # Node naming is inconsistent between V1 and V2. self.assertGraphContains(r'(while/)?pow$', 1) + def test_tensor_creating_complex_variable(self): + + def body(): + nonlocal i, s + i = {'a': constant_op.constant(2), 'b': {'c': constant_op.constant(1)}} + s = i['a'] ** 5 + + def set_state(loop_vars): + nonlocal i, s + i, s = loop_vars + + i = variable_operators.Undefined('i') + s = constant_op.constant(0) + control_flow.while_stmt( + test=lambda: math_ops.equal(s, 0), + body=body, + get_state=lambda: (i, s), + set_state=set_state, + symbol_names=('i', 's'), + opts={}) + + self.assertDictEqual(i, {'a': 2, 'b': {'c': 1}}) + self.assertEqual(s, 32) + self.assertOpCreated('StatelessWhile') + # Check that the temporary staging of the body did not create extra ops. + # Node naming is inconsistent between V1 and V2. 
+ self.assertGraphContains(r'(while/)?pow$', 1) + def test_tensor_with_side_effecting_condition(self): v = self.variable('v', 0, dtypes.int32) diff --git a/tensorflow/python/autograph/utils/testing.py b/tensorflow/python/autograph/utils/testing.py index df60583bf85..bec6966e7cb 100644 --- a/tensorflow/python/autograph/utils/testing.py +++ b/tensorflow/python/autograph/utils/testing.py @@ -142,3 +142,6 @@ class AutoGraphTestCase(test.TestCase): def assertEqual(self, *args): self.assertions.append((super().assertEqual, list(args))) + + def assertDictEqual(self, *args): + self.assertions.append((super().assertDictEqual, list(args))) From 7fd83a73f00efda181def4bee99e07413038dbe5 Mon Sep 17 00:00:00 2001 From: Pankaj Kanwar Date: Tue, 11 Aug 2020 08:06:11 -0700 Subject: [PATCH 2504/2522] update cuda paths for cuda11 PiperOrigin-RevId: 326022147 Change-Id: I7e0dff924c3d4ad5e3b50cdcd6551572ca93de93 --- .../gcc7_manylinux2010-nvcc-cuda11/BUILD | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda11/BUILD b/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda11/BUILD index 92305526c5c..358af09fbdd 100755 --- a/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda11/BUILD +++ b/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda11/BUILD @@ -65,9 +65,9 @@ cc_toolchain_config( "/dt7/usr/lib/gcc/x86_64-pc-linux-gnu/7/include", "/dt7/usr/lib/gcc/x86_64-pc-linux-gnu/7/include-fixed", "/dt7/usr/include", - "/usr/local/cuda11/targets/x86_64-linux/include", - "/usr/local/cuda11/include", - "/usr/local/cuda11/extras/CUPTI/include", + "/usr/local/cuda-11.0/targets/x86_64-linux/include", + "/usr/local/cuda-11.0/include", + "/usr/local/cuda-11.0/extras/CUPTI/include", "/usr/include", ], builtin_sysroot = "", @@ -105,9 +105,9 @@ cc_toolchain_config( "/dt7/usr/lib/gcc/x86_64-pc-linux-gnu/7/include", "/dt7/usr/lib/gcc/x86_64-pc-linux-gnu/7/include-fixed", "/dt7/usr/include", - "/usr/local/cuda-11/targets/x86_64-linux/include", - "/usr/local/cuda-11/include", - "/usr/local/cuda-11/extras/CUPTI/include", + "/usr/local/cuda-11.0/targets/x86_64-linux/include", + "/usr/local/cuda-11.0/include", + "/usr/local/cuda-11.0/extras/CUPTI/include", "/usr/include", ], cpu = "darwin", @@ -143,9 +143,9 @@ cc_toolchain_config( "/dt7/usr/lib/gcc/x86_64-pc-linux-gnu/7/include", "/dt7/usr/lib/gcc/x86_64-pc-linux-gnu/7/include-fixed", "/dt7/usr/include", - "/usr/local/cuda-11/targets/x86_64-linux/include", - "/usr/local/cuda-11/include", - "/usr/local/cuda-11/extras/CUPTI/include", + "/usr/local/cuda-11.0/targets/x86_64-linux/include", + "/usr/local/cuda-11.0/include", + "/usr/local/cuda-11.0/extras/CUPTI/include", "/usr/include", ], cpu = "x64_windows", From 0dc091cb46ad7778fbe19fd7519eb41cadba777d Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 11 Aug 2020 08:47:31 -0700 Subject: [PATCH 2505/2522] Integrate LLVM at llvm/llvm-project@950f1bf976b3 Updates LLVM usage to match [950f1bf976b3](https://github.com/llvm/llvm-project/commit/950f1bf976b3) PiperOrigin-RevId: 326028888 Change-Id: Ia11d7606673f92f9de38703cc961ed8bc3416e75 --- tensorflow/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 7552eb2dd5a..95b9c91a62f 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -712,8 +712,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "b2b7dbb47aa9aff1252d4440bb9986df5a7e67cb" - LLVM_SHA256 = "1e26635eb360b81f75304d172a6eea50ea5a55a42fd21a35b973321b32df69e9" + LLVM_COMMIT = "950f1bf976b332eca60267b25bf759e2ad564e0c" + LLVM_SHA256 = "89b0e1e5d0cd56adfbe061fc42804088eaed6773e8ff9f1d597137b474055096" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From 289c617c2ce0f5961da742ba4ad3e0ea23cbdb94 Mon Sep 17 00:00:00 2001 From: Michael Gester Date: Tue, 11 Aug 2020 09:16:38 -0700 Subject: [PATCH 2506/2522] Change benefit of TF2XLA fallback pattern to 0 This pattern should only be used as a last resort so we use the minimum benefit for it. PiperOrigin-RevId: 326034560 Change-Id: I1e11cae4807aad82dc0919ea32cac7eb1a59cca2 --- tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc | 6 +++--- .../compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc index 1f63f2a9396..5fe933ee635 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc @@ -5793,15 +5793,15 @@ LogicalResult legalizeTF( // Add TF->HLO legalization patterns. PopulateLegalizeTfPatterns(context, &patterns); + // Add TF->TF lowering patterns. + TF::PopulateLoweringTFPatterns(context, &patterns); + // Add TF->HLO legalization patterns via TF2XLA fallback. if (tf2xla_fallback_device_type.hasValue()) { PopulateLegalizeTfWithTf2XlaPatterns(tf2xla_fallback_device_type.getValue(), patterns); } - // Add TF->TF lowering patterns. - TF::PopulateLoweringTFPatterns(context, &patterns); - // Populate with CHLO->HLO lowerings to account for TF ops legalized to // CHLO first. if (legalize_chlo) { diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc index 6e651df5075..904b80e05b1 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc @@ -499,8 +499,10 @@ tensorflow::XlaExpression Tf2XlaRewriter::GetExprForOperand(Value operand, class Tf2XlaRewritePattern : public RewritePattern { public: + // Set benefit to 0 (= least benefit) so this pattern is only used as a + // fallback. 
explicit Tf2XlaRewritePattern(const std::string& device_type) - : RewritePattern(1, MatchAnyOpTypeTag()), device_type_(device_type) {} + : RewritePattern(0, MatchAnyOpTypeTag()), device_type_(device_type) {} LogicalResult matchAndRewrite(Operation* op, PatternRewriter& rewriter) const override { From ee2c2d17814c015477041dcafed0c9c7f1f00162 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Tue, 11 Aug 2020 09:32:56 -0700 Subject: [PATCH 2507/2522] Virtual Tune splitted to Virtual GetPossibleKernelWorkGroups(API neutral) and non-virtual generic Tune(API specific). PiperOrigin-RevId: 326037556 Change-Id: Ie330fe2b17316046c3a4cbb7d509d8cb6ab42fd4 --- .../lite/delegates/gpu/cl/kernels/BUILD | 2 + .../delegates/gpu/cl/kernels/concat_test.cc | 4 +- .../lite/delegates/gpu/cl/kernels/conv_3d.cc | 16 +- .../lite/delegates/gpu/cl/kernels/conv_3d.h | 5 +- .../gpu/cl/kernels/conv_buffer_1x1.cc | 10 +- .../gpu/cl/kernels/conv_buffer_1x1.h | 5 +- .../delegates/gpu/cl/kernels/conv_powervr.cc | 15 +- .../delegates/gpu/cl/kernels/conv_powervr.h | 5 +- .../delegates/gpu/cl/kernels/conv_texture.cc | 8 +- .../delegates/gpu/cl/kernels/conv_texture.h | 5 +- .../gpu/cl/kernels/convolution_transposed.cc | 8 +- .../gpu/cl/kernels/convolution_transposed.h | 5 +- .../cl/kernels/convolution_transposed_3d.cc | 12 +- .../cl/kernels/convolution_transposed_3d.h | 5 +- .../cl/kernels/convolution_transposed_3x3.h | 7 +- .../cl/kernels/convolution_transposed_4x4.h | 7 +- .../gpu/cl/kernels/depthwise_conv_3x3.cc | 11 +- .../gpu/cl/kernels/depthwise_conv_3x3.h | 5 +- .../gpu/cl/kernels/fully_connected.h | 7 +- .../delegates/gpu/cl/kernels/gpu_operation.cc | 29 ++++ .../delegates/gpu/cl/kernels/gpu_operation.h | 9 +- .../delegates/gpu/cl/kernels/lstm_test.cc | 2 +- .../lite/delegates/gpu/cl/kernels/mean.h | 7 +- .../cl/kernels/mean_stddev_normalization.h | 7 +- .../delegates/gpu/cl/kernels/mean_test.cc | 2 +- .../delegates/gpu/cl/kernels/softmax1x1.h | 7 +- .../lite/delegates/gpu/cl/kernels/winograd.cc | 39 ++--- .../lite/delegates/gpu/cl/kernels/winograd.h | 14 +- .../gpu/cl/kernels/work_group_picking.cc | 140 +++++++++--------- .../gpu/cl/kernels/work_group_picking.h | 40 ++--- 30 files changed, 267 insertions(+), 171 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/BUILD b/tensorflow/lite/delegates/gpu/cl/kernels/BUILD index 27e12b5981f..c8351304188 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/BUILD +++ b/tensorflow/lite/delegates/gpu/cl/kernels/BUILD @@ -1388,6 +1388,8 @@ test_suite( "fully_connected_test", "lstm_test", "max_unpooling_test", + "mean_stddev_normalization_test", + "mean_test", "padding_test", "pooling_test", "prelu_test", diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/concat_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/concat_test.cc index b2e2e23b6f9..d6889af7717 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/concat_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/concat_test.cc @@ -118,7 +118,7 @@ TEST_F(OpenCLOperationTest, ConcatChannels) { op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; ConcatZ operation = - CreateConcatZ(op_def, {1, 2, 3}, env_.GetDevicePtr()->GetInfo()); + CreateConcatZ(op_def, {1, 2, 3}, env_.GetDevicePtr()->info_); ASSERT_OK(ExecuteGPUOperation({src0, src1, src2}, creation_context_, &operation, BHWC(1, 2, 1, 6), &dst_tensor)); EXPECT_THAT(dst_tensor.data, @@ -152,7 +152,7 @@ TEST_F(OpenCLOperationTest, ConcatChannelsAlignedx4) { op_def.dst_tensors.push_back({data_type, storage, 
Layout::HWC}); TensorFloat32 dst_tensor; ConcatZ operation = - CreateConcatZ(op_def, {4, 4}, env_.GetDevicePtr()->GetInfo()); + CreateConcatZ(op_def, {4, 4}, env_.GetDevicePtr()->info_); ASSERT_OK(ExecuteGPUOperation({src0, src1}, creation_context_, &operation, BHWC(1, 2, 1, 8), &dst_tensor)); EXPECT_THAT( diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.cc index 727cd488694..4b898378c2d 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.cc @@ -245,21 +245,25 @@ int3 Conv3D::GetGridSize() const { wg[conv_params_.work_group_launch_order[2]] * work_group_size_.z); } -absl::Status Conv3D::Tune(const TuningParameters& params) { +void Conv3D::GetPossibleKernelWorkGroups(TuningType tuning_type, + const DeviceInfo& device_info, + const KernelInfo& kernel_info, + std::vector* work_groups) const { if (conv_params_.weights_upload_type == WeightsUploadType::LOCAL_MEM_ASYNC_SUBGROUP || conv_params_.weights_upload_type == WeightsUploadType::LOCAL_MEM_BY_THREADS) { - return absl::OkStatus(); + work_groups->push_back(work_group_size_); + return; } if (conv_params_.work_group_launch_order[0] == 0 && conv_params_.work_group_launch_order[1] == 1 && conv_params_.work_group_launch_order[2] == 2) { - RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); - RETURN_IF_ERROR( - GetBestWorkGroupConv(params, kernel_, grid_size_, &work_group_size_)); + GetPossibleWorkGroupsConv(tuning_type, device_info, kernel_info, grid_size_, + work_groups); + } else { + work_groups->push_back(work_group_size_); } - return absl::OkStatus(); } std::string Conv3D::GenerateConv3D(const OperationDef& op_def, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.h index ffa269d1629..e53c9c8a6d0 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.h @@ -39,7 +39,10 @@ namespace cl { class Conv3D : public GPUOperation { public: Conv3D() = default; - absl::Status Tune(const TuningParameters& params) override; + void GetPossibleKernelWorkGroups( + TuningType tuning_type, const DeviceInfo& device_info, + const KernelInfo& kernel_info, + std::vector* work_groups) const override; absl::Status BindArguments() override; int3 GetGridSize() const override; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc index 38e04e221f2..e75fe02df7a 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc @@ -315,11 +315,11 @@ int3 ConvBuffer1x1::GetGridSize() const { return int3(grid_x, grid_y, grid_z); } -absl::Status ConvBuffer1x1::Tune(const TuningParameters& params) { - RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); - RETURN_IF_ERROR( - GetBestWorkGroupConv(params, kernel_, grid_size_, &work_group_size_)); - return absl::OkStatus(); +void ConvBuffer1x1::GetPossibleKernelWorkGroups( + TuningType tuning_type, const DeviceInfo& device_info, + const KernelInfo& kernel_info, std::vector* work_groups) const { + GetPossibleWorkGroupsConv(tuning_type, device_info, kernel_info, grid_size_, + work_groups); } bool IsConvBuffer1x1Supported(const OperationDef& definition, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.h index 94b7cbd1b37..530aec70a17 100644 --- 
a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.h @@ -47,7 +47,10 @@ class ConvBuffer1x1 : public GPUOperation { ConvBuffer1x1(const ConvBuffer1x1&) = delete; ConvBuffer1x1& operator=(const ConvBuffer1x1&) = delete; - absl::Status Tune(const TuningParameters& params) override; + void GetPossibleKernelWorkGroups( + TuningType tuning_type, const DeviceInfo& device_info, + const KernelInfo& kernel_info, + std::vector* work_groups) const override; int3 GetGridSize() const override; ConvWeightsDescription GetConvWeightsDescription() const { diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc index f04102d25d6..eb5baa8a6ba 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc @@ -240,22 +240,25 @@ int3 ConvPowerVR::GetGridSize() const { } } -absl::Status ConvPowerVR::Tune(const TuningParameters& params) { +void ConvPowerVR::GetPossibleKernelWorkGroups( + TuningType tuning_type, const DeviceInfo& device_info, + const KernelInfo& kernel_info, std::vector* work_groups) const { if (conv_params_.weights_upload_type == WeightsUploadType::LOCAL_MEM_ASYNC_SUBGROUP || conv_params_.weights_upload_type == WeightsUploadType::LOCAL_MEM_BY_THREADS || conv_params_.fixed_work_group_size) { - return absl::OkStatus(); + work_groups->push_back(work_group_size_); + return; } if (conv_params_.work_group_launch_order[0] == 0 && conv_params_.work_group_launch_order[1] == 1 && conv_params_.work_group_launch_order[2] == 2) { - RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); - RETURN_IF_ERROR( - GetBestWorkGroupConv(params, kernel_, grid_size_, &work_group_size_)); + GetPossibleWorkGroupsConv(tuning_type, device_info, kernel_info, grid_size_, + work_groups); + } else { + work_groups->push_back(work_group_size_); } - return absl::OkStatus(); } std::string ConvPowerVR::GenerateConv(const DeviceInfo& device_info, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h index e61d4c14ce7..1ff6db43cbc 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h @@ -41,7 +41,10 @@ namespace cl { class ConvPowerVR : public GPUOperation { public: ConvPowerVR() = default; - absl::Status Tune(const TuningParameters& params) override; + void GetPossibleKernelWorkGroups( + TuningType tuning_type, const DeviceInfo& device_info, + const KernelInfo& kernel_info, + std::vector* work_groups) const override; absl::Status BindArguments() override; int3 GetGridSize() const override; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc index 581c8056ced..7f987cc724c 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc @@ -420,9 +420,11 @@ int3 ConvTexture::GetGridSize() const { return int3(grid_x, grid_y, grid_z); } -absl::Status ConvTexture::Tune(const TuningParameters& params) { - RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); - return GetBestWorkGroupConv(params, kernel_, grid_size_, &work_group_size_); +void ConvTexture::GetPossibleKernelWorkGroups( + TuningType tuning_type, const DeviceInfo& device_info, + const KernelInfo& kernel_info, std::vector* work_groups) const { + GetPossibleWorkGroupsConv(tuning_type, 
device_info, kernel_info, grid_size_, + work_groups); } absl::Status CreateConvTexture(const CreationContext& creation_context, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.h index 10efc23a044..8406918fe80 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.h @@ -42,7 +42,10 @@ namespace cl { class ConvTexture : public GPUOperation { public: ConvTexture() = default; - absl::Status Tune(const TuningParameters& params) override; + void GetPossibleKernelWorkGroups( + TuningType tuning_type, const DeviceInfo& device_info, + const KernelInfo& kernel_info, + std::vector* work_groups) const override; absl::Status BindArguments() override; int3 GetGridSize() const override; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc index c6eba691306..314d0b20499 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc @@ -351,9 +351,11 @@ int3 ConvolutionTransposed::GetGridSize() const { return int3(grid_x, grid_y, grid_z); } -absl::Status ConvolutionTransposed::Tune(const TuningParameters& params) { - RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); - return GetBestWorkGroupConv(params, kernel_, grid_size_, &work_group_size_); +void ConvolutionTransposed::GetPossibleKernelWorkGroups( + TuningType tuning_type, const DeviceInfo& device_info, + const KernelInfo& kernel_info, std::vector* work_groups) const { + GetPossibleWorkGroupsConv(tuning_type, device_info, kernel_info, grid_size_, + work_groups); } absl::Status CreateConvolutionTransposed( diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.h index 44e1c942925..9f865f8f0b7 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.h @@ -38,7 +38,10 @@ namespace cl { class ConvolutionTransposed : public GPUOperation { public: ConvolutionTransposed() = default; - absl::Status Tune(const TuningParameters& params) override; + void GetPossibleKernelWorkGroups( + TuningType tuning_type, const DeviceInfo& device_info, + const KernelInfo& kernel_info, + std::vector* work_groups) const override; absl::Status BindArguments() override; int3 GetGridSize() const override; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.cc index eeb3ae15e51..2b35080b1ab 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.cc @@ -394,17 +394,19 @@ int3 ConvolutionTransposed3D::GetGridSize() const { return int3(grid_x, grid_y, grid_z); } -absl::Status ConvolutionTransposed3D::Tune(const TuningParameters& params) { - RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); - return GetBestWorkGroupConv(params, kernel_, grid_size_, &work_group_size_); +void ConvolutionTransposed3D::GetPossibleKernelWorkGroups( + TuningType tuning_type, const DeviceInfo& device_info, + const KernelInfo& kernel_info, std::vector* work_groups) const { + GetPossibleWorkGroupsConv(tuning_type, device_info, kernel_info, grid_size_, + work_groups); } absl::Status CreateConvolutionTransposed3D( 
const CreationContext& creation_context, const OperationDef& definition, const ConvolutionTransposed3DAttributes& attr, ConvolutionTransposed3D* result) { - *result = ConvolutionTransposed3D(definition, attr, - creation_context.device->GetInfo()); + *result = + ConvolutionTransposed3D(definition, attr, creation_context.device->info_); RETURN_IF_ERROR( result->UploadWeights(attr.weights, creation_context.context)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.h index 0025d9da7b6..919181bceab 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.h @@ -38,7 +38,10 @@ namespace cl { class ConvolutionTransposed3D : public GPUOperation { public: ConvolutionTransposed3D() = default; - absl::Status Tune(const TuningParameters& params) override; + void GetPossibleKernelWorkGroups( + TuningType tuning_type, const DeviceInfo& device_info, + const KernelInfo& kernel_info, + std::vector* work_groups) const override; absl::Status BindArguments() override; int3 GetGridSize() const override; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.h index 9addfe11984..0f4022b6eb6 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.h @@ -37,8 +37,11 @@ namespace cl { class ConvolutionTransposed3x3 : public GPUOperation { public: ConvolutionTransposed3x3() = default; - absl::Status Tune(const TuningParameters& params) override { - return absl::OkStatus(); + void GetPossibleKernelWorkGroups( + TuningType tuning_type, const DeviceInfo& device_info, + const KernelInfo& kernel_info, + std::vector* work_groups) const override { + work_groups->push_back(work_group_size_); } absl::Status BindArguments() override; int3 GetGridSize() const override; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.h index 21ec8c3e293..6344ca39bc0 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.h @@ -37,8 +37,11 @@ namespace cl { class ConvolutionTransposed4x4 : public GPUOperation { public: ConvolutionTransposed4x4() = default; - absl::Status Tune(const TuningParameters& params) override { - return absl::OkStatus(); + void GetPossibleKernelWorkGroups( + TuningType tuning_type, const DeviceInfo& device_info, + const KernelInfo& kernel_info, + std::vector* work_groups) const override { + work_groups->push_back(work_group_size_); } absl::Status BindArguments() override; int3 GetGridSize() const override; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc index f0213cda805..bb1b409482f 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc @@ -301,12 +301,15 @@ int3 DepthwiseConv3x3::GetGridSize() const { return int3(grid_x, grid_y, grid_z); } -absl::Status DepthwiseConv3x3::Tune(const TuningParameters& params) { +void DepthwiseConv3x3::GetPossibleKernelWorkGroups( + TuningType tuning_type, const DeviceInfo& device_info, + const KernelInfo& 
kernel_info, std::vector* work_groups) const { if (local_mem_uploads_) { - return absl::OkStatus(); + work_groups->push_back(work_group_size_); + } else { + GetPossibleWorkGroups(tuning_type, device_info, kernel_info, grid_size_, + work_groups); } - RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); - return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_); } bool IsDepthwiseConv3x3Supported(const DepthwiseConvolution2DAttributes& attr) { diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.h b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.h index dedc9b530bb..b324b039f2b 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.h @@ -38,7 +38,10 @@ namespace cl { class DepthwiseConv3x3 : public GPUOperation { public: DepthwiseConv3x3() = default; - absl::Status Tune(const TuningParameters& params) override; + void GetPossibleKernelWorkGroups( + TuningType tuning_type, const DeviceInfo& device_info, + const KernelInfo& kernel_info, + std::vector* work_groups) const override; absl::Status BindArguments() override; int3 GetGridSize() const override; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h index ced3913ead7..8543c3defc0 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h @@ -89,8 +89,11 @@ void RearrangeFCWeightsToIOO4I4(const tflite::gpu::Tensor& weights, class FullyConnected : public GPUOperation { public: FullyConnected() = default; - absl::Status Tune(const TuningParameters& params) override { - return absl::OkStatus(); + void GetPossibleKernelWorkGroups( + TuningType tuning_type, const DeviceInfo& device_info, + const KernelInfo& kernel_info, + std::vector* work_groups) const override { + work_groups->push_back(work_group_size_); } int3 GetGridSize() const override; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc index 0aa1842791f..29f6c038f77 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc @@ -247,6 +247,35 @@ absl::Status GPUOperation::Compile(const CreationContext& creation_context) { return PostCompileCheck(creation_context.device->info_, kernel_.info_); } +void GPUOperation::GetPossibleKernelWorkGroups( + TuningType tuning_type, const DeviceInfo& device_info, + const KernelInfo& kernel_info, std::vector* work_groups) const { + GetPossibleWorkGroups(tuning_type, device_info, kernel_info, grid_size_, + work_groups); +} + +absl::Status GPUOperation::Tune(const TuningParameters& params) { + std::vector possible_work_groups; + GetPossibleKernelWorkGroups(params.tuning_type, *params.info, kernel_.info_, + &possible_work_groups); + if (possible_work_groups.empty()) { + return absl::NotFoundError( + "Can not found work_group size to launch kernel"); + } + if (possible_work_groups.size() == 1) { + work_group_size_ = possible_work_groups[0]; + return absl::OkStatus(); + } else { + RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); + int best_work_group_index; + RETURN_IF_ERROR(params.queue->GetBestWorkGroupIndex( + kernel_, *params.info, grid_size_, possible_work_groups, + &best_work_group_index)); + work_group_size_ = possible_work_groups[best_work_group_index]; + return absl::OkStatus(); + } +} + int3 
GPUOperation::GetGridSize() const { if (elementwise_) { const int grid_x = dst_[0]->Width() * dst_[0]->Batch(); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h index ba266f8dcc9..80f2eb3c950 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h @@ -93,10 +93,11 @@ class GPUOperation { return queue->DispatchImplicit(kernel_, grid_size_, work_group_size_); } - virtual absl::Status Tune(const TuningParameters& params) { - RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); - return GetBestWorkGroup(params, kernel_, grid_size_, &work_group_size_); - } + virtual void GetPossibleKernelWorkGroups( + TuningType tuning_type, const DeviceInfo& device_info, + const KernelInfo& kernel_info, std::vector* work_groups) const; + + absl::Status Tune(const TuningParameters& params); absl::Status Compile(const CreationContext& creation_context); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/lstm_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/lstm_test.cc index d7ea3ee6474..52e9b4cba4c 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/lstm_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/lstm_test.cc @@ -67,7 +67,7 @@ TEST_F(OpenCLOperationTest, LSTM) { op_def.dst_tensors.push_back({data_type, storage, Layout::BHWC}); TensorFloat32 new_state; TensorFloat32 new_activ; - LSTM operation = CreateLSTM(op_def, env_.GetDevicePtr()->GetInfo()); + LSTM operation = CreateLSTM(op_def, env_.GetDevicePtr()->info_); ASSERT_OK(ExecuteGPUOperation( {src_tensor, prev_state}, creation_context_, &operation, {BHWC(1, 1, 1, 4), BHWC(1, 1, 1, 4)}, {&new_state, &new_activ})); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/mean.h b/tensorflow/lite/delegates/gpu/cl/kernels/mean.h index cfdd7be53d3..12735c0b916 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/mean.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/mean.h @@ -31,8 +31,11 @@ class Mean : public GPUOperation { Mean() = default; Mean(const OperationDef& definition, const DeviceInfo& device_info); - absl::Status Tune(const TuningParameters& params) override { - return absl::OkStatus(); + void GetPossibleKernelWorkGroups( + TuningType tuning_type, const DeviceInfo& device_info, + const KernelInfo& kernel_info, + std::vector* work_groups) const override { + work_groups->push_back(work_group_size_); } absl::Status BindArguments() override; int3 GetGridSize() const override; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.h b/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.h index 7dd45fcb86a..47cc7ff46d1 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/mean_stddev_normalization.h @@ -30,8 +30,11 @@ class MeanStdDevNormalization : public GPUOperation { public: explicit MeanStdDevNormalization(const OperationDef& definition); - absl::Status Tune(const TuningParameters& params) override { - return absl::OkStatus(); + void GetPossibleKernelWorkGroups( + TuningType tuning_type, const DeviceInfo& device_info, + const KernelInfo& kernel_info, + std::vector* work_groups) const override { + work_groups->push_back(work_group_size_); } int3 GetGridSize() const override; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/mean_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/mean_test.cc index dbb70127317..b1ae1d354eb 100644 --- 
a/tensorflow/lite/delegates/gpu/cl/kernels/mean_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/mean_test.cc @@ -47,7 +47,7 @@ TEST_F(OpenCLOperationTest, Mean) { op_def.src_tensors.push_back({data_type, storage, Layout::HWC}); op_def.dst_tensors.push_back({data_type, storage, Layout::HWC}); TensorFloat32 dst_tensor; - Mean operation = CreateMean(op_def, env_.GetDevicePtr()->GetInfo()); + Mean operation = CreateMean(op_def, env_.GetDevicePtr()->info_); ASSERT_OK(ExecuteGPUOperation(src_tensor, creation_context_, &operation, BHWC(1, 1, 1, 1), &dst_tensor)); EXPECT_THAT(dst_tensor.data, Pointwise(FloatNear(eps), {2.5f})); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.h b/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.h index 42cbbabe799..5bc9278d612 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.h @@ -29,8 +29,11 @@ class Softmax1x1 : public GPUOperation { public: Softmax1x1() = default; explicit Softmax1x1(const OperationDef& definition); - absl::Status Tune(const TuningParameters& params) override { - return absl::OkStatus(); + void GetPossibleKernelWorkGroups( + TuningType tuning_type, const DeviceInfo& device_info, + const KernelInfo& kernel_info, + std::vector* work_groups) const override { + work_groups->push_back(work_group_size_); } absl::Status BindArguments() override; int3 GetGridSize() const override; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc index c77c805a712..3af4c658ce2 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc @@ -259,11 +259,11 @@ absl::Status Winograd4x4To36::UploadBt(CLContext* context) { return absl::OkStatus(); } -int3 Winograd4x4To36::SelectBestWorkGroup() { +int3 Winograd4x4To36::SelectBestWorkGroup(const KernelInfo& kernel_info) const { const std::vector wgs = {{8, 6, 4}, {8, 6, 2}, {4, 6, 2}, {4, 6, 2}, {2, 6, 2}, {2, 6, 1}, {1, 6, 1}, {1, 3, 1}, {1, 1, 1}}; - return GetFirstSuitableWorkGroup(wgs, kernel_.info_.max_work_group_size); + return GetFirstSuitableWorkGroup(wgs, kernel_info.max_work_group_size); } absl::Status Winograd4x4To36::BindArguments() { @@ -286,15 +286,18 @@ int3 Winograd4x4To36::GetGridSize() const { return int3(grid_x, grid_y, grid_z); } -absl::Status Winograd4x4To36::Tune(const TuningParameters& params) { - switch (params.tuning_type) { +void Winograd4x4To36::GetPossibleKernelWorkGroups( + TuningType tuning_type, const DeviceInfo& device_info, + const KernelInfo& kernel_info, std::vector* work_groups) const { + switch (tuning_type) { case TuningType::EXHAUSTIVE: - RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); - return GetBestWorkGroup(params, kernel_, grid_size_, &work_group_size_); + GetPossibleWorkGroups(tuning_type, device_info, kernel_info, grid_size_, + work_groups); + return; case TuningType::FAST: default: - work_group_size_ = SelectBestWorkGroup(); - return absl::OkStatus(); + work_groups->push_back(SelectBestWorkGroup(kernel_info)); + return; } } @@ -461,11 +464,11 @@ absl::Status Winograd36To4x4::UploadAt(CLContext* context) { return absl::OkStatus(); } -int3 Winograd36To4x4::SelectBestWorkGroup() { +int3 Winograd36To4x4::SelectBestWorkGroup(const KernelInfo& kernel_info) const { const std::vector wgs = {{32, 4, 2}, {16, 4, 2}, {16, 4, 1}, {8, 4, 1}, {4, 4, 1}, {2, 4, 1}, {1, 4, 1}, {1, 2, 1}, {1, 1, 1}}; - return GetFirstSuitableWorkGroup(wgs, 
kernel_.info_.max_work_group_size); + return GetFirstSuitableWorkGroup(wgs, kernel_info.max_work_group_size); } absl::Status Winograd36To4x4::BindArguments() { @@ -485,19 +488,21 @@ int3 Winograd36To4x4::GetGridSize() const { return int3(grid_x, grid_y, grid_z); } -absl::Status Winograd36To4x4::Tune(const TuningParameters& params) { - switch (params.tuning_type) { +void Winograd36To4x4::GetPossibleKernelWorkGroups( + TuningType tuning_type, const DeviceInfo& device_info, + const KernelInfo& kernel_info, std::vector* work_groups) const { + switch (tuning_type) { case TuningType::EXHAUSTIVE: - RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); - return GetBestWorkGroup(params, kernel_, grid_size_, &work_group_size_); + GetPossibleWorkGroups(tuning_type, device_info, kernel_info, grid_size_, + work_groups); + return; case TuningType::FAST: default: - work_group_size_ = SelectBestWorkGroup(); - return absl::OkStatus(); + work_groups->push_back(SelectBestWorkGroup(kernel_info)); + return; } } - absl::Status CreateWinograd36To4x4( const CreationContext& creation_context, const OperationDef& definition, const tflite::gpu::Tensor& biases, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.h b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.h index ddc1155e0b5..08153f1d8aa 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.h @@ -38,7 +38,10 @@ class Winograd4x4To36 : public GPUOperation { const DeviceInfo& device_info); absl::Status BindArguments() override; int3 GetGridSize() const override; - absl::Status Tune(const TuningParameters& params) override; + void GetPossibleKernelWorkGroups( + TuningType tuning_type, const DeviceInfo& device_info, + const KernelInfo& kernel_info, + std::vector* work_groups) const override; // Move only Winograd4x4To36(Winograd4x4To36&& operation); @@ -56,7 +59,7 @@ class Winograd4x4To36 : public GPUOperation { std::string GetWinograd4x4To36Code(const OperationDef& op_def); // Must be called after kernel compilation - int3 SelectBestWorkGroup(); + int3 SelectBestWorkGroup(const KernelInfo& kernel_info) const; Padding2D padding_; }; @@ -73,7 +76,10 @@ class Winograd36To4x4 : public GPUOperation { const DeviceInfo& device_info); absl::Status BindArguments() override; int3 GetGridSize() const override; - absl::Status Tune(const TuningParameters& params) override; + void GetPossibleKernelWorkGroups( + TuningType tuning_type, const DeviceInfo& device_info, + const KernelInfo& kernel_info, + std::vector* work_groups) const override; // Move only Winograd36To4x4(Winograd36To4x4&& operation); @@ -92,7 +98,7 @@ class Winograd36To4x4 : public GPUOperation { std::string GetWinograd36To4x4Code(const OperationDef& op_def); // Must be called after kernel compilation - int3 SelectBestWorkGroup(); + int3 SelectBestWorkGroup(const KernelInfo& kernel_info) const; }; absl::Status CreateWinograd36To4x4( diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.cc b/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.cc index 9a1a24895bf..4c0cbc06985 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.cc @@ -33,9 +33,9 @@ std::vector Get2DWorkgroupsEqualTo128() { {8, 16}, {4, 32}, {2, 64}, {1, 128}}; } -std::vector GenerateWorkGroupSizesXY128( - int3 grid, const KernelInfo& kernel_info, - WorkGroupSizeAlignment z_alignment) { +std::vector GenerateWorkGroupSizesXYMultipleOf( + int multiplier, int3 
grid, const KernelInfo& kernel_info, + const DeviceInfo& device_info, WorkGroupSizeAlignment z_alignment) { std::vector work_groups; work_groups.reserve(32); @@ -44,7 +44,7 @@ std::vector GenerateWorkGroupSizesXY128( for (int x = 1; x <= kernel_info.max_work_group_size; x *= 2) { for (int y = 1; y <= kernel_info.max_work_group_size; y *= 2) { int work_group_size_xy = x * y; - if (work_group_size_xy % 128 != 0 || + if (work_group_size_xy % multiplier != 0 || work_group_size_xy > kernel_info.max_work_group_size) { continue; } @@ -52,26 +52,38 @@ std::vector GenerateWorkGroupSizesXY128( if (work_group_size_xy * z > kernel_info.max_work_group_size) { continue; } - work_groups.push_back({x, y, z}); + if (x <= device_info.max_work_group_size_x && + y <= device_info.max_work_group_size_y && + z <= device_info.max_work_group_size_z) { + work_groups.push_back({x, y, z}); + } } } } return work_groups; } -std::vector GenerateWorkGroupSizesXY128Linear( - int3 grid, const KernelInfo& kernel_info, - WorkGroupSizeAlignment z_alignment) { +std::vector GenerateWorkGroupSizesXMultipleOf( + int multiplier, int3 grid, const KernelInfo& kernel_info, + const DeviceInfo& device_info, WorkGroupSizeAlignment z_alignment) { std::vector work_groups; work_groups.reserve(32); std::vector possible_z_sizes = GetPossibleSizes(grid.z, z_alignment); + std::vector possible_y_sizes = + GetPossibleSizes(grid.y, WorkGroupSizeAlignment::PRECISE); - for (int x = 128; x <= kernel_info.max_work_group_size && x < grid.x + 128; - x += 128) { - for (auto z : possible_z_sizes) { - if (x * z <= kernel_info.max_work_group_size) { - work_groups.push_back({x, 1, z}); + for (int x = multiplier; + x <= kernel_info.max_work_group_size && x < grid.x + multiplier; + x += multiplier) { + for (auto y : possible_y_sizes) { + for (auto z : possible_z_sizes) { + if (x <= device_info.max_work_group_size_x && + y <= device_info.max_work_group_size_y && + z <= device_info.max_work_group_size_z && + x * y * z <= kernel_info.max_work_group_size) { + work_groups.push_back({x, y, z}); + } } } } @@ -202,31 +214,24 @@ int3 GetWorkGroupConv(const int3& grid, int max_size, int max_z_size) { return int3(wg_x, wg_y, wg_z); } -absl::Status GetBestWorkGroupXY128(const TuningParameters& params, - const CLKernel& kernel, const int3& grid, - WorkGroupSizeAlignment z_alignment, - int3* best_work_group) { - std::vector work_groups = - GenerateWorkGroupSizesXY128(grid, kernel.info_, z_alignment); - int best_work_group_index; - RETURN_IF_ERROR(params.queue->GetBestWorkGroupIndex( - kernel, *params.info, grid, work_groups, &best_work_group_index)); - *best_work_group = work_groups[best_work_group_index]; - return absl::OkStatus(); +void GetPossibleWorkGroupsXYMultipleOf(int multiplier, + const DeviceInfo& device_info, + const KernelInfo& kernel_info, + const int3& grid, + WorkGroupSizeAlignment z_alignment, + std::vector* work_groups) { + *work_groups = GenerateWorkGroupSizesXYMultipleOf( + multiplier, grid, kernel_info, device_info, z_alignment); } -absl::Status GetBestWorkGroupXY128Linear(const TuningParameters& params, - const CLKernel& kernel, - const int3& grid, - WorkGroupSizeAlignment z_alignment, - int3* best_work_group) { - std::vector work_groups = - GenerateWorkGroupSizesXY128Linear(grid, kernel.info_, z_alignment); - int best_work_group_index; - RETURN_IF_ERROR(params.queue->GetBestWorkGroupIndex( - kernel, *params.info, grid, work_groups, &best_work_group_index)); - *best_work_group = work_groups[best_work_group_index]; - return absl::OkStatus(); +void 
GetPossibleWorkGroupsXMultipleOf(int multiplier, + const DeviceInfo& device_info, + const KernelInfo& kernel_info, + const int3& grid, + WorkGroupSizeAlignment z_alignment, + std::vector* work_groups) { + *work_groups = GenerateWorkGroupSizesXMultipleOf( + multiplier, grid, kernel_info, device_info, z_alignment); } bool XY128RequiresMoreWorkGroupsThenXY128Linear(int width, int height) { @@ -245,56 +250,47 @@ bool XY128RequiresMoreWorkGroupsThenXY128Linear(int width, int height) { return !have_equal_work_groups; } -absl::Status GetBestWorkGroup(const TuningParameters& params, - const CLKernel& kernel, const int3& grid, - int3* best_work_group) { - switch (params.tuning_type) { +void GetPossibleWorkGroups(TuningType tuning_type, + const DeviceInfo& device_info, + const KernelInfo& kernel_info, const int3& grid, + std::vector* work_groups) { + switch (tuning_type) { case TuningType::FAST: - *best_work_group = GetWorkGroup(grid, kernel.info_.max_work_group_size); - return absl::OkStatus(); + work_groups->push_back( + GetWorkGroup(grid, kernel_info.max_work_group_size)); + return; case TuningType::EXHAUSTIVE: { - std::vector work_groups; - GetWorkGroupsAlignedToGrid(*params.info, kernel.info_, grid, - &work_groups); - int best_work_group_index; - RETURN_IF_ERROR(params.queue->GetBestWorkGroupIndex( - kernel, *params.info, grid, work_groups, &best_work_group_index)); - *best_work_group = work_groups[best_work_group_index]; - return absl::OkStatus(); + GetWorkGroupsAlignedToGrid(device_info, kernel_info, grid, work_groups); + return; } default: - *best_work_group = {8, 4, 1}; - return absl::OkStatus(); + work_groups->push_back({8, 4, 1}); + return; } } -absl::Status GetBestWorkGroupConv(const TuningParameters& params, - const CLKernel& kernel, const int3& grid, - int3* best_work_group) { - switch (params.tuning_type) { +void GetPossibleWorkGroupsConv(TuningType tuning_type, + const DeviceInfo& device_info, + const KernelInfo& kernel_info, const int3& grid, + std::vector* work_groups) { + switch (tuning_type) { case TuningType::FAST: { int max_z_size = 16; - if (params.info->IsAdreno()) { - max_z_size = params.info->adreno_info.gpu_version < 400 ? 16 : 64; + if (device_info.IsAdreno()) { + max_z_size = device_info.IsAdreno3xx() ? 
16 : 64; } - max_z_size = std::min(max_z_size, params.info->max_work_group_size_z); - *best_work_group = - GetWorkGroupConv(grid, kernel.info_.max_work_group_size, max_z_size); - return absl::OkStatus(); + max_z_size = std::min(max_z_size, device_info.max_work_group_size_z); + work_groups->push_back( + GetWorkGroupConv(grid, kernel_info.max_work_group_size, max_z_size)); + return; } case TuningType::EXHAUSTIVE: { - std::vector work_groups; - GetWorkGroupsAlignedToGrid(*params.info, kernel.info_, grid, - &work_groups); - int best_work_group_index; - RETURN_IF_ERROR(params.queue->GetBestWorkGroupIndex( - kernel, *params.info, grid, work_groups, &best_work_group_index)); - *best_work_group = work_groups[best_work_group_index]; - return absl::OkStatus(); + GetWorkGroupsAlignedToGrid(device_info, kernel_info, grid, work_groups); + return; } default: - *best_work_group = {8, 4, 1}; - return absl::OkStatus(); + work_groups->push_back({8, 4, 1}); + return; } } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h b/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h index 7cc60f4723f..0c1be10782e 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h @@ -27,20 +27,20 @@ namespace tflite { namespace gpu { namespace cl { -// writes best_work_group if successful -// Here and later you can find XY128, this is because 128 is SIMD width of A6xx -// And XY128 means that work_group_size.x * work_group_size.y % 128 = 0 -// We need it to correctly work with constants uploading on A6xx -absl::Status GetBestWorkGroupXY128(const TuningParameters& params, - const CLKernel& kernel, const int3& grid, - WorkGroupSizeAlignment z_alignment, - int3* best_work_group); +// multiplier can be power of two only +void GetPossibleWorkGroupsXYMultipleOf(int multiplier, + const DeviceInfo& device_info, + const KernelInfo& kernel_info, + const int3& grid, + WorkGroupSizeAlignment z_alignment, + std::vector* work_groups); -absl::Status GetBestWorkGroupXY128Linear(const TuningParameters& params, - const CLKernel& kernel, - const int3& grid, - WorkGroupSizeAlignment z_alignment, - int3* best_work_group); +void GetPossibleWorkGroupsXMultipleOf(int multiplier, + const DeviceInfo& device_info, + const KernelInfo& kernel_info, + const int3& grid, + WorkGroupSizeAlignment z_alignment, + std::vector* work_groups); int3 GetWorkGroupXY128ConvLinear(const int3& grid); @@ -49,13 +49,15 @@ int3 GetWorkGroupXY128Conv(const int3& grid); bool XY128RequiresMoreWorkGroupsThenXY128Linear(int width, int height); -absl::Status GetBestWorkGroup(const TuningParameters& params, - const CLKernel& kernel, const int3& grid, - int3* best_work_group); +void GetPossibleWorkGroups(TuningType tuning_type, + const DeviceInfo& device_info, + const KernelInfo& kernel_info, const int3& grid, + std::vector* work_groups); -absl::Status GetBestWorkGroupConv(const TuningParameters& params, - const CLKernel& kernel, const int3& grid, - int3* best_work_group); +void GetPossibleWorkGroupsConv(TuningType tuning_type, + const DeviceInfo& device_info, + const KernelInfo& kernel_info, const int3& grid, + std::vector* work_groups); } // namespace cl } // namespace gpu From b4297ce0a82e4dc513fb819ab55e86379ac46e7f Mon Sep 17 00:00:00 2001 From: Paul Wankadia Date: Tue, 11 Aug 2020 09:39:30 -0700 Subject: [PATCH 2508/2522] Make RegexFullMatchOp and RegexReplaceOp cache RE2 objects. 
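The caching scheme added in this patch keeps the most recently compiled RE2 behind a shared_ptr: lookups take a shared (reader) lock and return the cached object when the pattern matches, a miss compiles the new pattern outside any lock, and the old object is swapped out under the exclusive lock so its destruction happens only after the lock is released. A rough stand-alone sketch of the same idiom using only the standard library; CompiledPattern and PatternCache are illustrative stand-ins, not TensorFlow or RE2 types:

    #include <memory>
    #include <shared_mutex>
    #include <string>

    // Stand-in for an expensive-to-build, immutable compiled object (e.g. a regex).
    struct CompiledPattern {
      explicit CompiledPattern(std::string p) : pattern(std::move(p)) {}
      std::string pattern;
    };

    class PatternCache {
     public:
      std::shared_ptr<CompiledPattern> Get(const std::string& pattern) {
        {
          std::shared_lock<std::shared_mutex> lock(mu_);  // cheap reader path
          if (cached_ != nullptr && cached_->pattern == pattern) return cached_;
        }
        // Compile outside any lock so other threads are not blocked on it.
        auto fresh = std::make_shared<CompiledPattern>(pattern);
        {
          std::unique_lock<std::shared_mutex> lock(mu_);
          // Swap rather than assign: the old object's destructor (if this was
          // the last reference) runs only after the lock is released, when
          // `fresh` goes out of scope.
          cached_.swap(fresh);
          return cached_;
        }
      }

     private:
      std::shared_mutex mu_;
      std::shared_ptr<CompiledPattern> cached_;
    };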
PiperOrigin-RevId: 326038690 Change-Id: Ia3b0177e38a40c1514c08d8821b992aca60cd0ac --- .../core/kernels/regex_full_match_op.cc | 36 +++++++++++++-- tensorflow/core/kernels/regex_replace_op.cc | 44 +++++++++++++++---- 2 files changed, 67 insertions(+), 13 deletions(-) diff --git a/tensorflow/core/kernels/regex_full_match_op.cc b/tensorflow/core/kernels/regex_full_match_op.cc index 04da969df12..f00e971c0bc 100644 --- a/tensorflow/core/kernels/regex_full_match_op.cc +++ b/tensorflow/core/kernels/regex_full_match_op.cc @@ -20,6 +20,8 @@ limitations under the License. #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/platform/thread_annotations.h" #include "tensorflow/core/util/ptr_util.h" namespace tensorflow { @@ -28,6 +30,8 @@ class RegexFullMatchOp : public OpKernel { public: explicit RegexFullMatchOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + ~RegexFullMatchOp() override {} + void Compute(OpKernelContext* ctx) override { const Tensor* input_tensor; OP_REQUIRES_OK(ctx, ctx->input("input", &input_tensor)); @@ -39,19 +43,43 @@ class RegexFullMatchOp : public OpKernel { errors::InvalidArgument("Pattern must be scalar, but received ", pattern_tensor->shape().DebugString())); const string pattern = pattern_tensor->flat()(0); - const RE2 match(pattern); - OP_REQUIRES(ctx, match.ok(), + std::shared_ptr regex = CachedRE2(pattern); + OP_REQUIRES(ctx, regex->ok(), errors::InvalidArgument("Invalid pattern: ", pattern, - ", error: ", match.error())); + ", error: ", regex->error())); Tensor* output_tensor = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output("output", input_tensor->shape(), &output_tensor)); auto output_flat = output_tensor->flat(); for (size_t i = 0; i < input_flat.size(); ++i) { - output_flat(i) = RE2::FullMatch(input_flat(i), match); + output_flat(i) = RE2::FullMatch(input_flat(i), *regex); } } + + private: + std::shared_ptr CachedRE2(const string& pattern) { + { + tf_shared_lock l(mu_); + if (regex_ != nullptr && regex_->pattern() == pattern) { + return regex_; + } + } + // Construct the new RE2 object before acquiring the lock. + auto regex = std::make_shared(pattern); + { + mutex_lock l(mu_); + // Swap instead of assigning so that we destruct the old + // RE2 object (when necessary) after releasing the lock. + regex_.swap(regex); + return regex_; + } + } + + mutex mu_; + std::shared_ptr regex_ TF_GUARDED_BY(mu_); + + TF_DISALLOW_COPY_AND_ASSIGN(RegexFullMatchOp); }; REGISTER_KERNEL_BUILDER(Name("RegexFullMatch").Device(DEVICE_CPU), diff --git a/tensorflow/core/kernels/regex_replace_op.cc b/tensorflow/core/kernels/regex_replace_op.cc index 4eb83c5fe0d..5e464e0a13a 100644 --- a/tensorflow/core/kernels/regex_replace_op.cc +++ b/tensorflow/core/kernels/regex_replace_op.cc @@ -20,6 +20,8 @@ limitations under the License. 
#include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/platform/thread_annotations.h" #include "tensorflow/core/util/ptr_util.h" namespace tensorflow { @@ -29,7 +31,7 @@ namespace { // Context requirements: // - "input" string Tensor at input_index=0 // - "output" string Tensor at output_index=0 -Status InternalCompute(const RE2& match, const string& rewrite, +Status InternalCompute(const RE2& regex, const string& rewrite, const bool replace_global, OpKernelContext* ctx) { const Tensor* input_tensor; TF_RETURN_IF_ERROR(ctx->input("input", &input_tensor)); @@ -52,9 +54,9 @@ Status InternalCompute(const RE2& match, const string& rewrite, // accept std::string. string buf = output_flat(i); if (replace_global) { - RE2::GlobalReplace(&buf, match, rewrite); + RE2::GlobalReplace(&buf, regex, rewrite); } else { - RE2::Replace(&buf, match, rewrite); + RE2::Replace(&buf, regex, rewrite); } output_flat(i) = std::move(buf); } @@ -68,6 +70,8 @@ class RegexReplaceOp : public OpKernel { OP_REQUIRES_OK(ctx, ctx->GetAttr("replace_global", &replace_global_)); } + ~RegexReplaceOp() override {} + void Compute(OpKernelContext* ctx) override { const Tensor* pattern_tensor; OP_REQUIRES_OK(ctx, ctx->input("pattern", &pattern_tensor)); @@ -75,10 +79,10 @@ class RegexReplaceOp : public OpKernel { errors::InvalidArgument("Pattern must be scalar, but received ", pattern_tensor->shape().DebugString())); const string& pattern = pattern_tensor->scalar()(); - const RE2 match(pattern); - OP_REQUIRES(ctx, match.ok(), + std::shared_ptr regex = CachedRE2(pattern); + OP_REQUIRES(ctx, regex->ok(), errors::InvalidArgument("Invalid pattern: ", pattern, - ", error: ", match.error())); + ", error: ", regex->error())); const Tensor* rewrite_tensor; OP_REQUIRES_OK(ctx, ctx->input("rewrite", &rewrite_tensor)); @@ -86,11 +90,33 @@ class RegexReplaceOp : public OpKernel { errors::InvalidArgument("Rewrite must be scalar, but received ", rewrite_tensor->shape().DebugString())); const string& rewrite = rewrite_tensor->scalar()(); - OP_REQUIRES_OK(ctx, InternalCompute(match, rewrite, replace_global_, ctx)); + OP_REQUIRES_OK(ctx, InternalCompute(*regex, rewrite, replace_global_, ctx)); } private: + std::shared_ptr CachedRE2(const string& pattern) { + { + tf_shared_lock l(mu_); + if (regex_ != nullptr && regex_->pattern() == pattern) { + return regex_; + } + } + // Construct the new RE2 object before acquiring the lock. + auto regex = std::make_shared(pattern); + { + mutex_lock l(mu_); + // Swap instead of assigning so that we destruct the old + // RE2 object (when necessary) after releasing the lock. 
+ regex_.swap(regex); + return regex_; + } + } + bool replace_global_; + mutex mu_; + std::shared_ptr regex_ TF_GUARDED_BY(mu_); + + TF_DISALLOW_COPY_AND_ASSIGN(RegexReplaceOp); }; REGISTER_KERNEL_BUILDER(Name("RegexReplace").Device(DEVICE_CPU), @@ -101,11 +127,11 @@ class StaticRegexReplaceOp : public OpKernel { explicit StaticRegexReplaceOp(OpKernelConstruction* ctx) : OpKernel(ctx) { string pattern; OP_REQUIRES_OK(ctx, ctx->GetAttr("pattern", &pattern)); - OP_REQUIRES_OK(ctx, ctx->GetAttr("rewrite", &rewrite_str_)); re_ = MakeUnique(pattern); OP_REQUIRES(ctx, re_->ok(), errors::InvalidArgument("Invalid pattern: ", pattern, ", error: ", re_->error())); + OP_REQUIRES_OK(ctx, ctx->GetAttr("rewrite", &rewrite_str_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("replace_global", &replace_global_)); } @@ -115,8 +141,8 @@ class StaticRegexReplaceOp : public OpKernel { } private: - string rewrite_str_; std::unique_ptr re_; + string rewrite_str_; bool replace_global_; }; From 0f35ef2abc9535fbbf2e2b490867c8bd9212f03d Mon Sep 17 00:00:00 2001 From: Berkin Ilbeyi Date: Tue, 11 Aug 2020 09:47:25 -0700 Subject: [PATCH 2509/2522] [XLA] Implement mechanism to repack allocations to reduce fragmentation. This CL defines an interface for allocation repackers. A repacker can be specified in MemorySpaceAssignment::Options. If an HloValue couldn't be allocated due to running out of alternate memory, we now export the allocations done so far to the repacker, run the repacker, and import the new offsets back into memory space assignment for better memory packing. PiperOrigin-RevId: 326040207 Change-Id: Icc518874781eb74e38701514d8f6fb20d23a4124 --- tensorflow/compiler/xla/service/BUILD | 10 + .../xla/service/memory_space_assignment.cc | 71 +++++++ .../xla/service/memory_space_assignment.h | 32 +++ .../memory_space_assignment_repacking.h | 57 ++++++ .../service/memory_space_assignment_test.cc | 183 +++++++++++++++++- 5 files changed, 347 insertions(+), 6 deletions(-) create mode 100644 tensorflow/compiler/xla/service/memory_space_assignment_repacking.h diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index fa7b480cab6..f5618b95c3e 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -3426,6 +3426,15 @@ cc_library( ], ) +cc_library( + name = "memory_space_assignment_repacking", + hdrs = ["memory_space_assignment_repacking.h"], + deps = [ + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:types", + ], +) + cc_library( name = "memory_space_assignment", srcs = ["memory_space_assignment.cc"], @@ -3433,6 +3442,7 @@ cc_library( deps = [ ":heap_simulator", ":hlo_cost_analysis", + ":memory_space_assignment_repacking", ":memory_space_assignment_utils", "//tensorflow/compiler/xla:debug_options_flags", "//tensorflow/core/lib/math:math_util", diff --git a/tensorflow/compiler/xla/service/memory_space_assignment.cc b/tensorflow/compiler/xla/service/memory_space_assignment.cc index 4131a0199bf..c5ae0573bed 100644 --- a/tensorflow/compiler/xla/service/memory_space_assignment.cc +++ b/tensorflow/compiler/xla/service/memory_space_assignment.cc @@ -1057,10 +1057,28 @@ HeapSimulator::Result AlternateMemoryBestFitHeap::Finish() { options_.prefetch_interval_picker->SetRetryNumber(retry_number); Result result = AllocateAllocationValues(absl::MakeSpan(allocation_values)); + VLOG(2) << "Allocation result = " + << absl::StrFormat("%x", static_cast(result)); if (result_requires_uncommit(result) || (!final_retry && 
result_failed_because_of_async_copy(result))) { UncommitPendingChunks(absl::MakeSpan(allocation_values)); VLOG(2) << "Couldn't allocate. Retry number " << retry_number; + } else if (result_is(result, Result::kFailOutOfMemory) && + num_repacks_ < options_.max_repacks) { + UncommitPendingChunks(absl::MakeSpan(allocation_values)); + ++num_repacks_; + CHECK_NE(options_.repacker, nullptr); + std::vector repack_allocation_blocks; + ExportAllocationsForRepacking(repack_allocation_blocks); + VLOG(2) << "Repacking."; + auto repack_status = + options_.repacker->Repack(absl::MakeSpan(repack_allocation_blocks)); + CHECK_EQ(repack_status.status(), Status::OK()); + VLOG(2) << "Repack complete. Modified = " << *repack_status; + if (*repack_status) { + ImportRepackedAllocations(absl::MakeSpan(repack_allocation_blocks)); + --retry_number; + } } else { FinalizeAllocations(absl::MakeSpan(allocation_values)); break; @@ -1541,6 +1559,33 @@ bool AlternateMemoryBestFitHeap::AreIntervalsReservedInAlternateMemory( return false; } +void AlternateMemoryBestFitHeap::ExportAllocationsForRepacking( + std::vector& + allocations) { + for (RepackAllocationBlock& allocation_block : repack_allocation_blocks_) { + allocations.push_back(&allocation_block); + } +} + +void AlternateMemoryBestFitHeap::ImportRepackedAllocations( + absl::Span + repacked_allocations) { + interval_tree_ = {}; + for (RepackAllocationBlock* allocation_block : repacked_allocations) { + MemorySpaceAssignment::Allocation* allocation = allocation_block->opaque; + VLOG(3) << "Moved " << allocation->ToString() << ", size " + << allocation->chunk().size << " from " + << allocation_block->initial_offset << " to " + << allocation_block->offset; + allocation_block->opaque->mutable_chunk()->offset = + allocation_block->offset; + interval_tree_.Add(allocation_block->start_time, allocation_block->end_time, + {allocation_block->offset, allocation_block->size}); + allocation_block->initial_offset = allocation_block->offset; + allocation_block->offset = -1; + } +} + void AlternateMemoryBestFitHeap::UncommitPendingChunks( absl::Span allocation_values) { // Clear the allocation sequence of the allocation values so that in case we @@ -1591,11 +1636,37 @@ void AlternateMemoryBestFitHeap::UncommitPendingChunks( void AlternateMemoryBestFitHeap::FinalizeAllocations( absl::Span allocation_values) { + absl::flat_hash_map> + colocation_map; for (AllocationValue& allocation_value : allocation_values) { for (auto& allocation : *allocation_value.allocation_sequence()) { AppendAllocationInfoDebugString(allocation_value, *allocation, allocation_info_str_); allocations_->push_back(std::move(allocation)); + MemorySpaceAssignment::Allocation* inserted_allocation = + allocations_->back().get(); + if (inserted_allocation->memory_space() == MemorySpace::kAlternate) { + colocation_map[inserted_allocation->chunk().offset].push_back( + inserted_allocation); + } + } + } + // Assume allocations that received the same offset need to be colocated. + // Export these to repack_allocation_blocks_ so that we can repack them to + // reduce fragmentation. 
+ for (auto& colocation : colocation_map) { + std::vector colocations; + for (MemorySpaceAssignment::Allocation* colocated_allocation : + colocation.second) { + repack_allocation_blocks_.push_back( + {colocated_allocation->start_time(), colocated_allocation->end_time(), + colocated_allocation->chunk().size, /*offset=*/-1, + colocated_allocation->chunk().offset, /*colocations=*/{}, + colocated_allocation}); + colocations.push_back(&repack_allocation_blocks_.back()); + } + for (RepackAllocationBlock* repack_block : colocations) { + repack_block->colocations = colocations; } } ClearPendingChunks(); diff --git a/tensorflow/compiler/xla/service/memory_space_assignment.h b/tensorflow/compiler/xla/service/memory_space_assignment.h index d530a57d257..d366c06a599 100644 --- a/tensorflow/compiler/xla/service/memory_space_assignment.h +++ b/tensorflow/compiler/xla/service/memory_space_assignment.h @@ -18,6 +18,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/heap_simulator.h" #include "tensorflow/compiler/xla/service/hlo_cost_analysis.h" +#include "tensorflow/compiler/xla/service/memory_space_assignment_repacking.h" namespace xla { @@ -379,6 +380,9 @@ class MemorySpaceAssignment { // space and a fast and small alternate memory space. enum class MemorySpace { kDefault, kAlternate }; + // Forward declaration for Allocation. + class Allocation; + // The different options to be passed to the Run() API. struct Options { // Backend-specific integer value that describes the alternate memory. @@ -424,6 +428,15 @@ class MemorySpaceAssignment { // copies or asynchronous copy ordering. int64 max_retries = 1; + // The maximum number of repacks that we are willing to perform in case we + // can't allocate a buffer due to running out of memory. If this value is + // greater than 0, repacker must be non-nullptr. + int64 max_repacks = 0; + + // The repacking algorithm to reduce fragmentation. Must be non-null if + // max_repacks is greater than 0. + MemorySpaceAssignmentRepacker* repacker = nullptr; + // If true, tries allocating buffers across (e.g., before and inside a while // loop body) sequential calls (kWhile, kCall, and kConditional). bool allocate_across_sequential_calls = false; @@ -511,6 +524,7 @@ class MemorySpaceAssignment { const std::vector& uses() const { return uses_; } MemorySpace memory_space() const { return memory_space_; } Chunk chunk() const { return *chunk_; } + Chunk* mutable_chunk() { return &*chunk_; } void set_start_time(int64 start_time) { start_time_ = start_time; } int64 start_time() const { return start_time_; } int64 end_time() const { return end_time_; } @@ -929,6 +943,9 @@ class AlternateMemoryBestFitHeap : public GlobalDecreasingSizeBestFitHeap { HeapSimulator::Result Finish() override; private: + using RepackAllocationBlock = MemorySpaceAssignmentRepacker< + MemorySpaceAssignment::Allocation*>::AllocationBlock; + // An allocation request for a use segment. A use segment is the time segment // between the definition and the first use, and the time segment between the // uses of a buffer. For example, the time between the definition and Use1, is @@ -1149,6 +1166,16 @@ class AlternateMemoryBestFitHeap : public GlobalDecreasingSizeBestFitHeap { absl::optional ViolatesAsyncCopyOrdering( int64 start_time, int64 end_time) const; + // Exports the allocations for repacking and puts them into the vector in the + // parameter. 
+ void ExportAllocationsForRepacking( + std::vector& allocations); + + // Imports repacked allocations and updates the internal data structures + // consistent with the new packing. + void ImportRepackedAllocations( + absl::Span repacked_allocations); + // Adds an asynchronous copy to the allocations. void AddAsyncCopy(const MemorySpaceAssignment::Allocation& prev_allocation, MemorySpace memory_space, absl::optional chunk, @@ -1197,6 +1224,11 @@ class AlternateMemoryBestFitHeap : public GlobalDecreasingSizeBestFitHeap { BufferIntervalTree prefetch_interval_tree_; BufferIntervalTree eviction_interval_tree_; AsynchronousCopyOrdering async_copy_ordering_; + // A list of RepackAllocationBlock objects that mirrors allocation sequences, + // used for repacking. We use a list here because we need pointer stability + // for aliased allocations. + std::list repack_allocation_blocks_; + int64 num_repacks_ = 0; std::vector> pending_chunks_; std::vector pending_async_copies_; std::vector> diff --git a/tensorflow/compiler/xla/service/memory_space_assignment_repacking.h b/tensorflow/compiler/xla/service/memory_space_assignment_repacking.h new file mode 100644 index 00000000000..fcfdfc797fb --- /dev/null +++ b/tensorflow/compiler/xla/service/memory_space_assignment_repacking.h @@ -0,0 +1,57 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_REPACKING_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_REPACKING_H_ + +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/types.h" + +namespace xla { + +// An interface to define allocation repacking algorithms. +template +class MemorySpaceAssignmentRepacker { + public: + MemorySpaceAssignmentRepacker() = default; + virtual ~MemorySpaceAssignmentRepacker() = default; + + // A contiguous block of allocation consisting of start and end (logical) + // times, size, and the initial offset. After repacking, if the repacking was + // successful and the allocations were modified, the offset field holds the + // new offset. To support aliased allocations, AllocationBlock also includes a + // vector of AllocationBlock pointers, called colocations. All AllocationBlock + // objects within the colocations must get the same offset. The opaque field + // is used by the MemorySpaceAssignment pass and should not be accessed by the + // repacking algorithm. + struct AllocationBlock { + int64 start_time; + int64 end_time; + int64 size; + int64 offset; + int64 initial_offset; + std::vector colocations; + O opaque; + }; + + // Repack the AllocationBlocks provided in the parameter. Returns true if + // allocations have been modified and false if not. Returns a non-ok status if + // there was an error. 
+ virtual StatusOr<bool> Repack(absl::Span<AllocationBlock*> allocations) = 0; +}; + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_MEMORY_SPACE_ASSIGNMENT_REPACKING_H_ diff --git a/tensorflow/compiler/xla/service/memory_space_assignment_test.cc b/tensorflow/compiler/xla/service/memory_space_assignment_test.cc index a3f8024bca8..464cfb502be 100644 --- a/tensorflow/compiler/xla/service/memory_space_assignment_test.cc +++ b/tensorflow/compiler/xla/service/memory_space_assignment_test.cc @@ -71,19 +71,22 @@ class MemorySpaceAssignmentTest : public HloTestBase, std::unique_ptr<PresetAssignments> AssignMemorySpace( HloModule* module, int64 max_outstanding_async_copies = -1, - int64 max_prefetch_interval = 10, int64 min_prefetch_interval = 2) { + int64 max_prefetch_interval = 10, int64 min_prefetch_interval = 2, + absl::optional<MemorySpaceAssignment::Options> options = absl::nullopt) { InstructionCountPrefetchIntervalPicker prefetch_interval_picker( min_prefetch_interval, max_prefetch_interval); return AssignMemorySpace(module, max_outstanding_async_copies, /*buffer_interval_compare=*/{}, - &prefetch_interval_picker); + &prefetch_interval_picker, options); } std::unique_ptr<PresetAssignments> AssignMemorySpace( HloModule* module, int64 max_outstanding_async_copies, absl::optional<MemorySpaceAssignment::BufferIntervalCompare> buffer_interval_compare, - PrefetchIntervalPicker* prefetch_interval_picker) { + PrefetchIntervalPicker* prefetch_interval_picker, + absl::optional<MemorySpaceAssignment::Options> + memory_space_assignment_options = absl::nullopt) { auto size_fn = [](const BufferValue& buffer) { return ShapeUtil::ByteSizeOf(buffer.shape(), /*pointer_size=*/8); }; @@ -117,9 +120,15 @@ class MemorySpaceAssignmentTest : public HloTestBase, } MemorySpaceAssignment::Options options; + if (memory_space_assignment_options) { + options = *memory_space_assignment_options; + } else { + options.max_size_in_bytes = 128; + options.alignment_in_bytes = 8; + options.verify = true; + } + options.alternate_memory_space = kAlternateMemorySpace; - options.max_size_in_bytes = 128; - options.alignment_in_bytes = 8; options.buffer_interval_compare = buffer_interval_compare; options.prefetch_interval_picker = prefetch_interval_picker; options.size_fn = size_fn; @@ -127,7 +136,6 @@ class MemorySpaceAssignmentTest : public HloTestBase, options.max_outstanding_prefetches = max_outstanding_async_copies; options.max_outstanding_evictions = max_outstanding_async_copies; options.allocate_across_sequential_calls = GetParam(); - options.verify = true; auto alias_analysis = HloAliasAnalysis::Run(module).ValueOrDie(); std::unique_ptr<HloLiveRange> hlo_live_range = @@ -4058,6 +4066,169 @@ TEST_P(MemorySpaceAssignmentTest, MoveCopyDoneEarlier) { find_schedule_index(cos->operand(0))); } +// A mock MemorySpaceAssignmentRepacker class that accepts a map of +// (start_time, offset) -> new_offset values. Using this map, the repacker +// repacks the allocations to the new_offset.
+class FakeMemorySpaceAssignmentRepacker + : public MemorySpaceAssignmentRepacker { + public: + FakeMemorySpaceAssignmentRepacker( + absl::flat_hash_map, int64>& repack_map) + : repack_map_(repack_map) {} + + StatusOr Repack(absl::Span allocations) override { + bool modified = false; + for (AllocationBlock* block : allocations) { + VLOG(1) << "Alloc time: [" << block->start_time << ", " << block->end_time + << "] size: " << block->size + << " init offset: " << block->initial_offset; + auto it = repack_map_.find({block->start_time, block->initial_offset}); + if (it != repack_map_.end()) { + modified = true; + block->offset = it->second; + } else { + block->offset = block->initial_offset; + } + for (AllocationBlock* colocation : block->colocations) { + VLOG(1) << " [" << colocation->start_time << ", " + << colocation->end_time << "]"; + if (it != repack_map_.end()) { + colocation->offset = it->second; + } else { + colocation->offset = colocation->initial_offset; + } + } + } + + return modified; + } + + private: + // A map from (start_time, offset) to new_offset. + absl::flat_hash_map, int64> repack_map_; +}; + +TEST_P(MemorySpaceAssignmentTest, Repack) { + // We initially perform the following allocations at these offsets. + // + // Max memory + // ------------------------------------------- + // + // + // + // + // +------------+ + // | b | + // +------------+ + // +-------+ +------------+ + // | a | | n | + // +-------+ +------------+ + // ------------------------------------------- + // Min memory time -> + // + // Next up, we try to allocate the prefetch for m. However due to + // fragmentation, this won't be possible: + // + // Max memory + // ------------------------------------------- + // + // + // + // +---------+ + // +------------+ | + // | b | | | + // +------------+ | + // +-------+ | | +------------+ + // | a | | d | | n | + // +-------+ +---------+ +------------+ + // ------------------------------------------- + // Min memory time -> + // + // We then call repack to repack the existing allocations which allows us to + // allocate the prefetch for m: + // + // Max memory + // ------------------------------------------- + // +---------+ + // | | + // | | + // | | + // +-------+ | | + // | a | | d | + // +-------+ +---------+ + // +------------+ +------------+ + // | b | | n | + // +------------+ +------------+ + // ------------------------------------------- + // Min memory time -> + absl::string_view hlo_string = R"( + HloModule bug, is_scheduled=true + + ENTRY Entry { + param0 = f32[8,3] parameter(0) + param1 = f32[2,4] parameter(1) + a = f32[2,4] sine(param1) + b = f32[2,4] cosine(param1) + c = f32[8,3] negate(param0) + j = f32[2,4] negate(a) + d = f32[8,3] tanh(param0) + k = f32[2,4] negate(j) + l = f32[2,4] add(b, k) + m = f32[8,3] negate(d) + n = f32[2,4] sine(l) + o = f32[8,3] negate(m) + p = f32[2,4] negate(n) + q = f32[8,3] negate(m) + ROOT tuple = (f32[2,4], f32[8,3], f32[8,3]) tuple(p, q, o) + } + )"; + + MemorySpaceAssignment::BufferIntervalCompare buffer_interval_compare = + [](const MemorySpaceAssignment::BufferInterval& a, + const MemorySpaceAssignment::BufferInterval& b) { + auto get_opcode_priority = [](const HloOpcode& opcode) { + switch (opcode) { + case HloOpcode::kSin: + return 0; + case HloOpcode::kCos: + return 1; + case HloOpcode::kTanh: + return 2; + default: + return 3; + } + }; + + return get_opcode_priority(a.buffer->defining_instruction()->opcode()) < + get_opcode_priority(b.buffer->defining_instruction()->opcode()); + }; + 
TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + + InstructionCountPrefetchIntervalPicker prefetch_interval_picker(2, 10); + absl::flat_hash_map, int64> repack_map; + // Move "a" from offset 0 to 32. + repack_map[{2, 0}] = 32; + // Move "b" from offset 32 to 0. + repack_map[{3, 32}] = 0; + FakeMemorySpaceAssignmentRepacker repacker = + FakeMemorySpaceAssignmentRepacker(repack_map); + MemorySpaceAssignment::Options options; + options.max_size_in_bytes = 128; + options.alignment_in_bytes = 8; + options.verify = true; + options.max_repacks = 1; + options.repacker = &repacker; + AssignMemorySpace(module.get(), /*max_outstanding_async_copies=*/-1, + buffer_interval_compare, &prefetch_interval_picker, + options); + + // If repacking succeeds, we should find the buffer for d in alternate memory. + const HloInstruction* d = + module->entry_computation()->GetInstructionWithName("d"); + EXPECT_EQ(d->shape().layout().memory_space(), kAlternateMemorySpace); +} + TEST_P(MemorySpaceAssignmentTest, Determinism) { // Run memory space assignment a few times to make sure every time it compiles // to the same thing. From 6a2f00362eed16cfcc792d8fc716c19fa9108ea4 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Tue, 11 Aug 2020 09:50:22 -0700 Subject: [PATCH 2510/2522] Add TpuTopologyExternal::version() and TpuVersionEnumToString(). PiperOrigin-RevId: 326040796 Change-Id: I3033d21271db465e4d6c57f5a3fe3c0c8913d1aa --- tensorflow/core/tpu/tpu_library_init_fns.inc | 2 ++ tensorflow/stream_executor/tpu/c_api_decl.h | 6 ++++++ .../stream_executor/tpu/tpu_executor_c_api.h | 2 ++ tensorflow/stream_executor/tpu/tpu_topology.cc | 15 +++++++++++++++ tensorflow/stream_executor/tpu/tpu_topology.h | 3 +++ 5 files changed, 28 insertions(+) diff --git a/tensorflow/core/tpu/tpu_library_init_fns.inc b/tensorflow/core/tpu/tpu_library_init_fns.inc index a27dbea2388..9ac4fb9ec6d 100644 --- a/tensorflow/core/tpu/tpu_library_init_fns.inc +++ b/tensorflow/core/tpu/tpu_library_init_fns.inc @@ -178,6 +178,8 @@ tensorflow::Status SetExecutorStructFn(void* library_handle) { TFTPU_SET_FN(executor_fn, TpuTopology_NumCores); TFTPU_SET_FN(executor_fn, TpuTopology_Cores); TFTPU_SET_FN(executor_fn, TpuTopology_IdForHost); + TFTPU_SET_FN(executor_fn, TpuTopology_Version); + TFTPU_SET_FN(executor_fn, TpuCoreLocation_ChipCoordinates); TFTPU_SET_FN(executor_fn, TpuCoreLocation_HostCoordinates); TFTPU_SET_FN(executor_fn, TpuCoreLocation_Index); diff --git a/tensorflow/stream_executor/tpu/c_api_decl.h b/tensorflow/stream_executor/tpu/c_api_decl.h index bca5f254ad1..a7b4c372e18 100644 --- a/tensorflow/stream_executor/tpu/c_api_decl.h +++ b/tensorflow/stream_executor/tpu/c_api_decl.h @@ -31,6 +31,12 @@ enum TpuCoreTypeEnum { kEmbeddingV2, }; +enum TpuVersionEnum { + kUnknownTpuVersion, + kTpuV2, + kTpuV3, +}; + typedef struct SE_Status SE_Status; typedef struct SE_Platform SE_Platform; diff --git a/tensorflow/stream_executor/tpu/tpu_executor_c_api.h b/tensorflow/stream_executor/tpu/tpu_executor_c_api.h index c498244cc6e..149a00615a9 100644 --- a/tensorflow/stream_executor/tpu/tpu_executor_c_api.h +++ b/tensorflow/stream_executor/tpu/tpu_executor_c_api.h @@ -211,6 +211,7 @@ void TpuTopology_Cores(SE_TpuTopology* tpu_topology, TpuCoreTypeEnum tpu_core_type, SE_TpuTopology_Core** cores); int TpuTopology_IdForHost(SE_TpuTopology* tpu_topology, int x, int y, int z); +TpuVersionEnum TpuTopology_Version(SE_TpuTopology* tpu_topology); void TpuCoreLocation_ChipCoordinates(SE_TpuTopology_Core* tpu_core_location, int* 
x, int* y, int* z); void TpuCoreLocation_HostCoordinates(SE_TpuTopology_Core* tpu_core_location, @@ -367,6 +368,7 @@ struct TfTpu_ExecutorApiFn { TFTPU_ADD_FN_IN_STRUCT(TpuTopology_NumCores); TFTPU_ADD_FN_IN_STRUCT(TpuTopology_Cores); TFTPU_ADD_FN_IN_STRUCT(TpuTopology_IdForHost); + TFTPU_ADD_FN_IN_STRUCT(TpuTopology_Version); TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_ChipCoordinates); TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_HostCoordinates); diff --git a/tensorflow/stream_executor/tpu/tpu_topology.cc b/tensorflow/stream_executor/tpu/tpu_topology.cc index 6c885b229ec..c86b399b34e 100644 --- a/tensorflow/stream_executor/tpu/tpu_topology.cc +++ b/tensorflow/stream_executor/tpu/tpu_topology.cc @@ -95,5 +95,20 @@ int TpuTopologyExternal::IdForHost(TpuDimensionsExternal host) const { host.y, host.z); } +TpuVersionEnum TpuTopologyExternal::version() const { + return tpu::ExecutorApiFn()->TpuTopology_VersionFn(topology_); +} + +std::string TpuVersionEnumToString(TpuVersionEnum version) { + switch (version) { + case kUnknownTpuVersion: + return "Unknown TPU version"; + case kTpuV2: + return "TPU v2"; + case kTpuV3: + return "TPU v3"; + } +} + } // namespace tpu } // namespace tensorflow diff --git a/tensorflow/stream_executor/tpu/tpu_topology.h b/tensorflow/stream_executor/tpu/tpu_topology.h index 07e9afc7d81..5219ba7017b 100644 --- a/tensorflow/stream_executor/tpu/tpu_topology.h +++ b/tensorflow/stream_executor/tpu/tpu_topology.h @@ -74,11 +74,14 @@ class TpuTopologyExternal { int index) const; std::vector cores(TpuCoreTypeEnum core_type) const; int IdForHost(TpuDimensionsExternal host) const; + TpuVersionEnum version() const; private: SE_TpuTopology* topology_; }; +std::string TpuVersionEnumToString(TpuVersionEnum version); + } // namespace tpu } // namespace tensorflow From aa9d2d80f4921c9d352aaa6ab3872cc14c4028e4 Mon Sep 17 00:00:00 2001 From: Ruoxin Sang Date: Tue, 11 Aug 2020 10:03:07 -0700 Subject: [PATCH 2511/2522] Add a test for model.predict with tf.keras.layers.ConvLSTM2D layer when XLA dynamic padder is enabled. 
PiperOrigin-RevId: 326043413 Change-Id: I38837b69a1bb09190f0c85b31d6d26ede4963e11 --- .../custom_training_loop_models_test.py | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tensorflow/python/keras/distribute/custom_training_loop_models_test.py b/tensorflow/python/keras/distribute/custom_training_loop_models_test.py index 5a9384bb7e0..b680960429c 100644 --- a/tensorflow/python/keras/distribute/custom_training_loop_models_test.py +++ b/tensorflow/python/keras/distribute/custom_training_loop_models_test.py @@ -251,6 +251,33 @@ class KerasModelsTest(test.TestCase, parameterized.TestCase): train_step(input_iterator) + @combinations.generate( + combinations.combine( + distribution=strategy_combinations.all_strategies, + mode=["eager"])) + def test_model_predict_with_dynamic_batch(self, distribution): + input_data = np.random.random([1, 32, 64, 64, 3]) + input_shape = tuple(input_data.shape[1:]) + + def build_model(): + model = keras.models.Sequential() + model.add( + keras.layers.ConvLSTM2D( + 4, + kernel_size=(4, 4), + activation="sigmoid", + padding="same", + input_shape=input_shape)) + model.add(keras.layers.GlobalMaxPooling2D()) + model.add(keras.layers.Dense(2, activation="sigmoid")) + return model + + with distribution.scope(): + model = build_model() + model.compile(loss="binary_crossentropy", optimizer="adam") + result = model.predict(input_data) + self.assertEqual(result.shape, (1, 2)) + @combinations.generate( combinations.combine( distribution=strategy_combinations.all_strategies, From 0d4f0584efdfd828335b594cd68827d4ddf6cd0d Mon Sep 17 00:00:00 2001 From: Karim Nosir Date: Tue, 11 Aug 2020 10:18:01 -0700 Subject: [PATCH 2512/2522] Add cache for const nodes in hexagon delegate. On a few test models this reduces the size of const nodes by half, which will reduce graph preparation time. Bug fix for sometimes wrong casting. Remove some redundant const nodes.
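The caching idea can be summarized outside the delegate as a minimal sketch, assuming a const node may be reused whenever another tensor with byte-identical shape and data was already added. The names ConstNode, ConstNodeCache, GetOrCreate and the std::hash-based key are illustrative stand-ins only; the actual change keys the cache on a farmhash fingerprint of the tensor shape, raw bytes, and the int8-to-uint8 flag, and maps that key to the existing OpBuilder.

  // Sketch only; not the delegate's real API.
  #include <cstdint>
  #include <deque>
  #include <functional>
  #include <map>
  #include <string>
  #include <vector>

  struct ConstNode {  // stand-in for the Hexagon const-node builder
    int id;
  };

  class ConstNodeCache {
   public:
    // Returns the node previously built for identical (shape, data), or
    // builds and caches a new one.
    ConstNode* GetOrCreate(const std::vector<int>& shape,
                           const std::string& data) {
      const uint64_t key = Hash(shape, data);
      auto it = cache_.find(key);
      if (it != cache_.end()) return it->second;  // reuse existing const node
      nodes_.push_back(ConstNode{static_cast<int>(nodes_.size())});
      cache_[key] = &nodes_.back();
      return &nodes_.back();
    }

   private:
    // Stand-in for the farmhash fingerprint of shape + raw data.
    static uint64_t Hash(const std::vector<int>& shape,
                         const std::string& data) {
      uint64_t h = std::hash<std::string>{}(data);
      for (int d : shape) h = h * 1099511628211ULL + static_cast<uint64_t>(d);
      return h;
    }

    std::map<uint64_t, ConstNode*> cache_;  // key -> already-added node
    std::deque<ConstNode> nodes_;           // deque keeps pointers stable
  };

Keeping the node storage in a container with stable addresses (a deque here, owned builders in the delegate) matters because the cache hands out raw pointers to previously created nodes.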
PiperOrigin-RevId: 326046789 Change-Id: I462dd6702e0e02953c43ab47dd53589a653b3531 --- .../lite/delegates/hexagon/builders/BUILD | 1 + .../hexagon/builders/conv_2d_builder.cc | 8 +- .../hexagon/builders/conv_2d_builder.h | 2 - .../hexagon/builders/conv_2d_helpers.cc | 19 +-- .../hexagon/builders/min_max_builder.cc | 4 - .../delegates/hexagon/builders/op_builder.cc | 112 +++++++++++++++--- .../delegates/hexagon/builders/op_builder.h | 34 +++++- .../hexagon/builders/transpose_builder.cc | 10 +- .../builders/transpose_conv_2d_builder.cc | 22 +--- .../builders/transpose_conv_2d_builder.h | 2 +- .../hexagon/hexagon_delegate_kernel.cc | 5 +- 11 files changed, 152 insertions(+), 67 deletions(-) diff --git a/tensorflow/lite/delegates/hexagon/builders/BUILD b/tensorflow/lite/delegates/hexagon/builders/BUILD index 63ff274c7b7..ef4b0e957c1 100644 --- a/tensorflow/lite/delegates/hexagon/builders/BUILD +++ b/tensorflow/lite/delegates/hexagon/builders/BUILD @@ -85,6 +85,7 @@ cc_library( "//tensorflow/lite/kernels:padding", "//tensorflow/lite/kernels/internal:optimized_base", "//tensorflow/lite/kernels/internal:tensor", + "@farmhash_archive//:farmhash", "@hexagon_nn//:hexagon_nn_ops", ], ) diff --git a/tensorflow/lite/delegates/hexagon/builders/conv_2d_builder.cc b/tensorflow/lite/delegates/hexagon/builders/conv_2d_builder.cc index cfddd2c2b97..c6d20004227 100644 --- a/tensorflow/lite/delegates/hexagon/builders/conv_2d_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/conv_2d_builder.cc @@ -267,13 +267,13 @@ TfLiteStatus Conv2dOpBuilder::PopulateSubGraph(const TfLiteIntArray* inputs, auto* conv_op = graph_builder_->AddNode(GetTFLiteNodeID()); conv_op->SetOpType(OP_DepthwiseSupernode_8x8p32to8); conv_op->AddInput(space_to_batch_op_out); - conv_op->AddInput(TensorID(weights_data_node_->GetID(), 0)); + conv_op->AddInput(graph_builder_->GetHexagonTensorId(inputs->data[1])); conv_op->AddInput(TensorID(data_min_const->GetID(), 0)); conv_op->AddInput(TensorID(data_max_const->GetID(), 0)); conv_op->AddInput(TensorID(weights_min_node_->GetID(), 0)); conv_op->AddInput(TensorID(weights_max_node_->GetID(), 0)); conv_op->AddInput(TensorID(stride_node->GetID(), 0)); - conv_op->AddInput(TensorID(bias_data_node_->GetID(), 0)); + conv_op->AddInput(graph_builder_->GetHexagonTensorId(inputs->data[2])); conv_op->AddInput(TensorID(bias_min_node_->GetID(), 0)); conv_op->AddInput(TensorID(bias_max_node_->GetID(), 0)); conv_op->AddInput(TensorID(conv_output_min_const->GetID(), 0)); @@ -330,13 +330,13 @@ TfLiteStatus Conv2dOpBuilder::PopulateSubGraph(const TfLiteIntArray* inputs, } // Inputs AddInput(graph_builder_->GetHexagonTensorId(inputs->data[0])); - AddInput(TensorID(weights_data_node_->GetID(), 0)); + AddInput(graph_builder_->GetHexagonTensorId(inputs->data[1])); AddInput(TensorID(data_min_const->GetID(), 0)); AddInput(TensorID(data_max_const->GetID(), 0)); AddInput(TensorID(weights_min_node_->GetID(), 0)); AddInput(TensorID(weights_max_node_->GetID(), 0)); AddInput(TensorID(stride_node->GetID(), 0)); - AddInput(TensorID(bias_data_node_->GetID(), 0)); + AddInput(graph_builder_->GetHexagonTensorId(inputs->data[2])); AddInput(TensorID(bias_min_node_->GetID(), 0)); AddInput(TensorID(bias_max_node_->GetID(), 0)); AddInput(TensorID(conv_output_min_const->GetID(), 0)); diff --git a/tensorflow/lite/delegates/hexagon/builders/conv_2d_builder.h b/tensorflow/lite/delegates/hexagon/builders/conv_2d_builder.h index 4980b294481..1407f06154b 100644 --- a/tensorflow/lite/delegates/hexagon/builders/conv_2d_builder.h +++ 
b/tensorflow/lite/delegates/hexagon/builders/conv_2d_builder.h @@ -62,10 +62,8 @@ class Conv2dOpBuilder : public OpBuilder { std::vector transposed_weights_; std::vector stride_shape_; std::vector weight_shape_; - OpBuilder* weights_data_node_ = nullptr; OpBuilder* weights_min_node_ = nullptr; OpBuilder* weights_max_node_ = nullptr; - OpBuilder* bias_data_node_ = nullptr; OpBuilder* bias_min_node_ = nullptr; OpBuilder* bias_max_node_ = nullptr; diff --git a/tensorflow/lite/delegates/hexagon/builders/conv_2d_helpers.cc b/tensorflow/lite/delegates/hexagon/builders/conv_2d_helpers.cc index bf68bbe5a25..b33e28f4e71 100644 --- a/tensorflow/lite/delegates/hexagon/builders/conv_2d_helpers.cc +++ b/tensorflow/lite/delegates/hexagon/builders/conv_2d_helpers.cc @@ -106,6 +106,7 @@ TfLiteStatus Conv2dOpBuilder::InitializeWeightsNodes( const bool is_per_channel_quant = weights_quant_params->scale->size > 1; // WEIGHTS DATA. + OpBuilder* weights_data_node = nullptr; if (op_node_.op_type == OP_Supernode_8x8p32to8) { // Hexagon lib expects the weight tensor in HWCN, TFLite uses NHWC. // Transpose NHWC -> HWCN @@ -137,7 +138,7 @@ TfLiteStatus Conv2dOpBuilder::InitializeWeightsNodes( weights_tensor.data.uint8, hwcn_shape, hwcn.data()); } - weights_data_node_ = graph_builder_->AddConstNodeWithData( + weights_data_node = graph_builder_->AddConstNodeWithData( weight_shape_.data(), reinterpret_cast(hwcn.data()), hwcn.size() * sizeof(hwcn[0])); } else if (op_node_.op_type == OP_DepthwiseSupernode_8x8p32to8) { @@ -156,17 +157,17 @@ TfLiteStatus Conv2dOpBuilder::InitializeWeightsNodes( for (int i = 0; i < converted_data.size(); ++i) { converted_data[i] = weights_tensor.data.int8[i] ^ k8BitSignFlipConstant; } - weights_data_node_ = graph_builder_->AddConstNodeWithData( + weights_data_node = graph_builder_->AddConstNodeWithData( weight_shape_.data(), reinterpret_cast(converted_data.data()), converted_data.size() * sizeof(converted_data[0])); } else { - weights_data_node_ = graph_builder_->AddConstNodeWithData( + weights_data_node = graph_builder_->AddConstNodeWithData( weight_shape_.data(), weights_tensor.data.raw, NumElements(&weights_tensor) * sizeof(weights_tensor.data.uint8[0])); } } - graph_builder_->AddTensorWithID(inputs->data[1], weights_data_node_->GetID(), - 0); + graph_builder_->AddTensorWithID(inputs->data[1], weights_data_node->GetID(), + 0, /*overwrite=*/true); // WEIGHTS QUANTIZATION. float weights_min = 0; @@ -229,9 +230,11 @@ TfLiteStatus Conv2dOpBuilder::ProcessPerChannelQuantizedBias( } // Add nodes for bias. 
const std::vector bias_shape = {1, 1, 1, bias_size}; - bias_data_node_ = graph_builder_->AddConstNodeWithData( + auto* bias_data_node = graph_builder_->AddConstNodeWithData( bias_shape.data(), reinterpret_cast(preprocessed_bias_data.data()), preprocessed_bias_data.size() * sizeof(preprocessed_bias_data[0])); + graph_builder_->AddTensorWithID(inputs->data[2], bias_data_node->GetID(), 0, + /*overwrite=*/true); return kTfLiteOk; } @@ -248,8 +251,10 @@ TfLiteStatus Conv2dOpBuilder::InitializeBiasNodes(const TfLiteIntArray* inputs, ProcessPerChannelQuantizedBias(inputs, outputs, context, &bias_min, &bias_max); } else { - bias_data_node_ = + auto* bias_data_node = graph_builder_->AddConstNodeWithData(inputs->data[2], bias_tensor); + graph_builder_->AddTensorWithID(inputs->data[2], bias_data_node->GetID(), 0, + /*overwrite=*/true); TF_LITE_ENSURE_STATUS( ComputeMinAndMaxQuantValues(bias_tensor, &bias_min, &bias_max)); } diff --git a/tensorflow/lite/delegates/hexagon/builders/min_max_builder.cc b/tensorflow/lite/delegates/hexagon/builders/min_max_builder.cc index bcfae6032c8..0c6dea2096d 100644 --- a/tensorflow/lite/delegates/hexagon/builders/min_max_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/min_max_builder.cc @@ -27,10 +27,6 @@ TfLiteStatus MinMaxOpBuilder::PopulateSubGraph(const TfLiteIntArray* inputs, int b_tensor_id = inputs->data[1]; const auto& a_tensor = context->tensors[a_tensor_id]; const auto& b_tensor = context->tensors[b_tensor_id]; - if (a_tensor.allocation_type == kTfLiteMmapRo) - graph_builder_->AddConstNodeWithData(a_tensor_id, a_tensor); - if (b_tensor.allocation_type == kTfLiteMmapRo) - graph_builder_->AddConstNodeWithData(b_tensor_id, b_tensor); AddInput(graph_builder_->GetHexagonTensorId(a_tensor_id)); AddInput(graph_builder_->GetHexagonTensorId(b_tensor_id)); diff --git a/tensorflow/lite/delegates/hexagon/builders/op_builder.cc b/tensorflow/lite/delegates/hexagon/builders/op_builder.cc index 0f32a4de6e1..80aa4c8155c 100644 --- a/tensorflow/lite/delegates/hexagon/builders/op_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/op_builder.cc @@ -18,10 +18,59 @@ limitations under the License. #include "tensorflow/lite/builtin_ops.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/delegates/hexagon/builders/op_factory.h" +#include namespace tflite { namespace delegates { namespace hexagon { +namespace { +// Farmhash Fingerprint +inline uint64_t CombineFingerprints(uint64_t l, uint64_t h) { + // Murmur-inspired hashing. 
+ const uint64_t kMul = 0x9ddfea08eb382d69ULL; + uint64_t a = (l ^ h) * kMul; + a ^= (a >> 47); + uint64_t b = (h ^ a) * kMul; + b ^= (b >> 44); + b *= kMul; + b ^= (b >> 41); + b *= kMul; + return b; +} + +inline uint64_t ComputeHash(const int shape[], const char* data, + const int data_len) { + return CombineFingerprints( + ::util::Fingerprint64(data, data_len), + ::util::Fingerprint64(reinterpret_cast(shape), + sizeof(shape[0]) * 4)); +} + +inline uint64_t ComputeHash(const TfLiteTensor& tensor, const int shape[], + int int8_to_uint8) { + auto data_hash = ComputeHash(shape, tensor.data.raw_const, tensor.bytes); + auto int8_to_uint8_hash = ::util::Fingerprint64( + reinterpret_cast(&int8_to_uint8), sizeof(int8_to_uint8)); + return CombineFingerprints(data_hash, int8_to_uint8_hash); +} + +int GetElementSize(TfLiteType type) { + switch (type) { + case kTfLiteFloat32: + return sizeof(float); + case kTfLiteBool: + return sizeof(bool); + case kTfLiteInt32: + return sizeof(int32_t); + case kTfLiteInt8: + return sizeof(int8_t); + case kTfLiteUInt8: + return sizeof(uint8_t); + default: + return sizeof(int8_t); + } +} +} // namespace OpBuilder* GraphBuilder::CreateOpBuilderFromTfLiteOp(int op_type, TfLiteNode* node) { @@ -116,8 +165,20 @@ OpBuilder* GraphBuilder::CreateOpBuilderFromTfLiteOp(int op_type, } } +OpBuilder* GraphBuilder::LookupConstData(uint64_t cache_key) { + auto lookup_result = cache_.find(cache_key); + if (lookup_result != cache_.end()) return lookup_result->second; + return nullptr; +} + +void GraphBuilder::AddToCache(uint64_t cache_key, OpBuilder* value) { + cache_[cache_key] = value; +} + OpBuilder* GraphBuilder::AddConstNodeWithData(const int shape[], char* data, int data_size) { + auto cache_key = ComputeHash(shape, data, data_size); + if (auto lookup_result = LookupConstData(cache_key)) return lookup_result; builders_.emplace_back(new OpBuilder(this, OP_Const)); builders_.back()->SetConstNode(); builders_.back()->SetNodeId(builders_.size()); @@ -125,22 +186,36 @@ OpBuilder* GraphBuilder::AddConstNodeWithData(const int shape[], char* data, graph_id_, builders_.size(), shape[0], shape[1], shape[2], shape[3], reinterpret_cast(data), data_size); if (error != 0) { - context_->ReportError(context_, "Error adding const node with shape id: %d", - (int)builders_.size()); + TF_LITE_KERNEL_LOG(context_, "Error adding const node with shape id: %d", + static_cast(builders_.size())); return nullptr; } + AddToCache(cache_key, builders_.back().get()); return builders_.back().get(); } OpBuilder* GraphBuilder::AddConstNodeWithData(int tensor_id, const TfLiteTensor& tensor, bool int8_to_uint8) { + // Fetch shape of tensor and pad 1's so it is always 4D. + int batch_size, height_size, width_size, depth_size; + GetDims(&batch_size, &height_size, &width_size, &depth_size, tensor.dims); + const int shape[] = {batch_size, height_size, width_size, depth_size}; + + auto cache_key = ComputeHash(tensor, shape, int8_to_uint8 ? 1 : 0); + if (auto lookup_result = LookupConstData(cache_key)) { + // If tensor is cached but with no id, that can happen when the same + // data is added from a constant value (not tensor). We can cache the data + // and reuse it. + // We assign the tensor to this cached const node before returning. 
+ if (!HasTensor(tensor_id)) + AddTensorWithID(tensor_id, lookup_result->GetID(), 0); + return lookup_result; + } builders_.emplace_back(new OpBuilder(this, OP_Const)); const int node_id = builders_.size(); builders_.back()->SetConstNode(); builders_.back()->SetNodeId(node_id); - int batch_size, height_size, width_size, depth_size; - GetDims(&batch_size, &height_size, &width_size, &depth_size, tensor.dims); int error = hexagon_nn_->hexagon_nn_append_const_node( graph_id_, node_id, batch_size, height_size, width_size, depth_size, reinterpret_cast(tensor.data.raw), tensor.bytes); @@ -150,19 +225,26 @@ OpBuilder* GraphBuilder::AddConstNodeWithData(int tensor_id, return nullptr; } AddTensorWithID(tensor_id, node_id, 0); + // We need to return the builder with result, so we can't rely + // on builders_.back() as it can change while casting, so we hold pointer + // and update with value from casting if needed. + OpBuilder* result_builder = builders_.back().get(); // Cast int8 to uint8 if requested. // This will add cast op to uint8 and update tensor map to point // to the casted tensor. if (int8_to_uint8 && tensor.type == kTfLiteInt8) { - AddCastOp(context_, OP_Quantized_CastInt8ToUInt8, tensor_id); + AddCastOp(context_, OP_Quantized_CastInt8ToUInt8, tensor_id, + &result_builder); } - return builders_.back().get(); + AddToCache(cache_key, result_builder); + return result_builder; } // TODO(b/154604279): Support these casting ops in Hexagon op profiling (which // seems to key tensors on a single op, which may not be the case now). TfLiteStatus GraphBuilder::AddCastOp(TfLiteContext* context, int op_type, - int tensor_id) { + int tensor_id, + OpBuilder** cast_op_builder) { // Create a new OpBuilder for casting the tensor. OpBuilder* cast_builder = CreateCastBuilder(this, op_type); builders_.emplace_back(cast_builder); @@ -177,6 +259,7 @@ TfLiteStatus GraphBuilder::AddCastOp(TfLiteContext* context, int op_type, TF_LITE_ENSURE_STATUS(cast_builder->RegisterOutputs(tensor_data, context)); TfLiteIntArrayFree(tensor_data); + if (cast_op_builder != nullptr) *cast_op_builder = cast_builder; return kTfLiteOk; } @@ -192,12 +275,12 @@ TfLiteStatus GraphBuilder::AddInputTensors(const TfLiteIntArray* input_tensors, const int tensor_id = input_tensors->data[i]; const auto& tensor = context->tensors[tensor_id]; if (tensor.allocation_type == kTfLiteMmapRo) continue; - input_op->AddOutput(tensor.dims); + input_op->AddOutput(tensor.dims, GetElementSize(tensor.type)); AddTensorWithID(tensor_id, input_op->GetID(), num_inputs); // If tensor is of type int8, add an op to cast it to uint8. if (tensor.type == kTfLiteInt8) { - TF_LITE_ENSURE_STATUS( - AddCastOp(context, OP_Quantized_CastInt8ToUInt8, tensor_id)); + TF_LITE_ENSURE_STATUS(AddCastOp(context, OP_Quantized_CastInt8ToUInt8, + tensor_id, /*cast_op_builder=*/nullptr)); } ++num_inputs; } @@ -215,8 +298,8 @@ TfLiteStatus GraphBuilder::AddOutputTensors( const auto& tensor = context->tensors[tensor_id]; // If tensor is of type int8, add an op to cast it to uint8. 
if (tensor.type == kTfLiteInt8) { - TF_LITE_ENSURE_STATUS( - AddCastOp(context, OP_Quantized_CastUInt8ToInt8, tensor_id)); + TF_LITE_ENSURE_STATUS(AddCastOp(context, OP_Quantized_CastUInt8ToInt8, + tensor_id, /*cast_op_builder=*/nullptr)); } hexagon_output_ids.push_back(GetHexagonTensorId(tensor_id)); } @@ -231,9 +314,10 @@ TfLiteStatus GraphBuilder::AddOutputTensors( return kTfLiteOk; } -OpBuilder::TensorID OpBuilder::AddOutput(const TfLiteIntArray* dims) { +OpBuilder::TensorID OpBuilder::AddOutput(const TfLiteIntArray* dims, + int element_size) { op_node_.outputs.push_back(hexagon_nn_output()); - op_node_.outputs.back().elementsize = sizeof(uint8_t); + op_node_.outputs.back().elementsize = element_size; op_node_.outputs.back().rank = 4; // TODO(karimnosseir): What is a good to estimate the max size ? int batch_size, height_size, width_size, depth_size; diff --git a/tensorflow/lite/delegates/hexagon/builders/op_builder.h b/tensorflow/lite/delegates/hexagon/builders/op_builder.h index 52b130c756f..c2a2889b142 100644 --- a/tensorflow/lite/delegates/hexagon/builders/op_builder.h +++ b/tensorflow/lite/delegates/hexagon/builders/op_builder.h @@ -16,6 +16,7 @@ limitations under the License. #define TENSORFLOW_LITE_DELEGATES_HEXAGON_BUILDERS_OP_BUILDER_H_ #include +#include #include #include #include @@ -131,9 +132,9 @@ class OpBuilder { void AddInput(const TensorID& tensor_id) { input_ids_.push_back(tensor_id); } // Adds Output to the current node, the output has shape defined in 'dims'. - // This assumes the data type is uint8. + // The size of each element is defined using 'element_size'. // Returns the TensorID identifying this output in the graph. - TensorID AddOutput(const TfLiteIntArray* dims); + TensorID AddOutput(const TfLiteIntArray* dims, int element_size); // Adds Output to the current node, each element in the output has // size 'elementsize' and rank 'rank' and for each dimension in the output @@ -316,11 +317,22 @@ class GraphBuilder { bool AddTensorWithID(int tflite_tensor_id, int hexagon_node_id, int hexagon_node_output_id, bool overwrite = false) { if (!overwrite && HasTensor(tflite_tensor_id)) { + TF_LITE_KERNEL_LOG( + context_, + "Trying to add duplicate tensor without overwrite, tflite_tensor_id " + "%d, hexagon_node_id %d, hexagon_node_output_id %d", + tflite_tensor_id, hexagon_node_id, hexagon_node_output_id); return false; } if (tensors_.size() <= tflite_tensor_id) { tensors_.resize(tflite_tensor_id + 1); } + if (hexagon_node_id == -1 || hexagon_node_output_id == -1) + TF_LITE_KERNEL_LOG(context_, + "Trying to add invalid id, tflite_tensor_id " + "%d, hexagon_node_id %d, hexagon_node_output_id %d", + tflite_tensor_id, hexagon_node_id, + hexagon_node_output_id); tensors_[tflite_tensor_id] = OpBuilder::TensorID(hexagon_node_id, hexagon_node_output_id); return true; @@ -348,6 +360,14 @@ class GraphBuilder { int GetMaxBatchSize() const { return max_size_for_batch_; } private: + // Lookup in cache if data with key 'cache_key' is present. + // Return OpBuilder* for the data if found, nullptr otherwise. + OpBuilder* LookupConstData(uint64_t cache_key); + + // Inserts 'value' in cache, with key equals 'cache_key'. + // If data in cache with same key then it will be overwritten. + void AddToCache(uint64_t cache_key, OpBuilder* value); + // Helper method to fetch dimensions. // TODO(karimnosseir): Move this method to shared place. 
void GetDims(int* batch_size, int* height_size, int* width_size, @@ -360,7 +380,10 @@ class GraphBuilder { } // Adds a Cast op to convert a tensor from int8 to uint8 (or vice versa). - TfLiteStatus AddCastOp(TfLiteContext* context, int op_type, int tensor_id); + // The builder which has the casting operator is filled in 'cast_op_builder' + // if not nullptr. + TfLiteStatus AddCastOp(TfLiteContext* context, int op_type, int tensor_id, + OpBuilder** cast_op_builder); const HexagonNN* hexagon_nn_ = nullptr; TfLiteContext* context_ = nullptr; @@ -373,6 +396,11 @@ class GraphBuilder { // If the graph being built supports dynamic batch, this represents // the maximum value for batch. int max_size_for_batch_ = -1; + + // Cache for const data in the graph. + // Key is hash of the data, value is pointer to the OpBuilder* for the added + // data. + std::map cache_; }; } // namespace hexagon diff --git a/tensorflow/lite/delegates/hexagon/builders/transpose_builder.cc b/tensorflow/lite/delegates/hexagon/builders/transpose_builder.cc index 4a7304d011e..eb0c2668edc 100644 --- a/tensorflow/lite/delegates/hexagon/builders/transpose_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/transpose_builder.cc @@ -29,15 +29,7 @@ TfLiteStatus TransposeOpBuilder::PopulateSubGraph(const TfLiteIntArray* inputs, const auto& input_tensor = context->tensors[tensor_id]; AddInput(graph_builder_->GetHexagonTensorId(tensor_id)); // permutation tensor. - tensor_id = inputs->data[1]; - const auto& control_tensor = context->tensors[tensor_id]; - if (control_tensor.allocation_type == kTfLiteMmapRo) { - auto* const_control_tensor_node = - graph_builder_->AddConstNodeWithData(tensor_id, control_tensor); - AddInput(TensorID(const_control_tensor_node->GetID(), 0)); - } else { - AddInput(graph_builder_->GetHexagonTensorId(tensor_id)); - } + AddInput(graph_builder_->GetHexagonTensorId(inputs->data[1])); TF_LITE_ENSURE_STATUS(ComputeAndAddMinAndMax(context, input_tensor)); diff --git a/tensorflow/lite/delegates/hexagon/builders/transpose_conv_2d_builder.cc b/tensorflow/lite/delegates/hexagon/builders/transpose_conv_2d_builder.cc index d2620f71007..3e852533394 100644 --- a/tensorflow/lite/delegates/hexagon/builders/transpose_conv_2d_builder.cc +++ b/tensorflow/lite/delegates/hexagon/builders/transpose_conv_2d_builder.cc @@ -97,8 +97,6 @@ TfLiteStatus TransposeConv2dOpBuilder::PopulateSubGraph( filter_depth_size; GetDims(&filter_batch_size, &filter_height_size, &filter_width_size, &filter_depth_size, weights_tensor.dims); - weight_shape_ = {filter_batch_size, filter_height_size, filter_width_size, - filter_depth_size}; // Weights tensor could be int8 even for per-tensor quantization. // Therefore, we look at the number of scale values to check if it is // per-channel quantized. @@ -106,25 +104,7 @@ TfLiteStatus TransposeConv2dOpBuilder::PopulateSubGraph( reinterpret_cast( weights_tensor.quantization.params); const bool is_per_channel_quant = weights_quant_params->scale->size > 1; - - OpBuilder* const_weights_node; - if (weights_tensor.type == kTfLiteInt8) { - std::vector weights_data(NumElements(&weights_tensor)); - const int8_t* original_data = weights_tensor.data.int8; - // Flip bits on the weight values so that the int8 values are treated - // as uint8. 
- for (int i = 0; i < NumElements(&weights_tensor); ++i) { - weights_data[i] = original_data[i] ^ k8BitSignFlipConstant; - } - const_weights_node = graph_builder_->AddConstNodeWithData( - weight_shape_.data(), reinterpret_cast(weights_data.data()), - weights_data.size() * sizeof(weights_data[0])); - } else { - const_weights_node = graph_builder_->AddConstNodeWithData( - weight_shape_.data(), weights_tensor.data.raw, weights_tensor.bytes); - } - graph_builder_->AddTensorWithID(tensor_id, const_weights_node->GetID(), 0); - AddInput(TensorID(const_weights_node->GetID(), 0)); + AddInput(graph_builder_->GetHexagonTensorId(tensor_id)); // Handle weights quantization. float weights_min = 0; diff --git a/tensorflow/lite/delegates/hexagon/builders/transpose_conv_2d_builder.h b/tensorflow/lite/delegates/hexagon/builders/transpose_conv_2d_builder.h index 0a6a90a0297..4afab9894f0 100644 --- a/tensorflow/lite/delegates/hexagon/builders/transpose_conv_2d_builder.h +++ b/tensorflow/lite/delegates/hexagon/builders/transpose_conv_2d_builder.h @@ -47,7 +47,7 @@ class TransposeConv2dOpBuilder : public OpBuilder { TensorID node_output_; std::vector transposed_weights_; std::vector stride_shape_; - std::vector weight_shape_, bias_shape_; + std::vector bias_shape_; std::vector bias_data_; // Non-null only if node has per-channel quantized weights/biases. diff --git a/tensorflow/lite/delegates/hexagon/hexagon_delegate_kernel.cc b/tensorflow/lite/delegates/hexagon/hexagon_delegate_kernel.cc index cdf6b555929..83ebc15510e 100644 --- a/tensorflow/lite/delegates/hexagon/hexagon_delegate_kernel.cc +++ b/tensorflow/lite/delegates/hexagon/hexagon_delegate_kernel.cc @@ -264,8 +264,9 @@ TfLiteStatus HexagonDelegateKernel::BuildGraph( if (tensor_id == -1) continue; const auto& input_tensor = context->tensors[tensor_id]; if (input_tensor.allocation_type == kTfLiteMmapRo) { - builder_->AddConstNodeWithData(tensor_id, input_tensor, - /*int8_to_uint8*/ true); + builder_->AddConstNodeWithData( + tensor_id, input_tensor, + /*int8_to_uint8*/ (input_tensor.type == kTfLiteInt8)); } } auto* op_builder = From a879f1c1fb59d9c741a5eef5f614343f1d794eef Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Tue, 11 Aug 2020 10:26:24 -0700 Subject: [PATCH 2513/2522] [MLIR] Extend ResourceAliasAnalysis to handle calls and region based control flow - Extend the analysis to handle IfRegion, WhileRegion, and calls, propagating aliases through passthrough values. - Fix input -> output propagation to use AddValueUniqueIDMapping() so that both maps get correctly updated. - Fix GetResourceAliases() to add all unknown resource values to the set of aliases for any given value. - Add a new transform to annotate the IR with resource alias analysis info and use this pass to add a new unit test for resource alias analysis. 
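To make the passthrough handling concrete, the following is a self-contained sketch of the bookkeeping the analysis relies on; it is an illustration only, with plain ints standing in for mlir::Value and the hypothetical names AliasInfo, AddMapping, ForwardOperandToResult and GetAliases. The real pass additionally seeds region arguments, assigns IDs to VarHandleOp results, and resolves call targets, as shown in the diff below.

  #include <cstdint>
  #include <map>
  #include <set>

  constexpr int64_t kUnknownId = -1;  // analogous to kUnknownResourceId

  class AliasInfo {
   public:
    // Keeps both directions in sync, like AddValueUniqueIDMapping.
    void AddMapping(int value, int64_t id) {
      value_to_ids_[value].insert(id);
      id_to_values_[id].insert(value);
    }

    // A passthrough result (Identity, or a call/region result that forwards
    // an operand) inherits every ID of that operand.
    void ForwardOperandToResult(int operand, int result) {
      const std::set<int64_t> ids = value_to_ids_[operand];
      for (int64_t id : ids) AddMapping(result, id);
    }

    // Two values alias if they share any ID; values carrying the unknown ID
    // alias every other resource value.
    std::set<int> GetAliases(int value) const {
      std::set<int> aliases;
      auto it = value_to_ids_.find(value);
      if (it == value_to_ids_.end()) return aliases;
      for (int64_t id : it->second) {
        const std::set<int>& vals = id_to_values_.at(id);
        aliases.insert(vals.begin(), vals.end());
      }
      auto unknown = id_to_values_.find(kUnknownId);
      if (unknown != id_to_values_.end())
        aliases.insert(unknown->second.begin(), unknown->second.end());
      return aliases;
    }

   private:
    std::map<int, std::set<int64_t>> value_to_ids_;
    std::map<int64_t, std::set<int>> id_to_values_;
  };

Forwarding IDs instead of assigning fresh ones is what lets an IfRegion or call result that merely returns a captured resource end up aliased with that capture, which the new test file checks through its expected-remark annotations.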
PiperOrigin-RevId: 326048726 Change-Id: I92f6962dcece6ee55df236507fac20aee795157f --- tensorflow/compiler/mlir/tensorflow/BUILD | 1 + .../analysis/resource_alias_analysis.cc | 190 +++++++++----- .../analysis/resource_alias_analysis.h | 14 +- .../tests/resource-alias-analysis-test.mlir | 234 ++++++++++++++++++ .../test_resource_alias_analysis.cc | 111 +++++++++ 5 files changed, 483 insertions(+), 67 deletions(-) create mode 100644 tensorflow/compiler/mlir/tensorflow/tests/resource-alias-analysis-test.mlir create mode 100644 tensorflow/compiler/mlir/tensorflow/transforms/test_resource_alias_analysis.cc diff --git a/tensorflow/compiler/mlir/tensorflow/BUILD b/tensorflow/compiler/mlir/tensorflow/BUILD index 3ee591ce46a..d2e57f72774 100644 --- a/tensorflow/compiler/mlir/tensorflow/BUILD +++ b/tensorflow/compiler/mlir/tensorflow/BUILD @@ -776,6 +776,7 @@ cc_library( "transforms/stack_ops_decomposition.cc", "transforms/tensor_array_ops_decomposition.cc", "transforms/tensor_list_ops_decomposition.cc", + "transforms/test_resource_alias_analysis.cc", "transforms/test_side_effect_analysis.cc", "transforms/tf_data_optimization_pass.cc", "transforms/tf_device_assignment.cc", diff --git a/tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.cc b/tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.cc index 3278c06fabe..53de595eef2 100644 --- a/tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.cc +++ b/tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.cc @@ -34,12 +34,12 @@ limitations under the License. #include "mlir/IR/Operation.h" // from @llvm-project #include "mlir/IR/StandardTypes.h" // from @llvm-project #include "mlir/IR/Value.h" // from @llvm-project +#include "mlir/IR/Visitors.h" // from @llvm-project #include "mlir/Support/LLVM.h" // from @llvm-project #include "mlir/Support/LogicalResult.h" // from @llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_device.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" -#include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" #include "tensorflow/compiler/tf2xla/resource_operation_table.h" #include "tensorflow/core/framework/resource_mgr.h" @@ -161,7 +161,7 @@ Value BacktrackAnalysis::BacktrackValue(Value value) { // in the Island body. if (value == island.control()) break; value = island.GetYield().getOperand(res_index); - } else if (isa(op)) { + } else if (isa(op)) { value = op->getOperand(res_index); } else { break; @@ -196,12 +196,12 @@ constexpr char kResourceArgUniqueIdAttr[] = "tf._resource_arg_unique_id"; // Returns if a VarHandleOp is anonymous, which means it always creates a new // variable. -bool IsResourceHandleAnonymous(TF::VarHandleOp handle) { +bool IsResourceHandleAnonymous(VarHandleOp handle) { return handle.shared_name() == tensorflow::ResourceHandle::ANONYMOUS_NAME; } // Returns a string unique identifier for a non-anonymous VarHandleOp. -std::string GetVarHandleStringId(TF::VarHandleOp handle) { +std::string GetVarHandleStringId(VarHandleOp handle) { auto device = handle.getAttrOfType("device"); return absl::StrCat(handle.container().str(), "/", handle.shared_name().str(), "/", device ? device.getValue().str() : std::string("")); @@ -210,7 +210,7 @@ std::string GetVarHandleStringId(TF::VarHandleOp handle) { // Finds a unique ID for a VarHandleOp's output. 
If it is anonymous, always // creates a new ID; otherwise, tries to reuse the existing ID for the // referenced variable if it exists, or creates a new one if not. -int64_t GetOrCreateIdForVarHandle(TF::VarHandleOp handle, int64_t* next_id, +int64_t GetOrCreateIdForVarHandle(VarHandleOp handle, int64_t* next_id, llvm::StringMap* name_id_map) { // Always create a new ID for anonymous handle. if (IsResourceHandleAnonymous(handle)) return (*next_id)++; @@ -234,121 +234,173 @@ ResourceAliasAnalysisInfo::ResourceAliasAnalysisInfo( FuncOp func_op, const detail::BacktrackAnalysis& backtrack_analysis) { // This function populates resource_value_to_ids_ and id_to_resource_values_. + int64_t next_unique_id = 0; + + // Helper to assign new unique id for all resources in the given list of + // values. + auto assign_unique_id_to_all = [&](ValueRange values) { + for (Value value : filter_resources(values)) { + AddValueUniqueIDMapping(value, next_unique_id++); + } + }; + + // Helper to assign new unknown id for all resources in the given list of + // values. + auto assign_unknown_id_to_all = [&](ValueRange values) { + for (Value value : filter_resources(values)) { + AddValueUniqueIDMapping(value, kUnknownResourceId); + } + }; + // If the "tf.resource_arg_unique_id" argument attributes are present for // resource-type arguments, respect them when choosing IDs; otherwise, they // must not alias. - int64_t next_unique_id = 0; const bool has_arg_unique_id_attrs = llvm::any_of(func_op.getArguments(), [&](const BlockArgument& arg) { return func_op.getArgAttr(arg.getArgNumber(), kResourceArgUniqueIdAttr); }); // Maps the kResourceArgUniqueIdAttr attribute value to the internal integer // ID used by this pass. - llvm::SmallDenseMap attr_id_to_internal_id; - for (auto arg : func_op.getArguments()) { - if (!mlir::getElementTypeOrSelf(arg.getType()).isa()) - continue; - if (has_arg_unique_id_attrs) { + if (has_arg_unique_id_attrs) { + llvm::SmallDenseMap attr_id_to_internal_id; + for (auto arg : filter_resources(func_op.getArguments())) { auto id_attr = func_op.getArgAttrOfType( arg.getArgNumber(), kResourceArgUniqueIdAttr); assert(id_attr && - "tf.resource_arg_unique_id attribute should exist on either none " - "or all arguments."); + "tf.resource_arg_unique_id attribute should exist on either " + "none or all arguments."); auto emplace_res = attr_id_to_internal_id.try_emplace(id_attr.getInt(), next_unique_id++); AddValueUniqueIDMapping(arg, emplace_res.first->getSecond()); - } else { - AddValueUniqueIDMapping(arg, next_unique_id++); } + } else { + assign_unique_id_to_all(func_op.getArguments()); } + + // Since this analysis is neither inter-procedural nor inter-regional, + // each region attached to Op's within a function is analyzed independently. + // Seed this analysis for each such region by mapping all resource arguments + // for such regions to a new unique-id. This is required because walk() walks + // the attached regions first before visiting the op, so there is no + // opportunity during the walk to seed region arguments. Also note that walk + // eventually also visits the Op on which the walk() is called, so make sure + // we do not overwrite the function argument mapping here. 
+ func_op.walk([&](Operation* op) { + if (op == func_op) return; + for (Region& region : op->getRegions()) { + assign_unique_id_to_all(region.getArguments()); + } + }); + llvm::StringMap var_handle_name_id_map; auto forward_input_to_output = [&](const Value& operand, - const Value& result) { - if (!mlir::getElementTypeOrSelf(result.getType()).isa()) - return; - auto& result_ids = resource_value_to_ids_[result]; + const OpResult& result) { auto operand_it = resource_value_to_ids_.find(operand); assert(operand_it != resource_value_to_ids_.end() && "A resource-type output does not have the corresponding " "resource-type input."); - result_ids.insert(operand_it->getSecond().begin(), - operand_it->getSecond().end()); + for (int64_t id : operand_it->second) AddValueUniqueIDMapping(result, id); }; func_op.walk([&](Operation* op) { - if (auto var_handle = llvm::dyn_cast(op)) { + if (auto var_handle = dyn_cast(op)) { AddValueUniqueIDMapping( var_handle.resource(), GetOrCreateIdForVarHandle(var_handle, &next_unique_id, &var_handle_name_id_map)); - } else if (llvm::isa(op)) { - for (auto operand_and_result : - llvm::zip(op->getOperands(), op->getResults())) { - forward_input_to_output(std::get<0>(operand_and_result), - std::get<1>(operand_and_result)); - } - } else if (auto replicate = llvm::dyn_cast(op)) { - // The nested block for ReplicateOp is handled separately in side-effect - // analysis. Inside that block, we can still treat its block arguments as - // different resources. - for (auto arg : replicate.GetBody().getArguments()) { - if (mlir::getElementTypeOrSelf(arg.getType()).isa()) { - AddValueUniqueIDMapping(arg, next_unique_id++); - } - } - } else if (auto while_op = llvm::dyn_cast(op)) { + } else if (llvm::isa(op)) { + for (auto result : filter_resources(op->getResults())) + forward_input_to_output(op->getOperand(result.getResultNumber()), + result); + } else if (auto while_op = dyn_cast(op)) { const auto& body_info = backtrack_analysis.GetAnalysisForFunc(while_op.body_func()); // If a result is a passthrough of the body input, use the corresponding // operand's resource IDs. - for (auto result : llvm::enumerate(while_op.getResults())) { - if (!mlir::getElementTypeOrSelf(result.value().getType()) - .isa()) { - continue; - } - auto passthrough_arg = body_info.GetArg(result.index()); + for (auto result : filter_resources(while_op.getResults())) { + auto passthrough_arg = body_info.GetArg(result.getResultNumber()); if (passthrough_arg) { forward_input_to_output( - while_op.getOperand(passthrough_arg.getValue()), result.value()); + while_op.getOperand(passthrough_arg.getValue()), result); } else { - AddValueUniqueIDMapping(result.value(), kUnknownResourceId); + AddValueUniqueIDMapping(result, kUnknownResourceId); } } - } else if (auto if_op = llvm::dyn_cast(op)) { + } else if (auto while_region = dyn_cast(op)) { + const auto& body_info = + backtrack_analysis.GetAnalysisForRegion(while_region.body()); + // If a result is a passthrough of the body input, use the corresponding + // operand's resource IDs. 
+ for (auto result : filter_resources(while_region.getResults())) { + auto passthrough_arg = body_info.GetArg(result.getResultNumber()); + if (passthrough_arg) { + forward_input_to_output( + while_region.getOperand(passthrough_arg.getValue()), result); + } else { + AddValueUniqueIDMapping(result, kUnknownResourceId); + } + } + } else if (auto if_op = dyn_cast(op)) { const auto& then_info = backtrack_analysis.GetAnalysisForFunc(if_op.then_func()); const auto& else_info = backtrack_analysis.GetAnalysisForFunc(if_op.else_func()); // If a result is a passthrough of both branches' inputs, merge the // resource IDs of corresponding operands for the two inputs. - for (auto result : llvm::enumerate(if_op.getResults())) { - if (!mlir::getElementTypeOrSelf(result.value().getType()) - .isa()) { - continue; - } - auto passthrough_then_arg = then_info.GetArg(result.index()); - auto passthrough_else_arg = else_info.GetArg(result.index()); + for (auto result : filter_resources(if_op.getResults())) { + auto passthrough_then_arg = then_info.GetArg(result.getResultNumber()); + auto passthrough_else_arg = else_info.GetArg(result.getResultNumber()); if (passthrough_then_arg && passthrough_else_arg) { Value then_operand = if_op.input()[passthrough_then_arg.getValue()]; Value else_operand = if_op.input()[passthrough_else_arg.getValue()]; - forward_input_to_output(then_operand, result.value()); - forward_input_to_output(else_operand, result.value()); + forward_input_to_output(then_operand, result); + forward_input_to_output(else_operand, result); } else { - AddValueUniqueIDMapping(result.value(), kUnknownResourceId); + AddValueUniqueIDMapping(result, kUnknownResourceId); + } + } + } else if (auto if_region = dyn_cast(op)) { + const auto& then_info = + backtrack_analysis.GetAnalysisForRegion(if_region.then_branch()); + const auto& else_info = + backtrack_analysis.GetAnalysisForRegion(if_region.else_branch()); + for (auto result : filter_resources(if_region.getResults())) { + Value then_result = then_info.GetValue(result.getResultNumber()); + Value else_result = else_info.GetValue(result.getResultNumber()); + // For IfRegion, the walk would have visited the else and then regions + // before visiting the IfRegion op. Backtracking of the then and else + // results will either give a value computed within these regions, + // or a region capture. If its a region capture, computed before this + // IfRegion, it will have been visited earlier and a mapping would + // exist for that value. If its computed within the region, then again + // a mapping would exist. 
+ forward_input_to_output(then_result, result); + forward_input_to_output(else_result, result); + } + } else if (auto call = dyn_cast(op)) { + FuncOp func = dyn_cast(call.resolveCallable()); + if (!func) { + assign_unknown_id_to_all(op->getResults()); + return WalkResult::advance(); + } + const auto& func_info = backtrack_analysis.GetAnalysisForFunc(func); + for (auto result : filter_resources(op->getResults())) { + auto passthrough_arg = func_info.GetArg(result.getResultNumber()); + if (passthrough_arg) { + forward_input_to_output( + call.getArgOperands()[passthrough_arg.getValue()], result); + } else { + AddValueUniqueIDMapping(result, kUnknownResourceId); } } } else { - for (auto result : op->getResults()) { - if (!mlir::getElementTypeOrSelf(result.getType()) - .isa()) - continue; - AddValueUniqueIDMapping(result, kUnknownResourceId); - } + assign_unknown_id_to_all(op->getResults()); } + return WalkResult::advance(); }); } -bool ResourceAliasAnalysisInfo::IsUnknownResource(const Value resource) const { +bool ResourceAliasAnalysisInfo::IsUnknownResource(Value resource) const { auto it = resource_value_to_ids_.find(resource); assert(it != resource_value_to_ids_.end() && !it->getSecond().empty()); // The set is sorted so we only need to check the first element since @@ -360,6 +412,7 @@ bool ResourceAliasAnalysisInfo::IsUnknownResource(const Value resource) const { const llvm::SmallSet& ResourceAliasAnalysisInfo::GetResourceUniqueIds(Value resource) const { + assert(!IsUnknownResource(resource)); auto it = resource_value_to_ids_.find(resource); assert(it != resource_value_to_ids_.end() && "Unseen resource was queried"); return it->getSecond(); @@ -373,14 +426,19 @@ ResourceAliasAnalysisInfo::GetUniqueIdResources(const int64_t id) const { } llvm::SmallSetVector ResourceAliasAnalysisInfo::GetResourceAliases( - const Value resource) const { - assert(!IsUnknownResource(resource) && "Unseen resource was queried"); + Value resource) const { + assert(!IsUnknownResource(resource) && "Unknown resource was queried"); llvm::SmallSetVector aliases; for (int64_t id : GetResourceUniqueIds(resource)) { const llvm::SmallSetVector& resources_aliasing_id = GetUniqueIdResources(id); aliases.insert(resources_aliasing_id.begin(), resources_aliasing_id.end()); } + // If there are resources that were marked as unknown, they alias with all + // other resources. + auto it = id_to_resource_values_.find(kUnknownResourceId); + if (it != id_to_resource_values_.end()) + aliases.insert(it->getSecond().begin(), it->getSecond().end()); return aliases; } diff --git a/tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.h b/tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.h index 5a514a7fb64..d9fd693042f 100644 --- a/tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.h +++ b/tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.h @@ -20,13 +20,16 @@ limitations under the License. 
#include #include +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "mlir/IR/Operation.h" // from @llvm-project #include "mlir/IR/Region.h" // from @llvm-project +#include "mlir/IR/TypeUtilities.h" // from @llvm-project #include "tensorflow/compiler/mlir/tensorflow/analysis/per_function_aggregate_analysis.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" namespace mlir { namespace TF { @@ -43,7 +46,7 @@ class ResourceAliasAnalysisInfo { ResourceAliasAnalysisInfo(ResourceAliasAnalysisInfo&&) = default; // Returns if the analysis fails to resolve a resource-type value. - bool IsUnknownResource(const Value resource) const; + bool IsUnknownResource(Value resource) const; // Returns the set unique IDs which `resource` could alias. Requires that // IsUnknownResource(resource) == false. @@ -91,6 +94,15 @@ class ResourceAliasAnalysis : public detail::PerFunctionAggregateAnalysis< explicit ResourceAliasAnalysis(Operation* op); }; +// Returns a range with just resource type values from the input range +// preserved. +template +auto filter_resources(RangeT&& range) { + return llvm::make_filter_range(std::forward(range), [](Value val) { + return getElementTypeOrSelf(val.getType()).isa(); + }); +} + } // namespace TF } // namespace mlir diff --git a/tensorflow/compiler/mlir/tensorflow/tests/resource-alias-analysis-test.mlir b/tensorflow/compiler/mlir/tensorflow/tests/resource-alias-analysis-test.mlir new file mode 100644 index 00000000000..af63f3312bc --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/tests/resource-alias-analysis-test.mlir @@ -0,0 +1,234 @@ +// RUN: tf-opt -split-input-file -tf-test-resource-alias-analysis -verify-diagnostics %s | FileCheck %s + +// Test 2 resources that do not alias. + +!tf_res = type tensor<*x!tf.resource>> +// CHECK-LABEL: func @non_aliasing_reads_writes +// expected-remark@below {{Region #0, Arg #0, ID 1 : 1}} +// expected-remark@below {{Region #0, Arg #1, ID 2 : 2}} +func @non_aliasing_reads_writes( + %arg0: !tf_res, + %arg1: !tf_res, + %arg2: tensor<32xf32>) -> (tensor<32xf32>) { + %graph = tf_executor.graph { + // CHECK: tf_executor.island + %island:2 = tf_executor.island { + %read0 = "tf.ReadVariableOp"(%arg0) : (!tf_res) -> tensor<32xf32> + "tf.AssignVariableOp"(%arg0, %arg2) : (!tf_res, tensor<32xf32>) -> () + %read1 = "tf.ReadVariableOp"(%arg1) : (!tf_res) -> tensor<32xf32> + // expected-remark@below {{Result #0, ID 0 : 0}} + %var_handle = "tf.VarHandleOp"() {container = "c", shared_name = "v0"} : () -> !tf_res + %read2 = "tf.ReadVariableOp"(%var_handle) : (!tf_res) -> tensor<32xf32> + "tf.AssignVariableOp"(%arg1, %read0) : (!tf_res, tensor<32xf32>) -> () + "tf.AssignVariableOp"(%arg0, %read2) : (!tf_res, tensor<32xf32>) -> () + %read3 = "tf.ReadVariableOp"(%arg0) : (!tf_res) -> tensor<32xf32> + tf_executor.yield %read3 : tensor<32xf32> + } + tf_executor.fetch %island#0 : tensor<32xf32> + } + return %graph : tensor<32xf32> +} + +// ----- +// Tests aliasing of the two resource handles that refer to the same variable. 
+ +!tf_res = type tensor<*x!tf.resource>> +// CHECK-LABEL: func @aliasing_reads_writes +func @aliasing_reads_writes(%arg0: tensor<32xf32>) -> () { + tf_executor.graph { + // CHECK: tf_executor.island + %island = tf_executor.island { + // expected-remark@below {{Result #0, ID 0 : 0, 1, 2}} + %vh0 = "tf.VarHandleOp"() {container = "c", shared_name = "v0"} : () -> !tf_res + // expected-remark@below {{Result #0, ID 1 : 0, 1, 2}} + %vh1 = "tf.VarHandleOp"() {container = "c", shared_name = "v0"} : () -> !tf_res + // expected-remark@below {{Result #0, ID 2 : 0, 1, 2}} + %vh1_id:2 = "tf.IdentityN"(%vh1, %arg0) : (!tf_res, tensor<32xf32>) -> (!tf_res, tensor<32xf32>) + %read0 = "tf.ReadVariableOp"(%vh0) : (!tf_res) -> tensor<32xf32> + "tf.AssignVariableOp"(%vh1_id#0, %arg0) : (!tf_res, tensor<32xf32>) -> () + %read1 = "tf.ReadVariableOp"(%vh0) : (!tf_res) -> tensor<32xf32> + %read2 = "tf.ReadVariableOp"(%vh1) : (!tf_res) -> tensor<32xf32> + "tf.AssignVariableOp"(%vh0, %read2) : (!tf_res, tensor<32xf32>) -> () + "tf.AssignVariableOp"(%vh1_id#0, %read1) : (!tf_res, tensor<32xf32>) -> () + tf_executor.yield + } + tf_executor.fetch %island : !tf_executor.control + } + return +} + +// ----- +// Test an unknown op that has a resource result is marked unknown + +!tf_res = type tensor<*x!tf.resource>> +// CHECK-LABEL: func @unknown_resource_op +func @unknown_resource_op(%arg0: tensor<32xf32>) -> () { + // expected-remark@below {{Result #0, ID 0 : Unknown}} + %0 = "tf.UnknownVarHandleOp"() : () -> !tf_res +} + +// ----- +// Test aliasing through IfOp + +!tf_res = type tensor<*x!tf.resource>> + +// CHECK-LABEL: func @if_op_aliasing +// expected-remark@below {{Region #0, Arg #0, ID 4 : 1, 4}} +// expected-remark@below {{Region #0, Arg #1, ID 5 : 1, 2, 3, 5}} +func @if_op_aliasing(%arg0: !tf_res, %arg1: !tf_res) { + // expected-remark@below {{Result #0, ID 0 : 0}} + %vh0 = "tf.VarHandleOp"() {container = "c", shared_name = "v0"} : () -> !tf_res + %read0 = "tf.ReadVariableOp"(%vh0) : (!tf_res) -> tensor<32xf32> + // expected-remark@below {{Result #0, ID 1 : Unknown}} + // expected-remark@below {{Result #1, ID 2 : 1, 2, 3, 5}} + // expected-remark@below {{Result #2, ID 3 : 0, 1, 2, 3, 5}} + %if:3 = "tf.If"(%read0, %arg1, %vh0) { + then_branch = @if_then, else_branch = @if_else, is_stateless = true + } : (tensor<32xf32>, !tf_res, !tf_res) -> (!tf_res, !tf_res, !tf_res) + return +} + +// expected-remark@below {{Region #0, Arg #0, ID 2 : 0, 1, 2}} +// expected-remark@below {{Region #0, Arg #1, ID 3 : 0, 3}} +func @if_then(%arg0: !tf_res, %arg1: !tf_res) -> (!tf_res, !tf_res, !tf_res) { + // expected-remark@below {{Result #0, ID 0 : Unknown}} + %u0 = "tf._UnknownSideEffectingOp_"() : () -> !tf_res + // expected-remark@below {{Result #0, ID 1 : 0, 1, 2}} + %id0 = "tf.Identity"(%arg0) : (!tf_res) -> !tf_res + return %u0, %id0, %id0 : !tf_res, !tf_res, !tf_res +} + +// expected-remark@below {{Region #0, Arg #0, ID 1 : 0, 1}} +// expected-remark@below {{Region #0, Arg #1, ID 2 : 2}} +func @if_else(%arg0: !tf_res, %arg1: !tf_res) -> (!tf_res, !tf_res, !tf_res) { + // expected-remark@below {{Result #0, ID 0 : 0, 1}} + %id0 = "tf.Identity"(%arg0) : (!tf_res) -> !tf_res + return %id0, %id0, %arg1 : !tf_res, !tf_res, !tf_res +} + +// ----- +// Test aliasing through WhileOp +!tf_res = type tensor<*x!tf.resource>> + +// CHECK-LABEL: func @while_op_aliasing +// expected-remark@below {{Region #0, Arg #0, ID 4 : 1, 4}} +// expected-remark@below {{Region #0, Arg #1, ID 5 : 1, 3, 5}} +// expected-remark@below {{Region #0, Arg #2, 
ID 6 : 1, 2, 6}} +func @while_op_aliasing(%arg0: !tf_res, %arg1: !tf_res, %arg2: !tf_res) { + // expected-remark@below {{Result #0, ID 0 : 0}} + %vh0 = "tf.VarHandleOp"() {container = "c", shared_name = "v0"} : () -> !tf_res + // expected-remark@below {{Result #0, ID 1 : Unknown}} + // expected-remark@below {{Result #1, ID 2 : 1, 2, 6}} + // expected-remark@below {{Result #2, ID 3 : 1, 3, 5}} + %w:3 = "tf.While"(%arg0, %arg1, %arg2) { + body = @while_body, cond = @while_cond, is_stateless = false + } : (!tf_res, !tf_res, !tf_res) -> (!tf_res, !tf_res, !tf_res) + return +} + +// CHECK-LABEL: func @while_body +// Return 0 : new unknown resource +// Return 1 : arg2 +// Return 2 : arg1 +// expected-remark@below {{Region #0, Arg #0, ID 1 : 0, 1}} +// expected-remark@below {{Region #0, Arg #1, ID 2 : 0, 2}} +// expected-remark@below {{Region #0, Arg #2, ID 3 : 0, 3}} +func @while_body(%arg0: !tf_res, %arg1: !tf_res, %arg2: !tf_res) -> (!tf_res, !tf_res, !tf_res) { + // expected-remark@below {{Result #0, ID 0 : Unknown}} + %u0 = "tf._UnknownSideEffectingOp_"() : () -> !tf_res + return %u0, %arg2, %arg1 : !tf_res, !tf_res, !tf_res +} + +// CHECK-LABEL: func @while_cond +// expected-remark@below {{Region #0, Arg #0, ID 0 : 0}} +// expected-remark@below {{Region #0, Arg #1, ID 1 : 1}} +// expected-remark@below {{Region #0, Arg #2, ID 2 : 2}} +func @while_cond(%arg0: !tf_res, %arg1: !tf_res, %arg2: !tf_res) -> tensor { + %0 = constant dense : tensor + return %0 : tensor +} + +// ----- +// Test alias propagation through calls. +!tf_res = type tensor<*x!tf.resource>> +// CHECK-LABEL: func @aliasing_through_calls +func @aliasing_through_calls(%arg0: tensor<32xf32>) -> () { + // expected-remark@below {{Result #0, ID 0 : 0, 1, 2, 3}} + %vh0 = "tf.VarHandleOp"() {container = "c", shared_name = "v0"} : () -> !tf_res + // expected-remark@below {{Result #0, ID 1 : 0, 1, 2, 3}} + %vh1 = "tf.Identity"(%vh0) : (!tf_res) -> (!tf_res) + // expected-remark@below {{Result #0, ID 2 : Unknown}} + // expected-remark@below {{Result #1, ID 3 : 0, 1, 2, 3}} + %c:2 = call @passthru(%vh1) : (!tf_res) -> (!tf_res, !tf_res) + return +} + +// expected-remark@below {{Region #0, Arg #0, ID 1 : 1}} +func @passthru(%arg0: !tf_res) -> (!tf_res, !tf_res) { + // expected-remark@below {{Result #0, ID 0 : 0}} + %vx = "tf.VarHandleOp"() {container = "cf", shared_name = "vx"} : () -> !tf_res + return %vx, %arg0 : !tf_res, !tf_res +} + +// ----- +// Test aliasing through IfRegion + +!tf_res = type tensor<*x!tf.resource>> + +// CHECK-LABEL: func @if_region_aliasing +// expected-remark@below {{Region #0, Arg #0, ID 7 : 1, 4, 6, 7}} +// expected-remark@below {{Region #0, Arg #1, ID 8 : 1, 2, 4, 5, 6, 8}} +func @if_region_aliasing(%arg0: !tf_res, %arg1: !tf_res) { + // expected-remark@below {{Result #0, ID 0 : 0, 1, 3, 4, 5}} + %vh0 = "tf.VarHandleOp"() {container = "c", shared_name = "v0"} : () -> !tf_res + %read0 = "tf.ReadVariableOp"(%vh0) : (!tf_res) -> tensor<32xf32> + // expected-remark@below {{Result #0, ID 4 : Unknown}} + // expected-remark@below {{Result #1, ID 5 : 0, 1, 2, 3, 4, 5, 6, 8}} + // expected-remark@below {{Result #2, ID 6 : 1, 2, 4, 5, 6, 7, 8}} + %if:3 = "tf.IfRegion"(%read0) ({ + // expected-remark@below {{Result #0, ID 1 : Unknown}} + %u0 = "tf._UnknownSideEffectingOp_"() : () -> !tf_res + // expected-remark@below {{Result #0, ID 2 : 1, 2, 4, 5, 6, 8}} + %id0 = "tf.Identity"(%arg1) : (!tf_res) -> !tf_res + "tf.Yield"(%u0, %id0, %id0) : (!tf_res, !tf_res, !tf_res) -> () + }, { + // expected-remark@below {{Result #0, ID 
3 : 0, 1, 3, 4, 5}} + %id0 = "tf.Identity"(%vh0) : (!tf_res) -> !tf_res + "tf.Yield"(%id0, %id0, %arg0) : (!tf_res, !tf_res, !tf_res) -> () + }) {is_stateless = true} : (tensor<32xf32>) -> (!tf_res, !tf_res, !tf_res) + return +} + +// ----- +// Test aliasing through WhileRegion +!tf_res = type tensor<*x!tf.resource>> + +// CHECK-LABEL: func @while_region_aliasing +// expected-remark@below {{Region #0, Arg #0, ID 11 : 1, 8, 11}} +// expected-remark@below {{Region #0, Arg #1, ID 12 : 1, 8, 10, 12}} +// expected-remark@below {{Region #0, Arg #2, ID 13 : 1, 8, 9, 13}} +func @while_region_aliasing(%arg0: !tf_res, %arg1: !tf_res, %arg2: !tf_res) { + // expected-remark@below {{Result #0, ID 0 : 0, 1, 8}} + %vh0 = "tf.VarHandleOp"() {container = "c", shared_name = "v0"} : () -> !tf_res + // expected-remark@below {{Result #0, ID 8 : Unknown}} + // expected-remark@below {{Result #1, ID 9 : 1, 8, 9, 13}} + // expected-remark@below {{Result #2, ID 10 : 1, 8, 10, 12}} + // expected-remark@below {{Region #0, Arg #0, ID 2 : 1, 2, 8}} + // expected-remark@below {{Region #0, Arg #1, ID 3 : 1, 3, 8}} + // expected-remark@below {{Region #0, Arg #2, ID 4 : 1, 4, 8}} + // expected-remark@below {{Region #1, Arg #0, ID 5 : 1, 5, 8}} + // expected-remark@below {{Region #1, Arg #1, ID 6 : 1, 6, 8}} + // expected-remark@below {{Region #1, Arg #2, ID 7 : 1, 7, 8}} + %w:3 = "tf.WhileRegion"(%arg0, %arg1, %arg2) ({ + ^bb0(%carg0: !tf_res, %carg1: !tf_res, %carg2: !tf_res): + %0 = constant dense : tensor + "tf.Yield"(%0) : (tensor) -> () + },{ + ^bb0(%barg0: !tf_res, %barg1: !tf_res, %barg2: !tf_res): + // expected-remark@below {{Result #0, ID 1 : Unknown}} + %u0 = "tf._UnknownSideEffectingOp_"() : () -> !tf_res + "tf.Yield"(%u0, %barg2, %barg1) : (!tf_res, !tf_res, !tf_res) -> () + }) {is_stateless = false} : (!tf_res, !tf_res, !tf_res) -> (!tf_res, !tf_res, !tf_res) + return +} + diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/test_resource_alias_analysis.cc b/tensorflow/compiler/mlir/tensorflow/transforms/test_resource_alias_analysis.cc new file mode 100644 index 00000000000..920b2024c0f --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/transforms/test_resource_alias_analysis.cc @@ -0,0 +1,111 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/
+
+#include
+#include
+#include
+#include
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
+#include "mlir/Pass/Pass.h"  // from @llvm-project
+#include "mlir/Pass/PassManager.h"  // from @llvm-project
+#include "mlir/Support/LLVM.h"  // from @llvm-project
+#include "mlir/Transforms/Passes.h"  // from @llvm-project
+#include "tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.h"
+#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h"
+
+namespace mlir {
+namespace TF {
+namespace {
+
+// A pass that annotates each operation that has a resource type result with
+// the values that may alias each such result. Each value is assigned a unique
+// ID, and that ID is used to annotate the operations.
+struct TestResourceAliasAnalysis
+    : public TF::PerFunctionAggregateAnalysisConsumerPass<
+          TestResourceAliasAnalysis, TF::ResourceAliasAnalysis> {
+  void runOnFunction(FuncOp func,
+                     const TF::ResourceAliasAnalysis::Info& analysis) {
+    int64_t next_id = 0;
+    llvm::SmallDenseMap<Value, int64_t> ids;
+
+    auto assign_id = [&](Value value) {
+      if (ids.find(value) == ids.end()) ids.insert({value, next_id++});
+    };
+
+    auto get_id = [&](Value value) -> int64_t {
+      auto it = ids.find(value);
+      assert(it != ids.end());
+      return it->second;
+    };
+
+    auto print_aliases = [&](InFlightDiagnostic& diag, Value value) {
+      diag << ", ID " << get_id(value) << " : ";
+      if (analysis.IsUnknownResource(value)) {
+        diag << "Unknown";
+      } else {
+        auto aliases = llvm::to_vector<4>(analysis.GetResourceAliases(value));
+        llvm::sort(aliases,
+                   [&](Value v1, Value v2) { return get_id(v1) < get_id(v2); });
+        llvm::interleaveComma(aliases, diag,
+                              [&](Value v) { diag << get_id(v); });
+      }
+    };
+
+    // Assign a unique ID to each value seen in this function.
+    func.walk([&](Operation* op) {
+      // For all attached regions, assign ID to the region arguments.
+      for (Region& region : op->getRegions()) {
+        for (auto region_arg : filter_resources(region.getArguments()))
+          assign_id(region_arg);
+      }
+
+      // Assign ID for all results.
+      for (auto result : filter_resources(op->getResults())) assign_id(result);
+    });
+
+    // Now walk each operation and annotate it with remarks listing the aliases
+    // of each resource type result.
+    func.walk([&](Operation* op) {
+      // For all attached regions, emit remarks for the region arguments.
+      for (Region& region : op->getRegions()) {
+        for (auto region_arg : filter_resources(region.getArguments())) {
+          InFlightDiagnostic diag = op->emitRemark("Region #")
+                                    << region.getRegionNumber() << ", Arg #"
+                                    << region_arg.getArgNumber();
+          print_aliases(diag, region_arg);
+        }
+      }
+
+      for (auto result : filter_resources(op->getResults())) {
+        InFlightDiagnostic diag = op->emitRemark("Result #")
+                                  << result.getResultNumber();
+        print_aliases(diag, result);
+      }
+    });
+  }
+};
+
+static mlir::PassRegistration<TestResourceAliasAnalysis> pass(
+    "tf-test-resource-alias-analysis",
+    "Add remarks based on resource alias analysis result, for testing "
+    "purposes.");
+
+}  // anonymous namespace
+}  // namespace TF
+}  // namespace mlir

From e8598ce0454c440fca64e4ebc4aeedfa7afd5c97 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Tue, 11 Aug 2020 10:57:58 -0700
Subject: [PATCH 2514/2522] Break up tensorflow/core/kernels/BUILD (part 3 of
 N): Move image processing kernels to subdirectory tensorflow/core/kernels/image
 with its own BUILD file.
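For illustration only (not part of the original change): a minimal sketch of how
a consumer target outside tensorflow/core/kernels picks up the relocated kernels,
assuming only the target labels that appear in the diff below; the rule name
"uses_image_kernels" and its source file are hypothetical.

    # Hypothetical consumer library; before this change it would have depended
    # on "//tensorflow/core/kernels:image", afterwards the image kernels live
    # in the new subpackage with its own BUILD file.
    cc_library(
        name = "uses_image_kernels",  # illustrative only, not in this patch
        srcs = ["uses_image_kernels.cc"],
        deps = [
            "//tensorflow/core:framework",
            "//tensorflow/core/kernels/image:image",  # new location of the kernels
        ],
    )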
PiperOrigin-RevId: 326056277 Change-Id: Ibe8813eeb36432c6220bdd14de40898fdef195f2 --- tensorflow/core/BUILD | 4 +- tensorflow/core/kernels/BUILD | 679 +++--------------- .../kernels/conv_ops_fused_image_transform.cc | 2 +- .../core/kernels/conv_ops_using_gemm.cc | 2 +- tensorflow/core/kernels/image/BUILD | 449 ++++++++++++ .../kernels/{ => image}/adjust_contrast_op.cc | 4 +- .../kernels/{ => image}/adjust_contrast_op.h | 6 +- .../adjust_contrast_op_benchmark_test.cc | 0 .../{ => image}/adjust_contrast_op_gpu.cu.cc | 3 +- .../{ => image}/adjust_contrast_op_test.cc | 0 .../kernels/{ => image}/adjust_hsv_gpu.cu.h | 6 +- .../core/kernels/{ => image}/adjust_hue_op.cc | 4 +- .../core/kernels/{ => image}/adjust_hue_op.h | 6 +- .../{ => image}/adjust_hue_op_gpu.cu.cc | 4 +- .../{ => image}/adjust_saturation_op.cc | 4 +- .../{ => image}/adjust_saturation_op.h | 6 +- .../adjust_saturation_op_gpu.cu.cc | 4 +- .../core/kernels/{ => image}/attention_ops.cc | 0 .../core/kernels/{ => image}/colorspace_op.cc | 3 +- .../core/kernels/{ => image}/colorspace_op.h | 6 +- .../{ => image}/colorspace_op_gpu.cu.cc | 2 +- .../kernels/{ => image}/colorspace_op_test.cc | 0 .../kernels/{ => image}/crop_and_resize_op.cc | 2 +- .../kernels/{ => image}/crop_and_resize_op.h | 6 +- .../crop_and_resize_op_benchmark_test.cc | 0 .../{ => image}/crop_and_resize_op_gpu.cu.cc | 2 +- .../{ => image}/crop_and_resize_op_test.cc | 0 .../kernels/{ => image}/decode_image_op.cc | 0 .../{ => image}/draw_bounding_box_op.cc | 0 .../kernels/{ => image}/encode_jpeg_op.cc | 0 .../{ => image}/encode_jpeg_op_test.cc | 0 .../core/kernels/{ => image}/encode_png_op.cc | 0 .../{ => image}/extract_image_patches_op.cc | 2 +- .../{ => image}/extract_image_patches_op.h | 6 +- .../extract_image_patches_op_gpu.cu.cc | 2 +- .../{ => image}/extract_jpeg_shape_op.cc | 0 .../{ => image}/extract_volume_patches_op.cc | 2 +- .../{ => image}/extract_volume_patches_op.h | 6 +- .../extract_volume_patches_op_gpu.cu.cc | 2 +- .../generate_box_proposals_op.cu.cc | 2 +- .../core/kernels/{ => image}/image_ops.cc | 2 +- .../core/kernels/{ => image}/image_ops.h | 6 +- .../kernels/{ => image}/image_ops_gpu.cu.cc | 2 +- .../core/kernels/{ => image}/mirror_pad_op.cc | 4 +- .../core/kernels/{ => image}/mirror_pad_op.h | 6 +- .../mirror_pad_op_benchmark_test.cc | 0 .../{ => image}/mirror_pad_op_cpu_impl.h | 11 +- .../{ => image}/mirror_pad_op_cpu_impl_1.cc | 2 +- .../{ => image}/mirror_pad_op_cpu_impl_2.cc | 2 +- .../{ => image}/mirror_pad_op_cpu_impl_3.cc | 2 +- .../{ => image}/mirror_pad_op_cpu_impl_4.cc | 2 +- .../{ => image}/mirror_pad_op_cpu_impl_5.cc | 2 +- .../{ => image}/mirror_pad_op_gpu.cu.cc | 3 +- .../kernels/{ => image}/mirror_pad_op_test.cc | 0 .../{ => image}/non_max_suppression_op.cc | 2 +- .../{ => image}/non_max_suppression_op.cu.cc | 2 +- .../{ => image}/non_max_suppression_op.h | 6 +- .../non_max_suppression_op_benchmark_test.cc | 0 .../non_max_suppression_op_gpu_test.cc | 0 .../non_max_suppression_op_test.cc | 0 .../kernels/{ => image}/random_crop_op.cc | 1 + .../kernels/{ => image}/resize_area_op.cc | 3 +- .../{ => image}/resize_area_op_test.cc | 0 .../kernels/{ => image}/resize_bicubic_op.cc | 3 +- .../{ => image}/resize_bicubic_op_test.cc | 0 .../kernels/{ => image}/resize_bilinear_op.cc | 5 +- .../kernels/{ => image}/resize_bilinear_op.h | 6 +- .../{ => image}/resize_bilinear_op_gpu.cu.cc | 2 +- .../{ => image}/resize_bilinear_op_test.cc | 0 .../{ => image}/resize_nearest_neighbor_op.cc | 4 +- .../{ => image}/resize_nearest_neighbor_op.h | 6 +- 
.../resize_nearest_neighbor_op_gpu.cu.cc | 2 +- .../resize_nearest_neighbor_op_test.cc | 0 .../{ => image}/resize_op_benchmark_test.cc | 0 .../sample_distorted_bounding_box_op.cc | 0 .../kernels/{ => image}/sampling_kernels.cc | 4 +- .../kernels/{ => image}/sampling_kernels.h | 0 .../{ => image}/sampling_kernels_test.cc | 2 +- .../{ => image}/scale_and_translate_op.cc | 5 +- .../{ => image}/scale_and_translate_op.h | 8 +- .../scale_and_translate_op_test.cc | 2 +- tensorflow/core/kernels/linalg/BUILD | 13 - tensorflow/core/kernels/mkl/BUILD | 30 +- .../kernels/quantized_resize_bilinear_op.cc | 2 +- tensorflow/core/util/BUILD | 15 +- .../{kernels => util}/image_resizer_state.h | 8 +- tensorflow/examples/label_image/BUILD | 2 +- 87 files changed, 709 insertions(+), 694 deletions(-) create mode 100644 tensorflow/core/kernels/image/BUILD rename tensorflow/core/kernels/{ => image}/adjust_contrast_op.cc (99%) rename tensorflow/core/kernels/{ => image}/adjust_contrast_op.h (97%) rename tensorflow/core/kernels/{ => image}/adjust_contrast_op_benchmark_test.cc (100%) rename tensorflow/core/kernels/{ => image}/adjust_contrast_op_gpu.cu.cc (96%) rename tensorflow/core/kernels/{ => image}/adjust_contrast_op_test.cc (100%) rename tensorflow/core/kernels/{ => image}/adjust_hsv_gpu.cu.h (96%) rename tensorflow/core/kernels/{ => image}/adjust_hue_op.cc (99%) rename tensorflow/core/kernels/{ => image}/adjust_hue_op.h (88%) rename tensorflow/core/kernels/{ => image}/adjust_hue_op_gpu.cu.cc (93%) rename tensorflow/core/kernels/{ => image}/adjust_saturation_op.cc (99%) rename tensorflow/core/kernels/{ => image}/adjust_saturation_op.h (87%) rename tensorflow/core/kernels/{ => image}/adjust_saturation_op_gpu.cu.cc (93%) rename tensorflow/core/kernels/{ => image}/attention_ops.cc (100%) rename tensorflow/core/kernels/{ => image}/colorspace_op.cc (99%) rename tensorflow/core/kernels/{ => image}/colorspace_op.h (95%) rename tensorflow/core/kernels/{ => image}/colorspace_op_gpu.cu.cc (95%) rename tensorflow/core/kernels/{ => image}/colorspace_op_test.cc (100%) rename tensorflow/core/kernels/{ => image}/crop_and_resize_op.cc (99%) rename tensorflow/core/kernels/{ => image}/crop_and_resize_op.h (93%) rename tensorflow/core/kernels/{ => image}/crop_and_resize_op_benchmark_test.cc (100%) rename tensorflow/core/kernels/{ => image}/crop_and_resize_op_gpu.cu.cc (99%) rename tensorflow/core/kernels/{ => image}/crop_and_resize_op_test.cc (100%) rename tensorflow/core/kernels/{ => image}/decode_image_op.cc (100%) rename tensorflow/core/kernels/{ => image}/draw_bounding_box_op.cc (100%) rename tensorflow/core/kernels/{ => image}/encode_jpeg_op.cc (100%) rename tensorflow/core/kernels/{ => image}/encode_jpeg_op_test.cc (100%) rename tensorflow/core/kernels/{ => image}/encode_png_op.cc (100%) rename tensorflow/core/kernels/{ => image}/extract_image_patches_op.cc (98%) rename tensorflow/core/kernels/{ => image}/extract_image_patches_op.h (91%) rename tensorflow/core/kernels/{ => image}/extract_image_patches_op_gpu.cu.cc (94%) rename tensorflow/core/kernels/{ => image}/extract_jpeg_shape_op.cc (100%) rename tensorflow/core/kernels/{ => image}/extract_volume_patches_op.cc (99%) rename tensorflow/core/kernels/{ => image}/extract_volume_patches_op.h (92%) rename tensorflow/core/kernels/{ => image}/extract_volume_patches_op_gpu.cu.cc (94%) rename tensorflow/core/kernels/{ => image}/generate_box_proposals_op.cu.cc (99%) rename tensorflow/core/kernels/{ => image}/image_ops.cc (99%) rename tensorflow/core/kernels/{ => image}/image_ops.h 
(98%) rename tensorflow/core/kernels/{ => image}/image_ops_gpu.cu.cc (96%) rename tensorflow/core/kernels/{ => image}/mirror_pad_op.cc (99%) rename tensorflow/core/kernels/{ => image}/mirror_pad_op.h (99%) rename tensorflow/core/kernels/{ => image}/mirror_pad_op_benchmark_test.cc (100%) rename tensorflow/core/kernels/{ => image}/mirror_pad_op_cpu_impl.h (83%) rename tensorflow/core/kernels/{ => image}/mirror_pad_op_cpu_impl_1.cc (91%) rename tensorflow/core/kernels/{ => image}/mirror_pad_op_cpu_impl_2.cc (91%) rename tensorflow/core/kernels/{ => image}/mirror_pad_op_cpu_impl_3.cc (91%) rename tensorflow/core/kernels/{ => image}/mirror_pad_op_cpu_impl_4.cc (91%) rename tensorflow/core/kernels/{ => image}/mirror_pad_op_cpu_impl_5.cc (91%) rename tensorflow/core/kernels/{ => image}/mirror_pad_op_gpu.cu.cc (97%) rename tensorflow/core/kernels/{ => image}/mirror_pad_op_test.cc (100%) rename tensorflow/core/kernels/{ => image}/non_max_suppression_op.cc (99%) rename tensorflow/core/kernels/{ => image}/non_max_suppression_op.cu.cc (99%) rename tensorflow/core/kernels/{ => image}/non_max_suppression_op.h (92%) rename tensorflow/core/kernels/{ => image}/non_max_suppression_op_benchmark_test.cc (100%) rename tensorflow/core/kernels/{ => image}/non_max_suppression_op_gpu_test.cc (100%) rename tensorflow/core/kernels/{ => image}/non_max_suppression_op_test.cc (100%) rename tensorflow/core/kernels/{ => image}/random_crop_op.cc (99%) rename tensorflow/core/kernels/{ => image}/resize_area_op.cc (99%) rename tensorflow/core/kernels/{ => image}/resize_area_op_test.cc (100%) rename tensorflow/core/kernels/{ => image}/resize_bicubic_op.cc (99%) rename tensorflow/core/kernels/{ => image}/resize_bicubic_op_test.cc (100%) rename tensorflow/core/kernels/{ => image}/resize_bilinear_op.cc (99%) rename tensorflow/core/kernels/{ => image}/resize_bilinear_op.h (90%) rename tensorflow/core/kernels/{ => image}/resize_bilinear_op_gpu.cu.cc (99%) rename tensorflow/core/kernels/{ => image}/resize_bilinear_op_test.cc (100%) rename tensorflow/core/kernels/{ => image}/resize_nearest_neighbor_op.cc (99%) rename tensorflow/core/kernels/{ => image}/resize_nearest_neighbor_op.h (88%) rename tensorflow/core/kernels/{ => image}/resize_nearest_neighbor_op_gpu.cu.cc (99%) rename tensorflow/core/kernels/{ => image}/resize_nearest_neighbor_op_test.cc (100%) rename tensorflow/core/kernels/{ => image}/resize_op_benchmark_test.cc (100%) rename tensorflow/core/kernels/{ => image}/sample_distorted_bounding_box_op.cc (100%) rename tensorflow/core/kernels/{ => image}/sampling_kernels.cc (96%) rename tensorflow/core/kernels/{ => image}/sampling_kernels.h (100%) rename tensorflow/core/kernels/{ => image}/sampling_kernels_test.cc (98%) rename tensorflow/core/kernels/{ => image}/scale_and_translate_op.cc (99%) rename tensorflow/core/kernels/{ => image}/scale_and_translate_op.h (92%) rename tensorflow/core/kernels/{ => image}/scale_and_translate_op_test.cc (99%) rename tensorflow/core/{kernels => util}/image_resizer_state.h (98%) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 41eba6b5e28..12e143e7933 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1010,9 +1010,7 @@ cc_library( "//tensorflow/core/kernels:functional_ops", "//tensorflow/core/kernels:grappler", "//tensorflow/core/kernels:histogram_op", - "//tensorflow/core/kernels:image", "//tensorflow/core/kernels:io", - "//tensorflow/core/kernels/linalg:linalg", "//tensorflow/core/kernels:lookup", "//tensorflow/core/kernels:logging", 
"//tensorflow/core/kernels:manip", @@ -1046,6 +1044,8 @@ cc_library( "//tensorflow/core/kernels:summary_kernels", "//tensorflow/core/kernels:training_ops", "//tensorflow/core/kernels:word2vec_kernels", + "//tensorflow/core/kernels/linalg:linalg", + "//tensorflow/core/kernels/image:image", "//tensorflow/core/kernels/sparse:kernels", ] + if_not_windows([ "//tensorflow/core/kernels/neon:neon_depthwise_conv_op", diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index bfb192023a1..14e8d691d98 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -321,7 +321,6 @@ tf_kernel_library( deps = [ ":eigen_helpers", ":fill_functor", - ":image_resizer_state", ":ops_util", "//third_party/eigen3", "//tensorflow/core:core_cpu", @@ -341,32 +340,6 @@ cc_library( ], ) -tf_kernel_library( - name = "extract_image_patches_op", - prefix = "extract_image_patches_op", - deps = [ - ":bounds_check", - ":eigen_helpers", - ":ops_util", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//third_party/eigen3", - ], -) - -tf_kernel_library( - name = "extract_volume_patches_op", - prefix = "extract_volume_patches_op", - deps = [ - ":bounds_check", - ":eigen_helpers", - ":ops_util", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//third_party/eigen3", - ], -) - cc_library( name = "conv_3d", hdrs = ["conv_3d.h"], @@ -652,7 +625,6 @@ cc_library( cc_library( name = "batch_kernels", srcs = ["batch_kernels.cc"], - hdrs = ["batch_matmul_op_impl.h"], deps = [ ":ops_util_hdrs", "//tensorflow/core:framework", @@ -935,43 +907,6 @@ cc_library( ], ) -cc_library( - name = "image_resizer_state", - hdrs = ["image_resizer_state.h"], - visibility = ["//tensorflow:__subpackages__"], - deps = [ - ":bounds_check", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//third_party/eigen3", - ], -) - -cc_header_only_library( - name = "image_resizer_state_lib", - deps = [":image_resizer_state"], -) - -cc_library( - name = "sampling_kernels", - srcs = ["sampling_kernels.cc"], - hdrs = ["sampling_kernels.h"], - visibility = ["//visibility:private"], - deps = ["//tensorflow/core:lib"], -) - -tf_cc_test( - name = "sampling_kernels_test", - srcs = ["sampling_kernels_test.cc"], - deps = [ - ":sampling_kernels", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "@com_google_absl//absl/strings", - ], -) - # OpKernel libraries ---------------------------------------------------------- ARRAY_DEPS = [ @@ -1026,8 +961,6 @@ cc_library( ":depth_space_ops", ":diag_op", ":edit_distance_op", - ":extract_image_patches_op", - ":extract_volume_patches_op", ":fingerprint_op", ":gather_nd_op", ":gather_op", @@ -1038,7 +971,6 @@ cc_library( ":immutable_constant_op", ":inplace_ops", ":listdiff_op", - ":mirror_pad_op", ":one_hot_op", ":pack_op", ":pad_op", @@ -1170,12 +1102,6 @@ tf_kernel_library( deps = ARRAY_DEPS, ) -tf_kernel_library( - name = "mirror_pad_op", - prefix = "mirror_pad_op", - deps = ARRAY_DEPS, -) - tf_kernel_library( name = "one_hot_op", prefix = "one_hot_op", @@ -1730,7 +1656,6 @@ tf_cuda_cc_test( tags = ["no_cuda11"], # b/159664089 deps = [ ":conv_ops", - ":image", ":ops_testutil", ":ops_util", "//tensorflow/cc:cc_ops", @@ -1743,6 +1668,7 @@ tf_cuda_cc_test( "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core:testlib", + "//tensorflow/core/kernels/image", "@com_google_absl//absl/algorithm:container", ], ) @@ -1828,7 +1754,6 @@ tf_cuda_cc_test( tags = tf_cuda_tests_tags(), deps = [ ":conv_ops", - 
":image", ":ops_testutil", ":ops_util", "//tensorflow/cc:cc_ops", @@ -1841,69 +1766,7 @@ tf_cuda_cc_test( "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core:testlib", - ], -) - -tf_cc_test( - name = "decode_wav_op_test", - size = "small", - srcs = ["decode_wav_op_test.cc"], - deps = [ - ":decode_wav_op", - ":ops_testutil", - ":ops_util", - "//tensorflow/cc:cc_ops", - "//tensorflow/cc:client_session", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:framework_internal", - "//tensorflow/core:lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:tensorflow", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - ], -) - -tf_cc_test( - name = "encode_jpeg_op_test", - size = "small", - srcs = ["encode_jpeg_op_test.cc"], - deps = [ - ":encode_jpeg_op", - ":ops_testutil", - ":ops_util", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - ], -) - -tf_cc_test( - name = "encode_wav_op_test", - size = "small", - srcs = ["encode_wav_op_test.cc"], - deps = [ - ":decode_wav_op", - ":encode_wav_op", - ":ops_testutil", - ":ops_util", - "//tensorflow/cc:cc_ops", - "//tensorflow/cc:client_session", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:framework_internal", - "//tensorflow/core:lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:tensorflow", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", + "//tensorflow/core/kernels/image", ], ) @@ -2999,205 +2862,6 @@ tf_kernel_library( ], ) -cc_library( - name = "image", - deps = [ - ":adjust_contrast_op", - ":adjust_hue_op", - ":adjust_saturation_op", - ":attention_ops", - ":colorspace_op", - ":crop_and_resize_op", - ":decode_image_op", - ":draw_bounding_box_op", - ":encode_jpeg_op", - ":encode_png_op", - ":extract_jpeg_shape_op", - ":generate_box_proposals_op", - ":image_ops", - ":non_max_suppression_op", - ":random_crop_op", - ":resize_area_op", - ":resize_bicubic_op", - ":resize_bilinear_op", - ":resize_nearest_neighbor_op", - ":sample_distorted_bounding_box_op", - ":scale_and_translate_op", - ], -) - -IMAGE_DEPS = [ - ":bounds_check", - ":eigen_helpers", - ":image_resizer_state", - "//third_party/eigen3", - "//tensorflow/core:framework", - "//tensorflow/core:gif_internal", - "//tensorflow/core:jpeg_internal", - "//tensorflow/core:lib", - "//tensorflow/core:lib_internal", - "//tensorflow/core:png_internal", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core/util/tensor_bundle", -] - -tf_kernel_library( - name = "adjust_contrast_op", - prefix = "adjust_contrast_op", - deps = IMAGE_DEPS, -) - -cc_library( - name = "adjust_hsv_gpu_lib", - hdrs = ["adjust_hsv_gpu.cu.h"], - deps = ["//tensorflow/core:framework"], -) - -tf_kernel_library( - name = "adjust_hue_op", - prefix = "adjust_hue_op", - deps = IMAGE_DEPS + [":adjust_hsv_gpu_lib"], -) - -tf_kernel_library( - name = "adjust_saturation_op", - prefix = "adjust_saturation_op", - deps = IMAGE_DEPS + [":adjust_hsv_gpu_lib"], -) - -tf_kernel_library( - name = "attention_ops", - prefix = "attention_ops", - deps = IMAGE_DEPS, -) - -tf_kernel_library( - name = "colorspace_op", - prefix = "colorspace_op", - deps = IMAGE_DEPS, -) - -tf_kernel_library( - name = "crop_and_resize_op", - prefix = "crop_and_resize_op", - deps = IMAGE_DEPS + 
["//tensorflow/core:framework_internal"], -) - -tf_kernel_library( - name = "decode_image_op", - prefix = "decode_image_op", - deps = IMAGE_DEPS + ["@com_google_absl//absl/strings"], -) - -tf_kernel_library( - name = "draw_bounding_box_op", - prefix = "draw_bounding_box_op", - deps = IMAGE_DEPS, -) - -tf_kernel_library( - name = "encode_jpeg_op", - prefix = "encode_jpeg_op", - deps = IMAGE_DEPS, -) - -tf_kernel_library( - name = "encode_png_op", - prefix = "encode_png_op", - deps = IMAGE_DEPS, -) - -tf_kernel_library( - name = "extract_jpeg_shape_op", - prefix = "extract_jpeg_shape_op", - deps = IMAGE_DEPS, -) - -tf_kernel_library( - name = "generate_box_proposals_op", - gpu_srcs = ["generate_box_proposals_op.cu.cc"], - deps = [":gpu_prim_hdrs"] + if_cuda([ - ":non_max_suppression_op_gpu", - ]), -) - -tf_kernel_library( - name = "non_max_suppression_op", - prefix = "non_max_suppression_op", - deps = IMAGE_DEPS + [":gpu_prim_hdrs"], -) - -tf_kernel_library( - name = "scale_and_translate_op", - prefix = "scale_and_translate_op", - deps = IMAGE_DEPS + [":sampling_kernels"], -) - -tf_kernel_library( - name = "random_crop_op", - prefix = "random_crop_op", - deps = IMAGE_DEPS, -) - -tf_kernel_library( - name = "resize_area_op", - prefix = "resize_area_op", - deps = IMAGE_DEPS, -) - -tf_kernel_library( - name = "resize_bicubic_op", - prefix = "resize_bicubic_op", - deps = IMAGE_DEPS, -) - -tf_kernel_library( - name = "resize_bilinear_op", - prefix = "resize_bilinear_op", - deps = IMAGE_DEPS, -) - -tf_kernel_library( - name = "resize_nearest_neighbor_op", - prefix = "resize_nearest_neighbor_op", - deps = IMAGE_DEPS, -) - -tf_kernel_library( - name = "sample_distorted_bounding_box_op", - prefix = "sample_distorted_bounding_box_op", - deps = IMAGE_DEPS + [":stateless_random_ops"], -) - -tf_kernel_library( - name = "image_ops", - prefix = "image_ops", - deps = IMAGE_DEPS, -) - -tf_kernel_library( - name = "encode_wav_op", - prefix = "encode_wav_op", - deps = [ - ":bounds_check", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:lib_internal", - "//tensorflow/core:protos_all_cc", - ], -) - -tf_kernel_library( - name = "decode_wav_op", - prefix = "decode_wav_op", - deps = [ - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:lib_internal", - "//tensorflow/core:protos_all_cc", - ], -) - tf_cc_tests( name = "eigen_test", size = "small", @@ -3282,158 +2946,6 @@ tf_cc_tests( ], ) -tf_cc_tests( - name = "bonus_tests", - srcs = [ - "adjust_contrast_op_test.cc", - "colorspace_op_test.cc", - "crop_and_resize_op_test.cc", - "mirror_pad_op_test.cc", - "non_max_suppression_op_test.cc", - "resize_area_op_test.cc", - "resize_bicubic_op_test.cc", - "resize_nearest_neighbor_op_test.cc", - "scale_and_translate_op_test.cc", - ], - linkopts = select({ - "//tensorflow:macos": ["-headerpad_max_install_names"], - "//conditions:default": [], - }), - deps = [ - ":image", - ":mirror_pad_op", - ":ops_testutil", - ":ops_util", - ":sampling_kernels", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:lib_internal", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - ], -) - -tf_cc_test( - name = "non_max_suppression_op_benchmark_test", - srcs = ["non_max_suppression_op_benchmark_test.cc"], - deps = [ - ":image", - ":ops_testutil", - ":ops_util", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - 
"//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - ], -) - -tf_cuda_cc_test( - name = "resize_bilinear_op_test", - srcs = ["resize_bilinear_op_test.cc"], - tags = ["no_cuda_on_cpu_tap"], - deps = [ - ":image", - ":ops_testutil", - ":ops_util", - ":sampling_kernels", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:lib_internal", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - ], -) - -tf_cuda_cc_test( - name = "adjust_contrast_op_benchmark_test", - srcs = ["adjust_contrast_op_benchmark_test.cc"], - deps = [ - ":image", - ":ops_testutil", - ":ops_util", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - ], -) - -tf_cuda_cc_test( - name = "crop_and_resize_op_benchmark_test", - srcs = ["crop_and_resize_op_benchmark_test.cc"], - deps = [ - ":image", - ":ops_testutil", - ":ops_util", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - ], -) - -tf_cuda_cc_test( - name = "mirror_pad_op_benchmark_test", - srcs = ["mirror_pad_op_benchmark_test.cc"], - deps = [ - ":mirror_pad_op", - ":ops_testutil", - ":ops_util", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - ], -) - -tf_cuda_cc_test( - name = "non_max_suppression_op_gpu_test", - srcs = ["non_max_suppression_op_gpu_test.cc"], - tags = tf_cuda_tests_tags() + ["no_cuda_on_cpu_tap"], - deps = [ - ":image", - ":ops_testutil", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "@com_google_absl//absl/strings", - ], -) - -tf_cuda_cc_test( - name = "resize_benchmark_test", - srcs = ["resize_op_benchmark_test.cc"], - deps = [ - ":image", - ":ops_testutil", - ":ops_util", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - ], -) - cc_library( name = "io", deps = [ @@ -4312,7 +3824,6 @@ tf_kernel_library( ":conv_2d", ":conv_3d", ":eigen_contraction_kernel", - ":image_resizer_state", ":fill_functor", ":fused_eigen_output_kernels", ":ops_util", @@ -4323,6 +3834,7 @@ tf_kernel_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", + "//tensorflow/core/util:image_resizer_state", "//tensorflow/core/util/proto:proto_utils", "//tensorflow/stream_executor/gpu:gpu_asm_opts", ] + select({ @@ -5894,6 +5406,74 @@ tf_kernel_library( ], ) +tf_kernel_library( + name = "encode_wav_op", + prefix = "encode_wav_op", + deps = [ + ":bounds_check", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:protos_all_cc", + ], +) + +tf_cc_test( + name = "encode_wav_op_test", + size = "small", + srcs = ["encode_wav_op_test.cc"], + deps = [ + ":decode_wav_op", + ":encode_wav_op", + ":ops_testutil", + ":ops_util", + 
"//tensorflow/cc:cc_ops", + "//tensorflow/cc:client_session", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:tensorflow", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + ], +) + +tf_kernel_library( + name = "decode_wav_op", + prefix = "decode_wav_op", + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:protos_all_cc", + ], +) + +tf_cc_test( + name = "decode_wav_op_test", + size = "small", + srcs = ["decode_wav_op_test.cc"], + deps = [ + ":decode_wav_op", + ":ops_testutil", + ":ops_util", + "//tensorflow/cc:cc_ops", + "//tensorflow/cc:client_session", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:tensorflow", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + ], +) + filegroup( name = "spectrogram_test_data", srcs = [ @@ -6247,8 +5827,6 @@ filegroup( "matmul_op.h", "no_op.cc", "no_op.h", - "non_max_suppression_op.cc", - "non_max_suppression_op.h", "one_hot_op.cc", "one_hot_op.h", "ops_util.h", @@ -6296,7 +5874,10 @@ filegroup( "unpack_op.cc", "variable_ops.cc", "variable_ops.h", + ] + [ "//tensorflow/c/kernels:android_all_op_kernels", + "//tensorflow/core/kernels/image:non_max_suppression_op.cc", + "//tensorflow/core/kernels/image:non_max_suppression_op.h", ], ) @@ -6320,9 +5901,6 @@ filegroup( filegroup( name = "android_extended_ops_headers", srcs = [ - "adjust_contrast_op.h", - "adjust_hue_op.h", - "adjust_saturation_op.h", "argmax_op.h", "avgpooling_op.h", "batch_matmul_op_impl.h", @@ -6340,12 +5918,9 @@ filegroup( "depthwise_conv_op.h", "diag_op.h", "dilation_ops.h", - "extract_image_patches_op.h", "fake_quant_ops_functor.h", "fused_batch_norm_op.h", "gemm_functors.h", - "image_ops.h", - "image_resizer_state.h", "initializable_lookup_table.h", "inplace_ops.cc", "inplace_ops_functor.h", @@ -6357,8 +5932,6 @@ filegroup( "mfcc.h", "mfcc_dct.h", "mfcc_mel_filterbank.h", - "mirror_pad_op.h", - "mirror_pad_op_cpu_impl.h", "multinomial_op.h", "pad_op.h", "pooling_ops_3d.h", @@ -6369,8 +5942,6 @@ filegroup( "relu_op.h", "relu_op_functor.h", "reshape_util.h", - "resize_bilinear_op.h", - "resize_nearest_neighbor_op.h", "reverse_op.h", "save_restore_tensor.h", "scan_ops.h", @@ -6400,15 +5971,26 @@ filegroup( "xent_op.h", ] + [ "//tensorflow/core/kernels/boosted_trees/quantiles:weighted_quantiles_hdrs", + "//tensorflow/core/kernels/image:adjust_contrast_op.h", + "//tensorflow/core/kernels/image:adjust_hue_op.h", + "//tensorflow/core/kernels/image:adjust_saturation_op.h", + "//tensorflow/core/kernels/image:extract_image_patches_op.h", + "//tensorflow/core/kernels/image:image_ops.h", + "//tensorflow/core/kernels/image:mirror_pad_op.h", + "//tensorflow/core/kernels/image:mirror_pad_op_cpu_impl.h", + "//tensorflow/core/kernels/image:resize_bilinear_op.h", + "//tensorflow/core/kernels/image:resize_nearest_neighbor_op.h", "//tensorflow/core/kernels/linalg:linalg_ops_common.h", "//tensorflow/core/kernels/linalg:matrix_diag_op.h", "//tensorflow/core/kernels/linalg:matrix_set_diag_op.h", + "//tensorflow/core/util:image_resizer_state.h", ], ) filegroup( name = "android_extended_ops_group1", srcs = [ + ":android_extended_ops_headers", "argmax_op.cc", 
"avgpooling_op.cc", "batch_matmul_op_real.cc", @@ -6421,18 +6003,16 @@ filegroup( "conv_grad_input_ops.cc", "conv_grad_ops.h", "conv_grad_ops_3d.cc", - "conv_grad_shape_utils.h", "conv_grad_shape_utils.cc", + "conv_grad_shape_utils.h", "conv_ops.cc", "conv_ops_3d.cc", "conv_ops_fused_double.cc", "conv_ops_fused_float.cc", "conv_ops_fused_half.cc", - "conv_ops_fused_impl.h", "conv_ops_fused_image_transform.cc", + "conv_ops_fused_impl.h", "conv_ops_using_gemm.cc", - "crop_and_resize_op.cc", - "crop_and_resize_op.h", "cwise_op_abs.cc", "cwise_op_add_1.cc", "cwise_op_add_2.cc", @@ -6448,8 +6028,6 @@ filegroup( "cwise_op_div.cc", "cwise_op_equal_to_1.cc", "cwise_op_equal_to_2.cc", - "cwise_op_not_equal_to_1.cc", - "cwise_op_not_equal_to_2.cc", "cwise_op_erf.cc", "cwise_op_exp.cc", "cwise_op_floor.cc", @@ -6474,6 +6052,8 @@ filegroup( "cwise_op_mul_2.cc", "cwise_op_neg_1.cc", "cwise_op_neg_2.cc", + "cwise_op_not_equal_to_1.cc", + "cwise_op_not_equal_to_2.cc", "cwise_op_pow.cc", "cwise_op_real.cc", "cwise_op_reciprocal.cc", @@ -6491,9 +6071,9 @@ filegroup( "cwise_op_sub.cc", "cwise_op_tan.cc", "cwise_op_tanh.cc", - "cwise_op_xlogy.cc", - "cwise_op_xlog1py.cc", "cwise_op_xdivy.cc", + "cwise_op_xlog1py.cc", + "cwise_op_xlogy.cc", "data_format_ops.cc", "decode_raw_op.cc", "decode_wav_op.cc", @@ -6501,9 +6081,9 @@ filegroup( "deep_conv2d.h", "depthwise_conv_op.cc", "dynamic_partition_op.cc", - "encode_wav_op.cc", "eigen_contraction_kernel.cc", "eigen_contraction_kernel.h", + "encode_wav_op.cc", "fake_quant_ops.cc", "fifo_queue.cc", "fifo_queue_op.cc", @@ -6514,8 +6094,9 @@ filegroup( "population_count_op.cc", "population_count_op.h", "winograd_transform.h", - ":android_extended_ops_headers", ] + [ + "//tensorflow/core/kernels/image:crop_and_resize_op.cc", + "//tensorflow/core/kernels/image:crop_and_resize_op.h", "//tensorflow/core/kernels/linalg:einsum_op_impl_half.cc", "//tensorflow/core/kernels/linalg:einsum_op_impl_bfloat16.cc", "//tensorflow/core/kernels/linalg:einsum_op_impl_int32.cc", @@ -6538,9 +6119,7 @@ filegroup( filegroup( name = "android_extended_ops_group2", srcs = [ - "adjust_contrast_op.cc", - "adjust_hue_op.cc", - "adjust_saturation_op.cc", + ":android_extended_ops_headers", "base64_ops.cc", "batchtospace_op.cc", "broadcast_to_op.cc", @@ -6550,9 +6129,7 @@ filegroup( "diag_op.cc", "dilation_ops.cc", "dynamic_stitch_op.cc", - "extract_image_patches_op.cc", "fft_ops.cc", - "image_ops.cc", "in_topk_op.cc", "in_topk_op.h", "initializable_lookup_table.cc", @@ -6568,12 +6145,6 @@ filegroup( "mfcc_dct.cc", "mfcc_mel_filterbank.cc", "mfcc_op.cc", - "mirror_pad_op.cc", - "mirror_pad_op_cpu_impl_1.cc", - "mirror_pad_op_cpu_impl_2.cc", - "mirror_pad_op_cpu_impl_3.cc", - "mirror_pad_op_cpu_impl_4.cc", - "mirror_pad_op_cpu_impl_5.cc", "multinomial_op.cc", "pad_op.cc", "padding_fifo_queue.cc", @@ -6600,11 +6171,8 @@ filegroup( "regex_replace_op.cc", "relu_op.cc", "reshape_util.cc", - "resize_bilinear_op.cc", - "resize_nearest_neighbor_op.cc", "restore_op.cc", "reverse_op.cc", - "sample_distorted_bounding_box_op.cc", "save_op.cc", "save_restore_tensor.cc", "save_restore_v2_ops.cc", @@ -6682,9 +6250,22 @@ filegroup( "unique_op.cc", "where_op.cc", "xent_op.cc", - ":android_extended_ops_headers", ] + [ "//tensorflow/core/kernels/boosted_trees:quantile_ops.cc", + "//tensorflow/core/kernels/image:adjust_contrast_op.cc", + "//tensorflow/core/kernels/image:adjust_hue_op.cc", + "//tensorflow/core/kernels/image:adjust_saturation_op.cc", + "//tensorflow/core/kernels/image:extract_image_patches_op.cc", + 
"//tensorflow/core/kernels/image:image_ops.cc", + "//tensorflow/core/kernels/image:mirror_pad_op.cc", + "//tensorflow/core/kernels/image:mirror_pad_op_cpu_impl_1.cc", + "//tensorflow/core/kernels/image:mirror_pad_op_cpu_impl_2.cc", + "//tensorflow/core/kernels/image:mirror_pad_op_cpu_impl_3.cc", + "//tensorflow/core/kernels/image:mirror_pad_op_cpu_impl_4.cc", + "//tensorflow/core/kernels/image:mirror_pad_op_cpu_impl_5.cc", + "//tensorflow/core/kernels/image:resize_bilinear_op.cc", + "//tensorflow/core/kernels/image:resize_nearest_neighbor_op.cc", + "//tensorflow/core/kernels/image:sample_distorted_bounding_box_op.cc", "//tensorflow/core/kernels/linalg:linalg_ops_common.cc", "//tensorflow/core/kernels/linalg:matrix_diag_op.cc", "//tensorflow/core/kernels/linalg:matrix_inverse_op.cc", @@ -6727,7 +6308,6 @@ ANDROID_TEXTUAL_HDRS = [ "eigen_spatial_convolutions-inl.h", "gather_nd_op_cpu_impl.h", "gemm_functors.h", - "mirror_pad_op_cpu_impl.h", "scatter_nd_op_cpu_impl.h", "slice_op_cpu_impl.h", "strided_slice_op_impl.h", @@ -6742,6 +6322,7 @@ filegroup( srcs = [ "//tensorflow/c/kernels:android_all_op_kernels", "//tensorflow/core/kernels/data:android_all_op_kernels", + "//tensorflow/core/kernels/image:android_all_op_kernels", "//tensorflow/core/kernels/linalg:android_all_op_kernels", ] + glob( [ @@ -6773,13 +6354,6 @@ filegroup( "sparse_cross_op.*", "text_line_reader_op.*", "summary_image_op.*", - "decode_image_op.*", - "encode_png_op.*", - "encode_jpeg_op.*", - "extract_jpeg_shape_op.*", - "decode_jpeg_op.*", - "decode_and_crop_jpeg_op.*", - "decode_gif_op.*", "identity_reader_op.*", "remote_fused_graph_execute_op.*", "remote_fused_graph_rewriter_transform.*", @@ -6824,7 +6398,10 @@ filegroup( filegroup( name = "android_all_ops_textual_hdrs", - srcs = ANDROID_TEXTUAL_HDRS, + srcs = ANDROID_TEXTUAL_HDRS + [ + "//tensorflow/core/kernels/image:android_all_ops_textual_hdrs", + "//tensorflow/core/util:image_resizer_state.h", + ], visibility = ["//visibility:public"], ) # LINT.ThenChange(//tensorflow/contrib/makefile/tf_op_files.txt) @@ -6868,26 +6445,6 @@ build_test( targets = [":portable_tensorflow_kernels"], ) -cc_library( - name = "android_tensorflow_image_op", - srcs = if_android(["decode_image_op.cc"]), - copts = tf_copts(), - linkopts = ["-ldl"], - visibility = ["//visibility:public"], - deps = [ - "//tensorflow/core:android_gif_internal", - "//tensorflow/core:android_jpeg_internal", - "//tensorflow/core:android_png_internal", - "//tensorflow/core:portable_tensorflow_lib_lite", - ], - alwayslink = 1, -) - -build_test( - name = "android_tensorflow_image_op_build_test", - targets = [":android_tensorflow_image_op"], -) - cc_library( name = "android_whole_file_read_ops", srcs = if_android(["whole_file_read_ops.cc"]), @@ -6930,7 +6487,6 @@ tf_kernel_library( ":conv_ops", ":cwise_op", ":eigen_helpers", - ":image_resizer_state", ":meta_support", ":ops_util", ":pooling_ops", @@ -6938,6 +6494,7 @@ tf_kernel_library( "//tensorflow/core:core_cpu", "//tensorflow/core:framework", "//tensorflow/core:lib", + "//tensorflow/core/util:image_resizer_state", "//third_party/eigen3", "@gemmlowp", ], @@ -7831,7 +7388,6 @@ test_suite( ":cast_op_test", ":concat_op_test", ":control_flow_ops_test", - ":crop_and_resize_op_test", ":cwise_ops_test", ":deep_conv2d_test", ":dequantize_op_test", @@ -7844,7 +7400,6 @@ test_suite( ":mfcc_test", ":multinomial_op_test", ":nn_ops_test", - ":non_max_suppression_op_test", ":quantization_utils_test", ":quantize_and_dequantize_op_test", ":quantize_op_test", @@ -7858,7 +7413,6 @@ 
test_suite( ":random_poisson_op_test", ":reduction_ops_test", ":requantization_range_op_test", - ":resize_bilinear_op_test", ":scatter_op_test", ":segment_reduction_ops_test", ":slice_op_test", @@ -7868,6 +7422,9 @@ test_suite( ":strided_slice_op_test", ":unique_op_test", ":variable_ops_test", + "//tensorflow/core/kernels/image:crop_and_resize_op_test", + "//tensorflow/core/kernels/image:non_max_suppression_op_test", + "//tensorflow/core/kernels/image:resize_bilinear_op_test", ], ) diff --git a/tensorflow/core/kernels/conv_ops_fused_image_transform.cc b/tensorflow/core/kernels/conv_ops_fused_image_transform.cc index 9055639aaaf..091e483b2ca 100644 --- a/tensorflow/core/kernels/conv_ops_fused_image_transform.cc +++ b/tensorflow/core/kernels/conv_ops_fused_image_transform.cc @@ -34,9 +34,9 @@ limitations under the License. #include "tensorflow/core/kernels/conv_2d.h" #include "tensorflow/core/kernels/conv_ops.h" #include "tensorflow/core/kernels/gemm_functors.h" -#include "tensorflow/core/kernels/image_resizer_state.h" #include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/lib/core/threadpool.h" +#include "tensorflow/core/util/image_resizer_state.h" #include "tensorflow/core/util/mirror_pad_mode.h" #include "tensorflow/core/util/padding.h" #include "tensorflow/core/util/tensor_format.h" diff --git a/tensorflow/core/kernels/conv_ops_using_gemm.cc b/tensorflow/core/kernels/conv_ops_using_gemm.cc index dff1a533ee0..71eda28899e 100644 --- a/tensorflow/core/kernels/conv_ops_using_gemm.cc +++ b/tensorflow/core/kernels/conv_ops_using_gemm.cc @@ -62,7 +62,7 @@ limitations under the License. #include "tensorflow/core/framework/tensor_slice.h" #include "tensorflow/core/kernels/conv_ops.h" #include "tensorflow/core/kernels/gemm_functors.h" -#include "tensorflow/core/kernels/image_resizer_state.h" +#include "tensorflow/core/util/image_resizer_state.h" #include "tensorflow/core/util/mirror_pad_mode.h" #include "tensorflow/core/util/padding.h" #include "tensorflow/core/util/tensor_format.h" diff --git a/tensorflow/core/kernels/image/BUILD b/tensorflow/core/kernels/image/BUILD new file mode 100644 index 00000000000..f7ad9ab0371 --- /dev/null +++ b/tensorflow/core/kernels/image/BUILD @@ -0,0 +1,449 @@ +load("@bazel_skylib//rules:build_test.bzl", "build_test") +load( + "//tensorflow:tensorflow.bzl", + "if_android", + "tf_cc_test", + "tf_cc_tests", + "tf_copts", + "tf_kernel_library", +) +load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") +load( + "//tensorflow/core/platform:build_config_root.bzl", + "tf_cuda_tests_tags", +) +load("//tensorflow:tensorflow.bzl", "tf_cuda_cc_test") + +# TODO(rmlarsen): Remove ASAP. +package_group( + name = "friends", + packages = [ + "//tensorflow/...", + "//tensorflow_text/...", + ], +) + +package( + default_visibility = [ + ":friends", + "//tensorflow:__subpackages__", + "//tensorflow:internal", + ], + licenses = ["notice"], # Apache 2.0 +) + +# Export a few files for use on Android. 
+exports_files([ + "adjust_contrast_op.cc", + "adjust_contrast_op.h", + "adjust_hue_op.cc", + "adjust_hue_op.h", + "adjust_saturation_op.cc", + "adjust_saturation_op.h", + "crop_and_resize_op.cc", + "crop_and_resize_op.h", + "extract_image_patches_op.cc", + "extract_image_patches_op.h", + "image_ops.h", + "image_ops.cc", + "mirror_pad_op.cc", + "mirror_pad_op.h", + "mirror_pad_op_cpu_impl.h", + "mirror_pad_op_cpu_impl_1.cc", + "mirror_pad_op_cpu_impl_2.cc", + "mirror_pad_op_cpu_impl_3.cc", + "mirror_pad_op_cpu_impl_4.cc", + "mirror_pad_op_cpu_impl_5.cc", + "non_max_suppression_op.cc", + "non_max_suppression_op.h", + "resize_bilinear_op.cc", + "resize_bilinear_op.h", + "resize_nearest_neighbor_op.cc", + "resize_nearest_neighbor_op.h", + "sample_distorted_bounding_box_op.cc", +]) + +# Private support libraries --------------------------------------------------- +cc_library( + name = "sampling_kernels", + srcs = ["sampling_kernels.cc"], + hdrs = ["sampling_kernels.h"], + visibility = ["//visibility:private"], + deps = ["//tensorflow/core:lib"], +) + +tf_cc_test( + name = "sampling_kernels_test", + srcs = ["sampling_kernels_test.cc"], + deps = [ + ":sampling_kernels", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "@com_google_absl//absl/strings", + ], +) + +# Public support libraries ----------------------------------------------------< +cc_library( + name = "image", + deps = [ + ":adjust_contrast_op", + ":adjust_hue_op", + ":adjust_saturation_op", + ":attention_ops", + ":colorspace_op", + ":crop_and_resize_op", + ":decode_image_op", + ":draw_bounding_box_op", + ":encode_jpeg_op", + ":encode_png_op", + ":extract_image_patches_op", + ":extract_jpeg_shape_op", + ":extract_volume_patches_op", + ":generate_box_proposals_op", + ":image_ops", + ":mirror_pad_op", + ":non_max_suppression_op", + ":random_crop_op", + ":resize_area_op", + ":resize_bicubic_op", + ":resize_bilinear_op", + ":resize_nearest_neighbor_op", + ":sample_distorted_bounding_box_op", + ":scale_and_translate_op", + ], +) + +IMAGE_DEPS = [ + "//third_party/eigen3", + "//tensorflow/core:framework", + "//tensorflow/core:gif_internal", + "//tensorflow/core:jpeg_internal", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:png_internal", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core/kernels:bounds_check", + "//tensorflow/core/kernels:eigen_helpers", + "//tensorflow/core/util/tensor_bundle", + "//tensorflow/core/util:image_resizer_state", +] + +IMAGE_TEST_DEPS = [ + "//tensorflow/core/kernels:ops_testutil", + "//tensorflow/core/kernels:ops_util", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", +] + +tf_kernel_library( + name = "adjust_contrast_op", + prefix = "adjust_contrast_op", + deps = IMAGE_DEPS, +) + +cc_library( + name = "adjust_hsv_gpu_lib", + hdrs = ["adjust_hsv_gpu.cu.h"], + deps = ["//tensorflow/core:framework"], +) + +tf_kernel_library( + name = "adjust_hue_op", + prefix = "adjust_hue_op", + deps = IMAGE_DEPS + [":adjust_hsv_gpu_lib"], +) + +tf_kernel_library( + name = "adjust_saturation_op", + prefix = "adjust_saturation_op", + deps = IMAGE_DEPS + [":adjust_hsv_gpu_lib"], +) + +tf_kernel_library( + name = "attention_ops", + prefix = "attention_ops", + deps = IMAGE_DEPS, +) + +tf_kernel_library( + name = "colorspace_op", + prefix = 
"colorspace_op", + deps = IMAGE_DEPS, +) + +tf_kernel_library( + name = "crop_and_resize_op", + prefix = "crop_and_resize_op", + deps = IMAGE_DEPS + ["//tensorflow/core:framework_internal"], +) + +tf_kernel_library( + name = "decode_image_op", + prefix = "decode_image_op", + deps = IMAGE_DEPS + ["@com_google_absl//absl/strings"], +) + +tf_kernel_library( + name = "draw_bounding_box_op", + prefix = "draw_bounding_box_op", + deps = IMAGE_DEPS, +) + +tf_kernel_library( + name = "encode_jpeg_op", + prefix = "encode_jpeg_op", + deps = IMAGE_DEPS, +) + +tf_kernel_library( + name = "encode_png_op", + prefix = "encode_png_op", + deps = IMAGE_DEPS, +) + +tf_kernel_library( + name = "extract_jpeg_shape_op", + prefix = "extract_jpeg_shape_op", + deps = IMAGE_DEPS, +) + +tf_kernel_library( + name = "extract_image_patches_op", + prefix = "extract_image_patches_op", + deps = [ + "//tensorflow/core/kernels:ops_util", + ] + IMAGE_DEPS, +) + +tf_kernel_library( + name = "extract_volume_patches_op", + prefix = "extract_volume_patches_op", + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core/kernels:eigen_helpers", + "//tensorflow/core/kernels:ops_util", + "//third_party/eigen3", + ], +) + +tf_kernel_library( + name = "generate_box_proposals_op", + gpu_srcs = ["generate_box_proposals_op.cu.cc"], + deps = ["//tensorflow/core/kernels:gpu_prim_hdrs"] + if_cuda([ + ":non_max_suppression_op_gpu", + ]), +) + +tf_kernel_library( + name = "non_max_suppression_op", + prefix = "non_max_suppression_op", + deps = IMAGE_DEPS + ["//tensorflow/core/kernels:gpu_prim_hdrs"], +) + +tf_kernel_library( + name = "scale_and_translate_op", + prefix = "scale_and_translate_op", + deps = IMAGE_DEPS + [":sampling_kernels"], +) + +tf_kernel_library( + name = "random_crop_op", + prefix = "random_crop_op", + deps = IMAGE_DEPS, +) + +tf_kernel_library( + name = "resize_area_op", + prefix = "resize_area_op", + deps = IMAGE_DEPS, +) + +tf_kernel_library( + name = "resize_bicubic_op", + prefix = "resize_bicubic_op", + deps = IMAGE_DEPS, +) + +tf_kernel_library( + name = "resize_bilinear_op", + prefix = "resize_bilinear_op", + deps = IMAGE_DEPS, +) + +tf_kernel_library( + name = "resize_nearest_neighbor_op", + prefix = "resize_nearest_neighbor_op", + deps = IMAGE_DEPS, +) + +tf_kernel_library( + name = "sample_distorted_bounding_box_op", + prefix = "sample_distorted_bounding_box_op", + deps = IMAGE_DEPS + ["//tensorflow/core/kernels:stateless_random_ops"], +) + +tf_kernel_library( + name = "image_ops", + prefix = "image_ops", + deps = IMAGE_DEPS, +) + +tf_kernel_library( + name = "mirror_pad_op", + prefix = "mirror_pad_op", + deps = IMAGE_DEPS, +) + +# Tests ------------------------ + +tf_cc_tests( + name = "bonus_tests", + srcs = [ + "adjust_contrast_op_test.cc", + "colorspace_op_test.cc", + "crop_and_resize_op_test.cc", + "mirror_pad_op_test.cc", + "non_max_suppression_op_test.cc", + "resize_area_op_test.cc", + "resize_bicubic_op_test.cc", + "resize_nearest_neighbor_op_test.cc", + "scale_and_translate_op_test.cc", + ], + linkopts = select({ + "//tensorflow:macos": ["-headerpad_max_install_names"], + "//conditions:default": [], + }), + deps = [ + ":image", + ":sampling_kernels", + ":mirror_pad_op", + ] + IMAGE_TEST_DEPS, +) + +tf_cc_test( + name = "non_max_suppression_op_benchmark_test", + srcs = ["non_max_suppression_op_benchmark_test.cc"], + deps = [ + ":image", + ] + IMAGE_TEST_DEPS, +) + +tf_cuda_cc_test( + name = "resize_bilinear_op_test", + srcs = ["resize_bilinear_op_test.cc"], + tags = 
["no_cuda_on_cpu_tap"], + deps = [ + ":image", + ":sampling_kernels", + ] + IMAGE_TEST_DEPS, +) + +tf_cuda_cc_test( + name = "adjust_contrast_op_benchmark_test", + srcs = ["adjust_contrast_op_benchmark_test.cc"], + deps = [ + ":image", + ] + IMAGE_TEST_DEPS, +) + +tf_cuda_cc_test( + name = "crop_and_resize_op_benchmark_test", + srcs = ["crop_and_resize_op_benchmark_test.cc"], + deps = [ + ":image", + ] + IMAGE_TEST_DEPS, +) + +tf_cuda_cc_test( + name = "mirror_pad_op_benchmark_test", + srcs = ["mirror_pad_op_benchmark_test.cc"], + deps = [ + ":mirror_pad_op", + ] + IMAGE_TEST_DEPS, +) + +tf_cuda_cc_test( + name = "non_max_suppression_op_gpu_test", + srcs = ["non_max_suppression_op_gpu_test.cc"], + tags = tf_cuda_tests_tags() + ["no_cuda_on_cpu_tap"], + deps = [ + ":image", + "@com_google_absl//absl/strings", + ] + IMAGE_TEST_DEPS, +) + +tf_cuda_cc_test( + name = "resize_benchmark_test", + srcs = ["resize_op_benchmark_test.cc"], + deps = [ + ":image", + ] + IMAGE_TEST_DEPS, +) + +tf_cc_test( + name = "encode_jpeg_op_test", + size = "small", + srcs = ["encode_jpeg_op_test.cc"], + deps = [ + ":encode_jpeg_op", + ] + IMAGE_TEST_DEPS, +) + +cc_library( + name = "android_tensorflow_image_op", + srcs = if_android(["decode_image_op.cc"]), + copts = tf_copts(), + linkopts = ["-ldl"], + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/core:android_gif_internal", + "//tensorflow/core:android_jpeg_internal", + "//tensorflow/core:android_png_internal", + "//tensorflow/core:portable_tensorflow_lib_lite", + ], + alwayslink = 1, +) + +build_test( + name = "android_tensorflow_image_op_build_test", + targets = [":android_tensorflow_image_op"], +) + +# A file group which contains all operators which are known to work on mobile. +filegroup( + name = "android_all_op_kernels", + srcs = glob( + [ + "*.cc", + "*.h", + ], + exclude = [ + "*test.cc", + "*test.h", + "*_test_*", + "decode_image_op.*", + "encode_png_op.*", + "encode_jpeg_op.*", + "extract_jpeg_shape_op.*", + "decode_jpeg_op.*", + "decode_and_crop_jpeg_op.*", + "decode_gif_op.*", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) + +filegroup( + name = "android_all_ops_textual_hdrs", + srcs = ["mirror_pad_op.h"], + visibility = ["//visibility:public"], +) diff --git a/tensorflow/core/kernels/adjust_contrast_op.cc b/tensorflow/core/kernels/image/adjust_contrast_op.cc similarity index 99% rename from tensorflow/core/kernels/adjust_contrast_op.cc rename to tensorflow/core/kernels/image/adjust_contrast_op.cc index c13619e0e5f..6853465d9db 100644 --- a/tensorflow/core/kernels/adjust_contrast_op.cc +++ b/tensorflow/core/kernels/image/adjust_contrast_op.cc @@ -16,8 +16,10 @@ limitations under the License. 
// See docs in ../ops/image_ops.cc #define EIGEN_USE_THREADS -#include "tensorflow/core/kernels/adjust_contrast_op.h" +#include "tensorflow/core/kernels/image/adjust_contrast_op.h" + #include + #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" diff --git a/tensorflow/core/kernels/adjust_contrast_op.h b/tensorflow/core/kernels/image/adjust_contrast_op.h similarity index 97% rename from tensorflow/core/kernels/adjust_contrast_op.h rename to tensorflow/core/kernels/image/adjust_contrast_op.h index 3e501bccee3..4bff5f73a63 100644 --- a/tensorflow/core/kernels/adjust_contrast_op.h +++ b/tensorflow/core/kernels/image/adjust_contrast_op.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_ADJUST_CONTRAST_OP_H_ -#define TENSORFLOW_CORE_KERNELS_ADJUST_CONTRAST_OP_H_ +#ifndef TENSORFLOW_CORE_KERNELS_IMAGEADJUST_CONTRAST_OP_H_ +#define TENSORFLOW_CORE_KERNELS_IMAGEADJUST_CONTRAST_OP_H_ #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/tensor_types.h" @@ -157,4 +157,4 @@ struct AdjustContrastv2 { } // namespace functor } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_ADJUST_CONTRAST_OP_H_ +#endif // TENSORFLOW_CORE_KERNELS_IMAGEADJUST_CONTRAST_OP_H_ diff --git a/tensorflow/core/kernels/adjust_contrast_op_benchmark_test.cc b/tensorflow/core/kernels/image/adjust_contrast_op_benchmark_test.cc similarity index 100% rename from tensorflow/core/kernels/adjust_contrast_op_benchmark_test.cc rename to tensorflow/core/kernels/image/adjust_contrast_op_benchmark_test.cc diff --git a/tensorflow/core/kernels/adjust_contrast_op_gpu.cu.cc b/tensorflow/core/kernels/image/adjust_contrast_op_gpu.cu.cc similarity index 96% rename from tensorflow/core/kernels/adjust_contrast_op_gpu.cu.cc rename to tensorflow/core/kernels/image/adjust_contrast_op_gpu.cu.cc index e072dc46f5f..147700c1574 100644 --- a/tensorflow/core/kernels/adjust_contrast_op_gpu.cu.cc +++ b/tensorflow/core/kernels/image/adjust_contrast_op_gpu.cu.cc @@ -18,9 +18,8 @@ limitations under the License. #define EIGEN_USE_GPU -#include "tensorflow/core/kernels/adjust_contrast_op.h" - #include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/kernels/image/adjust_contrast_op.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/adjust_contrast_op_test.cc b/tensorflow/core/kernels/image/adjust_contrast_op_test.cc similarity index 100% rename from tensorflow/core/kernels/adjust_contrast_op_test.cc rename to tensorflow/core/kernels/image/adjust_contrast_op_test.cc diff --git a/tensorflow/core/kernels/adjust_hsv_gpu.cu.h b/tensorflow/core/kernels/image/adjust_hsv_gpu.cu.h similarity index 96% rename from tensorflow/core/kernels/adjust_hsv_gpu.cu.h rename to tensorflow/core/kernels/image/adjust_hsv_gpu.cu.h index ba4427ffb9d..42511f249bb 100644 --- a/tensorflow/core/kernels/adjust_hsv_gpu.cu.h +++ b/tensorflow/core/kernels/image/adjust_hsv_gpu.cu.h @@ -11,8 +11,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_ADJUST_HSV_GPU_CU_H_ -#define TENSORFLOW_CORE_KERNELS_ADJUST_HSV_GPU_CU_H_ +#ifndef TENSORFLOW_CORE_KERNELS_IMAGEADJUST_HSV_GPU_CU_H_ +#define TENSORFLOW_CORE_KERNELS_IMAGEADJUST_HSV_GPU_CU_H_ #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM @@ -142,4 +142,4 @@ __global__ void adjust_hsv_nhwc( } // namespace tensorflow #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#endif // TENSORFLOW_CORE_KERNELS_ADJUST_HSV_GPU_CU_H_ +#endif // TENSORFLOW_CORE_KERNELS_IMAGEADJUST_HSV_GPU_CU_H_ diff --git a/tensorflow/core/kernels/adjust_hue_op.cc b/tensorflow/core/kernels/image/adjust_hue_op.cc similarity index 99% rename from tensorflow/core/kernels/adjust_hue_op.cc rename to tensorflow/core/kernels/image/adjust_hue_op.cc index c1993029ac6..764665be48e 100644 --- a/tensorflow/core/kernels/adjust_hue_op.cc +++ b/tensorflow/core/kernels/image/adjust_hue_op.cc @@ -17,9 +17,9 @@ limitations under the License. #define EIGEN_USE_GPU #endif -#include +#include "tensorflow/core/kernels/image/adjust_hue_op.h" -#include "tensorflow/core/kernels/adjust_hue_op.h" +#include #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/op_kernel.h" diff --git a/tensorflow/core/kernels/adjust_hue_op.h b/tensorflow/core/kernels/image/adjust_hue_op.h similarity index 88% rename from tensorflow/core/kernels/adjust_hue_op.h rename to tensorflow/core/kernels/image/adjust_hue_op.h index edaf7f538e3..6a5758a44fb 100644 --- a/tensorflow/core/kernels/adjust_hue_op.h +++ b/tensorflow/core/kernels/image/adjust_hue_op.h @@ -11,8 +11,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_ADJUST_HUE_OP_H_ -#define TENSORFLOW_CORE_KERNELS_ADJUST_HUE_OP_H_ +#ifndef TENSORFLOW_CORE_KERNELS_IMAGEADJUST_HUE_OP_H_ +#define TENSORFLOW_CORE_KERNELS_IMAGEADJUST_HUE_OP_H_ #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #define EIGEN_USE_GPU @@ -38,4 +38,4 @@ struct AdjustHueGPU { } // namespace tensorflow #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#endif // TENSORFLOW_CORE_KERNELS_ADJUST_HUE_OP_H_ +#endif // TENSORFLOW_CORE_KERNELS_IMAGEADJUST_HUE_OP_H_ diff --git a/tensorflow/core/kernels/adjust_hue_op_gpu.cu.cc b/tensorflow/core/kernels/image/adjust_hue_op_gpu.cu.cc similarity index 93% rename from tensorflow/core/kernels/adjust_hue_op_gpu.cu.cc rename to tensorflow/core/kernels/image/adjust_hue_op_gpu.cu.cc index 174ca0002af..10c1ddb6aaf 100644 --- a/tensorflow/core/kernels/adjust_hue_op_gpu.cu.cc +++ b/tensorflow/core/kernels/image/adjust_hue_op_gpu.cu.cc @@ -16,8 +16,8 @@ limitations under the License. 
#define EIGEN_USE_GPU -#include "tensorflow/core/kernels/adjust_hsv_gpu.cu.h" -#include "tensorflow/core/kernels/adjust_hue_op.h" +#include "tensorflow/core/kernels/image/adjust_hsv_gpu.cu.h" +#include "tensorflow/core/kernels/image/adjust_hue_op.h" #include "tensorflow/core/util/gpu_kernel_helper.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/adjust_saturation_op.cc b/tensorflow/core/kernels/image/adjust_saturation_op.cc similarity index 99% rename from tensorflow/core/kernels/adjust_saturation_op.cc rename to tensorflow/core/kernels/image/adjust_saturation_op.cc index d1fc9d349be..41b0988cc50 100644 --- a/tensorflow/core/kernels/adjust_saturation_op.cc +++ b/tensorflow/core/kernels/image/adjust_saturation_op.cc @@ -18,8 +18,10 @@ limitations under the License. #define EIGEN_USE_GPU #endif -#include "tensorflow/core/kernels/adjust_saturation_op.h" +#include "tensorflow/core/kernels/image/adjust_saturation_op.h" + #include + #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" diff --git a/tensorflow/core/kernels/adjust_saturation_op.h b/tensorflow/core/kernels/image/adjust_saturation_op.h similarity index 87% rename from tensorflow/core/kernels/adjust_saturation_op.h rename to tensorflow/core/kernels/image/adjust_saturation_op.h index 0117a48ead8..4a1a619e1fd 100644 --- a/tensorflow/core/kernels/adjust_saturation_op.h +++ b/tensorflow/core/kernels/image/adjust_saturation_op.h @@ -11,8 +11,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_ADJUST_SATURATION_OP_H_ -#define TENSORFLOW_CORE_KERNELS_ADJUST_SATURATION_OP_H_ +#ifndef TENSORFLOW_CORE_KERNELS_IMAGEADJUST_SATURATION_OP_H_ +#define TENSORFLOW_CORE_KERNELS_IMAGEADJUST_SATURATION_OP_H_ #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM #define EIGEN_USE_GPU @@ -38,4 +38,4 @@ struct AdjustSaturationGPU { } // namespace tensorflow #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#endif // TENSORFLOW_CORE_KERNELS_ADJUST_SATURATION_OP_H_ +#endif // TENSORFLOW_CORE_KERNELS_IMAGEADJUST_SATURATION_OP_H_ diff --git a/tensorflow/core/kernels/adjust_saturation_op_gpu.cu.cc b/tensorflow/core/kernels/image/adjust_saturation_op_gpu.cu.cc similarity index 93% rename from tensorflow/core/kernels/adjust_saturation_op_gpu.cu.cc rename to tensorflow/core/kernels/image/adjust_saturation_op_gpu.cu.cc index c2ef9a4d273..59541e41b46 100644 --- a/tensorflow/core/kernels/adjust_saturation_op_gpu.cu.cc +++ b/tensorflow/core/kernels/image/adjust_saturation_op_gpu.cu.cc @@ -16,8 +16,8 @@ limitations under the License. 
#define EIGEN_USE_GPU -#include "tensorflow/core/kernels/adjust_hsv_gpu.cu.h" -#include "tensorflow/core/kernels/adjust_saturation_op.h" +#include "tensorflow/core/kernels/image/adjust_hsv_gpu.cu.h" +#include "tensorflow/core/kernels/image/adjust_saturation_op.h" #include "tensorflow/core/util/gpu_kernel_helper.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/attention_ops.cc b/tensorflow/core/kernels/image/attention_ops.cc similarity index 100% rename from tensorflow/core/kernels/attention_ops.cc rename to tensorflow/core/kernels/image/attention_ops.cc diff --git a/tensorflow/core/kernels/colorspace_op.cc b/tensorflow/core/kernels/image/colorspace_op.cc similarity index 99% rename from tensorflow/core/kernels/colorspace_op.cc rename to tensorflow/core/kernels/image/colorspace_op.cc index 6c817f73058..a3164bb582d 100644 --- a/tensorflow/core/kernels/colorspace_op.cc +++ b/tensorflow/core/kernels/image/colorspace_op.cc @@ -16,6 +16,8 @@ limitations under the License. // See docs in ../ops/array_ops.cc. #define EIGEN_USE_THREADS +#include "tensorflow/core/kernels/image/colorspace_op.h" + #include #include @@ -26,7 +28,6 @@ limitations under the License. #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/types.h" -#include "tensorflow/core/kernels/colorspace_op.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" diff --git a/tensorflow/core/kernels/colorspace_op.h b/tensorflow/core/kernels/image/colorspace_op.h similarity index 95% rename from tensorflow/core/kernels/colorspace_op.h rename to tensorflow/core/kernels/image/colorspace_op.h index 4de14bc3391..486aa1f5dca 100644 --- a/tensorflow/core/kernels/colorspace_op.h +++ b/tensorflow/core/kernels/image/colorspace_op.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_COLORSPACE_OP_H_ -#define TENSORFLOW_CORE_KERNELS_COLORSPACE_OP_H_ +#ifndef TENSORFLOW_CORE_KERNELS_IMAGECOLORSPACE_OP_H_ +#define TENSORFLOW_CORE_KERNELS_IMAGECOLORSPACE_OP_H_ #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/tensor_shape.h" @@ -91,4 +91,4 @@ struct HSVToRGB { } // namespace functor } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_COLORSPACE_OP_H_ +#endif // TENSORFLOW_CORE_KERNELS_IMAGECOLORSPACE_OP_H_ diff --git a/tensorflow/core/kernels/colorspace_op_gpu.cu.cc b/tensorflow/core/kernels/image/colorspace_op_gpu.cu.cc similarity index 95% rename from tensorflow/core/kernels/colorspace_op_gpu.cu.cc rename to tensorflow/core/kernels/image/colorspace_op_gpu.cu.cc index 227490a2056..c49698e4c04 100644 --- a/tensorflow/core/kernels/colorspace_op_gpu.cu.cc +++ b/tensorflow/core/kernels/image/colorspace_op_gpu.cu.cc @@ -19,7 +19,7 @@ limitations under the License. 
#define EIGEN_USE_GPU #include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/kernels/colorspace_op.h" +#include "tensorflow/core/kernels/image/colorspace_op.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/colorspace_op_test.cc b/tensorflow/core/kernels/image/colorspace_op_test.cc similarity index 100% rename from tensorflow/core/kernels/colorspace_op_test.cc rename to tensorflow/core/kernels/image/colorspace_op_test.cc diff --git a/tensorflow/core/kernels/crop_and_resize_op.cc b/tensorflow/core/kernels/image/crop_and_resize_op.cc similarity index 99% rename from tensorflow/core/kernels/crop_and_resize_op.cc rename to tensorflow/core/kernels/image/crop_and_resize_op.cc index 23058788a4b..1979b0514c6 100644 --- a/tensorflow/core/kernels/crop_and_resize_op.cc +++ b/tensorflow/core/kernels/image/crop_and_resize_op.cc @@ -17,7 +17,7 @@ limitations under the License. #define EIGEN_USE_THREADS -#include "tensorflow/core/kernels/crop_and_resize_op.h" +#include "tensorflow/core/kernels/image/crop_and_resize_op.h" #include #include diff --git a/tensorflow/core/kernels/crop_and_resize_op.h b/tensorflow/core/kernels/image/crop_and_resize_op.h similarity index 93% rename from tensorflow/core/kernels/crop_and_resize_op.h rename to tensorflow/core/kernels/image/crop_and_resize_op.h index 66ff695d9ce..c26380e395c 100644 --- a/tensorflow/core/kernels/crop_and_resize_op.h +++ b/tensorflow/core/kernels/image/crop_and_resize_op.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_CROP_AND_RESIZE_OP_H_ -#define TENSORFLOW_CORE_KERNELS_CROP_AND_RESIZE_OP_H_ +#ifndef TENSORFLOW_CORE_KERNELS_IMAGECROP_AND_RESIZE_OP_H_ +#define TENSORFLOW_CORE_KERNELS_IMAGECROP_AND_RESIZE_OP_H_ #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/numeric_types.h" @@ -69,4 +69,4 @@ struct CheckValidBoxIndexHelper { } // namespace functor } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_CROP_AND_RESIZE_OP_H_ +#endif // TENSORFLOW_CORE_KERNELS_IMAGECROP_AND_RESIZE_OP_H_ diff --git a/tensorflow/core/kernels/crop_and_resize_op_benchmark_test.cc b/tensorflow/core/kernels/image/crop_and_resize_op_benchmark_test.cc similarity index 100% rename from tensorflow/core/kernels/crop_and_resize_op_benchmark_test.cc rename to tensorflow/core/kernels/image/crop_and_resize_op_benchmark_test.cc diff --git a/tensorflow/core/kernels/crop_and_resize_op_gpu.cu.cc b/tensorflow/core/kernels/image/crop_and_resize_op_gpu.cu.cc similarity index 99% rename from tensorflow/core/kernels/crop_and_resize_op_gpu.cu.cc rename to tensorflow/core/kernels/image/crop_and_resize_op_gpu.cu.cc index e64a055503f..e4bbbfa108a 100644 --- a/tensorflow/core/kernels/crop_and_resize_op_gpu.cu.cc +++ b/tensorflow/core/kernels/image/crop_and_resize_op_gpu.cu.cc @@ -21,7 +21,7 @@ limitations under the License. 
#include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_types.h" -#include "tensorflow/core/kernels/crop_and_resize_op.h" +#include "tensorflow/core/kernels/image/crop_and_resize_op.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/util/gpu_kernel_helper.h" diff --git a/tensorflow/core/kernels/crop_and_resize_op_test.cc b/tensorflow/core/kernels/image/crop_and_resize_op_test.cc similarity index 100% rename from tensorflow/core/kernels/crop_and_resize_op_test.cc rename to tensorflow/core/kernels/image/crop_and_resize_op_test.cc diff --git a/tensorflow/core/kernels/decode_image_op.cc b/tensorflow/core/kernels/image/decode_image_op.cc similarity index 100% rename from tensorflow/core/kernels/decode_image_op.cc rename to tensorflow/core/kernels/image/decode_image_op.cc diff --git a/tensorflow/core/kernels/draw_bounding_box_op.cc b/tensorflow/core/kernels/image/draw_bounding_box_op.cc similarity index 100% rename from tensorflow/core/kernels/draw_bounding_box_op.cc rename to tensorflow/core/kernels/image/draw_bounding_box_op.cc diff --git a/tensorflow/core/kernels/encode_jpeg_op.cc b/tensorflow/core/kernels/image/encode_jpeg_op.cc similarity index 100% rename from tensorflow/core/kernels/encode_jpeg_op.cc rename to tensorflow/core/kernels/image/encode_jpeg_op.cc diff --git a/tensorflow/core/kernels/encode_jpeg_op_test.cc b/tensorflow/core/kernels/image/encode_jpeg_op_test.cc similarity index 100% rename from tensorflow/core/kernels/encode_jpeg_op_test.cc rename to tensorflow/core/kernels/image/encode_jpeg_op_test.cc diff --git a/tensorflow/core/kernels/encode_png_op.cc b/tensorflow/core/kernels/image/encode_png_op.cc similarity index 100% rename from tensorflow/core/kernels/encode_png_op.cc rename to tensorflow/core/kernels/image/encode_png_op.cc diff --git a/tensorflow/core/kernels/extract_image_patches_op.cc b/tensorflow/core/kernels/image/extract_image_patches_op.cc similarity index 98% rename from tensorflow/core/kernels/extract_image_patches_op.cc rename to tensorflow/core/kernels/image/extract_image_patches_op.cc index 4e87dfc93a4..a7890090acb 100644 --- a/tensorflow/core/kernels/extract_image_patches_op.cc +++ b/tensorflow/core/kernels/image/extract_image_patches_op.cc @@ -18,7 +18,7 @@ limitations under the License. #define USE_EIGEN_TENSOR #define EIGEN_USE_THREADS -#include "tensorflow/core/kernels/extract_image_patches_op.h" +#include "tensorflow/core/kernels/image/extract_image_patches_op.h" #include diff --git a/tensorflow/core/kernels/extract_image_patches_op.h b/tensorflow/core/kernels/image/extract_image_patches_op.h similarity index 91% rename from tensorflow/core/kernels/extract_image_patches_op.h rename to tensorflow/core/kernels/image/extract_image_patches_op.h index 64b8c0338bd..ba952275c3e 100644 --- a/tensorflow/core/kernels/extract_image_patches_op.h +++ b/tensorflow/core/kernels/image/extract_image_patches_op.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_EXTRACT_IMAGE_PATCHES_OP_H_ -#define TENSORFLOW_CORE_KERNELS_EXTRACT_IMAGE_PATCHES_OP_H_ +#ifndef TENSORFLOW_CORE_KERNELS_IMAGEEXTRACT_IMAGE_PATCHES_OP_H_ +#define TENSORFLOW_CORE_KERNELS_IMAGEEXTRACT_IMAGE_PATCHES_OP_H_ #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/tensor_shape.h" @@ -53,4 +53,4 @@ struct ExtractImagePatchesForward { } // namespace functor } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_EXTRACT_IMAGE_PATCHES_OP_H_ +#endif // TENSORFLOW_CORE_KERNELS_IMAGEEXTRACT_IMAGE_PATCHES_OP_H_ diff --git a/tensorflow/core/kernels/extract_image_patches_op_gpu.cu.cc b/tensorflow/core/kernels/image/extract_image_patches_op_gpu.cu.cc similarity index 94% rename from tensorflow/core/kernels/extract_image_patches_op_gpu.cu.cc rename to tensorflow/core/kernels/image/extract_image_patches_op_gpu.cu.cc index e6a49da7fd2..37b9c9bda32 100644 --- a/tensorflow/core/kernels/extract_image_patches_op_gpu.cu.cc +++ b/tensorflow/core/kernels/image/extract_image_patches_op_gpu.cu.cc @@ -19,7 +19,7 @@ limitations under the License. #define EIGEN_USE_GPU #include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/kernels/extract_image_patches_op.h" +#include "tensorflow/core/kernels/image/extract_image_patches_op.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/extract_jpeg_shape_op.cc b/tensorflow/core/kernels/image/extract_jpeg_shape_op.cc similarity index 100% rename from tensorflow/core/kernels/extract_jpeg_shape_op.cc rename to tensorflow/core/kernels/image/extract_jpeg_shape_op.cc diff --git a/tensorflow/core/kernels/extract_volume_patches_op.cc b/tensorflow/core/kernels/image/extract_volume_patches_op.cc similarity index 99% rename from tensorflow/core/kernels/extract_volume_patches_op.cc rename to tensorflow/core/kernels/image/extract_volume_patches_op.cc index 3f003b6f7f6..e48e7602afa 100644 --- a/tensorflow/core/kernels/extract_volume_patches_op.cc +++ b/tensorflow/core/kernels/image/extract_volume_patches_op.cc @@ -24,7 +24,7 @@ when rates are to be added. #define USE_EIGEN_TENSOR #define EIGEN_USE_THREADS -#include "tensorflow/core/kernels/extract_volume_patches_op.h" +#include "tensorflow/core/kernels/image/extract_volume_patches_op.h" #include diff --git a/tensorflow/core/kernels/extract_volume_patches_op.h b/tensorflow/core/kernels/image/extract_volume_patches_op.h similarity index 92% rename from tensorflow/core/kernels/extract_volume_patches_op.h rename to tensorflow/core/kernels/image/extract_volume_patches_op.h index 7e0502b7707..f20ee6a6ade 100644 --- a/tensorflow/core/kernels/extract_volume_patches_op.h +++ b/tensorflow/core/kernels/image/extract_volume_patches_op.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_KERNELS_EXTRACT_VOLUME_PATCHES_OP_H_ -#define TENSORFLOW_KERNELS_EXTRACT_VOLUME_PATCHES_OP_H_ +#ifndef TENSORFLOW_KERNELS_IMAGE_EXTRACT_VOLUME_PATCHES_OP_H_ +#define TENSORFLOW_KERNELS_IMAGE_EXTRACT_VOLUME_PATCHES_OP_H_ #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_types.h" @@ -55,4 +55,4 @@ struct ExtractVolumePatchesForward { } // namespace functor } // namespace tensorflow -#endif // TENSORFLOW_KERNELS_EXTRACT_VOLUME_PATCHES_OP_H_ +#endif // TENSORFLOW_KERNELS_IMAGE_EXTRACT_VOLUME_PATCHES_OP_H_ diff --git a/tensorflow/core/kernels/extract_volume_patches_op_gpu.cu.cc b/tensorflow/core/kernels/image/extract_volume_patches_op_gpu.cu.cc similarity index 94% rename from tensorflow/core/kernels/extract_volume_patches_op_gpu.cu.cc rename to tensorflow/core/kernels/image/extract_volume_patches_op_gpu.cu.cc index df8b6f8bfa2..379907712a8 100644 --- a/tensorflow/core/kernels/extract_volume_patches_op_gpu.cu.cc +++ b/tensorflow/core/kernels/image/extract_volume_patches_op_gpu.cu.cc @@ -17,8 +17,8 @@ limitations under the License. #define EIGEN_USE_GPU -#include "tensorflow/core/kernels/extract_volume_patches_op.h" #include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/kernels/image/extract_volume_patches_op.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/generate_box_proposals_op.cu.cc b/tensorflow/core/kernels/image/generate_box_proposals_op.cu.cc similarity index 99% rename from tensorflow/core/kernels/generate_box_proposals_op.cu.cc rename to tensorflow/core/kernels/image/generate_box_proposals_op.cu.cc index b862c42d299..721d190fa22 100644 --- a/tensorflow/core/kernels/generate_box_proposals_op.cu.cc +++ b/tensorflow/core/kernels/image/generate_box_proposals_op.cu.cc @@ -24,7 +24,7 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/kernels/gpu_prim.h" -#include "tensorflow/core/kernels/non_max_suppression_op.h" +#include "tensorflow/core/kernels/image/non_max_suppression_op.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/stream_executor.h" diff --git a/tensorflow/core/kernels/image_ops.cc b/tensorflow/core/kernels/image/image_ops.cc similarity index 99% rename from tensorflow/core/kernels/image_ops.cc rename to tensorflow/core/kernels/image/image_ops.cc index 8792372b6ff..f121fb81654 100644 --- a/tensorflow/core/kernels/image_ops.cc +++ b/tensorflow/core/kernels/image/image_ops.cc @@ -19,7 +19,7 @@ limitations under the License. #define EIGEN_USE_GPU #endif // GOOGLE_CUDA -#include "tensorflow/core/kernels/image_ops.h" +#include "tensorflow/core/kernels/image/image_ops.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" diff --git a/tensorflow/core/kernels/image_ops.h b/tensorflow/core/kernels/image/image_ops.h similarity index 98% rename from tensorflow/core/kernels/image_ops.h rename to tensorflow/core/kernels/image/image_ops.h index e77fcbbd56a..70b47e181df 100644 --- a/tensorflow/core/kernels/image_ops.h +++ b/tensorflow/core/kernels/image/image_ops.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_IMAGE_OPS_H_ -#define TENSORFLOW_CORE_KERNELS_IMAGE_OPS_H_ +#ifndef TENSORFLOW_CORE_KERNELS_IMAGE_IMAGE_OPS_H_ +#define TENSORFLOW_CORE_KERNELS_IMAGE_IMAGE_OPS_H_ // See docs in ../ops/image_ops.cc. @@ -255,4 +255,4 @@ struct FillProjectiveTransform { } // end namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_IMAGE_OPS_H_ +#endif // TENSORFLOW_CORE_KERNELS_IMAGE_IMAGE_OPS_H_ diff --git a/tensorflow/core/kernels/image_ops_gpu.cu.cc b/tensorflow/core/kernels/image/image_ops_gpu.cu.cc similarity index 96% rename from tensorflow/core/kernels/image_ops_gpu.cu.cc rename to tensorflow/core/kernels/image/image_ops_gpu.cu.cc index 827fb493e4c..dd94559ffd7 100644 --- a/tensorflow/core/kernels/image_ops_gpu.cu.cc +++ b/tensorflow/core/kernels/image/image_ops_gpu.cu.cc @@ -19,7 +19,7 @@ limitations under the License. #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/types.h" -#include "tensorflow/core/kernels/image_ops.h" +#include "tensorflow/core/kernels/image/image_ops.h" #include "tensorflow/core/platform/types.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/mirror_pad_op.cc b/tensorflow/core/kernels/image/mirror_pad_op.cc similarity index 99% rename from tensorflow/core/kernels/mirror_pad_op.cc rename to tensorflow/core/kernels/image/mirror_pad_op.cc index 20211c88c8b..e22b1f1adbf 100644 --- a/tensorflow/core/kernels/mirror_pad_op.cc +++ b/tensorflow/core/kernels/image/mirror_pad_op.cc @@ -17,11 +17,11 @@ limitations under the License. #define EIGEN_USE_THREADS -#include "tensorflow/core/kernels/mirror_pad_op.h" +#include "tensorflow/core/kernels/image/mirror_pad_op.h" + #include #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" - #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" diff --git a/tensorflow/core/kernels/mirror_pad_op.h b/tensorflow/core/kernels/image/mirror_pad_op.h similarity index 99% rename from tensorflow/core/kernels/mirror_pad_op.h rename to tensorflow/core/kernels/image/mirror_pad_op.h index 23ab574b8b6..8a8f84b7a64 100644 --- a/tensorflow/core/kernels/mirror_pad_op.h +++ b/tensorflow/core/kernels/image/mirror_pad_op.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_MIRROR_PAD_OP_H_ -#define TENSORFLOW_CORE_KERNELS_MIRROR_PAD_OP_H_ +#ifndef TENSORFLOW_CORE_KERNELS_IMAGE_MIRROR_PAD_OP_H_ +#define TENSORFLOW_CORE_KERNELS_IMAGE_MIRROR_PAD_OP_H_ #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/tensor_types.h" @@ -444,4 +444,4 @@ struct MirrorPadGrad { } // namespace functor } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_MIRROR_PAD_OP_H_ +#endif // TENSORFLOW_CORE_KERNELS_IMAGE_MIRROR_PAD_OP_H_ diff --git a/tensorflow/core/kernels/mirror_pad_op_benchmark_test.cc b/tensorflow/core/kernels/image/mirror_pad_op_benchmark_test.cc similarity index 100% rename from tensorflow/core/kernels/mirror_pad_op_benchmark_test.cc rename to tensorflow/core/kernels/image/mirror_pad_op_benchmark_test.cc diff --git a/tensorflow/core/kernels/mirror_pad_op_cpu_impl.h b/tensorflow/core/kernels/image/mirror_pad_op_cpu_impl.h similarity index 83% rename from tensorflow/core/kernels/mirror_pad_op_cpu_impl.h rename to tensorflow/core/kernels/image/mirror_pad_op_cpu_impl.h index 45e6676e5a6..7a7c263c526 100644 --- a/tensorflow/core/kernels/mirror_pad_op_cpu_impl.h +++ b/tensorflow/core/kernels/image/mirror_pad_op_cpu_impl.h @@ -13,13 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_MIRROR_PAD_OP_CPU_IMPL_H_ -#define TENSORFLOW_CORE_KERNELS_MIRROR_PAD_OP_CPU_IMPL_H_ +#ifndef TENSORFLOW_CORE_KERNELS_IMAGE_MIRROR_PAD_OP_CPU_IMPL_H_ +#define TENSORFLOW_CORE_KERNELS_IMAGE_MIRROR_PAD_OP_CPU_IMPL_H_ +#if CPU_PROVIDED_IXDIM #define EIGEN_USE_THREADS #include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/kernels/mirror_pad_op.h" +#include "tensorflow/core/kernels/image/mirror_pad_op.h" namespace tensorflow { @@ -39,7 +40,7 @@ TF_CALL_tstring(DEFINE_CPU_SPECS); CPU_PROVIDED_IXDIM>; TF_CALL_NUMBER_TYPES(DEFINE_CPU_SPECS); #undef DEFINE_CPU_SPECS - } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_MIRROR_PAD_OP_CPU_IMPL_H_ +#endif // CPU_PROVIDED_IXDIM +#endif // TENSORFLOW_CORE_KERNELS_IMAGE_MIRROR_PAD_OP_CPU_IMPL_H_ diff --git a/tensorflow/core/kernels/mirror_pad_op_cpu_impl_1.cc b/tensorflow/core/kernels/image/mirror_pad_op_cpu_impl_1.cc similarity index 91% rename from tensorflow/core/kernels/mirror_pad_op_cpu_impl_1.cc rename to tensorflow/core/kernels/image/mirror_pad_op_cpu_impl_1.cc index 140c487221f..ad64170aa0f 100644 --- a/tensorflow/core/kernels/mirror_pad_op_cpu_impl_1.cc +++ b/tensorflow/core/kernels/image/mirror_pad_op_cpu_impl_1.cc @@ -14,5 +14,5 @@ limitations under the License. 
==============================================================================*/ #define CPU_PROVIDED_IXDIM 1 -#include "tensorflow/core/kernels/mirror_pad_op_cpu_impl.h" +#include "tensorflow/core/kernels/image/mirror_pad_op_cpu_impl.h" #undef CPU_PROVIDED_IXDIM diff --git a/tensorflow/core/kernels/mirror_pad_op_cpu_impl_2.cc b/tensorflow/core/kernels/image/mirror_pad_op_cpu_impl_2.cc similarity index 91% rename from tensorflow/core/kernels/mirror_pad_op_cpu_impl_2.cc rename to tensorflow/core/kernels/image/mirror_pad_op_cpu_impl_2.cc index d67f7754e1d..76096f78030 100644 --- a/tensorflow/core/kernels/mirror_pad_op_cpu_impl_2.cc +++ b/tensorflow/core/kernels/image/mirror_pad_op_cpu_impl_2.cc @@ -14,5 +14,5 @@ limitations under the License. ==============================================================================*/ #define CPU_PROVIDED_IXDIM 2 -#include "tensorflow/core/kernels/mirror_pad_op_cpu_impl.h" +#include "tensorflow/core/kernels/image/mirror_pad_op_cpu_impl.h" #undef CPU_PROVIDED_IXDIM diff --git a/tensorflow/core/kernels/mirror_pad_op_cpu_impl_3.cc b/tensorflow/core/kernels/image/mirror_pad_op_cpu_impl_3.cc similarity index 91% rename from tensorflow/core/kernels/mirror_pad_op_cpu_impl_3.cc rename to tensorflow/core/kernels/image/mirror_pad_op_cpu_impl_3.cc index 096547f1f9c..3c29e87bc45 100644 --- a/tensorflow/core/kernels/mirror_pad_op_cpu_impl_3.cc +++ b/tensorflow/core/kernels/image/mirror_pad_op_cpu_impl_3.cc @@ -14,5 +14,5 @@ limitations under the License. ==============================================================================*/ #define CPU_PROVIDED_IXDIM 3 -#include "tensorflow/core/kernels/mirror_pad_op_cpu_impl.h" +#include "tensorflow/core/kernels/image/mirror_pad_op_cpu_impl.h" #undef CPU_PROVIDED_IXDIM diff --git a/tensorflow/core/kernels/mirror_pad_op_cpu_impl_4.cc b/tensorflow/core/kernels/image/mirror_pad_op_cpu_impl_4.cc similarity index 91% rename from tensorflow/core/kernels/mirror_pad_op_cpu_impl_4.cc rename to tensorflow/core/kernels/image/mirror_pad_op_cpu_impl_4.cc index 5a7455f3c07..5d1a3400054 100644 --- a/tensorflow/core/kernels/mirror_pad_op_cpu_impl_4.cc +++ b/tensorflow/core/kernels/image/mirror_pad_op_cpu_impl_4.cc @@ -14,5 +14,5 @@ limitations under the License. ==============================================================================*/ #define CPU_PROVIDED_IXDIM 4 -#include "tensorflow/core/kernels/mirror_pad_op_cpu_impl.h" +#include "tensorflow/core/kernels/image/mirror_pad_op_cpu_impl.h" #undef CPU_PROVIDED_IXDIM diff --git a/tensorflow/core/kernels/mirror_pad_op_cpu_impl_5.cc b/tensorflow/core/kernels/image/mirror_pad_op_cpu_impl_5.cc similarity index 91% rename from tensorflow/core/kernels/mirror_pad_op_cpu_impl_5.cc rename to tensorflow/core/kernels/image/mirror_pad_op_cpu_impl_5.cc index ed2db03a8f5..71a6c9307c6 100644 --- a/tensorflow/core/kernels/mirror_pad_op_cpu_impl_5.cc +++ b/tensorflow/core/kernels/image/mirror_pad_op_cpu_impl_5.cc @@ -14,5 +14,5 @@ limitations under the License. 
==============================================================================*/ #define CPU_PROVIDED_IXDIM 5 -#include "tensorflow/core/kernels/mirror_pad_op_cpu_impl.h" +#include "tensorflow/core/kernels/image/mirror_pad_op_cpu_impl.h" #undef CPU_PROVIDED_IXDIM diff --git a/tensorflow/core/kernels/mirror_pad_op_gpu.cu.cc b/tensorflow/core/kernels/image/mirror_pad_op_gpu.cu.cc similarity index 97% rename from tensorflow/core/kernels/mirror_pad_op_gpu.cu.cc rename to tensorflow/core/kernels/image/mirror_pad_op_gpu.cu.cc index ac89599714d..f0afc707fc6 100644 --- a/tensorflow/core/kernels/mirror_pad_op_gpu.cu.cc +++ b/tensorflow/core/kernels/image/mirror_pad_op_gpu.cu.cc @@ -17,9 +17,8 @@ limitations under the License. #define EIGEN_USE_GPU -#include "tensorflow/core/kernels/mirror_pad_op.h" - #include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/kernels/image/mirror_pad_op.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/mirror_pad_op_test.cc b/tensorflow/core/kernels/image/mirror_pad_op_test.cc similarity index 100% rename from tensorflow/core/kernels/mirror_pad_op_test.cc rename to tensorflow/core/kernels/image/mirror_pad_op_test.cc diff --git a/tensorflow/core/kernels/non_max_suppression_op.cc b/tensorflow/core/kernels/image/non_max_suppression_op.cc similarity index 99% rename from tensorflow/core/kernels/non_max_suppression_op.cc rename to tensorflow/core/kernels/image/non_max_suppression_op.cc index 20ae3a2e0d0..701753a81d6 100644 --- a/tensorflow/core/kernels/non_max_suppression_op.cc +++ b/tensorflow/core/kernels/image/non_max_suppression_op.cc @@ -17,7 +17,7 @@ limitations under the License. #define EIGEN_USE_THREADS -#include "tensorflow/core/kernels/non_max_suppression_op.h" +#include "tensorflow/core/kernels/image/non_max_suppression_op.h" #include #include diff --git a/tensorflow/core/kernels/non_max_suppression_op.cu.cc b/tensorflow/core/kernels/image/non_max_suppression_op.cu.cc similarity index 99% rename from tensorflow/core/kernels/non_max_suppression_op.cu.cc rename to tensorflow/core/kernels/image/non_max_suppression_op.cu.cc index 8ec26ba13d7..37d7d42e438 100644 --- a/tensorflow/core/kernels/non_max_suppression_op.cu.cc +++ b/tensorflow/core/kernels/image/non_max_suppression_op.cu.cc @@ -23,7 +23,7 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/kernels/gpu_prim.h" -#include "tensorflow/core/kernels/non_max_suppression_op.h" +#include "tensorflow/core/kernels/image/non_max_suppression_op.h" #include "tensorflow/core/util/gpu_kernel_helper.h" #include "tensorflow/core/util/gpu_launch_config.h" #include "tensorflow/stream_executor/stream_executor.h" diff --git a/tensorflow/core/kernels/non_max_suppression_op.h b/tensorflow/core/kernels/image/non_max_suppression_op.h similarity index 92% rename from tensorflow/core/kernels/non_max_suppression_op.h rename to tensorflow/core/kernels/image/non_max_suppression_op.h index 24957c2bbed..d6d3b68b099 100644 --- a/tensorflow/core/kernels/non_max_suppression_op.h +++ b/tensorflow/core/kernels/image/non_max_suppression_op.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_NON_MAX_SUPPRESSION_OP_H_ -#define TENSORFLOW_CORE_KERNELS_NON_MAX_SUPPRESSION_OP_H_ +#ifndef TENSORFLOW_CORE_KERNELS_IMAGENON_MAX_SUPPRESSION_OP_H_ +#define TENSORFLOW_CORE_KERNELS_IMAGENON_MAX_SUPPRESSION_OP_H_ #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/numeric_types.h" @@ -59,4 +59,4 @@ Status NmsGpu(const float* d_sorted_boxes_float_ptr, const int num_boxes, } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_NON_MAX_SUPPRESSION_OP_H_ +#endif // TENSORFLOW_CORE_KERNELS_IMAGENON_MAX_SUPPRESSION_OP_H_ diff --git a/tensorflow/core/kernels/non_max_suppression_op_benchmark_test.cc b/tensorflow/core/kernels/image/non_max_suppression_op_benchmark_test.cc similarity index 100% rename from tensorflow/core/kernels/non_max_suppression_op_benchmark_test.cc rename to tensorflow/core/kernels/image/non_max_suppression_op_benchmark_test.cc diff --git a/tensorflow/core/kernels/non_max_suppression_op_gpu_test.cc b/tensorflow/core/kernels/image/non_max_suppression_op_gpu_test.cc similarity index 100% rename from tensorflow/core/kernels/non_max_suppression_op_gpu_test.cc rename to tensorflow/core/kernels/image/non_max_suppression_op_gpu_test.cc diff --git a/tensorflow/core/kernels/non_max_suppression_op_test.cc b/tensorflow/core/kernels/image/non_max_suppression_op_test.cc similarity index 100% rename from tensorflow/core/kernels/non_max_suppression_op_test.cc rename to tensorflow/core/kernels/image/non_max_suppression_op_test.cc diff --git a/tensorflow/core/kernels/random_crop_op.cc b/tensorflow/core/kernels/image/random_crop_op.cc similarity index 99% rename from tensorflow/core/kernels/random_crop_op.cc rename to tensorflow/core/kernels/image/random_crop_op.cc index eb7980fa58e..7da97466636 100644 --- a/tensorflow/core/kernels/random_crop_op.cc +++ b/tensorflow/core/kernels/image/random_crop_op.cc @@ -63,6 +63,7 @@ class RandomCropOp : public OpKernel { if ((target_height == height) && (target_width == width)) { *output = context->input(0); } + OP_REQUIRES(context, width >= target_width, errors::FailedPrecondition( "width must be >= target_width: width = ", width, diff --git a/tensorflow/core/kernels/resize_area_op.cc b/tensorflow/core/kernels/image/resize_area_op.cc similarity index 99% rename from tensorflow/core/kernels/resize_area_op.cc rename to tensorflow/core/kernels/image/resize_area_op.cc index 325c5ccade1..00691ae46b0 100644 --- a/tensorflow/core/kernels/resize_area_op.cc +++ b/tensorflow/core/kernels/image/resize_area_op.cc @@ -18,15 +18,16 @@ limitations under the License. 
#include #include + #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/types.h" -#include "tensorflow/core/kernels/image_resizer_state.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/util/image_resizer_state.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/resize_area_op_test.cc b/tensorflow/core/kernels/image/resize_area_op_test.cc similarity index 100% rename from tensorflow/core/kernels/resize_area_op_test.cc rename to tensorflow/core/kernels/image/resize_area_op_test.cc diff --git a/tensorflow/core/kernels/resize_bicubic_op.cc b/tensorflow/core/kernels/image/resize_bicubic_op.cc similarity index 99% rename from tensorflow/core/kernels/resize_bicubic_op.cc rename to tensorflow/core/kernels/image/resize_bicubic_op.cc index 48bd1986b7b..89f34cb80f0 100644 --- a/tensorflow/core/kernels/resize_bicubic_op.cc +++ b/tensorflow/core/kernels/image/resize_bicubic_op.cc @@ -17,6 +17,7 @@ limitations under the License. #define EIGEN_USE_THREADS #include + #include #include @@ -26,9 +27,9 @@ limitations under the License. #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/types.h" -#include "tensorflow/core/kernels/image_resizer_state.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/util/image_resizer_state.h" namespace tensorflow { namespace { diff --git a/tensorflow/core/kernels/resize_bicubic_op_test.cc b/tensorflow/core/kernels/image/resize_bicubic_op_test.cc similarity index 100% rename from tensorflow/core/kernels/resize_bicubic_op_test.cc rename to tensorflow/core/kernels/image/resize_bicubic_op_test.cc diff --git a/tensorflow/core/kernels/resize_bilinear_op.cc b/tensorflow/core/kernels/image/resize_bilinear_op.cc similarity index 99% rename from tensorflow/core/kernels/resize_bilinear_op.cc rename to tensorflow/core/kernels/image/resize_bilinear_op.cc index a0673fea73d..b9eb650c029 100644 --- a/tensorflow/core/kernels/resize_bilinear_op.cc +++ b/tensorflow/core/kernels/image/resize_bilinear_op.cc @@ -16,22 +16,23 @@ limitations under the License. 
// See docs in ../ops/image_ops.cc #define EIGEN_USE_THREADS -#include "tensorflow/core/kernels/resize_bilinear_op.h" +#include "tensorflow/core/kernels/image/resize_bilinear_op.h" #ifdef __SSE4_1__ #include #endif #include + #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/types.h" -#include "tensorflow/core/kernels/image_resizer_state.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/util/image_resizer_state.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/resize_bilinear_op.h b/tensorflow/core/kernels/image/resize_bilinear_op.h similarity index 90% rename from tensorflow/core/kernels/resize_bilinear_op.h rename to tensorflow/core/kernels/image/resize_bilinear_op.h index b4d0066d4f3..34a6b320251 100644 --- a/tensorflow/core/kernels/resize_bilinear_op.h +++ b/tensorflow/core/kernels/image/resize_bilinear_op.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_RESIZE_BILINEAR_OP_H_ -#define TENSORFLOW_CORE_KERNELS_RESIZE_BILINEAR_OP_H_ +#ifndef TENSORFLOW_CORE_KERNELS_IMAGERESIZE_BILINEAR_OP_H_ +#define TENSORFLOW_CORE_KERNELS_IMAGERESIZE_BILINEAR_OP_H_ #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/numeric_types.h" @@ -43,4 +43,4 @@ struct ResizeBilinearGrad { } // namespace functor } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_RESIZE_BILINEAR_OP_H_ +#endif // TENSORFLOW_CORE_KERNELS_IMAGERESIZE_BILINEAR_OP_H_ diff --git a/tensorflow/core/kernels/resize_bilinear_op_gpu.cu.cc b/tensorflow/core/kernels/image/resize_bilinear_op_gpu.cu.cc similarity index 99% rename from tensorflow/core/kernels/resize_bilinear_op_gpu.cu.cc rename to tensorflow/core/kernels/image/resize_bilinear_op_gpu.cu.cc index 42a3daae116..aa475a4a3af 100644 --- a/tensorflow/core/kernels/resize_bilinear_op_gpu.cu.cc +++ b/tensorflow/core/kernels/image/resize_bilinear_op_gpu.cu.cc @@ -21,7 +21,7 @@ limitations under the License. #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_types.h" -#include "tensorflow/core/kernels/resize_bilinear_op.h" +#include "tensorflow/core/kernels/image/resize_bilinear_op.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/util/gpu_kernel_helper.h" diff --git a/tensorflow/core/kernels/resize_bilinear_op_test.cc b/tensorflow/core/kernels/image/resize_bilinear_op_test.cc similarity index 100% rename from tensorflow/core/kernels/resize_bilinear_op_test.cc rename to tensorflow/core/kernels/image/resize_bilinear_op_test.cc diff --git a/tensorflow/core/kernels/resize_nearest_neighbor_op.cc b/tensorflow/core/kernels/image/resize_nearest_neighbor_op.cc similarity index 99% rename from tensorflow/core/kernels/resize_nearest_neighbor_op.cc rename to tensorflow/core/kernels/image/resize_nearest_neighbor_op.cc index 4a357333957..a3c6a69a692 100644 --- a/tensorflow/core/kernels/resize_nearest_neighbor_op.cc +++ b/tensorflow/core/kernels/image/resize_nearest_neighbor_op.cc @@ -16,7 +16,7 @@ limitations under the License. 
// See docs in ../ops/image_ops.cc #define EIGEN_USE_THREADS -#include "tensorflow/core/kernels/resize_nearest_neighbor_op.h" +#include "tensorflow/core/kernels/image/resize_nearest_neighbor_op.h" #include @@ -26,9 +26,9 @@ limitations under the License. #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/types.h" -#include "tensorflow/core/kernels/image_resizer_state.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/util/image_resizer_state.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/resize_nearest_neighbor_op.h b/tensorflow/core/kernels/image/resize_nearest_neighbor_op.h similarity index 88% rename from tensorflow/core/kernels/resize_nearest_neighbor_op.h rename to tensorflow/core/kernels/image/resize_nearest_neighbor_op.h index d6b053180ce..db0276477eb 100644 --- a/tensorflow/core/kernels/resize_nearest_neighbor_op.h +++ b/tensorflow/core/kernels/image/resize_nearest_neighbor_op.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_RESIZE_NEAREST_NEIGHBOR_OP_H_ -#define TENSORFLOW_CORE_KERNELS_RESIZE_NEAREST_NEIGHBOR_OP_H_ +#ifndef TENSORFLOW_CORE_KERNELS_IMAGERESIZE_NEAREST_NEIGHBOR_OP_H_ +#define TENSORFLOW_CORE_KERNELS_IMAGERESIZE_NEAREST_NEIGHBOR_OP_H_ #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/platform/types.h" @@ -42,4 +42,4 @@ struct ResizeNearestNeighborGrad { } // namespace functor } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_RESIZE_NEAREST_NEIGHBOR_OP_H_ +#endif // TENSORFLOW_CORE_KERNELS_IMAGERESIZE_NEAREST_NEIGHBOR_OP_H_ diff --git a/tensorflow/core/kernels/resize_nearest_neighbor_op_gpu.cu.cc b/tensorflow/core/kernels/image/resize_nearest_neighbor_op_gpu.cu.cc similarity index 99% rename from tensorflow/core/kernels/resize_nearest_neighbor_op_gpu.cu.cc rename to tensorflow/core/kernels/image/resize_nearest_neighbor_op_gpu.cu.cc index b6a9c77ba13..50066d5b653 100644 --- a/tensorflow/core/kernels/resize_nearest_neighbor_op_gpu.cu.cc +++ b/tensorflow/core/kernels/image/resize_nearest_neighbor_op_gpu.cu.cc @@ -21,7 +21,7 @@ limitations under the License. 
#include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_types.h" -#include "tensorflow/core/kernels/resize_nearest_neighbor_op.h" +#include "tensorflow/core/kernels/image/resize_nearest_neighbor_op.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/util/gpu_kernel_helper.h" diff --git a/tensorflow/core/kernels/resize_nearest_neighbor_op_test.cc b/tensorflow/core/kernels/image/resize_nearest_neighbor_op_test.cc similarity index 100% rename from tensorflow/core/kernels/resize_nearest_neighbor_op_test.cc rename to tensorflow/core/kernels/image/resize_nearest_neighbor_op_test.cc diff --git a/tensorflow/core/kernels/resize_op_benchmark_test.cc b/tensorflow/core/kernels/image/resize_op_benchmark_test.cc similarity index 100% rename from tensorflow/core/kernels/resize_op_benchmark_test.cc rename to tensorflow/core/kernels/image/resize_op_benchmark_test.cc diff --git a/tensorflow/core/kernels/sample_distorted_bounding_box_op.cc b/tensorflow/core/kernels/image/sample_distorted_bounding_box_op.cc similarity index 100% rename from tensorflow/core/kernels/sample_distorted_bounding_box_op.cc rename to tensorflow/core/kernels/image/sample_distorted_bounding_box_op.cc diff --git a/tensorflow/core/kernels/sampling_kernels.cc b/tensorflow/core/kernels/image/sampling_kernels.cc similarity index 96% rename from tensorflow/core/kernels/sampling_kernels.cc rename to tensorflow/core/kernels/image/sampling_kernels.cc index 306b8d6a390..ae62a1b2e3d 100644 --- a/tensorflow/core/kernels/sampling_kernels.cc +++ b/tensorflow/core/kernels/image/sampling_kernels.cc @@ -13,8 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/sampling_kernels.h" +#include "tensorflow/core/kernels/image/sampling_kernels.h" + #include + #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/strings/str_util.h" diff --git a/tensorflow/core/kernels/sampling_kernels.h b/tensorflow/core/kernels/image/sampling_kernels.h similarity index 100% rename from tensorflow/core/kernels/sampling_kernels.h rename to tensorflow/core/kernels/image/sampling_kernels.h diff --git a/tensorflow/core/kernels/sampling_kernels_test.cc b/tensorflow/core/kernels/image/sampling_kernels_test.cc similarity index 98% rename from tensorflow/core/kernels/sampling_kernels_test.cc rename to tensorflow/core/kernels/image/sampling_kernels_test.cc index 37c2edc14a3..039a785063f 100644 --- a/tensorflow/core/kernels/sampling_kernels_test.cc +++ b/tensorflow/core/kernels/image/sampling_kernels_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/kernels/sampling_kernels.h" +#include "tensorflow/core/kernels/image/sampling_kernels.h" #include "tensorflow/core/platform/test.h" diff --git a/tensorflow/core/kernels/scale_and_translate_op.cc b/tensorflow/core/kernels/image/scale_and_translate_op.cc similarity index 99% rename from tensorflow/core/kernels/scale_and_translate_op.cc rename to tensorflow/core/kernels/image/scale_and_translate_op.cc index fff457e55c7..1011af7d19e 100644 --- a/tensorflow/core/kernels/scale_and_translate_op.cc +++ b/tensorflow/core/kernels/image/scale_and_translate_op.cc @@ -16,9 +16,10 @@ limitations under the License. 
// See docs in ../ops/image_ops.cc #define EIGEN_USE_THREADS -#include "tensorflow/core/kernels/scale_and_translate_op.h" +#include "tensorflow/core/kernels/image/scale_and_translate_op.h" #include + #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/bounds_check.h" #include "tensorflow/core/framework/op_kernel.h" @@ -27,7 +28,7 @@ limitations under the License. #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/framework/types.pb.h" -#include "tensorflow/core/kernels/sampling_kernels.h" +#include "tensorflow/core/kernels/image/sampling_kernels.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/logging.h" diff --git a/tensorflow/core/kernels/scale_and_translate_op.h b/tensorflow/core/kernels/image/scale_and_translate_op.h similarity index 92% rename from tensorflow/core/kernels/scale_and_translate_op.h rename to tensorflow/core/kernels/image/scale_and_translate_op.h index 74bc87ecc7a..9c0650a4c26 100644 --- a/tensorflow/core/kernels/scale_and_translate_op.h +++ b/tensorflow/core/kernels/image/scale_and_translate_op.h @@ -13,14 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_SCALE_AND_TRANSLATE_OP_H_ -#define TENSORFLOW_CORE_KERNELS_SCALE_AND_TRANSLATE_OP_H_ +#ifndef TENSORFLOW_CORE_KERNELS_IMAGESCALE_AND_TRANSLATE_OP_H_ +#define TENSORFLOW_CORE_KERNELS_IMAGESCALE_AND_TRANSLATE_OP_H_ #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/numeric_types.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_types.h" -#include "tensorflow/core/kernels/sampling_kernels.h" +#include "tensorflow/core/kernels/image/sampling_kernels.h" namespace tensorflow { namespace functor { @@ -72,4 +72,4 @@ struct GatherSpans { } // namespace functor } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_SCALE_AND_TRANSLATE_OP_H_ +#endif // TENSORFLOW_CORE_KERNELS_IMAGESCALE_AND_TRANSLATE_OP_H_ diff --git a/tensorflow/core/kernels/scale_and_translate_op_test.cc b/tensorflow/core/kernels/image/scale_and_translate_op_test.cc similarity index 99% rename from tensorflow/core/kernels/scale_and_translate_op_test.cc rename to tensorflow/core/kernels/image/scale_and_translate_op_test.cc index 412a1012686..2959f93a266 100644 --- a/tensorflow/core/kernels/scale_and_translate_op_test.cc +++ b/tensorflow/core/kernels/image/scale_and_translate_op_test.cc @@ -21,9 +21,9 @@ limitations under the License. 
#include "tensorflow/core/framework/tensor_testutil.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/kernels/image/sampling_kernels.h" #include "tensorflow/core/kernels/ops_testutil.h" #include "tensorflow/core/kernels/ops_util.h" -#include "tensorflow/core/kernels/sampling_kernels.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/lib/random/random.h" #include "tensorflow/core/lib/random/simple_philox.h" diff --git a/tensorflow/core/kernels/linalg/BUILD b/tensorflow/core/kernels/linalg/BUILD index c735f58ae51..ab25fad3ec3 100644 --- a/tensorflow/core/kernels/linalg/BUILD +++ b/tensorflow/core/kernels/linalg/BUILD @@ -10,19 +10,6 @@ load( ) load("//tensorflow:tensorflow.bzl", "tf_cuda_cc_test") -# Description: -# Op kernel implementations for TensorFlow. -# -# Note: Any test that uses GPU support and which we would like to -# benchmark should be linked statically so that it can be executed -# from a py_binary or cuda_py_test test logger. For such a test, -# append "_gpu" to the test name to invoke the GPU benchmarks. Example: -# -# # for CPU tests -# $ bazel test --config opt //third_party/tensorflow/core/kernels:my_op_test -# # for GPU benchmarks -# $ bazel run --config opt --config=cuda //third_party/tensorflow/core/kernels:my_op_test_gpu -- --benchmarks=.. -# package( default_visibility = [ "//tensorflow:__subpackages__", diff --git a/tensorflow/core/kernels/mkl/BUILD b/tensorflow/core/kernels/mkl/BUILD index 4abeee20e30..16180a5b7bd 100644 --- a/tensorflow/core/kernels/mkl/BUILD +++ b/tensorflow/core/kernels/mkl/BUILD @@ -242,16 +242,8 @@ tf_mkl_kernel_library( name = "mkl_dequantize_op", srcs = ["mkl_dequantize_op.cc"], deps = [ - "//tensorflow/core/kernels:concat_lib_hdrs", - "//tensorflow/core/kernels:conv_ops", - "//tensorflow/core/kernels:cwise_op", - "//tensorflow/core/kernels:eigen_helpers", - "//tensorflow/core/kernels:image_resizer_state", - "//tensorflow/core/kernels:ops_util", - "//tensorflow/core/kernels:pooling_ops", - "//tensorflow/core/kernels:quantization_utils", - "//tensorflow/core/kernels:quantized_ops", - "//tensorflow/core/kernels:transpose_functor", + "//third_party/eigen3", + "@gemmlowp", "//tensorflow/core:array_ops_op_lib", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", @@ -259,8 +251,16 @@ tf_mkl_kernel_library( "//tensorflow/core:math_ops_op_lib", "//tensorflow/core:mkl_graph_util", "//tensorflow/core:nn_ops_op_lib", - "//third_party/eigen3", - "@gemmlowp", + "//tensorflow/core/kernels:concat_lib_hdrs", + "//tensorflow/core/kernels:conv_ops", + "//tensorflow/core/kernels:cwise_op", + "//tensorflow/core/kernels:eigen_helpers", + "//tensorflow/core/kernels:ops_util", + "//tensorflow/core/kernels:pooling_ops", + "//tensorflow/core/kernels:quantization_utils", + "//tensorflow/core/kernels:quantized_ops", + "//tensorflow/core/kernels:transpose_functor", + "//tensorflow/core/util:image_resizer_state", ] + mkl_deps(), ) @@ -373,15 +373,15 @@ tf_mkl_kernel_library( "mkl_requantize_per_channel_op.cc", ], deps = [ + "@gemmlowp", "//tensorflow/core/kernels:concat_lib_hdrs", "//tensorflow/core/kernels:conv_ops", "//tensorflow/core/kernels:eigen_helpers", - "//tensorflow/core/kernels:image_resizer_state", "//tensorflow/core/kernels:meta_support", "//tensorflow/core/kernels:no_op", "//tensorflow/core/kernels:pooling_ops", "//tensorflow/core/kernels:quantization_utils", - "@gemmlowp", + "//tensorflow/core/util:image_resizer_state", ] + MKL_DEPS, ) @@ 
-412,10 +412,10 @@ tf_cc_test_mkl( "//tensorflow/core/kernels:bias_op", "//tensorflow/core/kernels:conv_ops", "//tensorflow/core/kernels:depthwise_conv_op", - "//tensorflow/core/kernels:image", "//tensorflow/core/kernels:matmul_op", "//tensorflow/core/kernels:pad_op", "//tensorflow/core/kernels:relu_op", + "//tensorflow/core/kernels/image:image", ] + MKL_TEST_DEPS, ) diff --git a/tensorflow/core/kernels/quantized_resize_bilinear_op.cc b/tensorflow/core/kernels/quantized_resize_bilinear_op.cc index 4da56cde547..da0a35a6554 100644 --- a/tensorflow/core/kernels/quantized_resize_bilinear_op.cc +++ b/tensorflow/core/kernels/quantized_resize_bilinear_op.cc @@ -25,9 +25,9 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/types.h" -#include "tensorflow/core/kernels/image_resizer_state.h" #include "tensorflow/core/kernels/quantization_utils.h" #include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/util/image_resizer_state.h" namespace tensorflow { diff --git a/tensorflow/core/util/BUILD b/tensorflow/core/util/BUILD index 634a937d1c4..4d2ff9a8058 100644 --- a/tensorflow/core/util/BUILD +++ b/tensorflow/core/util/BUILD @@ -40,11 +40,12 @@ package( licenses = ["notice"], # Apache 2.0 ) -# List of exported proto source files. +# List of exported source files. exports_files( srcs = [ "event.proto", "example_proto_fast_parsing_test.proto", + "image_resizer_state.h", "memmapped_file_system.proto", "saved_tensor_slice.proto", ], @@ -631,8 +632,18 @@ tf_kernel_library( ]), ) -# Tests. +cc_library( + name = "image_resizer_state", + hdrs = ["image_resizer_state.h"], + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core/kernels:bounds_check", + "//third_party/eigen3", + ], +) +# Tests. tf_cc_test( name = "overflow_test", size = "small", diff --git a/tensorflow/core/kernels/image_resizer_state.h b/tensorflow/core/util/image_resizer_state.h similarity index 98% rename from tensorflow/core/kernels/image_resizer_state.h rename to tensorflow/core/util/image_resizer_state.h index 1b1550fd47a..b302021918d 100644 --- a/tensorflow/core/kernels/image_resizer_state.h +++ b/tensorflow/core/util/image_resizer_state.h @@ -18,12 +18,12 @@ limitations under the License. // reduce code duplication and ensure consistency across the different // resizers, it performs the input validation. 
-#ifndef TENSORFLOW_CORE_KERNELS_IMAGE_RESIZER_STATE_H_ -#define TENSORFLOW_CORE_KERNELS_IMAGE_RESIZER_STATE_H_ +#ifndef TENSORFLOW_CORE_KERNELS_UTIL_IMAGE_RESIZER_STATE_H_ +#define TENSORFLOW_CORE_KERNELS_UTIL_IMAGE_RESIZER_STATE_H_ #define EIGEN_USE_THREADS - #include + #include #include @@ -228,4 +228,4 @@ struct ImageResizerGradientState { } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_IMAGE_RESIZER_STATE_H_ +#endif // TENSORFLOW_CORE_KERNELS_UTIL_IMAGE_RESIZER_STATE_H_ diff --git a/tensorflow/examples/label_image/BUILD b/tensorflow/examples/label_image/BUILD index a0e5005d45a..7c3a6dca1b2 100644 --- a/tensorflow/examples/label_image/BUILD +++ b/tensorflow/examples/label_image/BUILD @@ -38,7 +38,7 @@ tf_cc_binary( "//tensorflow/core:portable_tensorflow_lib", # cc:android_tensorflow_image_op is for including jpeg/gif/png # decoder to enable real-image evaluation on Android - "//tensorflow/core/kernels:android_tensorflow_image_op", + "//tensorflow/core/kernels/image:android_tensorflow_image_op", ], "//conditions:default": [ "//tensorflow/cc:cc_ops", From a392a47994310363af835146586eb9c3b8f57466 Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Tue, 11 Aug 2020 10:59:27 -0700 Subject: [PATCH 2515/2522] [MLIR] Handle while loops correctly in ResourceAliasAnalysis - When arguments pass through the while loop body to results, the existing code was not handling the loop back branch correctly. - Fixed the code to handle loops correctly by propagating resource id's along the loop back edge till the results converge. PiperOrigin-RevId: 326056635 Change-Id: I3dde7f7c9b63be15d47c00ce2996d15e614ecc6c --- .../analysis/resource_alias_analysis.cc | 145 ++++++++++++------ .../analysis/resource_alias_analysis.h | 17 +- .../tests/resource-alias-analysis-test.mlir | 16 +- 3 files changed, 116 insertions(+), 62 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.cc b/tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.cc index 53de595eef2..256217b6542 100644 --- a/tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.cc +++ b/tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.cc @@ -45,8 +45,8 @@ limitations under the License. namespace mlir { namespace TF { +namespace detail { -namespace { //===----------------------------------------------------------------------===// // BacktrackAnalysisInfo //===----------------------------------------------------------------------===// @@ -86,9 +86,6 @@ class BacktrackAnalysisInfo { // Backtracked values indexed by the result number. llvm::SmallVector backtracked_values_; }; -} // namespace - -namespace detail { //===----------------------------------------------------------------------===// // BacktrackAnalysis @@ -169,9 +166,6 @@ Value BacktrackAnalysis::BacktrackValue(Value value) { } return value; } -} // namespace detail - -namespace { // Analyze the region. BacktrackAnalysisInfo::BacktrackAnalysisInfo( @@ -188,6 +182,8 @@ BacktrackAnalysisInfo::BacktrackAnalysisInfo( backtracked_values_.push_back(backtrack_analysis.BacktrackValue(result)); } +namespace { + //===----------------------------------------------------------------------===// // ResourceAliasAnalysisInfo helper functions. 
//===----------------------------------------------------------------------===// @@ -224,14 +220,13 @@ int64_t GetOrCreateIdForVarHandle(VarHandleOp handle, int64_t* next_id, } // namespace -namespace detail { //===----------------------------------------------------------------------===// // ResourceAliasAnalysisInfo //===----------------------------------------------------------------------===// // Constructs the analysis info by analyzing the given function. ResourceAliasAnalysisInfo::ResourceAliasAnalysisInfo( - FuncOp func_op, const detail::BacktrackAnalysis& backtrack_analysis) { + FuncOp func_op, const BacktrackAnalysis& backtrack_analysis) { // This function populates resource_value_to_ids_ and id_to_resource_values_. int64_t next_unique_id = 0; @@ -293,15 +288,6 @@ ResourceAliasAnalysisInfo::ResourceAliasAnalysisInfo( }); llvm::StringMap var_handle_name_id_map; - auto forward_input_to_output = [&](const Value& operand, - const OpResult& result) { - auto operand_it = resource_value_to_ids_.find(operand); - assert(operand_it != resource_value_to_ids_.end() && - "A resource-type output does not have the corresponding " - "resource-type input."); - for (int64_t id : operand_it->second) AddValueUniqueIDMapping(result, id); - }; - func_op.walk([&](Operation* op) { if (auto var_handle = dyn_cast(op)) { AddValueUniqueIDMapping( @@ -310,36 +296,14 @@ ResourceAliasAnalysisInfo::ResourceAliasAnalysisInfo( &var_handle_name_id_map)); } else if (llvm::isa(op)) { for (auto result : filter_resources(op->getResults())) - forward_input_to_output(op->getOperand(result.getResultNumber()), - result); + PropagateInputToOutput(op->getOperand(result.getResultNumber()), + result); } else if (auto while_op = dyn_cast(op)) { - const auto& body_info = - backtrack_analysis.GetAnalysisForFunc(while_op.body_func()); - // If a result is a passthrough of the body input, use the corresponding - // operand's resource IDs. - for (auto result : filter_resources(while_op.getResults())) { - auto passthrough_arg = body_info.GetArg(result.getResultNumber()); - if (passthrough_arg) { - forward_input_to_output( - while_op.getOperand(passthrough_arg.getValue()), result); - } else { - AddValueUniqueIDMapping(result, kUnknownResourceId); - } - } + AnalyzeWhileLoop(while_op, backtrack_analysis.GetAnalysisForFunc( + while_op.body_func())); } else if (auto while_region = dyn_cast(op)) { - const auto& body_info = - backtrack_analysis.GetAnalysisForRegion(while_region.body()); - // If a result is a passthrough of the body input, use the corresponding - // operand's resource IDs. 
- for (auto result : filter_resources(while_region.getResults())) { - auto passthrough_arg = body_info.GetArg(result.getResultNumber()); - if (passthrough_arg) { - forward_input_to_output( - while_region.getOperand(passthrough_arg.getValue()), result); - } else { - AddValueUniqueIDMapping(result, kUnknownResourceId); - } - } + AnalyzeWhileLoop(while_region, backtrack_analysis.GetAnalysisForRegion( + while_region.body())); } else if (auto if_op = dyn_cast(op)) { const auto& then_info = backtrack_analysis.GetAnalysisForFunc(if_op.then_func()); @@ -353,8 +317,8 @@ ResourceAliasAnalysisInfo::ResourceAliasAnalysisInfo( if (passthrough_then_arg && passthrough_else_arg) { Value then_operand = if_op.input()[passthrough_then_arg.getValue()]; Value else_operand = if_op.input()[passthrough_else_arg.getValue()]; - forward_input_to_output(then_operand, result); - forward_input_to_output(else_operand, result); + PropagateInputToOutput(then_operand, result); + PropagateInputToOutput(else_operand, result); } else { AddValueUniqueIDMapping(result, kUnknownResourceId); } @@ -374,8 +338,8 @@ ResourceAliasAnalysisInfo::ResourceAliasAnalysisInfo( // IfRegion, it will have been visited earlier and a mapping would // exist for that value. If its computed within the region, then again // a mapping would exist. - forward_input_to_output(then_result, result); - forward_input_to_output(else_result, result); + PropagateInputToOutput(then_result, result); + PropagateInputToOutput(else_result, result); } } else if (auto call = dyn_cast(op)) { FuncOp func = dyn_cast(call.resolveCallable()); @@ -387,7 +351,7 @@ ResourceAliasAnalysisInfo::ResourceAliasAnalysisInfo( for (auto result : filter_resources(op->getResults())) { auto passthrough_arg = func_info.GetArg(result.getResultNumber()); if (passthrough_arg) { - forward_input_to_output( + PropagateInputToOutput( call.getArgOperands()[passthrough_arg.getValue()], result); } else { AddValueUniqueIDMapping(result, kUnknownResourceId); @@ -400,6 +364,85 @@ ResourceAliasAnalysisInfo::ResourceAliasAnalysisInfo( }); } +// Propagates the resource ID's from an input operand to a result. Returns true +// if the mapping changed. +bool ResourceAliasAnalysisInfo::PropagateInputToOutput(const Value& operand, + const OpResult& result) { + auto operand_it = resource_value_to_ids_.find(operand); + assert(operand_it != resource_value_to_ids_.end() && + "A resource-type output does not have the corresponding " + "resource-type input."); + bool change = false; + for (int64_t id : operand_it->second) + change = AddValueUniqueIDMapping(result, id) || change; + return change; +} + +// Analyzes while loops to compute resourceIDs for the loop results. +// +// (1) The base case for the analysis is that if the loop body does not execute +// at all, the resource IDs for each result is the same as the resource IDs +// of the corresponding input. +// (2) If the loop does execute one or more times, then we need to account for +// data flow through the body of the while loop. If result #r is the same +// as arg #a of the loop body (pass through argument), then we can reason +// further, else if the result is not a passthrough, we mark it as unknown. +// (3) For passthrough results, if result #r is the same as arg #a of the loop +// body, after one iteration, result #r = arg #a, so we need to also +// propagate arg #a to result #r. After another iteration, arg #a of the +// loop body will be result #a of the previous iteration. So then we need +// propagate from result #a to result #r. 
Generalizing, the resource ID +// propagation (for results which are passthrough) looks like: +// +// for r in (0, num_results) : result[r] = arg[r]; +// repeat till no change { +// a = passthrough arg for result #r; +// result[r] += result[a]; +// } +// +void ResourceAliasAnalysisInfo::AnalyzeWhileLoop( + Operation* while_op, const BacktrackAnalysisInfo& body_info) { + // Seed the resource ID's for the results using either the resource ID of the + // passthrough arg, or unknown. We need to perform further analysis if we + // find a passthrough arg which is not the same as the corresponding result #. + llvm::SmallVector, 4> passthrough_args( + while_op->getNumResults()); + bool need_analysis = false; + for (auto result : filter_resources(while_op->getResults())) { + int result_index = result.getResultNumber(); + passthrough_args[result_index] = body_info.GetArg(result_index); + if (passthrough_args[result_index]) { + int passthru_index = passthrough_args[result_index].getValue(); + PropagateInputToOutput(while_op->getOperand(passthru_index), result); + need_analysis |= + !IsUnknownResource(result) && passthru_index != result_index; + } else { + AddValueUniqueIDMapping(result, kUnknownResourceId); + } + } + + if (!need_analysis) return; + + // We found a result that is not unknown and whose passthrough operand index + // is not the same as the result index, which means there is "crosstalk" + // between 2 or more operands. In that case, we do an iterative propagation + // of resource ID's till the results converge. + bool change = true; + while (change) { + change = false; + for (auto result : filter_resources(while_op->getResults())) { + if (IsUnknownResource(result)) continue; + // If this result has a valid passthrough arg, propagate resource ID's + // from the result of the passthrough arg + int result_index = result.getResultNumber(); + int passthru_index = passthrough_args[result_index].getValue(); + change = + PropagateInputToOutput(while_op->getResult(passthru_index), result) || + change; + } + } +} + bool ResourceAliasAnalysisInfo::IsUnknownResource(Value resource) const { auto it = resource_value_to_ids_.find(resource); assert(it != resource_value_to_ids_.end() && !it->getSecond().empty()); diff --git a/tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.h b/tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.h index d9fd693042f..c965b5d7602 100644 --- a/tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.h +++ b/tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.h @@ -35,6 +35,7 @@ namespace mlir { namespace TF { namespace detail { class BacktrackAnalysis; +class BacktrackAnalysisInfo; // Resource alias analysis information for a single function. class ResourceAliasAnalysisInfo { @@ -57,15 +58,25 @@ class ResourceAliasAnalysisInfo { llvm::SmallSetVector GetResourceAliases(Value resource) const; private: - // Maps resource value to unique ID and vice-versa. - void AddValueUniqueIDMapping(Value value, int64_t id) { + // Maps resource value to unique ID and vice-versa. Returns true if the + // mapping has changed. + bool AddValueUniqueIDMapping(Value value, int64_t id) { resource_value_to_ids_[value].insert(id); - id_to_resource_values_[id].insert(value); + return id_to_resource_values_[id].insert(value); } // Returns the set unique Values which map to `id`. const llvm::SmallSetVector& GetUniqueIdResources(int64_t id) const; + // Propagates the resource ID's from an input operand to a result. 
Returns + // true if the mapping has changed. + bool PropagateInputToOutput(const Value& operand, const OpResult& result); + + // Analyzes while loops to compute resourceID's for the loop results. + // `body_info` is the backtrack analysis info for the loop body. + void AnalyzeWhileLoop(Operation* while_op, + const BacktrackAnalysisInfo& body_info); + // Maps each resource-type value to a set of unique IDs that it could alias. llvm::SmallDenseMap, 8> resource_value_to_ids_; diff --git a/tensorflow/compiler/mlir/tensorflow/tests/resource-alias-analysis-test.mlir b/tensorflow/compiler/mlir/tensorflow/tests/resource-alias-analysis-test.mlir index af63f3312bc..87da399b726 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/resource-alias-analysis-test.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/resource-alias-analysis-test.mlir @@ -112,14 +112,14 @@ func @if_else(%arg0: !tf_res, %arg1: !tf_res) -> (!tf_res, !tf_res, !tf_res) { // CHECK-LABEL: func @while_op_aliasing // expected-remark@below {{Region #0, Arg #0, ID 4 : 1, 4}} -// expected-remark@below {{Region #0, Arg #1, ID 5 : 1, 3, 5}} -// expected-remark@below {{Region #0, Arg #2, ID 6 : 1, 2, 6}} +// expected-remark@below {{Region #0, Arg #1, ID 5 : 1, 2, 3, 5}} +// expected-remark@below {{Region #0, Arg #2, ID 6 : 1, 2, 3, 6}} func @while_op_aliasing(%arg0: !tf_res, %arg1: !tf_res, %arg2: !tf_res) { // expected-remark@below {{Result #0, ID 0 : 0}} %vh0 = "tf.VarHandleOp"() {container = "c", shared_name = "v0"} : () -> !tf_res // expected-remark@below {{Result #0, ID 1 : Unknown}} - // expected-remark@below {{Result #1, ID 2 : 1, 2, 6}} - // expected-remark@below {{Result #2, ID 3 : 1, 3, 5}} + // expected-remark@below {{Result #1, ID 2 : 1, 2, 3, 5, 6}} + // expected-remark@below {{Result #2, ID 3 : 1, 2, 3, 5, 6}} %w:3 = "tf.While"(%arg0, %arg1, %arg2) { body = @while_body, cond = @while_cond, is_stateless = false } : (!tf_res, !tf_res, !tf_res) -> (!tf_res, !tf_res, !tf_res) @@ -205,14 +205,14 @@ func @if_region_aliasing(%arg0: !tf_res, %arg1: !tf_res) { // CHECK-LABEL: func @while_region_aliasing // expected-remark@below {{Region #0, Arg #0, ID 11 : 1, 8, 11}} -// expected-remark@below {{Region #0, Arg #1, ID 12 : 1, 8, 10, 12}} -// expected-remark@below {{Region #0, Arg #2, ID 13 : 1, 8, 9, 13}} +// expected-remark@below {{Region #0, Arg #1, ID 12 : 1, 8, 9, 10, 12}} +// expected-remark@below {{Region #0, Arg #2, ID 13 : 1, 8, 9, 10, 13}} func @while_region_aliasing(%arg0: !tf_res, %arg1: !tf_res, %arg2: !tf_res) { // expected-remark@below {{Result #0, ID 0 : 0, 1, 8}} %vh0 = "tf.VarHandleOp"() {container = "c", shared_name = "v0"} : () -> !tf_res // expected-remark@below {{Result #0, ID 8 : Unknown}} - // expected-remark@below {{Result #1, ID 9 : 1, 8, 9, 13}} - // expected-remark@below {{Result #2, ID 10 : 1, 8, 10, 12}} + // expected-remark@below {{Result #1, ID 9 : 1, 8, 9, 10, 12, 13}} + // expected-remark@below {{Result #2, ID 10 : 1, 8, 9, 10, 12, 13}} // expected-remark@below {{Region #0, Arg #0, ID 2 : 1, 2, 8}} // expected-remark@below {{Region #0, Arg #1, ID 3 : 1, 3, 8}} // expected-remark@below {{Region #0, Arg #2, ID 4 : 1, 4, 8}} From ede136a3ba5c4dad4a6f58cf408ffe8206e1677d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 11 Aug 2020 10:59:38 -0700 Subject: [PATCH 2516/2522] Add a workaround for slowness of tf.while_loop in the default executor when maximum_iterations is set. Fixes #40517. 
PiperOrigin-RevId: 326056684 Change-Id: I81854d6731a9134b695c704b0f28786091f8239e --- .../python/autograph/operators/control_flow.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/tensorflow/python/autograph/operators/control_flow.py b/tensorflow/python/autograph/operators/control_flow.py index ef9c3ae6427..f194c446dc0 100644 --- a/tensorflow/python/autograph/operators/control_flow.py +++ b/tensorflow/python/autograph/operators/control_flow.py @@ -81,7 +81,6 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import control_flow_util from tensorflow.python.ops import math_ops from tensorflow.python.ops import tensor_array_ops from tensorflow.python.ops.ragged import ragged_tensor @@ -479,9 +478,7 @@ def _known_len_tf_for_stmt( return control_flow_ops.cond(main_test, extra_test, lambda: False) return main_test - # TODO(b/159186914): Remove. - if not control_flow_util.GraphOrParentsInXlaContext(ops.get_default_graph()): - opts['maximum_iterations'] = n + opts['maximum_iterations'] = n _tf_while_stmt( aug_test, @@ -527,9 +524,7 @@ def _tf_ragged_for_stmt( return control_flow_ops.cond(main_test, extra_test, lambda: False) return main_test - # TODO(b/159186914): Remove. - if not control_flow_util.GraphOrParentsInXlaContext(ops.get_default_graph()): - opts['maximum_iterations'] = n + opts['maximum_iterations'] = n _tf_while_stmt( aug_test, @@ -587,10 +582,8 @@ def _tf_range_for_stmt( main_test = control_flow_ops.cond(main_test, extra_test, lambda: False) return main_test - # TODO(b/134181679): Remove. - if not control_flow_util.GraphOrParentsInXlaContext(ops.get_default_graph()): - opts['maximum_iterations'] = math_ops.cast( - misc.get_range_len(start, limit, delta), dtypes.int32) + opts['maximum_iterations'] = math_ops.cast( + misc.get_range_len(start, limit, delta), dtypes.int32) _tf_while_stmt( aug_test, From b7685e0b65c6056da80122cf053942690b487831 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 11 Aug 2020 11:16:13 -0700 Subject: [PATCH 2517/2522] No external changes. PiperOrigin-RevId: 326060668 Change-Id: I44be7b76ffebddf584b13f6baab1769a98c27321 --- tensorflow/compiler/xla/pjrt/BUILD | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/compiler/xla/pjrt/BUILD b/tensorflow/compiler/xla/pjrt/BUILD index 6e61e0600a0..5b3b75eb352 100644 --- a/tensorflow/compiler/xla/pjrt/BUILD +++ b/tensorflow/compiler/xla/pjrt/BUILD @@ -59,6 +59,10 @@ cc_library( name = "tracked_device_buffer", srcs = ["tracked_device_buffer.cc"], hdrs = ["tracked_device_buffer.h"], + visibility = [ + "//learning/pathways/data_parallel:__pkg__", + "//tensorflow:internal", + ], deps = [ ":event_pool", ":local_device_state", From 47bd06cf2132cee053bfaa1e8d925f12f0c31c75 Mon Sep 17 00:00:00 2001 From: Ken Franko Date: Tue, 11 Aug 2020 11:16:44 -0700 Subject: [PATCH 2518/2522] Don't extract Embedding ops for outside compilation. These ops are rewritten as part of the compile op and shouldn't be marked for outside compilation. 
PiperOrigin-RevId: 326060767 Change-Id: I92b7d9500c05a8dad011b5a19df0a654346edeca --- .../mark_ops_for_outside_compilation.mlir | 14 ++++++++++++++ .../mark_ops_for_outside_compilation.cc | 19 ++++++++++++++++--- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/mark_ops_for_outside_compilation.mlir b/tensorflow/compiler/mlir/tensorflow/tests/mark_ops_for_outside_compilation.mlir index 0bb37e4c3cd..2d86889e35b 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/mark_ops_for_outside_compilation.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/mark_ops_for_outside_compilation.mlir @@ -32,6 +32,20 @@ func @tf2xla_fallback_op() -> tensor { return %0 : tensor } +// CHECK-LABEL: func @ignore_embedding_ops +func @ignore_embedding_ops() -> () { + "tf_device.cluster"() ( { + // CHECK: "tf.RecvTPUEmbeddingActivations" + // CHECK-NOT: _xla_outside_compilation + // CHECK: "tf.SendTPUEmbeddingGradients" + // CHECK-NOT: _xla_outside_compilation + %2:2 = "tf.RecvTPUEmbeddingActivations"() {_tpu_embedding_layer = "call1", config = "\0A\0B\0C\0D"} : () -> (tensor<2x2xf32>, tensor<4x4xf32>) + "tf.SendTPUEmbeddingGradients"(%2#0, %2#1) {_tpu_embedding_layer = "call1", config = "\0A\0B\0C\0D", operand_segment_sizes = dense<[2, 0]> : vector<2xi32>} : (tensor<2x2xf32>, tensor<4x4xf32>) -> () + tf_device.return + }) {num_cores_per_replica = 1, topology = "", device_assignment = []} : () -> () + return +} + // CHECK-LABEL: func @op_string_result func @op_string_result() -> tensor { %0 = "tf_device.cluster"() ( { diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/mark_ops_for_outside_compilation.cc b/tensorflow/compiler/mlir/tensorflow/transforms/mark_ops_for_outside_compilation.cc index cd34525f2af..ece26dca416 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/mark_ops_for_outside_compilation.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/mark_ops_for_outside_compilation.cc @@ -48,9 +48,21 @@ struct MarkOpsForOutsideCompilation // added. void AddSupportedControlFlowOps(MLIRContext* context, llvm::DenseSet* supported_ops) { - supported_ops->insert(OperationName("tf.IfRegion", context)); - supported_ops->insert(OperationName("tf.WhileRegion", context)); - supported_ops->insert(OperationName("tf.Yield", context)); + supported_ops->insert( + OperationName(TF::IfRegionOp::getOperationName(), context)); + supported_ops->insert( + OperationName(TF::WhileRegionOp::getOperationName(), context)); + supported_ops->insert( + OperationName(TF::YieldOp::getOperationName(), context)); +} + +// These embedding ops are rewritten when running TPUCompileOp. +void AddRewrittenEmbeddingOps(MLIRContext* context, + llvm::DenseSet* supported_ops) { + supported_ops->insert(OperationName( + TF::RecvTPUEmbeddingActivationsOp::getOperationName(), context)); + supported_ops->insert(OperationName( + TF::SendTPUEmbeddingGradientsOp::getOperationName(), context)); } bool HasStringOperand(Operation& op) { @@ -137,6 +149,7 @@ void MarkOpsForOutsideCompilation::runOnOperation() { supported_ops.insert(*pattern->getRootKind()); } AddSupportedControlFlowOps(module.getContext(), &supported_ops); + AddRewrittenEmbeddingOps(module.getContext(), &supported_ops); auto result = module.walk([&](tf_device::ClusterOp cluster) { if (failed( From 2940b356dfaf06ffb421968ba9cb163f0edde013 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 11 Aug 2020 11:50:51 -0700 Subject: [PATCH 2519/2522] Fix typos in the doc. 
PiperOrigin-RevId: 326068303 Change-Id: I7ffc0560b3ba23055e6e5fe29166747748fbc1aa --- tensorflow/python/keras/metrics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/keras/metrics.py b/tensorflow/python/keras/metrics.py index 6b53a02ce05..b3f391c7897 100644 --- a/tensorflow/python/keras/metrics.py +++ b/tensorflow/python/keras/metrics.py @@ -644,7 +644,7 @@ class MeanMetricWrapper(Mean): @keras_export('keras.metrics.Accuracy') class Accuracy(MeanMetricWrapper): - """Calculates how often predictions equals labels. + """Calculates how often predictions equal labels. This metric creates two local variables, `total` and `count` that are used to compute the frequency with which `y_pred` matches `y_true`. This frequency is @@ -686,7 +686,7 @@ class Accuracy(MeanMetricWrapper): @keras_export('keras.metrics.BinaryAccuracy') class BinaryAccuracy(MeanMetricWrapper): - """Calculates how often predictions matches binary labels. + """Calculates how often predictions match binary labels. This metric creates two local variables, `total` and `count` that are used to compute the frequency with which `y_pred` matches `y_true`. This frequency is From 9bc641d16c132adb74b955fa09067630bcc93dae Mon Sep 17 00:00:00 2001 From: Chenkai Kuang Date: Tue, 11 Aug 2020 12:00:53 -0700 Subject: [PATCH 2520/2522] Fix an error when a concrete function is passed to client.schedule. PiperOrigin-RevId: 326070549 Change-Id: I620e3feb329a436121377e144d89f9d88e6bf442 --- tensorflow/python/distribute/client/client.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/python/distribute/client/client.py b/tensorflow/python/distribute/client/client.py index 7bef5e2385c..37f000d4a87 100644 --- a/tensorflow/python/distribute/client/client.py +++ b/tensorflow/python/distribute/client/client.py @@ -293,8 +293,7 @@ class Closure(object): self._output_remote_values = nest.map_structure( lambda x: RemoteValue(self, x), concrete_function.structured_outputs) elif isinstance(function, tf_function.ConcreteFunction): - self._function = cancellation_mgr.get_cancelable_function( - concrete_function) + self._function = cancellation_mgr.get_cancelable_function(function) self._output_remote_values = nest.map_structure( lambda x: RemoteValue(self, x), function.structured_outputs) else: From 0572b205b847917062091e4377110f5431c60d2b Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 11 Aug 2020 12:08:22 -0700 Subject: [PATCH 2521/2522] Roll back XLA/GPU LHLO sort emitter again It breaks an internal msan enabled test. 
PiperOrigin-RevId: 326072372 Change-Id: I245525cefa4da88097725662c75ccb213a328f19 --- tensorflow/compiler/mlir/xla/hlo_utils.cc | 3 - .../non_identity_layouts.hlotxt | 2 +- .../xla/transforms/mhlo_to_lhlo_with_xla.cc | 11 +- .../xla/transforms/mhlo_to_lhlo_with_xla.h | 3 +- tensorflow/compiler/xla/service/gpu/BUILD | 10 - .../compiler/xla/service/gpu/gpu_compiler.cc | 24 +- .../xla/service/gpu/hlo_to_ir_bindings.cc | 20 +- .../xla/service/gpu/hlo_to_ir_bindings.h | 4 - .../xla/service/gpu/ir_emitter_context.h | 7 +- .../xla/service/gpu/ir_emitter_unnested.cc | 416 ++++----------- .../xla/service/gpu/ir_emitter_unnested.h | 82 +-- .../compiler/xla/service/gpu/tests/BUILD | 29 - .../xla/service/gpu/tests/sorting.hlo | 504 +++++++++--------- .../xla/service/gpu/tests/sorting_test.cc | 71 --- .../compiler/xla/service/llvm_ir/llvm_util.cc | 7 +- .../compiler/xla/service/llvm_ir/llvm_util.h | 2 +- 16 files changed, 403 insertions(+), 792 deletions(-) delete mode 100644 tensorflow/compiler/xla/service/gpu/tests/sorting_test.cc diff --git a/tensorflow/compiler/mlir/xla/hlo_utils.cc b/tensorflow/compiler/mlir/xla/hlo_utils.cc index 18b4265d786..cf78c81908d 100644 --- a/tensorflow/compiler/mlir/xla/hlo_utils.cc +++ b/tensorflow/compiler/mlir/xla/hlo_utils.cc @@ -83,9 +83,6 @@ StatusOr> GetPermutationIfAvailable( strides[dim] = accumulated_stride; accumulated_stride *= shape.dimensions(dim); } - if (accumulated_stride == 0) { - return llvm::SmallVector{}; - } return llvm::SmallVector{ makeStridedLinearLayoutMap(strides, /*offset=*/0, builder.getContext())}; } diff --git a/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/non_identity_layouts.hlotxt b/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/non_identity_layouts.hlotxt index a83e36cff64..3630d2d45e4 100644 --- a/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/non_identity_layouts.hlotxt +++ b/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/non_identity_layouts.hlotxt @@ -8,6 +8,6 @@ HloModule TestModule ENTRY TestComputation { x = f32[3, 2]{1,0} parameter(0) - // CHECK: "lmhlo.copy"(%{{.*}}, %{{.*}}) {name = "copy.1"} : (memref<3x2xf32>, memref<3x2xf32, #[[MAP]]>) -> () + // CHECK: "lmhlo.copy"(%{{.*}}, %{{.*}}) : (memref<3x2xf32>, memref<3x2xf32, #[[MAP]]>) -> () ROOT x.copy = f32[3, 2]{0,1} copy(x) } diff --git a/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.cc b/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.cc index 6ce91599fb1..832bad2dcc8 100644 --- a/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.cc +++ b/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.cc @@ -34,6 +34,7 @@ limitations under the License. 
#include "mlir/Pass/Pass.h" // from @llvm-project #include "mlir/Pass/PassOptions.h" // from @llvm-project #include "mlir/Translation.h" // from @llvm-project +#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" #include "tensorflow/compiler/mlir/xla/hlo_function_importer.h" #include "tensorflow/compiler/mlir/xla/hlo_utils.h" #include "tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.h" @@ -181,10 +182,7 @@ template StatusOr LhloDialectEmitter::CreateOpWithoutAttrs( HloInstruction* instr) { Location loc = getLocation(instr); - std::pair attrs[] = { - {Identifier::get("name", builder_.getContext()), - builder_.getStringAttr(instr->name())}, - }; + ArrayRef> attrs; ArrayRef rets{}; llvm::SmallVector operands; @@ -254,14 +252,15 @@ Status LhloDialectEmitter::DefaultAction(HloInstruction* instr) { return Status::OK(); } -StatusOr LhloDialectEmitter::EmitSortOp(HloInstruction* instr) { +StatusOr LhloDialectEmitter::EmitSortOp( + HloInstruction* instr) { TF_ASSIGN_OR_RETURN(auto sort, CreateOpWithoutAttrs(instr)); auto* sort_instr = ::xla::Cast<::xla::HloSortInstruction>(instr); sort.dimensionAttr(builder_.getI64IntegerAttr(sort_instr->sort_dimension())); sort.is_stableAttr(builder_.getBoolAttr(sort_instr->is_stable())); TF_RETURN_IF_ERROR(::xla::HloFunctionImporter::ImportAsRegion( *sort_instr->called_computations()[0], &sort.comparator(), &builder_)); - return sort; + return sort.getOperation(); } Status LhloDialectEmitter::HandleSort(HloInstruction* instr) { diff --git a/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.h b/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.h index 4000fa01970..bdc977616b1 100644 --- a/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.h +++ b/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.h @@ -19,7 +19,6 @@ limitations under the License. 
#include "mlir/IR/Builders.h" // from @llvm-project #include "mlir/IR/Module.h" // from @llvm-project #include "mlir/IR/StandardTypes.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" #include "tensorflow/compiler/xla/service/buffer_assignment.h" #include "tensorflow/compiler/xla/service/hlo_module.h" @@ -42,7 +41,7 @@ class LhloDialectEmitter : public ::xla::DfsHloVisitorWithDefault { builder_(module.getContext()), i8_type_(builder_.getIntegerType(8)) {} - ::xla::StatusOr EmitSortOp(::xla::HloInstruction* instr); + ::xla::StatusOr EmitSortOp(::xla::HloInstruction* instr); private: template diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index a19f9965fc7..074fbd92b27 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -254,11 +254,6 @@ cc_library( ":target_util", ":thunk", ":thunk_emitter", - "//tensorflow/compiler/mlir/hlo:lhlo", - "//tensorflow/compiler/mlir/xla:hlo_utils", - "//tensorflow/compiler/mlir/xla:mhlo_to_lhlo_with_xla", - "//tensorflow/compiler/mlir/xla:mlir_hlo_to_hlo", - "//tensorflow/compiler/mlir/xla:type_to_shape", "//tensorflow/compiler/xla:literal", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:status_macros", @@ -296,8 +291,6 @@ cc_library( "@com_google_absl//absl/types:span", "@llvm-project//llvm:Core", "@llvm-project//llvm:Support", - "@llvm-project//mlir:IR", - "@llvm-project//mlir:StandardOps", ], ) @@ -1166,7 +1159,6 @@ cc_library( ":target_constants", ":tree_reduction_rewriter", ":variadic_op_splitter", - "//tensorflow/compiler/mlir/xla:mhlo_to_lhlo_with_xla", "//tensorflow/compiler/xla:protobuf_util", "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:statusor", @@ -1225,8 +1217,6 @@ cc_library( "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", "@llvm-project//llvm:Core", - "@llvm-project//mlir:AllPassesAndDialectsNoRegistration", - "@llvm-project//mlir:IR", ], ) diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index b796737e601..f5bf7476059 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -29,8 +29,6 @@ limitations under the License. 
#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Verifier.h" -#include "mlir/IR/Module.h" // from @llvm-project -#include "mlir/InitAllDialects.h" // from @llvm-project #include "tensorflow/compiler/xla/protobuf_util.h" #include "tensorflow/compiler/xla/service/algebraic_simplifier.h" #include "tensorflow/compiler/xla/service/all_reduce_combiner.h" @@ -518,22 +516,15 @@ static Status CompileModuleToLlvmIrImpl( DumpHloModuleIfEnabled(*hlo_module, **buffer_assignment, "after_optimizations"); - mlir::registerAllDialects(); - mlir::MLIRContext mlir_context; - IrEmitterContext ir_emitter_context( hlo_module, buffer_assignment->get(), platform_name, gpu_device_info, - cuda_compute_capability, profile_index_map, &mlir_context, - llvm_module->get()); + cuda_compute_capability, profile_index_map, llvm_module->get()); HloComputation* entry_computation = hlo_module->entry_computation(); + IrEmitterUnnested ir_emitter(hlo_module->config(), entry_computation, + &ir_emitter_context); - TF_ASSIGN_OR_RETURN( - auto ir_emitter, - IrEmitterUnnested::Create(hlo_module->config(), entry_computation, - &ir_emitter_context)); - - TF_RETURN_IF_ERROR(ir_emitter->EmitConstantGlobals()); + TF_RETURN_IF_ERROR(ir_emitter.EmitConstantGlobals()); { XLA_SCOPED_LOGGING_TIMER("GpuCompiler::RunBackend - IR emission"); @@ -542,10 +533,9 @@ static Status CompileModuleToLlvmIrImpl( ThunkSequence thunk_sequence; absl::Span order = hlo_schedule->ThunkLaunchOrder(); for (HloInstruction* instruction : order) { - TF_RETURN_IF_ERROR(instruction->Visit(ir_emitter.get())); - TF_RETURN_IF_ERROR(ir_emitter->Postprocess(instruction)); - std::unique_ptr thunks = - ir_emitter->ConsumeThunkSequence(); + TF_RETURN_IF_ERROR(instruction->Visit(&ir_emitter)); + TF_RETURN_IF_ERROR(ir_emitter.Postprocess(instruction)); + std::unique_ptr thunks = ir_emitter.ConsumeThunkSequence(); // The invariants between each input HloInstruction* and output Thunk* are // not all explicitly checked, but at least we can document them here: diff --git a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc index 332db83b6ad..5d38d1b727c 100644 --- a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc +++ b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc @@ -117,11 +117,11 @@ static bool HasMeaningfulName(llvm::Value* value) { return false; } -llvm::Value* CastToTypedValue(const Shape& shape, llvm::Value* ir_value, - llvm::IRBuilder<>* b) { - llvm::Type* pointee_type = - llvm_ir::ShapeToIrType(shape, b->GetInsertBlock()->getModule()); - +llvm::Value* HloToIrBindings::GetTypedIrValue(const HloInstruction& hlo, + ShapeIndexView shape_index, + llvm::Value* ir_value) { + llvm::Type* pointee_type = llvm_ir::ShapeToIrType( + ShapeUtil::GetSubshape(hlo.shape(), shape_index), module_); llvm::Type* dest_type = pointee_type->getPointerTo(); llvm::Value* typed_ir_value; @@ -129,17 +129,9 @@ llvm::Value* CastToTypedValue(const Shape& shape, llvm::Value* ir_value, typed_ir_value = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( llvm::cast(ir_value), dest_type); } else { - typed_ir_value = b->CreatePointerBitCastOrAddrSpaceCast( + typed_ir_value = b_->CreatePointerBitCastOrAddrSpaceCast( ir_value, pointee_type->getPointerTo()); } - return typed_ir_value; -} - -llvm::Value* HloToIrBindings::GetTypedIrValue(const HloInstruction& hlo, - ShapeIndexView shape_index, - llvm::Value* ir_value) { - auto typed_ir_value = CastToTypedValue( - 
ShapeUtil::GetSubshape(hlo.shape(), shape_index), ir_value, b_); if (!HasMeaningfulName(ir_value)) { ir_value->setName(llvm_ir::IrName(&hlo, "raw")); } diff --git a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h index 3813ec6c949..5eef6727801 100644 --- a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h +++ b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h @@ -116,10 +116,6 @@ class HloToIrBindings { llvm::Value* temp_buffer_base_ = nullptr; }; -// Converts `ir_value` with type i8* to a typed LLVM Value* based on `shape`. -llvm::Value* CastToTypedValue(const Shape& shape, llvm::Value* ir_value, - llvm::IRBuilder<>* b); - } // namespace gpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_context.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_context.h index 7d5a8d032e6..9c43f80dc60 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_context.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_context.h @@ -17,7 +17,6 @@ limitations under the License. #define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_IR_EMITTER_CONTEXT_H_ #include "llvm/IR/Module.h" -#include "mlir/IR/MLIRContext.h" // from @llvm-project #include "tensorflow/compiler/xla/service/buffer_assignment.h" #include "tensorflow/compiler/xla/service/gpu/launch_dimensions.h" #include "tensorflow/compiler/xla/service/hlo_execution_profile.h" @@ -35,15 +34,13 @@ class IrEmitterContext { const HloModule* hlo_module, const BufferAssignment* buffer_assignment, std::string platform_name, GpuDeviceInfo gpu_device_info, absl::optional cuda_compute_capability, - const HloProfileIndexMap* profile_index_map, - mlir::MLIRContext* mlir_context, llvm::Module* llvm_module) + const HloProfileIndexMap* profile_index_map, llvm::Module* llvm_module) : hlo_module_(hlo_module), buffer_assignment_(buffer_assignment), platform_name_(std::move(platform_name)), gpu_device_info_(gpu_device_info), cuda_compute_capability_(cuda_compute_capability), profile_index_map_(profile_index_map), - mlir_context_(mlir_context), llvm_module_(llvm_module) {} // Disallow copy and assign. IrEmitterContext(const IrEmitterContext&) = delete; @@ -60,7 +57,6 @@ class IrEmitterContext { return cuda_compute_capability_; } const HloProfileIndexMap* profile_index_map() { return profile_index_map_; } - mlir::MLIRContext* mlir_context() { return mlir_context_; } llvm::Module* llvm_module() { return llvm_module_; } NameUniquer* name_uniquer() { return &name_uniquer_; } @@ -71,7 +67,6 @@ class IrEmitterContext { GpuDeviceInfo gpu_device_info_; absl::optional cuda_compute_capability_; const HloProfileIndexMap* profile_index_map_; - mlir::MLIRContext* mlir_context_; llvm::Module* llvm_module_; NameUniquer name_uniquer_; }; diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index f88c70b1a33..61b78b6004d 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -37,13 +37,6 @@ limitations under the License. 
#include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" -#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project -#include "mlir/IR/Builders.h" // from @llvm-project -#include "mlir/IR/Function.h" // from @llvm-project -#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h" -#include "tensorflow/compiler/mlir/xla/hlo_utils.h" -#include "tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.h" -#include "tensorflow/compiler/mlir/xla/type_to_shape.h" #include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/literal.h" #include "tensorflow/compiler/xla/service/buffer_assignment.h" @@ -151,86 +144,13 @@ void UpdateLaunchDimensions(const LaunchDimensions& launch_dims, Thunk* thunk, llvm::ConstantAsMetadata::get(threads_per_block_ir_value)})); } -const BufferAllocation* GetAllocation( - mlir::BlockArgument func_arg, const BufferAssignment& buffer_assignment) { - auto func_op = - mlir::cast(func_arg.getParentRegion()->getParentOp()); - int64 allocation_index = func_op - .getArgAttrOfType( - func_arg.getArgNumber(), "lmhlo.alloc") - .getValue() - .getSExtValue(); - return &buffer_assignment.GetAllocation(allocation_index); -} - -StatusOr GetAllocationSliceForMlir( - mlir::Value v, const BufferAssignment& buffer_assignment) { - int64 size = v.getType().cast().getSizeInBits() / 8; - - if (auto arg = v.dyn_cast()) { - return BufferAllocation::Slice(GetAllocation(arg, buffer_assignment), 0, - size); - } - - // We match two patterns here: - // * v = ViewOp(arg); - // * v = StaticMemRefCastOp(ViewOp(arg)); - if (mlir::Operation* op = v.getDefiningOp()) { - if (auto cast = mlir::dyn_cast(op)) { - mlir::Value source = cast.getViewSource(); - op = source.getDefiningOp(); - if (!op) { - return Unimplemented("StaticMemRefCastOp has to wrap an op"); - } - } - if (auto view = mlir::dyn_cast(op)) { - return BufferAllocation::Slice( - GetAllocation(view.source().cast(), - buffer_assignment), - mlir::cast(view.byte_shift().getDefiningOp()) - .value() - .cast() - .getValue() - .getSExtValue(), - size); - } - return Unimplemented("StaticMemRefCastOp has to wrap a ViewOp"); - } - - return Unimplemented( - "Operand has to be in the form of ViewOp(arg) or " - "StaticMemRefCastOp(ViewOp(arg))"); -} - -absl::string_view GetHloName(mlir::Operation* op) { - if (auto attr = op->getAttrOfType("name")) { - auto ref = attr.getValue(); - return absl::string_view(ref.data(), ref.size()); - } - return ""; -} - } // namespace IrEmitterUnnested::IrEmitterUnnested(const HloModuleConfig& hlo_module_config, const HloComputation* hlo_computation, IrEmitterContext* ir_emitter_context) : IrEmitter(hlo_module_config, ir_emitter_context, /*is_nested=*/false), - hlo_computation_(hlo_computation), - mlir_scratch_module_(mlir::ModuleOp::create( - mlir::Builder(ir_emitter_context->mlir_context()).getUnknownLoc())), - lhlo_scratch_emitter_(ir_emitter_context_->buffer_assignment(), - *hlo_computation, mlir_scratch_module_.get()) {} - -StatusOr> IrEmitterUnnested::Create( - const HloModuleConfig& hlo_module_config, - const HloComputation* hlo_computation, - IrEmitterContext* ir_emitter_context) { - auto emitter = std::unique_ptr(new IrEmitterUnnested( - hlo_module_config, hlo_computation, ir_emitter_context)); - TF_RETURN_IF_ERROR(emitter->lhlo_scratch_emitter_.Initialize()); - return std::move(emitter); -} + hlo_computation_(hlo_computation) {} Status IrEmitterUnnested::Postprocess(HloInstruction* hlo) { bindings_.UnbindAllLocalIrValues(); @@ -238,11 
+158,12 @@ Status IrEmitterUnnested::Postprocess(HloInstruction* hlo) { } llvm::Function* IrEmitterUnnested::BuildKernelPrototype( - absl::string_view name, absl::Span args) { + const HloInstruction& inst, + absl::Span args) { // Compute the kernel name. The opcode string may contain "-" which cannot be // in a PTX function name, so sanitize the name before uniquifying it. string kernel_name = ir_emitter_context_->name_uniquer()->GetUniqueName( - llvm_ir::SanitizeFunctionName(std::string(name))); + llvm_ir::SanitizeFunctionName(inst.name())); // Create the kernel and add it to the module. llvm::Module* module = ir_emitter_context_->llvm_module(); @@ -438,8 +359,7 @@ Status IrEmitterUnnested::HandleDot(HloInstruction* dot) { } Status IrEmitterUnnested::HandleConditional(HloInstruction* conditional) { - TF_ASSIGN_OR_RETURN(auto thunk, BuildConditionalThunk(conditional)); - AddThunkToThunkSequence(std::move(thunk)); + AddThunkToThunkSequence(BuildConditionalThunk(conditional)); return Status::OK(); } @@ -1118,13 +1038,10 @@ Status IrEmitterUnnested::HandleWhile(HloInstruction* xla_while) { // Build ForThunk for conformant while loops, otherwise build WhileThunk. auto config = xla_while->backend_config(); if (config.ok() && config.ValueOrDie().has_known_trip_count()) { - TF_ASSIGN_OR_RETURN( - auto thunk, + AddThunkToThunkSequence( BuildForThunk(xla_while, config.ValueOrDie().known_trip_count().n())); - AddThunkToThunkSequence(std::move(thunk)); } else { - TF_ASSIGN_OR_RETURN(auto thunk, BuildWhileThunk(xla_while)); - AddThunkToThunkSequence(std::move(thunk)); + AddThunkToThunkSequence(BuildWhileThunk(xla_while)); } return Status::OK(); } @@ -1347,109 +1264,39 @@ Status IrEmitterUnnested::HandleSelect(HloInstruction* select) { return IrEmitter::HandleSelect(select); } -StatusOr -IrEmitterUnnested::GetOrCreateSubComputationFromRegion(mlir::Region* region) { - std::unique_ptr& module = scratch_nested_computations_[region]; - if (module == nullptr) { - xla::XlaComputation xla_computation; - TF_RETURN_IF_ERROR(ConvertRegionToComputation(region, &xla_computation)); - TF_ASSIGN_OR_RETURN(auto program_shape, xla_computation.GetProgramShape()); - TF_ASSIGN_OR_RETURN( - module, HloModule::CreateFromProto(xla_computation.proto(), - HloModuleConfig(program_shape))); - } - return module->entry_computation(); -} - Status IrEmitterUnnested::HandleSort(HloInstruction* sort) { - MlirEmitterInput result; - - TF_ASSIGN_OR_RETURN(auto sort_op, lhlo_scratch_emitter_.EmitSortOp(sort)); - result.op = sort_op; - result.name = GetHloName(sort_op); - // The name in sort op has no semantics, and it's for debug only. If the name - // doesn't exist, we should use a namer (e.g. count-based). - // TODO(timshen): use a namer instead of relying on the HloInstruction names. 
- if (result.name.empty()) { - result.name = sort->name(); - } - const auto& buffer_assignment = ir_emitter_context_->buffer_assignment(); - auto& slice = result.extra_slice; - TF_ASSIGN_OR_RETURN(slice.buffer_slice, - buffer_assignment.GetUniqueSlice(sort, {})); - slice.written = true; - slice.shape = sort->shape(); - - result.thunk_info = GetThunkInfo(sort); - - return EmitMlirSort(result); -} - -Status IrEmitterUnnested::EmitMlirSort(MlirEmitterInput input) { - const auto& buffer_assignment = ir_emitter_context_->buffer_assignment(); - auto sort_op = mlir::cast(input.op); - - int operand_count = sort_op.operands().size(); - std::vector operand_shapes(operand_count); - std::vector slices; - std::vector output_shapes(sort_op.output().size()); - - for (int i = 0; i < operand_count; i++) { - operand_shapes[i] = - TypeToShape(sort_op.operands()[i].getType().cast()); - } - - // Craft n + 1 slices, where the first n are output parameters, and the last - // is the on-device tuple storage. We don't need n operands because sorting - // kernels are always in-place. - for (int i = 0; i < operand_count; i++) { - output_shapes[i] = - TypeToShape(sort_op.output()[i].getType().cast()); - MlirBufferSlice slice; - TF_ASSIGN_OR_RETURN( - slice.buffer_slice, - GetAllocationSliceForMlir(sort_op.output()[i], buffer_assignment)); - slice.written = true; - slice.shape = operand_shapes[i]; - slices.push_back(slice); - } - slices.push_back(input.extra_slice); - std::vector> thunks; - - Shape keys_shape = operand_shapes[0]; - int64 dimension_to_sort = sort_op.dimension().getSExtValue(); - for (int64 i = 0; i < operand_count; ++i) { + Shape keys_shape = sort->operand(0)->shape(); + int64 dimension_to_sort = sort->dimensions(0); + for (int64 i = 0; i < sort->operand_count(); ++i) { + ShapeIndex shape_index = + sort->operand_count() > 1 ? ShapeIndex({i}) : ShapeIndex({}); // We assume that the layout of all involved operands and outputs is the // same. - TF_RET_CHECK( - LayoutUtil::LayoutsInShapesEqual(keys_shape, operand_shapes[i])); - TF_RET_CHECK( - LayoutUtil::LayoutsInShapesEqual(keys_shape, output_shapes[i])); + TF_RET_CHECK(LayoutUtil::LayoutsInShapesEqual(keys_shape, + sort->operand(i)->shape())); + TF_RET_CHECK(LayoutUtil::LayoutsInShapesEqual( + keys_shape, ShapeUtil::GetSubshape(sort->shape(), shape_index))); // If possible, we share buffers. If that is not possible, we need to copy // the values, because the emitter does the sorting in-place. - TF_ASSIGN_OR_RETURN( - auto destination_buffer, - GetAllocationSliceForMlir(sort_op.output()[i], buffer_assignment)); - TF_ASSIGN_OR_RETURN( - auto source_address, - GetAllocationSliceForMlir(sort_op.operands()[i], buffer_assignment)); + auto destination_buffer = GetAllocationSlice(*sort, shape_index); + auto source_address = GetAllocationSlice(*sort->operand(i)); if (destination_buffer != source_address) { // TODO(b/26783907): Figure out why we never seem to share buffers for // key/value sort. 
- VLOG(2) << input.name << " requires initial D2D copy for operand " << i; + VLOG(2) << sort->name() << " requires initial D2D copy for operand " << i; thunks.push_back(absl::make_unique( Thunk::ThunkInfo(), /*source_address=*/source_address, /*destination_buffer=*/destination_buffer, - /*mem_size=*/ShapeUtil::ByteSizeOf(operand_shapes[i]))); + /*mem_size=*/ShapeUtil::ByteSizeOf(sort->operand(i)->shape()))); } } uint64 dimension_to_sort_bound = keys_shape.dimensions(dimension_to_sort); int64 num_stages = tensorflow::Log2Ceiling(dimension_to_sort_bound); - VLOG(2) << input.name << " requires " << num_stages << " stages."; + VLOG(2) << sort->name() << " requires " << num_stages << " stages."; CHECK_GE(1ULL << num_stages, dimension_to_sort_bound); CHECK_LT(1ULL << (num_stages - 1), dimension_to_sort_bound); @@ -1513,10 +1360,10 @@ Status IrEmitterUnnested::EmitMlirSort(MlirEmitterInput input) { // we have not enough threads, or not enough shared memory. Also it does not // give a speedup if the tile size is < 128. int64 total_shared_memory_needed = 0; - for (int64 i = 0; i < operand_count; ++i) { + for (int64 i = 0; i < sort->operand_count(); ++i) { total_shared_memory_needed += - kTileSize * - ShapeUtil::ByteSizeOfPrimitiveType(operand_shapes[i].element_type()); + kTileSize * ShapeUtil::ByteSizeOfPrimitiveType( + sort->operand(i)->shape().element_type()); } bool no_tiling = kTileSize < 128 || @@ -1529,7 +1376,7 @@ Status IrEmitterUnnested::EmitMlirSort(MlirEmitterInput input) { "kTileSize=%d < 128, " "kThreadsPerBlock=%d > threads_per_block_limit=%d, " "total_shared_memory_needed=%d > shared_memory_per_block=%d", - input.name, (no_tiling ? "won't" : "will"), kTileSize, kThreadsPerBlock, + sort->name(), (no_tiling ? "won't" : "will"), kTileSize, kThreadsPerBlock, ir_emitter_context_->gpu_device_info().threads_per_block_limit, total_shared_memory_needed, ir_emitter_context_->gpu_device_info().shared_memory_per_block); @@ -1537,38 +1384,37 @@ Status IrEmitterUnnested::EmitMlirSort(MlirEmitterInput input) { uint64 num_blocks = CeilOfRatio(num_iterations, kThreadsPerBlock); LaunchDimensions tiled_launch_dimensions(num_blocks, kThreadsPerBlock); VLOG(2) << absl::StreamFormat("%s launch dims: %d blocks, %d threads/block", - input.name, num_blocks, kThreadsPerBlock); + sort->name(), num_blocks, kThreadsPerBlock); - std::vector ir_arrays; auto emit_kernel = [&](absl::Span xor_masks) { VLOG(2) << absl::StreamFormat( - "%s uses kernel for xor masks [%s]", input.name, + "%s uses kernel for xor masks [%s]", sort->name(), absl::StrJoin(xor_masks, ", ", [](std::string* out, int64 xor_mask) { absl::StrAppendFormat(out, "0x%x", xor_mask); })); - thunks.push_back(BuildKernelThunkForMlir(input.name, Thunk::ThunkInfo(), - slices, &ir_arrays)); + thunks.push_back( + BuildKernelThunk(sort, /*implements_whole_instruction=*/false)); LaunchDimensions launch_dimensions = xor_masks.size() > 1 ? tiled_launch_dimensions : standard_launch_dimensions; UpdateLaunchDimensions(launch_dimensions, thunks.back().get(), ir_emitter_context_->llvm_module()); std::vector values_arrays; - values_arrays.reserve(operand_count); - for (int64 i = 0; i < operand_count; ++i) { - values_arrays.push_back(ir_arrays[i]); + values_arrays.reserve(sort->operand_count()); + for (int64 i = 0; i < sort->operand_count(); ++i) { + ShapeIndex shape_index = + sort->operand_count() > 1 ? 
ShapeIndex({i}) : ShapeIndex({}); + values_arrays.push_back(GetIrArray(*sort, *sort, shape_index)); } - TF_ASSIGN_OR_RETURN( - const HloComputation* comparator, - GetOrCreateSubComputationFromRegion(&sort_op.comparator())); return llvm_ir::EmitSortInPlace( - dimension_to_sort, values_arrays, IrName(input.name), xor_masks, &b_, + dimension_to_sort, values_arrays, IrName(sort), xor_masks, &b_, launch_dimensions, xor_masks.size() > 1 ? num_iterations_in_sort_dim : standard_num_iterations_in_sort_dim, kTileSize, [&](absl::Span operands, llvm::Value* output) { - return EmitCallToNestedComputation(*comparator, operands, output); + return EmitCallToNestedComputation(*sort->to_apply(), operands, + output); }); }; std::vector xor_masks; @@ -1595,18 +1441,17 @@ Status IrEmitterUnnested::EmitMlirSort(MlirEmitterInput input) { TF_RETURN_IF_ERROR(emit_kernel(xor_masks)); } VLOG(2) << absl::StreamFormat( - "%s requires %d thunks (including any D2D copies)", input.name, + "%s requires %d thunks (including any D2D copies)", sort->name(), thunks.size()); - AddThunkToThunkSequence( - absl::make_unique(input.thunk_info, std::move(thunks))); - if (operand_count > 1) { + AddThunkToThunkSequence(absl::make_unique( + GetThunkInfo(sort), std::move(thunks))); + if (sort->operand_count() > 1) { // Emit the tuple as part of the last stage of sorting. // We are currently in the block sorted.in_bounds.after. b_.SetInsertPoint(b_.GetInsertBlock()->getTerminator()); - llvm_ir::EmitTuple( - ir_arrays[operand_count], - absl::MakeSpan(ir_arrays).subspan(0, ir_arrays.size() - 1), &b_); + llvm_ir::EmitTuple(GetIrArray(*sort, *sort), + ConstructIrArrayForOutputs(*sort), &b_); } return Status::OK(); } @@ -1744,6 +1589,24 @@ Status IrEmitterUnnested::HandleAfterAll(HloInstruction* after_all) { return Status::OK(); } +// Describes how to access a particular subshape for an HLO. For instance if +// `.hlo_index` is {1} and `.gte_index` is {3, 4} then buffer for `.instr` at +// ShapeIndex {1} (i.e. the buffer for the second tuple element of hlo) is found +// at `.buffer_slice`[3][4]. That is, `.slice` is a void***, which we +// dereference twice -- first at index 3, and then at index 4 -- to get the +// address of our buffer. +struct HloBufferSlice { + const HloInstruction* instr; + ShapeIndex hlo_index; + + // The root buffer to look at. + BufferAllocation::Slice buffer_slice; + + // Describes how to dereference starting at that buffer to get to the buffer + // in question. + ShapeIndex gte_index; +}; + // Figures out how to access the buffers for all subshapes of hlo's operands and // for hlo itself (i.e. all the buffers produced by HLO). // @@ -1852,22 +1715,22 @@ static std::vector GetHloBufferSlices( return result; } -std::unique_ptr -IrEmitterUnnested::BuildKernelThunkFromBufferSlices( - absl::string_view name, Thunk::ThunkInfo thunk_info, - absl::Span slices, - std::function - bind_slice_to_ir_value) { - const auto& buffer_assn = ir_emitter_context_->buffer_assignment(); +std::unique_ptr IrEmitterUnnested::BuildKernelThunk( + const HloInstruction* inst, bool implements_whole_instruction) { + const BufferAssignment& buffer_assn = + ir_emitter_context_->buffer_assignment(); + + std::vector hlo_slices = + GetHloBufferSlices(inst, buffer_assn); // Figure out which buffer allocations need to be passed as arguments to our - // kernel. This is simply all of the allocations referenced in slices, + // kernel. This is simply all of the allocations referenced in hlo_slices, // plus the XLA temp buffer (if we have it). 
We always include the temp // buffer because even if the kernel itself doesn't use it, a nested // subcomputation within the kernel (e.g. a kMap's computation) might. std::unordered_set buffers_needed; - for (auto* slice : slices) { - buffers_needed.insert(slice->buffer_slice.allocation()); + for (const auto& hlo_buffer_slice : hlo_slices) { + buffers_needed.insert(hlo_buffer_slice.buffer_slice.allocation()); } absl::optional temp_buffer; for (const BufferAllocation& alloc : buffer_assn.Allocations()) { @@ -1896,7 +1759,7 @@ IrEmitterUnnested::BuildKernelThunkFromBufferSlices( return a->index() < b->index(); }); - llvm::Function* kernel = BuildKernelPrototype(name, non_constant_buffers); + llvm::Function* kernel = BuildKernelPrototype(*inst, non_constant_buffers); // Build a map from a BufferAllocation to the corresponding argument in our // kernel. @@ -1930,19 +1793,24 @@ IrEmitterUnnested::BuildKernelThunkFromBufferSlices( // For each buffer our kernel might want to touch, bind it to a value derived // from our kernel args. - for (auto* slice : slices) { - const BufferAllocation::Slice& buffer_slice = slice->buffer_slice; - const ShapeIndex& gte_index = slice->gte_index; + for (const auto& hlo_buffer_slice : hlo_slices) { + const HloInstruction* instr = hlo_buffer_slice.instr; + const ShapeIndex& index = hlo_buffer_slice.hlo_index; + const BufferAllocation::Slice& slice = hlo_buffer_slice.buffer_slice; + const ShapeIndex& gte_index = hlo_buffer_slice.gte_index; + + VLOG(3) << "Buffer for " << instr->ToString() << " at " << index.ToString() + << " is found in slice " << slice.ToString() << " at GTE index " + << gte_index.ToString(); llvm::Value* loc; - if (buffer_slice.allocation()->is_constant()) { + if (slice.allocation()->is_constant()) { loc = ir_emitter_context_->llvm_module()->getGlobalVariable( - llvm_ir::ConstantBufferAllocationToGlobalName( - *buffer_slice.allocation())); + llvm_ir::ConstantBufferAllocationToGlobalName(*slice.allocation())); CHECK_NE(loc, nullptr); } else { - loc = InBoundsGEP(kernel_args.at(buffer_slice.allocation()), - {b_.getInt64(buffer_slice.offset())}); + loc = InBoundsGEP(kernel_args.at(slice.allocation()), + {b_.getInt64(slice.offset())}); } // If gte_index is nonempty, we have to dereference `loc` to get to the @@ -1954,7 +1822,7 @@ IrEmitterUnnested::BuildKernelThunkFromBufferSlices( loc = Load(InBoundsGEP(loc, {b_.getInt64(idx)})); } - bind_slice_to_ir_value(slice, loc); + bindings_.BindHloToIrValue(*instr, loc, index); } // Bind the temp buffer so that nested subcomputations can find it if they @@ -1966,66 +1834,9 @@ IrEmitterUnnested::BuildKernelThunkFromBufferSlices( llvm::ConstantPointerNull::get(b_.getInt8PtrTy())); } - return absl::make_unique(thunk_info, non_constant_buffers, - std::string(kernel->getName())); -} - -std::unique_ptr IrEmitterUnnested::BuildKernelThunk( - const HloInstruction* inst, bool implements_whole_instruction) { - std::vector hlo_slices = - GetHloBufferSlices(inst, ir_emitter_context_->buffer_assignment()); - - std::vector slice_ptrs; - slice_ptrs.reserve(hlo_slices.size()); - for (auto& slice : hlo_slices) { - slice_ptrs.push_back(&slice); - } - - return BuildKernelThunkFromBufferSlices( - inst->name(), + return absl::make_unique( implements_whole_instruction ? 
GetThunkInfo(inst) : Thunk::ThunkInfo(), - slice_ptrs, [this](const BufferSlice* slice, llvm::Value* value) { - const HloBufferSlice* hlo_buffer_slice = - static_cast(slice); - const HloInstruction* instr = hlo_buffer_slice->instr; - const ShapeIndex& index = hlo_buffer_slice->hlo_index; - VLOG(3) << "Buffer for " << instr->ToString() << " at " - << index.ToString() << " is found in slice " - << hlo_buffer_slice->buffer_slice.ToString() << " at GTE index " - << hlo_buffer_slice->gte_index.ToString(); - - bindings_.BindHloToIrValue(*instr, value, index); - }); -} - -std::unique_ptr IrEmitterUnnested::BuildKernelThunkForMlir( - absl::string_view name, Thunk::ThunkInfo thunk_info, - absl::Span slices, - std::vector* ir_arrays) { - absl::flat_hash_set buffers_written; - std::vector slice_ptrs; - slice_ptrs.reserve(slices.size()); - for (auto& slice : slices) { - slice_ptrs.push_back(&slice); - if (slice.written) { - buffers_written.insert(slice.buffer_slice); - } - } - - ir_arrays->clear(); - return BuildKernelThunkFromBufferSlices( - name, thunk_info, slice_ptrs, - [&](const BufferSlice* slice, llvm::Value* value) { - const auto& mlir_slice = static_cast(*slice); - - llvm_ir::IrArray ir_array( - CastToTypedValue(mlir_slice.shape, value, &b_), mlir_slice.shape); - if (!buffers_written.contains(slice->buffer_slice)) { - ir_array.MarkInvariantOverWholeProgram(&value->getContext()); - } - - ir_arrays->push_back(ir_array); - }); + non_constant_buffers, std::string(kernel->getName())); } StatusOr> IrEmitterUnnested::BuildInitializerThunk( @@ -2232,7 +2043,7 @@ Status CheckConditionalBuffersShareAllocation( } // namespace -StatusOr> IrEmitterUnnested::BuildWhileThunk( +std::unique_ptr IrEmitterUnnested::BuildWhileThunk( const HloInstruction* hlo) { // Check that all while-related buffers share an allocation. TF_CHECK_OK(CheckWhileBuffersShareAllocation( @@ -2240,26 +2051,24 @@ StatusOr> IrEmitterUnnested::BuildWhileThunk( // Generate thunk sequence for while 'condition'. HloComputation* condition = hlo->while_condition(); - TF_ASSIGN_OR_RETURN(auto ir_emitter_condition, - IrEmitterUnnested::Create(hlo_module_config_, condition, - ir_emitter_context_)); - TF_RETURN_IF_ERROR(condition->Accept(ir_emitter_condition.get())); + IrEmitterUnnested ir_emitter_condition(hlo_module_config_, condition, + ir_emitter_context_); + TF_CHECK_OK(condition->Accept(&ir_emitter_condition)); // Generate thunk sequence for while 'body'. HloComputation* body = hlo->while_body(); - TF_ASSIGN_OR_RETURN( - auto ir_emitter_body, - IrEmitterUnnested::Create(hlo_module_config_, body, ir_emitter_context_)); - TF_RETURN_IF_ERROR(body->Accept(ir_emitter_body.get())); + IrEmitterUnnested ir_emitter_body(hlo_module_config_, body, + ir_emitter_context_); + TF_CHECK_OK(body->Accept(&ir_emitter_body)); - return std::unique_ptr(new WhileThunk( + return absl::make_unique( GetThunkInfo(hlo), GetAllocationSlice(*condition->root_instruction()), // cond result - ir_emitter_condition->ConsumeThunkSequence(), - ir_emitter_body->ConsumeThunkSequence())); + ir_emitter_condition.ConsumeThunkSequence(), + ir_emitter_body.ConsumeThunkSequence()); } -StatusOr> IrEmitterUnnested::BuildForThunk( +std::unique_ptr IrEmitterUnnested::BuildForThunk( const HloInstruction* hlo, const int64 loop_limit) { // Check that all while-related buffers share an allocation. TF_CHECK_OK(CheckWhileBuffersShareAllocation( @@ -2267,16 +2076,15 @@ StatusOr> IrEmitterUnnested::BuildForThunk( // Generate thunk sequence for while 'body' (will be used a For loop body). 
HloComputation* body = hlo->while_body(); - TF_ASSIGN_OR_RETURN( - auto ir_emitter_body, - IrEmitterUnnested::Create(hlo_module_config_, body, ir_emitter_context_)); - TF_RETURN_IF_ERROR(body->Accept(ir_emitter_body.get())); + IrEmitterUnnested ir_emitter_body(hlo_module_config_, body, + ir_emitter_context_); + TF_CHECK_OK(body->Accept(&ir_emitter_body)); - return std::unique_ptr(new ForThunk( - GetThunkInfo(hlo), loop_limit, ir_emitter_body->ConsumeThunkSequence())); + return absl::make_unique(GetThunkInfo(hlo), loop_limit, + ir_emitter_body.ConsumeThunkSequence()); } -StatusOr> IrEmitterUnnested::BuildConditionalThunk( +std::unique_ptr IrEmitterUnnested::BuildConditionalThunk( const HloInstruction* hlo) { // Check that the buffers used in conditional are shared with the operands and // result appropriately. @@ -2288,17 +2096,15 @@ StatusOr> IrEmitterUnnested::BuildConditionalThunk( for (int j = 0; j < hlo->branch_count(); ++j) { branch_operands.emplace_back(GetAllocationSlice(*hlo->operand(j + 1))); HloComputation* branch_computation = hlo->branch_computation(j); - TF_ASSIGN_OR_RETURN( - auto ir_emitter, - IrEmitterUnnested::Create(hlo_module_config_, branch_computation, - ir_emitter_context_)); - TF_CHECK_OK(branch_computation->Accept(ir_emitter.get())); - branch_thunks.push_back(std::move(*ir_emitter->ConsumeThunkSequence())); + IrEmitterUnnested ir_emitter(hlo_module_config_, branch_computation, + ir_emitter_context_); + TF_CHECK_OK(branch_computation->Accept(&ir_emitter)); + branch_thunks.push_back(std::move(*ir_emitter.ConsumeThunkSequence())); } - return std::unique_ptr(new ConditionalThunk( + return absl::make_unique( GetThunkInfo(hlo), GetAllocationSlice(*hlo->operand(0)), branch_operands, - std::move(branch_thunks))); + std::move(branch_thunks)); } Status IrEmitterUnnested::EmitTargetElementLoopInThunk( diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h index b9146dd8fae..019fcdf21db 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h @@ -17,7 +17,6 @@ limitations under the License. #define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_IR_EMITTER_UNNESTED_H_ #include "absl/container/inlined_vector.h" -#include "tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.h" #include "tensorflow/compiler/xla/service/gpu/ir_emitter.h" #include "tensorflow/compiler/xla/service/gpu/kernel_mapping_scheme.h" #include "tensorflow/compiler/xla/service/gpu/sequential_thunk.h" @@ -29,40 +28,6 @@ limitations under the License. namespace xla { namespace gpu { -struct BufferSlice { - // The root buffer to look at. - BufferAllocation::Slice buffer_slice; - - // Describes how to dereference starting at that buffer to get to the buffer - // in question. - ShapeIndex gte_index; -}; - -// Describes how to access a particular subshape for an HLO. For instance if -// `.hlo_index` is {1} and `.gte_index` is {3, 4} then buffer for `.instr` at -// ShapeIndex {1} (i.e. the buffer for the second tuple element of hlo) is -// found at `.buffer_slice`[3][4]. That is, `.slice` is a void***, which we -// dereference twice -- first at index 3, and then at index 4 -- to get the -// address of our buffer. -struct HloBufferSlice : public BufferSlice { - const HloInstruction* instr; - ShapeIndex hlo_index; -}; - -struct MlirBufferSlice : public BufferSlice { - // The buffer is modified by the kernel. 
- bool written; - - Shape shape; -}; - -struct MlirEmitterInput { - mlir::Operation* op; - absl::string_view name; - Thunk::ThunkInfo thunk_info; - MlirBufferSlice extra_slice; -}; - // Emits LLVM IR for an "unnested computation". // // An unnested computation is an HloComputation which you run by executing one @@ -124,14 +89,12 @@ class IrEmitterUnnested : public IrEmitter, const string& loop_name, llvm::Value* tile_height, llvm::Value* tile_width, KernelSupportLibrary* ksl)>; + IrEmitterUnnested(const HloModuleConfig& hlo_module_config, + const HloComputation* hlo_computation, + IrEmitterContext* ir_emitter_context); IrEmitterUnnested(const IrEmitterUnnested&) = delete; IrEmitterUnnested& operator=(const IrEmitterUnnested&) = delete; - static StatusOr> Create( - const HloModuleConfig& hlo_module_config, - const HloComputation* hlo_computation, - IrEmitterContext* ir_emitter_context); - // Transfers the ownship of thunk_sequence_ out. std::unique_ptr ConsumeThunkSequence() { return std::make_unique(std::move(thunk_sequence_)); @@ -161,7 +124,6 @@ class IrEmitterUnnested : public IrEmitter, Status HandleScatter(HloInstruction* scatter) override; Status HandleSelect(HloInstruction* select) override; Status HandleSort(HloInstruction* sort) override; - Status EmitMlirSort(MlirEmitterInput input); Status HandleTriangularSolve(HloInstruction* hlo) override; Status HandleTupleSelect(HloInstruction* tuple_select) override; Status HandleAllReduce(HloInstruction* crs) override; @@ -186,10 +148,6 @@ class IrEmitterUnnested : public IrEmitter, Status Postprocess(HloInstruction* hlo) override; private: - IrEmitterUnnested(const HloModuleConfig& hlo_module_config, - const HloComputation* hlo_computation, - IrEmitterContext* ir_emitter_context); - // Add a owning Thunk object to the thunk sequence. void AddThunkToThunkSequence(std::unique_ptr thunk) override { thunk_sequence_.emplace_back(std::move(thunk)); @@ -306,7 +264,8 @@ class IrEmitterUnnested : public IrEmitter, // Builds the prototype of the IR kernel for `inst` and adds it to the module. // This kernel takes as arguments pointers to the given buffer allocations. llvm::Function* BuildKernelPrototype( - absl::string_view name, absl::Span args); + const HloInstruction& inst, + absl::Span args); // Helper for writing extra outputs from inside a reduce kernel. Status EmitExtraOutputsForReduce( @@ -531,12 +490,6 @@ class IrEmitterUnnested : public IrEmitter, HloComputation* reducer, llvm::Type* element_type, llvm::Value* partial_result_address); - std::unique_ptr BuildKernelThunkFromBufferSlices( - absl::string_view name, Thunk::ThunkInfo thunk_info, - absl::Span slices, - std::function - bind_slice_to_ir_value); - // Returns a KernelThunk that invokes the kernel emitted for `inst`. The // caller needs to make sure `inst` outlives the lifetime of the returned // Thunk object. 'implements_whole_instruction' specifies whether this @@ -545,11 +498,6 @@ class IrEmitterUnnested : public IrEmitter, std::unique_ptr BuildKernelThunk( const HloInstruction* inst, bool implements_whole_instruction); - std::unique_ptr BuildKernelThunkForMlir( - absl::string_view name, Thunk::ThunkInfo thunk_info, - absl::Span slices, - std::vector* ir_arrays); - // Returns a thunk that, given a reduce or select-and-scatter op, // initializes its memory to the appropriate initial value. 
StatusOr> BuildInitializerThunk( @@ -557,18 +505,17 @@ class IrEmitterUnnested : public IrEmitter, // Returns a WhileThunk that invokes thunk sequences for 'condition' and // 'body' sub-computations of while instruction 'hlo'. - StatusOr> BuildWhileThunk(const HloInstruction* hlo); + std::unique_ptr BuildWhileThunk(const HloInstruction* hlo); // Returns a ForThunk which executes 'loop_limit' invocations of a thunk // sequence from the 'body' sub-computation of the while instruction 'hlo'. - StatusOr> BuildForThunk(const HloInstruction* hlo, - const int64 loop_limit); + std::unique_ptr BuildForThunk(const HloInstruction* hlo, + const int64 loop_limit); // Returns a ConditionalThunk which executes the thunk sequence for the // 'branch_computation' corresponding to the predicate/branch_index of the // given conditional instruction. - StatusOr> BuildConditionalThunk( - const HloInstruction* hlo); + std::unique_ptr BuildConditionalThunk(const HloInstruction* hlo); // Emits current thread id with the given type. // @@ -598,9 +545,6 @@ class IrEmitterUnnested : public IrEmitter, absl::optional thread_id_filter = absl::nullopt, absl::optional block_id_filter = absl::nullopt); - StatusOr GetOrCreateSubComputationFromRegion( - mlir::Region* region); - // Returns the last generated thunk. Thunk* LastThunk() const { return thunk_sequence_.back().get(); } @@ -611,14 +555,6 @@ class IrEmitterUnnested : public IrEmitter, // The HloComputation that this IrEmitter emits code for. const HloComputation* hlo_computation_; - - mlir::OwningModuleRef mlir_scratch_module_; - - // This is for cache-purpose only. It has no significant semantics. - mlir::LhloDialectEmitter lhlo_scratch_emitter_; - - absl::flat_hash_map> - scratch_nested_computations_; }; } // namespace gpu diff --git a/tensorflow/compiler/xla/service/gpu/tests/BUILD b/tensorflow/compiler/xla/service/gpu/tests/BUILD index 809b277317f..a2bddd2d0d7 100644 --- a/tensorflow/compiler/xla/service/gpu/tests/BUILD +++ b/tensorflow/compiler/xla/service/gpu/tests/BUILD @@ -458,35 +458,6 @@ xla_test( ], ) -tf_cc_test( - name = "sorting_test", - srcs = [ - "sorting_test.cc", - ], - tags = tf_cuda_tests_tags() + [ - "no_rocm", - ], - deps = [ - ":gpu_codegen_test", - "//tensorflow/compiler/xla:debug_options_flags", - "//tensorflow/compiler/xla:statusor", - "//tensorflow/compiler/xla:xla_proto_cc", - "//tensorflow/compiler/xla/service:gpu_plugin", - "//tensorflow/compiler/xla/service:hlo", - "//tensorflow/compiler/xla/service:hlo_module_config", - "//tensorflow/compiler/xla/service:hlo_parser", - "//tensorflow/compiler/xla/service/gpu:gpu_executable", - "//tensorflow/compiler/xla/tests:filecheck", - "//tensorflow/compiler/xla/tests:hlo_test_base", - "//tensorflow/compiler/xla/tests:llvm_irgen_test_base", - "//tensorflow/core:lib", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/stream_executor/lib", - "@com_google_absl//absl/memory", - ], -) - tf_cc_binary( name = "hlo_to_llvm_ir", srcs = ["hlo_to_llvm_ir.cc"], diff --git a/tensorflow/compiler/xla/service/gpu/tests/sorting.hlo b/tensorflow/compiler/xla/service/gpu/tests/sorting.hlo index 4d29a8df116..272c9a25769 100644 --- a/tensorflow/compiler/xla/service/gpu/tests/sorting.hlo +++ b/tensorflow/compiler/xla/service/gpu/tests/sorting.hlo @@ -8,162 +8,162 @@ compare { ROOT lt = pred[] compare(p.0.lhs, p.0.rhs), direction=LT } -// CHECK: define void @sort(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]]) +// CHECK: define void @sort(i8* noalias align 64 dereferenceable(24) 
[[ALLOC0:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC1:%.*]]) // CHECK-NEXT: entry: // CHECK-NEXT: [[COMPARE_RETURN_BUFFER:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to [2 x [3 x float]]* -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [2 x [3 x float]]* -// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 -// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP4]] to i64 -// CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 -// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP5]] to i64 -// CHECK-NEXT: [[TMP6:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 -// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP6]], [[THREAD_ID]] +// CHECK-NEXT: [[SORT_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0]], i64 0 +// CHECK-NEXT: [[SORT_TYPED:%.*]] = bitcast i8* [[SORT_RAW]] to [2 x [3 x float]]* +// CHECK-NEXT: [[X_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1]], i64 0 +// CHECK-NEXT: [[X_TYPED:%.*]] = bitcast i8* [[X_RAW]] to [2 x [3 x float]]* +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 +// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 +// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP1]] to i64 +// CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 +// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP2]], [[THREAD_ID]] // CHECK-NEXT: [[LINEAR_INDEX_IN_RANGE:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 // CHECK-NEXT: call void @llvm.assume(i1 [[LINEAR_INDEX_IN_RANGE]]) -// CHECK-NEXT: [[TMP7:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = urem i64 [[TMP7]], 2 -// CHECK-NEXT: [[TMP9:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 -// CHECK-NEXT: br i1 [[TMP10]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] +// CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = urem i64 [[TMP3]], 2 +// CHECK-NEXT: [[TMP5:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 +// CHECK-NEXT: br i1 [[TMP6]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] // CHECK: sort.in_bounds-after: // CHECK-NEXT: ret void // CHECK: sort.in_bounds-true: -// CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP8]], 2 -// CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 1 -// CHECK-NEXT: [[TMP13:%.*]] = icmp slt i64 [[TMP11]], [[TMP12]] -// CHECK-NEXT: [[TMP14:%.*]] = icmp slt i64 [[TMP12]], 3 -// CHECK-NEXT: [[TMP15:%.*]] = and i1 [[TMP13]], [[TMP14]] -// CHECK-NEXT: br i1 [[TMP15]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] +// CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP4]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 1 +// CHECK-NEXT: [[TMP9:%.*]] = icmp slt i64 [[TMP7]], [[TMP8]] +// CHECK-NEXT: [[TMP10:%.*]] = icmp slt i64 [[TMP8]], 3 +// CHECK-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]] +// CHECK-NEXT: br i1 [[TMP11]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] // CHECK: smaller_comparison_index-after: // CHECK-NEXT: br label [[SORT_IN_BOUNDS_AFTER]] // CHECK: smaller_comparison_index-true: -// CHECK-NEXT: 
[[TMP16:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP12]] -// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP11]] -// CHECK-NEXT: call void @region_0_4(float* [[TMP16]], float* [[TMP17]], i8* [[COMPARE_RETURN_BUFFER]]) -// CHECK-NEXT: [[TMP18:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 -// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP18]], 0 +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP8]] +// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: call void @compare(float* [[TMP12]], float* [[TMP13]], i8* [[COMPARE_RETURN_BUFFER]]) +// CHECK-NEXT: [[TMP14:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 +// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP14]], 0 // CHECK-NEXT: br i1 [[BOOLEAN_PREDICATE]], label [[IS_SMALLER_THAN_TRUE:%.*]], label [[IS_SMALLER_THAN_AFTER:%.*]] // CHECK: is_smaller_than-after: // CHECK-NEXT: br label [[SMALLER_COMPARISON_INDEX_AFTER]] // CHECK: is_smaller_than-true: -// CHECK-NEXT: [[TMP19:%.*]] = load float, float* [[TMP16]], align 4 -// CHECK-NEXT: [[TMP20:%.*]] = load float, float* [[TMP17]], align 4 -// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP11]] -// CHECK-NEXT: store float [[TMP19]], float* [[TMP21]], align 4 -// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP12]] -// CHECK-NEXT: store float [[TMP20]], float* [[TMP22]], align 4 +// CHECK-NEXT: [[TMP15:%.*]] = load float, float* [[TMP12]], align 4 +// CHECK-NEXT: [[TMP16:%.*]] = load float, float* [[TMP13]], align 4 +// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: store float [[TMP15]], float* [[TMP17]], align 4 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP8]] +// CHECK-NEXT: store float [[TMP16]], float* [[TMP18]], align 4 // CHECK-NEXT: br label [[IS_SMALLER_THAN_AFTER]] -// CHECK: define internal void @region_0_4(float* dereferenceable(4) [[P_0_LHS_TYPED:%.*]], float* dereferenceable(4) [[P_0_RHS_TYPED:%.*]], i8* dereferenceable(1) [[OUTPUT_ARG:%.*]]) +// CHECK: define internal void @compare(float* dereferenceable(4) [[P_0_LHS_TYPED:%.*]], float* dereferenceable(4) [[P_0_RHS_TYPED:%.*]], i8* dereferenceable(1) [[OUTPUT_ARG:%.*]]) // CHECK-NEXT: entry: -// CHECK-NEXT: [[COMPARE_3_TYPED:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[ARG_0_1_TYPED:%.*]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARG_1_2_TYPED:%.*]], align 4 +// CHECK-NEXT: [[LT_TYPED:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[P_0_LHS_TYPED]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[P_0_RHS_TYPED]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = fcmp olt float [[TMP0]], [[TMP1]] // CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i8 -// CHECK-NEXT: store i8 [[TMP3]], i8* [[COMPARE_3_TYPED]], align 1 -// CHECK-NEXT: [[LOAD_RET_VALUE:%.*]] = load i8, i8* [[COMPARE_3_TYPED]], align 1 -// CHECK-NEXT: store i8 
[[LOAD_RET_VALUE]], i8* [[OUTPUT_ARG:%.*]], align 1 +// CHECK-NEXT: store i8 [[TMP3]], i8* [[LT_TYPED]], align 1 +// CHECK-NEXT: [[LOAD_RET_VALUE:%.*]] = load i8, i8* [[LT_TYPED]], align 1 +// CHECK-NEXT: store i8 [[LOAD_RET_VALUE]], i8* [[OUTPUT_ARG]], align 1 // CHECK-NEXT: ret void -// CHECK: define void @sort__1(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]]) { +// CHECK: define void @sort__1(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC1:%.*]]) { // CHECK-NEXT: entry: // CHECK-NEXT: [[COMPARE_RETURN_BUFFER:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to [2 x [3 x float]]* -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [2 x [3 x float]]* -// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 -// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP4]] to i64 -// CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 -// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP5]] to i64 -// CHECK-NEXT: [[TMP6:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 -// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP6]], [[THREAD_ID]] +// CHECK-NEXT: [[SORT_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0]], i64 0 +// CHECK-NEXT: [[SORT_TYPED:%.*]] = bitcast i8* [[SORT_RAW]] to [2 x [3 x float]]* +// CHECK-NEXT: [[X_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1]], i64 0 +// CHECK-NEXT: [[X_TYPED:%.*]] = bitcast i8* [[X_RAW]] to [2 x [3 x float]]* +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 +// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 +// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP1]] to i64 +// CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 +// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP2]], [[THREAD_ID]] // CHECK-NEXT: [[LINEAR_INDEX_IN_RANGE:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 // CHECK-NEXT: call void @llvm.assume(i1 [[LINEAR_INDEX_IN_RANGE]]) -// CHECK-NEXT: [[TMP7:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = urem i64 [[TMP7]], 2 -// CHECK-NEXT: [[TMP9:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 -// CHECK-NEXT: br i1 [[TMP10]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] +// CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = urem i64 [[TMP3]], 2 +// CHECK-NEXT: [[TMP5:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 +// CHECK-NEXT: br i1 [[TMP6]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] // CHECK: sort.in_bounds-after: // CHECK-NEXT: ret void // CHECK: sort.in_bounds-true: -// CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP8]], 3 -// CHECK-NEXT: [[TMP12:%.*]] = icmp slt i64 [[TMP8]], [[TMP11]] -// CHECK-NEXT: [[TMP13:%.*]] = icmp slt i64 [[TMP11]], 3 -// CHECK-NEXT: [[TMP14:%.*]] = and i1 [[TMP12]], [[TMP13]] -// CHECK-NEXT: br i1 [[TMP14]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] +// CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP4]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = icmp slt i64 [[TMP4]], [[TMP7]] +// CHECK-NEXT: [[TMP9:%.*]] = icmp slt i64 
[[TMP7]], 3 +// CHECK-NEXT: [[TMP10:%.*]] = and i1 [[TMP8]], [[TMP9]] +// CHECK-NEXT: br i1 [[TMP10]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] // CHECK: smaller_comparison_index-after: // CHECK-NEXT: br label [[SORT_IN_BOUNDS_AFTER]] // CHECK: smaller_comparison_index-true: -// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP11]] -// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP8]] -// CHECK-NEXT: call void @region_0_4(float* [[TMP15]], float* [[TMP16]], i8* [[COMPARE_RETURN_BUFFER]]) -// CHECK-NEXT: [[TMP17:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 -// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP17]], 0 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP4]] +// CHECK-NEXT: call void @compare(float* [[TMP11]], float* [[TMP12]], i8* [[COMPARE_RETURN_BUFFER]]) +// CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 +// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP13]], 0 // CHECK-NEXT: br i1 [[BOOLEAN_PREDICATE]], label [[IS_SMALLER_THAN_TRUE:%.*]], label [[IS_SMALLER_THAN_AFTER:%.*]] // CHECK: is_smaller_than-after: // CHECK-NEXT: br label [[SMALLER_COMPARISON_INDEX_AFTER]] // CHECK: is_smaller_than-true: -// CHECK-NEXT: [[TMP18:%.*]] = load float, float* [[TMP15]], align 4 -// CHECK-NEXT: [[TMP19:%.*]] = load float, float* [[TMP16]], align 4 -// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP8]] -// CHECK-NEXT: store float [[TMP18]], float* [[TMP20]], align 4 -// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP11]] -// CHECK-NEXT: store float [[TMP19]], float* [[TMP21]], align 4 +// CHECK-NEXT: [[TMP14:%.*]] = load float, float* [[TMP11]], align 4 +// CHECK-NEXT: [[TMP15:%.*]] = load float, float* [[TMP12]], align 4 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP4]] +// CHECK-NEXT: store float [[TMP14]], float* [[TMP16]], align 4 +// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: store float [[TMP15]], float* [[TMP17]], align 4 // CHECK-NEXT: br label [[IS_SMALLER_THAN_AFTER]] -// CHECK: define void @sort__2(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]]) { +// CHECK: define void @sort__2(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC1:%.*]]) { // CHECK-NEXT: entry: // CHECK-NEXT: [[COMPARE_RETURN_BUFFER:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to [2 x [3 x float]]* -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [2 x [3 x float]]* -// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 -// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 
[[TMP4]] to i64 -// CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 -// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP5]] to i64 -// CHECK-NEXT: [[TMP6:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 -// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP6]], [[THREAD_ID]] +// CHECK-NEXT: [[SORT_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 +// CHECK-NEXT: [[SORT_TYPED:%.*]] = bitcast i8* [[SORT_RAW]] to [2 x [3 x float]]* +// CHECK-NEXT: [[X_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1:%.*]], i64 0 +// CHECK-NEXT: [[X_TYPED:%.*]] = bitcast i8* [[X_RAW]] to [2 x [3 x float]]* +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 +// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 +// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP1]] to i64 +// CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 +// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP2]], [[THREAD_ID]] // CHECK-NEXT: [[LINEAR_INDEX_IN_RANGE:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 // CHECK-NEXT: call void @llvm.assume(i1 [[LINEAR_INDEX_IN_RANGE]]) -// CHECK-NEXT: [[TMP7:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 -// CHECK-NEXT: [[TMP8:%.*]] = urem i64 [[TMP7]], 2 -// CHECK-NEXT: [[TMP9:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 -// CHECK-NEXT: [[TMP10:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 -// CHECK-NEXT: br i1 [[TMP10]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] +// CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = urem i64 [[TMP3]], 2 +// CHECK-NEXT: [[TMP5:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 +// CHECK-NEXT: br i1 [[TMP6]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] // CHECK: sort.in_bounds-after: // CHECK-NEXT: ret void // CHECK: sort.in_bounds-true: -// CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP8]], 2 -// CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 1 -// CHECK-NEXT: [[TMP13:%.*]] = icmp slt i64 [[TMP11]], [[TMP12]] -// CHECK-NEXT: [[TMP14:%.*]] = icmp slt i64 [[TMP12]], 3 -// CHECK-NEXT: [[TMP15:%.*]] = and i1 [[TMP13]], [[TMP14]] -// CHECK-NEXT: br i1 [[TMP15]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] +// CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP4]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 1 +// CHECK-NEXT: [[TMP9:%.*]] = icmp slt i64 [[TMP7]], [[TMP8]] +// CHECK-NEXT: [[TMP10:%.*]] = icmp slt i64 [[TMP8]], 3 +// CHECK-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]] +// CHECK-NEXT: br i1 [[TMP11]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] // CHECK: smaller_comparison_index-after: // CHECK-NEXT: br label [[SORT_IN_BOUNDS_AFTER]] // CHECK: smaller_comparison_index-true: -// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP12]] -// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP11]] -// CHECK-NEXT: call void @region_0_4(float* [[TMP16]], float* [[TMP17]], i8* [[COMPARE_RETURN_BUFFER]]) -// CHECK-NEXT: [[TMP18:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 -// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP18]], 0 +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* 
[[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP8]] +// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: call void @compare(float* [[TMP12]], float* [[TMP13]], i8* [[COMPARE_RETURN_BUFFER]]) +// CHECK-NEXT: [[TMP14:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 +// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP14]], 0 // CHECK-NEXT: br i1 [[BOOLEAN_PREDICATE]], label [[IS_SMALLER_THAN_TRUE:%.*]], label [[IS_SMALLER_THAN_AFTER:%.*]] // CHECK: is_smaller_than-after: // CHECK-NEXT: br label [[SMALLER_COMPARISON_INDEX_AFTER]] // CHECK: is_smaller_than-true: -// CHECK-NEXT: [[TMP19:%.*]] = load float, float* [[TMP16]], align 4 -// CHECK-NEXT: [[TMP20:%.*]] = load float, float* [[TMP17]], align 4 -// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP11]] -// CHECK-NEXT: store float [[TMP19]], float* [[TMP21]], align 4 -// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP1]], i64 0, i64 [[TMP9]], i64 [[TMP12]] -// CHECK-NEXT: store float [[TMP20]], float* [[TMP22]], align 4 +// CHECK-NEXT: [[TMP15:%.*]] = load float, float* [[TMP12]], align 4 +// CHECK-NEXT: [[TMP16:%.*]] = load float, float* [[TMP13]], align 4 +// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: store float [[TMP15]], float* [[TMP17]], align 4 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED]], i64 0, i64 [[TMP5]], i64 [[TMP8]] +// CHECK-NEXT: store float [[TMP16]], float* [[TMP18]], align 4 // CHECK-NEXT: br label [[IS_SMALLER_THAN_AFTER]] ENTRY main { x = f32[2, 3] parameter(0) @@ -182,198 +182,210 @@ compare { ROOT lt = pred[] compare(p.1.lhs, p.1.rhs), direction=LT } -// CHECK: define void @sort(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 64 dereferenceable(24) [[ALLOC1:%.*]], i8* noalias align 64 dereferenceable(16) [[ALLOC4:%.*]]) +// CHECK: define void @sort(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 64 dereferenceable(24) [[ALLOC1:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC2:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC3:%.*]], i8* noalias align 64 dereferenceable(16) [[ALLOC4:%.*]]) // CHECK-NEXT: entry: // CHECK-NEXT: [[COMPARE_RETURN_BUFFER:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to [2 x [3 x i32]]* -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [2 x [3 x float]]* -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[ALLOC4:%.*]], i64 0 -// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* -// CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 -// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 -// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK-NEXT: [[TMP8:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 -// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP8]], [[THREAD_ID]] +// CHECK-NEXT: [[SORT_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC4]], i64 0 +// 
CHECK-NEXT: [[SORT_TYPED:%.*]] = bitcast i8* [[SORT_RAW]] to [2 x i8*]* +// CHECK-NEXT: [[SORT_RAW1:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0]], i64 0 +// CHECK-NEXT: [[SORT_TYPED2:%.*]] = bitcast i8* [[SORT_RAW1]] to [2 x [3 x i32]]* +// CHECK-NEXT: [[SORT_RAW3:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1]], i64 0 +// CHECK-NEXT: [[SORT_TYPED4:%.*]] = bitcast i8* [[SORT_RAW3]] to [2 x [3 x float]]* +// CHECK-NEXT: [[X_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC2]], i64 0 +// CHECK-NEXT: [[X_TYPED:%.*]] = bitcast i8* [[X_RAW]] to [2 x [3 x i32]]* +// CHECK-NEXT: [[Y_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC3]], i64 0 +// CHECK-NEXT: [[Y_TYPED:%.*]] = bitcast i8* [[Y_RAW]] to [2 x [3 x float]]* +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 +// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 +// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP1]] to i64 +// CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 +// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP2]], [[THREAD_ID]] // CHECK-NEXT: [[LINEAR_INDEX_IN_RANGE:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 // CHECK-NEXT: call void @llvm.assume(i1 [[LINEAR_INDEX_IN_RANGE]]) -// CHECK-NEXT: [[TMP9:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 -// CHECK-NEXT: [[TMP10:%.*]] = urem i64 [[TMP9]], 2 -// CHECK-NEXT: [[TMP11:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 -// CHECK-NEXT: [[TMP12:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 -// CHECK-NEXT: br i1 [[TMP12]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] +// CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = urem i64 [[TMP3]], 2 +// CHECK-NEXT: [[TMP5:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 +// CHECK-NEXT: br i1 [[TMP6]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] // CHECK: sort.in_bounds-after: // CHECK-NEXT: ret void // CHECK: sort.in_bounds-true: -// CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP10]], 2 -// CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 1 -// CHECK-NEXT: [[TMP15:%.*]] = icmp slt i64 [[TMP13]], [[TMP14]] -// CHECK-NEXT: [[TMP16:%.*]] = icmp slt i64 [[TMP14]], 3 -// CHECK-NEXT: [[TMP17:%.*]] = and i1 [[TMP15]], [[TMP16]] -// CHECK-NEXT: br i1 [[TMP17]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] +// CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP4]], 2 +// CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 1 +// CHECK-NEXT: [[TMP9:%.*]] = icmp slt i64 [[TMP7]], [[TMP8]] +// CHECK-NEXT: [[TMP10:%.*]] = icmp slt i64 [[TMP8]], 3 +// CHECK-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]] +// CHECK-NEXT: br i1 [[TMP11]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] // CHECK: smaller_comparison_index-after: // CHECK-NEXT: br label [[SORT_IN_BOUNDS_AFTER]] // CHECK: smaller_comparison_index-true: -// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP14]] -// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP13]] -// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP14]] -// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 
[[TMP11]], i64 [[TMP13]] -// CHECK-NEXT: call void @region_0_6(i32* [[TMP18]], i32* [[TMP19]], float* [[TMP20]], float* [[TMP21]], i8* [[COMPARE_RETURN_BUFFER]]) -// CHECK-NEXT: [[TMP22:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 -// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP22]], 0 +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP8]] +// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP8]] +// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: call void @compare(i32* [[TMP12]], i32* [[TMP13]], float* [[TMP14]], float* [[TMP15]], i8* [[COMPARE_RETURN_BUFFER]]) +// CHECK-NEXT: [[TMP16:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 +// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP16]], 0 // CHECK-NEXT: br i1 [[BOOLEAN_PREDICATE]], label [[IS_SMALLER_THAN_TRUE:%.*]], label [[IS_SMALLER_THAN_AFTER:%.*]] // CHECK: is_smaller_than-after: // CHECK-NEXT: br label [[SMALLER_COMPARISON_INDEX_AFTER]] // CHECK: is_smaller_than-true: -// CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP13]] -// CHECK-NEXT: store i32 [[TMP23]], i32* [[TMP25]], align 4 -// CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP14]] -// CHECK-NEXT: store i32 [[TMP24]], i32* [[TMP26]], align 4 -// CHECK-NEXT: [[TMP27:%.*]] = load float, float* [[TMP20]], align 4 -// CHECK-NEXT: [[TMP28:%.*]] = load float, float* [[TMP21]], align 4 -// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP13]] -// CHECK-NEXT: store float [[TMP27]], float* [[TMP29]], align 4 -// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP14]] -// CHECK-NEXT: store float [[TMP28]], float* [[TMP30]], align 4 +// CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: store i32 [[TMP17]], i32* [[TMP19]], align 4 +// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP8]] +// CHECK-NEXT: store i32 [[TMP18]], i32* [[TMP20]], align 4 +// CHECK-NEXT: [[TMP21:%.*]] = load float, float* [[TMP14]], align 4 +// CHECK-NEXT: [[TMP22:%.*]] = load float, float* [[TMP15]], align 4 +// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: store float [[TMP21]], float* [[TMP23]], align 4 +// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP8]] +// CHECK-NEXT: store float [[TMP22]], float* 
[[TMP24]], align 4 // CHECK-NEXT: br label [[IS_SMALLER_THAN_AFTER]] -// CHECK: define internal void @region_0_6(i32* dereferenceable(4) [[P_0_LHS_TYPED:%.*]], i32* dereferenceable(4) [[P_0_RHS_TYPED:%.*]], float* dereferenceable(4) [[P_1_LHS_TYPED:%.*]], float* dereferenceable(4) [[P_1_RHS_TYPED:%.*]], i8* dereferenceable(1) [[OUTPUT_ARG:%.*]]) +// CHECK: define internal void @compare(i32* dereferenceable(4) [[P_0_LHS_TYPED:%.*]], i32* dereferenceable(4) [[P_0_RHS_TYPED:%.*]], float* dereferenceable(4) [[P_1_LHS_TYPED:%.*]], float* dereferenceable(4) [[P_1_RHS_TYPED:%.*]], i8* dereferenceable(1) [[OUTPUT_ARG:%.*]]) // CHECK-NEXT: entry: -// CHECK-NEXT: [[COMPARE_5_TYPED:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[ARG_2_3_TYPED:%.*]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARG_3_4_TYPED:%.*]], align 4 +// CHECK-NEXT: [[LT_TYPED:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[P_1_LHS_TYPED]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[P_1_RHS_TYPED]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = fcmp olt float [[TMP0]], [[TMP1]] // CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i8 -// CHECK-NEXT: store i8 [[TMP3]], i8* [[COMPARE_5_TYPED]], align 1 -// CHECK-NEXT: [[LOAD_RET_VALUE:%.*]] = load i8, i8* [[COMPARE_5_TYPED]], align 1 -// CHECK-NEXT: store i8 [[LOAD_RET_VALUE]], i8* [[OUTPUT_ARG:%.*]], align 1 +// CHECK-NEXT: store i8 [[TMP3]], i8* [[LT_TYPED]], align 1 +// CHECK-NEXT: [[LOAD_RET_VALUE:%.*]] = load i8, i8* [[LT_TYPED]], align 1 +// CHECK-NEXT: store i8 [[LOAD_RET_VALUE]], i8* [[OUTPUT_ARG]], align 1 // CHECK-NEXT: ret void -// CHECK: define void @sort__1(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 64 dereferenceable(24) [[ALLOC1:%.*]], i8* noalias align 64 dereferenceable(16) [[ALLOC4:%.*]]) +// CHECK: define void @sort__1(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 64 dereferenceable(24) [[ALLOC1:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC2:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC3:%.*]], i8* noalias align 64 dereferenceable(16) [[ALLOC4:%.*]]) // CHECK-NEXT: entry: // CHECK-NEXT: [[COMPARE_RETURN_BUFFER:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to [2 x [3 x i32]]* -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [2 x [3 x float]]* -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[ALLOC4:%.*]], i64 0 -// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* -// CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 -// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 -// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK-NEXT: [[TMP8:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 -// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP8]], [[THREAD_ID]] +// CHECK-NEXT: [[SORT_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC4:%.*]], i64 0 +// CHECK-NEXT: [[SORT_TYPED:%.*]] = bitcast i8* [[SORT_RAW]] to [2 x i8*]* +// CHECK-NEXT: [[SORT_RAW1:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 +// CHECK-NEXT: [[SORT_TYPED2:%.*]] = bitcast i8* [[SORT_RAW1]] to [2 x [3 x i32]]* +// CHECK-NEXT: [[SORT_RAW3:%.*]] = getelementptr 
inbounds i8, i8* [[ALLOC1:%.*]], i64 0 +// CHECK-NEXT: [[SORT_TYPED4:%.*]] = bitcast i8* [[SORT_RAW3]] to [2 x [3 x float]]* +// CHECK-NEXT: [[X_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC2:%.*]], i64 0 +// CHECK-NEXT: [[X_TYPED:%.*]] = bitcast i8* [[X_RAW]] to [2 x [3 x i32]]* +// CHECK-NEXT: [[Y_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC3:%.*]], i64 0 +// CHECK-NEXT: [[Y_TYPED:%.*]] = bitcast i8* [[Y_RAW]] to [2 x [3 x float]]* +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 +// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 +// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP1]] to i64 +// CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 +// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP2]], [[THREAD_ID]] // CHECK-NEXT: [[LINEAR_INDEX_IN_RANGE:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 // CHECK-NEXT: call void @llvm.assume(i1 [[LINEAR_INDEX_IN_RANGE]]) -// CHECK-NEXT: [[TMP9:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 -// CHECK-NEXT: [[TMP10:%.*]] = urem i64 [[TMP9]], 2 -// CHECK-NEXT: [[TMP11:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 -// CHECK-NEXT: [[TMP12:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 -// CHECK-NEXT: br i1 [[TMP12]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] +// CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = urem i64 [[TMP3]], 2 +// CHECK-NEXT: [[TMP5:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 +// CHECK-NEXT: br i1 [[TMP6]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] // CHECK: sort.in_bounds-after: // CHECK-NEXT: ret void // CHECK: sort.in_bounds-true: -// CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP10]], 3 -// CHECK-NEXT: [[TMP14:%.*]] = icmp slt i64 [[TMP10]], [[TMP13]] -// CHECK-NEXT: [[TMP15:%.*]] = icmp slt i64 [[TMP13]], 3 -// CHECK-NEXT: [[TMP16:%.*]] = and i1 [[TMP14]], [[TMP15]] -// CHECK-NEXT: br i1 [[TMP16]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] +// CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP4]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = icmp slt i64 [[TMP4]], [[TMP7]] +// CHECK-NEXT: [[TMP9:%.*]] = icmp slt i64 [[TMP7]], 3 +// CHECK-NEXT: [[TMP10:%.*]] = and i1 [[TMP8]], [[TMP9]] +// CHECK-NEXT: br i1 [[TMP10]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] // CHECK: smaller_comparison_index-after: // CHECK-NEXT: br label [[SORT_IN_BOUNDS_AFTER]] // CHECK: smaller_comparison_index-true: -// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP13]] -// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP10]] -// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP13]] -// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP10]] -// CHECK-NEXT: call void @region_0_6(i32* [[TMP17]], i32* [[TMP18]], float* [[TMP19]], float* [[TMP20]], i8* [[COMPARE_RETURN_BUFFER]]) -// CHECK-NEXT: [[TMP21:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 -// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP21]], 0 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x 
i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP4]] +// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP4]] +// CHECK-NEXT: call void @compare(i32* [[TMP11]], i32* [[TMP12]], float* [[TMP13]], float* [[TMP14]], i8* [[COMPARE_RETURN_BUFFER]]) +// CHECK-NEXT: [[TMP15:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 +// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP15]], 0 // CHECK-NEXT: br i1 [[BOOLEAN_PREDICATE]], label [[IS_SMALLER_THAN_TRUE:%.*]], label [[IS_SMALLER_THAN_AFTER:%.*]] // CHECK: is_smaller_than-after: // CHECK-NEXT: br label [[SMALLER_COMPARISON_INDEX_AFTER]] // CHECK: is_smaller_than-true: -// CHECK-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP10]] -// CHECK-NEXT: store i32 [[TMP22]], i32* [[TMP24]], align 4 -// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP13]] -// CHECK-NEXT: store i32 [[TMP23]], i32* [[TMP25]], align 4 -// CHECK-NEXT: [[TMP26:%.*]] = load float, float* [[TMP19]], align 4 -// CHECK-NEXT: [[TMP27:%.*]] = load float, float* [[TMP20]], align 4 -// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP10]] -// CHECK-NEXT: store float [[TMP26]], float* [[TMP28]], align 4 -// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP13]] -// CHECK-NEXT: store float [[TMP27]], float* [[TMP29]], align 4 +// CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP4]] +// CHECK-NEXT: store i32 [[TMP16]], i32* [[TMP18]], align 4 +// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: store i32 [[TMP17]], i32* [[TMP19]], align 4 +// CHECK-NEXT: [[TMP20:%.*]] = load float, float* [[TMP13]], align 4 +// CHECK-NEXT: [[TMP21:%.*]] = load float, float* [[TMP14]], align 4 +// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP4]] +// CHECK-NEXT: store float [[TMP20]], float* [[TMP22]], align 4 +// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP7]] +// CHECK-NEXT: store float [[TMP21]], float* [[TMP23]], align 4 // CHECK-NEXT: br label [[IS_SMALLER_THAN_AFTER]] -// CHECK: define void @sort__2(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 64 dereferenceable(24) [[ALLOC1:%.*]], i8* noalias align 64 dereferenceable(16) [[ALLOC4:%.*]]) +// CHECK: define void @sort__2(i8* noalias align 64 dereferenceable(24) [[ALLOC0:%.*]], i8* noalias align 64 dereferenceable(24) 
[[ALLOC1:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC2:%.*]], i8* noalias align 16 dereferenceable(24) [[ALLOC3:%.*]], i8* noalias align 64 dereferenceable(16) [[ALLOC4:%.*]]) // CHECK-NEXT: entry: // CHECK-NEXT: [[COMPARE_RETURN_BUFFER:%.*]] = alloca i8, align 1 -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 -// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to [2 x [3 x i32]]* -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1:%.*]], i64 0 -// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [2 x [3 x float]]* -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[ALLOC4:%.*]], i64 0 -// CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* -// CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 -// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 -// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK-NEXT: [[TMP8:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 -// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP8]], [[THREAD_ID]] +// CHECK-NEXT: [[SORT_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC4:%.*]], i64 0 +// CHECK-NEXT: [[SORT_TYPED:%.*]] = bitcast i8* [[SORT_RAW]] to [2 x i8*]* +// CHECK-NEXT: [[SORT_RAW1:%.*]] = getelementptr inbounds i8, i8* [[ALLOC0:%.*]], i64 0 +// CHECK-NEXT: [[SORT_TYPED2:%.*]] = bitcast i8* [[SORT_RAW1]] to [2 x [3 x i32]]* +// CHECK-NEXT: [[SORT_RAW3:%.*]] = getelementptr inbounds i8, i8* [[ALLOC1:%.*]], i64 0 +// CHECK-NEXT: [[SORT_TYPED4:%.*]] = bitcast i8* [[SORT_RAW3]] to [2 x [3 x float]]* +// CHECK-NEXT: [[X_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC2:%.*]], i64 0 +// CHECK-NEXT: [[X_TYPED:%.*]] = bitcast i8* [[X_RAW]] to [2 x [3 x i32]]* +// CHECK-NEXT: [[Y_RAW:%.*]] = getelementptr inbounds i8, i8* [[ALLOC3:%.*]], i64 0 +// CHECK-NEXT: [[Y_TYPED:%.*]] = bitcast i8* [[Y_RAW]] to [2 x [3 x float]]* +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !6 +// CHECK-NEXT: [[BLOCK_ID:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !7 +// CHECK-NEXT: [[THREAD_ID:%.*]] = zext i32 [[TMP1]] to i64 +// CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw i64 [[BLOCK_ID]], 4 +// CHECK-NEXT: [[LINEAR_INDEX:%.*]] = add nuw nsw i64 [[TMP2]], [[THREAD_ID]] // CHECK-NEXT: [[LINEAR_INDEX_IN_RANGE:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 // CHECK-NEXT: call void @llvm.assume(i1 [[LINEAR_INDEX_IN_RANGE]]) -// CHECK-NEXT: [[TMP9:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 -// CHECK-NEXT: [[TMP10:%.*]] = urem i64 [[TMP9]], 2 -// CHECK-NEXT: [[TMP11:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 -// CHECK-NEXT: [[TMP12:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 -// CHECK-NEXT: br i1 [[TMP12]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] +// CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[LINEAR_INDEX]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = urem i64 [[TMP3]], 2 +// CHECK-NEXT: [[TMP5:%.*]] = udiv i64 [[LINEAR_INDEX]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[LINEAR_INDEX]], 4 +// CHECK-NEXT: br i1 [[TMP6]], label [[SORT_IN_BOUNDS_TRUE:%.*]], label [[SORT_IN_BOUNDS_AFTER:%.*]] // CHECK: sort.in_bounds-after: -// CHECK-NEXT: [[TMP13:%.*]] = bitcast [2 x [3 x i32]]* [[TMP1]] to i8* -// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK-NEXT: store i8* [[TMP13]], i8** [[TMP14]], align 8 -// CHECK-NEXT: 
[[TMP15:%.*]] = bitcast [2 x [3 x float]]* [[TMP3]] to i8* -// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 -// CHECK-NEXT: store i8* [[TMP15]], i8** [[TMP16]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = bitcast [2 x [3 x i32]]* [[SORT_TYPED2]] to i8* +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[SORT_TYPED]], i64 0, i64 0 +// CHECK-NEXT: store i8* [[TMP7]], i8** [[TMP8]], align 8 +// CHECK-NEXT: [[TMP9:%.*]] = bitcast [2 x [3 x float]]* [[SORT_TYPED4]] to i8* +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[SORT_TYPED]], i64 0, i64 1 +// CHECK-NEXT: store i8* [[TMP9]], i8** [[TMP10]], align 8 // CHECK-NEXT: ret void // CHECK: sort.in_bounds-true: -// CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP10]], 2 -// CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP17]], 1 -// CHECK-NEXT: [[TMP19:%.*]] = icmp slt i64 [[TMP17]], [[TMP18]] -// CHECK-NEXT: [[TMP20:%.*]] = icmp slt i64 [[TMP18]], 3 -// CHECK-NEXT: [[TMP21:%.*]] = and i1 [[TMP19]], [[TMP20]] -// CHECK-NEXT: br i1 [[TMP21]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] +// CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP4]], 2 +// CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 1 +// CHECK-NEXT: [[TMP13:%.*]] = icmp slt i64 [[TMP11]], [[TMP12]] +// CHECK-NEXT: [[TMP14:%.*]] = icmp slt i64 [[TMP12]], 3 +// CHECK-NEXT: [[TMP15:%.*]] = and i1 [[TMP13]], [[TMP14]] +// CHECK-NEXT: br i1 [[TMP15]], label [[SMALLER_COMPARISON_INDEX_TRUE:%.*]], label [[SMALLER_COMPARISON_INDEX_AFTER:%.*]] // CHECK: smaller_comparison_index-after: // CHECK-NEXT: br label [[SORT_IN_BOUNDS_AFTER]] // CHECK: smaller_comparison_index-true: -// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP18]] -// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP17]] -// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP18]] -// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP17]] -// CHECK-NEXT: call void @region_0_6(i32* [[TMP22]], i32* [[TMP23]], float* [[TMP24]], float* [[TMP25]], i8* [[COMPARE_RETURN_BUFFER]]) -// CHECK-NEXT: [[TMP26:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 -// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP26]], 0 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP12]] +// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP11]] +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP12]] +// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP11]] +// CHECK-NEXT: call void @compare(i32* [[TMP16]], i32* [[TMP17]], float* [[TMP18]], float* [[TMP19]], i8* [[COMPARE_RETURN_BUFFER]]) +// CHECK-NEXT: [[TMP20:%.*]] = load i8, i8* [[COMPARE_RETURN_BUFFER]], align 1 +// CHECK-NEXT: [[BOOLEAN_PREDICATE:%.*]] = icmp ne i8 [[TMP20]], 0 // CHECK-NEXT: br i1 [[BOOLEAN_PREDICATE]], label [[IS_SMALLER_THAN_TRUE:%.*]], label [[IS_SMALLER_THAN_AFTER:%.*]] // CHECK: 
is_smaller_than-after: // CHECK-NEXT: br label [[SMALLER_COMPARISON_INDEX_AFTER]] // CHECK: is_smaller_than-true: -// CHECK-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP17]] -// CHECK-NEXT: store i32 [[TMP27]], i32* [[TMP29]], align 4 -// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[TMP1]], i64 0, i64 [[TMP11]], i64 [[TMP18]] -// CHECK-NEXT: store i32 [[TMP28]], i32* [[TMP30]], align 4 -// CHECK-NEXT: [[TMP31:%.*]] = load float, float* [[TMP24]], align 4 -// CHECK-NEXT: [[TMP32:%.*]] = load float, float* [[TMP25]], align 4 -// CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP17]] -// CHECK-NEXT: store float [[TMP31]], float* [[TMP33]], align 4 -// CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[TMP3]], i64 0, i64 [[TMP11]], i64 [[TMP18]] -// CHECK-NEXT: store float [[TMP32]], float* [[TMP34]], align 4 +// CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP11]] +// CHECK-NEXT: store i32 [[TMP21]], i32* [[TMP23]], align 4 +// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x [3 x i32]], [2 x [3 x i32]]* [[SORT_TYPED2]], i64 0, i64 [[TMP5]], i64 [[TMP12]] +// CHECK-NEXT: store i32 [[TMP22]], i32* [[TMP24]], align 4 +// CHECK-NEXT: [[TMP25:%.*]] = load float, float* [[TMP18]], align 4 +// CHECK-NEXT: [[TMP26:%.*]] = load float, float* [[TMP19]], align 4 +// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP11]] +// CHECK-NEXT: store float [[TMP25]], float* [[TMP27]], align 4 +// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x [3 x float]], [2 x [3 x float]]* [[SORT_TYPED4]], i64 0, i64 [[TMP5]], i64 [[TMP12]] +// CHECK-NEXT: store float [[TMP26]], float* [[TMP28]], align 4 // CHECK-NEXT: br label [[IS_SMALLER_THAN_AFTER]] ENTRY main { x = s32[2, 3] parameter(0) diff --git a/tensorflow/compiler/xla/service/gpu/tests/sorting_test.cc b/tensorflow/compiler/xla/service/gpu/tests/sorting_test.cc deleted file mode 100644 index 197a0c6cfeb..00000000000 --- a/tensorflow/compiler/xla/service/gpu/tests/sorting_test.cc +++ /dev/null @@ -1,71 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/
-
-#include 
-
-#include "tensorflow/compiler/xla/service/gpu/gpu_executable.h"
-#include "tensorflow/compiler/xla/service/gpu/tests/gpu_codegen_test.h"
-#include "tensorflow/compiler/xla/service/hlo_instruction.h"
-#include "tensorflow/compiler/xla/service/hlo_module_config.h"
-#include "tensorflow/compiler/xla/service/hlo_parser.h"
-#include "tensorflow/compiler/xla/statusor.h"
-#include "tensorflow/compiler/xla/tests/filecheck.h"
-#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
-#include "tensorflow/compiler/xla/xla.pb.h"
-#include "tensorflow/core/lib/core/status_test_util.h"
-#include "tensorflow/core/platform/test.h"
-#include "tensorflow/stream_executor/lib/statusor.h"
-
-namespace xla {
-namespace gpu {
-
-namespace {
-
-class SortingTest : public GpuCodegenTest {
- protected:
-  HloModuleConfig ConfigWithoutLayoutAssignment() {
-    HloModuleConfig config;
-    auto debug_options = HloTestBase::GetDebugOptionsForTest();
-    // Disable layout_assignment to use the preassigned layouts.
-    debug_options.add_xla_disable_hlo_passes("layout-assignment");
-    config.set_debug_options(debug_options);
-    return config;
-  }
-};
-
-TEST_F(SortingTest, Regression1) {
-  const char* hlo_text = R"(
-HloModule TestModule
-
-compare {
-  p.0.lhs = f32[] parameter(0)
-  p.0.rhs = f32[] parameter(1)
-  ROOT lt = pred[] compare(p.0.lhs, p.0.rhs), direction=LT
-}
-
-ENTRY TestComputation {
-  x = f32[3, 2]{1, 0} parameter(0)
-  x.copy = f32[3, 2]{0, 1} copy(x)
-  ROOT sort = f32[3, 2]{0, 1} sort(x.copy), dimensions={1}, to_apply=compare
-}
-
-)";
-
-  EXPECT_TRUE(RunAndCompareNoHloPasses(hlo_text, ErrorSpec{1e-5, 1e-5}));
-}
-
-}  // namespace
-}  // namespace gpu
-}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc
index 2963d546380..b01ae2efe43 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc
+++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc
@@ -415,10 +415,9 @@ llvm::Instruction* AddRangeMetadata(int64 lower, int64 upper,
   return inst;
 }
 
-string IrName(absl::string_view a) {
-  std::string s(a);
-  s.erase(std::remove(s.begin(), s.end(), '%'), s.end());
-  return s;
+string IrName(string a) {
+  a.erase(std::remove(a.begin(), a.end(), '%'), a.end());
+  return a;
 }
 
 string IrName(absl::string_view a, absl::string_view b) {
diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h
index c0a55e4da33..642965b6470 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h
+++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h
@@ -87,7 +87,7 @@ string DumpModuleToString(const llvm::Module& module);
 // - joining all of the nonempty inputs by '.', and then
 // - removing all '%'s.
 //
-string IrName(absl::string_view a);
+string IrName(string a);
 string IrName(absl::string_view a, absl::string_view b);
 string IrName(const HloInstruction* a, absl::string_view b = "");

From d9d7f3711880bb7c6505685796968c00e9ea86a0 Mon Sep 17 00:00:00 2001
From: Mehdi Amini
Date: Tue, 11 Aug 2020 12:33:11 -0700
Subject: [PATCH 2522/2522] Add build instructions for the standalone mlir-hlo
 compiler in the Readme.md + a title

PiperOrigin-RevId: 326077782
Change-Id: I6fb6c95a47de332cbe1543a51b093f829e6b4ce8
---
 tensorflow/compiler/mlir/hlo/.gitignore |  4 +++
 tensorflow/compiler/mlir/hlo/README.md  | 37 +++++++++++++++++++++++--
 2 files changed, 39 insertions(+), 2 deletions(-)
 create mode 100644 tensorflow/compiler/mlir/hlo/.gitignore

diff --git a/tensorflow/compiler/mlir/hlo/.gitignore b/tensorflow/compiler/mlir/hlo/.gitignore
new file mode 100644
index 00000000000..cc1696bf575
--- /dev/null
+++ b/tensorflow/compiler/mlir/hlo/.gitignore
@@ -0,0 +1,4 @@
+build
+llvm-project
+llvm-build
+
diff --git a/tensorflow/compiler/mlir/hlo/README.md b/tensorflow/compiler/mlir/hlo/README.md
index 1be6fb29d13..9eaa14031fd 100644
--- a/tensorflow/compiler/mlir/hlo/README.md
+++ b/tensorflow/compiler/mlir/hlo/README.md
@@ -1,4 +1,4 @@
-# MLIR-HLO
+# MLIR-HLO: A Standalone "HLO" MLIR-based Compiler
 
 The code here exists in two places:
 
@@ -22,10 +22,43 @@ upstream.
 
 ## QuickStart: building and testing
 
-TODO
+These instructions work on Linux; you may have to adjust for your platform.
+
+To build the code in this repository, you need a clone of the LLVM/MLIR git
+repository:
+
+    $ git clone https://github.com/llvm/llvm-project.git
+
+You need to make sure you have the right commit checked out in the LLVM
+repository (you need to do this every time you pull from this repo):
+
+    $ (cd llvm-project && git checkout $(cat build_tools/llvm_version.txt))
+
+We provide a script to configure and build LLVM/MLIR:
+
+    $ build_tools/build_mlir.sh ${PWD}/llvm-project/ ${PWD}/llvm-build
+
+Again, this is something to do every time you pull from this repository and
+the LLVM revision changes.
+
+Finally, you can build and test this repository:
+
+    $ mkdir build && cd build
+    $ cmake .. -GNinja \
+        -DLLVM_ENABLE_LLD=ON \
+        -DCMAKE_BUILD_TYPE=Release \
+        -DLLVM_ENABLE_ASSERTIONS=On \
+        -DMLIR_DIR=${PWD}/../llvm-build/lib/cmake/mlir
+    $ ninja check-mlir-hlo
 
 ## Overview
 
+MLIR-HLO aims to provide an end-to-end compiler for CPU and GPU, as well as
+reusable building blocks for other accelerators. This is heavily inspired by
+the success of XLA.
+
 [XLA](https://www.tensorflow.org/xla/) (Accelerated Linear Algebra) is a
 domain-specific compiler framework and execution environment for linear
 algebra, which powers code-generation for ML frameworks like TensorFlow, JAX,
 and others.
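
The llvm_util change above has IrName take its argument by value and strip the
'%' characters in place rather than building a local std::string from a
string_view first. A minimal standalone sketch of that pass-by-value plus
erase/remove idiom follows; the StripPercent helper name and the sample input
are illustrative only and are not part of the XLA sources.

    #include <algorithm>
    #include <iostream>
    #include <string>

    // Pass-by-value + erase/remove idiom: the parameter is the function's own
    // copy (or a move from the caller), so it can be edited in place and
    // returned without an extra local string being allocated.
    std::string StripPercent(std::string name) {
      name.erase(std::remove(name.begin(), name.end(), '%'), name.end());
      return name;
    }

    int main() {
      std::cout << StripPercent("%multiply.3") << "\n";  // prints multiply.3
      return 0;
    }

Taking the parameter by value means a caller that passes an rvalue gets a move
rather than a copy, and the erase itself only shifts characters within the
existing buffer.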

zbo%6M4WMxL8pfC^KYAeXtNWvH>(j)OU-r|WdcySfORF^3<~5k$vpHF6oysS`apE>_ zZNJd6WX$$944DZQQF$-`4mJt6(&18HCe+e&)7d(EdBRp?rSt3;s+$_?S6QFS38$=T zxNx^g+MRnpuTM)4M%%h~L9RxJ{Jx|VX^j>4w~stEyySrvk%C-VZSydm8g*bb!l%T~ z#hK+_!VL{gmYoc4sms}Y%ADJtwuJ3RO~gp*9u(XTRDa%5qMsG~d+gK$RC~VRr^o9| zbYH}MF|!MhHf+CvG3Wi^K$#McX9@3~nMwhFQJ*zGyp^RtboZ})ac+ww2mE38yTWS z#sezl)ccaEt?@^LpP7V)=Wn(eEjetP1saf>D47;H?m-_>Lqsa=h~?HV#RoD-S-- zPc!28nN%Lml6Tah^MS$qhA8}+LX|n>!WqIqYUQc%Uu4Luhs^vk=6`AoI#jnDx+HPlUCoxuc57NvmvT6@Bh2seq-xY#}f!;$v5 zrRHCu2@mo74T%6o2w<$>)4nj#7a7mdHR7{zweZg0#VT&m;lzSZ>($BKJ@}d9fqUo0 zsWFMj%7~dvc>bd zxq=(|RyPzZXY5^b}1e-0A;*WxfeAJwzEk_{zBwXYG~Le4#|rgFVEl|mQH^NlD5*I>{z?FZTTefo+PAk+ zheS;b?$tc8;l}_?!qCpw);;F_!+1mqRH8lzn*A6PBEr`Kq&S#DNBtk*){Lyjb-+nW znyb6luiwKR6yVv5L3*Z+abba7aL>GG{HBglzMy?TdrYa#v!RStmSk6F&&jii28aCi zuP>-5`;cU4r7$e4EXxK>vrtjObRer|4L68VdHg#c#+4~?Sz3fPgd$uSBs=&=j5KgO z%|YpBGvjw`6e502tI1H!q@zaDJT zRG5!bA}Q)}7g-cZ2<#)mZdZC7=ls^E&X_r&`i}fd4|I>^6x`BeK?3-Pcdn!EJ(Thp z(Q;mMw;A@axFs{XVM3pktz7ohrmg0n!&v5V!D93#`yc%g5Eq-=gWv32s}ksu9?jDy zj$F~Gf6KOKsdO~N-z<#}9#3Ekb{z@05T;}{B-_v&Zv;nd*}U1HKxH)J*Jn>5!i5X- zDZx}kO1%)7MH$(2yUT-kDeFMMI5rF(WYpY`D}lVn`5NLieq8zRY=}=F^EA&T{y5U# z*E;ubd~GIxOGWT!2osx>#z}mXhiZ+(d!E6|$Kudheq(TfF81-Wb4Bb!_%z{wsGE9m zKzKa#amo`mMTsc#?sS?h1DBg|t$Aq{iY28R4dGdH&f0FgiMc@RhbWf{+c-?Hay#LX z2du;6vw$8p@QU_yx#=&Zxi4fn258nA&YKIG)KKrXS$Fr$8zrg;C30=L`Aq+;v-F&e zJaVC(jPe$Ht~o*fpGXO>uz$c|={%JEGfJTZVl4&BW$RpvWw`3V|=SD)efsO z>Cbv>I z%vD#6_s4gYFuNqhj@BDB)iWO@p^FfF+g6eYAoEmFtro5{$qGMf&u(!2QP}?{nvoB7 zU0_`Jp_`%e`u2ELos}Mq_DpB2#JkkPTKAUbwt|VwZlGn|7aWap4bx*8*o>t^I*!X(uCJ*`HOEUtoIm$Ecc*nM}M23UTRwiFc-;7 zQ8KeSZ6*#n4G$a1b6sv~QkuE@2P0c<(xjnZJam+aE{OARr}v?4 zB%Ys_l~^P5c9mk>=d-pJlepM8_)OI*yb#c@>~XIY7aj9JoIURX)E7aIz zJ-Zua6CoohYD)Jn3iBeze8+l2)j5}LfrGNss~bAVglY>!Rd zm_2tWyCdgNw%b3W4B3uN$2yAaIBWIu_*Ma8iq%Sk?Vr@KQ7hJZUQy+#pnyholFNwwG3i=lpZ^&zpa&@E!K}K^}D!3oEr`rOenPy7wJ_(o>+(++J!IdI(FF4 zoX7v!#Rxq8{nFW+kDML0cy7#e&$c|Q+?42mZO+(Yx&QjnYUq^?)_5}Xbjht>N#hw{ zR-X}(jxxJ1b3MeS9q@H$X~xu`#k5jyT&XvKvgzVKdH$R|e2*ctKUWtQ2a__Dgt3D& z3DAv$>DBEk37zR%s|*q%!Ux^FA#WI(`8E9SwPAN83H>3C_>!<;u=#@XeqSZ{?i|j_ z&ui=oJ5>?^L3j-bYZ+*n)sdtedNY1OhZmiTZhg0 z_(zc4SZYU*aW?1U!d?g^P-{hLD02@=NmSOYPp1sQ#x#tbly5qLTKlL>$2BL;kE14< z3hzU6kNc}&0-|h4?_BlI9rvZ%oM#}N*e6Cpl|dc?%+dDFVLS2;BeDNGeIr-u1nlCv zO4Ci*Qh#pWgolZ>ov8ctrl-_jd@jx3s|>P?^mm_KY$@}zlZkCZQ9GGG_Zr_;Dt@v9 zf@$t6OA=1xjMZ;)&zFvOgtORwLnF(^ocQ6jk=0Z4sbu;gb45q;{ghRKp9`?A=~{zyH99^Rla%uRl+7k2bEBXHK@`p6lC2bO;|n z0k`#>5{hR=CdFq6I_5sYEY3+HLEAK82`t`bh#p?v^wfdvmD~#cUpK88KOx-5KzshMfY%@m)tPB*jEuc;(O8H z&Jgo?F+^sv#rGJ6N|0ny2q1)*)9*WQ)Hnbgqh<*s`C9Mwz5daQLn8DT0yQ`x;N4k( z=eDh5r8LM3STLk0D5r9{o!(l2T@WjykTY;U>W6T^c*}_Esl(s=_>Ii(rHYqKK2fi4 zMMh6Nm!1(ie-9{{ALPAYEUl75WV>2nPtl?AIYE$-G1a1&D<-BCviBfGHijNn$-Oi) z_03Z*VvCIChMG6x0~p1dsRTpU);F!zHKG)U;O<*|M^=uUgVzzwDDw+e7pn4^S#rgs z(OxAW(caVcywrrOmW%NW_G6If*oNu`dC(dvm(+Sb*AKll6mZ!60xWsy2+P>RV9)^} zdXJ9W83{5D3!53!lRJTgvI17dgU#+|PpR#NfOxs&`NdOM?#Mb;bkkrHT}vjksWfi5 zh@e;xp_IO0LYTWgO4#*+{@H8c3)u?2*8xTLo8LDSuLw`?qOZtOxzM*U61K-{&DZ?V z^^Z$)H^ZvQdudSv{)E84kel!$nJ9CXqWOk#G*r5MN46T&J?M9cAc*(>gIae6>9p)HJ0W(i=(UT8{YqhtXsfIZQ+B&7`0**g!E<@K`4s>1;y)|gjx-14Z%dp#S`uer$?5o{jm-3#|D+Wyp{CGsTPRk{ zdqjaWMK^Dd+3Plo`i-a1Oh^3mePB1*LIleN0_t0VJ)8 zOR+7>$D-uQ?BUOnpw_s+JLww_*i5F((?@CfTeVbTPc?WYRIZ@amfPy6p~xqK$|i20 zeRP!W$F!WYhrJaf+l7%5DS+iOgcUA{ESg^g*RfQES$4$sbC4Dkl_`_VJJFW{=a_Y&{A|-1HX153~100lkbn zZMeuye67_vrx**Jws^2Wy#LTqT0vPoCj-^WR4a(T4geUeS{f~dXbojOR3@izAswPA zW_k%fgcQx37{YbGg|y2M%@37cWE|Em=pIh41%m@IVHgX!>iUPyaECNax|yTpGNfdc z$o!qUV4(Gl4BAY5qs2Yl^ICM)=a$jKAH`AR=slD8k7xWuFLqL6#_u2ByZaDr<>@Ek 
zR(#q_Fr-&8yWT2BMZaWSrr#jf_1M=J7$F$hZtio14GR+KKIh(y8i@7VDBC-Z85%at zyB&FYV}twfHr05QfUfZBIDz_Mx=~i$+|lVA-u&p>%~beuyoqvV&aN8DS=?9ahq6|FYgt ze7!HR#S#trJAYvN@60RE;m zo3Qrpe5=@njk?Oeq?Qv1Y4@y$W#(3jf2~TVfauwJR*^pseZ2(ej%>v`Chq&aS_b(H z17?+(+H=#3rUo_1vGx}21`fFa zrPs^pdLRPKKzazNkYg-kx3^+4%@9cY5ahtbD zvbqu`H8~oTp#MD7@aa`1z8_7Z7350xw=;4&YlCG2n|v)x!7N_wR+^JoPYB=eZyfTk z*EARo91I7j1>)(_lr;?ONVAyY{JMg1=SBNzom{rCN==yswp1yJ?Ff1H6}gQ~8BkK$ z7pTo@JeK3qCm(Hxsn_IG?7W?xQm%_v5bMJr7Lq!*e|+{3*tGdAQwU+t&^tDJ#`S_^ zHXxH5y*pc5rNA@QAO{B7?8q`ZrhXxY(C+~SN*Flry#Q?-pf#XceteA^+2}ifaa2o1 zWt=9Ic_d$fFxWSfrZmk;eNy?K;m9`_n@^+9%7R@g$swVZlA%uE?C53P2m>1 zri~*tYJ0u+I{Ig??qxrh+Y+z?+8HPf7E*bW`fe&W(WFg!vTb@YwHaB&LU-=kOzHxt z^Ib4SW8Ihqbi9S2SBtG!(^E%4Blc0Q9Mh10F-_=G@)BXegzFM|%|Rm>0KBn!k|6b)Icij>(o))CTYK>#ph*LqKkq!~h`bE*%M&fus4 zD2-Om$mSU4WiMJiNj(RUx+U~$N=%v@vzbkp2Wa!~IZgZZyL=v3G5{h`OX{LM?b(DY zO1upCDN`6_YNU_Wo=xp|jV+oL8tja;B1Pcu3=sYtJi5 zS4fNGYz%{qKl>anNM(h;Hi z!Aa&Sh5KF5W6^z+5*2x+JLN1gl&R9^K^%e=Ic)|1dgIk9{eNvf54&M417XXjWtK}r zxH7Rfx?!Zu{a&=&RT=x`al~2~=(B#9bY0n0q9*f_4RaI`JM`#qvh7_pac{-Nc{VW{ zAx#x8pHUMEHF5(s)>&!CkrN?n?63JZ(avguS+cGkN*`U$3LttH6pS+w?Rq8dJEGe_ z>YqDR^Dg2|=*l-NHPo>A4GY1rgg_#kN|b>1fU}Dn<0FTB*qDS1I5z~akz-PX;>}8} z@G-5Be8?WboFfD+hcWF@fB13e&)B4AYQiD@Bir+_D;8`yIyF$Lj2s>DtGk7f_)#cL-f*;^CQuvjX&D{dq&?S z-_UJl<+D#sdWld2I1x)x!)(-W1U1Y<8N+ge1W>;e@TDBk{UFq3fHdZ$?mR5^QA3Gx zytfjUAg`L2vhf39k=&TtDiuSZU_dB6XjU511d^2)`ddh#?iaxsJFH@p5a3)X#?K~v zi7+-@Ntx)uXw19wa&2#t^gtH9k|-(Y+tY5a&D7YtthMeu5+!EmyH4f>^(O&X*c`tqm7 z-m%YrZN#r4W@|Pl5Q~jdxV#9=No;hiJ2M5-V9ZN3N{_-&_N95GAE1nbsks zjl>!&`P3l33DK^V+NRqrH>y!lpaXW7*r!+uMh9y8My*p2!XjS!ACf!KcMp2F=~E=~ zsq#fo7+)RER~IYHY|*6050YNw8hE4nWz044$@--^IVDnX0H|l*w4;+1zj`(?nQJ+v#WTaHd88s@wpatF;2AD$a z`UXsekE zotL%yP}5np?kGs_RWN2?lNwN`!b@s9zRwsm+^k?^C|Feh^`Qd`QpE6H{KW;l*`^<~;5KuAae4FFwRb}n z>|Y%&CcShdS|$8j`WClFiA^Nza34)Z_wTS~SO;FYDySi?nj&aG$w|>Z)k0FOP*o4^ zk#iF}vYCC%am18)dB+g^OkIZC;S+&kALdlWie1w9%s|Z<2T#gi@|3|_VA71X%uF(pw*>r*gTpm7bK(5+xus2JZV#;pq7dC(-`@tHr_=PW^9 z2fMW*n|6(@H_O)3$hP0Wc9)bmO;Zk0;t)A41LP&d)3NB*CF>z&Q;aCjME{Xb_mS1l zFY8aRNfT_E58AOvq33p3?`!!4*_Et6ovUA%mldj_`zh9vQm2F~ zR=4+W>pHzUI>BN$kSi1tG9E418jJOVue_Vdd&11Cak*NrexdGI7h9i~4G{MNjZs%f z8TPCmv4GA^3NHU!H{tR+A3CuwIT5(`Vs0)PpHz z_s8$)zqftaUIxL8siTA`KiG6%RabK|t?OJnR}N;#u-ESeM#+857U=3Jm_5m?QN_)F zbq#!0Y;)jmo6HqFQkah^OdDvo|9i~zII1TwG??pLn|vrYK6Z5nG3Of2kL^Yr(RBLZ z{En$_uM{-l{ydI^) zg(gFFHGjQUBaVfgu_v$m^}l0rxI`s}4cM!-Q>*T`e$kn4)@=|_t&*3~USfO!Y>ph* z)3+?~8nGC_#?=s z;8&bWw;L#3-6<&qwF&-ZEjAQR$~uZ=1nh_;-(CHIU&Frozp{mk7>%k$jmOki`;8Cj z)>WuC*v2!$5>UpPWX(Q2r4*)M`8`!i+!A&x2j5!mgHHjHWh~|VVOGmFIS3(uJ$oHd zNFws-)MPu%PjjDj{p;(JV@#4me~r5&IeN1nFH{m3iyr^8iom$~!x!cnj$^7|YK|OJ zqfFUeh4&UNTOusqmiUaRh;fwT`!5vSxChlEx`XK*{??|yIHn_f@}pz}_^3Wt)%7jP z{PCaWzZ4U{ITHuIno!M$a(~i&`IfW4hmzg-zJ3RIrNVM!8}i=E?mskGF3mDNxmI~8 z9LnozY;1qafBA0ez=FmVKG%8|5h8t}4xiFai?_MRj*MSrm;f?_$%ZzP4eiJxF>BL; zI(&w_MUg=Gs!=svPi74`Rs7gz4d1}4-E))|j=}Sc9S)!sG}^p?fG}YpEhN z#_JcRIpX&*wPOc+&(uGC>larof**oY5`w9@jb@xV*{el0^+_C5f&?vFg)h!NFpQ2P zGkmhA$6Y0OI?FH2edU=L+lW)m)$Ti5ANk~-US+g8F#U*6?!flew*(K-(%hDky58WH z=tg^a_|6*0ptZ7Pg+hRekKEDcMjh{XeakAO%sge^*BO=MMk$>aUC85zu@i;q0GE+-*nuN)v{Dm_{}el0lBYSgGcPl2wZMBb9Y^d z$Ayb$q7G}Di@FCs$p&=H;n^_-HGrTsZ|drsmG$YW3YqLBb#ns>g9s*O#canZj-pD( zylM|H8E`}IElGFpY`8yLC58T4@#uYWQ7Jiy_)PdYdElrv!qc);4oAdlNK?_W$;_isvVE}9|=RLt7O7Z zpj?)`R8%clSCV@bHSJ_t>3Fm<^e5QB%hG)QIFGd%83jB12b=_HnV5sIHE_~si%4;5@Jnr z<7HQ#5>g7IlFZ+KyO6KFZ|oD_Jl7EnGLIPpiF9TM!Y4V7NA~Vjcnh@LnUQ5ml+9`8 zciQN#7_zG)YDRyj)DQJK_ojWu*KA&H0II~2cym@0$cAMxFOl^Zf z&=H0S5)WuTztM@V%f|+KDACi&Gw^^p8=1VJm-1E7#W~N?D_|5LieA(8Gi7 
z=HGvS=JfCh7^m;So0MuT7>|WWo@0oVT_^i&?#{|;;Nu~(fG@6bj~;S;ynQq{^ySxz z5SLe}qWkt|G&Slgzh?Y*7ydSR&Ivm+vWR4Vs@89S?)qlYy1%?K8c)Vz4m{{prN57~ zN*X=P{;eNQ4SJChMo){=k9TWotXfn2_=3#xK)jLV9a;d+=u%$vS^U@^aFA2|I6IJA zubxyjnp(9Qo6ZR-mtR+JTrwio>6T+c9U#IUo(YM53bT)iW%}?WtQZ8KsS%v_$h>r^ zCe`)FY>y`Dw!pP{*&^({u)LQyz|R58*d)#>f1VmO;!Q_e56ZO)zab1wE89aey7arJ zmys&pMr>EU@U6dG-eE=K-eVMmKccKWvo3MLWZ&hXd)m6MEzSQM-+whaie4ix)N^=K zW!FmcHM+xfSRfW3yk2qAi--z0yjE$`S2c7+Mg-f7D3y49>7R+AuKA>)f4v?uUH&N9 zGyM~#eNa+uBTwmEYU|gtfh9&c047p|^<-X<^2h3RZv9tH$>MQ?BhwZfS|~V?qJ*_G zTZsVj1laW&T}p>^oHWPBRsuv=x%EXgptTqfYvjq$*jL!|kEY|F~*f@$dIdgp2;p;5n0N*H-Z3OJJ@?lQ~82CjA!`0`dng8?&`1gt|iIm z>dyElbr@T||FyMr;;dNTt|s8)v>mZIMx_%P;@&{O4~qEq|6VvMKN>{srgh642To9?@`kx6|mUp6kN(t_G* zB4s3~#fq(35EbD5mrNvS^ON|5%lT+uR!|@ zvc)$N#OE|X-!JQsKCDA~+x0PqN~PD9@$gIYDV_LY(<-eT9&WXu@VMDHamiBSz-an9 za$OwFp4XvWO{~mK>IlA)`Y3Yy?`f0I3H>amC!|Cf6!hN|S8)FG`7mpLd*KUMK=^oH~9GLx?`Z>_M;ysFI9w04qV&h2ri zQ(|q#70ew(tesgD;p!EDDE4{cD0%6(^tBLuf?bWpv(Yu$Y=GL_%YMUiWMm0J8WAK3 zMri+tE{-6d$98XfrkxK~@TVwYtx+n}^pv9d^U53ul7!t4Jy zIunN^^1qMIFx()bqT-D=-gw_S;C*CxRHjyVV_8vInH@VEDy12wmECxi+C^n$Hy)Xl z6_%EjwP;pW*4pOox^3$|^LzdT7?_#w`}6+1UP_4*FPoCZe~UMmn@0Q`_5EGv*_kJZ z!j+2-H%dDs2FtTzf?p2pdGV!r(^u!Aq!ezl-||_8$(O08j7?cds72&bkQZ%Hq=+p8@1e>`3b)GX$7_$>_~c4~uC{ zgcUug)h62ZD_t6z{GJdr@EOr<7MX1ca4yXSoLUEt* zCaSazUIk$AQuS#6g}S4*%m4Jdc<z3etX0OlRe_~T!eSKrmSxJ+S>&#R=Z1Xi)$SaQZGNNkeT$^vqP zYd4Xle$;A~Omsr`DXUe|Hnh9B^s>l*+b8vsh%Yb=R)U~HJ*=-l$U-swmpSB^DeF1; z0esjkeH}&d8pw4qc)4N3Avl=cPKjK3xW>W}y7^*3k#`>c*?WNWeivk>e#$t;!Fe~G z#|n>L-*U2tmoYi3jid2FC0!e15jkyq-PZeXRAH+#;HS^PRD`vf^m9c|lK^PBMxja+9E zpdXu&n)LH4JgdivETZ&RAyC2RV932^LE}#fvz^;6Eri*_$l>M$BWmGFO6+@mvZoyU z1%Z}shKYZEqOaO>@ukjEjS^zn%;E3UJ7a zC97QXE!8a+&S*nu-!{=YF4Fvy*Dh}%cPMFJylk#KHMo3-Y_te>01i|h@K~$OOhxY) zysVQaZCCR-gxo@BTx>B{=CX0!{#oNn@jB%4vd~LdtGD)HT4pGK>dVp!&#(7zq(^h` zD|wVofZQn}E#Yr45kcEwGCH)GhcB+U3NAj|jiP(8pSX~hGFt~7@))claaILsi1icR z5FK=;V-hNtd@tN+2t(T01^*d1nN&pRSjRIXcmB5l3T!baXYYjTteQ(ejtKOffqdxV zpULDWES%w7Bq61g#CO|u5^TvQpR5aFwUN(6~^w0~rqM8HS5I>LLnEQ47Sr zf)qM#$#}v2t}-e?7CKY5+Ip+Td+V<8b(uX|EiW?dJ}^_*4q*ZzZ)15N;(D!d^8^uB z$|C#3;PL_TQN4=eFy*)iXGXVLLnq4=P!a@hj!H}Hbp&VAu1>8ZcBlPJXk1)z;gd-0B)SpOMZ*92BP1117Q ztS9kQGUTJE^A$nv_lQ5gS#9F8aC;XTAsCZ%w>VT4ETA2oA+@Qs^KM@12bFx=9T5&Oa3X@zsU}V`ED3K8bW*u4K7;ee#fZufai;@tt&31 z+8uxodyPJ2_wlwG=!7Q1GT?s2#$;+ z3H-W54dF$%pT7#5$N^u~f^TZU;cwyID!PR-Ai@Vqfr)wp)M0dIB%j4vhe@(y9W)3} zV-b(TIwxjGC>CbYEBVR*c~N^+hl2b;N$&JD_yoKATNl(Rv3nWxsA+6jfjhhdth8iaVw)rbJhrr8^DR9wzJW33k66H@T zC`*w4VHC{}>>81824LP4oi?NwVZnmR^euPr1eO4Z#&>v433;h+Er)of9ZJ9CnB`x2 z@hXng=pO{*PS@_-a>tt~*m!0fGy#yuTjT+e7V|rNe>vNTH0D8JtR_D#7GV)nqIAmO zHQ2o@nZJH)d;L+4&h77OGCQpu1CGxYNv^ZEoNA`o#;!fZAr+_LKvR)PzA`_qjFlgA z@KZtG$bQFnB4I6Nok{FP|8*|=4loaG-5w`2BYSN*0Bw2RSQb;`uY^99K=N<6aymX* zZQ}g@uInoz+!B~_RfLTNa8eFAplr!j8=scI7F*jhJDYx?(;+|w&^YF$0mNTK1YzZp z0R9px!{U^Yr6C6j^^O?wj0dJRX=l?S?auut<#zlTexRw&Q;}36AZ$_*cG8Kz&;wgw z9WkfgvhiejjJ~ZH`znPDESeAci&Ws)#`u1)$`SowB1u%EbwDH5s1=*Dlj0+6CuG>B z%y^u*q7>l^*S?4$*=~*LF>;A&+5>tqe?S4E#_>`6$*BY9cR?TXpb$lp58}$BV3#RR z-S}kUQbwuZoKDw}yF{l`I8Ga3=ys#er~5uv84=j_>=R3|bOAuDSPW?~ukZWY_O1xf zUfO);Y$%k({5#D}36&U|m}?+k4JMpJ@ScH!y`g1@>jn{lsm5*KLrC%xt_I)hMJQ&W zoqggSEtW9@if5?RGkOd9BBh(_P30K2h9u@+Ymt)q2g!boWG5cUC}HiDQpuIBYyaLO zq4S7E^PIpg7SSwUX}+;tSYYVaBjwWzLq#d8H8Et{j#b|OxcTc`$3<%g zbzx1QB5CZO099GdrH;+_CU8<1&yJ_;7oogV{O2lMIzmEUI;;dmy8-;o5HxY*iEKFk!P63WEu5F#7FaxoMC+`Z_cNZ9*^xEMOC? 
z(@1wax>a0WSU^Q==?YZq%V3s;E0 zF;IHpZu5mB_myyZAxBX7@QcNDP8Mp)h!#D(87_*R5%taEz)DTJ8 z+w&eGlOR$Yh8%}6QYc6!=|eW@7%hGnCKD$D7yRrS;psPLD!;s6Qzs(j-XP_kX2f^j zS$vPgzCg;!BIW(>EOV;S2RTkj@l$+|7#wEbHmr z?LQqYzTCBVed3&RTG#Z=^M{i5G}nFfffgunY7*wgA<{t&+50GI#Q^210GrJx>_*9o zEYi@o_zZBY12eygZ-0Hqa|90k4{P<%N6(CLvO=>9M(y?8Z@VkpA9ckarrph>uGxDd>LCUtCaD zJfm^FURH!)A5hi}xh&Jnr`iSnb9V*C?E3Cklk8IiUbmtS*ZC<2oR>je)7Vm&sPVxI zVxOvj*VkChF*CR-0VVtkZj&$lf(*j`*_>5)NS%GC)dULDBv3@pNow#HJ|zhNa4?|9 z04Z>edBFo8)^lOG`kyn3Tf@H+PgnkY2yC`^L?VGC0ij9+ArApv%`>`655vNt?)jv8 zjC6Az`Ew*C_}af|)b?PF)nRfQzyILs~gjcH?6N*mrlR;TEKY`IQf{hgo;S$xUA-Da zRMcPiC?&-F`?BXe6(O7tRm~8kGf;l0`H!1GyVJ*ci;q86KO=FpUrs8DL$$i94av%n zv^USFfSVB=Hw;r52xj-gN6}l>oRec?>E!oX{dRtT8$Efyj`EL*&;1H=z36y7oT+z- z6bEbSmf=qD$zn}%>Nvh|zjSb_pRQB5VhFmQFy!pJ^_g%tM>~0L<&(F!UQNmeVoudv zVZvfgO z0EUk1;}94c?Z70kBMJ0ZQ=cdRwgNc2V@>1r@!!nv92L;kBILWA{`acS;KR@Ho}^R3 ze|&(aJCNWuWO@zfMIMm%h<1bh0N+w6J-y8K%)^%*CvV(G1x2kKgmH7@)mIZ?e|05( z?cP4-^sp}aR2}#2*N`nx{Dy#4hWKRy;t3R^E+EFh6`rfH33U8+KDkvx>}ncGc78MA zmR%-U^z8BsMR)Xo7Zg|kmZ&#?FhJD;r+xz{9Bj3Um^?39QLTkQ4zT%*f~Ew{{Q4_l zli(J1Bu)Be)iUkWO+6Bayjl5CXC4Jgk1|xKgI8x589#S+bWfQca_MqBXBI-uLDD-yuRvTp)w{vMJ&T&Em+jfua1Qa-)s<|87~|OY@GV z8dYy(b%PknR{a;M+z)!~OwfqR%zXYe{o4GN4=?l8tZodRW)u+1+TW9%vWab{no)m) z@P^Ev0tcOP+#u>b4`}S}$=7L27Ea}TeQ3AgPRhaHlV|JnN6+s|J6Q5*^hEAS(b}N= zv#I*pSVf{|zKe$8pQ=b$*PBH?{9(;#WOnM!vcv4r^Cyd*<_b)u!DaHoQPJHRbK|Ec zZ%>`a*ZgIba2EUbH~jW`+rqwv25W zNod>vk=ZR5lgM9QYTU0+s}p*MEy-TOkt&}aq5a$&vvxbPf;)R|&8G+LtYEHb5ZfDv z0XAVtc9P0#1jN)LYHx?_%+fA%zH(FTEIlHHTJ5V*G5{9UyD?-`{WCC(x>Upw@GQq3180Y_$DpzZP~4rE=0dc_VwZ) z)6Lc`W{F--_Qoqm*H&}|3)**jFhT6Dn1nE%%tv9wa!P~-s%N@-q(wLh7_Xah8&|mzVG&D`K~CO zpS3c~2pDhUBqCDOh5m7zW>L{iNE%c@JQtZRo+vRqNS36^9#2}q+0#+ zXmeO&VElHcSe~xT7MIMor4P0b|5Wxnomd)E_nfrvzcEe4!ip!q%r{MR$K(j^D~{|9 zTW7WtV=@36YtBe?WUSXh4aU3^CS(GAteOG2aCE|GX@eT;rx5FI1IUR;k4@ydD&g3K zkzfw~pfAD%ad;H;*kyP`E3+@jrkBV;tNosp#gD{rDJ#l&jk-ENgzH|QHwfU>tsV{l z@GwHXV!*4RTc%fi$2RdB-k{-bfYQS%v${W?73z%gQuAh{RvmD8fZKblW2`#Di5?Ta zQoF~^J1=g5b8O5$AJQtO=|2tpBVqi;9mxs|&rp{qlyWtW&iupr8xUW%>sozY6TD5SI*;n}7w*=0Y4Fb1b2 zUyZauH#FthL=F;exaq*fpWi*2Epl(~GR6>O!0w_fH4!IBbZ+R@&F4g{e#pnaOd2Q3 z-d5wLOHU|vr6nQQmbj{|Df=wgyQZ_Ssxb*-bU8OUYeX4EWT2&X%yr5U_b>o{aq0wKj!x)De^ zon`#^&Aa2W+VpIpt5JFUJnvxdY>XwQHqo}B`}Wnj-+uA%6u1Di?i~TWlByTr zqkqNpe78Vq*>2WDJGB=QV&MMLGcvi-H=Uqjf=)?y7~bFz!B%GH&rG5Ufe7E zY3CE)UbSZnGCyNtM7m?hG=DgC<4xMw9!l2_QN&%Z zA=+lL&LKs&y=yo5_(0U%B5swp0K+(N3}YS1eIE96gc`(#L83a)t&~d*qC=Kygy2yM ze#m0>8Epz6j|4V8Yn9`@zoHQ^`?@=E<|w#hH(=v=+P$3n=hrqGsx5W`38Oz&-qAm3 zukpLLIDlu^FDUoSYb3|_^Gw{i6?RnGf>#?1{!$3?gE}(1Hh;DmSU2R|Ly?CQB@2%n+o7oX$!Q{u^OlUW(Xs zA-Wq!du`u~JVqXl<5vKEK8HXeXvg{(zwigfPJoI6W9+ZXyp+vXb4|YL>D+h~Typ(_ON2z7>;4{NM{beOe#bh= zG2R!g{#ok1eZxin>lX(U2@=Xb1^5~HIA7;I3dOKjKRhVhXmv`KPJb-w&f-O${;!QgE81+Nr4cZL$?d=8FT z?;fpU{Jn*S)+~=-s9C7L&;R?`Up|L0>~I2B#d2)LirFXur!`b6HgygC`x^IILTEv8 zbU`FJ%FHO5jK+w>QXNi<*imMDQnN_(BmrM-ks)SCP+el~8VWjqfoH z3AHGRHk!LXvI;b6+waJfafrbgU9!b$nXQ2oMW-%QmwU5sG!7`k)Y8yV$b1I0l7l9t zd&~tGt0~Y*O$$b_sA_QDObD8FiK(I!=INMBL4{BtBg-QKODu45K$aHX-4!wDWow=c z%zB?`&t#tYUiJ6Iy^N=T%V_MX>*XaXlzEC6u|Z~oxYiZ4n?t<2WC_E42a4+s+@P*E zi)t{xlTw^?xKqt}8<#AqS=!mYz&hSvDa{D!X#V|ds0dpK5H>R_x3Cv(W-eS0RIZ(? 
zI9XEhog8|$-eY-D0JDqBRDdQz@aOQtvlD=solX19LYfA21h63-iKh~msKPE#OBB7b zgaPbw4iupzWzIp=OMxLwS(H!~#l%JnWUsbV-OIoi2=O^{ke&rLMQ|j#RN^7WCkrZ~ zmGfe2u=gqxS@t;*U=BE#R4s#2t|LI$?9ptB-?L%n?jl>v9P-KwTAN$BgN;RxOGa8} z1GZRS3sN*AicL0D`Zt}Q4qGODz|D0JGuy4+X!Wf$}RBY^>T5_C&SQ)bARsjc)CIS zqows{3HY-_tTKNv?2~rEWi9&s@5+LQ}#qNWjw7W zx`9{x(cU^omM_F-p2Dd_k^=s-Xn`b(9aKawr_IG+fCD$w$qpYc+06fYE!#VV($=uc zsrr3UGv^~4R07lnz>dgq?UH0pRqg;Fn`=+TrUAyllRQj$qVT$x{QBd$YG0Gl^}$ay zL?{1c>46Ktl#+A@Z5-N&S61Iydb%b&)S3U}QHWL=3RJ8W5zxqP+d$hmyZTZ3g z>9r1ArV2+{%3;YHgM`@VsX)^?z+w(C<hFiY>~@t?J5s%U;wic=3LOuq)kpQT*Z1tqY~AlnZeq{|ztA za9(68>+V9$!!$F5mbosFcw|X z1OG@1mA(anR|01&TO@JR)}s5a6;@`>Aymx*14OJph3E^#%!X#?sq)=7u+NTO z-~B1lFEHKZyX{Ng)R)`SU7jvu$e%J);4xR+uGKYF+iz@toAkwXc?^Qz+*Y}@^v%@n z<-X4<_jJiyc^;{U_RTiENWCAQ(b4_0DQuJvfws;5g8ZKHYjv6r?SOXhzzZlG&bNb8Kw?642p^G8!}IfSxj6iK z=8L`MZ{EMI^j-Yo;~(O`w4k`gFmS)FrtWR7Do~!u z!8Ho7nZkF=Ztd^J$ikH3V0yV=wQQxlLij_%7or$S2|FNb_K7q_fGtSJa*^y5V)6<+ zmWr1JHH_t6hWu1gb{oVNV*CV&4$MI_(V$t|{>yPb!l4x7()~toUFh%BP5HLYblJ~S zk}S~!PomCK-(|*H#7K+6J;a_I0!HR8LTNwv4-RW4e2>1~VAQ_ea@p9j!tTr9Ja;P2 zn{vN#I#sU(3KbNG1<2MSgsq&Vb^jJV*pzE77vCP6K5>7#N)FDk?S~3FW>bW-@zJL` zxTbSrcOurI3k+r93pg=e9PBPHT(VI1(z-kas7e-{5NV9ulwio*A}+HWZa zVD{1QDQ$QxHJXish4@f4Lhb^phkM+1X0Css|IKo3erG*vaH496iJ@XNi~6 z&9w{5=X=HU0Z&80H!`u22B6Q$mN)#Cpp*oBI`esHj*oAu(~b$>eL3N#uZIka)qX^7 zdd1=+QM9z;@R742#IzPh#rn3&`_V5R%zvnwNw_^JHu&#B)!cls*(f+`x_XghUFhM3 zdBZPEwH>B&U{JcuNs0Jrr1^7FlP=J94scdU?dJdo0mh%7Y4`b|*8j4VSOZnY4 zdBEo2cXu0G&qMl;r){PJEjP3xj0e>P&5|&-G^q>2pLu@4Qnla)*c>WZ2v_3WATL<5 z_nswop=f>d2VA(A_{+?KyUQwfKMj?;G@ppdOq))=@*#qc%||NtZhW(7&+=VU57J}4 zr9Yldb)LF)T73KTAJYAM!dzC@&43%%OQ>z26HCSuNx13woPZy1+_PU?z7!2$^*S;= zG@u8N>@kB}YXze@A7kVgVq4?cJ(yqt2EzvMoWHKAdv?Ta{v%G^=$A&-d;4(@JYy&WT2OdNji(&?k=?_%jc+~8BZ`G|G z6A4#JUvC^G-d)D!XZ|GpvE_YI%43MT5t}b3?0Em7o*>z`dCP$V8+Xx#FAAOK=u-h< zEX^wC|JumVABf2S$m3uaufm}FBqSqQB;90W$V~4LpevNn1d=mnq!g`6Hyx|nCXFeT z?1=-Y0x6|v{uU8%^=U3WS9K|TFZ)}f#HP*N$x@&5Cq*MoR_rJ!#YeNi>a;VYuJFX_ z1qguKWz1dP8TRgel)}i!HLrs;#P8`LQH&gpwKeGBqH5TF$L54Kc5!>Hl3U=NJ=k3q zPG!nbgZO$gr`7YLs;e*hA13*)neDpwJ@v75%)6=arzcCCo}Ngu2f>M4=aG(pZCQ7^ zb2}EBFWoY%F};jmdLbl7wvlTzP{%C|NK+2oRp{wja`I2y;o`Be5BE~r_{YnBpIJBH@!N2nZYn-#pE4y2-neLyZo#{K zGJ|+#@4PJgj#gT3vj2}>17sQ{7Wi!AROqFlEKogG0GH^rN5M<<9#r(mjW=pb?^-Qj zTo|wtjEM=BveL#oEa_PH9fC!%N@i8=`i*O`W*zI$z9vr<070G4OjoVwFE|WQt3Rrt z%b^00)toN4>k~>;_j{Ew|AL9Sd|0iQ+K}CE|2ULDc9UM-yVF5u84qt5o2M9pYBK7R zw^)`BC&#%S_1BMgXwh1pbOK|@-kT<6E817j8mIC9vsfSNg`JRU!^ltK2^Rb~-$iFr zK(RD!X?9ej%5Y_yH7##^rq{d{c)}xCF>9ktj&GCPb4)kS?z5=&Ns0G)pm;JrV1<}p zCE6}PDW+Dnblw1~oUwD#_Tf)4!6a{PJkGYfx~In10+oRkXB1quhp?^)r)TM9{Hx;p z{m~9k;od*h7$Z9#8J`%kAp%7kZs>*f94gYoS*2z7Z(8IteBr^^KF>P@MmoKRRJ54k z6lEl`Yj3qHD(i_gyZQW0{BP~`Jo5Ks1LDHgPsYPZJaFG#Yx#`~Z3^+pr!iS+nOqQ7Brp&XUI~2CD2iUEH>`}|0H=ab>Xg@C^b3>F@ z?2XdWbqPLK>a*q6S4vZNyZx@R#e#Yq^qH|CWF3U}4e%sD7@v7~2B5;MjOkVj&hG;R zJAnh1-f$4HXs!pM{E|6bvYq|tj*~>2bcI+ira>Ha-1Wb|j~vfWv%ZLDgup$`qz3p{ z%iCWf=eMw06+>H(T)2Jf$?vt8B%2?vpW{1H|C_8sA**|I#QMm=5xdy^jqM$wa_o(;Rm?GWWbZ zRJn;n1pAx|KucGMUahFWkEy9}dLKqNR{i_DIUvR+3ZPWh|EgSfo$Iry`RR`sZ3^&r zg*tKYRP5tE|68-vFN%)|J2lTLJDVEw!k&sulculK9h>rCZ&79Z_eSqj>!35TU?7%$GR5@4){;SN_0VeP6u{b~f4G23K3l}-q{&-c%$A19{Kao!C>RFP*BT}scj zUFo?if0)M@)5mRJ37U9Bey47qk+MVdbiCLXamUSIAOMg@A6ciDP~UH`_&Fxp4g-O< z>8tfscWU3Q$+%OQOu<{k4{09Ww z?7U}=+AjWH|G3sZ+4M=!d_IO*4;O{N-R7~@I`CZIju7rX2XPZY$(o@5Dn)R-XO&Yn)9)DV7gS_TV=TT*#RT_@aNhhX>h|;s;sNf% zjNu#;O{7akT;t-*H1BHf0g$;L6&{UI&ea^0()hg~2GHiVqRH1+C7~b$+-1ZureKUz zF7Ci_DoboxM#p&VE(S?T0E*{GA6^zS{8cEC4&`08>4of6;(g;i@A6BV7!p_<75?NV z9qY9q_^I4l_n^}($!+^?yN_E7)RxSyY{sBQYG2r4)xlk&yNxgN(te9O?jWSM*a@WR 
z2j~5W)`t5T6Y;69F5|*DITWja2zO!QqUA@p)D^4Cod#fHa%8iKSlCk;?MaTNPUuzz zJgLRhx?B^i6WTur_2}kpUeSJ&Sa1EJJb)6t&0E^=1i6$~wW{m80sj2@C7)i-l_m!~@N@ z+~Q)R_YTySI6tTRwy55>n+f86M~Knvl2SWMfr&gNlCJuY$^7zlibSB2Xs?E}A!;RFe{>0D~vx zVt~&=&$?=3qFx1wh{&VT;c2`GG*e*cj48k(-t!232|%}xR`0r<*NTM^Y?T0-w$xn1 zF!zJ4^Cv(oiZgO!^O+|b2mFxrAOM$p)DAD#iK}dcaK?8qt2H6@#Wcv zU;PR9+U9`khG7eYe>;(ey?TP3vuU60Gi{`4==LsTD7 z?FklIbIuc#i+B5YTaX*ja7dBU*osnhW`Tu{_3C@3;tMVJkFIe(wN|+J&HABdl}{Ev z1jyCO{-3j{1@n_0x~F;AjsIDQPTIM*;TXU58e2j;*S3w99t-%mjt*J;gg*@uvBVk? zo-f209Fm8+Z8}fMsBYA~OwV?kSI;KPKkG?_7+RPTEk`$*jCA3c+xAojb}D;JgtZo7 z+VYaWVerNV82eIHZH5ATHzPa(hYx~j2m}+EdIasfEbz~OWBEu(#B;#L@-Bu+M^N%4 zS`kUhm*3UNgYleh+ZA3C=jzqJ?%tzq z-y_mnKECn(?$F+!v)Fm~*kn;kvYqtZGRssdlRwTKb>)}WA5&>f`nJEBS}NB~yr*9T z5c9xGdAvq7#&wqGzKz%P&e(SvH}mfj?`nww9flBqIfi*ZCSlHsufI`LtTL?bydf0~L%1i!|Lf=@INP^|EjDH+d4g7~@bPah&BU;(DvA6+Su= zpWz-O)Js!}Fn>lEKa-#!09z5)IPanF0?)*^MI0~PiFMuF@fge^20{Y2&0 zs0gd9h@erfk5Jmw8iD%kf)qTq(#}aG^-1Ig#X{D35di_*pq2=WNNK^w}A!LSN&?s$JJG9+hgNj?4qm#qa9dbv=4A4;&rd zeYDnod{4({{p9K=)~=HO7%231O#MRZ6vsOnbwoxNmlwaCI%xT70Pw%CprSh0d$h-B zR6MEeaqP5ZQ(x-MO|t1(%IJOkz<^AZ@B2 z$JHz12mB_Gihj5}o@d9Ay5%*wm7>rwo=NHd$aRhZpt0hEMQMW1jT7v-C8$@-qEt-d zp!@gT46WE=w!3yTmXK9rJ&Pts;&*bM(cD3T;wi=lm{tI!PJr&(YmmAQ8-!#?t&Y9K)&}X=|J`o17g)A)3sb!qy3Zm+9 z*=sSYUZVf6u`PQ^rxE5#HO5ZHi+^(W-WD;TSUe%;b`^A^i`axIHG#p!GLU^7n35Jv zfI;IDghuZVEQ4vT-Es|@QyZdkVI2*C?^06*h)$>azkR>r>a75GRMd;^fn@eYPj7Qb z!{n(^${gsxU~zgOKZqqzy1p44*jxN4?bBQ}9Nkjv5tOoj<9+HZ(zKhm!J@f?|FDCO z-$mKl^&zD8c}EC6Br* z^qf6irOka4u6xahW%eXFvQ}p|nW#f*V5~a`)3-T$4(l%H7C(IHdWsPhs7CwL%_-*h zvidtz0(PILdBs=YRpi>rlym-z+s=jYEG7{K7UP-tA#az&4J&aP#f*($Ix{|xPKcSp zZlhwZ#Wt})q}U*q%NPZbOX{&EJbp@!lQFULEac7%?Hs21&0;*11=p`%gH2PT5i`zp zjo$PTp&bY71H7%Mc(DjJ-}LyaLj4u{$|XZqw+O%$i*Q+MD88rA|6|M zXviaUZN6XGu|G0AR+ODz(uI5k#m25Yi)}=d^r=XZI(D5H+{d%}$+hN0_*A<&*6^&; zUKWVLnC;X0%D zbynb3%slE&!r6IEqx1%|MvIl@9Ab?7`{*^36>9u{D^PrgVR7bvPn9Jf7SZWvw|`QR^S_JsXYcE5}$g^fr{VUN_!Zc^%fy+zOuG!#o$a}x8<{iHmw2k85MgR-}#-+g)t{8(vD#S!~-Aj7&^${skg?@}jfok@i3oj1(Ok)S}QqjvV zNs}jz*4@lean;T%+%dlb`m@5$)}aea`pY@St4V5CTjYW7^J0f@cYmSd?CPb#gAt)Kz5awFIy4PGP$Ltx(bf?Ez@IRF z>r5}^K6?oSc}m%HV!e>Zgl+17@fb^4Psm=$bwS>QPlTYQ z>8ELg#fo8ZhQaFK1bOi0^H~?amrrayTeHq&ek}z= zYvqGFuYD?a2@ywZ^u4Hv)8!BDd7{iq43p?i0lIlhq}D_`+xj&@!#t}jF|!`DtVgdE z&v#n9l7;A9b$Q=!O;G`aEOq?jS)5s>nmtqG4-}3E#Cw~$#JHoZ=_n<~<=4VB5eOOm zDWYOXERW3si$<91ex32C(j#LrPF>wT#1|7+#VLC@^i_gsuMHmL1*FN>9z6*93}fZ_ zKJrOpFQEz)S(dMsQvoa4X|6Z1RX2SoR&6y=Y_rHEU_)=iS9&tbE zzGIfekXRS@D3|RDK8=2A=@a2L+zlCnG-^*coo6Qpqr=4W2I;+SviI+_7;)%fe3YcfR>1u%it zO=Wc#o+KU1+G_N=de2hCGON&rA;r5cXLNBrN5M0Pr)ev$q?TPJDYob+Vcjux+-q1D z;JKx55HHtX@ZvNd_1v~x2`Rm}^q;g0UBhFSE(I?kDpv@NA6)rWRp zXKGDfbFMAI>ZF1!z7)q7`*d*`bSX>mnozdo*VW3@0xZ4(VcAJo0ET79vuUB&c7bEx z&MrRO`u5InhUuMv|IL zGs%(!TZ+E}lIof@O!r=|z?W9=35zi z#gXD?0pC%OxZp2>w^x~jEr-`~44-deG`Si8e|-Ot)Dr5mtJGvN zDzSmHeZO#MJd$X^;xIX-nk%4GZODQJuvlD9Fmg5bo4%qNzgup_D4;L(!cx85vxMDE z-ZF}gDWhO)6oa$#-=7z&)pCnp=+Z(0YFXWoLnBWww*|y``ga@bQ}HE{ztfG9M#H*~ z^pU8H7LM(_!+_4FPzf*8)<`~lfnEHPQ(d~F=}t(c&o$|Xp*Ob_4j-$q`raq_2g3h0 zXm_d$KfbAFXE>%XgXG{&CiRlN*JpPe-yyo%L-Y>&mP_)A-&t`mW&g)r^&UGA$xf>u zg2_E*L83;87Qk*o@LzK#r1mYUTvD(QMKXpqu|e?zS44@yJG0z4C>>gG&))15Rdu^% zc9j@!$^H^cj5j!gqs59qALGC-nz-~$4yt2RMmSrp=850^flxcOE?zHxE7Hxpxm9q<7Pt)K9IDhC>CrnTFLbK?MBjTMA{T#=~QVuH$Qpm zTv$VTPow=J%J5G4(e*!c*@ypp)o!m8&2&@Xo`5DQUpQWFobf2&E!lNVV6U9h)Of3&Y| z>g&8Q7?1B@Bqq2)53XQ(oCP36Tigdojjnw;jiC^w+U4X=#C+3WY^k?@;OCa_od${U z_yQ_=0F>%;sdbJ1stp*>ZRQ5`W(v6O8$LU`9laAGer?obF&)F3T5YSmYuG^!%0Z1cF*EKQ^Y_J9CGJtpX-!LveJf5OVT zhZ3x7dQIHh8kg~_`tF?W8ovV7yvBgd7GwMxNRy0`!8k_?#}@esqiA_;zmDA)$79!t 
zSEXhD{Sj^PA$0Ne%56!Pd}~$COM-yIFA&US%)fSv!_OOz_HXh#AE#OzrtPu#fzrhW z7Z_N#+;QI|rrZ>(X?=od8$Kcn=JZf5%NM($t;~Y?;U>K+^cdS+@FDU~0o@Z(>o##J zDH=p?lMuv(sfZ4Wk1KuzrBBQA#G0QEIH#n`_QgL1sum4$F7?8M4CJa&7l7B#rDAhZ zndF~pQlqMuy7w<)jvY_;$~v{Bg|Y$bQyDlBtV6oB1dSE7_6`kPUA_j2@gm@i=(4p~ z&)p>rw&t-SiBIehvFVbvPE2kr-TVof`Pk{1S`$Lvue-E|p+SBKNC|AA-}wa^2TGQ5 zbj^ha`2AlF1eErelh1?nbhg30LdQpUp|h@N-P z+hT@FU|kM29u#aoYNAg%&AeMY%LNSO25XwG+$AVn{j0Y;&G|7IYl9yrgvxok4XP*m zY~08ps$*t6Jro5aHGQhU8#ZvxQ@E=nl* zfH7gyix-dByWe{Tlb_u`wPGLlY=#7KFKyJrH2^7LwWMWvO@`N$C>>dk9PTsWTrI#D z7+K>?e~KB^Laeo^H947@qq}DmA`sOGrB8{A>Ol;cAP!?!jj?F!oe*ik{-hz#?M|tm zh!FpQnl{fle+n*ir<+hYXk05CHF->2`X%{<7e*!4QE0`xdr1E0$8(F<#)>oaY2OcG%&VFk;u9pNlu1W(Sdlf=TgVl3(iub2E#L!a{#eTv@=1# zGgbn39PevMnSc#_P}^cQTm)^#Oo+A<`&igyf}5z3WG(;*8A<}o8OQtTV?P8br4AY` zVPivtv5+nvn%>}bAnx_S*i*?*tPgUQ+dRn^X36B9r*Ym$cSM3(;(MqcGw&zjv2MYD zHeT1?5z#rC)69GoqQr`lux2!q*T0Qy!TV2%Nr?(cWC%p}qm;9lTbA)&Atn!E2fW9S zPD`Jc)?C)R`I?vEe}+iYPS;nCgh?*V?+9A;SGov^aa)Wdj1K4g0ZVa57B!-B$V)Q_ zO|2y?6wT=$SFW-mCX>0_tQqMarHU6CT<|bRT^@VPY6htA811HQ{vU61rn2CFGYyv1 znyQDl0CMy)mA#fHK1*#496Y5?7PY+IN$X2SuRDBoR*f%^a5jFv)VXnKcpTq^JP6RA z5c{&>emja9?{{%aT~1RW=oe}Iz@KkNy=4qB2WGGN&C7d!XG$^ADiyZYoMu4Wt0+*3 zKWfNGeRsZSNsi8{zqN|p1Aa#B1?@p#eNX45%o9a7uFHQ7Cu+PzKF|TN>l~%9B*>yGS zeKFc#HinR`+Lpwwssn;X`jakQeYkp_ebb1!myl88ey90WR@iqn_Wvk454Wbyzm1=f zJr58d>=TAWWP~AGIAIwTH6REgVn9T2(5R?5don<#21NxN1VoG05eF8n+5}M%P$Qzn z*?DVQDCSi$nl!QA18^l9eD+$^qV9nLi4gB*&lB~caj%qXC)vMhe<&biJ^H%;R6@~ z3`02{!>;J?Q2(657qfxN8EKO!+9`K0Bwv!@clXMd)~KvV22v0JOJ&@5%bNHtQ3Q&0 z6UweXN|hYDviG#$;Wev@VW(b$OW_?4QpL9ur(chXUG;M*=egY9dO15eZ|C3U{$-J$ z=ng=Z;@{I`51emMUu*5&nJ71E9NTjl#}r6{!G@yX(Q@g_4R-P#W`maKD(F5kC{hd3 zhv696eL}tn(ovp<`G2GVu6d?;B-ph+}+~vTE=Zf>9HmvK8M~Vqep9@ za4lsy06SKL6dZsK2iD(fPSpkX%$UV12~IB#8pUjo{Hbs2HF} znn1)fHSh2jYdz`Z%bh!_a~iHeA5g0-6D3bZd6)8aIX;psyEJZca5a!lOtFH*43Z&` z0NfXAd3Uz+8VClZJKOnf;Q-2>w5sTi89_n*A%$~0NlK=e_0Y~Daj=`edS8Z&~-a)EB@$=&nt(XqOWY(z)!{GshyQRrT;VSZTA?NT za%#tFf>b8XkST<3`12ix&7n} zWC9zKQ~%AxD1<+t{(eMl@M!N{cxLLqH|miESaPA3J|?pg0&t0mc>q`&X`)<0S=I*P zC+?w^79QV#6c5|IypdT`_9V-Ihz!UbZS?Ov>o^7Tnmgi{JEoO^{y+smQ7Mu6?p8yn zcHUoSJAkCz9WP^T$~)$3$cfSxM;x*{{cO20q%A+-za7vRHf1@wNm-pk(z9n8*aW9| z#4~Vg=?s7BXU}tl`!}eg|KhecA&%)GZNr4 zfng`T!6n@NoBt>F#9$=PkRG*kE)9>MvYH7hmp*u3QKHd?d^I_H(?jl#kj zMvd%;7#V}e|7iuHB?g-WLCo4=yEWjF0J|k&q4qr(@4S}j*n(V^Gb~?wSGgKEJ{a&C zK=>wlC$M3B>8-34653r+Qb*e40&>O*2b-f3DKh(T3Mu!2qW~l2l|uvz^EI0BIT0vd z#)vbpLuF71KEoN?WPLbTgaz#}YkX}e`uTn9sgxbpAZvk6q6p))^4f<}XAV2q8S1Qq zSl8FFZa#1yf*tqH411$nbn*ue)ozX|)bY*K!pRl03?bZ(zrw3cIdAvm+#64H4%;Bt z(kU%aG(g&+XY`^z{w=KIdKxB!_oB#5ao#jROdtrw4olev?(5e!g~N9J7>~dra;Dsr z0(l}WGijJp^%82;=h3`wQSO>Y*YqaauJx?9ESt3_?Zxc#5-r?n74V1_6ek1i;6))a zh6r^D#^==PH^aG%J7LIPeYmsU$;N|rNWU~EAZZzI<%-j6yJ7Aj6=d8S=4hL+GM5k$ z@|r;F8}cK0_lxbW{Cv$ zoov=NEy5UPnzdX4wYf&mbZy~$Yb8CM5|JYVT|E{TEbBPYQ`M92aBD&$#2lJ4a*_Dv z!kfuYS--^TodVF=2?~dm791ks;7TnA^F_XEZIZ>-|KapV?Pn>B@D1c0U%qrbBtN&h zlm$>mWe$>IjjuM0)XGn*a6F`l=jTdn1hm)dkLN8oH3>) zScy+Y=i2>;@Bh!ls+!1L144gdRL5MTOS_+DKt_sqWB86ht*@(=ddQTMcT=+W@4!nH zX+K=35=|$)^(*kXk@rpc(emh!Rs5~*?j&m!U90Deyex{<(_ik~|F6le9kd_Qa*NRb zdljiv%eLgq?GybFv_N}@P(pr1xgtDp6C83FU=SgRC;rLWKFGO&{IqDdVCil}OUsPk z*S#tE@OsVv1$B>iy^De@s6?W%N7uWpSdcRU%c-BO}l~GZvtJK zAeS42VR+d31O^YzAdbiKZ)^Rvaetp_f1p76pA0FKArQ#D^wy3dw!PH~2jQn*#$=aU0oTNO@q|K>IGHovfrNZ#QC32~0=)_rGvVlv= zWkt8}j<#?a3d(pm7>9&dr5uDmON$F3?<0 z(+UaNwT#}bN92MN9wZl@9TUrCg_z)nlb>{+w2WNhR!!&y8oTc(^Sl-wEcRy%bGuP$ z3F`Y=&ylq7oWF3k;b!a2ANxH+_))nh3YS$|)h^m2>HJ;fe3}%F{q184_@3Aqr=Qn? 
zTXl)K8Yj3xPx}V!{}*TFJmHZPJS`55f86=K>$!*<@veq+z}S|nr%F(gL?3P~UOxHH z-9O+~4Pr^9vhVor$9Ebdk6o{ywc8Q7 znD+gsqv{q93Ymaj?;-z)Gid@=jp^#f7PrR)*cN5(0GL4r7N&*AFdcpw@oBR3n)I(W zRs7Jx!emf5ihR@43F_ruJ!OrSOhAEnaXNl3vId}HQ(Bk>MzJk?Vb}95p|y9_j{kZE zo}lH$={gX{AXK~64`4t)(X9#10dOI&izu{*0T$oE?JT+W*#?#oD-KcE^~zxA zvelRmLikbNaR=F#xv2aah5?aS)ioI;ltDBg&st#R+>}*O2?l9_tEHK?1qBON%#zq@ zMLaEhslZP^%YR}+LG)r=7sQGu1h23q4-BC^zB*tF8|dvztju90b4<=p3MI>by>RXg z=YuJzH&io@^9V?9o7hpWup%PJPfZ=NHq@1(v~qwot5Fa?*+bv*!(Ytf9M{3Gpkusj zc=smbzQ|+yBag*TAcjffA4RH1`^nWZK97PPpY$~%-643AuYCR#00#2FqWWv zv>b(UKdg+Yh(BoAA0Uu!d=BsW>AklWAgonEGA=*Yio{00a>Mdy9V$#YC_Y> zK3NRc>|+ysYjqOzR_Ni0#OM7F)&h0!)M&}rSZE(3M+7jK=00M10r zF5c49a;;CBH1MI}cblr(`T(gX)WiN!bic==KRR_DQY4bhBLW2ZiB6k=&#Z_S z@uJbT6YnQIEl!Nb6!PQ8<{J*{W;=d~Ndjl&}gY*Pn6M*CT=t>qlUh$vYL~rv}ux7EUam@+n@(1L#E>k z(zVp}LItW7S4pE^IP63zBD|&B3wmKS_xsad^np8Awbk)oCB2vKl%3k@OE|g}`pPQv zsi4>XQ@$O)ccoXk$M%@kZwYT`!O>lCq_2`5saN^6irFc?Ei8g^K9zLuAg{a&$rY;I zEcz=qLM8=WMB_bJEvh(RggI}t%leXt8Wlal-6SmYOB*PArU8mr*qs&g&-Fg+C$zyU z^sxLz)A2xA=-iE#!5<24{ge}fOtY1PWD?d-C+R5<0~LfKrvgny!A&Ca)G>`BtO)gL zlPLixC4r|Cb8CJj?3wuX$d_8WmvIh{Uq_oH!=vsB4bJU`;~v%?{O(MOXs_Y!((ok? z)3;E<+i5%REeaI6zZEMpb1x6Rkf_!BPp;d z0zu(fl1wTjRWMDJbJtfrKD7LnKlWh^Z zBKQ~cO4Je2vmLD}Sor{5*hUHD2bLM`_!ZkB*d0ItQZlvV9exTYR2(+n%ELKK4u|kT z`f?n0)RIpIYXh^4u4kO11k|XI!JPw31FPwYi|l{vQr4BPt|=}$$p80MRL#|oIaklM z&-2MjQOrnd4K3|*J8uTPMM{T$;q~LtaQQyQh?O*Rk2e7bh!?&7^~wE1x8#CB@u-Y& zKH0f89N8YxZ=gF)VAL3?f}Vy^odH=u#sbBH$Ehi2W*oVfP<7z2_tD<&fn#7Wq5OpS z69D);AC7t|Mh?po+``Q&|4yKYmdt~Nnpkm1L5j+Kg(|@1A$fuV{nj`W-}f1(hTeo+ z8pKpozMqWbU*qWSEc$&0tAL2~o`xo!B>%ii$RZ(pPl{8rcDt+81hyHcq77QAb3D{| zmqzw%069h43(f=lPr<#4ELQC`7s@o@#W|J@mpF}2o(AEDjJ8(gqr_LwerWsj;Z9e; zl>dEQ=ZP<(eVPqTKE2c@ydCzi;NT=tD{t5;!L94s)JZRo^N#3p_#>CEV5PoDnK{!T zrj9%w+QL=#5P{i&Rk7i@P=u@-&1n)v^^3Xwv=xVzzfC{<$oYO)SQgo=E}~?o=PX%J z)gQQSCdK@Tjt{9=1eGtUg*t76ts#!D!GE}5h+b2zbcGD8qD}#W7cSvF)vgG8xsuBt z1%sv5RS9hl+iK7oGm-?ga@OGE1lRWGSJ)rqKeU(3e7=2JbbTV%;rY|4^144)rrl6< z?4R6qrTxn7)}`SwUNs$2e=ev-Ezzv+`rS2&Ad7Rbkt$|#kzH=8h2zCy8BZX zMkdlN8S~_XiC+J&U;St9`a-2}Wea;%O&;Vj5$_=ZGeW_1A1t4w`!npUU-AC;iDxATgH05Lh9(3G{Y0>voP;8tP9pHJ zU;)*eQ4mD7u~A@eO$7cS}vmf2vBULfQy-2vX(kc3i+F0zC67> zzhuj&kfiweKYKc#^PDwc?{vH_RwpPqwIqaTE=i(R{5SvwN=g45UbhOMZ8Ad~!tRei zKk_T6hG1`529G$Bo+XjF?$`r_8rKEbk&nozYnBlu^b{{5v&yJSHo~jzIWK`~8)3E0 z=&C=~=YKYGGwKyHDN3}5tLH1f0;E_oOknN;Wy(Ll@^*QPI+mpTw#TmC-gXfsX@5pG zbbn*A(U)%o9ZaB`h`dxz&&1SU(+PVQxrIP=nMg{MoN5e z2Rxp(K?}QLaD)bQkQ+h&Dd#Q*Dt`Q_Qfhv3$cox2-u}DIS?Uhrfxl9DBtuc zwj-tJ$IhmWc_kJFw0vu+^MwW5{#hbqN=dFp0K^G$Z7>%HlC}eTcL6jx`26ib|035_ zdfG}OWdLK?=_z6@bwEc8nZXjG3kK&ylw8o;Og)0K6?#&)j4G5LN}kL)Gg|)w-Sa}G zsBvum{MP2hrwxC<&AL*^T_>gHVAO2wv22unh@p)UX~T)Yrz~yn5X9-I;XGKHA0kR4 zCXpi2T7Wi7yHBfOjv6`dP3$GQ)z0roWe8h^k-gBPesVZe3X)J@=f>R`9VhA|s0C2) zxB~3+0c>RQFcCZ#*?N>-R25YB^FM%UP1{D{sia?or{W=d6xIo5&3!zpQ(=KIoEoUv zziukRk^x{fBX})qB30H*AqoOEkwK{@t_q?Xmwk z?8QuQoYV2vO*J8y)?cQrdK0sWKKb$?nE!0yqtqXwU8a(E@isMfjYJ^>=! 
zZxZg}2fzqX6zHh3pLW~nVQ&pW1!00R`$;8Q883omos*y#x2<@;mA^8hj-Kej+=y>rKKp3OSOl2xF$sPyiil||7 zszOf*lR~l5)>SK-X5C+F)3|-*Xc041xZp)>2YESR5E&J0Woh;k*y*V_%my06;Kr~< z;M88A&rJK_lek-+LGbyZ>?)wddc`DD#U2{KPC@qYp`AT>Bjdkz1BAFCxXs8K!OouH zb3FH7PTQ#XIepKazO(ZmogMg<`g=9?(XY*omySjMd9ImpwUl*rWZ~6t3=YJIy0s?| zf#db$I*h`B87KA3Q!>gjjG5TVwEQVYesU;VFK+`#9b(|*1>i(cCkumne_A7zgWcCi z!KS=c!O=wfwm;sjE#JI0>0S9h_nY2)>$1n74-BUT@UxJ^fe!F5OMex#c*iX2+F9C5INSO`QWHW`TP5sY;oV@<=s#_B_`*9~3ahP>~V z`l=?5aG_$}Th7-pZfAJp1nbHtoNCFQGqnF)lMlTFrDuz%>&LEaaz4M2r4{$x4AH_j z`9#_SjZi)}Xy{Eid2r9N2!5R)<|TkvB=F1dT5tBQb#el|N}38R z{~^yRJ>B4IoXLJvZXHo)_qJ&IyK)YC&GN#|4o$I+SXXtnD^`A}L<$4@ag7uvtp@3$ zOOOVv=x{lbbe+rl@yPs#1+%Xgxgc>K@yxcG z&FMvngXhxvw3AZ&`@Q$f-~aP1&|kWcX@+Y=4EOV_l~_6*0V=;t627LaE4%%i^<(aT z6|XY31ynv{2j<>K+54BS+<f^-92+R1v6gK?`YhA;6Q5_9So}k6`F09Q+e0ouQ@(@-}_Tzkudt{cBjhe zt2!-Z+t1WFO&PmY*db);3pWF#P(ar~S=5HWyJvtajq3(cX}=U|>k6hKKMIVmnX3UV z3XLjmk18Invy7}q$tehJz&Q0sjPXFvx(zHU-!^J|#kxvoSG#7#txx#F%=*L3er0BZ z1a2)@K|N(El@GB#n5+&R%fW8)o|EVYhoA2sF7-WMemN`{ZAimBlTtlj@%gFPoql2y zE-hRugM;;?4MuWgKDgu&>2G4Yye|WQs!;awsa^NCpjA*KWd?-V&DTc^jG)mkH1e8 zbKbbifs!4ryDfJSZzDVcuD+MI{@FcNhiOwYH{#abgu`=L(geob@r0S`Z!_CgoSMt( z)vj>+(JD$@HAQW=$K*d=Mc9?^pnH!C<%MgSZgAG_Fxan zn61RxP|mzyq&`Sw59+6v8OLi-@>V%@GeTQ0W%%Ei8Y*gR&!uXNJ{!#NF?T|wBqxZ` z{@$e8GVjxMW9*_%;2v(!Q<#q7;XPVZP3E%vago`eFK4^^WQ&k46@}~Wcv;_?$OOWy z>1X4QJDf`j<9ljN1rGJBl$E~c)|wP5Dwm^{BZG#!+G$&>M9V`A^hr4Mmd;DGz-5t# zSJo~cm;5>ZRQ8NjF65O{L=t^JCBAxZrYLXKlgNM9F5CI^V(G+SOZ-iyi~rXD*3`9m zi4eay(hAUh#+i|8)h@7N46fjaRLcYN(q=aKEguZu*L0(XH?`nG>(nJLC#AVF`2I2+ zY9Bl(ZH04bL?F9s>W|R0U5$j{fAWT)-3+qCc)-ryA>Wzra%o(7uQ^ygk&UVhS*A;3QWo(${JvaG+s}mGXwHIVgF!``_6E@eii2wFcR#@iM!LAoHGCMf znWJ@F{DLw$In&fK!qnkrAbls2!q6TD{Wg14&(%P_a=-0&Rd^ zZ*3f3)DF)vPCj5hk461&xEtLf-JUl+;+ zR^{2b>J+P!v0sPaol%9|@aZd;0hSK6ot12D5mc#@Z*3DSOcvevwkIJ7m1{^l#gb!X zjYX}EYaIQL9N5)V$Y0%>oCJaS+^Dz(<&|FfoByJR1xk+bQd{wPngr;Kf}RRBDnQVV zrj?epsw$!8wbi4oZ#z|zs2##u*#PbAi{kqSiMVoH6}m;Q3%}rCgiX;Sci1}f2fo9tHkczvW`-eweH+P15 z^-Y^&p~T1B_tdKIw`qSrm8FsjG*Rw`!;5}>`6bSJ)BEdxFaB{)_b*?z{^z%Av1&D- zWV$p>TpL~)xg;rWAwa*rch75~zydq*kKN})HqYQxCGO2_D?ijCOF(wijk=Uh<_oS@ zl1Mbd;jXo-z^T4iGn6o;q5Y>F$ESQ7sI-#x~T^P}XiKeP*Ij7-c5A3&M3Zc3<7 zp-u<}K|q;?3wh)qOyO*l(bqSCq`g5WNWRxqRKy4)a^M9%#u#mUd(eMW3Am9m* z?X`Ng^B~Nux&8m#0MJ(s6klMAS?|Q|8Y#rnqATr>Ul(rElndL2>8nbY+Bnh}nGLm8A|Ud0oC`e(Ps8MM$+^iFIPgaWM?hOkh}pKzgKQXqiOCrpsQ^yKiU@ zm+DAix=P+ty;?}zZheIbY@b^y*9ii=?GlHSYbb*Aii4k%<8$6^oV)qb@S&(N?^xT6 zeUXEPfbRofyvj+%H)%Ure#L7XE*W3N@o0=%VTrwuLwVkLI8<(Oh<&Gy=oj!JdoYUj z5j4I4fay9QWExtLQX<}Z_ZYyHd}O6srnyhqDI;wYSTU|zy|5CXUJwKjhM`&9KWBEk zMD_rHyyH0017!$ABtov%MVmOD0h`llFIy-a0HgNx;t)?p;PY1?@*8;M*f7<}=em$$ zRV5`UHy^KhVQmr7=JN6={8E6t31da9UTLM$)P?|O)DF&aO0?uy{5{g+Qp31ioKLY>H8mtkOb^gWow;2q02Tup2w{)!m-qk+a2y1eo-n| zYZi$L9Xg4>!d{SST|3I*MN)#$;`qj>Mcd~`N@5E^O2~~cY$XjT!lNY@pot`! 
z%zkIAh~imLn2loaCVh*W#5i^An4W-q0YZ!mQP8E8UPdU;3reVwVZ|J?jJ1$|jVUvv zEBwIIoA!`5YhilR)`yf*46-f6f{A{FQi6ltvvEr21{my40UTs;wZU3&603`=M!W5c zvBv^<1cJhk0g-_B9&(qa2TquI_wUfL2AqBKsmiIrKye6#5mC5-8FEKy-vdwraFBK5 zGQ8{XqyA}pX#aiPr_lBmcEG;igR6c$wc(?9y2inuooyU-muuf70-KEK#u0Ln~kYGSi`qsGe6ab z;iUeo`2s|sA5rODDZ>iCkS;Pp$GJcq%oVlJgD2vJF=ho}EvCC+(6)m{YV^2*6--Gu zaezU`o??H>dc@>W9>ZI5z(I;9pc4FQTc+$`w>IfSMXpVfWQmnM?c$9v|7{vobbnN- zp{H+b{x7HIvx}tH{$2E8O#?Sh9x}kRhgM}fgghqu3Y9JJrO!C-UWmI_;96$n;eR8v zyFso-L1+-?Rz}6>2xBCSwW|n#7an;QXX$ZzJ5EQ$E<>s~oodpkDn+Ia$bfHKutfGP>0a`^aDx$?q&Wo#j-K3B~hRhd%uaA|5WrsQaq zVib-Ssr^j81UgS@gfR_e1#SCEnKDva9*2-pJA5Z;->&I3p`GAIe~ zvlfBY6LEGTaL1HbO?>RQ&BYAQiuoCd1P9n=gnz%eVcD#~xmpch(2TPwU;SC-UHUZ@YT4(Fcx zOz89DA^u4p)8Pa2uPP0Kg5^;Skn+H0+QN2=s{P75?tC{Wz!S> z$?3QB@0~8Ri<-~9r#vI-rkqwO1DzwT&60f=VzZvN_J^K!P;A8qy+GL#5m+FQzwnW9&#u^ zq|TB`iiT9oc2(R29M@hZ7UGn4JOU{TW|Vn}b~q1taw0aA5kwL@qyqOdz1i}o4&Qj8{+;Te{Dxi_V-(duNHSVpJgeZW<80Lh6s|m-#6)%pFu~OV| zqKh^)@zaXyr;>jgS6LFz)?epUTwmyU(c{-xrxum_Kj6zMg`EbplPVw1vX~ z!GO(Ry60~@@6RmgFtWb%?mC>v@r&RX2D%JP|5Byuqr!25mp^{dcG}KmTka(`-kVc} z57z_ap?m9|H7iDeFn6`rD4cDkWShua{8E3AY>yJ|PN+~)!+-VCUhx_|>1+bH{0f2t zC@3LZBZ^6yp-PwOSgG1uhI`{_aq_ti`D%#?q)PG15P&LtwEMP*Y`)KkdPL4#;NKPp z(?Ee9Hd?798nvGmjv9gj5cnMl7Sm#)XnW}Lj-|bZ_+da~hD4ok0E(Y*2RV%(6h4`v z1%zmMBz?Xx_@)6k?u6HE@lthyPVI`2^&lVF?KpA94ye~Th@c*& zNC?N$R4myPxHN}|wfoa$PVvZ#w_onRJ(44VghN`U!qYr=3Q2kzTdH zvKr=TAPlEOq7SnQDXDD>)AGTBSK-#%Ks!sgUBU3Oq}W=gzQR}1tS#3a*Y|y|?khX4 zxV{u@-u#r-GwaIBK0U9BtV^kT)!hHS4{XQXgzB>HvW2=4!R?GGcSe?XlIy>#B8?Ss zDf1GBo7)~W0|-tMzNqYBmlaO}_;Uz{R-oiLx6|Rx?fXd=a^ef-?0o%V*Q-h5MGJO+ zkx;akK0N^Eemd-s34PsKH2bQGup=|Hc<@PpK!nayZ?F$uY-+MoulUi0>U04_VTcPd6 zm7J$DI$~q~^t(Fr(0Zyrkz1bMb^{7ocBSTCAuD-3d+e(7oo^exOl@xbb~qI~y0C}r ze!J}E^WTfH?Zp)&8(v}jy?OvY;y&rfpPh|w%%06(J$AF704T91CB0~a)fh9;U-sZ_^z}bdg(BsjOv6v*DCU?yjFHi zI#jX$b9fMjP*(#p^JI|wlSc_Oa;kt*w29s#NBSEYAEtjmS5mCd-{|S{H49%CJ_l{F z*rKm9S+x%roVc$4((6b$_zQ{E{UFzJxbJ31pF1$p`>gp}pvB9n_D#?>F9)qEsk3|` zP&Ri$&3+^2{2|YpQ2P{@$4H{1%{W6^k>-Vi=q+VtUgl#rK!&Hbn$4isff=jrp83sR zrgOGk0MHowzKp^e92uRryW_oQ==-I?tLkz9pj4Syp=NJ{Ksr!MR>um}>a~j4c2J0& zmGZA#Kc(1kvbmGKaAzkh%r7rHP(JnTeiqj3XjDz_?9{CTa_Gp3%i#GFhwA_WI$dr% zq_8#iUSeOg!@(r#EM4~Sg+tdDsORrmp^oDd4~^OB`ES2cGC2H8zizpwGR;~$J7ZV= z8^>qO@f$N7B=m)&<~e3F`!Ds}_AgmG^WrVE8IFRbL+KS1W7~g|X=@)|cHFo6PELHe z{Em={gQGx?39>K5r{=E_{P~+`Z_70LKTk3x|=3k@bmV-IZZWq-v7@lUxF^|y}qiTFQ!8M{4w*tD1LcXq^p1A z(v>L%>U*hw^{HYqK4`!g9hqPPONZoXB69W{d3G4_rm7_q+Y>ug&YhsJ0T%XvpzNdF zTpytDS=_YkiI z3#7{7Whf;|q_8VhCt_eR* zN{o|*o5VE@-lzTt|9**wl1O=L*UKHIW~?hSQ-1&>MP;eFFLSXn?#(X?|3{jK3?#Le z699fOid>!w+sNvU=$J04NR zYjb2F;lh~%;60M47&^OPZHjx;WE<&LvsO6&gg`nf_*%n1GtGhjVAJa{r^ord8n3l& zqeyG?>XEjTV0F(T*O)D=j|LTu>l#(6N1ra4&gH9eSy>^4#)A@>+4wPNx;27^%e(c% zaTKPVbCsHKxkJl$Tg{J`BZ`(=qzT-`@XePIL0J zs?eUSlTo>Cq_Z#?(K>~#6y3tGc$-OKG?8qin+!Vb9mVSwM?;a&`nA@V~-HG}`ct1a)H{s&^ zg;kf!B+ezqJvWeS&PT7D_;GX+cUr@vcv%_CdPaU}ClR2U9SwGKV^`Y6v;5T;mRTVh zikQ^?pYX}bpKno|5>6jei+ME-p9aAM#s(ccv#of?-EQyZJ3pSCAG^rStL@;_J3+laOxjN?k5Nqb&pK< zC72N|(HhZq*G(i)ACGV*q_ji>Fxx5Ykg3PjeNr5QYnQ3%@kmcu>amlnNl26U-s#iB zUcKk_=`2$NzJpfYwrBdZ)aw|{Ga3UIm^7a`*KOzZ=SQSOHYP9J;k<_Cw&i8ZQs=ny zb>&Er+<~+&hx0Mht*&#%f&cSu7OYPMUUJb}NBpHUT77P6!dT`aL;1h%Q&X}lUu>deVo&o7@P#94WLCu>9o ziAORG(4X@2T|=ewdNSi&+AO1D1sV6f7GFuJ;0dOK1LA)7!`LU4OSSJgY7!+unnK#$ zoM6pt{>6+TJ3ZaI;7P#94Oldm@}a^lYaLdb^mc!GCHTKS(fo{$uEjK}^Me<#cX+=U zwg*z}qW?-;eC7J1-#m^4IeB~3EOgu6KYj0lqpO3y6|p|ublK3kV8PNgBU^rAZu)R5 zj+Xen(UHXNeEoR2hJwA?n&5>@DA3l|^q^!y$=3i<3UDf|+GZ|E!)w2^wlF+;UV&qp_u@vTNb*%Y+YjUZE^p&$bpG zI9_Ceu3pB7%`oVr8BTd3>h^28S0U2*AW|C1LyOY54{vik)6&rfVjdzDl&5MjO5qUn 
z9cztEDNJxr0xM7r@?NWFW4Y2r`h@d;E zf~tKDrl5&D&OVw=V@0i<*N~Mxeh*%H*VE7a zy2d;0Qw{9Xi5b6l|JGv5#@zyMfypDlo@kV^PJi4jMyFt=;UxRf#Z+OZdUpB?yH*`p z&}~%Ea%53{dYruyS1|H1>Ku7jX^_m7(AgmB>n*P5H>&;?shgRIepT98O~HbE4_YY~ zXw|c` zQ9Wn#>79->8*dSHm7DSKF82TuOKK^KcrD&J?f1iXw^`OUzew5nev`fHux)_nt)E$Q zm4a=deKp)MwaRg{zukGXv3dT+CfbQldrp=30O^OV6$XIOf+Q+~jUY{Kf}AJbQOg}@ z^AcXz)k-Jv3pJqiGHp2z#mF?{3d;8;@YIpv4UsoV_J{SH%1*r>Xm*8sWdMj`00nqT zQ*$QkPu|**pOfe8WGfJ9dEEOI3NLO8zQ!LZeU-kTy@LoL)m3@;g6vT<2J z-=Rms$h^AU&nD_d%0e#o)&?tGr2{r-#m3E<$JSHXqqS?d7}YT^llxkp&wa=W)_gUz zeH?eNGbfY3{x{Fk3|()^u*}_SVarMUao|~aFE?q3xE%qIlA(tsTBV@y87nSd$8nLb zBl~Ej#}=b7EESR~7R6I~Xt2CY>Ealu`llZNVo$9H0ND#ro(}+~o`eBsoGdj#Jmd_p z90wWoIGD54K=u?_RU}K?-2{L~gSaXpTFkX4n0CHqSSV7^y|hH{HAfU?=e&5%B)&6O zHa%khppVtg@V9<)C}LK|$OhrdLvPQl-ZH5Adf~6m=5tR5;#A*_?_R+NE~E6_eZN!p zBck@CpkR_UUU^|3Ud-o@ABdn4ZxhzYoOq^*~jaN8cWjXl9-%1KgGnnyfN+Vf-=^jwEZ)( zKWh%rlJ~g`#zk~LZ+;g4eprQY1Xd$C&F>$5*lHi$pZuNWhV*1iZcHtdg zRNOc|!=j~(v)R66LVpF=km3eQAX{J&MW#Omy4VuoDhHH)*{}nz%ljT4a8f`C0iLA; zl&7gMG2QjmZyuXTeF_#n0?FS2bP&JvY!`D7Bgl{z?s@%-D~rI;zEu+#>FacNjLdn^ zJv$^W%1fEFAYpf6H`xBnIRthZe8wI#u!t{(shsL@a)Ke)6ROfSIyqz*!WVQMY1B_x zeHQI>yR{&wN>6M_K9Bd%MYxNwGI~Nb?I|4k9JVTc5&AHv`QGyzZ=bv0V!2A0E)53? zUb6aH26)E-(;3_%YbklPGxfoXZDa;86`$ctql-X6bd`6#*m@-pK3h*>&9jZJnq1#C zwNUI5-o_bpQB4KN3C{*MKHwS+&V`;XLk4FOlZ%PD<1UnOeXZ~RQFQL{Oz(dj|L%UD z&2{c`$(ibXh1wf4GPP_ zt8S08KrhmtEaZqW6@w?tJA`4gMO3KlM=7O+hdP*V$6tfdPf{oGS02GBGBO$A6&rpYVGkO=gS3b{oF05<@@ zgE;&y!Zbr+j7#VS2xAbPIR?=ILYQ+SYn6JKD0odGfk_B`{U`iB(^c6}3>AYqi;&n( zh!1U`K#P$ap7R%T^rJdHv(od-5LLocp5F@Cyp=5=6t$h8^f~p*4;t*^4V>R+Wq)+d z9^bG(L&g92GVEpQ=)jWk}$#C=>$2m2@Y`sXt#rL z-HZa{M?|+a;}{jdQ{jtpg71KqTLdANY2fMV5bKJfLk)&NKi)t=_LZsaTuloXh0DRj4o?1Y>;_?Br$C@Q^`ce_KL5y#l}d*(Gyo=(%sE=hLeljzc3l-)F9|qtchHr zG<+!9wX@xMgbvSCngiu1W=y>ifx#jqs~~6+T^f`M@1uT|sHRKQ#07*7QyY38HI})$ zK^K{g#4O@TiCoo02t{7h;_2cO25K)#T5bEaNepT{#D)XgQj~?lLk$KP?!0V|paBn( zkIxBok?4x%2ERQ~6*QeOoX%bky}Cdi1A4I`r7B>cwWfywU@8DzlR1YmSYudSC{~#5 z9(!dW>&mJ4CLI?;w{@4H*kO_<}IAl}bZx;Zw zpUzZMhW0x5iyN~X@GjRnrx4=0jX8tQXC*eVuJ&yClKOz3Xzibp-XDR7?HI)AV27`+ z#Gev_0kKRQ?bi|kR=(j)4sGucAXNSSO4j-7xJq2Qs(%P?(yAH@Q9JQTmBZ{^ zZBSjeu$&kNi9W43)>6j;gECg#2!0eTn51MISru;I=+P2>$-3_)Pq0WJ7ls+A0 z(7T(VH__J`$Cb>AZ?fDxLgAWm6J6>IP`v*GT+LM!bJ8sSp2>9I(TbAkq^0G@i@68A zn1D77O3#LE4vilTd)N5FLN#X7XEPT=s>_$h2=dRxCxc?5XfD5IJX#PM&EcwbBx|QL zwLyqUXS-a`Qxkvsc4p(H2`_m@Kqd82V|pg%eUqt)hu%iizg{rP9I&o~7s*-O^l*=( zO~t?bP&$IMmJ`t4b*^f+Sex$hST$44O^hhFjz|08rObU3E|wO3+fJN;QwVA_QCeGY z!7aw~B&L#+gzDr%h?+gP|3&`LGY>d}dV`wx0h;`c;sV46;!(iwYrDsic%L8rlU zbx0t+J*s8lVgtH5bZ0rBHio*)$(xU|bq+f>9Z}secXoxVHHP?_`HmjW_14|BiY_^g%lhe(7U3#}asX}6)iOPoC<7J5 zu|Bh6?aF!G%6V;hci)7BDvsIsH{?=Hwru}7qQL_bdnrg`^oerRk2189$d8@5Ca)A3c2|Ms?5?=fsLq2JaPs z9}^S6X9TfIvz*`T37LGQOC95`rvZfb0BM@Z#53T^3`jbNqBE$q$X$ObqPiK{a#W=< zBXtMJ>A5ugjfI;4@O&_4KrVN5Fo+%EbJ5g(XIx-trze94=MMxPf4vZPZcg=c-=Vj2 z`se58dA5hqi#)OCt3tcW;Ko3ZMC-HP26F(j_W#+ra?gUS`uCW3?17gp`qWG{SejN7 zS0no~*7LCR+@_jE1c|dz>xs|fJ$46o*d4U*%lEe{j7&59De?a;i9XCl#fnihiKy+{5pj?af;V_1cGekMB|E{*f=1JSpJ#{M|ZO3YDR6QKBeI4|NxK~CYxiRUK zQShpMYs6pOF+Ic8f^I!}-!`6$L3c*W@2qP|nFYUL2bEl+6fLMSH~$Z0@nQgY@s zL+QOa)9I6LH{|Ab4TnH6ei9%Ez%&{}*^MjuB++V#M!g3JQm)Q~{0Vop5v^*A7{BZP z+j%XUtzl0{m*}P+v0-%Ge<3lUyEqg201gm80Pgy8uuQKMf=6&HTP`^c2=o!IB(oEF zof3&=@4vGj_am0{+m}u1>U5eAe8BPtV|HJ^4fylCUbjO(Y;?A>(XRW-H`@a}N3Q(w zBPVbD$JccWB51K3Dng7?1mYddh-}XIucFjb3?OI8u$@qp_#Vv({*Uc%;;k9im!{qfs~~*<2pc3yL2c1lZ9YtP!_{W(Kg&0%DXsvl zL)=DmdABFaHprEXXFy0|UAim0=hxY9J>FxU9wv)kp?jt_xlNhgl&Sp8uiqz(o|ucD zIMK6wIjdr6d6(_WQGzpo;heyPGBA^Xf%5oW?ujAYH3!c-HGUbJZPrwF%sJQFWREhh 
zoX%(}%%B+Ck?;p1d%U7|e2eV9qc@RhG7bG=fB(z2ZPNdo%=>>(f&WmGjI#6Cqw7+v z!32ar{XbBhka%*Ul#=y)|HSi41fWZ8QlgeFXN!Pp)7+cRt~M61)HQKR&uwoGXgB~c zWN|NAUflO9agf&&h_i(5xwW=tBq0Q@s-{X~gKRDgg~W?UiuI#fx@n1X&7awlOt_Xd zClmxDA64Oq4s&x<rbK+be~uBa9~2L_B((-WSw zaQ`zSggT_W8@zqN-Zv~HV z+D@hV>Sfb&2kL@5lMcS-FbgmCYva zfiuvYw>>eFPb06CT-vz9k^bsSB(ZL5I+6f|3iTf#RrDB>&+ws^LsHZXpCtg#*{}z` zKHZ(&k3f=ASR@7;$hv^}<`goRxbOF`v9JGk1S-a2Ah4ur)5bGIHplpKdIakVa_g#R zGu%x~>PNW+Ei{uX9A>yAORCTfpTVwIjqClfVBS8Whto=VygR}2*qo%jMtiAhwC&`| zs~EYv9_CXlo}lV`Jy8BqOLSTW$M{T*=2hJ>hyDijP)>8>=*GL#Db&!!_JcNl9V;@J z5gY^OGd}YBb~5YNB5WBK=VX~U3(=!4thGQ4UB{YGmd5G#U|Y>kZtVG0fa zw1h$t@VK9-A_b_f4#c=jzW@-T6_9)ZS_>&@QFj#seK@KaM~}2Ix|@AwONJ*E0M<C9%YRE7dL$Qa#OxS6 zyp!6!9e(>AeakXBs#KRAPhHQ1ILO3zKG8ElI5KFZuP|q@oRN72<>{lSy&~@Sa!N|8 zCA$OrYRS=yb34JUI_-65w4&$Oxtf)j1~?(TO{@>p2C|q~^K5BwN#^)1)8wiVk!zii zz4w5UOUsBEF-e3EQ=M-;|5oI7Iaw=ZBGaSh*xQVRhqss8ahLC(Y56te!1=CE%+)P? zEHvBD7&KNS_b|P)Dz-oM{$1VegM5s&tQ98M#Iny)v=|cP)SRpXj@KMt-zUY7ADks_ z1=`=0eEBYV7q1QinMiN@HxB>}nVK z?~*FlrZ$Bbn7xGg)Zb3+6p-sggXI2km3FO7sXIH=ipOV(=>u7ot*`yIW*DB&d}>8Y z@ucYXN@+ts!%6<}aJ{6@C6DliQA0aw>_w$cn>7ya$Y}ym)>7`&O0K=r>z$WR`Cph? zEIN^`F(HV}%3SzY?t&d6!W_iB7$v~GEgMSmayPdW1z3MLD~iHd9&L_1Qev9i{u4VTTh^2NEy^U`yKi4>;9W|4bK+cq_TVILtO0?DP#*!OWZI7z$#aG#=INE`8vR~;FbMu5l>mw#L72i6{Jo>NQKgV z{cjweo8WKaarS_Als}Y{tk~w_RV^iVWq5a!1C72bl(%Mf(>cM)e(UO!(h; z!XLC}hZ5CaAJMAJL4~{rP@B`g-4qrNxV9LsV+*RhnkY75Zl%*xhuE%~*OuH9>ilrp zQG#Zbp;t96i5FfMedTAquk!pifcu&yFPZ}CE9J)&pOkQgV3|w$MiO^1%Hs+yR&MB~ z6~xe?y@48T2!IBvI>jr@v`Q4ur9CA20QfXgK-Cr&Z1x~iM$ewRq?s}l6%`;@oonOU=E!ZEiG=Y6oG~-EzJBa6i zfVLbaTEMt;@#EKh0=UbOWR6+AyWq4I0!mD!-dBCk#?g5VL=9;+bI7_?Z4NM9=MQgr zYot6N%F=XU7W?=!tIZ9uf!_d(5&BRJO|HJ^@0Z&>!^xw(zE)dnt2*)7P$Ot*+xCu| z&ho<_bx=$_XHHDcj$9gY{?^|AbaB94Q|rr(<3;y`hwdh5YDkBAisq&7r0bES52`n! zM+TNdwEiAij$X2pIsBdBtMm4a=x6!m-epV1;XiNRzHxa-?IRQNGKTT#(Q%dKnm>2n zC4=nDM^y~2_c{Rr^(_Qpb~UB?QUIj`Fvv~U_{MlB={PC7CYb$!G9kn`@*c~tTk?#$ zWp_L2QQ@T4b{Q39S9V&wDR(oZCU`!!#IvLWZ<#k|yaTnv#f|Dy*GSR=kC$YS%Rk7VZ?kZVxFLBvGGhbMB&F3JK%xcnq?OE+i*X zVX+|8QHBA+k+avy)dFC|2o)^8%6-T6&89^8s)EKVQVZ4dAba@>i{{Yxd5pCPEb0m=Z^` z(GH(6myPu&J|e%@`z9I4#)>pIF|gK_q($~&fT>W+RyZNRZo>q(h#+`Rlzgi@#n8C` zDpzvV`$(io0a;E-3ob#%iscwtSOL4)kHK=-T^x#)y&M8tluxddAr}FO{OSP;K#}6d zCuEpx8LCD=sD<}pTe2xLI#m+YN};lMxVop%T73p(Ex>3nQ6rJ0IRQ+Ant_FoehB0$ z+ABx4?MEsk2{_L2H3)4oHIuCqovrce;m&a2sykDoEgW+PlrvbbuU#)MQUW$N!&8cX zU#zauPDt?}u+{b1YXa|uuoauEsyRm6nacM-OeL4_)jRjc6Nh{o zScQB)M#ZR zcbjVN8=n=koMiJqZPafIofT{?6ps3-1!~Fs)gwh3HF>v^M7x;_C7xILrwqojS^x1^ zqlr{M0h>I{fF-eQzY3s8b{l2cFPaU3vu8%eKGs~tB(bxg;p&N8va<{cozcpO?JDxY#vgIlz2SpHq6=rphF`{gTpy&OU* z3s-J~e;$8ihUo01U1zDB^|SmIVR_xTN3r&5I8{^drEP5YAGk5ktE?M-GYcbKG&WIH zNcPktRZ=r7Vu|BL02Bl)1jKKGlQjT3k%|IA+_LOY{0xj&ypzY(pv>&8f{+^hc1C|{ z4#*v=rF!p+REBcNJRv!hYH}kdS(Kk_!X}0;S#RW#b%guf^^`5j)lV#6bsYBGvS|w( z^>XUy@3;RzJq3$aV!NstAOGZ6=Paidnx~>S?fRhXR4P&DL&CWbik8qf4|{BSZfLqZ zqGvVYLCO(z+sJ-v_4DnTS7&r%fpbmdE;QX9Ysx}59Q8Fju57h_d?c|h8o;hI6e3EX zNx=Qk>xdVh-^(Na`xedGH^)lk61AV zT?Cj2W&sJ#NIYW&Kx*~jT6rcZebrHO$V3oRBSy**%Nc@$0vV>_4`e#q<$SGtCH@_| zt|t|v|BC{+t#0m5`-zF_6G>WQ5uE>?*GvbzzPhCPsUlg;-Zx8+0?w$Nl90275vTml z7~W#J+N*cYu=Z9Su}srcwDq%8(6-umroQFNw?i7JINw1OI^?^r;hur9F6;w zJ_uKK5Ut4$eS7YM_#;yyoC41MeNQjzez{YrUWKN;I%7>unMrOOA1b)AwNnwfUw!w7 za=!;NEbZqJF`FX~Qc`oUXrC@S=XlHGq8WFr{a{R2X8Ne=a@WsmzNbbB6yZ)!l|#u^ z*zvy%;_U2W0sPhX#8|PxysSJ(j*6lF8!ABX&%<02L#sGM>{$9{5O;#QrA=h&5NCj^ zIHE0`6f!V2w~3YBo-p4%F|!mS1iZx?NjXa*jMQ;$5dS-pKm zbv(On-7+J#A9?cA`KE z7M$;MRs_qHgAKmMFJ{Q#(K3`bh}s1!`+m#JMFw4CljDVQqR!t?yInR86+5QtUH?^E zO_e|IlwJGIu&dWuF+Wx#CL7gNa&Jrgo^fxfJvVO^s*45hzh#$XnzQUiwy9KwrdQfw 
z8(4Gl@ayB56Gqr`b|>o@c5v>YGxG+8hhqviI@Z4H0cDOsvPz@`=SW2%!xg?3vA#V#$ zTwE7uRY_ZDC*qdB%NZ{y5)7R!!;s2xhT(fm+8`WYYeG0FRX}b^!M&YUb@az23RT_0 zq31y57sp$-x|~$-J87R8l9tJM`<*O}Q3lvZMF^>htpJ5+C9x+s1hYNfLKw@O$o_~4KriInw*=CA^ECR8g(9Z(vu1wHoEKrisrqK3iSc5MbwoW^gHFu=gvu=TSFtj54kQPk zdfsE>6z5!@aPRw}F!hvQFOJ^*>)6)08T6OM^2vmZhld>>7GC=IV9+gJ`I)2x+T(FoS;Q`B>H&_gflIO`t+%mi~6Eax*JRl8Gj2#isJb5_!B<(ZZ8N zWhfy53fvQ<0WUs(<9CMC!qiSXDEJL;dKfo6S!In|l`U zRWI0~JsC?sKE0$x^^a7at@9-6Z%(P`KHWNh!9jd% zKFu|z>%y)0T4#q=v9c?&q1-fTk}vFgk5@B_WJKfz2pnxb0zaOD@_$#eI*YSYY@0KQ zx9UwBrrN{M4#(_y447{{GaclZP$T_r1{XSk&4D|7AhcSId{gV#080fOilYTd$i!KO z-HQFpphB}@I)^fS_+luC#gPJNBv~W3%6MDa`))8vK~_D))1fl_rZ$J>b=`bB;#7 zjOd(WL3~w1*%AygVx3>%ubD`Fg;1k5Gl^WZbsp6&QRqQbaWM)(;5qK$Ny^hmT{y{* zA*fP-PlCj(iSeJ_brHq=4tM4wf)Hxw$Sz5U<0Ad(zoTrzY z&g=(BCQ>q7VR9hM3r!V|U%{o4wqCI zaPX}4utT{|yM+2og{NQUP!wc*Zo@-be0!&=7U$Q4$Z+1%p!3CV24m~GHh+A+)3%d7 zqg-l8ItDGfaERYlPFV?-myyw!2qo)+{qrG_PRMK5wN-Gv3oUa!j;;$*Vi45og*V?y8n*TbfZk}_)fhZ~iNw%aN= z;?dYwPlnc2#{*Xt`5CD@udPn|p)pR<@+w0^Qs0Ezs>TX}VF4x3wG|T*c?WVfoV)qN zz3@dx$^h9TG^Zi!aoB9i@poPCatkAGeRx~@0Isq!qVDgudnGESH-Z7hcs%ZT6$Ob= zHIGtxF}MNQxlbzgQAT#ZhDJN%xu`70)X=sAE;_2liFQwy_?!vr%rCPI@zoVA(T`4S3EaO zigG9yUAUMaBbYX;Aj+ZK3P+un8=f9T|7=vh0<@S`j*2TCBQId-peHI)2`+CTE@t>d zJ1YTRH+L>zS{I@n3zuE61a-$UH*|ITlKLnY-ZnEvi zrGpfn=V8+oNjNj@k_Y?V1(BGMi?sYmzf!gbXG!j8rVbRc%5%ZH?CvjeBW(DmB)mYVaFRBl zJ-|X9ERYnOl=h*{#i1uZ2yh4HTqz^$?e6Iedr~YHA0!ghZugDFn$%HTcPoA~WE=Y8 z^1r!QdwiQq^v8owAQmg2(nW^xU#<-VN_&w95GllKm%4gPSTJ371g}Nz&{w;iSrqZk z@?~v(c<%;#&gT!OQ<+6?RlA?Ye-%PcT>wnZ*K-YmAaDz_o-4K$Pd4byy%`)8_ z8rNuZV0AvxdM_7Zau#OAk-UJ~@;uM3@7i75^mtpeJG$C7lN4H9q33OH6ZxWu#1>~! z#XxTOH;BSc5?B?)fFd4%Ms7zKTP}UC*_;`5MxpY`H@gjG5JJ=C*wuVr6O!)Qz?CdD z(l~+2#76+{gym-s1nGO>KV2=v?gh9zXC9~rqMJP?1*hGp5_~ zT`-=v7+3k$aZBIJj5Nb_!`of}q=r{Th-G^zhAE~b9HA+81H!UPi3eeEx0H6`POm*X z5VHw8RkWv0L-P{u_JFN@&#wme@|ME5P4nksNwYAktx)wO9G1m)8M*}IB(-v~HxQ0` zW+!+ftD!rBsyQRu05|Hg7;fSz3EL%NYNv9z#6$@(w7U&=0z~n*&gx3@m8|q1Jip%d zvNxBtF)-mpQG9xTXih;nJzkijB?D0M^te*Z3Y^N8-AA$j=yEVysIpVs7<|R)pOKoJ z&1?A@YkNX&2JWhXF{>w&AFL*kMN(iktBrgOOUa%9}ge4fkZVeSdhrDQgeHj5m0lyEL05 za;-O1*$Wx|V3#OCsCCgYRjlM@c(#65_2j^Y+FuwSS=v&xs4x3$N~ix6lD%!+R!D%$ zK^yYcZaFT<0181HPfBqAXOJR=2yIUtbnqmUbXN{vCbY9`dvc7iXK*l~SbH1%J;F3n>q*IG*b6vkCk_6}UA0 zu_f2c?YCl&ElhnxmS>SYzC*c^X~1I^2n&DDdF&l1Kzn&MUfW4n^w{*9VGg@`pZerj zSaRWk_uKdj=#4gJZ1w<`_|x8Mt!sZI8|iLz=)64G$H(0zWe8E&%bbvj7z8=jywp## z%1`fByZS5=Z4W|ORJ6-ebQ%YCgoWBA!oQGXJ6BMrr9SKZ0h~RkSS11C(q5a(|K}@l zy4g74`Iv5ykRyQU&>)U1^u7-Wxmn~Oh+@m&yQFY72W2&fI4#C!@ijL^r9h415Jp?| z$+_X_C-m565CP<+J(8Y>7>_v_2*mrdkhSh79qAAP2&rRWXZ}JLNb}4hPH~4)yU!tW zCQt1?A7L(l7BoSVZ$y;Dp5A!F;P)SDsBD7+T?P@xYHvBXN)U5_k2#cwxeemxI0QLu z;Vql^7R0%yg&bsY#SfHAZ*ItrEPP{+;<8akspz~IR6a+3t%pdH!aLc7F_5#o0b9et zJr((Nv2djVQs@WF^H}urH4coqj$RIosgncccEdN76K81|T`Z1JhRUL1J6VLMH1tk6 zBankqVE`}|9m_h=Z|^?%2QXl~HJt7~eiKlZLHw!lfgmChJjoUF3u)+6RJ1=U>d9GDLWrY()5 z$Y1y}e+L$BMoJ3Zj;>+h`k&#SuB7*7Wq+MN_Z}hG*AV7qf;}yR5TuYlNZ3vR!8-EH zTtm_=K4FnctOZdg!96c30+7;JtskbpOtucioZvSD&U%0*r|RimB$tF;7Xuh~`_w)F zQ*Da&22WmMAdJ4EM`_qR4w|=8N4>3j3`BFJ$Q0Uzf)1p~u?uFe67FKO(Oi z{P2gt5A8swh)ZtyxGGUvl>pZ?Nt~#<`21DD?Hq&yqDJKA;-k0OTc|#C&}GMrKU@H- zXA>T+aEe!sEuW2f!onRHh9v2SEo{Gh)W|f4gw7S$<}hH=qo`UE;RPEvMdmz$S1$62 zTPOr)6FF!X@oh2|Y8t1n!H$Y`$E~YE-|hd~nFtsH`72)$a}GtBVEzI0>+l!|%jbPTeU& zptQ3|m;)~$o|V~Os{&8TP-ilVllGy7QgrYvBt(judcMDbbnSg0c3j--&>dKNH9%{s zq})gm&4~LoS>hsRthYmGd59n>W`cngnBvdK(1o&_d9s`NVssAmWY03YaWvYx4$oX8@VgVO+^vEO^7F$ljaFU4 zHQ_!?DJ+SJZ#kD<-5~I02=dlAIXyG&dz2>I8D-ra*4MpD$dw*+sq!zel%ijd2we)62RMQ&9oq7OaCxT!758_Bd68}0UI(}Eq@NizL5BfMi7J7tfzWU#h%I|p=%gbl{DmwQT%`a 
z#p=21e*u-KgvwbPj3eDM&cAT`RcGky3-ddU>XQP`tSBU$!*}xWw*G`+R)e|S{rD}x zn@xA8&zwHr!N_aB*g}aK-`BPz#`Ut~-5+Xm0>ZSopo1Th76aM9R{t1zedxhSxu2Y0 zx>-aA;2cOHoA82Pg!(J&b$9WnrlM`3mt#Iq9K3Xu3=+e zQr8E%+32XfVd*l6_lB*NE_9rr=YCgZF|t?VGo)e9k$Fzrfb6F0`P7eB^z-=W@FGMx z`-mnRJ;uWB4(S}$xNunOVT#kk+2a>JY-t^@zE?`Z6s#zmn!aaShxgCGjnFclvxp@@ zqc6vA1&<7s@fxM>xVtjklHgLe7&pfscCBjb72v9Am;eFfT=EEVHR?_D^-!q#wV9FM zcYy7T(Q)>fku}VS$;4fsu*q2LMjwK%Q-A#@%*Bwv+llB}5_Tde=8?&OjZy|I9G%V& zfRW{ibOQtXV}0yNob<{1 z&%3Uo1e1Law}^OV?8$=4dxe%zbg{HhgvLF(Cq)~_9LIg&+%n(u^tqJq0&L(ECSKA~ zt8j{9df~2I!d10Q866tOSd4u3uF(X8=-LNS?=+gZuSJIBsyeco2-R7{sXDs{9qiHXpr#X1y5S zd*rTIVGbzH7WL-y^)rB%nH=041BZTvi4;Lj(y%3>Cmeo@!R?kDPdK1`&*Wd|$Jd>R zyOU;AsL$MF31MgvcmMvsW&d>Dnq(8E1cYz6sU%oinAu1bV|Kk2#Iy=x3ZH~-Yw{P<+v4oJ@YMTVWJEv2;5}S~M*tUiMadyTX~gSgEn;<^$umLmqM7p(k9*<(Pt#_Jz15KCX*X z(MUpf7hKCEp_ALj_XseTXb4SG;QHMsaqN17S5N*7k)fQWxOJ$30BT5s5_FtXLDUJV zR>>~~EMX`;w>2>MG%b#x@pk6&QftbVt}cE^LLR1yfit~_)6*VbB8_96MyNDM2i-`~ z&?v&%$Z1*Q_RI#2X+YyCkj8pHG#AFS#9m%mzw)4Uw3bs`47M+Rz(fpBpNVfLz3!V> z>#rF3U_RJ9DGH3;{IO(n+yf7~d4l^&DI`Fe8^poPt+Z7NrmnEDjiijqi^Ia8BaVNi z^BE{$*9vs~uHj}l(n|W_f*-E!;mkW3$rlwS2fBz+WUE(7r&`H`i+g_XWGkxLI$efG zE&uDeKO2lZPbx6`6Wb%0y|L@lk`&iNJrvE4viPp#k@N1>NzJ>DO|W%jG7I%YK=?R` z9y=wfAx%8k`6k5nIS8VLFUt4-JFgxG;^TUrogAw$BQA;~vStXO&rw#KB;`)2W^1Wthq7k7AHZCGPTz!jB)W39_v_`qu6#8;H?hA16pURQz5P#q zlXU4ekTJHev-qjt=%j$KC`zQ-pmha1tTf;1I=)>XzD3DHR}4r1AMs4>PzNCYk!PtV z$M5nn=zK%~KmJS?`UD^w#FCO_VlRJ@Ktms$x5&ELKXXQf6xhoWx?~rS_1&T-JRcUS zIqUvgB==$MHt_ge_G|sx(H*Z^qaH;)ZrSrFcxU2d$I*AgoJ#ZwH~BB}-VCcdv=Yl7V(2*JG{+%Nu6WgF7v zc2aRe0snx@X@rus8;>m5mZ4}FF;VHK=g!3*!z%NvXIyfzWUSke(#-uw9ql(8E%)1m zoJKvRkl0)Ze463VhqbskZg{nc4Jp0m)#eZZgNUt1TcID# zMlJ5VFY{}=5m5x7qs8Iw74B}E-aoX~-!$;>3Dy8^?L6VxU+kvkl}#Rm<|p2Cxw=?n z)91Wp)zi0DlB=Cqo!_YHHx%WDg3eYi*}|sW#Gw7V>?3xQC$@l@3XN1^p_gC;emksX zYD4)BQDstrsgWm`>8P`BV9W9>DYlnh;l@Mz=w_Jct4triUR&s*!c;8j zzkPRbTtjfg{^-W&tH&CRkebt87cQN-!7;%VUiDHY**WMJ&fc1GQ`#96{=(IK*F^@X zrUsg???ldOfFL@`;+h3CI+O}@Bl%)fY+u~RbM9-e zWgmVXe<}N#g6**?Uz@=os0ETF$WnG+UE3wF8!|`xf6zeH)t7^q@NW$Eut1rc^03dxQ0;^-cG0K?XUtF2C1L6zim; zyZQ3Q^F_}`=ei+qg}$aTJ}dEGMa_^Jz2jr<)zWmJBobD&>o)KDqesAQ{F$Py`)|Ew zS$x#V!V=&V0T=j4K`34}u;o!Vquzm-qd>$e9JL=ER- zI|C)i9q(-WYMUPIb&}-TPPv1aMA;z^hfH$q*_>EyVGve6bPdb`_%qur(yO|1UQW*6roTn>nqvga7pw6-3 z@|u-!V!ylN0GW`$bJZMgLg<~yb13m*M;%(2RNdMLAcD+~DXmMz*2lKR2K_CcT109mXi97)aRvNLgP!HP0 zZ8~N;4yat@BixGR)iYF>iabyrX9+-wymb&dM=S7H5uTB48fviq!2jGD?!V;q=W#a@ z*QEeJ`<26g`W$@rbJ19FPl%hL^5+tdL`!uC%&KRo2ws*#i#PXWeyDQ@# ztft_u1>5eat2C~1m(;nFYHR+n7@#i3z1X&>*NS!x(w<((VOqFqKJCsiUMN7h()&mu zR2Z9L$J;)~B*X`~YOabXH@`_p1(w;mhOQf2#tKPokwFHD#>BJAek90u+^5`ZTsxB3yoM~AKY${A$00KHI0KRRH;$v_?kI3bfY8dKO z#(TOBqR87^&g)yLJYU}q(w_a0&IH)D&YxWMx83vVZ*setw8d=SwF|tR#zbpx|Mr=H z!|&bht#0++b#Wo!$En@-KRpnVT>qL1Wp!a`5y+?ixLOhIP`s&;1uM)^;Sa$SKaaKu@ZX$NF= z$`CFLAb(H4s&BpW8ON4oYD|maP9T%uv+~~fKor71*VR%>$b0t@LWXf&wXoLk@>Pod zQdQE9@Cs_&?xvrYilljV)mc+PemyI=OSeQfrd-Z6!o$KkQL-#SVVhQNhUc7|%bci4ie~Z3(y%)Vbbl=ol zpR~@ZrAJ2L>SUfnw$?Y&p@Zho(G!d$ojak;K3mo9a4V9km>N6EW7Fs z$z9?6aRrYBpJ-^|g8vl(A%i_e4b~oDShSFL36ARCiYkof!E|QR-LEsf3pQs?ymbA3 z)LLg9Qd9?Xo!{Z>{;ZUXk1=_tx%-bn`+I&zW1_Zx8UQnBWn_GVTF&t{H;fIQ~ME{l@m%klXgpSx~U@N zQ@60cuI^R&EB}FMTkHmY{B&LNefus!!RdErXCVQAM+Vl>k2s7-p&U=D+5=4rF}c&K!+0G59eeI5lyfS#&Mo zjk;v=6s`CA*8{*iwaAy>Zk;r0UJWfc@w9>0~6NgpNZ_HyxZ#tlpyJ0u!hl12x=3r^# z5JM>BRXBV^s>qDZf_XxWyx_xV^W5Z4v(GRFI&0Svs97aYJ$emB&O1gyB~uV}O3nf$ z$6D7ro4xPVD1YT?dZK$araD`Vxh&jtiE42^IC*c$9~mL1 z8sN5tujPcV7$x^xTe`M!d0CR+wsLFTKYDoI)3^OQ3OQEjs$O^Bw$5}rG=F=6P0fwV z2l!{9dc84CDlSotN!0EzKh15hjylioB+u@_tO2h?vszM~pt1uOHH1E)f44g)b}si& 
z`o`(ccqKgkab?+~wNlKpqUTKQgem-eRtsyckVS>(4_Ks2kuwJO{LqMuYOmTawrrn! zY`nWsfTQSL-;x2cxz2g9r?2Uqb8M zty{UR4Gz_+bloz2e~pZ}_POM8S-s?w#_{RPq4T-UJ9@Dvt~Z)_)=cK+*f^L%&*k^@ zdY5;j@p-YLoO`^X;b!yxm28&Vn6H!KBM4z|x@e*qe^xm!VF($PpUaXKg@}+zm8djo z-a*>E^IDI>v=D}dw+tJe>WgA`b&WPOSQT!5{&~pcJ%c> z{2xW<9hKDo|MAO40mY3QM~Vw^Z^J!PG|Q0|ngdOZ)XcPT8KS6_JDlMv%QBo!W66+qTswzdwHGz#rVhKk&Zyp7(j(=kxJEEZAhM=?1vS%GwU~L1bkivY%te zIIa|am#goCc2cE&>SXD401N$F>WeO{9t#bnZFSfZurXUHmMH31UXy?C?@QrAr;+y? zQ84T4`sXFS`0FRfXtU;net@gE%ieE&wB0rgZ#R+h-*xiXmUsd$E$7ykf@wlWb)ZLa z&^{NvY*PX!gzo6#*mOA_k!EL z-kRM08KBix&>^_KHKJb}d8TVANr)%2xbVMea?T{OYc>3-7E^u~WAR;(! zde5Qhj4D`$?IqUw-AvcBCfR8Iv8b&bQKdd}Wph#a4fT$+nhl%VPQ7z6dHjN5n}#TS z29>(&aQWsN$V}Q;@s7#9u=~9WPldF}!bb117x(JdZs+(HNwz%`(&K8MA+LTzE^I4L z^Dk+7wjlhzz?hFa*Gh<`BT1FKg8z8RHtua-oiHB;cG}J+c*!L1p52qpJ$fyo|H5-R z^T{P`o)-b>y|^>vS+B(nz0acR{%mq9t2q4c4*obx3-{Ea+DcvEMf^af49PnSk8vVD zZ1zLjLw-3-Zt`|}z$YCW@;hXC|3j?w4)nR5D(JwIUp~E^ZM$#nlxzeohZctoDZ+$2 zgS|i+U!m`tUz~lmn7O{F_+wG28=uL$kx!nz|7FV++@=x12Z#2a|<&wV-m@FlYxzy zp^b%^vxS)h$=Jrq!o|kI+1A*EWNTw?<7jScXJhSRXXEN@=W6TX?&4%;>uh1=s-veC zV6Pr*rQ)l-X}k6&rnZrfqnW3jjklX!pqrhyt4pApi?_Y=Ru{KOXNM3=lT0!>#zb?Q zuL3ie5any)OSKF1wD<9F4fb*g32+JbaSIQ0kMwg0542|ZlTrd4a-$sfg_H9WZT6>Q z9Gv{f_JR6(TY_A@{9Jr#uD;oBo-SV7oIQeV{F8#bJp=uGBZ57*2l(&U8XUFN-`{`x zw!nxT+atDwg(pV_X2qDnG{ZM27Y0JnNEp_0vw? zGY$dABf_|8JIeQkw&!f&rl%AhU~uzuiVhyAK6IcqJGDD&U+4DN`+N7lOV4{4M!#v{ za!|6t&-LF5Y>+9sXSu@dzkW$XUnxcc%{H(6ZtghO)vHGA}XK8mYG9L<= zAC{uia`?G<#kq&87|f#BG)~D8PWKV^mptBd0jGddlg~MJfm7J`r z6;zhjb_n=q>TCL%YP*gVceR$CZ$H`BR@2{A-QQk6aG~MK`MN8;jhA}r20KeD1?^`$ zItIGhFZZ6lG%$3&yT7sRyrk^d+KI|ry~ieo&i%g7{jIhB-gw`;>DJz>6HVQtgX0gb z-CKV6@bT2Gdk-GJe!Q~w`qkT4?|;1e_TTqE8-F)8etcj5@%6)xr_+nqMvkx}&>O&K z^w`!E;s|HIqGIr*sO5$;T|cnVzh_y^l1|Q_9-1!oI}zu81iR4DK2^Rm@btIdj-cBX zmb*tA1Nu%^)j7n!YCJn0aQ~D$Cr#Zsf)v*3ej;E_Tz>eQ9* z$=f?)TSP3+TGJVekfyW+&m2rOd2f^8n|0C0#;bWxn3v5~rj+!+LM+6340VEFwR4!R zpf44eHAf?lx15|m_{y&mUk!>69dM^J> z<(^-;`rFO)XWl*wa*Ai2&Hp|G49BiY78;HQX^}JFMud2GWx@@)603&-SROb{*Ol`v zF4C5BPh3iFU@fUL+V&GF{OH9a(|+gqkeNNCim}H5o9YG;LFO0oG?|+|5AMi#84#(t z*J0fFX-vpSgjR^V$4{-CEsh7~V!WG*60>NhRbGa^x2u?|2@zaM%)MhW?o;dh5q>Im zUyZGw+WsXwzayb{Dv)YN0%juVh3cv>IjhMvHrmv6f~bSHpBEAp!Wkrjz^#3Wr**`! zp3oNm&Bs^&?5e^Z?YsLc(t2vX>1Udc{@fnab_I?I-t}1d-^`$A)lZc+ZjNvx!go0( zC00Ckb9&Yt>)%>v=%IdAYe#?JquH><{eCq{zL`$d7p0`&w@PueAj8=C$3?H=c1rd= z+L_%yg;4W^qvz>`OJbP3^?Na!eOzTlC{$!CFucE!tIK)+SwAKf)46(;YVkQ?6uwE% z5B&i0cwR#(WjVS2jPaX^9p_IER=34^H9xMcrdpI%=U$fU*rOC;7^Izh*C%&w? zc0b7>x;;#~d>u8R{SdSA`Qr!D2IrrrJkt6zJ$)LVjaIm+RQc^t<`oyMAp!ch6MmEn zU3g;0RSqN|uhn@Z=C&yxhVvL&9gO8OW*PnCYT5q`wW>8u_&IEpUVc{mBxo3S!QhZr zen-{$K%em@+THL4!y2DVh_=Webfc>;1oC;))KL9X!x#y)rEv9e%ckv-XTpO%>`07% zr&?!pwoVuPb4|v0R@x%r$R8x1_S~&UDgLzf%XgJdV6OzVFYr zJ@L3*^VHb(%Qv`1%!2G;k;?Ty?Qh6A^P~0kr){(BwxCl9&MKfxDHhXg~byoFA@=4y_ z5mOFSAOA`5D^yMzbT}%zBu#*LDu=+skH<6@H@>c9A5M$M{fP)jY@ZCh%)souz4UKd zd)qShrr*mESsK^yAG%dgPEd>f$Yr~rBybut20Q;P?HgHqPbMi#$SK3$o?}cio zN!XI=yL+fAivE==<+d$R&%&YSa^J4MElqXT! 
z9w-g32`pEnsMDJ+EuTaDJ>}vP^@-MDe(>#X^D7hagL=9_#9xa0zTh0}-%bbda;IL3 zZkg%53s3^Z_kYUR);|BNMH><|92)&XnjcZ`t4}i2CRLM|w<^7k*^;9&JQTN-!|^fd z(}t{g{E^iFwRWb#OZ?a1yaoBAwW`8@Bqnji9_8#c^k`~`jVGlEy+sq@FMp6hZ_sZ~ zUpBZGFv@GB=$*o+J_xb>!if2TJsxU(*0H)LwCi5U#-{xrFF*bfaUUazFL`yIGxsOJ z$8^!EYZatAFW<^K6(s2LsB+n+wF;o}*xk50Y3LnK^|Ak$1Tl(E``Ug!gWHo|g`e~} zqpQqA<%Pe|R)=pDEXSvhHKghq?-|XgCxy3PCSfC?4$syvC4A)aOHJ*2X0oI*z^xg- zk0h91yp?{wUAdEb%;n12WQ72wXP(+0#Y-;p5ci!-=svy~+yc!S+Dx$8mt79i`j3cm zF7CaMLh3Swca-u zy!cptW^pVDGJTfn=>H}`0|u$RUcK|@C~QOh)f2yl{B{qAam(jOyZ3XCLU72-bMaQMlP_0pn&O!)ZQ|dHyn*m6`Scn=we?hPBlsBnmK44NTE^W$ zvK(WPt=cbyb=ZHeIAPAw?~g$QWKt1{ZY-H}08XTMs!o(6Y|0>*=t;H$mBNC0FIwU0 zRD>)A9m~8Se=S`Y8NB}#dBvEWA%r~;4BpYhqI<6WY1-XxvK1Xyg=OgMoX4xQ^esi4 zF@uNsp$91%M;`k1#?(0d{p0W6xAos^J&CoQ>`k4*_lv#3d9B$Jc;5s8?P8S4YVLyR zx;~n)U8SNjhYNkZy)+)84;pN|rNZQ5iBJUw0N0QJ)$+hgQi{`^6uE4aIS7H1r1QWv z7X8!`R=^lHj#2m))Hs5@@dHGe(jgf0C-V1xa0sGwb+NfYk2`Gzrt#a~j|yxQ;YZZtu8FH2d3;sn&bs~$Y_ zr`(G)Qpw@>8?f$=oCB7XB=aV8jzD`7L@Ymy(Lro`Q0rd%p=?z1siqWv{VB1BerP<=g1 z5f330v$Pm2Ex|$JJfJ0kx6yGODY%6-_JRl-0zy=o+0wW`_sN4jKeIlKLd_!KWiTLsE<{QfVpLk2>)EWNz5$lyr}6&LHi+N}9tfkSu{%5f5sd*{4O! zcJc%2qkwufz@eC5U&J?5FK{72Nc_sv#Vk!UySWqS`MJS2yv%|DRi^|j0lJF9=Ogh2 z5DB10LEWL}TQW))D7b}kxg`-+8ru|1X7#?#mt&>}ZBO2+4v7o^l*%DNi>Omfq;KS* zkSOGJ9wM13bMxe({zVjV5u#3kB$Q+3=;%G43)7ii-}f2iwFL7jAX$<4onQx4)Il^0R4bU5~{%PAphiK<;kI{GmH{@r^bKYG+ZYDSP=lB zLEOdY8Gu8{!&wrVmc&?40=3^50xlTW*!$I36lLY*WMidKQ#x|juj~|j;Uy;K6t#9Y z1(^iY)>99KF_1)2dQ34cCxDT@7%{8pqHpJ+KW*;+>$pZ6k|N! zC8OejIt@?h6a1qt_R5i>J<%Xyb4qQL_|VXHNOdHDAWFL&kf#e-r!OD8u3n(B+F-O= z(MYPi_%q)ot+hZ zz~7vI{^C!*vr~J3bBSGam6=nU-LKNCY6VaxB$z5YUDxr1iJLWPvUJ4Vp`gfD8s$X* zIVKrPJ-wvvDqjvLiQs{RW2ZozG-oCiuR8R@sY#u9qCx=gvs~5a`BruHFs`@swJm4Jj3@mc3D3Xcnredy&HZc8b zvXmgqmGVO=<%l8PL4>%%;!&52MuGj$zO8(}t7kb^)eA1%WVxrEzxg)*R441!+e#rL zdy9*7aro9URWLj-@w;^PHvpl)Kq_v5iF-h8dY)GLNQm=G&5wDErig&o`Q7od{oM}0 z?w^2HJr>gX(0stnrb6 zo}DN-VidM*x8R@MHAk&If9Y>cziIh*8G-+tn*gHRjHG~dnQm!{2YhC!ExqY-;bFw-FBnxXa%u(NBoVyvCV;mby--@2(%lGwh4yX|C@ zBFvnUeC#tI2Po_^*p*G#nqhqIT&cp3M|--mwFxgjbzw4?@GH@E3JR=FGiL`oUb%BY231B00~z9!x*po2_r@h zChZyYt;3N!asDQ_yLdQuv30ei8;w7G+3z1b6{18zY>c5q!(zEhYv#~9JGQ1souil; zhD<*b=|_hT(QudWq1zo)=b%v&{c0kp*^5mwL@a^pab#N?!BX5adgF-va#ZW%+~g{~ zJQ6Y*xVo|D>Hr3FVNv$VQ`sw&zQI3}zSM&-diqw^HrI3gQ8N8eb=}sFfXVl5Ti%V& zW2)>^D(4%_y!r z>z)?#TUimKr7S_rjT7fxqsRL!J8xT7>2E0H86bCyng{;ez95BIZ8?Pguh;c`Zwehg zjvO9lu$tAfuitE&`ayUa4KXi=k-==gTWz7ScgtS(-A`-pbR2*9p8rL2BG7r_I;kY~ zSV`R@AV!29Slc&~ikp#4q(4D#ra{#3uuu?o>tTV~8o5;yYDt5@MUb8@ap^nUW%_nm zyD;!@^J=-wbRNc!iKt4)gun7RAcIi2dwgWg%o5%tDy% zPDdRUJ#-%GcMqBqlB#xewLjWOuRJw>_tx9HB&Y0qv4GEgB!f=uE@fJ~NN?P-=|DG_jU!-nhu8=4}ZNAw{ zxr%XY#7MgZpVak7ctiFVuJ!|mMuM_OG-su$Qmt}Wjs)H>EeL^-FcB(O>TeXo%f#5W za+u$fzQC*f4G!nYewEhzYi|5&H&QESu_a`6h`LPmHHQW`4VEiL-v+W?W;}dIf`nzp^C8!&)>5gb5*w5yvR9o7dX0VrVW^_6{ATO^4m3KP(hOTT}1*Yr_=mKebiW2Y%NGC{6`hEf&- zc0}}TVaE~`cSnp3ro&DUaWiYMm)EmMC0v0}MwE~7mFz~p{2Tz8k7OdMk8H@49f4m0 z-~2O+WHa_>ge!jS+5ewac>Nyo~6HOz1DRI8nCJ56F?x98B{Q3b#knSj?5Gp$CfEc?dMtl212z1n4K&~Bx z5X9K5r)a`rfyJYOwV;Mc-!_N!EEOtWeX2r%38DH|`Ydw7-?jZRuX=O~&Y@wBtx0>- zaQUHv%YJ~i2)+i3gz@B^(kWsp2X~hF zU^utQ=t8x!!)*IPi&)5-gzmoA*uyo6>@rK}H=nNjX{byJ9$uWADTy=-7FcUL>7Uae zTgEkuSgnYXD&rl}Son*?drIWkS9dlPX6kM$&3HDW`T8qE-Rx(l^<>?cH$~cfMSn3n?I5J=Hi z65Wu~AaoJ|q_e0+j_Mn4Sndl<30Y*Fvpy&YqjPt4(knsg88ntgWi?XL+12U2b;&o= z&sQWXQ?H0x3bCe+))t0Ry{GcU{BLPDbuttT@u~hTAOKa(BeG=1hTuwD%okqPT6Iri zL{~`->3s9<#X_}6+g}@rZurcRO&XLz;T=A?aq%6WJh(b)Lt4e-q^oGN1?|T|LyL8H zmg*rFqVP-hr?gg&IfS}>xI45`Y%q7(QtYULwG`uD)W)6bBfIR9{4#d69Auoo`gzRp z#c&+M;|5w@qZsgl1_JGM$$>pLWXujG@_3!j0#043AikBOyCG_W%EqkW@l*}A)7n^VqUzx 
z0`p0P18uWUro-R~vu#dc(QY?X-tOGzBXYdz)aDZVzy2wSQ_RK) zi3qLmPI_wvWAK-P^pWEFXl;7uv>vRwZo~klsZ)W};)*|)sJD$mj2pu;L6Ga@TVd2t z{?3BScB+@TSx;LNBK@g}%K=c7A%zG<5$;dlOx1rrU7sJn>tu!7tF?HZoPcgvU-?hT zmnofq_|wXYgNMWrq#~eKY9lJ%b22t_;-JjmhA8&Y#+#EqF4inwP@^73a~@3CAbjmV z6PIkwehbya7Lum96SnsIs`124CXe0=!~4QmvbHa)LcSYeGv7r#3Gp|*e7LCanbVD~ zozo@TS4K0hc4E5w*AOOXIo_+f7Co*C1ghKxCD1+f+x759-Z#u12`2RA<`7hhf#x)G z6!DRWTevGkIzv&{R_9b6UhYoVFD+K9?yIhL;`<%*2au{(IA-FF><3Z4a!86&p$Ta0 z98bk;lwWzX?Y_ptb%hhH$`j$X%W}CSxD7qh(xioxc%=$y^3`6n2W&XHFk+?U%qc?$MBVt-DHd(rWnHvMu$lN80f>o|LYQA6%!L)41q85qMii zuHG7eax7+HCnum=TDD|tf^Q?dniyRkyCApIQ6=xWbQmFF^SXx+8e>4S#UVXx{QNfc z_7|A!K~I!Uu@s>C2*gNPdEI>@INuTMo^;rWcH%fGO>e?7hrVmH4}yC&SCd5X^ox(? z4Ts+>#;k+V3eh+&U3eJNE7OhM?l0xMe#hxwwQvpel#={hPncVPYwwgdZy6F~EEE5dk zFLh2duYc>W0%QdOz0(Xw1HNyQFGmwTif4*8INFm^rvc3)!*|GDS?7%0M9+<w-k=&N{B{$r`RiFA4iwswf``wxTf1f7W?Kl-=)__ zu3vd;)1Aa1+o&-;SncGZ_KaWnE#$i&TP?p=ng^D+mLl-wvk1~1<6n&+3DYSPET~lF zQDEv5be>?&T&<3nq&@+#d+Sv~95#8|I->ZI6~)yQRq-7V&NDiRhbjgCQo2^+VqZKjdSS79uOyRMxyalTQ8XMD)gEl zjH3xyYQ*8G$d8l`*;o)7L>i}gc>oYN(bs7yF@ix1aQ=d>C=NFKe(%ptlZuOetAd@* zomOaBlFB%i+5>BR6d|AJ(M-3%T0}f6x%4e^uI5-Ndou+h8wElOhsG_$}c|+@ZUq(g&ORl1D&S`U4h;{JN z&0w=E)k%7B=*HMtl^X2?_nIrf@mM-!CJn@(r=6**OY$K4n;pCYVzhEkHAD;$-};ou ztk!8PF?fKf7;a{5AiO`Ms-%%wq-)g%$F2j)v5^;J?ofemam^vGG(nB_d!)@8mhETH zB=L{Qn=gn?PBxFtU?S*-#)ANgSWZE#^Z~kfucl-kGCIL?u4{t3Ndgwy2=r1k@c^P< zt~mCc^=N1|R+&6pXivl0ju_$aAyx0Y?Y5@hZQjD)w_8(1?O_Ia`?j*kmk9ND+StRL z>gVaxvh__fdq2H5zvw<)61UEl`cyp>N<=rgCNjc2?%4nHj9Ppq5 zU@PBuEcrgM<65zYRl)6)3elMKrrsjOH9$}5mztDvR`v2W4>+X*A5@#H(fN3co^5is}KaD#NhyiF0tLa2vh z2}3Hl%7ZrCYO*STBJ)fd0mBX`PzSJmXlh?e2&sb0=@RR2%L*;a?kZ&q>%OB-aRp2Iy6AG=~ule)WQ&x@<_vF6O*VqF-_x>(9L^N0cqBbN&Q z^f+4+IRx7i(FE5Z3VSlU_jkBg6R{Ps2)Hs^W7aOqt^#H~JkzVJBy_2h7&I=o(G*-u zrfaLwLCwXIQ~?J*2|lRe`9{IR)=C)bEX9p=*nw&u6<@e`kgFlMXy@IJ7y=<^04jvI z;1S8iyrL1Ux~?1AJ%$w(&FN7{k`Sy4Duo_|2bvZ-`JW5@-3|pWm=QR>IscsNa_+KW zN1K7QbTGId+&pRLiid5>*Pt1S$sm->ESo0aW@zHXAfG`PRN*V}p9Gc~-ux*D4vYjY zN1cDUNbOEMp81Ep_04h0s@ygXBEi--jxp@$)FE)vJsPCLFl|e`ZxaX1TaAl zkP6wC<9V`zOU^R2M0)F#HWF;QG+ zjY2tbFJ&3}I|ke+llGMWnRNzmI8DZA9p)=4T;=pLC$Cp1)O|i^n=FDsC&8*4__ZnR z!2DtiK?Y3}{8$6DC3QavrcNqzQX7Fz@f1pYPgcx{kqn$+K||Atw@J!(snq*J8WAfL zzc=;U(`9vvjrvTiP-qrvXfTAOxgxoB<2T!B{=VhvE%mSWE$5*l6{i$!Cn7<_z_-G$ z(vU^V@vlO?v0s3E=fwxTzG1gzjLq=Loyg>J2)Z0?N$gDOEOzS@yx3uz%HTN>!CTli zA`wL9k(@;!yc6^U-L1l>jYU9f9vXrNwW5*EQGg7AFZDv}Fd#P-Wcc7+)x%J#XrnOM zvHkAs^@@uADFAK7gf4SeCAJf!-f z3{uDnLl-4QVaSvcmnckmG*XixQ+<%9K>(4(tY5{@4t+pP3{yK>C>7>zUIh`I5H|s* zYQ~tj2$hQhq~Uwrd003ACr`3e*DjYwHCpgXkYdQemKpxWl0r~rWk%=aWB{({51^v} zMX5<1sQcb)n=HO_a7Sr{zv+B>jD zt6*C|qvX-~;pX{j?1hWhpSsGc*rAD_a}>{)>MggzV7Zdcm7$%dO%+pgdB_^mPUCkq^)S{SWxka;>4;OPemjp|e{b{!g0 zTvG0D`ddG?alQ?v12&|es3QP%QH^e<`nO(y#&quAd3oa5LZ42K?)t;Ymz594%3DN@ z<)0Nuy*yvK43Y{wDrA`^XzRYQCXG~@>jA&gb#Da;4vajjpqcW40gM2kbRuGjuuq5T z<9e|)*IU!`Y+LF^U~QbZ`!nQH!FV0+{JG)i(D?IvT+IKv*63m+qI4|f6?^Y8)DnPd zF(GZ&ppk@Kk(6Cg;EQi=u(j`iybw(D-jc3$MJx8kNA{vcJhsdVr=UO|{T12AP~+%Eo|uQ2)Q~?ezjQqBB#DTSp}m?kKZOgTIPUfyfk^q- z$c+Zj{>a?nWIJ<#uRN(jEBLzIoso4nt98(X`n>z6dScks+aCi7To%0}5VUP__<76( zd5`he!dVjnx_kgABK-{U<#e%@u0qlK9*%|>tz2OJYOATbBr?^%gy?_Ib`#{_h|{xb zq*NGhoW?UFnbj(TC34Sxo3gKJztt}v>J)-gO~4sb&}_XZmdY`6WSdD$WekoDOV}2R z&|U)}uj!w;8 zYlYGDDX(CV9Ni3MB33qvevoPhqPF50FQUr}KhzdRGYjcc(1G0?koI$g&A1%?LiCw7s<;k@Czs-PH9V`wAHwx zMGA#=F`U2cF*R%4=PzJn!28F`A5jecg-0QQtN5cU5JCq3xPoTDT|mTIGgS|G(3|RC z6eMeq`wK>5Lg4(UfxNemRg-0hKvL9vps6&DQ)B~XMi(QZnHz;rQvX~cs80g*nS~#G zVYOvpvWydEc?d(lFH!zq3{MtDIzmIMHSUoDhxEtW)0Ho#LQQh_)@^EdRX7{n*zjyV za0`NV@%>)n3+lEvY)Wy;-k=v6{xipmSyHW*OQ!r3CvGA{fOZsU-HCb~V9FWT^{Q0i 
z=JPgA-LO`F5OCyFNwb50EjUwqmaCJ2e7K6=1>h8*zGh1q6cWD<8MN&Fxr7%^=UNs= zBB>$Ibr#7MXiWl~9DcT?Qs>1x=|&*S@>HQ&@fVl~^iDrvauN~DkeBlZ6(>_Z{P^<0 zH}m83kJ0GbDmu(woa{L#N+_s4dH9x~kb3qIL}3H3mxYvbNjd-a6c9ML?z8Rcg0uE%@etG}Ws%{k6yHj}QD-P7b#x=u-@M{842^C*!Bj zN^sHH&Cr$USKZv?mD)afrTv>D&sHjaDfmB0OvWRqAX~LOvgQ!LDFkirLT0X6G>rb?RP|v^O*{W0CD&g4A+H~&%dUTtKs6*gw3!fTIUm}C_Vp7O zWq}W>J4IIAoVjJs`?gzN-m=v44UcNy6L$k_X+vpNc!qCm1zB&87g-tGNz zfB1oY#@9dpq?S3;IiJw-7C^DQGb4}5x`9B+>o_W>MhV3r8l&o1Os}AFk?Ia1vBZ6? zV`!MK+?xMq4+K_&%9vklfv}1YM$Vn7TN7_WWc5u~9eWZ3PI`cgjeMY@q>Wn|+?M8ST0)|nJ`7c#f?(INE1#LleNJUY_I zyVI!!h>K?EZ1`-AtnH9dvNccr_NnZ+S!C?XxrXs6CHw5=^rio}rRDaa&FFY*Z*=*r z`}RNc3I;r^x@B@7&4cZOrd-+oLI!qPPGGrz_|WI@l;dia*(JuGmODzbSMOcQIeTN1 z6Hdc1LBd=b^{V*fBOBE}xd?%wNqn4G$07f6O7QeM68sa!RQgt4-cpI7LdKN@8Pn=&8UBihR8REh8~8lvG)LZb7}*c*8?Vaa)fz z-~kitd!FuYHnP0!2M-zan!ccHeCUW9^*r{nG9=tsoGEw;!Xy z4m?*GgFI5}|1Ip2??2F4pi$_3>X2eQd>T**rj020;u`|YaxP^#kS-ibm{2$x4~Jmw zs+nH6FR7T119SN#)+Nn{%1i!u85$K8x2So{HtIv(7P~MXn{9R2dahdbk|dmnQeYCPWAminLwyzg|#Wcm5mR@FhZth;BABvM+@Sr9)c)qL*nU zYwXR!(MVP1hgdu_e~4^|gsnQX#x-))!?m*N@NCI7J<`TZa`!zR)s2=PkJ%#j$VIM0)Ih2pdZoS~H>pD3c8!HE7s6lfR{Y~H1aJZ| z>O2#9^)ghcc9o9ua~zdRhQOhdVy#*|wEp83s0@J$GX#g^e2RItXNzINj=Pn3I%KmJ z05g@qsk6}RIg3rF`(HnizLvm;ezQKX8ylQZTEEU}~0=WnGX}~Wc9X@H+GxK7!?FnZb z>tgR2xBZ#2kc77BtK8EbmS}%sULx+;1Hlj(r`Yy@t4zDJX51bb+iMC{Sr;qZD!-Ie zxu$e-%}ceHz8!yDMQNkp7homrtc4ru=jLvU-8t}533lkKv3iz-Ls;~J=Fw$9ypO^! zJKWx(<2gY+6LwFb3w{0u_)S8fB&^ZivN`n}0^MdN#o-2}%fNX(w|527VsORFs)Klr zYONS~-2D!Ei^xDT9aC=)k1warU%-oBb9C0kcCV@+_cN=m}P zxZiZ6H}C{itEOOQp@NN2jDWwoQRGc0!?jNwQ}Xp4i+wZEJ3sT@x}4?vR9|CflxO)V zwQFYQ&nBj8B^GZ7A)C6;ZC{J<&{ot+?xpS0Yjc>2a?XtBZc6k( zQKQ%*M>)^n@_1OT2qsH_@Uavs0GT}PPvYJ??n;23Jv~L7I3yHr{Ih(sW;i3=!uPmahm3Mw|y4B>@8Db2K=2pGslwK zX#ZpTCY^OO^Z}bN2yokTZ^Cpbhs`j{?0F{r{`utu$bZ=9oeys@mG0cXU<5$rdgoLw zFCttXFCDWskC!pp%T^NLi7`)cN=F<=l?8OxW_T-N$7>r{35|u06>dN4S#zYdgr(XK z=)K%KGKUR^>9!-$?wyd#Hl~*IJVbVvh(q-DIF@?nt7$+_Nlfgxed9e_>Ae6gMsN}N zuKq9~yj1%RoCZiSNpV`(va{M~$2$vKsIpQvBw^HImQCufyd!UqakSR2j8Ux~iFP39? 
z)Du{8Snh#Iz{?hZ5ATFg;F!h|-<3e`JT6>)3MGJieYS<%57Yd@mP>>T`ot?yMijLm zHwQ+e)maMjVw^vONEyLVJaSj4xD_czN<2mJP{UWQKHH`C6!Q14hxR;#_;iE_K&7E* zBOcVi7HYzkz7k9*H>{Yc(jKhcNUMRlr+iz3-Q3(hvSDL zmPbI$9iA*#+_Jj3PwQs;w}>o?Uwu-zLM}^{u=Hxb$E+(Z1+I=;1<$Ig#us3pi?$D%?MJxOfau7NZ(H$nAQTqIj48U}$*sXSWeWid1?F`W%s{3m|AN zi#YE=1lUrSv_5?F+MPs_E0k2?Nvia;nD?}xa6+3~O%m>zUn$()sc-jwmn~tF9hDPm z&9Rtgn+C%4xTDsIg;((O6BI8sGytOmFhgpWD;4bu&gUqodk_(g)PsLZMo9dsSl;KP z#rUC77+5EMekf~Xgj#QFMfe>S1c8)C6q_GJtCEP#GW)_0L zQ=w#_03NZ03&LXWShlhf62fFx0TAlBu4l_fk95e(U}FPrNOx?=SwNaFG;uYyiFoiZ8WaQ7EN76FwEzobAR7Uh zC@Myo=~Q{&t_>xPn&4v{APkwiC$a=N@`G0!Ambb~QJB`+2_^iIglisdLq znAz&Gth%gqVHR5DglAC_ud@7OoSI>aT5)xG7DSuLV#KWYXpHV8`C_V@e|S(4{VlmZZDci-rM@ysmc|a&wji+~D7y5^;_>Hu zE<}i-=}kcJ7$+Ex_$C3KYC)cAU0f^QS>a9Zc*H@MiD_c&EZa7R6?HyPs&P=7thwM% z=mM1C{i#0K?o8>j!L)#9(Nl0uW;qrUBH-pEMBiiK^A+3-7f z=kv45BM3*MM+7cLM1@hGHoFNB6^38mcp9}YEv9)W`|7DcAwPsmV5p?P~p@8yH9h#{Z zT?tSvh=}uk9@p5G>ufXl3z_0VgcwX%tgsY>$V`j7I&HiqzDdN9M z5(pwPeiWHWZXEGflzFtF{IuqZRQ+3?%N&uH5`UReA3PF8^v|#H?%5(Hw8tCa&2I*T z?#;|p;f<)TdXAZ5(9~k5LQDV_Y?YWJ{H&cxV8lMBp|4S&;UBVqI6L!7Ly>*AR{*h*mxJzG&0X z=!Sai2&zmdr>wg=zDDMD8wqJnCIZYmcz0)v(!VqgrI<(Dr|8#n)=50h+9y*Eb^MPt z?mD8I8TWr2od-h`Y1_7ECcO|s2q82Xdg!5dnb1YRP*kL70s;|RR8Z`bLPw0Eh>C7P z6R<2ID)vxCMK>ray6Og0L@bMCU38V_%lrL=OeT}Luj@RIqfKgu#ipbVS3kOoSVV%% z7#_Pw5(`yzB_gx2NAX0dgFFm=W$SnVazr39P=FtOZ8-`#mVy5Tf=sRi{x6Ynp`69u zxZp5TxUKOxC$}=`ByBME)QtARJhAcUhuGDhCpSbf!U%cwRJfdJJj$~b_iV8}wdzH$ zgEIG+UIx}Bf))Y7E`6S(QVTiJOB`t-Nc9fxeUxeLYX=Zfg>!0H&!c$fXPL|;FU{O| zCPn5hsl6s_Fz;^vACW_uj$>&z=KdLokAVEk+~&u~T$FAX)HX4F^OS6OO(eAIb;=bF zjQ`*74K)E5kThlB(__7&}GSCc0S!;&LJfHKS_Quc;PJT z><@!{{kM0Q}xyLaAy?5-XBKxXmW3!u$Lnr=iseyK?o$ z*}WUP3ifXF{OdleZp+%EUFD8eD-4Txwh>Z_?P8;EJ+3iZthAsWqCVJ((8eTN|3VDu z`|myq<50sKpGho&F@`Z?t`JBdhF)B-e>h1uZ=b}>a`ip*w*~@Hl^1O9fscMW6j3nPFCIinK_mZ ziu+B7ObNUD631I=(S=-hljlziS-(;<#?*{w5|}By#tpaH!Sf!L3=|sRQW4S`HDPqm zR2p=1&#J?niwWmfZKyR$Ta+;5=@0oM(A3X+|6aZ4g=R{ANEdxc%NuGv>sA`92H&5- z*@pR!UWQl77C{hRN5YB*ZNwKgmP{A!;xQvYqtxw;`Y?}zUdOm_kGgP=Q*+lUl4USG z430~NhtZ1vBsr_mSP`o5lEKRVUL+Mb`{m2OaFKBW}(~_ z*v9dYud;yU;hg?w-sX0WqVAJxpHqMGIP`E|1!Thy_nHZ@sS9(glRCsn9P7e4S9uO{ z>C-I`Libu+mG)b~Xv12sV)EdvE&t41P$W|1}wbh*$Q!*9Gswxd0=XC$_oRvje4A=oVIe1Tdm#gP*MAL`0d6R#w+F1|AN-ZQi0 zNxmH~=S-?c$_aduO|q-mGrRn&}e-jduJio(k5r6NZ= zOHP%f^oXEHBB#Szu>;XuXWa^vJ56&qij7hyPW==#;1C>m%jx8inRe0{vH2a0@#a?} zr&uqtBr;aoj1pt|9)4}W5wYcm6Q^615EsAI+Mmc0jT54P!#sLYFXv^|&3Whg_s401 z&l%kvX1}a}$ZS|T!YWgPB=a%=C8V7E(1oL>0+LaxSitA$eK7R>i;;>hVz6Sg;IkO+ zT|YdhBil5qNm*iwK&p~`R1W97zaHQiWtv}RHTWR$hhWfby~g82%|BthX7;yg)g+ro zWb`NJ_b%fw^W)}5fbzK^?rT!RMQh_#vbj{?RG~*SV%rsR0>nN5{)*bFx+`#k1fiAkn2mVi4s`Z#po)-sh!Z-0k6r8n3g6Y|;g(gYSV+ zl>*5K6TPd6GK-I&8f7S5L4u;hH2>i`uhS>AP69x>Q1rn>2P4; zr<2>ttFg0Y5>ZyWLv5-x!L?vadeZ*X`DuiGMKA%8-PB4cHRa6fz2x~?N6`StV*~#&iwCeFS8XU@@%c>ikkXP&KAm@cnQgDe2y4%SyS@%SQD@5KUQ~I znA@h2Cfsz|V~l3#Op!q`C467Swf&w4TO*1$Wy~C^O0~AJ>iR@Or}`}aqnFR8X3-1V zD7NeoVvIb?plFEQ=Q$$@8_S|wXu=Ew5l?ymisb+ij&%Y;)(#F-SX-IGu6NT00O9qR9>Jni6 zIoXL^1pHqr0x;?5pDo!EGpseeEi1~S%%CEvyN75xhA_5`m1lS59ltqiwk=^_gAKD~H@UXRpA3to;>s_6Cm9Q3si|7pyD z7EYv7Qli1%xdo?d{8c1}0KQBPV6D~&X)=1T>zYK0>uwA~UwtJaiU@Wy>Ks-tM0TIO5riR?m|FN9dViBUA4q1My6uo)b2Skkha5YW$L zW<=8Y2YZ9~XiR9Ls_%wT@0Rz>_-31F~Vd6FEWHz|6lY^$;W zCPY_F^$~suD7rZ@gzJ6>(G;lekBY%h#X3ff;FNj@IG7TTu~zn6|9C#ZIail%w$gJS zR-0DUg}YurfcEJ+XdQ7{k8`s3t>|>%ZIeeoDiX;_JI3*Up9&Mb-bzgwdvJW$_PzAQ zC!Z$R^5|0wAjbZg9NUy0=0!V&VUyCpdv|~2q8QC)%fQPp5|hCUJTraYNNU(Y+1z#1 z*tliR^WgJ?Uma7IIT}MP&)>h`2=098HIC?M4lMzxLVt3~b+tj&n95+7Ud#~-D;Jeb z7+#!-@>!R-r=Uos3&x4e3TYa_hYeJcNc=bCzl<23U6YMt`GHt=qi%Vctph*$>XS*!w 
z-AV+LG}XT}$OC6yyFd7yf2sPz_mZCYFj7tTVy7R+DmE$Jn8!}ZdrfGQwsDdT-Fu`= zd{^$?)-KgH>|=3!-kZ^!n9OY~o~Z?wu>TLMriAflA8RMD5$C#^t8xoA@7BsETzP~v zAl%}}TCCIi;99t>*+6|mZu1N!ZJ+VBS+ju>Uf-i%LiA}rtty^_zFq_jG#juk8M}X_ z*Fk~zfO!Yjj7eMiC!1(>5_+BRT8spImH=A+-@@hT#%%s9_OS0z;*MLsbSxU(IzW*!RB_ClU8W+b8$llq0QpjJzn0oK>5iJd5U<_)<6 zrVOg7@L6iB7LT=Z?^oXMM5tGlRP3B1L#;HxtB4KpkSV%ywsdS}FW9o~9a{ilPcggJ z`K0!NWEiMZn!G<sN$p=nn&h~U*kLKyKWkg-eIJ!EHJgxar}z$GA9&(Gj4cPwhX zyllN=%WBOLtrO#ezTxRByWp2f2Lg7GmM@_r0xEah(6h%=G;Y62=&(viZAF7jC0n{p(HHn%cgzGZqkv%4L05EO) zG+zcrzl>$8B~lV(I^g${yl~q~-Qf_=7Mj!wnOuOZl$ezD=&2pC?S1%I(7XvLo^u;= zK|h7PF4k;VS~OMibJ9FPXjI+9-mf`crSjU#Gpe#OTEhF9!ZV86WRWEK(#ai8{e8m) zgj{fP1@kj5^iQF7H0Z>{&&}U+MbEc5sc>doGW#|>;I-DKrL6KTC2k~ zq*FL*goIAt$J%IRT(mJLeVlp*Aa%=_SJjwYC1xE=m;>l@$QfPx^_y}IS2YNtiSv&o z`8dIZ%W8Zb@L!$EWQ|VxXy?d3&Hd!*~?mMSfoD>N=pE!x6hB0`5hw8UNe z&&|QajVNiNV&RmQP^Oa`z*sl6qm_~f*l8LemJMe@V$08>Ka< zao!wXnjHv*~KvoiEpcCNcuEYc?p+Fdub{6M}0`^Kw zmO#2rt(P#W7h4QDp_ns-sD%ioGjV5_74rp5dZ8uk<&7imMy*uRp$>+Vkl`ydtQcj- zZd$&#%PUbx0ymNot5}j;?)~AP!I`_=qV5KPU9$A0n{DC!*M7VI#tt*9tA5ucRX6|W z`2K>qO_|p{&o*$kmNbb{BdivMN{ly3j5QMDA_?oB(kKlytOkN9l?ERL1}u=y9ygdn zq=8x}Ly7lS`O*%P zhN#BDyou5G@b6!n@k@vsb;}9Vdi4aZXpzl;x6L@Q`d*4tE5+`pVfJ4IMSp!IpfF*7 z9_iEoZAQzg){+sP@n+ECscPGXaq>O2XV>2gweKioiNTDP_EA9bT$Nu4=+`+gu4?Bn ztyq1s7BbcTy?jkh>Pk}eIunr~+XXLFR%1}5klF}d5ExvC=rc-^snDQ6Nz;LEt^)d~ znsP))?-I~8OU=|gY820$=-5u}Fz8mA9OJz z8mE^i+UPK%vebaa^TAzdR>mDD`8v#`X#~^S-Q^Em0@W_~Ih9GjKv!g>)GAp=dSZsG z@oosf7SI}+Fl&_b;gk9^0s~Xk!T(t?L~6`8C9C%$%UVU#F=3~bCZ|CCQxXfj%HWhl z(P7m-Cal|fiPkT%n9&;C2bp~;))ZoX1k~T}D|Z;%Z3Y^zQ?c?@rnLa|k+UhJO5wh~ z!2xpGUt+I&m|Zf$DYWV5^Z)7S_(J?>UgE#WQDXS&prO{tXbHI7Phu3f$uP?A4!g8s zU+an$f5iHgrYA^AlP*Ntw12DL;g+kh8H6of_l1(uEp{xs$jet^j*ym25OkpA%%s4> z0#d!8rvco}m9Sh?L4Rneop2u2%6L#soWp{DvxUwxTKyT6QlP{I3SxFGA&Awu0y#cI zEgb5_ZA9KN&f_yK(-|Es?qQ3|60_PTlnepAU&S&7&5rO0-6~V-9@aOCS-Z+$4m3}D za_Sm<#s=he!R`Au(yzjd8JMAyZBHYP7u7gjQw~+*P$fMdq$^0p@T_5cUAF1CR*?4l#?R~odhwO!du8;Ujp z#9csh;$>34lWDJdj+lGd$i*seSle>W&v;l%Y(WiwW=BFtm!s}af%m>Jx2p;bt~aHf-^1oP{#*GD1~m4Wm83>YeQ zuh8_q>ejyoMuEwjoOzqZ3u7LuEJsxqP4qk8pRm0TF=C93X3wt>?k!xlG`)*ha1|E@ zdlEIePCp_{M#BBKHEw=mJNotB;k_e*i&vIePiDC__xx9sm=*{QQf4N0y7hRbEiv>K zwiM1WC|&OKI*Ebq=#`GTl$~GNc=^Wvj;7)=)EF3W($ZGeK4|}P&bUgf0I(_b19!6q?m+E#t!w}2b&36n5er-zE@!bz@cl3|LS?v4VoFq{ zRoWc;vn9|QIja4>;pTbvPoBknp2ZLF4w&ZUv$QbV-zwoOV9 zLIEc@n=HF=2|iFz)A=d!~y&{3Jk4U6Lb1mLq5ItY8Z&St#wkpL%i61yya&rV~Kn;dgb%onm{Is&r@W_vaM#v#U zPS1sZ5-p!xv;3)g_RH#7;50R?U0*ztyh8i>``RyO>73CLVM%J1g1bhirYaM zT|pITF}^=Bt&4sg)w*=SJ0||>&k3mwLLPw=GJ=Jry>6U2-ls>k2+SB#g9JSXRWQye zGM^EF9e2{*K@+3N37_ zI@Y(KEveZ_nXvT;WYm73Em;*KksiEj$Voij_4c-Htim#D_t&W&+0AMuH>r$kCB;57 zwm&|$^Zx5~HTH|--r(m~H*0Pr9wE;4ydo$p%XYs?9kz{LX@N}tv@#<1@6+KC&*w)j zG@kk+yUP3Qxl@TXG)qzU;HSDleM@9!R7H&-=*?Mp1dGL=uJoYeeWY$qL|GKUd|n0Y zGGV2v)witO0gOl7WV|C8?3zgkhkNAEjpWB4gyq%bDB=sjXW`%>8sw7Eu|6ALgQxOYpD62N66Z+ z1xYn260)00krwZwc5(4+HtgktXooStg|KLU(gbGv69Jf*}Dy;Dy- zXZ)77)S_@%-R0+mtg^&R%MbsYEvD@r{lv2@bOEC*Q$|4|lih{#NG@aizFjK+7(FjG zU!j)3R;|4A_6Etrj>m|)(ccN}{@A$awvC3)f!|;1^R1?BtV4gD+B6*C-JE)>!u1H_ zU`oo_4Mxm$+U|!-11lb8(~$-oM&Gd`N19=+X%4Gd^m*X%K}lv+&S1HZy-D`GEr-MF z0$*wR=QVL<{eoVejnWt_||tz@-Zmk!4l__G5yF9X=uy1IO0 zhv)=C?La`#1wjZhZ)mQ#*_CO)!$x|mB`1Yb$e<;Rj^AWteN|ns`rVXK!YV;keHJ-W zFn->dZz>D+pF2TIX2LTBXXwoxm+SeBErm!vqo^D{R`=P`%T(ON!$ag4p7e-U(y+&q z&G#3?O@7@U>;7h?=z-HIU5)zk&#Cb3Wxvc{TE(Bz1d*Ih)Q1f+J*I=QZ}{dnH!j6g zKJ4pR^?`4jdA|n!Wtc#XdYu)|%uO|1=GoF*yS(Db**crbWhORv=@B@Si$zJN@uNGD zDA$HDIFi$#9q(zjux&T9m=#biy*<2ozDBCwt;EDRLxde%a}x6eW;Ywl4&=&AQ&51e zMRER`IeDlq)wOe`R~L2j^Jt9@!j8ljP2L1QM?}LUt0m!Ft;B@;D#>n0saN(7%F?y6 
zy>+9@un|W7O64YEr}}kp!=-&)wsg)2t0dtuW$~%(48QUIOT!~HgSR%8-^WU?BqcJB zD9aem)3uK6^k9>_9A`hNbkFtujlM*QAn$Dtjk%=h0)Np0ClTcU-cWumBhI6kH2kmc zBewg_svWQXlh8k^sSf-T`WHI}81{(FqyWJ1MOYv9SvhaCLI3vm!xS->vw)xLw>Kc> zw=6Y6ThmSi(5Jw*w~|mMr0=Txe6SiIBI*sV4+DB3fK(?>hDN_4K8>5eIhq(g$`%j_ z>lP?-L; z!J|re=Gdw5%q}yV4?j0lKRYcobUYIA_bW$YYf6tEe?$1<;QZiI5g_@tb1L)5o8WnB zV4Hgk*dM38Z%^!T_g*}_0u?`S-5Pgd->0ON{a-O=ue9(RuQ=lv$;s*NKj@lqL%m=a z7j?jz+HNjqrD>(A4WRyo5h!u(f|T-zo01F+iZ$leleH>(q0S^23rL8F7IG2u3W7C~ zD}&q;+9`B>FdxJhG(q1k>61QYOZqwuF~(}O@A|QXi*s4$@f8-edIeludQ92zd1J)+ zp=Oau%xveGXI=5VCwpmlu-voeV^dO{Qmj(Q&k7_ zZr$0}L*2|eXg`eQa(ifkAJT)5aNN=6b=F3G>>c?4@oL=W+YOMX3ioJ!1b)1(-xN;dFN&}y|$R@)=-wGs8vFTVmELl#20cvX>Jk51s zi3K2?2$Etn^uI*sAR{eM8k8Zyr)VMilszRzoW)Gn_8WI|@wAiiH$5-ljEh#2UYKG` z|14ePtCTv12`L%;mw3b3_o%M}!_Q@oliorUmuEx=JssKmQ||F_Ksry9Q73;A1D7icfc4et4~o-tq42p0&eQ z;B#QMJT^u32q16fL|E0&5D@-(){Bu|m~BNw6ummDO_a??x-2Adk`Orb4%M%F_&oTb zU!4=Q!L|bhU=$Pxu=7}pbu#-J9OD~GahvfXc4+H7bHSix>aoGjnqxN0zxkfKb$31E z3AXX!o>A52qzD8tvMy7GwD^z}vI4*WkU59bvdW`1pmX9Zpv z``j48cz&!dbVEgYzsFqE^~g;+^;M&sZ4ZoPj9jBl%36}Y}ZRcy5E`NcL4|shp+LzZ34Ql0dfXI#OumC0bxqwsQ9K8-oeiIEdT1XML~o-+d`iqDgX~cT?nW`owjjiB)AZGQ&pp*!ru@?kLczMFz*Oiu zAD7^!@W)Zv&kIsLP>df;{N=HTuK-6pVR`AgPsy$^NLnjO`?Q(XevamsxPEyPxm`*7 z;AwsFf#HP#GHWsJ5E7s~m{ z!*ecmlWPy-^jZCA(?ls}LraBA6!Q3P|Zs~aL?e1+lToFw6j>hEx14XvbzqtsUgHaA~`md^u+Q=leA1A#MH!s;`i0bl&>V%+~K=+j4Ft{*=~k zw#7kTj6%nPi4QQti#NCpu(}R;Jn7!PqBGg{5N0A~eh6%zBC0-ga7)!V_DLQ#3dMb5 zufO(x%vzF8^%|`JIRZizLQJEFUjdhFZ*YJ}P667*i<;xjH4qWM`uZz~_7-QMsIje$ zcR?^|TnkPL7^o#YOeE@taPm1|-);@Tfmy$hUlXSxJn8ZE>%hiw!9VwbFZO|>pF+Jl z=oU)e&RL{Yh1(9+JP_b>uR@!3`L6!f$nYyYcL_&Q`2`}lQ~S!d4tTKzg{Q|s69D~V zYngw9#oR8R-Dgy&Y9(eq+~BLlqA)QjQg0iw4!a+g`!QnL4jkj^-T1ODz01nM|F^HJ zq*pjwPd3tQV%DGJlGdc)KvS_vjxuM#HaI8x@N{nf!&-;eV$nWmgGtOezYWd@53!GI z-?>0!M)ur#2($IEYFl)*pAz%708{n}S4_u8sZE^zzw7#<7`GImToPkr0Gy0V_TRR2 zyR~;yK$DHl>GFo%bUFs;0BBtEB7fp;F+o(i)SthU%CtDinrFntgm{IIc*Fuzo3&q4 zB5Z&EpN!Z3E%Z=BjfWzkKtR~sK`5sachdu!5IqU^z&zE7;%J7A1p8+a8CX0Q_%A8f zp_TdNaHWIpgNdb)Dl2~#meq{SDyMjh+6ZYjNi*&(7nNds-ufreB%AHgy)5U*hP|LC z`v=BftQsG)do^|FYz5}+N=&dK!5ei6qhK?YC$CPMIB%nra8IRb$lc;oNnFQ51am{> zJ$>8z5;GjzCTv-Tr3(OJ$r6kfdVbr_rf-G6_WYKCGa;A+_V*N5C8ogG#9V{%(LkYG zg4YZt$P1H+x@-~?*lOG+J_b!(%G2QcJPB)HU1p!SSBquNfRY(%)r>)We}rty0n;*w zqajK7mz!i{{$aAOitHFhVl7;Mu}FHc`|___B;7t@@f`rAq*HIFOEyZcYZSwcwF zCkOc?=J)pYoolLhp4!sUgPE*L*!u6aD}@nT5>`8b^_ zz7Zu{6P6xGblg1Po&IlVQaAe&6Jt9_7`qJQECh_jtB8`F=~~^i9J60Xz|?)!9p;uP6WjN?0*=>XiQgrm|RyQCS_kGWuIck_6#h!MdEalvNA|HKk=~&?0Nwix4p{j zb^48*xoC29hWOM~hlBV$gLbql$qjey903bIxViJEgT;rN7SCILcS`Agf9>p%guRV5 zZ@n?`N}QSmT|GiNtRZ_HBdr*sToPb~e8L_bSrI0Ue2Ps|n05Di-%u?i+j@s?iu2%J z*yaxqX8@xPLZcGH-~)HZ^92t8Dq8B#2X!dj-W~v>P%yYc@?iyO1|U6obk=t@l@91= z#RbQJ4~G;V4gqv}M6j+u45zMH3+f8(Of7yGp;}0wV@FR+E-Ct0h2PI2u#5S)7cxLnz`3dP=`Y#4FQ)XXK@2K;w( z1!Q(*&!4Y2Cx<+*M;@*5RSr32V!GdBix8s58!w1?&;dNZ3^zv4;3@@_&=0uHKJ-pG z2K%isBkzb>c%<0`GhefiB7RI#gS+{Z1OUJxfB_SvAX(am^@2qE1{7<1Aj|^U6DW&$pfV&}QiRXRtaW(tkTw z+3qjqbblD>)-#(ZyJKP6wKx2=`dUp$TTQ}iY$2aGYUMR2tRr0pDEIf^qXpR41JqCp z(xRNwJHpZCTZo$)qOM%)24imEK^@N<^aWzlh?2$hLKd+y$g+Pd!d zGHeW;JgH^0@dsP!iM4vlewZv0w=ibCUfSTLl3wD7X8r@sbCw?BdyMv0tW#C7^ z`zLX`2DevtY=O;*!9E?nZ-MK7@jv`Eb^7u6RX|%0Ky+L`m%!9$hh71@5D zL;$K5IQcg~;bN;gh>3IJ6;)ac$OX2XR?w8d>A(IWY!+O{K1`9lSe>bzx@JJ)lD8<| z>donZ(qqgHor70z6dnKl%-G%#hAHxHlCPOmTuxX|JBYro4S8C(Cn$M0^&m2FR7_0^ zeZoz^HHgU)I?gQ8D4F}s@lNik=bC!cyw&>jnEP<@Vd)g3wpJq}2XlNuSp_o<3__cLoAI&R@%Bm`adq9Y?o$;h2`;9>x8)Q-0 z>5o6AUY^_f=4p-^?t$<$mVXRf+bc}k%(3m5QGdeVjo3d0_Iky*VV(EfU)9r_qo+y~ zO|ATR*LKrD(&3;JXKEN@XAhX zeZ@56BdytV-gSKX;b^J#$%{0qb%{t(8M|V7k_w&@jgw 
z28S4es(lfhsY(gKT!exY4kD>r#UFxYce&x?SRYrSE5_Cvu|BYAKS$3frNZOpMJIG& z!=kZvWS~B~;&4QH7CG|u9@~Z(sNA+Jd(!_^i=y}bLe(Y=nbUMGf&AgA#_h^`^}@c< zd1Oz~!%FX7k=^VIvo>3^+=0E<=TZa9OW5n`BzDI0f!@l>Dl^_u)!%0>cxE|0B$3eE)Pm|PyGDuB z{e@{{sa>5_4&K3tPK7AbXaqS4UiE4p1uMSq);nEM_j?aRtVPQm+^RtGw(ju`EG(u% zuk;_N_1^0DRmJ;yU0LHjq7XOMWwHMGvd0hhS&=vQ)zT=s%xz!~Jx6vwMPL>@HQ3u>*_kwst`_vqOE0 z1Ec@9j8ToK@h^((?{zq0N^hx2Q8KCj*+o0UlP&+dw`IO6Aa z{!|V+wk(UTc}zO^zh|0~MJ4xl&oxZ+L}v+ZE9&>p-(Xe_nG7Mun$uD}Is9Ctfy}!Q zLORe7R}Ynm#wJ+H>eN_Yg+zY`K#oIuts^#6ik?ld1hdE+h8POuhEa6KE~9#_+^#@K zFDL^oYduQW+>;YA_ZKankWpSI)>R+mT{_*Ga5Hj5^$r9vzym-RtVLazVUPO!Pr-(rUKpe|7x& z`Fz!`L==+~D~eAvU#tFgB* zX00bv2lt1%&WV&XR6{^^2rz*}nbn{G+ci&!(M@<9<^+VKvIgClNp5vdQ)54>-E^KF z$^IcAhK*U9o{uT@nbAslI!I|F8fKHSw8Cx*We!hCKeDueS2e{#A{Pxc+I;BT&oAzc z7TGr{j3I&?*t04_O~eV}oa%b?bGYGa@AC0a6UK@1msPm;MJ)-$IUU`(Dt>3z5&eyq$=chh3fc=ghpgWwPX!LrYI*xMOUH zcTNU&NIHj|T-!F5cG!O*W^?)+M4!q%bt9|i1!2ZvR^%%cyq=Ixz2^R?EKFxECtsf{ zcJ1CkSvQo;#LcK-U<9ZCOiMF-s@AhlF9m|vRO*MLsdU)*j~B0hlkZCvid_I`gO?J;vP6vm9Mf~4hYV%T&Y{_M2L*v$un?hosO^2M#nO8^s zhg4EC(QU{2B`1=PVqY(kn5O8Y%?4;p#IX-M-bD0k4{WGBx;tapVr0?5TG?@T*L1Ed z{0i*kD1_!g`{HcsdTv~r{d>2&hrk)2_3ZNRlU6HbZ&-q8@0?6kfMDt ztZEeJoH|Zk!tXT7uS|DdDZn_V*N7u+913od6wBtHslD-Y-M+28q-2rlwG(E?T2^)5 z4Yg~p*O%m>e}2b%pgqxKnjex}c#ZaKFQxm3IQ*vP2yF{l?}(zu&ZUR^+fd}qRlG_s z0mM9X9I}ewJ)ZybAvKVLfJAkGYY~qcNXN`mqXhRN@J$B0pS9Va{4hZEsJRU9^-(th z=3H?j&Kv`G?E$PkPPrBHzI|+^>9oa;AYtrB{s7~!oyPap68|uxK|!&{N)C$K%ed&`^c8?L$3-#GVsxA2?Xb&f1_Jd$)M;MnwmgrlrjKm9^| z?fG*y43HF!gt3p5Vka&3Ef0D3ME8RiU`ZAmti$Td^nwefS2@B_1y=VjO+~tEdML|( zz9j$FU2IZKFA0+OzACVgvxCtlaw?n}^c`hiScY16qxywoeKwO~_lI}K@hgCS?;{`) zw1wZsF8Tr43edBhw{CHVKIQE$eR)6WmdCvdWB67i(Tm9ba5)k(szR|gu*^dZ*mbk6U}x#4%g0u-Bt z^e}DlLLx4vs9hp!FV6NqxzXS3D(_6gT=&DqqotcFC==JYTxo)`Ycr>JGgsz31nJVN zUUY0Cu{exgoYq<#H7gsD=#nXb{;EsXtvX&R3exw5Xfdn~jMZCRCSOXP1rP;3mo1lH za>41K0c-%r4Mj~UU(G0&M4={zzNgZCbCWsFTW$2%m{6`~*B7j(64FxxOb)2mEwQb0 zg}h}8Tcz2idLO=1H#IhHGCUL7>6v8f##AEQcR|q`-J&{}-?!3qG0WdNs~0g2`h7XG z`&}^87AJs9mSam+e2o-vn?qz0QjD1wF3Ta}OuIn=M1ho* z<@->g0u87FbV;_LuYLcRQChMupPM>c@u^bBV|5l*^>zSwcIU!Y*RDQX#*9&$2sEsN znRU{IQ&_0UH9Ep@UqjFz^ho|nb)nZl($%1eb|fi6I**7=*D20lBDCR)(=!Kt%lG0t zIRz|~M@~iNq4*PWe4ZGerHE~}Ey>|GODWg7$M>2BhgcLuv1V^STn(~XYaQ5fE-@&r zTfRgsw=tCI(5Z{m#a^7Nsv+fkwJc-|V?F~~mVqWkd(8!q#~;ySSLYN?`6zHkV;oQnxplG0}})bjzsYu3T zBway)`CpF-DZYMXR zjrV4392Xuw&~s&SO|sv>t$7`uwyD^3PO-ayM(ekHJC@AP$9xEO3LKK%+Kv^$rCSw* z?H#4twS-;Wgzc?_ZHm&Z$R8WaBuUdoA>qG&+wcBhljKKJ^UnhCXMv={{LZNJLRu^6 zFe{rsHs~cPjw@Td5WrC&1*!Jh1-eAM zvT9)2sp`-WC;t6=!CF}eP_kZ3(2eYN43+NCl-4{X9DG>1bBwT~qcmo5``)qGwrxsT zP&cnqVtzn^cUo+Hd(oLrGq_tee-@ytp*VDLb2BbeTfE3$cDWsw-hrbmWm+{ zWV&PoC-DT5G4+t1pwZLFQTDpZ=kW8S;ER(5spNt&-{-e(cVQ;Q=X4IBsz@i@b5S^3 zvQ=5SU0r%G^GQwolgWpKid3h?u}4F;FOscRF2X(h-{`V5r^TkCu262;`C<7IB`$Tg z`+wV7Zm+?o7nyT4k~5JOuHCFKBF1%!>RPmmx*hj~%?3s@64M=tVV)N7|`RUG7 z=Leb0vIoo#@VE=EVr_Nh&Z|3LCVX&N9*yF+w3co!dNH+UxzD50z1?NaVeZLC4t{NT zl6*Tht-a@G)6qqDdw>mHma`xSFpryY=g#)exoXU|{qH9XxQ-s3ce*NX=XkETFHeBa&BP~lf=5KGdXUNocV8d=8Pp@%|eLLw{b5*{%P~&wuf3c@o@N!>?iw+UratP^;z=d?LU%# zwV7v(}*K2zq`Fy1$ zh+ZsME6*=05&e+zMLGwVROU26r>z-v_!1#8^IIS3t z2=W!g*|Ueu#KUH-wHFq6i$;=+i)vX&O~_~J<{TR*y8P!!X@>ZY2T|{VPo}XJWodPA z4~Ykt0Lk6NI@%BZjs3a_pJUHAvD!Ay%Y2rZ*K+}!<4rB_qTE)!Pc|sPgb3Em_m{6n z3ER2LYJTP2*_>@&Cb{wKear3lm1W>8$8IFI{cDowYi!iXcAn|1#Epox?*>C)d@eWI zlZ&nJ#3hR4PpyiRfXYO1i&(>QRf55@i+Su~9bY=VNJT!4LpaAmr?jCl)F=)P5#dAB zD7hP`8trv0Pv7{2@oC=r(zldStH-WAF|QKua}7}6tfihxSC?)qpYIvn4?G9~U&tja z4M3ljFR%N1p;GEUefr(9tp7*Ro%kh@|9=1@vKGzCT5GMWb^EsLZ+?FPk9oj@nR$O+ z@8`?McZticao_sP`4%r9ndYng$byuzFJ#B-w_L!;78uQc7HusGnAzJu5fw#M#dRt}40X 
zEZ`&t{YA@7KSCa}Pv-Y2JhZ7E9L3uAb9&#)Fw*G>iv7}+4yQ~ zHtx+V+&lh^FjZ70b-g`4k5?+vE@`DXwr1vk9EZZAh+7(_jOTv7o&TBT4E~n?`5`;^ zgz$p)d^!dc9&rz`0KlTtP~g@tf)p8MN`*+=gs6x?i2sDhuVw z7j=T7>EAEes21MDwT8$RBjrSQ$P1C}>$kx#mTZiCN0=`q|1!Ps?uzm~k3%Zvw4R7q zo;VeM?Om7%pN*F9D|(ewlU_M_KV`w^l!sGGTqkdzmHu}2oBTjc=$v)?r*TCaWwdsj z3tS?U$OI|G%zz)SJTjhL?T7^MMyHq0)#By?@pGrq8^>{x{PzngKyrKY`5JJZ7zB9$ zk^lD%b=R)wE#G3dcxP^LF{-lp)3f+5Ga?W7Y4ZR(;Dd7mZjKb4_#dj@{bSR&JA~BY z9Qy#U=ivPaGs%L1CgZ;9n}e;s{cCUU8jrnJ{IY0-e0POFwEQRK+t#;>5*|W=B7AlQ zY1i9#btGB+maPX57F9BHpRIPCWljc!!rHa2|Fu)0Z>V`6PRPe6uLZGv5(>K{oN4~# zk)_cgfRQVsi)H7|$qjTWMhc$ME?-bAtBD3^V!1({{x%tK^Jy)cn%D&iQfG3sHbjX(mX&68h#|go4d>%Q_7|5_NS^4A@Rh z?F~kRh`l_&rrBIOhviVSzWsvYfJEh^ zy9y(Q4L@x*p3F2k)^@0LO;1$$3bWM0iIl`XdIrOX_^9D)#lunq3O-9~^r5DlVjY~8 z0zb#)4Yk#dmENvM%-6ibtg+o08(@JFt9IbBAP|uPU)Eb^9!^ zF4zDcs&DaB0T6WMsm0ory*Y;=TJ?K1bTvebgIiO?cYQ+0>RzvHoWBt=LxiY}mNaDa zIz9{`QQhTN_w9BvTp=WyMrA1;L3>l{;>vgOo9 zm>=6A>vvAE%ILAK_DP8Gxvv<~_t~OW*U9z^F~!ujmMQFmOIf=o>>vD;lFYMa$D-{^ ztGo94T4QBk`KdYA9U;6M%I{ismG~k*`#_`Ka8ox_b12V< zV4IlHyE(~c_|pB+de1u~RtmF=l9$YKiC{|{P91m1+tw9jdF%IAF@K$^6H>p%n~)bD z|6n$}NQkSy%dNPXs!JdbeMqz}``ydl=-T3tp3_ge9iJ?>vr_IR8TAG>F4 z3f=$_eFHp65a^>vrU4oPr%oNW=6~rUIf$L`%!WqPy0s37@QddQs1A-JyH1krQWR37 z1r5@OV{ZTbbL3cdBKI$k$k6_2(bx%AuZp)a-IBD)_ie&2Eh_QGij&uj&%je-KjOF5R_X>Pl=nRe_&c)jlVtw7CUCiu&Zs+_l!}vD2bvg_{ zSgR`%9A+^fGV^YFM5dB~xtnJNQ{k&FR)i=wQ^>d;*BqRU8^o+ulo3bOG$f@5WJIaI z=&b-LK28iRf&V|sb=S2ngO;5#$ZA*Mz9`gj181Th_W0kPp?y@mkKL_(T7IRaIV<$B z#A4CZwf#+#b9q};ssFt)I&*x$6<+p?l5xNjZa#?w#T>{C8W-@ zm`$dTg6@g@Q|rdaiA;ib!y}r+vxO;VuCia_S(!a7WQ`gVcCNvh&kg@d+c_=g1sfT9 z@h%gNSt39HP!V}#gHdcG;p|h^8LRHpzA4DOL`jjfJtF?$X-}o7jvkem zfmm}JBvqE#;^RxVCutbZs`_kw#hCwq;S>Nqy}`lKY~1mh~%d@}=(t(xNj9HYTfn{7*WN_(j?psXa4Ad1 z>}jcpr|zv2I|l69PT?uTdEQa6X$H*v|0ke33r&qQZSB+!Em*v%W_e(;nwW2^o+wVz9ZfCSvV7m`4D?hTUoPUHLVOL_~wLH z{zomNEXSHhjwt1hxy_7{+;H35l0}v!9)MvkVom*AO6$qS){Gx$xzi=tBT~>m7w@tI z26%cel#Ys}^%ifB%WB1pxmVExDIX&r(R^@fPuyxQ{~}45yvN7en%aO3hZLF3$1zoB z23YJ|r|!3qUTV8{w7~Vu`rPDK8y`I_A6onXpjIn;f6gq)(ND}}oE74W{R^cvw?Q6Zyzge3V6SP30eO{J`9lYH2C>Jz2R1xC7`JzE;bu;5CGs9i4o{6vc z(@35R(nFP4IJ(9BNhg7`qozFYin7Z*x7IpTS6cWFg1fm1!9Q13TVjCk7F0;$i*UF^ z6hg=xBa%*E68Jr!DH{zA`yH^exeIcbC?-#05s}2~io1qc2$8QL=o3-H035Cb=r@9V znPHxM$Tte|91-M_2nK3aEpwod)Wo*Lb$8i`T*Skb*bA>BV`@Oq?aA}=A{LH=fi z%@P_%G$t5v6P4CAsdVN&J6}yMt6;?S8|MM!EL=yHuvrbd%?Leq2wUEm`A!k0|6S@` zEi+*v5DIWc!MCF_&W!X%l-x%rV7p0eOl8iqQs3%sxGnNeL`|4QA|IsUgd>KkKCCk4 z{)SYOVwAl@WAj}=iqqJ-X;>`K#0_U(f}y=a+X;kQ(edDo#;;E;DJK$|mmGzCwf4#N9{^;<@1~n}h z=fLl_P1`rDWlg*sUidzd_I;T4AATpImv%@i450|WT+_$z?hf1HsmE? 
zwA!tJHCU{RIHCt4^WW5+V$AJYe(>nPEo>L%6=aouG9u`u*@RQ*in5-mI$ zaioMPEzN&D*=X~k5AeUVu&jEO_ed9eL^`4Ca=v2Eqdhm8ov0fwUj`wAJ?XQe#`A`T0E8oJCCnk)P^?noD}7{FB=f|MgYiHQA3mPtjJ?~K4F zuG@H`&7@lBx5H^6T^PWWSx3p?62apghzq{UumrS^!wfbVaR%qY!Hwprjm6`}K$r0- z0*^>BD9}=*F&x40Ie>U&HHEd9l?7aWLc3n7F4&sAbnip=m`cg5<+?^Oe_ytpzTwTV z3jWQg^o%+tRDFE0dza6FeZHrMRI_mRn0jSQpWirI){B(J2p#xx_pBE8VhkMlmeh!0hn&xc3CKACjie#iNm7IM-rsg%9E_l|A(qsFpH1{P|2V@9yF2QulJAlwH>(eX0^v&2IWVl zWCN9UbAbh4CK(Vcp3q2>A1qw9=S-bEIPGA74*`T43)JW)ig;#tU8A#$Y!bRy|(wQVQ3}jQ=&W=PkX$2Cr3v4kf~vp}YNCrKEi6 zc!i)dM}sY5lP1+9mH^K}>-mU5Vk8N{nH8dRW^dp&gzl!P&|-6HQ>c#dzmiI(X?Qlv2v(%U}hjU`V{BFn(g2hU=R|9?f@&^+?%vK|j zZTWM95)KsIqs^c#dxV>;TThD~oMsaEbz#AyVe+8KGt?&+vh4joh*##=zvu*-9%*24 zCNWBCT>=*R$y_Rg95+23#1JWR7HTSS1xgpx%>CMy574x@1iK59T6G1-%$M$lJu053+MzWvXZ0c$+b4a(%~ z6g43woT!T+05AV@tRsw|EI?Q|s3|jY^>mZB_Kdsjvm|eGq*S2m4fjyVhLi@@hDLPpibd9#5;XPr!@t_2mf{{FB=+gbJ-EP!=Ba zjC+?=DRalmTt>jrC*YOTKS#!;oLmh%UtqmMY7#Cri4w3zaA=2mw1r5VtRR$d@Ol`% zGflq$R=-LnLTO0d#sYkz8XGYa+zO1Qj^sM<@jk%ova)AMh}GtYpA_m(_!o0hOZF!L zgnS7ht-v7vDb{A;69-Y%D|mINW$^_{b#>o|r+uM~bDNS9*Yd0LC-9SB^irw5+F9X?Y_A zh;_i?%SdO0;Omul5mRjD3!3-IDX(M0OF!vRty)b{yV;}neSgn)`h52nCc&XjK5rl_ zWV+j*bi|OZ6$NTb#O?F~@;BlMv)8W7e8JP_K#-@LH!C#?en{G({+9^alyn8}ll>~r z8utma&lNzbbrazb-F=WZps`b(TDe8Q4G`G=4>5qfE?_q{(!iXR00oAp!rv2qc}Xaz zk0+?3|8$>NR6Xb9%QsHv_ctFniXh~vy+EegmK7Ex(&QZF{LN&EmScHooV^mU!-E7q z=oyado7C8zI{b(Grbump$Hvq4^(f`1dQ(l8@2{G_UM)Ud_rY)R=alWWq)V;d{@`LGL zx2-xi?tj7V+)_aEuU{9j7+6^&?WNpvPLr9E_eT$|;<@1-M?SXk33DITKxQ~Pt!qA0 z=um-+43+9lQo7ws-oDX+9vHD6+{M262dGeoclx}#3K)cYjJI5QMxpJwu(_^s#PPT9 z6N?JgE=4Fm1E|Z@#9yxzESHSuAS75rgEgy9Qkv4XvtL%%EJJP5R@<@UM7MNSr@(Ur zckb{MedVwXgTk(oQw!KS!okQ?^w-+8}+o19TV55VVVfqQpWeZ`RwS5N)2P=^Vk% z$q|Ycu_o_U{M3y3NO_5J-Vy1W!ZD28Vi%+MfNA7Z{DhyH__{Q@J0Rw+t%{`o-}&ZOX@>=6QH|RgdvLFBK zSW?loG&IQB{ld1}jOtc*Ih>oBKYW+Z6PU(C{^N;t$ z;XN~lD?;|O*BH4Z#6Q1689$K`UQzJnN`Ke}hHCxmxBZvzu0Ix9Yae}p}?3V>@;wA&ejh-+^~}dR{LgsyfWM3JkoAH_r0}`@YvZivH{=HTU~c9 zp|zsc;z>pPNPIPb`|~5(oD~Gwv3s3X%NXyl8~-VSBYfdUzqGgBcYljJro+Njc>1a>NDINA`^z#vnz%#@$#SZtBZHF z+zBrCxgme|=+$k7)B7sCvG)nl!TJA~oO0Pe|JdfP-SffKsT3y3uA)ctQ43acM^Z;H9O8k^LE_ZBGt)k4!#Yo1G z79LJ|-wjoQL|53IkLl3jen-nQG}UiyGi#+p8{Wq#a*WA20zFEC^DztTRQ7dw1niLo z>Xp5FZ`(wnve8@a&GQ)?$R2Cw4?wvF3SJgI3O>qb~?pK?#8P>=z!M8Kk& zyeM27dkx3yubYcrqVkq78<%qXS^dPB3!x1uUCoY32E)56j&A(H;2r+{;*_IOGOaN{ zx&m5gqTI1kv(&+W*HpKH!0rluOY;fy(x9xY-R5W4WzZu(z|Ey}J01*@**b(2oFmqi z*)~X8jClO^Rkesr4C9DV=-CT97=Olk^<23*5JNo8ii>rJ?q37DT*Wwup4f**10Yoy{)M@tPBdQtLC2g9VrBrw*>! z>5_aH+|gg|0laVR(l6#WTJG_l@N2DKyf;9GmAz6T7?F{u2#ibLnRo^=rL)_2!68qK z;#}2Zj4s_xKpt+fp#`#Rm~AcDmT`GN)oT;Br%x7bGNmK3nNwb<=Nz|8dW<9^&Md7V zV5}=@@+{7Q*Cz&PP2z4kGLviXPg=A8fei1KvN!tm-Z^8}jXn69y?!|=z1D=slY)K? 
zlnGYhK(w>9b6dp;c4S3uuc5;ze{SW87scED`xa^aE+qNa@*Rshd}~#%OM`&J&rt9x z_^(6q{EMa|y_@|mMyrxTbzRm!FkNh5p$WI`j>l%H!7Ztp-Xo5*6QL#Z_+189E0W!@ zRA!ETxW(uibAf#&a)|m<%=ARnj23>mffhB|oQorbs>n`?_bYz{rA$?1Nwq)k^Uugv z)Ws26d}6rM?coRj;uM^#=~ zQmRZG>B*_}VC_VWuI7f*DI2O%f3~t;=Hf}LMnue2Gvl#ojWyH)nq9&!YCNI#bX80w1IyvJF1%=C3^oT z<{yQxa&^f4Sz_SmBcr7`?haoCNwJ0RKyw~5KlzEHNB>iV`t;tJmGy%2sWQl;xY>x% z03?LgQdVTOnBGugIC-O$(i4NDh=#l46mn9#_km$aeuIFDogRJ&on$!5$xHfmh{2_VS$M_Ro zph{|}&`I}nQT#8Ct;#Q0br|~j8OKqDu?qN*%_w$?*ok9gie$z|s!1VHC6YyKD`aFb zQ-E(K8CTKjj3 z1YO{sOdyR6XMRKEgd<7KSUF_JG)hbRvZOQRe!457f<-J_-OiP$IjH9JT zHQ7!BWphU~^ezAIZBCcx{QsE-Ea%dUh}J-+(Q;+C&51wd_9jlAau@4bVfO^LgZ=Wf zui@IUg))B8pB-0uz2IYqmx>b)%!~&cQkT5Bn71_3aP8ln|2)W6 ztx^4R^WCXP-!n@LXO59V&RB`1^iF<74j?cn6_>fkjZwUcv92`o9h)eliqjZzOO@hN zi1FVqLD-pw$P*@hCSCr0jQs>Iu?-cF(?;x`6#<|%HUX)=BaQ-=Y^?p_V(|zqd$roG z`i{eMb7KkOb2LLL%o{yUuTv5nIve&q8)>i{MX9#!OQY8A2ZEmTF6y}cV4a?Ki_O_b zO5N*mr}afz=vOtK4NI}LJuK+(H=ftIQj*M>Iv9EAR>Zm}3!&-PAA_y-fA7?P82mGX zafUGR&T4jzQKEa{#Wnv;j-G`74pD-mjlRYY`!vSXN3?UZT-pg{g!xZxx%#Pwy~MVM zy-Q8BymA@JRT2-0yMmOEs}c$pKh+}!7MhYl^o3-MeH9?1O2RHN<2ekB^m$oyM|q%l zzOo}T2cL8oZ;54I*2a0D&2SB2nKCw9(;eiswb8t8d;wO?Dn z#Cbl!Z}?xRF>~oTa$4Iv+9Rg^`UP?jKtjbvp8$$qqzbz$9ni4Dal5YcLu$0lB7=q5 zd?O1iTLdgjG%es-OEi|}xmG>R=5NJ}7ML<$ht=m$y2O-VB^aP2t^pv6DjX37z*f(? zN5>LW-hI)rtQ?<#eMh3?aXezQT`M?st|OA~8KA|Ts09lEN{|l6)cIz%elu3%Cnjq4 zR;4#|gP&ldR2?x>O#Bf4eGNJuC%!SGGq=&;&(1Ld1ysDo69euGl&ryWRs&{G+8J8C z&mDpdoz~?IPBUQ~r%7tnPW<8b@%D%2ACn6D+VgL}D43c^$0`%%9fm{IWrxQ7%nk48 z*Z0NL|iJ z4UIF54&0}-!!!`bS_@}bpmfd{-2w)Jl|=z_ePYh|=nM(*k~qagR3Dzu=$Vo5r&r1u z;;9M|8^*&q#2}RFrUQh)H=z@?vh`b5>mN3)6&_-*wMv2;ea}?2mt035(fM#Re||ui zWX+!^bUtBHRvV7H1pbG!vE2?dtY7nX^JAM=eEV~Y>Hg>JQ-CdT3I9t$h}eEW-Q&PL z-jnwpLNN*d>#buhL*jCklqs5EhildoY8DuaJ#84INb-F8fij47oU=K)k;ib|o_ZGQ;}E0_FrsRM}2*gG-bq^u)Z zL5L7FTc9BO!pk*UpSG;I|Lz5+kU4AMZL+F#yxK5EV~9D$;|`uv+0h=PuMU^7?O(5s zf$O{8`B)_R5XKmr`K&`yb51jMh0HW>!gJ+vjpSCUNu7woIWlIc)>sGF{qXo=xOEe~ z^AGkjYmXM2^ubC%_hUCfTu{ z_;7l14e_7ygK zSbu8s<+p8#GRQ1Ip8USf!(y2--Te3TyK`6GeMsm`l)wH)_Z>-(9ZI*%F zO*8#R3x0G?V7eG*W4|(Ib;sGh%Dyb~{@ExVVt#CC1~cqf?%lXi+BKotaxT0uTxy=L zM?pM_%U9A7H|9^JNu0p=Uz8H0{3?Q&91UUbw{Klf39pQ9qyofAv3b;(!cFOqZ{Z|X zSTsq)I2lnUT=Lt^$M^O=a&WJEj?%3(Y5jr~M?ctoZ2qx%@P}6ef%f|W&yj$6Bk~#( zN6%T=N7IE>x&z#rl||^rLZ$0`9Bab0n=j}d5JNsFbxMg@i7zG>v;Rd8|Er@_&Zca{ zfqx<-iwtI$@-SJ$9M5A-p?f`*ZZ=9%lP*2;Zq$LlJ#SPbuD@EzUx0t_p5xk^`9tZQ=0X)}CjxSak>eCnB=dbr#rX0kQ zkl$ejANAY@`LqI5tR(U5KdI%z^j_x475ll_`=!mz(Z3hHEBy3!*Vi>(BM~jNP$ICf z-r`gUC-Az@hRdu|hGwP9u))t_x%D;2$A6Q1VY6*G>qDS*FSal|W_%WbhNH2|qj|Ko z{MvZ+reVOTznGTWL6IP{|-tbC*prIRtV%^BuORP8z1D`<)Zf2cmW>BTXnE;#+_HPNx z^Z>rj(unirgg7{IE9$Lauloo?IY5o4j+o%BXI(Powkmy-Voo|5raeX&RyYf_V1<1^ z`#;=a$)~r}rSEBfw|jDKUSkCwUy^=z`NrK_8{}8XHW$dbn076uw5ypC?pb@hHH&S4 zWYBzc&{LNeo#*u9HyD+(YOaF)6E?V_goX<|sAG&Cm{bV6y;akrnpsxg=)2Hq>XC0mgzgE`RP{j~X=yqIM4xFWs+Ay<9go z$IjzH{n5f7Kj#c1cEg3AY#j6lPw#&|*cf!`PF?JN3+DC2pC>Hj{VdRD7JOSz_ysi} zb7|GO+gqD$pJT8sY_JzF@Y2u_B{YSo(3?!xL;8C<^_y)K>ziqa7z}`!KhzXVy*!{M zZd4L56o@5MaWa@20TME&*&s(NFomw}d$q%F&%+Xn|2cY|C1(m%9ZU-^Sh>?3pn@kU z##rV6xSM*EdVV9d0DZFJ=w8bHo%`(rz5c?t$nilY#Hdp?(duWW5EITIMQipoj6r9G z-1ZD#jkY%ZwC{qzI`I-l!KMd)Os^4sd`!+ozf=)~;<#6b2{wP7uCdToZF>05^k?Ni zD}6bycV8##J(REK!F^O*!63@kXPBjFXj-JeN6Nk@hQOOfQ?5*GAN2!c*zhI;mUJTs z5QJ2Ai$R_kBms}-!^?-~W6DSK2{7YRerQG5QT^e$T+!B7 z(7ux&os|H#TID5Xa59YW943ZK^TYtWIA_I^%mh+&%3aRbCBcQ~m*yfD?dN{sRq96Z zR@ePWKX3nKj4q4?sG}(01~7#gL@yx03>mdrHFeQb43$EBdZlZSl|Y{ ziF<5gYTiBknWg7FVW0eEta~-by3n6>?^ksWjC+@b?)3Vi*L0iBUm3ascqAYh&f5b- zNH>zSG$Mt|Pq}3wB#>3Bma4Q$ob*Y@2LIw)ziJ307m}_u-2x@{$RN| 
z@<|3Pt~rXQ@l&G2<#pE7wir*#qV6sYY4V!0N`ZkhfeQ{QBHlDtAG+s#W( zda@+5Uz@blwKLbWI(@fqa%yI6dosb@O}YTK#L}9M%B+1$=iJb4>^HJ31|r}Vz>dt_ zMH?rXS7qmom>-)+=(Y99#YNd>%se8QubAZPN%or@5F8;`jbJYkiK^$|Iovx7=NVG%4*pr`gv_MBiE8#zKE^>d(jj(}xI`uZ~DMuq5tO?vq<{pK=cD$vl@5 zPRV` zt7@UeHzC6fuy#R35{+$-S{-H=qKX%?hDMQUIIhBs>K)>Mf^hW_VnyzD#TT-D+{}#< zuI9N{1T;>E~@$>tlA zGS2u12bEe#7HI}qQ*OG3zixf+P3fuh8|Sh2;{DYE!%Z}yoRnB1D9b$R^T%%I*}RmKIdYdf_bJI~MU#Nc31kgn=4y#i8QTXjvazr3{0zY%0V0TEfIN7o{q1w! z%$j@p_YYcR-z}B>Zxc4J*%N*u0fVw&yuroCPp1`9KnWP1wX?*I!wfS;sL=x^!(O2z z2^?VR035MZx0>WPQE9X&MP_nHRI$9eg}p@$5Jz;UmnA7ln|V@LDX0_$zc#OdiF~xV z#}RyOwD{-fHMQqn+McSg?@{+|43?hX>4t4|OVzt8r_X!6^_cVH^rKt%Wc8+}l-lpwX-y+vRc@77GilyhJFqob^{E@lR=#8$7q)$8NrbsY|~r!~ukz_`JZ z$u8p?eA2w&amF@Yse9s3=}QHWM?(hJCSShybO6f+uT?{m*N0Afiv1RE)eo=F=|7ns z$_z1;;0SnRfPz;OpL$BML{0^q0OPjt3BFSbsec~q*d~?%V0<`BC16zlimhj&5~K4T@-$ZMn$z~i_MDy+BL9HqVRpbO^94JI0qx|PM=Lyeb|1#skB>8c zzxAmokSgVHnX=1m5d@on)>~(V>|b8yrf1y5(hw-*0oa8WW0NBbZlw@w^pW!K*a4o@ zd6LH_R^GI}ds?1uZ1DP-s8d~)Qj#+Aib{gshHca%}bUtjsR2y0tRnW2k3a^dm z$?+`J47wMvnaCg@#Sv4KguU)kOIYBaY-DfcFM)hGILaCnvQQGvc`H3rwKf;6=5a~$ ze1>a=vOTLP5i87o>yp)$ZKy8DJIeXrn|al@Kd0Zm+`hy$HC`H>*y2~*WqV~B=ggOx z5Ag1s2FJ<{Qpb%%DfP}65D>oUD)^XtC5sAKi4M2BpIk)C#leXQQlPKMzU_J63R{IV z?VO1*FA8tpt|1G~x>K9A@~!$sA~1xH9gm?&;a-+hK_`wWot&agA&c#I`CY-4Mk4XQ z?swh0x6pE&l)q|@UHL{bpv47L@WvmkZk}cO3tp3zAW_)d0&b9FL#*&{PXBNP{%7W&N3Ye|paNB@sxko3E zM~-7bJjHxLM^3!lZmoQ-^;5zu5&W$&20i!{CHdV2ts4X+SaO)a%<87A2Uq+aP0PV5 zy+h#0bA&Vdcr-kd<3O~GQ$obNGk)_HQtZYoolY?ittHI(H8W&=SYftj<@|sQFi_kB zryt_a8xSx&$ZMO{yiaO%C-Iz`3%k^&Rb+$8Qx~Ts~YZ&j^yr<(1xFf z+qA#BW%x<5zP&K!Re4BoT?E7Y)u^we_RsvpUTMc+m#$mww;r@)2ZTCScg*{9Srx1g zru|g!ua3ac=tmnFA}K8+MMx-66(uO_n6dEwFI@-$Yqz8>kzh;o>i@adePwLUmGSbM z4cAp?g4VNP_EET{e&1lS|Tia{H|z|2Sr8 z5lY&k7RF7J%J}480HUb~u%0FqZX$(}h_ zDsJKVRXzsOg^{F+Uxq+W5&oanqICdy+cZeW*8Q2_FPsXJ#@kuEl!aZAnieIt*;^0z zX^ZBv3CG2xjjORo=sAv9XO&hSr-e3{(Iji^jopM##apaO+)@#b2N;+MG4s;P@n>5EnXtmZS>Nn z?C&Z|{_1?XHZSyDnSo#J+buie?Gr9ujFnEV+}3A-brN`rpw4~dDitYAS3yLb&Hb3p z65ySN8DK)_QIT<|1xo}rrL4WY&v*!+1U7=z)3gCn4ppWtC7c01&p}HiW z`uAwn7iHD?2#;m&tE>aCtPBA`s_zz8dn6oipJvfJvcGO*CtZ}S?qE%Cl!NvQj9_-D zA1v62&jF}ADAWcKFE=F{fEH%*2}3D<0j{ljv=+uGf*J195}EKAWLws-XHbIvstGm=WB^W2$iiS3Y4TF71<}6 z#)Fp)CxgTcobxp4IBY0Y<9oy;o}?+xg?@3e?loNhS}d)$IQI3u$?Gp${(hf&E0<9u zBBdjwH07x@n9@X5hVqpGSm9Hu^4p17BW+6hy;wG&_6t@8`Cy@46Lt8 zmG1OsjDx2<3{L%vFpv%P->0|i5RnQMrvm?1zw-U5|I7~{I!I`Fx@B9n52ExCD=Xgx zt-i=g`Vlz~#{Z%qED#ZIs0}`986Q!DBrzdJy}?qPb5%r@tI6A#1PsSSMNpL78IOuR z#H3%)^rmR*pgLR}lkDmTiL_uwMBd5wikIbZQQ`$xV|Q*ANMmk5H>r)~YW6A9Z&^t=$2Li9HmkNaBIQN0pYMiw$C>+3VXA+B~29wGTK{hc3B`C#2SdFh@ zuE=1LTSeh<-=L~Sb>(%7>-_+>7*(n(l~3c%P~2iZ$zMW}s)_z0aK5Nz-P%L3k2je# zmgi6A8Tj#*y`JAeSc5HyOmcV96o)Z(+82ckfkuC*(Z3Nme*m~YOId&%N`7`} z=vUJ3RitOX9&5aDD)`UK$Edf9X}8Ch-wr?^PXv2z?ek*>aO>oQxoCnC(+{`jz|Z2P9j z4`u&6KJ@NKml*}~pg))s0 zCE)TkOmj9!V}b{aD%$^5CWt2{*KEtf8NPyar}=nM#& zrQQ01lJsedMh;&-T=F+PVkY&}r>!{e8lAEqT{8F5S}8;wZ)ToPjjZ-!*&a->Wt( zopWV$-rQS&VYRb00-1kzS1e)W5BsS z`S6*3oQG(+!8BCOr`lbisWOGYUN#SSXKz-A`~c%b;GJj~r?7r{Mf|VDi51H*g@N{BxO4na8=Tmm&+3fB@rpaSk zvbBO7FVcgfGi1|?LkuzG|CU&wB*g)MXWr9QM}hTvfHb{tQ~IvNeB9h!5O#>?(@1s{ zlRBnp9oofA+s{Ai7PJ0XJc!WD7^ywehE_9%clht&hwI)OJAn&i>$7n%}P4isaba z|3}f8_{FsUfBY=7?^Dw>(>m=_Dx+PSGp!@hln}y1Y9txyx>>(xmNHUIgtF93h3rMh za+ygHg^{=+?qm(=hPYkV?f%a1KbX&B&N-jY=k)kwreZqhCS0_4CrXg+~08GkDpPW2Tg^ zLC#Oerv_sMIOJJPg$4iBjov4t)?;Hs1@(fN_$Z#3EYdnD#s6k- zh^VG1i>NdE{5lFBbHz+b3XB--?}e)<@j6pEOkdFkJU~rG0@W@otV=J@;F&akR5WTb zmeO5)GDWO*HG%1Bchy)QPX$7)$Y*1Z^Uo!O^4#^79DWroHQ)E#T8mOmWHK}gR-fs< zK5BEBC@;uF_C%rgWKR49ToT@pUy(N^`8fA<=JZvL`20yCiLr+eyM1qlXz8kF;s317 
z-8pcnc)ZULdyDGmzxm(7%0>?n%P$7E5_Fy3^76)ZM_4%wYZ)SS-jt>3N&Efs`oi|@ zzgf-+&bd$@yzI4Sx+{g}FGHcGzC+S_IE#c8vOA^y4~p7Vg9ZM*e+}48!Ar~sxwial z2cF~QG1Y^*K*e~bK&@r)J$r_vdZg6ZCGM)MIe|)8#5?cz?OOT)D*R4&7A)xtSg~Pm z0LevpNMrTkC>&=diyNK720`njW_9K3UEABCh=eTixp>|aMXZFUbdO^vx~ImS?SVW` zwN|+#)I?3V8a^_jN1LfTueO=DpScEE*Kh0CuhUR(kalY%x1fc-EgeOY@*U#pO03ep zvD|b2il&;kHIois{lZVEX5A~XD`<1^K(+vlp$YjJC`1^RZ|5MxveN4KVLd=bs>vnW z?33jF;S_GOOphZq7wAd)g=159*x7Y~1lF#;simhs-V==Ylr%-!W*bq!KEWlP5}}i5 z_z^h0QDr}Z(^ST3*Q;=#HE_pEAHx$h!L!WX$7-#bm+{5gn$qf}Jf(uu_Z^4}!}_(e z*?DKqKVBf=uFtVl$yO6CWt!yM zTU-_dH}d zNTG2mEn$D`h44*BT&fatngvR$qK7g9C){EXFaDnmtDhJXCJ?r%V@BuUH|`H^Um?Mm zL)4LtIl8E^el>F)^AESm4(4>tnp@V#La;w`-vGP&demP7vNT+dF2dE+u;|L`Z!uOI zK6U=JNS9h>bQUW#$bN>(q(ZaSkZ zes5M|X{|vP$J$XjiM~i36gR25>FAE*lYaDW<0^pqQdeE; z5dQn>CKnPV17V}R0xGG45F%7+;sQYsP^F_n4n6=;I+$hTb=4qlZ@@{MZ>O5bGQ%u| z>dH2wh8!+t>ZerXvtpCBbpiHv56a`sF3j~U2KqNWF?v{`&_;APF{X6fjA9g4ojWLU zsJvurnzVSs=NJdv-pgC~z5&N)J6<0@9PZ2+mLXatBU1!B#%cMvv^Y)E=MuWeg%(2x8jXYP*1U^aYITG%D5hw$ufEa8?C>3xle6arKp3OXSgCqw{DYmTLD0JQy85Noq6 z!MnOr%RfkCy+@9ORkIEO?f~9aZ=^f)!PN4*|IZBoeHB2#1-h8_LF}rNLL9vy-}ZQ? zaEq=~*f>aDRh+x{so=i-TNh@;)m-Mru(&PX1KDNhz?+vb4Oa$8(>0gsnVV3!G^A=> zn6BKWWqk7F4XK8I>p^-gOPf|)%30T@!bNHyr*EeJW17Z;9W!r2SK+da%eUKIpg@hDa7L zwAw{_*wCLVQPat?*W`|y`iA0WTxjz)&VW%P#J+Cb@;JKBZMD-l2HtjzVU_9#F0IN# zUz1}Oe^{Tg@$z78#IV;in|b@f`%II6_JXl$do|CZZ=?AY6!I^d-^6f8lnP;ytq;F+ zX)7F}u<)mS&`j-d;e>Y~1pO0eY(4;!n}MJxfi|^By!rkyfGPP*OPh#towQSi+v38e zTw}jty9hiW2*3hEGngNfIvm5h06=m6ICck`=OV&kcC~iW`0;u0=p52(1Aze`#O^yN z#F1h6{FN-lO*DLXkZAAIDI~DVq&U^a<7Ka`CPbtZ&Qb!e7{G5pXj9kZv(>taAfQFV zcTf-_CC9R#zttZMFX#6O66~u*HxQ!{Hw6{b)(PG^j-sRj57mk}2c8DuTqe1qi&sXmFO8gNKh9F~}DnM3@jkuzZ_` z8Jgk&#g)sD0>-F}wvcy&Dl^Sd`hjOQ?7?r;!{q&&9}|iZ$VQF?V)qe35ejw^5Ds|l!ME$W;JBpwe}xQJqx4$?YWr#v zfgb|1L}4as&^?uH7eJVdg0!1g;av?+dZKvHfrrgsLYfTp$@>Bit-5mh*U#ca9p9gx zX&!P_=-ELhqTdS`b9|Qn>zqem%wu_G(&ble^a|#l=j1-tK-6}xu?D)fja%jeAPyVq2bm$> zL8(fyMT^g!iD=wtgSV$;e5ndWaXky>y0Bb~FV#jT!l2SGs2$HDqfDR@W{M2t!0}jN z^r#Yx7L%P3Xv-lpF>*{v3navyJV+rU1IR~Nml!|9p?FCS@}+2;K!RRt%#c0q&?lVK zW?3glma%O~mu`moZ_%kEdm@TWUERZTFP@%DUm?Bm&!UmSYG#ZgsF!LBt;*yFJ;nP9 zRR;L-R}^)Xqpn(1PYrMQXR3ZT$kZt@4`Rx;i0Ec47zrayY7F3ohMz@gMwHxyl38NM zesxT<+H**qD$`8PfTjTI=r;8S0MU94$Dp{L1m1uRdvI&1ukuGp-)zHh@+N5l#m{8JY!vI)O!G@X!H;>cT_S zG&!y+OT!#eTT=HhNg6StV(3+30URsR_*r~0be_})|!cB?(RVg~lMOFy^^0?&qKrq}`I{fJx-8t1G&%0?+DbH~73h)&G1k4yqkS zUvw-^{HshyLuyxbm3m{F;Q?TOfF=Ow@W660)u4r?kz>q4$=2|3RgQ##LAu0fEoC}j zMPdB2Z6t|TuXv2@cM!7@iv?hFv zT$2F6OjI?k3-!o=yoc12jevav0GiQhL+bBiDhLy#B?6=xWExS1C^U35nloC1acZ3r z;7AaQC{U)~CMa~J*%Mi?eRFw27&v(&ILH0^&WA}k=gqWlcXl)+;>h!ktQWT+RK9-~O%XKN%DkSImB z1p!3ilihbk_|b=yh$r}r`TmVDFbQ-q!e+KwqSN^4;fQ`{G7J8IgT5?q$|j%0iiH&f*ueGO2f%>g{fy_)&QBQ9#%ujo+`*wqQs4Z6A4Wv ziGohD2RP?-n6)`9XH_TlQxeZ2{jH*N-fIveY@Er){j+w4{d=llTv@QWm6Q%LgvK?? 
zZ*Cp$=ACk0`@C*2eV)VlR6-MAr+`Go_XC>eEgmi1boY0cH@`2?gCP=JDB_H|0n7}Q z=P25mbb=yN(Fb+U&tIqWHPhed>`17c3=(yzd@%^%A$5pU`O6&CfS|AxeUG2?-f{l6 zsu#QCf!&Kjrp2Rk0{|=RWIGBl&7hqbWVG#f5Zz{WL6eK2a6rQ#%>hRNyJ|G57)Ozy zG}#hIvnu_Az$#1=Uk!E@TAyDquT_R6_)Fv=NPMGda5L-ZdVQfxQ zH|bHB74rBpe(L4rkH=QVTa&uaa4xmFPhBveOaM~Nx99#lBhz8@F$Zrnoa??Cx%1Ac zI(nJc$8DQ7H(C{19*5K0ra(8jUcpf%01gG#z$1#sC4z*=-6 zd6JUahFZzg7n)&K55`YvmHgd}F&8d}d>UutPgH5Qj zP*c)TvatE3%iVdC?!C-w!&m)OhnuxAsk7q->l&Zb0W1_Je6_8MUQ*x*@Md8NttpZh z%r^eJyAKmCERM~Ywe#((U2iVffi@Yk;gu$0?@;*o`*lS)w~;Z zulw${oBj+CU~^JjOvWlv@ec8p7iab_-uJw{^8BZmknDZIPs+*{$0Yy))}aX2Va8>p zvqJUjPnA{wYyP+@q78Omt6a+V$Zz+o)gJg77J#sbYk;JsGRXDWlQV55bkfCYyqobh5!Rf2+w_Nv^H9OeM2S}8C--civ${L!z`}`;OkWb44S5+@+#PRvY`^dpwp!`{Yo42oy+t~TogHUM}xBqUHr@30j9nn@>;GAwd*y{iB zw{HO#_I9qy>5kTFUOc7#8^J4G5boq3zC1rQNAn=<&u(?}yb&E3-HgYWz|ww2x(J{7 zPLUak?Wr2c_}2I~wL=>ytcHbMASnCHP4NNbpQjgmR|ntAhg&IQUH($765_XKDE`l2rV-p12ToEfO7?e2$7Ootcgd!?qWi&h?*tU#9Gvm zUE@KP@n^)*>w7w}OoefCeC3Hw-AY_+SCQFUxItWA?REM;`1i{k0S>ox?K%ZNc-}81 zqr}xZ|p+{EwPT z`t(i!hHHNSfR^$r@Z^$tDO2wSaDAl~qhUe?6FAaznGwZpCDD9E7JY*nW-%knEk+bi zpytJ`n5~Y32;>dEOFKwbEveh>N{&x0<22gQSYg7XUhn}e9>(LiM93&C5dD& zJMf_WPTs#PmOu=ARYx`oesr4h-HVz zS-}&PXC?m=V+4qqV5AP2$Cbw00OUv%ff5~?(NrG~sp}kwBw7lQB8|pUMq@QXnm5(F z0P>Q4i*@Z7!g`n+BWK@>_(S+Gd+MFIOLG^NT`iF~6q)zjWMwiwdn}F}6L>PCs-MKl zN@!Novy0oX0@ch&u!A`*pBqc_*IdYDvvdS8uIXRlv-}gc3HEVkj%mc4^6D@0!qv&2 zP3%Oei^S_laeHQPY230I&a*SUw1WPbS+|6TMK5uJo~mB0wKOs&$?b7vdyiuvNnlOg z227;2J0y!l>Y4rnqjB~cxaLt%0=bX{(9QkT2!Wa{G5CBFB%jj+d^R^uF+r5fEqDs9}87?swEklZ5?aK1(N zmCVY$*^CX{~zP{{S3ev|+2&stz}*4!aVz64P*|iB%pFgsc^B-By0ub$7&45)A<1 z)-1ND8L=N4h1gNROpXM2m7J}FhZ)}X?Y}4PQ0=j6Ati>p-cB8U@tGkBu)5BZuZ!T_%b@*NtHzXTQF4+&tdE%S=VSQxE}p%Tl4|E{BVO) zC&hl_>s$2sMxVhLGc+Nk#`>Qe#n6vi1H(FVD zHN9IleR${-%Mv304kdb4zR4TS;8euu3F}xT-ZU|uE;B+lqaFk~ViIAr(Pwgv-+mWL zL_ZJ_(hR%d9^g5pFR8Pd@WFc@Y`ySE#aFk91$$E|Y1v-vjbS8r$skM_&WegIZg=(P zXkJX$xKl~yjl)R!+!hy*o6+HVA%lZYPj%;S)`GyOHm4u!1Zy%m`H!Meq8pGO(BZUy z%KSHn&z@A6} z;TPj^=jdi7H66v-4lN-H+cY!hyyDh3<6Sz;O44Fkgr5orrHCut7 zD0@3fAYdhtD=c&EdV#G@O=lBcd>(Q0b){^dweieOyYlt7v2&HPxnY;wuE$-Glw;5yP=*YhoPFfx$F0nPJY>Q zy2uBZ(_p1E0Tcr(UKMBtNeT<(F#ds9$|uc^d&RAgdh+Bt&?;A7$`K%Vl6fWJ=YH_? 
z;lW?SZ{chkjErqc2vUh^6!12a_ z=V5o43H{jD5daDEjIczna*;o$#bh@#92LLdee}{}O9U`172>svVhLR&SW%*Kw40*- zy9WTKon8k3vR9xY8vra_alH;GUTT3jtQH^-1u0c1xOlkWq0We-*y2ec!TX zQ=j_#g+JTs&OPgmQU5f5cmp52Dj@If{++mI@!y9hS1f7#G5vDg_tu`+Q%#Sn1QhAl z4o(mP(nRBrZ2D1xubi?l>?pUSNll#y0DAbWMIY$ndDxuo!>Nc0Ha6aH6Pla&mQs+q z0s(NAnC9n@1WDyt;^nEf%EuuUklV>yZjq3C#(WlF+y2XT7yxiRj<0T$0K|SyITYA9 z^LQ0@#*)-H#wTaYO|?+3u1|kIzl2tseqeg$S6wYBd7oon%+!t-bGkPWbi zATyo@9c{33m4hn3OqkDU_j-U%KIK9P0-nzS2m@&_rtbP0Hx4He2SE7~5dRYp^r4rZ z@1pi07#VVc*=l_C+afTed(}9C`=02E;2DoPW(LJXc&HNQ$L)^q0GpmW1i|)w&*{S^ z8um{iYWpe_A7=`5hsyLd_WXIKu=(wWYmDRUucDpKx910x8L=tJ*Rd|L2z3;0iyW6l z4ZtBUV0OW)kjIPb9=y2u{)Ovpnv<03SbZ?(HLcsw%Q+5MTF?oZq3F%dv`4SD;3+^b zI^CB<7J)92WnNWct9&4ArV&S*Z4+7MUDX~e7dwVEGWs0V!2mw)dEfd+OtZ;B?(W!c za==lUh?qI%NEkC#_yXi%&|Mv$yGNuz2?__5TgZ7X0OA0EhEi#>=mQ9| zM@er3=slp*5W)xp=|gdBhss1Db9z4k=m&!)e&Vi+V>#|d$cQ=(gYxrWvZ$lX0a=q( z@=o0SmQPbvOLlY#Oj^$SC?N2KTVm#bWM!748UAowZ;k_zvi2@2uX!Xcr2 zKNKEhW)5Ic^Fu#c-Z~fj@f>Uy{xn$~tfu#gXGVfMrr^LhKy+yVwGAmFyv4UUBI^YJ zO@JH52)-Q-aZBlPv700|Odd-Ng50{$RBcC@=IGOSrM+(qrwVQEC&l)>nhw~eaz2<^ z1#`I$w6f(HTzrNrbK=8u{S^)Jo1Z#D)Ks_p6)SB>vo7!G+@_MJFQQJ8ouxMy-xM!l znY>T=*r*8dtHu+j<+)~KPT%p|;sl?WS?uzJuVRE$lmSIigqTF>eye%b%$DdoK{%E5 z5Q?Fg$&H?LL`q*{gBO{cMUVj3E*LX$i%qu1GJ30bU{Je2F8<=-OwbU>!!%aDlA90m zu%Z@8O!8vXaoBV!_=Y135#)h^3OJ%%O{76?9Y}&!lbF$w$VyJ$L@IASYVVOcH@iyY z)I)Wem*~l8zXcFW{Kd6sx$!l_oagUpb z>5|`ONgy^l#Hj8)PB|jOT!vP2PW?bI6pOXdmNjI2_!4Dw0CdF)>Hw2mgwl0;VpJ+` zeS3@3r2Y-3LZz|JfYPxD+bo^g?>4_M*u|5oyD|duGWORV{4ANi-salr$9FEruDe~^UEp@~vG3j9 z{JXtxTrK%sJtgiPAlqW3G<`VE_;lQL6-{`GD+g^iedH>{SHB$QPWbxu^`eaAQ&j1v z)(;4;Xa5oOZSx0Vp~llcD0baTY+(qY)y5kaBOtX0QB<7rMM4ZT>>vu^sCcw?Om-(c ziU~}Qb;oR(V<}i72&eF7&{WtkDIoZUus9ALH3^1LTVA^p2Tf{2T>9Eitx%d?8qZ1N z5;Jl3EBY?-5!ATo4bC>UId`*9-ZkMs`!7{H2MRuo+#4=kK3xnril873JT)_~d-1Cy zkA1nhQ$G8|xC>&w-ovwh4&T3NWHb+LtL@FvXXYrBwykR%i^Yy8C_LYS33|51kKcDp zKGZiI%Lv#M-91vQQoL^R7ACq(CjLDg<_!T8PPAr>)g6?T_)Am8zo|qg{T{zQ=<0-$ z+h%kOin+&C*~S_1y94q_$uzTgeqW5AZ~L@OJ8(R@t*Ck3=>=sMkVge#>qwIzGKQZ0 za`~di+qxbmy34q~^etbsJdu|-cd3Widti%G&#f)_pK1>~9hQ9Q(wi@JJNU)~qOoCZ z>M-Ku%$Omp`AFlmI=X2scgE13QKdr-M*2Og8QV$|dR0zNqwnP^S>9feMAZ6N*2xRhZ*nMQf?OQU1;d*|x+j#zhJ zpGjcMo%|J%Y5%cyg zfEZ`mGhf|N4%=LRGg`Zri{*ZEKJO7g4n}3<)+tec0w))rkxn}0K7P)&C)s&S>4KfD z>{Tvt?b!?0xks*Z?&l7N_f&v+>!7#e0XipW9cx7;@eB{=GJMHECFDlswrU zQio2SgR(^Jq0`_nRK4hr^P|^%+1x*SlH^N%Kj|t^*~04_nv`~$&lJgO^WG`7!+4fr zvcvu4dvimU&j?u_+P-ae$o6IH+<%&8|7Xfstt85gM3ISWHd&7&Z>ET?U!%o6O2=js z-7E~di*kT=M?~yAQ1=w00gFxADFET8ap7~YHOkT3+UV`|D}GXFc>+>o9lGXB>G^M- zaFx+I6TuDi&RUBmClp*%bObC3PKfO44I>>oz)x&T-8^fU#ca6w;^G#s&l#n@6So&5 zfzcpm>y6(W!>0BJch7AYeH+|UAIMD%)1-vCb}5f`gQdB}yl(Wk>#3#h+H$Yl-mhweRYXNdLdN#e+l&TZhizwE;1 z7-{_gU5~C4ft)yH^GB1z$sFQqfNoYg^(8%mU>BT1J;a)P&aoem{_x^KS&AKliz`h24O=U!Fe_(suNF@RBQQkN*0jWb4F_u?b%WD%4K? 
zu6-rm_U^~Ya{<4-XutpPgwTBA#7>p)=M6V=_$k{M=VE|Bicl;hx;>5i1;e7yuGshK ze#Ae|3qa0%diwmf4Ivo-WhldpIyUcd)WMdsv4P|N_&qNwj0NCU<7r0BtGVjnj94}e z0a-@ppjh0MfA@U9lGGzf6b?_%ocnZ2+*9v!7Mq{j8s{5w`iAoQHeDI1Q&)W&nHBtd zH$564D>jic5pqA^#_9b{xv9%}JWO)AHg&_)z;T-yL(z?anr`vY|7~d6zJccz!lEr% zc4d0bjPJ|ZF1z%pgagpm(Cc5PO*8)!IqAj^0q_s8AKA5!^2>yo;?@V!1^*9Jr{{0( ztK_L4E$Vx8hz?w|?KjyLV{8%Bc0hS9IyTT3E*du$Ub&Q{K;1h4#IoY?$;a1IcZOm0 z1V*(Xp|n2Ohh+e9TwAV4hRT#Mkw9x^Sv7V$IIqheI`&y+QsEpN6`3fy>~0N}5jHd= z@KRMzvo7v~1_fuSW0 z3(glTe|)uO?Yq0xRKMGz8w5_oI*1I#m9gEtr%rqjo!#wnqlCio>A!{!LM1P*_g++DWi+LHZxkGX1*D#6g`@kM0eWb&Neo)7-!lScOs+ zA_LS1$loIu-prr>-@iR$|L+L2l9NHWV88c~9SoVmtFCyd^e7?fi{uzC&d9n!jLZ0N zjO80PsATyG3~{s#vPSNTt3N(WYPsV=btt@-w$g9?kg28C@l#Fr>CK~0axr&3oLph- z6Sz+A`A5vOF-gb<_fT;>b81@>bVA zQ`|VdG#EKJWKmIlo9|wv1n7;>1VzaEv}4*qnd?>$c{8{(N-jrXW*7kN_{l0R|G2pwp8a2&R+#SR>F*75FGzSd(+wWIG}@NCW^dA;jPJzudfRM_l!M zjS=9EpI7b;-!)xar&2Yy8`d3{E<9Shzz1w{B4x!imyc(HUki#w!S37KvLeTelapeED_?Vc17G!aZ$Gi($o`6k zOJVzWPCI>MnO`8hOwA}LZfAs4S7#AC7hD@8gyLFWZF?c=x{C=nMp-TmcV5sIkGu3L zC}M<^UFjT@Czw!yVHV@%k3lLFN5~j$-(GTaH=;U9lq6d@jv8-Fk1SYM&rSg5*Rykm zhi0NtPAv^P9CC(arS{e2!#H~JX`?GpKSwI2OwyPa>{Rt$@Lp4Mr#ZI4Gc>8gI_BgZ zA4WklZ87)#seLb+dqa)9L&te+wcRa^OX=}T%iHvk)JH%`nq(|VV`WJCciuXcX@ ztNB$P9|cr|q|oP85C7uo4B#B9MGx2(=~yKSfaqvc{_MY@q(qv?zX;<9dZawOD-l>H zh@Wt4H_;M;RAh;Ourj7n8}F-PZ?^z5qL+s48+8{}ti?!QyMTX0_uOW+7-r8hbLxwl z7P|TT4ktHWDmt%cH#Fa5-{`ezahOsxv(>hu_a&paL+y9!S!&b<_kF4d{-Qz&&-toZ z)b%rqH9HUIQt)}_U7CA#*NocG=E}&acB^NUG@--*v-0U^X=voLnRQ!cA9y-kzDZ-( zr>BFm?QC+eG<`SkaWkn#?&@?ovjdIB5Jmkd_qP4hCR;*oW8yqMS-N>U4dB6~G$w_bn z=-C6^-Q^kfDc1ite|+JfW|CHdDvzhHCT!=$2enVE>FDt1R!^rThP?KsZ-{QF4XkHJ z9_b3-{K#LlhSh}OvugfGeH*Z{ulnXLV(O#Qd(WRLX$lSTq%ebmciKN{CTjdLM~9}dT1$Gv||0z%s1?&4FxuwADsVGWU4 zB)b~arF1MbgAqVI!=np;t4Fn7+Ov%J4S;vqlhCwgEr%=gPbvI@3I;&dcJmF_?+R_4 z{!hk6S%+ixY)G-j>U8wU=2SuAEYh%dvpa?;4@W#YRc`j>i-wnWyM~|bn|=1a?q;Yg zlfi$s)}gwDxbQVVoLc<-oS~v4_N4m+rL1Op6EARaT;;&iRkB$76Q2@RHl$J=v-QVp zx}MaY%s=p~d`zc$T_gEBUHMFk6-@z+)z~#9xiE{lz!FnDnWY@gPCQDLV}@=~S#A(? zb&g#e9uT42$UNrYDU;RSb+;JF0Bs#By(U5mr}Y@1nvO<1HjDApBLH8awhs^hE82A+ zQ3jCR$@Cn|W87h322BJ`m+MsIKbE5Su=lZovWJD694og*41iWZJ=@rq5T-jctUzP! 
zy%&eP{}9i;ll0xbx0n!A`cdY1#zUfXRA5sNC3gxFDHMBF0a4cU^C}Sh{f?w zkJSzKGq#+~J8)DzabziaXYTil2`;F8;6p<0<$o(y`^w%@MEcO|9OV9n1eB&g0Y?lb zT811t_;zf*9v8EAjl!ofLH|DoJj5sxT<5-)QG>LH8FprkxGUh4?GWHSp}QUN!jsd{ zthSF4S0v9C@1NvOne!bWdy-b=iZJ!XbrELw%wl(I+fM}t_%|5$WO_oCr{i1CMPrvg z!A+xJ?Zp_+S{4Cu9UwwzcNoOW4+;?&AHPt zve4nru92KiA(pT|H>5bdc;5VTciq(yKV#}_-27k{*KHlAEGgA9U+HY)B zrn*~$D3d1WAe6Elg+pp9UCjW|2|(CQCv;vkh_#WWsa+s@!|OAU!?pLYam!ZEHcN(NY)1l1C^iW#k0_1uzhn8W|`G=iQ&}DsbakpbR}o5I{p}>QsQ0r{p9|7})s&SdK#B z7BY}UczK4sUk}{zBrY­hvIr-OM0*h@EI zwfIW`9Lj?K9f1sk5c$KeF}lFg+!<-zpAXVrr0c$9>c04&vA;U_B_CwU$UDP*x?7Wb zVkS3kdQ14LF{QxG(o=3f1)&7^v4&-<0Wq^)#@wW*Od~Hi*$mPoL~N_uf;b&OIn_#j zJB!t?XJeGK1v?3HBZiTM%VfvGk&8KcHY46{qCU%^*8sJru*Lu@iHV`JFn(?qE|!A{ z*-S3T8mVM+^$-prju^QeSxyIy)V)f&DD}VaZJAmf=|;Tgs$D)e564B9<=D^+uBKvcP;7yII?d%M#BYAAFHMiLkq6(&U{lE-USg zU%tqwdpv=iO;hat(xenElLme0F4i@2W+{07p%?_Vd>gykLKp^2*ysTQAcFDUCJR|( zA=c{Y^|-5)lN#QjQ;msRZQvwj@g;@;{va_xPqq^i?<`{t>0vv;1`5dfp~qCTC*iW- zw*s&L*r-Tx%WU%Yu}mi=@1GYguv$Mr9odxTGgCt9eVMZTN`F{JCNnjVMSEKy@*3u{PwRAyG{IEE_^`_$1@>tzw&wV zs87k7Nq+?LFBzhl!M@C2Qf=orSZ4E=HQUv0y-?7cCjPKfa$L8?tw$sMM=R~l7o_TC z>;VKWkOhzFA%g5QZzMHG2I6FccY5B|9w!&b)KC^bU&)TP5THRP`!VQ)j6GICAQ%`N z;a*A>22*7gjWI`8@`v=SNgexr+=Vlyj7f8{blk%drXBXZ`-%Wfl3Jlk#;;HnAJz?aJbu#(nO+ z>w`akUUWukvnSQ>`%=F%(O0bMk7o(&^% z{xS}vWNQuVOo8xh$(rVEYlJdJ=BI$kTDFs6QGyG{cQ=32$no&ol87l0IO6Y5fBEml zH%Ou%_BV#q3?|QB^JK5z(+|g<=1iIOh7(z7;_JXHCCGCyq->?E9~kNy*uC`1m!+MB zYx%*;ZUpf6wb&mYbe02qo7fjgL7^0HDe3SpDLxxH{u3MTD%)LHR>&B$LVkq5dUc zqIwz!{Q zfbvva!h;)(e12bnL(fvhKaXk$(w`mw#Qj*c#x^uX?ZQI`ZMT@%8pF~psXN>+NMl3! z=LeRERh*>l&j3n!4(p8%_bEQiU0s&GAH{MtJJM9?|2*11Ks>bv^=g^=#eoarcXcJjJS9e_hp;xGhJ_KGNG&b~Ng|6=4xS?al7g&Oxc; zqoui1mo4WNl~72@7ek`+{5CcZCQw3e=3Z14cXLNB{ydSgrJK$(%#_%yTH{aYedo3! zBvcO2j=yHejc)HPyE8CUjNtD~JzlpDj>*rgQ7|Yy#Zyu0CPBpMX78{SZq%9`QT1&} zNCxf!{Yxx+`>*^Q1emZwp=%{LowY>ScfI)@`rHk?=C!DRzVZ8o6wVrc|GmX=ThDs3 zW<^Y0bI64k#N$_qt11muj)PS;S%$fn?5-N3+&;V*4qR(LXYC69vb9tBErZ-%O~ZuW z2Kf!b=8yZ-#kpIk+LCIixu@wIkus|^40IsF*#KPY zleepkz&kuIi||439~yZ(+8y295pSrVmIn3qWy=W}FsoKc?nil6E}9L9U5-(|uRYod z-OX9xHkLQM!Gg=N5R*{iEO^&n7kpwY(7249XTXR$e?lR%0>i3iJZ+5q+xANl_P+CG z&EFy0KOxQi@nvcM5x!07B~$7{=)wL_HtXRCn$XbJojs-7!57;%9h}fR z)R<3-Hmw->3uC+xSujasA+vT<-LvKjPlE~|YGoFyNY8F6q`ny7hR>$v8@O><&|Z}D zc;l(4n9a=8&7rD=>r}{#zu4wn4j>~~fvhGO69OFyWPJ*{&)g&fER7Rdrfd_ld@ToI4me3oN+5 zTkyJulIWJG;S3oeZB}oJn(kzJbULXIZLOt0G3`@jl04vnxoxTX>UD2%qFZrq0js^H zk+!c-XJul)o|k=19g*=i{%_lYsPl^kI@w}23uG_uj^22%bk*PemDlWhU2aAEzBX_t zD^dpg$l#{vru76Jx5lm^vBA8V8KdMJM9H9%U#qb_GJ52a$JyJ@ zyqLHO)*E5%j_5YU0ypMa27<6iAyur1yzydELF#&lI2 zPXN>cM6MAJ@hPPbqle)IMXo;0eZ3WS6Zl4@U65b{VBkn)V@3`%gt8ZGd2}T(az%9G z%4>hETFfu}_wg^ky;~nOG)Ve7yt{wphFhz{Z*4#H_wtMjHs|eIO?HIB59ETzgrqKT zdE4KgE{+FqATgXT`^_`G2Rhxu8|l6B?g2^D^Y%yE>m_|7+B8>SJQKN3aV5BBHC;8r zz*6(6gat^@(s)_6ImY#p{Fwoiaz$*@GbD;0g63c$M{mkOk;3KKh6K@y4!+K zm=v}?RExkVjpAZdArvf2J-1+cBzkPljAT$$Z@0}mK%VZ$bmE2kvJ7IX-{K4M&xioh zz)q`F$vPv1QmCwS5$pddy3e*I(zgxZlRjw>nsg1lW26^_Pz+TKQba%qMLKLyR1}3o zT4)lAN)ZfIP{1HnDTXRVG$0@%`VUwZP~3=R(N%o%yqXU%$1%C@E9dn)1r5wHE!(qi zdl>QJf|6ncQZxX-o5GD?#3>dMKxhO|?L3Q2?5`U81ZU(K2k3mcdh8)wZP)SevW#(3 z4sj^N|CF*%=?P|n)M8%BD65v1aVGS6Naq?Aa%5KkZ4iwN*D#j9DAY_S6c4AD> zD2r$oTkRw!W3TCtz|kqe@zTr4J#d0LnNubXUu1}-&CNbOmAgiDgeQ^zEx-ku80E+! 
z98XPY&&nUoHQ0TTM>Q~XBm-iBq*<0qW|G}7K+xt(z{M9w0$o5NZ}t{8sjd)GcV#0m z-U<&MJ&Z1)G1GA!RH&3KmB9p@L+DIGuGh({MiUmr?rnp_XEH3+B8xm}T#`r7r-~xgEpjkB$B$c@(M_m`J)v_?g&KA?Z7{8RPWJ zR7bU?UvKOnaDeG#+%0!%%HW(M(GD~_z&Y*IFjOZ2tu2oRZcDN+Bp$l6wd?_+_w$R( z)YSp`3*m8xe>fyw+uhlS+)@Q>!or#-01v~M}!+83SoYe*l|Nrc1O!-nF-@YJ+ z0c)sPs-jr#fwF<4km@u4Cp;B;B3voW#JtULmSQjXZCP?!Rh4>DMjZ)drNIM)?kG!ix zFbrjov4z_VKvMwUFTE-r0i>zK4nl6*k&*k^cHpkDDwsYw@0)zByfvc8#H!i*&PLgx zL^GRzlo`>M9ygGC`7r0^y|E`C$m|_$+n<#uI&j z>@et`%jBLN6M{Ig&mF1Bg-`>-p;eFbiZjC?RphaB$>rHlZGrTZT_$jCqTJJ2SdYNb zG9-K%juzA`dz}5z;XFsY#7!K@Fb>R1LuH5dnNt;ZT2V?wmA!VKwy3s6&*^VOZh zNAnw>*oITUa?{I%fcz2_S99agg**a{mqz3PnZe&7;)e)~GG8(jG0M<%JWaN){I|;T za@ZB|(p%rnG>Rbj#+#8_*+;Yqd$vbzrO6Uz@kBdZ2=J!5_-c@^s_VnZS4ZL!sCBBw zw>|!%mpHBX5xa#xYA&;zsYS7IF#HPSn&CR=O&Zp)O2}R=uf`k$n1@zI<-N|nPhh7J z!^YABa2*V&vajoDG+_wo#X$yeU0*Kv3sCoYP}mk>yY~-*Wn-E#OG}#Di-)4+b9Q+9@Z} znPRbnVgWs^*ozDl`@rfRezugM>gfF4*5`eh)V7Vy=h!ts^egOk%!eXN60+@j_ z6m0#^FmYfb@Cds*yrySVE82r@KNqVwv(~CT?zgb=Yfv;|8|u4B%CaVfgvJ46#x4TQ zDOH!J5EWsaHKtSWmI6pGG9(&ENRNNMOM3S>%Euo@>bjEw{?%42(E~GKWiRKb^+0X1 z29$aA;GnusTI;`$9yX@=Aaq${cbS{3@mz;Gb(v$3iFanPg9zDf(q$P#Q5c@4+Fial zqEYi3?JhXG8O|L_zj~_Ea|uba-7&4k!$r`BsI^=aZp3ZZ#GRU$Ju>7oiv7s7!&H%FReA!Z{kc>~d&RAde1 zl9dSr&w$jDF)M$AIs7c$kQC;4V$U^X#$w9R>mhm^Xig(E;a*5VWa{2~YJbkjqSDoZ zy46CoWM9*wl8U+2m$ZV^O4~EHXUZS1Ag+3s1<>7 zN8$U5#a2ldx~cSP0V<7%>7?RkNZ=un5lBbLkO3Ig1VgtZh?$$}ccGr>pcgE`%VxL7A_JuBxmxg={4Tb<=^65&Y< zlSj2{v`Omi8i8Ntg2&86whH1L4W%uF9bv<2`E>t-`Qlm^riDqD1Q0hqxNaI)MGZ`( zfGK=1i7$#USalME{vS}|pQxrrbW!mid1CoHv>+>b5QHR(l|#m5qiDXz3gk04I!^o0 zkl`fczDMjl9>^AeqQ>}a0L>&8+Ag5ONTRJ$uoJ*#2@Cb0MbC;*>QSs{F0Au-g&r3s zuL1k)bjC}|*kGN-W}Q(U_1i~@wcJu&4NK~MS53H^93AFPhbi-;?t$koT;v%~V zuu%aN>18v=kW2z*hXiT8J%XMl7*@)W5e zrvs^dzC!m!Og~x~WD<-#@!w>#gCF@lGGpuysdUW7mT}LPaj=bxVsm9T=3nI8&p=op zs<@8!?kbk9)$-#f?cG*9;T%{U4L>cU=L^q&x*GA6iak96iB}EWIC%4nrcMR{%;eQ% zkYW5Ys2T!(fred@pijd~KeENF~I0P8bI?% zDJg4UHc(zojSlCdPqL%yc(?*WoxV}?c{VD#5T$Baq&|`2K55&PUC}Q}sgf|=>>XT! 
zQN5wg4Wp*|S2?K(g*iQ>F#ZcMkreuobE65AU1I0VmtkYF5H1b!H>FRGn`#yf11ZC< zaJD>m1b`C&LlJ(*yOr`F4}nrnC!m8CAkL-fU(38N2~bxqk&(rr^Sb88Jx#@ zV~X{{@xv}R&d;R2iViIwFOE{EeDBxVD0g<1i=QiNxXy;3*|D;t7-=q_z1uSc;DHmL z))nMAiY7vEf!LTDeWjC+auYzlQeakWSRT9aoBZ`uYFi4A&E)1Eri(aZn41uhN^a@` z?p3kVr?pGpubp4>1_Q}J?s45m$7t%uOo&cW=MKU;GuRw~__UyGgbmvM*PcV{n8Jc# z+{{WET#A4h{Mq4=+-N+Pp>EH+jzB@P)7tuDc=VE1^YJPN}hDId9K)R5}wDnW3<$l5}A@k z0ISGlr6lCS6mEorqW1QCR-j^~P#N1}QG@}l?27v@Is;x-th=32C3s&EipN~Tb+U1$ zp7;rBy`I^_Xlp-9ouf%Bsny>&#;?r=Tv z{VmljHW-|TD5jlOq=ApAn4|uk6ABf7DLp#1|Iuo4#XIYk+46xx0y;-1p0Ye(T8s0% zfSn{=n5T*r_)a~a?ed!(FJd+DDcF7ic9T=t!^5tzCmhOJ`#9Kg655Lcxt1`A+zNXY zem4Lre`jU#&ja8fd1{t+WpW!msXf>49=I5Z+3Swqvwyhm1Nw%)_x)I~hJcy#jhNOR zF_yXj3kH+eUNAzG0S(K-bVv%zG#h%rF)MI)tXU_BZ0 zbLa8XDE`x(U;Pf;813Vathp>*+G0-WK&}B4%;)EFLG1JZAJmFS#=fI>>G{me^YIIe zdU|ebrIKvP{xCZi?5#>{S@V)++$>su^0RA|BE#22`6TL%LKE{PS6!m4RzjAG>dd28 zq2}y=pP%==Ic9MG)k#b`zcGa!xHjr7yxJJQe!qc$KXw9Jrrf4PgapRvj{aTx+n1&{ zbq}O2%pN=4&l6t*^3%C}*=*Gd!1K#=>>3#hzCed^A(u#)0`5~fyIJjibA~e)IA3YJeUWw}9mrvV8b0HgOL3vt){lv5+j*MN)m#UiHQl6fw6 zla1}BmoyN-o}4?E31C9&tPclWNkS+Rym$JaM$zijUOfGuzW`;!$L>JYI8b#G6u;Xh zk%78MR4Vu_j=_&7WwvZ4Z}BzSA3PTI701TD~u^j2d+^(Tkx73n5S-M5TG!QP7-k7NN zJ#F=O5}S+y4hx|>{pyx*q#^%Zg$K6v(aIYE;Rq^F1iJ805{56NQd%VNH++7w1j1d) zySvrT82s1!aMcfaosgsZKTI!Y_1@tRn|y39F)o}PrvI;$Q^uRFON#wZv@x|35>(Vv z4*vZj_&9}IMVK2s^vd6Kp20v(+z|c#KX~!P8yD64?9$^BU9pe6khB$iz&y&3{wc<$ zW}n+@l?V6GY>`=yjN3kYi@%}!1gm>x;@!>$_RLdW?O13sU_|Gk{!^-GvlPH>7FYcr z73{3x1|9_vP#3A6ukT(-dCZT!_Uh;cOKza6(jYtUhFowrd5wWj4hLhB(455$oln>{ zu~}jZc6Nd1#QR|J^wfhw#g;t z)ctSSjr`5~z=g-hJM(8aXBIj5kK9ymCc`;N3^9wolKIP<0HO zbAFU7YqRG^o+yWPRL$v6(eIr*XWU-2giVJ%Y4(}+I~2RvapuiLvA=n|T?P`d(H?Q+ zuX)_@wo?=9_3mY~^`*``$M&9RF^qcP^==#v#mTADxI}$sJ8mfFK#bW(1_b+d>;&&! znW=xb<0VOr9Cna>DniQ0ktM;gy&!22GFR%MLSbUgW29L|EB2Wf35?^o)XKx@j<)s& zgA>O7si+w_0*%Rpaq;>V4C2|imtDbH7IXoVA?`Jn*K`V9VtC?i^UsZUxw5C-y}6|- zU%VNWPWfxR38`DjRVHzuf_pK0MNBnDwKcV^rUoOa@GhoAT}N!3eNiafCm$u@de!$q zeVJd_gL;Ls=z9|iJ28o~no2iQtfIB6bF6fmQppxW82uZw>bDzdkit7Itrj6L2+wG$ z1^QlhO8>#zB9GR4A$b58&I_iLP#pKYePpD%Z{*Qaj2hg?X3lvy-%-gWU1AKH9ouey z`(vK*kd5`0^O2gtOy$_}?1o()<6({{=xX_b_oNwwjfr%L+I zk5Xv$Uq0XV7L^2euU~_r+_^phigQgBm8_rHjX@e8o67wX7Xi4sS@TfdEiN$ur> z)vmJi`3HOaCS%oQHZU**Fn+oI?A%gPW&t%M!XGY?Wt}hWkimk zuF>G4nw4O&VT#p=1U`o4pg7xzP`Q|8QQ$%gi`!V-b)W%2`0AaP+TrsgzK$5JB{X6} zMN8#uQj6ToibR3%__-b!D&=Zc2P2WAu?Qi!jR#y+NXXU9sohl@IYQiFWGN*w0AF|&bXAGIWX{2kS$KNmQ~8ptDR(2J+M$%k@kwV+Bga~t$zB&$ij zr!;zW>lF5mpQ%r6sa6?fu$DrosrsfrLi`xDVEa*}1$6LLUf#&C(|1su>B-RA$U!-n z3=#Z^Fq)hh>)@IlKHAqoaf>V0{(X>;W01Z_-9f|taW0`X)K@K5OYH0%RGcijxLZ+~ z7+>)OtY}uoJjUO<&}2@7huWh6(ds4*D$nPkY)EP1G$Kr%!vJ;*0bHw(bhLW(gzpiR1Grx0?L3;L+zv5-uvVLjH)70n1KN4-a{AS(bcPdziw8V^D zJ=<2i{(9>;uyw%o@Qn?xpD9Nlex9dj8n4@a+qi843ny6gsEBoKo@~oe9+Bh;)aJY= z2x-pgcqh(XvyTu$8k=P4ZQg4)H$-r{sIRj7+LZ(onE37$sO$qFOzuA(%0UPbr}NU} zmMxIwr^c~?fW*H^w^faVpRqK-WrbxPd_Thu?=F0+6%>Z>%yckNs%E`;50QW|9h5Le z@Zv2w)y=Yax8MsBrO-i?iK$G*|NebdDD;YP$e(wQKNErUhAAxoS(+7A$5@8$Y5vmXIu9IJV_)g3cBp5)>s_S++T?87wcsvPf6@PKZ0H2c5b|j^F(tpvnD!>;q;=e3{+5*_>^`1C#ph zOVK}Hc^x;1$)~!UkMh?0YNV~T^I`sG?+QLqE?|WfZgIrKW!)uac=i?b_EDVyy?ZB? 
z6e>PB>=_rO!r8OpP8;sw(A*8rTO9la+GC=h@(5YKS>iC~%$}~W+-MeT_i7U5uAOU+ z<>k5O4*#Ap+PwqGtA#nNyB(oCD`es#wBINmJvU}P;Bh7*?7)W+#syN51g>7T_r&3A6!f!_1*m%1iX9X-`<0Vedo+D+no zW$o8sXJ;YmP{@m5_#|J_E;0?~4AFFfPk?%v37xv1VPr7v@M)-SDNsIj2PTnqUJjKY zhp3gZ;YeB4I6Sa)9jKE+3X=!^PJXb~ZqtO#{O})A_pQxI5ZU_0hAU-DBOteo4ZplG zN|f631!8pVr*!F2CMc9vVz>P$-si$LS8X&uXm#~Ji>nr)SBuBRwp@{=+Isd46s^?ZNN2EjZ=#LiWBZME`>LF}tgmA;5Z8T+t?cw$eWhoUX4qt>}6-nnNiZNw#3w}z0#Deb_9je{3e`+G7X z*D~Xh_AY6w97WU1iu-10Qw`ZvYxA*psPTmD+QJ0rgO!% zO5?73N^M)=P^D4_>*a^r66iah3vL$G4SrBaUcMQyo@wLOhq-vSLD#uzF*C#1LI-*+ zySLA^xCg{#MRGF+SmP5-dc(qWs^jA$`z5C#__5`@xhC9I>8zM>WMFnCm7nL&MaGw+ z&Jwduk_N6TO$RC=)YZGx^}8x?l#p~vXx)HLz!+`Z-r((!rk87Q&)C+pdxcx~(n?*w zB=jDUaFS_+D2%&`mQ7pTADyW)KX~ErKhf$xe-!W(Z=d9-54YFZyZ1RYlY=D2Yq@T{ zCKVk`9g|ZQ*%cQ_>{m}yCS$dWTx?w+%I|^nv5fE?gh^&PN!LYF#>F{9MbbT7dz+@~0hYSN|XyC&j zHpQuNOi!&hPS5-_OxzXSt|Rs2YXVb_4rzJp+ETL5GWLlDcent^uB_S9Jg5-l<~B6^ z2=T%=PPoU#8iO(EX@_7i5X|R*k+l7!>^u7@&+4>`RHMdo2LK%OcP<6YC>c3&gz9O2 z$Yp1)h{u;yE$SEl=u9D{LuZgrZ=hgC1FFq@3Sr>-2z5@s%L&-eyJP0GJyd7nK`p?9Nhzo2LqhHt1DJhoxy4oosH}hOp8EB{he8p==^b` z{tUmq1*1x(rvz%e4B|-7)BkXJ+*xOjam%xetn;N==dEmJO@_+V&FYNJ+KkQcqS0@L zmviMeNkOwV7e(`)(bKoDcDv(wA{#7xY^nYdzDoJluxdpdr9lg?={JKxu{-Z%H)eBQk8 zg^F?u>eU_NDa(8Feo5KMea02Ogg7!e=n~s@7#U9;Vfl}@dpy1OVwpo7&$!{3_vT5t za4OL`i|_r4LpyQp6|(m$@`X1$+Bv84)eFwI7mr{((^LDsI$B=S=r6mKGaclRj)K6$ukINPt9<5`beuM6Locy$7o3>7%8RUogxQ63_XW82MJx&+*KHUrueStpLkxT0d zhVPcAPMahMZ(_WtiPRIg!&%tFuV8PGS`TOmi-NgVg7|HL(xWt@xCgWi~r2uvsN8G@{RJ9 KSHb`Q>;D6ktN}y- From 58e01eabf64a4c9f297bb7ba94c0db668c960d9e Mon Sep 17 00:00:00 2001 From: Henry Tan Date: Wed, 15 Jul 2020 18:07:53 -0700 Subject: [PATCH 0572/2522] Pass the Env* pointer to ExitCountdown instead of OpKernelContext* as when it executes ExitCountdown on another thread, the OpKernelContext is already not alive when it executes, therefore it should pass the env directly instead of OpKernelContext PiperOrigin-RevId: 321476600 Change-Id: I79410d0b3b8dd96b366a7a413c8b211950b6c2b0 --- tensorflow/core/tpu/kernels/tpu_compile_op_common.cc | 7 ++++--- tensorflow/core/tpu/kernels/tpu_compile_op_common.h | 3 +-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc b/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc index 21cf6d68301..f9e3507f2d8 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_common.cc @@ -371,11 +371,11 @@ Status TpuCompileOpKernelCommon::CompileTFFunctionToHlo( } /* static */ void TpuCompileOpKernelCommon::ExitCountdown( - OpKernelContext* ctx, std::shared_ptr> done) { + Env* env, std::shared_ptr> done) { const int kSleepSeconds = 300; LOG(INFO) << "TpuCompileOp was cancelled. Sleeping for " << kSleepSeconds << " seconds to give time for TPUCompileOp to finished."; - ctx->env()->SleepForMicroseconds(kSleepSeconds * 1000000); + env->SleepForMicroseconds(kSleepSeconds * 1000000); if (done->load()) { // If the TPUCompileOp has finished, then terminate peacefully. return; @@ -562,7 +562,8 @@ void TpuCompileOpKernelCommon::Compute(OpKernelContext* ctx) { // Sleep and exit in another thread so the cancellation manager can // continue running callbacks. 
- ctx->env()->SchedClosure([ctx, done]() { ExitCountdown(ctx, done); }); + Env* env = ctx->env(); + env->SchedClosure([env, done]() { ExitCountdown(env, done); }); }); // If the RPC was cancelled before we registered the cancellation callback, diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_common.h b/tensorflow/core/tpu/kernels/tpu_compile_op_common.h index 567d5973226..1bbe698cb37 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_common.h +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_common.h @@ -120,8 +120,7 @@ class TpuCompileOpKernelCommon { // Sleeps for `kSleepSeconds` seconds to give time for TPUCompileOp to finish // before terminating peacefully. - static void ExitCountdown(OpKernelContext* ctx, - std::shared_ptr> done); + static void ExitCountdown(Env* env, std::shared_ptr> done); // Converts the `dynamic_shapes` arguments to the compile operator into // TensorShapes. From 55dc4ab4f8f4d74191669a878f5ca0077806981d Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Wed, 15 Jul 2020 18:13:09 -0700 Subject: [PATCH 0573/2522] Fix nested namespace usage Nested namespaces require C++17 PiperOrigin-RevId: 321477241 Change-Id: If0c2ca3794646434bc61bf71b7fbba41aa751fa7 --- tensorflow/compiler/mlir/tensorflow/ir/tf_op_interfaces.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_op_interfaces.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_op_interfaces.cc index 3e99f1e162b..ffcc9f7dd4f 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_op_interfaces.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_op_interfaces.cc @@ -15,6 +15,8 @@ limitations under the License. #include "tensorflow/compiler/mlir/tensorflow/ir/tf_op_interfaces.h" -namespace mlir::TF { +namespace mlir { +namespace TF { #include "tensorflow/compiler/mlir/tensorflow/ir/tf_op_interfaces.cc.inc" -} // namespace mlir::TF +} // namespace TF +} // namespace mlir From fd535403c0119a4649e15edb898e85c0dc706eda Mon Sep 17 00:00:00 2001 From: Berkin Ilbeyi Date: Wed, 15 Jul 2020 18:16:13 -0700 Subject: [PATCH 0574/2522] [XLA] Implement a more sophisticated prefetch interval ordering. We previously used latest-to-earliest order in the prefetch interval picker. This can result in prefetches to start unnecessarily late. With this CL, a preferred prefetch interval is specified and the prefetch interval picker starts at this preferred interval and returns alternating ascending and descending indices. This can help cases where we spend a long time in copy-dones before a while loop (3% improvement in b/161249728). 
PiperOrigin-RevId: 321477583 Change-Id: Icd88c306c5f2bf7cd55e693fc7d040d93f05b70d --- .../xla/service/memory_space_assignment.cc | 145 ++++++++---- .../xla/service/memory_space_assignment.h | 36 ++- .../service/memory_space_assignment_test.cc | 216 +++++++++++++++++- 3 files changed, 345 insertions(+), 52 deletions(-) diff --git a/tensorflow/compiler/xla/service/memory_space_assignment.cc b/tensorflow/compiler/xla/service/memory_space_assignment.cc index 66100358d97..874200e7692 100644 --- a/tensorflow/compiler/xla/service/memory_space_assignment.cc +++ b/tensorflow/compiler/xla/service/memory_space_assignment.cc @@ -199,6 +199,12 @@ float MemorySpaceAssignmentCostAnalysis::GetInstructionElapsedDueToMemory( } float MemorySpaceAssignmentCostAnalysis::GetInstructionElapsed( + const HloInstruction& instruction) const { + return std::max(GetInstructionElapsedDueToCompute(instruction), + GetInstructionElapsedDueToMemory(instruction)); +} + +float MemorySpaceAssignmentCostAnalysis::GetInstructionElapsedInAlternateMemory( const HloInstruction& instruction, absl::optional operand_in_alternate_mem, bool output_in_alternate_mem) const { @@ -258,12 +264,15 @@ std::string InstructionCountPrefetchIntervalPicker::ToNoCopyDebugString( CostAnalysisPrefetchIntervalPicker::CostAnalysisPrefetchIntervalPicker( const MemorySpaceAssignmentCostAnalysis& cost_analysis, float min_async_copy_to_overlap_ratio, - float max_async_copy_to_overlap_ratio) + float max_async_copy_to_overlap_ratio, + float preferred_async_copy_to_overlap_ratio) : while_nest_level_( cost_analysis.hlo_live_range().instruction_schedule().size(), 0), cost_analysis_(cost_analysis), min_async_copy_to_overlap_ratio_(min_async_copy_to_overlap_ratio), - max_async_copy_to_overlap_ratio_(max_async_copy_to_overlap_ratio) { + max_async_copy_to_overlap_ratio_(max_async_copy_to_overlap_ratio), + preferred_async_copy_to_overlap_ratio_( + preferred_async_copy_to_overlap_ratio) { instruction_schedule_ = &cost_analysis_.hlo_live_range().instruction_schedule(); @@ -281,7 +290,7 @@ CostAnalysisPrefetchIntervalPicker::CostAnalysisPrefetchIntervalPicker( instruction->opcode() == HloOpcode::kConditional) { continue; } - float elapsed_time = cost_analysis_.cost_analysis().optimal_seconds( + float elapsed_time = cost_analysis_.GetInstructionElapsed( *instruction_and_logical_time.first); int64 logical_time = instruction_and_logical_time.second; if (logical_time >= instructions_elapsed_time.size()) { @@ -355,52 +364,107 @@ void CostAnalysisPrefetchIntervalPicker::Begin(const HloUse& use, async_copy_elapsed_ = cost_analysis_.GetAsyncCopyElapsed(shape); // Estimate the time we would save by having this op in alternate memory. float elapsed_time = cost_analysis_.GetInstructionElapsed(*use.instruction); - float elapsed_time_in_alternate_mem = cost_analysis_.GetInstructionElapsed( - *use.instruction, use.operand_number); + float elapsed_time_in_alternate_mem = + cost_analysis_.GetInstructionElapsedInAlternateMemory( + *use.instruction, use.operand_number, + /*output_in_alternate_mem=*/false); inst_elapsed_reduction_ = elapsed_time - elapsed_time_in_alternate_mem; end_logical_time_ = end_time; - earliest_start_logical_time_ = start_time; - int end_nest_level = while_nest_level_[end_time]; - // Find the latest time we're allowed to start prefetching. If the start and - // end nest levels differe look for an earlier prefetch start. 
- for (current_logical_prefetch_time_ = end_time - 1; - current_logical_prefetch_time_ > start_time && - (while_nest_level_[current_logical_prefetch_time_] != end_nest_level || - min_async_copy_to_overlap_ratio_ * async_copy_elapsed_ > - GetLogicalIntervalElapsed(current_logical_prefetch_time_, - end_logical_time_) + - inst_elapsed_reduction_); - --current_logical_prefetch_time_) { + int end_nest_level = while_nest_level_[end_logical_time_]; + + // Find the latest time we're allowed to start prefetching. + float min_interval = min_async_copy_to_overlap_ratio_ * async_copy_elapsed_; + for (latest_prefetch_time_ = end_logical_time_ - 1; + latest_prefetch_time_ >= start_time && + (while_nest_level_[latest_prefetch_time_] != end_nest_level || + min_interval > GetLogicalIntervalElapsed(latest_prefetch_time_, + end_logical_time_) + + inst_elapsed_reduction_); + --latest_prefetch_time_) { } + + // Find the earliest time we're allowed to start prefetching. + float max_interval = max_async_copy_to_overlap_ratio_ * + max_overlap_multiplier_ * async_copy_elapsed_; + for (earliest_prefetch_time_ = start_time; + earliest_prefetch_time_ <= end_logical_time_ && + (while_nest_level_[earliest_prefetch_time_] != end_nest_level || + max_interval < GetLogicalIntervalElapsed(earliest_prefetch_time_, + end_logical_time_)); + ++earliest_prefetch_time_) { + } + if (earliest_prefetch_time_ > latest_prefetch_time_) { + // There is no available prefetch interval for the given start and end + // times. Set the iterators accordingly to ensure Done() returns true. + increasing_prefetch_time_iterator_ = earliest_prefetch_time_; + decreasing_prefetch_time_iterator_ = latest_prefetch_time_; + CHECK(Done()); + return; + } + + // Between the earliest and latest prefetch interval, find the interval + // closest to the preferred interval and start iterating from there. + int64 starting_prefetch_time = earliest_prefetch_time_; + float preferred_interval = + preferred_async_copy_to_overlap_ratio_ * async_copy_elapsed_; + float best_interval = + GetLogicalIntervalElapsed(earliest_prefetch_time_, end_logical_time_); + for (int64 prefetch_time = earliest_prefetch_time_ + 1; + prefetch_time <= latest_prefetch_time_; ++prefetch_time) { + float interval = + GetLogicalIntervalElapsed(prefetch_time, end_logical_time_); + if (while_nest_level_[prefetch_time] == end_nest_level && + std::abs(preferred_interval - interval) < + std::abs(preferred_interval - best_interval)) { + best_interval = interval; + starting_prefetch_time = prefetch_time; + } + } + VLOG(4) << "Interval min/max/preferred = " << min_interval << " " + << max_interval << " " << preferred_interval + << " prefetch time earliest/latest/starting = " + << earliest_prefetch_time_ << " " << latest_prefetch_time_ << " " + << starting_prefetch_time; + + increasing_prefetch_time_iterator_ = starting_prefetch_time; + decreasing_prefetch_time_iterator_ = starting_prefetch_time; + using_increasing_prefetch_time_iterator_ = true; + // Since both iterators start at the same position, call Next() once to + // advance one of the iterators. 
+ Next(); } int64 CostAnalysisPrefetchIntervalPicker::Next() { CHECK(!Done()) << "Prefetch interval picker's Next() is called even though " "Done() is false"; - int64 prefetch_time = current_logical_prefetch_time_; - if (!Done()) { - --current_logical_prefetch_time_; + if (using_increasing_prefetch_time_iterator_) { + int64 prefetch_time = increasing_prefetch_time_iterator_++; + while (increasing_prefetch_time_iterator_ <= latest_prefetch_time_ && + while_nest_level_[increasing_prefetch_time_iterator_] != + while_nest_level_[end_logical_time_]) { + ++increasing_prefetch_time_iterator_; + } + if (decreasing_prefetch_time_iterator_ >= earliest_prefetch_time_) { + using_increasing_prefetch_time_iterator_ = false; + } + return prefetch_time; + } else { + int64 prefetch_time = decreasing_prefetch_time_iterator_--; + while (decreasing_prefetch_time_iterator_ >= earliest_prefetch_time_ && + while_nest_level_[decreasing_prefetch_time_iterator_] != + while_nest_level_[end_logical_time_]) { + --decreasing_prefetch_time_iterator_; + } + if (increasing_prefetch_time_iterator_ <= latest_prefetch_time_) { + using_increasing_prefetch_time_iterator_ = true; + } + return prefetch_time; } - // If the prefetch start and end times differ, look for an earlier prefetch - // start. - while (!Done() && while_nest_level_[current_logical_prefetch_time_] != - while_nest_level_[end_logical_time_]) { - --current_logical_prefetch_time_; - } - return prefetch_time; } bool CostAnalysisPrefetchIntervalPicker::Done() const { - if (current_logical_prefetch_time_ < earliest_start_logical_time_) { - return true; - } - float logical_interval_elapsed = GetLogicalIntervalElapsed( - current_logical_prefetch_time_, end_logical_time_); - return (max_async_copy_to_overlap_ratio_ * max_overlap_multiplier_ * - async_copy_elapsed_ < - logical_interval_elapsed) || - (min_async_copy_to_overlap_ratio_ * async_copy_elapsed_ > - logical_interval_elapsed + inst_elapsed_reduction_); + return increasing_prefetch_time_iterator_ > latest_prefetch_time_ && + decreasing_prefetch_time_iterator_ < earliest_prefetch_time_; } void CostAnalysisPrefetchIntervalPicker::SetRetryNumber(int retry_number) { @@ -440,13 +504,16 @@ float CostAnalysisPrefetchIntervalPicker::GetLogicalIntervalElapsed( } std::string CostAnalysisPrefetchIntervalPicker::ToDebugString() const { + int current_logical_prefetch_time = using_increasing_prefetch_time_iterator_ + ? 
increasing_prefetch_time_iterator_ + : decreasing_prefetch_time_iterator_; float logical_interval_elapsed = GetLogicalIntervalElapsed( - current_logical_prefetch_time_, end_logical_time_); + current_logical_prefetch_time, end_logical_time_); return absl::StrCat( "Async copy elapsed (s) = ", async_copy_elapsed_, ", inst elapsed reduction (s) = ", inst_elapsed_reduction_, ", logical interval elapsed (s) = ", logical_interval_elapsed, - ", interval = (", current_logical_prefetch_time_, ", ", end_logical_time_, + ", interval = (", current_logical_prefetch_time, ", ", end_logical_time_, ")"); } diff --git a/tensorflow/compiler/xla/service/memory_space_assignment.h b/tensorflow/compiler/xla/service/memory_space_assignment.h index 8f2002009b2..d1b508a6a85 100644 --- a/tensorflow/compiler/xla/service/memory_space_assignment.h +++ b/tensorflow/compiler/xla/service/memory_space_assignment.h @@ -84,6 +84,8 @@ class MemorySpaceAssignmentCostAnalysis { absl::flat_hash_map while_nest_multiplier; }; + virtual ~MemorySpaceAssignmentCostAnalysis() = default; + static StatusOr> Create( const HloCostAnalysis& cost_analysis, float async_copy_bandwidth_bytes_per_second, @@ -126,18 +128,23 @@ class MemorySpaceAssignmentCostAnalysis { // BufferInterval is prefetched. float GetInstructionElapsedDueToMemorySlowdown(int64 bytes) const; + // Returns the estimated elapsed duration of the instruction in seconds. It + // assumes all operands and outputs of the instruction are in the default + // memory. + virtual float GetInstructionElapsed(const HloInstruction& instruction) const; + // Returns the estimated elapsed duration of the instruction in seconds. It // assumes all operands and outputs of the instruction are in the default // memory, except for the operand number that is in the alternate memory, if // provided, or output if output_in_alternate_mem is true. - float GetInstructionElapsed( + virtual float GetInstructionElapsedInAlternateMemory( const HloInstruction& instruction, - absl::optional operand_in_alternate_mem = absl::nullopt, - bool output_in_alternate_mem = false) const; + absl::optional operand_in_alternate_mem, + bool output_in_alternate_mem) const; // Returns the elapsed time it would take to asynchronously copy the shape // from default to alternate memory space (or vice versa). - float GetAsyncCopyElapsed(const Shape& shape) const; + virtual float GetAsyncCopyElapsed(const Shape& shape) const; int64 GetScheduleEndTime() const; @@ -147,7 +154,7 @@ class MemorySpaceAssignmentCostAnalysis { const HloLiveRange& hlo_live_range() const { return *hlo_live_range_; } - private: + protected: MemorySpaceAssignmentCostAnalysis( const HloCostAnalysis& cost_analysis, float async_copy_bandwidth_bytes_per_second, @@ -164,6 +171,7 @@ class MemorySpaceAssignmentCostAnalysis { hlo_live_range_(std::move(hlo_live_range)), call_graph_(std::move(call_graph)) {} + private: const HloCostAnalysis& cost_analysis_; float async_copy_bandwidth_bytes_per_second_; float alternate_mem_bandwidth_bytes_per_second_; @@ -267,16 +275,16 @@ class InstructionCountPrefetchIntervalPicker : public PrefetchIntervalPicker { // Prefetch interval picker that uses cost analysis to overlap asynchronous // copies with independent computation. It uses min/max (asynchronous copy // duration) / (independent computation duration) ratios to guide whether the -// prefetch is within those bounds. 
It starts with the maximum allowed ratio -// (earliest prefetch) in Begin() and works its way for later and later prefetch -// with each Next() call until hitting the minimum ratio, in order not to hurt -// the critical path. +// prefetch is within those bounds. It starts with the preferred ratio in +// Begin() and works its way for alternately earlier and later prefetches until +// hitting min and max ratios. class CostAnalysisPrefetchIntervalPicker : public PrefetchIntervalPicker { public: CostAnalysisPrefetchIntervalPicker( const MemorySpaceAssignmentCostAnalysis& cost_analysis, float min_async_copy_to_overlap_ratio, - float max_async_copy_to_overlap_ratio); + float max_async_copy_to_overlap_ratio, + float preferred_async_copy_to_overlap_ratio); bool CanAllocateInAlternateMemoryNoCopy(const Shape& shape, int64 start_time, int64 end_time) const override; @@ -319,13 +327,17 @@ class CostAnalysisPrefetchIntervalPicker : public PrefetchIntervalPicker { const MemorySpaceAssignmentCostAnalysis& cost_analysis_; float min_async_copy_to_overlap_ratio_; float max_async_copy_to_overlap_ratio_; + float preferred_async_copy_to_overlap_ratio_; float max_overlap_multiplier_ = 1.0; float async_copy_elapsed_; float inst_elapsed_reduction_; int64 end_logical_time_; - int64 earliest_start_logical_time_; - int64 current_logical_prefetch_time_; + int64 earliest_prefetch_time_; + int64 latest_prefetch_time_; + bool using_increasing_prefetch_time_iterator_; + int64 increasing_prefetch_time_iterator_; + int64 decreasing_prefetch_time_iterator_; }; // MemorySpaceAssignment assigns memory spaces (default or alternate) to each diff --git a/tensorflow/compiler/xla/service/memory_space_assignment_test.cc b/tensorflow/compiler/xla/service/memory_space_assignment_test.cc index 10e11e55291..a92b73cfeb4 100644 --- a/tensorflow/compiler/xla/service/memory_space_assignment_test.cc +++ b/tensorflow/compiler/xla/service/memory_space_assignment_test.cc @@ -60,7 +60,8 @@ class MemorySpaceAssignmentTest : public HloTestBase, CostAnalysisPrefetchIntervalPicker prefetch_interval_picker( CostAnalysisPrefetchIntervalPicker( *cost_analysis, /*min_async_copy_to_overlap_ratio=*/0.8, - /*max_async_copy_to_overlap_ratio=*/10.0)); + /*max_async_copy_to_overlap_ratio=*/10.0, + /*preferred_async_copy_to_overlap_ratio=*/1.5)); return AssignMemorySpace( module, /*max_outstanding_async_copies=*/-1, MemorySpaceAssignment::GetMemoryBoundednessBufferIntervalCompare( @@ -4045,5 +4046,218 @@ TEST_P(MemorySpaceAssignmentTest, CrossProgramPrefetchFusionTest) { EXPECT_EQ(cross_program_prefetches.size(), 0); } +// For testing purposes, we define a cost analysis where we can control the +// elapsed times of each HLO and asynchronous copy. 
+class FakeMemorySpaceAssignmentCostAnalysis + : public MemorySpaceAssignmentCostAnalysis { + public: + static StatusOr> + Create(const HloCostAnalysis& cost_analysis, const HloModule& module) { + TF_ASSIGN_OR_RETURN(auto alias_analysis, HloAliasAnalysis::Run(&module)); + TF_ASSIGN_OR_RETURN(auto hlo_live_range, + HloLiveRange::Run(module.schedule(), *alias_analysis, + module.entry_computation())); + auto call_graph = CallGraph::Build(&module); + return absl::WrapUnique(new FakeMemorySpaceAssignmentCostAnalysis( + cost_analysis, /*async_copy_bandwidth_bytes_per_second=*/1, + /*alternate_mem_bandwidth_bytes_per_second=*/1, + std::move(alias_analysis), std::move(hlo_live_range), + std::move(call_graph))); + } + + float GetInstructionElapsed( + const HloInstruction& instruction) const override { + return 1.0; + } + + float GetInstructionElapsedInAlternateMemory( + const HloInstruction& instruction, + absl::optional operand_in_alternate_mem, + bool output_in_alternate_mem) const override { + if (operand_in_alternate_mem) { + return 0.5; + } else { + return 1.0; + } + } + + float GetAsyncCopyElapsed(const Shape& shape) const override { return 3.0; } + + protected: + FakeMemorySpaceAssignmentCostAnalysis( + const HloCostAnalysis& cost_analysis, + float async_copy_bandwidth_bytes_per_second, + float alternate_mem_bandwidth_bytes_per_second, + std::unique_ptr alias_analysis, + std::unique_ptr hlo_live_range, + std::unique_ptr call_graph) + : MemorySpaceAssignmentCostAnalysis( + cost_analysis, async_copy_bandwidth_bytes_per_second, + alternate_mem_bandwidth_bytes_per_second, std::move(alias_analysis), + std::move(hlo_live_range), std::move(call_graph)) {} +}; + +using CostAnalysisPrefetchIntervalPickerTest = HloTestBase; + +TEST_F(CostAnalysisPrefetchIntervalPickerTest, PrefetchIntervalOrder) { + absl::string_view hlo_string = R"( + HloModule bug, is_scheduled=true + + ENTRY Entry { + param0 = f32[2,4] parameter(0) + a = f32[2,4] negate(param0) + b = f32[2,4] negate(a) + c = f32[2,4] negate(b) + d = f32[2,4] negate(c) + e = f32[2,4] negate(d) + f = f32[2,4] negate(e) + g = f32[2,4] negate(f) + h = f32[2,4] negate(g) + i = f32[2,4] negate(h) + j = f32[2,4] negate(i) + k = f32[2,4] negate(j) + l = f32[2,4] negate(k) + m = f32[2,4] negate(l) + n = f32[2,4] negate(m) + o = f32[2,4] negate(n) + p = f32[2,4] negate(o) + q = f32[2,4] negate(p) + r = f32[2,4] negate(q) + s = f32[2,4] negate(r) + t = f32[2,4] negate(s) + u = f32[2,4] negate(t) + ROOT v = f32[2,4] add(u, param0) + } + )"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + + HloCostAnalysis hlo_cost_analysis(ShapeSize); + TF_ASSERT_OK_AND_ASSIGN(auto cost_analysis, + FakeMemorySpaceAssignmentCostAnalysis::Create( + hlo_cost_analysis, *module)); + CostAnalysisPrefetchIntervalPicker interval_picker( + *cost_analysis, + /*min_async_copy_to_overlap_ratio=*/1.0, + /*max_async_copy_to_overlap_ratio=*/4.0, + /*preferred_async_copy_to_overlap_ratio=*/2.0); + + HloInstruction* root = module->entry_computation()->root_instruction(); + const HloUse use{root, /*operand_number=*/1, /*operand_index=*/{}}; + interval_picker.Begin(use, /*start_time=*/0, /*end_time=*/22); + + // Expect that the first interval is (15, 22), which has elapsed time of 6.0, + // twice of the async copy elased (3.0). Then we expect that intervals will be + // visited in alternating increasing and decreasing orders until hitting the + // min and max async copy overlap ratios, which are the intervals (18, 22) + // and (9, 22) respectively. 
+ LOG(INFO) << interval_picker.ToDebugString(); + EXPECT_EQ(interval_picker.Next(), 15); + LOG(INFO) << interval_picker.ToDebugString(); + EXPECT_EQ(interval_picker.Next(), 16); + LOG(INFO) << interval_picker.ToDebugString(); + EXPECT_EQ(interval_picker.Next(), 14); + LOG(INFO) << interval_picker.ToDebugString(); + EXPECT_EQ(interval_picker.Next(), 17); + LOG(INFO) << interval_picker.ToDebugString(); + EXPECT_EQ(interval_picker.Next(), 13); + LOG(INFO) << interval_picker.ToDebugString(); + EXPECT_EQ(interval_picker.Next(), 18); // Min async overlap ratio reached. + LOG(INFO) << interval_picker.ToDebugString(); + EXPECT_EQ(interval_picker.Next(), 12); + LOG(INFO) << interval_picker.ToDebugString(); + EXPECT_EQ(interval_picker.Next(), 11); + LOG(INFO) << interval_picker.ToDebugString(); + EXPECT_EQ(interval_picker.Next(), 10); + LOG(INFO) << interval_picker.ToDebugString(); + EXPECT_EQ(interval_picker.Next(), 9); // Max async overlap ratio reached. + LOG(INFO) << interval_picker.ToDebugString(); + EXPECT_TRUE(interval_picker.Done()); + + // Expect that if the time between start_time and end_time is too short, there + // won't be any available intervals. + interval_picker.Begin(use, /*start_time=*/19, /*end_time=*/22); + LOG(INFO) << interval_picker.ToDebugString(); + EXPECT_TRUE(interval_picker.Done()); +} + +TEST_F(CostAnalysisPrefetchIntervalPickerTest, PrefetchIntervalOrderWhile) { + absl::string_view hlo_string = R"( + HloModule bug, is_scheduled=true + + while_condition { + param1 = (f32[2,4]) parameter(0) // 19 + ROOT cond = pred[] constant(true) // 20 + } + + while_body { + param2 = (f32[2,4]) parameter(0) // 21 + gte2 = f32[2,4] get-tuple-element(param2), index=0 // 22 + add = f32[2,4] add(gte2, gte2) // 23 + ROOT tuple2 = (f32[2,4]) tuple(add) // 24 + } + + ENTRY Entry { + param0 = f32[2,4] parameter(0) // 0 + a = f32[2,4] negate(param0) // 1 + b = f32[2,4] negate(a) // 2 + c = f32[2,4] negate(b) // 3 + d = f32[2,4] negate(c) // 4 + e = f32[2,4] negate(d) // 5 + f = f32[2,4] negate(e) // 6 + g = f32[2,4] negate(f) // 7 + h = f32[2,4] negate(g) // 8 + i = f32[2,4] negate(h) // 9 + j = f32[2,4] negate(i) // 10 + k = f32[2,4] negate(j) // 11 + l = f32[2,4] negate(k) // 12 + m = f32[2,4] negate(l) // 13 + n = f32[2,4] negate(m) // 14 + o = f32[2,4] negate(n) // 15 + p = f32[2,4] negate(o) // 16 + q = f32[2,4] negate(p) // 17 + tuple = (f32[2,4]) tuple(q) // 18 + while = (f32[2,4]) while(tuple), condition=while_condition, body=while_body // 25 + gte1 = f32[2,4] get-tuple-element(while), index=0 // 26 + r = f32[2,4] negate(gte1) // 27 + s = f32[2,4] negate(r) // 28 + t = f32[2,4] negate(s) // 29 + u = f32[2,4] negate(t) // 30 + ROOT v = f32[2,4] add(u, param0) // 31 + } + )"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + + HloCostAnalysis hlo_cost_analysis(ShapeSize); + TF_ASSERT_OK_AND_ASSIGN(auto cost_analysis, + FakeMemorySpaceAssignmentCostAnalysis::Create( + hlo_cost_analysis, *module)); + CostAnalysisPrefetchIntervalPicker interval_picker( + *cost_analysis, + /*min_async_copy_to_overlap_ratio=*/1.0, + /*max_async_copy_to_overlap_ratio=*/12.0, + /*preferred_async_copy_to_overlap_ratio=*/2.0); + + HloInstruction* root = module->entry_computation()->root_instruction(); + const HloUse use{root, /*operand_number=*/1, /*operand_index=*/{}}; + interval_picker.Begin(use, /*start_time=*/0, /*end_time=*/31); + + // Because there are while loop computations between [19, 24], we ensure that + // the interval picker avoids this interval. 
+ LOG(INFO) << interval_picker.ToDebugString(); + EXPECT_EQ(interval_picker.Next(), 25); + LOG(INFO) << interval_picker.ToDebugString(); + EXPECT_EQ(interval_picker.Next(), 26); + LOG(INFO) << interval_picker.ToDebugString(); + EXPECT_EQ(interval_picker.Next(), 18); + LOG(INFO) << interval_picker.ToDebugString(); + EXPECT_EQ(interval_picker.Next(), 27); // Min async overlap ratio reached. + LOG(INFO) << interval_picker.ToDebugString(); + EXPECT_EQ(interval_picker.Next(), 17); // Max async overlap ratio reached. + LOG(INFO) << interval_picker.ToDebugString(); + EXPECT_TRUE(interval_picker.Done()); +} + } // namespace } // namespace xla From 540f8dbdd86f868fbe1de50525a9ad47db1aa9a2 Mon Sep 17 00:00:00 2001 From: Anna R Date: Wed, 15 Jul 2020 18:17:02 -0700 Subject: [PATCH 0575/2522] Delete modules that shouldn't be part of dir(). We used to have these deletions before. They were no longer needed when we used virtual pip package setup.But now they are needed again since virtual pip package was removed. PiperOrigin-RevId: 321477672 Change-Id: I642e6d69bae8db721cec14cae36163e271a077be --- RELEASE.md | 2 ++ tensorflow/api_template.__init__.py | 19 +++++++++++++++++ tensorflow/api_template_v1.__init__.py | 21 +++++++++++++++++++ .../python/ops/numpy_ops/np_interop_test.py | 2 +- tensorflow/tools/api/tests/module_test.py | 8 ++++--- tensorflow/tools/docs/tf_doctest.py | 12 +++++++---- 6 files changed, 56 insertions(+), 8 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index d68eca00f44..534309a8be5 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -12,6 +12,8 @@ `TF_StringEncodedSize` are no longer relevant and have been removed; see core/platform/ctstring.h for string access/modification in C. * Removed `tf.distribute.Strategy.experimental_run_v2` method, which was deprecated in TF 2.2. +* `tensorflow.python`, `tensorflow.core` and `tensorflow.compiler` modules are + now hidden. These modules are not part of TensorFlow public API. ## Known Caveats diff --git a/tensorflow/api_template.__init__.py b/tensorflow/api_template.__init__.py index f0f977aa0b5..0cd2b7da139 100644 --- a/tensorflow/api_template.__init__.py +++ b/tensorflow/api_template.__init__.py @@ -158,4 +158,23 @@ if hasattr(_current_module, 'keras'): setattr(_current_module, "initializers", initializers) # pylint: enable=undefined-variable +# Delete modules that should be hidden from dir(). +# Don't fail if these modules are not available. +# For e.g. this file will be originally placed under tensorflow/_api/v1 which +# does not have 'python', 'core' directories. Then, it will be copied +# to tensorflow/ which does have these two directories. +# pylint: disable=undefined-variable +try: + del python +except NameError: + pass +try: + del core +except NameError: + pass +try: + del compiler +except NameError: + pass + # __all__ PLACEHOLDER diff --git a/tensorflow/api_template_v1.__init__.py b/tensorflow/api_template_v1.__init__.py index dad91f2d5b2..b73af197f7b 100644 --- a/tensorflow/api_template_v1.__init__.py +++ b/tensorflow/api_template_v1.__init__.py @@ -156,4 +156,25 @@ if _running_from_pip_package(): if _fi.file_exists(_plugin_dir): _ll.load_library(_plugin_dir) +# Delete modules that should be hidden from dir(). +# Don't fail if these modules are not available. +# For e.g. this file will be originally placed under tensorflow/_api/v1 which +# does not have 'python', 'core' directories. Then, it will be copied +# to tensorflow/ which does have these two directories. 
+ +# pylint: disable=undefined-variable +try: + del python +except NameError: + pass +try: + del core +except NameError: + pass +try: + del compiler +except NameError: + pass + + # __all__ PLACEHOLDER diff --git a/tensorflow/python/ops/numpy_ops/np_interop_test.py b/tensorflow/python/ops/numpy_ops/np_interop_test.py index 33abb58f260..c66d40f54a8 100644 --- a/tensorflow/python/ops/numpy_ops/np_interop_test.py +++ b/tensorflow/python/ops/numpy_ops/np_interop_test.py @@ -21,7 +21,7 @@ from __future__ import print_function import numpy as onp import tensorflow.compat.v2 as tf -import tensorflow.python.ops.numpy_ops as np +from tensorflow.python.ops import numpy_ops as np # Tests for code snippet put in README.md diff --git a/tensorflow/tools/api/tests/module_test.py b/tensorflow/tools/api/tests/module_test.py index 5397278f5f3..b9ba655f84f 100644 --- a/tensorflow/tools/api/tests/module_test.py +++ b/tensorflow/tools/api/tests/module_test.py @@ -59,9 +59,8 @@ class ModuleTest(test.TestCase): 'tf.Tensor([1 2 3 4 5 6 7 8 9], shape=(9,), dtype=int32)', str(tf.range(1, 10))) else: - self.assertEqual( - 'Tensor("range:0", shape=(9,), dtype=int32)', - str(tf.range(1, 10))) + self.assertEqual('Tensor("range:0", shape=(9,), dtype=int32)', + str(tf.range(1, 10))) def testCompatV2HasCompatV1(self): # pylint: disable=pointless-statement @@ -79,6 +78,9 @@ class ModuleTest(test.TestCase): tf.compat.v1.summary.FileWriter # pylint: enable=pointless-statement + def testPythonModuleIsHidden(self): + self.assertNotIn('python', dir(tf)) + if __name__ == '__main__': test.main() diff --git a/tensorflow/tools/docs/tf_doctest.py b/tensorflow/tools/docs/tf_doctest.py index 40b06c6c53f..df6077b8c9c 100644 --- a/tensorflow/tools/docs/tf_doctest.py +++ b/tensorflow/tools/docs/tf_doctest.py @@ -29,7 +29,6 @@ from absl.testing import absltest import numpy as np import tensorflow.compat.v2 as tf -import tensorflow.python as tf_root from tensorflow.tools.docs import tf_doctest_lib # We put doctest after absltest so that it picks up the unittest monkeypatch. @@ -190,8 +189,9 @@ def load_tests(unused_loader, tests, unused_ignore): tf_modules = get_module_and_inject_docstring(FLAGS.file) for module in tf_modules: - if any(module.__name__.startswith(PACKAGE + prefix) - for prefix in FLAGS.module_prefix_skip): + if any( + module.__name__.startswith(PACKAGE + prefix) + for prefix in FLAGS.module_prefix_skip): continue testcase = TfTestCase() tests.addTests( @@ -221,5 +221,9 @@ def setUpModule(): if __name__ == '__main__': - recursive_import(tf_root) + # Use importlib to import python submodule of tensorflow. + # We delete python submodule in root __init__.py file. This means + # normal import won't work for some Python versions. 
+ tf_python_root = importlib.import_module(PACKAGE[:-1]) + recursive_import(tf_python_root) absltest.main() From 2276d3565d77c0d0f723e9e799529b9b8b4ae658 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Thu, 16 Jul 2020 08:05:21 +0700 Subject: [PATCH 0576/2522] Refactor read and blockfetcher --- .../plugins/gcs/ram_file_block_cache.cc | 30 +++-- .../plugins/gcs/ram_file_block_cache.h | 24 ++-- .../plugins/gcs/ram_file_block_cache_test.cc | 116 ++++++++---------- 3 files changed, 81 insertions(+), 89 deletions(-) diff --git a/tensorflow/c/experimental/filesystem/plugins/gcs/ram_file_block_cache.cc b/tensorflow/c/experimental/filesystem/plugins/gcs/ram_file_block_cache.cc index 14e83d978e7..3700ccf17a2 100644 --- a/tensorflow/c/experimental/filesystem/plugins/gcs/ram_file_block_cache.cc +++ b/tensorflow/c/experimental/filesystem/plugins/gcs/ram_file_block_cache.cc @@ -133,9 +133,9 @@ void RamFileBlockCache::MaybeFetch(const Key& key, block->mu.Unlock(); // Release the lock while making the API call. block->data.clear(); block->data.resize(block_size_, 0); - size_t bytes_transferred; - block_fetcher_(key.first, key.second, block_size_, block->data.data(), - &bytes_transferred, status); + int64_t bytes_transferred; + bytes_transferred = block_fetcher_(key.first, key.second, block_size_, + block->data.data(), status); block->mu.Lock(); // Reacquire the lock immediately afterwards if (TF_GetCode(status) == TF_OK) { block->data.resize(bytes_transferred, 0); @@ -165,18 +165,16 @@ void RamFileBlockCache::MaybeFetch(const Key& key, "Control flow should never reach the end of RamFileBlockCache::Fetch."); } -void RamFileBlockCache::Read(const std::string& filename, size_t offset, - size_t n, char* buffer, size_t* bytes_transferred, - TF_Status* status) { - *bytes_transferred = 0; +int64_t RamFileBlockCache::Read(const std::string& filename, size_t offset, + size_t n, char* buffer, TF_Status* status) { if (n == 0) { - return TF_SetStatus(status, TF_OK, ""); + TF_SetStatus(status, TF_OK, ""); + return 0; } if (!IsCacheEnabled() || (n > max_bytes_)) { // The cache is effectively disabled, so we pass the read through to the // fetcher without breaking it up into blocks. - return block_fetcher_(filename, offset, n, buffer, bytes_transferred, - status); + return block_fetcher_(filename, offset, n, buffer, status); } // Calculate the block-aligned start and end of the read. size_t start = block_size_ * (offset / block_size_); @@ -196,20 +194,20 @@ void RamFileBlockCache::Read(const std::string& filename, size_t offset, abort(); } MaybeFetch(key, block, status); - if (TF_GetCode(status) != TF_OK) return; + if (TF_GetCode(status) != TF_OK) return -1; UpdateLRU(key, block, status); - if (TF_GetCode(status) != TF_OK) return; + if (TF_GetCode(status) != TF_OK) return -1; // Copy the relevant portion of the block into the result buffer. const auto& data = block->data; if (offset >= pos + data.size()) { // The requested offset is at or beyond the end of the file. This can // happen if `offset` is not block-aligned, and the read returns the last // block in the file, which does not extend all the way out to `offset`. 
- *bytes_transferred = total_bytes_transferred; std::stringstream os; os << "EOF at offset " << offset << " in file " << filename << " at position " << pos << " with data size " << data.size(); - return TF_SetStatus(status, TF_OUT_OF_RANGE, std::move(os).str().c_str()); + TF_SetStatus(status, TF_OUT_OF_RANGE, std::move(os).str().c_str()); + return total_bytes_transferred; } auto begin = data.begin(); if (offset > pos) { @@ -231,8 +229,8 @@ void RamFileBlockCache::Read(const std::string& filename, size_t offset, break; } } - *bytes_transferred = total_bytes_transferred; - return TF_SetStatus(status, TF_OK, ""); + TF_SetStatus(status, TF_OK, ""); + return total_bytes_transferred; } bool RamFileBlockCache::ValidateAndUpdateFileSignature( diff --git a/tensorflow/c/experimental/filesystem/plugins/gcs/ram_file_block_cache.h b/tensorflow/c/experimental/filesystem/plugins/gcs/ram_file_block_cache.h index 37cbc257e9c..2abfb6f924b 100644 --- a/tensorflow/c/experimental/filesystem/plugins/gcs/ram_file_block_cache.h +++ b/tensorflow/c/experimental/filesystem/plugins/gcs/ram_file_block_cache.h @@ -40,12 +40,13 @@ class RamFileBlockCache { public: /// The callback executed when a block is not found in the cache, and needs to /// be fetched from the backing filesystem. This callback is provided when the - /// cache is constructed. The `status` should be `TF_OK` as long as the - /// read from the remote filesystem succeeded (similar to the semantics of the - /// read(2) system call). - typedef std::function + /// cache is constructed. It returns total bytes read ( -1 in case of errors + /// ). The `status` should be `TF_OK` as long as the read from the remote + /// filesystem succeeded (similar to the semantics of the read(2) system + /// call). + typedef std::function BlockFetcher; RamFileBlockCache(size_t block_size, size_t max_bytes, uint64_t max_staleness, @@ -65,7 +66,7 @@ class RamFileBlockCache { TF_StartThread(&thread_options, "TF_prune_FBC", PruneThread, this)); } std::cout << "GCS file block cache is " - << (IsCacheEnabled() ? "enabled" : "disabled"); + << (IsCacheEnabled() ? "enabled" : "disabled") << ".\n"; } ~RamFileBlockCache() { @@ -77,8 +78,9 @@ class RamFileBlockCache { } } - /// Read `n` bytes from `filename` starting at `offset` into `buffer`. This - /// method will set `status` to: + /// Read `n` bytes from `filename` starting at `offset` into `buffer`. It + /// returns total bytes read ( -1 in case of errors ). This method will set + /// `status` to: /// /// 1) The error from the remote filesystem, if the read from the remote /// filesystem failed. @@ -96,8 +98,8 @@ class RamFileBlockCache { /// /// Caller is responsible for allocating memory for `buffer`. /// `buffer` will be left unchanged in case of errors. - void Read(const std::string& filename, size_t offset, size_t n, char* buffer, - size_t* bytes_transferred, TF_Status* status); + int64_t Read(const std::string& filename, size_t offset, size_t n, + char* buffer, TF_Status* status); // Validate the given file signature with the existing file signature in the // cache. 
Returns true if the signature doesn't change or the file doesn't diff --git a/tensorflow/c/experimental/filesystem/plugins/gcs/ram_file_block_cache_test.cc b/tensorflow/c/experimental/filesystem/plugins/gcs/ram_file_block_cache_test.cc index b1ea295c080..859d42d85e3 100644 --- a/tensorflow/c/experimental/filesystem/plugins/gcs/ram_file_block_cache_test.cc +++ b/tensorflow/c/experimental/filesystem/plugins/gcs/ram_file_block_cache_test.cc @@ -33,20 +33,22 @@ Status ReadCache(tf_gcs_filesystem::RamFileBlockCache* cache, std::vector* out) { out->clear(); out->resize(n, 0); - size_t bytes_transferred = 0; TF_Status status; - cache->Read(filename, offset, n, out->data(), &bytes_transferred, &status); - EXPECT_LE(bytes_transferred, n); - out->resize(bytes_transferred, n); + auto bytes_transferred = + cache->Read(filename, offset, n, out->data(), &status); + if (bytes_transferred >= 0) { + EXPECT_LE(bytes_transferred, n); + out->resize(bytes_transferred, n); + } return status.status; } TEST(RamFileBlockCacheTest, IsCacheEnabled) { auto fetcher = [](const string& filename, size_t offset, size_t n, - char* buffer, size_t* bytes_transferred, - TF_Status* status) { + char* buffer, TF_Status* status) -> int64_t { // Do nothing. - return TF_SetStatus(status, TF_OK, ""); + TF_SetStatus(status, TF_OK, ""); + return 0; }; tf_gcs_filesystem::RamFileBlockCache cache1(0, 0, 0, fetcher); tf_gcs_filesystem::RamFileBlockCache cache2(16, 0, 0, fetcher); @@ -62,12 +64,11 @@ TEST(RamFileBlockCacheTest, IsCacheEnabled) { TEST(RamFileBlockCacheTest, ValidateAndUpdateFileSignature) { int calls = 0; auto fetcher = [&calls](const string& filename, size_t offset, size_t n, - char* buffer, size_t* bytes_transferred, - TF_Status* status) { + char* buffer, TF_Status* status) -> int64_t { calls++; memset(buffer, 'x', n); - *bytes_transferred = n; - return TF_SetStatus(status, TF_OK, ""); + TF_SetStatus(status, TF_OK, ""); + return n; }; string filename = "file"; tf_gcs_filesystem::RamFileBlockCache cache(16, 32, 0, fetcher); @@ -96,15 +97,14 @@ TEST(RamFileBlockCacheTest, PassThrough) { int calls = 0; auto fetcher = [&calls, want_filename, want_offset, want_n]( const string& got_filename, size_t got_offset, - size_t got_n, char* buffer, size_t* bytes_transferred, - TF_Status* status) { + size_t got_n, char* buffer, TF_Status* status) -> int64_t { EXPECT_EQ(got_filename, want_filename); EXPECT_EQ(got_offset, want_offset); EXPECT_EQ(got_n, want_n); calls++; memset(buffer, 'x', got_n); - *bytes_transferred = got_n; - return TF_SetStatus(status, TF_OK, ""); + TF_SetStatus(status, TF_OK, ""); + return got_n; }; // If block_size, max_bytes, or both are zero, or want_n is larger than // max_bytes the cache is a pass-through. @@ -133,16 +133,17 @@ TEST(RamFileBlockCacheTest, BlockAlignment) { } // The fetcher just fetches slices of the buffer. 
auto fetcher = [&buf](const string& filename, size_t offset, size_t n, - char* buffer, size_t* bytes_transferred, - TF_Status* status) { + char* buffer, TF_Status* status) -> int64_t { + int64_t bytes_transferred; if (offset < buf.size()) { size_t bytes_to_copy = std::min(buf.size() - offset, n); memcpy(buffer, buf.data() + offset, bytes_to_copy); - *bytes_transferred = bytes_to_copy; + bytes_transferred = bytes_to_copy; } else { - *bytes_transferred = 0; + bytes_transferred = 0; } - return TF_SetStatus(status, TF_OK, ""); + TF_SetStatus(status, TF_OK, ""); + return bytes_transferred; }; for (size_t block_size = 2; block_size <= 4; block_size++) { // Make a cache of N-byte block size (1 block) and verify that reads of @@ -181,15 +182,14 @@ TEST(RamFileBlockCacheTest, CacheHits) { std::set calls; auto fetcher = [&calls, block_size](const string& filename, size_t offset, size_t n, char* buffer, - size_t* bytes_transferred, - TF_Status* status) { + TF_Status* status) -> int64_t { EXPECT_EQ(n, block_size); EXPECT_EQ(offset % block_size, 0); EXPECT_EQ(calls.find(offset), calls.end()) << "at offset " << offset; calls.insert(offset); memset(buffer, 'x', n); - *bytes_transferred = n; - return TF_SetStatus(status, TF_OK, ""); + TF_SetStatus(status, TF_OK, ""); + return n; }; const uint32 block_count = 256; tf_gcs_filesystem::RamFileBlockCache cache( @@ -215,8 +215,7 @@ TEST(RamFileBlockCacheTest, OutOfRange) { bool second_block = false; auto fetcher = [block_size, file_size, &first_block, &second_block]( const string& filename, size_t offset, size_t n, - char* buffer, size_t* bytes_transferred, - TF_Status* status) { + char* buffer, TF_Status* status) -> int64_t { EXPECT_EQ(n, block_size); EXPECT_EQ(offset % block_size, 0); size_t bytes_to_copy = 0; @@ -231,8 +230,8 @@ TEST(RamFileBlockCacheTest, OutOfRange) { memset(buffer, 'x', bytes_to_copy); second_block = true; } - *bytes_transferred = bytes_to_copy; - return TF_SetStatus(status, TF_OK, ""); + TF_SetStatus(status, TF_OK, ""); + return bytes_to_copy; }; tf_gcs_filesystem::RamFileBlockCache cache(block_size, block_size, 0, fetcher); @@ -260,14 +259,13 @@ TEST(RamFileBlockCacheTest, Inconsistent) { const size_t block_size = 16; // This fetcher returns OK but only fills in one byte for any offset. 
auto fetcher = [block_size](const string& filename, size_t offset, size_t n, - char* buffer, size_t* bytes_transferred, - TF_Status* status) { + char* buffer, TF_Status* status) -> int64_t { EXPECT_EQ(n, block_size); EXPECT_EQ(offset % block_size, 0); EXPECT_GE(n, 1); memset(buffer, 'x', 1); - *bytes_transferred = 1; - return TF_SetStatus(status, TF_OK, ""); + TF_SetStatus(status, TF_OK, ""); + return 1; }; tf_gcs_filesystem::RamFileBlockCache cache(block_size, 2 * block_size, 0, fetcher); @@ -286,8 +284,7 @@ TEST(RamFileBlockCacheTest, LRU) { std::list calls; auto fetcher = [&calls, block_size](const string& filename, size_t offset, size_t n, char* buffer, - size_t* bytes_transferred, - TF_Status* status) { + TF_Status* status) -> int64_t { EXPECT_EQ(n, block_size); EXPECT_FALSE(calls.empty()) << "at offset = " << offset; if (!calls.empty()) { @@ -295,8 +292,8 @@ TEST(RamFileBlockCacheTest, LRU) { calls.pop_front(); } memset(buffer, 'x', n); - *bytes_transferred = n; - return TF_SetStatus(status, TF_OK, ""); + TF_SetStatus(status, TF_OK, ""); + return n; }; const uint32 block_count = 2; tf_gcs_filesystem::RamFileBlockCache cache( @@ -335,12 +332,11 @@ TEST(RamFileBlockCacheTest, LRU) { TEST(RamFileBlockCacheTest, MaxStaleness) { int calls = 0; auto fetcher = [&calls](const string& filename, size_t offset, size_t n, - char* buffer, size_t* bytes_transferred, - TF_Status* status) { + char* buffer, TF_Status* status) -> int64_t { calls++; memset(buffer, 'x', n); - *bytes_transferred = n; - return TF_SetStatus(status, TF_OK, ""); + TF_SetStatus(status, TF_OK, ""); + return n; }; std::vector out; std::unique_ptr env(new NowSecondsEnv); @@ -380,8 +376,7 @@ TEST(RamFileBlockCacheTest, MaxStaleness) { TEST(RamFileBlockCacheTest, RemoveFile) { int calls = 0; auto fetcher = [&calls](const string& filename, size_t offset, size_t n, - char* buffer, size_t* bytes_transferred, - TF_Status* status) { + char* buffer, TF_Status* status) -> int64_t { calls++; char c = (filename == "a") ? 'a' : (filename == "b") ? 'b' : 'x'; if (offset > 0) { @@ -389,8 +384,8 @@ TEST(RamFileBlockCacheTest, RemoveFile) { c = toupper(c); } memset(buffer, c, n); - *bytes_transferred = n; - return TF_SetStatus(status, TF_OK, ""); + TF_SetStatus(status, TF_OK, ""); + return n; }; // This cache has space for 4 blocks; we'll read from two files. const size_t n = 3; @@ -443,12 +438,11 @@ TEST(RamFileBlockCacheTest, RemoveFile) { TEST(RamFileBlockCacheTest, Prune) { int calls = 0; auto fetcher = [&calls](const string& filename, size_t offset, size_t n, - char* buffer, size_t* bytes_transferred, - TF_Status* status) { + char* buffer, TF_Status* status) -> int64_t { calls++; memset(buffer, 'x', n); - *bytes_transferred = n; - return TF_SetStatus(status, TF_OK, ""); + TF_SetStatus(status, TF_OK, ""); + return n; }; std::vector out; // Our fake environment is initialized with the current timestamp. @@ -509,17 +503,17 @@ TEST(RamFileBlockCacheTest, ParallelReads) { const int callers = 4; BlockingCounter counter(callers); auto fetcher = [&counter](const string& filename, size_t offset, size_t n, - char* buffer, size_t* bytes_transferred, - TF_Status* status) { + char* buffer, TF_Status* status) -> int64_t { counter.DecrementCount(); if (!counter.WaitFor(std::chrono::seconds(10))) { // This avoids having the test time out, which is harder to debug. 
- return TF_SetStatus(status, TF_FAILED_PRECONDITION, - "desired concurrency not reached"); + TF_SetStatus(status, TF_FAILED_PRECONDITION, + "desired concurrency not reached"); + return -1; } memset(buffer, 'x', n); - *bytes_transferred = n; - return TF_SetStatus(status, TF_OK, ""); + TF_SetStatus(status, TF_OK, ""); + return n; }; const int block_size = 8; tf_gcs_filesystem::RamFileBlockCache cache( @@ -548,17 +542,16 @@ TEST(RamFileBlockCacheTest, CoalesceConcurrentReads) { Notification notification; auto fetcher = [&num_requests, ¬ification, block_size]( const string& filename, size_t offset, size_t n, - char* buffer, size_t* bytes_transferred, - TF_Status* status) { + char* buffer, TF_Status* status) -> int64_t { EXPECT_EQ(n, block_size); EXPECT_EQ(offset, 0); num_requests++; memset(buffer, 'x', n); - *bytes_transferred = n; notification.Notify(); // Wait for other thread to issue read. Env::Default()->SleepForMicroseconds(100000); // 0.1 secs - return TF_SetStatus(status, TF_OK, ""); + TF_SetStatus(status, TF_OK, ""); + return n; }; tf_gcs_filesystem::RamFileBlockCache cache(block_size, block_size, 0, fetcher); @@ -580,12 +573,11 @@ TEST(RamFileBlockCacheTest, CoalesceConcurrentReads) { TEST(RamFileBlockCacheTest, Flush) { int calls = 0; auto fetcher = [&calls](const string& filename, size_t offset, size_t n, - char* buffer, size_t* bytes_transferred, - TF_Status* status) { + char* buffer, TF_Status* status) -> int64_t { calls++; memset(buffer, 'x', n); - *bytes_transferred = n; - return TF_SetStatus(status, TF_OK, ""); + TF_SetStatus(status, TF_OK, ""); + return n; }; tf_gcs_filesystem::RamFileBlockCache cache(16, 32, 0, fetcher); std::vector out; From eab62ef2f770754bae4c85a45940785923e5a2f5 Mon Sep 17 00:00:00 2001 From: Denisa Roberts Date: Wed, 15 Jul 2020 21:40:32 -0400 Subject: [PATCH 0577/2522] Add test for grad not implemented check --- tensorflow/python/kernel_tests/qr_op_test.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/kernel_tests/qr_op_test.py b/tensorflow/python/kernel_tests/qr_op_test.py index 31d538f8b27..6df5d18ce3f 100644 --- a/tensorflow/python/kernel_tests/qr_op_test.py +++ b/tensorflow/python/kernel_tests/qr_op_test.py @@ -20,6 +20,7 @@ from __future__ import print_function import numpy as np +from tensorflow.python.eager import backprop from tensorflow.python.client import session from tensorflow.python.eager import context from tensorflow.python.framework import constant_op @@ -48,7 +49,7 @@ class QrOpTest(test.TestCase): @test_util.run_in_graph_and_eager_modes(use_gpu=True) def testWrongDimensions(self): - # The input to svd should be a tensor of at least rank 2. + # The input to qr should be a tensor of at least rank 2. scalar = constant_op.constant(1.) with self.assertRaisesRegex((ValueError, errors_impl.InvalidArgumentError), "rank.* 2.*0"): @@ -170,7 +171,20 @@ def _GetQrOpTest(dtype_, shape_, full_matrices_, use_static_shape_): class QrGradOpTest(test.TestCase): - pass + @test_util.run_in_graph_and_eager_modes(use_gpu=True) + def testNotImplementedCheck(self): + # Test that the correct message is issued + np.random.seed(42) + matrix = constant_op.constant( + np.random.uniform(low=-1.0, high=1.0, size=(5, 2)).astype(np.float32)) + def _NoGrad(x): + with backprop.GradientTape() as tape: + tape.watch(x) + ret = linalg_ops.qr(x, full_matrices=True) + return tape.gradient(ret, x) + m = r"QrGrad not implemented when nrows > ncols and full_matrices is true." 
+ with self.assertRaisesRegex(NotImplementedError, m): + _NoGrad(matrix) def _GetQrGradOpTest(dtype_, shape_, full_matrices_): From adfcc7218eca0d6b3a69dda8a1cfc676a2e93906 Mon Sep 17 00:00:00 2001 From: "902449@58880@bigcat_chen@ASIC" Date: Thu, 16 Jul 2020 09:46:51 +0800 Subject: [PATCH 0578/2522] TFLM:update Himax WE1 EVB example micro speech animation gif link --- tensorflow/lite/micro/examples/micro_speech/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/lite/micro/examples/micro_speech/README.md b/tensorflow/lite/micro/examples/micro_speech/README.md index 5b291a4d6cf..5b25bc99da0 100644 --- a/tensorflow/lite/micro/examples/micro_speech/README.md +++ b/tensorflow/lite/micro/examples/micro_speech/README.md @@ -660,7 +660,7 @@ Following the Steps to run micro speech example at HIMAX WE1 EVB platform. After these steps, press reset button on the HIMAX WE1 EVB, you will see application output in the serial terminal and lighting LED. -![Animation on Himax WE1 EVB](https://github.com/HimaxWiseEyePlus/bsp_tflu/tree/master/HIMAX_WE1_EVB_user_guide/images/tflm_example_micro_speech_int8_led.gif) +![Animation on Himax WE1 EVB](https://raw.githubusercontent.com/HimaxWiseEyePlus/bsp_tflu/master/HIMAX_WE1_EVB_user_guide/images/tflm_example_micro_speech_int8_led.gif) ## Run on macOS From 59215389ceed525e3a5e739780590a4f1bdf19f4 Mon Sep 17 00:00:00 2001 From: Blake Hechtman Date: Wed, 15 Jul 2020 20:37:36 -0700 Subject: [PATCH 0579/2522] [XLA] Extend dot_as_convolution to detect the rhs transpose rule of the forward conv as batch dot dimension. Also do not swap conv operations based purely on inputs of size 1. PiperOrigin-RevId: 321492318 Change-Id: Ieb7f24c1347f9d2cbcfaba5e3bf3a62d8898b01c --- .../compiler/xla/service/algebraic_simplifier.cc | 6 ++++-- .../xla/service/dot_as_convolution_util.cc | 14 +++++++++++--- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 130661bf1cd..741edfc7c35 100755 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -4697,15 +4697,17 @@ StatusOr AlgebraicSimplifierVisitor::SwapConvOperands( for (int64 spatial_dim = 0; spatial_dim < dnums.input_spatial_dimensions_size(); ++spatial_dim) { const int64 kernel_size = window_dims[spatial_dim].size(); - kernel_product *= kernel_size; const int64 dilated_kernel_size = 1 + (kernel_size - 1) * window_dims[spatial_dim].window_dilation(); const int64 input_size = input->shape().dimensions(dnums.input_spatial_dimensions(spatial_dim)); - swapped_kernel_product *= input_size; const int64 dilated_input_size = 1 + (input_size - 1) * window_dims[spatial_dim].base_dilation(); + // Don't decide to swap if the input size is one, since many convolution + // implementations can easily hand that special case efficiently. + kernel_product *= kernel_size; + swapped_kernel_product *= input_size == 1 ? 
kernel_size : input_size; auto new_dim = swapped_window.add_dimensions(); new_dim->set_size(input_size); diff --git a/tensorflow/compiler/xla/service/dot_as_convolution_util.cc b/tensorflow/compiler/xla/service/dot_as_convolution_util.cc index fcdf85d5ecb..576d9d48ab8 100644 --- a/tensorflow/compiler/xla/service/dot_as_convolution_util.cc +++ b/tensorflow/compiler/xla/service/dot_as_convolution_util.cc @@ -49,15 +49,23 @@ ParseDotGeneralFromConvolution(const HloInstruction* conv) { int64 rhs_size = conv->operand(1)->shape().dimensions(rhs); int64 output = conv_dims.output_spatial_dimensions(i); const auto& wd = conv->window().dimensions(i); - if (lhs_size == wd.size() && - std::max(1, lhs_size - 1) == wd.stride() && - lhs_size == wd.base_dilation() && wd.window_dilation() == 1 && + if (lhs_size == wd.size() && lhs_size == wd.base_dilation() && + ((std::max(1, lhs_size - 1) == wd.stride() && + wd.window_dilation() == 1) || + (std::max(1, lhs_size - 1) == wd.window_dilation() && + wd.stride() == 1)) && wd.padding_high() == 0 && wd.padding_low() == 0 && !wd.window_reversal()) { // A batch dimension in DotGeneral is represented as a spatial dimension // with window size B (batch dimension size), stride B - 1, and base // dilation B. dims.batch_dims.push_back({lhs, rhs, output, i}); + } else if (wd.size() == lhs_size && wd.padding_high() == lhs_size - 1 && + wd.padding_low() == lhs_size - 1 && wd.window_reversal() && + wd.window_dilation() == 1 && wd.stride() == lhs_size && + wd.base_dilation() == lhs_size - 1) { + // Aternative representation of a batch dimension. + dims.batch_dims.push_back({lhs, rhs, output, i}); } else if (lhs_size == wd.size() && wd.base_dilation() == 1 && wd.window_dilation() == 1 && wd.padding_high() == 0 && wd.padding_low() == 0 && !wd.window_reversal()) { From 0057d244a07eebf4fad6ceb30cdcfa5ccda40e7c Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Thu, 16 Jul 2020 10:47:55 +0700 Subject: [PATCH 0580/2522] Add ReadS3TransferManager --- .../filesystem/plugins/s3/s3_filesystem.cc | 44 ++++++++++++++++++- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc index f23274e6191..7714b36c015 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc @@ -15,6 +15,7 @@ limitations under the License. 
#include "tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h" #include +#include #include #include #include @@ -37,6 +38,7 @@ constexpr int kExecutorPoolSize = 25; constexpr uint64_t kS3MultiPartUploadChunkSize = 50 * 1024 * 1024; // 50 MB constexpr uint64_t kS3MultiPartDownloadChunkSize = 50 * 1024 * 1024; // 50 MB +constexpr size_t kDownloadRetries = 3; static void* plugin_memory_allocate(size_t size) { return calloc(1, size); } static void plugin_memory_free(void* ptr) { free(ptr); } @@ -241,6 +243,16 @@ typedef struct S3File { bool use_multi_part_download; } S3File; +// AWS Streams destroy the buffer (buf) passed, so creating a new +// IOStream that retains the buffer so the calling function +// can control it's lifecycle +class TFS3UnderlyingStream : public Aws::IOStream { + public: + using Base = Aws::IOStream; + TFS3UnderlyingStream(std::streambuf* buf) : Base(buf) {} + virtual ~TFS3UnderlyingStream() = default; +}; + void Cleanup(TF_RandomAccessFile* file) { auto s3_file = static_cast(file->plugin_file); delete s3_file; @@ -273,8 +285,36 @@ static int64_t ReadS3Client(S3File* s3_file, uint64_t offset, size_t n, static int64_t ReadS3TransferManager(S3File* s3_file, uint64_t offset, size_t n, char* buffer, TF_Status* status) { - // TODO(vnvo2409): Implement this function. - return -1; + auto create_download_stream = [&]() { + return Aws::New( + "S3ReadStream", + Aws::New( + "S3ReadStream", reinterpret_cast(buffer), n)); + }; + auto handle = s3_file->transfer_manager->DownloadFile( + s3_file->bucket, s3_file->object, offset, n, create_download_stream); + handle->WaitUntilFinished(); + + size_t retries = 0; + while (handle->GetStatus() == Aws::Transfer::TransferStatus::FAILED && + handle->GetLastError().GetResponseCode() != + Aws::Http::HttpResponseCode::REQUESTED_RANGE_NOT_SATISFIABLE && + retries++ < kDownloadRetries) { + // Only failed parts will be downloaded again. 
+ s3_file->transfer_manager->RetryDownload(handle); + handle->WaitUntilFinished(); + } + + if (handle->GetStatus() != Aws::Transfer::TransferStatus::COMPLETED) + TF_SetStatusFromAWSError(handle->GetLastError(), status); + else + TF_SetStatus(status, TF_OK, ""); + if (TF_GetCode(status) != TF_OK && TF_GetCode(status) != TF_OUT_OF_RANGE) + return -1; + int64_t read = handle->GetBytesTransferred(); + if (read < n) + TF_SetStatus(status, TF_OUT_OF_RANGE, "Read less bytes than requested"); + return read; } int64_t Read(const TF_RandomAccessFile* file, uint64_t offset, size_t n, From 9c20fbff6c251b05c15f2b4f840722c78bf38233 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Wed, 15 Jul 2020 21:50:03 -0700 Subject: [PATCH 0581/2522] [TF:MLIR] Fold constant cond in IfOp PiperOrigin-RevId: 321499354 Change-Id: Ie644b7f25d1c05d43c43f90983c7f5ac00866c47 --- .../compiler/mlir/lite/tf_tfl_passes.cc | 5 ++ .../mlir/lite/transforms/quantize_patterns.td | 2 +- .../compiler/mlir/tensorflow/ir/tf_ops.td | 2 + .../compiler/mlir/tensorflow/ir/tf_ops_a_m.cc | 52 ++++++++++++++++++- .../mlir/tensorflow/ir/tf_ops_helpers.inc | 2 +- .../mlir/tensorflow/tests/canonicalize.mlir | 21 ++++++++ .../control_flow_upgrade_legacy_v1.py | 7 +-- 7 files changed, 85 insertions(+), 6 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc b/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc index 1e1c431822d..fc44e778b92 100644 --- a/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc +++ b/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc @@ -175,6 +175,11 @@ void AddTFToTFLConversionPasses(const mlir::TFL::PassConfig& pass_config, // Add a shape inference pass to optimize away the unnecessary casts. pass_manager->addPass(mlir::TF::CreateTFShapeInferencePass()); } + + // Inline function calls that left in the graph after folding functional + // control flow ops (IfOp, CaseOp). + pass_manager->addPass(mlir::createInlinerPass()); + pass_manager->addPass( mlir::TFL::CreateLegalizeTFPass(pass_config.runtime_verification)); pass_manager->addPass(mlir::TFL::CreateOptimizePass()); diff --git a/tensorflow/compiler/mlir/lite/transforms/quantize_patterns.td b/tensorflow/compiler/mlir/lite/transforms/quantize_patterns.td index 22bcc563f7b..38c754ed08c 100644 --- a/tensorflow/compiler/mlir/lite/transforms/quantize_patterns.td +++ b/tensorflow/compiler/mlir/lite/transforms/quantize_patterns.td @@ -33,7 +33,7 @@ def : Pat<(TFL_QuantizeOp (TFL_DequantizeOp $in), $qt), (replaceWithValue $in)>; // point constant. def : Pat<(TFL_DequantizeOp (TFL_QuantizeOp (ConstantOp F32ElementsAttr:$cst), $qt)), - (ConstantOp $cst)>; + (TFL_ConstOp $cst)>; // Quantize the value of a constant op if the quantization parameters have been // propagated to the output. 
diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td index 1fe301696a7..71b30ae8090 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td @@ -229,6 +229,8 @@ else_branch: A function that takes 'inputs' and returns a list of let verifier = [{ return Verify(*this); }]; + + let hasCanonicalizer = 1; } def TF_YieldOp : TF_Op<"Yield", diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc index af7a16ba127..f4f9ec42864 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc @@ -463,7 +463,7 @@ LogicalResult FoldConstantCaseOp::matchAndRewrite( auto call_op = rewriter.create( op.getLoc(), op.getResultTypes(), op.getOperands().drop_front(), func, /*config=*/empty, /*config_proto=*/empty, /*executor_type=*/empty); - PropagateAttributes(op.getOperation(), call_op); + PropagateDeviceAndInternalAttrs(op.getOperation(), call_op); rewriter.replaceOp(op, call_op.getResults()); return success(); } @@ -1615,6 +1615,56 @@ static LogicalResult Verify(IfOp op) { return success(); } +class FoldConstantIfOp : public OpRewritePattern { + public: + explicit FoldConstantIfOp(MLIRContext *context) + : OpRewritePattern(context) {} + LogicalResult matchAndRewrite(TF::IfOp op, + PatternRewriter &rewriter) const override; + + private: + template + struct CallOpType { + using CallOp = T; + }; +}; + +LogicalResult FoldConstantIfOp::matchAndRewrite( + TF::IfOp op, PatternRewriter &rewriter) const { + // Extract the constant cond value. + DenseIntElementsAttr cond_attr; + if (!matchPattern(op.cond(), m_Constant(&cond_attr))) return failure(); + + // Cond value must be a scalar. + if (cond_attr.getNumElements() != 1) return failure(); + + // Select a branch function. + bool cond = cond_attr.getSplatValue().getValue(); + FlatSymbolRefAttr func = cond ? op.then_branchAttr() : op.else_branchAttr(); + + // Replace IfOp with PartitionedCallOp or StatefulPartitionedCallOp. + auto rewrite = [&](auto op_type) { + auto empty = rewriter.getStringAttr(""); + auto call_op = rewriter.create( + op.getLoc(), op.getResultTypes(), op.getOperands().drop_front(), func, + /*config=*/empty, /*config_proto=*/empty, /*executor_type=*/empty); + PropagateDeviceAndInternalAttrs(op.getOperation(), call_op); + rewriter.replaceOp(op, call_op.getResults()); + }; + + if (op.is_stateless()) + rewrite(CallOpType{}); + else + rewrite(CallOpType{}); + + return success(); +} + +void IfOp::getCanonicalizationPatterns(OwningRewritePatternList &results, + MLIRContext *context) { + results.insert(context); +} + //===----------------------------------------------------------------------===// // IfRegionOp //===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_helpers.inc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_helpers.inc index cea2aa17d46..33d51301208 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_helpers.inc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_helpers.inc @@ -21,7 +21,7 @@ limitations under the License. // Propagates underscore and device attributes from src to dst. // TODO(b/158769932): This should be a general feature instead post some policy // discussion. 
-static void PropagateAttributes(Operation *src, Operation *dst) { +static void PropagateDeviceAndInternalAttrs(Operation *src, Operation *dst) { auto device = mlir::Identifier::get("device", src->getContext()); for (auto named_attr : src->getAttrs()) { if (*named_attr.first.begin() == '_' || named_attr.first == device) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir b/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir index c67725fbccf..17a19c50998 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir @@ -755,6 +755,27 @@ func @foldFill() -> (tensor<3x2x1xf32>, tensor<*xf32>, tensor<*xcomplex>) { return %2, %3, %4 : tensor<3x2x1xf32>, tensor<*xf32>, tensor<*xcomplex> } +// CHECK-LABEL: foldIf +func @foldIf(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> (tensor) { + %0 = "tf.Const"() {value = dense : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense : tensor} : () -> tensor + + // CHECK: %0 = "tf.PartitionedCall"(%arg0, %arg1) + // CHECK-SAME: device = "noodle" + // CHECK-SAME: f = @sub + %2 = "tf.If"(%0, %arg0, %arg1) {then_branch = @add, else_branch = @sub, output_shapes = [#tf.shape<>], device = "noodle", is_stateless = true} : (tensor, tensor, tensor) -> tensor + // CHECK: %1 = "tf.StatefulPartitionedCall"(%0, %arg1) + // CHECK-SAME: _underscore_attr = "something" + // CHECK-SAME: f = @add + %3 = "tf.If"(%1, %2, %arg1) {then_branch = @add, else_branch = @sub, output_shapes = [#tf.shape<>], _underscore_attr = "something", is_stateless = false} : (tensor, tensor, tensor) -> tensor + + // CHECK: %2 = "tf.If" + %4 = "tf.If"(%arg2, %3, %arg1) {then_branch = @add, else_branch = @sub, is_stateless = false} : (tensor, tensor, tensor) -> tensor + + // CHECK: return %2 + return %4 : tensor +} + // CHECK-LABEL: foldCase func @foldCase(%arg0: tensor, %arg1: tensor) -> (tensor) { %2 = constant dense<1> : tensor diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/control_flow_upgrade_legacy_v1.py b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/control_flow_upgrade_legacy_v1.py index 209ed3492e8..19e7a90c1e1 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/control_flow_upgrade_legacy_v1.py +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/control_flow_upgrade_legacy_v1.py @@ -33,9 +33,10 @@ from tensorflow.python.ops import control_flow_ops def Test(): data = tf.constant([1, 2, 3, 4, 5, 6]) - zero = tf.convert_to_tensor(0) - one = tf.convert_to_tensor(1) - less_op = tf.less(zero, one) + # Create placeholders to prevent constant folding. 
+ x_op = tf.placeholder(dtype=tf.int32) + y_op = tf.placeholder(dtype=tf.int32) + less_op = tf.less(x_op, y_op) switch_op = control_flow_ops.switch(data, less_op) merge_op = control_flow_ops.merge(switch_op)[0] result = tf.transpose(merge_op) From 3313b1d724670ed25e3ac3d360e5876fa2a50fb2 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Thu, 16 Jul 2020 11:53:25 +0700 Subject: [PATCH 0582/2522] Add tf_new_writable_file --- .../filesystem/plugins/s3/s3_filesystem.cc | 100 +++++++++++++++++- 1 file changed, 99 insertions(+), 1 deletion(-) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc index 7714b36c015..9bff8070427 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h" #include +#include #include #include #include @@ -39,6 +40,7 @@ constexpr int kExecutorPoolSize = 25; constexpr uint64_t kS3MultiPartUploadChunkSize = 50 * 1024 * 1024; // 50 MB constexpr uint64_t kS3MultiPartDownloadChunkSize = 50 * 1024 * 1024; // 50 MB constexpr size_t kDownloadRetries = 3; +constexpr size_t kUploadRetries = 3; static void* plugin_memory_allocate(size_t size) { return calloc(1, size); } static void plugin_memory_free(void* ptr) { free(ptr); } @@ -331,8 +333,104 @@ int64_t Read(const TF_RandomAccessFile* file, uint64_t offset, size_t n, // SECTION 2. Implementation for `TF_WritableFile` // ---------------------------------------------------------------------------- namespace tf_writable_file { +typedef struct S3File { + Aws::String bucket; + Aws::String object; + std::shared_ptr s3_client; + std::shared_ptr transfer_manager; + bool sync_needed; + std::shared_ptr outfile; + S3File(Aws::String bucket, Aws::String object, + std::shared_ptr s3_client, + std::shared_ptr transfer_manager) + : bucket(bucket), + object(object), + s3_client(s3_client), + transfer_manager(transfer_manager), + outfile(Aws::MakeShared( + kS3FileSystemAllocationTag, nullptr, "_s3_filesystem_XXXXXX", + std::ios_base::binary | std::ios_base::trunc | std::ios_base::in | + std::ios_base::out)) {} +} S3File; -// TODO(vnvo2409): Implement later +void Cleanup(TF_WritableFile* file) { + auto s3_file = static_cast(file->plugin_file); + delete s3_file; +} + +void Append(const TF_WritableFile* file, const char* buffer, size_t n, + TF_Status* status) { + auto s3_file = static_cast(file->plugin_file); + if (!s3_file->outfile) { + TF_SetStatus(status, TF_FAILED_PRECONDITION, + "The internal temporary file is not writable."); + return; + } + s3_file->sync_needed = true; + s3_file->outfile->write(buffer, n); + if (!s3_file->outfile->good()) + TF_SetStatus(status, TF_INTERNAL, + "Could not append to the internal temporary file."); + else + TF_SetStatus(status, TF_OK, ""); +} + +int64_t Tell(const TF_WritableFile* file, TF_Status* status) { + auto s3_file = static_cast(file->plugin_file); + auto position = static_cast(s3_file->outfile->tellp()); + if (position == -1) + TF_SetStatus(status, TF_INTERNAL, + "tellp on the internal temporary file failed"); + else + TF_SetStatus(status, TF_OK, ""); + return position; +} + +void Sync(const TF_WritableFile* file, TF_Status* status) { + auto s3_file = static_cast(file->plugin_file); + if (!s3_file->outfile) { + TF_SetStatus(status, TF_FAILED_PRECONDITION, + "The internal temporary file is not 
writable."); + return; + } + if (!s3_file->sync_needed) { + TF_SetStatus(status, TF_OK, ""); + return; + } + auto position = static_cast(s3_file->outfile->tellp()); + auto handle = s3_file->transfer_manager->UploadFile( + s3_file->outfile, s3_file->bucket, s3_file->object, + "application/octet-stream", Aws::Map()); + handle->WaitUntilFinished(); + + size_t retries = 0; + while (handle->GetStatus() == Aws::Transfer::TransferStatus::FAILED && + retries++ < kUploadRetries) { + // if multipart upload was used, only the failed parts will be re-sent + s3_file->transfer_manager->RetryUpload(s3_file->outfile, handle); + handle->WaitUntilFinished(); + } + if (handle->GetStatus() != Aws::Transfer::TransferStatus::COMPLETED) + return TF_SetStatusFromAWSError(handle->GetLastError(), status); + s3_file->outfile->clear(); + s3_file->outfile->seekp(position); + s3_file->sync_needed = false; + TF_SetStatus(status, TF_OK, ""); +} + +void Flush(const TF_WritableFile* file, TF_Status* status) { + Sync(file, status); +} + +void Close(const TF_WritableFile* file, TF_Status* status) { + auto s3_file = static_cast(file->plugin_file); + if (s3_file->outfile) { + Sync(file, status); + if (TF_GetCode(status) != TF_OK) return; + s3_file->outfile.reset(); + } + TF_SetStatus(status, TF_OK, ""); +} } // namespace tf_writable_file From d2d6c3f07a0b874e64a024c767deb7c9fb39b704 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 Jul 2020 23:20:19 -0700 Subject: [PATCH 0583/2522] Fix to handle Reshape Layer in experimental TFLite writer library. Changes: 1. Updated handling of ReshapeParams. 2. Added write_lib tests to check different scenarios. PiperOrigin-RevId: 321508374 Change-Id: I6e22be4d5fcfd6b771e0e5f1d28e9459deb49af7 --- .../writer/option_writer_generator.cc | 33 ++++++++ .../lite/experimental/writer/writer_lib.cc | 4 +- .../experimental/writer/writer_lib_test.cc | 75 +++++++++++++++++++ 3 files changed, 110 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/experimental/writer/option_writer_generator.cc b/tensorflow/lite/experimental/writer/option_writer_generator.cc index a565422457c..e484c5ba2f4 100644 --- a/tensorflow/lite/experimental/writer/option_writer_generator.cc +++ b/tensorflow/lite/experimental/writer/option_writer_generator.cc @@ -265,6 +265,32 @@ void GenerateImportForResizeBilinearOp(FILE* fp) { " }\n break;\n"); } +// Reshape Op infers output shape either from Parameter or from shape tensor +// that's is an additional input. When we have this additional shape tensor as +// input we don't have the parameter present in this layer. In case of more than +// one input we import an empty vector for the parameters. 
+void GenerateImportForReshapeOp(FILE* fp) { + fprintf(fp, + " case BuiltinOperator_RESHAPE: {\n" + " const auto* params = reinterpret_cast(builtin_op_data);\n" + " flatbuffers::Offset union_type;\n" + " if ((node.inputs->size > 1) &&\n" + " (params->num_dimensions < 0 ||\n" + " params->num_dimensions >= " + "TFLITE_RESHAPE_PARAMS_MAX_DIMENSION_COUNT)) {\n" + " union_type = CreateReshapeOptions(*fbb).Union();\n" + " } else {\n" + " auto val0 = fbb->CreateVector(std::vector(params->shape, " + "params->shape + params->num_dimensions));\n" + " union_type = CreateReshapeOptions(*fbb, " + "val0).Union();\n" + " }\n" + " return std::make_pair(BuiltinOptions_ReshapeOptions, " + "union_type);\n" + " }\n break;\n"); +} + void GenerateImportForOp(FILE* fp, const std::string& op_name, const std::string& option_name, const std::string& option_type, @@ -276,6 +302,13 @@ void GenerateImportForOp(FILE* fp, const std::string& op_name, return; } + // Special case Reshape that may have 'new_shape' field missing from the + // parameters. + if (struct_name == "TfLiteReshapeParams") { + GenerateImportForReshapeOp(fp); + return; + } + fprintf(fp, " case BuiltinOperator_%s: {\n", op_name.c_str()); if (options->num_elems != 0) { fprintf(fp, diff --git a/tensorflow/lite/experimental/writer/writer_lib.cc b/tensorflow/lite/experimental/writer/writer_lib.cc index 85f57527c31..2c71919724c 100644 --- a/tensorflow/lite/experimental/writer/writer_lib.cc +++ b/tensorflow/lite/experimental/writer/writer_lib.cc @@ -31,7 +31,7 @@ namespace tflite { std::pair> CreateBuiltinUnion( flatbuffers::FlatBufferBuilder* fbb, enum BuiltinOperator op, - void* builtin_op_data) { + void* builtin_op_data, const TfLiteNode& node) { switch (op) { #include "tensorflow/lite/experimental/writer/option_writer_generated.h" } @@ -82,7 +82,7 @@ SubgraphWriter::ExportOperators(flatbuffers::FlatBufferBuilder* fbb) { // builtin auto builtin_options_and_type = CreateBuiltinUnion( fbb, static_cast(registration.builtin_code), - node.builtin_data); + node.builtin_data, node); builtin_options = builtin_options_and_type.second; builtin_options_type = builtin_options_and_type.first; } else { diff --git a/tensorflow/lite/experimental/writer/writer_lib_test.cc b/tensorflow/lite/experimental/writer/writer_lib_test.cc index 41cca88ead7..4cab27ecb2d 100644 --- a/tensorflow/lite/experimental/writer/writer_lib_test.cc +++ b/tensorflow/lite/experimental/writer/writer_lib_test.cc @@ -15,6 +15,8 @@ limitations under the License. 
#include "tensorflow/lite/experimental/writer/writer_lib.h" +#include + #include #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/interpreter.h" @@ -184,6 +186,79 @@ TEST(Writer, PerTensorQuantizedModelTest) { CHECK_EQ(new_interpreter->AllocateTensors(), kTfLiteOk); } +struct ReshapeTestPattern { + int num_inputs; + bool is_param_valid; +}; + +class ReshapeLayerTest : public ::testing::TestWithParam {}; + +TEST_P(ReshapeLayerTest, ReshapeLayerTest) { + const auto param = GetParam(); + Interpreter interpreter; + const int total_tensors = param.num_inputs + 1; + interpreter.AddTensors(total_tensors); + int output_shape[] = {1, 2, 3}; + interpreter.SetTensorParametersReadWrite(/*tensor_index=*/0, kTfLiteFloat32, + /*name=*/"a", /*dims=*/{6}, + TfLiteQuantization()); + ASSERT_LE(param.num_inputs, 2); + if (param.num_inputs == 2) { + interpreter.SetTensorParametersReadOnly( + /*tensor_index=*/1, kTfLiteInt32, /*name=*/"b", /*dims=*/{3}, + TfLiteQuantization(), reinterpret_cast(output_shape), + sizeof(output_shape)); + } + interpreter.SetTensorParametersReadWrite(/*tensor_index=*/total_tensors - 1, + kTfLiteFloat32, /*name=*/"c", + /*dims=*/{3}, TfLiteQuantization()); + + std::vector input_tensors(param.num_inputs); + std::iota(input_tensors.begin(), input_tensors.end(), 0); + + interpreter.SetInputs(input_tensors); + interpreter.SetOutputs({total_tensors - 1}); + const char* initial_data = ""; + tflite::ops::builtin::BuiltinOpResolver resolver; + TfLiteReshapeParams* builtin_data = reinterpret_cast( + malloc(sizeof(TfLiteReshapeParams))); + if (param.is_param_valid) { + builtin_data->num_dimensions = 3; + for (int dim = 0; dim < builtin_data->num_dimensions; ++dim) { + builtin_data->shape[dim] = output_shape[dim]; + } + } + const TfLiteRegistration* reg = resolver.FindOp(BuiltinOperator_RESHAPE, 1); + interpreter.AddNodeWithParameters(input_tensors, + /*outputs=*/{total_tensors - 1}, + initial_data, /*init_data_size=*/0, + reinterpret_cast(builtin_data), reg); + + SubgraphWriter writer(&interpreter.primary_subgraph()); + std::string filename = absl::StrCat("/tmp/test_reshape_", param.num_inputs, + "_", param.is_param_valid, ".tflite"); + writer.Write(filename); + std::unique_ptr model = + FlatBufferModel::BuildFromFile(filename.c_str()); + InterpreterBuilder builder(*model, resolver); + std::unique_ptr new_interpreter; + builder(&new_interpreter); + ASSERT_EQ(new_interpreter->AllocateTensors(), kTfLiteOk); +} + +INSTANTIATE_TEST_SUITE_P( + Writer, ReshapeLayerTest, + ::testing::Values(ReshapeTestPattern{/*num_inputs=*/2, + /*is_param_valid=*/true}, + ReshapeTestPattern{/*num_inputs=*/2, + /*is_param_valid=*/false}, + ReshapeTestPattern{/*num_inputs=*/1, + /*is_param_valid=*/true}), + [](const ::testing::TestParamInfo& info) { + std::string name = absl::StrCat("num_inputs_", info.param.num_inputs, + "_isvalid_", info.param.is_param_valid); + return name; + }); } // namespace tflite int main(int argc, char** argv) { From ff2a56b9d383b22397f09cfce760721ebdffdc23 Mon Sep 17 00:00:00 2001 From: Feng Liu Date: Thu, 16 Jul 2020 00:08:59 -0700 Subject: [PATCH 0584/2522] Mark some comparison op quantizable in the tfl_ops PiperOrigin-RevId: 321512821 Change-Id: Id46e4c8e12f273554661834b7eafac8dbcf54bb5 --- tensorflow/compiler/mlir/lite/ir/tfl_ops.td | 13 ++++--------- tensorflow/lite/testing/op_tests/equal.py | 7 ++++++- tensorflow/lite/testing/op_tests/greater.py | 7 ++++++- tensorflow/lite/testing/op_tests/greater_equal.py | 7 ++++++- tensorflow/lite/testing/op_tests/less.py | 7 ++++++- 
tensorflow/lite/testing/op_tests/less_equal.py | 7 ++++++- 6 files changed, 34 insertions(+), 14 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td index 04e143541c3..c2d625d07fe 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td +++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td @@ -1059,8 +1059,7 @@ def TFL_LessEqualOp : TFL_Op<"less_equal", [ ResultsBroadcastableShape, BinaryOpSameElementTypeConstraint, TFL_OperandsHaveSameShapesOrBroadcastableShape<[0, 1], 4>, - NoSideEffect, - NoQuantizableResult]> { + NoSideEffect]> { let summary = "Less_equal operator"; let description = [{ @@ -1121,8 +1120,7 @@ convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imag def TFL_GreaterEqualOp : TFL_Op<"greater_equal", [ TFL_OperandsHaveSameShapesOrBroadcastableShape<[0, 1], 4>, ResultsBroadcastableShape, - NoSideEffect, - NoQuantizableResult]> { + NoSideEffect]> { let summary = "Greater_equal operator"; let description = [{ @@ -1415,7 +1413,6 @@ def TFL_EmbeddingLookupOp: TFL_Op<"embedding_lookup", def TFL_EqualOp: TFL_Op<"equal", [ Commutative, - NoQuantizableResult, ResultsBroadcastableShape, TFL_OperandsHaveSameShapesOrBroadcastableShape<[0, 1], 4>, PredOpTrait<"Operands have same value type", TCopVTEtIsSameAs<0, 1>>]> { @@ -1621,8 +1618,7 @@ def TFL_GreaterOp : TFL_Op<"greater", [ ResultsBroadcastableShape, BinaryOpSameElementTypeConstraint, TFL_OperandsHaveSameShapesOrBroadcastableShape<[0, 1], 4>, - NoSideEffect, - NoQuantizableResult]> { + NoSideEffect]> { let summary = "Greater operator"; let description = [{ @@ -1721,8 +1717,7 @@ def TFL_LessOp : TFL_Op<"less", [ ResultsBroadcastableShape, BinaryOpSameElementTypeConstraint, TFL_OperandsHaveSameShapesOrBroadcastableShape<[0, 1], 4>, - NoSideEffect, - NoQuantizableResult]> { + NoSideEffect]> { let summary = "Less operator"; let description = [{ diff --git a/tensorflow/lite/testing/op_tests/equal.py b/tensorflow/lite/testing/op_tests/equal.py index ddbece129d3..da55cc8f9bb 100644 --- a/tensorflow/lite/testing/op_tests/equal.py +++ b/tensorflow/lite/testing/op_tests/equal.py @@ -32,6 +32,11 @@ def make_equal_tests(options): "input_shape_pair": [([], []), ([1, 1, 1, 3], [1, 1, 1, 3]), ([2, 3, 4, 5], [2, 3, 4, 5]), ([2, 3, 3], [2, 3]), ([5, 5], [1]), ([10], [2, 4, 10])], + "fully_quantize": [False], + }, { + "input_dtype": [tf.float32], + "input_shape_pair": [([1, 1, 1, 3], [1, 1, 1, 3]), ([2, 3, 3], [2, 3])], + "fully_quantize": [True], }] def build_graph(parameters): @@ -60,4 +65,4 @@ def make_equal_tests(options): test_parameters, build_graph, build_inputs, - expected_tf_failures=4) + expected_tf_failures=5) diff --git a/tensorflow/lite/testing/op_tests/greater.py b/tensorflow/lite/testing/op_tests/greater.py index f30a085020b..a3210620ac8 100644 --- a/tensorflow/lite/testing/op_tests/greater.py +++ b/tensorflow/lite/testing/op_tests/greater.py @@ -32,6 +32,11 @@ def make_greater_tests(options): "input_shape_pair": [([1, 1, 1, 3], [1, 1, 1, 3]), ([2, 3, 4, 5], [2, 3, 4, 5]), ([2, 3, 3], [2, 3]), ([5, 5], [1]), ([10], [2, 4, 10])], + "fully_quantize": [False], + }, { + "input_dtype": [tf.float32], + "input_shape_pair": [([1, 1, 1, 3], [1, 1, 1, 3]), ([2, 3, 3], [2, 3])], + "fully_quantize": [True], }] def build_graph(parameters): @@ -60,4 +65,4 @@ def make_greater_tests(options): test_parameters, build_graph, build_inputs, - expected_tf_failures=3) + expected_tf_failures=4) diff --git a/tensorflow/lite/testing/op_tests/greater_equal.py 
b/tensorflow/lite/testing/op_tests/greater_equal.py index 20a49719f12..c7d7c3397da 100644 --- a/tensorflow/lite/testing/op_tests/greater_equal.py +++ b/tensorflow/lite/testing/op_tests/greater_equal.py @@ -32,6 +32,11 @@ def make_greater_equal_tests(options): "input_shape_pair": [([1, 1, 1, 3], [1, 1, 1, 3]), ([2, 3, 4, 5], [2, 3, 4, 5]), ([2, 3, 3], [2, 3]), ([5, 5], [1]), ([10], [2, 4, 10])], + "fully_quantize": [False], + }, { + "input_dtype": [tf.float32], + "input_shape_pair": [([1, 1, 1, 3], [1, 1, 1, 3]), ([2, 3, 3], [2, 3])], + "fully_quantize": [True], }] def build_graph(parameters): @@ -60,4 +65,4 @@ def make_greater_equal_tests(options): test_parameters, build_graph, build_inputs, - expected_tf_failures=3) + expected_tf_failures=4) diff --git a/tensorflow/lite/testing/op_tests/less.py b/tensorflow/lite/testing/op_tests/less.py index 099f0039454..2bb3c11ce57 100644 --- a/tensorflow/lite/testing/op_tests/less.py +++ b/tensorflow/lite/testing/op_tests/less.py @@ -32,6 +32,11 @@ def make_less_tests(options): "input_shape_pair": [([1, 1, 1, 3], [1, 1, 1, 3]), ([2, 3, 4, 5], [2, 3, 4, 5]), ([2, 3, 3], [2, 3]), ([5, 5], [1]), ([10], [2, 4, 10])], + "fully_quantize": [False], + }, { + "input_dtype": [tf.float32], + "input_shape_pair": [([1, 1, 1, 3], [1, 1, 1, 3]), ([2, 3, 3], [2, 3])], + "fully_quantize": [True], }] def build_graph(parameters): @@ -60,4 +65,4 @@ def make_less_tests(options): test_parameters, build_graph, build_inputs, - expected_tf_failures=3) + expected_tf_failures=4) diff --git a/tensorflow/lite/testing/op_tests/less_equal.py b/tensorflow/lite/testing/op_tests/less_equal.py index 2e8e8d03887..141d599a893 100644 --- a/tensorflow/lite/testing/op_tests/less_equal.py +++ b/tensorflow/lite/testing/op_tests/less_equal.py @@ -32,6 +32,11 @@ def make_less_equal_tests(options): "input_shape_pair": [([1, 1, 1, 3], [1, 1, 1, 3]), ([2, 3, 4, 5], [2, 3, 4, 5]), ([2, 3, 3], [2, 3]), ([5, 5], [1]), ([10], [2, 4, 10])], + "fully_quantize": [False], + }, { + "input_dtype": [tf.float32], + "input_shape_pair": [([1, 1, 1, 3], [1, 1, 1, 3]), ([2, 3, 3], [2, 3])], + "fully_quantize": [True], }] def build_graph(parameters): @@ -60,4 +65,4 @@ def make_less_equal_tests(options): test_parameters, build_graph, build_inputs, - expected_tf_failures=3) + expected_tf_failures=4) From 73dc02713aadac4ced06f1bfb6c178224f7c125a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Jul 2020 02:02:11 -0700 Subject: [PATCH 0585/2522] Update GraphDef version to 464. PiperOrigin-RevId: 321524961 Change-Id: I782524bee6e0b64f547a50fc6c911a2fb443f023 --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 8f071beac51..2e02304e18c 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 463 // Updated: 2020/7/15 +#define TF_GRAPH_DEF_VERSION 464 // Updated: 2020/7/16 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From 4f3cecaf3b8ed4f1eb037641551efd8d77c5c3ee Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Thu, 16 Jul 2020 02:02:21 -0700 Subject: [PATCH 0586/2522] compat: Update forward compatibility horizon to 2020-07-16 PiperOrigin-RevId: 321524980 Change-Id: I5264b284e31404217a720741f6c707c2dcb2afcc --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index f6671b19a9c..601fac967a3 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 15) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 16) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From 7adc5c6b78022a8c2cd25a0f14a7ade3819b2307 Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Thu, 16 Jul 2020 03:05:52 -0700 Subject: [PATCH 0587/2522] Integrate LLVM at https://github.com/llvm/llvm-project/commit/b2018198c32a PiperOrigin-RevId: 321532331 Change-Id: Ic472f0b495d333c9a76652c2e0926d509d225e2a --- tensorflow/workspace.bzl | 4 +-- third_party/mlir/BUILD | 69 ++++++++++++++++--------------------- third_party/mlir/test.BUILD | 1 + 3 files changed, 32 insertions(+), 42 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index eec97c99111..c95d7a8066e 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -710,8 +710,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. 
- LLVM_COMMIT = "f233b92f92a669f9f2cc6d08d57ca4931dd61b78" - LLVM_SHA256 = "020aff0a2cb5246e152d274a59006d81eb8338549b03c0e2e7fd1b630736fd53" + LLVM_COMMIT = "b2018198c32a0535bb1f5bb5b40fbcf50d8d47b7" + LLVM_SHA256 = "748e5da6e911046190a0962e7e0bf864f5861da7d52f6db6b547d23a800ca877" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), diff --git a/third_party/mlir/BUILD b/third_party/mlir/BUILD index 46ce6833de3..ec0574f3367 100644 --- a/third_party/mlir/BUILD +++ b/third_party/mlir/BUILD @@ -32,45 +32,32 @@ cc_library( textual_hdrs = ["include/mlir/IR/DialectSymbolRegistry.def"], ) -gentbl( - name = "OpAsmInterfacesIncGen", - strip_include_prefix = "include", - tbl_outs = [ - ( - "-gen-op-interface-decls", - "include/mlir/IR/OpAsmInterface.h.inc", - ), - ( - "-gen-op-interface-defs", - "include/mlir/IR/OpAsmInterface.cpp.inc", - ), - ], - tblgen = ":mlir-tblgen", - td_file = "include/mlir/IR/OpAsmInterface.td", - td_srcs = [ - ":OpBaseTdFiles", - ], -) - -gentbl( - name = "SymbolInterfacesIncGen", - strip_include_prefix = "include", - tbl_outs = [ - ( - "-gen-op-interface-decls", - "include/mlir/IR/SymbolInterfaces.h.inc", - ), - ( - "-gen-op-interface-defs", - "include/mlir/IR/SymbolInterfaces.cpp.inc", - ), - ], - tblgen = ":mlir-tblgen", - td_file = "include/mlir/IR/SymbolInterfaces.td", - td_srcs = [ - ":OpBaseTdFiles", - ], -) +[ + gentbl( + name = name + "IncGen", + strip_include_prefix = "include", + tbl_outs = [ + ( + "-gen-op-interface-decls", + "include/mlir/IR/" + name + ".h.inc", + ), + ( + "-gen-op-interface-defs", + "include/mlir/IR/" + name + ".cpp.inc", + ), + ], + tblgen = ":mlir-tblgen", + td_file = "include/mlir/IR/" + name + ".td", + td_srcs = [ + ":OpBaseTdFiles", + ], + ) + for name in [ + "OpAsmInterface", + "RegionKindInterface", + "SymbolInterfaces", + ] +] cc_library( name = "IR", @@ -88,7 +75,8 @@ cc_library( ":CallOpInterfacesIncGen", ":DialectSymbolRegistry", ":InferTypeOpInterfaceIncGen", - ":OpAsmInterfacesIncGen", + ":OpAsmInterfaceIncGen", + ":RegionKindInterfaceIncGen", ":SideEffectInterfacesIncGen", ":Support", ":SymbolInterfacesIncGen", @@ -3736,6 +3724,7 @@ exports_files( "include/mlir/Dialect/Shape/IR/ShapeBase.td", "include/mlir/IR/OpAsmInterface.td", "include/mlir/IR/OpBase.td", + "include/mlir/IR/RegionKindInterface.td", "include/mlir/IR/SymbolInterfaces.td", "include/mlir/Transforms/InliningUtils.h", "include/mlir/Interfaces/InferTypeOpInterface.td", diff --git a/third_party/mlir/test.BUILD b/third_party/mlir/test.BUILD index e0966054542..36e68ac2751 100644 --- a/third_party/mlir/test.BUILD +++ b/third_party/mlir/test.BUILD @@ -73,6 +73,7 @@ gentbl( td_srcs = [ "@llvm-project//mlir:OpBaseTdFiles", "@llvm-project//mlir:include/mlir/IR/OpAsmInterface.td", + "@llvm-project//mlir:include/mlir/IR/RegionKindInterface.td", "@llvm-project//mlir:include/mlir/IR/SymbolInterfaces.td", "@llvm-project//mlir:include/mlir/Interfaces/CallInterfaces.td", "@llvm-project//mlir:include/mlir/Interfaces/ControlFlowInterfaces.td", From c2c96f6e097e646ac610eabc17b9b1cb1b00be29 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Jul 2020 03:07:53 -0700 Subject: [PATCH 0588/2522] Change xla-legalize-tf-with-tf2xla pass to use a PatternRewrite rule Previously it used a fully custom pass what made it impossible to reuse the code with a conversion target. 
PiperOrigin-RevId: 321532581 Change-Id: I58744ca9f494be1a328731af01a397302dc14de8 --- .../xla/tests/legalize-tf-with-tf2xla.mlir | 4 +- .../xla/transforms/legalize_tf_with_tf2xla.cc | 173 ++++++++---------- 2 files changed, 81 insertions(+), 96 deletions(-) diff --git a/tensorflow/compiler/mlir/xla/tests/legalize-tf-with-tf2xla.mlir b/tensorflow/compiler/mlir/xla/tests/legalize-tf-with-tf2xla.mlir index ad4ef4b8f77..5a1edc0d933 100644 --- a/tensorflow/compiler/mlir/xla/tests/legalize-tf-with-tf2xla.mlir +++ b/tensorflow/compiler/mlir/xla/tests/legalize-tf-with-tf2xla.mlir @@ -116,8 +116,7 @@ func @convert(%arg0: tensor<2xi32>) -> tensor<2xf32> { // CHECK-LABEL: func @constant func @constant(%arg0: tensor<2xf32>) -> tensor<2xf32> { - // CHECK: %[[SCALAR_ONE:.*]] = mhlo.constant dense<1.000000e+00> : tensor - // CHECK: %[[ONE:.*]] = "mhlo.broadcast_in_dim"(%[[SCALAR_ONE]]) {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor) -> tensor<2xf32> + // CHECK: %[[ONE:.*]] = mhlo.constant dense<1.000000e+00> : tensor<2xf32> // CHECK: %[[RESULT:.*]] = mhlo.divide %[[ONE]], %arg0 : tensor<2xf32> // CHECK: return %[[RESULT]] @@ -199,7 +198,6 @@ func @dynamic_update_slice(%arg0: tensor<3x4xi32>, %arg1: tensor<2x2xi32>, %arg2 // CHECK-SAME: (%[[ARG0:.*]]: tensor<3x2xi32>, %[[ARG1:.*]]: tensor<3xf32>, %[[ARG2:.*]]: tensor) func @sparse_to_dense(%arg0: tensor<3x2xi32>, %arg1: tensor<3xf32>, %arg2: tensor) -> tensor<3x3xf32> { -// CHECK: %[[CST:.*]] = mhlo.constant dense<3> : tensor<2xi32> // CHECK: %[[DEFAULT:.*]] = "mhlo.broadcast_in_dim"(%[[ARG2]]) {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor) -> tensor<3x3xf32> // CHECK: %[[RESULT:.*]] = "mhlo.scatter"(%[[DEFAULT]], %[[ARG0]], %[[ARG1]]) ( { diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc index d25b38d9ece..5ba31318c31 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_with_tf2xla.cc @@ -25,11 +25,13 @@ limitations under the License. #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project +#include "mlir/IR/Builders.h" // from @llvm-project #include "mlir/IR/Diagnostics.h" // from @llvm-project #include "mlir/IR/Function.h" // from @llvm-project #include "mlir/IR/Location.h" // from @llvm-project #include "mlir/IR/Module.h" // from @llvm-project #include "mlir/IR/Operation.h" // from @llvm-project +#include "mlir/IR/PatternMatch.h" // from @llvm-project #include "mlir/IR/StandardTypes.h" // from @llvm-project #include "mlir/IR/Types.h" // from @llvm-project #include "mlir/IR/Value.h" // from @llvm-project @@ -210,7 +212,7 @@ static bool IsOpAllowlisted(Operation* op) { } static std::unique_ptr CreateDeviceMgr( - const std::string& device_type, const Location& loc) { + const std::string& device_type) { // Register compilation kernels for all registered XLA backends. 
tensorflow::XlaOpRegistry::RegisterCompilationKernels(); @@ -219,40 +221,41 @@ static std::unique_ptr CreateDeviceMgr( return absl::make_unique(std::move(device)); } -class FuncLegalizer { +class Tf2XlaRewriter { public: - static LogicalResult Legalize(FuncOp func, const std::string& device_type) { - FuncLegalizer legalizer(func, device_type); - if (failed(legalizer.PrepareParams())) return failure(); - return legalizer.Legalize(); + static LogicalResult RewriteOp(Operation* op, OpBuilder& builder, + const std::string& device_type) { + Tf2XlaRewriter rewriter(op, builder, device_type); + if (failed(rewriter.PrepareParams())) return failure(); + return rewriter.LegalizeOp(); } private: - FuncLegalizer(FuncOp func, const std::string& device_type) - : func_(func), device_type_(device_type), hlo_builder_(func) {} + Tf2XlaRewriter(Operation* op, OpBuilder builder, + const std::string& device_type) + : op_(op), + device_type_(device_type), + hlo_builder_(op->getName().getStringRef().str(), builder, + op->getLoc()) {} - ~FuncLegalizer() { context_->Unref(); } + ~Tf2XlaRewriter() { context_->Unref(); } // Prepares OpKernelContext params common to all the ops. // Emits an error on failure. LogicalResult PrepareParams(); - // Tries to legalize supported TensorFlow ops. - // Emits an error on failure. - LogicalResult Legalize(); - // Tries to legalize the specified TensorFlow op, if supported. // // Emits an error and returns failure if an error is encountered during // conversion. Note that success return value doesn't mean successful // legalization. - LogicalResult LegalizeOp(Operation* op); + LogicalResult LegalizeOp(); // Converts the given operand to expression of kind kConstant or kXlaOp. // Emits a remark and returns expression of kind kInvalid on failure. tensorflow::XlaExpression GetExprForOperand(Value operand, Operation* op); - FuncOp func_; + Operation* op_; std::string device_type_; ::xla::MlirHloBuilder hlo_builder_; @@ -268,15 +271,14 @@ class FuncLegalizer { tensorflow::OpKernelContext::Params params_; }; -LogicalResult FuncLegalizer::PrepareParams() { +LogicalResult Tf2XlaRewriter::PrepareParams() { // XlaCompiler within the context is only used by the functional ops to // compile functions. We are not handling those at the moment so XlaCompiler // is not required. 
context_ = new tensorflow::XlaContext(/*compiler=*/nullptr, &hlo_builder_); context_->Ref(); - mlir::Location loc = func_.getLoc(); - device_mgr_ = CreateDeviceMgr(device_type_, loc); + device_mgr_ = CreateDeviceMgr(device_type_); if (!device_mgr_) return failure(); // Type of params_.device is DeviceBase* so store it as Device* to access @@ -296,18 +298,16 @@ LogicalResult FuncLegalizer::PrepareParams() { device_->resource_manager(), tensorflow::XlaContext::kXlaContextResourceName, context_); if (!status.ok()) { - emitError(loc) << "failed to create XlaContext resource: " - << status.ToString(); - return failure(); + return emitError(op_->getLoc()) + << "failed to create XlaContext resource: " << status.ToString(); } params_.step_container = step_container_.get(); tensorflow::StatusOr version_or = tensorflow::GetTfGraphProducerVersion( - func_.getParentOfType()); + op_->getParentOfType()); if (!version_or.ok()) { - emitError(loc) << version_or.status().ToString(); - return failure(); + return emitError(op_->getLoc()) << version_or.status().ToString(); } flib_def_ = absl::make_unique( @@ -319,46 +319,21 @@ LogicalResult FuncLegalizer::PrepareParams() { return success(); } -LogicalResult FuncLegalizer::Legalize() { - if (func_.empty()) return success(); - - // TensorFlow functions don't use CFGs. - if (!llvm::hasSingleElement(func_)) { - emitError(func_.getLoc()) << "requires at most one block in a TF function"; - return failure(); - } - Block& block = func_.front(); - - std::vector ops; - ops.reserve(block.getOperations().size()); - for (Operation& op : block.getOperations()) { - ops.push_back(&op); - } - - for (Operation* op : ops) { - if (failed(LegalizeOp(op))) return failure(); - } - return success(); -} - -LogicalResult FuncLegalizer::LegalizeOp(Operation* op) { - if (!IsOpAllowlisted(op)) return success(); - +LogicalResult Tf2XlaRewriter::LegalizeOp() { // Only static shaped operands are supported in XLA builders for now. - for (Type ty : op->getOperandTypes()) { + for (Type ty : op_->getOperandTypes()) { auto ranked_ty = ty.dyn_cast(); if (!ranked_ty || !ranked_ty.hasStaticShape()) { - op->emitRemark() << "lowering requires static shaped tensor operands"; - return success(); + return op_->emitRemark() + << "lowering requires static shaped tensor operands"; } } auto nodedef_or = tensorflow::ConvertTFDialectOpToNodeDef( - op, name_mapper_.GetUniqueName(op), /*ignore_unregistered_attrs=*/true); + op_, name_mapper_.GetUniqueName(op_), /*ignore_unregistered_attrs=*/true); if (!nodedef_or.ok()) { - op->emitRemark() << "failed to convert op to NodeDef: " - << nodedef_or.status().ToString(); - return success(); + return op_->emitRemark() << "failed to convert op to NodeDef: " + << nodedef_or.status().ToString(); } std::shared_ptr props; @@ -366,15 +341,14 @@ LogicalResult FuncLegalizer::LegalizeOp(Operation* op) { *nodedef_or.ValueOrDie(), params_.function_library->GetFunctionLibraryDefinition(), &props); if (!status.ok()) { - op->emitRemark() << "failed to create NodeProperties: " - << status.ToString(); - return success(); + return op_->emitRemark() + << "failed to create NodeProperties: " << status.ToString(); } tensorflow::OpKernel* op_kernel_raw; status = params_.function_library->CreateKernel(props, &op_kernel_raw); if (!status.ok()) { - op->emitRemark() << "failed to create tf2xla kernel: " << status.ToString(); - return success(); + return op_->emitRemark() + << "failed to create tf2xla kernel: " << status.ToString(); } // Transfer ownership of the kernel to a local smart pointer. 
auto op_kernel = absl::WrapUnique(op_kernel_raw); @@ -383,9 +357,8 @@ LogicalResult FuncLegalizer::LegalizeOp(Operation* op) { status = tensorflow::XlaOpRegistry::CompileTimeConstantInputs( *op_kernel, &required_constants); if (!status.ok()) { - op->emitRemark() << "failed to compute required constants: " - << status.ToString(); - return success(); + return op_->emitRemark() + << "failed to compute required constants: " << status.ToString(); } llvm::SmallDenseSet required_consts; required_consts.insert(required_constants.begin(), required_constants.end()); @@ -395,37 +368,35 @@ LogicalResult FuncLegalizer::LegalizeOp(Operation* op) { InlinedVector expressions; InlinedVector tensors; InlinedVector inputs; - expressions.reserve(op->getNumOperands()); - tensors.reserve(op->getNumOperands()); - inputs.reserve(op->getNumOperands()); + expressions.reserve(op_->getNumOperands()); + tensors.reserve(op_->getNumOperands()); + inputs.reserve(op_->getNumOperands()); // Prepare the list of Tensor inputs for the kernel. - for (auto it : llvm::enumerate(op->getOperands())) { + for (auto it : llvm::enumerate(op_->getOperands())) { Value operand = it.value(); size_t idx = it.index(); - tensorflow::XlaExpression expr = GetExprForOperand(operand, op); + tensorflow::XlaExpression expr = GetExprForOperand(operand, op_); tensorflow::XlaExpression::Kind kind = expr.kind(); - if (kind == tensorflow::XlaExpression::Kind::kInvalid) return success(); + if (kind == tensorflow::XlaExpression::Kind::kInvalid) return failure(); if (required_consts.count(idx) && kind != tensorflow::XlaExpression::Kind::kConstant) { - op->emitRemark() << "lowering requires operand #" << idx - << " to be a constant"; - return success(); + return op_->emitRemark() + << "lowering requires operand #" << idx << " to be a constant"; } expressions.push_back(expr); if (!tensorflow::DataTypeCanUseMemcpy(expr.dtype())) { - op->emitRemark() << "skipping legalization due to unsupported type " - << operand.getType(); - return success(); + return op_->emitRemark() + << "skipping legalization due to unsupported type " + << operand.getType(); } auto shape_or = expr.GetShape(); if (!shape_or.ok()) { - op->emitRemark() << "failed to get shape for expression. " - << expr.HumanString(); - return success(); + return op_->emitRemark() + << "failed to get shape for expression. " << expr.HumanString(); } tensors.emplace_back( @@ -439,32 +410,31 @@ LogicalResult FuncLegalizer::LegalizeOp(Operation* op) { params_.inputs = &inputs; params_.op_kernel = op_kernel.get(); llvm::SmallVector output_attr( - op->getNumResults()); + op_->getNumResults()); params_.output_attr_array = output_attr.data(); - hlo_builder_.setInsertionPoint(op); - hlo_builder_.SetLocation(op->getLoc()); + hlo_builder_.setInsertionPoint(op_); + hlo_builder_.SetLocation(op_->getLoc()); // Execute the kernel. - tensorflow::OpKernelContext op_context(¶ms_, op->getNumResults()); + tensorflow::OpKernelContext op_context(¶ms_, op_->getNumResults()); device_->Compute(params_.op_kernel, &op_context); if (!op_context.status().ok()) { - op->emitRemark() << "compilation to HLO failed: " - << op_context.status().ToString(); - return success(); + return op_->emitRemark() + << "compilation to HLO failed: " << op_context.status().ToString(); } // Replace uses of old results using the corresponding value after the // lowering. 
- for (int i = 0, e = op->getNumResults(); i < e; i++) { + for (int i = 0, e = op_->getNumResults(); i < e; i++) { tensorflow::Tensor* output = op_context.mutable_output(i); const tensorflow::XlaExpression* expr = tensorflow::XlaOpKernelContext::CastExpressionFromTensor(*output); if (expr->kind() != tensorflow::XlaExpression::Kind::kXlaOp) - return op->emitError( + return op_->emitError( "expects XlaExpression of kind kXlaOp in compiled output"); auto value = hlo_builder_.GetValue(expr->handle()); - mlir::OpResult old_result = op->getResult(i); + mlir::OpResult old_result = op_->getResult(i); if (value.getType() != old_result.getType()) { value = hlo_builder_.create(value, old_result.getType()); @@ -472,12 +442,12 @@ LogicalResult FuncLegalizer::LegalizeOp(Operation* op) { old_result.replaceAllUsesWith(value); } - op->erase(); + op_->erase(); return success(); } -tensorflow::XlaExpression FuncLegalizer::GetExprForOperand(Value operand, - Operation* op) { +tensorflow::XlaExpression Tf2XlaRewriter::GetExprForOperand(Value operand, + Operation* op) { ElementsAttr const_attr; auto defining_op = operand.getDefiningOp(); if (defining_op && matchPattern(defining_op, m_Constant(&const_attr))) { @@ -509,6 +479,21 @@ tensorflow::XlaExpression FuncLegalizer::GetExprForOperand(Value operand, return tensorflow::XlaExpression::XlaOp(xla_op, dtype); } +class Tf2XlaRewritePattern : public RewritePattern { + public: + explicit Tf2XlaRewritePattern(const std::string& device_type) + : RewritePattern(1, MatchAnyOpTypeTag()), device_type_(device_type) {} + + LogicalResult matchAndRewrite(Operation* op, + PatternRewriter& rewriter) const override { + if (!IsOpAllowlisted(op)) return failure(); + return Tf2XlaRewriter::RewriteOp(op, rewriter, device_type_); + } + + private: + std::string device_type_; +}; + class LegalizeTF : public PassWrapper { public: LegalizeTF() = default; @@ -520,7 +505,9 @@ class LegalizeTF : public PassWrapper { LegalizeTF(const LegalizeTF&) {} void runOnFunction() override { - if (failed(FuncLegalizer::Legalize(getFunction(), device_type_))) + OwningRewritePatternList patterns; + patterns.insert(device_type_); + if (failed(applyPatternsAndFoldGreedily(getFunction(), patterns))) signalPassFailure(); } From 374a1e7a63f3839322dbb364dae22cf2bcf1c3fa Mon Sep 17 00:00:00 2001 From: Elena Zhelezina Date: Thu, 16 Jul 2020 11:38:59 +0100 Subject: [PATCH 0589/2522] Addressed review comments. 
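The pad.py change below collapses the dictionaries that only differed in the 16x8 quantization flag into their float counterparts by listing both flag values at once. This relies on the test generator expanding every value list in a parameter dictionary combinatorially (an assumption about the zip-test harness, but consistent with how the existing entries are written); a rough model of that expansion, using the 2-D case from the diff:

import itertools

params = {
    "dtype": ["float32"],
    "input_shape": [[1, 2]],
    "paddings": [[[0, 1], [2, 3]]],
    "constant_paddings": [True],
    "fully_quantize": [True],
    "quant_16x8": [False, True],
}
# Each generated test case picks one value from every list.
combinations = [dict(zip(params, values))
                for values in itertools.product(*params.values())]
assert len(combinations) == 2  # one case per quant_16x8 setting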
Change-Id: I12b1bfc081dd04f4ab5480ea8bafe280f0e588e5 --- tensorflow/lite/testing/op_tests/pad.py | 32 +++---------------------- 1 file changed, 3 insertions(+), 29 deletions(-) diff --git a/tensorflow/lite/testing/op_tests/pad.py b/tensorflow/lite/testing/op_tests/pad.py index ab243d4edc6..ec524270ea4 100644 --- a/tensorflow/lite/testing/op_tests/pad.py +++ b/tensorflow/lite/testing/op_tests/pad.py @@ -67,17 +67,7 @@ def make_pad_tests(options): [[0, 0], [0, 0], [0, 0], [0, 0]]], "constant_paddings": [True], "fully_quantize": [True], - "quant_16x8": [False] - }, - { - "dtype": [tf.float32], - "input_shape": [[1, 1, 2, 1], [2, 1, 1, 1]], - "paddings": [[[0, 0], [0, 1], [2, 3], [0, 0]], - [[0, 1], [0, 0], [0, 0], [2, 3]], - [[0, 0], [0, 0], [0, 0], [0, 0]]], - "constant_paddings": [True], - "fully_quantize": [True], - "quant_16x8": [True] + "quant_16x8": [False, True] }, # 2D: { @@ -86,15 +76,7 @@ def make_pad_tests(options): "paddings": [[[0, 1], [2, 3]]], "constant_paddings": [True], "fully_quantize": [True], - "quant_16x8": [False], - }, - { - "dtype": [tf.float32], - "input_shape": [[1, 2]], - "paddings": [[[0, 1], [2, 3]]], - "constant_paddings": [True], - "fully_quantize": [True], - "quant_16x8": [True], + "quant_16x8": [False, True], }, # 1D: { @@ -103,15 +85,7 @@ def make_pad_tests(options): "paddings": [[[1, 2]]], "constant_paddings": [True], "fully_quantize": [True], - "quant_16x8": [False], - }, - { - "dtype": [tf.float32], - "input_shape": [[1]], - "paddings": [[[1, 2]]], - "constant_paddings": [True], - "fully_quantize": [True], - "quant_16x8": [True], + "quant_16x8": [False, True], }, ] From e81911eb49a8090bcfc4f93fdf4e48d9076f84cc Mon Sep 17 00:00:00 2001 From: Elena Zhelezina Date: Thu, 16 Jul 2020 12:16:50 +0100 Subject: [PATCH 0590/2522] Addressed reviewer's comments. 
Change-Id: I42c178ea0ff5f40c961ede467d2c909b6a660874 --- tensorflow/lite/delegates/nnapi/acceleration_test_list.cc | 6 +++--- tensorflow/lite/tools/optimize/operator_property.cc | 1 - tensorflow/lite/tools/versioning/runtime_version.cc | 1 + 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc b/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc index bed96a7b04d..15245173761 100644 --- a/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc +++ b/tensorflow/lite/delegates/nnapi/acceleration_test_list.cc @@ -306,9 +306,9 @@ QuantizeOpTest/INT8,30 -ConstInt8MeanOpTest.QuantizedDifferentScale ConstUint8(Max|Min)OpTest/.+,29 ConstUint8(Mean)OpTest/.+ -Constint8(Mean|Max|Min)OpTest/.+,29 -ConstMeanOpTest.+/.+Int8 -MeanOpTest.+/.+Int8 +-ConstInt8(Max|Min)OpTest/.+,29 +-ConstMeanOpTest.*/.+ +-MeanOpTestQuantized.*/.+ ConstFloat(Sum|Prod|Max|Min)OpTest/NotKeepDims,29 ConstFloat(Sum|Prod|Max|Min)OpTest/KeepDims,29 ConstFloat(Mean|Any)OpTest/NotKeepDims diff --git a/tensorflow/lite/tools/optimize/operator_property.cc b/tensorflow/lite/tools/optimize/operator_property.cc index f2cb98ef31a..dc54579c1a9 100644 --- a/tensorflow/lite/tools/optimize/operator_property.cc +++ b/tensorflow/lite/tools/optimize/operator_property.cc @@ -802,7 +802,6 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index, property.inputs = {{0, {}}}; property.outputs = {{0, {}}}; property.version = 2; - property.quantizable_int16 = false; break; case BuiltinOperator_MINIMUM: property.arbitrary_inputs = true; diff --git a/tensorflow/lite/tools/versioning/runtime_version.cc b/tensorflow/lite/tools/versioning/runtime_version.cc index d345164f7e6..c2e3f279a90 100644 --- a/tensorflow/lite/tools/versioning/runtime_version.cc +++ b/tensorflow/lite/tools/versioning/runtime_version.cc @@ -170,6 +170,7 @@ std::string FindMinimumRuntimeVersionForOp(tflite::BuiltinOperator op_code, {{BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN, 3}, "2.3.0"}, {{BuiltinOperator_MEAN, 1}, "1.6.0"}, {{BuiltinOperator_MEAN, 2}, "1.14.0"}, + {{BuiltinOperator_MEAN, 3}, kPendingReleaseVersion}, {{BuiltinOperator_SUM, 1}, "1.10.0"}, {{BuiltinOperator_SUM, 2}, "1.15.0"}, {{BuiltinOperator_REDUCE_MAX, 1}, "1.11.0"}, From e4aca25a2664d4a50b03bcb16880b0ba37b7987d Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Thu, 16 Jul 2020 12:23:01 +0700 Subject: [PATCH 0591/2522] Add New Writable File --- .../filesystem/plugins/s3/s3_filesystem.cc | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc index 9bff8070427..ff272cae4de 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc @@ -495,6 +495,21 @@ void NewRandomAccessFile(const TF_Filesystem* filesystem, const char* path, TF_SetStatus(status, TF_OK, ""); } +void NewWritableFile(const TF_Filesystem* filesystem, const char* path, + TF_WritableFile* file, TF_Status* status) { + Aws::String bucket, object; + ParseS3Path(path, false, &bucket, &object, status); + if (TF_GetCode(status) != TF_OK) return; + + auto s3_file = static_cast(filesystem->plugin_filesystem); + GetS3Client(s3_file); + GetTransferManager(Aws::Transfer::TransferDirection::UPLOAD, s3_file); + file->plugin_file = new tf_writable_file::S3File( + bucket, object, s3_file->s3_client, + 
s3_file->transfer_managers[Aws::Transfer::TransferDirection::UPLOAD]); + TF_SetStatus(status, TF_OK, ""); +} + // TODO(vnvo2409): Implement later } // namespace tf_s3_filesystem From 33a4c1a0abaf93656992d11205eabcdb21980bc9 Mon Sep 17 00:00:00 2001 From: Stephan Herhut Date: Thu, 16 Jul 2020 04:40:32 -0700 Subject: [PATCH 0592/2522] Fix mhlo to lmhlo conversion for ReduceOp. The existing conversion no longer worked and was not save to undo. Furthermore, the pattern for mhlo.return had been removed. Also adds some tests to ensure this does not degrade again. PiperOrigin-RevId: 321542071 Change-Id: I304db0278a4b7bead2543d7ae4308be9a789f3e4 --- .../mhlo/transforms/hlo_legalize_to_lhlo.cc | 39 +++++++++---------- .../mlir/hlo/tests/hlo-legalize-to-lhlo.mlir | 23 +++++++++++ .../mlir/hlo/tests/lhlo-copy-removal.mlir | 22 +++++++++++ 3 files changed, 63 insertions(+), 21 deletions(-) diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/hlo_legalize_to_lhlo.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/hlo_legalize_to_lhlo.cc index 4ee45d56a8e..108689c28d9 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/hlo_legalize_to_lhlo.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/hlo_legalize_to_lhlo.cc @@ -42,9 +42,6 @@ namespace { template using BaseOpConversion = BufferAssignmentOpConversionPattern; -using StdReturnOpConverter = - detail::BufferAssignmentReturnOpConverter; Value InsertDynamicAllocAndDealloc(Location loc, Value result, Value shape_operand, @@ -272,27 +269,21 @@ struct HloToLhloReduceOpConverter : public BaseOpConversion { // Copy over the operations inside the region. rewriter.inlineRegionBefore(op.body(), new_op.body(), new_op.body().end()); - // Create new block arguments with correct type. + // Convert the region signature to memref and add extra result. auto& entry_block = new_op.body().front(); - int original_arg_count = entry_block.getNumArguments(); - for (int i = 0; i < original_arg_count; ++i) { - auto old_arg = entry_block.getArgument(i); - auto old_type = old_arg.getType().cast(); + TypeConverter::SignatureConversion sig_conversion( + entry_block.getNumArguments() + 1); + for (auto arg : entry_block.getArguments()) { + auto old_type = arg.getType().cast(); auto new_type = MemRefType::get(old_type.getShape(), old_type.getElementType()); - auto new_arg = entry_block.addArgument(new_type); - rewriter.replaceUsesOfBlockArgument(old_arg, new_arg); + sig_conversion.addInputs(arg.getArgNumber(), new_type); } - // Add an argument for the result. - entry_block.addArgument( - entry_block.getArgument(original_arg_count).getType()); - // Remove the old arguments. - for (int i = original_arg_count - 1; i >= 0; --i) { - entry_block.eraseArgument(i); - } - // Insert terminator at the end. - rewriter.setInsertionPointToEnd(&entry_block); - rewriter.create(loc); + auto return_op = cast(entry_block.getTerminator()); + auto result_type = return_op.results().front().getType().cast(); + sig_conversion.addInputs({MemRefType::get(result_type.getShape(), + result_type.getElementType())}); + rewriter.applySignatureConversion(&new_op.body(), sig_conversion); rewriter.replaceOp(op, ArrayRef(buffer_args).slice(operands.size())); @@ -300,6 +291,12 @@ struct HloToLhloReduceOpConverter : public BaseOpConversion { } }; +// Legalize mhlo.return to a lmhlo.copy and lmhlo.terminator. This functionality +// is provided by mlir buffer assignment, so use the pattern from there. +// TODO(DFKI): Move this out of detail. 
+using HloToLhloReturnOpConverter = detail::BufferAssignmentReturnOpConverter< + mhlo::ReturnOp, lmhlo::TerminatorOp, lmhlo::CopyOp, false>; + class HloToLhloTensorLoadOpConverter : public BaseOpConversion { public: @@ -312,7 +309,6 @@ class HloToLhloTensorLoadOpConverter } }; -// TODO(b/137624192): Rewrite into a copy and elide copy if possible. class HloToLhloTensorStoreOpConverter : public BaseOpConversion { public: @@ -506,6 +502,7 @@ void populateHLOToLHLOConversionPattern( HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloReduceOpConverter, + HloToLhloReturnOpConverter, HloToLhloTensorLoadOpConverter, HloToLhloTensorStoreOpConverter >(context, bufferAssignment, converter); diff --git a/tensorflow/compiler/mlir/hlo/tests/hlo-legalize-to-lhlo.mlir b/tensorflow/compiler/mlir/hlo/tests/hlo-legalize-to-lhlo.mlir index aa5d800b82b..789ab82761a 100644 --- a/tensorflow/compiler/mlir/hlo/tests/hlo-legalize-to-lhlo.mlir +++ b/tensorflow/compiler/mlir/hlo/tests/hlo-legalize-to-lhlo.mlir @@ -487,3 +487,26 @@ func @conv(%input: tensor<3x5x5x3xf32>, %filter : tensor<2x2x3x4xf32>) -> tensor } : (tensor<2x2x3x4xf32>, tensor<3x5x5x3xf32>) -> tensor<3x5x5x4xf32> return %out : tensor<3x5x5x4xf32> } + +// ----- + +// BOTH-LABEL: func @reduce +func @reduce(%arg0: tensor<1x8xf32>, %arg1: tensor) -> tensor<1xf32> { + // BOTH: %[[OUT:.*]] = alloc() : memref<1xf32> + // BOTH: "lmhlo.reduce"(%{{.+}}, %{{.+}}, %[[OUT]]) ( { + // BOTH: ^bb0(%[[ARG1:.*]]: memref, %[[ARG2:.*]]: memref, + // BOTH-SAME: %[[ARG3:.*]]: memref): + // BOTH: %[[TMP:.*]] = alloc() : memref + // BOTH: "lmhlo.add"(%[[ARG1]], %[[ARG2]], %[[TMP]]) + // BOTH: "lmhlo.copy"(%[[TMP]], %[[ARG3]]) + // BOTH: "lmhlo.terminator"() : () -> () + // BOTH: }) {dimensions = dense<1> : tensor<1xi64>} + // BOTH-SAME: : (memref<1x8xf32>, memref, memref<1xf32>) -> () + %0 = "mhlo.reduce"(%arg0, %arg1) ( { + ^bb0(%arg2: tensor, %arg3: tensor): // no predecessors + %1 = mhlo.add %arg2, %arg3 : tensor + "mhlo.return"(%1) : (tensor) -> () + }) {dimensions = dense<1> : tensor<1xi64>} + : (tensor<1x8xf32>, tensor) -> tensor<1xf32> + return %0 : tensor<1xf32> +} diff --git a/tensorflow/compiler/mlir/hlo/tests/lhlo-copy-removal.mlir b/tensorflow/compiler/mlir/hlo/tests/lhlo-copy-removal.mlir index 6d7992cb868..3271595900d 100644 --- a/tensorflow/compiler/mlir/hlo/tests/lhlo-copy-removal.mlir +++ b/tensorflow/compiler/mlir/hlo/tests/lhlo-copy-removal.mlir @@ -91,3 +91,25 @@ func @must_be_removed_second(%arg0: memref<2x2xf32>, dealloc %0 : memref<2x2xf32> "lmhlo.terminator"() : () -> () } + +// ----- + +// CHECK-LABEL: func @reduce +func @reduce(%arg0: memref<1x8xf32>, %arg1: memref, %arg2: memref<1xf32>) { + %0 = alloc() : memref<1xf32> + "lmhlo.reduce"(%arg0, %arg1, %0) ( { + // CHECK: ^bb0(%[[ARG0:.*]]: memref, %[[ARG1:.*]]: memref, + // CHECK-SAME: %[[ARG2:.*]]: memref) + ^bb0(%arg3: memref, %arg4: memref, %arg5: memref): + %1 = alloc() : memref + // CHECK: "lmhlo.add"(%[[ARG0]], %[[ARG1]], %[[ARG2]]) + "lmhlo.add"(%arg3, %arg4, %1) + : (memref, memref, memref) -> () + // CHECK-NOT; lmhlo.copy + "lmhlo.copy"(%1, %arg5) : (memref, memref) -> () + "lmhlo.terminator"() : () -> () + }) {dimensions = dense<1> : tensor<1xi64>} + : (memref<1x8xf32>, memref, memref<1xf32>) -> () + "lmhlo.copy"(%0, %arg2) : (memref<1xf32>, memref<1xf32>) -> () + return +} From 126460334278781b5990d43eeb5448807cbdb150 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Thu, 16 Jul 2020 18:05:41 +0700 Subject: [PATCH 0593/2522] add NewAppendableFile --- 
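S3 has no native append, so the change below assembles an appendable file by opening a fresh writable file, replaying the existing object's bytes through the random-access reader in fixed-size chunks, and only then accepting new writes; on sync the buffered contents are uploaded as a whole object again. A usage sketch, where the bucket and key are placeholders and routing through tf.io.gfile assumes the plugin is registered for the s3:// scheme:

import tensorflow as tf

path = "s3://some-bucket/logs/run.txt"  # placeholder bucket and key

# Backed by NewWritableFile: truncate and write.
with tf.io.gfile.GFile(path, "w") as f:
    f.write("first record\n")

# Backed by NewAppendableFile: the existing bytes are read back first,
# then the appended data is uploaded together with them on flush/close.
with tf.io.gfile.GFile(path, "a") as f:
    f.write("second record\n")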
.../filesystem/plugins/s3/s3_filesystem.cc | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc index ff272cae4de..318a64b720a 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc @@ -42,6 +42,8 @@ constexpr uint64_t kS3MultiPartDownloadChunkSize = 50 * 1024 * 1024; // 50 MB constexpr size_t kDownloadRetries = 3; constexpr size_t kUploadRetries = 3; +constexpr size_t kS3ReadAppendableFileBufferSize = 1024 * 1024; // 1 MB + static void* plugin_memory_allocate(size_t size) { return calloc(1, size); } static void plugin_memory_free(void* ptr) { free(ptr); } @@ -56,6 +58,9 @@ static inline void TF_SetStatusFromAWSError( case Aws::Http::HttpResponseCode::REQUESTED_RANGE_NOT_SATISFIABLE: TF_SetStatus(status, TF_OUT_OF_RANGE, "Read less bytes than requested"); break; + case Aws::Http::HttpResponseCode::NOT_FOUND: + TF_SetStatus(status, TF_NOT_FOUND, error.GetMessage().c_str()); + break; default: TF_SetStatus( status, TF_UNKNOWN, @@ -510,6 +515,64 @@ void NewWritableFile(const TF_Filesystem* filesystem, const char* path, TF_SetStatus(status, TF_OK, ""); } +void NewAppendableFile(const TF_Filesystem* filesystem, const char* path, + TF_WritableFile* file, TF_Status* status) { + Aws::String bucket, object; + ParseS3Path(path, false, &bucket, &object, status); + if (TF_GetCode(status) != TF_OK) return; + + auto s3_file = static_cast(filesystem->plugin_filesystem); + GetS3Client(s3_file); + GetTransferManager(Aws::Transfer::TransferDirection::UPLOAD, s3_file); + + // We need to delete `file->plugin_file` in case of errors. + std::unique_ptr writer( + file, [](TF_WritableFile* file) { + if (file != nullptr && file->plugin_file != nullptr) { + tf_writable_file::Cleanup(file); + } + }); + writer->plugin_file = new tf_writable_file::S3File( + bucket, object, s3_file->s3_client, + s3_file->transfer_managers[Aws::Transfer::TransferDirection::UPLOAD]); + TF_SetStatus(status, TF_OK, ""); + + // Wraping inside a `std::unique_ptr` to prevent memory-leaking. + std::unique_ptr reader( + new TF_RandomAccessFile, [](TF_RandomAccessFile* file) { + if (file != nullptr) { + tf_random_access_file::Cleanup(file); + delete file; + } + }); + NewRandomAccessFile(filesystem, path, reader.get(), status); + if (TF_GetCode(status) != TF_OK) return; + + uint64_t offset = 0; + std::string buffer(kS3ReadAppendableFileBufferSize, {}); + while (true) { + auto read = tf_random_access_file::Read(reader.get(), offset, + kS3ReadAppendableFileBufferSize, + &buffer[0], status); + if (TF_GetCode(status) == TF_NOT_FOUND) { + break; + } else if (TF_GetCode(status) == TF_OK) { + offset += read; + tf_writable_file::Append(file, buffer.c_str(), read, status); + if (TF_GetCode(status) != TF_OK) return; + } else if (TF_GetCode(status) == TF_OUT_OF_RANGE) { + offset += read; + tf_writable_file::Append(file, buffer.c_str(), read, status); + if (TF_GetCode(status) != TF_OK) return; + break; + } else { + return; + } + } + writer.release(); + TF_SetStatus(status, TF_OK, ""); +} + // TODO(vnvo2409): Implement later } // namespace tf_s3_filesystem From 353d0df84626f37885d58ba7b7d2c87a48e962a5 Mon Sep 17 00:00:00 2001 From: Cheng CHEN Date: Thu, 16 Jul 2020 21:08:50 +0800 Subject: [PATCH 0594/2522] Fix condition error for parallelism output judgement. 
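The one-line fix below swaps std::max for std::min in the heuristic that decides whether to parallelize across output tensors: the input size should be compared against the number of workers that can actually be used, that is, the smaller of the thread count and the split count, otherwise small split counts on many-threaded machines would almost never take the parallel path. Restated as a sketch (a paraphrase of the condition, not code taken from the kernel):

def use_parallelism_between_outputs(num_split, num_threads,
                                    input_element_count):
    # Parallelize across outputs only when there are enough splits, enough
    # elements per usable worker, and not so many elements that the other
    # partitioning strategy wins.
    usable_workers = min(num_threads, num_split)  # the fix: previously max()
    return (num_split >= 4
            and input_element_count >= usable_workers * 4096
            and input_element_count < num_split * 180 * 1024)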
--- tensorflow/core/kernels/split_v_op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/split_v_op.cc b/tensorflow/core/kernels/split_v_op.cc index 1eaeda927f8..bf6ea5cd3c9 100644 --- a/tensorflow/core/kernels/split_v_op.cc +++ b/tensorflow/core/kernels/split_v_op.cc @@ -206,7 +206,7 @@ class SplitVOpCPUImpl { const int num_split = split_start_points.size(); const bool use_parallelism_between_outputs = (num_split >= 4 && - input_element_count >= std::max(num_threads, num_split) * 4096 && + input_element_count >= std::min(num_threads, num_split) * 4096 && input_element_count < num_split * 180 * 1024); auto range_output_func = [&indices, context, &input_shape, split_dim, From 206cfaf4d2e51200c64b35fd4749b3fd09d1be68 Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Thu, 16 Jul 2020 07:49:37 -0700 Subject: [PATCH 0595/2522] Integrate LLVM at https://github.com/llvm/llvm-project/commit/1067d3e176ea PiperOrigin-RevId: 321564409 Change-Id: I1a7169908d8a5100a27a638bed3b4dc37cc1c9d5 --- tensorflow/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index c95d7a8066e..80f0be2d128 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -710,8 +710,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "b2018198c32a0535bb1f5bb5b40fbcf50d8d47b7" - LLVM_SHA256 = "748e5da6e911046190a0962e7e0bf864f5861da7d52f6db6b547d23a800ca877" + LLVM_COMMIT = "1067d3e176ea7b0b1942c163bf8c6c90107768c1" + LLVM_SHA256 = "6f578d1d669000caef23164074d20d86600abf98adc4d7712fac6fa429383b1a" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From bed612fc5f0b783ad747d579160fffee4316bd7c Mon Sep 17 00:00:00 2001 From: Xingyu Long Date: Thu, 16 Jul 2020 11:11:57 -0400 Subject: [PATCH 0596/2522] Add benchmarks for other models. 
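The new benchmark tests below all follow the same shape: build a small Keras model, load a standard dataset, hand both to benchmark_util.measure_performance, and report the results through tf.test.Benchmark.report_benchmark. A condensed sketch of that pattern, with class and method names that are illustrative rather than part of the patch:

import tensorflow as tf

from tensorflow.python.keras.benchmarks import benchmark_util


class ExampleModelBenchmark(tf.test.Benchmark):  # illustrative name

  def _build_model(self):
    return tf.keras.Sequential([
        tf.keras.Input(shape=(784,)),
        tf.keras.layers.Dense(256, activation="relu"),
        tf.keras.layers.Dense(10),
    ])

  def benchmark_bs_128(self):
    (x_train, y_train), _ = tf.keras.datasets.mnist.load_data()
    x_train = x_train.reshape(-1, 784).astype("float32") / 255
    metrics, wall_time, extras = benchmark_util.measure_performance(
        self._build_model,
        x=x_train,
        y=y_train,
        batch_size=128,
        run_iters=2,
        optimizer="rmsprop",
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=["sparse_categorical_accuracy"])
    self.report_benchmark(
        iters=2, wall_time=wall_time, metrics=metrics, extras=extras)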
--- tensorflow/python/keras/benchmarks/BUILD | 92 +++++++++++ .../antirectifier_benchmark_test.py | 152 ++++++++++++++++++ .../cifar10_cnn_benchmark_test.py | 141 ++++++++++++++++ .../mnist_conv_benchmark_test.py | 134 +++++++++++++++ .../mnist_hierarchical_rnn_benchmark_test.py | 124 ++++++++++++++ .../mnist_irnn_benchmark_test.py | 128 +++++++++++++++ .../reuters_mlp_benchmark_test.py | 127 +++++++++++++++ 7 files changed, 898 insertions(+) create mode 100644 tensorflow/python/keras/benchmarks/keras_examples_benchmarks/antirectifier_benchmark_test.py create mode 100644 tensorflow/python/keras/benchmarks/keras_examples_benchmarks/cifar10_cnn_benchmark_test.py create mode 100644 tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_conv_benchmark_test.py create mode 100644 tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_hierarchical_rnn_benchmark_test.py create mode 100644 tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_irnn_benchmark_test.py create mode 100644 tensorflow/python/keras/benchmarks/keras_examples_benchmarks/reuters_mlp_benchmark_test.py diff --git a/tensorflow/python/keras/benchmarks/BUILD b/tensorflow/python/keras/benchmarks/BUILD index 87df84f2663..e9fdaa0e5fd 100644 --- a/tensorflow/python/keras/benchmarks/BUILD +++ b/tensorflow/python/keras/benchmarks/BUILD @@ -109,3 +109,95 @@ py_test( "//tensorflow:tensorflow_py", ], ) + +py_test( + name = "antirectifier_benchmark_test", + size = "medium", + srcs = ["keras_examples_benchmarks/antirectifier_benchmark_test.py"], + python_version = "PY3", + tags = [ + "no_pip", # b/161253163 + "no_windows", # b/160628318 + ], + deps = [ + ":benchmark_util", + "//tensorflow:tensorflow_py", + ], +) + +py_test( + name = "mnist_conv_benchmark_test", + size = "medium", + srcs = ["keras_examples_benchmarks/mnist_conv_benchmark_test.py"], + python_version = "PY3", + tags = [ + "no_pip", # b/161253163 + "no_windows", # b/160628318 + ], + deps = [ + ":benchmark_util", + "//tensorflow:tensorflow_py", + "//third_party/py/numpy", + ], +) + +py_test( + name = "mnist_hierarchical_rnn_benchmark_test", + size = "medium", + srcs = ["keras_examples_benchmarks/mnist_hierarchical_rnn_benchmark_test.py"], + python_version = "PY3", + tags = [ + "no_pip", # b/161253163 + "no_windows", # b/160628318 + ], + deps = [ + ":benchmark_util", + "//tensorflow:tensorflow_py", + ], +) + +py_test( + name = "mnist_irnn_benchmark_test", + size = "medium", + srcs = ["keras_examples_benchmarks/mnist_irnn_benchmark_test.py"], + python_version = "PY3", + tags = [ + "no_pip", # b/161253163 + "no_windows", # b/160628318 + ], + deps = [ + ":benchmark_util", + "//tensorflow:tensorflow_py", + ], +) + +py_test( + name = "reuters_mlp_benchmark_test", + size = "medium", + srcs = ["keras_examples_benchmarks/reuters_mlp_benchmark_test.py"], + python_version = "PY3", + tags = [ + "no_pip", # b/161253163 + "no_windows", # b/160628318 + ], + deps = [ + ":benchmark_util", + "//tensorflow:tensorflow_py", + "//third_party/py/numpy", + ], +) + +py_test( + name = "cifar10_cnn_benchmark_test", + size = "medium", + srcs = ["keras_examples_benchmarks/cifar10_cnn_benchmark_test.py"], + python_version = "PY3", + tags = [ + "no_pip", # b/161253163 + "no_windows", # b/160628318 + ], + deps = [ + ":benchmark_util", + "//tensorflow:tensorflow_py", + ], +) \ No newline at end of file diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/antirectifier_benchmark_test.py 
b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/antirectifier_benchmark_test.py new file mode 100644 index 00000000000..140c2ee0ccc --- /dev/null +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/antirectifier_benchmark_test.py @@ -0,0 +1,152 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Benchmarks on Antirectifier.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from tensorflow.python.keras.benchmarks import benchmark_util + + +class AntirectifierBenchmark(tf.test.Benchmark): + """Benchmarks for Antirectifier using `tf.test.Benchmark`.""" + # Required Arguments for measure_performance. + # x: Input data, it could be Numpy or load from tfds. + # y: Target data. If `x` is a dataset, generator instance, + # `y` should not be specified. + # loss: Loss function for model. + # optimizer: Optimizer for model. + # Other details can see in `measure_performance()` method of + # benchmark_util. + + def __init__(self): + super(AntirectifierBenchmark, self).__init__() + (self.x_train, self.y_train), _ = tf.keras.datasets.mnist.load_data() + self.x_train = self.x_train.reshape(-1, 784) + self.x_train = self.x_train.astype("float32") / 255 + + def _build_model(self): + """Model from https://keras.io/examples/keras_recipes/antirectifier/.""" + model = tf.keras.Sequential( + [ + tf.keras.Input(shape=(784,)), + tf.keras.layers.Dense(256), + Antirectifier(), + tf.keras.layers.Dense(256), + Antirectifier(), + tf.keras.layers.Dropout(0.5), + tf.keras.layers.Dense(10), + ] + ) + return model + + def benchmark_pixel_cnn_bs_128(self): + """Measure performance with batch_size=128 and run_iters=2.""" + batch_size = 128 + run_iters = 2 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + run_iters=run_iters, + optimizer="rmsprop", + loss=tf.keras.losses.SparseCategoricalCrossentropy( + from_logits=True), + metrics=['sparse_categorical_accuracy']) + + self.report_benchmark( + iters=run_iters, + wall_time=wall_time, + metrics=metrics, + extras=extras) + + def benchmark_pixel_cnn_bs_256(self): + """Measure performance with batch_size=256 and run_iters=3.""" + batch_size = 256 + run_iters = 3 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + run_iters=run_iters, + optimizer="rmsprop", + loss=tf.keras.losses.SparseCategoricalCrossentropy( + from_logits=True), + metrics=['sparse_categorical_accuracy']) + + self.report_benchmark( + iters=run_iters, + wall_time=wall_time, + metrics=metrics, + extras=extras) + + def benchmark_pixel_cnn_bs_512(self): + """Measure performance with batch_size=512 and run_iters=4.""" + batch_size = 512 + run_iters = 4 + 
metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + run_iters=run_iters, + optimizer="rmsprop", + loss=tf.keras.losses.SparseCategoricalCrossentropy( + from_logits=True), + metrics=['sparse_categorical_accuracy']) + + self.report_benchmark( + iters=run_iters, + wall_time=wall_time, + metrics=metrics, + extras=extras) + + +class Antirectifier(tf.keras.layers.Layer): + """Build simple custome layer.""" + def __init__(self, initializer="he_normal", **kwargs): + super(Antirectifier, self).__init__(**kwargs) + self.initializer = tf.keras.initializers.get(initializer) + + def build(self, input_shape): + output_dim = input_shape[-1] + self.kernel = self.add_weight( + shape=(output_dim * 2, output_dim), + initializer=self.initializer, + name="kernel", + trainable=True, + ) + + def call(self, inputs): #pylint: disable=arguments-differ + inputs -= tf.reduce_mean(inputs, axis=-1, keepdims=True) + pos = tf.nn.relu(inputs) + neg = tf.nn.relu(-inputs) + concatenated = tf.concat([pos, neg], axis=-1) + mixed = tf.matmul(concatenated, self.kernel) + return mixed + + def get_config(self): + # Implement get_config to enable serialization. This is optional. + base_config = super(Antirectifier, self).get_config() + config = {"initializer": tf.keras.initializers.serialize(self.initializer)} + return dict(list(base_config.items()) + list(config.items())) + + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/cifar10_cnn_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/cifar10_cnn_benchmark_test.py new file mode 100644 index 00000000000..f84f03fc3d8 --- /dev/null +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/cifar10_cnn_benchmark_test.py @@ -0,0 +1,141 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Benchmarks on CNN on cifar10 dataset.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from tensorflow.python.keras.benchmarks import benchmark_util + + +class Cifar10CNNBenchmark(tf.test.Benchmark): + """Benchmarks for CNN using `tf.test.Benchmark`.""" + # Required Arguments for measure_performance. + # x: Input data, it could be Numpy or load from tfds. + # y: Target data. If `x` is a dataset, generator instance, + # `y` should not be specified. + # loss: Loss function for model. + # optimizer: Optimizer for model. + # Other details can see in `measure_performance()` method of + # benchmark_util. 
+ + def __init__(self): + super(Cifar10CNNBenchmark, self).__init__() + self.num_classes = 10 + (self.x_train, self.y_train), _ = tf.keras.datasets.cifar10.load_data() + self.x_train = self.x_train.astype('float32') / 255 + self.y_train = tf.keras.utils.to_categorical(self.y_train, self.num_classes) + self.epochs = 25 + + def _build_model(self): + """Model from https://github.com/keras-team/keras/blob/master/examples/cifar10_cnn.py.""" + model = tf.keras.Sequential() + model.add(tf.keras.layers.Conv2D( + 32, (3, 3), padding='same', + input_shape=self.x_train.shape[1:])) + model.add(tf.keras.layers.Activation('relu')) + model.add(tf.keras.layers.Conv2D(32, (3, 3))) + model.add(tf.keras.layers.Activation('relu')) + model.add(tf.keras.layers.MaxPooling2D( + pool_size=(2, 2))) + model.add(tf.keras.layers.Dropout(0.25)) + + model.add(tf.keras.layers.Conv2D( + 64, (3, 3), padding='same')) + model.add(tf.keras.layers.Activation('relu')) + model.add(tf.keras.layers.Conv2D(64, (3, 3))) + model.add(tf.keras.layers.Activation('relu')) + model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2))) + model.add(tf.keras.layers.Dropout(0.25)) + + model.add(tf.keras.layers.Flatten()) + model.add(tf.keras.layers.Dense(512)) + model.add(tf.keras.layers.Activation('relu')) + model.add(tf.keras.layers.Dropout(0.5)) + model.add(tf.keras.layers.Dense(self.num_classes)) + model.add(tf.keras.layers.Activation('softmax')) + return model + + def benchmark_cnn_cifar10_bs_1024(self): + """Measure performance with batch_size=1024 and run_iters=2.""" + batch_size = 1024 + run_iters = 2 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + run_iters=run_iters, + epochs=self.epochs, + optimizer=tf.keras.optimizers.RMSprop( + learning_rate=0.0001, decay=1e-6), + loss='categorical_crossentropy', + metrics=['accuracy']) + + self.report_benchmark( + iters=run_iters, + wall_time=wall_time, + metrics=metrics, + extras=extras) + + def benchmark_cnn_cifar10_bs_256(self): + """Measure performance with batch_size=256 and run_iters=3.""" + batch_size = 256 + run_iters = 3 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + run_iters=run_iters, + epochs=self.epochs, + optimizer=tf.keras.optimizers.RMSprop( + learning_rate=0.0001, decay=1e-6), + loss='categorical_crossentropy', + metrics=['accuracy']) + + self.report_benchmark( + iters=run_iters, + wall_time=wall_time, + metrics=metrics, + extras=extras) + + def benchmark_cnn_cifar10_bs_512(self): + """Measure performance with batch_size=512 and run_iters=4.""" + batch_size = 512 + run_iters = 3 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + run_iters=run_iters, + epochs=self.epochs, + optimizer=tf.keras.optimizers.RMSprop( + learning_rate=0.0001, decay=1e-6), + loss='categorical_crossentropy', + metrics=['accuracy']) + + self.report_benchmark( + iters=run_iters, + wall_time=wall_time, + metrics=metrics, + extras=extras) + + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_conv_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_conv_benchmark_test.py new file mode 100644 index 00000000000..52de9762273 --- /dev/null +++ 
b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_conv_benchmark_test.py @@ -0,0 +1,134 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Benchmarks on convnet on MNIST dataset.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +import tensorflow as tf + +from tensorflow.python.keras.benchmarks import benchmark_util + + +class ConvMnistBenchmark(tf.test.Benchmark): + """Benchmarks for convnet using `tf.test.Benchmark`.""" + # Required Arguments for measure_performance. + # x: Input data, it could be Numpy or load from tfds. + # y: Target data. If `x` is a dataset, generator instance, + # `y` should not be specified. + # loss: Loss function for model. + # optimizer: Optimizer for model. + # Other details can see in `measure_performance()` method of + # benchmark_util. + + def __init__(self): + super(ConvMnistBenchmark, self).__init__() + self.num_classes = 10 + self.input_shape = (28, 28, 1) + (self.x_train, self.y_train), _ = tf.keras.datasets.mnist.load_data() + self.x_train = self.x_train.astype('float32') / 255 + self.x_train = np.expand_dims(self.x_train, -1) + self.y_train = tf.keras.utils.to_categorical(self.y_train, self.num_classes) + self.epochs = 15 + + def _build_model(self): + """Model from https://keras.io/examples/vision/mnist_convnet/.""" + model = tf.keras.Sequential( + [ + tf.keras.Input(shape=self.input_shape), + tf.keras.layers.Conv2D( + 32, kernel_size=(3, 3), activation="relu"), + tf.keras.layers.MaxPooling2D( + pool_size=(2, 2)), + tf.keras.layers.Conv2D( + 64, kernel_size=(3, 3), activation="relu"), + tf.keras.layers.MaxPooling2D( + pool_size=(2, 2)), + tf.keras.layers.Flatten(), + tf.keras.layers.Dropout(0.5), + tf.keras.layers.Dense( + self.num_classes, activation="softmax"), + ] + ) + return model + + def benchmark_conv_mnist_bs_128(self): + """Measure performance with batch_size=128 and run_iters=2.""" + batch_size = 128 + run_iters = 2 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + run_iters=run_iters, + epochs=self.epochs, + optimizer="adam", + loss='categorical_crossentropy', + metrics=['accuracy']) + + self.report_benchmark( + iters=run_iters, + wall_time=wall_time, + metrics=metrics, + extras=extras) + + def benchmark_conv_mnist_bs_256(self): + """Measure performance with batch_size=256 and run_iters=3.""" + batch_size = 256 + run_iters = 3 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + run_iters=run_iters, + epochs=self.epochs, + optimizer="adam", + loss='categorical_crossentropy', + metrics=['accuracy']) + + self.report_benchmark( + iters=run_iters, + wall_time=wall_time, + metrics=metrics, + 
extras=extras) + + def benchmark_conv_mnist_bs_512(self): + """Measure performance with batch_size=512 and run_iters=4.""" + batch_size = 512 + run_iters = 3 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + run_iters=run_iters, + epochs=self.epochs, + optimizer="adam", + loss='categorical_crossentropy', + metrics=['accuracy']) + + self.report_benchmark( + iters=run_iters, + wall_time=wall_time, + metrics=metrics, + extras=extras) + + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_hierarchical_rnn_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_hierarchical_rnn_benchmark_test.py new file mode 100644 index 00000000000..3fadc228481 --- /dev/null +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_hierarchical_rnn_benchmark_test.py @@ -0,0 +1,124 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Benchmarks on Hierarchical RNN on MNIST digits.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from tensorflow.python.keras.benchmarks import benchmark_util + + +class HierarchicalRNNBenchmark(tf.test.Benchmark): + """Benchmarks for Hierarchical RNN using `tf.test.Benchmark`.""" + # Required Arguments for measure_performance. + # x: Input data, it could be Numpy or load from tfds. + # y: Target data. If `x` is a dataset, generator instance, + # `y` should not be specified. + # loss: Loss function for model. + # optimizer: Optimizer for model. + # Other details can see in `measure_performance()` method of + # benchmark_util. 
+ + def __init__(self): + super(HierarchicalRNNBenchmark, self).__init__() + self.num_classes = 10 + self.row_hidden, self.col_hidden = 128, 128 + (self.x_train, self.y_train), _ = tf.keras.datasets.mnist.load_data() + self.x_train = self.x_train.reshape(self.x_train.shape[0], 28, 28, 1) + self.x_train = self.x_train.astype('float32') / 255 + self.y_train = tf.keras.utils.to_categorical( + self.y_train, self.num_classes) + + def _build_model(self): + """Model from https://github.com/keras-team/keras/blob/master/examples + /mnist_hierarchical_rnn.py.""" + row, col, pixel = self.x_train.shape[1: ] + inputs = tf.keras.layers.Input(shape=(row, col, pixel)) + encoded_rows = tf.keras.layers.TimeDistributed( + tf.keras.layers.LSTM(self.row_hidden))(inputs) + encoded_cols = tf.keras.layers.LSTM( + self.col_hidden)(encoded_rows) + outputs = tf.keras.layers.Dense( + self.num_classes, activation='softmax')(encoded_cols) + model = tf.keras.Model(inputs, outputs) + + return model + + def benchmark_hrnn_mnist_bs_256(self): + """Measure performance with batch_size=256 and run_iters=4.""" + batch_size = 256 + run_iters = 4 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + run_iters=run_iters, + optimizer='rmsprop', + loss='categorical_crossentropy', + metrics=['accuracy']) + + self.report_benchmark( + iters=run_iters, + wall_time=wall_time, + metrics=metrics, + extras=extras) + + def benchmark_hrnn_mnist_bs_512(self): + """Measure performance with batch_size=512 and run_iters=5.""" + batch_size = 512 + run_iters = 5 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + run_iters=run_iters, + optimizer='rmsprop', + loss='categorical_crossentropy', + metrics=['accuracy']) + + self.report_benchmark( + iters=run_iters, + wall_time=wall_time, + metrics=metrics, + extras=extras) + + def benchmark_hrnn_mnist_bs_1024(self): + """Measure performance with batch_size=1024 and run_iters=3.""" + batch_size = 1024 + run_iters = 3 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + run_iters=run_iters, + optimizer='rmsprop', + loss='categorical_crossentropy', + metrics=['accuracy']) + + self.report_benchmark( + iters=run_iters, + wall_time=wall_time, + metrics=metrics, + extras=extras) + + +if __name__ == '__main__': + tf.test.main() + \ No newline at end of file diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_irnn_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_irnn_benchmark_test.py new file mode 100644 index 00000000000..8e480ed1d17 --- /dev/null +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_irnn_benchmark_test.py @@ -0,0 +1,128 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Benchmarks on IRNN on MNIST digits.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from tensorflow.python.keras.benchmarks import benchmark_util + + +class IRNNMnistBenchmark(tf.test.Benchmark): + """Benchmarks for Hierarchical RNN using `tf.test.Benchmark`.""" + # Required Arguments for measure_performance. + # x: Input data, it could be Numpy or load from tfds. + # y: Target data. If `x` is a dataset, generator instance, + # `y` should not be specified. + # loss: Loss function for model. + # optimizer: Optimizer for model. + # Other details can see in `measure_performance()` method of + # benchmark_util. + + def __init__(self): + super(IRNNMnistBenchmark, self).__init__() + self.num_classes = 10 + self.hidden_units = 100 + self.learning_rate = 1e-6 + (self.x_train, self.y_train), _ = tf.keras.datasets.mnist.load_data() + self.x_train = self.x_train.reshape(self.x_train.shape[0], -1, 1) + self.x_train = self.x_train.astype('float32') / 255 + self.y_train = tf.keras.utils.to_categorical( + self.y_train, self.num_classes) + + def _build_model(self): + """Model from https://github.com/keras-team/keras/ + blob/master/examples/mnist_irnn.py.""" + model = tf.keras.Sequential() + model.add(tf.keras.layers.SimpleRNN( + self.hidden_units, + kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.001), + recurrent_initializer=tf.keras.initializers.Identity(gain=1.0), + activation='relu', + input_shape=self.x_train.shape[1:])) + model.add(tf.keras.layers.Dense( + self.num_classes)) + model.add(tf.keras.layers.Activation('softmax')) + return model + + def benchmark_irnn_mnist_bs_256(self): + """Measure performance with batch_size=256 and run_iters=4.""" + batch_size = 256 + run_iters = 4 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + run_iters=run_iters, + optimizer=tf.keras.optimizers.RMSprop( + learning_rate=self.learning_rate), + loss='categorical_crossentropy', + metrics=['accuracy']) + + self.report_benchmark( + iters=run_iters, + wall_time=wall_time, + metrics=metrics, + extras=extras) + + def benchmark_irnn_mnist_bs_512(self): + """Measure performance with batch_size=512 and run_iters=3.""" + batch_size = 512 + run_iters = 3 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + run_iters=run_iters, + optimizer=tf.keras.optimizers.RMSprop( + learning_rate=self.learning_rate), + loss='categorical_crossentropy', + metrics=['accuracy']) + + self.report_benchmark( + iters=run_iters, + wall_time=wall_time, + metrics=metrics, + extras=extras) + + def benchmark_irnn_mnist_bs_1024(self): + """Measure performance with batch_size=1024 and run_iters=3.""" + batch_size = 1024 + run_iters = 3 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + run_iters=run_iters, + optimizer=tf.keras.optimizers.RMSprop( + learning_rate=self.learning_rate), + loss='categorical_crossentropy', + metrics=['accuracy']) + + self.report_benchmark( + iters=run_iters, + wall_time=wall_time, + metrics=metrics, + extras=extras) + + +if __name__ == 
'__main__': + tf.test.main() + \ No newline at end of file diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/reuters_mlp_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/reuters_mlp_benchmark_test.py new file mode 100644 index 00000000000..064b5a4bc96 --- /dev/null +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/reuters_mlp_benchmark_test.py @@ -0,0 +1,127 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Benchmarks on MLP on Reuters dataset.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +import tensorflow as tf + +from tensorflow.python.keras.benchmarks import benchmark_util + + +class MLPReutersBenchmark(tf.test.Benchmark): + """Benchmarks for MLP using `tf.test.Benchmark`.""" + # Required Arguments for measure_performance. + # x: Input data, it could be Numpy or load from tfds. + # y: Target data. If `x` is a dataset, generator instance, + # `y` should not be specified. + # loss: Loss function for model. + # optimizer: Optimizer for model. + # Other details can see in `measure_performance()` method of + # benchmark_util. 
+ + def __init__(self): + super(MLPReutersBenchmark, self).__init__() + self.max_words = 1000 + (self.x_train, self.y_train), _ = tf.keras.datasets.reuters.load_data( + num_words=self.max_words) + self.num_classes = np.max(self.y_train) + 1 + tokenizer = tf.keras.preprocessing.text.Tokenizer( + num_words=self.max_words) + self.x_train = tokenizer.sequences_to_matrix(self.x_train, mode='binary') + self.y_train = tf.keras.utils.to_categorical(self.y_train, self.num_classes) + self.epochs = 5 + + def _build_model(self): + """Model from https://github.com/keras-team/keras/blob/master/ + examples/reuters_mlp.py.""" + model = tf.keras.Sequential() + model.add(tf.keras.layers.Dense( + 512, input_shape=(self.max_words,))) + model.add(tf.keras.layers.Activation('relu')) + model.add(tf.keras.layers.Dropout(0.5)) + model.add(tf.keras.layers.Dense(self.num_classes)) + model.add(tf.keras.layers.Activation('softmax')) + return model + + def benchmark_mlp_reuters_bs_128(self): + """Measure performance with batch_size=128 and run_iters=2.""" + batch_size = 128 + run_iters = 2 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + run_iters=run_iters, + epochs=self.epochs, + optimizer='adam', + loss='categorical_crossentropy', + metrics=['accuracy']) + + self.report_benchmark( + iters=run_iters, + wall_time=wall_time, + metrics=metrics, + extras=extras) + + def benchmark_mlp_reuters_bs_256(self): + """Measure performance with batch_size=256 and run_iters=3.""" + batch_size = 256 + run_iters = 3 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + run_iters=run_iters, + epochs=self.epochs, + optimizer='adam', + loss='categorical_crossentropy', + metrics=['accuracy']) + + self.report_benchmark( + iters=run_iters, + wall_time=wall_time, + metrics=metrics, + extras=extras) + + def benchmark_mlp_reuters_bs_512(self): + """Measure performance with batch_size=512 and run_iters=4.""" + batch_size = 512 + run_iters = 4 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + run_iters=run_iters, + epochs=self.epochs, + optimizer='adam', + loss='categorical_crossentropy', + metrics=['accuracy']) + + self.report_benchmark( + iters=run_iters, + wall_time=wall_time, + metrics=metrics, + extras=extras) + + +if __name__ == '__main__': + tf.test.main() From e6b0fb3745700cb76ae8bffc2683e76ebb771149 Mon Sep 17 00:00:00 2001 From: Jakob Buchgraber Date: Thu, 16 Jul 2020 08:24:35 -0700 Subject: [PATCH 0597/2522] remote_config: upload find_cuda_config.py as part of execute() Prior to Bazel 3.1.0 repository_ctx.execute() did not support file uploads. We worked around this limitation by pasting the contents of a file on the command line string. In the case of find_cuda_config.py we would hit command line length limits and worked around this by maintaining a separate gzip compressed base64 encoded version of find_cuda_config.py. Bazel 3.1.0 added support for file uploads [1]. In this change we remove the the hack and upload find_cuda_config.py as part of repository_ctx.execute(). 
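For reference, the workaround being removed worked roughly as follows: find_cuda_config.py was gzip-compressed and base64-encoded ahead of time, and the repository rule then ran a short `python -c` program that decoded the blob, wrote it back to disk and executed it, keeping the command line under the 8192-character limit of cmd.exe. A simplified, self-contained Python sketch of that round trip (the script body below is a placeholder, not the real find_cuda_config.py):

import base64
import subprocess
import sys
import zlib

script = b"print('hello from find_cuda_config')"  # placeholder script body

# Offline step (what compress_find_cuda_config.py did): compress and encode.
encoded = base64.b64encode(zlib.compress(script)).decode('ascii')

# Execute-time step (what _exec_find_cuda_config embedded into `python -c`):
# decode, write the script to disk, and run it with the configured Python.
command = (
    "from zlib import decompress; from base64 import b64decode; "
    "from os import system; "
    "script = decompress(b64decode('{b64}')); "
    "open('script.py', 'wb').write(script); "
    "system('\"{python}\" script.py')"
).format(b64=encoded, python=sys.executable)

subprocess.check_call([sys.executable, "-c", command])
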
[1] https://github.com/bazelbuild/bazel/commit/54e9a0e7be90a65169b64394f41180a2303c1f99 PiperOrigin-RevId: 321570043 Change-Id: Idaf86f1c4a3acf39ab75ebabd80a92b0a7e4b84f --- tensorflow/opensource_only.files | 1 - third_party/gpus/compress_find_cuda_config.py | 37 ---------------- third_party/gpus/cuda_configure.bzl | 44 ++++++------------- .../gpus/find_cuda_config.py.gz.base64 | 1 - third_party/nccl/nccl_configure.bzl | 18 ++++---- third_party/tensorrt/tensorrt_configure.bzl | 7 ++- 6 files changed, 29 insertions(+), 79 deletions(-) delete mode 100644 third_party/gpus/compress_find_cuda_config.py delete mode 100644 third_party/gpus/find_cuda_config.py.gz.base64 diff --git a/tensorflow/opensource_only.files b/tensorflow/opensource_only.files index a0ce4305b16..296722936a8 100644 --- a/tensorflow/opensource_only.files +++ b/tensorflow/opensource_only.files @@ -101,7 +101,6 @@ tensorflow/third_party/gpus/cuda/cuda_config.h.tpl tensorflow/third_party/gpus/cuda/cuda_config.py.tpl tensorflow/third_party/gpus/cuda_configure.bzl tensorflow/third_party/gpus/find_cuda_config.py -tensorflow/third_party/gpus/find_cuda_config.py.gz.base64 tensorflow/third_party/gpus/rocm/BUILD tensorflow/third_party/gpus/rocm/BUILD.tpl tensorflow/third_party/gpus/rocm/build_defs.bzl.tpl diff --git a/third_party/gpus/compress_find_cuda_config.py b/third_party/gpus/compress_find_cuda_config.py deleted file mode 100644 index 606bbf2cdd5..00000000000 --- a/third_party/gpus/compress_find_cuda_config.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Compresses the contents of 'find_cuda.py'. - -The compressed file is what is actually being used. It works around remote -config not being able to upload files yet. -""" -import base64 -import zlib - - -def main(): - with open('find_cuda.py', 'rb') as f: - data = f.read() - - compressed = zlib.compress(data) - b64encoded = base64.b64encode(compressed) - - with open('find_cuda.py.gz.base64', 'wb') as f: - f.write(b64encoded) - - -if __name__ == '__main__': - main() - diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl index 70bb91159de..c09a22a73c0 100644 --- a/third_party/gpus/cuda_configure.bzl +++ b/third_party/gpus/cuda_configure.bzl @@ -605,42 +605,19 @@ def _cudart_static_linkopt(cpu_value): """Returns additional platform-specific linkopts for cudart.""" return "" if cpu_value == "Darwin" else "\"-lrt\"," -def _exec_find_cuda_config(repository_ctx, script_path, cuda_libraries): - python_bin = get_python_bin(repository_ctx) - - # If used with remote execution then repository_ctx.execute() can't - # access files from the source tree. A trick is to read the contents - # of the file in Starlark and embed them as part of the command. In - # this case the trick is not sufficient as the find_cuda_config.py - # script has more than 8192 characters. 
8192 is the command length - # limit of cmd.exe on Windows. Thus we additionally need to compress - # the contents locally and decompress them as part of the execute(). - compressed_contents = repository_ctx.read(script_path) - decompress_and_execute_cmd = ( - "from zlib import decompress;" + - "from base64 import b64decode;" + - "from os import system;" + - "script = decompress(b64decode('%s'));" % compressed_contents + - "f = open('script.py', 'wb');" + - "f.write(script);" + - "f.close();" + - "system('\"%s\" script.py %s');" % (python_bin, " ".join(cuda_libraries)) - ) - - return execute(repository_ctx, [python_bin, "-c", decompress_and_execute_cmd]) - # TODO(csigg): Only call once instead of from here, tensorrt_configure.bzl, # and nccl_configure.bzl. -def find_cuda_config(repository_ctx, script_path, cuda_libraries): +def find_cuda_config(repository_ctx, cuda_libraries): """Returns CUDA config dictionary from running find_cuda_config.py""" - exec_result = _exec_find_cuda_config(repository_ctx, script_path, cuda_libraries) + python_bin = get_python_bin(repository_ctx) + exec_result = execute(repository_ctx, [python_bin, repository_ctx.attr._find_cuda_config] + cuda_libraries) if exec_result.return_code: auto_configure_fail("Failed to run find_cuda_config.py: %s" % err_out(exec_result)) # Parse the dict from stdout. return dict([tuple(x.split(": ")) for x in exec_result.stdout.splitlines()]) -def _get_cuda_config(repository_ctx, find_cuda_config_script): +def _get_cuda_config(repository_ctx): """Detects and returns information about the CUDA installation on the system. Args: @@ -655,7 +632,7 @@ def _get_cuda_config(repository_ctx, find_cuda_config_script): compute_capabilities: A list of the system's CUDA compute capabilities. cpu_value: The name of the host operating system. """ - config = find_cuda_config(repository_ctx, find_cuda_config_script, ["cuda", "cudnn"]) + config = find_cuda_config(repository_ctx, ["cuda", "cudnn"]) cpu_value = get_cpu_value(repository_ctx) toolkit_path = config["cuda_toolkit_path"] @@ -951,9 +928,8 @@ def _create_local_cuda_repository(repository_ctx): "cuda:cuda_config.py", ]} tpl_paths["cuda:BUILD"] = _tpl_path(repository_ctx, "cuda:BUILD.windows" if is_windows(repository_ctx) else "cuda:BUILD") - find_cuda_config_script = repository_ctx.path(Label("@org_tensorflow//third_party/gpus:find_cuda_config.py.gz.base64")) - cuda_config = _get_cuda_config(repository_ctx, find_cuda_config_script) + cuda_config = _get_cuda_config(repository_ctx) cuda_include_path = cuda_config.config["cuda_include_dir"] cublas_include_path = cuda_config.config["cublas_include_dir"] @@ -1394,12 +1370,20 @@ remote_cuda_configure = repository_rule( remotable = True, attrs = { "environ": attr.string_dict(), + "_find_cuda_config": attr.label( + default = Label("@org_tensorflow//third_party/gpus:find_cuda_config.py"), + ), }, ) cuda_configure = repository_rule( implementation = _cuda_autoconf_impl, environ = _ENVIRONS + [_TF_CUDA_CONFIG_REPO], + attrs = { + "_find_cuda_config": attr.label( + default = Label("@org_tensorflow//third_party/gpus:find_cuda_config.py"), + ), + }, ) """Detects and configures the local CUDA toolchain. 
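Whichever way the script reaches the worker, its output contract is unchanged: find_cuda_config.py prints one `key: value` pair per line on stdout, and find_cuda_config() turns that into a dict using the same parsing expression as before. A small Python illustration; the keys appear in the surrounding code, while the paths and version numbers are invented:

# Hypothetical stdout from running `python find_cuda_config.py cuda cudnn`.
stdout = (
    "cuda_toolkit_path: /usr/local/cuda\n"
    "cuda_version: 10.1\n"
    "cuda_include_dir: /usr/local/cuda/include\n"
    "cublas_include_dir: /usr/local/cuda/include\n"
)

# Same parsing as find_cuda_config() in cuda_configure.bzl.
config = dict([tuple(x.split(": ")) for x in stdout.splitlines()])
print(config["cuda_version"])  # -> 10.1
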
diff --git a/third_party/gpus/find_cuda_config.py.gz.base64 b/third_party/gpus/find_cuda_config.py.gz.base64 deleted file mode 100644 index 981219bb10a..00000000000 --- a/third_party/gpus/find_cuda_config.py.gz.base64 +++ /dev/null @@ -1 +0,0 @@ -eJzdPGtT40iS3/0r6tRHINNGwOzGxJ5vmQsGum/Y5aAD3D23AV5vIZeNpmXJJ8kG78b+98vMqpKqSpINpnu2Y4iYHkuqzMrMyme93rDTdL7KoulDwb47PPoPNngQbCCSPM3ex+kjO1kUD2mWB+wkjtk1NsvZtchFthTjoPOm84ZdRCE0F2O2SMYiYwXAn8x5CP9TX3rsk8jyKE3Yd8Eh87GBpz553f8EDKt0wWZ8xZK0YItcAIooZ5MoFkw8hWJesChhYTqbxxFPQsEeo+KBulFIgAz2F4UivS84tObQfg5PE7Md4wURjH8PRTHvHxw8Pj4GnIgN0mx6EMuG+cHF+em7y5t3+0AwgXxMYpHnLBP/t4gyYPV+xfgc6An5PVAZ80eWZoxPMwHfihTpfcyiIkqmPZank+KRZwKwjKO8yKL7RWEJS1MHPJsNQFw8Yd7JDTu/8diPJzfnNz3A8fP54KerjwP288n19cnl4PzdDbu6ZqdXl2fng/OrS3h6z04u/8L+fH551mMCRAXdiKd5hvQDkRGKkYaO3QhhETBJJUH5XITRJAqBr2S64FPBpulSZAmww+Yim0U5DmYO5I0BSxzNooIX9KbGFHZz/EX/Op7nfciiBNTw9OPZCXR/n/FshcSwB8Gx/zEMUVikWSSIRraU2gcqlQKBKFjicpUXYhZ0OqjweZhFoGe54BnoQk6iaEOPipnbWHow4ii1Iu/AyxmqwFgUKKqERBxlmghCNJf0I3yYJpNoushIgAiXF+N0UQRE1ZwXD7nUJ8JOwAhV6mHJGiiYHjdUwYcsXUwfmEiWUZYmM5EUnSXPItRWMOXzCZgaW/I4GjsEREpIPcmclIoml4gTWUYDn4likZESMHgF4grTsVDSjEGN0fak8HAYAHYSAfGAv6KSI9nTBVIHRN0s5vM0Q82vwNBsaBj8KAnjxRhehYsfL05uuj34cXZ52WOXp6cXPRKMdFrXA3tIC/4ZEZU03XPQdFNDKnrAqJHrwfsR9jn6cDL46aZjiJBpESLl4I9mfD8Xcw6iA+BpnN5TJwEzeo/T9LPUJqk8eQcp1UolNYnc1QPPxvsowjEoIRGaL+5NMidZOkPygHpigXQj6MBQWvSihNGNllyBbNjVTWXTYzHhi7jAduBqx/1Oh4GxJounPnjFg0WeHcRpyOODcDHmPXohpSt1kajYjcdSa9j+fDcA+J9haNPHvM9KOliT2Hrgfzqs+jvt3919yNJpxmfsPYri7u7y0/nZ+Qn77w8fISzN5gt0oWyQpvHnqLi7Q/R3d3udznsQ6D0PP4NfHdNQAHXRfRRHxQod7kyYOhTnKUUVHoNFJtBwKUXYqYk3lc6ygXQYJQolK9vQAhAdqVVHDel+KedGLEracvRZGRahbaVb7GQ8jtAYeVxTVoDef+UfoCCLoj+tOp/eXd9A8NDjgq8GV1cXfz4f0GgCjDS7EgYfLKhmGLBQo5/LS7sj+er88mZwcnGhgdCeS+LwwSGOXpkw0gWMfjq71ii0IyAUA4jjV9fXAwNN+crquvNJ+9JmBQjB090LnVKASYMhPO1iSN19Cla76OLQLQvQB94p1UA5aIjEs3mxwtaLxPDTKeMhZTg8Wem2yn9BFAD9x/64ClzabYDpGlT0O8Vk9Ef17YeRQtJnQFTwd/ub9EEjUKg+C4LA/qh+VF8h0HY6kC6AU2ZRqn+luf6FHk//nse8QGL0M/ht9Qvc2DxLQ8g+yjervANpwXwVQ1jpY86DAj6e7svv++C+9nmxX6TzTpGt+jCgZJz5A3iDmCkkjw9R+NBR6eE5vXuHYahsjqkUAuRBPuePiYbDKDQSTyJcFNqTS1QVQSJppafTCWMOedQpuT/q0H9HNIDIu9j3HD5DM/CybBTlIFTwrD59kQGzlFQgMwe/C/kRpsPQzjMAH6VLfQaocr4m8IyH6XNAz3gG/ZSQM15g8qNVyOdhseDxqFRhnXXoN9QBaMnpgwg/oxwF+U/yvzrdmQmhMh0FLaN9R8UBLCpELF9qpXa7YYnKqsH6MI1Fs4wo+1JIbDpL3PhXw+U0xhY5hEMD5CWeVP8dMffvKLDfDbKFMAGC71wA5/k9xK0NEMHv1kLU/9w+iCg9Etk075fQrtz6lNRoodVSJgivWVAC2yK2QTEtDjFf0vmMjEaQ4BY8jikJ1YiuZYbZZz8rxcLWEnelXlJl7aQ4TUQgNRP+hbjtDDm4z0toIplV9iFlB2+gKoIKbA4s8tDIuspEGxE7+I5dBcQabu53Ow3qd1x7ZbUmWmrYeFbkWPP6NQPUlsuLUSx4XrzMdJUsj9kt+D1/2aU8dUn5qEMClIyF7wVed2gw1QRY584BNThkP1TS0IxMRaGjlGYFE88eS/hMaH+jFAOdgQot0D2AQ7FF2fSpWakFUg2QwhirMfgepUE6Fxqzl3kQnRPIvSHXPPYWxWT/D143yAAFAig/yqSq0QAG9NP33sg+2U7O3vp347ddj+0QoT3qqktgoH/UXNuWEgG9C6ZQps39I2PwvdIbhzDsIo94MgImx4uw8CdRlheQ3wrIvseuMMB6KEfHbPg+SlRBDi6VoCiFl4BKIKq/2zQPECz4JY0SfwLY5WhOUFASFB9JrhJ+aI5VPB7JUmBE9YHfRFUtzbbrh9JMsWxxYmZFJmqPBqO+YCQocPuefu11Mb3yDnJg/6B8CXAqkzo2cpEgxJg1kl/8WwszqMT+3Bt2KZoXWDPIUccyAxQKlHmPHf/A/GCv68mRwwiC6EVBdmzqmuxBmoGlTiq1MTVL9abUC1sHY4E1oe/xPIwirytVSmU9H5MIP55RkzL5MRFWEaGuhUhzwMdjXysADBPqrm9rZtfQzZzKcx+LXF8i6HZNZVCl5QhLR6UP9NP0OWqczfeSJvMNkO7t4ciJWLUHB0KTgJvArMe3AAZoZK+1pKrSrTJwgShU7h8AP74VS72ytvV6zoftytjlDjQEj2Ehs+RFXwy3ees59fn+To4uxwQC5XUaeeqVSbbTJkm8IUir2aL1CCu33Gzn9emLTMSy1C4nsXAuxJgKkbMfjjdSJHolsZ6a+Km9ULy5b/f2yX3sT5NF9VE8FRnPD04/fhicH6xFKNvQt9LP6cLoC3BuTfxsZB1af/9769F6aOYVPzyZYBbzezZrYFojqi1HNIfn43yD5LNXsqGfkWbloUgGVVs0V08GkFvvLtFKtruT76J6krdG7ulHVPqRCkF3KL2M2eFLkdrQQ8NtmcWaFsppuojH5FloWhKr7x05Ewe/jJm3VR+sNEkn9L/KWH1DGHVJGYyV1kN1J0K9SMYm
63k7el4camF9h6jbP52DKo1ZIMNGWN5MGeOuu8PMi84Wa68OPkJkWeBaq08Higiqe0YhuQBuNkJPRyENMkqnO/wPiKjLf \ No newline at end of file diff --git a/third_party/nccl/nccl_configure.bzl b/third_party/nccl/nccl_configure.bzl index d59e861d70b..03642bcf04a 100644 --- a/third_party/nccl/nccl_configure.bzl +++ b/third_party/nccl/nccl_configure.bzl @@ -64,17 +64,11 @@ def _label(file): return Label("//third_party/nccl:{}".format(file)) def _create_local_nccl_repository(repository_ctx): - # Resolve all labels before doing any real work. Resolving causes the - # function to be restarted with all previous state being lost. This - # can easily lead to a O(n^2) runtime in the number of labels. - # See https://github.com/tensorflow/tensorflow/commit/62bd3534525a036f07d9851b3199d68212904778 - find_cuda_config_path = repository_ctx.path(Label("@org_tensorflow//third_party/gpus:find_cuda_config.py.gz.base64")) - nccl_version = get_host_environ(repository_ctx, _TF_NCCL_VERSION, "") if nccl_version: nccl_version = nccl_version.split(".")[0] - cuda_config = find_cuda_config(repository_ctx, find_cuda_config_path, ["cuda"]) + cuda_config = find_cuda_config(repository_ctx, ["cuda"]) cuda_version = cuda_config["cuda_version"].split(".") cuda_major = cuda_version[0] cuda_minor = cuda_version[1] @@ -96,7 +90,7 @@ def _create_local_nccl_repository(repository_ctx): ) else: # Create target for locally installed NCCL. - config = find_cuda_config(repository_ctx, find_cuda_config_path, ["nccl"]) + config = find_cuda_config(repository_ctx, ["nccl"]) config_wrap = { "%{nccl_version}": config["nccl_version"], "%{nccl_header_dir}": config["nccl_include_dir"], @@ -145,12 +139,20 @@ remote_nccl_configure = repository_rule( remotable = True, attrs = { "environ": attr.string_dict(), + "_find_cuda_config": attr.label( + default = Label("@org_tensorflow//third_party/gpus:find_cuda_config.py"), + ), }, ) nccl_configure = repository_rule( implementation = _nccl_autoconf_impl, environ = _ENVIRONS, + attrs = { + "_find_cuda_config": attr.label( + default = Label("@org_tensorflow//third_party/gpus:find_cuda_config.py"), + ), + }, ) """Detects and configures the NCCL configuration. diff --git a/third_party/tensorrt/tensorrt_configure.bzl b/third_party/tensorrt/tensorrt_configure.bzl index 9c980a92cf8..d26fa2a34d4 100644 --- a/third_party/tensorrt/tensorrt_configure.bzl +++ b/third_party/tensorrt/tensorrt_configure.bzl @@ -88,14 +88,13 @@ def _create_local_tensorrt_repository(repository_ctx): # function to be restarted with all previous state being lost. This # can easily lead to a O(n^2) runtime in the number of labels. 
# See https://github.com/tensorflow/tensorflow/commit/62bd3534525a036f07d9851b3199d68212904778 - find_cuda_config_path = repository_ctx.path(Label("@org_tensorflow//third_party/gpus:find_cuda_config.py.gz.base64")) tpl_paths = { "build_defs.bzl": _tpl_path(repository_ctx, "build_defs.bzl"), "BUILD": _tpl_path(repository_ctx, "BUILD"), "tensorrt/include/tensorrt_config.h": _tpl_path(repository_ctx, "tensorrt/include/tensorrt_config.h"), } - config = find_cuda_config(repository_ctx, find_cuda_config_path, ["tensorrt"]) + config = find_cuda_config(repository_ctx, ["tensorrt"]) trt_version = config["tensorrt_version"] cpu_value = get_cpu_value(repository_ctx) @@ -191,12 +190,16 @@ remote_tensorrt_configure = repository_rule( remotable = True, attrs = { "environ": attr.string_dict(), + "_find_cuda_config": attr.label(default = "@org_tensorflow//third_party/gpus:find_cuda_config.py"), }, ) tensorrt_configure = repository_rule( implementation = _tensorrt_configure_impl, environ = _ENVIRONS + [_TF_TENSORRT_CONFIG_REPO], + attrs = { + "_find_cuda_config": attr.label(default = "@org_tensorflow//third_party/gpus:find_cuda_config.py"), + }, ) """Detects and configures the local CUDA toolchain. From 917911c8679e3dd72e6903054df877d4faa99a79 Mon Sep 17 00:00:00 2001 From: Xingyu Long Date: Thu, 16 Jul 2020 11:38:55 -0400 Subject: [PATCH 0598/2522] Update BUILD file. --- tensorflow/python/keras/benchmarks/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/keras/benchmarks/BUILD b/tensorflow/python/keras/benchmarks/BUILD index e9fdaa0e5fd..448dc151376 100644 --- a/tensorflow/python/keras/benchmarks/BUILD +++ b/tensorflow/python/keras/benchmarks/BUILD @@ -200,4 +200,4 @@ py_test( ":benchmark_util", "//tensorflow:tensorflow_py", ], -) \ No newline at end of file +) From 279ef8e3e65bfcc5be9d03f40946537f5ae4473e Mon Sep 17 00:00:00 2001 From: Tom Hennigan Date: Thu, 16 Jul 2020 09:12:26 -0700 Subject: [PATCH 0599/2522] Correct bounds in docstrings for `tf.image.random_{constrast,hue,saturation}`. These methods use random_ops.random_uniform(..) which is inclusive/exclusive not inclusive/inclusive. PiperOrigin-RevId: 321579005 Change-Id: I69bd09ee7f5c4b1b1a7551f7067cc2ce83c95cd3 --- tensorflow/python/ops/image_ops_impl.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index 1d373afdb62..085535435d1 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -1794,7 +1794,7 @@ def random_contrast(image, lower, upper, seed=None): """Adjust the contrast of an image or images by a random factor. Equivalent to `adjust_contrast()` but uses a `contrast_factor` randomly - picked in the interval `[lower, upper]`. + picked in the interval `[lower, upper)`. Args: image: An image tensor with 3 or more dimensions. @@ -1824,7 +1824,6 @@ def random_contrast(image, lower, upper, seed=None): if lower < 0: raise ValueError('lower must be non-negative.') - # Generate an a float in [lower, upper] contrast_factor = random_ops.random_uniform([], lower, upper, seed=seed) return adjust_contrast(image, contrast_factor) @@ -2182,7 +2181,7 @@ def random_hue(image, max_delta, seed=None): """Adjust the hue of RGB images by a random factor. Equivalent to `adjust_hue()` but uses a `delta` randomly - picked in the interval `[-max_delta, max_delta]`. + picked in the interval `[-max_delta, max_delta)`. `max_delta` must be in the interval `[0, 0.5]`. 
@@ -2392,7 +2391,7 @@ def random_saturation(image, lower, upper, seed=None): """Adjust the saturation of RGB images by a random factor. Equivalent to `adjust_saturation()` but uses a `saturation_factor` randomly - picked in the interval `[lower, upper]`. + picked in the interval `[lower, upper)`. Usage Example: @@ -2428,7 +2427,6 @@ def random_saturation(image, lower, upper, seed=None): if lower < 0: raise ValueError('lower must be non-negative.') - # Pick a float in [lower, upper] saturation_factor = random_ops.random_uniform([], lower, upper, seed=seed) return adjust_saturation(image, saturation_factor) From 960358aaa2ee81838bd8bdfc7d7105d9c75771a0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Jul 2020 09:12:50 -0700 Subject: [PATCH 0600/2522] [XLA:SPMD] Support convolution with non contracting spatial dim partitioned at batch dim. PiperOrigin-RevId: 321579087 Change-Id: I42f86b9c281e02f8157287653aa30a54c14a0e72 --- .../xla/service/dot_as_convolution_util.cc | 42 +++++++----- .../xla/service/dot_as_convolution_util.h | 6 ++ .../xla/service/spmd/spmd_partitioner.cc | 66 +++++++++++++++++++ .../xla/service/spmd/spmd_partitioner_util.cc | 8 +++ .../xla/service/spmd/spmd_partitioner_util.h | 3 + 5 files changed, 109 insertions(+), 16 deletions(-) diff --git a/tensorflow/compiler/xla/service/dot_as_convolution_util.cc b/tensorflow/compiler/xla/service/dot_as_convolution_util.cc index 576d9d48ab8..4670ce6940a 100644 --- a/tensorflow/compiler/xla/service/dot_as_convolution_util.cc +++ b/tensorflow/compiler/xla/service/dot_as_convolution_util.cc @@ -24,6 +24,31 @@ limitations under the License. namespace xla { namespace dot_as_convolution_util { +bool ConvSpatialDimensionIsParallel(const WindowDimension& wd, int64 lhs_size) { + // A parallel batch dimension in DotGeneral is represented as a + // spatial dimension with window size B (batch dimension size), + // stride B - 1, and base dilation B. + if (lhs_size == wd.size() && lhs_size == wd.base_dilation() && + ((std::max(1, lhs_size - 1) == wd.stride() && + wd.window_dilation() == 1) || + (std::max(1, lhs_size - 1) == wd.window_dilation() && + wd.stride() == 1)) && + wd.padding_high() == 0 && wd.padding_low() == 0 && + !wd.window_reversal()) { + return true; + } + + // Aternative representation of a batch dimension. + if (wd.size() == lhs_size && wd.padding_high() == lhs_size - 1 && + wd.padding_low() == lhs_size - 1 && wd.window_reversal() && + wd.window_dilation() == 1 && wd.stride() == lhs_size && + wd.base_dilation() == lhs_size - 1) { + return true; + } + + return false; +} + /* static */ absl::optional ParseDotGeneralFromConvolution(const HloInstruction* conv) { CHECK_EQ(conv->opcode(), HloOpcode::kConvolution); @@ -49,22 +74,7 @@ ParseDotGeneralFromConvolution(const HloInstruction* conv) { int64 rhs_size = conv->operand(1)->shape().dimensions(rhs); int64 output = conv_dims.output_spatial_dimensions(i); const auto& wd = conv->window().dimensions(i); - if (lhs_size == wd.size() && lhs_size == wd.base_dilation() && - ((std::max(1, lhs_size - 1) == wd.stride() && - wd.window_dilation() == 1) || - (std::max(1, lhs_size - 1) == wd.window_dilation() && - wd.stride() == 1)) && - wd.padding_high() == 0 && wd.padding_low() == 0 && - !wd.window_reversal()) { - // A batch dimension in DotGeneral is represented as a spatial dimension - // with window size B (batch dimension size), stride B - 1, and base - // dilation B. 
- dims.batch_dims.push_back({lhs, rhs, output, i}); - } else if (wd.size() == lhs_size && wd.padding_high() == lhs_size - 1 && - wd.padding_low() == lhs_size - 1 && wd.window_reversal() && - wd.window_dilation() == 1 && wd.stride() == lhs_size && - wd.base_dilation() == lhs_size - 1) { - // Aternative representation of a batch dimension. + if (ConvSpatialDimensionIsParallel(wd, lhs_size)) { dims.batch_dims.push_back({lhs, rhs, output, i}); } else if (lhs_size == wd.size() && wd.base_dilation() == 1 && wd.window_dilation() == 1 && wd.padding_high() == 0 && diff --git a/tensorflow/compiler/xla/service/dot_as_convolution_util.h b/tensorflow/compiler/xla/service/dot_as_convolution_util.h index a3e829a3d31..6a7cacf812d 100644 --- a/tensorflow/compiler/xla/service/dot_as_convolution_util.h +++ b/tensorflow/compiler/xla/service/dot_as_convolution_util.h @@ -62,6 +62,12 @@ CreateShardedConvForDotGeneralConvolution( const DotGeneralAsConvolutionDimsInfo& dot_dnums, HloInstruction* sharded_lhs_hlo, HloInstruction* sharded_rhs_hlo); +// Check if a spatial dim is parallel batch dimension. +// A parallel batch dimension in DotGeneral is represented as a spatial +// dimension with window size B (batch dimension size), stride B - 1, and base +// dilation B. +bool ConvSpatialDimensionIsParallel(const WindowDimension& wd, int64 lhs_size); + } // namespace dot_as_convolution_util } // namespace xla diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc index fa28b6f8de9..76014c83340 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc @@ -3149,6 +3149,72 @@ Status SpmdPartitioningVisitor::HandleConvolution(HloInstruction* hlo) { auto aligned_lhs_sharding = hlo_sharding_util::TransposeSharding(rhs.sharding(), lhs_to_rhs_indices); + // Handling cases where all the partitioned dimensions are parallel + // dimensions. + int64 lhs_parallel_dim_partitions = 1; + int64 rhs_parallel_dim_partitions = 1; + std::vector parallel_spatial_dims; + for (int64 i = 0; i < dnums.input_spatial_dimensions_size(); ++i) { + int64 lhs_dim = dnums.input_spatial_dimensions(i); + int64 lhs_size = lhs.base_shape().dimensions(lhs_dim); + const auto& wd = hlo->window().dimensions(i); + int64 rhs_dim = dnums.kernel_spatial_dimensions(i); + // Only non reversal window is supported right now. + if (!wd.window_reversal() && + dot_as_convolution_util::ConvSpatialDimensionIsParallel(wd, lhs_size)) { + parallel_spatial_dims.emplace_back(i); + lhs_parallel_dim_partitions *= ShardCountAtDim(lhs.sharding(), lhs_dim); + rhs_parallel_dim_partitions *= ShardCountAtDim(rhs.sharding(), rhs_dim); + } + } + bool lhs_partition_dims_are_parallel = + (lhs_parallel_dim_partitions == num_partitions_); + bool rhs_partition_dims_are_parallel = + (rhs_parallel_dim_partitions == num_partitions_); + + // If there is a parallel dim and all the partitioned dimensions are parallel + // dimensions in either LHS or RHS, simply create partitioned convolutions. + if (!parallel_spatial_dims.empty() && + (lhs_partition_dims_are_parallel || rhs_partition_dims_are_parallel)) { + // Reshard LHS or RHS to partition at parallel dimensions as the other + // operand. 
+ if (lhs_partition_dims_are_parallel) { + rhs = rhs.Reshard(aligned_rhs_sharding); + } else { + lhs = lhs.Reshard(aligned_lhs_sharding); + } + auto lhs_shard_shape = + MakePartitionedShape(lhs.base_shape(), lhs.sharding()); + auto rhs_shard_shape = + MakePartitionedShape(rhs.base_shape(), rhs.sharding()); + // Update convolution window. + auto new_window = hlo->window(); + for (const auto& spatial_dim : parallel_spatial_dims) { + auto wd = new_window.mutable_dimensions(spatial_dim); + wd->set_size(lhs_shard_shape.dimensions( + dnums.input_spatial_dimensions(spatial_dim))); + wd->set_stride(std::max(1, wd->size() - 1)); + wd->set_base_dilation(wd->size()); + } + TF_ASSIGN_OR_RETURN( + Shape sharded_conv_shape, + ShapeInference::InferConvolveShape( + lhs_shard_shape, rhs_shard_shape, hlo->feature_group_count(), + hlo->batch_group_count(), new_window, dnums)); + *sharded_conv_shape.mutable_layout() = hlo->shape().layout(); + SetPartitionedHlo(hlo, [&]() { + auto sharded_conv = b_.AddInstruction(HloInstruction::CreateConvolve( + sharded_conv_shape, lhs.hlo(), rhs.hlo(), hlo->feature_group_count(), + hlo->batch_group_count(), new_window, dnums, + hlo->precision_config())); + sharded_conv->set_sharding(hlo->sharding()); + return PartitionedHlo(sharded_conv, hlo->shape(), MakePartitioningState()) + .Reshard(hlo->sharding()) + .hlo(); + }); + return Status::OK(); + } + // Handling cases where both operands' shardings are aligned. We check that // the LHS batch dimension is not partitioned because it is mapped to the // output feature dimension in aligned_rhs_sharding, which are not the same diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc index 3354a9c3233..7c4d816fd66 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc @@ -877,5 +877,13 @@ HloInstruction* SliceFirstK(HloInstruction* hlo, SpmdBuilder* builder, output_shape, hlo, start_indices, limit_indices, strides)); } +// Check if a dimension is sharded. +int64 ShardCountAtDim(const HloSharding& sharding, int64 dim) { + if (sharding.IsTileMaximal()) { + return 1; + } + return sharding.tile_assignment().dim(dim); +} + } // namespace spmd } // namespace xla diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h index 5f245667970..8389c2f666a 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.h @@ -262,6 +262,9 @@ absl::optional GetKValueInTopKWhenPartitionSortDim(HloInstruction* hlo); HloInstruction* SliceFirstK(HloInstruction* hlo, SpmdBuilder* builder, int64 slice_dim, int64 k); +// Check if a dimension is sharded. +int64 ShardCountAtDim(const HloSharding& sharding, int64 dim); + } // namespace spmd } // namespace xla From 11b8948857adb985b8a4a9464137e752b7a30292 Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Thu, 16 Jul 2020 09:38:12 -0700 Subject: [PATCH 0601/2522] Update private API usage of test_util. deprecated_graph_mode_only. 
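A minimal sketch of the replacement pattern, assuming the usual internal test
harness imports (the test class below is illustrative only, not one of the
migrated tests): instead of decorating a method with the private
@test_util.deprecated_graph_mode_only, the graph-mode portion of the test is
wrapped in an explicit ops.Graph().as_default() context.

    from tensorflow.python.framework import ops
    from tensorflow.python.platform import test


    class ExampleMigratedTest(test.TestCase):

      def test_runs_in_graph_mode(self):
        # Build and evaluate the graph explicitly instead of relying on the
        # deprecated_graph_mode_only decorator.
        with ops.Graph().as_default(), self.cached_session():
          total = ops.convert_to_tensor(1.0) + 2.0
          self.assertAllClose(3.0, self.evaluate(total))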
PiperOrigin-RevId: 321583859 Change-Id: I05d02f8f41792099fa9b225a09b4c109c981bfa7 --- .../keras/layers/dense_attention_test.py | 2 -- .../experimental/loss_scale_optimizer_test.py | 25 ++++++++++--------- .../keras/utils/multi_gpu_utils_test.py | 6 ++--- 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/tensorflow/python/keras/layers/dense_attention_test.py b/tensorflow/python/keras/layers/dense_attention_test.py index 504c4ab6984..85780900593 100644 --- a/tensorflow/python/keras/layers/dense_attention_test.py +++ b/tensorflow/python/keras/layers/dense_attention_test.py @@ -23,7 +23,6 @@ import numpy as np from tensorflow.python import keras from tensorflow.python.eager import context -from tensorflow.python.framework import test_util from tensorflow.python.keras import combinations from tensorflow.python.keras.layers import core from tensorflow.python.keras.layers import dense_attention @@ -361,7 +360,6 @@ class AttentionTest(test.TestCase, parameterized.TestCase): attention_layer.build(input_shape=([1, 1, 1], [1, 1, 1])) self.assertAllClose(1., attention_layer.scale.value()) - @test_util.deprecated_graph_mode_only def test_scale_init_graph(self): """Tests that scale initializes to 1 when use_scale=True.""" with self.cached_session() as sess: diff --git a/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer_test.py b/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer_test.py index 350cfe6a09c..9a9d174a64f 100644 --- a/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer_test.py +++ b/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer_test.py @@ -106,19 +106,20 @@ class LossScaleOptimizerTest(test.TestCase, parameterized.TestCase): # and so the variable will be init_val - grad * lr == 5 - 1 * 2 == 3 self.assertAllClose([3.], self.evaluate(var)) - @test_util.deprecated_graph_mode_only def testFixedLossScaleAppliedToLossWithGetGradients(self): - var = variables.Variable([2.0]) - opt = gradient_descent.SGD(1.0) - loss_scale = 10. - opt = loss_scale_optimizer.LossScaleOptimizer(opt, loss_scale) - grad_check_fn = mp_test_util.create_identity_with_grad_check_fn(loss_scale) - loss = grad_check_fn(var) - run_op = opt.get_gradients(loss, [var]) - self.evaluate(variables.global_variables_initializer()) - # This will cause an assertion to run, as - # mp_test_util.create_identity_with_grad_check_fn added an assertion op. - self.evaluate(run_op) + with ops.Graph().as_default(): + var = variables.Variable([2.0]) + opt = gradient_descent.SGD(1.0) + loss_scale = 10. + opt = loss_scale_optimizer.LossScaleOptimizer(opt, loss_scale) + grad_check_fn = mp_test_util.create_identity_with_grad_check_fn( + loss_scale) + loss = grad_check_fn(var) + run_op = opt.get_gradients(loss, [var]) + self.evaluate(variables.global_variables_initializer()) + # This will cause an assertion to run, as + # mp_test_util.create_identity_with_grad_check_fn added an assertion op. 
+ self.evaluate(run_op) def testGetScaledLoss(self): opt = gradient_descent.SGD(2.0) diff --git a/tensorflow/python/keras/utils/multi_gpu_utils_test.py b/tensorflow/python/keras/utils/multi_gpu_utils_test.py index 465ace7f264..0765afb4db7 100644 --- a/tensorflow/python/keras/utils/multi_gpu_utils_test.py +++ b/tensorflow/python/keras/utils/multi_gpu_utils_test.py @@ -23,7 +23,7 @@ from tensorflow.python import data from tensorflow.python import keras from tensorflow.python.eager import context from tensorflow.python.framework import config -from tensorflow.python.framework import test_util +from tensorflow.python.framework import ops from tensorflow.python.keras.utils import multi_gpu_utils from tensorflow.python.keras.utils import np_utils from tensorflow.python.platform import test @@ -38,7 +38,7 @@ def check_if_compatible_devices(gpus=2): return False return True -@test_util.run_all_in_deprecated_graph_mode_only + class TestMultiGPUModel(test.TestCase): def __init__(self, methodName='runTest'): # pylint: disable=invalid-name @@ -161,7 +161,7 @@ class TestMultiGPUModel(test.TestCase): if not check_if_compatible_devices(gpus=gpus): self.skipTest('multi gpu only') - with self.cached_session(): + with ops.Graph().as_default(), self.cached_session(): input_shape = (num_samples,) + shape x_train = np.random.randint(0, 255, input_shape) y_train = np.random.randint(0, num_classes, (input_shape[0],)) From 015fc8cd1882d3159076b4b5d36f574d1b91a1ae Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Thu, 16 Jul 2020 09:41:51 -0700 Subject: [PATCH 0602/2522] Integrate LLVM at https://github.com/llvm/llvm-project/commit/1d3f61f8a799 PiperOrigin-RevId: 321584516 Change-Id: I6b9e6acece9e005ba79dddd8388b64a23902f92b --- tensorflow/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 80f0be2d128..1d46b14f73e 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -710,8 +710,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "1067d3e176ea7b0b1942c163bf8c6c90107768c1" - LLVM_SHA256 = "6f578d1d669000caef23164074d20d86600abf98adc4d7712fac6fa429383b1a" + LLVM_COMMIT = "1d3f61f8a799489f7f4d81e46d9e6b31a954ea4e" + LLVM_SHA256 = "4731fa761a0d74f1f5ba28478144f766abc0de879e3e283431d8699456a03181" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From 80aaf0c41704e7926a54f0cd0d7dee15cf2374f5 Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Tue, 14 Jul 2020 16:10:44 -0700 Subject: [PATCH 0603/2522] Use Bazel's builtin patch support. This removes a dependency on the system having a patch executable. 
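A minimal sketch of the mechanism, for a generic repository rule (the rule and
attribute names here are illustrative, not the actual TensorFlow rule):
Bazel's repository_ctx.patch applies the patch in-process, so no host `patch`
binary is required.

    # example_repo.bzl (hypothetical): apply patches with Bazel's builtin support.
    def _example_repo_impl(ctx):
        ctx.download_and_extract(
            url = ctx.attr.urls,
            stripPrefix = ctx.attr.strip_prefix,
        )
        for patch_file in ctx.attr.patch_files:
            # Equivalent to running `patch -p1` in the repository root,
            # but performed by Bazel itself.
            ctx.patch(patch_file, strip = 1)

    example_repo = repository_rule(
        implementation = _example_repo_impl,
        attrs = {
            "urls": attr.string_list(),
            "strip_prefix": attr.string(default = ""),
            "patch_files": attr.label_list(),
        },
    )

The same call is what repo.bzl switches to below: _apply_patch becomes a thin
wrapper around ctx.patch(patch_file, strip=1).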
--- ...m_google_absl_fix_mac_and_nvcc_build.patch | 210 +++++++++--------- third_party/eigen3/gpu_packet_math.patch | 2 + third_party/icu/udata.patch | 34 +-- third_party/png_fix_rpi.patch | 6 +- third_party/repo.bzl | 7 +- 5 files changed, 133 insertions(+), 126 deletions(-) diff --git a/third_party/com_google_absl_fix_mac_and_nvcc_build.patch b/third_party/com_google_absl_fix_mac_and_nvcc_build.patch index 271e941bfe8..6301119ab2c 100644 --- a/third_party/com_google_absl_fix_mac_and_nvcc_build.patch +++ b/third_party/com_google_absl_fix_mac_and_nvcc_build.patch @@ -1,103 +1,8 @@ ---- ./absl/time/internal/cctz/BUILD.bazel 2019-09-23 13:20:52.000000000 -0700 -+++ ./absl/time/internal/cctz/BUILD.bazel.fixed 2019-09-23 13:20:48.000000000 -0700 -@@ -74,15 +74,6 @@ - "include/cctz/time_zone.h", - "include/cctz/zone_info_source.h", - ], -- linkopts = select({ -- ":osx": [ -- "-framework Foundation", -- ], -- ":ios": [ -- "-framework Foundation", -- ], -- "//conditions:default": [], -- }), - visibility = ["//visibility:public"], - deps = [ - ":civil_time", ---- ./absl/strings/string_view.h 2019-09-23 13:20:52.000000000 -0700 -+++ ./absl/strings/string_view.h.fixed 2019-09-23 13:20:48.000000000 -0700 -@@ -283,7 +283,14 @@ - // Returns the ith element of the `string_view` using the array operator. - // Note that this operator does not perform any bounds checking. - constexpr const_reference operator[](size_type i) const { -+#if defined(__NVCC__) && (__CUDACC_VER_MAJOR__ < 10 || (__CUDACC_VER_MAJOR__ == 10 && __CUDACC_VER_MINOR__ < 2)) -+ // An NVCC bug treats the original return expression as a non-constant, -+ // which is not allowed in a constexpr function. This will be fixed in the -+ // CUDA 10.2 release. -+ return ptr_[i]; -+#else - return ABSL_ASSERT(i < size()), ptr_[i]; -+#endif - } - - // string_view::at() -@@ -292,25 +299,46 @@ - // and an exception of type `std::out_of_range` will be thrown on invalid - // access. - constexpr const_reference at(size_type i) const { -+#if defined(__NVCC__) && (__CUDACC_VER_MAJOR__ < 10 || (__CUDACC_VER_MAJOR__ == 10 && __CUDACC_VER_MINOR__ < 2)) -+ // An NVCC bug treats the original return expression as a non-constant, -+ // which is not allowed in a constexpr function. This will be fixed in the -+ // CUDA 10.2 release. -+ return ptr_[i]; -+#else - return ABSL_PREDICT_TRUE(i < size()) - ? ptr_[i] - : ((void)base_internal::ThrowStdOutOfRange( - "absl::string_view::at"), - ptr_[i]); -+#endif - } - - // string_view::front() - // - // Returns the first element of a `string_view`. - constexpr const_reference front() const { -+#if defined(__NVCC__) && (__CUDACC_VER_MAJOR__ < 10 || (__CUDACC_VER_MAJOR__ == 10 && __CUDACC_VER_MINOR__ < 2)) -+ // An NVCC bug treats the original return expression as a non-constant, -+ // which is not allowed in a constexpr function. This will be fixed in the -+ // CUDA 10.2 release. -+ return ptr_[0]; -+#else - return ABSL_ASSERT(!empty()), ptr_[0]; -+#endif - } - - // string_view::back() - // - // Returns the last element of a `string_view`. - constexpr const_reference back() const { -+#if defined(__NVCC__) && (__CUDACC_VER_MAJOR__ < 10 || (__CUDACC_VER_MAJOR__ == 10 && __CUDACC_VER_MINOR__ < 2)) -+ // An NVCC bug treats the original return expression as a non-constant, -+ // which is not allowed in a constexpr function. This will be fixed in the -+ // CUDA 10.2 release. 
-+ return ptr_[size() - 1]; -+#else - return ABSL_ASSERT(!empty()), ptr_[size() - 1]; -+#endif - } - - // string_view::data() -@@ -519,7 +547,14 @@ - (std::numeric_limits::max)(); - - static constexpr size_type CheckLengthInternal(size_type len) { -+#if defined(__NVCC__) && (__CUDACC_VER_MAJOR__ < 10 || (__CUDACC_VER_MAJOR__ == 10 && __CUDACC_VER_MINOR__ < 2)) -+ // An NVCC bug treats the original return expression as a non-constant, -+ // which is not allowed in a constexpr function. This will be fixed in the -+ // CUDA 10.2 release. -+ return len; -+#else - return (void)ABSL_ASSERT(len <= kMaxSize), len; -+#endif - } - - static constexpr size_type StrlenInternal(const char* str) { ---- ./absl/container/internal/compressed_tuple.h 2020-03-04 12:57:37.000000000 -0800 -+++ ./absl/container/internal/compressed_tuple.h.fixed 2019-06-20 11:54:01.000000000 -0700 -@@ -32,7 +32,6 @@ Revert to commit 43ef2148c0936ebf7cb4be6b19927a9d9d145b8f as commit e9324d926a9189e222741fce6e676f0944661a72 includes a change not compatible with CUDA on Windows. +diff --git a/absl/container/internal/compressed_tuple.h b/absl/container/internal/compressed_tuple.h +index 4bfe92f..01db713 100644 +--- a/absl/container/internal/compressed_tuple.h ++++ b/absl/container/internal/compressed_tuple.h +@@ -32,7 +32,6 @@ #ifndef ABSL_CONTAINER_INTERNAL_COMPRESSED_TUPLE_H_ #define ABSL_CONTAINER_INTERNAL_COMPRESSED_TUPLE_H_ @@ -105,7 +10,7 @@ #include #include #include -@@ -77,110 +76,61 @@ +@@ -77,110 +76,61 @@ constexpr bool IsFinal() { #endif } @@ -234,7 +139,7 @@ // // To access the members, use member .get() function. // -@@ -196,58 +146,36 @@ +@@ -196,58 +146,36 @@ using TupleMoveConstructible = typename std::conditional< template class ABSL_INTERNAL_COMPRESSED_TUPLE_DECLSPEC CompressedTuple : private internal_compressed_tuple::CompressedTupleImpl< @@ -302,3 +207,104 @@ } }; +diff --git a/absl/strings/string_view.h b/absl/strings/string_view.h +index 1861ea6..c7a916b 100644 +--- a/absl/strings/string_view.h ++++ b/absl/strings/string_view.h +@@ -283,7 +283,14 @@ class string_view { + // Returns the ith element of the `string_view` using the array operator. + // Note that this operator does not perform any bounds checking. + constexpr const_reference operator[](size_type i) const { ++#if defined(__NVCC__) && (__CUDACC_VER_MAJOR__ < 10 || (__CUDACC_VER_MAJOR__ == 10 && __CUDACC_VER_MINOR__ < 2)) ++ // An NVCC bug treats the original return expression as a non-constant, ++ // which is not allowed in a constexpr function. This will be fixed in the ++ // CUDA 10.2 release. ++ return ptr_[i]; ++#else + return ABSL_ASSERT(i < size()), ptr_[i]; ++#endif + } + + // string_view::at() +@@ -292,25 +299,46 @@ class string_view { + // and an exception of type `std::out_of_range` will be thrown on invalid + // access. + constexpr const_reference at(size_type i) const { ++#if defined(__NVCC__) && (__CUDACC_VER_MAJOR__ < 10 || (__CUDACC_VER_MAJOR__ == 10 && __CUDACC_VER_MINOR__ < 2)) ++ // An NVCC bug treats the original return expression as a non-constant, ++ // which is not allowed in a constexpr function. This will be fixed in the ++ // CUDA 10.2 release. ++ return ptr_[i]; ++#else + return ABSL_PREDICT_TRUE(i < size()) + ? ptr_[i] + : ((void)base_internal::ThrowStdOutOfRange( + "absl::string_view::at"), + ptr_[i]); ++#endif + } + + // string_view::front() + // + // Returns the first element of a `string_view`. 
+ constexpr const_reference front() const { ++#if defined(__NVCC__) && (__CUDACC_VER_MAJOR__ < 10 || (__CUDACC_VER_MAJOR__ == 10 && __CUDACC_VER_MINOR__ < 2)) ++ // An NVCC bug treats the original return expression as a non-constant, ++ // which is not allowed in a constexpr function. This will be fixed in the ++ // CUDA 10.2 release. ++ return ptr_[0]; ++#else + return ABSL_ASSERT(!empty()), ptr_[0]; ++#endif + } + + // string_view::back() + // + // Returns the last element of a `string_view`. + constexpr const_reference back() const { ++#if defined(__NVCC__) && (__CUDACC_VER_MAJOR__ < 10 || (__CUDACC_VER_MAJOR__ == 10 && __CUDACC_VER_MINOR__ < 2)) ++ // An NVCC bug treats the original return expression as a non-constant, ++ // which is not allowed in a constexpr function. This will be fixed in the ++ // CUDA 10.2 release. ++ return ptr_[size() - 1]; ++#else + return ABSL_ASSERT(!empty()), ptr_[size() - 1]; ++#endif + } + + // string_view::data() +@@ -519,7 +547,14 @@ class string_view { + (std::numeric_limits::max)(); + + static constexpr size_type CheckLengthInternal(size_type len) { ++#if defined(__NVCC__) && (__CUDACC_VER_MAJOR__ < 10 || (__CUDACC_VER_MAJOR__ == 10 && __CUDACC_VER_MINOR__ < 2)) ++ // An NVCC bug treats the original return expression as a non-constant, ++ // which is not allowed in a constexpr function. This will be fixed in the ++ // CUDA 10.2 release. ++ return len; ++#else + return (void)ABSL_ASSERT(len <= kMaxSize), len; ++#endif + } + + static constexpr size_type StrlenInternal(const char* str) { +diff --git a/absl/time/internal/cctz/BUILD.bazel b/absl/time/internal/cctz/BUILD.bazel +index 7a53c81..159b0f0 100644 +--- a/absl/time/internal/cctz/BUILD.bazel ++++ b/absl/time/internal/cctz/BUILD.bazel +@@ -74,15 +74,6 @@ cc_library( + "include/cctz/time_zone.h", + "include/cctz/zone_info_source.h", + ], +- linkopts = select({ +- ":osx": [ +- "-framework Foundation", +- ], +- ":ios": [ +- "-framework Foundation", +- ], +- "//conditions:default": [], +- }), + visibility = ["//visibility:public"], + deps = [ + ":civil_time", diff --git a/third_party/eigen3/gpu_packet_math.patch b/third_party/eigen3/gpu_packet_math.patch index 53f411243f8..44e2f9a9e06 100644 --- a/third_party/eigen3/gpu_packet_math.patch +++ b/third_party/eigen3/gpu_packet_math.patch @@ -1,3 +1,4 @@ +diff -ru a/Eigen/src/Geometry/arch/Geometry_SSE.h b/Eigen/src/Geometry/arch/Geometry_SSE.h --- a/Eigen/src/Geometry/arch/Geometry_SSE.h +++ b/Eigen/src/Geometry/arch/Geometry_SSE.h @@ -33,13 +33,14 @@ @@ -22,6 +23,7 @@ return res; } }; +diff -ru a/Eigen/src/Core/arch/Default/BFloat16.h a/Eigen/src/Core/arch/Default/BFloat16.h --- a/Eigen/src/Core/arch/Default/BFloat16.h +++ a/Eigen/src/Core/arch/Default/BFloat16.h @@ -291,7 +291,7 @@ diff --git a/third_party/icu/udata.patch b/third_party/icu/udata.patch index db6a06d26ef..0b65e4ed388 100644 --- a/third_party/icu/udata.patch +++ b/third_party/icu/udata.patch @@ -1,19 +1,6 @@ ---- a/icu4c/source/common/unicode/uconfig.h -+++ b/icu4c/source/common/unicode/uconfig.h -@@ -55,6 +55,11 @@ - #include "uconfig_local.h" - #endif - -+// Tensorflow is statically linked on all platforms. -+#ifndef U_STATIC_IMPLEMENTATION -+#define U_STATIC_IMPLEMENTATION -+#endif -+ - /** - * \def U_DEBUG - * Determines whether to include debugging code. 
---- a/icu4c/source/common/udata.cpp -+++ b/icu4c/source/common/udata.cpp +diff -ru a/icu4c/source/common/udata.cpp b/icu4c/source/common/udata.cpp +--- a/icu4c/source/common/udata.cpp 2019-04-17 12:03:04.000000000 +0000 ++++ b/icu4c/source/common/udata.cpp 2020-07-14 23:49:37.836668741 +0000 @@ -18,11 +18,10 @@ #include "unicode/utypes.h" /* U_PLATFORM etc. */ @@ -57,3 +44,18 @@ #if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP Platform does not support dll icu data at this time setCommonICUDataPointer(&U_ICUDATA_ENTRY_POINT, FALSE, pErrorCode); { +diff -ru a/icu4c/source/common/unicode/uconfig.h b/icu4c/source/common/unicode/uconfig.h +--- a/icu4c/source/common/unicode/uconfig.h 2019-04-17 12:03:04.000000000 +0000 ++++ b/icu4c/source/common/unicode/uconfig.h 2020-07-14 23:49:37.836668741 +0000 +@@ -55,6 +55,11 @@ + #include "uconfig_local.h" + #endif + ++// Tensorflow is statically linked on all platforms. ++#ifndef U_STATIC_IMPLEMENTATION ++#define U_STATIC_IMPLEMENTATION ++#endif ++ + /** + * \def U_DEBUG + * Determines whether to include debugging code. diff --git a/third_party/png_fix_rpi.patch b/third_party/png_fix_rpi.patch index e07eb2f6c1b..df6cfd7ffae 100644 --- a/third_party/png_fix_rpi.patch +++ b/third_party/png_fix_rpi.patch @@ -1,6 +1,6 @@ -diff -r -u /tmp/libpng-1.6.37/scripts/pnglibconf.h.prebuilt ./scripts/pnglibconf.h.prebuilt ---- /tmp/libpng-1.6.37/scripts/pnglibconf.h.prebuilt 2019-04-14 11:10:32.000000000 -0700 -+++ ./scripts/pnglibconf.h.prebuilt 2019-05-21 09:40:52.138528512 -0700 +diff -r -u ./scripts/pnglibconf.h.prebuilt ./scripts/pnglibconf.h.prebuilt +--- ./scripts/pnglibconf.h.prebuilt ++++ ./scripts/pnglibconf.h.prebuilt @@ -19,6 +19,12 @@ #define PNG_ALIGNED_MEMORY_SUPPORTED /*#undef PNG_ARM_NEON_API_SUPPORTED*/ diff --git a/third_party/repo.bzl b/third_party/repo.bzl index a4d2b899f80..57003287147 100644 --- a/third_party/repo.bzl +++ b/third_party/repo.bzl @@ -60,12 +60,9 @@ def _execute_and_check_ret_code(repo_ctx, cmd_and_args): def _repos_are_siblings(): return Label("@foo//bar").workspace_root.startswith("../") -# Apply a patch_file to the repository root directory -# Runs 'patch -p1' on both Windows and Unix. +# Apply a patch_file to the repository root directory. def _apply_patch(ctx, patch_file): - patch_command = ["patch", "-p1", "-d", ctx.path("."), "-i", ctx.path(patch_file)] - cmd = _wrap_bash_cmd(ctx, patch_command) - _execute_and_check_ret_code(ctx, cmd) + ctx.patch(patch_file, strip=1) def _apply_delete(ctx, paths): for path in paths: From ba14dfe0e6b4c86e5d4f3f9738433c0f338224a4 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Thu, 16 Jul 2020 10:16:08 -0700 Subject: [PATCH 0604/2522] Prototype of special operation - thin depthwise conv + convolution 1x1 fused into one op. 
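A rough NumPy reference of the computation being fused (stride 1 and valid
padding assumed for brevity; this shows the math, not the OpenCL kernel):

    import numpy as np

    def depthwise_then_1x1(x, dw_w, dw_b, pw_w, pw_b):
      """x: (H, W, C); dw_w: (kh, kw, C) depthwise filter with multiplier 1;
      dw_b: (C,); pw_w: (C, O) 1x1 filter; pw_b: (O,)."""
      h, w, c = x.shape
      kh, kw, _ = dw_w.shape
      oh, ow = h - kh + 1, w - kw + 1
      dw_out = np.zeros((oh, ow, c))
      for y in range(oh):
        for xx in range(ow):
          patch = x[y:y + kh, xx:xx + kw, :]            # (kh, kw, C)
          dw_out[y, xx] = (patch * dw_w).sum(axis=(0, 1)) + dw_b
      # A 1x1 convolution is a per-pixel matmul over channels, so it can be
      # applied immediately to each depthwise result.
      return dw_out @ pw_w + pw_b

Fusing the two steps avoids writing the intermediate depthwise result back to
global memory; the new kernel keeps it in the dw_res_* registers and feeds it
straight into the conv_res_* accumulation.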
PiperOrigin-RevId: 321591721 Change-Id: I8b2b6ecfe237f5b4bc15ed004eee738bcdbf4288 --- .../delegates/gpu/cl/kernels/special/BUILD | 25 ++ .../special/depthwise_conv_plus_1x1_conv.cc | 310 ++++++++++++++++++ .../special/depthwise_conv_plus_1x1_conv.h | 81 +++++ 3 files changed, 416 insertions(+) create mode 100644 tensorflow/lite/delegates/gpu/cl/kernels/special/BUILD create mode 100644 tensorflow/lite/delegates/gpu/cl/kernels/special/depthwise_conv_plus_1x1_conv.cc create mode 100644 tensorflow/lite/delegates/gpu/cl/kernels/special/depthwise_conv_plus_1x1_conv.h diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/special/BUILD b/tensorflow/lite/delegates/gpu/cl/kernels/special/BUILD new file mode 100644 index 00000000000..d5ff93e6845 --- /dev/null +++ b/tensorflow/lite/delegates/gpu/cl/kernels/special/BUILD @@ -0,0 +1,25 @@ +package( + default_visibility = ["//visibility:public"], + licenses = ["notice"], # Apache 2.0 +) + +cc_library( + name = "depthwise_conv_plus_1x1_conv", + srcs = ["depthwise_conv_plus_1x1_conv.cc"], + hdrs = ["depthwise_conv_plus_1x1_conv.h"], + deps = [ + "//tensorflow/lite/delegates/gpu/cl:buffer", + "//tensorflow/lite/delegates/gpu/cl:cl_device", + "//tensorflow/lite/delegates/gpu/cl:gpu_object", + "//tensorflow/lite/delegates/gpu/cl:util", + "//tensorflow/lite/delegates/gpu/cl/kernels:gpu_operation", + "//tensorflow/lite/delegates/gpu/cl/kernels:util", + "//tensorflow/lite/delegates/gpu/cl/kernels:work_group_picking", + "//tensorflow/lite/delegates/gpu/common:data_type", + "//tensorflow/lite/delegates/gpu/common:operations", + "//tensorflow/lite/delegates/gpu/common:shape", + "//tensorflow/lite/delegates/gpu/common:status", + "//tensorflow/lite/delegates/gpu/common:tensor", + "//tensorflow/lite/delegates/gpu/common:types", + ], +) diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/special/depthwise_conv_plus_1x1_conv.cc b/tensorflow/lite/delegates/gpu/cl/kernels/special/depthwise_conv_plus_1x1_conv.cc new file mode 100644 index 00000000000..8f5d94fdc69 --- /dev/null +++ b/tensorflow/lite/delegates/gpu/cl/kernels/special/depthwise_conv_plus_1x1_conv.cc @@ -0,0 +1,310 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/lite/delegates/gpu/cl/kernels/special/depthwise_conv_plus_1x1_conv.h" + +#include +#include +#include + +#include "tensorflow/lite/delegates/gpu/cl/cl_device.h" +#include "tensorflow/lite/delegates/gpu/cl/kernels/util.h" +#include "tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h" + +namespace tflite { +namespace gpu { +namespace cl { +namespace { +std::string GenerateCode(const OperationDef& op_def, + const DepthwiseConvolution2DAttributes& dw_attr, + int result_depth, const CLDevice& device, + Arguments* args) { + auto src_desc = absl::make_unique(op_def.src_tensors[0]); + src_desc->SetTextureAddressMode(GetFastestZeroMode(device)); + args->AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc)); + args->AddObjectRef( + "dst_tensor", AccessType::WRITE, + absl::make_unique(op_def.dst_tensors[0])); + + args->AddInt("stride_x", dw_attr.strides.w); + args->AddInt("padding_x", -dw_attr.padding.prepended.w); + args->AddInt("dilation_x", dw_attr.dilations.w); + args->AddInt("stride_y", dw_attr.strides.h); + args->AddInt("padding_y", -dw_attr.padding.prepended.h); + args->AddInt("dilation_y", dw_attr.dilations.h); + + const auto src_tensor_type = op_def.src_tensors[0].storage_type; + + const bool manual_clamp = src_tensor_type == TensorStorageType::BUFFER || + src_tensor_type == TensorStorageType::IMAGE_BUFFER; + + std::string c = GetCommonDefines(op_def.precision); + c += "__kernel void main_function(\n"; + c += "$0) {\n"; + if (op_def.dst_tensors[0].HasAxis(Axis::BATCH)) { + c += " int linear_id = get_global_id(0);\n"; + c += " int X = linear_id / args.dst_tensor.Batch();\n"; + c += " int B = linear_id % args.dst_tensor.Batch();\n"; + c += " args.dst_tensor.SetBatchRef(B);\n"; + c += " args.src_tensor.SetBatchRef(B);\n"; + } else { + c += " int X = get_global_id(0);\n"; + } + c += " int Y = get_global_id(1);\n"; + c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height()) { " + "\n"; + c += " return; \n"; + c += " } \n"; + c += " __constant FLT4* constants = args.constants.GetPtr();\n"; + int intermediate_depth = DivideRoundUp(dw_attr.weights.shape.i, 4); + int weights_counter = 0; + for (int d = 0; d < intermediate_depth; ++d) { + c += " FLT4 dw_res_" + std::to_string(d) + " = constants[" + + std::to_string(weights_counter++) + "];\n"; + } + c += " int x_offseted = X * args.stride_x + args.padding_x;\n"; + c += " int y_offseted = Y * args.stride_y + args.padding_y;\n"; + c += " int x_c, y_c;\n"; + if (manual_clamp) { + c += " bool x_in, y_in;\n"; + } + c += " FLT4 src;\n"; + for (int ky = 0; ky < dw_attr.weights.shape.h; ++ky) { + c += " y_c = y_offseted + " + std::to_string(ky) + " * args.dilation_y;\n"; + if (manual_clamp) { + c += " y_in = y_c >= 0 && y_c < args.src_tensor.Height();\n"; + c += " y_c = clamp(y_c, 0, args.src_tensor.Height() - 1);\n"; + } + for (int kx = 0; kx < dw_attr.weights.shape.w; ++kx) { + c += " x_c = x_offseted + " + std::to_string(kx) + + " * args.dilation_x;\n"; + if (manual_clamp) { + c += " x_in = x_c >= 0 && x_c < args.src_tensor.Width();\n"; + c += " x_c = clamp(x_c, 0, args.src_tensor.Width() - 1);\n"; + } + for (int d = 0; d < intermediate_depth; ++d) { + std::string multiplier = manual_clamp ? 
"* (FLT)(x_in && y_in)" : ""; + c += " src = args.src_tensor.Read(x_c, y_c, " + std::to_string(d) + + ")" + multiplier + ";\n"; + c += " dw_res_" + std::to_string(d) + " += src * constants[" + + std::to_string(weights_counter++) + "];\n"; + } + } + } + for (int d = 0; d < result_depth; ++d) { + c += " FLT4 conv_res_" + std::to_string(d) + " = constants[" + + std::to_string(weights_counter++) + "];\n"; + } + for (int d = 0; d < result_depth; ++d) { + for (int s = 0; s < intermediate_depth; ++s) { + std::string src = "dw_res_" + std::to_string(s); + std::string dst = "conv_res_" + std::to_string(d); + c += " " + dst + " += " + src + ".x * constants[" + + std::to_string(weights_counter++) + "];\n"; + c += " " + dst + " += " + src + ".y * constants[" + + std::to_string(weights_counter++) + "];\n"; + c += " " + dst + " += " + src + ".z * constants[" + + std::to_string(weights_counter++) + "];\n"; + c += " " + dst + " += " + src + ".w * constants[" + + std::to_string(weights_counter++) + "];\n"; + } + c += " args.dst_tensor.Write(conv_res_" + std::to_string(d) + ", X, Y, " + + std::to_string(d) + ");\n"; + } + c += "}\n"; + + return c; +} +} // namespace + +DepthwiseConvPlus1x1Conv::DepthwiseConvPlus1x1Conv( + const OperationDef& definition, + const DepthwiseConvolution2DAttributes& dw_attr, + const Convolution2DAttributes& conv_attr) + : GPUOperation(definition), + dw_attr_(dw_attr), + result_depth_(DivideRoundUp(conv_attr.weights.shape.o, 4)) { + work_group_size_ = int3(8, 8, 1); +} + +DepthwiseConvPlus1x1Conv::DepthwiseConvPlus1x1Conv( + DepthwiseConvPlus1x1Conv&& operation) + : GPUOperation(std::move(operation)), + dw_attr_(std::move(operation.dw_attr_)), + result_depth_(operation.result_depth_) {} + +DepthwiseConvPlus1x1Conv& DepthwiseConvPlus1x1Conv::operator=( + DepthwiseConvPlus1x1Conv&& operation) { + if (this != &operation) { + dw_attr_ = std::move(operation.dw_attr_); + std::swap(result_depth_, operation.result_depth_); + GPUOperation::operator=(std::move(operation)); + } + return *this; +} + +absl::Status DepthwiseConvPlus1x1Conv::UploadWeights( + const DepthwiseConvolution2DAttributes& dw_attr, + const Convolution2DAttributes& conv_attr, CLContext* context) { + int dw_dst_ch_aligned = AlignByN(dw_attr.weights.shape.i, 4); + int dw_weights_count = + dw_dst_ch_aligned * dw_attr.weights.shape.h * dw_attr.weights.shape.w; + int conv_src_ch_aligned = AlignByN(conv_attr.weights.shape.i, 4); + int conv_dst_ch_aligned = AlignByN(conv_attr.weights.shape.o, 4); + int conv_weights_count = conv_src_ch_aligned * conv_dst_ch_aligned; + std::vector gpu_data; + gpu_data.reserve(dw_dst_ch_aligned + dw_weights_count + conv_dst_ch_aligned + + conv_weights_count); + // dw bias loading + for (int i = 0; i < dw_dst_ch_aligned; ++i) { + if (i < dw_attr.bias.shape.v) { + gpu_data.push_back(dw_attr.bias.data[i]); + } else { + gpu_data.push_back(0.0f); + } + } + // dw weights loading + for (int y = 0; y < dw_attr.weights.shape.h; ++y) { + for (int x = 0; x < dw_attr.weights.shape.w; ++x) { + for (int d = 0; d < dw_dst_ch_aligned / 4; ++d) { + for (int i = 0; i < 4; ++i) { + const int d_ch = d * 4 + i; + if (d_ch < dw_attr.weights.shape.i) { + const int f_index = + dw_attr.weights.shape.LinearIndex({0, y, x, d_ch}); + gpu_data.push_back(dw_attr.weights.data[f_index]); + } else { + gpu_data.push_back(0.0f); + } + } + } + } + } + // conv bias loading + for (int i = 0; i < conv_dst_ch_aligned; ++i) { + if (i < conv_attr.bias.shape.v) { + gpu_data.push_back(conv_attr.bias.data[i]); + } else { + 
gpu_data.push_back(0.0f); + } + } + // conv weights loading + for (int d = 0; d < conv_dst_ch_aligned / 4; ++d) { + for (int s = 0; s < conv_src_ch_aligned / 4; ++s) { + for (int j = 0; j < 4; ++j) { + for (int i = 0; i < 4; ++i) { + const int s_ch = s * 4 + j; + const int d_ch = d * 4 + i; + if (s_ch < conv_attr.weights.shape.i && + d_ch < conv_attr.weights.shape.o) { + const int f_index = + conv_attr.weights.shape.LinearIndex({d_ch, 0, 0, s_ch}); + gpu_data.push_back(conv_attr.weights.data[f_index]); + } else { + gpu_data.push_back(0.0f); + } + } + } + } + } + + Buffer constants_buf; + const bool fp32_weights = definition_.precision == CalculationsPrecision::F32; + const int float_size = fp32_weights ? 4 : 2; + if (fp32_weights) { + RETURN_IF_ERROR(CreateReadOnlyBuffer(float_size * gpu_data.size(), + gpu_data.data(), context, + &constants_buf)); + } else { + std::vector gpu_data_half(gpu_data.size()); + for (int i = 0; i < gpu_data.size(); ++i) { + gpu_data_half[i] = gpu_data[i]; + } + RETURN_IF_ERROR(CreateReadOnlyBuffer(float_size * gpu_data_half.size(), + gpu_data_half.data(), context, + &constants_buf)); + } + + BufferDescriptor desc; + desc.element_type = fp32_weights ? DataType::FLOAT32 : DataType::FLOAT16; + desc.element_size = 4; + desc.memory_type = MemoryType::CONSTANT; + args_.AddObject("constants", AccessType::READ, + absl::make_unique(std::move(constants_buf)), + absl::make_unique(desc)); + return absl::OkStatus(); +} + +absl::Status DepthwiseConvPlus1x1Conv::Compile( + const CreationContext& creation_context) { + std::string code = GenerateCode(definition_, dw_attr_, result_depth_, + *creation_context.device, &args_); + std::string element_wise_code; + RETURN_IF_ERROR( + MergeOperations(linked_operations_, &args_, &element_wise_code)); + RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), + {{"dst_tensor", element_wise_code}}, + &code)); + return creation_context.cache->GetOrCreateCLKernel( + code, "main_function", *creation_context.context, + *creation_context.device, &kernel_); +} + +absl::Status DepthwiseConvPlus1x1Conv::BindArguments() { + RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); + RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); + return absl::OkStatus(); +} + +int3 DepthwiseConvPlus1x1Conv::GetGridSize() const { + const int grid_x = dst_[0]->Width() * dst_[0]->Batch(); + const int grid_y = dst_[0]->Height(); + return int3(grid_x, grid_y, 1); +} + +bool IsDepthwiseConvPlus1x1ConvSupported( + const CLDevice& device, const OperationDef& definition, + const DepthwiseConvolution2DAttributes& dw_attr, + const Convolution2DAttributes& conv_attr) { + const auto dw_shape = dw_attr.weights.shape; + const auto conv_shape = conv_attr.weights.shape; + bool good_dw = dw_shape.o == 1; + bool good_conv = + conv_shape.w == 1 && conv_shape.h == 1 && conv_attr.dilations.w == 1 && + conv_attr.dilations.h == 1 && conv_attr.strides.w == 1 && + conv_attr.strides.h == 1 && conv_attr.padding.prepended.w == 0 && + conv_attr.padding.prepended.h == 0 && conv_attr.padding.appended.w == 0 && + conv_attr.padding.appended.h == 0; + bool recommended_dw = + dw_shape.i <= 16 && dw_shape.i * dw_shape.h * dw_shape.w <= 3 * 3 * 16; + bool recommended_conv = + conv_shape.o <= 32 && conv_shape.i * conv_shape.o <= 16 * 32; + return good_dw && good_conv && recommended_dw && recommended_conv; +} + +absl::Status CreateDepthwiseConvPlus1x1Conv( + const CreationContext& creation_context, const OperationDef& definition, + const DepthwiseConvolution2DAttributes& 
dw_attr, + const Convolution2DAttributes& conv_attr, + DepthwiseConvPlus1x1Conv* result) { + *result = DepthwiseConvPlus1x1Conv(definition, dw_attr, conv_attr); + RETURN_IF_ERROR( + result->UploadWeights(dw_attr, conv_attr, creation_context.context)); + return absl::OkStatus(); +} + +} // namespace cl +} // namespace gpu +} // namespace tflite diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/special/depthwise_conv_plus_1x1_conv.h b/tensorflow/lite/delegates/gpu/cl/kernels/special/depthwise_conv_plus_1x1_conv.h new file mode 100644 index 00000000000..5d8da6ac973 --- /dev/null +++ b/tensorflow/lite/delegates/gpu/cl/kernels/special/depthwise_conv_plus_1x1_conv.h @@ -0,0 +1,81 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_SPECIAL_DEPTHWISE_CONV_PLUS_1X1_CONV_H_ +#define TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_SPECIAL_DEPTHWISE_CONV_PLUS_1X1_CONV_H_ + +#include + +#include "tensorflow/lite/delegates/gpu/cl/buffer.h" +#include "tensorflow/lite/delegates/gpu/cl/gpu_object.h" +#include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h" +#include "tensorflow/lite/delegates/gpu/cl/util.h" +#include "tensorflow/lite/delegates/gpu/common/data_type.h" +#include "tensorflow/lite/delegates/gpu/common/operations.h" +#include "tensorflow/lite/delegates/gpu/common/shape.h" +#include "tensorflow/lite/delegates/gpu/common/status.h" +#include "tensorflow/lite/delegates/gpu/common/tensor.h" +#include "tensorflow/lite/delegates/gpu/common/types.h" + +namespace tflite { +namespace gpu { +namespace cl { + +class DepthwiseConvPlus1x1Conv : public GPUOperation { + public: + DepthwiseConvPlus1x1Conv() = default; + absl::Status BindArguments() override; + int3 GetGridSize() const override; + absl::Status Compile(const CreationContext& creation_context) override; + + // Move only + DepthwiseConvPlus1x1Conv(DepthwiseConvPlus1x1Conv&& operation); + DepthwiseConvPlus1x1Conv& operator=(DepthwiseConvPlus1x1Conv&& operation); + DepthwiseConvPlus1x1Conv(const DepthwiseConvPlus1x1Conv&) = delete; + DepthwiseConvPlus1x1Conv& operator=(const DepthwiseConvPlus1x1Conv&) = delete; + + private: + friend absl::Status CreateDepthwiseConvPlus1x1Conv( + const CreationContext& creation_context, const OperationDef& definition, + const DepthwiseConvolution2DAttributes& dw_attr, + const Convolution2DAttributes& conv_attr, + DepthwiseConvPlus1x1Conv* result); + DepthwiseConvPlus1x1Conv(const OperationDef& definition, + const DepthwiseConvolution2DAttributes& dw_attr, + const Convolution2DAttributes& conv_attr); + + absl::Status UploadWeights(const DepthwiseConvolution2DAttributes& dw_attr, + const Convolution2DAttributes& conv_attr, + CLContext* context); + + DepthwiseConvolution2DAttributes dw_attr_; + int result_depth_; +}; + +bool IsDepthwiseConvPlus1x1ConvSupported( + const CLDevice& device, const OperationDef& definition, + const 
DepthwiseConvolution2DAttributes& dw_attr, + const Convolution2DAttributes& conv_attr); + +absl::Status CreateDepthwiseConvPlus1x1Conv( + const CreationContext& creation_context, const OperationDef& definition, + const DepthwiseConvolution2DAttributes& dw_attr, + const Convolution2DAttributes& conv_attr, DepthwiseConvPlus1x1Conv* result); + +} // namespace cl +} // namespace gpu +} // namespace tflite + +#endif // TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_SPECIAL_DEPTHWISE_CONV_PLUS_1X1_CONV_H_ From 08a772722d0a16ef2be2e819aec22650542ab82b Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Thu, 16 Jul 2020 10:29:52 -0700 Subject: [PATCH 0605/2522] Convert tensor_shape.as_shape to tensor_shape.TensorShape which is the public API. PiperOrigin-RevId: 321594755 Change-Id: I70bcef40ff5ed0c545ec52c84c124c83c77f067b --- tensorflow/python/keras/layers/core.py | 2 +- .../keras/layers/legacy_rnn/rnn_cell_impl.py | 6 ++-- tensorflow/python/keras/layers/recurrent.py | 28 ++++++++++--------- .../python/keras/layers/recurrent_test.py | 4 +-- 4 files changed, 21 insertions(+), 19 deletions(-) diff --git a/tensorflow/python/keras/layers/core.py b/tensorflow/python/keras/layers/core.py index 43f61a0b861..155af8d2398 100644 --- a/tensorflow/python/keras/layers/core.py +++ b/tensorflow/python/keras/layers/core.py @@ -679,7 +679,7 @@ class Flatten(Layer): return array_ops.reshape(inputs, flattened_shape) def compute_output_shape(self, input_shape): - input_shape = tensor_shape.as_shape(input_shape).as_list() + input_shape = tensor_shape.TensorShape(input_shape).as_list() if not input_shape: output_shape = tensor_shape.TensorShape([1]) else: diff --git a/tensorflow/python/keras/layers/legacy_rnn/rnn_cell_impl.py b/tensorflow/python/keras/layers/legacy_rnn/rnn_cell_impl.py index 43bcd799f8b..2276fbf98ee 100644 --- a/tensorflow/python/keras/layers/legacy_rnn/rnn_cell_impl.py +++ b/tensorflow/python/keras/layers/legacy_rnn/rnn_cell_impl.py @@ -133,7 +133,7 @@ def _concat(prefix, suffix, static=False): raise ValueError("prefix tensor must be either a scalar or vector, " "but saw tensor: %s" % p) else: - p = tensor_shape.as_shape(prefix) + p = tensor_shape.TensorShape(prefix) p_static = p.as_list() if p.ndims is not None else None p = ( constant_op.constant(p.as_list(), dtype=dtypes.int32) @@ -147,14 +147,14 @@ def _concat(prefix, suffix, static=False): raise ValueError("suffix tensor must be either a scalar or vector, " "but saw tensor: %s" % s) else: - s = tensor_shape.as_shape(suffix) + s = tensor_shape.TensorShape(suffix) s_static = s.as_list() if s.ndims is not None else None s = ( constant_op.constant(s.as_list(), dtype=dtypes.int32) if s.is_fully_defined() else None) if static: - shape = tensor_shape.as_shape(p_static).concatenate(s_static) + shape = tensor_shape.TensorShape(p_static).concatenate(s_static) shape = shape.as_list() if shape.ndims is not None else None else: if p is None or s is None: diff --git a/tensorflow/python/keras/layers/recurrent.py b/tensorflow/python/keras/layers/recurrent.py index 64bef8eaeac..9d10ad73749 100644 --- a/tensorflow/python/keras/layers/recurrent.py +++ b/tensorflow/python/keras/layers/recurrent.py @@ -176,7 +176,7 @@ class StackedRNNCells(Layer): else: output_dim = cell.state_size input_shape = tuple([input_shape[0]] + - tensor_shape.as_shape(output_dim).as_list()) + tensor_shape.TensorShape(output_dim).as_list()) self.built = True def get_config(self): @@ -465,7 +465,7 @@ class RNN(Layer): # (tensor_shape(1, 2), tensor_shape(3, 4)) or (1, 2, 3) which is from numpy # inputs. 
try: - input_shape = tensor_shape.as_shape(input_shape) + input_shape = tensor_shape.TensorShape(input_shape) except (ValueError, TypeError): # A nested tensor input input_shape = nest.flatten(input_shape)[0] @@ -481,14 +481,16 @@ class RNN(Layer): state_size = [self.cell.state_size] def _get_output_shape(flat_output_size): - output_dim = tensor_shape.as_shape(flat_output_size).as_list() + output_dim = tensor_shape.TensorShape(flat_output_size).as_list() if self.return_sequences: if self.time_major: - output_shape = tensor_shape.as_shape([time_step, batch] + output_dim) + output_shape = tensor_shape.TensorShape( + [time_step, batch] + output_dim) else: - output_shape = tensor_shape.as_shape([batch, time_step] + output_dim) + output_shape = tensor_shape.TensorShape( + [batch, time_step] + output_dim) else: - output_shape = tensor_shape.as_shape([batch] + output_dim) + output_shape = tensor_shape.TensorShape([batch] + output_dim) return output_shape if getattr(self.cell, 'output_size', None) is not None: @@ -502,8 +504,8 @@ class RNN(Layer): if self.return_state: def _get_state_shape(flat_state): - state_shape = [batch] + tensor_shape.as_shape(flat_state).as_list() - return tensor_shape.as_shape(state_shape) + state_shape = [batch] + tensor_shape.TensorShape(flat_state).as_list() + return tensor_shape.TensorShape(state_shape) state_shape = nest.map_structure(_get_state_shape, state_size) return generic_utils.to_list(output_shape) + nest.flatten(state_shape) else: @@ -552,7 +554,7 @@ class RNN(Layer): # (tensor_shape(1, 2), tensor_shape(3, 4)) or (1, 2, 3) which is from numpy # inputs. try: - input_shape = tensor_shape.as_shape(input_shape) + input_shape = tensor_shape.TensorShape(input_shape) except (ValueError, TypeError): # A nested tensor input pass @@ -589,7 +591,7 @@ class RNN(Layer): self._validate_state_spec(state_size, self.state_spec) else: self.state_spec = [ - InputSpec(shape=[None] + tensor_shape.as_shape(dim).as_list()) + InputSpec(shape=[None] + tensor_shape.TensorShape(dim).as_list()) for dim in state_size ] if self.stateful: @@ -924,7 +926,7 @@ class RNN(Layer): # initialize state if None if nest.flatten(self.states)[0] is None: def create_state_variable(state): - return K.zeros([batch_size] + tensor_shape.as_shape(state).as_list()) + return K.zeros([batch_size] + tensor_shape.TensorShape(state).as_list()) self.states = nest.map_structure( create_state_variable, self.cell.state_size) if not nest.is_sequence(self.states): @@ -933,7 +935,7 @@ class RNN(Layer): for state, size in zip(nest.flatten(self.states), nest.flatten(self.cell.state_size)): K.set_value(state, np.zeros([batch_size] + - tensor_shape.as_shape(size).as_list())) + tensor_shape.TensorShape(size).as_list())) else: flat_states = nest.flatten(self.states) flat_input_states = nest.flatten(states) @@ -3014,7 +3016,7 @@ def _generate_zero_filled_state(batch_size_tensor, state_size, dtype): 'batch_size={}, dtype={}'.format(batch_size_tensor, dtype)) def create_zeros(unnested_state_size): - flat_dims = tensor_shape.as_shape(unnested_state_size).as_list() + flat_dims = tensor_shape.TensorShape(unnested_state_size).as_list() init_state_size = [batch_size_tensor] + flat_dims return array_ops.zeros(init_state_size, dtype=dtype) diff --git a/tensorflow/python/keras/layers/recurrent_test.py b/tensorflow/python/keras/layers/recurrent_test.py index c2c3d135f68..c8785a8eb9e 100644 --- a/tensorflow/python/keras/layers/recurrent_test.py +++ b/tensorflow/python/keras/layers/recurrent_test.py @@ -1745,8 +1745,8 @@ class 
Minimal2DRNNCell(keras.layers.Layer): def __init__(self, unit_a, unit_b, **kwargs): self.unit_a = unit_a self.unit_b = unit_b - self.state_size = tensor_shape.as_shape([unit_a, unit_b]) - self.output_size = tensor_shape.as_shape([unit_a, unit_b]) + self.state_size = tensor_shape.TensorShape([unit_a, unit_b]) + self.output_size = tensor_shape.TensorShape([unit_a, unit_b]) super(Minimal2DRNNCell, self).__init__(**kwargs) def build(self, input_shape): From 691bce3ed16123c7e9f99825373821f2816c0b2a Mon Sep 17 00:00:00 2001 From: Feng Liu Date: Thu, 16 Jul 2020 10:38:54 -0700 Subject: [PATCH 0606/2522] return the visit result in the CodeGenerator PiperOrigin-RevId: 321596885 Change-Id: I7d044540bbb173c43158989807555a5d7fd1c684 --- tensorflow/python/autograph/pyct/transformer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/autograph/pyct/transformer.py b/tensorflow/python/autograph/pyct/transformer.py index 87abe3d185c..dc0b173aff9 100644 --- a/tensorflow/python/autograph/pyct/transformer.py +++ b/tensorflow/python/autograph/pyct/transformer.py @@ -527,7 +527,7 @@ class CodeGenerator(NodeStateTracker, gast.NodeVisitor): self.ctx.current_origin = anno.getanno(node, anno.Basic.ORIGIN) try: - super(CodeGenerator, self).visit(node) + ret = super(CodeGenerator, self).visit(node) # By default, all replacements receive the origin info of the replaced # node. @@ -537,5 +537,6 @@ class CodeGenerator(NodeStateTracker, gast.NodeVisitor): node, anno.Basic.ORIGIN, default=parent_origin) if inherited_origin is not None: self.source_map[(eof_before, eof_after)] = inherited_origin + return ret finally: self.ctx.current_origin = parent_origin From 2a7ad989f5da5f740e6182fd4bab9dae14422aec Mon Sep 17 00:00:00 2001 From: Brian Zhao Date: Thu, 16 Jul 2020 10:57:42 -0700 Subject: [PATCH 0607/2522] Automated g4 rollback of changelist 321508374. PiperOrigin-RevId: 321601571 Change-Id: If3b349d65d9030dca3a6ddaddc78d05ea9845a05 --- .../writer/option_writer_generator.cc | 33 -------- .../lite/experimental/writer/writer_lib.cc | 4 +- .../experimental/writer/writer_lib_test.cc | 75 ------------------- 3 files changed, 2 insertions(+), 110 deletions(-) diff --git a/tensorflow/lite/experimental/writer/option_writer_generator.cc b/tensorflow/lite/experimental/writer/option_writer_generator.cc index e484c5ba2f4..a565422457c 100644 --- a/tensorflow/lite/experimental/writer/option_writer_generator.cc +++ b/tensorflow/lite/experimental/writer/option_writer_generator.cc @@ -265,32 +265,6 @@ void GenerateImportForResizeBilinearOp(FILE* fp) { " }\n break;\n"); } -// Reshape Op infers output shape either from Parameter or from shape tensor -// that's is an additional input. When we have this additional shape tensor as -// input we don't have the parameter present in this layer. In case of more than -// one input we import an empty vector for the parameters. 
-void GenerateImportForReshapeOp(FILE* fp) { - fprintf(fp, - " case BuiltinOperator_RESHAPE: {\n" - " const auto* params = reinterpret_cast(builtin_op_data);\n" - " flatbuffers::Offset union_type;\n" - " if ((node.inputs->size > 1) &&\n" - " (params->num_dimensions < 0 ||\n" - " params->num_dimensions >= " - "TFLITE_RESHAPE_PARAMS_MAX_DIMENSION_COUNT)) {\n" - " union_type = CreateReshapeOptions(*fbb).Union();\n" - " } else {\n" - " auto val0 = fbb->CreateVector(std::vector(params->shape, " - "params->shape + params->num_dimensions));\n" - " union_type = CreateReshapeOptions(*fbb, " - "val0).Union();\n" - " }\n" - " return std::make_pair(BuiltinOptions_ReshapeOptions, " - "union_type);\n" - " }\n break;\n"); -} - void GenerateImportForOp(FILE* fp, const std::string& op_name, const std::string& option_name, const std::string& option_type, @@ -302,13 +276,6 @@ void GenerateImportForOp(FILE* fp, const std::string& op_name, return; } - // Special case Reshape that may have 'new_shape' field missing from the - // parameters. - if (struct_name == "TfLiteReshapeParams") { - GenerateImportForReshapeOp(fp); - return; - } - fprintf(fp, " case BuiltinOperator_%s: {\n", op_name.c_str()); if (options->num_elems != 0) { fprintf(fp, diff --git a/tensorflow/lite/experimental/writer/writer_lib.cc b/tensorflow/lite/experimental/writer/writer_lib.cc index 2c71919724c..85f57527c31 100644 --- a/tensorflow/lite/experimental/writer/writer_lib.cc +++ b/tensorflow/lite/experimental/writer/writer_lib.cc @@ -31,7 +31,7 @@ namespace tflite { std::pair> CreateBuiltinUnion( flatbuffers::FlatBufferBuilder* fbb, enum BuiltinOperator op, - void* builtin_op_data, const TfLiteNode& node) { + void* builtin_op_data) { switch (op) { #include "tensorflow/lite/experimental/writer/option_writer_generated.h" } @@ -82,7 +82,7 @@ SubgraphWriter::ExportOperators(flatbuffers::FlatBufferBuilder* fbb) { // builtin auto builtin_options_and_type = CreateBuiltinUnion( fbb, static_cast(registration.builtin_code), - node.builtin_data, node); + node.builtin_data); builtin_options = builtin_options_and_type.second; builtin_options_type = builtin_options_and_type.first; } else { diff --git a/tensorflow/lite/experimental/writer/writer_lib_test.cc b/tensorflow/lite/experimental/writer/writer_lib_test.cc index 4cab27ecb2d..41cca88ead7 100644 --- a/tensorflow/lite/experimental/writer/writer_lib_test.cc +++ b/tensorflow/lite/experimental/writer/writer_lib_test.cc @@ -15,8 +15,6 @@ limitations under the License. 
#include "tensorflow/lite/experimental/writer/writer_lib.h" -#include - #include #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/interpreter.h" @@ -186,79 +184,6 @@ TEST(Writer, PerTensorQuantizedModelTest) { CHECK_EQ(new_interpreter->AllocateTensors(), kTfLiteOk); } -struct ReshapeTestPattern { - int num_inputs; - bool is_param_valid; -}; - -class ReshapeLayerTest : public ::testing::TestWithParam {}; - -TEST_P(ReshapeLayerTest, ReshapeLayerTest) { - const auto param = GetParam(); - Interpreter interpreter; - const int total_tensors = param.num_inputs + 1; - interpreter.AddTensors(total_tensors); - int output_shape[] = {1, 2, 3}; - interpreter.SetTensorParametersReadWrite(/*tensor_index=*/0, kTfLiteFloat32, - /*name=*/"a", /*dims=*/{6}, - TfLiteQuantization()); - ASSERT_LE(param.num_inputs, 2); - if (param.num_inputs == 2) { - interpreter.SetTensorParametersReadOnly( - /*tensor_index=*/1, kTfLiteInt32, /*name=*/"b", /*dims=*/{3}, - TfLiteQuantization(), reinterpret_cast(output_shape), - sizeof(output_shape)); - } - interpreter.SetTensorParametersReadWrite(/*tensor_index=*/total_tensors - 1, - kTfLiteFloat32, /*name=*/"c", - /*dims=*/{3}, TfLiteQuantization()); - - std::vector input_tensors(param.num_inputs); - std::iota(input_tensors.begin(), input_tensors.end(), 0); - - interpreter.SetInputs(input_tensors); - interpreter.SetOutputs({total_tensors - 1}); - const char* initial_data = ""; - tflite::ops::builtin::BuiltinOpResolver resolver; - TfLiteReshapeParams* builtin_data = reinterpret_cast( - malloc(sizeof(TfLiteReshapeParams))); - if (param.is_param_valid) { - builtin_data->num_dimensions = 3; - for (int dim = 0; dim < builtin_data->num_dimensions; ++dim) { - builtin_data->shape[dim] = output_shape[dim]; - } - } - const TfLiteRegistration* reg = resolver.FindOp(BuiltinOperator_RESHAPE, 1); - interpreter.AddNodeWithParameters(input_tensors, - /*outputs=*/{total_tensors - 1}, - initial_data, /*init_data_size=*/0, - reinterpret_cast(builtin_data), reg); - - SubgraphWriter writer(&interpreter.primary_subgraph()); - std::string filename = absl::StrCat("/tmp/test_reshape_", param.num_inputs, - "_", param.is_param_valid, ".tflite"); - writer.Write(filename); - std::unique_ptr model = - FlatBufferModel::BuildFromFile(filename.c_str()); - InterpreterBuilder builder(*model, resolver); - std::unique_ptr new_interpreter; - builder(&new_interpreter); - ASSERT_EQ(new_interpreter->AllocateTensors(), kTfLiteOk); -} - -INSTANTIATE_TEST_SUITE_P( - Writer, ReshapeLayerTest, - ::testing::Values(ReshapeTestPattern{/*num_inputs=*/2, - /*is_param_valid=*/true}, - ReshapeTestPattern{/*num_inputs=*/2, - /*is_param_valid=*/false}, - ReshapeTestPattern{/*num_inputs=*/1, - /*is_param_valid=*/true}), - [](const ::testing::TestParamInfo& info) { - std::string name = absl::StrCat("num_inputs_", info.param.num_inputs, - "_isvalid_", info.param.is_param_valid); - return name; - }); } // namespace tflite int main(int argc, char** argv) { From aa633f3d0977b05ae17a4576b2b78295e0f4748c Mon Sep 17 00:00:00 2001 From: Brian Zhao Date: Thu, 16 Jul 2020 11:03:24 -0700 Subject: [PATCH 0608/2522] Add a test to verify that the TF Lite C API headers can build, link, and run successfully when compiled as C code. 
PiperOrigin-RevId: 321603053 Change-Id: If866522197ff1c828591f9bb3342e06f0cf9fbda --- tensorflow/lite/c/BUILD | 15 --- tensorflow/lite/c/c_test.c | 143 ---------------------------- tensorflow/lite/tools/make/Makefile | 1 - 3 files changed, 159 deletions(-) delete mode 100644 tensorflow/lite/c/c_test.c diff --git a/tensorflow/lite/c/BUILD b/tensorflow/lite/c/BUILD index 366b43336b9..1aa043b7c0c 100644 --- a/tensorflow/lite/c/BUILD +++ b/tensorflow/lite/c/BUILD @@ -158,18 +158,3 @@ cc_test( "@com_google_googletest//:gtest", ], ) - -cc_test( - name = "c_test", - size = "small", - srcs = ["c_test.c"], - copts = tflite_copts(), - data = [ - "//tensorflow/lite:testdata/add.bin", - ], - deps = [ - ":c_api", - ":c_api_experimental", - ":common", - ], -) diff --git a/tensorflow/lite/c/c_test.c b/tensorflow/lite/c/c_test.c deleted file mode 100644 index 1c550b9a195..00000000000 --- a/tensorflow/lite/c/c_test.c +++ /dev/null @@ -1,143 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/lite/c/c_api.h" -#include "tensorflow/lite/c/c_api_experimental.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/c/builtin_op_data.h" - -// This file exists just to verify that the above header files above can build, -// link, and run as "C" code. - -#ifdef __cplusplus -#error "This file should be compiled as C code, not as C++." -#endif - -#include -#include -#include - -static void CheckFailed(const char *expression, const char *filename, - int line_number) { - fprintf(stderr, "ERROR: CHECK failed: %s:%d: %s\n", filename, line_number, - expression); - fflush(stderr); - abort(); -} - -// We use an extra level of macro indirection here to ensure that the -// macro arguments get evaluated, so that in a call to CHECK(foo), -// the call to STRINGIZE(condition) in the definition of the CHECK -// macro results in the string "foo" rather than the string "condition". -#define STRINGIZE(expression) STRINGIZE2(expression) -#define STRINGIZE2(expression) #expression - -// Like assert(), but not dependent on NDEBUG. -#define CHECK(condition) \ - ((condition) ? (void)0 \ - : CheckFailed(STRINGIZE(condition), __FILE__, __LINE__)) -#define ASSERT_EQ(expected, actual) CHECK((expected) == (actual)) -#define ASSERT_NE(expected, actual) CHECK((expected) != (actual)) -#define ASSERT_STREQ(expected, actual) \ - ASSERT_EQ(0, strcmp((expected), (actual))) - -// Test the TfLiteVersion function. 
-static void TestVersion(void) { - const char *version = TfLiteVersion(); - printf("Version = %s\n", version); - CHECK(version[0] != '\0'); -} - -static void TestSmokeTest(void) { - TfLiteModel* model = - TfLiteModelCreateFromFile("third_party/tensorflow/lite/testdata/add.bin"); - ASSERT_NE(model, NULL); - - TfLiteInterpreterOptions* options = TfLiteInterpreterOptionsCreate(); - ASSERT_NE(options, NULL); - TfLiteInterpreterOptionsSetNumThreads(options, 2); - - TfLiteInterpreter* interpreter = TfLiteInterpreterCreate(model, options); - ASSERT_NE(interpreter, NULL); - - // The options/model can be deleted immediately after interpreter creation. - TfLiteInterpreterOptionsDelete(options); - TfLiteModelDelete(model); - - ASSERT_EQ(TfLiteInterpreterAllocateTensors(interpreter), kTfLiteOk); - ASSERT_EQ(TfLiteInterpreterGetInputTensorCount(interpreter), 1); - ASSERT_EQ(TfLiteInterpreterGetOutputTensorCount(interpreter), 1); - - int input_dims[1] = {2}; - ASSERT_EQ(TfLiteInterpreterResizeInputTensor( - interpreter, 0, input_dims, 1), - kTfLiteOk); - ASSERT_EQ(TfLiteInterpreterAllocateTensors(interpreter), kTfLiteOk); - - TfLiteTensor* input_tensor = TfLiteInterpreterGetInputTensor(interpreter, 0); - ASSERT_NE(input_tensor, NULL); - ASSERT_EQ(TfLiteTensorType(input_tensor), kTfLiteFloat32); - ASSERT_EQ(TfLiteTensorNumDims(input_tensor), 1); - ASSERT_EQ(TfLiteTensorDim(input_tensor, 0), 2); - ASSERT_EQ(TfLiteTensorByteSize(input_tensor), sizeof(float) * 2); - ASSERT_NE(TfLiteTensorData(input_tensor), NULL); - ASSERT_STREQ(TfLiteTensorName(input_tensor), "input"); - - TfLiteQuantizationParams input_params = - TfLiteTensorQuantizationParams(input_tensor); - ASSERT_EQ(input_params.scale, 0.f); - ASSERT_EQ(input_params.zero_point, 0); - - float input[2] = {1.f, 3.f}; - ASSERT_EQ(TfLiteTensorCopyFromBuffer(input_tensor, input, - 2 * sizeof(float)), - kTfLiteOk); - - ASSERT_EQ(TfLiteInterpreterInvoke(interpreter), kTfLiteOk); - - const TfLiteTensor* output_tensor = - TfLiteInterpreterGetOutputTensor(interpreter, 0); - ASSERT_NE(output_tensor, NULL); - ASSERT_EQ(TfLiteTensorType(output_tensor), kTfLiteFloat32); - ASSERT_EQ(TfLiteTensorNumDims(output_tensor), 1); - ASSERT_EQ(TfLiteTensorDim(output_tensor, 0), 2); - ASSERT_EQ(TfLiteTensorByteSize(output_tensor), sizeof(float) * 2); - ASSERT_NE(TfLiteTensorData(output_tensor), NULL); - ASSERT_STREQ(TfLiteTensorName(output_tensor), "output"); - - TfLiteQuantizationParams output_params = - TfLiteTensorQuantizationParams(output_tensor); - ASSERT_EQ(output_params.scale, 0.f); - ASSERT_EQ(output_params.zero_point, 0); - - float output[2]; - ASSERT_EQ(TfLiteTensorCopyToBuffer(output_tensor, output, - 2 * sizeof(float)), - kTfLiteOk); - ASSERT_EQ(output[0], 3.f); - ASSERT_EQ(output[1], 9.f); - - TfLiteInterpreterDelete(interpreter); -} - -static void RunTests(void) { - TestVersion(); - TestSmokeTest(); -} - -int main(void) { - RunTests(); - return 0; -} diff --git a/tensorflow/lite/tools/make/Makefile b/tensorflow/lite/tools/make/Makefile index cb1714161c4..f8b67fbbe7d 100644 --- a/tensorflow/lite/tools/make/Makefile +++ b/tensorflow/lite/tools/make/Makefile @@ -148,7 +148,6 @@ endif CORE_CC_ALL_SRCS := $(sort $(CORE_CC_ALL_SRCS)) CORE_CC_EXCLUDE_SRCS := \ $(wildcard tensorflow/lite/*test.cc) \ -$(wildcard tensorflow/lite/*/*test.c) \ $(wildcard tensorflow/lite/*/*test.cc) \ $(wildcard tensorflow/lite/*/*/benchmark.cc) \ $(wildcard tensorflow/lite/*/*/example*.cc) \ From 975812256fe2e502a882cd9e168bb9bd0197d447 Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Thu, 16 
Jul 2020 11:12:04 -0700 Subject: [PATCH 0609/2522] Workaround compiler flags for api migration --- tensorflow/core/platform/file_system.h | 66 +++++++++++++------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/tensorflow/core/platform/file_system.h b/tensorflow/core/platform/file_system.h index 7ebacc2652b..c29d67e3bc2 100644 --- a/tensorflow/core/platform/file_system.h +++ b/tensorflow/core/platform/file_system.h @@ -364,110 +364,110 @@ class FileSystem { class WrappedFileSystem : public FileSystem { public: virtual tensorflow::Status NewRandomAccessFile( - const string& fname, std::unique_ptr* result, - TransactionToken* token = nullptr) /* override */ { + const string& fname, std::unique_ptr* result + /*, TransactionToken* token = nullptr */) /* override */ { return fs_->NewRandomAccessFile(fname, result /* , (token ? token : token_) */); } virtual tensorflow::Status NewWritableFile( - const string& fname, std::unique_ptr* result, - TransactionToken* token = nullptr) /* override */ { + const string& fname, std::unique_ptr* result + /*, TransactionToken* token = nullptr */) /* override */ { return fs_->NewWritableFile(fname, result /* , (token ? token : token_) */); } virtual tensorflow::Status NewAppendableFile( - const string& fname, std::unique_ptr* result, - TransactionToken* token = nullptr) /* override */ { + const string& fname, std::unique_ptr* result + /*, TransactionToken* token = nullptr */) /* override */ { return fs_->NewAppendableFile(fname, result /* , (token ? token : token_) */); } virtual tensorflow::Status NewReadOnlyMemoryRegionFromFile( - const string& fname, std::unique_ptr* result, - TransactionToken* token = nullptr) /* override */ { + const string& fname, std::unique_ptr* result + /*, TransactionToken* token = nullptr */) /* override */ { return fs_->NewReadOnlyMemoryRegionFromFile( fname, result /* , (token ? token : token_) */); } virtual tensorflow::Status FileExists( - const string& fname, TransactionToken* token = nullptr) /* override */ { + const string& fname/*, TransactionToken* token = nullptr */) /* override */ { return fs_->FileExists(fname /* , (token ? token : token_) */); } virtual bool FilesExist(const std::vector& files, - std::vector* status, - TransactionToken* token = nullptr) /* override */ { + std::vector* status + /*, TransactionToken* token = nullptr */) /* override */ { return fs_->FilesExist(files, status /* , (token ? token : token_) */); } virtual tensorflow::Status GetChildren( - const string& dir, std::vector* result, - TransactionToken* token = nullptr) /* override */ { + const string& dir, std::vector* result + /*, TransactionToken* token = nullptr */) /* override */ { return fs_->GetChildren(dir, result /* , (token ? token : token_) */); } virtual tensorflow::Status GetMatchingPaths( - const string& pattern, std::vector* results, - TransactionToken* token = nullptr) /* override */ { + const string& pattern, std::vector* results + /*, TransactionToken* token = nullptr */) /* override */ { return fs_->GetMatchingPaths(pattern, results /* , (token ? token : token_) */); } - virtual bool Match(const std::string& filename, const std::string& pattern, - TransactionToken* token = nullptr) /* override */ { + virtual bool Match(const std::string& filename, const std::string& pattern + /*, TransactionToken* token = nullptr */) /* override */ { return fs_->Match(filename, pattern /* , (token ? 
token : token_) */); } virtual tensorflow::Status Stat( - const string& fname, FileStatistics* stat, - TransactionToken* token = nullptr) /* override */ { + const string& fname, FileStatistics* stat + /*, TransactionToken* token = nullptr */) /* override */ { return fs_->Stat(fname, stat /* , (token ? token : token_) */); } virtual tensorflow::Status DeleteFile( - const string& fname, TransactionToken* token = nullptr) /* override */ { + const string& fname/*, TransactionToken* token = nullptr */) /* override */ { return fs_->DeleteFile(fname /* , (token ? token : token_) */); } virtual tensorflow::Status CreateDir( - const string& dirname, TransactionToken* token = nullptr) /* override */ { + const string& dirname/*, TransactionToken* token = nullptr */) /* override */ { return fs_->CreateDir(dirname /* , (token ? token : token_) */); } virtual tensorflow::Status RecursivelyCreateDir( - const string& dirname, TransactionToken* token = nullptr) /* override */ { + const string& dirname/*, TransactionToken* token = nullptr */) /* override */ { return fs_->RecursivelyCreateDir(dirname /* , (token ? token : token_) */); } virtual tensorflow::Status DeleteDir( - const string& dirname, TransactionToken* token = nullptr) /* override */ { + const string& dirname/*, TransactionToken* token = nullptr */) /* override */ { return fs_->DeleteDir(dirname /* , (token ? token : token_) */); } virtual tensorflow::Status DeleteRecursively( - const string& dirname, int64* undeleted_files, int64* undeleted_dirs, - TransactionToken* token = nullptr) /* override */ { + const string& dirname, int64* undeleted_files, int64* undeleted_dirs + /*, TransactionToken* token = nullptr */) /* override */ { return fs_->DeleteRecursively( dirname, undeleted_files, undeleted_dirs /*, (token ? token : token_) */); } virtual tensorflow::Status GetFileSize( - const string& fname, uint64* file_size, - TransactionToken* token = nullptr) /* override */ { + const string& fname, uint64* file_size + /*, TransactionToken* token = nullptr */) /* override */ { return fs_->GetFileSize(fname, file_size /* , (token ? token : token_) */); } virtual tensorflow::Status RenameFile( - const string& src, const string& target, - TransactionToken* token = nullptr) /* override */ { + const string& src, const string& target + /*, TransactionToken* token = nullptr */) /* override */ { return fs_->RenameFile(src, target /* , (token ? token : token_) */); } virtual tensorflow::Status CopyFile( - const string& src, const string& target, - TransactionToken* token = nullptr) /* override */ { + const string& src, const string& target + /*, TransactionToken* token = nullptr */) /* override */ { return fs_->CopyFile(src, target /* , (token ? token : token_) */); } @@ -477,7 +477,7 @@ class WrappedFileSystem : public FileSystem { } virtual tensorflow::Status IsDirectory( - const string& fname, TransactionToken* token = nullptr) /* override */ { + const string& fname/*, TransactionToken* token = nullptr */) /* override */ { return fs_->IsDirectory(fname /* , (token ? token : token_) */); } @@ -486,7 +486,7 @@ class WrappedFileSystem : public FileSystem { return fs_->HasAtomicMove(path, has_atomic_move); } - virtual void FlushCaches(TransactionToken* token = nullptr) /* override */ { + virtual void FlushCaches(/*TransactionToken* token = nullptr */) /* override */ { return fs_->FlushCaches(/* (token ? 
token : token_) */); } From 0d168d55aa5f22c3c29c400cab83f757b35194c3 Mon Sep 17 00:00:00 2001 From: Bruce Fontaine Date: Thu, 16 Jul 2020 11:08:04 -0700 Subject: [PATCH 0610/2522] Allow slot variable creation function to take the slot initializers. PiperOrigin-RevId: 321604129 Change-Id: I4775102f2bacbe9e0bcbd41b29f2419e91af7e73 --- tensorflow/python/tpu/tpu_embedding_v2.py | 2 +- tensorflow/python/tpu/tpu_embedding_v2_test.py | 4 ++-- tensorflow/python/tpu/tpu_embedding_v2_utils.py | 3 ++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/tpu/tpu_embedding_v2.py b/tensorflow/python/tpu/tpu_embedding_v2.py index eea2dea53c2..8e23812706b 100644 --- a/tensorflow/python/tpu/tpu_embedding_v2.py +++ b/tensorflow/python/tpu/tpu_embedding_v2.py @@ -1230,7 +1230,7 @@ def _ragged_embedding_lookup_with_reduce(table, ragged, weights, combiner): A Tensor. """ if weights is None: - weights = array_ops.ones_like(ragged) + weights = array_ops.ones_like(ragged, dtype=table.dtype) weights = array_ops.expand_dims(weights, axis=2) ragged_result = embedding_ops.embedding_lookup_ragged(table, ragged) ragged_result = math_ops.reduce_sum(ragged_result * weights, axis=1) diff --git a/tensorflow/python/tpu/tpu_embedding_v2_test.py b/tensorflow/python/tpu/tpu_embedding_v2_test.py index 21c10e8dc6a..c9f9b90ebd0 100644 --- a/tensorflow/python/tpu/tpu_embedding_v2_test.py +++ b/tensorflow/python/tpu/tpu_embedding_v2_test.py @@ -1148,7 +1148,7 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase): @parameterized.parameters([True, False]) def test_optimizer_with_slot_creation_fn(self, use_tpu): - def slot_creation_fn(table, slot_names): + def slot_creation_fn(table, slot_names, _): slots = {} for slot in slot_names: slots[slot] = tf_variables.Variable( @@ -1188,7 +1188,7 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase): self.table_user.dim))) def test_optimizer_with_slot_creation_fn_non_partial(self): - def slot_creation_fn(table, slot_names): + def slot_creation_fn(table, slot_names, _): slots = {} for slot in slot_names: # Note that we don't pass functools.partial here, so on TPU we can't diff --git a/tensorflow/python/tpu/tpu_embedding_v2_utils.py b/tensorflow/python/tpu/tpu_embedding_v2_utils.py index 9d7de203889..86f85392681 100644 --- a/tensorflow/python/tpu/tpu_embedding_v2_utils.py +++ b/tensorflow/python/tpu/tpu_embedding_v2_utils.py @@ -111,7 +111,8 @@ class _Optimizer(object): A dict of variables, keyed by self._slot_names(). """ if self.slot_variable_creation_fn is not None: - return self.slot_variable_creation_fn(table, self._slot_names()) + return self.slot_variable_creation_fn(table, self._slot_names(), + self._slot_initializers()) else: slots = {} for slot, initializer in zip(self._slot_names(), From 4a15bd519b678728268c066469dbb0e5241420af Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Thu, 16 Jul 2020 11:09:03 -0700 Subject: [PATCH 0611/2522] Update kernel_util.cc to use the new GetTensor() API. With upcoming changes to TFLM for reducing runtime RAM, tensor data can be accessed via the new function pointer recently added to TfLiteContext. This new API enables runtimes to manage tensor overhead based on requirements for the platform. This change simply points existing API calls used by TFL and TFLM kernels to get TfLiteTensor structs to the new function pointer if it exists. 
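In kernel_util.cc the change reduces to a small fallback: use the flat context->tensors array when the runtime provides one, and go through the new GetTensor function pointer otherwise. A simplified sketch of that pattern (ResolveTensor is a made-up name for illustration; the real GetInput/GetOutput helpers in the hunks below also handle variable and optional tensors):

#include "tensorflow/lite/c/common.h"

// Simplified illustration of the lookup introduced by this change.
static const TfLiteTensor* ResolveTensor(const TfLiteContext* context,
                                         int tensor_index) {
  if (context->tensors != nullptr) {
    // TF Lite proper keeps a flat tensor array on the context.
    return &context->tensors[tensor_index];
  }
  // TFLM can leave context->tensors unset and serve tensors on demand via
  // the new function pointer, so the runtime controls per-tensor overhead.
  return context->GetTensor(context, tensor_index);
}
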
PiperOrigin-RevId: 321604375 Change-Id: I50f580f2d01b6273b21a07c5c3b95f245e5717b9 --- tensorflow/lite/c/common.h | 11 ++++---- tensorflow/lite/core/subgraph.cc | 3 +++ .../delegates/coreml/builders/util_test.cc | 2 +- tensorflow/lite/kernels/kernel_util.cc | 27 +++++++++++++++---- tensorflow/lite/micro/micro_interpreter.cc | 7 +++++ tensorflow/lite/micro/micro_interpreter.h | 3 +++ .../micro/recording_micro_allocator_test.cc | 2 ++ tensorflow/lite/micro/testing/test_utils.cc | 8 ++++++ .../benchmark/experimental/c/c_api_types.h | 11 ++++---- 9 files changed, 58 insertions(+), 16 deletions(-) diff --git a/tensorflow/lite/c/common.h b/tensorflow/lite/c/common.h index cd6eeec4da2..89b25892914 100644 --- a/tensorflow/lite/c/common.h +++ b/tensorflow/lite/c/common.h @@ -761,16 +761,17 @@ typedef struct TfLiteContext { struct TfLiteContext* context, const TfLiteIntArray* nodes_to_replace, TfLiteDelegateParams** partition_params_array, int* num_partitions); - // Returns a TfLiteTensor struct for a given index in the subgraph. + // Returns a TfLiteTensor struct for a given index. // WARNING: This is an experimental interface that is subject to change. // WARNING: This method may not be available on all platforms. - TfLiteTensor* (*GetTensor)(struct TfLiteContext* context, int subgraph_idx); + TfLiteTensor* (*GetTensor)(const struct TfLiteContext* context, + int tensor_idx); - // Returns a TfLiteEvalTensor struct for a given index in the subgraph. + // Returns a TfLiteEvalTensor struct for a given index. // WARNING: This is an experimental interface that is subject to change. // WARNING: This method may not be available on all platforms. - TfLiteEvalTensor* (*GetEvalTensor)(struct TfLiteContext* context, - int subgraph_idx); + TfLiteEvalTensor* (*GetEvalTensor)(const struct TfLiteContext* context, + int tensor_idx); } TfLiteContext; typedef struct TfLiteRegistration { diff --git a/tensorflow/lite/core/subgraph.cc b/tensorflow/lite/core/subgraph.cc index 5ef9b45514b..b087ae1901c 100644 --- a/tensorflow/lite/core/subgraph.cc +++ b/tensorflow/lite/core/subgraph.cc @@ -189,6 +189,7 @@ Subgraph::Subgraph(ErrorReporter* error_reporter, next_execution_plan_index_to_plan_allocation_(0), subgraphs_(subgraphs), resources_(resources) { + // TODO(b/161272052): Consider a better TfLiteContext initialization pattern: context_.impl_ = static_cast(this); context_.ResizeTensor = ResizeTensor; context_.ReportError = ReportErrorC; @@ -200,6 +201,8 @@ Subgraph::Subgraph(ErrorReporter* error_reporter, context_.GetExternalContext = GetExternalContext; context_.SetExternalContext = SetExternalContext; context_.profiler = nullptr; + context_.GetTensor = nullptr; + context_.GetEvalTensor = nullptr; // Reserve some space for the tensors to avoid excessive resizing. 
tensors_.reserve(kTensorsReservedCapacity); diff --git a/tensorflow/lite/experimental/delegates/coreml/builders/util_test.cc b/tensorflow/lite/experimental/delegates/coreml/builders/util_test.cc index 929bc4a2282..8ba8a9bb5bc 100644 --- a/tensorflow/lite/experimental/delegates/coreml/builders/util_test.cc +++ b/tensorflow/lite/experimental/delegates/coreml/builders/util_test.cc @@ -62,7 +62,7 @@ class IsBinaryOpSupportedTest : public testing::Test { } } - TfLiteContext context_; + TfLiteContext context_ = {}; TfLiteNode node_; std::vector tensors_; }; diff --git a/tensorflow/lite/kernels/kernel_util.cc b/tensorflow/lite/kernels/kernel_util.cc index 11d8cb67dd2..74c8c88d953 100644 --- a/tensorflow/lite/kernels/kernel_util.cc +++ b/tensorflow/lite/kernels/kernel_util.cc @@ -30,18 +30,31 @@ namespace tflite { const TfLiteTensor* GetInput(const TfLiteContext* context, const TfLiteNode* node, int index) { - return &context->tensors[node->inputs->data[index]]; + if (context->tensors != nullptr) { + return &context->tensors[node->inputs->data[index]]; + } else { + return context->GetTensor(context, node->inputs->data[index]); + } } TfLiteTensor* GetVariableInput(TfLiteContext* context, const TfLiteNode* node, int index) { - TfLiteTensor* tensor = &context->tensors[node->inputs->data[index]]; - return (tensor->is_variable) ? tensor : nullptr; + TfLiteTensor* tensor = nullptr; + if (context->tensors != nullptr) { + tensor = &context->tensors[node->inputs->data[index]]; + } else { + tensor = context->GetTensor(context, node->inputs->data[index]); + } + return tensor->is_variable ? tensor : nullptr; } TfLiteTensor* GetOutput(TfLiteContext* context, const TfLiteNode* node, int index) { - return &context->tensors[node->outputs->data[index]]; + if (context->tensors != nullptr) { + return &context->tensors[node->outputs->data[index]]; + } else { + return context->GetTensor(context, node->outputs->data[index]); + } } const TfLiteTensor* GetOptionalInputTensor(const TfLiteContext* context, @@ -49,7 +62,11 @@ const TfLiteTensor* GetOptionalInputTensor(const TfLiteContext* context, const bool use_tensor = index < node->inputs->size && node->inputs->data[index] != kTfLiteOptionalTensor; if (use_tensor) { - return &context->tensors[node->inputs->data[index]]; + if (context->tensors != nullptr) { + return &context->tensors[node->inputs->data[index]]; + } else { + return context->GetTensor(context, node->inputs->data[index]); + } } return nullptr; } diff --git a/tensorflow/lite/micro/micro_interpreter.cc b/tensorflow/lite/micro/micro_interpreter.cc index c16ede174aa..a47dba83c6b 100644 --- a/tensorflow/lite/micro/micro_interpreter.cc +++ b/tensorflow/lite/micro/micro_interpreter.cc @@ -70,6 +70,12 @@ void ContextHelper::ReportOpError(struct TfLiteContext* context, va_end(args); } +TfLiteTensor* ContextHelper::GetTensor(const struct TfLiteContext* context, + int tensor_idx) { + // TODO(b/160894903): Return this value from temp allocated memory. 
+ return &context->tensors[tensor_idx]; +} + } // namespace internal MicroInterpreter::MicroInterpreter(const Model* model, @@ -132,6 +138,7 @@ void MicroInterpreter::Init(tflite::Profiler* profiler) { context_.impl_ = static_cast(&context_helper_); context_.ReportError = context_helper_.ReportOpError; + context_.GetTensor = context_helper_.GetTensor; context_.recommended_num_threads = 1; context_.profiler = profiler; diff --git a/tensorflow/lite/micro/micro_interpreter.h b/tensorflow/lite/micro/micro_interpreter.h index 29377e3b940..6e9e5eca572 100644 --- a/tensorflow/lite/micro/micro_interpreter.h +++ b/tensorflow/lite/micro/micro_interpreter.h @@ -53,6 +53,9 @@ class ContextHelper { static void ReportOpError(struct TfLiteContext* context, const char* format, ...); + static TfLiteTensor* GetTensor(const struct TfLiteContext* context, + int tensor_idx); + void SetNodeIndex(int idx) { current_node_idx_ = idx; } private: diff --git a/tensorflow/lite/micro/recording_micro_allocator_test.cc b/tensorflow/lite/micro/recording_micro_allocator_test.cc index 8b8eaa20638..a2aa49f2246 100644 --- a/tensorflow/lite/micro/recording_micro_allocator_test.cc +++ b/tensorflow/lite/micro/recording_micro_allocator_test.cc @@ -58,6 +58,8 @@ TF_LITE_MICRO_TEST(TestRecordsTfLiteTensorArrayData) { TF_LITE_MICRO_EXPECT_EQ(status, kTfLiteOk); if (status != kTfLiteOk) return 1; + micro_allocator->PrintAllocations(); + tflite::RecordedAllocation recorded_allocation = micro_allocator->GetRecordedAllocation( tflite::RecordedAllocationType::kTfLiteTensorArray); diff --git a/tensorflow/lite/micro/testing/test_utils.cc b/tensorflow/lite/micro/testing/test_utils.cc index 4471b2e2929..8860d66efa9 100644 --- a/tensorflow/lite/micro/testing/test_utils.cc +++ b/tensorflow/lite/micro/testing/test_utils.cc @@ -87,6 +87,11 @@ void* GetScratchBuffer(TfLiteContext* context, int buffer_index) { return scratch_buffers_[buffer_index]; } +TfLiteTensor* GetTensor(const struct TfLiteContext* context, int subgraph_idx) { + // TODO(b/160894903): Return this value from temp allocated memory. + return &context->tensors[subgraph_idx]; +} + } // namespace uint8_t F2Q(float value, float min, float max) { @@ -137,6 +142,9 @@ void PopulateContext(TfLiteTensor* tensors, int tensors_size, context->GetExternalContext = nullptr; context->SetExternalContext = nullptr; + context->GetTensor = GetTensor; + context->GetEvalTensor = nullptr; + context->AllocatePersistentBuffer = AllocatePersistentBuffer; context->RequestScratchBufferInArena = RequestScratchBufferInArena; context->GetScratchBuffer = GetScratchBuffer; diff --git a/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h b/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h index cd6eeec4da2..89b25892914 100644 --- a/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h +++ b/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h @@ -761,16 +761,17 @@ typedef struct TfLiteContext { struct TfLiteContext* context, const TfLiteIntArray* nodes_to_replace, TfLiteDelegateParams** partition_params_array, int* num_partitions); - // Returns a TfLiteTensor struct for a given index in the subgraph. + // Returns a TfLiteTensor struct for a given index. // WARNING: This is an experimental interface that is subject to change. // WARNING: This method may not be available on all platforms. 
- TfLiteTensor* (*GetTensor)(struct TfLiteContext* context, int subgraph_idx); + TfLiteTensor* (*GetTensor)(const struct TfLiteContext* context, + int tensor_idx); - // Returns a TfLiteEvalTensor struct for a given index in the subgraph. + // Returns a TfLiteEvalTensor struct for a given index. // WARNING: This is an experimental interface that is subject to change. // WARNING: This method may not be available on all platforms. - TfLiteEvalTensor* (*GetEvalTensor)(struct TfLiteContext* context, - int subgraph_idx); + TfLiteEvalTensor* (*GetEvalTensor)(const struct TfLiteContext* context, + int tensor_idx); } TfLiteContext; typedef struct TfLiteRegistration { From 45ad1b64ee1cb3804b58e0cf58ab07c8e7f8b46f Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Thu, 16 Jul 2020 11:21:21 -0700 Subject: [PATCH 0612/2522] Going back to forcing embedding layer variables on the CPU even within a tf.function as this is breaking some user code. PiperOrigin-RevId: 321607029 Change-Id: Id159867f51b26e6604a1186d9ce526658ddd1e19 --- tensorflow/python/keras/layers/embeddings.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/python/keras/layers/embeddings.py b/tensorflow/python/keras/layers/embeddings.py index 3444b3a7665..defa03409a2 100644 --- a/tensorflow/python/keras/layers/embeddings.py +++ b/tensorflow/python/keras/layers/embeddings.py @@ -132,8 +132,7 @@ class Embedding(Layer): # right now. Checking for the presence of GPUs to avoid complicating the # TPU codepaths which can handle sparse optimizers. But if we are within # a tf.function, we go back the graph mode logic and rely on the placer. - if (context.executing_eagerly() and context.context().num_gpus() and - not ops.inside_function()): + if context.executing_eagerly() and context.context().num_gpus(): with ops.device('cpu:0'): self.embeddings = self.add_weight( shape=(self.input_dim, self.output_dim), From 8483d09a207da63d00e03383420c5798f6e68da4 Mon Sep 17 00:00:00 2001 From: Advait Jain Date: Thu, 16 Jul 2020 11:24:31 -0700 Subject: [PATCH 0613/2522] Consistent copts for test and non-test targets. 
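Beyond the BUILD changes, most of the edits below are mechanical fixes needed for the tests to build cleanly under the shared copts, apparently to satisfy signed/unsigned-comparison and float-literal warnings. A representative before/after pattern (FillTestInput is a hypothetical helper used only to illustrate the shape of these fixes):

#include <cstddef>

// Illustrative only; mirrors the kind of change repeated through these tests.
void FillTestInput(float* data, size_t bytes) {
  // Before: `for (int i = 0; i < bytes / sizeof(float); ++i)` compares a
  // signed index against a size_t bound; the fix uses size_t for the index.
  for (size_t i = 0; i < bytes / sizeof(float); ++i) {
    // Before: `0.5 - 0.5 * ...` computes in double; keep the literals float.
    data[i] = 0.5f - 0.5f * static_cast<float>(i % 2u);
  }
}
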
PiperOrigin-RevId: 321607756 Change-Id: Idc3219fb81f395a1b793b8311ec8adc827d45778 --- .../lite/experimental/microfrontend/lib/BUILD | 18 ++ .../microfrontend/lib/frontend_test.cc | 2 +- .../examples/hello_world/hello_world_test.cc | 19 +- .../hello_world/output_handler_test.cc | 3 +- .../image_recognition_test.cc | 8 +- .../examples/magic_wand/magic_wand_test.cc | 16 +- .../magic_wand/output_handler_test.cc | 9 +- .../micro/examples/micro_speech/Makefile.inc | 8 + .../micro_speech/audio_provider_mock_test.cc | 18 +- .../micro_speech/audio_provider_test.cc | 3 +- .../micro_speech/command_responder_test.cc | 3 +- .../feature_provider_mock_test.cc | 8 +- .../micro_speech/feature_provider_test.cc | 3 +- .../micro_features_generator.cc | 2 +- .../micro_features_generator_test.cc | 25 +-- .../micro_speech/micro_speech_test.cc | 16 +- .../micro_speech/recognize_commands_test.cc | 21 +- .../simple_features_generator.cc | 2 +- .../simple_features_generator_test.cc | 17 +- .../network_tester/expected_output_data.h | 1 - .../network_tester/network_tester_test.cc | 21 +- .../detection_responder_test.cc | 5 +- .../person_detection/image_provider_test.cc | 5 +- .../person_detection/person_detection_test.cc | 22 +- .../detection_responder_test.cc | 5 +- .../image_provider_test.cc | 5 +- .../person_detection_test.cc | 20 +- tensorflow/lite/micro/kernels/add_test.cc | 8 - .../lite/micro/kernels/comparisons_test.cc | 30 --- tensorflow/lite/micro/kernels/conv_test.cc | 36 ++-- .../lite/micro/kernels/depthwise_conv_test.cc | 57 ++---- .../lite/micro/kernels/dequantize_test.cc | 3 +- .../micro/kernels/fully_connected_test.cc | 9 +- .../lite/micro/kernels/hard_swish_test.cc | 11 - .../micro/kernels/maximum_minimum_test.cc | 2 +- tensorflow/lite/micro/kernels/pooling_test.cc | 40 ++-- tensorflow/lite/micro/kernels/prelu_test.cc | 4 - .../micro/kernels/quantization_util_test.cc | 191 ++++++++++-------- tensorflow/lite/micro/kernels/reduce_test.cc | 10 +- tensorflow/lite/micro/kernels/reshape_test.cc | 12 +- .../lite/micro/kernels/strided_slice_test.cc | 3 +- tensorflow/lite/micro/kernels/sub_test.cc | 8 - tensorflow/lite/micro/kernels/svdf_test.cc | 19 +- tensorflow/lite/micro/kernels/tanh_test.cc | 4 +- tensorflow/lite/micro/memory_helpers_test.cc | 56 ++--- .../greedy_memory_planner_test.cc | 186 ++++++++--------- .../linear_memory_planner_test.cc | 135 ++++++------- tensorflow/lite/micro/micro_allocator_test.cc | 55 +++-- .../lite/micro/micro_interpreter_test.cc | 70 ++++--- .../micro/micro_mutable_op_resolver_test.cc | 25 ++- tensorflow/lite/micro/micro_string_test.cc | 33 ++- .../recording_simple_memory_allocator_test.cc | 72 ++++--- .../micro/simple_memory_allocator_test.cc | 29 +-- tensorflow/lite/micro/testing/micro_test.bzl | 7 +- tensorflow/lite/micro/testing/micro_test.h | 28 +-- tensorflow/lite/micro/testing/test_utils.h | 5 +- tensorflow/lite/micro/testing/util_test.cc | 8 +- tensorflow/lite/micro/testing_helpers_test.cc | 6 +- 58 files changed, 722 insertions(+), 725 deletions(-) diff --git a/tensorflow/lite/experimental/microfrontend/lib/BUILD b/tensorflow/lite/experimental/microfrontend/lib/BUILD index 18bfdb24a84..57f8055e9df 100644 --- a/tensorflow/lite/experimental/microfrontend/lib/BUILD +++ b/tensorflow/lite/experimental/microfrontend/lib/BUILD @@ -135,6 +135,9 @@ tflite_micro_cc_test( tflite_micro_cc_test( name = "filterbank_test", srcs = ["filterbank_test.cc"], + # Setting copts for experimental code to [], but this code should be fixed + # to build with the default copts (micro_copts()) + 
copts = [], deps = [ ":filterbank", "//tensorflow/lite/micro/testing:micro_test", @@ -144,6 +147,9 @@ tflite_micro_cc_test( tflite_micro_cc_test( name = "frontend_test", srcs = ["frontend_test.cc"], + # Setting copts for experimental code to [], but this code should be fixed + # to build with the default copts (micro_copts()) + copts = [], deps = [ ":frontend", "//tensorflow/lite/micro/testing:micro_test", @@ -153,6 +159,9 @@ tflite_micro_cc_test( tflite_micro_cc_test( name = "log_scale_test", srcs = ["log_scale_test.cc"], + # Setting copts for experimental code to [], but this code should be fixed + # to build with the default copts (micro_copts()) + copts = [], deps = [ ":log_scale", "//tensorflow/lite/micro/testing:micro_test", @@ -162,6 +171,9 @@ tflite_micro_cc_test( tflite_micro_cc_test( name = "noise_reduction_test", srcs = ["noise_reduction_test.cc"], + # Setting copts for experimental code to [], but this code should be fixed + # to build with the default copts (micro_copts()) + copts = [], deps = [ ":noise_reduction", "//tensorflow/lite/micro/testing:micro_test", @@ -171,6 +183,9 @@ tflite_micro_cc_test( tflite_micro_cc_test( name = "pcan_gain_control_test", srcs = ["pcan_gain_control_test.cc"], + # Setting copts for experimental code to [], but this code should be fixed + # to build with the default copts (micro_copts()) + copts = [], deps = [ ":pcan_gain_control", "//tensorflow/lite/micro/testing:micro_test", @@ -180,6 +195,9 @@ tflite_micro_cc_test( tflite_micro_cc_test( name = "window_test", srcs = ["window_test.cc"], + # Setting copts for experimental code to [], but this code should be fixed + # to build with the default copts (micro_copts()) + copts = [], deps = [ ":window", "//tensorflow/lite/micro/testing:micro_test", diff --git a/tensorflow/lite/experimental/microfrontend/lib/frontend_test.cc b/tensorflow/lite/experimental/microfrontend/lib/frontend_test.cc index adf59a1b8b5..9c981decf48 100644 --- a/tensorflow/lite/experimental/microfrontend/lib/frontend_test.cc +++ b/tensorflow/lite/experimental/microfrontend/lib/frontend_test.cc @@ -123,7 +123,7 @@ TF_LITE_MICRO_TEST(FrontendTest_CheckNotEnoughSamples) { &num_samples_read); TF_LITE_MICRO_EXPECT_EQ(output.size, 0); - TF_LITE_MICRO_EXPECT_EQ(output.values, nullptr); + TF_LITE_MICRO_EXPECT(output.values == nullptr); FrontendFreeStateContents(&state); } diff --git a/tensorflow/lite/micro/examples/hello_world/hello_world_test.cc b/tensorflow/lite/micro/examples/hello_world/hello_world_test.cc index 4da4ba7fa94..c76491b6b7c 100644 --- a/tensorflow/lite/micro/examples/hello_world/hello_world_test.cc +++ b/tensorflow/lite/micro/examples/hello_world/hello_world_test.cc @@ -26,13 +26,12 @@ TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(LoadModelAndPerformInference) { // Set up logging tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; // Map the model into a usable data structure. This doesn't involve any // copying or parsing, it's a very lightweight operation. 
const tflite::Model* model = ::tflite::GetModel(g_model); if (model->version() != TFLITE_SCHEMA_VERSION) { - TF_LITE_REPORT_ERROR(error_reporter, + TF_LITE_REPORT_ERROR(µ_error_reporter, "Model provided is schema version %d not equal " "to supported version %d.\n", model->version(), TFLITE_SCHEMA_VERSION); @@ -52,8 +51,8 @@ TF_LITE_MICRO_TEST(LoadModelAndPerformInference) { uint8_t tensor_arena[tensor_arena_size]; // Build an interpreter to run the model with - tflite::MicroInterpreter interpreter(model, resolver, tensor_arena, - tensor_arena_size, error_reporter); + tflite::MicroInterpreter interpreter( + model, resolver, tensor_arena, tensor_arena_size, µ_error_reporter); // Allocate memory from the tensor_arena for the model's tensors TF_LITE_MICRO_EXPECT_EQ(interpreter.AllocateTensors(), kTfLiteOk); @@ -95,7 +94,7 @@ TF_LITE_MICRO_TEST(LoadModelAndPerformInference) { // Obtain the output value from the tensor float value = output->data.f[0]; // Check that the output value is within 0.05 of the expected value - TF_LITE_MICRO_EXPECT_NEAR(0., value, 0.05); + TF_LITE_MICRO_EXPECT_NEAR(0.f, value, 0.05f); // Run inference on several more values and confirm the expected outputs input->data.f[0] = 1.; @@ -103,21 +102,21 @@ TF_LITE_MICRO_TEST(LoadModelAndPerformInference) { TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, invoke_status); value = output->data.f[0]; - TF_LITE_MICRO_EXPECT_NEAR(0.841, value, 0.05); + TF_LITE_MICRO_EXPECT_NEAR(0.841f, value, 0.05f); - input->data.f[0] = 3.; + input->data.f[0] = 3.f; invoke_status = interpreter.Invoke(); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, invoke_status); value = output->data.f[0]; - TF_LITE_MICRO_EXPECT_NEAR(0.141, value, 0.05); + TF_LITE_MICRO_EXPECT_NEAR(0.141f, value, 0.05f); - input->data.f[0] = 5.; + input->data.f[0] = 5.f; invoke_status = interpreter.Invoke(); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, invoke_status); value = output->data.f[0]; - TF_LITE_MICRO_EXPECT_NEAR(-0.959, value, 0.05); + TF_LITE_MICRO_EXPECT_NEAR(-0.959f, value, 0.05f); } TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/examples/hello_world/output_handler_test.cc b/tensorflow/lite/micro/examples/hello_world/output_handler_test.cc index cbed83e1c75..206113d1427 100644 --- a/tensorflow/lite/micro/examples/hello_world/output_handler_test.cc +++ b/tensorflow/lite/micro/examples/hello_world/output_handler_test.cc @@ -22,12 +22,11 @@ TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(TestCallability) { tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; // This will have external side-effects (like printing to the debug console // or lighting an LED) that are hard to observe, so the most we can do is // make sure the call doesn't crash. 
- HandleOutput(error_reporter, 0, 0); + HandleOutput(µ_error_reporter, 0, 0); } TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/examples/image_recognition_experimental/image_recognition_test.cc b/tensorflow/lite/micro/examples/image_recognition_experimental/image_recognition_test.cc index 61071fd5696..ff9ed498137 100644 --- a/tensorflow/lite/micro/examples/image_recognition_experimental/image_recognition_test.cc +++ b/tensorflow/lite/micro/examples/image_recognition_experimental/image_recognition_test.cc @@ -31,11 +31,10 @@ TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(TestImageRecognitionInvoke) { tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; const tflite::Model* model = ::tflite::GetModel(image_recognition_model_data); if (model->version() != TFLITE_SCHEMA_VERSION) { - TF_LITE_REPORT_ERROR(error_reporter, + TF_LITE_REPORT_ERROR(µ_error_reporter, "Model provided is schema version %d not equal " "to supported version %d.\n", model->version(), TFLITE_SCHEMA_VERSION); @@ -52,7 +51,8 @@ TF_LITE_MICRO_TEST(TestImageRecognitionInvoke) { uint8_t tensor_arena[tensor_arena_size]; tflite::MicroInterpreter interpreter(model, micro_op_resolver, tensor_arena, - tensor_arena_size, error_reporter); + tensor_arena_size, + µ_error_reporter); interpreter.AllocateTensors(); TfLiteTensor* input = interpreter.input(0); @@ -83,7 +83,7 @@ TF_LITE_MICRO_TEST(TestImageRecognitionInvoke) { TfLiteStatus invoke_status = interpreter.Invoke(); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, invoke_status); if (invoke_status != kTfLiteOk) { - TF_LITE_REPORT_ERROR(error_reporter, "Invoke failed\n"); + TF_LITE_REPORT_ERROR(µ_error_reporter, "Invoke failed\n"); } TfLiteTensor* output = interpreter.output(0); diff --git a/tensorflow/lite/micro/examples/magic_wand/magic_wand_test.cc b/tensorflow/lite/micro/examples/magic_wand/magic_wand_test.cc index 96a2b971d9b..920440509f7 100644 --- a/tensorflow/lite/micro/examples/magic_wand/magic_wand_test.cc +++ b/tensorflow/lite/micro/examples/magic_wand/magic_wand_test.cc @@ -28,13 +28,12 @@ TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(LoadModelAndPerformInference) { // Set up logging tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; // Map the model into a usable data structure. This doesn't involve any // copying or parsing, it's a very lightweight operation. 
const tflite::Model* model = ::tflite::GetModel(g_magic_wand_model_data); if (model->version() != TFLITE_SCHEMA_VERSION) { - TF_LITE_REPORT_ERROR(error_reporter, + TF_LITE_REPORT_ERROR(µ_error_reporter, "Model provided is schema version %d not equal " "to supported version %d.\n", model->version(), TFLITE_SCHEMA_VERSION); @@ -59,7 +58,8 @@ TF_LITE_MICRO_TEST(LoadModelAndPerformInference) { // Build an interpreter to run the model with tflite::MicroInterpreter interpreter(model, micro_op_resolver, tensor_arena, - tensor_arena_size, error_reporter); + tensor_arena_size, + µ_error_reporter); // Allocate memory from the tensor_arena for the model's tensors interpreter.AllocateTensors(); @@ -80,15 +80,15 @@ TF_LITE_MICRO_TEST(LoadModelAndPerformInference) { // Provide an input value const float* ring_features_data = g_ring_micro_f9643d42_nohash_4_data; - TF_LITE_REPORT_ERROR(error_reporter, "%d", input->bytes); - for (int i = 0; i < (input->bytes / sizeof(float)); ++i) { + TF_LITE_REPORT_ERROR(µ_error_reporter, "%d", input->bytes); + for (size_t i = 0; i < (input->bytes / sizeof(float)); ++i) { input->data.f[i] = ring_features_data[i]; } // Run the model on this input and check that it succeeds TfLiteStatus invoke_status = interpreter.Invoke(); if (invoke_status != kTfLiteOk) { - TF_LITE_REPORT_ERROR(error_reporter, "Invoke failed\n"); + TF_LITE_REPORT_ERROR(µ_error_reporter, "Invoke failed\n"); } TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, invoke_status); @@ -118,14 +118,14 @@ TF_LITE_MICRO_TEST(LoadModelAndPerformInference) { // Now test with a different input, from a recording of "Slope". const float* slope_features_data = g_slope_micro_f2e59fea_nohash_1_data; - for (int i = 0; i < (input->bytes / sizeof(float)); ++i) { + for (size_t i = 0; i < (input->bytes / sizeof(float)); ++i) { input->data.f[i] = slope_features_data[i]; } // Run the model on this "Slope" input. invoke_status = interpreter.Invoke(); if (invoke_status != kTfLiteOk) { - TF_LITE_REPORT_ERROR(error_reporter, "Invoke failed\n"); + TF_LITE_REPORT_ERROR(µ_error_reporter, "Invoke failed\n"); } TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, invoke_status); diff --git a/tensorflow/lite/micro/examples/magic_wand/output_handler_test.cc b/tensorflow/lite/micro/examples/magic_wand/output_handler_test.cc index 6ac5468531d..133d62427a1 100644 --- a/tensorflow/lite/micro/examples/magic_wand/output_handler_test.cc +++ b/tensorflow/lite/micro/examples/magic_wand/output_handler_test.cc @@ -22,11 +22,10 @@ TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(TestCallability) { tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; - HandleOutput(error_reporter, 0); - HandleOutput(error_reporter, 1); - HandleOutput(error_reporter, 2); - HandleOutput(error_reporter, 3); + HandleOutput(µ_error_reporter, 0); + HandleOutput(µ_error_reporter, 1); + HandleOutput(µ_error_reporter, 2); + HandleOutput(µ_error_reporter, 3); } TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/examples/micro_speech/Makefile.inc b/tensorflow/lite/micro/examples/micro_speech/Makefile.inc index 18d5fa52505..70ad3e94238 100644 --- a/tensorflow/lite/micro/examples/micro_speech/Makefile.inc +++ b/tensorflow/lite/micro/examples/micro_speech/Makefile.inc @@ -238,9 +238,17 @@ $(MICRO_FEATURES_GENERATOR_HDRS) include $(wildcard tensorflow/lite/micro/examples/micro_speech/*/Makefile.inc) # Test the code for feature generation. 
+#TEMP_CXXFLAGS := CXXFLAGS +#CXXFLAGS := $(filter-out $(CC_WARNINGS),$(CXXFLAGS)) + +TEMP_CCFLAGS := CCFLAGS +CCFLAGS := $(filter-out $(CC_WARNINGS),$(CCFLAGS)) + $(eval $(call microlite_test,micro_features_generator_test,\ $(MICRO_FEATURES_GENERATOR_TEST_SRCS), $(MICRO_FEATURES_GENERATOR_TEST_HDRS))) +#CXXFLAGS := TEMP_CXXFLAGS + # Tests loading and running a speech model. $(eval $(call microlite_test,micro_speech_test,\ $(MICRO_SPEECH_TEST_SRCS),$(MICRO_SPEECH_TEST_HDRS))) diff --git a/tensorflow/lite/micro/examples/micro_speech/audio_provider_mock_test.cc b/tensorflow/lite/micro/examples/micro_speech/audio_provider_mock_test.cc index d874210ccea..91419035048 100644 --- a/tensorflow/lite/micro/examples/micro_speech/audio_provider_mock_test.cc +++ b/tensorflow/lite/micro/examples/micro_speech/audio_provider_mock_test.cc @@ -27,12 +27,11 @@ TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(TestAudioProviderMock) { tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; int audio_samples_size = 0; int16_t* audio_samples = nullptr; TfLiteStatus get_status = - GetAudioSamples(error_reporter, 0, kFeatureSliceDurationMs, + GetAudioSamples(µ_error_reporter, 0, kFeatureSliceDurationMs, &audio_samples_size, &audio_samples); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, get_status); TF_LITE_MICRO_EXPECT_LE(audio_samples_size, kMaxAudioSampleSize); @@ -41,8 +40,9 @@ TF_LITE_MICRO_TEST(TestAudioProviderMock) { TF_LITE_MICRO_EXPECT_EQ(g_yes_1000ms_sample_data[i], audio_samples[i]); } - get_status = GetAudioSamples(error_reporter, 500, kFeatureSliceDurationMs, - &audio_samples_size, &audio_samples); + get_status = + GetAudioSamples(µ_error_reporter, 500, kFeatureSliceDurationMs, + &audio_samples_size, &audio_samples); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, get_status); TF_LITE_MICRO_EXPECT_LE(audio_samples_size, kMaxAudioSampleSize); TF_LITE_MICRO_EXPECT_NE(audio_samples, nullptr); @@ -51,8 +51,9 @@ TF_LITE_MICRO_TEST(TestAudioProviderMock) { audio_samples[i]); } - get_status = GetAudioSamples(error_reporter, 1500, kFeatureSliceDurationMs, - &audio_samples_size, &audio_samples); + get_status = + GetAudioSamples(µ_error_reporter, 1500, kFeatureSliceDurationMs, + &audio_samples_size, &audio_samples); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, get_status); TF_LITE_MICRO_EXPECT_LE(audio_samples_size, kMaxAudioSampleSize); TF_LITE_MICRO_EXPECT_NE(audio_samples, nullptr); @@ -60,8 +61,9 @@ TF_LITE_MICRO_TEST(TestAudioProviderMock) { TF_LITE_MICRO_EXPECT_EQ(0, audio_samples[i]); } - get_status = GetAudioSamples(error_reporter, 12250, kFeatureSliceDurationMs, - &audio_samples_size, &audio_samples); + get_status = + GetAudioSamples(µ_error_reporter, 12250, kFeatureSliceDurationMs, + &audio_samples_size, &audio_samples); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, get_status); TF_LITE_MICRO_EXPECT_LE(audio_samples_size, kMaxAudioSampleSize); TF_LITE_MICRO_EXPECT_NE(audio_samples, nullptr); diff --git a/tensorflow/lite/micro/examples/micro_speech/audio_provider_test.cc b/tensorflow/lite/micro/examples/micro_speech/audio_provider_test.cc index 065f0f6f996..8e32c92b8a2 100644 --- a/tensorflow/lite/micro/examples/micro_speech/audio_provider_test.cc +++ b/tensorflow/lite/micro/examples/micro_speech/audio_provider_test.cc @@ -26,12 +26,11 @@ TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(TestAudioProvider) { tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; int audio_samples_size = 0; int16_t* audio_samples = nullptr; TfLiteStatus get_status = - 
GetAudioSamples(error_reporter, 0, kFeatureSliceDurationMs, + GetAudioSamples(µ_error_reporter, 0, kFeatureSliceDurationMs, &audio_samples_size, &audio_samples); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, get_status); TF_LITE_MICRO_EXPECT_LE(audio_samples_size, kMaxAudioSampleSize); diff --git a/tensorflow/lite/micro/examples/micro_speech/command_responder_test.cc b/tensorflow/lite/micro/examples/micro_speech/command_responder_test.cc index fe811ea52bc..818b0840d08 100644 --- a/tensorflow/lite/micro/examples/micro_speech/command_responder_test.cc +++ b/tensorflow/lite/micro/examples/micro_speech/command_responder_test.cc @@ -22,12 +22,11 @@ TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(TestCallability) { tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; // This will have external side-effects (like printing to the debug console // or lighting an LED) that are hard to observe, so the most we can do is // make sure the call doesn't crash. - RespondToCommand(error_reporter, 0, "foo", 0, true); + RespondToCommand(µ_error_reporter, 0, "foo", 0, true); } TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/examples/micro_speech/feature_provider_mock_test.cc b/tensorflow/lite/micro/examples/micro_speech/feature_provider_mock_test.cc index aae556bf6e0..c093f31ad10 100644 --- a/tensorflow/lite/micro/examples/micro_speech/feature_provider_mock_test.cc +++ b/tensorflow/lite/micro/examples/micro_speech/feature_provider_mock_test.cc @@ -25,14 +25,13 @@ TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(TestFeatureProviderMockYes) { tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; int8_t feature_data[kFeatureElementCount]; FeatureProvider feature_provider(kFeatureElementCount, feature_data); int how_many_new_slices = 0; TfLiteStatus populate_status = feature_provider.PopulateFeatureData( - error_reporter, /* last_time_in_ms= */ 0, /* time_in_ms= */ 970, + µ_error_reporter, /* last_time_in_ms= */ 0, /* time_in_ms= */ 970, &how_many_new_slices); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, populate_status); TF_LITE_MICRO_EXPECT_EQ(kFeatureSliceCount, how_many_new_slices); @@ -45,15 +44,14 @@ TF_LITE_MICRO_TEST(TestFeatureProviderMockYes) { TF_LITE_MICRO_TEST(TestFeatureProviderMockNo) { tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; int8_t feature_data[kFeatureElementCount]; FeatureProvider feature_provider(kFeatureElementCount, feature_data); int how_many_new_slices = 0; TfLiteStatus populate_status = feature_provider.PopulateFeatureData( - error_reporter, /* last_time_in_ms= */ 4000, /* time_in_ms= */ 4970, - &how_many_new_slices); + µ_error_reporter, /* last_time_in_ms= */ 4000, + /* time_in_ms= */ 4970, &how_many_new_slices); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, populate_status); TF_LITE_MICRO_EXPECT_EQ(kFeatureSliceCount, how_many_new_slices); diff --git a/tensorflow/lite/micro/examples/micro_speech/feature_provider_test.cc b/tensorflow/lite/micro/examples/micro_speech/feature_provider_test.cc index 5d6816a91e4..e0fc95c6336 100644 --- a/tensorflow/lite/micro/examples/micro_speech/feature_provider_test.cc +++ b/tensorflow/lite/micro/examples/micro_speech/feature_provider_test.cc @@ -24,14 +24,13 @@ TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(TestFeatureProvider) { tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; int8_t feature_data[kFeatureElementCount]; FeatureProvider 
feature_provider(kFeatureElementCount, feature_data); int how_many_new_slices = 0; TfLiteStatus populate_status = feature_provider.PopulateFeatureData( - error_reporter, /* last_time_in_ms= */ 0, /* time_in_ms= */ 10000, + µ_error_reporter, /* last_time_in_ms= */ 0, /* time_in_ms= */ 10000, &how_many_new_slices); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, populate_status); TF_LITE_MICRO_EXPECT_EQ(kFeatureSliceCount, how_many_new_slices); diff --git a/tensorflow/lite/micro/examples/micro_speech/micro_features/micro_features_generator.cc b/tensorflow/lite/micro/examples/micro_speech/micro_features/micro_features_generator.cc index fbb6e6e4a9f..9e076431288 100644 --- a/tensorflow/lite/micro/examples/micro_speech/micro_features/micro_features_generator.cc +++ b/tensorflow/lite/micro/examples/micro_speech/micro_features/micro_features_generator.cc @@ -81,7 +81,7 @@ TfLiteStatus GenerateMicroFeatures(tflite::ErrorReporter* error_reporter, FrontendOutput frontend_output = FrontendProcessSamples( &g_micro_features_state, frontend_input, input_size, num_samples_read); - for (int i = 0; i < frontend_output.size; ++i) { + for (size_t i = 0; i < frontend_output.size; ++i) { // These scaling values are derived from those used in input_data.py in the // training pipeline. // The feature pipeline outputs 16-bit signed integers in roughly a 0 to 670 diff --git a/tensorflow/lite/micro/examples/micro_speech/micro_features/micro_features_generator_test.cc b/tensorflow/lite/micro/examples/micro_speech/micro_features/micro_features_generator_test.cc index ee3ee03763f..083c3cc479d 100644 --- a/tensorflow/lite/micro/examples/micro_speech/micro_features/micro_features_generator_test.cc +++ b/tensorflow/lite/micro/examples/micro_speech/micro_features/micro_features_generator_test.cc @@ -30,9 +30,9 @@ TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(TestMicroFeaturesGeneratorYes) { tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, InitializeMicroFeatures(error_reporter)); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, + InitializeMicroFeatures(µ_error_reporter)); // The micro features pipeline retains state from previous calls to help // estimate the background noise. 
Unfortunately this makes it harder to @@ -51,8 +51,9 @@ TF_LITE_MICRO_TEST(TestMicroFeaturesGeneratorYes) { int8_t yes_calculated_data[g_yes_feature_data_slice_size]; size_t num_samples_read; TfLiteStatus yes_status = GenerateMicroFeatures( - error_reporter, g_yes_30ms_sample_data, g_yes_30ms_sample_data_size, - g_yes_feature_data_slice_size, yes_calculated_data, &num_samples_read); + µ_error_reporter, g_yes_30ms_sample_data, + g_yes_30ms_sample_data_size, g_yes_feature_data_slice_size, + yes_calculated_data, &num_samples_read); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, yes_status); for (int i = 0; i < g_yes_feature_data_slice_size; ++i) { @@ -60,17 +61,17 @@ TF_LITE_MICRO_TEST(TestMicroFeaturesGeneratorYes) { const int actual = yes_calculated_data[i]; TF_LITE_MICRO_EXPECT_EQ(expected, actual); if (expected != actual) { - TF_LITE_REPORT_ERROR(error_reporter, "Expected value %d but found %d", - expected, actual); + TF_LITE_REPORT_ERROR(µ_error_reporter, + "Expected value %d but found %d", expected, actual); } } } TF_LITE_MICRO_TEST(TestMicroFeaturesGeneratorNo) { tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, InitializeMicroFeatures(error_reporter)); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, + InitializeMicroFeatures(µ_error_reporter)); // As we did for the previous features, set known good noise state // parameters. const uint32_t no_estimate_presets[] = { @@ -85,17 +86,17 @@ TF_LITE_MICRO_TEST(TestMicroFeaturesGeneratorNo) { int8_t no_calculated_data[g_no_feature_data_slice_size]; size_t num_samples_read; TfLiteStatus no_status = GenerateMicroFeatures( - error_reporter, g_no_30ms_sample_data, g_no_30ms_sample_data_size, + µ_error_reporter, g_no_30ms_sample_data, g_no_30ms_sample_data_size, g_no_feature_data_slice_size, no_calculated_data, &num_samples_read); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, no_status); - for (int i = 0; i < g_no_feature_data_slice_size; ++i) { + for (size_t i = 0; i < g_no_feature_data_slice_size; ++i) { const int expected = g_no_feature_data_slice[i]; const int actual = no_calculated_data[i]; TF_LITE_MICRO_EXPECT_EQ(expected, actual); if (expected != actual) { - TF_LITE_REPORT_ERROR(error_reporter, "Expected value %d but found %d", - expected, actual); + TF_LITE_REPORT_ERROR(µ_error_reporter, + "Expected value %d but found %d", expected, actual); } } } diff --git a/tensorflow/lite/micro/examples/micro_speech/micro_speech_test.cc b/tensorflow/lite/micro/examples/micro_speech/micro_speech_test.cc index 4598dd3662f..b58515d1833 100644 --- a/tensorflow/lite/micro/examples/micro_speech/micro_speech_test.cc +++ b/tensorflow/lite/micro/examples/micro_speech/micro_speech_test.cc @@ -28,13 +28,12 @@ TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(TestInvoke) { // Set up logging. tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; // Map the model into a usable data structure. This doesn't involve any // copying or parsing, it's a very lightweight operation. const tflite::Model* model = ::tflite::GetModel(g_model); if (model->version() != TFLITE_SCHEMA_VERSION) { - TF_LITE_REPORT_ERROR(error_reporter, + TF_LITE_REPORT_ERROR(µ_error_reporter, "Model provided is schema version %d not equal " "to supported version %d.\n", model->version(), TFLITE_SCHEMA_VERSION); @@ -59,7 +58,8 @@ TF_LITE_MICRO_TEST(TestInvoke) { // Build an interpreter to run the model with. 
tflite::MicroInterpreter interpreter(model, micro_op_resolver, tensor_arena, - tensor_arena_size, error_reporter); + tensor_arena_size, + µ_error_reporter); interpreter.AllocateTensors(); // Get information about the memory area to use for the model's input. @@ -75,14 +75,14 @@ TF_LITE_MICRO_TEST(TestInvoke) { // Copy a spectrogram created from a .wav audio file of someone saying "Yes", // into the memory area used for the input. const int8_t* yes_features_data = g_yes_micro_f2e59fea_nohash_1_data; - for (int i = 0; i < input->bytes; ++i) { + for (size_t i = 0; i < input->bytes; ++i) { input->data.int8[i] = yes_features_data[i]; } // Run the model on this input and make sure it succeeds. TfLiteStatus invoke_status = interpreter.Invoke(); if (invoke_status != kTfLiteOk) { - TF_LITE_REPORT_ERROR(error_reporter, "Invoke failed\n"); + TF_LITE_REPORT_ERROR(µ_error_reporter, "Invoke failed\n"); } TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, invoke_status); @@ -111,14 +111,14 @@ TF_LITE_MICRO_TEST(TestInvoke) { // Now test with a different input, from a recording of "No". const int8_t* no_features_data = g_no_micro_f9643d42_nohash_4_data; - for (int i = 0; i < input->bytes; ++i) { + for (size_t i = 0; i < input->bytes; ++i) { input->data.int8[i] = no_features_data[i]; } // Run the model on this "No" input. invoke_status = interpreter.Invoke(); if (invoke_status != kTfLiteOk) { - TF_LITE_REPORT_ERROR(error_reporter, "Invoke failed\n"); + TF_LITE_REPORT_ERROR(µ_error_reporter, "Invoke failed\n"); } TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, invoke_status); @@ -139,7 +139,7 @@ TF_LITE_MICRO_TEST(TestInvoke) { TF_LITE_MICRO_EXPECT_GT(no_score, unknown_score); TF_LITE_MICRO_EXPECT_GT(no_score, yes_score); - TF_LITE_REPORT_ERROR(error_reporter, "Ran successfully\n"); + TF_LITE_REPORT_ERROR(µ_error_reporter, "Ran successfully\n"); } TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/examples/micro_speech/recognize_commands_test.cc b/tensorflow/lite/micro/examples/micro_speech/recognize_commands_test.cc index 9ad20b68c8c..eff7b4eb37b 100644 --- a/tensorflow/lite/micro/examples/micro_speech/recognize_commands_test.cc +++ b/tensorflow/lite/micro/examples/micro_speech/recognize_commands_test.cc @@ -22,9 +22,8 @@ TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(PreviousResultsQueueBasic) { tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; - PreviousResultsQueue queue(error_reporter); + PreviousResultsQueue queue(µ_error_reporter); TF_LITE_MICRO_EXPECT_EQ(0, queue.size()); int8_t scores_a[4] = {0, 0, 0, 1}; @@ -54,9 +53,8 @@ TF_LITE_MICRO_TEST(PreviousResultsQueueBasic) { TF_LITE_MICRO_TEST(PreviousResultsQueuePushPop) { tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; - PreviousResultsQueue queue(error_reporter); + PreviousResultsQueue queue(µ_error_reporter); TF_LITE_MICRO_EXPECT_EQ(0, queue.size()); for (int i = 0; i < 123; ++i) { @@ -74,9 +72,8 @@ TF_LITE_MICRO_TEST(PreviousResultsQueuePushPop) { TF_LITE_MICRO_TEST(RecognizeCommandsTestBasic) { tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; - RecognizeCommands recognize_commands(error_reporter); + RecognizeCommands recognize_commands(µ_error_reporter); std::initializer_list result_data = {127, -128, -128, -128}; auto result_dims = {2, 1, 4}; @@ -94,9 +91,8 @@ TF_LITE_MICRO_TEST(RecognizeCommandsTestBasic) { TF_LITE_MICRO_TEST(RecognizeCommandsTestFindCommands) { tflite::MicroErrorReporter 
micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; - RecognizeCommands recognize_commands(error_reporter, 1000, 51); + RecognizeCommands recognize_commands(µ_error_reporter, 1000, 51); std::initializer_list yes_data = {-128, -128, 127, -128}; auto yes_dims = {2, 1, 4}; @@ -157,9 +153,8 @@ TF_LITE_MICRO_TEST(RecognizeCommandsTestFindCommands) { TF_LITE_MICRO_TEST(RecognizeCommandsTestBadInputLength) { tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; - RecognizeCommands recognize_commands(error_reporter, 1000, 51); + RecognizeCommands recognize_commands(µ_error_reporter, 1000, 51); std::initializer_list bad_data = {-128, -128, 127}; auto bad_dims = {2, 1, 3}; @@ -177,9 +172,8 @@ TF_LITE_MICRO_TEST(RecognizeCommandsTestBadInputLength) { TF_LITE_MICRO_TEST(RecognizeCommandsTestBadInputTimes) { tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; - RecognizeCommands recognize_commands(error_reporter, 1000, 51); + RecognizeCommands recognize_commands(µ_error_reporter, 1000, 51); std::initializer_list result_data = {-128, -128, 127, -128}; auto result_dims = {2, 1, 4}; @@ -200,9 +194,8 @@ TF_LITE_MICRO_TEST(RecognizeCommandsTestBadInputTimes) { TF_LITE_MICRO_TEST(RecognizeCommandsTestTooFewInputs) { tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; - RecognizeCommands recognize_commands(error_reporter, 1000, 51); + RecognizeCommands recognize_commands(µ_error_reporter, 1000, 51); std::initializer_list result_data = {-128, -128, 127, -128}; auto result_dims = {2, 1, 4}; diff --git a/tensorflow/lite/micro/examples/micro_speech/simple_features/simple_features_generator.cc b/tensorflow/lite/micro/examples/micro_speech/simple_features/simple_features_generator.cc index 22434c995c4..0de36b48e41 100644 --- a/tensorflow/lite/micro/examples/micro_speech/simple_features/simple_features_generator.cc +++ b/tensorflow/lite/micro/examples/micro_speech/simple_features/simple_features_generator.cc @@ -67,7 +67,7 @@ void CalculateDiscreteFourierTransform(float* time_series, int time_series_size, // of the current sample window are weighted more heavily than those at the end. 
void CalculatePeriodicHann(int window_length, float* window_function) { for (int i = 0; i < window_length; ++i) { - window_function[i] = 0.5 - 0.5 * std::cos((2 * kPi * i) / window_length); + window_function[i] = 0.5f - 0.5f * std::cos((2 * kPi * i) / window_length); } } diff --git a/tensorflow/lite/micro/examples/micro_speech/simple_features/simple_features_generator_test.cc b/tensorflow/lite/micro/examples/micro_speech/simple_features/simple_features_generator_test.cc index 9ac19b374da..f54feecadfa 100644 --- a/tensorflow/lite/micro/examples/micro_speech/simple_features/simple_features_generator_test.cc +++ b/tensorflow/lite/micro/examples/micro_speech/simple_features/simple_features_generator_test.cc @@ -27,34 +27,35 @@ TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(TestSimpleFeaturesGenerator) { tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; uint8_t yes_calculated_data[g_yes_power_spectrum_data_size]; TfLiteStatus yes_status = GenerateSimpleFeatures( - error_reporter, g_yes_30ms_sample_data, g_yes_30ms_sample_data_size, - g_yes_power_spectrum_data_size, yes_calculated_data); + µ_error_reporter, g_yes_30ms_sample_data, + g_yes_30ms_sample_data_size, g_yes_power_spectrum_data_size, + yes_calculated_data); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, yes_status); for (int i = 0; i < g_yes_power_spectrum_data_size; ++i) { TF_LITE_MICRO_EXPECT_EQ(g_yes_power_spectrum_data[i], yes_calculated_data[i]); if (g_yes_power_spectrum_data[i] != yes_calculated_data[i]) { - TF_LITE_REPORT_ERROR(error_reporter, "Expected value %d but found %d", - g_yes_power_spectrum_data[i], - yes_calculated_data[i]); + TF_LITE_REPORT_ERROR( + µ_error_reporter, "Expected value %d but found %d", + g_yes_power_spectrum_data[i], yes_calculated_data[i]); } } uint8_t no_calculated_data[g_yes_power_spectrum_data_size]; TfLiteStatus no_status = GenerateSimpleFeatures( - error_reporter, g_no_30ms_sample_data, g_no_30ms_sample_data_size, + µ_error_reporter, g_no_30ms_sample_data, g_no_30ms_sample_data_size, g_no_power_spectrum_data_size, no_calculated_data); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, no_status); for (int i = 0; i < g_no_power_spectrum_data_size; ++i) { TF_LITE_MICRO_EXPECT_EQ(g_no_power_spectrum_data[i], no_calculated_data[i]); if (g_no_power_spectrum_data[i] != no_calculated_data[i]) { - TF_LITE_REPORT_ERROR(error_reporter, "Expected value %d but found %d", + TF_LITE_REPORT_ERROR(µ_error_reporter, + "Expected value %d but found %d", g_no_power_spectrum_data[i], no_calculated_data[i]); } } diff --git a/tensorflow/lite/micro/examples/network_tester/expected_output_data.h b/tensorflow/lite/micro/examples/network_tester/expected_output_data.h index 934722bad94..18937a9b601 100644 --- a/tensorflow/lite/micro/examples/network_tester/expected_output_data.h +++ b/tensorflow/lite/micro/examples/network_tester/expected_output_data.h @@ -16,7 +16,6 @@ limitations under the License. 
#ifndef TENSORFLOW_LITE_MICRO_EXAMPLES_NETWORK_TESTER_EXPECTED_OUTPUT_DATA_H_ #define TENSORFLOW_LITE_MICRO_EXAMPLES_NETWORK_TESTER_EXPECTED_OUTPUT_DATA_H_ -static unsigned int expected_output_data_len = 4; static unsigned char expected_output_data[1][4] = {6, 8, 14, 16}; #endif // TENSORFLOW_LITE_MICRO_EXAMPLES_NETWORK_TESTER_EXPECTED_OUTPUT_DATA_H_ diff --git a/tensorflow/lite/micro/examples/network_tester/network_tester_test.cc b/tensorflow/lite/micro/examples/network_tester/network_tester_test.cc index 6ea02b3f4a5..563500f2115 100644 --- a/tensorflow/lite/micro/examples/network_tester/network_tester_test.cc +++ b/tensorflow/lite/micro/examples/network_tester/network_tester_test.cc @@ -64,21 +64,20 @@ inline void print_output_data(TfLiteTensor* output) { #endif template -void check_output_elem(TfLiteTensor* output, const T* expected_output_data, +void check_output_elem(TfLiteTensor* output, const T* expected_output, const int index) { TF_LITE_MICRO_EXPECT_EQ(tflite::GetTensorData(output)[index], - expected_output_data[index]); + expected_output[index]); } TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(TestInvoke) { tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; const tflite::Model* model = ::tflite::GetModel(network_model); if (model->version() != TFLITE_SCHEMA_VERSION) { - TF_LITE_REPORT_ERROR(error_reporter, + TF_LITE_REPORT_ERROR(µ_error_reporter, "Model provided is schema version %d not equal " "to supported version %d.\n", model->version(), TFLITE_SCHEMA_VERSION); @@ -87,23 +86,23 @@ TF_LITE_MICRO_TEST(TestInvoke) { tflite::AllOpsResolver resolver; - tflite::MicroInterpreter interpreter(model, resolver, tensor_arena, - TENSOR_ARENA_SIZE, error_reporter); + tflite::MicroInterpreter interpreter( + model, resolver, tensor_arena, TENSOR_ARENA_SIZE, µ_error_reporter); TfLiteStatus allocate_status = interpreter.AllocateTensors(); if (allocate_status != kTfLiteOk) { - TF_LITE_REPORT_ERROR(error_reporter, "Tensor allocation failed\n"); + TF_LITE_REPORT_ERROR(µ_error_reporter, "Tensor allocation failed\n"); return kTfLiteError; } for (int n = 0; n < NUM_INFERENCES; n++) { - for (int i = 0; i < interpreter.inputs_size(); ++i) { + for (size_t i = 0; i < interpreter.inputs_size(); ++i) { TfLiteTensor* input = interpreter.input(i); memcpy(input->data.data, input_data[i], input->bytes); } TfLiteStatus invoke_status = interpreter.Invoke(); if (invoke_status != kTfLiteOk) { - TF_LITE_REPORT_ERROR(error_reporter, "Invoke failed\n"); + TF_LITE_REPORT_ERROR(µ_error_reporter, "Invoke failed\n"); return kTfLiteError; } TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, invoke_status); @@ -126,7 +125,7 @@ TF_LITE_MICRO_TEST(TestInvoke) { #endif #ifndef NO_COMPARE_OUTPUT_DATA - for (int i = 0; i < interpreter.outputs_size(); i++) { + for (size_t i = 0; i < interpreter.outputs_size(); i++) { TfLiteTensor* output = interpreter.output(i); for (int j = 0; j < tflite::ElementCount(*(output->dims)); ++j) { check_output_elem(output, expected_output_data[i], j); @@ -134,7 +133,7 @@ TF_LITE_MICRO_TEST(TestInvoke) { } #endif } - TF_LITE_REPORT_ERROR(error_reporter, "Ran successfully\n"); + TF_LITE_REPORT_ERROR(µ_error_reporter, "Ran successfully\n"); } TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/examples/person_detection/detection_responder_test.cc b/tensorflow/lite/micro/examples/person_detection/detection_responder_test.cc index 6ef17d38dc9..1714079f39a 100644 --- a/tensorflow/lite/micro/examples/person_detection/detection_responder_test.cc +++ 
b/tensorflow/lite/micro/examples/person_detection/detection_responder_test.cc @@ -22,13 +22,12 @@ TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(TestCallability) { tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; // This will have external side-effects (like printing to the debug console // or lighting an LED) that are hard to observe, so the most we can do is // make sure the call doesn't crash. - RespondToDetection(error_reporter, 100, 200); - RespondToDetection(error_reporter, 200, 100); + RespondToDetection(µ_error_reporter, 100, 200); + RespondToDetection(µ_error_reporter, 200, 100); } TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/examples/person_detection/image_provider_test.cc b/tensorflow/lite/micro/examples/person_detection/image_provider_test.cc index 73695035d14..60c89c8aaf2 100644 --- a/tensorflow/lite/micro/examples/person_detection/image_provider_test.cc +++ b/tensorflow/lite/micro/examples/person_detection/image_provider_test.cc @@ -26,11 +26,10 @@ TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(TestImageProvider) { tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; uint8_t image_data[kMaxImageSize]; - TfLiteStatus get_status = - GetImage(error_reporter, kNumCols, kNumRows, kNumChannels, image_data); + TfLiteStatus get_status = GetImage(µ_error_reporter, kNumCols, kNumRows, + kNumChannels, image_data); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, get_status); TF_LITE_MICRO_EXPECT_NE(image_data, nullptr); diff --git a/tensorflow/lite/micro/examples/person_detection/person_detection_test.cc b/tensorflow/lite/micro/examples/person_detection/person_detection_test.cc index 149d0d50746..548b95e0acc 100644 --- a/tensorflow/lite/micro/examples/person_detection/person_detection_test.cc +++ b/tensorflow/lite/micro/examples/person_detection/person_detection_test.cc @@ -35,18 +35,17 @@ TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(TestInvoke) { // Set up logging. tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; // Map the model into a usable data structure. This doesn't involve any // copying or parsing, it's a very lightweight operation. const tflite::Model* model = ::tflite::GetModel(g_person_detect_model_data); if (model->version() != TFLITE_SCHEMA_VERSION) { - TF_LITE_REPORT_ERROR(error_reporter, + TF_LITE_REPORT_ERROR(µ_error_reporter, "Model provided is schema version %d not equal " "to supported version %d.\n", model->version(), TFLITE_SCHEMA_VERSION); } - PrintModelData(model, error_reporter); + PrintModelData(model, µ_error_reporter); // Pull in only the operation implementations we need. // This relies on a complete list of all the ops needed by this graph. @@ -62,7 +61,8 @@ TF_LITE_MICRO_TEST(TestInvoke) { // Build an interpreter to run the model with. tflite::MicroInterpreter interpreter(model, micro_op_resolver, tensor_arena, - tensor_arena_size, error_reporter); + tensor_arena_size, + µ_error_reporter); interpreter.AllocateTensors(); // Get information about the memory area to use for the model's input. @@ -79,14 +79,14 @@ TF_LITE_MICRO_TEST(TestInvoke) { // Copy an image with a person into the memory area used for the input. const uint8_t* person_data = g_person_data; - for (int i = 0; i < input->bytes; ++i) { + for (size_t i = 0; i < input->bytes; ++i) { input->data.uint8[i] = person_data[i]; } // Run the model on this input and make sure it succeeds. 
TfLiteStatus invoke_status = interpreter.Invoke(); if (invoke_status != kTfLiteOk) { - TF_LITE_REPORT_ERROR(error_reporter, "Invoke failed\n"); + TF_LITE_REPORT_ERROR(µ_error_reporter, "Invoke failed\n"); } TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, invoke_status); @@ -103,21 +103,21 @@ TF_LITE_MICRO_TEST(TestInvoke) { // Make sure that the expected "Person" score is higher than the other class. uint8_t person_score = output->data.uint8[kPersonIndex]; uint8_t no_person_score = output->data.uint8[kNotAPersonIndex]; - TF_LITE_REPORT_ERROR(error_reporter, + TF_LITE_REPORT_ERROR(µ_error_reporter, "person data. person score: %d, no person score: %d\n", person_score, no_person_score); TF_LITE_MICRO_EXPECT_GT(person_score, no_person_score); // Now test with a different input, from an image without a person. const uint8_t* no_person_data = g_no_person_data; - for (int i = 0; i < input->bytes; ++i) { + for (size_t i = 0; i < input->bytes; ++i) { input->data.uint8[i] = no_person_data[i]; } // Run the model on this "No Person" input. invoke_status = interpreter.Invoke(); if (invoke_status != kTfLiteOk) { - TF_LITE_REPORT_ERROR(error_reporter, "Invoke failed\n"); + TF_LITE_REPORT_ERROR(µ_error_reporter, "Invoke failed\n"); } TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, invoke_status); @@ -135,12 +135,12 @@ TF_LITE_MICRO_TEST(TestInvoke) { person_score = output->data.uint8[kPersonIndex]; no_person_score = output->data.uint8[kNotAPersonIndex]; TF_LITE_REPORT_ERROR( - error_reporter, + µ_error_reporter, "no person data. person score: %d, no person score: %d\n", person_score, no_person_score); TF_LITE_MICRO_EXPECT_GT(no_person_score, person_score); - TF_LITE_REPORT_ERROR(error_reporter, "Ran successfully\n"); + TF_LITE_REPORT_ERROR(µ_error_reporter, "Ran successfully\n"); } TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/examples/person_detection_experimental/detection_responder_test.cc b/tensorflow/lite/micro/examples/person_detection_experimental/detection_responder_test.cc index 48dbe5e9f7c..3d86baa9d59 100644 --- a/tensorflow/lite/micro/examples/person_detection_experimental/detection_responder_test.cc +++ b/tensorflow/lite/micro/examples/person_detection_experimental/detection_responder_test.cc @@ -22,13 +22,12 @@ TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(TestCallability) { tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; // This will have external side-effects (like printing to the debug console // or lighting an LED) that are hard to observe, so the most we can do is // make sure the call doesn't crash. 
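Every one of these example-test hunks makes the same substitution: tflite::MicroErrorReporter already implements the tflite::ErrorReporter interface, so its address can be handed directly to any parameter of type ErrorReporter* and the intermediate error_reporter local is redundant. A minimal sketch of the resulting shape, where model, micro_op_resolver, tensor_arena and tensor_arena_size stand in for whatever the individual test already defines:

    tflite::MicroErrorReporter micro_error_reporter;
    tflite::MicroInterpreter interpreter(model, micro_op_resolver, tensor_arena,
                                         tensor_arena_size,
                                         &micro_error_reporter);
    interpreter.AllocateTensors();
    if (interpreter.Invoke() != kTfLiteOk) {
      // The reporter pointer goes straight into the logging macro as well.
      TF_LITE_REPORT_ERROR(&micro_error_reporter, "Invoke failed\n");
    }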
- RespondToDetection(error_reporter, -100, 100); - RespondToDetection(error_reporter, 100, 50); + RespondToDetection(µ_error_reporter, -100, 100); + RespondToDetection(µ_error_reporter, 100, 50); } TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/examples/person_detection_experimental/image_provider_test.cc b/tensorflow/lite/micro/examples/person_detection_experimental/image_provider_test.cc index f282ed55651..cd5022446b6 100644 --- a/tensorflow/lite/micro/examples/person_detection_experimental/image_provider_test.cc +++ b/tensorflow/lite/micro/examples/person_detection_experimental/image_provider_test.cc @@ -26,11 +26,10 @@ TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(TestImageProvider) { tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; int8_t image_data[kMaxImageSize]; - TfLiteStatus get_status = - GetImage(error_reporter, kNumCols, kNumRows, kNumChannels, image_data); + TfLiteStatus get_status = GetImage(µ_error_reporter, kNumCols, kNumRows, + kNumChannels, image_data); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, get_status); TF_LITE_MICRO_EXPECT_NE(image_data, nullptr); diff --git a/tensorflow/lite/micro/examples/person_detection_experimental/person_detection_test.cc b/tensorflow/lite/micro/examples/person_detection_experimental/person_detection_test.cc index 4ceeb753283..78ac037be94 100644 --- a/tensorflow/lite/micro/examples/person_detection_experimental/person_detection_test.cc +++ b/tensorflow/lite/micro/examples/person_detection_experimental/person_detection_test.cc @@ -34,13 +34,12 @@ TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(TestInvoke) { // Set up logging. tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; // Map the model into a usable data structure. This doesn't involve any // copying or parsing, it's a very lightweight operation. const tflite::Model* model = ::tflite::GetModel(g_person_detect_model_data); if (model->version() != TFLITE_SCHEMA_VERSION) { - TF_LITE_REPORT_ERROR(error_reporter, + TF_LITE_REPORT_ERROR(µ_error_reporter, "Model provided is schema version %d not equal " "to supported version %d.\n", model->version(), TFLITE_SCHEMA_VERSION); @@ -60,7 +59,8 @@ TF_LITE_MICRO_TEST(TestInvoke) { // Build an interpreter to run the model with. tflite::MicroInterpreter interpreter(model, micro_op_resolver, tensor_arena, - tensor_arena_size, error_reporter); + tensor_arena_size, + µ_error_reporter); interpreter.AllocateTensors(); // Get information about the memory area to use for the model's input. @@ -76,7 +76,7 @@ TF_LITE_MICRO_TEST(TestInvoke) { TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt8, input->type); // Copy an image with a person into the memory area used for the input. - for (int i = 0; i < input->bytes; ++i) { + for (size_t i = 0; i < input->bytes; ++i) { // Subtract 128 to convert between uint8 and int8. input->data.int8[i] = g_person_data[i] - 128; } @@ -84,7 +84,7 @@ TF_LITE_MICRO_TEST(TestInvoke) { // Run the model on this input and make sure it succeeds. TfLiteStatus invoke_status = interpreter.Invoke(); if (invoke_status != kTfLiteOk) { - TF_LITE_REPORT_ERROR(error_reporter, "Invoke failed\n"); + TF_LITE_REPORT_ERROR(µ_error_reporter, "Invoke failed\n"); } TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, invoke_status); @@ -99,20 +99,20 @@ TF_LITE_MICRO_TEST(TestInvoke) { // Make sure that the expected "Person" score is higher than the other class. 
int8_t person_score = output->data.int8[kPersonIndex]; int8_t no_person_score = output->data.int8[kNotAPersonIndex]; - TF_LITE_REPORT_ERROR(error_reporter, + TF_LITE_REPORT_ERROR(µ_error_reporter, "person data. person score: %d, no person score: %d\n", person_score, no_person_score); TF_LITE_MICRO_EXPECT_GT(person_score, no_person_score); // Now test with a blank image. - for (int i = 0; i < input->bytes; ++i) { + for (size_t i = 0; i < input->bytes; ++i) { input->data.int8[i] = 0; } // Run the model on this "No Person" input. invoke_status = interpreter.Invoke(); if (invoke_status != kTfLiteOk) { - TF_LITE_REPORT_ERROR(error_reporter, "Invoke failed\n"); + TF_LITE_REPORT_ERROR(µ_error_reporter, "Invoke failed\n"); } TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, invoke_status); @@ -128,12 +128,12 @@ TF_LITE_MICRO_TEST(TestInvoke) { person_score = output->data.int8[kPersonIndex]; no_person_score = output->data.int8[kNotAPersonIndex]; TF_LITE_REPORT_ERROR( - error_reporter, + µ_error_reporter, "no person data. person score: %d, no person score: %d\n", person_score, no_person_score); TF_LITE_MICRO_EXPECT_GT(no_person_score, person_score); - TF_LITE_REPORT_ERROR(error_reporter, "Ran successfully\n"); + TF_LITE_REPORT_ERROR(µ_error_reporter, "Ran successfully\n"); } TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/kernels/add_test.cc b/tensorflow/lite/micro/kernels/add_test.cc index 2d703600f56..332f3edf865 100644 --- a/tensorflow/lite/micro/kernels/add_test.cc +++ b/tensorflow/lite/micro/kernels/add_test.cc @@ -431,12 +431,6 @@ TF_LITE_MICRO_TEST(QuantizedAddWithScalarBroadcastUint8) { } } TF_LITE_MICRO_TEST(QuantizedAddWithScalarBroadcastFloat) { - const float scales[] = {0.1, 0.05, 0.1}; - const int zero_points[] = {127, 131, 139}; - uint8_t input1_quantized[tflite::testing::broadcast_output_dims_count]; - uint8_t input2_quantized[tflite::testing::broadcast_output_dims_count]; - uint8_t golden_quantized[tflite::testing::broadcast_output_dims_count]; - uint8_t output[tflite::testing::broadcast_output_dims_count]; float output_float[tflite::testing::broadcast_output_dims_count]; for (int i = 0; i < tflite::testing::broadcast_num_shapes; ++i) { @@ -491,7 +485,6 @@ TF_LITE_MICRO_TEST(QuantizedAddWithMixedBroadcastUint8) { uint8_t input2_quantized[tflite::testing::broadcast_output_dims_count]; uint8_t golden_quantized[tflite::testing::broadcast_output_dims_count]; uint8_t output[tflite::testing::broadcast_output_dims_count]; - float output_float[tflite::testing::broadcast_output_dims_count]; for (int i = 0; i < tflite::testing::broadcast_num_shapes; ++i) { tflite::testing::TestAddQuantized( @@ -512,7 +505,6 @@ TF_LITE_MICRO_TEST(QuantizedAddWithMixedBroadcastInt8) { int8_t input2_quantized[tflite::testing::broadcast_output_dims_count]; int8_t golden_quantized[tflite::testing::broadcast_output_dims_count]; int8_t output[tflite::testing::broadcast_output_dims_count]; - float output_float[tflite::testing::broadcast_output_dims_count]; for (int i = 0; i < tflite::testing::broadcast_num_shapes; ++i) { tflite::testing::TestAddQuantized( diff --git a/tensorflow/lite/micro/kernels/comparisons_test.cc b/tensorflow/lite/micro/kernels/comparisons_test.cc index 64c39c5d2c7..c8a1e2646b3 100644 --- a/tensorflow/lite/micro/kernels/comparisons_test.cc +++ b/tensorflow/lite/micro/kernels/comparisons_test.cc @@ -132,7 +132,6 @@ void TestComparisonQuantizedUInt8(tflite::BuiltinOperator op, TfLiteIntArray* input1_dims = IntArrayFromInts(input1_dims_data); TfLiteIntArray* input2_dims = 
IntArrayFromInts(input2_dims_data); TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); - const int output_dims_count = ElementCount(*output_dims); TfLiteTensor tensors[tensors_size] = { CreateQuantizedTensor(input1_data, input1_quantized, input1_dims, @@ -156,7 +155,6 @@ void TestComparisonQuantizedInt8(tflite::BuiltinOperator op, TfLiteIntArray* input1_dims = IntArrayFromInts(input1_dims_data); TfLiteIntArray* input2_dims = IntArrayFromInts(input2_dims_data); TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); - const int output_dims_count = ElementCount(*output_dims); TfLiteTensor tensors[tensors_size] = { CreateQuantizedTensor(input1_data, input1_quantized, input1_dims, @@ -749,8 +747,6 @@ TF_LITE_MICRO_TEST(GreaterUInt8EqualQuantized) { const float input1_scale = 0.5; const int input1_zero_point = 128; - const float input2_scale = 0.25; - const int input2_zero_point = 125; uint8_t input1_quantized[4]; uint8_t input2_quantized[4]; @@ -774,8 +770,6 @@ TF_LITE_MICRO_TEST(LessQuantizedUInt8) { const float input1_scale = 0.5; const int input1_zero_point = 128; - const float input2_scale = 0.25; - const int input2_zero_point = 125; uint8_t input1_quantized[4]; uint8_t input2_quantized[4]; @@ -799,8 +793,6 @@ TF_LITE_MICRO_TEST(LessEqualQuantizedUInt8) { const float input1_scale = 0.5; const int input1_zero_point = 128; - const float input2_scale = 0.25; - const int input2_zero_point = 125; uint8_t input1_quantized[4]; uint8_t input2_quantized[4]; @@ -829,8 +821,6 @@ TF_LITE_MICRO_TEST(EqualQuantizedUInt8WithBroadcast) { const float input1_scale = 0.5; const int input1_zero_point = 128; - const float input2_scale = 0.25; - const int input2_zero_point = 125; uint8_t input1_quantized[6]; uint8_t input2_quantized[6]; @@ -860,8 +850,6 @@ TF_LITE_MICRO_TEST(NotEqualQuantizedUInt8WithBroadcast) { const float input1_scale = 0.5; const int input1_zero_point = 128; - const float input2_scale = 0.25; - const int input2_zero_point = 125; uint8_t input1_quantized[6]; uint8_t input2_quantized[6]; @@ -891,8 +879,6 @@ TF_LITE_MICRO_TEST(NotEqualQuantizedInt8WithBroadcast) { const float input1_scale = 0.5; const int input1_zero_point = -9; - const float input2_scale = 0.25; - const int input2_zero_point = 9; int8_t input1_quantized[6]; int8_t input2_quantized[6]; @@ -922,8 +908,6 @@ TF_LITE_MICRO_TEST(GreaterQuantizedUInt8WithBroadcast) { const float input1_scale = 0.5; const int input1_zero_point = 128; - const float input2_scale = 0.25; - const int input2_zero_point = 125; uint8_t input1_quantized[6]; uint8_t input2_quantized[6]; @@ -953,8 +937,6 @@ TF_LITE_MICRO_TEST(GreaterQuantizedInt8WithBroadcast) { const float input1_scale = 0.5; const int input1_zero_point = -9; - const float input2_scale = 0.25; - const int input2_zero_point = 9; int8_t input1_quantized[6]; int8_t input2_quantized[6]; @@ -984,8 +966,6 @@ TF_LITE_MICRO_TEST(GreaterEqualQuantizedUInt8WithBroadcast) { const float input1_scale = 0.5; const int input1_zero_point = 128; - const float input2_scale = 0.25; - const int input2_zero_point = 125; uint8_t input1_quantized[6]; uint8_t input2_quantized[6]; @@ -1015,8 +995,6 @@ TF_LITE_MICRO_TEST(GreaterEqualQuantizedInt8WithBroadcast) { const float input1_scale = 0.5; const int input1_zero_point = -9; - const float input2_scale = 0.25; - const int input2_zero_point = 9; int8_t input1_quantized[6]; int8_t input2_quantized[6]; @@ -1046,8 +1024,6 @@ TF_LITE_MICRO_TEST(LessQuantizedUInt8WithBroadcast) { const float input1_scale = 0.5; const int input1_zero_point = 128; - 
const float input2_scale = 0.25; - const int input2_zero_point = 125; uint8_t input1_quantized[6]; uint8_t input2_quantized[6]; @@ -1077,8 +1053,6 @@ TF_LITE_MICRO_TEST(LessQuantizedInt8WithBroadcast) { const float input1_scale = 0.5; const int input1_zero_point = -9; - const float input2_scale = 0.25; - const int input2_zero_point = 9; int8_t input1_quantized[6]; int8_t input2_quantized[6]; @@ -1108,8 +1082,6 @@ TF_LITE_MICRO_TEST(LessEqualQuantizedUInt8WithBroadcast) { const float input1_scale = 0.5; const int input1_zero_point = 128; - const float input2_scale = 0.25; - const int input2_zero_point = 125; uint8_t input1_quantized[6]; uint8_t input2_quantized[6]; @@ -1139,8 +1111,6 @@ TF_LITE_MICRO_TEST(LessEqualQuantizedInt8WithBroadcast) { const float input1_scale = 0.5; const int input1_zero_point = -9; - const float input2_scale = 0.25; - const int input2_zero_point = 9; int8_t input1_quantized[6]; int8_t input2_quantized[6]; diff --git a/tensorflow/lite/micro/kernels/conv_test.cc b/tensorflow/lite/micro/kernels/conv_test.cc index 686b3f98ff5..6343496dd5a 100644 --- a/tensorflow/lite/micro/kernels/conv_test.cc +++ b/tensorflow/lite/micro/kernels/conv_test.cc @@ -163,9 +163,9 @@ void TestConvQuantizedPerLayer( // TODO(njeff): Affine Quantization Params should be set on tensor creation. float filter_scales[] = {1, filter_scale}; int filter_zero_points[] = {1, 128}; - TfLiteAffineQuantization filter_quant = { - FloatArrayFromFloats(filter_scales), - IntArrayFromInts(filter_zero_points)}; + TfLiteAffineQuantization filter_quant = {FloatArrayFromFloats(filter_scales), + IntArrayFromInts(filter_zero_points), + 0}; tensors[1].quantization = {kTfLiteAffineQuantization, &filter_quant}; TF_LITE_MICRO_EXPECT_EQ( @@ -209,14 +209,15 @@ void TestConvQuantizedPerChannel( float input_scales[] = {1, input_scale}; int input_zero_points[] = {1, input_zero_point}; TfLiteAffineQuantization input_quant = {FloatArrayFromFloats(input_scales), - IntArrayFromInts(input_zero_points)}; + IntArrayFromInts(input_zero_points), + 0}; input_tensor.quantization = {kTfLiteAffineQuantization, &input_quant}; float output_scales[] = {1, output_scale}; int output_zero_points[] = {1, output_zero_point}; - TfLiteAffineQuantization output_quant = { - FloatArrayFromFloats(output_scales), - IntArrayFromInts(output_zero_points)}; + TfLiteAffineQuantization output_quant = {FloatArrayFromFloats(output_scales), + IntArrayFromInts(output_zero_points), + 0}; output_tensor.quantization = {kTfLiteAffineQuantization, &output_quant}; constexpr int inputs_size = 3; @@ -401,9 +402,6 @@ TF_LITE_MICRO_TEST(SimpleTestDilatedQuantizedPerChannel) { } TF_LITE_MICRO_TEST(SimpleTestQuantizedPerChannelRelu6) { - // conv params: - // padding, stride_, dilation_, activation - TfLiteConvParams conv_params = {kTfLitePaddingValid, 1, 1, kTfLiteActRelu6}; const int output_dims_count = 12; int8_t output_data[output_dims_count]; @@ -565,7 +563,7 @@ TF_LITE_MICRO_TEST(FilterDimsNotMatchingAffineQuantization) { int input_zero_points[] = {1, 128}; TfLiteAffineQuantization input_quant = { tflite::testing::FloatArrayFromFloats(input_scales), - tflite::testing::IntArrayFromInts(input_zero_points)}; + tflite::testing::IntArrayFromInts(input_zero_points), 0}; input_tensor.quantization = {kTfLiteAffineQuantization, &input_quant}; constexpr int inputs_size = 3; @@ -633,7 +631,7 @@ TF_LITE_MICRO_TEST(BroadcastPerLayerQuantizationToPerChannelShouldMatchGolden) { float input_scales[2] = {1, input_scale}; TfLiteAffineQuantization input_quant = { 
tflite::testing::FloatArrayFromFloats(input_scales), - tflite::testing::IntArrayFromInts(input_zero_points)}; + tflite::testing::IntArrayFromInts(input_zero_points), 0}; input_tensor.quantization = {kTfLiteAffineQuantization, &input_quant}; // Create per-layer quantized int8 filter tensor. @@ -644,7 +642,7 @@ TF_LITE_MICRO_TEST(BroadcastPerLayerQuantizationToPerChannelShouldMatchGolden) { float filter_scales[2] = {1, filter_scale}; TfLiteAffineQuantization filter_quant = { tflite::testing::FloatArrayFromFloats(filter_scales), - tflite::testing::IntArrayFromInts(filter_zero_points)}; + tflite::testing::IntArrayFromInts(filter_zero_points), 0}; filter_tensor.quantization = {kTfLiteAffineQuantization, &filter_quant}; // Create per-layer quantized int32 bias tensor. @@ -658,7 +656,7 @@ TF_LITE_MICRO_TEST(BroadcastPerLayerQuantizationToPerChannelShouldMatchGolden) { float bias_scales[2] = {1, input_scale * filter_scale}; TfLiteAffineQuantization bias_quant = { tflite::testing::FloatArrayFromFloats(bias_scales), - tflite::testing::IntArrayFromInts(bias_zero_points)}; + tflite::testing::IntArrayFromInts(bias_zero_points), 0}; bias_tensor.quantization = {kTfLiteAffineQuantization, &bias_quant}; // Create per-layer quantized int8 output tensor. @@ -668,7 +666,7 @@ TF_LITE_MICRO_TEST(BroadcastPerLayerQuantizationToPerChannelShouldMatchGolden) { float output_scales[2] = {1, output_scale}; TfLiteAffineQuantization output_quant = { tflite::testing::FloatArrayFromFloats(output_scales), - tflite::testing::IntArrayFromInts(output_zero_points)}; + tflite::testing::IntArrayFromInts(output_zero_points), 0}; output_tensor.quantization = {kTfLiteAffineQuantization, &output_quant}; constexpr int inputs_size = 3; @@ -757,7 +755,7 @@ TF_LITE_MICRO_TEST(Int8Input32x1Filter32x32ShouldMatchGolden) { float input_scales[] = {1, input_scale}; TfLiteAffineQuantization input_quant = { tflite::testing::FloatArrayFromFloats(input_scales), - tflite::testing::IntArrayFromInts(input_zero_points)}; + tflite::testing::IntArrayFromInts(input_zero_points), 0}; input_tensor.quantization = {kTfLiteAffineQuantization, &input_quant}; // Create per-tensor quantized int8 filter tensor. @@ -770,7 +768,7 @@ TF_LITE_MICRO_TEST(Int8Input32x1Filter32x32ShouldMatchGolden) { float filter_scales[] = {1, filter_scale}; TfLiteAffineQuantization filter_quant = { tflite::testing::FloatArrayFromFloats(filter_scales), - tflite::testing::IntArrayFromInts(filter_zero_points)}; + tflite::testing::IntArrayFromInts(filter_zero_points), 0}; filter_tensor.quantization = {kTfLiteAffineQuantization, &filter_quant}; // Create per-tensor quantized int32 bias tensor. @@ -786,7 +784,7 @@ TF_LITE_MICRO_TEST(Int8Input32x1Filter32x32ShouldMatchGolden) { float bias_scales[] = {1, input_scale * filter_scale}; TfLiteAffineQuantization bias_quant = { tflite::testing::FloatArrayFromFloats(bias_scales), - tflite::testing::IntArrayFromInts(bias_zero_points)}; + tflite::testing::IntArrayFromInts(bias_zero_points), 0}; bias_tensor.quantization = {kTfLiteAffineQuantization, &bias_quant}; // Create per-tensor quantized int8 output tensor. 
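The trailing 0 added throughout these initializers is the third member of TfLiteAffineQuantization: the struct holds the scale array, the zero-point array, and an int quantized_dimension, so writing the 0 explicitly marks these parameters as per-tensor and keeps the brace initializer covering every field. A sketch of the per-tensor pattern, using the test helpers' count-first array layout, purely illustrative scale and zero-point values, and tensor standing for whichever TfLiteTensor is being configured:

    float scales[] = {1, 0.5f};        // {count, scale}
    int zero_points[] = {1, 128};      // {count, zero_point}
    TfLiteAffineQuantization quant = {
        tflite::testing::FloatArrayFromFloats(scales),
        tflite::testing::IntArrayFromInts(zero_points),
        /*quantized_dimension=*/0};
    tensor.quantization = {kTfLiteAffineQuantization, &quant};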
@@ -798,7 +796,7 @@ TF_LITE_MICRO_TEST(Int8Input32x1Filter32x32ShouldMatchGolden) { float output_scales[] = {1, output_scale}; TfLiteAffineQuantization output_quant = { tflite::testing::FloatArrayFromFloats(output_scales), - tflite::testing::IntArrayFromInts(output_zero_points)}; + tflite::testing::IntArrayFromInts(output_zero_points), 0}; output_tensor.quantization = {kTfLiteAffineQuantization, &output_quant}; // The 3 inputs include the input, filter and bias tensors. diff --git a/tensorflow/lite/micro/kernels/depthwise_conv_test.cc b/tensorflow/lite/micro/kernels/depthwise_conv_test.cc index cd62de0d17e..4b9ac7ee775 100644 --- a/tensorflow/lite/micro/kernels/depthwise_conv_test.cc +++ b/tensorflow/lite/micro/kernels/depthwise_conv_test.cc @@ -157,15 +157,15 @@ void TestDepthwiseConvQuantizedPerLayer( // TODO(njeff): Affine Quantization Params should be set on tensor creation. float filter_scales[] = {1, filter_scale}; int filter_zero_points[] = {1, 128}; - TfLiteAffineQuantization filter_quant = { - FloatArrayFromFloats(filter_scales), - IntArrayFromInts(filter_zero_points)}; + TfLiteAffineQuantization filter_quant = {FloatArrayFromFloats(filter_scales), + IntArrayFromInts(filter_zero_points), + 0}; tensors[1].quantization = {kTfLiteAffineQuantization, &filter_quant}; float bias_scales[] = {1, filter_scale * input_scale}; int bias_zero_points[] = {1, 128}; TfLiteAffineQuantization bias_quant = {FloatArrayFromFloats(bias_scales), - IntArrayFromInts(bias_zero_points)}; + IntArrayFromInts(bias_zero_points), 0}; tensors[2].quantization = {kTfLiteAffineQuantization, &bias_quant}; AsymmetricQuantize(golden, golden_quantized, output_dims_count, output_scale, @@ -213,14 +213,15 @@ void TestDepthwiseConvQuantizedPerChannel( float input_scales[] = {1, input_scale}; int input_zero_points[] = {1, input_zero_point}; TfLiteAffineQuantization input_quant = {FloatArrayFromFloats(input_scales), - IntArrayFromInts(input_zero_points)}; + IntArrayFromInts(input_zero_points), + 0}; input_tensor.quantization = {kTfLiteAffineQuantization, &input_quant}; float output_scales[] = {1, output_scale}; int output_zero_points[] = {1, output_zero_point}; - TfLiteAffineQuantization output_quant = { - FloatArrayFromFloats(output_scales), - IntArrayFromInts(output_zero_points)}; + TfLiteAffineQuantization output_quant = {FloatArrayFromFloats(output_scales), + IntArrayFromInts(output_zero_points), + 0}; output_tensor.quantization = {kTfLiteAffineQuantization, &output_quant}; constexpr int inputs_size = 3; @@ -249,14 +250,11 @@ void TestDepthwiseConvQuantizedPerChannel( TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(SimpleTest) { - const int input_elements = 12; const int input_shape[] = {4, 1, 3, 2, 2}; const float input_values[] = {1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12}; - const int filter_elements = 16; const int filter_shape[] = {4, 1, 2, 2, 4}; const float filter_values[] = {1, 2, 3, 4, -9, 10, -11, 12, 5, 6, 7, 8, 13, -14, 15, -16}; - const int bias_elements = 4; const int bias_shape[] = {4, 1, 1, 1, 4}; const float bias_values[] = {1, 2, 3, 4}; const float golden[] = { @@ -367,16 +365,12 @@ TF_LITE_MICRO_TEST(SimpleTestDilatedQuantized) { } TF_LITE_MICRO_TEST(SimpleTestRelu) { - const int input_elements = 12; const int input_shape[] = {4, 1, 3, 2, 2}; const float input_values[] = {1, 2, 7, 8, 3, 4, 9, 10, 5, 6, 11, 12}; - const int filter_elements = 16; const int filter_shape[] = {4, 1, 2, 2, 4}; const float filter_values[] = {1, 2, 3, 4, -9, 10, -11, 12, 5, 6, 7, 8, 13, -14, 15, -16}; - const int bias_elements = 4; 
const int bias_shape[] = {4, 1, 1, 1, 4}; - const int output_elements = 8; const float bias_values[] = {1, 2, 3, 4}; const int output_shape[] = {4, 1, 2, 1, 4}; const int output_dims_count = 8; @@ -505,8 +499,6 @@ TF_LITE_MICRO_TEST(SimpleTestQuantizedPerChannel) { int8_t filter_quantized[filter_elements]; int32_t bias_quantized[bias_elements]; int8_t golden_quantized[output_elements]; - int zero_points[bias_elements + 1]; - float scales[bias_elements + 1]; TfLiteDepthwiseConvParams conv_params; conv_params.activation = kTfLiteActNone; @@ -550,8 +542,6 @@ TF_LITE_MICRO_TEST(SimpleTestQuantizedPerChannelDepthMultiplier1) { int8_t filter_quantized[filter_elements]; int32_t bias_quantized[bias_elements]; int8_t golden_quantized[output_elements]; - int zero_points[bias_elements + 1]; - float scales[bias_elements + 1]; TfLiteDepthwiseConvParams conv_params; conv_params.activation = kTfLiteActNone; @@ -583,7 +573,6 @@ TF_LITE_MICRO_TEST(TestQuantizedPerChannelDepthMultiplier1Relu6) { }; const int output_shape[] = {4, 1, 2, 1, 4}; int8_t output_data[output_elements]; - float output_float[output_elements]; const float input_scale = 0.023529f; const float output_scale = 0.023529f; @@ -594,8 +583,6 @@ TF_LITE_MICRO_TEST(TestQuantizedPerChannelDepthMultiplier1Relu6) { int8_t filter_quantized[filter_elements]; int32_t bias_quantized[bias_elements]; int8_t golden_quantized[output_elements]; - int zero_points[bias_elements + 1]; - float scales[bias_elements + 1]; TfLiteDepthwiseConvParams conv_params; conv_params.activation = kTfLiteActRelu6; @@ -640,8 +627,6 @@ TF_LITE_MICRO_TEST(SimpleTestDilatedQuantizedPerChannel) { int8_t filter_quantized[filter_elements]; int32_t bias_quantized[bias_elements]; int8_t golden_quantized[output_elements]; - int zero_points[bias_elements + 1]; - float scales[bias_elements + 1]; TfLiteDepthwiseConvParams conv_params; conv_params.activation = kTfLiteActNone; @@ -673,8 +658,6 @@ TF_LITE_MICRO_TEST(TestQuantizedPerChannelCompareWithFloat) { int8_t filter_quantized[filter_size]; int32_t bias_quantized[bias_size]; int8_t golden_quantized[output_size]; - int zero_points[bias_size + 1]; - float scales[bias_size + 1]; int8_t output_data[output_size]; float output_float[output_size]; @@ -707,7 +690,6 @@ TF_LITE_MICRO_TEST(FilterDimsNotMatchingAffineQuantization) { const int bias_shape[] = {4, 1, 1, 1, 4}; const float bias_data[] = {3, -2, 4, 6}; const int output_shape[] = {4, 1, 1, 2, 4}; - const float golden[] = {43, 48, 18, 22, 3, -4, -28, -36}; const int input_size = 12; const int filter_size = 16; @@ -720,7 +702,6 @@ TF_LITE_MICRO_TEST(FilterDimsNotMatchingAffineQuantization) { int zero_points[bias_size + 1]; float scales[bias_size + 1]; int8_t output_data[output_size]; - float output_float[output_size]; const float input_scale = 0.5; const float output_scale = 1.0; @@ -753,7 +734,7 @@ TF_LITE_MICRO_TEST(FilterDimsNotMatchingAffineQuantization) { int input_zero_points[] = {1, input_zero_point}; TfLiteAffineQuantization input_quant = { tflite::testing::FloatArrayFromFloats(input_scales), - tflite::testing::IntArrayFromInts(input_zero_points)}; + tflite::testing::IntArrayFromInts(input_zero_points), 0}; input_tensor.quantization = {kTfLiteAffineQuantization, &input_quant}; constexpr int inputs_size = 3; @@ -829,7 +810,7 @@ TF_LITE_MICRO_TEST(PerChannelBroadcastQuantizationParams) { float input_scales[2] = {1, input_scale}; TfLiteAffineQuantization input_quant = { tflite::testing::FloatArrayFromFloats(input_scales), - tflite::testing::IntArrayFromInts(input_zero_points)}; + 
tflite::testing::IntArrayFromInts(input_zero_points), 0}; input_tensor.quantization = {kTfLiteAffineQuantization, &input_quant}; // Create per-layer quantized int8 filter tensor. @@ -839,7 +820,7 @@ TF_LITE_MICRO_TEST(PerChannelBroadcastQuantizationParams) { float filter_scales[2] = {1, filter_scale}; TfLiteAffineQuantization filter_quant = { tflite::testing::FloatArrayFromFloats(filter_scales), - tflite::testing::IntArrayFromInts(filter_zero_points)}; + tflite::testing::IntArrayFromInts(filter_zero_points), 0}; filter_tensor.quantization = {kTfLiteAffineQuantization, &filter_quant}; // Create per-layer quantized int32 bias tensor. @@ -852,7 +833,7 @@ TF_LITE_MICRO_TEST(PerChannelBroadcastQuantizationParams) { float bias_scales[2] = {1, input_scale * filter_scale}; TfLiteAffineQuantization bias_quant = { tflite::testing::FloatArrayFromFloats(bias_scales), - tflite::testing::IntArrayFromInts(bias_zero_points)}; + tflite::testing::IntArrayFromInts(bias_zero_points), 0}; bias_tensor.quantization = {kTfLiteAffineQuantization, &bias_quant}; // Create per-layer quantized int8 output tensor. @@ -862,7 +843,7 @@ TF_LITE_MICRO_TEST(PerChannelBroadcastQuantizationParams) { float output_scales[2] = {1, output_scale}; TfLiteAffineQuantization output_quant = { tflite::testing::FloatArrayFromFloats(output_scales), - tflite::testing::IntArrayFromInts(output_zero_points)}; + tflite::testing::IntArrayFromInts(output_zero_points), 0}; output_tensor.quantization = {kTfLiteAffineQuantization, &output_quant}; constexpr int inputs_size = 3; @@ -967,7 +948,7 @@ TF_LITE_MICRO_TEST(Int8Input32x4Filter32x4ShouldMatchGolden) { float input_scales[] = {1, input_scale}; TfLiteAffineQuantization input_quant = { tflite::testing::FloatArrayFromFloats(input_scales), - tflite::testing::IntArrayFromInts(input_zero_points)}; + tflite::testing::IntArrayFromInts(input_zero_points), 0}; input_tensor.quantization = {kTfLiteAffineQuantization, &input_quant}; // Create per-tensor quantized int8 filter tensor. @@ -980,7 +961,7 @@ TF_LITE_MICRO_TEST(Int8Input32x4Filter32x4ShouldMatchGolden) { float filter_scales[] = {1, filter_scale}; TfLiteAffineQuantization filter_quant = { tflite::testing::FloatArrayFromFloats(filter_scales), - tflite::testing::IntArrayFromInts(filter_zero_points)}; + tflite::testing::IntArrayFromInts(filter_zero_points), 0}; filter_tensor.quantization = {kTfLiteAffineQuantization, &filter_quant}; // Create per-tensor quantized int32 bias tensor. @@ -997,7 +978,7 @@ TF_LITE_MICRO_TEST(Int8Input32x4Filter32x4ShouldMatchGolden) { float bias_scales[] = {1, input_scale * filter_scale}; TfLiteAffineQuantization bias_quant = { tflite::testing::FloatArrayFromFloats(bias_scales), - tflite::testing::IntArrayFromInts(bias_zero_points)}; + tflite::testing::IntArrayFromInts(bias_zero_points), 0}; bias_tensor.quantization = {kTfLiteAffineQuantization, &bias_quant}; // Create per-tensor quantized int8 output tensor. @@ -1010,7 +991,7 @@ TF_LITE_MICRO_TEST(Int8Input32x4Filter32x4ShouldMatchGolden) { float output_scales[] = {1, output_scale}; TfLiteAffineQuantization output_quant = { tflite::testing::FloatArrayFromFloats(output_scales), - tflite::testing::IntArrayFromInts(output_zero_points)}; + tflite::testing::IntArrayFromInts(output_zero_points), 0}; output_tensor.quantization = {kTfLiteAffineQuantization, &output_quant}; // The 3 inputs include the input, filter and bias tensors. 
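One detail worth spelling out in these bias tensors: with affine quantization, real = scale * (q - zero_point), and the convolution accumulates products of quantized input and filter values, so the int32 bias has to be expressed on that product scale before it can be added to the accumulator. That is why the hunks above build bias_scales as {1, input_scale * filter_scale}. A worked instance with assumed numbers, not taken from any specific test:

    // input_scale = 0.5 and filter_scale = 0.25  =>  bias_scale = 0.125.
    // A real bias of 1.0 is then stored as round(1.0 / 0.125) = 8 in int32.
    float bias_scales[] = {1, 0.5f * 0.25f};  // {count, input_scale * filter_scale}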
@@ -1035,7 +1016,7 @@ TF_LITE_MICRO_TEST(Int8Input32x4Filter32x4ShouldMatchGolden) { conv_params.activation = kTfLiteActNone; conv_params.dilation_width_factor = 1; conv_params.dilation_height_factor = 1; - TfLiteStatus status = tflite::testing::ValidateDepthwiseConvGoldens( + tflite::testing::ValidateDepthwiseConvGoldens( golden_quantized, output_elements, &conv_params, kQuantizationTolerance, kTensorsSize, tensors); } diff --git a/tensorflow/lite/micro/kernels/dequantize_test.cc b/tensorflow/lite/micro/kernels/dequantize_test.cc index 21b42aedc50..5eb3d80e41e 100644 --- a/tensorflow/lite/micro/kernels/dequantize_test.cc +++ b/tensorflow/lite/micro/kernels/dequantize_test.cc @@ -68,7 +68,7 @@ void ValidateDequantizeGoldens(TfLiteTensor* tensors, int tensors_size, } for (int i = 0; i < output_length; ++i) { - TF_LITE_MICRO_EXPECT_NEAR(expected_output_data[i], output_data[i], 0.001); + TF_LITE_MICRO_EXPECT_NEAR(expected_output_data[i], output_data[i], 0.001f); } } @@ -113,7 +113,6 @@ void TestDequantizeToInt32(const int* input_dims_data, const float* input_data, CreateInt32Tensor(output_data, output_dims), }; - TfLiteQuantizationParams output_quant; tensors[1].params.scale = output_scale; tensors[1].params.zero_point = output_zero_point; diff --git a/tensorflow/lite/micro/kernels/fully_connected_test.cc b/tensorflow/lite/micro/kernels/fully_connected_test.cc index 5723248a408..f977904a37c 100644 --- a/tensorflow/lite/micro/kernels/fully_connected_test.cc +++ b/tensorflow/lite/micro/kernels/fully_connected_test.cc @@ -59,9 +59,8 @@ TfLiteStatus TestFullyConnectedFloat( TF_LITE_MICRO_EXPECT_NE(nullptr, registration); TfLiteFullyConnectedParams builtin_data = { - activation, - kTfLiteFullyConnectedWeightsFormatDefault, - }; + activation, kTfLiteFullyConnectedWeightsFormatDefault, false, false}; + const char* init_data = reinterpret_cast(&builtin_data); size_t init_data_size = 0; void* user_data = nullptr; @@ -133,9 +132,7 @@ TfLiteStatus TestFullyConnectedQuantized( TF_LITE_MICRO_EXPECT_NE(nullptr, registration); TfLiteFullyConnectedParams builtin_data = { - activation, - kTfLiteFullyConnectedWeightsFormatDefault, - }; + activation, kTfLiteFullyConnectedWeightsFormatDefault, false, false}; const char* init_data = reinterpret_cast(&builtin_data); size_t init_data_size = 0; void* user_data = nullptr; diff --git a/tensorflow/lite/micro/kernels/hard_swish_test.cc b/tensorflow/lite/micro/kernels/hard_swish_test.cc index cfedd523512..50cafc9b5e5 100644 --- a/tensorflow/lite/micro/kernels/hard_swish_test.cc +++ b/tensorflow/lite/micro/kernels/hard_swish_test.cc @@ -151,10 +151,6 @@ void TestHardSwishQuantizedBias(const int size, const T* output_data, float output_max, float tolerated_bias, float* float_input_values, float* float_ref_output_values) { - const float quantized_type_range = - static_cast(std::numeric_limits::max()) - - static_cast(std::numeric_limits::min()); - const float input_scale = ScaleFromMinMax(input_min, input_max); const float output_scale = ScaleFromMinMax(output_min, output_max); @@ -188,13 +184,6 @@ void TestHardSwishQuantizedBias(const int size, const T* output_data, const int input_dims_data[] = {2, 1, size}; const int output_dims_data[] = {2, 1, size}; - // The numerical error for any 8bit quantized function is at least one half - // times the quantization step: 0.5 * (kOutMax - kOutMin) / 256. - // To that we add again the quantization step (kOutMax - kOutMin) / 256 - // to allow for an off-by-one rounding error. 
- const float kTolerance = - std::max(input_max - input_min, output_max - output_min) * (1.5f / 256.f); - TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data); TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data); const int output_elements_count = ElementCount(*output_dims); diff --git a/tensorflow/lite/micro/kernels/maximum_minimum_test.cc b/tensorflow/lite/micro/kernels/maximum_minimum_test.cc index 8635db3b60b..39b892a8212 100644 --- a/tensorflow/lite/micro/kernels/maximum_minimum_test.cc +++ b/tensorflow/lite/micro/kernels/maximum_minimum_test.cc @@ -74,7 +74,7 @@ void TestMaxMinFloat(tflite::BuiltinOperator op, for (int i = 0; i < output_dims_count; ++i) { TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i], - 1e-5); + 1e-5f); } } diff --git a/tensorflow/lite/micro/kernels/pooling_test.cc b/tensorflow/lite/micro/kernels/pooling_test.cc index d1f21da7533..23d4b506d8e 100644 --- a/tensorflow/lite/micro/kernels/pooling_test.cc +++ b/tensorflow/lite/micro/kernels/pooling_test.cc @@ -54,8 +54,13 @@ void TestAveragePoolingFloat(std::initializer_list input_dims_data, resolver.FindOp(tflite::BuiltinOperator_AVERAGE_POOL_2D); TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - TfLitePoolParams builtin_data = {padding, stride_width, stride_height, - filter_width, filter_height, activation}; + TfLitePoolParams builtin_data = {padding, + stride_width, + stride_height, + filter_width, + filter_height, + activation, + {}}; const char* init_data = reinterpret_cast(&builtin_data); size_t init_data_size = 0; void* user_data = nullptr; @@ -122,8 +127,13 @@ void TestAveragePoolingQuantized( resolver.FindOp(tflite::BuiltinOperator_AVERAGE_POOL_2D); TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - TfLitePoolParams builtin_data = {padding, stride_width, stride_height, - filter_width, filter_height, activation}; + TfLitePoolParams builtin_data = {padding, + stride_width, + stride_height, + filter_width, + filter_height, + activation, + {}}; const char* init_data = reinterpret_cast(&builtin_data); size_t init_data_size = 0; void* user_data = nullptr; @@ -185,10 +195,13 @@ void TestMaxPoolFloat(std::initializer_list input_dims_data, resolver.FindOp(tflite::BuiltinOperator_MAX_POOL_2D); TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - TfLitePoolParams builtin_data = { - padding, stride_width, stride_height, - filter_width, filter_height, activation, - }; + TfLitePoolParams builtin_data = {padding, + stride_width, + stride_height, + filter_width, + filter_height, + activation, + {}}; const char* init_data = reinterpret_cast(&builtin_data); size_t init_data_size = 0; @@ -255,10 +268,13 @@ void TestMaxPoolQuantized(std::initializer_list input_dims_data, resolver.FindOp(tflite::BuiltinOperator_MAX_POOL_2D); TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - TfLitePoolParams builtin_data = { - padding, stride_width, stride_height, - filter_width, filter_height, activation, - }; + TfLitePoolParams builtin_data = {padding, + stride_width, + stride_height, + filter_width, + filter_height, + activation, + {}}; const char* init_data = reinterpret_cast(&builtin_data); size_t init_data_size = 0; diff --git a/tensorflow/lite/micro/kernels/prelu_test.cc b/tensorflow/lite/micro/kernels/prelu_test.cc index 4b4bfd12e60..ae5bacca988 100644 --- a/tensorflow/lite/micro/kernels/prelu_test.cc +++ b/tensorflow/lite/micro/kernels/prelu_test.cc @@ -170,8 +170,6 @@ TF_LITE_MICRO_TEST(QuantizedUint8PreluActivationsOpTest) { using tflite::testing::F2Q; const float kMin = -4; const float kMax = 
127.f / 32.f; - const float kAlphaMin = -0.5f; - const float kAlphaMax = 0.5f; const int output_dims_count = 12; uint8_t output_data[output_dims_count]; tflite::testing::TestPreluQuantized( @@ -197,8 +195,6 @@ TF_LITE_MICRO_TEST(QuantizedInt8PreluActivationsOpTest) { using tflite::testing::F2QS; const float kMin = -1; const float kMax = 127.f / 128.f; - const float kAlphaMin = -0.5f; - const float kAlphaMax = 0.5f; const int output_dims_count = 12; int8_t output_data[output_dims_count]; tflite::testing::TestPreluQuantized( diff --git a/tensorflow/lite/micro/kernels/quantization_util_test.cc b/tensorflow/lite/micro/kernels/quantization_util_test.cc index e9b219128fe..5929f5fd7b5 100644 --- a/tensorflow/lite/micro/kernels/quantization_util_test.cc +++ b/tensorflow/lite/micro/kernels/quantization_util_test.cc @@ -27,40 +27,55 @@ void RunSafeCastTests() { const IntOut imin = std::numeric_limits::min(); const bool s = std::numeric_limits::is_signed; if (s) { - TF_LITE_MICRO_EXPECT_LT(imin, 0); + TF_LITE_MICRO_EXPECT_LT(static_cast(imin), 0); } else { - TF_LITE_MICRO_EXPECT_EQ(0, imin); + TF_LITE_MICRO_EXPECT_EQ(static_cast(0), imin); } // Some basic tests. - TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast(0.0)), 0); - TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast(-0.0)), 0); - TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast(0.99)), 0); - TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast(1.0)), 1); - TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast(1.01)), 1); - TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast(1.99)), 1); - TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast(2.0)), 2); - TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast(2.01)), 2); - TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast(-0.99)), 0); + TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast(0.0)), + static_cast(0)); + TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast(-0.0)), + static_cast(0)); + TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast(0.99)), + static_cast(0)); + TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast(1.0)), + static_cast(1)); + TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast(1.01)), + static_cast(1)); + TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast(1.99)), + static_cast(1)); + TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast(2.0)), + static_cast(2)); + TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast(2.01)), + static_cast(2)); + TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast(-0.99)), + static_cast(0)); TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast(-1.0)), - s ? -1 : 0); + s ? static_cast(-1) : static_cast(0)); TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast(-1.01)), - s ? -1 : 0); + s ? static_cast(-1) : static_cast(0)); TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast(-1.99)), - s ? -1 : 0); + s ? static_cast(-1) : static_cast(0)); TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast(-2.0)), - s ? -2 : 0); + s ? static_cast(-2) : static_cast(0)); TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast(-2.01)), - s ? -2 : 0); - TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast(117.9)), 117); - TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast(118.0)), 118); - TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast(118.1)), 118); - TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast(-117.9)), - s ? -117 : 0); - TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast(-118.0)), - s ? -118 : 0); - TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast(-118.1)), - s ? -118 : 0); + s ? 
static_cast(-2) : static_cast(0)); + TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast(117.9)), + static_cast(117)); + TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast(118.0)), + static_cast(118)); + TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast(118.1)), + static_cast(118)); + TF_LITE_MICRO_EXPECT_EQ( + SafeCast(static_cast(-117.9)), + s ? static_cast(-117) : static_cast(0)); + TF_LITE_MICRO_EXPECT_EQ( + SafeCast(static_cast(-118.0)), + s ? static_cast(-118) : static_cast(0)); + TF_LITE_MICRO_EXPECT_EQ( + SafeCast(static_cast(-118.1)), + s ? static_cast(-118) : static_cast(0)); // Some edge cases. TF_LITE_MICRO_EXPECT_EQ(SafeCast(std::numeric_limits::max()), @@ -72,52 +87,66 @@ void RunSafeCastTests() { TF_LITE_MICRO_EXPECT_EQ( SafeCast(-std::numeric_limits::infinity()), imin); TF_LITE_MICRO_EXPECT_EQ( - SafeCast(std::numeric_limits::quiet_NaN()), 0); + SafeCast(std::numeric_limits::quiet_NaN()), + static_cast(0)); // Some larger numbers. - if (sizeof(IntOut) >= 4 && sizeof(FloatIn) > 4) { + if (sizeof(IntOut) >= static_cast(4) && + sizeof(FloatIn) > static_cast(4)) { TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast(0x76543210)), - 0x76543210); + static_cast(0x76543210)); } if (sizeof(FloatIn) > sizeof(IntOut)) { // Check values near imax. - TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast( - static_cast(imax) + 0.1)), - imax); - TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast( - static_cast(imax) + 0.99)), - imax); - TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast( - static_cast(imax) + 1.0)), - imax); - TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast( - static_cast(imax) + 1.99)), - imax); - TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast( - static_cast(imax) + 2.0)), - imax); - TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast( - static_cast(imax) - 0.1)), - imax - 1); - TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast( - static_cast(imax) - 0.99)), - imax - 1); - TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast( - static_cast(imax) - 1.0)), - imax - 1); - TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast( - static_cast(imax) - 1.01)), - imax - 2); - TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast( - static_cast(imax) - 1.99)), - imax - 2); - TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast( - static_cast(imax) - 2.0)), - imax - 2); - TF_LITE_MICRO_EXPECT_EQ(SafeCast(static_cast( - static_cast(imax) - 2.01)), - imax - 3); + TF_LITE_MICRO_EXPECT_EQ( + SafeCast(static_cast(static_cast(imax) + + static_cast(0.1))), + imax); + TF_LITE_MICRO_EXPECT_EQ( + SafeCast(static_cast(static_cast(imax) + + static_cast(0.99))), + imax); + TF_LITE_MICRO_EXPECT_EQ( + SafeCast(static_cast(static_cast(imax) + + static_cast(1.0))), + imax); + TF_LITE_MICRO_EXPECT_EQ( + SafeCast(static_cast(static_cast(imax) + + static_cast(1.99))), + imax); + TF_LITE_MICRO_EXPECT_EQ( + SafeCast(static_cast(static_cast(imax) + + static_cast(2.0))), + imax); + TF_LITE_MICRO_EXPECT_EQ( + SafeCast(static_cast(static_cast(imax) - + static_cast(0.1))), + imax - 1); + TF_LITE_MICRO_EXPECT_EQ( + SafeCast(static_cast(static_cast(imax) - + static_cast(0.99))), + imax - 1); + TF_LITE_MICRO_EXPECT_EQ( + SafeCast(static_cast(static_cast(imax) - + static_cast(1.0))), + imax - 1); + TF_LITE_MICRO_EXPECT_EQ( + SafeCast(static_cast(static_cast(imax) - + static_cast(1.01))), + imax - 2); + TF_LITE_MICRO_EXPECT_EQ( + SafeCast(static_cast(static_cast(imax) - + static_cast(1.99))), + imax - 2); + TF_LITE_MICRO_EXPECT_EQ( + SafeCast(static_cast(static_cast(imax) - + static_cast(2.0))), + imax - 2); + TF_LITE_MICRO_EXPECT_EQ( + SafeCast(static_cast(static_cast(imax) - + static_cast(2.01))), + imax - 
3); } // Check values considerably larger in magnitude than imin and imax @@ -210,30 +239,30 @@ TF_LITE_MICRO_TEST(QuantizationUtilTest_IntegerFrExp) { TF_LITE_MICRO_EXPECT_EQ(0, shift); result = tflite::IntegerFrExp(1.0, &shift); - TF_LITE_MICRO_EXPECT_NEAR(0x40000000, result, 1); + TF_LITE_MICRO_EXPECT_NEAR(0x40000000, result, 1ll); TF_LITE_MICRO_EXPECT_EQ(1, shift); result = tflite::IntegerFrExp(0.25, &shift); - TF_LITE_MICRO_EXPECT_NEAR(0x40000000, result, 1); + TF_LITE_MICRO_EXPECT_NEAR(0x40000000, result, 1ll); TF_LITE_MICRO_EXPECT_EQ(-1, shift); result = tflite::IntegerFrExp(-1.0, &shift); - TF_LITE_MICRO_EXPECT_NEAR(-(1 << 30), result, 1); + TF_LITE_MICRO_EXPECT_NEAR(-(1 << 30), result, 1ll); TF_LITE_MICRO_EXPECT_EQ(1, shift); result = tflite::IntegerFrExp(123.45, &shift); - TF_LITE_MICRO_EXPECT_NEAR(2071147315, result, 1); + TF_LITE_MICRO_EXPECT_NEAR(2071147315, result, 1ll); TF_LITE_MICRO_EXPECT_EQ(7, shift); - result = tflite::IntegerFrExp(NAN, &shift); + result = tflite::IntegerFrExp(static_cast(NAN), &shift); TF_LITE_MICRO_EXPECT_NEAR(0, result, 1); TF_LITE_MICRO_EXPECT_EQ(0x7fffffff, shift); - result = tflite::IntegerFrExp(INFINITY, &shift); + result = tflite::IntegerFrExp(static_cast(INFINITY), &shift); TF_LITE_MICRO_EXPECT_NEAR(std::numeric_limits::max(), result, 1); TF_LITE_MICRO_EXPECT_EQ(0x7fffffff, shift); - result = tflite::IntegerFrExp(-INFINITY, &shift); + result = tflite::IntegerFrExp(-static_cast(INFINITY), &shift); TF_LITE_MICRO_EXPECT_NEAR(std::numeric_limits::min(), result, 1); TF_LITE_MICRO_EXPECT_EQ(0x7fffffff, shift); } @@ -301,11 +330,11 @@ TF_LITE_MICRO_TEST(QuantizationUtilTest_DoubleFromFractionAndShift) { result = tflite::DoubleFromFractionAndShift(fraction, shift); TF_LITE_MICRO_EXPECT_NEAR(-23.232323, result, 1e-5); - fraction = tflite::IntegerFrExp(NAN, &shift); + fraction = tflite::IntegerFrExp(static_cast(NAN), &shift); result = tflite::DoubleFromFractionAndShift(fraction, shift); TF_LITE_MICRO_EXPECT_TRUE(std::isnan(result)); - fraction = tflite::IntegerFrExp(INFINITY, &shift); + fraction = tflite::IntegerFrExp(static_cast(INFINITY), &shift); result = tflite::DoubleFromFractionAndShift(fraction, shift); TF_LITE_MICRO_EXPECT_FALSE(std::isfinite(result)); } @@ -326,10 +355,10 @@ TF_LITE_MICRO_TEST(QuantizationUtilTest_IntegerDoubleMultiply) { 1e-5); TF_LITE_MICRO_EXPECT_NEAR( 15000000.0, tflite::IntegerDoubleMultiply(3000.0, 5000.0), 1e-5); - TF_LITE_MICRO_EXPECT_TRUE( - std::isnan(tflite::IntegerDoubleMultiply(NAN, 5000.0))); - TF_LITE_MICRO_EXPECT_TRUE( - std::isnan(tflite::IntegerDoubleMultiply(3000.0, NAN))); + TF_LITE_MICRO_EXPECT_TRUE(std::isnan( + tflite::IntegerDoubleMultiply(static_cast(NAN), 5000.0))); + TF_LITE_MICRO_EXPECT_TRUE(std::isnan( + tflite::IntegerDoubleMultiply(3000.0, static_cast(NAN)))); } TF_LITE_MICRO_TEST(QuantizationUtilTest_IntegerDoubleCompare) { @@ -339,8 +368,12 @@ TF_LITE_MICRO_TEST(QuantizationUtilTest_IntegerDoubleCompare) { TF_LITE_MICRO_EXPECT_EQ(0, tflite::IntegerDoubleCompare(0.0, 0.0)); TF_LITE_MICRO_EXPECT_EQ(-1, tflite::IntegerDoubleCompare(-10.0, 10.0)); TF_LITE_MICRO_EXPECT_EQ(1, tflite::IntegerDoubleCompare(123.45, 10.0)); - TF_LITE_MICRO_EXPECT_EQ(1, tflite::IntegerDoubleCompare(NAN, INFINITY)); - TF_LITE_MICRO_EXPECT_EQ(1, tflite::IntegerDoubleCompare(INFINITY, NAN)); + TF_LITE_MICRO_EXPECT_EQ( + 1, tflite::IntegerDoubleCompare(static_cast(NAN), + static_cast(INFINITY))); + TF_LITE_MICRO_EXPECT_EQ( + 1, tflite::IntegerDoubleCompare(static_cast(INFINITY), + static_cast(NAN))); } 
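The casts threaded through these quantization_util tests all exist for the same two reasons: the micro test macros compare their operands directly, so an expected value written as a plain int literal next to a value of the template type IntOut mixes signedness and width once IntOut is unsigned, and NAN and INFINITY expand to double constants, so they are pinned to the FloatIn type actually under test. A sketch of the pattern, assuming it sits alongside RunSafeCastTests where SafeCast, the test macros, and the IntOut/FloatIn parameters are visible:

    template <typename IntOut, typename FloatIn>
    void ExpectedValueCasts() {
      // Both operands of the macro's comparison now have type IntOut.
      TF_LITE_MICRO_EXPECT_EQ(SafeCast<IntOut>(static_cast<FloatIn>(1.99)),
                              static_cast<IntOut>(1));
      // NAN is a double-typed macro; cast it to the float type being exercised.
      TF_LITE_MICRO_EXPECT_EQ(SafeCast<IntOut>(static_cast<FloatIn>(NAN)),
                              static_cast<IntOut>(0));
    }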
TF_LITE_MICRO_TEST(QuantizationUtilTest_PreprocessSoftmaxScaling) { diff --git a/tensorflow/lite/micro/kernels/reduce_test.cc b/tensorflow/lite/micro/kernels/reduce_test.cc index 928dda287aa..65f32efaf5d 100644 --- a/tensorflow/lite/micro/kernels/reduce_test.cc +++ b/tensorflow/lite/micro/kernels/reduce_test.cc @@ -38,10 +38,6 @@ static const int kOutputElements = 4; static const int kOutputShape[] = {4, 2, 1, 1, 2}; static const float kGoldenData[] = {6, 7, 18, 19}; -static TfLiteReducerParams params = { - true // keep_dims -}; - template TfLiteStatus ValidateReduceGoldens(TfLiteTensor* tensors, int tensors_size, const T* expected_output_data, @@ -135,11 +131,15 @@ TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(MeanFloat4DKeepDims) { float output_data[tflite::testing::kOutputElements]; + TfLiteReducerParams params = { + true // keep_dims + }; + tflite::testing::TestMeanFloatInput4D( tflite::testing::kInputShape4D, tflite::testing::kInputData4D, tflite::testing::kAxisShape, tflite::testing::kAxisData, tflite::testing::kOutputShape, tflite::testing::kGoldenData, output_data, - &tflite::testing::params); + ¶ms); } TF_LITE_MICRO_TEST(MeanFloat4DWithoutKeepDims) { diff --git a/tensorflow/lite/micro/kernels/reshape_test.cc b/tensorflow/lite/micro/kernels/reshape_test.cc index 5913c7f86bb..2c84ac1ff04 100644 --- a/tensorflow/lite/micro/kernels/reshape_test.cc +++ b/tensorflow/lite/micro/kernels/reshape_test.cc @@ -46,8 +46,8 @@ void TestReshapeImpl(TfLiteContext* context, TfLiteNode* node, node->custom_initial_data = nullptr; node->custom_initial_data_size = 0; - TF_LITE_MICRO_EXPECT_EQ(registration->init, nullptr); - TF_LITE_MICRO_EXPECT_EQ(registration->free, nullptr); + TF_LITE_MICRO_EXPECT(registration->init == nullptr); + TF_LITE_MICRO_EXPECT(registration->free == nullptr); if (registration->prepare) { // Error can happen either in Prepare or eval stage. 
@@ -64,14 +64,14 @@ void TestReshapeImpl(TfLiteContext* context, TfLiteNode* node, } TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(context, node)); - const int output_dims_count = ElementCount(*output_tensor->dims); const T* output_data = GetTensorData(output_tensor); - for (int i = 0; i < expected_output.size(); ++i) { + for (size_t i = 0; i < expected_output.size(); ++i) { TF_LITE_MICRO_EXPECT_NEAR(expected_output.begin()[i], output_data[i], 1e-5f); } - TF_LITE_MICRO_EXPECT_EQ(expected_dims.size(), output_tensor->dims->size); - for (int i = 0; i < expected_dims.size(); ++i) { + TF_LITE_MICRO_EXPECT_EQ(expected_dims.size(), + static_cast(output_tensor->dims->size)); + for (size_t i = 0; i < expected_dims.size(); ++i) { TF_LITE_MICRO_EXPECT_NEAR(expected_dims.begin()[i], output_tensor->dims->data[i], 1e-5f); } diff --git a/tensorflow/lite/micro/kernels/strided_slice_test.cc b/tensorflow/lite/micro/kernels/strided_slice_test.cc index 6ef162aea3d..4387e4bdde3 100644 --- a/tensorflow/lite/micro/kernels/strided_slice_test.cc +++ b/tensorflow/lite/micro/kernels/strided_slice_test.cc @@ -124,8 +124,7 @@ void TestStrideSlide(std::initializer_list input_shape, if (registration->free) { registration->free(&context, user_data); } - auto* output_tensor = &context.tensors[node.outputs->data[0]]; - for (int i = 0; i < expected_output.size(); ++i) { + for (size_t i = 0; i < expected_output.size(); ++i) { TF_LITE_MICRO_EXPECT_NEAR(expected_output.begin()[i], output_data[i], 1e-5f); } diff --git a/tensorflow/lite/micro/kernels/sub_test.cc b/tensorflow/lite/micro/kernels/sub_test.cc index 169f3ad9568..9c8d476352e 100644 --- a/tensorflow/lite/micro/kernels/sub_test.cc +++ b/tensorflow/lite/micro/kernels/sub_test.cc @@ -431,12 +431,6 @@ TF_LITE_MICRO_TEST(QuantizedSubWithScalarBroadcastUint8) { } } TF_LITE_MICRO_TEST(QuantizedSubWithScalarBroadcastFloat) { - const float scales[] = {0.1, 0.05, 0.1}; - const int zero_points[] = {127, 131, 139}; - uint8_t input1_quantized[tflite::testing::broadcast_output_dims_count]; - uint8_t input2_quantized[tflite::testing::broadcast_output_dims_count]; - uint8_t golden_quantized[tflite::testing::broadcast_output_dims_count]; - uint8_t output[tflite::testing::broadcast_output_dims_count]; float output_float[tflite::testing::broadcast_output_dims_count]; for (int i = 0; i < tflite::testing::broadcast_num_shapes; ++i) { @@ -491,7 +485,6 @@ TF_LITE_MICRO_TEST(QuantizedSubWithMixedBroadcastUint8) { uint8_t input2_quantized[tflite::testing::broadcast_output_dims_count]; uint8_t golden_quantized[tflite::testing::broadcast_output_dims_count]; uint8_t output[tflite::testing::broadcast_output_dims_count]; - float output_float[tflite::testing::broadcast_output_dims_count]; for (int i = 0; i < tflite::testing::broadcast_num_shapes; ++i) { tflite::testing::TestSubQuantized( @@ -512,7 +505,6 @@ TF_LITE_MICRO_TEST(QuantizedSubWithMixedBroadcastInt8) { int8_t input2_quantized[tflite::testing::broadcast_output_dims_count]; int8_t golden_quantized[tflite::testing::broadcast_output_dims_count]; int8_t output[tflite::testing::broadcast_output_dims_count]; - float output_float[tflite::testing::broadcast_output_dims_count]; for (int i = 0; i < tflite::testing::broadcast_num_shapes; ++i) { tflite::testing::TestSubQuantized( diff --git a/tensorflow/lite/micro/kernels/svdf_test.cc b/tensorflow/lite/micro/kernels/svdf_test.cc index ea129efaaa8..fc0a91481fb 100644 --- a/tensorflow/lite/micro/kernels/svdf_test.cc +++ b/tensorflow/lite/micro/kernels/svdf_test.cc @@ -409,34 +409,35 @@ inline 
void TestIntegerSVDF( // Input quant params: float input_scales[] = {1, input_scale}; TfLiteAffineQuantization input_quant = {FloatArrayFromFloats(input_scales), - IntArrayFromInts(zero_points)}; + IntArrayFromInts(zero_points), 0}; tensors[0].quantization = {kTfLiteAffineQuantization, &input_quant}; // Weights features quant params: float weights_features_scales[] = {1, weights_feature_scale}; TfLiteAffineQuantization weights_feature_quant = { FloatArrayFromFloats(weights_features_scales), - IntArrayFromInts(zero_points)}; + IntArrayFromInts(zero_points), 0}; tensors[1].quantization = {kTfLiteAffineQuantization, &weights_feature_quant}; // Weights time quant params: float weights_time_scales[] = {1, weights_time_scale}; TfLiteAffineQuantization weights_time_quant = { - FloatArrayFromFloats(weights_time_scales), IntArrayFromInts(zero_points)}; + FloatArrayFromFloats(weights_time_scales), IntArrayFromInts(zero_points), + 0}; tensors[2].quantization = {kTfLiteAffineQuantization, &weights_time_quant}; // Activation state quant params: float activation_state_scales[] = {1, activation_scale}; TfLiteAffineQuantization activation_state_quant = { FloatArrayFromFloats(activation_state_scales), - IntArrayFromInts(zero_points)}; + IntArrayFromInts(zero_points), 0}; tensors[4].quantization = {kTfLiteAffineQuantization, &activation_state_quant}; // Output quant params: float output_scales[] = {1, output_scale}; TfLiteAffineQuantization output_quant = {FloatArrayFromFloats(output_scales), - IntArrayFromInts(zero_points)}; + IntArrayFromInts(zero_points), 0}; tensors[5].quantization = {kTfLiteAffineQuantization, &output_quant}; ValidateIntegerSVDFGoldens( @@ -627,7 +628,6 @@ TF_LITE_MICRO_TEST(SvdfIntegerInputSize2Rank1ShouldMatchGolden) { int8_t weights_feature_data[] = {-81, -92, 2, 96, 57, 32, 71, 70, 100, -92, -17, -27}; - const int weights_feature_dims_count = num_filters * input_size; int16_t weights_time_data[] = { -10464, 12324, 9142, -11842, -11836, 7273, 9029, -2175, 260, 4067, @@ -635,7 +635,6 @@ TF_LITE_MICRO_TEST(SvdfIntegerInputSize2Rank1ShouldMatchGolden) { -12098, 12461, -7072, 8870, 7739, 11447, 5954, 11765, -5733, 10643, -3534, 8912, 4693, -7761, -8886, -519, -4898, 5067, 3205, -1107, }; - const int weights_time_dims_count = num_filters * memory_size; int32_t bias_data[] = {-409707, 641518, 1662434, -113372}; @@ -669,12 +668,6 @@ TF_LITE_MICRO_TEST(SvdfIntegerInputSize2Rank1ShouldMatchGolden) { batch_size * memory_size * num_filters; int16_t activation_state_data[activation_state_dims_count]; - const int scratch_dims_count = batch_size * num_filters; - int32_t scratch_data[scratch_dims_count]; - - const int scratch_output_dims_count = batch_size * num_units; - int32_t scratch_output_data[scratch_output_dims_count]; - const int output_dims_count = batch_size * num_units; int8_t output_data[output_dims_count]; diff --git a/tensorflow/lite/micro/kernels/tanh_test.cc b/tensorflow/lite/micro/kernels/tanh_test.cc index 54c9816c9a9..4ad51a189ec 100644 --- a/tensorflow/lite/micro/kernels/tanh_test.cc +++ b/tensorflow/lite/micro/kernels/tanh_test.cc @@ -217,7 +217,7 @@ TF_LITE_MICRO_TEST(SimpleTestTanhUInt8) { const float input_scale = 16 / 256.f; const int input_zero_point = 128; - const float output_scale = 1.99999955 / 256.f; + const float output_scale = 1.99999955f / 256.f; const int output_zero_point = 128; const int input_shape[] = {2, 1, tanh_vec_size}; @@ -245,7 +245,7 @@ TF_LITE_MICRO_TEST(SimpleTestTanhUInt8) { const float input_scale = 16 / 256.f; const int input_zero_point = 0; - 
const float output_scale = 1.99999955 / 256.f; + const float output_scale = 1.99999955f / 256.f; const int output_zero_point = 0; const int input_shape[] = {2, 1, tanh_vec_size}; diff --git a/tensorflow/lite/micro/memory_helpers_test.cc b/tensorflow/lite/micro/memory_helpers_test.cc index 82096c6890d..25ade769b01 100644 --- a/tensorflow/lite/micro/memory_helpers_test.cc +++ b/tensorflow/lite/micro/memory_helpers_test.cc @@ -33,78 +33,78 @@ TF_LITE_MICRO_TEST(TestAlignPointerUp) { uint8_t* input0 = reinterpret_cast(0); uint8_t* input0_aligned1 = tflite::AlignPointerUp(input0, 1); - TF_LITE_MICRO_EXPECT_EQ(input0, input0_aligned1); + TF_LITE_MICRO_EXPECT(input0 == input0_aligned1); uint8_t* input0_aligned2 = tflite::AlignPointerUp(input0, 2); - TF_LITE_MICRO_EXPECT_EQ(input0, input0_aligned2); + TF_LITE_MICRO_EXPECT(input0 == input0_aligned2); uint8_t* input0_aligned3 = tflite::AlignPointerUp(input0, 3); - TF_LITE_MICRO_EXPECT_EQ(input0, input0_aligned3); + TF_LITE_MICRO_EXPECT(input0 == input0_aligned3); uint8_t* input0_aligned16 = tflite::AlignPointerUp(input0, 16); - TF_LITE_MICRO_EXPECT_EQ(input0, input0_aligned16); + TF_LITE_MICRO_EXPECT(input0 == input0_aligned16); uint8_t* input23 = reinterpret_cast(23); uint8_t* input23_aligned1 = tflite::AlignPointerUp(input23, 1); - TF_LITE_MICRO_EXPECT_EQ(input23, input23_aligned1); + TF_LITE_MICRO_EXPECT(input23 == input23_aligned1); uint8_t* input23_aligned2 = tflite::AlignPointerUp(input23, 2); uint8_t* expected23_aligned2 = reinterpret_cast(24); - TF_LITE_MICRO_EXPECT_EQ(expected23_aligned2, input23_aligned2); + TF_LITE_MICRO_EXPECT(expected23_aligned2 == input23_aligned2); uint8_t* input23_aligned3 = tflite::AlignPointerUp(input23, 3); uint8_t* expected23_aligned3 = reinterpret_cast(24); - TF_LITE_MICRO_EXPECT_EQ(expected23_aligned3, input23_aligned3); + TF_LITE_MICRO_EXPECT(expected23_aligned3 == input23_aligned3); uint8_t* input23_aligned16 = tflite::AlignPointerUp(input23, 16); uint8_t* expected23_aligned16 = reinterpret_cast(32); - TF_LITE_MICRO_EXPECT_EQ(expected23_aligned16, input23_aligned16); + TF_LITE_MICRO_EXPECT(expected23_aligned16 == input23_aligned16); } TF_LITE_MICRO_TEST(TestAlignPointerDown) { uint8_t* input0 = reinterpret_cast(0); uint8_t* input0_aligned1 = tflite::AlignPointerDown(input0, 1); - TF_LITE_MICRO_EXPECT_EQ(input0, input0_aligned1); + TF_LITE_MICRO_EXPECT(input0 == input0_aligned1); uint8_t* input0_aligned2 = tflite::AlignPointerDown(input0, 2); - TF_LITE_MICRO_EXPECT_EQ(input0, input0_aligned2); + TF_LITE_MICRO_EXPECT(input0 == input0_aligned2); uint8_t* input0_aligned3 = tflite::AlignPointerDown(input0, 3); - TF_LITE_MICRO_EXPECT_EQ(input0, input0_aligned3); + TF_LITE_MICRO_EXPECT(input0 == input0_aligned3); uint8_t* input0_aligned16 = tflite::AlignPointerDown(input0, 16); - TF_LITE_MICRO_EXPECT_EQ(input0, input0_aligned16); + TF_LITE_MICRO_EXPECT(input0 == input0_aligned16); uint8_t* input23 = reinterpret_cast(23); uint8_t* input23_aligned1 = tflite::AlignPointerDown(input23, 1); - TF_LITE_MICRO_EXPECT_EQ(input23, input23_aligned1); + TF_LITE_MICRO_EXPECT(input23 == input23_aligned1); uint8_t* input23_aligned2 = tflite::AlignPointerDown(input23, 2); uint8_t* expected23_aligned2 = reinterpret_cast(22); - TF_LITE_MICRO_EXPECT_EQ(expected23_aligned2, input23_aligned2); + TF_LITE_MICRO_EXPECT(expected23_aligned2 == input23_aligned2); uint8_t* input23_aligned3 = tflite::AlignPointerDown(input23, 3); uint8_t* expected23_aligned3 = reinterpret_cast(21); - TF_LITE_MICRO_EXPECT_EQ(expected23_aligned3, 
input23_aligned3); + TF_LITE_MICRO_EXPECT(expected23_aligned3 == input23_aligned3); uint8_t* input23_aligned16 = tflite::AlignPointerDown(input23, 16); uint8_t* expected23_aligned16 = reinterpret_cast(16); - TF_LITE_MICRO_EXPECT_EQ(expected23_aligned16, input23_aligned16); + TF_LITE_MICRO_EXPECT(expected23_aligned16 == input23_aligned16); } TF_LITE_MICRO_TEST(TestAlignSizeUp) { - TF_LITE_MICRO_EXPECT_EQ(1, tflite::AlignSizeUp(1, 1)); - TF_LITE_MICRO_EXPECT_EQ(2, tflite::AlignSizeUp(1, 2)); - TF_LITE_MICRO_EXPECT_EQ(3, tflite::AlignSizeUp(1, 3)); - TF_LITE_MICRO_EXPECT_EQ(16, tflite::AlignSizeUp(1, 16)); + TF_LITE_MICRO_EXPECT_EQ(static_cast(1), tflite::AlignSizeUp(1, 1)); + TF_LITE_MICRO_EXPECT_EQ(static_cast(2), tflite::AlignSizeUp(1, 2)); + TF_LITE_MICRO_EXPECT_EQ(static_cast(3), tflite::AlignSizeUp(1, 3)); + TF_LITE_MICRO_EXPECT_EQ(static_cast(16), tflite::AlignSizeUp(1, 16)); - TF_LITE_MICRO_EXPECT_EQ(23, tflite::AlignSizeUp(23, 1)); - TF_LITE_MICRO_EXPECT_EQ(24, tflite::AlignSizeUp(23, 2)); - TF_LITE_MICRO_EXPECT_EQ(24, tflite::AlignSizeUp(23, 3)); - TF_LITE_MICRO_EXPECT_EQ(32, tflite::AlignSizeUp(23, 16)); + TF_LITE_MICRO_EXPECT_EQ(static_cast(23), tflite::AlignSizeUp(23, 1)); + TF_LITE_MICRO_EXPECT_EQ(static_cast(24), tflite::AlignSizeUp(23, 2)); + TF_LITE_MICRO_EXPECT_EQ(static_cast(24), tflite::AlignSizeUp(23, 3)); + TF_LITE_MICRO_EXPECT_EQ(static_cast(32), tflite::AlignSizeUp(23, 16)); } TF_LITE_MICRO_TEST(TestTypeSizeOf) { @@ -157,16 +157,16 @@ TF_LITE_MICRO_TEST(TestBytesRequiredForTensor) { TF_LITE_MICRO_EXPECT_EQ( kTfLiteOk, tflite::BytesRequiredForTensor(*tensor100, &bytes, &type_size, micro_test::reporter)); - TF_LITE_MICRO_EXPECT_EQ(400, bytes); - TF_LITE_MICRO_EXPECT_EQ(4, type_size); + TF_LITE_MICRO_EXPECT_EQ(static_cast(400), bytes); + TF_LITE_MICRO_EXPECT_EQ(static_cast(4), type_size); const tflite::Tensor* tensor200 = tflite::testing::Create1dFlatbufferTensor(200); TF_LITE_MICRO_EXPECT_EQ( kTfLiteOk, tflite::BytesRequiredForTensor(*tensor200, &bytes, &type_size, micro_test::reporter)); - TF_LITE_MICRO_EXPECT_EQ(800, bytes); - TF_LITE_MICRO_EXPECT_EQ(4, type_size); + TF_LITE_MICRO_EXPECT_EQ(static_cast(800), bytes); + TF_LITE_MICRO_EXPECT_EQ(static_cast(4), type_size); } TF_LITE_MICRO_TEST(TestAllocateOutputDimensionsFromInput) { diff --git a/tensorflow/lite/micro/memory_planner/greedy_memory_planner_test.cc b/tensorflow/lite/micro/memory_planner/greedy_memory_planner_test.cc index 923013845fa..12e5b392cc5 100644 --- a/tensorflow/lite/micro/memory_planner/greedy_memory_planner_test.cc +++ b/tensorflow/lite/micro/memory_planner/greedy_memory_planner_test.cc @@ -32,7 +32,6 @@ TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(TestReverseSortInPlace) { tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; constexpr int a_size = 10; int a_values[a_size] = {10, 9, 8, 7, 6, 5, 4, 3, 2, 1}; @@ -92,179 +91,182 @@ TF_LITE_MICRO_TEST(TestReverseSortInPlace) { TF_LITE_MICRO_TEST(TestGreedyBasics) { tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; tflite::GreedyMemoryPlanner planner(g_scratch_buffer, kScratchBufferSize); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 10, 0, 1)); + planner.AddBuffer(µ_error_reporter, 10, 0, 1)); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 20, 2, 3)); + planner.AddBuffer(µ_error_reporter, 20, 2, 3)); - TF_LITE_MICRO_EXPECT_EQ(false, planner.DoAnyBuffersOverlap(error_reporter)); + 
TF_LITE_MICRO_EXPECT_EQ(false, + planner.DoAnyBuffersOverlap(µ_error_reporter)); - TF_LITE_MICRO_EXPECT_EQ(20, planner.GetMaximumMemorySize()); + TF_LITE_MICRO_EXPECT_EQ(static_cast(20), + planner.GetMaximumMemorySize()); int offset = -1; TF_LITE_MICRO_EXPECT_EQ( - kTfLiteOk, planner.GetOffsetForBuffer(error_reporter, 0, &offset)); + kTfLiteOk, planner.GetOffsetForBuffer(µ_error_reporter, 0, &offset)); TF_LITE_MICRO_EXPECT_EQ(0, offset); TF_LITE_MICRO_EXPECT_EQ( - kTfLiteOk, planner.GetOffsetForBuffer(error_reporter, 1, &offset)); + kTfLiteOk, planner.GetOffsetForBuffer(µ_error_reporter, 1, &offset)); TF_LITE_MICRO_EXPECT_EQ(0, offset); } TF_LITE_MICRO_TEST(TestGreedyMedium) { tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; tflite::GreedyMemoryPlanner planner(g_scratch_buffer, kScratchBufferSize); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 10, 0, 1)); + planner.AddBuffer(µ_error_reporter, 10, 0, 1)); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 20, 1, 2)); + planner.AddBuffer(µ_error_reporter, 20, 1, 2)); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 30, 2, 3)); + planner.AddBuffer(µ_error_reporter, 30, 2, 3)); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 40, 3, 4)); + planner.AddBuffer(µ_error_reporter, 40, 3, 4)); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 50, 0, 1)); + planner.AddBuffer(µ_error_reporter, 50, 0, 1)); int offset = -1; TF_LITE_MICRO_EXPECT_EQ( - kTfLiteOk, planner.GetOffsetForBuffer(error_reporter, 0, &offset)); + kTfLiteOk, planner.GetOffsetForBuffer(µ_error_reporter, 0, &offset)); TF_LITE_MICRO_EXPECT_EQ(50, offset); TF_LITE_MICRO_EXPECT_EQ( - kTfLiteOk, planner.GetOffsetForBuffer(error_reporter, 1, &offset)); + kTfLiteOk, planner.GetOffsetForBuffer(µ_error_reporter, 1, &offset)); TF_LITE_MICRO_EXPECT_EQ(70, offset); TF_LITE_MICRO_EXPECT_EQ( - kTfLiteOk, planner.GetOffsetForBuffer(error_reporter, 2, &offset)); + kTfLiteOk, planner.GetOffsetForBuffer(µ_error_reporter, 2, &offset)); TF_LITE_MICRO_EXPECT_EQ(40, offset); TF_LITE_MICRO_EXPECT_EQ( - kTfLiteOk, planner.GetOffsetForBuffer(error_reporter, 3, &offset)); + kTfLiteOk, planner.GetOffsetForBuffer(µ_error_reporter, 3, &offset)); TF_LITE_MICRO_EXPECT_EQ(0, offset); TF_LITE_MICRO_EXPECT_EQ( - kTfLiteOk, planner.GetOffsetForBuffer(error_reporter, 4, &offset)); + kTfLiteOk, planner.GetOffsetForBuffer(µ_error_reporter, 4, &offset)); TF_LITE_MICRO_EXPECT_EQ(0, offset); - planner.PrintMemoryPlan(error_reporter); + planner.PrintMemoryPlan(µ_error_reporter); - TF_LITE_MICRO_EXPECT_EQ(false, planner.DoAnyBuffersOverlap(error_reporter)); + TF_LITE_MICRO_EXPECT_EQ(false, + planner.DoAnyBuffersOverlap(µ_error_reporter)); - TF_LITE_MICRO_EXPECT_EQ(90, planner.GetMaximumMemorySize()); + TF_LITE_MICRO_EXPECT_EQ(static_cast(90), + planner.GetMaximumMemorySize()); } TF_LITE_MICRO_TEST(TestPersonDetectionModel) { tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; tflite::GreedyMemoryPlanner planner(g_scratch_buffer, kScratchBufferSize); // These buffer sizes and time ranges are taken from the 250KB MobileNet model // used in the person detection example. 
+ TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 9216, 0, 29)); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 9216, 0, 29)); + planner.AddBuffer(µ_error_reporter, 3, 28, 29)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 256, 27, 28)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 2304, 26, 27)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 2304, 25, 26)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 2304, 24, 25)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 1152, 23, 24)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 4608, 22, 23)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 4608, 21, 22)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 4608, 20, 21)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 4608, 19, 20)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 4608, 18, 19)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 4608, 17, 18)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 4608, 16, 17)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 4608, 15, 16)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 4608, 14, 15)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 4608, 13, 14)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 4608, 12, 13)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 2304, 11, 12)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 9216, 10, 11)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 9216, 9, 10)); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 3, 28, 29)); + planner.AddBuffer(µ_error_reporter, 9216, 8, 9)); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 256, 27, 28)); + planner.AddBuffer(µ_error_reporter, 4608, 7, 8)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 18432, 6, 7)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 18432, 5, 6)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 18432, 4, 5)); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 2304, 26, 27)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 2304, 25, 26)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 2304, 24, 25)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 1152, 23, 24)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 4608, 22, 23)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 4608, 21, 22)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 4608, 20, 21)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 4608, 19, 20)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 4608, 18, 19)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 4608, 17, 18)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 4608, 16, 17)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - 
planner.AddBuffer(error_reporter, 4608, 15, 16)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 4608, 14, 15)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 4608, 13, 14)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 4608, 12, 13)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 2304, 11, 12)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 9216, 10, 11)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 9216, 9, 10)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 9216, 8, 9)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 4608, 7, 8)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 18432, 6, 7)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 18432, 5, 6)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 18432, 4, 5)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 9216, 3, 4)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 36864, 2, 3)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 18432, 1, 2)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 18432, 0, 1)); + planner.AddBuffer(µ_error_reporter, 9216, 3, 4)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 36864, 2, 3)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 18432, 1, 2)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 18432, 0, 1)); - planner.PrintMemoryPlan(error_reporter); + planner.PrintMemoryPlan(µ_error_reporter); - TF_LITE_MICRO_EXPECT_EQ(false, planner.DoAnyBuffersOverlap(error_reporter)); + TF_LITE_MICRO_EXPECT_EQ(false, + planner.DoAnyBuffersOverlap(µ_error_reporter)); // The sum of all the buffers is 241,027 bytes, so we at least expect the plan // to come up with something smaller than this. 
- TF_LITE_MICRO_EXPECT_GT(241027, planner.GetMaximumMemorySize()); + TF_LITE_MICRO_EXPECT_GT(static_cast(241027), + planner.GetMaximumMemorySize()); } TF_LITE_MICRO_TEST(TestOverlapCase) { tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; tflite::GreedyMemoryPlanner planner(g_scratch_buffer, kScratchBufferSize); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 100, 0, 1)); + planner.AddBuffer(µ_error_reporter, 100, 0, 1)); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 50, 2, 3)); + planner.AddBuffer(µ_error_reporter, 50, 2, 3)); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 20, 1, 2)); + planner.AddBuffer(µ_error_reporter, 20, 1, 2)); - planner.PrintMemoryPlan(error_reporter); + planner.PrintMemoryPlan(µ_error_reporter); - TF_LITE_MICRO_EXPECT_EQ(false, planner.DoAnyBuffersOverlap(error_reporter)); + TF_LITE_MICRO_EXPECT_EQ(false, + planner.DoAnyBuffersOverlap(µ_error_reporter)); - TF_LITE_MICRO_EXPECT_EQ(120, planner.GetMaximumMemorySize()); + TF_LITE_MICRO_EXPECT_EQ(static_cast(120), + planner.GetMaximumMemorySize()); } TF_LITE_MICRO_TEST(TestSmallScratch) { tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; constexpr int scratch_buffer_size = 40; unsigned char scratch_buffer[scratch_buffer_size]; tflite::GreedyMemoryPlanner planner(scratch_buffer, scratch_buffer_size); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 100, 0, 1)); + planner.AddBuffer(µ_error_reporter, 100, 0, 1)); TF_LITE_MICRO_EXPECT_EQ(kTfLiteError, - planner.AddBuffer(error_reporter, 50, 2, 3)); + planner.AddBuffer(µ_error_reporter, 50, 2, 3)); } TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/memory_planner/linear_memory_planner_test.cc b/tensorflow/lite/micro/memory_planner/linear_memory_planner_test.cc index 61a914b5e91..f0b50383dfd 100644 --- a/tensorflow/lite/micro/memory_planner/linear_memory_planner_test.cc +++ b/tensorflow/lite/micro/memory_planner/linear_memory_planner_test.cc @@ -21,104 +21,103 @@ TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(TestBasics) { tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; tflite::LinearMemoryPlanner planner; TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 10, 0, 1)); + planner.AddBuffer(µ_error_reporter, 10, 0, 1)); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 20, 1, 2)); - TF_LITE_MICRO_EXPECT_EQ(30, planner.GetMaximumMemorySize()); + planner.AddBuffer(µ_error_reporter, 20, 1, 2)); + TF_LITE_MICRO_EXPECT_EQ(static_cast(30), + planner.GetMaximumMemorySize()); int offset = -1; TF_LITE_MICRO_EXPECT_EQ( - kTfLiteOk, planner.GetOffsetForBuffer(error_reporter, 0, &offset)); + kTfLiteOk, planner.GetOffsetForBuffer(µ_error_reporter, 0, &offset)); TF_LITE_MICRO_EXPECT_EQ(0, offset); TF_LITE_MICRO_EXPECT_EQ( - kTfLiteOk, planner.GetOffsetForBuffer(error_reporter, 1, &offset)); + kTfLiteOk, planner.GetOffsetForBuffer(µ_error_reporter, 1, &offset)); TF_LITE_MICRO_EXPECT_EQ(10, offset); } TF_LITE_MICRO_TEST(TestErrorHandling) { tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; tflite::LinearMemoryPlanner planner; TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 10, 0, 1)); + planner.AddBuffer(µ_error_reporter, 10, 0, 1)); int offset = -1; - TF_LITE_MICRO_EXPECT_EQ( - kTfLiteError, 
planner.GetOffsetForBuffer(error_reporter, 1, &offset)); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteError, planner.GetOffsetForBuffer( + µ_error_reporter, 1, &offset)); } TF_LITE_MICRO_TEST(TestPersonDetectionModel) { tflite::MicroErrorReporter micro_error_reporter; - tflite::ErrorReporter* error_reporter = µ_error_reporter; tflite::LinearMemoryPlanner planner; + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 9216, 0, 29)); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 9216, 0, 29)); + planner.AddBuffer(µ_error_reporter, 3, 28, 29)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 256, 27, 28)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 2304, 26, 27)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 2304, 25, 26)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 2304, 24, 25)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 1152, 23, 24)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 4608, 22, 23)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 4608, 21, 22)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 4608, 20, 21)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 4608, 19, 20)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 4608, 18, 19)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 4608, 17, 18)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 4608, 16, 17)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 4608, 15, 16)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 4608, 14, 15)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 4608, 13, 14)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 4608, 12, 13)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 2304, 11, 12)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 9216, 10, 11)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 9216, 9, 10)); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 3, 28, 29)); + planner.AddBuffer(µ_error_reporter, 9216, 8, 9)); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 256, 27, 28)); + planner.AddBuffer(µ_error_reporter, 4608, 7, 8)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 18432, 6, 7)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 18432, 5, 6)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 18432, 4, 5)); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 2304, 26, 27)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 2304, 25, 26)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 2304, 24, 25)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 1152, 23, 24)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 4608, 22, 23)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 4608, 21, 22)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 4608, 20, 21)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - 
planner.AddBuffer(error_reporter, 4608, 19, 20)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 4608, 18, 19)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 4608, 17, 18)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 4608, 16, 17)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 4608, 15, 16)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 4608, 14, 15)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 4608, 13, 14)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 4608, 12, 13)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 2304, 11, 12)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 9216, 10, 11)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 9216, 9, 10)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 9216, 8, 9)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 4608, 7, 8)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 18432, 6, 7)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 18432, 5, 6)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 18432, 4, 5)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 9216, 3, 4)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 36864, 2, 3)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 18432, 1, 2)); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, - planner.AddBuffer(error_reporter, 18432, 0, 1)); - TF_LITE_MICRO_EXPECT_EQ(241027, planner.GetMaximumMemorySize()); + planner.AddBuffer(µ_error_reporter, 9216, 3, 4)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 36864, 2, 3)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 18432, 1, 2)); + TF_LITE_MICRO_EXPECT_EQ( + kTfLiteOk, planner.AddBuffer(µ_error_reporter, 18432, 0, 1)); + TF_LITE_MICRO_EXPECT_EQ(static_cast(241027), + planner.GetMaximumMemorySize()); } TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/micro_allocator_test.cc b/tensorflow/lite/micro/micro_allocator_test.cc index 6b63c8ceb4f..67da95c3b0a 100644 --- a/tensorflow/lite/micro/micro_allocator_test.cc +++ b/tensorflow/lite/micro/micro_allocator_test.cc @@ -32,9 +32,9 @@ void VerifyMockTensor(TfLiteTensor* tensor, bool is_variable = false) { TF_LITE_MICRO_EXPECT_EQ(1, tensor->dims->size); TF_LITE_MICRO_EXPECT_EQ(1, tensor->dims->data[0]); TF_LITE_MICRO_EXPECT_EQ(is_variable, tensor->is_variable); - TF_LITE_MICRO_EXPECT_EQ(4, tensor->bytes); + TF_LITE_MICRO_EXPECT_EQ(static_cast(4), tensor->bytes); TF_LITE_MICRO_EXPECT_NE(nullptr, tensor->data.raw); - TF_LITE_MICRO_EXPECT_EQ(0, + TF_LITE_MICRO_EXPECT_EQ(static_cast(0), (reinterpret_cast(tensor->data.raw) % kExpectedAlignment)); } @@ -43,14 +43,14 @@ void VerifyMockWeightTensor(TfLiteTensor* tensor) { TF_LITE_MICRO_EXPECT_EQ(kTfLiteUInt8, tensor->type); TF_LITE_MICRO_EXPECT_EQ(1, tensor->dims->size); TF_LITE_MICRO_EXPECT_EQ(1, tensor->dims->data[0]); - TF_LITE_MICRO_EXPECT_EQ(1, tensor->bytes); + TF_LITE_MICRO_EXPECT_EQ(static_cast(1), tensor->bytes); TF_LITE_MICRO_EXPECT_NE(nullptr, tensor->data.raw); } void EnsureUniqueVariableTensorBuffer(TfLiteContext* context, const int variable_tensor_idx) { - for (int i = 0; i < context->tensors_size; i++) { - if (i != 
variable_tensor_idx) { + for (size_t i = 0; i < context->tensors_size; i++) { + if (i != static_cast(variable_tensor_idx)) { TF_LITE_MICRO_EXPECT_NE(context->tensors[variable_tensor_idx].data.raw, context->tensors[i].data.raw); } @@ -73,8 +73,6 @@ void VerifyRegistrationAndNodeAllocation( TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(TestInitializeRuntimeTensor) { - const tflite::Model* model = tflite::testing::GetSimpleMockModel(); - TfLiteContext context; constexpr size_t arena_size = 1024; uint8_t arena[arena_size]; tflite::SimpleMemoryAllocator* simple_allocator = @@ -93,16 +91,14 @@ TF_LITE_MICRO_TEST(TestInitializeRuntimeTensor) { TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt32, allocated_tensor.type); TF_LITE_MICRO_EXPECT_EQ(1, allocated_tensor.dims->size); TF_LITE_MICRO_EXPECT_EQ(100, allocated_tensor.dims->data[0]); - TF_LITE_MICRO_EXPECT_EQ(400, allocated_tensor.bytes); - TF_LITE_MICRO_EXPECT_EQ(nullptr, allocated_tensor.data.i32); + TF_LITE_MICRO_EXPECT_EQ(static_cast(400), allocated_tensor.bytes); + TF_LITE_MICRO_EXPECT(nullptr == allocated_tensor.data.i32); TF_LITE_MICRO_EXPECT_EQ(kTfLiteArenaRw, allocated_tensor.allocation_type); simple_allocator->~SimpleMemoryAllocator(); } TF_LITE_MICRO_TEST(TestInitializeQuantizedTensor) { - const tflite::Model* model = tflite::testing::GetSimpleMockModel(); - TfLiteContext context; constexpr size_t arena_size = 1024; uint8_t arena[arena_size]; tflite::SimpleMemoryAllocator* simple_allocator = @@ -122,16 +118,14 @@ TF_LITE_MICRO_TEST(TestInitializeQuantizedTensor) { TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt32, allocated_tensor.type); TF_LITE_MICRO_EXPECT_EQ(1, allocated_tensor.dims->size); TF_LITE_MICRO_EXPECT_EQ(100, allocated_tensor.dims->data[0]); - TF_LITE_MICRO_EXPECT_EQ(400, allocated_tensor.bytes); - TF_LITE_MICRO_EXPECT_EQ(nullptr, allocated_tensor.data.i32); + TF_LITE_MICRO_EXPECT_EQ(static_cast(400), allocated_tensor.bytes); + TF_LITE_MICRO_EXPECT(nullptr == allocated_tensor.data.i32); TF_LITE_MICRO_EXPECT_EQ(kTfLiteArenaRw, allocated_tensor.allocation_type); simple_allocator->~SimpleMemoryAllocator(); } TF_LITE_MICRO_TEST(TestMissingQuantization) { - const tflite::Model* model = tflite::testing::GetSimpleMockModel(); - TfLiteContext context; constexpr size_t arena_size = 1024; uint8_t arena[arena_size]; tflite::SimpleMemoryAllocator* simple_allocator = @@ -151,8 +145,8 @@ TF_LITE_MICRO_TEST(TestMissingQuantization) { TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt32, allocated_tensor.type); TF_LITE_MICRO_EXPECT_EQ(1, allocated_tensor.dims->size); TF_LITE_MICRO_EXPECT_EQ(100, allocated_tensor.dims->data[0]); - TF_LITE_MICRO_EXPECT_EQ(400, allocated_tensor.bytes); - TF_LITE_MICRO_EXPECT_EQ(nullptr, allocated_tensor.data.i32); + TF_LITE_MICRO_EXPECT_EQ(static_cast(400), allocated_tensor.bytes); + TF_LITE_MICRO_EXPECT(nullptr == allocated_tensor.data.i32); } TF_LITE_MICRO_TEST(TestFailsWhenModelStartsTwice) { @@ -164,7 +158,7 @@ TF_LITE_MICRO_TEST(TestFailsWhenModelStartsTwice) { uint8_t arena[arena_size]; tflite::MicroAllocator* allocator = tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter); - TF_LITE_MICRO_EXPECT_NE(nullptr, allocator); + TF_LITE_MICRO_EXPECT(nullptr != allocator); TF_LITE_MICRO_EXPECT_EQ( kTfLiteOk, allocator->StartModelAllocation(model, &context, op_resolver, &node_and_registration)); @@ -177,12 +171,11 @@ TF_LITE_MICRO_TEST(TestFailsWhenModelFinishesBeforeStart) { const tflite::Model* model = tflite::testing::GetSimpleMockModel(); TfLiteContext context; tflite::AllOpsResolver op_resolver = 
tflite::testing::GetOpResolver(); - tflite::NodeAndRegistration* node_and_registration; constexpr size_t arena_size = 1024; uint8_t arena[arena_size]; tflite::MicroAllocator* allocator = tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter); - TF_LITE_MICRO_EXPECT_NE(nullptr, allocator); + TF_LITE_MICRO_EXPECT(nullptr != allocator); TF_LITE_MICRO_EXPECT_EQ(kTfLiteError, allocator->FinishModelAllocation(model, &context)); } @@ -196,14 +189,14 @@ TF_LITE_MICRO_TEST(TestMockModelAllocation) { uint8_t arena[arena_size]; tflite::MicroAllocator* allocator = tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter); - TF_LITE_MICRO_EXPECT_NE(nullptr, allocator); + TF_LITE_MICRO_EXPECT(nullptr != allocator); TF_LITE_MICRO_EXPECT_EQ( kTfLiteOk, allocator->StartModelAllocation(model, &context, op_resolver, &node_and_registration)); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, allocator->FinishModelAllocation(model, &context)); - TF_LITE_MICRO_EXPECT_EQ(4, context.tensors_size); + TF_LITE_MICRO_EXPECT_EQ(static_cast(4), context.tensors_size); // NOTE: Tensor indexes match the values in GetSimpleMockModel(). tflite::testing::VerifyMockTensor(&context.tensors[0]); @@ -251,7 +244,7 @@ TF_LITE_MICRO_TEST(TestAllocationForModelsWithBranches) { // t0 is the first tensor, so place it in offset 0. TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[0].data.uint8 - start); // bytes = 2 * 2 * 3 * sizeof(float32) = 48, same for other tensors. - TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[0].bytes); + TF_LITE_MICRO_EXPECT_EQ(static_cast(48), context.tensors[0].bytes); // t1 can't reuse any memory, as n0 requires both t0 and t1. TF_LITE_MICRO_EXPECT_EQ(96, context.tensors[1].data.uint8 - start); // t2 can't reuse any memory, as n1 requires both t0 and t2. Also n2 requires @@ -274,14 +267,14 @@ TF_LITE_MICRO_TEST(TestAllocationForComplexModelAllocation) { uint8_t arena[arena_size]; tflite::MicroAllocator* allocator = tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter); - TF_LITE_MICRO_EXPECT_NE(nullptr, allocator); + TF_LITE_MICRO_EXPECT(nullptr != allocator); TF_LITE_MICRO_EXPECT_EQ( kTfLiteOk, allocator->StartModelAllocation(model, &context, op_resolver, &node_and_registration)); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, allocator->FinishModelAllocation(model, &context)); - TF_LITE_MICRO_EXPECT_EQ(10, context.tensors_size); + TF_LITE_MICRO_EXPECT_EQ(static_cast(10), context.tensors_size); // NOTE: Tensor indexes match the values in GetComplexMockModel(). tflite::testing::VerifyMockTensor(&context.tensors[0]); @@ -356,7 +349,7 @@ TF_LITE_MICRO_TEST(OfflinePlannerBranchesAllOnline) { // the offsets be should identical to that test. 
uint8_t* start = context.tensors[0].data.uint8; TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[0].data.uint8 - start); - TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[0].bytes); + TF_LITE_MICRO_EXPECT_EQ(static_cast(48), context.tensors[0].bytes); TF_LITE_MICRO_EXPECT_EQ(96, context.tensors[1].data.uint8 - start); TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[2].data.uint8 - start); TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[3].data.uint8 - start); @@ -464,7 +457,7 @@ TF_LITE_MICRO_TEST(OfflinePlannerOverlappingAllocation) { TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[1].data.uint8 - start); TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[2].data.uint8 - start); TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[3].data.uint8 - start); - TF_LITE_MICRO_EXPECT_EQ(48, context.tensors[0].bytes); + TF_LITE_MICRO_EXPECT_EQ(static_cast(48), context.tensors[0].bytes); } TF_LITE_MICRO_TEST(OfflinePlannerOfflineOnline) { @@ -562,21 +555,21 @@ TF_LITE_MICRO_TEST(TestAllocateTfLiteTensorWithReset) { uint8_t arena[arena_size]; tflite::MicroAllocator* allocator = tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter); - TF_LITE_MICRO_EXPECT_NE(allocator, nullptr); + TF_LITE_MICRO_EXPECT(allocator != nullptr); TfLiteTensor* tensor1 = allocator->AllocateTfLiteTensor(model, /*subgraph_idx=*/1); - TF_LITE_MICRO_EXPECT_NE(tensor1, nullptr); + TF_LITE_MICRO_EXPECT(tensor1 != nullptr); allocator->ResetTempAllocations(); TfLiteTensor* tensor2 = allocator->AllocateTfLiteTensor(model, /*subgraph_idx=*/2); - TF_LITE_MICRO_EXPECT_NE(tensor1, nullptr); + TF_LITE_MICRO_EXPECT(tensor1 != nullptr); // The address of tensor2 should be equal than the address of tensor1 since // allocations were not chained: - TF_LITE_MICRO_EXPECT_EQ(tensor2, tensor1); + TF_LITE_MICRO_EXPECT(tensor2 == tensor1); } TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/micro_interpreter_test.cc b/tensorflow/lite/micro/micro_interpreter_test.cc index a5be011e2f0..21c7e935f17 100644 --- a/tensorflow/lite/micro/micro_interpreter_test.cc +++ b/tensorflow/lite/micro/micro_interpreter_test.cc @@ -82,15 +82,15 @@ TF_LITE_MICRO_TEST(TestInterpreter) { micro_test::reporter); TF_LITE_MICRO_EXPECT_EQ(interpreter.AllocateTensors(), kTfLiteOk); TF_LITE_MICRO_EXPECT_LE(interpreter.arena_used_bytes(), 928 + 100); - TF_LITE_MICRO_EXPECT_EQ(1, interpreter.inputs_size()); - TF_LITE_MICRO_EXPECT_EQ(2, interpreter.outputs_size()); + TF_LITE_MICRO_EXPECT_EQ(static_cast(1), interpreter.inputs_size()); + TF_LITE_MICRO_EXPECT_EQ(static_cast(2), interpreter.outputs_size()); TfLiteTensor* input = interpreter.input(0); TF_LITE_MICRO_EXPECT_NE(nullptr, input); TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt32, input->type); TF_LITE_MICRO_EXPECT_EQ(1, input->dims->size); TF_LITE_MICRO_EXPECT_EQ(1, input->dims->data[0]); - TF_LITE_MICRO_EXPECT_EQ(4, input->bytes); + TF_LITE_MICRO_EXPECT_EQ(static_cast(4), input->bytes); TF_LITE_MICRO_EXPECT_NE(nullptr, input->data.i32); input->data.i32[0] = 21; @@ -101,7 +101,7 @@ TF_LITE_MICRO_TEST(TestInterpreter) { TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt32, output->type); TF_LITE_MICRO_EXPECT_EQ(1, output->dims->size); TF_LITE_MICRO_EXPECT_EQ(1, output->dims->data[0]); - TF_LITE_MICRO_EXPECT_EQ(4, output->bytes); + TF_LITE_MICRO_EXPECT_EQ(static_cast(4), output->bytes); TF_LITE_MICRO_EXPECT_NE(nullptr, output->data.i32); TF_LITE_MICRO_EXPECT_EQ(42, output->data.i32[0]); @@ -110,7 +110,7 @@ TF_LITE_MICRO_TEST(TestInterpreter) { TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt32, output->type); TF_LITE_MICRO_EXPECT_EQ(1, output->dims->size); 
TF_LITE_MICRO_EXPECT_EQ(1, output->dims->data[0]); - TF_LITE_MICRO_EXPECT_EQ(4, output->bytes); + TF_LITE_MICRO_EXPECT_EQ(static_cast(4), output->bytes); TF_LITE_MICRO_EXPECT_NE(nullptr, output->data.i32); TF_LITE_MICRO_EXPECT_EQ(42, output->data.i32[0]); @@ -133,8 +133,8 @@ TF_LITE_MICRO_TEST(TestKernelMemoryPlanning) { allocator_buffer_size, micro_test::reporter); TF_LITE_MICRO_EXPECT_EQ(interpreter.AllocateTensors(), kTfLiteOk); - TF_LITE_MICRO_EXPECT_EQ(1, interpreter.inputs_size()); - TF_LITE_MICRO_EXPECT_EQ(2, interpreter.outputs_size()); + TF_LITE_MICRO_EXPECT_EQ(static_cast(1), interpreter.inputs_size()); + TF_LITE_MICRO_EXPECT_EQ(static_cast(2), interpreter.outputs_size()); TfLiteTensor* input = interpreter.input(0); TF_LITE_MICRO_EXPECT_EQ(1, input->dims->size); @@ -177,8 +177,8 @@ TF_LITE_MICRO_TEST(TestVariableTensorReset) { micro_test::reporter); TF_LITE_MICRO_EXPECT_EQ(interpreter.AllocateTensors(), kTfLiteOk); TF_LITE_MICRO_EXPECT_LE(interpreter.arena_used_bytes(), 2096 + 100); - TF_LITE_MICRO_EXPECT_EQ(1, interpreter.inputs_size()); - TF_LITE_MICRO_EXPECT_EQ(1, interpreter.outputs_size()); + TF_LITE_MICRO_EXPECT_EQ(static_cast(1), interpreter.inputs_size()); + TF_LITE_MICRO_EXPECT_EQ(static_cast(1), interpreter.outputs_size()); // Assign hard-code values: for (size_t i = 0; i < interpreter.tensors_size(); ++i) { @@ -306,25 +306,28 @@ TF_LITE_MICRO_TEST(TestIncompleteInitializationAllocationsWithSmallArena) { // Ensure allocations are zero (ignore tail since some internal structs are // initialized with this space): TF_LITE_MICRO_EXPECT_EQ( - 0, allocator->GetSimpleMemoryAllocator()->GetHeadUsedBytes()); + static_cast(0), + allocator->GetSimpleMemoryAllocator()->GetHeadUsedBytes()); TF_LITE_MICRO_EXPECT_EQ( - 0, allocator - ->GetRecordedAllocation( - tflite::RecordedAllocationType::kTfLiteTensorArray) - .used_bytes); + static_cast(0), + allocator + ->GetRecordedAllocation( + tflite::RecordedAllocationType::kTfLiteTensorArray) + .used_bytes); TF_LITE_MICRO_EXPECT_EQ( - 0, allocator - ->GetRecordedAllocation(tflite::RecordedAllocationType:: - kTfLiteTensorArrayQuantizationData) - .used_bytes); + static_cast(0), + allocator + ->GetRecordedAllocation(tflite::RecordedAllocationType:: + kTfLiteTensorArrayQuantizationData) + .used_bytes); TF_LITE_MICRO_EXPECT_EQ( - 0, + static_cast(0), allocator ->GetRecordedAllocation( tflite::RecordedAllocationType::kTfLiteTensorVariableBufferData) .used_bytes); TF_LITE_MICRO_EXPECT_EQ( - 0, + static_cast(0), allocator->GetRecordedAllocation(tflite::RecordedAllocationType::kOpData) .used_bytes); } @@ -349,20 +352,22 @@ TF_LITE_MICRO_TEST(TestInterpreterDoesNotAllocateUntilInvoke) { // Ensure allocations are zero (ignore tail since some internal structs are // initialized with this space): TF_LITE_MICRO_EXPECT_EQ( - 0, allocator->GetSimpleMemoryAllocator()->GetHeadUsedBytes()); + static_cast(0), + allocator->GetSimpleMemoryAllocator()->GetHeadUsedBytes()); TF_LITE_MICRO_EXPECT_EQ( - 0, allocator - ->GetRecordedAllocation( - tflite::RecordedAllocationType::kTfLiteTensorArray) - .used_bytes); + static_cast(0), + allocator + ->GetRecordedAllocation( + tflite::RecordedAllocationType::kTfLiteTensorArray) + .used_bytes); TF_LITE_MICRO_EXPECT_EQ( - 0, + static_cast(0), allocator ->GetRecordedAllocation( tflite::RecordedAllocationType::kTfLiteTensorVariableBufferData) .used_bytes); TF_LITE_MICRO_EXPECT_EQ( - 0, + static_cast(0), allocator->GetRecordedAllocation(tflite::RecordedAllocationType::kOpData) .used_bytes); @@ -372,28 +377,29 @@ 
TF_LITE_MICRO_TEST(TestInterpreterDoesNotAllocateUntilInvoke) { // Allocation sizes vary based on platform - check that allocations are now // non-zero: TF_LITE_MICRO_EXPECT_GT( - allocator->GetSimpleMemoryAllocator()->GetHeadUsedBytes(), 0); + allocator->GetSimpleMemoryAllocator()->GetHeadUsedBytes(), + static_cast(0)); TF_LITE_MICRO_EXPECT_GT( allocator ->GetRecordedAllocation( tflite::RecordedAllocationType::kTfLiteTensorArray) .used_bytes, - 0); + static_cast(0)); TF_LITE_MICRO_EXPECT_GT( allocator ->GetRecordedAllocation( tflite::RecordedAllocationType::kTfLiteTensorVariableBufferData) .used_bytes, - 0); + static_cast(0)); // TODO(b/160160549): This check is mostly meaningless right now because the - // operator creation in our mock models is inconsistent. Revisit what this + // operator creation in our mock models is inconsistent. Revisit what this // check should be once the mock models are properly created. TF_LITE_MICRO_EXPECT_EQ( allocator->GetRecordedAllocation(tflite::RecordedAllocationType::kOpData) .used_bytes, - 0); + static_cast(0)); } TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/micro_mutable_op_resolver_test.cc b/tensorflow/lite/micro/micro_mutable_op_resolver_test.cc index fe9c8de5959..efe41ff4e2f 100644 --- a/tensorflow/lite/micro/micro_mutable_op_resolver_test.cc +++ b/tensorflow/lite/micro/micro_mutable_op_resolver_test.cc @@ -65,8 +65,11 @@ TF_LITE_MICRO_TEST(TestOperations) { using tflite::MicroMutableOpResolver; using tflite::OpResolver; - static TfLiteRegistration r = {tflite::MockInit, tflite::MockFree, - tflite::MockPrepare, tflite::MockInvoke}; + static TfLiteRegistration r = {}; + r.init = tflite::MockInit; + r.free = tflite::MockFree; + r.prepare = tflite::MockPrepare; + r.invoke = tflite::MockInvoke; MicroMutableOpResolver<1> micro_op_resolver; TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, @@ -78,20 +81,21 @@ TF_LITE_MICRO_TEST(TestOperations) { tflite::MicroOpResolver* resolver = µ_op_resolver; - TF_LITE_MICRO_EXPECT_EQ(1, micro_op_resolver.GetRegistrationLength()); + TF_LITE_MICRO_EXPECT_EQ(static_cast(1), + micro_op_resolver.GetRegistrationLength()); const TfLiteRegistration* registration = resolver->FindOp(BuiltinOperator_RELU); - TF_LITE_MICRO_EXPECT_EQ(nullptr, registration); + TF_LITE_MICRO_EXPECT(nullptr == registration); registration = resolver->FindOp("mock_custom"); - TF_LITE_MICRO_EXPECT_NE(nullptr, registration); - TF_LITE_MICRO_EXPECT_EQ(nullptr, registration->init(nullptr, nullptr, 0)); + TF_LITE_MICRO_EXPECT(nullptr != registration); + TF_LITE_MICRO_EXPECT(nullptr == registration->init(nullptr, nullptr, 0)); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(nullptr, nullptr)); TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(nullptr, nullptr)); registration = resolver->FindOp("nonexistent_custom"); - TF_LITE_MICRO_EXPECT_EQ(nullptr, registration); + TF_LITE_MICRO_EXPECT(nullptr == registration); } TF_LITE_MICRO_TEST(TestErrorReporting) { @@ -99,8 +103,11 @@ TF_LITE_MICRO_TEST(TestErrorReporting) { using tflite::BuiltinOperator_RELU; using tflite::MicroMutableOpResolver; - static TfLiteRegistration r = {tflite::MockInit, tflite::MockFree, - tflite::MockPrepare, tflite::MockInvoke}; + static TfLiteRegistration r = {}; + r.init = tflite::MockInit; + r.free = tflite::MockFree; + r.prepare = tflite::MockPrepare; + r.invoke = tflite::MockInvoke; tflite::MockErrorReporter mock_reporter; MicroMutableOpResolver<1> micro_op_resolver(&mock_reporter); diff --git a/tensorflow/lite/micro/micro_string_test.cc 
b/tensorflow/lite/micro/micro_string_test.cc index fb8183bb492..400f908f97f 100644 --- a/tensorflow/lite/micro/micro_string_test.cc +++ b/tensorflow/lite/micro/micro_string_test.cc @@ -24,7 +24,7 @@ TF_LITE_MICRO_TEST(FormatPositiveIntShouldMatchExpected) { char buffer[kBufferLen]; const char golden[] = "Int: 55"; int bytes_written = MicroSnprintf(buffer, kBufferLen, "Int: %d", 55); - TF_LITE_MICRO_EXPECT_EQ(sizeof(golden), bytes_written); + TF_LITE_MICRO_EXPECT_EQ(static_cast(sizeof(golden)), bytes_written); TF_LITE_MICRO_EXPECT_STRING_EQ(golden, buffer); } @@ -33,7 +33,7 @@ TF_LITE_MICRO_TEST(FormatNegativeIntShouldMatchExpected) { char buffer[kBufferLen]; const char golden[] = "Int: -55"; int bytes_written = MicroSnprintf(buffer, kBufferLen, "Int: %d", -55); - TF_LITE_MICRO_EXPECT_EQ(sizeof(golden), bytes_written); + TF_LITE_MICRO_EXPECT_EQ(static_cast(sizeof(golden)), bytes_written); TF_LITE_MICRO_EXPECT_STRING_EQ(golden, buffer); } @@ -42,7 +42,7 @@ TF_LITE_MICRO_TEST(FormatUnsignedIntShouldMatchExpected) { char buffer[kBufferLen]; const char golden[] = "UInt: 12345"; int bytes_written = MicroSnprintf(buffer, kBufferLen, "UInt: %u", 12345); - TF_LITE_MICRO_EXPECT_EQ(sizeof(golden), bytes_written); + TF_LITE_MICRO_EXPECT_EQ(static_cast(sizeof(golden)), bytes_written); TF_LITE_MICRO_EXPECT_STRING_EQ(golden, buffer); } @@ -51,7 +51,7 @@ TF_LITE_MICRO_TEST(FormatHexShouldMatchExpected) { char buffer[kBufferLen]; const char golden[] = "Hex: 0x12345"; int bytes_written = MicroSnprintf(buffer, kBufferLen, "Hex: %x", 0x12345); - TF_LITE_MICRO_EXPECT_EQ(sizeof(golden), bytes_written); + TF_LITE_MICRO_EXPECT_EQ(static_cast(sizeof(golden)), bytes_written); TF_LITE_MICRO_EXPECT_STRING_EQ(golden, buffer); } @@ -59,8 +59,8 @@ TF_LITE_MICRO_TEST(FormatFloatShouldMatchExpected) { const int kBufferLen = 32; char buffer[kBufferLen]; const char golden[] = "Float: 1.0*2^4"; - int bytes_written = MicroSnprintf(buffer, kBufferLen, "Float: %f", 16.f); - TF_LITE_MICRO_EXPECT_EQ(sizeof(golden), bytes_written); + int bytes_written = MicroSnprintf(buffer, kBufferLen, "Float: %f", 16.); + TF_LITE_MICRO_EXPECT_EQ(static_cast(sizeof(golden)), bytes_written); TF_LITE_MICRO_EXPECT_STRING_EQ(golden, buffer); } @@ -70,7 +70,7 @@ TF_LITE_MICRO_TEST(BadlyFormattedStringShouldProduceReasonableString) { const char golden[] = "Test Badly % formated % string"; int bytes_written = MicroSnprintf(buffer, kBufferLen, "Test Badly %% formated %% string%"); - TF_LITE_MICRO_EXPECT_EQ(sizeof(golden), bytes_written); + TF_LITE_MICRO_EXPECT_EQ(static_cast(sizeof(golden)), bytes_written); TF_LITE_MICRO_EXPECT_STRING_EQ(golden, buffer); } @@ -79,7 +79,7 @@ TF_LITE_MICRO_TEST(IntFormatOverrunShouldTruncate) { char buffer[kBufferLen]; const char golden[] = "Int: "; int bytes_written = MicroSnprintf(buffer, kBufferLen, "Int: %d", 12345); - TF_LITE_MICRO_EXPECT_EQ(sizeof(golden), bytes_written); + TF_LITE_MICRO_EXPECT_EQ(static_cast(sizeof(golden)), bytes_written); TF_LITE_MICRO_EXPECT_STRING_EQ(golden, buffer); } @@ -88,7 +88,7 @@ TF_LITE_MICRO_TEST(UnsignedIntFormatOverrunShouldTruncate) { char buffer[kBufferLen]; const char golden[] = "UInt: "; int bytes_written = MicroSnprintf(buffer, kBufferLen, "UInt: %u", 12345); - TF_LITE_MICRO_EXPECT_EQ(sizeof(golden), bytes_written); + TF_LITE_MICRO_EXPECT_EQ(static_cast(sizeof(golden)), bytes_written); TF_LITE_MICRO_EXPECT_STRING_EQ(golden, buffer); } @@ -97,7 +97,7 @@ TF_LITE_MICRO_TEST(HexFormatOverrunShouldTruncate) { char buffer[kBufferLen]; const char golden[] = "Hex: "; int bytes_written = 
MicroSnprintf(buffer, kBufferLen, "Hex: %x", 0x12345); - TF_LITE_MICRO_EXPECT_EQ(sizeof(golden), bytes_written); + TF_LITE_MICRO_EXPECT_EQ(static_cast(sizeof(golden)), bytes_written); TF_LITE_MICRO_EXPECT_STRING_EQ(golden, buffer); } @@ -105,8 +105,8 @@ TF_LITE_MICRO_TEST(FloatFormatOverrunShouldTruncate) { const int kBufferLen = 12; char buffer[kBufferLen]; const char golden[] = "Float: "; - int bytes_written = MicroSnprintf(buffer, kBufferLen, "Float: %x", 12345.f); - TF_LITE_MICRO_EXPECT_EQ(sizeof(golden), bytes_written); + int bytes_written = MicroSnprintf(buffer, kBufferLen, "Float: %x", 12345.); + TF_LITE_MICRO_EXPECT_EQ(static_cast(sizeof(golden)), bytes_written); TF_LITE_MICRO_EXPECT_STRING_EQ(golden, buffer); } @@ -115,9 +115,8 @@ TF_LITE_MICRO_TEST(FloatFormatShouldPrintFractionCorrectly) { char buffer[kBufferLen]; const char golden[] = "Float: 1.0625*2^0"; // Add small offset to float value to account for float rounding error. - int bytes_written = - MicroSnprintf(buffer, kBufferLen, "Float: %f", 1.0625001f); - TF_LITE_MICRO_EXPECT_EQ(sizeof(golden), bytes_written); + int bytes_written = MicroSnprintf(buffer, kBufferLen, "Float: %f", 1.0625001); + TF_LITE_MICRO_EXPECT_EQ(static_cast(sizeof(golden)), bytes_written); TF_LITE_MICRO_EXPECT_STRING_EQ(golden, buffer); } @@ -127,7 +126,7 @@ TF_LITE_MICRO_TEST(StringFormatOverrunShouldTruncate) { const char golden[] = "String: h"; int bytes_written = MicroSnprintf(buffer, kBufferLen, "String: %s", "hello world"); - TF_LITE_MICRO_EXPECT_EQ(sizeof(golden), bytes_written); + TF_LITE_MICRO_EXPECT_EQ(static_cast(sizeof(golden)), bytes_written); TF_LITE_MICRO_EXPECT_STRING_EQ(golden, buffer); } @@ -136,7 +135,7 @@ TF_LITE_MICRO_TEST(StringFormatWithExactOutputSizeOverrunShouldTruncate) { char buffer[kBufferLen]; const char golden[] = "format st"; int bytes_written = MicroSnprintf(buffer, kBufferLen, "format str"); - TF_LITE_MICRO_EXPECT_EQ(sizeof(golden), bytes_written); + TF_LITE_MICRO_EXPECT_EQ(static_cast(sizeof(golden)), bytes_written); TF_LITE_MICRO_EXPECT_STRING_EQ(golden, buffer); } diff --git a/tensorflow/lite/micro/recording_simple_memory_allocator_test.cc b/tensorflow/lite/micro/recording_simple_memory_allocator_test.cc index 8fc4745a70e..16dbdb74437 100644 --- a/tensorflow/lite/micro/recording_simple_memory_allocator_test.cc +++ b/tensorflow/lite/micro/recording_simple_memory_allocator_test.cc @@ -30,15 +30,19 @@ TF_LITE_MICRO_TEST(TestRecordsTailAllocations) { uint8_t* result = allocator.AllocateFromTail(/*size=*/10, /*alignment=*/1); TF_LITE_MICRO_EXPECT_NE(result, nullptr); - TF_LITE_MICRO_EXPECT_EQ(allocator.GetUsedBytes(), 10); - TF_LITE_MICRO_EXPECT_EQ(allocator.GetRequestedBytes(), 10); - TF_LITE_MICRO_EXPECT_EQ(allocator.GetAllocatedCount(), 1); + TF_LITE_MICRO_EXPECT_EQ(allocator.GetUsedBytes(), static_cast(10)); + TF_LITE_MICRO_EXPECT_EQ(allocator.GetRequestedBytes(), + static_cast(10)); + TF_LITE_MICRO_EXPECT_EQ(allocator.GetAllocatedCount(), + static_cast(1)); result = allocator.AllocateFromTail(/*size=*/20, /*alignment=*/1); TF_LITE_MICRO_EXPECT_NE(result, nullptr); - TF_LITE_MICRO_EXPECT_EQ(allocator.GetUsedBytes(), 30); - TF_LITE_MICRO_EXPECT_EQ(allocator.GetRequestedBytes(), 30); - TF_LITE_MICRO_EXPECT_EQ(allocator.GetAllocatedCount(), 2); + TF_LITE_MICRO_EXPECT_EQ(allocator.GetUsedBytes(), static_cast(30)); + TF_LITE_MICRO_EXPECT_EQ(allocator.GetRequestedBytes(), + static_cast(30)); + TF_LITE_MICRO_EXPECT_EQ(allocator.GetAllocatedCount(), + static_cast(2)); } TF_LITE_MICRO_TEST(TestRecordsMisalignedTailAllocations) 
{ @@ -50,10 +54,12 @@ TF_LITE_MICRO_TEST(TestRecordsMisalignedTailAllocations) { uint8_t* result = allocator.AllocateFromTail(/*size=*/10, /*alignment=*/12); TF_LITE_MICRO_EXPECT_NE(result, nullptr); // Validate used bytes in 8 byte range that can included alignment of 12: - TF_LITE_MICRO_EXPECT_GE(allocator.GetUsedBytes(), 10); - TF_LITE_MICRO_EXPECT_LE(allocator.GetUsedBytes(), 20); - TF_LITE_MICRO_EXPECT_EQ(allocator.GetRequestedBytes(), 10); - TF_LITE_MICRO_EXPECT_EQ(allocator.GetAllocatedCount(), 1); + TF_LITE_MICRO_EXPECT_GE(allocator.GetUsedBytes(), static_cast(10)); + TF_LITE_MICRO_EXPECT_LE(allocator.GetUsedBytes(), static_cast(20)); + TF_LITE_MICRO_EXPECT_EQ(allocator.GetRequestedBytes(), + static_cast(10)); + TF_LITE_MICRO_EXPECT_EQ(allocator.GetAllocatedCount(), + static_cast(1)); } TF_LITE_MICRO_TEST(TestDoesNotRecordFailedTailAllocations) { @@ -63,10 +69,12 @@ TF_LITE_MICRO_TEST(TestDoesNotRecordFailedTailAllocations) { arena_size); uint8_t* result = allocator.AllocateFromTail(/*size=*/2048, /*alignment=*/1); - TF_LITE_MICRO_EXPECT_EQ(result, nullptr); - TF_LITE_MICRO_EXPECT_EQ(allocator.GetUsedBytes(), 0); - TF_LITE_MICRO_EXPECT_EQ(allocator.GetRequestedBytes(), 0); - TF_LITE_MICRO_EXPECT_EQ(allocator.GetAllocatedCount(), 0); + TF_LITE_MICRO_EXPECT(result == nullptr); + TF_LITE_MICRO_EXPECT_EQ(allocator.GetUsedBytes(), static_cast(0)); + TF_LITE_MICRO_EXPECT_EQ(allocator.GetRequestedBytes(), + static_cast(0)); + TF_LITE_MICRO_EXPECT_EQ(allocator.GetAllocatedCount(), + static_cast(0)); } TF_LITE_MICRO_TEST(TestRecordsHeadAllocations) { @@ -77,15 +85,19 @@ TF_LITE_MICRO_TEST(TestRecordsHeadAllocations) { uint8_t* result = allocator.AllocateFromHead(/*size=*/5, /*alignment=*/1); TF_LITE_MICRO_EXPECT_NE(result, nullptr); - TF_LITE_MICRO_EXPECT_EQ(allocator.GetUsedBytes(), 5); - TF_LITE_MICRO_EXPECT_EQ(allocator.GetRequestedBytes(), 5); - TF_LITE_MICRO_EXPECT_EQ(allocator.GetAllocatedCount(), 1); + TF_LITE_MICRO_EXPECT_EQ(allocator.GetUsedBytes(), static_cast(5)); + TF_LITE_MICRO_EXPECT_EQ(allocator.GetRequestedBytes(), + static_cast(5)); + TF_LITE_MICRO_EXPECT_EQ(allocator.GetAllocatedCount(), + static_cast(1)); result = allocator.AllocateFromTail(/*size=*/15, /*alignment=*/1); TF_LITE_MICRO_EXPECT_NE(result, nullptr); - TF_LITE_MICRO_EXPECT_EQ(allocator.GetUsedBytes(), 20); - TF_LITE_MICRO_EXPECT_EQ(allocator.GetRequestedBytes(), 20); - TF_LITE_MICRO_EXPECT_EQ(allocator.GetAllocatedCount(), 2); + TF_LITE_MICRO_EXPECT_EQ(allocator.GetUsedBytes(), static_cast(20)); + TF_LITE_MICRO_EXPECT_EQ(allocator.GetRequestedBytes(), + static_cast(20)); + TF_LITE_MICRO_EXPECT_EQ(allocator.GetAllocatedCount(), + static_cast(2)); } TF_LITE_MICRO_TEST(TestRecordsMisalignedHeadAllocations) { @@ -97,10 +109,12 @@ TF_LITE_MICRO_TEST(TestRecordsMisalignedHeadAllocations) { uint8_t* result = allocator.AllocateFromHead(/*size=*/10, /*alignment=*/12); TF_LITE_MICRO_EXPECT_NE(result, nullptr); // Validate used bytes in 8 byte range that can included alignment of 12: - TF_LITE_MICRO_EXPECT_GE(allocator.GetUsedBytes(), 10); - TF_LITE_MICRO_EXPECT_LE(allocator.GetUsedBytes(), 20); - TF_LITE_MICRO_EXPECT_EQ(allocator.GetRequestedBytes(), 10); - TF_LITE_MICRO_EXPECT_EQ(allocator.GetAllocatedCount(), 1); + TF_LITE_MICRO_EXPECT_GE(allocator.GetUsedBytes(), static_cast(10)); + TF_LITE_MICRO_EXPECT_LE(allocator.GetUsedBytes(), static_cast(20)); + TF_LITE_MICRO_EXPECT_EQ(allocator.GetRequestedBytes(), + static_cast(10)); + TF_LITE_MICRO_EXPECT_EQ(allocator.GetAllocatedCount(), + static_cast(1)); } 
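The misaligned-allocation checks above only pin GetUsedBytes() to a range because alignment padding depends on where the previous allocation left the arena offset. A rough sketch of that arithmetic, in Python and purely illustrative (the helper name and offsets are made up, this is not TFLM code):

def used_bytes(offset, size, alignment):
    aligned = -(-offset // alignment) * alignment  # round the current offset up to the alignment
    return (aligned - offset) + size               # padding plus the requested payload

# For size=10 and alignment=12 the result varies with the starting offset,
# which is why the test asserts a range instead of an exact value.
print(sorted({used_bytes(o, 10, 12) for o in range(24)}))

The TFLM allocator actually grows the tail downward and aligns in the other direction, but the effect of padding on the recorded byte count is the same.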
TF_LITE_MICRO_TEST(TestDoesNotRecordFailedTailAllocations) { @@ -110,10 +124,12 @@ TF_LITE_MICRO_TEST(TestDoesNotRecordFailedTailAllocations) { arena_size); uint8_t* result = allocator.AllocateFromHead(/*size=*/2048, /*alignment=*/1); - TF_LITE_MICRO_EXPECT_EQ(result, nullptr); - TF_LITE_MICRO_EXPECT_EQ(allocator.GetUsedBytes(), 0); - TF_LITE_MICRO_EXPECT_EQ(allocator.GetRequestedBytes(), 0); - TF_LITE_MICRO_EXPECT_EQ(allocator.GetAllocatedCount(), 0); + TF_LITE_MICRO_EXPECT(result == nullptr); + TF_LITE_MICRO_EXPECT_EQ(allocator.GetUsedBytes(), static_cast(0)); + TF_LITE_MICRO_EXPECT_EQ(allocator.GetRequestedBytes(), + static_cast(0)); + TF_LITE_MICRO_EXPECT_EQ(allocator.GetAllocatedCount(), + static_cast(0)); } TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/simple_memory_allocator_test.cc b/tensorflow/lite/micro/simple_memory_allocator_test.cc index d9ee979d5b0..ef97089b00b 100644 --- a/tensorflow/lite/micro/simple_memory_allocator_test.cc +++ b/tensorflow/lite/micro/simple_memory_allocator_test.cc @@ -29,7 +29,7 @@ TF_LITE_MICRO_TEST(TestJustFits) { arena_size); uint8_t* result = allocator.AllocateFromTail(arena_size, 1); - TF_LITE_MICRO_EXPECT_NE(nullptr, result); + TF_LITE_MICRO_EXPECT(nullptr != result); } TF_LITE_MICRO_TEST(TestAligned) { @@ -39,11 +39,12 @@ TF_LITE_MICRO_TEST(TestAligned) { arena_size); uint8_t* result = allocator.AllocateFromTail(1, 1); - TF_LITE_MICRO_EXPECT_NE(nullptr, result); + TF_LITE_MICRO_EXPECT(nullptr != result); result = allocator.AllocateFromTail(16, 4); - TF_LITE_MICRO_EXPECT_NE(nullptr, result); - TF_LITE_MICRO_EXPECT_EQ(0, reinterpret_cast(result) & 3); + TF_LITE_MICRO_EXPECT(nullptr != result); + TF_LITE_MICRO_EXPECT_EQ(static_cast(0), + reinterpret_cast(result) & 3); } TF_LITE_MICRO_TEST(TestMultipleTooLarge) { @@ -53,10 +54,10 @@ TF_LITE_MICRO_TEST(TestMultipleTooLarge) { arena_size); uint8_t* result = allocator.AllocateFromTail(768, 1); - TF_LITE_MICRO_EXPECT_NE(nullptr, result); + TF_LITE_MICRO_EXPECT(nullptr != result); result = allocator.AllocateFromTail(768, 1); - TF_LITE_MICRO_EXPECT_EQ(nullptr, result); + TF_LITE_MICRO_EXPECT(nullptr == result); } TF_LITE_MICRO_TEST(TestTempAllocations) { @@ -66,10 +67,10 @@ TF_LITE_MICRO_TEST(TestTempAllocations) { arena_size); uint8_t* temp1 = allocator.AllocateTemp(100, 1); - TF_LITE_MICRO_EXPECT_NE(nullptr, temp1); + TF_LITE_MICRO_EXPECT(nullptr != temp1); uint8_t* temp2 = allocator.AllocateTemp(100, 1); - TF_LITE_MICRO_EXPECT_NE(nullptr, temp2); + TF_LITE_MICRO_EXPECT(nullptr != temp2); // Expect that the next micro allocation is 100 bytes away from each other. TF_LITE_MICRO_EXPECT_EQ(temp2 - temp1, 100); @@ -82,12 +83,12 @@ TF_LITE_MICRO_TEST(TestResetTempAllocations) { arena_size); uint8_t* temp1 = allocator.AllocateTemp(100, 1); - TF_LITE_MICRO_EXPECT_NE(nullptr, temp1); + TF_LITE_MICRO_EXPECT(nullptr != temp1); allocator.ResetTempAllocations(); uint8_t* temp2 = allocator.AllocateTemp(100, 1); - TF_LITE_MICRO_EXPECT_NE(nullptr, temp2); + TF_LITE_MICRO_EXPECT(nullptr != temp2); // Reset temp allocations should have the same start address: TF_LITE_MICRO_EXPECT_EQ(temp2 - temp1, 0); @@ -100,21 +101,21 @@ TF_LITE_MICRO_TEST(TestAllocateHeadWithoutResettingTemp) { arena_size); uint8_t* temp = allocator.AllocateTemp(100, 1); - TF_LITE_MICRO_EXPECT_NE(nullptr, temp); + TF_LITE_MICRO_EXPECT(nullptr != temp); // Allocation should be null since temp allocation was not followed by a call // to ResetTempAllocations(). 
uint8_t* head = allocator.AllocateFromHead(100, 1); - TF_LITE_MICRO_EXPECT_EQ(nullptr, head); + TF_LITE_MICRO_EXPECT(nullptr == head); allocator.ResetTempAllocations(); head = allocator.AllocateFromHead(100, 1); - TF_LITE_MICRO_EXPECT_NE(nullptr, head); + TF_LITE_MICRO_EXPECT(nullptr != head); // The most recent head allocation should be in the same location as the // original temp allocation pointer. - TF_LITE_MICRO_EXPECT_EQ(temp, head); + TF_LITE_MICRO_EXPECT(temp == head); } // TODO(b/161171251): Add more coverage to this test - specifically around -1 diff --git a/tensorflow/lite/micro/testing/micro_test.bzl b/tensorflow/lite/micro/testing/micro_test.bzl index 532a1a16ac6..5e1a56fdc48 100644 --- a/tensorflow/lite/micro/testing/micro_test.bzl +++ b/tensorflow/lite/micro/testing/micro_test.bzl @@ -1,5 +1,10 @@ """Rules for simple testing without dependencies by parsing output logs.""" +load( + "//tensorflow/lite/micro:build_def.bzl", + "micro_copts", +) + def tflite_micro_cc_test( name, size = "medium", @@ -7,7 +12,7 @@ def tflite_micro_cc_test( srcs = [], includes = [], defines = [], - copts = ["-Werror", "-Wno-unused-variable"], + copts = micro_copts(), nocopts = "", linkopts = [], deps = [], diff --git a/tensorflow/lite/micro/testing/micro_test.h b/tensorflow/lite/micro/testing/micro_test.h index 95796e64ff1..d74d8f4f1a6 100644 --- a/tensorflow/lite/micro/testing/micro_test.h +++ b/tensorflow/lite/micro/testing/micro_test.h @@ -110,13 +110,16 @@ extern tflite::ErrorReporter* reporter; } \ } while (false) +// TODO(b/139142772): this macro is used with types other than ints even though +// the printf specifier is %d. #define TF_LITE_MICRO_EXPECT_EQ(x, y) \ do { \ auto vx = x; \ auto vy = y; \ if ((vx) != (vy)) { \ micro_test::reporter->Report(#x " == " #y " failed at %s:%d (%d vs %d)", \ - __FILE__, __LINE__, (vx), (vy)); \ + __FILE__, __LINE__, static_cast(vx), \ + static_cast(vy)); \ micro_test::did_test_fail = true; \ } \ } while (false) @@ -147,17 +150,18 @@ extern tflite::ErrorReporter* reporter; } \ } while (false) -#define TF_LITE_MICRO_EXPECT_NEAR(x, y, epsilon) \ - do { \ - auto vx = (x); \ - auto vy = (y); \ - auto delta = ((vx) > (vy)) ? ((vx) - (vy)) : ((vy) - (vx)); \ - if (delta > epsilon) { \ - micro_test::reporter->Report( \ - #x " (%f) near " #y " (%f) failed at %s:%d", static_cast(vx), \ - static_cast(vy), __FILE__, __LINE__); \ - micro_test::did_test_fail = true; \ - } \ +#define TF_LITE_MICRO_EXPECT_NEAR(x, y, epsilon) \ + do { \ + auto vx = (x); \ + auto vy = (y); \ + auto delta = ((vx) > (vy)) ? ((vx) - (vy)) : ((vy) - (vx)); \ + if (delta > epsilon) { \ + micro_test::reporter->Report( \ + #x " (%f) near " #y " (%f) failed at %s:%d", \ + static_cast(vx), static_cast(vy), __FILE__, \ + __LINE__); \ + micro_test::did_test_fail = true; \ + } \ } while (false) #define TF_LITE_MICRO_EXPECT_GT(x, y) \ diff --git a/tensorflow/lite/micro/testing/test_utils.h b/tensorflow/lite/micro/testing/test_utils.h index 0165cbb707a..053c4417f52 100644 --- a/tensorflow/lite/micro/testing/test_utils.h +++ b/tensorflow/lite/micro/testing/test_utils.h @@ -53,8 +53,9 @@ inline float MinFromZeroPointScale(const int zero_point, const float scale) { // Derives the quantization scaling factor from a min and max range. 
template inline float ScaleFromMinMax(const float min, const float max) { - return (max - min) / ((std::numeric_limits::max() * 1.0) - - std::numeric_limits::min()); + return (max - min) / + static_cast((std::numeric_limits::max() * 1.0) - + std::numeric_limits::min()); } // Derives the quantization zero point from a min and max range. diff --git a/tensorflow/lite/micro/testing/util_test.cc b/tensorflow/lite/micro/testing/util_test.cc index f4eb28e121a..261e9f29a25 100644 --- a/tensorflow/lite/micro/testing/util_test.cc +++ b/tensorflow/lite/micro/testing/util_test.cc @@ -21,10 +21,10 @@ TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(ArgumentsExecutedOnlyOnce) { float count = 0.; // Make sure either argument is executed once after macro expansion. - TF_LITE_MICRO_EXPECT_NEAR(0, count++, 0.1); - TF_LITE_MICRO_EXPECT_NEAR(1, count++, 0.1); - TF_LITE_MICRO_EXPECT_NEAR(count++, 2, 0.1); - TF_LITE_MICRO_EXPECT_NEAR(count++, 3, 0.1); + TF_LITE_MICRO_EXPECT_NEAR(0, count++, 0.1f); + TF_LITE_MICRO_EXPECT_NEAR(1, count++, 0.1f); + TF_LITE_MICRO_EXPECT_NEAR(count++, 2, 0.1f); + TF_LITE_MICRO_EXPECT_NEAR(count++, 3, 0.1f); } TF_LITE_MICRO_TESTS_END diff --git a/tensorflow/lite/micro/testing_helpers_test.cc b/tensorflow/lite/micro/testing_helpers_test.cc index 710ca2a4a9e..885bd873b53 100644 --- a/tensorflow/lite/micro/testing_helpers_test.cc +++ b/tensorflow/lite/micro/testing_helpers_test.cc @@ -33,7 +33,7 @@ TF_LITE_MICRO_TEST(CreateQuantizedBiasTensor) { pre_quantized, quantized, dims, input_scale, weight_scale); TF_LITE_MICRO_EXPECT_EQ(result.bytes, tensor_size * sizeof(int32_t)); - TF_LITE_MICRO_EXPECT_EQ(result.dims, dims); + TF_LITE_MICRO_EXPECT(result.dims == dims); TF_LITE_MICRO_EXPECT_EQ(result.params.scale, input_scale * weight_scale); for (int i = 0; i < tensor_size; i++) { TF_LITE_MICRO_EXPECT_EQ(expected_quantized_values[i], result.data.i32[i]); @@ -66,7 +66,7 @@ TF_LITE_MICRO_TEST(CreatePerChannelQuantizedBiasTensor) { } TF_LITE_MICRO_EXPECT_EQ(result.bytes, tensor_size * sizeof(int32_t)); - TF_LITE_MICRO_EXPECT_EQ(result.dims, dims); + TF_LITE_MICRO_EXPECT(result.dims == dims); for (int i = 0; i < tensor_size; i++) { TF_LITE_MICRO_EXPECT_EQ(expected_quantized_values[i], result.data.i32[i]); } @@ -92,7 +92,7 @@ TF_LITE_MICRO_TEST(CreateSymmetricPerChannelQuantizedTensor) { pre_quantized, quantized, dims, scales, zero_points, &quant, 0); TF_LITE_MICRO_EXPECT_EQ(result.bytes, tensor_size * sizeof(int8_t)); - TF_LITE_MICRO_EXPECT_EQ(result.dims, dims); + TF_LITE_MICRO_EXPECT(result.dims == dims); TfLiteFloatArray* result_scales = static_cast(result.quantization.params)->scale; for (int i = 0; i < channels; i++) { From bd3b7979d28153723aa118dcb5bcfc84694319e1 Mon Sep 17 00:00:00 2001 From: Peng Wang Date: Thu, 16 Jul 2020 11:33:54 -0700 Subject: [PATCH 0614/2522] Adds `string_` and `unicode_` to the do-not-descend map of `tf.experimental.numpy`. 
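For context on the api_compatibility_test.py hunk below: entries in do_not_descend_map tell the API golden checker to skip the members of the listed symbols instead of diffing them against the .pbtxt goldens. A hypothetical sketch of that filtering, assuming a should_descend helper that does not exist under that name in the real traversal code:

do_not_descend_map = {"tf.experimental.numpy": ["iinfo", "string_", "unicode_"]}

def should_descend(module_path, member_name):
    # Skip children of symbols whose signatures differ between internal and OSS numpy.
    return member_name not in do_not_descend_map.get(module_path, [])

assert not should_descend("tf.experimental.numpy", "unicode_")
assert should_descend("tf.experimental.numpy", "ndarray")

Because string_ and unicode_ are no longer descended into, their golden .pbtxt files are deleted in the same change.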
PiperOrigin-RevId: 321609879 Change-Id: I175fa2cec731de275a8810b94bb19d93fc3edacc --- .../tools/api/tests/api_compatibility_test.py | 5 +- ...ensorflow.experimental.numpy.string_.pbtxt | 339 ----------------- ...nsorflow.experimental.numpy.unicode_.pbtxt | 354 ------------------ 3 files changed, 3 insertions(+), 695 deletions(-) delete mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.string_.pbtxt delete mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.unicode_.pbtxt diff --git a/tensorflow/tools/api/tests/api_compatibility_test.py b/tensorflow/tools/api/tests/api_compatibility_test.py index aeae307be35..f6d1ba4d8c2 100644 --- a/tensorflow/tools/api/tests/api_compatibility_test.py +++ b/tensorflow/tools/api/tests/api_compatibility_test.py @@ -362,9 +362,10 @@ class ApiCompatibilityTest(test.TestCase): public_api_visitor.private_map['tf'].append('enable_v2_behavior') public_api_visitor.do_not_descend_map['tf.GPUOptions'] = ['Experimental'] - # Do not descend into `iinfo` because np.iinfo's signature is different + # Do not descend into these classes because their signatures are different # between internal and OSS. - public_api_visitor.do_not_descend_map['tf.experimental.numpy'] = ['iinfo'] + public_api_visitor.do_not_descend_map['tf.experimental.numpy'] = [ + 'iinfo', 'string_', 'unicode_'] if FLAGS.only_test_core_api: public_api_visitor.do_not_descend_map['tf'].extend(_NON_CORE_PACKAGES) if additional_private_map: diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.string_.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.string_.pbtxt deleted file mode 100644 index 3297a0bd2a0..00000000000 --- a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.string_.pbtxt +++ /dev/null @@ -1,339 +0,0 @@ -path: "tensorflow.experimental.numpy.string_" -tf_class { - is_instance: "" - member { - name: "T" - mtype: "" - } - member { - name: "base" - mtype: "" - } - member { - name: "data" - mtype: "" - } - member { - name: "dtype" - mtype: "" - } - member { - name: "flags" - mtype: "" - } - member { - name: "flat" - mtype: "" - } - member { - name: "imag" - mtype: "" - } - member { - name: "itemsize" - mtype: "" - } - member { - name: "nbytes" - mtype: "" - } - member { - name: "ndim" - mtype: "" - } - member { - name: "real" - mtype: "" - } - member { - name: "shape" - mtype: "" - } - member { - name: "size" - mtype: "" - } - member { - name: "strides" - mtype: "" - } - member_method { - name: "__init__" - } - member_method { - name: "all" - } - member_method { - name: "any" - } - member_method { - name: "argmax" - } - member_method { - name: "argmin" - } - member_method { - name: "argsort" - } - member_method { - name: "astype" - } - member_method { - name: "byteswap" - } - member_method { - name: "capitalize" - } - member_method { - name: "center" - } - member_method { - name: "choose" - } - member_method { - name: "clip" - } - member_method { - name: "compress" - } - member_method { - name: "conj" - } - member_method { - name: "conjugate" - } - member_method { - name: "copy" - } - member_method { - name: "count" - } - member_method { - name: "cumprod" - } - member_method { - name: "cumsum" - } - member_method { - name: "decode" - } - member_method { - name: "diagonal" - } - member_method { - name: "dump" - } - member_method { - name: "dumps" - } - member_method { - name: "endswith" - } - member_method { - name: "expandtabs" - } - member_method { - name: "fill" - } - member_method { - name: 
"find" - } - member_method { - name: "flatten" - } - member_method { - name: "fromhex" - } - member_method { - name: "getfield" - } - member_method { - name: "hex" - } - member_method { - name: "index" - } - member_method { - name: "isalnum" - } - member_method { - name: "isalpha" - } - member_method { - name: "isdigit" - } - member_method { - name: "islower" - } - member_method { - name: "isspace" - } - member_method { - name: "istitle" - } - member_method { - name: "isupper" - } - member_method { - name: "item" - } - member_method { - name: "itemset" - } - member_method { - name: "join" - } - member_method { - name: "ljust" - } - member_method { - name: "lower" - } - member_method { - name: "lstrip" - } - member_method { - name: "maketrans" - } - member_method { - name: "max" - } - member_method { - name: "mean" - } - member_method { - name: "min" - } - member_method { - name: "newbyteorder" - } - member_method { - name: "nonzero" - } - member_method { - name: "partition" - } - member_method { - name: "prod" - } - member_method { - name: "ptp" - } - member_method { - name: "put" - } - member_method { - name: "ravel" - } - member_method { - name: "repeat" - } - member_method { - name: "replace" - } - member_method { - name: "reshape" - } - member_method { - name: "resize" - } - member_method { - name: "rfind" - } - member_method { - name: "rindex" - } - member_method { - name: "rjust" - } - member_method { - name: "round" - } - member_method { - name: "rpartition" - } - member_method { - name: "rsplit" - } - member_method { - name: "rstrip" - } - member_method { - name: "searchsorted" - } - member_method { - name: "setfield" - } - member_method { - name: "setflags" - } - member_method { - name: "sort" - } - member_method { - name: "split" - } - member_method { - name: "splitlines" - } - member_method { - name: "squeeze" - } - member_method { - name: "startswith" - } - member_method { - name: "std" - } - member_method { - name: "strip" - } - member_method { - name: "sum" - } - member_method { - name: "swapaxes" - } - member_method { - name: "swapcase" - } - member_method { - name: "take" - } - member_method { - name: "title" - } - member_method { - name: "tobytes" - } - member_method { - name: "tofile" - } - member_method { - name: "tolist" - } - member_method { - name: "tostring" - } - member_method { - name: "trace" - } - member_method { - name: "translate" - } - member_method { - name: "transpose" - } - member_method { - name: "upper" - } - member_method { - name: "var" - } - member_method { - name: "view" - } - member_method { - name: "zfill" - } -} diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.unicode_.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.unicode_.pbtxt deleted file mode 100644 index 7cfc7fb56cd..00000000000 --- a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.unicode_.pbtxt +++ /dev/null @@ -1,354 +0,0 @@ -path: "tensorflow.experimental.numpy.unicode_" -tf_class { - is_instance: "" - member { - name: "T" - mtype: "" - } - member { - name: "base" - mtype: "" - } - member { - name: "data" - mtype: "" - } - member { - name: "dtype" - mtype: "" - } - member { - name: "flags" - mtype: "" - } - member { - name: "flat" - mtype: "" - } - member { - name: "imag" - mtype: "" - } - member { - name: "itemsize" - mtype: "" - } - member { - name: "nbytes" - mtype: "" - } - member { - name: "ndim" - mtype: "" - } - member { - name: "real" - mtype: "" - } - member { - name: "shape" - mtype: "" - } - member { - name: 
"size" - mtype: "" - } - member { - name: "strides" - mtype: "" - } - member_method { - name: "__init__" - } - member_method { - name: "all" - } - member_method { - name: "any" - } - member_method { - name: "argmax" - } - member_method { - name: "argmin" - } - member_method { - name: "argsort" - } - member_method { - name: "astype" - } - member_method { - name: "byteswap" - } - member_method { - name: "capitalize" - } - member_method { - name: "casefold" - } - member_method { - name: "center" - } - member_method { - name: "choose" - } - member_method { - name: "clip" - } - member_method { - name: "compress" - } - member_method { - name: "conj" - } - member_method { - name: "conjugate" - } - member_method { - name: "copy" - } - member_method { - name: "count" - } - member_method { - name: "cumprod" - } - member_method { - name: "cumsum" - } - member_method { - name: "diagonal" - } - member_method { - name: "dump" - } - member_method { - name: "dumps" - } - member_method { - name: "encode" - } - member_method { - name: "endswith" - } - member_method { - name: "expandtabs" - } - member_method { - name: "fill" - } - member_method { - name: "find" - } - member_method { - name: "flatten" - } - member_method { - name: "format" - } - member_method { - name: "format_map" - } - member_method { - name: "getfield" - } - member_method { - name: "index" - } - member_method { - name: "isalnum" - } - member_method { - name: "isalpha" - } - member_method { - name: "isdecimal" - } - member_method { - name: "isdigit" - } - member_method { - name: "isidentifier" - } - member_method { - name: "islower" - } - member_method { - name: "isnumeric" - } - member_method { - name: "isprintable" - } - member_method { - name: "isspace" - } - member_method { - name: "istitle" - } - member_method { - name: "isupper" - } - member_method { - name: "item" - } - member_method { - name: "itemset" - } - member_method { - name: "join" - } - member_method { - name: "ljust" - } - member_method { - name: "lower" - } - member_method { - name: "lstrip" - } - member_method { - name: "maketrans" - } - member_method { - name: "max" - } - member_method { - name: "mean" - } - member_method { - name: "min" - } - member_method { - name: "newbyteorder" - } - member_method { - name: "nonzero" - } - member_method { - name: "partition" - } - member_method { - name: "prod" - } - member_method { - name: "ptp" - } - member_method { - name: "put" - } - member_method { - name: "ravel" - } - member_method { - name: "repeat" - } - member_method { - name: "replace" - } - member_method { - name: "reshape" - } - member_method { - name: "resize" - } - member_method { - name: "rfind" - } - member_method { - name: "rindex" - } - member_method { - name: "rjust" - } - member_method { - name: "round" - } - member_method { - name: "rpartition" - } - member_method { - name: "rsplit" - } - member_method { - name: "rstrip" - } - member_method { - name: "searchsorted" - } - member_method { - name: "setfield" - } - member_method { - name: "setflags" - } - member_method { - name: "sort" - } - member_method { - name: "split" - } - member_method { - name: "splitlines" - } - member_method { - name: "squeeze" - } - member_method { - name: "startswith" - } - member_method { - name: "std" - } - member_method { - name: "strip" - } - member_method { - name: "sum" - } - member_method { - name: "swapaxes" - } - member_method { - name: "swapcase" - } - member_method { - name: "take" - } - member_method { - name: "title" - } - member_method { - name: "tobytes" - } - 
member_method { - name: "tofile" - } - member_method { - name: "tolist" - } - member_method { - name: "tostring" - } - member_method { - name: "trace" - } - member_method { - name: "translate" - } - member_method { - name: "transpose" - } - member_method { - name: "upper" - } - member_method { - name: "var" - } - member_method { - name: "view" - } - member_method { - name: "zfill" - } -} From 5a244072f2b33d2347e803146c244c179c1ddb75 Mon Sep 17 00:00:00 2001 From: Jakob Buchgraber Date: Thu, 16 Jul 2020 11:36:38 -0700 Subject: [PATCH 0615/2522] Breaks the build. Rollback. PiperOrigin-RevId: 321610483 Change-Id: I0bba8a5d626275be22029da4abb6cb3ac18f03f5 --- tensorflow/opensource_only.files | 1 + third_party/gpus/compress_find_cuda_config.py | 37 ++++++++++++++++ third_party/gpus/cuda_configure.bzl | 44 +++++++++++++------ .../gpus/find_cuda_config.py.gz.base64 | 1 + third_party/nccl/nccl_configure.bzl | 18 ++++---- third_party/tensorrt/tensorrt_configure.bzl | 7 +-- 6 files changed, 79 insertions(+), 29 deletions(-) create mode 100644 third_party/gpus/compress_find_cuda_config.py create mode 100644 third_party/gpus/find_cuda_config.py.gz.base64 diff --git a/tensorflow/opensource_only.files b/tensorflow/opensource_only.files index 296722936a8..a0ce4305b16 100644 --- a/tensorflow/opensource_only.files +++ b/tensorflow/opensource_only.files @@ -101,6 +101,7 @@ tensorflow/third_party/gpus/cuda/cuda_config.h.tpl tensorflow/third_party/gpus/cuda/cuda_config.py.tpl tensorflow/third_party/gpus/cuda_configure.bzl tensorflow/third_party/gpus/find_cuda_config.py +tensorflow/third_party/gpus/find_cuda_config.py.gz.base64 tensorflow/third_party/gpus/rocm/BUILD tensorflow/third_party/gpus/rocm/BUILD.tpl tensorflow/third_party/gpus/rocm/build_defs.bzl.tpl diff --git a/third_party/gpus/compress_find_cuda_config.py b/third_party/gpus/compress_find_cuda_config.py new file mode 100644 index 00000000000..606bbf2cdd5 --- /dev/null +++ b/third_party/gpus/compress_find_cuda_config.py @@ -0,0 +1,37 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Compresses the contents of 'find_cuda.py'. + +The compressed file is what is actually being used. It works around remote +config not being able to upload files yet. 
+""" +import base64 +import zlib + + +def main(): + with open('find_cuda.py', 'rb') as f: + data = f.read() + + compressed = zlib.compress(data) + b64encoded = base64.b64encode(compressed) + + with open('find_cuda.py.gz.base64', 'wb') as f: + f.write(b64encoded) + + +if __name__ == '__main__': + main() + diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl index c09a22a73c0..70bb91159de 100644 --- a/third_party/gpus/cuda_configure.bzl +++ b/third_party/gpus/cuda_configure.bzl @@ -605,19 +605,42 @@ def _cudart_static_linkopt(cpu_value): """Returns additional platform-specific linkopts for cudart.""" return "" if cpu_value == "Darwin" else "\"-lrt\"," +def _exec_find_cuda_config(repository_ctx, script_path, cuda_libraries): + python_bin = get_python_bin(repository_ctx) + + # If used with remote execution then repository_ctx.execute() can't + # access files from the source tree. A trick is to read the contents + # of the file in Starlark and embed them as part of the command. In + # this case the trick is not sufficient as the find_cuda_config.py + # script has more than 8192 characters. 8192 is the command length + # limit of cmd.exe on Windows. Thus we additionally need to compress + # the contents locally and decompress them as part of the execute(). + compressed_contents = repository_ctx.read(script_path) + decompress_and_execute_cmd = ( + "from zlib import decompress;" + + "from base64 import b64decode;" + + "from os import system;" + + "script = decompress(b64decode('%s'));" % compressed_contents + + "f = open('script.py', 'wb');" + + "f.write(script);" + + "f.close();" + + "system('\"%s\" script.py %s');" % (python_bin, " ".join(cuda_libraries)) + ) + + return execute(repository_ctx, [python_bin, "-c", decompress_and_execute_cmd]) + # TODO(csigg): Only call once instead of from here, tensorrt_configure.bzl, # and nccl_configure.bzl. -def find_cuda_config(repository_ctx, cuda_libraries): +def find_cuda_config(repository_ctx, script_path, cuda_libraries): """Returns CUDA config dictionary from running find_cuda_config.py""" - python_bin = get_python_bin(repository_ctx) - exec_result = execute(repository_ctx, [python_bin, repository_ctx.attr._find_cuda_config] + cuda_libraries) + exec_result = _exec_find_cuda_config(repository_ctx, script_path, cuda_libraries) if exec_result.return_code: auto_configure_fail("Failed to run find_cuda_config.py: %s" % err_out(exec_result)) # Parse the dict from stdout. return dict([tuple(x.split(": ")) for x in exec_result.stdout.splitlines()]) -def _get_cuda_config(repository_ctx): +def _get_cuda_config(repository_ctx, find_cuda_config_script): """Detects and returns information about the CUDA installation on the system. Args: @@ -632,7 +655,7 @@ def _get_cuda_config(repository_ctx): compute_capabilities: A list of the system's CUDA compute capabilities. cpu_value: The name of the host operating system. 
""" - config = find_cuda_config(repository_ctx, ["cuda", "cudnn"]) + config = find_cuda_config(repository_ctx, find_cuda_config_script, ["cuda", "cudnn"]) cpu_value = get_cpu_value(repository_ctx) toolkit_path = config["cuda_toolkit_path"] @@ -928,8 +951,9 @@ def _create_local_cuda_repository(repository_ctx): "cuda:cuda_config.py", ]} tpl_paths["cuda:BUILD"] = _tpl_path(repository_ctx, "cuda:BUILD.windows" if is_windows(repository_ctx) else "cuda:BUILD") + find_cuda_config_script = repository_ctx.path(Label("@org_tensorflow//third_party/gpus:find_cuda_config.py.gz.base64")) - cuda_config = _get_cuda_config(repository_ctx) + cuda_config = _get_cuda_config(repository_ctx, find_cuda_config_script) cuda_include_path = cuda_config.config["cuda_include_dir"] cublas_include_path = cuda_config.config["cublas_include_dir"] @@ -1370,20 +1394,12 @@ remote_cuda_configure = repository_rule( remotable = True, attrs = { "environ": attr.string_dict(), - "_find_cuda_config": attr.label( - default = Label("@org_tensorflow//third_party/gpus:find_cuda_config.py"), - ), }, ) cuda_configure = repository_rule( implementation = _cuda_autoconf_impl, environ = _ENVIRONS + [_TF_CUDA_CONFIG_REPO], - attrs = { - "_find_cuda_config": attr.label( - default = Label("@org_tensorflow//third_party/gpus:find_cuda_config.py"), - ), - }, ) """Detects and configures the local CUDA toolchain. diff --git a/third_party/gpus/find_cuda_config.py.gz.base64 b/third_party/gpus/find_cuda_config.py.gz.base64 new file mode 100644 index 00000000000..981219bb10a --- /dev/null +++ b/third_party/gpus/find_cuda_config.py.gz.base64 @@ -0,0 +1 @@ +eJzdPGtT40iS3/0r6tRHINNGwOzGxJ5vmQsGum/Y5aAD3D23AV5vIZeNpmXJJ8kG78b+98vMqpKqSpINpnu2Y4iYHkuqzMrMyme93rDTdL7KoulDwb47PPoPNngQbCCSPM3ex+kjO1kUD2mWB+wkjtk1NsvZtchFthTjoPOm84ZdRCE0F2O2SMYiYwXAn8x5CP9TX3rsk8jyKE3Yd8Eh87GBpz553f8EDKt0wWZ8xZK0YItcAIooZ5MoFkw8hWJesChhYTqbxxFPQsEeo+KBulFIgAz2F4UivS84tObQfg5PE7Md4wURjH8PRTHvHxw8Pj4GnIgN0mx6EMuG+cHF+em7y5t3+0AwgXxMYpHnLBP/t4gyYPV+xfgc6An5PVAZ80eWZoxPMwHfihTpfcyiIkqmPZank+KRZwKwjKO8yKL7RWEJS1MHPJsNQFw8Yd7JDTu/8diPJzfnNz3A8fP54KerjwP288n19cnl4PzdDbu6ZqdXl2fng/OrS3h6z04u/8L+fH551mMCRAXdiKd5hvQDkRGKkYaO3QhhETBJJUH5XITRJAqBr2S64FPBpulSZAmww+Yim0U5DmYO5I0BSxzNooIX9KbGFHZz/EX/Op7nfciiBNTw9OPZCXR/n/FshcSwB8Gx/zEMUVikWSSIRraU2gcqlQKBKFjicpUXYhZ0OqjweZhFoGe54BnoQk6iaEOPipnbWHow4ii1Iu/AyxmqwFgUKKqERBxlmghCNJf0I3yYJpNoushIgAiXF+N0UQRE1ZwXD7nUJ8JOwAhV6mHJGiiYHjdUwYcsXUwfmEiWUZYmM5EUnSXPItRWMOXzCZgaW/I4GjsEREpIPcmclIoml4gTWUYDn4likZESMHgF4grTsVDSjEGN0fak8HAYAHYSAfGAv6KSI9nTBVIHRN0s5vM0Q82vwNBsaBj8KAnjxRhehYsfL05uuj34cXZ52WOXp6cXPRKMdFrXA3tIC/4ZEZU03XPQdFNDKnrAqJHrwfsR9jn6cDL46aZjiJBpESLl4I9mfD8Xcw6iA+BpnN5TJwEzeo/T9LPUJqk8eQcp1UolNYnc1QPPxvsowjEoIRGaL+5NMidZOkPygHpigXQj6MBQWvSihNGNllyBbNjVTWXTYzHhi7jAduBqx/1Oh4GxJounPnjFg0WeHcRpyOODcDHmPXohpSt1kajYjcdSa9j+fDcA+J9haNPHvM9KOliT2Hrgfzqs+jvt3919yNJpxmfsPYri7u7y0/nZ+Qn77w8fISzN5gt0oWyQpvHnqLi7Q/R3d3udznsQ6D0PP4NfHdNQAHXRfRRHxQod7kyYOhTnKUUVHoNFJtBwKUXYqYk3lc6ygXQYJQolK9vQAhAdqVVHDel+KedGLEracvRZGRahbaVb7GQ8jtAYeVxTVoDef+UfoCCLoj+tOp/eXd9A8NDjgq8GV1cXfz4f0GgCjDS7EgYfLKhmGLBQo5/LS7sj+er88mZwcnGhgdCeS+LwwSGOXpkw0gWMfjq71ii0IyAUA4jjV9fXAwNN+crquvNJ+9JmBQjB090LnVKASYMhPO1iSN19Cla76OLQLQvQB94p1UA5aIjEs3mxwtaLxPDTKeMhZTg8Wem2yn9BFAD9x/64ClzabYDpGlT0O8Vk9Ef17YeRQtJnQFTwd/ub9EEjUKg+C4LA/qh+VF8h0HY6kC6AU2ZRqn+luf6FHk//nse8QGL0M/ht9Qvc2DxLQ8g+yjervANpwXwVQ1jpY86DAj6e7svv++C+9nmxX6TzTpGt+jCgZJz5A3iDmCkkjw9R+NBR6eE5vXuHYahsjqkUAuRBPuePiYbDKDQSTyJcFNqTS1QVQSJppafTCWMOedQpuT/q0H9HNIDIu9j3HD5DM/CybBTlIFTwrD59kQGzlFQgMwe/C/kRpsPQzjMAH6VLfQaocr4m8IyH
6XNAz3gG/ZSQM15g8qNVyOdhseDxqFRhnXXoN9QBaMnpgwg/oxwF+U/yvzrdmQmhMh0FLaN9R8UBLCpELF9qpXa7YYnKqsH6MI1Fs4wo+1JIbDpL3PhXw+U0xhY5hEMD5CWeVP8dMffvKLDfDbKFMAGC71wA5/k9xK0NEMHv1kLU/9w+iCg9Etk075fQrtz6lNRoodVSJgivWVAC2yK2QTEtDjFf0vmMjEaQ4BY8jikJ1YiuZYbZZz8rxcLWEnelXlJl7aQ4TUQgNRP+hbjtDDm4z0toIplV9iFlB2+gKoIKbA4s8tDIuspEGxE7+I5dBcQabu53Ow3qd1x7ZbUmWmrYeFbkWPP6NQPUlsuLUSx4XrzMdJUsj9kt+D1/2aU8dUn5qEMClIyF7wVed2gw1QRY584BNThkP1TS0IxMRaGjlGYFE88eS/hMaH+jFAOdgQot0D2AQ7FF2fSpWakFUg2QwhirMfgepUE6Fxqzl3kQnRPIvSHXPPYWxWT/D143yAAFAig/yqSq0QAG9NP33sg+2U7O3vp347ddj+0QoT3qqktgoH/UXNuWEgG9C6ZQps39I2PwvdIbhzDsIo94MgImx4uw8CdRlheQ3wrIvseuMMB6KEfHbPg+SlRBDi6VoCiFl4BKIKq/2zQPECz4JY0SfwLY5WhOUFASFB9JrhJ+aI5VPB7JUmBE9YHfRFUtzbbrh9JMsWxxYmZFJmqPBqO+YCQocPuefu11Mb3yDnJg/6B8CXAqkzo2cpEgxJg1kl/8WwszqMT+3Bt2KZoXWDPIUccyAxQKlHmPHf/A/GCv68mRwwiC6EVBdmzqmuxBmoGlTiq1MTVL9abUC1sHY4E1oe/xPIwirytVSmU9H5MIP55RkzL5MRFWEaGuhUhzwMdjXysADBPqrm9rZtfQzZzKcx+LXF8i6HZNZVCl5QhLR6UP9NP0OWqczfeSJvMNkO7t4ciJWLUHB0KTgJvArMe3AAZoZK+1pKrSrTJwgShU7h8AP74VS72ytvV6zoftytjlDjQEj2Ehs+RFXwy3ees59fn+To4uxwQC5XUaeeqVSbbTJkm8IUir2aL1CCu33Gzn9emLTMSy1C4nsXAuxJgKkbMfjjdSJHolsZ6a+Km9ULy5b/f2yX3sT5NF9VE8FRnPD04/fhicH6xFKNvQt9LP6cLoC3BuTfxsZB1af/9769F6aOYVPzyZYBbzezZrYFojqi1HNIfn43yD5LNXsqGfkWbloUgGVVs0V08GkFvvLtFKtruT76J6krdG7ulHVPqRCkF3KL2M2eFLkdrQQ8NtmcWaFsppuojH5FloWhKr7x05Ewe/jJm3VR+sNEkn9L/KWH1DGHVJGYyV1kN1J0K9SMYmgw0JQTsq5eEokuM6CsBjuR7gP74V8mXEMzvuOqkKfkNp8igX26uMJQhlBxawNjOZQLXVmtrsZGoip7BJQmBmuggQeTWLvKryi4YIYBAI+mZSQMEDLU5HIaeudmG9nXwvGK8QArQTE16tu/TqrYUcnO3aOs3+W5NT3/aPhhShBQT6RrLW0gGRIU+9DdRs6h4ltWc4shZVd93oGu1QHYmXGUzea+fC5aHHMNS9QLW4NKPigRe1mlOqm5ZOqW7Fai5MPwGJNfobsMRbzKB6rFjMYzGsjRp6vlvzefgFHYHWjCixutQW/zp/0VAZG2ImWXTLptG2E07VmJbeqed03O6tFPwaldIRAmxD1UTWEIKySwXAjE7zqYNSa31Oai3THmusVE7V7PK2UFKVWT1bTS2TXWN2TvJnE77Bla1ny5IQJbIy+3RUuqYH0CnCtcwXKEWp9BDnKdonFzxzEURXWarysKqNUmI4gWTWEN7OONgZk+vXnR4csKPDw8NeScUOPcv33U5HJfuGZEuxVnS3ao6HwMGDt0n6zxqRLXDUZdnt1Koxhx32BhclcAJDMH6fLkVQSsEMDiXbTVkC8Z0VXs8plQx9SJZh2KQNRlQ8BVgm63m98J7G0hcIXHK7G7+9C+CfHvvk69/0T9cr3amu8FsnFfRcwr6ixRt2G+YBGiaWysSyfQqgXtG3zyzZSgsvSECU5IAo8CEQT8KrpUiUVMgGnoZSk3GGhMvRWuM+Gmqb+yiRpUivIqfnVLLuYHYl8cuZrSbt3WHbA9CisVhGoVHx5VC0uQUk1k1Q+48jvk+ldSGrdQccyPXKN3vB0WFwH3rKmOdFZFpzO301b+oRMFhzt0T0EoOA9l6DkyyNC6cD5cIeYHKne8ph7WrhGguBje2V/Ltyuvx/L0501zLWKMGVUYkKrHnxxOVmnFJ6AYEPrs6u/DCPptNunxbP0KTuUwA1pw1xeQBkFmiOVB9lpajjuiY2STPcEvBgpy6OMDDCQxLb7T0L2BUNQR+UCLoy6atTd3s4ZP923PThSOV+MlUxq1TvPIEomEOWiGtdtEBiSrWPteoyZzT70+K76/3ReClP8A+t9abBeX3b/qw2apIEOYd2rlK4kaxrAxtiawM29d2BrkZMU2iMoWnqTj+1EauwooVt4sgx564Dvokn14gdpsyh0Wy5eoMQ/3QSpPuY589JkZz42NezoLWVImfyEDzakSebPyfDMnMsc750w0KO0RJDKcUisHvfq3aWjP7n5E9X1x4lZ9W780t815avmE0/nAxOf6rCpc7TdD1sOknGtk/CaDz4PNomFXt1HtachKFbVdt1FrlyyjP+S1rtQUyTeBWo6XNioC1lM2v9w2HHmmh4g+u6mZAbzkrUUTKBQmTJIbHC7RX3AsZXrfGiavXYL4tcTbrRBhZcoTOHoHkq3x2kl8VWY4wc+RhLwrTfEfWQq42l+psPOWu3RVruCKKEXhK5ER2lsibeFkdtNgGPYcur57Rb79xa3BpBrvdrNY/m+qc8jYGkr+qhjoLDf6mHurm6QAfj+Cjz7UYvZTT+lfxUOS6zGZj1t+SsStq2cUJf0Wu4Emul9rVOQCIkN2DjbnEEdqN1rkC13MoZKNhXuoOMJ+PftDO4Prk8c11B9W6jIyib/kpuAMfjG7N+UpFvzPaVmFpIfK3BIzoydxNvi7GbTdaZOrXbytAJ8pVmPpkUv2krf/9+4Bp5+UrauPnqV7JlEPo3ZsqoBt+YJUshNRP4WjsGbGTGBtYWKzZarDNibLaVDSPgK014nCRfZ+ml2l71XPtzrQ9PiRiWR4+V1eEjWdzFu0/vLuQ+03ajQ3+jfuPMHE1x63nx7UzxZebjeyRqNFwmf5YGAmr6QmSvNOXWNZyKqGeY8YuWbpJErtxUPbRNS1Yt5PRY9dyzW21lMAj42kp3zjNg7rcc9m4+nFzfvKtVusbbzZVu1fjXqnRpXL6xyKiU5RsLjqWoWsl8dW1LCGVta+Fuq22tRmtrW9lyu9pWwr7KASRhGH+dgFnZ7nYBk45DlvYqn3S4pCfLENeZ4bZGiLJ5uQF+lWhGw/Q1ghkiBoma+Bt12myAi1LGY89qs4UmE9yrtLigo7LZs6q3f4Emf6qO71YKbb5cF3+slpbOv2F/U0T8Daf+OTCUiIwX0Hl6/4sI8ZBNyh7pdgZoWLC
okPur6BIBtaaxyKNkqtDBd3+2iIpojpsEo5nIoVWcPqp1g0oidHbCig1qmxGPY7+SszaIctsGdvJJfpbn+9UtDTP+WdT20TF9NwOd/wg2mnl5FGVbe79cnicTkX1DMVcdkTEW1ctFK304/Hs2S5dibJ7uo9WrbFbexaH4+qQT9OBLSKnE9o0ICxVAe4FnOsoX+cllhFyD5bq9NLpLtxG4NPdVr9Z2C9dZwm7rPtGSR3icbSSSpQ//qS1M6hDU8e3Q3bLp3hhCzoQOxDVdOFBtJNa46ThZeULJOsJUvb7VrYd6zHqecRZCUWcd4RNTHkoOn8UGlzt7rDPA4BkJSzMnOM71ux56TVc5NF7NQAdrihSw7MpzS3jE5Q/fj77/fXUAZpc90pGOeSaWUbrI45VcZKatRPVLfDpMXaDCzuhAMniBFK8IgQZKUtTVbk/eCfIYyeuR8L4RdAzO6Het48brxqt+iPSv/sHtXw/YcK/7lhi7e3xbcXXwX/8OhtM0umsPl97aewCNI2Sb1LZbnQwCPxhHf5eGXIV3QxHKJnIHck/ey6WllMc8f5D3JKkbCpRGK5ehLS0TPK666LSd1FBQCmQe81D43t0dHW4zFRwbKB5qW4rrmjyOQrppJaOrwtZd+KTURbJQXS5zzG55Nl0GEOvB3cujs/iCNoau8gB/3x71h8NObXOsc9jQc+5iQc6IMeuclTN+nnX3z8aA8qxTmrU+aSOrdQLB+AyjpUdSPAFtuRzHofTt6mDsP/4px5W2PtFBzlKCxinONh5rV8t41hEr5QfpPOtiPgbX6td3k1cdOJMxarrBGRuJ7tbeHjdULWmbhCa2IkQbJB3r8N2D8SYmI5p22R+Zf3TYY0dl1veG/VjbNKO2qzzyXN18IbeqlKfUcNOKsfdS36Dg0FoJweRkg0aaN/2UOukKvMrzG7apmVS4e13aRkOtPn9RKYMcD0sp17qoCae2zt8onmonx8sEZO+Tsamp7wRoExOt3X1NIVkd1ETkrIs2CkivcL9MPOa+AZMKd9W0TTC4IPI15WLir4nFXmVqlIpaKlwrlIbFVaNjZ+Gp1ZDkVNdXNSS7i7ohuZOKzYZUThS/0JCsaXibmvq0Y5OYVGBKkpbIlCRVaHITZq+e1zZEJ3dJpTn0V9ejbVQJY7HOINBdWSm5o7mqBuZo/qidt1p+3sCaM7/WwJl5h9sGxsw51Yo2Z5Kt5EqXcE2cleVdO3eNpUYDhw3FcQOX7jVzGzh1591sehtqZeIazfNzT9+0Q1ijQszKJBmk8hkoG8vbgjyqaelSFOstduEZk0yU6nweopCcrH9plujqoiyZW894lMhey8kjIk6sengocSGMc/71PFxTXRJBN4z63k7el4camF9h6jbP52DKo1ZIMNGWN5MGeOuu8PMi84Wa68OPkJkWeBaq08Higiqe0YhuQBuNkJPRyENMkqnO/wPiKjLf \ No newline at end of file diff --git a/third_party/nccl/nccl_configure.bzl b/third_party/nccl/nccl_configure.bzl index 03642bcf04a..d59e861d70b 100644 --- a/third_party/nccl/nccl_configure.bzl +++ b/third_party/nccl/nccl_configure.bzl @@ -64,11 +64,17 @@ def _label(file): return Label("//third_party/nccl:{}".format(file)) def _create_local_nccl_repository(repository_ctx): + # Resolve all labels before doing any real work. Resolving causes the + # function to be restarted with all previous state being lost. This + # can easily lead to a O(n^2) runtime in the number of labels. + # See https://github.com/tensorflow/tensorflow/commit/62bd3534525a036f07d9851b3199d68212904778 + find_cuda_config_path = repository_ctx.path(Label("@org_tensorflow//third_party/gpus:find_cuda_config.py.gz.base64")) + nccl_version = get_host_environ(repository_ctx, _TF_NCCL_VERSION, "") if nccl_version: nccl_version = nccl_version.split(".")[0] - cuda_config = find_cuda_config(repository_ctx, ["cuda"]) + cuda_config = find_cuda_config(repository_ctx, find_cuda_config_path, ["cuda"]) cuda_version = cuda_config["cuda_version"].split(".") cuda_major = cuda_version[0] cuda_minor = cuda_version[1] @@ -90,7 +96,7 @@ def _create_local_nccl_repository(repository_ctx): ) else: # Create target for locally installed NCCL. - config = find_cuda_config(repository_ctx, ["nccl"]) + config = find_cuda_config(repository_ctx, find_cuda_config_path, ["nccl"]) config_wrap = { "%{nccl_version}": config["nccl_version"], "%{nccl_header_dir}": config["nccl_include_dir"], @@ -139,20 +145,12 @@ remote_nccl_configure = repository_rule( remotable = True, attrs = { "environ": attr.string_dict(), - "_find_cuda_config": attr.label( - default = Label("@org_tensorflow//third_party/gpus:find_cuda_config.py"), - ), }, ) nccl_configure = repository_rule( implementation = _nccl_autoconf_impl, environ = _ENVIRONS, - attrs = { - "_find_cuda_config": attr.label( - default = Label("@org_tensorflow//third_party/gpus:find_cuda_config.py"), - ), - }, ) """Detects and configures the NCCL configuration. 
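To make the workaround in _exec_find_cuda_config above concrete, here is a minimal stand-alone sketch of the same round trip: compress a script with zlib, base64-encode it, and rebuild and run it from one short command string, which is how the rule keeps the remote cmd.exe invocation under its 8192-character limit. The payload and the exec-based runner here are illustrative only; the real rule writes script.py to disk and invokes the Python binary on it.

import base64
import zlib

script = b"print('hello from find_cuda_config')"            # stand-in payload
encoded = base64.b64encode(zlib.compress(script)).decode()   # text that gets embedded

one_liner = (
    "from zlib import decompress; from base64 import b64decode; "
    "exec(decompress(b64decode('%s')))" % encoded
)
# `one_liner` is what a `python -c` invocation would receive; only the
# compressed, base64 text is inlined, so the command stays short.
exec(one_liner)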
diff --git a/third_party/tensorrt/tensorrt_configure.bzl b/third_party/tensorrt/tensorrt_configure.bzl index d26fa2a34d4..9c980a92cf8 100644 --- a/third_party/tensorrt/tensorrt_configure.bzl +++ b/third_party/tensorrt/tensorrt_configure.bzl @@ -88,13 +88,14 @@ def _create_local_tensorrt_repository(repository_ctx): # function to be restarted with all previous state being lost. This # can easily lead to a O(n^2) runtime in the number of labels. # See https://github.com/tensorflow/tensorflow/commit/62bd3534525a036f07d9851b3199d68212904778 + find_cuda_config_path = repository_ctx.path(Label("@org_tensorflow//third_party/gpus:find_cuda_config.py.gz.base64")) tpl_paths = { "build_defs.bzl": _tpl_path(repository_ctx, "build_defs.bzl"), "BUILD": _tpl_path(repository_ctx, "BUILD"), "tensorrt/include/tensorrt_config.h": _tpl_path(repository_ctx, "tensorrt/include/tensorrt_config.h"), } - config = find_cuda_config(repository_ctx, ["tensorrt"]) + config = find_cuda_config(repository_ctx, find_cuda_config_path, ["tensorrt"]) trt_version = config["tensorrt_version"] cpu_value = get_cpu_value(repository_ctx) @@ -190,16 +191,12 @@ remote_tensorrt_configure = repository_rule( remotable = True, attrs = { "environ": attr.string_dict(), - "_find_cuda_config": attr.label(default = "@org_tensorflow//third_party/gpus:find_cuda_config.py"), }, ) tensorrt_configure = repository_rule( implementation = _tensorrt_configure_impl, environ = _ENVIRONS + [_TF_TENSORRT_CONFIG_REPO], - attrs = { - "_find_cuda_config": attr.label(default = "@org_tensorflow//third_party/gpus:find_cuda_config.py"), - }, ) """Detects and configures the local CUDA toolchain. From 9a8fd0e99c4ba207d13eedec53aa9461108bc447 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Thu, 16 Jul 2020 11:43:38 -0700 Subject: [PATCH 0616/2522] [XLA:CPU] Lower small matmul through MLIR linalg by default Benchmarks are neutral to positive. 
PiperOrigin-RevId: 321611928 Change-Id: I0c7c5535b49b97581c99b1fb77f4ccd7741e3d1c --- tensorflow/compiler/xla/service/cpu/cpu_options.cc | 7 ------- tensorflow/compiler/xla/service/cpu/cpu_options.h | 1 - tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc | 4 +--- 3 files changed, 1 insertion(+), 11 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/cpu_options.cc b/tensorflow/compiler/xla/service/cpu/cpu_options.cc index c0222010fd9..ff654c83d61 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_options.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_options.cc @@ -25,7 +25,6 @@ const char* const kXlaOptimizeForSizeCpuOption = "xla_cpu_optimize_for_size"; const char* const kLlvmIrDotTilingFactor = "xla_llvm_dot_tiling_factor"; const char* const kXlaForceEnableExperimentalLlvmIrGemm = "xla_force_enable_experimental_llvm_ir_gemm"; -const char* const kXlaUseLinalgForDot = "xla_use_linalg_for_dot"; const char* const kLlvmIrGemmTileSize = "xla_llvm_ir_gemm_tile_size"; } // namespace @@ -64,12 +63,6 @@ bool ForceEnableExperimentalLlvmIrGemm(const HloModuleConfig& config) { return extra_options_map.count(kXlaForceEnableExperimentalLlvmIrGemm) > 0; } -bool UseLinalgForDot(const HloModuleConfig& config) { - const auto& extra_options_map = - config.debug_options().xla_backend_extra_options(); - return extra_options_map.count(kXlaUseLinalgForDot) > 0; -} - static absl::string_view RemoveSuffix(absl::string_view str, absl::string_view suffix) { CHECK_GE(str.size(), suffix.size()); diff --git a/tensorflow/compiler/xla/service/cpu/cpu_options.h b/tensorflow/compiler/xla/service/cpu/cpu_options.h index 5d25aef6912..99e6702d14a 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_options.h +++ b/tensorflow/compiler/xla/service/cpu/cpu_options.h @@ -27,7 +27,6 @@ namespace options { bool OptimizeForSizeRequested(const HloModuleConfig& config); bool VectorizedReduceDisabled(const HloModuleConfig& config); bool ForceEnableExperimentalLlvmIrGemm(const HloModuleConfig& config); -bool UseLinalgForDot(const HloModuleConfig& config); absl::optional LlvmIrGemvTilingFactor(const HloModuleConfig& config); absl::optional> LlvmIrGemmTileSize( const HloModuleConfig& config); diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc index ee4bcf4cd35..1dc5bfa95ec 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc @@ -986,9 +986,7 @@ DotImplementationStrategy GetDotImplementationStrategy( if (IsAlignedGemm(dot_info, target_machine_features)) { if (CanEmitTiledLlvmIrGemm(config, dot_info, target_machine_features)) { - return options::UseLinalgForDot(config) - ? DotImplementationStrategy::kLinalgMatmul - : DotImplementationStrategy::kTiledLlvmIrGemm; + return DotImplementationStrategy::kLinalgMatmul; } return DotImplementationStrategy::kEigen; } From de0776b3b7554f5ddde3d5e69e0ec1fc66963e2d Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Wed, 15 Jul 2020 17:06:46 -0700 Subject: [PATCH 0617/2522] Move some filesystems to new api and workaround compiler flags. 
--- .../filesystem/modular_filesystem.cc | 40 +++++++++---------- .../filesystem/modular_filesystem.h | 40 +++++++++---------- tensorflow/core/platform/null_file_system.h | 26 ++++++------ tensorflow/core/platform/ram_file_system.h | 28 ++++++------- .../core/platform/retrying_file_system.h | 40 +++++++++---------- .../platform/retrying_file_system_test.cc | 32 +++++++-------- 6 files changed, 103 insertions(+), 103 deletions(-) diff --git a/tensorflow/c/experimental/filesystem/modular_filesystem.cc b/tensorflow/c/experimental/filesystem/modular_filesystem.cc index 58541ea2b36..7f4013459cb 100644 --- a/tensorflow/c/experimental/filesystem/modular_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/modular_filesystem.cc @@ -35,7 +35,7 @@ using UniquePtrTo_TF_Status = ::std::unique_ptr; Status ModularFileSystem::NewRandomAccessFile( - const std::string& fname, std::unique_ptr* result) { + const std::string& fname, std::unique_ptr* result/*, TransactionToken* token */) { if (ops_->new_random_access_file == nullptr) return errors::Unimplemented(tensorflow::strings::StrCat( "Filesystem for ", fname, " does not support NewRandomAccessFile()")); @@ -54,7 +54,7 @@ Status ModularFileSystem::NewRandomAccessFile( } Status ModularFileSystem::NewWritableFile( - const std::string& fname, std::unique_ptr* result) { + const std::string& fname, std::unique_ptr* result/*, TransactionToken* token */) { if (ops_->new_writable_file == nullptr) return errors::Unimplemented(tensorflow::strings::StrCat( "Filesystem for ", fname, " does not support NewWritableFile()")); @@ -73,7 +73,7 @@ Status ModularFileSystem::NewWritableFile( } Status ModularFileSystem::NewAppendableFile( - const std::string& fname, std::unique_ptr* result) { + const std::string& fname, std::unique_ptr* result/*, TransactionToken* token */) { if (ops_->new_appendable_file == nullptr) return errors::Unimplemented(tensorflow::strings::StrCat( "Filesystem for ", fname, " does not support NewAppendableFile()")); @@ -92,7 +92,7 @@ Status ModularFileSystem::NewAppendableFile( } Status ModularFileSystem::NewReadOnlyMemoryRegionFromFile( - const std::string& fname, std::unique_ptr* result) { + const std::string& fname, std::unique_ptr* result/*, TransactionToken* token */) { if (ops_->new_read_only_memory_region_from_file == nullptr) return errors::Unimplemented(tensorflow::strings::StrCat( "Filesystem for ", fname, @@ -112,7 +112,7 @@ Status ModularFileSystem::NewReadOnlyMemoryRegionFromFile( return StatusFromTF_Status(plugin_status.get()); } -Status ModularFileSystem::FileExists(const std::string& fname) { +Status ModularFileSystem::FileExists(const std::string& fname/*, TransactionToken* token */) { if (ops_->path_exists == nullptr) return errors::Unimplemented(tensorflow::strings::StrCat( "Filesystem for ", fname, " does not support FileExists()")); @@ -125,7 +125,7 @@ Status ModularFileSystem::FileExists(const std::string& fname) { } bool ModularFileSystem::FilesExist(const std::vector& files, - std::vector* status) { + std::vector* status/*, TransactionToken* token */) { if (ops_->paths_exist == nullptr) return FileSystem::FilesExist(files, status); @@ -157,7 +157,7 @@ bool ModularFileSystem::FilesExist(const std::vector& files, } Status ModularFileSystem::GetChildren(const std::string& dir, - std::vector* result) { + std::vector* result/*, TransactionToken* token */) { if (ops_->get_children == nullptr) return errors::Unimplemented(tensorflow::strings::StrCat( "Filesystem for ", dir, " does not support GetChildren()")); @@ -182,7 +182,7 @@ 
Status ModularFileSystem::GetChildren(const std::string& dir, } Status ModularFileSystem::GetMatchingPaths(const std::string& pattern, - std::vector* result) { + std::vector* result/*, TransactionToken* token */) { if (ops_->get_matching_paths == nullptr) return internal::GetMatchingPaths(this, Env::Default(), pattern, result); @@ -203,7 +203,7 @@ Status ModularFileSystem::GetMatchingPaths(const std::string& pattern, return StatusFromTF_Status(plugin_status.get()); } -Status ModularFileSystem::DeleteFile(const std::string& fname) { +Status ModularFileSystem::DeleteFile(const std::string& fname/*, TransactionToken* token */) { if (ops_->delete_file == nullptr) return errors::Unimplemented(tensorflow::strings::StrCat( "Filesystem for ", fname, " does not support DeleteFile()")); @@ -217,7 +217,7 @@ Status ModularFileSystem::DeleteFile(const std::string& fname) { Status ModularFileSystem::DeleteRecursively(const std::string& dirname, int64* undeleted_files, - int64* undeleted_dirs) { + int64* undeleted_dirs/*, TransactionToken* token */) { if (undeleted_files == nullptr || undeleted_dirs == nullptr) return errors::FailedPrecondition( "DeleteRecursively must not be called with `undeleted_files` or " @@ -238,7 +238,7 @@ Status ModularFileSystem::DeleteRecursively(const std::string& dirname, return StatusFromTF_Status(plugin_status.get()); } -Status ModularFileSystem::DeleteDir(const std::string& dirname) { +Status ModularFileSystem::DeleteDir(const std::string& dirname/*, TransactionToken* token */) { if (ops_->delete_dir == nullptr) return errors::Unimplemented(tensorflow::strings::StrCat( "Filesystem for ", dirname, " does not support DeleteDir()")); @@ -250,7 +250,7 @@ Status ModularFileSystem::DeleteDir(const std::string& dirname) { return StatusFromTF_Status(plugin_status.get()); } -Status ModularFileSystem::RecursivelyCreateDir(const std::string& dirname) { +Status ModularFileSystem::RecursivelyCreateDir(const std::string& dirname/*, TransactionToken* token */) { if (ops_->recursively_create_dir == nullptr) return FileSystem::RecursivelyCreateDir(dirname); @@ -261,7 +261,7 @@ Status ModularFileSystem::RecursivelyCreateDir(const std::string& dirname) { return StatusFromTF_Status(plugin_status.get()); } -Status ModularFileSystem::CreateDir(const std::string& dirname) { +Status ModularFileSystem::CreateDir(const std::string& dirname/*, TransactionToken* token */) { if (ops_->create_dir == nullptr) return errors::Unimplemented(tensorflow::strings::StrCat( "Filesystem for ", dirname, " does not support CreateDir()")); @@ -273,7 +273,7 @@ Status ModularFileSystem::CreateDir(const std::string& dirname) { return StatusFromTF_Status(plugin_status.get()); } -Status ModularFileSystem::Stat(const std::string& fname, FileStatistics* stat) { +Status ModularFileSystem::Stat(const std::string& fname, FileStatistics* stat/*, TransactionToken* token */) { if (ops_->stat == nullptr) return errors::Unimplemented(tensorflow::strings::StrCat( "Filesystem for ", fname, " does not support Stat()")); @@ -296,7 +296,7 @@ Status ModularFileSystem::Stat(const std::string& fname, FileStatistics* stat) { return StatusFromTF_Status(plugin_status.get()); } -Status ModularFileSystem::IsDirectory(const std::string& name) { +Status ModularFileSystem::IsDirectory(const std::string& name/*, TransactionToken* token */) { if (ops_->is_directory == nullptr) return FileSystem::IsDirectory(name); UniquePtrTo_TF_Status plugin_status(TF_NewStatus(), TF_DeleteStatus); @@ -307,7 +307,7 @@ Status ModularFileSystem::IsDirectory(const 
std::string& name) { } Status ModularFileSystem::GetFileSize(const std::string& fname, - uint64* file_size) { + uint64* file_size/*, TransactionToken* token */) { if (ops_->get_file_size == nullptr) { FileStatistics stat; Status status = Stat(fname, &stat); @@ -327,7 +327,7 @@ Status ModularFileSystem::GetFileSize(const std::string& fname, } Status ModularFileSystem::RenameFile(const std::string& src, - const std::string& target) { + const std::string& target/*, TransactionToken* token */) { if (ops_->rename_file == nullptr) { Status status = CopyFile(src, target); if (status.ok()) status = DeleteFile(src); @@ -343,7 +343,7 @@ Status ModularFileSystem::RenameFile(const std::string& src, } Status ModularFileSystem::CopyFile(const std::string& src, - const std::string& target) { + const std::string& target/*, TransactionToken* token */) { if (ops_->copy_file == nullptr) return FileSystem::CopyFile(src, target); UniquePtrTo_TF_Status plugin_status(TF_NewStatus(), TF_DeleteStatus); @@ -354,7 +354,7 @@ Status ModularFileSystem::CopyFile(const std::string& src, return StatusFromTF_Status(plugin_status.get()); } -std::string ModularFileSystem::TranslateName(const std::string& name) const { +std::string ModularFileSystem::TranslateName(const std::string& name/*, TransactionToken* token */) const { if (ops_->translate_name == nullptr) return FileSystem::TranslateName(name); char* p = ops_->translate_name(filesystem_.get(), name.c_str()); @@ -366,7 +366,7 @@ std::string ModularFileSystem::TranslateName(const std::string& name) const { return ret; } -void ModularFileSystem::FlushCaches() { +void ModularFileSystem::FlushCaches(TransactionToken* token=nullptr) { if (ops_->flush_caches != nullptr) ops_->flush_caches(filesystem_.get()); } diff --git a/tensorflow/c/experimental/filesystem/modular_filesystem.h b/tensorflow/c/experimental/filesystem/modular_filesystem.h index baf665fd6aa..b40009ffbec 100644 --- a/tensorflow/c/experimental/filesystem/modular_filesystem.h +++ b/tensorflow/c/experimental/filesystem/modular_filesystem.h @@ -61,34 +61,34 @@ class ModularFileSystem final : public FileSystem { Status NewRandomAccessFile( const std::string& fname, - std::unique_ptr* result) override; + std::unique_ptr* result/*, TransactionToken* token = nullptr */) override; Status NewWritableFile(const std::string& fname, - std::unique_ptr* result) override; + std::unique_ptr* result/*, TransactionToken* token = nullptr */) override; Status NewAppendableFile(const std::string& fname, - std::unique_ptr* result) override; + std::unique_ptr* result/*, TransactionToken* token = nullptr */) override; Status NewReadOnlyMemoryRegionFromFile( const std::string& fname, - std::unique_ptr* result) override; - Status FileExists(const std::string& fname) override; + std::unique_ptr* result/*, TransactionToken* token = nullptr */) override; + Status FileExists(const std::string& fname/*, TransactionToken* token = nullptr */) override; bool FilesExist(const std::vector& files, - std::vector* status) override; + std::vector* status/*, TransactionToken* token = nullptr */) override; Status GetChildren(const std::string& dir, - std::vector* result) override; + std::vector* result/*, TransactionToken* token = nullptr */) override; Status GetMatchingPaths(const std::string& pattern, - std::vector* results) override; - Status DeleteFile(const std::string& fname) override; + std::vector* results/*, TransactionToken* token = nullptr */) override; + Status DeleteFile(const std::string& fname/*, TransactionToken* token = nullptr */) 
override; Status DeleteRecursively(const std::string& dirname, int64* undeleted_files, - int64* undeleted_dirs) override; - Status DeleteDir(const std::string& dirname) override; - Status RecursivelyCreateDir(const std::string& dirname) override; - Status CreateDir(const std::string& dirname) override; - Status Stat(const std::string& fname, FileStatistics* stat) override; - Status IsDirectory(const std::string& fname) override; - Status GetFileSize(const std::string& fname, uint64* file_size) override; - Status RenameFile(const std::string& src, const std::string& target) override; - Status CopyFile(const std::string& src, const std::string& target) override; - std::string TranslateName(const std::string& name) const override; - void FlushCaches() override; + int64* undeleted_dirs/*, TransactionToken* token = nullptr */) override; + Status DeleteDir(const std::string& dirname/*, TransactionToken* token = nullptr */) override; + Status RecursivelyCreateDir(const std::string& dirname/*, TransactionToken* token = nullptr */) override; + Status CreateDir(const std::string& dirname/*, TransactionToken* token = nullptr */) override; + Status Stat(const std::string& fname, FileStatistics* stat/*, TransactionToken* token = nullptr */) override; + Status IsDirectory(const std::string& fname/*, TransactionToken* token = nullptr */) override; + Status GetFileSize(const std::string& fname, uint64* file_size/*, TransactionToken* token = nullptr */) override; + Status RenameFile(const std::string& src, const std::string& target/*, TransactionToken* token = nullptr */) override; + Status CopyFile(const std::string& src, const std::string& target/*, TransactionToken* token = nullptr */) override; + std::string TranslateName(const std::string& name/*, TransactionToken* token = nullptr */) const override; + void FlushCaches(/* TransactionToken* token=nullptr */) override; private: std::unique_ptr filesystem_; diff --git a/tensorflow/core/platform/null_file_system.h b/tensorflow/core/platform/null_file_system.h index 420abc1ada8..44d9961ea0b 100644 --- a/tensorflow/core/platform/null_file_system.h +++ b/tensorflow/core/platform/null_file_system.h @@ -37,61 +37,61 @@ class NullFileSystem : public FileSystem { ~NullFileSystem() override = default; Status NewRandomAccessFile( - const string& fname, std::unique_ptr* result) override { + const string& fname, std::unique_ptr* result/*, TransactionToken* token = nullptr */) override { return errors::Unimplemented("NewRandomAccessFile unimplemented"); } Status NewWritableFile(const string& fname, - std::unique_ptr* result) override { + std::unique_ptr* result/*, TransactionToken* token = nullptr */) override { return errors::Unimplemented("NewWritableFile unimplemented"); } Status NewAppendableFile(const string& fname, - std::unique_ptr* result) override { + std::unique_ptr* result/*, TransactionToken* token = nullptr */) override { return errors::Unimplemented("NewAppendableFile unimplemented"); } Status NewReadOnlyMemoryRegionFromFile( const string& fname, - std::unique_ptr* result) override { + std::unique_ptr* result/*, TransactionToken* token = nullptr */) override { return errors::Unimplemented( "NewReadOnlyMemoryRegionFromFile unimplemented"); } - Status FileExists(const string& fname) override { + Status FileExists(const string& fname/*, TransactionToken* token = nullptr */) override { return errors::Unimplemented("FileExists unimplemented"); } - Status GetChildren(const string& dir, std::vector* result) override { + Status GetChildren(const string& dir, 
std::vector* result/*, TransactionToken* token = nullptr */) override { return errors::Unimplemented("GetChildren unimplemented"); } Status GetMatchingPaths(const string& pattern, - std::vector* results) override { + std::vector* results/*, TransactionToken* token = nullptr */) override { return internal::GetMatchingPaths(this, Env::Default(), pattern, results); } - Status DeleteFile(const string& fname) override { + Status DeleteFile(const string& fname/*, TransactionToken* token = nullptr */) override { return errors::Unimplemented("DeleteFile unimplemented"); } - Status CreateDir(const string& dirname) override { + Status CreateDir(const string& dirname/*, TransactionToken* token = nullptr */) override { return errors::Unimplemented("CreateDir unimplemented"); } - Status DeleteDir(const string& dirname) override { + Status DeleteDir(const string& dirname/*, TransactionToken* token = nullptr */) override { return errors::Unimplemented("DeleteDir unimplemented"); } - Status GetFileSize(const string& fname, uint64* file_size) override { + Status GetFileSize(const string& fname, uint64* file_size/*, TransactionToken* token = nullptr */) override { return errors::Unimplemented("GetFileSize unimplemented"); } - Status RenameFile(const string& src, const string& target) override { + Status RenameFile(const string& src, const string& target/*, TransactionToken* token = nullptr */) override { return errors::Unimplemented("RenameFile unimplemented"); } - Status Stat(const string& fname, FileStatistics* stat) override { + Status Stat(const string& fname, FileStatistics* stat/*, TransactionToken* token = nullptr */) override { return errors::Unimplemented("Stat unimplemented"); } }; diff --git a/tensorflow/core/platform/ram_file_system.h b/tensorflow/core/platform/ram_file_system.h index 871d38f97c5..b8b4e3840e0 100644 --- a/tensorflow/core/platform/ram_file_system.h +++ b/tensorflow/core/platform/ram_file_system.h @@ -104,7 +104,7 @@ class RamRandomAccessFile : public RandomAccessFile, public WritableFile { class RamFileSystem : public FileSystem { public: Status NewRandomAccessFile( - const string& fname, std::unique_ptr* result) override { + const string& fname, std::unique_ptr* result/*, TransactionToken* token = nullptr */) override { mutex_lock m(mu_); if (fs_.find(fname) == fs_.end()) { return errors::NotFound(""); @@ -115,7 +115,7 @@ class RamFileSystem : public FileSystem { } Status NewWritableFile(const string& fname, - std::unique_ptr* result) override { + std::unique_ptr* result/*, TransactionToken* token = nullptr */) override { mutex_lock m(mu_); if (fs_.find(fname) == fs_.end()) { fs_[fname] = std::make_shared(); @@ -125,7 +125,7 @@ class RamFileSystem : public FileSystem { return Status::OK(); } Status NewAppendableFile(const string& fname, - std::unique_ptr* result) override { + std::unique_ptr* result/*, TransactionToken* token = nullptr */) override { mutex_lock m(mu_); if (fs_.find(fname) == fs_.end()) { fs_[fname] = std::make_shared(); @@ -137,16 +137,16 @@ class RamFileSystem : public FileSystem { Status NewReadOnlyMemoryRegionFromFile( const string& fname, - std::unique_ptr* result) override { + std::unique_ptr* result/*, TransactionToken* token = nullptr */) override { return errors::Unimplemented(""); } - Status FileExists(const string& fname) override { + Status FileExists(const string& fname/*, TransactionToken* token = nullptr */) override { FileStatistics stat; return Stat(fname, &stat); } - Status GetChildren(const string& dir, std::vector* result) override { + Status 
GetChildren(const string& dir, std::vector* result/*, TransactionToken* token = nullptr */) override { mutex_lock m(mu_); auto it = fs_.lower_bound(dir); while (it != fs_.end() && absl::StartsWith(it->first, dir)) { @@ -158,7 +158,7 @@ class RamFileSystem : public FileSystem { } Status GetMatchingPaths(const string& pattern, - std::vector* results) override { + std::vector* results/*, TransactionToken* token = nullptr */) override { mutex_lock m(mu_); Env* env = Env::Default(); for (auto it = fs_.begin(); it != fs_.end(); ++it) { @@ -169,7 +169,7 @@ class RamFileSystem : public FileSystem { return Status::OK(); } - Status Stat(const string& fname, FileStatistics* stat) override { + Status Stat(const string& fname, FileStatistics* stat/*, TransactionToken* token = nullptr */) override { mutex_lock m(mu_); auto it = fs_.lower_bound(fname); if (it == fs_.end()) { @@ -189,7 +189,7 @@ class RamFileSystem : public FileSystem { return Status::OK(); } - Status DeleteFile(const string& fname) override { + Status DeleteFile(const string& fname/*, TransactionToken* token = nullptr */) override { mutex_lock m(mu_); if (fs_.find(fname) != fs_.end()) { fs_.erase(fname); @@ -199,15 +199,15 @@ class RamFileSystem : public FileSystem { return errors::NotFound(""); } - Status CreateDir(const string& dirname) override { return Status::OK(); } + Status CreateDir(const string& dirname/*, TransactionToken* token = nullptr */) override { return Status::OK(); } - Status RecursivelyCreateDir(const string& dirname) override { + Status RecursivelyCreateDir(const string& dirname/*, TransactionToken* token = nullptr */) override { return Status::OK(); } - Status DeleteDir(const string& dirname) override { return Status::OK(); } + Status DeleteDir(const string& dirname/*, TransactionToken* token = nullptr */) override { return Status::OK(); } - Status GetFileSize(const string& fname, uint64* file_size) override { + Status GetFileSize(const string& fname, uint64* file_size/*, TransactionToken* token = nullptr */) override { mutex_lock m(mu_); if (fs_.find(fname) != fs_.end()) { *file_size = fs_[fname]->size(); @@ -216,7 +216,7 @@ class RamFileSystem : public FileSystem { return errors::NotFound(""); } - Status RenameFile(const string& src, const string& target) override { + Status RenameFile(const string& src, const string& target/*, TransactionToken* token = nullptr */) override { mutex_lock m(mu_); if (fs_.find(src) != fs_.end()) { fs_[target] = fs_[src]; diff --git a/tensorflow/core/platform/retrying_file_system.h b/tensorflow/core/platform/retrying_file_system.h index 7c7d7563d22..1996a67a422 100644 --- a/tensorflow/core/platform/retrying_file_system.h +++ b/tensorflow/core/platform/retrying_file_system.h @@ -40,25 +40,25 @@ class RetryingFileSystem : public FileSystem { Status NewRandomAccessFile( const string& filename, - std::unique_ptr* result) override; + std::unique_ptr* result/*, TransactionToken* token = nullptr */) override; Status NewWritableFile(const string& fname, - std::unique_ptr* result) override; + std::unique_ptr* result/*, TransactionToken* token = nullptr */) override; Status NewAppendableFile(const string& fname, - std::unique_ptr* result) override; + std::unique_ptr* result/*, TransactionToken* token = nullptr */) override; Status NewReadOnlyMemoryRegionFromFile( const string& filename, - std::unique_ptr* result) override; + std::unique_ptr* result/*, TransactionToken* token = nullptr */) override; - Status FileExists(const string& fname) override { + Status FileExists(const string& fname/*, 
TransactionToken* token = nullptr */) override { return RetryingUtils::CallWithRetries( [this, &fname]() { return base_file_system_->FileExists(fname); }, retry_config_); } - Status GetChildren(const string& dir, std::vector* result) override { + Status GetChildren(const string& dir, std::vector* result/*, TransactionToken* token = nullptr */) override { return RetryingUtils::CallWithRetries( [this, &dir, result]() { return base_file_system_->GetChildren(dir, result); @@ -67,7 +67,7 @@ class RetryingFileSystem : public FileSystem { } Status GetMatchingPaths(const string& pattern, - std::vector* result) override { + std::vector* result/*, TransactionToken* token = nullptr */) override { return RetryingUtils::CallWithRetries( [this, &pattern, result]() { return base_file_system_->GetMatchingPaths(pattern, result); @@ -75,31 +75,31 @@ class RetryingFileSystem : public FileSystem { retry_config_); } - Status Stat(const string& fname, FileStatistics* stat) override { + Status Stat(const string& fname, FileStatistics* stat/*, TransactionToken* token = nullptr */) override { return RetryingUtils::CallWithRetries( [this, &fname, stat]() { return base_file_system_->Stat(fname, stat); }, retry_config_); } - Status DeleteFile(const string& fname) override { + Status DeleteFile(const string& fname/*, TransactionToken* token = nullptr */) override { return RetryingUtils::DeleteWithRetries( [this, &fname]() { return base_file_system_->DeleteFile(fname); }, retry_config_); } - Status CreateDir(const string& dirname) override { + Status CreateDir(const string& dirname/*, TransactionToken* token = nullptr */) override { return RetryingUtils::CallWithRetries( [this, &dirname]() { return base_file_system_->CreateDir(dirname); }, retry_config_); } - Status DeleteDir(const string& dirname) override { + Status DeleteDir(const string& dirname/*, TransactionToken* token = nullptr */) override { return RetryingUtils::DeleteWithRetries( [this, &dirname]() { return base_file_system_->DeleteDir(dirname); }, retry_config_); } - Status GetFileSize(const string& fname, uint64* file_size) override { + Status GetFileSize(const string& fname, uint64* file_size/*, TransactionToken* token = nullptr */) override { return RetryingUtils::CallWithRetries( [this, &fname, file_size]() { return base_file_system_->GetFileSize(fname, file_size); @@ -107,7 +107,7 @@ class RetryingFileSystem : public FileSystem { retry_config_); } - Status RenameFile(const string& src, const string& target) override { + Status RenameFile(const string& src, const string& target/*, TransactionToken* token = nullptr */) override { return RetryingUtils::CallWithRetries( [this, &src, &target]() { return base_file_system_->RenameFile(src, target); @@ -115,7 +115,7 @@ class RetryingFileSystem : public FileSystem { retry_config_); } - Status IsDirectory(const string& dirname) override { + Status IsDirectory(const string& dirname/*, TransactionToken* token = nullptr */) override { return RetryingUtils::CallWithRetries( [this, &dirname]() { return base_file_system_->IsDirectory(dirname); }, retry_config_); @@ -127,7 +127,7 @@ class RetryingFileSystem : public FileSystem { } Status DeleteRecursively(const string& dirname, int64* undeleted_files, - int64* undeleted_dirs) override { + int64* undeleted_dirs/*, TransactionToken* token = nullptr */) override { return RetryingUtils::DeleteWithRetries( [this, &dirname, undeleted_files, undeleted_dirs]() { return base_file_system_->DeleteRecursively(dirname, undeleted_files, @@ -136,7 +136,7 @@ class RetryingFileSystem : 
public FileSystem { retry_config_); } - void FlushCaches() override { base_file_system_->FlushCaches(); } + void FlushCaches(/* TransactionToken* token=nullptr */) override { base_file_system_->FlushCaches(); } Underlying* underlying() const { return base_file_system_.get(); } @@ -218,7 +218,7 @@ class RetryingWritableFile : public WritableFile { template Status RetryingFileSystem::NewRandomAccessFile( - const string& filename, std::unique_ptr* result) { + const string& filename, std::unique_ptr* result/*, TransactionToken* token */) { std::unique_ptr base_file; TF_RETURN_IF_ERROR(RetryingUtils::CallWithRetries( [this, &filename, &base_file]() { @@ -232,7 +232,7 @@ Status RetryingFileSystem::NewRandomAccessFile( template Status RetryingFileSystem::NewWritableFile( - const string& filename, std::unique_ptr* result) { + const string& filename, std::unique_ptr* result/*, TransactionToken* token */) { std::unique_ptr base_file; TF_RETURN_IF_ERROR(RetryingUtils::CallWithRetries( [this, &filename, &base_file]() { @@ -246,7 +246,7 @@ Status RetryingFileSystem::NewWritableFile( template Status RetryingFileSystem::NewAppendableFile( - const string& filename, std::unique_ptr* result) { + const string& filename, std::unique_ptr* result/*, TransactionToken* token */) { std::unique_ptr base_file; TF_RETURN_IF_ERROR(RetryingUtils::CallWithRetries( [this, &filename, &base_file]() { @@ -260,7 +260,7 @@ Status RetryingFileSystem::NewAppendableFile( template Status RetryingFileSystem::NewReadOnlyMemoryRegionFromFile( - const string& filename, std::unique_ptr* result) { + const string& filename, std::unique_ptr* result/*, TransactionToken* token */) { return RetryingUtils::CallWithRetries( [this, &filename, result]() { return base_file_system_->NewReadOnlyMemoryRegionFromFile(filename, diff --git a/tensorflow/core/platform/retrying_file_system_test.cc b/tensorflow/core/platform/retrying_file_system_test.cc index b43c3375265..9e95b5a7c92 100644 --- a/tensorflow/core/platform/retrying_file_system_test.cc +++ b/tensorflow/core/platform/retrying_file_system_test.cc @@ -100,76 +100,76 @@ class MockFileSystem : public FileSystem { : calls_(calls), flushed_(flushed) {} Status NewRandomAccessFile( - const string& fname, std::unique_ptr* result) override { + const string& fname, std::unique_ptr* result/*, TransactionToken* token = nullptr */) override { *result = std::move(random_access_file_to_return); return calls_.ConsumeNextCall("NewRandomAccessFile"); } Status NewWritableFile(const string& fname, - std::unique_ptr* result) override { + std::unique_ptr* result/*, TransactionToken* token = nullptr */) override { *result = std::move(writable_file_to_return); return calls_.ConsumeNextCall("NewWritableFile"); } Status NewAppendableFile(const string& fname, - std::unique_ptr* result) override { + std::unique_ptr* result/*, TransactionToken* token = nullptr */) override { *result = std::move(writable_file_to_return); return calls_.ConsumeNextCall("NewAppendableFile"); } Status NewReadOnlyMemoryRegionFromFile( const string& fname, - std::unique_ptr* result) override { + std::unique_ptr* result/*, TransactionToken* token = nullptr */) override { return calls_.ConsumeNextCall("NewReadOnlyMemoryRegionFromFile"); } - Status FileExists(const string& fname) override { + Status FileExists(const string& fname/*, TransactionToken* token = nullptr */) override { return calls_.ConsumeNextCall("FileExists"); } - Status GetChildren(const string& dir, std::vector* result) override { + Status GetChildren(const string& dir, 
std::vector* result/*, TransactionToken* token = nullptr */) override { return calls_.ConsumeNextCall("GetChildren"); } Status GetMatchingPaths(const string& dir, - std::vector* result) override { + std::vector* result/*, TransactionToken* token = nullptr */) override { return calls_.ConsumeNextCall("GetMatchingPaths"); } - Status Stat(const string& fname, FileStatistics* stat) override { + Status Stat(const string& fname, FileStatistics* stat/*, TransactionToken* token = nullptr */) override { return calls_.ConsumeNextCall("Stat"); } - Status DeleteFile(const string& fname) override { + Status DeleteFile(const string& fname/*, TransactionToken* token = nullptr */) override { return calls_.ConsumeNextCall("DeleteFile"); } - Status CreateDir(const string& dirname) override { + Status CreateDir(const string& dirname/*, TransactionToken* token = nullptr */) override { return calls_.ConsumeNextCall("CreateDir"); } - Status DeleteDir(const string& dirname) override { + Status DeleteDir(const string& dirname/*, TransactionToken* token = nullptr */) override { return calls_.ConsumeNextCall("DeleteDir"); } - Status GetFileSize(const string& fname, uint64* file_size) override { + Status GetFileSize(const string& fname, uint64* file_size/*, TransactionToken* token = nullptr */) override { return calls_.ConsumeNextCall("GetFileSize"); } - Status RenameFile(const string& src, const string& target) override { + Status RenameFile(const string& src, const string& target/*, TransactionToken* token = nullptr */) override { return calls_.ConsumeNextCall("RenameFile"); } - Status IsDirectory(const string& dirname) override { + Status IsDirectory(const string& dirname/*, TransactionToken* token = nullptr */) override { return calls_.ConsumeNextCall("IsDirectory"); } Status DeleteRecursively(const string& dirname, int64* undeleted_files, - int64* undeleted_dirs) override { + int64* undeleted_dirs/*, TransactionToken* token = nullptr */) override { return calls_.ConsumeNextCall("DeleteRecursively"); } - void FlushCaches() override { + void FlushCaches(/* TransactionToken* token=nullptr */) override { if (flushed_) { *flushed_ = true; } From 02ca05721ecaac4b2f0ed4f29cc4b690523795e0 Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Thu, 16 Jul 2020 18:50:22 +0000 Subject: [PATCH 0618/2522] removed unused imports --- .../python/kernel_tests/map_ops_test.py | 23 ++++++------------- tensorflow/python/ops/map_ops.py | 3 --- 2 files changed, 7 insertions(+), 19 deletions(-) diff --git a/tensorflow/python/kernel_tests/map_ops_test.py b/tensorflow/python/kernel_tests/map_ops_test.py index e9355af27ba..b71e8ca8ebe 100644 --- a/tensorflow/python/kernel_tests/map_ops_test.py +++ b/tensorflow/python/kernel_tests/map_ops_test.py @@ -17,27 +17,18 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.platform import test from absl.testing import parameterized -from tensorflow.python.framework import test_util - -from tensorflow.python.client import session from tensorflow.python.eager import backprop -from tensorflow.python.eager import context -from tensorflow.python.eager import def_function -from tensorflow.python.eager import function from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors +from tensorflow.python.framework import test_util from tensorflow.python.ops import map_ops +from tensorflow.python.platform import test 
@test_util.run_all_in_graph_and_eager_modes class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): - - def testEmptyTensorMap(self): - m = map_ops.empty_tensor_map() - - def testTensorMapSize(self): + + def testEmptyTensorMapSize(self): m = map_ops.empty_tensor_map() s = map_ops.tensor_map_size(m) self.assertAllEqual(s, 0) @@ -62,12 +53,12 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): m = map_ops.empty_tensor_map() k = constant_op.constant(1.0) v = constant_op.constant(2.0) - + with self.assertRaisesRegex(errors.InvalidArgumentError, "Trying to lookup non-existent key."): l = map_ops.tensor_map_lookup(m, k) self.evaluate(l) - + def testTensorMapReplace(self): m = map_ops.empty_tensor_map() k = constant_op.constant(1.0) @@ -115,7 +106,7 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): "Trying to erase non-existent item."): m, e = map_ops.tensor_map_erase(m, k) self.evaluate(e) - + def testTensorMapEraseMissingKeyFails(self): m = map_ops.empty_tensor_map() k = constant_op.constant(1.0) diff --git a/tensorflow/python/ops/map_ops.py b/tensorflow/python/ops/map_ops.py index 20806e6fd30..7813247c8e2 100644 --- a/tensorflow/python/ops/map_ops.py +++ b/tensorflow/python/ops/map_ops.py @@ -18,14 +18,11 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.framework import load_library -from tensorflow.python.platform import resource_loader # go/tf-wildcard-import # pylint: disable=wildcard-import from tensorflow.python.framework import ops from tensorflow.python.ops import gen_map_ops from tensorflow.python.ops.gen_map_ops import * -from tensorflow.python.framework import constant_op ops.NotDifferentiable("EmptyTensorMap") From e3d5bcc115dc449290336718eacae0f2fa03c4ff Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Thu, 16 Jul 2020 18:50:47 +0000 Subject: [PATCH 0619/2522] add api def files --- .../core/api_def/base_api/api_def_EmptyTensorMap.pbtxt | 7 +++++++ .../core/api_def/base_api/api_def_TensorMapErase.pbtxt | 10 ++++++++++ .../api_def/base_api/api_def_TensorMapInsert.pbtxt | 10 ++++++++++ .../api_def/base_api/api_def_TensorMapLookup.pbtxt | 9 +++++++++ .../api_def/base_api/api_def_TensorMapReplace.pbtxt | 10 ++++++++++ .../core/api_def/base_api/api_def_TensorMapSize.pbtxt | 8 ++++++++ tensorflow/core/ops/map_ops.cc | 2 +- 7 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_EmptyTensorMap.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_TensorMapErase.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_TensorMapInsert.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_TensorMapLookup.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_TensorMapReplace.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_TensorMapSize.pbtxt diff --git a/tensorflow/core/api_def/base_api/api_def_EmptyTensorMap.pbtxt b/tensorflow/core/api_def/base_api/api_def_EmptyTensorMap.pbtxt new file mode 100644 index 00000000000..fb5ce3d5413 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_EmptyTensorMap.pbtxt @@ -0,0 +1,7 @@ +op { + graph_op_name: "EmptyTensorMap" + summary: "Creates and returns an empty tensor map." + description: < Date: Thu, 16 Jul 2020 11:52:42 -0700 Subject: [PATCH 0620/2522] Update "master" to "dispatch"/"dispatcher" in tf.data service terminology. 
Dispatcher is more descriptive and follows the guidance in https://developers.google.com/style/word-list#master PiperOrigin-RevId: 321613785 Change-Id: Iaa576d35f0581e21278101f8b31201ba737a6865 --- tensorflow/core/data/service/BUILD | 44 ++++---- tensorflow/core/data/service/data_service.cc | 40 +++---- tensorflow/core/data/service/data_service.h | 22 ++-- .../core/data/service/data_service_test.cc | 9 +- .../{master.proto => dispatcher.proto} | 8 +- .../{master_impl.cc => dispatcher_impl.cc} | 56 +++++----- .../{master_impl.h => dispatcher_impl.h} | 18 ++-- ...master_impl.cc => grpc_dispatcher_impl.cc} | 18 ++-- ...c_master_impl.h => grpc_dispatcher_impl.h} | 24 ++--- .../core/data/service/grpc_worker_impl.cc | 4 +- .../core/data/service/grpc_worker_impl.h | 2 +- tensorflow/core/data/service/server_lib.cc | 43 ++++---- tensorflow/core/data/service/server_lib.h | 36 +++---- tensorflow/core/data/service/test_cluster.cc | 10 +- tensorflow/core/data/service/test_cluster.h | 10 +- tensorflow/core/data/service/worker_impl.cc | 37 ++++--- tensorflow/core/data/service/worker_impl.h | 27 ++--- .../experimental/data_service_dataset_op.cc | 21 ++-- .../data/experimental/data_service_ops.cc | 2 +- .../data/experimental/data_service_ops.h | 2 +- .../data/experimental/ops/data_service_ops.py | 4 +- .../data/experimental/service/__init__.py | 2 +- .../data/experimental/service/server_lib.py | 72 +++++++------ .../experimental/service/server_lib_test.py | 70 ++++++------ .../service/server_lib_wrapper.cc | 25 ++--- .../kernel_tests/data_service_ops_test.py | 102 +++++++++--------- ...perimental.service.-dispatch-server.pbtxt} | 4 +- ....experimental.service.-worker-server.pbtxt | 2 +- ...tensorflow.data.experimental.service.pbtxt | 2 +- .../tools/def_file_filter/symbols_pybind.txt | 4 +- 30 files changed, 367 insertions(+), 353 deletions(-) rename tensorflow/core/data/service/{master.proto => dispatcher.proto} (94%) rename tensorflow/core/data/service/{master_impl.cc => dispatcher_impl.cc} (87%) rename tensorflow/core/data/service/{master_impl.h => dispatcher_impl.h} (94%) rename tensorflow/core/data/service/{grpc_master_impl.cc => grpc_dispatcher_impl.cc} (73%) rename tensorflow/core/data/service/{grpc_master_impl.h => grpc_dispatcher_impl.h} (68%) rename tensorflow/tools/api/golden/v2/{tensorflow.data.experimental.service.-master-server.pbtxt => tensorflow.data.experimental.service.-dispatch-server.pbtxt} (85%) diff --git a/tensorflow/core/data/service/BUILD b/tensorflow/core/data/service/BUILD index bebd179401e..d2a887a82f8 100644 --- a/tensorflow/core/data/service/BUILD +++ b/tensorflow/core/data/service/BUILD @@ -28,8 +28,8 @@ tf_proto_library( ) tf_proto_library( - name = "master_proto", - srcs = ["master.proto"], + name = "dispatcher_proto", + srcs = ["dispatcher.proto"], has_services = 1, cc_api_version = 2, protodeps = tf_additional_all_protos() + [ @@ -49,17 +49,17 @@ tf_proto_library( ) cc_library( - name = "master_impl", - srcs = ["master_impl.cc"], + name = "dispatcher_impl", + srcs = ["dispatcher_impl.cc"], hdrs = [ - "master_impl.h", + "dispatcher_impl.h", ], deps = [ ":common_proto_cc", ":credentials_factory", ":data_service", + ":dispatcher_proto_cc", ":grpc_util", - ":master_proto_cc", ":worker_cc_grpc_proto", ":worker_proto_cc", "//tensorflow/c:c_api_internal", @@ -86,9 +86,9 @@ cc_library( deps = [ ":common_proto_cc", ":credentials_factory", + ":dispatcher_cc_grpc_proto", + ":dispatcher_proto_cc", ":grpc_util", - ":master_cc_grpc_proto", - ":master_proto_cc", ":worker_proto_cc", 
"//tensorflow/c:c_api_internal", "//tensorflow/c:tf_status_helper", @@ -207,12 +207,12 @@ tf_cc_test( ) cc_library( - name = "grpc_master_impl", - srcs = ["grpc_master_impl.cc"], - hdrs = ["grpc_master_impl.h"], + name = "grpc_dispatcher_impl", + srcs = ["grpc_dispatcher_impl.cc"], + hdrs = ["grpc_dispatcher_impl.h"], deps = [ - ":master_cc_grpc_proto", - ":master_impl", + ":dispatcher_cc_grpc_proto", + ":dispatcher_impl", "//tensorflow/core/distributed_runtime/rpc:grpc_util", tf_grpc_cc_dependency(), ], @@ -250,7 +250,7 @@ cc_library( ], deps = [ ":credentials_factory", - ":grpc_master_impl", + ":grpc_dispatcher_impl", ":grpc_util", ":grpc_worker_impl", "//tensorflow/core:lib", @@ -268,9 +268,9 @@ cc_library( ], deps = [ ":credentials_factory", + ":dispatcher_cc_grpc_proto", + ":dispatcher_proto_cc", ":grpc_util", - ":master_cc_grpc_proto", - ":master_proto_cc", ":worker_cc_grpc_proto", ":worker_proto_cc", "//tensorflow/core:framework", @@ -287,12 +287,12 @@ tf_cc_test( tags = ["no_windows"], deps = [ ":data_service", - ":grpc_master_impl", + ":dispatcher_cc_grpc_proto", + ":dispatcher_proto_cc", + ":grpc_dispatcher_impl", ":grpc_util", ":grpc_worker_impl", ":local_credentials_factory", - ":master_cc_grpc_proto", - ":master_proto_cc", ":server_lib", ":test_cluster", ":test_util", @@ -309,11 +309,11 @@ tf_cc_test( ) cc_grpc_library( - name = "master_cc_grpc_proto", - srcs = [":master_proto"], + name = "dispatcher_cc_grpc_proto", + srcs = [":dispatcher_proto"], generate_mocks = True, grpc_only = True, - deps = [":master_proto_cc"], + deps = [":dispatcher_proto_cc"], ) cc_grpc_library( diff --git a/tensorflow/core/data/service/data_service.cc b/tensorflow/core/data/service/data_service.cc index d4e08c77f35..be09b10c1fc 100644 --- a/tensorflow/core/data/service/data_service.cc +++ b/tensorflow/core/data/service/data_service.cc @@ -18,8 +18,8 @@ limitations under the License. 
#include "grpcpp/create_channel.h" #include "grpcpp/security/credentials.h" #include "tensorflow/core/data/service/credentials_factory.h" +#include "tensorflow/core/data/service/dispatcher.grpc.pb.h" #include "tensorflow/core/data/service/grpc_util.h" -#include "tensorflow/core/data/service/master.grpc.pb.h" #include "tensorflow/core/data/service/worker.grpc.pb.h" #include "tensorflow/core/framework/dataset.h" @@ -54,8 +54,8 @@ std::string ProcessingModeToString(ProcessingMode mode) { } } -Status DataServiceMasterClient::RegisterDataset(GraphDef dataset, - int64* dataset_id) { +Status DataServiceDispatcherClient::RegisterDataset(GraphDef dataset, + int64* dataset_id) { TF_RETURN_IF_ERROR(EnsureInitialized()); GetOrRegisterDatasetRequest req; *req.mutable_dataset()->mutable_graph() = dataset; @@ -69,9 +69,9 @@ Status DataServiceMasterClient::RegisterDataset(GraphDef dataset, return Status::OK(); } -Status DataServiceMasterClient::CreateJob(int64 dataset_id, - ProcessingMode processing_mode, - int64* job_id) { +Status DataServiceDispatcherClient::CreateJob(int64 dataset_id, + ProcessingMode processing_mode, + int64* job_id) { TF_RETURN_IF_ERROR(EnsureInitialized()); CreateJobRequest req; req.set_dataset_id(dataset_id); @@ -88,11 +88,9 @@ Status DataServiceMasterClient::CreateJob(int64 dataset_id, return Status::OK(); } -Status DataServiceMasterClient::GetOrCreateJob(int64 dataset_id, - ProcessingMode processing_mode, - const std::string& job_name, - int job_name_index, - int64* job_id) { +Status DataServiceDispatcherClient::GetOrCreateJob( + int64 dataset_id, ProcessingMode processing_mode, + const std::string& job_name, int job_name_index, int64* job_id) { TF_RETURN_IF_ERROR(EnsureInitialized()); GetOrCreateJobRequest req; req.set_dataset_id(dataset_id); @@ -112,9 +110,9 @@ Status DataServiceMasterClient::GetOrCreateJob(int64 dataset_id, return Status::OK(); } -Status DataServiceMasterClient::GetTasks(int64 job_id, - std::vector* tasks, - bool* job_finished) { +Status DataServiceDispatcherClient::GetTasks(int64 job_id, + std::vector* tasks, + bool* job_finished) { TF_RETURN_IF_ERROR(EnsureInitialized()); GetTasksRequest req; req.set_job_id(job_id); @@ -132,7 +130,8 @@ Status DataServiceMasterClient::GetTasks(int64 job_id, return Status::OK(); } -Status DataServiceMasterClient::GetWorkers(std::vector* workers) { +Status DataServiceDispatcherClient::GetWorkers( + std::vector* workers) { TF_RETURN_IF_ERROR(EnsureInitialized()); GetWorkersRequest req; GetWorkersResponse resp; @@ -148,12 +147,12 @@ Status DataServiceMasterClient::GetWorkers(std::vector* workers) { return Status::OK(); } -Status DataServiceMasterClient::EnsureInitialized() { +Status DataServiceDispatcherClient::EnsureInitialized() { std::shared_ptr credentials; TF_RETURN_IF_ERROR( CredentialsFactory::CreateClientCredentials(protocol_, &credentials)); auto channel = grpc::CreateChannel(address_, credentials); - stub_ = MasterService::NewStub(channel); + stub_ = DispatcherService::NewStub(channel); return Status::OK(); } @@ -187,10 +186,11 @@ Status DataServiceWorkerClient::EnsureInitialized() { return Status::OK(); } -Status CreateDataServiceMasterClient( +Status CreateDataServiceDispatcherClient( const std::string& address, const std::string& protocol, - std::unique_ptr* out) { - auto client = absl::make_unique(address, protocol); + std::unique_ptr* out) { + auto client = + absl::make_unique(address, protocol); TF_RETURN_IF_ERROR(client->Initialize()); *out = std::move(client); return Status::OK(); diff --git 
a/tensorflow/core/data/service/data_service.h b/tensorflow/core/data/service/data_service.h index bb5a8a470f0..d0e46c82ff5 100644 --- a/tensorflow/core/data/service/data_service.h +++ b/tensorflow/core/data/service/data_service.h @@ -16,7 +16,7 @@ limitations under the License. #ifndef TENSORFLOW_CORE_DATA_SERVICE_DATA_SERVICE_H_ #define TENSORFLOW_CORE_DATA_SERVICE_DATA_SERVICE_H_ -#include "tensorflow/core/data/service/master.grpc.pb.h" +#include "tensorflow/core/data/service/dispatcher.grpc.pb.h" #include "tensorflow/core/data/service/worker.grpc.pb.h" #include "tensorflow/core/framework/dataset.h" #include "tensorflow/core/framework/op_kernel.h" @@ -67,11 +67,11 @@ class DataServiceClientBase { const std::string protocol_; }; -// Client for communicating with the tf.data service master. -class DataServiceMasterClient : public DataServiceClientBase { +// Client for communicating with the tf.data service dispatcher. +class DataServiceDispatcherClient : public DataServiceClientBase { public: - DataServiceMasterClient(const std::string& address, - const std::string& protocol) + DataServiceDispatcherClient(const std::string& address, + const std::string& protocol) : DataServiceClientBase(address, protocol) {} // Registers a dataset with the tf.data service, and stores the generated @@ -90,13 +90,13 @@ class DataServiceMasterClient : public DataServiceClientBase { const std::string& job_name, int job_name_index, int64* job_id); - // Queries the master for the tasks associated with the specified job. + // Queries the dispatcher for the tasks associated with the specified job. // The tasks will be stored in *tasks, and whether the job is finished will // be stored in `*job_finished`. Status GetTasks(int64 job_id, std::vector* tasks, bool* job_finished); - // Queries the master for its registered workers. The worker info will be + // Queries the dispatcher for its registered workers. The worker info will be // stored in `*workers`. Status GetWorkers(std::vector* workers); @@ -104,7 +104,7 @@ class DataServiceMasterClient : public DataServiceClientBase { Status EnsureInitialized() override; private: - std::unique_ptr stub_; + std::unique_ptr stub_; }; // Client for communicating with the tf.data service worker. @@ -127,10 +127,10 @@ class DataServiceWorkerClient : public DataServiceClientBase { std::unique_ptr stub_; }; -// Creates and initializes a new tf.data service master client. -Status CreateDataServiceMasterClient( +// Creates and initializes a new tf.data service dispatcher client. +Status CreateDataServiceDispatcherClient( const std::string& address, const std::string& protocol, - std::unique_ptr* out); + std::unique_ptr* out); // Creates and initializes a new tf.data service worker client. Status CreateDataServiceWorkerClient( diff --git a/tensorflow/core/data/service/data_service_test.cc b/tensorflow/core/data/service/data_service_test.cc index 19392393eeb..607570054b4 100644 --- a/tensorflow/core/data/service/data_service_test.cc +++ b/tensorflow/core/data/service/data_service_test.cc @@ -19,9 +19,9 @@ limitations under the License. 
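[Editor's note] For context, a minimal sketch of how the renamed DataServiceDispatcherClient declared in data_service.h above might be used. This is illustrative only and not part of the patch; the WorkerInfo element type, the "grpc" protocol string, and the unique_ptr/vector template arguments (elided in the excerpt above) are assumptions.

    // Sketch: count the workers registered with a tf.data service dispatcher.
    #include <memory>
    #include <string>
    #include <vector>

    #include "tensorflow/core/data/service/data_service.h"
    #include "tensorflow/core/lib/core/errors.h"

    namespace tensorflow {
    namespace data {

    Status CountWorkers(const std::string& dispatcher_address,
                        int* num_workers) {
      // Create a client for the dispatcher; "grpc" is an assumed protocol.
      std::unique_ptr<DataServiceDispatcherClient> client;
      TF_RETURN_IF_ERROR(CreateDataServiceDispatcherClient(
          dispatcher_address, /*protocol=*/"grpc", &client));
      // Ask the dispatcher for its registered workers.
      std::vector<WorkerInfo> workers;
      TF_RETURN_IF_ERROR(client->GetWorkers(&workers));
      *num_workers = static_cast<int>(workers.size());
      return Status::OK();
    }

    }  // namespace data
    }  // namespace tensorflow
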
#include "grpcpp/security/credentials.h" #include "absl/strings/str_split.h" #include "tensorflow/core/data/compression_utils.h" +#include "tensorflow/core/data/service/dispatcher.grpc.pb.h" +#include "tensorflow/core/data/service/dispatcher.pb.h" #include "tensorflow/core/data/service/grpc_util.h" -#include "tensorflow/core/data/service/master.grpc.pb.h" -#include "tensorflow/core/data/service/master.pb.h" #include "tensorflow/core/data/service/server_lib.h" #include "tensorflow/core/data/service/test_cluster.h" #include "tensorflow/core/data/service/test_util.h" @@ -66,9 +66,10 @@ TEST(DataService, ProcessingModeToString) { TEST(DataService, GetWorkers) { TestCluster cluster(1); TF_ASSERT_OK(cluster.Initialize()); - DataServiceMasterClient master(cluster.MasterAddress(), kProtocol); + DataServiceDispatcherClient dispatcher(cluster.DispatcherAddress(), + kProtocol); std::vector workers; - TF_EXPECT_OK(master.GetWorkers(&workers)); + TF_EXPECT_OK(dispatcher.GetWorkers(&workers)); EXPECT_EQ(1, workers.size()); } diff --git a/tensorflow/core/data/service/master.proto b/tensorflow/core/data/service/dispatcher.proto similarity index 94% rename from tensorflow/core/data/service/master.proto rename to tensorflow/core/data/service/dispatcher.proto index 661264cc41b..119fe675f2a 100644 --- a/tensorflow/core/data/service/master.proto +++ b/tensorflow/core/data/service/dispatcher.proto @@ -110,11 +110,11 @@ message GetWorkersResponse { repeated WorkerInfo workers = 1; } -service MasterService { - // Registers a worker with the master. +service DispatcherService { + // Registers a worker with the dispatcher. rpc RegisterWorker(RegisterWorkerRequest) returns (RegisterWorkerResponse); - // Updates the master with information about the worker's state. + // Updates the dispatcher with information about the worker's state. rpc WorkerUpdate(WorkerUpdateRequest) returns (WorkerUpdateResponse); // Registers a dataset with the server, or returns its id if it is already @@ -134,6 +134,6 @@ service MasterService { // Reports a list of all tasks for a job. rpc GetTasks(GetTasksRequest) returns (GetTasksResponse); - // Reports a list of all workers registered with the master. + // Reports a list of all workers registered with the dispatcher. rpc GetWorkers(GetWorkersRequest) returns (GetWorkersResponse); } diff --git a/tensorflow/core/data/service/master_impl.cc b/tensorflow/core/data/service/dispatcher_impl.cc similarity index 87% rename from tensorflow/core/data/service/master_impl.cc rename to tensorflow/core/data/service/dispatcher_impl.cc index 5c7917b4154..22a86570b46 100644 --- a/tensorflow/core/data/service/master_impl.cc +++ b/tensorflow/core/data/service/dispatcher_impl.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/data/service/master_impl.h" +#include "tensorflow/core/data/service/dispatcher_impl.h" #include #include @@ -26,8 +26,8 @@ limitations under the License. 
#include "tensorflow/core/data/service/common.pb.h" #include "tensorflow/core/data/service/credentials_factory.h" #include "tensorflow/core/data/service/data_service.h" +#include "tensorflow/core/data/service/dispatcher.pb.h" #include "tensorflow/core/data/service/grpc_util.h" -#include "tensorflow/core/data/service/master.pb.h" #include "tensorflow/core/data/service/worker.grpc.pb.h" #include "tensorflow/core/framework/tensor.pb.h" #include "tensorflow/core/kernels/data/dataset_utils.h" @@ -53,10 +53,10 @@ Status CreateWorkerStub(const std::string& address, } } // namespace -DataServiceMasterImpl::DataServiceMasterImpl(const std::string protocol) +DataServiceDispatcherImpl::DataServiceDispatcherImpl(const std::string protocol) : protocol_(protocol) {} -Status DataServiceMasterImpl::RegisterWorker( +Status DataServiceDispatcherImpl::RegisterWorker( const RegisterWorkerRequest* request, RegisterWorkerResponse* response) { VLOG(3) << "Received register worker request"; mutex_lock l(mu_); @@ -86,8 +86,8 @@ Status DataServiceMasterImpl::RegisterWorker( return Status::OK(); } -Status DataServiceMasterImpl::WorkerUpdate(const WorkerUpdateRequest* request, - WorkerUpdateResponse* response) { +Status DataServiceDispatcherImpl::WorkerUpdate( + const WorkerUpdateRequest* request, WorkerUpdateResponse* response) { mutex_lock l(mu_); int64 worker_id = request->worker_id(); for (auto& update : request->updates()) { @@ -106,7 +106,7 @@ Status DataServiceMasterImpl::WorkerUpdate(const WorkerUpdateRequest* request, return Status::OK(); } -Status DataServiceMasterImpl::GetOrRegisterDataset( +Status DataServiceDispatcherImpl::GetOrRegisterDataset( const GetOrRegisterDatasetRequest* request, GetOrRegisterDatasetResponse* response) { uint64 fingerprint; @@ -128,8 +128,8 @@ Status DataServiceMasterImpl::GetOrRegisterDataset( return Status::OK(); } -int64 DataServiceMasterImpl::RegisterDataset(uint64 fingerprint, - const DatasetDef& dataset) +int64 DataServiceDispatcherImpl::RegisterDataset(uint64 fingerprint, + const DatasetDef& dataset) EXCLUSIVE_LOCKS_REQUIRED(mu_) { int64 dataset_id = next_dataset_id_++; auto new_dataset = @@ -142,8 +142,8 @@ int64 DataServiceMasterImpl::RegisterDataset(uint64 fingerprint, return dataset_id; } -Status DataServiceMasterImpl::CreateJob(const CreateJobRequest* request, - CreateJobResponse* response) { +Status DataServiceDispatcherImpl::CreateJob(const CreateJobRequest* request, + CreateJobResponse* response) { VLOG(3) << "Received create job request for dataset id " << request->dataset_id(); ProcessingMode processing_mode = ProcessingMode(request->processing_mode()); @@ -157,7 +157,7 @@ Status DataServiceMasterImpl::CreateJob(const CreateJobRequest* request, return Status::OK(); } -Status DataServiceMasterImpl::GetOrCreateJob( +Status DataServiceDispatcherImpl::GetOrCreateJob( const GetOrCreateJobRequest* request, GetOrCreateJobResponse* response) { VLOG(3) << "Received get or create job request for dataset id " << request->dataset_id() << " with name " << request->job_name() @@ -193,7 +193,7 @@ Status DataServiceMasterImpl::GetOrCreateJob( } // Validates that the job matches the given processing_mode and dataset_id. 
-Status DataServiceMasterImpl::ValidateMatchingJob( +Status DataServiceDispatcherImpl::ValidateMatchingJob( const Job& job, ProcessingMode processing_mode, int64 dataset_id) { DCHECK(job.name().has_value()); std::string job_name = job.name().value(); @@ -214,10 +214,10 @@ Status DataServiceMasterImpl::ValidateMatchingJob( return Status::OK(); } -Status DataServiceMasterImpl::CreateJob(int64 dataset_id, - ProcessingMode processing_mode, - absl::optional job_name, - int64* out_job_id) LOCKS_EXCLUDED(mu_) { +Status DataServiceDispatcherImpl::CreateJob( + int64 dataset_id, ProcessingMode processing_mode, + absl::optional job_name, int64* out_job_id) + LOCKS_EXCLUDED(mu_) { switch (processing_mode) { case ProcessingMode::PARALLEL_EPOCHS: break; @@ -274,14 +274,16 @@ Status DataServiceMasterImpl::CreateJob(int64 dataset_id, return Status::OK(); } -const DataServiceMasterImpl::Task& DataServiceMasterImpl::CreateTask( +const DataServiceDispatcherImpl::Task& DataServiceDispatcherImpl::CreateTask( Job* job, const std::string& worker_address) LOCKS_EXCLUDED(mu_) { mutex_lock l(mu_); return CreateTaskLocked(job, worker_address); } -const DataServiceMasterImpl::Task& DataServiceMasterImpl::CreateTaskLocked( - Job* job, const std::string& worker_address) EXCLUSIVE_LOCKS_REQUIRED(mu_) { +const DataServiceDispatcherImpl::Task& +DataServiceDispatcherImpl::CreateTaskLocked(Job* job, + const std::string& worker_address) + EXCLUSIVE_LOCKS_REQUIRED(mu_) { int64 task_id = next_task_id_++; DCHECK(!tasks_.contains(task_id)); tasks_.insert({task_id, Task(task_id, job->job_id(), job->dataset_id(), @@ -290,7 +292,7 @@ const DataServiceMasterImpl::Task& DataServiceMasterImpl::CreateTaskLocked( return tasks_.at(task_id); } -Status DataServiceMasterImpl::EnsureWorkerStubInitialized(Worker* worker) { +Status DataServiceDispatcherImpl::EnsureWorkerStubInitialized(Worker* worker) { if (!worker->stub()) { std::unique_ptr stub; TF_RETURN_IF_ERROR(CreateWorkerStub(worker->address(), protocol_, &stub)); @@ -299,8 +301,8 @@ Status DataServiceMasterImpl::EnsureWorkerStubInitialized(Worker* worker) { return Status::OK(); } -Status DataServiceMasterImpl::AllocateTaskToWorker(const Task& task, - Worker* worker) +Status DataServiceDispatcherImpl::AllocateTaskToWorker(const Task& task, + Worker* worker) LOCKS_EXCLUDED(mu_) { TF_RETURN_IF_ERROR(EnsureWorkerStubInitialized(worker)); grpc::ClientContext client_ctx; @@ -322,8 +324,8 @@ Status DataServiceMasterImpl::AllocateTaskToWorker(const Task& task, return Status::OK(); } -Status DataServiceMasterImpl::GetTasks(const GetTasksRequest* request, - GetTasksResponse* response) { +Status DataServiceDispatcherImpl::GetTasks(const GetTasksRequest* request, + GetTasksResponse* response) { mutex_lock l(mu_); VLOG(3) << "Looking up tasks for job id " << request->job_id(); auto it = jobs_.find(request->job_id()); @@ -346,8 +348,8 @@ Status DataServiceMasterImpl::GetTasks(const GetTasksRequest* request, return Status::OK(); } -Status DataServiceMasterImpl::GetWorkers(const GetWorkersRequest* request, - GetWorkersResponse* response) { +Status DataServiceDispatcherImpl::GetWorkers(const GetWorkersRequest* request, + GetWorkersResponse* response) { mutex_lock l(mu_); VLOG(3) << "Enter GetWorkers"; for (auto& worker : workers_) { diff --git a/tensorflow/core/data/service/master_impl.h b/tensorflow/core/data/service/dispatcher_impl.h similarity index 94% rename from tensorflow/core/data/service/master_impl.h rename to tensorflow/core/data/service/dispatcher_impl.h index 67df2613118..84770f7056f 
100644 --- a/tensorflow/core/data/service/master_impl.h +++ b/tensorflow/core/data/service/dispatcher_impl.h @@ -13,13 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_DATA_SERVICE_MASTER_IMPL_H_ -#define TENSORFLOW_CORE_DATA_SERVICE_MASTER_IMPL_H_ +#ifndef TENSORFLOW_CORE_DATA_SERVICE_DISPATCHER_IMPL_H_ +#define TENSORFLOW_CORE_DATA_SERVICE_DISPATCHER_IMPL_H_ #include "absl/container/flat_hash_map.h" #include "tensorflow/core/data/service/common.pb.h" #include "tensorflow/core/data/service/data_service.h" -#include "tensorflow/core/data/service/master.pb.h" +#include "tensorflow/core/data/service/dispatcher.pb.h" #include "tensorflow/core/data/service/worker.grpc.pb.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/mutex.h" @@ -40,11 +40,11 @@ namespace data { // ProcessingModeDef which determines what data it produces. // * Task: A job is broken into multiple tasks, which each represent // iterating over all of or part of the dataset. Workers process tasks. -class DataServiceMasterImpl { +class DataServiceDispatcherImpl { public: - explicit DataServiceMasterImpl(const std::string protocol); + explicit DataServiceDispatcherImpl(const std::string protocol); - // See master.proto for API documentation. + // See dispatcher.proto for API documentation. /// Worker-facing API. Status RegisterWorker(const RegisterWorkerRequest* request, @@ -191,7 +191,7 @@ class DataServiceMasterImpl { // Creates a new task for a job, returning a reference to the task. const Task& CreateTask(Job* job, const std::string& worker_address) LOCKS_EXCLUDED(mu_); - // Same as `CreateTask`, but expects that the master lock is already held. + // Same as `CreateTask`, but expects that the dispatcher lock is already held. const Task& CreateTaskLocked(Job* job, const std::string& worker_address) EXCLUSIVE_LOCKS_REQUIRED(mu_); // Validates that an existing job matches the given processing_mode and @@ -225,10 +225,10 @@ class DataServiceMasterImpl { absl::flat_hash_map> named_jobs_ TF_GUARDED_BY(mu_); - TF_DISALLOW_COPY_AND_ASSIGN(DataServiceMasterImpl); + TF_DISALLOW_COPY_AND_ASSIGN(DataServiceDispatcherImpl); }; } // namespace data } // namespace tensorflow -#endif // TENSORFLOW_CORE_DATA_SERVICE_MASTER_IMPL_H_ +#endif // TENSORFLOW_CORE_DATA_SERVICE_DISPATCHER_IMPL_H_ diff --git a/tensorflow/core/data/service/grpc_master_impl.cc b/tensorflow/core/data/service/grpc_dispatcher_impl.cc similarity index 73% rename from tensorflow/core/data/service/grpc_master_impl.cc rename to tensorflow/core/data/service/grpc_dispatcher_impl.cc index 20ad58a0115..38ecc7057be 100644 --- a/tensorflow/core/data/service/grpc_master_impl.cc +++ b/tensorflow/core/data/service/grpc_dispatcher_impl.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/data/service/grpc_master_impl.h" +#include "tensorflow/core/data/service/grpc_dispatcher_impl.h" #include "grpcpp/server_context.h" #include "tensorflow/core/distributed_runtime/rpc/grpc_util.h" @@ -25,18 +25,18 @@ using ::grpc::ServerBuilder; using ::grpc::ServerContext; using ::grpc::Status; -GrpcMasterImpl::GrpcMasterImpl(ServerBuilder* server_builder, - const std::string& protocol) +GrpcDispatcherImpl::GrpcDispatcherImpl(ServerBuilder* server_builder, + const std::string& protocol) : impl_(protocol) { server_builder->RegisterService(this); - VLOG(1) << "Registered data service master"; + VLOG(1) << "Registered data service dispatcher"; } -#define HANDLER(method) \ - Status GrpcMasterImpl::method(ServerContext* context, \ - const method##Request* request, \ - method##Response* response) { \ - return ToGrpcStatus(impl_.method(request, response)); \ +#define HANDLER(method) \ + Status GrpcDispatcherImpl::method(ServerContext* context, \ + const method##Request* request, \ + method##Response* response) { \ + return ToGrpcStatus(impl_.method(request, response)); \ } HANDLER(RegisterWorker); HANDLER(WorkerUpdate); diff --git a/tensorflow/core/data/service/grpc_master_impl.h b/tensorflow/core/data/service/grpc_dispatcher_impl.h similarity index 68% rename from tensorflow/core/data/service/grpc_master_impl.h rename to tensorflow/core/data/service/grpc_dispatcher_impl.h index d29bb6759f0..f407bd64127 100644 --- a/tensorflow/core/data/service/grpc_master_impl.h +++ b/tensorflow/core/data/service/grpc_dispatcher_impl.h @@ -13,12 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_DATA_SERVICE_GRPC_MASTER_IMPL_H_ -#define TENSORFLOW_CORE_DATA_SERVICE_GRPC_MASTER_IMPL_H_ +#ifndef TENSORFLOW_CORE_DATA_SERVICE_GRPC_DISPATCHER_IMPL_H_ +#define TENSORFLOW_CORE_DATA_SERVICE_GRPC_DISPATCHER_IMPL_H_ #include "grpcpp/server_builder.h" -#include "tensorflow/core/data/service/master.grpc.pb.h" -#include "tensorflow/core/data/service/master_impl.h" +#include "tensorflow/core/data/service/dispatcher.grpc.pb.h" +#include "tensorflow/core/data/service/dispatcher_impl.h" namespace tensorflow { namespace data { @@ -29,14 +29,14 @@ namespace data { // // ::grpc::ServerBuilder builder; // // configure builder -// GrpcMasterImpl data_service(&builder); +// GrpcDispatcherImpl data_service(&builder); // builder.BuildAndStart() // -class GrpcMasterImpl : public MasterService::Service { +class GrpcDispatcherImpl : public DispatcherService::Service { public: - explicit GrpcMasterImpl(grpc::ServerBuilder* server_builder, - const std::string& protocol); - ~GrpcMasterImpl() override {} + explicit GrpcDispatcherImpl(grpc::ServerBuilder* server_builder, + const std::string& protocol); + ~GrpcDispatcherImpl() override {} #define HANDLER(method) \ grpc::Status method(grpc::ServerContext* context, \ @@ -52,12 +52,12 @@ class GrpcMasterImpl : public MasterService::Service { #undef HANDLER private: - DataServiceMasterImpl impl_; + DataServiceDispatcherImpl impl_; - TF_DISALLOW_COPY_AND_ASSIGN(GrpcMasterImpl); + TF_DISALLOW_COPY_AND_ASSIGN(GrpcDispatcherImpl); }; } // namespace data } // namespace tensorflow -#endif // TENSORFLOW_CORE_DATA_SERVICE_GRPC_MASTER_IMPL_H_ +#endif // TENSORFLOW_CORE_DATA_SERVICE_GRPC_DISPATCHER_IMPL_H_ diff --git 
a/tensorflow/core/data/service/grpc_worker_impl.cc b/tensorflow/core/data/service/grpc_worker_impl.cc index 7884fa063ba..0cddfce4e0b 100644 --- a/tensorflow/core/data/service/grpc_worker_impl.cc +++ b/tensorflow/core/data/service/grpc_worker_impl.cc @@ -26,9 +26,9 @@ using ::grpc::ServerContext; using ::grpc::Status; GrpcWorkerImpl::GrpcWorkerImpl(ServerBuilder* server_builder, - const std::string& master_address, + const std::string& dispatcher_address, const std::string& protocol) - : impl_(master_address, protocol) { + : impl_(dispatcher_address, protocol) { server_builder->RegisterService(this); VLOG(1) << "Registered data service worker"; } diff --git a/tensorflow/core/data/service/grpc_worker_impl.h b/tensorflow/core/data/service/grpc_worker_impl.h index b7ece2a7738..169ae29ea37 100644 --- a/tensorflow/core/data/service/grpc_worker_impl.h +++ b/tensorflow/core/data/service/grpc_worker_impl.h @@ -35,7 +35,7 @@ namespace data { class GrpcWorkerImpl : public WorkerService::Service { public: explicit GrpcWorkerImpl(grpc::ServerBuilder* server_builder, - const std::string& master_address, + const std::string& dispatcher_address, const std::string& protocol); ~GrpcWorkerImpl() override {} diff --git a/tensorflow/core/data/service/server_lib.cc b/tensorflow/core/data/service/server_lib.cc index 33c2232f4dc..4f34bf9d0c7 100644 --- a/tensorflow/core/data/service/server_lib.cc +++ b/tensorflow/core/data/service/server_lib.cc @@ -16,7 +16,7 @@ limitations under the License. #include "tensorflow/core/data/service/server_lib.h" #include "tensorflow/core/data/service/credentials_factory.h" -#include "tensorflow/core/data/service/grpc_master_impl.h" +#include "tensorflow/core/data/service/grpc_dispatcher_impl.h" #include "tensorflow/core/data/service/grpc_util.h" #include "tensorflow/core/data/service/grpc_worker_impl.h" #include "tensorflow/core/platform/errors.h" @@ -72,18 +72,18 @@ void GrpcDataServerBase::Join() { server_->Wait(); } int GrpcDataServerBase::BoundPort() { return bound_port(); } -MasterGrpcDataServer::MasterGrpcDataServer(int port, - const std::string& protocol) +DispatchGrpcDataServer::DispatchGrpcDataServer(int port, + const std::string& protocol) : GrpcDataServerBase(port, protocol) {} -MasterGrpcDataServer::~MasterGrpcDataServer() { delete service_; } +DispatchGrpcDataServer::~DispatchGrpcDataServer() { delete service_; } -void MasterGrpcDataServer::AddServiceToBuilder(grpc::ServerBuilder* builder) { - auto service = absl::make_unique(builder, protocol_); +void DispatchGrpcDataServer::AddServiceToBuilder(grpc::ServerBuilder* builder) { + auto service = absl::make_unique(builder, protocol_); service_ = service.release(); } -Status MasterGrpcDataServer::NumWorkers(int* num_workers) { +Status DispatchGrpcDataServer::NumWorkers(int* num_workers) { GetWorkersRequest req; GetWorkersResponse resp; grpc::ServerContext ctx; @@ -95,19 +95,18 @@ Status MasterGrpcDataServer::NumWorkers(int* num_workers) { return Status::OK(); } -WorkerGrpcDataServer::WorkerGrpcDataServer(int port, - const std::string& protocol, - const std::string& master_address, - const std::string& worker_address) +WorkerGrpcDataServer::WorkerGrpcDataServer( + int port, const std::string& protocol, + const std::string& dispatcher_address, const std::string& worker_address) : GrpcDataServerBase(port, protocol), - master_address_(master_address), + dispatcher_address_(dispatcher_address), worker_address_(worker_address) {} WorkerGrpcDataServer::~WorkerGrpcDataServer() { delete service_; } void 
WorkerGrpcDataServer::AddServiceToBuilder(grpc::ServerBuilder* builder) { - auto service = - absl::make_unique(builder, master_address_, protocol_); + auto service = absl::make_unique(builder, dispatcher_address_, + protocol_); service_ = service.release(); } @@ -123,25 +122,25 @@ Status WorkerGrpcDataServer::StartServiceInternal() { return Status::OK(); } -Status NewMasterServer(int port, const std::string& protocol, - std::unique_ptr* out_server) { - *out_server = absl::make_unique(port, protocol); +Status NewDispatchServer(int port, const std::string& protocol, + std::unique_ptr* out_server) { + *out_server = absl::make_unique(port, protocol); return Status::OK(); } Status NewWorkerServer(int port, const std::string& protocol, - const std::string& master_address, + const std::string& dispatcher_address, std::unique_ptr* out_server) { - return NewWorkerServer(port, protocol, master_address, /*worker_address=*/"", - out_server); + return NewWorkerServer(port, protocol, dispatcher_address, + /*worker_address=*/"", out_server); } Status NewWorkerServer(int port, const std::string& protocol, - const std::string& master_address, + const std::string& dispatcher_address, const std::string& worker_address, std::unique_ptr* out_server) { *out_server = absl::make_unique( - port, protocol, master_address, worker_address); + port, protocol, dispatcher_address, worker_address); return Status::OK(); } diff --git a/tensorflow/core/data/service/server_lib.h b/tensorflow/core/data/service/server_lib.h index 72bec665c8e..2190c7a56fe 100644 --- a/tensorflow/core/data/service/server_lib.h +++ b/tensorflow/core/data/service/server_lib.h @@ -25,7 +25,7 @@ namespace data { // Forward declared because transitively depending on .grpc.pb.h files causes // issues in the pywrap build. -class GrpcMasterImpl; +class GrpcDispatcherImpl; class GrpcWorkerImpl; // A grpc server for the tf.data service. @@ -35,7 +35,7 @@ class GrpcDataServerBase { // server will find an available port in `Start()`. The chosen port can be // found in the output of `Target()`. // - // master_address is only needed for worker data servers. + // dispatcher_address is only needed for worker data servers. GrpcDataServerBase(int requested_port, const std::string& protocol); virtual ~GrpcDataServerBase() {} @@ -70,12 +70,12 @@ class GrpcDataServerBase { std::unique_ptr server_; }; -class MasterGrpcDataServer : public GrpcDataServerBase { +class DispatchGrpcDataServer : public GrpcDataServerBase { public: - MasterGrpcDataServer(int requested_port, const std::string& protocol); - ~MasterGrpcDataServer() override; + DispatchGrpcDataServer(int requested_port, const std::string& protocol); + ~DispatchGrpcDataServer() override; - // Returns the number of workers registerd with the master. + // Returns the number of workers registerd with the dispatcher. Status NumWorkers(int* num_workers); protected: @@ -83,14 +83,14 @@ class MasterGrpcDataServer : public GrpcDataServerBase { Status StartServiceInternal() override { return Status::OK(); } private: - // Owned. We use a raw pointer because GrpcMasterImpl is forward-declared. - GrpcMasterImpl* service_; + // Owned. We use a raw pointer because GrpcDispatcherImpl is forward-declared. 
+ GrpcDispatcherImpl* service_; }; class WorkerGrpcDataServer : public GrpcDataServerBase { public: WorkerGrpcDataServer(int requested_port, const std::string& protocol, - const std::string& master_address, + const std::string& dispatcher_address, const std::string& worker_address); ~WorkerGrpcDataServer() override; @@ -99,15 +99,15 @@ class WorkerGrpcDataServer : public GrpcDataServerBase { Status StartServiceInternal() override; private: - const std::string master_address_; + const std::string dispatcher_address_; const std::string worker_address_; // Owned. We use a raw pointer because GrpcWorkerImpl is forward-declared. GrpcWorkerImpl* service_; }; -// Creates a master tf.data server and stores it in `*out_server`. -Status NewMasterServer(int port, const std::string& protocol, - std::unique_ptr* out_server); +// Creates a dispatch tf.data server and stores it in `*out_server`. +Status NewDispatchServer(int port, const std::string& protocol, + std::unique_ptr* out_server); // Creates a worker tf.data server and stores it in `*out_server`. // @@ -115,18 +115,18 @@ Status NewMasterServer(int port, const std::string& protocol, // will be chosen in Start(). This value can be queried with BoundPort(). // // The worker_address argument is optional. If left empty, it will default to -// "localhost:%port%". When the worker registers with the master, the worker -// will report the worker address, so that the master can tell clients where to -// read from. The address may contain the placeholder "%port%", which will be +// "localhost:%port%". When the worker registers with the dispatcher, the worker +// will report the worker address, so that the dispatcher can tell clients where +// to read from. The address may contain the placeholder "%port%", which will be // replaced with the value of BoundPort(). Status NewWorkerServer(int port, const std::string& protocol, - const std::string& master_address, + const std::string& dispatcher_address, const std::string& worker_address, std::unique_ptr* out_server); // Creates a worker using the default worker_address. 
Status NewWorkerServer(int port, const std::string& protocol, - const std::string& master_address, + const std::string& dispatcher_address, std::unique_ptr* out_server); } // namespace data diff --git a/tensorflow/core/data/service/test_cluster.cc b/tensorflow/core/data/service/test_cluster.cc index ded3ebb91b5..4066a75a374 100644 --- a/tensorflow/core/data/service/test_cluster.cc +++ b/tensorflow/core/data/service/test_cluster.cc @@ -45,9 +45,9 @@ Status TestCluster::Initialize() { "Test cluster has already been initialized."); } initialized_ = true; - TF_RETURN_IF_ERROR(NewMasterServer(/*port=*/0, kProtocol, &master_)); - TF_RETURN_IF_ERROR(master_->Start()); - master_address_ = absl::StrCat("localhost:", master_->BoundPort()); + TF_RETURN_IF_ERROR(NewDispatchServer(/*port=*/0, kProtocol, &dispatcher_)); + TF_RETURN_IF_ERROR(dispatcher_->Start()); + dispatcher_address_ = absl::StrCat("localhost:", dispatcher_->BoundPort()); workers_.reserve(num_workers_); worker_addresses_.reserve(num_workers_); for (int i = 0; i < num_workers_; ++i) { @@ -59,14 +59,14 @@ Status TestCluster::Initialize() { Status TestCluster::AddWorker() { std::unique_ptr worker; TF_RETURN_IF_ERROR( - NewWorkerServer(/*port=*/0, kProtocol, master_address_, &worker)); + NewWorkerServer(/*port=*/0, kProtocol, dispatcher_address_, &worker)); TF_RETURN_IF_ERROR(worker->Start()); worker_addresses_.push_back(absl::StrCat("localhost:", worker->BoundPort())); workers_.push_back(std::move(worker)); return Status::OK(); } -std::string TestCluster::MasterAddress() { return master_address_; } +std::string TestCluster::DispatcherAddress() { return dispatcher_address_; } std::string TestCluster::WorkerAddress(int index) { DCHECK_GE(index, 0); diff --git a/tensorflow/core/data/service/test_cluster.h b/tensorflow/core/data/service/test_cluster.h index c4b05ad0543..c5ca3db4c74 100644 --- a/tensorflow/core/data/service/test_cluster.h +++ b/tensorflow/core/data/service/test_cluster.h @@ -24,7 +24,7 @@ namespace data { // Helper class for unit testing a tf.data service cluster. class TestCluster { public: - // Creates a new test cluster with a master and `num_workers` workers. + // Creates a new test cluster with a dispatcher and `num_workers` workers. explicit TestCluster(int num_workers); // Initializes the test cluster. This must be called before interacting with @@ -32,8 +32,8 @@ class TestCluster { Status Initialize(); // Adds a new worker to the cluster. Status AddWorker(); - // Returns the master address in the form "hostname:port". - std::string MasterAddress(); + // Returns the dispatcher address in the form "hostname:port". + std::string DispatcherAddress(); // Returns the address of the worker at the specified index, in the form // "hostname:port". The index must be non-negative and less than the number of // workers in the cluster. @@ -42,8 +42,8 @@ class TestCluster { private: bool initialized_ = false; int num_workers_; - std::unique_ptr master_; - std::string master_address_; + std::unique_ptr dispatcher_; + std::string dispatcher_address_; std::vector> workers_; std::vector worker_addresses_; }; diff --git a/tensorflow/core/data/service/worker_impl.cc b/tensorflow/core/data/service/worker_impl.cc index 151410bb219..00659e1d048 100644 --- a/tensorflow/core/data/service/worker_impl.cc +++ b/tensorflow/core/data/service/worker_impl.cc @@ -21,9 +21,9 @@ limitations under the License. 
#include "tensorflow/c/tf_status_helper.h" #include "tensorflow/core/data/dataset.pb.h" #include "tensorflow/core/data/service/credentials_factory.h" +#include "tensorflow/core/data/service/dispatcher.grpc.pb.h" +#include "tensorflow/core/data/service/dispatcher.pb.h" #include "tensorflow/core/data/service/grpc_util.h" -#include "tensorflow/core/data/service/master.grpc.pb.h" -#include "tensorflow/core/data/service/master.pb.h" #include "tensorflow/core/data/standalone.h" #include "tensorflow/core/framework/tensor.pb.h" #include "tensorflow/core/lib/core/errors.h" @@ -45,9 +45,9 @@ auto* tf_data_service_created = "has been created."); } // namespace -DataServiceWorkerImpl::DataServiceWorkerImpl(const std::string& master_address, - const std::string& protocol) - : master_address_(master_address), protocol_(protocol) { +DataServiceWorkerImpl::DataServiceWorkerImpl( + const std::string& dispatcher_address, const std::string& protocol) + : dispatcher_address_(dispatcher_address), protocol_(protocol) { tf_data_service_created->GetCell()->Set(true); } @@ -67,14 +67,13 @@ void DataServiceWorkerImpl::Start(const std::string& worker_address) { heartbeat_thread_.reset(thread); Status s = Register(); while (!s.ok()) { - LOG(WARNING) << "Failed to register with master at " << master_address_ - << ": " << s; + LOG(WARNING) << "Failed to register with dispatcher at " + << dispatcher_address_ << ": " << s; Env::Default()->SleepForMicroseconds(kHeartbeatIntervalMicros); s = Register(); } } - Status DataServiceWorkerImpl::ProcessTask(const ProcessTaskRequest* request, ProcessTaskResponse* response) { mutex_lock l(mu_); @@ -169,29 +168,29 @@ Status DataServiceWorkerImpl::GetElement(const GetElementRequest* request, return Status::OK(); } -Status DataServiceWorkerImpl::EnsureMasterStubInitialized() +Status DataServiceWorkerImpl::EnsureDispatcherStubInitialized() EXCLUSIVE_LOCKS_REQUIRED(mu_) { - if (!master_stub_) { + if (!dispatcher_stub_) { ::grpc::ChannelArguments args; std::shared_ptr<::grpc::ChannelCredentials> credentials; TF_RETURN_IF_ERROR( CredentialsFactory::CreateClientCredentials(protocol_, &credentials)); auto channel = - ::grpc::CreateCustomChannel(master_address_, credentials, args); - master_stub_ = MasterService::NewStub(channel); + ::grpc::CreateCustomChannel(dispatcher_address_, credentials, args); + dispatcher_stub_ = DispatcherService::NewStub(channel); } return Status::OK(); } Status DataServiceWorkerImpl::Register() EXCLUSIVE_LOCKS_REQUIRED(mu_) { - VLOG(3) << "Registering with master at " << master_address_; - TF_RETURN_IF_ERROR(EnsureMasterStubInitialized()); + VLOG(3) << "Registering with dispatcher at " << dispatcher_address_; + TF_RETURN_IF_ERROR(EnsureDispatcherStubInitialized()); RegisterWorkerRequest req; req.set_worker_address(worker_address_); RegisterWorkerResponse resp; grpc::ClientContext ctx; - grpc::Status s = master_stub_->RegisterWorker(&ctx, req, &resp); + grpc::Status s = dispatcher_stub_->RegisterWorker(&ctx, req, &resp); if (!s.ok()) { return grpc_util::WrapError("Failed to register worker", s); } @@ -205,8 +204,8 @@ Status DataServiceWorkerImpl::Register() EXCLUSIVE_LOCKS_REQUIRED(mu_) { Status DataServiceWorkerImpl::SendTaskUpdate() EXCLUSIVE_LOCKS_REQUIRED(mu_) { VLOG(3) << "Sending " << pending_completed_tasks_.size() - << " task updates to master"; - TF_RETURN_IF_ERROR(EnsureMasterStubInitialized()); + << " task updates to dispatcher"; + TF_RETURN_IF_ERROR(EnsureDispatcherStubInitialized()); WorkerUpdateRequest req; req.set_worker_id(worker_id_); for (int 
task_id : pending_completed_tasks_) { @@ -217,7 +216,7 @@ Status DataServiceWorkerImpl::SendTaskUpdate() EXCLUSIVE_LOCKS_REQUIRED(mu_) { WorkerUpdateResponse resp; grpc::ClientContext ctx; - grpc::Status s = master_stub_->WorkerUpdate(&ctx, req, &resp); + grpc::Status s = dispatcher_stub_->WorkerUpdate(&ctx, req, &resp); if (!s.ok()) { return grpc_util::WrapError("Failed to send task updates", s); } @@ -238,7 +237,7 @@ void DataServiceWorkerImpl::HeartbeatThread() { } Status s = SendTaskUpdate(); if (!s.ok()) { - LOG(WARNING) << "Failed to send task updates to master: " << s; + LOG(WARNING) << "Failed to send task updates to dispatcher: " << s; } } } diff --git a/tensorflow/core/data/service/worker_impl.h b/tensorflow/core/data/service/worker_impl.h index 8c5fc2ea51c..adb3e97bbea 100644 --- a/tensorflow/core/data/service/worker_impl.h +++ b/tensorflow/core/data/service/worker_impl.h @@ -17,7 +17,7 @@ limitations under the License. #include "absl/container/flat_hash_map.h" #include "tensorflow/core/data/service/common.pb.h" -#include "tensorflow/core/data/service/master.grpc.pb.h" +#include "tensorflow/core/data/service/dispatcher.grpc.pb.h" #include "tensorflow/core/data/service/worker.pb.h" #include "tensorflow/core/data/standalone.h" #include "tensorflow/core/lib/core/status.h" @@ -29,17 +29,17 @@ namespace data { // A TensorFlow DataService serves dataset elements over RPC. class DataServiceWorkerImpl { public: - explicit DataServiceWorkerImpl(const std::string& master_address, + explicit DataServiceWorkerImpl(const std::string& dispatcher_address, const std::string& protocol); ~DataServiceWorkerImpl(); // Starts the worker. The worker needs to know its own address so that it can - // register with the master. + // register with the dispatcher. void Start(const std::string& worker_address); // See worker.proto for API documentation. - /// Master-facing API. + /// Dispatcher-facing API. Status ProcessTask(const ProcessTaskRequest* request, ProcessTaskResponse* response); @@ -48,15 +48,15 @@ class DataServiceWorkerImpl { GetElementResponse* response); private: - // Sets master_stub_ if it isn't already set. - Status EnsureMasterStubInitialized(); - // Registers the worker with the master. + // Sets dispatcher_stub_ if it isn't already set. + Status EnsureDispatcherStubInitialized(); + // Registers the worker with the dispatcher. Status Register(); - // Sends task status to the master. + // Sends task status to the dispatcher. Status SendTaskUpdate(); // Creates an iterator to process a task. Status ProcessTaskInternal(const TaskDef& task); - // A thread for updating the master with worker status. + // A thread for updating the dispatcher with worker status. void HeartbeatThread(); typedef struct Task { @@ -67,18 +67,19 @@ class DataServiceWorkerImpl { std::unique_ptr iterator; } Task; - const std::string master_address_; - // Protocol for communicating with the master. + const std::string dispatcher_address_; + // Protocol for communicating with the dispatcher. const std::string protocol_; // The worker's own address. std::string worker_address_; mutex mu_; int64 worker_id_ TF_GUARDED_BY(mu_); - std::unique_ptr master_stub_ TF_GUARDED_BY(mu_); + std::unique_ptr dispatcher_stub_ TF_GUARDED_BY(mu_); // Information about tasks, keyed by task ids. absl::flat_hash_map tasks_ TF_GUARDED_BY(mu_); - // List of completed tasks which haven't yet been communicated to the master. + // List of completed tasks which haven't yet been communicated to the + // dispatcher. 
std::vector pending_completed_tasks_ TF_GUARDED_BY(mu_); bool cancelled_ TF_GUARDED_BY(mu_) = false; // Condition variable for notifying the heartbeat thread. diff --git a/tensorflow/core/kernels/data/experimental/data_service_dataset_op.cc b/tensorflow/core/kernels/data/experimental/data_service_dataset_op.cc index ee8f72bc663..0c2c5254590 100644 --- a/tensorflow/core/kernels/data/experimental/data_service_dataset_op.cc +++ b/tensorflow/core/kernels/data/experimental/data_service_dataset_op.cc @@ -69,7 +69,7 @@ const int64 kDefaultTaskRefreshIntervalMs = 1000; // 1 second. // Dataset for reading data from the tf.data service non-deterministically. // // This dataset interleaves dataset elements produced by multiple tf.data -// workers. We periodically query the tf.data master to determine which workers +// workers. We periodically query the dispatcher to determine which workers // to read from (in case workers are added or removed). class DataServiceDatasetOp::Dataset : public DatasetBase { public: @@ -199,12 +199,13 @@ class DataServiceDatasetOp::Dataset : public DatasetBase { Status Initialize(IteratorContext* ctx) override { VLOG(3) << "Connecting to " << dataset()->address_ << " in data service dataset op"; - DataServiceMasterClient master(dataset()->address_, dataset()->protocol_); + DataServiceDispatcherClient dispatcher(dataset()->address_, + dataset()->protocol_); if (dataset()->job_name_.empty()) { - TF_RETURN_IF_ERROR(master.CreateJob( + TF_RETURN_IF_ERROR(dispatcher.CreateJob( dataset()->dataset_id_, dataset()->processing_mode_, &job_id_)); } else { - TF_RETURN_IF_ERROR(master.GetOrCreateJob( + TF_RETURN_IF_ERROR(dispatcher.GetOrCreateJob( dataset()->dataset_id_, dataset()->processing_mode_, dataset()->job_name_, iterator_index_, &job_id_)); } @@ -283,11 +284,12 @@ class DataServiceDatasetOp::Dataset : public DatasetBase { // Periodically refresh the task list. // Maintain one thread fetching elements for each task. - // TODO(aaudibert): Instead of polling, have master send updates when + // TODO(aaudibert): Instead of polling, have dispatcher send updates when // the list of tasks changes. 
void TaskThreadManager(std::unique_ptr ctx) { VLOG(3) << "Starting task thread manager"; - DataServiceMasterClient master(dataset()->address_, dataset()->protocol_); + DataServiceDispatcherClient dispatcher(dataset()->address_, + dataset()->protocol_); uint64 next_check = Env::Default()->NowMicros(); while (true) { { @@ -305,18 +307,19 @@ class DataServiceDatasetOp::Dataset : public DatasetBase { return; } } - UpdateTasks(&master); + UpdateTasks(&dispatcher); UpdateWorkerThreads(ctx.get()); next_check = Env::Default()->NowMicros() + dataset()->task_refresh_interval_ms_ * 1000; } } - void UpdateTasks(DataServiceMasterClient* master) LOCKS_EXCLUDED(mu_) { + void UpdateTasks(DataServiceDispatcherClient* dispatcher) + LOCKS_EXCLUDED(mu_) { VLOG(3) << "Updating tasks"; std::vector tasks; bool job_finished; - Status s = master->GetTasks(job_id_, &tasks, &job_finished); + Status s = dispatcher->GetTasks(job_id_, &tasks, &job_finished); if (!s.ok()) { LOG(WARNING) << "Failed to get task info for job id " << job_id_ << ": " << s; diff --git a/tensorflow/core/kernels/data/experimental/data_service_ops.cc b/tensorflow/core/kernels/data/experimental/data_service_ops.cc index c6a54baad64..d9ef42d4afa 100644 --- a/tensorflow/core/kernels/data/experimental/data_service_ops.cc +++ b/tensorflow/core/kernels/data/experimental/data_service_ops.cc @@ -53,7 +53,7 @@ void RegisterDatasetOp::Compute(OpKernelContext* ctx) { OP_REQUIRES_OK( ctx, AsGraphDef(ctx, dataset, std::move(serialization_ctx), &graph_def)); - DataServiceMasterClient client(address, protocol); + DataServiceDispatcherClient client(address, protocol); int64 dataset_id; OP_REQUIRES_OK(ctx, client.RegisterDataset(graph_def, &dataset_id)); diff --git a/tensorflow/core/kernels/data/experimental/data_service_ops.h b/tensorflow/core/kernels/data/experimental/data_service_ops.h index b7d66938ae6..b3d6233aa52 100644 --- a/tensorflow/core/kernels/data/experimental/data_service_ops.h +++ b/tensorflow/core/kernels/data/experimental/data_service_ops.h @@ -25,7 +25,7 @@ namespace data { // Registers a dataset with the tf.data service. // -// The address and protocol inputs are used to connect to the tf.data master. +// The address and protocol inputs are used to connect to the dispatcher. // The external state policy attribute determines whether to ignore, warn, or // error out when the dataset contains external state. // The op produces a dataset id for identifying the registered dataset. diff --git a/tensorflow/python/data/experimental/ops/data_service_ops.py b/tensorflow/python/data/experimental/ops/data_service_ops.py index d16e9966b23..c564212949f 100644 --- a/tensorflow/python/data/experimental/ops/data_service_ops.py +++ b/tensorflow/python/data/experimental/ops/data_service_ops.py @@ -77,7 +77,7 @@ class _DataServiceDatasetV2(dataset_ops.DatasetSource): amount of memory used, since `distribute` won't use more than `element_size` * `max_outstanding_requests` of memory. task_refresh_interval_hint_ms: (Optional.) A hint for how often to query - the master for task changes. + the dispatcher for task changes. """ if job_name is None: @@ -173,7 +173,7 @@ def _distribute(processing_mode, of memory used, since `distribute` won't use more than `element_size` * `max_outstanding_requests` of memory. task_refresh_interval_hint_ms: (Optional.) A hint for how often to query the - master for task changes. + dispatcher for task changes. Returns: Dataset: A `Dataset` of the elements produced by the data service. 
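For context, the renamed user-facing workflow mirrors the doctest examples in server_lib.py further down in this patch; a minimal end-to-end sketch (assuming a TensorFlow build that includes this rename, with port=0 so free ports are chosen automatically):

```
import tensorflow as tf

# Start an in-process dispatcher and one worker; port=0 binds to any free port.
dispatcher = tf.data.experimental.service.DispatchServer(port=0)
dispatcher_address = dispatcher.target.split("://")[1]
worker = tf.data.experimental.service.WorkerServer(
    port=0, dispatcher_address=dispatcher_address)

# Clients point `distribute` at the dispatcher; the worker streams the elements.
dataset = tf.data.Dataset.range(10)
dataset = dataset.apply(
    tf.data.experimental.service.distribute(
        processing_mode="parallel_epochs", service=dispatcher.target))
print(list(dataset.as_numpy_iterator()))  # [0, 1, ..., 9]
```

A dedicated dispatch process would instead pass a fixed port and block with dispatcher.join(), as the updated DispatchServer docstring below shows.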
diff --git a/tensorflow/python/data/experimental/service/__init__.py b/tensorflow/python/data/experimental/service/__init__.py index aecc07965bb..7887e53600a 100644 --- a/tensorflow/python/data/experimental/service/__init__.py +++ b/tensorflow/python/data/experimental/service/__init__.py @@ -19,5 +19,5 @@ from __future__ import division from __future__ import print_function from tensorflow.python.data.experimental.ops.data_service_ops import distribute -from tensorflow.python.data.experimental.service.server_lib import MasterServer +from tensorflow.python.data.experimental.service.server_lib import DispatchServer from tensorflow.python.data.experimental.service.server_lib import WorkerServer diff --git a/tensorflow/python/data/experimental/service/server_lib.py b/tensorflow/python/data/experimental/service/server_lib.py index f249af671a6..5a7ce73b4c7 100644 --- a/tensorflow/python/data/experimental/service/server_lib.py +++ b/tensorflow/python/data/experimental/service/server_lib.py @@ -24,35 +24,35 @@ from tensorflow.python.data.experimental.service import _pywrap_server_lib from tensorflow.python.util.tf_export import tf_export -@tf_export("data.experimental.service.MasterServer", v1=[]) -class MasterServer(object): - """An in-process tf.data service master server. +@tf_export("data.experimental.service.DispatchServer", v1=[]) +class DispatchServer(object): + """An in-process tf.data service dispatch server. - A `tf.data.experimental.service.MasterServer` coordinates a cluster of + A `tf.data.experimental.service.DispatchServer` coordinates a cluster of `tf.data.experimental.service.WorkerServer`s. When the workers start, they - register themselves with the master. + register themselves with the dispatcher. - >>> master = tf.data.experimental.service.MasterServer(port=0) - >>> master_address = master.target.split("://")[1] + >>> dispatcher = tf.data.experimental.service.DispatchServer(port=0) + >>> dispatcher_address = dispatcher.target.split("://")[1] >>> worker = tf.data.experimental.service.WorkerServer( - ... port=0, master_address=master_address) + ... port=0, dispatcher_address=dispatcher_address) >>> dataset = tf.data.Dataset.range(10) >>> dataset = dataset.apply(tf.data.experimental.service.distribute( - ... processing_mode="parallel_epochs", service=master.target)) + ... processing_mode="parallel_epochs", service=dispatcher.target)) >>> print(list(dataset.as_numpy_iterator())) [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] - When starting a dedicated tf.data master process, use join() to block + When starting a dedicated tf.data dispatch process, use join() to block indefinitely after starting up the server. ``` - master = tf.data.experimental.service.MasterServer(port=5050) - master.join() + dispatcher = tf.data.experimental.service.DispatchServer(port=5050) + dispatcher.join() ``` """ def __init__(self, port, protocol=None, start=True): - """Creates a new master server. + """Creates a new dispatch server. Args: port: Specifies the port to bind to. @@ -68,15 +68,16 @@ class MasterServer(object): if protocol is None: protocol = "grpc" self._protocol = protocol - self._server = _pywrap_server_lib.TF_DATA_NewMasterServer(port, protocol) + self._server = _pywrap_server_lib.TF_DATA_NewDispatchServer(port, protocol) if start: self._server.start() def start(self): """Starts this server. - >>> master = tf.data.experimental.service.MasterServer(port=0, start=False) - >>> master.start() + >>> dispatcher = tf.data.experimental.service.DispatchServer(port=0, + ... 
start=False) + >>> dispatcher.start() Raises: tf.errors.OpError: Or one of its subclasses if an error occurs while @@ -87,11 +88,11 @@ class MasterServer(object): def join(self): """Blocks until the server has shut down. - This is useful when starting a dedicated master process. + This is useful when starting a dedicated dispatch process. ``` - master = tf.data.experimental.service.MasterServer(port=5050) - master.join() + dispatcher = tf.data.experimental.service.DispatchServer(port=5050) + dispatcher.join() ``` Raises: @@ -104,10 +105,10 @@ class MasterServer(object): def target(self): """Returns a target that can be used to connect to the server. - >>> master = tf.data.experimental.service.MasterServer(port=0) + >>> dispatcher = tf.data.experimental.service.DispatchServer(port=0) >>> dataset = tf.data.Dataset.range(10) >>> dataset = dataset.apply(tf.data.experimental.service.distribute( - ... processing_mode="parallel_epochs", service=master.target)) + ... processing_mode="parallel_epochs", service=dispatcher.target)) The returned string will be in the form protocol://address, e.g. "grpc://localhost:5050". @@ -136,7 +137,7 @@ class MasterServer(object): return "localhost:{0}".format(self._server.bound_port()) def _num_workers(self): - """Returns the number of workers registered with the master.""" + """Returns the number of workers registered with the dispatcher.""" return self._server.num_workers() @@ -147,15 +148,15 @@ class WorkerServer(object): A `tf.data.experimental.service.WorkerServer` performs `tf.data.Dataset` processing for user-defined datasets, and provides the resulting elements over RPC. A worker is associated with a single - `tf.data.experimental.service.MasterServer`. + `tf.data.experimental.service.DispatchServer`. - >>> master = tf.data.experimental.service.MasterServer(port=0) - >>> master_address = master.target.split("://")[1] + >>> dispatcher = tf.data.experimental.service.DispatchServer(port=0) + >>> dispatcher_address = dispatcher.target.split("://")[1] >>> worker = tf.data.experimental.service.WorkerServer( - ... port=0, master_address=master_address) + ... port=0, dispatcher_address=dispatcher_address) >>> dataset = tf.data.Dataset.range(10) >>> dataset = dataset.apply(tf.data.experimental.service.distribute( - ... processing_mode="parallel_epochs", service=master.target)) + ... processing_mode="parallel_epochs", service=dispatcher.target)) >>> print(list(dataset.as_numpy_iterator())) [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] @@ -164,14 +165,14 @@ class WorkerServer(object): ``` worker = tf.data.experimental.service.WorkerServer( - port=5051, master_address="grpc://localhost:5050") + port=5051, dispatcher_address="grpc://localhost:5050") worker.join() ``` """ def __init__(self, port, - master_address, + dispatcher_address, worker_address=None, protocol=None, start=True): @@ -180,11 +181,12 @@ class WorkerServer(object): Args: port: Specifies the port to bind to. A value of 0 indicates that the worker can bind to any available port. - master_address: Specifies the address of the master server. + dispatcher_address: Specifies the address of the dispatcher. worker_address: (Optional.) Specifies the address of the worker server. - This address is passed to the master server so that the master can tell - clients how to connect to this worker. Defaults to `"localhost:%port%"`, - where `%port%` will be replaced with the port used by the worker. + This address is passed to the dispatcher so that the dispatcher can + tell clients how to connect to this worker. 
Defaults to + `"localhost:%port%"`, where `%port%` will be replaced with the port used + by the worker. protocol: (Optional.) Specifies the protocol to be used by the server. Acceptable values include `"grpc", "grpc+local"`. Defaults to `"grpc"`. start: (Optional.) Boolean, indicating whether to start the server after @@ -201,7 +203,7 @@ class WorkerServer(object): self._protocol = protocol self._server = _pywrap_server_lib.TF_DATA_NewWorkerServer( - port, protocol, master_address, worker_address) + port, protocol, dispatcher_address, worker_address) if start: self._server.start() @@ -221,7 +223,7 @@ class WorkerServer(object): ``` worker_server = tf.data.experimental.service.WorkerServer( - port=5051, master_address="grpc://localhost:5050") + port=5051, dispatcher_address="grpc://localhost:5050") worker_server.join() ``` diff --git a/tensorflow/python/data/experimental/service/server_lib_test.py b/tensorflow/python/data/experimental/service/server_lib_test.py index 74eb11dc59c..f7354e64a3a 100644 --- a/tensorflow/python/data/experimental/service/server_lib_test.py +++ b/tensorflow/python/data/experimental/service/server_lib_test.py @@ -25,68 +25,68 @@ from tensorflow.python.platform import test class ServerLibTest(test.TestCase): - def testStartMaster(self): - master = server_lib.MasterServer(0, start=False) - master.start() + def testStartDispatcher(self): + dispatcher = server_lib.DispatchServer(0, start=False) + dispatcher.start() - def testMultipleStartMaster(self): - master = server_lib.MasterServer(0, start=True) - master.start() + def testMultipleStartDispatcher(self): + dispatcher = server_lib.DispatchServer(0, start=True) + dispatcher.start() def testStartWorker(self): - master = server_lib.MasterServer(0) - worker = server_lib.WorkerServer(0, master._address, start=False) + dispatcher = server_lib.DispatchServer(0) + worker = server_lib.WorkerServer(0, dispatcher._address, start=False) worker.start() def testMultipleStartWorker(self): - master = server_lib.MasterServer(0) - worker = server_lib.WorkerServer(0, master._address, start=True) + dispatcher = server_lib.DispatchServer(0) + worker = server_lib.WorkerServer(0, dispatcher._address, start=True) worker.start() - def testStopMaster(self): - master = server_lib.MasterServer(0) - master._stop() - master._stop() + def testStopDispatcher(self): + dispatcher = server_lib.DispatchServer(0) + dispatcher._stop() + dispatcher._stop() def testStopWorker(self): - master = server_lib.MasterServer(0) - worker = server_lib.WorkerServer(0, master._address) + dispatcher = server_lib.DispatchServer(0) + worker = server_lib.WorkerServer(0, dispatcher._address) worker._stop() worker._stop() - def testStopStartMaster(self): - master = server_lib.MasterServer(0) - master._stop() + def testStopStartDispatcher(self): + dispatcher = server_lib.DispatchServer(0) + dispatcher._stop() with self.assertRaisesRegex( RuntimeError, "Server cannot be started after it has been stopped"): - master.start() + dispatcher.start() def testStopStartWorker(self): - master = server_lib.MasterServer(0) - worker = server_lib.WorkerServer(0, master._address) + dispatcher = server_lib.DispatchServer(0) + worker = server_lib.WorkerServer(0, dispatcher._address) worker._stop() with self.assertRaisesRegex( RuntimeError, "Server cannot be started after it has been stopped"): worker.start() - def testJoinMaster(self): - master = server_lib.MasterServer(0) - master._stop() - master.join() + def testJoinDispatcher(self): + dispatcher = server_lib.DispatchServer(0) + 
dispatcher._stop() + dispatcher.join() def testJoinWorker(self): - master = server_lib.MasterServer(0) - worker = server_lib.WorkerServer(0, master._address) + dispatcher = server_lib.DispatchServer(0) + worker = server_lib.WorkerServer(0, dispatcher._address) worker._stop() worker.join() - def testMasterNumWorkers(self): - master = server_lib.MasterServer(0) - self.assertEqual(0, master._num_workers()) - worker1 = server_lib.WorkerServer(0, master._address) # pylint: disable=unused-variable - self.assertEqual(1, master._num_workers()) - worker2 = server_lib.WorkerServer(0, master._address) # pylint: disable=unused-variable - self.assertEqual(2, master._num_workers()) + def testDispatcherNumWorkers(self): + dispatcher = server_lib.DispatchServer(0) + self.assertEqual(0, dispatcher._num_workers()) + worker1 = server_lib.WorkerServer(0, dispatcher._address) # pylint: disable=unused-variable + self.assertEqual(1, dispatcher._num_workers()) + worker2 = server_lib.WorkerServer(0, dispatcher._address) # pylint: disable=unused-variable + self.assertEqual(2, dispatcher._num_workers()) if __name__ == "__main__": diff --git a/tensorflow/python/data/experimental/service/server_lib_wrapper.cc b/tensorflow/python/data/experimental/service/server_lib_wrapper.cc index 03453a56c7f..e288179dd36 100644 --- a/tensorflow/python/data/experimental/service/server_lib_wrapper.cc +++ b/tensorflow/python/data/experimental/service/server_lib_wrapper.cc @@ -28,13 +28,14 @@ limitations under the License. namespace py = pybind11; PYBIND11_MODULE(_pywrap_server_lib, m) { - py::class_(m, "MasterGrpcDataServer") - .def("start", &tensorflow::data::MasterGrpcDataServer::Start) - .def("stop", &tensorflow::data::MasterGrpcDataServer::Stop) - .def("join", &tensorflow::data::MasterGrpcDataServer::Join) - .def("bound_port", &tensorflow::data::MasterGrpcDataServer::BoundPort) + py::class_(m, + "DispatchGrpcDataServer") + .def("start", &tensorflow::data::DispatchGrpcDataServer::Start) + .def("stop", &tensorflow::data::DispatchGrpcDataServer::Stop) + .def("join", &tensorflow::data::DispatchGrpcDataServer::Join) + .def("bound_port", &tensorflow::data::DispatchGrpcDataServer::BoundPort) .def("num_workers", - [](tensorflow::data::MasterGrpcDataServer* server) -> int { + [](tensorflow::data::DispatchGrpcDataServer* server) -> int { int num_workers; tensorflow::Status status = server->NumWorkers(&num_workers); tensorflow::MaybeRaiseFromStatus(status); @@ -48,12 +49,12 @@ PYBIND11_MODULE(_pywrap_server_lib, m) { .def("bound_port", &tensorflow::data::WorkerGrpcDataServer::BoundPort); m.def( - "TF_DATA_NewMasterServer", + "TF_DATA_NewDispatchServer", [](int port, std::string protocol) - -> std::unique_ptr { - std::unique_ptr server; + -> std::unique_ptr { + std::unique_ptr server; tensorflow::Status status = - tensorflow::data::NewMasterServer(port, protocol, &server); + tensorflow::data::NewDispatchServer(port, protocol, &server); tensorflow::MaybeRaiseFromStatus(status); return server; }, @@ -61,12 +62,12 @@ PYBIND11_MODULE(_pywrap_server_lib, m) { m.def( "TF_DATA_NewWorkerServer", - [](int port, std::string protocol, std::string master_address, + [](int port, std::string protocol, std::string dispatcher_address, std::string worker_address) -> std::unique_ptr { std::unique_ptr server; tensorflow::Status status = tensorflow::data::NewWorkerServer( - port, protocol, master_address, worker_address, &server); + port, protocol, dispatcher_address, worker_address, &server); tensorflow::MaybeRaiseFromStatus(status); return server; }, diff --git 
a/tensorflow/python/data/kernel_tests/data_service_ops_test.py b/tensorflow/python/data/kernel_tests/data_service_ops_test.py index 488bf97f184..98db4fb0d4b 100644 --- a/tensorflow/python/data/kernel_tests/data_service_ops_test.py +++ b/tensorflow/python/data/kernel_tests/data_service_ops_test.py @@ -59,23 +59,25 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): num_workers: The number of workers in the cluster. Returns: - The address of the master. + The address of the dispatcher. """ - self._master = server_lib.MasterServer(port=0, protocol=PROTOCOL) + self._dispatcher = server_lib.DispatchServer(port=0, protocol=PROTOCOL) self._servers = [] for _ in range(num_workers): self._servers.append( server_lib.WorkerServer( - port=0, master_address=self._master._address, protocol=PROTOCOL)) + port=0, + dispatcher_address=self._dispatcher._address, + protocol=PROTOCOL)) - return self._master._address + return self._dispatcher._address @combinations.generate(test_base.eager_only_combinations()) def testDistributeBasic(self): num_elements = 10 - master_address = self.create_cluster(1) + dispatcher_address = self.create_cluster(1) ds = dataset_ops.Dataset.range(num_elements) - ds = _make_distributed_dataset(ds, master_address) + ds = _make_distributed_dataset(ds, dispatcher_address) results = [elem.numpy() for elem in ds] self.assertEqual(list(range(num_elements)), results) @@ -83,10 +85,10 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): def testDifferentShuffleOrders(self): random_seed.set_random_seed(None) num_elements = 100 - master_address = self.create_cluster(2) + dispatcher_address = self.create_cluster(2) ds = dataset_ops.Dataset.range(num_elements) ds = ds.shuffle(num_elements) - ds = _make_distributed_dataset(ds, master_address) + ds = _make_distributed_dataset(ds, dispatcher_address) output = [elem.numpy() for elem in ds] # The output will be two sequences of range(num_elements) @@ -104,9 +106,9 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): @combinations.generate(test_base.eager_only_combinations()) def testMultipleEpochs(self): num_elements = 3 - master_address = self.create_cluster(1) + dispatcher_address = self.create_cluster(1) ds = dataset_ops.Dataset.range(num_elements) - ds = _make_distributed_dataset(ds, master_address) + ds = _make_distributed_dataset(ds, dispatcher_address) for _ in range(10): self.assertEqual(list(range(num_elements)), [elem.numpy() for elem in ds]) @@ -114,9 +116,9 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): def testRepeatedDataset(self): num_elements = 10 num_repetitions = 5 - master_address = self.create_cluster(1) + dispatcher_address = self.create_cluster(1) ds = dataset_ops.Dataset.range(num_elements) - ds = _make_distributed_dataset(ds, master_address) + ds = _make_distributed_dataset(ds, dispatcher_address) ds = ds.repeat(num_repetitions) self.assertDatasetProduces( ds, expected_output=num_repetitions * list(range(num_elements))) @@ -125,12 +127,12 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): def testConcurrentEpoch(self): num_elements = 10 num_datasets = 3 - master_address = self.create_cluster(1) + dispatcher_address = self.create_cluster(1) iterators = [] results = [] for _ in range(num_datasets): ds = dataset_ops.Dataset.range(num_elements) - ds = _make_distributed_dataset(ds, master_address) + ds = _make_distributed_dataset(ds, dispatcher_address) iterators.append(iter(ds)) 
results.append([]) @@ -146,9 +148,9 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): self.skipTest("Not yet implemented") num_elements = 10 num_iterators = 3 - master_address = self.create_cluster(1) + dispatcher_address = self.create_cluster(1) ds = dataset_ops.Dataset.range(num_elements) - ds = _make_distributed_dataset(ds, master_address) + ds = _make_distributed_dataset(ds, dispatcher_address) result = [] iterators = [] for _ in range(num_iterators): @@ -170,20 +172,20 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): def testMultiWorker(self): num_workers = 3 num_elements = 10 - master_address = self.create_cluster(num_workers) + dispatcher_address = self.create_cluster(num_workers) ds = dataset_ops.Dataset.range(num_elements) - ds = _make_distributed_dataset(ds, master_address) + ds = _make_distributed_dataset(ds, dispatcher_address) results = [elem.numpy() for elem in ds] self.assertCountEqual(num_workers * list(range(num_elements)), results) @combinations.generate(test_base.eager_only_combinations()) def testAddWorkerMidJob(self): - self._master = server_lib.MasterServer(port=0, protocol=PROTOCOL) + self._dispatcher = server_lib.DispatchServer(port=0, protocol=PROTOCOL) self._worker = server_lib.WorkerServer( - port=0, master_address=self._master._address, protocol=PROTOCOL) + port=0, dispatcher_address=self._dispatcher._address, protocol=PROTOCOL) num_elements = 100 ds = dataset_ops.Dataset.range(num_elements) - ds = _make_distributed_dataset(ds, self._master._address) + ds = _make_distributed_dataset(ds, self._dispatcher._address) iterator = iter(ds) results = [] # Read halfway through the dataset. @@ -191,10 +193,10 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): results.append(next(iterator).numpy()) self._new_worker = server_lib.WorkerServer( - port=0, master_address=self._master._address, protocol=PROTOCOL) + port=0, dispatcher_address=self._dispatcher._address, protocol=PROTOCOL) - # Wait for the new worker to register with the master. - while self._master._num_workers() < 2: + # Wait for the new worker to register with the dispatcher. + while self._dispatcher._num_workers() < 2: time.sleep(10 / 1000) # 10ms for elem in iterator: @@ -206,12 +208,12 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): combinations.times(test_base.eager_only_combinations(), combinations.combine(use_same_port=[True, False]))) def testRestartWorker(self, use_same_port): - self._master = server_lib.MasterServer(port=0, protocol=PROTOCOL) + self._dispatcher = server_lib.DispatchServer(port=0, protocol=PROTOCOL) self._worker = server_lib.WorkerServer( - port=0, master_address=self._master._address, protocol=PROTOCOL) + port=0, dispatcher_address=self._dispatcher._address, protocol=PROTOCOL) num_elements = 100 ds = dataset_ops.Dataset.range(num_elements) - ds = _make_distributed_dataset(ds, self._master._address) + ds = _make_distributed_dataset(ds, self._dispatcher._address) iterator = iter(ds) # Read halfway through the dataset. 
midpoint = num_elements // 2 @@ -224,7 +226,9 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): port = int(self._worker._address.split(":")[1]) self._worker._stop() self._new_worker = server_lib.WorkerServer( - port=port, master_address=self._master._address, protocol=PROTOCOL) + port=port, + dispatcher_address=self._dispatcher._address, + protocol=PROTOCOL) # There may have been some elements prefetched from the first worker # before it was stopped. @@ -259,12 +263,12 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): def testInsideFunction(self): num_workers = 3 num_elements = 10 - master_address = self.create_cluster(num_workers) + dispatcher_address = self.create_cluster(num_workers) @def_function.function def f(): ds = dataset_ops.Dataset.range(num_elements) - ds = _make_distributed_dataset(ds, master_address) + ds = _make_distributed_dataset(ds, dispatcher_address) result = tensor_array_ops.TensorArray( dtypes.int64, size=num_workers * num_elements, dynamic_size=True) i = 0 @@ -279,10 +283,10 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): @combinations.generate(test_base.eager_only_combinations()) def testSharedJobName(self): num_elements = 100 - master_address = self.create_cluster(1) + dispatcher_address = self.create_cluster(1) ds = dataset_ops.Dataset.range(num_elements) - ds1 = _make_distributed_dataset(ds, master_address, job_name="job_name") - ds2 = _make_distributed_dataset(ds, master_address, job_name="job_name") + ds1 = _make_distributed_dataset(ds, dispatcher_address, job_name="job_name") + ds2 = _make_distributed_dataset(ds, dispatcher_address, job_name="job_name") iter1 = iter(ds1) iter2 = iter(ds2) results = [] @@ -298,20 +302,22 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): @combinations.generate(test_base.eager_only_combinations()) def testDifferentJobNames(self): num_elements = 10 - master_address = self.create_cluster(1) + dispatcher_address = self.create_cluster(1) ds = dataset_ops.Dataset.range(num_elements) - ds1 = _make_distributed_dataset(ds, master_address, job_name="job_name1") - ds2 = _make_distributed_dataset(ds, master_address, job_name="job_name2") + ds1 = _make_distributed_dataset( + ds, dispatcher_address, job_name="job_name1") + ds2 = _make_distributed_dataset( + ds, dispatcher_address, job_name="job_name2") self.assertDatasetProduces(ds1, list(range(num_elements))) self.assertDatasetProduces(ds2, list(range(num_elements))) @combinations.generate(test_base.eager_only_combinations()) def testSharedJobNameMultiIteration(self): num_elements = 10 - master_address = self.create_cluster(1) + dispatcher_address = self.create_cluster(1) ds = dataset_ops.Dataset.range(num_elements) - ds1 = _make_distributed_dataset(ds, master_address, job_name="job_name") - ds2 = _make_distributed_dataset(ds, master_address, job_name="job_name") + ds1 = _make_distributed_dataset(ds, dispatcher_address, job_name="job_name") + ds2 = _make_distributed_dataset(ds, dispatcher_address, job_name="job_name") # iteration 1 self.assertDatasetProduces(ds1, list(range(num_elements))) self.assertDatasetProduces(ds2, []) @@ -323,11 +329,11 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): def testSharedJobNameRepeat(self): num_elements = 100 num_repetitions = 3 - master_address = self.create_cluster(1) + dispatcher_address = self.create_cluster(1) ds = dataset_ops.Dataset.range(num_elements) - ds1 = _make_distributed_dataset(ds, 
master_address, job_name="job_name") + ds1 = _make_distributed_dataset(ds, dispatcher_address, job_name="job_name") ds1 = ds1.repeat(num_repetitions) - ds2 = _make_distributed_dataset(ds, master_address, job_name="job_name") + ds2 = _make_distributed_dataset(ds, dispatcher_address, job_name="job_name") ds2 = ds2.repeat(num_repetitions) results = [] iter1 = iter(ds1) @@ -345,7 +351,7 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): @combinations.generate(test_base.eager_only_combinations()) def testApplyDeterminismOption(self): elements = list(range(10)) - master_address = self.create_cluster(1) + dispatcher_address = self.create_cluster(1) def dataset_fn(delay_ms): @@ -362,7 +368,7 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): opts = dataset_ops.Options() opts.experimental_deterministic = False ds = ds.with_options(opts) - ds = _make_distributed_dataset(ds, master_address) + ds = _make_distributed_dataset(ds, dispatcher_address) return ds self.checkDeterminism( @@ -379,8 +385,8 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): options.experimental_external_state_policy = external_state_policy ds = ds.with_options(options) - master_address = self.create_cluster(3) - ds = _make_distributed_dataset(ds, master_address) + dispatcher_address = self.create_cluster(3) + ds = _make_distributed_dataset(ds, dispatcher_address) next(iter(ds)) @combinations.generate( @@ -400,12 +406,12 @@ class DataServiceOpsTest(test_base.DatasetTestBase, parameterized.TestCase): @combinations.generate(test_base.eager_only_combinations()) def testDistributeFromInterleave(self): - master_address = self.create_cluster(1) + dispatcher_address = self.create_cluster(1) ds = dataset_ops.Dataset.range(2) def interleave_fn(_): ds = dataset_ops.Dataset.range(2) - _make_distributed_dataset(ds, master_address) + _make_distributed_dataset(ds, dispatcher_address) return ds with self.assertRaisesRegex( diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.service.-master-server.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.service.-dispatch-server.pbtxt similarity index 85% rename from tensorflow/tools/api/golden/v2/tensorflow.data.experimental.service.-master-server.pbtxt rename to tensorflow/tools/api/golden/v2/tensorflow.data.experimental.service.-dispatch-server.pbtxt index daac7716ca8..86efaf268e0 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.service.-master-server.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.service.-dispatch-server.pbtxt @@ -1,6 +1,6 @@ -path: "tensorflow.data.experimental.service.MasterServer" +path: "tensorflow.data.experimental.service.DispatchServer" tf_class { - is_instance: "" + is_instance: "" is_instance: "" member { name: "target" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.service.-worker-server.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.service.-worker-server.pbtxt index d0121b7edf2..8d8b1fd8584 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.service.-worker-server.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.service.-worker-server.pbtxt @@ -4,7 +4,7 @@ tf_class { is_instance: "" member_method { name: "__init__" - argspec: "args=[\'self\', \'port\', \'master_address\', \'worker_address\', \'protocol\', \'start\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'True\'], " + argspec: 
"args=[\'self\', \'port\', \'dispatcher_address\', \'worker_address\', \'protocol\', \'start\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'True\'], " } member_method { name: "join" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.service.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.service.pbtxt index 347dd3c74b1..00f0035e082 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.service.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.service.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.data.experimental.service" tf_module { member { - name: "MasterServer" + name: "DispatchServer" mtype: "" } member { diff --git a/tensorflow/tools/def_file_filter/symbols_pybind.txt b/tensorflow/tools/def_file_filter/symbols_pybind.txt index 69ccb99c333..3b395fe280a 100644 --- a/tensorflow/tools/def_file_filter/symbols_pybind.txt +++ b/tensorflow/tools/def_file_filter/symbols_pybind.txt @@ -99,8 +99,8 @@ tensorflow::data::GrpcDataServerBase::Join tensorflow::data::GrpcDataServerBase::Start tensorflow::data::GrpcDataServerBase::Stop tensorflow::data::GrpcDataServerBase::BoundPort -tensorflow::data::MasterGrpcDataServer::NumWorkers -tensorflow::data::NewMasterServer +tensorflow::data::DispatchGrpcDataServer::NumWorkers +tensorflow::data::NewDispatchServer tensorflow::data::NewWorkerServer [protos_all] # device_lib, dtypes From ca8d58b9bae1b89e6b39faa2b70c4f32cc8b955e Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Thu, 16 Jul 2020 12:11:37 -0700 Subject: [PATCH 0621/2522] Moving more filesystems to transactions --- .../core/platform/cloud/gcs_file_system.cc | 32 +++++++++---------- .../core/platform/cloud/gcs_file_system.h | 32 +++++++++---------- .../platform/default/posix_file_system.cc | 28 ++++++++-------- .../core/platform/default/posix_file_system.h | 28 ++++++++-------- .../platform/hadoop/hadoop_file_system.cc | 26 +++++++-------- .../core/platform/hadoop/hadoop_file_system.h | 28 ++++++++-------- 6 files changed, 87 insertions(+), 87 deletions(-) diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc index 1bd4d86eef6..139ba124ced 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system.cc @@ -973,7 +973,7 @@ GcsFileSystem::GcsFileSystem( additional_header_(additional_header) {} Status GcsFileSystem::NewRandomAccessFile( - const string& fname, std::unique_ptr* result) { + const string& fname, std::unique_ptr* result/*, TransactionToken* token */) { string bucket, object; TF_RETURN_IF_ERROR(ParseGcsPath(fname, false, &bucket, &object)); TF_RETURN_IF_ERROR(CheckBucketLocationConstraint(bucket)); @@ -1149,7 +1149,7 @@ void GcsFileSystem::ClearFileCaches(const string& fname) { } Status GcsFileSystem::NewWritableFile(const string& fname, - std::unique_ptr* result) { + std::unique_ptr* result/*, TransactionToken* token */) { string bucket, object; TF_RETURN_IF_ERROR(ParseGcsPath(fname, false, &bucket, &object)); result->reset(new GcsWritableFile( @@ -1162,7 +1162,7 @@ Status GcsFileSystem::NewWritableFile(const string& fname, // Reads the file from GCS in chunks and stores it in a tmp file, // which is then passed to GcsWritableFile. 
Status GcsFileSystem::NewAppendableFile(const string& fname, - std::unique_ptr* result) { + std::unique_ptr* result/*, TransactionToken* token */) { std::unique_ptr reader; TF_RETURN_IF_ERROR(NewRandomAccessFile(fname, &reader)); std::unique_ptr buffer(new char[kReadAppendableFileBufferSize]); @@ -1201,7 +1201,7 @@ Status GcsFileSystem::NewAppendableFile(const string& fname, } Status GcsFileSystem::NewReadOnlyMemoryRegionFromFile( - const string& fname, std::unique_ptr* result) { + const string& fname, std::unique_ptr* result/*, TransactionToken* token */) { uint64 size; TF_RETURN_IF_ERROR(GetFileSize(fname, &size)); std::unique_ptr data(new char[size]); @@ -1216,7 +1216,7 @@ Status GcsFileSystem::NewReadOnlyMemoryRegionFromFile( return Status::OK(); } -Status GcsFileSystem::FileExists(const string& fname) { +Status GcsFileSystem::FileExists(const string& fname/*, TransactionToken* token */) { string bucket, object; TF_RETURN_IF_ERROR(ParseGcsPath(fname, true, &bucket, &object)); if (object.empty()) { @@ -1431,14 +1431,14 @@ Status GcsFileSystem::FolderExists(const string& dirname, bool* result) { } Status GcsFileSystem::GetChildren(const string& dirname, - std::vector* result) { + std::vector* result/*, TransactionToken* token */) { return GetChildrenBounded(dirname, UINT64_MAX, result, false /* recursively */, false /* include_self_directory_marker */); } Status GcsFileSystem::GetMatchingPaths(const string& pattern, - std::vector* results) { + std::vector* results/*, TransactionToken* token */) { MatchingPathsCache::ComputeFunc compute_func = [this](const string& pattern, std::vector* results) { results->clear(); @@ -1598,7 +1598,7 @@ Status GcsFileSystem::GetChildrenBounded(const string& dirname, } } -Status GcsFileSystem::Stat(const string& fname, FileStatistics* stat) { +Status GcsFileSystem::Stat(const string& fname, FileStatistics* stat/*, TransactionToken* token */) { if (!stat) { return errors::Internal("'stat' cannot be nullptr."); } @@ -1632,7 +1632,7 @@ Status GcsFileSystem::Stat(const string& fname, FileStatistics* stat) { return errors::NotFound("The specified path ", fname, " was not found."); } -Status GcsFileSystem::DeleteFile(const string& fname) { +Status GcsFileSystem::DeleteFile(const string& fname/*, TransactionToken* token */) { string bucket, object; TF_RETURN_IF_ERROR(ParseGcsPath(fname, false, &bucket, &object)); @@ -1648,7 +1648,7 @@ Status GcsFileSystem::DeleteFile(const string& fname) { return Status::OK(); } -Status GcsFileSystem::CreateDir(const string& dirname) { +Status GcsFileSystem::CreateDir(const string& dirname/*, TransactionToken* token */) { string bucket, object; TF_RETURN_IF_ERROR(ParseGcsPath(dirname, true, &bucket, &object)); if (object.empty()) { @@ -1674,7 +1674,7 @@ Status GcsFileSystem::CreateDir(const string& dirname) { // Checks that the directory is empty (i.e no objects with this prefix exist). // Deletes the GCS directory marker if it exists. 
-Status GcsFileSystem::DeleteDir(const string& dirname) { +Status GcsFileSystem::DeleteDir(const string& dirname/*, TransactionToken* token */) { std::vector children; // A directory is considered empty either if there are no matching objects // with the corresponding name prefix or if there is exactly one matching @@ -1694,7 +1694,7 @@ Status GcsFileSystem::DeleteDir(const string& dirname) { return Status::OK(); } -Status GcsFileSystem::GetFileSize(const string& fname, uint64* file_size) { +Status GcsFileSystem::GetFileSize(const string& fname, uint64* file_size/*, TransactionToken* token */) { if (!file_size) { return errors::Internal("'file_size' cannot be nullptr."); } @@ -1709,7 +1709,7 @@ Status GcsFileSystem::GetFileSize(const string& fname, uint64* file_size) { return Status::OK(); } -Status GcsFileSystem::RenameFile(const string& src, const string& target) { +Status GcsFileSystem::RenameFile(const string& src, const string& target/*, TransactionToken* token */) { if (!IsDirectory(src).ok()) { return RenameObject(src, target); } @@ -1771,7 +1771,7 @@ Status GcsFileSystem::RenameObject(const string& src, const string& target) { [this, &src]() { return DeleteFile(src); }, retry_config_); } -Status GcsFileSystem::IsDirectory(const string& fname) { +Status GcsFileSystem::IsDirectory(const string& fname/*, TransactionToken* token */) { string bucket, object; TF_RETURN_IF_ERROR(ParseGcsPath(fname, true, &bucket, &object)); if (object.empty()) { @@ -1799,7 +1799,7 @@ Status GcsFileSystem::IsDirectory(const string& fname) { Status GcsFileSystem::DeleteRecursively(const string& dirname, int64* undeleted_files, - int64* undeleted_dirs) { + int64* undeleted_dirs/*, TransactionToken* token */) { if (!undeleted_files || !undeleted_dirs) { return errors::Internal( "'undeleted_files' and 'undeleted_dirs' cannot be nullptr."); @@ -1840,7 +1840,7 @@ Status GcsFileSystem::DeleteRecursively(const string& dirname, // Flushes all caches for filesystem metadata and file contents. Useful for // reclaiming memory once filesystem operations are done (e.g. model is loaded), // or for resetting the filesystem to a consistent state. 
-void GcsFileSystem::FlushCaches() { +void GcsFileSystem::FlushCaches(/* TransactionToken* token */) { tf_shared_lock l(block_cache_lock_); file_block_cache_->Flush(); stat_cache_->Clear(); diff --git a/tensorflow/core/platform/cloud/gcs_file_system.h b/tensorflow/core/platform/cloud/gcs_file_system.h index f066cc31eb4..e824d16e418 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system.h +++ b/tensorflow/core/platform/cloud/gcs_file_system.h @@ -126,43 +126,43 @@ class GcsFileSystem : public FileSystem { bool compose_append); Status NewRandomAccessFile( - const string& fname, std::unique_ptr* result) override; + const string& fname, std::unique_ptr* result/*, TransactionToken* token = nullptr */) override; Status NewWritableFile(const string& fname, - std::unique_ptr* result) override; + std::unique_ptr* result)/*, TransactionToken* token = nullptr */ override; Status NewAppendableFile(const string& fname, - std::unique_ptr* result) override; + std::unique_ptr* result/*, TransactionToken* token = nullptr */) override; Status NewReadOnlyMemoryRegionFromFile( const string& fname, - std::unique_ptr* result) override; + std::unique_ptr* result/*, TransactionToken* token = nullptr */) override; - Status FileExists(const string& fname) override; + Status FileExists(const string& fname/*, TransactionToken* token = nullptr */) override; - Status Stat(const string& fname, FileStatistics* stat) override; + Status Stat(const string& fname, FileStatistics* stat/*, TransactionToken* token = nullptr */) override; - Status GetChildren(const string& dir, std::vector* result) override; + Status GetChildren(const string& dir, std::vector* result/*, TransactionToken* token = nullptr */) override; Status GetMatchingPaths(const string& pattern, - std::vector* results) override; + std::vector* results/*, TransactionToken* token = nullptr */) override; - Status DeleteFile(const string& fname) override; + Status DeleteFile(const string& fname/*, TransactionToken* token = nullptr */) override; - Status CreateDir(const string& dirname) override; + Status CreateDir(const string& dirname/*, TransactionToken* token = nullptr */) override; - Status DeleteDir(const string& dirname) override; + Status DeleteDir(const string& dirname/*, TransactionToken* token = nullptr */) override; - Status GetFileSize(const string& fname, uint64* file_size) override; + Status GetFileSize(const string& fname, uint64* file_size/*, TransactionToken* token = nullptr */) override; - Status RenameFile(const string& src, const string& target) override; + Status RenameFile(const string& src, const string& target/*, TransactionToken* token = nullptr */) override; - Status IsDirectory(const string& fname) override; + Status IsDirectory(const string& fname/*, TransactionToken* token = nullptr */) override; Status DeleteRecursively(const string& dirname, int64* undeleted_files, - int64* undeleted_dirs) override; + int64* undeleted_dirs/*, TransactionToken* token = nullptr */) override; - void FlushCaches() override; + void FlushCaches(/* TransactionToken* token = nullptr */) override; /// Set an object to collect runtime statistics from the GcsFilesystem. 
void SetStats(GcsStatsInterface* stats); diff --git a/tensorflow/core/platform/default/posix_file_system.cc b/tensorflow/core/platform/default/posix_file_system.cc index 05c2b2762d4..cc0666f0c08 100644 --- a/tensorflow/core/platform/default/posix_file_system.cc +++ b/tensorflow/core/platform/default/posix_file_system.cc @@ -178,7 +178,7 @@ class PosixReadOnlyMemoryRegion : public ReadOnlyMemoryRegion { }; Status PosixFileSystem::NewRandomAccessFile( - const string& fname, std::unique_ptr* result) { + const string& fname, std::unique_ptr* result/*, TransactionToken* token */) { string translated_fname = TranslateName(fname); Status s; int fd = open(translated_fname.c_str(), O_RDONLY); @@ -191,7 +191,7 @@ Status PosixFileSystem::NewRandomAccessFile( } Status PosixFileSystem::NewWritableFile(const string& fname, - std::unique_ptr* result) { + std::unique_ptr* result/*, TransactionToken* token */) { string translated_fname = TranslateName(fname); Status s; FILE* f = fopen(translated_fname.c_str(), "w"); @@ -204,7 +204,7 @@ Status PosixFileSystem::NewWritableFile(const string& fname, } Status PosixFileSystem::NewAppendableFile( - const string& fname, std::unique_ptr* result) { + const string& fname, std::unique_ptr* result/*, TransactionToken* token */) { string translated_fname = TranslateName(fname); Status s; FILE* f = fopen(translated_fname.c_str(), "a"); @@ -217,7 +217,7 @@ Status PosixFileSystem::NewAppendableFile( } Status PosixFileSystem::NewReadOnlyMemoryRegionFromFile( - const string& fname, std::unique_ptr* result) { + const string& fname, std::unique_ptr* result/*, TransactionToken* token */) { string translated_fname = TranslateName(fname); Status s = Status::OK(); int fd = open(translated_fname.c_str(), O_RDONLY); @@ -240,7 +240,7 @@ Status PosixFileSystem::NewReadOnlyMemoryRegionFromFile( return s; } -Status PosixFileSystem::FileExists(const string& fname) { +Status PosixFileSystem::FileExists(const string& fname/*, TransactionToken* token */) { if (access(TranslateName(fname).c_str(), F_OK) == 0) { return Status::OK(); } @@ -248,7 +248,7 @@ Status PosixFileSystem::FileExists(const string& fname) { } Status PosixFileSystem::GetChildren(const string& dir, - std::vector* result) { + std::vector* result/*, TransactionToken* token */) { string translated_dir = TranslateName(dir); result->clear(); DIR* d = opendir(translated_dir.c_str()); @@ -269,11 +269,11 @@ Status PosixFileSystem::GetChildren(const string& dir, } Status PosixFileSystem::GetMatchingPaths(const string& pattern, - std::vector* results) { + std::vector* results/*, TransactionToken* token */) { return internal::GetMatchingPaths(this, Env::Default(), pattern, results); } -Status PosixFileSystem::DeleteFile(const string& fname) { +Status PosixFileSystem::DeleteFile(const string& fname/*, TransactionToken* token */) { Status result; if (unlink(TranslateName(fname).c_str()) != 0) { result = IOError(fname, errno); @@ -281,7 +281,7 @@ Status PosixFileSystem::DeleteFile(const string& fname) { return result; } -Status PosixFileSystem::CreateDir(const string& name) { +Status PosixFileSystem::CreateDir(const string& name/*, TransactionToken* token */) { string translated = TranslateName(name); if (translated.empty()) { return errors::AlreadyExists(name); @@ -292,7 +292,7 @@ Status PosixFileSystem::CreateDir(const string& name) { return Status::OK(); } -Status PosixFileSystem::DeleteDir(const string& name) { +Status PosixFileSystem::DeleteDir(const string& name/*, TransactionToken* token */) { Status result; if 
(rmdir(TranslateName(name).c_str()) != 0) { result = IOError(name, errno); @@ -300,7 +300,7 @@ Status PosixFileSystem::DeleteDir(const string& name) { return result; } -Status PosixFileSystem::GetFileSize(const string& fname, uint64* size) { +Status PosixFileSystem::GetFileSize(const string& fname, uint64* size/*, TransactionToken* token */) { Status s; struct stat sbuf; if (stat(TranslateName(fname).c_str(), &sbuf) != 0) { @@ -312,7 +312,7 @@ Status PosixFileSystem::GetFileSize(const string& fname, uint64* size) { return s; } -Status PosixFileSystem::Stat(const string& fname, FileStatistics* stats) { +Status PosixFileSystem::Stat(const string& fname, FileStatistics* stats/*, TransactionToken* token */) { Status s; struct stat sbuf; if (stat(TranslateName(fname).c_str(), &sbuf) != 0) { @@ -325,7 +325,7 @@ Status PosixFileSystem::Stat(const string& fname, FileStatistics* stats) { return s; } -Status PosixFileSystem::RenameFile(const string& src, const string& target) { +Status PosixFileSystem::RenameFile(const string& src, const string& target/*, TransactionToken* token */) { Status result; if (rename(TranslateName(src).c_str(), TranslateName(target).c_str()) != 0) { result = IOError(src, errno); @@ -333,7 +333,7 @@ Status PosixFileSystem::RenameFile(const string& src, const string& target) { return result; } -Status PosixFileSystem::CopyFile(const string& src, const string& target) { +Status PosixFileSystem::CopyFile(const string& src, const string& target/*, TransactionToken* token */) { string translated_src = TranslateName(src); struct stat sbuf; if (stat(translated_src.c_str(), &sbuf) != 0) { diff --git a/tensorflow/core/platform/default/posix_file_system.h b/tensorflow/core/platform/default/posix_file_system.h index c418a08e944..f430978bd73 100644 --- a/tensorflow/core/platform/default/posix_file_system.h +++ b/tensorflow/core/platform/default/posix_file_system.h @@ -29,38 +29,38 @@ class PosixFileSystem : public FileSystem { Status NewRandomAccessFile( const string& filename, - std::unique_ptr* result) override; + std::unique_ptr* result/*, TransactionToken* token = nullptr */) override; Status NewWritableFile(const string& fname, - std::unique_ptr* result) override; + std::unique_ptr* result/*, TransactionToken* token = nullptr */) override; Status NewAppendableFile(const string& fname, - std::unique_ptr* result) override; + std::unique_ptr* result/*, TransactionToken* token = nullptr */) override; Status NewReadOnlyMemoryRegionFromFile( const string& filename, - std::unique_ptr* result) override; + std::unique_ptr* result/*, TransactionToken* token = nullptr */) override; - Status FileExists(const string& fname) override; + Status FileExists(const string& fname/*, TransactionToken* token = nullptr */) override; - Status GetChildren(const string& dir, std::vector* result) override; + Status GetChildren(const string& dir, std::vector* result/*, TransactionToken* token = nullptr */) override; - Status Stat(const string& fname, FileStatistics* stats) override; + Status Stat(const string& fname, FileStatistics* stats/*, TransactionToken* token = nullptr */) override; Status GetMatchingPaths(const string& pattern, - std::vector* results) override; + std::vector* results/*, TransactionToken* token = nullptr */) override; - Status DeleteFile(const string& fname) override; + Status DeleteFile(const string& fname/*, TransactionToken* token = nullptr */) override; - Status CreateDir(const string& name) override; + Status CreateDir(const string& name/*, TransactionToken* token = nullptr */) 
override; - Status DeleteDir(const string& name) override; + Status DeleteDir(const string& name/*, TransactionToken* token = nullptr */) override; - Status GetFileSize(const string& fname, uint64* size) override; + Status GetFileSize(const string& fname, uint64* size/*, TransactionToken* token = nullptr */) override; - Status RenameFile(const string& src, const string& target) override; + Status RenameFile(const string& src, const string& target/*, TransactionToken* token = nullptr */) override; - Status CopyFile(const string& src, const string& target) override; + Status CopyFile(const string& src, const string& target/*, TransactionToken* token = nullptr */) override; }; Status IOError(const string& context, int err_number); diff --git a/tensorflow/core/platform/hadoop/hadoop_file_system.cc b/tensorflow/core/platform/hadoop/hadoop_file_system.cc index 1e1062c88c0..088d4ff7894 100644 --- a/tensorflow/core/platform/hadoop/hadoop_file_system.cc +++ b/tensorflow/core/platform/hadoop/hadoop_file_system.cc @@ -280,7 +280,7 @@ class HDFSRandomAccessFile : public RandomAccessFile { }; Status HadoopFileSystem::NewRandomAccessFile( - const string& fname, std::unique_ptr* result) { + const string& fname, std::unique_ptr* result/*, TransactionToken* token */) { hdfsFS fs = nullptr; TF_RETURN_IF_ERROR(Connect(fname, &fs)); @@ -372,7 +372,7 @@ class HDFSWritableFile : public WritableFile { }; Status HadoopFileSystem::NewWritableFile( - const string& fname, std::unique_ptr* result) { + const string& fname, std::unique_ptr* result/*, TransactionToken* token */) { hdfsFS fs = nullptr; TF_RETURN_IF_ERROR(Connect(fname, &fs)); @@ -386,7 +386,7 @@ Status HadoopFileSystem::NewWritableFile( } Status HadoopFileSystem::NewAppendableFile( - const string& fname, std::unique_ptr* result) { + const string& fname, std::unique_ptr* result/*, TransactionToken* token */) { hdfsFS fs = nullptr; TF_RETURN_IF_ERROR(Connect(fname, &fs)); @@ -400,7 +400,7 @@ Status HadoopFileSystem::NewAppendableFile( } Status HadoopFileSystem::NewReadOnlyMemoryRegionFromFile( - const string& fname, std::unique_ptr* result) { + const string& fname, std::unique_ptr* result/*, TransactionToken* token */) { // hadoopReadZero() technically supports this call with the following // caveats: // - It only works up to 2 GB. 
We'd have to Stat() the file to ensure that @@ -410,7 +410,7 @@ Status HadoopFileSystem::NewReadOnlyMemoryRegionFromFile( return errors::Unimplemented("HDFS does not support ReadOnlyMemoryRegion"); } -Status HadoopFileSystem::FileExists(const string& fname) { +Status HadoopFileSystem::FileExists(const string& fname/*, TransactionToken* token */) { hdfsFS fs = nullptr; TF_RETURN_IF_ERROR(Connect(fname, &fs)); if (libhdfs()->hdfsExists(fs, TranslateName(fname).c_str()) == 0) { @@ -420,7 +420,7 @@ Status HadoopFileSystem::FileExists(const string& fname) { } Status HadoopFileSystem::GetChildren(const string& dir, - std::vector* result) { + std::vector* result/*, TransactionToken* token */) { result->clear(); hdfsFS fs = nullptr; TF_RETURN_IF_ERROR(Connect(dir, &fs)); @@ -448,11 +448,11 @@ Status HadoopFileSystem::GetChildren(const string& dir, } Status HadoopFileSystem::GetMatchingPaths(const string& pattern, - std::vector* results) { + std::vector* results/*, TransactionToken* token */) { return internal::GetMatchingPaths(this, Env::Default(), pattern, results); } -Status HadoopFileSystem::DeleteFile(const string& fname) { +Status HadoopFileSystem::DeleteFile(const string& fname/*, TransactionToken* token */) { hdfsFS fs = nullptr; TF_RETURN_IF_ERROR(Connect(fname, &fs)); @@ -463,7 +463,7 @@ Status HadoopFileSystem::DeleteFile(const string& fname) { return Status::OK(); } -Status HadoopFileSystem::CreateDir(const string& dir) { +Status HadoopFileSystem::CreateDir(const string& dir/*, TransactionToken* token */) { hdfsFS fs = nullptr; TF_RETURN_IF_ERROR(Connect(dir, &fs)); @@ -473,7 +473,7 @@ Status HadoopFileSystem::CreateDir(const string& dir) { return Status::OK(); } -Status HadoopFileSystem::DeleteDir(const string& dir) { +Status HadoopFileSystem::DeleteDir(const string& dir/*, TransactionToken* token */) { hdfsFS fs = nullptr; TF_RETURN_IF_ERROR(Connect(dir, &fs)); @@ -505,7 +505,7 @@ Status HadoopFileSystem::DeleteDir(const string& dir) { return Status::OK(); } -Status HadoopFileSystem::GetFileSize(const string& fname, uint64* size) { +Status HadoopFileSystem::GetFileSize(const string& fname, uint64* size/*, TransactionToken* token */) { hdfsFS fs = nullptr; TF_RETURN_IF_ERROR(Connect(fname, &fs)); @@ -519,7 +519,7 @@ Status HadoopFileSystem::GetFileSize(const string& fname, uint64* size) { return Status::OK(); } -Status HadoopFileSystem::RenameFile(const string& src, const string& target) { +Status HadoopFileSystem::RenameFile(const string& src, const string& target/*, TransactionToken* token */) { hdfsFS fs = nullptr; TF_RETURN_IF_ERROR(Connect(src, &fs)); @@ -536,7 +536,7 @@ Status HadoopFileSystem::RenameFile(const string& src, const string& target) { return Status::OK(); } -Status HadoopFileSystem::Stat(const string& fname, FileStatistics* stats) { +Status HadoopFileSystem::Stat(const string& fname, FileStatistics* stats/*, TransactionToken* token */) { hdfsFS fs = nullptr; TF_RETURN_IF_ERROR(Connect(fname, &fs)); diff --git a/tensorflow/core/platform/hadoop/hadoop_file_system.h b/tensorflow/core/platform/hadoop/hadoop_file_system.h index f9f2c25e2ea..b2e7abee58b 100644 --- a/tensorflow/core/platform/hadoop/hadoop_file_system.h +++ b/tensorflow/core/platform/hadoop/hadoop_file_system.h @@ -33,38 +33,38 @@ class HadoopFileSystem : public FileSystem { ~HadoopFileSystem(); Status NewRandomAccessFile( - const string& fname, std::unique_ptr* result) override; + const string& fname, std::unique_ptr* result/*, TransactionToken* token = nullptr*/) override; Status NewWritableFile(const 
string& fname, - std::unique_ptr* result) override; + std::unique_ptr* result/*, TransactionToken* token = nullptr*/) override; Status NewAppendableFile(const string& fname, - std::unique_ptr* result) override; + std::unique_ptr* result/*, TransactionToken* token = nullptr*/) override; Status NewReadOnlyMemoryRegionFromFile( const string& fname, - std::unique_ptr* result) override; + std::unique_ptr* result/*, TransactionToken* token = nullptr*/) override; - Status FileExists(const string& fname) override; + Status FileExists(const string& fname/*, TransactionToken* token = nullptr*/) override; - Status GetChildren(const string& dir, std::vector* result) override; + Status GetChildren(const string& dir, std::vector* result/*, TransactionToken* token = nullptr*/) override; Status GetMatchingPaths(const string& pattern, - std::vector* results) override; + std::vector* results/*, TransactionToken* token = nullptr*/) override; - Status DeleteFile(const string& fname) override; + Status DeleteFile(const string& fname/*, TransactionToken* token = nullptr*/) override; - Status CreateDir(const string& name) override; + Status CreateDir(const string& name/*, TransactionToken* token = nullptr*/) override; - Status DeleteDir(const string& name) override; + Status DeleteDir(const string& name/*, TransactionToken* token = nullptr*/) override; - Status GetFileSize(const string& fname, uint64* size) override; + Status GetFileSize(const string& fname, uint64* size/*, TransactionToken* token = nullptr*/) override; - Status RenameFile(const string& src, const string& target) override; + Status RenameFile(const string& src, const string& target/*, TransactionToken* token = nullptr*/) override; - Status Stat(const string& fname, FileStatistics* stat) override; + Status Stat(const string& fname, FileStatistics* stat/*, TransactionToken* token = nullptr*/) override; - string TranslateName(const string& name) const override; + string TranslateName(const string& name/*, TransactionToken* token = nullptr*/) const override; private: Status Connect(StringPiece fname, hdfsFS* fs); From 446c04c268573094f4a7124ebd1cbf59757ff51b Mon Sep 17 00:00:00 2001 From: "T.J. Alumbaugh" Date: Thu, 16 Jul 2020 12:15:14 -0700 Subject: [PATCH 0622/2522] Hybrid convolutions go through CpuBackendGEMM on x86 PiperOrigin-RevId: 321618575 Change-Id: I7122ab059604557eb7040ee8c6f41a1b2d709ca1 --- tensorflow/lite/kernels/conv.cc | 20 +++-- tensorflow/lite/kernels/conv_test.cc | 41 +++++++++ tensorflow/lite/kernels/internal/BUILD | 2 + .../internal/optimized/sse_tensor_utils.cc | 85 +++++++++++++++++++ .../internal/optimized/sse_tensor_utils.h | 2 +- .../optimized/sse_tensor_utils_impl.h | 8 ++ 6 files changed, 148 insertions(+), 10 deletions(-) diff --git a/tensorflow/lite/kernels/conv.cc b/tensorflow/lite/kernels/conv.cc index 81069de1abe..3c11ecf172b 100644 --- a/tensorflow/lite/kernels/conv.cc +++ b/tensorflow/lite/kernels/conv.cc @@ -888,13 +888,16 @@ void EvalHybrid(TfLiteContext* context, TfLiteNode* node, GetTemporary(context, node, data->scaling_factors_index)); // Per-batch input quantization for higher accuracy. 
- for (int b = 0; b < batch_size; ++b) { - float unused_min, unused_max; - const int offset = b * input_size; - tensor_utils::SymmetricQuantizeFloats( - input_ptr + offset, input_size, quantized_input_ptr_batch + offset, - &unused_min, &unused_max, &scaling_factors_ptr[b]); - scaling_factors_ptr[b] *= filter->params.scale; + { + ruy::profiler::ScopeLabel label("ConvHybridQuantizeInputs"); + for (int b = 0; b < batch_size; ++b) { + float unused_min, unused_max; + const int offset = b * input_size; + tensor_utils::SymmetricQuantizeFloats( + input_ptr + offset, input_size, quantized_input_ptr_batch + offset, + &unused_min, &unused_max, &scaling_factors_ptr[b]); + scaling_factors_ptr[b] *= filter->params.scale; + } } switch (kernel_type) { @@ -902,8 +905,7 @@ void EvalHybrid(TfLiteContext* context, TfLiteNode* node, case kGenericOptimized: case kMultithreadOptimized: case kCblasOptimized: { - // There is only one implementation for hybrid kernel. Note - // this does not make use of gemmlowp nor supports multithreading. + // There is only one implementation for hybrid kernel. ConvParams op_params; op_params.padding_type = PaddingType::kSame; op_params.padding_values.width = data->padding.width; diff --git a/tensorflow/lite/kernels/conv_test.cc b/tensorflow/lite/kernels/conv_test.cc index ac78bc6b353..d816e08f560 100644 --- a/tensorflow/lite/kernels/conv_test.cc +++ b/tensorflow/lite/kernels/conv_test.cc @@ -1295,6 +1295,47 @@ TEST_P(ConvolutionOpTest, SimpleTestHybridInt8) { 0.16))); } +TEST_P(ConvolutionOpTest, SimpleTestHybridInt8Big) { + // A bigger variant of the simple hybrid test to ensure coverage on + // optimized paths that are only enabled at larger matrix sizes. + HybridConvolutionOpModel m( + GetRegistration(), {TensorType_FLOAT32, {2, 2, 4, 1}}, + {TensorType_INT8, {8, 2, 2, 1}, 0, 0, 4.0 / 127.0, 0}, + {TensorType_FLOAT32, {}}); + + m.SetInput({ + // First batch + 1, 1, 1, 1, // row = 1 + 2, 2, 2, 2, // row = 2 + // Second batch + 1, 2, 3, 4, // row = 1 + 1, 2, 3, 4, // row = 2 + }); + m.SetSignedFilter({ + 1, 2, 3, 4, // first 2x2 filter + -1, 1, -1, 1, // second 2x2 filter + -1, -1, 1, 1, // third 2x2 filter + 1, 1, 3, 3, // fourth 2x2 filter + -1, -1, 3, 3, // fifth 2x2 filter + 4, 3, 2, 1, // sixth 2x2 filter + 2, 1, 1, 2, // seventh 2x2 filter + 1, -1, 2, -2, // eighth 2x2 filter + }); + m.SetBias({1, 2, 3, 4, 5, 6, 7, 8}); + + m.Invoke(); + + EXPECT_THAT(m.GetOutput(), + ElementsAreArray(ArrayFloatNear( + { + 18, 2, 5, 18, 15, 19, 16, 8, // first batch, left + 18, 2, 5, 18, 15, 19, 16, 8, // first batch, right + 17, 4, 3, 16, 11, 20, 16, 5, // second batch, left + 37, 4, 3, 32, 19, 40, 28, 5 // second batch, right + }, + 0.17))); +} + // This test's output is equivalent to the SimpleTestHybrid // because we break each input into two channels, each with half of the value, // while keeping the filters for each channel equivalent. 
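The new SimpleTestHybridInt8Big test above exercises the path this patch changes: hybrid convolution keeps float activations and int8 filters, quantizes each input batch symmetrically on the fly, runs an integer GEMM (on x86 now routed through CpuBackendGemm when the output depth is a multiple of four, per the sse_tensor_utils.cc change below), and rescales the int32 accumulators back to float with the per-batch scaling factor folded together with the filter scale (the scaling_factors_ptr[b] *= filter->params.scale step above). A minimal scalar sketch of that pipeline -- illustration only, made-up names, no SSE, output assumed pre-initialized with the bias -- might look like:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

// Scalar reference for the hybrid matmul step (the real kernel uses
// tensor_utils::SymmetricQuantizeFloats and cpu_backend_gemm::Gemm).
void HybridMatmulReference(const float* input, int n_batch, int n_input,
                           const int8_t* weights, int n_output,
                           float filter_scale, float* output) {
  std::vector<int8_t> quantized(n_batch * n_input);
  std::vector<float> scaling_factors(n_batch);
  for (int b = 0; b < n_batch; ++b) {
    // Per-batch symmetric quantization: map [-max_abs, max_abs] onto [-127, 127].
    float max_abs = 0.f;
    for (int i = 0; i < n_input; ++i)
      max_abs = std::max(max_abs, std::abs(input[b * n_input + i]));
    const float scale = max_abs > 0.f ? max_abs / 127.f : 1.f;
    for (int i = 0; i < n_input; ++i)
      quantized[b * n_input + i] =
          static_cast<int8_t>(std::lround(input[b * n_input + i] / scale));
    // Fold the filter scale into the per-batch factor, as EvalHybrid does.
    scaling_factors[b] = scale * filter_scale;
  }
  for (int b = 0; b < n_batch; ++b) {
    for (int o = 0; o < n_output; ++o) {
      int32_t acc = 0;  // int32 accumulator, the role played by `scratch`
      for (int i = 0; i < n_input; ++i)
        acc += static_cast<int32_t>(weights[o * n_input + i]) *
               static_cast<int32_t>(quantized[b * n_input + i]);
      output[b * n_output + o] += static_cast<float>(acc) * scaling_factors[b];
    }
  }
}
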
diff --git a/tensorflow/lite/kernels/internal/BUILD b/tensorflow/lite/kernels/internal/BUILD index 519dd8611ef..5acabeb45cd 100644 --- a/tensorflow/lite/kernels/internal/BUILD +++ b/tensorflow/lite/kernels/internal/BUILD @@ -682,7 +682,9 @@ cc_library( ":portable_tensor_utils", "//tensorflow/lite/c:common", "//tensorflow/lite/kernels:cpu_backend_context", + "//tensorflow/lite/kernels:cpu_backend_gemm", "//tensorflow/lite/kernels:op_macros", + "@ruy//ruy/profiler:instrumentation", ], ) diff --git a/tensorflow/lite/kernels/internal/optimized/sse_tensor_utils.cc b/tensorflow/lite/kernels/internal/optimized/sse_tensor_utils.cc index 80cc14c6d26..4c4f39b6300 100644 --- a/tensorflow/lite/kernels/internal/optimized/sse_tensor_utils.cc +++ b/tensorflow/lite/kernels/internal/optimized/sse_tensor_utils.cc @@ -24,7 +24,10 @@ limitations under the License. #include +#include "ruy/profiler/instrumentation.h" // from @ruy #include "tensorflow/lite/kernels/cpu_backend_context.h" +#include "tensorflow/lite/kernels/cpu_backend_gemm.h" +#include "tensorflow/lite/kernels/cpu_backend_gemm_params.h" #include "tensorflow/lite/kernels/internal/compatibility.h" namespace tflite { @@ -170,6 +173,38 @@ void SseMatrixBatchVectorMultiplyAccumulateImpl( } // for batch } +void SseCpuBackendGemm(const int8_t* input, const int32_t* bias, + const int8_t* input_to_gate_weights, int32_t n_batch, + int32_t n_input, int32_t n_output, int32_t output_zp, + int32_t* scratch, CpuBackendContext* context) { + using ::tflite::cpu_backend_gemm::Gemm; + using ::tflite::cpu_backend_gemm::GemmParams; + using ::tflite::cpu_backend_gemm::MatrixParams; + + MatrixParams lhs_params; + lhs_params.order = cpu_backend_gemm::Order::kRowMajor; + lhs_params.rows = n_output; + lhs_params.cols = n_input; + lhs_params.cache_policy = cpu_backend_gemm::CachePolicy::kCacheIfLargeSpeedup; + + MatrixParams rhs_params; + rhs_params.order = cpu_backend_gemm::Order::kColMajor; + rhs_params.rows = n_input; + rhs_params.cols = n_batch; + + MatrixParams dst_params; + dst_params.order = cpu_backend_gemm::Order::kColMajor; + dst_params.rows = n_output; + dst_params.cols = n_batch; + + GemmParams gemm_params; + if (bias) { + gemm_params.bias = bias; + } + cpu_backend_gemm::Gemm(lhs_params, input_to_gate_weights, rhs_params, input, + dst_params, scratch, gemm_params, context); +} + void SseMatrixBatchVectorMultiplyAccumulate( const int8_t* __restrict__ matrix, const int m_rows, const int m_cols, const int8_t* __restrict__ vectors, @@ -181,6 +216,56 @@ void SseMatrixBatchVectorMultiplyAccumulate( /*row_sums=*/nullptr); } +void SseMatrixBatchVectorMultiplyAccumulate( + const int8_t* __restrict__ matrix, const int m_rows, const int m_cols, + const int8_t* __restrict__ vectors, + const float* __restrict__ scaling_factors, int n_batch, int32_t* scratch, + float* __restrict__ result, CpuBackendContext* context) { + if (m_rows % 4 == 0) { + const int32_t* bias = static_cast(nullptr); + SseCpuBackendGemm(vectors, bias, matrix, n_batch, m_cols, m_rows, + /*output_zp=*/0, scratch, context); + + { + ruy::profiler::ScopeLabel label("HybridMultiplyScalingFactor"); + // Multiply by float scaling factors and write to result + const int total_size = n_batch * m_rows; + int i = 0; + for (; i <= total_size - 8; i += 8, result += 8) { + const float batch_scaling_factor0 = scaling_factors[i / m_rows]; + const float batch_scaling_factor1 = scaling_factors[(i + 4) / m_rows]; + const __m128 scaling_factor0 = _mm_set1_ps(batch_scaling_factor0); + const __m128 scaling_factor1 = 
_mm_set1_ps(batch_scaling_factor1); + const __m128i scratch_val0 = + _mm_loadu_si128(reinterpret_cast(scratch + i)); + const __m128i scratch_val1 = + _mm_loadu_si128(reinterpret_cast(scratch + i + 4)); + const __m128 float_val0 = _mm_cvtepi32_ps(scratch_val0); + const __m128 float_val1 = _mm_cvtepi32_ps(scratch_val1); + const __m128 prod0 = _mm_mul_ps(float_val0, scaling_factor0); + const __m128 result0 = _mm_add_ps(_mm_load1_ps(result), prod0); + const __m128 prod1 = _mm_mul_ps(float_val1, scaling_factor1); + const __m128 result1 = _mm_add_ps(_mm_load1_ps(result + 4), prod1); + _mm_store_ps(result, result0); + _mm_store_ps(result + 4, result1); + } + scratch += i; + for (; i < total_size; i++) { + const float batch_scaling_factor = scaling_factors[i / m_rows]; + int32_t x = *(scratch++); + *result += x * batch_scaling_factor; + ++result; + } + } + return; + } + + SseMatrixBatchVectorMultiplyAccumulateImpl( + matrix, m_rows, m_cols, vectors, scaling_factors, n_batch, result, + /*per_channel_scale=*/nullptr, /*input_offset=*/nullptr, + /*row_sums=*/nullptr); +} + void SseMatrixBatchVectorMultiplyAccumulate( const int8_t* __restrict__ matrix, const int m_rows, const int m_cols, const int8_t* __restrict__ vectors, diff --git a/tensorflow/lite/kernels/internal/optimized/sse_tensor_utils.h b/tensorflow/lite/kernels/internal/optimized/sse_tensor_utils.h index 9f73ef6435a..e416579308b 100644 --- a/tensorflow/lite/kernels/internal/optimized/sse_tensor_utils.h +++ b/tensorflow/lite/kernels/internal/optimized/sse_tensor_utils.h @@ -71,7 +71,7 @@ void MatrixBatchVectorMultiplyAccumulate( int32_t* __restrict__ scratch, float* __restrict__ result, CpuBackendContext* __restrict__ context) { SSE_OR_PORTABLE(MatrixBatchVectorMultiplyAccumulate, matrix, m_rows, m_cols, - vectors, scaling_factors, n_batch, result); + vectors, scaling_factors, n_batch, scratch, result, context); } void SparseMatrixBatchVectorMultiplyAccumulate1x4( diff --git a/tensorflow/lite/kernels/internal/optimized/sse_tensor_utils_impl.h b/tensorflow/lite/kernels/internal/optimized/sse_tensor_utils_impl.h index c5ede624762..a77a049b3af 100644 --- a/tensorflow/lite/kernels/internal/optimized/sse_tensor_utils_impl.h +++ b/tensorflow/lite/kernels/internal/optimized/sse_tensor_utils_impl.h @@ -35,6 +35,14 @@ void SseMatrixBatchVectorMultiplyAccumulate( const float* __restrict__ scaling_factors, int n_batch, float* __restrict__ result); +// Matrix multiplication for quantized values using symmetric quantization +// with additional scratch memory for GEMM operation prior to scaling. +void SseMatrixBatchVectorMultiplyAccumulate( + const int8_t* __restrict__ matrix, const int m_rows, const int m_cols, + const int8_t* __restrict__ vectors, + const float* __restrict__ scaling_factors, int n_batch, int32_t* scratch, + float* __restrict__ result, CpuBackendContext* context); + // Matrix multiplication for quantized values using asymmetric quantization. void SseMatrixBatchVectorMultiplyAccumulate( const int8_t* __restrict__ matrix, const int m_rows, const int m_cols, From 96b5aa098a9db00d2405aa30886ac1a1931faa51 Mon Sep 17 00:00:00 2001 From: Wenhao Jia Date: Thu, 16 Jul 2020 12:15:54 -0700 Subject: [PATCH 0623/2522] Refactor some of TpuExecutable methods into a base class, TpuExecutableInterface. 
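
In outline, the platform-neutral logic (output allocation with input reuse, ExecuteAsyncOnStream) moves into the new base class, and a concrete backend only supplies a few hooks. A condensed sketch of the resulting split -- member lists abbreviated, not the verbatim headers, which appear in the diff below -- is:

namespace xla {

// Shared, platform-neutral driver (new tpu_executable_interface.h).
class TpuExecutableInterface : public Executable {
 public:
  StatusOr<ExecutionOutput> ExecuteAsyncOnStream(
      const ServiceExecutableRunOptions* run_options,
      std::vector<ExecutionInput> arguments,
      HloExecutionProfile* hlo_execution_profile) override;

  // Hook invoked by the driver once input/output buffers are set up.
  virtual Status LoadProgramAndEnqueueToStream(
      const ServiceExecutableRunOptions& run_options,
      absl::Span<const stream_executor::DeviceMemoryBase> arguments,
      stream_executor::DeviceMemoryBase result,
      absl::optional<stream_executor::DeviceMemoryBase>
          cross_program_prefetch_addr) = 0;

 protected:
  virtual Shape HostShapeToDeviceShape(const Shape& host_shape) = 0;
  virtual int64 ShapeSize(const Shape& shape) = 0;
};

// TPU-specific subclass (tpu_executable.h) now only implements the hooks,
// forwarding to the TPU C API.
class TpuExecutable : public TpuExecutableInterface {
 private:
  Status LoadProgramAndEnqueueToStream(
      const ServiceExecutableRunOptions& run_options,
      absl::Span<const stream_executor::DeviceMemoryBase> arguments,
      stream_executor::DeviceMemoryBase result,
      absl::optional<stream_executor::DeviceMemoryBase>
          cross_program_prefetch_addr) override;
  Shape HostShapeToDeviceShape(const Shape& host_shape) override;
  int64 ShapeSize(const Shape& shape) override;
};

}  // namespace xla
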
PiperOrigin-RevId: 321618679 Change-Id: Ia1cf91d40bf21d19931a4517da786d0e73851361 --- tensorflow/stream_executor/tpu/BUILD | 35 ++- .../stream_executor/tpu/tpu_executable.cc | 244 +++--------------- .../stream_executor/tpu/tpu_executable.h | 51 +--- .../tpu/tpu_executable_interface.cc | 204 +++++++++++++++ .../tpu/tpu_executable_interface.h | 91 +++++++ 5 files changed, 366 insertions(+), 259 deletions(-) create mode 100644 tensorflow/stream_executor/tpu/tpu_executable_interface.cc create mode 100644 tensorflow/stream_executor/tpu/tpu_executable_interface.h diff --git a/tensorflow/stream_executor/tpu/BUILD b/tensorflow/stream_executor/tpu/BUILD index 111a39c84bb..17ea13ed0b3 100644 --- a/tensorflow/stream_executor/tpu/BUILD +++ b/tensorflow/stream_executor/tpu/BUILD @@ -208,25 +208,22 @@ cc_library( ":c_api_conversions", ":proto_helper", ":status_helper", + ":tpu_executable_interface", ":tpu_executor_base", ":tpu_executor_c_api_hdrs", + ":tpu_platform_interface", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:status", - "//tensorflow/compiler/xla:status_macros", - "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:types", - "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto_cc", "//tensorflow/compiler/xla/service:executable", "//tensorflow/compiler/xla/service:hlo", - "//tensorflow/compiler/xla/service:hlo_execution_profile", - "//tensorflow/compiler/xla/service:transfer_manager", + "//tensorflow/core:lib", "//tensorflow/core/tpu:tpu_api", "//tensorflow/core/tpu/kernels:tpu_compile_c_api_hdrs", "//tensorflow/core/tpu/kernels:tpu_execute_c_api_hdrs", "//tensorflow/core/tpu/kernels:tpu_program_c_api_hdrs", "//tensorflow/stream_executor", - "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/types:optional", "@com_google_absl//absl/types:span", ], @@ -263,3 +260,29 @@ cc_library( "//tensorflow/stream_executor:stream_executor_headers", ], ) + +cc_library( + name = "tpu_executable_interface", + srcs = ["tpu_executable_interface.cc"], + hdrs = ["tpu_executable_interface.h"], + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:types", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla/service:executable", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_execution_profile", + "//tensorflow/compiler/xla/service:hlo_profile_printer_data_cc", + "//tensorflow/compiler/xla/service:maybe_owning_device_memory", + "//tensorflow/compiler/xla/service:shaped_buffer", + "//tensorflow/compiler/xla/service:transfer_manager", + "//tensorflow/stream_executor:stream_executor_headers", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/types:optional", + "@com_google_absl//absl/types:span", + ], +) diff --git a/tensorflow/stream_executor/tpu/tpu_executable.cc b/tensorflow/stream_executor/tpu/tpu_executable.cc index b8656bc97fd..f6ded8415c1 100644 --- a/tensorflow/stream_executor/tpu/tpu_executable.cc +++ b/tensorflow/stream_executor/tpu/tpu_executable.cc @@ -15,17 +15,8 @@ limitations under the License. 
#include "tensorflow/stream_executor/tpu/tpu_executable.h" -#include -#include -#include -#include - -#include "absl/algorithm/container.h" -#include "tensorflow/compiler/xla/service/transfer_manager.h" -#include "tensorflow/compiler/xla/shape_util.h" -#include "tensorflow/compiler/xla/status_macros.h" -#include "tensorflow/compiler/xla/util.h" #include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/casts.h" #include "tensorflow/core/tpu/kernels/tpu_execute_c_api.h" #include "tensorflow/core/tpu/tpu_api.h" #include "tensorflow/stream_executor/tpu/c_api_conversions.h" @@ -33,213 +24,19 @@ limitations under the License. #include "tensorflow/stream_executor/tpu/status_helper.h" #include "tensorflow/stream_executor/tpu/tpu_executor_c_api.h" #include "tensorflow/stream_executor/tpu/tpu_platform.h" +#include "tensorflow/stream_executor/tpu/tpu_platform_interface.h" namespace xla { -namespace tpu { -namespace { - -using HostCommandHandler = TpuExecutable::HostCommandHandler; - -// Write the tuple index buffers (arrays of pointers). -static Status PopulateResultTupleBuffers(const ShapedBuffer& result, - se::Stream* stream, - se::Stream* transfer_stream) { - TF_ASSIGN_OR_RETURN(auto transfer_manager, TransferManager::GetForPlatform( - stream->parent()->platform())); - if (transfer_manager->CanShapedBufferBeAccessedNow(stream->parent(), - result)) { - TF_RETURN_IF_ERROR(transfer_manager->WriteTupleIndexTablesAsync( - transfer_stream ? transfer_stream : stream, result)); - if (transfer_stream && transfer_stream != stream) { - stream->ThenWaitFor(transfer_stream); - } - return Status::OK(); - } else { - return transfer_manager->WriteTupleIndexTablesAsync(stream, result); - } -} - -xla::Shape HostShapeToDeviceShape(const xla::Shape& host_shape) { - XLA_Shape c_host_shape; - XLA_Shape c_device_shape; - TpuConversions::XlaShapeToCShape(host_shape, &c_host_shape); - tensorflow::tpu::ExecutorApiFn()->HardwareLayout_HostShapeToDeviceShapeFn( - &c_host_shape, &c_device_shape); - xla::Shape device_shape = TpuConversions::CShapeToXlaShape(&c_device_shape); - TpuConversions::CShapeCleanup(&c_host_shape); - TpuConversions::CShapeCleanup(&c_device_shape); - return device_shape; -} - -int64 ShapeSize(const xla::Shape& shape) { - XLA_Shape c_shape; - TpuConversions::XlaShapeToCShape(shape, &c_shape); - int64 size = - tensorflow::tpu::ExecutorApiFn()->HardwareLayout_ShapeSizeFn(&c_shape); - TpuConversions::CShapeCleanup(&c_shape); - return size; -} - -} // namespace TpuExecutable::TpuExecutable(const XLA_TpuProgram* core_program, std::unique_ptr hlo_module, HostCommandHandler host_command_handler) - : Executable(std::move(hlo_module), /*hlo_profile_printer_data=*/nullptr, - /*hlo_profile_index_map=*/nullptr), + : TpuExecutableInterface(std::move(hlo_module), + /*hlo_profile_printer_data=*/nullptr, + /*hlo_profile_index_map=*/nullptr), core_program_(core_program), host_command_handler_(std::move(host_command_handler)) {} -StatusOr TpuExecutable::AllocateOutputMemoryWithInputReuse( - const Shape& host_shape, const HloInputOutputAliasConfig& alias_config, - se::DeviceMemoryAllocator* allocator, - std::vector* arguments, se::Stream* stream, - se::Stream* transfer_stream) { - auto stream_exec = stream->parent(); - auto device_ordinal = stream_exec->device_ordinal(); - VLOG(3) << "AllocateOutputMemoryWithInputReuse, device = " << device_ordinal - << " host_shape = " << ShapeUtil::HumanStringWithLayout(host_shape); - Shape device_shape = HostShapeToDeviceShape(host_shape); - - if 
(VLOG_IS_ON(3)) { - VLOG(3) << "AllocateOutputMemoryWithInputReuse, device = " << device_ordinal - << " host_shape = " << ShapeUtil::HumanStringWithLayout(host_shape); - if (!Shape::Equal().MinorToMajorOnlyInLayout()(host_shape, device_shape)) { - VLOG(3) << "Rewrote host_shape to device_shape: " - << ShapeUtil::HumanStringWithLayout(host_shape) << " -> " - << ShapeUtil::HumanStringWithLayout(device_shape); - } - } - - ExecutionOutput result(host_shape, std::move(device_shape), allocator, - device_ordinal); - // Iterate through and allocate a buffer for each shape index, checking for - // possible input buffer reuse. - int64 reused_buffer_bytes = 0; - int64 total_result_buffer_bytes = 0; - for (auto& pair : result.MutableResult()->buffers()) { - const ShapeIndex& result_index = pair.first; - se::DeviceMemoryBase& result_buffer = pair.second; - int64 allocation_bytes = ShapeSize(ShapeUtil::GetSubshape( - result.Result().on_device_shape(), result_index)); - total_result_buffer_bytes += allocation_bytes; - - // Return an InternalError if result_index is invalid. This avoids failing - // the CHECK when calling GetAliasedParameter - if (!ShapeUtil::IndexIsValid(alias_config.shape(), result_index)) { - return InternalError("result_index is invalid: %s", - result_index.ToString()); - } - - absl::optional alias = - alias_config.GetAliasedParameter(result_index); - if (alias) { - TF_RET_CHECK(alias->parameter_number < arguments->size()); - ExecutionInput& input = (*arguments)[alias->parameter_number]; - MaybeOwningDeviceMemory* device_memory = - input.MutableBuffer(alias->parameter_index); - if (auto owning = device_memory->Release()) { - // If the caller passes the ownership of the device memory, reuse it - // as the output buffer. It is up to the caller whether or not to - // donate a buffer; the aliasing information describes which buffers - // may alias, not buffers that must alias. - se::DeviceMemoryBase device_memory_base = owning->Release(); - *device_memory = device_memory_base; - result_buffer = device_memory_base; - reused_buffer_bytes += allocation_bytes; - // The caller is giving us the input buffer, but in case of error of the - // execute call, we should not be releasing it as it contains valid data - // (for example, it is a parameter which the user wants us to alias, in - // a gradient update computation). So we store the index into the result - // in the aliased vactor, which will be fed to the ExecutionOutput, - // which will be using the indices to drop the addresses from its own - // ScopedShapedBuffer result, if the ExecutionOutput is not committed. - result.AddAliasedIndex(result_index); - } - } - - // We need to allocate a new output buffer for two cases: - // - There is no alias between this output and any input. - // - There is an alias, but the xla doesn't own the input memory so it can't - // donate buffer to the computation. - if (result_buffer.is_null()) { - const Shape& on_device_shape = result.Result().on_device_shape(); - const Shape& on_device_subshape = - ShapeUtil::GetSubshape(on_device_shape, result_index); - TF_ASSIGN_OR_RETURN( - auto allocated_buffer, - allocator->Allocate(device_ordinal, allocation_bytes, - /*retry_on_failure=*/true, - on_device_subshape.layout().memory_space())); - // Store the allocated buffer in our ScopedShapedBuffer, which takes - // ownership. 
- result_buffer = allocated_buffer.Release(); - } - TF_RET_CHECK(allocation_bytes == 0 || result_buffer != nullptr); - } - - VLOG(1) << "Reused " << reused_buffer_bytes - << " parameter buffers (total result buffer size: " - << total_result_buffer_bytes << ")"; - - TF_RETURN_IF_ERROR( - PopulateResultTupleBuffers(result.Result(), stream, transfer_stream)); - return std::move(result); -} - -StatusOr TpuExecutable::ExecuteAsyncOnStream( - const ServiceExecutableRunOptions* run_options, - std::vector arguments, - HloExecutionProfile* /*hlo_execution_profile*/) { - std::vector memory_bases; - memory_bases.reserve(arguments.size()); - for (auto& argument : arguments) { - memory_bases.push_back(argument.Buffer({}).AsDeviceMemoryBase()); - } - se::Stream* stream = run_options->stream(); - - CHECK_NE(run_options->allocator(), nullptr); - const Shape& shape = - hlo_module_ == nullptr ? ShapeUtil::MakeNil() : result_shape(); - const HloInputOutputAliasConfig& alias_config = - hlo_module_ == nullptr ? HloInputOutputAliasConfig() - : hlo_module_->input_output_alias_config(); - TF_ASSIGN_OR_RETURN( - ExecutionOutput result, - AllocateOutputMemoryWithInputReuse( - shape, alias_config, run_options->allocator(), &arguments, stream, - run_options->run_options().host_to_device_stream())); - - MarkToBeReleasedArguments(absl::MakeSpan(arguments), result); - - // Address of the buffer in TPU memory that is being speculated. - absl::optional cross_program_prefetch_addr; - if (hlo_module_) { - for (const auto& [parameter, index] : - hlo_module_->CrossProgramPrefetches()) { - CHECK_LT(parameter, arguments.size()); - // Ensure the cross program prefetched buffer doesn't alias with any - // program outputs. If the input and output aliased, the buffer could be - // invalidated during program execution and the program could read stale - // data instead of fresh data. - auto it = arguments[parameter].MutableBuffers()->find({index}); - CHECK(it != arguments[parameter].MutableBuffers()->end()); - if (absl::c_none_of(result.Result().buffers(), [&](auto index_addr_pair) { - return index_addr_pair.second.IsSameAs( - it->second.AsDeviceMemoryBase()); - })) { - // Supports only one cross-program prefetch address. 
- cross_program_prefetch_addr = it->second.AsDeviceMemoryBase(); - } - } - } - - TF_RETURN_IF_ERROR(LoadProgramAndEnqueueToStream( - *run_options, memory_bases, result.Result().root_buffer(), - cross_program_prefetch_addr)); - return std::move(result); -} - Status TpuExecutable::LoadProgramAndEnqueueToStream( const ServiceExecutableRunOptions& run_options, absl::Span arguments, @@ -269,7 +66,7 @@ Status TpuExecutable::LoadProgramAndEnqueueToStream( auto dev_assign = run_options.run_options().device_assignment(); stream_executor::tpu::SerializedProto dev_assign_serialized; if (dev_assign != nullptr) { - xla::DeviceAssignmentProto dev_assign_proto; + DeviceAssignmentProto dev_assign_proto; TF_RETURN_IF_ERROR(dev_assign->Serialize(&dev_assign_proto)); dev_assign_serialized = stream_executor::tpu::SerializeProto(dev_assign_proto); @@ -277,9 +74,10 @@ Status TpuExecutable::LoadProgramAndEnqueueToStream( c_dev_assign.size = dev_assign_serialized.size; } - auto stream = - tensorflow::TpuPlatform::GetRegisteredPlatform()->stream_map()->at( - run_options.run_options().stream()->implementation()); + auto platform = tensorflow::down_cast( + tensorflow::tpu::TpuPlatformInterface::GetRegisteredPlatform()); + auto stream = platform->stream_map()->at( + run_options.run_options().stream()->implementation()); StatusHelper status; TpuExecutable_LoadProgramAndEnqueueToStream( @@ -294,5 +92,25 @@ Status TpuExecutable::LoadProgramAndEnqueueToStream( return status.status(); } -} // namespace tpu +Shape TpuExecutable::HostShapeToDeviceShape(const Shape& host_shape) { + XLA_Shape c_host_shape; + XLA_Shape c_device_shape; + TpuConversions::XlaShapeToCShape(host_shape, &c_host_shape); + tensorflow::tpu::ExecutorApiFn()->HardwareLayout_HostShapeToDeviceShapeFn( + &c_host_shape, &c_device_shape); + Shape device_shape = TpuConversions::CShapeToXlaShape(&c_device_shape); + TpuConversions::CShapeCleanup(&c_host_shape); + TpuConversions::CShapeCleanup(&c_device_shape); + return device_shape; +} + +int64 TpuExecutable::ShapeSize(const Shape& shape) { + XLA_Shape c_shape; + TpuConversions::XlaShapeToCShape(shape, &c_shape); + int64 size = + tensorflow::tpu::ExecutorApiFn()->HardwareLayout_ShapeSizeFn(&c_shape); + TpuConversions::CShapeCleanup(&c_shape); + return size; +} + } // namespace xla diff --git a/tensorflow/stream_executor/tpu/tpu_executable.h b/tensorflow/stream_executor/tpu/tpu_executable.h index 74ee0e0379e..3c9e60ba335 100644 --- a/tensorflow/stream_executor/tpu/tpu_executable.h +++ b/tensorflow/stream_executor/tpu/tpu_executable.h @@ -16,75 +16,47 @@ limitations under the License. 
#ifndef TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_EXECUTABLE_H_ #define TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_EXECUTABLE_H_ +#include #include -#include #include "absl/types/optional.h" #include "absl/types/span.h" -#include "tensorflow/compiler/xla/service/executable.h" -#include "tensorflow/compiler/xla/service/hlo_execution_profile.h" -#include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h" #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/service_executable_run_options.h" #include "tensorflow/compiler/xla/shape.h" #include "tensorflow/compiler/xla/status.h" -#include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/core/tpu/kernels/tpu_program_c_api.h" #include "tensorflow/stream_executor/device_memory.h" -#include "tensorflow/stream_executor/device_memory_allocator.h" -#include "tensorflow/stream_executor/stream_executor.h" +#include "tensorflow/stream_executor/tpu/tpu_executable_interface.h" namespace xla { -namespace tpu { // An executable capable of being fed to a TPU device via TpuExecutor. -class TpuExecutable : public Executable { +class TpuExecutable : public TpuExecutableInterface { public: using HostCommandHandler = std::function; // Constructs an executable that holds a non-owning reference to an // XLA_TpuProgram. - TpuExecutable(const XLA_TpuProgram* core_program, - std::unique_ptr hlo_module, - HostCommandHandler host_command_handler = nullptr); - + explicit TpuExecutable(const XLA_TpuProgram* core_program, + std::unique_ptr hlo_module, + HostCommandHandler host_command_handler = nullptr); ~TpuExecutable() override = default; - StatusOr ExecuteAsyncOnStream( - const ServiceExecutableRunOptions* run_options, - std::vector arguments, - HloExecutionProfile* hlo_execution_profile) override; - const XLA_TpuProgram* core_program() const { return core_program_; } - // Same as AllocateOutputMemory, except that input buffers can be reused - // as output buffers. See UserBufferAlias class comment for more details on - // the buffer reuse. - // - // `alias_config` indicates which input and output buffers can be aliased. - // - // `arguments` are ExecutionInput containing the input parameters. Currently - // only a single input parameter (typically a tuple) is supported on TPU. For - // each element in the shape tree, if the element holds the ownership of the - // memory, it is considered donated and XLA will potentially reuse it as - // output buffers. - // - // The optional 'transfer_stream' parameter enables transfers (for tuple - // tables) to be performed on a separate stream to 'stream'. 
- static StatusOr AllocateOutputMemoryWithInputReuse( - const Shape& host_shape, const HloInputOutputAliasConfig& alias_config, - se::DeviceMemoryAllocator* allocator, - std::vector* arguments, se::Stream* stream, - se::Stream* transfer_stream = nullptr); - private: Status LoadProgramAndEnqueueToStream( const ServiceExecutableRunOptions& run_options, absl::Span arguments, stream_executor::DeviceMemoryBase result, absl::optional - cross_program_prefetch_addr); + cross_program_prefetch_addr) override; + + Shape HostShapeToDeviceShape(const Shape& host_shape) override; + + int64 ShapeSize(const Shape& shape) override; const XLA_TpuProgram* const core_program_; @@ -93,7 +65,6 @@ class TpuExecutable : public Executable { TF_DISALLOW_COPY_AND_ASSIGN(TpuExecutable); }; -} // namespace tpu } // namespace xla #endif // TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_EXECUTABLE_H_ diff --git a/tensorflow/stream_executor/tpu/tpu_executable_interface.cc b/tensorflow/stream_executor/tpu/tpu_executable_interface.cc new file mode 100644 index 00000000000..13f9db98e5d --- /dev/null +++ b/tensorflow/stream_executor/tpu/tpu_executable_interface.cc @@ -0,0 +1,204 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/stream_executor/tpu/tpu_executable_interface.h" + +#include + +#include "absl/algorithm/container.h" +#include "tensorflow/compiler/xla/service/maybe_owning_device_memory.h" +#include "tensorflow/compiler/xla/service/shaped_buffer.h" +#include "tensorflow/compiler/xla/service/transfer_manager.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/compiler/xla/util.h" + +namespace xla { + +namespace { + +// Write the tuple index buffers (arrays of pointers). +static Status PopulateResultTupleBuffers(const ShapedBuffer& result, + se::Stream* stream, + se::Stream* transfer_stream) { + TF_ASSIGN_OR_RETURN(auto transfer_manager, TransferManager::GetForPlatform( + stream->parent()->platform())); + if (transfer_manager->CanShapedBufferBeAccessedNow(stream->parent(), + result)) { + TF_RETURN_IF_ERROR(transfer_manager->WriteTupleIndexTablesAsync( + transfer_stream ? 
transfer_stream : stream, result)); + if (transfer_stream && transfer_stream != stream) { + stream->ThenWaitFor(transfer_stream); + } + return Status::OK(); + } else { + return transfer_manager->WriteTupleIndexTablesAsync(stream, result); + } +} + +} // namespace + +StatusOr +TpuExecutableInterface::AllocateOutputMemoryWithInputReuse( + const Shape& host_shape, const HloInputOutputAliasConfig& alias_config, + se::DeviceMemoryAllocator* allocator, + std::vector* arguments, se::Stream* stream, + se::Stream* transfer_stream) { + auto stream_exec = stream->parent(); + auto device_ordinal = stream_exec->device_ordinal(); + VLOG(3) << "AllocateOutputMemoryWithInputReuse, device = " << device_ordinal + << " host_shape = " << ShapeUtil::HumanStringWithLayout(host_shape); + Shape device_shape = HostShapeToDeviceShape(host_shape); + + if (VLOG_IS_ON(3)) { + VLOG(3) << "AllocateOutputMemoryWithInputReuse, device = " << device_ordinal + << " host_shape = " << ShapeUtil::HumanStringWithLayout(host_shape); + if (!Shape::Equal().MinorToMajorOnlyInLayout()(host_shape, device_shape)) { + VLOG(3) << "Rewrote host_shape to device_shape: " + << ShapeUtil::HumanStringWithLayout(host_shape) << " -> " + << ShapeUtil::HumanStringWithLayout(device_shape); + } + } + + ExecutionOutput result(host_shape, std::move(device_shape), allocator, + device_ordinal); + // Iterate through and allocate a buffer for each shape index, checking for + // possible input buffer reuse. + int64 reused_buffer_bytes = 0; + int64 total_result_buffer_bytes = 0; + for (auto& pair : result.MutableResult()->buffers()) { + const ShapeIndex& result_index = pair.first; + se::DeviceMemoryBase& result_buffer = pair.second; + int64 allocation_bytes = ShapeSize(ShapeUtil::GetSubshape( + result.Result().on_device_shape(), result_index)); + total_result_buffer_bytes += allocation_bytes; + + // Return an InternalError if result_index is invalid. This avoids failing + // the CHECK when calling GetAliasedParameter + if (!ShapeUtil::IndexIsValid(alias_config.shape(), result_index)) { + return InternalError("result_index is invalid: %s", + result_index.ToString()); + } + + absl::optional alias = + alias_config.GetAliasedParameter(result_index); + if (alias) { + TF_RET_CHECK(alias->parameter_number < arguments->size()); + ExecutionInput& input = (*arguments)[alias->parameter_number]; + MaybeOwningDeviceMemory* device_memory = + input.MutableBuffer(alias->parameter_index); + if (auto owning = device_memory->Release()) { + // If the caller passes the ownership of the device memory, reuse it + // as the output buffer. It is up to the caller whether or not to + // donate a buffer; the aliasing information describes which buffers + // may alias, not buffers that must alias. + se::DeviceMemoryBase device_memory_base = owning->Release(); + *device_memory = device_memory_base; + result_buffer = device_memory_base; + reused_buffer_bytes += allocation_bytes; + // The caller is giving us the input buffer, but in case of error of the + // execute call, we should not be releasing it as it contains valid data + // (for example, it is a parameter which the user wants us to alias, in + // a gradient update computation). So we store the index into the result + // in the aliased vactor, which will be fed to the ExecutionOutput, + // which will be using the indices to drop the addresses from its own + // ScopedShapedBuffer result, if the ExecutionOutput is not committed. 
+ result.AddAliasedIndex(result_index); + } + } + + // We need to allocate a new output buffer for two cases: + // - There is no alias between this output and any input. + // - There is an alias, but the xla doesn't own the input memory so it can't + // donate buffer to the computation. + if (result_buffer.is_null()) { + const Shape& on_device_shape = result.Result().on_device_shape(); + const Shape& on_device_subshape = + ShapeUtil::GetSubshape(on_device_shape, result_index); + TF_ASSIGN_OR_RETURN( + auto allocated_buffer, + allocator->Allocate(device_ordinal, allocation_bytes, + /*retry_on_failure=*/true, + on_device_subshape.layout().memory_space())); + // Store the allocated buffer in our ScopedShapedBuffer, which takes + // ownership. + result_buffer = allocated_buffer.Release(); + } + TF_RET_CHECK(allocation_bytes == 0 || result_buffer != nullptr); + } + + VLOG(1) << "Reused " << reused_buffer_bytes + << " parameter buffers (total result buffer size: " + << total_result_buffer_bytes << ")"; + + TF_RETURN_IF_ERROR( + PopulateResultTupleBuffers(result.Result(), stream, transfer_stream)); + return std::move(result); +} + +StatusOr TpuExecutableInterface::ExecuteAsyncOnStream( + const ServiceExecutableRunOptions* run_options, + std::vector arguments, + HloExecutionProfile* /*hlo_execution_profile*/) { + std::vector memory_bases; + memory_bases.reserve(arguments.size()); + for (auto& argument : arguments) { + memory_bases.push_back(argument.Buffer({}).AsDeviceMemoryBase()); + } + se::Stream* stream = run_options->stream(); + + CHECK_NE(run_options->allocator(), nullptr); + const Shape& shape = + hlo_module_ == nullptr ? ShapeUtil::MakeNil() : result_shape(); + const HloInputOutputAliasConfig& alias_config = + hlo_module_ == nullptr ? HloInputOutputAliasConfig() + : hlo_module_->input_output_alias_config(); + TF_ASSIGN_OR_RETURN( + ExecutionOutput result, + AllocateOutputMemoryWithInputReuse( + shape, alias_config, run_options->allocator(), &arguments, stream, + run_options->run_options().host_to_device_stream())); + + MarkToBeReleasedArguments(absl::MakeSpan(arguments), result); + + // Address of the buffer in TPU memory that is being speculated. + absl::optional cross_program_prefetch_addr; + if (hlo_module_) { + for (const auto& [parameter, index] : + hlo_module_->CrossProgramPrefetches()) { + CHECK_LT(parameter, arguments.size()); + // Ensure the cross program prefetched buffer doesn't alias with any + // program outputs. If the input and output aliased, the buffer could be + // invalidated during program execution and the program could read stale + // data from fast memory instead of fresh data in large memory. + auto it = arguments[parameter].MutableBuffers()->find({index}); + CHECK(it != arguments[parameter].MutableBuffers()->end()); + if (absl::c_none_of(result.Result().buffers(), [&](auto index_addr_pair) { + return index_addr_pair.second.IsSameAs( + it->second.AsDeviceMemoryBase()); + })) { + // Supports only one cross-program prefetch address. 
+ cross_program_prefetch_addr = it->second.AsDeviceMemoryBase(); + } + } + } + + TF_RETURN_IF_ERROR(LoadProgramAndEnqueueToStream( + *run_options, memory_bases, result.Result().root_buffer(), + cross_program_prefetch_addr)); + return std::move(result); +} + +} // namespace xla diff --git a/tensorflow/stream_executor/tpu/tpu_executable_interface.h b/tensorflow/stream_executor/tpu/tpu_executable_interface.h new file mode 100644 index 00000000000..d0e13b8aea8 --- /dev/null +++ b/tensorflow/stream_executor/tpu/tpu_executable_interface.h @@ -0,0 +1,91 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_EXECUTABLE_INTERFACE_H_ +#define TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_EXECUTABLE_INTERFACE_H_ + +#include +#include + +#include "absl/types/optional.h" +#include "absl/types/span.h" +#include "tensorflow/compiler/xla/service/executable.h" +#include "tensorflow/compiler/xla/service/hlo_execution_profile.h" +#include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_profile_printer_data.pb.h" +#include "tensorflow/compiler/xla/service/service_executable_run_options.h" +#include "tensorflow/compiler/xla/shape.h" +#include "tensorflow/compiler/xla/status.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/stream_executor/device_memory.h" +#include "tensorflow/stream_executor/device_memory_allocator.h" +#include "tensorflow/stream_executor/stream_executor.h" + +namespace xla { + +// An executable capable of being fed to a TPU device. +class TpuExecutableInterface : public Executable { + public: + explicit TpuExecutableInterface( + std::shared_ptr hlo_module, + std::unique_ptr hlo_profile_printer_data, + std::unique_ptr hlo_profile_index_map) + : Executable(std::move(hlo_module), std::move(hlo_profile_printer_data), + std::move(hlo_profile_index_map)) {} + ~TpuExecutableInterface() override = default; + + StatusOr ExecuteAsyncOnStream( + const ServiceExecutableRunOptions* run_options, + std::vector arguments, + HloExecutionProfile* hlo_execution_profile) override; + + // Same as AllocateOutputMemory, except that input buffers can be reused + // as output buffers. See UserBufferAlias class comment for more details on + // the buffer reuse. + // + // `alias_config` indicates which input and output buffers can be aliased. + // + // `arguments` are ExecutionInput containing the input parameters. Currently + // only a single input parameter (typically a tuple) is supported on TPU. For + // each element in the shape tree, if the element holds the ownership of the + // memory, it is considered donated and XLA will potentially reuse it as + // output buffers. 
+ // + // The optional 'transfer_stream' parameter enables transfers (for tuple + // tables) to be performed on a separate stream to 'stream'. + StatusOr AllocateOutputMemoryWithInputReuse( + const Shape& host_shape, const HloInputOutputAliasConfig& alias_config, + se::DeviceMemoryAllocator* allocator, + std::vector* arguments, se::Stream* stream, + se::Stream* transfer_stream = nullptr); + + virtual Status LoadProgramAndEnqueueToStream( + const ServiceExecutableRunOptions& run_options, + absl::Span arguments, + stream_executor::DeviceMemoryBase result, + absl::optional + cross_program_prefetch_addr) = 0; + + protected: + virtual Shape HostShapeToDeviceShape(const Shape& host_shape) = 0; + + virtual int64 ShapeSize(const Shape& shape) = 0; +}; + +} // namespace xla + +#endif // TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_EXECUTABLE_INTERFACE_H_ From 0a22a9e96de594717f9b698fca3698387d7e3791 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Thu, 16 Jul 2020 12:36:45 -0700 Subject: [PATCH 0624/2522] [XLA] Fix ODR violation coming from Eigen contraction kernels It is unsafe to mix eigen_helpers and eigen_helpers_no_mkl inside one binary because it leads to ODR violations in TensorContraction instantiations. PiperOrigin-RevId: 321622880 Change-Id: Ie88cfa15db216a6a03a67dad3e5e1d0d4c51ed9b --- tensorflow/compiler/xla/service/cpu/BUILD | 4 +++- tensorflow/compiler/xla/service/cpu/runtime_conv2d_impl.h | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index 7f051d4d1b2..ac167b00bb3 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -615,7 +615,8 @@ cc_library( deps = [ ":runtime_lightweight_check", "//tensorflow/compiler/xla:executable_run_options", - "//tensorflow/core/kernels:eigen_helpers_no_mkl", + "//tensorflow/core/kernels:eigen_contraction_kernel", + "//tensorflow/core/kernels:eigen_helpers", "//tensorflow/core/platform:dynamic_annotations", "//tensorflow/core/platform:mutex", "//tensorflow/core/platform:types", @@ -703,6 +704,7 @@ cc_library( visibility = ["//visibility:public"], deps = [ ":runtime_lightweight_check", + "//tensorflow/core/kernels:eigen_contraction_kernel", "//tensorflow/core/kernels:eigen_helpers", "//tensorflow/core/platform:dynamic_annotations", "//tensorflow/core/platform:types", diff --git a/tensorflow/compiler/xla/service/cpu/runtime_conv2d_impl.h b/tensorflow/compiler/xla/service/cpu/runtime_conv2d_impl.h index 193c25f2a4b..ec634e7f738 100644 --- a/tensorflow/compiler/xla/service/cpu/runtime_conv2d_impl.h +++ b/tensorflow/compiler/xla/service/cpu/runtime_conv2d_impl.h @@ -19,6 +19,10 @@ limitations under the License. #include "tensorflow/core/kernels/eigen_spatial_convolutions.h" #include "tensorflow/core/platform/types.h" +#if defined(TENSORFLOW_USE_CUSTOM_CONTRACTION_KERNEL) +#include "tensorflow/core/kernels/eigen_contraction_kernel.h" +#endif + // 'tensorflow' namespace is used so that int64 and other types don't require // qualification. 
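For context on the ODR fix in the patch above: the hazard is that one translation unit instantiates Eigen's TensorContraction with the custom contraction kernel enabled while another instantiates the same template without it; the linker keeps a single copy of the instantiation, so code built against the other configuration silently gets mismatched definitions. A generic, self-contained illustration of the failure mode -- not TensorFlow code; USE_FAST_KERNEL is a made-up macro -- is:

// odr_demo.h: imagine this header included from two libraries that are
// compiled with different settings of the (hypothetical) USE_FAST_KERNEL macro.
#pragma once
struct Contraction {
  // Same symbol, two different definitions across one program: an ODR
  // violation, undefined behavior, and typically no linker diagnostic.
  int Run(int x) const {
#if defined(USE_FAST_KERNEL)
    return x * 2;  // "custom kernel" build
#else
    return x + 2;  // default build
#endif
  }
};

Keeping every CPU-runtime target on the same eigen_contraction_kernel configuration, which is what the BUILD changes above aim at, keeps the instantiations consistent.
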
namespace tensorflow { From 7e2d0e7434ef261f56f82466b9e0600857c26f11 Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Thu, 16 Jul 2020 19:43:31 +0000 Subject: [PATCH 0625/2522] buildifier fix --- tensorflow/core/kernels/BUILD | 5 +++-- tensorflow/python/BUILD | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 85c2b9d175b..28f651fb33c 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -2937,6 +2937,7 @@ tf_kernel_library( "//third_party/eigen3", ], ) + cc_library( name = "tensor_map", srcs = ["tensor_map.cc"], @@ -2968,13 +2969,13 @@ tf_cc_tests( name = "tensor_map_test", size = "small", srcs = [ - "tensor_map_test.cc" + "tensor_map_test.cc", ], deps = [ ":tensor_map", - "//tensorflow/core/framework:tensor_testutil", "//tensorflow/core:test", "//tensorflow/core:test_main", + "//tensorflow/core/framework:tensor_testutil", "@com_google_absl//absl/strings", ], ) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 3c465252007..87ad52c3bf8 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -168,9 +168,9 @@ py_library( ":kernels", ":lib", ":list_ops", - ":map_ops", ":manip_ops", ":map_fn", + ":map_ops", ":math_ops", ":metrics", ":nccl_ops", From 0e9e9ea8c0747c6bbbdade12ebf616bc9b66c5d8 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Thu, 16 Jul 2020 12:37:54 -0700 Subject: [PATCH 0626/2522] Migrate the Linux GPU nightly release scripts to use the new bazelrc configs. PiperOrigin-RevId: 321623148 Change-Id: I5e9e5a7aa36099cfed1a2580217e6f9a011a31ec --- .../ubuntu_16/cpu_py35_full/nightly_release.sh | 2 +- .../ubuntu_16/gpu_py35_full/nightly_release.sh | 14 +------------- .../ubuntu_16/gpu_py36_full/nightly_release.sh | 14 +------------- .../ubuntu_16/gpu_py37_full/nightly_release.sh | 14 +------------- .../ubuntu_16/gpu_py38_full/nightly_release.sh | 14 +------------- 5 files changed, 5 insertions(+), 53 deletions(-) diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/nightly_release.sh index ba1861b221e..664e6cf50b2 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/nightly_release.sh @@ -32,7 +32,7 @@ export TF_NEED_HDFS=1 export TF_NEED_S3=1 export TF_NEED_CUDA=0 export CC_OPT_FLAGS='-mavx' -export PYTHON_BIN_PATH=$(which python3.5) +export PYTHON_BIN_PATH=$(which python) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nightly_release.sh index 2ed5c014c65..addfc59818e 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nightly_release.sh @@ -25,23 +25,11 @@ install_bazelisk python2.7 tensorflow/tools/ci_build/update_version.py --nightly # Run configure. 
-export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 -export TF_CUDNN_VERSION=7 -export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 -export TF_NEED_TENSORRT=1 -export TENSORRT_INSTALL_PATH=/usr/local/tensorrt -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.5) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=opt --config=v2 \ - --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ - tensorflow/tools/pip_package:build_pip_package +bazel build --config=release_gpu_linux tensorflow/tools/pip_package:build_pip_package ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --nightly_flag ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --gpu --nightly_flag diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nightly_release.sh index 87b2e52d88a..c6fb6d469b1 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nightly_release.sh @@ -25,23 +25,11 @@ install_bazelisk python2.7 tensorflow/tools/ci_build/update_version.py --nightly # Run configure. -export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 -export TF_CUDNN_VERSION=7 -export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 -export TF_NEED_TENSORRT=1 -export TENSORRT_INSTALL_PATH=/usr/local/tensorrt -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.6) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=opt --config=v2 \ - --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ - tensorflow/tools/pip_package:build_pip_package +bazel build --config=release_gpu_linux tensorflow/tools/pip_package:build_pip_package ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --nightly_flag ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --gpu --nightly_flag diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nightly_release.sh index 0436ec32643..6e900d7dba8 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nightly_release.sh @@ -25,23 +25,11 @@ install_bazelisk python2.7 tensorflow/tools/ci_build/update_version.py --nightly # Run configure. 
-export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 -export TF_CUDNN_VERSION=7 -export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 -export TF_NEED_TENSORRT=1 -export TENSORRT_INSTALL_PATH=/usr/local/tensorrt -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.7) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=opt --config=v2 \ - --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ - tensorflow/tools/pip_package:build_pip_package +bazel build --config=release_gpu_linux tensorflow/tools/pip_package:build_pip_package ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --nightly_flag ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --gpu --nightly_flag diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/nightly_release.sh index 783785db7f7..9b968c4c3d6 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/nightly_release.sh @@ -27,23 +27,11 @@ update_bazel_linux python2.7 tensorflow/tools/ci_build/update_version.py --nightly # Run configure. -export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 -export TF_CUDNN_VERSION=7 -export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 -export TF_NEED_TENSORRT=1 -export TENSORRT_INSTALL_PATH=/usr/local/tensorrt -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.8) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=opt --config=v2 \ - --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ - tensorflow/tools/pip_package:build_pip_package +bazel build --config=release_gpu_linux tensorflow/tools/pip_package:build_pip_package ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --nightly_flag ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --gpu --nightly_flag From abf6185d24a20e71dc0aa7b2a546aa3f0596c8c6 Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Thu, 16 Jul 2020 12:51:13 -0700 Subject: [PATCH 0627/2522] Moving last set of FileSystem derivatives to new API --- tensorflow/core/platform/s3/s3_file_system.cc | 28 ++++++++-------- tensorflow/core/platform/s3/s3_file_system.h | 30 ++++++++--------- .../platform/windows/windows_file_system.cc | 28 ++++++++-------- .../platform/windows/windows_file_system.h | 32 +++++++++---------- tensorflow/core/util/memmapped_file_system.cc | 26 +++++++-------- tensorflow/core/util/memmapped_file_system.h | 26 +++++++-------- .../asset_manager_filesystem.cc | 28 ++++++++-------- .../asset_manager_filesystem.h | 26 +++++++-------- 8 files changed, 112 insertions(+), 112 deletions(-) diff --git a/tensorflow/core/platform/s3/s3_file_system.cc b/tensorflow/core/platform/s3/s3_file_system.cc index 181a39987e4..253d3a6502e 100644 --- a/tensorflow/core/platform/s3/s3_file_system.cc +++ b/tensorflow/core/platform/s3/s3_file_system.cc @@ -568,13 +568,13 @@ S3FileSystem::GetExecutor() { } Status S3FileSystem::NewRandomAccessFile( - const string& fname, std::unique_ptr* result) { + const string& fname, std::unique_ptr* result/*, TransactionToken* token */) { return NewRandomAccessFile(fname, 
result, true); } Status S3FileSystem::NewRandomAccessFile( const string& fname, std::unique_ptr* result, - bool use_multi_part_download) { + bool use_multi_part_download/*, TransactionToken* token */) { string bucket, object; TF_RETURN_IF_ERROR(ParseS3Path(fname, false, &bucket, &object)); @@ -588,7 +588,7 @@ Status S3FileSystem::NewRandomAccessFile( } Status S3FileSystem::NewWritableFile(const string& fname, - std::unique_ptr* result) { + std::unique_ptr* result/*, TransactionToken* token */) { string bucket, object; TF_RETURN_IF_ERROR(ParseS3Path(fname, false, &bucket, &object)); result->reset(new S3WritableFile( @@ -600,7 +600,7 @@ Status S3FileSystem::NewWritableFile(const string& fname, } Status S3FileSystem::NewAppendableFile(const string& fname, - std::unique_ptr* result) { + std::unique_ptr* result/*, TransactionToken* token */) { std::unique_ptr reader; TF_RETURN_IF_ERROR(NewRandomAccessFile(fname, &reader)); std::unique_ptr buffer(new char[kS3ReadAppendableFileBufferSize]); @@ -634,7 +634,7 @@ Status S3FileSystem::NewAppendableFile(const string& fname, } Status S3FileSystem::NewReadOnlyMemoryRegionFromFile( - const string& fname, std::unique_ptr* result) { + const string& fname, std::unique_ptr* result/*, TransactionToken* token */) { uint64 size; TF_RETURN_IF_ERROR(GetFileSize(fname, &size)); std::unique_ptr data(new char[size]); @@ -649,14 +649,14 @@ Status S3FileSystem::NewReadOnlyMemoryRegionFromFile( return Status::OK(); } -Status S3FileSystem::FileExists(const string& fname) { +Status S3FileSystem::FileExists(const string& fname/*, TransactionToken* token */) { FileStatistics stats; TF_RETURN_IF_ERROR(this->Stat(fname, &stats)); return Status::OK(); } Status S3FileSystem::GetChildren(const string& dir, - std::vector* result) { + std::vector* result/*, TransactionToken* token */) { VLOG(1) << "GetChildren for path: " << dir; string bucket, prefix; TF_RETURN_IF_ERROR(ParseS3Path(dir, true, &bucket, &prefix)); @@ -703,7 +703,7 @@ Status S3FileSystem::GetChildren(const string& dir, return Status::OK(); } -Status S3FileSystem::Stat(const string& fname, FileStatistics* stats) { +Status S3FileSystem::Stat(const string& fname, FileStatistics* stats/*, TransactionToken* token */) { VLOG(1) << "Stat on path: " << fname; string bucket, object; TF_RETURN_IF_ERROR(ParseS3Path(fname, true, &bucket, &object)); @@ -766,11 +766,11 @@ Status S3FileSystem::Stat(const string& fname, FileStatistics* stats) { } Status S3FileSystem::GetMatchingPaths(const string& pattern, - std::vector* results) { + std::vector* results/*, TransactionToken* token */) { return internal::GetMatchingPaths(this, Env::Default(), pattern, results); } -Status S3FileSystem::DeleteFile(const string& fname) { +Status S3FileSystem::DeleteFile(const string& fname/*, TransactionToken* token */) { VLOG(1) << "DeleteFile: " << fname; string bucket, object; TF_RETURN_IF_ERROR(ParseS3Path(fname, false, &bucket, &object)); @@ -786,7 +786,7 @@ Status S3FileSystem::DeleteFile(const string& fname) { return Status::OK(); } -Status S3FileSystem::CreateDir(const string& dirname) { +Status S3FileSystem::CreateDir(const string& dirname/*, TransactionToken* token */) { VLOG(1) << "CreateDir: " << dirname; string bucket, object; TF_RETURN_IF_ERROR(ParseS3Path(dirname, true, &bucket, &object)); @@ -813,7 +813,7 @@ Status S3FileSystem::CreateDir(const string& dirname) { return Status::OK(); } -Status S3FileSystem::DeleteDir(const string& dirname) { +Status S3FileSystem::DeleteDir(const string& dirname/*, TransactionToken* token */) { VLOG(1) 
<< "DeleteDir: " << dirname; string bucket, object; TF_RETURN_IF_ERROR(ParseS3Path(dirname, false, &bucket, &object)); @@ -852,7 +852,7 @@ Status S3FileSystem::DeleteDir(const string& dirname) { return Status::OK(); } -Status S3FileSystem::GetFileSize(const string& fname, uint64* file_size) { +Status S3FileSystem::GetFileSize(const string& fname, uint64* file_size/*, TransactionToken* token */) { FileStatistics stats; TF_RETURN_IF_ERROR(this->Stat(fname, &stats)); *file_size = stats.length; @@ -1123,7 +1123,7 @@ Status S3FileSystem::CompleteMultiPartCopy( return Status::OK(); } -Status S3FileSystem::RenameFile(const string& src, const string& target) { +Status S3FileSystem::RenameFile(const string& src, const string& target/*, TransactionToken* token */) { VLOG(1) << "RenameFile from: " << src << " to: " << target; string src_bucket, src_object, target_bucket, target_object; TF_RETURN_IF_ERROR(ParseS3Path(src, false, &src_bucket, &src_object)); diff --git a/tensorflow/core/platform/s3/s3_file_system.h b/tensorflow/core/platform/s3/s3_file_system.h index c69d678185e..7d47d970425 100644 --- a/tensorflow/core/platform/s3/s3_file_system.h +++ b/tensorflow/core/platform/s3/s3_file_system.h @@ -50,42 +50,42 @@ class S3FileSystem : public FileSystem { ~S3FileSystem(); Status NewRandomAccessFile( - const string& fname, std::unique_ptr* result) override; + const string& fname, std::unique_ptr* result/*, TransactionToken* token = nullptr */) override; Status NewRandomAccessFile(const string& fname, std::unique_ptr* result, - bool use_multi_part_download); + bool use_multi_part_download/*, TransactionToken* token = nullptr */); Status NewWritableFile(const string& fname, - std::unique_ptr* result) override; + std::unique_ptr* result/*, TransactionToken* token = nullptr */) override; Status NewAppendableFile(const string& fname, - std::unique_ptr* result) override; + std::unique_ptr* result/*, TransactionToken* token = nullptr */) override; Status NewReadOnlyMemoryRegionFromFile( const string& fname, - std::unique_ptr* result) override; + std::unique_ptr* result/*, TransactionToken* token = nullptr */) override; - Status FileExists(const string& fname) override; + Status FileExists(const string& fname/*, TransactionToken* token = nullptr */) override; - Status GetChildren(const string& dir, std::vector* result) override; + Status GetChildren(const string& dir, std::vector* result/*, TransactionToken* token = nullptr */) override; - Status Stat(const string& fname, FileStatistics* stat) override; + Status Stat(const string& fname, FileStatistics* stat/*, TransactionToken* token = nullptr */) override; Status GetMatchingPaths(const string& pattern, - std::vector* results) override; + std::vector* results/*, TransactionToken* token = nullptr */) override; - Status DeleteFile(const string& fname) override; + Status DeleteFile(const string& fname/*, TransactionToken* token = nullptr */) override; - Status CreateDir(const string& name) override; + Status CreateDir(const string& name/*, TransactionToken* token = nullptr */) override; - Status DeleteDir(const string& name) override; + Status DeleteDir(const string& name/*, TransactionToken* token = nullptr */) override; - Status GetFileSize(const string& fname, uint64* size) override; + Status GetFileSize(const string& fname, uint64* size/*, TransactionToken* token = nullptr */) override; - Status RenameFile(const string& src, const string& target) override; + Status RenameFile(const string& src, const string& target/*, TransactionToken* token = nullptr */) 
override; - Status HasAtomicMove(const string& path, bool* has_atomic_move) override; + Status HasAtomicMove(const string& path, bool* has_atomic_move/*, TransactionToken* token = nullptr */) override; private: // Returns the member S3 client, initializing as-needed. diff --git a/tensorflow/core/platform/windows/windows_file_system.cc b/tensorflow/core/platform/windows/windows_file_system.cc index bf8bb8914f6..6c2a1296f96 100644 --- a/tensorflow/core/platform/windows/windows_file_system.cc +++ b/tensorflow/core/platform/windows/windows_file_system.cc @@ -261,7 +261,7 @@ class WinReadOnlyMemoryRegion : public ReadOnlyMemoryRegion { } // namespace Status WindowsFileSystem::NewRandomAccessFile( - const string& fname, std::unique_ptr* result) { + const string& fname, std::unique_ptr* result/*, TransactionToken* token */) { string translated_fname = TranslateName(fname); std::wstring ws_translated_fname = Utf8ToWideChar(translated_fname); result->reset(); @@ -288,7 +288,7 @@ Status WindowsFileSystem::NewRandomAccessFile( } Status WindowsFileSystem::NewWritableFile( - const string& fname, std::unique_ptr* result) { + const string& fname, std::unique_ptr* result/*, TransactionToken* token */) { string translated_fname = TranslateName(fname); std::wstring ws_translated_fname = Utf8ToWideChar(translated_fname); result->reset(); @@ -308,7 +308,7 @@ Status WindowsFileSystem::NewWritableFile( } Status WindowsFileSystem::NewAppendableFile( - const string& fname, std::unique_ptr* result) { + const string& fname, std::unique_ptr* result/*, TransactionToken* token */) { string translated_fname = TranslateName(fname); std::wstring ws_translated_fname = Utf8ToWideChar(translated_fname); result->reset(); @@ -338,7 +338,7 @@ Status WindowsFileSystem::NewAppendableFile( } Status WindowsFileSystem::NewReadOnlyMemoryRegionFromFile( - const string& fname, std::unique_ptr* result) { + const string& fname, std::unique_ptr* result/*, TransactionToken* token */) { string translated_fname = TranslateName(fname); std::wstring ws_translated_fname = Utf8ToWideChar(translated_fname); result->reset(); @@ -414,7 +414,7 @@ Status WindowsFileSystem::NewReadOnlyMemoryRegionFromFile( return s; } -Status WindowsFileSystem::FileExists(const string& fname) { +Status WindowsFileSystem::FileExists(const string& fname/*, TransactionToken* token */) { constexpr int kOk = 0; std::wstring ws_translated_fname = Utf8ToWideChar(TranslateName(fname)); if (_waccess(ws_translated_fname.c_str(), kOk) == 0) { @@ -424,7 +424,7 @@ Status WindowsFileSystem::FileExists(const string& fname) { } Status WindowsFileSystem::GetChildren(const string& dir, - std::vector* result) { + std::vector* result/*, TransactionToken* token */) { string translated_dir = TranslateName(dir); std::wstring ws_translated_dir = Utf8ToWideChar(translated_dir); result->clear(); @@ -459,7 +459,7 @@ Status WindowsFileSystem::GetChildren(const string& dir, return Status::OK(); } -Status WindowsFileSystem::DeleteFile(const string& fname) { +Status WindowsFileSystem::DeleteFile(const string& fname/*, TransactionToken* token */) { Status result; std::wstring file_name = Utf8ToWideChar(fname); if (_wunlink(file_name.c_str()) != 0) { @@ -468,7 +468,7 @@ Status WindowsFileSystem::DeleteFile(const string& fname) { return result; } -Status WindowsFileSystem::CreateDir(const string& name) { +Status WindowsFileSystem::CreateDir(const string& name/*, TransactionToken* token */) { Status result; std::wstring ws_name = Utf8ToWideChar(name); if (ws_name.empty()) { @@ -480,7 +480,7 @@ Status 
WindowsFileSystem::CreateDir(const string& name) { return result; } -Status WindowsFileSystem::DeleteDir(const string& name) { +Status WindowsFileSystem::DeleteDir(const string& name/*, TransactionToken* token */) { Status result; std::wstring ws_name = Utf8ToWideChar(name); if (_wrmdir(ws_name.c_str()) != 0) { @@ -489,7 +489,7 @@ Status WindowsFileSystem::DeleteDir(const string& name) { return result; } -Status WindowsFileSystem::GetFileSize(const string& fname, uint64* size) { +Status WindowsFileSystem::GetFileSize(const string& fname, uint64* size/*, TransactionToken* token */) { string translated_fname = TranslateName(fname); std::wstring ws_translated_dir = Utf8ToWideChar(translated_fname); Status result; @@ -507,7 +507,7 @@ Status WindowsFileSystem::GetFileSize(const string& fname, uint64* size) { return result; } -Status WindowsFileSystem::IsDirectory(const string& fname) { +Status WindowsFileSystem::IsDirectory(const string& fname/*, TransactionToken* token */) { TF_RETURN_IF_ERROR(FileExists(fname)); std::wstring ws_translated_fname = Utf8ToWideChar(TranslateName(fname)); if (PathIsDirectoryW(ws_translated_fname.c_str())) { @@ -516,7 +516,7 @@ Status WindowsFileSystem::IsDirectory(const string& fname) { return Status(tensorflow::error::FAILED_PRECONDITION, "Not a directory"); } -Status WindowsFileSystem::RenameFile(const string& src, const string& target) { +Status WindowsFileSystem::RenameFile(const string& src, const string& target/*, TransactionToken* token */) { Status result; // rename() is not capable of replacing the existing file as on Linux // so use OS API directly @@ -531,7 +531,7 @@ Status WindowsFileSystem::RenameFile(const string& src, const string& target) { } Status WindowsFileSystem::GetMatchingPaths(const string& pattern, - std::vector* results) { + std::vector* results/*, TransactionToken* token */) { // NOTE(mrry): The existing implementation of FileSystem::GetMatchingPaths() // does not handle Windows paths containing backslashes correctly. 
Since // Windows APIs will accept forward and backslashes equivalently, we @@ -554,7 +554,7 @@ bool WindowsFileSystem::Match(const string& filename, const string& pattern) { return PathMatchSpecW(ws_path.c_str(), ws_pattern.c_str()) == TRUE; } -Status WindowsFileSystem::Stat(const string& fname, FileStatistics* stat) { +Status WindowsFileSystem::Stat(const string& fname, FileStatistics* stat/*, TransactionToken* token */) { Status result; struct _stat sbuf; std::wstring ws_translated_fname = Utf8ToWideChar(TranslateName(fname)); diff --git a/tensorflow/core/platform/windows/windows_file_system.h b/tensorflow/core/platform/windows/windows_file_system.h index 9bf8a018113..4142cd69967 100644 --- a/tensorflow/core/platform/windows/windows_file_system.h +++ b/tensorflow/core/platform/windows/windows_file_system.h @@ -33,42 +33,42 @@ class WindowsFileSystem : public FileSystem { ~WindowsFileSystem() {} Status NewRandomAccessFile( - const string& fname, std::unique_ptr* result) override; + const string& fname, std::unique_ptr* result/*, TransactionToken* token = nullptr */) override; Status NewWritableFile(const string& fname, - std::unique_ptr* result) override; + std::unique_ptr* result/*, TransactionToken* token = nullptr */) override; Status NewAppendableFile(const string& fname, - std::unique_ptr* result) override; + std::unique_ptr* result/*, TransactionToken* token = nullptr */) override; Status NewReadOnlyMemoryRegionFromFile( const string& fname, - std::unique_ptr* result) override; + std::unique_ptr* result/*, TransactionToken* token = nullptr */) override; - Status FileExists(const string& fname) override; + Status FileExists(const string& fname/*, TransactionToken* token = nullptr */) override; - Status GetChildren(const string& dir, std::vector* result) override; + Status GetChildren(const string& dir, std::vector* result/*, TransactionToken* token = nullptr */) override; Status GetMatchingPaths(const string& pattern, - std::vector* result) override; + std::vector* result/*, TransactionToken* token = nullptr */) override; - bool Match(const string& filename, const string& pattern) override; + bool Match(const string& filename, const string& pattern/*, TransactionToken* token = nullptr */) override; - Status Stat(const string& fname, FileStatistics* stat) override; + Status Stat(const string& fname, FileStatistics* stat/*, TransactionToken* token = nullptr */) override; - Status DeleteFile(const string& fname) override; + Status DeleteFile(const string& fname/*, TransactionToken* token = nullptr */) override; - Status CreateDir(const string& name) override; + Status CreateDir(const string& name/*, TransactionToken* token = nullptr */) override; - Status DeleteDir(const string& name) override; + Status DeleteDir(const string& name/*, TransactionToken* token = nullptr */) override; - Status GetFileSize(const string& fname, uint64* size) override; + Status GetFileSize(const string& fname, uint64* size/*, TransactionToken* token = nullptr */) override; - Status IsDirectory(const string& fname) override; + Status IsDirectory(const string& fname/*, TransactionToken* token = nullptr */) override; - Status RenameFile(const string& src, const string& target) override; + Status RenameFile(const string& src, const string& target/*, TransactionToken* token = nullptr */) override; - string TranslateName(const string& name) const override { return name; } + string TranslateName(const string& name/*, TransactionToken* token = nullptr */) const override { return name; } char Separator() const 
override { return '\\'; }; }; diff --git a/tensorflow/core/util/memmapped_file_system.cc b/tensorflow/core/util/memmapped_file_system.cc index a07c1fbb7be..7abfeb7f9d8 100644 --- a/tensorflow/core/util/memmapped_file_system.cc +++ b/tensorflow/core/util/memmapped_file_system.cc @@ -86,7 +86,7 @@ class RandomAccessFileFromMemmapped : public RandomAccessFile { MemmappedFileSystem::MemmappedFileSystem() {} -Status MemmappedFileSystem::FileExists(const string& fname) { +Status MemmappedFileSystem::FileExists(const string& fname/*, TransactionToken* token */) { if (!mapped_memory_) { return errors::FailedPrecondition("MemmappedEnv is not initialized"); } @@ -98,7 +98,7 @@ Status MemmappedFileSystem::FileExists(const string& fname) { } Status MemmappedFileSystem::NewRandomAccessFile( - const string& filename, std::unique_ptr* result) { + const string& filename, std::unique_ptr* result/*, TransactionToken* token */) { if (!mapped_memory_) { return errors::FailedPrecondition("MemmappedEnv is not initialized"); } @@ -113,7 +113,7 @@ Status MemmappedFileSystem::NewRandomAccessFile( } Status MemmappedFileSystem::NewReadOnlyMemoryRegionFromFile( - const string& filename, std::unique_ptr* result) { + const string& filename, std::unique_ptr* result/*, TransactionToken* token */) { if (!mapped_memory_) { return errors::FailedPrecondition("MemmappedEnv is not initialized"); } @@ -127,7 +127,7 @@ Status MemmappedFileSystem::NewReadOnlyMemoryRegionFromFile( return Status::OK(); } -Status MemmappedFileSystem::GetFileSize(const string& filename, uint64* size) { +Status MemmappedFileSystem::GetFileSize(const string& filename, uint64* size/*, TransactionToken* token */) { if (!mapped_memory_) { return errors::FailedPrecondition("MemmappedEnv is not initialized"); } @@ -139,7 +139,7 @@ Status MemmappedFileSystem::GetFileSize(const string& filename, uint64* size) { return Status::OK(); } -Status MemmappedFileSystem::Stat(const string& fname, FileStatistics* stat) { +Status MemmappedFileSystem::Stat(const string& fname, FileStatistics* stat/*, TransactionToken* token */) { uint64 size; auto status = GetFileSize(fname, &size); if (status.ok()) { @@ -149,40 +149,40 @@ Status MemmappedFileSystem::Stat(const string& fname, FileStatistics* stat) { } Status MemmappedFileSystem::NewWritableFile(const string& filename, - std::unique_ptr* wf) { + std::unique_ptr* wf/*, TransactionToken* token */) { return errors::Unimplemented("memmapped format doesn't support writing"); } Status MemmappedFileSystem::NewAppendableFile( - const string& filename, std::unique_ptr* result) { + const string& filename, std::unique_ptr* result/*, TransactionToken* token */) { return errors::Unimplemented("memmapped format doesn't support writing"); } Status MemmappedFileSystem::GetChildren(const string& filename, - std::vector* strings) { + std::vector* strings/*, TransactionToken* token */) { return errors::Unimplemented("memmapped format doesn't support GetChildren"); } Status MemmappedFileSystem::GetMatchingPaths(const string& pattern, - std::vector* results) { + std::vector* results/*, TransactionToken* token */) { return errors::Unimplemented( "memmapped format doesn't support GetMatchingPaths"); } -Status MemmappedFileSystem::DeleteFile(const string& filename) { +Status MemmappedFileSystem::DeleteFile(const string& filename/*, TransactionToken* token */) { return errors::Unimplemented("memmapped format doesn't support DeleteFile"); } -Status MemmappedFileSystem::CreateDir(const string& dirname) { +Status MemmappedFileSystem::CreateDir(const 
string& dirname/*, TransactionToken* token */) { return errors::Unimplemented("memmapped format doesn't support CreateDir"); } -Status MemmappedFileSystem::DeleteDir(const string& dirname) { +Status MemmappedFileSystem::DeleteDir(const string& dirname/*, TransactionToken* token */) { return errors::Unimplemented("memmapped format doesn't support DeleteDir"); } Status MemmappedFileSystem::RenameFile(const string& filename_from, - const string& filename_to) { + const string& filename_to/*, TransactionToken* token */) { return errors::Unimplemented("memmapped format doesn't support RenameFile"); } diff --git a/tensorflow/core/util/memmapped_file_system.h b/tensorflow/core/util/memmapped_file_system.h index 64b8c580fd4..5287b1b393d 100644 --- a/tensorflow/core/util/memmapped_file_system.h +++ b/tensorflow/core/util/memmapped_file_system.h @@ -60,32 +60,32 @@ class MemmappedFileSystem : public FileSystem { MemmappedFileSystem(); ~MemmappedFileSystem() override = default; - Status FileExists(const string& fname) override; + Status FileExists(const string& fname/*, TransactionToken* token = nullptr */) override; Status NewRandomAccessFile( const string& filename, - std::unique_ptr* result) override; + std::unique_ptr* result/*, TransactionToken* token = nullptr */) override; Status NewReadOnlyMemoryRegionFromFile( const string& filename, - std::unique_ptr* result) override; + std::unique_ptr* result/*, TransactionToken* token = nullptr */) override; // All these functions return Unimplemented error, the memmapped storage is // read only. Status NewWritableFile(const string& fname, - std::unique_ptr* result) override; + std::unique_ptr* result/*, TransactionToken* token = nullptr */) override; Status NewAppendableFile(const string& fname, - std::unique_ptr* result) override; - Status GetChildren(const string& dir, std::vector* r) override; + std::unique_ptr* result/*, TransactionToken* token = nullptr */) override; + Status GetChildren(const string& dir, std::vector* r/*, TransactionToken* token = nullptr */) override; Status GetMatchingPaths(const string& pattern, - std::vector* results) override; - Status DeleteFile(const string& f) override; - Status CreateDir(const string& d) override; - Status DeleteDir(const string& d) override; - Status RenameFile(const string& s, const string& t) override; + std::vector* results/*, TransactionToken* token = nullptr */) override; + Status DeleteFile(const string& f/*, TransactionToken* token = nullptr */) override; + Status CreateDir(const string& d/*, TransactionToken* token = nullptr */) override; + Status DeleteDir(const string& d/*, TransactionToken* token = nullptr */) override; + Status RenameFile(const string& s, const string& t/*, TransactionToken* token = nullptr */) override; // These functions are implemented. - Status GetFileSize(const string& f, uint64* s) override; + Status GetFileSize(const string& f, uint64* s/*, TransactionToken* token = nullptr */) override; // Currently just returns size. - Status Stat(const string& fname, FileStatistics* stat) override; + Status Stat(const string& fname, FileStatistics* stat/*, TransactionToken* token = nullptr */) override; // Initializes filesystem from a file in memmapped format. 
Status InitializeFromFile(Env* env, const string& filename); diff --git a/tensorflow/tools/android/inference_interface/asset_manager_filesystem.cc b/tensorflow/tools/android/inference_interface/asset_manager_filesystem.cc index ee56f9affdf..d54d56d10c0 100644 --- a/tensorflow/tools/android/inference_interface/asset_manager_filesystem.cc +++ b/tensorflow/tools/android/inference_interface/asset_manager_filesystem.cc @@ -124,7 +124,7 @@ AssetManagerFileSystem::AssetManagerFileSystem(AAssetManager* asset_manager, const string& prefix) : asset_manager_(asset_manager), prefix_(prefix) {} -Status AssetManagerFileSystem::FileExists(const string& fname) { +Status AssetManagerFileSystem::FileExists(const string& fname/*, TransactionToken* token */) { string path = RemoveAssetPrefix(fname); auto asset = ScopedAsset( AAssetManager_open(asset_manager_, path.c_str(), AASSET_MODE_RANDOM)); @@ -135,7 +135,7 @@ Status AssetManagerFileSystem::FileExists(const string& fname) { } Status AssetManagerFileSystem::NewRandomAccessFile( - const string& fname, std::unique_ptr* result) { + const string& fname, std::unique_ptr* result/*, TransactionToken* token */) { string path = RemoveAssetPrefix(fname); auto asset = ScopedAsset( AAssetManager_open(asset_manager_, path.c_str(), AASSET_MODE_RANDOM)); @@ -147,7 +147,7 @@ Status AssetManagerFileSystem::NewRandomAccessFile( } Status AssetManagerFileSystem::NewReadOnlyMemoryRegionFromFile( - const string& fname, std::unique_ptr* result) { + const string& fname, std::unique_ptr* result/*, TransactionToken* token */) { string path = RemoveAssetPrefix(fname); auto asset = ScopedAsset( AAssetManager_open(asset_manager_, path.c_str(), AASSET_MODE_STREAMING)); @@ -184,7 +184,7 @@ Status AssetManagerFileSystem::NewReadOnlyMemoryRegionFromFile( } Status AssetManagerFileSystem::GetChildren(const string& prefixed_dir, - std::vector* r) { + std::vector* r/*, TransactionToken* token */) { std::string path = NormalizeDirectoryPath(prefixed_dir); auto dir = ScopedAssetDir(AAssetManager_openDir(asset_manager_, path.c_str())); @@ -199,7 +199,7 @@ Status AssetManagerFileSystem::GetChildren(const string& prefixed_dir, return Status::OK(); } -Status AssetManagerFileSystem::GetFileSize(const string& fname, uint64* s) { +Status AssetManagerFileSystem::GetFileSize(const string& fname, uint64* s/*, TransactionToken* token */) { // If fname corresponds to a directory, return early. It doesn't map to an // AAsset, and would otherwise return NotFound. 
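Note the pattern repeated across these filesystem changes: the upcoming TransactionToken argument travels only inside a comment, so every existing override keeps its current signature and keeps compiling while the spot where the parameter will later be added is already marked. A minimal sketch of that staging idea follows; FileSystemBase, ToyFileSystem and the TransactionToken forward declaration are invented stand-ins for illustration, not the classes touched by this patch.

// staged_api_sketch.cc -- illustrative only, not TensorFlow code.
#include <cstdio>
#include <string>

struct TransactionToken;  // future parameter, declared but not yet used

class FileSystemBase {
 public:
  virtual ~FileSystemBase() = default;
  // Migration step 1: the new argument appears only as a comment, so existing
  // subclasses continue to override the unchanged signature.
  virtual bool FileExists(const std::string& fname
                          /*, TransactionToken* token = nullptr */) = 0;
};

class ToyFileSystem : public FileSystemBase {
 public:
  bool FileExists(const std::string& fname
                  /*, TransactionToken* token */) override {
    return !fname.empty();  // placeholder implementation
  }
};

int main() {
  ToyFileSystem fs;
  std::printf("%d\n", fs.FileExists("README"));  // prints 1
  return 0;
}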
if (DirectoryExists(fname)) { @@ -216,7 +216,7 @@ Status AssetManagerFileSystem::GetFileSize(const string& fname, uint64* s) { return Status::OK(); } -Status AssetManagerFileSystem::Stat(const string& fname, FileStatistics* stat) { +Status AssetManagerFileSystem::Stat(const string& fname, FileStatistics* stat/*, TransactionToken* token */) { uint64 size; stat->is_directory = DirectoryExists(fname); TF_RETURN_IF_ERROR(GetFileSize(fname, &size)); @@ -234,7 +234,7 @@ string AssetManagerFileSystem::RemoveAssetPrefix(const string& name) { return string(piece); } -bool AssetManagerFileSystem::DirectoryExists(const std::string& fname) { +bool AssetManagerFileSystem::DirectoryExists(const std::string& fname/*, TransactionToken* token */) { std::string path = NormalizeDirectoryPath(fname); auto dir = ScopedAssetDir(AAssetManager_openDir(asset_manager_, path.c_str())); @@ -244,28 +244,28 @@ bool AssetManagerFileSystem::DirectoryExists(const std::string& fname) { } Status AssetManagerFileSystem::GetMatchingPaths(const string& pattern, - std::vector* results) { + std::vector* results/*, TransactionToken* token */) { return internal::GetMatchingPaths(this, Env::Default(), pattern, results); } Status AssetManagerFileSystem::NewWritableFile( - const string& fname, std::unique_ptr* result) { + const string& fname, std::unique_ptr* result/*, TransactionToken* token */) { return errors::Unimplemented("Asset storage is read only."); } Status AssetManagerFileSystem::NewAppendableFile( - const string& fname, std::unique_ptr* result) { + const string& fname, std::unique_ptr* result/*, TransactionToken* token */) { return errors::Unimplemented("Asset storage is read only."); } -Status AssetManagerFileSystem::DeleteFile(const string& f) { +Status AssetManagerFileSystem::DeleteFile(const string& f/*, TransactionToken* token */) { return errors::Unimplemented("Asset storage is read only."); } -Status AssetManagerFileSystem::CreateDir(const string& d) { +Status AssetManagerFileSystem::CreateDir(const string& d/*, TransactionToken* token */) { return errors::Unimplemented("Asset storage is read only."); } -Status AssetManagerFileSystem::DeleteDir(const string& d) { +Status AssetManagerFileSystem::DeleteDir(const string& d/*, TransactionToken* token */) { return errors::Unimplemented("Asset storage is read only."); } -Status AssetManagerFileSystem::RenameFile(const string& s, const string& t) { +Status AssetManagerFileSystem::RenameFile(const string& s, const string& t/*, TransactionToken* token */) { return errors::Unimplemented("Asset storage is read only."); } diff --git a/tensorflow/tools/android/inference_interface/asset_manager_filesystem.h b/tensorflow/tools/android/inference_interface/asset_manager_filesystem.h index a87ff42ae21..951fb034caa 100644 --- a/tensorflow/tools/android/inference_interface/asset_manager_filesystem.h +++ b/tensorflow/tools/android/inference_interface/asset_manager_filesystem.h @@ -42,32 +42,32 @@ class AssetManagerFileSystem : public FileSystem { AssetManagerFileSystem(AAssetManager* asset_manager, const string& prefix); ~AssetManagerFileSystem() override = default; - Status FileExists(const string& fname) override; + Status FileExists(const string& fname/*, TransactionToken* token = nullptr*/) override; Status NewRandomAccessFile( const string& filename, - std::unique_ptr* result) override; + std::unique_ptr* result/*, TransactionToken* token = nullptr*/) override; Status NewReadOnlyMemoryRegionFromFile( const string& filename, - std::unique_ptr* result) override; + std::unique_ptr* 
result/*, TransactionToken* token = nullptr*/) override; - Status GetFileSize(const string& f, uint64* s) override; + Status GetFileSize(const string& f, uint64* s/*, TransactionToken* token = nullptr*/) override; // Currently just returns size. - Status Stat(const string& fname, FileStatistics* stat) override; - Status GetChildren(const string& dir, std::vector* r) override; + Status Stat(const string& fname, FileStatistics* stat/*, TransactionToken* token = nullptr*/) override; + Status GetChildren(const string& dir, std::vector* r/*, TransactionToken* token = nullptr*/) override; // All these functions return Unimplemented error. Asset storage is // read only. Status NewWritableFile(const string& fname, - std::unique_ptr* result) override; + std::unique_ptr* result/*, TransactionToken* token = nullptr*/) override; Status NewAppendableFile(const string& fname, - std::unique_ptr* result) override; - Status DeleteFile(const string& f) override; - Status CreateDir(const string& d) override; - Status DeleteDir(const string& d) override; - Status RenameFile(const string& s, const string& t) override; + std::unique_ptr* result/*, TransactionToken* token = nullptr*/) override; + Status DeleteFile(const string& f/*, TransactionToken* token = nullptr*/) override; + Status CreateDir(const string& d/*, TransactionToken* token = nullptr*/) override; + Status DeleteDir(const string& d/*, TransactionToken* token = nullptr*/) override; + Status RenameFile(const string& s, const string& t/*, TransactionToken* token = nullptr*/) override; Status GetMatchingPaths(const string& pattern, - std::vector* results) override; + std::vector* results/*, TransactionToken* token = nullptr*/) override; private: string RemoveAssetPrefix(const string& name); From 57680ec9bea22e97b265abfa762618c51407554b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Jul 2020 13:32:46 -0700 Subject: [PATCH 0628/2522] Split convolution tests into two. 
PiperOrigin-RevId: 321634608 Change-Id: I1dfd1c5ab7010af10962ec021cc66f8fe9c6ce6e --- tensorflow/compiler/xla/tests/BUILD | 51 ++- .../compiler/xla/tests/convolution_test.cc | 313 +-------------- .../compiler/xla/tests/convolution_test_1d.cc | 376 ++++++++++++++++++ 3 files changed, 426 insertions(+), 314 deletions(-) create mode 100644 tensorflow/compiler/xla/tests/convolution_test_1d.cc diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 83851fabd53..b3353cf905c 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -1115,10 +1115,24 @@ xla_test( name = "convolution_test", timeout = "long", srcs = ["convolution_test.cc"], - shard_count = 40, + shard_count = 50, + tags = [ + "no_rocm", + "optonly", + ], + deps = CONVOLUTION_TEST_DEPS + [ + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + ], +) + +xla_test( + name = "convolution_test_1d", + timeout = "long", + srcs = ["convolution_test_1d.cc"], + shard_count = 50, tags = [ "no_rocm", - "nozapfhahn", "optonly", ], deps = CONVOLUTION_TEST_DEPS + [ @@ -1147,6 +1161,23 @@ xla_test( ], ) +xla_test( + name = "convolution_test_1d_autotune_disabled", + timeout = "long", + srcs = ["convolution_test_1d.cc"], + args = ["--xla_gpu_autotune_level=0"], + backends = ["gpu"], + shard_count = 40, + tags = [ + "no_rocm", + "optonly", + ], + deps = CONVOLUTION_TEST_DEPS + [ + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + ], +) + xla_test( name = "convolution_test_gpu_alternative_layout", timeout = "long", @@ -1163,6 +1194,22 @@ xla_test( ], ) +xla_test( + name = "convolution_test_1d_gpu_alternative_layout", + timeout = "long", + srcs = ["convolution_test_1d.cc"], + backend_args = {"gpu": ["--xla_backend_extra_options=xla_gpu_experimental_conv_disable_layout_heuristic"]}, + backends = ["gpu"], + shard_count = 25, + tags = [ + "no_rocm", + ], + deps = CONVOLUTION_TEST_DEPS + [ + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + ], +) + xla_test( name = "convolution_variants_test", timeout = "long", diff --git a/tensorflow/compiler/xla/tests/convolution_test.cc b/tensorflow/compiler/xla/tests/convolution_test.cc index c63f1d0edf3..8021d6fe5db 100644 --- a/tensorflow/compiler/xla/tests/convolution_test.cc +++ b/tensorflow/compiler/xla/tests/convolution_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -// Tests of convolution with trivial kernels and no special variations (like +// Tests of 2+D convolution with trivial kernels and no special variations (like // strides and padding). 
#include @@ -240,174 +240,6 @@ class Convolve_1x1x4x4_1x1x3x3_Same : public ConvolutionTest { TYPED_TEST_CASE(Convolve_1x1x4x4_1x1x3x3_Same, TestTypes); TYPED_TEST(Convolve_1x1x4x4_1x1x3x3_Same, Types) { this->RunTest(); } -XLA_TEST_F(ConvolutionTest, Convolve1D_1x2x5_1x2x2_Valid) { - XlaBuilder builder(TestName()); - { - Shape input_shape = ShapeUtil::MakeShape(F32, {1, 2, 5}); - Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 2, 2}); - auto input = Parameter(&builder, 0, input_shape, "input"); - auto filter = Parameter(&builder, 1, filter_shape, "filter"); - Conv(input, filter, {1}, Padding::kValid); - } - - Array3D input({{{1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}}}); - Array3D filter({{{10, 20}, {30, 40}}}); - - Array3D expected({{{510, 610, 710, 810}}}); - - auto input_literal = - client_->TransferToServer(LiteralUtil::CreateR3FromArray3D(input)) - .ConsumeValueOrDie(); - auto filter_literal = - client_->TransferToServer(LiteralUtil::CreateR3FromArray3D(filter)) - .ConsumeValueOrDie(); - - ComputeAndCompareR3(&builder, expected, - {input_literal.get(), filter_literal.get()}, - error_spec_); -} - -template -class Convolve1D_1x2x5_1x2x2_WithRHSDilation : public ConvolutionTest { - public: - void RunTest() { - XlaBuilder builder(TestName()); - { - Shape input_shape = ShapeUtil::MakeShapeWithType({1, 2, 5}); - Shape filter_shape = ShapeUtil::MakeShapeWithType({1, 2, 2}); - auto input = Parameter(&builder, 0, input_shape, "input"); - auto filter = Parameter(&builder, 1, filter_shape, "filter"); - // Convolution dimensions are bf0_oi0->bo0. - ConvGeneralDilated( - input, filter, /*window_strides=*/{1}, /*padding=*/{{0, 0}}, - /*lhs_dilation=*/{1}, /*rhs_dilation=*/{2}, - /*dimension_numbers=*/builder.CreateDefaultConvDimensionNumbers(1)); - } - - Array3D input( - {{{1.0f, 2.0f, 3.0f, 4.0f, 5.0f}, {6.0f, 7.0f, 8.0f, 9.0f, 10.0f}}}); - Array3D filter({{{10.0f, 20.0f}, {30.0f, 40.0f}}}); - - Array3D expected({{{570.0f, 670.0f, 770.0f}}}); - - auto input_literal = - client_->TransferToServer(LiteralUtil::CreateR3FromArray3D(input)) - .ConsumeValueOrDie(); - auto filter_literal = - client_->TransferToServer(LiteralUtil::CreateR3FromArray3D(filter)) - .ConsumeValueOrDie(); - - ComputeAndCompareR3(&builder, expected, - {input_literal.get(), filter_literal.get()}, - error_spec_); - } -}; // namespace - -TYPED_TEST_CASE(Convolve1D_1x2x5_1x2x2_WithRHSDilation, TestTypes); -TYPED_TEST(Convolve1D_1x2x5_1x2x2_WithRHSDilation, Types) { this->RunTest(); } - -XLA_TEST_F(ConvolutionTest, Convolve1D_1x2x5_1x2x2_WithLHSDilation) { - XlaBuilder builder(TestName()); - { - Shape input_shape = ShapeUtil::MakeShape(F32, {1, 2, 5}); - Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 2, 2}); - auto input = Parameter(&builder, 0, input_shape, "input"); - auto filter = Parameter(&builder, 1, filter_shape, "filter"); - // Convolution dimensions are bf0_oi0->bo0. 
- ConvGeneralDilated( - input, filter, /*window_strides=*/{1}, /*padding=*/{{0, 0}}, - /*lhs_dilation=*/{2}, /*rhs_dilation=*/{1}, - /*dimension_numbers=*/builder.CreateDefaultConvDimensionNumbers(1)); - } - - Array3D input({{{1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}}}); - Array3D filter({{{10, 20}, {30, 40}}}); - - Array3D expected({{{190, 320, 230, 380, 270, 440, 310, 500}}}); - - auto input_literal = - client_->TransferToServer(LiteralUtil::CreateR3FromArray3D(input)) - .ConsumeValueOrDie(); - auto filter_literal = - client_->TransferToServer(LiteralUtil::CreateR3FromArray3D(filter)) - .ConsumeValueOrDie(); - - ComputeAndCompareR3(&builder, expected, - {input_literal.get(), filter_literal.get()}, - error_spec_); -} - -XLA_TEST_F(ConvolutionTest, Convolve1D_1x2x5_1x2x2_WithLHSAndRHSDilation) { - XlaBuilder builder(TestName()); - { - Shape input_shape = ShapeUtil::MakeShape(F32, {1, 2, 5}); - Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 2, 2}); - auto input = Parameter(&builder, 0, input_shape, "input"); - auto filter = Parameter(&builder, 1, filter_shape, "filter"); - // Convolution dimensions are bf0_oi0->bo0. - ConvGeneralDilated( - input, filter, /*window_strides=*/{1}, /*padding=*/{{0, 0}}, - /*lhs_dilation=*/{2}, /*rhs_dilation=*/{2}, - /*dimension_numbers=*/builder.CreateDefaultConvDimensionNumbers(1)); - } - - Array3D input({{{1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}}}); - Array3D filter({{{10, 20}, {30, 40}}}); - - Array3D expected({{{510, 0, 610, 0, 710, 0, 810}}}); - - auto input_literal = - client_->TransferToServer(LiteralUtil::CreateR3FromArray3D(input)) - .ConsumeValueOrDie(); - auto filter_literal = - client_->TransferToServer(LiteralUtil::CreateR3FromArray3D(filter)) - .ConsumeValueOrDie(); - - ComputeAndCompareR3(&builder, expected, - {input_literal.get(), filter_literal.get()}, - error_spec_); -} - -template -class Convolve1D_1x2x5_1x2x2_WithPadding : public ConvolutionTest { - public: - void RunTest() { - XlaBuilder builder(TestName()); - { - Shape input_shape = ShapeUtil::MakeShapeWithType({1, 2, 5}); - Shape filter_shape = ShapeUtil::MakeShapeWithType({1, 2, 2}); - auto input = Parameter(&builder, 0, input_shape, "input"); - auto filter = Parameter(&builder, 1, filter_shape, "filter"); - // Convolution dimensions are bf0_oi0->bo0. 
- ConvGeneralDilated( - input, filter, /*window_strides=*/{1}, /*padding=*/{{2, 2}}, - /*lhs_dilation=*/{1}, /*rhs_dilation=*/{1}, - /*dimension_numbers=*/builder.CreateDefaultConvDimensionNumbers(1)); - } - - Array3D input( - {{{1.0f, 2.0f, 3.0f, 4.0f, 5.0f}, {6.0f, 7.0f, 8.0f, 9.0f, 10.0f}}}); - Array3D filter({{{10.0f, 20.0f}, {30.0f, 40.0f}}}); - - Array3D expected( - {{{0.0f, 260.0f, 510.0f, 610.0f, 710.0f, 810.0f, 350.0f, 0.0f}}}); - - auto input_literal = - client_->TransferToServer(LiteralUtil::CreateR3FromArray3D(input)) - .ConsumeValueOrDie(); - auto filter_literal = - client_->TransferToServer(LiteralUtil::CreateR3FromArray3D(filter)) - .ConsumeValueOrDie(); - - ComputeAndCompareR3(&builder, expected, - {input_literal.get(), filter_literal.get()}, - error_spec_); - } -}; - -TYPED_TEST_CASE(Convolve1D_1x2x5_1x2x2_WithPadding, TestTypes); -TYPED_TEST(Convolve1D_1x2x5_1x2x2_WithPadding, Types) { this->RunTest(); } - XLA_TEST_F(ConvolutionTest, Convolve3D_1x4x2x3x3_2x2x2x3x3_Valid) { XlaBuilder builder(TestName()); std::vector input_dims = {1, 4, 2, 3, 3}; @@ -1714,150 +1546,7 @@ INSTANTIATE_TEST_CASE_P(ConvolveWithAndWithoutCanonicalization_Instantiation, ConvolveWithAndWithoutCanonicalization, ::testing::Values(true, false)); -struct Convolve1DTestParam { - int64 input_feature; - int64 output_feature; - int64 batch; - int64 window_size; - int64 num_windows; -}; -class Convolve1D1WindowTestBase - : public ConvolutionTest, - public ::testing::WithParamInterface { - protected: - template - void TestImpl() { - XlaBuilder builder(TestName()); - int64 input_feature = GetParam().input_feature; - int64 output_feature = GetParam().output_feature; - int64 batch = GetParam().batch; - int64 num_windows = GetParam().num_windows; - int64 window_size = GetParam().window_size; - std::vector input_dims = {batch, window_size + num_windows - 1, - input_feature}; - std::vector filter_dims = {window_size, input_feature, - output_feature}; - Shape input_shape = ShapeUtil::MakeShapeWithType(input_dims); - Shape filter_shape = ShapeUtil::MakeShapeWithType(filter_dims); - { - auto input = Parameter(&builder, 0, input_shape, "input"); - auto filter = Parameter(&builder, 1, filter_shape, "filter"); - - // Tensorflow dimension numbers for 1D convolution. 
- ConvolutionDimensionNumbers dnums; - dnums.set_input_batch_dimension(0); - dnums.set_output_batch_dimension(0); - dnums.add_input_spatial_dimensions(1); - dnums.add_output_spatial_dimensions(1); - dnums.set_input_feature_dimension(2); - dnums.set_output_feature_dimension(2); - dnums.add_kernel_spatial_dimensions(0); - dnums.set_kernel_input_feature_dimension(1); - dnums.set_kernel_output_feature_dimension(2); - - ConvWithGeneralDimensions(input, filter, {1}, Padding::kValid, dnums); - } - - std::vector input_elems(ShapeUtil::ElementsIn(input_shape), - static_cast(1.0f)); - auto input_r1 = LiteralUtil::CreateR1(input_elems); - auto input_r3 = input_r1.Reshape(input_dims).ConsumeValueOrDie(); - - std::vector filter_elems(ShapeUtil::ElementsIn(filter_shape), - static_cast(1.0f)); - - auto filter_r1 = LiteralUtil::CreateR1(filter_elems); - auto filter_r3 = filter_r1.Reshape(filter_dims).ConsumeValueOrDie(); - - std::vector expect_elems(batch * output_feature * num_windows, - static_cast(window_size * input_feature)); - auto expected_r1 = LiteralUtil::CreateR1(expect_elems); - auto expected_r3 = expected_r1.Reshape({batch, num_windows, output_feature}) - .ConsumeValueOrDie(); - - auto input_literal = - client_->TransferToServer(input_r3).ConsumeValueOrDie(); - auto filter_literal = - client_->TransferToServer(filter_r3).ConsumeValueOrDie(); - ComputeAndCompareLiteral(&builder, expected_r3, - {input_literal.get(), filter_literal.get()}, - error_spec_); - } -}; - -class Convolve1D1WindowTestFloat : public Convolve1D1WindowTestBase {}; - -XLA_TEST_P(Convolve1D1WindowTestFloat, Convolve1D1Window) { TestImpl(); } - -INSTANTIATE_TEST_CASE_P( - Convolve1D1WindowTest_Instantiation, Convolve1D1WindowTestFloat, - ::testing::Values(Convolve1DTestParam{1, 1, 1, 1, 2}, - Convolve1DTestParam{160, 1, 1, 5, 1}, - Convolve1DTestParam{24, 1, 1, 20, 1}, - Convolve1DTestParam{30, 1, 1, 20, 1}, - Convolve1DTestParam{23, 1, 1, 20, 20}, - Convolve1DTestParam{25, 1, 1, 20, 1}, - Convolve1DTestParam{24, 1, 1, 10, 5}, - Convolve1DTestParam{160, 1, 1, 10, 1}, - Convolve1DTestParam{255, 1, 1, 3, 1}, - Convolve1DTestParam{130, 1, 1, 1, 2}, - Convolve1DTestParam{136, 1, 1, 1, 2}, - Convolve1DTestParam{64, 1, 1, 1, 1}, - Convolve1DTestParam{128, 1, 1, 1, 1}, - Convolve1DTestParam{139, 1, 1, 128, 1}, - Convolve1DTestParam{1, 10, 10, 1, 10}, - Convolve1DTestParam{1, 10, 130, 1, 2}, - Convolve1DTestParam{1, 10, 130, 1, 1}, - Convolve1DTestParam{1, 64, 64, 1, 10}, - Convolve1DTestParam{1, 65, 65, 1, 1}, - Convolve1DTestParam{1, 128, 128, 1, 1}, - Convolve1DTestParam{128, 128, 128, 128, 1}, - Convolve1DTestParam{1, 128, 128, 1, 1}, - Convolve1DTestParam{2, 2, 2, 2, 1}, - Convolve1DTestParam{161, 1, 1, 10, 1}, - Convolve1DTestParam{900, 1, 1, 10, 1}, - Convolve1DTestParam{640, 3, 3, 128, 1}) - -); - -#if (XLA_TEST_BACKEND_GPU || XLA_TEST_BACKEND_CPU) -class Convolve1D1WindowTestHalf : public Convolve1D1WindowTestBase {}; - -XLA_TEST_P(Convolve1D1WindowTestHalf, Convolve1D1Window) { - TestImpl(); -} - -INSTANTIATE_TEST_CASE_P( - Convolve1D1WindowTest_Instantiation, Convolve1D1WindowTestHalf, - ::testing::Values(Convolve1DTestParam{1, 1, 1, 1, 2}, - Convolve1DTestParam{160, 1, 1, 5, 1}, - Convolve1DTestParam{24, 1, 1, 20, 1}, - Convolve1DTestParam{30, 1, 1, 20, 1}, - Convolve1DTestParam{23, 1, 1, 20, 20}, - Convolve1DTestParam{25, 1, 1, 20, 1}, - Convolve1DTestParam{24, 1, 1, 10, 5}, - Convolve1DTestParam{160, 1, 1, 10, 1}, - Convolve1DTestParam{255, 1, 1, 3, 1}, - Convolve1DTestParam{130, 1, 1, 1, 3}, - Convolve1DTestParam{64, 1, 
1, 1, 1}, - Convolve1DTestParam{128, 1, 1, 1, 1}, - Convolve1DTestParam{139, 1, 1, 128, 1}, - Convolve1DTestParam{640, 3, 3, 128, 1}, - Convolve1DTestParam{900, 1, 1, 10, 1}, - Convolve1DTestParam{1, 10, 10, 1, 10}, - Convolve1DTestParam{1, 10, 130, 1, 1}, - Convolve1DTestParam{1, 10, 130, 1, 2}, - Convolve1DTestParam{1, 64, 64, 1, 10}, - Convolve1DTestParam{1, 65, 65, 1, 1}, - Convolve1DTestParam{1, 128, 128, 1, 1}, - Convolve1DTestParam{128, 128, 128, 128, 1}, - Convolve1DTestParam{1, 128, 128, 1, 1}, - Convolve1DTestParam{2, 2, 2, 2, 1}, - Convolve1DTestParam{161, 1, 1, 10, 1}) - -); -#endif XLA_TEST_F(ConvolutionTest, Convolve_bf16_1x1x1x2_1x1x1x2_Valid) { XlaBuilder builder(TestName()); diff --git a/tensorflow/compiler/xla/tests/convolution_test_1d.cc b/tensorflow/compiler/xla/tests/convolution_test_1d.cc new file mode 100644 index 00000000000..2b2bf098145 --- /dev/null +++ b/tensorflow/compiler/xla/tests/convolution_test_1d.cc @@ -0,0 +1,376 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Tests of 1D convolution with trivial kernels and no special variations (like +// strides and padding). + +#include + +#include "absl/memory/memory.h" +#include "absl/strings/str_cat.h" +#include "tensorflow/compiler/xla/array2d.h" +#include "tensorflow/compiler/xla/array4d.h" +#include "tensorflow/compiler/xla/client/global_data.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/client/padding.h" +#include "tensorflow/compiler/xla/client/xla_builder.h" +#include "tensorflow/compiler/xla/layout_util.h" +#include "tensorflow/compiler/xla/literal.h" +#include "tensorflow/compiler/xla/reference_util.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/tests/client_library_test_base.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +class ConvolutionTest : public ClientLibraryTestBase { + protected: +#if XLA_TEST_BACKEND_GPU + // XLA:GPU sometimes uses FFT convolution which isn't as precise as spatial + // convolution. So relax the absolute error threshold. 
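The two numbers in the error_spec_ values set just below are an absolute and a relative tolerance. A simplified stand-in for how such a pair is usually applied is sketched here (the real LiteralTestUtil comparison also handles NaN, infinity and near-zero expected values); it shows why widening the absolute term from 1e-4 to 1e-2 lets the less precise FFT-based GPU convolutions pass.

// tolerance_sketch.cc -- simplified stand-in, not the actual XLA comparison.
#include <cmath>
#include <cstdio>

// Accept 'actual' if it is within 'abs_tol' of 'expected' absolutely, or
// within 'rel_tol' of 'expected' in relative terms.
bool WithinSpec(float expected, float actual, float abs_tol, float rel_tol) {
  const float diff = std::fabs(expected - actual);
  return diff <= abs_tol || diff <= rel_tol * std::fabs(expected);
}

int main() {
  // An absolute error of 0.002 against an expected value of 1.0:
  std::printf("%d\n", WithinSpec(1.0f, 1.002f, 1e-2f, 1e-3f));  // 1: passes the relaxed GPU spec
  std::printf("%d\n", WithinSpec(1.0f, 1.002f, 1e-4f, 1e-3f));  // 0: fails the tighter default spec
  return 0;
}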
+ ErrorSpec error_spec_ = ErrorSpec(1e-2, 1e-3); +#else + ErrorSpec error_spec_ = ErrorSpec(1e-4, 1e-3); +#endif +}; + +#ifdef XLA_BACKEND_DOES_NOT_SUPPORT_FLOAT16 +using TestTypes = ::testing::Types; +#else +using TestTypes = ::testing::Types; +#endif + +struct Convolve1DTestParam { + int64 input_feature; + int64 output_feature; + int64 batch; + int64 window_size; + int64 num_windows; +}; + +class Convolve1D1WindowTestBase + : public ConvolutionTest, + public ::testing::WithParamInterface { + protected: + template + void TestImpl() { + XlaBuilder builder(TestName()); + int64 input_feature = GetParam().input_feature; + int64 output_feature = GetParam().output_feature; + int64 batch = GetParam().batch; + int64 num_windows = GetParam().num_windows; + int64 window_size = GetParam().window_size; + std::vector input_dims = {batch, window_size + num_windows - 1, + input_feature}; + std::vector filter_dims = {window_size, input_feature, + output_feature}; + Shape input_shape = ShapeUtil::MakeShapeWithType(input_dims); + Shape filter_shape = ShapeUtil::MakeShapeWithType(filter_dims); + { + auto input = Parameter(&builder, 0, input_shape, "input"); + auto filter = Parameter(&builder, 1, filter_shape, "filter"); + + // Tensorflow dimension numbers for 1D convolution. + ConvolutionDimensionNumbers dnums; + dnums.set_input_batch_dimension(0); + dnums.set_output_batch_dimension(0); + dnums.add_input_spatial_dimensions(1); + dnums.add_output_spatial_dimensions(1); + dnums.set_input_feature_dimension(2); + dnums.set_output_feature_dimension(2); + dnums.add_kernel_spatial_dimensions(0); + dnums.set_kernel_input_feature_dimension(1); + dnums.set_kernel_output_feature_dimension(2); + + ConvWithGeneralDimensions(input, filter, {1}, Padding::kValid, dnums); + } + + std::vector input_elems(ShapeUtil::ElementsIn(input_shape), + static_cast(1.0f)); + auto input_r1 = LiteralUtil::CreateR1(input_elems); + auto input_r3 = input_r1.Reshape(input_dims).ConsumeValueOrDie(); + + std::vector filter_elems(ShapeUtil::ElementsIn(filter_shape), + static_cast(1.0f)); + + auto filter_r1 = LiteralUtil::CreateR1(filter_elems); + auto filter_r3 = filter_r1.Reshape(filter_dims).ConsumeValueOrDie(); + + std::vector expect_elems(batch * output_feature * num_windows, + static_cast(window_size * input_feature)); + auto expected_r1 = LiteralUtil::CreateR1(expect_elems); + auto expected_r3 = expected_r1.Reshape({batch, num_windows, output_feature}) + .ConsumeValueOrDie(); + + auto input_literal = + client_->TransferToServer(input_r3).ConsumeValueOrDie(); + auto filter_literal = + client_->TransferToServer(filter_r3).ConsumeValueOrDie(); + ComputeAndCompareLiteral(&builder, expected_r3, + {input_literal.get(), filter_literal.get()}, + error_spec_); + } +}; + +class Convolve1D1WindowTestFloat : public Convolve1D1WindowTestBase {}; + +XLA_TEST_P(Convolve1D1WindowTestFloat, Convolve1D1Window) { TestImpl(); } + +INSTANTIATE_TEST_CASE_P( + Convolve1D1WindowTest_Instantiation, Convolve1D1WindowTestFloat, + ::testing::Values(Convolve1DTestParam{1, 1, 1, 1, 2}, + Convolve1DTestParam{160, 1, 1, 5, 1}, + Convolve1DTestParam{24, 1, 1, 20, 1}, + Convolve1DTestParam{30, 1, 1, 20, 1}, + Convolve1DTestParam{23, 1, 1, 20, 20}, + Convolve1DTestParam{25, 1, 1, 20, 1}, + Convolve1DTestParam{24, 1, 1, 10, 5}, + Convolve1DTestParam{160, 1, 1, 10, 1}, + Convolve1DTestParam{255, 1, 1, 3, 1}, + Convolve1DTestParam{130, 1, 1, 1, 2}, + Convolve1DTestParam{136, 1, 1, 1, 2}, + Convolve1DTestParam{64, 1, 1, 1, 1}, + Convolve1DTestParam{128, 1, 1, 1, 1}, + 
Convolve1DTestParam{139, 1, 1, 128, 1}, + Convolve1DTestParam{1, 10, 10, 1, 10}, + Convolve1DTestParam{1, 10, 130, 1, 2}, + Convolve1DTestParam{1, 10, 130, 1, 1}, + Convolve1DTestParam{1, 64, 64, 1, 10}, + Convolve1DTestParam{1, 65, 65, 1, 1}, + Convolve1DTestParam{1, 128, 128, 1, 1}, + Convolve1DTestParam{128, 128, 128, 128, 1}, + Convolve1DTestParam{1, 128, 128, 1, 1}, + Convolve1DTestParam{2, 2, 2, 2, 1}, + Convolve1DTestParam{161, 1, 1, 10, 1}, + Convolve1DTestParam{900, 1, 1, 10, 1}, + Convolve1DTestParam{640, 3, 3, 128, 1}) + +); + +#if (XLA_TEST_BACKEND_GPU || XLA_TEST_BACKEND_CPU) +class Convolve1D1WindowTestHalf : public Convolve1D1WindowTestBase {}; + +XLA_TEST_P(Convolve1D1WindowTestHalf, Convolve1D1Window) { + TestImpl(); +} + +INSTANTIATE_TEST_CASE_P( + Convolve1D1WindowTest_Instantiation, Convolve1D1WindowTestHalf, + ::testing::Values(Convolve1DTestParam{1, 1, 1, 1, 2}, + Convolve1DTestParam{160, 1, 1, 5, 1}, + Convolve1DTestParam{24, 1, 1, 20, 1}, + Convolve1DTestParam{30, 1, 1, 20, 1}, + Convolve1DTestParam{23, 1, 1, 20, 20}, + Convolve1DTestParam{25, 1, 1, 20, 1}, + Convolve1DTestParam{24, 1, 1, 10, 5}, + Convolve1DTestParam{160, 1, 1, 10, 1}, + Convolve1DTestParam{255, 1, 1, 3, 1}, + Convolve1DTestParam{130, 1, 1, 1, 3}, + Convolve1DTestParam{64, 1, 1, 1, 1}, + Convolve1DTestParam{128, 1, 1, 1, 1}, + Convolve1DTestParam{139, 1, 1, 128, 1}, + Convolve1DTestParam{640, 3, 3, 128, 1}, + Convolve1DTestParam{900, 1, 1, 10, 1}, + Convolve1DTestParam{1, 10, 10, 1, 10}, + Convolve1DTestParam{1, 10, 130, 1, 1}, + Convolve1DTestParam{1, 10, 130, 1, 2}, + Convolve1DTestParam{1, 64, 64, 1, 10}, + Convolve1DTestParam{1, 65, 65, 1, 1}, + Convolve1DTestParam{1, 128, 128, 1, 1}, + Convolve1DTestParam{128, 128, 128, 128, 1}, + Convolve1DTestParam{1, 128, 128, 1, 1}, + Convolve1DTestParam{2, 2, 2, 2, 1}, + Convolve1DTestParam{161, 1, 1, 10, 1}) + +); +#endif + +XLA_TEST_F(ConvolutionTest, Convolve1D_1x2x5_1x2x2_Valid) { + XlaBuilder builder(TestName()); + { + Shape input_shape = ShapeUtil::MakeShape(F32, {1, 2, 5}); + Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 2, 2}); + auto input = Parameter(&builder, 0, input_shape, "input"); + auto filter = Parameter(&builder, 1, filter_shape, "filter"); + Conv(input, filter, {1}, Padding::kValid); + } + + Array3D input({{{1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}}}); + Array3D filter({{{10, 20}, {30, 40}}}); + + Array3D expected({{{510, 610, 710, 810}}}); + + auto input_literal = + client_->TransferToServer(LiteralUtil::CreateR3FromArray3D(input)) + .ConsumeValueOrDie(); + auto filter_literal = + client_->TransferToServer(LiteralUtil::CreateR3FromArray3D(filter)) + .ConsumeValueOrDie(); + + ComputeAndCompareR3(&builder, expected, + {input_literal.get(), filter_literal.get()}, + error_spec_); +} + +template +class Convolve1D_1x2x5_1x2x2_WithRHSDilation : public ConvolutionTest { + public: + void RunTest() { + XlaBuilder builder(TestName()); + { + Shape input_shape = ShapeUtil::MakeShapeWithType({1, 2, 5}); + Shape filter_shape = ShapeUtil::MakeShapeWithType({1, 2, 2}); + auto input = Parameter(&builder, 0, input_shape, "input"); + auto filter = Parameter(&builder, 1, filter_shape, "filter"); + // Convolution dimensions are bf0_oi0->bo0. 
+ ConvGeneralDilated( + input, filter, /*window_strides=*/{1}, /*padding=*/{{0, 0}}, + /*lhs_dilation=*/{1}, /*rhs_dilation=*/{2}, + /*dimension_numbers=*/builder.CreateDefaultConvDimensionNumbers(1)); + } + + Array3D input( + {{{1.0f, 2.0f, 3.0f, 4.0f, 5.0f}, {6.0f, 7.0f, 8.0f, 9.0f, 10.0f}}}); + Array3D filter({{{10.0f, 20.0f}, {30.0f, 40.0f}}}); + + Array3D expected({{{570.0f, 670.0f, 770.0f}}}); + + auto input_literal = + client_->TransferToServer(LiteralUtil::CreateR3FromArray3D(input)) + .ConsumeValueOrDie(); + auto filter_literal = + client_->TransferToServer(LiteralUtil::CreateR3FromArray3D(filter)) + .ConsumeValueOrDie(); + + ComputeAndCompareR3(&builder, expected, + {input_literal.get(), filter_literal.get()}, + error_spec_); + } +}; // namespace + +TYPED_TEST_CASE(Convolve1D_1x2x5_1x2x2_WithRHSDilation, TestTypes); +TYPED_TEST(Convolve1D_1x2x5_1x2x2_WithRHSDilation, Types) { this->RunTest(); } + +XLA_TEST_F(ConvolutionTest, Convolve1D_1x2x5_1x2x2_WithLHSDilation) { + XlaBuilder builder(TestName()); + { + Shape input_shape = ShapeUtil::MakeShape(F32, {1, 2, 5}); + Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 2, 2}); + auto input = Parameter(&builder, 0, input_shape, "input"); + auto filter = Parameter(&builder, 1, filter_shape, "filter"); + // Convolution dimensions are bf0_oi0->bo0. + ConvGeneralDilated( + input, filter, /*window_strides=*/{1}, /*padding=*/{{0, 0}}, + /*lhs_dilation=*/{2}, /*rhs_dilation=*/{1}, + /*dimension_numbers=*/builder.CreateDefaultConvDimensionNumbers(1)); + } + + Array3D input({{{1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}}}); + Array3D filter({{{10, 20}, {30, 40}}}); + + Array3D expected({{{190, 320, 230, 380, 270, 440, 310, 500}}}); + + auto input_literal = + client_->TransferToServer(LiteralUtil::CreateR3FromArray3D(input)) + .ConsumeValueOrDie(); + auto filter_literal = + client_->TransferToServer(LiteralUtil::CreateR3FromArray3D(filter)) + .ConsumeValueOrDie(); + + ComputeAndCompareR3(&builder, expected, + {input_literal.get(), filter_literal.get()}, + error_spec_); +} + +XLA_TEST_F(ConvolutionTest, Convolve1D_1x2x5_1x2x2_WithLHSAndRHSDilation) { + XlaBuilder builder(TestName()); + { + Shape input_shape = ShapeUtil::MakeShape(F32, {1, 2, 5}); + Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 2, 2}); + auto input = Parameter(&builder, 0, input_shape, "input"); + auto filter = Parameter(&builder, 1, filter_shape, "filter"); + // Convolution dimensions are bf0_oi0->bo0. 
+ ConvGeneralDilated( + input, filter, /*window_strides=*/{1}, /*padding=*/{{0, 0}}, + /*lhs_dilation=*/{2}, /*rhs_dilation=*/{2}, + /*dimension_numbers=*/builder.CreateDefaultConvDimensionNumbers(1)); + } + + Array3D input({{{1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}}}); + Array3D filter({{{10, 20}, {30, 40}}}); + + Array3D expected({{{510, 0, 610, 0, 710, 0, 810}}}); + + auto input_literal = + client_->TransferToServer(LiteralUtil::CreateR3FromArray3D(input)) + .ConsumeValueOrDie(); + auto filter_literal = + client_->TransferToServer(LiteralUtil::CreateR3FromArray3D(filter)) + .ConsumeValueOrDie(); + + ComputeAndCompareR3(&builder, expected, + {input_literal.get(), filter_literal.get()}, + error_spec_); +} + +template +class Convolve1D_1x2x5_1x2x2_WithPadding : public ConvolutionTest { + public: + void RunTest() { + XlaBuilder builder(TestName()); + { + Shape input_shape = ShapeUtil::MakeShapeWithType({1, 2, 5}); + Shape filter_shape = ShapeUtil::MakeShapeWithType({1, 2, 2}); + auto input = Parameter(&builder, 0, input_shape, "input"); + auto filter = Parameter(&builder, 1, filter_shape, "filter"); + // Convolution dimensions are bf0_oi0->bo0. + ConvGeneralDilated( + input, filter, /*window_strides=*/{1}, /*padding=*/{{2, 2}}, + /*lhs_dilation=*/{1}, /*rhs_dilation=*/{1}, + /*dimension_numbers=*/builder.CreateDefaultConvDimensionNumbers(1)); + } + + Array3D input( + {{{1.0f, 2.0f, 3.0f, 4.0f, 5.0f}, {6.0f, 7.0f, 8.0f, 9.0f, 10.0f}}}); + Array3D filter({{{10.0f, 20.0f}, {30.0f, 40.0f}}}); + + Array3D expected( + {{{0.0f, 260.0f, 510.0f, 610.0f, 710.0f, 810.0f, 350.0f, 0.0f}}}); + + auto input_literal = + client_->TransferToServer(LiteralUtil::CreateR3FromArray3D(input)) + .ConsumeValueOrDie(); + auto filter_literal = + client_->TransferToServer(LiteralUtil::CreateR3FromArray3D(filter)) + .ConsumeValueOrDie(); + + ComputeAndCompareR3(&builder, expected, + {input_literal.get(), filter_literal.get()}, + error_spec_); + } +}; + +TYPED_TEST_CASE(Convolve1D_1x2x5_1x2x2_WithPadding, TestTypes); +TYPED_TEST(Convolve1D_1x2x5_1x2x2_WithPadding, Types) { this->RunTest(); } + +} // namespace +} // namespace xla From 35e20cf53063741304cbcdb3755d5fbe43bea953 Mon Sep 17 00:00:00 2001 From: Niranjan Hasabnis Date: Thu, 16 Jul 2020 13:50:26 -0700 Subject: [PATCH 0629/2522] [Intel MKL] Fixing bfloat16 build failure in MklRelu --- tensorflow/core/kernels/mkl_relu_op.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc index 5d52742d558..126ca4cc836 100644 --- a/tensorflow/core/kernels/mkl_relu_op.cc +++ b/tensorflow/core/kernels/mkl_relu_op.cc @@ -19,7 +19,6 @@ limitations under the License. #include #include "mkldnn.hpp" -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" @@ -27,6 +26,7 @@ limitations under the License. 
#include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/util/mkl_types.h" #include "tensorflow/core/util/mkl_util.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" using mkldnn::algorithm; using mkldnn::eltwise_forward; @@ -924,7 +924,7 @@ class MklEluOp : public MklReluOpBase { // return exp(feature) - 1 if feature > 0; feature otherwise T feature = (static_cast(user_i))[0]; if (feature < static_cast(0)) - (static_cast(out_o))[0] = std::exp(feature); + (static_cast(out_o))[0] = Eigen::numext::exp(feature); else (static_cast(out_o))[0] = feature; return; @@ -966,7 +966,7 @@ class MklEluGradOp if (feature > static_cast(0)) { (static_cast(out_o))[0] = (static_cast(user_g))[0]; } else { - T elu = std::exp(feature) - static_cast(1); + T elu = Eigen::numext::exp(feature) - static_cast(1); (static_cast(out_o))[0] = (static_cast(user_g))[0] * (elu + static_cast(1)); } @@ -1004,8 +1004,8 @@ class MklTanhOp : public MklReluOpBase { void* out_o = static_cast(dst_tensor->flat().data()); // tanh(x) = (e^x - e^(-x))/ (e^x + e^(-x)) T feature = (static_cast(user_i))[0]; - T e1 = std::exp(feature); - T e2 = std::exp(-feature); + T e1 = Eigen::numext::exp(feature); + T e2 = Eigen::numext::exp(-feature); (static_cast(out_o))[0] = (e1 - e2) / (e1 + e2); return; } From 02f45635e5f3926d8188ef2c6334497936d29d7b Mon Sep 17 00:00:00 2001 From: Sai Ganesh Bandiatmakuri Date: Thu, 16 Jul 2020 13:44:11 -0700 Subject: [PATCH 0630/2522] Add a sanity check on the benchmark runner to surface user errors in setting the benchmarks regex. PiperOrigin-RevId: 321636971 Change-Id: Ib556e4a552903f2a64bb87a55475f3968bea8e32 --- tensorflow/python/kernel_tests/benchmark_test.py | 3 ++- tensorflow/python/platform/benchmark.py | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/kernel_tests/benchmark_test.py b/tensorflow/python/kernel_tests/benchmark_test.py index f4548baddaa..3e64f9d5c15 100644 --- a/tensorflow/python/kernel_tests/benchmark_test.py +++ b/tensorflow/python/kernel_tests/benchmark_test.py @@ -96,7 +96,8 @@ class BenchmarkTest(test.TestCase): self.assertFalse(_ran_somebenchmark_but_shouldnt[0]) # Run other benchmarks, but this wont run the one we care about - benchmark._run_benchmarks("unrelated") + with self.assertRaises(ValueError): + benchmark._run_benchmarks("unrelated") # Validate that SomeBenchmark has not run yet self.assertFalse(_ran_somebenchmark_1[0]) diff --git a/tensorflow/python/platform/benchmark.py b/tensorflow/python/platform/benchmark.py index dcfa4d1ef1a..0f328b2df5f 100644 --- a/tensorflow/python/platform/benchmark.py +++ b/tensorflow/python/platform/benchmark.py @@ -430,9 +430,13 @@ def _run_benchmarks(regex): Args: regex: The string regular expression to match Benchmark classes against. + + Raises: + ValueError: If no benchmarks were selected by the input regex. 
""" registry = list(GLOBAL_BENCHMARK_REGISTRY) + selected_benchmarks = [] # Match benchmarks in registry against regex for benchmark in registry: benchmark_name = "%s.%s" % (benchmark.__module__, benchmark.__name__) @@ -448,6 +452,7 @@ def _run_benchmarks(regex): continue full_benchmark_name = "%s.%s" % (benchmark_name, attr) if regex == "all" or re.search(regex, full_benchmark_name): + selected_benchmarks.append(full_benchmark_name) # Instantiate the class if it hasn't been instantiated benchmark_instance = benchmark_instance or benchmark() # Get the method tied to the class @@ -455,6 +460,9 @@ def _run_benchmarks(regex): # Call the instance method instance_benchmark_fn() + if not selected_benchmarks: + raise ValueError("No benchmarks matched the pattern: '{}'".format(regex)) + def benchmarks_main(true_main, argv=None): """Run benchmarks as declared in argv. From 842f5e7b44079725fbf2f15b400e5ef32f16ee24 Mon Sep 17 00:00:00 2001 From: Jiho Choi Date: Thu, 16 Jul 2020 13:52:58 -0700 Subject: [PATCH 0631/2522] Update the step marker format. PiperOrigin-RevId: 321638750 Change-Id: I5af55020da7790ee6406c8ff52b0a61febf3b8d3 --- tensorflow/python/keras/engine/training.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index 986d3a05887..60b31e1ee21 100644 --- a/tensorflow/python/keras/engine/training.py +++ b/tensorflow/python/keras/engine/training.py @@ -1090,11 +1090,11 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector): with data_handler.catch_stop_iteration(): for step in data_handler.steps(): with trace.Trace( - 'TraceContext', - graph_type='train', + 'train', epoch_num=epoch, step_num=step, - batch_size=batch_size): + batch_size=batch_size, + _r=1): callbacks.on_train_batch_begin(step) tmp_logs = train_function(iterator) if data_handler.should_sync: @@ -1378,7 +1378,7 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector): self.reset_metrics() with data_handler.catch_stop_iteration(): for step in data_handler.steps(): - with trace.Trace('TraceContext', graph_type='test', step_num=step): + with trace.Trace('test', step_num=step, _r=1): callbacks.on_test_batch_begin(step) tmp_logs = test_function(iterator) if data_handler.should_sync: From ba134e5995a38456718ccdcdc3df07a7d8be7d27 Mon Sep 17 00:00:00 2001 From: Tomer Kaftan Date: Thu, 16 Jul 2020 13:54:22 -0700 Subject: [PATCH 0632/2522] Raise an error when some but not all values passed to the first layer call arg are symbolic. This setting can cause functional models to be constructed incorrectly. Support for this will be added when we enable the KerasTensors refactoring. 
Addreses GitHub Issue #40638 PiperOrigin-RevId: 321639068 Change-Id: Iebf0e1198018fe44b1f60673bd991a9262ecef7d --- tensorflow/python/keras/engine/base_layer.py | 14 +++- .../python/keras/engine/functional_test.py | 70 ++++++++++++++++++- 2 files changed, 82 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/keras/engine/base_layer.py b/tensorflow/python/keras/engine/base_layer.py index 7e21cba15c0..3ea9c537b74 100644 --- a/tensorflow/python/keras/engine/base_layer.py +++ b/tensorflow/python/keras/engine/base_layer.py @@ -3223,7 +3223,19 @@ def _in_functional_construction_mode(inputs, args, kwargs, input_list): # pylin for tensor in nest.flatten([inputs, args, kwargs])) else: if context.executing_eagerly(): - return all(tf_utils.is_symbolic_tensor(t) for t in input_list) + all_inputs_symbolic = all( + tf_utils.is_symbolic_tensor(t) for t in input_list) + if (any(tf_utils.is_symbolic_tensor(t) for t in nest.flatten( + [inputs, args, kwargs])) and not all_inputs_symbolic): + raise ValueError('It appears you are trying to construct a ' + 'functional model, but not all of the inputs in ' + 'the first positional argument of your layer call ' + 'are symbolic tensors. ' + '(Input objects, or the output of another layer) ' + 'Functional models cannot correctly track layers ' + 'unless all values in the first call argument ' + 'are symbolic.') + return all_inputs_symbolic else: return (base_layer_utils.is_in_keras_graph() or all(hasattr(t, '_keras_history') for t in input_list)) diff --git a/tensorflow/python/keras/engine/functional_test.py b/tensorflow/python/keras/engine/functional_test.py index 47e4dc488a3..f8a0c4103c5 100644 --- a/tensorflow/python/keras/engine/functional_test.py +++ b/tensorflow/python/keras/engine/functional_test.py @@ -932,6 +932,72 @@ class NetworkConstructionTest(keras_parameterized.TestCase): # Check that second input was correctly added to first. self.assertEqual(history.history['loss'][0], 0.0) + @combinations.generate(combinations.times( + combinations.keras_mode_combinations(mode='eager'), + combinations.combine(use_keras_tensors=False))) + def test_only_some_in_first_arg_derived_from_keras_layer(self): + class MyAddAll(layers.Layer): + + def call(self, inputs): + x = inputs[0] + for inp in inputs[1:]: + if inp is not None: + x = x + inp + return x + + input1 = input_layer_lib.Input(10) + input2 = input_layer_lib.Input(10) + layer = MyAddAll() + + with self.assertRaisesRegexp(ValueError, 'construct a functional'): + layer([0.0, input1, None, input2, None]) + + @combinations.generate(combinations.times( + combinations.keras_mode_combinations(mode='eager'), + combinations.combine(use_keras_tensors=True))) + def test_only_some_in_first_arg_derived_from_keras_layer_keras_tensors(self): + # This functionality is unsupported in v1 graphs + + class MyAddAll(layers.Layer): + + def call(self, inputs): + x = inputs[0] + for inp in inputs[1:]: + if inp is not None: + x = x + inp + return x + + input1 = input_layer_lib.Input(10) + input2 = input_layer_lib.Input(10) + layer = MyAddAll() + outputs = layer([0.0, input1, None, input2, None]) + model = training_lib.Model([input1, input2], outputs) + self.assertIn(layer, model.layers) + model.compile( + 'sgd', + 'mse', + run_eagerly=testing_utils.should_run_eagerly()) + history = model.fit( + x=[3 * np.ones((10, 10)), 7 * np.ones((10, 10))], + y=10 * np.ones((10, 10)), + batch_size=2) + # Check that second input was correctly added to first. + self.assertEqual(history.history['loss'][0], 0.0) + + # Check serialization. 
+ model = training_lib.Model.from_config( + model.get_config(), custom_objects={'MyAddAll': MyAddAll}) + model.compile( + 'sgd', + 'mse', + run_eagerly=testing_utils.should_run_eagerly()) + history = model.fit( + x=[3 * np.ones((10, 10)), 7 * np.ones((10, 10))], + y=10 * np.ones((10, 10)), + batch_size=2) + # Check that second input was correctly added to first. + self.assertEqual(history.history['loss'][0], 0.0) + @combinations.generate(combinations.keras_mode_combinations()) def test_call_kwarg_derived_from_keras_layer(self): @@ -1141,7 +1207,8 @@ class NetworkConstructionTest(keras_parameterized.TestCase): input2 = input_layer_lib.Input(10) input3 = input_layer_lib.Input(10) - outputs = AddAll()( + layer = AddAll() + outputs = layer( [input1, 4 * array_ops.ones((1, 10))], x3={ 'a': input2, @@ -1149,6 +1216,7 @@ class NetworkConstructionTest(keras_parameterized.TestCase): 'c': 5 * array_ops.ones((1, 10)) }) model = training_lib.Model([input1, input2, input3], outputs) + self.assertIn(layer, model.layers) model.compile( 'sgd', 'mse', From 6b936b21d4f319c46951fc61f3dfae56212e8af2 Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Thu, 16 Jul 2020 21:16:21 +0000 Subject: [PATCH 0633/2522] added summary_op dependency to C API build and Python BUILD --- tensorflow/core/BUILD | 2 ++ tensorflow/python/BUILD | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index d1909ea1bac..6da0208c653 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -892,6 +892,7 @@ cc_library( ":user_ops_op_lib", ":word2vec_ops", "//tensorflow/c/kernels:bitcast_op_lib", + "//tensorflow/c/kernels:summary_op_lib", "//tensorflow/compiler/mlir/tensorflow:mlir_passthrough_op", ] + if_chromiumos( [], @@ -998,6 +999,7 @@ cc_library( name = "all_kernels_impl", visibility = [":__subpackages__"], deps = [ + "//tensorflow/c/kernels:summary_op", "//tensorflow/c/kernels:bitcast_op", "//tensorflow/core/kernels:array", "//tensorflow/core/kernels:audio", diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 3901e6a1408..e76a0c60d7a 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -2908,6 +2908,10 @@ tf_gen_op_wrapper_private_py( "//learning/brain/python/ops:__pkg__", "//tensorflow/python/kernel_tests:__pkg__", ], + deps = [ + "//tensorflow/c/kernels:summary_op_lib", + "//tensorflow/core:logging_ops_op_lib", + ], ) tf_gen_op_wrapper_private_py( From 44ce8c4417e4521fc14f5de23ef8a92265a364b3 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Thu, 16 Jul 2020 14:23:00 -0700 Subject: [PATCH 0634/2522] Introduce public version of TpuTopology for chip/core location identification PiperOrigin-RevId: 321644994 Change-Id: I5def78e34c127c1a16ee64da13e83590376b1622 --- tensorflow/core/tpu/tpu_library_init_fns.inc | 14 ++++ tensorflow/stream_executor/tpu/BUILD | 11 +++ .../stream_executor/tpu/tpu_executor_c_api.h | 32 +++++++++ .../stream_executor/tpu/tpu_platform.cc | 4 ++ tensorflow/stream_executor/tpu/tpu_platform.h | 2 + .../tpu/tpu_platform_interface.h | 4 ++ .../stream_executor/tpu/tpu_topology.cc | 69 +++++++++++++++++++ tensorflow/stream_executor/tpu/tpu_topology.h | 66 ++++++++++++++++++ 8 files changed, 202 insertions(+) create mode 100644 tensorflow/stream_executor/tpu/tpu_topology.cc create mode 100644 tensorflow/stream_executor/tpu/tpu_topology.h diff --git a/tensorflow/core/tpu/tpu_library_init_fns.inc b/tensorflow/core/tpu/tpu_library_init_fns.inc index 4b0cbada649..6737ae42570 100644 --- a/tensorflow/core/tpu/tpu_library_init_fns.inc +++ 
b/tensorflow/core/tpu/tpu_library_init_fns.inc @@ -67,6 +67,7 @@ tensorflow::Status SetExecutorStructFn(void* library_handle) { TFTPU_SET_FN(executor_fn, TpuPlatform_VisibleDeviceCount); TFTPU_SET_FN(executor_fn, TpuPlatform_TpuMemoryLimit); TFTPU_SET_FN(executor_fn, TpuPlatform_ShouldRegisterTpuDeviceToDeviceCopy); + TFTPU_SET_FN(executor_fn, TpuPlatform_GetTopologyPtr); TFTPU_SET_FN(executor_fn, TpuExecutor_Init); TFTPU_SET_FN(executor_fn, TpuExecutor_Free); @@ -150,6 +151,19 @@ tensorflow::Status SetExecutorStructFn(void* library_handle) { TFTPU_SET_FN(executor_fn, TpuComputationPlacer_New); TFTPU_SET_FN(executor_fn, TpuComputationPlacer_Free); + TFTPU_SET_FN(executor_fn, TpuTopology_LogicalDevicesPerHost); + TFTPU_SET_FN(executor_fn, TpuTopology_LogicalDevicesPerChip); + TFTPU_SET_FN(executor_fn, TpuTopology_ChipBounds_X); + TFTPU_SET_FN(executor_fn, TpuTopology_ChipBounds_Y); + TFTPU_SET_FN(executor_fn, TpuTopology_ChipBounds_Z); + TFTPU_SET_FN(executor_fn, TpuTopology_HasChip); + TFTPU_SET_FN(executor_fn, TpuTopology_Core); + TFTPU_SET_FN(executor_fn, TpuCoreLocation_ChipCoordinates_X); + TFTPU_SET_FN(executor_fn, TpuCoreLocation_ChipCoordinates_Y); + TFTPU_SET_FN(executor_fn, TpuCoreLocation_ChipCoordinates_Z); + TFTPU_SET_FN(executor_fn, TpuCoreLocation_Index); + TFTPU_SET_FN(executor_fn, TpuCoreLocation_Id); + return tensorflow::Status::OK(); } diff --git a/tensorflow/stream_executor/tpu/BUILD b/tensorflow/stream_executor/tpu/BUILD index 17ea13ed0b3..e9ba0ce9914 100644 --- a/tensorflow/stream_executor/tpu/BUILD +++ b/tensorflow/stream_executor/tpu/BUILD @@ -286,3 +286,14 @@ cc_library( "@com_google_absl//absl/types:span", ], ) + +cc_library( + name = "tpu_topology_external", + srcs = ["tpu_topology.cc"], + hdrs = ["tpu_topology.h"], + deps = [ + "//tensorflow/core/platform:types", + "//tensorflow/core/tpu:tpu_api", + "//tensorflow/core/tpu/kernels:tpu_util_c_api_hdrs", + ], +) diff --git a/tensorflow/stream_executor/tpu/tpu_executor_c_api.h b/tensorflow/stream_executor/tpu/tpu_executor_c_api.h index 6962ce930bf..1530c00e621 100644 --- a/tensorflow/stream_executor/tpu/tpu_executor_c_api.h +++ b/tensorflow/stream_executor/tpu/tpu_executor_c_api.h @@ -149,6 +149,7 @@ SE_PlatformId TpuPlatform_Id(SE_Platform* platform); int64_t TpuPlatform_VisibleDeviceCount(SE_Platform* platform); int64_t TpuPlatform_TpuMemoryLimit(SE_Platform* platform); bool TpuPlatform_ShouldRegisterTpuDeviceToDeviceCopy(SE_Platform* platform); +void* TpuPlatform_GetTopologyPtr(SE_Platform* platform); void TpuExecutor_Init(SE_StreamExecutor* executor, int device_ordinal, SE_DeviceOptions* device_options, SE_Status* status); @@ -296,6 +297,22 @@ int64_t HardwareLayout_ShapeSize(XLA_Shape* shape); XLA_ComputationPlacer* TpuComputationPlacer_New(); void TpuComputationPlacer_Free(XLA_ComputationPlacer* placer); +int TpuTopology_LogicalDevicesPerHost(void* tpu_topology, + TpuCoreTypeEnum tpu_core_type); +int TpuTopology_LogicalDevicesPerChip(void* tpu_topology, + TpuCoreTypeEnum tpu_core_type); +int TpuTopology_ChipBounds_X(void* tpu_topology); +int TpuTopology_ChipBounds_Y(void* tpu_topology); +int TpuTopology_ChipBounds_Z(void* tpu_topology); +bool TpuTopology_HasChip(void* tpu_topology, int x, int y, int z); +void* TpuTopology_Core(void* tpu_topology, int x, int y, int z, + TpuCoreTypeEnum tpu_core_type, int index); +int TpuCoreLocation_ChipCoordinates_X(void* tpu_core_location); +int TpuCoreLocation_ChipCoordinates_Y(void* tpu_core_location); +int TpuCoreLocation_ChipCoordinates_Z(void* tpu_core_location); +int 
TpuCoreLocation_Index(void* tpu_core_location); +int TpuCoreLocation_Id(void* tpu_core_location); + struct TfTpu_ExecutorApiFn { TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_New); TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_Free); @@ -306,6 +323,8 @@ struct TfTpu_ExecutorApiFn { TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_VisibleDeviceCount); TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_TpuMemoryLimit); TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_ShouldRegisterTpuDeviceToDeviceCopy); + TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_GetTopologyPtr); + TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_Init); TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_Free); TFTPU_ADD_FN_IN_STRUCT(TpuExecutor_PlatformDeviceCount); @@ -387,6 +406,19 @@ struct TfTpu_ExecutorApiFn { TFTPU_ADD_FN_IN_STRUCT(TpuComputationPlacer_New); TFTPU_ADD_FN_IN_STRUCT(TpuComputationPlacer_Free); + + TFTPU_ADD_FN_IN_STRUCT(TpuTopology_LogicalDevicesPerHost); + TFTPU_ADD_FN_IN_STRUCT(TpuTopology_LogicalDevicesPerChip); + TFTPU_ADD_FN_IN_STRUCT(TpuTopology_ChipBounds_X); + TFTPU_ADD_FN_IN_STRUCT(TpuTopology_ChipBounds_Y); + TFTPU_ADD_FN_IN_STRUCT(TpuTopology_ChipBounds_Z); + TFTPU_ADD_FN_IN_STRUCT(TpuTopology_HasChip); + TFTPU_ADD_FN_IN_STRUCT(TpuTopology_Core); + TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_ChipCoordinates_X); + TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_ChipCoordinates_Y); + TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_ChipCoordinates_Z); + TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_Index); + TFTPU_ADD_FN_IN_STRUCT(TpuCoreLocation_Id); }; } diff --git a/tensorflow/stream_executor/tpu/tpu_platform.cc b/tensorflow/stream_executor/tpu/tpu_platform.cc index abebc2042e5..95472417b4e 100644 --- a/tensorflow/stream_executor/tpu/tpu_platform.cc +++ b/tensorflow/stream_executor/tpu/tpu_platform.cc @@ -118,6 +118,10 @@ bool TpuPlatform::ShouldRegisterTpuDeviceToDeviceCopy() { ->TpuPlatform_ShouldRegisterTpuDeviceToDeviceCopyFn(platform_); } +const tensorflow::tpu::TpuTopologyPtr TpuPlatform::GetTopologyPtr() { + return tpu::ExecutorApiFn()->TpuPlatform_GetTopologyPtrFn(platform_); +} + void TpuPlatform::InsertEvent(stream_executor::internal::EventInterface* key, SE_Event* val) { tensorflow::mutex_lock lock(event_map_mu_); diff --git a/tensorflow/stream_executor/tpu/tpu_platform.h b/tensorflow/stream_executor/tpu/tpu_platform.h index 41fc257597d..3704f0268f5 100644 --- a/tensorflow/stream_executor/tpu/tpu_platform.h +++ b/tensorflow/stream_executor/tpu/tpu_platform.h @@ -60,6 +60,8 @@ class TpuPlatform : public ::tensorflow::tpu::TpuPlatformInterface { bool ShouldRegisterTpuDeviceToDeviceCopy() override; + const tensorflow::tpu::TpuTopologyPtr GetTopologyPtr() override; + bool Initialized() const override; Status Initialize( diff --git a/tensorflow/stream_executor/tpu/tpu_platform_interface.h b/tensorflow/stream_executor/tpu/tpu_platform_interface.h index 35a10b8a428..da9e91ffc1c 100644 --- a/tensorflow/stream_executor/tpu/tpu_platform_interface.h +++ b/tensorflow/stream_executor/tpu/tpu_platform_interface.h @@ -22,6 +22,8 @@ limitations under the License. 
namespace tensorflow { namespace tpu { +typedef void* TpuTopologyPtr; + class TpuPlatformInterface : public stream_executor::Platform { public: using Status = stream_executor::port::Status; @@ -38,6 +40,8 @@ class TpuPlatformInterface : public stream_executor::Platform { virtual int64 TpuMemoryLimit() = 0; virtual bool ShouldRegisterTpuDeviceToDeviceCopy() = 0; + + virtual const TpuTopologyPtr GetTopologyPtr() = 0; }; } // namespace tpu diff --git a/tensorflow/stream_executor/tpu/tpu_topology.cc b/tensorflow/stream_executor/tpu/tpu_topology.cc new file mode 100644 index 00000000000..749cb291940 --- /dev/null +++ b/tensorflow/stream_executor/tpu/tpu_topology.cc @@ -0,0 +1,69 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/stream_executor/tpu/tpu_topology.h" + +#include "tensorflow/core/tpu/tpu_api.h" + +namespace tensorflow { +namespace tpu { + +TpuChipCoordinatesExternal TpuCoreLocationExternal::chip_coordinates() const { + return { + tpu::ExecutorApiFn()->TpuCoreLocation_ChipCoordinates_XFn(core_location_), + tpu::ExecutorApiFn()->TpuCoreLocation_ChipCoordinates_YFn(core_location_), + tpu::ExecutorApiFn()->TpuCoreLocation_ChipCoordinates_ZFn( + core_location_)}; +} + +int32 TpuCoreLocationExternal::index() const { + return tpu::ExecutorApiFn()->TpuCoreLocation_IndexFn(core_location_); +} + +int32 TpuCoreLocationExternal::Id() const { + return tpu::ExecutorApiFn()->TpuCoreLocation_IdFn(core_location_); +} + +int32 TpuTopologyExternal::LogicalDevicesPerHost( + TpuCoreTypeEnum core_type) const { + return tpu::ExecutorApiFn()->TpuTopology_LogicalDevicesPerHostFn(topology_, + core_type); +} + +int32 TpuTopologyExternal::LogicalDevicesPerChip( + TpuCoreTypeEnum core_type) const { + return tpu::ExecutorApiFn()->TpuTopology_LogicalDevicesPerChipFn(topology_, + core_type); +} + +TpuTopologyChipBoundsExternal TpuTopologyExternal::chip_bounds() const { + return {tpu::ExecutorApiFn()->TpuTopology_ChipBounds_XFn(topology_), + tpu::ExecutorApiFn()->TpuTopology_ChipBounds_YFn(topology_), + tpu::ExecutorApiFn()->TpuTopology_ChipBounds_ZFn(topology_)}; +} + +bool TpuTopologyExternal::HasChip(int x, int y, int z) const { + return tpu::ExecutorApiFn()->TpuTopology_HasChipFn(topology_, x, y, z); +} + +TpuCoreLocationExternal TpuTopologyExternal::Core(int x, int y, int z, + TpuCoreTypeEnum core_type, + int index) const { + return TpuCoreLocationExternal(tpu::ExecutorApiFn()->TpuTopology_CoreFn( + topology_, x, y, z, core_type, index)); +} + +} // namespace tpu +} // namespace tensorflow diff --git a/tensorflow/stream_executor/tpu/tpu_topology.h b/tensorflow/stream_executor/tpu/tpu_topology.h new file mode 100644 index 00000000000..b7d462804c9 --- /dev/null +++ b/tensorflow/stream_executor/tpu/tpu_topology.h @@ -0,0 +1,66 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_TOPOLOGY_H_ +#define TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_TOPOLOGY_H_ + +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/tpu/kernels/tpu_util_c_api.h" + +namespace tensorflow { +namespace tpu { + +struct TpuChipCoordinatesExternal { + int x; + int y; + int z; +}; + +class TpuCoreLocationExternal { + public: + explicit TpuCoreLocationExternal(void* core_location) + : core_location_(core_location) {} + TpuChipCoordinatesExternal chip_coordinates() const; + int32 index() const; + int32 Id() const; + + private: + void* core_location_; +}; + +struct TpuTopologyChipBoundsExternal { + int x; + int y; + int z; +}; + +class TpuTopologyExternal { + public: + explicit TpuTopologyExternal(void* topology) : topology_(topology) {} + int32 LogicalDevicesPerHost(TpuCoreTypeEnum core_type) const; + int32 LogicalDevicesPerChip(TpuCoreTypeEnum core_type) const; + TpuTopologyChipBoundsExternal chip_bounds() const; + bool HasChip(int x, int y, int z) const; + TpuCoreLocationExternal Core(int x, int y, int z, TpuCoreTypeEnum core_type, + int index) const; + + private: + void* topology_; +}; + +} // namespace tpu +} // namespace tensorflow + +#endif // TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_TOPOLOGY_H_ From ba9f34d9dd869e499bc8f3ac9b3a562fa6bace54 Mon Sep 17 00:00:00 2001 From: Xingyu Long Date: Thu, 16 Jul 2020 17:32:32 -0400 Subject: [PATCH 0635/2522] Add distribution_util. 
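For illustration only, a minimal sketch of how the new helpers are meant to be driven from a Keras benchmark; the helper names and signatures come from the distribution_util.py file added below, while build_model is a placeholder for whatever model factory the benchmark defines:

    from tensorflow.python.keras.benchmarks import distribution_util

    # Resolve a strategy from its string name ("off" returns None, in which
    # case get_strategy_scope falls back to a no-op context manager).
    strategy = distribution_util.get_distribution_strategy(
        distribution_strategy="mirrored", num_gpus=2)

    with distribution_util.get_strategy_scope(strategy):
      model = build_model()  # placeholder model factory
      model.compile(optimizer="sgd", loss="mse")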
--- tensorflow/python/keras/benchmarks/BUILD | 9 + .../python/keras/benchmarks/benchmark_util.py | 30 ++- .../keras/benchmarks/distribution_util.py | 182 ++++++++++++++++++ 3 files changed, 211 insertions(+), 10 deletions(-) create mode 100644 tensorflow/python/keras/benchmarks/distribution_util.py diff --git a/tensorflow/python/keras/benchmarks/BUILD b/tensorflow/python/keras/benchmarks/BUILD index 87df84f2663..307c339ca93 100644 --- a/tensorflow/python/keras/benchmarks/BUILD +++ b/tensorflow/python/keras/benchmarks/BUILD @@ -75,6 +75,7 @@ py_library( name = "benchmark_util", srcs = ["benchmark_util.py"], deps = [ + ":distribution_util", "//tensorflow:tensorflow_py", "//third_party/py/numpy", ], @@ -109,3 +110,11 @@ py_test( "//tensorflow:tensorflow_py", ], ) + +py_library( + name = "distribution_util", + srcs = ["distribution_util.py"], + deps = [ + "//tensorflow:tensorflow_py", + ], +) diff --git a/tensorflow/python/keras/benchmarks/benchmark_util.py b/tensorflow/python/keras/benchmarks/benchmark_util.py index f9da4d30843..2ffdb3d520d 100644 --- a/tensorflow/python/keras/benchmarks/benchmark_util.py +++ b/tensorflow/python/keras/benchmarks/benchmark_util.py @@ -22,6 +22,8 @@ import numpy as np import tensorflow as tf +from tensorflow.python.keras.benchmarks import distribution_util + class TimerCallBack(tf.keras.callbacks.Callback): """Callback for logging time in each epoch or batch.""" @@ -108,19 +110,26 @@ def measure_performance(model_fn, avg_epoch_time_list, wall_time_list, exp_per_sec_list = [], [], [] total_num_examples = epochs * num_examples + strategy = distribution_util.get_distribution_strategy( + distribution_strategy=distribution_strategy, + num_gpus=num_gpus) + for _ in range(run_iters): timer = timeit.default_timer t0 = timer() - model = model_fn() - build_time = timer() - t0 + # Init the distribution strategy scope for each iteration. + strategy_scope = distribution_util.get_strategy_scope(strategy) + with strategy_scope: + model = model_fn() + build_time = timer() - t0 - t1 = timer() - model.compile( - optimizer=optimizer, - loss=loss, - metrics=metrics, - ) - compile_time = timer() - t1 + t1 = timer() + model.compile( + optimizer=optimizer, + loss=loss, + metrics=metrics, + ) + compile_time = timer() - t1 # Run one warm up epoch. model.fit(x=x, y=y, batch_size=batch_size, epochs=1) cbk = TimerCallBack() @@ -153,6 +162,7 @@ def measure_performance(model_fn, metrics.append({'name': 'epochs', 'value': epochs}) wall_time = np.mean(wall_time_list) - extras = {'distribution_strategy': distribution_strategy} + extras = {'distribution_strategy': distribution_strategy, + 'num_gpus': num_gpus} return metrics, wall_time, extras diff --git a/tensorflow/python/keras/benchmarks/distribution_util.py b/tensorflow/python/keras/benchmarks/distribution_util.py new file mode 100644 index 00000000000..970269eda8e --- /dev/null +++ b/tensorflow/python/keras/benchmarks/distribution_util.py @@ -0,0 +1,182 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Util for running models in a distribution setting.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +import json + +import tensorflow as tf + + +def _collective_communication(all_reduce_alg): + """Return a CollectiveCommunication based on all_reduce_alg. + + Args: + all_reduce_alg: a string specifying which collective communication to pick, + or None. + + Returns: + tf.distribute.experimental.CollectiveCommunication object + + Raises: + ValueError: if `all_reduce_alg` not in [None, "ring", "nccl"] + """ + collective_communication_options = { + None: tf.distribute.experimental.CollectiveCommunication.AUTO, + "ring": tf.distribute.experimental.CollectiveCommunication.RING, + "nccl": tf.distribute.experimental.CollectiveCommunication.NCCL + } + if all_reduce_alg not in collective_communication_options: + raise ValueError( + "When used with `multi_worker_mirrored`, valid values for " + "all_reduce_alg are [`ring`, `nccl`]. Supplied value: {}".format( + all_reduce_alg)) + return collective_communication_options[all_reduce_alg] + + +def _mirrored_cross_device_ops(all_reduce_alg, num_packs): + """Return a CrossDeviceOps based on all_reduce_alg and num_packs. + + Args: + all_reduce_alg: a string specifying which cross device op to pick, or None. + num_packs: an integer specifying number of packs for the cross device op. + + Returns: + tf.distribute.CrossDeviceOps object or None. + + Raises: + ValueError: if `all_reduce_alg` not in [None, "nccl", "hierarchical_copy"]. + """ + if all_reduce_alg is None: + return None + mirrored_all_reduce_options = { + "nccl": tf.distribute.NcclAllReduce, + "hierarchical_copy": tf.distribute.HierarchicalCopyAllReduce + } + if all_reduce_alg not in mirrored_all_reduce_options: + raise ValueError( + "When used with `mirrored`, valid values for all_reduce_alg are " + "[`nccl`, `hierarchical_copy`]. Supplied value: {}".format( + all_reduce_alg)) + cross_device_ops_class = mirrored_all_reduce_options[all_reduce_alg] + return cross_device_ops_class(num_packs=num_packs) + + +def get_distribution_strategy(distribution_strategy="mirrored", + num_gpus=0, + all_reduce_alg=None, + num_packs=1): + """Return a DistributionStrategy for running the model. + + Args: + distribution_strategy: a string specifying which distribution strategy to + use. Accepted values are "off", "one_device", "mirrored", + and "multi_worker_mirrored" -- case insensitive. + "off" means not to use Distribution Strategy. + num_gpus: Number of GPUs to run this model. + + Returns: + tf.distribute.DistibutionStrategy object. + Raises: + ValueError: if `distribution_strategy` is "off" or "one_device" and + `num_gpus` is larger than 1; or `num_gpus` is negative. 
+ """ + if num_gpus < 0: + raise ValueError("`num_gpus` can not be negative.") + + distribution_strategy = distribution_strategy.lower() + + if distribution_strategy == "off": + if num_gpus > 1: + raise ValueError( + "When {} GPUs are specified, distribution_strategy " + "flag cannot be set to `off`.".format(num_gpus)) + return None + + if distribution_strategy == "multi_worker_mirrored": + return tf.distribute.experimental.MultiWorkerMirroredStrategy( + communication=_collective_communication(all_reduce_alg)) + + if distribution_strategy == "one_device": + if num_gpus == 0: + return tf.distribute.OneDeviceStrategy("device:CPU:0") + if num_gpus > 1: + raise ValueError("`OneDeviceStrategy` can not be used for more than " + "one device.") + return tf.distribute.OneDeviceStrategy("device:GPU:0") + + if distribution_strategy == "mirrored": + if num_gpus == 0: + devices = ["device:CPU:0"] + else: + devices = ["device:GPU:%d" % i for i in range(num_gpus)] + return tf.distribute.MirroredStrategy( + devices=devices, + cross_device_ops=_mirrored_cross_device_ops(all_reduce_alg, num_packs)) + + raise ValueError( + "Unrecognized Distribution Strategy: %r" % distribution_strategy) + + +def configure_cluster(worker_hosts=None, task_index=-1): + """Set multi-worker cluster spec in TF_CONFIG environment variable. + + Args: + worker_hosts: comma-separated list of worker ip:port pairs. + + Returns: + Number of workers in the cluster. + """ + tf_config = json.loads(os.environ.get("TF_CONFIG", "{}")) + if tf_config: + num_workers = (len(tf_config["cluster"].get("chief", [])) + + len(tf_config["cluster"].get("worker", []))) + elif worker_hosts: + workers = worker_hosts.split(",") + num_workers = len(workers) + if num_workers > 1 and task_index < 0: + raise ValueError("Must specify task_index when number of workers > 1") + task_index = 0 if num_workers == 1 else task_index + os.environ["TF_CONFIG"] = json.dumps({ + "cluster": { + "worker": workers + }, + "task": {"type": "worker", "index": task_index} + }) + else: + num_workers = 1 + return num_workers + + +def get_strategy_scope(strategy): + if strategy: + strategy_scope = strategy.scope() + else: + strategy_scope = DummyContextManager() + + return strategy_scope + + +class DummyContextManager(object): + + def __enter__(self): + pass + + def __exit__(self, *args): + pass From 77039288bb269c0f2e506697221a8b2a5c94689d Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Thu, 16 Jul 2020 14:26:34 -0700 Subject: [PATCH 0636/2522] Remove the private API usage of vriables.dtypes. 
PiperOrigin-RevId: 321645728 Change-Id: I6a91363a43bceb4c0954f48fdf287cdceffebcb7 --- .../python/keras/optimizer_v2/learning_rate_schedule_test.py | 3 ++- .../keras/optimizer_v2/legacy_learning_rate_decay_test.py | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/keras/optimizer_v2/learning_rate_schedule_test.py b/tensorflow/python/keras/optimizer_v2/learning_rate_schedule_test.py index a6fc22f7927..d2bc7b94ac2 100644 --- a/tensorflow/python/keras/optimizer_v2/learning_rate_schedule_test.py +++ b/tensorflow/python/keras/optimizer_v2/learning_rate_schedule_test.py @@ -25,6 +25,7 @@ from absl.testing import parameterized from tensorflow.python.eager import backprop from tensorflow.python.eager import context from tensorflow.python.eager import def_function +from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.keras import combinations @@ -143,7 +144,7 @@ class LRDecayTestV2(test_util.TensorFlowTestCase, parameterized.TestCase): def testPiecewiseConstantEdgeCases(self, serialize): # Test casting boundaries from int32 to int64. - x_int64 = variables.Variable(0, dtype=variables.dtypes.int64) + x_int64 = variables.Variable(0, dtype=dtypes.int64) boundaries, values = [1, 2, 3], [0.4, 0.5, 0.6, 0.7] decayed_lr = learning_rate_schedule.PiecewiseConstantDecay( boundaries, values) diff --git a/tensorflow/python/keras/optimizer_v2/legacy_learning_rate_decay_test.py b/tensorflow/python/keras/optimizer_v2/legacy_learning_rate_decay_test.py index 040b3637aa0..b530767b6f8 100644 --- a/tensorflow/python/keras/optimizer_v2/legacy_learning_rate_decay_test.py +++ b/tensorflow/python/keras/optimizer_v2/legacy_learning_rate_decay_test.py @@ -21,6 +21,7 @@ from __future__ import print_function import math from tensorflow.python.eager import context +from tensorflow.python.framework import dtypes from tensorflow.python.keras import combinations from tensorflow.python.keras import keras_parameterized from tensorflow.python.keras.optimizer_v2 import legacy_learning_rate_decay as learning_rate_decay @@ -101,7 +102,7 @@ class LRDecayTest(keras_parameterized.TestCase): self.assertAllClose(self.evaluate(decayed_lr), 0.001, 1e-6) def testPiecewiseConstantEdgeCases(self): - x_int = variables.Variable(0, dtype=variables.dtypes.int32) + x_int = variables.Variable(0, dtype=dtypes.int32) boundaries, values = [-1.0, 1.0], [1, 2, 3] with self.assertRaises(ValueError): decayed_lr = learning_rate_decay.piecewise_constant( @@ -125,7 +126,7 @@ class LRDecayTest(keras_parameterized.TestCase): learning_rate_decay.piecewise_constant(x_ref, boundaries, values) # Test casting boundaries from int32 to int64. - x_int64 = variables.Variable(0, dtype=variables.dtypes.int64) + x_int64 = variables.Variable(0, dtype=dtypes.int64) boundaries, values = [1, 2, 3], [0.4, 0.5, 0.6, 0.7] decayed_lr = learning_rate_decay.piecewise_constant( x_int64, boundaries, values) From 258a4ea89b42dfc10d6ae893ca5d9903bb7ea5e9 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Thu, 16 Jul 2020 14:27:35 -0700 Subject: [PATCH 0637/2522] Creating release configurations for MacOS CPU packages. 
PiperOrigin-RevId: 321645901 Change-Id: Id362c6d37457395b8eebec42865c44569377cc42 --- .bazelrc | 4 ++++ .../ci_build/release/macos/cpu_py2_full/nightly_release.sh | 4 +--- .../ci_build/release/macos/cpu_py35_full/nightly_release.sh | 4 +--- .../ci_build/release/macos/cpu_py36_full/nightly_release.sh | 4 +--- .../ci_build/release/macos/cpu_py37_full/nightly_release.sh | 4 +--- .../ci_build/release/macos/cpu_py38_full/nightly_release.sh | 4 +--- 6 files changed, 9 insertions(+), 15 deletions(-) diff --git a/.bazelrc b/.bazelrc index 9ef1f8e9c04..a7b768f1b16 100644 --- a/.bazelrc +++ b/.bazelrc @@ -83,6 +83,7 @@ # release_common: Common options for all builds on all operating systems. # release_gpu_common: Common options for GPU builds on Linux and Windows. # release_cpu_linux: Toolchain and CUDA options for Linux CPU builds. +# release_cpu_macos: Toolchain and CUDA options for MacOS CPU builds. # release_gpu_linux: Toolchain and CUDA options for Linux PU builds. # Allow builds using libc++ as a linker library @@ -551,6 +552,9 @@ build:release_cpu_linux --config=avx_linux # Did not add this to the defaults in case this changes. build:release_cpu_linux --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain +build:release_cpu_macos --config=release_common +build:release_cpu_macos --config=avx_linux + build:release_gpu_common --config=release_common build:release_gpu_common --config=cuda build:release_gpu_common --config=tensorrt diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py2_full/nightly_release.sh b/tensorflow/tools/ci_build/release/macos/cpu_py2_full/nightly_release.sh index 69c57179379..6dc3e3849ad 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py2_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py2_full/nightly_release.sh @@ -30,13 +30,11 @@ sudo pip install twine ./tensorflow/tools/ci_build/update_version.py --nightly # Run configure. -export TF_NEED_CUDA=0 -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python2) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=opt --config=v2 tensorflow/tools/pip_package:build_pip_package +bazel build --config=release_cpu_macos tensorflow/tools/pip_package:build_pip_package mkdir pip_pkg ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --cpu --nightly_flag diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py35_full/nightly_release.sh b/tensorflow/tools/ci_build/release/macos/cpu_py35_full/nightly_release.sh index 1f018136ef9..45a8f435988 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py35_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py35_full/nightly_release.sh @@ -35,13 +35,11 @@ sudo pip install twine ./tensorflow/tools/ci_build/update_version.py --nightly # Run configure. 
-export TF_NEED_CUDA=0 -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.5) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=opt --config=v2 tensorflow/tools/pip_package:build_pip_package +bazel build --config=release_cpu_macos tensorflow/tools/pip_package:build_pip_package mkdir pip_pkg ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --cpu --nightly_flag diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py36_full/nightly_release.sh b/tensorflow/tools/ci_build/release/macos/cpu_py36_full/nightly_release.sh index 3702ec97413..d4cc8d7afac 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py36_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py36_full/nightly_release.sh @@ -33,13 +33,11 @@ sudo pip install twine ./tensorflow/tools/ci_build/update_version.py --nightly # Run configure. -export TF_NEED_CUDA=0 -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.6) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=opt --config=v2 tensorflow/tools/pip_package:build_pip_package +bazel build --config=release_cpu_macos tensorflow/tools/pip_package:build_pip_package mkdir pip_pkg ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --cpu --nightly_flag diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py37_full/nightly_release.sh b/tensorflow/tools/ci_build/release/macos/cpu_py37_full/nightly_release.sh index eee97f6e2d2..cd0f8a58ae6 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py37_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py37_full/nightly_release.sh @@ -33,13 +33,11 @@ sudo pip install twine ./tensorflow/tools/ci_build/update_version.py --nightly # Run configure. -export TF_NEED_CUDA=0 -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.7) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=opt --config=v2 tensorflow/tools/pip_package:build_pip_package +bazel build --config=release_cpu_macos tensorflow/tools/pip_package:build_pip_package mkdir pip_pkg ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --cpu --nightly_flag diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py38_full/nightly_release.sh b/tensorflow/tools/ci_build/release/macos/cpu_py38_full/nightly_release.sh index 70773c1b597..11085b08a38 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py38_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py38_full/nightly_release.sh @@ -33,13 +33,11 @@ sudo pip install twine ./tensorflow/tools/ci_build/update_version.py --nightly # Run configure. -export TF_NEED_CUDA=0 -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.8) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=opt --config=v2 tensorflow/tools/pip_package:build_pip_package +bazel build --config=release_cpu_macos tensorflow/tools/pip_package:build_pip_package mkdir pip_pkg ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --cpu --nightly_flag From af5c59fbba9be81dcdf6c6a403cedfc2c238d6a9 Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Thu, 16 Jul 2020 14:46:47 -0700 Subject: [PATCH 0638/2522] Cleanup the private api usage of math_ops.mul. 
PiperOrigin-RevId: 321650297 Change-Id: Iecb8e153ec45d852c720834fd2f5db633435c9e5 --- tensorflow/python/keras/backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py index 866f7569ca7..01d3ecd09cf 100644 --- a/tensorflow/python/keras/backend.py +++ b/tensorflow/python/keras/backend.py @@ -4869,7 +4869,7 @@ def hard_sigmoid(x): """ point_two = _constant_to_tensor(0.2, x.dtype.base_dtype) point_five = _constant_to_tensor(0.5, x.dtype.base_dtype) - x = math_ops.mul(x, point_two) + x = math_ops.multiply(x, point_two) x = math_ops.add(x, point_five) x = clip_ops.clip_by_value(x, 0., 1.) return x From fc0f5e4b11cb5aad68e981d87fd1bc007529b95d Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Thu, 16 Jul 2020 21:36:38 +0000 Subject: [PATCH 0639/2522] Add palette-based png support for tf.image.decode_png This PR tries to address the issue raised in 28256 where tf.image.decode_png process palette-based png image incorrectly. The issue was an redundant call png_set_rgb_to_gray in tensorflow/core/lib/png/png_io.cc. This PR fixes the issue. This PR fixes 28256. Signed-off-by: Yong Tang --- tensorflow/core/lib/png/BUILD | 1 + tensorflow/core/lib/png/png_io.cc | 9 +++++++-- tensorflow/core/lib/png/testdata/palette_only.png | Bin 0 -> 105 bytes tensorflow/python/ops/image_ops_test.py | 11 +++++++++++ 4 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 tensorflow/core/lib/png/testdata/palette_only.png diff --git a/tensorflow/core/lib/png/BUILD b/tensorflow/core/lib/png/BUILD index 7abc82e6a0f..95debe44e5e 100644 --- a/tensorflow/core/lib/png/BUILD +++ b/tensorflow/core/lib/png/BUILD @@ -33,5 +33,6 @@ filegroup( "testdata/lena_palette.png", "testdata/lena_palette_trns.png", "testdata/lena_rgba.png", + "testdata/palette_only.png", ], ) diff --git a/tensorflow/core/lib/png/png_io.cc b/tensorflow/core/lib/png/png_io.cc index d0014066ce3..35e189e7829 100644 --- a/tensorflow/core/lib/png/png_io.cc +++ b/tensorflow/core/lib/png/png_io.cc @@ -282,8 +282,11 @@ bool CommonInitDecode(StringPiece png_string, int desired_channels, } // convert palette to rgb(a) if needs be. - if (context->color_type == PNG_COLOR_TYPE_PALETTE) + // Note if desired_channels=1 then the original palette indices + // will be presented. + if (context->color_type == PNG_COLOR_TYPE_PALETTE && desired_channels != 1) { png_set_palette_to_rgb(context->png_ptr); + } // handle grayscale case for source or destination const bool want_gray = (context->channels < 3); @@ -294,7 +297,9 @@ bool CommonInitDecode(StringPiece png_string, int desired_channels, } } if (want_gray) { // output is grayscale - if (!is_gray) + // Note if color type is palette and context->channels < 3, + // then the original palette indices will be presented. 
+ if (!is_gray && context->color_type != PNG_COLOR_TYPE_PALETTE) png_set_rgb_to_gray(context->png_ptr, 1, 0.299, 0.587); // 601, JPG } else { // output is rgb(a) if (is_gray) diff --git a/tensorflow/core/lib/png/testdata/palette_only.png b/tensorflow/core/lib/png/testdata/palette_only.png new file mode 100644 index 0000000000000000000000000000000000000000..c3fedd4e1fb592326519aad9337ae3908e856ed3 GIT binary patch literal 105 zcmeAS@N?(olHy`uVBq!ia0vp^A|TAf3?x51|2hvyaR&H=xB}__VEm)uyHDj<^TA14}i)TJYD@<);T3K0RS^l9t!{f literal 0 HcmV?d00001 diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py index 59f219beb45..4ac1bbf2224 100644 --- a/tensorflow/python/ops/image_ops_test.py +++ b/tensorflow/python/ops/image_ops_test.py @@ -4099,6 +4099,17 @@ class PngTest(test_util.TensorFlowTestCase): self.assertEqual(image.get_shape().as_list(), [None, None, channels or None]) + def testPaletteOnly(self): + filename = "tensorflow/core/lib/png/testdata/palette_only.png" + expected = np.zeros((20, 20, 1), np.uint8) + expected[1, 1:19, :] = 1 + expected[3, 1:19, :] = 2 + with self.cached_session(use_gpu=True) as sess: + channels = 1 + png = image_ops.decode_png(io_ops.read_file(filename), channels=channels) + png = self.evaluate(png) + self.assertAllEqual(expected, png) + class GifTest(test_util.TensorFlowTestCase): From 95123d81ee7fd2d50f07a1e4e407f456ff087400 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Thu, 16 Jul 2020 14:53:54 -0700 Subject: [PATCH 0640/2522] Move more references to TpuCompilationCache to open source resource manager in tpu_configuration_ops_impl.cc PiperOrigin-RevId: 321651750 Change-Id: Id33887f131d475e9aa7872f8cb1eac4104109857 --- .../core/tpu/kernels/tpu_configuration_ops.cc | 20 +++++++++++++++++-- tensorflow/core/tpu/tpu_config_c_api.h | 4 ++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc b/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc index 13efdc46e10..e098dbd682c 100644 --- a/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc +++ b/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc @@ -223,6 +223,10 @@ void ShutdownDistributedTpuOp::Compute(OpKernelContext* ctx) { OP_REQUIRES_OK(ctx, StatusFromTF_Status(status)); TF_DeleteStatus(status); + OP_REQUIRES_OK( + ctx, DeleteIfExists( + GetTPUConfigResourceMgr(), tpu::kCompilationCacheResourceName)); + VLOG(1) << "ShutdownDistributedTpuOp done"; } @@ -257,10 +261,22 @@ void InitializeHostForDistributedTpuOp::Compute(OpKernelContext* ctx) { compilation_cache->Unref(); } + tpu::TpuCompilationCacheInterface* local_compilation_cache; + Status s = rmgr->Lookup(rmgr->default_container(), + tpu::kCompilationCacheResourceName, + &local_compilation_cache); + if (!s.ok()) { + local_compilation_cache = nullptr; + } + tpu::ConfigApiFn()->InitializeHostForDistributedTpuOp_DoWorkFn( tpu_host_config.size(), tpu_host_config.data(), - enable_whole_mesh_compilations_, &device_id_output_size, - &device_id_output, status); + enable_whole_mesh_compilations_, local_compilation_cache, + &device_id_output_size, &device_id_output, status); + + if (local_compilation_cache != nullptr) { + local_compilation_cache->Unref(); + } Tensor* ctx_output; OP_REQUIRES_OK( diff --git a/tensorflow/core/tpu/tpu_config_c_api.h b/tensorflow/core/tpu/tpu_config_c_api.h index 21649050bf7..55de89d17c9 100644 --- a/tensorflow/core/tpu/tpu_config_c_api.h +++ b/tensorflow/core/tpu/tpu_config_c_api.h @@ -49,8 +49,8 @@ TFTPU_CAPI_EXPORT void 
ShutdownDistributedTpuOp_DoWork(TF_Status* status); TFTPU_CAPI_EXPORT void InitializeHostForDistributedTpuOp_DoWork( const size_t tpu_host_config_size, const char* tpu_host_config, - const bool enable_whole_mesh_compilations, size_t* core_id_output_size, - int32_t** core_id_output, TF_Status* status); + const bool enable_whole_mesh_compilations, void* local_compilation_cache, + size_t* core_id_output_size, int32_t** core_id_output, TF_Status* status); TFTPU_CAPI_EXPORT void SetGlobalTPUArrayOp_DoWork( const size_t tpu_topology_size, const char* tpu_topology, From eecb5e8febddf790b24da8adf75157e4d5d65e7a Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Thu, 16 Jul 2020 14:57:33 -0700 Subject: [PATCH 0641/2522] Update private API usage of profiler.warmup. Copy the content of warmup instead, which is just a start/stop pair. PiperOrigin-RevId: 321652531 Change-Id: I6e99e3f52123a3b9f25960dbceb0f719c1e2f389 --- tensorflow/python/keras/callbacks.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/keras/callbacks.py b/tensorflow/python/keras/callbacks.py index 131bc152e51..1b8c9b085ab 100644 --- a/tensorflow/python/keras/callbacks.py +++ b/tensorflow/python/keras/callbacks.py @@ -2139,7 +2139,9 @@ class TensorBoard(Callback, version_utils.TensorBoardVersionSelector): raise ValueError(profile_batch_error_message) if self._start_batch > 0: - profiler.warmup() # Improve the profiling accuracy. + # Warm up and improve the profiling accuracy. + profiler.start('') + profiler.stop(save=False) # True when a trace is running. self._is_tracing = False From 48f21bba1fbc9c06d2422570290e1f82e69c4da0 Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Thu, 16 Jul 2020 15:00:06 -0700 Subject: [PATCH 0642/2522] Replace the private API usage of remove_squeezable_dimensions with the forked version in keras. PiperOrigin-RevId: 321653128 Change-Id: I0d726d967760c323ee67f32cfd164ef43c9d5479 --- tensorflow/python/keras/metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/keras/metrics.py b/tensorflow/python/keras/metrics.py index 0d154dd25d1..12532a54489 100644 --- a/tensorflow/python/keras/metrics.py +++ b/tensorflow/python/keras/metrics.py @@ -553,7 +553,7 @@ class MeanRelativeError(Mean): y_pred, y_true = losses_utils.squeeze_or_expand_dimensions( y_pred, y_true) - y_pred, self.normalizer = confusion_matrix.remove_squeezable_dimensions( + y_pred, self.normalizer = losses_utils.remove_squeezable_dimensions( y_pred, self.normalizer) y_pred.shape.assert_is_compatible_with(y_true.shape) relative_errors = math_ops.div_no_nan( From 6ddf013ac712152f4f2e5c854f5eb8eb343f9bf6 Mon Sep 17 00:00:00 2001 From: Russell Power Date: Thu, 16 Jul 2020 15:00:08 -0700 Subject: [PATCH 0643/2522] Add accessor for aliased indices. 
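The new accessor moves the recorded aliased output indices out of the
ExecutionOutput and clears the stored list, so the indices are handed out at
most once. A minimal usage sketch follows; the surrounding loop and the
element type (assumed here to be xla::ShapeIndex) are illustrative and not
part of this change:

  std::vector<xla::ShapeIndex> aliased = output.ConsumeAliasedIndices();
  for (const xla::ShapeIndex& index : aliased) {
    // `index` identifies an output position that was recorded as aliased.
  }
  // Calling ConsumeAliasedIndices() again now yields an empty vector,
  // because the accessor moved the stored indices out and cleared them.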
PiperOrigin-RevId: 321653135 Change-Id: I0ec1e50b2a94e4c38dd74c3cdeed44124e6b64e7 --- tensorflow/compiler/xla/service/executable.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tensorflow/compiler/xla/service/executable.h b/tensorflow/compiler/xla/service/executable.h index f002807431c..2e3ddedfb8c 100644 --- a/tensorflow/compiler/xla/service/executable.h +++ b/tensorflow/compiler/xla/service/executable.h @@ -105,6 +105,8 @@ class ExecutionInput { unowned_indices_.erase(index); } + const std::set& unowned_indices() { return unowned_indices_; } + const ShapeTree& Buffers() const { return buffers_; } ShapeTree* MutableBuffers() { return &buffers_; } @@ -190,6 +192,12 @@ class ExecutionOutput { return std::move(to_be_released_); } + std::vector ConsumeAliasedIndices() { + auto aliased = std::move(aliased_indices_); + aliased_indices_.clear(); + return aliased; + } + private: ScopedShapedBuffer result_; From f6940f2195b5f3e017ab5b8618d1a6fd5d6671ac Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Thu, 16 Jul 2020 22:13:26 +0000 Subject: [PATCH 0644/2522] Change tf.constant to np.array inside generator to avoid unexpected error Signed-off-by: Yong Tang --- tensorflow/python/ops/image_ops_test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py index a254ce0d8d3..11cc2df9e63 100644 --- a/tensorflow/python/ops/image_ops_test.py +++ b/tensorflow/python/ops/image_ops_test.py @@ -1368,13 +1368,13 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): self.assertAllEqual(y_np, y_tf.eval({k_placeholder: k})) def testFlipImageUnknownShape(self): - image_input = constant_op.constant( - [[[[0, 1, 2], [3, 4, 5]], [[6, 7, 8], [9, 10, 11]]]]) - expected_output = constant_op.constant( [[[[3, 4, 5], [0, 1, 2]], [[9, 10, 11], [6, 7, 8]]]]) - def generator(): yield image_input + def generator(): + image_input = np.array( + [[[[0, 1, 2], [3, 4, 5]], [[6, 7, 8], [9, 10, 11]]]], np.int32) + yield image_input dataset = dataset_ops.Dataset.from_generator( generator, From ae26f27ef7b5fc1cd1e3730c1a608eb047d07bf6 Mon Sep 17 00:00:00 2001 From: Feng Liu Date: Thu, 16 Jul 2020 15:40:03 -0700 Subject: [PATCH 0645/2522] Add the quantization op interface to tfl.select No zip tests are available for tfl.select. This will be fixed in a followup cl. PiperOrigin-RevId: 321660762 Change-Id: Ie6d0207a2c648fe8304d3877f23cc2b54fc65c31 --- tensorflow/compiler/mlir/lite/ir/tfl_ops.td | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td index c2d625d07fe..66124ba9982 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td +++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td @@ -2673,6 +2673,7 @@ def TFL_ReverseV2Op: TFL_Op<"reverse_v2", [ // are unranked. Therefore, we skip adding shape constraints here. 
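// The SameOperandsAndResultsScale trait added below is expected to require
// that, in the quantized form of select, the data operands and the result
// share a single quantization scale, so quantization parameters can be
// propagated through the op.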
def TFL_SelectOp : TFL_Op<"select", [ NoSideEffect, + SameOperandsAndResultsScale, PredOpTrait<"operands have same element type", TCopVTEtIsSameAs<1, 2>>, PredOpTrait<"operands and result have same element type", TFL_TCresVTEtIsSameAsOp<0, 1>>]> { From 4819021890ba58f17e7d56cc3208930942078705 Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Thu, 16 Jul 2020 22:56:19 +0000 Subject: [PATCH 0646/2522] wrapped summary_op.cc in anonymous namespace and fixed naming in summary_op_test --- tensorflow/c/kernels/BUILD | 7 ++-- tensorflow/c/kernels/summary_op.cc | 56 +++++++++++++------------ tensorflow/c/kernels/summary_op_test.cc | 11 +++-- 3 files changed, 38 insertions(+), 36 deletions(-) diff --git a/tensorflow/c/kernels/BUILD b/tensorflow/c/kernels/BUILD index c5c652ab5d7..87ae0339c6f 100644 --- a/tensorflow/c/kernels/BUILD +++ b/tensorflow/c/kernels/BUILD @@ -84,7 +84,7 @@ cc_library( srcs = ["tensor_shape_utils.cc",], hdrs = ["tensor_shape_utils.h",], deps = [ "//tensorflow/c:tf_tensor",], - visibility = ["//visibility:public"], + visibility = ["//visibility:private"], ) tf_cc_test( @@ -116,7 +116,8 @@ filegroup( filegroup( name = "android_all_ops", - srcs = ["ops/bitcast.cc", - "ops/summary.cc" + srcs = [ + "ops/bitcast.cc", + "ops/summary.cc" ], ) diff --git a/tensorflow/c/kernels/summary_op.cc b/tensorflow/c/kernels/summary_op.cc index 1bd14eaf9c9..6b611be7e4f 100644 --- a/tensorflow/c/kernels/summary_op.cc +++ b/tensorflow/c/kernels/summary_op.cc @@ -23,36 +23,37 @@ limitations under the License. #include "tensorflow/core/framework/summary.pb.h" #include "tensorflow/core/framework/types.h" +namespace { + // Struct that stores the status and TF_Tensor inputs to the opkernel. // Used to delete tensor and status in its destructor upon kernel return. 
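// In effect Params is a small RAII helper: its destructor runs on every
// return path of the compute function, so the status and both input tensors
// are released even when an early error return is taken.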
-namespace { - struct Params { - TF_Tensor* tags; - TF_Tensor* values; - TF_Status* status; - Params(TF_OpKernelContext* ctx) : tags(nullptr), - values(nullptr), - status(nullptr) { - status = TF_NewStatus(); - TF_GetInput(ctx, 0, &tags, status); - if (TF_GetCode(status) == TF_OK) { - TF_GetInput(ctx, 1, &values, status); - } - }; - ~Params() { - TF_DeleteStatus(status); - TF_DeleteTensor(tags); - TF_DeleteTensor(values); +struct Params { + TF_Tensor* tags; + TF_Tensor* values; + TF_Status* status; + Params(TF_OpKernelContext* ctx) : tags(nullptr), + values(nullptr), + status(nullptr) { + status = TF_NewStatus(); + TF_GetInput(ctx, 0, &tags, status); + if (TF_GetCode(status) == TF_OK) { + TF_GetInput(ctx, 1, &values, status); } }; -} + ~Params() { + TF_DeleteStatus(status); + TF_DeleteTensor(tags); + TF_DeleteTensor(values); + } +}; + // dummy functions used for kernel registration -static void* ScalarSummaryOp_Create(TF_OpKernelConstruction* ctx) { +void* ScalarSummaryOp_Create(TF_OpKernelConstruction* ctx) { return nullptr; } -static void ScalarSummaryOp_Delete(void* kernel) { +void ScalarSummaryOp_Delete(void* kernel) { return; } @@ -60,10 +61,10 @@ static void ScalarSummaryOp_Delete(void* kernel) { bool IsSameSize(TF_Tensor* tensor1, TF_Tensor* tensor2); // Returns a string representation of a single tag or empty string if there // are multiple tags -static tensorflow::string SingleTag(TF_Tensor* tags); +tensorflow::string SingleTag(TF_Tensor* tags); template -static void ScalarSummaryOp_Compute(void* kernel, TF_OpKernelContext* ctx) { +void ScalarSummaryOp_Compute(void* kernel, TF_OpKernelContext* ctx) { Params params(ctx); if (TF_GetCode(params.status) != TF_OK){ TF_OpKernelContext_Failure(ctx, params.status); @@ -105,7 +106,7 @@ static void ScalarSummaryOp_Compute(void* kernel, TF_OpKernelContext* ctx) { TF_DeleteTensor(summary_tensor); } -bool IsSameSize(TF_Tensor* tensor1, TF_Tensor* tensor2){ +bool IsSameSize(TF_Tensor* tensor1, TF_Tensor* tensor2) { if (TF_NumDims(tensor1) != TF_NumDims(tensor2)) { return false; } @@ -117,7 +118,7 @@ bool IsSameSize(TF_Tensor* tensor1, TF_Tensor* tensor2){ return true; } -static tensorflow::string SingleTag(TF_Tensor* tags){ +tensorflow::string SingleTag(TF_Tensor* tags) { if (TF_TensorElementCount(tags) == 1) { const char* single_tag = static_cast( TF_TensorData(tags))->c_str(); @@ -150,7 +151,7 @@ void RegisterScalarSummaryOpKernel() { // A dummy static variable initialized by a lambda whose side-effect is to // register the ScalarSummary kernel. 
-TF_ATTRIBUTE_UNUSED static bool IsScalarSummaryOpKernelRegistered = []() { +TF_ATTRIBUTE_UNUSED bool IsScalarSummaryOpKernelRegistered = []() { if (SHOULD_REGISTER_OP_KERNEL("ScalarSummary")) { RegisterScalarSummaryOpKernel(); RegisterScalarSummaryOpKernel(); @@ -166,5 +167,6 @@ TF_ATTRIBUTE_UNUSED static bool IsScalarSummaryOpKernelRegistered = []() { RegisterScalarSummaryOpKernel(); } return true; -}(); +}(); +} // namespace diff --git a/tensorflow/c/kernels/summary_op_test.cc b/tensorflow/c/kernels/summary_op_test.cc index ad5fafe5530..4c691379bed 100644 --- a/tensorflow/c/kernels/summary_op_test.cc +++ b/tensorflow/c/kernels/summary_op_test.cc @@ -32,10 +32,9 @@ class DummyDevice : public DeviceBase { }; // Helper for comparing ouput and expected output -static void EXPECT_SummaryMatches(const Summary& actual, - const string& expected_str) { +void ExpectSummaryMatches(const Summary& actual, const string& expected_str) { Summary expected; - (protobuf::TextFormat::ParseFromString(expected_str, &expected)); + ASSERT_TRUE(protobuf::TextFormat::ParseFromString(expected_str, &expected)); EXPECT_EQ(expected.DebugString(), actual.DebugString()); } @@ -77,8 +76,9 @@ void TestScalarSummaryOp(Tensor* tags, Tensor* values, string expected_summary, ASSERT_EQ(expected_code, ctx.status().code()); if (expected_code == error::OK){ Summary summary; - ParseProtoUnlimited(&summary, ctx.mutable_output(0)->scalar()()); - EXPECT_SummaryMatches(summary, expected_summary); + ASSERT_TRUE(ParseProtoUnlimited(&summary, ctx.mutable_output(0)-> + scalar()())); + ExpectSummaryMatches(summary, expected_summary); } } @@ -133,7 +133,6 @@ TEST(ScalarSummaryOpTest, SimpleHalf) { } TEST(ScalarSummaryOpTest, Error_WrongDimsTags) { - int vectorSize = 3; Tensor tags(DT_STRING, {2, 1}); Tensor values(DT_FLOAT, {2}); tags.matrix()(0, 0) = "tag1"; From 3cc65294f89819163fc1e4d1e96d6807cfc33afc Mon Sep 17 00:00:00 2001 From: Wenhao Jia Date: Thu, 16 Jul 2020 16:07:28 -0700 Subject: [PATCH 0647/2522] Group together execution-related C API methods. 
PiperOrigin-RevId: 321666252 Change-Id: Ife996fa7c3fca44546487198990a46fd61fdcf62 --- tensorflow/core/tpu/BUILD | 2 ++ tensorflow/core/tpu/kernels/BUILD | 1 + tensorflow/core/tpu/kernels/tpu_execute_c_api.h | 13 ++++++++++++- tensorflow/core/tpu/tpu_api.cc | 5 +++++ tensorflow/core/tpu/tpu_api.h | 3 +++ tensorflow/core/tpu/tpu_api_dlsym_initializer.h | 1 + tensorflow/core/tpu/tpu_library_init_fns.inc | 14 +++++++++++--- tensorflow/stream_executor/tpu/tpu_executable.cc | 14 +++++++------- .../stream_executor/tpu/tpu_executor_c_api.h | 7 ------- 9 files changed, 42 insertions(+), 18 deletions(-) diff --git a/tensorflow/core/tpu/BUILD b/tensorflow/core/tpu/BUILD index 7639cacc378..f9031b440f9 100644 --- a/tensorflow/core/tpu/BUILD +++ b/tensorflow/core/tpu/BUILD @@ -125,6 +125,7 @@ cc_library( ":libtftpu_header", ":tpu_config_c_api", "//tensorflow/core/tpu/kernels:tpu_compile_c_api_hdrs", + "//tensorflow/core/tpu/kernels:tpu_execute_c_api_hdrs", "//tensorflow/core/tpu/kernels:tpu_mesh_state_c_api_hdrs", "//tensorflow/core/tpu/kernels:tpu_util_c_api_hdrs", "//tensorflow/stream_executor/tpu:tpu_executor_c_api_hdrs", @@ -149,6 +150,7 @@ cc_library( "//tensorflow/core/platform:status", "//tensorflow/core/tpu/graph_rewrite:tpu_rewrite_pass_registration", "//tensorflow/core/tpu/kernels:tpu_compile_c_api_hdrs", + "//tensorflow/core/tpu/kernels:tpu_execute_c_api_hdrs", "//tensorflow/core/tpu/kernels:tpu_mesh_state_c_api_hdrs", "//tensorflow/core/tpu/kernels:tpu_util_c_api_hdrs", "//tensorflow/stream_executor/tpu:tpu_executor_c_api_hdrs", diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index 89a36ed9ae4..7a6160a2963 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -523,6 +523,7 @@ cc_library( deps = [ ":tpu_program_c_api_hdrs", ":tpu_util_c_api_hdrs", + "//tensorflow/core/tpu:libtftpu_header", "//tensorflow/stream_executor/tpu:tpu_executor_c_api_hdrs", ], ) diff --git a/tensorflow/core/tpu/kernels/tpu_execute_c_api.h b/tensorflow/core/tpu/kernels/tpu_execute_c_api.h index db73af76efd..38a550444a9 100644 --- a/tensorflow/core/tpu/kernels/tpu_execute_c_api.h +++ b/tensorflow/core/tpu/kernels/tpu_execute_c_api.h @@ -17,6 +17,7 @@ limitations under the License. 
#include "tensorflow/core/tpu/kernels/tpu_program_c_api.h" #include "tensorflow/core/tpu/kernels/tpu_util_c_api.h" +#include "tensorflow/core/tpu/libtftpu.h" #include "tensorflow/stream_executor/tpu/tpu_executor_c_api.h" extern "C" { @@ -26,13 +27,23 @@ typedef struct XLA_DeviceAssignment { size_t size; } XLA_DeviceAssignment; -void TpuExecutable_LoadProgramAndEnqueueToStream( +TFTPU_CAPI_EXPORT void TpuExecutable_LoadProgramAndEnqueueToStream( const XLA_TpuProgram* program, SE_DeviceMemoryBase* arguments, size_t arguments_len, SE_DeviceMemoryBase* result, SE_DeviceMemoryBase* cross_program_prefetch_addr, int32_t rng_seed, XLA_DeviceAssignment* device_assignment, SE_Stream* stream, SE_Status* status); +TFTPU_CAPI_EXPORT void HardwareLayout_HostShapeToDeviceShape( + XLA_Shape* host_shape, XLA_Shape* device_shape); +TFTPU_CAPI_EXPORT int64_t HardwareLayout_ShapeSize(XLA_Shape* shape); + +struct TfTpu_ExecuteApiFn { + TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_LoadProgramAndEnqueueToStream); + TFTPU_ADD_FN_IN_STRUCT(HardwareLayout_HostShapeToDeviceShape); + TFTPU_ADD_FN_IN_STRUCT(HardwareLayout_ShapeSize); +}; + } // extern "C" #endif // TENSORFLOW_CORE_TPU_KERNELS_TPU_EXECUTE_C_API_H_ diff --git a/tensorflow/core/tpu/tpu_api.cc b/tensorflow/core/tpu/tpu_api.cc index 3ce7626de2b..cd6ca80e4e7 100644 --- a/tensorflow/core/tpu/tpu_api.cc +++ b/tensorflow/core/tpu/tpu_api.cc @@ -38,6 +38,11 @@ TfTpu_CompileApiFn* CompileApiFn() { return &compile_api_fn; } +TfTpu_ExecuteApiFn* ExecuteApiFn() { + static TfTpu_ExecuteApiFn execute_api_fn; + return &execute_api_fn; +} + TfTpu_TpuProgramApiFn* TpuProgramApiFn() { static TfTpu_TpuProgramApiFn tpu_program_api_fn; return &tpu_program_api_fn; diff --git a/tensorflow/core/tpu/tpu_api.h b/tensorflow/core/tpu/tpu_api.h index 3467f82a180..b6edbfd14bb 100644 --- a/tensorflow/core/tpu/tpu_api.h +++ b/tensorflow/core/tpu/tpu_api.h @@ -17,6 +17,7 @@ limitations under the License. #define TENSORFLOW_CORE_TPU_TPU_API_H_ #include "tensorflow/core/tpu/kernels/tpu_compile_c_api.h" +#include "tensorflow/core/tpu/kernels/tpu_execute_c_api.h" #include "tensorflow/core/tpu/kernels/tpu_mesh_state_c_api.h" #include "tensorflow/core/tpu/kernels/tpu_util_c_api.h" #include "tensorflow/core/tpu/libtftpu.h" @@ -35,6 +36,8 @@ TfTpu_MeshStateApiFn* MeshStateApiFn(); TfTpu_CompileApiFn* CompileApiFn(); +TfTpu_ExecuteApiFn* ExecuteApiFn(); + TfTpu_TpuProgramApiFn* TpuProgramApiFn(); TfTpu_ExecutorApiFn* ExecutorApiFn(); diff --git a/tensorflow/core/tpu/tpu_api_dlsym_initializer.h b/tensorflow/core/tpu/tpu_api_dlsym_initializer.h index 257fa25ad37..1126e132264 100644 --- a/tensorflow/core/tpu/tpu_api_dlsym_initializer.h +++ b/tensorflow/core/tpu/tpu_api_dlsym_initializer.h @@ -18,6 +18,7 @@ limitations under the License. 
#include "tensorflow/core/platform/status.h" #include "tensorflow/core/tpu/kernels/tpu_compile_c_api.h" +#include "tensorflow/core/tpu/kernels/tpu_execute_c_api.h" #include "tensorflow/core/tpu/kernels/tpu_mesh_state_c_api.h" #include "tensorflow/core/tpu/kernels/tpu_util_c_api.h" #include "tensorflow/core/tpu/libtftpu.h" diff --git a/tensorflow/core/tpu/tpu_library_init_fns.inc b/tensorflow/core/tpu/tpu_library_init_fns.inc index 6737ae42570..7a7c6ecad30 100644 --- a/tensorflow/core/tpu/tpu_library_init_fns.inc +++ b/tensorflow/core/tpu/tpu_library_init_fns.inc @@ -37,6 +37,16 @@ tensorflow::Status SetCompileStructFn(void* library_handle) { return tensorflow::Status::OK(); } +tensorflow::Status SetExecuteStructFn(void* library_handle) { + auto* execute_fn = tensorflow::tpu::ExecuteApiFn(); + + TFTPU_SET_FN(execute_fn, TpuExecutable_LoadProgramAndEnqueueToStream); + TFTPU_SET_FN(execute_fn, HardwareLayout_HostShapeToDeviceShape); + TFTPU_SET_FN(execute_fn, HardwareLayout_ShapeSize); + + return tensorflow::Status::OK(); +} + tensorflow::Status SetTpuProgramStructFn(void* library_handle) { auto* tpu_program_fn = tensorflow::tpu::TpuProgramApiFn(); @@ -145,9 +155,6 @@ tensorflow::Status SetExecutorStructFn(void* library_handle) { TFTPU_SET_FN(executor_fn, TpuTransferManager_GetByteSizeRequirement); TFTPU_SET_FN(executor_fn, TpuTransferManager_WriteSingleTupleIndexTable); - TFTPU_SET_FN(executor_fn, HardwareLayout_HostShapeToDeviceShape); - TFTPU_SET_FN(executor_fn, HardwareLayout_ShapeSize); - TFTPU_SET_FN(executor_fn, TpuComputationPlacer_New); TFTPU_SET_FN(executor_fn, TpuComputationPlacer_Free); @@ -197,6 +204,7 @@ tensorflow::Status InitializeTpuStructFns(void* library_handle) { TF_RETURN_IF_ERROR(SetTpuConfigStructFns(library_handle)); TF_RETURN_IF_ERROR(SetTpuMeshStateStructFns(library_handle)); TF_RETURN_IF_ERROR(SetCompileStructFn(library_handle)); + TF_RETURN_IF_ERROR(SetExecuteStructFn(library_handle)); TF_RETURN_IF_ERROR(SetTpuProgramStructFn(library_handle)); TF_RETURN_IF_ERROR(SetExecutorStructFn(library_handle)); TF_RETURN_IF_ERROR(SetTpuNodeContextStructFns(library_handle)); diff --git a/tensorflow/stream_executor/tpu/tpu_executable.cc b/tensorflow/stream_executor/tpu/tpu_executable.cc index f6ded8415c1..e8ff3a54db8 100644 --- a/tensorflow/stream_executor/tpu/tpu_executable.cc +++ b/tensorflow/stream_executor/tpu/tpu_executable.cc @@ -22,7 +22,6 @@ limitations under the License. #include "tensorflow/stream_executor/tpu/c_api_conversions.h" #include "tensorflow/stream_executor/tpu/proto_helper.h" #include "tensorflow/stream_executor/tpu/status_helper.h" -#include "tensorflow/stream_executor/tpu/tpu_executor_c_api.h" #include "tensorflow/stream_executor/tpu/tpu_platform.h" #include "tensorflow/stream_executor/tpu/tpu_platform_interface.h" @@ -80,10 +79,11 @@ Status TpuExecutable::LoadProgramAndEnqueueToStream( run_options.run_options().stream()->implementation()); StatusHelper status; - TpuExecutable_LoadProgramAndEnqueueToStream( - core_program_, arguments_bases, arguments.size(), &result_base, - (cross_program_prefetch_addr.has_value() ? &prefetch_base : nullptr), - rng_seed, &c_dev_assign, stream, status.c_status); + tensorflow::tpu::ExecuteApiFn() + ->TpuExecutable_LoadProgramAndEnqueueToStreamFn( + core_program_, arguments_bases, arguments.size(), &result_base, + (cross_program_prefetch_addr.has_value() ? 
&prefetch_base : nullptr), + rng_seed, &c_dev_assign, stream, status.c_status); if (dev_assign != nullptr) { stream_executor::tpu::SerializedProto_Free(dev_assign_serialized); @@ -96,7 +96,7 @@ Shape TpuExecutable::HostShapeToDeviceShape(const Shape& host_shape) { XLA_Shape c_host_shape; XLA_Shape c_device_shape; TpuConversions::XlaShapeToCShape(host_shape, &c_host_shape); - tensorflow::tpu::ExecutorApiFn()->HardwareLayout_HostShapeToDeviceShapeFn( + tensorflow::tpu::ExecuteApiFn()->HardwareLayout_HostShapeToDeviceShapeFn( &c_host_shape, &c_device_shape); Shape device_shape = TpuConversions::CShapeToXlaShape(&c_device_shape); TpuConversions::CShapeCleanup(&c_host_shape); @@ -108,7 +108,7 @@ int64 TpuExecutable::ShapeSize(const Shape& shape) { XLA_Shape c_shape; TpuConversions::XlaShapeToCShape(shape, &c_shape); int64 size = - tensorflow::tpu::ExecutorApiFn()->HardwareLayout_ShapeSizeFn(&c_shape); + tensorflow::tpu::ExecuteApiFn()->HardwareLayout_ShapeSizeFn(&c_shape); TpuConversions::CShapeCleanup(&c_shape); return size; } diff --git a/tensorflow/stream_executor/tpu/tpu_executor_c_api.h b/tensorflow/stream_executor/tpu/tpu_executor_c_api.h index 1530c00e621..e99151c5dc3 100644 --- a/tensorflow/stream_executor/tpu/tpu_executor_c_api.h +++ b/tensorflow/stream_executor/tpu/tpu_executor_c_api.h @@ -290,10 +290,6 @@ void TpuTransferManager_WriteSingleTupleIndexTable( SE_DeviceMemoryBase* elements, size_t elements_len, XLA_Shape* shape, SE_DeviceMemoryBase* region, SE_Status* status); -void HardwareLayout_HostShapeToDeviceShape(XLA_Shape* host_shape, - XLA_Shape* device_shape); -int64_t HardwareLayout_ShapeSize(XLA_Shape* shape); - XLA_ComputationPlacer* TpuComputationPlacer_New(); void TpuComputationPlacer_Free(XLA_ComputationPlacer* placer); @@ -401,9 +397,6 @@ struct TfTpu_ExecutorApiFn { TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_GetByteSizeRequirement); TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_WriteSingleTupleIndexTable); - TFTPU_ADD_FN_IN_STRUCT(HardwareLayout_HostShapeToDeviceShape); - TFTPU_ADD_FN_IN_STRUCT(HardwareLayout_ShapeSize); - TFTPU_ADD_FN_IN_STRUCT(TpuComputationPlacer_New); TFTPU_ADD_FN_IN_STRUCT(TpuComputationPlacer_Free); From 75b8d57f5b4eb0b5eca0623119783454ac853bce Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Thu, 16 Jul 2020 23:17:33 +0000 Subject: [PATCH 0648/2522] added wrapper for TF_Tensor in tensor_shape_utils --- .../c/kernels/tensor_shape_utils_test.cc | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/tensorflow/c/kernels/tensor_shape_utils_test.cc b/tensorflow/c/kernels/tensor_shape_utils_test.cc index 25620838437..9c7f45c1256 100644 --- a/tensorflow/c/kernels/tensor_shape_utils_test.cc +++ b/tensorflow/c/kernels/tensor_shape_utils_test.cc @@ -23,11 +23,25 @@ limitations under the License. namespace tensorflow { -template -void TestShapeMatch(T shape) { +namespace { + +// A wrapper that will automatically delete the allocated TF_Tensor +// once out of scope. 
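+// (It plays the role of a scoped deleter: whichever way the test helper
+// returns, TF_DeleteTensor is called exactly once on the wrapped tensor.)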
+struct TF_TensorWrapper { + TF_Tensor* tf_tensor; + TF_TensorWrapper(TF_Tensor* tensor){ + tf_tensor = tensor; + } + ~TF_TensorWrapper() { + TF_DeleteTensor(tf_tensor); + } +}; + +void TestShapeMatch(TensorShape shape) { Tensor tensor(DT_FLOAT, shape); Status status; TF_Tensor* tf_tensor = TF_TensorFromTensor(tensor, &status); + TF_TensorWrapper tensor_wrapper = TF_TensorWrapper(tf_tensor); ASSERT_TRUE(status.ok()) << status.ToString(); ASSERT_EQ(tensor.shape().DebugString(), ShapeDebugString(tf_tensor)); } @@ -40,4 +54,5 @@ TEST(ShapeDebugString, ScalarShape) { TestShapeMatch(TensorShape({})); } +} // namespace } // namespace tensorflow From 34f2782a79d339d2f59114210a0250ed6cdf8b7e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Jul 2020 16:18:12 -0700 Subject: [PATCH 0649/2522] Fixes a misleading error message when creating a functional `tf.keras.Model()`. The Keras functional API uses the keyword arguments `inputs` and `outputs`. Currently however, if someone misspells `outputs` (e.g. as `output`), they get the following error: `TypeError: ('Keyword argument not understood:', 'inputs')`. This is confusing as it suggests that there is a problem with `inputs`, not `output`. This error might ideally be surfaced where it is detected (and then ignored in a `try` statement) in `Functional.__init__()`, however that would require a larger change. Instead this error message is fixed by telling `Model.__init__()` to validate `"inputs'` and `'outputs'` with its `kwargs`. This is less ideal because these arguments are not supposed to be passed to `Model.__init__()`, but as the user thinks that they are calling simply calling `Model.__init__()` it should not cause them any confusion. This fixes Keras [issue 13743](https://github.com/keras-team/keras/issues/13743). PiperOrigin-RevId: 321668253 Change-Id: Ideff3cd9298f573b633a2e6e821fa77b1c862570 --- tensorflow/python/keras/engine/functional.py | 4 ---- .../python/keras/engine/functional_test.py | 20 +++++++++++++++++++ tensorflow/python/keras/engine/training.py | 6 ++++-- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/keras/engine/functional.py b/tensorflow/python/keras/engine/functional.py index fd80e7f8bb4..6c725d0d795 100644 --- a/tensorflow/python/keras/engine/functional.py +++ b/tensorflow/python/keras/engine/functional.py @@ -107,10 +107,6 @@ class Functional(training_lib.Model): @trackable.no_automatic_dependency_tracking def __init__(self, inputs=None, outputs=None, name=None, trainable=True): - # generic_utils.validate_kwargs( - # kwargs, {'name', 'trainable'}, - # 'Functional models may only specify `name` and `trainable` keyword ' - # 'arguments during initialization. 
Got an unexpected argument:') super(Functional, self).__init__(name=name, trainable=trainable) self._init_graph_network(inputs, outputs) diff --git a/tensorflow/python/keras/engine/functional_test.py b/tensorflow/python/keras/engine/functional_test.py index f8a0c4103c5..b104668c9e1 100644 --- a/tensorflow/python/keras/engine/functional_test.py +++ b/tensorflow/python/keras/engine/functional_test.py @@ -2321,5 +2321,25 @@ class CacheCorrectnessTest(keras_parameterized.TestCase): # if training is not passed at runtime self.assertAllEqual(network(x), _call(x, None)) + +class InputsOutputsErrorTest(keras_parameterized.TestCase): + + @testing_utils.enable_v2_dtype_behavior + def test_input_error(self): + inputs = input_layer_lib.Input((10,)) + outputs = layers.Dense(10)(inputs) + with self.assertRaisesRegex( + TypeError, "('Keyword argument not understood:', 'input')"): + models.Model(input=inputs, outputs=outputs) + + @testing_utils.enable_v2_dtype_behavior + def test_output_error(self): + inputs = input_layer_lib.Input((10,)) + outputs = layers.Dense(10)(inputs) + with self.assertRaisesRegex( + TypeError, "('Keyword argument not understood:', 'output')"): + models.Model(inputs=inputs, output=outputs) + + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index 60b31e1ee21..ad72251ed9d 100644 --- a/tensorflow/python/keras/engine/training.py +++ b/tensorflow/python/keras/engine/training.py @@ -258,8 +258,10 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector): # The following are implemented as property functions: # self.trainable_weights # self.non_trainable_weights - generic_utils.validate_kwargs(kwargs, {'trainable', 'dtype', 'dynamic', - 'name', 'autocast'}) + # `inputs` / `outputs` will only appear in kwargs if either are misspelled. + generic_utils.validate_kwargs(kwargs, { + 'trainable', 'dtype', 'dynamic', 'name', 'autocast', 'inputs', 'outputs' + }) super(Model, self).__init__(**kwargs) # By default, Model is a subclass model, which is not in graph network. self._is_graph_network = False From 7a45b98508e4374a04c8ad2db1876b2e53f2e398 Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Thu, 16 Jul 2020 23:22:31 +0000 Subject: [PATCH 0650/2522] spacing for tensor_shape_utils --- tensorflow/c/kernels/tensor_shape_utils.cc | 26 +++++++-------- .../c/kernels/tensor_shape_utils_test.cc | 32 +++++++++---------- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/tensorflow/c/kernels/tensor_shape_utils.cc b/tensorflow/c/kernels/tensor_shape_utils.cc index 6ca138584b7..0720414dea6 100644 --- a/tensorflow/c/kernels/tensor_shape_utils.cc +++ b/tensorflow/c/kernels/tensor_shape_utils.cc @@ -22,19 +22,19 @@ limitations under the License. #include "tensorflow/core/platform/logging.h" namespace tensorflow { - + std::string ShapeDebugString(TF_Tensor* tensor) { - // A TF_Tensor cannot have an unknown rank. - CHECK_GE(TF_NumDims(tensor), 0); - tensorflow::string s = "["; - for (int i = 0; i < TF_NumDims(tensor); ++i) { - if (i > 0) tensorflow::strings::StrAppend(&s, ","); - int64_t dim = TF_Dim(tensor, i); - // A TF_Tensor cannot have an unknown dimension. - CHECK_GE(dim, 0); - tensorflow::strings::StrAppend(&s, dim); - } - tensorflow::strings::StrAppend(&s, "]"); - return s; + // A TF_Tensor cannot have an unknown rank. 
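+  // For example, a tensor of shape {5, 4, 7} is rendered as "[5,4,7]" and a
+  // scalar as "[]" (both cases are covered in tensor_shape_utils_test.cc).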
+ CHECK_GE(TF_NumDims(tensor), 0); + tensorflow::string s = "["; + for (int i = 0; i < TF_NumDims(tensor); ++i) { + if (i > 0) tensorflow::strings::StrAppend(&s, ","); + int64_t dim = TF_Dim(tensor, i); + // A TF_Tensor cannot have an unknown dimension. + CHECK_GE(dim, 0); + tensorflow::strings::StrAppend(&s, dim); + } + tensorflow::strings::StrAppend(&s, "]"); + return s; } } // namespace tensorflow \ No newline at end of file diff --git a/tensorflow/c/kernels/tensor_shape_utils_test.cc b/tensorflow/c/kernels/tensor_shape_utils_test.cc index 9c7f45c1256..a08e4a67e3e 100644 --- a/tensorflow/c/kernels/tensor_shape_utils_test.cc +++ b/tensorflow/c/kernels/tensor_shape_utils_test.cc @@ -28,31 +28,31 @@ namespace { // A wrapper that will automatically delete the allocated TF_Tensor // once out of scope. struct TF_TensorWrapper { - TF_Tensor* tf_tensor; - TF_TensorWrapper(TF_Tensor* tensor){ - tf_tensor = tensor; - } - ~TF_TensorWrapper() { - TF_DeleteTensor(tf_tensor); - } + TF_Tensor* tf_tensor; + TF_TensorWrapper(TF_Tensor* tensor){ + tf_tensor = tensor; + } + ~TF_TensorWrapper() { + TF_DeleteTensor(tf_tensor); + } }; void TestShapeMatch(TensorShape shape) { - Tensor tensor(DT_FLOAT, shape); - Status status; - TF_Tensor* tf_tensor = TF_TensorFromTensor(tensor, &status); - TF_TensorWrapper tensor_wrapper = TF_TensorWrapper(tf_tensor); - ASSERT_TRUE(status.ok()) << status.ToString(); - ASSERT_EQ(tensor.shape().DebugString(), ShapeDebugString(tf_tensor)); + Tensor tensor(DT_FLOAT, shape); + Status status; + TF_Tensor* tf_tensor = TF_TensorFromTensor(tensor, &status); + TF_TensorWrapper tensor_wrapper = TF_TensorWrapper(tf_tensor); + ASSERT_TRUE(status.ok()) << status.ToString(); + ASSERT_EQ(tensor.shape().DebugString(), ShapeDebugString(tf_tensor)); } TEST(ShapeDebugString, RegularShape) { - TestShapeMatch(TensorShape({5, 4, 7})); + TestShapeMatch(TensorShape({5, 4, 7})); } TEST(ShapeDebugString, ScalarShape) { - TestShapeMatch(TensorShape({})); + TestShapeMatch(TensorShape({})); } - + } // namespace } // namespace tensorflow From a7580dc7f24874c25b277261830b7dc15b6c5f43 Mon Sep 17 00:00:00 2001 From: Abolfazl Shahbazi Date: Thu, 16 Jul 2020 16:43:33 -0700 Subject: [PATCH 0651/2522] Adding OneDNN+MPI+Horovod partials and dockerfiles --- .../devel-horovod-jupyter.Dockerfile | 183 -------------- ...vel-onednn-mpi-horovod-jupyter.Dockerfile} | 113 ++++----- ...-16.04-devel-onednn-mpi-horovod.Dockerfile | 118 +++++++++ ....04-onednn-mpi-horovod-jupyter.Dockerfile} | 93 +++----- ...buntu-16.04-onednn-mpi-horovod.Dockerfile} | 79 +++--- ...evel-onednn-mpi-horovod-jupyter.Dockerfile | 132 +++++++++++ ...-18.04-devel-onednn-mpi-horovod.Dockerfile | 118 +++++++++ ...8.04-onednn-mpi-horovod-jupyter.Dockerfile | 112 +++++++++ ...ubuntu-18.04-onednn-mpi-horovod.Dockerfile | 98 ++++++++ ...untu-20.04-devel-onednn-jupyter.Dockerfile | 18 +- ...evel-onednn-mpi-horovod-jupyter.Dockerfile | 142 +++++++++++ ...-20.04-devel-onednn-mpi-horovod.Dockerfile | 128 ++++++++++ .../ubuntu-20.04-devel-onednn.Dockerfile | 18 +- ...0.04-onednn-mpi-horovod-jupyter.Dockerfile | 122 ++++++++++ ...ubuntu-20.04-onednn-mpi-horovod.Dockerfile | 108 +++++++++ .../devel-horovod.partial.Dockerfile | 3 - .../mkl_horovod/horovod.partial.Dockerfile | 3 - .../mkl_horovod/mpi.partial.Dockerfile | 47 ---- .../ubuntu/1604-horovod.partial.Dockerfile | 21 ++ .../ubuntu/2004-horovod.partial.Dockerfile | 16 ++ .../ubuntu/devel-horovod.partial.Dockerfile | 4 + .../onednn/ubuntu/horovod.partial.Dockerfile | 16 ++ 
.../onednn/ubuntu/mpi.partial.Dockerfile | 28 +++ tensorflow/tools/dockerfiles/spec.yml | 224 ++++++++++++++++-- ...kl-horovod.sh => import-onednn-horovod.sh} | 10 +- 25 files changed, 1520 insertions(+), 434 deletions(-) delete mode 100644 tensorflow/tools/dockerfiles/dockerfiles/mkl_horovod/devel-horovod-jupyter.Dockerfile rename tensorflow/tools/dockerfiles/dockerfiles/{mkl_horovod/devel-horovod.Dockerfile => onednn/ubuntu-16.04-devel-onednn-mpi-horovod-jupyter.Dockerfile} (52%) create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-devel-onednn-mpi-horovod.Dockerfile rename tensorflow/tools/dockerfiles/dockerfiles/{mkl_horovod/horovod-jupyter.Dockerfile => onednn/ubuntu-16.04-onednn-mpi-horovod-jupyter.Dockerfile} (50%) rename tensorflow/tools/dockerfiles/dockerfiles/{mkl_horovod/horovod.Dockerfile => onednn/ubuntu-16.04-onednn-mpi-horovod.Dockerfile} (56%) create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-devel-onednn-mpi-horovod-jupyter.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-devel-onednn-mpi-horovod.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-onednn-mpi-horovod-jupyter.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-onednn-mpi-horovod.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn-mpi-horovod-jupyter.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn-mpi-horovod.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-onednn-mpi-horovod-jupyter.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-onednn-mpi-horovod.Dockerfile delete mode 100644 tensorflow/tools/dockerfiles/partials/mkl_horovod/devel-horovod.partial.Dockerfile delete mode 100644 tensorflow/tools/dockerfiles/partials/mkl_horovod/horovod.partial.Dockerfile delete mode 100644 tensorflow/tools/dockerfiles/partials/mkl_horovod/mpi.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/onednn/ubuntu/1604-horovod.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/onednn/ubuntu/2004-horovod.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/onednn/ubuntu/devel-horovod.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/onednn/ubuntu/horovod.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/onednn/ubuntu/mpi.partial.Dockerfile rename tensorflow/tools/dockerfiles/tests/{import-mkl-horovod.sh => import-onednn-horovod.sh} (60%) diff --git a/tensorflow/tools/dockerfiles/dockerfiles/mkl_horovod/devel-horovod-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/mkl_horovod/devel-horovod-jupyter.Dockerfile deleted file mode 100644 index 5ed856259a9..00000000000 --- a/tensorflow/tools/dockerfiles/dockerfiles/mkl_horovod/devel-horovod-jupyter.Dockerfile +++ /dev/null @@ -1,183 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -# -# THIS IS A GENERATED DOCKERFILE. -# -# This file was assembled from multiple pieces, whose use is documented -# throughout. Please refer to the TensorFlow dockerfiles documentation -# for more information. - -ARG UBUNTU_VERSION=18.04 - -FROM ubuntu:${UBUNTU_VERSION} AS base - -RUN apt-get update && apt-get install -y --no-install-recommends \ - build-essential \ - curl \ - git \ - libcurl3-dev \ - libfreetype6-dev \ - libhdf5-serial-dev \ - libzmq3-dev \ - pkg-config \ - rsync \ - software-properties-common \ - sudo \ - unzip \ - zip \ - zlib1g-dev \ - openjdk-8-jdk \ - openjdk-8-jre-headless \ - && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -ENV CI_BUILD_PYTHON python - -# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version -ARG CACHE_STOP=1 -# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1 -ARG CHECKOUT_TF_SRC=0 -# In case of Python 2.7+ we need to add passwd entries for user and group id -RUN chmod a+w /etc/passwd /etc/group -RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src || true - -# See http://bugs.python.org/issue19846 -ENV LANG C.UTF-8 - -RUN apt-get update && apt-get install -y \ - python3 \ - python3-pip - -RUN python3 -m pip --no-cache-dir install --upgrade \ - pip \ - setuptools - -# Some TF tools expect a "python" binary -RUN ln -s $(which python3) /usr/local/bin/python - -RUN apt-get update && apt-get install -y \ - build-essential \ - curl \ - git \ - wget \ - openjdk-8-jdk \ - python3-dev \ - virtualenv \ - swig - -RUN python3 -m pip --no-cache-dir install \ - Pillow \ - h5py \ - keras_preprocessing \ - matplotlib \ - mock \ - 'numpy<1.19.0' \ - scipy \ - sklearn \ - pandas \ - future \ - portpicker \ - enum34 - -# Install bazel -ARG BAZEL_VERSION=3.1.0 -RUN mkdir /bazel && \ - wget -O /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ - wget -O /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \ - chmod +x /bazel/installer.sh && \ - /bazel/installer.sh && \ - rm -f /bazel/installer.sh - -# install libnuma, openssh, wget -RUN ( apt-get update && apt-get install -y --no-install-recommends --fix-missing \ - libnuma-dev \ - openssh-server \ - openssh-client \ - wget && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* ) || \ - ( yum -y update && yum -y install \ - numactl-devel \ - openssh-server \ - openssh-clients \ - wget && \ - yum clean all ) || \ - ( echo "Unsupported Linux distribution. Aborting!" 
&& exit 1 ) - -# Install Open MPI -# download realese version from official website as openmpi github master is not always stable -ARG OPENMPI_VERSION=openmpi-4.0.0 -ARG OPENMPI_DOWNLOAD_URL=https://www.open-mpi.org/software/ompi/v4.0/downloads/openmpi-4.0.0.tar.gz -RUN mkdir /tmp/openmpi && \ - cd /tmp/openmpi && \ - wget ${OPENMPI_DOWNLOAD_URL} && \ - tar zxf ${OPENMPI_VERSION}.tar.gz && \ - cd ${OPENMPI_VERSION} && \ - ./configure --enable-orterun-prefix-by-default && \ - make -j $(nproc) all && \ - make install && \ - ldconfig && \ - rm -rf /tmp/openmpi - -# Create a wrapper for OpenMPI to allow running as root by default -RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real && \ - echo '#!/bin/bash' > /usr/local/bin/mpirun && \ - echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/local/bin/mpirun && \ - chmod a+x /usr/local/bin/mpirun - -# Configure OpenMPI to run good defaults: -RUN echo "btl_tcp_if_exclude = lo,docker0" >> /usr/local/etc/openmpi-mca-params.conf - -# Install OpenSSH for MPI to communicate between containers -RUN mkdir -p /var/run/sshd - -# Allow OpenSSH to talk to containers without asking for confirmation -RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \ - echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \ - mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config - -# Check out horovod source code if --build-arg CHECKOUT_HOROVOD_SRC=1 -ARG CHECKOUT_HOROVOD_SRC=0 -RUN test "${CHECKOUT_HOROVOD_SRC}" -eq 1 && git clone --recursive https://github.com/uber/horovod.git /horovod_src || true - -COPY bashrc /etc/bash.bashrc -RUN chmod a+rwx /etc/bash.bashrc - -RUN python3 -m pip install --no-cache-dir jupyter matplotlib -# Pin ipykernel and nbformat; see https://github.com/ipython/ipykernel/issues/422 -RUN python3 -m pip install --no-cache-dir jupyter_http_over_ws ipykernel==5.1.1 nbformat==4.4.0 -RUN jupyter serverextension enable --py jupyter_http_over_ws - -RUN mkdir -p /tf/tensorflow-tutorials && chmod -R a+rwx /tf/ -RUN mkdir /.local && chmod a+rwx /.local -RUN apt-get install -y --no-install-recommends wget -# some examples require git to fetch dependencies -RUN apt-get install -y --no-install-recommends git -WORKDIR /tf/tensorflow-tutorials -RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/classification.ipynb -RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/overfit_and_underfit.ipynb -RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/regression.ipynb -RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/save_and_load.ipynb -RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/text_classification.ipynb -RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/text_classification_with_hub.ipynb -COPY readme-for-jupyter.md README.md -RUN apt-get autoremove -y && apt-get remove -y wget -WORKDIR /tf -EXPOSE 8888 - -RUN python3 -m ipykernel.kernelspec - -CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"] diff --git a/tensorflow/tools/dockerfiles/dockerfiles/mkl_horovod/devel-horovod.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-devel-onednn-mpi-horovod-jupyter.Dockerfile similarity index 52% rename from tensorflow/tools/dockerfiles/dockerfiles/mkl_horovod/devel-horovod.Dockerfile rename to 
tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-devel-onednn-mpi-horovod-jupyter.Dockerfile index a4a0bee0bc6..34485a528cd 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/mkl_horovod/devel-horovod.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-devel-onednn-mpi-horovod-jupyter.Dockerfile @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,11 +19,13 @@ # throughout. Please refer to the TensorFlow dockerfiles documentation # for more information. -ARG UBUNTU_VERSION=18.04 +ARG UBUNTU_VERSION=20.04 FROM ubuntu:${UBUNTU_VERSION} AS base -RUN apt-get update && apt-get install -y --no-install-recommends \ +ARG DEBIAN_FRONTEND="noninteractive" + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ build-essential \ curl \ git \ @@ -50,14 +52,13 @@ ENV CI_BUILD_PYTHON python ARG CACHE_STOP=1 # Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1 ARG CHECKOUT_TF_SRC=0 -# In case of Python 2.7+ we need to add passwd entries for user and group id -RUN chmod a+w /etc/passwd /etc/group -RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src || true +ARG TF_BRANCH=master +RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git --branch "${TF_BRANCH}" --single-branch /tensorflow_src || true # See http://bugs.python.org/issue19846 ENV LANG C.UTF-8 -RUN apt-get update && apt-get install -y \ +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ python3 \ python3-pip @@ -68,78 +69,37 @@ RUN python3 -m pip --no-cache-dir install --upgrade \ # Some TF tools expect a "python" binary RUN ln -s $(which python3) /usr/local/bin/python -RUN apt-get update && apt-get install -y \ - build-essential \ - curl \ - git \ - wget \ - openjdk-8-jdk \ - python3-dev \ - virtualenv \ - swig - -RUN python3 -m pip --no-cache-dir install \ - Pillow \ - h5py \ - keras_preprocessing \ - matplotlib \ - mock \ - 'numpy<1.19.0' \ - scipy \ - sklearn \ - pandas \ - future \ - portpicker \ - enum34 +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + curl # Install bazel ARG BAZEL_VERSION=3.1.0 RUN mkdir /bazel && \ - wget -O /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ - wget -O /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \ - chmod +x /bazel/installer.sh && \ - /bazel/installer.sh && \ + curl -fSsL -o /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ + curl -fSsL -o /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \ + bash /bazel/installer.sh && \ rm -f /bazel/installer.sh -# install libnuma, openssh, wget -RUN ( apt-get update && apt-get install -y --no-install-recommends --fix-missing \ - libnuma-dev \ - openssh-server \ - openssh-client \ - wget && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* ) || \ - ( yum -y update && yum -y install \ - numactl-devel \ - openssh-server \ - openssh-clients \ - wget && \ - yum clean all ) || \ - ( echo "Unsupported Linux distribution. Aborting!" 
&& exit 1 ) +ARG DEBIAN_FRONTEND="noninteractive" -# Install Open MPI -# download realese version from official website as openmpi github master is not always stable -ARG OPENMPI_VERSION=openmpi-4.0.0 -ARG OPENMPI_DOWNLOAD_URL=https://www.open-mpi.org/software/ompi/v4.0/downloads/openmpi-4.0.0.tar.gz -RUN mkdir /tmp/openmpi && \ - cd /tmp/openmpi && \ - wget ${OPENMPI_DOWNLOAD_URL} && \ - tar zxf ${OPENMPI_VERSION}.tar.gz && \ - cd ${OPENMPI_VERSION} && \ - ./configure --enable-orterun-prefix-by-default && \ - make -j $(nproc) all && \ - make install && \ - ldconfig && \ - rm -rf /tmp/openmpi +# install libnuma, openssh, wget +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + libopenmpi-dev \ + openmpi-bin \ + openmpi-common \ + openssh-client \ + openssh-server && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* # Create a wrapper for OpenMPI to allow running as root by default -RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real && \ - echo '#!/bin/bash' > /usr/local/bin/mpirun && \ - echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/local/bin/mpirun && \ - chmod a+x /usr/local/bin/mpirun +RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \ + echo '#!/bin/bash' > /usr/bin/mpirun && \ + echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \ + chmod a+x /usr/bin/mpirun # Configure OpenMPI to run good defaults: -RUN echo "btl_tcp_if_exclude = lo,docker0" >> /usr/local/etc/openmpi-mca-params.conf +RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf # Install OpenSSH for MPI to communicate between containers RUN mkdir -p /var/run/sshd @@ -151,7 +111,22 @@ RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_confi # Check out horovod source code if --build-arg CHECKOUT_HOROVOD_SRC=1 ARG CHECKOUT_HOROVOD_SRC=0 -RUN test "${CHECKOUT_HOROVOD_SRC}" -eq 1 && git clone --recursive https://github.com/uber/horovod.git /horovod_src || true +ARG HOROVOD_BRANCH=master +RUN test "${CHECKOUT_HOROVOD_SRC}" -eq 1 && git clone --branch "${HOROVOD_BRANCH}" --single-branch --recursive https://github.com/uber/horovod.git /horovod_src || true COPY bashrc /etc/bash.bashrc RUN chmod a+rwx /etc/bash.bashrc + +RUN python3 -m pip install --no-cache-dir jupyter matplotlib +# Pin ipykernel and nbformat; see https://github.com/ipython/ipykernel/issues/422 +RUN python3 -m pip install --no-cache-dir jupyter_http_over_ws ipykernel==5.1.1 nbformat==4.4.0 +RUN jupyter serverextension enable --py jupyter_http_over_ws + +RUN mkdir -p /tf/ && chmod -R a+rwx /tf/ +RUN mkdir /.local && chmod a+rwx /.local +WORKDIR /tf +EXPOSE 8888 + +RUN python3 -m ipykernel.kernelspec + +CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"] diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-devel-onednn-mpi-horovod.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-devel-onednn-mpi-horovod.Dockerfile new file mode 100644 index 00000000000..85e271f54f0 --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-devel-onednn-mpi-horovod.Dockerfile @@ -0,0 +1,118 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. + +ARG UBUNTU_VERSION=20.04 + +FROM ubuntu:${UBUNTU_VERSION} AS base + +ARG DEBIAN_FRONTEND="noninteractive" + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + curl \ + git \ + libcurl3-dev \ + libfreetype6-dev \ + libhdf5-serial-dev \ + libzmq3-dev \ + pkg-config \ + rsync \ + software-properties-common \ + sudo \ + unzip \ + zip \ + zlib1g-dev \ + openjdk-8-jdk \ + openjdk-8-jre-headless \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +ENV CI_BUILD_PYTHON python + +# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1 +ARG CHECKOUT_TF_SRC=0 +ARG TF_BRANCH=master +RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git --branch "${TF_BRANCH}" --single-branch /tensorflow_src || true + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + python3 \ + python3-pip + +RUN python3 -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -s $(which python3) /usr/local/bin/python + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + curl + +# Install bazel +ARG BAZEL_VERSION=3.1.0 +RUN mkdir /bazel && \ + curl -fSsL -o /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ + curl -fSsL -o /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \ + bash /bazel/installer.sh && \ + rm -f /bazel/installer.sh + +ARG DEBIAN_FRONTEND="noninteractive" + +# install libnuma, openssh, wget +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + libopenmpi-dev \ + openmpi-bin \ + openmpi-common \ + openssh-client \ + openssh-server && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Create a wrapper for OpenMPI to allow running as root by default +RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \ + echo '#!/bin/bash' > /usr/bin/mpirun && \ + echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \ + chmod a+x /usr/bin/mpirun + +# Configure OpenMPI to run good defaults: +RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf + +# Install OpenSSH for MPI to communicate between containers +RUN mkdir -p /var/run/sshd + +# Allow OpenSSH to talk to containers without asking for confirmation +RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \ + echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \ + mv /etc/ssh/ssh_config.new 
/etc/ssh/ssh_config + +# Check out horovod source code if --build-arg CHECKOUT_HOROVOD_SRC=1 +ARG CHECKOUT_HOROVOD_SRC=0 +ARG HOROVOD_BRANCH=master +RUN test "${CHECKOUT_HOROVOD_SRC}" -eq 1 && git clone --branch "${HOROVOD_BRANCH}" --single-branch --recursive https://github.com/uber/horovod.git /horovod_src || true + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc diff --git a/tensorflow/tools/dockerfiles/dockerfiles/mkl_horovod/horovod-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-onednn-mpi-horovod-jupyter.Dockerfile similarity index 50% rename from tensorflow/tools/dockerfiles/dockerfiles/mkl_horovod/horovod-jupyter.Dockerfile rename to tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-onednn-mpi-horovod-jupyter.Dockerfile index 00c21e287f1..7a46ea0707d 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/mkl_horovod/horovod-jupyter.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-onednn-mpi-horovod-jupyter.Dockerfile @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,16 +19,14 @@ # throughout. Please refer to the TensorFlow dockerfiles documentation # for more information. -ARG UBUNTU_VERSION=18.04 +ARG UBUNTU_VERSION=20.04 FROM ubuntu:${UBUNTU_VERSION} as base -RUN apt-get update && apt-get install -y curl - # See http://bugs.python.org/issue19846 ENV LANG C.UTF-8 -RUN apt-get update && apt-get install -y \ +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ python3 \ python3-pip @@ -50,45 +48,26 @@ ARG TF_PACKAGE=tensorflow ARG TF_PACKAGE_VERSION= RUN python3 -m pip install --no-cache-dir ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}} -# install libnuma, openssh, wget -RUN ( apt-get update && apt-get install -y --no-install-recommends --fix-missing \ - libnuma-dev \ - openssh-server \ - openssh-client \ - wget && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* ) || \ - ( yum -y update && yum -y install \ - numactl-devel \ - openssh-server \ - openssh-clients \ - wget && \ - yum clean all ) || \ - ( echo "Unsupported Linux distribution. Aborting!" 
&& exit 1 ) +ARG DEBIAN_FRONTEND="noninteractive" -# Install Open MPI -# download realese version from official website as openmpi github master is not always stable -ARG OPENMPI_VERSION=openmpi-4.0.0 -ARG OPENMPI_DOWNLOAD_URL=https://www.open-mpi.org/software/ompi/v4.0/downloads/openmpi-4.0.0.tar.gz -RUN mkdir /tmp/openmpi && \ - cd /tmp/openmpi && \ - wget ${OPENMPI_DOWNLOAD_URL} && \ - tar zxf ${OPENMPI_VERSION}.tar.gz && \ - cd ${OPENMPI_VERSION} && \ - ./configure --enable-orterun-prefix-by-default && \ - make -j $(nproc) all && \ - make install && \ - ldconfig && \ - rm -rf /tmp/openmpi +# install libnuma, openssh, wget +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + libopenmpi-dev \ + openmpi-bin \ + openmpi-common \ + openssh-client \ + openssh-server && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* # Create a wrapper for OpenMPI to allow running as root by default -RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real && \ - echo '#!/bin/bash' > /usr/local/bin/mpirun && \ - echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/local/bin/mpirun && \ - chmod a+x /usr/local/bin/mpirun +RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \ + echo '#!/bin/bash' > /usr/bin/mpirun && \ + echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \ + chmod a+x /usr/bin/mpirun # Configure OpenMPI to run good defaults: -RUN echo "btl_tcp_if_exclude = lo,docker0" >> /usr/local/etc/openmpi-mca-params.conf +RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf # Install OpenSSH for MPI to communicate between containers RUN mkdir -p /var/run/sshd @@ -99,8 +78,26 @@ RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_confi mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config # Install Horovod -ARG HOROVOD_VERSION=0.16.4 -RUN python3 -m pip install --no-cache-dir horovod==${HOROVOD_VERSION} +ARG HOROVOD_WITHOUT_PYTORCH=1 +ARG HOROVOD_WITHOUT_MXNET=1 +ARG HOROVOD_WITH_TENSORFLOW=1 +ARG HOROVOD_VERSION= + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + software-properties-common + +RUN add-apt-repository ppa:ubuntu-toolchain-r/test + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + g++-8 \ + gcc-8 \ + python3-dev + +RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-5 500 --slave /usr/bin/g++ g++ /usr/bin/g++-5 && \ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 800 --slave /usr/bin/g++ g++ /usr/bin/g++-8 + +RUN python3 -m pip install --no-cache-dir horovod${HOROVOD_VERSION:+==${HOROVOD_VERSION}} COPY bashrc /etc/bash.bashrc RUN chmod a+rwx /etc/bash.bashrc @@ -110,20 +107,8 @@ RUN python3 -m pip install --no-cache-dir jupyter matplotlib RUN python3 -m pip install --no-cache-dir jupyter_http_over_ws ipykernel==5.1.1 nbformat==4.4.0 RUN jupyter serverextension enable --py jupyter_http_over_ws -RUN mkdir -p /tf/tensorflow-tutorials && chmod -R a+rwx /tf/ +RUN mkdir -p /tf/ && chmod -R a+rwx /tf/ RUN mkdir /.local && chmod a+rwx /.local -RUN apt-get install -y --no-install-recommends wget -# some examples require git to fetch dependencies -RUN apt-get install -y --no-install-recommends git -WORKDIR /tf/tensorflow-tutorials -RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/classification.ipynb -RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/overfit_and_underfit.ipynb -RUN wget 
https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/regression.ipynb -RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/save_and_load.ipynb -RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/text_classification.ipynb -RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/text_classification_with_hub.ipynb -COPY readme-for-jupyter.md README.md -RUN apt-get autoremove -y && apt-get remove -y wget WORKDIR /tf EXPOSE 8888 diff --git a/tensorflow/tools/dockerfiles/dockerfiles/mkl_horovod/horovod.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-onednn-mpi-horovod.Dockerfile similarity index 56% rename from tensorflow/tools/dockerfiles/dockerfiles/mkl_horovod/horovod.Dockerfile rename to tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-onednn-mpi-horovod.Dockerfile index bef75f1e495..8fb1ee56930 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/mkl_horovod/horovod.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-16.04-onednn-mpi-horovod.Dockerfile @@ -1,4 +1,4 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,16 +19,14 @@ # throughout. Please refer to the TensorFlow dockerfiles documentation # for more information. -ARG UBUNTU_VERSION=18.04 +ARG UBUNTU_VERSION=20.04 FROM ubuntu:${UBUNTU_VERSION} as base -RUN apt-get update && apt-get install -y curl - # See http://bugs.python.org/issue19846 ENV LANG C.UTF-8 -RUN apt-get update && apt-get install -y \ +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ python3 \ python3-pip @@ -50,45 +48,26 @@ ARG TF_PACKAGE=tensorflow ARG TF_PACKAGE_VERSION= RUN python3 -m pip install --no-cache-dir ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}} -# install libnuma, openssh, wget -RUN ( apt-get update && apt-get install -y --no-install-recommends --fix-missing \ - libnuma-dev \ - openssh-server \ - openssh-client \ - wget && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* ) || \ - ( yum -y update && yum -y install \ - numactl-devel \ - openssh-server \ - openssh-clients \ - wget && \ - yum clean all ) || \ - ( echo "Unsupported Linux distribution. Aborting!" 
&& exit 1 ) +ARG DEBIAN_FRONTEND="noninteractive" -# Install Open MPI -# download realese version from official website as openmpi github master is not always stable -ARG OPENMPI_VERSION=openmpi-4.0.0 -ARG OPENMPI_DOWNLOAD_URL=https://www.open-mpi.org/software/ompi/v4.0/downloads/openmpi-4.0.0.tar.gz -RUN mkdir /tmp/openmpi && \ - cd /tmp/openmpi && \ - wget ${OPENMPI_DOWNLOAD_URL} && \ - tar zxf ${OPENMPI_VERSION}.tar.gz && \ - cd ${OPENMPI_VERSION} && \ - ./configure --enable-orterun-prefix-by-default && \ - make -j $(nproc) all && \ - make install && \ - ldconfig && \ - rm -rf /tmp/openmpi +# install libnuma, openssh, wget +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + libopenmpi-dev \ + openmpi-bin \ + openmpi-common \ + openssh-client \ + openssh-server && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* # Create a wrapper for OpenMPI to allow running as root by default -RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real && \ - echo '#!/bin/bash' > /usr/local/bin/mpirun && \ - echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/local/bin/mpirun && \ - chmod a+x /usr/local/bin/mpirun +RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \ + echo '#!/bin/bash' > /usr/bin/mpirun && \ + echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \ + chmod a+x /usr/bin/mpirun # Configure OpenMPI to run good defaults: -RUN echo "btl_tcp_if_exclude = lo,docker0" >> /usr/local/etc/openmpi-mca-params.conf +RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf # Install OpenSSH for MPI to communicate between containers RUN mkdir -p /var/run/sshd @@ -99,8 +78,26 @@ RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_confi mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config # Install Horovod -ARG HOROVOD_VERSION=0.16.4 -RUN python3 -m pip install --no-cache-dir horovod==${HOROVOD_VERSION} +ARG HOROVOD_WITHOUT_PYTORCH=1 +ARG HOROVOD_WITHOUT_MXNET=1 +ARG HOROVOD_WITH_TENSORFLOW=1 +ARG HOROVOD_VERSION= + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + software-properties-common + +RUN add-apt-repository ppa:ubuntu-toolchain-r/test + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + g++-8 \ + gcc-8 \ + python3-dev + +RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-5 500 --slave /usr/bin/g++ g++ /usr/bin/g++-5 && \ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 800 --slave /usr/bin/g++ g++ /usr/bin/g++-8 + +RUN python3 -m pip install --no-cache-dir horovod${HOROVOD_VERSION:+==${HOROVOD_VERSION}} COPY bashrc /etc/bash.bashrc RUN chmod a+rwx /etc/bash.bashrc diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-devel-onednn-mpi-horovod-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-devel-onednn-mpi-horovod-jupyter.Dockerfile new file mode 100644 index 00000000000..34485a528cd --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-devel-onednn-mpi-horovod-jupyter.Dockerfile @@ -0,0 +1,132 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. + +ARG UBUNTU_VERSION=20.04 + +FROM ubuntu:${UBUNTU_VERSION} AS base + +ARG DEBIAN_FRONTEND="noninteractive" + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + curl \ + git \ + libcurl3-dev \ + libfreetype6-dev \ + libhdf5-serial-dev \ + libzmq3-dev \ + pkg-config \ + rsync \ + software-properties-common \ + sudo \ + unzip \ + zip \ + zlib1g-dev \ + openjdk-8-jdk \ + openjdk-8-jre-headless \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +ENV CI_BUILD_PYTHON python + +# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1 +ARG CHECKOUT_TF_SRC=0 +ARG TF_BRANCH=master +RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git --branch "${TF_BRANCH}" --single-branch /tensorflow_src || true + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + python3 \ + python3-pip + +RUN python3 -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -s $(which python3) /usr/local/bin/python + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + curl + +# Install bazel +ARG BAZEL_VERSION=3.1.0 +RUN mkdir /bazel && \ + curl -fSsL -o /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ + curl -fSsL -o /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \ + bash /bazel/installer.sh && \ + rm -f /bazel/installer.sh + +ARG DEBIAN_FRONTEND="noninteractive" + +# install libnuma, openssh, wget +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + libopenmpi-dev \ + openmpi-bin \ + openmpi-common \ + openssh-client \ + openssh-server && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Create a wrapper for OpenMPI to allow running as root by default +RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \ + echo '#!/bin/bash' > /usr/bin/mpirun && \ + echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \ + chmod a+x /usr/bin/mpirun + +# Configure OpenMPI to run good defaults: +RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf + +# Install OpenSSH for MPI to communicate between containers +RUN mkdir -p /var/run/sshd + +# Allow OpenSSH to talk to containers without asking for confirmation +RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \ + echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \ + mv /etc/ssh/ssh_config.new 
/etc/ssh/ssh_config + +# Check out horovod source code if --build-arg CHECKOUT_HOROVOD_SRC=1 +ARG CHECKOUT_HOROVOD_SRC=0 +ARG HOROVOD_BRANCH=master +RUN test "${CHECKOUT_HOROVOD_SRC}" -eq 1 && git clone --branch "${HOROVOD_BRANCH}" --single-branch --recursive https://github.com/uber/horovod.git /horovod_src || true + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc + +RUN python3 -m pip install --no-cache-dir jupyter matplotlib +# Pin ipykernel and nbformat; see https://github.com/ipython/ipykernel/issues/422 +RUN python3 -m pip install --no-cache-dir jupyter_http_over_ws ipykernel==5.1.1 nbformat==4.4.0 +RUN jupyter serverextension enable --py jupyter_http_over_ws + +RUN mkdir -p /tf/ && chmod -R a+rwx /tf/ +RUN mkdir /.local && chmod a+rwx /.local +WORKDIR /tf +EXPOSE 8888 + +RUN python3 -m ipykernel.kernelspec + +CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"] diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-devel-onednn-mpi-horovod.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-devel-onednn-mpi-horovod.Dockerfile new file mode 100644 index 00000000000..85e271f54f0 --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-devel-onednn-mpi-horovod.Dockerfile @@ -0,0 +1,118 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. 
+ +ARG UBUNTU_VERSION=20.04 + +FROM ubuntu:${UBUNTU_VERSION} AS base + +ARG DEBIAN_FRONTEND="noninteractive" + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + curl \ + git \ + libcurl3-dev \ + libfreetype6-dev \ + libhdf5-serial-dev \ + libzmq3-dev \ + pkg-config \ + rsync \ + software-properties-common \ + sudo \ + unzip \ + zip \ + zlib1g-dev \ + openjdk-8-jdk \ + openjdk-8-jre-headless \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +ENV CI_BUILD_PYTHON python + +# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1 +ARG CHECKOUT_TF_SRC=0 +ARG TF_BRANCH=master +RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git --branch "${TF_BRANCH}" --single-branch /tensorflow_src || true + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + python3 \ + python3-pip + +RUN python3 -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -s $(which python3) /usr/local/bin/python + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + curl + +# Install bazel +ARG BAZEL_VERSION=3.1.0 +RUN mkdir /bazel && \ + curl -fSsL -o /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ + curl -fSsL -o /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \ + bash /bazel/installer.sh && \ + rm -f /bazel/installer.sh + +ARG DEBIAN_FRONTEND="noninteractive" + +# install libnuma, openssh, wget +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + libopenmpi-dev \ + openmpi-bin \ + openmpi-common \ + openssh-client \ + openssh-server && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Create a wrapper for OpenMPI to allow running as root by default +RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \ + echo '#!/bin/bash' > /usr/bin/mpirun && \ + echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \ + chmod a+x /usr/bin/mpirun + +# Configure OpenMPI to run good defaults: +RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf + +# Install OpenSSH for MPI to communicate between containers +RUN mkdir -p /var/run/sshd + +# Allow OpenSSH to talk to containers without asking for confirmation +RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \ + echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \ + mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config + +# Check out horovod source code if --build-arg CHECKOUT_HOROVOD_SRC=1 +ARG CHECKOUT_HOROVOD_SRC=0 +ARG HOROVOD_BRANCH=master +RUN test "${CHECKOUT_HOROVOD_SRC}" -eq 1 && git clone --branch "${HOROVOD_BRANCH}" --single-branch --recursive https://github.com/uber/horovod.git /horovod_src || true + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-onednn-mpi-horovod-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-onednn-mpi-horovod-jupyter.Dockerfile new file mode 100644 index 00000000000..65043d18443 --- /dev/null +++ 
b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-onednn-mpi-horovod-jupyter.Dockerfile @@ -0,0 +1,112 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. + +ARG UBUNTU_VERSION=20.04 + +FROM ubuntu:${UBUNTU_VERSION} as base + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + python3 \ + python3-pip + +RUN python3 -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -s $(which python3) /usr/local/bin/python + +# Options: +# tensorflow +# tensorflow-gpu +# tf-nightly +# tf-nightly-gpu +# Set --build-arg TF_PACKAGE_VERSION=1.11.0rc0 to install a specific version. +# Installs the latest version by default. +ARG TF_PACKAGE=tensorflow +ARG TF_PACKAGE_VERSION= +RUN python3 -m pip install --no-cache-dir ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}} + +ARG DEBIAN_FRONTEND="noninteractive" + +# install libnuma, openssh, wget +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + libopenmpi-dev \ + openmpi-bin \ + openmpi-common \ + openssh-client \ + openssh-server && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Create a wrapper for OpenMPI to allow running as root by default +RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \ + echo '#!/bin/bash' > /usr/bin/mpirun && \ + echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \ + chmod a+x /usr/bin/mpirun + +# Configure OpenMPI to run good defaults: +RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf + +# Install OpenSSH for MPI to communicate between containers +RUN mkdir -p /var/run/sshd + +# Allow OpenSSH to talk to containers without asking for confirmation +RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \ + echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \ + mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config + +# Install Horovod +ARG HOROVOD_WITHOUT_PYTORCH=1 +ARG HOROVOD_WITHOUT_MXNET=1 +ARG HOROVOD_WITH_TENSORFLOW=1 +ARG HOROVOD_VERSION= + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + g++-8 \ + gcc-8 \ + python3-dev + +RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-7 700 --slave /usr/bin/g++ g++ /usr/bin/g++-7 && \ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 800 --slave /usr/bin/g++ g++ /usr/bin/g++-8 + +RUN python3 -m pip install --no-cache-dir horovod${HOROVOD_VERSION:+==${HOROVOD_VERSION}} + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc + +RUN python3 -m pip install 
--no-cache-dir jupyter matplotlib +# Pin ipykernel and nbformat; see https://github.com/ipython/ipykernel/issues/422 +RUN python3 -m pip install --no-cache-dir jupyter_http_over_ws ipykernel==5.1.1 nbformat==4.4.0 +RUN jupyter serverextension enable --py jupyter_http_over_ws + +RUN mkdir -p /tf/ && chmod -R a+rwx /tf/ +RUN mkdir /.local && chmod a+rwx /.local +WORKDIR /tf +EXPOSE 8888 + +RUN python3 -m ipykernel.kernelspec + +CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"] diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-onednn-mpi-horovod.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-onednn-mpi-horovod.Dockerfile new file mode 100644 index 00000000000..69efc88cd35 --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-onednn-mpi-horovod.Dockerfile @@ -0,0 +1,98 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. + +ARG UBUNTU_VERSION=20.04 + +FROM ubuntu:${UBUNTU_VERSION} as base + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + python3 \ + python3-pip + +RUN python3 -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -s $(which python3) /usr/local/bin/python + +# Options: +# tensorflow +# tensorflow-gpu +# tf-nightly +# tf-nightly-gpu +# Set --build-arg TF_PACKAGE_VERSION=1.11.0rc0 to install a specific version. +# Installs the latest version by default. 
+ARG TF_PACKAGE=tensorflow +ARG TF_PACKAGE_VERSION= +RUN python3 -m pip install --no-cache-dir ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}} + +ARG DEBIAN_FRONTEND="noninteractive" + +# install libnuma, openssh, wget +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + libopenmpi-dev \ + openmpi-bin \ + openmpi-common \ + openssh-client \ + openssh-server && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Create a wrapper for OpenMPI to allow running as root by default +RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \ + echo '#!/bin/bash' > /usr/bin/mpirun && \ + echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \ + chmod a+x /usr/bin/mpirun + +# Configure OpenMPI to run good defaults: +RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf + +# Install OpenSSH for MPI to communicate between containers +RUN mkdir -p /var/run/sshd + +# Allow OpenSSH to talk to containers without asking for confirmation +RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \ + echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \ + mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config + +# Install Horovod +ARG HOROVOD_WITHOUT_PYTORCH=1 +ARG HOROVOD_WITHOUT_MXNET=1 +ARG HOROVOD_WITH_TENSORFLOW=1 +ARG HOROVOD_VERSION= + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + g++-8 \ + gcc-8 \ + python3-dev + +RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-7 700 --slave /usr/bin/g++ g++ /usr/bin/g++-7 && \ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 800 --slave /usr/bin/g++ g++ /usr/bin/g++-8 + +RUN python3 -m pip install --no-cache-dir horovod${HOROVOD_VERSION:+==${HOROVOD_VERSION}} + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn-jupyter.Dockerfile index ffc951f3fc3..b1f1edfe36e 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn-jupyter.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn-jupyter.Dockerfile @@ -57,17 +57,27 @@ RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/t # See http://bugs.python.org/issue19846 ENV LANG C.UTF-8 +ARG PYTHON=python3 RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ - python3 \ - python3-pip + curl \ + software-properties-common -RUN python3 -m pip --no-cache-dir install --upgrade \ +RUN add-apt-repository ppa:deadsnakes/ppa + +RUN apt-get install -y --no-install-recommends --fix-missing \ + ${PYTHON} + +RUN curl -fSsL https://bootstrap.pypa.io/get-pip.py | python3.7 +RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \ pip \ setuptools # Some TF tools expect a "python" binary -RUN ln -s $(which python3) /usr/local/bin/python +RUN ln -sf $(which ${PYTHON}) /usr/local/bin/python && \ + ln -sf $(which ${PYTHON}) /usr/local/bin/python3 && \ + ln -sf $(which ${PYTHON}) /usr/bin/python && \ + ln -sf $(which ${PYTHON}) /usr/bin/python3 RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ curl diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn-mpi-horovod-jupyter.Dockerfile 
b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn-mpi-horovod-jupyter.Dockerfile new file mode 100644 index 00000000000..92b8101078c --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn-mpi-horovod-jupyter.Dockerfile @@ -0,0 +1,142 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. + +ARG UBUNTU_VERSION=20.04 + +FROM ubuntu:${UBUNTU_VERSION} AS base + +ARG DEBIAN_FRONTEND="noninteractive" + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + curl \ + git \ + libcurl3-dev \ + libfreetype6-dev \ + libhdf5-serial-dev \ + libzmq3-dev \ + pkg-config \ + rsync \ + software-properties-common \ + sudo \ + unzip \ + zip \ + zlib1g-dev \ + openjdk-8-jdk \ + openjdk-8-jre-headless \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +ENV CI_BUILD_PYTHON python + +# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1 +ARG CHECKOUT_TF_SRC=0 +ARG TF_BRANCH=master +RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git --branch "${TF_BRANCH}" --single-branch /tensorflow_src || true + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 +ARG PYTHON=python3 + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + curl \ + software-properties-common + +RUN add-apt-repository ppa:deadsnakes/ppa + +RUN apt-get install -y --no-install-recommends --fix-missing \ + ${PYTHON} + +RUN curl -fSsL https://bootstrap.pypa.io/get-pip.py | python3.7 +RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -sf $(which ${PYTHON}) /usr/local/bin/python && \ + ln -sf $(which ${PYTHON}) /usr/local/bin/python3 && \ + ln -sf $(which ${PYTHON}) /usr/bin/python && \ + ln -sf $(which ${PYTHON}) /usr/bin/python3 + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + curl + +# Install bazel +ARG BAZEL_VERSION=3.1.0 +RUN mkdir /bazel && \ + curl -fSsL -o /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ + curl -fSsL -o /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \ + bash /bazel/installer.sh && \ + rm -f /bazel/installer.sh + +ARG DEBIAN_FRONTEND="noninteractive" + +# install libnuma, openssh, wget +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + 
libopenmpi-dev \ + openmpi-bin \ + openmpi-common \ + openssh-client \ + openssh-server && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Create a wrapper for OpenMPI to allow running as root by default +RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \ + echo '#!/bin/bash' > /usr/bin/mpirun && \ + echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \ + chmod a+x /usr/bin/mpirun + +# Configure OpenMPI to run good defaults: +RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf + +# Install OpenSSH for MPI to communicate between containers +RUN mkdir -p /var/run/sshd + +# Allow OpenSSH to talk to containers without asking for confirmation +RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \ + echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \ + mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config + +# Check out horovod source code if --build-arg CHECKOUT_HOROVOD_SRC=1 +ARG CHECKOUT_HOROVOD_SRC=0 +ARG HOROVOD_BRANCH=master +RUN test "${CHECKOUT_HOROVOD_SRC}" -eq 1 && git clone --branch "${HOROVOD_BRANCH}" --single-branch --recursive https://github.com/uber/horovod.git /horovod_src || true + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc + +RUN python3 -m pip install --no-cache-dir jupyter matplotlib +# Pin ipykernel and nbformat; see https://github.com/ipython/ipykernel/issues/422 +RUN python3 -m pip install --no-cache-dir jupyter_http_over_ws ipykernel==5.1.1 nbformat==4.4.0 +RUN jupyter serverextension enable --py jupyter_http_over_ws + +RUN mkdir -p /tf/ && chmod -R a+rwx /tf/ +RUN mkdir /.local && chmod a+rwx /.local +WORKDIR /tf +EXPOSE 8888 + +RUN python3 -m ipykernel.kernelspec + +CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"] diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn-mpi-horovod.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn-mpi-horovod.Dockerfile new file mode 100644 index 00000000000..72275fce911 --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn-mpi-horovod.Dockerfile @@ -0,0 +1,128 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. 
+ +ARG UBUNTU_VERSION=20.04 + +FROM ubuntu:${UBUNTU_VERSION} AS base + +ARG DEBIAN_FRONTEND="noninteractive" + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + curl \ + git \ + libcurl3-dev \ + libfreetype6-dev \ + libhdf5-serial-dev \ + libzmq3-dev \ + pkg-config \ + rsync \ + software-properties-common \ + sudo \ + unzip \ + zip \ + zlib1g-dev \ + openjdk-8-jdk \ + openjdk-8-jre-headless \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +ENV CI_BUILD_PYTHON python + +# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1 +ARG CHECKOUT_TF_SRC=0 +ARG TF_BRANCH=master +RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git --branch "${TF_BRANCH}" --single-branch /tensorflow_src || true + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 +ARG PYTHON=python3 + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + curl \ + software-properties-common + +RUN add-apt-repository ppa:deadsnakes/ppa + +RUN apt-get install -y --no-install-recommends --fix-missing \ + ${PYTHON} + +RUN curl -fSsL https://bootstrap.pypa.io/get-pip.py | python3.7 +RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -sf $(which ${PYTHON}) /usr/local/bin/python && \ + ln -sf $(which ${PYTHON}) /usr/local/bin/python3 && \ + ln -sf $(which ${PYTHON}) /usr/bin/python && \ + ln -sf $(which ${PYTHON}) /usr/bin/python3 + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + curl + +# Install bazel +ARG BAZEL_VERSION=3.1.0 +RUN mkdir /bazel && \ + curl -fSsL -o /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ + curl -fSsL -o /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \ + bash /bazel/installer.sh && \ + rm -f /bazel/installer.sh + +ARG DEBIAN_FRONTEND="noninteractive" + +# install libnuma, openssh, wget +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + libopenmpi-dev \ + openmpi-bin \ + openmpi-common \ + openssh-client \ + openssh-server && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Create a wrapper for OpenMPI to allow running as root by default +RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \ + echo '#!/bin/bash' > /usr/bin/mpirun && \ + echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \ + chmod a+x /usr/bin/mpirun + +# Configure OpenMPI to run good defaults: +RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf + +# Install OpenSSH for MPI to communicate between containers +RUN mkdir -p /var/run/sshd + +# Allow OpenSSH to talk to containers without asking for confirmation +RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \ + echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \ + mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config + +# Check out horovod source code if --build-arg CHECKOUT_HOROVOD_SRC=1 +ARG CHECKOUT_HOROVOD_SRC=0 +ARG HOROVOD_BRANCH=master +RUN test "${CHECKOUT_HOROVOD_SRC}" -eq 1 && git clone --branch "${HOROVOD_BRANCH}" --single-branch --recursive https://github.com/uber/horovod.git /horovod_src || true + +COPY 
bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn.Dockerfile index 10ae251d7ae..f8ae3df3f52 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-devel-onednn.Dockerfile @@ -57,17 +57,27 @@ RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/t # See http://bugs.python.org/issue19846 ENV LANG C.UTF-8 +ARG PYTHON=python3 RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ - python3 \ - python3-pip + curl \ + software-properties-common -RUN python3 -m pip --no-cache-dir install --upgrade \ +RUN add-apt-repository ppa:deadsnakes/ppa + +RUN apt-get install -y --no-install-recommends --fix-missing \ + ${PYTHON} + +RUN curl -fSsL https://bootstrap.pypa.io/get-pip.py | python3.7 +RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \ pip \ setuptools # Some TF tools expect a "python" binary -RUN ln -s $(which python3) /usr/local/bin/python +RUN ln -sf $(which ${PYTHON}) /usr/local/bin/python && \ + ln -sf $(which ${PYTHON}) /usr/local/bin/python3 && \ + ln -sf $(which ${PYTHON}) /usr/bin/python && \ + ln -sf $(which ${PYTHON}) /usr/bin/python3 RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ curl diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-onednn-mpi-horovod-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-onednn-mpi-horovod-jupyter.Dockerfile new file mode 100644 index 00000000000..09527a82523 --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-onednn-mpi-horovod-jupyter.Dockerfile @@ -0,0 +1,122 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. 
+ +ARG UBUNTU_VERSION=20.04 + +FROM ubuntu:${UBUNTU_VERSION} as base + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 +ARG PYTHON=python3 + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + curl \ + software-properties-common + +RUN add-apt-repository ppa:deadsnakes/ppa + +RUN apt-get install -y --no-install-recommends --fix-missing \ + ${PYTHON} + +RUN curl -fSsL https://bootstrap.pypa.io/get-pip.py | python3.7 +RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -sf $(which ${PYTHON}) /usr/local/bin/python && \ + ln -sf $(which ${PYTHON}) /usr/local/bin/python3 && \ + ln -sf $(which ${PYTHON}) /usr/bin/python && \ + ln -sf $(which ${PYTHON}) /usr/bin/python3 + +# Options: +# tensorflow +# tensorflow-gpu +# tf-nightly +# tf-nightly-gpu +# Set --build-arg TF_PACKAGE_VERSION=1.11.0rc0 to install a specific version. +# Installs the latest version by default. +ARG TF_PACKAGE=tensorflow +ARG TF_PACKAGE_VERSION= +RUN python3 -m pip install --no-cache-dir ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}} + +ARG DEBIAN_FRONTEND="noninteractive" + +# install libnuma, openssh, wget +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + libopenmpi-dev \ + openmpi-bin \ + openmpi-common \ + openssh-client \ + openssh-server && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Create a wrapper for OpenMPI to allow running as root by default +RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \ + echo '#!/bin/bash' > /usr/bin/mpirun && \ + echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \ + chmod a+x /usr/bin/mpirun + +# Configure OpenMPI to run good defaults: +RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf + +# Install OpenSSH for MPI to communicate between containers +RUN mkdir -p /var/run/sshd + +# Allow OpenSSH to talk to containers without asking for confirmation +RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \ + echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \ + mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config + +# Install Horovod +ARG HOROVOD_WITHOUT_PYTORCH=1 +ARG HOROVOD_WITHOUT_MXNET=1 +ARG HOROVOD_WITH_TENSORFLOW=1 +ARG HOROVOD_VERSION= + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + g++-8 \ + gcc-8 \ + ${PYTHON}-dev + +RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 100 --slave /usr/bin/g++ g++ /usr/bin/g++-9 && \ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 800 --slave /usr/bin/g++ g++ /usr/bin/g++-8 + +RUN python3 -m pip install --no-cache-dir horovod${HOROVOD_VERSION:+==${HOROVOD_VERSION}} + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc + +RUN python3 -m pip install --no-cache-dir jupyter matplotlib +# Pin ipykernel and nbformat; see https://github.com/ipython/ipykernel/issues/422 +RUN python3 -m pip install --no-cache-dir jupyter_http_over_ws ipykernel==5.1.1 nbformat==4.4.0 +RUN jupyter serverextension enable --py jupyter_http_over_ws + +RUN mkdir -p /tf/ && chmod -R a+rwx /tf/ +RUN mkdir /.local && chmod a+rwx /.local +WORKDIR /tf +EXPOSE 8888 + +RUN python3 -m ipykernel.kernelspec + +CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"] diff --git 
a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-onednn-mpi-horovod.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-onednn-mpi-horovod.Dockerfile new file mode 100644 index 00000000000..a703ed38dcc --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-onednn-mpi-horovod.Dockerfile @@ -0,0 +1,108 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. + +ARG UBUNTU_VERSION=20.04 + +FROM ubuntu:${UBUNTU_VERSION} as base + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 +ARG PYTHON=python3 + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + curl \ + software-properties-common + +RUN add-apt-repository ppa:deadsnakes/ppa + +RUN apt-get install -y --no-install-recommends --fix-missing \ + ${PYTHON} + +RUN curl -fSsL https://bootstrap.pypa.io/get-pip.py | python3.7 +RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -sf $(which ${PYTHON}) /usr/local/bin/python && \ + ln -sf $(which ${PYTHON}) /usr/local/bin/python3 && \ + ln -sf $(which ${PYTHON}) /usr/bin/python && \ + ln -sf $(which ${PYTHON}) /usr/bin/python3 + +# Options: +# tensorflow +# tensorflow-gpu +# tf-nightly +# tf-nightly-gpu +# Set --build-arg TF_PACKAGE_VERSION=1.11.0rc0 to install a specific version. +# Installs the latest version by default. 
+ARG TF_PACKAGE=tensorflow +ARG TF_PACKAGE_VERSION= +RUN python3 -m pip install --no-cache-dir ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}} + +ARG DEBIAN_FRONTEND="noninteractive" + +# install libnuma, openssh, wget +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + libopenmpi-dev \ + openmpi-bin \ + openmpi-common \ + openssh-client \ + openssh-server && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Create a wrapper for OpenMPI to allow running as root by default +RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \ + echo '#!/bin/bash' > /usr/bin/mpirun && \ + echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \ + chmod a+x /usr/bin/mpirun + +# Configure OpenMPI to run good defaults: +RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf + +# Install OpenSSH for MPI to communicate between containers +RUN mkdir -p /var/run/sshd + +# Allow OpenSSH to talk to containers without asking for confirmation +RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \ + echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \ + mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config + +# Install Horovod +ARG HOROVOD_WITHOUT_PYTORCH=1 +ARG HOROVOD_WITHOUT_MXNET=1 +ARG HOROVOD_WITH_TENSORFLOW=1 +ARG HOROVOD_VERSION= + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + g++-8 \ + gcc-8 \ + ${PYTHON}-dev + +RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 100 --slave /usr/bin/g++ g++ /usr/bin/g++-9 && \ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 800 --slave /usr/bin/g++ g++ /usr/bin/g++-8 + +RUN python3 -m pip install --no-cache-dir horovod${HOROVOD_VERSION:+==${HOROVOD_VERSION}} + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc diff --git a/tensorflow/tools/dockerfiles/partials/mkl_horovod/devel-horovod.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/mkl_horovod/devel-horovod.partial.Dockerfile deleted file mode 100644 index dab42914df3..00000000000 --- a/tensorflow/tools/dockerfiles/partials/mkl_horovod/devel-horovod.partial.Dockerfile +++ /dev/null @@ -1,3 +0,0 @@ -# Check out horovod source code if --build-arg CHECKOUT_HOROVOD_SRC=1 -ARG CHECKOUT_HOROVOD_SRC=0 -RUN test "${CHECKOUT_HOROVOD_SRC}" -eq 1 && git clone --recursive https://github.com/uber/horovod.git /horovod_src || true diff --git a/tensorflow/tools/dockerfiles/partials/mkl_horovod/horovod.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/mkl_horovod/horovod.partial.Dockerfile deleted file mode 100644 index 1e1704f89a8..00000000000 --- a/tensorflow/tools/dockerfiles/partials/mkl_horovod/horovod.partial.Dockerfile +++ /dev/null @@ -1,3 +0,0 @@ -# Install Horovod -ARG HOROVOD_VERSION=0.16.4 -RUN python3 -m pip install --no-cache-dir horovod==${HOROVOD_VERSION} diff --git a/tensorflow/tools/dockerfiles/partials/mkl_horovod/mpi.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/mkl_horovod/mpi.partial.Dockerfile deleted file mode 100644 index 67055ab244a..00000000000 --- a/tensorflow/tools/dockerfiles/partials/mkl_horovod/mpi.partial.Dockerfile +++ /dev/null @@ -1,47 +0,0 @@ -# install libnuma, openssh, wget -RUN ( apt-get update && apt-get install -y --no-install-recommends --fix-missing \ - libnuma-dev \ - openssh-server \ - openssh-client \ - wget && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* ) || \ - ( yum -y update && yum -y install \ - numactl-devel \ - 
openssh-server \ - openssh-clients \ - wget && \ - yum clean all ) || \ - ( echo "Unsupported Linux distribution. Aborting!" && exit 1 ) - -# Install Open MPI -# download realese version from official website as openmpi github master is not always stable -ARG OPENMPI_VERSION=openmpi-4.0.0 -ARG OPENMPI_DOWNLOAD_URL=https://www.open-mpi.org/software/ompi/v4.0/downloads/openmpi-4.0.0.tar.gz -RUN mkdir /tmp/openmpi && \ - cd /tmp/openmpi && \ - wget ${OPENMPI_DOWNLOAD_URL} && \ - tar zxf ${OPENMPI_VERSION}.tar.gz && \ - cd ${OPENMPI_VERSION} && \ - ./configure --enable-orterun-prefix-by-default && \ - make -j $(nproc) all && \ - make install && \ - ldconfig && \ - rm -rf /tmp/openmpi - -# Create a wrapper for OpenMPI to allow running as root by default -RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real && \ - echo '#!/bin/bash' > /usr/local/bin/mpirun && \ - echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/local/bin/mpirun && \ - chmod a+x /usr/local/bin/mpirun - -# Configure OpenMPI to run good defaults: -RUN echo "btl_tcp_if_exclude = lo,docker0" >> /usr/local/etc/openmpi-mca-params.conf - -# Install OpenSSH for MPI to communicate between containers -RUN mkdir -p /var/run/sshd - -# Allow OpenSSH to talk to containers without asking for confirmation -RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \ - echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \ - mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config diff --git a/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/1604-horovod.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/1604-horovod.partial.Dockerfile new file mode 100644 index 00000000000..dabe310b306 --- /dev/null +++ b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/1604-horovod.partial.Dockerfile @@ -0,0 +1,21 @@ +# Install Horovod +ARG HOROVOD_WITHOUT_PYTORCH=1 +ARG HOROVOD_WITHOUT_MXNET=1 +ARG HOROVOD_WITH_TENSORFLOW=1 +ARG HOROVOD_VERSION= + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + software-properties-common + +RUN add-apt-repository ppa:ubuntu-toolchain-r/test + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + g++-8 \ + gcc-8 \ + python3-dev + +RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-5 500 --slave /usr/bin/g++ g++ /usr/bin/g++-5 && \ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 800 --slave /usr/bin/g++ g++ /usr/bin/g++-8 + +RUN python3 -m pip install --no-cache-dir horovod${HOROVOD_VERSION:+==${HOROVOD_VERSION}} diff --git a/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/2004-horovod.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/2004-horovod.partial.Dockerfile new file mode 100644 index 00000000000..f018c3a2fc5 --- /dev/null +++ b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/2004-horovod.partial.Dockerfile @@ -0,0 +1,16 @@ +# Install Horovod +ARG HOROVOD_WITHOUT_PYTORCH=1 +ARG HOROVOD_WITHOUT_MXNET=1 +ARG HOROVOD_WITH_TENSORFLOW=1 +ARG HOROVOD_VERSION= + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + g++-8 \ + gcc-8 \ + ${PYTHON}-dev + +RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 100 --slave /usr/bin/g++ g++ /usr/bin/g++-9 && \ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 800 --slave /usr/bin/g++ g++ /usr/bin/g++-8 + +RUN python3 -m pip install --no-cache-dir horovod${HOROVOD_VERSION:+==${HOROVOD_VERSION}} diff --git 
a/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/devel-horovod.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/devel-horovod.partial.Dockerfile new file mode 100644 index 00000000000..3150c7a108b --- /dev/null +++ b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/devel-horovod.partial.Dockerfile @@ -0,0 +1,4 @@ +# Check out horovod source code if --build-arg CHECKOUT_HOROVOD_SRC=1 +ARG CHECKOUT_HOROVOD_SRC=0 +ARG HOROVOD_BRANCH=master +RUN test "${CHECKOUT_HOROVOD_SRC}" -eq 1 && git clone --branch "${HOROVOD_BRANCH}" --single-branch --recursive https://github.com/uber/horovod.git /horovod_src || true diff --git a/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/horovod.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/horovod.partial.Dockerfile new file mode 100644 index 00000000000..63c1e13443c --- /dev/null +++ b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/horovod.partial.Dockerfile @@ -0,0 +1,16 @@ +# Install Horovod +ARG HOROVOD_WITHOUT_PYTORCH=1 +ARG HOROVOD_WITHOUT_MXNET=1 +ARG HOROVOD_WITH_TENSORFLOW=1 +ARG HOROVOD_VERSION= + +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + g++-8 \ + gcc-8 \ + python3-dev + +RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-7 700 --slave /usr/bin/g++ g++ /usr/bin/g++-7 && \ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 800 --slave /usr/bin/g++ g++ /usr/bin/g++-8 + +RUN python3 -m pip install --no-cache-dir horovod${HOROVOD_VERSION:+==${HOROVOD_VERSION}} diff --git a/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/mpi.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/mpi.partial.Dockerfile new file mode 100644 index 00000000000..cf899900941 --- /dev/null +++ b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/mpi.partial.Dockerfile @@ -0,0 +1,28 @@ +ARG DEBIAN_FRONTEND="noninteractive" + +# install libnuma, openssh, wget +RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ + libopenmpi-dev \ + openmpi-bin \ + openmpi-common \ + openssh-client \ + openssh-server && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Create a wrapper for OpenMPI to allow running as root by default +RUN mv /usr/bin/mpirun /usr/bin/mpirun.real && \ + echo '#!/bin/bash' > /usr/bin/mpirun && \ + echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/bin/mpirun && \ + chmod a+x /usr/bin/mpirun + +# Configure OpenMPI to run good defaults: +RUN echo "btl_tcp_if_exclude = lo,docker0" >> /etc/openmpi/openmpi-mca-params.conf + +# Install OpenSSH for MPI to communicate between containers +RUN mkdir -p /var/run/sshd + +# Allow OpenSSH to talk to containers without asking for confirmation +RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \ + echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \ + mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config diff --git a/tensorflow/tools/dockerfiles/spec.yml b/tensorflow/tools/dockerfiles/spec.yml index 4b52d0553ba..83829d73346 100644 --- a/tensorflow/tools/dockerfiles/spec.yml +++ b/tensorflow/tools/dockerfiles/spec.yml @@ -32,7 +32,6 @@ releases: tag_specs: - "{nightly}{jupyter}" - "{_TAG_PREFIX}{ubuntu-devel}" - # Built per-release and pushed to tensorflow/tensorflow # --arg _TAG_PREFIX= should be set to "1.11" (for example) or "latest". 
versioned: @@ -44,6 +43,10 @@ releases: - "{_TAG_PREFIX}{ubuntu-onednn}{onednn-jupyter}" - "{_TAG_PREFIX}{ubuntu-devel-onednn}" - "{_TAG_PREFIX}{ubuntu-devel-onednn}{onednn-jupyter}" + - "{_TAG_PREFIX}{ubuntu-onednn-mpi-horovod}" + - "{_TAG_PREFIX}{ubuntu-onednn-mpi-horovod}{onednn-jupyter}" + - "{_TAG_PREFIX}{ubuntu-devel-onednn-mpi-horovod}" + - "{_TAG_PREFIX}{ubuntu-devel-onednn-mpi-horovod}{onednn-jupyter}" # Dockerfiles stored in the TF repo; not pushed anywhere dockerfiles: @@ -54,12 +57,14 @@ releases: - "{ubuntu-devel}{jupyter}" - "{ubuntu-ppc64le}{jupyter}" - "{ubuntu-devel-ppc64le}{jupyter}" - - "{ubuntu-horovod}{jupyter}" - - "{ubuntu-devel-horovod}{jupyter}" - "{ubuntu-onednn}" - "{ubuntu-onednn}{onednn-jupyter}" - "{ubuntu-devel-onednn}" - "{ubuntu-devel-onednn}{onednn-jupyter}" + - "{ubuntu-onednn-mpi-horovod}" + - "{ubuntu-devel-onednn-mpi-horovod}" + - "{ubuntu-onednn-mpi-horovod}{onednn-jupyter}" + - "{ubuntu-devel-onednn-mpi-horovod}{onednn-jupyter}" - "{ubuntu-devel-arm64v8}{jupyter}" slice_sets: @@ -142,40 +147,209 @@ slice_sets: - UBUNTU_VERSION=18.04 - CHECKOUT_TF_SRC=1 - ubuntu-horovod: - - add_to_name: "-horovod" - dockerfile_exclusive_name: "horovod" - dockerfile_subdirectory: "mkl_horovod" + ubuntu-onednn: + - add_to_name: "-16.04-onednn" + dockerfile_exclusive_name: "ubuntu-16.04-onednn" + dockerfile_subdirectory: "onednn" partials: - - ubuntu/version - - ubuntu/cpu - - ubuntu/python + - onednn/ubuntu/version + - onednn/ubuntu/cpu + - onednn/ubuntu/python - tensorflow - - mkl_horovod/mpi - - mkl_horovod/horovod - shell tests: - - import-mkl-horovod.sh + - import-onednn.sh args: - TF_PACKAGE=intel-tensorflow - - ubuntu-devel-horovod: - - add_to_name: "devel-horovod" - dockerfile_exclusive_name: "devel-horovod" - dockerfile_subdirectory: "mkl_horovod" + - UBUNTU_VERSION=16.04 + - add_to_name: "-18.04-onednn" + dockerfile_exclusive_name: "ubuntu-18.04-onednn" + dockerfile_subdirectory: "onednn" partials: - - ubuntu/version - - ubuntu/devel-cpu - - ubuntu/python - - ubuntu/bazel - - mkl_horovod/mpi - - mkl_horovod/devel-horovod + - onednn/ubuntu/version + - onednn/ubuntu/cpu + - onednn/ubuntu/python + - tensorflow - shell tests: - - build-mkl-horovod.sh + - import-onednn.sh args: + - TF_PACKAGE=intel-tensorflow + - UBUNTU_VERSION=18.04 + - add_to_name: "-20.04-onednn" + dockerfile_exclusive_name: "ubuntu-20.04-onednn" + dockerfile_subdirectory: "onednn" + partials: + - onednn/ubuntu/version + - onednn/ubuntu/cpu + - onednn/ubuntu/python3 + - tensorflow + - shell + tests: + - import-onednn.sh + args: + - TF_PACKAGE=intel-tensorflow + - UBUNTU_VERSION=20.04 + - PYTHON=python3.7 + + ubuntu-devel-onednn: + - add_to_name: "-16.04-devel-onednn" + dockerfile_exclusive_name: "ubuntu-16.04-devel-onednn" + dockerfile_subdirectory: "onednn" + partials: + - onednn/ubuntu/version + - onednn/ubuntu/devel + - onednn/ubuntu/python + - onednn/ubuntu/bazel + - shell + tests: + - "" + args: + - UBUNTU_VERSION=16.04 + - CHECKOUT_TF_SRC=1 + - TF_BRANCH=master + - add_to_name: "-18.04-devel-onednn" + dockerfile_exclusive_name: "ubuntu-18.04-devel-onednn" + dockerfile_subdirectory: "onednn" + partials: + - onednn/ubuntu/version + - onednn/ubuntu/devel + - onednn/ubuntu/python + - onednn/ubuntu/bazel + - shell + tests: + - "" + args: + - UBUNTU_VERSION=18.04 + - CHECKOUT_TF_SRC=1 + - TF_BRANCH=master + - add_to_name: "-20.04-devel-onednn" + dockerfile_exclusive_name: "ubuntu-20.04-devel-onednn" + dockerfile_subdirectory: "onednn" + partials: + - onednn/ubuntu/version + - onednn/ubuntu/devel 
+ - onednn/ubuntu/python3 + - onednn/ubuntu/bazel + - shell + tests: + - "" + args: + - UBUNTU_VERSION=20.04 + - PYTHON=python3.7 + - CHECKOUT_TF_SRC=1 + - TF_BRANCH=master + + ubuntu-onednn-mpi-horovod: + - add_to_name: "-16.04-onednn-mpi-horovod" + dockerfile_exclusive_name: "ubuntu-16.04-onednn-mpi-horovod" + dockerfile_subdirectory: "onednn" + partials: + - onednn/ubuntu/version + - onednn/ubuntu/cpu + - onednn/ubuntu/python + - tensorflow + - onednn/ubuntu/mpi + - onednn/ubuntu/1604-horovod + - shell + tests: + - import-onednn-horovod.sh + args: + - UBUNTU_VERSION=16.04 + - DEBIAN_FRONTEND="noninteractive" + - TF_PACKAGE=intel-tensorflow + - add_to_name: "-18.04-onednn-mpi-horovod" + dockerfile_exclusive_name: "ubuntu-18.04-onednn-mpi-horovod" + dockerfile_subdirectory: "onednn" + partials: + - onednn/ubuntu/version + - onednn/ubuntu/cpu + - onednn/ubuntu/python + - tensorflow + - onednn/ubuntu/mpi + - onednn/ubuntu/horovod + - shell + tests: + - import-onednn-horovod.sh + args: + - UBUNTU_VERSION=18.04 + - DEBIAN_FRONTEND="noninteractive" + - TF_PACKAGE=intel-tensorflow + - add_to_name: "-20.04-onednn-mpi-horovod" + dockerfile_exclusive_name: "ubuntu-20.04-onednn-mpi-horovod" + dockerfile_subdirectory: "onednn" + partials: + - onednn/ubuntu/version + - onednn/ubuntu/cpu + - onednn/ubuntu/python3 + - tensorflow + - onednn/ubuntu/mpi + - onednn/ubuntu/2004-horovod + - shell + tests: + - import-onednn-horovod.sh + args: + - UBUNTU_VERSION=20.04 + - PYTHON=python3.7 + - DEBIAN_FRONTEND="noninteractive" + - TF_PACKAGE=intel-tensorflow + + ubuntu-devel-onednn-mpi-horovod: + - add_to_name: "-16.04-onednn-devel-mpi-horovod" + dockerfile_exclusive_name: "ubuntu-16.04-devel-onednn-mpi-horovod" + dockerfile_subdirectory: "onednn" + partials: + - onednn/ubuntu/version + - onednn/ubuntu/devel + - onednn/ubuntu/python + - onednn/ubuntu/bazel + - onednn/ubuntu/mpi + - onednn/ubuntu/devel-horovod + - shell + tests: + - "" + args: + - UBUNTU_VERSION=16.04 - CHECKOUT_TF_SRC=1 - CHECKOUT_HOROVOD_SRC=1 + - HOROVOD_BRANCH=master + - add_to_name: "-18.04-onednn-devel-mpi-horovod" + dockerfile_exclusive_name: "ubuntu-18.04-devel-onednn-mpi-horovod" + dockerfile_subdirectory: "onednn" + partials: + - onednn/ubuntu/version + - onednn/ubuntu/devel + - onednn/ubuntu/python + - onednn/ubuntu/bazel + - onednn/ubuntu/mpi + - onednn/ubuntu/devel-horovod + - shell + tests: + - "" + args: + - UBUNTU_VERSION=18.04 + - CHECKOUT_TF_SRC=1 + - CHECKOUT_HOROVOD_SRC=1 + - HOROVOD_BRANCH=master + - add_to_name: "-20.04-onednn-devel-mpi-horovod" + dockerfile_exclusive_name: "ubuntu-20.04-devel-onednn-mpi-horovod" + dockerfile_subdirectory: "onednn" + partials: + - onednn/ubuntu/version + - onednn/ubuntu/devel + - onednn/ubuntu/python3 + - onednn/ubuntu/bazel + - onednn/ubuntu/mpi + - onednn/ubuntu/devel-horovod + - shell + tests: + - "" + args: + - UBUNTU_VERSION=20.04 + - PYTHON=python3.7 + - CHECKOUT_TF_SRC=1 + - CHECKOUT_HOROVOD_SRC=1 + - HOROVOD_BRANCH=master ubuntu-onednn: diff --git a/tensorflow/tools/dockerfiles/tests/import-mkl-horovod.sh b/tensorflow/tools/dockerfiles/tests/import-onednn-horovod.sh similarity index 60% rename from tensorflow/tools/dockerfiles/tests/import-mkl-horovod.sh rename to tensorflow/tools/dockerfiles/tests/import-onednn-horovod.sh index b1cae48c6ee..18b3b19d69d 100755 --- a/tensorflow/tools/dockerfiles/tests/import-mkl-horovod.sh +++ b/tensorflow/tools/dockerfiles/tests/import-onednn-horovod.sh @@ -15,4 +15,12 @@ # limitations under the License. 
# ============================================================================ -python -c 'from tensorflow.python import pywrap_tensorflow; pywrap_tensorflow.IsMklEnabled() or exit(1); import horovod.tensorflow as hvd' +{ # try + echo `python -c 'from tensorflow.python import _pywrap_util_port; print(_pywrap_util_port.IsMklEnabled()); import horovod.tensorflow as hvd'` + echo "PASS: Horovod with MKL is enabled" +} || { # catch + echo `python -c 'from tensorflow.python import pywrap_tensorflow; print(pywrap_tensorflow.IsMklEnabled()); import horovod.tensorflow as hvd'` + echo "PASS: Horovod with Old MKL is detected" +} || { # finally + die "FAIL: Horovod with MKL is not enabled" +} From 700a6171ddf5659daf0fc38e0527ddd209acad01 Mon Sep 17 00:00:00 2001 From: Xingyu Long Date: Thu, 16 Jul 2020 19:56:49 -0400 Subject: [PATCH 0652/2522] Update BUILD file and fix build_time. --- tensorflow/python/keras/benchmarks/BUILD | 1 + tensorflow/python/keras/benchmarks/benchmark_util.py | 2 +- tensorflow/python/keras/benchmarks/distribution_util.py | 4 +++- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/keras/benchmarks/BUILD b/tensorflow/python/keras/benchmarks/BUILD index 307c339ca93..26fee27baaf 100644 --- a/tensorflow/python/keras/benchmarks/BUILD +++ b/tensorflow/python/keras/benchmarks/BUILD @@ -28,6 +28,7 @@ py_library( name = "keras_benchmark_lib_pip", deps = [ ":benchmark_util", + ":distribution_util", "//tensorflow/python/keras/benchmarks/saved_model_benchmarks:saved_model_benchmark_util", ], ) diff --git a/tensorflow/python/keras/benchmarks/benchmark_util.py b/tensorflow/python/keras/benchmarks/benchmark_util.py index 2ffdb3d520d..8fd7e1a2c58 100644 --- a/tensorflow/python/keras/benchmarks/benchmark_util.py +++ b/tensorflow/python/keras/benchmarks/benchmark_util.py @@ -116,10 +116,10 @@ def measure_performance(model_fn, for _ in range(run_iters): timer = timeit.default_timer - t0 = timer() # Init the distribution strategy scope for each iteration. strategy_scope = distribution_util.get_strategy_scope(strategy) with strategy_scope: + t0 = timer() model = model_fn() build_time = timer() - t0 diff --git a/tensorflow/python/keras/benchmarks/distribution_util.py b/tensorflow/python/keras/benchmarks/distribution_util.py index 970269eda8e..4d9134e3e7b 100644 --- a/tensorflow/python/keras/benchmarks/distribution_util.py +++ b/tensorflow/python/keras/benchmarks/distribution_util.py @@ -12,7 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Util for running models in a distribution setting.""" +"""Util for running models in a distribution setting. Mostly from +https://github.com/tensorflow/models/blob/master/official/ +utils/misc/distribution_utils.py.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function From 45066950060ff126b0daee65cf2226e93c30176d Mon Sep 17 00:00:00 2001 From: Russell Power Date: Thu, 16 Jul 2020 16:51:03 -0700 Subject: [PATCH 0653/2522] C API adjustments. 
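
This introduces a C API surface for the XLA compiler interface (Tpu_Compiler,
SE_Executable, SE_ExecutionInput/SE_ExecutionOutput) together with a
callback-based SE_DeviceMemoryAllocator: since a DeviceMemoryAllocator cannot
simply be wrapped as an opaque pointer, allocation requests are routed back
through the `allocate`/`deallocate` function pointers carried in the struct.
A rough usage sketch from the C side (illustrative only; the 1 KiB size is
arbitrary and a zero status code is assumed to mean OK, while the struct
fields and TpuStatus_* calls are the ones declared in tpu_executor_c_api.h
below):

  #include "tensorflow/stream_executor/tpu/tpu_executor_c_api.h"

  // Allocates and releases one device buffer via the callback allocator.
  void AllocateAndFree(SE_DeviceMemoryAllocator allocator, int device_ordinal) {
    SE_Status* status = TpuStatus_New();
    SE_ScopedDeviceMemory memory;
    allocator.allocate(allocator.ctx, device_ordinal, /*size=*/1024,
                       /*retry_on_failure=*/true, /*memory_space=*/0,
                       &memory, status);
    if (TpuStatus_Code(status) == 0) {  // Assumption: 0 means OK.
      allocator.deallocate(allocator.ctx, &memory.wrapped, device_ordinal,
                           status);
    }
    TpuStatus_Free(status);
  }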
PiperOrigin-RevId: 321674570 Change-Id: Ib43f8dfc1506cd34747613edfb309628f811c177 --- .../core/tpu/kernels/tpu_program_group.cc | 9 +- tensorflow/stream_executor/tpu/BUILD | 31 ++++ .../stream_executor/tpu/c_api_conversions.h | 123 +++++++++++++++- .../stream_executor/tpu/tpu_executor.cc | 2 + tensorflow/stream_executor/tpu/tpu_executor.h | 2 + .../stream_executor/tpu/tpu_executor_c_api.h | 138 ++++++++++++++++++ tensorflow/stream_executor/tpu/tpu_platform.h | 12 +- tensorflow/stream_executor/tpu/tpu_stream.h | 2 + .../tpu/tpu_transfer_manager.cc | 2 +- 9 files changed, 314 insertions(+), 7 deletions(-) diff --git a/tensorflow/core/tpu/kernels/tpu_program_group.cc b/tensorflow/core/tpu/kernels/tpu_program_group.cc index 2ee9b459abd..be27b7709dd 100644 --- a/tensorflow/core/tpu/kernels/tpu_program_group.cc +++ b/tensorflow/core/tpu/kernels/tpu_program_group.cc @@ -276,14 +276,15 @@ Status TpuProgramGroup::CompileAndBuild( CompileApiFn()->TpuCompile_CompileAndBuildFn(serialized_compilation_request, mesh_state, &xla_tpu_programs, &count, status.c_status); - // SPMD could return 1 result for all partitions. - TF_RET_CHECK(count == 1 || - count == compilation_request.metadata().num_cores_per_replica()); - if (!status.status().ok()) { + if (!status.ok()) { VLOG(1) << "Run CompileAndBuild failed."; return status.status(); } + // SPMD could return 1 result for all partitions. + TF_RET_CHECK(count == 1 || + count == compilation_request.metadata().num_cores_per_replica()); + VLOG(1) << "CreateTpuProgramGroup"; Status serialize_status = CreateTpuProgramGroup(absl::MakeConstSpan(&xla_tpu_programs[0], count), diff --git a/tensorflow/stream_executor/tpu/BUILD b/tensorflow/stream_executor/tpu/BUILD index e9ba0ce9914..6e00542ddf4 100644 --- a/tensorflow/stream_executor/tpu/BUILD +++ b/tensorflow/stream_executor/tpu/BUILD @@ -44,12 +44,18 @@ cc_library( name = "c_api_conversions", hdrs = ["c_api_conversions.h"], deps = [ + ":device_memory_base_helper", ":tpu_executor_c_api_hdrs", + ":tpu_executor_hdrs", + "//tensorflow/compiler/xla:executable_run_options", "//tensorflow/compiler/xla:literal", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:xla_data_proto_cc", + "//tensorflow/compiler/xla/service:executable", + "//tensorflow/compiler/xla/service:maybe_owning_device_memory", "//tensorflow/compiler/xla/service:shaped_buffer", "//tensorflow/stream_executor:device_memory", + "//tensorflow/stream_executor:device_memory_allocator", "@com_google_absl//absl/container:inlined_vector", ], ) @@ -102,6 +108,31 @@ cc_library( alwayslink = True, ) +cc_library( + name = "tpu_executor_hdrs", + hdrs = [ + "tpu_executor.h", + "tpu_executor_interface.h", + "tpu_platform.h", + "tpu_platform_interface.h", + "tpu_stream.h", + "tpu_stream_interface.h", + "tpu_timer.h", + ], + deps = [ + ":device_memory_base_helper", + ":status_helper", + ":tpu_executor_c_api_hdrs", + "//tensorflow/core:lib", + "//tensorflow/core/platform:mutex", + "//tensorflow/core/platform:types", + "//tensorflow/core/tpu:tpu_api", + "//tensorflow/stream_executor", + "//tensorflow/stream_executor/lib", + "@com_google_absl//absl/container:flat_hash_map", + ], +) + cc_library( name = "tpu_executor_base", srcs = [ diff --git a/tensorflow/stream_executor/tpu/c_api_conversions.h b/tensorflow/stream_executor/tpu/c_api_conversions.h index 1bb9ecee688..8052d0f3154 100644 --- a/tensorflow/stream_executor/tpu/c_api_conversions.h +++ b/tensorflow/stream_executor/tpu/c_api_conversions.h @@ -17,13 +17,19 @@ limitations under the License. 
#define TENSORFLOW_STREAM_EXECUTOR_TPU_C_API_CONVERSIONS_H_ #include "absl/container/inlined_vector.h" +#include "tensorflow/compiler/xla/executable_run_options.h" #include "tensorflow/compiler/xla/literal.h" +#include "tensorflow/compiler/xla/service/maybe_owning_device_memory.h" +#include "tensorflow/compiler/xla/service/service_executable_run_options.h" #include "tensorflow/compiler/xla/service/shaped_buffer.h" #include "tensorflow/compiler/xla/shape.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/stream_executor/device_memory.h" +#include "tensorflow/stream_executor/device_memory_allocator.h" #include "tensorflow/stream_executor/tpu/tpu_executor_c_api.h" +#include "tensorflow/stream_executor/tpu/tpu_platform.h" +#include "tensorflow/stream_executor/tpu/tpu_stream.h" class TpuConversions { public: @@ -58,6 +64,22 @@ class TpuConversions { memcpy(c_shape->bytes, p_str.data(), p_str.size()); } + static XLA_ShapeIndex XlaShapeIndexToCShapeIndex( + const xla::ShapeIndex& xla_shape) { + XLA_ShapeIndex c_shape; + CHECK_LT(xla_shape.size(), 8); + c_shape.count = xla_shape.size(); + for (int i = 0; i < xla_shape.size(); ++i) { + c_shape.indices[i] = xla_shape[i]; + } + return c_shape; + } + + static xla::ShapeIndex CShapeIndexToXlaShapeIndex(XLA_ShapeIndex* c_shape) { + return xla::ShapeIndex(&c_shape->indices[0], + &c_shape->indices[c_shape->count]); + } + static void XLAShapedBufferToCShapedBuffer( const xla::ShapedBuffer& buffer, XLA_ShapedBuffer* c_device_buffer) { XlaShapeToCShape(buffer.on_host_shape(), &c_device_buffer->on_host_shape); @@ -92,7 +114,6 @@ class TpuConversions { static xla::MutableBorrowingLiteral CLiteralToXLALiteral( XLA_Literal* c_literal) { xla::Shape shape = CShapeToXlaShape(&c_literal->shape); - LOG(INFO) << "Shape: " << shape.DebugString(); return xla::MutableBorrowingLiteral( absl::MakeSpan(c_literal->buffers, c_literal->count), shape); } @@ -110,6 +131,106 @@ class TpuConversions { CShapeCleanup(&c_buffer->on_host_shape); delete[] c_buffer->bases; } + + static SE_DeviceMemoryAllocator AllocatorToSE_Allocator( + stream_executor::DeviceMemoryAllocator* allocator) { + SE_DeviceMemoryAllocator se_allocator; + if (allocator == nullptr) { + se_allocator.ctx = nullptr; + se_allocator.platform = nullptr; + se_allocator.allocate = nullptr; + se_allocator.deallocate = nullptr; + return se_allocator; + } + se_allocator.platform = + static_cast(allocator->platform()) + ->se_platform(); + se_allocator.ctx = allocator; + se_allocator.allocate = [](void* ctx, int device_ordinal, uint64_t size, + bool retry_on_failure, int64_t memory_space, + SE_ScopedDeviceMemory* memory, + SE_Status* se_status) { + auto allocation = + reinterpret_cast(ctx) + ->Allocate(device_ordinal, size, retry_on_failure, memory_space); + if (!allocation.ok()) { + auto status = allocation.status(); + TpuStatus_Set(se_status, status.code(), status.error_message().data(), + status.error_message().size()); + } else { + auto& scoped_memory = allocation.ValueOrDie(); + memory->wrapped = + DeviceMemoryBaseToSE_DeviceMemoryBase(scoped_memory.Release()); + memory->device_ordinal = scoped_memory.device_ordinal(); + } + }; + + se_allocator.deallocate = [](void* ctx, SE_DeviceMemoryBase* base, + int device_ordinal, SE_Status* se_status) { + auto status = + reinterpret_cast(ctx) + ->Deallocate(device_ordinal, + SE_DeviceMemoryBaseToDeviceMemoryBase(*base)); + if (!status.ok()) { + TpuStatus_Set(se_status, status.code(), 
status.error_message().data(), + status.error_message().size()); + } + }; + return se_allocator; + } + + static SE_ExecutableRunOptions ExecutableRunOptionsToSE_ExecutableRunOptions( + const xla::ServiceExecutableRunOptions& options) { + SE_ExecutableRunOptions se_options; + se_options.allocator = + AllocatorToSE_Allocator(options.run_options().allocator()); + se_options.device_ordinal = options.run_options().device_ordinal(); + se_options.stream = + static_cast(options.stream()->implementation()) + ->se_stream(); + return se_options; + } + + static SE_MaybeOwningDeviceMemory SEOwningDeviceMemoryToC( + stream_executor::OwningDeviceMemory* mem) { + SE_MaybeOwningDeviceMemory se_mem; + se_mem.device_ordinal = mem->device_ordinal(); + se_mem.memory = DeviceMemoryBaseToSE_DeviceMemoryBase(mem->Release()); + se_mem.allocator = AllocatorToSE_Allocator(mem->allocator()); + se_mem.owned = true; + return se_mem; + } + + static SE_MaybeOwningDeviceMemory SEMaybeOwningDeviceMemoryToC( + xla::MaybeOwningDeviceMemory& mem) { + SE_MaybeOwningDeviceMemory se_mem; + se_mem.owned = mem.HasOwnership(); + se_mem.memory = + DeviceMemoryBaseToSE_DeviceMemoryBase(mem.AsDeviceMemoryBase()); + if (mem.HasOwnership()) { + auto owned = mem.Release().value(); + se_mem.device_ordinal = owned.device_ordinal(); + se_mem.allocator = + TpuConversions::AllocatorToSE_Allocator(owned.allocator()); + } else { + se_mem.allocator = AllocatorToSE_Allocator(nullptr); + se_mem.device_ordinal = -1; + } + return se_mem; + } + + static xla::MaybeOwningDeviceMemory COwningDeviceMemToSEOwningDeviceMem( + SE_MaybeOwningDeviceMemory* se_mem, + stream_executor::DeviceMemoryAllocator* allocator) { + if (se_mem->owned) { + return xla::MaybeOwningDeviceMemory(stream_executor::OwningDeviceMemory( + SE_DeviceMemoryBaseToDeviceMemoryBase(se_mem->memory), + se_mem->device_ordinal, allocator)); + } else { + return xla::MaybeOwningDeviceMemory( + SE_DeviceMemoryBaseToDeviceMemoryBase(se_mem->memory)); + } + } }; #endif // THIRD_PARTY_TENSORFLOW_STREAM_EXECUTOR_TPU_C_API_CONVERSIONS_H_ diff --git a/tensorflow/stream_executor/tpu/tpu_executor.cc b/tensorflow/stream_executor/tpu/tpu_executor.cc index d85805777dd..8386653da82 100644 --- a/tensorflow/stream_executor/tpu/tpu_executor.cc +++ b/tensorflow/stream_executor/tpu/tpu_executor.cc @@ -169,7 +169,9 @@ std::unique_ptr<::stream_executor::internal::StreamInterface> TpuExecutor::GetStreamImplementation() { SE_Stream* tpu_stream = tpu::ExecutorApiFn()->TpuStream_NewFn(executor_); auto ptr = absl::make_unique(tpu_stream); + tpu_platform().mutex().lock(); stream_map()[ptr.get()] = tpu_stream; + tpu_platform().mutex().unlock(); return ptr; } diff --git a/tensorflow/stream_executor/tpu/tpu_executor.h b/tensorflow/stream_executor/tpu/tpu_executor.h index 720c4ac16d8..6b0ff030e30 100644 --- a/tensorflow/stream_executor/tpu/tpu_executor.h +++ b/tensorflow/stream_executor/tpu/tpu_executor.h @@ -224,6 +224,8 @@ class TpuExecutor : public tensorflow::tpu::TpuExecutorInterface { LOG(FATAL) << "not yet implemented"; } + SE_StreamExecutor* se_executor() { return executor_; } + private: TpuPlatform& tpu_platform() { return *(tensorflow::down_cast(platform_)); diff --git a/tensorflow/stream_executor/tpu/tpu_executor_c_api.h b/tensorflow/stream_executor/tpu/tpu_executor_c_api.h index e99151c5dc3..e80086bcb69 100644 --- a/tensorflow/stream_executor/tpu/tpu_executor_c_api.h +++ b/tensorflow/stream_executor/tpu/tpu_executor_c_api.h @@ -43,6 +43,11 @@ typedef struct SE_DeviceMemoryBase { uint64_t payload; } 
SE_DeviceMemoryBase; +typedef struct SE_ScopedDeviceMemory { + SE_DeviceMemoryBase wrapped; + int device_ordinal; +} SE_ScopedDeviceMemory; + typedef struct SE_AllocatorStats { int64_t num_allocs; int64_t bytes_in_use; @@ -249,6 +254,8 @@ int64_t TpuTimer_Microseconds(SE_Timer*); SE_Status* TpuStatus_New(); SE_Status* TpuStatus_Create(int32_t code, const char* msg); +void TpuStatus_Set(SE_Status* status, int32_t code, const char* msg, + int32_t len); void TpuStatus_Free(SE_Status* status); const char* TpuStatus_Message(SE_Status* status); int TpuStatus_Code(SE_Status* status); @@ -309,6 +316,137 @@ int TpuCoreLocation_ChipCoordinates_Z(void* tpu_core_location); int TpuCoreLocation_Index(void* tpu_core_location); int TpuCoreLocation_Id(void* tpu_core_location); +// C API for XLA::Compiler interface + +// Note, due to the... odd way in which DeviceMemoryAllocator is used in TF, we +// cannot simply wrap an underlying pointer. Instead, we reverse the call +// direction and request memory via a callback. +typedef void (*SE_AllocateFn)(void* ctx, int device_ordinal, uint64_t size, + bool retry_on_failure, int64_t memory_space, + SE_ScopedDeviceMemory* result, SE_Status* status); + +typedef void (*SE_DeallocateFn)(void* ctx, SE_DeviceMemoryBase* base, + int device_ordinal, SE_Status* status); + +typedef struct SE_DeviceMemoryAllocator { + SE_Platform* platform; + void* ctx; + SE_AllocateFn allocate; + SE_DeallocateFn deallocate; +} SE_DeviceMemoryAllocator; + +typedef struct Tpu_Compiler Tpu_Compiler; +typedef struct SE_Executable SE_Executable; + +typedef struct SE_ExecutableRunOptions { + SE_DeviceMemoryAllocator allocator; + int device_ordinal; + SE_Stream* stream; +} SE_ExecutableRunOptions; + +typedef struct SE_MaybeOwningDeviceMemory { + SE_DeviceMemoryBase memory; + bool owned; + + // Set if owned + int device_ordinal; + SE_DeviceMemoryAllocator allocator; +} SE_MaybeOwningDeviceMemory; + +typedef struct XLA_MaybeOwningDeviceMemoryShapeTree { + XLA_Shape shape; + SE_MaybeOwningDeviceMemory* buffers; +} XLA_MaybeOwningDeviceMemoryShapeTree; + +typedef struct XLA_ShapeIndex { + int64_t indices[8]; + int64_t count; +} XLA_ShapeIndex; + +typedef struct SE_ExecutionInput { + XLA_MaybeOwningDeviceMemoryShapeTree shape_tree; + XLA_ShapeIndex* unowned_indices; + int unowned_indices_size; + XLA_Shape dynamic_shape; + XLA_Shape host_shape; +} SE_ExecutionInput; + +typedef struct SE_ExecutionOutput { + XLA_ShapedBuffer result; + SE_MaybeOwningDeviceMemory* to_be_released; + int to_be_released_size; + XLA_ShapeIndex* aliased_indices; + int aliased_indices_size; +} SE_ExecutionOutput; + +typedef struct XLA_ComputationLayout { + int parameter_count; + XLA_Shape* parameter_layouts; + XLA_Shape result_layout; +} XLA_ComputationLayout; + +typedef struct XLA_HloModuleConfig { + uint64_t seed; + int32_t launch_id; + int64_t replica_count; + int64_t num_partitions; + bool use_spmd_partitioning; + bool has_static_device_assignment; + TpuSerializedProto static_device_assignment; + bool has_entry_computation_layout; + XLA_ComputationLayout entry_computation_layout; +} XLA_HloModuleConfig; + +typedef struct SE_HloExecutionProfile SE_HloExecutionProfile; + +TFTPU_CAPI_EXPORT Tpu_Compiler* TpuCompiler_New(); +TFTPU_CAPI_EXPORT void TpuCompiler_Free(Tpu_Compiler* compiler); + +struct SE_StreamExecutorList { + SE_StreamExecutor** exec; + int count; +}; + +typedef struct XLA_HloModuleGroup { + TpuSerializedProto proto; + XLA_HloModuleConfig* module_config; +} XLA_HloModuleGroup; + +typedef struct XLA_HloModule { + 
TpuSerializedProto proto; + XLA_HloModuleConfig module_config; +} XLA_HloModule; + +TFTPU_CAPI_EXPORT void TpuCompiler_RunHloPasses( + Tpu_Compiler* compiler, XLA_HloModule* se_hlo_module, + SE_StreamExecutor* stream_executor, SE_DeviceMemoryAllocator* allocator, + XLA_HloModule* result, SE_Status* status); + +TFTPU_CAPI_EXPORT void TpuCompiler_RunBackend( + Tpu_Compiler* compiler, XLA_HloModule* se_hlo_module, + SE_StreamExecutor* stream_executor, SE_DeviceMemoryAllocator* allocator, + SE_Executable** result, SE_Status* status); + +TFTPU_CAPI_EXPORT void TpuCompiler_Compile( + Tpu_Compiler* compiler, XLA_HloModuleGroup* se_hlo_module_group, + SE_StreamExecutorList* stream_exec_lists, int num_lists, + SE_DeviceMemoryAllocator* allocator, SE_Executable** executables, + SE_Status* status); + +TFTPU_CAPI_EXPORT int64_t TpuCompiler_ShapeSize(Tpu_Compiler* compiler, + XLA_Shape* c_shape); + +TFTPU_CAPI_EXPORT void TpuExecutable_HloModule(SE_Executable* executable, + TpuSerializedProto* proto); + +TFTPU_CAPI_EXPORT void TpuExecutable_ExecuteAsyncOnStream( + SE_Executable* executable, SE_ExecutableRunOptions* run_options, + SE_ExecutionInput** se_arguments, int se_arguments_size, + SE_HloExecutionProfile* hlo_execution_profile, SE_ExecutionOutput* output, + SE_Status* status); + +TFTPU_CAPI_EXPORT void TpuExecutable_Free(SE_Executable*); + struct TfTpu_ExecutorApiFn { TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_New); TFTPU_ADD_FN_IN_STRUCT(TpuPlatform_Free); diff --git a/tensorflow/stream_executor/tpu/tpu_platform.h b/tensorflow/stream_executor/tpu/tpu_platform.h index 3704f0268f5..b01d033feee 100644 --- a/tensorflow/stream_executor/tpu/tpu_platform.h +++ b/tensorflow/stream_executor/tpu/tpu_platform.h @@ -117,16 +117,26 @@ class TpuPlatform : public ::tensorflow::tpu::TpuPlatformInterface { void InsertEvent(stream_executor::internal::EventInterface* key, SE_Event* val); SE_Event* LookupEvent(stream_executor::internal::EventInterface* key); + SE_Stream* LookupStream(stream_executor::internal::StreamInterface* key) { + mutex().lock(); + auto stream = stream_map_.at(key); + mutex().unlock(); + return stream; + } void EraseEvent(stream_executor::internal::EventInterface* key); + SE_Platform* se_platform() const { return platform_; } + // Returns the number of TPUs per host. static Status TpusPerHost(int* tpus); // Returns the memory capacity of the TPUs on this host. 
static Status TpuMemoryLimit(int64* memory_limit); + tensorflow::mutex& mutex() { return event_map_mu_; } + private: - SE_Platform* platform_; + mutable SE_Platform* platform_; std::string name_; stream_executor::ExecutorCache executor_cache_; StreamMap stream_map_; diff --git a/tensorflow/stream_executor/tpu/tpu_stream.h b/tensorflow/stream_executor/tpu/tpu_stream.h index 09b496bfedc..223341a42e5 100644 --- a/tensorflow/stream_executor/tpu/tpu_stream.h +++ b/tensorflow/stream_executor/tpu/tpu_stream.h @@ -54,6 +54,8 @@ class TpuStream : public tensorflow::tpu::TpuStreamInterface { return status.status(); } + SE_Stream* se_stream() { return stream_; } + private: SE_Stream* stream_; }; diff --git a/tensorflow/stream_executor/tpu/tpu_transfer_manager.cc b/tensorflow/stream_executor/tpu/tpu_transfer_manager.cc index c55af7d58b9..48d3be61b5e 100644 --- a/tensorflow/stream_executor/tpu/tpu_transfer_manager.cc +++ b/tensorflow/stream_executor/tpu/tpu_transfer_manager.cc @@ -117,7 +117,7 @@ void TpuTransferManager::TransferLiteralFromDevice( tpu::ExecutorApiFn()->TpuTransferManager_TransferLiteralFromDeviceFn( manager_, - TpuPlatform::GetRegisteredPlatform()->stream_map()->at( + TpuPlatform::GetRegisteredPlatform()->LookupStream( stream->implementation()), &c_device_buffer, &c_literal, TransferLiteralFromDeviceTrampoline, state); TpuConversions::CShapedBufferCleanup(&c_device_buffer); From d2a72ca245f36665d350cadbf1470336ba8c5725 Mon Sep 17 00:00:00 2001 From: Abolfazl Shahbazi Date: Thu, 16 Jul 2020 17:04:21 -0700 Subject: [PATCH 0654/2522] Update MKL+Horovod test case --- .../tests/import-onednn-horovod.sh | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/tensorflow/tools/dockerfiles/tests/import-onednn-horovod.sh b/tensorflow/tools/dockerfiles/tests/import-onednn-horovod.sh index 18b3b19d69d..9331b686cb1 100755 --- a/tensorflow/tools/dockerfiles/tests/import-onednn-horovod.sh +++ b/tensorflow/tools/dockerfiles/tests/import-onednn-horovod.sh @@ -15,12 +15,16 @@ # limitations under the License. # ============================================================================ -{ # try - echo `python -c 'from tensorflow.python import _pywrap_util_port; print(_pywrap_util_port.IsMklEnabled()); import horovod.tensorflow as hvd'` - echo "PASS: Horovod with MKL is enabled" -} || { # catch - echo `python -c 'from tensorflow.python import pywrap_tensorflow; print(pywrap_tensorflow.IsMklEnabled()); import horovod.tensorflow as hvd'` - echo "PASS: Horovod with Old MKL is detected" -} || { # finally - die "FAIL: Horovod with MKL is not enabled" -} +python -c 'from tensorflow.python import _pywrap_util_port; print(_pywrap_util_port.IsMklEnabled()); import horovod.tensorflow as hvd' +new_mkl_horovod_enabled=$? + +python -c 'from tensorflow.python import pywrap_tensorflow; print(pywrap_tensorflow.IsMklEnabled()); import horovod.tensorflow as hvd' +old_mkl_horovod_enabled=$? + +if [[ $new_mkl_horovod_enabled -eq 0 ]]; then + echo "PASS: Horovod with MKL is enabled" +elif [[ $old_mkl_horovod_enabled -eq 0]]; then + echo "PASS: Horovod with Old MKL is detected" +else + die "FAIL: Horovod with MKL is not enabled" +fi From 0151faf9556bd407de1ea83b4d7de96c33a1e21a Mon Sep 17 00:00:00 2001 From: Nat Jeffries Date: Thu, 16 Jul 2020 17:06:57 -0700 Subject: [PATCH 0655/2522] Create int8 person detection benchmark. 
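
This mirrors the existing person_detection benchmark, but runs the int8
person detection model from examples/person_detection_experimental and pulls
in the int8 model data download. With the new Makefile rules the benchmark
should build like the other micro benchmarks, e.g. (illustrative invocation;
platform-specific TARGET flags may be needed and the output directory varies
by target and architecture):

  make -f tensorflow/lite/micro/tools/make/Makefile \
    person_detection_experimental_benchmark

The resulting binary is expected under the usual
tensorflow/lite/micro/tools/make/gen/<target>_<arch>/bin/ directory.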
PiperOrigin-RevId: 321677469 Change-Id: Id89a940311545c6fd94bc102553a9123507baeed --- tensorflow/lite/micro/benchmarks/BUILD | 18 +++++ tensorflow/lite/micro/benchmarks/Makefile.inc | 14 ++++ .../lite/micro/benchmarks/micro_benchmark.h | 3 +- ...person_detection_experimental_benchmark.cc | 75 +++++++++++++++++++ .../person_detection_test.cc | 12 +-- .../tools/make/third_party_downloads.inc | 4 +- 6 files changed, 115 insertions(+), 11 deletions(-) create mode 100644 tensorflow/lite/micro/benchmarks/person_detection_experimental_benchmark.cc diff --git a/tensorflow/lite/micro/benchmarks/BUILD b/tensorflow/lite/micro/benchmarks/BUILD index 7e140b5995d..5a8a88133e6 100644 --- a/tensorflow/lite/micro/benchmarks/BUILD +++ b/tensorflow/lite/micro/benchmarks/BUILD @@ -85,3 +85,21 @@ cc_binary( "//tensorflow/lite/schema:schema_fbs", ], ) + +cc_binary( + name = "person_detection_experimental_benchmark", + srcs = ["person_detection_experimental_benchmark.cc"], + deps = [ + ":micro_benchmark", + "//tensorflow/lite:version", + "//tensorflow/lite/c:common", + "//tensorflow/lite/micro:micro_error_reporter", + "//tensorflow/lite/micro:micro_framework", + "//tensorflow/lite/micro:micro_utils", + "//tensorflow/lite/micro:op_resolvers", + "//tensorflow/lite/micro/examples/person_detection_experimental:model_settings", + "//tensorflow/lite/micro/examples/person_detection_experimental:person_detect_model_data", + "//tensorflow/lite/micro/examples/person_detection_experimental:simple_images_test_data", + "//tensorflow/lite/schema:schema_fbs", + ], +) diff --git a/tensorflow/lite/micro/benchmarks/Makefile.inc b/tensorflow/lite/micro/benchmarks/Makefile.inc index 2a7eefd2596..4a57ef39d69 100644 --- a/tensorflow/lite/micro/benchmarks/Makefile.inc +++ b/tensorflow/lite/micro/benchmarks/Makefile.inc @@ -1,4 +1,6 @@ $(eval $(call add_third_party_download,$(PERSON_MODEL_URL),$(PERSON_MODEL_MD5),person_model_grayscale,)) +$(eval $(call add_third_party_download,$(PERSON_MODEL_INT8_URL),$(PERSON_MODEL_INT8_MD5),person_model_int8,)) + KEYWORD_BENCHMARK_SRCS := \ tensorflow/lite/micro/benchmarks/keyword_benchmark.cc \ @@ -16,9 +18,21 @@ $(MAKEFILE_DIR)/downloads/person_model_grayscale/person_image_data.cc PERSON_DETECTION_BENCHMARK_HDRS := \ tensorflow/lite/micro/examples/person_detection/person_detect_model_data.h +PERSON_DETECTION_EXPERIMENTAL_BENCHMARK_SRCS := \ +tensorflow/lite/micro/benchmarks/person_detection_experimental_benchmark.cc \ +$(MAKEFILE_DIR)/downloads/person_model_int8/no_person_image_data.cc \ +$(MAKEFILE_DIR)/downloads/person_model_int8/person_detect_model_data.cc \ +$(MAKEFILE_DIR)/downloads/person_model_int8/person_image_data.cc + +PERSON_DETECTION_EXPERIMENTAL_BENCHMARK_HDRS := \ +tensorflow/lite/micro/examples/person_detection_experimental/person_detect_model_data.h + # Builds a standalone binary. 
$(eval $(call microlite_test,keyword_benchmark,\ $(KEYWORD_BENCHMARK_SRCS),$(KEYWORD_BENCHMARK_HDRS))) $(eval $(call microlite_test,person_detection_benchmark,\ $(PERSON_DETECTION_BENCHMARK_SRCS),$(PERSON_DETECTION_BENCHMARK_HDRS))) + +$(eval $(call microlite_test,person_detection_experimental_benchmark,\ +$(PERSON_DETECTION_EXPERIMENTAL_BENCHMARK_SRCS),$(PERSON_DETECTION_EXPERIMENTAL_BENCHMARK_HDRS))) diff --git a/tensorflow/lite/micro/benchmarks/micro_benchmark.h b/tensorflow/lite/micro/benchmarks/micro_benchmark.h index 3c4bfb17dee..2c7390b9cd4 100644 --- a/tensorflow/lite/micro/benchmarks/micro_benchmark.h +++ b/tensorflow/lite/micro/benchmarks/micro_benchmark.h @@ -45,6 +45,8 @@ extern tflite::ErrorReporter* reporter; #define TF_LITE_MICRO_BENCHMARK(func) \ if (tflite::ticks_per_second() == 0) { \ + TF_LITE_REPORT_ERROR(micro_benchmark::reporter, \ + "no timer implementation found"); \ return 0; \ } \ start_ticks = tflite::GetCurrentTimeTicks(); \ @@ -95,7 +97,6 @@ class MicroBenchmarkRunner { } void SetInput(const inputT* custom_input) { - // Populate input tensor with an image with no person. TfLiteTensor* input = interpreter_.input(0); inputT* input_buffer = tflite::GetTensorData(input); int input_length = input->bytes / sizeof(inputT); diff --git a/tensorflow/lite/micro/benchmarks/person_detection_experimental_benchmark.cc b/tensorflow/lite/micro/benchmarks/person_detection_experimental_benchmark.cc new file mode 100644 index 00000000000..65412136bdc --- /dev/null +++ b/tensorflow/lite/micro/benchmarks/person_detection_experimental_benchmark.cc @@ -0,0 +1,75 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/micro/benchmarks/micro_benchmark.h" +#include "tensorflow/lite/micro/examples/person_detection_experimental/model_settings.h" +#include "tensorflow/lite/micro/examples/person_detection_experimental/no_person_image_data.h" +#include "tensorflow/lite/micro/examples/person_detection_experimental/person_detect_model_data.h" +#include "tensorflow/lite/micro/examples/person_detection_experimental/person_image_data.h" +#include "tensorflow/lite/micro/micro_error_reporter.h" +#include "tensorflow/lite/micro/micro_interpreter.h" +#include "tensorflow/lite/micro/micro_mutable_op_resolver.h" +#include "tensorflow/lite/micro/micro_utils.h" +#include "tensorflow/lite/schema/schema_generated.h" +#include "tensorflow/lite/version.h" + +/* + * Person Detection benchmark. Evaluates runtime performance of the visual + * wakewords person detection model. This is the same model found in + * exmaples/person_detection. + */ + +namespace { + +// Create an area of memory to use for input, output, and intermediate arrays. +// Align arena to 16 bytes to avoid alignment warnings on certain platforms. 
+constexpr int tensor_arena_size = 135 * 1024; +alignas(16) uint8_t tensor_arena[tensor_arena_size]; + +MicroBenchmarkRunner* runner; + +void InitializeBenchmarkRunner() { + // NOLINTNEXTLINE + static MicroBenchmarkRunner benchmark_runner( + g_person_detect_model_data, tensor_arena, tensor_arena_size); + runner = &benchmark_runner; + runner->SetInput(reinterpret_cast(g_person_data)); +} + +void PersonDetectionTenIerationsWithPerson() { + runner->SetInput(reinterpret_cast(g_person_data)); + for (int i = 0; i < 10; i++) { + runner->RunSingleIteration(); + } +} + +void PersonDetectionTenIerationsWithoutPerson() { + runner->SetInput(reinterpret_cast(g_no_person_data)); + for (int i = 0; i < 10; i++) { + runner->RunSingleIteration(); + } +} + +} // namespace + +TF_LITE_MICRO_BENCHMARKS_BEGIN + +TF_LITE_MICRO_BENCHMARK(InitializeBenchmarkRunner()); +TF_LITE_MICRO_BENCHMARK(runner->RunSingleIteration()); +TF_LITE_MICRO_BENCHMARK(PersonDetectionTenIerationsWithPerson()); +TF_LITE_MICRO_BENCHMARK(PersonDetectionTenIerationsWithoutPerson()); + +TF_LITE_MICRO_BENCHMARKS_END diff --git a/tensorflow/lite/micro/examples/person_detection_experimental/person_detection_test.cc b/tensorflow/lite/micro/examples/person_detection_experimental/person_detection_test.cc index 78ac037be94..6175a59ba52 100644 --- a/tensorflow/lite/micro/examples/person_detection_experimental/person_detection_test.cc +++ b/tensorflow/lite/micro/examples/person_detection_experimental/person_detection_test.cc @@ -76,10 +76,8 @@ TF_LITE_MICRO_TEST(TestInvoke) { TF_LITE_MICRO_EXPECT_EQ(kTfLiteInt8, input->type); // Copy an image with a person into the memory area used for the input. - for (size_t i = 0; i < input->bytes; ++i) { - // Subtract 128 to convert between uint8 and int8. - input->data.int8[i] = g_person_data[i] - 128; - } + TFLITE_DCHECK_EQ(input->bytes, static_cast(g_person_data_size)); + memcpy(input->data.int8, g_person_data, input->bytes); // Run the model on this input and make sure it succeeds. TfLiteStatus invoke_status = interpreter.Invoke(); @@ -104,10 +102,8 @@ TF_LITE_MICRO_TEST(TestInvoke) { person_score, no_person_score); TF_LITE_MICRO_EXPECT_GT(person_score, no_person_score); - // Now test with a blank image. - for (size_t i = 0; i < input->bytes; ++i) { - input->data.int8[i] = 0; - } + // TODO(b/161461076): Update model to make this work on real negative inputs. + memset(input->data.int8, 0, input->bytes); // Run the model on this "No Person" input. 
invoke_status = interpreter.Invoke(); diff --git a/tensorflow/lite/micro/tools/make/third_party_downloads.inc b/tensorflow/lite/micro/tools/make/third_party_downloads.inc index 8590ace9fda..05fc08cc601 100644 --- a/tensorflow/lite/micro/tools/make/third_party_downloads.inc +++ b/tensorflow/lite/micro/tools/make/third_party_downloads.inc @@ -68,8 +68,8 @@ IMAGE_RECOGNITION_MODEL_MD5 := "1f4607b05ac45b8a6146fb883dbc2d7b" PERSON_MODEL_URL := "https://storage.googleapis.com/download.tensorflow.org/data/tf_lite_micro_person_data_grayscale_2020_05_27.zip" PERSON_MODEL_MD5 := "55b85f76e2995153e660391d4a209ef1" -PERSON_MODEL_INT8_URL := "https://storage.googleapis.com/download.tensorflow.org/data/tf_lite_micro_person_data_int8_grayscale_2020_05_27.zip" -PERSON_MODEL_INT8_MD5 := "a0ede2d058aa2a1d413893455dd55352" +PERSON_MODEL_INT8_URL := "https://storage.googleapis.com/download.tensorflow.org/data/tf_lite_micro_person_data_int8_grayscale_2020_06_23.zip" +PERSON_MODEL_INT8_MD5 := "9b5b6d4677dd0a91b1bb992d1c4c0417" EMBARC_MLI_URL := "https://github.com/foss-for-synopsys-dwc-arc-processors/embarc_mli/archive/58284867ca52d1f43b25045e8601999d7359d986.zip" EMBARC_MLI_MD5 := "2bf4982a327fdaa9d475803ce014d1ef" From eb6de2e830eccee393fe76ee4b71cf5221c66bd3 Mon Sep 17 00:00:00 2001 From: Xingyu Long Date: Thu, 16 Jul 2020 20:51:02 -0400 Subject: [PATCH 0656/2522] Set default distr-strategy as mirrored and add start_time --- tensorflow/python/keras/benchmarks/benchmark_util.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/keras/benchmarks/benchmark_util.py b/tensorflow/python/keras/benchmarks/benchmark_util.py index 8fd7e1a2c58..7f5d51fbb3a 100644 --- a/tensorflow/python/keras/benchmarks/benchmark_util.py +++ b/tensorflow/python/keras/benchmarks/benchmark_util.py @@ -57,7 +57,7 @@ def measure_performance(model_fn, metrics=None, verbose=0, num_gpus=0, - distribution_strategy='off'): + distribution_strategy='mirrored'): """Run models and measure the performance. Arguments: @@ -81,7 +81,7 @@ def measure_performance(model_fn, num_gpus: Number of GPUs to run the model. distribution_strategy: Distribution strategies. It could be `multi_worker_mirrored`, `one_device`, `mirrored`. If unspecified, - `distribution_strategy` will default to 'off'. Note that, `TPU` + `distribution_strategy` will default to 'mirrored'. Note that, `TPU` and `parameter_server` are not supported yet. Returns: @@ -116,6 +116,7 @@ def measure_performance(model_fn, for _ in range(run_iters): timer = timeit.default_timer + start_time = timer() # Init the distribution strategy scope for each iteration. 
strategy_scope = distribution_util.get_strategy_scope(strategy) with strategy_scope: @@ -147,7 +148,7 @@ def measure_performance(model_fn, compile_time_list.append(compile_time) startup_time_list.append(cbk.startup_time) avg_epoch_time_list.append(np.mean(cbk.times)) - wall_time_list.append(end_time - t0) + wall_time_list.append(end_time - start_time) exp_per_sec_list.append(total_num_examples / (end_time - t2)) metrics = [] From 5fd4527e9a5091119d5c71a15ee4436a0b83f85b Mon Sep 17 00:00:00 2001 From: Xingyu Long Date: Thu, 16 Jul 2020 20:54:55 -0400 Subject: [PATCH 0657/2522] Remove size tag in BUILD and update examples --- tensorflow/python/keras/benchmarks/BUILD | 7 --- .../cifar10_cnn_benchmark_test.py | 46 +++++++++---------- .../mnist_conv_benchmark_test.py | 6 +-- .../mnist_irnn_benchmark_test.py | 2 +- 4 files changed, 27 insertions(+), 34 deletions(-) diff --git a/tensorflow/python/keras/benchmarks/BUILD b/tensorflow/python/keras/benchmarks/BUILD index 448dc151376..da88f3ecafd 100644 --- a/tensorflow/python/keras/benchmarks/BUILD +++ b/tensorflow/python/keras/benchmarks/BUILD @@ -97,7 +97,6 @@ py_test( py_test( name = "text_classification_transformer_benchmark_test", - size = "medium", srcs = ["keras_examples_benchmarks/text_classification_transformer_benchmark_test.py"], python_version = "PY3", tags = [ @@ -112,7 +111,6 @@ py_test( py_test( name = "antirectifier_benchmark_test", - size = "medium", srcs = ["keras_examples_benchmarks/antirectifier_benchmark_test.py"], python_version = "PY3", tags = [ @@ -127,7 +125,6 @@ py_test( py_test( name = "mnist_conv_benchmark_test", - size = "medium", srcs = ["keras_examples_benchmarks/mnist_conv_benchmark_test.py"], python_version = "PY3", tags = [ @@ -143,7 +140,6 @@ py_test( py_test( name = "mnist_hierarchical_rnn_benchmark_test", - size = "medium", srcs = ["keras_examples_benchmarks/mnist_hierarchical_rnn_benchmark_test.py"], python_version = "PY3", tags = [ @@ -158,7 +154,6 @@ py_test( py_test( name = "mnist_irnn_benchmark_test", - size = "medium", srcs = ["keras_examples_benchmarks/mnist_irnn_benchmark_test.py"], python_version = "PY3", tags = [ @@ -173,7 +168,6 @@ py_test( py_test( name = "reuters_mlp_benchmark_test", - size = "medium", srcs = ["keras_examples_benchmarks/reuters_mlp_benchmark_test.py"], python_version = "PY3", tags = [ @@ -189,7 +183,6 @@ py_test( py_test( name = "cifar10_cnn_benchmark_test", - size = "medium", srcs = ["keras_examples_benchmarks/cifar10_cnn_benchmark_test.py"], python_version = "PY3", tags = [ diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/cifar10_cnn_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/cifar10_cnn_benchmark_test.py index f84f03fc3d8..9806307d7d3 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/cifar10_cnn_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/cifar10_cnn_benchmark_test.py @@ -70,28 +70,6 @@ class Cifar10CNNBenchmark(tf.test.Benchmark): model.add(tf.keras.layers.Activation('softmax')) return model - def benchmark_cnn_cifar10_bs_1024(self): - """Measure performance with batch_size=1024 and run_iters=2.""" - batch_size = 1024 - run_iters = 2 - metrics, wall_time, extras = benchmark_util.measure_performance( - self._build_model, - x=self.x_train, - y=self.y_train, - batch_size=batch_size, - run_iters=run_iters, - epochs=self.epochs, - optimizer=tf.keras.optimizers.RMSprop( - learning_rate=0.0001, decay=1e-6), - loss='categorical_crossentropy', - 
metrics=['accuracy']) - - self.report_benchmark( - iters=run_iters, - wall_time=wall_time, - metrics=metrics, - extras=extras) - def benchmark_cnn_cifar10_bs_256(self): """Measure performance with batch_size=256 and run_iters=3.""" batch_size = 256 @@ -115,7 +93,7 @@ class Cifar10CNNBenchmark(tf.test.Benchmark): extras=extras) def benchmark_cnn_cifar10_bs_512(self): - """Measure performance with batch_size=512 and run_iters=4.""" + """Measure performance with batch_size=512 and run_iters=3.""" batch_size = 512 run_iters = 3 metrics, wall_time, extras = benchmark_util.measure_performance( @@ -136,6 +114,28 @@ class Cifar10CNNBenchmark(tf.test.Benchmark): metrics=metrics, extras=extras) + def benchmark_cnn_cifar10_bs_1024(self): + """Measure performance with batch_size=1024 and run_iters=2.""" + batch_size = 1024 + run_iters = 2 + metrics, wall_time, extras = benchmark_util.measure_performance( + self._build_model, + x=self.x_train, + y=self.y_train, + batch_size=batch_size, + run_iters=run_iters, + epochs=self.epochs, + optimizer=tf.keras.optimizers.RMSprop( + learning_rate=0.0001, decay=1e-6), + loss='categorical_crossentropy', + metrics=['accuracy']) + + self.report_benchmark( + iters=run_iters, + wall_time=wall_time, + metrics=metrics, + extras=extras) + if __name__ == '__main__': tf.test.main() diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_conv_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_conv_benchmark_test.py index 52de9762273..d828e267523 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_conv_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_conv_benchmark_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Benchmarks on convnet on MNIST dataset.""" +"""Benchmarks on Convnet on MNIST dataset.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -25,7 +25,7 @@ from tensorflow.python.keras.benchmarks import benchmark_util class ConvMnistBenchmark(tf.test.Benchmark): - """Benchmarks for convnet using `tf.test.Benchmark`.""" + """Benchmarks for Convnet using `tf.test.Benchmark`.""" # Required Arguments for measure_performance. # x: Input data, it could be Numpy or load from tfds. # y: Target data. 
If `x` is a dataset, generator instance, @@ -109,7 +109,7 @@ class ConvMnistBenchmark(tf.test.Benchmark): extras=extras) def benchmark_conv_mnist_bs_512(self): - """Measure performance with batch_size=512 and run_iters=4.""" + """Measure performance with batch_size=512 and run_iters=3.""" batch_size = 512 run_iters = 3 metrics, wall_time, extras = benchmark_util.measure_performance( diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_irnn_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_irnn_benchmark_test.py index 8e480ed1d17..8b8d838bddc 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_irnn_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/mnist_irnn_benchmark_test.py @@ -23,7 +23,7 @@ from tensorflow.python.keras.benchmarks import benchmark_util class IRNNMnistBenchmark(tf.test.Benchmark): - """Benchmarks for Hierarchical RNN using `tf.test.Benchmark`.""" + """Benchmarks for IRNN using `tf.test.Benchmark`.""" # Required Arguments for measure_performance. # x: Input data, it could be Numpy or load from tfds. # y: Target data. If `x` is a dataset, generator instance, From 30cbb773e39528651177012808077d6e0e19d9dd Mon Sep 17 00:00:00 2001 From: Yujing Zhang Date: Thu, 16 Jul 2020 18:11:00 -0700 Subject: [PATCH 0658/2522] Serialize the dtype and shape of a remote resource variable when the source device is not on the same task of the target device. PiperOrigin-RevId: 321686965 Change-Id: Ie54e9d948e840dbdd88102f56ef382443f513d93 --- tensorflow/c/eager/c_api_distributed_test.cc | 9 ++++++--- .../core/distributed_runtime/eager/remote_copy_node.cc | 5 ++++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/tensorflow/c/eager/c_api_distributed_test.cc b/tensorflow/c/eager/c_api_distributed_test.cc index a6547e23454..414adceaeb3 100644 --- a/tensorflow/c/eager/c_api_distributed_test.cc +++ b/tensorflow/c/eager/c_api_distributed_test.cc @@ -174,9 +174,9 @@ void TestFunctionWithPackedInput(const bool remote) { const char task2_name[] = "/job:localhost/replica:0/task:2/device:CPU:0"; // Create one variable per task. - TFE_TensorHandle* h0 = TestVariable(ctx, 1.0, task0_name); - TFE_TensorHandle* h1 = TestVariable(ctx, 2.0, task1_name); - TFE_TensorHandle* h2 = TestVariable(ctx, 3.0, task2_name); + TFE_TensorHandle* h0 = TestVariable(ctx, 1.0, task1_name); + TFE_TensorHandle* h1 = TestVariable(ctx, 2.0, task2_name); + TFE_TensorHandle* h2 = TestVariable(ctx, 3.0, task0_name); // Add a sync point in order to make sure that variables have been initialized // before the function execution starts. @@ -185,6 +185,9 @@ void TestFunctionWithPackedInput(const bool remote) { VarIsInitialized(ctx, h2); // Pack 3 variable handles into one TFE_TensorHandle. + // When remote is false, function device is placed on task0. Handle types are + // REMOTE, REMOTE, LOCAL on task0. When remote is true, function device is + // placed on task1, Handle types are LOCAL, REMOTE, LOCAL on task1. 
int num_replicas = 3; std::vector handles = {h0, h1, h2}; TFE_TensorHandle* packed_handle = diff --git a/tensorflow/core/distributed_runtime/eager/remote_copy_node.cc b/tensorflow/core/distributed_runtime/eager/remote_copy_node.cc index a5eeed6a0b6..d4b5fe38964 100644 --- a/tensorflow/core/distributed_runtime/eager/remote_copy_node.cc +++ b/tensorflow/core/distributed_runtime/eager/remote_copy_node.cc @@ -314,10 +314,13 @@ Status SerializePackedHandle(const uint64 op_id, TensorHandle* packed_handle, } else if (h->Type() == TensorHandle::REMOTE) { // Only serialize the resource dtype and shape of the first handle, since // all handles are of the same resource dtype and shape. + // If src_device is on the same task of target_device, the handle is a + // local handle on the target device, which means the resource dtype and + // shape are known on the target device. Device* src_device = absl::get(h->device()); const bool serialize_resource_dtype_and_shape = (i == 0) && (h->dtype == DT_RESOURCE) && - (ctx->OnSameTask(src_device, target_device)); + (!ctx->OnSameTask(src_device, target_device)); TF_RETURN_IF_ERROR(ctx->RemoteMgr()->SerializeRemoteTensorHandle( h, /*wait_until_ready=*/false, op->add_handles()->mutable_remote_handle(), src_device, From 90da05cd1c07b0c84e102944a9a634127ecdc52b Mon Sep 17 00:00:00 2001 From: Peng Wang Date: Thu, 16 Jul 2020 18:14:24 -0700 Subject: [PATCH 0659/2522] [TF-numpy] Exports `np.newaxis`. PiperOrigin-RevId: 321687455 Change-Id: I47a9f566b9a961368cfb4f076674fc0b94a6e140 --- tensorflow/python/ops/numpy_ops/np_array_ops.py | 4 ++++ .../py/numpy/tf_numpy_api/tensorflow.experimental.numpy.pbtxt | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/tensorflow/python/ops/numpy_ops/np_array_ops.py b/tensorflow/python/ops/numpy_ops/np_array_ops.py index 2b23dbbab4b..d5b537e5a9f 100644 --- a/tensorflow/python/ops/numpy_ops/np_array_ops.py +++ b/tensorflow/python/ops/numpy_ops/np_array_ops.py @@ -37,10 +37,14 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import sort_ops from tensorflow.python.ops.numpy_ops import np_arrays from tensorflow.python.ops.numpy_ops import np_dtypes +from tensorflow.python.ops.numpy_ops import np_export from tensorflow.python.ops.numpy_ops import np_utils from tensorflow.python.util import nest +newaxis = np_export.np_export_constant(__name__, 'newaxis', np.newaxis) + + @np_utils.np_doc('empty') def empty(shape, dtype=float): # pylint: disable=redefined-outer-name return zeros(shape, dtype) diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.pbtxt index cb1e28ea1db..f5ffcf9e244 100644 --- a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.pbtxt +++ b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.pbtxt @@ -72,6 +72,10 @@ tf_module { name: "ndarray" mtype: "" } + member { + name: "newaxis" + mtype: "" + } member { name: "object_" mtype: "" From 49d86d14d5714184657d87cfb5aff62457617364 Mon Sep 17 00:00:00 2001 From: Tomer Kaftan Date: Thu, 16 Jul 2020 18:28:25 -0700 Subject: [PATCH 0660/2522] Special-case user-registered symbolic types in KerasTensors as a workaround to prevent TensorFlow Probability from breaking before they finish transitioning to KerasTensors. Not adding tests on the keras side because the type registration api is private and is used only by TFP, but this was tested against TFP tests w/ KerasTensors enabled. 
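
Roughly, when a traced object's type has been registered through the private
`register_symbolic_tensor_type` API, the KerasTensor now keeps a reference to
the original object and hands that same object back when building
placeholders, instead of reconstructing it from a TypeSpec. A minimal sketch
of the intended round trip (illustrative; `my_dist` stands in for a TFP
object whose type is already registered):

  from tensorflow.python.keras.engine import keras_tensor

  kt = keras_tensor.keras_tensor_from_tensor(my_dist)
  placeholder = keras_tensor.keras_tensor_to_placeholder(kt)
  assert placeholder is my_dist  # passed through between scratch graphs as-is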
PiperOrigin-RevId: 321689138 Change-Id: I5bf823822b721d921b25a481c85c2d503fd55d45 --- .../python/keras/engine/keras_tensor.py | 49 ++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/keras/engine/keras_tensor.py b/tensorflow/python/keras/engine/keras_tensor.py index b31e71e1fa8..8ab6f674243 100644 --- a/tensorflow/python/keras/engine/keras_tensor.py +++ b/tensorflow/python/keras/engine/keras_tensor.py @@ -246,6 +246,9 @@ class _KerasTensorIterator(object): def keras_tensor_to_placeholder(x): """Construct a graph placeholder to represent a KerasTensor when tracing.""" + if hasattr(x, '_user_registered_symbolic_object'): + return x._user_registered_symbolic_object # pylint: disable=protected-access + if isinstance(x, KerasTensor): spec = x.type_spec @@ -292,11 +295,52 @@ def keras_tensor_to_placeholder(x): return x +class UserRegisteredSpec(type_spec_module.TypeSpec): + """TypeSpec to represent user-registered symbolic objects.""" + + def __init__(self, shape, dtype): + self.shape = shape + self._dtype = dtype + self.dtype = dtype + + def _component_specs(self): + raise NotImplementedError + + def _from_components(self, components): + raise NotImplementedError + + def _serialize(self): + raise NotImplementedError + + def _to_components(self, value): + raise NotImplementedError + + def value_type(self): + raise NotImplementedError + + def keras_tensor_from_tensor(x): """Convert a traced (composite)tensor to a representative KerasTensor.""" name = getattr(x, 'name', None) inferred_shape_value = None - type_spec = type_spec_module.type_spec_from_value(x) + + # TODO(b/161487382): + # Special-case user-registered symbolic objects (registered by the + # private `register_symbolic_tensor_type` method) by passing them between + # scratch graphs directly. + # This is needed to not break Tensorflow probability + # while they finish migrating to composite tensors. + user_registered_symbolic = False + try: + from tensorflow.python.keras.utils import tf_utils # pylint: disable=g-import-not-at-top to prevent circular imports + if isinstance(x, tuple(tf_utils._user_convertible_tensor_types)): # pylint: disable=protected-access + user_registered_symbolic = True + except ImportError: + pass + if user_registered_symbolic: + type_spec = UserRegisteredSpec(x.shape, x.dtype) + else: + type_spec = type_spec_module.type_spec_from_value(x) if (isinstance(type_spec, tensor_spec.TensorSpec) and type_spec.dtype == dtypes.int32 @@ -325,6 +369,9 @@ def keras_tensor_from_tensor(x): out = KerasTensor(type_spec, inferred_shape_value=inferred_shape_value, name=name) + if user_registered_symbolic: + out._user_registered_symbolic_object = x # pylint: disable=protected-access + if hasattr(x, '_keras_mask'): out._keras_mask = KerasTensor( # pylint: disable=protected-access type_spec_module.type_spec_from_value(x._keras_mask)) # pylint: disable=protected-access From 0a80c623e847b1772454d0921b69915ddad8e579 Mon Sep 17 00:00:00 2001 From: Peng Wang Date: Thu, 16 Jul 2020 18:50:00 -0700 Subject: [PATCH 0661/2522] Adds all directly-imported numpy classes to the do-not-descend map for `tf.experimental.numpy`, to prevent future OSS breakages. 
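
These symbols are direct re-exports of NumPy's own scalar classes, so their
member lists track whichever NumPy version is installed rather than
TensorFlow itself, which makes golden-file pbtxt snapshots of them brittle.
For example (illustrative):

  import numpy as np
  import tensorflow as tf

  tnp = tf.experimental.numpy
  assert tnp.int32 is np.int32  # same class, so its API follows NumPy releases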
PiperOrigin-RevId: 321692135 Change-Id: I0b55034776a09c41757d476fab019d0d9b338e66 --- .../tools/api/tests/api_compatibility_test.py | 9 +- .../tensorflow.experimental.numpy.bool_.pbtxt | 222 ----------------- ...orflow.experimental.numpy.complex128.pbtxt | 222 ----------------- ...sorflow.experimental.numpy.complex64.pbtxt | 222 ----------------- ...nsorflow.experimental.numpy.complex_.pbtxt | 222 ----------------- ...ensorflow.experimental.numpy.float16.pbtxt | 222 ----------------- ...ensorflow.experimental.numpy.float32.pbtxt | 222 ----------------- ...ensorflow.experimental.numpy.float64.pbtxt | 234 ------------------ ...tensorflow.experimental.numpy.float_.pbtxt | 234 ------------------ ...ensorflow.experimental.numpy.inexact.pbtxt | 222 ----------------- .../tensorflow.experimental.numpy.int16.pbtxt | 230 ----------------- .../tensorflow.experimental.numpy.int32.pbtxt | 230 ----------------- .../tensorflow.experimental.numpy.int64.pbtxt | 230 ----------------- .../tensorflow.experimental.numpy.int8.pbtxt | 230 ----------------- .../tensorflow.experimental.numpy.int_.pbtxt | 230 ----------------- ...ensorflow.experimental.numpy.object_.pbtxt | 222 ----------------- ...tensorflow.experimental.numpy.uint16.pbtxt | 230 ----------------- ...tensorflow.experimental.numpy.uint32.pbtxt | 230 ----------------- ...tensorflow.experimental.numpy.uint64.pbtxt | 230 ----------------- .../tensorflow.experimental.numpy.uint8.pbtxt | 230 ----------------- 20 files changed, 6 insertions(+), 4317 deletions(-) delete mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.bool_.pbtxt delete mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.complex128.pbtxt delete mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.complex64.pbtxt delete mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.complex_.pbtxt delete mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.float16.pbtxt delete mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.float32.pbtxt delete mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.float64.pbtxt delete mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.float_.pbtxt delete mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.inexact.pbtxt delete mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int16.pbtxt delete mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int32.pbtxt delete mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int64.pbtxt delete mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int8.pbtxt delete mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int_.pbtxt delete mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.object_.pbtxt delete mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.uint16.pbtxt delete mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.uint32.pbtxt delete mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.uint64.pbtxt delete mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.uint8.pbtxt diff --git a/tensorflow/tools/api/tests/api_compatibility_test.py b/tensorflow/tools/api/tests/api_compatibility_test.py index f6d1ba4d8c2..cdc955f7dac 100644 --- 
a/tensorflow/tools/api/tests/api_compatibility_test.py +++ b/tensorflow/tools/api/tests/api_compatibility_test.py @@ -362,10 +362,13 @@ class ApiCompatibilityTest(test.TestCase): public_api_visitor.private_map['tf'].append('enable_v2_behavior') public_api_visitor.do_not_descend_map['tf.GPUOptions'] = ['Experimental'] - # Do not descend into these classes because their signatures are different - # between internal and OSS. + # Do not descend into these numpy classes because their signatures may be + # different between internal and OSS. public_api_visitor.do_not_descend_map['tf.experimental.numpy'] = [ - 'iinfo', 'string_', 'unicode_'] + 'bool_', 'complex_', 'complex128', 'complex64', 'float_', 'float16', + 'float32', 'float64', 'inexact', 'int_', 'int16', 'int32', 'int64', + 'int8', 'object_', 'string_', 'uint16', 'uint32', 'uint64', 'uint8', + 'unicode_', 'iinfo'] if FLAGS.only_test_core_api: public_api_visitor.do_not_descend_map['tf'].extend(_NON_CORE_PACKAGES) if additional_private_map: diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.bool_.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.bool_.pbtxt deleted file mode 100644 index cfca8fc9ab8..00000000000 --- a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.bool_.pbtxt +++ /dev/null @@ -1,222 +0,0 @@ -path: "tensorflow.experimental.numpy.bool_" -tf_class { - is_instance: "" - member { - name: "T" - mtype: "" - } - member { - name: "base" - mtype: "" - } - member { - name: "data" - mtype: "" - } - member { - name: "dtype" - mtype: "" - } - member { - name: "flags" - mtype: "" - } - member { - name: "flat" - mtype: "" - } - member { - name: "imag" - mtype: "" - } - member { - name: "itemsize" - mtype: "" - } - member { - name: "nbytes" - mtype: "" - } - member { - name: "ndim" - mtype: "" - } - member { - name: "real" - mtype: "" - } - member { - name: "shape" - mtype: "" - } - member { - name: "size" - mtype: "" - } - member { - name: "strides" - mtype: "" - } - member_method { - name: "__init__" - } - member_method { - name: "all" - } - member_method { - name: "any" - } - member_method { - name: "argmax" - } - member_method { - name: "argmin" - } - member_method { - name: "argsort" - } - member_method { - name: "astype" - } - member_method { - name: "byteswap" - } - member_method { - name: "choose" - } - member_method { - name: "clip" - } - member_method { - name: "compress" - } - member_method { - name: "conj" - } - member_method { - name: "conjugate" - } - member_method { - name: "copy" - } - member_method { - name: "cumprod" - } - member_method { - name: "cumsum" - } - member_method { - name: "diagonal" - } - member_method { - name: "dump" - } - member_method { - name: "dumps" - } - member_method { - name: "fill" - } - member_method { - name: "flatten" - } - member_method { - name: "getfield" - } - member_method { - name: "item" - } - member_method { - name: "itemset" - } - member_method { - name: "max" - } - member_method { - name: "mean" - } - member_method { - name: "min" - } - member_method { - name: "newbyteorder" - } - member_method { - name: "nonzero" - } - member_method { - name: "prod" - } - member_method { - name: "ptp" - } - member_method { - name: "put" - } - member_method { - name: "ravel" - } - member_method { - name: "repeat" - } - member_method { - name: "reshape" - } - member_method { - name: "resize" - } - member_method { - name: "round" - } - member_method { - name: "searchsorted" - } - member_method { - name: "setfield" - } - member_method { - 
name: "setflags" - } - member_method { - name: "sort" - } - member_method { - name: "squeeze" - } - member_method { - name: "std" - } - member_method { - name: "sum" - } - member_method { - name: "swapaxes" - } - member_method { - name: "take" - } - member_method { - name: "tobytes" - } - member_method { - name: "tofile" - } - member_method { - name: "tolist" - } - member_method { - name: "tostring" - } - member_method { - name: "trace" - } - member_method { - name: "transpose" - } - member_method { - name: "var" - } - member_method { - name: "view" - } -} diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.complex128.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.complex128.pbtxt deleted file mode 100644 index 65ba7b24e02..00000000000 --- a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.complex128.pbtxt +++ /dev/null @@ -1,222 +0,0 @@ -path: "tensorflow.experimental.numpy.complex128" -tf_class { - is_instance: "" - member { - name: "T" - mtype: "" - } - member { - name: "base" - mtype: "" - } - member { - name: "data" - mtype: "" - } - member { - name: "dtype" - mtype: "" - } - member { - name: "flags" - mtype: "" - } - member { - name: "flat" - mtype: "" - } - member { - name: "imag" - mtype: "" - } - member { - name: "itemsize" - mtype: "" - } - member { - name: "nbytes" - mtype: "" - } - member { - name: "ndim" - mtype: "" - } - member { - name: "real" - mtype: "" - } - member { - name: "shape" - mtype: "" - } - member { - name: "size" - mtype: "" - } - member { - name: "strides" - mtype: "" - } - member_method { - name: "__init__" - } - member_method { - name: "all" - } - member_method { - name: "any" - } - member_method { - name: "argmax" - } - member_method { - name: "argmin" - } - member_method { - name: "argsort" - } - member_method { - name: "astype" - } - member_method { - name: "byteswap" - } - member_method { - name: "choose" - } - member_method { - name: "clip" - } - member_method { - name: "compress" - } - member_method { - name: "conj" - } - member_method { - name: "conjugate" - } - member_method { - name: "copy" - } - member_method { - name: "cumprod" - } - member_method { - name: "cumsum" - } - member_method { - name: "diagonal" - } - member_method { - name: "dump" - } - member_method { - name: "dumps" - } - member_method { - name: "fill" - } - member_method { - name: "flatten" - } - member_method { - name: "getfield" - } - member_method { - name: "item" - } - member_method { - name: "itemset" - } - member_method { - name: "max" - } - member_method { - name: "mean" - } - member_method { - name: "min" - } - member_method { - name: "newbyteorder" - } - member_method { - name: "nonzero" - } - member_method { - name: "prod" - } - member_method { - name: "ptp" - } - member_method { - name: "put" - } - member_method { - name: "ravel" - } - member_method { - name: "repeat" - } - member_method { - name: "reshape" - } - member_method { - name: "resize" - } - member_method { - name: "round" - } - member_method { - name: "searchsorted" - } - member_method { - name: "setfield" - } - member_method { - name: "setflags" - } - member_method { - name: "sort" - } - member_method { - name: "squeeze" - } - member_method { - name: "std" - } - member_method { - name: "sum" - } - member_method { - name: "swapaxes" - } - member_method { - name: "take" - } - member_method { - name: "tobytes" - } - member_method { - name: "tofile" - } - member_method { - name: "tolist" - } - member_method { - name: "tostring" - } - member_method { 
- name: "trace" - } - member_method { - name: "transpose" - } - member_method { - name: "var" - } - member_method { - name: "view" - } -} diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.complex64.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.complex64.pbtxt deleted file mode 100644 index f685e1a218c..00000000000 --- a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.complex64.pbtxt +++ /dev/null @@ -1,222 +0,0 @@ -path: "tensorflow.experimental.numpy.complex64" -tf_class { - is_instance: "" - member { - name: "T" - mtype: "" - } - member { - name: "base" - mtype: "" - } - member { - name: "data" - mtype: "" - } - member { - name: "dtype" - mtype: "" - } - member { - name: "flags" - mtype: "" - } - member { - name: "flat" - mtype: "" - } - member { - name: "imag" - mtype: "" - } - member { - name: "itemsize" - mtype: "" - } - member { - name: "nbytes" - mtype: "" - } - member { - name: "ndim" - mtype: "" - } - member { - name: "real" - mtype: "" - } - member { - name: "shape" - mtype: "" - } - member { - name: "size" - mtype: "" - } - member { - name: "strides" - mtype: "" - } - member_method { - name: "__init__" - } - member_method { - name: "all" - } - member_method { - name: "any" - } - member_method { - name: "argmax" - } - member_method { - name: "argmin" - } - member_method { - name: "argsort" - } - member_method { - name: "astype" - } - member_method { - name: "byteswap" - } - member_method { - name: "choose" - } - member_method { - name: "clip" - } - member_method { - name: "compress" - } - member_method { - name: "conj" - } - member_method { - name: "conjugate" - } - member_method { - name: "copy" - } - member_method { - name: "cumprod" - } - member_method { - name: "cumsum" - } - member_method { - name: "diagonal" - } - member_method { - name: "dump" - } - member_method { - name: "dumps" - } - member_method { - name: "fill" - } - member_method { - name: "flatten" - } - member_method { - name: "getfield" - } - member_method { - name: "item" - } - member_method { - name: "itemset" - } - member_method { - name: "max" - } - member_method { - name: "mean" - } - member_method { - name: "min" - } - member_method { - name: "newbyteorder" - } - member_method { - name: "nonzero" - } - member_method { - name: "prod" - } - member_method { - name: "ptp" - } - member_method { - name: "put" - } - member_method { - name: "ravel" - } - member_method { - name: "repeat" - } - member_method { - name: "reshape" - } - member_method { - name: "resize" - } - member_method { - name: "round" - } - member_method { - name: "searchsorted" - } - member_method { - name: "setfield" - } - member_method { - name: "setflags" - } - member_method { - name: "sort" - } - member_method { - name: "squeeze" - } - member_method { - name: "std" - } - member_method { - name: "sum" - } - member_method { - name: "swapaxes" - } - member_method { - name: "take" - } - member_method { - name: "tobytes" - } - member_method { - name: "tofile" - } - member_method { - name: "tolist" - } - member_method { - name: "tostring" - } - member_method { - name: "trace" - } - member_method { - name: "transpose" - } - member_method { - name: "var" - } - member_method { - name: "view" - } -} diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.complex_.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.complex_.pbtxt deleted file mode 100644 index 99b2b699b45..00000000000 --- 
a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.complex_.pbtxt +++ /dev/null @@ -1,222 +0,0 @@ -path: "tensorflow.experimental.numpy.complex_" -tf_class { - is_instance: "" - member { - name: "T" - mtype: "" - } - member { - name: "base" - mtype: "" - } - member { - name: "data" - mtype: "" - } - member { - name: "dtype" - mtype: "" - } - member { - name: "flags" - mtype: "" - } - member { - name: "flat" - mtype: "" - } - member { - name: "imag" - mtype: "" - } - member { - name: "itemsize" - mtype: "" - } - member { - name: "nbytes" - mtype: "" - } - member { - name: "ndim" - mtype: "" - } - member { - name: "real" - mtype: "" - } - member { - name: "shape" - mtype: "" - } - member { - name: "size" - mtype: "" - } - member { - name: "strides" - mtype: "" - } - member_method { - name: "__init__" - } - member_method { - name: "all" - } - member_method { - name: "any" - } - member_method { - name: "argmax" - } - member_method { - name: "argmin" - } - member_method { - name: "argsort" - } - member_method { - name: "astype" - } - member_method { - name: "byteswap" - } - member_method { - name: "choose" - } - member_method { - name: "clip" - } - member_method { - name: "compress" - } - member_method { - name: "conj" - } - member_method { - name: "conjugate" - } - member_method { - name: "copy" - } - member_method { - name: "cumprod" - } - member_method { - name: "cumsum" - } - member_method { - name: "diagonal" - } - member_method { - name: "dump" - } - member_method { - name: "dumps" - } - member_method { - name: "fill" - } - member_method { - name: "flatten" - } - member_method { - name: "getfield" - } - member_method { - name: "item" - } - member_method { - name: "itemset" - } - member_method { - name: "max" - } - member_method { - name: "mean" - } - member_method { - name: "min" - } - member_method { - name: "newbyteorder" - } - member_method { - name: "nonzero" - } - member_method { - name: "prod" - } - member_method { - name: "ptp" - } - member_method { - name: "put" - } - member_method { - name: "ravel" - } - member_method { - name: "repeat" - } - member_method { - name: "reshape" - } - member_method { - name: "resize" - } - member_method { - name: "round" - } - member_method { - name: "searchsorted" - } - member_method { - name: "setfield" - } - member_method { - name: "setflags" - } - member_method { - name: "sort" - } - member_method { - name: "squeeze" - } - member_method { - name: "std" - } - member_method { - name: "sum" - } - member_method { - name: "swapaxes" - } - member_method { - name: "take" - } - member_method { - name: "tobytes" - } - member_method { - name: "tofile" - } - member_method { - name: "tolist" - } - member_method { - name: "tostring" - } - member_method { - name: "trace" - } - member_method { - name: "transpose" - } - member_method { - name: "var" - } - member_method { - name: "view" - } -} diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.float16.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.float16.pbtxt deleted file mode 100644 index 92075608841..00000000000 --- a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.float16.pbtxt +++ /dev/null @@ -1,222 +0,0 @@ -path: "tensorflow.experimental.numpy.float16" -tf_class { - is_instance: "" - member { - name: "T" - mtype: "" - } - member { - name: "base" - mtype: "" - } - member { - name: "data" - mtype: "" - } - member { - name: "dtype" - mtype: "" - } - member { - name: "flags" - mtype: "" - } - member { - name: "flat" - mtype: "" 
- } - member { - name: "imag" - mtype: "" - } - member { - name: "itemsize" - mtype: "" - } - member { - name: "nbytes" - mtype: "" - } - member { - name: "ndim" - mtype: "" - } - member { - name: "real" - mtype: "" - } - member { - name: "shape" - mtype: "" - } - member { - name: "size" - mtype: "" - } - member { - name: "strides" - mtype: "" - } - member_method { - name: "__init__" - } - member_method { - name: "all" - } - member_method { - name: "any" - } - member_method { - name: "argmax" - } - member_method { - name: "argmin" - } - member_method { - name: "argsort" - } - member_method { - name: "astype" - } - member_method { - name: "byteswap" - } - member_method { - name: "choose" - } - member_method { - name: "clip" - } - member_method { - name: "compress" - } - member_method { - name: "conj" - } - member_method { - name: "conjugate" - } - member_method { - name: "copy" - } - member_method { - name: "cumprod" - } - member_method { - name: "cumsum" - } - member_method { - name: "diagonal" - } - member_method { - name: "dump" - } - member_method { - name: "dumps" - } - member_method { - name: "fill" - } - member_method { - name: "flatten" - } - member_method { - name: "getfield" - } - member_method { - name: "item" - } - member_method { - name: "itemset" - } - member_method { - name: "max" - } - member_method { - name: "mean" - } - member_method { - name: "min" - } - member_method { - name: "newbyteorder" - } - member_method { - name: "nonzero" - } - member_method { - name: "prod" - } - member_method { - name: "ptp" - } - member_method { - name: "put" - } - member_method { - name: "ravel" - } - member_method { - name: "repeat" - } - member_method { - name: "reshape" - } - member_method { - name: "resize" - } - member_method { - name: "round" - } - member_method { - name: "searchsorted" - } - member_method { - name: "setfield" - } - member_method { - name: "setflags" - } - member_method { - name: "sort" - } - member_method { - name: "squeeze" - } - member_method { - name: "std" - } - member_method { - name: "sum" - } - member_method { - name: "swapaxes" - } - member_method { - name: "take" - } - member_method { - name: "tobytes" - } - member_method { - name: "tofile" - } - member_method { - name: "tolist" - } - member_method { - name: "tostring" - } - member_method { - name: "trace" - } - member_method { - name: "transpose" - } - member_method { - name: "var" - } - member_method { - name: "view" - } -} diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.float32.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.float32.pbtxt deleted file mode 100644 index 8c6c2a7eeac..00000000000 --- a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.float32.pbtxt +++ /dev/null @@ -1,222 +0,0 @@ -path: "tensorflow.experimental.numpy.float32" -tf_class { - is_instance: "" - member { - name: "T" - mtype: "" - } - member { - name: "base" - mtype: "" - } - member { - name: "data" - mtype: "" - } - member { - name: "dtype" - mtype: "" - } - member { - name: "flags" - mtype: "" - } - member { - name: "flat" - mtype: "" - } - member { - name: "imag" - mtype: "" - } - member { - name: "itemsize" - mtype: "" - } - member { - name: "nbytes" - mtype: "" - } - member { - name: "ndim" - mtype: "" - } - member { - name: "real" - mtype: "" - } - member { - name: "shape" - mtype: "" - } - member { - name: "size" - mtype: "" - } - member { - name: "strides" - mtype: "" - } - member_method { - name: "__init__" - } - member_method { - name: "all" - } - 
member_method { - name: "any" - } - member_method { - name: "argmax" - } - member_method { - name: "argmin" - } - member_method { - name: "argsort" - } - member_method { - name: "astype" - } - member_method { - name: "byteswap" - } - member_method { - name: "choose" - } - member_method { - name: "clip" - } - member_method { - name: "compress" - } - member_method { - name: "conj" - } - member_method { - name: "conjugate" - } - member_method { - name: "copy" - } - member_method { - name: "cumprod" - } - member_method { - name: "cumsum" - } - member_method { - name: "diagonal" - } - member_method { - name: "dump" - } - member_method { - name: "dumps" - } - member_method { - name: "fill" - } - member_method { - name: "flatten" - } - member_method { - name: "getfield" - } - member_method { - name: "item" - } - member_method { - name: "itemset" - } - member_method { - name: "max" - } - member_method { - name: "mean" - } - member_method { - name: "min" - } - member_method { - name: "newbyteorder" - } - member_method { - name: "nonzero" - } - member_method { - name: "prod" - } - member_method { - name: "ptp" - } - member_method { - name: "put" - } - member_method { - name: "ravel" - } - member_method { - name: "repeat" - } - member_method { - name: "reshape" - } - member_method { - name: "resize" - } - member_method { - name: "round" - } - member_method { - name: "searchsorted" - } - member_method { - name: "setfield" - } - member_method { - name: "setflags" - } - member_method { - name: "sort" - } - member_method { - name: "squeeze" - } - member_method { - name: "std" - } - member_method { - name: "sum" - } - member_method { - name: "swapaxes" - } - member_method { - name: "take" - } - member_method { - name: "tobytes" - } - member_method { - name: "tofile" - } - member_method { - name: "tolist" - } - member_method { - name: "tostring" - } - member_method { - name: "trace" - } - member_method { - name: "transpose" - } - member_method { - name: "var" - } - member_method { - name: "view" - } -} diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.float64.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.float64.pbtxt deleted file mode 100644 index 44e6fa28648..00000000000 --- a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.float64.pbtxt +++ /dev/null @@ -1,234 +0,0 @@ -path: "tensorflow.experimental.numpy.float64" -tf_class { - is_instance: "" - member { - name: "T" - mtype: "" - } - member { - name: "base" - mtype: "" - } - member { - name: "data" - mtype: "" - } - member { - name: "dtype" - mtype: "" - } - member { - name: "flags" - mtype: "" - } - member { - name: "flat" - mtype: "" - } - member { - name: "imag" - mtype: "" - } - member { - name: "itemsize" - mtype: "" - } - member { - name: "nbytes" - mtype: "" - } - member { - name: "ndim" - mtype: "" - } - member { - name: "real" - mtype: "" - } - member { - name: "shape" - mtype: "" - } - member { - name: "size" - mtype: "" - } - member { - name: "strides" - mtype: "" - } - member_method { - name: "__init__" - } - member_method { - name: "all" - } - member_method { - name: "any" - } - member_method { - name: "argmax" - } - member_method { - name: "argmin" - } - member_method { - name: "argsort" - } - member_method { - name: "as_integer_ratio" - } - member_method { - name: "astype" - } - member_method { - name: "byteswap" - } - member_method { - name: "choose" - } - member_method { - name: "clip" - } - member_method { - name: "compress" - } - member_method { - name: "conj" - } - 
member_method { - name: "conjugate" - } - member_method { - name: "copy" - } - member_method { - name: "cumprod" - } - member_method { - name: "cumsum" - } - member_method { - name: "diagonal" - } - member_method { - name: "dump" - } - member_method { - name: "dumps" - } - member_method { - name: "fill" - } - member_method { - name: "flatten" - } - member_method { - name: "fromhex" - } - member_method { - name: "getfield" - } - member_method { - name: "hex" - } - member_method { - name: "is_integer" - } - member_method { - name: "item" - } - member_method { - name: "itemset" - } - member_method { - name: "max" - } - member_method { - name: "mean" - } - member_method { - name: "min" - } - member_method { - name: "newbyteorder" - } - member_method { - name: "nonzero" - } - member_method { - name: "prod" - } - member_method { - name: "ptp" - } - member_method { - name: "put" - } - member_method { - name: "ravel" - } - member_method { - name: "repeat" - } - member_method { - name: "reshape" - } - member_method { - name: "resize" - } - member_method { - name: "round" - } - member_method { - name: "searchsorted" - } - member_method { - name: "setfield" - } - member_method { - name: "setflags" - } - member_method { - name: "sort" - } - member_method { - name: "squeeze" - } - member_method { - name: "std" - } - member_method { - name: "sum" - } - member_method { - name: "swapaxes" - } - member_method { - name: "take" - } - member_method { - name: "tobytes" - } - member_method { - name: "tofile" - } - member_method { - name: "tolist" - } - member_method { - name: "tostring" - } - member_method { - name: "trace" - } - member_method { - name: "transpose" - } - member_method { - name: "var" - } - member_method { - name: "view" - } -} diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.float_.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.float_.pbtxt deleted file mode 100644 index 781e79962ea..00000000000 --- a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.float_.pbtxt +++ /dev/null @@ -1,234 +0,0 @@ -path: "tensorflow.experimental.numpy.float_" -tf_class { - is_instance: "" - member { - name: "T" - mtype: "" - } - member { - name: "base" - mtype: "" - } - member { - name: "data" - mtype: "" - } - member { - name: "dtype" - mtype: "" - } - member { - name: "flags" - mtype: "" - } - member { - name: "flat" - mtype: "" - } - member { - name: "imag" - mtype: "" - } - member { - name: "itemsize" - mtype: "" - } - member { - name: "nbytes" - mtype: "" - } - member { - name: "ndim" - mtype: "" - } - member { - name: "real" - mtype: "" - } - member { - name: "shape" - mtype: "" - } - member { - name: "size" - mtype: "" - } - member { - name: "strides" - mtype: "" - } - member_method { - name: "__init__" - } - member_method { - name: "all" - } - member_method { - name: "any" - } - member_method { - name: "argmax" - } - member_method { - name: "argmin" - } - member_method { - name: "argsort" - } - member_method { - name: "as_integer_ratio" - } - member_method { - name: "astype" - } - member_method { - name: "byteswap" - } - member_method { - name: "choose" - } - member_method { - name: "clip" - } - member_method { - name: "compress" - } - member_method { - name: "conj" - } - member_method { - name: "conjugate" - } - member_method { - name: "copy" - } - member_method { - name: "cumprod" - } - member_method { - name: "cumsum" - } - member_method { - name: "diagonal" - } - member_method { - name: "dump" - } - member_method { - name: "dumps" - } - 
member_method { - name: "fill" - } - member_method { - name: "flatten" - } - member_method { - name: "fromhex" - } - member_method { - name: "getfield" - } - member_method { - name: "hex" - } - member_method { - name: "is_integer" - } - member_method { - name: "item" - } - member_method { - name: "itemset" - } - member_method { - name: "max" - } - member_method { - name: "mean" - } - member_method { - name: "min" - } - member_method { - name: "newbyteorder" - } - member_method { - name: "nonzero" - } - member_method { - name: "prod" - } - member_method { - name: "ptp" - } - member_method { - name: "put" - } - member_method { - name: "ravel" - } - member_method { - name: "repeat" - } - member_method { - name: "reshape" - } - member_method { - name: "resize" - } - member_method { - name: "round" - } - member_method { - name: "searchsorted" - } - member_method { - name: "setfield" - } - member_method { - name: "setflags" - } - member_method { - name: "sort" - } - member_method { - name: "squeeze" - } - member_method { - name: "std" - } - member_method { - name: "sum" - } - member_method { - name: "swapaxes" - } - member_method { - name: "take" - } - member_method { - name: "tobytes" - } - member_method { - name: "tofile" - } - member_method { - name: "tolist" - } - member_method { - name: "tostring" - } - member_method { - name: "trace" - } - member_method { - name: "transpose" - } - member_method { - name: "var" - } - member_method { - name: "view" - } -} diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.inexact.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.inexact.pbtxt deleted file mode 100644 index 3aa95703b61..00000000000 --- a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.inexact.pbtxt +++ /dev/null @@ -1,222 +0,0 @@ -path: "tensorflow.experimental.numpy.inexact" -tf_class { - is_instance: "" - member { - name: "T" - mtype: "" - } - member { - name: "base" - mtype: "" - } - member { - name: "data" - mtype: "" - } - member { - name: "dtype" - mtype: "" - } - member { - name: "flags" - mtype: "" - } - member { - name: "flat" - mtype: "" - } - member { - name: "imag" - mtype: "" - } - member { - name: "itemsize" - mtype: "" - } - member { - name: "nbytes" - mtype: "" - } - member { - name: "ndim" - mtype: "" - } - member { - name: "real" - mtype: "" - } - member { - name: "shape" - mtype: "" - } - member { - name: "size" - mtype: "" - } - member { - name: "strides" - mtype: "" - } - member_method { - name: "__init__" - } - member_method { - name: "all" - } - member_method { - name: "any" - } - member_method { - name: "argmax" - } - member_method { - name: "argmin" - } - member_method { - name: "argsort" - } - member_method { - name: "astype" - } - member_method { - name: "byteswap" - } - member_method { - name: "choose" - } - member_method { - name: "clip" - } - member_method { - name: "compress" - } - member_method { - name: "conj" - } - member_method { - name: "conjugate" - } - member_method { - name: "copy" - } - member_method { - name: "cumprod" - } - member_method { - name: "cumsum" - } - member_method { - name: "diagonal" - } - member_method { - name: "dump" - } - member_method { - name: "dumps" - } - member_method { - name: "fill" - } - member_method { - name: "flatten" - } - member_method { - name: "getfield" - } - member_method { - name: "item" - } - member_method { - name: "itemset" - } - member_method { - name: "max" - } - member_method { - name: "mean" - } - member_method { - name: "min" - } - member_method 
{ - name: "newbyteorder" - } - member_method { - name: "nonzero" - } - member_method { - name: "prod" - } - member_method { - name: "ptp" - } - member_method { - name: "put" - } - member_method { - name: "ravel" - } - member_method { - name: "repeat" - } - member_method { - name: "reshape" - } - member_method { - name: "resize" - } - member_method { - name: "round" - } - member_method { - name: "searchsorted" - } - member_method { - name: "setfield" - } - member_method { - name: "setflags" - } - member_method { - name: "sort" - } - member_method { - name: "squeeze" - } - member_method { - name: "std" - } - member_method { - name: "sum" - } - member_method { - name: "swapaxes" - } - member_method { - name: "take" - } - member_method { - name: "tobytes" - } - member_method { - name: "tofile" - } - member_method { - name: "tolist" - } - member_method { - name: "tostring" - } - member_method { - name: "trace" - } - member_method { - name: "transpose" - } - member_method { - name: "var" - } - member_method { - name: "view" - } -} diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int16.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int16.pbtxt deleted file mode 100644 index d972ab938c9..00000000000 --- a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int16.pbtxt +++ /dev/null @@ -1,230 +0,0 @@ -path: "tensorflow.experimental.numpy.int16" -tf_class { - is_instance: "" - member { - name: "T" - mtype: "" - } - member { - name: "base" - mtype: "" - } - member { - name: "data" - mtype: "" - } - member { - name: "denominator" - mtype: "" - } - member { - name: "dtype" - mtype: "" - } - member { - name: "flags" - mtype: "" - } - member { - name: "flat" - mtype: "" - } - member { - name: "imag" - mtype: "" - } - member { - name: "itemsize" - mtype: "" - } - member { - name: "nbytes" - mtype: "" - } - member { - name: "ndim" - mtype: "" - } - member { - name: "numerator" - mtype: "" - } - member { - name: "real" - mtype: "" - } - member { - name: "shape" - mtype: "" - } - member { - name: "size" - mtype: "" - } - member { - name: "strides" - mtype: "" - } - member_method { - name: "__init__" - } - member_method { - name: "all" - } - member_method { - name: "any" - } - member_method { - name: "argmax" - } - member_method { - name: "argmin" - } - member_method { - name: "argsort" - } - member_method { - name: "astype" - } - member_method { - name: "byteswap" - } - member_method { - name: "choose" - } - member_method { - name: "clip" - } - member_method { - name: "compress" - } - member_method { - name: "conj" - } - member_method { - name: "conjugate" - } - member_method { - name: "copy" - } - member_method { - name: "cumprod" - } - member_method { - name: "cumsum" - } - member_method { - name: "diagonal" - } - member_method { - name: "dump" - } - member_method { - name: "dumps" - } - member_method { - name: "fill" - } - member_method { - name: "flatten" - } - member_method { - name: "getfield" - } - member_method { - name: "item" - } - member_method { - name: "itemset" - } - member_method { - name: "max" - } - member_method { - name: "mean" - } - member_method { - name: "min" - } - member_method { - name: "newbyteorder" - } - member_method { - name: "nonzero" - } - member_method { - name: "prod" - } - member_method { - name: "ptp" - } - member_method { - name: "put" - } - member_method { - name: "ravel" - } - member_method { - name: "repeat" - } - member_method { - name: "reshape" - } - member_method { - name: "resize" - } - member_method 
{ - name: "round" - } - member_method { - name: "searchsorted" - } - member_method { - name: "setfield" - } - member_method { - name: "setflags" - } - member_method { - name: "sort" - } - member_method { - name: "squeeze" - } - member_method { - name: "std" - } - member_method { - name: "sum" - } - member_method { - name: "swapaxes" - } - member_method { - name: "take" - } - member_method { - name: "tobytes" - } - member_method { - name: "tofile" - } - member_method { - name: "tolist" - } - member_method { - name: "tostring" - } - member_method { - name: "trace" - } - member_method { - name: "transpose" - } - member_method { - name: "var" - } - member_method { - name: "view" - } -} diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int32.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int32.pbtxt deleted file mode 100644 index b5f3ce60b0b..00000000000 --- a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int32.pbtxt +++ /dev/null @@ -1,230 +0,0 @@ -path: "tensorflow.experimental.numpy.int32" -tf_class { - is_instance: "" - member { - name: "T" - mtype: "" - } - member { - name: "base" - mtype: "" - } - member { - name: "data" - mtype: "" - } - member { - name: "denominator" - mtype: "" - } - member { - name: "dtype" - mtype: "" - } - member { - name: "flags" - mtype: "" - } - member { - name: "flat" - mtype: "" - } - member { - name: "imag" - mtype: "" - } - member { - name: "itemsize" - mtype: "" - } - member { - name: "nbytes" - mtype: "" - } - member { - name: "ndim" - mtype: "" - } - member { - name: "numerator" - mtype: "" - } - member { - name: "real" - mtype: "" - } - member { - name: "shape" - mtype: "" - } - member { - name: "size" - mtype: "" - } - member { - name: "strides" - mtype: "" - } - member_method { - name: "__init__" - } - member_method { - name: "all" - } - member_method { - name: "any" - } - member_method { - name: "argmax" - } - member_method { - name: "argmin" - } - member_method { - name: "argsort" - } - member_method { - name: "astype" - } - member_method { - name: "byteswap" - } - member_method { - name: "choose" - } - member_method { - name: "clip" - } - member_method { - name: "compress" - } - member_method { - name: "conj" - } - member_method { - name: "conjugate" - } - member_method { - name: "copy" - } - member_method { - name: "cumprod" - } - member_method { - name: "cumsum" - } - member_method { - name: "diagonal" - } - member_method { - name: "dump" - } - member_method { - name: "dumps" - } - member_method { - name: "fill" - } - member_method { - name: "flatten" - } - member_method { - name: "getfield" - } - member_method { - name: "item" - } - member_method { - name: "itemset" - } - member_method { - name: "max" - } - member_method { - name: "mean" - } - member_method { - name: "min" - } - member_method { - name: "newbyteorder" - } - member_method { - name: "nonzero" - } - member_method { - name: "prod" - } - member_method { - name: "ptp" - } - member_method { - name: "put" - } - member_method { - name: "ravel" - } - member_method { - name: "repeat" - } - member_method { - name: "reshape" - } - member_method { - name: "resize" - } - member_method { - name: "round" - } - member_method { - name: "searchsorted" - } - member_method { - name: "setfield" - } - member_method { - name: "setflags" - } - member_method { - name: "sort" - } - member_method { - name: "squeeze" - } - member_method { - name: "std" - } - member_method { - name: "sum" - } - member_method { - name: "swapaxes" - } - 
member_method { - name: "take" - } - member_method { - name: "tobytes" - } - member_method { - name: "tofile" - } - member_method { - name: "tolist" - } - member_method { - name: "tostring" - } - member_method { - name: "trace" - } - member_method { - name: "transpose" - } - member_method { - name: "var" - } - member_method { - name: "view" - } -} diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int64.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int64.pbtxt deleted file mode 100644 index a6e6661ebd5..00000000000 --- a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int64.pbtxt +++ /dev/null @@ -1,230 +0,0 @@ -path: "tensorflow.experimental.numpy.int64" -tf_class { - is_instance: "" - member { - name: "T" - mtype: "" - } - member { - name: "base" - mtype: "" - } - member { - name: "data" - mtype: "" - } - member { - name: "denominator" - mtype: "" - } - member { - name: "dtype" - mtype: "" - } - member { - name: "flags" - mtype: "" - } - member { - name: "flat" - mtype: "" - } - member { - name: "imag" - mtype: "" - } - member { - name: "itemsize" - mtype: "" - } - member { - name: "nbytes" - mtype: "" - } - member { - name: "ndim" - mtype: "" - } - member { - name: "numerator" - mtype: "" - } - member { - name: "real" - mtype: "" - } - member { - name: "shape" - mtype: "" - } - member { - name: "size" - mtype: "" - } - member { - name: "strides" - mtype: "" - } - member_method { - name: "__init__" - } - member_method { - name: "all" - } - member_method { - name: "any" - } - member_method { - name: "argmax" - } - member_method { - name: "argmin" - } - member_method { - name: "argsort" - } - member_method { - name: "astype" - } - member_method { - name: "byteswap" - } - member_method { - name: "choose" - } - member_method { - name: "clip" - } - member_method { - name: "compress" - } - member_method { - name: "conj" - } - member_method { - name: "conjugate" - } - member_method { - name: "copy" - } - member_method { - name: "cumprod" - } - member_method { - name: "cumsum" - } - member_method { - name: "diagonal" - } - member_method { - name: "dump" - } - member_method { - name: "dumps" - } - member_method { - name: "fill" - } - member_method { - name: "flatten" - } - member_method { - name: "getfield" - } - member_method { - name: "item" - } - member_method { - name: "itemset" - } - member_method { - name: "max" - } - member_method { - name: "mean" - } - member_method { - name: "min" - } - member_method { - name: "newbyteorder" - } - member_method { - name: "nonzero" - } - member_method { - name: "prod" - } - member_method { - name: "ptp" - } - member_method { - name: "put" - } - member_method { - name: "ravel" - } - member_method { - name: "repeat" - } - member_method { - name: "reshape" - } - member_method { - name: "resize" - } - member_method { - name: "round" - } - member_method { - name: "searchsorted" - } - member_method { - name: "setfield" - } - member_method { - name: "setflags" - } - member_method { - name: "sort" - } - member_method { - name: "squeeze" - } - member_method { - name: "std" - } - member_method { - name: "sum" - } - member_method { - name: "swapaxes" - } - member_method { - name: "take" - } - member_method { - name: "tobytes" - } - member_method { - name: "tofile" - } - member_method { - name: "tolist" - } - member_method { - name: "tostring" - } - member_method { - name: "trace" - } - member_method { - name: "transpose" - } - member_method { - name: "var" - } - member_method { - name: "view" - } -} 
diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int8.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int8.pbtxt deleted file mode 100644 index 52b7787ddfc..00000000000 --- a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int8.pbtxt +++ /dev/null @@ -1,230 +0,0 @@ -path: "tensorflow.experimental.numpy.int8" -tf_class { - is_instance: "" - member { - name: "T" - mtype: "" - } - member { - name: "base" - mtype: "" - } - member { - name: "data" - mtype: "" - } - member { - name: "denominator" - mtype: "" - } - member { - name: "dtype" - mtype: "" - } - member { - name: "flags" - mtype: "" - } - member { - name: "flat" - mtype: "" - } - member { - name: "imag" - mtype: "" - } - member { - name: "itemsize" - mtype: "" - } - member { - name: "nbytes" - mtype: "" - } - member { - name: "ndim" - mtype: "" - } - member { - name: "numerator" - mtype: "" - } - member { - name: "real" - mtype: "" - } - member { - name: "shape" - mtype: "" - } - member { - name: "size" - mtype: "" - } - member { - name: "strides" - mtype: "" - } - member_method { - name: "__init__" - } - member_method { - name: "all" - } - member_method { - name: "any" - } - member_method { - name: "argmax" - } - member_method { - name: "argmin" - } - member_method { - name: "argsort" - } - member_method { - name: "astype" - } - member_method { - name: "byteswap" - } - member_method { - name: "choose" - } - member_method { - name: "clip" - } - member_method { - name: "compress" - } - member_method { - name: "conj" - } - member_method { - name: "conjugate" - } - member_method { - name: "copy" - } - member_method { - name: "cumprod" - } - member_method { - name: "cumsum" - } - member_method { - name: "diagonal" - } - member_method { - name: "dump" - } - member_method { - name: "dumps" - } - member_method { - name: "fill" - } - member_method { - name: "flatten" - } - member_method { - name: "getfield" - } - member_method { - name: "item" - } - member_method { - name: "itemset" - } - member_method { - name: "max" - } - member_method { - name: "mean" - } - member_method { - name: "min" - } - member_method { - name: "newbyteorder" - } - member_method { - name: "nonzero" - } - member_method { - name: "prod" - } - member_method { - name: "ptp" - } - member_method { - name: "put" - } - member_method { - name: "ravel" - } - member_method { - name: "repeat" - } - member_method { - name: "reshape" - } - member_method { - name: "resize" - } - member_method { - name: "round" - } - member_method { - name: "searchsorted" - } - member_method { - name: "setfield" - } - member_method { - name: "setflags" - } - member_method { - name: "sort" - } - member_method { - name: "squeeze" - } - member_method { - name: "std" - } - member_method { - name: "sum" - } - member_method { - name: "swapaxes" - } - member_method { - name: "take" - } - member_method { - name: "tobytes" - } - member_method { - name: "tofile" - } - member_method { - name: "tolist" - } - member_method { - name: "tostring" - } - member_method { - name: "trace" - } - member_method { - name: "transpose" - } - member_method { - name: "var" - } - member_method { - name: "view" - } -} diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int_.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int_.pbtxt deleted file mode 100644 index 089c50dcf51..00000000000 --- a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int_.pbtxt +++ /dev/null @@ -1,230 +0,0 @@ -path: 
"tensorflow.experimental.numpy.int_" -tf_class { - is_instance: "" - member { - name: "T" - mtype: "" - } - member { - name: "base" - mtype: "" - } - member { - name: "data" - mtype: "" - } - member { - name: "denominator" - mtype: "" - } - member { - name: "dtype" - mtype: "" - } - member { - name: "flags" - mtype: "" - } - member { - name: "flat" - mtype: "" - } - member { - name: "imag" - mtype: "" - } - member { - name: "itemsize" - mtype: "" - } - member { - name: "nbytes" - mtype: "" - } - member { - name: "ndim" - mtype: "" - } - member { - name: "numerator" - mtype: "" - } - member { - name: "real" - mtype: "" - } - member { - name: "shape" - mtype: "" - } - member { - name: "size" - mtype: "" - } - member { - name: "strides" - mtype: "" - } - member_method { - name: "__init__" - } - member_method { - name: "all" - } - member_method { - name: "any" - } - member_method { - name: "argmax" - } - member_method { - name: "argmin" - } - member_method { - name: "argsort" - } - member_method { - name: "astype" - } - member_method { - name: "byteswap" - } - member_method { - name: "choose" - } - member_method { - name: "clip" - } - member_method { - name: "compress" - } - member_method { - name: "conj" - } - member_method { - name: "conjugate" - } - member_method { - name: "copy" - } - member_method { - name: "cumprod" - } - member_method { - name: "cumsum" - } - member_method { - name: "diagonal" - } - member_method { - name: "dump" - } - member_method { - name: "dumps" - } - member_method { - name: "fill" - } - member_method { - name: "flatten" - } - member_method { - name: "getfield" - } - member_method { - name: "item" - } - member_method { - name: "itemset" - } - member_method { - name: "max" - } - member_method { - name: "mean" - } - member_method { - name: "min" - } - member_method { - name: "newbyteorder" - } - member_method { - name: "nonzero" - } - member_method { - name: "prod" - } - member_method { - name: "ptp" - } - member_method { - name: "put" - } - member_method { - name: "ravel" - } - member_method { - name: "repeat" - } - member_method { - name: "reshape" - } - member_method { - name: "resize" - } - member_method { - name: "round" - } - member_method { - name: "searchsorted" - } - member_method { - name: "setfield" - } - member_method { - name: "setflags" - } - member_method { - name: "sort" - } - member_method { - name: "squeeze" - } - member_method { - name: "std" - } - member_method { - name: "sum" - } - member_method { - name: "swapaxes" - } - member_method { - name: "take" - } - member_method { - name: "tobytes" - } - member_method { - name: "tofile" - } - member_method { - name: "tolist" - } - member_method { - name: "tostring" - } - member_method { - name: "trace" - } - member_method { - name: "transpose" - } - member_method { - name: "var" - } - member_method { - name: "view" - } -} diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.object_.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.object_.pbtxt deleted file mode 100644 index 1fe27281e15..00000000000 --- a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.object_.pbtxt +++ /dev/null @@ -1,222 +0,0 @@ -path: "tensorflow.experimental.numpy.object_" -tf_class { - is_instance: "" - member { - name: "T" - mtype: "" - } - member { - name: "base" - mtype: "" - } - member { - name: "data" - mtype: "" - } - member { - name: "dtype" - mtype: "" - } - member { - name: "flags" - mtype: "" - } - member { - name: "flat" - mtype: "" - } - member { - name: 
"imag" - mtype: "" - } - member { - name: "itemsize" - mtype: "" - } - member { - name: "nbytes" - mtype: "" - } - member { - name: "ndim" - mtype: "" - } - member { - name: "real" - mtype: "" - } - member { - name: "shape" - mtype: "" - } - member { - name: "size" - mtype: "" - } - member { - name: "strides" - mtype: "" - } - member_method { - name: "__init__" - } - member_method { - name: "all" - } - member_method { - name: "any" - } - member_method { - name: "argmax" - } - member_method { - name: "argmin" - } - member_method { - name: "argsort" - } - member_method { - name: "astype" - } - member_method { - name: "byteswap" - } - member_method { - name: "choose" - } - member_method { - name: "clip" - } - member_method { - name: "compress" - } - member_method { - name: "conj" - } - member_method { - name: "conjugate" - } - member_method { - name: "copy" - } - member_method { - name: "cumprod" - } - member_method { - name: "cumsum" - } - member_method { - name: "diagonal" - } - member_method { - name: "dump" - } - member_method { - name: "dumps" - } - member_method { - name: "fill" - } - member_method { - name: "flatten" - } - member_method { - name: "getfield" - } - member_method { - name: "item" - } - member_method { - name: "itemset" - } - member_method { - name: "max" - } - member_method { - name: "mean" - } - member_method { - name: "min" - } - member_method { - name: "newbyteorder" - } - member_method { - name: "nonzero" - } - member_method { - name: "prod" - } - member_method { - name: "ptp" - } - member_method { - name: "put" - } - member_method { - name: "ravel" - } - member_method { - name: "repeat" - } - member_method { - name: "reshape" - } - member_method { - name: "resize" - } - member_method { - name: "round" - } - member_method { - name: "searchsorted" - } - member_method { - name: "setfield" - } - member_method { - name: "setflags" - } - member_method { - name: "sort" - } - member_method { - name: "squeeze" - } - member_method { - name: "std" - } - member_method { - name: "sum" - } - member_method { - name: "swapaxes" - } - member_method { - name: "take" - } - member_method { - name: "tobytes" - } - member_method { - name: "tofile" - } - member_method { - name: "tolist" - } - member_method { - name: "tostring" - } - member_method { - name: "trace" - } - member_method { - name: "transpose" - } - member_method { - name: "var" - } - member_method { - name: "view" - } -} diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.uint16.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.uint16.pbtxt deleted file mode 100644 index e590dac994d..00000000000 --- a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.uint16.pbtxt +++ /dev/null @@ -1,230 +0,0 @@ -path: "tensorflow.experimental.numpy.uint16" -tf_class { - is_instance: "" - member { - name: "T" - mtype: "" - } - member { - name: "base" - mtype: "" - } - member { - name: "data" - mtype: "" - } - member { - name: "denominator" - mtype: "" - } - member { - name: "dtype" - mtype: "" - } - member { - name: "flags" - mtype: "" - } - member { - name: "flat" - mtype: "" - } - member { - name: "imag" - mtype: "" - } - member { - name: "itemsize" - mtype: "" - } - member { - name: "nbytes" - mtype: "" - } - member { - name: "ndim" - mtype: "" - } - member { - name: "numerator" - mtype: "" - } - member { - name: "real" - mtype: "" - } - member { - name: "shape" - mtype: "" - } - member { - name: "size" - mtype: "" - } - member { - name: "strides" - mtype: "" - } - member_method { 
- name: "__init__" - } - member_method { - name: "all" - } - member_method { - name: "any" - } - member_method { - name: "argmax" - } - member_method { - name: "argmin" - } - member_method { - name: "argsort" - } - member_method { - name: "astype" - } - member_method { - name: "byteswap" - } - member_method { - name: "choose" - } - member_method { - name: "clip" - } - member_method { - name: "compress" - } - member_method { - name: "conj" - } - member_method { - name: "conjugate" - } - member_method { - name: "copy" - } - member_method { - name: "cumprod" - } - member_method { - name: "cumsum" - } - member_method { - name: "diagonal" - } - member_method { - name: "dump" - } - member_method { - name: "dumps" - } - member_method { - name: "fill" - } - member_method { - name: "flatten" - } - member_method { - name: "getfield" - } - member_method { - name: "item" - } - member_method { - name: "itemset" - } - member_method { - name: "max" - } - member_method { - name: "mean" - } - member_method { - name: "min" - } - member_method { - name: "newbyteorder" - } - member_method { - name: "nonzero" - } - member_method { - name: "prod" - } - member_method { - name: "ptp" - } - member_method { - name: "put" - } - member_method { - name: "ravel" - } - member_method { - name: "repeat" - } - member_method { - name: "reshape" - } - member_method { - name: "resize" - } - member_method { - name: "round" - } - member_method { - name: "searchsorted" - } - member_method { - name: "setfield" - } - member_method { - name: "setflags" - } - member_method { - name: "sort" - } - member_method { - name: "squeeze" - } - member_method { - name: "std" - } - member_method { - name: "sum" - } - member_method { - name: "swapaxes" - } - member_method { - name: "take" - } - member_method { - name: "tobytes" - } - member_method { - name: "tofile" - } - member_method { - name: "tolist" - } - member_method { - name: "tostring" - } - member_method { - name: "trace" - } - member_method { - name: "transpose" - } - member_method { - name: "var" - } - member_method { - name: "view" - } -} diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.uint32.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.uint32.pbtxt deleted file mode 100644 index 35b9a962aef..00000000000 --- a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.uint32.pbtxt +++ /dev/null @@ -1,230 +0,0 @@ -path: "tensorflow.experimental.numpy.uint32" -tf_class { - is_instance: "" - member { - name: "T" - mtype: "" - } - member { - name: "base" - mtype: "" - } - member { - name: "data" - mtype: "" - } - member { - name: "denominator" - mtype: "" - } - member { - name: "dtype" - mtype: "" - } - member { - name: "flags" - mtype: "" - } - member { - name: "flat" - mtype: "" - } - member { - name: "imag" - mtype: "" - } - member { - name: "itemsize" - mtype: "" - } - member { - name: "nbytes" - mtype: "" - } - member { - name: "ndim" - mtype: "" - } - member { - name: "numerator" - mtype: "" - } - member { - name: "real" - mtype: "" - } - member { - name: "shape" - mtype: "" - } - member { - name: "size" - mtype: "" - } - member { - name: "strides" - mtype: "" - } - member_method { - name: "__init__" - } - member_method { - name: "all" - } - member_method { - name: "any" - } - member_method { - name: "argmax" - } - member_method { - name: "argmin" - } - member_method { - name: "argsort" - } - member_method { - name: "astype" - } - member_method { - name: "byteswap" - } - member_method { - name: "choose" - } - 
member_method { - name: "clip" - } - member_method { - name: "compress" - } - member_method { - name: "conj" - } - member_method { - name: "conjugate" - } - member_method { - name: "copy" - } - member_method { - name: "cumprod" - } - member_method { - name: "cumsum" - } - member_method { - name: "diagonal" - } - member_method { - name: "dump" - } - member_method { - name: "dumps" - } - member_method { - name: "fill" - } - member_method { - name: "flatten" - } - member_method { - name: "getfield" - } - member_method { - name: "item" - } - member_method { - name: "itemset" - } - member_method { - name: "max" - } - member_method { - name: "mean" - } - member_method { - name: "min" - } - member_method { - name: "newbyteorder" - } - member_method { - name: "nonzero" - } - member_method { - name: "prod" - } - member_method { - name: "ptp" - } - member_method { - name: "put" - } - member_method { - name: "ravel" - } - member_method { - name: "repeat" - } - member_method { - name: "reshape" - } - member_method { - name: "resize" - } - member_method { - name: "round" - } - member_method { - name: "searchsorted" - } - member_method { - name: "setfield" - } - member_method { - name: "setflags" - } - member_method { - name: "sort" - } - member_method { - name: "squeeze" - } - member_method { - name: "std" - } - member_method { - name: "sum" - } - member_method { - name: "swapaxes" - } - member_method { - name: "take" - } - member_method { - name: "tobytes" - } - member_method { - name: "tofile" - } - member_method { - name: "tolist" - } - member_method { - name: "tostring" - } - member_method { - name: "trace" - } - member_method { - name: "transpose" - } - member_method { - name: "var" - } - member_method { - name: "view" - } -} diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.uint64.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.uint64.pbtxt deleted file mode 100644 index 867e55c9712..00000000000 --- a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.uint64.pbtxt +++ /dev/null @@ -1,230 +0,0 @@ -path: "tensorflow.experimental.numpy.uint64" -tf_class { - is_instance: "" - member { - name: "T" - mtype: "" - } - member { - name: "base" - mtype: "" - } - member { - name: "data" - mtype: "" - } - member { - name: "denominator" - mtype: "" - } - member { - name: "dtype" - mtype: "" - } - member { - name: "flags" - mtype: "" - } - member { - name: "flat" - mtype: "" - } - member { - name: "imag" - mtype: "" - } - member { - name: "itemsize" - mtype: "" - } - member { - name: "nbytes" - mtype: "" - } - member { - name: "ndim" - mtype: "" - } - member { - name: "numerator" - mtype: "" - } - member { - name: "real" - mtype: "" - } - member { - name: "shape" - mtype: "" - } - member { - name: "size" - mtype: "" - } - member { - name: "strides" - mtype: "" - } - member_method { - name: "__init__" - } - member_method { - name: "all" - } - member_method { - name: "any" - } - member_method { - name: "argmax" - } - member_method { - name: "argmin" - } - member_method { - name: "argsort" - } - member_method { - name: "astype" - } - member_method { - name: "byteswap" - } - member_method { - name: "choose" - } - member_method { - name: "clip" - } - member_method { - name: "compress" - } - member_method { - name: "conj" - } - member_method { - name: "conjugate" - } - member_method { - name: "copy" - } - member_method { - name: "cumprod" - } - member_method { - name: "cumsum" - } - member_method { - name: "diagonal" - } - member_method { - name: "dump" 
- } - member_method { - name: "dumps" - } - member_method { - name: "fill" - } - member_method { - name: "flatten" - } - member_method { - name: "getfield" - } - member_method { - name: "item" - } - member_method { - name: "itemset" - } - member_method { - name: "max" - } - member_method { - name: "mean" - } - member_method { - name: "min" - } - member_method { - name: "newbyteorder" - } - member_method { - name: "nonzero" - } - member_method { - name: "prod" - } - member_method { - name: "ptp" - } - member_method { - name: "put" - } - member_method { - name: "ravel" - } - member_method { - name: "repeat" - } - member_method { - name: "reshape" - } - member_method { - name: "resize" - } - member_method { - name: "round" - } - member_method { - name: "searchsorted" - } - member_method { - name: "setfield" - } - member_method { - name: "setflags" - } - member_method { - name: "sort" - } - member_method { - name: "squeeze" - } - member_method { - name: "std" - } - member_method { - name: "sum" - } - member_method { - name: "swapaxes" - } - member_method { - name: "take" - } - member_method { - name: "tobytes" - } - member_method { - name: "tofile" - } - member_method { - name: "tolist" - } - member_method { - name: "tostring" - } - member_method { - name: "trace" - } - member_method { - name: "transpose" - } - member_method { - name: "var" - } - member_method { - name: "view" - } -} diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.uint8.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.uint8.pbtxt deleted file mode 100644 index 24d3ed0458a..00000000000 --- a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.uint8.pbtxt +++ /dev/null @@ -1,230 +0,0 @@ -path: "tensorflow.experimental.numpy.uint8" -tf_class { - is_instance: "" - member { - name: "T" - mtype: "" - } - member { - name: "base" - mtype: "" - } - member { - name: "data" - mtype: "" - } - member { - name: "denominator" - mtype: "" - } - member { - name: "dtype" - mtype: "" - } - member { - name: "flags" - mtype: "" - } - member { - name: "flat" - mtype: "" - } - member { - name: "imag" - mtype: "" - } - member { - name: "itemsize" - mtype: "" - } - member { - name: "nbytes" - mtype: "" - } - member { - name: "ndim" - mtype: "" - } - member { - name: "numerator" - mtype: "" - } - member { - name: "real" - mtype: "" - } - member { - name: "shape" - mtype: "" - } - member { - name: "size" - mtype: "" - } - member { - name: "strides" - mtype: "" - } - member_method { - name: "__init__" - } - member_method { - name: "all" - } - member_method { - name: "any" - } - member_method { - name: "argmax" - } - member_method { - name: "argmin" - } - member_method { - name: "argsort" - } - member_method { - name: "astype" - } - member_method { - name: "byteswap" - } - member_method { - name: "choose" - } - member_method { - name: "clip" - } - member_method { - name: "compress" - } - member_method { - name: "conj" - } - member_method { - name: "conjugate" - } - member_method { - name: "copy" - } - member_method { - name: "cumprod" - } - member_method { - name: "cumsum" - } - member_method { - name: "diagonal" - } - member_method { - name: "dump" - } - member_method { - name: "dumps" - } - member_method { - name: "fill" - } - member_method { - name: "flatten" - } - member_method { - name: "getfield" - } - member_method { - name: "item" - } - member_method { - name: "itemset" - } - member_method { - name: "max" - } - member_method { - name: "mean" - } - member_method { - name: "min" - } - 
member_method { - name: "newbyteorder" - } - member_method { - name: "nonzero" - } - member_method { - name: "prod" - } - member_method { - name: "ptp" - } - member_method { - name: "put" - } - member_method { - name: "ravel" - } - member_method { - name: "repeat" - } - member_method { - name: "reshape" - } - member_method { - name: "resize" - } - member_method { - name: "round" - } - member_method { - name: "searchsorted" - } - member_method { - name: "setfield" - } - member_method { - name: "setflags" - } - member_method { - name: "sort" - } - member_method { - name: "squeeze" - } - member_method { - name: "std" - } - member_method { - name: "sum" - } - member_method { - name: "swapaxes" - } - member_method { - name: "take" - } - member_method { - name: "tobytes" - } - member_method { - name: "tofile" - } - member_method { - name: "tolist" - } - member_method { - name: "tostring" - } - member_method { - name: "trace" - } - member_method { - name: "transpose" - } - member_method { - name: "var" - } - member_method { - name: "view" - } -} From 3a3a6cc417ce8bd5c4d05445d0ceea7dcd5661a6 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Fri, 17 Jul 2020 08:44:13 +0700 Subject: [PATCH 0662/2522] add tf_read_only_memory_region --- .../filesystem/plugins/s3/s3_filesystem.cc | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc index 318a64b720a..4dada85946a 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc @@ -442,8 +442,25 @@ void Close(const TF_WritableFile* file, TF_Status* status) { // SECTION 3. Implementation for `TF_ReadOnlyMemoryRegion` // ---------------------------------------------------------------------------- namespace tf_read_only_memory_region { +typedef struct S3MemoryRegion { + std::unique_ptr data; + uint64_t length; +} S3MemoryRegion; -// TODO(vnvo2409): Implement later +void Cleanup(TF_ReadOnlyMemoryRegion* region) { + auto r = static_cast(region->plugin_memory_region); + delete r; +} + +const void* Data(const TF_ReadOnlyMemoryRegion* region) { + auto r = static_cast(region->plugin_memory_region); + return reinterpret_cast(r->data.get()); +} + +uint64_t Length(const TF_ReadOnlyMemoryRegion* region) { + auto r = static_cast(region->plugin_memory_region); + return r->length; +} } // namespace tf_read_only_memory_region From e8800102376f2173d9898e9128de7b161701912e Mon Sep 17 00:00:00 2001 From: Peng Wang Date: Thu, 16 Jul 2020 19:28:15 -0700 Subject: [PATCH 0663/2522] [TF-numpy] Fixes numpy link in docstring. 
PiperOrigin-RevId: 321696462 Change-Id: I3caeec39932748d05c4bb3a9c287be01727c75fc --- tensorflow/python/ops/numpy_ops/np_utils.py | 5 +++-- tensorflow/python/ops/numpy_ops/np_utils_test.py | 11 ++++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/ops/numpy_ops/np_utils.py b/tensorflow/python/ops/numpy_ops/np_utils.py index ef55ffff6af..6a3b7a8e105 100644 --- a/tensorflow/python/ops/numpy_ops/np_utils.py +++ b/tensorflow/python/ops/numpy_ops/np_utils.py @@ -212,7 +212,7 @@ def _np_doc_helper(f, np_f, np_fun_name=None, unsupported_params=None): assert np_f or np_fun_name if not np_fun_name: np_fun_name = np_f.__name__ - doc = 'TensorFlow variant of `numpy.%s`.\n\n' % np_fun_name + doc = 'TensorFlow variant of NumPy\'s `%s`.\n\n' % np_fun_name if unsupported_params: doc += 'Unsupported arguments: ' + ', '.join( '`' + name + '`' for name in unsupported_params) + '.\n\n' @@ -293,7 +293,8 @@ def _add_np_doc(doc, np_fun_name, np_f): template = None if template is not None: link = template % np_fun_name - doc += 'See the documentation for `numpy.%s`: [%s]' % (np_fun_name, link) + doc += 'See the NumPy documentation for [`numpy.%s`](%s): %s' % ( + np_fun_name, link, link) return doc diff --git a/tensorflow/python/ops/numpy_ops/np_utils_test.py b/tensorflow/python/ops/numpy_ops/np_utils_test.py index 71444585600..d780c126a1d 100644 --- a/tensorflow/python/ops/numpy_ops/np_utils_test.py +++ b/tensorflow/python/ops/numpy_ops/np_utils_test.py @@ -46,7 +46,7 @@ class UtilsTest(test.TestCase, parameterized.TestCase): def f(): """f docstring.""" return - expected = """TensorFlow variant of `numpy.np_fun`. + expected = """TensorFlow variant of NumPy's `np_fun`. Unsupported arguments: `x`. @@ -75,13 +75,14 @@ np_fun docstring.""" def f(): """f docstring.""" return - expected = """TensorFlow variant of `numpy.np_fun`. + expected = """TensorFlow variant of NumPy's `np_fun`. Unsupported arguments: `x`. f docstring. -See the documentation for `numpy.np_fun`: [%s]""" % link +See the NumPy documentation for [`numpy.np_fun`](%s): %s""" + expected = expected % (link, link) self.assertEqual(expected, f.__doc__) @parameterized.parameters([None, 1, 'a', '1a', '1.1a', '1.1.1a']) @@ -94,7 +95,7 @@ See the documentation for `numpy.np_fun`: [%s]""" % link def f(): """f docstring.""" return - expected = """TensorFlow variant of `numpy.np_fun`. + expected = """TensorFlow variant of NumPy's `np_fun`. Unsupported arguments: `x`. @@ -109,7 +110,7 @@ f docstring. def f(): """f docstring.""" return - expected = """TensorFlow variant of `numpy.foo`. + expected = """TensorFlow variant of NumPy's `foo`. f docstring. From 6b87d35d25c40e328a7f7bc1bc554abdd1c9c30d Mon Sep 17 00:00:00 2001 From: Henry Tan Date: Thu, 16 Jul 2020 19:46:40 -0700 Subject: [PATCH 0664/2522] Add TpuCompileOp implementation. 
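For context on the files below: the new TpuCompileOp kernel does not construct its implementation directly. It calls a process-wide creation callback that module initializers can swap out, so different build configurations can install different TpuCompileOpKernelImpl factories. What follows is a minimal, self-contained sketch of that swappable-factory pattern only; Impl, CreateFn, GetCreateFn, SetCreateFn and Op are illustrative stand-ins, not the real TensorFlow API (the real declarations are in tpu_compile_op_impl_factory.h further down).

    // Toy analogue of GetTpuCompileOpCreateFn()/SetTpuCompileOpCreateFn().
    #include <functional>
    #include <iostream>
    #include <memory>

    struct Impl {  // stand-in for TpuCompileOpKernelImpl
      virtual ~Impl() = default;
      virtual void Compute() { std::cout << "default impl\n"; }
    };

    using CreateFn = std::function<std::unique_ptr<Impl>()>;

    CreateFn* GetCreateFn() {  // process-wide creation hook with a default
      static CreateFn* fn =
          new CreateFn([] { return std::make_unique<Impl>(); });
      return fn;
    }

    void SetCreateFn(CreateFn fn) {  // a module initializer can override it
      *GetCreateFn() = std::move(fn);
    }

    struct Op {  // stand-in for TpuCompileOp: owns whatever the hook creates
      Op() : impl_((*GetCreateFn())()) {}
      void Compute() { impl_->Compute(); }
      std::unique_ptr<Impl> impl_;
    };

    int main() { Op().Compute(); }  // prints "default impl"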
PiperOrigin-RevId: 321698122 Change-Id: I1df8da64549a9404ebef341112eb8d6514d926dc --- tensorflow/core/tpu/kernels/BUILD | 64 ++++++++++++- tensorflow/core/tpu/kernels/tpu_compile_op.cc | 94 +++++++++++++++++++ tensorflow/core/tpu/kernels/tpu_compile_op.h | 29 ++---- .../kernels/tpu_compile_op_impl_factory.cc | 46 +++++++++ .../tpu/kernels/tpu_compile_op_impl_factory.h | 55 +++++++++++ .../kernels/tpu_compile_op_registration.cc | 52 ++++++++++ .../tpu/kernels/tpu_compile_op_support.cc | 81 +++++++++++++--- .../core/tpu/kernels/tpu_compile_op_support.h | 5 + 8 files changed, 390 insertions(+), 36 deletions(-) create mode 100644 tensorflow/core/tpu/kernels/tpu_compile_op.cc create mode 100644 tensorflow/core/tpu/kernels/tpu_compile_op_impl_factory.cc create mode 100644 tensorflow/core/tpu/kernels/tpu_compile_op_impl_factory.h create mode 100644 tensorflow/core/tpu/kernels/tpu_compile_op_registration.cc diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index 7a6160a2963..31435090f88 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -178,12 +178,14 @@ cc_library( "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:xla_data_proto_cc", "//tensorflow/compiler/xla/service:computation_layout", + "//tensorflow/compiler/xla/service:computation_placer", "//tensorflow/compiler/xla/service:dump", "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:hlo_module_config", "//tensorflow/compiler/xla/service:hlo_module_group", "//tensorflow/core:framework", "//tensorflow/core/framework:protos_all_cc", + "//tensorflow/core/platform:errors", "//tensorflow/core/protobuf/tpu:compile_metadata_proto_cc", "//tensorflow/stream_executor/tpu:proto_helper", "@com_google_absl//absl/strings", @@ -494,7 +496,10 @@ tf_proto_library_cc( cc_library( name = "tpu_compile_op_hdrs", hdrs = ["tpu_compile_op.h"], - deps = ["//tensorflow/core:framework"], + deps = [ + ":tpu_compile_op_common", + "//tensorflow/core:framework", + ], ) cc_library( @@ -553,3 +558,60 @@ cc_library( ], alwayslink = 1, ) + +cc_library( + name = "tpu_compile_op_registration", + srcs = ["tpu_compile_op_registration.cc"], + deps = [ + ":tpu_compile_op_common", + ":tpu_compile_op_impl", + ":tpu_compile_op_impl_factory", + ":tpu_compile_op_support", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core/protobuf/tpu:compile_metadata_proto_cc", + ], +) + +cc_library( + name = "tpu_compile_op_impl_factory", + srcs = ["tpu_compile_op_impl_factory.cc"], + hdrs = ["tpu_compile_op_impl_factory.h"], + deps = [ + ":tpu_compile_op_common", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla/service:computation_placer", + "//tensorflow/core:framework", + ], + alwayslink = 1, +) + +cc_library( + name = "tpu_compile_op_lib", + srcs = ["tpu_compile_op.cc"], + deps = [ + ":tpu_compile_op_hdrs", + ":tpu_compile_op_impl_factory", + ":tpu_compile_op_options", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/core/protobuf/tpu:compilation_result_proto_cc", + "//tensorflow/stream_executor/tpu:tpu_node_context", + ], + alwayslink = 1, +) + +cc_library( + name = "tpu_compile_op", + deps = [ + ":tpu_compile_op_hdrs", + ":tpu_compile_op_impl", + ":tpu_compile_op_impl_factory", + ":tpu_compile_op_lib", + ":tpu_compile_op_options", + ":tpu_compile_op_registration", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/core/protobuf/tpu:compilation_result_proto_cc", + 
"//tensorflow/stream_executor/tpu:tpu_node_context", + ], + alwayslink = 1, +) diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op.cc b/tensorflow/core/tpu/kernels/tpu_compile_op.cc new file mode 100644 index 00000000000..61210ec3baf --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_compile_op.cc @@ -0,0 +1,94 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/tpu/kernels/tpu_compile_op.h" + +#include + +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/core/protobuf/tpu/compilation_result.pb.h" +#include "tensorflow/core/tpu/kernels/tpu_compile_op_impl_factory.h" +#include "tensorflow/core/tpu/kernels/tpu_compile_op_options.h" +#include "tensorflow/stream_executor/tpu/tpu_node_context.h" + +namespace tensorflow { +namespace tpu { +using ::stream_executor::port::StatusOr; + +TpuCompileOp::TpuCompileOp(OpKernelConstruction* ctx) : OpKernel(ctx) { + StatusOr> compile_op = + (*GetTpuCompileOpCreateFn())(ctx); + OP_REQUIRES_OK(ctx, compile_op.status()); + impl_ = std::move(compile_op.ValueOrDie()); +} + +void TpuCompileOp::Compute(OpKernelContext* ctx) { impl_->Compute(ctx); } + +TpuCompileMlirOp::TpuCompileMlirOp(OpKernelConstruction* ctx) : OpKernel(ctx) { + StatusOr> compile_op = + (*GetTpuCompileOpMlirCreateFn())(ctx); + OP_REQUIRES_OK(ctx, compile_op.status()); + impl_ = std::move(compile_op.ValueOrDie()); +} + +void TpuCompileMlirOp::Compute(OpKernelContext* ctx) { impl_->Compute(ctx); } + +void TpuCompileSucceededAssertOp::Compute(OpKernelContext* ctx) { + const Tensor compilation_result = ctx->input(0); + CompilationResultProto proto; + Status status; + if (!proto.ParseFromString(compilation_result.scalar()())) { + status = + errors::InvalidArgument("Unable to parse compilation result proto"); + } + if (!status.ok() || proto.status_code() != error::Code::OK) { + status.Update(Status(proto.status_code(), proto.status_error_message())); + errors::AppendToMessage(&status, "TPU compilation failed"); + if (tensorflow::internal::TpuCompilationFailureClosesChips()) { + // At this point, if compilation fails we do not know if a task + // is already running that expects results from this compiled + // program to complete. So close the TPU driver to release all + // awaiting interactions (all awaiting interaction will fail and + // continue to fail until reinitialized). + LOG(ERROR) << "Cloud TPU: Closing chips. 
TPU compilation is considered " + "as part of device state, and a failed compilation results " + "in a device reset."; + + Status close_status = TpuNodeContext::CloseTpuHost(); + + if (!close_status.ok()) { + errors::AppendToMessage(&status, close_status.error_message()); + } + } + ctx->CtxFailureWithWarning(status); + } +} + +REGISTER_MODULE_INITIALIZER(register_tpu_compile_op_impl, { +#if !defined(LIBTFTPU) + VLOG(1) << "register_tpu_compile_op_impl: TpuCompileOpKernelImpl"; + SetTpuCompileOpCreateFn(CreateTpuCompileOpImpl); + SetTpuCompileOpMlirCreateFn(CreateTpuCompileOpMlirImpl); +#endif // LIBTFTPU +}); + +REGISTER_KERNEL_BUILDER(Name("TPUCompile").Device(DEVICE_CPU), TpuCompileOp); +REGISTER_KERNEL_BUILDER(Name("_TPUCompileMlir").Device(DEVICE_CPU), + TpuCompileMlirOp); + +REGISTER_KERNEL_BUILDER(Name("TPUCompileSucceededAssert").Device(DEVICE_CPU), + TpuCompileSucceededAssertOp); + +} // namespace tpu +} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op.h b/tensorflow/core/tpu/kernels/tpu_compile_op.h index 0bbf5695400..d0ead41a77e 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op.h +++ b/tensorflow/core/tpu/kernels/tpu_compile_op.h @@ -18,18 +18,10 @@ limitations under the License. #include #include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/tpu/kernels/tpu_compile_op_common.h" namespace tensorflow { namespace tpu { -// Forward declaration. -#if defined(LIBTFTPU) -class TpuCompileOpKernelImpl; -#else -namespace internal { -class TpuCompileOpKernelImpl; -} -#endif -} // namespace tpu // The TPUCompile operator compiles a Tensorflow function into a // TPU executable to be run by TPUExecute. @@ -42,13 +34,9 @@ class TpuCompileOp : public OpKernel { void Compute(OpKernelContext* ctx) override; private: -#if defined(LIBTFTPU) - std::unique_ptr impl_; -#else - std::unique_ptr impl_; -#endif + std::unique_ptr impl_; - DISALLOW_COPY_AND_ASSIGN(TpuCompileOp); + TF_DISALLOW_COPY_AND_ASSIGN(TpuCompileOp); }; // The TPUCompile operator compiles a MLIR module into a @@ -62,13 +50,9 @@ class TpuCompileMlirOp : public OpKernel { void Compute(OpKernelContext* ctx) override; private: -#if defined(LIBTFTPU) - std::unique_ptr impl_; -#else - std::unique_ptr impl_; -#endif + std::unique_ptr impl_; - DISALLOW_COPY_AND_ASSIGN(TpuCompileMlirOp); + TF_DISALLOW_COPY_AND_ASSIGN(TpuCompileMlirOp); }; class TpuCompileSucceededAssertOp : public OpKernel { @@ -80,9 +64,10 @@ class TpuCompileSucceededAssertOp : public OpKernel { void Compute(OpKernelContext* ctx) override; private: - DISALLOW_COPY_AND_ASSIGN(TpuCompileSucceededAssertOp); + TF_DISALLOW_COPY_AND_ASSIGN(TpuCompileSucceededAssertOp); }; +} // namespace tpu } // namespace tensorflow #endif // TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILE_OP_H_ diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_impl_factory.cc b/tensorflow/core/tpu/kernels/tpu_compile_op_impl_factory.cc new file mode 100644 index 00000000000..f6b4641fe61 --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_impl_factory.cc @@ -0,0 +1,46 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/tpu/kernels/tpu_compile_op_impl_factory.h" + +namespace tensorflow { +namespace tpu { +namespace { +static TpuCompileOpImplCreateFn* tpu_compile_op_impl_creation_fn = + new TpuCompileOpImplCreateFn(CreateTpuCompileOpImpl); +static TpuCompileOpImplCreateFn* tpu_compile_op_mlir_impl_creation_fn = + new TpuCompileOpImplCreateFn(CreateTpuCompileOpMlirImpl); +} // namespace + +TpuCompileOpImplCreateFn* GetTpuCompileOpCreateFn() { + return tpu_compile_op_impl_creation_fn; +} + +TpuCompileOpImplCreateFn* GetTpuCompileOpMlirCreateFn() { + return tpu_compile_op_mlir_impl_creation_fn; +} + +void SetTpuCompileOpCreateFn(TpuCompileOpImplCreateFn fn) { + VLOG(1) << "SetTpuCompileOpCreateFn."; + delete tpu_compile_op_impl_creation_fn; + tpu_compile_op_impl_creation_fn = new TpuCompileOpImplCreateFn(fn); +} + +void SetTpuCompileOpMlirCreateFn(TpuCompileOpImplCreateFn fn) { + VLOG(1) << "SetTpuCompileOpMlirCreateFn."; + delete tpu_compile_op_mlir_impl_creation_fn; + tpu_compile_op_mlir_impl_creation_fn = new TpuCompileOpImplCreateFn(fn); +} +} // namespace tpu +} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_impl_factory.h b/tensorflow/core/tpu/kernels/tpu_compile_op_impl_factory.h new file mode 100644 index 00000000000..40201f8896d --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_impl_factory.h @@ -0,0 +1,55 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILE_OP_IMPL_FACTORY_H_ +#define TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILE_OP_IMPL_FACTORY_H_ + +#include +#include + +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/tpu/kernels/tpu_compile_op_common.h" + +namespace tensorflow { +namespace tpu { + +typedef std::function>(OpKernelConstruction*)> + TpuCompileOpImplCreateFn; + +// Creates the callback for creating `TpuCompileOpImpl` instance. +stream_executor::port::StatusOr> +CreateTpuCompileOpImpl(OpKernelConstruction* ctx); + +// Creates the callback for creating Mlir `TpuCompileOpImpl` instance. +stream_executor::port::StatusOr> +CreateTpuCompileOpMlirImpl(OpKernelConstruction* ctx); + +// Gets the callback for creating default `TpuCompileOpImpl` instance. 
+TpuCompileOpImplCreateFn* GetTpuCompileOpCreateFn(); + +// Gets the callback for creating Mlir `TpuCompileOpImpl` instance. +TpuCompileOpImplCreateFn* GetTpuCompileOpMlirCreateFn(); + +// Sets the callback for creating default `TpuCompileOpImpl` instance. +void SetTpuCompileOpCreateFn(TpuCompileOpImplCreateFn fn); + +// Sets the callback for creating Mlir `TpuCompileOpImpl` instance. +void SetTpuCompileOpMlirCreateFn(TpuCompileOpImplCreateFn fn); +} // namespace tpu +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILE_OP_IMPL_FACTORY_H_ diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_registration.cc b/tensorflow/core/tpu/kernels/tpu_compile_op_registration.cc new file mode 100644 index 00000000000..fe50e01455f --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_registration.cc @@ -0,0 +1,52 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include + +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/core/framework/attr_value.pb.h" +#include "tensorflow/core/protobuf/tpu/compile_metadata.pb.h" +#include "tensorflow/core/tpu/kernels/tpu_compile_op_common.h" +#include "tensorflow/core/tpu/kernels/tpu_compile_op_impl.h" +#include "tensorflow/core/tpu/kernels/tpu_compile_op_impl_factory.h" +#include "tensorflow/core/tpu/kernels/tpu_compile_op_support.h" + +namespace tensorflow { +namespace tpu { +using ::stream_executor::port::StatusOr; +StatusOr> CreateTpuCompileOpImpl( + OpKernelConstruction* ctx) { + NameAttrList function_name; + TPUCompileMetadataProto metadata; + TF_RETURN_IF_ERROR(CompileOpMetadataFromContext(ctx, &metadata, + &function_name, + /*mlir_module=*/nullptr)); + VLOG(1) << "Create tensorflow::tpu::TpuCompileOpKernelImpl"; + return {std::make_unique( + function_name, metadata, metadata.num_cores_per_replica(), + /*return_hlo_protos=*/false, + /*unload_cache_on_session_close=*/false)}; +} + +StatusOr> CreateTpuCompileOpMlirImpl( + OpKernelConstruction* ctx) { + TPUCompileMetadataProto metadata; + std::string mlir_module; + TF_RETURN_IF_ERROR(CompileOpMetadataFromContext( + ctx, &metadata, /*function_name=*/nullptr, &mlir_module)); + VLOG(1) << "Create tensorflow::tpu::TpuCompileOpKernelImpl"; + return {std::make_unique( + mlir_module, metadata, metadata.num_cores_per_replica(), + /*return_hlo_protos=*/false, + /*unload_cache_on_session_close=*/false)}; +} +} // namespace tpu +} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_support.cc b/tensorflow/core/tpu/kernels/tpu_compile_op_support.cc index 41e81c6bca7..5cc35a07e66 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_support.cc +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_support.cc @@ -16,27 +16,28 @@ limitations under the License. 
#include "tensorflow/compiler/xla/debug_options_flags.h" #include "tensorflow/compiler/xla/service/computation_layout.h" +#include "tensorflow/compiler/xla/service/computation_placer.h" #include "tensorflow/compiler/xla/service/dump.h" #include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_key.h" #include "tensorflow/core/tpu/kernels/tpu_executable_info.pb.h" #include "tensorflow/stream_executor/tpu/proto_helper.h" namespace tensorflow { namespace tpu { - -using stream_executor::port::Status; -using stream_executor::port::StatusOr; -using xla::ComputationLayout; -using xla::DebugOptions; -using xla::DeviceAssignment; -using xla::HloModuleConfig; -using xla::HloSharding; -using xla::InvalidArgument; -using xla::ProgramShape; -using xla::Shape; -using xla::ShapeTree; -using xla::ShapeUtil; +using ::stream_executor::port::Status; +using ::stream_executor::port::StatusOr; +using ::xla::ComputationLayout; +using ::xla::DebugOptions; +using ::xla::DeviceAssignment; +using ::xla::HloModuleConfig; +using ::xla::HloSharding; +using ::xla::InvalidArgument; +using ::xla::ProgramShape; +using ::xla::Shape; +using ::xla::ShapeTree; +using ::xla::ShapeUtil; Status ValidateResultShape(const Shape& client_shape, const Shape& result_shape) { @@ -485,5 +486,59 @@ StatusOr CreateTpuCompilationRequest( VLOG(1) << "TpuCompilationRequest:\n" << compilation_request.DebugString(); return compilation_request; } + +Status CompileOpMetadataFromContext(OpKernelConstruction* ctx, + TPUCompileMetadataProto* metadata, + NameAttrList* function_name, + std::string* mlir_module) { + CHECK_NE(metadata, nullptr); + + int num_computations; + TF_RETURN_IF_ERROR(ctx->GetAttr("num_computations", &num_computations)); + + std::string metadata_string; + TF_RETURN_IF_ERROR(ctx->GetAttr("metadata", &metadata_string)); + if (!metadata->ParsePartialFromString(metadata_string)) { + return errors::InvalidArgument("Unable to parse TPUCompileMetadataProto"); + } + + if (function_name != nullptr) { + TF_RETURN_IF_ERROR(ctx->GetAttr("function", function_name)); + } + + if (mlir_module != nullptr) { + TF_RETURN_IF_ERROR(ctx->GetAttr("mlir_module", mlir_module)); + } + + if (num_computations != metadata->num_cores_per_replica()) { + return errors::InvalidArgument( + "num_computations must be equal to " + "num_cores_per_replica in the 'metadata' " + "attribute (", + num_computations, " vs ", metadata->num_cores_per_replica(), ")"); + } + + if (metadata->has_device_assignment()) { + StatusOr> device_assignment_or_error = + DeviceAssignment::Deserialize(metadata->device_assignment()); + TF_RETURN_IF_ERROR(device_assignment_or_error.status()); + const DeviceAssignment& device_assignment = + *device_assignment_or_error.ValueOrDie(); + const int num_replicas = metadata->num_replicas(); + if (device_assignment.replica_count() != num_replicas) { + return errors::InvalidArgument( + "Device assignment replica_count != num_replicas; ", + device_assignment.replica_count(), " vs ", num_replicas); + } + if (device_assignment.computation_count() != + metadata->num_cores_per_replica()) { + return errors::InvalidArgument( + "Device assignment computation_count != num_cores_per_replica; ", + device_assignment.computation_count(), " vs ", + metadata->num_cores_per_replica()); + } + } + return Status::OK(); +} } // namespace tpu } // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_support.h 
b/tensorflow/core/tpu/kernels/tpu_compile_op_support.h index d6d407cb28f..bc60f64286a 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_support.h +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_support.h @@ -31,6 +31,7 @@ limitations under the License. #include "tensorflow/compiler/xla/shape_tree.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/framework/function.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor.pb.h" @@ -154,6 +155,10 @@ se::port::StatusOr CreateTpuCompilationRequest( const TPUCompileMetadataProto& metadata, const std::vector& arg_shapes); +se::port::Status CompileOpMetadataFromContext(OpKernelConstruction* ctx, + TPUCompileMetadataProto* metadata, + NameAttrList* function_name, + std::string* mlir_module); } // namespace tpu } // namespace tensorflow From 7160041650fdbae6010662a4ea64c049351e1927 Mon Sep 17 00:00:00 2001 From: Tomer Kaftan Date: Thu, 16 Jul 2020 19:47:51 -0700 Subject: [PATCH 0665/2522] Relax the error about functional api construction w/ a mix of symbolic and non-symbolic tensors for built-in layers (such as layers.add and layers.multiply where using constants is a common user pattern) PiperOrigin-RevId: 321698209 Change-Id: Ief13e59aec91b787361a7760318ecd47870d938f --- tensorflow/python/keras/engine/base_layer.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/keras/engine/base_layer.py b/tensorflow/python/keras/engine/base_layer.py index 3ea9c537b74..f362588680a 100644 --- a/tensorflow/python/keras/engine/base_layer.py +++ b/tensorflow/python/keras/engine/base_layer.py @@ -922,7 +922,7 @@ class Layer(module.Module, version_utils.LayerVersionSelector): # >> inputs = tf.keras.Input(10) # >> outputs = MyLayer()(inputs) # Functional construction mode. # >> model = tf.keras.Model(inputs, outputs) - if _in_functional_construction_mode(inputs, args, kwargs, input_list): + if _in_functional_construction_mode(self, inputs, args, kwargs, input_list): return self._functional_construction_call(inputs, args, kwargs, input_list) @@ -3213,7 +3213,7 @@ class AddMetric(Layer): return config -def _in_functional_construction_mode(inputs, args, kwargs, input_list): # pylint: disable=unused-argument +def _in_functional_construction_mode(layer, inputs, args, kwargs, input_list): # pylint: disable=unused-argument """Check the arguments to see if we are constructing a functional model.""" if keras_tensor.keras_tensors_enabled(): # We are constructing a functional model if any of the inputs @@ -3225,15 +3225,16 @@ def _in_functional_construction_mode(inputs, args, kwargs, input_list): # pylin if context.executing_eagerly(): all_inputs_symbolic = all( tf_utils.is_symbolic_tensor(t) for t in input_list) - if (any(tf_utils.is_symbolic_tensor(t) for t in nest.flatten( - [inputs, args, kwargs])) and not all_inputs_symbolic): + if (base_layer_utils.is_subclassed(layer) and + any(tf_utils.is_symbolic_tensor(t) for t in nest.flatten( + [inputs, args, kwargs])) and not all_inputs_symbolic): raise ValueError('It appears you are trying to construct a ' 'functional model, but not all of the inputs in ' 'the first positional argument of your layer call ' 'are symbolic tensors. 
' '(Input objects, or the output of another layer) ' - 'Functional models cannot correctly track layers ' - 'unless all values in the first call argument ' + 'Functional models cannot correctly track custom ' + 'layers unless all values in the first call argument ' 'are symbolic.') return all_inputs_symbolic else: From 2a3f88d4c8a1595936ff5d90f783fa043b88fa09 Mon Sep 17 00:00:00 2001 From: Yimei Sun Date: Fri, 10 Jul 2020 20:37:17 -0700 Subject: [PATCH 0666/2522] Add auto_mixed_precision_mkl to run-once optimizer list --- tensorflow/core/grappler/optimizers/meta_optimizer.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 2f1c869965d..0df32168787 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -88,7 +88,8 @@ int NumIterations(const RewriterConfig& cfg) { // Check if optimizer is allowed to run only once. bool IsRunOnceOptimizer(const string& name) { return name == "layout" || name == "memory_optimizer" || - name == "loop_optimizer" || name == "auto_mixed_precision"; + name == "loop_optimizer" || name == "auto_mixed_precision" || + name == "auto_mixed_precision_mkl"; } bool IsTFDataFunction(const FunctionDef& func) { From 4f7f17e4694353d1f2c08056fe593c22d651a331 Mon Sep 17 00:00:00 2001 From: Blake Hechtman Date: Thu, 16 Jul 2020 20:34:15 -0700 Subject: [PATCH 0667/2522] [XLA] Fix bug in Reduce(Dot(X)) simplification. PiperOrigin-RevId: 321703045 Change-Id: Id43655ea10d36f06cba109e8ec11efb2dfb6d80b --- tensorflow/compiler/xla/service/algebraic_simplifier.cc | 4 ++-- tensorflow/compiler/xla/service/algebraic_simplifier_test.cc | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 741edfc7c35..3e012fc41b8 100755 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -4137,13 +4137,13 @@ Status AlgebraicSimplifierVisitor::HandleReduce(HloInstruction* hlo) { new_dnums.add_rhs_contracting_dimensions( dnums.rhs_batch_dimensions(batch_dim)); new_dnums.add_lhs_contracting_dimensions( - dnums.rhs_batch_dimensions(batch_dim)); + dnums.lhs_batch_dimensions(batch_dim)); ++removed_dims; } else { new_dnums.add_rhs_batch_dimensions( dnums.rhs_batch_dimensions(batch_dim)); new_dnums.add_lhs_batch_dimensions( - dnums.rhs_batch_dimensions(batch_dim)); + dnums.lhs_batch_dimensions(batch_dim)); } } std::vector reduce_dims; diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index 779d6c9cdc5..d2c32d79a91 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -6145,10 +6145,10 @@ TEST_F(AlgebraicSimplifierTest, ReduceOfBatchDotToContractingDimension) { } test { p0 = f32[32,8,5,6] parameter(0) - p1 = f32[32,8,6,7] parameter(1) + p1 = f32[8,32,6,7] parameter(1) d = f32[32,8,5,7] dot(p0, p1), lhs_batch_dims={0,1}, - rhs_batch_dims={0,1}, + rhs_batch_dims={1,0}, rhs_contracting_dims={2}, lhs_contracting_dims={3} c = f32[] constant(0) From a6af32199a956d73bfce5c4255757195cc9f795e Mon Sep 17 00:00:00 2001 From: zilinzhu Date: Fri, 17 Jul 2020 11:57:10 +0800 Subject: [PATCH 0668/2522] change device_str to const --- 
tensorflow/core/framework/op_kernel.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/framework/op_kernel.cc b/tensorflow/core/framework/op_kernel.cc index 5cc5e9a860a..38fe9f7e392 100644 --- a/tensorflow/core/framework/op_kernel.cc +++ b/tensorflow/core/framework/op_kernel.cc @@ -1413,7 +1413,7 @@ Status FindKernelDef( device_type, node_name, has_experimental_debug_info, experimental_debug_info, node_op, node_attrs, ®, &was_attr_mismatch)); if (reg == nullptr) { - std::string device_str = DeviceTypeString(device_type); + const std::string device_str = DeviceTypeString(device_type); Status s = errors::NotFound( "No registered '", node_op, "' OpKernel for ", device_str, " devices compatible with node ", From 3bdbcaa2ef4e3354c2ffab4a927c17fa6c41fd3d Mon Sep 17 00:00:00 2001 From: Reed Date: Thu, 16 Jul 2020 21:08:01 -0700 Subject: [PATCH 0669/2522] Fix tf32 for real. --- tensorflow/python/BUILD | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index f4898897435..64a8ef56112 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -6077,7 +6077,6 @@ pywrap_tensorflow_macro( "//tensorflow/lite/delegates/flex:delegate", "//tensorflow/core/profiler/internal/cpu:python_tracer", "//tensorflow/tools/graph_transforms:transform_graph_lib", - "//tensorflow/core/platform:tf32_utils", "//tensorflow/lite/toco/python:toco_python_api", "//tensorflow/python/eager:pywrap_tfe_lib", "//tensorflow/core/util/tensor_bundle", @@ -6089,7 +6088,9 @@ pywrap_tensorflow_macro( "@ngraph_tf//:ngraph_tf", ]) + if_xla_available([ "//tensorflow/compiler/aot:tfcompile_lib", - ]) + if_tpu(["//tensorflow/core/tpu:tpu_api_dlsym_initializer"]), + ]) + if_tpu([ + "//tensorflow/core/tpu:tpu_api_dlsym_initializer", + ]) + if_static(extra_deps = ["//tensorflow/core/platform:tf32_utils"]), ) # ** Targets for Windows build (start) ** From 580f94ee24d1305cd3cc999c86d5a3f68464a8a8 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Fri, 17 Jul 2020 11:15:25 +0700 Subject: [PATCH 0670/2522] Add stat --- .../filesystem/plugins/s3/s3_filesystem.cc | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc index 4dada85946a..9632653d988 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc @@ -18,6 +18,9 @@ limitations under the License. 
#include #include #include +#include +#include +#include #include #include @@ -590,6 +593,73 @@ void NewAppendableFile(const TF_Filesystem* filesystem, const char* path, TF_SetStatus(status, TF_OK, ""); } +void Stat(const TF_Filesystem* filesystem, const char* path, + TF_FileStatistics* stats, TF_Status* status) { + Aws::String bucket, object; + ParseS3Path(path, true, &bucket, &object, status); + if (TF_GetCode(status) != TF_OK) return; + auto s3_file = static_cast(filesystem->plugin_filesystem); + GetS3Client(s3_file); + + if (object.empty()) { + Aws::S3::Model::HeadBucketRequest head_bucket_request; + head_bucket_request.WithBucket(bucket); + auto head_bucket_outcome = + s3_file->s3_client->HeadBucket(head_bucket_request); + if (!head_bucket_outcome.IsSuccess()) + return TF_SetStatusFromAWSError(head_bucket_outcome.GetError(), status); + stats->length = 0; + stats->is_directory = 1; + stats->mtime_nsec = 0; + return TF_SetStatus(status, TF_OK, ""); + } + + bool found = false; + Aws::S3::Model::HeadObjectRequest head_object_request; + head_object_request.WithBucket(bucket).WithKey(object); + head_object_request.SetResponseStreamFactory( + []() { return Aws::New(kS3FileSystemAllocationTag); }); + auto head_object_outcome = + s3_file->s3_client->HeadObject(head_object_request); + if (head_object_outcome.IsSuccess()) { + stats->length = head_object_outcome.GetResult().GetContentLength(); + stats->is_directory = 0; + stats->mtime_nsec = + head_object_outcome.GetResult().GetLastModified().Millis() * 1e6; + found = true; + } else { + return TF_SetStatusFromAWSError(head_object_outcome.GetError(), status); + } + + auto prefix = object; + if (prefix.back() != '/') { + prefix.push_back('/'); + } + Aws::S3::Model::ListObjectsRequest list_objects_request; + list_objects_request.WithBucket(bucket).WithPrefix(prefix).WithMaxKeys(1); + list_objects_request.SetResponseStreamFactory( + []() { return Aws::New(kS3FileSystemAllocationTag); }); + auto list_objects_outcome = + s3_file->s3_client->ListObjects(list_objects_request); + if (list_objects_outcome.IsSuccess()) { + auto objects = list_objects_outcome.GetResult().GetContents(); + if (objects.size() > 0) { + stats->length = 0; + stats->is_directory = 1; + stats->mtime_nsec = objects[0].GetLastModified().Millis() * 1e6; + found = true; + } + } else { + TF_SetStatusFromAWSError(list_objects_outcome.GetError(), status); + if (TF_GetCode(status) == TF_FAILED_PRECONDITION) return; + } + if (!found) + return TF_SetStatus( + status, TF_NOT_FOUND, + absl::StrCat("Object ", path, " does not exist").c_str()); + TF_SetStatus(status, TF_OK, ""); +} + // TODO(vnvo2409): Implement later } // namespace tf_s3_filesystem From b440bbb40f2e5240f3fe92b5de358e2608f68405 Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Thu, 16 Jul 2020 21:35:45 -0700 Subject: [PATCH 0671/2522] [TF/XLA] Fixup numbering of XLA parameters used for aliasing Previously, the XLA argument parameter was incorrectly assumed to be corresponding to the index in the vector of `XlaCompiler::Argument`. This is not correct, since not all `XlaCompiler::Argument`s become arguments to the compiler: notably, constants and uninitialized resource variables do not. 
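A concrete illustration of the mismatch, as a self-contained sketch (the variable names mirror the fix in xla_compiler.cc below, everything else is simplified): if the compiler arguments are {constant, resource variable}, only the variable becomes an XLA parameter, so input_mapping is {1} and the variable is parameter 0. Aliasing its resource update against parameter 1, which is what the raw argument index would suggest, points at a parameter that does not exist.

    // Recovering the XLA parameter number from the XlaCompiler argument
    // number via input_mapping, the way the fix below does.
    #include <cassert>
    #include <map>
    #include <vector>

    int main() {
      // Argument 0 is a compile-time constant, argument 1 is a resource
      // variable; only argument 1 is passed to XLA.
      std::vector<int> input_mapping = {1};  // XLA param 0 <- argument 1

      std::map<int, int> argument_to_xla_arg;
      for (int xla_arg = 0; xla_arg < static_cast<int>(input_mapping.size());
           ++xla_arg) {
        argument_to_xla_arg[input_mapping[xla_arg]] = xla_arg;
      }

      // The resource update for argument 1 must alias XLA parameter 0,
      // not parameter 1 (the pre-fix assumption).
      assert(argument_to_xla_arg.at(1) == 0);
      // Constants never get a parameter number at all.
      assert(argument_to_xla_arg.count(0) == 0);
      return 0;
    }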
PiperOrigin-RevId: 321709603 Change-Id: I730fd6385949c360b2b831318a5b59c08f8362ef --- tensorflow/compiler/tf2xla/xla_compiler.cc | 19 ++++++--- .../compiler/tf2xla/xla_compiler_test.cc | 41 +++++++++++++++++++ 2 files changed, 54 insertions(+), 6 deletions(-) diff --git a/tensorflow/compiler/tf2xla/xla_compiler.cc b/tensorflow/compiler/tf2xla/xla_compiler.cc index 6d92fd97793..333fa53790d 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.cc +++ b/tensorflow/compiler/tf2xla/xla_compiler.cc @@ -168,7 +168,7 @@ Status BuildComputation( int* num_computation_outputs, int* num_nonconst_outputs, std::vector* outputs, std::vector* resource_updates, - xla::Shape* output_shape) { + xla::Shape* output_shape, absl::Span input_mapping) { // Attach a common operator name as metadata. This has no semantic effect — it // merely makes the HLO graph more readable when visualized via TensorBoard, // since TensorBoard forms groups out of operators with similar names. @@ -268,6 +268,11 @@ Status BuildComputation( return a->arg_num() < b->arg_num(); }); + absl::flat_hash_map argument_to_xla_arg; + for (int xla_arg = 0; xla_arg < input_mapping.size(); xla_arg++) { + argument_to_xla_arg[input_mapping[xla_arg]] = xla_arg; + } + std::vector aliases; for (const XlaResource* resource : arg_resources) { DCHECK_LT(resource->arg_num(), args.size()); @@ -290,19 +295,20 @@ Status BuildComputation( update.type = resource->type(); update.shape = resource->shape(); update.modified = modified; + int param_num = use_tuple_arg ? 0 : update.input_index; if (is_entry_computation && arg.resource_kind != XlaResource::kTensorArray && - alias_resource_update) { + alias_resource_update && argument_to_xla_arg.count(param_num)) { // Assuming tuple arg and results are used. xla::ShapeIndex param_index = use_tuple_arg ? xla::ShapeIndex({update.input_index}) : xla::ShapeIndex{}; - int param_number = use_tuple_arg ? 
0 : update.input_index; + int xla_param_num = argument_to_xla_arg[param_num]; int64 output_index_num = elems.size(); xla::ShapeIndex output_index = xla::ShapeIndex({output_index_num}); VLOG(3) << "Storing alias: " << output_index.ToString() << ": (" - << param_number << ", " << param_index.ToString() << ")"; - aliases.push_back({output_index, param_number, param_index}); + << xla_param_num << ", " << param_index.ToString() << ")"; + aliases.push_back({output_index, xla_param_num, param_index}); } for (const auto& grad : resource->tensor_array_gradients()) { update.tensor_array_gradients_accessed.insert(grad.first); @@ -1315,7 +1321,8 @@ Status XlaCompiler::CompileGraph( options.always_return_tuple, options.use_tuple_arg, options.alias_resource_update, &builder, result->computation.get(), &num_computation_outputs, &num_nonconst_outputs, &result->outputs, - &result->resource_updates, &result->xla_output_shape)); + &result->resource_updates, &result->xla_output_shape, + result->input_mapping)); VLOG(2) << "Outputs: total: " << context->retvals().size() << " nonconstant: " << num_nonconst_outputs; diff --git a/tensorflow/compiler/tf2xla/xla_compiler_test.cc b/tensorflow/compiler/tf2xla/xla_compiler_test.cc index 4f1b6c8e7a9..5df508d60b3 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler_test.cc +++ b/tensorflow/compiler/tf2xla/xla_compiler_test.cc @@ -1856,5 +1856,46 @@ TEST_F(XlaCompilerTest, DoNotConstantFoldShapeOp) { EXPECT_TRUE(xla::LiteralTestUtil::Equal(expected_literal, actual_literal)); } +TEST_F(XlaCompilerTest, AliasResourceUpdates) { + Scope scope = Scope::NewRootScope().ExitOnError(); + auto a = ops::Const(scope.WithOpName("A"), {1, 2}); + auto var = ops::_Arg(scope.WithOpName("V"), DT_RESOURCE, 1); + auto write = ops::AssignAddVariableOp(scope, var, a); + auto read = ops::ReadVariableOp( + scope.WithControlDependencies(std::vector{write}), var, + DT_INT32); + auto d = ops::_Retval(scope.WithOpName("D"), read, 0); + std::unique_ptr graph(new Graph(OpRegistry::Global())); + TF_ASSERT_OK(scope.ToGraph(graph.get())); + + // Builds a description of the arguments. 
+ std::vector args(2); + args[0].kind = XlaCompiler::Argument::kConstant; + args[0].type = DT_INT32; + args[0].shape = TensorShape({2}); + args[0].constant_value = Tensor(DT_INT32, {1, 1}); + args[0].initialized = true; + + args[1].kind = XlaCompiler::Argument::kResource; + args[1].resource_kind = XlaResource::kVariable; + args[1].initialized = true; + args[1].type = DT_INT32; + args[1].shape = TensorShape({2}); + + XlaCompiler compiler(DefaultOptions()); + + XlaCompiler::CompileOptions compile_options; + compile_options.alias_resource_update = true; + + XlaCompiler::CompilationResult result; + TF_ASSERT_OK(compiler.CompileGraph(compile_options, "add", std::move(graph), + args, &result)); + + const xla::HloInputOutputAliasProto& alias = + result.computation->proto().input_output_alias(); + EXPECT_EQ(alias.entries_size(), 1); + EXPECT_EQ(alias.entries(0).parameter_number(), 0); +} + } // namespace } // namespace tensorflow From 48c9890e9bbd4db3522712c7d87a0ad2dab897fa Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Fri, 17 Jul 2020 11:57:48 +0700 Subject: [PATCH 0672/2522] Add path exists and get file size --- .../filesystem/plugins/s3/s3_filesystem.cc | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc index 9632653d988..da5be5583dd 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc @@ -660,6 +660,19 @@ void Stat(const TF_Filesystem* filesystem, const char* path, TF_SetStatus(status, TF_OK, ""); } +void PathExists(const TF_Filesystem* filesystem, const char* path, + TF_Status* status) { + TF_FileStatistics stats; + Stat(filesystem, path, &stats, status); +} + +int64_t GetFileSize(const TF_Filesystem* filesystem, const char* path, + TF_Status* status) { + TF_FileStatistics stats; + Stat(filesystem, path, &stats, status); + return stats.length; +} + // TODO(vnvo2409): Implement later } // namespace tf_s3_filesystem From d7918bcd438fc567ac647595c9cba3aeca4ae4a9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 16 Jul 2020 21:36:09 -0700 Subject: [PATCH 0673/2522] Integrate LLVM at https://github.com/llvm/llvm-project/commit/8ef9e2bf355d PiperOrigin-RevId: 321709644 Change-Id: Ib80f68da9f670386172033a4385c70ad2f429efa --- tensorflow/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 1d46b14f73e..9980ffad650 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -710,8 +710,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. 
- LLVM_COMMIT = "1d3f61f8a799489f7f4d81e46d9e6b31a954ea4e" - LLVM_SHA256 = "4731fa761a0d74f1f5ba28478144f766abc0de879e3e283431d8699456a03181" + LLVM_COMMIT = "8ef9e2bf355d05bc81d8b0fe1e5333eec59a0a91" + LLVM_SHA256 = "b64757f390c8e20abaa97fcbf8da9d88db5489a9da827019e83844c62f5790ac" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From 11ae1068deb4381669edc2b2eca2f0a6313a55a9 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Fri, 17 Jul 2020 12:27:57 +0700 Subject: [PATCH 0674/2522] Add NewReadOnlyMemoryRegionFromFile --- .../filesystem/plugins/s3/s3_filesystem.cc | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc index da5be5583dd..f20c7708384 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc @@ -673,6 +673,43 @@ int64_t GetFileSize(const TF_Filesystem* filesystem, const char* path, return stats.length; } +void NewReadOnlyMemoryRegionFromFile(const TF_Filesystem* filesystem, + const char* path, + TF_ReadOnlyMemoryRegion* region, + TF_Status* status) { + Aws::String bucket, object; + ParseS3Path(path, true, &bucket, &object, status); + if (TF_GetCode(status) != TF_OK) return; + + auto s3_file = static_cast(filesystem->plugin_filesystem); + GetS3Client(s3_file); + GetTransferManager(Aws::Transfer::TransferDirection::UPLOAD, s3_file); + + auto size = GetFileSize(filesystem, path, status); + if (TF_GetCode(status) != TF_OK) return; + if (size == 0) + return TF_SetStatus(status, TF_INVALID_ARGUMENT, "File is empty"); + + std::unique_ptr data(new char[size]); + // Wraping inside a `std::unique_ptr` to prevent memory-leaking. + std::unique_ptr reader( + new TF_RandomAccessFile, [](TF_RandomAccessFile* file) { + if (file != nullptr) { + tf_random_access_file::Cleanup(file); + delete file; + } + }); + NewRandomAccessFile(filesystem, path, reader.get(), status); + if (TF_GetCode(status) != TF_OK) return; + auto read = + tf_random_access_file::Read(reader.get(), 0, size, data.get(), status); + if (TF_GetCode(status) != TF_OK) return; + + region->plugin_memory_region = new tf_read_only_memory_region::S3MemoryRegion( + {std::move(data), static_cast(read)}); + TF_SetStatus(status, TF_OK, ""); +} + // TODO(vnvo2409): Implement later } // namespace tf_s3_filesystem From 94d2ab31f87c51972c4c11b808bebd014fc3516e Mon Sep 17 00:00:00 2001 From: Henry Tan Date: Thu, 16 Jul 2020 22:44:59 -0700 Subject: [PATCH 0675/2522] Add TpuCompileOp implementation. 
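Note: despite the subject, the diff below removes the tpu_compile_op.cc, tpu_compile_op_impl_factory.* and tpu_compile_op_registration.cc files introduced by the earlier TpuCompileOp change and restores the previous header layout, in which the ops own their implementation behind a LIBTFTPU guard. A reconstruction of the restored member declarations, with assumed template arguments rather than verbatim text from the diff:

    #if defined(LIBTFTPU)
      std::unique_ptr<tpu::TpuCompileOpKernelImpl> impl_;
    #else
      std::unique_ptr<tpu::internal::TpuCompileOpKernelImpl> impl_;
    #endif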
PiperOrigin-RevId: 321717418 Change-Id: I4e0fb203014c54252511d6596063461dcc5de250 --- tensorflow/core/tpu/kernels/BUILD | 64 +------------ tensorflow/core/tpu/kernels/tpu_compile_op.cc | 94 ------------------- tensorflow/core/tpu/kernels/tpu_compile_op.h | 29 ++++-- .../kernels/tpu_compile_op_impl_factory.cc | 46 --------- .../tpu/kernels/tpu_compile_op_impl_factory.h | 55 ----------- .../kernels/tpu_compile_op_registration.cc | 52 ---------- .../tpu/kernels/tpu_compile_op_support.cc | 81 +++------------- .../core/tpu/kernels/tpu_compile_op_support.h | 5 - 8 files changed, 36 insertions(+), 390 deletions(-) delete mode 100644 tensorflow/core/tpu/kernels/tpu_compile_op.cc delete mode 100644 tensorflow/core/tpu/kernels/tpu_compile_op_impl_factory.cc delete mode 100644 tensorflow/core/tpu/kernels/tpu_compile_op_impl_factory.h delete mode 100644 tensorflow/core/tpu/kernels/tpu_compile_op_registration.cc diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index 31435090f88..7a6160a2963 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -178,14 +178,12 @@ cc_library( "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:xla_data_proto_cc", "//tensorflow/compiler/xla/service:computation_layout", - "//tensorflow/compiler/xla/service:computation_placer", "//tensorflow/compiler/xla/service:dump", "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:hlo_module_config", "//tensorflow/compiler/xla/service:hlo_module_group", "//tensorflow/core:framework", "//tensorflow/core/framework:protos_all_cc", - "//tensorflow/core/platform:errors", "//tensorflow/core/protobuf/tpu:compile_metadata_proto_cc", "//tensorflow/stream_executor/tpu:proto_helper", "@com_google_absl//absl/strings", @@ -496,10 +494,7 @@ tf_proto_library_cc( cc_library( name = "tpu_compile_op_hdrs", hdrs = ["tpu_compile_op.h"], - deps = [ - ":tpu_compile_op_common", - "//tensorflow/core:framework", - ], + deps = ["//tensorflow/core:framework"], ) cc_library( @@ -558,60 +553,3 @@ cc_library( ], alwayslink = 1, ) - -cc_library( - name = "tpu_compile_op_registration", - srcs = ["tpu_compile_op_registration.cc"], - deps = [ - ":tpu_compile_op_common", - ":tpu_compile_op_impl", - ":tpu_compile_op_impl_factory", - ":tpu_compile_op_support", - "//tensorflow/compiler/xla:statusor", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core/protobuf/tpu:compile_metadata_proto_cc", - ], -) - -cc_library( - name = "tpu_compile_op_impl_factory", - srcs = ["tpu_compile_op_impl_factory.cc"], - hdrs = ["tpu_compile_op_impl_factory.h"], - deps = [ - ":tpu_compile_op_common", - "//tensorflow/compiler/xla:statusor", - "//tensorflow/compiler/xla/service:computation_placer", - "//tensorflow/core:framework", - ], - alwayslink = 1, -) - -cc_library( - name = "tpu_compile_op_lib", - srcs = ["tpu_compile_op.cc"], - deps = [ - ":tpu_compile_op_hdrs", - ":tpu_compile_op_impl_factory", - ":tpu_compile_op_options", - "//tensorflow/compiler/xla:statusor", - "//tensorflow/core/protobuf/tpu:compilation_result_proto_cc", - "//tensorflow/stream_executor/tpu:tpu_node_context", - ], - alwayslink = 1, -) - -cc_library( - name = "tpu_compile_op", - deps = [ - ":tpu_compile_op_hdrs", - ":tpu_compile_op_impl", - ":tpu_compile_op_impl_factory", - ":tpu_compile_op_lib", - ":tpu_compile_op_options", - ":tpu_compile_op_registration", - "//tensorflow/compiler/xla:statusor", - "//tensorflow/core/protobuf/tpu:compilation_result_proto_cc", - 
"//tensorflow/stream_executor/tpu:tpu_node_context", - ], - alwayslink = 1, -) diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op.cc b/tensorflow/core/tpu/kernels/tpu_compile_op.cc deleted file mode 100644 index 61210ec3baf..00000000000 --- a/tensorflow/core/tpu/kernels/tpu_compile_op.cc +++ /dev/null @@ -1,94 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include "tensorflow/core/tpu/kernels/tpu_compile_op.h" - -#include - -#include "tensorflow/compiler/xla/statusor.h" -#include "tensorflow/core/protobuf/tpu/compilation_result.pb.h" -#include "tensorflow/core/tpu/kernels/tpu_compile_op_impl_factory.h" -#include "tensorflow/core/tpu/kernels/tpu_compile_op_options.h" -#include "tensorflow/stream_executor/tpu/tpu_node_context.h" - -namespace tensorflow { -namespace tpu { -using ::stream_executor::port::StatusOr; - -TpuCompileOp::TpuCompileOp(OpKernelConstruction* ctx) : OpKernel(ctx) { - StatusOr> compile_op = - (*GetTpuCompileOpCreateFn())(ctx); - OP_REQUIRES_OK(ctx, compile_op.status()); - impl_ = std::move(compile_op.ValueOrDie()); -} - -void TpuCompileOp::Compute(OpKernelContext* ctx) { impl_->Compute(ctx); } - -TpuCompileMlirOp::TpuCompileMlirOp(OpKernelConstruction* ctx) : OpKernel(ctx) { - StatusOr> compile_op = - (*GetTpuCompileOpMlirCreateFn())(ctx); - OP_REQUIRES_OK(ctx, compile_op.status()); - impl_ = std::move(compile_op.ValueOrDie()); -} - -void TpuCompileMlirOp::Compute(OpKernelContext* ctx) { impl_->Compute(ctx); } - -void TpuCompileSucceededAssertOp::Compute(OpKernelContext* ctx) { - const Tensor compilation_result = ctx->input(0); - CompilationResultProto proto; - Status status; - if (!proto.ParseFromString(compilation_result.scalar()())) { - status = - errors::InvalidArgument("Unable to parse compilation result proto"); - } - if (!status.ok() || proto.status_code() != error::Code::OK) { - status.Update(Status(proto.status_code(), proto.status_error_message())); - errors::AppendToMessage(&status, "TPU compilation failed"); - if (tensorflow::internal::TpuCompilationFailureClosesChips()) { - // At this point, if compilation fails we do not know if a task - // is already running that expects results from this compiled - // program to complete. So close the TPU driver to release all - // awaiting interactions (all awaiting interaction will fail and - // continue to fail until reinitialized). - LOG(ERROR) << "Cloud TPU: Closing chips. 
TPU compilation is considered " - "as part of device state, and a failed compilation results " - "in a device reset."; - - Status close_status = TpuNodeContext::CloseTpuHost(); - - if (!close_status.ok()) { - errors::AppendToMessage(&status, close_status.error_message()); - } - } - ctx->CtxFailureWithWarning(status); - } -} - -REGISTER_MODULE_INITIALIZER(register_tpu_compile_op_impl, { -#if !defined(LIBTFTPU) - VLOG(1) << "register_tpu_compile_op_impl: TpuCompileOpKernelImpl"; - SetTpuCompileOpCreateFn(CreateTpuCompileOpImpl); - SetTpuCompileOpMlirCreateFn(CreateTpuCompileOpMlirImpl); -#endif // LIBTFTPU -}); - -REGISTER_KERNEL_BUILDER(Name("TPUCompile").Device(DEVICE_CPU), TpuCompileOp); -REGISTER_KERNEL_BUILDER(Name("_TPUCompileMlir").Device(DEVICE_CPU), - TpuCompileMlirOp); - -REGISTER_KERNEL_BUILDER(Name("TPUCompileSucceededAssert").Device(DEVICE_CPU), - TpuCompileSucceededAssertOp); - -} // namespace tpu -} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op.h b/tensorflow/core/tpu/kernels/tpu_compile_op.h index d0ead41a77e..0bbf5695400 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op.h +++ b/tensorflow/core/tpu/kernels/tpu_compile_op.h @@ -18,10 +18,18 @@ limitations under the License. #include #include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/tpu/kernels/tpu_compile_op_common.h" namespace tensorflow { namespace tpu { +// Forward declaration. +#if defined(LIBTFTPU) +class TpuCompileOpKernelImpl; +#else +namespace internal { +class TpuCompileOpKernelImpl; +} +#endif +} // namespace tpu // The TPUCompile operator compiles a Tensorflow function into a // TPU executable to be run by TPUExecute. @@ -34,9 +42,13 @@ class TpuCompileOp : public OpKernel { void Compute(OpKernelContext* ctx) override; private: - std::unique_ptr impl_; +#if defined(LIBTFTPU) + std::unique_ptr impl_; +#else + std::unique_ptr impl_; +#endif - TF_DISALLOW_COPY_AND_ASSIGN(TpuCompileOp); + DISALLOW_COPY_AND_ASSIGN(TpuCompileOp); }; // The TPUCompile operator compiles a MLIR module into a @@ -50,9 +62,13 @@ class TpuCompileMlirOp : public OpKernel { void Compute(OpKernelContext* ctx) override; private: - std::unique_ptr impl_; +#if defined(LIBTFTPU) + std::unique_ptr impl_; +#else + std::unique_ptr impl_; +#endif - TF_DISALLOW_COPY_AND_ASSIGN(TpuCompileMlirOp); + DISALLOW_COPY_AND_ASSIGN(TpuCompileMlirOp); }; class TpuCompileSucceededAssertOp : public OpKernel { @@ -64,10 +80,9 @@ class TpuCompileSucceededAssertOp : public OpKernel { void Compute(OpKernelContext* ctx) override; private: - TF_DISALLOW_COPY_AND_ASSIGN(TpuCompileSucceededAssertOp); + DISALLOW_COPY_AND_ASSIGN(TpuCompileSucceededAssertOp); }; -} // namespace tpu } // namespace tensorflow #endif // TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILE_OP_H_ diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_impl_factory.cc b/tensorflow/core/tpu/kernels/tpu_compile_op_impl_factory.cc deleted file mode 100644 index f6b4641fe61..00000000000 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_impl_factory.cc +++ /dev/null @@ -1,46 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include "tensorflow/core/tpu/kernels/tpu_compile_op_impl_factory.h" - -namespace tensorflow { -namespace tpu { -namespace { -static TpuCompileOpImplCreateFn* tpu_compile_op_impl_creation_fn = - new TpuCompileOpImplCreateFn(CreateTpuCompileOpImpl); -static TpuCompileOpImplCreateFn* tpu_compile_op_mlir_impl_creation_fn = - new TpuCompileOpImplCreateFn(CreateTpuCompileOpMlirImpl); -} // namespace - -TpuCompileOpImplCreateFn* GetTpuCompileOpCreateFn() { - return tpu_compile_op_impl_creation_fn; -} - -TpuCompileOpImplCreateFn* GetTpuCompileOpMlirCreateFn() { - return tpu_compile_op_mlir_impl_creation_fn; -} - -void SetTpuCompileOpCreateFn(TpuCompileOpImplCreateFn fn) { - VLOG(1) << "SetTpuCompileOpCreateFn."; - delete tpu_compile_op_impl_creation_fn; - tpu_compile_op_impl_creation_fn = new TpuCompileOpImplCreateFn(fn); -} - -void SetTpuCompileOpMlirCreateFn(TpuCompileOpImplCreateFn fn) { - VLOG(1) << "SetTpuCompileOpMlirCreateFn."; - delete tpu_compile_op_mlir_impl_creation_fn; - tpu_compile_op_mlir_impl_creation_fn = new TpuCompileOpImplCreateFn(fn); -} -} // namespace tpu -} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_impl_factory.h b/tensorflow/core/tpu/kernels/tpu_compile_op_impl_factory.h deleted file mode 100644 index 40201f8896d..00000000000 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_impl_factory.h +++ /dev/null @@ -1,55 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILE_OP_IMPL_FACTORY_H_ -#define TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILE_OP_IMPL_FACTORY_H_ - -#include -#include - -#include "tensorflow/compiler/xla/statusor.h" -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/tpu/kernels/tpu_compile_op_common.h" - -namespace tensorflow { -namespace tpu { - -typedef std::function>(OpKernelConstruction*)> - TpuCompileOpImplCreateFn; - -// Creates the callback for creating `TpuCompileOpImpl` instance. -stream_executor::port::StatusOr> -CreateTpuCompileOpImpl(OpKernelConstruction* ctx); - -// Creates the callback for creating Mlir `TpuCompileOpImpl` instance. -stream_executor::port::StatusOr> -CreateTpuCompileOpMlirImpl(OpKernelConstruction* ctx); - -// Gets the callback for creating default `TpuCompileOpImpl` instance. 
-TpuCompileOpImplCreateFn* GetTpuCompileOpCreateFn(); - -// Gets the callback for creating Mlir `TpuCompileOpImpl` instance. -TpuCompileOpImplCreateFn* GetTpuCompileOpMlirCreateFn(); - -// Sets the callback for creating default `TpuCompileOpImpl` instance. -void SetTpuCompileOpCreateFn(TpuCompileOpImplCreateFn fn); - -// Sets the callback for creating Mlir `TpuCompileOpImpl` instance. -void SetTpuCompileOpMlirCreateFn(TpuCompileOpImplCreateFn fn); -} // namespace tpu -} // namespace tensorflow - -#endif // TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILE_OP_IMPL_FACTORY_H_ diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_registration.cc b/tensorflow/core/tpu/kernels/tpu_compile_op_registration.cc deleted file mode 100644 index fe50e01455f..00000000000 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_registration.cc +++ /dev/null @@ -1,52 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include - -#include "tensorflow/compiler/xla/statusor.h" -#include "tensorflow/core/framework/attr_value.pb.h" -#include "tensorflow/core/protobuf/tpu/compile_metadata.pb.h" -#include "tensorflow/core/tpu/kernels/tpu_compile_op_common.h" -#include "tensorflow/core/tpu/kernels/tpu_compile_op_impl.h" -#include "tensorflow/core/tpu/kernels/tpu_compile_op_impl_factory.h" -#include "tensorflow/core/tpu/kernels/tpu_compile_op_support.h" - -namespace tensorflow { -namespace tpu { -using ::stream_executor::port::StatusOr; -StatusOr> CreateTpuCompileOpImpl( - OpKernelConstruction* ctx) { - NameAttrList function_name; - TPUCompileMetadataProto metadata; - TF_RETURN_IF_ERROR(CompileOpMetadataFromContext(ctx, &metadata, - &function_name, - /*mlir_module=*/nullptr)); - VLOG(1) << "Create tensorflow::tpu::TpuCompileOpKernelImpl"; - return {std::make_unique( - function_name, metadata, metadata.num_cores_per_replica(), - /*return_hlo_protos=*/false, - /*unload_cache_on_session_close=*/false)}; -} - -StatusOr> CreateTpuCompileOpMlirImpl( - OpKernelConstruction* ctx) { - TPUCompileMetadataProto metadata; - std::string mlir_module; - TF_RETURN_IF_ERROR(CompileOpMetadataFromContext( - ctx, &metadata, /*function_name=*/nullptr, &mlir_module)); - VLOG(1) << "Create tensorflow::tpu::TpuCompileOpKernelImpl"; - return {std::make_unique( - mlir_module, metadata, metadata.num_cores_per_replica(), - /*return_hlo_protos=*/false, - /*unload_cache_on_session_close=*/false)}; -} -} // namespace tpu -} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_support.cc b/tensorflow/core/tpu/kernels/tpu_compile_op_support.cc index 5cc35a07e66..41e81c6bca7 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_support.cc +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_support.cc @@ -16,28 +16,27 @@ limitations under the License. 
#include "tensorflow/compiler/xla/debug_options_flags.h" #include "tensorflow/compiler/xla/service/computation_layout.h" -#include "tensorflow/compiler/xla/service/computation_placer.h" #include "tensorflow/compiler/xla/service/dump.h" #include "tensorflow/compiler/xla/xla_data.pb.h" -#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_key.h" #include "tensorflow/core/tpu/kernels/tpu_executable_info.pb.h" #include "tensorflow/stream_executor/tpu/proto_helper.h" namespace tensorflow { namespace tpu { -using ::stream_executor::port::Status; -using ::stream_executor::port::StatusOr; -using ::xla::ComputationLayout; -using ::xla::DebugOptions; -using ::xla::DeviceAssignment; -using ::xla::HloModuleConfig; -using ::xla::HloSharding; -using ::xla::InvalidArgument; -using ::xla::ProgramShape; -using ::xla::Shape; -using ::xla::ShapeTree; -using ::xla::ShapeUtil; + +using stream_executor::port::Status; +using stream_executor::port::StatusOr; +using xla::ComputationLayout; +using xla::DebugOptions; +using xla::DeviceAssignment; +using xla::HloModuleConfig; +using xla::HloSharding; +using xla::InvalidArgument; +using xla::ProgramShape; +using xla::Shape; +using xla::ShapeTree; +using xla::ShapeUtil; Status ValidateResultShape(const Shape& client_shape, const Shape& result_shape) { @@ -486,59 +485,5 @@ StatusOr CreateTpuCompilationRequest( VLOG(1) << "TpuCompilationRequest:\n" << compilation_request.DebugString(); return compilation_request; } - -Status CompileOpMetadataFromContext(OpKernelConstruction* ctx, - TPUCompileMetadataProto* metadata, - NameAttrList* function_name, - std::string* mlir_module) { - CHECK_NE(metadata, nullptr); - - int num_computations; - TF_RETURN_IF_ERROR(ctx->GetAttr("num_computations", &num_computations)); - - std::string metadata_string; - TF_RETURN_IF_ERROR(ctx->GetAttr("metadata", &metadata_string)); - if (!metadata->ParsePartialFromString(metadata_string)) { - return errors::InvalidArgument("Unable to parse TPUCompileMetadataProto"); - } - - if (function_name != nullptr) { - TF_RETURN_IF_ERROR(ctx->GetAttr("function", function_name)); - } - - if (mlir_module != nullptr) { - TF_RETURN_IF_ERROR(ctx->GetAttr("mlir_module", mlir_module)); - } - - if (num_computations != metadata->num_cores_per_replica()) { - return errors::InvalidArgument( - "num_computations must be equal to " - "num_cores_per_replica in the 'metadata' " - "attribute (", - num_computations, " vs ", metadata->num_cores_per_replica(), ")"); - } - - if (metadata->has_device_assignment()) { - StatusOr> device_assignment_or_error = - DeviceAssignment::Deserialize(metadata->device_assignment()); - TF_RETURN_IF_ERROR(device_assignment_or_error.status()); - const DeviceAssignment& device_assignment = - *device_assignment_or_error.ValueOrDie(); - const int num_replicas = metadata->num_replicas(); - if (device_assignment.replica_count() != num_replicas) { - return errors::InvalidArgument( - "Device assignment replica_count != num_replicas; ", - device_assignment.replica_count(), " vs ", num_replicas); - } - if (device_assignment.computation_count() != - metadata->num_cores_per_replica()) { - return errors::InvalidArgument( - "Device assignment computation_count != num_cores_per_replica; ", - device_assignment.computation_count(), " vs ", - metadata->num_cores_per_replica()); - } - } - return Status::OK(); -} } // namespace tpu } // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_support.h 
b/tensorflow/core/tpu/kernels/tpu_compile_op_support.h index bc60f64286a..d6d407cb28f 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_op_support.h +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_support.h @@ -31,7 +31,6 @@ limitations under the License. #include "tensorflow/compiler/xla/shape_tree.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/xla_data.pb.h" -#include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/framework/function.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor.pb.h" @@ -155,10 +154,6 @@ se::port::StatusOr CreateTpuCompilationRequest( const TPUCompileMetadataProto& metadata, const std::vector& arg_shapes); -se::port::Status CompileOpMetadataFromContext(OpKernelConstruction* ctx, - TPUCompileMetadataProto* metadata, - NameAttrList* function_name, - std::string* mlir_module); } // namespace tpu } // namespace tensorflow From 6b8687f97cc349c5d3cdea39b535ba6292831b2e Mon Sep 17 00:00:00 2001 From: Wenhao Jia Date: Thu, 16 Jul 2020 23:14:25 -0700 Subject: [PATCH 0676/2522] Add a TPUExecute function. PiperOrigin-RevId: 321720840 Change-Id: I9f7304c6f8fd6ffe8266c60b10a6f19a7b3bdc54 --- tensorflow/core/tpu/BUILD | 42 ++ .../core/tpu/kernels/tpu_execute_c_api.h | 10 + tensorflow/core/tpu/tpu_execute.cc | 519 ++++++++++++++++++ tensorflow/core/tpu/tpu_execute.h | 54 ++ tensorflow/core/tpu/tpu_library_init_fns.inc | 3 + 5 files changed, 628 insertions(+) create mode 100644 tensorflow/core/tpu/tpu_execute.cc create mode 100644 tensorflow/core/tpu/tpu_execute.h diff --git a/tensorflow/core/tpu/BUILD b/tensorflow/core/tpu/BUILD index f9031b440f9..d82011c6961 100644 --- a/tensorflow/core/tpu/BUILD +++ b/tensorflow/core/tpu/BUILD @@ -227,3 +227,45 @@ cc_library( "//tensorflow/core:protos_all_cc", ], ) + +cc_library( + name = "tpu_execute", + srcs = ["tpu_execute.cc"], + hdrs = ["tpu_execute.h"], + deps = [ + ":tpu_api", + "//tensorflow/compiler/jit:xla_device", + "//tensorflow/compiler/xla:executable_run_options", + "//tensorflow/compiler/xla:shape_layout", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:xla_data_proto_cc", + "//tensorflow/compiler/xla/service:computation_layout", + "//tensorflow/compiler/xla/service:computation_placer", + "//tensorflow/compiler/xla/service:executable", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_module_config", + "//tensorflow/compiler/xla/service:hlo_proto_cc", + "//tensorflow/compiler/xla/service:maybe_owning_device_memory", + "//tensorflow/compiler/xla/service:transfer_manager", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core/profiler/lib:traceme", + "//tensorflow/core/tpu/kernels:tpu_compile_c_api_hdrs", + "//tensorflow/core/tpu/kernels:tpu_executable_info_proto_cc", + "//tensorflow/stream_executor:device_memory", + "//tensorflow/stream_executor:stream", + "//tensorflow/stream_executor/lib", + "//tensorflow/stream_executor/tpu:c_api_conversions", + "//tensorflow/stream_executor/tpu:status_helper", + "//tensorflow/stream_executor/tpu:tpu_executable", + "//tensorflow/stream_executor/tpu:tpu_executor_c_api_hdrs", + "//tensorflow/stream_executor/tpu:tpu_node_context", + "//tensorflow/stream_executor/tpu:tpu_platform_interface", + "@com_google_absl//absl/base", + 
"@com_google_absl//absl/memory", + ], +) diff --git a/tensorflow/core/tpu/kernels/tpu_execute_c_api.h b/tensorflow/core/tpu/kernels/tpu_execute_c_api.h index 38a550444a9..81d23441ddc 100644 --- a/tensorflow/core/tpu/kernels/tpu_execute_c_api.h +++ b/tensorflow/core/tpu/kernels/tpu_execute_c_api.h @@ -37,11 +37,21 @@ TFTPU_CAPI_EXPORT void TpuExecutable_LoadProgramAndEnqueueToStream( TFTPU_CAPI_EXPORT void HardwareLayout_HostShapeToDeviceShape( XLA_Shape* host_shape, XLA_Shape* device_shape); TFTPU_CAPI_EXPORT int64_t HardwareLayout_ShapeSize(XLA_Shape* shape); +TFTPU_CAPI_EXPORT int64_t HardwareLayout_ShapeSizeCompact(XLA_Shape* shape); +TFTPU_CAPI_EXPORT int64_t HardwareLayout_ShapeSizeCompactRaw(XLA_Shape* shape); + +TFTPU_CAPI_EXPORT void TpuExecute_RuntimeInputToPaddedData( + uint32_t* runtime_input_ptr, size_t runtime_input_size, + int8_t* padded_data_ptr, size_t padded_data_size, XLA_Shape* runtime_shape, + XLA_Shape* compile_time_shape, SE_Status* status); struct TfTpu_ExecuteApiFn { TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_LoadProgramAndEnqueueToStream); TFTPU_ADD_FN_IN_STRUCT(HardwareLayout_HostShapeToDeviceShape); TFTPU_ADD_FN_IN_STRUCT(HardwareLayout_ShapeSize); + TFTPU_ADD_FN_IN_STRUCT(HardwareLayout_ShapeSizeCompact); + TFTPU_ADD_FN_IN_STRUCT(HardwareLayout_ShapeSizeCompactRaw); + TFTPU_ADD_FN_IN_STRUCT(TpuExecute_RuntimeInputToPaddedData); }; } // extern "C" diff --git a/tensorflow/core/tpu/tpu_execute.cc b/tensorflow/core/tpu/tpu_execute.cc new file mode 100644 index 00000000000..022e8c2a07e --- /dev/null +++ b/tensorflow/core/tpu/tpu_execute.cc @@ -0,0 +1,519 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/tpu/tpu_execute.h" + +#include +#include +#include +#include +#include + +#include "absl/base/casts.h" +#include "absl/memory/memory.h" +#include "tensorflow/compiler/jit/xla_device.h" +#include "tensorflow/compiler/xla/executable_run_options.h" +#include "tensorflow/compiler/xla/service/computation_layout.h" +#include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_module_config.h" +#include "tensorflow/compiler/xla/service/maybe_owning_device_memory.h" +#include "tensorflow/compiler/xla/service/service_executable_run_options.h" +#include "tensorflow/compiler/xla/service/transfer_manager.h" +#include "tensorflow/compiler/xla/shape.h" +#include "tensorflow/compiler/xla/shape_layout.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/status.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/compiler/xla/util.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/platform/casts.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/profiler/lib/traceme.h" +#include "tensorflow/core/tpu/tpu_api.h" +#include "tensorflow/stream_executor/device_memory.h" +#include "tensorflow/stream_executor/lib/statusor.h" +#include "tensorflow/stream_executor/tpu/c_api_conversions.h" +#include "tensorflow/stream_executor/tpu/status_helper.h" +#include "tensorflow/stream_executor/tpu/tpu_executable.h" +#include "tensorflow/stream_executor/tpu/tpu_executor_c_api.h" +#include "tensorflow/stream_executor/tpu/tpu_platform_interface.h" + +namespace tensorflow { + +namespace { + +using ::tensorflow::tpu::TpuNodeContext; + +static bool tpu_cancellation_terminates_process = false; +static bool tpu_cancellation_closes_chips = true; + +// Host-side runtime for transfers between TPU and host. +class HostTransferManager { + public: + using HostCommmandHandler = xla::TpuExecutable::HostCommandHandler; + + explicit HostTransferManager(TpuNodeContext* node_context) + : node_context_(node_context) {} + + // Returns a function to be called when the TPU triggers a host command + // interrupt while executing the current program. + xla::StatusOr Initialize( + const TPUHostTransferInfoProto& program, + const std::string& rendezvous_key_base, OpKernelContext* ctx); + + private: + TpuNodeContext* node_context_; // not owned + + TF_DISALLOW_COPY_AND_ASSIGN(HostTransferManager); +}; + +xla::StatusOr +HostTransferManager::Initialize(const TPUHostTransferInfoProto& program, + const string& rendezvous_key_base, + OpKernelContext* ctx) { + return HostCommmandHandler([](uint32, int64) { + LOG(WARNING) << "HostTransferManager is unimplemented."; + }); +} + +// Sleep for 5 seconds, then call std::quick_exit(42) to quickly restart. +void ExitCountdown(Env* env) { + const int kSleepSeconds = 5; + LOG(INFO) << "TpuExecute was cancelled. Sleeping for " << kSleepSeconds + << " seconds before terminating the process to give time " + "for other errors to propagate"; + env->SleepForMicroseconds(kSleepSeconds * 1000000); + LOG(ERROR) << "Aborting process due to cancelled TPUExecute. Consult " + "the anomalies reported above (if any), run state of job " + "(including failed RPCs) and worker logs. 
This " + "termination is to ensure a consistent state, if your job " + "does not restart, modify the retries allowed. See " + "b/62262381 and b/65223927."; + std::quick_exit(42); +} + +xla::Shape HostShapeToDeviceShape(const xla::Shape& host_shape) { + XLA_Shape c_host_shape; + XLA_Shape c_device_shape; + TpuConversions::XlaShapeToCShape(host_shape, &c_host_shape); + tensorflow::tpu::ExecuteApiFn()->HardwareLayout_HostShapeToDeviceShapeFn( + &c_host_shape, &c_device_shape); + xla::Shape device_shape = TpuConversions::CShapeToXlaShape(&c_device_shape); + TpuConversions::CShapeCleanup(&c_host_shape); + TpuConversions::CShapeCleanup(&c_device_shape); + return device_shape; +} + +int64 ShapeSizeCompact(const xla::Shape& shape) { + XLA_Shape c_shape; + TpuConversions::XlaShapeToCShape(shape, &c_shape); + int64 size = + tensorflow::tpu::ExecuteApiFn()->HardwareLayout_ShapeSizeCompactFn( + &c_shape); + TpuConversions::CShapeCleanup(&c_shape); + return size; +} + +int64 ShapeSizeCompactRaw(const xla::Shape& shape) { + XLA_Shape c_shape; + TpuConversions::XlaShapeToCShape(shape, &c_shape); + int64 size = + tensorflow::tpu::ExecuteApiFn()->HardwareLayout_ShapeSizeCompactRawFn( + &c_shape); + TpuConversions::CShapeCleanup(&c_shape); + return size; +} + +// Given a tuple, fix all non-leaf nodes (tuples) such that the tuple tables +// point to the correct leaf nodes. +xla::Status FixTupleTableAsync(se::Stream* stream, + const xla::Shape& tuple_shape, + xla::ExecutionInput* mem, + xla::TransferManager* transfer_manager) { + return xla::ShapeUtil::ForEachSubshapeWithStatus( + tuple_shape, + [&](const xla::Shape& element_shape, + const xla::ShapeIndex& index) -> Status { + if (!element_shape.IsTuple()) { + return Status::OK(); + } + std::vector elements; + xla::ShapeIndex element_index = index; + element_index.push_back(0); + for (int64 i = 0; i < element_shape.tuple_shapes_size(); ++i) { + // Gather all children of the tuple element. + element_index.back() = i; + elements.push_back(mem->Buffer(element_index).AsDeviceMemoryBase()); + } + se::DeviceMemoryBase tuple_table_addr = + mem->Buffer(index).AsDeviceMemoryBase(); + return transfer_manager->WriteSingleTupleIndexTable( + stream, elements, element_shape, &tuple_table_addr); + }); +} + +// Returns true if `dynamic_shape` has dimensions that are less-equal to the +// "bounded_shape". +bool DynamicShapeIsCompatible(const xla::Shape& dynamic_shape, + const xla::Shape& bounded_shape) { + if (dynamic_shape.rank() != bounded_shape.rank()) { + return false; + } + for (int64 i = 0; i < dynamic_shape.rank(); ++i) { + if (dynamic_shape.dimensions(i) > bounded_shape.dimensions(i)) { + return false; + } + } + return true; +} + +// For dynamic inputs, copy them and attach metadata of shape sizes to the +// end of the tensor. +// +// The buffer for dynamic shapes contains three parts: +// +--------+ +// |Payload | +// +--------+ +// | Padding| +// +--------+ +// |Metadata| +// +--------+ +// +// Metadata contains the sizes of shape without padding, eventually +// representing the size of valid data. +xla::Status UpdateDynamicInputs( + se::Stream* stream, se::DeviceMemoryAllocator* allocator, + std::vector* runtime_inputs, + const std::vector& compile_time_shapes) { + TF_RET_CHECK(runtime_inputs->size() == compile_time_shapes.size()); + for (int64 i = 0; i < compile_time_shapes.size(); i++) { + // TODO(yunxing): Iterating over thousands of elements can be slow. 
One way + // to optimize for fast path without dynamic shapes is add a field in + // compilation result indicating if dynamic input is presented. + if (compile_time_shapes[i].is_static()) { + continue; + } + auto& runtime_input = (*runtime_inputs)[i]; + xla::Shape compile_time_shapes_on_device = + HostShapeToDeviceShape(compile_time_shapes[i]); + bool element_modified = false; + TF_RETURN_IF_ERROR(xla::ShapeUtil::ForEachSubshapeWithStatus( + compile_time_shapes_on_device, + [&](const xla::Shape& compile_time_shape, + const xla::ShapeIndex& index) -> Status { + if (compile_time_shape.IsTuple() || compile_time_shape.is_static()) { + return Status::OK(); + } + + const xla::Shape& runtime_shape = + xla::ShapeUtil::GetSubshape(runtime_input.shape(), index); + + TF_RET_CHECK(!runtime_shape.IsTuple()); + TF_RET_CHECK( + DynamicShapeIsCompatible(runtime_shape, compile_time_shape)); + + xla::MaybeOwningDeviceMemory* mutable_input_mem = + runtime_input.MutableBuffer(index); + auto padded_data = std::make_shared>( + ShapeSizeCompact(compile_time_shape), -1); + auto raw_input_runtime = std::make_shared>( + ShapeSizeCompact(runtime_shape) / sizeof(uint32)); + stream->ThenMemcpyD2H( + se::DeviceMemory(mutable_input_mem->AsDeviceMemoryBase()), + absl::MakeSpan(absl::bit_cast(raw_input_runtime->data()), + ShapeSizeCompactRaw(runtime_shape))); + stream->ThenDoHostCallback([raw_input_runtime, padded_data, + runtime_shape, compile_time_shape]() { + // After getting the data onto the host, transpose the data to + // the correct layout by delinearizing it and linearizing it again. + XLA_Shape c_runtime_shape, c_compile_time_shape; + TpuConversions::XlaShapeToCShape(runtime_shape, &c_runtime_shape); + TpuConversions::XlaShapeToCShape(compile_time_shape, + &c_compile_time_shape); + StatusHelper status; + tensorflow::tpu::ExecuteApiFn() + ->TpuExecute_RuntimeInputToPaddedDataFn( + raw_input_runtime->data(), raw_input_runtime->size(), + padded_data->data(), padded_data->size(), &c_runtime_shape, + &c_compile_time_shape, status.c_status); + TpuConversions::CShapeCleanup(&c_runtime_shape); + TpuConversions::CShapeCleanup(&c_compile_time_shape); + return status.status(); + }); + // Allocate new input and transfer the padded and transposed data to + // the new input location. + TF_ASSIGN_OR_RETURN( + auto new_input, + allocator->Allocate(stream->parent()->device_ordinal(), + ShapeSizeCompact(compile_time_shape))); + auto typed_new_input_memory = + se::DeviceMemory(new_input.cref()); + stream->ThenMemcpyH2D(*padded_data, &typed_new_input_memory); + + // Retain the memory until the end of the transfer. + stream->ThenDoHostCallback([padded_data]() { return Status::OK(); }); + + // Modify the memory location in the input shape tree to point to the + // new input. + *mutable_input_mem = + xla::MaybeOwningDeviceMemory(std::move(new_input)); + element_modified = true; + return Status::OK(); + })); + if (element_modified) { + // The input location has been modified, need to fix tuple table to + // point to the correct address. 
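+ // For example, for a tuple-shaped input (A, (B, C)) the device memory holds
+ // the leaf buffers for A, B and C plus an index table for the outer tuple
+ // and one for (B, C). If the buffer backing C was replaced above, the inner
+ // table still points at the old address, so every non-leaf table is
+ // rewritten below via FixTupleTableAsync.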
+ TF_ASSIGN_OR_RETURN( + auto transfer_manager, + xla::TransferManager::GetForPlatform(stream->parent()->platform())); + TF_RETURN_IF_ERROR(FixTupleTableAsync(stream, + compile_time_shapes_on_device, + &runtime_input, transfer_manager)); + } + } + return Status::OK(); +} + +void TPUCancelExecution(Env* env, int device_ordinal) { + if (tpu_cancellation_terminates_process) { + LOG(INFO) << "TPUCancelExecution StopChipHeartbeats on device " + << device_ordinal; + Status status = TpuNodeContext::StopChipHeartbeats(); + LOG(INFO) << "TPUCancelExecution StopChipHeartbeats done: " << status + << " on device " << device_ordinal; + // Sleep and exit in another thread so the cancellation manager can + // continue running callbacks. The new thread will call quick_exit, + // so we discard the returned Thread pointer because we won't have + // an opportunity to delete it. + (void)env->StartThread(ThreadOptions(), "tpu_execute_exit_countdown", + [env]() { ExitCountdown(env); }); + } else if (tpu_cancellation_closes_chips) { + LOG(INFO) << "TPUCancelExecution CloseTPUHost on device " << device_ordinal; + Status status = TpuNodeContext::CloseTpuHost(); + LOG(INFO) << "TPUCancelExecution CloseTPUHost done: " << status + << " on device " << device_ordinal; + } else { + LOG(INFO) << "TPUCancelExecution CloseTPUHost on device " << device_ordinal + << " is suppressed"; + } +} + +std::pair RegisterCancellation( + OpKernelContext* ctx, CancellationManager* cancellation_manager, + int device_ordinal) { + // Set up a cancellation callback, to ensure the TPU program we run will + // halt if the RPC is cancelled. Without this the TPU program might block + // forever. The mechanism itself is a big hammer; we close all devices + // attached to this host on each cancellation callback. This is necessary to + // ensure the system will eventually halt, since the TensorNodes on each + // chip may be stuck waiting for mutual communication. + // + // By closing all devices, we ensure all subsequent attempts to use the + // device will fail, until the devices are re-initialized via a new call to + // tpu.initialize_system. + // + // In a multi-TensorNode setup, CloseTPUHost may be called once for each + // TensorNode, and each call will close all TensorNodes. This quadratic + // behavior ensures the mechanism is robust to various orderings + // (i.e. races) between the TPU programs, which are run on separate threads. + // In practice the quadratic behavior isn't that bad; the first call will + // actually halt any running TPU programs (which may be expensive), while + // subsequent calls will attempt to close an already-closed device (which is + // cheap). + // + // TODO(b/62262381): The cancellation manager is shared between multiple TPU + // execute ops and the cancellation will not be invoked only when RPC is + // cancelled (it may also be induced by OOM errors from a different TPU + // execute), this results in a pretty coarse cancellation domain. This + // cancellation callback should only execute in a narrower scope to not be + // triggered in such cases. + CancellationToken token = cancellation_manager->get_cancellation_token(); + // Don't rely on OpKernelContext being available when the callback runs. 
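+ // The callback may run long after this kernel has finished, so it captures
+ // only the Env and the device ordinal rather than the OpKernelContext.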
+ Env* env = ctx->env();
+ bool already_cancelled = !cancellation_manager->RegisterCallback(
+ token,
+ [device_ordinal, env]() { TPUCancelExecution(env, device_ordinal); });
+ return std::pair(token, already_cancelled);
+}
+
+void UnregisterCancellation(
+ OpKernelContext* ctx, CancellationManager* cancellation_manager,
+ se::Stream* stream, int device_ordinal, CancellationToken token,
+ std::shared_ptr host_transfer_manager) {
+ // If execution reaches this point, the host callback enqueued below will get
+ // called regardless of stream status. Call inc_num_deferred_ops_function here
+ // and dec_num_deferred_ops_function in the host callback.
+ ctx->inc_num_deferred_ops_function()();
+ auto dec_num_deferred_ops_function = ctx->dec_num_deferred_ops_function();
+
+ // Try to avoid running callbacks on the compute stream, because this reduces
+ // the frequency of back-to-back programs (which are most efficient because
+ // they don't require host synchronization). Instead, borrow a substream and
+ // have the substream wait on the compute stream.
+ se::Stream* deregister_stream = stream->GetOrCreateSubStream();
+ deregister_stream->ThenWaitFor(stream);
+ deregister_stream->ThenDoHostCallback([=]() {
+ // Ensure the host_transfer_manager is copied into the callback scope.
+ (void)host_transfer_manager;
+
+ // We must deregister the callback in the success case, to avoid closing all
+ // devices. In the failure case we must NOT call DeregisterCallback as that
+ // waits for all previous cancellation callbacks to complete and any call
+ // to XlaDevice::Sync() will cause deadlock. Consider:
+ // 1) CancellationManager::StartCancel() is in progress (state is
+ // cancelling_).
+ // 2) The call below to DeregisterCallback will block until state is
+ // cancelled_ (all callbacks are completed).
+ // 3) A different cancellation callback has called XlaDevice::Sync(),
+ // which will block until (2) is done.
+ // 4) StartCancel() in (1) cannot complete until (3) is done.
+ //
+ // Instead, call TryDeregisterCallback. The functional difference is that
+ // TryDeregisterCallback will not block if cancellation is in progress,
+ // so it makes no guarantees as to the state of any callbacks.
+ // This is not a problem, as our cancellation handler does not rely on
+ // any external state.
+ VLOG(1) << "cancellation_manager->TryDeregisterCallback on device "
+ << device_ordinal;
+ cancellation_manager->TryDeregisterCallback(token);
+ VLOG(1) << "cancellation_manager->TryDeregisterCallback done on device "
+ << device_ordinal;
+
+ // ExecutorState is held alive until at least this point to ensure
+ // cancellation_manager is valid. After all outstanding
+ // dec_num_deferred_ops_function are called, ExecutorState::Finish will be
+ // allowed to proceed.
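+ // This balances the inc_num_deferred_ops_function() call made before the
+ // callback was enqueued.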
+ dec_num_deferred_ops_function(); + }); + stream->ReturnSubStream(deregister_stream); +} + +} // namespace + +xla::StatusOr TPUExecute( + const TPUExecutableInfoProto& executable, + const TPUHostTransferInfoProto& host_transfers, + const xla::HloProto& hlo_metadata, + std::vector arguments, + const string& rendezvous_key_base, uint32 rng_seed, + TpuNodeContext* node_context, xla::DeviceAssignment* device_assignment, + CancellationManager* cancellation_manager, OpKernelContext* ctx, + stream_executor::Stream* stream, + stream_executor::Stream* host_to_device_stream, + const XLA_TpuProgram* tpu_program) { + profiler::TraceMe traceme("TPUExecute", 2); + TF_RET_CHECK(tpu::TpuPlatformInterface::GetRegisteredPlatform() != nullptr); + TF_RET_CHECK(tpu_program != nullptr); + VLOG(1) << "TPUExecute on device " << node_context->tensor_core_location(); + + XlaDevice* device = + tensorflow::down_cast(ctx->device()->UnderlyingDevice()); + TF_RET_CHECK(device); + + // Create a HostTransferManager to handle Send/Recv operations from the TPU. + std::shared_ptr host_transfer_manager = + std::make_shared(node_context); + TF_ASSIGN_OR_RETURN(HostTransferManager::HostCommmandHandler handler, + host_transfer_manager->Initialize( + host_transfers, rendezvous_key_base, ctx)); + + VLOG(2) << "Cloud TPU: Executing computation on device " + << node_context->index_on_host(); + + xla::ExecutableRunOptions run_options; + run_options.set_stream(stream); + run_options.set_device_assignment(device_assignment); + run_options.set_rng_seed(rng_seed); + run_options.set_allocator(node_context->memory_allocator()); + run_options.set_host_to_device_stream(host_to_device_stream); + + const xla::ServiceExecutableRunOptions service_run_options(run_options); + + std::unique_ptr module; + std::vector input_shapes; + { + xla::ComputationLayout computation_layout( + xla::ShapeLayout(xla::Shape(executable.output_shape()))); + for (const xla::ShapeProto& shape_proto : executable.input_shapes()) { + xla::Shape shape(shape_proto); + computation_layout.add_parameter_layout(xla::ShapeLayout(shape)); + input_shapes.push_back(std::move(shape)); + } + module = absl::make_unique( + "TpuExecutableModule", + xla::HloModuleConfig(std::move(computation_layout))); + } + + TF_ASSIGN_OR_RETURN( + module->input_output_alias_config(), + xla::HloInputOutputAliasConfig::CreateFromProto( + node_context->transfer_manager()->HostShapeToDeviceShape( + module->config().entry_computation_layout().result_shape()), + hlo_metadata.hlo_module().input_output_alias())); + TF_RET_CHECK(executable.input_shapes().size() == arguments.size()); + + for (auto& prefetch : hlo_metadata.hlo_module().cross_program_prefetches()) { + module->AddCrossProgramPrefetch( + prefetch.parameter(), + xla::ShapeIndex(prefetch.index().begin(), prefetch.index().end())); + } + + TF_RETURN_IF_ERROR(UpdateDynamicInputs( + stream, node_context->memory_allocator(), &arguments, input_shapes)); + + auto tpu_executable = absl::make_unique( + tpu_program, std::move(module), handler); + + const int32 device_ordinal = node_context->device_ordinal(); + CancellationToken token; + bool already_cancelled; + std::tie(token, already_cancelled) = + RegisterCancellation(ctx, cancellation_manager, device_ordinal); + + // If the RPC was already cancelled before we managed to register the + // cancellation callback, we shouldn't attempt to run the TPU program, since + // it might block forever. 
+ if (already_cancelled) {
+ return errors::Cancelled(
+ "RPC cancelled, not running TPU program on device ", device_ordinal);
+ }
+
+ xla::StatusOr output =
+ tpu_executable->ExecuteAsyncOnStream(&service_run_options,
+ std::move(arguments),
+ /*hlo_execution_profile=*/nullptr);
+
+ // If !output.ok(), it means we failed to enqueue the program to the TPU. This
+ // is possibly caused by a failed cancellation callback closing the chips.
+ if (!output.ok()) {
+ // If the cancellation manager is already cancelled or cancelling, it means
+ // another failure has occurred earlier and this TpuExecuteOp is cancelled
+ // regardless of whether it itself is an error.
+ already_cancelled = cancellation_manager->IsCancelling() ||
+ cancellation_manager->IsCancelled();
+ if (already_cancelled) {
+ return errors::Cancelled(
+ "RPC cancelled, not running TPU program on device ", device_ordinal);
+ }
+ }
+ UnregisterCancellation(ctx, cancellation_manager, stream, device_ordinal,
+ token, host_transfer_manager);
+ VLOG(1) << "Cloud TPU: TPUExecute done";
+ return output;
+}
+
+} // namespace tensorflow
diff --git a/tensorflow/core/tpu/tpu_execute.h b/tensorflow/core/tpu/tpu_execute.h
new file mode 100644
index 00000000000..e2142ad7a7a
--- /dev/null
+++ b/tensorflow/core/tpu/tpu_execute.h
@@ -0,0 +1,54 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_TPU_TPU_EXECUTE_H_
+#define TENSORFLOW_CORE_TPU_TPU_EXECUTE_H_
+
+#include
+#include
+
+#include "tensorflow/compiler/xla/service/computation_placer.h"
+#include "tensorflow/compiler/xla/service/executable.h"
+#include "tensorflow/compiler/xla/service/hlo.pb.h"
+#include "tensorflow/compiler/xla/statusor.h"
+#include "tensorflow/core/framework/cancellation.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/tpu/kernels/tpu_compile_c_api.h"
+#include "tensorflow/core/tpu/kernels/tpu_executable_info.pb.h"
+#include "tensorflow/stream_executor/stream.h"
+#include "tensorflow/stream_executor/tpu/tpu_node_context.h"
+
+namespace tensorflow {
+
+// Runs a TPU executable. `executable`, `host_transfers` and `hlo_metadata`
+// describe the compiled program, `arguments` holds the argument buffers, and
+// `tpu_program` is the handle to the compiled program produced by the
+// TPUCompile op. The remaining parameters supply the node context, execution
+// streams, device assignment, RNG seed, rendezvous key base and cancellation
+// support.
+xla::StatusOr TPUExecute( + const TPUExecutableInfoProto& executable, + const TPUHostTransferInfoProto& host_transfers, + const xla::HloProto& hlo_metadata, + std::vector arguments, + const std::string& rendezvous_key_base, uint32 rng_seed, + tpu::TpuNodeContext* node_context, xla::DeviceAssignment* device_assignment, + CancellationManager* cancellation_manager, OpKernelContext* ctx, + stream_executor::Stream* stream, + stream_executor::Stream* host_to_device_stream, + const XLA_TpuProgram* tpu_program); + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_TPU_TPU_EXECUTE_H_ diff --git a/tensorflow/core/tpu/tpu_library_init_fns.inc b/tensorflow/core/tpu/tpu_library_init_fns.inc index 7a7c6ecad30..06197870fee 100644 --- a/tensorflow/core/tpu/tpu_library_init_fns.inc +++ b/tensorflow/core/tpu/tpu_library_init_fns.inc @@ -43,6 +43,9 @@ tensorflow::Status SetExecuteStructFn(void* library_handle) { TFTPU_SET_FN(execute_fn, TpuExecutable_LoadProgramAndEnqueueToStream); TFTPU_SET_FN(execute_fn, HardwareLayout_HostShapeToDeviceShape); TFTPU_SET_FN(execute_fn, HardwareLayout_ShapeSize); + TFTPU_SET_FN(execute_fn, HardwareLayout_ShapeSizeCompact); + TFTPU_SET_FN(execute_fn, HardwareLayout_ShapeSizeCompactRaw); + TFTPU_SET_FN(execute_fn, TpuExecute_RuntimeInputToPaddedData); return tensorflow::Status::OK(); } From a80239d1573f8b56b9d570e7bf19d00bcb59bede Mon Sep 17 00:00:00 2001 From: Prakalp Srivastava Date: Thu, 16 Jul 2020 23:47:04 -0700 Subject: [PATCH 0677/2522] Fix bug in Return op construction while lifting resource ops. While constructing the return op, the code assumes that the key-value pairs in SmallDenseMap `resource_arg_to_new_output` are in the same order as they are inserted. This is incorrect as SmallDenseMap container does not guarantee the ordering of key-value pairs. Instead we use the `resource_arg_to_new_output` index mapping itself to construct the return op. PiperOrigin-RevId: 321723375 Change-Id: I85a244fcaf7d0339a1ce773b153cd4cb1af48a71 --- .../mlir/tensorflow/transforms/resource_op_lifting.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc b/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc index 3e70526b9d3..100893e89b5 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc @@ -762,8 +762,11 @@ LogicalResult HandleCaseOrIfOp(CaseOrIfOp op, ArrayRef branches) { for (auto branch : branches) { auto new_retvals = llvm::to_vector<4>(branch.front().getTerminator()->getOperands()); + new_retvals.resize(new_retvals.size() + resource_arg_to_new_output.size()); for (const auto& entry : resource_arg_to_new_output) { - new_retvals.push_back(branch.getArgument(entry.getFirst())); + int64_t resource_arg_index = entry.getFirst(); + int64_t output_index = entry.getSecond(); + new_retvals[output_index] = branch.getArgument(resource_arg_index); } auto old_return = branch.front().getTerminator(); OpBuilder builder(old_return); From 00c6f88dd19bd5163b01af2ab652c56550ea9486 Mon Sep 17 00:00:00 2001 From: Wenhao Jia Date: Thu, 16 Jul 2020 23:48:25 -0700 Subject: [PATCH 0678/2522] Add a TPU execution op. 
PiperOrigin-RevId: 321723459 Change-Id: Ib9ba04734177604249899cd8ca6ce810b70d56f8 --- tensorflow/core/tpu/kernels/BUILD | 41 + tensorflow/core/tpu/kernels/tpu_execute_op.cc | 805 ++++++++++++++++++ tensorflow/core/tpu/kernels/tpu_execute_op.h | 66 ++ 3 files changed, 912 insertions(+) create mode 100644 tensorflow/core/tpu/kernels/tpu_execute_op.cc create mode 100644 tensorflow/core/tpu/kernels/tpu_execute_op.h diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index 7a6160a2963..af7c9ead791 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -553,3 +553,44 @@ cc_library( ], alwayslink = 1, ) + +cc_library( + name = "tpu_execute_op", + srcs = ["tpu_execute_op.cc"], + hdrs = ["tpu_execute_op.h"], + deps = [ + ":tpu_compilation_cache_entry", + ":tpu_compilation_cache_external", + ":tpu_compilation_cache_local_lookup", + ":tpu_compilation_cache_lookup", + ":tpu_executable_info_proto_cc", + ":tpu_op_consts", + "//tensorflow/compiler/jit:xla_device", + "//tensorflow/compiler/jit:xla_launch_util", + "//tensorflow/compiler/jit:xla_tensor", + "//tensorflow/compiler/tf2xla:common", + "//tensorflow/compiler/tf2xla:tf2xla_util", + "//tensorflow/compiler/xla:debug_options_flags", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:xla_data_proto_cc", + "//tensorflow/compiler/xla/service:dump", + "//tensorflow/compiler/xla/service:executable", + "//tensorflow/compiler/xla/service:maybe_owning_device_memory", + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:stream_executor_no_cuda", + "//tensorflow/core/profiler/lib:traceme", + "//tensorflow/core/tpu:tpu_configuration", + "//tensorflow/core/tpu:tpu_defs", + "//tensorflow/core/tpu:tpu_execute", + "//tensorflow/stream_executor:device_memory_allocator", + "//tensorflow/stream_executor/tpu:tpu_node_context", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/types:span", + ], + alwayslink = True, +) diff --git a/tensorflow/core/tpu/kernels/tpu_execute_op.cc b/tensorflow/core/tpu/kernels/tpu_execute_op.cc new file mode 100644 index 00000000000..817649e2fe7 --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_execute_op.cc @@ -0,0 +1,805 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tensorflow/core/tpu/kernels/tpu_execute_op.h" + +#include "absl/container/flat_hash_map.h" +#include "absl/memory/memory.h" +#include "absl/types/span.h" +#include "tensorflow/compiler/jit/xla_device.h" +#include "tensorflow/compiler/jit/xla_launch_util.h" +#include "tensorflow/compiler/jit/xla_tensor.h" +#include "tensorflow/compiler/tf2xla/shape_util.h" +#include "tensorflow/compiler/tf2xla/tf2xla_util.h" +#include "tensorflow/compiler/xla/debug_options_flags.h" +#include "tensorflow/compiler/xla/service/dump.h" +#include "tensorflow/compiler/xla/service/executable.h" +#include "tensorflow/compiler/xla/service/maybe_owning_device_memory.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/resource_mgr.h" +#include "tensorflow/core/framework/resource_var.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/platform/stream_executor_no_cuda.h" +#include "tensorflow/core/platform/tracing.h" +#include "tensorflow/core/profiler/lib/traceme.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_external.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_local_lookup.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.h" +#include "tensorflow/core/tpu/kernels/tpu_executable_info.pb.h" +#include "tensorflow/core/tpu/kernels/tpu_op_consts.h" +#include "tensorflow/core/tpu/tpu_configuration.h" +#include "tensorflow/core/tpu/tpu_defs.h" +#include "tensorflow/core/tpu/tpu_execute.h" +#include "tensorflow/core/util/stream_executor_util.h" +#include "tensorflow/stream_executor/device_memory_allocator.h" +#include "tensorflow/stream_executor/tpu/tpu_node_context.h" + +namespace tensorflow { + +namespace { + +using ::tensorflow::tpu::TpuNodeContext; +using CompilationCacheEntryRef = ::tensorflow::tpu::CompilationCacheEntryRef< + ::tensorflow::tpu::TpuCompilationCacheEntry>; +using TpuCompilationCacheLookup = + ::tensorflow::tpu::TpuCompilationCacheLookup; + +// Looks up the input `key` in the compilation cache, populating +// `*rendezvous_key_base` and `*entry`. +Status GetComputationCacheEntry( + OpKernelContext* context, string* rendezvous_key_base, + std::unique_ptr* entry) { + const Tensor* key; + TF_RETURN_IF_ERROR(context->input("key", &key)); + profiler::TraceMe trace_me("TpuExecuteOp::LookupProto", /*level=*/2); + if (!TensorShapeUtils::IsVector(key->shape()) || + key->shape().dim_size(0) != 2) { + return errors::InvalidArgument( + "Key argument to TPUExecute must be a 2-element vector"); + } + + ResourceMgr* rmgr = GetTPUConfigResourceMgr(); + TpuCompilationCacheLookup* proto_lookup; + TF_RETURN_IF_ERROR(rmgr->Lookup(rmgr->default_container(), + tpu::kCompiledProtoCacheResourceName, + &proto_lookup)); + core::ScopedUnref lookup_unref(proto_lookup); + TF_RETURN_IF_ERROR(proto_lookup->Lookup(key->vec()(0), entry)); + *rendezvous_key_base = key->vec()(1); + return Status::OK(); +} + +struct VariableUpdateMap { + // Maps input index to the updated output index. 
If the variable doesn't have + // an updated output, the corresponding output is set to -1. + absl::flat_hash_map input_to_output; + // Maps output index to (the input index, whether the update is generated from + // compilation). + absl::flat_hash_map> output_to_input; + // Part of the input indices that are from the compilation, in the compiled + // order. + std::vector input_in_compiled_update_order; +}; + +// Creates a VariableUpdateMap from both the compilation and the fused variable +// reads/updates. +xla::StatusOr BuildVariableUpdateMap( + absl::Span + compiled_variable_updates, + absl::Span fused_device_var_reads_in_computation_inputs, + const std::vector& fused_device_var_updates_in_computation_outputs, + int64 computation_output_count) { + VariableUpdateMap map; + auto add_pair = [&](int input, int output, bool from_compilation) -> Status { + TF_RET_CHECK(map.input_to_output.emplace(input, output).second) + << "Duplicate variable input index: " << input; + if (output >= 0) { + TF_RET_CHECK(map.output_to_input + .emplace(output, std::pair{input, from_compilation}) + .second) + << "Duplicate variable output index: " << output; + } + return Status::OK(); + }; + + // First add the updates produced by the compilation. Not all variables are + // updated, and if not, they do not have an output in the XLA computation. The + // update output indices in the XLA computation start after the non-variable + // outputs. + int num_updated_variables = 0; + for (int i = 0; i < compiled_variable_updates.size(); ++i) { + const bool updated = compiled_variable_updates[i]->updated(); + if (updated) ++num_updated_variables; + } + TF_RET_CHECK(num_updated_variables <= computation_output_count) + << num_updated_variables << " <= " << computation_output_count; + int64 compiled_variable_output_index = + computation_output_count - num_updated_variables; + for (auto update : compiled_variable_updates) { + map.input_in_compiled_update_order.push_back(update->index()); + if (!update->updated()) { + TF_RETURN_IF_ERROR(add_pair(update->index(), -1, true)); + continue; + } + TF_RETURN_IF_ERROR( + add_pair(update->index(), compiled_variable_output_index, true)); + ++compiled_variable_output_index; + } + + // Now add the updates from the attributes. + TF_RET_CHECK(fused_device_var_reads_in_computation_inputs.size() == + fused_device_var_updates_in_computation_outputs.size()); + for (int64 i = 0; i < fused_device_var_reads_in_computation_inputs.size(); + ++i) { + TF_RETURN_IF_ERROR( + add_pair(fused_device_var_reads_in_computation_inputs[i], + fused_device_var_updates_in_computation_outputs[i], false)); + } + return map; +} + +// Buffers representing the inputs to a computation. +struct InputBuffers { + explicit InputBuffers(xla::Shape device_shape) + : buffers(std::move(device_shape)) {} + + InputBuffers(const InputBuffers&) = delete; + InputBuffers& operator=(const InputBuffers&) = delete; + + ~InputBuffers() = default; + + xla::ShapedBuffer ToShapedBuffer(xla::Shape host_shape, + se::DeviceMemoryAllocator* allocator, + int device_ordinal) { + CHECK_NE(allocator, nullptr); + xla::ShapedBuffer shaped_buffer(std::move(host_shape), buffers.shape(), + allocator->platform(), device_ordinal); + shaped_buffer.set_buffers(buffers.Map( + [](xla::MaybeOwningDeviceMemory* buffer) { + CHECK(buffer); + return buffer->AsDeviceMemoryBase(); + })); + return shaped_buffer; + } + + // Describes the buffer tree. + xla::ShapeTree buffers; + + // Information about resource variables passed directly to TPUExecute. 
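+ // There is one entry per DT_RESOURCE argument; the variable locks are held
+ // for the duration of the execution so that the variable buffers can be
+ // donated to the computation.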
+ std::vector variables; + + // Mapping from input index to offsets in 'variables'. < 0 if the input does + // not correspond to a variable in 'variables'. + std::vector variable_index; +}; + +// Builds an InputBuffers object that describes the inputs to the computation. +xla::StatusOr> BuildComputationInputs( + OpKernelContext* context, const xla::Shape& input_host_shape, + const VariableUpdateMap& variable_updates, TpuNodeContext* node_context, + se::Stream* stream) { + profiler::TraceMe trace_me("BuildComputationInputs", /*level=*/2); + OpInputList arg_list; + TF_RETURN_IF_ERROR(context->input_list("args", &arg_list)); + + if (arg_list.size() != xla::ShapeUtil::TupleElementCount(input_host_shape)) { + return errors::InvalidArgument( + "Number of parameters (", arg_list.size(), + ") does not match input shape: ", + xla::ShapeUtil::TupleElementCount(input_host_shape)); + } + + auto validate_shape = [&](int i, const Tensor& tensor) { + const xla::Shape& expected = + xla::ShapeUtil::GetTupleElementShape(input_host_shape, i); + VLOG(4) << "Input " << i << " TF shape " << tensor.shape().DebugString(); + XlaTensor* xla_tensor = XlaTensor::FromTensor(&tensor); + + if (xla_tensor == nullptr) { + // FromTensor failed; tensor must be empty. + if (!xla::ShapeUtil::IsZeroElementArray(expected)) { + return errors::InvalidArgument( + "Run-time shape mismatch for TPUExecute argument[", i, "] (", + context->op_kernel().requested_input(i), "). Expected ", + expected.DebugString(), "; got empty tensor"); + } + } else { + // Compare host shapes, easier than getting the expected device shape. + const xla::Shape& xla_shape = xla_tensor->shaped_buffer().on_host_shape(); + if (!xla::ShapeUtil::Compatible(expected, xla_shape)) { + return errors::InvalidArgument( + "Run-time shape mismatch for TPUExecute argument[", i, "] (", + context->op_kernel().requested_input(i), "). Expected ", + expected.DebugString(), "; got ", xla_shape.DebugString()); + } + } + + return Status::OK(); + }; + + // Iterate over the inputs, validating the shapes of non-variable inputs, + // and creating a VariableInfo object for each variable. We consider variable + // inputs in a separate phase because we must acquire variable locks in order. + std::vector variables; + std::vector variable_index(arg_list.size(), -1); + variables.reserve(arg_list.size()); + for (int i = 0; i < arg_list.size(); ++i) { + // Arguments are assumed to be variables if they have a resource type. + // (Non-variable resources are not supported.) + if (context->input_dtype(i) == DT_RESOURCE) { + variable_index[i] = variables.size(); + // TODO(phawkins): we may be looking up many variables here; it would be + // better if we did not repeatedly acquire the resource manager's lock. + const ResourceHandle& handle = HandleFromInput(context, i); + Var* variable; + TF_RETURN_IF_ERROR(LookupResource(context, handle, &variable)); + variables.push_back(VariableInfo(i, handle.name(), variable)); + } else { + TF_RETURN_IF_ERROR(validate_shape(i, arg_list[i])); + } + } + + // Lock the variables, and validate their shapes. We hold the variable locks + // for the duration of the TPU execution so we can donate the variable buffers + // to the computation. If we copied the variable's Tensor instead, its + // reference count would be greater than one due to the reference the Var + // object holds, and we would never be able to reuse variable buffers. 
+ // TODO(phawkins): add a 'reuse_buffers' attribute to TPUExecute that allows + // the user to elect to copy the buffers and permit concurrent access instead. + TF_RETURN_IF_ERROR(LockVariables(absl::MakeSpan(variables))); + for (int i = 0; i < variables.size(); ++i) { + TF_RETURN_IF_ERROR( + validate_shape(variables[i].index(), *variables[i].var()->tensor())); + } + + se::DeviceMemoryAllocator* const allocator = node_context->memory_allocator(); + xla::TransferManager* const transfer_manager = + node_context->transfer_manager(); + const int device_ordinal = node_context->device_ordinal(); + + auto input_buffers = absl::make_unique( + transfer_manager->HostShapeToDeviceShape(input_host_shape)); + + // Allocates a buffer for the root tuple. + const int64 root_size = + transfer_manager->GetByteSizeRequirement(input_buffers->buffers.shape()); + TF_ASSIGN_OR_RETURN(*input_buffers->buffers.mutable_element({}), + allocator->Allocate(device_ordinal, root_size)); + + // Helper function that sets the input buffers for 'arg_index' to 'buffers'. + // If 'donate_buffers' is true, donates ownership of the buffers in 'buffers' + // to the computation and overwrites the entries in 'buffers' with nulls. + auto set_input_buffers_helper = [&](int arg_index, bool donate_buffers, + xla::ShapedBuffer* buffers) { + buffers->buffers().ForEachMutableElement([&](const xla::ShapeIndex& index, + se::DeviceMemoryBase* buffer) { + xla::ShapeIndex in_index = {arg_index}; + for (int64 j : index) { + in_index.push_back(j); + } + auto* in_buffer = input_buffers->buffers.mutable_element(in_index); + if (donate_buffers) { + *in_buffer = se::OwningDeviceMemory(*buffer, device_ordinal, allocator); + *buffer = se::DeviceMemoryBase(); + } else { + *in_buffer = *buffer; + } + }); + }; + + // Assigns the buffers of 'tensor' as computation input 'i'. Allocates fresh + // buffers for zero-element tensors where required. + auto assign_input = [&](int i, const Tensor& tensor, + bool may_reuse) -> xla::Status { + XlaTensor* xla_tensor = XlaTensor::FromTensor(&tensor); + + // Size 0 tensors have no backing XlaTensor, but may still need to have + // tuple buffers allocated. 
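+ // For example, an empty (zero-element) argument still occupies a leaf entry
+ // in the input buffer tree, so a fresh buffer is allocated and donated for
+ // it below.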
+ if (xla_tensor == nullptr) { + CHECK_EQ(tensor.NumElements(), 0); + const xla::Shape& host_shape = + xla::ShapeUtil::GetSubshape(input_host_shape, {i}); + TF_ASSIGN_OR_RETURN(xla::ScopedShapedBuffer buffers, + transfer_manager->AllocateScopedShapedBuffer( + host_shape, allocator, device_ordinal)); + set_input_buffers_helper(/*arg_index=*/i, /*donate_buffers=*/true, + &buffers); + } else { + bool can_reuse_buffers = tensor.RefCountIsOne() && may_reuse; + set_input_buffers_helper(/*arg_index=*/i, + /*donate_buffers=*/can_reuse_buffers, + &xla_tensor->shaped_buffer()); + xla_tensor->WaitForDefinitionEventOnStream(stream); + } + return Status::OK(); + }; + + for (int i = 0; i < arg_list.size(); ++i) { + auto it = variable_updates.input_to_output.find(i); + if (it == variable_updates.input_to_output.end()) { + TF_RETURN_IF_ERROR(assign_input(i, arg_list[i], /*may_reuse=*/true)); + continue; + } + // input i is a variable + bool updated = it->second >= 0; + if (arg_list[i].dtype() != DT_RESOURCE) { + TF_RETURN_IF_ERROR(assign_input(i, arg_list[i], updated)); + } else { + int vi = variable_index[i]; + TF_RETURN_IF_ERROR( + assign_input(i, *variables[vi].var()->tensor(), updated)); + } + } + + input_buffers->variables = std::move(variables); + input_buffers->variable_index = std::move(variable_index); + + return std::move(input_buffers); +} + +struct OutputBuffers { + OutputBuffers(xla::ScopedShapedBuffer b, se::DeviceMemoryAllocator* allocator) + : owned_buffers(b.on_device_shape(), true), + buffers(b.release()), + memory_allocator(allocator) {} + + ~OutputBuffers() { + buffers.buffers().ForEachElement([&](const xla::ShapeIndex& index, + const se::DeviceMemoryBase& buffer) { + if (owned_buffers.element(index) && !buffer.is_null()) { + Status status = + memory_allocator->Deallocate(buffers.device_ordinal(), buffer); + LOG_IF(ERROR, !status.ok()) << "Error deallocating buffer " << status; + } + }); + } + + // Which of the buffers do we own? + xla::ShapeTree owned_buffers; + + xla::ShapedBuffer buffers; + + se::DeviceMemoryAllocator* const memory_allocator; +}; + +// Allocates Tensors for the outputs of the computation. Ownership of most +// output buffers is passed to the output Tensors. Returns an OutputBuffer that +// owns the root buffer that should be passed to the XLA computation, as well as +// any output buffers that do not have corresponding output tensors. The latter +// may happen for zero-element tensors of type int64 or complex64 which still +// require a tuple buffer but do not have a corresponding XlaTensor. +xla::StatusOr> AllocateOutputTensors( + OpKernelContext* context, xla::ScopedShapedBuffer scoped_buffers, + absl::Span output_tensor_shape_protos, + const VariableUpdateMap& variable_updates, TpuNodeContext* node_context, + se::Stream* stream, int device_ordinal, InputBuffers* input_buffers, + const std::shared_ptr& definition_event) { + VLOG(4) << "Output buffers: " << scoped_buffers.ToString(); + + profiler::TraceMe trace_me("AllocateOutputTensors", /*level=*/2); + // Shapes of the outputs, in TensorShape form. + const int64 sub_elements = + xla::ShapeUtil::TupleElementCount(scoped_buffers.on_host_shape()); + if (sub_elements != output_tensor_shape_protos.size()) { + return errors::InvalidArgument( + "Mismatched numbers of output shapes: ", sub_elements, " vs. 
", + output_tensor_shape_protos.size()); + } + + xla::TransferManager* const transfer_manager = + node_context->transfer_manager(); + + std::vector output_tensor_shapes; + output_tensor_shapes.reserve(sub_elements); + for (int64 i = 0; i < sub_elements; ++i) { + TF_RETURN_IF_ERROR( + TensorShape::IsValidShape(*output_tensor_shape_protos[i])); + TensorShape shape(*output_tensor_shape_protos[i]); + const xla::Shape& xla_shape = + xla::ShapeUtil::GetSubshape(scoped_buffers.on_host_shape(), {i}); + if (!xla_shape.IsArray() || + xla::ShapeUtil::ElementsIn(xla_shape) != shape.num_elements()) { + return errors::InvalidArgument( + "Mismatched number of elements in output shape: ", + xla::ShapeUtil::HumanString(xla_shape), " vs ", shape.DebugString()); + } + output_tensor_shapes.push_back(shape); + } + + // Builds a shaped buffer for the outputs. + TF_RET_CHECK(scoped_buffers.on_host_shape().IsTuple()); + TF_RET_CHECK(!xla::ShapeUtil::IsNestedTuple(scoped_buffers.on_host_shape())); + + se::DeviceMemoryAllocator* const allocator = node_context->memory_allocator(); + + auto output_buffers = + absl::MakeUnique(std::move(scoped_buffers), allocator); + + xla::Shape output_host_shape = output_buffers->buffers.on_host_shape(); + xla::Shape output_device_shape = output_buffers->buffers.on_device_shape(); + + if (!output_host_shape.is_static()) { + TF_RETURN_IF_ERROR(transfer_manager->ReadDynamicShapes( + stream, &output_buffers->buffers, &output_host_shape, + &output_device_shape)); + for (int64 i = 0; i < sub_elements; ++i) { + const xla::Shape& subshape = + xla::ShapeUtil::GetSubshape(output_host_shape, {i}); + TensorShape shape; + TF_RETURN_IF_ERROR(XLAShapeToTensorShape(subshape, &shape)); + output_tensor_shapes[i] = shape; + } + } + + // Transfers ownership of the buffers that back XLA computation output 'i' + // to 'output_tensor'. + auto transfer_buffers = [&](int i, Tensor* output_tensor) { + const xla::Shape& host_shape = + xla::ShapeUtil::GetTupleElementShape(output_host_shape, i); + const xla::Shape& device_shape = + xla::ShapeUtil::GetTupleElementShape(output_device_shape, i); + + // Transfers ownership of the output buffers to the output Tensor, if + // there the tensor is backed by an XlaTensor. Tensors of size 0 have no + // backing XlaTensor, so we let retain 'output_buffers' ownership of any + // buffers in that case. + if (output_tensor->NumElements() > 0) { + xla::ScopedShapedBuffer shaped_buffer(host_shape, device_shape, allocator, + device_ordinal); + shaped_buffer.buffers().ForEachMutableElement( + [&](const xla::ShapeIndex& index, se::DeviceMemoryBase* buffer) { + xla::ShapeIndex out_index = {i}; + for (int64 j : index) { + out_index.push_back(j); + } + *buffer = output_buffers->buffers.buffers().element(out_index); + *output_buffers->owned_buffers.mutable_element(out_index) = false; + }); + + XlaTensor* xla_tensor = XlaTensor::FromTensor(output_tensor); + xla_tensor->set_shaped_buffer(std::move(shaped_buffer)); + xla_tensor->ResetDefinitionEvent(definition_event, stream); + } + }; + + const int num_updated_variables = variable_updates.output_to_input.size(); + TF_RET_CHECK(num_updated_variables <= output_tensor_shapes.size()) + << num_updated_variables << " <= " << output_tensor_shapes.size(); + + OpInputList arg_list; + TF_RETURN_IF_ERROR(context->input_list("args", &arg_list)); + + // The TPU program outputs the updated variables including DT_RESOURCE and + // non-DT_RESOURCE. The TPUExecuteOp needs to output all non-DT_RESOURCE + // variables (updated or not). 
+ // + // updated not_updated + // |------------------|------------------| + // DT_RESOURCE | allocate persist | do nothing | + // |------------------|------------------| + // | allocate | forward Op input | + // not DT_RESOURCE | output | to Op output | Op output + // |------------------|------------------| + // program output + + // Allocates a fresh tensor for each updated variable. While the variable + // inputs need come in no particular order, the variable values are + // always added last by XlaCompiler class, in the same order as the + // corresponding input variables. + int op_output_index = 0; + int compiled_update_index = 0; + auto process_non_updated_variable = [&](int input_index) { + const int variable_index = input_buffers->variable_index.at(input_index); + // If a DT_RESOURCE input is not updated, nothing needs to be done + // because there is no corresponding output. If a non-resource input + // is not updated, forward the input to the output. + if (variable_index < 0) { + context->set_output(op_output_index, arg_list[input_index]); + ++op_output_index; + } + }; + for (int i = 0; i < output_tensor_shapes.size(); ++i) { + auto it = variable_updates.output_to_input.find(i); + if (it == variable_updates.output_to_input.end()) { + // Not a variable update. + // Allocates a fresh tensor for each output of the operator. We always + // allocate a new host-side tensor, but the on-device buffers that back + // that tensor may be aliases of input buffers. + Tensor* output_tensor; + TF_RETURN_IF_ERROR(context->allocate_output( + op_output_index, output_tensor_shapes[i], &output_tensor)); + transfer_buffers(i, output_tensor); + ++op_output_index; + continue; + } + const int input_index = it->second.first; + // We must process the compiled updates in order, which includes the + // non-updated variables, i.e., those without an XLA output. + const bool from_compilation = it->second.second; + while (from_compilation && + variable_updates + .input_in_compiled_update_order[compiled_update_index] != + input_index) { + process_non_updated_variable( + variable_updates + .input_in_compiled_update_order[compiled_update_index]); + ++compiled_update_index; + } + ++compiled_update_index; + const int variable_index = input_buffers->variable_index.at(input_index); + PersistentTensor unused; + Tensor* output_tensor; + if (variable_index >= 0) { + // This output corresponds to a DT_RESOURCE input to the TPUExecute + // operator. Update the corresponding variable. + VariableInfo& var = input_buffers->variables[variable_index]; + // TODO(b/35625933): the correct thing to do would be to transfer + // ownership of the PersistentTensor into the Var object. However, Var + // contains a Tensor so we can't. + TF_RETURN_IF_ERROR(context->allocate_persistent( + var.var()->tensor()->dtype(), output_tensor_shapes[i], &unused, + &output_tensor)); + *var.var()->tensor() = *output_tensor; + } else { + // This output corresponds to a non-resource input to the TPUExecute + // operator. This case occurs for the distributed TPU rewrite which + // adds variable values as inputs and outputs rather than passing the + // variables themselves; reading and writing the variable is handled + // outside the op. + // TODO(phawkins): remove this case when placement of variables on TPU + // devices is well supported and we no longer need to place "remote" + // variables on CPU devices. 
+ TF_RETURN_IF_ERROR(context->allocate_output( + op_output_index, output_tensor_shapes[i], &output_tensor)); + ++op_output_index; + } + transfer_buffers(i, output_tensor); + } + + // Process any remaining non-updated variables. + for (; compiled_update_index < + variable_updates.input_in_compiled_update_order.size(); + ++compiled_update_index) { + process_non_updated_variable( + variable_updates.input_in_compiled_update_order[compiled_update_index]); + } + return std::move(output_buffers); +} + +} // namespace + +// TPUExecuteOp + +TPUExecuteOp::TPUExecuteOp(OpKernelConstruction* context) + : AsyncOpKernel(context, /* is_deferred = */ true) {} + +AsyncOpKernel* TPUExecuteOp::AsAsync() { + // If TPU launches are asynchronous, we can perform the launch without + // blocking the calling thread, and so the executor may treat this kernel as + // a regular (synchronous) OpKernel. + return nullptr; +} + +void TPUExecuteOp::Compute(OpKernelContext* context) { + Status s = DoWork(context); + // NOTE: We can't use `OP_REQUIRES_OK()` here because that macro includes + // a dynamic check that we are not in an AsyncOpKernel. + if (TF_PREDICT_FALSE(!s.ok())) { + context->SetStatus(s); + } +} + +void TPUExecuteOp::ComputeAsync(OpKernelContext* context, DoneCallback done) { + // If TPU launches are asynchronous, then perform the launch on this + // thread to avoid a thread hop, which has an observable latency cost. + OP_REQUIRES_OK_ASYNC(context, DoWork(context), done); + done(); +} + +Status TPUExecuteOp::DoWork(OpKernelContext* context) { + VLOG(1) << "Cloud TPU: TPUExecuteOp::Compute"; + + const XlaDevice::Metadata* metadata; + TF_RETURN_IF_ERROR(XlaDevice::GetMetadata(context, &metadata)); + const int device_ordinal = metadata->device_ordinal(); + + // We are guaranteed that the object underlying TpuNodeContext won't be + // deleted out from under us, while node_context is alive. + TF_ASSIGN_OR_RETURN(std::unique_ptr node_context, + TpuNodeContext::Create(device_ordinal)); + + profiler::TraceMe trace_me( + [&, device_ordinal] { + return absl::StrCat("TpuExecuteOp#device_ordinal=", device_ordinal, + ",id=", context->step_id(), + ",iter_num=", context->frame_iter().iter_id, "#"); + }, + /*level=*/2); + profiler::TraceMe trace_me_init("TPUExecuteOp::Init", /*level=*/2); + + string rendezvous_key_base; + std::unique_ptr entry; + TF_RETURN_IF_ERROR( + GetComputationCacheEntry(context, &rendezvous_key_base, &entry)); + + // Shapes of the inputs and outputs, in xla::Shape form. + const TPUExecutableInfoProto* proto = entry->get().get_executable_info(); + + xla::TransferManager* const transfer_manager = + node_context->transfer_manager(); + CHECK(context->op_device_context()); + se::Stream* stream = context->op_device_context()->stream(); + + TF_RET_CHECK(proto->input_shapes_size() == 1); + + xla::Shape host_shape(proto->input_shapes(0)); + + TF_ASSIGN_OR_RETURN( + auto variable_update_map, + BuildVariableUpdateMap(proto->variable_indices(), + fused_device_var_reads_in_computation_inputs_, + fused_device_var_updates_in_computation_outputs_, + proto->output_tensor_shapes().size())); + TF_ASSIGN_OR_RETURN( + std::unique_ptr input_buffers, + BuildComputationInputs(context, host_shape, variable_update_map, + node_context.get(), stream)); + + // Ideally this should be the host-to-device stream from XlaDeviceContext. 
+ // The particular anti-dependency this is avoiding (why we need a separate + // transfer stream) is between the executable writing tuple tables and + // TPUExecute()'s deregister_stream; if they come from the same stream pool + // antidependencies will occur. XlaBackend has a different pool of streams + // to the stream->GetOrCreateSubStream() that TPUExecute() uses, so these + // will never refer to the same stream. + // + // TODO(jmolloy): Add the necessary plumbing to obtain the proper + // host-to-device stream here. + TF_ASSIGN_OR_RETURN(auto transfer_stream_ptr, + node_context->BorrowStream(device_ordinal)); + + se::DeviceMemoryAllocator* const allocator = node_context->memory_allocator(); + auto shaped_buffer = + input_buffers->ToShapedBuffer(host_shape, allocator, device_ordinal); + if (transfer_manager->CanShapedBufferBeAccessedNow(stream->parent(), + shaped_buffer)) { + TF_RETURN_IF_ERROR(transfer_manager->WriteRootTupleIndexTable( + transfer_stream_ptr.get(), shaped_buffer)); + stream->ThenWaitFor(transfer_stream_ptr.get()); + } else { + TF_RETURN_IF_ERROR( + transfer_manager->WriteRootTupleIndexTable(stream, shaped_buffer)); + } + VLOG(4) << "Input buffers: " << shaped_buffer.ToString(); + + // Snapshot the inputs, if a snapshot was requested. + std::shared_ptr hlo_snapshot; + if (proto->has_session_module()) { + hlo_snapshot = std::make_shared(proto->session_module()); + auto literal = + std::make_shared(shaped_buffer.on_host_shape()); + transfer_manager->TransferLiteralFromDevice( + stream, shaped_buffer, literal.get(), + [hlo_snapshot, literal](Status status) { + if (!status.ok()) { + LOG(ERROR) << "TransferLiteralFromDevice for HLO snapshot inputs " + "failed: " + << status; + return; + } + *hlo_snapshot->add_arguments() = literal->ToProto(); + }); + } + + auto definition_event = std::make_shared(stream->parent()); + TF_RET_CHECK(definition_event->Init()) + << "TPU definition event initialization failed"; + + trace_me_init.Stop(); + + const uint32 rng_seed = GetXLARandomSeed(); + + std::unique_ptr device_assignment; + if (proto->has_device_assignment()) { + TF_ASSIGN_OR_RETURN(device_assignment, xla::DeviceAssignment::Deserialize( + proto->device_assignment())); + } + + VLOG(4) << "Input buffers after alias resolution: " + << shaped_buffer.ToString(); + + std::vector input; + input.emplace_back( + xla::ExecutionInput(std::move(input_buffers->buffers), host_shape)); + + // The buffers to be freed are in the `output` and will be automatically + // freed when it goes out of the scope. In async mode, this means the buffers + // will be freed before anyone calls "BlockHostUntilDone", which indicates + // that some of the (input) buffers will be freed while the program is running + // and looks scary. However, this turns out to be not a problem since although + // we free a memory and reassign it to other users while a program is running, + // all subsequent writes to the program that could possibly clobber the memory + // will depend on the program to finish. 
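// Sketch (not part of this patch): the index remapping used by
// set_input_buffers_helper above, which prefixes each per-argument buffer
// index with the argument number so that argument i becomes element i of the
// single input tuple whose table WriteRootTupleIndexTable writes here.
// std::vector<int64_t> stands in for xla::ShapeIndex purely for illustration.
#include <cstdint>
#include <vector>

std::vector<int64_t> ToRootTupleIndex(int64_t arg_index,
                                      const std::vector<int64_t>& index) {
  std::vector<int64_t> in_index = {arg_index};  // argument i -> tuple element i
  in_index.insert(in_index.end(), index.begin(), index.end());
  return in_index;
}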
+ const TPUHostTransferInfoProto* host_transfer_info = + entry->get().get_host_transfer_info(); + const xla::HloProto* hlo_metadata = entry->get().get_hlo_metadata(); + TF_ASSIGN_OR_RETURN( + xla::ExecutionOutput output, + TPUExecute(*proto, *host_transfer_info, *hlo_metadata, std::move(input), + rendezvous_key_base, rng_seed, node_context.get(), + device_assignment.get(), context->cancellation_manager(), + context, stream, transfer_stream_ptr.get(), + entry->get().get_tpu_program())); + stream->ThenRecordEvent(definition_event.get()); + + TF_ASSIGN_OR_RETURN( + std::unique_ptr output_buffers, + AllocateOutputTensors(context, output.ConsumeResult(), + proto->output_tensor_shapes(), variable_update_map, + node_context.get(), stream, device_ordinal, + input_buffers.get(), definition_event)); + + // Transfer the outputs and save the snapshot to disk. + if (hlo_snapshot) { + auto literal = + std::make_shared(output_buffers->buffers.on_host_shape()); + transfer_manager->TransferLiteralFromDevice( + stream, output_buffers->buffers, literal.get(), + [hlo_snapshot, literal](Status status) { + if (status.ok()) { + *hlo_snapshot->mutable_result() = literal->ToProto(); + } else { + LOG(ERROR) << "TransferLiteralFromDevice for HLO snapshot " + "outputs failed: " + << status; + } + DumpHloSnapshotIfEnabled(*hlo_snapshot, + xla::GetDebugOptionsFromFlags()); + }); + } + return Status::OK(); +} + +TPUExecuteOp::~TPUExecuteOp() = default; + +TPUExecuteAndUpdateVariablesOp::TPUExecuteAndUpdateVariablesOp( + OpKernelConstruction* context) + : TPUExecuteOp(context) { + OP_REQUIRES_OK(context, context->GetAttr( + "device_var_reads_indices", + &fused_device_var_reads_in_computation_inputs_)); + OP_REQUIRES_OK( + context, + context->GetAttr("device_var_updates_indices", + &fused_device_var_updates_in_computation_outputs_)); +} + +REGISTER_KERNEL_BUILDER( + Name("TPUExecute").Device(DEVICE_TPU_NODE).HostMemory("key"), TPUExecuteOp); + +REGISTER_KERNEL_BUILDER(Name("TPUExecuteAndUpdateVariables") + .Device(DEVICE_TPU_NODE) + .HostMemory("key"), + TPUExecuteAndUpdateVariablesOp); + +} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_execute_op.h b/tensorflow/core/tpu/kernels/tpu_execute_op.h new file mode 100644 index 00000000000..2079f9afdc5 --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_execute_op.h @@ -0,0 +1,66 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CORE_TPU_KERNELS_TPU_EXECUTE_OP_H_ +#define TENSORFLOW_CORE_TPU_KERNELS_TPU_EXECUTE_OP_H_ + +#include +#include + +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/platform/mutex.h" + +namespace tensorflow { + +// Op that executes a precompiled TPU computation. 
+class TPUExecuteOp : public AsyncOpKernel { + public: + explicit TPUExecuteOp(OpKernelConstruction* context); + ~TPUExecuteOp() override; + + AsyncOpKernel* AsAsync() override; + + void Compute(OpKernelContext* context) override; + void ComputeAsync(OpKernelContext* context, DoneCallback done) override; + + protected: + // Used by TPUExecuteAndUpdateVariablesOp to set the fused variable reads and + // updates indices in the XLA computation. The two vectors must have the same + // size, and a pair of read index and write index represents a variable's + // input to the program and its updated value from the program. If the + // variable is not updated, use -1 as the output index. + std::vector fused_device_var_reads_in_computation_inputs_; + std::vector fused_device_var_updates_in_computation_outputs_; + + private: + Status DoWork(OpKernelContext* context); + + DISALLOW_COPY_AND_ASSIGN(TPUExecuteOp); +}; + +// A variant of TPUExecuteOp that contains fused device variable reads and +// updates. +class TPUExecuteAndUpdateVariablesOp : public TPUExecuteOp { + public: + explicit TPUExecuteAndUpdateVariablesOp(OpKernelConstruction* context); + ~TPUExecuteAndUpdateVariablesOp() override = default; + + private: + DISALLOW_COPY_AND_ASSIGN(TPUExecuteAndUpdateVariablesOp); +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_TPU_KERNELS_TPU_EXECUTE_OP_H_ From 2a4ce9531277d44a1744d21bab6f412ba8c47be7 Mon Sep 17 00:00:00 2001 From: ShengYang1 Date: Fri, 17 Jul 2020 16:44:46 +0800 Subject: [PATCH 0679/2522] Test only for cpu --- tensorflow/python/kernel_tests/cwise_ops_unary_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/kernel_tests/cwise_ops_unary_test.py b/tensorflow/python/kernel_tests/cwise_ops_unary_test.py index 33b43f5a086..368f3509dc6 100644 --- a/tensorflow/python/kernel_tests/cwise_ops_unary_test.py +++ b/tensorflow/python/kernel_tests/cwise_ops_unary_test.py @@ -389,7 +389,7 @@ class UnaryOpTest(test.TestCase): 2).reshape(1, 3, 2).astype(dtypes_lib.bfloat16.as_numpy_dtype) self._compareCpu(x, np.abs, math_ops.abs) self._compareCpu(x, np.abs, _ABS) - self._compareBoth(x, np.exp, math_ops.exp) + self._compareCpu(x, np.exp, math_ops.exp) self._compareBoth(x, np.negative, math_ops.negative) self._compareBoth(x, np.negative, _NEG) From 55ae25ce751123570197eba6ea61606ea61a2c2a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Jul 2020 02:01:29 -0700 Subject: [PATCH 0680/2522] Update GraphDef version to 465. PiperOrigin-RevId: 321737345 Change-Id: I529abff30b5951c02a5ff46641ddd4fb682229d9 --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 2e02304e18c..a5fc4387ea6 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 464 // Updated: 2020/7/16 +#define TF_GRAPH_DEF_VERSION 465 // Updated: 2020/7/17 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From 27948367a2a7d6d7782e62f4afb77232754cb6b0 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 17 Jul 2020 02:01:30 -0700 Subject: [PATCH 0681/2522] compat: Update forward compatibility horizon to 2020-07-17 PiperOrigin-RevId: 321737352 Change-Id: I7f66a8beeabe040ec2b8809f5b88d830bde82d66 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 601fac967a3..4ce014416d8 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 16) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 17) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From 3c5d55b335e37d893345c44e9ed8c851057c6f70 Mon Sep 17 00:00:00 2001 From: Elena Zhelezina Date: Fri, 17 Jul 2020 10:23:35 +0100 Subject: [PATCH 0682/2522] Addressed reviewer's comments. Change-Id: I5e13301afa06c73bf727cf2f17b8e8eb41cb31f6 --- .../lite/tools/versioning/op_version_test.cc | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tensorflow/lite/tools/versioning/op_version_test.cc b/tensorflow/lite/tools/versioning/op_version_test.cc index 2f13b7234e3..a90cb336318 100644 --- a/tensorflow/lite/tools/versioning/op_version_test.cc +++ b/tensorflow/lite/tools/versioning/op_version_test.cc @@ -67,6 +67,18 @@ void SimpleVersioningTest(BuiltinOperator op) { EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 1); } +// Similar to SimpleVersioningTest function, but +// op has 3 versions and the input type includes TensorType_INT16. +void SimpleVersioningTestExtended(BuiltinOperator op) { + OpSignature fake_op_sig = { + .op = op, + .input_types = std::vector{TensorType_INT16}, + }; + EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 3); + + SimpleVersioningTest(op); +} + // Test version for a simple Op with 2 versions and the output type controls the void SimpleOutputVersioningTest(BuiltinOperator op) { OpSignature fake_op_sig = { @@ -281,7 +293,7 @@ TEST(OpVersionTest, VersioningMinTest) { } TEST(OpVersionTest, VersioningMeanTest) { - SimpleVersioningTest(BuiltinOperator_MEAN); + SimpleVersioningTestExtended(BuiltinOperator_MEAN); } TEST(OpVersionTest, VersioningSumTest) { From d0032a56042c1751521f85b26c6a7507d0961d10 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 17 Jul 2020 02:32:28 -0700 Subject: [PATCH 0683/2522] Clearer message when output shape can't be inferred. PiperOrigin-RevId: 321741649 Change-Id: I979385a1788a04fbfcb3c79685a2c58a4a3587c3 --- tensorflow/python/keras/engine/base_layer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/keras/engine/base_layer.py b/tensorflow/python/keras/engine/base_layer.py index f362588680a..4a590f8ff21 100644 --- a/tensorflow/python/keras/engine/base_layer.py +++ b/tensorflow/python/keras/engine/base_layer.py @@ -738,7 +738,9 @@ class Layer(module.Module, version_utils.LayerVersionSelector): '`compute_output_shape` method on your layer (%s).' 
% self.__class__.__name__), e) return nest.map_structure(lambda t: t.shape, outputs) - raise NotImplementedError + raise NotImplementedError( + 'Please run in eager mode or implement the `compute_output_shape` ' + 'method on your layer (%s).' % self.__class__.__name__) @doc_controls.for_subclass_implementers def compute_output_signature(self, input_signature): From 42581584964ecca49891447815b5b23614f407bc Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Fri, 17 Jul 2020 02:37:49 -0700 Subject: [PATCH 0684/2522] Integrate LLVM at https://github.com/llvm/llvm-project/commit/6bba95831e48 PiperOrigin-RevId: 321742238 Change-Id: Ied6941e662470dc072a6eadea3d133f29b3ae655 --- tensorflow/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 9980ffad650..84e67a4ee22 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -710,8 +710,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "8ef9e2bf355d05bc81d8b0fe1e5333eec59a0a91" - LLVM_SHA256 = "b64757f390c8e20abaa97fcbf8da9d88db5489a9da827019e83844c62f5790ac" + LLVM_COMMIT = "6bba95831e480656124a5fbcd84f4f2a31e6c0b6" + LLVM_SHA256 = "0156b6feb5b09e653feacec85c7e084c2a12783f7834e7d2943f7cd19dd2405b" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From e36aca0132fbcde0bc820d56185e3078f97a879d Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Fri, 17 Jul 2020 04:16:16 -0700 Subject: [PATCH 0685/2522] Integrate LLVM at https://github.com/llvm/llvm-project/commit/e297006d6f02 PiperOrigin-RevId: 321753670 Change-Id: Id48a3687bc3d82fdc65db2a8e5cb41285ba06427 --- tensorflow/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 84e67a4ee22..119f5eae301 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -710,8 +710,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. 
- LLVM_COMMIT = "6bba95831e480656124a5fbcd84f4f2a31e6c0b6" - LLVM_SHA256 = "0156b6feb5b09e653feacec85c7e084c2a12783f7834e7d2943f7cd19dd2405b" + LLVM_COMMIT = "e297006d6f02f0f54a69223b98defde09c43158f" + LLVM_SHA256 = "cad40ccdb48efbe9f5bc093e4bfcffd305c66c7658aaab2bee5e0a22690f967d" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From 5bad210b35abea25b5a09f9b2788bbc4b5b69624 Mon Sep 17 00:00:00 2001 From: Koki Ibukuro Date: Wed, 20 Nov 2019 11:35:11 +0100 Subject: [PATCH 0686/2522] Add TfLiteVersion --- .../Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs b/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs index 4214b93c17e..fd8818f5f4e 100644 --- a/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs +++ b/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs @@ -89,12 +89,20 @@ namespace TensorFlowLite tensor, tensorDataPtr, Buffer.ByteLength(outputTensorData))); } + public static string GetVersion() { + return Marshal.PtrToStringAnsi(TfLiteVersion()); + } + private static void ThrowIfError(int resultCode) { if (resultCode != 0) throw new Exception("TensorFlowLite operation failed."); } #region Externs + [DllImport (TensorFlowLibrary)] + private static extern unsafe IntPtr TfLiteVersion(); + + [DllImport (TensorFlowLibrary)] private static extern unsafe TfLiteInterpreter TfLiteModelCreate(IntPtr model_data, int model_size); From 0178d57f7dd445e291e04f96dc04a9c490405054 Mon Sep 17 00:00:00 2001 From: Koki Ibukuro Date: Wed, 20 Nov 2019 12:54:36 +0100 Subject: [PATCH 0687/2522] Add more interpreter functions on Unity Plugin --- .../HelloTFLite/Scripts/HelloTFLite.cs | 19 ++- .../TensorFlowLite/SDK/Scripts/Interpreter.cs | 109 ++++++++++++++++-- 2 files changed, 114 insertions(+), 14 deletions(-) diff --git a/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/Examples/HelloTFLite/Scripts/HelloTFLite.cs b/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/Examples/HelloTFLite/Scripts/HelloTFLite.cs index 83291e61794..5b885f611c2 100644 --- a/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/Examples/HelloTFLite/Scripts/HelloTFLite.cs +++ b/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/Examples/HelloTFLite/Scripts/HelloTFLite.cs @@ -44,11 +44,20 @@ public class HelloTFLite : MonoBehaviour { } void Start () { - interpreter = new Interpreter(model.bytes); - Debug.LogFormat( - "InputCount: {0}, OutputCount: {1}", - interpreter.GetInputTensorCount(), - interpreter.GetOutputTensorCount()); + Debug.LogFormat("TensorFlow Lite Verion: {0}", Interpreter.GetVersion()); + + interpreter = new Interpreter( + modelData: model.bytes, + threads: 2); + + int inputCount = interpreter.GetInputTensorCount(); + int outputCount = interpreter.GetOutputTensorCount(); + for (int i = 0; i < inputCount; i++) { + Debug.LogFormat("Input {0}: {1}", i, interpreter.GetInputTensorInfo(i)); + } + for (int i = 0; i < inputCount; i++) { 
+ Debug.LogFormat("Output {0}: {1}", i, interpreter.GetOutputTensorInfo(i)); + } } void Update () { diff --git a/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs b/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs index fd8818f5f4e..2fc89bdaf0a 100644 --- a/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs +++ b/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs @@ -19,6 +19,7 @@ using TfLiteInterpreter = System.IntPtr; using TfLiteInterpreterOptions = System.IntPtr; using TfLiteModel = System.IntPtr; using TfLiteTensor = System.IntPtr; +using TfLiteDelegate = System.IntPtr; namespace TensorFlowLite { @@ -31,25 +32,31 @@ namespace TensorFlowLite private TfLiteModel model; private TfLiteInterpreter interpreter; + private TfLiteInterpreterOptions options; - public Interpreter(byte[] modelData) { + public Interpreter(byte[] modelData, int threads) { GCHandle modelDataHandle = GCHandle.Alloc(modelData, GCHandleType.Pinned); IntPtr modelDataPtr = modelDataHandle.AddrOfPinnedObject(); model = TfLiteModelCreate(modelDataPtr, modelData.Length); if (model == IntPtr.Zero) throw new Exception("Failed to create TensorFlowLite Model"); - interpreter = TfLiteInterpreterCreate(model, /*options=*/IntPtr.Zero); + + options = TfLiteInterpreterOptionsCreate(); + + if (threads > 1) { + TfLiteInterpreterOptionsSetNumThreads(options, threads); + } + + interpreter = TfLiteInterpreterCreate(model, options); if (interpreter == IntPtr.Zero) throw new Exception("Failed to create TensorFlowLite Interpreter"); } - ~Interpreter() { - Dispose(); - } - public void Dispose() { - if (interpreter != IntPtr.Zero) TfLiteInterpreterDelete(interpreter); - interpreter = IntPtr.Zero; if (model != IntPtr.Zero) TfLiteModelDelete(model); model = IntPtr.Zero; + if (interpreter != IntPtr.Zero) TfLiteInterpreterDelete(interpreter); + interpreter = IntPtr.Zero; + if (options != IntPtr.Zero) TfLiteInterpreterOptionsDelete(options); + options = IntPtr.Zero; } public void Invoke() { @@ -89,26 +96,89 @@ namespace TensorFlowLite tensor, tensorDataPtr, Buffer.ByteLength(outputTensorData))); } + public string GetInputTensorInfo(int index) { + TfLiteTensor tensor = TfLiteInterpreterGetInputTensor(interpreter, index); + return GetTensorInfo(tensor); + } + + public string GetOutputTensorInfo(int index) { + TfLiteTensor tensor = TfLiteInterpreterGetOutputTensor(interpreter, index); + return GetTensorInfo(tensor); + } + public static string GetVersion() { return Marshal.PtrToStringAnsi(TfLiteVersion()); } + private static string GetTensorName(TfLiteTensor tensor) { + return Marshal.PtrToStringAnsi(TfLiteTensorName(tensor)); + } + + private static string GetTensorInfo(TfLiteTensor tensor) { + var sb = new System.Text.StringBuilder(); + sb.AppendFormat("{0} type:{1}, dims:[", + GetTensorName(tensor), + TfLiteTensorType(tensor)); + + int dims = TfLiteTensorNumDims(tensor); + for (int i = 0; i < dims; i++) { + sb.Append(TfLiteTensorDim(tensor, i)); + sb.Append(i == dims - 1 ? 
"]" : ", "); + } + return sb.ToString(); + } + private static void ThrowIfError(int resultCode) { if (resultCode != 0) throw new Exception("TensorFlowLite operation failed."); } #region Externs + public enum TfLiteType { + NoType = 0, + Float32 = 1, + Int32 = 2, + UInt8 = 3, + Int64 = 4, + String = 5, + Bool = 6, + Int16 = 7, + Complex64 = 8, + Int8 = 9, + Float16 = 10, + } + + public struct TfLiteQuantizationParams { + public float scale; + public int zero_point; + } + [DllImport (TensorFlowLibrary)] private static extern unsafe IntPtr TfLiteVersion(); - [DllImport (TensorFlowLibrary)] private static extern unsafe TfLiteInterpreter TfLiteModelCreate(IntPtr model_data, int model_size); [DllImport (TensorFlowLibrary)] private static extern unsafe TfLiteInterpreter TfLiteModelDelete(TfLiteModel model); + [DllImport (TensorFlowLibrary)] + private static extern unsafe TfLiteInterpreterOptions TfLiteInterpreterOptionsCreate(); + + [DllImport (TensorFlowLibrary)] + private static extern unsafe void TfLiteInterpreterOptionsDelete(TfLiteInterpreterOptions options); + + [DllImport (TensorFlowLibrary)] + private static extern unsafe TfLiteInterpreterOptions TfLiteInterpreterOptionsSetNumThreads( + TfLiteInterpreterOptions options, + int num_threads + ); + + [DllImport (TensorFlowLibrary)] + private static extern unsafe TfLiteInterpreterOptions TfLiteInterpreterOptionsAddDelegate( + TfLiteInterpreterOptions options, + TfLiteDelegate _delegate); + [DllImport (TensorFlowLibrary)] private static extern unsafe TfLiteInterpreter TfLiteInterpreterCreate( TfLiteModel model, @@ -148,6 +218,27 @@ namespace TensorFlowLite private static extern unsafe TfLiteTensor TfLiteInterpreterGetOutputTensor( TfLiteInterpreter interpreter, int output_index); + + [DllImport (TensorFlowLibrary)] + private static extern unsafe TfLiteType TfLiteTensorType(TfLiteTensor tensor); + + [DllImport (TensorFlowLibrary)] + private static extern unsafe int TfLiteTensorNumDims(TfLiteTensor tensor); + + [DllImport (TensorFlowLibrary)] + private static extern int TfLiteTensorDim(TfLiteTensor tensor, int dim_index); + + [DllImport (TensorFlowLibrary)] + private static extern uint TfLiteTensorByteSize(TfLiteTensor tensor); + + [DllImport (TensorFlowLibrary)] + private static extern unsafe IntPtr TfLiteTensorData(TfLiteTensor tensor); + + [DllImport (TensorFlowLibrary)] + private static extern unsafe IntPtr TfLiteTensorName(TfLiteTensor tensor); + + [DllImport (TensorFlowLibrary)] + private static extern unsafe TfLiteQuantizationParams TfLiteTensorQuantizationParams(TfLiteTensor tensor); [DllImport (TensorFlowLibrary)] private static extern unsafe int TfLiteTensorCopyFromBuffer( From 5dfb75032d593630b2028dda13f8c70210b5267c Mon Sep 17 00:00:00 2001 From: Koki Ibukuro Date: Wed, 20 Nov 2019 14:52:19 +0100 Subject: [PATCH 0688/2522] Use "__Internal" dll name on iOS --- .../Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs b/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs index 2fc89bdaf0a..369fac91e49 100644 --- a/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs +++ b/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs @@ -28,8 +28,6 @@ namespace TensorFlowLite /// public 
class Interpreter : IDisposable { - private const string TensorFlowLibrary = "tensorflowlite_c"; - private TfLiteModel model; private TfLiteInterpreter interpreter; private TfLiteInterpreterOptions options; @@ -134,6 +132,12 @@ namespace TensorFlowLite #region Externs + #if UNITY_IPHONE && !UNITY_EDITOR + private const string TensorFlowLibrary = "__Internal"; +#else + private const string TensorFlowLibrary = "tensorflowlite_c"; +#endif + public enum TfLiteType { NoType = 0, Float32 = 1, From 4ab3b90cf016216de6cdb4fdb027a77a1790c6ab Mon Sep 17 00:00:00 2001 From: Koki Ibukuro Date: Fri, 22 Nov 2019 10:29:13 +0100 Subject: [PATCH 0689/2522] Dispose interpreter before model --- .../Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs b/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs index 369fac91e49..44fcf6cbc62 100644 --- a/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs +++ b/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs @@ -49,10 +49,10 @@ namespace TensorFlowLite } public void Dispose() { - if (model != IntPtr.Zero) TfLiteModelDelete(model); - model = IntPtr.Zero; if (interpreter != IntPtr.Zero) TfLiteInterpreterDelete(interpreter); interpreter = IntPtr.Zero; + if (model != IntPtr.Zero) TfLiteModelDelete(model); + model = IntPtr.Zero; if (options != IntPtr.Zero) TfLiteInterpreterOptionsDelete(options); options = IntPtr.Zero; } From 5ec8c1df160b1c3d13a0ca1876c742fb092a86fc Mon Sep 17 00:00:00 2001 From: Koki Ibukuro Date: Fri, 22 Nov 2019 10:43:04 +0100 Subject: [PATCH 0690/2522] Remove unwired native-binding-methods --- .../Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs | 9 --------- 1 file changed, 9 deletions(-) diff --git a/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs b/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs index 44fcf6cbc62..4199af26f3b 100644 --- a/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs +++ b/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs @@ -19,7 +19,6 @@ using TfLiteInterpreter = System.IntPtr; using TfLiteInterpreterOptions = System.IntPtr; using TfLiteModel = System.IntPtr; using TfLiteTensor = System.IntPtr; -using TfLiteDelegate = System.IntPtr; namespace TensorFlowLite { @@ -178,11 +177,6 @@ namespace TensorFlowLite int num_threads ); - [DllImport (TensorFlowLibrary)] - private static extern unsafe TfLiteInterpreterOptions TfLiteInterpreterOptionsAddDelegate( - TfLiteInterpreterOptions options, - TfLiteDelegate _delegate); - [DllImport (TensorFlowLibrary)] private static extern unsafe TfLiteInterpreter TfLiteInterpreterCreate( TfLiteModel model, @@ -235,9 +229,6 @@ namespace TensorFlowLite [DllImport (TensorFlowLibrary)] private static extern uint TfLiteTensorByteSize(TfLiteTensor tensor); - [DllImport (TensorFlowLibrary)] - private static extern unsafe IntPtr TfLiteTensorData(TfLiteTensor tensor); - [DllImport (TensorFlowLibrary)] private static extern unsafe IntPtr TfLiteTensorName(TfLiteTensor tensor); From 
be721b824678b670364939495bf8dc22b3ed7f75 Mon Sep 17 00:00:00 2001 From: Koki Ibukuro Date: Fri, 22 Nov 2019 10:43:19 +0100 Subject: [PATCH 0691/2522] Fix return type --- .../Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs b/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs index 4199af26f3b..3e04665cf9b 100644 --- a/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs +++ b/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs @@ -172,7 +172,7 @@ namespace TensorFlowLite private static extern unsafe void TfLiteInterpreterOptionsDelete(TfLiteInterpreterOptions options); [DllImport (TensorFlowLibrary)] - private static extern unsafe TfLiteInterpreterOptions TfLiteInterpreterOptionsSetNumThreads( + private static extern unsafe void TfLiteInterpreterOptionsSetNumThreads( TfLiteInterpreterOptions options, int num_threads ); From 0127c02eb6a6a3c8ac4db6adf738e670f2b56252 Mon Sep 17 00:00:00 2001 From: Koki Ibukuro Date: Fri, 22 Nov 2019 11:17:36 +0100 Subject: [PATCH 0692/2522] Add TensorInfo struct --- .../TensorFlowLite/SDK/Scripts/Interpreter.cs | 44 +++++++++++++------ 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs b/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs index 3e04665cf9b..51ded314d57 100644 --- a/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs +++ b/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs @@ -27,6 +27,21 @@ namespace TensorFlowLite /// public class Interpreter : IDisposable { + public struct TensorInfo { + public string name { get; internal set; } + public TfLiteType type { get; internal set; } + public int[] dimensions { get; internal set; } + public TfLiteQuantizationParams quantizationParams { get; internal set; } + + public override string ToString() { + return string.Format("name: {0}, type: {1}, dimensions: {2}, quantizationParams: {3}", + name, + type, + "[" + string.Join(",", dimensions) + "]", + "{" + quantizationParams + "}"); + } + } + private TfLiteModel model; private TfLiteInterpreter interpreter; private TfLiteInterpreterOptions options; @@ -93,12 +108,12 @@ namespace TensorFlowLite tensor, tensorDataPtr, Buffer.ByteLength(outputTensorData))); } - public string GetInputTensorInfo(int index) { + public TensorInfo GetInputTensorInfo(int index) { TfLiteTensor tensor = TfLiteInterpreterGetInputTensor(interpreter, index); return GetTensorInfo(tensor); } - public string GetOutputTensorInfo(int index) { + public TensorInfo GetOutputTensorInfo(int index) { TfLiteTensor tensor = TfLiteInterpreterGetOutputTensor(interpreter, index); return GetTensorInfo(tensor); } @@ -111,18 +126,17 @@ namespace TensorFlowLite return Marshal.PtrToStringAnsi(TfLiteTensorName(tensor)); } - private static string GetTensorInfo(TfLiteTensor tensor) { - var sb = new System.Text.StringBuilder(); - sb.AppendFormat("{0} type:{1}, dims:[", - GetTensorName(tensor), - TfLiteTensorType(tensor)); - - int dims = 
TfLiteTensorNumDims(tensor); - for (int i = 0; i < dims; i++) { - sb.Append(TfLiteTensorDim(tensor, i)); - sb.Append(i == dims - 1 ? "]" : ", "); + private static TensorInfo GetTensorInfo(TfLiteTensor tensor) { + int[] dimensions = new int[TfLiteTensorNumDims(tensor)]; + for (int i = 0; i < dimensions.Length; i++) { + dimensions[i] = TfLiteTensorDim(tensor, i); } - return sb.ToString(); + return new TensorInfo() { + name = GetTensorName(tensor), + type = TfLiteTensorType(tensor), + dimensions = dimensions, + quantizationParams = TfLiteTensorQuantizationParams(tensor), + }; } private static void ThrowIfError(int resultCode) { @@ -154,6 +168,10 @@ namespace TensorFlowLite public struct TfLiteQuantizationParams { public float scale; public int zero_point; + + public override string ToString() { + return string.Format("scale: {0} zero_point: {1}", scale, zero_point); + } } [DllImport (TensorFlowLibrary)] From bc5aa830c659e498ed4b5fe4350a2063f95cbf45 Mon Sep 17 00:00:00 2001 From: Koki Ibukuro Date: Fri, 22 Nov 2019 11:21:41 +0100 Subject: [PATCH 0693/2522] Add method documentation --- .../Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs b/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs index 51ded314d57..bd608501ddf 100644 --- a/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs +++ b/tensorflow/lite/experimental/examples/unity/TensorFlowLitePlugin/Assets/TensorFlowLite/SDK/Scripts/Interpreter.cs @@ -118,6 +118,11 @@ namespace TensorFlowLite return GetTensorInfo(tensor); } + ///

z)dTK`awzo^(~8S^DzO?B{YdR!JIS?Qm(Rp`KEZGNU#Ky2**S7*+dJALrvCZ`au7g5 z#YUe1ieID(yDJ^gu)}e?uk=G|wA3Ps)5>(!O(Xp&-pMiZxqU3QrVzgZ=ICZWg zlJ6Oy#hs`H^8reb4#(8_X0(1YR^uloYN{*K8oI$xuu+PRm?0*9i2uG89gh>=n9-Tr zXz*v}7=Z#RUgL=Y_xVcJU>U0cGbrs0t={Jh!G=!ja|frHFpkqCwQ48+@cVfCL-UVG z1%2K5w_g-YO{8O`3G)uaq4Kgr<9_DGcl7Ij=_APTaRn$PFON8KhpkM<^h+oITJzp< z;s=D4)5jc=THb87xG4Znj@#B5n@OZD=cI(~QqmSr(IGR5{AWX96&l5VIFe$4I$6W&d!`aks2O8F|eY@qc z%`3kBxy5w^dk>+Qg#Y!{F_$B8IZDcu*oX%}1v-Nx zz~&$w@divY)?gp;2-Y{VI2vaD7<-~OxvJzviiXM8Fb9>vzq5>mQiEkbU9hTCGo8=G=d)!6{_UT6nvx zG7YabjL{fkPVu;d=Tug-2kEQBWo-M`t7G8$?sq;GNj`)z#uh&7kkp*hj9no!)tm5K zxk4kkm10sSqHvCkS*kVG0d_w;z8G%XOmDpy@L5DiZ6hb@0xq&J=QlPsA?VbSxSlq0 zunxTJnCuKgyeevyctfa|iuM1r0KrijlW=b6<}vmr-0HdP)&72FeF*D{(!ipbc~e5w zf4{fhMniW+fVTjXqoZ^JTW7NRQ`SY1AMztR5?!(h%hs8joQM)J7q-UZ)1O#y5qw4| zh*_9#;bmVVfYLQop~lcp4EmtaR>*eaR&PGyRX?q8Ga;&%-rqtZR(FHOTve3RpVh)@ zA4`ZCGiPgRjlF1h3Tbzr*o{CI2V(r+saBrbU@zZKm-|_~^UZN7kJb1vI{pf%)TO|=(F@lT`Zm1^?oIfTq)G*$@HOU!c z4Y_7fjYPj0#wXS&&;PQNC$KJoR!D^W_Q#c1v` zvX@YdV?!%_#8f_P?Ts$ltKI>nQwRN-2h;&pYD*J)a+5kMZEoZmR^GY`hU_s$lN{6@ z@V77x&r8Qbgsek2<6g<}jK}$An^M2Rh7apcZMpomEl~!U1<04auk)~2u1qukJ?-w? zRd*i}IuqruztMe1(qf0wEOk|>U!Y)s(u;H>Toa!ulR+KX4~9;Njl!kOd$5SBqRB=W z9vVjQ-!wkgtkaOTTgjx38B8k~7`3@cZD7+(|IvaUofDWQ#@X1f%3jlPwy&Zu)4YE+ ziiemVTb9lYJC<`dZj^RSsJ5I7FASHO=jl-pkK*!_bi|GMQ)v<>F#Z>%1S!9YASOpc zsQ&h?>nY)t(Tx;&;qm3=f_SgAX}tyxuwZYof^&d0GPT)X*# z?g266gHorIn3ecqaxwc~^zgqrTE%SgCLH)DLb6C_b}0{&HO%o`))ZRpsdTeZlA3gB z8F!-&{Ox(8JaNO-3jPB8d-rVD-i#lz&-!OgMq&Lw?eNBOic(DaU%Pp;h0 z&Dt+*ZjS!F@Lj>Dx4XZt^%{w2sf7}Oh4mJvLO6lfeKuZZoia2lS&j{U7R#-#IX?cI z+zXp+$5|f&t$VSB;W6X02s9jxT^`M&t>xFo!}oQEJ-MQP#mpQr6T~rYyk`>yru{9D z7kc3>3i4nwA6YefXPOTeu6^3yU{<8t%TN9hr|io}i_CC{+>x+4+4P220t5}+AQb6F z&R$~0X&Cr)T5vP#L^FdbCC&ukgs^{0ScV7ib(Th)FDJyok=syj1$+HR7|I4}Jaxnb zZ$0ahDYsSWn-p`>(J=Kf!mz?ws0AzS1KR)L4og10r7n9<`@7wfbMqQ2@c5FnyDK*B z+14PxO18N`&cU>6F{NG2lyJ}5e210nMVF@*;$v=R@|3hhMFIadfONoLrpKE@0z2Y+hKUCq5Xxq}&q$n64 zr4BF_tabTw{|3~kK@hcPn0V=aZOY}ku{m}g59*H={P;O%7_l2J_+;atKX`io^TEcT zQ+MiO_ggTpC;mKPA@65_KC|H4dcrTL0hvpy(%s(HZ2KI8ZDE6Iz`#pGLzK`IqC#&n zT@UH+>C|twmv3mMA!0B9X8ur9F!l0)nz%_xz)&ETP{m1SZURWioMwY;t-utzy6@Fa zzr7EOE&k`|d6t|ZRCO>dykO-ncYq3>q!?qF1K@7TQR?|k)O_^G%A?hk{k!(t2YUU5 zZ;|7JOo&mZY@*fAOd%$mPKws-YZ!yh3c2m+z8Y<9+G*bffpy{~jQq_H{+M1X{P>uh zgMO(b2*q))4ijwtI$dL-t=#g)zGv^ zzK@iBPYi)Kjiy|g*gon9#<1Z{1}y1D5FiMt=oW)KF-Qh7jJZZuhf>P-;JlQ;?V@DU z?CcfmVxvrze3lZrk?pRI^_bn79lQ!vdC|hKz$>KB!1)lL-ySlBG?ez$M$`TUi*!~m za-!D$diC-<`X`;&JwL?^%EFM|Hi3m&YJ^pgpZ6Ux*3=fk5_{-ol z-DdbZ<`awtu_!V9=?r1+C@222XIz4pCc!=eSWDsm3QH zqbttD(QqKR@`%*le{oar4&IRJ!*~GdkIw)xRjk>2Ba39mW3smM`WfxaFxqx z?3ID#S(@f3qe~I@Hsy#Wgho~?f3U!fcoX;7#+2N9_%qASd%`|>%USnoj&-3w>)x;K z92oa54c+DSMX%{Lo4+b_C-6u>GMu*;h>&g~X=y|Xm!EvgLP#L1RxeX&l{o2>j*b3B zw|>Yq|$QkN-IBa&|Y6-u%IGOXQPuSX^@yPva*?iOcG&sckWymWAD28q(x7 zXO#j2X95@I@9b;tzORfN`qc2dNyXl}xgrO!&+r%i<39U=;Dja2Tz=FNtU!M%?x(opyEc!J2dWq;e>)Tl94^I90 z_<#Bk;fgg8Ne7n1oyvJ~YwlCdfxQ{$lEcYa0#SIPVUbqDC^RIYLxdVVU^46#N|L|R{4O_a zqg$%pQ!#zs>#fI}AEzJPx+kkQJ*9MC%^F#LVjmRbCZhzY5fLt0fxNhm? zdrt?jZ16fYBzb-4w5Ql_@izVNhV1^6X`##zQwffMM+PW(HSwvZ6ieik!wE2MJD=b? 
zrI7mP!j5fX834wIvs3~`)vwrkCR!FWp`=P4Wlmc9 zO)L$8LLPu!Xc0C!vfx$-u|^*$|BfBtNu4KoY+~h2>$|7r^ww!iuJiF+{WR`iSAkKb zxW_)yvm%fQL$OiGBv2$Z(Ct~grQ&(h+J1|We;)qQbund`8Nsr469C>!rKcSIishK4 zMrfy@USPvSzfI@C7EHCFg;6;z6QJ;TvOtu(TOd7MP0U6rg6@EnfUS+%Vlj)5LPb(aB0{nP#B-M|mo9&v5PvcH_@nTOBdvmuIae~N zkd^3gyZgySv>Y6qm>>oE3hmpT_pP*5NK?<581tg=_U#(7;H*2fSu5YBUnBxU_}K9n zniTG3NfmVBn9|89+7zgoc^aZ=vuId)~6SXG%vc`&pjjB zw%2X@>W^3R;ujo`s52vy;NN6;>G?=%a^f=`AfE5Td?L+)0pwKX*t)z z(;G{Ef}B|Z_bL;bH!5JZio#u)VA zSCr&;7qo5=kYLGS0yDFlt{zXzXr$%giVm{(;X!F3S~^H-z3lG;D> z5__c`hh4gEwcmQsk`)l@Sk*D_&*hb{KA84Xy}v2~N24EYWQe4+^kg9+Usaf(uw%x; z`@eJ{1gzbXxu7MhS^8qmic{y$&S<42M+E>p1>y$ zlz#|p(yz%Ca9xL$p=XYNnJIW1u$R6!xj1R%Ui7FTUPdqf?a{G4>1XGA8-*8`0Pd(P z$5RUCtH}Nea@xT;&C?0z52C1oJbYp+6hUv^pE7~aKPl*@lUvWdNs&1h#hl3en_qO{ z4g>oe$2dPPv24XvyL>J`iMt`&bA$ei={Q-i%(?O(lZWLmmOk<%q&mW&R1~@&ad_{m@bGUmH#pXdW!J>v=*)h$lIqu zI=1f51b^X_lQiDW;$*naauQy7H6)*+42cl5=t^s{5=UV5egFt}_oV9p~T(`+fo4miPbm>>;)3v#w?@A5)YTs_%6>pz#@nWoW za@F=e3#^mCQv`MHBUh?OVY+f6>TK@Ebd~_`G|T`KLXV1!LoHY$uqk=n<$cCO041;y zteU2+yek;c5kpku`v856Prq2s zML7j3+`ML|f=TJp%f<5{HDDyd?NHOOJR-ydhyVaE*O9eWp_29;p8iSl@a%#bVs#T+%V5 zR=VPSN`dOws(igTe~ zoUD5d*S{7^t1OOveQ)ym%htc&r`*b66pBb`2q{&0Dix+QQI(;5WdK(Al%l*h0@6FG z=ufgTefWvkB$AKc43J}$2bBtgNiF@O&TzGAqtyp|3DZ!H5FFtX?h?pPgu}x?&9?o^ zI?mPwlCnYX87c0*D_|m)`17H~%v~oaxfNcuXZ``?o7%SV&PQJ3oeu-eU`WM{UHm+@ zQ>up$l;o*6T+0L?Bi)GA4jmxG)u zMh@z@Hj#zQlo}!dV<_|w&+5jsO&!Do z|JAR0f9gN;1Beb1TAps%Ugd)*J;ch2cR_0|vXXv8&V%v4C!XP^1b3^ zndL7dz=G{Q3xZ+?G>255c{fabCqI`lQD-wI4ubJJRJ6rN;h%abuzzy%q{cNw0_7pp zN|m+h3&2O+olt9c0AgYf1uByGIm@5Iy)8jj;JeW}Q;VaI4ptTxj ztqTeV@H?2qLaD)|vO|zXOhySxF%eedE14_PndDYcc-%LrvQb@e-Qs#bfGtLq=t|_% zcrz5Ym{0PTkfds&zX+T!YFWSTQ0(K)CXHozleq?dyydUwcM#TM3nG);T{OjEjGgvH zAw!_iA8Pb(1kN7-?oX39d=d0WmSH|Wm|6i87_W2EmDiI2!+2&r2dwGK7@zsI2f)G& z-fddiICAMChwgCrX5u#KnWXx``Gw*~vtN%j-Z&Nf=jCJ6+eNh7<120l zAdn}5J-7CGGNCXvp%x+1A?i7`!Fe%p4Pp?{Vxa$1!aQkSvPIGc;5!7sxvRk0+)f$- z-8;ESB*FD`BHo0(O$Yv^kE@9wW366xUeU z1UdGqa<*pt)YnxUqVrZrm%OKcpJH?d1kKWJeL+e3)I}qQFCTKH6u^{JK55I;t?gD< zw$YS=`*(em&|MDJ_CUsx&kYL7A(XJA+2DrCKtGRwBDCWw{52fz`z$su0UU#XYh#<8 z4fhpFFnSg53#{FcVU(1lanr^aJ}Wg2tYyE?UGSll4tMKcv)ORGabRJ^rLOssrUDTJ z97Yu)2)_|W;okrixbhC`z`VPJm0F@ZML2!xj~Cjy2inkS?Q8vg z!VWEY_cSie($nH7#rB+mG|=BQYv|vrHY}ZUMRe}mTYzDevo!*tZrE-FuBQjUP+;dq zg?~sbh1Dg9X{*|ilp8U8Iq|;IDc)nixqj*4GyOOZ(F%iUsESXuyF$xDk|<1I&o?LD zTVi3!gIBa)(*G@gvuyX=Jx>ii(;vfzhqLmwB3mC!)4m!0F>5V)(YSugO8SdE>vfX# zxJC3+j+LJf>9?Dt`~lL2RMJizA>hm@Kxo5%pPJKz)#ahcbQ~fy|JeoRZ2Y~kjR2{^XZA@r?G%inDI1GyhL||K& zH;MTRSNqmrBfx;cN$G=0>C-~}_!gKD&m<3NeSblyPt>#rz{=9yliD}5+Z4k}n-t-e z@IR($e@q*`nKr~>+?uzPbY52^8KHgB88w|sL+-L(;VDPQUL77Qa=TJ`)889zNJJbW z6CB=fI0?w$0Co~C%GoT2yw&)vT0&4JRH(+ANxa<;wagm?3L~uN9V*x>DGYs;bZSE3 zTqPbd+jHRQ#Fpf)zhcfO@6NT^(}hfv$FyW?1vy@%2S;bfrWc18V#xn3F<(iF0|3w5 zr>l-HCa)xw|3k5YMNP>?kI6Ow&5Fi38USLN+Mbt(fd6ByCDnc!sCjG>JGx4J?7UOFAfn&=^3#ZpCR+4UQV{f;;ARP|fiVKDD$q(?mlfXC>Z>4?yteKR6 z;GPtNfAzfm{QGP^{hgfzD5(BFiq6C@rv3lpXPJGUnx>i7X`fOV?b@7a9f_ud5GGP1 z$w;@`wSCVlWu%xC%2G2GvWJl6GLsO6k+>o5MBI>Wh}(7D?(h8mgZVt>ob&m7UhmiI z`EshBe8BQD!>_<~50@9ect|^M+0ek8erHcyQwmKQM@bovOVWH#YF>RRg?3lJ+TlOC zS@dori+o^5zk+uA$E9IF0LaXh`xm{@8auMprjD~KCeq+%(>|5k8_sIDc8s5ATgJO&?i#Y{>Hj2P|jg{v&} zI$bqFU)2sgL`_Bl)h;ZoOE1vinKXY?G-fiE(OrEqMXU}rf$3^@)z}zM1wyUJXJe1? 
z&n1NN-1U|mehn=(-}l^li&9NwGBgTSpXq@}GDPaUY0J}-_W9-Yh3(yUtAZ1pbD=SK#gJ#ZD~0DTL!p+wgVIJg zi-Z-jJEi>(irQ6&1^&H%4ydN!CFTQMTYk0!&++oO>S05mVj@$Z)-w2>J;PExQs(Ru zcU9J!KqV~Uo%j2CEqy-~{zG>bEbR+ewP8;H$whfkWA*+B9A_qro1MZ2LF=Seb=AD)d$FURLQ{&F|K%OVtYFrZPq9$Dp9~jYNt<;@Y+s)fgUxTb0 zw{`B*X{a|y)f&ldXtD2^<<04smT2R%zc{;kj>BOWoVLDTl6p<|ov$?w8sXw7Ym9 zTY%=!g!~K?B82%O%ivLD51s^YZ2sBoY)aMw#8!xOc@ zbIjgH>#bT>@WtA?vfAZ5rGnG<4TuZF`n9v!d8f}mStQ}E)36f-e}7x7aAB8aW@%E{ zCue-?hj&KEJK@v6UIl2a0-IUMR+BE}n&dlMToxybZhhY!7a&mRa6844qosAL8tV$} z{0|-2wNK7l*O;6Df!WN6n1yBAJhC_bNe-PRIm$_EMCa<_pfgHx8dRqQK`u!zEp1e9 zgBsRvA8vftu9igX5LU{1NoQXdJUWQgm80sALgRE=!oJuG;hPS-)FkG#3Y1nwk7NW+ zxWyn|`ac_1KQSguAZ%C1j4i-#+!x%jN`f(msG}QmbW!8|YGyI>cekoe=1k4H+tx=z zus?I}0K5Bo)L#R#G+d4@!qwEYzlqb%2WMxNqXd zV1D?DgqXzu`R1P8Z-p+Cus!eSBSv_`Oh$S9o~-7wdV?&EwWDg-vBaC_a&sFFN5T^B zdaf2F`XUWb+?3juBRh^w`QE#Ys{k6yTy<^3_-|`lTu77*gpKwJsH9Fph)}7C3j{$x zm5vHI_y9!dV3v`KYeC$efD<_1n`$D<46_ufE8C13a=4hOpH`XAicQ+q1=!m?tcW+e zFn>8?pnug9qem19ZA7ONV_N5}C`Mt;xdS4Hs!O(}DN8qeigCd0xx9t%8*pr{2(^{n$aCdP z;-e(7^M01yT9vR)iNlzP1QI|Uu+cb;2`H(xN`N5pp7fJ#8edJKg)oF^Yfs(JS^$btLLR77C6C!!4P z8*U|r3>%2WK&JzV6^%=Od~E(~z$c!Brpiwm1_js2SKBJhA901FJwVtz>KbPVqT0LA%Z z*d1triwK9=)jCKM#}>e2^GHJm0s}yZ-FH!lBg63dt67R$X!yt=(cb5#kiagN;#3=t zmA|%{6p>Om%L%*^0KWmDO<$MKR_iK*fHNAtgMtt#Ihy_At^PoG1;0;_U|%b`ff$Xr zX{eZ1EO_fUhLQ?ARIBFge-?yup`R*UZ~s}4yYbmvrq&G4Ab#4)e`va>Qql^Sh~)fs z>`~YQ60ItS8JuuJfS@3@E@H{n`Qefoa*z;oD-_8mu}aYut9eGHI2G836M56$o3#;~ zNdZh3An*pG!C7Js9yw~nAYXtGVL}AK%55HIXqpEUS0O_R7-KTpV%`m^%rsBw2cF)r z8^2KxllN_YLMTBX8#xk)-A4#TDCjjCCA9wxgPo}WUlvmts0Tf19ZZd&)3yM4<^sQh zps=GrIN-G#-=XV*9_f@uC0AVT$(r#UacQrlj ziQ+-~AGLlCX)(~J?hQP+=E|v`KZz4{e1CeTdDvB<|05pQV4i1Q&zQJ6tc@PmFF`Wj z+y#ib1fFxD8L|udo7#OwX+I*`>YS~ImYv4*1_WlZ&`H8OHPE$h+_De=aoA8l$PDogNL7k0T72$oMB_#qyfY)?b4@6U>sd74 zh2>&=r8YVd29g5QAR+F=0SXxz zKt9O2#Q0$j#Y=L4FGb@767*VghU`hFKH-Em%Q``_f^AE>bSu<K>VY z@zi|!D(Q`XmW&qGGGi1$y;NIhO(s9+8Qxc@GQgL=ps1@Hb=9JJYIxH>)AiLLQ>Vl{ zh$-75qFb?GB#gAEF@P5ueio$}QF04PW{Dm9)iJGV&tY|{OfxkDng*z&+to98rMXz$ z7NAL(fO{ljK^GXo!*PG8J$ahMF7U!Dc(t4`m{sPfhb66WwBY#4E5~xa_@tG$ra>1@ zyLz1409q+TI2{0GXchtL1QwCOLkAS93y)MYOPrAyWehy(LdJ0_ny)U+Bnd)%0N%{cp&tRV`Ae{1vKoG_|N-)%ggVYSD#WEuWDA zVLrMTBW1CSm|#Ikpr(wkp^O2HG1Ve%**zKbTCZ$wgx9r}CbZxe8-uLEnCCvbpM#E* zswJ%fQ^;LcS9MF`@90>!1bcxY=%1pqD#3HglT#Fky;F?f^N;}J~yTU%(U!V7* z-H~)j_vP*>_vuwr&Mgk=b{bYX4lCWZ-ImvNkJ&Lyf8sye#sxX(61W=Rjm+FN5LOyzZnQOHZCNf?IR6wp+WW|35~ zs$WfQQO8WcF)gKHAxdaLr?W}}DWx8w9S;5O3@uK?f~SqCzC3d#YY%--1++bqiq5VI z_c_~wrr^ynvh^qk0H6@^W2rqbom_MV=mKBR;K&RWO|C+^fV9@qdlM>}c6ae{R2&A! 
zj^U;Wfl!mFy#$w=RmMf391=3@6tZdYd58`Q3UgG1#?t`6i%l@JY-_jDUGf$KpYlYDe= zq#NVZI^2Ain~k1>J3IvNVq>X`N3YAtvNaN-S1X3O!-cdf!zT_oWT^Zm;8dYT!ovx= z0O5J~C!FT2HDK2crwVY96X?8uKqv$CD{{yQ?Xl;WwRO>g_O5{YzBpPfEK9PWJdEo2Z?+n;yp3J)BdH4%Ptp z{)a!kXiyFVp{^Q_VK{S)kZHkh@=IGQ*%~3Nj?=1$VOKo#zj_Rxbg%$So)+T(x)fol z5v8ZDf16_>Qljyapa01Q3N!tM7^MV~;2iPw^QG{hMQ866j)t z&1|(qr}5Ln5&h6q7W^Iui%HQDq}^o2`IUD~v4enU3=*}&Qw8V=SCCN$LSfznJs=d6 zg_GwCQ_se%12R)RtcI06HIS!7iJJf?6Ix0W1vkka;Jl$QYjar6nw!**Nj!`6*UFm< zhCqz4aXK6K&-z*R@2G+aWx?h)QaZ>G8rQA7wRNJKchY(Ni-y7U1rFy^2`zw~0uq%x z2xwifbgXRCz298k{I*CBhDdOsh%@R2Ff&x1V`y8_af(buAJn}#f1S?P%zUG>BcXOO zNYthB#UOwO)ge;lPxDX%g2Gbt9e&C?$A#N!URK8g)k{KV#G~^A04wZdI|eY#pq&|H zwC{5e-C=e?Q%j(5K*J!-1IGZnS~RKzN0Fd3*)m77D*e5{Dohh!3w9M+pI<{*u29eN zS?tuV#91C+B2R+U7BE3hnCVUsTR?kZY)(rz>2a79^5imp`sI~R##hH%le$lHF15K& zUo@ah08-6&=Knh@(_!oh2X8c->%JPf^X|z8eMXJTt%2?(=ueNY{`<7~#^U7E2@NI& z6pfYjo?qVkwX8r}cJ7M$_Sw2WYJp?+Xw@bUbvtO^q6{hqd8}%?i8EZFrj;*(x>O<| zJcFdB$)><1OR-wJ4~}yRUcG+z_0hXSOJ&>bWzUz=sO6_2v@LL%EeR;g0LC9HXDx*& zEnvhjN)w);O<=h}Yno*Tm2|}l;Ib8fwdg|f6eYDCwUVhXw8E?&jGxlZ)yXcA%NI_r zgE=|~K?&i4CyV5SwC2U>*)%I^nC0FTTo}ArnIwkaqBCWdkd!Af0fsculb=4H)~^3 z=f({-G(T+sSSU{TdRrI0w7?VK&A}2{(&bP03z3~(;Sy=s5 zLeO9S{1{C6+{Dj-zHMGL`T|r;W{)8%`9d=)@T+a5$@9?bG z?*9@NfUt<`fTZOz$o2WtI1)b1g|KP^*`Q$c)Yd(jH!8>{um#u2^XBUozm>lPZIBtO zz9rErpDa9a)A;qS9pT_b9If+l)?`!n?epEPz^l728op1P^suk|BVdaMU$0JRFIxQwm|a$Spe*>^ej3tXXI3Y+x3~TTEG4r}Tm{=sG*tl@bh^x@ zUuk2$dzpTTi-HNnS*@9a7wT^=)Xd+tS`)*=HkvW}m)EZl7BG01uWWj#w#-^Td%>>k zKkQyK#Qw5?FCi};9-A{ZM*mZj((-lD&gHR_q?vXuEBohZ3FhX1y-DkzT(#S~?*7u) zGR1u%5e0{VE(>HUM}xBqUH(q!IQZsd-nU;wUSlymVzHXq0^0R1BOfye1UjbAH)|lVN)g1@od?i@?d7fK@9T zRjZ82!079^IITJio#ASka75c|fpfa;V4MGkU%v)i*mHADPIt6c^YR(>-w0mWqHrhw z@Rj+gIhu!Qe|D>*7mVt_=vF+&1eW$I(na{pKNOjv*q*A9Ol*yBS39(W!dh6^1%k3q z+!P-`{%K~xH>ChLMnxwVyc+qV^#w_Y=KSss@HSq6rd5@0T(@B1&iO!vL;AUA$HdDtm(o-e+V`1{s?RsyCH2+&T>eMRC4Kxu0ETP71AvzDGw}41c{x+>1#o?( z7NcQO1rs>ZbeR#wZ6(osMHYRN8fGygD=bD7PoU<-t(vQjg9zjezRNpFR%cST+m#-h zUe0N@qp`w-Nxk4hoTvagyKsG~YlOG8bn}>AIRAu;blBxv1@BB0pZ9pf+Y$R`*>`mw z>zjvJjgjkKHKzt@x|TRaZ=!uNsYy(yh!UeNGIpmH6`HCW0W%ByD2k5#aghCBGv%dX5fDnOVkwW|yNN`CB>axfhKe(2q!BXb&( zUzCS*Ejk&I)r>m}<5_z9(0tJy1c|lw3?+$VuR8Id{Z8J$ES5kFP$!F>9%oz07~TmX zdSp@D2|NVO&)yH&9~?qfIf%!ASf_(=4`bQkaaQnT)mh2^#25i$HW;Zx7I3BUHUK#i zMW95-Ry5VeL+UyYB8iqmq)4N&jL}?+kQPigFM_7yDkr=s?=yl`#uClfnS>LT$vT+)#lTo$)tmh;?9FRh?| zcGhj-A<-+Gpr@u+Yb}k8NpgEq)zRY^ND^36w*ixB9S+GNk$SfOz*wAp&p7K*oXe8V zSBbtDGr}=aC)(O_4tW^p=ZtqgYrwr$+_L&P0WGcje94xtOfryQz#*S5+p<{M>YFo&2!`b+ueGhF# zsS@wQXfK(cE>LRd&T>T@Zwqfhd1z_Sif+L&37aylS>sCLROr%@yf?Sy%@U%t<=rPM zL?($_G6^uA^7Abuk~v)&F^S-uAx@f# zt8R$1qBdL{Ls&cAT{~b;;MFZyG?F^1bzbxnQkn2>|GaJBf8C<_3qCm&kcbYCU&CHu zJ!7yfkj9PtGkw{wH=ka2I}~8=OxA3XHHJm)8bKyld}?pF6#8#_fRWp^bmyQkSwcJwlN8 zLqCwl8L9F(*91^2&_Q;^0`btCS5@2lNI+2uA#l$pB4J=FZz>5%Af#UoOCSN0eip3g zPO2p8--4-vcn)JP$+}Kk$@S=G+oBKQm4}*~Zc^+w4&A0NH2Mt2n4w82HP-*^A^(-x zk*EIZ^7dY_%VE9dcIV!$k!=rV8^%@lhC*jV*7a`L^!|}gEK7*|JDliQ^(JpDgHsu! 
zCls?vy=h`RU1o%A#ykje#3aIKqtE3UzkM#0h<+dMsHAncCP4n7(gJ08m)b*R?(OqDkRZLe8 z^IM@;`sWJRryVi>=)7*Qp`*^z?tsa!fZdS-!cWFy&e5$(YC4Lu9bQHhwrgh3d(CZZ z#k+Kxm87Mz2tOlA&qtM%Y=k&R(NPj0bHaQ!EX_@WQ`;}-2U_Lo%QylAPcpA2{MZMcIyCrm_-&kRlaaBl-RK97IYGWM02Ckq z0T>`@7~aN{4>$S)+o+K>WDyWYg@8;zA}e|k{wnrD7`pKBAiLM-MUWK|R|Vwi z?%#;Jm;QZdYUQ%#?=vqqd~558J=yZ4MnI8%>Er|M!Z<7M9ZL z)A!HJ{GzKTCGU0Yi<#c}vf)MS$3ZoV;lh5kwBh5^(apAzJ;^_4&aAEl-gRkm1xg!F ze$WD>1*7KinUi|LIGyf`$NX1-H7=&F2(kf|5M;(vprZ{|u5v)-mkIMZ9bOO7sV7|s zLBNZ70AU~v#?)P3QW|i%>`5w#W%t)Bqgv5@r{?4tcV);o-|$?_Rpzp*cyZ zjW*c#|xi*pI z-ZdS;a(UiAI)m}xdS$lV?LO%6CJ6A?4V9SP&cN?(9H0=i3NxabaR z8YbW_RvU*_o&n@H5o!iXZOxz!OrlH~LhM8A0sM}9%|$s5a)$!jjPyu-P%>7gi+2;g zaTsSPY~sru22%A;Q@c8iU4o3Ir8I8*Dqqq_(1Z7-Msm%qO+);KuYA^Wx6u15Lti@G z{O)b$FzM zHV!6Hm1&*6qv=3L(1Ctyvyz@}d{kNc;<2u8nvv-nw~;~V<~@D?trYzD*Z#;{gzKt)k=1B2~1kfyC@*=rR6=| z$}tiO8s<0LJQR}W7Cofayp7fLdMw%Sy5UaGM2Xme*1#MAaz|)3i(KRbDce*D)RE-! z`dxCF3zw^0altAt10eV+1(Hhd9|;QK*1}<-d><4ZWM&RvQS*b}&%AXm`27XgA^c&o zI#5gR6VHwWcTB^9ae(O3B5FHQPI!xNcSL>>05kz^1S9x%I>arf%f)Vz*f4o4F$i+& zLQ}P!<(ebU;+6KkF`OE-wVxE*^Li#=o631_Y7@-oI?&2jW^nNtuFT2zFZ5S5$gh6t z2vJMJ%Ga#4VNG$~k@+p9&t68IAUjKMExjdP#xi-I^s!M9k`ATj+$is?SBr(ZLQO6O}$>1A~EJTn81}fl) z3N?`ixpg85T1{d`XCf;(c@wFk^@zPk>ip~)ky8)VX+fe((>yH#9XM22m)?1@{t5z+ zYXE2Aa99s~cxh86#h^)$A9=4nvL&y@DaJi+GNwy@hb4j7=rE(U_Za1{409P;%{%!W z!B8yLMqAdf@zE=k(FxEMtEdA^auG_`?T%5Yy!9PtoTl_|I2kI9bq17(sGwQu ziin?QYVF(B1UBCYNryuRRJI)GYV-}`VRaZS=J@$%cw^Jl(p!&TnF8gpp)zbq9l#Hg z;}6r-@AKA%AdG=UVdJrkA11d>qs5VwdSW?R`$F5_72l@Eb5s9V=k&+smj=6dQf*g8 zKwiea`U9UNGk>wUcIwI9%dy3G>bncvjy&J1zpJOzy%S_xjFgu5#~2@v zxvrrJPjcm;?WPZ0h4|{{L)=MUzrJ3Sk$jRW{n+*%;q~l0jJ|DsFD%q}`Uk}pzrq%V z5ZY|KaWMi?M-WBDsaPV!K*J8A5RQsR>%e4p!sD30^jLSymN}k+C4z7YZw5_;4U+6P)EG%hg{XTPfNA|FAGOWxpYV_S1K`{Z2{ z4s`rfvvZ)}+23PmH@D=F5FN`^U(GTSi9f z@V5Hi9DQbvLTTH!&ap)7h=RiNXD~s}*7)Juj;ROxW?~ruo1%M%OH_)X7H?sq%T(gu zGhyB^FzG~V#aP`zS&2WjRQ{Vvbkgtk>w~UND!J`O$Do+|T$OE{5nmmUM@pud#S8ml z{CqoRY}$e2(QQSo#itgPUqBugh^-?{g2)(p_N$dko^0!Sl;|$w{?xZ}$;w1t-u&er zV()=1PCd7`ynjBHF<{e5jqx;7v$uRB} zKGp~+07o>|ucVA&L77H*@++fb?K|h%caB(hU!O@}%$@otc(7h$S+a-Wb~DcRM7-um zT;@*!vD*!cd^^a~=J!7wDIl59m5aIsr6T5?T>vr8w0ohtvjVoc{${LxJr~RU=6u>M zfEJvMpGrr@*g zJ$zv#wgp)Y8?DTU_9;N?G1AdTbfe8tjNdE)`=2@sdX$sJ0DTBx4T!}w1jj+Z5|kK( z2pS17s&A2B=a3h*JFY~+v(c*NS z^EN(zzjV(q_fyB3m&2}m-@VfYuO*FWjgqI^LmJSj^H7$kBXkBFhN_qRetztlFPr;k zPm+AuZzo&@DqFbNp+#w@`9zVdHSd{LKZ0i|raC-GzCS-?<*bmEp&i@ihHPI^?Eb?v z_diq4S|w3#B#KO2v&nh_c{5FHJ%pC@C>>i(bh9w*9?Aha91*eeK*KYP1}rvdrvZc? 
z#>FqdwkSt$YooW<&-f{!6$wa@b?Ca&W#_+o!Zk+gOawR3J7+zboKSF4(HXETI3cpD zH;i;}KR>ZOb@QAN7PINr%ZppQK4p~oPTpCH1V)3Ltv7yk44d8`+&#Z(>}_yQV<0y% zOp_Ak+NC_w4VLAW@Ve1st|yoO(Vlzd&OUv+OHjuQZH!y{h754!<( zzdC<7r2WXZ;AL0VANl!v>DI~be#y5 zQ3qSj#s*IOYy`Q8+R)bN;hw zanHQZS!{l6YhGx`=^M`L+jM2LL0$84bWZSZ-SlXHtk^`(M9BSs8>jbI<)$v@i7?6K z`qT~611D@|4M#TzYP!Wo{{cac#LG87fo4L;|gq zW!2p6;9Q(PeDssdq{2BkDl$=Y#l1QzBW!qB;H9daVO`SsPB5${5-VJT^ZHEc!?T=4 zVQf|0V=LQ5!)W^5eL<@qoKXJbL(dE=e0}TE{?L@Ai#LqFe^EWfcG;3me~-R>bbP_G z=;{A^gB6z4O0ry|QOT~(Lk!2rDp&rlf#GFMi_RCUd~&sJ{U7&gseX4vHwc`FVu%dI zm9yQvr%%2Yovn7cQA*+X^j|{$=@Ou-^yS3-@iTM|L+L2l9NHW zV4wHl9SoVmtD$7M^avs9v*ai)&d9n!jLUd`l;s;XsATyG3~{s#vS#k8tKZ*GIdj*A z>QHz;ZMEMo!=^L!jvwo~Pi-E1nv1#X;p9qVpTKqIP0p7_M&u~kqZ8X5{p{QmcHEa{x zi+sQqCsI}%)hW#2v))e#!VZ{y@+S96`Ct4q#-kS#?4NhAPi=g)kJ=^p^!dZ-;2}YY zDA;|QTUO*mNpez*aP<(^H}G|D_x9tf4)3dMx)io==ZsT_SNH|OE7XjFk`6{lZEY68 zbJ4XyLMZOc>uoPZUH35I#wg3B;m!-%<8hZ>2StpMva6hf@&uDAFwA1S{2@q%;s_aI z9otK9RU@h+L`kxh|KW?JvcSBxm&% zMLJfg0w6jXl|TA#C@GO9@=wAzf*z>|?@9!U1@V(^9VS{rkcuo35LU-jY2$rW?Clm{ zR`l|)y<_ge%JmrOYZvg3=)T*WGlscy%$&xemc?#9zro4Pmx|8o*-fpt*f)A@&Nxgf zn%!nw+53u7(y8`4`8+jhgZp09Lw`}Bgy($KEb97^#hROkb1C?=^B&DTyK7c`XlqsE zbi1{)N?TB3fLZx$tSmJ0`Rs-*bN4?Rso12k>(k5Cs`r0mF4!&x9A?z!2C4ZwTW$a( zGx`@|!RVv_Mm+`{|G$Q&B|gYi?9{?$q5gsgI3Kf;VNsH;Yn1|SDzt2&j{Qno7Tl4l zrPio8D1arnt4Ik!uZg~^$kfSYT>vB&fYZrIr0ij<${-Inmy(orqs<)q^3}QG5k3Z# zQ^o645i@DCLUs*z_TFr9u(Wt_4wblVUh>H);X8HI-L$oA`iYA4fH%8av#EYR9bK`$ z!Y!l~Ox)IS#*R5}soQ9Yl9rCbs=k&4@QwXvywAqy*K=LymjVf-V{;E4djKiMNb4lA z_U|TQHwr9hu6PQRx*D*}ORZciPv$_p6n3V_+s|Tx9oC46*UY6%U&^XLtf6UmOdI8p z&3VL&Cq!#;p{!WD)OI*fB)XADANGJD;F6Qz2GFw`y0^|?C|um1SLLCqAc1XUhO zUrX4|ix29UT-Vv@&#j$FOAHzErf-ODst;^rM;`79-~8BLw2sw+;j`*~PkkG(y07-u zE@JBAQ+v*zENux5@uV<=f_K_KZYl`bw|{h1U+#GW;=FmklAulB7tlw72^J_p^P0W( z`HgcShaQc@VaL6HO#(vNq3)8Cz=+*VSHe0Xvq*L|s7vWsXa*yIMutZh0N0Lby|iZ; z@0tMb@~5F`ty&IO=$}&f85InGtnKC-uHO{eIQ^fDjj~S1?75I)oz-t8KGwr}p)2fACKvP=g5`Fe-iQsUwvfH=M6+c`sJXY2|0 zNlJO$%obkY(zvRDXKQ4!_QyXatZquBI%eyS+H^gwKas!xdBwO+HB=}0J6-u)iWN-( z&9&GyCAlz*xyTYzGL@wq$xb{%m1Bl(QF(3Dg*dIvhao_RGnirpxQ^Fxbrd9Aa_&(_?Lu{j4o#^Y$N6Paa;5-ktyLVuA~5 zANYund->nWwZ5{q6p=nOI|q5NApxZ+P{0v`iIyV=54;^;sK>>uU#IYCPSF3)0S_^X z1lPGkGHQ_a5W~)_5qAZgv>gV#Cv|rsUV3smTh;b4;>zT?;(b%xDf7MoWKYtXToI}Z}ZO6&r0RJZA?o3ap>QsE&xoGV2C%9!4?3uz?r&;l_EBf1+8xKc16YL$n zoZVP)&9LUz)%JGgt}7Mq%&*OpD_C#1XLIg$jxKiivuiZxV~8c}&kZR~FJH9&sBXAA z>W8fT{nDkg4PCZx#o%wb$cK9yZAR;VPy3aP%2fC25M|ON9fVT0qi{%lm8%&bIspi~ z<+#p^2C+7>H1&&QZ+LwMa=7+BHg4JK*=`B(s};WxK1L~RMyNs<{{TL-?8mj=N~6!9 z0c%AttCDg@Q2p|QX&07=lvT=`YMrtUB2dHUjew= z@1~ya(0p?A>>B9wNB#lBpZ;e4HN&5sq=36!Et7eb(aNoy5PR z+xt%4u@7W^KN14m44Lq|NGPxb<>^9vHWK1{tQe4L4ARTijL67~0199rEH^Sx6wZ4v z*Hz%gwLlqqkRX7D)zs+#D^JNum^85S1+W~2!YyPVi}30+d!HV->q%T>ybuGjvQGu` z46v1wx5vO46>#cJ49s`jeAJtPoji;To?$xW1eMoC=4Ej@4bE2))^ijpy0`U2&WE!r ze!D#9Kb1Ts2?b^vP4!tIU;43z{?wRdCuki#v1aM#0yvZf|2qm91|jnMpJQ}^Wx2D` zx<4JDy-e4A&eVPOKW%??@N+)Ml#zFa`*gP@_ry$X-t?C6S93~%o2941ei}jv@MDe0 z)&gQ?ql~#pPnkhpbfOicNr>21w*_(9k8*02{0eF~xa%#^7+UtU()8$W-RQFnU+J6opN{iR7MS|JU3-(8|> z=FCy>{6jGaY{fQqt%WcGn6S|U1V9Ai{Y@6K#zL&u(;IPDDJL|%L8m$sx7NT(%Hm55 z0sKK?fSzn8Cf;4a8rH*hf(;aq^<9stXivjs!EXg%0kBb#;+EOs?PHl_LnoM;gTZYt}gL6`ZoaCvqUO$;+;?<-eU}AqQ_>+6JOF{MGs}WLm$s=nwL11B z0KbLdHVcy?=3GO`wMzPUQrY(hVcP<|4!!zq&LozEwSyMXq`M3*t zGKQUrq1Q9=W&g@$gdz)}$VkqYu`F*F`vP$}h4%~7Mv|Hjh!VZJcK}l@SZaOt9i`2O z(xOEt9M1avxyvf@9Vg|>xNNc-})6rL~8jocO z?Dbzx^`!kw?U5Xlo=qKmfA^wD#^4&*5*JpjV1uum(OUsPDB!DSeA92P2T1t>B8pN+ zEo<@yVQuAXt`a01Z0WkzZR>IReNu6w%2@7epK&vK^C zdBcgUGVyg_mJ;MS7*e)Uei<0<8mL}=<@55Jh3omjD{chv_nxsoHs~w|_O!4sl7d1h 
z-cr(`pHh4_a{MPZ-py~y0VtCQbNNoF0Jxrc8vo=2#!;jbve-@_tKNvuL07CsMz(n;l4*CONqbtr~tFW+RU-mE|A0^it z2^eB|gMOvnLazClP$G-j+lZ}XZrhyG1(l?KvH|7ExP*r{82S9Z0*9XEihmy052Qao z_L2LcW}R(lirR&T4%%)pu{DO}TT*wpUy#Oz^3M-S7u2rzE4H@^PW59t`cK`_eB#Tu zHFm^RDYuBE#ebz(jfQkq^zk0G!?Qt^xXO+qD4A}GMH5c{u>c}dW z8v9T_GGWURvk&)nl^=;ZZ$((bPkU3HdUH_f_;`8l^c5?4MWqx{^2LzoJim>tg9((- zTe%liCEeW7i$9L1Z0V-+46`LRYu5QwdjD`+6%r~3Xvc;aa--WjOLYc@iV^&ssmJT~ z!ZG=obqWThr(`-x-6V)O)#@F#%8gpLBdW1I3CX}cq<@ZOZ~vK}g8-9OD0Hn9r?ZwQ z`>wa%M_;&s*SwY#&^La&kiuEV@4vq^ZtGc3)|`mx>khi`f_VHIaZQz>#&NL5Cd)AY zlHFB9l-v84BZ2Gf=dEAGU$K5V|I8q_SJO1)_jW{Eq1Bv4ZsU0)8!WgS3o!{L&VhIRb-^db z0!_%+c?OKA^CuKCCorsL*0bi=zimGkVedO{&cYqCeUsAM@1K|VALiSXT{5LUQeLoq zJmsW;Z`2Qc)9h4Dk%ruzVN;h`QX4`!Bpt@WulENxc~bU-Y;HzEaLP41-nngGo4b8# zc;o9Us3knZQcEyV!v#b_7IVCkN*QDnS>Ub~>L`i%9wmz`)kQI&g8Rhwma9wDu9S5%0n&_nOr({B(9aNt%Z%|_S{$w?~K){xORs;I8_u@N-jRA4D?SSIy zuLZEFnimt^Y`eE@=0=`$@gcUUWYIt8>GRg-K0Unl>iX4J)~{+W+izfYR)pfbiBU;2 zQl7+3)RJHQ#)t+eEof-l&YrUE;LGis4ovDD>dYrZn^ukfg)v@;ESRLRkXhAK_pJHC zQ=kHfTAjrz(z9C%sV@h(;d80^25wvyv8_P(q$k zFmNDzzmghh3W-Kf7=#BonJC= zlPzYmK=#t^=#2-;*8JUHbjQVPB4x0T3~q^T`Guh4*4Z^BHkmgwW0af& zC>d0;yk+e^vb*G)DYOmJAvLx~Mvq?dID6-Xw`d|?d=EuUYT*h=DdBI$&OI?o?OtJkkkdPZ2$Y?#fbn8B!=^4zj~(kK&N_mqrF%D zcu3Opy#3xjw5)Gbo8}5kWFikJt_0Vtma9e>SZ+Rtua?+8L+QrIt{S_DpM7MGw3pUepYEkVCMf(bk<=_uKyoj_h9rW9o;ZG z#nCB_MnD`P4QHf;;zo~wgQPBzYpTpBbDn+r9P)?4VKNS z-3Mm~4d~9^O*6d(XM!pE42-6OYG4h>(4wxHzb1=BOL5b)-#shbu`fULG%z^Hsy9WPd zcvdP`7>T-w1RB|rf%yj=1xwUivc1yIpXW9}2!QTnIF3)gqjScdV1qC@!b)-Kp6wI? zbu`8Tu8K0VQjhh1dg6&dT5+ozl$CL_8}^@M_)wBI2x}xS8W&%Dsz9Y~_`YN5!;$29 z5vSmSuDp98d&$|)$DbFLhL62`*1iCd;IBw~+k5dNqNc-0Af(g1sqF}0l%!UK#KT1u z*ysr^$6XvY@fZ|ojiJNyNIOeMOYQd@?pOo|Gx3$hZF8x4U)sP3%Ct2-r4X~83_khC z({S}ayu}+Z_W!k~ag95bT-%Bg8o0ZCr`6G1pSuQzNB^%9ad2!(Ef=9sCOk@)?E5{Z zct_j2Z*W;oR&hJxD7rr|u75#NE(teiJNA6%NQbmYx9+(svOsJ#eva>~zr`-VH`~&Y z0jgkma!G_)wJw_QS^zw)M*iuRz6|KosK3Lhw+L@S8PS~K;bIU$3voHgJ*=?UzB5D$ zi4*4>3k#SPS|Bq)(KZ;i7D;l1HwU7T(3L-E0978uN_AT(EPWUi?WQjB%iT^jFJie6jPL zz#nWAV;uebUzKZPe?&?DNs}GVG9p0#<&q{2jBvt)X?K_^8%Pd`1hw8Rt1gTHwvra} zMW3vPtMkO~*c1ZlmK(jD`CU*9r9dQrW^YC=vc=eX&6U|>RlZh)mb3|6}&*z9(PJcDTT`2$vaRS(=U#yW<2FB+$(Yqqa8s@<<-G-+= zy^c5Vrvue%!MfC$halr(=h_1gukxOIM>1T z7~pwZb?vDDMb}%=Pmd=gkvkO)uX_GPt#R57B#iU@lw8)|pRbBf0AqK7b-J$+zQj2d zizLRA#x|5cfPQRuLE0z%RT49g5V4paj2WSUSB6S}=?c!6_M0ByVpk5j z{+Wg4?4zu7O^!<;icSF9s#@YtgJZ_Tj-k*0cyKC8AGK5F z*C8X@f}Wf=Hl6yn%dxtzG)Dh*T{LbFta}6`orp$}DYi>3fP%z+YQ73K%{f2L71juH z3uvUS90k}DK5!uF&NCq%Y#aGhDqSdkMksh<0DYDQXCB!*%w>q_DlTkZ8+bHbNY?X> zdsr5mF&A7=Qf(C5Rv^a%zy;}Xu~t5WK;hkjkpZ9&fs{6hW1Q~5%T`}kS_{m7Fl2rJ zB(9QR2K6(-5j`LYCY!G2R|r%V6qhP$VeW+Z;d3J@`%Sgr|7wUXX!r^C2UdYMoU0ZO z05$q9;i=(=LOWzEyRS%F154arr0;AjCP-9aKwUgdKlhKXsns<#vRew`0+}cQFjH*p zzAQwqw}Z!1;Z{5v%&3?Sr}Dr$&LG6VZ{5_JZ$geU$0OS(7Sv)rskR&OvTOST>PvwS zcmK=?Mr^awmxx7{#IW!L0Es4vqd1+{F*=iv-!2jqux6(%FzSkHY`LS+7PsM z3v@wBARt0*YX<%Q4pLYfRNnazB5(_YyCIM-V+YB`Qw3_M3+P}UyDy#N7UA{MFv3HR zPk}wtV#(b6l7DuEO7RN@0;wVtKmrk*Xb@}T*Uv1kN5e^gYz)ECU7z-nz_RT*V_B$a z|DWiTF<5$qSEQZ4?j~Muqf~5D(pLK4zLWoa3Btv>yZ0FOos<55BrWjOTZH?WAxWjD zUL0k7|#0TG~4uMe43!+~wv`5SuCsW&a@H_%)l#E>?B8~|hfmFBz z2>>P=p~z?FOdS``0Or+>gc>=n4+F$`KyN~YadG-(VwOa@4B zf#oDr2d-FtyVC8t92!9iSTLpTC0$1mQk2Ap? 
zTxyU*xv*N+DnH{K59r24yfzMLC5NOsB67Hh46XoSuoxwr`hS5M_hcJ6YK)A1#}O(& zpq#Rxo&pjDSUGS>Duxo^Um;!iW+cUr0*fTV^lvlEI0z;WA!v+e0FZP-<$(?E)`_kV7j@Km9uUg~p249;Sy+Sc3;SJ(V@&Kj zf>1jRevan%u*L_*jh6eR@vr)krKq#aD*$T+fGo9P%^ya`p$<3zq@!u-ECAW4fpDdr zYau~ZzaZ`qQAJclF~5T_DVa$_P`R*VVpGWoOg*znb35(^QXWCa8F*vAN-;uj2i`WJ zS%tOOGtdp}^ad8XXG>_Kp?Pz=WU>Hi25n{A+qx^5yS7O$owjw`^@gy(ofPaUpIXk( z{7@TppNvjf1|}+oym7dcrm9hZLlkn_3rJvY8oV8ceMmv?h)`D{_3xNMmUyhSxRa+4BGh4xlTL-*GPITrY#d<@0gKQxAd8 z_4!{Ke9!UV7qZF|PaxX3h`>Ey5Es6)d9n-F`@$Es&beYX;oE-IM{cL0T2&N5iure| z!dBo|9|RJMpaERu1_{N|z+B)VN_oRYykQ0hQ9w91o+e<^P@{ZAF$Z2u>RgQNOQ!U# zT<=qvj8psBr^N?pv8sG-7i)6VxIi_XOtdemS}PJe=hB?HcK&H>c;ixaj7-a`z=0lo z`W_p*(a?R72}wJ!uyHg{eUg4{Vite}oc!=wMw%sS#78_N^qh~qFv^9y@qk|(!4^z# z8MEiB^u_b!!5j{g&Mx<$3UtO`H$L<{>B<;jsFj(&s$TzUKXcy~5kdl#p3uDQPf_+a zA~=biaR84NFj+j|RbIn96LIwSa0y}L4jK``E^MJd#BivYUn8E`eO_*(@!rKllg{?! zru&Uelgq_r>L4Gu!GBE5yE~Bv9T=M*Sms2$uA^>X34;;Za;KpM-GhF)BmABt^o-iF zJ%laZ#R^*E1oq7I?-8;TYq-Hg$n1ni8=ZUFw!ynXhl_ZK!;X^>qqOpI^06QQEQkz0 zI3;-0<|oFWg&viS5j0v!*E<1F6E@`PZ=eMZCiHh5JA3RkN2t}*Z^|3~b*W8=uOSde z$zKJ0M@+wm$azzdrlmsv60sawuffiAPIOKY4$(?#s3*c6-oebX;N+#wFkaX|v%751Z4x~HllnX0AkSppN zH{3(EqEUM8*uz$Hoo|uNLB5mmh;|%mBOq#3ecn(k3ml5bVETY@G4{`=X4D>{Nr0DU zhc8kFK5;Ol(q6S>#2gcmO%-SipwqE1F$$uKg!*-G_kIlb{=uIccI-Iy6Spnd#hR3@ zlKPvaIv_+jx0H=QuioS$)S|M{FR5c%eruat>_b`?wKTp(PO8Q#!o~%Cxdq+Ox1*Y{ zj?`ZH=-Mhv3ow#D4Sz0kh5neWEYi>+BE^P}mQh>b8@B&uZU$Uh)R}yG8kNuO$zg`< zFZl9ndlFwycH^-AC(#Y^gK`95NP_0V-}V0nP_*t0A;=Hc{Znpmg!cjE`Rr*1Q!xwh zD3^-fC!rDB$Z$6B91&H)zE5TLDNXhjI70yPH`RZFUOgU#-q_M4fZX@DDzHoM(6?^= ztok}Wwnf42u(03IJBi?d5Y3ea(%wNo4cX5^PjS#UX+qkE)>WUP<4j>Lkt` zy&;dc@44tHLP8`nLi@XzQ^E7GbFw$?siQhXM9A>_EbOZ-#N8ZrD{f=q*wY~6O&Sfp z+${M1U*Epxi;0xyy~O zd+PP37=LrDMTb$=j1L_r?bEQ?k%+hqBx~!9#s~DE&^nzrjb5;m3{ff^p{J2U;d&|w{dcW#=6^lXYwnfhx>jKkahROX!#l|ZHt9{ z$Bri$A`Y|MbY-6%ws`hN=owrPx*`F6n9v7O2_pc(BJUB-t>0iG5e%pgGxowb;w*r- zh{7fDI4<67mW(2z=pA`@Zuf!&&Uc#JW}9CkqG%tn;e5MP%KGrnG69F>rR4Nq)xQV5 zX>Qy75vvjR`utV{kHv3|q&;7*4l+%&DS$!W3`ZURYZG%~FnRfPmwN-{_0DLozuw7y z-I(h>FP9)73|^VSCTP=#F|#E{;!NJrfan*CCpj-0jDyDQ&xxv(Fi+W@hl&~47mKit zW{K*AZPa@zIMPv>DDh&0UE4yG2+Dp*Z1>jc$l!3d&Pl_d^YAr1jzXt{*;s8e8X-O5 z@mQ#u8I?z;3HvOTT}ejP=$^dR_v_8eQmGVoUv|C1XJ1;2Q~5q;S?qFlt5E_%#=V-k zE2J2wIBvF^3@&+u(H=>z$^?5K+PwU^pT*oan4GnJK@-=5#^bnps{=?V-x8QC3qHM3h4 zCi}#tspY~DRUKV=)kPd)aVMY*OCpE1KNekiqS!O2{F2Qu%QS5%{t&sogofuO2kKb3ls+wTm*L5JcJKHZ< zcH>G@OYtv8&ncC6R~iFTw*U}jlfK!q%WOgtMyo_{Fhd;*;LxG-jlECekb0(oM|&vK z0R**Yp)vB;Cwp=IR4yoA#o&&?u;aG?r2=Ey{|?TUAId;XFa?e0@0?fbCV&v(*`6vU zIsT`pY*4F0_FczDZU&$t9NgeBS$u7E72t)rPhOVnL^_o zM;bDom*8ZUE7D$D5G~&pfD&xF=Jx=Y3l2Ilnw+uqFE*n+P{$X!Zg4m+FOU}QUZs?W z$JoGh1Q6Tsl%1^oMY#Edjc_^_Ot=auKHN4yEQp?m*HIvXnw3zYIY*0m5o}zso$PuK zRN-uqS%nKFBH_)J?pVWg27lg{0#D4tRu1E#{#F+^&Jg2rO z+KSj{ckkke#54usK3x1@%m31kkGtpi-kX08lB&-C35yH)-PoV(Ti(onbN<$++8wqh zHi!odu^_yJmr+-=@Pa0tAEZG88W0oA0x1ZXQal9+b0=IBQ73^>WI%`s=VJ70O2FCI zcSQFn;ylXZ7xp5`p5kHX^L$T=kvD?0MTEwfo*{rG z1cCAx9RNtE_<#o5Czd$z+b;0*m%0pIGC%f*dcSx1U$#AtZd(FCcveIHxSxLT=bf6U zUy!}B_{R#TcpYhIJ8~=8mvpS8Lk%QHF||gnL>FI?o~Vyq_>_$94K()as8?%noau1H zwYPrNo+rGEd3f}lLO;STK>o>_0vl~R*|muRwKpYjJEK`#5CKfVn-n|j+hAh@>|{T& z@x$L{a3wnVhn4MAZ10xh2EqfB;?;!GH$cK9!NJ{vOh*L>C!qziGP*xk@8K0w3MAYX z4iH2)DIjSs2X0Nw6Q&Tr(kvR_5FdaU@Dsg9n^zE`7gx$CE7Zq2+bzOHj@n*S)Rovg zaG((%{NnEV827(E_sQGXZ~GsSK}~L-=84{>;$vz;2Cm+NyFP!tu)q1~h^t5Q8=qe} zUbjAOI;tAJKKk{|RU>dD&TK+KXzcyz!4mm-Q4UXO!*>~%=bVpqVqG(N2gKzuiRQkh zQ#Kp3IH$AP3Ww@0BpHE)5AA}auJgh8-yFCdA1F-a;Yk?-^GW4fr3? 
z6ke{(6Ar|RW`lL-zfe0B0rf7l(~)Z{e*Ow50;b!^p$s6^pYV$B8xq|@vqb7aGjJng ziKhR3e_F}+iL)!;bWco+g872w9s-I~n=D9w{m|q}eX2e8G5Zv#>b=dhC_Mk*U_4r* z&`$Qr7DHS|M=0irtzIhv95TZKZ?AqBq&wRmvL7!L>D*<@`JIeX_O8!mv-3t$2Ah}bB|muAjk?Cq#r}Hgb3!Msoa~Yr zKwa(N?dGMaU2G~oc(*vx?6{H3YnQmW^{3>kUK%&GhEDFtG<~o;ydn6 zrEk11v#?o|yM#dbd6IUYhzBd}@K{7?Y%%!IUWVf}8`l!^+>J+e-_r~Z9RSNZz;>_Q zjypc6q+_DgpUZlkSv0-rnHCjs5`h--n0Ioa9!=njN%Kc#=!eE52SKS z)mh9>A)j&Vpuc|H&=dNp;Np_=rr7-3l%)P(iXs23*s(+}F8XjPTRdkB_3u@GiN6^O z8UxX8cK4hjwsHmBE)?fZLMxqjueE9`65rQKWreSYkJVg9$nFW3q}j@w;9w+3x8xcH zZn+4&5K*@GcLm^=5=k?)Q|sS-ms44v>APtW>Po*>SUDTFY|47$o=VSbwqnaPVBt)J^3P`ga(0Hd8EK-z-hbQ5o9b(ojF^hJ)>*lUEJH=r9 z?3;gQU*8|LzJe}%`x~bD!um7;hf-f$| z9Q;|W@4_@y>@jt)sQqnLYZhKxy(IL>6;`i49W_z<#e%dDL@Ia;gBhVTJ`Lv%a}egT zT^8(iijhVMY*`ks1N70*9|fU&)ucHC z9lHT~ao$YeK()c)GgHOiGmiee1I#S7lkT`>+@awBVmOo-whmo7#jFJ>Oh@%d*hX|G z5+*`CWsibKI-DmGohKIa=3U}7+i^v#`Vn-*65_1ljfsNj{lbI{y(b?rVi?T5`l{6* zmBO-sJD6MKKJtDknE+wTYiDp_yGl1b5zwqgm-f%b6!)7HjH9ZL1^fBzdnMSF z4(oSQvpvtPqf>1|on47#CJEk<7wd~DcH;-HEMye2ubinqD!1f%xc=w>1SD5)XZhsT z4-sVV$BIi;oilG`vY%WEeqCtoHjO%aty|N%b*r$z&`bkV$C#RSt)4(&ilW&CH;b2+ zuV~Hj^U3yik6VeZ0AfnHaST`W;3qlsZ4 z3I#-c44%)kA~Jfd93OP5S1HCUmEHtkK>wCHA_{8ej~^#{n;vsH*r?)gMHQ=V2)}c7 zB<6$GVE3EhV1t{AeOyP}&5QHo4ec=}fEB0L#OcRur;#VlcrohFP2Krpq4>hs)Rj-! zYxu4jSDRwDBuj-9VY!2awdXH9`=9ox(iitk;cZ8G(fyj-v z>GL_;^#c>k|LId$j35L7rRxo(sX)f+pq!oWp{K4cKUQANTmF-K`+y8lme2%y7XGU+ z4+9oiXWwzOm(3qp2ecd2cX4!XOyuXE(0fTrLR3fh6dqhHjOG-?ateZQfo(0C$HaDG z`X9zhK#zq79&~S_e|{;YKE*JU;r|PVXGRzD9yqMMSg@5ap~#vHcKJtW1qqG%Jo$3- z6m4U4)W`kgTHM*Sglcet(K!9bYNFK@jT8i#n(fP;-)rRGNp&vb`aWe*PS!nz zO?`oF`!ZvlOIn_8v%YReZ=Jl_jt{25)T;`!{}hTFT6cUlN53BpG1>^TLq>*g#HHk) zzu_~||BOO?JT6~ohlknoLOh-h>C7p7WY>(O=(f{~z5WeiEYRh!PfTm{rR(ho?{$e2 z!WOLZJpcDcoe(C#pQcMH_D3Et)XOKF&g$KNA1rtSee5LFcj0tjKDm=(l*GS;@*$^^ zPhvcZ&>m00&tYoUDM?$rjiA{F=SM7~o-q2pHIrEKzm@3V}A{8vv; zIY-y-e^m{2&Of}k5iJXQ(;w2M9OjMaqjq6$1j{38UUd16M`@h?BBEn zKL8%+11!R#(PCJPG!`W;DkLk0m61XS35f^`ON$DLNehb!i-?PhiOWhz$Vp2`ic8B% z$jHmcN=i!#i(*vqNJR+=B^e1NNm(UXSxq@PO=(FZ8CfD8rK}*Yr6jMZEM=@FW2lCf zms6CNRh5;oP?A;Dl2y`@*HV#IF_BZzI&5%ALHn?(k+PDKma>wzhKh=kmX@-%j+U01 zy1IdilA)Hep^lb?j+UvqnxVdqrJ;_6k(!3Ok)gJsnYNL!p@F5bp_PTPm64^jrMa<@ zg^s?}Aq6QH6KQvS2}gMqKY10ZysCqlmaVa&{Sjl=BgXbtmaa!E?M*DaEsywHn0n}G zBQOGH=>eum zUb6IVrrV{TS0ww89j7B=pN&Zph%ZC#xlecWyRT%3cv z-2=Rxot^!BU44T5e2#f~Mf$s@1Uf|o`9z)YPYd>sa&w6dKam|5lpYyyCWh=E5b5q0 z7abgNBH~Qk>CCVPriD=akCH6P)%E){O)to66#ga22-K@cFzC)O zC<&GBHn(3lb-Cc^za@YC(B0h7Vpn*S=4I9vW!5l~d#jRr>rT!$x~*Ix zuU?}*WKrMmo`_9lq-PYTpR10h76r%VmE`0N1-VBZ@S*saDAwGe6V4x zr?jfR^Ga9OXm97_Q1|%gMBl(jb4MSytejs_wK7z`IC1q?|G<~_#(N9HZ`ay~rWRWU zX2%vD+_<;*@Zrw#%Do3WfA75D|NZLCt9Rete);qD_rbph2j9N_`1bkzwn#b{X!jZZHjf)qG{a<^Cn>yjW=qA9c@m>x#r(gD)<})yN&z1s1+<)>@A@ zla{Z4jr8jYZe!DJ>onJOJX&J6ZBx-Tx_`CQe=#Wf&^(p#m^!KTVp&XqR)7H-%)=}2 z`XSR45k*0PNxGt2T{G9i?oQp|oFu$R*us6WDca1Ir~6WAk8k?Vp~i?6-R^~vh}T17 z%XMR?(ktJNsch^-M_Ff@H|t0>2NwP>xHb6i`>Mysm36Xe-Nd?&kCKEnM}Biv{5uY; z6)gWOr}w>>`gKI}dq$FuZj^0<;eYR4ri1sn+fC=)$H|_omS*ps%v7m&P zF8;EAnJ%b33G|rpk@K*&`7W1w%32029O^7S zKrRI*)fzcTrR^F!kz2>sU7kECAvMWc_MajdnVt< z>y-KFh{~NK=1&KLo!bkPZKSWr1&z2q+VJG1In|0eCYsk=6PSYEh@B+6DF<)u6umkb z!cBSwEY*|8fM`igB|r^O1?ssRA{Ov zk)bj8P0>YnA)~33xEI-YS?%>=P+#|0O-Ha@>rPz_NvE_XeNw0^PRv8uO+I~9^|ZYA z!x&)7h`Q;CPEWz|b=4jp0ctU~E}V%S+jdV>0>y~m8^@t1R zxY>Nr_G9CGaaTO-MuSa6dWZNqNMW>GSM=Uxt%Q*Usgzq2?NTj^PNoN?lTV8uyG>*J zmCo8_byfGdIxMu1SG~5CYaJ4S@@%E!2ff1{z>g}+6OB)l1G%8Kg8iK~6+i#WUT*J$ zB0}FvHmF`{I86I}1F5zlNRi|TXXeu>^q zSd}Yv%4qPi>Q&`OWy$7Aa@3i%jqu%Q8|nVu3cc;jJCnAxH@#hqpXCp<|L^!v<@e#Q 
zPkkTT+^Kz{X7uR`L?>_?b&f4@^LOWK-PEnw#>Q@=WaDFq7(6rdYF)kc=~dw8D)Ptu z(^d>4^~}RwvN7su?UnV5O4P@XVMSiZ4M@pyUb>#}?D z-)k4{9s0XkQB&XTi21#0=dq)jAOAZ4EAGjEoH;}GKIU}r8Y+OS+WBAD2?;=l^Qc_`0b(oe0r9#J*jn8DcUA7W*ih_mAD4*ErQ?&)=)dml z{x7z(V-IuN>E#THoUeR~qVJaK)~0x|XSn60&VrRiX4Sz6ptYlK$oYGzN^0P?>lk8L z?`xdoVCxX%xNDJn1f$fWq_8{TTrYP|801@QyBt4*d6vf?y)9-%IzLEBf2|Y{_UP1Cp9?%aGDWc zg8M!Yo95T+y%4BQZ(Fu~^l-Kn3XM=C7NYijal&8;wf z`6YWrOX01H7>#rPJJP7`*+)6@1iR_%>?ifD8R_B1)5`MdHR{xrD!Xzc-GBre(PK;q zE>L<+nI4ME*>{oZq$<6{eRj{-7Cv7m$-2eWI9Y7N%;h6yV>}FPi7kj@vQTH?%xFrJ zVrTrG(mj{i!e*kvC0xt{52H`ffuArJjvHJts~J4rd#~g`CGErH&NrX?Xl`i9tG>L= z-!2ZCJpJB%8p&dBCHazDeXmWmrLWvR0G^RMfV~@w2zsK3xuxM2UDWMp^zkw_E~^^1 z$SWz#JhekG=9D*=CE4aY(k^DSLdWU#vf$*^v8JEhkQnnEi!!E%2H zaEs!>l#k+IDK2l(lx0(vMuF%=gd#~Mxq|XD!DNdDc2J)U*X7b}eL7@9abTZkLy`tb zo9?_F;dS2pY2exwl9}`CFc~ng?q*HM_gV0P^sC2COD`OiUZOaA#AN;0w)@4!1K{o)q91+*<# z({zQRdAX7!EO3{}*UAd*JjIbfs7oRV8gYbuhvo%+sk5d5x@sS%=g{;@}D z1O}1|x6N)inJ-HJ=lb?jJ!~dWxc1!dWB>dXaZVd{xCB-E`3uA6u0WCws%x3K59EHS z1lIYCvJ1rSQ}N!xa8t+8#Uo_MikkVd2%h%pqzB-l6ANL)w$qsvxe#AtEmuypg$F#N zLG3wT(Z%N=ccEMdX37^`zk` zn?mg;`PazmCFkB9fCoP0ozbgO*KR?i*2^!^pnDGk4?%q&)LGZPFPoKpBCA~Z%MSG{ z<&frlGwV#9eQ$BbxARtm%Kb)urGHELbcJbjhjCwmbU7d=KZwpFK+E^!Ml~Rq07S7e zrGF%g;(+*Kx?D6}u0E5n1(4%HIw;t#DC{;rcbkpzpaCVRDFR$z;9}=thRk!&6C4XX|K4(j(Le6V0MQ-AM6LRGt_{bA%pac!*K!Z=v(C4VNl)vUJDRa~RQA!syS z(8{W-&#cfUfg%>YF8Eq~^|DoaAswl7hN^}PBQ<-IAY~@VG#igo?6V;caOH9n0Z@q51 zi3ecV03aD?%|WaK@`xE&J$%b92Sek6Ob#Xh{c5!)PPIBk$r-6B7y)WZft~u95``-m zr=l;B>cWYz(}220(pk@F7=axhSd2||iH_&_Y=~Ma8rvu?7*&x%&e3%XJ;9cg`hC;% zlGhcYfwpfW;h}&A8C$^({&`CiaesE)2^#chR9yh)?1Ue%#vcGB2(lc&C)>&0lbJWA zvnBSMRQD^J)vK<3&oYdyvVPOihA5e5R2v{mF7B7e;;RMd-0bTT2nT@Y!|&2+wkb7_ ziP$kSY!_VUE+393wLV`C5GHnG`2gd;0n3g+SthiQ)%rFQ{e*{|AR!t~oDU^ly2wHv zt4EIz<7El3aUS+RuFJvaHT#nsShT_6==Rf($E5HP0kq06Yc4pyp+b)Y3XiHiIqW3r z1rH@DRA`7)o`8pN;IPSXH(Y!q4Hm^k4Y;BQTvMIlASg9c->K2^SF*Wt_LJo%)uj@B z(-ijYQuZ0f(X75}-x(I>oh}w7#wV(^%sY&KmQG1!gQ!4v5^AlX>oFC(q0ypehP_LK z>t1gbW&?BsBQd1zU1=*}CP0i0amANkqG1J`nV@*pbOaJgf;mT*3yPX2sj$NU&y|RzD^2qqfYkCVw)7DT>lBj}xPu_F0e@!= zgrY_UOabBF0S;y`j|_pvNYEmCt$+tj*^F@)q;u+^w~L_l%(HWKP}*O90HvM`-a!w3a>qHVEa?iJN~xbzk5*J$Mi57{D`VRu#OxMP(*-X;#<_% zZwu&LI9CdN~cor-U(_~ua=5@dlWaoM30tEjsHeXSWaEz4}sBV7heIJ z{6{n{HyzHHwn1kLaREY<@gdU@Rb+1J+tPvkDubVlYyKrZeN~zo&Bx+KVYI4S(F+aV z!36)Y({W>t4OrbCtg{Ao6$ioa+V@Ka5V-D1r&~A@P>cvYn1{2cIYQ%nZP48y?^SbYpQ+>~C}43!iVf0wu7V_B@cA}5z8!cy&fx4^j__VU`%Zdf zwL*qE>;z!yAZ}_Djqc~6u0KItCk~JOUUDR5f+_LdRvlJXM*@%|0SyBNj{r;WI*z?v z*g{vEL{)7yEp+3v!>0j9kT)!ss{+h#z_`isB$RH(*t1``ySU^X0`{4VK+b{C=Vs#p zw-(>1ZIyQG0f2#wi9e683Mljlwn}HX-(eK0?A=m76#|!?IU*_%%$C^9)hV{nP2nBh zH5hEuUP<$#&>t#}}kp#%$Ow~qe4(=U)%9XpHoGi3E{D2f7E zfK5+F(_5udZr<)#{)T^Y0;tUd>(Wx3RyvLcua>_JB?F~!;Nvv#%EN3azHYlLNRJGJuz`cUoYJ@0Ns1rJ z*pqgyb)Sh`%RoC(q1EwduU8IdkWi7;3rib*k}uNQ&xRaY1qCQwSEz|BTTc8fy3>-h zP9eii@P|fw(MPpYA8&P^=B_sohS_}7bsp-+Hpp;1vqdd`_A#IEZ$QJw~tvFLSVbVV}0ysP4mNFx|A)rd-nBY_{WJC}Uq5(bG@N_|cBL`B(!E`Xe zPLGFOr$(Ag`*fYE3>Y_#FmBAnRBd2NbftmP$eJ6bO%O6TorAaoNPd~{@F5{jiU~9) zbP>i80ud*|i14z4k1)pF(xd_pQ6Q`PWgs`SKbZ%qCzA*;CmL^Uf``3;9w*2rC_0KA z+l{s@Jh#0pX_@oNVU|d#Z#tH)lwd^%ooauRC{wkr2Za^#PW6c9tX2V#OmUOy2x;G4#1dz zxWnq&C1LM!Fzys^1p&Lx2fw_TtX`5|&qA`Z(2m@2#LJH^z%73&v?k{OS(XDCr@g+l z0n3d}OYjo?Jec-J->WfBcXDpGfHB*yg@e+cy!%!k3cf0yyQkT@C)!ppHZV2CeEXgz zG|d-AYRIZ83XJnZhHmHqS{(Vr?a%MQc)^ZBAfcjw&u}n24#M6Uh^N3;0YaTLAfAIs zeuBXBvUMJ1^WB=396L;ZBukJO(#w@1R3OQ@%7K^X^|s@ucJ-qbNFEto&KKmUA;J^c zlTHA6HYAOUxXZ}@Rk zE*jW{LZUe6e)6SD6sQyV`6dCDU~p*Lcuns(Yx&@~?p{=Y4I$1K(!=YX(NmKg0WNy*j0%G@eqTHRGkZ7GPlf)B8Cgt#$Me8vNN_wK!i~WK 
zJhi|;6#ku2lj9pZn9be#%}Fi`ScH}{8=+BmZAOXPaziPCSyA}tE~ihcwR0bD3-9#7d7 zY9e4lTtg5c2sa;KKD! zZ=CrK3|GV8iD|`;ks*KFT3+5MEq$F*=r?fkTFA@2O^<9{{q!3dBEt@D-}Of_-p4w; z#T0{}GTIsEpNnW%b{%^9!%H*REK>eqNDsIz(1OQ8zA_Mm|7GggD-4}RmO_#0EB2g_ z48Df|fLXBHgh-B|%1lSVGK;Qz=!ydz7~K$2-&Zx)`$5?k6y3M?|4BfH)Swr=G99FQ3hQ|<2bOfmQa7d{F zY&!tAYjR0$zuffrk@u?;FN&2mC-pdH5*R%W?s?tGtHZjMr?@}Wtn|jB`=&n5n?0XC z8ErF%5SA$h*nwODoehz$gL6o&%!tCm9*g?ChM4-$_PoOf><$nrkdMQWWTXpfY(G#s z#bfGY?29V3c{4&@oekZ+)Pb2ikY$IlQW&TnIGNUu5Up2}r z2{~yyejp4%F`B%ITvdGA__IS@n4;T}g2r}nYhdMNjT`m*x4VSk!ruh#Y=k7@820Jn7l1Bbh?BGAruPvz@kSH8KEX3 z)W<%iS0Ks}Vxo8#v zD(BThX|IeP`za#0Qv5Mn7vHm{0Iq46Q3A^zs)Whqb3T?xcgzCSnmrR~w0v{NO4TOj zTb=w|^G4+DMlzoTb0$UXxq#rxL;?tozrS`nM)BENV^-*?iM6>NRkun$ zEgy0T?G_guJIevWL;(t=hV0_Fi@}MDnaF>e0&>qc-(GUCG@uu{H7lTHw`gGp_|GGk zPevN#z5&T%3e?x~7mZAkYH$P+Lp2aWg|8ag6!Im2An{%ho?@%msen_ppVN9&zo;m*2_+GgWY?&( z&<_Oc_9_cz0fOJy-;{VbIS`g62v$ob)zp|XoXVX6FiCwZHDWI1L4czWj3`#1K~u8` zC7}SZUJcWK$EF?m^#X_%BsIgj8HO7~ zDcD^Q_)_7pHf6?YgbR_mM~30#R>d|2(Qy8r5oy&X_QGphV(-DXeDl<9?1?xZoRL|+ z0v`Z3E2d+X7D30_jwPr-eDSXqXZPm&g+k0EGM)*B5!~FufeEP0JfKzQtO6!<%U8Pd zIXY#`7JjH$AW(foLknDa1H&^|#~DmqJh-Bhu%I5Ru&9?xIW;>B#J<|BJx#QA3O!Lc z?RC2t{evcm5G`QiS(oCO?H~{?I$&C{+$Mqm_&?upskTj%{C@r_nnHfS7l7^@mA+$+I%k9qCQc03ZU3G_4+O=M{#rHSA$7BAQ z$NY27e9oCO@6YS?eELjUh5C=8&dsNH6?Fue?ufZj@w>U?c2-Jkp4DP*HI=qhqu;5x z(n6%O_ilhCHi<0dc+~4pde$(Ns@?0W!5D;vXchiw0=MYxn^QmCwgQNY6(4G0Tl4;&4v+3Sr4 z0C9VbK}0g`kp7chM*Q>v*M5(*hd%3A=bw5Kg}tcGl0isQ9tLSyO3$MEcIvE)$6QuD zSnwxr|2$K|KYqDwn5y%N9QyS=B}uI?Y;VK54(fDV9|>IFsYWfP-JxG5J(hcgk|>@K zTFB1tf7+oHFo9am(9r5_%yo`$y|eeDtrM``zS^y zm4965M0JXJT72ni?1Li*4~UoYKyn-c<#+cxDa#D;pZgr*A9tERQ{Vb9=_M)`o&F~cB9(m`41K@jS6=QoxBn1kj)qr7DmkXwHO>R zO<8i}3Q(FT1V=Lv3h|x~JTs|-5Wjk2%R!szVMe$TV8010Ce2i{CPf8+y%Ou-W+}hh z&Cq9U-f!-D5=)%}=!voC6NX^mYtr%Xm&ORIVVJv=<OLfnCPKOhRQbBaZ8xNLN3L6@BLuwe!Hph%Ee3KvXIEb#-g`dT=)^WZ{0~}Cex#c6 zu%ZBOug0c{{8^!Lq_pAQern;lTn{;-d8WB$7L1|MiKI|1;C?wErr-50{x-1s&qBUy z%6;BU!##%>v!WDy)MGtt+$?qx+Y_RxgRwTVsH03lO)G45Y+15z`OqO{GEe+)<8OdW zwRp6q`@qsDYiHUbp@q$`kbbP~wDwQ*0dOp&9PI9`{2S@YSbZVFuC(7?`}$Aq=kg1g z&y|}jF}y;~Tc)q+ZCs+a#FZ#>mqI?D@ zE5}F8AjWODt%FX~W{h)2flf2>2vb2}1CIXli@|10fIZGu4)MKMu(Zi4tp}+^MW)tzIQ2v~rm3m9iT;A7!KU15GPN zO%XJ`RqJo%!7G+W($^_AKEJAy=$Q3GX^z3T6>9iq{5T{(b~69x1s^xIgz0f8p6R=s zCm!6^{v+DIj$Mya1HM`H3A2~UGl0W|0{g4zdg6hFO2oFz-B|W#n$Xlxh#0Gj(`6Fu zAaeh3K~NkvYPMK3hw9DGVRlp&z_fhzPAN-v-ZP*B*NuQg0Fr}?Xt=b(g8eF!>eIcZ z7p%(5k7vf=xpL(2@&nLLY>0jSLP2Z9j|K=i>%f!*<^0up+UK;pINkxN5+b1;$kIVi zUm9lRE>@(K(iwrAvr6tUwMHYAsmlYqFp%yS_FpNeA zSG+Fe&1kQb;8Hw-l0?_M7dFZkN+O8YRNHWaUl3^AGV^QEj}#<@189WoQ^^Hn*(D!2 zK5I6}7+|K)Nyu~HNnwGr90|l!oK_-1ZDtFI!M@(bZG)!lWHBX9>ch&{R<`q|pdSgy ze2J#71d!%Lz#kbTn>kESTmFosBWv(_xo-8RogQgz7-$eV+)JCiX&SPtkisMp*|P7m zfayr}_uV&-(k1D&K(q2DK{-rTN)kpoPoYpmCi^y*eg}r{cC%updCOkctjZ*t71~_1 zb0rW_LU&gH81R zX+D3yJt*on$<~3E){IXp1Bqos7k2ad=EB9zvgfNk(nSRe*~l&GNj4k7=W%`75NtEz zhb(c8zGvG8)aMaF8e$TU_lW~Y%p#2!+DruA&?OZicNl*_aNB%-ns>vUvFqjK9ckSd zd>evm1Gex(oo^N7%#r`;10IF~L#GkVDbSfijN_mMzk$cS*QPU(2YV3mj3iOQTYI?e zcnkG{asM4}tr2gnV>7%5g_RGHTWNGwu zUh$$hN?APKSVXGaS-@f<_(Jr*La5OKFjQg;Tk(suj~?@bTttJmqYkb=oVnp{=DRKBK^J~P^-AoT%`5y zp1UoMu4IqWkl*yEdt)!vSKGeEXMK*?wA$8rL}PCULnD{32#kD-95LCwGPri+(V1(< z$1AC4&tISL)iLlSvJs!Sf*?3Rdrp$jjKKqliBKA-L_iv_{|bgULc{0T5Mktg1p;X- zIC4;%3y6(Ke~P*$B{>iY331tLIN z(4pC`)%eCim^(cGx}53uzBp*ar)Z{#Ce(b-wpEj(9YCl5#PY~>fM!W2VlW?Tsd#b> zW~in+Aw(NRovztiAfyFfkfV}CaX);k(WM zymFmy4cqZrX~11vvQ>AeoCzo>@Th+f`!Fp0{b5piM8V= zF=oiI%tO^opgOL0vAxBu=ZLLP`g>f5-I5>JEHR&ZIQXLCVPBb|t+wowF1NiPNJzrN zz@vQBKE>4hwHsGe;m860%QU~WT((2?v^>(j2vzb?kiegkymNJc|*ma7HzDZe953+VF}6%Sy|c 
zH%|dsq|wFae~*_t{3o%=0t2Qe$D}Qtv#7n>`8VlO4S0zn3?p*E%FCBCVZyG*gPD;@ zl#z-Dp@0|{CADLaXY@J+0Rfo+1Yg*6Ek8|!wVZ=&XZxFi4mzIDTF&!c7oY+)d~E#uF~n={gFn(d z9c4i}+;Wr9>z+fZxy7z?h#maw+`mStQCDw&2w_T5VPgp5QMcgVgLe2~(QhXoV-Q*N z9^kcEY*Jp%RGOIU?tgbd!cr3HyWL){us0fsO&1ii1;ZZJ;7--Tr5HUp0MWQP$loM{d>ur|c1jx&4W-#KBwaFk(-#Ja}zH?4*X;Ef+a^H{JIx zs_H-8rh9=^*3^A<>WO-aIt2focsu}|+Vth_)n{IY_Dv6)H<-#gwY(vB;$gF1Xtw{A zg=j{q>zOHylN2Nlb`Ct8&|4NR!*13oED&C8c#{9_+}(#W^O#l0{b=O4hGtl-fFy1oYD&3UeTJ#bt)PjX$Cu^5ugZ_l$`{^*I@d{TXEPc)(l(lT1-<5_zC|Avf})gX zkLCS;b6;(1+L*J(;D0%<-rsSQZ=w=!cR2~3r%j>AXN08OaB@BGBe>vH5`+pd2 z7Kkm1>&$!egpo$>M!0d~A@-U$&VF}fb9^B#K5IT7;&wbpMJ%|8MOOa%AWT(B6j^l0 zArEJ@>~oypv(?f3SZ^qzk~Pc?>9*)^$fTc7hwQd*skT1$GJh<-_Sn<$kT6{2`FC5` z&*7D?#k|4|Tb4g(2}Vl`QH@q>(Po{SlB9uf8PQv2G8FeJ*k01P_T>TH8_!Nks;`-J zEC=?UC_cVGP=$)&g3(P>{pqCLXt_8ozvM$*1CiXra`lFcuGoKQmK1 zI}weUSm)>#MTOb(x0(lmY?l-H4uzjFZAd`oA-kivP?3(d0MQ%V@c#Sf_d%OKJo^w& ztU4^jcq!BT9<-(Gt~~n3Em=O?vIo?ir*X3J+KV=vdGq3y_Vh@B@@CP$fzzS;Tb4r8 z_g-uraA{6Ag6->GRd9E^w=WwA&00Fr#f;|@6br&FDO(s{v)jDNFT zcI1=Fkm3UJsa?3WW%>3ekgDp8wQ;*X24OVg*yU~Ct@eF)5Q(`mi5GLr+K|T$j$P(? z^acBEymxp#;kq&`Fm-ine&a{}eEHLPu2e{#`#1uzMpm0$0Uz zDV(aC&Wxp-!#2JTft0k!v(cr%(FmhoWHr@l21EbzO)?OGll~2tlioltpv0a4yfw*Zv9sVy9$EjHWZLR#jMkWufL&TW+sL@-;l(*Hw?ZfqC)mmd|&u-S^t` z<>z1NB|bvQN1~1sP^jZ0%F9A~aRePRZ+d#1T!|1V4DUi=IV{B+EeKC!Z>{emVuG}1 z1)umhDhn2ks}&%+A7|s!oW3IU21vHBpYgts8hpYJL%-l2lF-!0yy`qz&or4F%)FCHB0zp3xF?Re(oFX@3YuZZKsWVZlf+1Qd*KgV?~ z3#djeX%{2;;y@zrO73$Krb%1ol6!5>r|1o(hA!KRZ9h&m9@sWBd?}};*LtBA%R6Nx zYqH0`{9_iy*@fRD)-=mD8-8yuZO9aQqOxODa*7M zbO;IV^OLFm4?H5m4i9?_d2=*QPC6iVxX>paNNVqnR$}#}v$XZ$el2mr^N3tV@bMTW znxzO*`V1~kzqnB}0~@lUQ&g)@jzYDW6wF(TsobSxjE z#S`iZv=cl=|DfjqkU>z%R=kZ|_?cw0p=4`cmBu%OVyvXrnxSEC+{&pKyRJn?JXUndVmyX;J%es)-y>DU%WJ4(oYf6Y>GG6uf=3I&yaif#%g0iP`bYpbvax z*zrStTBl=2^KMptz==J2$=G{gC243^TIr%0^L`7eh9~{^m>wdc&C?-56)mkEY7YF zM~V}o<+xX#aYrsPy_I<@uS|hj?8Ku>W=;A+D< zI4oK&w3T)I^nXPXs_RhKMPh-UQe1yt>2bi^0>b`^bO8%Th1qpPDZ5U z9k7MtGy~jy^4z{GK+e%h*EwsEbvm)=w#9Tg8)uUxbrjZ1&ME_*^2&=s3nQgR2Yc{S zJ%EiBMBXQm>L3=C$LFyAkFGE-adXTMQ8iU$p%RzfMhN0rvYekJzADSJFbnfHRtuB7 zx;R=nzAxWFl52*x0La8-HBCJ7Cx<&fns#m{pCVde%%Bx7D9w&BZT(VvQgaYV zbI|>U6^kQxB~!wb{O$NT&=7$+#Oa#d+t-&{c+8@-GCB6uI=!C)IiMv|63%4duU>}q zt7e3RW!^p7X&@FFRGL(Ah!#&25Qzz6tdMT)z`_ELmO_lY@lFLz2re}NF!m!@cnsS1 zfNQ!cJm#Q*V*Rp^*X4)K>1n3}7Teo*!;l%=__u+aSa7)xC6`(CF+~vJL7j^$f*S9c zV$_#G`VH)SOX)qmST%I4R@m-DoFx6d&QCvtb(UAHY(LlEUWuKYha(6G9p;SMOM=}t&Utj?E7^UtB=@PZt>{3 z@FB=ARIZ|K=ex9kUM~$-0RE+mfk!PX%SsP@f_4}fr|F*BI=VzvklHRFcZ#CChgU{0xftL)PHidp@mv;K^B z;VN3Gf&TpN{{UA_XD!;wV*B=$iEBDv=wtSLu{FvbkucT%P@a&4&;oV;^VHhXU7EA} z!hq)l^!TE{l4dOjilb`1T#l!=;)o|5G&tPlnIvq%h1+XGA``Ro;Im%C z20X2+UbX=MGrpBU(T?KnX7)u=D?K{#=7_D19}*c_VXkv?KZsfWilBsvtL6E*_Veu_ zhhUQ7j%AK|y_quLs)4N8h=Xp+OTCtTl0dWhV^(bU4$J zn#oG#{5>L__`IDM!R+~W%`W%HGw*N~#E*A1*X9YEgL{|uo7?%5Ov|J9ciP++8`|V! 
zbo%6M4WMxL8pfC^KYAeXtNWvH>(j)OU-r|WdcySfORF^3<~5k$vpHF6oysS`apE>_ zZNJd6WX$$944DZQQF$-`4mJt6(&18HCe+e&)7d(EdBRp?rSt3;s+$_?S6QFS38$=T zxNx^g+MRnpuTM)4M%%h~L9RxJ{Jx|VX^j>4w~stEyySrvk%C-VZSydm8g*bb!l%T~ z#hK+_!VL{gmYoc4sms}Y%ADJtwuJ3RO~gp*9u(XTRDa%5qMsG~d+gK$RC~VRr^o9| zbYH}MF|!MhHf+CvG3Wi^K$#McX9@3~nMwhFQJ*zGyp^RtboZ})ac+ww2mE38yTWS z#sezl)ccaEt?@^LpP7V)=Wn(eEjetP1saf>D47;H?m-_>Lqsa=h~?HV#RoD-S-- zPc!28nN%Lml6Tah^MS$qhA8}+LX|n>!WqIqYUQc%Uu4Luhs^vk=6`AoI#jnDx+HPlUCoxuc57NvmvT6@Bh2seq-xY#}f!;$v5 zrRHCu2@mo74T%6o2w<$>)4nj#7a7mdHR7{zweZg0#VT&m;lzSZ>($BKJ@}d9fqUo0 zsWFMj%7~dvc>bd zxq=(|RyPzZXY5^b}1e-0A;*WxfeAJwzEk_{zBwXYG~Le4#|rgFVEl|mQH^NlD5*I>{z?FZTTefo+PAk+ zheS;b?$tc8;l}_?!qCpw);;F_!+1mqRH8lzn*A6PBEr`Kq&S#DNBtk*){Lyjb-+nW znyb6luiwKR6yVv5L3*Z+abba7aL>GG{HBglzMy?TdrYa#v!RStmSk6F&&jii28aCi zuP>-5`;cU4r7$e4EXxK>vrtjObRer|4L68VdHg#c#+4~?Sz3fPgd$uSBs=&=j5KgO z%|YpBGvjw`6e502tI1H!q@zaDJT zRG5!bA}Q)}7g-cZ2<#)mZdZC7=ls^E&X_r&`i}fd4|I>^6x`BeK?3-Pcdn!EJ(Thp z(Q;mMw;A@axFs{XVM3pktz7ohrmg0n!&v5V!D93#`yc%g5Eq-=gWv32s}ksu9?jDy zj$F~Gf6KOKsdO~N-z<#}9#3Ekb{z@05T;}{B-_v&Zv;nd*}U1HKxH)J*Jn>5!i5X- zDZx}kO1%)7MH$(2yUT-kDeFMMI5rF(WYpY`D}lVn`5NLieq8zRY=}=F^EA&T{y5U# z*E;ubd~GIxOGWT!2osx>#z}mXhiZ+(d!E6|$Kudheq(TfF81-Wb4Bb!_%z{wsGE9m zKzKa#amo`mMTsc#?sS?h1DBg|t$Aq{iY28R4dGdH&f0FgiMc@RhbWf{+c-?Hay#LX z2du;6vw$8p@QU_yx#=&Zxi4fn258nA&YKIG)KKrXS$Fr$8zrg;C30=L`Aq+;v-F&e zJaVC(jPe$Ht~o*fpGXO>uz$c|={%JEGfJTZVl4&BW$RpvWw`3V|=SD)efsO z>Cbv>I z%vD#6_s4gYFuNqhj@BDB)iWO@p^FfF+g6eYAoEmFtro5{$qGMf&u(!2QP}?{nvoB7 zU0_`Jp_`%e`u2ELos}Mq_DpB2#JkkPTKAUbwt|VwZlGn|7aWap4bx*8*o>t^I*!X(uCJ*`HOEUtoIm$Ecc*nM}M23UTRwiFc-;7 zQ8KeSZ6*#n4G$a1b6sv~QkuE@2P0c<(xjnZJam+aE{OARr}v?4 zB%Ys_l~^P5c9mk>=d-pJlepM8_)OI*yb#c@>~XIY7aj9JoIURX)E7aIz zJ-Zua6CoohYD)Jn3iBeze8+l2)j5}LfrGNss~bAVglY>!Rd zm_2tWyCdgNw%b3W4B3uN$2yAaIBWIu_*Ma8iq%Sk?Vr@KQ7hJZUQy+#pnyholFNwwG3i=lpZ^&zpa&@E!K}K^}D!3oEr`rOenPy7wJ_(o>+(++J!IdI(FF4 zoX7v!#Rxq8{nFW+kDML0cy7#e&$c|Q+?42mZO+(Yx&QjnYUq^?)_5}Xbjht>N#hw{ zR-X}(jxxJ1b3MeS9q@H$X~xu`#k5jyT&XvKvgzVKdH$R|e2*ctKUWtQ2a__Dgt3D& z3DAv$>DBEk37zR%s|*q%!Ux^FA#WI(`8E9SwPAN83H>3C_>!<;u=#@XeqSZ{?i|j_ z&ui=oJ5>?^L3j-bYZ+*n)sdtedNY1OhZmiTZhg0 z_(zc4SZYU*aW?1U!d?g^P-{hLD02@=NmSOYPp1sQ#x#tbly5qLTKlL>$2BL;kE14< z3hzU6kNc}&0-|h4?_BlI9rvZ%oM#}N*e6Cpl|dc?%+dDFVLS2;BeDNGeIr-u1nlCv zO4Ci*Qh#pWgolZ>ov8ctrl-_jd@jx3s|>P?^mm_KY$@}zlZkCZQ9GGG_Zr_;Dt@v9 zf@$t6OA=1xjMZ;)&zFvOgtORwLnF(^ocQ6jk=0Z4sbu;gb45q;{ghRKp9`?A=~{zyH99^Rla%uRl+7k2bEBXHK@`p6lC2bO;|n z0k`#>5{hR=CdFq6I_5sYEY3+HLEAK82`t`bh#p?v^wfdvmD~#cUpK88KOx-5KzshMfY%@m)tPB*jEuc;(O8H z&Jgo?F+^sv#rGJ6N|0ny2q1)*)9*WQ)Hnbgqh<*s`C9Mwz5daQLn8DT0yQ`x;N4k( z=eDh5r8LM3STLk0D5r9{o!(l2T@WjykTY;U>W6T^c*}_Esl(s=_>Ii(rHYqKK2fi4 zMMh6Nm!1(ie-9{{ALPAYEUl75WV>2nPtl?AIYE$-G1a1&D<-BCviBfGHijNn$-Oi) z_03Z*VvCIChMG6x0~p1dsRTpU);F!zHKG)U;O<*|M^=uUgVzzwDDw+e7pn4^S#rgs z(OxAW(caVcywrrOmW%NW_G6If*oNu`dC(dvm(+Sb*AKll6mZ!60xWsy2+P>RV9)^} zdXJ9W83{5D3!53!lRJTgvI17dgU#+|PpR#NfOxs&`NdOM?#Mb;bkkrHT}vjksWfi5 zh@e;xp_IO0LYTWgO4#*+{@H8c3)u?2*8xTLo8LDSuLw`?qOZtOxzM*U61K-{&DZ?V z^^Z$)H^ZvQdudSv{)E84kel!$nJ9CXqWOk#G*r5MN46T&J?M9cAc*(>gIae6>9p)HJ0W(i=(UT8{YqhtXsfIZQ+B&7`0**g!E<@K`4s>1;y)|gjx-14Z%dp#S`uer$?5o{jm-3#|D+Wyp{CGsTPRk{ zdqjaWMK^Dd+3Plo`i-a1Oh^3mePB1*LIleN0_t0VJ)8 zOR+7>$D-uQ?BUOnpw_s+JLww_*i5F((?@CfTeVbTPc?WYRIZ@amfPy6p~xqK$|i20 zeRP!W$F!WYhrJaf+l7%5DS+iOgcUA{ESg^g*RfQES$4$sbC4Dkl_`_VJJFW{=a_Y&{A|-1HX153~100lkbn zZMeuye67_vrx**Jws^2Wy#LTqT0vPoCj-^WR4a(T4geUeS{f~dXbojOR3@izAswPA zW_k%fgcQx37{YbGg|y2M%@37cWE|Em=pIh41%m@IVHgX!>iUPyaECNax|yTpGNfdc z$o!qUV4(Gl4BAY5qs2Yl^ICM)=a$jKAH`AR=slD8k7xWuFLqL6#_u2ByZaDr<>@Ek 
zR(#q_Fr-&8yWT2BMZaWSrr#jf_1M=J7$F$hZtio14GR+KKIh(y8i@7VDBC-Z85%at zyB&FYV}twfHr05QfUfZBIDz_Mx=~i$+|lVA-u&p>%~beuyoqvV&aN8DS=?9ahq6|FYgt ze7!HR#S#trJAYvN@60RE;m zo3Qrpe5=@njk?Oeq?Qv1Y4@y$W#(3jf2~TVfauwJR*^pseZ2(ej%>v`Chq&aS_b(H z17?+(+H=#3rUo_1vGx}21`fFa zrPs^pdLRPKKzazNkYg-kx3^+4%@9cY5ahtbD zvbqu`H8~oTp#MD7@aa`1z8_7Z7350xw=;4&YlCG2n|v)x!7N_wR+^JoPYB=eZyfTk z*EARo91I7j1>)(_lr;?ONVAyY{JMg1=SBNzom{rCN==yswp1yJ?Ff1H6}gQ~8BkK$ z7pTo@JeK3qCm(Hxsn_IG?7W?xQm%_v5bMJr7Lq!*e|+{3*tGdAQwU+t&^tDJ#`S_^ zHXxH5y*pc5rNA@QAO{B7?8q`ZrhXxY(C+~SN*Flry#Q?-pf#XceteA^+2}ifaa2o1 zWt=9Ic_d$fFxWSfrZmk;eNy?K;m9`_n@^+9%7R@g$swVZlA%uE?C53P2m>1 zri~*tYJ0u+I{Ig??qxrh+Y+z?+8HPf7E*bW`fe&W(WFg!vTb@YwHaB&LU-=kOzHxt z^Ib4SW8Ihqbi9S2SBtG!(^E%4Blc0Q9Mh10F-_=G@)BXegzFM|%|Rm>0KBn!k|6b)Icij>(o))CTYK>#ph*LqKkq!~h`bE*%M&fus4 zD2-Om$mSU4WiMJiNj(RUx+U~$N=%v@vzbkp2Wa!~IZgZZyL=v3G5{h`OX{LM?b(DY zO1upCDN`6_YNU_Wo=xp|jV+oL8tja;B1Pcu3=sYtJi5 zS4fNGYz%{qKl>anNM(h;Hi z!Aa&Sh5KF5W6^z+5*2x+JLN1gl&R9^K^%e=Ic)|1dgIk9{eNvf54&M417XXjWtK}r zxH7Rfx?!Zu{a&=&RT=x`al~2~=(B#9bY0n0q9*f_4RaI`JM`#qvh7_pac{-Nc{VW{ zAx#x8pHUMEHF5(s)>&!CkrN?n?63JZ(avguS+cGkN*`U$3LttH6pS+w?Rq8dJEGe_ z>YqDR^Dg2|=*l-NHPo>A4GY1rgg_#kN|b>1fU}Dn<0FTB*qDS1I5z~akz-PX;>}8} z@G-5Be8?WboFfD+hcWF@fB13e&)B4AYQiD@Bir+_D;8`yIyF$Lj2s>DtGk7f_)#cL-f*;^CQuvjX&D{dq&?S z-_UJl<+D#sdWld2I1x)x!)(-W1U1Y<8N+ge1W>;e@TDBk{UFq3fHdZ$?mR5^QA3Gx zytfjUAg`L2vhf39k=&TtDiuSZU_dB6XjU511d^2)`ddh#?iaxsJFH@p5a3)X#?K~v zi7+-@Ntx)uXw19wa&2#t^gtH9k|-(Y+tY5a&D7YtthMeu5+!EmyH4f>^(O&X*c`tqm7 z-m%YrZN#r4W@|Pl5Q~jdxV#9=No;hiJ2M5-V9ZN3N{_-&_N95GAE1nbsks zjl>!&`P3l33DK^V+NRqrH>y!lpaXW7*r!+uMh9y8My*p2!XjS!ACf!KcMp2F=~E=~ zsq#fo7+)RER~IYHY|*6050YNw8hE4nWz044$@--^IVDnX0H|l*w4;+1zj`(?nQJ+v#WTaHd88s@wpatF;2AD$a z`UXsekE zotL%yP}5np?kGs_RWN2?lNwN`!b@s9zRwsm+^k?^C|Feh^`Qd`QpE6H{KW;l*`^<~;5KuAae4FFwRb}n z>|Y%&CcShdS|$8j`WClFiA^Nza34)Z_wTS~SO;FYDySi?nj&aG$w|>Z)k0FOP*o4^ zk#iF}vYCC%am18)dB+g^OkIZC;S+&kALdlWie1w9%s|Z<2T#gi@|3|_VA71X%uF(pw*>r*gTpm7bK(5+xus2JZV#;pq7dC(-`@tHr_=PW^9 z2fMW*n|6(@H_O)3$hP0Wc9)bmO;Zk0;t)A41LP&d)3NB*CF>z&Q;aCjME{Xb_mS1l zFY8aRNfT_E58AOvq33p3?`!!4*_Et6ovUA%mldj_`zh9vQm2F~ zR=4+W>pHzUI>BN$kSi1tG9E418jJOVue_Vdd&11Cak*NrexdGI7h9i~4G{MNjZs%f z8TPCmv4GA^3NHU!H{tR+A3CuwIT5(`Vs0)PpHz z_s8$)zqftaUIxL8siTA`KiG6%RabK|t?OJnR}N;#u-ESeM#+857U=3Jm_5m?QN_)F zbq#!0Y;)jmo6HqFQkah^OdDvo|9i~zII1TwG??pLn|vrYK6Z5nG3Of2kL^Yr(RBLZ z{En$_uM{-l{ydI^) zg(gFFHGjQUBaVfgu_v$m^}l0rxI`s}4cM!-Q>*T`e$kn4)@=|_t&*3~USfO!Y>ph* z)3+?~8nGC_#?=s z;8&bWw;L#3-6<&qwF&-ZEjAQR$~uZ=1nh_;-(CHIU&Frozp{mk7>%k$jmOki`;8Cj z)>WuC*v2!$5>UpPWX(Q2r4*)M`8`!i+!A&x2j5!mgHHjHWh~|VVOGmFIS3(uJ$oHd zNFws-)MPu%PjjDj{p;(JV@#4me~r5&IeN1nFH{m3iyr^8iom$~!x!cnj$^7|YK|OJ zqfFUeh4&UNTOusqmiUaRh;fwT`!5vSxChlEx`XK*{??|yIHn_f@}pz}_^3Wt)%7jP z{PCaWzZ4U{ITHuIno!M$a(~i&`IfW4hmzg-zJ3RIrNVM!8}i=E?mskGF3mDNxmI~8 z9LnozY;1qafBA0ez=FmVKG%8|5h8t}4xiFai?_MRj*MSrm;f?_$%ZzP4eiJxF>BL; zI(&w_MUg=Gs!=svPi74`Rs7gz4d1}4-E))|j=}Sc9S)!sG}^p?fG}YpEhN z#_JcRIpX&*wPOc+&(uGC>larof**oY5`w9@jb@xV*{el0^+_C5f&?vFg)h!NFpQ2P zGkmhA$6Y0OI?FH2edU=L+lW)m)$Ti5ANk~-US+g8F#U*6?!flew*(K-(%hDky58WH z=tg^a_|6*0ptZ7Pg+hRekKEDcMjh{XeakAO%sge^*BO=MMk$>aUC85zu@i;q0GE+-*nuN)v{Dm_{}el0lBYSgGcPl2wZMBb9Y^d z$Ayb$q7G}Di@FCs$p&=H;n^_-HGrTsZ|drsmG$YW3YqLBb#ns>g9s*O#canZj-pD( zylM|H8E`}IElGFpY`8yLC58T4@#uYWQ7Jiy_)PdYdElrv!qc);4oAdlNK?_W$;_isvVE}9|=RLt7O7Z zpj?)`R8%clSCV@bHSJ_t>3Fm<^e5QB%hG)QIFGd%83jB12b=_HnV5sIHE_~si%4;5@Jnr z<7HQ#5>g7IlFZ+KyO6KFZ|oD_Jl7EnGLIPpiF9TM!Y4V7NA~Vjcnh@LnUQ5ml+9`8 zciQN#7_zG)YDRyj)DQJK_ojWu*KA&H0II~2cym@0$cAMxFOl^Zf z&=H0S5)WuTztM@V%f|+KDACi&Gw^^p8=1VJm-1E7#W~N?D_|5LieA(8Gi7 
z=HGvS=JfCh7^m;So0MuT7>|WWo@0oVT_^i&?#{|;;Nu~(fG@6bj~;S;ynQq{^ySxz z5SLe}qWkt|G&Slgzh?Y*7ydSR&Ivm+vWR4Vs@89S?)qlYy1%?K8c)Vz4m{{prN57~ zN*X=P{;eNQ4SJChMo){=k9TWotXfn2_=3#xK)jLV9a;d+=u%$vS^U@^aFA2|I6IJA zubxyjnp(9Qo6ZR-mtR+JTrwio>6T+c9U#IUo(YM53bT)iW%}?WtQZ8KsS%v_$h>r^ zCe`)FY>y`Dw!pP{*&^({u)LQyz|R58*d)#>f1VmO;!Q_e56ZO)zab1wE89aey7arJ zmys&pMr>EU@U6dG-eE=K-eVMmKccKWvo3MLWZ&hXd)m6MEzSQM-+whaie4ix)N^=K zW!FmcHM+xfSRfW3yk2qAi--z0yjE$`S2c7+Mg-f7D3y49>7R+AuKA>)f4v?uUH&N9 zGyM~#eNa+uBTwmEYU|gtfh9&c047p|^<-X<^2h3RZv9tH$>MQ?BhwZfS|~V?qJ*_G zTZsVj1laW&T}p>^oHWPBRsuv=x%EXgptTqfYvjq$*jL!|kEY|F~*f@$dIdgp2;p;5n0N*H-Z3OJJ@?lQ~82CjA!`0`dng8?&`1gt|iIm z>dyElbr@T||FyMr;;dNTt|s8)v>mZIMx_%P;@&{O4~qEq|6VvMKN>{srgh642To9?@`kx6|mUp6kN(t_G* zB4s3~#fq(35EbD5mrNvS^ON|5%lT+uR!|@ zvc)$N#OE|X-!JQsKCDA~+x0PqN~PD9@$gIYDV_LY(<-eT9&WXu@VMDHamiBSz-an9 za$OwFp4XvWO{~mK>IlA)`Y3Yy?`f0I3H>amC!|Cf6!hN|S8)FG`7mpLd*KUMK=^oH~9GLx?`Z>_M;ysFI9w04qV&h2ri zQ(|q#70ew(tesgD;p!EDDE4{cD0%6(^tBLuf?bWpv(Yu$Y=GL_%YMUiWMm0J8WAK3 zMri+tE{-6d$98XfrkxK~@TVwYtx+n}^pv9d^U53ul7!t4Jy zIunN^^1qMIFx()bqT-D=-gw_S;C*CxRHjyVV_8vInH@VEDy12wmECxi+C^n$Hy)Xl z6_%EjwP;pW*4pOox^3$|^LzdT7?_#w`}6+1UP_4*FPoCZe~UMmn@0Q`_5EGv*_kJZ z!j+2-H%dDs2FtTzf?p2pdGV!r(^u!Aq!ezl-||_8$(O08j7?cds72&bkQZ%Hq=+p8@1e>`3b)GX$7_$>_~c4~uC{ zgcUug)h62ZD_t6z{GJdr@EOr<7MX1ca4yXSoLUEt* zCaSazUIk$AQuS#6g}S4*%m4Jdc<z3etX0OlRe_~T!eSKrmSxJ+S>&#R=Z1Xi)$SaQZGNNkeT$^vqP zYd4Xle$;A~Omsr`DXUe|Hnh9B^s>l*+b8vsh%Yb=R)U~HJ*=-l$U-swmpSB^DeF1; z0esjkeH}&d8pw4qc)4N3Avl=cPKjK3xW>W}y7^*3k#`>c*?WNWeivk>e#$t;!Fe~G z#|n>L-*U2tmoYi3jid2FC0!e15jkyq-PZeXRAH+#;HS^PRD`vf^m9c|lK^PBMxja+9E zpdXu&n)LH4JgdivETZ&RAyC2RV932^LE}#fvz^;6Eri*_$l>M$BWmGFO6+@mvZoyU z1%Z}shKYZEqOaO>@ukjEjS^zn%;E3UJ7a zC97QXE!8a+&S*nu-!{=YF4Fvy*Dh}%cPMFJylk#KHMo3-Y_te>01i|h@K~$OOhxY) zysVQaZCCR-gxo@BTx>B{=CX0!{#oNn@jB%4vd~LdtGD)HT4pGK>dVp!&#(7zq(^h` zD|wVofZQn}E#Yr45kcEwGCH)GhcB+U3NAj|jiP(8pSX~hGFt~7@))claaILsi1icR z5FK=;V-hNtd@tN+2t(T01^*d1nN&pRSjRIXcmB5l3T!baXYYjTteQ(ejtKOffqdxV zpULDWES%w7Bq61g#CO|u5^TvQpR5aFwUN(6~^w0~rqM8HS5I>LLnEQ47Sr zf)qM#$#}v2t}-e?7CKY5+Ip+Td+V<8b(uX|EiW?dJ}^_*4q*ZzZ)15N;(D!d^8^uB z$|C#3;PL_TQN4=eFy*)iXGXVLLnq4=P!a@hj!H}Hbp&VAu1>8ZcBlPJXk1)z;gd-0B)SpOMZ*92BP1117Q ztS9kQGUTJE^A$nv_lQ5gS#9F8aC;XTAsCZ%w>VT4ETA2oA+@Qs^KM@12bFx=9T5&Oa3X@zsU}V`ED3K8bW*u4K7;ee#fZufai;@tt&31 z+8uxodyPJ2_wlwG=!7Q1GT?s2#$;+ z3H-W54dF$%pT7#5$N^u~f^TZU;cwyID!PR-Ai@Vqfr)wp)M0dIB%j4vhe@(y9W)3} zV-b(TIwxjGC>CbYEBVR*c~N^+hl2b;N$&JD_yoKATNl(Rv3nWxsA+6jfjhhdth8iaVw)rbJhrr8^DR9wzJW33k66H@T zC`*w4VHC{}>>81824LP4oi?NwVZnmR^euPr1eO4Z#&>v433;h+Er)of9ZJ9CnB`x2 z@hXng=pO{*PS@_-a>tt~*m!0fGy#yuTjT+e7V|rNe>vNTH0D8JtR_D#7GV)nqIAmO zHQ2o@nZJH)d;L+4&h77OGCQpu1CGxYNv^ZEoNA`o#;!fZAr+_LKvR)PzA`_qjFlgA z@KZtG$bQFnB4I6Nok{FP|8*|=4loaG-5w`2BYSN*0Bw2RSQb;`uY^99K=N<6aymX* zZQ}g@uInoz+!B~_RfLTNa8eFAplr!j8=scI7F*jhJDYx?(;+|w&^YF$0mNTK1YzZp z0R9px!{U^Yr6C6j^^O?wj0dJRX=l?S?auut<#zlTexRw&Q;}36AZ$_*cG8Kz&;wgw z9WkfgvhiejjJ~ZH`znPDESeAci&Ws)#`u1)$`SowB1u%EbwDH5s1=*Dlj0+6CuG>B z%y^u*q7>l^*S?4$*=~*LF>;A&+5>tqe?S4E#_>`6$*BY9cR?TXpb$lp58}$BV3#RR z-S}kUQbwuZoKDw}yF{l`I8Ga3=ys#er~5uv84=j_>=R3|bOAuDSPW?~ukZWY_O1xf zUfO);Y$%k({5#D}36&U|m}?+k4JMpJ@ScH!y`g1@>jn{lsm5*KLrC%xt_I)hMJQ&W zoqggSEtW9@if5?RGkOd9BBh(_P30K2h9u@+Ymt)q2g!boWG5cUC}HiDQpuIBYyaLO zq4S7E^PIpg7SSwUX}+;tSYYVaBjwWzLq#d8H8Et{j#b|OxcTc`$3<%g zbzx1QB5CZO099GdrH;+_CU8<1&yJ_;7oogV{O2lMIzmEUI;;dmy8-;o5HxY*iEKFk!P63WEu5F#7FaxoMC+`Z_cNZ9*^xEMOC? 
GIT binary patch (base85-encoded binary payload omitted; not human-readable)
z@<|3Pt~rXQ@l&G2<#pE7wir*#qV6sYY4V!0N`ZkhfeQ{QBHlDtAG+s#W( zda@+5Uz@blwKLbWI(@fqa%yI6dosb@O}YTK#L}9M%B+1$=iJb4>^HJ31|r}Vz>dt_ zMH?rXS7qmom>-)+=(Y99#YNd>%se8QubAZPN%or@5F8;`jbJYkiK^$|Iovx7=NVG%4*pr`gv_MBiE8#zKE^>d(jj(}xI`uZ~DMuq5tO?vq<{pK=cD$vl@5 zPRV` zt7@UeHzC6fuy#R35{+$-S{-H=qKX%?hDMQUIIhBs>K)>Mf^hW_VnyzD#TT-D+{}#< zuI9N{1T;>E~@$>tlA zGS2u12bEe#7HI}qQ*OG3zixf+P3fuh8|Sh2;{DYE!%Z}yoRnB1D9b$R^T%%I*}RmKIdYdf_bJI~MU#Nc31kgn=4y#i8QTXjvazr3{0zY%0V0TEfIN7o{q1w! z%$j@p_YYcR-z}B>Zxc4J*%N*u0fVw&yuroCPp1`9KnWP1wX?*I!wfS;sL=x^!(O2z z2^?VR035MZx0>WPQE9X&MP_nHRI$9eg}p@$5Jz;UmnA7ln|V@LDX0_$zc#OdiF~xV z#}RyOwD{-fHMQqn+McSg?@{+|43?hX>4t4|OVzt8r_X!6^_cVH^rKt%Wc8+}l-lpwX-y+vRc@77GilyhJFqob^{E@lR=#8$7q)$8NrbsY|~r!~ukz_`JZ z$u8p?eA2w&amF@Yse9s3=}QHWM?(hJCSShybO6f+uT?{m*N0Afiv1RE)eo=F=|7ns z$_z1;;0SnRfPz;OpL$BML{0^q0OPjt3BFSbsec~q*d~?%V0<`BC16zlimhj&5~K4T@-$ZMn$z~i_MDy+BL9HqVRpbO^94JI0qx|PM=Lyeb|1#skB>8c zzxAmokSgVHnX=1m5d@on)>~(V>|b8yrf1y5(hw-*0oa8WW0NBbZlw@w^pW!K*a4o@ zd6LH_R^GI}ds?1uZ1DP-s8d~)Qj#+Aib{gshHca%}bUtjsR2y0tRnW2k3a^dm z$?+`J47wMvnaCg@#Sv4KguU)kOIYBaY-DfcFM)hGILaCnvQQGvc`H3rwKf;6=5a~$ ze1>a=vOTLP5i87o>yp)$ZKy8DJIeXrn|al@Kd0Zm+`hy$HC`H>*y2~*WqV~B=ggOx z5Ag1s2FJ<{Qpb%%DfP}65D>oUD)^XtC5sAKi4M2BpIk)C#leXQQlPKMzU_J63R{IV z?VO1*FA8tpt|1G~x>K9A@~!$sA~1xH9gm?&;a-+hK_`wWot&agA&c#I`CY-4Mk4XQ z?swh0x6pE&l)q|@UHL{bpv47L@WvmkZk}cO3tp3zAW_)d0&b9FL#*&{PXBNP{%7W&N3Ye|paNB@sxko3E zM~-7bJjHxLM^3!lZmoQ-^;5zu5&W$&20i!{CHdV2ts4X+SaO)a%<87A2Uq+aP0PV5 zy+h#0bA&Vdcr-kd<3O~GQ$obNGk)_HQtZYoolY?ittHI(H8W&=SYftj<@|sQFi_kB zryt_a8xSx&$ZMO{yiaO%C-Iz`3%k^&Rb+$8Qx~Ts~YZ&j^yr<(1xFf z+qA#BW%x<5zP&K!Re4BoT?E7Y)u^we_RsvpUTMc+m#$mww;r@)2ZTCScg*{9Srx1g zru|g!ua3ac=tmnFA}K8+MMx-66(uO_n6dEwFI@-$Yqz8>kzh;o>i@adePwLUmGSbM z4cAp?g4VNP_EET{e&1lS|Tia{H|z|2Sr8 z5lY&k7RF7J%J}480HUb~u%0FqZX$(}h_ zDsJKVRXzsOg^{F+Uxq+W5&oanqICdy+cZeW*8Q2_FPsXJ#@kuEl!aZAnieIt*;^0z zX^ZBv3CG2xjjORo=sAv9XO&hSr-e3{(Iji^jopM##apaO+)@#b2N;+MG4s;P@n>5EnXtmZS>Nn z?C&Z|{_1?XHZSyDnSo#J+buie?Gr9ujFnEV+}3A-brN`rpw4~dDitYAS3yLb&Hb3p z65ySN8DK)_QIT<|1xo}rrL4WY&v*!+1U7=z)3gCn4ppWtC7c01&p}HiW z`uAwn7iHD?2#;m&tE>aCtPBA`s_zz8dn6oipJvfJvcGO*CtZ}S?qE%Cl!NvQj9_-D zA1v62&jF}ADAWcKFE=F{fEH%*2}3D<0j{ljv=+uGf*J195}EKAWLws-XHbIvstGm=WB^W2$iiS3Y4TF71<}6 z#)Fp)CxgTcobxp4IBY0Y<9oy;o}?+xg?@3e?loNhS}d)$IQI3u$?Gp${(hf&E0<9u zBBdjwH07x@n9@X5hVqpGSm9Hu^4p17BW+6hy;wG&_6t@8`Cy@46Lt8 zmG1OsjDx2<3{L%vFpv%P->0|i5RnQMrvm?1zw-U5|I7~{I!I`Fx@B9n52ExCD=Xgx zt-i=g`Vlz~#{Z%qED#ZIs0}`986Q!DBrzdJy}?qPb5%r@tI6A#1PsSSMNpL78IOuR z#H3%)^rmR*pgLR}lkDmTiL_uwMBd5wikIbZQQ`$xV|Q*ANMmk5H>r)~YW6A9Z&^t=$2Li9HmkNaBIQN0pYMiw$C>+3VXA+B~29wGTK{hc3B`C#2SdFh@ zuE=1LTSeh<-=L~Sb>(%7>-_+>7*(n(l~3c%P~2iZ$zMW}s)_z0aK5Nz-P%L3k2je# zmgi6A8Tj#*y`JAeSc5HyOmcV96o)Z(+82ckfkuC*(Z3Nme*m~YOId&%N`7`} z=vUJ3RitOX9&5aDD)`UK$Edf9X}8Ch-wr?^PXv2z?ek*>aO>oQxoCnC(+{`jz|Z2P9j z4`u&6KJ@NKml*}~pg))s0 zCE)TkOmj9!V}b{aD%$^5CWt2{*KEtf8NPyar}=nM#& zrQQ01lJsedMh;&-T=F+PVkY&}r>!{e8lAEqT{8F5S}8;wZ)ToPjjZ-!*&a->Wt( zopWV$-rQS&VYRb00-1kzS1e)W5BsS z`S6*3oQG(+!8BCOr`lbisWOGYUN#SSXKz-A`~c%b;GJj~r?7r{Mf|VDi51H*g@N{BxO4na8=Tmm&+3fB@rpaSk zvbBO7FVcgfGi1|?LkuzG|CU&wB*g)MXWr9QM}hTvfHb{tQ~IvNeB9h!5O#>?(@1s{ zlRBnp9oofA+s{Ai7PJ0XJc!WD7^ywehE_9%clht&hwI)OJAn&i>$7n%}P4isaba z|3}f8_{FsUfBY=7?^Dw>(>m=_Dx+PSGp!@hln}y1Y9txyx>>(xmNHUIgtF93h3rMh za+ygHg^{=+?qm(=hPYkV?f%a1KbX&B&N-jY=k)kwreZqhCS0_4CrXg+~08GkDpPW2Tg^ zLC#Oerv_sMIOJJPg$4iBjov4t)?;Hs1@(fN_$Z#3EYdnD#s6k- zh^VG1i>NdE{5lFBbHz+b3XB--?}e)<@j6pEOkdFkJU~rG0@W@otV=J@;F&akR5WTb zmeO5)GDWO*HG%1Bchy)QPX$7)$Y*1Z^Uo!O^4#^79DWroHQ)E#T8mOmWHK}gR-fs< zK5BEBC@;uF_C%rgWKR49ToT@pUy(N^`8fA<=JZvL`20yCiLr+eyM1qlXz8kF;s317 
z-8pcnc)ZULdyDGmzxm(7%0>?n%P$7E5_Fy3^76)ZM_4%wYZ)SS-jt>3N&Efs`oi|@ zzgf-+&bd$@yzI4Sx+{g}FGHcGzC+S_IE#c8vOA^y4~p7Vg9ZM*e+}48!Ar~sxwial z2cF~QG1Y^*K*e~bK&@r)J$r_vdZg6ZCGM)MIe|)8#5?cz?OOT)D*R4&7A)xtSg~Pm z0LevpNMrTkC>&=diyNK720`njW_9K3UEABCh=eTixp>|aMXZFUbdO^vx~ImS?SVW` zwN|+#)I?3V8a^_jN1LfTueO=DpScEE*Kh0CuhUR(kalY%x1fc-EgeOY@*U#pO03ep zvD|b2il&;kHIois{lZVEX5A~XD`<1^K(+vlp$YjJC`1^RZ|5MxveN4KVLd=bs>vnW z?33jF;S_GOOphZq7wAd)g=159*x7Y~1lF#;simhs-V==Ylr%-!W*bq!KEWlP5}}i5 z_z^h0QDr}Z(^ST3*Q;=#HE_pEAHx$h!L!WX$7-#bm+{5gn$qf}Jf(uu_Z^4}!}_(e z*?DKqKVBf=uFtVl$yO6CWt!yM zTU-_dH}d zNTG2mEn$D`h44*BT&fatngvR$qK7g9C){EXFaDnmtDhJXCJ?r%V@BuUH|`H^Um?Mm zL)4LtIl8E^el>F)^AESm4(4>tnp@V#La;w`-vGP&demP7vNT+dF2dE+u;|L`Z!uOI zK6U=JNS9h>bQUW#$bN>(q(ZaSkZ zes5M|X{|vP$J$XjiM~i36gR25>FAE*lYaDW<0^pqQdeE; z5dQn>CKnPV17V}R0xGG45F%7+;sQYsP^F_n4n6=;I+$hTb=4qlZ@@{MZ>O5bGQ%u| z>dH2wh8!+t>ZerXvtpCBbpiHv56a`sF3j~U2KqNWF?v{`&_;APF{X6fjA9g4ojWLU zsJvurnzVSs=NJdv-pgC~z5&N)J6<0@9PZ2+mLXatBU1!B#%cMvv^Y)E=MuWeg%(2x8jXYP*1U^aYITG%D5hw$ufEa8?C>3xle6arKp3OXSgCqw{DYmTLD0JQy85Noq6 z!MnOr%RfkCy+@9ORkIEO?f~9aZ=^f)!PN4*|IZBoeHB2#1-h8_LF}rNLL9vy-}ZQ? zaEq=~*f>aDRh+x{so=i-TNh@;)m-Mru(&PX1KDNhz?+vb4Oa$8(>0gsnVV3!G^A=> zn6BKWWqk7F4XK8I>p^-gOPf|)%30T@!bNHyr*EeJW17Z;9W!r2SK+da%eUKIpg@hDa7L zwAw{_*wCLVQPat?*W`|y`iA0WTxjz)&VW%P#J+Cb@;JKBZMD-l2HtjzVU_9#F0IN# zUz1}Oe^{Tg@$z78#IV;in|b@f`%II6_JXl$do|CZZ=?AY6!I^d-^6f8lnP;ytq;F+ zX)7F}u<)mS&`j-d;e>Y~1pO0eY(4;!n}MJxfi|^By!rkyfGPP*OPh#towQSi+v38e zTw}jty9hiW2*3hEGngNfIvm5h06=m6ICck`=OV&kcC~iW`0;u0=p52(1Aze`#O^yN z#F1h6{FN-lO*DLXkZAAIDI~DVq&U^a<7Ka`CPbtZ&Qb!e7{G5pXj9kZv(>taAfQFV zcTf-_CC9R#zttZMFX#6O66~u*HxQ!{Hw6{b)(PG^j-sRj57mk}2c8DuTqe1qi&sXmFO8gNKh9F~}DnM3@jkuzZ_` z8Jgk&#g)sD0>-F}wvcy&Dl^Sd`hjOQ?7?r;!{q&&9}|iZ$VQF?V)qe35ejw^5Ds|l!ME$W;JBpwe}xQJqx4$?YWr#v zfgb|1L}4as&^?uH7eJVdg0!1g;av?+dZKvHfrrgsLYfTp$@>Bit-5mh*U#ca9p9gx zX&!P_=-ELhqTdS`b9|Qn>zqem%wu_G(&ble^a|#l=j1-tK-6}xu?D)fja%jeAPyVq2bm$> zL8(fyMT^g!iD=wtgSV$;e5ndWaXky>y0Bb~FV#jT!l2SGs2$HDqfDR@W{M2t!0}jN z^r#Yx7L%P3Xv-lpF>*{v3navyJV+rU1IR~Nml!|9p?FCS@}+2;K!RRt%#c0q&?lVK zW?3glma%O~mu`moZ_%kEdm@TWUERZTFP@%DUm?Bm&!UmSYG#ZgsF!LBt;*yFJ;nP9 zRR;L-R}^)Xqpn(1PYrMQXR3ZT$kZt@4`Rx;i0Ec47zrayY7F3ohMz@gMwHxyl38NM zesxT<+H**qD$`8PfTjTI=r;8S0MU94$Dp{L1m1uRdvI&1ukuGp-)zHh@+N5l#m{8JY!vI)O!G@X!H;>cT_S zG&!y+OT!#eTT=HhNg6StV(3+30URsR_*r~0be_})|!cB?(RVg~lMOFy^^0?&qKrq}`I{fJx-8t1G&%0?+DbH~73h)&G1k4yqkS zUvw-^{HshyLuyxbm3m{F;Q?TOfF=Ow@W660)u4r?kz>q4$=2|3RgQ##LAu0fEoC}j zMPdB2Z6t|TuXv2@cM!7@iv?hFv zT$2F6OjI?k3-!o=yoc12jevav0GiQhL+bBiDhLy#B?6=xWExS1C^U35nloC1acZ3r z;7AaQC{U)~CMa~J*%Mi?eRFw27&v(&ILH0^&WA}k=gqWlcXl)+;>h!ktQWT+RK9-~O%XKN%DkSImB z1p!3ilihbk_|b=yh$r}r`TmVDFbQ-q!e+KwqSN^4;fQ`{G7J8IgT5?q$|j%0iiH&f*ueGO2f%>g{fy_)&QBQ9#%ujo+`*wqQs4Z6A4Wv ziGohD2RP?-n6)`9XH_TlQxeZ2{jH*N-fIveY@Er){j+w4{d=llTv@QWm6Q%LgvK?? 
zZ*Cp$=ACk0`@C*2eV)VlR6-MAr+`Go_XC>eEgmi1boY0cH@`2?gCP=JDB_H|0n7}Q z=P25mbb=yN(Fb+U&tIqWHPhed>`17c3=(yzd@%^%A$5pU`O6&CfS|AxeUG2?-f{l6 zsu#QCf!&Kjrp2Rk0{|=RWIGBl&7hqbWVG#f5Zz{WL6eK2a6rQ#%>hRNyJ|G57)Ozy zG}#hIvnu_Az$#1=Uk!E@TAyDquT_R6_)Fv=NPMGda5L-ZdVQfxQ zH|bHB74rBpe(L4rkH=QVTa&uaa4xmFPhBveOaM~Nx99#lBhz8@F$Zrnoa??Cx%1Ac zI(nJc$8DQ7H(C{19*5K0ra(8jUcpf%01gG#z$1#sC4z*=-6 zd6JUahFZzg7n)&K55`YvmHgd}F&8d}d>UutPgH5Qj zP*c)TvatE3%iVdC?!C-w!&m)OhnuxAsk7q->l&Zb0W1_Je6_8MUQ*x*@Md8NttpZh z%r^eJyAKmCERM~Ywe#((U2iVffi@Yk;gu$0?@;*o`*lS)w~;Z zulw${oBj+CU~^JjOvWlv@ec8p7iab_-uJw{^8BZmknDZIPs+*{$0Yy))}aX2Va8>p zvqJUjPnA{wYyP+@q78Omt6a+V$Zz+o)gJg77J#sbYk;JsGRXDWlQV55bkfCYyqobh5!Rf2+w_Nv^H9OeM2S}8C--civ${L!z`}`;OkWb44S5+@+#PRvY`^dpwp!`{Yo42oy+t~TogHUM}xBqUHr@30j9nn@>;GAwd*y{iB zw{HO#_I9qy>5kTFUOc7#8^J4G5boq3zC1rQNAn=<&u(?}yb&E3-HgYWz|ww2x(J{7 zPLUak?Wr2c_}2I~wL=>ytcHbMASnCHP4NNbpQjgmR|ntAhg&IQUH($765_XKDE`l2rV-p12ToEfO7?e2$7Ootcgd!?qWi&h?*tU#9Gvm zUE@KP@n^)*>w7w}OoefCeC3Hw-AY_+SCQFUxItWA?REM;`1i{k0S>ox?K%ZNc-}81 zqr}xZ|p+{EwPT z`t(i!hHHNSfR^$r@Z^$tDO2wSaDAl~qhUe?6FAaznGwZpCDD9E7JY*nW-%knEk+bi zpytJ`n5~Y32;>dEOFKwbEveh>N{&x0<22gQSYg7XUhn}e9>(LiM93&C5dD& zJMf_WPTs#PmOu=ARYx`oesr4h-HVz zS-}&PXC?m=V+4qqV5AP2$Cbw00OUv%ff5~?(NrG~sp}kwBw7lQB8|pUMq@QXnm5(F z0P>Q4i*@Z7!g`n+BWK@>_(S+Gd+MFIOLG^NT`iF~6q)zjWMwiwdn}F}6L>PCs-MKl zN@!Novy0oX0@ch&u!A`*pBqc_*IdYDvvdS8uIXRlv-}gc3HEVkj%mc4^6D@0!qv&2 zP3%Oei^S_laeHQPY230I&a*SUw1WPbS+|6TMK5uJo~mB0wKOs&$?b7vdyiuvNnlOg z227;2J0y!l>Y4rnqjB~cxaLt%0=bX{(9QkT2!Wa{G5CBFB%jj+d^R^uF+r5fEqDs9}87?swEklZ5?aK1(N zmCVY$*^CX{~zP{{S3ev|+2&stz}*4!aVz64P*|iB%pFgsc^B-By0ub$7&45)A<1 z)-1ND8L=N4h1gNROpXM2m7J}FhZ)}X?Y}4PQ0=j6Ati>p-cB8U@tGkBu)5BZuZ!T_%b@*NtHzXTQF4+&tdE%S=VSQxE}p%Tl4|E{BVO) zC&hl_>s$2sMxVhLGc+Nk#`>Qe#n6vi1H(FVD zHN9IleR${-%Mv304kdb4zR4TS;8euu3F}xT-ZU|uE;B+lqaFk~ViIAr(Pwgv-+mWL zL_ZJ_(hR%d9^g5pFR8Pd@WFc@Y`ySE#aFk91$$E|Y1v-vjbS8r$skM_&WegIZg=(P zXkJX$xKl~yjl)R!+!hy*o6+HVA%lZYPj%;S)`GyOHm4u!1Zy%m`H!Meq8pGO(BZUy z%KSHn&z@A6} z;TPj^=jdi7H66v-4lN-H+cY!hyyDh3<6Sz;O44Fkgr5orrHCut7 zD0@3fAYdhtD=c&EdV#G@O=lBcd>(Q0b){^dweieOyYlt7v2&HPxnY;wuE$-Glw;5yP=*YhoPFfx$F0nPJY>Q zy2uBZ(_p1E0Tcr(UKMBtNeT<(F#ds9$|uc^d&RAgdh+Bt&?;A7$`K%Vl6fWJ=YH_? 
z;lW?SZ{chkjErqc2vUh^6!12a_ z=V5o43H{jD5daDEjIczna*;o$#bh@#92LLdee}{}O9U`172>svVhLR&SW%*Kw40*- zy9WTKon8k3vR9xY8vra_alH;GUTT3jtQH^-1u0c1xOlkWq0We-*y2ec!TX zQ=j_#g+JTs&OPgmQU5f5cmp52Dj@If{++mI@!y9hS1f7#G5vDg_tu`+Q%#Sn1QhAl z4o(mP(nRBrZ2D1xubi?l>?pUSNll#y0DAbWMIY$ndDxuo!>Nc0Ha6aH6Pla&mQs+q z0s(NAnC9n@1WDyt;^nEf%EuuUklV>yZjq3C#(WlF+y2XT7yxiRj<0T$0K|SyITYA9 z^LQ0@#*)-H#wTaYO|?+3u1|kIzl2tseqeg$S6wYBd7oon%+!t-bGkPWbi zATyo@9c{33m4hn3OqkDU_j-U%KIK9P0-nzS2m@&_rtbP0Hx4He2SE7~5dRYp^r4rZ z@1pi07#VVc*=l_C+afTed(}9C`=02E;2DoPW(LJXc&HNQ$L)^q0GpmW1i|)w&*{S^ z8um{iYWpe_A7=`5hsyLd_WXIKu=(wWYmDRUucDpKx910x8L=tJ*Rd|L2z3;0iyW6l z4ZtBUV0OW)kjIPb9=y2u{)Ovpnv<03SbZ?(HLcsw%Q+5MTF?oZq3F%dv`4SD;3+^b zI^CB<7J)92WnNWct9&4ArV&S*Z4+7MUDX~e7dwVEGWs0V!2mw)dEfd+OtZ;B?(W!c za==lUh?qI%NEkC#_yXi%&|Mv$yGNuz2?__5TgZ7X0OA0EhEi#>=mQ9| zM@er3=slp*5W)xp=|gdBhss1Db9z4k=m&!)e&Vi+V>#|d$cQ=(gYxrWvZ$lX0a=q( z@=o0SmQPbvOLlY#Oj^$SC?N2KTVm#bWM!748UAowZ;k_zvi2@2uX!Xcr2 zKNKEhW)5Ic^Fu#c-Z~fj@f>Uy{xn$~tfu#gXGVfMrr^LhKy+yVwGAmFyv4UUBI^YJ zO@JH52)-Q-aZBlPv700|Odd-Ng50{$RBcC@=IGOSrM+(qrwVQEC&l)>nhw~eaz2<^ z1#`I$w6f(HTzrNrbK=8u{S^)Jo1Z#D)Ks_p6)SB>vo7!G+@_MJFQQJ8ouxMy-xM!l znY>T=*r*8dtHu+j<+)~KPT%p|;sl?WS?uzJuVRE$lmSIigqTF>eye%b%$DdoK{%E5 z5Q?Fg$&H?LL`q*{gBO{cMUVj3E*LX$i%qu1GJ30bU{Je2F8<=-OwbU>!!%aDlA90m zu%Z@8O!8vXaoBV!_=Y135#)h^3OJ%%O{76?9Y}&!lbF$w$VyJ$L@IASYVVOcH@iyY z)I)Wem*~l8zXcFW{Kd6sx$!l_oagUpb z>5|`ONgy^l#Hj8)PB|jOT!vP2PW?bI6pOXdmNjI2_!4Dw0CdF)>Hw2mgwl0;VpJ+` zeS3@3r2Y-3LZz|JfYPxD+bo^g?>4_M*u|5oyD|duGWORV{4ANi-salr$9FEruDe~^UEp@~vG3j9 z{JXtxTrK%sJtgiPAlqW3G<`VE_;lQL6-{`GD+g^iedH>{SHB$QPWbxu^`eaAQ&j1v z)(;4;Xa5oOZSx0Vp~llcD0baTY+(qY)y5kaBOtX0QB<7rMM4ZT>>vu^sCcw?Om-(c ziU~}Qb;oR(V<}i72&eF7&{WtkDIoZUus9ALH3^1LTVA^p2Tf{2T>9Eitx%d?8qZ1N z5;Jl3EBY?-5!ATo4bC>UId`*9-ZkMs`!7{H2MRuo+#4=kK3xnril873JT)_~d-1Cy zkA1nhQ$G8|xC>&w-ovwh4&T3NWHb+LtL@FvXXYrBwykR%i^Yy8C_LYS33|51kKcDp zKGZiI%Lv#M-91vQQoL^R7ACq(CjLDg<_!T8PPAr>)g6?T_)Am8zo|qg{T{zQ=<0-$ z+h%kOin+&C*~S_1y94q_$uzTgeqW5AZ~L@OJ8(R@t*Ck3=>=sMkVge#>qwIzGKQZ0 za`~di+qxbmy34q~^etbsJdu|-cd3Widti%G&#f)_pK1>~9hQ9Q(wi@JJNU)~qOoCZ z>M-Ku%$Omp`AFlmI=X2scgE13QKdr-M*2Og8QV$|dR0zNqwnP^S>9feMAZ6N*2xRhZ*nMQf?OQU1;d*|x+j#zhJ zpGjcMo%|J%Y5%cyg zfEZ`mGhf|N4%=LRGg`Zri{*ZEKJO7g4n}3<)+tec0w))rkxn}0K7P)&C)s&S>4KfD z>{Tvt?b!?0xks*Z?&l7N_f&v+>!7#e0XipW9cx7;@eB{=GJMHECFDlswrU zQio2SgR(^Jq0`_nRK4hr^P|^%+1x*SlH^N%Kj|t^*~04_nv`~$&lJgO^WG`7!+4fr zvcvu4dvimU&j?u_+P-ae$o6IH+<%&8|7Xfstt85gM3ISWHd&7&Z>ET?U!%o6O2=js z-7E~di*kT=M?~yAQ1=w00gFxADFET8ap7~YHOkT3+UV`|D}GXFc>+>o9lGXB>G^M- zaFx+I6TuDi&RUBmClp*%bObC3PKfO44I>>oz)x&T-8^fU#ca6w;^G#s&l#n@6So&5 zfzcpm>y6(W!>0BJch7AYeH+|UAIMD%)1-vCb}5f`gQdB}yl(Wk>#3#h+H$Yl-mhweRYXNdLdN#e+l&TZhizwE;1 z7-{_gU5~C4ft)yH^GB1z$sFQqfNoYg^(8%mU>BT1J;a)P&aoem{_x^KS&AKliz`h24O=U!Fe_(suNF@RBQQkN*0jWb4F_u?b%WD%4K? 
zu6-rm_U^~Ya{<4-XutpPgwTBA#7>p)=M6V=_$k{M=VE|Bicl;hx;>5i1;e7yuGshK ze#Ae|3qa0%diwmf4Ivo-WhldpIyUcd)WMdsv4P|N_&qNwj0NCU<7r0BtGVjnj94}e z0a-@ppjh0MfA@U9lGGzf6b?_%ocnZ2+*9v!7Mq{j8s{5w`iAoQHeDI1Q&)W&nHBtd zH$564D>jic5pqA^#_9b{xv9%}JWO)AHg&_)z;T-yL(z?anr`vY|7~d6zJccz!lEr% zc4d0bjPJ|ZF1z%pgagpm(Cc5PO*8)!IqAj^0q_s8AKA5!^2>yo;?@V!1^*9Jr{{0( ztK_L4E$Vx8hz?w|?KjyLV{8%Bc0hS9IyTT3E*du$Ub&Q{K;1h4#IoY?$;a1IcZOm0 z1V*(Xp|n2Ohh+e9TwAV4hRT#Mkw9x^Sv7V$IIqheI`&y+QsEpN6`3fy>~0N}5jHd= z@KRMzvo7v~1_fuSW0 z3(glTe|)uO?Yq0xRKMGz8w5_oI*1I#m9gEtr%rqjo!#wnqlCio>A!{!LM1P*_g++DWi+LHZxkGX1*D#6g`@kM0eWb&Neo)7-!lScOs+ zA_LS1$loIu-prr>-@iR$|L+L2l9NHWV88c~9SoVmtFCyd^e7?fi{uzC&d9n!jLZ0N zjO80PsATyG3~{s#vPSNTt3N(WYPsV=btt@-w$g9?kg28C@l#Fr>CK~0axr&3oLph- z6Sz+A`A5vOF-gb<_fT;>b81@>bVA zQ`|VdG#EKJWKmIlo9|wv1n7;>1VzaEv}4*qnd?>$c{8{(N-jrXW*7kN_{l0R|G2pwp8a2&R+#SR>F*75FGzSd(+wWIG}@NCW^dA;jPJzudfRM_l!M zjS=9EpI7b;-!)xar&2Yy8`d3{E<9Shzz1w{B4x!imyc(HUki#w!S37KvLeTelapeED_?Vc17G!aZ$Gi($o`6k zOJVzWPCI>MnO`8hOwA}LZfAs4S7#AC7hD@8gyLFWZF?c=x{C=nMp-TmcV5sIkGu3L zC}M<^UFjT@Czw!yVHV@%k3lLFN5~j$-(GTaH=;U9lq6d@jv8-Fk1SYM&rSg5*Rykm zhi0NtPAv^P9CC(arS{e2!#H~JX`?GpKSwI2OwyPa>{Rt$@Lp4Mr#ZI4Gc>8gI_BgZ zA4WklZ87)#seLb+dqa)9L&te+wcRa^OX=}T%iHvk)JH%`nq(|VV`WJCciuXcX@ ztNB$P9|cr|q|oP85C7uo4B#B9MGx2(=~yKSfaqvc{_MY@q(qv?zX;<9dZawOD-l>H zh@Wt4H_;M;RAh;Ourj7n8}F-PZ?^z5qL+s48+8{}ti?!QyMTX0_uOW+7-r8hbLxwl z7P|TT4ktHWDmt%cH#Fa5-{`ezahOsxv(>hu_a&paL+y9!S!&b<_kF4d{-Qz&&-toZ z)b%rqH9HUIQt)}_U7CA#*NocG=E}&acB^NUG@--*v-0U^X=voLnRQ!cA9y-kzDZ-( zr>BFm?QC+eG<`SkaWkn#?&@?ovjdIB5Jmkd_qP4hCR;*oW8yqMS-N>U4dB6~G$w_bn z=-C6^-Q^kfDc1ite|+JfW|CHdDvzhHCT!=$2enVE>FDt1R!^rThP?KsZ-{QF4XkHJ z9_b3-{K#LlhSh}OvugfGeH*Z{ulnXLV(O#Qd(WRLX$lSTq%ebmciKN{CTjdLM~9}dT1$Gv||0z%s1?&4FxuwADsVGWU4 zB)b~arF1MbgAqVI!=np;t4Fn7+Ov%J4S;vqlhCwgEr%=gPbvI@3I;&dcJmF_?+R_4 z{!hk6S%+ixY)G-j>U8wU=2SuAEYh%dvpa?;4@W#YRc`j>i-wnWyM~|bn|=1a?q;Yg zlfi$s)}gwDxbQVVoLc<-oS~v4_N4m+rL1Op6EARaT;;&iRkB$76Q2@RHl$J=v-QVp zx}MaY%s=p~d`zc$T_gEBUHMFk6-@z+)z~#9xiE{lz!FnDnWY@gPCQDLV}@=~S#A(? zb&g#e9uT42$UNrYDU;RSb+;JF0Bs#By(U5mr}Y@1nvO<1HjDApBLH8awhs^hE82A+ zQ3jCR$@Cn|W87h322BJ`m+MsIKbE5Su=lZovWJD694og*41iWZJ=@rq5T-jctUzP! 
zy%&eP{}9i;ll0xbx0n!A`cdY1#zUfXRA5sNC3gxFDHMBF0a4cU^C}Sh{f?w zkJSzKGq#+~J8)DzabziaXYTil2`;F8;6p<0<$o(y`^w%@MEcO|9OV9n1eB&g0Y?lb zT811t_;zf*9v8EAjl!ofLH|DoJj5sxT<5-)QG>LH8FprkxGUh4?GWHSp}QUN!jsd{ zthSF4S0v9C@1NvOne!bWdy-b=iZJ!XbrELw%wl(I+fM}t_%|5$WO_oCr{i1CMPrvg z!A+xJ?Zp_+S{4Cu9UwwzcNoOW4+;?&AHPt zve4nru92KiA(pT|H>5bdc;5VTciq(yKV#}_-27k{*KHlAEGgA9U+HY)B zrn*~$D3d1WAe6Elg+pp9UCjW|2|(CQCv;vkh_#WWsa+s@!|OAU!?pLYam!ZEHcN(NY)1l1C^iW#k0_1uzhn8W|`G=iQ&}DsbakpbR}o5I{p}>QsQ0r{p9|7})s&SdK#B z7BY}UczK4sUk}{zBrY­hvIr-OM0*h@EI zwfIW`9Lj?K9f1sk5c$KeF}lFg+!<-zpAXVrr0c$9>c04&vA;U_B_CwU$UDP*x?7Wb zVkS3kdQ14LF{QxG(o=3f1)&7^v4&-<0Wq^)#@wW*Od~Hi*$mPoL~N_uf;b&OIn_#j zJB!t?XJeGK1v?3HBZiTM%VfvGk&8KcHY46{qCU%^*8sJru*Lu@iHV`JFn(?qE|!A{ z*-S3T8mVM+^$-prju^QeSxyIy)V)f&DD}VaZJAmf=|;Tgs$D)e564B9<=D^+uBKvcP;7yII?d%M#BYAAFHMiLkq6(&U{lE-USg zU%tqwdpv=iO;hat(xenElLme0F4i@2W+{07p%?_Vd>gykLKp^2*ysTQAcFDUCJR|( zA=c{Y^|-5)lN#QjQ;msRZQvwj@g;@;{va_xPqq^i?<`{t>0vv;1`5dfp~qCTC*iW- zw*s&L*r-Tx%WU%Yu}mi=@1GYguv$Mr9odxTGgCt9eVMZTN`F{JCNnjVMSEKy@*3u{PwRAyG{IEE_^`_$1@>tzw&wV zs87k7Nq+?LFBzhl!M@C2Qf=orSZ4E=HQUv0y-?7cCjPKfa$L8?tw$sMM=R~l7o_TC z>;VKWkOhzFA%g5QZzMHG2I6FccY5B|9w!&b)KC^bU&)TP5THRP`!VQ)j6GICAQ%`N z;a*A>22*7gjWI`8@`v=SNgexr+=Vlyj7f8{blk%drXBXZ`-%Wfl3Jlk#;;HnAJz?aJbu#(nO+ z>w`akUUWukvnSQ>`%=F%(O0bMk7o(&^% z{xS}vWNQuVOo8xh$(rVEYlJdJ=BI$kTDFs6QGyG{cQ=32$no&ol87l0IO6Y5fBEml zH%Ou%_BV#q3?|QB^JK5z(+|g<=1iIOh7(z7;_JXHCCGCyq->?E9~kNy*uC`1m!+MB zYx%*;ZUpf6wb&mYbe02qo7fjgL7^0HDe3SpDLxxH{u3MTD%)LHR>&B$LVkq5dUc zqIwz!{Q zfbvva!h;)(e12bnL(fvhKaXk$(w`mw#Qj*c#x^uX?ZQI`ZMT@%8pF~psXN>+NMl3! z=LeRERh*>l&j3n!4(p8%_bEQiU0s&GAH{MtJJM9?|2*11Ks>bv^=g^=#eoarcXcJjJS9e_hp;xGhJ_KGNG&b~Ng|6=4xS?al7g&Oxc; zqoui1mo4WNl~72@7ek`+{5CcZCQw3e=3Z14cXLNB{ydSgrJK$(%#_%yTH{aYedo3! zBvcO2j=yHejc)HPyE8CUjNtD~JzlpDj>*rgQ7|Yy#Zyu0CPBpMX78{SZq%9`QT1&} zNCxf!{Yxx+`>*^Q1emZwp=%{LowY>ScfI)@`rHk?=C!DRzVZ8o6wVrc|GmX=ThDs3 zW<^Y0bI64k#N$_qt11muj)PS;S%$fn?5-N3+&;V*4qR(LXYC69vb9tBErZ-%O~ZuW z2Kf!b=8yZ-#kpIk+LCIixu@wIkus|^40IsF*#KPY zleepkz&kuIi||439~yZ(+8y295pSrVmIn3qWy=W}FsoKc?nil6E}9L9U5-(|uRYod z-OX9xHkLQM!Gg=N5R*{iEO^&n7kpwY(7249XTXR$e?lR%0>i3iJZ+5q+xANl_P+CG z&EFy0KOxQi@nvcM5x!07B~$7{=)wL_HtXRCn$XbJojs-7!57;%9h}fR z)R<3-Hmw->3uC+xSujasA+vT<-LvKjPlE~|YGoFyNY8F6q`ny7hR>$v8@O><&|Z}D zc;l(4n9a=8&7rD=>r}{#zu4wn4j>~~fvhGO69OFyWPJ*{&)g&fER7Rdrfd_ld@ToI4me3oN+5 zTkyJulIWJG;S3oeZB}oJn(kzJbULXIZLOt0G3`@jl04vnxoxTX>UD2%qFZrq0js^H zk+!c-XJul)o|k=19g*=i{%_lYsPl^kI@w}23uG_uj^22%bk*PemDlWhU2aAEzBX_t zD^dpg$l#{vru76Jx5lm^vBA8V8KdMJM9H9%U#qb_GJ52a$JyJ@ zyqLHO)*E5%j_5YU0ypMa27<6iAyur1yzydELF#&lI2 zPXN>cM6MAJ@hPPbqle)IMXo;0eZ3WS6Zl4@U65b{VBkn)V@3`%gt8ZGd2}T(az%9G z%4>hETFfu}_wg^ky;~nOG)Ve7yt{wphFhz{Z*4#H_wtMjHs|eIO?HIB59ETzgrqKT zdE4KgE{+FqATgXT`^_`G2Rhxu8|l6B?g2^D^Y%yE>m_|7+B8>SJQKN3aV5BBHC;8r zz*6(6gat^@(s)_6ImY#p{Fwoiaz$*@GbD;0g63c$M{mkOk;3KKh6K@y4!+K zm=v}?RExkVjpAZdArvf2J-1+cBzkPljAT$$Z@0}mK%VZ$bmE2kvJ7IX-{K4M&xioh zz)q`F$vPv1QmCwS5$pddy3e*I(zgxZlRjw>nsg1lW26^_Pz+TKQba%qMLKLyR1}3o zT4)lAN)ZfIP{1HnDTXRVG$0@%`VUwZP~3=R(N%o%yqXU%$1%C@E9dn)1r5wHE!(qi zdl>QJf|6ncQZxX-o5GD?#3>dMKxhO|?L3Q2?5`U81ZU(K2k3mcdh8)wZP)SevW#(3 z4sj^N|CF*%=?P|n)M8%BD65v1aVGS6Naq?Aa%5KkZ4iwN*D#j9DAY_S6c4AD> zD2r$oTkRw!W3TCtz|kqe@zTr4J#d0LnNubXUu1}-&CNbOmAgiDgeQ^zEx-ku80E+! 
z98XPY&&nUoHQ0TTM>Q~XBm-iBq*<0qW|G}7K+xt(z{M9w0$o5NZ}t{8sjd)GcV#0m z-U<&MJ&Z1)G1GA!RH&3KmB9p@L+DIGuGh({MiUmr?rnp_XEH3+B8xm}T#`r7r-~xgEpjkB$B$c@(M_m`J)v_?g&KA?Z7{8RPWJ zR7bU?UvKOnaDeG#+%0!%%HW(M(GD~_z&Y*IFjOZ2tu2oRZcDN+Bp$l6wd?_+_w$R( z)YSp`3*m8xe>fyw+uhlS+)@Q>!or#-01v~M}!+83SoYe*l|Nrc1O!-nF-@YJ+ z0c)sPs-jr#fwF<4km@u4Cp;B;B3voW#JtULmSQjXZCP?!Rh4>DMjZ)drNIM)?kG!ix zFbrjov4z_VKvMwUFTE-r0i>zK4nl6*k&*k^cHpkDDwsYw@0)zByfvc8#H!i*&PLgx zL^GRzlo`>M9ygGC`7r0^y|E`C$m|_$+n<#uI&j z>@et`%jBLN6M{Ig&mF1Bg-`>-p;eFbiZjC?RphaB$>rHlZGrTZT_$jCqTJJ2SdYNb zG9-K%juzA`dz}5z;XFsY#7!K@Fb>R1LuH5dnNt;ZT2V?wmA!VKwy3s6&*^VOZh zNAnw>*oITUa?{I%fcz2_S99agg**a{mqz3PnZe&7;)e)~GG8(jG0M<%JWaN){I|;T za@ZB|(p%rnG>Rbj#+#8_*+;Yqd$vbzrO6Uz@kBdZ2=J!5_-c@^s_VnZS4ZL!sCBBw zw>|!%mpHBX5xa#xYA&;zsYS7IF#HPSn&CR=O&Zp)O2}R=uf`k$n1@zI<-N|nPhh7J z!^YABa2*V&vajoDG+_wo#X$yeU0*Kv3sCoYP}mk>yY~-*Wn-E#OG}#Di-)4+b9Q+9@Z} znPRbnVgWs^*ozDl`@rfRezugM>gfF4*5`eh)V7Vy=h!ts^egOk%!eXN60+@j_ z6m0#^FmYfb@Cds*yrySVE82r@KNqVwv(~CT?zgb=Yfv;|8|u4B%CaVfgvJ46#x4TQ zDOH!J5EWsaHKtSWmI6pGG9(&ENRNNMOM3S>%Euo@>bjEw{?%42(E~GKWiRKb^+0X1 z29$aA;GnusTI;`$9yX@=Aaq${cbS{3@mz;Gb(v$3iFanPg9zDf(q$P#Q5c@4+Fial zqEYi3?JhXG8O|L_zj~_Ea|uba-7&4k!$r`BsI^=aZp3ZZ#GRU$Ju>7oiv7s7!&H%FReA!Z{kc>~d&RAde1 zl9dSr&w$jDF)M$AIs7c$kQC;4V$U^X#$w9R>mhm^Xig(E;a*5VWa{2~YJbkjqSDoZ zy46CoWM9*wl8U+2m$ZV^O4~EHXUZS1Ag+3s1<>7 zN8$U5#a2ldx~cSP0V<7%>7?RkNZ=un5lBbLkO3Ig1VgtZh?$$}ccGr>pcgE`%VxL7A_JuBxmxg={4Tb<=^65&Y< zlSj2{v`Omi8i8Ntg2&86whH1L4W%uF9bv<2`E>t-`Qlm^riDqD1Q0hqxNaI)MGZ`( zfGK=1i7$#USalME{vS}|pQxrrbW!mid1CoHv>+>b5QHR(l|#m5qiDXz3gk04I!^o0 zkl`fczDMjl9>^AeqQ>}a0L>&8+Ag5ONTRJ$uoJ*#2@Cb0MbC;*>QSs{F0Au-g&r3s zuL1k)bjC}|*kGN-W}Q(U_1i~@wcJu&4NK~MS53H^93AFPhbi-;?t$koT;v%~V zuu%aN>18v=kW2z*hXiT8J%XMl7*@)W5e zrvs^dzC!m!Og~x~WD<-#@!w>#gCF@lGGpuysdUW7mT}LPaj=bxVsm9T=3nI8&p=op zs<@8!?kbk9)$-#f?cG*9;T%{U4L>cU=L^q&x*GA6iak96iB}EWIC%4nrcMR{%;eQ% zkYW5Ys2T!(fred@pijd~KeENF~I0P8bI?% zDJg4UHc(zojSlCdPqL%yc(?*WoxV}?c{VD#5T$Baq&|`2K55&PUC}Q}sgf|=>>XT! 
zQN5wg4Wp*|S2?K(g*iQ>F#ZcMkreuobE65AU1I0VmtkYF5H1b!H>FRGn`#yf11ZC< zaJD>m1b`C&LlJ(*yOr`F4}nrnC!m8CAkL-fU(38N2~bxqk&(rr^Sb88Jx#@ zV~X{{@xv}R&d;R2iViIwFOE{EeDBxVD0g<1i=QiNxXy;3*|D;t7-=q_z1uSc;DHmL z))nMAiY7vEf!LTDeWjC+auYzlQeakWSRT9aoBZ`uYFi4A&E)1Eri(aZn41uhN^a@` z?p3kVr?pGpubp4>1_Q}J?s45m$7t%uOo&cW=MKU;GuRw~__UyGgbmvM*PcV{n8Jc# z+{{WET#A4h{Mq4=+-N+Pp>EH+jzB@P)7tuDc=VE1^YJPN}hDId9K)R5}wDnW3<$l5}A@k z0ISGlr6lCS6mEorqW1QCR-j^~P#N1}QG@}l?27v@Is;x-th=32C3s&EipN~Tb+U1$ zp7;rBy`I^_Xlp-9ouf%Bsny>&#;?r=Tv z{VmljHW-|TD5jlOq=ApAn4|uk6ABf7DLp#1|Iuo4#XIYk+46xx0y;-1p0Ye(T8s0% zfSn{=n5T*r_)a~a?ed!(FJd+DDcF7ic9T=t!^5tzCmhOJ`#9Kg655Lcxt1`A+zNXY zem4Lre`jU#&ja8fd1{t+WpW!msXf>49=I5Z+3Swqvwyhm1Nw%)_x)I~hJcy#jhNOR zF_yXj3kH+eUNAzG0S(K-bVv%zG#h%rF)MI)tXU_BZ0 zbLa8XDE`x(U;Pf;813Vathp>*+G0-WK&}B4%;)EFLG1JZAJmFS#=fI>>G{me^YIIe zdU|ebrIKvP{xCZi?5#>{S@V)++$>su^0RA|BE#22`6TL%LKE{PS6!m4RzjAG>dd28 zq2}y=pP%==Ic9MG)k#b`zcGa!xHjr7yxJJQe!qc$KXw9Jrrf4PgapRvj{aTx+n1&{ zbq}O2%pN=4&l6t*^3%C}*=*Gd!1K#=>>3#hzCed^A(u#)0`5~fyIJjibA~e)IA3YJeUWw}9mrvV8b0HgOL3vt){lv5+j*MN)m#UiHQl6fw6 zla1}BmoyN-o}4?E31C9&tPclWNkS+Rym$JaM$zijUOfGuzW`;!$L>JYI8b#G6u;Xh zk%78MR4Vu_j=_&7WwvZ4Z}BzSA3PTI701TD~u^j2d+^(Tkx73n5S-M5TG!QP7-k7NN zJ#F=O5}S+y4hx|>{pyx*q#^%Zg$K6v(aIYE;Rq^F1iJ805{56NQd%VNH++7w1j1d) zySvrT82s1!aMcfaosgsZKTI!Y_1@tRn|y39F)o}PrvI;$Q^uRFON#wZv@x|35>(Vv z4*vZj_&9}IMVK2s^vd6Kp20v(+z|c#KX~!P8yD64?9$^BU9pe6khB$iz&y&3{wc<$ zW}n+@l?V6GY>`=yjN3kYi@%}!1gm>x;@!>$_RLdW?O13sU_|Gk{!^-GvlPH>7FYcr z73{3x1|9_vP#3A6ukT(-dCZT!_Uh;cOKza6(jYtUhFowrd5wWj4hLhB(455$oln>{ zu~}jZc6Nd1#QR|J^wfhw#g;t z)ctSSjr`5~z=g-hJM(8aXBIj5kK9ymCc`;N3^9wolKIP<0HO zbAFU7YqRG^o+yWPRL$v6(eIr*XWU-2giVJ%Y4(}+I~2RvapuiLvA=n|T?P`d(H?Q+ zuX)_@wo?=9_3mY~^`*``$M&9RF^qcP^==#v#mTADxI}$sJ8mfFK#bW(1_b+d>;&&! znW=xb<0VOr9Cna>DniQ0ktM;gy&!22GFR%MLSbUgW29L|EB2Wf35?^o)XKx@j<)s& zgA>O7si+w_0*%Rpaq;>V4C2|imtDbH7IXoVA?`Jn*K`V9VtC?i^UsZUxw5C-y}6|- zU%VNWPWfxR38`DjRVHzuf_pK0MNBnDwKcV^rUoOa@GhoAT}N!3eNiafCm$u@de!$q zeVJd_gL;Ls=z9|iJ28o~no2iQtfIB6bF6fmQppxW82uZw>bDzdkit7Itrj6L2+wG$ z1^QlhO8>#zB9GR4A$b58&I_iLP#pKYePpD%Z{*Qaj2hg?X3lvy-%-gWU1AKH9ouey z`(vK*kd5`0^O2gtOy$_}?1o()<6({{=xX_b_oNwwjfr%L+I zk5Xv$Uq0XV7L^2euU~_r+_^phigQgBm8_rHjX@e8o67wX7Xi4sS@TfdEiN$ur> z)vmJi`3HOaCS%oQHZU**Fn+oI?A%gPW&t%M!XGY?Wt}hWkimk zuF>G4nw4O&VT#p=1U`o4pg7xzP`Q|8QQ$%gi`!V-b)W%2`0AaP+TrsgzK$5JB{X6} zMN8#uQj6ToibR3%__-b!D&=Zc2P2WAu?Qi!jR#y+NXXU9sohl@IYQiFWGN*w0AF|&bXAGIWX{2kS$KNmQ~8ptDR(2J+M$%k@kwV+Bga~t$zB&$ij zr!;zW>lF5mpQ%r6sa6?fu$DrosrsfrLi`xDVEa*}1$6LLUf#&C(|1su>B-RA$U!-n z3=#Z^Fq)hh>)@IlKHAqoaf>V0{(X>;W01Z_-9f|taW0`X)K@K5OYH0%RGcijxLZ+~ z7+>)OtY}uoJjUO<&}2@7huWh6(ds4*D$nPkY)EP1G$Kr%!vJ;*0bHw(bhLW(gzpiR1Grx0?L3;L+zv5-uvVLjH)70n1KN4-a{AS(bcPdziw8V^D zJ=<2i{(9>;uyw%o@Qn?xpD9Nlex9dj8n4@a+qi843ny6gsEBoKo@~oe9+Bh;)aJY= z2x-pgcqh(XvyTu$8k=P4ZQg4)H$-r{sIRj7+LZ(onE37$sO$qFOzuA(%0UPbr}NU} zmMxIwr^c~?fW*H^w^faVpRqK-WrbxPd_Thu?=F0+6%>Z>%yckNs%E`;50QW|9h5Le z@Zv2w)y=Yax8MsBrO-i?iK$G*|NebdDD;YP$e(wQKNErUhAAxoS(+7A$5@8$Y5vmXIu9IJV_)g3cBp5)>s_S++T?87wcsvPf6@PKZ0H2c5b|j^F(tpvnD!>;q;=e3{+5*_>^`1C#ph zOVK}Hc^x;1$)~!UkMh?0YNV~T^I`sG?+QLqE?|WfZgIrKW!)uac=i?b_EDVyy?ZB? 
z6e>PB>=_rO!r8OpP8;sw(A*8rTO9la+GC=h@(5YKS>iC~%$}~W+-MeT_i7U5uAOU+ z<>k5O4*#Ap+PwqGtA#nNyB(oCD`es#wBINmJvU}P;Bh7*?7)W+#syN51g>7T_r&3A6!f!_1*m%1iX9X-`<0Vedo+D+no zW$o8sXJ;YmP{@m5_#|J_E;0?~4AFFfPk?%v37xv1VPr7v@M)-SDNsIj2PTnqUJjKY zhp3gZ;YeB4I6Sa)9jKE+3X=!^PJXb~ZqtO#{O})A_pQxI5ZU_0hAU-DBOteo4ZplG zN|f631!8pVr*!F2CMc9vVz>P$-si$LS8X&uXm#~Ji>nr)SBuBRwp@{=+Isd46s^?ZNN2EjZ=#LiWBZME`>LF}tgmA;5Z8T+t?cw$eWhoUX4qt>}6-nnNiZNw#3w}z0#Deb_9je{3e`+G7X z*D~Xh_AY6w97WU1iu-10Qw`ZvYxA*psPTmD+QJ0rgO!% zO5?73N^M)=P^D4_>*a^r66iah3vL$G4SrBaUcMQyo@wLOhq-vSLD#uzF*C#1LI-*+ zySLA^xCg{#MRGF+SmP5-dc(qWs^jA$`z5C#__5`@xhC9I>8zM>WMFnCm7nL&MaGw+ z&Jwduk_N6TO$RC=)YZGx^}8x?l#p~vXx)HLz!+`Z-r((!rk87Q&)C+pdxcx~(n?*w zB=jDUaFS_+D2%&`mQ7pTADyW)KX~ErKhf$xe-!W(Z=d9-54YFZyZ1RYlY=D2Yq@T{ zCKVk`9g|ZQ*%cQ_>{m}yCS$dWTx?w+%I|^nv5fE?gh^&PN!LYF#>F{9MbbT7dz+@~0hYSN|XyC&j zHpQuNOi!&hPS5-_OxzXSt|Rs2YXVb_4rzJp+ETL5GWLlDcent^uB_S9Jg5-l<~B6^ z2=T%=PPoU#8iO(EX@_7i5X|R*k+l7!>^u7@&+4>`RHMdo2LK%OcP<6YC>c3&gz9O2 z$Yp1)h{u;yE$SEl=u9D{LuZgrZ=hgC1FFq@3Sr>-2z5@s%L&-eyJP0GJyd7nK`p?9Nhzo2LqhHt1DJhoxy4oosH}hOp8EB{he8p==^b` z{tUmq1*1x(rvz%e4B|-7)BkXJ+*xOjam%xetn;N==dEmJO@_+V&FYNJ+KkQcqS0@L zmviMeNkOwV7e(`)(bKoDcDv(wA{#7xY^nYdzDoJluxdpdr9lg?={JKxu{-Z%H)eBQk8 zg^F?u>eU_NDa(8Feo5KMea02Ogg7!e=n~s@7#U9;Vfl}@dpy1OVwpo7&$!{3_vT5t za4OL`i|_r4LpyQp6|(m$@`X1$+Bv84)eFwI7mr{((^LDsI$B=S=r6mKGaclRj)K6$ukINPt9<5`beuM6Locy$7o3>7%8RUogxQ63_XW82MJx&+*KHUrueStpLkxT0d zhVPcAPMahMZ(_WtiPRIg!&%tFuV8PGS`TOmi-NgVg7|HL(xWt@xCgWi~r2uvsN8G@{RJ9 KSHb`Q>;D6ktN}y- literal 0 HcmV?d00001 diff --git a/tensorflow/lite/micro/tools/make/third_party_downloads.inc b/tensorflow/lite/micro/tools/make/third_party_downloads.inc index 8590ace9fda..d4d5c1c73be 100644 --- a/tensorflow/lite/micro/tools/make/third_party_downloads.inc +++ b/tensorflow/lite/micro/tools/make/third_party_downloads.inc @@ -86,7 +86,7 @@ XTENSA_HIFI4_MD5 :="f234764928f9a42901df33a27e118c8b" ETHOSU_URL := "https://git.mlplatform.org/ml/ethos-u/ethos-u-core-driver.git/snapshot/ethos-u-core-driver-bcb5aaa99756f1b5c1295b079ebdd60996bc75a5.tar.gz" ETHOSU_MD5 := "d2073c8d88fc167fd5c46b5dcda58ea1" -HIMAX_WE1_SDK_URL ="https://www.himax.com.tw/we-i/himax_we1_sdk_v02.zip" -HIMAX_WE1_SDK_MD5 ="9a4b2f29b16052764e437b64bdcba816" +HIMAX_WE1_SDK_URL ="https://www.himax.com.tw/we-i/himax_we1_sdk_v03.zip" +HIMAX_WE1_SDK_MD5 ="1cd9b17f3fdb3e9a1dfd1cc356694325" From f89988b8ed6b70567c60d41aad5c064c8bbd086f Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Tue, 14 Jul 2020 17:01:11 +0700 Subject: [PATCH 0367/2522] Add aws crypto --- .../experimental/filesystem/plugins/s3/BUILD | 11 ++ .../filesystem/plugins/s3/aws_crypto.cc | 135 ++++++++++++++++++ .../filesystem/plugins/s3/aws_crypto.h | 47 ++++++ 3 files changed, 193 insertions(+) create mode 100644 tensorflow/c/experimental/filesystem/plugins/s3/aws_crypto.cc create mode 100644 tensorflow/c/experimental/filesystem/plugins/s3/aws_crypto.h diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/BUILD b/tensorflow/c/experimental/filesystem/plugins/s3/BUILD index d72db1a3f41..102c80b7c5c 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/BUILD +++ b/tensorflow/c/experimental/filesystem/plugins/s3/BUILD @@ -30,3 +30,14 @@ cc_library( "@aws", ], ) + +cc_library( + name = "aws_crypto", + srcs = ["aws_crypto.cc"], + hdrs = ["aws_crypto.h"], + deps = [ + "@aws", + "@boringssl//:crypto", + ], + alwayslink = 1, +) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/aws_crypto.cc b/tensorflow/c/experimental/filesystem/plugins/s3/aws_crypto.cc new file mode 100644 index 
index 00000000000..77b01f09dce
--- /dev/null
+++ b/tensorflow/c/experimental/filesystem/plugins/s3/aws_crypto.cc
@@ -0,0 +1,135 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/c/experimental/filesystem/plugins/s3/aws_crypto.h"
+
+#include <aws/core/utils/crypto/HashResult.h>
+#include <aws/s3/S3Client.h>
+#include <openssl/hmac.h>
+#include <openssl/rand.h>
+#include <openssl/sha.h>
+
+namespace tf_s3_filesystem {
+
+class AWSSha256HMACOpenSSLImpl : public Aws::Utils::Crypto::HMAC {
+ public:
+  AWSSha256HMACOpenSSLImpl() {}
+
+  virtual ~AWSSha256HMACOpenSSLImpl() = default;
+
+  virtual Aws::Utils::Crypto::HashResult Calculate(
+      const Aws::Utils::ByteBuffer& toSign,
+      const Aws::Utils::ByteBuffer& secret) override {
+    unsigned int length = SHA256_DIGEST_LENGTH;
+    Aws::Utils::ByteBuffer digest(length);
+    memset(digest.GetUnderlyingData(), 0, length);
+
+    HMAC_CTX ctx;
+    HMAC_CTX_init(&ctx);
+
+    HMAC_Init_ex(&ctx, secret.GetUnderlyingData(),
+                 static_cast<int>(secret.GetLength()), EVP_sha256(), NULL);
+    HMAC_Update(&ctx, toSign.GetUnderlyingData(), toSign.GetLength());
+    HMAC_Final(&ctx, digest.GetUnderlyingData(), &length);
+    HMAC_CTX_cleanup(&ctx);
+
+    return Aws::Utils::Crypto::HashResult(std::move(digest));
+  }
+};
+
+class AWSSha256OpenSSLImpl : public Aws::Utils::Crypto::Hash {
+ public:
+  AWSSha256OpenSSLImpl() {}
+
+  virtual ~AWSSha256OpenSSLImpl() = default;
+
+  virtual Aws::Utils::Crypto::HashResult Calculate(
+      const Aws::String& str) override {
+    SHA256_CTX sha256;
+    SHA256_Init(&sha256);
+    SHA256_Update(&sha256, str.data(), str.size());
+
+    Aws::Utils::ByteBuffer hash(SHA256_DIGEST_LENGTH);
+    SHA256_Final(hash.GetUnderlyingData(), &sha256);
+
+    return Aws::Utils::Crypto::HashResult(std::move(hash));
+  }
+
+  virtual Aws::Utils::Crypto::HashResult Calculate(
+      Aws::IStream& stream) override {
+    SHA256_CTX sha256;
+    SHA256_Init(&sha256);
+
+    auto currentPos = stream.tellg();
+    if (currentPos == std::streampos(std::streamoff(-1))) {
+      currentPos = 0;
+      stream.clear();
+    }
+
+    stream.seekg(0, stream.beg);
+
+    char streamBuffer
+        [Aws::Utils::Crypto::Hash::INTERNAL_HASH_STREAM_BUFFER_SIZE];
+    while (stream.good()) {
+      stream.read(streamBuffer,
+                  Aws::Utils::Crypto::Hash::INTERNAL_HASH_STREAM_BUFFER_SIZE);
+      auto bytesRead = stream.gcount();
+
+      if (bytesRead > 0) {
+        SHA256_Update(&sha256, streamBuffer, static_cast<size_t>(bytesRead));
+      }
+    }
+
+    stream.clear();
+    stream.seekg(currentPos, stream.beg);
+
+    Aws::Utils::ByteBuffer hash(SHA256_DIGEST_LENGTH);
+    SHA256_Final(hash.GetUnderlyingData(), &sha256);
+
+    return Aws::Utils::Crypto::HashResult(std::move(hash));
+  }
+};
+
+class AWSSecureRandomBytesImpl : public Aws::Utils::Crypto::SecureRandomBytes {
+ public:
+  AWSSecureRandomBytesImpl() {}
+  virtual ~AWSSecureRandomBytesImpl() = default;
+  virtual void GetBytes(unsigned char* buffer, size_t bufferSize) override {
+    assert(buffer);
+    int success = RAND_bytes(buffer, static_cast<int>(bufferSize));
+    if (success != 1) {
+      m_failure = true;
+    }
+  }
+
+ private:
+  bool m_failure;
+};
+
+std::shared_ptr<Aws::Utils::Crypto::Hash>
+AWSSHA256Factory::CreateImplementation() const {
+  return Aws::MakeShared<AWSSha256OpenSSLImpl>(AWSCryptoAllocationTag);
+}
+
+std::shared_ptr<Aws::Utils::Crypto::HMAC>
+AWSSHA256HmacFactory::CreateImplementation() const {
+  return Aws::MakeShared<AWSSha256HMACOpenSSLImpl>(AWSCryptoAllocationTag);
+}
+
+std::shared_ptr<Aws::Utils::Crypto::SecureRandomBytes>
+AWSSecureRandomFactory::CreateImplementation() const {
+  return Aws::MakeShared<AWSSecureRandomBytesImpl>(AWSCryptoAllocationTag);
+}
+
+} // namespace tf_s3_filesystem
diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/aws_crypto.h b/tensorflow/c/experimental/filesystem/plugins/s3/aws_crypto.h
new file mode 100644
index 00000000000..10554c0bcda
--- /dev/null
+++ b/tensorflow/c/experimental/filesystem/plugins/s3/aws_crypto.h
@@ -0,0 +1,47 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_C_EXPERIMENTAL_FILESYSTEM_PLUGINS_S3_AWS_CRYPTO_H_
+#define TENSORFLOW_C_EXPERIMENTAL_FILESYSTEM_PLUGINS_S3_AWS_CRYPTO_H_
+
+#include <aws/core/Aws.h>
+#include <aws/core/utils/crypto/Factories.h>
+#include <aws/core/utils/crypto/HMAC.h>
+#include <aws/core/utils/crypto/Hash.h>
+#include <aws/core/utils/crypto/SecureRandom.h>
+
+namespace tf_s3_filesystem {
+constexpr char AWSCryptoAllocationTag[] = "AWSCryptoAllocation";
+
+class AWSSHA256Factory : public Aws::Utils::Crypto::HashFactory {
+ public:
+  std::shared_ptr<Aws::Utils::Crypto::Hash> CreateImplementation()
+      const override;
+};
+
+class AWSSHA256HmacFactory : public Aws::Utils::Crypto::HMACFactory {
+ public:
+  std::shared_ptr<Aws::Utils::Crypto::HMAC> CreateImplementation()
+      const override;
+};
+
+class AWSSecureRandomFactory : public Aws::Utils::Crypto::SecureRandomFactory {
+ public:
+  std::shared_ptr<Aws::Utils::Crypto::SecureRandomBytes> CreateImplementation()
+      const override;
+};
+
+} // namespace tf_s3_filesystem
+
+#endif // TENSORFLOW_C_EXPERIMENTAL_FILESYSTEM_PLUGINS_S3_AWS_CRYPTO_H_
\ No newline at end of file

From 2f4444c1cff8ac07ab2c31d1ae23d23c66147126 Mon Sep 17 00:00:00 2001
From: Adrian Kuegel
Date: Tue, 14 Jul 2020 03:20:55 -0700
Subject: [PATCH 0368/2522] Integrate LLVM at https://github.com/llvm/llvm-project/commit/3667d87a33d3

PiperOrigin-RevId: 321127576
Change-Id: I95e190f84da0df0431ca9fc9e2cbb4865a4db259
---
 tensorflow/workspace.bzl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 5b77acb139a..0381eb69058 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -710,8 +710,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""):
     )
 
     # Check out LLVM and MLIR from llvm-project.
- LLVM_COMMIT = "74c14202d90b46dda64a2542602855727b7d7f60" - LLVM_SHA256 = "4d066245a61d94dbab0d15e00e6dffec5754dae0f0914ec47f6ac02e92dffe36" + LLVM_COMMIT = "3667d87a33d3c8d4072a41fd84bb880c59347dc0" + LLVM_SHA256 = "89e0523c771c55bbf0aea55fffffa5fb071ec3788a92f25fda98389a8dfd880e" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From 83829b2a157fa15360be63b14c85b5e34a525cba Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Tue, 14 Jul 2020 17:56:15 +0700 Subject: [PATCH 0369/2522] Add init --- .../experimental/filesystem/plugins/s3/BUILD | 2 ++ .../filesystem/plugins/s3/aws_crypto.h | 2 +- .../filesystem/plugins/s3/s3_filesystem.cc | 21 +++++++++++++++++++ .../filesystem/plugins/s3/s3_filesystem.h | 14 +++++++++++++ 4 files changed, 38 insertions(+), 1 deletion(-) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/BUILD b/tensorflow/c/experimental/filesystem/plugins/s3/BUILD index 102c80b7c5c..2560130062d 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/BUILD +++ b/tensorflow/c/experimental/filesystem/plugins/s3/BUILD @@ -25,9 +25,11 @@ cc_library( "//tensorflow:windows": get_win_copts(), }), deps = [ + ":aws_crypto", "//tensorflow/c:tf_status", "//tensorflow/c/experimental/filesystem:filesystem_interface", "@aws", + "@com_google_absl//absl/synchronization", ], ) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/aws_crypto.h b/tensorflow/c/experimental/filesystem/plugins/s3/aws_crypto.h index 10554c0bcda..a70bf060fc7 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/aws_crypto.h +++ b/tensorflow/c/experimental/filesystem/plugins/s3/aws_crypto.h @@ -44,4 +44,4 @@ class AWSSecureRandomFactory : public Aws::Utils::Crypto::SecureRandomFactory { } // namespace tf_s3_filesystem -#endif // TENSORFLOW_C_EXPERIMENTAL_FILESYSTEM_PLUGINS_S3_AWS_CRYPTO_H_ \ No newline at end of file +#endif // TENSORFLOW_C_EXPERIMENTAL_FILESYSTEM_PLUGINS_S3_AWS_CRYPTO_H_ diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc index 45350565500..f6ec1361335 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc @@ -52,6 +52,14 @@ static void ParseS3Path(const Aws::String& fname, bool object_empty_ok, } } +static void ShutdownClient(Aws::S3::S3Client* s3_client) { + if (s3_client != nullptr) { + delete s3_client; + Aws::SDKOptions options; + Aws::ShutdownAPI(options); + } +} + // SECTION 1. Implementation for `TF_RandomAccessFile` // ---------------------------------------------------------------------------- namespace tf_random_access_file { @@ -79,6 +87,19 @@ namespace tf_read_only_memory_region { // SECTION 4. 
Implementation for `TF_Filesystem`, the actual filesystem
 // ----------------------------------------------------------------------------
 namespace tf_s3_filesystem {
+S3File::S3File()
+    : s3_client(nullptr, ShutdownClient),
+      executor(nullptr),
+      initialization_lock() {}
+
+void Init(TF_Filesystem* filesystem, TF_Status* status) {
+  filesystem->plugin_filesystem = new S3File();
+  TF_SetStatus(status, TF_OK, "");
+}
+
+void Cleanup(TF_Filesystem* filesystem) {
+  auto s3_file = static_cast<S3File*>(filesystem->plugin_filesystem);
+  delete s3_file;
+}
 
 // TODO(vnvo2409): Implement later
 
diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h
index 62e2a7e0c06..9086b5d00f4 100644
--- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h
+++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h
@@ -17,8 +17,22 @@ limitations under the License.
 #include <aws/core/Aws.h>
 #include <aws/s3/S3Client.h>
+#include <aws/core/utils/threading/Executor.h>
+#include "absl/synchronization/mutex.h"
 #include "tensorflow/c/experimental/filesystem/filesystem_interface.h"
 #include "tensorflow/c/tf_status.h"
 
+namespace tf_s3_filesystem {
+typedef struct S3File {
+  std::shared_ptr<Aws::S3::S3Client> s3_client;
+  std::shared_ptr<Aws::Utils::Threading::PooledThreadExecutor> executor;
+  absl::Mutex initialization_lock;
+  S3File();
+} S3File;
+void Init(TF_Filesystem* filesystem, TF_Status* status);
+void Cleanup(TF_Filesystem* filesystem);
+} // namespace tf_s3_filesystem
+
 #endif  // TENSORFLOW_C_EXPERIMENTAL_FILESYSTEM_PLUGINS_S3_S3_FILESYSTEM_H_

From b6980e052907899efa32f2fbf3971a643b3528f6 Mon Sep 17 00:00:00 2001
From: Adrian Kuegel
Date: Tue, 14 Jul 2020 05:04:06 -0700
Subject: [PATCH 0370/2522] Integrate LLVM at https://github.com/llvm/llvm-project/commit/300156932321

PiperOrigin-RevId: 321137853
Change-Id: I1758f99d2ca7594afba2488622b5d785d2e32930
---
 tensorflow/workspace.bzl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 0381eb69058..6ff28a1a9a3 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -710,8 +710,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""):
     )
 
     # Check out LLVM and MLIR from llvm-project.
-    LLVM_COMMIT = "3667d87a33d3c8d4072a41fd84bb880c59347dc0"
-    LLVM_SHA256 = "89e0523c771c55bbf0aea55fffffa5fb071ec3788a92f25fda98389a8dfd880e"
+    LLVM_COMMIT = "300156932321a8b34b46d6a890cce0699525ed20"
+    LLVM_SHA256 = "932ca27455d396ba8be72032cbc7e284f882761b0dd9d33af14e354f8f9b5718"
     LLVM_URLS = [
         "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT),
         "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT),

From 20982b20523d69f28a546d8d63e9ba5979fc5bc0 Mon Sep 17 00:00:00 2001
From: Mihai Maruseac
Date: Tue, 14 Jul 2020 06:09:01 -0700
Subject: [PATCH 0371/2522] Revert Split saved model benchmark tests into individual test to avoid timeout.
PiperOrigin-RevId: 321144378 Change-Id: Icbdad7cdb1daa873c65ce4000e0ce68ed0f7a829 --- tensorflow/python/keras/benchmarks/BUILD | 25 +++-- .../applications_saved_model_test.py | 93 ++++++++++++++++ .../benchmarks/saved_model_benchmarks/BUILD | 104 ------------------ .../densenet_benchmark_test.py | 43 -------- .../efficientnet_benchmark_test.py | 43 -------- .../inception_resnet_v2_benchmark_test.py | 44 -------- .../mobilenet_benchmark_test.py | 43 -------- .../nasnet_large_benchmark_test.py | 43 -------- .../resnet152_v2_benchmark_test.py | 44 -------- .../saved_model_benchmark_util.py | 70 ------------ .../vgg_benchmark_test.py | 44 -------- .../xception_benchmark_test.py | 44 -------- tensorflow/tools/pip_package/BUILD | 2 +- 13 files changed, 110 insertions(+), 532 deletions(-) create mode 100644 tensorflow/python/keras/benchmarks/applications_saved_model_test.py delete mode 100644 tensorflow/python/keras/benchmarks/saved_model_benchmarks/BUILD delete mode 100644 tensorflow/python/keras/benchmarks/saved_model_benchmarks/densenet_benchmark_test.py delete mode 100644 tensorflow/python/keras/benchmarks/saved_model_benchmarks/efficientnet_benchmark_test.py delete mode 100644 tensorflow/python/keras/benchmarks/saved_model_benchmarks/inception_resnet_v2_benchmark_test.py delete mode 100644 tensorflow/python/keras/benchmarks/saved_model_benchmarks/mobilenet_benchmark_test.py delete mode 100644 tensorflow/python/keras/benchmarks/saved_model_benchmarks/nasnet_large_benchmark_test.py delete mode 100644 tensorflow/python/keras/benchmarks/saved_model_benchmarks/resnet152_v2_benchmark_test.py delete mode 100644 tensorflow/python/keras/benchmarks/saved_model_benchmarks/saved_model_benchmark_util.py delete mode 100644 tensorflow/python/keras/benchmarks/saved_model_benchmarks/vgg_benchmark_test.py delete mode 100644 tensorflow/python/keras/benchmarks/saved_model_benchmarks/xception_benchmark_test.py diff --git a/tensorflow/python/keras/benchmarks/BUILD b/tensorflow/python/keras/benchmarks/BUILD index 5e569bc5ad2..2386b01c426 100644 --- a/tensorflow/python/keras/benchmarks/BUILD +++ b/tensorflow/python/keras/benchmarks/BUILD @@ -23,15 +23,6 @@ exports_files(["LICENSE"]) # to the regular expression is executed. # e.g. --benchmarks=".*lstm*." will run all lstm layer related benchmarks. -# Add all benchmarks related utils here for pip testing dependencis. -py_library( - name = "keras_benchmark_lib_pip", - deps = [ - ":benchmark_util", - "//tensorflow/python/keras/benchmarks/saved_model_benchmarks:saved_model_benchmark_util", - ], -) - py_test( name = "keras_cpu_benchmark_test", size = "large", @@ -58,6 +49,22 @@ cuda_py_test( ], ) +cuda_py_test( + name = "applications_saved_model_test", + size = "medium", + srcs = ["applications_saved_model_test.py"], + shard_count = 8, + tags = [ + "no_oss_py38", # b/160170347 + "no_windows", # b/160269052 + ], + deps = [ + "//tensorflow/python:client_testlib", + "//tensorflow/python/keras/applications", + "@absl_py//absl/testing:parameterized", + ], +) + cuda_py_test( name = "model_components_benchmarks_test", srcs = ["model_components_benchmarks_test.py"], diff --git a/tensorflow/python/keras/benchmarks/applications_saved_model_test.py b/tensorflow/python/keras/benchmarks/applications_saved_model_test.py new file mode 100644 index 00000000000..0111c8f13b9 --- /dev/null +++ b/tensorflow/python/keras/benchmarks/applications_saved_model_test.py @@ -0,0 +1,93 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Benchmarks for Keras applications.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tempfile +import time + +import six + +from tensorflow.python.keras.applications import densenet +from tensorflow.python.keras.applications import efficientnet +from tensorflow.python.keras.applications import inception_resnet_v2 +from tensorflow.python.keras.applications import mobilenet_v2 +from tensorflow.python.keras.applications import nasnet +from tensorflow.python.keras.applications import resnet_v2 +from tensorflow.python.keras.applications import vgg19 +from tensorflow.python.keras.applications import xception +from tensorflow.python.keras.saving.saved_model import load as keras_load +from tensorflow.python.platform import benchmark +from tensorflow.python.platform import gfile +from tensorflow.python.platform import googletest +from tensorflow.python.platform import test + + +class BenchmarkSaveApplications( + six.with_metaclass(benchmark.ParameterizedBenchmark, test.Benchmark)): + + _benchmark_parameters = [ + ('ResNet152V2', resnet_v2.ResNet152V2, 2048), + ('VGG19', vgg19.VGG19, 512), + ('Xception', xception.Xception, 2048), + ('InceptionResNetV2', inception_resnet_v2.InceptionResNetV2, 1536), + ('MobileNetV2', mobilenet_v2.MobileNetV2, 1280), + ('DenseNet201', densenet.DenseNet201, 1920), + ('EfficientNetB7', efficientnet.EfficientNetB7, 2560), + ('NASNetLarge', nasnet.NASNetLarge, 4032), + ] + + def benchmark_save_and_load_applications(self, app, _): + trials = 3 + + model = app(weights=None) + model_name = app.__name__ + + tmp_dir = googletest.GetTempDir() + gfile.MakeDirs(tmp_dir) + save_dir = tempfile.mkdtemp(dir=tmp_dir) + + total_save_time = 0 + total_load_time = 0 + + # Run one untimed iteration of saving/loading. + model.save(save_dir, save_format='tf') + keras_load.load(save_dir) + + for _ in range(trials): + start_time = time.time() + model.save(save_dir, save_format='tf') + total_save_time += time.time() - start_time + + start_time = time.time() + keras_load.load(save_dir) + total_load_time += time.time() - start_time + self.report_benchmark( + iters=trials, + wall_time=total_save_time / trials, + name='{}.save'.format(model_name)) + + self.report_benchmark( + iters=1, + wall_time=total_load_time / trials, + name='{}.load'.format(model_name)) + gfile.DeleteRecursively(save_dir) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/BUILD b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/BUILD deleted file mode 100644 index 147576849a9..00000000000 --- a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/BUILD +++ /dev/null @@ -1,104 +0,0 @@ -# Description: -# Implementation of Keras benchmarks. 
- -load("//tensorflow:tensorflow.bzl", "cuda_py_test") - -package( - default_visibility = ["//visibility:public"], - licenses = ["notice"], # Apache 2.0 -) - -exports_files(["LICENSE"]) - -# To run CPU benchmarks: -# bazel run -c opt benchmarks_test -- --benchmarks=. - -# To run GPU benchmarks: -# bazel run --config=cuda -c opt --copt="-mavx" benchmarks_test -- \ -# --benchmarks=. - -# To run a subset of benchmarks using --benchmarks flag. -# --benchmarks: the list of benchmarks to run. The specified value is interpreted -# as a regular expression and any benchmark whose name contains a partial match -# to the regular expression is executed. -# e.g. --benchmarks=".*lstm*." will run all lstm layer related benchmarks. - -py_library( - name = "saved_model_benchmark_util", - srcs = ["saved_model_benchmark_util.py"], - deps = [ - "//tensorflow:tensorflow_py", - ], -) - -cuda_py_test( - name = "densenet_benchmark_test", - srcs = ["densenet_benchmark_test.py"], - deps = [ - ":saved_model_benchmark_util", - "//tensorflow:tensorflow_py", - ], -) - -cuda_py_test( - name = "efficientnet_benchmark_test", - srcs = ["efficientnet_benchmark_test.py"], - deps = [ - ":saved_model_benchmark_util", - "//tensorflow:tensorflow_py", - ], -) - -cuda_py_test( - name = "inception_resnet_v2_benchmark_test", - srcs = ["inception_resnet_v2_benchmark_test.py"], - deps = [ - ":saved_model_benchmark_util", - "//tensorflow:tensorflow_py", - ], -) - -cuda_py_test( - name = "mobilenet_benchmark_test", - srcs = ["mobilenet_benchmark_test.py"], - deps = [ - ":saved_model_benchmark_util", - "//tensorflow:tensorflow_py", - ], -) - -cuda_py_test( - name = "nasnet_large_benchmark_test", - srcs = ["nasnet_large_benchmark_test.py"], - deps = [ - ":saved_model_benchmark_util", - "//tensorflow:tensorflow_py", - ], -) - -cuda_py_test( - name = "resnet152_v2_benchmark_test", - srcs = ["resnet152_v2_benchmark_test.py"], - deps = [ - ":saved_model_benchmark_util", - "//tensorflow:tensorflow_py", - ], -) - -cuda_py_test( - name = "vgg_benchmark_test", - srcs = ["vgg_benchmark_test.py"], - deps = [ - ":saved_model_benchmark_util", - "//tensorflow:tensorflow_py", - ], -) - -cuda_py_test( - name = "xception_benchmark_test", - srcs = ["xception_benchmark_test.py"], - deps = [ - ":saved_model_benchmark_util", - "//tensorflow:tensorflow_py", - ], -) diff --git a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/densenet_benchmark_test.py b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/densenet_benchmark_test.py deleted file mode 100644 index 3b8e9d632f5..00000000000 --- a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/densenet_benchmark_test.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Benchmarks for saved model on DenseNet201.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf -from tensorflow.python.keras.benchmarks.saved_model_benchmarks import saved_model_benchmark_util - - -class BenchmarkSaveApplications(tf.test.Benchmark): - - def benchmark_save_and_load_densenet_201(self): - app = tf.keras.applications.DenseNet201 - save_result, load_result = ( - saved_model_benchmark_util.save_and_load_benchmark(app)) - - self.report_benchmark( - iters=save_result['iters'], - wall_time=save_result['wall_time'], - name=save_result['name']) - - self.report_benchmark( - iters=load_result['iters'], - wall_time=load_result['wall_time'], - name=load_result['name']) - -if __name__ == '__main__': - tf.test.main() diff --git a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/efficientnet_benchmark_test.py b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/efficientnet_benchmark_test.py deleted file mode 100644 index 27316e2997a..00000000000 --- a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/efficientnet_benchmark_test.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Benchmarks for saved model on EfficientNetB7.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf -from tensorflow.python.keras.benchmarks.saved_model_benchmarks import saved_model_benchmark_util - - -class BenchmarkSaveApplications(tf.test.Benchmark): - - def benchmark_save_and_load_efficient_net_b7(self): - app = tf.keras.applications.EfficientNetB7 - save_result, load_result = ( - saved_model_benchmark_util.save_and_load_benchmark(app)) - - self.report_benchmark( - iters=save_result['iters'], - wall_time=save_result['wall_time'], - name=save_result['name']) - - self.report_benchmark( - iters=load_result['iters'], - wall_time=load_result['wall_time'], - name=load_result['name']) - -if __name__ == '__main__': - tf.test.main() diff --git a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/inception_resnet_v2_benchmark_test.py b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/inception_resnet_v2_benchmark_test.py deleted file mode 100644 index d2d5090e878..00000000000 --- a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/inception_resnet_v2_benchmark_test.py +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Benchmarks for saved model on InceptionResNetV2.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf -from tensorflow.python.keras.benchmarks.saved_model_benchmarks import saved_model_benchmark_util - - -class BenchmarkSaveApplications(tf.test.Benchmark): - - def benchmark_save_and_load_inception_resnet_v2(self): - app = tf.keras.applications.InceptionResNetV2 - save_result, load_result = ( - saved_model_benchmark_util.save_and_load_benchmark(app)) - - self.report_benchmark( - iters=save_result['iters'], - wall_time=save_result['wall_time'], - name=save_result['name']) - - self.report_benchmark( - iters=load_result['iters'], - wall_time=load_result['wall_time'], - name=load_result['name']) - - -if __name__ == '__main__': - tf.test.main() diff --git a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/mobilenet_benchmark_test.py b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/mobilenet_benchmark_test.py deleted file mode 100644 index 0d6b61f141e..00000000000 --- a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/mobilenet_benchmark_test.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Benchmarks for saved model on MobileNetV2.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf -from tensorflow.python.keras.benchmarks.saved_model_benchmarks import saved_model_benchmark_util - - -class BenchmarkSaveApplications(tf.test.Benchmark): - - def benchmark_save_and_load_mobilenet_v2(self): - app = tf.keras.applications.MobileNetV2 - save_result, load_result = ( - saved_model_benchmark_util.save_and_load_benchmark(app)) - - self.report_benchmark( - iters=save_result['iters'], - wall_time=save_result['wall_time'], - name=save_result['name']) - - self.report_benchmark( - iters=load_result['iters'], - wall_time=load_result['wall_time'], - name=load_result['name']) - -if __name__ == '__main__': - tf.test.main() diff --git a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/nasnet_large_benchmark_test.py b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/nasnet_large_benchmark_test.py deleted file mode 100644 index 864ce1930ee..00000000000 --- a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/nasnet_large_benchmark_test.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Benchmarks for saved model on NASNetLarge.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf -from tensorflow.python.keras.benchmarks.saved_model_benchmarks import saved_model_benchmark_util - - -class BenchmarkSaveApplications(tf.test.Benchmark): - - def benchmark_save_and_load_nasnet_large(self): - app = tf.keras.applications.NASNetLarge - save_result, load_result = ( - saved_model_benchmark_util.save_and_load_benchmark(app)) - - self.report_benchmark( - iters=save_result['iters'], - wall_time=save_result['wall_time'], - name=save_result['name']) - - self.report_benchmark( - iters=load_result['iters'], - wall_time=load_result['wall_time'], - name=load_result['name']) - -if __name__ == '__main__': - tf.test.main() diff --git a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/resnet152_v2_benchmark_test.py b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/resnet152_v2_benchmark_test.py deleted file mode 100644 index a0603eb5136..00000000000 --- a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/resnet152_v2_benchmark_test.py +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Benchmarks for saved model on ResNet152V2.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf -from tensorflow.python.keras.benchmarks.saved_model_benchmarks import saved_model_benchmark_util - - -class BenchmarkSaveApplications(tf.test.Benchmark): - - def benchmark_save_and_load_resnet152_v2(self): - app = tf.keras.applications.ResNet152V2 - save_result, load_result = ( - saved_model_benchmark_util.save_and_load_benchmark(app)) - - self.report_benchmark( - iters=save_result['iters'], - wall_time=save_result['wall_time'], - name=save_result['name']) - - self.report_benchmark( - iters=load_result['iters'], - wall_time=load_result['wall_time'], - name=load_result['name']) - - -if __name__ == '__main__': - tf.test.main() diff --git a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/saved_model_benchmark_util.py b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/saved_model_benchmark_util.py deleted file mode 100644 index a0760fa075c..00000000000 --- a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/saved_model_benchmark_util.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Utils for saved model benchmarks.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tempfile -import time - -import tensorflow as tf - -from tensorflow.python.platform import gfile -from tensorflow.python.platform import googletest - - -def save_and_load_benchmark(app): - """Util for saved model benchmarks.""" - trials = 3 - - model = app(weights=None) - model_name = app.__name__ - - tmp_dir = googletest.GetTempDir() - gfile.MakeDirs(tmp_dir) - save_dir = tempfile.mkdtemp(dir=tmp_dir) - - total_save_time = 0 - total_load_time = 0 - - # Run one untimed iteration of saving/loading. 
- model.save(save_dir, save_format='tf') - tf.keras.models.load_model(save_dir) - - for _ in range(trials): - start_time = time.time() - model.save(save_dir, save_format='tf') - total_save_time += time.time() - start_time - - start_time = time.time() - tf.keras.models.load_model(save_dir) - total_load_time += time.time() - start_time - - save_result = { - 'iters': trials, - 'wall_time': total_save_time / trials, - 'name': '{}.save'.format(model_name) - } - - load_result = { - 'iters': trials, - 'wall_time': total_load_time / trials, - 'name': '{}.load'.format(model_name) - } - gfile.DeleteRecursively(save_dir) - return save_result, load_result - diff --git a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/vgg_benchmark_test.py b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/vgg_benchmark_test.py deleted file mode 100644 index 3ceebe4fcc4..00000000000 --- a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/vgg_benchmark_test.py +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Benchmarks for saved model on VGG19.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf -from tensorflow.python.keras.benchmarks.saved_model_benchmarks import saved_model_benchmark_util - - -class BenchmarkSaveApplications(tf.test.Benchmark): - - def benchmark_save_and_load_vgg19(self): - app = tf.keras.applications.VGG19 - save_result, load_result = ( - saved_model_benchmark_util.save_and_load_benchmark(app)) - - self.report_benchmark( - iters=save_result['iters'], - wall_time=save_result['wall_time'], - name=save_result['name']) - - self.report_benchmark( - iters=load_result['iters'], - wall_time=load_result['wall_time'], - name=load_result['name']) - - -if __name__ == '__main__': - tf.test.main() diff --git a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/xception_benchmark_test.py b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/xception_benchmark_test.py deleted file mode 100644 index ddab2f68ffd..00000000000 --- a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/xception_benchmark_test.py +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Benchmarks for saved model on Xception.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf -from tensorflow.python.keras.benchmarks.saved_model_benchmarks import saved_model_benchmark_util - - -class BenchmarkSaveApplications(tf.test.Benchmark): - - def benchmark_save_and_load_xception(self): - app = tf.keras.applications.Xception - save_result, load_result = ( - saved_model_benchmark_util.save_and_load_benchmark(app)) - - self.report_benchmark( - iters=save_result['iters'], - wall_time=save_result['wall_time'], - name=save_result['name']) - - self.report_benchmark( - iters=load_result['iters'], - wall_time=load_result['wall_time'], - name=load_result['name']) - - -if __name__ == '__main__': - tf.test.main() diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index 38ff12b100e..4e608360f8b 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -132,7 +132,7 @@ COMMON_PIP_DEPS = [ "//tensorflow/python/keras/mixed_precision/experimental:test_util", "//tensorflow/python/keras/tests:model_subclassing_test_util", "//tensorflow/python/keras/tests:model_architectures", - "//tensorflow/python/keras/benchmarks:keras_benchmark_lib_pip", + "//tensorflow/python/keras/benchmarks:benchmark_util", "//tensorflow/python/kernel_tests:cudnn_deterministic_base", "//tensorflow/python/kernel_tests:bias_op_base", "//tensorflow/python/kernel_tests/random:util", From 39f3153657c27184ac048301708d6a85e2b53cf5 Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Tue, 14 Jul 2020 06:18:16 -0700 Subject: [PATCH 0372/2522] Integrate LLVM at https://github.com/llvm/llvm-project/commit/84a1bc7f2c0c PiperOrigin-RevId: 321145244 Change-Id: Id88f2e7a561e2683c5bd8a3f2e6cb95666c84d77 --- tensorflow/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 6ff28a1a9a3..a1c48e2675f 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -710,8 +710,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. 
- LLVM_COMMIT = "300156932321a8b34b46d6a890cce0699525ed20" - LLVM_SHA256 = "932ca27455d396ba8be72032cbc7e284f882761b0dd9d33af14e354f8f9b5718" + LLVM_COMMIT = "84a1bc7f2c0c7bd5f18a4ecaf91e27644aa94190" + LLVM_SHA256 = "ae5a0e4bdca845ab4b0af588a38818d4f1005ae391d5dcef800615ec7ebbc370" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From ddf7b6108cc0a0eba025cf617e1feec39c76543b Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Tue, 14 Jul 2020 06:19:33 -0700 Subject: [PATCH 0373/2522] Disable another failing test PiperOrigin-RevId: 321145376 Change-Id: Ib575d5de487fd3747e1c5580555e907887906fd9 --- tensorflow/python/keras/saving/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/keras/saving/BUILD b/tensorflow/python/keras/saving/BUILD index 258c1dbfaec..62000be42d9 100644 --- a/tensorflow/python/keras/saving/BUILD +++ b/tensorflow/python/keras/saving/BUILD @@ -144,6 +144,7 @@ tf_py_test( tags = [ "no_rocm", "no_windows", + "notap", # TODO(b/161198218): flaky timeout ], deps = [ "//tensorflow/python:client_testlib", From 8263f588362f9ef4efdfb268c6d772f7e238891b Mon Sep 17 00:00:00 2001 From: Yixing Fu Date: Tue, 14 Jul 2020 15:31:24 +0000 Subject: [PATCH 0374/2522] pylint fix --- tensorflow/python/saved_model/loader_impl.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/saved_model/loader_impl.py b/tensorflow/python/saved_model/loader_impl.py index 06cd988130d..6daa631084f 100644 --- a/tensorflow/python/saved_model/loader_impl.py +++ b/tensorflow/python/saved_model/loader_impl.py @@ -73,7 +73,8 @@ def parse_saved_model(export_dir): """Reads the savedmodel.pb or savedmodel.pbtxt file containing `SavedModel`. Args: - export_dir: String or Pathlike, path to the directory containing the SavedModel file. + export_dir: String or Pathlike, path to the directory containing the + SavedModel file. Returns: A `SavedModel` protocol buffer. From 9c170cdab6bf72b218c04b69712f834b792df453 Mon Sep 17 00:00:00 2001 From: Xingyu Long Date: Tue, 14 Jul 2020 11:42:25 -0400 Subject: [PATCH 0375/2522] Update keras_cpu_benchmark_test because of change in benchmark_util. 
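benchmark_util.measure_performance now returns a (metrics, wall_time) tuple
rather than a single results dict, so each benchmark below unpacks the tuple
and passes metrics= to report_benchmark instead of extras=results. A minimal
sketch of the assumed calling convention, mirroring the updated
benchmark_mnist_mlp (mlp_x/mlp_y shapes, _OPTIMIZER and _LOSS come from the
existing test file; the exact structure of `metrics` is assumed to be whatever
tf.test.Benchmark.report_benchmark accepts for its metrics argument):

    mlp_x = np.random.random((5000, 784))
    mlp_y = np.random.random((5000, 10))
    # Returns the collected metrics plus the averaged wall time for the run.
    metrics, wall_time = benchmark_util.measure_performance(
        self._mnist_mlp,        # callable that builds the Keras MLP model
        x=mlp_x,
        y=mlp_y,
        batch_size=batch_size,
        run_iters=run_iters,
        optimizer=_OPTIMIZER,
        loss=_LOSS)
    # report_benchmark consumes the wall time directly and the metrics list
    # via its metrics= argument (previously passed as extras=results).
    self.report_benchmark(iters=run_iters, wall_time=wall_time, metrics=metrics)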
--- .../keras/benchmarks/keras_cpu_benchmark_test.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/keras/benchmarks/keras_cpu_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_cpu_benchmark_test.py index 07419885fec..633fe139d33 100644 --- a/tensorflow/python/keras/benchmarks/keras_cpu_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_cpu_benchmark_test.py @@ -95,7 +95,7 @@ class KerasModelCPUBenchmark( """Benchmark for MLP model on synthetic mnist data.""" mlp_x = np.random.random((5000, 784)) mlp_y = np.random.random((5000, 10)) - results = benchmark_util.measure_performance( + metrics, wall_time = benchmark_util.measure_performance( self._mnist_mlp, x=mlp_x, y=mlp_y, @@ -104,13 +104,13 @@ class KerasModelCPUBenchmark( optimizer=_OPTIMIZER, loss=_LOSS) self.report_benchmark( - iters=run_iters, wall_time=results['wall_time'], extras=results) + iters=run_iters, wall_time=wall_time, metrics=metrics) def benchmark_mnist_convnet(self, batch_size, run_iters): """Benchmark for Convnet model on synthetic mnist data.""" convnet_x = np.random.random((5000, 28, 28, 1)) convnet_y = np.random.random((5000, 10)) - results = benchmark_util.measure_performance( + metrics, wall_time = benchmark_util.measure_performance( self._mnist_convnet, x=convnet_x, y=convnet_y, @@ -119,13 +119,13 @@ class KerasModelCPUBenchmark( optimizer=_OPTIMIZER, loss=_LOSS) self.report_benchmark( - iters=run_iters, wall_time=results['wall_time'], extras=results) + iters=run_iters, wall_time=wall_time, metrics=metrics) def benchmark_imdb_lstm(self, batch_size, run_iters): """Benchmark for LSTM model on synthetic imdb review dataset.""" lstm_x = np.random.randint(0, 1999, size=(2500, 100)) lstm_y = np.random.random((2500, 1)) - results = benchmark_util.measure_performance( + metrics, wall_time = benchmark_util.measure_performance( self._imdb_lstm, x=lstm_x, y=lstm_y, @@ -134,7 +134,7 @@ class KerasModelCPUBenchmark( optimizer=_OPTIMIZER, loss=_LOSS) self.report_benchmark( - iters=run_iters, wall_time=results['wall_time'], extras=results) + iters=run_iters, wall_time=wall_time, metrics=metrics) if __name__ == '__main__': From e8b76a2c5ba5ac77e50134589473c01721f12447 Mon Sep 17 00:00:00 2001 From: bubblebooy Date: Tue, 14 Jul 2020 10:46:14 -0500 Subject: [PATCH 0376/2522] Revert the changes to the ValueError message --- tensorflow/python/keras/layers/dense_attention.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/keras/layers/dense_attention.py b/tensorflow/python/keras/layers/dense_attention.py index 705e5e75732..dd639e49b18 100644 --- a/tensorflow/python/keras/layers/dense_attention.py +++ b/tensorflow/python/keras/layers/dense_attention.py @@ -194,8 +194,8 @@ class BaseDenseAttention(Layer): 'namely [query_mask, value_mask].'.format(class_name)) if len(mask) < 2 or len(mask) > 3: raise ValueError( - '{} layer mask must be a list of length 2 or 3, namely [query_mask, ' - 'value_mask] or [query_mask, value_mask, key_mask]. Given length: {}'.format(class_name, len(mask))) + '{} layer mask must be a list of length 2, namely [query_mask, ' + 'value_mask]. Given length: {}'.format(class_name, len(mask))) def get_config(self): config = { From 310cab50c6b85daacc23e933c8761852cb4be27e Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Tue, 14 Jul 2020 08:46:22 -0700 Subject: [PATCH 0377/2522] Bump the references to github.com/google/ruy to commit 388ffd28ba00ffb9aacbe538225165c02ea33ee3. 
PiperOrigin-RevId: 321165585 Change-Id: I4bd156419edcb62a5ff897c84955508953768ec5 --- .../lite/micro/tools/make/third_party_downloads.inc | 4 ++-- tensorflow/lite/tools/make/Makefile | 4 ---- tensorflow/lite/tools/make/download_dependencies.sh | 4 ++-- third_party/ruy/workspace.bzl | 8 ++++---- 4 files changed, 8 insertions(+), 12 deletions(-) diff --git a/tensorflow/lite/micro/tools/make/third_party_downloads.inc b/tensorflow/lite/micro/tools/make/third_party_downloads.inc index 8590ace9fda..83be806455d 100644 --- a/tensorflow/lite/micro/tools/make/third_party_downloads.inc +++ b/tensorflow/lite/micro/tools/make/third_party_downloads.inc @@ -56,8 +56,8 @@ SIFIVE_FE310_LIB_MD5 := "06ee24c4956f8e21670ab3395861fe64" KISSFFT_URL="https://github.com/mborgerding/kissfft/archive/v130.zip" KISSFFT_MD5="438ba1fef5783cc5f5f201395cc477ca" -RUY_URL="https://github.com/google/ruy/archive/34ea9f4993955fa1ff4eb58e504421806b7f2e8f.zip" -RUY_MD5="18613212e9c01aba85c7d19010b194a9" +RUY_URL="https://github.com/google/ruy/archive/388ffd28ba00ffb9aacbe538225165c02ea33ee3.zip" +RUY_MD5="de57b7bfa37b044c7cb2233dc19f64bb" CIFAR10_DATASET_URL="https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz" CIFAR10_DATASET_MD5="c32a1d4ab5d03f1284b67883e8d87530" diff --git a/tensorflow/lite/tools/make/Makefile b/tensorflow/lite/tools/make/Makefile index f8b67fbbe7d..7d55370818c 100644 --- a/tensorflow/lite/tools/make/Makefile +++ b/tensorflow/lite/tools/make/Makefile @@ -221,10 +221,6 @@ else CORE_CC_EXCLUDE_SRCS += tensorflow/lite/minimal_logging_ios.cc endif -# Temporary fix for ruy compilation error. -# TODO(b/158800055): Remove this hack once the ruy version is correctly bumped. -CORE_CC_EXCLUDE_SRCS += tensorflow/lite/tools/make/downloads/ruy/ruy/prepare_packed_matrices.cc - # Filter out all the excluded files. 
TF_LITE_CC_SRCS := $(filter-out $(CORE_CC_EXCLUDE_SRCS), $(CORE_CC_ALL_SRCS)) diff --git a/tensorflow/lite/tools/make/download_dependencies.sh b/tensorflow/lite/tools/make/download_dependencies.sh index 0ab8307b07a..e22b33e1188 100755 --- a/tensorflow/lite/tools/make/download_dependencies.sh +++ b/tensorflow/lite/tools/make/download_dependencies.sh @@ -37,8 +37,8 @@ EIGEN_URL="$(grep -o 'https.*gitlab.com/libeigen/eigen/-/archive/.*tar\.gz' "${B EIGEN_SHA="$(eval echo $(grep '# SHARED_EIGEN_SHA' "${BZL_FILE_PATH}" | grep -o '\".*\"'))" GEMMLOWP_URL="$(grep -o 'https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" GEMMLOWP_SHA="$(eval echo $(grep '# SHARED_GEMMLOWP_SHA' "${BZL_FILE_PATH}" | grep -o '\".*\"'))" -RUY_URL="https://github.com/google/ruy/archive/34ea9f4993955fa1ff4eb58e504421806b7f2e8f.zip" -RUY_SHA="8fd4adeeff4f29796bf7cdda64806ec0495a2435361569f02afe3fe33406f07c" +RUY_URL="https://github.com/google/ruy/archive/388ffd28ba00ffb9aacbe538225165c02ea33ee3.zip" +RUY_SHA="89b8b56b4e1db894e75a0abed8f69757b37c23dde6e64bfb186656197771138a" GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz" GOOGLETEST_SHA="58a6f4277ca2bc8565222b3bbd58a177609e9c488e8a72649359ba51450db7d8" ABSL_URL="$(grep -o 'https://github.com/abseil/abseil-cpp/.*tar.gz' "${BZL_FILE_PATH}" | head -n1)" diff --git a/third_party/ruy/workspace.bzl b/third_party/ruy/workspace.bzl index ee0faec6eff..b487125a31d 100644 --- a/third_party/ruy/workspace.bzl +++ b/third_party/ruy/workspace.bzl @@ -5,11 +5,11 @@ load("//third_party:repo.bzl", "third_party_http_archive") def repo(): third_party_http_archive( name = "ruy", - sha256 = "8fd4adeeff4f29796bf7cdda64806ec0495a2435361569f02afe3fe33406f07c", - strip_prefix = "ruy-34ea9f4993955fa1ff4eb58e504421806b7f2e8f", + sha256 = "89b8b56b4e1db894e75a0abed8f69757b37c23dde6e64bfb186656197771138a", + strip_prefix = "ruy-388ffd28ba00ffb9aacbe538225165c02ea33ee3", urls = [ - "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/ruy/archive/34ea9f4993955fa1ff4eb58e504421806b7f2e8f.zip", - "https://github.com/google/ruy/archive/34ea9f4993955fa1ff4eb58e504421806b7f2e8f.zip", + "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/ruy/archive/388ffd28ba00ffb9aacbe538225165c02ea33ee3.zip", + "https://github.com/google/ruy/archive/388ffd28ba00ffb9aacbe538225165c02ea33ee3.zip", ], build_file = "//third_party/ruy:BUILD", ) From 1fc9c95a100af0689134f8834c04603b7b482dd5 Mon Sep 17 00:00:00 2001 From: Tamas Nyiri Date: Tue, 14 Jul 2020 12:44:56 +0100 Subject: [PATCH 0378/2522] added INT16 support in corresponding python wrappers --- .../optimize/python/modify_model_interface.cc | 2 +- .../modify_model_interface_constants.py | 1 + .../python/modify_model_interface_lib_test.py | 43 ++++++++++++++++++- 3 files changed, 43 insertions(+), 3 deletions(-) diff --git a/tensorflow/lite/tools/optimize/python/modify_model_interface.cc b/tensorflow/lite/tools/optimize/python/modify_model_interface.cc index ed67b07cb0f..cd2e4a192e9 100644 --- a/tensorflow/lite/tools/optimize/python/modify_model_interface.cc +++ b/tensorflow/lite/tools/optimize/python/modify_model_interface.cc @@ -33,7 +33,7 @@ PYBIND11_MODULE(_pywrap_modify_model_interface, m) { return tflite::optimize::ModifyModelInterface( input_file, output_file, static_cast(input_type), - static_cast(input_type)); + static_cast(output_type)); }); } diff --git 
a/tensorflow/lite/tools/optimize/python/modify_model_interface_constants.py b/tensorflow/lite/tools/optimize/python/modify_model_interface_constants.py index 42767268e48..cbe1aa92022 100644 --- a/tensorflow/lite/tools/optimize/python/modify_model_interface_constants.py +++ b/tensorflow/lite/tools/optimize/python/modify_model_interface_constants.py @@ -23,6 +23,7 @@ from tensorflow.lite.python import lite_constants STR_TO_TFLITE_TYPES = { 'INT8': lite_constants.INT8, + 'INT16': lite_constants.INT16, 'UINT8': lite_constants.QUANTIZED_UINT8 } TFLITE_TO_STR_TYPES = {v: k for k, v in STR_TO_TFLITE_TYPES.items()} diff --git a/tensorflow/lite/tools/optimize/python/modify_model_interface_lib_test.py b/tensorflow/lite/tools/optimize/python/modify_model_interface_lib_test.py index e97f0db9bbb..70ae0ad4376 100644 --- a/tensorflow/lite/tools/optimize/python/modify_model_interface_lib_test.py +++ b/tensorflow/lite/tools/optimize/python/modify_model_interface_lib_test.py @@ -28,7 +28,9 @@ from tensorflow.python.framework import test_util from tensorflow.python.platform import test -def build_tflite_model_with_full_integer_quantization(): +def build_tflite_model_with_full_integer_quantization(supported_ops= + tf.lite.OpsSet. + TFLITE_BUILTINS_INT8): # Define TF model input_size = 3 model = tf.keras.Sequential([ @@ -46,7 +48,7 @@ def build_tflite_model_with_full_integer_quantization(): yield [np.array([i] * input_size, dtype=np.float32)] converter.representative_dataset = representative_dataset_gen - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] + converter.target_spec.supported_ops = [supported_ops] tflite_model = converter.convert() return tflite_model @@ -89,6 +91,43 @@ class ModifyModelInterfaceTest(test_util.TensorFlowTestCase): self.assertEqual(final_input_dtype, np.int8) self.assertEqual(final_output_dtype, np.int8) + def testInt16Interface(self): + # 1. SETUP + # Define the temporary directory and files + temp_dir = self.get_temp_dir() + initial_file = os.path.join(temp_dir, 'initial_model.tflite') + final_file = os.path.join(temp_dir, 'final_model.tflite') + # Define initial model + initial_model = build_tflite_model_with_full_integer_quantization( + supported_ops=tf.lite.OpsSet. + EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8) + with open(initial_file, 'wb') as model_file: + model_file.write(initial_model) + + # 2. INVOKE + # Invoke the modify_model_interface function + modify_model_interface_lib.modify_model_interface(initial_file, final_file, + tf.int16, tf.int16) + + # 3. VALIDATE + # Load TFLite model and allocate tensors. + initial_interpreter = tf.lite.Interpreter(model_path=initial_file) + initial_interpreter.allocate_tensors() + final_interpreter = tf.lite.Interpreter(model_path=final_file) + final_interpreter.allocate_tensors() + + # Get input and output types. + initial_input_dtype = initial_interpreter.get_input_details()[0]['dtype'] + initial_output_dtype = initial_interpreter.get_output_details()[0]['dtype'] + final_input_dtype = final_interpreter.get_input_details()[0]['dtype'] + final_output_dtype = final_interpreter.get_output_details()[0]['dtype'] + + # Validate the model interfaces + self.assertEqual(initial_input_dtype, np.float32) + self.assertEqual(initial_output_dtype, np.float32) + self.assertEqual(final_input_dtype, np.int16) + self.assertEqual(final_output_dtype, np.int16) + def testUInt8Interface(self): # 1. 
SETUP # Define the temporary directory and files From b9ed815514be0a0e87e3f2a9ad077f3ff893abe1 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Tue, 14 Jul 2020 09:10:20 -0700 Subject: [PATCH 0379/2522] Display verbose diff if API goldens changed. Before this change, we only displayed a summary of the diff but this makes debugging harder. Displaying the verbose diff makes it easier to understand what changed, at the cost of increasing verbosity in logs. Since it is usually rare to have the API goldens changing, we might be ok with this change? PiperOrigin-RevId: 321169903 Change-Id: I0a450356af3082b02e73517a858925e89cd5fee3 --- tensorflow/tools/api/tests/api_compatibility_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/tools/api/tests/api_compatibility_test.py b/tensorflow/tools/api/tests/api_compatibility_test.py index 1c3b883d40e..67957899214 100644 --- a/tensorflow/tools/api/tests/api_compatibility_test.py +++ b/tensorflow/tools/api/tests/api_compatibility_test.py @@ -298,8 +298,9 @@ class ApiCompatibilityTest(test.TestCase): filepath, text_format.MessageToString(actual_dict[key])) else: # Include the actual differences to help debugging. - for d in diffs: + for d, verbose_d in zip(diffs, verbose_diffs): logging.error(' %s', d) + logging.error(' %s', verbose_d) # Fail if we cannot fix the test by updating goldens. self.fail('%d differences found between API and golden.' % diff_count) From 16c746192d3a28a498e2ab2dae8df8f4555b9dd5 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Tue, 14 Jul 2020 09:19:02 -0700 Subject: [PATCH 0380/2522] Disable flaky ASAN test. Fails due to gRPC timeout PiperOrigin-RevId: 321171451 Change-Id: I9f38d3dec54dbe46c47251cc44a4e65ff7dc2f58 --- tensorflow/python/BUILD | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index fbfa414791c..f4898897435 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -6332,6 +6332,9 @@ tf_py_test( srcs = ["training/server_lib_test.py"], grpc_enabled = True, python_version = "PY3", + tags = [ + "noasan", # TODO(b/161236904): flaky timeout in trying to start gRPC server + ], deps = [ ":array_ops", ":client", From a6207b8f2e5c62524392e1d2b22f87cfc3ec1f10 Mon Sep 17 00:00:00 2001 From: Chao Mei Date: Tue, 14 Jul 2020 09:20:41 -0700 Subject: [PATCH 0381/2522] Provide a way to utilize delegate providers to create TfLite delegates based on local-only parameters. PiperOrigin-RevId: 321171749 Change-Id: Ib05bc8446af7a331f9beee4e746bc5aef1746275 --- tensorflow/lite/kernels/test_util.cc | 5 +++-- tensorflow/lite/kernels/test_util.h | 23 +++++++++++++++++++---- tensorflow/lite/kernels/test_util_test.cc | 5 +++++ 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/tensorflow/lite/kernels/test_util.cc b/tensorflow/lite/kernels/test_util.cc index d77d3367afe..4a4f5c5de3a 100644 --- a/tensorflow/lite/kernels/test_util.cc +++ b/tensorflow/lite/kernels/test_util.cc @@ -417,10 +417,11 @@ bool KernelTestDelegateProviders::InitFromCmdlineArgs(int* argc, } std::vector -KernelTestDelegateProviders::CreateAllDelegates() const { +KernelTestDelegateProviders::CreateAllDelegates( + const tools::ToolParams& params) const { std::vector delegates; for (const auto& one : tools::GetRegisteredDelegateProviders()) { - auto ptr = one->CreateTfLiteDelegate(params_); + auto ptr = one->CreateTfLiteDelegate(params); // It's possible that a delegate of certain type won't be created as // user-specified benchmark params tells not to. 
if (ptr == nullptr) continue; diff --git a/tensorflow/lite/kernels/test_util.h b/tensorflow/lite/kernels/test_util.h index f58867a5120..78b7c580738 100644 --- a/tensorflow/lite/kernels/test_util.h +++ b/tensorflow/lite/kernels/test_util.h @@ -916,13 +916,28 @@ class KernelTestDelegateProviders { bool InitFromCmdlineArgs(int* argc, const char** argv); // This provides a way to overwrite parameter values programmatically before - // creating TfLite delegates. + // creating TfLite delegates. Note, changes to the returned ToolParams will + // have a global impact on creating TfLite delegates. + // If a local-only change is preferred, recommend using the following workflow + // create TfLite delegates via delegate providers: + // tools::ToolParams local_params; + // local_params.Merge(KernelTestDelegateProviders::Get()->ConstParams()); + // Overwrite params in local_params by calling local_params.Set<...>(...); + // Get TfLite delegates via + // KernelTestDelegateProviders::Get()->CreateAllDelegates(local_params); tools::ToolParams* MutableParams() { return ¶ms_; } const tools::ToolParams& ConstParams() const { return params_; } - // Create a list of TfLite delegates based on what have been initialized (i.e. - // 'params_'). - std::vector CreateAllDelegates() const; + // Create a list of TfLite delegates based on the provided parameters + // `params`. + std::vector CreateAllDelegates( + const tools::ToolParams& params) const; + + // Similar to the above, but creating a list of TfLite delegates based on what + // have been initialized (i.e. 'params_'). + std::vector CreateAllDelegates() const { + return CreateAllDelegates(params_); + } private: // Contain delegate-related parameters that are initialized from command-line diff --git a/tensorflow/lite/kernels/test_util_test.cc b/tensorflow/lite/kernels/test_util_test.cc index 1ac08631079..88dca69b614 100644 --- a/tensorflow/lite/kernels/test_util_test.cc +++ b/tensorflow/lite/kernels/test_util_test.cc @@ -67,6 +67,11 @@ TEST(KernelTestDelegateProvidersTest, CreateTfLiteDelegates) { KernelTestDelegateProviders providers; providers.MutableParams()->Set("use_xnnpack", true); EXPECT_GE(providers.CreateAllDelegates().size(), 1); + + tools::ToolParams local_params; + local_params.Merge(providers.ConstParams()); + local_params.Set("use_xnnpack", false); + EXPECT_TRUE(providers.CreateAllDelegates(local_params).empty()); #endif } } // namespace From 6fe8d6734c4b83d44c406efd90e933abc180077c Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Wed, 15 Jul 2020 00:21:01 +0700 Subject: [PATCH 0382/2522] remove file block cache --- .../experimental/filesystem/plugins/gcs/BUILD | 9 -- .../filesystem/plugins/gcs/file_block_cache.h | 140 ------------------ .../plugins/gcs/ram_file_block_cache.cc | 6 - .../plugins/gcs/ram_file_block_cache.h | 26 ++-- 4 files changed, 11 insertions(+), 170 deletions(-) delete mode 100644 tensorflow/c/experimental/filesystem/plugins/gcs/file_block_cache.h diff --git a/tensorflow/c/experimental/filesystem/plugins/gcs/BUILD b/tensorflow/c/experimental/filesystem/plugins/gcs/BUILD index a0c13701766..28371eecaf7 100644 --- a/tensorflow/c/experimental/filesystem/plugins/gcs/BUILD +++ b/tensorflow/c/experimental/filesystem/plugins/gcs/BUILD @@ -44,14 +44,6 @@ cc_library( ], ) -cc_library( - name = "file_block_cache", - hdrs = ["file_block_cache.h"], - deps = [ - "//tensorflow/c:tf_status", - ], -) - cc_library( name = "cleanup", hdrs = ["cleanup.h"], @@ -63,7 +55,6 @@ cc_library( hdrs = ["ram_file_block_cache.h"], deps = [ ":cleanup", - 
":file_block_cache", "//tensorflow/c:env", "//tensorflow/c:tf_status", "@com_google_absl//absl/base:core_headers", diff --git a/tensorflow/c/experimental/filesystem/plugins/gcs/file_block_cache.h b/tensorflow/c/experimental/filesystem/plugins/gcs/file_block_cache.h deleted file mode 100644 index 3ba7d8d7993..00000000000 --- a/tensorflow/c/experimental/filesystem/plugins/gcs/file_block_cache.h +++ /dev/null @@ -1,140 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_C_EXPERIMENTAL_FILESYSTEM_PLUGINS_GCS_FILE_BLOCK_CACHE_H_ -#define TENSORFLOW_C_EXPERIMENTAL_FILESYSTEM_PLUGINS_GCS_FILE_BLOCK_CACHE_H_ - -#include -#include -#include -#include -#include -#include -#include - -#include "tensorflow/c/tf_status.h" - -namespace tf_gcs_filesystem { - -class FileBlockCache; - -/// FileBlockCacheStatsInterface allows for instrumentation of the block cache. -/// -/// FileBlockCacheStatsInterface and its subclasses must be safe to use from -/// multiple threads concurrently. -/// -/// WARNING! This is an experimental interface that may change or go away at any -/// time. -class FileBlockCacheStatsInterface { - public: - /// Configure is called to provide instrumentation hooks. - /// - /// Note: Configure can be called multiple times (e.g. if the block cache is - /// re-initialized). - virtual void Configure(const FileBlockCache* block_cache) = 0; - - /// RecordBlockLoadRequest is called to record the size of a hit block. - virtual void RecordCacheHitBlockSize(size_t bytes_transferred) = 0; - - /// RecordBlockLoadRequest is called to record the size of a missed block. - virtual void RecordCacheMissBlockSize(size_t bytes_transferred) = 0; - - virtual ~FileBlockCacheStatsInterface() = default; -}; - -/// \brief A block cache of file contents, keyed by {filename, offset}. -/// -/// This class should be shared by read-only random access files on a remote -/// filesystem (e.g. GCS). -class FileBlockCache { - public: - /// The callback executed when a block is not found in the cache, and needs to - /// be fetched from the backing filesystem. This callback is provided when the - /// cache is constructed. The `status` should be `TF_OK` as long as the - /// read from the remote filesystem succeeded (similar to the semantics of the - /// read(2) system call). - typedef std::function - BlockFetcher; - - virtual ~FileBlockCache() {} - - /// Read `n` bytes from `filename` starting at `offset` into `buffer`. This - /// method will set `status` to: - /// - /// 1) The error from the remote filesystem, if the read from the remote - /// filesystem failed. - /// 2) `TF_FAILED_PRECONDITION` if the read from the remote filesystem - /// succeeded, - /// but the read returned a partial block, and the LRU cache contained a - /// block at a higher offset (indicating that the partial block should have - /// been a full block). 
- /// 3) `TF_OUT_OF_RANGE` if the read from the remote filesystem succeeded, but - /// the file contents do not extend past `offset` and thus nothing was - /// placed in `out`. - /// 4) `TF_OK` otherwise (i.e. the read succeeded, and at least one byte was - /// placed - /// in `buffer`). - /// - /// Caller is responsible for allocating memory for `buffer`. - /// `buffer` will be left unchanged in case of errors. - virtual void Read(const std::string& filename, size_t offset, size_t n, - char* buffer, size_t* bytes_transferred, - TF_Status* status) = 0; - - // Validate the given file signature with the existing file signature in the - // cache. Returns true if the signature doesn't change or the file did not - // exist before. If the signature changes, update the existing signature with - // the new one and remove the file from cache. - virtual bool ValidateAndUpdateFileSignature(const std::string& filename, - int64_t file_signature) = 0; - - /// Remove all cached blocks for `filename`. - virtual void RemoveFile(const std::string& filename) = 0; - - /// Remove all cached data. - virtual void Flush() = 0; - - /// Accessors for cache parameters. - virtual size_t block_size() const = 0; - virtual size_t max_bytes() const = 0; - virtual uint64_t max_staleness() const = 0; - - /// The current size (in bytes) of the cache. - virtual size_t CacheSize() const = 0; - - // Returns true if the cache is enabled. If false, the BlockFetcher callback - // is always executed during Read. - virtual bool IsCacheEnabled() const = 0; - - void SetStats(FileBlockCacheStatsInterface* stats) { - if (stats == nullptr) { - std::cerr - << "Attempted to monitor a NULL stats object. This may prevent the " - "corresponding monitoring data from being exported"; - return; - } - cache_stats_ = stats; - cache_stats_->Configure(this); - } - - protected: - FileBlockCacheStatsInterface* cache_stats_ = nullptr; // Not owned. -}; - -} // namespace tf_gcs_filesystem - -#endif // TENSORFLOW_C_EXPERIMENTAL_FILESYSTEM_PLUGINS_GCS_FILE_BLOCK_CACHE_H_ diff --git a/tensorflow/c/experimental/filesystem/plugins/gcs/ram_file_block_cache.cc b/tensorflow/c/experimental/filesystem/plugins/gcs/ram_file_block_cache.cc index 102c7fa175c..14e83d978e7 100644 --- a/tensorflow/c/experimental/filesystem/plugins/gcs/ram_file_block_cache.cc +++ b/tensorflow/c/experimental/filesystem/plugins/gcs/ram_file_block_cache.cc @@ -39,9 +39,6 @@ std::shared_ptr RamFileBlockCache::Lookup( auto entry = block_map_.find(key); if (entry != block_map_.end()) { if (BlockNotStale(entry->second)) { - if (cache_stats_ != nullptr) { - cache_stats_->RecordCacheHitBlockSize(entry->second->data.size()); - } return entry->second; } else { // Remove the stale block and continue. 
@@ -139,9 +136,6 @@ void RamFileBlockCache::MaybeFetch(const Key& key, size_t bytes_transferred; block_fetcher_(key.first, key.second, block_size_, block->data.data(), &bytes_transferred, status); - if (cache_stats_ != nullptr) { - cache_stats_->RecordCacheMissBlockSize(bytes_transferred); - } block->mu.Lock(); // Reacquire the lock immediately afterwards if (TF_GetCode(status) == TF_OK) { block->data.resize(bytes_transferred, 0); diff --git a/tensorflow/c/experimental/filesystem/plugins/gcs/ram_file_block_cache.h b/tensorflow/c/experimental/filesystem/plugins/gcs/ram_file_block_cache.h index 5a82f65db41..37cbc257e9c 100644 --- a/tensorflow/c/experimental/filesystem/plugins/gcs/ram_file_block_cache.h +++ b/tensorflow/c/experimental/filesystem/plugins/gcs/ram_file_block_cache.h @@ -28,7 +28,6 @@ limitations under the License. #include "absl/synchronization/mutex.h" #include "absl/synchronization/notification.h" #include "tensorflow/c/env.h" -#include "tensorflow/c/experimental/filesystem/plugins/gcs/file_block_cache.h" #include "tensorflow/c/tf_status.h" namespace tf_gcs_filesystem { @@ -37,7 +36,7 @@ namespace tf_gcs_filesystem { /// /// This class should be shared by read-only random access files on a remote /// filesystem (e.g. GCS). -class RamFileBlockCache : public FileBlockCache { +class RamFileBlockCache { public: /// The callback executed when a block is not found in the cache, and needs to /// be fetched from the backing filesystem. This callback is provided when the @@ -69,7 +68,7 @@ class RamFileBlockCache : public FileBlockCache { << (IsCacheEnabled() ? "enabled" : "disabled"); } - ~RamFileBlockCache() override { + ~RamFileBlockCache() { if (pruning_thread_) { stop_pruning_thread_.Notify(); // Destroying pruning_thread_ will block until Prune() receives the above @@ -98,36 +97,33 @@ class RamFileBlockCache : public FileBlockCache { /// Caller is responsible for allocating memory for `buffer`. /// `buffer` will be left unchanged in case of errors. void Read(const std::string& filename, size_t offset, size_t n, char* buffer, - size_t* bytes_transferred, TF_Status* status) override; + size_t* bytes_transferred, TF_Status* status); // Validate the given file signature with the existing file signature in the // cache. Returns true if the signature doesn't change or the file doesn't // exist before. If the signature changes, update the existing signature with // the new one and remove the file from cache. bool ValidateAndUpdateFileSignature(const std::string& filename, - int64_t file_signature) override + int64_t file_signature) ABSL_LOCKS_EXCLUDED(mu_); /// Remove all cached blocks for `filename`. - void RemoveFile(const std::string& filename) override - ABSL_LOCKS_EXCLUDED(mu_); + void RemoveFile(const std::string& filename) ABSL_LOCKS_EXCLUDED(mu_); /// Remove all cached data. - void Flush() override ABSL_LOCKS_EXCLUDED(mu_); + void Flush() ABSL_LOCKS_EXCLUDED(mu_); /// Accessors for cache parameters. - size_t block_size() const override { return block_size_; } - size_t max_bytes() const override { return max_bytes_; } - uint64_t max_staleness() const override { return max_staleness_; } + size_t block_size() const { return block_size_; } + size_t max_bytes() const { return max_bytes_; } + uint64_t max_staleness() const { return max_staleness_; } /// The current size (in bytes) of the cache. - size_t CacheSize() const override ABSL_LOCKS_EXCLUDED(mu_); + size_t CacheSize() const ABSL_LOCKS_EXCLUDED(mu_); // Returns true if the cache is enabled. 
If false, the BlockFetcher callback // is always executed during Read. - bool IsCacheEnabled() const override { - return block_size_ > 0 && max_bytes_ > 0; - } + bool IsCacheEnabled() const { return block_size_ > 0 && max_bytes_ > 0; } // We can not pass a lambda with capture as a function pointer to // `TF_StartThread`, so we have to wrap `Prune` inside a static function. From 6c0fba85904341e9c95b801c80b79972d18bf945 Mon Sep 17 00:00:00 2001 From: Gaurav Jain Date: Tue, 14 Jul 2020 10:31:50 -0700 Subject: [PATCH 0383/2522] Add support for int64 InvertPermutation Op PiperOrigin-RevId: 321185678 Change-Id: I62fe9daa942728c56bc92f730e1ed7c1adf8a511 --- tensorflow/compiler/tests/unary_ops_test.py | 22 +++++---- .../compiler/tf2xla/kernels/transpose_op.cc | 47 ++++++++++++++----- 2 files changed, 50 insertions(+), 19 deletions(-) diff --git a/tensorflow/compiler/tests/unary_ops_test.py b/tensorflow/compiler/tests/unary_ops_test.py index 162693a9eb1..31478060084 100644 --- a/tensorflow/compiler/tests/unary_ops_test.py +++ b/tensorflow/compiler/tests/unary_ops_test.py @@ -923,16 +923,22 @@ class UnaryOpsTest(xla_test.XLATestCase): expected=np.array([1, 0x100000003f800000], np.uint64)) def testInvertPermutation(self): - self._assertOpOutputMatchesExpected( - array_ops.invert_permutation, - np.array([1, 2, 0], np.int32), - expected=np.array([2, 0, 1], dtype=np.int32)) + for np_dtype in [np.int32, np.int64]: + self._assertOpOutputMatchesExpected( + array_ops.invert_permutation, + np.array([1, 2, 0], np_dtype), + expected=np.array([2, 0, 1], dtype=np_dtype)) def testInvertPermutationTwiceIsNoop(self): - self._assertOpOutputMatchesExpected( - lambda x: array_ops.invert_permutation(array_ops.invert_permutation(x)), - np.array([1, 2, 0], np.int32), - expected=np.array([1, 2, 0], dtype=np.int32)) + + def invert_twice(x): + return array_ops.invert_permutation(array_ops.invert_permutation(x)) + + for np_dtype in [np.int32, np.int64]: + self._assertOpOutputMatchesExpected( + invert_twice, + np.array([1, 2, 0], np_dtype), + expected=np.array([1, 2, 0], dtype=np_dtype)) def testRank(self): rank_op = lambda x: array_ops.rank_internal(x, optimize=False) diff --git a/tensorflow/compiler/tf2xla/kernels/transpose_op.cc b/tensorflow/compiler/tf2xla/kernels/transpose_op.cc index 65569576d41..9a4722d149e 100644 --- a/tensorflow/compiler/tf2xla/kernels/transpose_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/transpose_op.cc @@ -24,6 +24,7 @@ limitations under the License. #include "tensorflow/compiler/tf2xla/xla_op_kernel.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" #include "tensorflow/compiler/xla/client/xla_builder.h" +#include "tensorflow/compiler/xla/primitive_util.h" #include "tensorflow/core/framework/bounds_check.h" #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/register_types.h" @@ -110,11 +111,11 @@ REGISTER_XLA_OP(Name("ConjugateTranspose").CompileTimeConstantInput("perm"), // InvertPermutation frequently forms part of the gradient of Transpose. // -// inv = InvertPermutationOp(T p) takes a permutation of +// inv = InvertPermutationOp(p) takes a permutation of // integers 0, 1, ..., n - 1 and returns the inverted // permutation of p. I.e., inv[p[i]] == i, for i in [0 .. n). // -// REQUIRES: input is a vector of int32. +// REQUIRES: input is a vector of int32 or int64. // REQUIRES: input is a permutation of 0, 1, ..., n-1. 
class InvertPermutationOp : public XlaOpKernel { @@ -122,11 +123,32 @@ class InvertPermutationOp : public XlaOpKernel { explicit InvertPermutationOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} void Compile(XlaOpKernelContext* ctx) override { + DataType dtype = ctx->expected_output_dtype(0); + Status status; + switch (dtype) { + case DT_INT32: + InvertPermutation(ctx); + break; + case DT_INT64: + InvertPermutation(ctx); + break; + default: + // This should never happen since we restrict this kernel to only match + // inputs with supported Tensor datatype. + OP_REQUIRES_OK(ctx, errors::InvalidArgument( + "InvertPermutation expects x as either ", + "int32 or int64, not ", DataTypeString(dtype))); + } + } + + template + void InvertPermutation(XlaOpKernelContext* ctx) { OP_REQUIRES(ctx, FastBoundsCheck(ctx->InputShape(0).num_elements(), - std::numeric_limits::max()), - errors::InvalidArgument("permutation of nonnegative int32s " - "must have <= int32 max elements")); + std::numeric_limits::max()), + errors::InvalidArgument( + "permutation of nonnegative integers must have <= ", + std::numeric_limits::max(), " elements")); auto e = ctx->InputExpression(0); auto tensor_or_status = e.ResolveConstant(ctx->compiler()->client()); @@ -142,7 +164,7 @@ class InvertPermutationOp : public XlaOpKernel { int size = perm.size(); - std::vector output(size); + std::vector output(size); std::fill_n(output.data(), size, -1); for (int i = 0; i < size; ++i) { const int64 d = perm[i]; @@ -153,11 +175,13 @@ class InvertPermutationOp : public XlaOpKernel { output[d] = i; } - ctx->SetOutput(0, xla::ConstantR1(ctx->builder(), output)); + ctx->SetOutput(0, xla::ConstantR1(ctx->builder(), output)); } else { auto indices = ctx->Input(0); - int size = ctx->InputShape(0).num_elements(); - auto iota = xla::Iota(ctx->builder(), xla::S32, size); + T size = ctx->InputShape(0).num_elements(); + auto iota = + xla::Iota(ctx->builder(), + xla::primitive_util::NativeToPrimitiveType(), size); auto result = XlaScatter(iota, iota, indices, /*indices_are_vectors=*/false, /*combiner=*/{}, ctx->builder()); @@ -167,8 +191,9 @@ class InvertPermutationOp : public XlaOpKernel { } }; -REGISTER_XLA_OP(Name("InvertPermutation").TypeConstraint("T", DT_INT32), - InvertPermutationOp); +REGISTER_XLA_OP( + Name("InvertPermutation").TypeConstraint("T", {DT_INT32, DT_INT64}), + InvertPermutationOp); } // namespace } // namespace tensorflow From 8c739441c35b477ddb1f494b4590d43d7cf49afe Mon Sep 17 00:00:00 2001 From: Meghna Natraj Date: Tue, 14 Jul 2020 10:33:42 -0700 Subject: [PATCH 0384/2522] Support integer input and output type for Quantize-Aware Trained models PiperOrigin-RevId: 321186099 Change-Id: I1b45ea11e139efcfdd5ff4520c61fd11d20cba43 --- RELEASE.md | 3 +- tensorflow/lite/python/BUILD | 14 ++ tensorflow/lite/python/lite.py | 29 ++- tensorflow/lite/python/lite_v2_test.py | 45 ++--- tensorflow/lite/python/util.py | 265 +++++++++++++++++++++++++ tensorflow/lite/python/util_test.py | 163 +++++++++++++++ 6 files changed, 488 insertions(+), 31 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 150c7077349..2581b9388ff 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -42,7 +42,8 @@ * `tf.function`/AutoGraph: * * `tf.lite`: - * + * `TFLiteConverter`: + * Support optional flags `inference_input_type` and `inference_output_type` for full integer quantized models. This allows users to modify the model input and output type to integer types (tf.int8, tf.uint8) instead of defaulting to float type (tf.float32). 
* `tf.random`: * * Math and Linear Algebra: diff --git a/tensorflow/lite/python/BUILD b/tensorflow/lite/python/BUILD index dfcf46baa90..f144ce39d50 100644 --- a/tensorflow/lite/python/BUILD +++ b/tensorflow/lite/python/BUILD @@ -215,8 +215,11 @@ py_library( deps = [ ":lite_constants", ":op_hint", + ":schema_py", "//tensorflow/python:tf_optimizer", "//tensorflow/python/eager:wrap_function", + "@absl_py//absl/logging", + "@flatbuffers//:runtime_py", "@six_archive//:six", ], ) @@ -230,9 +233,20 @@ py_test( "no_windows", ], deps = [ + ":lite_constants", ":util", + "//tensorflow:tensorflow_py", + "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", + "//tensorflow/python:control_flow_ops", + "//tensorflow/python:convert_to_constants", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", "//tensorflow/python:framework_test_lib", + "//tensorflow/python:math_ops", + "//tensorflow/python:session", + "//third_party/py/numpy", + "@absl_py//absl/testing:parameterized", "@six_archive//:six", ], ) diff --git a/tensorflow/lite/python/lite.py b/tensorflow/lite/python/lite.py index e919aa4b00f..a08b40bbed6 100644 --- a/tensorflow/lite/python/lite.py +++ b/tensorflow/lite/python/lite.py @@ -61,6 +61,7 @@ from tensorflow.lite.python.util import get_grappler_config as _get_grappler_con from tensorflow.lite.python.util import get_tensor_name as _get_tensor_name from tensorflow.lite.python.util import get_tensors_from_tensor_names as _get_tensors_from_tensor_names from tensorflow.lite.python.util import is_frozen_graph as _is_frozen_graph +from tensorflow.lite.python.util import modify_integer_quantized_model_io_type as _modify_integer_quantized_model_io_type from tensorflow.lite.python.util import run_graph_optimizations as _run_graph_optimizations from tensorflow.lite.python.util import set_tensor_shapes as _set_tensor_shapes from tensorflow.python import keras as _keras @@ -314,6 +315,23 @@ class QuantizationMode(object): else: return False, None + def flags_modify_model_io_type( + self, input_type=constants.FLOAT, output_type=constants.FLOAT): + """Flags for modifying the input and output type of a tflite model.""" + is_post_training_quantize = self.quantizer_flags(input_type, output_type)[0] + is_training_time_only_quantize = self.training_time_int8_allow_float() and \ + not is_post_training_quantize + + # TODO(b/153576658): Consolidate post/during training quantization workflows + # to modify model input/output type after MLIR conversion. + if is_training_time_only_quantize: + return { + "inference_input_type": input_type, + "inference_output_type": output_type, + } + else: + return None + # Below are helpers for the above functions. def _validate_int8_required(self): @@ -557,9 +575,8 @@ class TFLiteConverterBaseV2(TFLiteConverterBase): def _validate_inference_input_output_types(self, quant_mode): """Validate inference_input_type and inference_output_type flags.""" default_types = [constants.FLOAT, None] - # We only support integer types for post training integer quantization - # as we have statistical information to quantize the input and output. - if quant_mode.is_post_training_integer_quantize(): + # We support integer input/output for integer quantized models only. 
+ if quant_mode.training_time_int8_allow_float(): all_types = default_types + [constants.INT8, constants.QUANTIZED_UINT8] if self.inference_input_type not in all_types or \ self.inference_output_type not in all_types: @@ -643,6 +660,12 @@ class TFLiteConverterBaseV2(TFLiteConverterBase): if calibrate_and_quantize: result = self._calibrate_quantize_model(result, **flags) + flags_modify_model_io_type = quant_mode.flags_modify_model_io_type( + self.inference_input_type, self.inference_output_type) + if flags_modify_model_io_type: + result = _modify_integer_quantized_model_io_type( + result, **flags_modify_model_io_type) + if self._experimental_sparsify_model: result = _mlir_sparsify(result) diff --git a/tensorflow/lite/python/lite_v2_test.py b/tensorflow/lite/python/lite_v2_test.py index 6fab4fd6086..4093a9d5bb4 100644 --- a/tensorflow/lite/python/lite_v2_test.py +++ b/tensorflow/lite/python/lite_v2_test.py @@ -374,8 +374,12 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): return tf.keras.Sequential(QLinear(3, input_shape=(2,))) + @parameterized.named_parameters( + ('_DefaultFLOAT32InputOutput', lite.constants.FLOAT), + ('_INT8InputOutput', lite.constants.INT8), + ('_UINT8InputOutput', lite.constants.QUANTIZED_UINT8)) @test_util.run_v2_only - def testTrainingTimeQuantization(self): + def testTrainingTimeQuantization(self, inference_input_output_type): model = self._getTrainingTimeQuantizedModel() float_converter = lite.TFLiteConverterV2.from_keras_model(model) @@ -384,37 +388,24 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): quantized_converter = lite.TFLiteConverterV2.from_keras_model(model) quantized_converter.optimizations = [lite.Optimize.DEFAULT] + quantized_converter.inference_input_type = inference_input_output_type + quantized_converter.inference_output_type = inference_input_output_type quantized_tflite = quantized_converter.convert() self.assertTrue(quantized_tflite) - # Ensure that the quantized weights tflite model is smaller. - self.assertLess(len(quantized_tflite), len(float_tflite)) - interpreter = Interpreter(model_content=quantized_tflite) - self.assertEqual(np.float32, interpreter.get_input_details()[0]['dtype']) + interpreter.allocate_tensors() + input_details = interpreter.get_input_details() + self.assertLen(input_details, 1) + self.assertEqual(inference_input_output_type.as_numpy_dtype, + input_details[0]['dtype']) + output_details = interpreter.get_output_details() + self.assertLen(output_details, 1) + self.assertEqual(inference_input_output_type.as_numpy_dtype, + output_details[0]['dtype']) - @parameterized.named_parameters( - ('_INT8InputOutput', lite.constants.INT8), - ('_UINT8InputOutput', lite.constants.QUANTIZED_UINT8)) - def testInvalidTrainingTimeQuantization(self, inference_input_output_type): - # We currently don't support integer inference_input_type and - # inference_output_type flags for training time quantization. 
- - model = self._getTrainingTimeQuantizedModel() - - converter = lite.TFLiteConverterV2.from_keras_model(model) - tflite_model = converter.convert() - self.assertTrue(tflite_model) - - quantized_converter = lite.TFLiteConverterV2.from_keras_model(model) - quantized_converter.optimizations = [lite.Optimize.DEFAULT] - with self.assertRaises(ValueError) as error: - quantized_converter.inference_input_type = inference_input_output_type - quantized_converter.inference_output_type = inference_input_output_type - quantized_converter.convert() - self.assertEqual( - 'The inference_input_type and inference_output_type ' - 'must be tf.float32.', str(error.exception)) + # Ensure that the quantized tflite model is smaller. + self.assertLess(len(quantized_tflite), len(float_tflite)) @test_util.run_v2_only def testNewQuantizer(self): diff --git a/tensorflow/lite/python/util.py b/tensorflow/lite/python/util.py index ff7caad0f88..9f84681c12b 100644 --- a/tensorflow/lite/python/util.py +++ b/tensorflow/lite/python/util.py @@ -19,15 +19,21 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import copy import datetime import sys +from absl import logging + import six from six.moves import range +from flatbuffers.python import flatbuffers from tensorflow.core.protobuf import config_pb2 as _config_pb2 from tensorflow.core.protobuf import graph_debug_info_pb2 from tensorflow.core.protobuf import meta_graph_pb2 as _meta_graph_pb2 +from tensorflow.lite.python import lite_constants as _lite_constants +from tensorflow.lite.python import schema_py_generated as _schema_fb from tensorflow.lite.python.op_hint import convert_op_hints_to_stubs from tensorflow.lite.python.op_hint import find_all_hinted_output_nodes from tensorflow.lite.toco import types_pb2 as _types_pb2 @@ -55,6 +61,25 @@ _MAP_TF_TO_TFLITE_TYPES = { dtypes.bool: _types_pb2.BOOL, } +_MAP_TFLITE_ENUM_TO_TF_TYPES = { + 0: dtypes.float32, + 1: dtypes.float16, + 2: dtypes.int32, + 3: dtypes.uint8, + 4: dtypes.int64, + 5: dtypes.string, + 6: dtypes.bool, + 7: dtypes.int16, + 8: dtypes.complex64, + 9: dtypes.int8, + 10: dtypes.float64, +} + +_TFLITE_FILE_IDENTIFIER = b"TFL3" + +_TFLITE_MODEL_INPUT_OUTPUT_TYPES = (_lite_constants.FLOAT, _lite_constants.INT8, + _lite_constants.QUANTIZED_UINT8) + def convert_dtype_to_tflite_type(tf_dtype): """Converts tf.dtype to TFLite proto type. @@ -74,6 +99,31 @@ def convert_dtype_to_tflite_type(tf_dtype): return result +def _convert_tflite_enum_type_to_tf_type(tflite_enum_type): + """Converts tflite enum type (eg: 0) to tf type (eg: tf.float32). + + Args: + tflite_enum_type: tflite enum type (eg: 0, that corresponds to float32) + + Raises: + ValueError: If an invalid tflite enum type is provided. + + Returns: + tf type (eg: tf.float32) + """ + tf_type = _MAP_TFLITE_ENUM_TO_TF_TYPES.get(tflite_enum_type) + if tf_type is None: + raise ValueError( + "Unsupported enum {}. The valid map of enum to tf.dtypes is : {}" + .format(tflite_enum_type, _MAP_TFLITE_ENUM_TO_TF_TYPES)) + return tf_type + + +def _get_dtype_name(tf_type): + """Converts tf.dtype (eg: tf.float32) to str (eg: "tf.float32").""" + return "tf." + tf_type.name + + def get_tensor_name(tensor): """Returns name of the input tensor. 
@@ -514,3 +564,218 @@ extern const int {array_name}_len; license_text=license_text) return source_text, header_text + + +def _convert_model_from_bytearray_to_object(model_bytearray): + """Converts a tflite model from a bytearray into a parsable object.""" + model_object = _schema_fb.Model.GetRootAsModel(model_bytearray, 0) + model_object = _schema_fb.ModelT.InitFromObj(model_object) + model_object = copy.deepcopy(model_object) + model_object.subgraphs[0].inputs[0] = model_object.subgraphs[0].inputs[0] + return model_object + + +def _convert_model_from_object_to_bytearray(model_object): + """Converts a tflite model from a parsable object into a bytearray.""" + # Initial size of the buffer, which will grow automatically if needed + builder = flatbuffers.Builder(1024) + model_offset = model_object.Pack(builder) + builder.Finish(model_offset, file_identifier=_TFLITE_FILE_IDENTIFIER) + return bytes(builder.Output()) + + +def _remove_tensors_from_model(model, remove_tensors_idxs): + """Remove tensors from model.""" + if not remove_tensors_idxs: + return + if len(model.subgraphs) > 1: + raise ValueError("Model must only have one subgraph. Instead, it has " + "{} subgraphs.".format(len(model.subgraphs))) + subgraph = model.subgraphs[0] + tensors = subgraph.tensors + operators = subgraph.operators + + logging.debug("Removing tensors at indices : %s", remove_tensors_idxs) + # An optimized check to validate if "remove_tensors_idxs" (eg: [4,5,6]) is an + # exact subset, with ordering, of "tensors" indices (eg: [0,1,2,3,4,5,6]). + if min(remove_tensors_idxs) == len(tensors) - len(remove_tensors_idxs): + logging.debug("Removing tensors only at the end of the tensor list") + del tensors[min(remove_tensors_idxs):] + else: + logging.debug("Removing tensors requires updating the model") + # Map the old tensor indices to new tensor indices + d_old_to_new_tensors = {} + left_shift_by = 0 + for idx in range(len(tensors)): + if idx in remove_tensors_idxs: + left_shift_by += 1 + else: + d_old_to_new_tensors[idx] = idx - left_shift_by + logging.debug("Old to new tensors map: %s", d_old_to_new_tensors.__str__()) + # Update tensor indices referenced throughout the model + def update_tensors(tensor_idxs): + for i, ti in enumerate(tensor_idxs): + tensor_idxs[i] = d_old_to_new_tensors.get(ti, -1) + update_tensors(subgraph.inputs) + update_tensors(subgraph.outputs) + for op in operators: + update_tensors(op.inputs) + update_tensors(op.outputs) + # Delete the tensors + for idx in sorted(remove_tensors_idxs, reverse=True): + tensors.pop(idx) + logging.debug("Removed tensors marked for deletion") + + +def _validate_and_find_int8_quantized_inputs_outputs(model): + """Validate that model input is quantized and output is dequantized.""" + if len(model.subgraphs) > 1: + raise ValueError("Model must only have one subgraph. 
Instead, it has " + "{} subgraphs.".format(len(model.subgraphs))) + subgraph = model.subgraphs[0] + tensors = subgraph.tensors + operators = subgraph.operators + + # Ensure model has atleast one quantize and dequantize operator + quant_opcode_idx, dequant_opcode_idx = None, None + for idx, opcode in enumerate(model.operatorCodes): + if opcode.builtinCode == _schema_fb.BuiltinOperator.QUANTIZE: + quant_opcode_idx = idx + elif opcode.builtinCode == _schema_fb.BuiltinOperator.DEQUANTIZE: + dequant_opcode_idx = idx + if quant_opcode_idx is not None and dequant_opcode_idx is not None: + break + if quant_opcode_idx is None and dequant_opcode_idx is None: + raise ValueError("Model is not integer quantized as it does not " + "contain quantize/dequantize operators.") + + # Ensure model inputs and outputs are integer quantized + input_quant_ops, output_dequant_ops = [], [] + for op in operators: + # Find input quantize operator + if op.opcodeIndex == quant_opcode_idx and op.inputs[0] in subgraph.inputs: + pos, float_tensor, int_tensor = \ + "input", tensors[op.inputs[0]], tensors[op.outputs[0]] + input_quant_ops.append(op) + # Find output dequantize operator + elif op.opcodeIndex == dequant_opcode_idx and \ + op.outputs[0] in subgraph.outputs: + pos, float_tensor, int_tensor = \ + "output", tensors[op.outputs[0]], tensors[op.inputs[0]] + output_dequant_ops.append(op) + # Otherwise, ignore + else: + continue + # If found, validate the input/output tensor type + if float_tensor.type != _schema_fb.TensorType.FLOAT32: + raise ValueError( + "Model {} type must be tf.float32. Expected type for tensor with " + "name '{}' is tf.float32, instead type is tf.{}".format( + pos, float_tensor.name, + _convert_tflite_enum_type_to_tf_type(float_tensor.type).name)) + if int_tensor.type != _schema_fb.TensorType.INT8: + raise ValueError( + "Model is not integer quantized. Expected type for tensor with " + "name '{}' is tf.int8, instead type is tf.{}".format( + int_tensor.name, + _convert_tflite_enum_type_to_tf_type(int_tensor.type).name)) + + return input_quant_ops, output_dequant_ops + + +def modify_integer_quantized_model_io_type( + model, inference_input_type=_lite_constants.FLOAT, + inference_output_type=_lite_constants.FLOAT): + """Modify the float input/output type of an integer quantized model. + + Args: + model: An int8 quantized tflite model with float input and output. + inference_input_type: tf.DType representing final input type. + (default tf.float32) + inference_output_type: tf.DType representing final output type. + (default tf.float32) + + Returns: + An int8 quantized tflite model with modified input and/or output type. + + Raises: + ValueError: If the model is not int8 quantized or the inference_input_type + and/or inference_input_type is unsupported. + RuntimeError: If the modification was unsuccessful. 
+ + """ + # Return if input and output types default to float + if inference_input_type == _lite_constants.FLOAT and \ + inference_output_type == _lite_constants.FLOAT: + return model + + # Validate input and output types + if inference_input_type not in _TFLITE_MODEL_INPUT_OUTPUT_TYPES: + raise ValueError("The `inference_input_type` should be in {}".format( + tuple(_get_dtype_name(t) for t in _TFLITE_MODEL_INPUT_OUTPUT_TYPES))) + if inference_output_type not in _TFLITE_MODEL_INPUT_OUTPUT_TYPES: + raise ValueError("The `inference_output_type` should be in {}".format( + tuple(_get_dtype_name(t) for t in _TFLITE_MODEL_INPUT_OUTPUT_TYPES))) + + logging.debug(("Attempting to modify the model input from tf.float32 to %s " + "and output from tf.float32 to %s"), + _get_dtype_name(inference_input_type), + _get_dtype_name(inference_output_type)) + # Convert the model to an object + model = _convert_model_from_bytearray_to_object(model) + + # Validate the integer quantized model + input_quant_ops, output_dequant_ops = \ + _validate_and_find_int8_quantized_inputs_outputs(model) + + # Initialize references and variables + if len(model.subgraphs) > 1: + raise ValueError("Model must only have one subgraph. Instead, it has " + "{} subgraphs.".format(len(model.subgraphs))) + subgraph = model.subgraphs[0] + tensors = subgraph.tensors + operators = subgraph.operators + remove_tensors_idxs = set() + + # Modify model input type + if inference_input_type == _lite_constants.QUANTIZED_UINT8: + # Change quant op (float to int8) to quant op (uint8 to int8) + for op in input_quant_ops: + int8_quantization = tensors[op.outputs[0]].quantization + uint8_quantization = _schema_fb.QuantizationParametersT() + uint8_quantization.scale = [int8_quantization.scale[0]] + uint8_quantization.zeroPoint = [int8_quantization.zeroPoint[0] + 128] + tensors[op.inputs[0]].quantization = uint8_quantization + tensors[op.inputs[0]].type = _schema_fb.TensorType.UINT8 + elif inference_input_type == _lite_constants.INT8: + # Remove the inputs and the quant operator + for op in input_quant_ops: + subgraph.inputs[subgraph.inputs == op.inputs[0]] = op.outputs[0] + remove_tensors_idxs.add(op.inputs[0]) + operators.remove(op) + + # Modify model output type + if inference_output_type == _lite_constants.QUANTIZED_UINT8: + # Change dequant op (int8 to float) to quant op (int8 to uint8) + for op in output_dequant_ops: + op.opcodeIndex = input_quant_ops[0].opcodeIndex + int8_quantization = tensors[op.inputs[0]].quantization + uint8_quantization = _schema_fb.QuantizationParametersT() + uint8_quantization.scale = [int8_quantization.scale[0]] + uint8_quantization.zeroPoint = [int8_quantization.zeroPoint[0] + 128] + tensors[op.outputs[0]].quantization = uint8_quantization + tensors[op.outputs[0]].type = _schema_fb.TensorType.UINT8 + elif inference_output_type == _lite_constants.INT8: + # Remove the outputs and the dequant operator + for op in output_dequant_ops: + subgraph.outputs[subgraph.outputs == op.outputs[0]] = op.inputs[0] + remove_tensors_idxs.add(op.outputs[0]) + operators.remove(op) + + # Remove tensors marked for deletion. 
+ _remove_tensors_from_model(model, remove_tensors_idxs) + + # Convert the model to a bytearray + model = _convert_model_from_object_to_bytearray(model) + + return model diff --git a/tensorflow/lite/python/util_test.py b/tensorflow/lite/python/util_test.py index f3c287dd7fc..0e9cbc1e58a 100644 --- a/tensorflow/lite/python/util_test.py +++ b/tensorflow/lite/python/util_test.py @@ -19,7 +19,10 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from absl.testing import parameterized +import numpy as np from six.moves import range +import tensorflow as tf from tensorflow.lite.python import lite_constants from tensorflow.lite.python import util @@ -61,6 +64,31 @@ class UtilTest(test_util.TensorFlowTestCase): self.assertEqual( util.convert_dtype_to_tflite_type(dtypes.bool), _types_pb2.BOOL) + def testConvertEnumToDtype(self): + self.assertEqual( + util._convert_tflite_enum_type_to_tf_type(0), dtypes.float32) + self.assertEqual( + util._convert_tflite_enum_type_to_tf_type(1), dtypes.float16) + self.assertEqual(util._convert_tflite_enum_type_to_tf_type(2), dtypes.int32) + self.assertEqual(util._convert_tflite_enum_type_to_tf_type(3), dtypes.uint8) + self.assertEqual(util._convert_tflite_enum_type_to_tf_type(4), dtypes.int64) + self.assertEqual( + util._convert_tflite_enum_type_to_tf_type(5), dtypes.string) + self.assertEqual(util._convert_tflite_enum_type_to_tf_type(6), dtypes.bool) + self.assertEqual(util._convert_tflite_enum_type_to_tf_type(7), dtypes.int16) + self.assertEqual( + util._convert_tflite_enum_type_to_tf_type(8), dtypes.complex64) + self.assertEqual(util._convert_tflite_enum_type_to_tf_type(9), dtypes.int8) + self.assertEqual( + util._convert_tflite_enum_type_to_tf_type(10), dtypes.float64) + with self.assertRaises(ValueError) as error: + util._convert_tflite_enum_type_to_tf_type(11) + self.assertEqual( + "Unsupported enum 11. The valid map of enum to tf.dtypes is : " + "{0: tf.float32, 1: tf.float16, 2: tf.int32, 3: tf.uint8, 4: tf.int64, " + "5: tf.string, 6: tf.bool, 7: tf.int16, 8: tf.complex64, 9: tf.int8, " + "10: tf.float64}", str(error.exception)) + def testTensorName(self): with ops.Graph().as_default(): in_tensor = array_ops.placeholder(shape=[4], dtype=dtypes.float32) @@ -195,5 +223,140 @@ class TensorFunctionsTest(test_util.TensorFlowTestCase): self.assertEqual([None, 3, 5], tensor.shape.as_list()) +def _generate_integer_tflite_model(): + """Define an integer post-training quantized tflite model.""" + # Load MNIST dataset + n = 10 # Number of samples + (train_images, train_labels), (test_images, test_labels) = \ + tf.keras.datasets.mnist.load_data() + train_images, train_labels, test_images, test_labels = \ + train_images[:n], train_labels[:n], test_images[:n], test_labels[:n] + + # Normalize the input image so that each pixel value is between 0 to 1. 
+ train_images = train_images / 255.0 + test_images = test_images / 255.0 + + # Define TF model + model = tf.keras.Sequential([ + tf.keras.layers.InputLayer(input_shape=(28, 28)), + tf.keras.layers.Reshape(target_shape=(28, 28, 1)), + tf.keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation="relu"), + tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), + tf.keras.layers.Flatten(), + tf.keras.layers.Dense(10) + ]) + + # Train + model.compile( + optimizer="adam", + loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), + metrics=["accuracy"]) + + model.fit( + train_images, + train_labels, + epochs=1, + validation_split=0.1, + ) + + # Convert TF Model to an Integer Quantized TFLite Model + converter = tf.lite.TFLiteConverter.from_keras_model(model) + converter.optimizations = {tf.lite.Optimize.DEFAULT} + def representative_dataset_gen(): + for _ in range(2): + yield [ + np.random.uniform(low=0, high=1, size=(1, 28, 28)).astype( + np.float32) + ] + converter.representative_dataset = representative_dataset_gen + converter.target_spec.supported_ops = {tf.lite.OpsSet.TFLITE_BUILTINS_INT8} + tflite_model = converter.convert() + + return tflite_model + + +def _test_param_modify_integer_model_io_type(): + """Function to generate parameterized inputs for testing.""" + params = [] + str_template = "_{}{}{}" + map_model_type = { + "PostTraining": True, + # "DuringTraining": False, + } + map_types = { + "": lite_constants.FLOAT, + "INT8": lite_constants.INT8, + "UINT8": lite_constants.QUANTIZED_UINT8 + } + for k1, v1 in map_model_type.items(): + for k2, v2 in map_types.items(): + istr = "_Input{}".format(k2) if k2 else "" + for k3, v3 in map_types.items(): + ostr = "_Output{}".format(k3) if k3 else "" if istr else "_NoUpdate" + params.append((str_template.format(k1, istr, ostr), v1, v2, v3)) + return params + + +# TODO(b/161174063): Merge tests for integer input/output type +class UtilModifyIntegerQuantizedModelIOTypeTest( + test_util.TensorFlowTestCase, parameterized.TestCase): + + @classmethod + def setUpClass(cls): + super(UtilModifyIntegerQuantizedModelIOTypeTest, cls).setUpClass() + cls.post_train_integer_model = _generate_integer_tflite_model() + + @parameterized.named_parameters(_test_param_modify_integer_model_io_type()) + def test(self, is_post_train, in_tftype, out_tftype): + """Modify the float input/output type of an integer quantized model.""" + + def _run_tflite_inference(model, in_tftype, out_tftype): + """Run inference on a model with a specific input/output type.""" + # Load TFLite model and allocate tensors. 
+ interpreter = tf.lite.Interpreter(model_content=model) + interpreter.allocate_tensors() + input_details = interpreter.get_input_details()[0] + output_details = interpreter.get_output_details()[0] + + # Validate TFLite model input and output types + self.assertEqual(input_details["dtype"], in_tftype.as_numpy_dtype) + self.assertEqual(output_details["dtype"], out_tftype.as_numpy_dtype) + + # Define Input + np.random.seed(0) + input_data = np.random.uniform(low=0, high=1, size=(1, 28, 28)) + input_data = input_data.astype(np.float32) + if input_details["dtype"] != np.float32: + # quantize float to int + scale, zero_point = input_details["quantization"] + input_data = input_data / scale + zero_point + input_data = input_data.astype(input_details["dtype"]) + + # Run Inference + interpreter.set_tensor(input_details["index"], input_data) + interpreter.invoke() + + # Get output + output_data = interpreter.get_tensor(output_details["index"])[0] + if output_details["dtype"] != np.float32: + # dequantize int to float + scale, zero_point = output_details["quantization"] + output_data = output_data.astype(np.float32) + output_data = (output_data - zero_point) * scale + + return output_data + + model = self.__class__.post_train_integer_model if is_post_train else None + # Run model inference with float input output type + output_data = _run_tflite_inference(model, tf.float32, tf.float32) + # Run model inference with modified integer input output type + model_io = util.modify_integer_quantized_model_io_type( + model, in_tftype, out_tftype) + output_io_data = _run_tflite_inference(model_io, in_tftype, out_tftype) + + # Validate that both the outputs are the same + self.assertTrue(np.allclose(output_data, output_io_data, atol=1.0)) + + if __name__ == "__main__": test.main() From a734bfd83f49de3e093fdfb014cbca7c3f125d36 Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Tue, 14 Jul 2020 10:47:43 -0700 Subject: [PATCH 0385/2522] Update function argument resolution to allow caller to control the argument types, based on the module, function and argument names and its annotated type, if available. Update the resolver interface for greater consistency, and to move the responsibility for any name lookup outside the inference logic itself. PiperOrigin-RevId: 321189240 Change-Id: I76830c9582a3f388845712772ffefe1f063aebe0 --- .../pyct/static_analysis/activity.py | 15 +++-- .../pyct/static_analysis/type_inference.py | 65 +++++++------------ .../static_analysis/type_inference_test.py | 18 ++--- 3 files changed, 44 insertions(+), 54 deletions(-) diff --git a/tensorflow/python/autograph/pyct/static_analysis/activity.py b/tensorflow/python/autograph/pyct/static_analysis/activity.py index 0e19da87451..a3228c0a1cc 100644 --- a/tensorflow/python/autograph/pyct/static_analysis/activity.py +++ b/tensorflow/python/autograph/pyct/static_analysis/activity.py @@ -57,6 +57,7 @@ class Scope(object): the terminology of the Python 3 reference documentation, True roughly represents an actual scope, whereas False represents an ordinary code block. + function_name: Optional[str], name of the function owning this scope. isolated_names: Set[qual_names.QN], identifiers that are isolated to this scope (even if the scope is not isolated). annotations: Set[qual_names.QN], identifiers used as type annotations @@ -94,7 +95,7 @@ class Scope(object): # Note: this mutable-immutable pattern is used because using a builder would # have taken a lot more boilerplate. 
- def __init__(self, parent, isolated=True): + def __init__(self, parent, isolated=True, function_name=None): """Create a new scope. Args: @@ -102,9 +103,11 @@ class Scope(object): isolated: Whether the scope is isolated, that is, whether variables modified in this scope should be considered modified in the parent scope. + function_name: Name of the function owning this scope. """ self.parent = parent self.isolated = isolated + self.function_name = function_name self.isolated_names = set() @@ -321,8 +324,8 @@ class ActivityAnalyzer(transformer.Base): raise ValueError('Unknown context {} for node "{}".'.format( type(node.ctx), qn)) - def _enter_scope(self, isolated): - self.scope = Scope(self.scope, isolated=isolated) + def _enter_scope(self, isolated, f_name=None): + self.scope = Scope(self.scope, isolated=isolated, function_name=f_name) def _exit_scope(self): exited_scope = self.scope @@ -580,10 +583,10 @@ class ActivityAnalyzer(transformer.Base): self._exit_and_record_scope(node) # A separate Scope tracks the actual function definition. - self._enter_scope(True) + self._enter_scope(True, node.name) # Keep a separate scope for the arguments node, which is used in the CFG. - self._enter_scope(False) + self._enter_scope(False, node.name) # Arg declarations only affect the function itself, and have no effect # in the defining context whatsoever. @@ -593,7 +596,7 @@ class ActivityAnalyzer(transformer.Base): # Track the body separately. This is for compatibility reasons, it may not # be strictly needed. - self._enter_scope(False) + self._enter_scope(False, node.name) node.body = self.visit_block(node.body) self._exit_and_record_scope(node, NodeAnno.BODY_SCOPE) diff --git a/tensorflow/python/autograph/pyct/static_analysis/type_inference.py b/tensorflow/python/autograph/pyct/static_analysis/type_inference.py index f684d041c90..f8ddbe4e91c 100644 --- a/tensorflow/python/autograph/pyct/static_analysis/type_inference.py +++ b/tensorflow/python/autograph/pyct/static_analysis/type_inference.py @@ -36,16 +36,28 @@ from tensorflow.python.autograph.pyct.static_analysis import annos class Resolver(object): - """Resolvers allow customizing the process of identifying types.""" + """Resolver objects handle the process of looking up actual names and types. - def resolve_external_type(self, type_): + All resolve_* methods take: + * a first namespace argument, mapping string to actual values + * one or more name arguments, as QN objects + + All resolve_* methods must return either: + * a set of `type` objects + * None + """ + + def resolve_external_name(self, ns, name): + """Resolves the type an external (e.g. closure, global) variable.""" raise NotImplementedError('subclasses must implement') - def resolve_external_value(self, value): + def resolve_external_call(self, ns, name): + """Resolves the return type an external function call.""" + # TODO(mdan): This must accept argument value/types. raise NotImplementedError('subclasses must implement') - def resolve_external_function_call(self, fn): - # TODO(mdan)): This must accept value/types to arguments + def resolve_external_arg(self, ns, f_name, arg_name, type_anno): + """Resolves the type of a (possibly annotated) function argument.""" raise NotImplementedError('subclasses must implement') # TODO(mdan): More resolvers as needed. @@ -95,13 +107,6 @@ class _SymbolTable(object): return 'SymbolTable {}'.format(self.value) -# These special names don't normally show up in globals. 
-SPECIAL_NAMES = { - 'int': int, - 'float': float, -} - - class Analyzer(cfg.GraphVisitor): """CFG visitor that performs type inference at statement level.""" @@ -122,15 +127,6 @@ class Analyzer(cfg.GraphVisitor): def init_state(self, _): return _SymbolTable() - def _static_value(self, qn): - """Looks up a name in the namespace.""" - # TODO(mdan): This needs to be updated to work for composite symbols. - name = str(qn) - value = self.namespace.get(name, None) - if value is None: - return SPECIAL_NAMES.get(name, None) - return value - def _infer_type(self, node, types_in): """Infers the return type of an expression.""" if isinstance(node, gast.Name): @@ -139,22 +135,15 @@ class Analyzer(cfg.GraphVisitor): types = types_in.value.get(name, None) if types is not None: return types - # If type is unknown, attempt to look the symbol up in the namespace. + # If type is unknown, resolve it. if name not in self.scope.bound: - # TODO(mdan): Might still be able to do something for bound symbols. - static_value = self._static_value(name) - if static_value is not None: - return {self.resolver.resolve_external_value(static_value)} + return self.resolver.resolve_external_name(self.namespace, name) return None if isinstance(node, gast.Call): - # Function calls: infer their return type. + # Function calls: resolve their return type. f_name = anno.getanno(node.func, anno.Basic.QN) - static_value = self._static_value(f_name) - # TODO(mdan): This needs to be updated to work for composite symbols. - if static_value is None: - raise ValueError('cannot infer return type of {}'.format(f_name)) - return {self.resolver.resolve_external_function_call(static_value)} + return self.resolver.resolve_external_call(self.namespace, f_name) else: raise NotImplementedError(node) @@ -178,15 +167,11 @@ class Analyzer(cfg.GraphVisitor): assert isinstance(node, gast.Name) name = anno.getanno(node, anno.Basic.QN) type_name = anno.getanno(node.annotation, anno.Basic.QN, None) - if type_name is None: - return () - static_value = self._static_value(type_name) - if static_value is None: - raise ValueError('cannot resolve type {}'.format(type_name)) - - type_ = self.resolver.resolve_external_type(static_value) - return (name, {type_}), + type_ = self.resolver.resolve_external_arg(self.namespace, + self.scope.function_name, name, + type_name) + return (name, type_), def _args_types(self, node): """Propagates types through argument annotations.""" diff --git a/tensorflow/python/autograph/pyct/static_analysis/type_inference_test.py b/tensorflow/python/autograph/pyct/static_analysis/type_inference_test.py index a8e956ef558..3291347396f 100644 --- a/tensorflow/python/autograph/pyct/static_analysis/type_inference_test.py +++ b/tensorflow/python/autograph/pyct/static_analysis/type_inference_test.py @@ -29,14 +29,16 @@ from tensorflow.python.platform import test class TestResolver(type_inference.Resolver): - def resolve_external_type(self, t): - return t + def resolve_external_name(self, ns, name): + return {type(ns[str(name)])} - def resolve_external_value(self, value): - return type(value) + def resolve_external_call(self, ns, f_name): + return {ns[str(f_name)].__annotations__['return']} - def resolve_external_function_call(self, fn): - return fn.__annotations__['return'] + def resolve_external_arg(self, ns, f_name, arg_name, type_anno): + if type_anno is not None: + return {{'int': int, 'float': float}[str(type_anno)]} + return {'{}_{}'.format(f_name, arg_name)} class TestTranspiler(transpiler.GenericTranspiler): @@ -69,7 +71,7 @@ class 
TypeInferenceAnalyzerTest(test.TestCase): fn_body = node.body self.assertTypes(fn_body[0].value.elts[0], int) - self.assertTypes(fn_body[0].value.elts[1], ()) + self.assertTypes(fn_body[0].value.elts[1], 'test_fn_b') def test_straightline_assignment(self): @@ -84,7 +86,7 @@ class TypeInferenceAnalyzerTest(test.TestCase): self.assertTypes(fn_body[0].value, int) self.assertTypes(fn_body[1].value.elts[0], int) self.assertTypes(fn_body[1].value.elts[1], int) - self.assertTypes(fn_body[1].value.elts[2], ()) + self.assertTypes(fn_body[1].value.elts[2], 'test_fn_c') def test_assignment_overwrite(self): From fdff59454e3221274ec98fb762d272b2ba5c6f81 Mon Sep 17 00:00:00 2001 From: Tomer Kaftan Date: Tue, 14 Jul 2020 10:55:21 -0700 Subject: [PATCH 0386/2522] Remove run_v1_only / run_deprecated_v1 decorators from the legacy learning rate decay tests PiperOrigin-RevId: 321191011 Change-Id: Icc45c8eb0d9ccbf7cf11314433d414769a058e69 --- .../legacy_learning_rate_decay_test.py | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/keras/optimizer_v2/legacy_learning_rate_decay_test.py b/tensorflow/python/keras/optimizer_v2/legacy_learning_rate_decay_test.py index 19a59a64be0..65e2cc93f37 100644 --- a/tensorflow/python/keras/optimizer_v2/legacy_learning_rate_decay_test.py +++ b/tensorflow/python/keras/optimizer_v2/legacy_learning_rate_decay_test.py @@ -59,22 +59,26 @@ class LRDecayTest(test_util.TensorFlowTestCase): self.evaluate(step.assign(100)) self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) - @test_util.run_deprecated_v1 + @test_util.run_in_graph_and_eager_modes def testVariables(self): step = variables.VariableV1(1) - assign_1 = step.assign(1) - assign_2 = step.assign(2) - assign_100 = step.assign(100) + decayed_lr = learning_rate_decay.exponential_decay( .1, step, 3, 0.96, staircase=True) self.evaluate(variables.global_variables_initializer()) # No change to learning rate - self.evaluate(assign_1.op) + assign_1 = step.assign(1) + if not context.executing_eagerly(): + self.evaluate(assign_1.op) self.assertAllClose(self.evaluate(decayed_lr), .1, 1e-6) - self.evaluate(assign_2.op) + assign_2 = step.assign(2) + if not context.executing_eagerly(): + self.evaluate(assign_2.op) self.assertAllClose(self.evaluate(decayed_lr), .1, 1e-6) # Decayed learning rate - self.evaluate(assign_100.op) + assign_100 = step.assign(100) + if not context.executing_eagerly(): + self.evaluate(assign_100.op) expected = .1 * 0.96**(100 // 3) self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) @@ -99,7 +103,6 @@ class LRDecayTest(test_util.TensorFlowTestCase): self.assertAllClose(self.evaluate(decayed_lr), 0.001, 1e-6) @test_util.run_in_graph_and_eager_modes - @test_util.run_v1_only("b/120545219") def testPiecewiseConstantEdgeCases(self): x_int = variables.Variable(0, dtype=variables.dtypes.int32) boundaries, values = [-1.0, 1.0], [1, 2, 3] @@ -119,7 +122,7 @@ class LRDecayTest(test_util.TensorFlowTestCase): # Test that ref types are valid. 
if not context.executing_eagerly(): - x = variables.VariableV1(0.0) + x = variables.VariableV1(0.0, use_resource=False) x_ref = x.op.outputs[0] # float32_ref tensor should be accepted boundaries, values = [1.0, 2.0], [1, 2, 3] learning_rate_decay.piecewise_constant(x_ref, boundaries, values) From 0dbb5392b8f31d501e3bb34056ad6f13d2538fa3 Mon Sep 17 00:00:00 2001 From: Gabriel Rasskin Date: Tue, 14 Jul 2020 11:07:12 -0700 Subject: [PATCH 0387/2522] Increase complexity of arg_def_fuzz @mihaimaruseac --- .../fuzzing/consume_leading_digits_fuzz.cc | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/tensorflow/security/fuzzing/consume_leading_digits_fuzz.cc b/tensorflow/security/fuzzing/consume_leading_digits_fuzz.cc index d49bc1f2110..cb000888c02 100644 --- a/tensorflow/security/fuzzing/consume_leading_digits_fuzz.cc +++ b/tensorflow/security/fuzzing/consume_leading_digits_fuzz.cc @@ -18,21 +18,26 @@ limitations under the License. #include "tensorflow/core/platform/str_util.h" #include "tensorflow/core/platform/stringpiece.h" +#include + // This is a fuzzer for tensorflow::str_util::ConsumeLeadingDigits namespace { extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { - uint8_t *byte_data = const_cast(data); - char *char_data = reinterpret_cast(byte_data); + FuzzedDataProvider fuzzed_data(data, size); - tensorflow::StringPiece sp(char_data, size); - tensorflow::uint64 val; + while (fuzzed_data.remaining_bytes() > 0) { + std::string s = fuzzed_data.ConsumeRandomLengthString(5); + tensorflow::StringPiece sp(s); + tensorflow::uint64 val; - const bool leading_digits = - tensorflow::str_util::ConsumeLeadingDigits(&sp, &val); - if (leading_digits) { - assert(val >= 0); + const bool leading_digits = tensorflow::str_util::ConsumeLeadingDigits(&sp, &val); + const char lead_char_consume_digits = *(sp.data()); + if (leading_digits) { + assert(lead_char_consume_digits < '0' && lead_char_consume_digits > '9'); + assert(val >= 0); + } } return 0; From 000b17d9a19643b206c29accb3abbfd7de70ce9c Mon Sep 17 00:00:00 2001 From: Tomer Kaftan Date: Tue, 14 Jul 2020 11:02:34 -0700 Subject: [PATCH 0388/2522] Explicitly raise a (clearer) error message when models end up in invalid states due to interleaving graph and eager. In rare cases code may have run w/o crashing when in these invalid states, but it's safer to error with an explanation rather than risk silent failures/fragile behavior. PiperOrigin-RevId: 321192744 Change-Id: I9e97ac3b7cea27c9b389e5202de9f1c09a4aa2b8 --- .../python/keras/engine/base_layer_v1.py | 19 +++++++++++++++++++ tensorflow/python/keras/engine/training_v1.py | 4 ++++ 2 files changed, 23 insertions(+) diff --git a/tensorflow/python/keras/engine/base_layer_v1.py b/tensorflow/python/keras/engine/base_layer_v1.py index 724559f5823..1b9dcf50123 100644 --- a/tensorflow/python/keras/engine/base_layer_v1.py +++ b/tensorflow/python/keras/engine/base_layer_v1.py @@ -252,6 +252,9 @@ class Layer(base_layer.Layer): # might want to turn it off, like Sequential model. self._auto_track_sub_layers = True + # Mark this layer as having been originally built as a tf1 layer/model + self._originally_built_as_v1 = True + @trackable.no_automatic_dependency_tracking @generic_utils.default def build(self, input_shape): @@ -651,6 +654,8 @@ class Layer(base_layer.Layer): ValueError: if the layer's `call` method returns None (an invalid value). RuntimeError: if `super().__init__()` was not called in the constructor. 
""" + self._assert_built_as_v1() + if not hasattr(self, '_thread_local'): raise RuntimeError( 'You must call `super().__init__()` in the layer constructor.') @@ -818,6 +823,20 @@ class Layer(base_layer.Layer): return outputs + def _assert_built_as_v1(self): + if not hasattr(self, '_originally_built_as_v1'): + raise ValueError( + 'Your Layer or Model is in an invalid state. This can happen if you ' + 'are interleaving estimator/non-estimator models or ' + 'interleaving models/layers made in tf.compat.v1.Graph.as_default() ' + 'with models/layers created outside of it. ' + 'Converting a model to an estimator (via model_to_estimator) ' + 'invalidates all models/layers made before the conversion (even ' + 'if they were not the model converted to an estimator). ' + 'Similarly, making a layer or a model inside a ' + 'a tf.compat.v1.Graph invalidates all layers/models you previously ' + 'made outside of the graph.') + @property def dtype(self): return self._dtype_policy.variable_dtype diff --git a/tensorflow/python/keras/engine/training_v1.py b/tensorflow/python/keras/engine/training_v1.py index bf518e1e702..b7e1d21326b 100644 --- a/tensorflow/python/keras/engine/training_v1.py +++ b/tensorflow/python/keras/engine/training_v1.py @@ -302,6 +302,7 @@ class Model(training_lib.Model): ValueError: In case of invalid arguments for `optimizer`, `loss`, `metrics` or `sample_weight_mode`. """ + self._assert_built_as_v1() self._run_eagerly = kwargs.pop('run_eagerly', None) self._experimental_run_tf_function = kwargs.pop( 'experimental_run_tf_function', True) @@ -772,6 +773,7 @@ class Model(training_lib.Model): ValueError: In case of mismatch between the provided input data and what the model expects. """ + self._assert_built_as_v1() _keras_api_gauge.get_cell('fit_v1').set(True) # Legacy support if 'nb_epoch' in kwargs: @@ -892,6 +894,7 @@ class Model(training_lib.Model): Raises: ValueError: in case of invalid arguments. """ + self._assert_built_as_v1() _keras_api_gauge.get_cell('evaluate_v1').set(True) self._assert_compile_was_called() self._check_call_args('evaluate') @@ -971,6 +974,7 @@ class Model(training_lib.Model): or in case a stateful model receives a number of samples that is not a multiple of the batch size. """ + self._assert_built_as_v1() _keras_api_gauge.get_cell('predict_v1').set(True) self._check_call_args('predict') From ed6ff2af96d28e4039450dad8e8b576c2a9b7978 Mon Sep 17 00:00:00 2001 From: Jingyue Wu Date: Tue, 14 Jul 2020 11:03:46 -0700 Subject: [PATCH 0389/2522] [TF-TRT] Skip segments of size 0. PiperOrigin-RevId: 321193018 Change-Id: Ic9513451f699fcbe8655b8275dd4b1dee34a3521 --- tensorflow/compiler/tf2tensorrt/segment/segment.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/tf2tensorrt/segment/segment.cc b/tensorflow/compiler/tf2tensorrt/segment/segment.cc index e7820ca41fe..9f9711f6c48 100644 --- a/tensorflow/compiler/tf2tensorrt/segment/segment.cc +++ b/tensorflow/compiler/tf2tensorrt/segment/segment.cc @@ -1031,7 +1031,8 @@ Status SegmentGraph(const Graph* tf_graph, }); // Don't use segments whose number of effective nodes is small. 
- if (num_effective_nodes < options.minimum_segment_size) { + if (num_effective_nodes == 0 || + num_effective_nodes < options.minimum_segment_size) { VLOG(1) << "Segment " << segments->size() << " has only " << num_effective_nodes << " effective nodes, dropping"; continue; From 9fd2e39518f637bbcee603ea2d3ccb0e9336d485 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Tue, 14 Jul 2020 11:24:45 -0700 Subject: [PATCH 0390/2522] Internal protobuf refactoring PiperOrigin-RevId: 321197814 Change-Id: If6023874e67199f9d66b6380a73838da16837005 --- tensorflow/compiler/xla/tools/hlo_module_loader.cc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/xla/tools/hlo_module_loader.cc b/tensorflow/compiler/xla/tools/hlo_module_loader.cc index b3aaba7fa25..8b70b0d35a7 100644 --- a/tensorflow/compiler/xla/tools/hlo_module_loader.cc +++ b/tensorflow/compiler/xla/tools/hlo_module_loader.cc @@ -21,7 +21,6 @@ limitations under the License. #include #include -#include "google/protobuf/text_format.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_join.h" #include "absl/strings/str_split.h" @@ -32,6 +31,7 @@ limitations under the License. #include "tensorflow/core/lib/io/path.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/protobuf.h" #include "tensorflow/core/platform/regexp.h" namespace xla { @@ -87,9 +87,10 @@ StatusOr> LoadModuleFromData( return InvalidArgument("Failed to parse input as HLO protobuf binary"); } } else if (format == "pbtxt") { - if (!google::protobuf::TextFormat::ParseFromString(data, &proto) && - !google::protobuf::TextFormat::ParseFromString(data, proto.mutable_hlo()) && - !google::protobuf::TextFormat::ParseFromString( + if (!tensorflow::protobuf::TextFormat::ParseFromString(data, &proto) && + !tensorflow::protobuf::TextFormat::ParseFromString( + data, proto.mutable_hlo()) && + !tensorflow::protobuf::TextFormat::ParseFromString( data, proto.mutable_hlo()->mutable_hlo_module())) { return InvalidArgument("Failed to parse input as HLO protobuf text"); } From 003aff6323fea2f9b5328f640569a5e3af7480a7 Mon Sep 17 00:00:00 2001 From: Gabriel Rasskin Date: Tue, 14 Jul 2020 18:32:41 +0000 Subject: [PATCH 0391/2522] Change size of random string Co-authored-by: Mihai Maruseac --- tensorflow/security/fuzzing/consume_leading_digits_fuzz.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/security/fuzzing/consume_leading_digits_fuzz.cc b/tensorflow/security/fuzzing/consume_leading_digits_fuzz.cc index cb000888c02..6edf349ff26 100644 --- a/tensorflow/security/fuzzing/consume_leading_digits_fuzz.cc +++ b/tensorflow/security/fuzzing/consume_leading_digits_fuzz.cc @@ -28,7 +28,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { FuzzedDataProvider fuzzed_data(data, size); while (fuzzed_data.remaining_bytes() > 0) { - std::string s = fuzzed_data.ConsumeRandomLengthString(5); + std::string s = fuzzed_data.ConsumeRandomLengthString(25); tensorflow::StringPiece sp(s); tensorflow::uint64 val; From 603104ecf5f02cd2e4406a6a3f47cb6be9849452 Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Tue, 14 Jul 2020 14:33:32 -0400 Subject: [PATCH 0392/2522] Update topological_sort.cc --- tensorflow/core/grappler/utils/topological_sort.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/grappler/utils/topological_sort.cc b/tensorflow/core/grappler/utils/topological_sort.cc index 
49a67a3497e..9f108b0f396 100644 --- a/tensorflow/core/grappler/utils/topological_sort.cc +++ b/tensorflow/core/grappler/utils/topological_sort.cc @@ -81,8 +81,8 @@ Status ComputeTopologicalOrder( int ready_node = (*ready_nodes)[front]; for (int fanout : graph_view.GetFanout(ready_node)) { ++num_ready_inputs[fanout]; - const int graph_view_get_fanin_fanout_size = graph_view.GetFanin(fanout).size(); - if (num_ready_inputs[fanout] == graph_view_get_fanin_fanout_size) { + const int max_size = graph_view.GetFanin(fanout).size(); + if (num_ready_inputs[fanout] == max_size) { ready_nodes->push_back(fanout); ++back; } @@ -96,8 +96,8 @@ Status ComputeTopologicalOrder( "at node = " << graph.node(back).DebugString(); for (int i = 0; i < graph_view.num_nodes(); ++i) { - const int graph_view_Get_fanin_i_size = graph_view.GetFanin(i).size(); - if (num_ready_inputs[i] != graph_view_Get_fanin_i_size) { + const int max_size = graph_view.GetFanin(i).size(); + if (num_ready_inputs[i] != max_size) { VLOG(1) << "Node not ready: " << graph.node(i).DebugString(); } } From e9516a8b0e980b1b2478a683bd233d5ac2c9f2e9 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 14 Jul 2020 11:30:37 -0700 Subject: [PATCH 0393/2522] Fix size computation logic in TransposeSimple Use the right type when computing `num_bytes`. This caused the crash observed in the bug, but I could not reproduce in a unit test (even with cuda_asan) since the `InlinedVector` always uses stack storage. PiperOrigin-RevId: 321199018 Change-Id: I339307a2d2d098d4ad73b363b5f96c19ed65ea52 --- tensorflow/core/kernels/transpose_functor_gpu.cu.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/transpose_functor_gpu.cu.cc b/tensorflow/core/kernels/transpose_functor_gpu.cu.cc index cb54533998e..0747685853e 100644 --- a/tensorflow/core/kernels/transpose_functor_gpu.cu.cc +++ b/tensorflow/core/kernels/transpose_functor_gpu.cu.cc @@ -72,7 +72,7 @@ void TransposeSimple(const GPUDevice& d, const Tensor& in, host_buf[ndims * 2 + i] = perm[i]; } // Copies the input strides, output strides and permutation to the device. - auto num_bytes = sizeof(int64) * host_buf.size(); + auto num_bytes = sizeof(int32) * host_buf.size(); auto dev_buf = d.allocate(num_bytes); // NOTE: host_buf is not allocated by GpuHostAllocator, and // therefore we are doing a sync copy effectively. 
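The fix above changes the element type used to compute `num_bytes` for the host scratch buffer in TransposeSimple. As a minimal, self-contained sketch of that bug class (plain C++ with illustrative names, not code taken from the patch), deriving the byte count from a wider type than the buffer actually holds makes the follow-up copy read past the end of the buffer:

#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

int main() {
  // Host-side scratch buffer of 32-bit values, analogous to host_buf above.
  std::vector<int32_t> host_buf = {1, 2, 3, 4, 5, 6};

  // Correct: byte count derived from the element type actually stored.
  const std::size_t correct_bytes = sizeof(int32_t) * host_buf.size();  // 24

  // Buggy: byte count derived from a wider type; a copy of this size would
  // read past the end of host_buf (undefined behavior, possible crash).
  const std::size_t buggy_bytes = sizeof(int64_t) * host_buf.size();    // 48

  std::cout << "correct: " << correct_bytes << " bytes, buggy: " << buggy_bytes
            << " bytes\n";

  // Stand-in for the host-to-device copy: only the correct size stays within
  // the bounds of host_buf.
  std::vector<unsigned char> device_buf(correct_bytes);
  std::memcpy(device_buf.data(), host_buf.data(), correct_bytes);
  return 0;
}
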
From 09599b6e7bf0add9a8e49da078f66e0420ccd95c Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Tue, 14 Jul 2020 18:37:14 +0000 Subject: [PATCH 0394/2522] Update tensorflow/security/fuzzing/consume_leading_digits_fuzz.cc --- tensorflow/security/fuzzing/consume_leading_digits_fuzz.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/security/fuzzing/consume_leading_digits_fuzz.cc b/tensorflow/security/fuzzing/consume_leading_digits_fuzz.cc index 6edf349ff26..ef2d83c0ffc 100644 --- a/tensorflow/security/fuzzing/consume_leading_digits_fuzz.cc +++ b/tensorflow/security/fuzzing/consume_leading_digits_fuzz.cc @@ -35,7 +35,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { const bool leading_digits = tensorflow::str_util::ConsumeLeadingDigits(&sp, &val); const char lead_char_consume_digits = *(sp.data()); if (leading_digits) { - assert(lead_char_consume_digits < '0' && lead_char_consume_digits > '9'); + assert(lead_char_consume_digits < '0' || lead_char_consume_digits > '9'); assert(val >= 0); } } From 452b4d88d929daba8f0610af7fc3b3c14b8be496 Mon Sep 17 00:00:00 2001 From: Ruoxin Sang Date: Tue, 14 Jul 2020 11:34:12 -0700 Subject: [PATCH 0395/2522] Fix X64 outputs with dynamic shapes. PiperOrigin-RevId: 321199821 Change-Id: I2de7814bcf1071a87e313210abbd505e3c295283 --- .../custom_training_loop_input_test.py | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tensorflow/python/distribute/custom_training_loop_input_test.py b/tensorflow/python/distribute/custom_training_loop_input_test.py index 9251721f7d0..832dc061f65 100644 --- a/tensorflow/python/distribute/custom_training_loop_input_test.py +++ b/tensorflow/python/distribute/custom_training_loop_input_test.py @@ -533,6 +533,31 @@ class InputIterationTest(test.TestCase, parameterized.TestCase, # This assumes that there are exactly 2 replicas self.assertAllEqual([5.5, 7.], run(input_iterator)) + @combinations.generate( + combinations.combine( + distribution=strategy_combinations.multidevice_strategies, + mode=["eager"])) + def testDynamicOutputsWithX64(self, distribution): + dataset = get_dataset_from_tensor_slices( + [5]).map(lambda x: math_ops.cast(x, dtypes.int64)).batch(2) + input_iterator = iter(distribution.experimental_distribute_dataset(dataset)) + + @def_function.function + def run(iterator): + + def computation(x): + return math_ops.add(x, x) + + inputs = next(iterator) + outputs = distribution.experimental_local_results( + distribution.run(computation, args=(inputs,))) + return outputs + + # This assumes that there are exactly 2 replicas + result = run(input_iterator) + self.assertAllEqual([10], result[0]) + self.assertAllEqual([], result[1]) + @combinations.generate( combinations.combine( distribution=strategy_combinations.multidevice_strategies, From d66b953be2ffd9750410088f78644254db0d9cfc Mon Sep 17 00:00:00 2001 From: Xingyu Long Date: Tue, 14 Jul 2020 14:43:04 -0400 Subject: [PATCH 0396/2522] Update benchmark_util and return distribution_strategy. 
--- .../python/keras/benchmarks/benchmark_util.py | 10 +++++---- .../benchmarks/keras_cpu_benchmark_test.py | 21 +++++++++++++------ .../bidirectional_lstm_benchmark_test.py | 21 +++++++++++++------ ...assification_transformer_benchmark_test.py | 21 +++++++++++++------ 4 files changed, 51 insertions(+), 22 deletions(-) diff --git a/tensorflow/python/keras/benchmarks/benchmark_util.py b/tensorflow/python/keras/benchmarks/benchmark_util.py index be973031150..544c272d865 100644 --- a/tensorflow/python/keras/benchmarks/benchmark_util.py +++ b/tensorflow/python/keras/benchmarks/benchmark_util.py @@ -63,7 +63,7 @@ def measure_performance(model_fn, x: Input data. See `x` in the `fit()` method of `keras.Model`. y: Target data. See `y` in the `fit()` method of `keras.Model`. epochs: Integer. Number of epochs to train the model. - If unspecified, `epoch` will default to 2. + If unspecified, `epochs` will default to 2. batch_size: Integer. Number of samples per gradient update. If unspecified, `batch_size` will default to 32. run_iters: Integer. Number of iterations to run the performance measurement. @@ -84,7 +84,8 @@ def measure_performance(model_fn, Returns: Performance summary, which contains build_time, compile_time, - startup_time, avg_epoch_time, wall_time, exp_per_sec,epochs. + startup_time, avg_epoch_time, wall_time, exp_per_sec, epochs, + distribution_strategy. Raise: ValueError: If `x` is none or if `optimizer` is not provided or @@ -154,6 +155,7 @@ def measure_performance(model_fn, metrics.append({'name': 'epochs', 'value': epochs}) - wall_time = np.mean(wall_time_list) + extras = {'distribution_strategy': distribution_strategy, + 'wall_time': np.mean(wall_time_list)} - return metrics, wall_time + return metrics, extras diff --git a/tensorflow/python/keras/benchmarks/keras_cpu_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_cpu_benchmark_test.py index 633fe139d33..b1828c64eba 100644 --- a/tensorflow/python/keras/benchmarks/keras_cpu_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_cpu_benchmark_test.py @@ -95,7 +95,7 @@ class KerasModelCPUBenchmark( """Benchmark for MLP model on synthetic mnist data.""" mlp_x = np.random.random((5000, 784)) mlp_y = np.random.random((5000, 10)) - metrics, wall_time = benchmark_util.measure_performance( + metrics, extras = benchmark_util.measure_performance( self._mnist_mlp, x=mlp_x, y=mlp_y, @@ -104,13 +104,16 @@ class KerasModelCPUBenchmark( optimizer=_OPTIMIZER, loss=_LOSS) self.report_benchmark( - iters=run_iters, wall_time=wall_time, metrics=metrics) + iters=run_iters, + wall_time=extras['wall_time'], + metrics=metrics, + extras=extras) def benchmark_mnist_convnet(self, batch_size, run_iters): """Benchmark for Convnet model on synthetic mnist data.""" convnet_x = np.random.random((5000, 28, 28, 1)) convnet_y = np.random.random((5000, 10)) - metrics, wall_time = benchmark_util.measure_performance( + metrics, extras = benchmark_util.measure_performance( self._mnist_convnet, x=convnet_x, y=convnet_y, @@ -119,13 +122,16 @@ class KerasModelCPUBenchmark( optimizer=_OPTIMIZER, loss=_LOSS) self.report_benchmark( - iters=run_iters, wall_time=wall_time, metrics=metrics) + iters=run_iters, + wall_time=extras['wall_time'], + metrics=metrics, + extras=extras) def benchmark_imdb_lstm(self, batch_size, run_iters): """Benchmark for LSTM model on synthetic imdb review dataset.""" lstm_x = np.random.randint(0, 1999, size=(2500, 100)) lstm_y = np.random.random((2500, 1)) - metrics, wall_time = benchmark_util.measure_performance( + metrics, 
extras = benchmark_util.measure_performance( self._imdb_lstm, x=lstm_x, y=lstm_y, @@ -134,7 +140,10 @@ class KerasModelCPUBenchmark( optimizer=_OPTIMIZER, loss=_LOSS) self.report_benchmark( - iters=run_iters, wall_time=wall_time, metrics=metrics) + iters=run_iters, + wall_time=extras['wall_time'], + metrics=metrics, + extras=extras) if __name__ == '__main__': diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py index 4985533e299..1359d951636 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py @@ -59,7 +59,7 @@ class BidirectionalLSTMBenchmark(tf.test.Benchmark): """Measure performance with batch_size=128 and run_iters=3.""" batch_size = 128 run_iters = 3 - metrics, wall_time = benchmark_util.measure_performance( + metrics, extras = benchmark_util.measure_performance( self._build_model, x=self.imdb_x, y=self.imdb_y, @@ -70,13 +70,16 @@ class BidirectionalLSTMBenchmark(tf.test.Benchmark): metrics=['accuracy']) self.report_benchmark( - iters=run_iters, wall_time=wall_time, metrics=metrics) + iters=run_iters, + wall_time=extras['wall_time'], + metrics=metrics, + extras=extras) def benchmark_bidirect_lstm_imdb_bs_256(self): """Measure performance with batch_size=256 and run_iters=2.""" batch_size = 256 run_iters = 2 - metrics, wall_time = benchmark_util.measure_performance( + metrics, extras = benchmark_util.measure_performance( self._build_model, x=self.imdb_x, y=self.imdb_y, @@ -87,13 +90,16 @@ class BidirectionalLSTMBenchmark(tf.test.Benchmark): metrics=['accuracy']) self.report_benchmark( - iters=run_iters, wall_time=wall_time, metrics=metrics) + iters=run_iters, + wall_time=extras['wall_time'], + metrics=metrics, + extras=extras) def benchmark_bidirect_lstm_imdb_bs_512(self): """Measure performance with batch_size=512 and run_iters=4.""" batch_size = 512 run_iters = 4 - metrics, wall_time = benchmark_util.measure_performance( + metrics, extras = benchmark_util.measure_performance( self._build_model, x=self.imdb_x, y=self.imdb_y, @@ -104,7 +110,10 @@ class BidirectionalLSTMBenchmark(tf.test.Benchmark): metrics=['accuracy']) self.report_benchmark( - iters=run_iters, wall_time=wall_time, metrics=metrics) + iters=run_iters, + wall_time=extras['wall_time'], + metrics=metrics, + extras=extras) if __name__ == '__main__': diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py index e77765e45a9..2bfbd6aefa2 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py @@ -73,7 +73,7 @@ class TextWithTransformerBenchmark(tf.test.Benchmark): """Measure performance with batch_size=128 and run_iters=3.""" batch_size = 128 run_iters = 3 - metrics, wall_time = benchmark_util.measure_performance( + metrics, extras = benchmark_util.measure_performance( self._build_model, x=self.imdb_x, y=self.imdb_y, @@ -84,13 +84,16 @@ class TextWithTransformerBenchmark(tf.test.Benchmark): metrics=['accuracy']) self.report_benchmark( - iters=run_iters, 
wall_time=wall_time, metrics=metrics) + iters=run_iters, + wall_time=extras['wall_time'], + metrics=metrics, + extras=extras) def benchmark_text_classification_bs_512(self): """Measure performance with batch_size=512 and run_iters=4.""" batch_size = 512 run_iters = 4 - metrics, wall_time = benchmark_util.measure_performance( + metrics, extras = benchmark_util.measure_performance( self._build_model, x=self.imdb_x, y=self.imdb_y, @@ -101,13 +104,16 @@ class TextWithTransformerBenchmark(tf.test.Benchmark): metrics=['accuracy']) self.report_benchmark( - iters=run_iters, wall_time=wall_time, metrics=metrics) + iters=run_iters, + wall_time=extras['wall_time'], + metrics=metrics, + extras=extras) def benchmark_text_classification_bs_256(self): """Measure performance with batch_size=256 and run_iters=3.""" batch_size = 256 run_iters = 3 - metrics, wall_time = benchmark_util.measure_performance( + metrics, extras = benchmark_util.measure_performance( self._build_model, x=self.imdb_x, y=self.imdb_y, @@ -118,7 +124,10 @@ class TextWithTransformerBenchmark(tf.test.Benchmark): metrics=['accuracy']) self.report_benchmark( - iters=run_iters, wall_time=wall_time, metrics=metrics) + iters=run_iters, + wall_time=extras['wall_time'], + metrics=metrics, + extras=extras) class MultiHeadSelfAttention(tf.keras.layers.Layer): """Implement multi head self attention as a Keras layer.""" From d35e45995db03cd361fd0e82a3ee62f93b33e49d Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Tue, 14 Jul 2020 12:11:56 -0700 Subject: [PATCH 0397/2522] [TF/XLA] Enable input/output aliasing in the TF2XLA bridge The change required multiple components: 1) Switching to a different client API which takes a span of `ExecutionInput`s (so that the buffer donation can be specified at runtime). 2) Propagating the flag to set up aliasing for resource variables. 3) Checking the reference count of updated resource variables, and transferring the ownership at runtime if the count is one, and the aliasing specified in the compiled module. In order to do (3), we've needed to transfer from the model where we take a snapshot of reference variables before the execution (and locking only for the duration of the snapshotting) to locking the reference variables for the entire compile+run cycle (only doable from XlaLocalLaunchBase) and instead of taking the snapshot (requiring a copy) propagating the map to the pointers of tensors associated with reference variables. Testing: currently, testing was only done by inspecting the log traces manually, and checking that the buffer was donated, the aliasing was specified at the compile time, and the copy-protection only kicked in in cases where it needed to. In the future, we need to: 1) Test HLO generated from `tf.function(experimental_compile=True)` 2) Find a way to test the exact number of allocations generated during the test. 
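The ownership transfer described in point (3) follows a donate-when-uniquely-owned pattern. A rough, self-contained sketch of that decision (standard C++ with invented names, not the actual TF/XLA classes): the variable's buffer is moved into the execution only when the caller holds the sole reference, and copied otherwise so other readers stay valid.

#include <memory>
#include <utility>
#include <vector>

using Buffer = std::vector<float>;

// Donate (move out) the buffer only when `var` is the sole owner; otherwise
// return a defensive copy so concurrent holders keep a valid buffer.
Buffer AcquireForExecution(std::shared_ptr<Buffer>& var) {
  if (var.use_count() == 1) {
    Buffer donated = std::move(*var);  // ownership moves, no data copy
    var.reset();
    return donated;
  }
  return *var;  // shared elsewhere: copy instead of aliasing
}

int main() {
  auto sole = std::make_shared<Buffer>(Buffer{1.0f, 2.0f, 3.0f});
  Buffer donated = AcquireForExecution(sole);    // use_count == 1: donated

  auto shared = std::make_shared<Buffer>(Buffer{4.0f, 5.0f});
  auto another_ref = shared;                     // use_count == 2
  Buffer copied = AcquireForExecution(shared);   // still referenced: copied
  return 0;
}
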
PiperOrigin-RevId: 321207626 Change-Id: I1003f90479d0f8d3cffb8aaf48b746aa1526535d --- tensorflow/compiler/jit/kernels/xla_ops.cc | 109 +++++--- .../compiler/jit/xla_compile_on_demand_op.cc | 36 ++- tensorflow/compiler/jit/xla_device_ops.cc | 12 +- tensorflow/compiler/jit/xla_launch_util.cc | 238 ++++++++++-------- tensorflow/compiler/jit/xla_launch_util.h | 28 ++- .../python/eager/def_function_xla_jit_test.py | 64 +++++ 6 files changed, 319 insertions(+), 168 deletions(-) diff --git a/tensorflow/compiler/jit/kernels/xla_ops.cc b/tensorflow/compiler/jit/kernels/xla_ops.cc index 48347a2915f..38e33a60657 100644 --- a/tensorflow/compiler/jit/kernels/xla_ops.cc +++ b/tensorflow/compiler/jit/kernels/xla_ops.cc @@ -277,7 +277,8 @@ static Status CompileToLocalExecutable( OpKernelContext* ctx, const NameAttrList& function, bool has_ref_vars, const XlaPlatformInfo& platform_info, absl::Span variable_infos, - absl::Span constants, bool lazy, xla::LocalClient** client, + absl::Span constants, bool lazy, bool may_alias_resource_update, + xla::LocalClient** client, const XlaCompiler::CompilationResult** compilation_result, xla::LocalExecutable** executable) { // We store information about the JIT-compiled XLA computation @@ -332,6 +333,9 @@ static Status CompileToLocalExecutable( // Optimization: where possible, have the computation return a naked array // rather than a one-element tuple. compile_options.always_return_tuple = false; + compile_options.alias_resource_update = !has_ref_vars && + !platform_info.is_on_xla_device() && + may_alias_resource_update; std::vector args; TF_RETURN_IF_ERROR(XlaComputationLaunchContext::BuildXlaCompilerArguments( @@ -350,20 +354,22 @@ void XlaLocalLaunchBase::Compute(OpKernelContext* ctx) { const XlaCompiler::CompilationResult* compilation_result; xla::LocalExecutable* executable; - ResourceVarsSnapshot variables_snapshot; + std::vector variable_infos; { - std::vector variable_infos; OP_REQUIRES_OK( ctx, GetVariableInfosFromCtxInputs(ctx, resources_, &variable_infos)); OP_REQUIRES_OK(ctx, LockVariables(absl::MakeSpan(variable_infos))); Status s = CompileToLocalExecutable( ctx, function_, /*has_ref_vars=*/has_ref_vars_, platform_info_, - variable_infos, constants_, /*lazy=*/false, &client, - &compilation_result, &executable); + variable_infos, constants_, /*lazy=*/false, + /*may_alias_resource_update=*/true, &client, &compilation_result, + &executable); OP_REQUIRES_OK(ctx, s); - OP_REQUIRES_OK(ctx, - SnapshotResourceVariables(ctx, resources_, variable_infos, - &variables_snapshot)); + } + + std::map resource_var_ptrs; + for (int i = 0; i < resources_.size(); i++) { + resource_var_ptrs[resources_[i]] = variable_infos[i].var()->tensor(); } se::Stream* stream = @@ -374,12 +380,19 @@ void XlaLocalLaunchBase::Compute(OpKernelContext* ctx) { absl::optional tf_allocator_adapter; se::DeviceMemoryAllocator* allocator = GetAllocator(&tf_allocator_adapter, ctx, platform_info_); + int device_ordinal = stream ? 
stream->parent()->device_ordinal() + : client->default_device_ordinal(); XlaComputationLaunchContext launch_context( - client, allocator, + client, allocator, device_ordinal, /*allocate_xla_tensors=*/platform_info_.is_on_xla_device(), platform_info_.UseMultipleStreams()); - launch_context.PopulateInputs(ctx, compilation_result, variables_snapshot, - /*missing_ctx_input_prefix=*/0); + const xla::HloInputOutputAliasConfig& input_output_alias = + executable->executable()->module().input_output_alias_config(); + xla::StatusOr> execution_inputs = + launch_context.PopulateInputs(ctx, compilation_result, resource_var_ptrs, + /*missing_ctx_input_prefix=*/0, + input_output_alias); + OP_REQUIRES_OK(ctx, execution_inputs.status()); // Execute the computation. VLOG(2) << "Executing computation."; @@ -403,24 +416,24 @@ void XlaLocalLaunchBase::Compute(OpKernelContext* ctx) { Env* env = Env::Default(); auto start_time = env->NowMicros(); - xla::StatusOr run_result; + xla::StatusOr execution_output; if (!stream || platform_info_.platform_id() == se::host::kHostPlatformId) { - run_result = executable->Run(launch_context.arguments(), run_options); + execution_output = + executable->Run(std::move(*execution_inputs), run_options); } else { - run_result = executable->RunAsync(launch_context.arguments(), run_options); + execution_output = + executable->RunAsync(std::move(*execution_inputs), run_options); } - OP_REQUIRES(ctx, run_result.ok(), run_result.status()); + OP_REQUIRES(ctx, execution_output.ok(), execution_output.status()); auto elapsed = env->NowMicros() - start_time; VLOG(2) << "Elapsed time: " << elapsed << "us"; + OP_REQUIRES_OK( + ctx, launch_context.PopulateOutputs( + ctx, compilation_result, execution_output->ConsumeResult(), + /*missing_ctx_input_prefix=*/0, absl::MakeSpan(variable_infos), + input_output_alias, resource_var_ptrs)); - const xla::HloInputOutputAliasConfig& input_output_alias = - executable->executable()->module().input_output_alias_config(); - OP_REQUIRES_OK(ctx, - launch_context.PopulateOutputs( - ctx, compilation_result, run_result.ConsumeValueOrDie(), - /*missing_ctx_input_prefix=*/0, input_output_alias, - variables_snapshot)); VLOG(1) << "Done"; } @@ -516,10 +529,14 @@ void XlaCompileOp::Compute(OpKernelContext* ctx) { OP_REQUIRES_OK( ctx, GetVariableInfosFromCtxInputs(ctx, resources_, &variable_infos)); OP_REQUIRES_OK(ctx, LockVariables(absl::MakeSpan(variable_infos))); + + // Do not alias resource updates as locking variables in XlaCompile and + // unlocking them in XlaRun may lead to deadlocks. Status status = CompileToLocalExecutable( ctx, function_, has_ref_vars_, platform_info_, variable_infos, constants_, - /*lazy=*/!must_compile_, &client, &kernel, &executable); + /*lazy=*/!must_compile_, + /*may_alias_resource_update=*/false, &client, &kernel, &executable); OP_REQUIRES_OK(ctx, SnapshotResourceVariables(ctx, resources_, variable_infos, &variables)); if (must_compile_ || status.code() != error::UNIMPLEMENTED) { @@ -587,14 +604,22 @@ void XlaRunOp::Compute(OpKernelContext* ctx) { absl::optional tf_allocator_adapter; se::DeviceMemoryAllocator* allocator = GetAllocator(&tf_allocator_adapter, ctx, platform_info_); + se::Stream* stream = + ctx->op_device_context() ? ctx->op_device_context()->stream() : nullptr; + int device_ordinal = stream ? 
stream->parent()->device_ordinal() + : closure.client()->default_device_ordinal(); XlaComputationLaunchContext launch_context( - closure.client(), allocator, + closure.client(), allocator, device_ordinal, /*allocate_xla_tensors=*/platform_info_.is_on_xla_device(), /*use_multiple_streams=*/platform_info_.UseMultipleStreams()); // We're missing the must-be-constant inputs, tell `PopulateInputs` // about this. We don't actually need these inputs because they've // already been baked into the compiled kernel. + const xla::HloInputOutputAliasConfig& input_output_alias = + closure.executable()->executable()->module().input_output_alias_config(); + xla::StatusOr> execution_inputs; + std::map snapshot_ptrs; { tensorflow::profiler::TraceMe hlo_module_activity( [&] { @@ -604,13 +629,17 @@ void XlaRunOp::Compute(OpKernelContext* ctx) { }, tensorflow::profiler::TraceMeLevel::kInfo); - launch_context.PopulateInputs( - ctx, closure.compilation_result(), closure.resource_var_snapshots(), - /*missing_ctx_input_prefix=*/closure.num_constant_args()); + for (auto& p : closure.resource_var_snapshots()) { + snapshot_ptrs.emplace(p.first, + p.second.has_value() ? &p.second.value() : nullptr); + } + execution_inputs = launch_context.PopulateInputs( + ctx, closure.compilation_result(), snapshot_ptrs, + /*missing_ctx_input_prefix=*/closure.num_constant_args(), + input_output_alias); + OP_REQUIRES_OK(ctx, execution_inputs.status()); } - se::Stream* stream = - ctx->op_device_context() ? ctx->op_device_context()->stream() : nullptr; xla::ExecutableRunOptions run_options; run_options.set_stream(stream); run_options.set_allocator(allocator); @@ -631,21 +660,19 @@ void XlaRunOp::Compute(OpKernelContext* ctx) { Env* env = Env::Default(); auto start_time = env->NowMicros(); - xla::StatusOr run_result; + xla::StatusOr execution_output; if (!stream || platform_info_.platform_id() == se::host::kHostPlatformId) { - run_result = - closure.executable()->Run(launch_context.arguments(), run_options); + execution_output = + closure.executable()->Run(std::move(*execution_inputs), run_options); } else { - run_result = - closure.executable()->RunAsync(launch_context.arguments(), run_options); + execution_output = closure.executable()->RunAsync( + std::move(*execution_inputs), run_options); } - OP_REQUIRES(ctx, run_result.ok(), run_result.status()); + OP_REQUIRES(ctx, execution_output.ok(), execution_output.status()); auto elapsed = env->NowMicros() - start_time; VLOG(2) << "Elapsed time in computation: " << elapsed << "us"; - const xla::HloInputOutputAliasConfig& input_output_alias = - closure.executable()->executable()->module().input_output_alias_config(); tensorflow::profiler::TraceMe hlo_module_activity( [&] { @@ -653,12 +680,16 @@ void XlaRunOp::Compute(OpKernelContext* ctx) { }, tensorflow::profiler::TraceMeLevel::kInfo); + xla::StatusOr> variable_infos = GatherVariableInfo( + ctx, *closure.compilation_result(), closure.num_constant_args()); + OP_REQUIRES_OK(ctx, variable_infos.status()); + OP_REQUIRES_OK(ctx, LockVariables(absl::MakeSpan(*variable_infos))); OP_REQUIRES_OK( ctx, launch_context.PopulateOutputs( - ctx, closure.compilation_result(), run_result.ConsumeValueOrDie(), + ctx, closure.compilation_result(), execution_output->ConsumeResult(), /*missing_ctx_input_prefix=*/closure.num_constant_args(), - input_output_alias, closure.resource_var_snapshots())); + absl::MakeSpan(*variable_infos), input_output_alias, snapshot_ptrs)); } XlaMergeOp::XlaMergeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} diff --git 
a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc index afaee614f02..50813859603 100644 --- a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc +++ b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc @@ -50,35 +50,47 @@ Status XlaCompileOnDemandOp::Run(OpKernelContext* ctx, // Builds an XLA allocator for the device. XlaComputationLaunchContext launch_context( client, client->backend().memory_allocator(), + client->default_device_ordinal(), /*allocate_xla_tensors=*/true, /*use_multiple_streams=*/metadata.UseMultipleStreams()); - launch_context.PopulateInputs(ctx, result, variable_args, - /*missing_ctx_input_prefix=*/0); + std::map snapshot_ptrs; + for (auto& p : variable_args) { + snapshot_ptrs.emplace(p.first, + p.second.has_value() ? &p.second.value() : nullptr); + } + + const xla::HloInputOutputAliasConfig& input_output_alias = + executable->executable()->module().input_output_alias_config(); + xla::StatusOr> execution_inputs = + launch_context.PopulateInputs(ctx, result, snapshot_ptrs, + /*missing_ctx_input_prefix=*/0, + input_output_alias); + TF_RETURN_IF_ERROR(execution_inputs.status()); se::Stream* stream = ctx->op_device_context() ? ctx->op_device_context()->stream() : nullptr; TF_RET_CHECK(stream); VLOG(2) << "Executing computation: " << name(); - for (const xla::ShapedBuffer* arg : launch_context.arguments()) { - VLOG(2) << name() << ": " << *arg; - } xla::ExecutableRunOptions run_options; run_options.set_stream(stream); run_options.set_allocator(client->backend().memory_allocator()); run_options.set_intra_op_thread_pool(&ctx->eigen_cpu_device()); run_options.set_rng_seed(GetXLARandomSeed()); - xla::StatusOr run_result = - executable->Run(launch_context.arguments(), run_options); + xla::StatusOr run_result = + executable->Run(execution_inputs.ConsumeValueOrDie(), run_options); TF_RETURN_IF_ERROR(run_result.status()); - - const xla::HloInputOutputAliasConfig& input_output_alias = - executable->executable()->module().input_output_alias_config(); + xla::ExecutionOutput execution_output = run_result.ConsumeValueOrDie(); + xla::StatusOr> variable_infos = + GatherVariableInfo(ctx, *result, 0); + TF_RETURN_IF_ERROR(variable_infos.status()); + TF_RETURN_IF_ERROR(LockVariables(absl::MakeSpan(*variable_infos))); TF_RETURN_IF_ERROR(launch_context.PopulateOutputs( - ctx, result, run_result.ConsumeValueOrDie(), - /*missing_ctx_input_prefix=*/0, input_output_alias, variable_args)); + ctx, result, execution_output.ConsumeResult(), + /*missing_ctx_input_prefix=*/0, absl::MakeSpan(*variable_infos), + input_output_alias, snapshot_ptrs)); return Status::OK(); } diff --git a/tensorflow/compiler/jit/xla_device_ops.cc b/tensorflow/compiler/jit/xla_device_ops.cc index 8126059262b..f0555ae32e5 100644 --- a/tensorflow/compiler/jit/xla_device_ops.cc +++ b/tensorflow/compiler/jit/xla_device_ops.cc @@ -59,11 +59,13 @@ void XlaAssignVariableOp::Compute(OpKernelContext* context) { return Status::OK(); })); mutex_lock ml(*variable->mu()); - OP_REQUIRES(context, variable->tensor()->dtype() == dtype_, - errors::InvalidArgument( - "Trying to assign variable with wrong dtype. Expected ", - DataTypeString(variable->tensor()->dtype()), " got ", - DataTypeString(dtype_))); + OP_REQUIRES( + context, + !variable->is_initialized || variable->tensor()->dtype() == dtype_, + errors::InvalidArgument( + "Trying to assign variable with wrong dtype. 
Expected ", + DataTypeString(variable->tensor()->dtype()), " got ", + DataTypeString(dtype_))); variable->is_initialized = true; *variable->tensor() = value; } diff --git a/tensorflow/compiler/jit/xla_launch_util.cc b/tensorflow/compiler/jit/xla_launch_util.cc index 7f107aaef11..41abe86df6e 100644 --- a/tensorflow/compiler/jit/xla_launch_util.cc +++ b/tensorflow/compiler/jit/xla_launch_util.cc @@ -91,29 +91,19 @@ VariableInfo::~VariableInfo() { Status GetVariableInfosFromCtxInputs(OpKernelContext* ctx, absl::Span variable_indices, std::vector* result) { - std::vector resource_handles; - absl::c_transform( - variable_indices, std::back_inserter(resource_handles), - [&](int variable_idx) { return &HandleFromInput(ctx, variable_idx); }); - - std::vector> variables; - Status s = LookupResources(ctx, resource_handles, &variables); - if (!s.ok()) { - errors::AppendToMessage(&s, kPossibleNonVariableResourceHintMessage); - return s; - } - result->clear(); result->reserve(variable_indices.size()); - for (int i = 0; i < variable_indices.size(); i++) { - // *Release* the variable because we're going to unref it later in - // ~VariableInfo. - Var* variable = variables[i].release(); - int input_idx = variable_indices[i]; - std::string var_name = HandleFromInput(ctx, input_idx).name(); - result->emplace_back(input_idx, var_name, variable); + for (int var_idx : variable_indices) { + Var* variable = nullptr; + ResourceHandle handle = HandleFromInput(ctx, var_idx); + TF_RETURN_IF_ERROR( + LookupOrCreateResource(ctx, handle, &variable, [&](Var** ptr) { + // This var is uninitialized for now. + *ptr = new Var(DT_INVALID); + return Status::OK(); + })); + result->emplace_back(var_idx, handle.name(), variable); } - return Status::OK(); } @@ -176,24 +166,43 @@ Status SnapshotResourceVariables(OpKernelContext* ctx, XlaComputationLaunchContext::XlaComputationLaunchContext( xla::LocalClient* client, se::DeviceMemoryAllocator* xla_allocator, - bool allocate_xla_tensors, bool use_multiple_streams) + int device_ordinal, bool allocate_xla_tensors, bool use_multiple_streams) : client_(client), xla_allocator_(xla_allocator), allocate_xla_tensors_(allocate_xla_tensors), - use_multiple_streams_(use_multiple_streams) { + use_multiple_streams_(use_multiple_streams), + device_ordinal_(device_ordinal) { if (use_multiple_streams_) { CHECK(allocate_xla_tensors_) << "To use multiple streams correctly we must " "be allocating XLA tensors!"; } } -void XlaComputationLaunchContext::PopulateInputs( +// Fills in `execution_input` with `buffer` for `index`. +static void PopulateExecutionInputBuffer(xla::ExecutionInput& execution_input, + xla::ShapeIndex index, + se::DeviceMemoryBase& buffer, + bool donate_buffer, int device_ordinal, + se::DeviceMemoryAllocator* allocator) { + xla::MaybeOwningDeviceMemory* in_buffer = + execution_input.MutableBuffer(index); + if (donate_buffer) { + *in_buffer = se::OwningDeviceMemory(buffer, device_ordinal, allocator); + buffer = se::DeviceMemoryBase(); + } else { + *in_buffer = buffer; + } +} + +xla::StatusOr> +XlaComputationLaunchContext::PopulateInputs( OpKernelContext* ctx, const XlaCompiler::CompilationResult* compilation_result, - const ResourceVarsSnapshot& variables, int missing_ctx_input_prefix) { - // Build ShapedBuffers that point directly to the Tensor buffers. 
- arg_ptrs_ = - std::vector(compilation_result->xla_input_shapes.size()); + const std::map& resource_vars, + int missing_ctx_input_prefix, + const xla::HloInputOutputAliasConfig& input_output_alias) { + std::vector arguments; + arguments.reserve(compilation_result->xla_input_shapes.size()); xla::TransferManager* transfer_manager = client_->backend().transfer_manager(); @@ -201,10 +210,28 @@ void XlaComputationLaunchContext::PopulateInputs( int arg_num = compilation_result->input_mapping[i]; CHECK_GE(arg_num, missing_ctx_input_prefix); const xla::Shape& shape = compilation_result->xla_input_shapes[i]; - const Tensor* t = variables.count(arg_num) - ? &(variables.at(arg_num).value()) + const xla::Shape& device_shape = + transfer_manager->HostShapeToDeviceShape(shape); + + bool is_resource_variable = resource_vars.count(arg_num); + bool is_updated_resource_variable = + is_resource_variable && + absl::c_any_of(compilation_result->resource_updates, + [&](const XlaCompiler::ResourceUpdate& update) { + return update.input_index == i && update.modified; + }); + + const Tensor* t = is_resource_variable + ? resource_vars.at(arg_num) : &(ctx->input(arg_num - missing_ctx_input_prefix)); CHECK(t); + bool donate_buffer = + t->RefCountIsOne() && is_updated_resource_variable && + input_output_alias.ParameterHasAlias(i, xla::ShapeIndex{}); + VLOG(3) << "Processing input: " << i + << "; is_resource_variable=" << is_resource_variable + << "; is_updated_resource_variable=" << is_updated_resource_variable + << "; donate_buffer=" << donate_buffer; if (use_multiple_streams_) { CHECK(ctx->op_device_context() && ctx->op_device_context()->stream()) @@ -215,23 +242,28 @@ void XlaComputationLaunchContext::PopulateInputs( ctx->op_device_context()->stream()); } - if (xla::Shape::Equal().MinorToMajorOnlyInLayout()( - shape, transfer_manager->HostShapeToDeviceShape(shape))) { + arguments.emplace_back(device_shape, shape); + xla::ExecutionInput& execution_input = arguments.back(); + if (xla::Shape::Equal().MinorToMajorOnlyInLayout()(shape, device_shape)) { se::DeviceMemoryBase dmem = XlaTensor::DeviceMemoryFromTensor(*t); - arg_buffers_.emplace_back( - /*on_host_shape=*/shape, /*on_device_shape=*/shape, - client_->platform(), client_->default_device_ordinal()); - arg_buffers_.back().set_buffer(dmem, /*index=*/{}); - arg_ptrs_[i] = &arg_buffers_.back(); + PopulateExecutionInputBuffer(execution_input, xla::ShapeIndex{}, dmem, + donate_buffer, device_ordinal_, + xla_allocator_); } else { - const XlaTensor* xla_tensor = XlaTensor::FromTensor(t); + XlaTensor* xla_tensor = XlaTensor::FromTensor(t); CHECK(xla_tensor && xla_tensor->has_shaped_buffer()); - arg_ptrs_[i] = const_cast(&xla_tensor->shaped_buffer()); + xla_tensor->shaped_buffer().buffers().ForEachMutableElement( + [&](const xla::ShapeIndex& index, se::DeviceMemoryBase* buffer) { + PopulateExecutionInputBuffer(execution_input, index, *buffer, + donate_buffer, device_ordinal_, + xla_allocator_); + }); } } + return std::move(arguments); } -// Construct the tensor for given type and buffer. +// Construct the tensor for the given type and buffer. 
static Tensor MakeTensor(DataType dtype, const TensorShape& shape, se::DeviceMemoryBase buffer, Allocator* allocator) { size_t expected_size = shape.num_elements() * DataTypeSize(dtype); @@ -247,28 +279,26 @@ static Tensor GetOrCreateTensorForOutput( int output_num, OpKernelContext* ctx, int missing_ctx_input_prefix, const xla::HloInputOutputAliasConfig& input_output_alias, absl::Span input_mapping, - const ResourceVarsSnapshot& resource_var_snapshots, DataType output_dtype, - const TensorShape& output_shape, se::DeviceMemoryBase output_buffer, - Allocator* output_allocator) { + const std::map& resource_vars_snapshots, + DataType output_dtype, const TensorShape& output_shape, + se::DeviceMemoryBase output_buffer, Allocator* output_allocator) { xla::ShapeIndex output_index = input_output_alias.shape().IsTuple() ? xla::ShapeIndex({output_num}) : xla::ShapeIndex({}); + CHECK(input_output_alias.shape().IsTuple() || output_num == 0); if (absl::optional alias = input_output_alias.GetAliasedParameter(output_index)) { + VLOG(3) << "Found alias: " << alias->ToString(); int tf_param = input_mapping[alias->parameter_number] - missing_ctx_input_prefix; - const Tensor* input_tensor = &ctx->input(tf_param); - - // If input tensor is a resource variable, alias to the snapshot we took at - // entry time. - if (input_tensor->dtype() == DT_RESOURCE) { - const absl::optional& v = - resource_var_snapshots.at(missing_ctx_input_prefix + tf_param); - CHECK(v.has_value()); - return *v; + const Tensor input_tensor = + ctx->input(tf_param).dtype() != DT_RESOURCE + ? ctx->input(tf_param) + : *resource_vars_snapshots.at(missing_ctx_input_prefix + tf_param); + if (output_buffer.opaque() == input_tensor.data()) { + return input_tensor; } - return *input_tensor; } return MakeTensor(output_dtype, output_shape, output_buffer, output_allocator); @@ -291,12 +321,10 @@ static Status SetOutputForConstant( OpKernelContext* ctx, se::Stream* stream, const XlaCompiler::CompilationResult* compilation_result, int output_num) { CHECK(compilation_result->outputs[output_num].is_constant); - // Output is a constant. const Tensor& const_tensor = compilation_result->outputs[output_num].constant_value; Tensor* output_tensor; - const size_t total_bytes = const_tensor.TotalBytes(); - if (stream && total_bytes > 0) { + if (stream && const_tensor.TotalBytes() > 0) { // Copy host -> device. (Empty tensors don't have backing buffers.) // Manually allocate memory using an XlaTensorBuffer so we can allocate // as much memory as the device requires (as given by @@ -335,52 +363,55 @@ static Status SetOutputForConstant( return Status::OK(); } -// Creates a list of updates resource variables. 
-static xla::StatusOr> GatherVariableInfo( - OpKernelContext* ctx, - const XlaCompiler::CompilationResult* compilation_result, - int missing_ctx_input_prefix) { - std::vector variable_infos; - variable_infos.reserve(compilation_result->resource_updates.size()); +static xla::StatusOr GetOrCreateResourceVar( + OpKernelContext* ctx, const ResourceHandle& handle, + const XlaCompiler::ResourceUpdate& write) { + Var* variable = nullptr; + TF_RETURN_IF_ERROR( + LookupOrCreateResource(ctx, handle, &variable, [&write](Var** ptr) { + *ptr = new Var(write.type); + return Status::OK(); + })); + return variable; +} - for (int i = 0; i < compilation_result->resource_updates.size(); ++i) { +xla::StatusOr> GatherVariableInfo( + OpKernelContext* ctx, + const XlaCompiler::CompilationResult& compilation_result, + int missing_ctx_input_prefix) { + std::vector out; + out.reserve(compilation_result.resource_updates.size()); + for (int i = 0; i < compilation_result.resource_updates.size(); ++i) { const XlaCompiler::ResourceUpdate& write = - compilation_result->resource_updates[i]; + compilation_result.resource_updates[i]; int actual_input_index = write.input_index - missing_ctx_input_prefix; if (actual_input_index < 0 || actual_input_index >= ctx->num_inputs()) { return errors::Internal("Invalid input index for variable write."); } - // TODO(b/35625933): tensorflow::Var should contain a PersistentTensor, - // not a Tensor. - Var* variable = nullptr; const ResourceHandle handle = HandleFromInput(ctx, actual_input_index); - TF_RETURN_IF_ERROR(LookupOrCreateResource(ctx, handle, &variable, - [&write](Var** ptr) { - *ptr = new Var(write.type); - return Status::OK(); - })); - variable_infos.emplace_back(actual_input_index, handle.name(), variable); + TF_ASSIGN_OR_RETURN(Var * variable, + GetOrCreateResourceVar(ctx, handle, write)); + out.emplace_back(actual_input_index, handle.name(), variable); } - return variable_infos; + return std::move(out); } Status XlaComputationLaunchContext::PopulateOutputs( OpKernelContext* ctx, const XlaCompiler::CompilationResult* compilation_result, ScopedShapedBuffer output, int missing_ctx_input_prefix, + absl::Span variable_infos, const xla::HloInputOutputAliasConfig& input_output_alias, - const ResourceVarsSnapshot& resource_var_snapshots) { + const std::map& resource_vars) { se::Stream* stream = ctx->op_device_context() ? ctx->op_device_context()->stream() : nullptr; Allocator* allocator = ctx->device()->GetAllocator({}); // Computation output should always be a tuple. 
- if (VLOG_IS_ON(2)) { - VLOG(2) << "Result tuple shape: " << output.on_host_shape().DebugString(); - VLOG(2) << "Result tuple shape (on device): " - << output.on_device_shape().DebugString(); - } + VLOG(2) << "Result tuple shape: " << output.on_host_shape().DebugString(); + VLOG(2) << "Result tuple shape (on device): " + << output.on_device_shape().DebugString(); CHECK_EQ(ctx->num_outputs(), compilation_result->outputs.size()); // If the on-host-shape isn't a tuple, create a new single-element tuple @@ -438,8 +469,8 @@ Status XlaComputationLaunchContext::PopulateOutputs( for (int i = 0; i < ctx->num_outputs(); ++i) { const TensorShape& shape = output_tensor_shapes[i]; const DataType& type = compilation_result->outputs[i].type; - VLOG(2) << "Retval " << i << " shape " << shape.DebugString() << " type " - << DataTypeString(type); + VLOG(2) << "Populating output for retval " << i << " shape " + << shape.DebugString() << " type " << DataTypeString(type); if (type == DT_VARIANT) { return errors::Unimplemented( "Support for TensorList crossing the XLA/TF boundary " @@ -467,30 +498,37 @@ Status XlaComputationLaunchContext::PopulateOutputs( se::DeviceMemoryBase buffer = output.buffer({output_num}); Tensor output_tensor = GetOrCreateTensorForOutput( output_num, ctx, missing_ctx_input_prefix, input_output_alias, - compilation_result->input_mapping, resource_var_snapshots, + compilation_result->input_mapping, resource_vars, ctx->expected_output_dtype(i), shape, buffer, allocator); - output.set_buffer(se::OwningDeviceMemory(), {output_num}); ctx->set_output(i, output_tensor); } + output.set_buffer(se::OwningDeviceMemory(), {output_num}); ++output_num; } - - if (VLOG_IS_ON(3)) { - VLOG(3) << ctx->mutable_output(i)->DeviceSafeDebugString(); - } } - // Apply variable updates, if any. - VLOG(2) << "Applying variable updates"; - TF_ASSIGN_OR_RETURN( - std::vector variable_infos, - GatherVariableInfo(ctx, compilation_result, missing_ctx_input_prefix)); - TF_RETURN_IF_ERROR(LockVariables(absl::MakeSpan(variable_infos))); + // input_index -> index into variable_infos. + absl::flat_hash_map variable_info_lookup; + for (int i = 0; i < variable_infos.size(); i++) { + variable_info_lookup.emplace(variable_infos[i].index(), i); + } + // Apply variable updates, if any. 
for (int i = 0; i < compilation_result->resource_updates.size(); ++i) { const XlaCompiler::ResourceUpdate& write = compilation_result->resource_updates[i]; - if (variable_infos[i].var()->tensor()->dtype() != write.type) { + int actual_input_index = write.input_index - missing_ctx_input_prefix; + CHECK_GE(actual_input_index, 0); + CHECK_LT(actual_input_index, ctx->num_inputs()); + Var* var = variable_infos[variable_info_lookup[actual_input_index]].var(); + CHECK(var); + + VLOG(2) << "Updating variable #" << i + << " at input index: " << actual_input_index << " with shape " + << write.shape.DebugString() << "; variable tensor has shape: " + << var->tensor()->shape().DebugString(); + + if (var->is_initialized && var->tensor()->dtype() != write.type) { return errors::Internal("Mismatched type in variable write"); } @@ -504,14 +542,14 @@ Status XlaComputationLaunchContext::PopulateOutputs( } } else { se::DeviceMemoryBase buffer = output.buffer({output_num}); - output.set_buffer(se::OwningDeviceMemory(), {output_num}); output_tensor = GetOrCreateTensorForOutput( output_num, ctx, missing_ctx_input_prefix, input_output_alias, - compilation_result->input_mapping, resource_var_snapshots, write.type, + compilation_result->input_mapping, resource_vars, write.type, write.shape, buffer, allocator); } - *variable_infos[i].var()->tensor() = output_tensor; - variable_infos[i].var()->is_initialized |= write.modified; + output.set_buffer(se::OwningDeviceMemory(), {output_num}); + var->is_initialized |= write.modified; + *var->tensor() = output_tensor; ++output_num; } return Status::OK(); @@ -562,7 +600,7 @@ Status XlaComputationLaunchContext::BuildXlaCompilerArguments( arg.name = std::string(variable.name()); arg.kind = XlaCompiler::Argument::kResource; arg.resource_kind = XlaResource::kVariable; - if (variable.var()) { + if (variable.var() && variable.var()->is_initialized) { const Tensor* value = variable.var()->tensor(); arg.type = value->dtype(); arg.shape = value->shape(); diff --git a/tensorflow/compiler/jit/xla_launch_util.h b/tensorflow/compiler/jit/xla_launch_util.h index 92b6c4c8a08..b34b3059a4f 100644 --- a/tensorflow/compiler/jit/xla_launch_util.h +++ b/tensorflow/compiler/jit/xla_launch_util.h @@ -81,6 +81,12 @@ class VariableInfo { bool lock_held_ = false; }; +// Creates a list of updated resource variables. +xla::StatusOr> GatherVariableInfo( + OpKernelContext* ctx, + const XlaCompiler::CompilationResult& compilation_result, + int missing_ctx_input_prefix); + // Takes a snapshot of the values of resource variable arguments, whose indices // are specified in `variable_indices` argument. We snapshot tensors that back // resource variables since concurrent updates may modify the shape, and it is @@ -124,7 +130,7 @@ class XlaComputationLaunchContext { // objects. XlaComputationLaunchContext(xla::LocalClient* client, se::DeviceMemoryAllocator* xla_allocator, - bool allocate_xla_tensors, + int device_ordinal, bool allocate_xla_tensors, bool use_multiple_streams); // Builds a XlaCompiler::Argument vector from the arguments to an XlaLaunch @@ -142,10 +148,12 @@ class XlaComputationLaunchContext { // missing and adjusts input indices accordingly. All elements in kernel's // input_mapping must be greater than or equal to `missing_ctx_input_prefix` // (in other words, no inputs actually required by the kernel can be missing). 
- void PopulateInputs(OpKernelContext* ctx, - const XlaCompiler::CompilationResult* compilation_result, - const ResourceVarsSnapshot& variables, - int missing_ctx_input_prefix); + xla::StatusOr> PopulateInputs( + OpKernelContext* ctx, + const XlaCompiler::CompilationResult* compilation_result, + const std::map& resource_vars, + int missing_ctx_input_prefix, + const xla::HloInputOutputAliasConfig& input_output_alias); // Given the XLA output in `output`, populate all outputs of `ctx`. Also // writes out the resource variable updates. @@ -161,20 +169,16 @@ class XlaComputationLaunchContext { OpKernelContext* ctx, const XlaCompiler::CompilationResult* compilation_result, xla::ScopedShapedBuffer output, int missing_ctx_input_prefix, + absl::Span variable_infos, const xla::HloInputOutputAliasConfig& input_output_alias, - const ResourceVarsSnapshot& resource_var_snapshots); - - // Return the argument list. Only valid after PopulateInputs() has been - // called. - const std::vector& arguments() const { return arg_ptrs_; } + const std::map& resource_vars); private: xla::LocalClient* client_; se::DeviceMemoryAllocator* xla_allocator_; bool allocate_xla_tensors_; bool use_multiple_streams_; - std::deque arg_buffers_; - std::vector arg_ptrs_; + int device_ordinal_; }; // A simple TensorBuffer implementation that allows us to create Tensors that diff --git a/tensorflow/python/eager/def_function_xla_jit_test.py b/tensorflow/python/eager/def_function_xla_jit_test.py index d55f84863e9..bd7a6ec2279 100644 --- a/tensorflow/python/eager/def_function_xla_jit_test.py +++ b/tensorflow/python/eager/def_function_xla_jit_test.py @@ -32,6 +32,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import tensor_array_ops +from tensorflow.python.ops import variables from tensorflow.python.platform import test @@ -403,6 +404,69 @@ class DefFunctionTest(test.TestCase): self.assertEqual(inner_retracings, 1) + def testUpdateVariable(self): + v = variables.Variable(3.1) + + @def_function.function(experimental_compile=True) + def update_var(a, b): + v.assign_add(a * b) + + update_var(constant_op.constant(0.7), constant_op.constant(0.6)) + self.assertAllClose(v, 3.52) + + def testUpdateVariableVector(self): + v = variables.Variable([3.1, 3.1]) + + @def_function.function(experimental_compile=True) + def update_var(a, b): + v.assign_add(a * b) + + update_var( + constant_op.constant([0.7, 0.7]), constant_op.constant([0.6, 0.6])) + self.assertAllClose(v, [3.52, 3.52]) + + def testUpdateVariableInClass(self): + + class C(object): + + @def_function.function(experimental_compile=True) + def update_var(self, a, b): + if not hasattr(self, 'v'): + self.v = variables.Variable(3.1) + self.v.assign_add(a * b) + + c = C() + + @def_function.function + def outer(): + c.update_var(constant_op.constant(0.7), constant_op.constant(0.6)) + + outer() + self.assertAllClose(c.v, 3.52) + + def testUpdateVariableMultipleOutputs(self): + v = variables.Variable(3.1) + + @def_function.function(experimental_compile=True) + def update_var(a, b): + v.assign_add(a * b) + return a * b + v + + out = update_var(constant_op.constant(0.7), constant_op.constant(0.6)) + self.assertAllClose(v, 3.52) + self.assertAllClose(out, 3.94) + + def testReturnIdentity(self): + + @def_function.function(experimental_compile=True) + def f(a, b): + return (a, b) + + a = constant_op.constant([0.7]) + b = constant_op.constant([0.6]) + + f(a, b) + if __name__ 
== '__main__': ops.enable_eager_execution() From ddc1bbad3dfd4a089eb96014f26cc16664b1b2f8 Mon Sep 17 00:00:00 2001 From: Kibeom Kim Date: Tue, 14 Jul 2020 12:25:15 -0700 Subject: [PATCH 0398/2522] Revert Support integer input and output type for Quantize-Aware Trained models PiperOrigin-RevId: 321210312 Change-Id: I373ff0060464952648acf048d0732ca4c9e7b16c --- RELEASE.md | 3 +- tensorflow/lite/python/BUILD | 14 -- tensorflow/lite/python/lite.py | 29 +-- tensorflow/lite/python/lite_v2_test.py | 47 +++-- tensorflow/lite/python/util.py | 265 ------------------------- tensorflow/lite/python/util_test.py | 163 --------------- 6 files changed, 32 insertions(+), 489 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 2581b9388ff..150c7077349 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -42,8 +42,7 @@ * `tf.function`/AutoGraph: * * `tf.lite`: - * `TFLiteConverter`: - * Support optional flags `inference_input_type` and `inference_output_type` for full integer quantized models. This allows users to modify the model input and output type to integer types (tf.int8, tf.uint8) instead of defaulting to float type (tf.float32). + * * `tf.random`: * * Math and Linear Algebra: diff --git a/tensorflow/lite/python/BUILD b/tensorflow/lite/python/BUILD index f144ce39d50..dfcf46baa90 100644 --- a/tensorflow/lite/python/BUILD +++ b/tensorflow/lite/python/BUILD @@ -215,11 +215,8 @@ py_library( deps = [ ":lite_constants", ":op_hint", - ":schema_py", "//tensorflow/python:tf_optimizer", "//tensorflow/python/eager:wrap_function", - "@absl_py//absl/logging", - "@flatbuffers//:runtime_py", "@six_archive//:six", ], ) @@ -233,20 +230,9 @@ py_test( "no_windows", ], deps = [ - ":lite_constants", ":util", - "//tensorflow:tensorflow_py", - "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", - "//tensorflow/python:control_flow_ops", - "//tensorflow/python:convert_to_constants", - "//tensorflow/python:dtypes", - "//tensorflow/python:framework_ops", "//tensorflow/python:framework_test_lib", - "//tensorflow/python:math_ops", - "//tensorflow/python:session", - "//third_party/py/numpy", - "@absl_py//absl/testing:parameterized", "@six_archive//:six", ], ) diff --git a/tensorflow/lite/python/lite.py b/tensorflow/lite/python/lite.py index a08b40bbed6..e919aa4b00f 100644 --- a/tensorflow/lite/python/lite.py +++ b/tensorflow/lite/python/lite.py @@ -61,7 +61,6 @@ from tensorflow.lite.python.util import get_grappler_config as _get_grappler_con from tensorflow.lite.python.util import get_tensor_name as _get_tensor_name from tensorflow.lite.python.util import get_tensors_from_tensor_names as _get_tensors_from_tensor_names from tensorflow.lite.python.util import is_frozen_graph as _is_frozen_graph -from tensorflow.lite.python.util import modify_integer_quantized_model_io_type as _modify_integer_quantized_model_io_type from tensorflow.lite.python.util import run_graph_optimizations as _run_graph_optimizations from tensorflow.lite.python.util import set_tensor_shapes as _set_tensor_shapes from tensorflow.python import keras as _keras @@ -315,23 +314,6 @@ class QuantizationMode(object): else: return False, None - def flags_modify_model_io_type( - self, input_type=constants.FLOAT, output_type=constants.FLOAT): - """Flags for modifying the input and output type of a tflite model.""" - is_post_training_quantize = self.quantizer_flags(input_type, output_type)[0] - is_training_time_only_quantize = self.training_time_int8_allow_float() and \ - not is_post_training_quantize - - # TODO(b/153576658): Consolidate post/during training 
quantization workflows - # to modify model input/output type after MLIR conversion. - if is_training_time_only_quantize: - return { - "inference_input_type": input_type, - "inference_output_type": output_type, - } - else: - return None - # Below are helpers for the above functions. def _validate_int8_required(self): @@ -575,8 +557,9 @@ class TFLiteConverterBaseV2(TFLiteConverterBase): def _validate_inference_input_output_types(self, quant_mode): """Validate inference_input_type and inference_output_type flags.""" default_types = [constants.FLOAT, None] - # We support integer input/output for integer quantized models only. - if quant_mode.training_time_int8_allow_float(): + # We only support integer types for post training integer quantization + # as we have statistical information to quantize the input and output. + if quant_mode.is_post_training_integer_quantize(): all_types = default_types + [constants.INT8, constants.QUANTIZED_UINT8] if self.inference_input_type not in all_types or \ self.inference_output_type not in all_types: @@ -660,12 +643,6 @@ class TFLiteConverterBaseV2(TFLiteConverterBase): if calibrate_and_quantize: result = self._calibrate_quantize_model(result, **flags) - flags_modify_model_io_type = quant_mode.flags_modify_model_io_type( - self.inference_input_type, self.inference_output_type) - if flags_modify_model_io_type: - result = _modify_integer_quantized_model_io_type( - result, **flags_modify_model_io_type) - if self._experimental_sparsify_model: result = _mlir_sparsify(result) diff --git a/tensorflow/lite/python/lite_v2_test.py b/tensorflow/lite/python/lite_v2_test.py index 4093a9d5bb4..6fab4fd6086 100644 --- a/tensorflow/lite/python/lite_v2_test.py +++ b/tensorflow/lite/python/lite_v2_test.py @@ -374,12 +374,8 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): return tf.keras.Sequential(QLinear(3, input_shape=(2,))) - @parameterized.named_parameters( - ('_DefaultFLOAT32InputOutput', lite.constants.FLOAT), - ('_INT8InputOutput', lite.constants.INT8), - ('_UINT8InputOutput', lite.constants.QUANTIZED_UINT8)) @test_util.run_v2_only - def testTrainingTimeQuantization(self, inference_input_output_type): + def testTrainingTimeQuantization(self): model = self._getTrainingTimeQuantizedModel() float_converter = lite.TFLiteConverterV2.from_keras_model(model) @@ -388,25 +384,38 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest): quantized_converter = lite.TFLiteConverterV2.from_keras_model(model) quantized_converter.optimizations = [lite.Optimize.DEFAULT] - quantized_converter.inference_input_type = inference_input_output_type - quantized_converter.inference_output_type = inference_input_output_type quantized_tflite = quantized_converter.convert() self.assertTrue(quantized_tflite) - interpreter = Interpreter(model_content=quantized_tflite) - interpreter.allocate_tensors() - input_details = interpreter.get_input_details() - self.assertLen(input_details, 1) - self.assertEqual(inference_input_output_type.as_numpy_dtype, - input_details[0]['dtype']) - output_details = interpreter.get_output_details() - self.assertLen(output_details, 1) - self.assertEqual(inference_input_output_type.as_numpy_dtype, - output_details[0]['dtype']) - - # Ensure that the quantized tflite model is smaller. + # Ensure that the quantized weights tflite model is smaller. 
self.assertLess(len(quantized_tflite), len(float_tflite)) + interpreter = Interpreter(model_content=quantized_tflite) + self.assertEqual(np.float32, interpreter.get_input_details()[0]['dtype']) + + @parameterized.named_parameters( + ('_INT8InputOutput', lite.constants.INT8), + ('_UINT8InputOutput', lite.constants.QUANTIZED_UINT8)) + def testInvalidTrainingTimeQuantization(self, inference_input_output_type): + # We currently don't support integer inference_input_type and + # inference_output_type flags for training time quantization. + + model = self._getTrainingTimeQuantizedModel() + + converter = lite.TFLiteConverterV2.from_keras_model(model) + tflite_model = converter.convert() + self.assertTrue(tflite_model) + + quantized_converter = lite.TFLiteConverterV2.from_keras_model(model) + quantized_converter.optimizations = [lite.Optimize.DEFAULT] + with self.assertRaises(ValueError) as error: + quantized_converter.inference_input_type = inference_input_output_type + quantized_converter.inference_output_type = inference_input_output_type + quantized_converter.convert() + self.assertEqual( + 'The inference_input_type and inference_output_type ' + 'must be tf.float32.', str(error.exception)) + @test_util.run_v2_only def testNewQuantizer(self): """Test the model quantized by the new converter.""" diff --git a/tensorflow/lite/python/util.py b/tensorflow/lite/python/util.py index 9f84681c12b..ff7caad0f88 100644 --- a/tensorflow/lite/python/util.py +++ b/tensorflow/lite/python/util.py @@ -19,21 +19,15 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import copy import datetime import sys -from absl import logging - import six from six.moves import range -from flatbuffers.python import flatbuffers from tensorflow.core.protobuf import config_pb2 as _config_pb2 from tensorflow.core.protobuf import graph_debug_info_pb2 from tensorflow.core.protobuf import meta_graph_pb2 as _meta_graph_pb2 -from tensorflow.lite.python import lite_constants as _lite_constants -from tensorflow.lite.python import schema_py_generated as _schema_fb from tensorflow.lite.python.op_hint import convert_op_hints_to_stubs from tensorflow.lite.python.op_hint import find_all_hinted_output_nodes from tensorflow.lite.toco import types_pb2 as _types_pb2 @@ -61,25 +55,6 @@ _MAP_TF_TO_TFLITE_TYPES = { dtypes.bool: _types_pb2.BOOL, } -_MAP_TFLITE_ENUM_TO_TF_TYPES = { - 0: dtypes.float32, - 1: dtypes.float16, - 2: dtypes.int32, - 3: dtypes.uint8, - 4: dtypes.int64, - 5: dtypes.string, - 6: dtypes.bool, - 7: dtypes.int16, - 8: dtypes.complex64, - 9: dtypes.int8, - 10: dtypes.float64, -} - -_TFLITE_FILE_IDENTIFIER = b"TFL3" - -_TFLITE_MODEL_INPUT_OUTPUT_TYPES = (_lite_constants.FLOAT, _lite_constants.INT8, - _lite_constants.QUANTIZED_UINT8) - def convert_dtype_to_tflite_type(tf_dtype): """Converts tf.dtype to TFLite proto type. @@ -99,31 +74,6 @@ def convert_dtype_to_tflite_type(tf_dtype): return result -def _convert_tflite_enum_type_to_tf_type(tflite_enum_type): - """Converts tflite enum type (eg: 0) to tf type (eg: tf.float32). - - Args: - tflite_enum_type: tflite enum type (eg: 0, that corresponds to float32) - - Raises: - ValueError: If an invalid tflite enum type is provided. - - Returns: - tf type (eg: tf.float32) - """ - tf_type = _MAP_TFLITE_ENUM_TO_TF_TYPES.get(tflite_enum_type) - if tf_type is None: - raise ValueError( - "Unsupported enum {}. 
The valid map of enum to tf.dtypes is : {}" - .format(tflite_enum_type, _MAP_TFLITE_ENUM_TO_TF_TYPES)) - return tf_type - - -def _get_dtype_name(tf_type): - """Converts tf.dtype (eg: tf.float32) to str (eg: "tf.float32").""" - return "tf." + tf_type.name - - def get_tensor_name(tensor): """Returns name of the input tensor. @@ -564,218 +514,3 @@ extern const int {array_name}_len; license_text=license_text) return source_text, header_text - - -def _convert_model_from_bytearray_to_object(model_bytearray): - """Converts a tflite model from a bytearray into a parsable object.""" - model_object = _schema_fb.Model.GetRootAsModel(model_bytearray, 0) - model_object = _schema_fb.ModelT.InitFromObj(model_object) - model_object = copy.deepcopy(model_object) - model_object.subgraphs[0].inputs[0] = model_object.subgraphs[0].inputs[0] - return model_object - - -def _convert_model_from_object_to_bytearray(model_object): - """Converts a tflite model from a parsable object into a bytearray.""" - # Initial size of the buffer, which will grow automatically if needed - builder = flatbuffers.Builder(1024) - model_offset = model_object.Pack(builder) - builder.Finish(model_offset, file_identifier=_TFLITE_FILE_IDENTIFIER) - return bytes(builder.Output()) - - -def _remove_tensors_from_model(model, remove_tensors_idxs): - """Remove tensors from model.""" - if not remove_tensors_idxs: - return - if len(model.subgraphs) > 1: - raise ValueError("Model must only have one subgraph. Instead, it has " - "{} subgraphs.".format(len(model.subgraphs))) - subgraph = model.subgraphs[0] - tensors = subgraph.tensors - operators = subgraph.operators - - logging.debug("Removing tensors at indices : %s", remove_tensors_idxs) - # An optimized check to validate if "remove_tensors_idxs" (eg: [4,5,6]) is an - # exact subset, with ordering, of "tensors" indices (eg: [0,1,2,3,4,5,6]). - if min(remove_tensors_idxs) == len(tensors) - len(remove_tensors_idxs): - logging.debug("Removing tensors only at the end of the tensor list") - del tensors[min(remove_tensors_idxs):] - else: - logging.debug("Removing tensors requires updating the model") - # Map the old tensor indices to new tensor indices - d_old_to_new_tensors = {} - left_shift_by = 0 - for idx in range(len(tensors)): - if idx in remove_tensors_idxs: - left_shift_by += 1 - else: - d_old_to_new_tensors[idx] = idx - left_shift_by - logging.debug("Old to new tensors map: %s", d_old_to_new_tensors.__str__()) - # Update tensor indices referenced throughout the model - def update_tensors(tensor_idxs): - for i, ti in enumerate(tensor_idxs): - tensor_idxs[i] = d_old_to_new_tensors.get(ti, -1) - update_tensors(subgraph.inputs) - update_tensors(subgraph.outputs) - for op in operators: - update_tensors(op.inputs) - update_tensors(op.outputs) - # Delete the tensors - for idx in sorted(remove_tensors_idxs, reverse=True): - tensors.pop(idx) - logging.debug("Removed tensors marked for deletion") - - -def _validate_and_find_int8_quantized_inputs_outputs(model): - """Validate that model input is quantized and output is dequantized.""" - if len(model.subgraphs) > 1: - raise ValueError("Model must only have one subgraph. 
Instead, it has " - "{} subgraphs.".format(len(model.subgraphs))) - subgraph = model.subgraphs[0] - tensors = subgraph.tensors - operators = subgraph.operators - - # Ensure model has atleast one quantize and dequantize operator - quant_opcode_idx, dequant_opcode_idx = None, None - for idx, opcode in enumerate(model.operatorCodes): - if opcode.builtinCode == _schema_fb.BuiltinOperator.QUANTIZE: - quant_opcode_idx = idx - elif opcode.builtinCode == _schema_fb.BuiltinOperator.DEQUANTIZE: - dequant_opcode_idx = idx - if quant_opcode_idx is not None and dequant_opcode_idx is not None: - break - if quant_opcode_idx is None and dequant_opcode_idx is None: - raise ValueError("Model is not integer quantized as it does not " - "contain quantize/dequantize operators.") - - # Ensure model inputs and outputs are integer quantized - input_quant_ops, output_dequant_ops = [], [] - for op in operators: - # Find input quantize operator - if op.opcodeIndex == quant_opcode_idx and op.inputs[0] in subgraph.inputs: - pos, float_tensor, int_tensor = \ - "input", tensors[op.inputs[0]], tensors[op.outputs[0]] - input_quant_ops.append(op) - # Find output dequantize operator - elif op.opcodeIndex == dequant_opcode_idx and \ - op.outputs[0] in subgraph.outputs: - pos, float_tensor, int_tensor = \ - "output", tensors[op.outputs[0]], tensors[op.inputs[0]] - output_dequant_ops.append(op) - # Otherwise, ignore - else: - continue - # If found, validate the input/output tensor type - if float_tensor.type != _schema_fb.TensorType.FLOAT32: - raise ValueError( - "Model {} type must be tf.float32. Expected type for tensor with " - "name '{}' is tf.float32, instead type is tf.{}".format( - pos, float_tensor.name, - _convert_tflite_enum_type_to_tf_type(float_tensor.type).name)) - if int_tensor.type != _schema_fb.TensorType.INT8: - raise ValueError( - "Model is not integer quantized. Expected type for tensor with " - "name '{}' is tf.int8, instead type is tf.{}".format( - int_tensor.name, - _convert_tflite_enum_type_to_tf_type(int_tensor.type).name)) - - return input_quant_ops, output_dequant_ops - - -def modify_integer_quantized_model_io_type( - model, inference_input_type=_lite_constants.FLOAT, - inference_output_type=_lite_constants.FLOAT): - """Modify the float input/output type of an integer quantized model. - - Args: - model: An int8 quantized tflite model with float input and output. - inference_input_type: tf.DType representing final input type. - (default tf.float32) - inference_output_type: tf.DType representing final output type. - (default tf.float32) - - Returns: - An int8 quantized tflite model with modified input and/or output type. - - Raises: - ValueError: If the model is not int8 quantized or the inference_input_type - and/or inference_input_type is unsupported. - RuntimeError: If the modification was unsuccessful. 
- - """ - # Return if input and output types default to float - if inference_input_type == _lite_constants.FLOAT and \ - inference_output_type == _lite_constants.FLOAT: - return model - - # Validate input and output types - if inference_input_type not in _TFLITE_MODEL_INPUT_OUTPUT_TYPES: - raise ValueError("The `inference_input_type` should be in {}".format( - tuple(_get_dtype_name(t) for t in _TFLITE_MODEL_INPUT_OUTPUT_TYPES))) - if inference_output_type not in _TFLITE_MODEL_INPUT_OUTPUT_TYPES: - raise ValueError("The `inference_output_type` should be in {}".format( - tuple(_get_dtype_name(t) for t in _TFLITE_MODEL_INPUT_OUTPUT_TYPES))) - - logging.debug(("Attempting to modify the model input from tf.float32 to %s " - "and output from tf.float32 to %s"), - _get_dtype_name(inference_input_type), - _get_dtype_name(inference_output_type)) - # Convert the model to an object - model = _convert_model_from_bytearray_to_object(model) - - # Validate the integer quantized model - input_quant_ops, output_dequant_ops = \ - _validate_and_find_int8_quantized_inputs_outputs(model) - - # Initialize references and variables - if len(model.subgraphs) > 1: - raise ValueError("Model must only have one subgraph. Instead, it has " - "{} subgraphs.".format(len(model.subgraphs))) - subgraph = model.subgraphs[0] - tensors = subgraph.tensors - operators = subgraph.operators - remove_tensors_idxs = set() - - # Modify model input type - if inference_input_type == _lite_constants.QUANTIZED_UINT8: - # Change quant op (float to int8) to quant op (uint8 to int8) - for op in input_quant_ops: - int8_quantization = tensors[op.outputs[0]].quantization - uint8_quantization = _schema_fb.QuantizationParametersT() - uint8_quantization.scale = [int8_quantization.scale[0]] - uint8_quantization.zeroPoint = [int8_quantization.zeroPoint[0] + 128] - tensors[op.inputs[0]].quantization = uint8_quantization - tensors[op.inputs[0]].type = _schema_fb.TensorType.UINT8 - elif inference_input_type == _lite_constants.INT8: - # Remove the inputs and the quant operator - for op in input_quant_ops: - subgraph.inputs[subgraph.inputs == op.inputs[0]] = op.outputs[0] - remove_tensors_idxs.add(op.inputs[0]) - operators.remove(op) - - # Modify model output type - if inference_output_type == _lite_constants.QUANTIZED_UINT8: - # Change dequant op (int8 to float) to quant op (int8 to uint8) - for op in output_dequant_ops: - op.opcodeIndex = input_quant_ops[0].opcodeIndex - int8_quantization = tensors[op.inputs[0]].quantization - uint8_quantization = _schema_fb.QuantizationParametersT() - uint8_quantization.scale = [int8_quantization.scale[0]] - uint8_quantization.zeroPoint = [int8_quantization.zeroPoint[0] + 128] - tensors[op.outputs[0]].quantization = uint8_quantization - tensors[op.outputs[0]].type = _schema_fb.TensorType.UINT8 - elif inference_output_type == _lite_constants.INT8: - # Remove the outputs and the dequant operator - for op in output_dequant_ops: - subgraph.outputs[subgraph.outputs == op.outputs[0]] = op.inputs[0] - remove_tensors_idxs.add(op.outputs[0]) - operators.remove(op) - - # Remove tensors marked for deletion. 
- _remove_tensors_from_model(model, remove_tensors_idxs) - - # Convert the model to a bytearray - model = _convert_model_from_object_to_bytearray(model) - - return model diff --git a/tensorflow/lite/python/util_test.py b/tensorflow/lite/python/util_test.py index 0e9cbc1e58a..f3c287dd7fc 100644 --- a/tensorflow/lite/python/util_test.py +++ b/tensorflow/lite/python/util_test.py @@ -19,10 +19,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from absl.testing import parameterized -import numpy as np from six.moves import range -import tensorflow as tf from tensorflow.lite.python import lite_constants from tensorflow.lite.python import util @@ -64,31 +61,6 @@ class UtilTest(test_util.TensorFlowTestCase): self.assertEqual( util.convert_dtype_to_tflite_type(dtypes.bool), _types_pb2.BOOL) - def testConvertEnumToDtype(self): - self.assertEqual( - util._convert_tflite_enum_type_to_tf_type(0), dtypes.float32) - self.assertEqual( - util._convert_tflite_enum_type_to_tf_type(1), dtypes.float16) - self.assertEqual(util._convert_tflite_enum_type_to_tf_type(2), dtypes.int32) - self.assertEqual(util._convert_tflite_enum_type_to_tf_type(3), dtypes.uint8) - self.assertEqual(util._convert_tflite_enum_type_to_tf_type(4), dtypes.int64) - self.assertEqual( - util._convert_tflite_enum_type_to_tf_type(5), dtypes.string) - self.assertEqual(util._convert_tflite_enum_type_to_tf_type(6), dtypes.bool) - self.assertEqual(util._convert_tflite_enum_type_to_tf_type(7), dtypes.int16) - self.assertEqual( - util._convert_tflite_enum_type_to_tf_type(8), dtypes.complex64) - self.assertEqual(util._convert_tflite_enum_type_to_tf_type(9), dtypes.int8) - self.assertEqual( - util._convert_tflite_enum_type_to_tf_type(10), dtypes.float64) - with self.assertRaises(ValueError) as error: - util._convert_tflite_enum_type_to_tf_type(11) - self.assertEqual( - "Unsupported enum 11. The valid map of enum to tf.dtypes is : " - "{0: tf.float32, 1: tf.float16, 2: tf.int32, 3: tf.uint8, 4: tf.int64, " - "5: tf.string, 6: tf.bool, 7: tf.int16, 8: tf.complex64, 9: tf.int8, " - "10: tf.float64}", str(error.exception)) - def testTensorName(self): with ops.Graph().as_default(): in_tensor = array_ops.placeholder(shape=[4], dtype=dtypes.float32) @@ -223,140 +195,5 @@ class TensorFunctionsTest(test_util.TensorFlowTestCase): self.assertEqual([None, 3, 5], tensor.shape.as_list()) -def _generate_integer_tflite_model(): - """Define an integer post-training quantized tflite model.""" - # Load MNIST dataset - n = 10 # Number of samples - (train_images, train_labels), (test_images, test_labels) = \ - tf.keras.datasets.mnist.load_data() - train_images, train_labels, test_images, test_labels = \ - train_images[:n], train_labels[:n], test_images[:n], test_labels[:n] - - # Normalize the input image so that each pixel value is between 0 to 1. 
- train_images = train_images / 255.0 - test_images = test_images / 255.0 - - # Define TF model - model = tf.keras.Sequential([ - tf.keras.layers.InputLayer(input_shape=(28, 28)), - tf.keras.layers.Reshape(target_shape=(28, 28, 1)), - tf.keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation="relu"), - tf.keras.layers.MaxPooling2D(pool_size=(2, 2)), - tf.keras.layers.Flatten(), - tf.keras.layers.Dense(10) - ]) - - # Train - model.compile( - optimizer="adam", - loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), - metrics=["accuracy"]) - - model.fit( - train_images, - train_labels, - epochs=1, - validation_split=0.1, - ) - - # Convert TF Model to an Integer Quantized TFLite Model - converter = tf.lite.TFLiteConverter.from_keras_model(model) - converter.optimizations = {tf.lite.Optimize.DEFAULT} - def representative_dataset_gen(): - for _ in range(2): - yield [ - np.random.uniform(low=0, high=1, size=(1, 28, 28)).astype( - np.float32) - ] - converter.representative_dataset = representative_dataset_gen - converter.target_spec.supported_ops = {tf.lite.OpsSet.TFLITE_BUILTINS_INT8} - tflite_model = converter.convert() - - return tflite_model - - -def _test_param_modify_integer_model_io_type(): - """Function to generate parameterized inputs for testing.""" - params = [] - str_template = "_{}{}{}" - map_model_type = { - "PostTraining": True, - # "DuringTraining": False, - } - map_types = { - "": lite_constants.FLOAT, - "INT8": lite_constants.INT8, - "UINT8": lite_constants.QUANTIZED_UINT8 - } - for k1, v1 in map_model_type.items(): - for k2, v2 in map_types.items(): - istr = "_Input{}".format(k2) if k2 else "" - for k3, v3 in map_types.items(): - ostr = "_Output{}".format(k3) if k3 else "" if istr else "_NoUpdate" - params.append((str_template.format(k1, istr, ostr), v1, v2, v3)) - return params - - -# TODO(b/161174063): Merge tests for integer input/output type -class UtilModifyIntegerQuantizedModelIOTypeTest( - test_util.TensorFlowTestCase, parameterized.TestCase): - - @classmethod - def setUpClass(cls): - super(UtilModifyIntegerQuantizedModelIOTypeTest, cls).setUpClass() - cls.post_train_integer_model = _generate_integer_tflite_model() - - @parameterized.named_parameters(_test_param_modify_integer_model_io_type()) - def test(self, is_post_train, in_tftype, out_tftype): - """Modify the float input/output type of an integer quantized model.""" - - def _run_tflite_inference(model, in_tftype, out_tftype): - """Run inference on a model with a specific input/output type.""" - # Load TFLite model and allocate tensors. 
- interpreter = tf.lite.Interpreter(model_content=model) - interpreter.allocate_tensors() - input_details = interpreter.get_input_details()[0] - output_details = interpreter.get_output_details()[0] - - # Validate TFLite model input and output types - self.assertEqual(input_details["dtype"], in_tftype.as_numpy_dtype) - self.assertEqual(output_details["dtype"], out_tftype.as_numpy_dtype) - - # Define Input - np.random.seed(0) - input_data = np.random.uniform(low=0, high=1, size=(1, 28, 28)) - input_data = input_data.astype(np.float32) - if input_details["dtype"] != np.float32: - # quantize float to int - scale, zero_point = input_details["quantization"] - input_data = input_data / scale + zero_point - input_data = input_data.astype(input_details["dtype"]) - - # Run Inference - interpreter.set_tensor(input_details["index"], input_data) - interpreter.invoke() - - # Get output - output_data = interpreter.get_tensor(output_details["index"])[0] - if output_details["dtype"] != np.float32: - # dequantize int to float - scale, zero_point = output_details["quantization"] - output_data = output_data.astype(np.float32) - output_data = (output_data - zero_point) * scale - - return output_data - - model = self.__class__.post_train_integer_model if is_post_train else None - # Run model inference with float input output type - output_data = _run_tflite_inference(model, tf.float32, tf.float32) - # Run model inference with modified integer input output type - model_io = util.modify_integer_quantized_model_io_type( - model, in_tftype, out_tftype) - output_io_data = _run_tflite_inference(model_io, in_tftype, out_tftype) - - # Validate that both the outputs are the same - self.assertTrue(np.allclose(output_data, output_io_data, atol=1.0)) - - if __name__ == "__main__": test.main() From 89ad1cc24b54b5988f8da3f1f241200c1ce46651 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Tue, 14 Jul 2020 12:25:25 -0700 Subject: [PATCH 0399/2522] Increase visibility of core/framework:bounds_check Prepare for the removal of aliases to this target all around. PiperOrigin-RevId: 321210340 Change-Id: Ib802393cf705f9608e3d1b9df604342d3147f244 --- tensorflow/core/framework/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/framework/BUILD b/tensorflow/core/framework/BUILD index d47c74a629d..fd27064df6e 100644 --- a/tensorflow/core/framework/BUILD +++ b/tensorflow/core/framework/BUILD @@ -618,7 +618,7 @@ cc_library( cc_library( name = "bounds_check", hdrs = ["bounds_check.h"], - visibility = ["//tensorflow/core:__pkg__"], + visibility = ["//tensorflow/core/kernels:friends"], deps = [ "//tensorflow/core/platform:macros", "//third_party/eigen3", From 842df9e6b516e42578a8d23b35d41176b9a6cf1d Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Tue, 14 Jul 2020 12:26:19 -0700 Subject: [PATCH 0400/2522] Normalize version-specific differences for Union types. PiperOrigin-RevId: 321210532 Change-Id: I17b33b10c9370d679350957548ae34a1f35e1c88 --- tensorflow/tools/api/lib/python_object_to_proto_visitor.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/tools/api/lib/python_object_to_proto_visitor.py b/tensorflow/tools/api/lib/python_object_to_proto_visitor.py index 77882d50bae..86994248cc5 100644 --- a/tensorflow/tools/api/lib/python_object_to_proto_visitor.py +++ b/tensorflow/tools/api/lib/python_object_to_proto_visitor.py @@ -95,11 +95,13 @@ else: return False -# Differences created by Generic typing. +# Differences created by typing implementations. 
_NORMALIZE_TYPE[( 'tensorflow.python.framework.ops.Tensor')] = ( "") _NORMALIZE_TYPE['typing.Generic'] = "" +# TODO(mdan): Remove once the golden files are generated in Python 3.7. +_NORMALIZE_TYPE[""] = 'typing.Union' if sys.version_info.major == 3 and sys.version_info.minor >= 8: From 16ec46209bec10fc5e6851eaef195da0d932aec4 Mon Sep 17 00:00:00 2001 From: Xingyu Long Date: Tue, 14 Jul 2020 15:42:33 -0400 Subject: [PATCH 0401/2522] Update code and return wall_time seperately. --- tensorflow/python/keras/benchmarks/benchmark_util.py | 6 +++--- .../keras/benchmarks/keras_cpu_benchmark_test.py | 12 ++++++------ .../bidirectional_lstm_benchmark_test.py | 12 ++++++------ ...text_classification_transformer_benchmark_test.py | 12 ++++++------ 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/tensorflow/python/keras/benchmarks/benchmark_util.py b/tensorflow/python/keras/benchmarks/benchmark_util.py index 544c272d865..4884f8eac4c 100644 --- a/tensorflow/python/keras/benchmarks/benchmark_util.py +++ b/tensorflow/python/keras/benchmarks/benchmark_util.py @@ -155,7 +155,7 @@ def measure_performance(model_fn, metrics.append({'name': 'epochs', 'value': epochs}) - extras = {'distribution_strategy': distribution_strategy, - 'wall_time': np.mean(wall_time_list)} + wall_time = np.mean(wall_time_list) + extras = {'distribution_strategy': distribution_strategy} - return metrics, extras + return metrics, wall_time, extras diff --git a/tensorflow/python/keras/benchmarks/keras_cpu_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_cpu_benchmark_test.py index b1828c64eba..8a329b7dc0f 100644 --- a/tensorflow/python/keras/benchmarks/keras_cpu_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_cpu_benchmark_test.py @@ -95,7 +95,7 @@ class KerasModelCPUBenchmark( """Benchmark for MLP model on synthetic mnist data.""" mlp_x = np.random.random((5000, 784)) mlp_y = np.random.random((5000, 10)) - metrics, extras = benchmark_util.measure_performance( + metrics, wall_time, extras = benchmark_util.measure_performance( self._mnist_mlp, x=mlp_x, y=mlp_y, @@ -105,7 +105,7 @@ class KerasModelCPUBenchmark( loss=_LOSS) self.report_benchmark( iters=run_iters, - wall_time=extras['wall_time'], + wall_time=wall_time, metrics=metrics, extras=extras) @@ -113,7 +113,7 @@ class KerasModelCPUBenchmark( """Benchmark for Convnet model on synthetic mnist data.""" convnet_x = np.random.random((5000, 28, 28, 1)) convnet_y = np.random.random((5000, 10)) - metrics, extras = benchmark_util.measure_performance( + metrics, wall_time, extras = benchmark_util.measure_performance( self._mnist_convnet, x=convnet_x, y=convnet_y, @@ -123,7 +123,7 @@ class KerasModelCPUBenchmark( loss=_LOSS) self.report_benchmark( iters=run_iters, - wall_time=extras['wall_time'], + wall_time=wall_time, metrics=metrics, extras=extras) @@ -131,7 +131,7 @@ class KerasModelCPUBenchmark( """Benchmark for LSTM model on synthetic imdb review dataset.""" lstm_x = np.random.randint(0, 1999, size=(2500, 100)) lstm_y = np.random.random((2500, 1)) - metrics, extras = benchmark_util.measure_performance( + metrics, wall_time, extras = benchmark_util.measure_performance( self._imdb_lstm, x=lstm_x, y=lstm_y, @@ -141,7 +141,7 @@ class KerasModelCPUBenchmark( loss=_LOSS) self.report_benchmark( iters=run_iters, - wall_time=extras['wall_time'], + wall_time=wall_time, metrics=metrics, extras=extras) diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py 
b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py index 1359d951636..80100bb059c 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py @@ -59,7 +59,7 @@ class BidirectionalLSTMBenchmark(tf.test.Benchmark): """Measure performance with batch_size=128 and run_iters=3.""" batch_size = 128 run_iters = 3 - metrics, extras = benchmark_util.measure_performance( + metrics, wall_time, extras = benchmark_util.measure_performance( self._build_model, x=self.imdb_x, y=self.imdb_y, @@ -71,7 +71,7 @@ class BidirectionalLSTMBenchmark(tf.test.Benchmark): self.report_benchmark( iters=run_iters, - wall_time=extras['wall_time'], + wall_time=wall_time, metrics=metrics, extras=extras) @@ -79,7 +79,7 @@ class BidirectionalLSTMBenchmark(tf.test.Benchmark): """Measure performance with batch_size=256 and run_iters=2.""" batch_size = 256 run_iters = 2 - metrics, extras = benchmark_util.measure_performance( + metrics, wall_time, extras = benchmark_util.measure_performance( self._build_model, x=self.imdb_x, y=self.imdb_y, @@ -91,7 +91,7 @@ class BidirectionalLSTMBenchmark(tf.test.Benchmark): self.report_benchmark( iters=run_iters, - wall_time=extras['wall_time'], + wall_time=wall_time, metrics=metrics, extras=extras) @@ -99,7 +99,7 @@ class BidirectionalLSTMBenchmark(tf.test.Benchmark): """Measure performance with batch_size=512 and run_iters=4.""" batch_size = 512 run_iters = 4 - metrics, extras = benchmark_util.measure_performance( + metrics, wall_time, extras = benchmark_util.measure_performance( self._build_model, x=self.imdb_x, y=self.imdb_y, @@ -111,7 +111,7 @@ class BidirectionalLSTMBenchmark(tf.test.Benchmark): self.report_benchmark( iters=run_iters, - wall_time=extras['wall_time'], + wall_time=wall_time, metrics=metrics, extras=extras) diff --git a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py index 2bfbd6aefa2..48b2d8e9e08 100644 --- a/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py +++ b/tensorflow/python/keras/benchmarks/keras_examples_benchmarks/text_classification_transformer_benchmark_test.py @@ -73,7 +73,7 @@ class TextWithTransformerBenchmark(tf.test.Benchmark): """Measure performance with batch_size=128 and run_iters=3.""" batch_size = 128 run_iters = 3 - metrics, extras = benchmark_util.measure_performance( + metrics, wall_time, extras = benchmark_util.measure_performance( self._build_model, x=self.imdb_x, y=self.imdb_y, @@ -85,7 +85,7 @@ class TextWithTransformerBenchmark(tf.test.Benchmark): self.report_benchmark( iters=run_iters, - wall_time=extras['wall_time'], + wall_time=wall_time, metrics=metrics, extras=extras) @@ -93,7 +93,7 @@ class TextWithTransformerBenchmark(tf.test.Benchmark): """Measure performance with batch_size=512 and run_iters=4.""" batch_size = 512 run_iters = 4 - metrics, extras = benchmark_util.measure_performance( + metrics, wall_time, extras = benchmark_util.measure_performance( self._build_model, x=self.imdb_x, y=self.imdb_y, @@ -105,7 +105,7 @@ class TextWithTransformerBenchmark(tf.test.Benchmark): self.report_benchmark( iters=run_iters, - wall_time=extras['wall_time'], + wall_time=wall_time, metrics=metrics, extras=extras) @@ 
-113,7 +113,7 @@ class TextWithTransformerBenchmark(tf.test.Benchmark): """Measure performance with batch_size=256 and run_iters=3.""" batch_size = 256 run_iters = 3 - metrics, extras = benchmark_util.measure_performance( + metrics, wall_time, extras = benchmark_util.measure_performance( self._build_model, x=self.imdb_x, y=self.imdb_y, @@ -125,7 +125,7 @@ class TextWithTransformerBenchmark(tf.test.Benchmark): self.report_benchmark( iters=run_iters, - wall_time=extras['wall_time'], + wall_time=wall_time, metrics=metrics, extras=extras) From b8d8772b542ff353029c0f6f7b8377dc3dfa23c2 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Tue, 14 Jul 2020 22:50:03 +0300 Subject: [PATCH 0402/2522] Update densenet.py --- tensorflow/python/keras/applications/densenet.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/keras/applications/densenet.py b/tensorflow/python/keras/applications/densenet.py index 09069556a26..eae03305d37 100644 --- a/tensorflow/python/keras/applications/densenet.py +++ b/tensorflow/python/keras/applications/densenet.py @@ -15,9 +15,9 @@ # pylint: disable=invalid-name """DenseNet models for Keras. -Reference paper: - - [Densely Connected Convolutional Networks] - (https://arxiv.org/abs/1608.06993) (CVPR 2017 Best Paper Award) + Reference: + - [Densely Connected Convolutional Networks]( + https://arxiv.org/abs/1608.06993) (CVPR 2017) """ from __future__ import absolute_import from __future__ import division @@ -375,13 +375,16 @@ decode_predictions.__doc__ = imagenet_utils.decode_predictions.__doc__ DOC = """ - Reference paper: - - [Densely Connected Convolutional Networks] - (https://arxiv.org/abs/1608.06993) (CVPR 2017 Best Paper Award) + Reference: + - [Densely Connected Convolutional Networks]( + https://arxiv.org/abs/1608.06993) (CVPR 2017) Optionally loads weights pre-trained on ImageNet. Note that the data format convention used by the model is the one specified in your Keras config at `~/.keras/keras.json`. + + Caution: Be sure to properly pre-process your inputs to the application. + Please see `applications.densenet.preprocess_input` for an example. Arguments: include_top: whether to include the fully-connected From f57f560b1acb1a6cab1881b9936f2d7cda0e48e2 Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Tue, 14 Jul 2020 12:28:45 -0700 Subject: [PATCH 0403/2522] Enable the allocation of single TfLiteTensor structs from temp memory. In the future, all TfLiteTensor structs should be allocated through this API. This allocation allows for a chain of TfLiteTensor objects that can be reset through "ResetTempAllocations()". PiperOrigin-RevId: 321211032 Change-Id: I6ab86b8749338590f1457486aa81a39e036534ec --- tensorflow/lite/micro/micro_allocator.cc | 21 +++++++ tensorflow/lite/micro/micro_allocator.h | 14 +++++ tensorflow/lite/micro/micro_allocator_test.cc | 57 +++++++++++++++++++ 3 files changed, 92 insertions(+) diff --git a/tensorflow/lite/micro/micro_allocator.cc b/tensorflow/lite/micro/micro_allocator.cc index abc26c90efb..b9ba89e7dc2 100644 --- a/tensorflow/lite/micro/micro_allocator.cc +++ b/tensorflow/lite/micro/micro_allocator.cc @@ -842,6 +842,27 @@ TfLiteStatus MicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer( return kTfLiteOk; } +TfLiteTensor* MicroAllocator::AllocateTfLiteTensor(const Model* model, + int subgraph_idx) { + const SubGraph* subgraph = GetSubGraphFromModel(model); + TFLITE_DCHECK(subgraph != nullptr); + + // This value is allocated from temporary arena space. 
It is guaranteed to be + // around for at least the scope of the calling function. Since this struct + // allocation takes place in temp space, no need to own or cleanup. + TfLiteTensor* tensor = + reinterpret_cast(memory_allocator_->AllocateTemp( + sizeof(TfLiteTensor), alignof(TfLiteTensor))); + internal::InitializeTfLiteTensorFromFlatbuffer( + memory_allocator_, *subgraph->tensors()->Get(subgraph_idx), + model->buffers(), error_reporter_, tensor); + return tensor; +} + +void MicroAllocator::ResetTempAllocations() { + memory_allocator_->ResetTempAllocations(); +} + TfLiteStatus MicroAllocator::AllocateVariables(TfLiteContext* context, const SubGraph* subgraph) { for (size_t i = 0; i < context->tensors_size; ++i) { diff --git a/tensorflow/lite/micro/micro_allocator.h b/tensorflow/lite/micro/micro_allocator.h index 5fad5a2e5cc..b38b936929b 100644 --- a/tensorflow/lite/micro/micro_allocator.h +++ b/tensorflow/lite/micro/micro_allocator.h @@ -119,6 +119,18 @@ class MicroAllocator { TfLiteStatus FinishModelAllocation(const Model* model, TfLiteContext* context); + // Allocates a TfLiteTensor struct and populates the returned value with + // properties from the model flatbuffer. This struct is allocated from + // temporary arena memory is only guaranteed until a call is made to + // ResetTempAllocations(). + virtual TfLiteTensor* AllocateTfLiteTensor(const Model* model, + int subgraph_idx); + + // Resets all temporary allocations. This method should be called after a + // chain of temp allocations (e.g. chain of TfLiteTensor objects via + // AllocateTfLiteTensor()). + virtual void ResetTempAllocations(); + // Allocates persistent buffer which has the same life time as the allocator. // The memory is immediately available and is allocated from the tail of the // arena. @@ -147,6 +159,7 @@ class MicroAllocator { // Allocates an array in the arena to hold pointers to the tensors required // to initialize and prepare a model. These allocations are stored and // populated on the context. + // TODO(b/160894903): Remove this function when new kernel API is ready. virtual TfLiteStatus AllocateTfLiteTensorArray(TfLiteContext* context, const SubGraph* subgraph); @@ -154,6 +167,7 @@ class MicroAllocator { // prepare a model from data in the flatbuffer (loaded from the TfLiteModel // instance). Persistent data (e.g. quantization params) is allocated from the // arena. + // TODO(b/160894903): Remove this function when new kernel API is ready. 
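// Editorial sketch (not part of this patch): the intended usage of the
// temp-tensor API declared above. A caller may chain several temp
// TfLiteTensor structs and then release them all at once, e.g.:
//
//   TfLiteTensor* t1 = allocator->AllocateTfLiteTensor(model, /*subgraph_idx=*/1);
//   TfLiteTensor* t2 = allocator->AllocateTfLiteTensor(model, /*subgraph_idx=*/2);
//   // ... use t1 and t2 within the current scope only ...
//   allocator->ResetTempAllocations();  // reclaims both structs
//
// The returned pointers are only valid until ResetTempAllocations() runs, so
// they must not be cached across calls.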
virtual TfLiteStatus PopulateTfLiteTensorArrayFromFlatbuffer( const Model* model, TfLiteContext* context, const SubGraph* subgraph); diff --git a/tensorflow/lite/micro/micro_allocator_test.cc b/tensorflow/lite/micro/micro_allocator_test.cc index 510c5ac348e..6b63c8ceb4f 100644 --- a/tensorflow/lite/micro/micro_allocator_test.cc +++ b/tensorflow/lite/micro/micro_allocator_test.cc @@ -522,4 +522,61 @@ TF_LITE_MICRO_TEST(OfflinePlannerOfflineOnline) { TF_LITE_MICRO_EXPECT_EQ(0, context.tensors[3].data.uint8 - start); } +TF_LITE_MICRO_TEST(TestAllocateSingleTfLiteTensor) { + const tflite::Model* model = tflite::testing::GetSimpleMockModel(); + constexpr size_t arena_size = 1024; + uint8_t arena[arena_size]; + tflite::MicroAllocator* allocator = + tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter); + TF_LITE_MICRO_EXPECT_NE(allocator, nullptr); + + TfLiteTensor* tensor1 = + allocator->AllocateTfLiteTensor(model, /*subgraph_idx=*/1); + TF_LITE_MICRO_EXPECT_NE(tensor1, nullptr); +} + +TF_LITE_MICRO_TEST(TestAllocateChainOfTfLiteTensor) { + const tflite::Model* model = tflite::testing::GetSimpleMockModel(); + constexpr size_t arena_size = 1024; + uint8_t arena[arena_size]; + tflite::MicroAllocator* allocator = + tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter); + TF_LITE_MICRO_EXPECT_NE(allocator, nullptr); + + TfLiteTensor* tensor1 = + allocator->AllocateTfLiteTensor(model, /*subgraph_idx=*/1); + TF_LITE_MICRO_EXPECT_NE(tensor1, nullptr); + + TfLiteTensor* tensor2 = + allocator->AllocateTfLiteTensor(model, /*subgraph_idx=*/2); + TF_LITE_MICRO_EXPECT_NE(tensor1, nullptr); + + // The address of tensor2 should be higher than the address of tensor1 + // (chained allocations): + TF_LITE_MICRO_EXPECT_GT(tensor2, tensor1); +} + +TF_LITE_MICRO_TEST(TestAllocateTfLiteTensorWithReset) { + const tflite::Model* model = tflite::testing::GetSimpleMockModel(); + constexpr size_t arena_size = 1024; + uint8_t arena[arena_size]; + tflite::MicroAllocator* allocator = + tflite::MicroAllocator::Create(arena, arena_size, micro_test::reporter); + TF_LITE_MICRO_EXPECT_NE(allocator, nullptr); + + TfLiteTensor* tensor1 = + allocator->AllocateTfLiteTensor(model, /*subgraph_idx=*/1); + TF_LITE_MICRO_EXPECT_NE(tensor1, nullptr); + + allocator->ResetTempAllocations(); + + TfLiteTensor* tensor2 = + allocator->AllocateTfLiteTensor(model, /*subgraph_idx=*/2); + TF_LITE_MICRO_EXPECT_NE(tensor1, nullptr); + + // The address of tensor2 should be equal than the address of tensor1 since + // allocations were not chained: + TF_LITE_MICRO_EXPECT_EQ(tensor2, tensor1); +} + TF_LITE_MICRO_TESTS_END From a4ca98ed322d4290f1bcef5510bf07359e7c7628 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Tue, 14 Jul 2020 22:55:44 +0300 Subject: [PATCH 0404/2522] Standardize references in EfficientNet --- tensorflow/python/keras/applications/efficientnet.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/keras/applications/efficientnet.py b/tensorflow/python/keras/applications/efficientnet.py index e1413b08533..34c4cfa9992 100644 --- a/tensorflow/python/keras/applications/efficientnet.py +++ b/tensorflow/python/keras/applications/efficientnet.py @@ -16,9 +16,9 @@ # pylint: disable=missing-docstring """EfficientNet models for Keras. 
-Reference paper: - - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks] - (https://arxiv.org/abs/1905.11946) (ICML 2019) + Reference: + - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks]( + https://arxiv.org/abs/1905.11946) (ICML 2019) """ from __future__ import absolute_import from __future__ import division @@ -210,7 +210,7 @@ def EfficientNet( classifier_activation='softmax'): """Instantiates the EfficientNet architecture using given scaling coefficients. - Reference paper: + Reference: - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks]( https://arxiv.org/abs/1905.11946) (ICML 2019) From d05aa8735f35e6c6d43fdec4c63455912e318298 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 Jul 2020 12:38:16 -0700 Subject: [PATCH 0405/2522] Use fully qualified integer typedefs PiperOrigin-RevId: 321213045 Change-Id: I7ac9a06d0ec1a70d7cdcc440553262da4c4571f4 --- tensorflow/c/eager/c_api.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index 6804247794b..39b09348642 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -94,7 +94,6 @@ limitations under the License. #include "tensorflow/core/profiler/lib/traceme.h" #include "tensorflow/core/public/version.h" -using tensorflow::int64; using tensorflow::string; namespace { @@ -968,7 +967,7 @@ int64_t TFE_TensorHandleNumElements(TFE_TensorHandle* h, TF_Status* status) { return -1; } - int64 num_elements = -1; + tensorflow::int64 num_elements = -1; status->status = tensorflow::unwrap(h)->NumElements(&num_elements); return num_elements; } @@ -980,7 +979,7 @@ int64_t TFE_TensorHandleDim(TFE_TensorHandle* h, int dim_index, return -1; } - int64 dim = -1; + tensorflow::int64 dim = -1; status->status = tensorflow::unwrap(h)->Dim(dim_index, &dim); return dim; } From dcacc7f08b3ab0a9a5fb651f812b2797b28e1ded Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Tue, 14 Jul 2020 23:02:53 +0300 Subject: [PATCH 0406/2522] Standardize references in Inception-Resnet-V2 --- tensorflow/python/keras/applications/inception_resnet_v2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/keras/applications/inception_resnet_v2.py b/tensorflow/python/keras/applications/inception_resnet_v2.py index ec5ae0fb453..fccf84fb63e 100644 --- a/tensorflow/python/keras/applications/inception_resnet_v2.py +++ b/tensorflow/python/keras/applications/inception_resnet_v2.py @@ -16,7 +16,7 @@ """Inception-ResNet V2 model for Keras. -Reference paper: +Reference: - [Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning](https://arxiv.org/abs/1602.07261) (AAAI 2017) @@ -291,7 +291,7 @@ def conv2d_bn(x, def inception_resnet_block(x, scale, block_type, block_idx, activation='relu'): - """Adds a Inception-ResNet block. + """Adds an Inception-ResNet block. This function builds 3 types of Inception-ResNet blocks mentioned in the paper, controlled by the `block_type` argument (which is the From 8db10243f969c9752df782bec03a6bb296c61de2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 Jul 2020 12:40:54 -0700 Subject: [PATCH 0407/2522] Allow TPUEmbeddingOps without `_tpu_embedding_layer` attribute in TPUUpdateEmbeddingEnqueueOpInputs pass. 
PiperOrigin-RevId: 321213539 Change-Id: Ia61d2a961512b925be5d8620685a5392c9f796e1 --- .../tpu_update_embedding_enqueue_op_inputs.mlir | 12 ------------ .../tpu_update_embedding_enqueue_op_inputs.cc | 3 +-- 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tpu_update_embedding_enqueue_op_inputs.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tpu_update_embedding_enqueue_op_inputs.mlir index b77e4b1fbd0..09e701e5dd3 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tpu_update_embedding_enqueue_op_inputs.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tpu_update_embedding_enqueue_op_inputs.mlir @@ -65,15 +65,3 @@ func @check_enqueue_ops_with_different_attr_disallowed(%arg0: tensor, % return } -// ----- - -func @check_embedding_ops_with_missing_attribute_disallowed(%arg0: tensor, %arg1: tensor, - %arg2 :tensor, %arg3: tensor, %arg4: tensor, %arg5: tensor, - %arg6: tensor, %arg7: tensor, %arg8: tensor) -> () { - %0 = "tf.Const"() {value = dense<[]> : tensor<0xf32>} : () -> tensor<0xf32> - %1 = "tf.SelectV2"(%arg8, %arg6, %arg7) : (tensor, tensor, tensor) -> tensor - "tf.EnqueueTPUEmbeddingSparseTensorBatch"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %0, %0, %0, %1) {_tpu_embedding_layer = "call_123", _xla_outside_compilation = "0", combiners = ["mean", "sum"], device_ordinal = -1 : i64, max_sequence_lengths = [0, 0, 0], table_ids = [1, 1, 0]} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor<0xf32>, tensor<0xf32>, tensor<0xf32>, tensor) -> () - // expected-error @+1 {{'tf.RecvTPUEmbeddingActivations' op requires attribute '_tpu_embedding_layer'}} - %2:2 = "tf.RecvTPUEmbeddingActivations"() {config = "\0A\0B\0C\0D"} : () -> (tensor<2x2xf32>, tensor<4x4xf32>) - return -} diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_update_embedding_enqueue_op_inputs.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_update_embedding_enqueue_op_inputs.cc index f3588c8359b..820dec02b90 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_update_embedding_enqueue_op_inputs.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_update_embedding_enqueue_op_inputs.cc @@ -44,8 +44,7 @@ struct TPUUpdateEmbeddingEnqueueOpInputs LogicalResult ExtractEmbeddingAttribute( Operation* op, llvm::StringMap* embedding_op_map) { auto embedding_attr = op->getAttrOfType(kTPUEmbeddingAttr); - if (!embedding_attr) - return op->emitOpError("requires attribute '_tpu_embedding_layer'"); + if (!embedding_attr) return mlir::success(); if (!embedding_op_map->insert({embedding_attr.getValue(), op}).second) return op->emitOpError( From 1013a3bf2a52934fb3e74580c2334e15801503fb Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Tue, 14 Jul 2020 23:05:37 +0300 Subject: [PATCH 0408/2522] Standardize references in Inception-V3 --- tensorflow/python/keras/applications/inception_v3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/keras/applications/inception_v3.py b/tensorflow/python/keras/applications/inception_v3.py index 89b8398d489..7237cf558e1 100644 --- a/tensorflow/python/keras/applications/inception_v3.py +++ b/tensorflow/python/keras/applications/inception_v3.py @@ -15,7 +15,7 @@ # pylint: disable=invalid-name """Inception V3 model for Keras. 
-Reference paper: +Reference: - [Rethinking the Inception Architecture for Computer Vision]( http://arxiv.org/abs/1512.00567) (CVPR 2016) """ From dc823cf68a8c36bb97029a385a4e1ab05704724c Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Tue, 14 Jul 2020 23:07:41 +0300 Subject: [PATCH 0409/2522] Standardize references in MobileNet --- tensorflow/python/keras/applications/mobilenet.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/keras/applications/mobilenet.py b/tensorflow/python/keras/applications/mobilenet.py index 29a672eed30..c59246fb8ef 100644 --- a/tensorflow/python/keras/applications/mobilenet.py +++ b/tensorflow/python/keras/applications/mobilenet.py @@ -56,9 +56,10 @@ the 100 % MobileNet on various input sizes: | 1.0 MobileNet-128 | 64.4 % | 529 | 4.2 | ------------------------------------------------------------------------ -Reference paper: - - [MobileNets: Efficient Convolutional Neural Networks for - Mobile Vision Applications](https://arxiv.org/abs/1704.04861) +Reference: + - [MobileNets: Efficient Convolutional Neural Networks + for Mobile Vision Applications]( + https://arxiv.org/abs/1704.04861) """ from __future__ import absolute_import from __future__ import division From 8c7cf628fd0d3630d3c61511c17d73e9c55b8c27 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Tue, 14 Jul 2020 23:11:59 +0300 Subject: [PATCH 0410/2522] Standardize references in MobileNet-V2 --- tensorflow/python/keras/applications/mobilenet_v2.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/keras/applications/mobilenet_v2.py b/tensorflow/python/keras/applications/mobilenet_v2.py index bc9afb47e23..1891c3ff19f 100644 --- a/tensorflow/python/keras/applications/mobilenet_v2.py +++ b/tensorflow/python/keras/applications/mobilenet_v2.py @@ -69,9 +69,9 @@ MACs stands for Multiply Adds | [mobilenet_v2_0.35_128] | 20 | 1.66 | 50.8 | 75.0 | | [mobilenet_v2_0.35_96] | 11 | 1.66 | 45.5 | 70.4 | - Reference paper: - - [MobileNetV2: Inverted Residuals and Linear Bottlenecks] - (https://arxiv.org/abs/1801.04381) (CVPR 2018) + Reference: + - [MobileNetV2: Inverted Residuals and Linear Bottlenecks]( + https://arxiv.org/abs/1801.04381) (CVPR 2018) """ from __future__ import absolute_import from __future__ import division From 42e4d3ffbab25b76d85f2ab2c0ebb25c89036a95 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Tue, 14 Jul 2020 23:17:42 +0300 Subject: [PATCH 0411/2522] Standardize references in NASNet Add cautions to Large/Mobile implementations --- tensorflow/python/keras/applications/nasnet.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/keras/applications/nasnet.py b/tensorflow/python/keras/applications/nasnet.py index cd4979ece10..595f2250879 100644 --- a/tensorflow/python/keras/applications/nasnet.py +++ b/tensorflow/python/keras/applications/nasnet.py @@ -33,9 +33,9 @@ The below table describes the performance on ImageNet 2012: | NASNet-A (6 @ 4032) | 82.7 % | 96.2 % | 23.8 B | 88.9 | -------------------------------------------------------------------------------- -Reference paper: - - [Learning Transferable Architectures for Scalable Image Recognition] - (https://arxiv.org/abs/1707.07012) (CVPR 2018) +Reference: + - [Learning Transferable Architectures for Scalable Image Recognition]( + https://arxiv.org/abs/1707.07012) (CVPR 2018) """ from __future__ import absolute_import from __future__ import division @@ -335,6 +335,9 @@ def NASNetMobile(input_shape=None, 
Optionally loads weights pre-trained on ImageNet. Note that the data format convention used by the model is the one specified in your Keras config at `~/.keras/keras.json`. + + Caution: Be sure to properly pre-process your inputs to the application. + Please see `applications.nasnet.preprocess_input` for an example. Arguments: input_shape: Optional shape tuple, only to be specified @@ -403,6 +406,9 @@ def NASNetLarge(input_shape=None, Optionally loads weights pre-trained on ImageNet. Note that the data format convention used by the model is the one specified in your Keras config at `~/.keras/keras.json`. + + Caution: Be sure to properly pre-process your inputs to the application. + Please see `applications.nasnet.preprocess_input` for an example. Arguments: input_shape: Optional shape tuple, only to be specified From 6e1f041c25a463a0952201eef8b9111ed112a64d Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Tue, 14 Jul 2020 23:22:12 +0300 Subject: [PATCH 0412/2522] Standardize references in ResNet-V2 --- tensorflow/python/keras/applications/resnet_v2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/keras/applications/resnet_v2.py b/tensorflow/python/keras/applications/resnet_v2.py index 55880bb16ad..212e25350a2 100644 --- a/tensorflow/python/keras/applications/resnet_v2.py +++ b/tensorflow/python/keras/applications/resnet_v2.py @@ -15,7 +15,7 @@ # pylint: disable=invalid-name """ResNet v2 models for Keras. -Reference paper: +Reference: - [Identity Mappings in Deep Residual Networks] (https://arxiv.org/abs/1603.05027) (CVPR 2016) """ From c2f138755681cc29ca920499eace8a5dc00b3dfb Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Tue, 14 Jul 2020 23:24:09 +0300 Subject: [PATCH 0413/2522] Add references to NASNet Large/Mobile pages --- tensorflow/python/keras/applications/nasnet.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tensorflow/python/keras/applications/nasnet.py b/tensorflow/python/keras/applications/nasnet.py index 595f2250879..cb73aff2f07 100644 --- a/tensorflow/python/keras/applications/nasnet.py +++ b/tensorflow/python/keras/applications/nasnet.py @@ -331,6 +331,10 @@ def NASNetMobile(input_shape=None, pooling=None, classes=1000): """Instantiates a Mobile NASNet model in ImageNet mode. + + Reference: + - [Learning Transferable Architectures for Scalable Image Recognition]( + https://arxiv.org/abs/1707.07012) (CVPR 2018) Optionally loads weights pre-trained on ImageNet. Note that the data format convention used by the model is @@ -402,6 +406,10 @@ def NASNetLarge(input_shape=None, pooling=None, classes=1000): """Instantiates a NASNet model in ImageNet mode. + + Reference: + - [Learning Transferable Architectures for Scalable Image Recognition]( + https://arxiv.org/abs/1707.07012) (CVPR 2018) Optionally loads weights pre-trained on ImageNet. Note that the data format convention used by the model is From e9b865853d2ffb79bb9bd0d2720ba84973ad2cd2 Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Tue, 14 Jul 2020 23:25:47 +0300 Subject: [PATCH 0414/2522] Standardize references in VGG-16 --- tensorflow/python/keras/applications/vgg16.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/keras/applications/vgg16.py b/tensorflow/python/keras/applications/vgg16.py index e0780fa9926..0d508997d0f 100644 --- a/tensorflow/python/keras/applications/vgg16.py +++ b/tensorflow/python/keras/applications/vgg16.py @@ -15,7 +15,7 @@ # pylint: disable=invalid-name """VGG16 model for Keras. 
-Reference paper: +Reference: - [Very Deep Convolutional Networks for Large-Scale Image Recognition] (https://arxiv.org/abs/1409.1556) (ICLR 2015) """ @@ -53,7 +53,7 @@ def VGG16( classifier_activation='softmax'): """Instantiates the VGG16 model. - Reference paper: + Reference: - [Very Deep Convolutional Networks for Large-Scale Image Recognition]( https://arxiv.org/abs/1409.1556) (ICLR 2015) From 5f2717c5996dc242ea7392be13312223a6cc62c9 Mon Sep 17 00:00:00 2001 From: Revan Sopher Date: Tue, 14 Jul 2020 13:55:04 -0700 Subject: [PATCH 0415/2522] Shorten logs for TPU Kokoro nightly build. This should drop us from 64 MB logs back to 1 MB. PiperOrigin-RevId: 321228857 Change-Id: I6a857f9eb8f24293b21148e99a231d02d4f334aa --- .../tools/ci_build/release/ubuntu_16/tpu_py37_full/nonpip.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/tpu_py37_full/nonpip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/tpu_py37_full/nonpip.sh index 1667316d214..327ea62208f 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/tpu_py37_full/nonpip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/tpu_py37_full/nonpip.sh @@ -40,6 +40,7 @@ tag_filters="tpu,-tpu_pod,-no_tpu,-notpu,-no_oss,-no_oss_py37" bazel_args=( --config=opt \ + --config=short_logs \ --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ --linkopt=-lrt \ --action_env=TF2_BEHAVIOR="${TF2_BEHAVIOR}" \ From 2dc3665f48a3f0cd4ecce98d669330c749961af0 Mon Sep 17 00:00:00 2001 From: Henry Tan Date: Tue, 14 Jul 2020 13:56:56 -0700 Subject: [PATCH 0416/2522] Add TpuProgramApiFn struct and update references to TpuProgram_CAPI to use TpuProgramApiFn(). PiperOrigin-RevId: 321229240 Change-Id: Iac5d3878a6d7d6a724ea885fb091696c6128ab11 --- tensorflow/core/tpu/kernels/BUILD | 2 + .../core/tpu/kernels/tpu_program_c_api.h | 49 +++++++++++++------ .../core/tpu/kernels/tpu_program_group.cc | 21 ++++---- tensorflow/core/tpu/tpu_api.cc | 5 ++ tensorflow/core/tpu/tpu_api.h | 2 + tensorflow/core/tpu/tpu_library_init_fns.inc | 19 +++++++ 6 files changed, 73 insertions(+), 25 deletions(-) diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index b256790a0fb..6ff0fb1df73 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -297,6 +297,7 @@ cc_library( "//tensorflow/compiler/xla/service:hlo_proto_cc", "//tensorflow/core:lib", "//tensorflow/core/protobuf/tpu:compile_metadata_proto_cc", + "//tensorflow/core/tpu:tpu_api", "//tensorflow/stream_executor/tpu:proto_helper", "//tensorflow/stream_executor/tpu:status_helper", "//tensorflow/stream_executor/tpu:tpu_platform_interface", @@ -427,6 +428,7 @@ cc_library( hdrs = ["tpu_program_c_api.h"], deps = [ ":tpu_util_c_api_hdrs", + "//tensorflow/core/tpu:libtftpu_header", "//tensorflow/stream_executor/tpu:proto_helper", ], alwayslink = True, diff --git a/tensorflow/core/tpu/kernels/tpu_program_c_api.h b/tensorflow/core/tpu/kernels/tpu_program_c_api.h index 7e5ec3aeaf9..c9951e4d5ce 100644 --- a/tensorflow/core/tpu/kernels/tpu_program_c_api.h +++ b/tensorflow/core/tpu/kernels/tpu_program_c_api.h @@ -16,6 +16,7 @@ limitations under the License. 
#define TENSORFLOW_CORE_TPU_KERNELS_TPU_PROGRAM_C_API_H_ #include "tensorflow/core/tpu/kernels/tpu_util_c_api.h" +#include "tensorflow/core/tpu/libtftpu.h" #include "tensorflow/stream_executor/tpu/proto_helper.h" typedef struct XLA_TpuProgram XLA_TpuProgram; @@ -23,43 +24,59 @@ typedef struct XLA_TpuProgram XLA_TpuProgram; extern "C" { // Creates a new TPU program. -XLA_TpuProgram* TpuProgram_New(); +TFTPU_CAPI_EXPORT XLA_TpuProgram* TpuProgram_New(); // Destroys the `tpu_program`. -void TpuProgram_Free(XLA_TpuProgram* tpu_program); +TFTPU_CAPI_EXPORT void TpuProgram_Free(XLA_TpuProgram* tpu_program); // Creates an array of `XLA_TpuProgram*`. -XLA_TpuProgram** TpuProgram_NewArray(size_t count); +TFTPU_CAPI_EXPORT XLA_TpuProgram** TpuProgram_NewArray(size_t count); // Destroys an array of `XLA_TpuProgram*`. -void TpuProgram_FreeArray(XLA_TpuProgram* tpu_program[]); +TFTPU_CAPI_EXPORT void TpuProgram_FreeArray(XLA_TpuProgram* tpu_program[]); // Unloads and destroys the `tpu_program`. Once the TPU program is unloaded and // destroyed, it is in an unusable state. -void TpuProgram_UnloadAndDestroy(XLA_TpuProgram* tpu_program, - SE_Status* status); +TFTPU_CAPI_EXPORT void TpuProgram_UnloadAndDestroy(XLA_TpuProgram* tpu_program, + SE_Status* status); // Gets TPU program size in bytes from the `tpu_program`. -int64_t TpuProgram_GetProgramSize(const XLA_TpuProgram* tpu_program); +TFTPU_CAPI_EXPORT int64_t +TpuProgram_GetProgramSize(const XLA_TpuProgram* tpu_program); // Logs the summary of current memory state snapshot of the `tpu_program`. -bool TpuProgram_LogProgramMemorySummary(const XLA_TpuProgram* tpu_program); +TFTPU_CAPI_EXPORT bool TpuProgram_LogProgramMemorySummary( + const XLA_TpuProgram* tpu_program); // Gets TPU program executable info from the `tpu_program`. -void TpuProgram_GetExecutableInfo(const XLA_TpuProgram* tpu_program, - TpuSerializedProto* executable_info); +TFTPU_CAPI_EXPORT void TpuProgram_GetExecutableInfo( + const XLA_TpuProgram* tpu_program, TpuSerializedProto* executable_info); // Gets host transfer info proto. -void TpuProgram_GetHostTransferInfo(const XLA_TpuProgram* tpu_program, - TpuSerializedProto* host_transfer_info); +TFTPU_CAPI_EXPORT void TpuProgram_GetHostTransferInfo( + const XLA_TpuProgram* tpu_program, TpuSerializedProto* host_transfer_info); // Gets HLO metadata proto. -void TpuProgram_GetHloMetadata(const XLA_TpuProgram* tpu_program, - TpuSerializedProto* hlo_metadata); +TFTPU_CAPI_EXPORT void TpuProgram_GetHloMetadata( + const XLA_TpuProgram* tpu_program, TpuSerializedProto* hlo_metadata); // Gets may modify variables boolean value. 
-void TpuProgram_GetMayModifyVariables(const XLA_TpuProgram* tpu_program, - bool* may_modify_variables); +TFTPU_CAPI_EXPORT void TpuProgram_GetMayModifyVariables( + const XLA_TpuProgram* tpu_program, bool* may_modify_variables); + +struct TfTpu_TpuProgramApiFn { + TFTPU_ADD_FN_IN_STRUCT(TpuProgram_New); + TFTPU_ADD_FN_IN_STRUCT(TpuProgram_Free); + TFTPU_ADD_FN_IN_STRUCT(TpuProgram_NewArray); + TFTPU_ADD_FN_IN_STRUCT(TpuProgram_FreeArray); + TFTPU_ADD_FN_IN_STRUCT(TpuProgram_UnloadAndDestroy); + TFTPU_ADD_FN_IN_STRUCT(TpuProgram_GetProgramSize); + TFTPU_ADD_FN_IN_STRUCT(TpuProgram_LogProgramMemorySummary); + TFTPU_ADD_FN_IN_STRUCT(TpuProgram_GetExecutableInfo); + TFTPU_ADD_FN_IN_STRUCT(TpuProgram_GetHostTransferInfo); + TFTPU_ADD_FN_IN_STRUCT(TpuProgram_GetHloMetadata); + TFTPU_ADD_FN_IN_STRUCT(TpuProgram_GetMayModifyVariables); +}; } // extern "C" diff --git a/tensorflow/core/tpu/kernels/tpu_program_group.cc b/tensorflow/core/tpu/kernels/tpu_program_group.cc index c96eb7974df..2ee9b459abd 100644 --- a/tensorflow/core/tpu/kernels/tpu_program_group.cc +++ b/tensorflow/core/tpu/kernels/tpu_program_group.cc @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/core/tpu/kernels/tpu_compile.pb.h" #include "tensorflow/core/tpu/kernels/tpu_compile_c_api.h" #include "tensorflow/core/tpu/kernels/tpu_compile_op_support.h" +#include "tensorflow/core/tpu/tpu_api.h" #include "tensorflow/stream_executor/tpu/proto_helper.h" #include "tensorflow/stream_executor/tpu/status_helper.h" @@ -71,7 +72,7 @@ StatusOr> CompileAheadOfTime( for (size_t i = 0; i < count; ++i) { tpu_programs[i] = xla_tpu_programs[i]; } - TpuProgram_FreeArray(xla_tpu_programs); + TpuProgramApiFn()->TpuProgram_FreeArrayFn(xla_tpu_programs); return tpu_programs; } @@ -109,13 +110,14 @@ Status CreateTpuProgramGroup( // TODO(jiawenhao): Handle the case of xla_tpu_programs.size() > 1. 
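// Editorial aside (not part of this patch): the TpuProgramApiFn() calls below
// follow the TPU library-loading pattern used elsewhere in this directory: a
// struct of function pointers (here TfTpu_TpuProgramApiFn) is filled in when
// the TPU library is loaded, and call sites dispatch through that table
// instead of linking the C symbols directly. In general form, with
// hypothetical names:
//
//   struct ApiFnTable {
//     int64_t (*GetProgramSizeFn)(const XLA_TpuProgram*);
//   };
//   ApiFnTable* ApiFn();                          // lazily created singleton
//   int64_t n = ApiFn()->GetProgramSizeFn(prog);  // call site
//
// That is why the direct TpuProgram_* calls in this file are rewritten as
// TpuProgramApiFn()->TpuProgram_*Fn(...) in this change.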
bool may_modify_variables; - TpuProgram_GetMayModifyVariables(xla_tpu_programs[0], &may_modify_variables); + TpuProgramApiFn()->TpuProgram_GetMayModifyVariablesFn(xla_tpu_programs[0], + &may_modify_variables); tpu_program_group->set_may_modify_variables( std::vector(1, may_modify_variables)); TpuSerializedProto serialized_executable_info; - TpuProgram_GetExecutableInfo(xla_tpu_programs[0], - &serialized_executable_info); + TpuProgramApiFn()->TpuProgram_GetExecutableInfoFn( + xla_tpu_programs[0], &serialized_executable_info); TPUExecutableInfoProto executable_info = se_tpu::DeserializeProto( serialized_executable_info); @@ -124,8 +126,8 @@ Status CreateTpuProgramGroup( TPUHostTransferInfoProto host_transfer_info; TpuSerializedProto serialized_host_transfer_info; - TpuProgram_GetHostTransferInfo(xla_tpu_programs[0], - &serialized_host_transfer_info); + TpuProgramApiFn()->TpuProgram_GetHostTransferInfoFn( + xla_tpu_programs[0], &serialized_host_transfer_info); if (serialized_host_transfer_info.size > 0) { host_transfer_info = se_tpu::DeserializeProto( serialized_host_transfer_info); @@ -134,7 +136,8 @@ Status CreateTpuProgramGroup( tpu_program_group->set_host_transfer_info(host_transfer_info); TpuSerializedProto serialized_hlo_metadata; - TpuProgram_GetHloMetadata(xla_tpu_programs[0], &serialized_hlo_metadata); + TpuProgramApiFn()->TpuProgram_GetHloMetadataFn(xla_tpu_programs[0], + &serialized_hlo_metadata); xla::HloProto hlo_metadata = se_tpu::DeserializeProto(serialized_hlo_metadata); tpu_program_group->set_hlo_metadata(hlo_metadata); @@ -148,7 +151,7 @@ Status CreateTpuProgramGroup( int64_t TpuProgramGroup::program_size() const { int64_t total_size = 0; for (const XLA_TpuProgram* tpu_program : tpu_programs_) { - total_size += TpuProgram_GetProgramSize(tpu_program); + total_size += TpuProgramApiFn()->TpuProgram_GetProgramSizeFn(tpu_program); } return total_size; } @@ -287,7 +290,7 @@ Status TpuProgramGroup::CompileAndBuild( tpu_program_group_interface); VLOG(1) << absl::StrCat("Run CreateTpuProgramGroup completed. 
StatusCode: ", serialize_status.code()); - TpuProgram_FreeArray(xla_tpu_programs); + TpuProgramApiFn()->TpuProgram_FreeArrayFn(xla_tpu_programs); return serialize_status; } diff --git a/tensorflow/core/tpu/tpu_api.cc b/tensorflow/core/tpu/tpu_api.cc index 8dad82b3029..3ce7626de2b 100644 --- a/tensorflow/core/tpu/tpu_api.cc +++ b/tensorflow/core/tpu/tpu_api.cc @@ -38,6 +38,11 @@ TfTpu_CompileApiFn* CompileApiFn() { return &compile_api_fn; } +TfTpu_TpuProgramApiFn* TpuProgramApiFn() { + static TfTpu_TpuProgramApiFn tpu_program_api_fn; + return &tpu_program_api_fn; +} + TfTpu_ExecutorApiFn* ExecutorApiFn() { static TfTpu_ExecutorApiFn executor_api_fn; return &executor_api_fn; diff --git a/tensorflow/core/tpu/tpu_api.h b/tensorflow/core/tpu/tpu_api.h index c47ace6601d..3467f82a180 100644 --- a/tensorflow/core/tpu/tpu_api.h +++ b/tensorflow/core/tpu/tpu_api.h @@ -35,6 +35,8 @@ TfTpu_MeshStateApiFn* MeshStateApiFn(); TfTpu_CompileApiFn* CompileApiFn(); +TfTpu_TpuProgramApiFn* TpuProgramApiFn(); + TfTpu_ExecutorApiFn* ExecutorApiFn(); TfTpu_NodeContextApiFn* NodeContextApiFn(); diff --git a/tensorflow/core/tpu/tpu_library_init_fns.inc b/tensorflow/core/tpu/tpu_library_init_fns.inc index 2047085d121..3f084241df3 100644 --- a/tensorflow/core/tpu/tpu_library_init_fns.inc +++ b/tensorflow/core/tpu/tpu_library_init_fns.inc @@ -37,6 +37,24 @@ tensorflow::Status SetCompileStructFn(void* library_handle) { return tensorflow::Status::OK(); } +tensorflow::Status SetTpuProgramStructFn(void* library_handle) { + auto* tpu_program_fn = tensorflow::tpu::TpuProgramApiFn(); + + TFTPU_SET_FN(tpu_program_fn, TpuProgram_New); + TFTPU_SET_FN(tpu_program_fn, TpuProgram_Free); + TFTPU_SET_FN(tpu_program_fn, TpuProgram_NewArray); + TFTPU_SET_FN(tpu_program_fn, TpuProgram_FreeArray); + TFTPU_SET_FN(tpu_program_fn, TpuProgram_UnloadAndDestroy); + TFTPU_SET_FN(tpu_program_fn, TpuProgram_GetProgramSize); + TFTPU_SET_FN(tpu_program_fn, TpuProgram_LogProgramMemorySummary); + TFTPU_SET_FN(tpu_program_fn, TpuProgram_GetExecutableInfo); + TFTPU_SET_FN(tpu_program_fn, TpuProgram_GetHostTransferInfo); + TFTPU_SET_FN(tpu_program_fn, TpuProgram_GetHloMetadata); + TFTPU_SET_FN(tpu_program_fn, TpuProgram_GetMayModifyVariables); + + return tensorflow::Status::OK(); +} + tensorflow::Status SetExecutorStructFn(void* library_handle) { auto* executor_fn = tensorflow::tpu::ExecutorApiFn(); @@ -162,6 +180,7 @@ tensorflow::Status InitializeTpuStructFns(void* library_handle) { TF_RETURN_IF_ERROR(SetTpuConfigStructFns(library_handle)); TF_RETURN_IF_ERROR(SetTpuMeshStateStructFns(library_handle)); TF_RETURN_IF_ERROR(SetCompileStructFn(library_handle)); + TF_RETURN_IF_ERROR(SetTpuProgramStructFn(library_handle)); TF_RETURN_IF_ERROR(SetExecutorStructFn(library_handle)); TF_RETURN_IF_ERROR(SetTpuNodeContextStructFns(library_handle)); TF_RETURN_IF_ERROR(SetTpuUtilStructFns(library_handle)); From f2587bf5cc5c50706f24ea6f1dc4fe47d9c45fe1 Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Tue, 14 Jul 2020 21:03:45 +0000 Subject: [PATCH 0417/2522] added tensor_shape_utils for TF_Tensor DebugString --- tensorflow/c/BUILD | 26 +++++++++ tensorflow/c/kernels/BUILD | 1 + tensorflow/c/kernels/ops/summary.cc | 18 ++---- tensorflow/c/kernels/summary_op.cc | 77 +++++++++++++------------ tensorflow/c/tensor_shape_utils.cc | 38 ++++++++++++ tensorflow/c/tensor_shape_utils.h | 31 ++++++++++ tensorflow/c/tensor_shape_utils_test.cc | 44 ++++++++++++++ tensorflow/c/tf_tensor.cc | 12 +--- tensorflow/c/tf_tensor.h | 3 - tensorflow/c/tf_tensor_internal.h | 2 - 10 files 
changed, 187 insertions(+), 65 deletions(-) create mode 100644 tensorflow/c/tensor_shape_utils.cc create mode 100644 tensorflow/c/tensor_shape_utils.h create mode 100644 tensorflow/c/tensor_shape_utils_test.cc diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD index 410fc22069f..65bad3b5de9 100644 --- a/tensorflow/c/BUILD +++ b/tensorflow/c/BUILD @@ -513,6 +513,32 @@ tf_cuda_library( alwayslink = 1, ) +tf_cuda_library( + name = "tensor_shape_utils", + srcs = [ + "tensor_shape_utils.cc", + ], + hdrs = [ + "tensor_shape_utils.h", + ], + deps = [ + ":tf_tensor", + ], + copts = tf_copts(), + visibility = ["//visibility:public"], +) + +tf_cc_test( + name = "tensor_shape_utils_test", + srcs = ["tensor_shape_utils_test.cc"], + deps = [ + ":tensor_shape_utils", + "//tensorflow/core:lib", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + # ----------------------------------------------------------------------------- # Tests diff --git a/tensorflow/c/kernels/BUILD b/tensorflow/c/kernels/BUILD index 7e103514645..77fbd869105 100644 --- a/tensorflow/c/kernels/BUILD +++ b/tensorflow/c/kernels/BUILD @@ -30,6 +30,7 @@ tf_kernel_library( deps = [ "//tensorflow/c:kernels", "//tensorflow/c:tf_tensor", + "//tensorflow/c:tensor_shape_utils", "//tensorflow/core:framework", ], ) diff --git a/tensorflow/c/kernels/ops/summary.cc b/tensorflow/c/kernels/ops/summary.cc index be39cd0f530..9cacda36adf 100644 --- a/tensorflow/c/kernels/ops/summary.cc +++ b/tensorflow/c/kernels/ops/summary.cc @@ -19,20 +19,14 @@ limitations under the License. static void scalar_summary_shape_inference_fn(TF_ShapeInferenceContext* ctx, TF_Status* status) { + TF_SetStatus(status, TF_OK, ""); TF_ShapeHandle* result = TF_NewShapeHandle(); - if (TF_GetCode(status) == TF_OK && - !TF_ShapeInferenceContextRankKnown(ctx, result)) { - TF_ShapeInferenceContextSetUnknownShape(ctx, status); - CHECK_EQ(TF_OK, TF_GetCode(status)) - << "Error while setting unknown shape function"; - TF_DeleteShapeHandle(result); - return; - } // Make shape handle a scalar value (empty shape) - if (TF_GetCode(status) == TF_OK) { - TF_ShapeInferenceContextSetOutput(ctx, 0, result, status); - CHECK_EQ(TF_OK, TF_GetCode(status)) - << "Error while setting shape function"; + TF_ShapeInferenceContextSetOutput(ctx, 0, result, status); + if (TF_GetCode(status) != TF_OK) { + std::ostringstream err; + err << "Error in setting output shape inference"; + TF_SetStatus(status, TF_INVALID_ARGUMENT, err.str().c_str()); } TF_DeleteShapeHandle(result); } diff --git a/tensorflow/c/kernels/summary_op.cc b/tensorflow/c/kernels/summary_op.cc index 5db4a239905..e373ef13871 100644 --- a/tensorflow/c/kernels/summary_op.cc +++ b/tensorflow/c/kernels/summary_op.cc @@ -18,36 +18,39 @@ limitations under the License. #include "tensorflow/c/kernels.h" #include "tensorflow/c/tf_tensor.h" +#include "tensorflow/c/tensor_shape_utils.h" #include "tensorflow/core/framework/selective_registration.h" #include "tensorflow/core/framework/summary.pb.h" #include "tensorflow/core/framework/types.h" // Struct that stores the status and TF_Tensor inputs to the opkernel. // Used to delete tensor and status in its destructor upon kernel return. 
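// Editorial note (not part of this patch): Params acts as an RAII guard over
// the C-API handles, so TF_DeleteStatus and TF_DeleteTensor run on every exit
// path of the compute function, including early returns on error. The same
// idea in a minimal, hypothetical form:
//
//   struct TensorGuard {
//     TF_Tensor* t = nullptr;
//     ~TensorGuard() { if (t != nullptr) TF_DeleteTensor(t); }
//   };
//
// Keeping the cleanup in a destructor avoids leaking tensors when the kernel
// fails part-way through.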
-typedef struct Params{ - TF_Tensor* tags; - TF_Tensor* values; - TF_Status* status; - Params(TF_OpKernelContext* ctx) { - status = TF_NewStatus(); - TF_GetInput(ctx, 0, &tags, status); - if (TF_GetCode(status) == TF_OK){ - TF_GetInput(ctx, 1, &values, status); +namespace { + struct Params{ + TF_Tensor* tags; + TF_Tensor* values; + TF_Status* status; + Params(TF_OpKernelContext* ctx) { + status = TF_NewStatus(); + TF_GetInput(ctx, 0, &tags, status); + if (TF_GetCode(status) == TF_OK){ + TF_GetInput(ctx, 1, &values, status); + } + }; + ~Params(){ + TF_DeleteStatus(status); + TF_DeleteTensor(tags); + TF_DeleteTensor(values); } }; - ~Params(){ - TF_DeleteStatus(status); - TF_DeleteTensor(tags); - TF_DeleteTensor(values); - } -}; +} // dummy functions used for kernel registration -static void* SummaryScalarOp_Create(TF_OpKernelConstruction* ctx) { +static void* ScalarSummaryOp_Create(TF_OpKernelConstruction* ctx) { return nullptr; } -static void SummaryScalarOp_Delete(void* kernel) { +static void ScalarSummaryOp_Delete(void* kernel) { return; } @@ -56,7 +59,7 @@ bool IsSameSize(TF_Tensor* tensor1, TF_Tensor* tensor2); static tensorflow::string SingleTag(TF_Tensor* tags); template -static void SummaryScalarOp_Compute(void* kernel, TF_OpKernelContext* ctx) { +static void ScalarSummaryOp_Compute(void* kernel, TF_OpKernelContext* ctx) { Params params(ctx); if (TF_GetCode(params.status) != TF_OK){ TF_OpKernelContext_Failure(ctx, params.status); @@ -125,41 +128,41 @@ static tensorflow::string SingleTag(TF_Tensor* tags){ } template -void RegisterSummaryScalarOpKernel() { +void RegisterScalarSummaryOpKernel() { TF_Status* status = TF_NewStatus(); { auto* builder = TF_NewKernelBuilder("ScalarSummary", tensorflow::DEVICE_CPU, - &SummaryScalarOp_Create, - &SummaryScalarOp_Compute, - &SummaryScalarOp_Delete); + &ScalarSummaryOp_Create, + &ScalarSummaryOp_Compute, + &ScalarSummaryOp_Delete); TF_KernelBuilder_TypeConstraint(builder, "T", static_cast(tensorflow::DataTypeToEnum::v()), status); CHECK_EQ(TF_OK, TF_GetCode(status)) << "Error while adding type constraint"; TF_RegisterKernelBuilder("ScalarSummary", builder, status); CHECK_EQ(TF_OK, TF_GetCode(status)) - << "Error while registering Summary Scalar kernel"; + << "Error while registering Scalar Summmary kernel"; } TF_DeleteStatus(status); } // A dummy static variable initialized by a lambda whose side-effect is to // register the bitcast kernel. 
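// Editorial aside (not part of this patch): the variable below uses the
// static-initializer registration idiom. Its initializer is an immediately
// invoked lambda whose side effect registers the kernels for every supported
// dtype when this object file is linked in, so no explicit registration call
// is needed at runtime. In general form, with hypothetical names:
//
//   TF_ATTRIBUTE_UNUSED static bool kRegistered = []() {
//     RegisterMyKernels();  // hypothetical registration hook
//     return true;
//   }();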
-TF_ATTRIBUTE_UNUSED static bool IsSummaryScalarOpKernelRegistered = []() { - if (SHOULD_REGISTER_OP_KERNEL("SummaryScalar")) { - RegisterSummaryScalarOpKernel(); - RegisterSummaryScalarOpKernel(); - RegisterSummaryScalarOpKernel(); - RegisterSummaryScalarOpKernel(); - RegisterSummaryScalarOpKernel(); - RegisterSummaryScalarOpKernel(); - RegisterSummaryScalarOpKernel(); - RegisterSummaryScalarOpKernel(); - RegisterSummaryScalarOpKernel(); - RegisterSummaryScalarOpKernel(); - RegisterSummaryScalarOpKernel(); - RegisterSummaryScalarOpKernel(); +TF_ATTRIBUTE_UNUSED static bool IsScalarSummaryOpKernelRegistered = []() { + if (SHOULD_REGISTER_OP_KERNEL("ScalarSummary")) { + RegisterScalarSummaryOpKernel(); + RegisterScalarSummaryOpKernel(); + RegisterScalarSummaryOpKernel(); + RegisterScalarSummaryOpKernel(); + RegisterScalarSummaryOpKernel(); + RegisterScalarSummaryOpKernel(); + RegisterScalarSummaryOpKernel(); + RegisterScalarSummaryOpKernel(); + RegisterScalarSummaryOpKernel(); + RegisterScalarSummaryOpKernel(); + RegisterScalarSummaryOpKernel(); + RegisterScalarSummaryOpKernel(); } return true; }(); diff --git a/tensorflow/c/tensor_shape_utils.cc b/tensorflow/c/tensor_shape_utils.cc new file mode 100644 index 00000000000..c38eb95724c --- /dev/null +++ b/tensorflow/c/tensor_shape_utils.cc @@ -0,0 +1,38 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/c/tensor_shape_utils.h" + +#include + +#include "tensorflow/c/tf_tensor.h" +#include "tensorflow/core/platform/str_util.h" +#include "tensorflow/core/platform/strcat.h" +#include "tensorflow/core/platform/logging.h" + +std::string TF_ShapeDebugString(TF_Tensor* tensor) { + // A TF_Tensor cannot have an unknown rank + CHECK_GE(TF_NumDims(tensor), 0); + tensorflow::string s = "["; + for (int i = 0; i < TF_NumDims(tensor); ++i) { + if (i > 0) tensorflow::strings::StrAppend(&s, ","); + int64_t dim = TF_Dim(tensor, i); + // A TF_Tensor cannot have an unknown dimension + CHECK_GE(dim, 0); + tensorflow::strings::StrAppend(&s, dim); + } + tensorflow::strings::StrAppend(&s, "]"); + return s; +} \ No newline at end of file diff --git a/tensorflow/c/tensor_shape_utils.h b/tensorflow/c/tensor_shape_utils.h new file mode 100644 index 00000000000..cde929f3f4e --- /dev/null +++ b/tensorflow/c/tensor_shape_utils.h @@ -0,0 +1,31 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_C_TENSOR_SHAPE_UTILS_H_ +#define TENSORFLOW_C_TENSOR_SHAPE_UTILS_H_ + +#include + +#include "tensorflow/c/tf_tensor.h" + +// The following are utils for the shape of a TF_Tensor type. +// These functions may later be subsumed by the methods for a +// TF_TensorShape type + +// Returns a string representation of the TF_Tensor +std::string TF_ShapeDebugString(TF_Tensor* tensor); + +#endif // TENSORFLOW_C_TENSOR_SHAPE_UTILS_H_ + diff --git a/tensorflow/c/tensor_shape_utils_test.cc b/tensorflow/c/tensor_shape_utils_test.cc new file mode 100644 index 00000000000..ef1fd1e839f --- /dev/null +++ b/tensorflow/c/tensor_shape_utils_test.cc @@ -0,0 +1,44 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/c/tensor_shape_utils.h" +#include "tensorflow/c/tf_tensor_internal.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/framework/partial_tensor_shape.h" + +namespace tensorflow { + +template +void TestShapeMatch(T shape) { + Tensor tensor(DT_FLOAT, shape); + Status status; + TF_Tensor* tf_tensor = TF_TensorFromTensor(tensor, &status); + ASSERT_TRUE(status.ok()) << status.ToString(); + ASSERT_EQ(tensor.shape().DebugString(), TF_ShapeDebugString(tf_tensor)); +} + +TEST(ShapeDebugString, RegularShape) { + TestShapeMatch(TensorShape({5, 4, 7})); +} + +TEST(ShapeDebugString, ScalarShape) { + TestShapeMatch(TensorShape({})); +} + +} // namespace tensorflow diff --git a/tensorflow/c/tf_tensor.cc b/tensorflow/c/tf_tensor.cc index b4b8c772341..0feb986ce44 100644 --- a/tensorflow/c/tf_tensor.cc +++ b/tensorflow/c/tf_tensor.cc @@ -17,7 +17,6 @@ limitations under the License. 
#include #include -#include #include "tensorflow/c/tf_status.h" #include "tensorflow/c/tf_status_helper.h" @@ -181,11 +180,6 @@ void TF_TensorBitcastFrom(const TF_Tensor* from, TF_DataType type, Set_TF_Status_from_Status(status, cc_status); } -std::string TF_ShapeDebugString(const TF_Tensor* t){ - return tensorflow::down_cast(t->tensor) - ->ShapeDebugString(); -} - namespace tensorflow { void TensorInterface::Release() { delete this; } @@ -231,10 +225,6 @@ Status TensorInterface::BitcastFrom(const TensorInterface& from, DataType type, return tensor_.BitcastFrom(from.tensor_, type, s); } -std::string TensorInterface::ShapeDebugString() const { - return tensor_.shape().DebugString(); -} - } // namespace tensorflow // -------------------------------------------------------------------------- @@ -330,4 +320,4 @@ bool TensorInterface::IsAligned() const { return tensor_.IsAligned(); } } // namespace tensorflow -bool TF_TensorIsAligned(const TF_Tensor* t) { return t->tensor->IsAligned(); } \ No newline at end of file +bool TF_TensorIsAligned(const TF_Tensor* t) { return t->tensor->IsAligned(); } diff --git a/tensorflow/c/tf_tensor.h b/tensorflow/c/tf_tensor.h index e4953b53e43..acdf053e63a 100644 --- a/tensorflow/c/tf_tensor.h +++ b/tensorflow/c/tf_tensor.h @@ -21,7 +21,6 @@ limitations under the License. #include "tensorflow/c/tf_datatype.h" #include "tensorflow/c/tf_status.h" -#include // Macro to control visibility of exported symbols in the shared library (.so, // .dylib, .dll). @@ -152,8 +151,6 @@ TF_CAPI_EXPORT extern void TF_TensorBitcastFrom(const TF_Tensor* from, // Returns bool iff this tensor is aligned. TF_CAPI_EXPORT extern bool TF_TensorIsAligned(const TF_Tensor*); -TF_CAPI_EXPORT extern std::string TF_ShapeDebugString(const TF_Tensor*); - #ifdef __cplusplus } /* end extern "C" */ #endif diff --git a/tensorflow/c/tf_tensor_internal.h b/tensorflow/c/tf_tensor_internal.h index 036559da838..7a896dc5d11 100644 --- a/tensorflow/c/tf_tensor_internal.h +++ b/tensorflow/c/tf_tensor_internal.h @@ -17,7 +17,6 @@ limitations under the License. 
#define TENSORFLOW_C_TF_TENSOR_INTERNAL_H_ #include -#include #include "tensorflow/c/tensor_interface.h" #include "tensorflow/c/tf_datatype.h" @@ -105,7 +104,6 @@ class TensorInterface : public AbstractTensorInterface { void* Data() const override; bool IsAligned() const override; bool CanMove() const override; - std::string ShapeDebugString() const; Status ToTensor(tensorflow::Tensor* dst) const; Status BitcastFrom(const TensorInterface& from, DataType type, From 0257bcd418a88291e6dc4163ca0225154c3d52c3 Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Tue, 14 Jul 2020 23:06:58 +0200 Subject: [PATCH 0418/2522] Fix FileIO tests for Python < 3.6 --- tensorflow/python/lib/io/file_io_test.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/lib/io/file_io_test.py b/tensorflow/python/lib/io/file_io_test.py index b2bb1bd6cf5..9b9150daa52 100644 --- a/tensorflow/python/lib/io/file_io_test.py +++ b/tensorflow/python/lib/io/file_io_test.py @@ -39,6 +39,9 @@ class PathLike(object): def __fspath__(self): return self.name + def __str__(self): + return self.name + run_all_path_types = parameterized.named_parameters( ("str", os.path.join), @@ -197,7 +200,7 @@ class FileIoTest(test.TestCase, parameterized.TestCase): dir_path = join(self._base_dir, "temp_dir/temp_dir1/temp_dir2") file_io.recursive_create_dir(dir_path) file_io.recursive_create_dir(dir_path) # repeat creation - file_path = os.path.join(dir_path, "temp_file") + file_path = os.path.join(str(dir_path), "temp_file") file_io.FileIO(file_path, mode="w").write("testing") self.assertTrue(file_io.file_exists(file_path)) file_io.delete_recursively(os.path.join(self._base_dir, "temp_dir")) @@ -271,7 +274,7 @@ class FileIoTest(test.TestCase, parameterized.TestCase): self.assertFalse(file_io.is_directory(dir_path)) file_io.create_dir(dir_path) self.assertTrue(file_io.is_directory(dir_path)) - file_path = join(dir_path, "test_file") + file_path = join(str(dir_path), "test_file") file_io.FileIO(file_path, mode="w").write("test") # False for a file. 
self.assertFalse(file_io.is_directory(file_path)) @@ -285,11 +288,11 @@ class FileIoTest(test.TestCase, parameterized.TestCase): file_io.create_dir(dir_path) files = ["file1.txt", "file2.txt", "file3.txt"] for name in files: - file_path = join(dir_path, name) + file_path = join(str(dir_path), name) file_io.FileIO(file_path, mode="w").write("testing") - subdir_path = join(dir_path, "sub_dir") + subdir_path = join(str(dir_path), "sub_dir") file_io.create_dir(subdir_path) - subdir_file_path = join(subdir_path, "file4.txt") + subdir_file_path = join(str(subdir_path), "file4.txt") file_io.FileIO(subdir_file_path, mode="w").write("testing") dir_list = file_io.list_directory(dir_path) self.assertItemsEqual(files + ["sub_dir"], dir_list) @@ -319,7 +322,7 @@ class FileIoTest(test.TestCase, parameterized.TestCase): def testWalkInOrder(self, join): dir_path_str = os.path.join(self._base_dir, "test_dir") dir_path = join(self._base_dir, "test_dir") - self._setupWalkDirectories(dir_path) + self._setupWalkDirectories(dir_path_str) # Now test the walk (in_order = True) all_dirs = [] all_subdirs = [] @@ -389,7 +392,7 @@ class FileIoTest(test.TestCase, parameterized.TestCase): file_path = join(self._base_dir, "temp_file") file_io.FileIO(file_path, mode="w").write("testing") file_statistics = file_io.stat(file_path) - os_statistics = os.stat(file_path) + os_statistics = os.stat(str(file_path)) self.assertEqual(7, file_statistics.length) self.assertEqual( int(os_statistics.st_mtime), int(file_statistics.mtime_nsec / 1e9)) From 2f5fbf92c8b3697285b35f9e7977825ba76e402c Mon Sep 17 00:00:00 2001 From: Bruce Fontaine Date: Tue, 14 Jul 2020 14:08:54 -0700 Subject: [PATCH 0419/2522] Split tpu_embedding_v2_test.py into two files to reduce test timeouts. PiperOrigin-RevId: 321231814 Change-Id: Id9e42801a6212509ab7ca17048252da4630e2396 --- tensorflow/python/tpu/BUILD | 28 + .../tpu/tpu_embedding_v2_correctness_test.py | 632 ++++++++++++++++++ .../python/tpu/tpu_embedding_v2_test.py | 392 ----------- 3 files changed, 660 insertions(+), 392 deletions(-) create mode 100644 tensorflow/python/tpu/tpu_embedding_v2_correctness_test.py diff --git a/tensorflow/python/tpu/BUILD b/tensorflow/python/tpu/BUILD index 96b4fda7aa4..dbe99670b99 100644 --- a/tensorflow/python/tpu/BUILD +++ b/tensorflow/python/tpu/BUILD @@ -499,6 +499,34 @@ tpu_py_test( ], ) +tpu_py_test( + name = "tpu_embedding_v2_correctness_test", + srcs = [ + "tpu_embedding_v2_correctness_test.py", + ], + disable_experimental = True, + python_version = "PY3", + shard_count = 4, + srcs_version = "PY2AND3", + deps = [ + ":tpu_embedding", + ":tpu_embedding_v2", + ":tpu_strategy_util", + "//tensorflow/python:init_ops_v2", + "//tensorflow/python:variables", + "//tensorflow/python/compat:v2_compat", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/distribute:distribute_lib", + "//tensorflow/python/distribute:tpu_strategy", + "//tensorflow/python/distribute/cluster_resolver:tpu_cluster_resolver_py", + "//tensorflow/python/eager:backprop", + "//tensorflow/python/eager:def_function", + "//tensorflow/python/eager:remote", + "//tensorflow/python/training/tracking:util", + "//third_party/py/numpy", + ], +) + tf_py_test( name = "tpu_embedding_v2_cpu_test", srcs = [ diff --git a/tensorflow/python/tpu/tpu_embedding_v2_correctness_test.py b/tensorflow/python/tpu/tpu_embedding_v2_correctness_test.py new file mode 100644 index 00000000000..afb6743cbc2 --- /dev/null +++ b/tensorflow/python/tpu/tpu_embedding_v2_correctness_test.py @@ -0,0 +1,632 @@ +# Copyright 2020 
The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for TPU Embeddings mid level API on TPU.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import itertools +import os + +from absl import flags +from absl.testing import parameterized +import numpy as np + +from tensorflow.python.compat import v2_compat +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.distribute import distribute_lib +from tensorflow.python.distribute import distribution_strategy_context +from tensorflow.python.distribute import tpu_strategy +from tensorflow.python.distribute.cluster_resolver import tpu_cluster_resolver +from tensorflow.python.eager import backprop +from tensorflow.python.eager import def_function +from tensorflow.python.eager import remote +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_math_ops +from tensorflow.python.ops import init_ops_v2 +from tensorflow.python.ops import math_ops +from tensorflow.python.platform import test +from tensorflow.python.tpu import tpu_embedding_v2 +from tensorflow.python.tpu import tpu_embedding_v2_utils +from tensorflow.python.tpu import tpu_strategy_util +from tensorflow.python.util import nest + +FLAGS = flags.FLAGS +flags.DEFINE_string('tpu', '', 'Name of TPU to connect to.') +flags.DEFINE_string('project', None, 'Name of GCP project with TPU.') +flags.DEFINE_string('zone', None, 'Name of GCP zone with TPU.') +flags.DEFINE_string('model_dir', os.environ.get('TEST_TMPDIR'), + 'A temporary directory.') + + +class TPUEmbeddingCorrectness(parameterized.TestCase, test.TestCase): + + def setUp(self): + super(TPUEmbeddingCorrectness, self).setUp() + self.embedding_values = np.array(list(range(32)), dtype=np.float64) + self.initializer = init_ops_v2.Constant(self.embedding_values) + # Embedding for video initialized to + # 0 1 2 3 + # 4 5 6 7 + # ... + self.table_video = tpu_embedding_v2_utils.TableConfig( + vocabulary_size=8, + dim=4, + initializer=self.initializer, + combiner='sum', + name='video') + # Embedding for user initialized to + # 0 1 + # 2 3 + # 4 5 + # 6 7 + # ... 
+ self.table_user = tpu_embedding_v2_utils.TableConfig( + vocabulary_size=16, + dim=2, + initializer=self.initializer, + combiner='mean', + name='user') + self.feature_config = ( + tpu_embedding_v2_utils.FeatureConfig( + table=self.table_video, name='watched'), + tpu_embedding_v2_utils.FeatureConfig( + table=self.table_video, name='favorited'), + tpu_embedding_v2_utils.FeatureConfig( + table=self.table_user, name='friends')) + + self.batch_size = 2 + self.data_batch_size = 4 + + # One (global) batch of inputs + # sparse tensor for watched: + # row 0: 0 + # row 1: 0, 1 + # row 2: 0, 1 + # row 3: 1 + self.feature_watched_indices = [[0, 0], [1, 0], [1, 1], + [2, 0], [2, 1], [3, 0]] + self.feature_watched_values = [0, 0, 1, 0, 1, 1] + self.feature_watched_row_lengths = [1, 2, 2, 1] + # sparse tensor for favorited: + # row 0: 0, 1 + # row 1: 1 + # row 2: 0 + # row 3: 0, 1 + self.feature_favorited_indices = [[0, 0], [0, 1], [1, 0], + [2, 0], [3, 0], [3, 1]] + self.feature_favorited_values = [0, 1, 1, 0, 0, 1] + self.feature_favorited_row_lengths = [2, 1, 1, 2] + # sparse tensor for friends: + # row 0: 3 + # row 1: 0, 1, 2 + # row 2: 3 + # row 3: 0, 1, 2 + self.feature_friends_indices = [[0, 0], [1, 0], [1, 1], [1, 2], + [2, 0], [3, 0], [3, 1], [3, 2]] + self.feature_friends_values = [3, 0, 1, 2, 3, 0, 1, 2] + self.feature_friends_row_lengths = [1, 3, 1, 3] + self.resolver = None + + def tearDown(self): + if self.resolver: + tpu_strategy_util.shutdown_tpu_system(self.resolver) + super(TPUEmbeddingCorrectness, self).tearDown() + + def _get_strategy(self): + self.resolver = tpu_cluster_resolver.TPUClusterResolver( + tpu=FLAGS.tpu, zone=FLAGS.zone, project=FLAGS.project) + remote.connect_to_cluster(self.resolver) + tpu_strategy_util.initialize_tpu_system(self.resolver) + return tpu_strategy.TPUStrategy(self.resolver) + + def _create_strategy_and_mid_level(self, optimizer_name): + strategy = self._get_strategy() + + with strategy.scope(): + if optimizer_name == 'sgd': + optimizer = tpu_embedding_v2_utils.SGD(learning_rate=0.1) + elif optimizer_name == 'adagrad': + optimizer = tpu_embedding_v2_utils.Adagrad(learning_rate=0.1) + elif optimizer_name == 'adam': + optimizer = tpu_embedding_v2_utils.Adam(learning_rate=0.1) + else: + raise ValueError('optimizer is not recognized: ', optimizer_name) + mid_level_api = self._create_mid_level(optimizer=optimizer) + + return strategy, mid_level_api, optimizer + + @parameterized.parameters( + *itertools.product( + ['sgd', 'adagrad', 'adam'], + [True, False])) + def test_embedding(self, optimizer_name, training): + strategy, mid_level_api, optimizer = ( + self._create_strategy_and_mid_level(optimizer_name)) + + dataset = self._create_sparse_dataset(strategy) + dist = strategy.experimental_distribute_dataset( + dataset, + options=distribute_lib.InputOptions( + experimental_prefetch_to_device=False)) + dist_iter = iter(dist) + + @def_function.function + def test_fn(): + + def step(): + """Create and run computation that returns the embedding activations.""" + if not training: + activations = mid_level_api.dequeue() + total_loss = _get_total_loss_tensor(activations) + ret_val = [total_loss] + list(activations) + return ret_val + else: + with backprop.GradientTape() as tape: + activations = mid_level_api.dequeue() + tape.watch(activations) + total_loss = _get_total_loss_tensor(activations) + loss_per_replica = total_loss / strategy.num_replicas_in_sync + gradients = tape.gradient(loss_per_replica, activations) + mid_level_api.apply_gradients(gradients) + ret_val = 
[total_loss] + list(activations) + return ret_val + + mid_level_api.enqueue(next(dist_iter), training=training) + result = strategy.run(step) + return result + + # Run model. + shard_out_val = test_fn() + + # Retrieve TPU weights to CPU. + mid_level_api._retrieve_variables() + + # Compute sparse tensors for global batch. + input_data = next(iter(self._create_sparse_dataset(strategy))) + + # Check results. + self._check_results(strategy, shard_out_val, training, input_data, + mid_level_api._variables, + optimizer) + + def _create_mid_level(self, optimizer=None): + # Create `TPUEmbedding` object. + if optimizer is None: + optimizer = tpu_embedding_v2_utils.SGD(learning_rate=0.1) + + num_replicas = ( + distribution_strategy_context.get_strategy().num_replicas_in_sync) + return tpu_embedding_v2.TPUEmbedding( + feature_config=self.feature_config, + batch_size=self.batch_size * num_replicas, + optimizer=optimizer) + + def _create_sparse_dataset(self, strategy, include_weights=False, weight=0.5): + # Create dataset for enqueue operation + sparse_features = ( + sparse_tensor.SparseTensor( + indices=self.feature_watched_indices, + values=self.feature_watched_values, + dense_shape=[self.data_batch_size, 2]), + sparse_tensor.SparseTensor( + indices=self.feature_favorited_indices, + values=self.feature_favorited_values, + dense_shape=[self.data_batch_size, 2]), + sparse_tensor.SparseTensor( + indices=self.feature_friends_indices, + values=self.feature_friends_values, + dense_shape=[self.data_batch_size, 3])) + if include_weights: + weights = [] + for sparse in sparse_features: + values = ( + array_ops.ones_like(sparse.values, dtype=dtypes.float32) * weight) + weights.append(sparse_tensor.SparseTensor( + indices=sparse.indices, + values=values, + dense_shape=sparse.dense_shape)) + sparse_features = (sparse_features, tuple(weights)) + + dataset = dataset_ops.DatasetV2.from_tensors(sparse_features) + + # Data is batched to self.data_batch_size, rebatch to global batch size. + return dataset.unbatch().repeat().batch( + self.batch_size * strategy.num_replicas_in_sync, drop_remainder=True) + + def _create_dense_input_fn(self, strategy, include_weights=False, weight=0.5): + + def input_fn(ctx): + del ctx + features = ( + constant_op.constant(self.feature_watched_values[-2:], + dtype=dtypes.int32), + constant_op.constant(self.feature_favorited_values[-2:], + dtype=dtypes.int32), + constant_op.constant(self.feature_friends_values[-2:], + dtype=dtypes.int32)) + if include_weights: + weights = [array_ops.ones_like(t, dtype=dtypes.float32) * weight + for t in features] + features = (features, tuple(weights)) + return dataset_ops.DatasetV2.from_tensors(features).repeat() + + return input_fn + + def _check_results(self, strategy, shard_out_val, training, input_data, + table_to_variable, optimizer): + num_replicas = strategy.num_replicas_in_sync + + # Unpack the values `strategy.run()` returns. + loss = _unpack(strategy, shard_out_val[0]) + activation_watched = _unpack(strategy, shard_out_val[1]) + activation_favorited = _unpack(strategy, shard_out_val[2]) + activation_friends = _unpack(strategy, shard_out_val[3]) + + # Core 0: + # Calculate the values of embedding activations. + activation_watched_gold0 = np.array([[0, 1, 2, 3], [4, 6, 8, 10]]) + activation_favorited_gold0 = np.array([[4, 6, 8, 10], [4, 5, 6, 7]]) + # Second row of `activation_friends_gold0` is the mean of the following. 
+ # row 0: 0 1 + # row 1: 2 3 + # row 2: 4 5 + activation_friends_gold0 = np.array([[6, 7], [2, 3]]) + + loss_gold0 = _compute_loss(activation_watched_gold0, + activation_favorited_gold0, + activation_friends_gold0) + + # Add on values from other cores: + # Activations for watched are an alternating sequence of + # activation_watched_gold0 and activation_favorited_gold0. + # For favorited it is the same but in the opposite order. + activation_watched_gold = np.concatenate( + (np.concatenate((np.expand_dims(activation_watched_gold0, axis=0),) * + (num_replicas // 2)), + np.concatenate((np.expand_dims(activation_favorited_gold0, axis=0),) * + (num_replicas // 2))), + axis=1).reshape([self.batch_size * num_replicas, 4]) + activation_favorited_gold = np.concatenate( + (activation_watched_gold[self.batch_size:,], + activation_watched_gold[0:self.batch_size,])) + activation_friends_gold = np.concatenate( + (activation_friends_gold0,) * num_replicas) + + loss_gold = [loss_gold0] * num_replicas + + # Test values. + self.assertAllClose(activation_watched_gold, activation_watched) + self.assertAllClose(activation_favorited_gold, activation_favorited) + self.assertAllClose(activation_friends_gold, activation_friends) + + self.assertAllClose(loss_gold, loss) + + embedding_table_video_before = np.copy( + np.reshape(self.embedding_values, [8, 4])) + embedding_table_user_before = np.copy( + np.reshape(self.embedding_values, [16, 2])) + + global_batch_size = self.batch_size * num_replicas + if training: + gradient_wrt_watched_gold = (2 * activation_watched_gold / + global_batch_size) + gradient_wrt_favorited_gold = (2 * activation_favorited_gold / + global_batch_size) + gradient_wrt_friends_gold = (2 * activation_friends_gold / + global_batch_size) + + # Calculate gradients wrt embedding tables. 
+ gradients_wrt_user = ( + _compute_gradients_wrt_embedding_table( + global_batch_size, gradient_wrt_friends_gold, + embedding_table_user_before, input_data[2].indices.numpy(), + input_data[2].values.numpy(), self.table_user.combiner)) + gradients_wrt_video = ( + _compute_gradients_wrt_embedding_table( + global_batch_size, gradient_wrt_favorited_gold, + embedding_table_video_before, input_data[1].indices.numpy(), + input_data[1].values.numpy(), self.table_video.combiner) + + _compute_gradients_wrt_embedding_table( + global_batch_size, gradient_wrt_watched_gold, + embedding_table_video_before, input_data[0].indices.numpy(), + input_data[0].values.numpy(), self.table_video.combiner)) + + self._check_embedding_and_slot_variables(embedding_table_user_before, + gradients_wrt_user, + embedding_table_video_before, + gradients_wrt_video, + optimizer, + table_to_variable) + + def _check_embedding_and_slot_variables(self, embedding_table_user_before, + gradients_wrt_user, + embedding_table_video_before, + gradients_wrt_video, + optimizer, + table_to_variable): + if isinstance(optimizer, tpu_embedding_v2_utils.SGD): + check_fn = self._check_embedding_and_slot_variables_for_sgd + elif isinstance(optimizer, tpu_embedding_v2_utils.Adagrad): + check_fn = self._check_embedding_and_slot_variables_for_adagrad + elif isinstance(optimizer, tpu_embedding_v2_utils.Adam): + check_fn = self._check_embedding_and_slot_variables_for_adam + else: + raise ValueError('optimizer is not recognized: ', type(optimizer)) + check_fn(embedding_table_user_before, gradients_wrt_user, + optimizer, table_to_variable[self.table_user.name]) + check_fn(embedding_table_video_before, gradients_wrt_video, + optimizer, table_to_variable[self.table_video.name]) + + def _check_embedding_and_slot_variables_for_sgd(self, embedding_table_before, + gradients, + optimizer, + variables): + embedding_table = np.copy(embedding_table_before) + embedding_table -= optimizer.learning_rate * np.sum(gradients, axis=0) + self.assertAllClose(_get_variable(variables['parameters']).numpy(), + embedding_table) + + def _check_embedding_and_slot_variables_for_adagrad(self, + embedding_table_before, + gradients, + optimizer, + variable): + embedding_table = np.copy(embedding_table_before) + accumulator = ( + optimizer.initial_accumulator_value + np.sum(gradients, axis=0)**2) + embedding_table -= ( + optimizer.learning_rate * np.sum(gradients, axis=0) / + np.sqrt(accumulator)) + self.assertAllClose(_get_variable(variable['parameters']).numpy(), + embedding_table) + self.assertAllClose(_get_variable(variable['accumulators']).numpy(), + accumulator) + + def _check_embedding_and_slot_variables_for_adam(self, embedding_table_before, + gradients, + optimizer, + variable): + embedding_table = np.copy(embedding_table_before) + g = np.sum(gradients, axis=0) + v = g**2 * (1 - optimizer.beta_2) + m = g * (1 - optimizer.beta_1) + epsilon = optimizer.epsilon + # TPU Embeddings don't have the LR decay factor for Adam. 
+ lr_modifier = 1 + embedding_table -= ( + m * optimizer.learning_rate * lr_modifier / (np.sqrt(v) + epsilon)) + self.assertAllClose(_get_variable(variable['parameters']).numpy(), + embedding_table, rtol=1e-4) + self.assertAllClose(_get_variable(variable['momenta']).numpy(), + m, rtol=1e-4) + self.assertAllClose(_get_variable(variable['velocities']).numpy(), + v, rtol=1e-4) + + def _get_replica_numpy(self, structured, strategy, replica_id): + def select_replica(x): + x = strategy.experimental_local_results(x) + if len(x) == 1: + return x.numpy() + return x[replica_id].numpy() + return nest.map_structure(select_replica, structured) + + def test_dense_lookup(self): + strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd') + + input_fn = self._create_dense_input_fn(strategy) + dist = strategy.experimental_distribute_datasets_from_function( + input_fn, + options=distribute_lib.InputOptions( + experimental_prefetch_to_device=False)) + dist_iter = iter(dist) + + @def_function.function + def test_fn(): + def step(): + return mid_level_api.dequeue() + + mid_level_api.enqueue(next(dist_iter), training=False) + return strategy.run(step) + + # Run model. + shard0 = self._get_replica_numpy(test_fn(), strategy, 0) + + # embedding_values is a linear list, so we reshape to match the correct + # shape of the corresponding table before performing the lookup. + numpy_videos = np.reshape(self.embedding_values, (8, 4)) + numpy_users = np.reshape(self.embedding_values, (16, 2)) + golden = ((numpy_videos[self.feature_watched_values[-2:]], + numpy_videos[self.feature_favorited_values[-2:]], + numpy_users[self.feature_friends_values[-2:]])) + self.assertAllClose(shard0, golden) + + def test_sequence_embeddings(self): + feature_config = ( + tpu_embedding_v2_utils.FeatureConfig( + table=self.table_video, name='watched', + max_sequence_length=2), + tpu_embedding_v2_utils.FeatureConfig( + table=self.table_video, name='favorited', + max_sequence_length=2), + tpu_embedding_v2_utils.FeatureConfig( + table=self.table_user, name='friends', + max_sequence_length=3)) + optimizer = tpu_embedding_v2_utils.SGD(learning_rate=0.1) + strategy = self._get_strategy() + num_replicas = strategy.num_replicas_in_sync + with strategy.scope(): + mid_level = tpu_embedding_v2.TPUEmbedding( + feature_config=feature_config, + batch_size=self.batch_size * num_replicas, + optimizer=optimizer) + + dataset = self._create_sparse_dataset(strategy) + data = next(iter(strategy.experimental_distribute_dataset( + dataset, + options=distribute_lib.InputOptions( + experimental_prefetch_to_device=False)))) + + @def_function.function + def embedding_and_set_gradients(data): + def tpu_fn(): + activations = mid_level.dequeue() + mid_level.apply_gradients(nest.map_structure(array_ops.ones_like, + activations)) + return activations + mid_level.enqueue(data) + return strategy.run(tpu_fn) + + @def_function.function + def embedding_only(data): + def tpu_fn(): + return mid_level.dequeue() + mid_level.enqueue(data) + return strategy.run(tpu_fn) + + # Only check core 0. + before_update = self._get_replica_numpy( + embedding_and_set_gradients(data), strategy, 0) + after_update = self._get_replica_numpy(embedding_only(data), strategy, 0) + + # For videos table, row 0 and row 1 are looked up 3*num_replicas times as + # they occur 3 times per replica (considering the features 0 and 1 which are + # both looked up in the videos table). + # Feature 0 has ids [0, 0, 1], [0, 1, 1], ... 
repeated over num_replicas + # Feature 1 has ids [0, 1, 1], [0, 0, 1], ... repeated over num_replicas + # This means that both rows 0 and 1 get a -0.1*3*num_replicas update + # For users table, each row is looked up twice: + # Feature 2 has ids [3, 0, 1, 2], .. repeated over num_replicas + # This means that we get a -0.1*num_replicas update to the third feature. + + # In general this means that after the update, if we lookup feature 0 and 1 + # the values will be 0.3*num_replicas lower per entry and for feature 2 they + # will be 0.1*num_replicas lower. + # The one issue that that these lookups contain padding values. + # For core 0, we get the first 2 elements of the 4 element batch. + # For feature 0, the indices are [[0, 0], [1, 0], [1, 1]] with max sequence + # length of 2, which means that [0, 1] will be 0s. + # For feature 1, the indices are [[0, 0], [0, 1], [1, 0]] with max sequence + # length of 2, which means that [1, 1] will be 0s. + # For feature 2, the indices are [[0, 0], [1, 0], [1, 1], [1, 2]] with max + # sequence length of 3, which means that [0, 1], [0, 2] will be 0s. + # The following masks represent that so that we only apply the above updates + # to the non-padding rows: + masks = ( + np.array([[[1], [0]], [[1], [1]]]), + np.array([[[1], [1]], [[1], [0]]]), + np.array([[[1], [0], [0]], [[1], [1], [1]]])) + + per_row_update = (0.3 * num_replicas, + 0.3 * num_replicas, + 0.1 * num_replicas) + golden = tuple([before - update * mask for before, update, mask in + zip(before_update, per_row_update, masks)]) + self.assertAllClose(golden, after_update) + + +def _compute_gradients_wrt_embedding_table(batch_size, + gradient_wrt_activation, + embedding_table, + feature_indices, + feature_values, + combiner, + max_sequence_length=0): + """Compute gradients wrt embedding_table. + + Args: + batch_size: `int`, batch size. + gradient_wrt_activation: `np.array` with shape `batch_size` by + embedding `dimension`. + embedding_table: `np.array` with shape `vocabulary_size` by embedding + `dimension`. + feature_indices: `indices` as used to construct `SparseTensor`. + feature_values: `values` as used to construct `SparseTensor`. + combiner: `String`, 'mean' or 'sum'. + max_sequence_length: If non-zero, a sequence feature with the given length. + + Returns: + Gradients wrt `embedding_table`, an `np.array`s with shape + `batch_size` by `vocabulary_size` by + embedding `dimension`. + + Raises: + ValueError: if `combiner` is not one of 'mean' or 'sum'. 
+ """ + if combiner not in ('mean', 'sum'): + raise ValueError('`combiner` must be mean or sum; got {}.'.format(combiner)) + grads = [] + for i in range(batch_size): + grad = np.zeros_like(embedding_table) + count = 0 + for (batch_i, seq_index), vocabulary_id in zip(feature_indices, + feature_values): + if batch_i == i: + count += 1 + if max_sequence_length > 0: + if seq_index < max_sequence_length: + grad[vocabulary_id, :] += gradient_wrt_activation[i, seq_index, :] + else: + grad[vocabulary_id, :] += gradient_wrt_activation[i, :] + if combiner == 'mean' and not max_sequence_length: + grad = grad / count + grads.append(grad) + return np.stack(grads) + + +def _unpack(strategy, per_replica_output): + per_replica_output = strategy.experimental_local_results(per_replica_output) + per_replica_output = array_ops.concat(per_replica_output, axis=0).numpy() + return per_replica_output + + +def _get_total_loss_tensor(activations): + losses = [] + for activation in activations: + losses.append( + math_ops.reduce_mean( + math_ops.reduce_sum( + gen_math_ops.squared_difference(activation, 0), 1))) + total_loss = array_ops.expand_dims_v2(sum(losses), 0) + return total_loss + + +def _compute_loss(activation_watched, activation_favorited, activation_friends): + watched_loss = np.mean(np.sum(activation_watched**2, axis=1)) + if len(activation_favorited.shape) == 2: + favorited_loss = np.mean(np.sum(activation_favorited**2, axis=1)) + else: + favorited_loss = np.mean(np.sum(activation_favorited**2, axis=(1, 2))) + if len(activation_friends.shape) == 2: + friends_loss = np.mean(np.sum(activation_friends**2, axis=1)) + else: + friends_loss = np.mean(np.sum(activation_friends**2, axis=(1, 2))) + loss = watched_loss + favorited_loss + friends_loss + return loss + + +def _get_variable(variable): + if isinstance(variable, tpu_embedding_v2.TPUShardedVariable): + return variable.variables[0] + return variable + + +if __name__ == '__main__': + v2_compat.enable_v2_behavior() + test.main() diff --git a/tensorflow/python/tpu/tpu_embedding_v2_test.py b/tensorflow/python/tpu/tpu_embedding_v2_test.py index ff09085f3f1..21c10e8dc6a 100644 --- a/tensorflow/python/tpu/tpu_embedding_v2_test.py +++ b/tensorflow/python/tpu/tpu_embedding_v2_test.py @@ -19,7 +19,6 @@ from __future__ import division from __future__ import print_function import functools -import itertools import os from absl import flags @@ -969,60 +968,6 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase): return strategy, mid_level_api, optimizer - @parameterized.parameters( - *itertools.product( - ['sgd', 'adagrad', 'adam'], - [True, False])) - def test_embedding(self, optimizer_name, training): - strategy, mid_level_api, optimizer = ( - self._create_strategy_and_mid_level(optimizer_name)) - - dataset = self._create_sparse_dataset(strategy) - dist = strategy.experimental_distribute_dataset( - dataset, - options=distribute_lib.InputOptions( - experimental_prefetch_to_device=False)) - dist_iter = iter(dist) - - @def_function.function - def test_fn(): - - def step(): - """Create and run computation that returns the embedding activations.""" - if not training: - activations = mid_level_api.dequeue() - total_loss = _get_total_loss_tensor(activations) - ret_val = [total_loss] + list(activations) - return ret_val - else: - with backprop.GradientTape() as tape: - activations = mid_level_api.dequeue() - tape.watch(activations) - total_loss = _get_total_loss_tensor(activations) - loss_per_replica = total_loss / strategy.num_replicas_in_sync - gradients = 
tape.gradient(loss_per_replica, activations) - mid_level_api.apply_gradients(gradients) - ret_val = [total_loss] + list(activations) - return ret_val - - mid_level_api.enqueue(next(dist_iter), training=training) - result = strategy.run(step) - return result - - # Run model. - shard_out_val = test_fn() - - # Retrieve TPU weights to CPU. - mid_level_api._retrieve_variables() - - # Compute sparse tensors for global batch. - input_data = next(iter(self._create_sparse_dataset(strategy))) - - # Check results. - self._check_results(strategy, shard_out_val, training, input_data, - mid_level_api._variables, - optimizer) - def _create_mid_level(self, optimizer=None): # Create `TPUEmbedding` object. if optimizer is None: @@ -1111,156 +1056,6 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase): return input_fn - def _check_results(self, strategy, shard_out_val, training, input_data, - table_to_variable, optimizer): - num_replicas = strategy.num_replicas_in_sync - - # Unpack the values `strategy.run()` returns. - loss = _unpack(strategy, shard_out_val[0]) - activation_watched = _unpack(strategy, shard_out_val[1]) - activation_favorited = _unpack(strategy, shard_out_val[2]) - activation_friends = _unpack(strategy, shard_out_val[3]) - - # Core 0: - # Calculate the values of embedding activations. - activation_watched_gold0 = np.array([[0, 1, 2, 3], [4, 6, 8, 10]]) - activation_favorited_gold0 = np.array([[4, 6, 8, 10], [4, 5, 6, 7]]) - # Second row of `activation_friends_gold0` is the mean of the following. - # row 0: 0 1 - # row 1: 2 3 - # row 2: 4 5 - activation_friends_gold0 = np.array([[6, 7], [2, 3]]) - - loss_gold0 = _compute_loss(activation_watched_gold0, - activation_favorited_gold0, - activation_friends_gold0) - - # Add on values from other cores: - # Activations for watched are an alternating sequence of - # activation_watched_gold0 and activation_favorited_gold0. - # For favorited it is the same but in the opposite order. - activation_watched_gold = np.concatenate( - (np.concatenate((np.expand_dims(activation_watched_gold0, axis=0),) * - (num_replicas // 2)), - np.concatenate((np.expand_dims(activation_favorited_gold0, axis=0),) * - (num_replicas // 2))), - axis=1).reshape([self.batch_size * num_replicas, 4]) - activation_favorited_gold = np.concatenate( - (activation_watched_gold[self.batch_size:,], - activation_watched_gold[0:self.batch_size,])) - activation_friends_gold = np.concatenate( - (activation_friends_gold0,) * num_replicas) - - loss_gold = [loss_gold0] * num_replicas - - # Test values. - self.assertAllClose(activation_watched_gold, activation_watched) - self.assertAllClose(activation_favorited_gold, activation_favorited) - self.assertAllClose(activation_friends_gold, activation_friends) - - self.assertAllClose(loss_gold, loss) - - embedding_table_video_before = np.copy( - np.reshape(self.embedding_values, [8, 4])) - embedding_table_user_before = np.copy( - np.reshape(self.embedding_values, [16, 2])) - - global_batch_size = self.batch_size * num_replicas - if training: - gradient_wrt_watched_gold = (2 * activation_watched_gold / - global_batch_size) - gradient_wrt_favorited_gold = (2 * activation_favorited_gold / - global_batch_size) - gradient_wrt_friends_gold = (2 * activation_friends_gold / - global_batch_size) - - # Calculate gradients wrt embedding tables. 
- gradients_wrt_user = ( - _compute_gradients_wrt_embedding_table( - global_batch_size, gradient_wrt_friends_gold, - embedding_table_user_before, input_data[2].indices.numpy(), - input_data[2].values.numpy(), self.table_user.combiner)) - gradients_wrt_video = ( - _compute_gradients_wrt_embedding_table( - global_batch_size, gradient_wrt_favorited_gold, - embedding_table_video_before, input_data[1].indices.numpy(), - input_data[1].values.numpy(), self.table_video.combiner) + - _compute_gradients_wrt_embedding_table( - global_batch_size, gradient_wrt_watched_gold, - embedding_table_video_before, input_data[0].indices.numpy(), - input_data[0].values.numpy(), self.table_video.combiner)) - - self._check_embedding_and_slot_variables(embedding_table_user_before, - gradients_wrt_user, - embedding_table_video_before, - gradients_wrt_video, - optimizer, - table_to_variable) - - def _check_embedding_and_slot_variables(self, embedding_table_user_before, - gradients_wrt_user, - embedding_table_video_before, - gradients_wrt_video, - optimizer, - table_to_variable): - if isinstance(optimizer, tpu_embedding_v2_utils.SGD): - check_fn = self._check_embedding_and_slot_variables_for_sgd - elif isinstance(optimizer, tpu_embedding_v2_utils.Adagrad): - check_fn = self._check_embedding_and_slot_variables_for_adagrad - elif isinstance(optimizer, tpu_embedding_v2_utils.Adam): - check_fn = self._check_embedding_and_slot_variables_for_adam - else: - raise ValueError('optimizer is not recognized: ', type(optimizer)) - check_fn(embedding_table_user_before, gradients_wrt_user, - optimizer, table_to_variable[self.table_user.name]) - check_fn(embedding_table_video_before, gradients_wrt_video, - optimizer, table_to_variable[self.table_video.name]) - - def _check_embedding_and_slot_variables_for_sgd(self, embedding_table_before, - gradients, - optimizer, - variables): - embedding_table = np.copy(embedding_table_before) - embedding_table -= optimizer.learning_rate * np.sum(gradients, axis=0) - self.assertAllClose(_get_variable(variables['parameters']).numpy(), - embedding_table) - - def _check_embedding_and_slot_variables_for_adagrad(self, - embedding_table_before, - gradients, - optimizer, - variable): - embedding_table = np.copy(embedding_table_before) - accumulator = ( - optimizer.initial_accumulator_value + np.sum(gradients, axis=0)**2) - embedding_table -= ( - optimizer.learning_rate * np.sum(gradients, axis=0) / - np.sqrt(accumulator)) - self.assertAllClose(_get_variable(variable['parameters']).numpy(), - embedding_table) - self.assertAllClose(_get_variable(variable['accumulators']).numpy(), - accumulator) - - def _check_embedding_and_slot_variables_for_adam(self, embedding_table_before, - gradients, - optimizer, - variable): - embedding_table = np.copy(embedding_table_before) - g = np.sum(gradients, axis=0) - v = g**2 * (1 - optimizer.beta_2) - m = g * (1 - optimizer.beta_1) - epsilon = optimizer.epsilon - # TPU Embeddings don't have the LR decay factor for Adam. 
- lr_modifier = 1 - embedding_table -= ( - m * optimizer.learning_rate * lr_modifier / (np.sqrt(v) + epsilon)) - self.assertAllClose(_get_variable(variable['parameters']).numpy(), - embedding_table, rtol=1e-4) - self.assertAllClose(_get_variable(variable['momenta']).numpy(), - m, rtol=1e-4) - self.assertAllClose(_get_variable(variable['velocities']).numpy(), - v, rtol=1e-4) - def _get_replica_numpy(self, structured, strategy, replica_id): def select_replica(x): x = strategy.experimental_local_results(x) @@ -1269,36 +1064,6 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase): return x[replica_id].numpy() return nest.map_structure(select_replica, structured) - def test_dense_lookup(self): - strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd') - - input_fn = self._create_dense_input_fn(strategy) - dist = strategy.experimental_distribute_datasets_from_function( - input_fn, - options=distribute_lib.InputOptions( - experimental_prefetch_to_device=False)) - dist_iter = iter(dist) - - @def_function.function - def test_fn(): - def step(): - return mid_level_api.dequeue() - - mid_level_api.enqueue(next(dist_iter), training=False) - return strategy.run(step) - - # Run model. - shard0 = self._get_replica_numpy(test_fn(), strategy, 0) - - # embedding_values is a linear list, so we reshape to match the correct - # shape of the corresponding table before performing the lookup. - numpy_videos = np.reshape(self.embedding_values, (8, 4)) - numpy_users = np.reshape(self.embedding_values, (16, 2)) - golden = ((numpy_videos[self.feature_watched_values[-2:]], - numpy_videos[self.feature_favorited_values[-2:]], - numpy_users[self.feature_friends_values[-2:]])) - self.assertAllClose(shard0, golden) - def test_variable_learning_rate(self): num_steps = 10 num_steps_float = float(num_steps) @@ -1447,138 +1212,6 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase): batch_size=self.batch_size*num_replicas, optimizer=optimizer) - def test_sequence_embeddings(self): - feature_config = ( - tpu_embedding_v2_utils.FeatureConfig( - table=self.table_video, name='watched', - max_sequence_length=2), - tpu_embedding_v2_utils.FeatureConfig( - table=self.table_video, name='favorited', - max_sequence_length=2), - tpu_embedding_v2_utils.FeatureConfig( - table=self.table_user, name='friends', - max_sequence_length=3)) - optimizer = tpu_embedding_v2_utils.SGD(learning_rate=0.1) - strategy = self._get_strategy() - num_replicas = strategy.num_replicas_in_sync - with strategy.scope(): - mid_level = tpu_embedding_v2.TPUEmbedding( - feature_config=feature_config, - batch_size=self.batch_size * num_replicas, - optimizer=optimizer) - - dataset = self._create_sparse_dataset(strategy) - data = next(iter(strategy.experimental_distribute_dataset( - dataset, - options=distribute_lib.InputOptions( - experimental_prefetch_to_device=False)))) - - @def_function.function - def embedding_and_set_gradients(data): - def tpu_fn(): - activations = mid_level.dequeue() - mid_level.apply_gradients(nest.map_structure(array_ops.ones_like, - activations)) - return activations - mid_level.enqueue(data) - return strategy.run(tpu_fn) - - @def_function.function - def embedding_only(data): - def tpu_fn(): - return mid_level.dequeue() - mid_level.enqueue(data) - return strategy.run(tpu_fn) - - # Only check core 0. 
- before_update = self._get_replica_numpy( - embedding_and_set_gradients(data), strategy, 0) - after_update = self._get_replica_numpy(embedding_only(data), strategy, 0) - - # For videos table, row 0 and row 1 are looked up 3*num_replicas times as - # they occur 3 times per replica (considering the features 0 and 1 which are - # both looked up in the videos table). - # Feature 0 has ids [0, 0, 1], [0, 1, 1], ... repeated over num_replicas - # Feature 1 has ids [0, 1, 1], [0, 0, 1], ... repeated over num_replicas - # This means that both rows 0 and 1 get a -0.1*3*num_replicas update - # For users table, each row is looked up twice: - # Feature 2 has ids [3, 0, 1, 2], .. repeated over num_replicas - # This means that we get a -0.1*num_replicas update to the third feature. - - # In general this means that after the update, if we lookup feature 0 and 1 - # the values will be 0.3*num_replicas lower per entry and for feature 2 they - # will be 0.1*num_replicas lower. - # The one issue that that these lookups contain padding values. - # For core 0, we get the first 2 elements of the 4 element batch. - # For feature 0, the indices are [[0, 0], [1, 0], [1, 1]] with max sequence - # length of 2, which means that [0, 1] will be 0s. - # For feature 1, the indices are [[0, 0], [0, 1], [1, 0]] with max sequence - # length of 2, which means that [1, 1] will be 0s. - # For feature 2, the indices are [[0, 0], [1, 0], [1, 1], [1, 2]] with max - # sequence length of 3, which means that [0, 1], [0, 2] will be 0s. - # The following masks represent that so that we only apply the above updates - # to the non-padding rows: - masks = ( - np.array([[[1], [0]], [[1], [1]]]), - np.array([[[1], [1]], [[1], [0]]]), - np.array([[[1], [0], [0]], [[1], [1], [1]]])) - - per_row_update = (0.3 * num_replicas, - 0.3 * num_replicas, - 0.1 * num_replicas) - golden = tuple([before - update * mask for before, update, mask in - zip(before_update, per_row_update, masks)]) - self.assertAllClose(golden, after_update) - - -def _compute_gradients_wrt_embedding_table(batch_size, - gradient_wrt_activation, - embedding_table, - feature_indices, - feature_values, - combiner, - max_sequence_length=0): - """Compute gradients wrt embedding_table. - - Args: - batch_size: `int`, batch size. - gradient_wrt_activation: `np.array` with shape `batch_size` by - embedding `dimension`. - embedding_table: `np.array` with shape `vocabulary_size` by embedding - `dimension`. - feature_indices: `indices` as used to construct `SparseTensor`. - feature_values: `values` as used to construct `SparseTensor`. - combiner: `String`, 'mean' or 'sum'. - max_sequence_length: If non-zero, a sequence feature with the given length. - - Returns: - Gradients wrt `embedding_table`, an `np.array`s with shape - `batch_size` by `vocabulary_size` by - embedding `dimension`. - - Raises: - ValueError: if `combiner` is not one of 'mean' or 'sum'. 
- """ - if combiner not in ('mean', 'sum'): - raise ValueError('`combiner` must be mean or sum; got {}.'.format(combiner)) - grads = [] - for i in range(batch_size): - grad = np.zeros_like(embedding_table) - count = 0 - for (batch_i, seq_index), vocabulary_id in zip(feature_indices, - feature_values): - if batch_i == i: - count += 1 - if max_sequence_length > 0: - if seq_index < max_sequence_length: - grad[vocabulary_id, :] += gradient_wrt_activation[i, seq_index, :] - else: - grad[vocabulary_id, :] += gradient_wrt_activation[i, :] - if combiner == 'mean' and not max_sequence_length: - grad = grad / count - grads.append(grad) - return np.stack(grads) - def _unpack(strategy, per_replica_output): per_replica_output = strategy.experimental_local_results(per_replica_output) @@ -1586,31 +1219,6 @@ def _unpack(strategy, per_replica_output): return per_replica_output -def _get_total_loss_tensor(activations): - losses = [] - for activation in activations: - losses.append( - math_ops.reduce_mean( - math_ops.reduce_sum( - gen_math_ops.squared_difference(activation, 0), 1))) - total_loss = array_ops.expand_dims_v2(sum(losses), 0) - return total_loss - - -def _compute_loss(activation_watched, activation_favorited, activation_friends): - watched_loss = np.mean(np.sum(activation_watched**2, axis=1)) - if len(activation_favorited.shape) == 2: - favorited_loss = np.mean(np.sum(activation_favorited**2, axis=1)) - else: - favorited_loss = np.mean(np.sum(activation_favorited**2, axis=(1, 2))) - if len(activation_friends.shape) == 2: - friends_loss = np.mean(np.sum(activation_friends**2, axis=1)) - else: - friends_loss = np.mean(np.sum(activation_friends**2, axis=(1, 2))) - loss = watched_loss + favorited_loss + friends_loss - return loss - - def _get_tmpdir(name, subdir=''): segments = [FLAGS.model_dir, name] + ([subdir] if subdir else []) return os.path.join(*segments) From 29809909b2191d35f86f111812b6c6b18f24f5ee Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Tue, 14 Jul 2020 14:24:56 -0700 Subject: [PATCH 0420/2522] Enable kernel_util.h tensor getters to use the new TfLiteContext API. With upcoming changes to TFLM for reducing runtime RAM, tensor data can be accessed via the new function pointer recently added to TfLiteContext. This new API enables runtimes to manage tensor overhead based on requirements for the platform. This change simply points existing API calls used by TFL and TFLM kernels to get TfLiteTensor structs to the new function pointer if it exists. PiperOrigin-RevId: 321235071 Change-Id: Ib749aa06406f9eee2566fdf5258b4ba3f95ea40b --- tensorflow/lite/c/common.h | 7 ++--- tensorflow/lite/core/subgraph.cc | 3 +++ .../delegates/coreml/builders/util_test.cc | 2 +- tensorflow/lite/kernels/kernel_util.h | 27 +++++++++++++++---- tensorflow/lite/micro/micro_interpreter.cc | 7 +++++ tensorflow/lite/micro/micro_interpreter.h | 3 +++ tensorflow/lite/micro/testing/test_utils.cc | 8 ++++++ .../benchmark/experimental/c/c_api_types.h | 7 ++--- 8 files changed, 52 insertions(+), 12 deletions(-) diff --git a/tensorflow/lite/c/common.h b/tensorflow/lite/c/common.h index cd6eeec4da2..f5ce5f78dde 100644 --- a/tensorflow/lite/c/common.h +++ b/tensorflow/lite/c/common.h @@ -764,13 +764,14 @@ typedef struct TfLiteContext { // Returns a TfLiteTensor struct for a given index in the subgraph. // WARNING: This is an experimental interface that is subject to change. // WARNING: This method may not be available on all platforms. 
- TfLiteTensor* (*GetTensor)(struct TfLiteContext* context, int subgraph_idx); + TfLiteTensor* (*GetTensor)(const struct TfLiteContext* context, + int tensor_idx); // Returns a TfLiteEvalTensor struct for a given index in the subgraph. // WARNING: This is an experimental interface that is subject to change. // WARNING: This method may not be available on all platforms. - TfLiteEvalTensor* (*GetEvalTensor)(struct TfLiteContext* context, - int subgraph_idx); + TfLiteEvalTensor* (*GetEvalTensor)(const struct TfLiteContext* context, + int tensor_idx); } TfLiteContext; typedef struct TfLiteRegistration { diff --git a/tensorflow/lite/core/subgraph.cc b/tensorflow/lite/core/subgraph.cc index 5ef9b45514b..b087ae1901c 100644 --- a/tensorflow/lite/core/subgraph.cc +++ b/tensorflow/lite/core/subgraph.cc @@ -189,6 +189,7 @@ Subgraph::Subgraph(ErrorReporter* error_reporter, next_execution_plan_index_to_plan_allocation_(0), subgraphs_(subgraphs), resources_(resources) { + // TODO(b/161272052): Consider a better TfLiteContext initialization pattern: context_.impl_ = static_cast(this); context_.ResizeTensor = ResizeTensor; context_.ReportError = ReportErrorC; @@ -200,6 +201,8 @@ Subgraph::Subgraph(ErrorReporter* error_reporter, context_.GetExternalContext = GetExternalContext; context_.SetExternalContext = SetExternalContext; context_.profiler = nullptr; + context_.GetTensor = nullptr; + context_.GetEvalTensor = nullptr; // Reserve some space for the tensors to avoid excessive resizing. tensors_.reserve(kTensorsReservedCapacity); diff --git a/tensorflow/lite/experimental/delegates/coreml/builders/util_test.cc b/tensorflow/lite/experimental/delegates/coreml/builders/util_test.cc index 929bc4a2282..8ba8a9bb5bc 100644 --- a/tensorflow/lite/experimental/delegates/coreml/builders/util_test.cc +++ b/tensorflow/lite/experimental/delegates/coreml/builders/util_test.cc @@ -62,7 +62,7 @@ class IsBinaryOpSupportedTest : public testing::Test { } } - TfLiteContext context_; + TfLiteContext context_ = {}; TfLiteNode node_; std::vector tensors_; }; diff --git a/tensorflow/lite/kernels/kernel_util.h b/tensorflow/lite/kernels/kernel_util.h index 4660631dded..92c8549ce5e 100644 --- a/tensorflow/lite/kernels/kernel_util.h +++ b/tensorflow/lite/kernels/kernel_util.h @@ -30,19 +30,32 @@ inline int SizeOfDimension(const TfLiteTensor* t, int dim) { } inline const TfLiteTensor* GetInput(const TfLiteContext* context, const TfLiteNode* node, int index) { - return &context->tensors[node->inputs->data[index]]; + if (context->GetTensor != nullptr) { + return context->GetTensor(context, node->inputs->data[index]); + } else { + return &context->tensors[node->inputs->data[index]]; + } } // Note: You must check if result is not null: // TfLiteTensor* my_tensor = GetVariableInput(context, node, kMyTensorIdx); // TF_LITE_ENSURE(context, my_tensor != nullptr); inline TfLiteTensor* GetVariableInput(TfLiteContext* context, const TfLiteNode* node, int index) { - TfLiteTensor* tensor = &context->tensors[node->inputs->data[index]]; - return (tensor->is_variable) ? tensor : nullptr; + TfLiteTensor* tensor = nullptr; + if (context->GetTensor != nullptr) { + tensor = context->GetTensor(context, node->inputs->data[index]); + } else { + tensor = &context->tensors[node->inputs->data[index]]; + } + return (tensor != nullptr && tensor->is_variable) ? 
tensor : nullptr; } inline TfLiteTensor* GetOutput(TfLiteContext* context, const TfLiteNode* node, int index) { - return &context->tensors[node->outputs->data[index]]; + if (context->GetTensor != nullptr) { + return context->GetTensor(context, node->outputs->data[index]); + } else { + return &context->tensors[node->outputs->data[index]]; + } } #ifndef TF_LITE_STATIC_MEMORY inline TfLiteTensor* GetTemporary(TfLiteContext* context, @@ -78,7 +91,11 @@ inline const TfLiteTensor* GetOptionalInputTensor(const TfLiteContext* context, const bool use_tensor = index < node->inputs->size && node->inputs->data[index] != kTfLiteOptionalTensor; if (use_tensor) { - return &context->tensors[node->inputs->data[index]]; + if (context->GetTensor != nullptr) { + return context->GetTensor(context, node->inputs->data[index]); + } else { + return &context->tensors[node->inputs->data[index]]; + } } return nullptr; } diff --git a/tensorflow/lite/micro/micro_interpreter.cc b/tensorflow/lite/micro/micro_interpreter.cc index c16ede174aa..a47dba83c6b 100644 --- a/tensorflow/lite/micro/micro_interpreter.cc +++ b/tensorflow/lite/micro/micro_interpreter.cc @@ -70,6 +70,12 @@ void ContextHelper::ReportOpError(struct TfLiteContext* context, va_end(args); } +TfLiteTensor* ContextHelper::GetTensor(const struct TfLiteContext* context, + int tensor_idx) { + // TODO(b/160894903): Return this value from temp allocated memory. + return &context->tensors[tensor_idx]; +} + } // namespace internal MicroInterpreter::MicroInterpreter(const Model* model, @@ -132,6 +138,7 @@ void MicroInterpreter::Init(tflite::Profiler* profiler) { context_.impl_ = static_cast(&context_helper_); context_.ReportError = context_helper_.ReportOpError; + context_.GetTensor = context_helper_.GetTensor; context_.recommended_num_threads = 1; context_.profiler = profiler; diff --git a/tensorflow/lite/micro/micro_interpreter.h b/tensorflow/lite/micro/micro_interpreter.h index 29377e3b940..6e9e5eca572 100644 --- a/tensorflow/lite/micro/micro_interpreter.h +++ b/tensorflow/lite/micro/micro_interpreter.h @@ -53,6 +53,9 @@ class ContextHelper { static void ReportOpError(struct TfLiteContext* context, const char* format, ...); + static TfLiteTensor* GetTensor(const struct TfLiteContext* context, + int tensor_idx); + void SetNodeIndex(int idx) { current_node_idx_ = idx; } private: diff --git a/tensorflow/lite/micro/testing/test_utils.cc b/tensorflow/lite/micro/testing/test_utils.cc index 4471b2e2929..8860d66efa9 100644 --- a/tensorflow/lite/micro/testing/test_utils.cc +++ b/tensorflow/lite/micro/testing/test_utils.cc @@ -87,6 +87,11 @@ void* GetScratchBuffer(TfLiteContext* context, int buffer_index) { return scratch_buffers_[buffer_index]; } +TfLiteTensor* GetTensor(const struct TfLiteContext* context, int subgraph_idx) { + // TODO(b/160894903): Return this value from temp allocated memory. 
+ return &context->tensors[subgraph_idx]; +} + } // namespace uint8_t F2Q(float value, float min, float max) { @@ -137,6 +142,9 @@ void PopulateContext(TfLiteTensor* tensors, int tensors_size, context->GetExternalContext = nullptr; context->SetExternalContext = nullptr; + context->GetTensor = GetTensor; + context->GetEvalTensor = nullptr; + context->AllocatePersistentBuffer = AllocatePersistentBuffer; context->RequestScratchBufferInArena = RequestScratchBufferInArena; context->GetScratchBuffer = GetScratchBuffer; diff --git a/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h b/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h index cd6eeec4da2..f5ce5f78dde 100644 --- a/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h +++ b/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h @@ -764,13 +764,14 @@ typedef struct TfLiteContext { // Returns a TfLiteTensor struct for a given index in the subgraph. // WARNING: This is an experimental interface that is subject to change. // WARNING: This method may not be available on all platforms. - TfLiteTensor* (*GetTensor)(struct TfLiteContext* context, int subgraph_idx); + TfLiteTensor* (*GetTensor)(const struct TfLiteContext* context, + int tensor_idx); // Returns a TfLiteEvalTensor struct for a given index in the subgraph. // WARNING: This is an experimental interface that is subject to change. // WARNING: This method may not be available on all platforms. - TfLiteEvalTensor* (*GetEvalTensor)(struct TfLiteContext* context, - int subgraph_idx); + TfLiteEvalTensor* (*GetEvalTensor)(const struct TfLiteContext* context, + int tensor_idx); } TfLiteContext; typedef struct TfLiteRegistration { From 97b946227cfd6a9d1fdc786cedeb09674d7221c4 Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Tue, 14 Jul 2020 21:35:35 +0000 Subject: [PATCH 0421/2522] added tensor_shape_utils_tests --- tensorflow/c/kernels/summary_op.cc | 6 ++-- tensorflow/c/tf_shape_utils_test.cc | 43 +++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 3 deletions(-) create mode 100644 tensorflow/c/tf_shape_utils_test.cc diff --git a/tensorflow/c/kernels/summary_op.cc b/tensorflow/c/kernels/summary_op.cc index e373ef13871..10b46284814 100644 --- a/tensorflow/c/kernels/summary_op.cc +++ b/tensorflow/c/kernels/summary_op.cc @@ -26,18 +26,18 @@ limitations under the License. // Struct that stores the status and TF_Tensor inputs to the opkernel. // Used to delete tensor and status in its destructor upon kernel return. namespace { - struct Params{ + struct Params { TF_Tensor* tags; TF_Tensor* values; TF_Status* status; Params(TF_OpKernelContext* ctx) { status = TF_NewStatus(); TF_GetInput(ctx, 0, &tags, status); - if (TF_GetCode(status) == TF_OK){ + if (TF_GetCode(status) == TF_OK) { TF_GetInput(ctx, 1, &values, status); } }; - ~Params(){ + ~Params() { TF_DeleteStatus(status); TF_DeleteTensor(tags); TF_DeleteTensor(values); diff --git a/tensorflow/c/tf_shape_utils_test.cc b/tensorflow/c/tf_shape_utils_test.cc new file mode 100644 index 00000000000..49cf042c5c0 --- /dev/null +++ b/tensorflow/c/tf_shape_utils_test.cc @@ -0,0 +1,43 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/c/tensor_shape_utils.h" +#include "tensorflow/c/tf_tensor_internal.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { + +void TestShapeMatch(TensorShape shape) { + Tensor tensor(DT_FLOAT, shape); + Status status; + TF_Tensor* tf_tensor = TF_TensorFromTensor(tensor, &status); + ASSERT_TRUE(status.ok()) << status.ToString(); + ASSERT_EQ(tensor.shape().DebugString(), TF_ShapeDebugString(tf_tensor)); +} + +TEST(ShapeDebugString, RegularShape) { + TestShapeMatch(TensorShape({5, 4, 7})); +} + +TEST(ShapeDebugString, ShapeWithUnknownDimension) { + TestShapeMatch(TensorShape({5, -1, 7})); +} + + +} // namespace tensorflow From bdf8b75c140233b38e22199820f7023c4af8d5e4 Mon Sep 17 00:00:00 2001 From: Russell Power Date: Tue, 14 Jul 2020 14:35:39 -0700 Subject: [PATCH 0422/2522] TPU rewrite pass refactoring. PiperOrigin-RevId: 321237265 Change-Id: I33c4966fc038816be9d9e9efd936e68a8400cc1a --- tensorflow/compiler/tf2xla/BUILD | 1 + tensorflow/core/tpu/BUILD | 11 + tensorflow/core/tpu/graph_rewrite/BUILD | 54 + .../encapsulate_tpu_computations_pass.cc | 2979 +++++++++++++++++ .../encapsulate_tpu_computations_pass.h | 73 + .../encapsulate_tpu_computations_pass_test.cc | 810 +++++ .../tpu_rewrite_pass_registration.cc | 7 +- tensorflow/core/tpu/tpu_compile_interface.cc | 38 + tensorflow/core/tpu/tpu_compile_interface.h | 33 + tensorflow/core/tpu/tpu_defs.cc | 3 + tensorflow/core/tpu/tpu_defs.h | 3 + 11 files changed, 4010 insertions(+), 2 deletions(-) create mode 100644 tensorflow/core/tpu/graph_rewrite/encapsulate_tpu_computations_pass.cc create mode 100644 tensorflow/core/tpu/graph_rewrite/encapsulate_tpu_computations_pass.h create mode 100644 tensorflow/core/tpu/graph_rewrite/encapsulate_tpu_computations_pass_test.cc create mode 100644 tensorflow/core/tpu/tpu_compile_interface.cc create mode 100644 tensorflow/core/tpu/tpu_compile_interface.h diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD index cac72925dfd..663e34c2b8e 100644 --- a/tensorflow/compiler/tf2xla/BUILD +++ b/tensorflow/compiler/tf2xla/BUILD @@ -27,6 +27,7 @@ package_group( "//tensorflow/compiler/mlir/...", "//tensorflow/compiler/tests/...", "//tensorflow/compiler/tf2xla/...", + "//tensorflow/core/tpu/...", "//tensorflow/python/compiler/...", ], ) diff --git a/tensorflow/core/tpu/BUILD b/tensorflow/core/tpu/BUILD index 8181f682d70..7639cacc378 100644 --- a/tensorflow/core/tpu/BUILD +++ b/tensorflow/core/tpu/BUILD @@ -73,6 +73,17 @@ cc_library( ], ) +cc_library( + name = "tpu_compile_interface", + srcs = ["tpu_compile_interface.cc"], + hdrs = ["tpu_compile_interface.h"], + deps = [ + "//tensorflow/core/platform:fingerprint", + "//tensorflow/core/platform:logging", + "@com_google_absl//absl/strings", + ], +) + cc_library( name = "tpu_defs", srcs = ["tpu_defs.cc"], diff --git a/tensorflow/core/tpu/graph_rewrite/BUILD 
b/tensorflow/core/tpu/graph_rewrite/BUILD index dcf6af69fce..10218b0a2fb 100644 --- a/tensorflow/core/tpu/graph_rewrite/BUILD +++ b/tensorflow/core/tpu/graph_rewrite/BUILD @@ -18,6 +18,7 @@ cc_library( srcs = ["tpu_rewrite_pass_registration.cc"], deps = [ ":distributed_tpu_configuration_rewrite_pass", + ":encapsulate_tpu_computations_pass", ":variable_merger_pass", "//tensorflow/core:core_cpu", ], @@ -93,3 +94,56 @@ tf_cc_test( "//tensorflow/core:test_main", ], ) + +cc_library( + name = "encapsulate_tpu_computations_pass", + srcs = [ + "encapsulate_tpu_computations_pass.cc", + ], + hdrs = [ + "encapsulate_tpu_computations_pass.h", + ], + deps = [ + "//tensorflow/compiler/jit:compilation_passes", + "//tensorflow/compiler/jit:encapsulate_util", + "//tensorflow/compiler/tf2xla:side_effect_util", + "//tensorflow/compiler/tf2xla:tf2xla_util", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/core:core_cpu_lib", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:session_options", + "//tensorflow/core/tpu:tpu_compile_interface", + "//tensorflow/core/tpu:tpu_defs", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/container:node_hash_map", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + ], +) + +tf_cc_test( + name = "encapsulate_tpu_computations_pass_test", + srcs = ["encapsulate_tpu_computations_pass_test.cc"], + deps = [ + ":encapsulate_tpu_computations_pass", + "//tensorflow/cc:cc_ops", + "//tensorflow/cc:function_ops", + "//tensorflow/cc:resource_variable_ops", + "//tensorflow/cc:tpu_ops", + "//tensorflow/compiler/jit:compilation_passes", + "//tensorflow/compiler/tf2xla:test_util", + "//tensorflow/core:core_cpu", + "//tensorflow/core:core_cpu_impl", + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib_internal", + "//tensorflow/core:ops", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core/tpu:tpu_defs", + ], +) diff --git a/tensorflow/core/tpu/graph_rewrite/encapsulate_tpu_computations_pass.cc b/tensorflow/core/tpu/graph_rewrite/encapsulate_tpu_computations_pass.cc new file mode 100644 index 00000000000..40f9353beb4 --- /dev/null +++ b/tensorflow/core/tpu/graph_rewrite/encapsulate_tpu_computations_pass.cc @@ -0,0 +1,2979 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/tpu/graph_rewrite/encapsulate_tpu_computations_pass.h" + +#include + +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" +#include "absl/container/node_hash_map.h" +#include "absl/memory/memory.h" +#include "absl/strings/str_cat.h" +#include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h" +#include "tensorflow/compiler/jit/encapsulate_util.h" +#include "tensorflow/compiler/jit/extract_outside_compilation_pass.h" +#include "tensorflow/compiler/tf2xla/side_effect_util.h" +#include "tensorflow/compiler/tf2xla/tf2xla_util.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/framework/function.h" +#include "tensorflow/core/framework/graph_to_functiondef.h" +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/graph/algorithm.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/gtl/cleanup.h" +#include "tensorflow/core/lib/gtl/flatset.h" +#include "tensorflow/core/lib/hash/hash.h" +#include "tensorflow/core/lib/strings/proto_serialization.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/public/session_options.h" +#include "tensorflow/core/public/version.h" +#include "tensorflow/core/tpu/tpu_compile_interface.h" +#include "tensorflow/core/tpu/tpu_defs.h" +#include "tensorflow/core/util/dump_graph.h" + +namespace tensorflow { + +namespace { + +const char* const kTPUReplicatedInput = "TPUReplicatedInput"; +const char* const kTPUReplicatedOutput = "TPUReplicatedOutput"; +const char* const kPivotForClusterAttr = "_pivot_for_cluster"; + +// Finds the `index` of an _Arg or _Retval node. +Status GetIndexAttr(const Node& n, int num_args, int* index) { + TF_RETURN_IF_ERROR(GetNodeAttr(n.attrs(), "index", index)); + if (*index < 0 || *index >= num_args) { + return errors::InvalidArgument("Invalid ", n.type_string(), " number ", + *index); + } + return Status::OK(); +} + +// Rewrite function to be passed to EncapsulateSubgraphsInFunctions that sorts +// the arguments into the order expected by TPUReplicate computations: +// 1) replicated arguments +// 2) non-replicated (broadcast) arguments +// 3) resource variable arguments +// See the documentation of EncapsulateSubgraphsInFunctions for the meaning +// of the arguments. +Status RewriteSubgraph(const std::vector& arg_source_tensors, + std::unique_ptr* graph_ptr, + std::vector* input_permutation, + std::vector* output_permutation, + NodeDef* call_def) { + // Replicated inputs have TPUReplicatedInput nodes as predecessors in the + // input graph. 
+ auto is_replicated_input = [&](const Node& n, bool* is_packed = nullptr) { + CHECK_EQ("_Arg", n.type_string()); + int index; + TF_CHECK_OK(GetIndexAttr(n, arg_source_tensors.size(), &index)); + bool ret = + arg_source_tensors.at(index).node->type_string() == kTPUReplicatedInput; + if (is_packed) { + if (!ret || !GetNodeAttr(arg_source_tensors.at(index).node->attrs(), + "is_packed", is_packed) + .ok()) { + *is_packed = false; + } + } + return ret; + }; + + auto get_replicated_input_index = [&](const Node& n) { + CHECK_EQ("_Arg", n.type_string()); + int index; + TF_CHECK_OK(GetIndexAttr(n, arg_source_tensors.size(), &index)); + if (arg_source_tensors.at(index).node->type_string() != + kTPUReplicatedInput) { + return -1; + } + int replicated_index; + TF_CHECK_OK(GetNodeAttr(arg_source_tensors.at(index).node->attrs(), "index", + &replicated_index)); + + return replicated_index; + }; + + auto is_guaranteed_constant = [&](const Node& n) { + bool guaranteed_constant = false; + if (!GetNodeAttr(n.attrs(), "_is_guaranteed_constant", &guaranteed_constant) + .ok()) { + return false; + } + // Replicated input nodes can be marked as guaranteed constants if they are + // const. + return guaranteed_constant && !is_replicated_input(n); + }; + + Graph* graph = graph_ptr->get(); + Node* metadata_node = nullptr; + const int num_args = input_permutation->size(); + const int num_retvals = output_permutation->size(); + + std::vector args; + std::vector retvals; + args.reserve(num_args); + retvals.reserve(num_retvals); + for (Node* n : graph->nodes()) { + if (n->type_string() == "_Arg") { + args.push_back(n); + } else if (n->type_string() == "_Retval") { + retvals.push_back(n); + } else if (n->type_string() == "TPUReplicateMetadata") { + metadata_node = n; + } else if (!str_util::StrContains(n->requested_device(), + DEVICE_TPU_REPLICATED_CORE)) { + // If an operator isn't assigned to a TPU core device, assign it to + // TPU_REPLICATED_CORE without a specific core ID. For some operators, + // such as variable reads/writes, the operator may be assigned to non-TPU + // devices due to colocation. + n->set_assigned_device_name( + strings::StrCat("/device:", DEVICE_TPU_REPLICATED_CORE)); + } + } + + // Read the metadata node and remove it from the graph. + if (metadata_node == nullptr) { + return errors::InvalidArgument("Missing TPUReplicateMetadata node"); + } + + for (const auto& attr : metadata_node->attrs()) { + if (attr.first == "computation_shape") { + // Convert the deprecated computation_shape attribute into a + // num_cores_per_replica value. If a computation_shape is present, it + // overrides num_cores_per_replica. + std::vector shape; + TF_RETURN_IF_ERROR( + GetNodeAttr(metadata_node->attrs(), "computation_shape", &shape)); + if (!shape.empty()) { + int64 num_cores_per_replica = 1LL; + for (int dim : shape) { + num_cores_per_replica *= dim; + } + call_def->mutable_attr()->erase("num_cores_per_replica"); + AddNodeAttr("num_cores_per_replica", num_cores_per_replica, call_def); + } + } else { + call_def->mutable_attr()->insert(attr); + } + } + MergeDebugInfo(NodeDebugInfo(metadata_node->def()), call_def); + graph->RemoveNode(metadata_node); + + if (std::find(args.begin(), args.end(), nullptr) != args.end()) { + return errors::InvalidArgument("Missing or non-consecutive arguments"); + } + + // Reorders the arguments. 
+ std::sort(args.begin(), args.end(), [&](Node* a, Node* b) { + // Non-constants appear before constants + bool a_is_guaranteed_constant = is_guaranteed_constant(*a); + bool b_is_guaranteed_constant = is_guaranteed_constant(*b); + // Non-packed values appear before packed values. + bool a_is_packed; + bool b_is_packed; + // Replicated values appear before non-replicated values. + bool a_not_replicated = !is_replicated_input(*a, &a_is_packed); + bool b_not_replicated = !is_replicated_input(*b, &b_is_packed); + int a_replicated_index = get_replicated_input_index(*a); + int b_replicated_index = get_replicated_input_index(*b); + // Non-resources appear before resources + bool a_is_resource = (a->output_type(0) == DT_RESOURCE); + bool b_is_resource = (b->output_type(0) == DT_RESOURCE); + // Uses the name as a tiebreaker so the output is deterministic. + StringPiece a_name(a->name()); + StringPiece b_name(b->name()); + return std::tie(a_is_guaranteed_constant, a_not_replicated, a_is_packed, + a_is_resource, a_replicated_index, a_name) < + std::tie(b_is_guaranteed_constant, b_not_replicated, b_is_packed, + b_is_resource, b_replicated_index, b_name); + }); + // Sorts the retvals by name so the order is deterministic. + std::sort(retvals.begin(), retvals.end(), + [](Node* a, Node* b) { return a->name() < b->name(); }); + + // Computes the permutation to produce the correct argument order, and update + // the argument indices. + int variable_start_index = num_args; + int guaranteed_const_start_index = num_args; + for (int i = 0; i < num_args; ++i) { + int index; + TF_RETURN_IF_ERROR(GetIndexAttr(*args[i], num_args, &index)); + if (args[i]->output_type(0) == DT_RESOURCE && + !is_replicated_input(*args[i]) && variable_start_index == num_args) { + variable_start_index = i; + } else if (is_guaranteed_constant(*args[i]) && + guaranteed_const_start_index == num_args) { + guaranteed_const_start_index = i; + } + (*input_permutation)[index] = i; + args[i]->AddAttr("index", i); + } + VLOG(4) << "variable_start_index: " << variable_start_index + << " guaranteed_const_start_index: " << guaranteed_const_start_index; + + // Computes the permutation to produce the correct retval order, and update + // the argument indices. + for (int i = 0; i < num_retvals; ++i) { + int index; + TF_RETURN_IF_ERROR(GetIndexAttr(*retvals[i], num_retvals, &index)); + (*output_permutation)[index] = i; + retvals[i]->AddAttr("index", i); + } + + AddNodeAttr(kTPUReplicateAttr, call_def->name(), call_def); + AddNodeAttr("_variable_start_index", variable_start_index, call_def); + AddNodeAttr("_guaranteed_const_start_index", guaranteed_const_start_index, + call_def); + + // Uniquify the function name. + GraphDef gdef; + graph->ToGraphDef(&gdef); + + // Before serialization, sort each node's control inputs to achieve + // determinism. Sorting control inputs could help (but not necessarily) + // create a deterministic serialization and fingerprint. Other sources of + // nondeterminism include unstable node ordering. + SortControlInputs(&gdef); + // Fingerprint the function. + // Nondeterminism in serialization would not lead to incorrect results, but + // may cause spurious cache misses. DeterministicSerialization is a + // best-effort deterministic serialization. 
+ string serialized;
+ TF_RET_CHECK(SerializeToStringDeterministic(gdef, &serialized));
+ uint64 fingerprint =
+ TpuCompileInterface::Get()->FingerprintString(serialized);
+ LOG(INFO) << "Subgraph fingerprint:" << fingerprint;
+ call_def->set_op(strings::StrCat(call_def->op(), "_", fingerprint));
+ return Status::OK();
+}
+
+DataType EdgeType(const Edge* edge) {
+ return edge->dst()->input_type(edge->dst_input());
+}
+
+// Adds the control inputs of `node` to `*deps`.
+void AddControlInputs(const Node& node, gtl::FlatSet<Node*>* deps) {
+ for (const Edge* edge : node.in_edges()) {
+ if (edge->IsControlEdge()) {
+ deps->insert(edge->src());
+ }
+ }
+}
+
+// Adds the control outputs of `node` to `*deps`.
+void AddControlOutputs(const Node& node, gtl::FlatSet<Node*>* deps) {
+ for (const Edge* edge : node.out_edges()) {
+ if (edge->IsControlEdge()) {
+ deps->insert(edge->dst());
+ }
+ }
+}
+
+// We add Identity nodes for _Arg/_Retval in XLA computation. Remove those
+// Identity nodes to simplify further processing.
+Status RemoveIdentityNodesForArgRetval(Graph* g) {
+ // Collect Identity nodes for _Arg/_Retval.
+ std::vector<Node*> identity_nodes;
+ for (Node* n : g->nodes()) {
+ if (n->type_string() == "Identity" &&
+ (HasNodeAttr(n->def(), "_tpu_input_identity") ||
+ HasNodeAttr(n->def(), "_tpu_output_identity"))) {
+ identity_nodes.push_back(n);
+ }
+ }
+
+ // Remove those Identity nodes.
+ for (Node* n : identity_nodes) {
+ const Edge* input_edge;
+ TF_RETURN_IF_ERROR(n->input_edge(0, &input_edge));
+
+ std::vector<const Edge*> output_edges;
+ for (const Edge* e : n->out_edges()) {
+ output_edges.push_back(e);
+ }
+ for (const Edge* e : output_edges) {
+ if (e->IsControlEdge()) {
+ Node* dst = e->dst();
+ g->RemoveEdge(e);
+ g->AddControlEdge(input_edge->src(), dst);
+ } else {
+ Node* dst = e->dst();
+ int dst_input = e->dst_input();
+ g->RemoveEdge(e);
+ g->AddEdge(input_edge->src(), input_edge->src_output(), dst, dst_input);
+ }
+ }
+ g->RemoveNode(n);
+ }
+
+ return Status::OK();
+}
+
+// Move outside compilation nodes at the beginning of XLA computation to host.
+// For XLA computation graph, we will add new _Arg nodes to replace those
+// outside compilation nodes.
+// For host graph, we will move those outside compilation nodes to host,
+// replicate them, and use them as XLA node's input.
+Status MoveHeadOutsideCompilationToHost(
+ const string& outside_compilation_attr_name, const string& xla_func_name,
+ const std::string& cluster_name, Graph* g, Graph* xla_graph, Node* xla_node,
+ Node* pivot_node) {
+ // Find outside compilation nodes that only have _Arg or other outside
+ // compilation nodes as input. These nodes will be moved to host graph.
+ std::vector oc_nodes_at_head; + const string kOnlyArgOrOcInputAttrName = "_xla_only_arg_or_oc_input"; + ReverseDFS( + *xla_graph, /*enter=*/nullptr, + [&](Node* n) { + bool has_non_arg_or_oc_input = false; + for (const Edge* e : n->in_edges()) { + if (e->src() == xla_graph->source_node()) { + continue; + } + if (!e->src()->IsArg() && + (!HasNodeAttr(e->src()->def(), outside_compilation_attr_name) || + !HasNodeAttr(e->src()->def(), kOnlyArgOrOcInputAttrName))) { + has_non_arg_or_oc_input = true; + break; + } + } + if (HasNodeAttr(n->def(), outside_compilation_attr_name) && + !has_non_arg_or_oc_input && + !HasNodeAttr(n->def(), kXlaIsPlaceholderForArg)) { + n->AddAttr(kOnlyArgOrOcInputAttrName, true); + oc_nodes_at_head.push_back(n); + } + }, + NodeComparatorName()); + std::vector const_nodes_to_remove; + for (Node* n : oc_nodes_at_head) { + // If a Const node is in "oc_nodes_at_head" but some of its successors are + // not, copy this Const node and use the copied node for those successors. + if (n->type_string() != "Const") { + continue; + } + + std::vector edges_to_replace; + for (const Edge* e : n->out_edges()) { + if (!e->IsControlEdge() && + HasNodeAttr(e->dst()->def(), outside_compilation_attr_name) && + !HasNodeAttr(e->dst()->def(), kOnlyArgOrOcInputAttrName)) { + edges_to_replace.push_back(e); + } + } + if (edges_to_replace.empty()) { + continue; + } + + Node* const_copy = xla_graph->CopyNode(n); + for (const Edge* e : edges_to_replace) { + Node* dst = e->dst(); + int dst_input = e->dst_input(); + xla_graph->RemoveEdge(e); + xla_graph->AddEdge(const_copy, 0, dst, dst_input); + } + // Make sure the copied node can be traced from source node. + xla_graph->AddControlEdge(xla_graph->source_node(), const_copy); + + // If this Const node has no data output any more, remove it later. + bool has_output_edge = false; + for (const Edge* e : n->out_edges()) { + if (!e->IsControlEdge()) { + has_output_edge = true; + break; + } + } + if (!has_output_edge) { + const_nodes_to_remove.push_back(n); + } + } + for (Node* n : const_nodes_to_remove) { + xla_graph->RemoveNode(n); + oc_nodes_at_head.erase( + std::remove(oc_nodes_at_head.begin(), oc_nodes_at_head.end(), n), + oc_nodes_at_head.end()); + } + if (VLOG_IS_ON(5)) { + for (Node* n : oc_nodes_at_head) { + VLOG(5) << "oc_nodes_at_head: " << n->DebugString(); + } + } + + // Copy all nodes in `oc_nodes_at_head` to host graph, and also replicate + // them. + + // Sometimes `xla_node` can have a lot of inputs, calling Node::input_edge + // will become very expensive in this case because it is doing a linear + // search inside. Create an input_edges vector ahead to make the lookups + // faster. 
+ std::vector input_edges; + TF_RETURN_IF_ERROR(xla_node->input_edges(&input_edges)); + + std::vector input_types; + TF_RETURN_IF_ERROR(GetNodeAttr(xla_node->attrs(), "Tinputs", &input_types)); + int num_distributed_vars; + TF_RETURN_IF_ERROR(GetNodeAttr(xla_node->attrs(), "num_distributed_variables", + &num_distributed_vars)); + int num_replicas; + TF_RETURN_IF_ERROR( + GetNodeAttr(xla_node->attrs(), "num_replicas", &num_replicas)); + int old_num_per_replica_inputs = + (input_types.size() - num_distributed_vars) / num_replicas; + VLOG(5) << "old_num_per_replica_inputs: " << old_num_per_replica_inputs; + std::map> node_images; + for (Node* n : oc_nodes_at_head) { + for (int replica_id = 0; replica_id < num_replicas; replica_id++) { + NodeDef copy_def = n->def(); + copy_def.set_name(absl::StrCat(n->name(), "_head_oc/R", replica_id)); + copy_def.clear_device(); + + Status s; + Node* copy_node = g->AddNode(copy_def, &s); + TF_RETURN_IF_ERROR(s); + + copy_node->AddAttr(kXlaReplicaIdAttrName, replica_id); + copy_node->AddAttr(kTPUReplicateAttr, cluster_name); + + for (const Edge* e : n->in_edges()) { + if (e->src() == xla_graph->source_node()) { + continue; + } + // Either e->src() is _Arg node, or it's in `node_images`. + if (e->src()->IsArg()) { + int index; + TF_RETURN_IF_ERROR(GetNodeAttr(e->src()->attrs(), "index", &index)); + const int new_index = + (index < old_num_per_replica_inputs) + ? (old_num_per_replica_inputs * replica_id + index) + : (old_num_per_replica_inputs * num_replicas + + (index - old_num_per_replica_inputs)); + const Edge* original_edge = input_edges.at(new_index); + g->AddEdge(original_edge->src(), original_edge->src_output(), + copy_node, e->dst_input()); + } else { + g->AddEdge(node_images[e->src()][replica_id], e->src_output(), + copy_node, e->dst_input()); + } + } + + // Add control edge between `copy_node` and `xla_node`, so these outside + // compilation nodes will be executed before XLA computation happens. + g->AddControlEdge(copy_node, xla_node); + + // Add control edge between `pivot_node` and `copy_node`, so `copy_node` + // belongs to same while loop as `xla_node`. + if (pivot_node) { + g->AddControlEdge(pivot_node, copy_node); + } + + node_images[n].push_back(copy_node); + } + } + + // Record output edges from `oc_nodes_at_head`. We will create an _Arg node + // for each of these edges. An obvious optimization here is to deduplicate + // these edges by . But that optimization will complicate + // the code, and in practice we usually do not have output edges with the + // same . + std::vector oc_output_edges; + std::vector new_arg_types; + for (Node* n : oc_nodes_at_head) { + for (const Edge* e : n->out_edges()) { + if (!e->IsControlEdge() && + node_images.find(e->dst()) == node_images.end()) { + VLOG(5) << "oc_output_edges: " << e->DebugString(); + oc_output_edges.push_back(e); + new_arg_types.push_back(e->src()->output_type(e->src_output())); + } + } + } + int new_num_per_replica_inputs = + old_num_per_replica_inputs + oc_output_edges.size(); + VLOG(5) << "new_num_per_replica_inputs: " << new_num_per_replica_inputs; + + // Process input edges for XLA node. 
+ int num_variables; + TF_RETURN_IF_ERROR( + GetNodeAttr(xla_node->attrs(), "NumVariables", &num_variables)); + std::vector broadcast_input_types, guaranteed_constant_types; + TF_RETURN_IF_ERROR(GetNodeAttr(xla_node->attrs(), "Tbroadcast_inputs", + &broadcast_input_types)); + TF_RETURN_IF_ERROR(GetNodeAttr(xla_node->attrs(), "Tguaranteed_constants", + &guaranteed_constant_types)); + int num_other_inputs = num_distributed_vars + num_variables + + broadcast_input_types.size() + + guaranteed_constant_types.size(); + VLOG(5) << "num_other_inputs: " << num_other_inputs; + + // Update `Tinputs` attribute for `xla_node`. + std::vector new_input_types; + // Order of new_input_types: old per-replica inputs -> new per-replica inputs + // -> distributed variables + new_input_types.reserve(num_replicas * new_num_per_replica_inputs + + num_distributed_vars); + for (int replica_id = 0; replica_id < num_replicas; ++replica_id) { + for (int i = 0; i < old_num_per_replica_inputs; ++i) { + new_input_types.push_back(input_types[i]); + } + for (int i = old_num_per_replica_inputs; i < new_num_per_replica_inputs; + ++i) { + new_input_types.push_back(new_arg_types[i - old_num_per_replica_inputs]); + } + } + const int num_new_per_replica_input_types = new_input_types.size(); + for (int i = input_types.size() - num_distributed_vars; + i < input_types.size(); i++) { + new_input_types.push_back(input_types[i]); + } + xla_node->ClearAttr("Tinputs"); + xla_node->AddAttr("Tinputs", new_input_types); + + int new_variable_start_index = + num_new_per_replica_input_types / num_replicas + num_distributed_vars + + broadcast_input_types.size(); + if (xla_node->attrs().Find("_variable_start_index") != nullptr) { + xla_node->ClearAttr("_variable_start_index"); + xla_node->AddAttr("_variable_start_index", new_variable_start_index); + } + int new_guaranteed_const_start_index = + new_variable_start_index + num_variables; + if (xla_node->attrs().Find("_guaranteed_const_start_index") != nullptr) { + xla_node->ClearAttr("_guaranteed_const_start_index"); + xla_node->AddAttr("_guaranteed_const_start_index", + new_guaranteed_const_start_index); + } + + // Move non per-replica input edges. + std::vector new_input_edges( + num_replicas * new_num_per_replica_inputs + num_other_inputs); + int end_input_index = + num_replicas * new_num_per_replica_inputs + num_other_inputs - 1; + int start_input_index = end_input_index + 1 - num_other_inputs; + for (int input_index = end_input_index; input_index >= start_input_index; + input_index--) { + const Edge* e = + input_edges.at(input_index - num_replicas * new_arg_types.size()); + Node* src = e->src(); + int src_output = e->src_output(); + g->RemoveEdge(e); + const Edge* new_input_edge = + g->AddEdge(src, src_output, xla_node, input_index); + new_input_edges[input_index] = new_input_edge; + } + + // Re-order old per-replica inputs edges, and add new per-replica input edges. 
+ std::vector> per_replica_inputs; + std::vector old_per_replica_edges; + for (int i = 0; i < old_num_per_replica_inputs * num_replicas; i++) { + const Edge* e = input_edges.at(i); + per_replica_inputs.push_back(std::make_pair(e->src(), e->src_output())); + old_per_replica_edges.push_back(e); + } + for (const Edge* e : old_per_replica_edges) { + g->RemoveEdge(e); + } + for (int replica_id = 0; replica_id < num_replicas; replica_id++) { + for (int input_index = 0; input_index < old_num_per_replica_inputs; + input_index++) { + Node* src = per_replica_inputs[replica_id * old_num_per_replica_inputs + + input_index] + .first; + int src_output = + per_replica_inputs[replica_id * old_num_per_replica_inputs + + input_index] + .second; + const Edge* new_input_edge = + g->AddEdge(src, src_output, xla_node, + replica_id * new_num_per_replica_inputs + input_index); + new_input_edges[input_index] = new_input_edge; + } + for (int input_index = old_num_per_replica_inputs; + input_index < new_num_per_replica_inputs; input_index++) { + Node* original_src = + oc_output_edges[input_index - old_num_per_replica_inputs]->src(); + int original_src_output = + oc_output_edges[input_index - old_num_per_replica_inputs] + ->src_output(); + Node* src = node_images[original_src][replica_id]; + const Edge* new_input_edge = + g->AddEdge(src, original_src_output, xla_node, + replica_id * new_num_per_replica_inputs + input_index); + new_input_edges[input_index] = new_input_edge; + } + } + + // Adjust original _Arg nodes in `xla_graph`. + for (Node* n : xla_graph->nodes()) { + if (n->IsArg()) { + int index; + TF_RETURN_IF_ERROR(GetNodeAttr(n->attrs(), "index", &index)); + if (index >= old_num_per_replica_inputs) { + index += new_arg_types.size(); + n->ClearAttr("index"); + n->AddAttr("index", index); + } + } + } + + // Create new _Arg nodes in `xla_graph`. + for (int i = old_num_per_replica_inputs; i < new_num_per_replica_inputs; + i++) { + NodeDefBuilder arg_builder(absl::StrCat("arg_", i), + FunctionLibraryDefinition::kArgOp); + arg_builder.Attr("T", new_arg_types[i - old_num_per_replica_inputs]); + arg_builder.Attr("index", i); + NodeDef arg_def; + TF_RETURN_IF_ERROR(arg_builder.Finalize(&arg_def)); + Status s; + Node* arg_node = xla_graph->AddNode(arg_def, &s); + TF_RETURN_IF_ERROR(s); + const Edge* original_edge = oc_output_edges[i - old_num_per_replica_inputs]; + Node* dst = original_edge->dst(); + int dst_input = original_edge->dst_input(); + xla_graph->RemoveEdge(original_edge); + xla_graph->AddEdge(arg_node, 0, dst, dst_input); + } + + // For lifted arg nodes: + // 1. Add a Placeholder node in `xla_graph`. When we build host side graph + // in ExtractOutsideCompilationPass, we will use this new Placeholder node + // instead of lifted arg node here. + // 2. Add an IdentityN node in `g` to indicate its inputs. We will reconnect + // this IdentityN node and this lifted arg node's usage nodes in + // DistributedTPURewritePass. 
+ for (Node* n : oc_nodes_at_head) { + bool is_lifted_arg; + string outside_compilation_attr; + if (!TryGetNodeAttr(n->def(), kXlaIsLiftedArgAttrName, &is_lifted_arg) || + !TryGetNodeAttr(n->def(), kOutsideCompilationAttr, + &outside_compilation_attr)) { + continue; + } + + TF_RET_CHECK(n->IsIdentity()); + NodeDefBuilder ph_builder(absl::StrCat("placeholder_", n->name()), + "Placeholder"); + DataType dtype; + TF_RETURN_IF_ERROR(GetNodeAttr(n->def(), "T", &dtype)); + ph_builder.Attr("dtype", dtype); + ph_builder.Attr(kXlaIsLiftedArgAttrName, true); + ph_builder.Attr(kOutsideCompilationAttr, outside_compilation_attr); + NodeDef ph_def; + TF_RETURN_IF_ERROR(ph_builder.Finalize(&ph_def)); + Status s; + xla_graph->AddNode(ph_def, &s); + TF_RETURN_IF_ERROR(s); + + Node* input_node; + TF_RETURN_IF_ERROR(n->input_node(0, &input_node)); + TF_RET_CHECK(input_node->type_string() == "_Arg"); + int index; + TF_RETURN_IF_ERROR(GetNodeAttr(input_node->def(), "index", &index)); + // TODO(b/74023706): for now we only support resource input (e.g. summary + // writer), which is non-replicated input. Support replicated input as + // well. + TF_RET_CHECK(index >= new_num_per_replica_inputs + num_distributed_vars); + const Edge* input_edge = + new_input_edges.at(num_replicas * new_num_per_replica_inputs + index - + new_num_per_replica_inputs); + NodeDefBuilder id_builder(absl::StrCat("lifted_arg_input_", index), + "IdentityN"); + DataType input_dtype = + input_edge->src()->output_type(input_edge->src_output()); + id_builder.Attr("T", std::vector(num_replicas, input_dtype)); + std::vector inputs( + num_replicas, + NodeDefBuilder::NodeOut{input_edge->src()->name(), + input_edge->src_output(), input_dtype}); + id_builder.Attr(kXlaOutsideCompilationInputsAttrName, + outside_compilation_attr); + id_builder.Input(inputs); + NodeDef id_def; + TF_RETURN_IF_ERROR(id_builder.Finalize(&id_def)); + Node* id_node = g->AddNode(id_def, &s); + TF_RETURN_IF_ERROR(s); + for (int i = 0; i < num_replicas; i++) { + g->AddEdge(input_edge->src(), input_edge->src_output(), id_node, i); + } + } + + // Remove `oc_nodes_at_head`. + for (Node* n : oc_nodes_at_head) { + xla_graph->RemoveNode(n); + } + + VLOG(4) << "MoveHeadOutsideCompilationToHost host graph: " + << DumpGraphToFile(absl::StrCat("move_head_oc_host_", xla_func_name), + *g); + VLOG(4) << "MoveHeadOutsideCompilationToHost XLA graph: " + << DumpGraphToFile(absl::StrCat("move_head_oc_xla_", xla_func_name), + *xla_graph); + + return Status::OK(); +} + +// If there are any unused _Arg nodes in `xla_graph`, remove them from +// `xla_graph` and remove corresponding input edge in host graph `g`. +Status RemoveUnusedXlaInput(const string& xla_func_name, Graph* g, + Graph* xla_graph, Node* xla_node) { + // Find unused _Arg nodes, and remove them. 
+ std::vector input_types; + TF_RETURN_IF_ERROR(GetNodeAttr(xla_node->def(), "Tinputs", &input_types)); + std::vector mirrored_variable_indices; + if (xla_node->attrs().Find(TPUREPLICATE_MIRRORED_VAR_INDICES_ATTR) != + nullptr) { + TF_RETURN_IF_ERROR(GetNodeAttr(xla_node->def(), + TPUREPLICATE_MIRRORED_VAR_INDICES_ATTR, + &mirrored_variable_indices)); + } + std::vector broadcast_input_types; + TF_RETURN_IF_ERROR(GetNodeAttr(xla_node->def(), "Tbroadcast_inputs", + &broadcast_input_types)); + std::vector guaranteed_constant_types; + TF_RETURN_IF_ERROR(GetNodeAttr(xla_node->def(), "Tguaranteed_constants", + &guaranteed_constant_types)); + int num_variables; + TF_RETURN_IF_ERROR( + GetNodeAttr(xla_node->def(), "NumVariables", &num_variables)); + int num_replicas; + TF_RETURN_IF_ERROR( + GetNodeAttr(xla_node->def(), "num_replicas", &num_replicas)); + int num_distributed_vars; + TF_RETURN_IF_ERROR(GetNodeAttr(xla_node->attrs(), "num_distributed_variables", + &num_distributed_vars)); + int num_per_replica_inputs = + (input_types.size() - num_distributed_vars) / num_replicas; + std::set arg_indices_to_remove; + std::vector arg_nodes_to_update, nodes_to_remove; + int num_args = 0, num_removed_per_replica_inputs = 0, + num_removed_distributed_vars = 0; + for (Node* n : xla_graph->nodes()) { + if (!n->IsArg()) { + continue; + } + + bool has_output = false; + for (const Edge* e : n->out_edges()) { + if (e->dst() != xla_graph->sink_node()) { + has_output = true; + break; + } + } + + num_args++; + int index; + TF_RETURN_IF_ERROR(GetNodeAttr(n->def(), "index", &index)); + if (has_output) { + arg_nodes_to_update.push_back(n); + continue; + } + + arg_indices_to_remove.insert(index); + if (index < num_per_replica_inputs) { + num_removed_per_replica_inputs++; + } else if (index < num_per_replica_inputs + num_distributed_vars) { + num_removed_distributed_vars++; + } + nodes_to_remove.push_back(n); + } + for (Node* n : nodes_to_remove) { + xla_graph->RemoveNode(n); + } + + // Update `index` for other _Arg nodes. + std::map arg_index_mapping; + int new_arg_index = 0; + for (int i = 0; i < num_args; i++) { + if (arg_indices_to_remove.find(i) != arg_indices_to_remove.end()) { + continue; + } else { + arg_index_mapping[i] = new_arg_index; + new_arg_index++; + } + } + for (Node* n : arg_nodes_to_update) { + int index; + TF_RETURN_IF_ERROR(GetNodeAttr(n->def(), "index", &index)); + n->ClearAttr("index"); + n->AddAttr("index", arg_index_mapping[index]); + } + + // Re-order replicated index edges for `xla_node`. + + // Sometimes `xla_node` can have a lot of inputs, calling Node::input_edge + // will become very expensive in this case because it is doing a linear search + // inside. Create a input_edges vector ahead to make the lookups faster. + std::vector input_edges; + TF_RETURN_IF_ERROR(xla_node->input_edges(&input_edges)); + + const int num_new_per_replica_inputs = + num_per_replica_inputs - num_removed_per_replica_inputs; + for (int i = 0; i < num_replicas; i++) { + for (int j = 0; j < num_per_replica_inputs; j++) { + auto iter = arg_index_mapping.find(j); + if (iter != arg_index_mapping.end()) { + const Edge* e = input_edges.at(i * num_per_replica_inputs + j); + Node* src = e->src(); + int src_output = e->src_output(); + int dst_input = i * num_new_per_replica_inputs + iter->second; + + g->RemoveEdge(e); + g->AddEdge(src, src_output, xla_node, dst_input); + } else { + const Edge* e = input_edges.at(i * num_per_replica_inputs + j); + g->RemoveEdge(e); + } + } + } + + // Move other data input edges. 
+ for (int i = num_replicas * num_per_replica_inputs; + i < xla_node->num_inputs(); i++) { + int arg_index = + num_per_replica_inputs + i - num_replicas * num_per_replica_inputs; + auto iter = arg_index_mapping.find(arg_index); + if (iter != arg_index_mapping.end()) { + const Edge* e = input_edges.at(i); + Node* src = e->src(); + int src_output = e->src_output(); + int dst_input = num_replicas * num_new_per_replica_inputs + iter->second - + num_new_per_replica_inputs; + + g->RemoveEdge(e); + g->AddEdge(src, src_output, xla_node, dst_input); + } else { + const Edge* e = input_edges.at(i); + g->RemoveEdge(e); + } + } + + // Update attributes for `xla_node`. + std::vector new_input_types; + for (int i = 0; i < num_replicas; i++) { + for (int j = 0; j < num_per_replica_inputs; j++) { + auto iter = arg_index_mapping.find(j); + if (iter != arg_index_mapping.end()) { + new_input_types.push_back(input_types[iter->first]); + } + } + } + for (int i = 0; i < num_distributed_vars; ++i) { + auto iter = arg_index_mapping.find(i + num_per_replica_inputs); + if (iter != arg_index_mapping.end()) { + new_input_types.push_back( + input_types[iter->first - num_per_replica_inputs + + num_per_replica_inputs * num_replicas]); + } + } + xla_node->ClearAttr("Tinputs"); + xla_node->AddAttr("Tinputs", new_input_types); + + const int num_new_distributed_vars = + num_distributed_vars - num_removed_distributed_vars; + xla_node->ClearAttr("num_distributed_variables"); + xla_node->AddAttr("num_distributed_variables", num_new_distributed_vars); + + if (!mirrored_variable_indices.empty()) { + std::vector new_mirrored_variable_indices; + absl::flat_hash_set old_mirrored_variable_indices_set; + for (int index : mirrored_variable_indices) { + old_mirrored_variable_indices_set.insert(index); + } + for (int i = 0; i < num_per_replica_inputs + num_distributed_vars; i++) { + auto iter = arg_index_mapping.find(i); + if (iter != arg_index_mapping.end() && + old_mirrored_variable_indices_set.contains(iter->first)) { + new_mirrored_variable_indices.push_back(iter->second); + } + } + xla_node->ClearAttr(TPUREPLICATE_MIRRORED_VAR_INDICES_ATTR); + xla_node->AddAttr(TPUREPLICATE_MIRRORED_VAR_INDICES_ATTR, + new_mirrored_variable_indices); + } + + int num_replicated_inputs = num_per_replica_inputs + num_distributed_vars; + std::vector new_broadcast_input_types; + for (int i = 0; i < broadcast_input_types.size(); i++) { + int arg_index = num_replicated_inputs + i; + if (arg_index_mapping.find(arg_index) != arg_index_mapping.end()) { + new_broadcast_input_types.push_back(broadcast_input_types[i]); + } + } + xla_node->ClearAttr("Tbroadcast_inputs"); + xla_node->AddAttr("Tbroadcast_inputs", new_broadcast_input_types); + int new_num_variables = 0; + for (int i = 0; i < num_variables; i++) { + int arg_index = num_replicated_inputs + broadcast_input_types.size() + i; + if (arg_index_mapping.find(arg_index) != arg_index_mapping.end()) { + new_num_variables++; + } + } + xla_node->ClearAttr("NumVariables"); + xla_node->AddAttr("NumVariables", new_num_variables); + std::vector new_guaranteed_constant_types; + for (int i = 0; i < guaranteed_constant_types.size(); i++) { + int arg_index = num_replicated_inputs + broadcast_input_types.size() + + num_variables + i; + if (arg_index_mapping.find(arg_index) != arg_index_mapping.end()) { + new_guaranteed_constant_types.push_back(guaranteed_constant_types[i]); + } + } + xla_node->ClearAttr("Tguaranteed_constants"); + xla_node->AddAttr("Tguaranteed_constants", new_guaranteed_constant_types); + + int 
new_variable_start_index = num_new_per_replica_inputs + + num_new_distributed_vars + + new_broadcast_input_types.size(); + if (xla_node->attrs().Find("_variable_start_index") != nullptr) { + xla_node->ClearAttr("_variable_start_index"); + xla_node->AddAttr("_variable_start_index", new_variable_start_index); + } + int new_guaranteed_const_start_index = + new_variable_start_index + new_num_variables; + if (xla_node->attrs().Find("_guaranteed_const_start_index") != nullptr) { + xla_node->ClearAttr("_guaranteed_const_start_index"); + xla_node->AddAttr("_guaranteed_const_start_index", + new_guaranteed_const_start_index); + } + + VLOG(4) << "RemoveUnusedXlaInput host graph: " + << DumpGraphToFile( + absl::StrCat("remove_unused_input_host_", xla_func_name), *g); + VLOG(4) << "RemoveUnusedXlaInput XLA graph: " + << DumpGraphToFile( + absl::StrCat("remove_unused_input_xla_", xla_func_name), + *xla_graph); + + return Status::OK(); +} + +// Move outside compilation nodes at the end of XLA computation to host. +// For XLA computation graph, we will add new _Retval nodes to replace those +// outside compilation nodes. +// For host graph, we will move those outside compilation nodes to host, +// replicate them, and use them as XLA node's output. +Status MoveTailOutsideCompilationToHost( + const string& outside_compilation_attr_name, const string& xla_func_name, + const std::string& cluster_name, Graph* g, Graph* xla_graph, Node* xla_node, + Node* pivot_node) { + // Find outside compilation nodes that only have _Retval or other outside + // compilation nodes as output. These nodes will be moved to host graph. + std::vector oc_nodes_at_tail; + const string kOnlyRetOrOcOutputAttrName = "_xla_only_ret_or_oc_output"; + DFS( + *xla_graph, /*enter=*/nullptr, + [&](Node* n) { + bool has_non_ret_or_oc_output = false; + for (const Edge* e : n->out_edges()) { + if (e->dst() == xla_graph->sink_node()) { + continue; + } + if (!e->dst()->IsRetval() && + (!HasNodeAttr(e->dst()->def(), outside_compilation_attr_name) || + !HasNodeAttr(e->dst()->def(), kOnlyRetOrOcOutputAttrName))) { + has_non_ret_or_oc_output = true; + break; + } + } + if (HasNodeAttr(n->def(), outside_compilation_attr_name) && + !has_non_ret_or_oc_output) { + n->AddAttr(kOnlyRetOrOcOutputAttrName, true); + oc_nodes_at_tail.push_back(n); + } + }, + NodeComparatorName()); + if (VLOG_IS_ON(5)) { + for (Node* n : oc_nodes_at_tail) { + VLOG(5) << "oc_nodes_at_tail: " << n->DebugString(); + } + } + + // Record input edges from `oc_nodes_at_tail`. We will create an _Retval node + // for each of these edges. An obvious optimization here is to deduplicate + // these edges by . But that optimization will complicate + // the code, and in practice we usually do not have input edges with the + // same . 
+ std::vector oc_input_edges; + std::vector new_ret_types; + for (Node* n : oc_nodes_at_tail) { + for (const Edge* e : n->in_edges()) { + if (!e->IsControlEdge() && + !HasNodeAttr(e->src()->def(), kOnlyRetOrOcOutputAttrName)) { + VLOG(5) << "oc_input_edges: " << e->DebugString(); + oc_input_edges.push_back(e); + new_ret_types.push_back(e->src()->output_type(e->src_output())); + } + } + } + std::vector output_types; + TF_RETURN_IF_ERROR( + GetNodeAttr(xla_node->attrs(), "output_types", &output_types)); + int num_replicas; + TF_RETURN_IF_ERROR( + GetNodeAttr(xla_node->attrs(), "num_replicas", &num_replicas)); + int old_num_replicated_outputs = output_types.size() / num_replicas; + int new_num_replicated_outputs = + old_num_replicated_outputs + oc_input_edges.size(); + VLOG(5) << "old_num_replicated_outputs: " << old_num_replicated_outputs; + VLOG(5) << "new_num_replicated_outputs: " << new_num_replicated_outputs; + + // Update `output_types` attribute for `xla_node`. + std::vector new_output_types; + for (int replica_id = 0; replica_id < num_replicas; replica_id++) { + for (int i = 0; i < old_num_replicated_outputs; i++) { + new_output_types.push_back(output_types[i]); + } + for (int i = old_num_replicated_outputs; i < new_num_replicated_outputs; + i++) { + new_output_types.push_back(new_ret_types[i - old_num_replicated_outputs]); + } + } + xla_node->ClearAttr("output_types"); + xla_node->AddAttr("output_types", new_output_types); + + // Re-order old replicated output edges. Since a node could potentially + // connect to multiple nodes, build a vector> mapping of + // output index to input nodes/index. + // The outer vector represents the output index, the inner vector + // represents the destination node and input index pair with the possibility + // of multiple node/index pairs. + std::vector>> replicated_outputs( + old_num_replicated_outputs * num_replicas); + std::vector old_replicated_edges; + for (const Edge* e : xla_node->out_edges()) { + if (e->src_output() >= 0 && + e->src_output() < old_num_replicated_outputs * num_replicas) { + replicated_outputs[e->src_output()].push_back( + std::make_pair(e->dst(), e->dst_input())); + old_replicated_edges.push_back(e); + } + } + for (const Edge* e : old_replicated_edges) { + g->RemoveEdge(e); + } + for (int replica_id = 0; replica_id < num_replicas; replica_id++) { + for (int output_index = 0; output_index < old_num_replicated_outputs; + output_index++) { + for (const auto& node_input_pair : + replicated_outputs[replica_id * old_num_replicated_outputs + + output_index]) { + Node* dst = node_input_pair.first; + int dst_input = node_input_pair.second; + g->AddEdge(xla_node, + replica_id * new_num_replicated_outputs + output_index, dst, + dst_input); + } + } + } + + // Copy all nodes in `oc_nodes_at_tail` to host graph, and also replicate + // them. + std::map> node_images; + for (Node* n : oc_nodes_at_tail) { + for (int replica_id = 0; replica_id < num_replicas; replica_id++) { + NodeDef copy_def = n->def(); + copy_def.set_name(absl::StrCat(n->name(), "_tail_oc/R", replica_id)); + copy_def.clear_device(); + + Status s; + Node* copy_node = g->AddNode(copy_def, &s); + TF_RETURN_IF_ERROR(s); + + copy_node->AddAttr(kXlaReplicaIdAttrName, replica_id); + copy_node->AddAttr(kTPUReplicateAttr, cluster_name); + + for (const Edge* e : n->out_edges()) { + if (e->dst() == xla_graph->sink_node()) { + continue; + } + // Either e->dst() is _Retval, or it's in `node_images`. 
+ if (e->dst()->IsRetval()) { + int index; + TF_RETURN_IF_ERROR(GetNodeAttr(e->dst()->attrs(), "index", &index)); + for (const auto& output : + replicated_outputs[replica_id * old_num_replicated_outputs + + index]) { + // Remove original input edge, if existent. + const Edge* original_edge; + Status s = output.first->input_edge(output.second, &original_edge); + if (s.ok()) { + g->RemoveEdge(original_edge); + } + g->AddEdge(copy_node, e->src_output(), output.first, output.second); + } + } else { + g->AddEdge(copy_node, e->src_output(), + node_images[e->dst()][replica_id], e->dst_input()); + } + } + + // Add attribute "_xla_tail_outside_compilation" to `copy_node`, and add a + // control edge between `xla_node` and `copy_node`. As a result, in later + // rewriting pass, a control edge will be added between `copy_node` and + // "control_after" node for the XLA computation, so `copy_node` will be + // executed before XLA computation's final results. + copy_node->AddAttr("_xla_tail_outside_compilation", true); + g->AddControlEdge(xla_node, copy_node); + + // Add control edge between `pivot_node` and `copy_node`, so `copy_node` + // belongs to same while loop as `xla_node`. + if (pivot_node) { + g->AddControlEdge(pivot_node, copy_node); + } + + node_images[n].push_back(copy_node); + } + } + + // Connect new output values of `xla_node` to dst nodes of `oc_input_edges`. + for (int i = 0; i < new_ret_types.size(); i++) { + const Edge* original_edge = oc_input_edges[i]; + for (int replica_id = 0; replica_id < num_replicas; replica_id++) { + int src_output = replica_id * new_num_replicated_outputs + + old_num_replicated_outputs + i; + Node* dst = node_images[original_edge->dst()][replica_id]; + g->AddEdge(xla_node, src_output, dst, original_edge->dst_input()); + } + } + + // Create new _Retval nodes in `xla_graph`. + for (int i = old_num_replicated_outputs; i < new_num_replicated_outputs; + i++) { + NodeDefBuilder ret_builder(absl::StrCat("ret_", i), + FunctionLibraryDefinition::kRetOp); + ret_builder.Attr("T", new_ret_types[i - old_num_replicated_outputs]); + ret_builder.Attr("index", i); + const Edge* original_edge = oc_input_edges[i - old_num_replicated_outputs]; + Node* src = original_edge->src(); + int src_output = original_edge->src_output(); + ret_builder.Input(src->name(), src_output, src->output_type(src_output)); + NodeDef ret_def; + TF_RETURN_IF_ERROR(ret_builder.Finalize(&ret_def)); + Status s; + Node* ret_node = xla_graph->AddNode(ret_def, &s); + TF_RETURN_IF_ERROR(s); + xla_graph->RemoveEdge(original_edge); + xla_graph->AddEdge(src, src_output, ret_node, 0); + } + + // Remove `oc_nodes_at_tail`. + for (Node* n : oc_nodes_at_tail) { + xla_graph->RemoveNode(n); + } + + // We cannot leave _Retval with no input. Add a placeholder input, which will + // be removed later with unused _Retval. 
+ std::vector unused_rets; + for (Node* n : xla_graph->nodes()) { + if (n->IsRetval() && n->in_edges().empty()) { + unused_rets.push_back(n); + } + } + for (Node* n : unused_rets) { + NodeDefBuilder builder(absl::StrCat("placeholder_", n->name()), + "Placeholder"); + DataType dtype; + TF_RETURN_IF_ERROR(GetNodeAttr(n->def(), "T", &dtype)); + builder.Attr("dtype", dtype); + builder.Attr(kXlaIsPlaceholderForTailOcAttrName, true); + NodeDef def; + TF_RETURN_IF_ERROR(builder.Finalize(&def)); + Status s; + Node* placeholder = xla_graph->AddNode(def, &s); + TF_RETURN_IF_ERROR(s); + xla_graph->AddEdge(placeholder, 0, n, 0); + } + + VLOG(4) << "MoveTailOutsideCompilationToHost host graph: " + << DumpGraphToFile(absl::StrCat("move_tail_oc_host_", xla_func_name), + *g); + VLOG(4) << "MoveTaildOutsideCompilationToHost XLA graph: " + << DumpGraphToFile(absl::StrCat("move_tail_oc_xla_", xla_func_name), + *xla_graph); + + return Status::OK(); +} + +Status ReplaceArgUsedByOutsideCompilationWithPlaceholder( + const string& outside_compilation_attr_name, const string& xla_func_name, + Graph* g, Graph* xla_graph, Node* xla_node) { + std::vector input_types; + TF_RETURN_IF_ERROR(GetNodeAttr(xla_node->attrs(), "Tinputs", &input_types)); + int num_distributed_vars; + TF_RETURN_IF_ERROR(GetNodeAttr(xla_node->attrs(), "num_distributed_variables", + &num_distributed_vars)); + int num_replicas; + TF_RETURN_IF_ERROR( + GetNodeAttr(xla_node->attrs(), "num_replicas", &num_replicas)); + int num_per_replica_inputs = + (input_types.size() - num_distributed_vars) / num_replicas; + + for (Node* n : xla_graph->op_nodes()) { + if (!n->IsArg()) { + continue; + } + + DataType dtype; + TF_RETURN_IF_ERROR(GetNodeAttr(n->def(), "T", &dtype)); + // TODO(b/74023706): enable moving normal data tensors. + if (dtype != DT_RESOURCE) { + continue; + } + + std::vector oc_out_edges; + for (const Edge* e : n->out_edges()) { + if (e->IsControlEdge() || + !HasNodeAttr(e->dst()->def(), kOutsideCompilationAttr)) { + continue; + } + + oc_out_edges.push_back(e); + } + if (oc_out_edges.empty()) { + continue; + } + + // Sometimes `xla_node` can have a lot of inputs, calling Node::input_edge + // will become very expensive in this case because it is doing a linear + // search inside. Create an input_edges vector ahead to make the lookups + // faster. + std::vector input_edges; + TF_RETURN_IF_ERROR(xla_node->input_edges(&input_edges)); + + // Build an IdentityN node to record inputs for this _Arg node. 
+ int index; + TF_RETURN_IF_ERROR(GetNodeAttr(n->def(), "index", &index)); + string oc_identifier = absl::StrCat("oc_only_arg_", index); + NodeDefBuilder id_builder(absl::StrCat(oc_identifier, "_inputs"), + "IdentityN"); + std::vector dtypes(num_replicas, dtype); + id_builder.Attr("T", dtypes); + id_builder.Attr(kXlaOutsideCompilationInputsAttrName, oc_identifier); + std::vector inputs(num_replicas); + if (index >= num_per_replica_inputs) { + const Edge* e = input_edges.at(num_replicas * num_per_replica_inputs + + (index - num_per_replica_inputs)); + for (int i = 0; i < num_replicas; i++) { + inputs[i] = + NodeDefBuilder::NodeOut{e->src()->name(), e->src_output(), + e->src()->output_type(e->src_output())}; + } + } else { + for (int i = 0; i < num_replicas; i++) { + const Edge* e = input_edges.at(i * num_per_replica_inputs + index); + inputs[i] = + NodeDefBuilder::NodeOut{e->src()->name(), e->src_output(), + e->src()->output_type(e->src_output())}; + } + } + id_builder.Input(inputs); + NodeDef id_def; + TF_RETURN_IF_ERROR(id_builder.Finalize(&id_def)); + Status s; + Node* id_node = g->AddNode(id_def, &s); + TF_RETURN_IF_ERROR(s); + if (index >= num_per_replica_inputs) { + const Edge* e = input_edges.at(num_replicas * num_per_replica_inputs + + (index - num_per_replica_inputs)); + for (int i = 0; i < num_replicas; i++) { + g->AddEdge(e->src(), e->src_output(), id_node, i); + } + } else { + for (int i = 0; i < num_replicas; i++) { + const Edge* e = input_edges.at(i * num_per_replica_inputs + index); + g->AddEdge(e->src(), e->src_output(), id_node, i); + } + } + + for (const Edge* e : oc_out_edges) { + // 'e' will use a new Placeholder node as input. + NodeDefBuilder ph_builder(xla_graph->NewName("ph_for_arg_in_oc_"), + "Placeholder"); + ph_builder.Attr("dtype", dtype); + + string outside_compilation_attr; + TF_RETURN_IF_ERROR(GetNodeAttr(e->dst()->def(), kOutsideCompilationAttr, + &outside_compilation_attr)); + ph_builder.Attr(kOutsideCompilationAttr, outside_compilation_attr); + ph_builder.Attr(kXlaOutsideCompilationInputsAttrName, oc_identifier); + ph_builder.Attr(kXlaIsPlaceholderForArg, true); + NodeDef ph_def; + TF_RETURN_IF_ERROR(ph_builder.Finalize(&ph_def)); + Status s; + Node* ph_node = xla_graph->AddNode(ph_def, &s); + TF_RETURN_IF_ERROR(s); + Node* dst = e->dst(); + int dst_input = e->dst_input(); + xla_graph->RemoveEdge(e); + xla_graph->AddEdge(ph_node, 0, dst, dst_input); + xla_graph->AddControlEdge(xla_graph->source_node(), ph_node); + } + } + VLOG(4) << "ReplaceOutsideCompilationOnlyArgWithPlaceholder host graph: " + << DumpGraphToFile( + absl::StrCat("replace_oc_only_arg_host_", xla_func_name), *g); + VLOG(4) << "ReplaceOutsideCompilationOnlyArgWithPlaceholder XLA graph: " + << DumpGraphToFile( + absl::StrCat("replace_oc_only_arg_xla_", xla_func_name), + *xla_graph); + return Status::OK(); +} + +// If there are any unused _Retval nodes in `xla_graph` (whose input is a +// Placeholder node), remove them from `xla_graph` and remove corresponding +// output edge in host graph `g`. +Status RemoveUnusedXlaOutput(const string& xla_func_name, Graph* g, + Graph* xla_graph, Node* xla_node) { + // Find unused _Retval nodes, and remove them. 
+ std::vector output_types; + TF_RETURN_IF_ERROR( + GetNodeAttr(xla_node->def(), "output_types", &output_types)); + int num_replicas; + TF_RETURN_IF_ERROR( + GetNodeAttr(xla_node->def(), "num_replicas", &num_replicas)); + int num_replicated_outputs = output_types.size() / num_replicas; + std::set ret_indices_to_remove; + std::vector ret_nodes_to_update, nodes_to_remove; + int num_rets = 0; + for (Node* n : xla_graph->nodes()) { + if (!n->IsRetval()) { + continue; + } + + num_rets++; + + const Edge* e; + TF_RETURN_IF_ERROR(n->input_edge(0, &e)); + if (e->src()->type_string() != "Placeholder" || + !HasNodeAttr(e->src()->def(), kXlaIsPlaceholderForTailOcAttrName)) { + ret_nodes_to_update.push_back(n); + continue; + } + + int index; + TF_RETURN_IF_ERROR(GetNodeAttr(n->def(), "index", &index)); + ret_indices_to_remove.insert(index); + nodes_to_remove.push_back(e->src()); + nodes_to_remove.push_back(n); + } + for (Node* n : nodes_to_remove) { + xla_graph->RemoveNode(n); + } + + // Update `index` for other _Arg nodes. + std::map ret_index_mapping; + int new_ret_index = 0; + for (int i = 0; i < num_rets; i++) { + if (ret_indices_to_remove.find(i) != ret_indices_to_remove.end()) { + continue; + } else { + ret_index_mapping[i] = new_ret_index; + new_ret_index++; + } + } + for (Node* n : ret_nodes_to_update) { + int index; + TF_RETURN_IF_ERROR(GetNodeAttr(n->def(), "index", &index)); + n->ClearAttr("index"); + n->AddAttr("index", ret_index_mapping[index]); + } + + // Update `output_types` attribute for `xla_node`. + std::vector new_output_types; + for (int i = 0; i < num_replicas; i++) { + for (const auto& e : ret_index_mapping) { + new_output_types.push_back(output_types[e.first]); + } + } + + xla_node->ClearAttr("output_types"); + xla_node->AddAttr("output_types", new_output_types); + + // Re-order replicated output edges for `xla_node`. + std::vector> output_edges(num_replicas * + num_replicated_outputs); + for (const Edge* e : xla_node->out_edges()) { + if (e->src_output() >= 0 && + e->src_output() < num_replicas * num_replicated_outputs) { + output_edges[e->src_output()].push_back(e); + } + } + for (int i = 0; i < num_replicas; i++) { + for (int j = 0; j < num_replicated_outputs; j++) { + auto iter = ret_index_mapping.find(j); + if (iter != ret_index_mapping.end()) { + for (const Edge* e : output_edges[i * num_replicated_outputs + j]) { + Node* dst = e->dst(); + int dst_input = e->dst_input(); + int src_output = + i * (num_replicated_outputs - ret_indices_to_remove.size()) + + iter->second; + g->RemoveEdge(e); + g->AddEdge(xla_node, src_output, dst, dst_input); + } + } else { + TF_RET_CHECK(output_edges[i * num_replicated_outputs + j].empty()) + << "Output edge not removed: " + << output_edges[i * num_replicated_outputs + j][0]->DebugString(); + } + } + } + + VLOG(4) << "RemoveUnusedXlaOutput host graph: " + << DumpGraphToFile( + absl::StrCat("remove_unused_output_host_", xla_func_name), *g); + VLOG(4) << "RemoveUnusedXlaOutput XLA graph: " + << DumpGraphToFile( + absl::StrCat("remove_unused_output_xla_", xla_func_name), + *xla_graph); + + return Status::OK(); +} + +// For data edges between _Arg and _Retval in `xla_graph`, remove them and +// change input/output edges in `g` (host graph). For now, we only consider +// replicated inputs. +Status RemoveEdgesBetweenArgAndRetval(const string& xla_func_name, Graph* g, + Graph* xla_graph, Node* xla_node) { + // Collect data edges between _Arg and _Retval. 
+ int num_replicas; + TF_RETURN_IF_ERROR( + GetNodeAttr(xla_node->def(), "num_replicas", &num_replicas)); + std::vector input_types; + TF_RETURN_IF_ERROR(GetNodeAttr(xla_node->def(), "Tinputs", &input_types)); + int num_distributed_vars; + TF_RETURN_IF_ERROR(GetNodeAttr(xla_node->attrs(), "num_distributed_variables", + &num_distributed_vars)); + int old_num_per_replica_inputs = + (input_types.size() - num_distributed_vars) / num_replicas; + std::vector output_types; + TF_RETURN_IF_ERROR( + GetNodeAttr(xla_node->def(), "output_types", &output_types)); + int old_num_outputs = output_types.size() / num_replicas; + std::vector edges; + for (const Edge* e : xla_graph->edges()) { + if (!e->IsControlEdge() && e->src()->IsArg() && e->dst()->IsRetval()) { + edges.push_back(e); + } + } + + // In host graph `g`, remove output edge from `xla_node` and connect input & + // output directly. + std::vector> xla_node_out_edges( + xla_node->num_outputs()); + for (const Edge* e : xla_node->out_edges()) { + if (!e->IsControlEdge()) { + xla_node_out_edges[e->src_output()].push_back(e); + } + } + + // Sometimes `xla_node` can have a lot of inputs, calling Node::input_edge + // will become very expensive in this case because it is doing a linear + // search inside. Create an input_edges vector ahead to make the lookups + // faster. + std::vector input_edges; + TF_RETURN_IF_ERROR(xla_node->input_edges(&input_edges)); + for (const Edge* e : edges) { + int arg_index; + TF_RETURN_IF_ERROR(GetNodeAttr(e->src()->def(), "index", &arg_index)); + int ret_index; + TF_RETURN_IF_ERROR(GetNodeAttr(e->dst()->def(), "index", &ret_index)); + + for (int replica_id = 0; replica_id < num_replicas; replica_id++) { + int input_index; + if (arg_index < old_num_per_replica_inputs) { + input_index = replica_id * old_num_per_replica_inputs + arg_index; + } else { + input_index = num_replicas * old_num_per_replica_inputs + + (arg_index - old_num_per_replica_inputs); + } + const Edge* input_edge = input_edges.at(input_index); + + int output_index = replica_id * old_num_outputs + ret_index; + for (const Edge* output_edge : xla_node_out_edges[output_index]) { + Node* dst = output_edge->dst(); + int dst_input = output_edge->dst_input(); + + g->RemoveEdge(output_edge); + g->AddEdge(input_edge->src(), input_edge->src_output(), dst, dst_input); + } + } + } + + // Remove edges from `xla_graph`. Add a Placeholder node for the _Retval node, + // which will be removed by `RemoveUnusedXlaOutput()` later. + for (const Edge* e : edges) { + NodeDefBuilder placeholder_builder( + absl::StrCat("placeholder_", e->dst()->name()), "Placeholder"); + placeholder_builder.Attr("dtype", e->src()->output_type(e->src_output())); + placeholder_builder.Attr(kXlaIsPlaceholderForTailOcAttrName, true); + NodeDef placeholder_def; + TF_RETURN_IF_ERROR(placeholder_builder.Finalize(&placeholder_def)); + Status s; + Node* placeholder_node = xla_graph->AddNode(placeholder_def, &s); + TF_RETURN_IF_ERROR(s); + + Node* dst = e->dst(); + int dst_input = e->dst_input(); + xla_graph->RemoveEdge(e); + xla_graph->AddEdge(placeholder_node, 0, dst, dst_input); + } + + VLOG(4) << "RemoveUnusedArgRetvalPair host graph: " + << DumpGraphToFile( + absl::StrCat("remove_unused_arg_ret_host_", xla_func_name), + *g); + VLOG(4) << "RemoveUnusedArgRetvalPair XLA graph: " + << DumpGraphToFile( + absl::StrCat("remove_unused_arg_ret_xla_", xla_func_name), + *xla_graph); + + return Status::OK(); +} + +// Remove any TPUReplicatedInput nodes with no output edges. 
Those nodes are +// usually TPUMirroredVariable handles which are not used by any computations. +void RemoveUnusedTPUReplicatedInputs(Graph* graph) { + for (Node* n : graph->nodes()) { + if (n->type_string() == kTPUReplicatedInput) { + bool has_output = false; + for (const Edge* e : n->out_edges()) { + if (!e->dst()->IsSink()) { + has_output = true; + break; + } + } + if (!has_output) { + graph->RemoveNode(n); + } + } + } +} + +// We might have duplicated cluster names in the graph, e.g. when a tf.function +// containing tpu_strategy.run() is called multiple times with +// the same inputs. Find clusters with duplicated names and rename them. +Status RenameClustersWithDuplicatedNames(Graph* g) { + // Find all TPU clusters by finding all TPUReplicateMetadata nodes. + std::unordered_map> cluster_name_to_metadata_nodes; + std::unordered_set cluster_names; + for (Node* n : g->nodes()) { + if (n->type_string() != "TPUReplicateMetadata") { + continue; + } + string cluster_name; + TF_RETURN_IF_ERROR(GetNodeAttr(n->def(), kTPUReplicateAttr, &cluster_name)); + cluster_name_to_metadata_nodes[cluster_name].push_back(n); + cluster_names.insert(cluster_name); + } + // Look for clusters with duplicated name. + for (const auto& iter : cluster_name_to_metadata_nodes) { + if (iter.second.size() == 1) { + continue; + } + + // Rename clusters. + for (int i = 1; i < iter.second.size(); i++) { + // Find an available cluster name. + string new_cluster_name; + int cluster_name_suffix = 1; + while (true) { + new_cluster_name = absl::StrCat(iter.first, "_", cluster_name_suffix); + if (cluster_names.find(new_cluster_name) == cluster_names.end()) { + break; + } + cluster_name_suffix++; + } + cluster_names.insert(new_cluster_name); + + // Change _tpu_replicate attribute for all nodes in this cluster. + // Start with outputs of TPUReplicateMetadata and follow output edges. + std::queue queue; + queue.push(iter.second.at(i)); + std::unordered_set visited; + while (!queue.empty()) { + Node* n = queue.front(); + queue.pop(); + + visited.insert(n); + + n->ClearAttr(kTPUReplicateAttr); + n->AddAttr(kTPUReplicateAttr, new_cluster_name); + + string cluster_name; + for (const Edge* e : n->out_edges()) { + if (GetNodeAttr(e->dst()->def(), kTPUReplicateAttr, &cluster_name) + .ok() && + cluster_name == iter.first && + visited.find(e->dst()) == visited.end()) { + queue.push(e->dst()); + } + } + } + // Change "_tpu_compilation_status" attr for TPUCompilationResult node. + for (const Edge* e : iter.second.at(i)->out_edges()) { + if (e->dst()->type_string() == "TPUCompilationResult") { + e->dst()->ClearAttr("_tpu_compilation_status"); + e->dst()->AddAttr("_tpu_compilation_status", new_cluster_name); + } + } + } + } + return Status::OK(); +} + +// Instantiate a function that is associated with a functional control flow +// node. The function name is found by looking up `function_name_attr` of given +// node. 
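Editor's note: RenameClustersWithDuplicatedNames above probes candidate cluster names by appending an increasing numeric suffix until it finds one that is not yet taken. A small self-contained sketch of that probe, using a hypothetical helper name and std::unordered_set in place of the pass's bookkeeping:

#include <string>
#include <unordered_set>

std::string PickFreshClusterName(const std::string& base,
                                 std::unordered_set<std::string>* taken) {
  int suffix = 1;
  std::string candidate;
  do {
    candidate = base + "_" + std::to_string(suffix);
    ++suffix;
  } while (taken->count(candidate) > 0);
  taken->insert(candidate);
  return candidate;
}

// Example: with {"cluster", "cluster_1"} already taken,
// PickFreshClusterName("cluster", &taken) returns "cluster_2".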
+xla::StatusOr> InstantiateAssociatedFunction( + const Node& n, absl::string_view function_name_attr, + FunctionLibraryDefinition* fld) { + std::unique_ptr fbody; + NameAttrList func_attr_list; + TF_RETURN_IF_ERROR(GetNodeAttr(n.def(), function_name_attr, &func_attr_list)); + const FunctionDef* fdef = fld->Find(func_attr_list.name()); + if (fdef == nullptr) { + return errors::Internal("Cannot find ", function_name_attr, " function", + "for node ", n.DebugString()); + } + TF_RETURN_IF_ERROR(FunctionDefToBodyHelper( + *fdef, AttrSlice(&func_attr_list.attr()), fld, &fbody)); + return fbody; +} + +// Find inputs of If node that are only used for outside compilation if used at +// all in both if/else branches +xla::StatusOr> FindArgsToLiftForIfNode( + const Node& if_node, FunctionLibraryDefinition* fld) { + absl::flat_hash_set args_to_lift_indices; + std::vector dtypes; + TF_RETURN_IF_ERROR(GetNodeAttr(if_node.def(), "Tin", &dtypes)); + + int num_args = dtypes.size(); + + for (int i = 0; i < num_args; i++) { + // TODO(b/74023706): enable non resource inputs as well. + if (dtypes[i] == DT_RESOURCE) { + args_to_lift_indices.insert(i); + } + } + + TF_ASSIGN_OR_RETURN( + std::unique_ptr then_branch_fbody, + InstantiateAssociatedFunction(if_node, "then_branch", fld)); + + TF_ASSIGN_OR_RETURN( + std::unique_ptr else_branch_fbody, + InstantiateAssociatedFunction(if_node, "else_branch", fld)); + + for (int i = 0; i < num_args; ++i) { + bool used = false; + + const Node* then_arg_node = then_branch_fbody->arg_nodes[i]; + for (const Edge* e : then_arg_node->out_edges()) { + used = true; + if (e->IsControlEdge() || + HasNodeAttr(e->dst()->def(), kOutsideCompilationAttr)) + continue; + + args_to_lift_indices.erase(i); + break; + } + + const Node* else_arg_node = else_branch_fbody->arg_nodes[i]; + for (const Edge* e : else_arg_node->out_edges()) { + used = true; + if (e->IsControlEdge() || + HasNodeAttr(e->dst()->def(), kOutsideCompilationAttr)) + continue; + + args_to_lift_indices.erase(i); + break; + } + + // Do not lift arguments that are not used at all. Otherwise, this unused + // arg would be outside compiled, its output tensor will be forced to + // transfer to host needlessly. + if (!used) args_to_lift_indices.erase(i); + } + + return args_to_lift_indices; +} + +// Find inputs of While node that are: +// 1. not used in cond func, +// 2. only used for outside compilation in body func, +// 3. loop invariant. +// These inputs can be lifted out of the while loop. +xla::StatusOr> FindArgsToLiftForWhileNode( + Node* while_node, FunctionLibraryDefinition* fld) { + // DT_RESOURCE inputs are candidates. + absl::flat_hash_set result; + std::vector dtypes; + TF_RETURN_IF_ERROR(GetNodeAttr(while_node->def(), "T", &dtypes)); + for (int i = 0; i < dtypes.size(); i++) { + // TODO(b/74023706): enable non resource inputs as well. + if (dtypes[i] == DT_RESOURCE) { + result.insert(i); + } + } + + // Remove inputs that are used in cond func. 
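Editor's note: FindArgsToLiftForIfNode and FindArgsToLiftForWhileNode share the same shape: seed the candidate set with every DT_RESOURCE argument, then erase a candidate as soon as a disqualifying use is found. A schematic sketch of that pattern with hypothetical predicates (the real checks walk the instantiated then/else, cond and body function bodies):

#include <functional>
#include <unordered_set>

std::unordered_set<int> CollectLiftableArgs(
    int num_args, const std::function<bool(int)>& is_resource_arg,
    const std::function<bool(int)>& has_disqualifying_use) {
  std::unordered_set<int> candidates;
  for (int i = 0; i < num_args; ++i) {
    if (is_resource_arg(i)) candidates.insert(i);
  }
  for (int i = 0; i < num_args; ++i) {
    if (candidates.count(i) > 0 && has_disqualifying_use(i)) {
      candidates.erase(i);
    }
  }
  return candidates;
}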
+ NameAttrList cond_func; + TF_RETURN_IF_ERROR(GetNodeAttr(while_node->def(), "cond", &cond_func)); + const FunctionDef* cond_fdef = fld->Find(cond_func.name()); + if (cond_fdef == nullptr) { + return errors::Internal("Cannot find cond function ", cond_func.name(), + " for while node ", while_node->DebugString()); + } + std::unique_ptr cond_fbody; + TF_RETURN_IF_ERROR(FunctionDefToBodyHelper( + *cond_fdef, AttrSlice(&cond_func.attr()), fld, &cond_fbody)); + for (int i = 0; i < cond_fbody->arg_nodes.size(); i++) { + const Node* arg_node = cond_fbody->arg_nodes[i]; + for (const Edge* e : arg_node->out_edges()) { + if (!e->IsControlEdge()) { + result.erase(i); + } + } + } + + // Remove inputs that are not loop invariant. + NameAttrList body_func; + TF_RETURN_IF_ERROR(GetNodeAttr(while_node->def(), "body", &body_func)); + const FunctionDef* body_fdef = fld->Find(body_func.name()); + if (body_fdef == nullptr) { + return errors::Internal("Cannot find body function ", body_func.name(), + " for while node ", while_node->DebugString()); + } + std::unique_ptr body_fbody; + TF_RETURN_IF_ERROR(FunctionDefToBodyHelper( + *body_fdef, AttrSlice(&body_func.attr()), fld, &body_fbody)); + for (int i = 0; i < body_fbody->ret_nodes.size(); i++) { + const Node* node = body_fbody->ret_nodes[i]; + do { + TF_RETURN_IF_ERROR(node->input_node(0, &node)); + } while (node->IsIdentity()); + if (node != body_fbody->arg_nodes[i]) { + result.erase(i); + } + } + + // Remove inputs that only have one output edge (loop invariant, but not used + // in outside compilation). + for (int i = 0; i < body_fbody->arg_nodes.size(); i++) { + const Node* arg_node = body_fbody->arg_nodes[i]; + int data_edge_count = std::count_if( + arg_node->out_edges().begin(), arg_node->out_edges().end(), + [](const Edge* e) { return !e->IsControlEdge(); }); + if (data_edge_count == 1) { + result.erase(i); + } + } + + // Remove inputs that have non-outside-compilation usage. + for (int i = 0; i < body_fbody->arg_nodes.size(); i++) { + const Node* arg_node = body_fbody->arg_nodes[i]; + for (const Edge* e : arg_node->out_edges()) { + if (!e->dst()->IsRetval() && + !HasNodeAttr(e->dst()->def(), kOutsideCompilationAttr)) { + result.erase(i); + break; + } + } + } + + return result; +} + +// Find inputs of function call node that are only used for outside compilation. +// These inputs can be lifted out of the function call node. +xla::StatusOr> FindArgsToLiftForCallNode( + Node* call_node, const FunctionBody& fbody) { + // DT_RESOURCE inputs are candidates. + absl::flat_hash_set result; + std::vector dtypes(call_node->input_types().begin(), + call_node->input_types().end()); + for (int i = 0; i < dtypes.size(); i++) { + // TODO(b/74023706): enable for non resource inputs as well. + if (dtypes[i] == DT_RESOURCE) { + result.insert(i); + } + } + + // Remove inputs that have non-outside-compilation usage, or not used at all. 
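Editor's note: the loop-invariance test in FindArgsToLiftForWhileNode walks from the body's i-th _Retval back through any chain of Identity nodes and requires the walk to end at the i-th _Arg. A simplified stand-in version of that walk (plain structs, not the TF Node API):

struct SimpleNode {
  bool is_identity = false;
  const SimpleNode* input = nullptr;  // first data input, if any
};

// True when `ret` is fed, possibly through Identity nodes, by `arg` itself,
// i.e. the loop carries the value through unchanged.
bool IsLoopInvariant(const SimpleNode* ret, const SimpleNode* arg) {
  const SimpleNode* node = ret;
  do {
    node = node->input;
  } while (node != nullptr && node->is_identity);
  return node == arg;
}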
+ for (int i = 0; i < fbody.arg_nodes.size(); i++) { + const Node* arg_node = fbody.arg_nodes[i]; + if (arg_node->out_edges().empty()) { + result.erase(i); + continue; + } + + for (const Edge* e : arg_node->out_edges()) { + if (!HasNodeAttr(e->dst()->def(), kOutsideCompilationAttr)) { + result.erase(i); + break; + } + } + } + return result; +} + +Status LiftOutsideCompilationOnlyArgs(Graph* g, FunctionLibraryRuntime* flr, + FunctionLibraryDefinition* fld, + int* lifted_arg_count, bool* rewritten); + +Status LiftOutsideCompilationOnlyArgsAndReplaceFunctionDef( + const FunctionBody& fbody, FunctionLibraryRuntime* flr, + FunctionLibraryDefinition* fld, int* lifted_arg_count, + absl::optional new_func_name, bool* rewritten) { + *rewritten = false; + TF_RETURN_IF_ERROR(LiftOutsideCompilationOnlyArgs( + fbody.graph, flr, fld, lifted_arg_count, rewritten)); + + if (*rewritten) { + FunctionDef rewritten_fdef; + TF_RETURN_IF_ERROR(GraphToFunctionDef( + *(fbody.graph), fbody.fdef.signature().name(), &rewritten_fdef)); + if (new_func_name) { + rewritten_fdef.mutable_signature()->set_name(*new_func_name); + TF_RETURN_IF_ERROR(fld->AddFunctionDef(rewritten_fdef)); + } else { + TF_RETURN_IF_ERROR( + fld->ReplaceFunction(fbody.fdef.signature().name(), rewritten_fdef)); + } + } + + return Status::OK(); +} + +Status MakeIdentityNodesForArgsToLift( + const absl::flat_hash_set& args_to_lift, + const int arg_to_input_edge_offset, Graph* g, Node* n, + absl::flat_hash_map* lifted_arg_index_to_oc_cluster_name, + int* lifted_arg_count) { + int num_input = n->num_inputs(); + for (int arg_index = 0; arg_index < num_input; ++arg_index) { + if (!args_to_lift.contains(arg_index)) continue; + + int input_edge_index = arg_index + arg_to_input_edge_offset; + const Edge* arg_edge; + TF_RETURN_IF_ERROR(n->input_edge(input_edge_index, &arg_edge)); + + string node_name = + g->NewName(absl::StrCat("lifted_arg", *lifted_arg_count)); + (*lifted_arg_count)++; + (*lifted_arg_index_to_oc_cluster_name)[arg_index] = node_name; + NodeDefBuilder id_builder(node_name, "Identity"); + id_builder.Attr("T", n->input_type(input_edge_index)); + id_builder.Attr(kOutsideCompilationAttr, id_builder.node_name()); + id_builder.Attr(kXlaIsLiftedArgAttrName, true); + id_builder.Input(arg_edge->src()->name(), arg_edge->src_output(), + n->input_type(input_edge_index)); + NodeDef id_def; + TF_RETURN_IF_ERROR(id_builder.Finalize(&id_def)); + Status s; + Node* id_node = g->AddNode(id_def, &s); + TF_RETURN_IF_ERROR(s); + g->AddEdge(arg_edge->src(), arg_edge->src_output(), id_node, 0); + g->AddControlEdge(id_node, n); + } + + return Status::OK(); +} + +// Replaces all usages of lifted args with placeholder nodes. Afterwards, +// removing these args should be safe since they no longer have users. 
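Editor's note: MakeIdentityNodesForArgsToLift above reads each lifted argument through the calling node's input edge at `arg_index + arg_to_input_edge_offset` (offset 1 for If nodes, whose input 0 is the predicate; 0 for While and plain calls), and names the inserted Identity node `lifted_arg<N>` before graph-level uniquification. A tiny sketch of those two conventions, with hypothetical helper names:

#include <string>

// Input edge index that feeds argument `arg_index` of the calling node.
int InputEdgeForArg(int arg_index, int arg_to_input_edge_offset) {
  return arg_index + arg_to_input_edge_offset;
}

// Base name given to the Identity node created for the N-th lifted argument.
std::string LiftedArgBaseName(int lifted_arg_count) {
  return "lifted_arg" + std::to_string(lifted_arg_count);
}

// Example: for an If node (offset 1), argument 2 is read from input edge 3.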
+Status RemoveArgsToLiftFromFunctionBody( + const absl::flat_hash_set& args_to_lift, + const std::vector& arg_dtypes, + const absl::flat_hash_map& lifted_arg_index_to_oc_cluster_name, + const absl::flat_hash_map& index_mapping, + const FunctionBody* fbody) { + for (int i = 0; i < fbody->arg_nodes.size(); ++i) { + Node* arg_node = fbody->arg_nodes[i]; + + if (!args_to_lift.contains(i)) { + int new_index = index_mapping.at(i); + arg_node->ClearAttr("index"); + arg_node->AddAttr("index", new_index); + arg_node->ClearAttr("T"); + arg_node->AddAttr("T", arg_dtypes[i]); + continue; + } + + std::vector out_edges_to_oc; + for (const Edge* e : arg_node->out_edges()) { + if (HasNodeAttr(e->dst()->def(), kOutsideCompilationAttr)) { + out_edges_to_oc.push_back(e); + } + } + + for (const Edge* e : out_edges_to_oc) { + string outside_compilation_cluster; + TF_RETURN_IF_ERROR(GetNodeAttr(e->dst()->def(), kOutsideCompilationAttr, + &outside_compilation_cluster)); + NodeDefBuilder ph_builder(fbody->graph->NewName("lifted_arg"), + "Placeholder"); + ph_builder.Attr("dtype", arg_dtypes[i]); + ph_builder.Attr(kOutsideCompilationAttr, outside_compilation_cluster); + TF_RET_CHECK(lifted_arg_index_to_oc_cluster_name.contains(i)); + ph_builder.Attr(kXlaLiftedArgOutsideCompilationAttrName, + lifted_arg_index_to_oc_cluster_name.at(i)); + + NodeDef ph_def; + TF_RETURN_IF_ERROR(ph_builder.Finalize(&ph_def)); + + Status s; + Node* ph_node = fbody->graph->AddNode(ph_def, &s); + TF_RETURN_IF_ERROR(s); + + Node* dst = e->dst(); + int dst_input = e->dst_input(); + fbody->graph->RemoveEdge(e); + fbody->graph->AddEdge(ph_node, 0, dst, dst_input); + } + + fbody->graph->RemoveNode(arg_node); + } + + return Status::OK(); +} + +Status CleanUpInEdges(const absl::flat_hash_map& index_mapping, + const int arg_to_input_edge_offset, Graph* g, Node* n) { + int num_inputs = n->num_inputs(); + for (int i = 0; i < num_inputs; ++i) { + if (i < arg_to_input_edge_offset) continue; + + int arg_idx = i - arg_to_input_edge_offset; + const Edge* e; + TF_RETURN_IF_ERROR(n->input_edge(i, &e)); + + // If an edge maps to a lifted argument, simply remove that edge from graph. + if (!index_mapping.contains(arg_idx)) { + g->RemoveEdge(e); + continue; + } + + // If an edge maps to same input port, nothing to do. + if (index_mapping.at(arg_idx) == arg_idx) continue; + + g->AddEdge(e->src(), e->src_output(), n, + index_mapping.at(arg_idx) + arg_to_input_edge_offset); + g->RemoveEdge(e); + } + + return Status::OK(); +} + +Status UpdateTypeAttribute(const absl::flat_hash_map& index_mapping, + const string& type_attr_name, + const std::vector& dtypes, Node* n) { + std::vector new_dtypes; + new_dtypes.reserve(index_mapping.size()); + for (int i = 0; i < dtypes.size(); ++i) { + if (index_mapping.contains(i)) { + new_dtypes.emplace_back(dtypes[i]); + } + } + + n->ClearAttr(type_attr_name); + n->AddAttr(type_attr_name, new_dtypes); + + return Status::OK(); +} + +// While V2 always creates Identity node for each While node output, which is +// not necessary for XLA computation. Remove those Identity nodes. 
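Editor's note: UpdateTypeAttribute above rebuilds a type-list attribute by keeping only the dtypes whose argument index survives lifting, in their original order. A minimal sketch with `int` standing in for the TF DataType enum and a plain map for the index mapping:

#include <map>
#include <vector>

std::vector<int> KeptDtypes(const std::vector<int>& dtypes,
                            const std::map<int, int>& index_mapping) {
  std::vector<int> kept;
  kept.reserve(index_mapping.size());
  for (int i = 0; i < static_cast<int>(dtypes.size()); ++i) {
    // dtypes[i] survives only if argument i was not lifted.
    if (index_mapping.count(i) > 0) kept.push_back(dtypes[i]);
  }
  return kept;
}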
+void RemoveOutputIdentityNodesForWhileV2(Graph* g, Node* while_node) { + std::vector edges_to_identity_node; + for (const Edge* e : while_node->out_edges()) { + if (!e->IsControlEdge() && e->dst()->IsIdentity()) { + edges_to_identity_node.push_back(e); + } + } + for (const Edge* e : edges_to_identity_node) { + Node* identity = e->dst(); + std::vector out_edges(identity->out_edges().begin(), + identity->out_edges().end()); + for (const Edge* out_edge : out_edges) { + if (out_edge->IsControlEdge()) { + g->AddControlEdge(while_node, out_edge->dst()); + } else { + Node* dst = out_edge->dst(); + int dst_input = out_edge->dst_input(); + g->RemoveEdge(out_edge); + g->AddEdge(while_node, e->src_output(), dst, dst_input); + } + } + g->RemoveNode(identity); + } +} + +// If corresponding While node output is used, change it to use While node input +// instead. +Status ReplaceOutputEdgesWithInputEdgeSourceForWhile( + const absl::flat_hash_set& args_to_lift, Graph* g, Node* while_node) { + std::vector edges_to_replace; + for (const Edge* e : while_node->out_edges()) { + if (args_to_lift.contains(e->src_output())) { + edges_to_replace.push_back(e); + } + } + for (const Edge* e : edges_to_replace) { + const Edge* input_edge; + TF_RETURN_IF_ERROR(while_node->input_edge(e->src_output(), &input_edge)); + Node* dst = e->dst(); + int dst_input = e->dst_input(); + g->RemoveEdge(e); + g->AddEdge(input_edge->src(), input_edge->src_output(), dst, dst_input); + } + + return Status::OK(); +} + +// Calculates mapping from argument index before lifting to index afterwards. +absl::flat_hash_map ArgIndexMapping( + const int num_args, const absl::flat_hash_set& args_to_lift) { + absl::flat_hash_map index_mapping; + int new_index = 0; + for (int i = 0; i < num_args; i++) { + if (!args_to_lift.contains(i)) { + index_mapping[i] = new_index; + ++new_index; + } + } + + return index_mapping; +} + +// Remove outputs of While node body function that maps to lifted arguments. +void CleanUpRetvalsForWhileBody( + const absl::flat_hash_map& index_mapping, + const std::vector& dtypes, FunctionBody* fbody) { + for (int i = 0; i < fbody->ret_nodes.size(); i++) { + Node* ret_node = fbody->ret_nodes[i]; + if (index_mapping.contains(i)) { + int new_index = index_mapping.at(i); + ret_node->ClearAttr("index"); + ret_node->AddAttr("index", new_index); + ret_node->ClearAttr("T"); + ret_node->AddAttr("T", dtypes[i]); + } else { + fbody->graph->RemoveNode(ret_node); + } + } +} + +Status LiftOutsideCompilationOnlyArgsFromWhileNode( + Graph* g, Node* while_node, FunctionLibraryDefinition* fld, + int* lifted_arg_count, bool* rewritten) { + *rewritten = false; + + TF_ASSIGN_OR_RETURN(absl::flat_hash_set args_to_lift, + FindArgsToLiftForWhileNode(while_node, fld)); + if (args_to_lift.empty()) return Status::OK(); + + RemoveOutputIdentityNodesForWhileV2(g, while_node); + + TF_RETURN_IF_ERROR(ReplaceOutputEdgesWithInputEdgeSourceForWhile( + args_to_lift, g, while_node)); + + std::vector dtypes; + TF_RETURN_IF_ERROR(GetNodeAttr(while_node->def(), "T", &dtypes)); + + absl::flat_hash_map index_mapping = + ArgIndexMapping(dtypes.size(), args_to_lift); + + // For each lifted arg, add an outside compilation Identity node to send + // it to host. + absl::flat_hash_map lifted_arg_index_to_oc_cluster_name; + TF_RETURN_IF_ERROR(MakeIdentityNodesForArgsToLift( + args_to_lift, /*arg_to_input_edge_offset=*/0, g, while_node, + &lifted_arg_index_to_oc_cluster_name, lifted_arg_count)); + + // For cond func, remove _Arg nodes. 
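Editor's note: ArgIndexMapping above packs the surviving argument indices into consecutive new positions while preserving their relative order. A standalone sketch of the same computation with standard containers:

#include <map>
#include <set>

std::map<int, int> PackSurvivingIndices(int num_args,
                                        const std::set<int>& lifted) {
  std::map<int, int> mapping;
  int next = 0;
  for (int i = 0; i < num_args; ++i) {
    if (lifted.count(i) == 0) mapping[i] = next++;
  }
  return mapping;
}

// Example: num_args = 4, lifted = {1} gives {0 -> 0, 2 -> 1, 3 -> 2}.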
+ TF_ASSIGN_OR_RETURN(std::unique_ptr cond_fbody, + InstantiateAssociatedFunction(*while_node, "cond", fld)); + TF_RETURN_IF_ERROR(RemoveArgsToLiftFromFunctionBody( + args_to_lift, dtypes, lifted_arg_index_to_oc_cluster_name, index_mapping, + cond_fbody.get())); + + FunctionDef rewritten_cond_fdef; + TF_RETURN_IF_ERROR(GraphToFunctionDef(*(cond_fbody->graph), + cond_fbody->fdef.signature().name(), + &rewritten_cond_fdef)); + TF_RETURN_IF_ERROR(fld->ReplaceFunction(cond_fbody->fdef.signature().name(), + rewritten_cond_fdef)); + + // For body func, remove _Retval nodes, and replace _Arg nodes with + // Placeholder nodes. + TF_ASSIGN_OR_RETURN(std::unique_ptr body_fbody, + InstantiateAssociatedFunction(*while_node, "body", fld)); + + TF_RETURN_IF_ERROR(RemoveArgsToLiftFromFunctionBody( + args_to_lift, dtypes, lifted_arg_index_to_oc_cluster_name, index_mapping, + body_fbody.get())); + + CleanUpRetvalsForWhileBody(index_mapping, dtypes, body_fbody.get()); + + FunctionDef rewritten_body_fdef; + TF_RETURN_IF_ERROR(GraphToFunctionDef(*(body_fbody->graph), + body_fbody->fdef.signature().name(), + &rewritten_body_fdef)); + TF_RETURN_IF_ERROR(fld->ReplaceFunction(body_fbody->fdef.signature().name(), + rewritten_body_fdef)); + + // Remove edges from lifted args to While node, and change "T" attr of the + // While node. + TF_RETURN_IF_ERROR(CleanUpInEdges( + index_mapping, /*arg_to_input_edge_offset=*/0, g, while_node)); + + TF_RETURN_IF_ERROR( + UpdateTypeAttribute(index_mapping, "T", dtypes, while_node)); + + *rewritten = true; + + return Status::OK(); +} + +Status LiftOutsideCompilationOnlyArgsFromIfNode(Graph* g, Node* if_node, + FunctionLibraryDefinition* fld, + int* lifted_arg_count, + bool* rewritten) { + *rewritten = false; + TF_ASSIGN_OR_RETURN(absl::flat_hash_set args_to_lift, + FindArgsToLiftForIfNode(*if_node, fld)); + if (args_to_lift.empty()) return Status::OK(); + + std::vector dtypes; + TF_RETURN_IF_ERROR(GetNodeAttr(if_node->def(), "Tin", &dtypes)); + + absl::flat_hash_map index_mapping; + int new_index = 0; + for (int i = 0; i < dtypes.size(); i++) { + if (!args_to_lift.contains(i)) { + index_mapping[i] = new_index; + ++new_index; + } + } + + // For each lifted arg, add an outside compilation Identity node to send + // it to host. 
+ absl::flat_hash_map lifted_arg_index_to_oc_cluster_name; + TF_RETURN_IF_ERROR(MakeIdentityNodesForArgsToLift( + args_to_lift, /*arg_to_input_edge_offset=*/1, g, if_node, + &lifted_arg_index_to_oc_cluster_name, lifted_arg_count)); + + TF_ASSIGN_OR_RETURN( + std::unique_ptr then_branch_fbody, + InstantiateAssociatedFunction(*if_node, "then_branch", fld)); + + TF_RETURN_IF_ERROR(RemoveArgsToLiftFromFunctionBody( + args_to_lift, dtypes, lifted_arg_index_to_oc_cluster_name, index_mapping, + then_branch_fbody.get())); + + FunctionDef rewritten_then_branch_fdef; + TF_RETURN_IF_ERROR(GraphToFunctionDef( + *(then_branch_fbody->graph), then_branch_fbody->fdef.signature().name(), + &rewritten_then_branch_fdef)); + TF_RETURN_IF_ERROR(fld->ReplaceFunction( + then_branch_fbody->fdef.signature().name(), rewritten_then_branch_fdef)); + + TF_ASSIGN_OR_RETURN( + std::unique_ptr else_branch_fbody, + InstantiateAssociatedFunction(*if_node, "else_branch", fld)); + + TF_RETURN_IF_ERROR(RemoveArgsToLiftFromFunctionBody( + args_to_lift, dtypes, lifted_arg_index_to_oc_cluster_name, index_mapping, + else_branch_fbody.get())); + + FunctionDef rewritten_else_branch_fdef; + TF_RETURN_IF_ERROR(GraphToFunctionDef( + *(else_branch_fbody->graph), else_branch_fbody->fdef.signature().name(), + &rewritten_else_branch_fdef)); + TF_RETURN_IF_ERROR(fld->ReplaceFunction( + else_branch_fbody->fdef.signature().name(), rewritten_else_branch_fdef)); + + // Remove edges from lifted args to If node, and change "Tin" attr of the + // If node. + TF_RETURN_IF_ERROR(CleanUpInEdges( + index_mapping, /*arg_to_input_edge_offset=*/1, g, if_node)); + TF_RETURN_IF_ERROR( + UpdateTypeAttribute(index_mapping, "Tin", dtypes, if_node)); + + *rewritten = true; + + return Status::OK(); +} + +Status LiftOutsideCompilationOnlyArgsFromCallNode( + Graph* g, Node* call_node, FunctionLibraryRuntime* flr, + FunctionLibraryDefinition* fld, int* lifted_arg_count, bool* rewritten) { + *rewritten = false; + + // Instantiate the function. + NameAttrList func; + if (fld->Contains(call_node->type_string())) { + func.set_name(call_node->type_string()); + *func.mutable_attr() = call_node->def().attr(); + } else if (call_node->IsPartitionedCall()) { + TF_RETURN_IF_ERROR(GetNodeAttr(call_node->def(), "f", &func)); + } else { + TF_RET_CHECK(call_node->type_string() == + FunctionLibraryDefinition::kGradientOp); + func.set_name(FunctionLibraryDefinition::kGradientOp); + *func.mutable_attr() = call_node->def().attr(); + } + FunctionLibraryRuntime::Handle handle; + TF_RETURN_IF_ERROR( + flr->Instantiate(func.name(), AttrSlice(&func.attr()), &handle)); + auto cleanup_handle = gtl::MakeCleanup( + [&flr, &handle]() { flr->ReleaseHandle(handle).IgnoreError(); }); + const FunctionBody* fbody = flr->GetFunctionBody(handle); + + // Find _Arg nodes to lift. + TF_ASSIGN_OR_RETURN(absl::flat_hash_set args_to_lift, + FindArgsToLiftForCallNode(call_node, *fbody)); + if (args_to_lift.empty()) return Status::OK(); + + std::vector dtypes; + dtypes = std::vector(call_node->input_types().begin(), + call_node->input_types().end()); + + absl::flat_hash_map index_mapping = + ArgIndexMapping(dtypes.size(), args_to_lift); + + // For each lifted arg, add an outside compilation Identity node to send + // it to host. + absl::flat_hash_map lifted_arg_index_to_oc_cluster_name; + TF_RETURN_IF_ERROR(MakeIdentityNodesForArgsToLift( + args_to_lift, /*arg_to_input_edge_offset=*/0, g, call_node, + &lifted_arg_index_to_oc_cluster_name, lifted_arg_count)); + + // Remove _Arg nodes. 
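Editor's note: LiftOutsideCompilationOnlyArgsFromCallNode above resolves the callee in a fixed order: a node whose op name is registered in the function library is itself the callee, a PartitionedCall names its callee in the "f" attribute, and anything else must be a SymbolicGradient node. A simplified sketch of that dispatch with stand-in types (not the FunctionLibraryDefinition API):

#include <optional>
#include <string>

struct CallSite {
  std::string op;                     // the node's type_string()
  bool is_partitioned_call = false;
  std::optional<std::string> f_attr;  // "f" attribute, when present
};

std::string ResolveCalleeName(const CallSite& call,
                              bool op_is_in_function_library) {
  if (op_is_in_function_library) return call.op;
  if (call.is_partitioned_call && call.f_attr.has_value()) return *call.f_attr;
  return "SymbolicGradient";  // i.e. FunctionLibraryDefinition::kGradientOp
}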
+ TF_RETURN_IF_ERROR(RemoveArgsToLiftFromFunctionBody( + args_to_lift, dtypes, lifted_arg_index_to_oc_cluster_name, index_mapping, + fbody)); + + // Store rewritten function as a new function, because the original function + // might be defined by user and we should not modify it. + FunctionDef rewritten_fdef; + TF_RETURN_IF_ERROR(GraphToFunctionDef( + *(fbody->graph), fbody->fdef.signature().name(), &rewritten_fdef)); + string new_func_name = + fld->UniqueFunctionName(fbody->fdef.signature().name()); + rewritten_fdef.mutable_signature()->set_name(new_func_name); + TF_RETURN_IF_ERROR(fld->AddFunctionDef(rewritten_fdef)); + + // Remove edges from lifted args to call node. + TF_RETURN_IF_ERROR(CleanUpInEdges( + index_mapping, /*arg_to_input_edge_offset=*/0, g, call_node)); + + // Rewrite the call node to use the rewritten function. + NodeDef node_def; + node_def.set_name(g->NewName(call_node->name())); + node_def.set_op(new_func_name); + if (call_node->IsPartitionedCall()) { + NameAttrList f; + TF_RETURN_IF_ERROR(GetNodeAttr(call_node->def(), "f", &f)); + *node_def.mutable_attr() = f.attr(); + } else if (fld->Contains(call_node->type_string())) { + *node_def.mutable_attr() = call_node->def().attr(); + } else { + TF_RET_CHECK(call_node->type_string() == + FunctionLibraryDefinition::kGradientOp); + *node_def.mutable_attr() = call_node->def().attr(); + node_def.mutable_attr()->erase(FunctionLibraryDefinition::kFuncAttr); + } + TF_ASSIGN_OR_RETURN(call_node, ReplaceNode(g, call_node, node_def)); + + *rewritten = true; + + return Status::OK(); +} + +// Lifts outside compilation only _Arg nodes out of If/While/function nodes. +Status LiftOutsideCompilationOnlyArgs(Graph* g, FunctionLibraryRuntime* flr, + FunctionLibraryDefinition* fld, + int* lifted_arg_count, bool* rewritten) { + *rewritten = false; + + // Handle deeper functional nodes first. + std::vector while_nodes, if_nodes, call_nodes; + for (Node* n : g->op_nodes()) { + if (HasNodeAttr(n->def(), kOutsideCompilationAttr)) { + continue; + } + + if (n->IsWhileNode()) { + TF_ASSIGN_OR_RETURN(std::unique_ptr body_fbody, + InstantiateAssociatedFunction(*n, "body", fld)); + bool func_rewritten = false; + TF_RETURN_IF_ERROR(LiftOutsideCompilationOnlyArgsAndReplaceFunctionDef( + *body_fbody, flr, fld, lifted_arg_count, + /*new_func_name=*/absl::nullopt, &func_rewritten)); + *rewritten = *rewritten || func_rewritten; + + while_nodes.push_back(n); + } else if (n->IsIfNode()) { + TF_ASSIGN_OR_RETURN( + std::unique_ptr then_branch_fbody, + InstantiateAssociatedFunction(*n, "then_branch", fld)); + bool func_rewritten = false; + TF_RETURN_IF_ERROR(LiftOutsideCompilationOnlyArgsAndReplaceFunctionDef( + *then_branch_fbody, flr, fld, lifted_arg_count, + /*new_func_name=*/absl::nullopt, &func_rewritten)); + *rewritten |= func_rewritten; + + TF_ASSIGN_OR_RETURN( + std::unique_ptr else_branch_fbody, + InstantiateAssociatedFunction(*n, "else_branch", fld)); + func_rewritten = false; + TF_RETURN_IF_ERROR(LiftOutsideCompilationOnlyArgsAndReplaceFunctionDef( + *else_branch_fbody, flr, fld, lifted_arg_count, + /*new_func_name=*/absl::nullopt, &func_rewritten)); + *rewritten |= func_rewritten; + + if_nodes.push_back(n); + } else if (IsFunctionCall(*fld, *n)) { + // Function call nodes need to be rewritten, so handle them later. 
+ call_nodes.push_back(n); + } + } + + std::vector rewritten_call_nodes; + for (Node* call_node : call_nodes) { + if (call_node->IsPartitionedCall()) { + std::unique_ptr function_fbody; + TF_ASSIGN_OR_RETURN(function_fbody, + InstantiateAssociatedFunction(*call_node, "f", fld)); + bool func_rewritten = false; + string new_func_name = + fld->UniqueFunctionName(function_fbody->fdef.signature().name()); + TF_RETURN_IF_ERROR(LiftOutsideCompilationOnlyArgsAndReplaceFunctionDef( + *function_fbody, flr, fld, lifted_arg_count, new_func_name, + &func_rewritten)); + if (func_rewritten) { + NameAttrList f; + TF_RETURN_IF_ERROR(GetNodeAttr(call_node->def(), "f", &f)); + f.set_name(new_func_name); + call_node->ClearAttr("f"); + call_node->AddAttr("f", f); + } + + *rewritten |= func_rewritten; + rewritten_call_nodes.push_back(call_node); + } else if (fld->Contains(call_node->type_string())) { + std::unique_ptr function_fbody; + const FunctionDef* fdef = fld->Find(call_node->type_string()); + TF_RET_CHECK(fdef); + TF_RETURN_IF_ERROR(FunctionDefToBodyHelper(*fdef, call_node->attrs(), fld, + &function_fbody)); + bool func_rewritten = false; + string new_func_name = + fld->UniqueFunctionName(function_fbody->fdef.signature().name()); + TF_RETURN_IF_ERROR(LiftOutsideCompilationOnlyArgsAndReplaceFunctionDef( + *function_fbody, flr, fld, lifted_arg_count, new_func_name, + &func_rewritten)); + if (func_rewritten) { + NodeDef node_def; + node_def.set_name(g->NewName(call_node->name())); + node_def.set_op(new_func_name); + *node_def.mutable_attr() = call_node->def().attr(); + TF_ASSIGN_OR_RETURN(call_node, ReplaceNode(g, call_node, node_def)); + } + + *rewritten |= func_rewritten; + rewritten_call_nodes.push_back(call_node); + } else { + TF_RET_CHECK(call_node->type_string() == + FunctionLibraryDefinition::kGradientOp); + FunctionLibraryRuntime::Handle handle; + TF_RETURN_IF_ERROR(flr->Instantiate(call_node->type_string(), + call_node->attrs(), &handle)); + auto cleanup_handle = gtl::MakeCleanup( + [&flr, &handle]() { flr->ReleaseHandle(handle).IgnoreError(); }); + bool func_rewritten = false; + string new_func_name = fld->UniqueFunctionName( + absl::StrCat(call_node->name(), "_lift_args")); + const FunctionBody* function_fbody = flr->GetFunctionBody(handle); + TF_RETURN_IF_ERROR(LiftOutsideCompilationOnlyArgsAndReplaceFunctionDef( + *function_fbody, flr, fld, lifted_arg_count, new_func_name, + &func_rewritten)); + if (func_rewritten) { + NodeDef node_def; + node_def.set_name(g->NewName(call_node->name())); + node_def.set_op(new_func_name); + *node_def.mutable_attr() = call_node->def().attr(); + node_def.mutable_attr()->erase(FunctionLibraryDefinition::kFuncAttr); + TF_ASSIGN_OR_RETURN(call_node, ReplaceNode(g, call_node, node_def)); + } + + *rewritten |= func_rewritten; + rewritten_call_nodes.push_back(call_node); + } + } + + for (Node* n : while_nodes) { + bool node_rewritten = false; + TF_RETURN_IF_ERROR(LiftOutsideCompilationOnlyArgsFromWhileNode( + g, n, fld, lifted_arg_count, &node_rewritten)); + *rewritten = *rewritten || node_rewritten; + } + + for (Node* n : if_nodes) { + bool node_rewritten = false; + TF_RETURN_IF_ERROR(LiftOutsideCompilationOnlyArgsFromIfNode( + g, n, fld, lifted_arg_count, &node_rewritten)); + *rewritten = *rewritten || node_rewritten; + } + + for (Node* n : rewritten_call_nodes) { + bool node_rewritten = false; + TF_RETURN_IF_ERROR(LiftOutsideCompilationOnlyArgsFromCallNode( + g, n, flr, fld, lifted_arg_count, &node_rewritten)); + *rewritten = *rewritten || node_rewritten; + } + + if 
(*rewritten) { + VLOG(4) << DumpGraphToFile("after_lifting_args", *g, fld); + } + + return Status::OK(); +} + +} // namespace + +/*static*/ Status EncapsulateTPUComputationsPass::Encapsulate( + std::unique_ptr* graph, FunctionLibraryDefinition* flib_def) { + // Check for undeclared outputs before Encapsulation, so we can give a better + // error message. + // TODO(phawkins): merge this with the encapsulation code to avoid the extra + // O(n) pass over the edges. + for (const Edge* e : (*graph)->edges()) { + if (!e->IsControlEdge() && + e->src()->attrs().Find(kTPUReplicateAttr) != nullptr && + e->src()->attrs().Find(kOutsideCompilationAttr) == nullptr && + e->dst()->attrs().Find(kTPUReplicateAttr) == nullptr && + e->dst()->type_string() != kTPUReplicatedOutput) { + return errors::InvalidArgument( + "Undeclared output of TPU computation. A common cause of this error " + "is variable initializers that depend on the TPU computation. Edge: ", + FormatNodeForError(*e->src()), ":", e->src_output(), " -> ", + FormatNodeForError(*e->dst()), ":", e->dst_input()); + } + } + + RemoveUnusedTPUReplicatedInputs(graph->get()); + + TF_RETURN_IF_ERROR(RenameClustersWithDuplicatedNames(graph->get())); + + TF_RETURN_IF_ERROR( + PerformStaticShapeInferenceBeforeEncapsulation(graph->get())); + + auto output = absl::make_unique((*graph)->op_registry()); + TF_RETURN_WITH_CONTEXT_IF_ERROR( + EncapsulateSubgraphsInFunctions( + kTPUReplicateAttr, **graph, RewriteSubgraph, + /*reuse_existing_functions=*/true, &output, flib_def), + "EncapsulateTPUComputationsPass failed"); + graph->swap(output); + + return Status::OK(); +} + +/*static*/ Status EncapsulateTPUComputationsPass::BuildTPUReplicateOps( + Graph* graph) { + // Finds all of the replicate function calls, to avoid mutating the graph + // while iterating. + std::vector replicate_nodes; + std::vector guarantee_const_nodes; + for (Node* n : graph->nodes()) { + string name; + if (TryGetNodeAttr(n->attrs(), kTPUReplicateAttr, &name) && + !TryGetNodeAttr(n->attrs(), kOutsideCompilationAttr, &name)) { + replicate_nodes.push_back(n); + } else if (n->type_string() == "GuaranteeConst") { + guarantee_const_nodes.push_back(n); + } + } + + // Replace any GuaranteeConst nodes with Identity nodes. These nodes have now + // served their purpose and have no runtime effect, except increasing + // inference latency due to executor overhead. Subsequent rewrites will remove + // the Identity nodes. 
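Editor's note: the undeclared-output check in Encapsulate above flags any data edge whose source sits inside a TPU cluster (and is not outside-compiled) while its destination is neither in the cluster nor a TPUReplicatedOutput node. A standalone sketch of that predicate with simplified endpoint descriptions:

#include <string>

struct EndpointInfo {
  bool in_tpu_cluster = false;          // carries the _tpu_replicate attribute
  bool is_outside_compilation = false;  // carries the outside compilation attribute
  std::string op;
};

// Applies to data edges only; control edges are exempt from the check.
bool IsUndeclaredOutput(const EndpointInfo& src, const EndpointInfo& dst) {
  return src.in_tpu_cluster && !src.is_outside_compilation &&
         !dst.in_tpu_cluster && dst.op != "TPUReplicatedOutput";
}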
+ for (Node* n : guarantee_const_nodes) { + std::vector> predecessors; + for (const Edge* e : n->in_edges()) { + predecessors.emplace_back(e->src(), e->src_output()); + } + std::vector> successors; + for (const Edge* e : n->out_edges()) { + successors.emplace_back(e->dst(), e->dst_input()); + } + NodeDef ndef; + ndef.set_name(n->name()); + ndef.set_op("Identity"); + ndef.set_device(n->requested_device()); + MergeDebugInfo(NodeDebugInfo(n->def()), &ndef); + AddNodeAttr("T", n->output_type(0), &ndef); + + graph->RemoveNode(n); + Status s; + Node* id_node = graph->AddNode(ndef, &s); + TF_RETURN_IF_ERROR(s); + + for (const auto& pred : predecessors) { + if (pred.second < 0) { + graph->AddControlEdge(pred.first, id_node); + } else { + graph->AddEdge(pred.first, pred.second, id_node, 0); + } + } + for (const auto& succ : successors) { + if (succ.second < 0) { + graph->AddControlEdge(id_node, succ.first); + } else { + graph->AddEdge(id_node, 0, succ.first, succ.second); + } + } + } + + // Replaces each replicate function call together with its neighboring + // TPUReplicatedInput/TPUReplicatedOutput nodes with a TPUReplicate node. + for (Node* replicate : replicate_nodes) { + int num_replicas; + TF_RETURN_IF_ERROR( + GetNodeAttr(replicate->attrs(), "num_replicas", &num_replicas)); + int variable_start_index; + TF_RETURN_IF_ERROR(GetNodeAttr(replicate->attrs(), "_variable_start_index", + &variable_start_index)); + int guaranteed_const_start_index; + TF_RETURN_IF_ERROR(GetNodeAttr(replicate->attrs(), + "_guaranteed_const_start_index", + &guaranteed_const_start_index)); + + if (HasNodeAttr(replicate->def(), "use_tpu")) { + bool use_tpu; + TF_RETURN_IF_ERROR(GetNodeAttr(replicate->attrs(), "use_tpu", &use_tpu)); + if (!use_tpu) { + LOG(WARNING) << "use_tpu=false attr on a TPUReplicate node is ignored."; + } + } + + std::vector in_edges; + TF_RETURN_IF_ERROR(replicate->input_edges(&in_edges)); + + // Counts the number of replicated, non-replicated, and variable inputs. + int pos = 0; + std::vector mirrored_variable_indices; + int distributed_var_start_index = 0; + while (pos < in_edges.size() && + in_edges[pos]->src()->type_string() == kTPUReplicatedInput) { + // Checks that each TPUReplicatedInput node has the correct number of + // replicas. + int input_num_replicas; + TF_RETURN_IF_ERROR( + GetNodeAttr(in_edges[pos]->src()->attrs(), "N", &input_num_replicas)); + + bool is_mirrored_variable; + CHECK(GetNodeAttr(in_edges[pos]->src()->attrs(), "is_mirrored_variable", + &is_mirrored_variable) + .ok()); + if (is_mirrored_variable) { + mirrored_variable_indices.push_back(pos); + } + + bool is_packed = false; + GetNodeAttr(in_edges[pos]->src()->attrs(), "is_packed", &is_packed) + .IgnoreError(); + + bool is_distributed_variable = + is_packed && (in_edges[pos]->src()->output_type( + in_edges[pos]->src_output()) == DT_RESOURCE); + + if (!is_distributed_variable && input_num_replicas != num_replicas) { + return errors::InvalidArgument( + "Mismatched number of replicas. 
Computation has ", num_replicas, + " replicas, input '", FormatNodeForError(*in_edges[pos]->src()), + "' has ", input_num_replicas, " replicas."); + } + + if (!is_distributed_variable) { + if (distributed_var_start_index < pos) { + return errors::InvalidArgument( + "Expect a distributed resource after index ", + distributed_var_start_index, + ", but got a replicated resource at index ", pos); + } else { + ++distributed_var_start_index; + } + } + ++pos; + } + const int num_replicated_inputs = distributed_var_start_index; + const int num_distributed_vars = pos - num_replicated_inputs; + + const int num_variables = + std::max(0, guaranteed_const_start_index - variable_start_index); + + const int num_guaranteed_constants = + in_edges.size() - guaranteed_const_start_index; + TF_RET_CHECK(num_guaranteed_constants >= 0); + + VLOG(1) << "Replicate node '" << replicate->name() << "'" + << " input edges: " << in_edges.size() + << " num_replicated_inputs: " << num_replicated_inputs + << " num_distributed_vars: " << num_distributed_vars + << " num_variables: " << num_variables + << " num_guaranteed_constants: " << num_guaranteed_constants + << " num_mirrored_variables: " << mirrored_variable_indices.size(); + + const int num_broadcast_inputs = + in_edges.size() - (num_replicated_inputs + num_distributed_vars + + num_variables + num_guaranteed_constants); + TF_RET_CHECK(num_broadcast_inputs >= 0); + + const int num_inputs = num_replicated_inputs * num_replicas + + num_distributed_vars + num_broadcast_inputs + + num_guaranteed_constants + num_variables; + + std::vector nodes_to_remove = {replicate}; + + // Data and control inputs to the new TPUReplicate node. + std::vector> data_inputs(num_inputs); + gtl::FlatSet control_inputs; + + AddControlInputs(*replicate, &control_inputs); + + // Replicated inputs. Adds the inputs from the TPUReplicatedInput inputs, + // in replica-major order. See the comments in + // distributed_tpu_rewrite_pass.h for a description of the argument order. + DataTypeVector replicated_input_types(num_replicated_inputs * num_replicas + + num_distributed_vars); + + // Inputs with is_distributed_variable = false. + for (int i = 0; i < num_replicated_inputs; ++i) { + std::vector replica_in_edges; + TF_RETURN_IF_ERROR(in_edges[i]->src()->input_edges(&replica_in_edges)); + for (int replica = 0; replica < num_replicas; ++replica) { + int pos = replica * num_replicated_inputs + i; + const Edge* edge = replica_in_edges[replica]; + data_inputs[pos] = {edge->src(), edge->src_output()}; + replicated_input_types[pos] = EdgeType(edge); + } + AddControlInputs(*in_edges[i]->src(), &control_inputs); + nodes_to_remove.push_back(in_edges[i]->src()); + } + + // Inputs with is_distributed_variable = true. + for (int i = 0; i < num_distributed_vars; ++i) { + int pos = num_replicas * num_replicated_inputs + i; + std::vector replica_in_edges; + TF_RETURN_IF_ERROR( + in_edges[num_replicated_inputs + i]->src()->input_edges( + &replica_in_edges)); + TF_RET_CHECK(replica_in_edges.size() == 1); + const Edge* edge = replica_in_edges[0]; + data_inputs[pos] = {edge->src(), edge->src_output()}; + replicated_input_types[pos] = EdgeType(edge); + AddControlInputs(*in_edges[num_replicated_inputs + i]->src(), + &control_inputs); + nodes_to_remove.push_back(in_edges[num_replicated_inputs + i]->src()); + } + + // Appends the broadcast inputs. 
+ DataTypeVector broadcast_input_types(num_broadcast_inputs); + for (int i = 0; i < num_broadcast_inputs; ++i) { + int pos = num_replicas * num_replicated_inputs + num_distributed_vars + i; + const Edge* edge = + in_edges[num_replicated_inputs + num_distributed_vars + i]; + data_inputs[pos] = {edge->src(), edge->src_output()}; + broadcast_input_types[i] = EdgeType(edge); + } + + // Appends the variable inputs. + for (int i = 0; i < num_variables; ++i) { + int pos = num_replicas * num_replicated_inputs + num_distributed_vars + + num_broadcast_inputs + i; + const Edge* edge = in_edges[num_replicated_inputs + num_distributed_vars + + num_broadcast_inputs + i]; + data_inputs[pos] = {edge->src(), edge->src_output()}; + } + + DataTypeVector guaranteed_constant_types(num_guaranteed_constants); + for (int i = 0; i < num_guaranteed_constants; ++i) { + int pos = num_replicas * num_replicated_inputs + num_distributed_vars + + num_broadcast_inputs + num_variables + i; + const Edge* edge = in_edges[num_replicated_inputs + num_distributed_vars + + num_broadcast_inputs + num_variables + i]; + data_inputs[pos] = {edge->src(), edge->src_output()}; + guaranteed_constant_types[i] = EdgeType(edge); + } + + // Outputs. All outputs from a replicated computation are replicated. + const int num_outputs = replicate->output_types().size(); + gtl::FlatSet control_outputs; + std::vector replicated_outputs(num_outputs); + for (const Edge* e : replicate->out_edges()) { + if (e->IsControlEdge()) { + control_outputs.insert(e->dst()); + } else { + TF_RET_CHECK(e->src_output() < num_outputs); + TF_RET_CHECK(e->dst()->type_string() == kTPUReplicatedOutput) + << e->DebugString(); + TF_RET_CHECK(e->dst()->output_types().size() == num_replicas); + replicated_outputs[e->src_output()] = e->dst(); + nodes_to_remove.push_back(e->dst()); + + AddControlOutputs(*e->dst(), &control_outputs); + } + } + + // Flattens the edges outgoing from the TPUReplicatedOutput nodes in + // replica-major order. + std::vector>> data_outputs(num_replicas * + num_outputs); + DataTypeVector output_types(num_replicas * num_outputs); + for (int i = 0; i < num_outputs; ++i) { + std::vector> replica_out_edges(num_replicas); + TF_RET_CHECK(replicated_outputs[i] != nullptr); + for (const Edge* e : replicated_outputs[i]->out_edges()) { + TF_RET_CHECK(!e->IsControlEdge()); + replica_out_edges[e->src_output()].push_back(e); + } + + for (int replica = 0; replica < num_replicas; ++replica) { + const int pos = replica * num_outputs + i; + for (const Edge* edge : replica_out_edges[replica]) { + data_outputs[pos].push_back({edge->dst(), edge->dst_input()}); + } + output_types[pos] = replicated_outputs[i]->input_type(0); + } + } + + // TODO(b/79092708): Consolidate and cleanup to avoid TPU specialization. 
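Editor's note: the flattened _TPUReplicate input list assembled here is segmented in a fixed order: all replicated inputs (replica-major), then distributed variables, broadcast inputs, variables, and finally guaranteed constants. A small sketch of the segment offsets implied by the `pos` computations above, using a hypothetical struct:

struct TPUReplicateInputLayout {
  int num_replicas = 0;
  int num_replicated_inputs = 0;  // per replica
  int num_distributed_vars = 0;
  int num_broadcast_inputs = 0;
  int num_variables = 0;

  int replicated_begin() const { return 0; }
  int distributed_begin() const { return num_replicas * num_replicated_inputs; }
  int broadcast_begin() const { return distributed_begin() + num_distributed_vars; }
  int variables_begin() const { return broadcast_begin() + num_broadcast_inputs; }
  int guaranteed_const_begin() const { return variables_begin() + num_variables; }
};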
+ NodeDef def; + def.set_name(replicate->name()); + def.set_op("_TPUReplicate"); + MergeDebugInfo(NodeDebugInfo(replicate->def()), &def); + NameAttrList computation; + computation.set_name(replicate->type_string()); + AddNodeAttr("computation", computation, &def); + for (const auto& attr : replicate->attrs()) { + def.mutable_attr()->insert(attr); + } + AddNodeAttr("Tinputs", replicated_input_types, &def); + AddNodeAttr("Tbroadcast_inputs", broadcast_input_types, &def); + AddNodeAttr("NumVariables", num_variables, &def); + AddNodeAttr("Tguaranteed_constants", guaranteed_constant_types, &def); + AddNodeAttr("output_types", output_types, &def); + AddNodeAttr(TPUREPLICATE_MIRRORED_VAR_INDICES_ATTR, + mirrored_variable_indices, &def); + AddNodeAttr("num_distributed_variables", num_distributed_vars, &def); + + for (Node* node : nodes_to_remove) { + VLOG(2) << "Deleting node " << node->DebugString(); + // Ensure that we do not attempt to add control edges to nodes that are + // deleted. + control_inputs.erase(node); + control_outputs.erase(node); + graph->RemoveNode(node); + } + + Status status; + Node* tpu_replicate = graph->AddNode(def, &status); + if (!status.ok()) { + return status; + } + for (int i = 0; i < data_inputs.size(); ++i) { + graph->AddEdge(data_inputs[i].first, data_inputs[i].second, tpu_replicate, + i); + } + for (Node* n : control_inputs) { + graph->AddControlEdge(n, tpu_replicate); + } + for (int i = 0; i < data_outputs.size(); ++i) { + for (const auto& successor : data_outputs[i]) { + graph->AddEdge(tpu_replicate, i, successor.first, successor.second); + } + } + for (Node* n : control_outputs) { + graph->AddControlEdge(tpu_replicate, n); + } + } + return Status::OK(); +} + +Status EncapsulateTPUComputationsPass::Run( + const GraphOptimizationPassOptions& options) { + VLOG(1) << "EncapsulateTPUComputations(): " + << DumpGraphToFile("encapsulate_tpu_computations_before", + **options.graph, options.flib_def); + + TF_RETURN_IF_ERROR(Encapsulate(options.graph, options.flib_def)); + VLOG(1) << "EncapsulateTPUComputations() half-way: " + << DumpGraphToFile("encapsulate_tpu_computations_halfway", + **options.graph, options.flib_def); + + TF_RETURN_IF_ERROR(BuildTPUReplicateOps(options.graph->get())); + VLOG(1) << "EncapsulateTPUComputations() finished: " + << DumpGraphToFile("encapsulate_tpu_computations_after", + **options.graph, options.flib_def); + return Status::OK(); +} + +Status ExtractOutsideCompilationPass::ProcessHeadTailOutsideCompilation( + const string& outside_compilation_attr_name, int* lifted_arg_count, + std::unordered_map* clusters, Graph* g, + FunctionLibraryRuntime* flr, FunctionLibraryDefinition* fld) { + // Gather a list of pivots by cluster so we can easily look them up. + absl::node_hash_map pivots; + string cluster_name; + for (Node* node : g->nodes()) { + if (TryGetNodeAttr(node->attrs(), kPivotForClusterAttr, &cluster_name)) { + pivots[cluster_name] = node; + } + } + for (auto& iter : *clusters) { + // Find pivot node for this XLA cluster. + Node* pivot_node = pivots[iter.first]; + + // Instantiate XLA computation function. + string xla_func_name = iter.second.func_name_attrs.name(); + std::unique_ptr xla_fbody; + TF_RETURN_IF_ERROR(FunctionDefToBodyHelper( + *fld->Find(xla_func_name), + AttrSlice(&iter.second.func_name_attrs.attr()), fld, &xla_fbody)); + Graph* xla_graph = xla_fbody->graph; + + // Make sure all nodes can be traced from sink node. 
+ FixupSourceAndSinkEdges(xla_graph); + + // We create Identity nodes for all _Arg/_Retval nodes in XLA computation. + // Remove those Identity nodes to simplify furthur processing. + TF_RETURN_IF_ERROR(RemoveIdentityNodesForArgRetval(xla_graph)); + + bool rewritten; + TF_RETURN_IF_ERROR(LiftOutsideCompilationOnlyArgs( + xla_graph, flr, fld, lifted_arg_count, &rewritten)); + + // Move head outside compilation to host. + TF_RETURN_IF_ERROR(MoveHeadOutsideCompilationToHost( + outside_compilation_attr_name, iter.second.func_name_attrs.name(), + iter.second.cluster_name, g, xla_graph, iter.second.node, pivot_node)); + + // Move tail outside compilation to host. + TF_RETURN_IF_ERROR(MoveTailOutsideCompilationToHost( + outside_compilation_attr_name, iter.second.func_name_attrs.name(), + iter.second.cluster_name, g, xla_graph, iter.second.node, pivot_node)); + + // Replace outside compilation only _Arg nodes with Placeholder nodes. + TF_RETURN_IF_ERROR(ReplaceArgUsedByOutsideCompilationWithPlaceholder( + outside_compilation_attr_name, xla_func_name, g, xla_graph, + iter.second.node)); + + // There might be direct data edges between _Arg node and _Retval node in + // `xla_graph`. Remove those edges to avoid back-and-forth data transfer + // between host and XLA. + TF_RETURN_IF_ERROR(RemoveEdgesBetweenArgAndRetval( + iter.second.func_name_attrs.name(), g, xla_graph, iter.second.node)); + + // After `MoveHeadOutsideCompilationToHost`, there might be unused XLA + // inputs. Remove them. + TF_RETURN_IF_ERROR(RemoveUnusedXlaInput(iter.second.func_name_attrs.name(), + g, xla_graph, iter.second.node)); + + // After `MoveTailOutsideCompilationToHost`, there might be unused XLA + // outputs. Remove them. + TF_RETURN_IF_ERROR(RemoveUnusedXlaOutput(iter.second.func_name_attrs.name(), + g, xla_graph, iter.second.node)); + + // Replace original function. + FunctionDef replace_fdef; + TF_RETURN_IF_ERROR( + GraphToFunctionDef(*xla_graph, xla_func_name, &replace_fdef)); + TF_RETURN_IF_ERROR(fld->ReplaceFunction(xla_func_name, replace_fdef)); + + FixupSourceAndSinkEdges(g); + } + + return Status::OK(); +} + +Status ExtractOutsideCompilationPass::Run( + const GraphOptimizationPassOptions& options) { + const auto* config = + (options.session_options ? &options.session_options->config : nullptr); + std::unique_ptr pflr( + new ProcessFunctionLibraryRuntime( + /*device_mgr=*/nullptr, options.session_options->env, + /*config=*/config, TF_GRAPH_DEF_VERSION, options.flib_def, + config ? config->graph_options().optimizer_options() + : OptimizerOptions())); + FunctionLibraryRuntime* flr = + pflr->GetFLR(ProcessFunctionLibraryRuntime::kDefaultFLRDevice); + + // Find XLA compile ops and their corresponding FunctionDefs. 
+ static std::map* kNodeTypeToFunctionAttrMapping = + new std::map{ + {"_TPUReplicate", "computation"}, + }; + std::unordered_map clusters; + int lifted_arg_count = 0; + for (Node* n : (*options.graph)->nodes()) { + auto iter = kNodeTypeToFunctionAttrMapping->find(n->type_string()); + if (iter == kNodeTypeToFunctionAttrMapping->end()) { + continue; + } + + string xla_cluster_name = n->name(); + + string func_attr = iter->second; + NameAttrList func; + TF_RETURN_IF_ERROR(GetNodeAttr(n->attrs(), func_attr, &func)); + + std::vector core_list; + TF_RETURN_IF_ERROR( + GetNodeAttr(n->attrs(), "host_compute_core", &core_list)); + std::map host_compute_core; + TF_RETURN_IF_ERROR(ParseHostComputeCoreList(core_list, &host_compute_core)); + + clusters.emplace(xla_cluster_name, XlaClusterInfo{xla_cluster_name, func, n, + host_compute_core}); + } + TF_RETURN_IF_ERROR(ProcessHeadTailOutsideCompilation( + kOutsideCompilationAttr, &lifted_arg_count, &clusters, + options.graph->get(), flr, options.flib_def)); + bool modified; + TF_RETURN_IF_ERROR(ExtractOutsideCompilation( + kTPUReplicateAttr, kOutsideCompilationAttr, clusters, + options.graph->get(), flr, options.flib_def, &modified)); + if (modified) { + TF_RETURN_IF_ERROR( + PruneUnreachableFunctionsFromGraph(**options.graph, options.flib_def)); + } + + return Status::OK(); +} + +} // namespace tensorflow diff --git a/tensorflow/core/tpu/graph_rewrite/encapsulate_tpu_computations_pass.h b/tensorflow/core/tpu/graph_rewrite/encapsulate_tpu_computations_pass.h new file mode 100644 index 00000000000..91677f7c6cb --- /dev/null +++ b/tensorflow/core/tpu/graph_rewrite/encapsulate_tpu_computations_pass.h @@ -0,0 +1,73 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Rewrites computations generated by the tpu.replicate() Python code into +// TPUReplicate operators. +// +// The tpu.replicate() does two main things: +// a) marks operators that make up a TPU computation with the attribute +// _tpu_replicate=XYZ, where XYZ is a unique key. +// b) adds TPUReplicatedInput and TPUReplicatedOutput nodes to represent +// replicated inputs. These nodes are not marked with the _tpu_replicate +// attribute. + +#ifndef TENSORFLOW_CORE_TPU_GRAPH_REWRITES_ENCAPSULATE_TPU_COMPUTATIONS_PASS_H_ +#define TENSORFLOW_CORE_TPU_GRAPH_REWRITES_ENCAPSULATE_TPU_COMPUTATIONS_PASS_H_ + +#include "tensorflow/compiler/jit/encapsulate_util.h" +#include "tensorflow/core/common_runtime/optimization_registry.h" +#include "tensorflow/core/graph/graph.h" + +namespace tensorflow { + +// Encapsulates nodes marked with the _tpu_replicate attribute into +// TPUReplicate operators. +class EncapsulateTPUComputationsPass : public GraphOptimizationPass { + public: + Status Run(const GraphOptimizationPassOptions& options) override; + + // The following methods are public only for unit tests. 
+ + // This pass has two stages: + // a) first, we call the EncapsulateSubgraphsPass to encapsulate all nodes + // marked with the same _tpu_replicate attribute into functions. These + // functions contain the computations to be passed to TPUReplicate. During + // encapsulation, we sort the arguments into the order expected by + // TPUReplicate. + static Status Encapsulate(std::unique_ptr* graph, + FunctionLibraryDefinition* flib_def); + + // b) we rewrite the function calls generated in phase (a) into TPUReplicate + // operators. We also flatten the TPUReplicatedInput and + // TPUReplicatedOutput replicated input and output nodes of the function + // call into the replicated input and outputs of the TPUReplicate operator. + static Status BuildTPUReplicateOps(Graph* graph); +}; + +// Graph optimization pass that calls `ExtractOutsideCompilation` for all XLA +// computation nodes. +class ExtractOutsideCompilationPass : public GraphOptimizationPass { + public: + Status Run(const GraphOptimizationPassOptions& options) override; + + static Status ProcessHeadTailOutsideCompilation( + const string& outside_compilation_attr_name, int* lifted_arg_count, + std::unordered_map* clusters, Graph* g, + FunctionLibraryRuntime* flr, FunctionLibraryDefinition* fld); +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_TPU_GRAPH_REWRITES_ENCAPSULATE_TPU_COMPUTATIONS_PASS_H_ diff --git a/tensorflow/core/tpu/graph_rewrite/encapsulate_tpu_computations_pass_test.cc b/tensorflow/core/tpu/graph_rewrite/encapsulate_tpu_computations_pass_test.cc new file mode 100644 index 00000000000..c57d6311f31 --- /dev/null +++ b/tensorflow/core/tpu/graph_rewrite/encapsulate_tpu_computations_pass_test.cc @@ -0,0 +1,810 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/tpu/graph_rewrite/encapsulate_tpu_computations_pass.h" + +#include "tensorflow/cc/ops/array_ops.h" +#include "tensorflow/cc/ops/function_ops.h" +#include "tensorflow/cc/ops/parsing_ops.h" +#include "tensorflow/cc/ops/resource_variable_ops.h" +#include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/cc/ops/tpu_replication_ops.h" +#include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h" +#include "tensorflow/compiler/tf2xla/test_util.h" +#include "tensorflow/core/common_runtime/device_factory.h" +#include "tensorflow/core/common_runtime/graph_constructor.h" +#include "tensorflow/core/common_runtime/process_function_library_runtime.h" +#include "tensorflow/core/framework/graph_to_functiondef.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/lib/hash/hash.h" +#include "tensorflow/core/lib/strings/proto_serialization.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/public/session_options.h" +#include "tensorflow/core/public/version.h" +#include "tensorflow/core/tpu/tpu_defs.h" +#include "tensorflow/core/util/equal_graph_def.h" +#include "tensorflow/core/util/ptr_util.h" + +namespace tensorflow { + +static std::unique_ptr MakeOuterGraph( + const FunctionLibraryDefinition& flib_def, const string& function) { + Scope scope = Scope::NewRootScope().ExitOnError(); + TF_EXPECT_OK(scope.graph()->AddFunctionLibrary(flib_def.ToProto())); + + int num_replicas = 2; + + auto a0 = ops::Placeholder(scope.WithOpName("A0"), DT_INT32); + auto a1 = ops::Placeholder(scope.WithOpName("A1"), DT_INT32); + auto b0 = ops::Placeholder(scope.WithOpName("B0"), DT_FLOAT); + auto b1 = ops::Placeholder(scope.WithOpName("B1"), DT_FLOAT); + auto u0 = ops::Placeholder(scope.WithOpName("U0"), DT_RESOURCE); + auto u1 = ops::Placeholder(scope.WithOpName("U1"), DT_RESOURCE); + auto z = ops::Placeholder(scope.WithOpName("Z"), DT_RESOURCE); + auto c = ops::Placeholder(scope.WithOpName("C"), DT_INT32); + auto d = ops::Placeholder(scope.WithOpName("D"), DT_FLOAT); + auto v = ops::Placeholder(scope.WithOpName("V"), DT_RESOURCE); + auto w = ops::Placeholder(scope.WithOpName("W"), DT_RESOURCE); + auto x = ops::GuaranteeConst( + scope.WithOpName("X"), + ops::Placeholder(scope.WithOpName("X_Holder"), DT_DOUBLE)); + auto y = ops::GuaranteeConst( + scope.WithOpName("Y"), + ops::Placeholder(scope.WithOpName("Y_Holder"), DT_DOUBLE)); + + auto in0 = ops::TPUReplicatedInput(scope.WithOpName("In0"), + std::initializer_list{a0, a1}); + auto in1 = ops::TPUReplicatedInput(scope.WithOpName("In1"), + std::initializer_list{b0, b1}); + auto in2 = ops::TPUReplicatedInput(scope.WithOpName("In2"), + std::initializer_list{u0, u1}); + auto in3 = ops::TPUReplicatedInput(scope.WithOpName("In3"), + std::initializer_list{z}); + in3.node()->AddAttr("is_packed", true); + + NodeDef def; + TF_CHECK_OK(NodeDefBuilder("replicate0", function, &flib_def) + .Input(in0.node()->name(), 0, DT_INT32) + .Input(in1.node()->name(), 0, DT_FLOAT) + .Input(in2.node()->name(), 0, DT_RESOURCE) + .Input(in3.node()->name(), 0, DT_RESOURCE) + .Input(c.node()->name(), 0, DT_INT32) + .Input(d.node()->name(), 0, DT_FLOAT) + .Input(v.node()->name(), 0, DT_RESOURCE) + .Input(w.node()->name(), 0, DT_RESOURCE) + .Input(x.node()->name(), 0, DT_DOUBLE) + .Input(y.node()->name(), 0, DT_DOUBLE) + .Attr(kTPUReplicateAttr, "replicate0") + .Attr("num_replicas", num_replicas) + .Attr("num_cores_per_replica", 6) + 
.Attr("topology", "") + .Attr("use_tpu", true) + .Attr("device_assignment", std::vector()) + .Attr("host_compute_core", std::vector()) + .Attr("padding_map", std::vector()) + .Attr("_variable_start_index", 6) + .Attr("_guaranteed_const_start_index", 8) + .Attr("allow_soft_placement", false) + .Attr("step_marker_location", "STEP_MARK_AT_ENTRY") + .Attr("use_spmd_for_xla_partitioning", false) + .Finalize(&def)); + + Status status; + Node* replicate = scope.graph()->AddNode(def, &status); + TF_CHECK_OK(status); + TF_CHECK_OK(scope.DoShapeInference(replicate)); + scope.graph()->AddEdge(in0.node(), 0, replicate, 0); + scope.graph()->AddEdge(in1.node(), 0, replicate, 1); + scope.graph()->AddEdge(in2.node(), 0, replicate, 2); + scope.graph()->AddEdge(in3.node(), 0, replicate, 3); + scope.graph()->AddEdge(c.node(), 0, replicate, 4); + scope.graph()->AddEdge(d.node(), 0, replicate, 5); + scope.graph()->AddEdge(v.node(), 0, replicate, 6); + scope.graph()->AddEdge(w.node(), 0, replicate, 7); + scope.graph()->AddEdge(x.node(), 0, replicate, 8); + scope.graph()->AddEdge(y.node(), 0, replicate, 9); + + auto out0 = ops::TPUReplicatedOutput(scope.WithOpName("Out0"), + Output(replicate, 0), num_replicas); + auto out1 = ops::TPUReplicatedOutput(scope.WithOpName("Out1"), + Output(replicate, 1), num_replicas); + auto out2 = ops::TPUReplicatedOutput(scope.WithOpName("Out2"), + Output(replicate, 2), num_replicas); + auto out3 = ops::TPUReplicatedOutput(scope.WithOpName("Out3"), + Output(replicate, 3), num_replicas); + auto out4 = ops::TPUReplicatedOutput(scope.WithOpName("Out4"), + Output(replicate, 4), num_replicas); + + auto consumer0_0a = ops::Identity(scope.WithOpName("consumer0_0a"), out0[0]); + auto consumer0_0b = ops::Identity(scope.WithOpName("consumer0_0b"), out0[0]); + auto consumer0_1 = ops::Identity(scope.WithOpName("consumer0_1"), out0[1]); + auto consumer1 = ops::Identity(scope.WithOpName("consumer1"), out1[1]); + auto consumer2 = ops::Identity(scope.WithOpName("consumer2"), out2[0]); + auto consumer3a = ops::Identity(scope.WithOpName("consumer3a"), out3[0]); + auto consumer3b = ops::Identity(scope.WithOpName("consumer3b"), out3[1]); + auto consumer4a = ops::Identity(scope.WithOpName("consumer4a"), out4[0]); + auto consumer4b = ops::Identity(scope.WithOpName("consumer4b"), out4[1]); + + std::unique_ptr graph(new Graph(OpRegistry::Global())); + TF_CHECK_OK(scope.ToGraph(graph.get())); + return graph; +} + +// Makes an encapsulate body graph for use in tests. 
+static std::unique_ptr MakeBodyGraph() { + Scope scope = Scope::NewRootScope().ExitOnError(); + + auto arg0 = ops::_Arg(scope.WithOpName("in0_0_arg"), DT_INT32, 0); + auto arg1 = ops::_Arg(scope.WithOpName("in1_0_arg"), DT_FLOAT, 1); + auto arg2 = ops::_Arg(scope.WithOpName("in2_0_arg"), DT_RESOURCE, 2); + auto arg3 = ops::_Arg(scope.WithOpName("in3_0_arg"), DT_RESOURCE, 3); + auto arg4 = ops::_Arg(scope.WithOpName("c_0_arg"), DT_INT32, 4); + auto arg5 = ops::_Arg(scope.WithOpName("d_0_arg"), DT_FLOAT, 5); + auto arg6 = ops::_Arg(scope.WithOpName("v_0_arg"), DT_RESOURCE, 6); + auto arg7 = ops::_Arg(scope.WithOpName("w_0_arg"), DT_RESOURCE, 7); + + auto add_attrs = [](Node* node) { + node->AddAttr(kTPUReplicateAttr, "replicate0"); + }; + + string device = + tensorflow::strings::StrCat("/device:", DEVICE_TPU_REPLICATED_CORE); + + auto in1_identity = + ops::Identity(scope.WithOpName("In1_identity").WithDevice(device), arg1); + + auto read_u = ops::ReadVariableOp( + scope.WithOpName("ReadU").WithDevice(device), arg2, DT_FLOAT); + add_attrs(read_u.node()); + auto read_z = ops::ReadVariableOp( + scope.WithOpName("ReadZ").WithDevice(device), arg3, DT_FLOAT); + add_attrs(read_z.node()); + auto read_v = ops::ReadVariableOp( + scope.WithOpName("ReadV").WithDevice(device), arg6, DT_FLOAT); + add_attrs(read_v.node()); + auto read_w = ops::ReadVariableOp( + scope.WithOpName("ReadW").WithDevice(device), arg7, DT_FLOAT); + add_attrs(read_w.node()); + + auto e = ops::Add(scope.WithOpName("E").WithDevice(device), arg0, arg4); + add_attrs(e.node()); + auto f = ops::Add(scope.WithOpName("F").WithDevice(device), read_v, read_w); + add_attrs(f.node()); + auto g = ops::Add(scope.WithOpName("G").WithDevice(device), f, arg5); + add_attrs(g.node()); + + auto arg8 = ops::_Arg(scope.WithOpName("x_0_arg"), DT_DOUBLE, 8); + auto arg9 = ops::_Arg(scope.WithOpName("y_0_arg"), DT_DOUBLE, 9); + arg8.node()->AddAttr("_is_guaranteed_constant", true); + arg9.node()->AddAttr("_is_guaranteed_constant", true); + auto h = ops::Add(scope.WithOpName("H").WithDevice(device), arg8, arg9); + add_attrs(h.node()); + + auto out0 = ops::_Retval(scope.WithOpName("e_0_retval_RetVal"), e, 0); + auto out1 = ops::_Retval(scope.WithOpName("g_0_retval_RetVal"), g, 1); + auto out2 = ops::_Retval(scope.WithOpName("in1_identity_0_retval_RetVal"), + in1_identity, 2); + auto out3 = + ops::_Retval(scope.WithOpName("readu_0_retval_RetVal"), read_u, 3); + auto out4 = + ops::_Retval(scope.WithOpName("readz_0_retval_RetVal"), read_z, 4); + + std::unique_ptr graph(new Graph(OpRegistry::Global())); + TF_CHECK_OK(scope.ToGraph(graph.get())); + return graph; +} + +TEST(EncapsulateTPUComputations, DeterministicEncapsulate) { + // Test that control edge insertion order doesn't affect the cache key + // (cluster name) generated by TPU encapsulate pass. + auto get_serialized_graph = [](bool control_input_reversed, + bool operand_reversed) -> string { + FunctionLibraryDefinition flib_def(OpRegistry::Global(), {}); + std::unique_ptr graph(new Graph(&flib_def)); + { + Scope scope = Scope::NewRootScope().ExitOnError(); + auto a0 = ops::Placeholder(scope.WithOpName("A0"), DT_INT32); + auto a1 = ops::Placeholder(scope.WithOpName("A1"), DT_INT32); + + ops::Add e = operand_reversed ? 
ops::Add(scope.WithOpName("E"), a0, a1) + : ops::Add(scope.WithOpName("E"), a1, a0); + + auto metadata = ops::TPUReplicateMetadata(scope, /*num_replicas=*/2); + auto add_attrs = [](Node* node) { + node->AddAttr(kTPUReplicateAttr, "replicate0"); + }; + add_attrs(metadata.operation.node()); + add_attrs(e.node()); + + TF_CHECK_OK(scope.ToGraph(graph.get())); + auto get_node_in_graph = [&graph](Node* node) { + return graph->FindNodeId(node->id()); + }; + // Insert control edge in different order. The order should not affect + // the encapsulated or serialized graph. + if (!control_input_reversed) { + graph->AddControlEdge(get_node_in_graph(a0.node()), + get_node_in_graph(e.node()), true); + graph->AddControlEdge(get_node_in_graph(a1.node()), + get_node_in_graph(e.node()), true); + } else { + graph->AddControlEdge(get_node_in_graph(a1.node()), + get_node_in_graph(e.node()), true); + graph->AddControlEdge(get_node_in_graph(a0.node()), + get_node_in_graph(e.node()), true); + } + } + TF_CHECK_OK(EncapsulateTPUComputationsPass::Encapsulate(&graph, &flib_def)); + GraphDef gdef; + graph->ToGraphDef(&gdef); + // Before serialization, sort control inputs first to remove + // nondeterminism. + SortControlInputs(&gdef); + string serialized; + SerializeToStringDeterministic(gdef, &serialized); + return serialized; + }; + + // Changing the order of control input shouldn't affect the graph generated. + EXPECT_EQ(get_serialized_graph(/*control_input_reversed=*/true, + /*operand_reversed=*/false), + get_serialized_graph(/*control_input_reversed=*/false, + /*operand_reversed=*/false)); + + // Changing the order of data input should affect the graph generated. + EXPECT_NE(get_serialized_graph(/*control_input_reversed=*/false, + /*operand_reversed=*/true), + get_serialized_graph(/*control_input_reversed=*/false, + /*operand_reversed=*/false)); +} + +TEST(EncapsulateTPUComputations, Encapsulate) { + FunctionLibraryDefinition flib_def(OpRegistry::Global(), {}); + std::unique_ptr graph(new Graph(&flib_def)); + { + Scope scope = Scope::NewRootScope().ExitOnError(); + auto a0 = ops::Placeholder(scope.WithOpName("A0"), DT_INT32); + auto a1 = ops::Placeholder(scope.WithOpName("A1"), DT_INT32); + auto b0 = ops::Placeholder(scope.WithOpName("B0"), DT_FLOAT); + auto b1 = ops::Placeholder(scope.WithOpName("B1"), DT_FLOAT); + auto c = ops::Placeholder(scope.WithOpName("C"), DT_INT32); + auto d = ops::Placeholder(scope.WithOpName("D"), DT_FLOAT); + auto u0 = ops::Placeholder(scope.WithOpName("U0"), DT_RESOURCE); + auto u1 = ops::Placeholder(scope.WithOpName("U1"), DT_RESOURCE); + auto z = ops::Placeholder(scope.WithOpName("Z"), DT_RESOURCE); + auto v = ops::Placeholder(scope.WithOpName("V"), DT_RESOURCE); + auto w = ops::Placeholder(scope.WithOpName("W"), DT_RESOURCE); + auto x = ops::GuaranteeConst( + scope.WithOpName("X"), + ops::Placeholder(scope.WithOpName("X_Holder"), DT_DOUBLE)); + auto y = ops::GuaranteeConst( + scope.WithOpName("Y"), + ops::Placeholder(scope.WithOpName("Y_Holder"), DT_DOUBLE)); + + auto in0 = ops::TPUReplicatedInput(scope.WithOpName("In0"), + std::initializer_list{a0, a1}); + auto in1 = ops::TPUReplicatedInput(scope.WithOpName("In1"), + std::initializer_list{b0, b1}); + auto in2 = ops::TPUReplicatedInput(scope.WithOpName("In2"), + std::initializer_list{u0, u1}); + auto in3 = ops::TPUReplicatedInput(scope.WithOpName("In3"), + std::initializer_list{z}); + in3.node()->AddAttr("is_packed", true); + + auto add_attrs = [](Node* node) { + node->AddAttr(kTPUReplicateAttr, "replicate0"); + }; + auto metadata 
= ops::TPUReplicateMetadata( + scope, /*num_replicas=*/2, + ops::TPUReplicateMetadata::ComputationShape({2, 3})); + add_attrs(metadata.operation.node()); + + auto in1_identity = ops::Identity(scope.WithOpName("In1_identity"), in1); + add_attrs(in1_identity.node()); + + auto read_u = ops::ReadVariableOp(scope.WithOpName("ReadU"), in2, DT_FLOAT); + add_attrs(read_u.node()); + auto read_z = ops::ReadVariableOp(scope.WithOpName("ReadZ"), in3, DT_FLOAT); + add_attrs(read_z.node()); + auto read_v = ops::ReadVariableOp(scope.WithOpName("ReadV"), v, DT_FLOAT); + add_attrs(read_v.node()); + auto read_w = ops::ReadVariableOp(scope.WithOpName("ReadW"), w, DT_FLOAT); + add_attrs(read_w.node()); + + auto e = ops::Add(scope.WithOpName("E"), in0, c); + add_attrs(e.node()); + auto f = ops::Add(scope.WithOpName("F"), read_v, read_w); + add_attrs(f.node()); + auto g = ops::Add(scope.WithOpName("G"), f, d); + add_attrs(g.node()); + auto h = ops::Add(scope.WithOpName("H"), x, y); + add_attrs(h.node()); + + auto out0 = ops::TPUReplicatedOutput(scope.WithOpName("Out0"), e, 2); + auto out1 = ops::TPUReplicatedOutput(scope.WithOpName("Out1"), g, 2); + auto out2 = + ops::TPUReplicatedOutput(scope.WithOpName("Out2"), in1_identity, 2); + auto out3 = ops::TPUReplicatedOutput(scope.WithOpName("Out3"), read_u, 2); + auto out4 = ops::TPUReplicatedOutput(scope.WithOpName("Out4"), read_z, 2); + + auto consumer0_0a = + ops::Identity(scope.WithOpName("consumer0_0a"), out0[0]); + auto consumer0_0b = + ops::Identity(scope.WithOpName("consumer0_0b"), out0[0]); + auto consumer0_1 = ops::Identity(scope.WithOpName("consumer0_1"), out0[1]); + auto consumer1 = ops::Identity(scope.WithOpName("consumer1"), out1[1]); + auto consumer2 = ops::Identity(scope.WithOpName("consumer2"), out2[0]); + auto consumer3a = ops::Identity(scope.WithOpName("consumer3a"), out3[0]); + auto consumer3b = ops::Identity(scope.WithOpName("consumer3b"), out3[1]); + auto consumer4a = ops::Identity(scope.WithOpName("consumer4a"), out4[0]); + auto consumer4b = ops::Identity(scope.WithOpName("consumer4b"), out4[1]); + TF_ASSERT_OK(scope.ToGraph(graph.get())); + } + + std::unique_ptr graph_copy(new Graph(&flib_def)); + CopyGraph(*graph, graph_copy.get()); + + TF_ASSERT_OK(EncapsulateTPUComputationsPass::Encapsulate(&graph, &flib_def)); + // Remove _xla_inferred_shapes attribute. + for (Node* n : graph->nodes()) { + n->ClearAttr("_xla_inferred_shapes"); + } + + std::unordered_map index = graph->BuildNodeNameIndex(); + string function = index.at("replicate0")->type_string(); + + // Tests the outer graph is as expected. + { + std::unique_ptr outer = MakeOuterGraph(flib_def, function); + GraphDef expected_def; + outer->ToGraphDef(&expected_def); + + GraphDef actual_def; + graph->ToGraphDef(&actual_def); + TF_EXPECT_GRAPH_EQ_INTERNAL(expected_def, actual_def); + } + + // Tests the encapsulated body graph is as expected. + { + std::unique_ptr body = MakeBodyGraph(); + GraphDef expected_body_def; + body->ToGraphDef(&expected_body_def); + + InstantiationResultForTest result; + TF_EXPECT_OK(InstantiateFunctionForTest(function, flib_def, &result)); + + EXPECT_EQ((DataTypeVector{DT_INT32, DT_FLOAT, DT_RESOURCE, DT_RESOURCE, + DT_INT32, DT_FLOAT, DT_RESOURCE, DT_RESOURCE, + DT_DOUBLE, DT_DOUBLE}), + result.arg_types); + EXPECT_EQ( + (DataTypeVector{DT_INT32, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT}), + result.ret_types); + TF_EXPECT_GRAPH_EQ(expected_body_def, result.gdef); + } + + // Encapsulates the same computation again, verifies we reuse the same + // function. 
Encapsulation should be deterministic to avoid recompilation. + TF_ASSERT_OK( + EncapsulateTPUComputationsPass::Encapsulate(&graph_copy, &flib_def)); + std::unordered_map index_copy = + graph_copy->BuildNodeNameIndex(); + string function_copy = index_copy.at("replicate0")->type_string(); + EXPECT_EQ(function, function_copy); +} + +TEST(EncapsulateTPUComputations, BuildTPUReplicateOps) { + std::unique_ptr body_graph = MakeBodyGraph(); + FunctionDefLibrary flib; + TF_ASSERT_OK( + GraphToFunctionDef(*body_graph, "replicate0", flib.add_function())); + + FunctionLibraryDefinition flib_def(OpRegistry::Global(), flib); + + std::unique_ptr graph = MakeOuterGraph(flib_def, "replicate0"); + TF_ASSERT_OK( + EncapsulateTPUComputationsPass::BuildTPUReplicateOps(graph.get())); + + Scope scope = Scope::NewRootScope().ExitOnError(); + TF_EXPECT_OK(scope.graph()->AddFunctionLibrary(flib)); + + auto a0 = ops::Placeholder(scope.WithOpName("A0"), DT_INT32); + auto a1 = ops::Placeholder(scope.WithOpName("A1"), DT_INT32); + auto b0 = ops::Placeholder(scope.WithOpName("B0"), DT_FLOAT); + auto b1 = ops::Placeholder(scope.WithOpName("B1"), DT_FLOAT); + auto u0 = ops::Placeholder(scope.WithOpName("U0"), DT_RESOURCE); + auto u1 = ops::Placeholder(scope.WithOpName("U1"), DT_RESOURCE); + auto z = ops::Placeholder(scope.WithOpName("Z"), DT_RESOURCE); + auto c = ops::Placeholder(scope.WithOpName("C"), DT_INT32); + auto d = ops::Placeholder(scope.WithOpName("D"), DT_FLOAT); + auto v = ops::Placeholder(scope.WithOpName("V"), DT_RESOURCE); + auto w = ops::Placeholder(scope.WithOpName("W"), DT_RESOURCE); + auto x = + ops::Identity(scope.WithOpName("X"), + ops::Placeholder(scope.WithOpName("X_Holder"), DT_DOUBLE)); + auto y = + ops::Identity(scope.WithOpName("Y"), + ops::Placeholder(scope.WithOpName("Y_Holder"), DT_DOUBLE)); + + NameAttrList function; + function.set_name("replicate0"); + auto replicate = ops::_TPUReplicate( + scope.WithOpName("replicate0"), + std::initializer_list{a0, b0, u0, a1, b1, u1, z}, + std::initializer_list{c, d}, std::initializer_list{v, w}, + std::initializer_list{x, y}, function, + /*num_replicas=*/2, + {DT_INT32, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_INT32, DT_FLOAT, + DT_FLOAT, DT_FLOAT, DT_FLOAT}, + ops::_TPUReplicate::NumCoresPerReplica(6).NumDistributedVariables(1)); + + auto consumer0_0a = + ops::Identity(scope.WithOpName("consumer0_0a"), replicate.outputs[0]); + auto consumer0_0b = + ops::Identity(scope.WithOpName("consumer0_0b"), replicate.outputs[0]); + auto consumer0_1 = + ops::Identity(scope.WithOpName("consumer0_1"), replicate.outputs[5]); + auto consumer1 = + ops::Identity(scope.WithOpName("consumer1"), replicate.outputs[6]); + auto consumer2 = + ops::Identity(scope.WithOpName("consumer2"), replicate.outputs[2]); + auto consumer3a = + ops::Identity(scope.WithOpName("consumer3a"), replicate.outputs[3]); + auto consumer3b = + ops::Identity(scope.WithOpName("consumer3b"), replicate.outputs[8]); + auto consumer4a = + ops::Identity(scope.WithOpName("consumer4a"), replicate.outputs[4]); + auto consumer4b = + ops::Identity(scope.WithOpName("consumer4b"), replicate.outputs[9]); + + GraphDef expected_def; + TF_ASSERT_OK(scope.ToGraphDef(&expected_def)); + + GraphDef actual_def; + graph->ToGraphDef(&actual_def); + TF_EXPECT_GRAPH_EQ(expected_def, actual_def); +} + +class ExtractOutsideCompilationByScope : public ::testing::TestWithParam { +}; + +Status PivotControlExists(const Node* node, const Node* pivot) { + for (const Edge* edge : node->in_edges()) { + if (edge->IsControlEdge() && 
(edge->src() == pivot)) { + return Status::OK(); + } + } + return errors::NotFound("Control edge with pivot not found."); +} + +TEST_P(ExtractOutsideCompilationByScope, + MoveHeadAndTailOutsideCompilationToHost) { + FunctionLibraryDefinition fld(OpRegistry::Global(), FunctionDefLibrary()); + + // Create FunctionLibraryRuntime. + SessionOptions session_options; + std::vector> devices; + TF_CHECK_OK(DeviceFactory::AddDevices( + session_options, "/job:localhost/replica:0/task:0", &devices)); + OptimizerOptions opts; + auto device_mgr = absl::make_unique(std::move(devices)); + auto pflr = absl::make_unique( + device_mgr.get(), Env::Default(), /*config=*/nullptr, + TF_GRAPH_DEF_VERSION, &fld, opts, + /*default_thread_pool=*/nullptr); + auto flr = pflr->GetFLR("/job:localhost/replica:0/task:0/cpu:0"); + + { + // Build TPU replicate function. + // arg0 = _Arg[index = 0, T = DT_STRING] + // arg1 = _Arg[index = 1, T = DT_INT32] + // arg2 = _Arg[index = 2, T = DT_RESOURCE] + // as_int = StringToNumber[out_type = DT_INT32](arg0) (oc node) + // add = Add(as_int, arg1) + // as_string = AsString(add) (oc node) + // read_var = ops::ReadVariableOp(arg2) + // ret0 = _RetVal[index = 0, T = DT_STRING](as_string) + // ret1 = _RetVal[index = 1, T = DT_INT32](add) + // ret2 = _RetVal[index = 1, T = DT_FLOAT](read_var) + Scope s = Scope::NewRootScope().ExitOnError(); + auto arg0 = ops::_Arg(s.WithOpName("arg0"), DT_STRING, 0); + auto arg1 = ops::_Arg(s.WithOpName("arg1"), DT_INT32, 1); + auto arg2 = ops::_Arg(s.WithOpName("arg2"), DT_RESOURCE, 2); + auto as_int = ops::StringToNumber(s.WithOpName("as_int"), arg0, + ops::StringToNumber::OutType(DT_INT32)); + auto add = ops::Add(s.WithOpName("add"), as_int, arg1); + auto as_string = ops::AsString(s.WithOpName("as_string"), add); + auto read_var = + ops::ReadVariableOp(s.WithOpName("ReadVar"), arg2, DT_FLOAT); + auto ret0 = ops::_Retval(s.WithOpName("ret0"), as_string, 0); + auto ret1 = ops::_Retval(s.WithOpName("ret1"), add, 1); + auto ret2 = ops::_Retval(s.WithOpName("ret2"), read_var, 2); + Graph g(OpRegistry::Global()); + TF_ASSERT_OK(s.ToGraph(&g)); + auto node_name_index = g.BuildNodeNameIndex(); + node_name_index["as_int"]->AddAttr("oc", "0"); + node_name_index["as_string"]->AddAttr("oc", "0"); + FunctionDef fdef; + TF_ASSERT_OK(GraphToFunctionDef(g, "cluster", &fdef)); + TF_ASSERT_OK(fld.AddFunctionDef(fdef)); + } + + string control_flow_scope = GetParam() ? "scope/" : ""; + string pivot_name = absl::StrCat(control_flow_scope, "tpu_replicate/pivot"); + Graph host_graph(OpRegistry::Global()); + NameAttrList function; + function.set_name("cluster"); + { + // Build host graph. 
+ // input00 = Placeholder[T = DT_STRING] + // input01 = Placeholder[T = DT_INT32] + // input10 = Placeholder[T = DT_STRING] + // input11 = Placeholder[T = DT_INT32] + // input2 = Placeholder[T = DT_RESOURCE] + // tpu_replicate = _TPUReplicate(input00, input01, input10, input11) + // output = IdentityN(tpu_replicate, tpu_replicate:1, tpu_replicate:2, + // tpu_replicate:3, tpu_replicate:4, tpu_replicate:5) + Scope s = Scope::NewRootScope().ExitOnError(); + auto pivot = ops::NoOp(s.WithOpName(pivot_name)); + pivot.operation.node()->AddAttr("_pivot_for_cluster", "cluster"); + auto input00 = ops::Placeholder(s.WithOpName("input00"), DT_STRING); + auto input01 = ops::Placeholder(s.WithOpName("input01"), DT_INT32); + auto input10 = ops::Placeholder(s.WithOpName("input10"), DT_STRING); + auto input11 = ops::Placeholder(s.WithOpName("input11"), DT_INT32); + auto input2 = ops::Placeholder(s.WithOpName("input2"), DT_RESOURCE); + auto control_scope = s.WithControlDependencies({pivot}); + auto replicate = ops::_TPUReplicate( + control_scope.WithOpName("tpu_replicate"), + std::initializer_list{input00, input01, input10, input11, + input2}, + std::initializer_list{}, std::initializer_list{}, + std::initializer_list{}, function, + /*num_replicas=*/2, + {DT_STRING, DT_INT32, DT_FLOAT, DT_STRING, DT_INT32, DT_FLOAT}, + ops::_TPUReplicate::NumCoresPerReplica(1).NumDistributedVariables(1)); + auto output = ops::IdentityN( + s.WithOpName("output"), + std::initializer_list{ + replicate.outputs[0], replicate.outputs[1], replicate.outputs[2], + replicate.outputs[3], replicate.outputs[4], replicate.outputs[5]}); + TF_ASSERT_OK(s.ToGraph(&host_graph)); + } + auto node_name_index = host_graph.BuildNodeNameIndex(); + Node* replicate_node = node_name_index["tpu_replicate"]; + + std::unordered_map clusters; + clusters.emplace("cluster", + XlaClusterInfo{"cluster", function, replicate_node, + std::map{}}); + int lifted_arg_count = 0; + TF_ASSERT_OK(ExtractOutsideCompilationPass::ProcessHeadTailOutsideCompilation( + "oc", &lifted_arg_count, &clusters, &host_graph, flr, &fld)); + node_name_index = host_graph.BuildNodeNameIndex(); + replicate_node = node_name_index["tpu_replicate"]; + + { + // Check host graph. + const Edge* e; + Node* pivot = node_name_index[pivot_name]; + // Check that we have input00 -> as_int/R0 -> tpu_replicate. + Node* as_int_R0 = node_name_index["as_int_head_oc/R0"]; + EXPECT_NE(as_int_R0, nullptr); + TF_ASSERT_OK(as_int_R0->input_edge(0, &e)); + EXPECT_EQ(e->src(), node_name_index["input00"]); + TF_ASSERT_OK(replicate_node->input_edge(1, &e)); + EXPECT_EQ(e->src(), as_int_R0); + // Check that as_int/R0 has pivot as control input + TF_EXPECT_OK(PivotControlExists(as_int_R0, pivot)); + // Check that we have input10 -> as_int/R1 -> tpu_replicate. + Node* as_int_R1 = node_name_index["as_int_head_oc/R1"]; + EXPECT_NE(as_int_R1, nullptr); + TF_ASSERT_OK(as_int_R1->input_edge(0, &e)); + EXPECT_EQ(e->src(), node_name_index["input10"]); + TF_ASSERT_OK(replicate_node->input_edge(3, &e)); + EXPECT_EQ(e->src(), as_int_R1); + // Check that as_int/R0 has pivot as control input + TF_EXPECT_OK(PivotControlExists(as_int_R1, pivot)); + // Check that we have tpu_replicate -> as_string/R0 -> output. 
+ Node* as_string_R0 = node_name_index["as_string_tail_oc/R0"]; + EXPECT_NE(as_string_R0, nullptr); + TF_ASSERT_OK(as_string_R0->input_edge(0, &e)); + EXPECT_EQ(e->src(), replicate_node); + TF_ASSERT_OK(node_name_index["output"]->input_edge(0, &e)); + EXPECT_EQ(e->src(), as_string_R0); + // Check that as_string/R0 has pivot as control input + TF_EXPECT_OK(PivotControlExists(as_string_R0, pivot)); + // Check that we have tpu_replicate -> as_string/R1 -> output. + Node* as_string_R1 = node_name_index["as_string_tail_oc/R1"]; + EXPECT_NE(as_string_R1, nullptr); + TF_ASSERT_OK(as_string_R1->input_edge(0, &e)); + EXPECT_EQ(e->src(), replicate_node); + TF_ASSERT_OK(node_name_index["output"]->input_edge(3, &e)); + EXPECT_EQ(e->src(), as_string_R1); + // Check that as_string/R1 has pivot as control input + TF_EXPECT_OK(PivotControlExists(as_string_R1, pivot)); + } + + { + // Check TPU graph. + const FunctionDef* fdef = fld.Find("cluster"); + EXPECT_NE(fdef, nullptr); + // Check its signature, should have 2 DT_INT32 inputs, 1 DT_RESOURCE input, + // 2 DT_INT32 outputs and 1 DT_FLOAT output. + EXPECT_EQ(fdef->signature().input_arg_size(), 3); + EXPECT_EQ(fdef->signature().input_arg(0).type(), DT_INT32); + EXPECT_EQ(fdef->signature().input_arg(1).type(), DT_INT32); + EXPECT_EQ(fdef->signature().input_arg(2).type(), DT_RESOURCE); + EXPECT_EQ(fdef->signature().output_arg_size(), 3); + EXPECT_EQ(fdef->signature().output_arg(0).type(), DT_INT32); + EXPECT_EQ(fdef->signature().output_arg(1).type(), DT_FLOAT); + EXPECT_EQ(fdef->signature().output_arg(2).type(), DT_INT32); + // Check that it has no StringToNumber/AsString op any more. + for (const NodeDef& node_def : fdef->node_def()) { + EXPECT_NE(node_def.op(), "StringToNumber"); + EXPECT_NE(node_def.op(), "AsString"); + } + } +} + +INSTANTIATE_TEST_SUITE_P(All, ExtractOutsideCompilationByScope, + ::testing::ValuesIn({true, false})); + +TEST(ExtractOutsideCompilation, RemoveArgRetvalPair) { + FunctionLibraryDefinition fld(OpRegistry::Global(), FunctionDefLibrary()); + + // Create FunctionLibraryRuntime. + SessionOptions session_options; + std::vector> devices; + TF_CHECK_OK(DeviceFactory::AddDevices( + session_options, "/job:localhost/replica:0/task:0", &devices)); + OptimizerOptions opts; + auto device_mgr = absl::make_unique(std::move(devices)); + auto pflr = absl::make_unique( + device_mgr.get(), Env::Default(), /*config=*/nullptr, + TF_GRAPH_DEF_VERSION, &fld, opts, + /*default_thread_pool=*/nullptr); + auto flr = pflr->GetFLR("/job:localhost/replica:0/task:0/cpu:0"); + + { + // Build TPU replicate function. 
+ // arg0 = _Arg[index = 0, T = DT_STRING] + // arg1 = _Arg[index = 1, T = DT_FLOAT] + // arg2 = _Arg[index = 2, T = DT_INT32] + // arg3 = _Arg[index = 3, T = DT_RESOURCE] + // arg4 = _Arg[index = 4, T = DT_RESOURCE] + // add = Add(arg2, arg2) + // read = ReadVariableOp(arg4) + // ret0 = _RetVal[index = 0, T = DT_STRING](arg0) + // ret1 = _RetVal[index = 1, T = DT_INT32](add) + // ret2 = _RetVal[index = 2, T = DT_FLOAT](read) + // ret3 = _RetVal[index = 3, T = DT_RESOURCE](arg3) + Scope s = Scope::NewRootScope().ExitOnError(); + auto arg0 = ops::_Arg(s.WithOpName("arg0"), DT_STRING, 0); + auto arg1 = ops::_Arg(s.WithOpName("arg1"), DT_FLOAT, 1); + auto arg2 = ops::_Arg(s.WithOpName("arg2"), DT_INT32, 2); + auto arg3 = ops::_Arg(s.WithOpName("arg3"), DT_RESOURCE, 3); + auto arg4 = ops::_Arg(s.WithOpName("arg4"), DT_RESOURCE, 4); + auto add = ops::Add(s.WithOpName("add"), arg2, arg2); + auto ret0 = ops::_Retval(s.WithOpName("ret0"), arg0, 0); + auto ret1 = ops::_Retval(s.WithOpName("ret1"), add, 1); + auto read = ops::ReadVariableOp(s.WithOpName("read"), arg4, DT_FLOAT); + auto ret2 = ops::_Retval(s.WithOpName("ret2"), read, 2); + auto ret3 = ops::_Retval(s.WithOpName("ret3"), arg3, 3); + Graph g(OpRegistry::Global()); + TF_ASSERT_OK(s.ToGraph(&g)); + FunctionDef fdef; + TF_ASSERT_OK(GraphToFunctionDef(g, "cluster", &fdef)); + TF_ASSERT_OK(fld.AddFunctionDef(fdef)); + } + + Graph host_graph(OpRegistry::Global()); + NameAttrList function; + function.set_name("cluster"); + { + // Build host graph. + // input00 = Placeholder[T = DT_STRING] + // input01 = Placeholder[T = DT_FLOAT] + // input02 = Placeholder[T = DT_INT32] + // input10 = Placeholder[T = DT_STRING] + // input11 = Placeholder[T = DT_FLOAT] + // input12 = Placeholder[T = DT_INT32] + // input3 = Placeholder[T = DT_RESOURCE], distributed variable + // input4 = Placeholder[T = DT_RESOURCE], distributed variable + // tpu_replicate = _TPUReplicate(input00, input01, input02, input10, + // input11, input12, input3, input4) + // output = IdentityN(tpu_replicate, tpu_replicate:1, tpu_replicate:2, + // tpu_replicate:3, tpu_replicate:4, tpu_replicate:5, + // tpu_replicate:6, tpu_replicate:7) + Scope s = Scope::NewRootScope().ExitOnError(); + auto input00 = ops::Placeholder(s.WithOpName("input00"), DT_STRING); + auto input01 = ops::Placeholder(s.WithOpName("input01"), DT_FLOAT); + auto input02 = ops::Placeholder(s.WithOpName("input02"), DT_INT32); + auto input10 = ops::Placeholder(s.WithOpName("input10"), DT_STRING); + auto input11 = ops::Placeholder(s.WithOpName("input11"), DT_FLOAT); + auto input12 = ops::Placeholder(s.WithOpName("input12"), DT_INT32); + auto input3 = ops::Placeholder(s.WithOpName("input3"), DT_RESOURCE); + auto input4 = ops::Placeholder(s.WithOpName("input3"), DT_RESOURCE); + auto replicate = ops::_TPUReplicate( + s.WithOpName("tpu_replicate"), + std::initializer_list{input00, input01, input02, input10, + input11, input12, input3, input4}, + std::initializer_list{}, std::initializer_list{}, + std::initializer_list{}, function, + /*num_replicas=*/2, + {DT_STRING, DT_INT32, DT_FLOAT, DT_RESOURCE, DT_STRING, DT_INT32, + DT_FLOAT, DT_RESOURCE}, + ops::_TPUReplicate::NumCoresPerReplica(1).NumDistributedVariables(2)); + auto output = ops::IdentityN( + s.WithOpName("output"), + std::initializer_list{ + replicate.outputs[0], replicate.outputs[1], replicate.outputs[2], + replicate.outputs[3], replicate.outputs[4], replicate.outputs[5], + replicate.outputs[6], replicate.outputs[7]}); + TF_ASSERT_OK(s.ToGraph(&host_graph)); + } + auto 
node_name_index = host_graph.BuildNodeNameIndex(); + Node* replicate_node = node_name_index["tpu_replicate"]; + + std::unordered_map clusters; + clusters.emplace("cluster", + XlaClusterInfo{"cluster", function, replicate_node, + std::map{}}); + int lifted_arg_count = 0; + TF_ASSERT_OK(ExtractOutsideCompilationPass::ProcessHeadTailOutsideCompilation( + "oc", &lifted_arg_count, &clusters, &host_graph, flr, &fld)); + node_name_index = host_graph.BuildNodeNameIndex(); + replicate_node = node_name_index["tpu_replicate"]; + Node* output = node_name_index["output"]; + + EXPECT_EQ(replicate_node->num_inputs(), 3); + const DataTypeVector expected_input_types = {DT_INT32, DT_INT32, DT_RESOURCE}; + EXPECT_EQ(replicate_node->input_types(), expected_input_types); + EXPECT_EQ(replicate_node->num_outputs(), 4); + const DataTypeVector expected_output_types = {DT_INT32, DT_FLOAT, DT_INT32, + DT_FLOAT}; + EXPECT_EQ(replicate_node->output_types(), expected_output_types); + + { + // Check host graph. + Node* input_node; + // Check that we have input00 -> output:1. + TF_ASSERT_OK(output->input_node(0, &input_node)); + EXPECT_EQ(input_node->name(), "input00"); + // Check that we have input10 -> output:4. + TF_ASSERT_OK(output->input_node(4, &input_node)); + EXPECT_EQ(input_node->name(), "input10"); + // Check that we have input3 -> output:3, output:7. + TF_ASSERT_OK(output->input_node(3, &input_node)); + EXPECT_EQ(input_node->name(), "input3"); + TF_ASSERT_OK(output->input_node(7, &input_node)); + EXPECT_EQ(input_node->name(), "input3"); + } + + { + // Check TPU graph. + const FunctionDef* fdef = fld.Find("cluster"); + EXPECT_NE(fdef, nullptr); + // Check its signature, should have 1 DT_INT32 input, 1 DT_RESOURCE input, + // 1 DT_INT32 output and 1 DT_FLOAT output + EXPECT_EQ(fdef->signature().input_arg_size(), 2); + EXPECT_EQ(fdef->signature().input_arg(0).type(), DT_INT32); + EXPECT_EQ(fdef->signature().input_arg(1).type(), DT_RESOURCE); + EXPECT_EQ(fdef->signature().output_arg_size(), 2); + EXPECT_EQ(fdef->signature().output_arg(0).type(), DT_INT32); + EXPECT_EQ(fdef->signature().output_arg(1).type(), DT_FLOAT); + } +} + +} // namespace tensorflow diff --git a/tensorflow/core/tpu/graph_rewrite/tpu_rewrite_pass_registration.cc b/tensorflow/core/tpu/graph_rewrite/tpu_rewrite_pass_registration.cc index 92173c1e79c..1de4f50da10 100644 --- a/tensorflow/core/tpu/graph_rewrite/tpu_rewrite_pass_registration.cc +++ b/tensorflow/core/tpu/graph_rewrite/tpu_rewrite_pass_registration.cc @@ -15,6 +15,7 @@ limitations under the License. 
#include "tensorflow/core/common_runtime/optimization_registry.h" #include "tensorflow/core/tpu/graph_rewrite/distributed_tpu_configuration_rewrite_pass.h" +#include "tensorflow/core/tpu/graph_rewrite/encapsulate_tpu_computations_pass.h" #include "tensorflow/core/tpu/graph_rewrite/variable_merger_pass.h" namespace tensorflow { @@ -25,8 +26,10 @@ REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 20, DistributedTPUConfigurationRewritePass); REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 20, DistributedTPUShutdownRewritePass); -REGISTER_OPTIMIZATION(OptimizationPassRegistry::POST_REWRITE_FOR_EXEC, 0, - VariableMergerPass); +REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 34, + EncapsulateTPUComputationsPass); +REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 39, + ExtractOutsideCompilationPass); } // namespace } // namespace tensorflow diff --git a/tensorflow/core/tpu/tpu_compile_interface.cc b/tensorflow/core/tpu/tpu_compile_interface.cc new file mode 100644 index 00000000000..1f585565f3a --- /dev/null +++ b/tensorflow/core/tpu/tpu_compile_interface.cc @@ -0,0 +1,38 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/tpu/tpu_compile_interface.h" + +#include "tensorflow/core/platform/fingerprint.h" +#include "tensorflow/core/platform/logging.h" + +class TpuCompileInterfaceExternal : public TpuCompileInterface { + public: + uint64_t FingerprintString(absl::string_view str) override { + return ::tensorflow::Fingerprint64(str); + } +}; + +static TpuCompileInterface* impl_ = new TpuCompileInterfaceExternal; +TpuCompileInterface* TpuCompileInterface::Get() { return impl_; } + +bool TpuCompileInterface::RegisterImplementation(TpuCompileInterface* impl) { + VLOG(1) << "Updating TpuCompileInterface."; + if (impl_ != nullptr) { + delete impl_; + } + impl_ = impl; + return true; +} diff --git a/tensorflow/core/tpu/tpu_compile_interface.h b/tensorflow/core/tpu/tpu_compile_interface.h new file mode 100644 index 00000000000..7e7b1f8315a --- /dev/null +++ b/tensorflow/core/tpu/tpu_compile_interface.h @@ -0,0 +1,33 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_CORE_TPU_TPU_COMPILE_INTERFACE_H_ +#define TENSORFLOW_CORE_TPU_TPU_COMPILE_INTERFACE_H_ + +#include "absl/strings/string_view.h" + +// Some legacy code requires different implementations for operations like +// fingerprint/hashing during compilation and/or graph rewriting. These +// alternate implementations can be registered (via a module initializer) to +// change the default behavior. +class TpuCompileInterface { + public: + virtual ~TpuCompileInterface() {} + static TpuCompileInterface* Get(); + static bool RegisterImplementation(TpuCompileInterface* impl); + + virtual uint64_t FingerprintString(absl::string_view str) = 0; +}; + +#endif // TENSORFLOW_CORE_TPU_TPU_COMPILE_INTERFACE_H_ diff --git a/tensorflow/core/tpu/tpu_defs.cc b/tensorflow/core/tpu/tpu_defs.cc index ad7f02a3d95..69669bfdb7b 100644 --- a/tensorflow/core/tpu/tpu_defs.cc +++ b/tensorflow/core/tpu/tpu_defs.cc @@ -24,4 +24,7 @@ const char* const DEVICE_TPU_XLA_JIT = "XLA_TPU_JIT"; const char* const TPUREPLICATE_MIRRORED_VAR_INDICES_ATTR = "_mirrored_variable_indices"; +const char* const kTPUReplicateAttr = "_tpu_replicate"; +const char* const kOutsideCompilationAttr = "_xla_outside_compilation"; + } // namespace tensorflow diff --git a/tensorflow/core/tpu/tpu_defs.h b/tensorflow/core/tpu/tpu_defs.h index 294b4253ee0..db392ddd6eb 100644 --- a/tensorflow/core/tpu/tpu_defs.h +++ b/tensorflow/core/tpu/tpu_defs.h @@ -47,6 +47,9 @@ extern const char* const TPUREPLICATE_MIRRORED_VAR_INDICES_ATTR; // variable. extern const char* const TPU_FAST_MEM_ATTR; // "_TPU_FAST_MEM" +extern const char* const kTPUReplicateAttr; +extern const char* const kOutsideCompilationAttr; + // Supported types for TPUs. static constexpr std::array kTpuAllTypes = { {DT_INT32, DT_UINT32, DT_BFLOAT16, DT_FLOAT, DT_DOUBLE, DT_BOOL, From e1a29e1726ef9020f9ae3d976881a78c6daa6c93 Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: Tue, 14 Jul 2020 14:52:10 -0700 Subject: [PATCH 0423/2522] Increase timeout of //third_party/tensorflow/python/data/cache_test PiperOrigin-RevId: 321240514 Change-Id: I2b747acd7d2ebaf8b7b4a69f78b4c8173f127f36 --- tensorflow/python/data/kernel_tests/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD index ecfb6668909..109a978514e 100644 --- a/tensorflow/python/data/kernel_tests/BUILD +++ b/tensorflow/python/data/kernel_tests/BUILD @@ -29,7 +29,7 @@ tf_py_test( tf_py_test( name = "cache_test", - size = "small", + size = "medium", srcs = ["cache_test.py"], deps = [ ":test_base", From 8d5182ec79bdcadbdee7ada96ee6acca39c767af Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Tue, 14 Jul 2020 14:58:14 -0700 Subject: [PATCH 0424/2522] Use `std::string` instead of `string` PiperOrigin-RevId: 321241741 Change-Id: I25689b66e71fee439740f3f5951dc4cbe6b5cc6b --- tensorflow/lite/delegates/flex/allowlisted_flex_ops_test.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/lite/delegates/flex/allowlisted_flex_ops_test.cc b/tensorflow/lite/delegates/flex/allowlisted_flex_ops_test.cc index 424013d33e0..67cb07769a8 100644 --- a/tensorflow/lite/delegates/flex/allowlisted_flex_ops_test.cc +++ b/tensorflow/lite/delegates/flex/allowlisted_flex_ops_test.cc @@ -24,14 +24,14 @@ namespace tflite { namespace flex { // Get all cpu kernels registered in Tensorflow. 
-std::set<string> GetAllCpuKernels() {
+std::set<std::string> GetAllCpuKernels() {
   auto is_cpu_kernel = [](const tensorflow::KernelDef& def) {
     return (def.device_type() == "CPU" || def.device_type() == "DEFAULT");
   };
 
   tensorflow::KernelList kernel_list =
       tensorflow::GetFilteredRegisteredKernels(is_cpu_kernel);
-  std::set<string> result;
+  std::set<std::string> result;
 
   for (int i = 0; i < kernel_list.kernel_size(); ++i) {
     tensorflow::KernelDef kernel_def = kernel_list.kernel(i);
@@ -44,7 +44,7 @@ std::set<string> GetAllCpuKernels() {
 // This test must be run on both Linux and Android.
 TEST(AllowlistedFlexOpsTest, EveryOpHasKernel) {
   const std::set<std::string>& allowlist = GetFlexAllowlist();
-  std::set<string> all_kernels = GetAllCpuKernels();
+  std::set<std::string> all_kernels = GetAllCpuKernels();
   for (const std::string& op_name : allowlist) {
     EXPECT_EQ(all_kernels.count(op_name), 1)
From b8e918230000896dae5b2e61c720c39a21aefc5a Mon Sep 17 00:00:00 2001
From: Yanhui Liang
Date: Tue, 14 Jul 2020 15:03:53 -0700
Subject: [PATCH 0425/2522] Split saved model benchmark tests into individual
 test to avoid timeout.

PiperOrigin-RevId: 321242864
Change-Id: I5ac6e9efec62e667f0b67f0e7f7e0c4029263de6
---
 tensorflow/python/keras/benchmarks/BUILD      |  30 ++--
 .../applications_saved_model_test.py          |  93 ------------
 .../benchmarks/saved_model_benchmarks/BUILD   | 136 ++++++++++++++++++
 .../densenet_benchmark_test.py                |  43 ++++++
 .../efficientnet_benchmark_test.py            |  43 ++++++
 .../inception_resnet_v2_benchmark_test.py     |  44 ++++++
 .../mobilenet_benchmark_test.py               |  43 ++++++
 .../nasnet_large_benchmark_test.py            |  43 ++++++
 .../resnet152_v2_benchmark_test.py            |  44 ++++++
 .../saved_model_benchmark_util.py             |  70 +++++++++
 .../vgg_benchmark_test.py                     |  44 ++++++
 .../xception_benchmark_test.py                |  44 ++++++
 tensorflow/tools/pip_package/BUILD            |   2 +-
 13 files changed, 569 insertions(+), 110 deletions(-)
 delete mode 100644 tensorflow/python/keras/benchmarks/applications_saved_model_test.py
 create mode 100644 tensorflow/python/keras/benchmarks/saved_model_benchmarks/BUILD
 create mode 100644 tensorflow/python/keras/benchmarks/saved_model_benchmarks/densenet_benchmark_test.py
 create mode 100644 tensorflow/python/keras/benchmarks/saved_model_benchmarks/efficientnet_benchmark_test.py
 create mode 100644 tensorflow/python/keras/benchmarks/saved_model_benchmarks/inception_resnet_v2_benchmark_test.py
 create mode 100644 tensorflow/python/keras/benchmarks/saved_model_benchmarks/mobilenet_benchmark_test.py
 create mode 100644 tensorflow/python/keras/benchmarks/saved_model_benchmarks/nasnet_large_benchmark_test.py
 create mode 100644 tensorflow/python/keras/benchmarks/saved_model_benchmarks/resnet152_v2_benchmark_test.py
 create mode 100644 tensorflow/python/keras/benchmarks/saved_model_benchmarks/saved_model_benchmark_util.py
 create mode 100644 tensorflow/python/keras/benchmarks/saved_model_benchmarks/vgg_benchmark_test.py
 create mode 100644 tensorflow/python/keras/benchmarks/saved_model_benchmarks/xception_benchmark_test.py
diff --git a/tensorflow/python/keras/benchmarks/BUILD b/tensorflow/python/keras/benchmarks/BUILD
index 2386b01c426..95fca2e8bbe 100644
--- a/tensorflow/python/keras/benchmarks/BUILD
+++ b/tensorflow/python/keras/benchmarks/BUILD
@@ -23,11 +23,24 @@ exports_files(["LICENSE"])
 # to the regular expression is executed.
 # e.g. --benchmarks=".*lstm*." will run all lstm layer related benchmarks.
 
+# Add all benchmarks related utils here for pip testing dependencies.
+py_library( + name = "keras_benchmark_lib_pip", + deps = [ + ":benchmark_util", + "//tensorflow/python/keras/benchmarks/saved_model_benchmarks:saved_model_benchmark_util", + ], +) + py_test( name = "keras_cpu_benchmark_test", size = "large", srcs = ["keras_cpu_benchmark_test.py"], python_version = "PY3", + tags = [ + "no_pip", # b/161253163 + "no_windows", # b/160628318 + ], deps = [ ":benchmark_util", "//tensorflow:tensorflow_py", @@ -49,22 +62,6 @@ cuda_py_test( ], ) -cuda_py_test( - name = "applications_saved_model_test", - size = "medium", - srcs = ["applications_saved_model_test.py"], - shard_count = 8, - tags = [ - "no_oss_py38", # b/160170347 - "no_windows", # b/160269052 - ], - deps = [ - "//tensorflow/python:client_testlib", - "//tensorflow/python/keras/applications", - "@absl_py//absl/testing:parameterized", - ], -) - cuda_py_test( name = "model_components_benchmarks_test", srcs = ["model_components_benchmarks_test.py"], @@ -80,6 +77,7 @@ py_test( srcs = ["keras_examples_benchmark_test.py"], python_version = "PY3", tags = [ + "no_pip", # b/161253163 "no_windows", # b/160628318 ], deps = [ diff --git a/tensorflow/python/keras/benchmarks/applications_saved_model_test.py b/tensorflow/python/keras/benchmarks/applications_saved_model_test.py deleted file mode 100644 index 0111c8f13b9..00000000000 --- a/tensorflow/python/keras/benchmarks/applications_saved_model_test.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Benchmarks for Keras applications.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tempfile -import time - -import six - -from tensorflow.python.keras.applications import densenet -from tensorflow.python.keras.applications import efficientnet -from tensorflow.python.keras.applications import inception_resnet_v2 -from tensorflow.python.keras.applications import mobilenet_v2 -from tensorflow.python.keras.applications import nasnet -from tensorflow.python.keras.applications import resnet_v2 -from tensorflow.python.keras.applications import vgg19 -from tensorflow.python.keras.applications import xception -from tensorflow.python.keras.saving.saved_model import load as keras_load -from tensorflow.python.platform import benchmark -from tensorflow.python.platform import gfile -from tensorflow.python.platform import googletest -from tensorflow.python.platform import test - - -class BenchmarkSaveApplications( - six.with_metaclass(benchmark.ParameterizedBenchmark, test.Benchmark)): - - _benchmark_parameters = [ - ('ResNet152V2', resnet_v2.ResNet152V2, 2048), - ('VGG19', vgg19.VGG19, 512), - ('Xception', xception.Xception, 2048), - ('InceptionResNetV2', inception_resnet_v2.InceptionResNetV2, 1536), - ('MobileNetV2', mobilenet_v2.MobileNetV2, 1280), - ('DenseNet201', densenet.DenseNet201, 1920), - ('EfficientNetB7', efficientnet.EfficientNetB7, 2560), - ('NASNetLarge', nasnet.NASNetLarge, 4032), - ] - - def benchmark_save_and_load_applications(self, app, _): - trials = 3 - - model = app(weights=None) - model_name = app.__name__ - - tmp_dir = googletest.GetTempDir() - gfile.MakeDirs(tmp_dir) - save_dir = tempfile.mkdtemp(dir=tmp_dir) - - total_save_time = 0 - total_load_time = 0 - - # Run one untimed iteration of saving/loading. - model.save(save_dir, save_format='tf') - keras_load.load(save_dir) - - for _ in range(trials): - start_time = time.time() - model.save(save_dir, save_format='tf') - total_save_time += time.time() - start_time - - start_time = time.time() - keras_load.load(save_dir) - total_load_time += time.time() - start_time - self.report_benchmark( - iters=trials, - wall_time=total_save_time / trials, - name='{}.save'.format(model_name)) - - self.report_benchmark( - iters=1, - wall_time=total_load_time / trials, - name='{}.load'.format(model_name)) - gfile.DeleteRecursively(save_dir) - - -if __name__ == '__main__': - test.main() diff --git a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/BUILD b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/BUILD new file mode 100644 index 00000000000..25a81cc41cc --- /dev/null +++ b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/BUILD @@ -0,0 +1,136 @@ +# Description: +# Implementation of Keras benchmarks. + +load("//tensorflow:tensorflow.bzl", "cuda_py_test") + +package( + default_visibility = ["//visibility:public"], + licenses = ["notice"], # Apache 2.0 +) + +exports_files(["LICENSE"]) + +# To run CPU benchmarks: +# bazel run -c opt benchmarks_test -- --benchmarks=. + +# To run GPU benchmarks: +# bazel run --config=cuda -c opt --copt="-mavx" benchmarks_test -- \ +# --benchmarks=. + +# To run a subset of benchmarks using --benchmarks flag. +# --benchmarks: the list of benchmarks to run. The specified value is interpreted +# as a regular expression and any benchmark whose name contains a partial match +# to the regular expression is executed. +# e.g. 
--benchmarks=".*lstm*." will run all lstm layer related benchmarks. + +py_library( + name = "saved_model_benchmark_util", + srcs = ["saved_model_benchmark_util.py"], + deps = [ + "//tensorflow:tensorflow_py", + ], +) + +cuda_py_test( + name = "densenet_benchmark_test", + srcs = ["densenet_benchmark_test.py"], + tags = [ + "no_pip", # b/161253163 + "no_windows", # b/160628318 + ], + deps = [ + ":saved_model_benchmark_util", + "//tensorflow:tensorflow_py", + ], +) + +cuda_py_test( + name = "efficientnet_benchmark_test", + srcs = ["efficientnet_benchmark_test.py"], + tags = [ + "no_pip", # b/161253163 + "no_windows", # b/160628318 + ], + deps = [ + ":saved_model_benchmark_util", + "//tensorflow:tensorflow_py", + ], +) + +cuda_py_test( + name = "inception_resnet_v2_benchmark_test", + srcs = ["inception_resnet_v2_benchmark_test.py"], + tags = [ + "no_pip", # b/161253163 + "no_windows", # b/160628318 + ], + deps = [ + ":saved_model_benchmark_util", + "//tensorflow:tensorflow_py", + ], +) + +cuda_py_test( + name = "mobilenet_benchmark_test", + srcs = ["mobilenet_benchmark_test.py"], + tags = [ + "no_pip", # b/161253163 + "no_windows", # b/160628318 + ], + deps = [ + ":saved_model_benchmark_util", + "//tensorflow:tensorflow_py", + ], +) + +cuda_py_test( + name = "nasnet_large_benchmark_test", + srcs = ["nasnet_large_benchmark_test.py"], + tags = [ + "no_pip", # b/161253163 + "no_windows", # b/160628318 + ], + deps = [ + ":saved_model_benchmark_util", + "//tensorflow:tensorflow_py", + ], +) + +cuda_py_test( + name = "resnet152_v2_benchmark_test", + srcs = ["resnet152_v2_benchmark_test.py"], + tags = [ + "no_pip", # b/161253163 + "no_windows", # b/160628318 + ], + deps = [ + ":saved_model_benchmark_util", + "//tensorflow:tensorflow_py", + ], +) + +cuda_py_test( + name = "vgg_benchmark_test", + srcs = ["vgg_benchmark_test.py"], + tags = [ + "no_pip", # b/161253163 + "no_windows", # b/160628318 + ], + deps = [ + ":saved_model_benchmark_util", + "//tensorflow:tensorflow_py", + ], +) + +cuda_py_test( + name = "xception_benchmark_test", + srcs = ["xception_benchmark_test.py"], + tags = [ + "no_pip", # b/161253163 + "no_windows", # b/160628318 + ], + deps = [ + ":saved_model_benchmark_util", + "//tensorflow:tensorflow_py", + ], +) diff --git a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/densenet_benchmark_test.py b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/densenet_benchmark_test.py new file mode 100644 index 00000000000..3b8e9d632f5 --- /dev/null +++ b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/densenet_benchmark_test.py @@ -0,0 +1,43 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Benchmarks for saved model on DenseNet201.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +from tensorflow.python.keras.benchmarks.saved_model_benchmarks import saved_model_benchmark_util + + +class BenchmarkSaveApplications(tf.test.Benchmark): + + def benchmark_save_and_load_densenet_201(self): + app = tf.keras.applications.DenseNet201 + save_result, load_result = ( + saved_model_benchmark_util.save_and_load_benchmark(app)) + + self.report_benchmark( + iters=save_result['iters'], + wall_time=save_result['wall_time'], + name=save_result['name']) + + self.report_benchmark( + iters=load_result['iters'], + wall_time=load_result['wall_time'], + name=load_result['name']) + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/efficientnet_benchmark_test.py b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/efficientnet_benchmark_test.py new file mode 100644 index 00000000000..27316e2997a --- /dev/null +++ b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/efficientnet_benchmark_test.py @@ -0,0 +1,43 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Benchmarks for saved model on EfficientNetB7.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +from tensorflow.python.keras.benchmarks.saved_model_benchmarks import saved_model_benchmark_util + + +class BenchmarkSaveApplications(tf.test.Benchmark): + + def benchmark_save_and_load_efficient_net_b7(self): + app = tf.keras.applications.EfficientNetB7 + save_result, load_result = ( + saved_model_benchmark_util.save_and_load_benchmark(app)) + + self.report_benchmark( + iters=save_result['iters'], + wall_time=save_result['wall_time'], + name=save_result['name']) + + self.report_benchmark( + iters=load_result['iters'], + wall_time=load_result['wall_time'], + name=load_result['name']) + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/inception_resnet_v2_benchmark_test.py b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/inception_resnet_v2_benchmark_test.py new file mode 100644 index 00000000000..d2d5090e878 --- /dev/null +++ b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/inception_resnet_v2_benchmark_test.py @@ -0,0 +1,44 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Benchmarks for saved model on InceptionResNetV2.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +from tensorflow.python.keras.benchmarks.saved_model_benchmarks import saved_model_benchmark_util + + +class BenchmarkSaveApplications(tf.test.Benchmark): + + def benchmark_save_and_load_inception_resnet_v2(self): + app = tf.keras.applications.InceptionResNetV2 + save_result, load_result = ( + saved_model_benchmark_util.save_and_load_benchmark(app)) + + self.report_benchmark( + iters=save_result['iters'], + wall_time=save_result['wall_time'], + name=save_result['name']) + + self.report_benchmark( + iters=load_result['iters'], + wall_time=load_result['wall_time'], + name=load_result['name']) + + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/mobilenet_benchmark_test.py b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/mobilenet_benchmark_test.py new file mode 100644 index 00000000000..0d6b61f141e --- /dev/null +++ b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/mobilenet_benchmark_test.py @@ -0,0 +1,43 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Benchmarks for saved model on MobileNetV2.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +from tensorflow.python.keras.benchmarks.saved_model_benchmarks import saved_model_benchmark_util + + +class BenchmarkSaveApplications(tf.test.Benchmark): + + def benchmark_save_and_load_mobilenet_v2(self): + app = tf.keras.applications.MobileNetV2 + save_result, load_result = ( + saved_model_benchmark_util.save_and_load_benchmark(app)) + + self.report_benchmark( + iters=save_result['iters'], + wall_time=save_result['wall_time'], + name=save_result['name']) + + self.report_benchmark( + iters=load_result['iters'], + wall_time=load_result['wall_time'], + name=load_result['name']) + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/nasnet_large_benchmark_test.py b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/nasnet_large_benchmark_test.py new file mode 100644 index 00000000000..864ce1930ee --- /dev/null +++ b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/nasnet_large_benchmark_test.py @@ -0,0 +1,43 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Benchmarks for saved model on NASNetLarge.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +from tensorflow.python.keras.benchmarks.saved_model_benchmarks import saved_model_benchmark_util + + +class BenchmarkSaveApplications(tf.test.Benchmark): + + def benchmark_save_and_load_nasnet_large(self): + app = tf.keras.applications.NASNetLarge + save_result, load_result = ( + saved_model_benchmark_util.save_and_load_benchmark(app)) + + self.report_benchmark( + iters=save_result['iters'], + wall_time=save_result['wall_time'], + name=save_result['name']) + + self.report_benchmark( + iters=load_result['iters'], + wall_time=load_result['wall_time'], + name=load_result['name']) + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/resnet152_v2_benchmark_test.py b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/resnet152_v2_benchmark_test.py new file mode 100644 index 00000000000..a0603eb5136 --- /dev/null +++ b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/resnet152_v2_benchmark_test.py @@ -0,0 +1,44 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Benchmarks for saved model on ResNet152V2.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +from tensorflow.python.keras.benchmarks.saved_model_benchmarks import saved_model_benchmark_util + + +class BenchmarkSaveApplications(tf.test.Benchmark): + + def benchmark_save_and_load_resnet152_v2(self): + app = tf.keras.applications.ResNet152V2 + save_result, load_result = ( + saved_model_benchmark_util.save_and_load_benchmark(app)) + + self.report_benchmark( + iters=save_result['iters'], + wall_time=save_result['wall_time'], + name=save_result['name']) + + self.report_benchmark( + iters=load_result['iters'], + wall_time=load_result['wall_time'], + name=load_result['name']) + + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/saved_model_benchmark_util.py b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/saved_model_benchmark_util.py new file mode 100644 index 00000000000..a0760fa075c --- /dev/null +++ b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/saved_model_benchmark_util.py @@ -0,0 +1,70 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Utils for saved model benchmarks.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tempfile +import time + +import tensorflow as tf + +from tensorflow.python.platform import gfile +from tensorflow.python.platform import googletest + + +def save_and_load_benchmark(app): + """Util for saved model benchmarks.""" + trials = 3 + + model = app(weights=None) + model_name = app.__name__ + + tmp_dir = googletest.GetTempDir() + gfile.MakeDirs(tmp_dir) + save_dir = tempfile.mkdtemp(dir=tmp_dir) + + total_save_time = 0 + total_load_time = 0 + + # Run one untimed iteration of saving/loading. 
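+  # This warm-up pass absorbs one-time costs such as tracing the model's
+  # save/load functions, so the timed trials below measure steady-state
+  # behavior only.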
+ model.save(save_dir, save_format='tf') + tf.keras.models.load_model(save_dir) + + for _ in range(trials): + start_time = time.time() + model.save(save_dir, save_format='tf') + total_save_time += time.time() - start_time + + start_time = time.time() + tf.keras.models.load_model(save_dir) + total_load_time += time.time() - start_time + + save_result = { + 'iters': trials, + 'wall_time': total_save_time / trials, + 'name': '{}.save'.format(model_name) + } + + load_result = { + 'iters': trials, + 'wall_time': total_load_time / trials, + 'name': '{}.load'.format(model_name) + } + gfile.DeleteRecursively(save_dir) + return save_result, load_result + diff --git a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/vgg_benchmark_test.py b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/vgg_benchmark_test.py new file mode 100644 index 00000000000..3ceebe4fcc4 --- /dev/null +++ b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/vgg_benchmark_test.py @@ -0,0 +1,44 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Benchmarks for saved model on VGG19.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +from tensorflow.python.keras.benchmarks.saved_model_benchmarks import saved_model_benchmark_util + + +class BenchmarkSaveApplications(tf.test.Benchmark): + + def benchmark_save_and_load_vgg19(self): + app = tf.keras.applications.VGG19 + save_result, load_result = ( + saved_model_benchmark_util.save_and_load_benchmark(app)) + + self.report_benchmark( + iters=save_result['iters'], + wall_time=save_result['wall_time'], + name=save_result['name']) + + self.report_benchmark( + iters=load_result['iters'], + wall_time=load_result['wall_time'], + name=load_result['name']) + + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorflow/python/keras/benchmarks/saved_model_benchmarks/xception_benchmark_test.py b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/xception_benchmark_test.py new file mode 100644 index 00000000000..ddab2f68ffd --- /dev/null +++ b/tensorflow/python/keras/benchmarks/saved_model_benchmarks/xception_benchmark_test.py @@ -0,0 +1,44 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Benchmarks for saved model on Xception.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +from tensorflow.python.keras.benchmarks.saved_model_benchmarks import saved_model_benchmark_util + + +class BenchmarkSaveApplications(tf.test.Benchmark): + + def benchmark_save_and_load_xception(self): + app = tf.keras.applications.Xception + save_result, load_result = ( + saved_model_benchmark_util.save_and_load_benchmark(app)) + + self.report_benchmark( + iters=save_result['iters'], + wall_time=save_result['wall_time'], + name=save_result['name']) + + self.report_benchmark( + iters=load_result['iters'], + wall_time=load_result['wall_time'], + name=load_result['name']) + + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index 4e608360f8b..38ff12b100e 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -132,7 +132,7 @@ COMMON_PIP_DEPS = [ "//tensorflow/python/keras/mixed_precision/experimental:test_util", "//tensorflow/python/keras/tests:model_subclassing_test_util", "//tensorflow/python/keras/tests:model_architectures", - "//tensorflow/python/keras/benchmarks:benchmark_util", + "//tensorflow/python/keras/benchmarks:keras_benchmark_lib_pip", "//tensorflow/python/kernel_tests:cudnn_deterministic_base", "//tensorflow/python/kernel_tests:bias_op_base", "//tensorflow/python/kernel_tests/random:util", From 0d24c141da3dc00962d56a9fb8472089127f30f3 Mon Sep 17 00:00:00 2001 From: Marat Dukhan Date: Tue, 14 Jul 2020 15:09:42 -0700 Subject: [PATCH 0426/2522] Fix MSVC incompatibilities in XNNPACK delegate tests PiperOrigin-RevId: 321243966 Change-Id: Ifbf69e5fcc5b33960b575499246eb4b0cc031c26 --- tensorflow/lite/delegates/xnnpack/fully_connected_tester.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/lite/delegates/xnnpack/fully_connected_tester.cc b/tensorflow/lite/delegates/xnnpack/fully_connected_tester.cc index 8962b8ba7ba..9696b07b7a3 100644 --- a/tensorflow/lite/delegates/xnnpack/fully_connected_tester.cc +++ b/tensorflow/lite/delegates/xnnpack/fully_connected_tester.cc @@ -201,9 +201,9 @@ std::vector FullyConnectedTester::CreateTfLiteModel() const { sizeof(float) * bias_data.size()))); } - const std::array filter_shape( - {OutputChannels(), InputChannels()}); - const std::array bias_shape({OutputChannels()}); + const std::array filter_shape{ + {OutputChannels(), InputChannels()}}; + const std::array bias_shape{{OutputChannels()}}; const std::vector output_shape = OutputShape(); std::vector> tensors; From 931fdf5ab0c4159d046875b3c42813c473e698b8 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 14 Jul 2020 15:29:08 -0700 Subject: [PATCH 0427/2522] Internal change PiperOrigin-RevId: 321247555 Change-Id: I3129daec8e589c62b67c5d1fe5ad12b2d6bd2006 --- tensorflow/compiler/jit/kernels/xla_ops.cc | 109 +++----- .../compiler/jit/xla_compile_on_demand_op.cc | 36 +-- tensorflow/compiler/jit/xla_device_ops.cc | 12 +- tensorflow/compiler/jit/xla_launch_util.cc | 236 ++++++++---------- tensorflow/compiler/jit/xla_launch_util.h | 28 +-- .../python/eager/def_function_xla_jit_test.py | 64 ----- 6 files changed, 167 insertions(+), 318 deletions(-) diff --git a/tensorflow/compiler/jit/kernels/xla_ops.cc b/tensorflow/compiler/jit/kernels/xla_ops.cc index 38e33a60657..48347a2915f 100644 --- a/tensorflow/compiler/jit/kernels/xla_ops.cc +++ b/tensorflow/compiler/jit/kernels/xla_ops.cc @@ -277,8 +277,7 @@ static Status CompileToLocalExecutable( OpKernelContext* ctx, const NameAttrList& function, bool has_ref_vars, const XlaPlatformInfo& platform_info, absl::Span variable_infos, - absl::Span constants, bool lazy, bool may_alias_resource_update, - xla::LocalClient** client, + absl::Span constants, bool lazy, xla::LocalClient** client, const XlaCompiler::CompilationResult** compilation_result, xla::LocalExecutable** executable) { // We store information about the JIT-compiled XLA computation @@ -333,9 +332,6 @@ static Status CompileToLocalExecutable( // Optimization: where possible, have the computation return a naked array // rather than a one-element tuple. compile_options.always_return_tuple = false; - compile_options.alias_resource_update = !has_ref_vars && - !platform_info.is_on_xla_device() && - may_alias_resource_update; std::vector args; TF_RETURN_IF_ERROR(XlaComputationLaunchContext::BuildXlaCompilerArguments( @@ -354,22 +350,20 @@ void XlaLocalLaunchBase::Compute(OpKernelContext* ctx) { const XlaCompiler::CompilationResult* compilation_result; xla::LocalExecutable* executable; - std::vector variable_infos; + ResourceVarsSnapshot variables_snapshot; { + std::vector variable_infos; OP_REQUIRES_OK( ctx, GetVariableInfosFromCtxInputs(ctx, resources_, &variable_infos)); OP_REQUIRES_OK(ctx, LockVariables(absl::MakeSpan(variable_infos))); Status s = CompileToLocalExecutable( ctx, function_, /*has_ref_vars=*/has_ref_vars_, platform_info_, - variable_infos, constants_, /*lazy=*/false, - /*may_alias_resource_update=*/true, &client, &compilation_result, - &executable); + variable_infos, constants_, /*lazy=*/false, &client, + &compilation_result, &executable); OP_REQUIRES_OK(ctx, s); - } - - std::map resource_var_ptrs; - for (int i = 0; i < resources_.size(); i++) { - resource_var_ptrs[resources_[i]] = variable_infos[i].var()->tensor(); + OP_REQUIRES_OK(ctx, + SnapshotResourceVariables(ctx, resources_, variable_infos, + &variables_snapshot)); } se::Stream* stream = @@ -380,19 +374,12 @@ void XlaLocalLaunchBase::Compute(OpKernelContext* ctx) { absl::optional tf_allocator_adapter; se::DeviceMemoryAllocator* allocator = GetAllocator(&tf_allocator_adapter, ctx, platform_info_); - int device_ordinal = stream ? 
stream->parent()->device_ordinal() - : client->default_device_ordinal(); XlaComputationLaunchContext launch_context( - client, allocator, device_ordinal, + client, allocator, /*allocate_xla_tensors=*/platform_info_.is_on_xla_device(), platform_info_.UseMultipleStreams()); - const xla::HloInputOutputAliasConfig& input_output_alias = - executable->executable()->module().input_output_alias_config(); - xla::StatusOr> execution_inputs = - launch_context.PopulateInputs(ctx, compilation_result, resource_var_ptrs, - /*missing_ctx_input_prefix=*/0, - input_output_alias); - OP_REQUIRES_OK(ctx, execution_inputs.status()); + launch_context.PopulateInputs(ctx, compilation_result, variables_snapshot, + /*missing_ctx_input_prefix=*/0); // Execute the computation. VLOG(2) << "Executing computation."; @@ -416,24 +403,24 @@ void XlaLocalLaunchBase::Compute(OpKernelContext* ctx) { Env* env = Env::Default(); auto start_time = env->NowMicros(); - xla::StatusOr execution_output; + xla::StatusOr run_result; if (!stream || platform_info_.platform_id() == se::host::kHostPlatformId) { - execution_output = - executable->Run(std::move(*execution_inputs), run_options); + run_result = executable->Run(launch_context.arguments(), run_options); } else { - execution_output = - executable->RunAsync(std::move(*execution_inputs), run_options); + run_result = executable->RunAsync(launch_context.arguments(), run_options); } - OP_REQUIRES(ctx, execution_output.ok(), execution_output.status()); + OP_REQUIRES(ctx, run_result.ok(), run_result.status()); auto elapsed = env->NowMicros() - start_time; VLOG(2) << "Elapsed time: " << elapsed << "us"; - OP_REQUIRES_OK( - ctx, launch_context.PopulateOutputs( - ctx, compilation_result, execution_output->ConsumeResult(), - /*missing_ctx_input_prefix=*/0, absl::MakeSpan(variable_infos), - input_output_alias, resource_var_ptrs)); + const xla::HloInputOutputAliasConfig& input_output_alias = + executable->executable()->module().input_output_alias_config(); + OP_REQUIRES_OK(ctx, + launch_context.PopulateOutputs( + ctx, compilation_result, run_result.ConsumeValueOrDie(), + /*missing_ctx_input_prefix=*/0, input_output_alias, + variables_snapshot)); VLOG(1) << "Done"; } @@ -529,14 +516,10 @@ void XlaCompileOp::Compute(OpKernelContext* ctx) { OP_REQUIRES_OK( ctx, GetVariableInfosFromCtxInputs(ctx, resources_, &variable_infos)); OP_REQUIRES_OK(ctx, LockVariables(absl::MakeSpan(variable_infos))); - - // Do not alias resource updates as locking variables in XlaCompile and - // unlocking them in XlaRun may lead to deadlocks. Status status = CompileToLocalExecutable( ctx, function_, has_ref_vars_, platform_info_, variable_infos, constants_, - /*lazy=*/!must_compile_, - /*may_alias_resource_update=*/false, &client, &kernel, &executable); + /*lazy=*/!must_compile_, &client, &kernel, &executable); OP_REQUIRES_OK(ctx, SnapshotResourceVariables(ctx, resources_, variable_infos, &variables)); if (must_compile_ || status.code() != error::UNIMPLEMENTED) { @@ -604,22 +587,14 @@ void XlaRunOp::Compute(OpKernelContext* ctx) { absl::optional tf_allocator_adapter; se::DeviceMemoryAllocator* allocator = GetAllocator(&tf_allocator_adapter, ctx, platform_info_); - se::Stream* stream = - ctx->op_device_context() ? ctx->op_device_context()->stream() : nullptr; - int device_ordinal = stream ? 
stream->parent()->device_ordinal() - : closure.client()->default_device_ordinal(); XlaComputationLaunchContext launch_context( - closure.client(), allocator, device_ordinal, + closure.client(), allocator, /*allocate_xla_tensors=*/platform_info_.is_on_xla_device(), /*use_multiple_streams=*/platform_info_.UseMultipleStreams()); // We're missing the must-be-constant inputs, tell `PopulateInputs` // about this. We don't actually need these inputs because they've // already been baked into the compiled kernel. - const xla::HloInputOutputAliasConfig& input_output_alias = - closure.executable()->executable()->module().input_output_alias_config(); - xla::StatusOr> execution_inputs; - std::map snapshot_ptrs; { tensorflow::profiler::TraceMe hlo_module_activity( [&] { @@ -629,17 +604,13 @@ void XlaRunOp::Compute(OpKernelContext* ctx) { }, tensorflow::profiler::TraceMeLevel::kInfo); - for (auto& p : closure.resource_var_snapshots()) { - snapshot_ptrs.emplace(p.first, - p.second.has_value() ? &p.second.value() : nullptr); - } - execution_inputs = launch_context.PopulateInputs( - ctx, closure.compilation_result(), snapshot_ptrs, - /*missing_ctx_input_prefix=*/closure.num_constant_args(), - input_output_alias); - OP_REQUIRES_OK(ctx, execution_inputs.status()); + launch_context.PopulateInputs( + ctx, closure.compilation_result(), closure.resource_var_snapshots(), + /*missing_ctx_input_prefix=*/closure.num_constant_args()); } + se::Stream* stream = + ctx->op_device_context() ? ctx->op_device_context()->stream() : nullptr; xla::ExecutableRunOptions run_options; run_options.set_stream(stream); run_options.set_allocator(allocator); @@ -660,19 +631,21 @@ void XlaRunOp::Compute(OpKernelContext* ctx) { Env* env = Env::Default(); auto start_time = env->NowMicros(); - xla::StatusOr execution_output; + xla::StatusOr run_result; if (!stream || platform_info_.platform_id() == se::host::kHostPlatformId) { - execution_output = - closure.executable()->Run(std::move(*execution_inputs), run_options); + run_result = + closure.executable()->Run(launch_context.arguments(), run_options); } else { - execution_output = closure.executable()->RunAsync( - std::move(*execution_inputs), run_options); + run_result = + closure.executable()->RunAsync(launch_context.arguments(), run_options); } - OP_REQUIRES(ctx, execution_output.ok(), execution_output.status()); + OP_REQUIRES(ctx, run_result.ok(), run_result.status()); auto elapsed = env->NowMicros() - start_time; VLOG(2) << "Elapsed time in computation: " << elapsed << "us"; + const xla::HloInputOutputAliasConfig& input_output_alias = + closure.executable()->executable()->module().input_output_alias_config(); tensorflow::profiler::TraceMe hlo_module_activity( [&] { @@ -680,16 +653,12 @@ void XlaRunOp::Compute(OpKernelContext* ctx) { }, tensorflow::profiler::TraceMeLevel::kInfo); - xla::StatusOr> variable_infos = GatherVariableInfo( - ctx, *closure.compilation_result(), closure.num_constant_args()); - OP_REQUIRES_OK(ctx, variable_infos.status()); - OP_REQUIRES_OK(ctx, LockVariables(absl::MakeSpan(*variable_infos))); OP_REQUIRES_OK( ctx, launch_context.PopulateOutputs( - ctx, closure.compilation_result(), execution_output->ConsumeResult(), + ctx, closure.compilation_result(), run_result.ConsumeValueOrDie(), /*missing_ctx_input_prefix=*/closure.num_constant_args(), - absl::MakeSpan(*variable_infos), input_output_alias, snapshot_ptrs)); + input_output_alias, closure.resource_var_snapshots())); } XlaMergeOp::XlaMergeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} diff --git 
a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc index 50813859603..afaee614f02 100644 --- a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc +++ b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc @@ -50,47 +50,35 @@ Status XlaCompileOnDemandOp::Run(OpKernelContext* ctx, // Builds an XLA allocator for the device. XlaComputationLaunchContext launch_context( client, client->backend().memory_allocator(), - client->default_device_ordinal(), /*allocate_xla_tensors=*/true, /*use_multiple_streams=*/metadata.UseMultipleStreams()); - std::map snapshot_ptrs; - for (auto& p : variable_args) { - snapshot_ptrs.emplace(p.first, - p.second.has_value() ? &p.second.value() : nullptr); - } - - const xla::HloInputOutputAliasConfig& input_output_alias = - executable->executable()->module().input_output_alias_config(); - xla::StatusOr> execution_inputs = - launch_context.PopulateInputs(ctx, result, snapshot_ptrs, - /*missing_ctx_input_prefix=*/0, - input_output_alias); - TF_RETURN_IF_ERROR(execution_inputs.status()); + launch_context.PopulateInputs(ctx, result, variable_args, + /*missing_ctx_input_prefix=*/0); se::Stream* stream = ctx->op_device_context() ? ctx->op_device_context()->stream() : nullptr; TF_RET_CHECK(stream); VLOG(2) << "Executing computation: " << name(); + for (const xla::ShapedBuffer* arg : launch_context.arguments()) { + VLOG(2) << name() << ": " << *arg; + } xla::ExecutableRunOptions run_options; run_options.set_stream(stream); run_options.set_allocator(client->backend().memory_allocator()); run_options.set_intra_op_thread_pool(&ctx->eigen_cpu_device()); run_options.set_rng_seed(GetXLARandomSeed()); - xla::StatusOr run_result = - executable->Run(execution_inputs.ConsumeValueOrDie(), run_options); + xla::StatusOr run_result = + executable->Run(launch_context.arguments(), run_options); TF_RETURN_IF_ERROR(run_result.status()); - xla::ExecutionOutput execution_output = run_result.ConsumeValueOrDie(); - xla::StatusOr> variable_infos = - GatherVariableInfo(ctx, *result, 0); - TF_RETURN_IF_ERROR(variable_infos.status()); - TF_RETURN_IF_ERROR(LockVariables(absl::MakeSpan(*variable_infos))); + + const xla::HloInputOutputAliasConfig& input_output_alias = + executable->executable()->module().input_output_alias_config(); TF_RETURN_IF_ERROR(launch_context.PopulateOutputs( - ctx, result, execution_output.ConsumeResult(), - /*missing_ctx_input_prefix=*/0, absl::MakeSpan(*variable_infos), - input_output_alias, snapshot_ptrs)); + ctx, result, run_result.ConsumeValueOrDie(), + /*missing_ctx_input_prefix=*/0, input_output_alias, variable_args)); return Status::OK(); } diff --git a/tensorflow/compiler/jit/xla_device_ops.cc b/tensorflow/compiler/jit/xla_device_ops.cc index f0555ae32e5..8126059262b 100644 --- a/tensorflow/compiler/jit/xla_device_ops.cc +++ b/tensorflow/compiler/jit/xla_device_ops.cc @@ -59,13 +59,11 @@ void XlaAssignVariableOp::Compute(OpKernelContext* context) { return Status::OK(); })); mutex_lock ml(*variable->mu()); - OP_REQUIRES( - context, - !variable->is_initialized || variable->tensor()->dtype() == dtype_, - errors::InvalidArgument( - "Trying to assign variable with wrong dtype. Expected ", - DataTypeString(variable->tensor()->dtype()), " got ", - DataTypeString(dtype_))); + OP_REQUIRES(context, variable->tensor()->dtype() == dtype_, + errors::InvalidArgument( + "Trying to assign variable with wrong dtype. 
Expected ", + DataTypeString(variable->tensor()->dtype()), " got ", + DataTypeString(dtype_))); variable->is_initialized = true; *variable->tensor() = value; } diff --git a/tensorflow/compiler/jit/xla_launch_util.cc b/tensorflow/compiler/jit/xla_launch_util.cc index 41abe86df6e..7f107aaef11 100644 --- a/tensorflow/compiler/jit/xla_launch_util.cc +++ b/tensorflow/compiler/jit/xla_launch_util.cc @@ -91,19 +91,29 @@ VariableInfo::~VariableInfo() { Status GetVariableInfosFromCtxInputs(OpKernelContext* ctx, absl::Span variable_indices, std::vector* result) { + std::vector resource_handles; + absl::c_transform( + variable_indices, std::back_inserter(resource_handles), + [&](int variable_idx) { return &HandleFromInput(ctx, variable_idx); }); + + std::vector> variables; + Status s = LookupResources(ctx, resource_handles, &variables); + if (!s.ok()) { + errors::AppendToMessage(&s, kPossibleNonVariableResourceHintMessage); + return s; + } + result->clear(); result->reserve(variable_indices.size()); - for (int var_idx : variable_indices) { - Var* variable = nullptr; - ResourceHandle handle = HandleFromInput(ctx, var_idx); - TF_RETURN_IF_ERROR( - LookupOrCreateResource(ctx, handle, &variable, [&](Var** ptr) { - // This var is uninitialized for now. - *ptr = new Var(DT_INVALID); - return Status::OK(); - })); - result->emplace_back(var_idx, handle.name(), variable); + for (int i = 0; i < variable_indices.size(); i++) { + // *Release* the variable because we're going to unref it later in + // ~VariableInfo. + Var* variable = variables[i].release(); + int input_idx = variable_indices[i]; + std::string var_name = HandleFromInput(ctx, input_idx).name(); + result->emplace_back(input_idx, var_name, variable); } + return Status::OK(); } @@ -166,43 +176,24 @@ Status SnapshotResourceVariables(OpKernelContext* ctx, XlaComputationLaunchContext::XlaComputationLaunchContext( xla::LocalClient* client, se::DeviceMemoryAllocator* xla_allocator, - int device_ordinal, bool allocate_xla_tensors, bool use_multiple_streams) + bool allocate_xla_tensors, bool use_multiple_streams) : client_(client), xla_allocator_(xla_allocator), allocate_xla_tensors_(allocate_xla_tensors), - use_multiple_streams_(use_multiple_streams), - device_ordinal_(device_ordinal) { + use_multiple_streams_(use_multiple_streams) { if (use_multiple_streams_) { CHECK(allocate_xla_tensors_) << "To use multiple streams correctly we must " "be allocating XLA tensors!"; } } -// Fills in `execution_input` with `buffer` for `index`. -static void PopulateExecutionInputBuffer(xla::ExecutionInput& execution_input, - xla::ShapeIndex index, - se::DeviceMemoryBase& buffer, - bool donate_buffer, int device_ordinal, - se::DeviceMemoryAllocator* allocator) { - xla::MaybeOwningDeviceMemory* in_buffer = - execution_input.MutableBuffer(index); - if (donate_buffer) { - *in_buffer = se::OwningDeviceMemory(buffer, device_ordinal, allocator); - buffer = se::DeviceMemoryBase(); - } else { - *in_buffer = buffer; - } -} - -xla::StatusOr> -XlaComputationLaunchContext::PopulateInputs( +void XlaComputationLaunchContext::PopulateInputs( OpKernelContext* ctx, const XlaCompiler::CompilationResult* compilation_result, - const std::map& resource_vars, - int missing_ctx_input_prefix, - const xla::HloInputOutputAliasConfig& input_output_alias) { - std::vector arguments; - arguments.reserve(compilation_result->xla_input_shapes.size()); + const ResourceVarsSnapshot& variables, int missing_ctx_input_prefix) { + // Build ShapedBuffers that point directly to the Tensor buffers. 
+ arg_ptrs_ = + std::vector(compilation_result->xla_input_shapes.size()); xla::TransferManager* transfer_manager = client_->backend().transfer_manager(); @@ -210,28 +201,10 @@ XlaComputationLaunchContext::PopulateInputs( int arg_num = compilation_result->input_mapping[i]; CHECK_GE(arg_num, missing_ctx_input_prefix); const xla::Shape& shape = compilation_result->xla_input_shapes[i]; - const xla::Shape& device_shape = - transfer_manager->HostShapeToDeviceShape(shape); - - bool is_resource_variable = resource_vars.count(arg_num); - bool is_updated_resource_variable = - is_resource_variable && - absl::c_any_of(compilation_result->resource_updates, - [&](const XlaCompiler::ResourceUpdate& update) { - return update.input_index == i && update.modified; - }); - - const Tensor* t = is_resource_variable - ? resource_vars.at(arg_num) + const Tensor* t = variables.count(arg_num) + ? &(variables.at(arg_num).value()) : &(ctx->input(arg_num - missing_ctx_input_prefix)); CHECK(t); - bool donate_buffer = - t->RefCountIsOne() && is_updated_resource_variable && - input_output_alias.ParameterHasAlias(i, xla::ShapeIndex{}); - VLOG(3) << "Processing input: " << i - << "; is_resource_variable=" << is_resource_variable - << "; is_updated_resource_variable=" << is_updated_resource_variable - << "; donate_buffer=" << donate_buffer; if (use_multiple_streams_) { CHECK(ctx->op_device_context() && ctx->op_device_context()->stream()) @@ -242,28 +215,23 @@ XlaComputationLaunchContext::PopulateInputs( ctx->op_device_context()->stream()); } - arguments.emplace_back(device_shape, shape); - xla::ExecutionInput& execution_input = arguments.back(); - if (xla::Shape::Equal().MinorToMajorOnlyInLayout()(shape, device_shape)) { + if (xla::Shape::Equal().MinorToMajorOnlyInLayout()( + shape, transfer_manager->HostShapeToDeviceShape(shape))) { se::DeviceMemoryBase dmem = XlaTensor::DeviceMemoryFromTensor(*t); - PopulateExecutionInputBuffer(execution_input, xla::ShapeIndex{}, dmem, - donate_buffer, device_ordinal_, - xla_allocator_); + arg_buffers_.emplace_back( + /*on_host_shape=*/shape, /*on_device_shape=*/shape, + client_->platform(), client_->default_device_ordinal()); + arg_buffers_.back().set_buffer(dmem, /*index=*/{}); + arg_ptrs_[i] = &arg_buffers_.back(); } else { - XlaTensor* xla_tensor = XlaTensor::FromTensor(t); + const XlaTensor* xla_tensor = XlaTensor::FromTensor(t); CHECK(xla_tensor && xla_tensor->has_shaped_buffer()); - xla_tensor->shaped_buffer().buffers().ForEachMutableElement( - [&](const xla::ShapeIndex& index, se::DeviceMemoryBase* buffer) { - PopulateExecutionInputBuffer(execution_input, index, *buffer, - donate_buffer, device_ordinal_, - xla_allocator_); - }); + arg_ptrs_[i] = const_cast(&xla_tensor->shaped_buffer()); } } - return std::move(arguments); } -// Construct the tensor for the given type and buffer. +// Construct the tensor for given type and buffer. 
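+// The returned Tensor aliases the provided device buffer; no data is copied.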
static Tensor MakeTensor(DataType dtype, const TensorShape& shape, se::DeviceMemoryBase buffer, Allocator* allocator) { size_t expected_size = shape.num_elements() * DataTypeSize(dtype); @@ -279,26 +247,28 @@ static Tensor GetOrCreateTensorForOutput( int output_num, OpKernelContext* ctx, int missing_ctx_input_prefix, const xla::HloInputOutputAliasConfig& input_output_alias, absl::Span input_mapping, - const std::map& resource_vars_snapshots, - DataType output_dtype, const TensorShape& output_shape, - se::DeviceMemoryBase output_buffer, Allocator* output_allocator) { + const ResourceVarsSnapshot& resource_var_snapshots, DataType output_dtype, + const TensorShape& output_shape, se::DeviceMemoryBase output_buffer, + Allocator* output_allocator) { xla::ShapeIndex output_index = input_output_alias.shape().IsTuple() ? xla::ShapeIndex({output_num}) : xla::ShapeIndex({}); - CHECK(input_output_alias.shape().IsTuple() || output_num == 0); if (absl::optional alias = input_output_alias.GetAliasedParameter(output_index)) { - VLOG(3) << "Found alias: " << alias->ToString(); int tf_param = input_mapping[alias->parameter_number] - missing_ctx_input_prefix; - const Tensor input_tensor = - ctx->input(tf_param).dtype() != DT_RESOURCE - ? ctx->input(tf_param) - : *resource_vars_snapshots.at(missing_ctx_input_prefix + tf_param); - if (output_buffer.opaque() == input_tensor.data()) { - return input_tensor; + const Tensor* input_tensor = &ctx->input(tf_param); + + // If input tensor is a resource variable, alias to the snapshot we took at + // entry time. + if (input_tensor->dtype() == DT_RESOURCE) { + const absl::optional& v = + resource_var_snapshots.at(missing_ctx_input_prefix + tf_param); + CHECK(v.has_value()); + return *v; } + return *input_tensor; } return MakeTensor(output_dtype, output_shape, output_buffer, output_allocator); @@ -321,10 +291,12 @@ static Status SetOutputForConstant( OpKernelContext* ctx, se::Stream* stream, const XlaCompiler::CompilationResult* compilation_result, int output_num) { CHECK(compilation_result->outputs[output_num].is_constant); + // Output is a constant. const Tensor& const_tensor = compilation_result->outputs[output_num].constant_value; Tensor* output_tensor; - if (stream && const_tensor.TotalBytes() > 0) { + const size_t total_bytes = const_tensor.TotalBytes(); + if (stream && total_bytes > 0) { // Copy host -> device. (Empty tensors don't have backing buffers.) // Manually allocate memory using an XlaTensorBuffer so we can allocate // as much memory as the device requires (as given by @@ -363,55 +335,52 @@ static Status SetOutputForConstant( return Status::OK(); } -static xla::StatusOr GetOrCreateResourceVar( - OpKernelContext* ctx, const ResourceHandle& handle, - const XlaCompiler::ResourceUpdate& write) { - Var* variable = nullptr; - TF_RETURN_IF_ERROR( - LookupOrCreateResource(ctx, handle, &variable, [&write](Var** ptr) { - *ptr = new Var(write.type); - return Status::OK(); - })); - return variable; -} - -xla::StatusOr> GatherVariableInfo( +// Creates a list of updates resource variables. 
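+// Each entry wraps the Var behind a resource input that the computation
+// writes; variables that do not exist yet are created uninitialized so the
+// caller can lock them before applying the updates.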
+static xla::StatusOr> GatherVariableInfo( OpKernelContext* ctx, - const XlaCompiler::CompilationResult& compilation_result, + const XlaCompiler::CompilationResult* compilation_result, int missing_ctx_input_prefix) { - std::vector out; - out.reserve(compilation_result.resource_updates.size()); - for (int i = 0; i < compilation_result.resource_updates.size(); ++i) { + std::vector variable_infos; + variable_infos.reserve(compilation_result->resource_updates.size()); + + for (int i = 0; i < compilation_result->resource_updates.size(); ++i) { const XlaCompiler::ResourceUpdate& write = - compilation_result.resource_updates[i]; + compilation_result->resource_updates[i]; int actual_input_index = write.input_index - missing_ctx_input_prefix; if (actual_input_index < 0 || actual_input_index >= ctx->num_inputs()) { return errors::Internal("Invalid input index for variable write."); } + // TODO(b/35625933): tensorflow::Var should contain a PersistentTensor, + // not a Tensor. + Var* variable = nullptr; const ResourceHandle handle = HandleFromInput(ctx, actual_input_index); - TF_ASSIGN_OR_RETURN(Var * variable, - GetOrCreateResourceVar(ctx, handle, write)); - out.emplace_back(actual_input_index, handle.name(), variable); + TF_RETURN_IF_ERROR(LookupOrCreateResource(ctx, handle, &variable, + [&write](Var** ptr) { + *ptr = new Var(write.type); + return Status::OK(); + })); + variable_infos.emplace_back(actual_input_index, handle.name(), variable); } - return std::move(out); + return variable_infos; } Status XlaComputationLaunchContext::PopulateOutputs( OpKernelContext* ctx, const XlaCompiler::CompilationResult* compilation_result, ScopedShapedBuffer output, int missing_ctx_input_prefix, - absl::Span variable_infos, const xla::HloInputOutputAliasConfig& input_output_alias, - const std::map& resource_vars) { + const ResourceVarsSnapshot& resource_var_snapshots) { se::Stream* stream = ctx->op_device_context() ? ctx->op_device_context()->stream() : nullptr; Allocator* allocator = ctx->device()->GetAllocator({}); // Computation output should always be a tuple. 
- VLOG(2) << "Result tuple shape: " << output.on_host_shape().DebugString(); - VLOG(2) << "Result tuple shape (on device): " - << output.on_device_shape().DebugString(); + if (VLOG_IS_ON(2)) { + VLOG(2) << "Result tuple shape: " << output.on_host_shape().DebugString(); + VLOG(2) << "Result tuple shape (on device): " + << output.on_device_shape().DebugString(); + } CHECK_EQ(ctx->num_outputs(), compilation_result->outputs.size()); // If the on-host-shape isn't a tuple, create a new single-element tuple @@ -469,8 +438,8 @@ Status XlaComputationLaunchContext::PopulateOutputs( for (int i = 0; i < ctx->num_outputs(); ++i) { const TensorShape& shape = output_tensor_shapes[i]; const DataType& type = compilation_result->outputs[i].type; - VLOG(2) << "Populating output for retval " << i << " shape " - << shape.DebugString() << " type " << DataTypeString(type); + VLOG(2) << "Retval " << i << " shape " << shape.DebugString() << " type " + << DataTypeString(type); if (type == DT_VARIANT) { return errors::Unimplemented( "Support for TensorList crossing the XLA/TF boundary " @@ -498,37 +467,30 @@ Status XlaComputationLaunchContext::PopulateOutputs( se::DeviceMemoryBase buffer = output.buffer({output_num}); Tensor output_tensor = GetOrCreateTensorForOutput( output_num, ctx, missing_ctx_input_prefix, input_output_alias, - compilation_result->input_mapping, resource_vars, + compilation_result->input_mapping, resource_var_snapshots, ctx->expected_output_dtype(i), shape, buffer, allocator); + output.set_buffer(se::OwningDeviceMemory(), {output_num}); ctx->set_output(i, output_tensor); } - output.set_buffer(se::OwningDeviceMemory(), {output_num}); ++output_num; } + + if (VLOG_IS_ON(3)) { + VLOG(3) << ctx->mutable_output(i)->DeviceSafeDebugString(); + } } - // input_index -> index into variable_infos. - absl::flat_hash_map variable_info_lookup; - for (int i = 0; i < variable_infos.size(); i++) { - variable_info_lookup.emplace(variable_infos[i].index(), i); - } - // Apply variable updates, if any. 
+ VLOG(2) << "Applying variable updates"; + TF_ASSIGN_OR_RETURN( + std::vector variable_infos, + GatherVariableInfo(ctx, compilation_result, missing_ctx_input_prefix)); + TF_RETURN_IF_ERROR(LockVariables(absl::MakeSpan(variable_infos))); + for (int i = 0; i < compilation_result->resource_updates.size(); ++i) { const XlaCompiler::ResourceUpdate& write = compilation_result->resource_updates[i]; - int actual_input_index = write.input_index - missing_ctx_input_prefix; - CHECK_GE(actual_input_index, 0); - CHECK_LT(actual_input_index, ctx->num_inputs()); - Var* var = variable_infos[variable_info_lookup[actual_input_index]].var(); - CHECK(var); - - VLOG(2) << "Updating variable #" << i - << " at input index: " << actual_input_index << " with shape " - << write.shape.DebugString() << "; variable tensor has shape: " - << var->tensor()->shape().DebugString(); - - if (var->is_initialized && var->tensor()->dtype() != write.type) { + if (variable_infos[i].var()->tensor()->dtype() != write.type) { return errors::Internal("Mismatched type in variable write"); } @@ -542,14 +504,14 @@ Status XlaComputationLaunchContext::PopulateOutputs( } } else { se::DeviceMemoryBase buffer = output.buffer({output_num}); + output.set_buffer(se::OwningDeviceMemory(), {output_num}); output_tensor = GetOrCreateTensorForOutput( output_num, ctx, missing_ctx_input_prefix, input_output_alias, - compilation_result->input_mapping, resource_vars, write.type, + compilation_result->input_mapping, resource_var_snapshots, write.type, write.shape, buffer, allocator); } - output.set_buffer(se::OwningDeviceMemory(), {output_num}); - var->is_initialized |= write.modified; - *var->tensor() = output_tensor; + *variable_infos[i].var()->tensor() = output_tensor; + variable_infos[i].var()->is_initialized |= write.modified; ++output_num; } return Status::OK(); @@ -600,7 +562,7 @@ Status XlaComputationLaunchContext::BuildXlaCompilerArguments( arg.name = std::string(variable.name()); arg.kind = XlaCompiler::Argument::kResource; arg.resource_kind = XlaResource::kVariable; - if (variable.var() && variable.var()->is_initialized) { + if (variable.var()) { const Tensor* value = variable.var()->tensor(); arg.type = value->dtype(); arg.shape = value->shape(); diff --git a/tensorflow/compiler/jit/xla_launch_util.h b/tensorflow/compiler/jit/xla_launch_util.h index b34b3059a4f..92b6c4c8a08 100644 --- a/tensorflow/compiler/jit/xla_launch_util.h +++ b/tensorflow/compiler/jit/xla_launch_util.h @@ -81,12 +81,6 @@ class VariableInfo { bool lock_held_ = false; }; -// Creates a list of updated resource variables. -xla::StatusOr> GatherVariableInfo( - OpKernelContext* ctx, - const XlaCompiler::CompilationResult& compilation_result, - int missing_ctx_input_prefix); - // Takes a snapshot of the values of resource variable arguments, whose indices // are specified in `variable_indices` argument. We snapshot tensors that back // resource variables since concurrent updates may modify the shape, and it is @@ -130,7 +124,7 @@ class XlaComputationLaunchContext { // objects. XlaComputationLaunchContext(xla::LocalClient* client, se::DeviceMemoryAllocator* xla_allocator, - int device_ordinal, bool allocate_xla_tensors, + bool allocate_xla_tensors, bool use_multiple_streams); // Builds a XlaCompiler::Argument vector from the arguments to an XlaLaunch @@ -148,12 +142,10 @@ class XlaComputationLaunchContext { // missing and adjusts input indices accordingly. 
All elements in kernel's // input_mapping must be greater than or equal to `missing_ctx_input_prefix` // (in other words, no inputs actually required by the kernel can be missing). - xla::StatusOr> PopulateInputs( - OpKernelContext* ctx, - const XlaCompiler::CompilationResult* compilation_result, - const std::map& resource_vars, - int missing_ctx_input_prefix, - const xla::HloInputOutputAliasConfig& input_output_alias); + void PopulateInputs(OpKernelContext* ctx, + const XlaCompiler::CompilationResult* compilation_result, + const ResourceVarsSnapshot& variables, + int missing_ctx_input_prefix); // Given the XLA output in `output`, populate all outputs of `ctx`. Also // writes out the resource variable updates. @@ -169,16 +161,20 @@ class XlaComputationLaunchContext { OpKernelContext* ctx, const XlaCompiler::CompilationResult* compilation_result, xla::ScopedShapedBuffer output, int missing_ctx_input_prefix, - absl::Span variable_infos, const xla::HloInputOutputAliasConfig& input_output_alias, - const std::map& resource_vars); + const ResourceVarsSnapshot& resource_var_snapshots); + + // Return the argument list. Only valid after PopulateInputs() has been + // called. + const std::vector& arguments() const { return arg_ptrs_; } private: xla::LocalClient* client_; se::DeviceMemoryAllocator* xla_allocator_; bool allocate_xla_tensors_; bool use_multiple_streams_; - int device_ordinal_; + std::deque arg_buffers_; + std::vector arg_ptrs_; }; // A simple TensorBuffer implementation that allows us to create Tensors that diff --git a/tensorflow/python/eager/def_function_xla_jit_test.py b/tensorflow/python/eager/def_function_xla_jit_test.py index bd7a6ec2279..d55f84863e9 100644 --- a/tensorflow/python/eager/def_function_xla_jit_test.py +++ b/tensorflow/python/eager/def_function_xla_jit_test.py @@ -32,7 +32,6 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import tensor_array_ops -from tensorflow.python.ops import variables from tensorflow.python.platform import test @@ -404,69 +403,6 @@ class DefFunctionTest(test.TestCase): self.assertEqual(inner_retracings, 1) - def testUpdateVariable(self): - v = variables.Variable(3.1) - - @def_function.function(experimental_compile=True) - def update_var(a, b): - v.assign_add(a * b) - - update_var(constant_op.constant(0.7), constant_op.constant(0.6)) - self.assertAllClose(v, 3.52) - - def testUpdateVariableVector(self): - v = variables.Variable([3.1, 3.1]) - - @def_function.function(experimental_compile=True) - def update_var(a, b): - v.assign_add(a * b) - - update_var( - constant_op.constant([0.7, 0.7]), constant_op.constant([0.6, 0.6])) - self.assertAllClose(v, [3.52, 3.52]) - - def testUpdateVariableInClass(self): - - class C(object): - - @def_function.function(experimental_compile=True) - def update_var(self, a, b): - if not hasattr(self, 'v'): - self.v = variables.Variable(3.1) - self.v.assign_add(a * b) - - c = C() - - @def_function.function - def outer(): - c.update_var(constant_op.constant(0.7), constant_op.constant(0.6)) - - outer() - self.assertAllClose(c.v, 3.52) - - def testUpdateVariableMultipleOutputs(self): - v = variables.Variable(3.1) - - @def_function.function(experimental_compile=True) - def update_var(a, b): - v.assign_add(a * b) - return a * b + v - - out = update_var(constant_op.constant(0.7), constant_op.constant(0.6)) - self.assertAllClose(v, 3.52) - self.assertAllClose(out, 3.94) - - def testReturnIdentity(self): - 
- @def_function.function(experimental_compile=True) - def f(a, b): - return (a, b) - - a = constant_op.constant([0.7]) - b = constant_op.constant([0.6]) - - f(a, b) - if __name__ == '__main__': ops.enable_eager_execution() From 37a194ac9bed74c01183cc5c4b5db2ebcc0d858c Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Wed, 15 Jul 2020 01:00:09 +0200 Subject: [PATCH 0428/2522] Log learning rate to TensorBoard when using schedules `keras.callbacks.LearningRateScheduler` automatically logs the learning rate to TensorBoard. However, when using `keras.optimizers.schedules.LearningRateSchedule` for step wise learning rate scheduling there is no builtin way of observing the learning rate in TensorBoard which can be crucial for debugging and experimenting with different schedules. This PR adds support for automatic logging of the learning rate and other decaying hyper parameters of custom optimizers that use `keras.optimizers.schedules.LearningRateSchedule`. --- tensorflow/python/keras/callbacks.py | 8 +++++ tensorflow/python/keras/callbacks_test.py | 39 +++++++++++++++++++---- 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/keras/callbacks.py b/tensorflow/python/keras/callbacks.py index 131bc152e51..f3763b8ec1d 100644 --- a/tensorflow/python/keras/callbacks.py +++ b/tensorflow/python/keras/callbacks.py @@ -41,6 +41,7 @@ from tensorflow.python.eager import context from tensorflow.python.framework import ops from tensorflow.python.keras import backend as K from tensorflow.python.keras.distribute import worker_training_state +from tensorflow.python.keras.optimizer_v2 import learning_rate_schedule from tensorflow.python.keras.utils import generic_utils from tensorflow.python.keras.utils import tf_utils from tensorflow.python.keras.utils import version_utils @@ -2210,6 +2211,12 @@ class TensorBoard(Callback, version_utils.TensorBoardVersionSelector): profiler.stop() self._is_tracing = False + def _collect_learning_rate(self, logs): + lr_schedule = getattr(self.model.optimizer, "lr", None) + if isinstance(lr_schedule, learning_rate_schedule.LearningRateSchedule): + logs["learning_rate"] = lr_schedule(self.model.optimizer.iterations) + return logs + def _log_epoch_metrics(self, epoch, logs): """Writes epoch metrics out as scalar summaries. 
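A minimal usage sketch (illustrative only, not part of the patch), assuming the standard public Keras APIs; the `epoch_learning_rate` tag matches the test added below, and the log directory name is arbitrary:

import tensorflow as tf

# Drive the optimizer with a LearningRateSchedule instead of a fixed float.
schedule = tf.keras.optimizers.schedules.CosineDecay(
    initial_learning_rate=0.01, decay_steps=100)
model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
model.compile(tf.keras.optimizers.SGD(learning_rate=schedule), loss='mse')

# With the TensorBoard callback attached, each epoch should also record an
# 'epoch_learning_rate' scalar alongside 'epoch_loss' in the train logs.
model.fit(
    tf.ones((16, 4)), tf.ones((16, 1)),
    epochs=2,
    callbacks=[tf.keras.callbacks.TensorBoard(log_dir='/tmp/lr_logs')])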
@@ -2222,6 +2229,7 @@ class TensorBoard(Callback, version_utils.TensorBoardVersionSelector): train_logs = {k: v for k, v in logs.items() if not k.startswith('val_')} val_logs = {k: v for k, v in logs.items() if k.startswith('val_')} + train_logs = self._collect_learning_rate(train_logs) with summary_ops_v2.always_record_summaries(): if train_logs: diff --git a/tensorflow/python/keras/callbacks_test.py b/tensorflow/python/keras/callbacks_test.py index fdaf2e24227..2c1ce26ce7c 100644 --- a/tensorflow/python/keras/callbacks_test.py +++ b/tensorflow/python/keras/callbacks_test.py @@ -1789,18 +1789,19 @@ class TestTensorBoardV2(keras_parameterized.TestCase): self.train_dir = os.path.join(self.logdir, 'train') self.validation_dir = os.path.join(self.logdir, 'validation') - def _get_model(self): + def _get_model(self, compile_model=True): layers = [ keras.layers.Conv2D(8, (3, 3)), keras.layers.Flatten(), keras.layers.Dense(1) ] model = testing_utils.get_model_from_layers(layers, input_shape=(10, 10, 1)) - opt = gradient_descent.SGD(learning_rate=0.001) - model.compile( - opt, - 'mse', - run_eagerly=testing_utils.should_run_eagerly()) + if compile_model: + opt = gradient_descent.SGD(learning_rate=0.001) + model.compile( + opt, + 'mse', + run_eagerly=testing_utils.should_run_eagerly()) return model def test_TensorBoard_default_logdir(self): @@ -1914,6 +1915,32 @@ class TestTensorBoardV2(keras_parameterized.TestCase): }, ) + def test_TensorBoard_learning_rate_schedules(self): + model = self._get_model(compile_model=False) + opt = gradient_descent.SGD(learning_rate_schedule.CosineDecay(0.01, 1)) + model.compile( + opt, + 'mse', + run_eagerly=testing_utils.should_run_eagerly()) + + x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) + + model.fit( + x, + y, + batch_size=2, + epochs=2, + callbacks=[keras.callbacks.TensorBoard(self.logdir)]) + + summary_file = list_summaries(self.logdir) + self.assertEqual( + summary_file.scalars, + { + _ObservedSummary(logdir=self.train_dir, tag='epoch_loss'), + _ObservedSummary(logdir=self.train_dir, tag='epoch_learning_rate'), + }, + ) + def test_TensorBoard_weight_histograms(self): model = self._get_model() x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) From 9cea3cdbd5db621697398a80abf075862856a380 Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Tue, 14 Jul 2020 15:59:13 -0700 Subject: [PATCH 0429/2522] Internal cleanup: consolidate and simplify tests. 
PiperOrigin-RevId: 321253126 Change-Id: I0bdf8674308cac3ab1eed58042577bac2756cf16 --- tensorflow/python/autograph/impl/BUILD | 22 +--- .../python/autograph/impl/api_py3_test.py | 84 ------------- tensorflow/python/autograph/impl/api_test.py | 115 ++++++++++++------ 3 files changed, 81 insertions(+), 140 deletions(-) delete mode 100644 tensorflow/python/autograph/impl/api_py3_test.py diff --git a/tensorflow/python/autograph/impl/BUILD b/tensorflow/python/autograph/impl/BUILD index a21b0df2ce8..ae7f610ea48 100644 --- a/tensorflow/python/autograph/impl/BUILD +++ b/tensorflow/python/autograph/impl/BUILD @@ -37,9 +37,11 @@ py_library( ], ) -tf_py_test( +py_test( name = "api_test", srcs = ["api_test.py"], + python_version = "PY3", + srcs_version = "PY3", deps = [ ":impl", "//tensorflow/python:client_testlib", @@ -49,24 +51,6 @@ tf_py_test( ], ) -py_test( - name = "api_py3_test", - srcs = ["api_py3_test.py"], - python_version = "PY3", - srcs_version = "PY3", - tags = [ - "no_oss_py2", - "no_pip", - "nopip", - ], - deps = [ - ":impl", - "//tensorflow/python:client_testlib", - "//tensorflow/python:constant_op", - "//tensorflow/python/autograph/core", - ], -) - tf_py_test( name = "conversion_test", srcs = ["conversion_test.py"], diff --git a/tensorflow/python/autograph/impl/api_py3_test.py b/tensorflow/python/autograph/impl/api_py3_test.py deleted file mode 100644 index c460e478008..00000000000 --- a/tensorflow/python/autograph/impl/api_py3_test.py +++ /dev/null @@ -1,84 +0,0 @@ -# python3 -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for api module.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections -import os - -from tensorflow.python.autograph.core import converter -from tensorflow.python.autograph.impl import api -from tensorflow.python.framework import constant_op -from tensorflow.python.platform import test - -DEFAULT_RECURSIVE = converter.ConversionOptions(recursive=True) - - -class ApiTest(test.TestCase): - - def test_converted_call_kwonly_args(self): - - def test_fn(*, a): - return a - - x = api.converted_call( - test_fn, (), {'a': constant_op.constant(-1)}, options=DEFAULT_RECURSIVE) - self.assertEqual(-1, self.evaluate(x)) - - def test_super_with_no_arg(self): - test_case_self = self - - class TestBase: - - def plus_three(self, x): - return x + 3 - - class TestSubclass(TestBase): - - def plus_three(self, x): - test_case_self.fail('This should never be called.') - - def no_arg(self, x): - return super().plus_three(x) - - tc = api.converted_call(TestSubclass, (), None, options=DEFAULT_RECURSIVE) - - self.assertEqual(5, tc.no_arg(2)) - - def test_converted_call_avoids_triggering_operators(self): - - test_self = self - - class Pair(collections.namedtuple('Pair', ['a', 'b'])): - - def __call__(self): - return self.a + self.b - - def __eq__(self, other): - test_self.fail('Triggered operator') - - p = Pair(constant_op.constant(1), constant_op.constant(2)) - - x = api.converted_call(p, (), {}, options=DEFAULT_RECURSIVE) - self.assertIsNotNone(self.evaluate(x), 3) - - -if __name__ == '__main__': - os.environ['AUTOGRAPH_STRICT_CONVERSION'] = '1' - test.main() diff --git a/tensorflow/python/autograph/impl/api_test.py b/tensorflow/python/autograph/impl/api_test.py index 5b885af43ac..ad7e8e9fb37 100644 --- a/tensorflow/python/autograph/impl/api_test.py +++ b/tensorflow/python/autograph/impl/api_test.py @@ -33,7 +33,6 @@ import types import numpy as np import six -from tensorflow.python.autograph import utils from tensorflow.python.autograph.core import ag_ctx from tensorflow.python.autograph.core import converter from tensorflow.python.autograph.core import converter_testing @@ -47,15 +46,15 @@ from tensorflow.python.data.ops import dataset_ops from tensorflow.python.eager import def_function from tensorflow.python.eager import function from tensorflow.python.framework import constant_op +from tensorflow.python.framework import ops from tensorflow.python.framework import test_util -from tensorflow.python.ops import gen_math_ops +from tensorflow.python.ops import math_ops from tensorflow.python.ops import variables from tensorflow.python.platform import test from tensorflow.python.util import function_utils from tensorflow.python.util import tf_decorator from tensorflow.python.util import tf_inspect -tf = utils.fake_tf() global_n = 2 @@ -90,6 +89,52 @@ class ApiTest(test.TestCase): self.assertEmpty( tuple(o for o in objects_after if isinstance(o, TestResource))) + def test_converted_call_kwonly_args(self): + + def test_fn(*, a): + return a + + x = api.converted_call( + test_fn, (), {'a': constant_op.constant(-1)}, options=DEFAULT_RECURSIVE) + self.assertEqual(-1, self.evaluate(x)) + + def test_super_with_no_arg(self): + test_case_self = self + + class TestBase: + + def plus_three(self, x): + return x + 3 + + class TestSubclass(TestBase): + + def plus_three(self, x): + test_case_self.fail('This should never be called.') + + def no_arg(self, x): + 
return super().plus_three(x) + + tc = api.converted_call(TestSubclass, (), None, options=DEFAULT_RECURSIVE) + + self.assertEqual(5, tc.no_arg(2)) + + def test_converted_call_avoids_triggering_operators(self): + + test_self = self + + class Pair(collections.namedtuple('Pair', ['a', 'b'])): + + def __call__(self): + return self.a + self.b + + def __eq__(self, other): + test_self.fail('Triggered operator') + + p = Pair(constant_op.constant(1), constant_op.constant(2)) + + x = api.converted_call(p, (), {}, options=DEFAULT_RECURSIVE) + self.assertIsNotNone(self.evaluate(x), 3) + @test_util.run_deprecated_v1 def test_decorator_recursive(self): @@ -102,16 +147,15 @@ class ApiTest(test.TestCase): @api.convert(recursive=True) def test_method(self, x, s, a): - while tf.reduce_sum(x) > s: + while math_ops.reduce_sum(x) > s: x //= self.called_member(a) return x tc = TestClass() - with self.cached_session() as sess: - x = tc.test_method( - constant_op.constant([2, 4]), constant_op.constant(1), - constant_op.constant(-2)) - self.assertListEqual([0, 1], self.evaluate(x).tolist()) + x = tc.test_method( + constant_op.constant([2, 4]), constant_op.constant(1), + constant_op.constant(-2)) + self.assertListEqual([0, 1], self.evaluate(x).tolist()) @test_util.run_deprecated_v1 def test_decorator_not_recursive(self): @@ -119,20 +163,19 @@ class ApiTest(test.TestCase): class TestClass(object): def called_member(self, a): - return tf.negative(a) + return math_ops.negative(a) @api.convert(recursive=False) def test_method(self, x, s, a): - while tf.reduce_sum(x) > s: + while math_ops.reduce_sum(x) > s: x //= self.called_member(a) return x tc = TestClass() - with self.cached_session() as sess: - x = tc.test_method( - constant_op.constant([2, 4]), constant_op.constant(1), - constant_op.constant(-2)) - self.assertListEqual([0, 1], self.evaluate(x).tolist()) + x = tc.test_method( + constant_op.constant([2, 4]), constant_op.constant(1), + constant_op.constant(-2)) + self.assertListEqual([0, 1], self.evaluate(x).tolist()) @test_util.run_deprecated_v1 def test_convert_then_do_not_convert(self): @@ -141,11 +184,11 @@ class ApiTest(test.TestCase): @api.do_not_convert def called_member(self, a): - return tf.negative(a) + return math_ops.negative(a) @api.convert(recursive=True) def test_method(self, x, s, a): - while tf.reduce_sum(x) > s: + while math_ops.reduce_sum(x) > s: x //= self.called_member(a) return x @@ -168,16 +211,15 @@ class ApiTest(test.TestCase): @api.convert(recursive=True) def test_method(self, x, s, a): - while tf.reduce_sum(x) > s: + while math_ops.reduce_sum(x) > s: x //= self.called_member(a) return x tc = TestClass() - with self.cached_session() as sess: - x = tc.test_method( - constant_op.constant([2, 4]), constant_op.constant(1), - constant_op.constant(-2)) - self.assertListEqual([0, 1], self.evaluate(x).tolist()) + x = tc.test_method( + constant_op.constant([2, 4]), constant_op.constant(1), + constant_op.constant(-2)) + self.assertListEqual([0, 1], self.evaluate(x).tolist()) def test_decorator_preserves_argspec(self): @@ -234,7 +276,7 @@ class ApiTest(test.TestCase): @api.convert(recursive=True) def test_method(self, x, s, a): - while tf.reduce_sum(x) > s: + while math_ops.reduce_sum(x) > s: x //= api.converted_call( self.called_member, (a,), None, options=DEFAULT_RECURSIVE) return x @@ -644,7 +686,7 @@ class ApiTest(test.TestCase): opts = converter.ConversionOptions( user_requested=True, optional_features=None) - x = api.converted_call(gen_math_ops.add, (1, 1), None, options=opts) + x = 
api.converted_call(math_ops.add, (1, 1), None, options=opts) self.assertAllEqual(self.evaluate(x), 2) @@ -685,7 +727,7 @@ class ApiTest(test.TestCase): class TestClass(collections.namedtuple('TestNamedtuple', ('a', 'b'))): def test_method(self, x): - while tf.reduce_sum(x) > self.a: + while math_ops.reduce_sum(x) > self.a: x //= self.b return x @@ -713,7 +755,7 @@ class ApiTest(test.TestCase): class TestClass(collections.namedtuple('TestNamedtuple', ('a', 'b'))): def test_method(self, x): - while tf.reduce_sum(x) > self.a: + while math_ops.reduce_sum(x) > self.a: x //= self.b return x @@ -786,7 +828,7 @@ class ApiTest(test.TestCase): def f(): return dataset_ops.Dataset.range(-3, 3).map(other_fn) - # Dataset iteration only works inside tf. + # Dataset iteration only works inside math_ops. @def_function.function def graph_fn(): ds = api.converted_call(f, (), None, options=DEFAULT_RECURSIVE) @@ -912,13 +954,13 @@ class ApiTest(test.TestCase): def test_to_graph_basic(self): def test_fn(x, s): - while tf.reduce_sum(x) > s: + while math_ops.reduce_sum(x) > s: x //= 2 return x compiled_fn = api.to_graph(test_fn) - with tf.Graph().as_default(): + with ops.Graph().as_default(): x = compiled_fn(constant_op.constant((4, 8)), 4) self.assertAllEqual(self.evaluate(x), (1, 2)) @@ -928,15 +970,14 @@ class ApiTest(test.TestCase): foo = 4 def test_fn(x, s=foo): - while tf.reduce_sum(x) > s: + while math_ops.reduce_sum(x) > s: x //= 2 return x compiled_fn = api.to_graph(test_fn) - with self.cached_session() as sess: - x = compiled_fn(constant_op.constant([4, 8])) - self.assertListEqual([1, 2], self.evaluate(x).tolist()) + x = compiled_fn(constant_op.constant([4, 8])) + self.assertListEqual([1, 2], self.evaluate(x).tolist()) def test_to_graph_with_globals(self): @@ -1056,7 +1097,7 @@ class ApiTest(test.TestCase): def test_to_code_basic(self): def test_fn(x, s): - while tf.reduce_sum(x) > s: + while math_ops.reduce_sum(x) > s: x /= 2 return x @@ -1067,7 +1108,7 @@ class ApiTest(test.TestCase): @def_function.function def test_fn(x, s): - while tf.reduce_sum(x) > s: + while math_ops.reduce_sum(x) > s: x /= 2 return x From 4ada0936240434bc9744db87f4a96c69617533bb Mon Sep 17 00:00:00 2001 From: Xingyu Long Date: Tue, 14 Jul 2020 19:26:45 -0400 Subject: [PATCH 0430/2522] Add tags to avoid breaks. --- tensorflow/python/keras/benchmarks/BUILD | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tensorflow/python/keras/benchmarks/BUILD b/tensorflow/python/keras/benchmarks/BUILD index b0175a25278..87df84f2663 100644 --- a/tensorflow/python/keras/benchmarks/BUILD +++ b/tensorflow/python/keras/benchmarks/BUILD @@ -85,6 +85,10 @@ py_test( size = "medium", srcs = ["keras_examples_benchmarks/bidirectional_lstm_benchmark_test.py"], python_version = "PY3", + tags = [ + "no_pip", # b/161253163 + "no_windows", # b/160628318 + ], deps = [ ":benchmark_util", "//tensorflow:tensorflow_py", @@ -96,6 +100,10 @@ py_test( size = "medium", srcs = ["keras_examples_benchmarks/text_classification_transformer_benchmark_test.py"], python_version = "PY3", + tags = [ + "no_pip", # b/161253163 + "no_windows", # b/160628318 + ], deps = [ ":benchmark_util", "//tensorflow:tensorflow_py", From c8b226206c7397502f193f5678959251c97106f9 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Tue, 14 Jul 2020 16:29:17 -0700 Subject: [PATCH 0431/2522] Migrate the MacOS nightly release scripts to use the new bazelrc configs. 
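For readers skimming the series: a `--config=NAME` flag on the bazel command line pulls in every `build:NAME` line defined in .bazelrc, so a "release config" is just a named bundle of build flags. A minimal sketch of that pattern, reusing names that appear in the hunks below (illustrative only, not a verbatim copy of the repository's .bazelrc):

    # .bazelrc sketch: one named bundle of release flags
    build:release_cpu_macos --config=release_common
    build:release_cpu_macos --action_env=TF_NEED_CUDA=0
    build:release_cpu_macos --action_env=CC_OPT_FLAGS="-mavx"

    # A nightly script can then select the whole bundle with a single flag:
    #   bazel build --config=release_cpu_macos tensorflow/tools/pip_package:build_pip_package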
PiperOrigin-RevId: 321258589 Change-Id: Ib1f9249e3dabe5c69318a11080c667808b5c7317 --- .bazelrc | 5 ----- .../ci_build/release/macos/cpu_py2_full/nightly_release.sh | 4 +++- .../ci_build/release/macos/cpu_py35_full/nightly_release.sh | 4 +++- .../ci_build/release/macos/cpu_py36_full/nightly_release.sh | 4 +++- .../ci_build/release/macos/cpu_py37_full/nightly_release.sh | 4 +++- .../ci_build/release/macos/cpu_py38_full/nightly_release.sh | 4 +++- 6 files changed, 15 insertions(+), 10 deletions(-) diff --git a/.bazelrc b/.bazelrc index 801c5f0ec55..f23fdb7b7e8 100644 --- a/.bazelrc +++ b/.bazelrc @@ -83,7 +83,6 @@ # release_common: Common options for all builds on all operating systems. # release_gpu_common: Common options for GPU builds on Linux and Windows. # release_cpu_linux: Toolchain and CUDA options for Linux CPU builds. -# release_cpu_macos: Toolchain and CUDA options for MacOS CPU builds. # release_gpu_linux: Toolchain and CUDA options for Linux PU builds. # Allow builds using libc++ as a linker library @@ -552,10 +551,6 @@ build:release_cpu_linux --action_env=CC_OPT_FLAGS="-mavx" # Did not add this to the defaults in case this changes. build:release_cpu_linux --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain -build:release_cpu_macos --config=release_common -build:release_cpu_macos --action_env=TF_NEED_CUDA=0 -build:release_cpu_macos --action_env=CC_OPT_FLAGS="-mavx" - build:release_gpu_common --config=release_common build:release_gpu_common --action_env=TF_NEED_CUDA=1 build:release_gpu_common --action_env=TF_CUDA_VERSION=10.1 diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py2_full/nightly_release.sh b/tensorflow/tools/ci_build/release/macos/cpu_py2_full/nightly_release.sh index 6dc3e3849ad..69c57179379 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py2_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py2_full/nightly_release.sh @@ -30,11 +30,13 @@ sudo pip install twine ./tensorflow/tools/ci_build/update_version.py --nightly # Run configure. +export TF_NEED_CUDA=0 +export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python2) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=release_cpu_macos tensorflow/tools/pip_package:build_pip_package +bazel build --config=opt --config=v2 tensorflow/tools/pip_package:build_pip_package mkdir pip_pkg ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --cpu --nightly_flag diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py35_full/nightly_release.sh b/tensorflow/tools/ci_build/release/macos/cpu_py35_full/nightly_release.sh index 45a8f435988..1f018136ef9 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py35_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py35_full/nightly_release.sh @@ -35,11 +35,13 @@ sudo pip install twine ./tensorflow/tools/ci_build/update_version.py --nightly # Run configure. 
+export TF_NEED_CUDA=0 +export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.5) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=release_cpu_macos tensorflow/tools/pip_package:build_pip_package +bazel build --config=opt --config=v2 tensorflow/tools/pip_package:build_pip_package mkdir pip_pkg ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --cpu --nightly_flag diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py36_full/nightly_release.sh b/tensorflow/tools/ci_build/release/macos/cpu_py36_full/nightly_release.sh index d4cc8d7afac..3702ec97413 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py36_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py36_full/nightly_release.sh @@ -33,11 +33,13 @@ sudo pip install twine ./tensorflow/tools/ci_build/update_version.py --nightly # Run configure. +export TF_NEED_CUDA=0 +export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.6) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=release_cpu_macos tensorflow/tools/pip_package:build_pip_package +bazel build --config=opt --config=v2 tensorflow/tools/pip_package:build_pip_package mkdir pip_pkg ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --cpu --nightly_flag diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py37_full/nightly_release.sh b/tensorflow/tools/ci_build/release/macos/cpu_py37_full/nightly_release.sh index cd0f8a58ae6..eee97f6e2d2 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py37_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py37_full/nightly_release.sh @@ -33,11 +33,13 @@ sudo pip install twine ./tensorflow/tools/ci_build/update_version.py --nightly # Run configure. +export TF_NEED_CUDA=0 +export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.7) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=release_cpu_macos tensorflow/tools/pip_package:build_pip_package +bazel build --config=opt --config=v2 tensorflow/tools/pip_package:build_pip_package mkdir pip_pkg ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --cpu --nightly_flag diff --git a/tensorflow/tools/ci_build/release/macos/cpu_py38_full/nightly_release.sh b/tensorflow/tools/ci_build/release/macos/cpu_py38_full/nightly_release.sh index 11085b08a38..70773c1b597 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_py38_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_py38_full/nightly_release.sh @@ -33,11 +33,13 @@ sudo pip install twine ./tensorflow/tools/ci_build/update_version.py --nightly # Run configure. 
+export TF_NEED_CUDA=0 +export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.8) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=release_cpu_macos tensorflow/tools/pip_package:build_pip_package +bazel build --config=opt --config=v2 tensorflow/tools/pip_package:build_pip_package mkdir pip_pkg ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --cpu --nightly_flag From 17c1b96ca3e6508a3282314ccb42f496ad352966 Mon Sep 17 00:00:00 2001 From: Yash Katariya Date: Tue, 14 Jul 2020 16:43:59 -0700 Subject: [PATCH 0432/2522] Fix the formatting of mathjax on tf.org PiperOrigin-RevId: 321261654 Change-Id: Ie53004a0b8e545f4f8ecea649334eff20e3e69c4 --- tensorflow/python/ops/math_ops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index b14b392e550..ee113b565cc 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -366,8 +366,8 @@ def abs(x, name=None): # pylint: disable=redefined-builtin Given a tensor `x` of complex numbers, this operation returns a tensor of type `float32` or `float64` that is the absolute value of each element in `x`. For - a complex number \\(a + bj\\), its absolute value is computed as \\(\sqrt{a^2 - + b^2}\\). For example: + a complex number \\(a + bj\\), its absolute value is computed as + \\(\sqrt{a^2 + b^2}\\). For example: >>> x = tf.constant([[-2.25 + 4.75j], [-3.25 + 5.75j]]) >>> tf.abs(x) From 0aa0c33c58228869e11e949bb2e03cfb4a8b28a1 Mon Sep 17 00:00:00 2001 From: Peng Wang Date: Tue, 14 Jul 2020 16:46:27 -0700 Subject: [PATCH 0433/2522] Removes run_deprecated_v1 in python/kernel_tests/random/stateless_random_ops_test.py PiperOrigin-RevId: 321262167 Change-Id: Ic3b863bcbb0eb692530389d2466548ec4109c434 --- tensorflow/python/kernel_tests/random/BUILD | 2 +- .../random/stateless_random_ops_test.py | 38 ++++++++++--------- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/tensorflow/python/kernel_tests/random/BUILD b/tensorflow/python/kernel_tests/random/BUILD index 6e404b4cd5f..f7d031154e3 100644 --- a/tensorflow/python/kernel_tests/random/BUILD +++ b/tensorflow/python/kernel_tests/random/BUILD @@ -119,7 +119,7 @@ cuda_py_test( name = "stateless_random_ops_test", size = "medium", srcs = ["stateless_random_ops_test.py"], - shard_count = 2, + shard_count = 5, tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", diff --git a/tensorflow/python/kernel_tests/random/stateless_random_ops_test.py b/tensorflow/python/kernel_tests/random/stateless_random_ops_test.py index d7e50083deb..27b10ea2258 100644 --- a/tensorflow/python/kernel_tests/random/stateless_random_ops_test.py +++ b/tensorflow/python/kernel_tests/random/stateless_random_ops_test.py @@ -22,6 +22,7 @@ import functools from absl.testing import parameterized import numpy as np +from tensorflow.python.eager import context from tensorflow.python.eager import def_function from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -64,22 +65,33 @@ class StatelessOpsTest(test.TestCase, parameterized.TestCase): random_seed.set_random_seed(seed[0]) with test_util.use_gpu(): for stateless_op, stateful_op in cases: + if context.executing_eagerly(): + # Call set_random_seed in order to clear kernel cache, to prevent + # kernel reusing for the stateful op + random_seed.set_random_seed(seed[0]) stateful = stateful_op(seed=seed[1]) pure = stateless_op(seed=preseed) - 
self.assertAllEqual(self.evaluate(stateful), self.evaluate(pure)) + self.assertAllEqual(stateful, pure) def _test_determinism(self, cases): # Stateless values should be equal iff the seeds are equal (roughly) cases = tuple(cases) - with self.test_session(use_gpu=True): + seeds = [(x, y) for x in range(5) for y in range(5)] * 3 + with self.test_session(use_gpu=True), test_util.use_gpu(): for seed_type in [dtypes.int32, dtypes.int64]: - seed_t = array_ops.placeholder(seed_type, shape=[2]) - seeds = [(x, y) for x in range(5) for y in range(5)] * 3 for stateless_op, _ in cases: - pure = stateless_op(seed=seed_t) - values = [ - (seed, pure.eval(feed_dict={seed_t: seed})) for seed in seeds - ] + if context.executing_eagerly(): + values = [ + (seed, stateless_op(seed=constant_op.constant(seed, seed_type))) + for seed in seeds] + else: + # Have this branch because the above branch is too slow in graph + # mode + seed_t = array_ops.placeholder(seed_type, shape=[2]) + pure = stateless_op(seed=seed_t) + values = [ + (seed, pure.eval(feed_dict={seed_t: seed})) for seed in seeds + ] for s0, v0 in values: for s1, v1 in values: self.assertEqual(s0 == s1, np.all(v0 == v1)) @@ -155,54 +167,44 @@ class StatelessOpsTest(test.TestCase, parameterized.TestCase): functools.partial(random_ops.random_poisson, shape=(10,), **kwds)) @test_util.disable_tfrt('tensorflow::DirectSession::Run crashes. b/156187396') - @test_util.run_deprecated_v1 def testMatchFloat(self): self._test_match(self._float_cases()) @test_util.disable_tfrt('tensorflow::DirectSession::Run crashes. b/156187396') - @test_util.run_deprecated_v1 def testMatchInt(self): self._test_match(self._int_cases()) @test_util.disable_tfrt('tensorflow::DirectSession::Run crashes. b/156187396') - @test_util.run_deprecated_v1 def testMatchMultinomial(self): self._test_match(self._multinomial_cases()) @test_util.disable_tfrt('tensorflow::DirectSession::Run crashes. b/156187396') - @test_util.run_deprecated_v1 def testMatchGamma(self): self._test_match(self._gamma_cases()) @test_util.disable_tfrt('tensorflow::DirectSession::Run crashes. b/156187396') - @test_util.run_deprecated_v1 def testMatchPoisson(self): self._test_match(self._poisson_cases()) @test_util.disable_tfrt('tensorflow::DirectSession::Run crashes. b/156187396') - @test_util.run_deprecated_v1 def testDeterminismFloat(self): self._test_determinism( self._float_cases(shape_dtypes=(dtypes.int32, dtypes.int64))) @test_util.disable_tfrt('tensorflow::DirectSession::Run crashes. b/156187396') - @test_util.run_deprecated_v1 def testDeterminismInt(self): self._test_determinism( self._int_cases(shape_dtypes=(dtypes.int32, dtypes.int64))) @test_util.disable_tfrt('tensorflow::DirectSession::Run crashes. b/156187396') - @test_util.run_deprecated_v1 def testDeterminismMultinomial(self): self._test_determinism(self._multinomial_cases()) @test_util.disable_tfrt('tensorflow::DirectSession::Run crashes. b/156187396') - @test_util.run_deprecated_v1 def testDeterminismGamma(self): self._test_determinism(self._gamma_cases()) @test_util.disable_tfrt('tensorflow::DirectSession::Run crashes. b/156187396') - @test_util.run_deprecated_v1 def testDeterminismPoisson(self): self._test_determinism(self._poisson_cases()) From bfeba4d9ec9acc70453cf2c5b6dddb483934f4b3 Mon Sep 17 00:00:00 2001 From: Brian Zhao Date: Tue, 14 Jul 2020 16:48:14 -0700 Subject: [PATCH 0434/2522] Experimental implementation of saved model C API. 
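As background for this change, the models it targets are TF 2 SavedModels whose tf.functions carry an explicit input signature. A minimal Python sketch of producing such a model is shown here; the module name, function body, and save path are hypothetical and are not the test fixture used by the tests in this patch:

    import tensorflow as tf

    class Adder(tf.Module):
      # The input_signature annotation is what makes this function loadable
      # through the C SavedModel API added in this change.
      @tf.function(input_signature=[
          tf.TensorSpec(shape=[], dtype=tf.float32),
          tf.TensorSpec(shape=[], dtype=tf.float32)])
      def compute(self, a, b):
        return a + b

    tf.saved_model.save(Adder(), '/tmp/adder')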
This supports loading constants, unpartitioned variables, and tf.functions annotated with an input signature from a TF 2 saved model. See RFC 207: https://github.com/tensorflow/community/pull/207 Future CLs will flesh out some of the missing pieces (specifying a tag, loading models with resources/assets, batching tensor restores per device, etc). PiperOrigin-RevId: 321262527 Change-Id: I76dbe4617acd7bdbe5f093e6e22b328842a65780 --- .../c/experimental/saved_model/core/BUILD | 15 + .../saved_model/core/tf_saved_model_api.cc | 333 +++++++++++++++++- .../saved_model/core/tf_saved_model_api.h | 20 +- .../internal/saved_model_api_test.cc | 54 ++- .../tests/saved_model_api_test.cc | 6 +- 5 files changed, 406 insertions(+), 22 deletions(-) diff --git a/tensorflow/c/experimental/saved_model/core/BUILD b/tensorflow/c/experimental/saved_model/core/BUILD index 5931e229e28..38bdbee1fdc 100644 --- a/tensorflow/c/experimental/saved_model/core/BUILD +++ b/tensorflow/c/experimental/saved_model/core/BUILD @@ -113,8 +113,23 @@ cc_library( deps = [ ":concrete_function", ":saved_model_api", + ":saved_model_utils", + "//tensorflow/c:tensor_interface", "//tensorflow/c/eager:immediate_execution_context", + "//tensorflow/c/eager:immediate_execution_tensor_handle", + "//tensorflow/c/experimental/saved_model/core/ops:restore_ops", + "//tensorflow/c/experimental/saved_model/core/revived_types:constant", + "//tensorflow/c/experimental/saved_model/core/revived_types:tensorhandle_convertible", + "//tensorflow/c/experimental/saved_model/core/revived_types:tf_concrete_function", + "//tensorflow/c/experimental/saved_model/core/revived_types:variable", + "//tensorflow/cc/saved_model:bundle_v2", + "//tensorflow/cc/saved_model:constants", + "//tensorflow/core:framework", "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core/common_runtime/eager:tensor_handle", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/strings", "@com_google_absl//absl/types:optional", ], ) diff --git a/tensorflow/c/experimental/saved_model/core/tf_saved_model_api.cc b/tensorflow/c/experimental/saved_model/core/tf_saved_model_api.cc index 225ba1db9f4..c22f8d86174 100644 --- a/tensorflow/c/experimental/saved_model/core/tf_saved_model_api.cc +++ b/tensorflow/c/experimental/saved_model/core/tf_saved_model_api.cc @@ -15,47 +15,360 @@ limitations under the License. 
#include "tensorflow/c/experimental/saved_model/core/tf_saved_model_api.h" +#include #include +#include #include #include +#include "absl/algorithm/container.h" +#include "absl/strings/str_split.h" +#include "absl/strings/string_view.h" #include "absl/types/optional.h" #include "tensorflow/c/eager/immediate_execution_context.h" +#include "tensorflow/c/eager/immediate_execution_tensor_handle.h" #include "tensorflow/c/experimental/saved_model/core/concrete_function.h" +#include "tensorflow/c/experimental/saved_model/core/ops/restore_ops.h" +#include "tensorflow/c/experimental/saved_model/core/revived_types/constant.h" +#include "tensorflow/c/experimental/saved_model/core/revived_types/tensorhandle_convertible.h" +#include "tensorflow/c/experimental/saved_model/core/revived_types/tf_concrete_function.h" +#include "tensorflow/c/experimental/saved_model/core/revived_types/variable.h" +#include "tensorflow/c/experimental/saved_model/core/saved_model_utils.h" +#include "tensorflow/cc/saved_model/bundle_v2.h" +#include "tensorflow/cc/saved_model/constants.h" +#include "tensorflow/core/framework/attr_value.pb.h" +#include "tensorflow/core/framework/function.pb.h" +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor.pb.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/lib/hash/hash.h" +#include "tensorflow/core/platform/casts.h" #include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/platform/path.h" +#include "tensorflow/core/platform/stringpiece.h" +#include "tensorflow/core/platform/tstring.h" +#include "tensorflow/core/protobuf/meta_graph.pb.h" +#include "tensorflow/core/protobuf/saved_model.pb.h" +#include "tensorflow/core/protobuf/saved_object_graph.pb.h" +#include "tensorflow/core/protobuf/trackable_object_graph.pb.h" namespace tensorflow { +// Maps from a FunctionDef's name to FunctionDef, for a given FunctionDefLibrary +using FunctionDefMap = + std::unordered_map; + +// Maps from a Nodedef's name to its corresponding AttrValues, for a given +// Graphdef +using NodeAttrMap = + std::unordered_map; + +// Maps from Node ID to an "Revived Object" implementing +// "TensorHandleConvertible" +using RevivedObjectMap = + std::unordered_map>; + +// Maps from a functiondef's name to the corresponding "TFConcreteFunction" +using ConcreteFunctionMap = + std::unordered_map>; + +namespace { + +Status ConstantFromSavedConstant( + ImmediateExecutionContext* ctx, + const tensorflow::SavedConstant& saved_constant, + const NodeAttrMap& node_attr_map, std::unique_ptr* output) { + const std::string& const_op_name = saved_constant.operation(); + const auto& node_name_and_attrs = node_attr_map.find(const_op_name); + if (node_name_and_attrs == node_attr_map.end()) { + return errors::FailedPrecondition( + "Unable to find Const operation with name'", const_op_name, + "' in SavedModel graphdef"); + } + const AttrValueMap* attrs = node_name_and_attrs->second; + const auto& attr_name_and_value = attrs->find("value"); + if (attr_name_and_value == attrs->end()) { + return errors::FailedPrecondition("Unable to find Const operation '", + const_op_name, "'s value attribute"); + } + const TensorProto& tensor_proto = attr_name_and_value->second.tensor(); + return internal::TensorProtoToConstant(ctx, tensor_proto, output); +} + +// Restores all 
non-function objects in the SavedModel's object graph. +// This function walks through the metagraph's saved object graph, and +// constructs revived versions of SavedVariable, SavedConstant, SavedAsset, and +// SavedResources. These are returned via the `out` parameter. +Status ReviveObjects( + const MetaGraphDef& metagraph, ImmediateExecutionContext* context, + std::unordered_map>* + revived_objects) { + // This is needed to restore "Constant" nodes by looking up their + // "Value" attribute. + NodeAttrMap node_attr_map = internal::NodeToAttrMap(metagraph.graph_def()); + + // Iterate through all the saved objects, restoring objects as we go. + // We don't recreate functions until all other objects have been created. + for (int i = 0; i < metagraph.object_graph_def().nodes_size(); ++i) { + const SavedObject& node = metagraph.object_graph_def().nodes(i); + if (node.kind_case() == SavedObject::kVariable) { + std::unique_ptr variable; + TF_RETURN_IF_ERROR( + internal::LoadSavedVariable(context, node.variable(), &variable)); + (*revived_objects)[i] = std::move(variable); + } else if (node.kind_case() == SavedObject::kConstant) { + std::unique_ptr constant; + TF_RETURN_IF_ERROR(ConstantFromSavedConstant(context, node.constant(), + node_attr_map, &constant)); + (*revived_objects)[i] = std::move(constant); + } else if (node.kind_case() == SavedObject::kAsset) { + // TODO(bmzhao): Implement Asset C++ class. This should be just recreating + // the full path to the asset file: + // https://github.com/tensorflow/tensorflow/blob/6a0bdbdb7c48a3491ae1277083ae3dafb4ab4d7a/tensorflow/python/saved_model/load.py#L395-L396 + // and storing it as a string tensor: + // https://github.com/tensorflow/tensorflow/blob/6a0bdbdb7c48a3491ae1277083ae3dafb4ab4d7a/tensorflow/python/training/tracking/tracking.py#L324-L325 + return errors::Unimplemented("SavedAsset loading is not implemented yet"); + } else if (node.kind_case() == SavedObject::kResource) { + // TODO(bmzhao): Figure out how resource loading works and implement it + return errors::Unimplemented( + "SavedResource loading is not implemented yet"); + } + } + return Status(); +} + +Status ReviveFunctions(const MetaGraphDef& metagraph, + const RevivedObjectMap& revived_objects, + ImmediateExecutionContext* context, + ConcreteFunctionMap* restored_functions) { + const FunctionDefMap function_def_map = + internal::FunctionNameToFunctionDefMap(metagraph.graph_def().library()); + + // Iterate through all objects, only examining functions. + for (const SavedObject& node : metagraph.object_graph_def().nodes()) { + if (node.kind_case() == SavedObject::kBareConcreteFunction) { + const std::string& function_name = + node.bare_concrete_function().concrete_function_name(); + + const SavedConcreteFunction& saved_concrete_function = + metagraph.object_graph_def().concrete_functions().at(function_name); + + const FunctionDef* function_def = function_def_map.at(function_name); + std::unique_ptr concrete_function; + TF_RETURN_IF_ERROR(internal::LoadTFConcreteFunction( + saved_concrete_function, function_def, revived_objects, context, + &concrete_function)); + (*restored_functions)[function_name] = std::move(concrete_function); + } else if (node.kind_case() == SavedObject::kFunction) { + // We only allow loading functions that have an annotated input signature, + // which means there is 1:1 correspondence between tf.function + // <=> SavedFunction <=> SavedConcreteFunction <=> FunctionDef. 
This is + // the same restriction that MLIR has: + // https://github.com/tensorflow/tensorflow/blob/1c064ab76064c58e54261b805027474885a1534d/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc#L2677-L2707 + const SavedFunction& saved_function = node.function(); + if (saved_function.concrete_functions_size() != 1) { + return errors::FailedPrecondition( + "Only tf.functions annotated with an input signature are supported " + "by SavedModelAPI. This means that there should only be a single " + "ConcreteFunction per tf.function"); + } + const std::string& function_name = saved_function.concrete_functions(0); + const SavedConcreteFunction& saved_concrete_function = + metagraph.object_graph_def().concrete_functions().at(function_name); + + const FunctionDef* function_def = function_def_map.at(function_name); + + std::unique_ptr concrete_function; + TF_RETURN_IF_ERROR(internal::LoadTFConcreteFunction( + saved_concrete_function, function_def, revived_objects, context, + &concrete_function)); + (*restored_functions)[function_name] = std::move(concrete_function); + } + } + return Status(); +} + +const TrackableObjectGraph::TrackableObject::SerializedTensor* +FindSerializedTensorInTrackable( + const TrackableObjectGraph::TrackableObject& trackable_object, + absl::string_view name) { + for (const auto& maybe_serialized_tensor : trackable_object.attributes()) { + if (maybe_serialized_tensor.name() == name) { + return &maybe_serialized_tensor; + } + } + return nullptr; +} + +// This function reads the Checkpoint embedded in the SavedModel, and calls the +// appropriate Restore ops on each of the variables. +// Note(bmzhao): Conceptually, objects that contain checkpointable state +// implement the "_gather_saveables_for_checkpoint" method +// https://github.com/tensorflow/tensorflow/blob/ddc1bbad3dfd4a089eb96014f26cc16664b1b2f8/tensorflow/python/training/tracking/base.py#L953-L983 +// which returns a dict of string key -> EITHER: +// 1. python callable (taking a checkpoint key) returning SaveableObject OR +// 2. variable (partitioned/resource/reference or otherwise) +// https://github.com/tensorflow/tensorflow/blob/ddc1bbad3dfd4a089eb96014f26cc16664b1b2f8/tensorflow/python/training/saving/saveable_object.py#L58. +// The string key becomes the "name" attribute of the SerializedTensor proto +// in the TrackableObjectGraph, +// https://github.com/tensorflow/tensorflow/blob/ddc1bbad3dfd4a089eb96014f26cc16664b1b2f8/tensorflow/core/protobuf/trackable_object_graph.proto#L26 +// And the checkpoint_key is a globally unique string derived from this name: +// https://github.com/tensorflow/tensorflow/blob/842df9e6b516e42578a8d23b35d41176b9a6cf1d/tensorflow/python/training/tracking/graph_view.py#L236-L241 +// SaveableObjects model the information needed to pass to the SaveV2/RestoreV2 +// ops via their SaveSpec members +// https://github.com/tensorflow/tensorflow/blob/ddc1bbad3dfd4a089eb96014f26cc16664b1b2f8/tensorflow/python/training/saving/saveable_object.py#L21, +// which contain the "real" checkpoint keys into the TensorBundle SSTable. 
+// They also contain the logic needed to take the restored tensors from +// RestoreV2 and load them back into the "object" they came from via their +// overridden "restore" method: +// https://github.com/tensorflow/tensorflow/blob/ddc1bbad3dfd4a089eb96014f26cc16664b1b2f8/tensorflow/python/training/saving/saveable_object.py#L85 +Status RestoreCheckpoint(SavedModelV2Bundle* bundle, + const RevivedObjectMap& revived_objects, + const std::string& directory, + ImmediateExecutionContext* context) { + // TODO(bmzhao): Batch up all the restores into a single restore op per + // device, following logic in MultiDeviceSaver. + TF_RETURN_IF_ERROR(bundle->VisitObjectsToRestore( + [&revived_objects, &directory, context, bundle]( + int node, const TrackableObjectGraph::TrackableObject& trackable) { + if (bundle->saved_object_graph().nodes(node).kind_case() != + SavedObject::kVariable) { + // TODO(bmzhao): This requires using the newly added Save/Restore + // functions from + // https://github.com/tensorflow/tensorflow/commit/df6b21c13c82b5d0981642cfe18f10e60f78ea5c + return errors::Unimplemented( + "Restoring non-variable objects has not been implemented yet. "); + } + + Variable* variable = + down_cast(revived_objects.at(node).get()); + + // Restore the tensor's value from the checkpoint + const TrackableObjectGraph::TrackableObject::SerializedTensor* + attribute = + FindSerializedTensorInTrackable(trackable, "VARIABLE_VALUE"); + if (attribute == nullptr) { + return errors::FailedPrecondition( + "Could not find SerializedTensor with name VARIABLE_VALUE for " + "saved variable"); + } + + const std::string& checkpoint_key = attribute->checkpoint_key(); + std::string variables_path_prefix = + io::JoinPath(directory, kSavedModelVariablesDirectory, + kSavedModelVariablesFilename); + ImmediateTensorHandlePtr restored_output; + TF_RETURN_IF_ERROR(internal::SingleRestore( + context, variables_path_prefix, checkpoint_key, variable->dtype(), + &restored_output)); + + // Assign the restored tensor's value to the variable + return variable->Assign(restored_output.get()); + })); + + return Status(); +} + +} // namespace + Status TFSavedModelAPI::GetFunction(const std::string& function_path, ConcreteFunction** function) { - // TODO(bmzhao): Add support for retrieving a function. - return errors::Unimplemented( - "Retrieving functions is unimplemented currently"); + const SavedObject* object = + internal::FindNodeAtPath(function_path, bundle_.saved_object_graph()); + if (object == nullptr) { + return errors::NotFound("No saved object found at path ", function_path); + } + + if (object->kind_case() == SavedObject::kBareConcreteFunction) { + *function = + concrete_functions_ + .at(object->bare_concrete_function().concrete_function_name()) + .get(); + } else if (object->kind_case() == SavedObject::kFunction) { + *function = + concrete_functions_.at(object->function().concrete_functions(0)).get(); + } else { + return errors::InvalidArgument(function_path, + " is not a path to a Function."); + } + + return Status(); } Status TFSavedModelAPI::GetSignatureDefFunction( const std::string& signature_def_key, ConcreteFunction** function) { // TODO(bmzhao): Add support for retrieving a signaturedef function. 
return errors::Unimplemented( - "Retrieving functions is unimplemented currently"); + "Retrieving SignatureDef functions is unimplemented currently"); } std::vector TFSavedModelAPI::ListFunctions() { std::vector result; - result.reserve(functions_.size()); - for (ConcreteFunction& function : functions_) { - result.push_back(&function); + result.reserve(concrete_functions_.size()); + for (auto& index_and_function : concrete_functions_) { + result.push_back(index_and_function.second.get()); } return result; } +TFSavedModelAPI::TFSavedModelAPI( + const std::string& directory, SavedModelV2Bundle bundle, + std::unordered_map> + revived_objects, + std::unordered_map> + concrete_functions) + : directory_(directory), + bundle_(std::move(bundle)), + revived_objects_(std::move(revived_objects)), + concrete_functions_(std::move(concrete_functions)) {} + Status TFSavedModelAPI::Load( const std::string& directory, const absl::optional>& tags, ImmediateExecutionContext* context, std::unique_ptr* out) { - // TODO(bmzhao): Add support for loading a TFSavedModelImpl. - return errors::Unimplemented( - "TFSavedModelAPIImpl loading is unimplemented currently"); + // TODO(bmzhao): Add support for loading a TF1 SavedModel. + if (tags) { + return errors::Unimplemented( + "Loading saved models with explicit tags will be supported in the " + "future"); + } + + SavedModelV2Bundle bundle; + TF_RETURN_IF_ERROR(SavedModelV2Bundle::Load(directory, &bundle)); + + // TODO(bmzhao): Mangle loaded function names so that different + // models loaded in the same runtime Context don't clobber eachother. + // This occurs in python here: + // https://github.com/tensorflow/tensorflow/blob/285b5fa15405c5e2c084080f52a1818be8648079/tensorflow/python/saved_model/function_deserialization.py#L438-L454 + + RevivedObjectMap revived_objects; + TF_RETURN_IF_ERROR( + ReviveObjects(bundle.meta_graph_def(), context, &revived_objects)); + + // TODO(bmzhao): When we later add support for loading resources, we need to + // handle the case where materializing a function's captures requires invoking + // other functions. This occurs when retrieving the resource handle for a + // TrackableResource: + // https://github.com/tensorflow/tensorflow/blob/f19c6efb4a8ba60e2492eedc98ef5375abb39dc7/tensorflow/python/saved_model/load.py#L240 + // https://github.com/tensorflow/tensorflow/blob/f19c6efb4a8ba60e2492eedc98ef5375abb39dc7/tensorflow/python/training/tracking/tracking.py#L233 + // This requires restoring functions in a topological sort order by capture + // dependencies. + ConcreteFunctionMap function_map; + TF_RETURN_IF_ERROR(ReviveFunctions(bundle.meta_graph_def(), revived_objects, + context, &function_map)); + + TF_RETURN_IF_ERROR( + RestoreCheckpoint(&bundle, revived_objects, directory, context)); + + out->reset(new TFSavedModelAPI(directory, std::move(bundle), + std::move(revived_objects), + std::move(function_map))); + return Status(); } } // namespace tensorflow diff --git a/tensorflow/c/experimental/saved_model/core/tf_saved_model_api.h b/tensorflow/c/experimental/saved_model/core/tf_saved_model_api.h index cc631a9f3ae..fc8e738e86f 100644 --- a/tensorflow/c/experimental/saved_model/core/tf_saved_model_api.h +++ b/tensorflow/c/experimental/saved_model/core/tf_saved_model_api.h @@ -16,14 +16,19 @@ limitations under the License. 
#ifndef TENSORFLOW_C_EXPERIMENTAL_SAVED_MODEL_CORE_TF_SAVED_MODEL_IMPL_H_ #define TENSORFLOW_C_EXPERIMENTAL_SAVED_MODEL_CORE_TF_SAVED_MODEL_IMPL_H_ +#include #include +#include #include #include #include "absl/types/optional.h" #include "tensorflow/c/eager/immediate_execution_context.h" #include "tensorflow/c/experimental/saved_model/core/concrete_function.h" +#include "tensorflow/c/experimental/saved_model/core/revived_types/tensorhandle_convertible.h" +#include "tensorflow/c/experimental/saved_model/core/revived_types/tf_concrete_function.h" #include "tensorflow/c/experimental/saved_model/core/saved_model_api.h" +#include "tensorflow/cc/saved_model/bundle_v2.h" #include "tensorflow/core/platform/status.h" namespace tensorflow { @@ -63,8 +68,19 @@ class TFSavedModelAPI : public SavedModelAPI { ~TFSavedModelAPI() override = default; private: - TFSavedModelAPI() = default; - std::vector functions_; + TFSavedModelAPI( + const std::string& directory, SavedModelV2Bundle bundle, + std::unordered_map> + revived_objects, + std::unordered_map> + concrete_functions); + + std::string directory_; + SavedModelV2Bundle bundle_; + std::unordered_map> + revived_objects_; + std::unordered_map> + concrete_functions_; }; } // namespace tensorflow diff --git a/tensorflow/c/experimental/saved_model/internal/saved_model_api_test.cc b/tensorflow/c/experimental/saved_model/internal/saved_model_api_test.cc index aa0b00ab847..3d490fe7e08 100644 --- a/tensorflow/c/experimental/saved_model/internal/saved_model_api_test.cc +++ b/tensorflow/c/experimental/saved_model/internal/saved_model_api_test.cc @@ -16,10 +16,15 @@ limitations under the License. #include "tensorflow/c/experimental/saved_model/public/saved_model_api.h" #include +#include #include "tensorflow/c/eager/c_api.h" #include "tensorflow/c/eager/c_api_experimental.h" +#include "tensorflow/c/eager/c_api_test_util.h" +#include "tensorflow/c/experimental/saved_model/public/concrete_function.h" +#include "tensorflow/c/experimental/saved_model/public/tensorhandle_list.h" #include "tensorflow/c/tf_status.h" +#include "tensorflow/c/tf_tensor.h" #include "tensorflow/core/lib/io/path.h" #include "tensorflow/core/platform/stringpiece.h" #include "tensorflow/core/platform/test.h" @@ -92,12 +97,51 @@ TEST_P(CSavedModelAPITest, LoadsSavedModel) { TF_SavedModel* saved_model = TF_LoadSavedModel(model_dir.c_str(), ctx, status); - // TODO(bmzhao): Change this to expect TF_OK when loading is implemented. - // That unblocks writing other tests that require a TF_SavedModel*, - // like loading a ConcreteFunction. This test at least checks that the - // C API builds and can be minimally run. - EXPECT_EQ(TF_GetCode(status), TF_UNIMPLEMENTED); + EXPECT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); + TF_ConcreteFunction* compute_fn = + TF_GetSavedModelConcreteFunction(saved_model, "compute", status); + EXPECT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); + TFE_Op* compute_fn_op = TF_ConcreteFunctionGetCallOp(compute_fn, status); + EXPECT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); + + const TF_TensorHandleList* captures = + TF_ConcreteFunctionGetCaptures(compute_fn); + + // TODO(bmzhao): Finish API on FunctionMetadata args, so we know how many + // inputs + outputs a function has. 
+ std::vector compute_fn_inputs; + TFE_TensorHandle* input_a = TestScalarTensorHandle(ctx, 2.0f); + TFE_TensorHandle* input_b = TestScalarTensorHandle(ctx, 1.0f); + compute_fn_inputs.reserve(2 + TF_TensorHandleListSize(captures)); + compute_fn_inputs.push_back(input_a); + compute_fn_inputs.push_back(input_b); + for (int i = 0; i < TF_TensorHandleListSize(captures); ++i) { + compute_fn_inputs.push_back(TF_TensorHandleListGet(captures, i)); + } + TFE_OpAddInputList(compute_fn_op, compute_fn_inputs.data(), + compute_fn_inputs.size(), status); + EXPECT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); + + TFE_TensorHandle* compute_fn_outputs[1] = {nullptr}; + int num_retvals = 1; + + TFE_Execute(compute_fn_op, &compute_fn_outputs[0], &num_retvals, status); + EXPECT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); + + TF_Tensor* result = TFE_TensorHandleResolve(compute_fn_outputs[0], status); + EXPECT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status); + + EXPECT_EQ(TF_NumDims(result), 0); + float output_value = *static_cast(TF_TensorData(result)); + // (1 + 2) * (2 + 1) / 3 + 5 should be 8 + EXPECT_FLOAT_EQ(output_value, 8.0); + + TF_DeleteTensor(result); + TFE_DeleteTensorHandle(compute_fn_outputs[0]); + TFE_DeleteTensorHandle(input_a); + TFE_DeleteTensorHandle(input_b); + TFE_DeleteOp(compute_fn_op); TF_DeleteSavedModel(saved_model); TF_DeleteStatus(status); TFE_DeleteContext(ctx); diff --git a/tensorflow/cc/saved_model/experimental/tests/saved_model_api_test.cc b/tensorflow/cc/saved_model/experimental/tests/saved_model_api_test.cc index ad80b74f1d5..cf5f742538e 100644 --- a/tensorflow/cc/saved_model/experimental/tests/saved_model_api_test.cc +++ b/tensorflow/cc/saved_model/experimental/tests/saved_model_api_test.cc @@ -86,11 +86,7 @@ TEST_P(CPPSavedModelAPITest, LoadsSavedModel) { std::unique_ptr model = SavedModelAPI::Load(model_dir, *runtime, &status); - // TODO(bmzhao): Change this to expect TF_OK when loading is implemented. - // That unblocks writing other tests that require a TF_SavedModel*, - // like loading a ConcreteFunction. This test at least checks that the - // C API builds and can be minimally run. 
- EXPECT_EQ(status.code(), TF_UNIMPLEMENTED) << status.message(); + EXPECT_EQ(status.code(), TF_OK) << status.message(); } INSTANTIATE_TEST_SUITE_P(RuntimeAgnosticCPPSavedModelTests, From a41366eee035777f38ab1f06e7cba12b1b533a9b Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Wed, 15 Jul 2020 00:12:54 +0000 Subject: [PATCH 0435/2522] moved tf_shape_utils under kernels --- tensorflow/c/BUILD | 26 ----------- tensorflow/c/kernels/BUILD | 34 ++++++++++++++- tensorflow/c/kernels/summary_op.cc | 6 +-- .../c/{ => kernels}/tensor_shape_utils.cc | 4 +- .../c/{ => kernels}/tensor_shape_utils.h | 9 +++- .../{ => kernels}/tensor_shape_utils_test.cc | 5 +-- tensorflow/c/tf_shape_utils_test.cc | 43 ------------------- 7 files changed, 47 insertions(+), 80 deletions(-) rename tensorflow/c/{ => kernels}/tensor_shape_utils.cc (92%) rename tensorflow/c/{ => kernels}/tensor_shape_utils.h (81%) rename tensorflow/c/{ => kernels}/tensor_shape_utils_test.cc (88%) delete mode 100644 tensorflow/c/tf_shape_utils_test.cc diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD index 65bad3b5de9..410fc22069f 100644 --- a/tensorflow/c/BUILD +++ b/tensorflow/c/BUILD @@ -513,32 +513,6 @@ tf_cuda_library( alwayslink = 1, ) -tf_cuda_library( - name = "tensor_shape_utils", - srcs = [ - "tensor_shape_utils.cc", - ], - hdrs = [ - "tensor_shape_utils.h", - ], - deps = [ - ":tf_tensor", - ], - copts = tf_copts(), - visibility = ["//visibility:public"], -) - -tf_cc_test( - name = "tensor_shape_utils_test", - srcs = ["tensor_shape_utils_test.cc"], - deps = [ - ":tensor_shape_utils", - "//tensorflow/core:lib", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - ], -) - # ----------------------------------------------------------------------------- # Tests diff --git a/tensorflow/c/kernels/BUILD b/tensorflow/c/kernels/BUILD index 77fbd869105..e8354a8941d 100644 --- a/tensorflow/c/kernels/BUILD +++ b/tensorflow/c/kernels/BUILD @@ -5,6 +5,11 @@ load( "tf_kernel_library", ) +load( + "//tensorflow/core/platform:rules_cc.bzl", + "cc_library" +) + package( default_visibility = ["//visibility:public"], licenses = ["notice"], # Apache 2.0 @@ -28,10 +33,10 @@ tf_kernel_library( name = "summary_op", prefix = "summary_op", deps = [ + "//tensorflow/c/kernels:tensor_shape_utils", "//tensorflow/c:kernels", "//tensorflow/c:tf_tensor", - "//tensorflow/c:tensor_shape_utils", - "//tensorflow/core:framework", + "//tensorflow/core:framework" ], ) @@ -79,6 +84,31 @@ tf_cc_test( ], ) +cc_library( + name = "tensor_shape_utils", + srcs = [ + "tensor_shape_utils.cc", + ], + hdrs = [ + "tensor_shape_utils.h", + ], + deps = [ + "//tensorflow/c:tf_tensor", + ], + visibility = ["//visibility:public"], +) + +tf_cc_test( + name = "tensor_shape_utils_test", + srcs = ["tensor_shape_utils_test.cc"], + deps = [ + ":tensor_shape_utils", + "//tensorflow/core:lib", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + # Changes to the Android srcs here should be replicated in # tensorflow/contrib/makefile/tf_op_files.txt. # diff --git a/tensorflow/c/kernels/summary_op.cc b/tensorflow/c/kernels/summary_op.cc index 10b46284814..87418c6ccea 100644 --- a/tensorflow/c/kernels/summary_op.cc +++ b/tensorflow/c/kernels/summary_op.cc @@ -18,7 +18,7 @@ limitations under the License. 
#include "tensorflow/c/kernels.h" #include "tensorflow/c/tf_tensor.h" -#include "tensorflow/c/tensor_shape_utils.h" +#include "tensorflow/c/kernels/tensor_shape_utils.h" #include "tensorflow/core/framework/selective_registration.h" #include "tensorflow/core/framework/summary.pb.h" #include "tensorflow/core/framework/types.h" @@ -68,8 +68,8 @@ static void ScalarSummaryOp_Compute(void* kernel, TF_OpKernelContext* ctx) { if (!IsSameSize(params.tags, params.values)) { std::ostringstream err; err << "tags and values not the same shape: " - << TF_ShapeDebugString(params.tags) << " != " - << TF_ShapeDebugString(params.values) + << ShapeDebugString(params.tags) << " != " + << ShapeDebugString(params.values) << SingleTag(params.tags); TF_SetStatus(params.status, TF_INVALID_ARGUMENT, err.str().c_str()); } diff --git a/tensorflow/c/tensor_shape_utils.cc b/tensorflow/c/kernels/tensor_shape_utils.cc similarity index 92% rename from tensorflow/c/tensor_shape_utils.cc rename to tensorflow/c/kernels/tensor_shape_utils.cc index c38eb95724c..062cdbd049a 100644 --- a/tensorflow/c/tensor_shape_utils.cc +++ b/tensorflow/c/kernels/tensor_shape_utils.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/c/tensor_shape_utils.h" +#include "tensorflow/c/kernels/tensor_shape_utils.h" #include @@ -22,7 +22,7 @@ limitations under the License. #include "tensorflow/core/platform/strcat.h" #include "tensorflow/core/platform/logging.h" -std::string TF_ShapeDebugString(TF_Tensor* tensor) { +std::string ShapeDebugString(TF_Tensor* tensor) { // A TF_Tensor cannot have an unknown rank CHECK_GE(TF_NumDims(tensor), 0); tensorflow::string s = "["; diff --git a/tensorflow/c/tensor_shape_utils.h b/tensorflow/c/kernels/tensor_shape_utils.h similarity index 81% rename from tensorflow/c/tensor_shape_utils.h rename to tensorflow/c/kernels/tensor_shape_utils.h index cde929f3f4e..a62f460998b 100644 --- a/tensorflow/c/tensor_shape_utils.h +++ b/tensorflow/c/kernels/tensor_shape_utils.h @@ -13,6 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +// This file contains shape utilities to be used by kernels and is not part of +// the C API. As such, it is subject to change at any time. + #ifndef TENSORFLOW_C_TENSOR_SHAPE_UTILS_H_ #define TENSORFLOW_C_TENSOR_SHAPE_UTILS_H_ @@ -20,12 +23,16 @@ limitations under the License. #include "tensorflow/c/tf_tensor.h" +namespace tensorflow { + // The following are utils for the shape of a TF_Tensor type. // These functions may later be subsumed by the methods for a // TF_TensorShape type // Returns a string representation of the TF_Tensor -std::string TF_ShapeDebugString(TF_Tensor* tensor); +std::string ShapeDebugString(TF_Tensor* tensor); + +} // namespace tensorflow #endif // TENSORFLOW_C_TENSOR_SHAPE_UTILS_H_ diff --git a/tensorflow/c/tensor_shape_utils_test.cc b/tensorflow/c/kernels/tensor_shape_utils_test.cc similarity index 88% rename from tensorflow/c/tensor_shape_utils_test.cc rename to tensorflow/c/kernels/tensor_shape_utils_test.cc index ef1fd1e839f..25620838437 100644 --- a/tensorflow/c/tensor_shape_utils_test.cc +++ b/tensorflow/c/kernels/tensor_shape_utils_test.cc @@ -13,14 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/c/tensor_shape_utils.h" #include "tensorflow/c/tf_tensor_internal.h" +#include "tensorflow/c/kernels/tensor_shape_utils.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/framework/partial_tensor_shape.h" namespace tensorflow { @@ -30,7 +29,7 @@ void TestShapeMatch(T shape) { Status status; TF_Tensor* tf_tensor = TF_TensorFromTensor(tensor, &status); ASSERT_TRUE(status.ok()) << status.ToString(); - ASSERT_EQ(tensor.shape().DebugString(), TF_ShapeDebugString(tf_tensor)); + ASSERT_EQ(tensor.shape().DebugString(), ShapeDebugString(tf_tensor)); } TEST(ShapeDebugString, RegularShape) { diff --git a/tensorflow/c/tf_shape_utils_test.cc b/tensorflow/c/tf_shape_utils_test.cc deleted file mode 100644 index 49cf042c5c0..00000000000 --- a/tensorflow/c/tf_shape_utils_test.cc +++ /dev/null @@ -1,43 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/c/tensor_shape_utils.h" -#include "tensorflow/c/tf_tensor_internal.h" -#include "tensorflow/core/platform/test.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/platform/types.h" -#include "tensorflow/core/lib/core/status.h" - -namespace tensorflow { - -void TestShapeMatch(TensorShape shape) { - Tensor tensor(DT_FLOAT, shape); - Status status; - TF_Tensor* tf_tensor = TF_TensorFromTensor(tensor. &status); - ASSERT_EQ(status) - ASSERT_EQ(tensor.shape.DebugString(), TF_ShapeDebugString(tf_tensor)); -} - -TEST(ShapeDebugString, RegularShape) { - TestShapeMatch(TensorShape({5, 4, 7})); -} - -TEST(ShapeDebugString, ShapeWithUnknownDimension) { - TestShapeMatch(TensorShape({5, -1, 7})); -} - - -} // namespace tensorflow From d42df65885a8bd4a1850e2575561e03c7c73703b Mon Sep 17 00:00:00 2001 From: Yash Katariya Date: Tue, 14 Jul 2020 17:08:40 -0700 Subject: [PATCH 0436/2522] Add utilities so as to generate pages on tf.org for type aliases. The `sys.version_info` guard will make sure it works with all versions of python that TF supports. The API docs will be generated in kokoro in python 3.7. 
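The mechanism is simply assigning to `__doc__` on the alias object, which typing permits from Python 3.7 onward; on older interpreters the assignment is skipped so imports still work. A small self-contained sketch of the same idea (the `IntOrStr` alias and its docstring are made up for illustration and are not part of this change):

    import sys
    import typing

    # Hypothetical alias; the patch applies the same idea to the TensorLike
    # alias in tensorflow/python/types/core.py.
    IntOrStr = typing.Union[int, str]

    def document(obj, doc):
      # typing aliases only accept a __doc__ override on Python 3.7+.
      if sys.version_info >= (3, 7):
        obj.__doc__ = doc

    document(IntOrStr, 'Either an int or a str.')
    print(IntOrStr.__doc__)  # prints the docstring on Python 3.7+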
PiperOrigin-RevId: 321266183 Change-Id: I90178f93227e932ff0dc464a4293c399a99d0b9f --- tensorflow/python/types/BUILD | 13 ++++++ tensorflow/python/types/core.py | 50 +++++++++++++----------- tensorflow/python/types/doc_typealias.py | 35 +++++++++++++++++ 3 files changed, 76 insertions(+), 22 deletions(-) create mode 100644 tensorflow/python/types/doc_typealias.py diff --git a/tensorflow/python/types/BUILD b/tensorflow/python/types/BUILD index 1d9d87903e0..5f3f4fd0e31 100644 --- a/tensorflow/python/types/BUILD +++ b/tensorflow/python/types/BUILD @@ -32,7 +32,20 @@ py_strict_library( "//tensorflow:types_whitelist", ], deps = [ + ":doc_typealias", "//tensorflow/python:tf_export", "//third_party/py/numpy", ], ) + +py_strict_library( + name = "doc_typealias", + srcs = [ + "doc_typealias.py", + ], + srcs_version = "PY2AND3", + visibility = [ + "//tensorflow:__subpackages__", + ], + deps = [], +) diff --git a/tensorflow/python/types/core.py b/tensorflow/python/types/core.py index cfb6364a5e5..dcda7b68271 100644 --- a/tensorflow/python/types/core.py +++ b/tensorflow/python/types/core.py @@ -18,9 +18,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import textwrap + from typing import Union import numpy as np +from tensorflow.python.types import doc_typealias from tensorflow.python.util.tf_export import tf_export # TODO(mdan): Consider adding ABC once the dependence on isinstance is reduced. @@ -64,29 +67,32 @@ class Value(Tensor): pass -# TODO(rahulkamat): Add missing types that are convertible to tensor -# A `Union` type which can be used to denote "Tensor or other values that -# TensorFlow implicitly converts to Tensor". For example, it includes -# `list` and `ndarray`. -# -# This union will contain `tf.Tensor` and all types which can be successfully -# converted to a `tf.Tensor` by `tf.convert_to_tensor`. -# -# This definition may be used in user code. Additional types may be added in the -# future as more input types are supported. -# -# Example: -# -# def foo(tensor_like: TensorLike): -# pass -# -# This definition passes static type verification for: -# -# foo(tf.constant([1, 2, 3])) -# foo([1, 2, 3]) -# foo(np.array([1, 2, 3])) -# +# TODO(rahulkamat): Add missing types that are convertible to Tensor. TensorLike = Union[Tensor, int, float, bool, str, complex, tuple, list, np.ndarray] +doc_typealias.document( + obj=TensorLike, + doc=textwrap.dedent("""\ + Union of all types that can be converted to a `tf.Tensor` by `tf.convert_to_tensor`. + + This definition may be used in user code. Additional types may be added + in the future as more input types are supported. + + # Example: + + ``` + def foo(x: TensorLike): + pass + ``` + + This definition passes static type verification for: + + ``` + foo(tf.constant([1, 2, 3])) + foo([1, 2, 3]) + foo(np.array([1, 2, 3])) + ``` + """), +) tf_export("types.experimental.TensorLike").export_constant( __name__, "TensorLike") diff --git a/tensorflow/python/types/doc_typealias.py b/tensorflow/python/types/doc_typealias.py new file mode 100644 index 00000000000..93dae5ed2b5 --- /dev/null +++ b/tensorflow/python/types/doc_typealias.py @@ -0,0 +1,35 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Helper functions to add documentation to type aliases.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import sys + + +def document(obj, doc): + """Adds a docstring to typealias by overriding the `__doc__` attribute. + + Note: Overriding `__doc__` is only possible after python 3.7. + + Args: + obj: Typealias object that needs to be documented. + doc: Docstring of the typealias. It should follow the standard pystyle + docstring rules. + """ + if sys.version_info >= (3, 7): + obj.__doc__ = doc From a574b291479712e8fe60a9b029a8b30566e31749 Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Wed, 15 Jul 2020 00:20:37 +0000 Subject: [PATCH 0437/2522] added namespace tensorflow --- tensorflow/c/kernels/summary_op.cc | 4 ++-- tensorflow/c/kernels/tensor_shape_utils.cc | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/tensorflow/c/kernels/summary_op.cc b/tensorflow/c/kernels/summary_op.cc index 87418c6ccea..7500c3046e1 100644 --- a/tensorflow/c/kernels/summary_op.cc +++ b/tensorflow/c/kernels/summary_op.cc @@ -68,8 +68,8 @@ static void ScalarSummaryOp_Compute(void* kernel, TF_OpKernelContext* ctx) { if (!IsSameSize(params.tags, params.values)) { std::ostringstream err; err << "tags and values not the same shape: " - << ShapeDebugString(params.tags) << " != " - << ShapeDebugString(params.values) + << tensorflow::ShapeDebugString(params.tags) << " != " + << tensorflow::ShapeDebugString(params.values) << SingleTag(params.tags); TF_SetStatus(params.status, TF_INVALID_ARGUMENT, err.str().c_str()); } diff --git a/tensorflow/c/kernels/tensor_shape_utils.cc b/tensorflow/c/kernels/tensor_shape_utils.cc index 062cdbd049a..b3cba8cb99f 100644 --- a/tensorflow/c/kernels/tensor_shape_utils.cc +++ b/tensorflow/c/kernels/tensor_shape_utils.cc @@ -22,6 +22,8 @@ limitations under the License. #include "tensorflow/core/platform/strcat.h" #include "tensorflow/core/platform/logging.h" +namespace tensorflow { + std::string ShapeDebugString(TF_Tensor* tensor) { // A TF_Tensor cannot have an unknown rank CHECK_GE(TF_NumDims(tensor), 0); @@ -35,4 +37,5 @@ std::string ShapeDebugString(TF_Tensor* tensor) { } tensorflow::strings::StrAppend(&s, "]"); return s; -} \ No newline at end of file +} +} // namespace tensorflow \ No newline at end of file From 47150b58d33ac9d2475dd9f38789cf47c5883228 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Tue, 14 Jul 2020 17:30:52 -0700 Subject: [PATCH 0438/2522] Migrate the Linux nightly release scripts to use the new bazelrc configs. 
PiperOrigin-RevId: 321269423 Change-Id: Ib982abec3bc13b5af158b8bc08feb67169740f5e --- .../ubuntu_16/cpu_py35_full/nightly_release.sh | 9 ++++++++- .../ubuntu_16/cpu_py36_full/nightly_release.sh | 9 ++++++++- .../ubuntu_16/cpu_py37_full/nightly_release.sh | 9 ++++++++- .../ubuntu_16/cpu_py38_full/nightly_release.sh | 9 ++++++++- .../ubuntu_16/gpu_py35_full/nightly_release.sh | 14 +++++++++++++- .../ubuntu_16/gpu_py36_full/nightly_release.sh | 14 +++++++++++++- .../ubuntu_16/gpu_py37_full/nightly_release.sh | 14 +++++++++++++- .../ubuntu_16/gpu_py38_full/nightly_release.sh | 14 +++++++++++++- 8 files changed, 84 insertions(+), 8 deletions(-) diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/nightly_release.sh index 690bfe219aa..ba1861b221e 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py35_full/nightly_release.sh @@ -27,11 +27,18 @@ install_bazelisk python2.7 tensorflow/tools/ci_build/update_version.py --nightly # Run configure. +export TF_NEED_GCP=1 +export TF_NEED_HDFS=1 +export TF_NEED_S3=1 +export TF_NEED_CUDA=0 +export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.5) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=release_cpu_linux tensorflow/tools/pip_package:build_pip_package +bazel build --config=opt --config=v2 \ + --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ + tensorflow/tools/pip_package:build_pip_package ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --cpu --nightly_flag diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/nightly_release.sh index bd686959209..2b770867099 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/nightly_release.sh @@ -27,11 +27,18 @@ install_bazelisk python2.7 tensorflow/tools/ci_build/update_version.py --nightly # Run configure. +export TF_NEED_GCP=1 +export TF_NEED_HDFS=1 +export TF_NEED_S3=1 +export TF_NEED_CUDA=0 +export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.6) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=release_cpu_linux tensorflow/tools/pip_package:build_pip_package +bazel build --config=opt --config=v2 \ + --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ + tensorflow/tools/pip_package:build_pip_package ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --cpu --nightly_flag diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py37_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py37_full/nightly_release.sh index 62c0439e4b0..25e59a5b096 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py37_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py37_full/nightly_release.sh @@ -27,11 +27,18 @@ install_bazelisk python2.7 tensorflow/tools/ci_build/update_version.py --nightly # Run configure. 
+export TF_NEED_GCP=1 +export TF_NEED_HDFS=1 +export TF_NEED_S3=1 +export TF_NEED_CUDA=0 +export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.7) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=release_cpu_linux tensorflow/tools/pip_package:build_pip_package +bazel build --config=opt --config=v2 \ + --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ + tensorflow/tools/pip_package:build_pip_package ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --cpu --nightly_flag diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py38_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py38_full/nightly_release.sh index 86add0707ba..e82064f7221 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py38_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py38_full/nightly_release.sh @@ -27,11 +27,18 @@ install_bazelisk python2.7 tensorflow/tools/ci_build/update_version.py --nightly # Run configure. +export TF_NEED_GCP=1 +export TF_NEED_HDFS=1 +export TF_NEED_S3=1 +export TF_NEED_CUDA=0 +export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.8) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=release_cpu_linux tensorflow/tools/pip_package:build_pip_package +bazel build --config=opt --config=v2 \ + --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ + tensorflow/tools/pip_package:build_pip_package ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --cpu --nightly_flag diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nightly_release.sh index addfc59818e..2ed5c014c65 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nightly_release.sh @@ -25,11 +25,23 @@ install_bazelisk python2.7 tensorflow/tools/ci_build/update_version.py --nightly # Run configure. 
+export TF_NEED_GCP=1 +export TF_NEED_HDFS=1 +export TF_NEED_S3=1 +export TF_NEED_CUDA=1 +export TF_CUDA_VERSION=10 +export TF_CUDNN_VERSION=7 +export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 +export TF_NEED_TENSORRT=1 +export TENSORRT_INSTALL_PATH=/usr/local/tensorrt +export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.5) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=release_gpu_linux tensorflow/tools/pip_package:build_pip_package +bazel build --config=opt --config=v2 \ + --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ + tensorflow/tools/pip_package:build_pip_package ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --nightly_flag ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --gpu --nightly_flag diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nightly_release.sh index c6fb6d469b1..87b2e52d88a 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nightly_release.sh @@ -25,11 +25,23 @@ install_bazelisk python2.7 tensorflow/tools/ci_build/update_version.py --nightly # Run configure. +export TF_NEED_GCP=1 +export TF_NEED_HDFS=1 +export TF_NEED_S3=1 +export TF_NEED_CUDA=1 +export TF_CUDA_VERSION=10 +export TF_CUDNN_VERSION=7 +export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 +export TF_NEED_TENSORRT=1 +export TENSORRT_INSTALL_PATH=/usr/local/tensorrt +export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.6) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=release_gpu_linux tensorflow/tools/pip_package:build_pip_package +bazel build --config=opt --config=v2 \ + --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ + tensorflow/tools/pip_package:build_pip_package ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --nightly_flag ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --gpu --nightly_flag diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nightly_release.sh index 6e900d7dba8..0436ec32643 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nightly_release.sh @@ -25,11 +25,23 @@ install_bazelisk python2.7 tensorflow/tools/ci_build/update_version.py --nightly # Run configure. 
+export TF_NEED_GCP=1 +export TF_NEED_HDFS=1 +export TF_NEED_S3=1 +export TF_NEED_CUDA=1 +export TF_CUDA_VERSION=10 +export TF_CUDNN_VERSION=7 +export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 +export TF_NEED_TENSORRT=1 +export TENSORRT_INSTALL_PATH=/usr/local/tensorrt +export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.7) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=release_gpu_linux tensorflow/tools/pip_package:build_pip_package +bazel build --config=opt --config=v2 \ + --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ + tensorflow/tools/pip_package:build_pip_package ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --nightly_flag ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --gpu --nightly_flag diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/nightly_release.sh index 9b968c4c3d6..783785db7f7 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/nightly_release.sh @@ -27,11 +27,23 @@ update_bazel_linux python2.7 tensorflow/tools/ci_build/update_version.py --nightly # Run configure. +export TF_NEED_GCP=1 +export TF_NEED_HDFS=1 +export TF_NEED_S3=1 +export TF_NEED_CUDA=1 +export TF_CUDA_VERSION=10 +export TF_CUDNN_VERSION=7 +export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 +export TF_NEED_TENSORRT=1 +export TENSORRT_INSTALL_PATH=/usr/local/tensorrt +export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.8) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=release_gpu_linux tensorflow/tools/pip_package:build_pip_package +bazel build --config=opt --config=v2 \ + --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ + tensorflow/tools/pip_package:build_pip_package ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --nightly_flag ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --gpu --nightly_flag From 33ecf41b885546def3d218a773c54e80319a4e39 Mon Sep 17 00:00:00 2001 From: YoungSeok Yoon Date: Tue, 14 Jul 2020 17:39:47 -0700 Subject: [PATCH 0439/2522] Check if iOS support is enabled before building the benchmark framework PiperOrigin-RevId: 321270672 Change-Id: Ia1910a672c2864372696c13119d9de7c4cdc0ea6 --- .../ios/build_benchmark_framework.sh | 43 +++++++++++++------ 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/tensorflow/lite/tools/benchmark/ios/build_benchmark_framework.sh b/tensorflow/lite/tools/benchmark/ios/build_benchmark_framework.sh index ed1b3dcef21..cedf043df57 100755 --- a/tensorflow/lite/tools/benchmark/ios/build_benchmark_framework.sh +++ b/tensorflow/lite/tools/benchmark/ios/build_benchmark_framework.sh @@ -15,14 +15,13 @@ # ============================================================================== set -e -set -x -WORKSPACE_ROOT=$(bazel info workspace) +WORKSPACE_ROOT=$(bazel info workspace 2> /dev/null) BENCHMARK_DIR=tensorflow/lite/tools/benchmark DEST_DIR="${BENCHMARK_DIR}/ios/TFLiteBenchmark/TFLiteBenchmark/Frameworks" FRAMEWORK_TARGET=TensorFlowLiteBenchmarkC_framework -usage() { +function usage() { echo "Usage: $(basename "$0") [-p]" echo "-p enable profiling" exit 1 @@ -37,19 +36,35 @@ while getopts "p" 
opt_name; do done shift $(($OPTIND - 1)) -pushd "${WORKSPACE_ROOT}" +function check_ios_configured() { + if [ ! -f "${WORKSPACE_ROOT}/${BENCHMARK_DIR}/experimental/ios/BUILD" ]; then + echo "ERROR: Benchmark framework BUILD file not found." + echo "Please enable iOS support by running the \"./configure\" script" \ + "from the workspace root." + exit 1 + fi +} + +function build_framework() { + set -x + pushd "${WORKSPACE_ROOT}" # Build the framework. -bazel build --config=ios_fat -c opt ${PROFILING_ARGS} \ - "//${BENCHMARK_DIR}/experimental/ios:${FRAMEWORK_TARGET}" + bazel build --config=ios_fat -c opt ${PROFILING_ARGS} \ + "//${BENCHMARK_DIR}/experimental/ios:${FRAMEWORK_TARGET}" # Copy the framework into the destination and unzip. -mkdir -p "${DEST_DIR}" -cp -f "bazel-bin/${BENCHMARK_DIR}/experimental/ios/${FRAMEWORK_TARGET}.zip" \ - "${DEST_DIR}" -pushd "${DEST_DIR}" -unzip -o "${FRAMEWORK_TARGET}.zip" -rm -f "${FRAMEWORK_TARGET}.zip" + mkdir -p "${DEST_DIR}" + cp -f "bazel-bin/${BENCHMARK_DIR}/experimental/ios/${FRAMEWORK_TARGET}.zip" \ + "${DEST_DIR}" + pushd "${DEST_DIR}" + unzip -o "${FRAMEWORK_TARGET}.zip" + rm -f "${FRAMEWORK_TARGET}.zip" + + popd + popd +} + +check_ios_configured +build_framework -popd -popd From 2256faa6b78c87ebf6d92bc8f2d1b5f4b6fa7591 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Tue, 14 Jul 2020 17:46:15 -0700 Subject: [PATCH 0440/2522] Remove the alias for tensorflow/core/framework:tensor_testutil PiperOrigin-RevId: 321271557 Change-Id: I7267ee3cd1d87868f7641136bc6d7cb7047b7eca --- tensorflow/core/grappler/BUILD | 2 +- tensorflow/core/grappler/costs/BUILD | 4 ++-- tensorflow/core/grappler/optimizers/BUILD | 10 +++++----- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tensorflow/core/grappler/BUILD b/tensorflow/core/grappler/BUILD index 48c733c0987..fdd72968a40 100644 --- a/tensorflow/core/grappler/BUILD +++ b/tensorflow/core/grappler/BUILD @@ -58,10 +58,10 @@ tf_cc_test( "//tensorflow/core:all_kernels", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", - "//tensorflow/core:tensor_testutil", "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core:testlib", + "//tensorflow/core/framework:tensor_testutil", "@com_google_absl//absl/strings", ], ) diff --git a/tensorflow/core/grappler/costs/BUILD b/tensorflow/core/grappler/costs/BUILD index 36abb756dd0..edbdaffa1c8 100644 --- a/tensorflow/core/grappler/costs/BUILD +++ b/tensorflow/core/grappler/costs/BUILD @@ -100,9 +100,9 @@ tf_cc_test( "//tensorflow/core:lib", "//tensorflow/core:lib_proto_parsing", "//tensorflow/core:protos_all_cc", - "//tensorflow/core:tensor_testutil", "//tensorflow/core:test", "//tensorflow/core:test_main", + "//tensorflow/core/framework:tensor_testutil", "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler/clusters:single_machine", "//tensorflow/core/grappler/inputs:trivial_test_graph_input_yielder", @@ -191,10 +191,10 @@ tf_cc_test( "//tensorflow/core:core_cpu_internal", "//tensorflow/core:framework", "//tensorflow/core:protos_all_cc", - "//tensorflow/core:tensor_testutil", "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core:testlib", + "//tensorflow/core/framework:tensor_testutil", ], ) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 5f19398bf89..d3db2f19596 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -422,9 +422,9 @@ tf_cuda_cc_test( ":model_pruner", "//tensorflow/cc:cc_ops", 
"//tensorflow/core:protos_all_cc", - "//tensorflow/core:tensor_testutil", "//tensorflow/core:test", "//tensorflow/core:test_main", + "//tensorflow/core/framework:tensor_testutil", "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler:utils", "//tensorflow/core/grappler/inputs:trivial_test_graph_input_yielder", @@ -547,9 +547,9 @@ tf_cuda_cc_test( "//tensorflow/core:lib", "//tensorflow/core:ops", "//tensorflow/core:protos_all_cc", - "//tensorflow/core:tensor_testutil", "//tensorflow/core:test", "//tensorflow/core:test_main", + "//tensorflow/core/framework:tensor_testutil", "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler:utils", "//tensorflow/core/grappler/clusters:virtual_cluster", @@ -802,9 +802,9 @@ tf_cuda_cc_test( ":loop_optimizer", "//tensorflow/cc:cc_ops", "//tensorflow/core:protos_all_cc", - "//tensorflow/core:tensor_testutil", "//tensorflow/core:test", "//tensorflow/core:test_main", + "//tensorflow/core/framework:tensor_testutil", "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler:utils", "//tensorflow/core/grappler/inputs:trivial_test_graph_input_yielder", @@ -1163,11 +1163,11 @@ tf_cuda_cc_test( ":generic_layout_optimizer", "//tensorflow/cc:cc_ops", "//tensorflow/core:framework", - "//tensorflow/core:tensor_testutil", "//tensorflow/core:tensorflow", "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core:testlib", + "//tensorflow/core/framework:tensor_testutil", "//tensorflow/core/grappler:devices", "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler/clusters:cluster", @@ -1211,10 +1211,10 @@ tf_cuda_cc_test( "//tensorflow/cc:cc_ops", "//tensorflow/cc:cc_ops_internal", "//tensorflow/core:protos_all_cc", - "//tensorflow/core:tensor_testutil", "//tensorflow/core:tensorflow", "//tensorflow/core:test", "//tensorflow/core:test_main", + "//tensorflow/core/framework:tensor_testutil", "//tensorflow/core/grappler:devices", "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler/clusters:cluster", From d956c282fbb4a104fe2ca6092a9f2735a5014c26 Mon Sep 17 00:00:00 2001 From: Chao Mei Date: Tue, 14 Jul 2020 17:47:34 -0700 Subject: [PATCH 0441/2522] Use the global delegate providers to drive the NNAPI delegate test. PiperOrigin-RevId: 321271726 Change-Id: If50f8ca0bd712fbab9c041bf2db416b00820a251 --- tensorflow/lite/kernels/test_main.cc | 10 ++-- tensorflow/lite/kernels/test_util.cc | 71 ++++++++++------------------ tensorflow/lite/kernels/test_util.h | 4 +- 3 files changed, 35 insertions(+), 50 deletions(-) diff --git a/tensorflow/lite/kernels/test_main.cc b/tensorflow/lite/kernels/test_main.cc index a99109080fa..dd8fb0405ab 100644 --- a/tensorflow/lite/kernels/test_main.cc +++ b/tensorflow/lite/kernels/test_main.cc @@ -26,9 +26,13 @@ void InitKernelTest(int* argc, char** argv) { tflite::KernelTestDelegateProviders::Get(); delegate_providers->InitFromCmdlineArgs(argc, const_cast(argv)); - // TODO(b/160764491): remove the special handling of NNAPI delegate test. - tflite::SingleOpModel::SetForceUseNnapi( - delegate_providers->ConstParams().Get("use_nnapi")); + if (delegate_providers->ConstParams().Get("use_nnapi")) { + // In Android Q, the NNAPI delegate avoids delegation if the only device + // is the reference CPU. However, for testing purposes, we still want + // delegation coverage, so force use of this reference path. 
+ delegate_providers->MutableParams()->Set( + "nnapi_accelerator_name", "nnapi-reference"); + } } } // namespace diff --git a/tensorflow/lite/kernels/test_util.cc b/tensorflow/lite/kernels/test_util.cc index 4a4f5c5de3a..80ed66a4340 100644 --- a/tensorflow/lite/kernels/test_util.cc +++ b/tensorflow/lite/kernels/test_util.cc @@ -55,26 +55,6 @@ namespace tflite { using ::testing::FloatNear; using ::testing::Matcher; -namespace { - -// Whether to enable (global) use of NNAPI. Note that this will typically -// be set via a command-line flag. -static bool force_use_nnapi = false; - -TfLiteDelegate* TestNnApiDelegate() { - static TfLiteDelegate* delegate = [] { - StatefulNnApiDelegate::Options options; - // In Android Q, the NNAPI delegate avoids delegation if the only device - // is the reference CPU. However, for testing purposes, we still want - // delegation coverage, so force use of this reference path. - options.accelerator_name = "nnapi-reference"; - return new StatefulNnApiDelegate(options); - }(); - return delegate; -} - -} // namespace - std::vector> ArrayFloatNear(const std::vector& values, float max_abs_error) { std::vector> matchers; @@ -221,26 +201,22 @@ void SingleOpModel::BuildInterpreter(std::vector> input_shapes, } TfLiteStatus SingleOpModel::ApplyDelegate() { - auto* delegate_providers = tflite::KernelTestDelegateProviders::Get(); - - if (force_use_nnapi) { - delegate_ = TestNnApiDelegate(); - - // As we currently have special handling of nnapi delegate in kernel tests, - // we turn off the nnapi delegate provider to avoid re-applying it later. - // TODO(b/160764491): remove this special handling for NNAPI delegate test. - delegate_providers->MutableParams()->Set("use_nnapi", false); - } - if (delegate_) { TFLITE_LOG(WARN) << "Having a manually-set TfLite delegate, and bypassing " "KernelTestDelegateProviders"; - return interpreter_->ModifyGraphWithDelegate(delegate_); - } - - for (auto& one : delegate_providers->CreateAllDelegates()) { - TF_LITE_ENSURE_STATUS( - interpreter_->ModifyGraphWithDelegate(std::move(one))); + TF_LITE_ENSURE_STATUS(interpreter_->ModifyGraphWithDelegate(delegate_)); + ++num_applied_delegates_; + } else { + auto* delegate_providers = tflite::KernelTestDelegateProviders::Get(); + for (auto& one : delegate_providers->CreateAllDelegates()) { + // The raw ptr always points to the actual TfLiteDegate object. + auto* delegate_raw_ptr = one.get(); + TF_LITE_ENSURE_STATUS( + interpreter_->ModifyGraphWithDelegate(std::move(one))); + // Note: 'delegate_' is always set to the last successfully applied one. + delegate_ = delegate_raw_ptr; + ++num_applied_delegates_; + } } return kTfLiteOk; } @@ -257,13 +233,11 @@ void SingleOpModel::BuildInterpreter( } // static -void SingleOpModel::SetForceUseNnapi(bool use_nnapi) { - force_use_nnapi = use_nnapi; +bool SingleOpModel::GetForceUseNnapi() { + return tflite::KernelTestDelegateProviders::Get()->ConstParams().Get( + "use_nnapi"); } -// static -bool SingleOpModel::GetForceUseNnapi() { return force_use_nnapi; } - int32_t SingleOpModel::GetTensorSize(int index) const { TfLiteTensor* t = interpreter_->tensor(index); CHECK(t); @@ -342,20 +316,27 @@ void SingleOpModel::ExpectOpAcceleratedWithNnapi(const std::string& test_id) { return; } + // If we have multiple delegates applied, we would skip this check at the + // moment. 
+ if (num_applied_delegates_ > 1) { + TFLITE_LOG(WARN) << "Skipping ExpectOpAcceleratedWithNnapi as " + << num_applied_delegates_ + << " delegates have been successfully applied."; + return; + } TFLITE_LOG(INFO) << "Validating acceleration"; const NnApi* nnapi = NnApiImplementation(); if (nnapi && nnapi->nnapi_exists && nnapi->android_sdk_version >= validation_params.value().MinAndroidSdkVersion()) { - EXPECT_EQ( - CountPartitionsDelegatedTo(interpreter_.get(), TestNnApiDelegate()), 1) + EXPECT_EQ(CountPartitionsDelegatedTo(interpreter_.get(), delegate_), 1) << "Expecting operation to be accelerated but cannot find a partition " "associated to the NNAPI delegate"; } } void SingleOpModel::ValidateAcceleration() { - if (force_use_nnapi) { + if (GetForceUseNnapi()) { ExpectOpAcceleratedWithNnapi(GetCurrentTestId()); } } diff --git a/tensorflow/lite/kernels/test_util.h b/tensorflow/lite/kernels/test_util.h index 78b7c580738..27b59cf3c4c 100644 --- a/tensorflow/lite/kernels/test_util.h +++ b/tensorflow/lite/kernels/test_util.h @@ -515,8 +515,7 @@ class SingleOpModel { resolver_ = std::move(resolver); } - // Enables NNAPI delegate application during interpreter creation. - static void SetForceUseNnapi(bool use_nnapi); + // Indicate whether the test has the NNAPI delegate applied. static bool GetForceUseNnapi(); int CountOpsExecutedByCpuKernel(); @@ -769,6 +768,7 @@ class SingleOpModel { std::vector> tensors_; std::vector> buffers_; TfLiteDelegate* delegate_ = nullptr; + int num_applied_delegates_ = 0; }; // Populate string tensors. From ded156cbbf5006d2a124f2777bf5e16be4710ccb Mon Sep 17 00:00:00 2001 From: Russell Power Date: Tue, 14 Jul 2020 18:06:39 -0700 Subject: [PATCH 0442/2522] Cleanup dependencies for TPU tests. PiperOrigin-RevId: 321274280 Change-Id: I572365c25e9362b397623574e9939b2ec9533a2f --- tensorflow/core/tpu/graph_rewrite/BUILD | 58 +- .../encapsulate_tpu_computations_pass_test.cc | 810 ------------------ .../variable_merger_pass_test.cc | 205 ----- 3 files changed, 6 insertions(+), 1067 deletions(-) delete mode 100644 tensorflow/core/tpu/graph_rewrite/encapsulate_tpu_computations_pass_test.cc delete mode 100644 tensorflow/core/tpu/graph_rewrite/variable_merger_pass_test.cc diff --git a/tensorflow/core/tpu/graph_rewrite/BUILD b/tensorflow/core/tpu/graph_rewrite/BUILD index 10218b0a2fb..69238456d57 100644 --- a/tensorflow/core/tpu/graph_rewrite/BUILD +++ b/tensorflow/core/tpu/graph_rewrite/BUILD @@ -1,10 +1,5 @@ # Contains graph rewrites for TPU runtimes and optimizations. 
-load( - "//tensorflow:tensorflow.bzl", - "tf_cc_test", -) - package( default_visibility = [ "//tensorflow/core/tpu:__subpackages__", @@ -35,14 +30,12 @@ cc_library( ], deps = [ ":distributed_tpu_rewrite_helpers", - "//tensorflow/cc:scope", "//tensorflow/compiler/xla:status_macros", "//tensorflow/core:core_cpu", "//tensorflow/core:core_cpu_lib", "//tensorflow/core:framework", "//tensorflow/core:framework_internal", "//tensorflow/core:lib", - "//tensorflow/core/protobuf/tpu:topology_proto_cc", "//tensorflow/core/tpu:tpu_init_mode", "//tensorflow/core/tpu/kernels:tpu_compile_op_options", ], @@ -54,9 +47,9 @@ cc_library( hdrs = ["distributed_tpu_rewrite_helpers.h"], deps = [ "//tensorflow/compiler/xla:status_macros", - "//tensorflow/core:core_cpu_lib", "//tensorflow/core:framework", "//tensorflow/core:protos_all_cc", + "//tensorflow/core/common_runtime:device_set", "//tensorflow/core/tpu:tpu_defs", ], ) @@ -67,10 +60,10 @@ cc_library( hdrs = ["variable_merger_pass.h"], deps = [ "//tensorflow/compiler/xla:status_macros", - "//tensorflow/core:core_cpu", - "//tensorflow/core:core_cpu_internal", "//tensorflow/core:framework", + "//tensorflow/core:graph", "//tensorflow/core:lib", + "//tensorflow/core/common_runtime:optimization_registry", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/strings", @@ -78,23 +71,6 @@ cc_library( ], ) -tf_cc_test( - name = "variable_merger_pass_test", - size = "small", - srcs = ["variable_merger_pass_test.cc"], - deps = [ - ":variable_merger_pass", - "//tensorflow/cc:cc_ops", - "//tensorflow/cc:ops", - "//tensorflow/cc:resource_variable_ops", - "//tensorflow/core:framework", - "//tensorflow/core:framework_internal", - "//tensorflow/core:ops", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - ], -) - cc_library( name = "encapsulate_tpu_computations_pass", srcs = [ @@ -109,12 +85,13 @@ cc_library( "//tensorflow/compiler/tf2xla:side_effect_util", "//tensorflow/compiler/tf2xla:tf2xla_util", "//tensorflow/compiler/xla:status_macros", - "//tensorflow/core:core_cpu_lib", "//tensorflow/core:framework", + "//tensorflow/core:graph", "//tensorflow/core:lib", - "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", "//tensorflow/core:session_options", + "//tensorflow/core/common_runtime:function", + "//tensorflow/core/common_runtime:optimization_registry", "//tensorflow/core/tpu:tpu_compile_interface", "//tensorflow/core/tpu:tpu_defs", "@com_google_absl//absl/container:flat_hash_map", @@ -124,26 +101,3 @@ cc_library( "@com_google_absl//absl/strings", ], ) - -tf_cc_test( - name = "encapsulate_tpu_computations_pass_test", - srcs = ["encapsulate_tpu_computations_pass_test.cc"], - deps = [ - ":encapsulate_tpu_computations_pass", - "//tensorflow/cc:cc_ops", - "//tensorflow/cc:function_ops", - "//tensorflow/cc:resource_variable_ops", - "//tensorflow/cc:tpu_ops", - "//tensorflow/compiler/jit:compilation_passes", - "//tensorflow/compiler/tf2xla:test_util", - "//tensorflow/core:core_cpu", - "//tensorflow/core:core_cpu_impl", - "//tensorflow/core:framework", - "//tensorflow/core:framework_internal", - "//tensorflow/core:lib_internal", - "//tensorflow/core:ops", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core/tpu:tpu_defs", - ], -) diff --git a/tensorflow/core/tpu/graph_rewrite/encapsulate_tpu_computations_pass_test.cc b/tensorflow/core/tpu/graph_rewrite/encapsulate_tpu_computations_pass_test.cc deleted file mode 100644 index c57d6311f31..00000000000 --- 
a/tensorflow/core/tpu/graph_rewrite/encapsulate_tpu_computations_pass_test.cc +++ /dev/null @@ -1,810 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/core/tpu/graph_rewrite/encapsulate_tpu_computations_pass.h" - -#include "tensorflow/cc/ops/array_ops.h" -#include "tensorflow/cc/ops/function_ops.h" -#include "tensorflow/cc/ops/parsing_ops.h" -#include "tensorflow/cc/ops/resource_variable_ops.h" -#include "tensorflow/cc/ops/standard_ops.h" -#include "tensorflow/cc/ops/tpu_replication_ops.h" -#include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h" -#include "tensorflow/compiler/tf2xla/test_util.h" -#include "tensorflow/core/common_runtime/device_factory.h" -#include "tensorflow/core/common_runtime/graph_constructor.h" -#include "tensorflow/core/common_runtime/process_function_library_runtime.h" -#include "tensorflow/core/framework/graph_to_functiondef.h" -#include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow/core/lib/hash/hash.h" -#include "tensorflow/core/lib/strings/proto_serialization.h" -#include "tensorflow/core/platform/test.h" -#include "tensorflow/core/public/session_options.h" -#include "tensorflow/core/public/version.h" -#include "tensorflow/core/tpu/tpu_defs.h" -#include "tensorflow/core/util/equal_graph_def.h" -#include "tensorflow/core/util/ptr_util.h" - -namespace tensorflow { - -static std::unique_ptr MakeOuterGraph( - const FunctionLibraryDefinition& flib_def, const string& function) { - Scope scope = Scope::NewRootScope().ExitOnError(); - TF_EXPECT_OK(scope.graph()->AddFunctionLibrary(flib_def.ToProto())); - - int num_replicas = 2; - - auto a0 = ops::Placeholder(scope.WithOpName("A0"), DT_INT32); - auto a1 = ops::Placeholder(scope.WithOpName("A1"), DT_INT32); - auto b0 = ops::Placeholder(scope.WithOpName("B0"), DT_FLOAT); - auto b1 = ops::Placeholder(scope.WithOpName("B1"), DT_FLOAT); - auto u0 = ops::Placeholder(scope.WithOpName("U0"), DT_RESOURCE); - auto u1 = ops::Placeholder(scope.WithOpName("U1"), DT_RESOURCE); - auto z = ops::Placeholder(scope.WithOpName("Z"), DT_RESOURCE); - auto c = ops::Placeholder(scope.WithOpName("C"), DT_INT32); - auto d = ops::Placeholder(scope.WithOpName("D"), DT_FLOAT); - auto v = ops::Placeholder(scope.WithOpName("V"), DT_RESOURCE); - auto w = ops::Placeholder(scope.WithOpName("W"), DT_RESOURCE); - auto x = ops::GuaranteeConst( - scope.WithOpName("X"), - ops::Placeholder(scope.WithOpName("X_Holder"), DT_DOUBLE)); - auto y = ops::GuaranteeConst( - scope.WithOpName("Y"), - ops::Placeholder(scope.WithOpName("Y_Holder"), DT_DOUBLE)); - - auto in0 = ops::TPUReplicatedInput(scope.WithOpName("In0"), - std::initializer_list{a0, a1}); - auto in1 = ops::TPUReplicatedInput(scope.WithOpName("In1"), - std::initializer_list{b0, b1}); - auto in2 = ops::TPUReplicatedInput(scope.WithOpName("In2"), - std::initializer_list{u0, u1}); - auto in3 = 
ops::TPUReplicatedInput(scope.WithOpName("In3"), - std::initializer_list{z}); - in3.node()->AddAttr("is_packed", true); - - NodeDef def; - TF_CHECK_OK(NodeDefBuilder("replicate0", function, &flib_def) - .Input(in0.node()->name(), 0, DT_INT32) - .Input(in1.node()->name(), 0, DT_FLOAT) - .Input(in2.node()->name(), 0, DT_RESOURCE) - .Input(in3.node()->name(), 0, DT_RESOURCE) - .Input(c.node()->name(), 0, DT_INT32) - .Input(d.node()->name(), 0, DT_FLOAT) - .Input(v.node()->name(), 0, DT_RESOURCE) - .Input(w.node()->name(), 0, DT_RESOURCE) - .Input(x.node()->name(), 0, DT_DOUBLE) - .Input(y.node()->name(), 0, DT_DOUBLE) - .Attr(kTPUReplicateAttr, "replicate0") - .Attr("num_replicas", num_replicas) - .Attr("num_cores_per_replica", 6) - .Attr("topology", "") - .Attr("use_tpu", true) - .Attr("device_assignment", std::vector()) - .Attr("host_compute_core", std::vector()) - .Attr("padding_map", std::vector()) - .Attr("_variable_start_index", 6) - .Attr("_guaranteed_const_start_index", 8) - .Attr("allow_soft_placement", false) - .Attr("step_marker_location", "STEP_MARK_AT_ENTRY") - .Attr("use_spmd_for_xla_partitioning", false) - .Finalize(&def)); - - Status status; - Node* replicate = scope.graph()->AddNode(def, &status); - TF_CHECK_OK(status); - TF_CHECK_OK(scope.DoShapeInference(replicate)); - scope.graph()->AddEdge(in0.node(), 0, replicate, 0); - scope.graph()->AddEdge(in1.node(), 0, replicate, 1); - scope.graph()->AddEdge(in2.node(), 0, replicate, 2); - scope.graph()->AddEdge(in3.node(), 0, replicate, 3); - scope.graph()->AddEdge(c.node(), 0, replicate, 4); - scope.graph()->AddEdge(d.node(), 0, replicate, 5); - scope.graph()->AddEdge(v.node(), 0, replicate, 6); - scope.graph()->AddEdge(w.node(), 0, replicate, 7); - scope.graph()->AddEdge(x.node(), 0, replicate, 8); - scope.graph()->AddEdge(y.node(), 0, replicate, 9); - - auto out0 = ops::TPUReplicatedOutput(scope.WithOpName("Out0"), - Output(replicate, 0), num_replicas); - auto out1 = ops::TPUReplicatedOutput(scope.WithOpName("Out1"), - Output(replicate, 1), num_replicas); - auto out2 = ops::TPUReplicatedOutput(scope.WithOpName("Out2"), - Output(replicate, 2), num_replicas); - auto out3 = ops::TPUReplicatedOutput(scope.WithOpName("Out3"), - Output(replicate, 3), num_replicas); - auto out4 = ops::TPUReplicatedOutput(scope.WithOpName("Out4"), - Output(replicate, 4), num_replicas); - - auto consumer0_0a = ops::Identity(scope.WithOpName("consumer0_0a"), out0[0]); - auto consumer0_0b = ops::Identity(scope.WithOpName("consumer0_0b"), out0[0]); - auto consumer0_1 = ops::Identity(scope.WithOpName("consumer0_1"), out0[1]); - auto consumer1 = ops::Identity(scope.WithOpName("consumer1"), out1[1]); - auto consumer2 = ops::Identity(scope.WithOpName("consumer2"), out2[0]); - auto consumer3a = ops::Identity(scope.WithOpName("consumer3a"), out3[0]); - auto consumer3b = ops::Identity(scope.WithOpName("consumer3b"), out3[1]); - auto consumer4a = ops::Identity(scope.WithOpName("consumer4a"), out4[0]); - auto consumer4b = ops::Identity(scope.WithOpName("consumer4b"), out4[1]); - - std::unique_ptr graph(new Graph(OpRegistry::Global())); - TF_CHECK_OK(scope.ToGraph(graph.get())); - return graph; -} - -// Makes an encapsulate body graph for use in tests. 
-static std::unique_ptr MakeBodyGraph() { - Scope scope = Scope::NewRootScope().ExitOnError(); - - auto arg0 = ops::_Arg(scope.WithOpName("in0_0_arg"), DT_INT32, 0); - auto arg1 = ops::_Arg(scope.WithOpName("in1_0_arg"), DT_FLOAT, 1); - auto arg2 = ops::_Arg(scope.WithOpName("in2_0_arg"), DT_RESOURCE, 2); - auto arg3 = ops::_Arg(scope.WithOpName("in3_0_arg"), DT_RESOURCE, 3); - auto arg4 = ops::_Arg(scope.WithOpName("c_0_arg"), DT_INT32, 4); - auto arg5 = ops::_Arg(scope.WithOpName("d_0_arg"), DT_FLOAT, 5); - auto arg6 = ops::_Arg(scope.WithOpName("v_0_arg"), DT_RESOURCE, 6); - auto arg7 = ops::_Arg(scope.WithOpName("w_0_arg"), DT_RESOURCE, 7); - - auto add_attrs = [](Node* node) { - node->AddAttr(kTPUReplicateAttr, "replicate0"); - }; - - string device = - tensorflow::strings::StrCat("/device:", DEVICE_TPU_REPLICATED_CORE); - - auto in1_identity = - ops::Identity(scope.WithOpName("In1_identity").WithDevice(device), arg1); - - auto read_u = ops::ReadVariableOp( - scope.WithOpName("ReadU").WithDevice(device), arg2, DT_FLOAT); - add_attrs(read_u.node()); - auto read_z = ops::ReadVariableOp( - scope.WithOpName("ReadZ").WithDevice(device), arg3, DT_FLOAT); - add_attrs(read_z.node()); - auto read_v = ops::ReadVariableOp( - scope.WithOpName("ReadV").WithDevice(device), arg6, DT_FLOAT); - add_attrs(read_v.node()); - auto read_w = ops::ReadVariableOp( - scope.WithOpName("ReadW").WithDevice(device), arg7, DT_FLOAT); - add_attrs(read_w.node()); - - auto e = ops::Add(scope.WithOpName("E").WithDevice(device), arg0, arg4); - add_attrs(e.node()); - auto f = ops::Add(scope.WithOpName("F").WithDevice(device), read_v, read_w); - add_attrs(f.node()); - auto g = ops::Add(scope.WithOpName("G").WithDevice(device), f, arg5); - add_attrs(g.node()); - - auto arg8 = ops::_Arg(scope.WithOpName("x_0_arg"), DT_DOUBLE, 8); - auto arg9 = ops::_Arg(scope.WithOpName("y_0_arg"), DT_DOUBLE, 9); - arg8.node()->AddAttr("_is_guaranteed_constant", true); - arg9.node()->AddAttr("_is_guaranteed_constant", true); - auto h = ops::Add(scope.WithOpName("H").WithDevice(device), arg8, arg9); - add_attrs(h.node()); - - auto out0 = ops::_Retval(scope.WithOpName("e_0_retval_RetVal"), e, 0); - auto out1 = ops::_Retval(scope.WithOpName("g_0_retval_RetVal"), g, 1); - auto out2 = ops::_Retval(scope.WithOpName("in1_identity_0_retval_RetVal"), - in1_identity, 2); - auto out3 = - ops::_Retval(scope.WithOpName("readu_0_retval_RetVal"), read_u, 3); - auto out4 = - ops::_Retval(scope.WithOpName("readz_0_retval_RetVal"), read_z, 4); - - std::unique_ptr graph(new Graph(OpRegistry::Global())); - TF_CHECK_OK(scope.ToGraph(graph.get())); - return graph; -} - -TEST(EncapsulateTPUComputations, DeterministicEncapsulate) { - // Test that control edge insertion order doesn't affect the cache key - // (cluster name) generated by TPU encapsulate pass. - auto get_serialized_graph = [](bool control_input_reversed, - bool operand_reversed) -> string { - FunctionLibraryDefinition flib_def(OpRegistry::Global(), {}); - std::unique_ptr graph(new Graph(&flib_def)); - { - Scope scope = Scope::NewRootScope().ExitOnError(); - auto a0 = ops::Placeholder(scope.WithOpName("A0"), DT_INT32); - auto a1 = ops::Placeholder(scope.WithOpName("A1"), DT_INT32); - - ops::Add e = operand_reversed ? 
ops::Add(scope.WithOpName("E"), a0, a1) - : ops::Add(scope.WithOpName("E"), a1, a0); - - auto metadata = ops::TPUReplicateMetadata(scope, /*num_replicas=*/2); - auto add_attrs = [](Node* node) { - node->AddAttr(kTPUReplicateAttr, "replicate0"); - }; - add_attrs(metadata.operation.node()); - add_attrs(e.node()); - - TF_CHECK_OK(scope.ToGraph(graph.get())); - auto get_node_in_graph = [&graph](Node* node) { - return graph->FindNodeId(node->id()); - }; - // Insert control edge in different order. The order should not affect - // the encapsulated or serialized graph. - if (!control_input_reversed) { - graph->AddControlEdge(get_node_in_graph(a0.node()), - get_node_in_graph(e.node()), true); - graph->AddControlEdge(get_node_in_graph(a1.node()), - get_node_in_graph(e.node()), true); - } else { - graph->AddControlEdge(get_node_in_graph(a1.node()), - get_node_in_graph(e.node()), true); - graph->AddControlEdge(get_node_in_graph(a0.node()), - get_node_in_graph(e.node()), true); - } - } - TF_CHECK_OK(EncapsulateTPUComputationsPass::Encapsulate(&graph, &flib_def)); - GraphDef gdef; - graph->ToGraphDef(&gdef); - // Before serialization, sort control inputs first to remove - // nondeterminism. - SortControlInputs(&gdef); - string serialized; - SerializeToStringDeterministic(gdef, &serialized); - return serialized; - }; - - // Changing the order of control input shouldn't affect the graph generated. - EXPECT_EQ(get_serialized_graph(/*control_input_reversed=*/true, - /*operand_reversed=*/false), - get_serialized_graph(/*control_input_reversed=*/false, - /*operand_reversed=*/false)); - - // Changing the order of data input should affect the graph generated. - EXPECT_NE(get_serialized_graph(/*control_input_reversed=*/false, - /*operand_reversed=*/true), - get_serialized_graph(/*control_input_reversed=*/false, - /*operand_reversed=*/false)); -} - -TEST(EncapsulateTPUComputations, Encapsulate) { - FunctionLibraryDefinition flib_def(OpRegistry::Global(), {}); - std::unique_ptr graph(new Graph(&flib_def)); - { - Scope scope = Scope::NewRootScope().ExitOnError(); - auto a0 = ops::Placeholder(scope.WithOpName("A0"), DT_INT32); - auto a1 = ops::Placeholder(scope.WithOpName("A1"), DT_INT32); - auto b0 = ops::Placeholder(scope.WithOpName("B0"), DT_FLOAT); - auto b1 = ops::Placeholder(scope.WithOpName("B1"), DT_FLOAT); - auto c = ops::Placeholder(scope.WithOpName("C"), DT_INT32); - auto d = ops::Placeholder(scope.WithOpName("D"), DT_FLOAT); - auto u0 = ops::Placeholder(scope.WithOpName("U0"), DT_RESOURCE); - auto u1 = ops::Placeholder(scope.WithOpName("U1"), DT_RESOURCE); - auto z = ops::Placeholder(scope.WithOpName("Z"), DT_RESOURCE); - auto v = ops::Placeholder(scope.WithOpName("V"), DT_RESOURCE); - auto w = ops::Placeholder(scope.WithOpName("W"), DT_RESOURCE); - auto x = ops::GuaranteeConst( - scope.WithOpName("X"), - ops::Placeholder(scope.WithOpName("X_Holder"), DT_DOUBLE)); - auto y = ops::GuaranteeConst( - scope.WithOpName("Y"), - ops::Placeholder(scope.WithOpName("Y_Holder"), DT_DOUBLE)); - - auto in0 = ops::TPUReplicatedInput(scope.WithOpName("In0"), - std::initializer_list{a0, a1}); - auto in1 = ops::TPUReplicatedInput(scope.WithOpName("In1"), - std::initializer_list{b0, b1}); - auto in2 = ops::TPUReplicatedInput(scope.WithOpName("In2"), - std::initializer_list{u0, u1}); - auto in3 = ops::TPUReplicatedInput(scope.WithOpName("In3"), - std::initializer_list{z}); - in3.node()->AddAttr("is_packed", true); - - auto add_attrs = [](Node* node) { - node->AddAttr(kTPUReplicateAttr, "replicate0"); - }; - auto metadata 
= ops::TPUReplicateMetadata( - scope, /*num_replicas=*/2, - ops::TPUReplicateMetadata::ComputationShape({2, 3})); - add_attrs(metadata.operation.node()); - - auto in1_identity = ops::Identity(scope.WithOpName("In1_identity"), in1); - add_attrs(in1_identity.node()); - - auto read_u = ops::ReadVariableOp(scope.WithOpName("ReadU"), in2, DT_FLOAT); - add_attrs(read_u.node()); - auto read_z = ops::ReadVariableOp(scope.WithOpName("ReadZ"), in3, DT_FLOAT); - add_attrs(read_z.node()); - auto read_v = ops::ReadVariableOp(scope.WithOpName("ReadV"), v, DT_FLOAT); - add_attrs(read_v.node()); - auto read_w = ops::ReadVariableOp(scope.WithOpName("ReadW"), w, DT_FLOAT); - add_attrs(read_w.node()); - - auto e = ops::Add(scope.WithOpName("E"), in0, c); - add_attrs(e.node()); - auto f = ops::Add(scope.WithOpName("F"), read_v, read_w); - add_attrs(f.node()); - auto g = ops::Add(scope.WithOpName("G"), f, d); - add_attrs(g.node()); - auto h = ops::Add(scope.WithOpName("H"), x, y); - add_attrs(h.node()); - - auto out0 = ops::TPUReplicatedOutput(scope.WithOpName("Out0"), e, 2); - auto out1 = ops::TPUReplicatedOutput(scope.WithOpName("Out1"), g, 2); - auto out2 = - ops::TPUReplicatedOutput(scope.WithOpName("Out2"), in1_identity, 2); - auto out3 = ops::TPUReplicatedOutput(scope.WithOpName("Out3"), read_u, 2); - auto out4 = ops::TPUReplicatedOutput(scope.WithOpName("Out4"), read_z, 2); - - auto consumer0_0a = - ops::Identity(scope.WithOpName("consumer0_0a"), out0[0]); - auto consumer0_0b = - ops::Identity(scope.WithOpName("consumer0_0b"), out0[0]); - auto consumer0_1 = ops::Identity(scope.WithOpName("consumer0_1"), out0[1]); - auto consumer1 = ops::Identity(scope.WithOpName("consumer1"), out1[1]); - auto consumer2 = ops::Identity(scope.WithOpName("consumer2"), out2[0]); - auto consumer3a = ops::Identity(scope.WithOpName("consumer3a"), out3[0]); - auto consumer3b = ops::Identity(scope.WithOpName("consumer3b"), out3[1]); - auto consumer4a = ops::Identity(scope.WithOpName("consumer4a"), out4[0]); - auto consumer4b = ops::Identity(scope.WithOpName("consumer4b"), out4[1]); - TF_ASSERT_OK(scope.ToGraph(graph.get())); - } - - std::unique_ptr graph_copy(new Graph(&flib_def)); - CopyGraph(*graph, graph_copy.get()); - - TF_ASSERT_OK(EncapsulateTPUComputationsPass::Encapsulate(&graph, &flib_def)); - // Remove _xla_inferred_shapes attribute. - for (Node* n : graph->nodes()) { - n->ClearAttr("_xla_inferred_shapes"); - } - - std::unordered_map index = graph->BuildNodeNameIndex(); - string function = index.at("replicate0")->type_string(); - - // Tests the outer graph is as expected. - { - std::unique_ptr outer = MakeOuterGraph(flib_def, function); - GraphDef expected_def; - outer->ToGraphDef(&expected_def); - - GraphDef actual_def; - graph->ToGraphDef(&actual_def); - TF_EXPECT_GRAPH_EQ_INTERNAL(expected_def, actual_def); - } - - // Tests the encapsulated body graph is as expected. - { - std::unique_ptr body = MakeBodyGraph(); - GraphDef expected_body_def; - body->ToGraphDef(&expected_body_def); - - InstantiationResultForTest result; - TF_EXPECT_OK(InstantiateFunctionForTest(function, flib_def, &result)); - - EXPECT_EQ((DataTypeVector{DT_INT32, DT_FLOAT, DT_RESOURCE, DT_RESOURCE, - DT_INT32, DT_FLOAT, DT_RESOURCE, DT_RESOURCE, - DT_DOUBLE, DT_DOUBLE}), - result.arg_types); - EXPECT_EQ( - (DataTypeVector{DT_INT32, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT}), - result.ret_types); - TF_EXPECT_GRAPH_EQ(expected_body_def, result.gdef); - } - - // Encapsulates the same computation again, verifies we reuse the same - // function. 
Encapsulation should be deterministic to avoid recompilation. - TF_ASSERT_OK( - EncapsulateTPUComputationsPass::Encapsulate(&graph_copy, &flib_def)); - std::unordered_map index_copy = - graph_copy->BuildNodeNameIndex(); - string function_copy = index_copy.at("replicate0")->type_string(); - EXPECT_EQ(function, function_copy); -} - -TEST(EncapsulateTPUComputations, BuildTPUReplicateOps) { - std::unique_ptr body_graph = MakeBodyGraph(); - FunctionDefLibrary flib; - TF_ASSERT_OK( - GraphToFunctionDef(*body_graph, "replicate0", flib.add_function())); - - FunctionLibraryDefinition flib_def(OpRegistry::Global(), flib); - - std::unique_ptr graph = MakeOuterGraph(flib_def, "replicate0"); - TF_ASSERT_OK( - EncapsulateTPUComputationsPass::BuildTPUReplicateOps(graph.get())); - - Scope scope = Scope::NewRootScope().ExitOnError(); - TF_EXPECT_OK(scope.graph()->AddFunctionLibrary(flib)); - - auto a0 = ops::Placeholder(scope.WithOpName("A0"), DT_INT32); - auto a1 = ops::Placeholder(scope.WithOpName("A1"), DT_INT32); - auto b0 = ops::Placeholder(scope.WithOpName("B0"), DT_FLOAT); - auto b1 = ops::Placeholder(scope.WithOpName("B1"), DT_FLOAT); - auto u0 = ops::Placeholder(scope.WithOpName("U0"), DT_RESOURCE); - auto u1 = ops::Placeholder(scope.WithOpName("U1"), DT_RESOURCE); - auto z = ops::Placeholder(scope.WithOpName("Z"), DT_RESOURCE); - auto c = ops::Placeholder(scope.WithOpName("C"), DT_INT32); - auto d = ops::Placeholder(scope.WithOpName("D"), DT_FLOAT); - auto v = ops::Placeholder(scope.WithOpName("V"), DT_RESOURCE); - auto w = ops::Placeholder(scope.WithOpName("W"), DT_RESOURCE); - auto x = - ops::Identity(scope.WithOpName("X"), - ops::Placeholder(scope.WithOpName("X_Holder"), DT_DOUBLE)); - auto y = - ops::Identity(scope.WithOpName("Y"), - ops::Placeholder(scope.WithOpName("Y_Holder"), DT_DOUBLE)); - - NameAttrList function; - function.set_name("replicate0"); - auto replicate = ops::_TPUReplicate( - scope.WithOpName("replicate0"), - std::initializer_list{a0, b0, u0, a1, b1, u1, z}, - std::initializer_list{c, d}, std::initializer_list{v, w}, - std::initializer_list{x, y}, function, - /*num_replicas=*/2, - {DT_INT32, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_INT32, DT_FLOAT, - DT_FLOAT, DT_FLOAT, DT_FLOAT}, - ops::_TPUReplicate::NumCoresPerReplica(6).NumDistributedVariables(1)); - - auto consumer0_0a = - ops::Identity(scope.WithOpName("consumer0_0a"), replicate.outputs[0]); - auto consumer0_0b = - ops::Identity(scope.WithOpName("consumer0_0b"), replicate.outputs[0]); - auto consumer0_1 = - ops::Identity(scope.WithOpName("consumer0_1"), replicate.outputs[5]); - auto consumer1 = - ops::Identity(scope.WithOpName("consumer1"), replicate.outputs[6]); - auto consumer2 = - ops::Identity(scope.WithOpName("consumer2"), replicate.outputs[2]); - auto consumer3a = - ops::Identity(scope.WithOpName("consumer3a"), replicate.outputs[3]); - auto consumer3b = - ops::Identity(scope.WithOpName("consumer3b"), replicate.outputs[8]); - auto consumer4a = - ops::Identity(scope.WithOpName("consumer4a"), replicate.outputs[4]); - auto consumer4b = - ops::Identity(scope.WithOpName("consumer4b"), replicate.outputs[9]); - - GraphDef expected_def; - TF_ASSERT_OK(scope.ToGraphDef(&expected_def)); - - GraphDef actual_def; - graph->ToGraphDef(&actual_def); - TF_EXPECT_GRAPH_EQ(expected_def, actual_def); -} - -class ExtractOutsideCompilationByScope : public ::testing::TestWithParam { -}; - -Status PivotControlExists(const Node* node, const Node* pivot) { - for (const Edge* edge : node->in_edges()) { - if (edge->IsControlEdge() && 
(edge->src() == pivot)) { - return Status::OK(); - } - } - return errors::NotFound("Control edge with pivot not found."); -} - -TEST_P(ExtractOutsideCompilationByScope, - MoveHeadAndTailOutsideCompilationToHost) { - FunctionLibraryDefinition fld(OpRegistry::Global(), FunctionDefLibrary()); - - // Create FunctionLibraryRuntime. - SessionOptions session_options; - std::vector> devices; - TF_CHECK_OK(DeviceFactory::AddDevices( - session_options, "/job:localhost/replica:0/task:0", &devices)); - OptimizerOptions opts; - auto device_mgr = absl::make_unique(std::move(devices)); - auto pflr = absl::make_unique( - device_mgr.get(), Env::Default(), /*config=*/nullptr, - TF_GRAPH_DEF_VERSION, &fld, opts, - /*default_thread_pool=*/nullptr); - auto flr = pflr->GetFLR("/job:localhost/replica:0/task:0/cpu:0"); - - { - // Build TPU replicate function. - // arg0 = _Arg[index = 0, T = DT_STRING] - // arg1 = _Arg[index = 1, T = DT_INT32] - // arg2 = _Arg[index = 2, T = DT_RESOURCE] - // as_int = StringToNumber[out_type = DT_INT32](arg0) (oc node) - // add = Add(as_int, arg1) - // as_string = AsString(add) (oc node) - // read_var = ops::ReadVariableOp(arg2) - // ret0 = _RetVal[index = 0, T = DT_STRING](as_string) - // ret1 = _RetVal[index = 1, T = DT_INT32](add) - // ret2 = _RetVal[index = 1, T = DT_FLOAT](read_var) - Scope s = Scope::NewRootScope().ExitOnError(); - auto arg0 = ops::_Arg(s.WithOpName("arg0"), DT_STRING, 0); - auto arg1 = ops::_Arg(s.WithOpName("arg1"), DT_INT32, 1); - auto arg2 = ops::_Arg(s.WithOpName("arg2"), DT_RESOURCE, 2); - auto as_int = ops::StringToNumber(s.WithOpName("as_int"), arg0, - ops::StringToNumber::OutType(DT_INT32)); - auto add = ops::Add(s.WithOpName("add"), as_int, arg1); - auto as_string = ops::AsString(s.WithOpName("as_string"), add); - auto read_var = - ops::ReadVariableOp(s.WithOpName("ReadVar"), arg2, DT_FLOAT); - auto ret0 = ops::_Retval(s.WithOpName("ret0"), as_string, 0); - auto ret1 = ops::_Retval(s.WithOpName("ret1"), add, 1); - auto ret2 = ops::_Retval(s.WithOpName("ret2"), read_var, 2); - Graph g(OpRegistry::Global()); - TF_ASSERT_OK(s.ToGraph(&g)); - auto node_name_index = g.BuildNodeNameIndex(); - node_name_index["as_int"]->AddAttr("oc", "0"); - node_name_index["as_string"]->AddAttr("oc", "0"); - FunctionDef fdef; - TF_ASSERT_OK(GraphToFunctionDef(g, "cluster", &fdef)); - TF_ASSERT_OK(fld.AddFunctionDef(fdef)); - } - - string control_flow_scope = GetParam() ? "scope/" : ""; - string pivot_name = absl::StrCat(control_flow_scope, "tpu_replicate/pivot"); - Graph host_graph(OpRegistry::Global()); - NameAttrList function; - function.set_name("cluster"); - { - // Build host graph. 
- // input00 = Placeholder[T = DT_STRING] - // input01 = Placeholder[T = DT_INT32] - // input10 = Placeholder[T = DT_STRING] - // input11 = Placeholder[T = DT_INT32] - // input2 = Placeholder[T = DT_RESOURCE] - // tpu_replicate = _TPUReplicate(input00, input01, input10, input11) - // output = IdentityN(tpu_replicate, tpu_replicate:1, tpu_replicate:2, - // tpu_replicate:3, tpu_replicate:4, tpu_replicate:5) - Scope s = Scope::NewRootScope().ExitOnError(); - auto pivot = ops::NoOp(s.WithOpName(pivot_name)); - pivot.operation.node()->AddAttr("_pivot_for_cluster", "cluster"); - auto input00 = ops::Placeholder(s.WithOpName("input00"), DT_STRING); - auto input01 = ops::Placeholder(s.WithOpName("input01"), DT_INT32); - auto input10 = ops::Placeholder(s.WithOpName("input10"), DT_STRING); - auto input11 = ops::Placeholder(s.WithOpName("input11"), DT_INT32); - auto input2 = ops::Placeholder(s.WithOpName("input2"), DT_RESOURCE); - auto control_scope = s.WithControlDependencies({pivot}); - auto replicate = ops::_TPUReplicate( - control_scope.WithOpName("tpu_replicate"), - std::initializer_list{input00, input01, input10, input11, - input2}, - std::initializer_list{}, std::initializer_list{}, - std::initializer_list{}, function, - /*num_replicas=*/2, - {DT_STRING, DT_INT32, DT_FLOAT, DT_STRING, DT_INT32, DT_FLOAT}, - ops::_TPUReplicate::NumCoresPerReplica(1).NumDistributedVariables(1)); - auto output = ops::IdentityN( - s.WithOpName("output"), - std::initializer_list{ - replicate.outputs[0], replicate.outputs[1], replicate.outputs[2], - replicate.outputs[3], replicate.outputs[4], replicate.outputs[5]}); - TF_ASSERT_OK(s.ToGraph(&host_graph)); - } - auto node_name_index = host_graph.BuildNodeNameIndex(); - Node* replicate_node = node_name_index["tpu_replicate"]; - - std::unordered_map clusters; - clusters.emplace("cluster", - XlaClusterInfo{"cluster", function, replicate_node, - std::map{}}); - int lifted_arg_count = 0; - TF_ASSERT_OK(ExtractOutsideCompilationPass::ProcessHeadTailOutsideCompilation( - "oc", &lifted_arg_count, &clusters, &host_graph, flr, &fld)); - node_name_index = host_graph.BuildNodeNameIndex(); - replicate_node = node_name_index["tpu_replicate"]; - - { - // Check host graph. - const Edge* e; - Node* pivot = node_name_index[pivot_name]; - // Check that we have input00 -> as_int/R0 -> tpu_replicate. - Node* as_int_R0 = node_name_index["as_int_head_oc/R0"]; - EXPECT_NE(as_int_R0, nullptr); - TF_ASSERT_OK(as_int_R0->input_edge(0, &e)); - EXPECT_EQ(e->src(), node_name_index["input00"]); - TF_ASSERT_OK(replicate_node->input_edge(1, &e)); - EXPECT_EQ(e->src(), as_int_R0); - // Check that as_int/R0 has pivot as control input - TF_EXPECT_OK(PivotControlExists(as_int_R0, pivot)); - // Check that we have input10 -> as_int/R1 -> tpu_replicate. - Node* as_int_R1 = node_name_index["as_int_head_oc/R1"]; - EXPECT_NE(as_int_R1, nullptr); - TF_ASSERT_OK(as_int_R1->input_edge(0, &e)); - EXPECT_EQ(e->src(), node_name_index["input10"]); - TF_ASSERT_OK(replicate_node->input_edge(3, &e)); - EXPECT_EQ(e->src(), as_int_R1); - // Check that as_int/R0 has pivot as control input - TF_EXPECT_OK(PivotControlExists(as_int_R1, pivot)); - // Check that we have tpu_replicate -> as_string/R0 -> output. 
- Node* as_string_R0 = node_name_index["as_string_tail_oc/R0"]; - EXPECT_NE(as_string_R0, nullptr); - TF_ASSERT_OK(as_string_R0->input_edge(0, &e)); - EXPECT_EQ(e->src(), replicate_node); - TF_ASSERT_OK(node_name_index["output"]->input_edge(0, &e)); - EXPECT_EQ(e->src(), as_string_R0); - // Check that as_string/R0 has pivot as control input - TF_EXPECT_OK(PivotControlExists(as_string_R0, pivot)); - // Check that we have tpu_replicate -> as_string/R1 -> output. - Node* as_string_R1 = node_name_index["as_string_tail_oc/R1"]; - EXPECT_NE(as_string_R1, nullptr); - TF_ASSERT_OK(as_string_R1->input_edge(0, &e)); - EXPECT_EQ(e->src(), replicate_node); - TF_ASSERT_OK(node_name_index["output"]->input_edge(3, &e)); - EXPECT_EQ(e->src(), as_string_R1); - // Check that as_string/R1 has pivot as control input - TF_EXPECT_OK(PivotControlExists(as_string_R1, pivot)); - } - - { - // Check TPU graph. - const FunctionDef* fdef = fld.Find("cluster"); - EXPECT_NE(fdef, nullptr); - // Check its signature, should have 2 DT_INT32 inputs, 1 DT_RESOURCE input, - // 2 DT_INT32 outputs and 1 DT_FLOAT output. - EXPECT_EQ(fdef->signature().input_arg_size(), 3); - EXPECT_EQ(fdef->signature().input_arg(0).type(), DT_INT32); - EXPECT_EQ(fdef->signature().input_arg(1).type(), DT_INT32); - EXPECT_EQ(fdef->signature().input_arg(2).type(), DT_RESOURCE); - EXPECT_EQ(fdef->signature().output_arg_size(), 3); - EXPECT_EQ(fdef->signature().output_arg(0).type(), DT_INT32); - EXPECT_EQ(fdef->signature().output_arg(1).type(), DT_FLOAT); - EXPECT_EQ(fdef->signature().output_arg(2).type(), DT_INT32); - // Check that it has no StringToNumber/AsString op any more. - for (const NodeDef& node_def : fdef->node_def()) { - EXPECT_NE(node_def.op(), "StringToNumber"); - EXPECT_NE(node_def.op(), "AsString"); - } - } -} - -INSTANTIATE_TEST_SUITE_P(All, ExtractOutsideCompilationByScope, - ::testing::ValuesIn({true, false})); - -TEST(ExtractOutsideCompilation, RemoveArgRetvalPair) { - FunctionLibraryDefinition fld(OpRegistry::Global(), FunctionDefLibrary()); - - // Create FunctionLibraryRuntime. - SessionOptions session_options; - std::vector> devices; - TF_CHECK_OK(DeviceFactory::AddDevices( - session_options, "/job:localhost/replica:0/task:0", &devices)); - OptimizerOptions opts; - auto device_mgr = absl::make_unique(std::move(devices)); - auto pflr = absl::make_unique( - device_mgr.get(), Env::Default(), /*config=*/nullptr, - TF_GRAPH_DEF_VERSION, &fld, opts, - /*default_thread_pool=*/nullptr); - auto flr = pflr->GetFLR("/job:localhost/replica:0/task:0/cpu:0"); - - { - // Build TPU replicate function. 
- // arg0 = _Arg[index = 0, T = DT_STRING] - // arg1 = _Arg[index = 1, T = DT_FLOAT] - // arg2 = _Arg[index = 2, T = DT_INT32] - // arg3 = _Arg[index = 3, T = DT_RESOURCE] - // arg4 = _Arg[index = 4, T = DT_RESOURCE] - // add = Add(arg2, arg2) - // read = ReadVariableOp(arg4) - // ret0 = _RetVal[index = 0, T = DT_STRING](arg0) - // ret1 = _RetVal[index = 1, T = DT_INT32](add) - // ret2 = _RetVal[index = 2, T = DT_FLOAT](read) - // ret3 = _RetVal[index = 3, T = DT_RESOURCE](arg3) - Scope s = Scope::NewRootScope().ExitOnError(); - auto arg0 = ops::_Arg(s.WithOpName("arg0"), DT_STRING, 0); - auto arg1 = ops::_Arg(s.WithOpName("arg1"), DT_FLOAT, 1); - auto arg2 = ops::_Arg(s.WithOpName("arg2"), DT_INT32, 2); - auto arg3 = ops::_Arg(s.WithOpName("arg3"), DT_RESOURCE, 3); - auto arg4 = ops::_Arg(s.WithOpName("arg4"), DT_RESOURCE, 4); - auto add = ops::Add(s.WithOpName("add"), arg2, arg2); - auto ret0 = ops::_Retval(s.WithOpName("ret0"), arg0, 0); - auto ret1 = ops::_Retval(s.WithOpName("ret1"), add, 1); - auto read = ops::ReadVariableOp(s.WithOpName("read"), arg4, DT_FLOAT); - auto ret2 = ops::_Retval(s.WithOpName("ret2"), read, 2); - auto ret3 = ops::_Retval(s.WithOpName("ret3"), arg3, 3); - Graph g(OpRegistry::Global()); - TF_ASSERT_OK(s.ToGraph(&g)); - FunctionDef fdef; - TF_ASSERT_OK(GraphToFunctionDef(g, "cluster", &fdef)); - TF_ASSERT_OK(fld.AddFunctionDef(fdef)); - } - - Graph host_graph(OpRegistry::Global()); - NameAttrList function; - function.set_name("cluster"); - { - // Build host graph. - // input00 = Placeholder[T = DT_STRING] - // input01 = Placeholder[T = DT_FLOAT] - // input02 = Placeholder[T = DT_INT32] - // input10 = Placeholder[T = DT_STRING] - // input11 = Placeholder[T = DT_FLOAT] - // input12 = Placeholder[T = DT_INT32] - // input3 = Placeholder[T = DT_RESOURCE], distributed variable - // input4 = Placeholder[T = DT_RESOURCE], distributed variable - // tpu_replicate = _TPUReplicate(input00, input01, input02, input10, - // input11, input12, input3, input4) - // output = IdentityN(tpu_replicate, tpu_replicate:1, tpu_replicate:2, - // tpu_replicate:3, tpu_replicate:4, tpu_replicate:5, - // tpu_replicate:6, tpu_replicate:7) - Scope s = Scope::NewRootScope().ExitOnError(); - auto input00 = ops::Placeholder(s.WithOpName("input00"), DT_STRING); - auto input01 = ops::Placeholder(s.WithOpName("input01"), DT_FLOAT); - auto input02 = ops::Placeholder(s.WithOpName("input02"), DT_INT32); - auto input10 = ops::Placeholder(s.WithOpName("input10"), DT_STRING); - auto input11 = ops::Placeholder(s.WithOpName("input11"), DT_FLOAT); - auto input12 = ops::Placeholder(s.WithOpName("input12"), DT_INT32); - auto input3 = ops::Placeholder(s.WithOpName("input3"), DT_RESOURCE); - auto input4 = ops::Placeholder(s.WithOpName("input3"), DT_RESOURCE); - auto replicate = ops::_TPUReplicate( - s.WithOpName("tpu_replicate"), - std::initializer_list{input00, input01, input02, input10, - input11, input12, input3, input4}, - std::initializer_list{}, std::initializer_list{}, - std::initializer_list{}, function, - /*num_replicas=*/2, - {DT_STRING, DT_INT32, DT_FLOAT, DT_RESOURCE, DT_STRING, DT_INT32, - DT_FLOAT, DT_RESOURCE}, - ops::_TPUReplicate::NumCoresPerReplica(1).NumDistributedVariables(2)); - auto output = ops::IdentityN( - s.WithOpName("output"), - std::initializer_list{ - replicate.outputs[0], replicate.outputs[1], replicate.outputs[2], - replicate.outputs[3], replicate.outputs[4], replicate.outputs[5], - replicate.outputs[6], replicate.outputs[7]}); - TF_ASSERT_OK(s.ToGraph(&host_graph)); - } - auto 
node_name_index = host_graph.BuildNodeNameIndex(); - Node* replicate_node = node_name_index["tpu_replicate"]; - - std::unordered_map clusters; - clusters.emplace("cluster", - XlaClusterInfo{"cluster", function, replicate_node, - std::map{}}); - int lifted_arg_count = 0; - TF_ASSERT_OK(ExtractOutsideCompilationPass::ProcessHeadTailOutsideCompilation( - "oc", &lifted_arg_count, &clusters, &host_graph, flr, &fld)); - node_name_index = host_graph.BuildNodeNameIndex(); - replicate_node = node_name_index["tpu_replicate"]; - Node* output = node_name_index["output"]; - - EXPECT_EQ(replicate_node->num_inputs(), 3); - const DataTypeVector expected_input_types = {DT_INT32, DT_INT32, DT_RESOURCE}; - EXPECT_EQ(replicate_node->input_types(), expected_input_types); - EXPECT_EQ(replicate_node->num_outputs(), 4); - const DataTypeVector expected_output_types = {DT_INT32, DT_FLOAT, DT_INT32, - DT_FLOAT}; - EXPECT_EQ(replicate_node->output_types(), expected_output_types); - - { - // Check host graph. - Node* input_node; - // Check that we have input00 -> output:1. - TF_ASSERT_OK(output->input_node(0, &input_node)); - EXPECT_EQ(input_node->name(), "input00"); - // Check that we have input10 -> output:4. - TF_ASSERT_OK(output->input_node(4, &input_node)); - EXPECT_EQ(input_node->name(), "input10"); - // Check that we have input3 -> output:3, output:7. - TF_ASSERT_OK(output->input_node(3, &input_node)); - EXPECT_EQ(input_node->name(), "input3"); - TF_ASSERT_OK(output->input_node(7, &input_node)); - EXPECT_EQ(input_node->name(), "input3"); - } - - { - // Check TPU graph. - const FunctionDef* fdef = fld.Find("cluster"); - EXPECT_NE(fdef, nullptr); - // Check its signature, should have 1 DT_INT32 input, 1 DT_RESOURCE input, - // 1 DT_INT32 output and 1 DT_FLOAT output - EXPECT_EQ(fdef->signature().input_arg_size(), 2); - EXPECT_EQ(fdef->signature().input_arg(0).type(), DT_INT32); - EXPECT_EQ(fdef->signature().input_arg(1).type(), DT_RESOURCE); - EXPECT_EQ(fdef->signature().output_arg_size(), 2); - EXPECT_EQ(fdef->signature().output_arg(0).type(), DT_INT32); - EXPECT_EQ(fdef->signature().output_arg(1).type(), DT_FLOAT); - } -} - -} // namespace tensorflow diff --git a/tensorflow/core/tpu/graph_rewrite/variable_merger_pass_test.cc b/tensorflow/core/tpu/graph_rewrite/variable_merger_pass_test.cc deleted file mode 100644 index 4241a481d09..00000000000 --- a/tensorflow/core/tpu/graph_rewrite/variable_merger_pass_test.cc +++ /dev/null @@ -1,205 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#include "tensorflow/core/tpu/graph_rewrite/variable_merger_pass.h" - -#include -#include - -#include "tensorflow/cc/framework/ops.h" -#include "tensorflow/cc/ops/resource_variable_ops.h" -#include "tensorflow/cc/ops/standard_ops.h" -#include "tensorflow/core/framework/partial_tensor_shape.h" -#include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow/core/platform/test.h" -#include "tensorflow/core/util/equal_graph_def.h" - -namespace tensorflow { -namespace { - -TEST(VarHandleMerger, SimpleMergesWork) { - std::unique_ptr graph(new Graph(OpRegistry::Global())); - { - Scope scope = Scope::NewRootScope().ExitOnError(); - auto v = ops::VarHandleOp( - scope.WithOpName("V"), DT_FLOAT, TensorShape({}), - ops::VarHandleOp::Attrs().Container("c1").SharedName("n1")); - auto w = ops::VarHandleOp( - scope.WithOpName("W"), DT_INT32, TensorShape({77}), - ops::VarHandleOp::Attrs().Container("c2").SharedName("n2")); - auto v_read = ops::ReadVariableOp(scope.WithOpName("VRead"), v, DT_FLOAT); - auto w_read = ops::ReadVariableOp(scope.WithOpName("WRead"), w, DT_INT32); - auto w_cast = ops::Cast(scope.WithOpName("Cast"), w_read, DT_FLOAT); - ops::Sub(scope.WithOpName("Sub"), v_read, w_cast); - TF_ASSERT_OK(scope.ToGraph(graph.get())); - } - - VariableMergerPass merger; - GraphOptimizationPassOptions options; - options.graph = &graph; - TF_ASSERT_OK(merger.Run(options)); - GraphDef actual; - graph->ToGraphDef(&actual); - - GraphDef expected; - { - Scope scope = Scope::NewRootScope().ExitOnError(); - auto handles = ops::_VarHandlesOp( - scope.WithOpName("VarHandles_10315266686041849873/_0"), - /*containers=*/{"c1", "c2"}, - /*shared_names=*/{"n1", "n2"}, /*N=*/2, /*dtypes=*/{DT_FLOAT, DT_INT32}, - /*shapes=*/{TensorShape({}), TensorShape({77})}); - auto read = ops::_ReadVariablesOp( - scope.WithOpName("ReadVariables_13269360303885824085/_1"), - /*resources=*/{handles[0], handles[1]}, - /*dtypes=*/{DT_FLOAT, DT_INT32}); - auto w_cast = ops::Cast(scope.WithOpName("Cast"), read[1], DT_FLOAT); - ops::Sub(scope.WithOpName("Sub"), read[0], w_cast); - TF_ASSERT_OK(scope.ToGraphDef(&expected)); - } - - TF_EXPECT_GRAPH_EQ(expected, actual); -} - -TEST(VarHandleMerger, VarHandlesWithControlDepsDontMerge) { - std::unique_ptr graph(new Graph(OpRegistry::Global())); - { - Scope scope = Scope::NewRootScope().ExitOnError(); - auto v = ops::VarHandleOp( - scope.WithOpName("V"), DT_FLOAT, TensorShape({}), - ops::VarHandleOp::Attrs().Container("c1").SharedName("n1")); - auto w = ops::VarHandleOp( - scope.WithOpName("W").WithControlDependencies(v), DT_INT32, - TensorShape({77}), - ops::VarHandleOp::Attrs().Container("c2").SharedName("n2")); - TF_ASSERT_OK(scope.ToGraph(graph.get())); - } - - GraphDef expected; - graph->ToGraphDef(&expected); - - VariableMergerPass merger; - GraphOptimizationPassOptions options; - options.graph = &graph; - TF_ASSERT_OK(merger.Run(options)); - GraphDef actual; - graph->ToGraphDef(&actual); - - TF_EXPECT_GRAPH_EQ(expected, actual); -} - -TEST(VarHandleMerger, ReadVariableOpsWithDifferentControlDepsDontMerge) { - std::unique_ptr graph(new Graph(OpRegistry::Global())); - { - Scope scope = Scope::NewRootScope().ExitOnError(); - auto w = ops::VarHandleOp( - scope.WithOpName("W"), DT_INT32, TensorShape({77}), - ops::VarHandleOp::Attrs().Container("c2").SharedName("n2")); - auto v = ops::VarHandleOp( - scope.WithOpName("V"), DT_FLOAT, TensorShape({}), - 
ops::VarHandleOp::Attrs().Container("c1").SharedName("n1")); - auto w_read = ops::ReadVariableOp(scope.WithOpName("WRead"), w, DT_INT32); - auto w_cast = ops::Cast(scope.WithOpName("Cast"), w_read, DT_FLOAT); - auto v_read = ops::ReadVariableOp( - scope.WithOpName("VRead").WithControlDependencies(w_cast), v, DT_FLOAT); - ops::Sub(scope.WithOpName("Sub"), v_read, w_cast); - TF_ASSERT_OK(scope.ToGraph(graph.get())); - } - - VariableMergerPass merger; - GraphOptimizationPassOptions options; - options.graph = &graph; - TF_ASSERT_OK(merger.Run(options)); - GraphDef actual; - graph->ToGraphDef(&actual); - - GraphDef expected; - { - Scope scope = Scope::NewRootScope().ExitOnError(); - auto handles = ops::_VarHandlesOp( - scope.WithOpName("VarHandles_10315266686041849873/_0"), - /*containers=*/{"c1", "c2"}, - /*shared_names=*/{"n1", "n2"}, /*N=*/2, /*dtypes=*/{DT_FLOAT, DT_INT32}, - /*shapes=*/{TensorShape({}), TensorShape({77})}); - auto w_read = - ops::ReadVariableOp(scope.WithOpName("WRead"), handles[1], DT_INT32); - auto w_cast = ops::Cast(scope.WithOpName("Cast"), w_read, DT_FLOAT); - auto v_read = ops::ReadVariableOp( - scope.WithOpName("VRead").WithControlDependencies(w_cast), handles[0], - DT_FLOAT); - ops::Sub(scope.WithOpName("Sub"), v_read, w_cast); - TF_ASSERT_OK(scope.ToGraphDef(&expected)); - } - - TF_EXPECT_GRAPH_EQ(expected, actual); -} - -TEST(VarHandleMerger, ReadVariableOpsWithSameControlDepsMerge) { - std::unique_ptr graph(new Graph(OpRegistry::Global())); - { - Scope scope = Scope::NewRootScope().ExitOnError(); - auto u = ops::VarHandleOp( - scope.WithOpName("U"), DT_FLOAT, TensorShape({}), - ops::VarHandleOp::Attrs().Container("c1").SharedName("n1")); - auto v = ops::VarHandleOp( - scope.WithOpName("V"), DT_FLOAT, TensorShape({}), - ops::VarHandleOp::Attrs().Container("c2").SharedName("n2")); - auto w = ops::VarHandleOp( - scope.WithOpName("W"), DT_INT32, TensorShape({77}), - ops::VarHandleOp::Attrs().Container("c3").SharedName("n3")); - - auto w_read = ops::ReadVariableOp(scope.WithOpName("WRead"), w, DT_INT32); - auto w_cast = ops::Cast(scope.WithOpName("Cast"), w_read, DT_FLOAT); - auto v_read = ops::ReadVariableOp( - scope.WithOpName("VRead").WithControlDependencies(w_cast), v, DT_FLOAT); - auto u_read = ops::ReadVariableOp( - scope.WithOpName("URead").WithControlDependencies(w_cast), u, DT_FLOAT); - auto d = ops::Sub(scope.WithOpName("Sub"), v_read, w_cast); - ops::Sub(scope.WithOpName("Add"), d, u_read); - TF_ASSERT_OK(scope.ToGraph(graph.get())); - } - - VariableMergerPass merger; - GraphOptimizationPassOptions options; - options.graph = &graph; - TF_ASSERT_OK(merger.Run(options)); - GraphDef actual; - graph->ToGraphDef(&actual); - - GraphDef expected; - { - Scope scope = Scope::NewRootScope().ExitOnError(); - auto handles = ops::_VarHandlesOp( - scope.WithOpName("VarHandles_15520412301618992443/_0"), - /*containers=*/{"c1", "c2", "c3"}, - /*shared_names=*/{"n1", "n2", "n3"}, /*N=*/3, - /*dtypes=*/{DT_FLOAT, DT_FLOAT, DT_INT32}, - /*shapes=*/{TensorShape({}), TensorShape({}), TensorShape({77})}); - auto w_read = - ops::ReadVariableOp(scope.WithOpName("WRead"), handles[2], DT_INT32); - auto w_cast = ops::Cast(scope.WithOpName("Cast"), w_read, DT_FLOAT); - auto read = ops::_ReadVariablesOp( - scope.WithOpName("ReadVariables_8281595736094071329/_1") - .WithControlDependencies(w_cast), - /*resources=*/{handles[0], handles[1]}, - /*dtypes=*/{DT_FLOAT, DT_FLOAT}); - auto d = ops::Sub(scope.WithOpName("Sub"), read[1], w_cast); - ops::Sub(scope.WithOpName("Add"), d, read[0]); 
- TF_ASSERT_OK(scope.ToGraphDef(&expected)); - } - - TF_EXPECT_GRAPH_EQ(expected, actual); -} -} // namespace -} // namespace tensorflow From 9c4aaa21db2c8133e41e7bfdbe3a09d6e055af23 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Tue, 14 Jul 2020 18:08:16 -0700 Subject: [PATCH 0443/2522] Internal cleanup: consolidate and simplify tests. PiperOrigin-RevId: 321274479 Change-Id: I3399f4f60b5ef2c9c74911f0bdbc0ae556bb11ec --- tensorflow/python/autograph/impl/BUILD | 22 +++- .../python/autograph/impl/api_py3_test.py | 84 +++++++++++++ tensorflow/python/autograph/impl/api_test.py | 115 ++++++------------ 3 files changed, 140 insertions(+), 81 deletions(-) create mode 100644 tensorflow/python/autograph/impl/api_py3_test.py diff --git a/tensorflow/python/autograph/impl/BUILD b/tensorflow/python/autograph/impl/BUILD index ae7f610ea48..a21b0df2ce8 100644 --- a/tensorflow/python/autograph/impl/BUILD +++ b/tensorflow/python/autograph/impl/BUILD @@ -37,11 +37,9 @@ py_library( ], ) -py_test( +tf_py_test( name = "api_test", srcs = ["api_test.py"], - python_version = "PY3", - srcs_version = "PY3", deps = [ ":impl", "//tensorflow/python:client_testlib", @@ -51,6 +49,24 @@ py_test( ], ) +py_test( + name = "api_py3_test", + srcs = ["api_py3_test.py"], + python_version = "PY3", + srcs_version = "PY3", + tags = [ + "no_oss_py2", + "no_pip", + "nopip", + ], + deps = [ + ":impl", + "//tensorflow/python:client_testlib", + "//tensorflow/python:constant_op", + "//tensorflow/python/autograph/core", + ], +) + tf_py_test( name = "conversion_test", srcs = ["conversion_test.py"], diff --git a/tensorflow/python/autograph/impl/api_py3_test.py b/tensorflow/python/autograph/impl/api_py3_test.py new file mode 100644 index 00000000000..c460e478008 --- /dev/null +++ b/tensorflow/python/autograph/impl/api_py3_test.py @@ -0,0 +1,84 @@ +# python3 +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for api module.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import os + +from tensorflow.python.autograph.core import converter +from tensorflow.python.autograph.impl import api +from tensorflow.python.framework import constant_op +from tensorflow.python.platform import test + +DEFAULT_RECURSIVE = converter.ConversionOptions(recursive=True) + + +class ApiTest(test.TestCase): + + def test_converted_call_kwonly_args(self): + + def test_fn(*, a): + return a + + x = api.converted_call( + test_fn, (), {'a': constant_op.constant(-1)}, options=DEFAULT_RECURSIVE) + self.assertEqual(-1, self.evaluate(x)) + + def test_super_with_no_arg(self): + test_case_self = self + + class TestBase: + + def plus_three(self, x): + return x + 3 + + class TestSubclass(TestBase): + + def plus_three(self, x): + test_case_self.fail('This should never be called.') + + def no_arg(self, x): + return super().plus_three(x) + + tc = api.converted_call(TestSubclass, (), None, options=DEFAULT_RECURSIVE) + + self.assertEqual(5, tc.no_arg(2)) + + def test_converted_call_avoids_triggering_operators(self): + + test_self = self + + class Pair(collections.namedtuple('Pair', ['a', 'b'])): + + def __call__(self): + return self.a + self.b + + def __eq__(self, other): + test_self.fail('Triggered operator') + + p = Pair(constant_op.constant(1), constant_op.constant(2)) + + x = api.converted_call(p, (), {}, options=DEFAULT_RECURSIVE) + self.assertIsNotNone(self.evaluate(x), 3) + + +if __name__ == '__main__': + os.environ['AUTOGRAPH_STRICT_CONVERSION'] = '1' + test.main() diff --git a/tensorflow/python/autograph/impl/api_test.py b/tensorflow/python/autograph/impl/api_test.py index ad7e8e9fb37..5b885af43ac 100644 --- a/tensorflow/python/autograph/impl/api_test.py +++ b/tensorflow/python/autograph/impl/api_test.py @@ -33,6 +33,7 @@ import types import numpy as np import six +from tensorflow.python.autograph import utils from tensorflow.python.autograph.core import ag_ctx from tensorflow.python.autograph.core import converter from tensorflow.python.autograph.core import converter_testing @@ -46,15 +47,15 @@ from tensorflow.python.data.ops import dataset_ops from tensorflow.python.eager import def_function from tensorflow.python.eager import function from tensorflow.python.framework import constant_op -from tensorflow.python.framework import ops from tensorflow.python.framework import test_util -from tensorflow.python.ops import math_ops +from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import variables from tensorflow.python.platform import test from tensorflow.python.util import function_utils from tensorflow.python.util import tf_decorator from tensorflow.python.util import tf_inspect +tf = utils.fake_tf() global_n = 2 @@ -89,52 +90,6 @@ class ApiTest(test.TestCase): self.assertEmpty( tuple(o for o in objects_after if isinstance(o, TestResource))) - def test_converted_call_kwonly_args(self): - - def test_fn(*, a): - return a - - x = api.converted_call( - test_fn, (), {'a': constant_op.constant(-1)}, options=DEFAULT_RECURSIVE) - self.assertEqual(-1, self.evaluate(x)) - - def test_super_with_no_arg(self): - test_case_self = self - - class TestBase: - - def plus_three(self, x): - return x + 3 - - class TestSubclass(TestBase): - - def plus_three(self, x): - test_case_self.fail('This should never be called.') - - def no_arg(self, x): - 
return super().plus_three(x) - - tc = api.converted_call(TestSubclass, (), None, options=DEFAULT_RECURSIVE) - - self.assertEqual(5, tc.no_arg(2)) - - def test_converted_call_avoids_triggering_operators(self): - - test_self = self - - class Pair(collections.namedtuple('Pair', ['a', 'b'])): - - def __call__(self): - return self.a + self.b - - def __eq__(self, other): - test_self.fail('Triggered operator') - - p = Pair(constant_op.constant(1), constant_op.constant(2)) - - x = api.converted_call(p, (), {}, options=DEFAULT_RECURSIVE) - self.assertIsNotNone(self.evaluate(x), 3) - @test_util.run_deprecated_v1 def test_decorator_recursive(self): @@ -147,15 +102,16 @@ class ApiTest(test.TestCase): @api.convert(recursive=True) def test_method(self, x, s, a): - while math_ops.reduce_sum(x) > s: + while tf.reduce_sum(x) > s: x //= self.called_member(a) return x tc = TestClass() - x = tc.test_method( - constant_op.constant([2, 4]), constant_op.constant(1), - constant_op.constant(-2)) - self.assertListEqual([0, 1], self.evaluate(x).tolist()) + with self.cached_session() as sess: + x = tc.test_method( + constant_op.constant([2, 4]), constant_op.constant(1), + constant_op.constant(-2)) + self.assertListEqual([0, 1], self.evaluate(x).tolist()) @test_util.run_deprecated_v1 def test_decorator_not_recursive(self): @@ -163,19 +119,20 @@ class ApiTest(test.TestCase): class TestClass(object): def called_member(self, a): - return math_ops.negative(a) + return tf.negative(a) @api.convert(recursive=False) def test_method(self, x, s, a): - while math_ops.reduce_sum(x) > s: + while tf.reduce_sum(x) > s: x //= self.called_member(a) return x tc = TestClass() - x = tc.test_method( - constant_op.constant([2, 4]), constant_op.constant(1), - constant_op.constant(-2)) - self.assertListEqual([0, 1], self.evaluate(x).tolist()) + with self.cached_session() as sess: + x = tc.test_method( + constant_op.constant([2, 4]), constant_op.constant(1), + constant_op.constant(-2)) + self.assertListEqual([0, 1], self.evaluate(x).tolist()) @test_util.run_deprecated_v1 def test_convert_then_do_not_convert(self): @@ -184,11 +141,11 @@ class ApiTest(test.TestCase): @api.do_not_convert def called_member(self, a): - return math_ops.negative(a) + return tf.negative(a) @api.convert(recursive=True) def test_method(self, x, s, a): - while math_ops.reduce_sum(x) > s: + while tf.reduce_sum(x) > s: x //= self.called_member(a) return x @@ -211,15 +168,16 @@ class ApiTest(test.TestCase): @api.convert(recursive=True) def test_method(self, x, s, a): - while math_ops.reduce_sum(x) > s: + while tf.reduce_sum(x) > s: x //= self.called_member(a) return x tc = TestClass() - x = tc.test_method( - constant_op.constant([2, 4]), constant_op.constant(1), - constant_op.constant(-2)) - self.assertListEqual([0, 1], self.evaluate(x).tolist()) + with self.cached_session() as sess: + x = tc.test_method( + constant_op.constant([2, 4]), constant_op.constant(1), + constant_op.constant(-2)) + self.assertListEqual([0, 1], self.evaluate(x).tolist()) def test_decorator_preserves_argspec(self): @@ -276,7 +234,7 @@ class ApiTest(test.TestCase): @api.convert(recursive=True) def test_method(self, x, s, a): - while math_ops.reduce_sum(x) > s: + while tf.reduce_sum(x) > s: x //= api.converted_call( self.called_member, (a,), None, options=DEFAULT_RECURSIVE) return x @@ -686,7 +644,7 @@ class ApiTest(test.TestCase): opts = converter.ConversionOptions( user_requested=True, optional_features=None) - x = api.converted_call(math_ops.add, (1, 1), None, options=opts) + x = 
api.converted_call(gen_math_ops.add, (1, 1), None, options=opts) self.assertAllEqual(self.evaluate(x), 2) @@ -727,7 +685,7 @@ class ApiTest(test.TestCase): class TestClass(collections.namedtuple('TestNamedtuple', ('a', 'b'))): def test_method(self, x): - while math_ops.reduce_sum(x) > self.a: + while tf.reduce_sum(x) > self.a: x //= self.b return x @@ -755,7 +713,7 @@ class ApiTest(test.TestCase): class TestClass(collections.namedtuple('TestNamedtuple', ('a', 'b'))): def test_method(self, x): - while math_ops.reduce_sum(x) > self.a: + while tf.reduce_sum(x) > self.a: x //= self.b return x @@ -828,7 +786,7 @@ class ApiTest(test.TestCase): def f(): return dataset_ops.Dataset.range(-3, 3).map(other_fn) - # Dataset iteration only works inside math_ops. + # Dataset iteration only works inside tf. @def_function.function def graph_fn(): ds = api.converted_call(f, (), None, options=DEFAULT_RECURSIVE) @@ -954,13 +912,13 @@ class ApiTest(test.TestCase): def test_to_graph_basic(self): def test_fn(x, s): - while math_ops.reduce_sum(x) > s: + while tf.reduce_sum(x) > s: x //= 2 return x compiled_fn = api.to_graph(test_fn) - with ops.Graph().as_default(): + with tf.Graph().as_default(): x = compiled_fn(constant_op.constant((4, 8)), 4) self.assertAllEqual(self.evaluate(x), (1, 2)) @@ -970,14 +928,15 @@ class ApiTest(test.TestCase): foo = 4 def test_fn(x, s=foo): - while math_ops.reduce_sum(x) > s: + while tf.reduce_sum(x) > s: x //= 2 return x compiled_fn = api.to_graph(test_fn) - x = compiled_fn(constant_op.constant([4, 8])) - self.assertListEqual([1, 2], self.evaluate(x).tolist()) + with self.cached_session() as sess: + x = compiled_fn(constant_op.constant([4, 8])) + self.assertListEqual([1, 2], self.evaluate(x).tolist()) def test_to_graph_with_globals(self): @@ -1097,7 +1056,7 @@ class ApiTest(test.TestCase): def test_to_code_basic(self): def test_fn(x, s): - while math_ops.reduce_sum(x) > s: + while tf.reduce_sum(x) > s: x /= 2 return x @@ -1108,7 +1067,7 @@ class ApiTest(test.TestCase): @def_function.function def test_fn(x, s): - while math_ops.reduce_sum(x) > s: + while tf.reduce_sum(x) > s: x /= 2 return x From 5260ff1352d40ca3b75179150fefaa5fafbc8b93 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Tue, 14 Jul 2020 18:37:28 -0700 Subject: [PATCH 0444/2522] Remove the alias for tensorflow/core/framework:tensor_testutil PiperOrigin-RevId: 321277979 Change-Id: Icba7c61ed06a4339be284541bfbb67ac601d61be --- tensorflow/core/distributed_runtime/BUILD | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/distributed_runtime/BUILD b/tensorflow/core/distributed_runtime/BUILD index ae1253e7dc6..30512295a7e 100644 --- a/tensorflow/core/distributed_runtime/BUILD +++ b/tensorflow/core/distributed_runtime/BUILD @@ -79,9 +79,9 @@ tf_cc_test( deps = [ ":message_wrappers", "//tensorflow/core:protos_all_cc", - "//tensorflow/core:tensor_testutil", "//tensorflow/core:test", "//tensorflow/core:test_main", + "//tensorflow/core/framework:tensor_testutil", ], ) @@ -262,10 +262,10 @@ tf_cc_test( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", - "//tensorflow/core:tensor_testutil", "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core:worker_proto_cc", + "//tensorflow/core/framework:tensor_testutil", ], ) From d670379085561f5ee3a3e96a8e27ae2b0593c966 Mon Sep 17 00:00:00 2001 From: Ben Barsdell Date: Wed, 15 Jul 2020 11:49:11 +1000 Subject: [PATCH 0445/2522] [Grappler] Fix bug in UpdateSqueezeDims - The dim logic in 
generic_layout_transposer's SqueezeTransposer was incorrect, which caused two problems: - Negative dims were transformed incorrectly. - The dim check incorrectly failed when dim = rank - 1 (which can occur when transforming NCHW to NHWC). - Fixes the dim logic and adds tests to check these cases. --- .../generic_layout_optimizer_transposer.cc | 10 +- ...eneric_layout_optimizer_transposer_test.cc | 127 ++++++++++++++++++ 2 files changed, 132 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer.cc b/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer.cc index bedd16a18ce..a3449621405 100644 --- a/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer.cc +++ b/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer.cc @@ -1493,21 +1493,21 @@ Status SqueezeTransposer::UpdateSqueezeDims(TransposeContext* context, if (squeeze_dims_attr == nullptr) { return errors::InvalidArgument("Missing attribute ", kAttrSqueezeDims); } - const int max_num_squeeze_dim = context->src_format.length() - 1; - const int min_squeeze_dim = -(max_num_squeeze_dim + 1); + const int num_input_dims = context->src_format.length(); + const int min_squeeze_dim = -num_input_dims; std::vector squeeze_dims_mapped; const int squeeze_dims_size = squeeze_dims_attr->list().i_size(); squeeze_dims_mapped.reserve(squeeze_dims_size); for (int i = 0; i < squeeze_dims_size; ++i) { int dim = squeeze_dims_attr->list().i(i); - if (dim < min_squeeze_dim || dim >= max_num_squeeze_dim) { + if (dim < min_squeeze_dim || dim >= num_input_dims) { return errors::InvalidArgument( "Attribute '", kAttrSqueezeDims, "' contains out of range index '", dim, "', index must be between [", min_squeeze_dim, ", ", - max_num_squeeze_dim, ")"); + num_input_dims, ")"); } if (dim < 0) { - dim += max_num_squeeze_dim; + dim += num_input_dims; } squeeze_dims_mapped.push_back(context->dst_to_src[dim]); } diff --git a/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer_test.cc b/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer_test.cc index 90e96fec673..bf938b650bf 100644 --- a/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer_test.cc +++ b/tensorflow/core/grappler/optimizers/generic_layout_optimizer_transposer_test.cc @@ -2137,6 +2137,133 @@ TEST_F(TransposerTest, SqueezeTransposerTestSqueezeDimsUpdated) { VerifyRegularFaninMatch(z_output_node, 0, squeeze_node->GetName(), 0); } +// Same as SqueezeTransposerTestSqueezeDimsUpdated but with squeeze dims +// specified with negative values. 
+TEST_F(TransposerTest, SqueezeTransposerTestNegativeSqueezeDimsUpdated) { +#if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM) + GTEST_SKIP() << "Neither CUDA nor ROCm is enabled"; +#endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM) + GrapplerItem item; + Scope scope = Scope::NewRootScope(); + auto input = + ops::RandomUniform(scope.WithOpName("input"), {1, 1, 1, 8}, DT_FLOAT); + auto filter = + ops::RandomUniform(scope.WithOpName("filter"), {1, 1, 8, 1}, DT_FLOAT); + auto conv2d = ops::Conv2D( + scope.WithOpName("conv2d").WithDevice("/device:GPU:0"), input, filter, + {1, 1, 1, 1}, "SAME", ops::Conv2D::DataFormat(kSrcFormat)); + + auto squeeze_op = + ops::Squeeze(scope.WithOpName("squeeze").WithDevice("/device:GPU:0"), + conv2d, ops::Squeeze::Attrs().Axis({-3, -2})); + auto z = ops::Identity(scope.WithOpName("z"), squeeze_op); + TF_ASSERT_OK(scope.ToGraphDef(&item.graph)); + TransposeContext context; + TF_ASSERT_OK(TransposeContext::InitializeTransposeContext( + item, virtual_cluster_.get(), &context)); + context.AssignDeviceAndDataFormats(kGPU, kSrcFormat, kDstFormat); + + DefaultLayoutSensitiveOpTransposer conv2d_transposer; + auto* c2d = context.graph_view->GetNode("conv2d"); + ASSERT_NE(c2d, nullptr); + TF_ASSERT_OK(conv2d_transposer.TransposeNode(&context, c2d)); + + SqueezeTransposer squeeze_transposer; + auto* squeeze = context.graph_view->GetNode("squeeze"); + ASSERT_NE(squeeze, nullptr); + TF_ASSERT_OK(squeeze_transposer.TransposeNode(&context, squeeze)); + + auto* input_transpose_node1 = context.graph_view->GetNode( + "squeeze-0-TransposeNHWCToNCHW-LayoutOptimizer"); + ASSERT_NE(input_transpose_node1, nullptr); + ASSERT_EQ(input_transpose_node1->NumRegularFanins(), 2); + VerifyRegularFaninMatch(input_transpose_node1, 0, + "conv2d-0-0-TransposeNCHWToNHWC-LayoutOptimizer", 0); + + auto* squeeze_node = context.graph_view->GetNode("squeeze"); + ASSERT_NE(squeeze_node, nullptr); + ASSERT_EQ(squeeze_node->NumRegularFanins(), 1); + VerifyRegularFaninMatch(squeeze_node, 0, input_transpose_node1->GetName(), 0); + const auto* squeeze_dims_attr = squeeze_node->GetAttr("squeeze_dims"); + const auto& list = squeeze_dims_attr->list(); + ASSERT_EQ(list.i_size(), 2); + EXPECT_EQ(list.i(0), 2); + EXPECT_EQ(list.i(1), 3); + + auto* output_transpose_node = context.graph_view->GetNode( + "squeeze-0-0-TransposeNCHWToNHWC-LayoutOptimizer"); + EXPECT_EQ(output_transpose_node, nullptr); + + auto* z_output_node = context.graph_view->GetNode("z"); + ASSERT_NE(z_output_node, nullptr); + ASSERT_EQ(z_output_node->NumRegularFanins(), 1); + VerifyRegularFaninMatch(z_output_node, 0, squeeze_node->GetName(), 0); +} + +// Same as SqueezeTransposerTestSqueezeDimsUpdated but with the source and +// destination formats swapped (as is used in some cases when the data type is +// DT_HALF). 
+TEST_F(TransposerTest, SqueezeTransposerTestNCHWToNHWCSqueezeDimsUpdated) { +#if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM) + GTEST_SKIP() << "Neither CUDA nor ROCm is enabled"; +#endif // !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM) + GrapplerItem item; + Scope scope = Scope::NewRootScope(); + auto input = + ops::RandomUniform(scope.WithOpName("input"), {1, 8, 1, 1}, DT_FLOAT); + auto filter = + ops::RandomUniform(scope.WithOpName("filter"), {1, 1, 8, 1}, DT_FLOAT); + auto conv2d = ops::Conv2D( + scope.WithOpName("conv2d").WithDevice("/device:GPU:0"), input, filter, + {1, 1, 1, 1}, "SAME", ops::Conv2D::DataFormat(kDstFormat)); + + auto squeeze_op = + ops::Squeeze(scope.WithOpName("squeeze").WithDevice("/device:GPU:0"), + conv2d, ops::Squeeze::Attrs().Axis({2, 3})); + auto z = ops::Identity(scope.WithOpName("z"), squeeze_op); + TF_ASSERT_OK(scope.ToGraphDef(&item.graph)); + TransposeContext context; + TF_ASSERT_OK(TransposeContext::InitializeTransposeContext( + item, virtual_cluster_.get(), &context)); + context.AssignDeviceAndDataFormats(kGPU, kDstFormat, kSrcFormat); + + DefaultLayoutSensitiveOpTransposer conv2d_transposer; + auto* c2d = context.graph_view->GetNode("conv2d"); + ASSERT_NE(c2d, nullptr); + TF_ASSERT_OK(conv2d_transposer.TransposeNode(&context, c2d)); + + SqueezeTransposer squeeze_transposer; + auto* squeeze = context.graph_view->GetNode("squeeze"); + ASSERT_NE(squeeze, nullptr); + TF_ASSERT_OK(squeeze_transposer.TransposeNode(&context, squeeze)); + + auto* input_transpose_node1 = context.graph_view->GetNode( + "squeeze-0-TransposeNCHWToNHWC-LayoutOptimizer"); + ASSERT_NE(input_transpose_node1, nullptr); + ASSERT_EQ(input_transpose_node1->NumRegularFanins(), 2); + VerifyRegularFaninMatch(input_transpose_node1, 0, + "conv2d-0-0-TransposeNHWCToNCHW-LayoutOptimizer", 0); + + auto* squeeze_node = context.graph_view->GetNode("squeeze"); + ASSERT_NE(squeeze_node, nullptr); + ASSERT_EQ(squeeze_node->NumRegularFanins(), 1); + VerifyRegularFaninMatch(squeeze_node, 0, input_transpose_node1->GetName(), 0); + const auto* squeeze_dims_attr = squeeze_node->GetAttr("squeeze_dims"); + const auto& list = squeeze_dims_attr->list(); + ASSERT_EQ(list.i_size(), 2); + EXPECT_EQ(list.i(0), 1); + EXPECT_EQ(list.i(1), 2); + + auto* output_transpose_node = context.graph_view->GetNode( + "squeeze-0-0-TransposeNHWCToNCHW-LayoutOptimizer"); + EXPECT_EQ(output_transpose_node, nullptr); + + auto* z_output_node = context.graph_view->GetNode("z"); + ASSERT_NE(z_output_node, nullptr); + ASSERT_EQ(z_output_node->NumRegularFanins(), 1); + VerifyRegularFaninMatch(z_output_node, 0, squeeze_node->GetName(), 0); +} + TEST_F(TransposerTest, MaxPoolV2Transposer) { #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM) GTEST_SKIP() << "Neither CUDA nor ROCm is enabled"; From 5f967321df70a90cef61242f259a16b3a84f64da Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 14 Jul 2020 18:45:45 -0700 Subject: [PATCH 0446/2522] Allow selective registration to take a arbitrary dependency. Teams sharing the same repo can provide their own ops_to_register.h file via this mechanism. 
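For illustration, a minimal sketch of how a team sharing the repo might use this hook. Only the tf_selective_registration_deps() function is what this change adds; the target name, header location, and include flag below are hypothetical examples, not part of the patch:

    # In the shared repo's copy of tensorflow/tensorflow.bzl (sketch):
    def tf_selective_registration_deps():
        # Library that exports the team's own generated ops_to_register.h,
        # which selective_registration.h picks up when SELECTIVE_REGISTRATION
        # is defined and the header is on the include path.
        return ["//third_party/my_models:ops_to_register"]

    # third_party/my_models/BUILD (sketch):
    cc_library(
        name = "ops_to_register",
        hdrs = ["ops_to_register.h"],
    )

    # Then build with the corresponding include path, e.g.:
    #   bazel build --copt=-Ithird_party/my_models //your/app:target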
PiperOrigin-RevId: 321278953 Change-Id: Ic6a3cee1814aa0b505a4b0cd626f464f2da73743 --- tensorflow/core/framework/BUILD | 4 ++++ tensorflow/tensorflow.bzl | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/tensorflow/core/framework/BUILD b/tensorflow/core/framework/BUILD index fd27064df6e..3aa0681c8ff 100644 --- a/tensorflow/core/framework/BUILD +++ b/tensorflow/core/framework/BUILD @@ -20,6 +20,9 @@ load( "cc_library", ) +# buildifier: disable=same-origin-load +load("//tensorflow:tensorflow.bzl", "tf_selective_registration_deps") + package( default_visibility = [ "//tensorflow/core:__subpackages__", @@ -937,6 +940,7 @@ cc_library( cc_library( name = "selective_registration", hdrs = ["selective_registration.h"], + deps = tf_selective_registration_deps(), ) cc_library( diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index b6bb0e37c94..e5c29d26f27 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -2907,6 +2907,11 @@ def tf_monitoring_python_deps(): "//conditions:default": [], }) +# Teams sharing the same repo can provide their own ops_to_register.h file using +# this function, and pass in -Ipath/to/repo flag when building the target. +def tf_selective_registration_deps(): + return [] + def tf_jit_compilation_passes_extra_deps(): return [] From 28d4d7306f1273edd17fcb9ce887b0d356390333 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Tue, 14 Jul 2020 18:52:40 -0700 Subject: [PATCH 0447/2522] Remove all BUILD aliases to tensorflow/core/framework:bounds_check Use the actual rule directly everywhere. PiperOrigin-RevId: 321279700 Change-Id: I30297d53fc6a27774ee55aae9047ecc9d489f55c --- tensorflow/core/kernels/neon/BUILD | 2 +- tensorflow/core/kernels/special_math/BUILD | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/neon/BUILD b/tensorflow/core/kernels/neon/BUILD index 724051471d9..0a9bd5a7fd5 100644 --- a/tensorflow/core/kernels/neon/BUILD +++ b/tensorflow/core/kernels/neon/BUILD @@ -23,7 +23,7 @@ tf_kernel_library( "//tensorflow/core:core_cpu", "//tensorflow/core:framework", "//tensorflow/core:lib", - "//tensorflow/core/kernels:bounds_check", + "//tensorflow/core/framework:bounds_check", "//tensorflow/core/kernels:ops_util", "@gemmlowp", ], diff --git a/tensorflow/core/kernels/special_math/BUILD b/tensorflow/core/kernels/special_math/BUILD index d659bdfa5dd..9a49f8263e4 100644 --- a/tensorflow/core/kernels/special_math/BUILD +++ b/tensorflow/core/kernels/special_math/BUILD @@ -20,7 +20,7 @@ tf_kernel_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:math_grad", - "//tensorflow/core/kernels:bounds_check", + "//tensorflow/core/framework:bounds_check", "//tensorflow/core/kernels:cwise_op", "//tensorflow/core/kernels:fill_functor", "//third_party/eigen3", From c2de6d4be00db7bd5fe8d43a2c18b978ee0eaa73 Mon Sep 17 00:00:00 2001 From: Haifeng Jin Date: Tue, 14 Jul 2020 19:05:41 -0700 Subject: [PATCH 0448/2522] Remove the private API usage for run_in_graph_and_eager_modes PiperOrigin-RevId: 321281150 Change-Id: I7d6b4609630dfb4ea081236d8b2d0471076ac9d8 --- tensorflow/python/keras/feature_column/BUILD | 2 + .../feature_column/dense_features_test.py | 6 +- .../feature_column/dense_features_v2_test.py | 6 +- .../sequence_feature_column_test.py | 15 +- .../mixed_precision_graph_rewrite_test.py | 11 +- tensorflow/python/keras/optimizer_v2/BUILD | 1 + .../legacy_learning_rate_decay_test.py | 64 +- .../python/keras/saving/hdf5_format_test.py | 5 +- tensorflow/python/keras/tests/BUILD 
| 5 + .../python/keras/tests/op_callbacks_test.py | 8 +- .../keras/tests/serialization_util_test.py | 8 +- .../python/keras/tests/tracking_test.py | 195 +++-- .../python/keras/tests/tracking_util_test.py | 805 +++++++++--------- .../tracking_util_with_v1_optimizers_test.py | 501 +++++------ 14 files changed, 821 insertions(+), 811 deletions(-) diff --git a/tensorflow/python/keras/feature_column/BUILD b/tensorflow/python/keras/feature_column/BUILD index 6af53646d2f..f1acbeba66c 100644 --- a/tensorflow/python/keras/feature_column/BUILD +++ b/tensorflow/python/keras/feature_column/BUILD @@ -18,6 +18,7 @@ py_library( ":dense_features", ":dense_features_v2", ":sequence_feature_column", + "//tensorflow/python/keras:combinations", ], ) @@ -59,6 +60,7 @@ py_library( "//tensorflow/python:framework_ops", "//tensorflow/python:tf_export", "//tensorflow/python/feature_column:feature_column_v2", + "//tensorflow/python/keras:combinations", ], ) diff --git a/tensorflow/python/keras/feature_column/dense_features_test.py b/tensorflow/python/keras/feature_column/dense_features_test.py index ef132b67707..3c4fb28749e 100644 --- a/tensorflow/python/keras/feature_column/dense_features_test.py +++ b/tensorflow/python/keras/feature_column/dense_features_test.py @@ -32,6 +32,8 @@ from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import test_util +from tensorflow.python.keras import combinations +from tensorflow.python.keras import keras_parameterized from tensorflow.python.keras.feature_column import dense_features as df from tensorflow.python.ops import array_ops from tensorflow.python.ops import lookup_ops @@ -48,9 +50,9 @@ def _initialized_session(config=None): return sess -class DenseFeaturesTest(test.TestCase): +class DenseFeaturesTest(keras_parameterized.TestCase): - @test_util.run_in_graph_and_eager_modes() + @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def test_retrieving_input(self): features = {'a': [0.]} dense_features = df.DenseFeatures(fc.numeric_column('a')) diff --git a/tensorflow/python/keras/feature_column/dense_features_v2_test.py b/tensorflow/python/keras/feature_column/dense_features_v2_test.py index 384d6424f47..fd915cc722e 100644 --- a/tensorflow/python/keras/feature_column/dense_features_v2_test.py +++ b/tensorflow/python/keras/feature_column/dense_features_v2_test.py @@ -30,6 +30,8 @@ from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import test_util +from tensorflow.python.keras import combinations +from tensorflow.python.keras import keras_parameterized from tensorflow.python.keras.feature_column import dense_features_v2 as df from tensorflow.python.ops import array_ops from tensorflow.python.ops import lookup_ops @@ -44,9 +46,9 @@ def _initialized_session(config=None): return sess -class DenseFeaturesTest(test.TestCase): +class DenseFeaturesTest(keras_parameterized.TestCase): - @test_util.run_in_graph_and_eager_modes() + @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def test_retrieving_input(self): features = {'a': [0.]} dense_features = df.DenseFeatures(fc.numeric_column('a')) diff --git a/tensorflow/python/keras/feature_column/sequence_feature_column_test.py b/tensorflow/python/keras/feature_column/sequence_feature_column_test.py index f1ce83b0855..ea07a6bdcc8 100644 --- 
a/tensorflow/python/keras/feature_column/sequence_feature_column_test.py +++ b/tensorflow/python/keras/feature_column/sequence_feature_column_test.py @@ -31,7 +31,6 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor -from tensorflow.python.framework import test_util from tensorflow.python.keras import combinations from tensorflow.python.keras.feature_column import sequence_feature_column as ksfc from tensorflow.python.keras.saving import model_config @@ -47,6 +46,7 @@ def _initialized_session(config=None): return sess +@combinations.generate(combinations.combine(mode=['graph', 'eager'])) class SequenceFeaturesTest(test.TestCase, parameterized.TestCase): @parameterized.named_parameters( @@ -91,7 +91,6 @@ class SequenceFeaturesTest(test.TestCase, parameterized.TestCase): [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]]], 'expected_sequence_length': [2, 2]}, ) - @test_util.run_in_graph_and_eager_modes def test_embedding_column( self, sparse_input_args_a, sparse_input_args_b, expected_input_layer, expected_sequence_length): @@ -151,7 +150,6 @@ class SequenceFeaturesTest(test.TestCase, parameterized.TestCase): self.assertAllEqual( expected_sequence_length, self.evaluate(sequence_length)) - @test_util.run_in_graph_and_eager_modes def test_embedding_column_with_non_sequence_categorical(self): """Tests that error is raised for non-sequence embedding column.""" vocabulary_size = 3 @@ -173,7 +171,6 @@ class SequenceFeaturesTest(test.TestCase, parameterized.TestCase): r'type SequenceCategoricalColumn to use SequenceFeatures\.'): _, _ = sequence_input_layer({'aaa': sparse_input}) - @test_util.run_in_graph_and_eager_modes def test_shared_embedding_column(self): with ops.Graph().as_default(): vocabulary_size = 3 @@ -316,7 +313,6 @@ class SequenceFeaturesTest(test.TestCase, parameterized.TestCase): [[2., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]]], 'expected_sequence_length': [2, 2]}, ) - @test_util.run_in_graph_and_eager_modes def test_indicator_column( self, sparse_input_args_a, sparse_input_args_b, expected_input_layer, expected_sequence_length): @@ -342,7 +338,6 @@ class SequenceFeaturesTest(test.TestCase, parameterized.TestCase): self.assertAllEqual( expected_sequence_length, self.evaluate(sequence_length)) - @test_util.run_in_graph_and_eager_modes def test_indicator_column_with_non_sequence_categorical(self): """Tests that error is raised for non-sequence categorical column.""" vocabulary_size = 3 @@ -388,7 +383,6 @@ class SequenceFeaturesTest(test.TestCase, parameterized.TestCase): [[3.], [0.], [8.], [0.]]], 'expected_sequence_length': [2, 2]}, ) - @test_util.run_in_graph_and_eager_modes def test_numeric_column( self, sparse_input_args, expected_input_layer, expected_sequence_length): sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args) @@ -431,7 +425,6 @@ class SequenceFeaturesTest(test.TestCase, parameterized.TestCase): [[10., 11., 12., 13.], [0., 0., 0., 0.]]], 'expected_sequence_length': [2, 1]}, ) - @test_util.run_in_graph_and_eager_modes def test_numeric_column_multi_dim( self, sparse_input_args, expected_input_layer, expected_sequence_length): """Tests SequenceFeatures for multi-dimensional numeric_column.""" @@ -446,7 +439,6 @@ class SequenceFeaturesTest(test.TestCase, parameterized.TestCase): self.assertAllEqual( expected_sequence_length, self.evaluate(sequence_length)) - @test_util.run_in_graph_and_eager_modes def 
test_sequence_length_not_equal(self): """Tests that an error is raised when sequence lengths are not equal.""" # Input a with sequence_length = [2, 1] @@ -494,7 +486,6 @@ class SequenceFeaturesTest(test.TestCase, parameterized.TestCase): 'dense_shape': (2, 2, 4)}, 'expected_shape': [2, 2, 4]}, ) - @test_util.run_in_graph_and_eager_modes def test_static_shape_from_tensors_numeric( self, sparse_input_args, expected_shape): """Tests that we return a known static shape when we have one.""" @@ -529,7 +520,6 @@ class SequenceFeaturesTest(test.TestCase, parameterized.TestCase): 'dense_shape': (4, 2, 2)}, 'expected_shape': [4, 2, 3]} ) - @test_util.run_in_graph_and_eager_modes def test_static_shape_from_tensors_indicator( self, sparse_input_args, expected_shape): """Tests that we return a known static shape when we have one.""" @@ -543,7 +533,6 @@ class SequenceFeaturesTest(test.TestCase, parameterized.TestCase): shape = input_layer.get_shape() self.assertEqual(shape, expected_shape) - @test_util.run_in_graph_and_eager_modes def test_compute_output_shape(self): price1 = sfc.sequence_numeric_column('price1', shape=2) price2 = sfc.sequence_numeric_column('price2') @@ -580,7 +569,7 @@ class SequenceFeaturesTest(test.TestCase, parameterized.TestCase): self.assertAllClose([2, 1, 1, 1], self.evaluate(seq_len)) -@test_util.run_all_in_graph_and_eager_modes +@combinations.generate(combinations.combine(mode=['graph', 'eager'])) class SequenceFeaturesSerializationTest(test.TestCase, parameterized.TestCase): @parameterized.named_parameters(('default', None, None), diff --git a/tensorflow/python/keras/mixed_precision/experimental/mixed_precision_graph_rewrite_test.py b/tensorflow/python/keras/mixed_precision/experimental/mixed_precision_graph_rewrite_test.py index b2c5f80544e..d0fea573bd0 100644 --- a/tensorflow/python/keras/mixed_precision/experimental/mixed_precision_graph_rewrite_test.py +++ b/tensorflow/python/keras/mixed_precision/experimental/mixed_precision_graph_rewrite_test.py @@ -19,11 +19,10 @@ from __future__ import print_function import os -from absl.testing import parameterized - from tensorflow.python import tf2 from tensorflow.python.framework import config -from tensorflow.python.framework import test_util +from tensorflow.python.keras import combinations +from tensorflow.python.keras import keras_parameterized from tensorflow.python.keras import testing_utils from tensorflow.python.keras.mixed_precision.experimental import loss_scale_optimizer as loss_scale_optimizer_v2 from tensorflow.python.keras.mixed_precision.experimental import policy @@ -40,7 +39,7 @@ else: mixed_precision.enable_mixed_precision_graph_rewrite_v1) -class MixedPrecisionTest(test.TestCase, parameterized.TestCase): +class MixedPrecisionTest(keras_parameterized.TestCase): IGNORE_PERF_VAR = 'TF_AUTO_MIXED_PRECISION_GRAPH_REWRITE_IGNORE_PERFORMANCE' @@ -61,7 +60,7 @@ class MixedPrecisionTest(test.TestCase, parameterized.TestCase): mixed_precision.disable_mixed_precision_graph_rewrite() super(MixedPrecisionTest, self).tearDown() - @test_util.run_in_graph_and_eager_modes + @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def test_wrap_optimizer(self): opt = gradient_descent_v2.SGD(1.0) opt = enable_mixed_precision_graph_rewrite(opt, 123.) @@ -69,7 +68,7 @@ class MixedPrecisionTest(test.TestCase, parameterized.TestCase): opt, loss_scale_optimizer_v2.LossScaleOptimizer) self.assertEqual(self.evaluate(opt._loss_scale()), 123.) 
- @test_util.run_in_graph_and_eager_modes + @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def test_optimizer_errors(self): opt = gradient_descent_v2.SGD(1.0) opt = loss_scale_optimizer_v2.LossScaleOptimizer(opt, 'dynamic') diff --git a/tensorflow/python/keras/optimizer_v2/BUILD b/tensorflow/python/keras/optimizer_v2/BUILD index be793378538..42a1e8ac93a 100644 --- a/tensorflow/python/keras/optimizer_v2/BUILD +++ b/tensorflow/python/keras/optimizer_v2/BUILD @@ -273,6 +273,7 @@ cuda_py_test( "//tensorflow/python:training_lib", "//tensorflow/python:variables", "//tensorflow/python/eager:context", + "//tensorflow/python/keras:combinations", ], ) diff --git a/tensorflow/python/keras/optimizer_v2/legacy_learning_rate_decay_test.py b/tensorflow/python/keras/optimizer_v2/legacy_learning_rate_decay_test.py index 65e2cc93f37..aa566e15135 100644 --- a/tensorflow/python/keras/optimizer_v2/legacy_learning_rate_decay_test.py +++ b/tensorflow/python/keras/optimizer_v2/legacy_learning_rate_decay_test.py @@ -22,14 +22,16 @@ import math from tensorflow.python.eager import context from tensorflow.python.framework import test_util +from tensorflow.python.keras import combinations +from tensorflow.python.keras import keras_parameterized from tensorflow.python.keras.optimizer_v2 import legacy_learning_rate_decay as learning_rate_decay from tensorflow.python.ops import variables from tensorflow.python.platform import googletest -class LRDecayTest(test_util.TensorFlowTestCase): +class LRDecayTest(keras_parameterized.TestCase): - @test_util.run_in_graph_and_eager_modes + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testContinuous(self): self.evaluate(variables.global_variables_initializer()) step = 5 @@ -37,7 +39,7 @@ class LRDecayTest(test_util.TensorFlowTestCase): expected = .05 * 0.96**(5.0 / 10.0) self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) - @test_util.run_in_graph_and_eager_modes + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testStaircase(self): if context.executing_eagerly(): step = variables.Variable(0) @@ -82,7 +84,7 @@ class LRDecayTest(test_util.TensorFlowTestCase): expected = .1 * 0.96**(100 // 3) self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) - @test_util.run_in_graph_and_eager_modes + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testPiecewiseConstant(self): x = variables.Variable(-999) decayed_lr = learning_rate_decay.piecewise_constant( @@ -102,7 +104,7 @@ class LRDecayTest(test_util.TensorFlowTestCase): self.evaluate(x.assign(999)) self.assertAllClose(self.evaluate(decayed_lr), 0.001, 1e-6) - @test_util.run_in_graph_and_eager_modes + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testPiecewiseConstantEdgeCases(self): x_int = variables.Variable(0, dtype=variables.dtypes.int32) boundaries, values = [-1.0, 1.0], [1, 2, 3] @@ -145,9 +147,9 @@ class LRDecayTest(test_util.TensorFlowTestCase): self.assertAllClose(self.evaluate(decayed_lr), 0.7, 1e-6) -class LinearDecayTest(test_util.TensorFlowTestCase): +@combinations.generate(combinations.combine(mode=["graph", "eager"])) +class LinearDecayTest(keras_parameterized.TestCase): - @test_util.run_in_graph_and_eager_modes def testHalfWay(self): step = 5 lr = 0.05 @@ -156,7 +158,6 @@ class LinearDecayTest(test_util.TensorFlowTestCase): expected = lr * 0.5 self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) - @test_util.run_in_graph_and_eager_modes def testEnd(self): step = 10 lr = 
0.05
@@ -165,7 +166,6 @@ class LinearDecayTest(test_util.TensorFlowTestCase):
     expected = end_lr
     self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6)
 
-  @test_util.run_in_graph_and_eager_modes
   def testHalfWayWithEnd(self):
     step = 5
     lr = 0.05
@@ -174,7 +174,6 @@ class LinearDecayTest(test_util.TensorFlowTestCase):
     expected = (lr + end_lr) * 0.5
     self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6)
 
-  @test_util.run_in_graph_and_eager_modes
   def testBeyondEnd(self):
     step = 15
     lr = 0.05
@@ -183,7 +182,6 @@ class LinearDecayTest(test_util.TensorFlowTestCase):
     expected = end_lr
     self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6)
 
-  @test_util.run_in_graph_and_eager_modes
   def testBeyondEndWithCycle(self):
     step = 15
     lr = 0.05
@@ -194,9 +192,9 @@ class LinearDecayTest(test_util.TensorFlowTestCase):
     self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6)
 
 
-class SqrtDecayTest(test_util.TensorFlowTestCase):
+@combinations.generate(combinations.combine(mode=["graph", "eager"]))
+class SqrtDecayTest(keras_parameterized.TestCase):
 
-  @test_util.run_in_graph_and_eager_modes
   def testHalfWay(self):
     step = 5
     lr = 0.05
@@ -207,7 +205,6 @@ class SqrtDecayTest(test_util.TensorFlowTestCase):
     expected = lr * 0.5**power
     self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6)
 
-  @test_util.run_in_graph_and_eager_modes
   def testEnd(self):
     step = 10
     lr = 0.05
@@ -218,7 +215,6 @@ class SqrtDecayTest(test_util.TensorFlowTestCase):
     expected = end_lr
     self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6)
 
-  @test_util.run_in_graph_and_eager_modes
   def testHalfWayWithEnd(self):
     step = 5
     lr = 0.05
@@ -229,7 +225,6 @@ class SqrtDecayTest(test_util.TensorFlowTestCase):
     expected = (lr - end_lr) * 0.5**power + end_lr
     self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6)
 
-  @test_util.run_in_graph_and_eager_modes
   def testBeyondEnd(self):
     step = 15
     lr = 0.05
@@ -240,7 +235,6 @@ class SqrtDecayTest(test_util.TensorFlowTestCase):
     expected = end_lr
     self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6)
 
-  @test_util.run_in_graph_and_eager_modes
   def testBeyondEndWithCycle(self):
     step = 15
     lr = 0.05
@@ -252,9 +246,9 @@ class SqrtDecayTest(test_util.TensorFlowTestCase):
     self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6)
 
 
-class PolynomialDecayTest(test_util.TensorFlowTestCase):
+@combinations.generate(combinations.combine(mode=["graph", "eager"]))
+class PolynomialDecayTest(keras_parameterized.TestCase):
 
-  @test_util.run_in_graph_and_eager_modes
   def testBeginWithCycle(self):
     lr = 0.001
     decay_steps = 10
@@ -265,9 +259,9 @@ class PolynomialDecayTest(test_util.TensorFlowTestCase):
     self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6)
 
 
-class ExponentialDecayTest(test_util.TensorFlowTestCase):
+@combinations.generate(combinations.combine(mode=["graph", "eager"]))
+class ExponentialDecayTest(keras_parameterized.TestCase):
 
-  @test_util.run_in_graph_and_eager_modes
   def testDecay(self):
     initial_lr = 0.1
     k = 10
@@ -282,7 +276,6 @@ class ExponentialDecayTest(test_util.TensorFlowTestCase):
       self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6)
       self.evaluate(step.assign_add(1))
 
-  @test_util.run_in_graph_and_eager_modes
   def testStaircase(self):
     initial_lr = 0.1
     k = 10
@@ -298,9 +291,9 @@ class ExponentialDecayTest(test_util.TensorFlowTestCase):
       self.evaluate(step.assign_add(1))
 
 
-class InverseDecayTest(test_util.TensorFlowTestCase):
+@combinations.generate(combinations.combine(mode=["graph", "eager"]))
+class InverseDecayTest(keras_parameterized.TestCase):
 
-  @test_util.run_in_graph_and_eager_modes
   def testDecay(self):
     initial_lr = 0.1
     k = 10
@@ -315,7 +308,6 @@ class InverseDecayTest(test_util.TensorFlowTestCase):
       self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6)
       self.evaluate(step.assign_add(1))
 
-  @test_util.run_in_graph_and_eager_modes
   def testStaircase(self):
     initial_lr = 0.1
     k = 10
@@ -331,7 +323,8 @@ class InverseDecayTest(test_util.TensorFlowTestCase):
       self.evaluate(step.assign_add(1))
 
 
-class CosineDecayTest(test_util.TensorFlowTestCase):
+@combinations.generate(combinations.combine(mode=["graph", "eager"]))
+class CosineDecayTest(keras_parameterized.TestCase):
 
   def np_cosine_decay(self, step, decay_steps, alpha=0.0):
     step = min(step, decay_steps)
@@ -339,7 +332,6 @@ class CosineDecayTest(test_util.TensorFlowTestCase):
     decay = 0.5 * (1.0 + math.cos(math.pi * completed_fraction))
     return (1.0 - alpha) * decay + alpha
 
-  @test_util.run_in_graph_and_eager_modes
   def testDecay(self):
     num_training_steps = 1000
     initial_lr = 1.0
@@ -349,7 +341,6 @@ class CosineDecayTest(test_util.TensorFlowTestCase):
       expected = self.np_cosine_decay(step, num_training_steps)
       self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6)
 
-  @test_util.run_in_graph_and_eager_modes
   def testAlpha(self):
     num_training_steps = 1000
     initial_lr = 1.0
@@ -361,7 +352,8 @@ class CosineDecayTest(test_util.TensorFlowTestCase):
       self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6)
 
 
-class CosineDecayRestartsTest(test_util.TensorFlowTestCase):
+@combinations.generate(combinations.combine(mode=["graph", "eager"]))
+class CosineDecayRestartsTest(keras_parameterized.TestCase):
 
   def np_cosine_decay_restarts(self, step, decay_steps, t_mul=2.0, m_mul=1.0,
                                alpha=0.0):
@@ -375,7 +367,6 @@ class CosineDecayRestartsTest(test_util.TensorFlowTestCase):
     decay = fac * 0.5 * (1.0 + math.cos(math.pi * completed_fraction))
     return (1.0 - alpha) * decay + alpha
 
-  @test_util.run_in_graph_and_eager_modes
   def testDecay(self):
     num_training_steps = 1000
     initial_lr = 1.0
@@ -385,7 +376,6 @@ class CosineDecayRestartsTest(test_util.TensorFlowTestCase):
       expected = self.np_cosine_decay_restarts(step, num_training_steps)
      self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6)
 
-  @test_util.run_in_graph_and_eager_modes
   def testAlpha(self):
     num_training_steps = 1000
     initial_lr = 1.0
@@ -397,7 +387,6 @@ class CosineDecayRestartsTest(test_util.TensorFlowTestCase):
           step, num_training_steps, alpha=alpha)
       self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6)
 
-  @test_util.run_in_graph_and_eager_modes
   def testMMul(self):
     num_training_steps = 1000
     initial_lr = 1.0
@@ -409,7 +398,6 @@ class CosineDecayRestartsTest(test_util.TensorFlowTestCase):
           step, num_training_steps, m_mul=m_mul)
       self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6)
 
-  @test_util.run_in_graph_and_eager_modes
   def testTMul(self):
     num_training_steps = 1000
     initial_lr = 1.0
@@ -422,7 +410,8 @@ class CosineDecayRestartsTest(test_util.TensorFlowTestCase):
       self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6)
 
 
-class LinearCosineDecayTest(test_util.TensorFlowTestCase):
+@combinations.generate(combinations.combine(mode=["graph", "eager"]))
+class LinearCosineDecayTest(keras_parameterized.TestCase):
 
   def np_linear_cosine_decay(self,
                              step,
@@ -436,7 +425,6 @@ class LinearCosineDecayTest(test_util.TensorFlowTestCase):
     cosine_decayed = 0.5 * (1.0 + math.cos(math.pi * fraction))
     return (alpha + linear_decayed) * cosine_decayed + beta
 
-  @test_util.run_in_graph_and_eager_modes
   def testDefaultDecay(self):
     num_training_steps = 1000
     initial_lr = 1.0
@@ -446,7 +434,6 @@ class LinearCosineDecayTest(test_util.TensorFlowTestCase):
       expected = self.np_linear_cosine_decay(step, num_training_steps)
       self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6)
 
-  @test_util.run_in_graph_and_eager_modes
   def testNonDefaultDecay(self):
     num_training_steps = 1000
     initial_lr = 1.0
@@ -463,9 +450,9 @@ class LinearCosineDecayTest(test_util.TensorFlowTestCase):
       self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6)
 
 
-class NoisyLinearCosineDecayTest(test_util.TensorFlowTestCase):
+@combinations.generate(combinations.combine(mode=["graph", "eager"]))
+class NoisyLinearCosineDecayTest(keras_parameterized.TestCase):
 
-  @test_util.run_in_graph_and_eager_modes
   def testDefaultNoisyLinearCosine(self):
     num_training_steps = 1000
     initial_lr = 1.0
@@ -476,7 +463,6 @@ class NoisyLinearCosineDecayTest(test_util.TensorFlowTestCase):
       # Cannot be deterministically tested
      self.evaluate(decayed_lr)
 
-  @test_util.run_in_graph_and_eager_modes
   def testNonDefaultNoisyLinearCosine(self):
     num_training_steps = 1000
     initial_lr = 1.0
diff --git a/tensorflow/python/keras/saving/hdf5_format_test.py b/tensorflow/python/keras/saving/hdf5_format_test.py
index 1bd3cd614e8..8d88bf8bb09 100644
--- a/tensorflow/python/keras/saving/hdf5_format_test.py
+++ b/tensorflow/python/keras/saving/hdf5_format_test.py
@@ -32,7 +32,6 @@ from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
-from tensorflow.python.framework import test_util
 from tensorflow.python.keras import combinations
 from tensorflow.python.keras import keras_parameterized
 from tensorflow.python.keras import optimizers
@@ -451,7 +450,7 @@ class TestWholeModelSaving(keras_parameterized.TestCase):
     eval_out2 = loaded_model.evaluate(x, y)
     self.assertArrayNear(eval_out, eval_out2, 0.001)
 
-  @test_util.run_in_graph_and_eager_modes
+  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
   def test_sequential_model_saving_without_input_shape(self):
     saved_model_dir = self._save_model_dir()
     save_format = testing_utils.get_save_format()
@@ -486,7 +485,7 @@ class TestWholeModelSaving(keras_parameterized.TestCase):
     out2 = new_model.predict(x)
     self.assertAllClose(out, out2, atol=1e-05)
 
-  @test_util.run_in_graph_and_eager_modes
+  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
   def test_sequential_model_saving_without_compile(self):
     saved_model_dir = self._save_model_dir()
     save_format = testing_utils.get_save_format()
diff --git a/tensorflow/python/keras/tests/BUILD b/tensorflow/python/keras/tests/BUILD
index 52131bab7ff..4db3327d1f6 100644
--- a/tensorflow/python/keras/tests/BUILD
+++ b/tensorflow/python/keras/tests/BUILD
@@ -284,6 +284,7 @@ cuda_py_test(
         "//tensorflow/python:util",
         "//tensorflow/python/eager:context",
         "//tensorflow/python/eager:test",
+        "//tensorflow/python/keras:combinations",
     ],
 )
 
@@ -356,6 +357,7 @@ tf_py_test(
         "//tensorflow/python:constant_op",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:util",
+        "//tensorflow/python/keras:combinations",
         "//tensorflow/python/keras/engine",
         "//tensorflow/python/keras/layers:core",
     ],
@@ -391,6 +393,7 @@ tf_py_test(
         "//tensorflow/python:variables",
         "//tensorflow/python/eager:context",
         "//tensorflow/python/eager:test",
+        "//tensorflow/python/keras:combinations",
         "//tensorflow/python/keras/engine",
         "//tensorflow/python/keras/layers:core",
"//tensorflow/python/keras/layers:normalization", @@ -428,6 +431,7 @@ tf_py_test( "//tensorflow/python/eager:backprop", "//tensorflow/python/eager:context", "//tensorflow/python/eager:def_function", + "//tensorflow/python/keras:combinations", "//tensorflow/python/keras/engine", "//tensorflow/python/keras/layers:core", "//tensorflow/python/keras/optimizer_v2", @@ -461,6 +465,7 @@ tf_py_test( "//tensorflow/python/eager:context", "//tensorflow/python/eager:def_function", "//tensorflow/python/eager:test", + "//tensorflow/python/keras:combinations", "//tensorflow/python/keras/engine", "//tensorflow/python/keras/layers:core", "//tensorflow/python/training/tracking", diff --git a/tensorflow/python/keras/tests/op_callbacks_test.py b/tensorflow/python/keras/tests/op_callbacks_test.py index a8abc07e3d4..ca50bbb1a81 100644 --- a/tensorflow/python/keras/tests/op_callbacks_test.py +++ b/tensorflow/python/keras/tests/op_callbacks_test.py @@ -25,7 +25,8 @@ from tensorflow.python.eager import context from tensorflow.python.eager import test from tensorflow.python.framework import op_callbacks from tensorflow.python.framework import ops -from tensorflow.python.framework import test_util +from tensorflow.python.keras import combinations +from tensorflow.python.keras import keras_parameterized from tensorflow.python.ops import script_ops from tensorflow.python.util import compat @@ -128,13 +129,13 @@ class _NumpyFunctionCallback(object): self.graph_internal_ndarrays = {} -class OpCallbacksTest(test_util.TensorFlowTestCase): +@combinations.generate(combinations.combine(mode=["graph", "eager"])) +class OpCallbacksTest(keras_parameterized.TestCase): def tearDown(self): op_callbacks.clear_op_callbacks() super(OpCallbacksTest, self).tearDown() - @test_util.run_in_graph_and_eager_modes def testKerasLSTMPredict(self): instrument = _NumpyFunctionCallback(float_only=True) @@ -153,7 +154,6 @@ class OpCallbacksTest(test_util.TensorFlowTestCase): # recorded by the callback. 
self.assertTrue(instrument.graph_internal_ndarrays) - @test_util.run_in_graph_and_eager_modes def testKeraModelFit(self): # TODO(cais): The purely PyFunc (numpy_function) based instrumentation # doesn't work for the entire Keras model and its fit() call, due to some diff --git a/tensorflow/python/keras/tests/serialization_util_test.py b/tensorflow/python/keras/tests/serialization_util_test.py index 058bdaec56c..0736d2217c8 100644 --- a/tensorflow/python/keras/tests/serialization_util_test.py +++ b/tensorflow/python/keras/tests/serialization_util_test.py @@ -21,7 +21,8 @@ from __future__ import print_function import json from tensorflow.python.framework import constant_op -from tensorflow.python.framework import test_util +from tensorflow.python.keras import combinations +from tensorflow.python.keras import keras_parameterized from tensorflow.python.keras.engine import input_layer from tensorflow.python.keras.engine import sequential from tensorflow.python.keras.engine import training @@ -30,7 +31,8 @@ from tensorflow.python.platform import test from tensorflow.python.util import serialization -class SerializationTests(test.TestCase): +@combinations.generate(combinations.combine(mode=["graph", "eager"])) +class SerializationTests(keras_parameterized.TestCase): def test_serialize_dense(self): dense = core.Dense(3) @@ -39,7 +41,6 @@ class SerializationTests(test.TestCase): dense, default=serialization.get_json_type)) self.assertEqual(3, round_trip["config"]["units"]) - @test_util.run_in_graph_and_eager_modes def test_serialize_sequential(self): model = sequential.Sequential() model.add(core.Dense(4)) @@ -52,7 +53,6 @@ class SerializationTests(test.TestCase): # (but not in V1) 5, sequential_round_trip["config"]["layers"][-1]["config"]["units"]) - @test_util.run_in_graph_and_eager_modes def test_serialize_model(self): x = input_layer.Input(shape=[3]) y = core.Dense(10)(x) diff --git a/tensorflow/python/keras/tests/tracking_test.py b/tensorflow/python/keras/tests/tracking_test.py index a05706eec7a..281539fb5a6 100644 --- a/tensorflow/python/keras/tests/tracking_test.py +++ b/tensorflow/python/keras/tests/tracking_test.py @@ -28,6 +28,8 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import test_util +from tensorflow.python.keras import combinations +from tensorflow.python.keras import keras_parameterized from tensorflow.python.keras.engine import sequential from tensorflow.python.keras.engine import training from tensorflow.python.keras.layers import core @@ -73,43 +75,45 @@ class HasList(training.Model): return bn(x) / aggregation -class ListTests(test.TestCase): +class ListTests(keras_parameterized.TestCase): - @test_util.run_in_graph_and_eager_modes + @combinations.generate(combinations.combine(mode=["graph", "eager"])) @test_util.run_v1_only("b/120545219") def testTracking(self): - model = HasList() - output = model(array_ops.ones([32, 2])) - self.assertAllEqual([32, 12], output.shape) - self.assertEqual(11, len(model.layers)) - self.assertEqual(10, len(model.layer_list.layers)) - six.assertCountEqual( - self, - model.layers, - model.layer_list.layers + model.layers_with_updates) - for index in range(10): - self.assertEqual(3 + index, model.layer_list.layers[index].units) - self.assertEqual(2, len(model._checkpoint_dependencies)) - self.assertIs(model.layer_list, model._checkpoint_dependencies[0].ref) - self.assertIs(model.layers_with_updates, - 
model._checkpoint_dependencies[1].ref) - self.assertEqual( - 10, len(model._checkpoint_dependencies[0].ref._checkpoint_dependencies)) - self.evaluate([v.initializer for v in model.variables]) - self.evaluate(model.variables[0].assign([[1., 2., 3.], [4., 5., 6.]])) - save_path = os.path.join(self.get_temp_dir(), "ckpt") - model.save_weights(save_path) - self.evaluate(model.variables[0].assign(array_ops.zeros([2, 3]))) - model.load_weights(save_path) - self.assertAllEqual([[1., 2., 3.], [4., 5., 6.]], - self.evaluate(model.variables[0])) - v = variables.Variable(1.) - model.var_list = [v] - self.assertIn(v, model.variables) - self.assertIn(v, model.trainable_variables) - self.assertNotIn(v, model.non_trainable_variables) - self.assertIn(model.layer_list[0].trainable_weights[0], - model.trainable_weights) + with self.test_session(): + model = HasList() + output = model(array_ops.ones([32, 2])) + self.assertAllEqual([32, 12], output.shape) + self.assertEqual(11, len(model.layers)) + self.assertEqual(10, len(model.layer_list.layers)) + six.assertCountEqual( + self, + model.layers, + model.layer_list.layers + model.layers_with_updates) + for index in range(10): + self.assertEqual(3 + index, model.layer_list.layers[index].units) + self.assertEqual(2, len(model._checkpoint_dependencies)) + self.assertIs(model.layer_list, model._checkpoint_dependencies[0].ref) + self.assertIs(model.layers_with_updates, + model._checkpoint_dependencies[1].ref) + self.assertEqual( + 10, + len(model._checkpoint_dependencies[0].ref._checkpoint_dependencies)) + self.evaluate([v.initializer for v in model.variables]) + self.evaluate(model.variables[0].assign([[1., 2., 3.], [4., 5., 6.]])) + save_path = os.path.join(self.get_temp_dir(), "ckpt") + model.save_weights(save_path) + self.evaluate(model.variables[0].assign(array_ops.zeros([2, 3]))) + model.load_weights(save_path) + self.assertAllEqual([[1., 2., 3.], [4., 5., 6.]], + self.evaluate(model.variables[0])) + v = variables.Variable(1.) 
+ model.var_list = [v] + self.assertIn(v, model.variables) + self.assertIn(v, model.trainable_variables) + self.assertNotIn(v, model.non_trainable_variables) + self.assertIn(model.layer_list[0].trainable_weights[0], + model.trainable_weights) def testSubModelTracking(self): model = training.Model() @@ -192,7 +196,7 @@ class ListTests(test.TestCase): model(model_input) self.assertEqual(0, len(model.updates)) - @test_util.run_in_graph_and_eager_modes + @combinations.generate(combinations.combine(mode=["graph", "eager"])) @test_util.run_v1_only("b/120545219") def testLossesForwarded(self): model = HasList() @@ -215,7 +219,7 @@ class ListTests(test.TestCase): model.l2.append(second_layer) self.assertEqual([first_layer, second_layer], model.layers) - @test_util.run_in_graph_and_eager_modes + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testTensorConversion(self): class ListToTensor(training.Model): @@ -266,26 +270,27 @@ class HasMapping(training.Model): return self.layer_dict["output"](x) / aggregation -class MappingTests(test.TestCase): +class MappingTests(keras_parameterized.TestCase): - @test_util.run_in_graph_and_eager_modes + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testTracking(self): - model = HasMapping() - output = model(array_ops.ones([32, 2])) - self.assertAllEqual([32, 7], output.shape.as_list()) - self.assertEqual(5, len(model.layers)) - six.assertCountEqual(self, model.layers, model.layer_dict.layers) - self.assertEqual(1, len(model._checkpoint_dependencies)) - self.assertIs(model.layer_dict, model._checkpoint_dependencies[0].ref) - self.evaluate([v.initializer for v in model.variables]) - test_var = model.layer_dict["output"].kernel - self.evaluate(test_var.assign(array_ops.ones([6, 7]))) - save_path = os.path.join(self.get_temp_dir(), "ckpt") - model.save_weights(save_path) - self.evaluate(test_var.assign(array_ops.zeros([6, 7]))) - model.load_weights(save_path) - self.assertAllEqual(numpy.ones([6, 7]), - self.evaluate(test_var)) + with self.test_session(): + model = HasMapping() + output = model(array_ops.ones([32, 2])) + self.assertAllEqual([32, 7], output.shape.as_list()) + self.assertEqual(5, len(model.layers)) + six.assertCountEqual(self, model.layers, model.layer_dict.layers) + self.assertEqual(1, len(model._checkpoint_dependencies)) + self.assertIs(model.layer_dict, model._checkpoint_dependencies[0].ref) + self.evaluate([v.initializer for v in model.variables]) + test_var = model.layer_dict["output"].kernel + self.evaluate(test_var.assign(array_ops.ones([6, 7]))) + save_path = os.path.join(self.get_temp_dir(), "ckpt") + model.save_weights(save_path) + self.evaluate(test_var.assign(array_ops.zeros([6, 7]))) + model.load_weights(save_path) + self.assertAllEqual(numpy.ones([6, 7]), + self.evaluate(test_var)) def testLayerCollectionWithExternalMutation(self): d = {} @@ -415,43 +420,45 @@ class HasTuple(training.Model): return bn(x) / aggregation -class TupleTests(test.TestCase, parameterized.TestCase): +class TupleTests(keras_parameterized.TestCase): - @test_util.run_in_graph_and_eager_modes + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testTracking(self): - model = HasTuple() - output = model(array_ops.ones([32, 2])) - self.assertAllEqual([32, 5], output.shape.as_list()) - self.assertLen(model.layers, 4) - self.assertLen(model.layer_list.layers, 3) - six.assertCountEqual( - self, - model.layers, - tuple(model.layer_list.layers) + model.layers_with_updates) - self.assertEqual(3, 
model.layer_list.layers[0].units) - self.assertEqual(4, model.layer_list.layers[1].units) - self.assertEqual(5, model.layer_list.layers[2].units) - self.assertLen(model._checkpoint_dependencies, 2) - self.assertIs(model.layer_list, model._checkpoint_dependencies[0].ref) - self.assertIs(model.layers_with_updates, - model._checkpoint_dependencies[1].ref) - self.assertLen( - model._checkpoint_dependencies[0].ref._checkpoint_dependencies, 3) - self.evaluate([v.initializer for v in model.variables]) - self.evaluate(model.variables[0].assign([[1., 2., 3.], [4., 5., 6.]])) - save_path = os.path.join(self.get_temp_dir(), "ckpt") - model.save_weights(save_path) - self.evaluate(model.variables[0].assign(array_ops.zeros([2, 3]))) - model.load_weights(save_path) - self.assertAllEqual([[1., 2., 3.], [4., 5., 6.]], - self.evaluate(model.variables[0])) - v = variables.Variable(1.) - model.var_list = (v,) - self.assertIn(id(v), [id(obj) for obj in model.variables]) - self.assertIn(id(v), [id(obj) for obj in model.trainable_variables]) - self.assertNotIn(id(v), [id(obj) for obj in model.non_trainable_variables]) - self.assertIn(id(model.layer_list[0].trainable_weights[0]), - [id(obj) for obj in model.trainable_weights]) + with self.test_session(): + model = HasTuple() + output = model(array_ops.ones([32, 2])) + self.assertAllEqual([32, 5], output.shape.as_list()) + self.assertLen(model.layers, 4) + self.assertLen(model.layer_list.layers, 3) + six.assertCountEqual( + self, + model.layers, + tuple(model.layer_list.layers) + model.layers_with_updates) + self.assertEqual(3, model.layer_list.layers[0].units) + self.assertEqual(4, model.layer_list.layers[1].units) + self.assertEqual(5, model.layer_list.layers[2].units) + self.assertLen(model._checkpoint_dependencies, 2) + self.assertIs(model.layer_list, model._checkpoint_dependencies[0].ref) + self.assertIs(model.layers_with_updates, + model._checkpoint_dependencies[1].ref) + self.assertLen( + model._checkpoint_dependencies[0].ref._checkpoint_dependencies, 3) + self.evaluate([v.initializer for v in model.variables]) + self.evaluate(model.variables[0].assign([[1., 2., 3.], [4., 5., 6.]])) + save_path = os.path.join(self.get_temp_dir(), "ckpt") + model.save_weights(save_path) + self.evaluate(model.variables[0].assign(array_ops.zeros([2, 3]))) + model.load_weights(save_path) + self.assertAllEqual([[1., 2., 3.], [4., 5., 6.]], + self.evaluate(model.variables[0])) + v = variables.Variable(1.) 
+ model.var_list = (v,) + self.assertIn(id(v), [id(obj) for obj in model.variables]) + self.assertIn(id(v), [id(obj) for obj in model.trainable_variables]) + self.assertNotIn(id(v), + [id(obj) for obj in model.non_trainable_variables]) + self.assertIn(id(model.layer_list[0].trainable_weights[0]), + [id(obj) for obj in model.trainable_weights]) @parameterized.named_parameters( ("Module", module.Module), @@ -498,7 +505,7 @@ class TupleTests(test.TestCase, parameterized.TestCase): model(model_input) self.assertEmpty(model.updates) - @test_util.run_in_graph_and_eager_modes + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testLossesForwarded(self): model = HasTuple() model_input = array_ops.ones([32, 2]) @@ -526,7 +533,7 @@ class TupleTests(test.TestCase, parameterized.TestCase): self.assertEqual(2, d[(second_layer,)]) self.assertEqual([first_layer, second_layer], model.layers) - @test_util.run_in_graph_and_eager_modes + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testTensorConversion(self): class TupleToTensor(training.Model): @@ -544,7 +551,7 @@ class TupleTests(test.TestCase, parameterized.TestCase): self.evaluate(array_ops.pack(TupleToTensor().l))) -class InterfaceTests(test.TestCase): +class InterfaceTests(keras_parameterized.TestCase): def testNoDependency(self): root = tracking.AutoTrackable() @@ -568,7 +575,7 @@ class InterfaceTests(test.TestCase): nodeps = NoDependencyModel() self.assertEqual([nodeps], util.list_objects(nodeps)) - @test_util.run_in_graph_and_eager_modes + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testDictionariesBasic(self): a = training.Model() b = training.Model() @@ -592,7 +599,7 @@ class InterfaceTests(test.TestCase): with self.cached_session(): checkpoint.restore(save_path).assert_consumed().initialize_or_restore() - @test_util.run_in_graph_and_eager_modes + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testNoDepList(self): a = training.Model() a.l1 = data_structures.NoDependency([]) diff --git a/tensorflow/python/keras/tests/tracking_util_test.py b/tensorflow/python/keras/tests/tracking_util_test.py index 1c55c366d82..a609d4f711e 100644 --- a/tensorflow/python/keras/tests/tracking_util_test.py +++ b/tensorflow/python/keras/tests/tracking_util_test.py @@ -20,7 +20,6 @@ import functools import os import weakref -from absl.testing import parameterized import six from tensorflow.python.eager import backprop @@ -29,6 +28,8 @@ from tensorflow.python.eager import def_function from tensorflow.python.framework import constant_op from tensorflow.python.framework import ops from tensorflow.python.framework import test_util +from tensorflow.python.keras import combinations +from tensorflow.python.keras import keras_parameterized from tensorflow.python.keras.engine import input_layer from tensorflow.python.keras.engine import sequential from tensorflow.python.keras.engine import training @@ -114,7 +115,7 @@ class InterfaceTests(test.TestCase): self.assertIn("dense/kernel", all_variable_names) -class CheckpointingTests(parameterized.TestCase, test.TestCase): +class CheckpointingTests(keras_parameterized.TestCase): @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) def testNamingWithOptimizer(self): @@ -210,73 +211,74 @@ class CheckpointingTests(parameterized.TestCase, test.TestCase): [key + suffix for key in expected_slot_keys], serialized_slot_keys) - @test_util.run_in_graph_and_eager_modes + 
@combinations.generate(combinations.combine(mode=["graph", "eager"])) def testSaveRestore(self): - model = MyModel() - optimizer = adam.Adam(0.001) - root_trackable = trackable_utils.Checkpoint( - optimizer=optimizer, model=model) - input_value = constant_op.constant([[3.]]) - with backprop.GradientTape() as tape: - loss = model(input_value) - variables = model.trainable_variables - gradients = tape.gradient(loss, variables) - train_op = optimizer.apply_gradients(zip(gradients, variables)) - self.assertFalse(root_trackable.save_counter.trainable) - self.evaluate(trackable_utils.gather_initializers( - root_trackable)) - self.evaluate(train_op) - prefix = os.path.join(self.get_temp_dir(), "ckpt") - self.evaluate(state_ops.assign(model._named_dense.variables[1], [42.])) - m_bias_slot = optimizer.get_slot(model._named_dense.variables[1], "m") - self.evaluate(state_ops.assign(m_bias_slot, [1.5])) - save_path = root_trackable.save(file_prefix=prefix) - self.evaluate(state_ops.assign(model._named_dense.variables[1], [43.])) - self.evaluate(state_ops.assign(root_trackable.save_counter, 3)) - optimizer_variables = self.evaluate( - sorted(optimizer.variables(), key=lambda v: v.name)) - self.evaluate(state_ops.assign(m_bias_slot, [-2.])) - # Immediate restoration - status = root_trackable.restore(save_path=save_path).assert_consumed() - status.run_restore_ops() - self.assertAllEqual([42.], self.evaluate(model._named_dense.variables[1])) - self.assertAllEqual(1, self.evaluate(root_trackable.save_counter)) - self.assertAllEqual([1.5], self.evaluate(m_bias_slot)) - if not context.executing_eagerly(): - return # Restore-on-create is only supported when executing eagerly - on_create_model = MyModel() - on_create_optimizer = adam.Adam(0.001) - on_create_root = trackable_utils.Checkpoint( - optimizer=on_create_optimizer, model=on_create_model) - # Deferred restoration - status = on_create_root.restore(save_path=save_path) - status.assert_nontrivial_match() - status.assert_existing_objects_matched() - with self.assertRaises(AssertionError): - status.assert_consumed() - on_create_model(constant_op.constant([[3.]])) # create variables - self.assertAllEqual(1, self.evaluate(on_create_root.save_counter)) - self.assertAllEqual([42.], - self.evaluate( - on_create_model._named_dense.variables[1])) - on_create_m_bias_slot = on_create_optimizer.get_slot( - on_create_model._named_dense.variables[1], "m") - status.assert_existing_objects_matched() - if not context.executing_eagerly(): + with self.test_session(): + model = MyModel() + optimizer = adam.Adam(0.001) + root_trackable = trackable_utils.Checkpoint( + optimizer=optimizer, model=model) + input_value = constant_op.constant([[3.]]) + with backprop.GradientTape() as tape: + loss = model(input_value) + variables = model.trainable_variables + gradients = tape.gradient(loss, variables) + train_op = optimizer.apply_gradients(zip(gradients, variables)) + self.assertFalse(root_trackable.save_counter.trainable) + self.evaluate(trackable_utils.gather_initializers( + root_trackable)) + self.evaluate(train_op) + prefix = os.path.join(self.get_temp_dir(), "ckpt") + self.evaluate(state_ops.assign(model._named_dense.variables[1], [42.])) + m_bias_slot = optimizer.get_slot(model._named_dense.variables[1], "m") + self.evaluate(state_ops.assign(m_bias_slot, [1.5])) + save_path = root_trackable.save(file_prefix=prefix) + self.evaluate(state_ops.assign(model._named_dense.variables[1], [43.])) + self.evaluate(state_ops.assign(root_trackable.save_counter, 3)) + optimizer_variables = 
self.evaluate( + sorted(optimizer.variables(), key=lambda v: v.name)) + self.evaluate(state_ops.assign(m_bias_slot, [-2.])) + # Immediate restoration + status = root_trackable.restore(save_path=save_path).assert_consumed() + status.run_restore_ops() + self.assertAllEqual([42.], self.evaluate(model._named_dense.variables[1])) + self.assertAllEqual(1, self.evaluate(root_trackable.save_counter)) + self.assertAllEqual([1.5], self.evaluate(m_bias_slot)) + if not context.executing_eagerly(): + return # Restore-on-create is only supported when executing eagerly + on_create_model = MyModel() + on_create_optimizer = adam.Adam(0.001) + on_create_root = trackable_utils.Checkpoint( + optimizer=on_create_optimizer, model=on_create_model) + # Deferred restoration + status = on_create_root.restore(save_path=save_path) + status.assert_nontrivial_match() + status.assert_existing_objects_matched() with self.assertRaises(AssertionError): status.assert_consumed() - # Optimizer slot variables are created when the original variable is - # restored. - self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot)) - dummy_var = resource_variable_ops.ResourceVariable([1.]) - on_create_optimizer.minimize(loss=dummy_var.read_value, - var_list=[dummy_var]) - status.assert_existing_objects_matched() - status.assert_consumed() - self.assertAllEqual( - optimizer_variables, - # Creation order is different, so .variables() needs to be re-sorted. - self.evaluate(sorted(optimizer.variables(), key=lambda v: v.name))) + on_create_model(constant_op.constant([[3.]])) # create variables + self.assertAllEqual(1, self.evaluate(on_create_root.save_counter)) + self.assertAllEqual([42.], + self.evaluate( + on_create_model._named_dense.variables[1])) + on_create_m_bias_slot = on_create_optimizer.get_slot( + on_create_model._named_dense.variables[1], "m") + status.assert_existing_objects_matched() + if not context.executing_eagerly(): + with self.assertRaises(AssertionError): + status.assert_consumed() + # Optimizer slot variables are created when the original variable is + # restored. + self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot)) + dummy_var = resource_variable_ops.ResourceVariable([1.]) + on_create_optimizer.minimize(loss=dummy_var.read_value, + var_list=[dummy_var]) + status.assert_existing_objects_matched() + status.assert_consumed() + self.assertAllEqual( + optimizer_variables, + # Creation order is different, so .variables() needs to be re-sorted. + self.evaluate(sorted(optimizer.variables(), key=lambda v: v.name))) # TODO(allenl): Debug garbage created by this test in python3. def testDeferredRestorationUsageEager(self): @@ -344,39 +346,40 @@ class CheckpointingTests(parameterized.TestCase, test.TestCase): self.assertEqual(training_continuation + 1, session.run(root.save_counter)) - @test_util.run_in_graph_and_eager_modes + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testAgnosticUsage(self): """Graph/eager agnostic usage.""" # Does create garbage when executing eagerly due to ops.Graph() creation. 
- num_training_steps = 10 - checkpoint_directory = self.get_temp_dir() - def _train_fn(model, input_value): - with backprop.GradientTape() as tape: - loss = model(input_value) - variables = model.trainable_variables - gradients = tape.gradient(loss, variables) - return optimizer.apply_gradients(zip(gradients, variables)) - for training_continuation in range(3): - with test_util.device(use_gpu=True): - model = MyModel() - optimizer = adam.Adam(0.001) - root = trackable_utils.Checkpoint( - optimizer=optimizer, model=model) - manager = checkpoint_management.CheckpointManager( - root, checkpoint_directory, max_to_keep=1) - status = root.restore(save_path=manager.latest_checkpoint) - input_value = constant_op.constant([[3.]]) - train_fn = functools.partial(_train_fn, model, input_value) - if not context.executing_eagerly(): - train_fn = functools.partial(self.evaluate, train_fn()) - status.initialize_or_restore() - for _ in range(num_training_steps): - train_fn() - manager.save() - self.assertEqual((training_continuation + 1) * num_training_steps, - self.evaluate(root.optimizer.iterations)) - self.assertEqual(training_continuation + 1, - self.evaluate(root.save_counter)) + with self.test_session(): + num_training_steps = 10 + checkpoint_directory = self.get_temp_dir() + def _train_fn(model, input_value): + with backprop.GradientTape() as tape: + loss = model(input_value) + variables = model.trainable_variables + gradients = tape.gradient(loss, variables) + return optimizer.apply_gradients(zip(gradients, variables)) + for training_continuation in range(3): + with test_util.device(use_gpu=True): + model = MyModel() + optimizer = adam.Adam(0.001) + root = trackable_utils.Checkpoint( + optimizer=optimizer, model=model) + manager = checkpoint_management.CheckpointManager( + root, checkpoint_directory, max_to_keep=1) + status = root.restore(save_path=manager.latest_checkpoint) + input_value = constant_op.constant([[3.]]) + train_fn = functools.partial(_train_fn, model, input_value) + if not context.executing_eagerly(): + train_fn = functools.partial(self.evaluate, train_fn()) + status.initialize_or_restore() + for _ in range(num_training_steps): + train_fn() + manager.save() + self.assertEqual((training_continuation + 1) * num_training_steps, + self.evaluate(root.optimizer.iterations)) + self.assertEqual(training_continuation + 1, + self.evaluate(root.save_counter)) def testPartialRestoreWarningObject(self): with context.eager_mode(): @@ -404,46 +407,47 @@ class CheckpointingTests(parameterized.TestCase, test.TestCase): self.assertIn("expect_partial()", messages) # pylint: disable=cell-var-from-loop - @test_util.run_in_graph_and_eager_modes + @combinations.generate(combinations.combine(mode=["graph", "eager"])) @test_util.run_v1_only("b/120545219") def testWithDefun(self): - num_training_steps = 2 - checkpoint_directory = self.get_temp_dir() - checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - for training_continuation in range(3): - with test_util.device(use_gpu=True): - model = MyModel() - # Don't actually train so we can test variable values - optimizer = adam.Adam(0.) 
- root = trackable_utils.Checkpoint( - optimizer=optimizer, model=model) - checkpoint_path = checkpoint_management.latest_checkpoint( - checkpoint_directory) - status = root.restore(save_path=checkpoint_path) - def train_fn(): - @def_function.function - def _call_model(x): - return model(x) - with backprop.GradientTape() as tape: - loss = _call_model(constant_op.constant([[3.]])) - gradients = tape.gradient(loss, model.variables) - return optimizer.apply_gradients(zip(gradients, model.variables)) - if not context.executing_eagerly(): - train_fn = functools.partial( - self.evaluate, train_fn()) - status.initialize_or_restore() - for _ in range(num_training_steps): - train_fn() - if training_continuation > 0: - status.assert_consumed() - self.assertAllClose([[42.]], self.evaluate(model.variables[0])) - else: - self.evaluate(model.variables[0].assign([[42.]])) - root.save(file_prefix=checkpoint_prefix) - self.assertEqual((training_continuation + 1) * num_training_steps, - self.evaluate(optimizer.iterations)) - self.assertEqual(training_continuation + 1, - self.evaluate(root.save_counter)) + with self.test_session(): + num_training_steps = 2 + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + for training_continuation in range(3): + with test_util.device(use_gpu=True): + model = MyModel() + # Don't actually train so we can test variable values + optimizer = adam.Adam(0.) + root = trackable_utils.Checkpoint( + optimizer=optimizer, model=model) + checkpoint_path = checkpoint_management.latest_checkpoint( + checkpoint_directory) + status = root.restore(save_path=checkpoint_path) + def train_fn(): + @def_function.function + def _call_model(x): + return model(x) + with backprop.GradientTape() as tape: + loss = _call_model(constant_op.constant([[3.]])) + gradients = tape.gradient(loss, model.variables) + return optimizer.apply_gradients(zip(gradients, model.variables)) + if not context.executing_eagerly(): + train_fn = functools.partial( + self.evaluate, train_fn()) + status.initialize_or_restore() + for _ in range(num_training_steps): + train_fn() + if training_continuation > 0: + status.assert_consumed() + self.assertAllClose([[42.]], self.evaluate(model.variables[0])) + else: + self.evaluate(model.variables[0].assign([[42.]])) + root.save(file_prefix=checkpoint_prefix) + self.assertEqual((training_continuation + 1) * num_training_steps, + self.evaluate(optimizer.iterations)) + self.assertEqual(training_continuation + 1, + self.evaluate(root.save_counter)) # pylint: enable=cell-var-from-loop def testAnonymousVarsInInit(self): @@ -475,71 +479,73 @@ class CheckpointingTests(parameterized.TestCase, test.TestCase): optimizer.apply_gradients( [(g, v) for g, v in zip(grad, model.vars)]) - @test_util.run_in_graph_and_eager_modes + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testDeferredSlotRestoration(self): - checkpoint_directory = self.get_temp_dir() + with self.test_session(): + checkpoint_directory = self.get_temp_dir() - root = trackable_utils.Checkpoint() - root.var = trackable_utils.add_variable( - root, name="var", initializer=0.) - optimizer = adam.Adam(0.1) - variables = [root.var] - gradients = [1.] - train_op = optimizer.apply_gradients(zip(gradients, variables)) - # Note that `optimizer` has not been added as a dependency of - # `root`. Create a one-off grouping so that slot variables for `root.var` - # get initialized too. 
- self.evaluate(trackable_utils.gather_initializers( - trackable_utils.Checkpoint(root=root, optimizer=optimizer))) - self.evaluate(train_op) - self.evaluate(state_ops.assign(root.var, 12.)) - no_slots_path = root.save(os.path.join(checkpoint_directory, "no_slots")) - root.optimizer = optimizer - self.evaluate(state_ops.assign(root.var, 13.)) - self.evaluate(state_ops.assign( - optimizer.get_slot(slot_name="m", var=root.var), - 14.)) - slots_path = root.save(os.path.join(checkpoint_directory, "with_slots")) - new_root = trackable_utils.Checkpoint() - # Load the slot-containing checkpoint (deferred), then immediately overwrite - # the non-slot variable (also deferred). - slot_status = new_root.restore(slots_path) - no_slot_status = new_root.restore(no_slots_path) - with self.assertRaises(AssertionError): + root = trackable_utils.Checkpoint() + root.var = trackable_utils.add_variable( + root, name="var", initializer=0.) + optimizer = adam.Adam(0.1) + variables = [root.var] + gradients = [1.] + train_op = optimizer.apply_gradients(zip(gradients, variables)) + # Note that `optimizer` has not been added as a dependency of + # `root`. Create a one-off grouping so that slot variables for `root.var` + # get initialized too. + self.evaluate(trackable_utils.gather_initializers( + trackable_utils.Checkpoint(root=root, optimizer=optimizer))) + self.evaluate(train_op) + self.evaluate(state_ops.assign(root.var, 12.)) + no_slots_path = root.save(os.path.join(checkpoint_directory, "no_slots")) + root.optimizer = optimizer + self.evaluate(state_ops.assign(root.var, 13.)) + self.evaluate(state_ops.assign( + optimizer.get_slot(slot_name="m", var=root.var), + 14.)) + slots_path = root.save(os.path.join(checkpoint_directory, "with_slots")) + new_root = trackable_utils.Checkpoint() + # Load the slot-containing checkpoint (deferred), then immediately + # overwrite the non-slot variable (also deferred). + slot_status = new_root.restore(slots_path) + no_slot_status = new_root.restore(no_slots_path) + with self.assertRaises(AssertionError): + no_slot_status.assert_consumed() + new_root.var = trackable_utils.add_variable( + new_root, name="var", shape=[]) no_slot_status.assert_consumed() - new_root.var = trackable_utils.add_variable( - new_root, name="var", shape=[]) - no_slot_status.assert_consumed() - no_slot_status.run_restore_ops() - self.assertEqual(12., self.evaluate(new_root.var)) - new_root.optimizer = adam.Adam(0.1) - slot_status.assert_existing_objects_matched() - if not context.executing_eagerly(): - with self.assertRaisesRegex(AssertionError, "Unresolved object"): - slot_status.assert_consumed() - self.assertEqual(12., self.evaluate(new_root.var)) - if context.executing_eagerly(): - # Slot variables are only created with restoring initializers when - # executing eagerly. - self.assertEqual(14., self.evaluate( - new_root.optimizer.get_slot(slot_name="m", var=new_root.var))) - else: - # Slot variables are not created eagerly when graph building. - with self.assertRaises(KeyError): - new_root.optimizer.get_slot(slot_name="m", var=new_root.var) - variables = [new_root.var] - gradients = [1.] - train_op = new_root.optimizer.apply_gradients(zip(gradients, variables)) - # The slot variable now exists; restore() didn't create it, but we should - # now have a restore op for it. - slot_status.run_restore_ops() - if not context.executing_eagerly(): - # The train op hasn't run when graph building, so the slot variable has - # its restored value. It has run in eager, so the value will be different. 
- self.assertEqual(14., self.evaluate( - new_root.optimizer.get_slot(slot_name="m", var=new_root.var))) - self.evaluate(train_op) - slot_status.assert_consumed() + no_slot_status.run_restore_ops() + self.assertEqual(12., self.evaluate(new_root.var)) + new_root.optimizer = adam.Adam(0.1) + slot_status.assert_existing_objects_matched() + if not context.executing_eagerly(): + with self.assertRaisesRegex(AssertionError, "Unresolved object"): + slot_status.assert_consumed() + self.assertEqual(12., self.evaluate(new_root.var)) + if context.executing_eagerly(): + # Slot variables are only created with restoring initializers when + # executing eagerly. + self.assertEqual(14., self.evaluate( + new_root.optimizer.get_slot(slot_name="m", var=new_root.var))) + else: + # Slot variables are not created eagerly when graph building. + with self.assertRaises(KeyError): + new_root.optimizer.get_slot(slot_name="m", var=new_root.var) + variables = [new_root.var] + gradients = [1.] + train_op = new_root.optimizer.apply_gradients(zip(gradients, variables)) + # The slot variable now exists; restore() didn't create it, but we should + # now have a restore op for it. + slot_status.run_restore_ops() + if not context.executing_eagerly(): + # The train op hasn't run when graph building, so the slot variable has + # its restored value. It has run in eager, so the value will + # be different. + self.assertEqual(14., self.evaluate( + new_root.optimizer.get_slot(slot_name="m", var=new_root.var))) + self.evaluate(train_op) + slot_status.assert_consumed() def testManySavesGraph(self): """Saves after the first should not modify the graph.""" @@ -578,126 +584,129 @@ class CheckpointingTests(parameterized.TestCase, test.TestCase): graph.finalize() obj.restore(save_path) - @test_util.run_in_graph_and_eager_modes + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def test_sequential(self): - model = sequential.Sequential() - checkpoint = trackable_utils.Checkpoint(model=model) - model.add(core.Dense(4)) - second_dense = core.Dense(5) - model.add(second_dense) - model(constant_op.constant([[1.]])) - checkpoint.restore(None).initialize_or_restore() - self.evaluate(second_dense.bias.assign( - constant_op.constant([1., 2., 3., 4., 5.]))) - checkpoint_directory = self.get_temp_dir() - checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - save_path = checkpoint.save(checkpoint_prefix) - self.evaluate(second_dense.bias.assign( - constant_op.constant([5., 6., 7., 8., 9.]))) - checkpoint.restore(save_path).assert_consumed().run_restore_ops() - self.assertAllEqual([1., 2., 3., 4., 5.], self.evaluate(second_dense.bias)) + with self.test_session(): + model = sequential.Sequential() + checkpoint = trackable_utils.Checkpoint(model=model) + model.add(core.Dense(4)) + second_dense = core.Dense(5) + model.add(second_dense) + model(constant_op.constant([[1.]])) + checkpoint.restore(None).initialize_or_restore() + self.evaluate(second_dense.bias.assign( + constant_op.constant([1., 2., 3., 4., 5.]))) + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + save_path = checkpoint.save(checkpoint_prefix) + self.evaluate(second_dense.bias.assign( + constant_op.constant([5., 6., 7., 8., 9.]))) + checkpoint.restore(save_path).assert_consumed().run_restore_ops() + self.assertAllEqual([1., 2., 3., 4., 5.], + self.evaluate(second_dense.bias)) - deferred_sequential = sequential.Sequential() - deferred_sequential_checkpoint = trackable_utils.Checkpoint( - 
model=deferred_sequential) - status = deferred_sequential_checkpoint.restore(save_path) - deferred_sequential.add(core.Dense(4)) - deferred_second_dense = core.Dense(5) - deferred_sequential.add(deferred_second_dense) - deferred_sequential(constant_op.constant([[1.]])) - status.run_restore_ops() - self.assertAllEqual([1., 2., 3., 4., 5.], - self.evaluate(deferred_second_dense.bias)) + deferred_sequential = sequential.Sequential() + deferred_sequential_checkpoint = trackable_utils.Checkpoint( + model=deferred_sequential) + status = deferred_sequential_checkpoint.restore(save_path) + deferred_sequential.add(core.Dense(4)) + deferred_second_dense = core.Dense(5) + deferred_sequential.add(deferred_second_dense) + deferred_sequential(constant_op.constant([[1.]])) + status.run_restore_ops() + self.assertAllEqual([1., 2., 3., 4., 5.], + self.evaluate(deferred_second_dense.bias)) - @test_util.run_in_graph_and_eager_modes + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def test_initialize_if_not_restoring(self): - checkpoint_directory = self.get_temp_dir() - checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - optimizer_only_prefix = os.path.join(checkpoint_directory, "opt") - with test_util.device(use_gpu=True): - model = MyModel() - optimizer = adam.Adam(0.001) - root = trackable_utils.Checkpoint( - model=model) # Do not save the optimizer with the checkpoint. - optimizer_checkpoint = trackable_utils.Checkpoint( - optimizer=optimizer) + with self.test_session(): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + optimizer_only_prefix = os.path.join(checkpoint_directory, "opt") + with test_util.device(use_gpu=True): + model = MyModel() + optimizer = adam.Adam(0.001) + root = trackable_utils.Checkpoint( + model=model) # Do not save the optimizer with the checkpoint. + optimizer_checkpoint = trackable_utils.Checkpoint( + optimizer=optimizer) - checkpoint_path = checkpoint_management.latest_checkpoint( - checkpoint_directory) - status = root.restore(save_path=checkpoint_path) - input_value = constant_op.constant([[3.]]) - def train_fn(): - with backprop.GradientTape() as tape: - loss = model(input_value) - variables = model.trainable_variables - gradients = tape.gradient(loss, variables) - return optimizer.apply_gradients(zip(gradients, variables)) - if not context.executing_eagerly(): - train_fn = functools.partial(self.evaluate, train_fn()) - status.initialize_or_restore() - # TODO(tanzheny): Add hyper variables to .variables(), and set them with - # set_weights etc. 
- variables_not_in_the_variables_property = [ - obj for obj in optimizer._hyper.values() - if isinstance(obj, variables_lib.Variable)] - self.evaluate([v.initializer for v - in optimizer.variables() - + variables_not_in_the_variables_property]) - train_fn() - model_save_path = root.save(file_prefix=checkpoint_prefix) - self.evaluate(optimizer.beta_1.assign(42.)) - optimizer_save_path = optimizer_checkpoint.save(optimizer_only_prefix) - del train_fn + checkpoint_path = checkpoint_management.latest_checkpoint( + checkpoint_directory) + status = root.restore(save_path=checkpoint_path) + input_value = constant_op.constant([[3.]]) + def train_fn(): + with backprop.GradientTape() as tape: + loss = model(input_value) + variables = model.trainable_variables + gradients = tape.gradient(loss, variables) + return optimizer.apply_gradients(zip(gradients, variables)) + if not context.executing_eagerly(): + train_fn = functools.partial(self.evaluate, train_fn()) + status.initialize_or_restore() + # TODO(tanzheny): Add hyper variables to .variables(), and set them with + # set_weights etc. + variables_not_in_the_variables_property = [ + obj for obj in optimizer._hyper.values() + if isinstance(obj, variables_lib.Variable)] + self.evaluate([v.initializer for v + in optimizer.variables() + + variables_not_in_the_variables_property]) + train_fn() + model_save_path = root.save(file_prefix=checkpoint_prefix) + self.evaluate(optimizer.beta_1.assign(42.)) + optimizer_save_path = optimizer_checkpoint.save(optimizer_only_prefix) + del train_fn - # Restore into a graph with the optimizer - with test_util.device(use_gpu=True): - model = MyModel() - optimizer = adam.Adam(0.001) - root = trackable_utils.Checkpoint( - optimizer=optimizer, model=model) - status = root.restore(save_path=model_save_path) - input_value = constant_op.constant([[3.]]) - def train_fn1(): - with backprop.GradientTape() as tape: - loss = model(input_value) - variables = model.trainable_variables - gradients = tape.gradient(loss, variables) - return optimizer.apply_gradients(zip(gradients, variables)) - if not context.executing_eagerly(): - train_fn1 = functools.partial(self.evaluate, train_fn1()) - status.initialize_or_restore() - train_fn1() - with self.assertRaises(AssertionError): - status.assert_existing_objects_matched() - with self.assertRaises(AssertionError): - status.assert_consumed() - del train_fn1 + # Restore into a graph with the optimizer + with test_util.device(use_gpu=True): + model = MyModel() + optimizer = adam.Adam(0.001) + root = trackable_utils.Checkpoint( + optimizer=optimizer, model=model) + status = root.restore(save_path=model_save_path) + input_value = constant_op.constant([[3.]]) + def train_fn1(): + with backprop.GradientTape() as tape: + loss = model(input_value) + variables = model.trainable_variables + gradients = tape.gradient(loss, variables) + return optimizer.apply_gradients(zip(gradients, variables)) + if not context.executing_eagerly(): + train_fn1 = functools.partial(self.evaluate, train_fn1()) + status.initialize_or_restore() + train_fn1() + with self.assertRaises(AssertionError): + status.assert_existing_objects_matched() + with self.assertRaises(AssertionError): + status.assert_consumed() + del train_fn1 - # Make sure initialization doesn't clobber later restores - with test_util.device(use_gpu=True): - model = MyModel() - optimizer = adam.Adam(0.001, beta_1=1.0) - root = trackable_utils.Checkpoint( - optimizer=optimizer, model=model) - opt_root = trackable_utils.Checkpoint( - optimizer=optimizer) - 
status = root.restore(save_path=model_save_path) - init_only_optimizer_status = opt_root.restore(save_path=None) - optimizer_status = opt_root.restore(save_path=optimizer_save_path) - input_value = constant_op.constant([[3.]]) - def train_fn2(): - with backprop.GradientTape() as tape: - loss = model(input_value) - variables = model.trainable_variables - gradients = tape.gradient(loss, variables) - return optimizer.apply_gradients(zip(gradients, variables)) - if not context.executing_eagerly(): - train_fn2 = functools.partial(self.evaluate, train_fn2()) - optimizer_status.run_restore_ops() - status.initialize_or_restore() - init_only_optimizer_status.initialize_or_restore() - train_fn2() - self.assertEqual(42., self.evaluate(optimizer.beta_1)) + # Make sure initialization doesn't clobber later restores + with test_util.device(use_gpu=True): + model = MyModel() + optimizer = adam.Adam(0.001, beta_1=1.0) + root = trackable_utils.Checkpoint( + optimizer=optimizer, model=model) + opt_root = trackable_utils.Checkpoint( + optimizer=optimizer) + status = root.restore(save_path=model_save_path) + init_only_optimizer_status = opt_root.restore(save_path=None) + optimizer_status = opt_root.restore(save_path=optimizer_save_path) + input_value = constant_op.constant([[3.]]) + def train_fn2(): + with backprop.GradientTape() as tape: + loss = model(input_value) + variables = model.trainable_variables + gradients = tape.gradient(loss, variables) + return optimizer.apply_gradients(zip(gradients, variables)) + if not context.executing_eagerly(): + train_fn2 = functools.partial(self.evaluate, train_fn2()) + optimizer_status.run_restore_ops() + status.initialize_or_restore() + init_only_optimizer_status.initialize_or_restore() + train_fn2() + self.assertEqual(42., self.evaluate(optimizer.beta_1)) class _ManualScope(tracking.AutoTrackable): @@ -712,65 +721,65 @@ class _ManualScope(tracking.AutoTrackable): return variable_scope.get_variable(name="in_manual_scope", shape=[]) -class TemplateTests(parameterized.TestCase, test.TestCase): +@combinations.generate(combinations.combine(mode=["graph", "eager"])) +class TemplateTests(keras_parameterized.TestCase): - @test_util.run_in_graph_and_eager_modes def test_trackable_save_restore(self): + with self.test_session(): + def _templated(): + v = variable_scope.get_variable( + "v", shape=[1], initializer=init_ops.zeros_initializer(), + use_resource=True) + v2 = variable_scope.get_variable( + "v2", shape=[1], initializer=init_ops.zeros_initializer(), + use_resource=True) + manual = _ManualScope() + return v, v + 1., v2, manual, manual() - def _templated(): - v = variable_scope.get_variable( - "v", shape=[1], initializer=init_ops.zeros_initializer(), - use_resource=True) - v2 = variable_scope.get_variable( - "v2", shape=[1], initializer=init_ops.zeros_initializer(), - use_resource=True) - manual = _ManualScope() - return v, v + 1., v2, manual, manual() + save_template = template.make_template("s1", _templated) + v1_save, _, v2_save, manual_scope, manual_scope_v = save_template() + six.assertCountEqual( + self, + [id(v1_save), id(v2_save), id(manual_scope), + id(manual_scope_v), id(save_template)], + map(id, trackable_utils.list_objects(save_template))) + manual_dep, = manual_scope._checkpoint_dependencies + self.assertEqual("in_manual_scope", manual_dep.name) + self.assertIs(manual_scope_v, manual_dep.ref) + optimizer = adam.Adam(0.0) + save_root = trackable_utils.Checkpoint( + my_template=save_template, optimizer=optimizer) + optimizer.minimize(v1_save.read_value, + 
var_list=[v1_save]) + self.evaluate([v.initializer for v in save_template.variables]) + optimizer_variables = optimizer.variables() + list( + optimizer._hyper.values()) + self.evaluate([v.initializer for v in optimizer_variables]) + self.evaluate(v1_save.assign([12.])) + self.evaluate(v2_save.assign([14.])) + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + save_path = save_root.save(checkpoint_prefix) - save_template = template.make_template("s1", _templated) - v1_save, _, v2_save, manual_scope, manual_scope_v = save_template() - six.assertCountEqual( - self, - [id(v1_save), id(v2_save), id(manual_scope), - id(manual_scope_v), id(save_template)], - map(id, trackable_utils.list_objects(save_template))) - manual_dep, = manual_scope._checkpoint_dependencies - self.assertEqual("in_manual_scope", manual_dep.name) - self.assertIs(manual_scope_v, manual_dep.ref) - optimizer = adam.Adam(0.0) - save_root = trackable_utils.Checkpoint( - my_template=save_template, optimizer=optimizer) - optimizer.minimize(v1_save.read_value, - var_list=[v1_save]) - self.evaluate([v.initializer for v in save_template.variables]) - optimizer_variables = optimizer.variables() + list( - optimizer._hyper.values()) - self.evaluate([v.initializer for v in optimizer_variables]) - self.evaluate(v1_save.assign([12.])) - self.evaluate(v2_save.assign([14.])) - checkpoint_directory = self.get_temp_dir() - checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - save_path = save_root.save(checkpoint_prefix) - - load_template = template.make_template("s2", _templated) - load_optimizer = adam.Adam(0.0) - load_root = trackable_utils.Checkpoint( - my_template=load_template, optimizer=load_optimizer) - status = load_root.restore(save_path) - var, var_plus_one, var2, _, _ = load_template() - load_optimizer.minimize(var.read_value, var_list=[var]) - self.assertLen(load_template._checkpoint_dependencies, 3) - self.assertEqual("v", load_template._checkpoint_dependencies[0].name) - self.assertEqual("v2", load_template._checkpoint_dependencies[1].name) - self.assertEqual("ManualScope", - load_template._checkpoint_dependencies[2].name) - status.assert_consumed().run_restore_ops() - self.assertAllEqual([12.], self.evaluate(var)) - self.assertAllEqual([13.], self.evaluate(var_plus_one)) - self.assertAllEqual([14.], self.evaluate(var2)) + load_template = template.make_template("s2", _templated) + load_optimizer = adam.Adam(0.0) + load_root = trackable_utils.Checkpoint( + my_template=load_template, optimizer=load_optimizer) + status = load_root.restore(save_path) + var, var_plus_one, var2, _, _ = load_template() + load_optimizer.minimize(var.read_value, var_list=[var]) + self.assertLen(load_template._checkpoint_dependencies, 3) + self.assertEqual("v", load_template._checkpoint_dependencies[0].name) + self.assertEqual("v2", load_template._checkpoint_dependencies[1].name) + self.assertEqual("ManualScope", + load_template._checkpoint_dependencies[2].name) + status.assert_consumed().run_restore_ops() + self.assertAllEqual([12.], self.evaluate(var)) + self.assertAllEqual([13.], self.evaluate(var_plus_one)) + self.assertAllEqual([14.], self.evaluate(var2)) -class CheckpointCompatibilityTests(test.TestCase): +class CheckpointCompatibilityTests(keras_parameterized.TestCase): def _initialized_model(self): input_value = constant_op.constant([[3.]]) @@ -825,47 +834,49 @@ class CheckpointCompatibilityTests(test.TestCase): save_path=checkpoint_prefix, global_step=root.optimizer.iterations) 
- @test_util.run_in_graph_and_eager_modes + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testLoadFromNameBasedSaver(self): """Save a name-based checkpoint, load it using the object-based API.""" with test_util.device(use_gpu=True): - save_path = self._write_name_based_checkpoint() - root = self._initialized_model() - self._set_sentinels(root) - with self.assertRaises(AssertionError): - self._check_sentinels(root) - object_saver = trackable_utils.TrackableSaver( - graph_view.ObjectGraphView(root)) - self._set_sentinels(root) - status = object_saver.restore(save_path) - if context.executing_eagerly(): - self._check_sentinels(root) - if context.executing_eagerly(): - status.assert_consumed() - status.assert_existing_objects_matched() - status.assert_nontrivial_match() - else: - # When graph building, we haven't read any keys, so we don't know - # whether the restore will be complete. - with self.assertRaisesRegex(AssertionError, "not restored"): + with self.test_session(): + save_path = self._write_name_based_checkpoint() + root = self._initialized_model() + self._set_sentinels(root) + with self.assertRaises(AssertionError): + self._check_sentinels(root) + object_saver = trackable_utils.TrackableSaver( + graph_view.ObjectGraphView(root)) + self._set_sentinels(root) + status = object_saver.restore(save_path) + if context.executing_eagerly(): + self._check_sentinels(root) + if context.executing_eagerly(): status.assert_consumed() - with self.assertRaisesRegex(AssertionError, "not restored"): status.assert_existing_objects_matched() - with self.assertRaisesRegex(AssertionError, "not restored"): status.assert_nontrivial_match() - status.run_restore_ops() - self._check_sentinels(root) - self._set_sentinels(root) - status = object_saver.restore(save_path) - status.initialize_or_restore() - status.assert_nontrivial_match() - self._check_sentinels(root) - # Check that there is no error when keys are missing from the name-based - # checkpoint. - root.not_in_name_checkpoint = resource_variable_ops.ResourceVariable([1.]) - status = object_saver.restore(save_path) - with self.assertRaises(AssertionError): - status.assert_existing_objects_matched() + else: + # When graph building, we haven't read any keys, so we don't know + # whether the restore will be complete. + with self.assertRaisesRegex(AssertionError, "not restored"): + status.assert_consumed() + with self.assertRaisesRegex(AssertionError, "not restored"): + status.assert_existing_objects_matched() + with self.assertRaisesRegex(AssertionError, "not restored"): + status.assert_nontrivial_match() + status.run_restore_ops() + self._check_sentinels(root) + self._set_sentinels(root) + status = object_saver.restore(save_path) + status.initialize_or_restore() + status.assert_nontrivial_match() + self._check_sentinels(root) + # Check that there is no error when keys are missing from the name-based + # checkpoint. 
+ root.not_in_name_checkpoint = resource_variable_ops.ResourceVariable( + [1.]) + status = object_saver.restore(save_path) + with self.assertRaises(AssertionError): + status.assert_existing_objects_matched() def testSaveGraphLoadEager(self): checkpoint_directory = self.get_temp_dir() diff --git a/tensorflow/python/keras/tests/tracking_util_with_v1_optimizers_test.py b/tensorflow/python/keras/tests/tracking_util_with_v1_optimizers_test.py index 1a699803e1a..4583616f4d9 100644 --- a/tensorflow/python/keras/tests/tracking_util_with_v1_optimizers_test.py +++ b/tensorflow/python/keras/tests/tracking_util_with_v1_optimizers_test.py @@ -1,4 +1,4 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. + # Copyright 2017 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -31,6 +31,8 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import test_util +from tensorflow.python.keras import combinations +from tensorflow.python.keras import keras_parameterized from tensorflow.python.keras.engine import training from tensorflow.python.keras.layers import core from tensorflow.python.ops import state_ops @@ -68,7 +70,7 @@ class MyModel(training.Model): return ret -class CheckpointingTests(test.TestCase): +class CheckpointingTests(keras_parameterized.TestCase): @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) def testNamingWithOptimizer(self): @@ -178,77 +180,78 @@ class CheckpointingTests(test.TestCase): optimizer_node.slot_variables[0] .slot_variable_node_id].attributes[0].checkpoint_key) - @test_util.run_in_graph_and_eager_modes + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testSaveRestore(self): - model = MyModel() - optimizer = adam.AdamOptimizer(0.001) - root_trackable = trackable_utils.Checkpoint( - optimizer=optimizer, model=model) - input_value = constant_op.constant([[3.]]) - if context.executing_eagerly(): - optimizer.minimize( - lambda: model(input_value)) - else: - train_op = optimizer.minimize(model(input_value)) - # TODO(allenl): Make initialization more pleasant when graph building. 
- root_trackable.save_counter # pylint: disable=pointless-statement - self.evaluate(trackable_utils.gather_initializers( - root_trackable)) - self.evaluate(train_op) - prefix = os.path.join(self.get_temp_dir(), "ckpt") - self.evaluate(state_ops.assign(model._named_dense.variables[1], [42.])) - m_bias_slot = optimizer.get_slot(model._named_dense.variables[1], "m") - self.evaluate(state_ops.assign(m_bias_slot, [1.5])) - save_path = root_trackable.save(file_prefix=prefix) - self.evaluate(state_ops.assign(model._named_dense.variables[1], [43.])) - self.evaluate(state_ops.assign(root_trackable.save_counter, 3)) - optimizer_variables = self.evaluate(optimizer.variables()) - self.evaluate(state_ops.assign(m_bias_slot, [-2.])) - # Immediate restoration - status = root_trackable.restore(save_path=save_path).assert_consumed() - status.run_restore_ops() - self.assertAllEqual([42.], self.evaluate(model._named_dense.variables[1])) - self.assertAllEqual(1, self.evaluate(root_trackable.save_counter)) - self.assertAllEqual([1.5], self.evaluate(m_bias_slot)) - if not context.executing_eagerly(): - return # Restore-on-create is only supported when executing eagerly - on_create_model = MyModel() - on_create_optimizer = adam.AdamOptimizer( - 0.001, - # Preserve beta1_power and beta2_power when applying gradients so we can - # test that they've been restored correctly. - beta1=1.0, - beta2=1.0) - on_create_root = trackable_utils.Checkpoint( - optimizer=on_create_optimizer, model=on_create_model) - # Deferred restoration - status = on_create_root.restore(save_path=save_path) - status.assert_nontrivial_match() - status.assert_existing_objects_matched() - with self.assertRaises(AssertionError): + with self.test_session(): + model = MyModel() + optimizer = adam.AdamOptimizer(0.001) + root_trackable = trackable_utils.Checkpoint( + optimizer=optimizer, model=model) + input_value = constant_op.constant([[3.]]) + if context.executing_eagerly(): + optimizer.minimize( + lambda: model(input_value)) + else: + train_op = optimizer.minimize(model(input_value)) + # TODO(allenl): Make initialization more pleasant when graph building. + root_trackable.save_counter # pylint: disable=pointless-statement + self.evaluate(trackable_utils.gather_initializers( + root_trackable)) + self.evaluate(train_op) + prefix = os.path.join(self.get_temp_dir(), "ckpt") + self.evaluate(state_ops.assign(model._named_dense.variables[1], [42.])) + m_bias_slot = optimizer.get_slot(model._named_dense.variables[1], "m") + self.evaluate(state_ops.assign(m_bias_slot, [1.5])) + save_path = root_trackable.save(file_prefix=prefix) + self.evaluate(state_ops.assign(model._named_dense.variables[1], [43.])) + self.evaluate(state_ops.assign(root_trackable.save_counter, 3)) + optimizer_variables = self.evaluate(optimizer.variables()) + self.evaluate(state_ops.assign(m_bias_slot, [-2.])) + # Immediate restoration + status = root_trackable.restore(save_path=save_path).assert_consumed() + status.run_restore_ops() + self.assertAllEqual([42.], self.evaluate(model._named_dense.variables[1])) + self.assertAllEqual(1, self.evaluate(root_trackable.save_counter)) + self.assertAllEqual([1.5], self.evaluate(m_bias_slot)) + if not context.executing_eagerly(): + return # Restore-on-create is only supported when executing eagerly + on_create_model = MyModel() + on_create_optimizer = adam.AdamOptimizer( + 0.001, + # Preserve beta1_power and beta2_power when applying gradients + # so we can test that they've been restored correctly. 
+ beta1=1.0, + beta2=1.0) + on_create_root = trackable_utils.Checkpoint( + optimizer=on_create_optimizer, model=on_create_model) + # Deferred restoration + status = on_create_root.restore(save_path=save_path) + status.assert_nontrivial_match() + status.assert_existing_objects_matched() + with self.assertRaises(AssertionError): + status.assert_consumed() + on_create_model(constant_op.constant([[3.]])) # create variables + self.assertAllEqual(1, self.evaluate(on_create_root.save_counter)) + self.assertAllEqual([42.], + self.evaluate( + on_create_model._named_dense.variables[1])) + on_create_m_bias_slot = on_create_optimizer.get_slot( + on_create_model._named_dense.variables[1], "m") + status.assert_existing_objects_matched() + with self.assertRaises(AssertionError): + status.assert_consumed() + # Optimizer slot variables are created when the original variable is + # restored. + self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot)) + self.assertAllEqual(optimizer_variables[2:], + self.evaluate(on_create_optimizer.variables())) + dummy_var = variables.Variable([1.]) + on_create_optimizer.minimize(loss=dummy_var.read_value) + status.assert_existing_objects_matched() status.assert_consumed() - on_create_model(constant_op.constant([[3.]])) # create variables - self.assertAllEqual(1, self.evaluate(on_create_root.save_counter)) - self.assertAllEqual([42.], - self.evaluate( - on_create_model._named_dense.variables[1])) - on_create_m_bias_slot = on_create_optimizer.get_slot( - on_create_model._named_dense.variables[1], "m") - status.assert_existing_objects_matched() - with self.assertRaises(AssertionError): - status.assert_consumed() - # Optimizer slot variables are created when the original variable is - # restored. - self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot)) - self.assertAllEqual(optimizer_variables[2:], - self.evaluate(on_create_optimizer.variables())) - dummy_var = variables.Variable([1.]) - on_create_optimizer.minimize(loss=dummy_var.read_value) - status.assert_existing_objects_matched() - status.assert_consumed() - beta1_power, beta2_power = on_create_optimizer._get_beta_accumulators() - self.assertAllEqual(optimizer_variables[0], self.evaluate(beta1_power)) - self.assertAllEqual(optimizer_variables[1], self.evaluate(beta2_power)) + beta1_power, beta2_power = on_create_optimizer._get_beta_accumulators() + self.assertAllEqual(optimizer_variables[0], self.evaluate(beta1_power)) + self.assertAllEqual(optimizer_variables[1], self.evaluate(beta2_power)) # TODO(allenl): Debug garbage created by this test in python3. def testDeferredRestorationUsageEager(self): @@ -378,80 +381,82 @@ class CheckpointingTests(test.TestCase): self.assertEqual(training_continuation + 1, session.run(root.save_counter)) - @test_util.run_in_graph_and_eager_modes + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testAgnosticUsage(self): """Graph/eager agnostic usage.""" # Does create garbage when executing eagerly due to ops.Graph() creation. 
- num_training_steps = 10 - checkpoint_directory = self.get_temp_dir() - for training_continuation in range(3): - with test_util.device(use_gpu=True): - model = MyModel() - optimizer = adam.AdamOptimizer(0.001) - root = trackable_utils.Checkpoint( - optimizer=optimizer, model=model, - global_step=training_util.get_or_create_global_step()) - manager = checkpoint_management.CheckpointManager( - root, checkpoint_directory, max_to_keep=1) - status = root.restore(save_path=manager.latest_checkpoint) - input_value = constant_op.constant([[3.]]) - train_fn = functools.partial( - optimizer.minimize, - functools.partial(model, input_value), - global_step=root.global_step) - if not context.executing_eagerly(): - train_fn = functools.partial(self.evaluate, train_fn()) - status.initialize_or_restore() - for _ in range(num_training_steps): - train_fn() - manager.save() - self.assertEqual((training_continuation + 1) * num_training_steps, - self.evaluate(root.global_step)) - self.assertEqual(training_continuation + 1, - self.evaluate(root.save_counter)) + with self.test_session(): + num_training_steps = 10 + checkpoint_directory = self.get_temp_dir() + for training_continuation in range(3): + with test_util.device(use_gpu=True): + model = MyModel() + optimizer = adam.AdamOptimizer(0.001) + root = trackable_utils.Checkpoint( + optimizer=optimizer, model=model, + global_step=training_util.get_or_create_global_step()) + manager = checkpoint_management.CheckpointManager( + root, checkpoint_directory, max_to_keep=1) + status = root.restore(save_path=manager.latest_checkpoint) + input_value = constant_op.constant([[3.]]) + train_fn = functools.partial( + optimizer.minimize, + functools.partial(model, input_value), + global_step=root.global_step) + if not context.executing_eagerly(): + train_fn = functools.partial(self.evaluate, train_fn()) + status.initialize_or_restore() + for _ in range(num_training_steps): + train_fn() + manager.save() + self.assertEqual((training_continuation + 1) * num_training_steps, + self.evaluate(root.global_step)) + self.assertEqual(training_continuation + 1, + self.evaluate(root.save_counter)) # pylint: disable=cell-var-from-loop - @test_util.run_in_graph_and_eager_modes + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testWithDefun(self): - num_training_steps = 2 - checkpoint_directory = self.get_temp_dir() - checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - for training_continuation in range(3): - with test_util.device(use_gpu=True): - model = MyModel() - # Don't actually train so we can test variable values - optimizer = adam.AdamOptimizer(0.) 
- root = trackable_utils.Checkpoint( - optimizer=optimizer, model=model, - global_step=training_util.get_or_create_global_step()) - checkpoint_path = checkpoint_management.latest_checkpoint( - checkpoint_directory) - status = root.restore(save_path=checkpoint_path) - def train_fn(): - @def_function.function - def _call_model(x): - return model(x) - with backprop.GradientTape() as tape: - loss = _call_model(constant_op.constant([[3.]])) - gradients = tape.gradient(loss, model.variables) - return optimizer.apply_gradients(zip(gradients, model.variables), - global_step=root.global_step) - if not context.executing_eagerly(): - train_fn = functools.partial( - self.evaluate, train_fn()) - status.initialize_or_restore() - for _ in range(num_training_steps): - train_fn() - if training_continuation > 0: - status.assert_consumed() - self.assertAllClose([[42.]], self.evaluate(model.variables[0])) - else: - self.evaluate(model.variables[0].assign([[42.]])) - root.save(file_prefix=checkpoint_prefix) - self.assertEqual((training_continuation + 1) * num_training_steps, - self.evaluate(root.global_step)) - self.assertEqual(training_continuation + 1, - self.evaluate(root.save_counter)) + with self.test_session(): + num_training_steps = 2 + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + for training_continuation in range(3): + with test_util.device(use_gpu=True): + model = MyModel() + # Don't actually train so we can test variable values + optimizer = adam.AdamOptimizer(0.) + root = trackable_utils.Checkpoint( + optimizer=optimizer, model=model, + global_step=training_util.get_or_create_global_step()) + checkpoint_path = checkpoint_management.latest_checkpoint( + checkpoint_directory) + status = root.restore(save_path=checkpoint_path) + def train_fn(): + @def_function.function + def _call_model(x): + return model(x) + with backprop.GradientTape() as tape: + loss = _call_model(constant_op.constant([[3.]])) + gradients = tape.gradient(loss, model.variables) + return optimizer.apply_gradients(zip(gradients, model.variables), + global_step=root.global_step) + if not context.executing_eagerly(): + train_fn = functools.partial( + self.evaluate, train_fn()) + status.initialize_or_restore() + for _ in range(num_training_steps): + train_fn() + if training_continuation > 0: + status.assert_consumed() + self.assertAllClose([[42.]], self.evaluate(model.variables[0])) + else: + self.evaluate(model.variables[0].assign([[42.]])) + root.save(file_prefix=checkpoint_prefix) + self.assertEqual((training_continuation + 1) * num_training_steps, + self.evaluate(root.global_step)) + self.assertEqual(training_continuation + 1, + self.evaluate(root.save_counter)) # pylint: enable=cell-var-from-loop def _get_checkpoint_name(self, name): @@ -493,86 +498,87 @@ class CheckpointingTests(test.TestCase): optimizer.apply_gradients( [(g, v) for g, v in zip(grad, model.vars)]) - @test_util.run_in_graph_and_eager_modes + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def test_initialize_if_not_restoring(self): - checkpoint_directory = self.get_temp_dir() - checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - optimizer_only_prefix = os.path.join(checkpoint_directory, "opt") - with test_util.device(use_gpu=True): - model = MyModel() - optimizer = adam.AdamOptimizer(0.001) - root = trackable_utils.Checkpoint( - model=model, # Do not save the optimizer with the checkpoint. 
- global_step=training_util.get_or_create_global_step()) - optimizer_checkpoint = trackable_utils.Checkpoint( - optimizer=optimizer) + with self.test_session(): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + optimizer_only_prefix = os.path.join(checkpoint_directory, "opt") + with test_util.device(use_gpu=True): + model = MyModel() + optimizer = adam.AdamOptimizer(0.001) + root = trackable_utils.Checkpoint( + model=model, # Do not save the optimizer with the checkpoint. + global_step=training_util.get_or_create_global_step()) + optimizer_checkpoint = trackable_utils.Checkpoint( + optimizer=optimizer) - checkpoint_path = checkpoint_management.latest_checkpoint( - checkpoint_directory) - status = root.restore(save_path=checkpoint_path) - input_value = constant_op.constant([[3.]]) - train_fn = functools.partial( - optimizer.minimize, - functools.partial(model, input_value), - global_step=root.global_step) - if not context.executing_eagerly(): - train_fn = functools.partial(self.evaluate, train_fn()) - status.initialize_or_restore() - self.evaluate([v.initializer for v in optimizer.variables()]) - train_fn() - model_save_path = root.save(file_prefix=checkpoint_prefix) - self.evaluate(optimizer.variables()[0].assign(42.)) - optimizer_save_path = optimizer_checkpoint.save(optimizer_only_prefix) + checkpoint_path = checkpoint_management.latest_checkpoint( + checkpoint_directory) + status = root.restore(save_path=checkpoint_path) + input_value = constant_op.constant([[3.]]) + train_fn = functools.partial( + optimizer.minimize, + functools.partial(model, input_value), + global_step=root.global_step) + if not context.executing_eagerly(): + train_fn = functools.partial(self.evaluate, train_fn()) + status.initialize_or_restore() + self.evaluate([v.initializer for v in optimizer.variables()]) + train_fn() + model_save_path = root.save(file_prefix=checkpoint_prefix) + self.evaluate(optimizer.variables()[0].assign(42.)) + optimizer_save_path = optimizer_checkpoint.save(optimizer_only_prefix) - # Restore into a graph with the optimizer - with test_util.device(use_gpu=True): - model = MyModel() - optimizer = adam.AdamOptimizer(0.001) - root = trackable_utils.Checkpoint( - optimizer=optimizer, model=model, - global_step=training_util.get_or_create_global_step()) - status = root.restore(save_path=model_save_path) - input_value = constant_op.constant([[3.]]) - train_fn = functools.partial( - optimizer.minimize, - functools.partial(model, input_value), - global_step=root.global_step) - if not context.executing_eagerly(): - train_fn = functools.partial(self.evaluate, train_fn()) - status.initialize_or_restore() - train_fn() - with self.assertRaises(AssertionError): - status.assert_existing_objects_matched() - with self.assertRaises(AssertionError): - status.assert_consumed() + # Restore into a graph with the optimizer + with test_util.device(use_gpu=True): + model = MyModel() + optimizer = adam.AdamOptimizer(0.001) + root = trackable_utils.Checkpoint( + optimizer=optimizer, model=model, + global_step=training_util.get_or_create_global_step()) + status = root.restore(save_path=model_save_path) + input_value = constant_op.constant([[3.]]) + train_fn = functools.partial( + optimizer.minimize, + functools.partial(model, input_value), + global_step=root.global_step) + if not context.executing_eagerly(): + train_fn = functools.partial(self.evaluate, train_fn()) + status.initialize_or_restore() + train_fn() + with self.assertRaises(AssertionError): + 
status.assert_existing_objects_matched() + with self.assertRaises(AssertionError): + status.assert_consumed() - # Make sure initialization doesn't clobber later restores - with test_util.device(use_gpu=True): - model = MyModel() - optimizer = adam.AdamOptimizer(0.001, beta1=1.0) - root = trackable_utils.Checkpoint( - optimizer=optimizer, model=model, - global_step=training_util.get_or_create_global_step()) - opt_root = trackable_utils.Checkpoint( - optimizer=optimizer) - status = root.restore(save_path=model_save_path) - init_only_optimizer_status = opt_root.restore(save_path=None) - optimizer_status = opt_root.restore(save_path=optimizer_save_path) - input_value = constant_op.constant([[3.]]) - train_fn = functools.partial( - optimizer.minimize, - functools.partial(model, input_value), - global_step=root.global_step) - if not context.executing_eagerly(): - train_fn = functools.partial(self.evaluate, train_fn()) - optimizer_status.run_restore_ops() - status.initialize_or_restore() - init_only_optimizer_status.initialize_or_restore() - train_fn() - self.assertEqual(42., self.evaluate(optimizer.variables()[0])) + # Make sure initialization doesn't clobber later restores + with test_util.device(use_gpu=True): + model = MyModel() + optimizer = adam.AdamOptimizer(0.001, beta1=1.0) + root = trackable_utils.Checkpoint( + optimizer=optimizer, model=model, + global_step=training_util.get_or_create_global_step()) + opt_root = trackable_utils.Checkpoint( + optimizer=optimizer) + status = root.restore(save_path=model_save_path) + init_only_optimizer_status = opt_root.restore(save_path=None) + optimizer_status = opt_root.restore(save_path=optimizer_save_path) + input_value = constant_op.constant([[3.]]) + train_fn = functools.partial( + optimizer.minimize, + functools.partial(model, input_value), + global_step=root.global_step) + if not context.executing_eagerly(): + train_fn = functools.partial(self.evaluate, train_fn()) + optimizer_status.run_restore_ops() + status.initialize_or_restore() + init_only_optimizer_status.initialize_or_restore() + train_fn() + self.assertEqual(42., self.evaluate(optimizer.variables()[0])) -class CheckpointCompatibilityTests(test.TestCase): +class CheckpointCompatibilityTests(keras_parameterized.TestCase): def _initialized_model(self): input_value = constant_op.constant([[3.]]) @@ -627,46 +633,47 @@ class CheckpointCompatibilityTests(test.TestCase): sess=session, save_path=checkpoint_prefix, global_step=root.optimizer_step) - @test_util.run_in_graph_and_eager_modes + @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testLoadFromNameBasedSaver(self): """Save a name-based checkpoint, load it using the object-based API.""" with test_util.device(use_gpu=True): - save_path = self._write_name_based_checkpoint() - root = self._initialized_model() - self._set_sentinels(root) - with self.assertRaises(AssertionError): - self._check_sentinels(root) - object_saver = trackable_utils.TrackableSaver( - graph_view.ObjectGraphView(root)) - self._set_sentinels(root) - status = object_saver.restore(save_path) - if context.executing_eagerly(): - self._check_sentinels(root) - if context.executing_eagerly(): - status.assert_consumed() - status.assert_existing_objects_matched() - status.assert_nontrivial_match() - else: - # When graph building, we haven't read any keys, so we don't know - # whether the restore will be complete. 
- with self.assertRaisesRegex(AssertionError, "not restored"): + with self.test_session(): + save_path = self._write_name_based_checkpoint() + root = self._initialized_model() + self._set_sentinels(root) + with self.assertRaises(AssertionError): + self._check_sentinels(root) + object_saver = trackable_utils.TrackableSaver( + graph_view.ObjectGraphView(root)) + self._set_sentinels(root) + status = object_saver.restore(save_path) + if context.executing_eagerly(): + self._check_sentinels(root) + if context.executing_eagerly(): status.assert_consumed() - with self.assertRaisesRegex(AssertionError, "not restored"): status.assert_existing_objects_matched() - with self.assertRaisesRegex(AssertionError, "not restored"): status.assert_nontrivial_match() - status.run_restore_ops() - self._check_sentinels(root) - self._set_sentinels(root) - status = object_saver.restore(save_path) - status.initialize_or_restore() - self._check_sentinels(root) - # Check that there is no error when keys are missing from the name-based - # checkpoint. - root.not_in_name_checkpoint = variables.Variable([1.]) - status = object_saver.restore(save_path) - with self.assertRaises(AssertionError): - status.assert_existing_objects_matched() + else: + # When graph building, we haven't read any keys, so we don't know + # whether the restore will be complete. + with self.assertRaisesRegex(AssertionError, "not restored"): + status.assert_consumed() + with self.assertRaisesRegex(AssertionError, "not restored"): + status.assert_existing_objects_matched() + with self.assertRaisesRegex(AssertionError, "not restored"): + status.assert_nontrivial_match() + status.run_restore_ops() + self._check_sentinels(root) + self._set_sentinels(root) + status = object_saver.restore(save_path) + status.initialize_or_restore() + self._check_sentinels(root) + # Check that there is no error when keys are missing from the name-based + # checkpoint. + root.not_in_name_checkpoint = variables.Variable([1.]) + status = object_saver.restore(save_path) + with self.assertRaises(AssertionError): + status.assert_existing_objects_matched() def testSaveGraphLoadEager(self): checkpoint_directory = self.get_temp_dir() From db36410606354ae16a7a53b438379daa87ee8ed4 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Tue, 14 Jul 2020 19:29:53 -0700 Subject: [PATCH 0449/2522] Remove all BUILD aliases to tensorflow/core/framework:bounds_check Use the actual rule directly everywhere. 
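For a downstream rule that still lists the alias, the migration is a one-line dependency swap. The cc_library below is a hypothetical example (the target name and sources are placeholders; only the two TensorFlow labels are taken from this change):

    cc_library(
        name = "example_op_kernel",  # placeholder rule, not part of this change
        srcs = ["example_op_kernel.cc"],
        deps = [
            "//tensorflow/core:framework",
            # was: "//tensorflow/core:framework_bounds_check"
            "//tensorflow/core/framework:bounds_check",
        ],
    )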
PiperOrigin-RevId: 321283405 Change-Id: I8d099c60ff96cc1f4410ab867546aa8058fb8adb --- tensorflow/compiler/jit/BUILD | 4 ++-- tensorflow/compiler/tf2xla/kernels/BUILD | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index 5ec0575ed77..ccb83954d24 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -648,11 +648,11 @@ cc_library( "//tensorflow/core:core_cpu", "//tensorflow/core:core_cpu_internal", "//tensorflow/core:framework", - "//tensorflow/core:framework_bounds_check", "//tensorflow/core:graph", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", + "//tensorflow/core/framework:bounds_check", "//tensorflow/stream_executor/lib", "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/base", @@ -677,11 +677,11 @@ cc_library( "//tensorflow/compiler/xla:statusor", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", - "//tensorflow/core:framework_bounds_check", "//tensorflow/core:framework_internal", "//tensorflow/core:graph", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", + "//tensorflow/core/framework:bounds_check", "//tensorflow/stream_executor/lib", "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/container:flat_hash_map", diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD index e072225566d..ec0cb9c0b66 100644 --- a/tensorflow/compiler/tf2xla/kernels/BUILD +++ b/tensorflow/compiler/tf2xla/kernels/BUILD @@ -230,7 +230,7 @@ cc_library( "//tensorflow/compiler/xla/client/lib:arithmetic", "//tensorflow/compiler/xla/client/lib:constants", "//tensorflow/core:framework", - "//tensorflow/core:framework_bounds_check", + "//tensorflow/core/framework:bounds_check", "//tensorflow/core/kernels:conv_grad_shape_utils", "@com_google_absl//absl/types:span", ], From a98244e44577749ca1d90945771e738af4e56efc Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Tue, 14 Jul 2020 19:33:46 -0700 Subject: [PATCH 0450/2522] Enable kernel_util.h tensor getters to use the new TfLiteContext API. With upcoming changes to TFLM for reducing runtime RAM, tensor data can be accessed via the new function pointer recently added to TfLiteContext. This new API enables runtimes to manage tensor overhead based on requirements for the platform. This change simply points existing API calls used by TFL and TFLM kernels to get TfLiteTensor structs to the new function pointer if it exists. PiperOrigin-RevId: 321283768 Change-Id: I2b20e4aea99ef6eab6d363517feecd0fbd200531 --- tensorflow/lite/c/common.h | 7 +++-- tensorflow/lite/core/subgraph.cc | 3 --- .../delegates/coreml/builders/util_test.cc | 2 +- tensorflow/lite/kernels/kernel_util.h | 27 ++++--------------- tensorflow/lite/micro/micro_interpreter.cc | 7 ----- tensorflow/lite/micro/micro_interpreter.h | 3 --- tensorflow/lite/micro/testing/test_utils.cc | 8 ------ .../benchmark/experimental/c/c_api_types.h | 7 +++-- 8 files changed, 12 insertions(+), 52 deletions(-) diff --git a/tensorflow/lite/c/common.h b/tensorflow/lite/c/common.h index f5ce5f78dde..cd6eeec4da2 100644 --- a/tensorflow/lite/c/common.h +++ b/tensorflow/lite/c/common.h @@ -764,14 +764,13 @@ typedef struct TfLiteContext { // Returns a TfLiteTensor struct for a given index in the subgraph. // WARNING: This is an experimental interface that is subject to change. // WARNING: This method may not be available on all platforms. 
- TfLiteTensor* (*GetTensor)(const struct TfLiteContext* context, - int tensor_idx); + TfLiteTensor* (*GetTensor)(struct TfLiteContext* context, int subgraph_idx); // Returns a TfLiteEvalTensor struct for a given index in the subgraph. // WARNING: This is an experimental interface that is subject to change. // WARNING: This method may not be available on all platforms. - TfLiteEvalTensor* (*GetEvalTensor)(const struct TfLiteContext* context, - int tensor_idx); + TfLiteEvalTensor* (*GetEvalTensor)(struct TfLiteContext* context, + int subgraph_idx); } TfLiteContext; typedef struct TfLiteRegistration { diff --git a/tensorflow/lite/core/subgraph.cc b/tensorflow/lite/core/subgraph.cc index b087ae1901c..5ef9b45514b 100644 --- a/tensorflow/lite/core/subgraph.cc +++ b/tensorflow/lite/core/subgraph.cc @@ -189,7 +189,6 @@ Subgraph::Subgraph(ErrorReporter* error_reporter, next_execution_plan_index_to_plan_allocation_(0), subgraphs_(subgraphs), resources_(resources) { - // TODO(b/161272052): Consider a better TfLiteContext initialization pattern: context_.impl_ = static_cast(this); context_.ResizeTensor = ResizeTensor; context_.ReportError = ReportErrorC; @@ -201,8 +200,6 @@ Subgraph::Subgraph(ErrorReporter* error_reporter, context_.GetExternalContext = GetExternalContext; context_.SetExternalContext = SetExternalContext; context_.profiler = nullptr; - context_.GetTensor = nullptr; - context_.GetEvalTensor = nullptr; // Reserve some space for the tensors to avoid excessive resizing. tensors_.reserve(kTensorsReservedCapacity); diff --git a/tensorflow/lite/experimental/delegates/coreml/builders/util_test.cc b/tensorflow/lite/experimental/delegates/coreml/builders/util_test.cc index 8ba8a9bb5bc..929bc4a2282 100644 --- a/tensorflow/lite/experimental/delegates/coreml/builders/util_test.cc +++ b/tensorflow/lite/experimental/delegates/coreml/builders/util_test.cc @@ -62,7 +62,7 @@ class IsBinaryOpSupportedTest : public testing::Test { } } - TfLiteContext context_ = {}; + TfLiteContext context_; TfLiteNode node_; std::vector tensors_; }; diff --git a/tensorflow/lite/kernels/kernel_util.h b/tensorflow/lite/kernels/kernel_util.h index 92c8549ce5e..4660631dded 100644 --- a/tensorflow/lite/kernels/kernel_util.h +++ b/tensorflow/lite/kernels/kernel_util.h @@ -30,32 +30,19 @@ inline int SizeOfDimension(const TfLiteTensor* t, int dim) { } inline const TfLiteTensor* GetInput(const TfLiteContext* context, const TfLiteNode* node, int index) { - if (context->GetTensor != nullptr) { - return context->GetTensor(context, node->inputs->data[index]); - } else { - return &context->tensors[node->inputs->data[index]]; - } + return &context->tensors[node->inputs->data[index]]; } // Note: You must check if result is not null: // TfLiteTensor* my_tensor = GetVariableInput(context, node, kMyTensorIdx); // TF_LITE_ENSURE(context, my_tensor != nullptr); inline TfLiteTensor* GetVariableInput(TfLiteContext* context, const TfLiteNode* node, int index) { - TfLiteTensor* tensor = nullptr; - if (context->GetTensor != nullptr) { - tensor = context->GetTensor(context, node->inputs->data[index]); - } else { - tensor = &context->tensors[node->inputs->data[index]]; - } - return (tensor != nullptr && tensor->is_variable) ? tensor : nullptr; + TfLiteTensor* tensor = &context->tensors[node->inputs->data[index]]; + return (tensor->is_variable) ? 
tensor : nullptr; } inline TfLiteTensor* GetOutput(TfLiteContext* context, const TfLiteNode* node, int index) { - if (context->GetTensor != nullptr) { - return context->GetTensor(context, node->outputs->data[index]); - } else { - return &context->tensors[node->outputs->data[index]]; - } + return &context->tensors[node->outputs->data[index]]; } #ifndef TF_LITE_STATIC_MEMORY inline TfLiteTensor* GetTemporary(TfLiteContext* context, @@ -91,11 +78,7 @@ inline const TfLiteTensor* GetOptionalInputTensor(const TfLiteContext* context, const bool use_tensor = index < node->inputs->size && node->inputs->data[index] != kTfLiteOptionalTensor; if (use_tensor) { - if (context->GetTensor != nullptr) { - return context->GetTensor(context, node->inputs->data[index]); - } else { - return &context->tensors[node->inputs->data[index]]; - } + return &context->tensors[node->inputs->data[index]]; } return nullptr; } diff --git a/tensorflow/lite/micro/micro_interpreter.cc b/tensorflow/lite/micro/micro_interpreter.cc index a47dba83c6b..c16ede174aa 100644 --- a/tensorflow/lite/micro/micro_interpreter.cc +++ b/tensorflow/lite/micro/micro_interpreter.cc @@ -70,12 +70,6 @@ void ContextHelper::ReportOpError(struct TfLiteContext* context, va_end(args); } -TfLiteTensor* ContextHelper::GetTensor(const struct TfLiteContext* context, - int tensor_idx) { - // TODO(b/160894903): Return this value from temp allocated memory. - return &context->tensors[tensor_idx]; -} - } // namespace internal MicroInterpreter::MicroInterpreter(const Model* model, @@ -138,7 +132,6 @@ void MicroInterpreter::Init(tflite::Profiler* profiler) { context_.impl_ = static_cast(&context_helper_); context_.ReportError = context_helper_.ReportOpError; - context_.GetTensor = context_helper_.GetTensor; context_.recommended_num_threads = 1; context_.profiler = profiler; diff --git a/tensorflow/lite/micro/micro_interpreter.h b/tensorflow/lite/micro/micro_interpreter.h index 6e9e5eca572..29377e3b940 100644 --- a/tensorflow/lite/micro/micro_interpreter.h +++ b/tensorflow/lite/micro/micro_interpreter.h @@ -53,9 +53,6 @@ class ContextHelper { static void ReportOpError(struct TfLiteContext* context, const char* format, ...); - static TfLiteTensor* GetTensor(const struct TfLiteContext* context, - int tensor_idx); - void SetNodeIndex(int idx) { current_node_idx_ = idx; } private: diff --git a/tensorflow/lite/micro/testing/test_utils.cc b/tensorflow/lite/micro/testing/test_utils.cc index 8860d66efa9..4471b2e2929 100644 --- a/tensorflow/lite/micro/testing/test_utils.cc +++ b/tensorflow/lite/micro/testing/test_utils.cc @@ -87,11 +87,6 @@ void* GetScratchBuffer(TfLiteContext* context, int buffer_index) { return scratch_buffers_[buffer_index]; } -TfLiteTensor* GetTensor(const struct TfLiteContext* context, int subgraph_idx) { - // TODO(b/160894903): Return this value from temp allocated memory. 
- return &context->tensors[subgraph_idx]; -} - } // namespace uint8_t F2Q(float value, float min, float max) { @@ -142,9 +137,6 @@ void PopulateContext(TfLiteTensor* tensors, int tensors_size, context->GetExternalContext = nullptr; context->SetExternalContext = nullptr; - context->GetTensor = GetTensor; - context->GetEvalTensor = nullptr; - context->AllocatePersistentBuffer = AllocatePersistentBuffer; context->RequestScratchBufferInArena = RequestScratchBufferInArena; context->GetScratchBuffer = GetScratchBuffer; diff --git a/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h b/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h index f5ce5f78dde..cd6eeec4da2 100644 --- a/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h +++ b/tensorflow/lite/tools/benchmark/experimental/c/c_api_types.h @@ -764,14 +764,13 @@ typedef struct TfLiteContext { // Returns a TfLiteTensor struct for a given index in the subgraph. // WARNING: This is an experimental interface that is subject to change. // WARNING: This method may not be available on all platforms. - TfLiteTensor* (*GetTensor)(const struct TfLiteContext* context, - int tensor_idx); + TfLiteTensor* (*GetTensor)(struct TfLiteContext* context, int subgraph_idx); // Returns a TfLiteEvalTensor struct for a given index in the subgraph. // WARNING: This is an experimental interface that is subject to change. // WARNING: This method may not be available on all platforms. - TfLiteEvalTensor* (*GetEvalTensor)(const struct TfLiteContext* context, - int tensor_idx); + TfLiteEvalTensor* (*GetEvalTensor)(struct TfLiteContext* context, + int subgraph_idx); } TfLiteContext; typedef struct TfLiteRegistration { From 245a6bfd8fcfd1d332f2d8b18abe9969fb254502 Mon Sep 17 00:00:00 2001 From: Pavithra Vijay Date: Tue, 14 Jul 2020 19:57:53 -0700 Subject: [PATCH 0451/2522] Remove the usage of TF private API ops.convert_n_to_tensor from Keras PiperOrigin-RevId: 321285810 Change-Id: Ia8e6b0047259d0a84ef23adf403a8d22a63af7ff --- tensorflow/python/keras/metrics_test.py | 2 +- .../python/keras/optimizer_v2/learning_rate_schedule.py | 7 +++++-- .../keras/optimizer_v2/legacy_learning_rate_decay.py | 6 ++++-- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/keras/metrics_test.py b/tensorflow/python/keras/metrics_test.py index 90d87b4041e..7b339fc5a47 100644 --- a/tensorflow/python/keras/metrics_test.py +++ b/tensorflow/python/keras/metrics_test.py @@ -71,7 +71,7 @@ class KerasSumTest(test.TestCase, parameterized.TestCase): self.assertEqual(self.evaluate(m.total), 100) # check update_state() and result() + state accumulation + tensor input - update_op = m.update_state(ops.convert_n_to_tensor([1, 5])) + update_op = m.update_state(ops.convert_to_tensor_v2([1, 5])) self.evaluate(update_op) self.assertAlmostEqual(self.evaluate(m.result()), 106) self.assertEqual(self.evaluate(m.total), 106) # 100 + 1 + 5 diff --git a/tensorflow/python/keras/optimizer_v2/learning_rate_schedule.py b/tensorflow/python/keras/optimizer_v2/learning_rate_schedule.py index 9efda8faa5d..4dcff3d6c44 100644 --- a/tensorflow/python/keras/optimizer_v2/learning_rate_schedule.py +++ b/tensorflow/python/keras/optimizer_v2/learning_rate_schedule.py @@ -26,6 +26,7 @@ from tensorflow.python.keras.utils import generic_utils from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops +from tensorflow.python.util import nest from tensorflow.python.util.tf_export import keras_export @@ 
-236,8 +237,10 @@ class PiecewiseConstantDecay(LearningRateSchedule): def __call__(self, step): with ops.name_scope_v2(self.name or "PiecewiseConstant"): - boundaries = ops.convert_n_to_tensor(self.boundaries) - values = ops.convert_n_to_tensor(self.values) + boundaries = nest.map_structure(ops.convert_to_tensor_v2, + nest.flatten(self.boundaries)) + values = nest.map_structure(ops.convert_to_tensor_v2, + nest.flatten(self.values)) x_recomp = ops.convert_to_tensor_v2(step) for i, b in enumerate(boundaries): if b.dtype.base_dtype != x_recomp.dtype.base_dtype: diff --git a/tensorflow/python/keras/optimizer_v2/legacy_learning_rate_decay.py b/tensorflow/python/keras/optimizer_v2/legacy_learning_rate_decay.py index f86e68d188f..ad280568fc7 100644 --- a/tensorflow/python/keras/optimizer_v2/legacy_learning_rate_decay.py +++ b/tensorflow/python/keras/optimizer_v2/legacy_learning_rate_decay.py @@ -24,6 +24,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.keras.optimizer_v2 import learning_rate_schedule from tensorflow.python.ops import math_ops +from tensorflow.python.util import nest from tensorflow.python.util.tf_export import tf_export @@ -147,8 +148,9 @@ def piecewise_constant(x, boundaries, values, name=None): the learning rate value across different invocations of optimizer functions. @end_compatibility """ - boundaries = ops.convert_n_to_tensor(boundaries) - values = ops.convert_n_to_tensor(values) + boundaries = nest.map_structure(ops.convert_to_tensor_v2, + nest.flatten(boundaries)) + values = nest.map_structure(ops.convert_to_tensor_v2, nest.flatten(values)) x_recomp = ops.convert_to_tensor(x) # Avoid explicit conversion to x's dtype. This could result in faulty # comparisons, for example if floats are converted to integers. From 96fdd415cbb1a9b03b6e81f546f30a96bf021fbc Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Tue, 14 Jul 2020 20:12:07 -0700 Subject: [PATCH 0452/2522] Remove the run_v1_only annotation in the keras/test/tracking_test.py Fix the test with assertIn() which is not working with variable in eager. PiperOrigin-RevId: 321287169 Change-Id: Ie3915cfcc0724228eb5b9483fbd225a968715bcb --- tensorflow/python/keras/tests/tracking_test.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/keras/tests/tracking_test.py b/tensorflow/python/keras/tests/tracking_test.py index 281539fb5a6..cef5e603dfd 100644 --- a/tensorflow/python/keras/tests/tracking_test.py +++ b/tensorflow/python/keras/tests/tracking_test.py @@ -27,7 +27,6 @@ from tensorflow.python.eager import test from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops -from tensorflow.python.framework import test_util from tensorflow.python.keras import combinations from tensorflow.python.keras import keras_parameterized from tensorflow.python.keras.engine import sequential @@ -78,7 +77,6 @@ class HasList(training.Model): class ListTests(keras_parameterized.TestCase): @combinations.generate(combinations.combine(mode=["graph", "eager"])) - @test_util.run_v1_only("b/120545219") def testTracking(self): with self.test_session(): model = HasList() @@ -109,11 +107,11 @@ class ListTests(keras_parameterized.TestCase): self.evaluate(model.variables[0])) v = variables.Variable(1.) 
model.var_list = [v] - self.assertIn(v, model.variables) - self.assertIn(v, model.trainable_variables) - self.assertNotIn(v, model.non_trainable_variables) - self.assertIn(model.layer_list[0].trainable_weights[0], - model.trainable_weights) + self.assertTrue(any(v is t for t in model.variables)) + self.assertTrue(any(v is t for t in model.trainable_variables)) + self.assertFalse(any(v is t for t in model.non_trainable_variables)) + self.assertTrue(any(model.layer_list[0].trainable_weights[0] + is t for t in model.trainable_weights)) def testSubModelTracking(self): model = training.Model() @@ -180,7 +178,6 @@ class ListTests(keras_parameterized.TestCase): m2(m2.null_input()) self.assertLen(m2.trainable_variables, 6) - @test_util.run_v1_only("b/120545219") def testUpdatesForwarded(self): with context.graph_mode(): model = HasList() @@ -197,7 +194,6 @@ class ListTests(keras_parameterized.TestCase): self.assertEqual(0, len(model.updates)) @combinations.generate(combinations.combine(mode=["graph", "eager"])) - @test_util.run_v1_only("b/120545219") def testLossesForwarded(self): model = HasList() model_input = array_ops.ones([32, 2]) From 01635a48b125ea465fe5c37bcfe3d33484355299 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Tue, 14 Jul 2020 20:43:03 -0700 Subject: [PATCH 0453/2522] Enable info about no MLIR passes scheduled by default. This is really important for debugging to know what runs and does not run. PiperOrigin-RevId: 321290297 Change-Id: I38b9b9dbb28f75751e0aa6b30490af234afbef4d --- .../mlir/mlir_graph_optimization_pass.cc | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc b/tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc index 3a31f553b9a..8e6d9042987 100644 --- a/tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc +++ b/tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc @@ -115,13 +115,15 @@ Status MlirFunctionOptimizationPass::Run( }); if (!is_enabled) { - VLOG(0) << "None of the MLIR optimization passes are enabled " - << "(registered " << registry_->passes().size() << ")"; + LOG_FIRST_N(INFO, 1) + << "None of the MLIR optimization passes are enabled " + << "(registered " << registry_->passes().size() << ")"; return Status::OK(); } - VLOG(0) << "Running MLIR Graph Optimization Passes " - << "(registered " << registry_->passes().size() << " passes)"; + LOG_FIRST_N(INFO, 1) << "Running MLIR Graph Optimization Passes " + << "(registered " << registry_->passes().size() + << " passes)"; GraphDebugInfo debug_info; RegisterDialects(); @@ -187,13 +189,15 @@ Status MlirV1CompatGraphOptimizationPass::Run( }); if (!is_enabled) { - VLOG(0) << "None of the MLIR optimization passes are enabled " - << "(registered " << registry_->passes().size() << " passes)"; + LOG_FIRST_N(INFO, 1) + << "None of the MLIR optimization passes are enabled " + << "(registered " << registry_->passes().size() << " passes)"; return Status::OK(); } - VLOG(0) << "Running MLIR Graph Optimization V1 Compat Passes " - << "(registered " << registry_->passes().size() << " passes)"; + LOG_FIRST_N(INFO, 1) << "Running MLIR Graph Optimization V1 Compat Passes " + << "(registered " << registry_->passes().size() + << " passes)"; GraphDebugInfo debug_info; RegisterDialects(); From da99d39369b5e85a41b078018f6419e37f289cdf Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Tue, 14 Jul 2020 20:50:30 -0700 Subject: [PATCH 0454/2522] [TF] More informative message upon DataLoss when reading checkpoint. 
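The checksum-mismatch error previously reported only the stored and recomputed CRC32C values; it now also names the bundle prefix, the shard id and the entry size, which identifies the damaged file. As a rough illustration of where the message surfaces from Python, here is a sketch that deliberately corrupts a shard and catches the error; the temp paths, variable shape and byte offsets are invented for the example and are not part of this change:

    import glob
    import os
    import tempfile

    import tensorflow as tf

    directory = tempfile.mkdtemp()
    prefix = tf.train.Checkpoint(v=tf.Variable(tf.zeros([1024]))).save(
        os.path.join(directory, "ckpt"))

    # Flip a few bytes inside a data shard to force a CRC32C mismatch on read.
    data_file = glob.glob(prefix + ".data-*")[0]
    with open(data_file, "r+b") as f:
      f.seek(os.path.getsize(data_file) // 2)
      f.write(b"\xff" * 8)

    try:
      tf.train.Checkpoint(v=tf.Variable(tf.zeros([1024]))).restore(prefix)
    except tf.errors.DataLossError as err:
      # Depending on where the damage lands a different OpError can surface;
      # the checksum path is the one this change makes more verbose.
      print(err.message)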
PiperOrigin-RevId: 321291062 Change-Id: I59396d9323ebca965859dac8970a5efc2fd100ba --- tensorflow/core/util/tensor_bundle/tensor_bundle.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/util/tensor_bundle/tensor_bundle.cc b/tensorflow/core/util/tensor_bundle/tensor_bundle.cc index 17718cf7bb6..bb18000fcfe 100644 --- a/tensorflow/core/util/tensor_bundle/tensor_bundle.cc +++ b/tensorflow/core/util/tensor_bundle/tensor_bundle.cc @@ -925,7 +925,8 @@ Status BundleReader::GetValue(const BundleEntryProto& entry, Tensor* val) { } if (crc32c::Unmask(entry.crc32c()) != actual_crc32c) { return errors::DataLoss( - "Checksum does not match: stored ", + "TensorBundle at ", prefix_, " shard ", entry.shard_id(), " (", + entry.size(), " bytes): Checksum does not match: stored ", strings::Printf("%08u", crc32c::Unmask(entry.crc32c())), " vs. calculated on the restored bytes ", actual_crc32c); } From eb384a3ff6d9313cd17a0642362ba09dc34c8e87 Mon Sep 17 00:00:00 2001 From: Haifeng Jin Date: Tue, 14 Jul 2020 20:52:22 -0700 Subject: [PATCH 0455/2522] Remove usage of run_all_in_graph_and_eager_modes in keras. PiperOrigin-RevId: 321291244 Change-Id: I3912400f37bc2aea2a930015b6ca80e48e1fca43 --- .../python/keras/feature_column/dense_features_test.py | 4 ++-- .../keras/optimizer_v2/legacy_learning_rate_decay_test.py | 7 +------ 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/tensorflow/python/keras/feature_column/dense_features_test.py b/tensorflow/python/keras/feature_column/dense_features_test.py index 3c4fb28749e..a9fcb4ad315 100644 --- a/tensorflow/python/keras/feature_column/dense_features_test.py +++ b/tensorflow/python/keras/feature_column/dense_features_test.py @@ -1015,7 +1015,7 @@ class SharedEmbeddingColumnTest(test.TestCase, parameterized.TestCase): self._test_dense_features(trainable=False) -@test_util.run_all_in_graph_and_eager_modes +@combinations.generate(combinations.combine(mode=['graph', 'eager'])) class DenseFeaturesSerializationTest(test.TestCase, parameterized.TestCase): @parameterized.named_parameters( @@ -1080,7 +1080,7 @@ class DenseFeaturesSerializationTest(test.TestCase, parameterized.TestCase): self.assertEqual(new_layer._feature_columns[0].name, 'a_X_b_indicator') -@test_util.run_all_in_graph_and_eager_modes +@combinations.generate(combinations.combine(mode=['graph', 'eager'])) class SequenceFeatureColumnsTest(test.TestCase): """Tests DenseFeatures with sequence feature columns.""" diff --git a/tensorflow/python/keras/optimizer_v2/legacy_learning_rate_decay_test.py b/tensorflow/python/keras/optimizer_v2/legacy_learning_rate_decay_test.py index aa566e15135..040b3637aa0 100644 --- a/tensorflow/python/keras/optimizer_v2/legacy_learning_rate_decay_test.py +++ b/tensorflow/python/keras/optimizer_v2/legacy_learning_rate_decay_test.py @@ -21,7 +21,6 @@ from __future__ import print_function import math from tensorflow.python.eager import context -from tensorflow.python.framework import test_util from tensorflow.python.keras import combinations from tensorflow.python.keras import keras_parameterized from tensorflow.python.keras.optimizer_v2 import legacy_learning_rate_decay as learning_rate_decay @@ -29,9 +28,9 @@ from tensorflow.python.ops import variables from tensorflow.python.platform import googletest +@combinations.generate(combinations.combine(mode=["graph", "eager"])) class LRDecayTest(keras_parameterized.TestCase): - @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testContinuous(self): 
self.evaluate(variables.global_variables_initializer()) step = 5 @@ -39,7 +38,6 @@ class LRDecayTest(keras_parameterized.TestCase): expected = .05 * 0.96**(5.0 / 10.0) self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) - @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testStaircase(self): if context.executing_eagerly(): step = variables.Variable(0) @@ -61,7 +59,6 @@ class LRDecayTest(keras_parameterized.TestCase): self.evaluate(step.assign(100)) self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) - @test_util.run_in_graph_and_eager_modes def testVariables(self): step = variables.VariableV1(1) @@ -84,7 +81,6 @@ class LRDecayTest(keras_parameterized.TestCase): expected = .1 * 0.96**(100 // 3) self.assertAllClose(self.evaluate(decayed_lr), expected, 1e-6) - @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testPiecewiseConstant(self): x = variables.Variable(-999) decayed_lr = learning_rate_decay.piecewise_constant( @@ -104,7 +100,6 @@ class LRDecayTest(keras_parameterized.TestCase): self.evaluate(x.assign(999)) self.assertAllClose(self.evaluate(decayed_lr), 0.001, 1e-6) - @combinations.generate(combinations.combine(mode=["graph", "eager"])) def testPiecewiseConstantEdgeCases(self): x_int = variables.Variable(0, dtype=variables.dtypes.int32) boundaries, values = [-1.0, 1.0], [1, 2, 3] From 41339588d9b81f509fd6b52594c4646d9350d58a Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Tue, 14 Jul 2020 21:47:49 -0700 Subject: [PATCH 0456/2522] Remove run_deprecated_v1 in proximal_gradient_descent_test. All the test case has been updated to run with graph context, since the API expect to run in v1 graph context. PiperOrigin-RevId: 321296244 Change-Id: I045584d2003febc0dd32b94abccc7382f07eb3d8 --- .../proximal_gradient_descent_test.py | 23 +++++-------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/tensorflow/python/training/proximal_gradient_descent_test.py b/tensorflow/python/training/proximal_gradient_descent_test.py index 603807332ca..994590840af 100644 --- a/tensorflow/python/training/proximal_gradient_descent_test.py +++ b/tensorflow/python/training/proximal_gradient_descent_test.py @@ -23,7 +23,6 @@ import numpy as np from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops -from tensorflow.python.framework import test_util from tensorflow.python.ops import embedding_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import resource_variable_ops @@ -37,7 +36,7 @@ class ProximalGradientDescentOptimizerTest(test.TestCase): def doTestProximalGradientDescentwithoutRegularization( self, use_resource=False): - with self.cached_session() as sess: + with ops.Graph().as_default(), self.cached_session(): if use_resource: var0 = resource_variable_ops.ResourceVariable([0.0, 0.0]) var1 = resource_variable_ops.ResourceVariable([0.0, 0.0]) @@ -63,17 +62,14 @@ class ProximalGradientDescentOptimizerTest(test.TestCase): self.assertAllClose(np.array([-0.9, -1.8]), v0_val) self.assertAllClose(np.array([-0.09, -0.18]), v1_val) - @test_util.run_deprecated_v1 def testProximalGradientDescentwithoutRegularization(self): self.doTestProximalGradientDescentwithoutRegularization(use_resource=False) - @test_util.run_deprecated_v1 def testResourceProximalGradientDescentwithoutRegularization(self): self.doTestProximalGradientDescentwithoutRegularization(use_resource=True) - @test_util.run_deprecated_v1 def 
testProximalGradientDescentwithoutRegularization2(self): - with self.cached_session() as sess: + with ops.Graph().as_default(), self.cached_session(): var0 = variables.Variable([1.0, 2.0]) var1 = variables.Variable([4.0, 3.0]) grads0 = constant_op.constant([0.1, 0.2]) @@ -96,10 +92,9 @@ class ProximalGradientDescentOptimizerTest(test.TestCase): self.assertAllClose(np.array([0.1, 0.2]), v0_val) self.assertAllClose(np.array([3.91, 2.82]), v1_val) - @test_util.run_deprecated_v1 def testMinimizeSparseResourceVariable(self): for dtype in [dtypes.float32, dtypes.float64]: - with self.cached_session(): + with ops.Graph().as_default(), self.cached_session(): var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype) x = constant_op.constant([[4.0], [5.0]], dtype=dtype) pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x) @@ -116,9 +111,8 @@ class ProximalGradientDescentOptimizerTest(test.TestCase): self.evaluate(var0), atol=0.01) - @test_util.run_deprecated_v1 def testProximalGradientDescentWithL1_L2(self): - with self.cached_session() as sess: + with ops.Graph().as_default(), self.cached_session(): var0 = variables.Variable([1.0, 2.0]) var1 = variables.Variable([4.0, 3.0]) grads0 = constant_op.constant([0.1, 0.2]) @@ -164,7 +158,6 @@ class ProximalGradientDescentOptimizerTest(test.TestCase): update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) self.evaluate(variables.global_variables_initializer()) - sess = ops.get_default_session() v0_val, v1_val = self.evaluate([var0, var1]) if is_sparse: self.assertAllClose([[1.0], [2.0]], v0_val) @@ -180,9 +173,8 @@ class ProximalGradientDescentOptimizerTest(test.TestCase): v0_val, v1_val = self.evaluate([var0, var1]) return v0_val, v1_val - @test_util.run_deprecated_v1 def testEquivSparseGradientDescentwithoutRegularization(self): - with self.cached_session(): + with ops.Graph().as_default(), self.cached_session(): val0, val1 = self.applyOptimizer( proximal_gradient_descent.ProximalGradientDescentOptimizer( 3.0, @@ -190,23 +182,20 @@ class ProximalGradientDescentOptimizerTest(test.TestCase): l2_regularization_strength=0.0), is_sparse=True) - with self.cached_session(): val2, val3 = self.applyOptimizer( gradient_descent.GradientDescentOptimizer(3.0), is_sparse=True) self.assertAllClose(val0, val2) self.assertAllClose(val1, val3) - @test_util.run_deprecated_v1 def testEquivGradientDescentwithoutRegularization(self): - with self.cached_session(): + with ops.Graph().as_default(), self.cached_session(): val0, val1 = self.applyOptimizer( proximal_gradient_descent.ProximalGradientDescentOptimizer( 3.0, l1_regularization_strength=0.0, l2_regularization_strength=0.0)) - with self.cached_session(): val2, val3 = self.applyOptimizer( gradient_descent.GradientDescentOptimizer(3.0)) From 83a712dc5397a67e69fbdee3ec4b833923fc8727 Mon Sep 17 00:00:00 2001 From: Pavithra Vijay Date: Tue, 14 Jul 2020 21:48:19 -0700 Subject: [PATCH 0457/2522] Remove `Autotrackable` from the Keras test files and replacing it with `Module`. 
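The affected tests only use AutoTrackable as a plain container whose attributes are tracked for object-based checkpointing, and module.Module (tf.Module in the public API) provides the same attribute tracking, which is why the swap is largely mechanical. Below is a minimal sketch of the equivalent public-API usage; the names are illustrative and not taken from this change:

    import os
    import tempfile

    import tensorflow as tf


    class NonLayerContainer(tf.Module):
      """Stand-in for the test helpers that previously extended AutoTrackable."""

      def __init__(self):
        super().__init__()
        self.a_variable = tf.Variable(3.0, name="a_variable")


    container = NonLayerContainer()
    path = tf.train.Checkpoint(dep=container).save(
        os.path.join(tempfile.mkdtemp(), "ckpt"))

    restored = NonLayerContainer()
    restored.a_variable.assign(0.0)
    status = tf.train.Checkpoint(dep=restored).restore(path)
    status.assert_existing_objects_matched()
    print(restored.a_variable.numpy())  # 3.0, restored by object path.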
PiperOrigin-RevId: 321296275 Change-Id: Idba3ced8d12b163daa254f04535a4542a1d9a45b --- .../keras/tests/convert_to_constants_test.py | 6 ++--- tensorflow/python/keras/tests/saver_test.py | 4 ++-- .../python/keras/tests/tracking_test.py | 23 +++++++++---------- .../python/keras/tests/tracking_util_test.py | 6 ++--- .../tracking_util_with_v1_optimizers_test.py | 6 ++--- .../keras/tests/tracking_util_xla_test.py | 4 ++-- 6 files changed, 24 insertions(+), 25 deletions(-) diff --git a/tensorflow/python/keras/tests/convert_to_constants_test.py b/tensorflow/python/keras/tests/convert_to_constants_test.py index 21081682089..f59c83b79dc 100644 --- a/tensorflow/python/keras/tests/convert_to_constants_test.py +++ b/tensorflow/python/keras/tests/convert_to_constants_test.py @@ -29,12 +29,12 @@ from tensorflow.python.framework import convert_to_constants from tensorflow.python.framework import dtypes from tensorflow.python.framework import tensor_spec from tensorflow.python.framework import test_util +from tensorflow.python.module import module from tensorflow.python.ops import array_ops from tensorflow.python.ops import init_ops from tensorflow.python.platform import test from tensorflow.python.saved_model.load import load from tensorflow.python.saved_model.save import save -from tensorflow.python.training.tracking import tracking from tensorflow.python.util import nest @@ -50,7 +50,7 @@ class VariablesToConstantsTest(test.TestCase): root: AutoTrackable object with original ConcreteFunction. output_func: frozen ConcreteFunction. """ - root = tracking.AutoTrackable() + root = module.Module() root.f = model input_func = root.f.get_concrete_function() @@ -91,7 +91,7 @@ class VariablesToConstantsTest(test.TestCase): # Save the converted ConcreteFunction as a signature. 
save_dir = os.path.join(self.get_temp_dir(), "frozen_saved_model") - root = tracking.AutoTrackable() + root = module.Module() root.f = converted_concrete_func save(root, save_dir, {"mykey": converted_concrete_func}) diff --git a/tensorflow/python/keras/tests/saver_test.py b/tensorflow/python/keras/tests/saver_test.py index 28c65961a53..03496544033 100644 --- a/tensorflow/python/keras/tests/saver_test.py +++ b/tensorflow/python/keras/tests/saver_test.py @@ -27,16 +27,16 @@ from tensorflow.python.framework import errors from tensorflow.python.framework import ops as ops_lib from tensorflow.python.keras.engine import training from tensorflow.python.keras.layers import core +from tensorflow.python.module import module from tensorflow.python.ops import variables from tensorflow.python.platform import test from tensorflow.python.training import adam from tensorflow.python.training import saver as saver_module from tensorflow.python.training import training_util -from tensorflow.python.training.tracking import tracking as trackable_tracking from tensorflow.python.training.tracking import util as trackable_utils -class NonLayerTrackable(trackable_tracking.AutoTrackable): +class NonLayerTrackable(module.Module): def __init__(self): super(NonLayerTrackable, self).__init__() diff --git a/tensorflow/python/keras/tests/tracking_test.py b/tensorflow/python/keras/tests/tracking_test.py index cef5e603dfd..02d5cd519ab 100644 --- a/tensorflow/python/keras/tests/tracking_test.py +++ b/tensorflow/python/keras/tests/tracking_test.py @@ -39,7 +39,6 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import variables from tensorflow.python.training.tracking import base from tensorflow.python.training.tracking import data_structures -from tensorflow.python.training.tracking import tracking from tensorflow.python.training.tracking import util @@ -290,7 +289,7 @@ class MappingTests(keras_parameterized.TestCase): def testLayerCollectionWithExternalMutation(self): d = {} - root = tracking.AutoTrackable() + root = module.Module() root.wrapper = d self.assertEqual([], root.wrapper.layers) self.assertEqual([], root.wrapper.trainable_weights) @@ -303,7 +302,7 @@ class MappingTests(keras_parameterized.TestCase): self.assertEqual([], root.wrapper.trainable_weights) def testDictWrapperBadKeys(self): - a = tracking.AutoTrackable() + a = module.Module() a.d = {} a.d[1] = data_structures.List() model = training.Model() @@ -313,7 +312,7 @@ class MappingTests(keras_parameterized.TestCase): model.save_weights(save_path) def testDictWrapperNoDependency(self): - a = tracking.AutoTrackable() + a = module.Module() a.d = data_structures.NoDependency({}) a.d[1] = [3] self.assertEqual([a], util.list_objects(a)) @@ -324,7 +323,7 @@ class MappingTests(keras_parameterized.TestCase): model.load_weights(save_path) def testNonStringKeyNotTrackableValue(self): - a = tracking.AutoTrackable() + a = module.Module() a.d = {} a.d["a"] = [3] a.d[1] = data_structures.NoDependency([3]) @@ -338,15 +337,15 @@ class MappingTests(keras_parameterized.TestCase): def testNonAppendNotTrackable(self): # Non-append mutations (deleting or overwriting values) are OK when the # values aren't tracked. 
- a = tracking.AutoTrackable() + a = module.Module() a.d = {} a.d["a"] = [3] a.d[1] = 3 a.d[1] = 2 self.assertEqual(2, a.d[1]) del a.d[1] - a.d[2] = data_structures.NoDependency(tracking.AutoTrackable()) - second = tracking.AutoTrackable() + a.d[2] = data_structures.NoDependency(module.Module()) + second = module.Module() a.d[2] = data_structures.NoDependency(second) self.assertIs(second, a.d[2]) self.assertEqual([a, a.d, a.d["a"]], util.list_objects(a)) @@ -550,10 +549,10 @@ class TupleTests(keras_parameterized.TestCase): class InterfaceTests(keras_parameterized.TestCase): def testNoDependency(self): - root = tracking.AutoTrackable() - hasdep = tracking.AutoTrackable() + root = module.Module() + hasdep = module.Module() root.hasdep = hasdep - nodep = tracking.AutoTrackable() + nodep = module.Module() root.nodep = data_structures.NoDependency(nodep) self.assertEqual(1, len(root._checkpoint_dependencies)) self.assertIs(root._checkpoint_dependencies[0].ref, root.hasdep) @@ -566,7 +565,7 @@ class InterfaceTests(keras_parameterized.TestCase): def __init__(self): super(NoDependencyModel, self).__init__() self.a = [] - self.b = tracking.AutoTrackable() + self.b = module.Module() nodeps = NoDependencyModel() self.assertEqual([nodeps], util.list_objects(nodeps)) diff --git a/tensorflow/python/keras/tests/tracking_util_test.py b/tensorflow/python/keras/tests/tracking_util_test.py index a609d4f711e..32b3ceec6f6 100644 --- a/tensorflow/python/keras/tests/tracking_util_test.py +++ b/tensorflow/python/keras/tests/tracking_util_test.py @@ -35,6 +35,7 @@ from tensorflow.python.keras.engine import sequential from tensorflow.python.keras.engine import training from tensorflow.python.keras.layers import core from tensorflow.python.keras.optimizer_v2 import adam +from tensorflow.python.module import module from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import resource_variable_ops @@ -48,7 +49,6 @@ from tensorflow.python.training import checkpoint_management from tensorflow.python.training import saver as saver_lib from tensorflow.python.training import training_util from tensorflow.python.training.tracking import graph_view -from tensorflow.python.training.tracking import tracking from tensorflow.python.training.tracking import util as trackable_utils @@ -68,7 +68,7 @@ class MyModel(training.Model): return ret -class NonLayerTrackable(tracking.AutoTrackable): +class NonLayerTrackable(module.Module): def __init__(self): super(NonLayerTrackable, self).__init__() @@ -709,7 +709,7 @@ class CheckpointingTests(keras_parameterized.TestCase): self.assertEqual(42., self.evaluate(optimizer.beta_1)) -class _ManualScope(tracking.AutoTrackable): +class _ManualScope(module.Module): def __call__(self): with variable_scope.variable_scope("ManualScope") as vs: diff --git a/tensorflow/python/keras/tests/tracking_util_with_v1_optimizers_test.py b/tensorflow/python/keras/tests/tracking_util_with_v1_optimizers_test.py index 4583616f4d9..1ba76c19866 100644 --- a/tensorflow/python/keras/tests/tracking_util_with_v1_optimizers_test.py +++ b/tensorflow/python/keras/tests/tracking_util_with_v1_optimizers_test.py @@ -35,6 +35,7 @@ from tensorflow.python.keras import combinations from tensorflow.python.keras import keras_parameterized from tensorflow.python.keras.engine import training from tensorflow.python.keras.layers import core +from tensorflow.python.module import module from tensorflow.python.ops import state_ops from tensorflow.python.ops import variables 
from tensorflow.python.training import adam @@ -42,11 +43,10 @@ from tensorflow.python.training import checkpoint_management from tensorflow.python.training import saver as saver_lib from tensorflow.python.training import training_util from tensorflow.python.training.tracking import graph_view -from tensorflow.python.training.tracking import tracking from tensorflow.python.training.tracking import util as trackable_utils -class NonLayerTrackable(tracking.AutoTrackable): +class NonLayerTrackable(module.Module): def __init__(self): super(NonLayerTrackable, self).__init__() @@ -460,7 +460,7 @@ class CheckpointingTests(keras_parameterized.TestCase): # pylint: enable=cell-var-from-loop def _get_checkpoint_name(self, name): - root = tracking.AutoTrackable() + root = module.Module() trackable_utils.add_variable( root, name=name, shape=[1, 2], dtype=dtypes.float64) (named_variable,), _, _ = trackable_utils._serialize_object_graph( diff --git a/tensorflow/python/keras/tests/tracking_util_xla_test.py b/tensorflow/python/keras/tests/tracking_util_xla_test.py index 4e8dd0a6fd3..0a311011c5a 100644 --- a/tensorflow/python/keras/tests/tracking_util_xla_test.py +++ b/tensorflow/python/keras/tests/tracking_util_xla_test.py @@ -23,13 +23,13 @@ from tensorflow.python.framework import ops from tensorflow.python.keras.engine import training from tensorflow.python.keras.layers import core from tensorflow.python.keras.optimizer_v2 import adam +from tensorflow.python.module import module from tensorflow.python.platform import test from tensorflow.python.training import checkpoint_management -from tensorflow.python.training.tracking import tracking from tensorflow.python.training.tracking import util as trackable_utils -class NonLayerTrackable(tracking.AutoTrackable): +class NonLayerTrackable(module.Module): def __init__(self): super(NonLayerTrackable, self).__init__() From fb9af2de14ca3613f1c312505d70cbc35cf85d6e Mon Sep 17 00:00:00 2001 From: Xinyi Wang Date: Tue, 14 Jul 2020 21:51:04 -0700 Subject: [PATCH 0458/2522] Update API docs for distribution strategy dataset APIs PiperOrigin-RevId: 321296509 Change-Id: I5563e9a1234e568ad88d6a775f9f52cf64346f2b --- .../python/distribute/distribute_lib.py | 229 +++++++++--------- tensorflow/python/distribute/input_lib.py | 10 +- 2 files changed, 126 insertions(+), 113 deletions(-) diff --git a/tensorflow/python/distribute/distribute_lib.py b/tensorflow/python/distribute/distribute_lib.py index c659ae6205c..216f8a6ff66 100644 --- a/tensorflow/python/distribute/distribute_lib.py +++ b/tensorflow/python/distribute/distribute_lib.py @@ -949,97 +949,92 @@ class StrategyBase(object): return self.run(fn, args=args) def experimental_distribute_dataset(self, dataset, options=None): + # pylint: disable=line-too-long """Creates `tf.distribute.DistributedDataset` from `tf.data.Dataset`. The returned `tf.distribute.DistributedDataset` can be iterated over - similar to how regular datasets can. + similar to regular datasets. NOTE: The user cannot add any more transformations to a - `tf.distribute.DistributedDataset`. + `tf.distribute.DistributedDataset`. You can only create an iterator or + examine the `tf.TypeSpec` of the data generated by it. See API docs of + `tf.distribute.DistributedDataset` to learn more. The following is an example: - ```python - strategy = tf.distribute.MirroredStrategy() + >>> global_batch_size = 2 + >>> # Passing the devices is optional. + ... strategy = tf.distribute.MirroredStrategy(devices=["GPU:0", "GPU:1"]) + >>> # Create a dataset + ... 
dataset = tf.data.Dataset.range(4).batch(global_batch_size)
+ >>> # Distribute that dataset
+ ... dist_dataset = strategy.experimental_distribute_dataset(dataset)
+ >>> @tf.function
+ ... def replica_fn(input):
+ ... return input*2
+ >>> result = []
+ >>> # Iterate over the `tf.distribute.DistributedDataset`
+ ... for x in dist_dataset:
+ ... # process dataset elements
+ ... result.append(strategy.run(replica_fn, args=(x,)))
+ >>> print(result)
+ [PerReplica:{
+ 0: ,
+ 1:
+ }, PerReplica:{
+ 0: ,
+ 1:
+ }]
-
- # Create a dataset
- dataset = dataset_ops.Dataset.TFRecordDataset([
- "/a/1.tfr", "/a/2.tfr", "/a/3.tfr", "/a/4.tfr"])
- # Distribute that dataset
- dist_dataset = strategy.experimental_distribute_dataset(dataset)
+ Three key actions happening under the hood of this method are batching,
+ sharding, and prefetching.
- # Iterate over the `tf.distribute.DistributedDataset`
- for x in dist_dataset:
- # process dataset elements
- strategy.run(replica_fn, args=(x,))
- ```
+ In the code snippet above, `dataset` is batched by `global_batch_size`, and
+ calling `experimental_distribute_dataset` on it rebatches `dataset` to a
+ new batch size that is equal to the global batch size divided by the number
+ of replicas in sync. We iterate through it using a Pythonic for loop.
+ `x` is a `tf.distribute.DistributedValues` containing data for all replicas,
+ and each replica gets data of the new batch size.
+ `tf.distribute.Strategy.run` will take care of feeding the right per-replica
+ data in `x` to the right `replica_fn` executed on each replica.
- In the code snippet above, the `tf.distribute.DistributedDataset`
- `dist_dataset` is batched by `GLOBAL_BATCH_SIZE`, and we iterate through it
- using `for x in dist_dataset`. `x` a `tf.distribute.DistributedValues`
- containing data for all replicas, which aggregates to a batch of
- `GLOBAL_BATCH_SIZE`. `tf.distribute.Strategy.run` will take care of feeding
- the right per-replica data in `x` to the right `replica_fn` executed on each
- replica.
+ Sharding contains autosharding across multiple workers and within every
+ worker. First, in multi-worker distributed training (i.e. when you use
+ `tf.distribute.experimental.MultiWorkerMirroredStrategy`
+ or `tf.distribute.TPUStrategy`), autosharding a dataset over a set of
+ workers means that each worker is assigned a subset of the entire dataset
+ (if the right `tf.data.experimental.AutoShardPolicy` is set). This is to
+ ensure that at each step, a global batch size of non-overlapping dataset
+ elements will be processed by each worker. Autosharding has a couple of
+ different options that can be specified using
+ `tf.data.experimental.DistributeOptions`. Then, sharding within each worker
+ means the method will split the data among all the worker devices (if more
+ than one is present). This will happen regardless of multi-worker
+ autosharding.
- What's under the hood of this method, when we say the `tf.data.Dataset`
- instance - `dataset` - gets distributed? It depends on how you set the
- `tf.data.experimental.AutoShardPolicy` through
- `tf.data.experimental.DistributeOptions`. By default, it is set to
- `tf.data.experimental.AutoShardPolicy.AUTO`. In a multi-worker setting, we
- will first attempt to distribute `dataset` by detecting whether `dataset` is
+ Note: for autosharding across multiple workers, the default mode is
+ `tf.data.experimental.AutoShardPolicy.AUTO`. This mode
+ will attempt to shard the input dataset by files if the dataset is being created out of reader datasets (e.g.
`tf.data.TFRecordDataset`, - `tf.data.TextLineDataset`, etc.) and if so, try to shard the input files. - Note that there has to be at least one input file per worker. If you have - less than one input file per worker, we suggest that you disable dataset - sharding across workers, by setting the - `tf.data.experimental.DistributeOptions.auto_shard_policy` to be + `tf.data.TextLineDataset`, etc.) or otherwise shard the dataset by data, + where each of the workers will read the entire dataset and only process the + shard assigned to it. However, if you have less than one input file per + worker, we suggest that you disable dataset autosharding across workers by + setting the `tf.data.experimental.DistributeOptions.auto_shard_policy` to be `tf.data.experimental.AutoShardPolicy.OFF`. - If the attempt to shard by file is unsuccessful (i.e. the dataset is not - read from files), we will shard the dataset evenly at the end by - appending a `.shard` operation to the end of the processing pipeline. This - will cause the entire preprocessing pipeline for all the data to be run on - every worker, and each worker will do redundant work. We will print a - warning if this route is selected. - - As mentioned before, within each worker, we will also split the data among - all the worker devices (if more than one a present). This will happen - even if multi-worker sharding is disabled. + By default, this method adds a prefetch transformation at the end of the + user provided `tf.data.Dataset` instance. The argument to the prefetch + transformation which is `buffer_size` is equal to the number of replicas in + sync. If the above batch splitting and dataset sharding logic is undesirable, please use `tf.distribute.Strategy.experimental_distribute_datasets_from_function` - instead, which does not do any automatic splitting or sharding. + instead, which does not do any automatic batching or sharding for you. - You can also use the `element_spec` property of the - `tf.distribute.DistributedDataset` instance returned by this API to query - the `tf.TypeSpec` of the elements returned - by the iterator. This can be used to set the `input_signature` property - of a `tf.function`. - - ```python - strategy = tf.distribute.MirroredStrategy() - - # Create a dataset - dataset = dataset_ops.Dataset.TFRecordDataset([ - "/a/1.tfr", "/a/2.tfr", "/a/3.tfr", "/a/4.tfr"]) - - # Distribute that dataset - dist_dataset = strategy.experimental_distribute_dataset(dataset) - - @tf.function(input_signature=[dist_dataset.element_spec]) - def train_step(inputs): - # train model with inputs - return - - # Iterate over the `tf.distribute.DistributedDataset` - for x in dist_dataset: - # process dataset elements - strategy.run(train_step, args=(x,)) - ``` - - Note: The order in which the data is processed by the workers when using + Note: If you are using TPUStrategy, the order in which the data is processed + by the workers when using `tf.distribute.Strategy.experimental_distribute_dataset` or `tf.distribute.Strategy.experimental_distribute_datasets_from_function` is not guaranteed. This is typically required if you are using @@ -1048,6 +1043,18 @@ class StrategyBase(object): snippet](https://www.tensorflow.org/tutorials/distribute/input#caveats) for an example of how to order outputs. + Note: Stateful dataset transformations are currently not supported with + `tf.distribute.experimental_distribute_dataset` or + `tf.distribute.experimental_distribute_datasets_from_function`. Any stateful + ops that the dataset may have are currently ignored. 
For example, if your + dataset has a `map_fn` that uses `tf.random.uniform` to rotate an image, + then you have a dataset graph that depends on state (i.e the random seed) on + the local machine where the python process is being executed. + + For a tutorial on more usage and properties of this method, refer to the + [tutorial on distributed input](https://www.tensorflow.org/tutorials/distribute/input#tfdistributestrategyexperimental_distribute_dataset). + If you are interested in last partial batch handling, read [this section](https://www.tensorflow.org/tutorials/distribute/input#partial_batches). + Args: dataset: `tf.data.Dataset` that will be sharded across all replicas using the rules stated above. @@ -1057,64 +1064,53 @@ class StrategyBase(object): Returns: A `tf.distribute.DistributedDataset`. """ + # pylint: enable=line-too-long return self._extended._experimental_distribute_dataset(dataset, options) # pylint: disable=protected-access def experimental_distribute_datasets_from_function(self, dataset_fn, options=None): + # pylint: disable=line-too-long """Distributes `tf.data.Dataset` instances created by calls to `dataset_fn`. - `dataset_fn` will be called once for each worker in the strategy. Each - replica on that worker will dequeue one batch of inputs from the local - `Dataset` (i.e. if a worker has two replicas, two batches will be dequeued - from the `Dataset` every step). + The argument `dataset_fn` that users pass in is an input function that has a + `tf.distribute.InputContext` argument and returns a `tf.data.Dataset` + instance. It is expected that the returned dataset from `dataset_fn` is + already batched by per-replica batch size (i.e. global batch size divided by + the number of replicas in sync) and sharded. + `tf.distribute.Strategy.experimental_distribute_datasets_from_function` does + not batch or shard the `tf.data.Dataset` instance + returned from the input function. `dataset_fn` will be called on the CPU + device of each of the workers and each generates a dataset where every + replica on that worker will dequeue one batch of inputs (i.e. if a worker + has two replicas, two batches will be dequeued from the `Dataset` every + step). - This method can be used for several purposes. For example, where + This method can be used for several purposes. First, it allows you to + specify your own batching and sharding logic. (In contrast, + `tf.distribute.experimental_distribute_dataset` does batching and sharding + for you.)For example, where `experimental_distribute_dataset` is unable to shard the input files, this method might be used to manually shard the dataset (avoiding the slow fallback behavior in `experimental_distribute_dataset`). In cases where the dataset is infinite, this sharding can be done by creating dataset replicas that differ only in their random seed. - `experimental_distribute_dataset` may also sometimes fail to split the - batch across replicas on a worker. In that case, this method can be used - where that limitation does not exist. The `dataset_fn` should take an `tf.distribute.InputContext` instance where information about batching and input replication can be accessed. - You can also use the `element_spec` property of the + You can use `element_spec` property of the `tf.distribute.DistributedDataset` returned by this API to query the `tf.TypeSpec` of the elements returned by the iterator. This can be used to - set the `input_signature` property of a `tf.function`. - - >>> global_batch_size = 8 - >>> def dataset_fn(input_context): - ... 
batch_size = input_context.get_per_replica_batch_size( - ... global_batch_size) - ... d = tf.data.Dataset.from_tensors([[1.]]).repeat().batch(batch_size) - ... return d.shard( - ... input_context.num_input_pipelines, - ... input_context.input_pipeline_id) - - >>> strategy = tf.distribute.MirroredStrategy() - >>> ds = strategy.experimental_distribute_datasets_from_function(dataset_fn) - - >>> def train(ds): - ... @tf.function(input_signature=[ds.element_spec]) - ... def step_fn(inputs): - ... # train the model with inputs - ... return inputs - - ... for batch in ds: - ... replica_results = strategy.run(replica_fn, args=(batch,)) - >>> train(ds) + set the `input_signature` property of a `tf.function`. Follow + `tf.distribute.DistributedDataset.element_spec` to see an example. IMPORTANT: The `tf.data.Dataset` returned by `dataset_fn` should have a per-replica batch size, unlike `experimental_distribute_dataset`, which uses - the global batch size. This may be computed using + the global batch size. This may be computed using `input_context.get_per_replica_batch_size`. - - Note: The order in which the data is processed by the workers when using + Note: If you are using TPUStrategy, the order in which the data is processed + by the workers when using `tf.distribute.Strategy.experimental_distribute_dataset` or `tf.distribute.Strategy.experimental_distribute_datasets_from_function` is not guaranteed. This is typically required if you are using @@ -1123,6 +1119,18 @@ class StrategyBase(object): snippet](https://www.tensorflow.org/tutorials/distribute/input#caveats) for an example of how to order outputs. + Note: Stateful dataset transformations are currently not supported with + `tf.distribute.experimental_distribute_dataset` or + `tf.distribute.experimental_distribute_datasets_from_function`. Any stateful + ops that the dataset may have are currently ignored. For example, if your + dataset has a `map_fn` that uses `tf.random.uniform` to rotate an image, + then you have a dataset graph that depends on state (i.e the random seed) on + the local machine where the python process is being executed. + + For a tutorial on more usage and properties of this method, refer to the + [tutorial on distributed input](https://www.tensorflow.org/tutorials/distribute/input#tfdistributestrategyexperimental_distribute_datasets_from_function). + If you are interested in last partial batch handling, read [this section](https://www.tensorflow.org/tutorials/distribute/input#partial_batches). + Args: dataset_fn: A function taking a `tf.distribute.InputContext` instance and returning a `tf.data.Dataset`. @@ -1132,6 +1140,7 @@ class StrategyBase(object): Returns: A `tf.distribute.DistributedDataset`. 
""" + # pylint: enable=line-too-long return self._extended._experimental_distribute_datasets_from_function( # pylint: disable=protected-access dataset_fn, options) @@ -1238,7 +1247,7 @@ class StrategyBase(object): example with MirroredStrategy with 2 GPUs: ```python - strategy = tf.distribute.MirroredStrategy(devices=["gpu:0", "gpu:1"]) + strategy = tf.distribute.MirroredStrategy(devices=["GPU:0", "GPU:1"]) def step_fn(): i = tf.distribute.get_replica_context().replica_id_in_sync_group return tf.identity(i) @@ -1517,7 +1526,7 @@ class StrategyBase(object): In general, when using a multi-worker `tf.distribute` strategy such as `tf.distribute.experimental.MultiWorkerMirroredStrategy` or - `tf.distribute.experimental.TPUStrategy()`, there is a + `tf.distribute.TPUStrategy()`, there is a `tf.distribute.cluster_resolver.ClusterResolver` associated with the strategy used, and such an instance is returned by this property. @@ -2417,7 +2426,7 @@ class StrategyExtendedV2(object): Example usage: ```python - strategy = tf.distribute.MirroredStrategy(['/gpu:0', '/gpu:1']) # With 2 devices + strategy = tf.distribute.MirroredStrategy(['GPU:0', 'GPU:1']) # With 2 devices with strategy.scope(): v = tf.Variable(5.0, aggregation=tf.VariableAggregation.SUM) def update_fn(v): @@ -2927,7 +2936,7 @@ class ReplicaContext(object): NOTE: For `tf.distribute.MirroredStrategy` and `tf.distribute.experimental.MultiWorkerMirroredStrategy`, this returns a nested - list of device strings, e.g, [["gpu:0"]]. + list of device strings, e.g, [["GPU:0"]]. """ require_replica_context(self) return (device_util.current(),) diff --git a/tensorflow/python/distribute/input_lib.py b/tensorflow/python/distribute/input_lib.py index 387836c0d77..b50037b348f 100644 --- a/tensorflow/python/distribute/input_lib.py +++ b/tensorflow/python/distribute/input_lib.py @@ -216,6 +216,7 @@ class DistributedIteratorInterface(collections_abc.Iterator, "DistributedIterator.element_spec() must be implemented in descendants") def get_next_as_optional(self): + # pylint: disable=line-too-long """Returns a `tf.experimental.Optional` that contains the next value for all replicas. If the `tf.distribute.DistributedIterator` has reached the end of the @@ -230,6 +231,7 @@ class DistributedIteratorInterface(collections_abc.Iterator, >>> distributed_iterator = iter( ... strategy.experimental_distribute_dataset(dataset)) >>> def step_fn(x): + ... # train the model with inputs ... return x >>> @tf.function ... def train_fn(distributed_iterator): @@ -237,15 +239,17 @@ class DistributedIteratorInterface(collections_abc.Iterator, ... optional_data = distributed_iterator.get_next_as_optional() ... if not optional_data.has_value(): ... break - ... tf.print(strategy.run(step_fn, args=(optional_data.get_value(),))) + ... per_replica_results = strategy.run(step_fn, args=(optional_data.get_value(),)) + ... tf.print(strategy.experimental_local_results(per_replica_results)) >>> train_fn(distributed_iterator) - ... # ([0 1],) - ... # ([2 3],) + ... # ([0 1], [2 3]) + ... # ([4], []) Returns: An `tf.experimental.Optional` object representing the next value from the `tf.distribute.DistributedIterator` (if it has one) or no value. 
""" + # pylint: enable=line-too-long raise NotImplementedError( "get_next_as_optional() not implemented in descendants") From d09fd4d5f1a490e8ca0a2c439959757c952625a2 Mon Sep 17 00:00:00 2001 From: ShengYang1 Date: Wed, 15 Jul 2020 13:18:23 +0800 Subject: [PATCH 0459/2522] Refine UT code --- .../core/grappler/optimizers/remapper.cc | 2 +- .../core/grappler/optimizers/remapper_test.cc | 110 +++++++++--------- 2 files changed, 54 insertions(+), 58 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/remapper.cc b/tensorflow/core/grappler/optimizers/remapper.cc index 4785e2a633f..661ad7895c2 100644 --- a/tensorflow/core/grappler/optimizers/remapper.cc +++ b/tensorflow/core/grappler/optimizers/remapper.cc @@ -87,7 +87,7 @@ struct FusedBatchNorm { int fused_batch_norm = kMissingIndex; }; -// Comparison op with cast +// Comparison op followed by a cast, e.g., GreaterEqual + Cast. struct ComparisonWithCast { ComparisonWithCast() = default; diff --git a/tensorflow/core/grappler/optimizers/remapper_test.cc b/tensorflow/core/grappler/optimizers/remapper_test.cc index eac6b291af4..417ecd6dd44 100644 --- a/tensorflow/core/grappler/optimizers/remapper_test.cc +++ b/tensorflow/core/grappler/optimizers/remapper_test.cc @@ -925,64 +925,60 @@ TEST_F(RemapperTest, FuseConv2DWithSqueezeAndBias) { } #endif -#define REGISTER_TEST_ALL_TYPES(TEST) \ - REGISTER_TEST(TEST, DT_FLOAT); \ - REGISTER_TEST(TEST, DT_BFLOAT16); - -#define REGISTER_TEST(CMP, TYPE) \ - TEST_F(RemapperTest, Fuse##CMP##WithCast_##TYPE) { \ - using ::tensorflow::ops::Placeholder; \ - for (bool is_training : {true, false}) { \ - tensorflow::Scope s = tensorflow::Scope::NewRootScope(); \ - const int num_channels = 24; \ - TensorShape channel_shape({num_channels}); \ - TensorShape empty_shape({0}); \ - auto x = Placeholder(s.WithOpName("x"), TYPE, \ - ops::Placeholder::Shape({2, 8, 8, num_channels})); \ - auto y = Placeholder(s.WithOpName("y"), TYPE, \ - ops::Placeholder::Shape({2, 8, 8, num_channels})); \ - float epsilon = 0.1f; \ - auto comparator = ops::CMP(s.WithOpName("cmp_op"), x, y); \ - auto cast = ops::Cast(s.WithOpName("cast"), comparator.z, TYPE); \ - auto fetch = ops::Identity(s.WithOpName("fetch"), cast); \ - auto input1_t = GenerateRandomTensor({2, 8, 8, num_channels}); \ - auto input2_t = GenerateRandomTensor({2, 8, 8, num_channels}); \ - GrapplerItem item; \ - item.fetch = {"fetch"}; \ - item.feed = {{"x", input1_t}, {"y", input2_t}}; \ - TF_ASSERT_OK(s.ToGraphDef(&item.graph)); \ - for (int i = 0; i < item.graph.node_size(); ++i) { \ - item.graph.mutable_node(i)->set_device("/device:CPU:0"); \ - } \ - Remapper optimizer(RewriterConfig::AGGRESSIVE); \ - GraphDef output; \ - TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output)); \ - int found = 0; \ - for (const NodeDef& node : output.node()) { \ - if (node.name() == "cast") { \ - EXPECT_EQ(node.op(), "_" #CMP "WithCast"); \ - ASSERT_EQ(node.input_size(), 2); \ - EXPECT_EQ(node.input(0), "x"); \ - EXPECT_EQ(node.input(1), "y"); \ - found++; \ - } \ - } \ - EXPECT_EQ(found, 1); \ - auto tensors_expected = \ - EvaluateNodes(item.graph, item.fetch, item.feed); \ - ASSERT_EQ(tensors_expected.size(), 1); \ - auto tensors = EvaluateNodes(output, item.fetch, item.feed); \ - ASSERT_EQ(tensors.size(), 1); \ - test::ExpectClose(tensors[0], tensors_expected[0], 1e-2, 1e-2); \ - } \ +class FusedCmpAndCastTest : public GrapplerTest { + protected: + template + void TestFusedCmpAndCast() { + using ::tensorflow::ops::Placeholder; + for (bool is_training : {true, false}) { + 
tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + const int num_channels = 24; + TensorShape channel_shape({num_channels}); + TensorShape empty_shape({0}); + auto x = Placeholder(s.WithOpName("x"), TYPE, + ops::Placeholder::Shape({2, 8, 8, num_channels})); + auto y = Placeholder(s.WithOpName("y"), TYPE, + ops::Placeholder::Shape({2, 8, 8, num_channels})); + float epsilon = 0.1f; + auto comparator = ops::Equal(s.WithOpName("Equal"), x, y); + auto cast = ops::Cast(s.WithOpName("cast"), comparator.z, TYPE); + auto fetch = ops::Identity(s.WithOpName("fetch"), cast); + auto input1_t = GenerateRandomTensor({2, 8, 8, num_channels}); + auto input2_t = GenerateRandomTensor({2, 8, 8, num_channels}); + GrapplerItem item; + item.fetch = {"fetch"}; + item.feed = {{"x", input1_t}, {"y", input2_t}}; + TF_ASSERT_OK(s.ToGraphDef(&item.graph)); + for (int i = 0; i < item.graph.node_size(); ++i) { + item.graph.mutable_node(i)->set_device("/device:CPU:0"); + } + Remapper optimizer(RewriterConfig::AGGRESSIVE); + GraphDef output; + TF_ASSERT_OK(optimizer.Optimize(nullptr, item, &output)); + int found = 0; + for (const NodeDef& node : output.node()) { + if (node.name() == "cast") { + EXPECT_EQ(node.op(), "_EqualWithCast"); + ASSERT_EQ(node.input_size(), 2); + EXPECT_EQ(node.input(0), "x"); + EXPECT_EQ(node.input(1), "y"); + found++; + } + } + EXPECT_EQ(found, 1); + auto tensors_expected = EvaluateNodes(item.graph, item.fetch, item.feed); + ASSERT_EQ(tensors_expected.size(), 1); + auto tensors = EvaluateNodes(output, item.fetch, item.feed); + ASSERT_EQ(tensors.size(), 1); + test::ExpectClose(tensors[0], tensors_expected[0], 1e-2, 1e-2); + } } -REGISTER_TEST_ALL_TYPES(GreaterEqual) -REGISTER_TEST_ALL_TYPES(Greater) -REGISTER_TEST_ALL_TYPES(LessEqual) -REGISTER_TEST_ALL_TYPES(Less) -REGISTER_TEST_ALL_TYPES(Equal) -REGISTER_TEST_ALL_TYPES(NotEqual) -#undef REGISTER_TEST +}; + +TEST_F(FusedCmpAndCastTest, FusedCmpAndCast) { + TestFusedCmpAndCast(); + TestFusedCmpAndCast(); +} } // namespace grappler } // namespace tensorflow From 6e7992cae2f4af31ebc9753d7baf21ede69c39b3 Mon Sep 17 00:00:00 2001 From: Pavithra Vijay Date: Tue, 14 Jul 2020 22:20:32 -0700 Subject: [PATCH 0460/2522] Remove the usage of TF private API math_ops._bucketize from Keras. 
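Behavior is unchanged by this switch; only the call site moves from the private `math_ops._bucketize` wrapper to the underlying generated op. A rough, illustrative sketch of the new call path (the input values and boundaries below are made up and not part of this change):

    from tensorflow.python.framework import constant_op
    from tensorflow.python.ops import gen_math_ops

    x = constant_op.constant([[-5.0, 10000.0], [150.0, 10.0], [5.0, 100.0]])
    # Each element is mapped to the index of the bucket it falls into, so with
    # these boundaries the result is [[0, 3], [3, 2], [1, 3]].
    buckets = gen_math_ops.Bucketize(input=x, boundaries=[0.0, 10.0, 100.0])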
PiperOrigin-RevId: 321299177 Change-Id: I00e976e9244f1b7b4e341f37d6ffa1aa4f2e6ee1 --- .../keras/layers/preprocessing/discretization.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/keras/layers/preprocessing/discretization.py b/tensorflow/python/keras/layers/preprocessing/discretization.py index d621410146c..0b6cf89009b 100644 --- a/tensorflow/python/keras/layers/preprocessing/discretization.py +++ b/tensorflow/python/keras/layers/preprocessing/discretization.py @@ -22,7 +22,7 @@ from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_spec from tensorflow.python.keras.engine.base_layer import Layer from tensorflow.python.ops import array_ops -from tensorflow.python.ops import math_ops +from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops.ragged import ragged_functional_ops from tensorflow.python.ops.ragged import ragged_tensor from tensorflow.python.util.tf_export import keras_export @@ -84,18 +84,17 @@ class Discretization(Layer): def call(self, inputs): if ragged_tensor.is_ragged(inputs): integer_buckets = ragged_functional_ops.map_flat_values( - math_ops._bucketize, inputs, boundaries=self.bins) # pylint: disable=protected-access + gen_math_ops.Bucketize, input=inputs, boundaries=self.bins) # Ragged map_flat_values doesn't touch the non-values tensors in the # ragged composite tensor. If this op is the only op a Keras model, # this can cause errors in Graph mode, so wrap the tensor in an identity. return array_ops.identity(integer_buckets) elif isinstance(inputs, sparse_tensor.SparseTensor): - integer_buckets = math_ops._bucketize( # pylint: disable=protected-access - inputs.values, - boundaries=self.bins) + integer_buckets = gen_math_ops.Bucketize( + input=inputs.values, boundaries=self.bins) return sparse_tensor.SparseTensor( indices=array_ops.identity(inputs.indices), values=integer_buckets, dense_shape=array_ops.identity(inputs.dense_shape)) else: - return math_ops._bucketize(inputs, boundaries=self.bins) # pylint: disable=protected-access + return gen_math_ops.Bucketize(input=inputs, boundaries=self.bins) From 0848e16fb190707fbb131f1ab99f89deea4e233e Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Tue, 14 Jul 2020 22:40:02 -0700 Subject: [PATCH 0461/2522] [MLIR:TF] Replace RealDiv with constant divisor with multiplication by divisor reciprocal PiperOrigin-RevId: 321301105 Change-Id: If0b55598897dbc4a26bba5eb8af552199f3f28ae --- tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc | 2 +- .../compiler/mlir/tensorflow/tests/canonicalize.mlir | 12 ++++++++++++ .../mlir/tensorflow/transforms/canonicalize.td | 8 ++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc index cfd3b61b2c3..98bc6b3089a 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc @@ -2774,7 +2774,7 @@ OpFoldResult RankOp::fold(ArrayRef operands) { void RealDivOp::getCanonicalizationPatterns(OwningRewritePatternList &results, MLIRContext *context) { - results.insert(context); + results.insert(context); } OpFoldResult RealDivOp::fold(ArrayRef operands) { diff --git a/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir b/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir index d61fc66a5e6..514db1f4f08 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir +++ 
b/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir @@ -586,6 +586,18 @@ func @testRealDivWithSqrtDivisor(%arg0: tensor<8x16xf32>, %arg1: tensor<8x16xf32 // CHECK: return %1 } +// CHECK-LABEL: testRealDivWithConstDivisor +func @testRealDivWithConstDivisor(%arg0: tensor<8x2xf32>) -> tensor<8x2xf32> { + %0 = "tf.Const"() {value = dense<[2.0, 4.0]> : tensor<2xf32>} : () -> tensor<2xf32> + %1 = "tf.RealDiv"(%arg0, %0) : (tensor<8x2xf32>, tensor<2xf32>) -> tensor<8x2xf32> + return %1: tensor<8x2xf32> + + // CHECK: %0 = "tf.Const" + // CHECK-SAME: value = dense<[5.000000e-01, 2.500000e-01] + // CHECK: %1 = "tf.Mul"(%arg0, %0) + // CHECK: return %1 +} + // CHECK-LABEL: testTruncateDivWithSqrtDivisor func @testTruncateDivWithSqrtDivisor(%arg0: tensor<8x16xf32>, %arg1: tensor<8x16xf32>) -> tensor<8x16xf32> { %0 = "tf.Sqrt"(%arg1) : (tensor<8x16xf32>) -> tensor<8x16xf32> diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/canonicalize.td b/tensorflow/compiler/mlir/tensorflow/transforms/canonicalize.td index 9d72284da91..3f0b5b48af9 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/canonicalize.td +++ b/tensorflow/compiler/mlir/tensorflow/transforms/canonicalize.td @@ -150,6 +150,7 @@ def LogToLog1p : Pat< // LogicalNot op patterns. //===----------------------------------------------------------------------===// +// TODO(ezhulenev): Generalize this pattern for all involutions. def LogicalNotNested : Pat<(TF_LogicalNotOp (TF_LogicalNotOp $arg)), (replaceWithValue $arg)>; @@ -187,6 +188,13 @@ def NegNested : Pat<(TF_NegOp (TF_NegOp $arg)), (replaceWithValue $arg)>; def RealDivWithSqrtDivisor : Pat<(TF_RealDivOp $arg0, (TF_SqrtOp $arg1)), (TF_MulOp $arg0, (TF_RsqrtOp $arg1))>; +// Replace division by a constant with a multiplication by a reciprocal of that +// constant. Floating point division can be ~10x more expensive than a +// multiplication. +def RealDivWithConstDivisor : Pat< + (TF_RealDivOp $arg0, (TF_ConstOp FloatElementsAttr<32>:$value)), + (TF_MulOp $arg0, (TF_ReciprocalOp (TF_ConstOp $value)))>; + //===----------------------------------------------------------------------===// // Reciprocal op patterns. //===----------------------------------------------------------------------===// From 6d73eb4c3f8664ba5bbc12028c3ece32d433d23f Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Tue, 14 Jul 2020 22:58:15 -0700 Subject: [PATCH 0462/2522] Remove the v1_only annotation in the conv test for legacy tf layers. Most of the tests were relying on v1 behavior like tensor.op.name, placeholder, variable_scope etc. Make them to run under graph context, since those layer are expect to run in v1. 
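The change follows two patterns, sketched roughly below (the test class name is illustrative; the layer, shapes and expected op name mirror the existing tests):

    from tensorflow.python.eager import context
    from tensorflow.python.framework import ops
    from tensorflow.python.keras.legacy_tf_layers import convolutional as conv_layers
    from tensorflow.python.ops import nn_ops
    from tensorflow.python.ops import random_ops
    from tensorflow.python.platform import test

    class ConvGraphModeExample(test.TestCase):

      def testCreateConv2D(self):
        # Runs in both eager and graph mode; the graph-only assertion is
        # guarded instead of forcing the whole test into v1.
        images = random_ops.random_uniform((5, 7, 9, 4))
        layer = conv_layers.Conv2D(32, [3, 3], activation=nn_ops.relu)
        output = layer.apply(images)
        if not context.executing_eagerly():
          self.assertEqual(output.op.name, 'conv2d/Relu')

      def testCreateConv2DChannelsFirst(self):
        # Relies on graph-mode behavior, so the body runs under an explicit
        # graph instead of the removed @test_util.run_deprecated_v1 decorator.
        with ops.Graph().as_default():
          images = random_ops.random_uniform((5, 4, 7, 9))
          layer = conv_layers.Conv2D(32, [3, 3], data_format='channels_first')
          output = layer.apply(images)
          self.assertListEqual(output.get_shape().as_list(), [5, 32, 5, 7])

    if __name__ == '__main__':
      test.main()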
PiperOrigin-RevId: 321302900 Change-Id: Ifedadceaa7da6055aa2aca427d52d66683a0caad --- .../legacy_tf_layers/convolutional_test.py | 864 +++++++++--------- 1 file changed, 437 insertions(+), 427 deletions(-) diff --git a/tensorflow/python/keras/legacy_tf_layers/convolutional_test.py b/tensorflow/python/keras/legacy_tf_layers/convolutional_test.py index a6a4bc7a088..8a05380543e 100644 --- a/tensorflow/python/keras/legacy_tf_layers/convolutional_test.py +++ b/tensorflow/python/keras/legacy_tf_layers/convolutional_test.py @@ -20,9 +20,9 @@ from __future__ import print_function import numpy as np +from tensorflow.python.eager import context from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops -from tensorflow.python.framework import test_util from tensorflow.python.keras.legacy_tf_layers import convolutional as conv_layers from tensorflow.python.ops import array_ops from tensorflow.python.ops import init_ops @@ -60,13 +60,13 @@ class ConvTest(test.TestCase): with self.assertRaisesRegex(ValueError, 'kernel_size'): conv_layers.conv2d(images, 32, None) - @test_util.run_deprecated_v1 def testCreateConv2D(self): height, width = 7, 9 images = random_ops.random_uniform((5, height, width, 4)) layer = conv_layers.Conv2D(32, [3, 3], activation=nn_ops.relu) output = layer.apply(images) - self.assertEqual(output.op.name, 'conv2d/Relu') + if not context.executing_eagerly(): + self.assertEqual(output.op.name, 'conv2d/Relu') self.assertListEqual(output.get_shape().as_list(), [5, height - 2, width - 2, 32]) self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 3, 4, 32]) @@ -89,32 +89,32 @@ class ConvTest(test.TestCase): self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 3, 4, 32]) self.assertListEqual(layer.bias.get_shape().as_list(), [32]) - @test_util.run_deprecated_v1 def testCreateConv2DChannelsFirst(self): - height, width = 7, 9 - images = random_ops.random_uniform((5, 4, height, width)) - layer = conv_layers.Conv2D(32, [3, 3], data_format='channels_first') - output = layer.apply(images) - self.assertListEqual(output.get_shape().as_list(), - [5, 32, height - 2, width - 2]) - self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 3, 4, 32]) - self.assertListEqual(layer.bias.get_shape().as_list(), [32]) + with ops.Graph().as_default(): + height, width = 7, 9 + images = random_ops.random_uniform((5, 4, height, width)) + layer = conv_layers.Conv2D(32, [3, 3], data_format='channels_first') + output = layer.apply(images) + self.assertListEqual(output.get_shape().as_list(), + [5, 32, height - 2, width - 2]) + self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 3, 4, 32]) + self.assertListEqual(layer.bias.get_shape().as_list(), [32]) - @test_util.run_deprecated_v1 def testUnknownInputChannels(self): - images = array_ops.placeholder(dtypes.float32, (5, 7, 9, None)) - layer = conv_layers.Conv2D(32, [3, 3], activation=nn_ops.relu) - with self.assertRaisesRegex( - ValueError, 'The channel dimension of the inputs ' - 'should be defined. Found `None`.'): - _ = layer.apply(images) + with ops.Graph().as_default(): + images = array_ops.placeholder(dtypes.float32, (5, 7, 9, None)) + layer = conv_layers.Conv2D(32, [3, 3], activation=nn_ops.relu) + with self.assertRaisesRegex( + ValueError, 'The channel dimension of the inputs ' + 'should be defined. 
Found `None`.'): + _ = layer.apply(images) - images = array_ops.placeholder(dtypes.float32, (5, None, 7, 9)) - layer = conv_layers.Conv2D(32, [3, 3], data_format='channels_first') - with self.assertRaisesRegex( - ValueError, 'The channel dimension of the inputs ' - 'should be defined. Found `None`.'): - _ = layer.apply(images) + images = array_ops.placeholder(dtypes.float32, (5, None, 7, 9)) + layer = conv_layers.Conv2D(32, [3, 3], data_format='channels_first') + with self.assertRaisesRegex( + ValueError, 'The channel dimension of the inputs ' + 'should be defined. Found `None`.'): + _ = layer.apply(images) def testConv2DPaddingSame(self): height, width = 7, 9 @@ -144,13 +144,13 @@ class ConvTest(test.TestCase): self.assertListEqual(output.get_shape().as_list(), [5, height / 2, width, 32]) - @test_util.run_deprecated_v1 def testCreateConv1D(self): width = 7 data = random_ops.random_uniform((5, width, 4)) layer = conv_layers.Conv1D(32, 3, activation=nn_ops.relu) output = layer.apply(data) - self.assertEqual(output.op.name, 'conv1d/Relu') + if not context.executing_eagerly(): + self.assertEqual(output.op.name, 'conv1d/Relu') self.assertListEqual(output.get_shape().as_list(), [5, width - 2, 32]) self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 4, 32]) self.assertListEqual(layer.bias.get_shape().as_list(), [32]) @@ -161,85 +161,87 @@ class ConvTest(test.TestCase): output = conv_layers.conv1d(data, 32, 3, activation=nn_ops.relu) self.assertListEqual(output.get_shape().as_list(), [5, width - 2, 32]) - @test_util.run_deprecated_v1 def testCreateConv1DChannelsFirst(self): - width = 7 - data = random_ops.random_uniform((5, 4, width)) - layer = conv_layers.Conv1D(32, 3, data_format='channels_first') - output = layer.apply(data) - self.assertListEqual(output.get_shape().as_list(), [5, 32, width - 2]) - self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 4, 32]) - self.assertListEqual(layer.bias.get_shape().as_list(), [32]) + with ops.Graph().as_default(): + width = 7 + data = random_ops.random_uniform((5, 4, width)) + layer = conv_layers.Conv1D(32, 3, data_format='channels_first') + output = layer.apply(data) + self.assertListEqual(output.get_shape().as_list(), [5, 32, width - 2]) + self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 4, 32]) + self.assertListEqual(layer.bias.get_shape().as_list(), [32]) - @test_util.run_deprecated_v1 def testUnknownInputChannelsConv1D(self): - data = array_ops.placeholder(dtypes.float32, (5, 4, None)) - layer = conv_layers.Conv1D(32, 3, activation=nn_ops.relu) - with self.assertRaisesRegex( - ValueError, 'The channel dimension of the inputs ' - 'should be defined. Found `None`.'): - _ = layer.apply(data) + with ops.Graph().as_default(): + data = array_ops.placeholder(dtypes.float32, (5, 4, None)) + layer = conv_layers.Conv1D(32, 3, activation=nn_ops.relu) + with self.assertRaisesRegex( + ValueError, 'The channel dimension of the inputs ' + 'should be defined. Found `None`.'): + _ = layer.apply(data) - data = array_ops.placeholder(dtypes.float32, (5, None, 4)) - layer = conv_layers.Conv1D(32, 3, data_format='channels_first') - with self.assertRaisesRegex( - ValueError, 'The channel dimension of the inputs ' - 'should be defined. Found `None`.'): - _ = layer.apply(data) + data = array_ops.placeholder(dtypes.float32, (5, None, 4)) + layer = conv_layers.Conv1D(32, 3, data_format='channels_first') + with self.assertRaisesRegex( + ValueError, 'The channel dimension of the inputs ' + 'should be defined. 
Found `None`.'): + _ = layer.apply(data) - @test_util.run_deprecated_v1 def testCreateConv3D(self): depth, height, width = 6, 7, 9 volumes = random_ops.random_uniform((5, depth, height, width, 4)) layer = conv_layers.Conv3D(32, [3, 3, 3], activation=nn_ops.relu) output = layer.apply(volumes) - self.assertEqual(output.op.name, 'conv3d/Relu') + if not context.executing_eagerly(): + self.assertEqual(output.op.name, 'conv3d/Relu') self.assertListEqual(output.get_shape().as_list(), [5, depth - 2, height - 2, width - 2, 32]) self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 3, 3, 4, 32]) self.assertListEqual(layer.bias.get_shape().as_list(), [32]) - @test_util.run_deprecated_v1 def testUnknownInputChannelsConv3D(self): - volumes = array_ops.placeholder(dtypes.float32, (5, 6, 7, 9, None)) - layer = conv_layers.Conv3D(32, [3, 3, 3], activation=nn_ops.relu) - with self.assertRaisesRegex( - ValueError, 'The channel dimension of the inputs ' - 'should be defined. Found `None`.'): - _ = layer.apply(volumes) + with ops.Graph().as_default(): + volumes = array_ops.placeholder(dtypes.float32, (5, 6, 7, 9, None)) + layer = conv_layers.Conv3D(32, [3, 3, 3], activation=nn_ops.relu) + with self.assertRaisesRegex( + ValueError, 'The channel dimension of the inputs ' + 'should be defined. Found `None`.'): + _ = layer.apply(volumes) - @test_util.run_deprecated_v1 def testConv2DKernelRegularizer(self): - height, width = 7, 9 - images = random_ops.random_uniform((5, height, width, 4)) - reg = lambda x: 0.1 * math_ops.reduce_sum(x) - layer = conv_layers.Conv2D(32, [3, 3], kernel_regularizer=reg) - layer.apply(images) - loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) - self.assertEqual(len(loss_keys), 1) - self.evaluate([v.initializer for v in layer.variables]) - self.assertListEqual(self.evaluate(layer.losses), self.evaluate(loss_keys)) + with ops.Graph().as_default(): + height, width = 7, 9 + images = random_ops.random_uniform((5, height, width, 4)) + reg = lambda x: 0.1 * math_ops.reduce_sum(x) + layer = conv_layers.Conv2D(32, [3, 3], kernel_regularizer=reg) + layer.apply(images) + loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) + self.assertEqual(len(loss_keys), 1) + self.evaluate([v.initializer for v in layer.variables]) + self.assertListEqual( + self.evaluate(layer.losses), self.evaluate(loss_keys)) - @test_util.run_deprecated_v1 def testConv2DBiasRegularizer(self): - height, width = 7, 9 - images = random_ops.random_uniform((5, height, width, 4)) - reg = lambda x: 0.1 * math_ops.reduce_sum(x) - layer = conv_layers.Conv2D(32, [3, 3], bias_regularizer=reg) - layer.apply(images) - loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) - self.assertEqual(len(loss_keys), 1) - self.evaluate([v.initializer for v in layer.variables]) - self.assertListEqual(self.evaluate(layer.losses), self.evaluate(loss_keys)) + with ops.Graph().as_default(): + height, width = 7, 9 + images = random_ops.random_uniform((5, height, width, 4)) + reg = lambda x: 0.1 * math_ops.reduce_sum(x) + layer = conv_layers.Conv2D(32, [3, 3], bias_regularizer=reg) + layer.apply(images) + loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) + self.assertEqual(len(loss_keys), 1) + self.evaluate([v.initializer for v in layer.variables]) + self.assertListEqual( + self.evaluate(layer.losses), self.evaluate(loss_keys)) - @test_util.run_deprecated_v1 def testConv2DNoBias(self): height, width = 7, 9 images = random_ops.random_uniform((5, height, width, 4)) layer = 
conv_layers.Conv2D( 32, [3, 3], activation=nn_ops.relu, use_bias=False) output = layer.apply(images) - self.assertEqual(output.op.name, 'conv2d/Relu') + if not context.executing_eagerly(): + self.assertEqual(output.op.name, 'conv2d/Relu') self.assertListEqual(output.get_shape().as_list(), [5, height - 2, width - 2, 32]) self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 3, 4, 32]) @@ -259,29 +261,28 @@ class ConvTest(test.TestCase): output = layer.apply(images) self.assertListEqual(output.get_shape().as_list(), [5, height - 2, 3, 32]) - @test_util.run_deprecated_v1 def testFunctionalConv2DReuse(self): - height, width = 7, 9 - images = random_ops.random_uniform((5, height, width, 3), seed=1) - conv_layers.conv2d(images, 32, [3, 3], name='conv1') - self.assertEqual(len(variables.trainable_variables()), 2) - conv_layers.conv2d(images, 32, [3, 3], name='conv1', reuse=True) - self.assertEqual(len(variables.trainable_variables()), 2) - - @test_util.run_deprecated_v1 - def testFunctionalConv2DReuseFromScope(self): - with variable_scope.variable_scope('scope'): + with ops.Graph().as_default(): height, width = 7, 9 images = random_ops.random_uniform((5, height, width, 3), seed=1) conv_layers.conv2d(images, 32, [3, 3], name='conv1') self.assertEqual(len(variables.trainable_variables()), 2) - with variable_scope.variable_scope('scope', reuse=True): - conv_layers.conv2d(images, 32, [3, 3], name='conv1') + conv_layers.conv2d(images, 32, [3, 3], name='conv1', reuse=True) self.assertEqual(len(variables.trainable_variables()), 2) - @test_util.run_deprecated_v1 + def testFunctionalConv2DReuseFromScope(self): + with ops.Graph().as_default(): + with variable_scope.variable_scope('scope'): + height, width = 7, 9 + images = random_ops.random_uniform((5, height, width, 3), seed=1) + conv_layers.conv2d(images, 32, [3, 3], name='conv1') + self.assertEqual(len(variables.trainable_variables()), 2) + with variable_scope.variable_scope('scope', reuse=True): + conv_layers.conv2d(images, 32, [3, 3], name='conv1') + self.assertEqual(len(variables.trainable_variables()), 2) + def testFunctionalConv2DInitializerFromScope(self): - with self.cached_session() as sess: + with ops.Graph().as_default(), self.cached_session(): with variable_scope.variable_scope( 'scope', initializer=init_ops.ones_initializer()): height, width = 7, 9 @@ -298,14 +299,14 @@ class ConvTest(test.TestCase): # Check that the bias still got initialized to zeros. 
self.assertAllClose(weights[1], np.zeros((32))) - @test_util.run_deprecated_v1 def testFunctionalConv2DNoReuse(self): - height, width = 7, 9 - images = random_ops.random_uniform((5, height, width, 3), seed=1) - conv_layers.conv2d(images, 32, [3, 3]) - self.assertEqual(len(variables.trainable_variables()), 2) - conv_layers.conv2d(images, 32, [3, 3]) - self.assertEqual(len(variables.trainable_variables()), 4) + with ops.Graph().as_default(): + height, width = 7, 9 + images = random_ops.random_uniform((5, height, width, 3), seed=1) + conv_layers.conv2d(images, 32, [3, 3]) + self.assertEqual(len(variables.trainable_variables()), 2) + conv_layers.conv2d(images, 32, [3, 3]) + self.assertEqual(len(variables.trainable_variables()), 4) def testConstraints(self): # Conv1D @@ -341,12 +342,12 @@ class ConvTest(test.TestCase): self.assertEqual(conv3d.kernel_constraint, k_constraint) self.assertEqual(conv3d.bias_constraint, b_constraint) - @test_util.run_deprecated_v1 def testConv3DChannelsFirst(self): # Test case for GitHub issue 15655 - images = array_ops.placeholder( - dtype=dtypes.float32, shape=[None, 1, 32, 32, 32]) - conv_layers.conv3d(images, 32, 9, data_format='channels_first') + with ops.Graph().as_default(): + images = array_ops.placeholder( + dtype=dtypes.float32, shape=[None, 1, 32, 32, 32]) + conv_layers.conv3d(images, 32, 9, data_format='channels_first') class SeparableConv1DTest(test.TestCase): @@ -375,13 +376,13 @@ class SeparableConv1DTest(test.TestCase): with self.assertRaisesRegex(ValueError, 'kernel_size'): conv_layers.separable_conv1d(data, 32, None) - @test_util.run_deprecated_v1 def testCreateSeparableConv1D(self): length = 9 data = random_ops.random_uniform((5, length, 4)) layer = conv_layers.SeparableConv1D(32, 3, activation=nn_ops.relu) output = layer.apply(data) - self.assertEqual(output.op.name, 'separable_conv1d/Relu') + if not context.executing_eagerly(): + self.assertEqual(output.op.name, 'separable_conv1d/Relu') self.assertEqual(output.get_shape().as_list(), [5, length - 2, 32]) self.assertEqual(layer.depthwise_kernel.get_shape().as_list(), [3, 4, 1]) self.assertEqual(layer.pointwise_kernel.get_shape().as_list(), [1, 4, 32]) @@ -397,16 +398,16 @@ class SeparableConv1DTest(test.TestCase): self.assertEqual(layer.pointwise_kernel.get_shape().as_list(), [1, 8, 32]) self.assertEqual(layer.bias.get_shape().as_list(), [32]) - @test_util.run_deprecated_v1 def testCreateSeparableConv1DChannelsFirst(self): - length = 9 - data = random_ops.random_uniform((5, 4, length)) - layer = conv_layers.SeparableConv1D(32, 3, data_format='channels_first') - output = layer.apply(data) - self.assertEqual(output.get_shape().as_list(), [5, 32, length - 2]) - self.assertEqual(layer.depthwise_kernel.get_shape().as_list(), [3, 4, 1]) - self.assertEqual(layer.pointwise_kernel.get_shape().as_list(), [1, 4, 32]) - self.assertEqual(layer.bias.get_shape().as_list(), [32]) + with ops.Graph().as_default(): + length = 9 + data = random_ops.random_uniform((5, 4, length)) + layer = conv_layers.SeparableConv1D(32, 3, data_format='channels_first') + output = layer.apply(data) + self.assertEqual(output.get_shape().as_list(), [5, 32, length - 2]) + self.assertEqual(layer.depthwise_kernel.get_shape().as_list(), [3, 4, 1]) + self.assertEqual(layer.pointwise_kernel.get_shape().as_list(), [1, 4, 32]) + self.assertEqual(layer.bias.get_shape().as_list(), [32]) def testSeparableConv1DPaddingSame(self): length = 9 @@ -423,90 +424,93 @@ class SeparableConv1DTest(test.TestCase): output = layer.apply(data) 
self.assertEqual(output.get_shape().as_list(), [5, length // 2, 32]) - @test_util.run_deprecated_v1 def testCreateSeparableConv1DWithStridesChannelsFirst(self): - data_format = 'channels_first' - length = 10 - data = random_ops.random_uniform((5, 3, length), seed=1) - layer = conv_layers.SeparableConv1D( - 32, 3, strides=2, padding='same', data_format=data_format) - output = layer.apply(data) - self.assertEqual(output.get_shape().as_list(), [5, 32, length // 2]) + with ops.Graph().as_default(): + data_format = 'channels_first' + length = 10 + data = random_ops.random_uniform((5, 3, length), seed=1) + layer = conv_layers.SeparableConv1D( + 32, 3, strides=2, padding='same', data_format=data_format) + output = layer.apply(data) + self.assertEqual(output.get_shape().as_list(), [5, 32, length // 2]) - @test_util.run_deprecated_v1 def testFunctionalConv1DReuse(self): - length = 10 - data = random_ops.random_uniform((5, length, 3), seed=1) - conv_layers.separable_conv1d(data, 32, 3, name='sepconv1') - self.assertEqual(len(variables.trainable_variables()), 3) - conv_layers.separable_conv1d(data, 32, 3, name='sepconv1', reuse=True) - self.assertEqual(len(variables.trainable_variables()), 3) - - @test_util.run_deprecated_v1 - def testFunctionalConv1DReuseFromScope(self): - with variable_scope.variable_scope('scope'): + with ops.Graph().as_default(): length = 10 data = random_ops.random_uniform((5, length, 3), seed=1) conv_layers.separable_conv1d(data, 32, 3, name='sepconv1') self.assertEqual(len(variables.trainable_variables()), 3) - with variable_scope.variable_scope('scope', reuse=True): - conv_layers.separable_conv1d(data, 32, 3, name='sepconv1') + conv_layers.separable_conv1d(data, 32, 3, name='sepconv1', reuse=True) self.assertEqual(len(variables.trainable_variables()), 3) - @test_util.run_deprecated_v1 + def testFunctionalConv1DReuseFromScope(self): + with ops.Graph().as_default(): + with variable_scope.variable_scope('scope'): + length = 10 + data = random_ops.random_uniform((5, length, 3), seed=1) + conv_layers.separable_conv1d(data, 32, 3, name='sepconv1') + self.assertEqual(len(variables.trainable_variables()), 3) + with variable_scope.variable_scope('scope', reuse=True): + conv_layers.separable_conv1d(data, 32, 3, name='sepconv1') + self.assertEqual(len(variables.trainable_variables()), 3) + def testFunctionalConv1DNoReuse(self): - length = 10 - data = random_ops.random_uniform((5, length, 3), seed=1) - conv_layers.separable_conv1d(data, 32, 3) - self.assertEqual(len(variables.trainable_variables()), 3) - conv_layers.separable_conv1d(data, 32, 3) - self.assertEqual(len(variables.trainable_variables()), 6) + with ops.Graph().as_default(): + length = 10 + data = random_ops.random_uniform((5, length, 3), seed=1) + conv_layers.separable_conv1d(data, 32, 3) + self.assertEqual(len(variables.trainable_variables()), 3) + conv_layers.separable_conv1d(data, 32, 3) + self.assertEqual(len(variables.trainable_variables()), 6) - @test_util.run_deprecated_v1 def testSeparableConv1DDepthwiseRegularizer(self): - length = 9 - data = random_ops.random_uniform((5, length, 4)) - reg = lambda x: 0.1 * math_ops.reduce_sum(x) - layer = conv_layers.SeparableConv1D(32, 3, depthwise_regularizer=reg) - layer.apply(data) - loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) - self.assertEqual(len(loss_keys), 1) - self.evaluate([v.initializer for v in layer.variables]) - self.assertListEqual(self.evaluate(layer.losses), self.evaluate(loss_keys)) + with ops.Graph().as_default(): + length = 9 + data = 
random_ops.random_uniform((5, length, 4)) + reg = lambda x: 0.1 * math_ops.reduce_sum(x) + layer = conv_layers.SeparableConv1D(32, 3, depthwise_regularizer=reg) + layer.apply(data) + loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) + self.assertEqual(len(loss_keys), 1) + self.evaluate([v.initializer for v in layer.variables]) + self.assertListEqual( + self.evaluate(layer.losses), self.evaluate(loss_keys)) - @test_util.run_deprecated_v1 def testSeparableConv1DPointwiseRegularizer(self): - length = 9 - data = random_ops.random_uniform((5, length, 4)) - reg = lambda x: 0.1 * math_ops.reduce_sum(x) - layer = conv_layers.SeparableConv1D(32, 3, pointwise_regularizer=reg) - layer.apply(data) - loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) - self.assertEqual(len(loss_keys), 1) - self.evaluate([v.initializer for v in layer.variables]) - self.assertListEqual(self.evaluate(layer.losses), self.evaluate(loss_keys)) + with ops.Graph().as_default(): + length = 9 + data = random_ops.random_uniform((5, length, 4)) + reg = lambda x: 0.1 * math_ops.reduce_sum(x) + layer = conv_layers.SeparableConv1D(32, 3, pointwise_regularizer=reg) + layer.apply(data) + loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) + self.assertEqual(len(loss_keys), 1) + self.evaluate([v.initializer for v in layer.variables]) + self.assertListEqual( + self.evaluate(layer.losses), self.evaluate(loss_keys)) - @test_util.run_deprecated_v1 def testSeparableConv1DBiasRegularizer(self): - length = 9 - data = random_ops.random_uniform((5, length, 4)) - reg = lambda x: 0.1 * math_ops.reduce_sum(x) - layer = conv_layers.SeparableConv1D(32, 3, bias_regularizer=reg) - layer.apply(data) - loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) - self.assertEqual(len(loss_keys), 1) - self.evaluate([v.initializer for v in layer.variables]) - self.assertListEqual(self.evaluate(layer.losses), self.evaluate(loss_keys)) + with ops.Graph().as_default(): + length = 9 + data = random_ops.random_uniform((5, length, 4)) + reg = lambda x: 0.1 * math_ops.reduce_sum(x) + layer = conv_layers.SeparableConv1D(32, 3, bias_regularizer=reg) + layer.apply(data) + loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) + self.assertEqual(len(loss_keys), 1) + self.evaluate([v.initializer for v in layer.variables]) + self.assertListEqual( + self.evaluate(layer.losses), self.evaluate(loss_keys)) - @test_util.run_deprecated_v1 def testSeparableConv1DNoBias(self): - length = 9 - data = random_ops.random_uniform((5, length, 4)) - layer = conv_layers.SeparableConv1D( - 32, 3, activation=nn_ops.relu, use_bias=False) - output = layer.apply(data) - self.assertEqual(output.op.name, 'separable_conv1d/Relu') - self.assertEqual(layer.bias, None) + with ops.Graph().as_default(): + length = 9 + data = random_ops.random_uniform((5, length, 4)) + layer = conv_layers.SeparableConv1D( + 32, 3, activation=nn_ops.relu, use_bias=False) + output = layer.apply(data) + self.assertEqual(output.op.name, 'separable_conv1d/Relu') + self.assertEqual(layer.bias, None) def testConstraints(self): d_constraint = lambda x: x / math_ops.reduce_sum(x) @@ -549,13 +553,13 @@ class SeparableConv2DTest(test.TestCase): with self.assertRaisesRegex(ValueError, 'kernel_size'): conv_layers.separable_conv2d(images, 32, None) - @test_util.run_deprecated_v1 def testCreateSeparableConv2D(self): height, width = 7, 9 images = random_ops.random_uniform((5, height, width, 4)) layer = conv_layers.SeparableConv2D(32, [3, 3], 
activation=nn_ops.relu) output = layer.apply(images) - self.assertEqual(output.op.name, 'separable_conv2d/Relu') + if not context.executing_eagerly(): + self.assertEqual(output.op.name, 'separable_conv2d/Relu') self.assertListEqual(output.get_shape().as_list(), [5, height - 2, width - 2, 32]) self.assertListEqual(layer.depthwise_kernel.get_shape().as_list(), @@ -590,20 +594,20 @@ class SeparableConv2DTest(test.TestCase): [1, 1, 4, 32]) self.assertListEqual(layer.bias.get_shape().as_list(), [32]) - @test_util.run_deprecated_v1 def testCreateSeparableConv2DChannelsFirst(self): - height, width = 7, 9 - images = random_ops.random_uniform((5, 4, height, width)) - layer = conv_layers.SeparableConv2D( - 32, [3, 3], data_format='channels_first') - output = layer.apply(images) - self.assertListEqual(output.get_shape().as_list(), - [5, 32, height - 2, width - 2]) - self.assertListEqual(layer.depthwise_kernel.get_shape().as_list(), - [3, 3, 4, 1]) - self.assertListEqual(layer.pointwise_kernel.get_shape().as_list(), - [1, 1, 4, 32]) - self.assertListEqual(layer.bias.get_shape().as_list(), [32]) + with ops.Graph().as_default(): + height, width = 7, 9 + images = random_ops.random_uniform((5, 4, height, width)) + layer = conv_layers.SeparableConv2D( + 32, [3, 3], data_format='channels_first') + output = layer.apply(images) + self.assertListEqual(output.get_shape().as_list(), + [5, 32, height - 2, width - 2]) + self.assertListEqual(layer.depthwise_kernel.get_shape().as_list(), + [3, 3, 4, 1]) + self.assertListEqual(layer.pointwise_kernel.get_shape().as_list(), + [1, 1, 4, 32]) + self.assertListEqual(layer.bias.get_shape().as_list(), [32]) def testSeparableConv2DPaddingSame(self): height, width = 7, 9 @@ -613,80 +617,79 @@ class SeparableConv2DTest(test.TestCase): output = layer.apply(images) self.assertListEqual(output.get_shape().as_list(), [5, height, width, 64]) - @test_util.run_deprecated_v1 def testCreateSeparableConvWithStrides(self): - height, width = 6, 8 - # Test strides tuple - images = random_ops.random_uniform((5, height, width, 3), seed=1) - layer = conv_layers.SeparableConv2D( - 32, [3, 3], strides=(2, 2), padding='same') - output = layer.apply(images) - self.assertListEqual(output.get_shape().as_list(), - [5, height / 2, width / 2, 32]) + with ops.Graph().as_default(): + height, width = 6, 8 + # Test strides tuple + images = random_ops.random_uniform((5, height, width, 3), seed=1) + layer = conv_layers.SeparableConv2D( + 32, [3, 3], strides=(2, 2), padding='same') + output = layer.apply(images) + self.assertListEqual(output.get_shape().as_list(), + [5, height / 2, width / 2, 32]) - # Test strides integer - layer = conv_layers.SeparableConv2D(32, [3, 3], strides=2, padding='same') - output = layer.apply(images) - self.assertListEqual(output.get_shape().as_list(), - [5, height / 2, width / 2, 32]) + # Test strides integer + layer = conv_layers.SeparableConv2D(32, [3, 3], strides=2, padding='same') + output = layer.apply(images) + self.assertListEqual(output.get_shape().as_list(), + [5, height / 2, width / 2, 32]) - # Test unequal strides - layer = conv_layers.SeparableConv2D( - 32, [3, 3], strides=(2, 1), padding='same') - output = layer.apply(images) - self.assertListEqual(output.get_shape().as_list(), - [5, height / 2, width, 32]) + # Test unequal strides + layer = conv_layers.SeparableConv2D( + 32, [3, 3], strides=(2, 1), padding='same') + output = layer.apply(images) + self.assertListEqual(output.get_shape().as_list(), + [5, height / 2, width, 32]) - @test_util.run_deprecated_v1 def 
testCreateSeparableConvWithStridesChannelsFirst(self): - data_format = 'channels_first' - height, width = 6, 8 - # Test strides tuple - images = random_ops.random_uniform((5, 3, height, width), seed=1) - layer = conv_layers.SeparableConv2D( - 32, [3, 3], strides=(2, 2), padding='same', data_format=data_format) - output = layer.apply(images) - self.assertListEqual(output.get_shape().as_list(), - [5, 32, height / 2, width / 2]) + with ops.Graph().as_default(): + data_format = 'channels_first' + height, width = 6, 8 + # Test strides tuple + images = random_ops.random_uniform((5, 3, height, width), seed=1) + layer = conv_layers.SeparableConv2D( + 32, [3, 3], strides=(2, 2), padding='same', data_format=data_format) + output = layer.apply(images) + self.assertListEqual(output.get_shape().as_list(), + [5, 32, height / 2, width / 2]) - # Test strides integer - layer = conv_layers.SeparableConv2D(32, [3, 3], strides=2, padding='same', - data_format=data_format) - output = layer.apply(images) - self.assertListEqual(output.get_shape().as_list(), - [5, 32, height / 2, width / 2]) + # Test strides integer + layer = conv_layers.SeparableConv2D(32, [3, 3], strides=2, padding='same', + data_format=data_format) + output = layer.apply(images) + self.assertListEqual(output.get_shape().as_list(), + [5, 32, height / 2, width / 2]) - # Test unequal strides - layer = conv_layers.SeparableConv2D( - 32, [3, 3], strides=(2, 1), padding='same', data_format=data_format) - output = layer.apply(images) - self.assertListEqual(output.get_shape().as_list(), - [5, 32, height / 2, width]) + # Test unequal strides + layer = conv_layers.SeparableConv2D( + 32, [3, 3], strides=(2, 1), padding='same', data_format=data_format) + output = layer.apply(images) + self.assertListEqual(output.get_shape().as_list(), + [5, 32, height / 2, width]) - @test_util.run_deprecated_v1 def testFunctionalConv2DReuse(self): - height, width = 7, 9 - images = random_ops.random_uniform((5, height, width, 3), seed=1) - conv_layers.separable_conv2d(images, 32, [3, 3], name='sepconv1') - self.assertEqual(len(variables.trainable_variables()), 3) - conv_layers.separable_conv2d( - images, 32, [3, 3], name='sepconv1', reuse=True) - self.assertEqual(len(variables.trainable_variables()), 3) - - @test_util.run_deprecated_v1 - def testFunctionalConv2DReuseFromScope(self): - with variable_scope.variable_scope('scope'): + with ops.Graph().as_default(): height, width = 7, 9 images = random_ops.random_uniform((5, height, width, 3), seed=1) conv_layers.separable_conv2d(images, 32, [3, 3], name='sepconv1') self.assertEqual(len(variables.trainable_variables()), 3) - with variable_scope.variable_scope('scope', reuse=True): - conv_layers.separable_conv2d(images, 32, [3, 3], name='sepconv1') + conv_layers.separable_conv2d( + images, 32, [3, 3], name='sepconv1', reuse=True) self.assertEqual(len(variables.trainable_variables()), 3) - @test_util.run_deprecated_v1 + def testFunctionalConv2DReuseFromScope(self): + with ops.Graph().as_default(): + with variable_scope.variable_scope('scope'): + height, width = 7, 9 + images = random_ops.random_uniform((5, height, width, 3), seed=1) + conv_layers.separable_conv2d(images, 32, [3, 3], name='sepconv1') + self.assertEqual(len(variables.trainable_variables()), 3) + with variable_scope.variable_scope('scope', reuse=True): + conv_layers.separable_conv2d(images, 32, [3, 3], name='sepconv1') + self.assertEqual(len(variables.trainable_variables()), 3) + def testFunctionalConv2DInitializerFromScope(self): - with self.cached_session() as 
sess: + with ops.Graph().as_default(), self.cached_session(): with variable_scope.variable_scope( 'scope', initializer=init_ops.ones_initializer()): height, width = 7, 9 @@ -705,66 +708,69 @@ class SeparableConv2DTest(test.TestCase): # Check that the bias still got initialized to zeros. self.assertAllClose(weights[2], np.zeros((32))) - @test_util.run_deprecated_v1 def testFunctionalConv2DNoReuse(self): - height, width = 7, 9 - images = random_ops.random_uniform((5, height, width, 3), seed=1) - conv_layers.separable_conv2d(images, 32, [3, 3]) - self.assertEqual(len(variables.trainable_variables()), 3) - conv_layers.separable_conv2d(images, 32, [3, 3]) - self.assertEqual(len(variables.trainable_variables()), 6) + with ops.Graph().as_default(): + height, width = 7, 9 + images = random_ops.random_uniform((5, height, width, 3), seed=1) + conv_layers.separable_conv2d(images, 32, [3, 3]) + self.assertEqual(len(variables.trainable_variables()), 3) + conv_layers.separable_conv2d(images, 32, [3, 3]) + self.assertEqual(len(variables.trainable_variables()), 6) - @test_util.run_deprecated_v1 def testSeparableConv2DDepthwiseRegularizer(self): - height, width = 7, 9 - images = random_ops.random_uniform((5, height, width, 4)) - reg = lambda x: 0.1 * math_ops.reduce_sum(x) - layer = conv_layers.SeparableConv2D(32, [3, 3], depthwise_regularizer=reg) - layer.apply(images) - loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) - self.assertEqual(len(loss_keys), 1) - self.evaluate([v.initializer for v in layer.variables]) - self.assertListEqual(self.evaluate(layer.losses), self.evaluate(loss_keys)) + with ops.Graph().as_default(): + height, width = 7, 9 + images = random_ops.random_uniform((5, height, width, 4)) + reg = lambda x: 0.1 * math_ops.reduce_sum(x) + layer = conv_layers.SeparableConv2D(32, [3, 3], depthwise_regularizer=reg) + layer.apply(images) + loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) + self.assertEqual(len(loss_keys), 1) + self.evaluate([v.initializer for v in layer.variables]) + self.assertListEqual( + self.evaluate(layer.losses), self.evaluate(loss_keys)) - @test_util.run_deprecated_v1 def testSeparableConv2DPointwiseRegularizer(self): - height, width = 7, 9 - images = random_ops.random_uniform((5, height, width, 4)) - reg = lambda x: 0.1 * math_ops.reduce_sum(x) - layer = conv_layers.SeparableConv2D(32, [3, 3], pointwise_regularizer=reg) - layer.apply(images) - loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) - self.assertEqual(len(loss_keys), 1) - self.evaluate([v.initializer for v in layer.variables]) - self.assertListEqual(self.evaluate(layer.losses), self.evaluate(loss_keys)) + with ops.Graph().as_default(): + height, width = 7, 9 + images = random_ops.random_uniform((5, height, width, 4)) + reg = lambda x: 0.1 * math_ops.reduce_sum(x) + layer = conv_layers.SeparableConv2D(32, [3, 3], pointwise_regularizer=reg) + layer.apply(images) + loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) + self.assertEqual(len(loss_keys), 1) + self.evaluate([v.initializer for v in layer.variables]) + self.assertListEqual( + self.evaluate(layer.losses), self.evaluate(loss_keys)) - @test_util.run_deprecated_v1 def testSeparableConv2DBiasRegularizer(self): - height, width = 7, 9 - images = random_ops.random_uniform((5, height, width, 4)) - reg = lambda x: 0.1 * math_ops.reduce_sum(x) - layer = conv_layers.SeparableConv2D(32, [3, 3], bias_regularizer=reg) - layer.apply(images) - loss_keys = 
ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) - self.assertEqual(len(loss_keys), 1) - self.evaluate([v.initializer for v in layer.variables]) - self.assertListEqual(self.evaluate(layer.losses), self.evaluate(loss_keys)) + with ops.Graph().as_default(): + height, width = 7, 9 + images = random_ops.random_uniform((5, height, width, 4)) + reg = lambda x: 0.1 * math_ops.reduce_sum(x) + layer = conv_layers.SeparableConv2D(32, [3, 3], bias_regularizer=reg) + layer.apply(images) + loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) + self.assertEqual(len(loss_keys), 1) + self.evaluate([v.initializer for v in layer.variables]) + self.assertListEqual( + self.evaluate(layer.losses), self.evaluate(loss_keys)) - @test_util.run_deprecated_v1 def testSeparableConv2DNoBias(self): - height, width = 7, 9 - images = random_ops.random_uniform((5, height, width, 4)) - layer = conv_layers.SeparableConv2D( - 32, [3, 3], activation=nn_ops.relu, use_bias=False) - output = layer.apply(images) - self.assertEqual(output.op.name, 'separable_conv2d/Relu') - self.assertListEqual(output.get_shape().as_list(), - [5, height - 2, width - 2, 32]) - self.assertListEqual(layer.depthwise_kernel.get_shape().as_list(), - [3, 3, 4, 1]) - self.assertListEqual(layer.pointwise_kernel.get_shape().as_list(), - [1, 1, 4, 32]) - self.assertEqual(layer.bias, None) + with ops.Graph().as_default(): + height, width = 7, 9 + images = random_ops.random_uniform((5, height, width, 4)) + layer = conv_layers.SeparableConv2D( + 32, [3, 3], activation=nn_ops.relu, use_bias=False) + output = layer.apply(images) + self.assertEqual(output.op.name, 'separable_conv2d/Relu') + self.assertListEqual(output.get_shape().as_list(), + [5, height - 2, width - 2, 32]) + self.assertListEqual(layer.depthwise_kernel.get_shape().as_list(), + [3, 3, 4, 1]) + self.assertListEqual(layer.pointwise_kernel.get_shape().as_list(), + [1, 1, 4, 32]) + self.assertEqual(layer.bias, None) def testConstraints(self): d_constraint = lambda x: x / math_ops.reduce_sum(x) @@ -807,13 +813,13 @@ class Conv2DTransposeTest(test.TestCase): with self.assertRaisesRegex(ValueError, 'kernel_size'): conv_layers.conv2d_transpose(images, 32, None) - @test_util.run_deprecated_v1 def testCreateConv2DTranspose(self): height, width = 7, 9 images = random_ops.random_uniform((5, height, width, 4)) layer = conv_layers.Conv2DTranspose(32, [3, 3], activation=nn_ops.relu) output = layer.apply(images) - self.assertEqual(output.op.name, 'conv2d_transpose/Relu') + if not context.executing_eagerly(): + self.assertEqual(output.op.name, 'conv2d_transpose/Relu') self.assertListEqual(output.get_shape().as_list(), [5, height + 2, width + 2, 32]) self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 3, 32, 4]) @@ -879,66 +885,68 @@ class Conv2DTransposeTest(test.TestCase): self.assertListEqual(output.get_shape().as_list(), [5, height * 2, width, 32]) - @test_util.run_deprecated_v1 def testConv2DTransposeKernelRegularizer(self): - height, width = 7, 9 - images = random_ops.random_uniform((5, height, width, 4)) - reg = lambda x: 0.1 * math_ops.reduce_sum(x) - layer = conv_layers.Conv2DTranspose(32, [3, 3], kernel_regularizer=reg) - layer.apply(images) - loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) - self.assertEqual(len(loss_keys), 1) - self.evaluate([v.initializer for v in layer.variables]) - self.assertListEqual(self.evaluate(layer.losses), self.evaluate(loss_keys)) + with ops.Graph().as_default(): + height, width = 7, 9 + images = random_ops.random_uniform((5, 
height, width, 4)) + reg = lambda x: 0.1 * math_ops.reduce_sum(x) + layer = conv_layers.Conv2DTranspose(32, [3, 3], kernel_regularizer=reg) + layer.apply(images) + loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) + self.assertEqual(len(loss_keys), 1) + self.evaluate([v.initializer for v in layer.variables]) + self.assertListEqual( + self.evaluate(layer.losses), self.evaluate(loss_keys)) - @test_util.run_deprecated_v1 def testConv2DTransposeBiasRegularizer(self): - height, width = 7, 9 - images = random_ops.random_uniform((5, height, width, 4)) - reg = lambda x: 0.1 * math_ops.reduce_sum(x) - layer = conv_layers.Conv2DTranspose(32, [3, 3], bias_regularizer=reg) - layer.apply(images) - loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) - self.assertEqual(len(loss_keys), 1) - self.evaluate([v.initializer for v in layer.variables]) - self.assertListEqual(self.evaluate(layer.losses), self.evaluate(loss_keys)) + with ops.Graph().as_default(): + height, width = 7, 9 + images = random_ops.random_uniform((5, height, width, 4)) + reg = lambda x: 0.1 * math_ops.reduce_sum(x) + layer = conv_layers.Conv2DTranspose(32, [3, 3], bias_regularizer=reg) + layer.apply(images) + loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) + self.assertEqual(len(loss_keys), 1) + self.evaluate([v.initializer for v in layer.variables]) + self.assertListEqual( + self.evaluate(layer.losses), self.evaluate(loss_keys)) - @test_util.run_deprecated_v1 def testConv2DTransposeNoBias(self): - height, width = 7, 9 - images = random_ops.random_uniform((5, height, width, 4)) - layer = conv_layers.Conv2DTranspose( - 32, [3, 3], activation=nn_ops.relu, use_bias=False) - output = layer.apply(images) - self.assertEqual(output.op.name, 'conv2d_transpose/Relu') - self.assertListEqual(output.get_shape().as_list(), - [5, height + 2, width + 2, 32]) - self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 3, 32, 4]) - self.assertEqual(layer.bias, None) + with ops.Graph().as_default(): + height, width = 7, 9 + images = random_ops.random_uniform((5, height, width, 4)) + layer = conv_layers.Conv2DTranspose( + 32, [3, 3], activation=nn_ops.relu, use_bias=False) + output = layer.apply(images) + self.assertEqual(output.op.name, 'conv2d_transpose/Relu') + self.assertListEqual(output.get_shape().as_list(), + [5, height + 2, width + 2, 32]) + self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 3, 32, 4]) + self.assertEqual(layer.bias, None) - @test_util.run_deprecated_v1 def testFunctionalConv2DTransposeReuse(self): - height, width = 7, 9 - images = random_ops.random_uniform((5, height, width, 3), seed=1) - conv_layers.conv2d_transpose(images, 32, [3, 3], name='deconv1') - self.assertEqual(len(variables.trainable_variables()), 2) - conv_layers.conv2d_transpose(images, 32, [3, 3], name='deconv1', reuse=True) - self.assertEqual(len(variables.trainable_variables()), 2) - - @test_util.run_deprecated_v1 - def testFunctionalConv2DTransposeReuseFromScope(self): - with variable_scope.variable_scope('scope'): + with ops.Graph().as_default(): height, width = 7, 9 images = random_ops.random_uniform((5, height, width, 3), seed=1) conv_layers.conv2d_transpose(images, 32, [3, 3], name='deconv1') self.assertEqual(len(variables.trainable_variables()), 2) - with variable_scope.variable_scope('scope', reuse=True): - conv_layers.conv2d_transpose(images, 32, [3, 3], name='deconv1') + conv_layers.conv2d_transpose( + images, 32, [3, 3], name='deconv1', reuse=True) 
self.assertEqual(len(variables.trainable_variables()), 2) - @test_util.run_deprecated_v1 + def testFunctionalConv2DTransposeReuseFromScope(self): + with ops.Graph().as_default(): + with variable_scope.variable_scope('scope'): + height, width = 7, 9 + images = random_ops.random_uniform((5, height, width, 3), seed=1) + conv_layers.conv2d_transpose(images, 32, [3, 3], name='deconv1') + self.assertEqual(len(variables.trainable_variables()), 2) + with variable_scope.variable_scope('scope', reuse=True): + conv_layers.conv2d_transpose(images, 32, [3, 3], name='deconv1') + self.assertEqual(len(variables.trainable_variables()), 2) + def testFunctionalConv2DTransposeInitializerFromScope(self): - with self.cached_session() as sess: + with ops.Graph().as_default(), self.cached_session(): with variable_scope.variable_scope( 'scope', initializer=init_ops.ones_initializer()): height, width = 7, 9 @@ -955,14 +963,14 @@ class Conv2DTransposeTest(test.TestCase): # Check that the bias still got initialized to zeros. self.assertAllClose(weights[1], np.zeros((32))) - @test_util.run_deprecated_v1 def testFunctionalConv2DTransposeNoReuse(self): - height, width = 7, 9 - images = random_ops.random_uniform((5, height, width, 3), seed=1) - conv_layers.conv2d_transpose(images, 32, [3, 3]) - self.assertEqual(len(variables.trainable_variables()), 2) - conv_layers.conv2d_transpose(images, 32, [3, 3]) - self.assertEqual(len(variables.trainable_variables()), 4) + with ops.Graph().as_default(): + height, width = 7, 9 + images = random_ops.random_uniform((5, height, width, 3), seed=1) + conv_layers.conv2d_transpose(images, 32, [3, 3]) + self.assertEqual(len(variables.trainable_variables()), 2) + conv_layers.conv2d_transpose(images, 32, [3, 3]) + self.assertEqual(len(variables.trainable_variables()), 4) def testConstraints(self): k_constraint = lambda x: x / math_ops.reduce_sum(x) @@ -1002,13 +1010,13 @@ class Conv3DTransposeTest(test.TestCase): with self.assertRaisesRegex(ValueError, 'kernel_size'): conv_layers.conv3d_transpose(volumes, 4, None) - @test_util.run_deprecated_v1 def testCreateConv3DTranspose(self): depth, height, width = 5, 7, 9 volumes = random_ops.random_uniform((5, depth, height, width, 32)) layer = conv_layers.Conv3DTranspose(4, [3, 3, 3], activation=nn_ops.relu) output = layer.apply(volumes) - self.assertEqual(output.op.name, 'conv3d_transpose/Relu') + if not context.executing_eagerly(): + self.assertEqual(output.op.name, 'conv3d_transpose/Relu') self.assertListEqual(output.get_shape().as_list(), [5, depth + 2, height + 2, width + 2, 4]) self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 3, 3, 4, 32]) @@ -1024,17 +1032,17 @@ class Conv3DTransposeTest(test.TestCase): self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 3, 3, 4, 32]) self.assertListEqual(layer.bias.get_shape().as_list(), [4]) - @test_util.run_deprecated_v1 def testCreateConv3DTransposeChannelsFirst(self): - depth, height, width = 5, 7, 9 - volumes = random_ops.random_uniform((5, 32, depth, height, width)) - layer = conv_layers.Conv3DTranspose( - 4, [3, 3, 3], data_format='channels_first') - output = layer.apply(volumes) - self.assertListEqual(output.get_shape().as_list(), - [5, 4, depth + 2, height + 2, width + 2]) - self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 3, 3, 4, 32]) - self.assertListEqual(layer.bias.get_shape().as_list(), [4]) + with ops.Graph().as_default(): + depth, height, width = 5, 7, 9 + volumes = random_ops.random_uniform((5, 32, depth, height, width)) + layer = conv_layers.Conv3DTranspose( 
+ 4, [3, 3, 3], data_format='channels_first') + output = layer.apply(volumes) + self.assertListEqual(output.get_shape().as_list(), + [5, 4, depth + 2, height + 2, width + 2]) + self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 3, 3, 4, 32]) + self.assertListEqual(layer.bias.get_shape().as_list(), [4]) def testConv3DTransposePaddingSame(self): depth, height, width = 5, 7, 9 @@ -1068,67 +1076,69 @@ class Conv3DTransposeTest(test.TestCase): self.assertListEqual(output.get_shape().as_list(), [5, depth * 2, height, width, 4]) - @test_util.run_deprecated_v1 def testConv3DTransposeKernelRegularizer(self): - depth, height, width = 5, 7, 9 - volumes = random_ops.random_uniform((5, depth, height, width, 32)) - reg = lambda x: 0.1 * math_ops.reduce_sum(x) - layer = conv_layers.Conv3DTranspose(4, [3, 3, 3], kernel_regularizer=reg) - layer.apply(volumes) - loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) - self.assertEqual(len(loss_keys), 1) - self.evaluate([v.initializer for v in layer.variables]) - self.assertListEqual(self.evaluate(layer.losses), self.evaluate(loss_keys)) + with ops.Graph().as_default(): + depth, height, width = 5, 7, 9 + volumes = random_ops.random_uniform((5, depth, height, width, 32)) + reg = lambda x: 0.1 * math_ops.reduce_sum(x) + layer = conv_layers.Conv3DTranspose(4, [3, 3, 3], kernel_regularizer=reg) + layer.apply(volumes) + loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) + self.assertEqual(len(loss_keys), 1) + self.evaluate([v.initializer for v in layer.variables]) + self.assertListEqual( + self.evaluate(layer.losses), self.evaluate(loss_keys)) - @test_util.run_deprecated_v1 def testConv3DTransposeBiasRegularizer(self): - depth, height, width = 5, 7, 9 - volumes = random_ops.random_uniform((5, depth, height, width, 32)) - reg = lambda x: 0.1 * math_ops.reduce_sum(x) - layer = conv_layers.Conv3DTranspose(4, [3, 3, 3], bias_regularizer=reg) - layer.apply(volumes) - loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) - self.assertEqual(len(loss_keys), 1) - self.evaluate([v.initializer for v in layer.variables]) - self.assertListEqual(self.evaluate(layer.losses), self.evaluate(loss_keys)) + with ops.Graph().as_default(): + depth, height, width = 5, 7, 9 + volumes = random_ops.random_uniform((5, depth, height, width, 32)) + reg = lambda x: 0.1 * math_ops.reduce_sum(x) + layer = conv_layers.Conv3DTranspose(4, [3, 3, 3], bias_regularizer=reg) + layer.apply(volumes) + loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) + self.assertEqual(len(loss_keys), 1) + self.evaluate([v.initializer for v in layer.variables]) + self.assertListEqual( + self.evaluate(layer.losses), self.evaluate(loss_keys)) - @test_util.run_deprecated_v1 def testConv3DTransposeNoBias(self): - depth, height, width = 5, 7, 9 - volumes = random_ops.random_uniform((5, depth, height, width, 32)) - layer = conv_layers.Conv3DTranspose( - 4, [3, 3, 3], activation=nn_ops.relu, use_bias=False) - output = layer.apply(volumes) - self.assertEqual(output.op.name, 'conv3d_transpose/Relu') - self.assertListEqual(output.get_shape().as_list(), - [5, depth + 2, height + 2, width + 2, 4]) - self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 3, 3, 4, 32]) - self.assertEqual(layer.bias, None) + with ops.Graph().as_default(): + depth, height, width = 5, 7, 9 + volumes = random_ops.random_uniform((5, depth, height, width, 32)) + layer = conv_layers.Conv3DTranspose( + 4, [3, 3, 3], activation=nn_ops.relu, use_bias=False) + output = 
layer.apply(volumes) + self.assertEqual(output.op.name, 'conv3d_transpose/Relu') + self.assertListEqual(output.get_shape().as_list(), + [5, depth + 2, height + 2, width + 2, 4]) + self.assertListEqual(layer.kernel.get_shape().as_list(), [3, 3, 3, 4, 32]) + self.assertEqual(layer.bias, None) - @test_util.run_deprecated_v1 def testFunctionalConv3DTransposeReuse(self): - depth, height, width = 5, 7, 9 - volumes = random_ops.random_uniform((5, depth, height, width, 32), seed=1) - conv_layers.conv3d_transpose(volumes, 4, [3, 3, 3], name='deconv1') - self.assertEqual(len(variables.trainable_variables()), 2) - conv_layers.conv3d_transpose( - volumes, 4, [3, 3, 3], name='deconv1', reuse=True) - self.assertEqual(len(variables.trainable_variables()), 2) - - @test_util.run_deprecated_v1 - def testFunctionalConv3DTransposeReuseFromScope(self): - with variable_scope.variable_scope('scope'): + with ops.Graph().as_default(): depth, height, width = 5, 7, 9 volumes = random_ops.random_uniform((5, depth, height, width, 32), seed=1) conv_layers.conv3d_transpose(volumes, 4, [3, 3, 3], name='deconv1') self.assertEqual(len(variables.trainable_variables()), 2) - with variable_scope.variable_scope('scope', reuse=True): - conv_layers.conv3d_transpose(volumes, 4, [3, 3, 3], name='deconv1') + conv_layers.conv3d_transpose( + volumes, 4, [3, 3, 3], name='deconv1', reuse=True) self.assertEqual(len(variables.trainable_variables()), 2) - @test_util.run_deprecated_v1 + def testFunctionalConv3DTransposeReuseFromScope(self): + with ops.Graph().as_default(): + with variable_scope.variable_scope('scope'): + depth, height, width = 5, 7, 9 + volumes = random_ops.random_uniform( + (5, depth, height, width, 32), seed=1) + conv_layers.conv3d_transpose(volumes, 4, [3, 3, 3], name='deconv1') + self.assertEqual(len(variables.trainable_variables()), 2) + with variable_scope.variable_scope('scope', reuse=True): + conv_layers.conv3d_transpose(volumes, 4, [3, 3, 3], name='deconv1') + self.assertEqual(len(variables.trainable_variables()), 2) + def testFunctionalConv3DTransposeInitializerFromScope(self): - with self.cached_session() as sess: + with ops.Graph().as_default(), self.cached_session(): with variable_scope.variable_scope( 'scope', initializer=init_ops.ones_initializer()): depth, height, width = 5, 7, 9 @@ -1146,14 +1156,14 @@ class Conv3DTransposeTest(test.TestCase): # Check that the bias still got initialized to zeros. self.assertAllClose(weights[1], np.zeros((4))) - @test_util.run_deprecated_v1 def testFunctionalConv3DTransposeNoReuse(self): - depth, height, width = 5, 7, 9 - volumes = random_ops.random_uniform((5, depth, height, width, 32), seed=1) - conv_layers.conv3d_transpose(volumes, 4, [3, 3, 3]) - self.assertEqual(len(variables.trainable_variables()), 2) - conv_layers.conv3d_transpose(volumes, 4, [3, 3, 3]) - self.assertEqual(len(variables.trainable_variables()), 4) + with ops.Graph().as_default(): + depth, height, width = 5, 7, 9 + volumes = random_ops.random_uniform((5, depth, height, width, 32), seed=1) + conv_layers.conv3d_transpose(volumes, 4, [3, 3, 3]) + self.assertEqual(len(variables.trainable_variables()), 2) + conv_layers.conv3d_transpose(volumes, 4, [3, 3, 3]) + self.assertEqual(len(variables.trainable_variables()), 4) def testConstraints(self): k_constraint = lambda x: x / math_ops.reduce_sum(x) From 000ee104615458157262164c3e967c3492c1cb08 Mon Sep 17 00:00:00 2001 From: Tim Shen Date: Tue, 14 Jul 2020 23:07:31 -0700 Subject: [PATCH 0463/2522] [XLA/GPU] Use Status instead of LOG(FATAL) for valid user input. 
PiperOrigin-RevId: 321303766 Change-Id: I74a31753ade58a8039aa65b42b04dc6380683ba5 --- .../compiler/xla/service/gpu/gemm_thunk.cc | 42 ++++++++++--------- .../xla/service/gpu/gpu_conv_runner.cc | 5 ++- 2 files changed, 26 insertions(+), 21 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc b/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc index 35dad1c84ac..e55df0bb230 100644 --- a/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc @@ -83,24 +83,28 @@ static bool DoGemmWithAlgorithm( // Converts from an XLA PrimitiveType to a blas::ComputationType, which is // used to specify the precision with which matmul computations should be // performed, separately from the precision of the inputs and result. - se::blas::ComputationType computation_type = [&](PrimitiveType type) { - switch (type) { - case F16: - // Use F32 as computation type for F16 as we currently only implement - // the cuDNN pseudo half configuration for half precision. - return se::blas::ComputationType::kF32; - case F32: - return se::blas::ComputationType::kF32; - case F64: - return se::blas::ComputationType::kF64; - case C64: - return se::blas::ComputationType::kComplexF32; - case C128: - return se::blas::ComputationType::kComplexF64; - default: - LOG(FATAL) << "Unsupported type."; - } - }(type); + se::blas::ComputationType computation_type; + switch (type) { + case F16: + // Use F32 as computation type for F16 as we currently only implement + // the cuDNN pseudo half configuration for half precision. + computation_type = se::blas::ComputationType::kF32; + break; + case F32: + computation_type = se::blas::ComputationType::kF32; + break; + case F64: + computation_type = se::blas::ComputationType::kF64; + break; + case C64: + computation_type = se::blas::ComputationType::kComplexF32; + break; + case C128: + computation_type = se::blas::ComputationType::kComplexF64; + break; + default: + return false; + } se::DeviceMemory lhs_data(lhs_matrix.data); se::DeviceMemory rhs_data(rhs_matrix.data); @@ -297,7 +301,7 @@ Status RunGemm(const HloInstruction *gemm, stream, best_algorithm, /*output_profile_result=*/profile_result); default: - LOG(FATAL) << "Unsupported type."; + return false; } }(); diff --git a/tensorflow/compiler/xla/service/gpu/gpu_conv_runner.cc b/tensorflow/compiler/xla/service/gpu/gpu_conv_runner.cc index a6fc4686143..5cc5fa7d16d 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_conv_runner.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_conv_runner.cc @@ -484,11 +484,12 @@ Status RunGpuConv(const HloCustomCallInstruction* conv, return RunGpuConvImpl(params, scratch_allocator, stream, options); default: - LOG(FATAL) << conv->ToString(); + return Unimplemented("Unimplemented convolution %s", + conv->ToString()); } } default: - LOG(FATAL) << conv->ToString(); + return Unimplemented("Unimplemented convolution %s", conv->ToString()); } } From f6f6b66d500051c85bea2ce6131edc2a255b3655 Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Tue, 14 Jul 2020 23:10:32 -0700 Subject: [PATCH 0464/2522] Update session_manager_test wrt run_v1_only annotation. 
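Note (schematic sketch, not part of this patch): the cleanup drops blanket run_v1_only("b/120545219") annotations, opts the test class out of resource variables once in setUpClass, and keeps the decorator, with a descriptive reason, only where TF V1 variable behavior is genuinely required; graph-only logic is built inside an explicit Graph so it also runs under the default harness. The class and test names below are made up for illustration.

    from tensorflow.python.framework import ops
    from tensorflow.python.framework import test_util
    from tensorflow.python.ops import variable_scope
    from tensorflow.python.ops import variables
    from tensorflow.python.platform import test


    class MigrationSketchTest(test.TestCase):

      @classmethod
      def setUpClass(cls):
        super(MigrationSketchTest, cls).setUpClass()
        # Opt the whole class out of resource variables instead of
        # annotating every method with run_v1_only.
        variable_scope.disable_resource_variables()

      def testGraphOnlyLogicWrappedExplicitly(self):
        # Previously marked @test_util.run_deprecated_v1; now the graph-mode
        # code is scoped explicitly and the decorator is unnecessary.
        with ops.Graph().as_default():
          v = variables.VariableV1([1.0, 2.0, 3.0], name="v")
          self.assertEqual([3], v.get_shape().as_list())

      @test_util.run_v1_only("Requires TF V1 variable behavior.")
      def testStillNeedsV1Semantics(self):
        with ops.Graph().as_default():
          v = variables.VariableV1(1, name="v")
          self.assertEqual("v:0", v.name)


    if __name__ == "__main__":
      test.main()
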
PiperOrigin-RevId: 321304049 Change-Id: I0dd2b297bac8b0257a5a1dcac7ff1124714ae531 --- .../python/training/session_manager_test.py | 30 +++++++++---------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/tensorflow/python/training/session_manager_test.py b/tensorflow/python/training/session_manager_test.py index df795ff5f7e..4e05b73110b 100644 --- a/tensorflow/python/training/session_manager_test.py +++ b/tensorflow/python/training/session_manager_test.py @@ -28,6 +28,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.platform import gfile from tensorflow.python.platform import test @@ -39,6 +40,11 @@ from tensorflow.python.training import session_manager class SessionManagerTest(test.TestCase): + @classmethod + def setUpClass(cls): + super(SessionManagerTest, cls).setUpClass() + variable_scope.disable_resource_variables() + def testPrepareSessionSucceeds(self): with ops.Graph().as_default(): v = variables.VariableV1([1.0, 2.0, 3.0], name="v") @@ -81,7 +87,6 @@ class SessionManagerTest(test.TestCase): sess = sm.prepare_session("") self.assertAllClose([1.0, 2.0, 3.0], sess.run(v)) - @test_util.run_v1_only("b/120545219") def testPrepareSessionFails(self): checkpoint_dir = os.path.join(self.get_temp_dir(), "prepare_session") checkpoint_dir2 = os.path.join(self.get_temp_dir(), "prepare_session2") @@ -166,7 +171,6 @@ class SessionManagerTest(test.TestCase): sess.graph.get_tensor_by_name("v:0")).eval(session=sess)) self.assertEqual(1, sess.run(v)) - @test_util.run_v1_only("b/120545219") def testRecoverSession(self): # Create a checkpoint. checkpoint_dir = os.path.join(self.get_temp_dir(), "recover_session") @@ -199,7 +203,6 @@ class SessionManagerTest(test.TestCase): checkpoint_filename_with_path=checkpoint_management.latest_checkpoint( checkpoint_dir)) - @test_util.run_v1_only("b/120545219") def testWaitForSessionReturnsNoneAfterTimeout(self): with ops.Graph().as_default(): variables.VariableV1(1, name="v") @@ -222,7 +225,6 @@ class SessionManagerTest(test.TestCase): variables.global_variables()), local_init_op=None) - @test_util.run_v1_only("b/120545219") def testRecoverSessionWithReadyForLocalInitOp(self): # Create a checkpoint. checkpoint_dir = os.path.join(self.get_temp_dir(), @@ -276,7 +278,6 @@ class SessionManagerTest(test.TestCase): self.assertEqual(1, sess.run(v)) self.assertEqual(1, sess.run(w)) - @test_util.run_v1_only("b/120545219") def testRecoverSessionWithReadyForLocalInitOpFailsToReadyLocal(self): # We use ready_for_local_init_op=report_uninitialized_variables(), # which causes recover_session to not run local_init_op, and to return @@ -333,7 +334,6 @@ class SessionManagerTest(test.TestCase): sess.graph.get_tensor_by_name("w:0")).eval(session=sess)) self.assertEqual(1, sess.run(v)) - @test_util.run_v1_only("b/120545219") def testRecoverSessionNoChkptStillRunsLocalInitOp(self): # This test checks for backwards compatibility. # In particular, we continue to ensure that recover_session will execute @@ -362,7 +362,6 @@ class SessionManagerTest(test.TestCase): sess.graph.get_tensor_by_name("w:0")).eval(session=sess)) self.assertEqual(1, sess.run(w)) - @test_util.run_v1_only("b/120545219") def testRecoverSessionFailsStillRunsLocalInitOp(self): # Create a checkpoint. 
checkpoint_dir = os.path.join( @@ -406,7 +405,6 @@ class SessionManagerTest(test.TestCase): sess.graph.get_tensor_by_name("w:0")).eval(session=sess)) self.assertEqual(1, sess.run(w)) - @test_util.run_v1_only("b/120545219") def testWaitForSessionLocalInit(self): server = server_lib.Server.create_local_server() with ops.Graph().as_default() as graph: @@ -458,7 +456,7 @@ class SessionManagerTest(test.TestCase): # because of overly restrictive ready_for_local_init_op sm.wait_for_session("", max_wait_secs=3) - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only("Requires TF V1 variable behavior.") def testWaitForSessionInsufficientReadyForLocalInitCheck(self): with ops.Graph().as_default() as graph: v = variables.VariableV1(1, name="v") @@ -476,7 +474,6 @@ class SessionManagerTest(test.TestCase): "Session was not ready after waiting.*"): sm.wait_for_session("", max_wait_secs=3) - @test_util.run_v1_only("b/120545219") def testPrepareSessionWithReadyForLocalInitOp(self): with ops.Graph().as_default(): v = variables.VariableV1(1, name="v") @@ -516,7 +513,7 @@ class SessionManagerTest(test.TestCase): self.assertEqual(1, sess.run(w)) self.assertEqual(3, sess.run(x)) - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only("Requires TF V1 variable behavior.") def testPrepareSessionWithPartialInitOp(self): with ops.Graph().as_default(): v = variables.VariableV1(1, name="v") @@ -583,7 +580,6 @@ class SessionManagerTest(test.TestCase): self.assertEqual(1, sess.run(w_res)) self.assertEqual(3, sess.run(x_res)) - @test_util.run_v1_only("b/120545219") def testPrepareSessionWithCyclicInitializer(self): # Regression test. Previously Variable._build_initializer_expr would enter # into an infinite recursion when the variable's initial_value involved @@ -657,7 +653,7 @@ class SessionManagerTest(test.TestCase): "Init operations did not make model ready for local_init"): sm2.prepare_session("", init_op=None) - @test_util.run_v1_only("b/120545219") + @test_util.run_v1_only("Requires TF V1 variable behavior.") def testPrepareSessionWithInsufficientReadyForLocalInitCheck(self): with ops.Graph().as_default(): v = variables.VariableV1(1, name="v") @@ -680,6 +676,11 @@ class SessionManagerTest(test.TestCase): class ObsoleteSessionManagerTest(test.TestCase): + @classmethod + def setUpClass(cls): + super(ObsoleteSessionManagerTest, cls).setUpClass() + variable_scope.disable_resource_variables() + def testPrepareSessionSucceeds(self): with ops.Graph().as_default(): v = variables.VariableV1([1.0, 2.0, 3.0], name="v") @@ -710,7 +711,6 @@ class ObsoleteSessionManagerTest(test.TestCase): "", init_fn=lambda sess: sess.run(v.initializer)) self.assertAllClose([125], sess.run(v)) - @test_util.run_v1_only("b/120545219") def testPrepareSessionFails(self): checkpoint_dir = os.path.join(self.get_temp_dir(), "prepare_session") checkpoint_dir2 = os.path.join(self.get_temp_dir(), "prepare_session2") @@ -772,7 +772,6 @@ class ObsoleteSessionManagerTest(test.TestCase): variables.is_variable_initialized( sess.graph.get_tensor_by_name("v:0")).eval(session=sess)) - @test_util.run_v1_only("b/120545219") def testRecoverSession(self): # Create a checkpoint. 
checkpoint_dir = os.path.join(self.get_temp_dir(), "recover_session") @@ -811,7 +810,6 @@ class ObsoleteSessionManagerTest(test.TestCase): sess.graph.get_tensor_by_name("v:0")).eval(session=sess)) self.assertEqual(1, sess.run(v)) - @test_util.run_v1_only("b/120545219") def testWaitForSessionReturnsNoneAfterTimeout(self): with ops.Graph().as_default(): variables.VariableV1(1, name="v") From 6d26d5d108a1f5c53ead190726195b504f097b58 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Wed, 15 Jul 2020 12:57:22 +0700 Subject: [PATCH 0465/2522] Add GetDefaultClientConfig --- .../experimental/filesystem/plugins/s3/BUILD | 1 + .../filesystem/plugins/s3/s3_filesystem.cc | 79 +++++++++++++++++++ 2 files changed, 80 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/BUILD b/tensorflow/c/experimental/filesystem/plugins/s3/BUILD index 2560130062d..11cca8ac5f1 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/BUILD +++ b/tensorflow/c/experimental/filesystem/plugins/s3/BUILD @@ -29,6 +29,7 @@ cc_library( "//tensorflow/c:tf_status", "//tensorflow/c/experimental/filesystem:filesystem_interface", "@aws", + "@com_google_absl//absl/strings", "@com_google_absl//absl/synchronization", ], ) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc index f6ec1361335..9a9d52c8382 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc @@ -14,14 +14,18 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h" +#include #include #include +#include "absl/strings/ascii.h" +#include "absl/strings/numbers.h" #include "tensorflow/c/experimental/filesystem/filesystem_interface.h" #include "tensorflow/c/tf_status.h" // Implementation of a filesystem for S3 environments. // This filesystem will support `s3://` URI schemes. +constexpr int64_t kS3TimeoutMsec = 300000; // 5 min static void* plugin_memory_allocate(size_t size) { return calloc(1, size); } static void plugin_memory_free(void* ptr) { free(ptr); } @@ -52,6 +56,81 @@ static void ParseS3Path(const Aws::String& fname, bool object_empty_ok, } } +static Aws::Client::ClientConfiguration& GetDefaultClientConfig() { + ABSL_CONST_INIT static absl::Mutex cfg_lock(absl::kConstInit); + static bool init(false); + static Aws::Client::ClientConfiguration cfg; + + absl::MutexLock l(&cfg_lock); + + if (!init) { + const char* endpoint = getenv("S3_ENDPOINT"); + if (endpoint) cfg.endpointOverride = Aws::String(endpoint); + const char* region = getenv("AWS_REGION"); + // TODO (yongtang): `S3_REGION` should be deprecated after 2.0. + if (!region) region = getenv("S3_REGION"); + if (region) { + cfg.region = Aws::String(region); + } else { + // Load config file (e.g., ~/.aws/config) only if AWS_SDK_LOAD_CONFIG + // is set with a truthy value. + const char* load_config_env = getenv("AWS_SDK_LOAD_CONFIG"); + std::string load_config = + load_config_env ? absl::AsciiStrToLower(load_config_env) : ""; + if (load_config == "true" || load_config == "1") { + Aws::String config_file; + // If AWS_CONFIG_FILE is set then use it, otherwise use ~/.aws/config. 
+ const char* config_file_env = getenv("AWS_CONFIG_FILE"); + if (config_file_env) { + config_file = config_file_env; + } else { + const char* home_env = getenv("HOME"); + if (home_env) { + config_file = home_env; + config_file += "/.aws/config"; + } + } + Aws::Config::AWSConfigFileProfileConfigLoader loader(config_file); + loader.Load(); + auto profiles = loader.GetProfiles(); + if (!profiles["default"].GetRegion().empty()) + cfg.region = profiles["default"].GetRegion(); + } + } + const char* use_https = getenv("S3_USE_HTTPS"); + if (use_https) { + if (use_https[0] == '0') + cfg.scheme = Aws::Http::Scheme::HTTP; + else + cfg.scheme = Aws::Http::Scheme::HTTPS; + } + const char* verify_ssl = getenv("S3_VERIFY_SSL"); + if (verify_ssl) { + if (verify_ssl[0] == '0') + cfg.verifySSL = false; + else + cfg.verifySSL = true; + } + // if these timeouts are low, you may see an error when + // uploading/downloading large files: Unable to connect to endpoint + int64_t timeout; + cfg.connectTimeoutMs = + absl::SimpleAtoi(getenv("S3_CONNECT_TIMEOUT_MSEC"), &timeout) + ? timeout + : kS3TimeoutMsec; + cfg.requestTimeoutMs = + absl::SimpleAtoi(getenv("S3_REQUEST_TIMEOUT_MSEC"), &timeout) + ? timeout + : kS3TimeoutMsec; + const char* ca_file = getenv("S3_CA_FILE"); + if (ca_file) cfg.caFile = Aws::String(ca_file); + const char* ca_path = getenv("S3_CA_PATH"); + if (ca_path) cfg.caPath = Aws::String(ca_path); + init = true; + } + return cfg; +}; + static void ShutdownClient(Aws::S3::S3Client* s3_client) { if (s3_client != nullptr) { delete s3_client; From 2191a9d795259b211dfecadb8a9e3a471b1488da Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Wed, 15 Jul 2020 06:23:33 +0000 Subject: [PATCH 0466/2522] update map test cases for insert and erase --- tensorflow/core/kernels/map_kernels.h | 8 ++-- .../python/kernel_tests/map_ops_test.py | 41 +++++++++++++++++-- tensorflow/python/ops/map_ops.py | 5 +-- 3 files changed, 44 insertions(+), 10 deletions(-) diff --git a/tensorflow/core/kernels/map_kernels.h b/tensorflow/core/kernels/map_kernels.h index 98ce1bfac1b..33a950ee63a 100644 --- a/tensorflow/core/kernels/map_kernels.h +++ b/tensorflow/core/kernels/map_kernels.h @@ -144,6 +144,10 @@ class TensorMapLookup : public OpKernel { const TensorKey& key = c->input(1); const TensorMap* m = nullptr; OP_REQUIRES_OK(c, GetInputMap(c, 0, &m)); + + OP_REQUIRES(c, m->tensors().find(key) != m->tensors().end(), + errors::InvalidArgument("Trying to lookup non-existent key.")); + c->set_output(0, m->tensors().find(key)->second); } @@ -159,11 +163,9 @@ class TensorMapErase : public OpKernel { } void Compute(OpKernelContext* c) override { - std::cout << "hello TensorMapErase op" << std::endl; const TensorMap* m = nullptr; OP_REQUIRES_OK(c, GetInputMap(c, 0, &m)); - const Tensor& temp_key = c->input(1); - const TensorKey key = TensorKey(temp_key); + const TensorKey& key = c->input(1); OP_REQUIRES(c, !m->tensors().empty(), errors::InvalidArgument("Trying to erase from an empty map.")); diff --git a/tensorflow/python/kernel_tests/map_ops_test.py b/tensorflow/python/kernel_tests/map_ops_test.py index e95a1ab9bec..d8a075c7b4e 100644 --- a/tensorflow/python/kernel_tests/map_ops_test.py +++ b/tensorflow/python/kernel_tests/map_ops_test.py @@ -1,4 +1,4 @@ -# Copyright 2018 The Sonnet Authors. All Rights Reserved. +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -28,6 +28,7 @@ from tensorflow.python.eager import def_function from tensorflow.python.eager import function from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.ops import map_ops @test_util.run_all_in_graph_and_eager_modes @@ -56,7 +57,17 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): m = map_ops.tensor_map_insert(m, k, v) l = map_ops.tensor_map_lookup(m, k) self.assertAllClose(l, v) - + + def testTensorMapLookupMissingKeyFails(self): + m = map_ops.empty_tensor_map() + k = constant_op.constant(1.0) + v = constant_op.constant(2.0) + + with self.assertRaisesRegex(errors.InvalidArgumentError, + "Trying to lookup non-existent key."): + l = map_ops.tensor_map_lookup(m, k) + self.evaluate(l) + def testTensorMapReplace(self): m = map_ops.empty_tensor_map() k = constant_op.constant(1.0) @@ -76,13 +87,35 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): v = constant_op.constant(2.0) m = map_ops.tensor_map_insert(m, k, v) s = map_ops.tensor_map_size(m) - self.assertAllClose(s, 1) + self.assertAllEqual(s, 1) m, e = map_ops.tensor_map_erase(m, k) s = map_ops.tensor_map_size(m) - self.assertAllClose(s, 0) + self.assertAllEqual(s, 0) self.assertAllClose(e, v) + def testTensorMapEraseFromEmptyMapFails(self): + m = map_ops.empty_tensor_map() + k = constant_op.constant(1.0) + v = constant_op.constant(2.0) + + with self.assertRaisesRegex(errors.InvalidArgumentError, + "Trying to erase from an empty map."): + m, e = map_ops.tensor_map_erase(m, k) + self.evaluate(e) + + def testTensorMapEraseMissingKeyFails(self): + m = map_ops.empty_tensor_map() + k = constant_op.constant(1.0) + k2 = constant_op.constant(2.0) + v = constant_op.constant(2.0) + m = map_ops.tensor_map_insert(m, k2, v) + + with self.assertRaisesRegex(errors.InvalidArgumentError, + "Trying to erase non-existent item."): + m, e = map_ops.tensor_map_erase(m, k) + self.evaluate(e) + def testInsertLookupGrad(self): with backprop.GradientTape() as tape: m = map_ops.empty_tensor_map() diff --git a/tensorflow/python/ops/map_ops.py b/tensorflow/python/ops/map_ops.py index 4ea50e114ac..20806e6fd30 100644 --- a/tensorflow/python/ops/map_ops.py +++ b/tensorflow/python/ops/map_ops.py @@ -1,4 +1,4 @@ -# Copyright 2018 The Sonnet Authors. All Rights Reserved. +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -52,14 +52,13 @@ def LookupGrad(op, dval): m, k = op.inputs map_grad = empty_tensor_map() map_grad = tensor_map_insert(map_grad, k, dval) - key = op.inputs[1] key_grad = None return map_grad, key_grad @ops.RegisterGradient("TensorMapInsert") def InsertGrad(op, dmap): _, key, val = op.inputs - map_grad, _ = gen_map_ops.tensor_map_erase(dmap, key) + map_grad = None key_grad = None value_grad = tensor_map_lookup(dmap, key) return map_grad, key_grad, value_grad From 78c8935f7c9c08e060b19dfe742f1d471bf54ccb Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Tue, 14 Jul 2020 23:31:12 -0700 Subject: [PATCH 0467/2522] Introduce TpuCompilationCache create function registration PiperOrigin-RevId: 321305853 Change-Id: I4ac165aa06a0c5c12c51e0032d9f159152b275d4 --- tensorflow/core/tpu/kernels/BUILD | 17 ++++++ .../kernels/tpu_compilation_cache_factory.cc | 55 +++++++++++++++++++ .../kernels/tpu_compilation_cache_factory.h | 33 +++++++++++ .../core/tpu/kernels/tpu_configuration_ops.cc | 29 +++++++++- .../core/tpu/kernels/tpu_configuration_ops.h | 4 ++ tensorflow/core/tpu/tpu_config_c_api.h | 4 +- 6 files changed, 139 insertions(+), 3 deletions(-) create mode 100644 tensorflow/core/tpu/kernels/tpu_compilation_cache_factory.cc create mode 100644 tensorflow/core/tpu/kernels/tpu_compilation_cache_factory.h diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index 6ff0fb1df73..70d6ccff4df 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -79,7 +79,10 @@ tf_kernel_library( srcs = ["tpu_configuration_ops.cc"], hdrs = ["tpu_configuration_ops.h"], deps = [ + ":tpu_compilation_cache_factory", + ":tpu_compilation_cache_interface", ":tpu_mesh_state_interface", + ":tpu_op_consts", "//tensorflow/c:tf_status", "//tensorflow/c:tf_status_helper", "//tensorflow/compiler/xla:util", @@ -133,6 +136,20 @@ tf_proto_library_cc( ], ) +cc_library( + name = "tpu_compilation_cache_factory", + srcs = ["tpu_compilation_cache_factory.cc"], + hdrs = ["tpu_compilation_cache_factory.h"], + deps = [ + ":tpu_compilation_cache_external", + ":tpu_compilation_cache_interface", + ":tpu_op_consts", + "//tensorflow/core:framework", + "//tensorflow/core/platform:status", + "//tensorflow/core/platform:types", + ], +) + cc_library( name = "tpu_compilation_cache_key", srcs = [], diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_factory.cc b/tensorflow/core/tpu/kernels/tpu_compilation_cache_factory.cc new file mode 100644 index 00000000000..86469ae7ebb --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_factory.cc @@ -0,0 +1,55 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_factory.h" + +#include "tensorflow/core/framework/resource_mgr.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_external.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h" +#include "tensorflow/core/tpu/kernels/tpu_op_consts.h" + +namespace tensorflow { +namespace tpu { +namespace { + +TpuCompilationCacheInterface* CreateCompilationCacheExternal() { + // NOTE: Change the 1 << 33 value to change the compilation cache size. + // TODO(frankchn): Make this configurable. + return new TpuCompilationCacheExternal(int64{1} << 33); // 8 GB +} + +// Using a pointer here to fulfill the trivially destructible requirement for +// static variables. +static std::function* + compilation_cache_creation_fn = + new std::function( + CreateCompilationCacheExternal); + +} // namespace + +std::function GetCompilationCacheCreateFn() { + return *compilation_cache_creation_fn; +} + +void SetCompilationCacheCreateFn( + std::function fn) { + delete compilation_cache_creation_fn; + compilation_cache_creation_fn = + new std::function(fn); +} + +} // namespace tpu +} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_factory.h b/tensorflow/core/tpu/kernels/tpu_compilation_cache_factory.h new file mode 100644 index 00000000000..4710f916c48 --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_factory.h @@ -0,0 +1,33 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILATION_CACHE_FACTORY_H_ +#define TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILATION_CACHE_FACTORY_H_ + +#include + +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h" + +namespace tensorflow { +namespace tpu { + +std::function GetCompilationCacheCreateFn(); + +void SetCompilationCacheCreateFn( + std::function fn); + +} // namespace tpu +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILATION_CACHE_FACTORY_H_ diff --git a/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc b/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc index 065a7f77dd6..13efdc46e10 100644 --- a/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc +++ b/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc @@ -23,7 +23,10 @@ limitations under the License. 
#include "tensorflow/core/framework/tensor.pb.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/platform/refcount.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_factory.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h" #include "tensorflow/core/tpu/kernels/tpu_mesh_state_interface.h" +#include "tensorflow/core/tpu/kernels/tpu_op_consts.h" #include "tensorflow/core/tpu/tpu_api.h" #include "tensorflow/core/tpu/tpu_config_c_api.h" #include "tensorflow/core/tpu/tpu_configuration.h" @@ -67,6 +70,16 @@ Status DeleteIfExists(ResourceMgr* resource_manager, } // namespace +Status CreateTpuCompilationCache( + ResourceMgr* rmgr, tpu::TpuCompilationCacheInterface** compilation_cache) { + return rmgr->LookupOrCreate( + rmgr->default_container(), tpu::kCompilationCacheResourceName, + compilation_cache, [&](tpu::TpuCompilationCacheInterface** new_cache) { + *new_cache = tpu::GetCompilationCacheCreateFn()(); + return Status::OK(); + }); +} + void ConfigureDistributedTpuOp::Compute(OpKernelContext* ctx) { VLOG(1) << "ConfigureDistributedTpuOp"; XLA_SCOPED_LOGGING_TIMER("ConfigureDistributedTpuOp"); @@ -98,9 +111,15 @@ void ConfigureDistributedTpuOp::Compute(OpKernelContext* ctx) { OP_REQUIRES_OK(ctx, DeleteIfExists( rmgr, tpu::kTpuMeshStateInterfaceResourceName)); + // Create the subgraph compilation cache and put it in the local resource + // manager. + tpu::TpuCompilationCacheInterface* compilation_cache; + OP_REQUIRES_OK(ctx, CreateTpuCompilationCache(rmgr, &compilation_cache)); + core::ScopedUnref compilation_cache_ref(compilation_cache); + tpu::ConfigApiFn()->ConfigureDistributedTpuOp_DoWorkFn( num_devices_per_host.size(), num_devices_per_host.data(), - &host_config_output_size, &host_config_output, status); + compilation_cache, &host_config_output_size, &host_config_output, status); auto* tpu_mesh = tpu::TpuMeshStateInterface::Create(); OP_REQUIRES_OK( @@ -230,6 +249,14 @@ void InitializeHostForDistributedTpuOp::Compute(OpKernelContext* ctx) { mesh_state_interface)); } + if (enable_whole_mesh_compilations_) { + // If this is a whole mesh compilation mode, create the compilation cache, + // if missing. + tpu::TpuCompilationCacheInterface* compilation_cache; + OP_REQUIRES_OK(ctx, CreateTpuCompilationCache(rmgr, &compilation_cache)); + compilation_cache->Unref(); + } + tpu::ConfigApiFn()->InitializeHostForDistributedTpuOp_DoWorkFn( tpu_host_config.size(), tpu_host_config.data(), enable_whole_mesh_compilations_, &device_id_output_size, diff --git a/tensorflow/core/tpu/kernels/tpu_configuration_ops.h b/tensorflow/core/tpu/kernels/tpu_configuration_ops.h index f75a47e5aaf..d0bf5809842 100644 --- a/tensorflow/core/tpu/kernels/tpu_configuration_ops.h +++ b/tensorflow/core/tpu/kernels/tpu_configuration_ops.h @@ -16,9 +16,13 @@ limitations under the License. #define TENSORFLOW_CORE_TPU_KERNELS_TPU_CONFIGURATION_OPS_H_ #include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h" namespace tensorflow { +Status CreateTpuCompilationCache( + ResourceMgr* rmgr, tpu::TpuCompilationCacheInterface** compilation_cache); + // The ConfigureDistributedTpu op is used to start an TPUDriver from // TensorFlow. It should be run on a TPU_SYSTEM device and returns the // connection host:port for the CompilationCacheServer. 
The diff --git a/tensorflow/core/tpu/tpu_config_c_api.h b/tensorflow/core/tpu/tpu_config_c_api.h index a96cbf38f64..21649050bf7 100644 --- a/tensorflow/core/tpu/tpu_config_c_api.h +++ b/tensorflow/core/tpu/tpu_config_c_api.h @@ -35,8 +35,8 @@ extern "C" { TFTPU_CAPI_EXPORT void ConfigureDistributedTpuOp_DoWork( const size_t num_cores_per_host_size, const int32_t* num_cores_per_host, - size_t* host_config_output_size, char** host_config_output, - TF_Status* status); + void* tpu_compilation_cache_interface, size_t* host_config_output_size, + char** host_config_output, TF_Status* status); TFTPU_CAPI_EXPORT void WaitForDistributedTpuOp_DoWork( const size_t num_hosts, const size_t num_cores_per_host, From 13e3f357196e7fb2d80b7b105800784e5ce0f27a Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Wed, 15 Jul 2020 06:34:38 +0000 Subject: [PATCH 0468/2522] test replace missing key fails --- tensorflow/core/kernels/map_kernels.h | 3 +++ tensorflow/python/kernel_tests/map_ops_test.py | 12 ++++++++++++ 2 files changed, 15 insertions(+) diff --git a/tensorflow/core/kernels/map_kernels.h b/tensorflow/core/kernels/map_kernels.h index 33a950ee63a..00a6a654b54 100644 --- a/tensorflow/core/kernels/map_kernels.h +++ b/tensorflow/core/kernels/map_kernels.h @@ -199,6 +199,9 @@ class TensorMapReplace : public OpKernel { const TensorMap* m = nullptr; OP_REQUIRES_OK(c, GetInputMap(c, 0, &m)); + OP_REQUIRES(c, m->tensors().find(key) != m->tensors().end(), + errors::InvalidArgument("Trying to replace non-existent key.")); + TensorMap* output_map = nullptr; OP_REQUIRES_OK(c, ForwardInputOrCreateNewMap(c, 0, 0, *m, &output_map)); output_map->replace(key,value); diff --git a/tensorflow/python/kernel_tests/map_ops_test.py b/tensorflow/python/kernel_tests/map_ops_test.py index d8a075c7b4e..a0bfd104a9c 100644 --- a/tensorflow/python/kernel_tests/map_ops_test.py +++ b/tensorflow/python/kernel_tests/map_ops_test.py @@ -81,6 +81,18 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): l = map_ops.tensor_map_lookup(m, k) self.assertAllClose(l, v2) + def testTensorMapReplaceMissingKeyFails(self): + m = map_ops.empty_tensor_map() + k = constant_op.constant(1.0) + k2 = constant_op.constant(2.0) + v = constant_op.constant(2.0) + m = map_ops.tensor_map_insert(m, k2, v) + + with self.assertRaisesRegex(errors.InvalidArgumentError, + "Trying to replace non-existent key."): + m = map_ops.tensor_map_replace(m, k, v) + self.evaluate(m) + def testTensorMapErase(self): m = map_ops.empty_tensor_map() k = constant_op.constant(1.0) From fb1c37b0db7862c305bb04f69b7ee945a919b58d Mon Sep 17 00:00:00 2001 From: Xinyi Wang Date: Tue, 14 Jul 2020 23:32:12 -0700 Subject: [PATCH 0469/2522] Enable last partial batch for MWMS in TF2.x PiperOrigin-RevId: 321305950 Change-Id: Id7c93066f0ae27b86e73dbe02d74edf5fc428418 --- .../collective_all_reduce_strategy.py | 1 + .../collective_all_reduce_strategy_test.py | 52 ++++++++++--------- tensorflow/python/distribute/input_lib.py | 18 +++---- .../python/distribute/input_lib_test.py | 1 - .../python/distribute/strategy_common_test.py | 50 ++++++++++++++++++ 5 files changed, 87 insertions(+), 35 deletions(-) diff --git a/tensorflow/python/distribute/collective_all_reduce_strategy.py b/tensorflow/python/distribute/collective_all_reduce_strategy.py index 4a7ac9796e6..2281f1ac984 100644 --- a/tensorflow/python/distribute/collective_all_reduce_strategy.py +++ b/tensorflow/python/distribute/collective_all_reduce_strategy.py @@ -190,6 +190,7 @@ class 
CollectiveAllReduceExtended(mirrored_strategy.MirroredExtended): self._communication = communication self._initialize_strategy(self._cluster_resolver) self._cfer_fn_cache = weakref.WeakKeyDictionary() + self.experimental_enable_get_next_as_optional = True assert isinstance(self._cross_device_ops, cross_device_ops_lib.CollectiveAllReduce) diff --git a/tensorflow/python/distribute/collective_all_reduce_strategy_test.py b/tensorflow/python/distribute/collective_all_reduce_strategy_test.py index 2f114ef11a9..6cfb007bd79 100644 --- a/tensorflow/python/distribute/collective_all_reduce_strategy_test.py +++ b/tensorflow/python/distribute/collective_all_reduce_strategy_test.py @@ -351,31 +351,35 @@ class DistributedCollectiveAllReduceStrategyTest( combinations.combine( mode=['graph'], required_gpus=[0, 1, 2], use_dataset=[True, False])) def testMakeInputFnIterator(self, required_gpus, use_dataset): - if use_dataset: - fn = lambda: dataset_ops.Dataset.range(100) - else: - def fn(): - dataset = dataset_ops.Dataset.range(100) - it = dataset_ops.make_one_shot_iterator(dataset) - return it.get_next - # We use CPU as the device when required_gpus = 0 - devices_per_worker = max(1, required_gpus) - expected_values = [[i+j for j in range(devices_per_worker)] - for i in range(0, 100, devices_per_worker)] + def _worker_fn(task_type, task_id, required_gpus): + if use_dataset: + fn = lambda: dataset_ops.Dataset.range(20) + else: + def fn(): + dataset = dataset_ops.Dataset.range(20) + it = dataset_ops.make_one_shot_iterator(dataset) + return it.get_next + # We use CPU as the device when required_gpus = 0 + devices_per_worker = max(1, required_gpus) + expected_values = [[i+j for j in range(devices_per_worker)] + for i in range(0, 20, devices_per_worker)] - input_fn = self._input_fn_to_test_input_context( - fn, - expected_num_replicas_in_sync=3*devices_per_worker, - expected_num_input_pipelines=3, - expected_input_pipeline_id=1) # because task_id = 1 - self._test_input_fn_iterator( - 'worker', - 1, - required_gpus, - input_fn, - expected_values, - test_reinitialize=use_dataset, - ignore_order=not use_dataset) + input_fn = self._input_fn_to_test_input_context( + fn, + expected_num_replicas_in_sync=3*devices_per_worker, + expected_num_input_pipelines=3, + expected_input_pipeline_id=task_id) + self._test_input_fn_iterator( + task_type, + task_id, + required_gpus, + input_fn, + expected_values, + test_reinitialize=use_dataset, + ignore_order=not use_dataset) + + self._run_between_graph_clients(_worker_fn, self._cluster_spec, + required_gpus) @combinations.generate(combinations.combine(mode=['graph'])) def testUpdateConfigProto(self): diff --git a/tensorflow/python/distribute/input_lib.py b/tensorflow/python/distribute/input_lib.py index b50037b348f..708d5ebca75 100644 --- a/tensorflow/python/distribute/input_lib.py +++ b/tensorflow/python/distribute/input_lib.py @@ -517,7 +517,7 @@ def _get_next_as_optional(iterator, strategy, name=None): # Collective all-reduce requires explicit devices for inputs. with ops.device("/cpu:0"): # Converting to integers for all-reduce. - worker_has_value = math_ops.cast(worker_has_value, dtypes.int32) + worker_has_value = math_ops.cast(worker_has_value, dtypes.int64) worker_devices.append(worker_has_value.device) worker_has_values.append(worker_has_value) # Make `replicas` a flat list of values across all replicas. @@ -592,16 +592,12 @@ class DistributedIteratorBase(DistributedIteratorInterface): # get_next_as_optional(). 
And we only enable get_next_as_optional when the # output shapes are not static. # - # TODO(yuefengz): Currently `experimental_enable_get_next_as_optional` is - # always set to False in CollectiveAllReduceStrategy. We want to have a way - # to distinguish multi workers/single worker between graph, so we can enable - # the behavior in single worker case. - # # TODO(rxsang): We want to always enable the get_next_as_optional behavior # when user passed input_fn instead of dataset. if getattr( strategy.extended, "experimental_enable_get_next_as_optional", False): - self._enable_get_next_as_optional = not static_shape + self._enable_get_next_as_optional = ( + not static_shape) or strategy.extended._in_multi_worker_mode() else: self._enable_get_next_as_optional = False @@ -872,9 +868,10 @@ class DistributedIterator(DistributedIteratorBase, self._iterators = components static_shape = _get_static_shape(self._iterators) self._strategy = strategy - if getattr( - strategy.extended, "experimental_enable_get_next_as_optional", False): - self._enable_get_next_as_optional = not static_shape + if getattr(strategy.extended, + "experimental_enable_get_next_as_optional", False): + self._enable_get_next_as_optional = ( + not static_shape) or strategy.extended._in_multi_worker_mode() else: self._enable_get_next_as_optional = False else: @@ -1273,6 +1270,7 @@ class InputFunctionIterator(DistributedIteratorV1): super(InputFunctionIterator, self).__init__(input_workers, iterators, strategy) + self._enable_get_next_as_optional = False # TODO(anjalisridhar): This class will soon be removed and users should move diff --git a/tensorflow/python/distribute/input_lib_test.py b/tensorflow/python/distribute/input_lib_test.py index 7f02d0121d0..23397bf5070 100644 --- a/tensorflow/python/distribute/input_lib_test.py +++ b/tensorflow/python/distribute/input_lib_test.py @@ -1144,7 +1144,6 @@ class DistributedIteratorMultiWorkerTest( expected_values = [[[0, 1]], [[2, 3]], [[4]]] input_context = None - strategy.extended.experimental_enable_get_next_as_optional = True self._test_input_iteration( input_type, api_type, diff --git a/tensorflow/python/distribute/strategy_common_test.py b/tensorflow/python/distribute/strategy_common_test.py index 9021c53e129..b1dfe78326c 100644 --- a/tensorflow/python/distribute/strategy_common_test.py +++ b/tensorflow/python/distribute/strategy_common_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function from absl.testing import parameterized +import numpy as np from tensorflow.python.data.ops import dataset_ops from tensorflow.python.distribute import combinations @@ -102,6 +103,55 @@ class DistributedCollectiveAllReduceStrategyTest( sum_value.numpy(), expected_sum_on_workers[multi_worker_test_base.get_task_type()]) + def testSimpleInputFromDatasetLastPartialBatch(self, strategy): + global_batch_size = 8 + dataset = dataset_ops.DatasetV2.range(14).batch( + global_batch_size, drop_remainder=False) + input_iterator = iter(strategy.experimental_distribute_dataset(dataset)) + + @def_function.function + def run(input_iterator): + return strategy.run(lambda x: x, args=(next(input_iterator),)) + + # Let the complete batch go. 
+ run(input_iterator) + + # `result` is an incomplete batch + result = run(input_iterator) + expected_data_on_workers = {'chief': [8, 9, 10], 'worker': [11, 12, 13]} + self.assertTrue( + np.array_equal( + result.numpy(), + expected_data_on_workers[multi_worker_test_base.get_task_type()])) + + def testSimpleInputFromFnLastPartialBatch(self, strategy): + + def dataset_fn(input_context): + global_batch_size = 8 + batch_size = input_context.get_per_replica_batch_size(global_batch_size) + dataset = dataset_ops.DatasetV2.range(14).batch( + batch_size, drop_remainder=False) + return dataset.shard(input_context.num_input_pipelines, + input_context.input_pipeline_id) + + input_iterator = iter( + strategy.experimental_distribute_datasets_from_function(dataset_fn)) + + @def_function.function + def run(input_iterator): + return strategy.run(lambda x: x, args=(next(input_iterator),)) + + # Let the complete batch go. + run(input_iterator) + # `result` is an incomplete batch + result = run(input_iterator) + + expected_data_on_worker = {'chief': [8, 9, 10, 11], 'worker': [12, 13]} + self.assertTrue( + np.array_equal( + result.numpy(), expected_data_on_worker[ + multi_worker_test_base.get_task_type()])) + def testReduceHostTensor(self, strategy): reduced = strategy.reduce( reduce_util.ReduceOp.SUM, array_ops.identity(1.), axis=None) From 03cb974777d00ddf05957d41aa3d44d7c76af924 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Wed, 15 Jul 2020 00:24:00 -0700 Subject: [PATCH 0470/2522] Work around the bazel bug around /showIncludes logic on windows GPU builds. PiperOrigin-RevId: 321310704 Change-Id: Ie5182dd706696f8c5f425cec02919ca998bb15e8 --- third_party/gpus/cuda_configure.bzl | 1 + 1 file changed, 1 insertion(+) diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl index a192c022a47..70bb91159de 100644 --- a/third_party/gpus/cuda_configure.bzl +++ b/third_party/gpus/cuda_configure.bzl @@ -187,6 +187,7 @@ def _get_win_cuda_defines(repository_ctx): # the same tmp directory escaped_cxx_include_directories = [ _get_nvcc_tmp_dir_for_windows(repository_ctx), + "C:\\\\botcode\\\\w", ] for path in escaped_include_paths.split(";"): if path: From ee0a146af144d74acafccd7e772fdd027cfd8325 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 15 Jul 2020 00:29:59 -0700 Subject: [PATCH 0471/2522] Introduce TpuCompilationCache create function registration PiperOrigin-RevId: 321311264 Change-Id: Ia1a4568df6ce5a36abb8119ede603cd724409038 --- tensorflow/core/tpu/kernels/BUILD | 17 ------ .../kernels/tpu_compilation_cache_factory.cc | 55 ------------------- .../kernels/tpu_compilation_cache_factory.h | 33 ----------- .../core/tpu/kernels/tpu_configuration_ops.cc | 29 +--------- .../core/tpu/kernels/tpu_configuration_ops.h | 4 -- tensorflow/core/tpu/tpu_config_c_api.h | 4 +- 6 files changed, 3 insertions(+), 139 deletions(-) delete mode 100644 tensorflow/core/tpu/kernels/tpu_compilation_cache_factory.cc delete mode 100644 tensorflow/core/tpu/kernels/tpu_compilation_cache_factory.h diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index 70d6ccff4df..6ff0fb1df73 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -79,10 +79,7 @@ tf_kernel_library( srcs = ["tpu_configuration_ops.cc"], hdrs = ["tpu_configuration_ops.h"], deps = [ - ":tpu_compilation_cache_factory", - ":tpu_compilation_cache_interface", ":tpu_mesh_state_interface", - ":tpu_op_consts", "//tensorflow/c:tf_status", "//tensorflow/c:tf_status_helper", "//tensorflow/compiler/xla:util", @@ -136,20 +133,6 @@ tf_proto_library_cc( ], ) -cc_library( - name = "tpu_compilation_cache_factory", - srcs = ["tpu_compilation_cache_factory.cc"], - hdrs = ["tpu_compilation_cache_factory.h"], - deps = [ - ":tpu_compilation_cache_external", - ":tpu_compilation_cache_interface", - ":tpu_op_consts", - "//tensorflow/core:framework", - "//tensorflow/core/platform:status", - "//tensorflow/core/platform:types", - ], -) - cc_library( name = "tpu_compilation_cache_key", srcs = [], diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_factory.cc b/tensorflow/core/tpu/kernels/tpu_compilation_cache_factory.cc deleted file mode 100644 index 86469ae7ebb..00000000000 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_factory.cc +++ /dev/null @@ -1,55 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_factory.h" - -#include "tensorflow/core/framework/resource_mgr.h" -#include "tensorflow/core/platform/types.h" -#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_external.h" -#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h" -#include "tensorflow/core/tpu/kernels/tpu_op_consts.h" - -namespace tensorflow { -namespace tpu { -namespace { - -TpuCompilationCacheInterface* CreateCompilationCacheExternal() { - // NOTE: Change the 1 << 33 value to change the compilation cache size. - // TODO(frankchn): Make this configurable. 
- return new TpuCompilationCacheExternal(int64{1} << 33); // 8 GB -} - -// Using a pointer here to fulfill the trivially destructible requirement for -// static variables. -static std::function* - compilation_cache_creation_fn = - new std::function( - CreateCompilationCacheExternal); - -} // namespace - -std::function GetCompilationCacheCreateFn() { - return *compilation_cache_creation_fn; -} - -void SetCompilationCacheCreateFn( - std::function fn) { - delete compilation_cache_creation_fn; - compilation_cache_creation_fn = - new std::function(fn); -} - -} // namespace tpu -} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_factory.h b/tensorflow/core/tpu/kernels/tpu_compilation_cache_factory.h deleted file mode 100644 index 4710f916c48..00000000000 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_factory.h +++ /dev/null @@ -1,33 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILATION_CACHE_FACTORY_H_ -#define TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILATION_CACHE_FACTORY_H_ - -#include - -#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h" - -namespace tensorflow { -namespace tpu { - -std::function GetCompilationCacheCreateFn(); - -void SetCompilationCacheCreateFn( - std::function fn); - -} // namespace tpu -} // namespace tensorflow - -#endif // TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILATION_CACHE_FACTORY_H_ diff --git a/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc b/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc index 13efdc46e10..065a7f77dd6 100644 --- a/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc +++ b/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc @@ -23,10 +23,7 @@ limitations under the License. 
#include "tensorflow/core/framework/tensor.pb.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/platform/refcount.h" -#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_factory.h" -#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h" #include "tensorflow/core/tpu/kernels/tpu_mesh_state_interface.h" -#include "tensorflow/core/tpu/kernels/tpu_op_consts.h" #include "tensorflow/core/tpu/tpu_api.h" #include "tensorflow/core/tpu/tpu_config_c_api.h" #include "tensorflow/core/tpu/tpu_configuration.h" @@ -70,16 +67,6 @@ Status DeleteIfExists(ResourceMgr* resource_manager, } // namespace -Status CreateTpuCompilationCache( - ResourceMgr* rmgr, tpu::TpuCompilationCacheInterface** compilation_cache) { - return rmgr->LookupOrCreate( - rmgr->default_container(), tpu::kCompilationCacheResourceName, - compilation_cache, [&](tpu::TpuCompilationCacheInterface** new_cache) { - *new_cache = tpu::GetCompilationCacheCreateFn()(); - return Status::OK(); - }); -} - void ConfigureDistributedTpuOp::Compute(OpKernelContext* ctx) { VLOG(1) << "ConfigureDistributedTpuOp"; XLA_SCOPED_LOGGING_TIMER("ConfigureDistributedTpuOp"); @@ -111,15 +98,9 @@ void ConfigureDistributedTpuOp::Compute(OpKernelContext* ctx) { OP_REQUIRES_OK(ctx, DeleteIfExists( rmgr, tpu::kTpuMeshStateInterfaceResourceName)); - // Create the subgraph compilation cache and put it in the local resource - // manager. - tpu::TpuCompilationCacheInterface* compilation_cache; - OP_REQUIRES_OK(ctx, CreateTpuCompilationCache(rmgr, &compilation_cache)); - core::ScopedUnref compilation_cache_ref(compilation_cache); - tpu::ConfigApiFn()->ConfigureDistributedTpuOp_DoWorkFn( num_devices_per_host.size(), num_devices_per_host.data(), - compilation_cache, &host_config_output_size, &host_config_output, status); + &host_config_output_size, &host_config_output, status); auto* tpu_mesh = tpu::TpuMeshStateInterface::Create(); OP_REQUIRES_OK( @@ -249,14 +230,6 @@ void InitializeHostForDistributedTpuOp::Compute(OpKernelContext* ctx) { mesh_state_interface)); } - if (enable_whole_mesh_compilations_) { - // If this is a whole mesh compilation mode, create the compilation cache, - // if missing. - tpu::TpuCompilationCacheInterface* compilation_cache; - OP_REQUIRES_OK(ctx, CreateTpuCompilationCache(rmgr, &compilation_cache)); - compilation_cache->Unref(); - } - tpu::ConfigApiFn()->InitializeHostForDistributedTpuOp_DoWorkFn( tpu_host_config.size(), tpu_host_config.data(), enable_whole_mesh_compilations_, &device_id_output_size, diff --git a/tensorflow/core/tpu/kernels/tpu_configuration_ops.h b/tensorflow/core/tpu/kernels/tpu_configuration_ops.h index d0bf5809842..f75a47e5aaf 100644 --- a/tensorflow/core/tpu/kernels/tpu_configuration_ops.h +++ b/tensorflow/core/tpu/kernels/tpu_configuration_ops.h @@ -16,13 +16,9 @@ limitations under the License. #define TENSORFLOW_CORE_TPU_KERNELS_TPU_CONFIGURATION_OPS_H_ #include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h" namespace tensorflow { -Status CreateTpuCompilationCache( - ResourceMgr* rmgr, tpu::TpuCompilationCacheInterface** compilation_cache); - // The ConfigureDistributedTpu op is used to start an TPUDriver from // TensorFlow. It should be run on a TPU_SYSTEM device and returns the // connection host:port for the CompilationCacheServer. 
The diff --git a/tensorflow/core/tpu/tpu_config_c_api.h b/tensorflow/core/tpu/tpu_config_c_api.h index 21649050bf7..a96cbf38f64 100644 --- a/tensorflow/core/tpu/tpu_config_c_api.h +++ b/tensorflow/core/tpu/tpu_config_c_api.h @@ -35,8 +35,8 @@ extern "C" { TFTPU_CAPI_EXPORT void ConfigureDistributedTpuOp_DoWork( const size_t num_cores_per_host_size, const int32_t* num_cores_per_host, - void* tpu_compilation_cache_interface, size_t* host_config_output_size, - char** host_config_output, TF_Status* status); + size_t* host_config_output_size, char** host_config_output, + TF_Status* status); TFTPU_CAPI_EXPORT void WaitForDistributedTpuOp_DoWork( const size_t num_hosts, const size_t num_cores_per_host, From 028bf2894679862bdbb73691202f3d6059c85f79 Mon Sep 17 00:00:00 2001 From: Stephan Herhut Date: Wed, 15 Jul 2020 00:37:58 -0700 Subject: [PATCH 0472/2522] Add canonicalization patterns for dynamic_broadcast_in_dim where the target shape is the shape of the operand. PiperOrigin-RevId: 321312182 Change-Id: Ifc32a437de063c419cc2773d2e5e784026c44acb --- tensorflow/compiler/mlir/hlo/BUILD | 20 +++++++++++++ .../mlir-hlo/Dialect/mhlo/IR/hlo_ops.td | 6 ++-- .../mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc | 5 +++- .../hlo/lib/Dialect/mhlo/IR/hlo_patterns.td | 29 +++++++++++++++++++ .../compiler/mlir/hlo/tests/canonicalize.mlir | 10 +++++++ third_party/mlir/BUILD | 8 ++--- 6 files changed, 68 insertions(+), 10 deletions(-) create mode 100644 tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_patterns.td diff --git a/tensorflow/compiler/mlir/hlo/BUILD b/tensorflow/compiler/mlir/hlo/BUILD index c7bda887db0..5cbf305bb4d 100644 --- a/tensorflow/compiler/mlir/hlo/BUILD +++ b/tensorflow/compiler/mlir/hlo/BUILD @@ -106,6 +106,25 @@ gentbl( td_srcs = [":hlo_ops_td_files"], ) +gentbl( + name = "hlo_ops_pattern_gen", + strip_include_prefix = "include", + tbl_outs = [ + ( + "-gen-rewriters", + "include/mlir-hlo/Dialect/mhlo/IR/hlo_patterns.cc.inc", + ), + ], + tblgen = "@llvm-project//mlir:mlir-tblgen", + td_file = "lib/Dialect/mhlo/IR/hlo_patterns.td", + td_srcs = [ + ":hlo_ops_td_files", + "@llvm-project//mlir:StdOpsTdFiles", + "@llvm-project//mlir:include/mlir/Dialect/Shape/IR/ShapeBase.td", + "@llvm-project//mlir:include/mlir/Dialect/Shape/IR/ShapeOps.td", + ], +) + gentbl( name = "lhlo_ops_inc_gen", strip_include_prefix = "include", @@ -203,6 +222,7 @@ cc_library( ], includes = ["include"], deps = [ + "hlo_ops_pattern_gen", ":canonicalize_inc_gen", ":chlo_ops_inc_gen", ":convert_op_folder", diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td index 0ed4235e23f..c88a6138b95 100644 --- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td @@ -21,9 +21,9 @@ limitations under the License. 
include "mlir/IR/OpBase.td" include "mlir/Interfaces/InferTypeOpInterface.td" include "mlir/Interfaces/SideEffectInterfaces.td" -include "mlir-hlo/Dialect/mhlo/IR/hlo_ops_base.td" -include "mlir-hlo/Dialect/mhlo/IR/hlo_utils.td" -include "mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.td" +include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops_base.td" +include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_utils.td" +include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.td" def HLO_Dialect : Dialect { let name = "mhlo"; diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc index cbd478a0283..ee898828b76 100644 --- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc @@ -35,6 +35,7 @@ limitations under the License. #include "llvm/Support/Casting.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MathExtras.h" +#include "mlir/Dialect/Shape/IR/Shape.h" // from @llvm-project #include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project #include "mlir/IR/Attributes.h" // from @llvm-project #include "mlir/IR/Builders.h" // from @llvm-project @@ -59,6 +60,7 @@ limitations under the License. #include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/hlo_utils.h" namespace mlir { +#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_patterns.cc.inc" #include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_structs.cc.inc" namespace mhlo { @@ -744,7 +746,8 @@ class DynamicBroadcastInDimOpNotActuallyDynamic void DynamicBroadcastInDimOp::getCanonicalizationPatterns( OwningRewritePatternList& results, MLIRContext* context) { - results.insert(context); + results.insert(context); } //===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_patterns.td b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_patterns.td new file mode 100644 index 00000000000..e598465f034 --- /dev/null +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_patterns.td @@ -0,0 +1,29 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Canonicalization patterns for the MHLO dialect. + +include "mlir/Dialect/Shape/IR/ShapeOps.td" +include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.td" + +def EqualBinaryOperands : Constraint>; + +// Canonicalization patterns. 
+ +def DynamicBroadcastToOwnShape : Pat< + (HLO_DynamicBroadcastInDimOp:$op $arg0, + (Shape_ToExtentTensorOp (Shape_ShapeOfOp $arg1)), $attr), + (replaceWithValue $arg0), [(EqualBinaryOperands $arg0, $arg1)]>; + diff --git a/tensorflow/compiler/mlir/hlo/tests/canonicalize.mlir b/tensorflow/compiler/mlir/hlo/tests/canonicalize.mlir index 87774129ffb..f773c95237e 100644 --- a/tensorflow/compiler/mlir/hlo/tests/canonicalize.mlir +++ b/tensorflow/compiler/mlir/hlo/tests/canonicalize.mlir @@ -365,6 +365,16 @@ func @dynamic_broadcast_in_dim_op_not_actually_dynamic(%arg0: tensor<4xf32>, %ar return %0 : tensor<5x4xf32> } +// CHECK-LABEL: func @dynamic_broadcast_in_dim_to_same_shape +func @dynamic_broadcast_in_dim_to_same_shape(%arg0: tensor) -> tensor { +// CHECK-SAME: %[[ARG:.*]]: tensor + %0 = shape.shape_of %arg0 : tensor + %1 = shape.to_extent_tensor %0 : tensor<1xindex> + %2 = "mhlo.dynamic_broadcast_in_dim"(%arg0, %1) { broadcast_dimensions = dense<0> : tensor<1xi64> } : (tensor, tensor<1xindex>) -> tensor + // CHECK: return %[[ARG]] : tensor + return %2 : tensor +} + // CHECK-LABEL: func @broadcast_in_dim_constant_fold_0d func @broadcast_in_dim_constant_fold_0d() -> tensor<1x64x224x224xf32> { %cst = mhlo.constant dense<0.000000e+00> : tensor diff --git a/third_party/mlir/BUILD b/third_party/mlir/BUILD index 18a7c4df7d7..46ce6833de3 100644 --- a/third_party/mlir/BUILD +++ b/third_party/mlir/BUILD @@ -3732,16 +3732,12 @@ exports_files( "include/mlir/Interfaces/ViewLikeInterface.td", "include/mlir/Dialect/LLVMIR/LLVMOpBase.td", "include/mlir/Dialect/StandardOps/IR/Ops.td", + "include/mlir/Dialect/Shape/IR/ShapeOps.td", + "include/mlir/Dialect/Shape/IR/ShapeBase.td", "include/mlir/IR/OpAsmInterface.td", "include/mlir/IR/OpBase.td", "include/mlir/IR/SymbolInterfaces.td", "include/mlir/Transforms/InliningUtils.h", - ], - visibility = [":friends"], -) - -exports_files( - [ "include/mlir/Interfaces/InferTypeOpInterface.td", "include/mlir/Interfaces/LoopLikeInterface.td", ], From f24d063dec14479e793c3a061c80a4ef8a156616 Mon Sep 17 00:00:00 2001 From: Marcel Hlopko Date: Wed, 15 Jul 2020 01:26:52 -0700 Subject: [PATCH 0473/2522] Add artifact name patterns to the cuda toolchain on Windows With these Bazel generates typical Windows names (e.g. adds .exe extension to binaries, .dll to shared libraries etc.). 
PiperOrigin-RevId: 321317292 Change-Id: I5d2f25cc918c81b3fdb7d924b93124ec9a5481b4 --- .../crosstool/cc_toolchain_config.bzl.tpl | 37 ++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/third_party/gpus/crosstool/cc_toolchain_config.bzl.tpl b/third_party/gpus/crosstool/cc_toolchain_config.bzl.tpl index eb320a94201..afc8132bd15 100644 --- a/third_party/gpus/crosstool/cc_toolchain_config.bzl.tpl +++ b/third_party/gpus/crosstool/cc_toolchain_config.bzl.tpl @@ -3,6 +3,7 @@ load( "@bazel_tools//tools/cpp:cc_toolchain_config_lib.bzl", "action_config", + "artifact_name_pattern", "env_entry", "env_set", "feature", @@ -971,6 +972,7 @@ def _impl(ctx): linker_path = ctx.attr.host_compiler_path, strip_path = ctx.attr.host_compiler_prefix + "/strip", ) + artifact_name_patterns = [] elif (cpu == "local"): toolchain_identifier = "local_linux" target_cpu = "local" @@ -984,6 +986,7 @@ def _impl(ctx): linker_path = ctx.attr.host_compiler_path, strip_path = ctx.attr.host_compiler_prefix + "/strip", ) + artifact_name_patterns = [] elif (cpu == "x64_windows"): toolchain_identifier = "local_windows" target_cpu = "x64_windows" @@ -997,6 +1000,38 @@ def _impl(ctx): linker_path = ctx.attr.msvc_link_path, strip_path = "fake_tool_strip_not_supported", ) + artifact_name_patterns = [ + artifact_name_pattern( + category_name = "object_file", + prefix = "", + extension = ".obj", + ), + artifact_name_pattern( + category_name = "static_library", + prefix = "", + extension = ".lib", + ), + artifact_name_pattern( + category_name = "alwayslink_static_library", + prefix = "", + extension = ".lo.lib", + ), + artifact_name_pattern( + category_name = "executable", + prefix = "", + extension = ".exe", + ), + artifact_name_pattern( + category_name = "dynamic_library", + prefix = "", + extension = ".dll", + ), + artifact_name_pattern( + category_name = "interface_library", + prefix = "", + extension = ".if.lib", + ), + ] else: fail("Unreachable") @@ -1007,7 +1042,7 @@ def _impl(ctx): ctx = ctx, features = _features(cpu, compiler, ctx), action_configs = action_configs, - artifact_name_patterns = [], + artifact_name_patterns = artifact_name_patterns, cxx_builtin_include_directories = ctx.attr.builtin_include_directories, toolchain_identifier = toolchain_identifier, host_system_name = "local", From e34825b8cb47aa363e7e3b694527c2fcff052e2a Mon Sep 17 00:00:00 2001 From: Pavithra Vijay Date: Wed, 15 Jul 2020 01:33:05 -0700 Subject: [PATCH 0474/2522] Remove the usage of TF private API resource_variable_ops.resource_scatter_add from Keras. 
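For illustration, a minimal sketch of the substitution this change makes; the helper mirrors the optimizer code in the diff below and is not a new public API.

```python
# Sketch only: replace the private resource_variable_ops.resource_scatter_add
# helper with the generated op wrapper, as the Keras optimizer code below does.
from tensorflow.python.framework import ops
from tensorflow.python.ops import gen_resource_variable_ops


def _resource_scatter_add(x, i, v):
  # `x` is a resource variable, `i` the indices, `v` the updates.
  with ops.control_dependencies([
      gen_resource_variable_ops.ResourceScatterAdd(
          resource=x.handle, indices=i, updates=v)
  ]):
    return x.value()
```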
PiperOrigin-RevId: 321317842 Change-Id: If545338f654dde1042f6b360c47feb041d034795 --- tensorflow/python/keras/optimizer_v2/gradient_descent.py | 8 +++++--- tensorflow/python/keras/optimizer_v2/optimizer_v2.py | 7 +++++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/keras/optimizer_v2/gradient_descent.py b/tensorflow/python/keras/optimizer_v2/gradient_descent.py index 856cc692431..017c4e5db25 100644 --- a/tensorflow/python/keras/optimizer_v2/gradient_descent.py +++ b/tensorflow/python/keras/optimizer_v2/gradient_descent.py @@ -21,7 +21,7 @@ from __future__ import print_function from tensorflow.python.framework import ops from tensorflow.python.keras.optimizer_v2 import optimizer_v2 from tensorflow.python.ops import array_ops -from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import gen_resource_variable_ops from tensorflow.python.training import training_ops from tensorflow.python.util.tf_export import keras_export @@ -158,8 +158,10 @@ class SGD(optimizer_v2.OptimizerV2): coefficients = (kwargs.get("apply_state", {}).get((var_device, var_dtype)) or self._fallback_apply_state(var_device, var_dtype)) - return resource_variable_ops.resource_scatter_add( - var.handle, indices, -grad * coefficients["lr_t"]) + return gen_resource_variable_ops.ResourceScatterAdd( + resource=var.handle, + indices=indices, + updates=-grad * coefficients["lr_t"]) def _resource_apply_sparse(self, grad, var, indices, apply_state=None): # This method is only needed for momentum optimization. diff --git a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py index c9ce3b043e8..fb149999141 100644 --- a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py +++ b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py @@ -43,6 +43,7 @@ from tensorflow.python.keras.utils import tf_utils from tensorflow.python.ops import array_ops from tensorflow.python.ops import clip_ops from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import gen_resource_variable_ops from tensorflow.python.ops import gradients from tensorflow.python.ops import math_ops from tensorflow.python.ops import resource_variable_ops @@ -1150,8 +1151,10 @@ class OptimizerV2(trackable.Trackable): raise NotImplementedError("Must be implemented in subclasses.") def _resource_scatter_add(self, x, i, v): - with ops.control_dependencies( - [resource_variable_ops.resource_scatter_add(x.handle, i, v)]): + with ops.control_dependencies([ + gen_resource_variable_ops.ResourceScatterAdd( + resource=x.handle, indices=i, updates=v) + ]): return x.value() def _resource_scatter_update(self, x, i, v): From d5b2cfd27d9c53cced5d9d7bb23fa5117c655ef2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 Jul 2020 02:01:43 -0700 Subject: [PATCH 0475/2522] Update GraphDef version to 463. PiperOrigin-RevId: 321320444 Change-Id: I6686853311b4ba3f6be548b526e3783151ed05ea --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 1715c650b56..8f071beac51 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. 
#define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 462 // Updated: 2020/7/14 +#define TF_GRAPH_DEF_VERSION 463 // Updated: 2020/7/15 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From 4d05cc7f44ed5a38381a53dcd62d9e5bbf6d9655 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 Jul 2020 02:01:46 -0700 Subject: [PATCH 0476/2522] compat: Update forward compatibility horizon to 2020-07-15 PiperOrigin-RevId: 321320451 Change-Id: I727b66cbf16ad8659fa92581e7b6ed60af868d8a --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index c2b66b48178..f6671b19a9c 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 14) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 7, 15) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From d50a80787759ce9b423ba5a87d733d32e2899de3 Mon Sep 17 00:00:00 2001 From: Thomas Joerg Date: Wed, 15 Jul 2020 03:32:29 -0700 Subject: [PATCH 0477/2522] [XLA:CPU] Allow HLO tracing independently of HLO profiling. So far the `--xla_cpu_enable_xprof_traceme` defaults to true, but tracing is only enabled when both `--xla_cpu_enable_xprof_traceme` and `--xla_hlo_profile` are set. To allow tracing without the more heavy-weight profiling, this change allows tracing with just `--xla_cpu_enable_xprof_traceme`. To avoid tracing overhead in existing models, the default is changed to false. 
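As an illustration, one way to opt back in to HLO tracing after this change is through `XLA_FLAGS`; this is a sketch under the assumption that the flag is picked up from the environment before XLA initializes, and the importing script is hypothetical.

```python
# Hypothetical usage sketch: enable XLA:CPU HLO tracing without enabling full
# HLO profiling, now that --xla_cpu_enable_xprof_traceme defaults to false.
# XLA_FLAGS is typically read once, so set it before TensorFlow is imported.
import os
os.environ["XLA_FLAGS"] = "--xla_cpu_enable_xprof_traceme=true"

import tensorflow as tf  # noqa: E402  (imported after the flag is set)
```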
PiperOrigin-RevId: 321330365 Change-Id: Ia897f25c7bb6ca388d9b671dc8f6191442742cf8 --- .../compiler/xla/debug_options_flags.cc | 2 +- .../compiler/xla/service/cpu/ir_emitter.cc | 20 +++++++++++++++---- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/xla/debug_options_flags.cc b/tensorflow/compiler/xla/debug_options_flags.cc index 8ca6e2b294c..552dfcd15c3 100644 --- a/tensorflow/compiler/xla/debug_options_flags.cc +++ b/tensorflow/compiler/xla/debug_options_flags.cc @@ -71,7 +71,7 @@ DebugOptions DefaultDebugOptionsIgnoringFlags() { opts.set_xla_allow_excess_precision(true); opts.set_xla_force_host_platform_device_count(1); opts.set_xla_gpu_deterministic_reductions(false); - opts.set_xla_cpu_enable_xprof_traceme(true); + opts.set_xla_cpu_enable_xprof_traceme(false); opts.set_xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found(false); return opts; diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index ebb2df23805..16d92f622d2 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -3037,10 +3037,21 @@ void IrEmitter::TracingState::EmitTracingEnd(llvm::IRBuilder<>* b, {b->CreateBitCast(run_options, void_ptr_type), activity_id}); } +namespace { +bool IsHloVeryCheap(const HloInstruction* hlo) { + return hlo->opcode() == HloOpcode::kBitcast || + hlo->opcode() == HloOpcode::kTuple || + hlo->opcode() == HloOpcode::kGetTupleElement || + hlo->opcode() == HloOpcode::kParameter || + hlo->opcode() == HloOpcode::kConstant; +} +} // namespace + Status IrEmitter::Preprocess(HloInstruction* hlo) { VLOG(3) << "Visiting: " << hlo->ToString(); - if (instruction_to_profile_idx_.count(hlo)) { - // Only trace the same HLOs that the profiler does. + // When profiling is enabled, trace the same HLOs that the profiler does. + if (instruction_to_profile_idx_.count(hlo) || + (hlo_module_config_.cpu_traceme_enabled() && !IsHloVeryCheap(hlo))) { tracing_state_.EmitTracingStart(&b_, hlo, GetExecutableRunOptionsArgument()); profiling_state_.RecordCycleStart(&b_, hlo); @@ -3052,8 +3063,9 @@ Status IrEmitter::Postprocess(HloInstruction* hlo) { if (auto* prof_counter = GetProfileCounterFor(*hlo)) { profiling_state_.RecordCycleDelta(&b_, hlo, prof_counter); } - // Only trace the same HLOs that the profiler does. - if (instruction_to_profile_idx_.count(hlo)) { + // When profiling is enabled, trace the same HLOs that the profiler does. + if (instruction_to_profile_idx_.count(hlo) || + (hlo_module_config_.cpu_traceme_enabled() && !IsHloVeryCheap(hlo))) { tracing_state_.EmitTracingEnd(&b_, hlo, GetExecutableRunOptionsArgument()); } return Status::OK(); From fedf6fae5dffb70037bfafad0905a6f8a9fee8a3 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Wed, 15 Jul 2020 18:10:11 +0700 Subject: [PATCH 0478/2522] Add get s3 client --- .../filesystem/plugins/s3/s3_filesystem.cc | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc index 9a9d52c8382..ac873f5e9bf 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc @@ -21,10 +21,12 @@ limitations under the License. 
#include "absl/strings/ascii.h" #include "absl/strings/numbers.h" #include "tensorflow/c/experimental/filesystem/filesystem_interface.h" +#include "tensorflow/c/experimental/filesystem/plugins/s3/aws_crypto.h" #include "tensorflow/c/tf_status.h" // Implementation of a filesystem for S3 environments. // This filesystem will support `s3://` URI schemes. +constexpr char kS3ClientAllocationTag[] = "S3ClientAllocation"; constexpr int64_t kS3TimeoutMsec = 300000; // 5 min static void* plugin_memory_allocate(size_t size) { return calloc(1, size); } @@ -131,6 +133,40 @@ static Aws::Client::ClientConfiguration& GetDefaultClientConfig() { return cfg; }; +static void GetS3Client(TF_Filesystem* filesystem) { + auto s3_file = + static_cast(filesystem->plugin_filesystem); + absl::MutexLock l(&s3_file->initialization_lock); + + if (s3_file->s3_client.get() == nullptr) { + Aws::SDKOptions options; + options.cryptoOptions.sha256Factory_create_fn = []() { + return Aws::MakeShared( + tf_s3_filesystem::AWSCryptoAllocationTag); + }; + options.cryptoOptions.sha256HMACFactory_create_fn = []() { + return Aws::MakeShared( + tf_s3_filesystem::AWSCryptoAllocationTag); + }; + options.cryptoOptions.secureRandomFactory_create_fn = []() { + return Aws::MakeShared( + tf_s3_filesystem::AWSCryptoAllocationTag); + }; + Aws::InitAPI(options); + + // The creation of S3Client disables virtual addressing: + // S3Client(clientConfiguration, signPayloads, useVirtualAddressing = + // true) + // The purpose is to address the issue encountered when there is an `.` + // in the bucket name. Due to TLS hostname validation or DNS rules, + // the bucket may not be resolved. Disabling of virtual addressing + // should address the issue. See GitHub issue 16397 for details. + s3_file->s3_client = Aws::MakeShared( + kS3ClientAllocationTag, GetDefaultClientConfig(), + Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, false); + } +} + static void ShutdownClient(Aws::S3::S3Client* s3_client) { if (s3_client != nullptr) { delete s3_client; From d1d7fbded0f1c6cc44c9556a07600e3d6d647ac4 Mon Sep 17 00:00:00 2001 From: zilinzhu Date: Wed, 15 Jul 2020 19:33:37 +0800 Subject: [PATCH 0479/2522] reuse HumanString in HumanStringWithLayout --- tensorflow/compiler/xla/shape_util.cc | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc index bce40578132..ff7d69a37a4 100644 --- a/tensorflow/compiler/xla/shape_util.cc +++ b/tensorflow/compiler/xla/shape_util.cc @@ -522,13 +522,7 @@ ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout( text += ")"; return text; } - string result = StrCat( - primitive_util::LowercasePrimitiveTypeName(shape.element_type()), "["); - for (int i = 0; i < shape.dimensions().size(); i++) { - StrAppend(&result, (i > 0) ? "," : "", - shape.is_dynamic_dimension(i) ? "<=" : "", shape.dimensions(i)); - } - result += "]"; + string result = HumanString(shape); if (IsScalar(shape)) { string layout_str = LayoutUtil::HumanString(shape.layout()); // Don't print "{}" as layout for scalars. From 3b0642fbcd761649c9a30fe3a28e784a3f105d83 Mon Sep 17 00:00:00 2001 From: Prasad Nikam Date: Tue, 30 Jun 2020 12:25:57 +0530 Subject: [PATCH 0480/2522] Cadence HiFi4 NN Library v2.2.0 update The following changes are done to the HiFi4 implementation of TFLM 1. Update the kernel files and the make setup to use HiFi4 NN Library v2.2.0. 2. Update the kernel files as per the latest reference implementation. 3. 
Add xtenas_hifi kernel implementations for the add and mul operators. --- .../micro/kernels/xtensa_hifi/activations.cc | 71 ++-- .../lite/micro/kernels/xtensa_hifi/add.cc | 279 +++++++++++++++ .../lite/micro/kernels/xtensa_hifi/conv.cc | 334 +++++++++--------- .../kernels/xtensa_hifi/depthwise_conv.cc | 289 ++++++++------- .../lite/micro/kernels/xtensa_hifi/floor.cc | 47 +-- .../kernels/xtensa_hifi/fully_connected.cc | 168 +++++---- .../micro/kernels/xtensa_hifi/logistic.cc | 103 ++++-- .../lite/micro/kernels/xtensa_hifi/mul.cc | 235 ++++++++++++ .../lite/micro/kernels/xtensa_hifi/pooling.cc | 76 ++-- .../lite/micro/kernels/xtensa_hifi/softmax.cc | 48 +-- .../lite/micro/kernels/xtensa_hifi/svdf.cc | 305 +++++++++------- .../make/ext_libs/xtensa_hifi_nn_library.inc | 104 +++--- .../make/targets/xtensa_hifi_makefile.inc | 2 +- .../tools/make/third_party_downloads.inc | 4 +- 14 files changed, 1393 insertions(+), 672 deletions(-) create mode 100644 tensorflow/lite/micro/kernels/xtensa_hifi/add.cc create mode 100644 tensorflow/lite/micro/kernels/xtensa_hifi/mul.cc diff --git a/tensorflow/lite/micro/kernels/xtensa_hifi/activations.cc b/tensorflow/lite/micro/kernels/xtensa_hifi/activations.cc index 2582bf322e5..03df266d862 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifi/activations.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifi/activations.cc @@ -1,24 +1,24 @@ -/****************************************************************************** - * Copyright (C) 2019 Cadence Design Systems, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to use this Software with Cadence processor cores only and - * not with any other processors and platforms, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - ******************************************************************************/ +/******************************************************************************* +* Copyright (c) 2019-2020 Cadence Design Systems, Inc. +* +* Permission is hereby granted, free of charge, to any person obtaining +* a copy of this software and associated documentation files (the +* "Software"), to use this Software with Cadence processor cores only and +* not with any other processors and platforms, subject to +* the following conditions: +* +* The above copyright notice and this permission notice shall be included +* in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +******************************************************************************/ /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); @@ -42,7 +42,7 @@ limitations under the License. #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/op_macros.h" #include "tensorflow/lite/micro/micro_utils.h" -#include "xtensa_tf_micro_common.h" +#include "tensorflow/lite/micro/kernels/xtensa_hifi/xtensa_tf_micro_common.h" namespace tflite { namespace ops { @@ -109,6 +109,7 @@ TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) { switch (input->type) { case kTfLiteFloat32: { +#if HIFI_VFPU int err; const float* inp_data_ptr; float* out_data_ptr; @@ -119,11 +120,13 @@ TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) { inp_data_ptr = GetTensorData(input); out_data_ptr = GetTensorData(output); - const float f32_pos_inf = 0x7F800000; - err = xa_nn_vec_relu_f32_f32(out_data_ptr, inp_data_ptr, f32_pos_inf, - flat_size); + err = xa_nn_vec_relu_std_f32_f32(out_data_ptr, inp_data_ptr, flat_size); - CHECK_ERR_HIFI_NNLIB_KER(err, "xa_nn_vec_relu1_f32_f32 failed"); + CHECK_ERR_HIFI_NNLIB_KER(err, "xa_nn_vec_relu_std_f32_f32 failed"); +#else + ReluFloat(GetTensorShape(input), GetTensorData(input), + GetTensorShape(output), GetTensorData(output)); +#endif /* HIFI_VFPU */ return kTfLiteOk; } case kTfLiteInt8: { @@ -140,14 +143,17 @@ TfLiteStatus ReluEval(TfLiteContext* context, TfLiteNode* node) { const RuntimeShape& input_shape = GetTensorShape(input); const RuntimeShape& output_shape = GetTensorShape(output); const int flat_size = MatchingFlatSize(input_shape, output_shape); + const uint8_t zero = input->params.zero_point; inp_data_ptr = GetTensorData(input); out_data_ptr = GetTensorData(output); - err = xa_nn_vec_activation_min_max_asym8_asym8( - out_data_ptr, inp_data_ptr, 0, 255, flat_size); // Is 255 right? 
+ err = xa_nn_vec_activation_min_max_asym8_asym8(out_data_ptr, inp_data_ptr, + zero, + std::numeric_limits::max(), + flat_size); - CHECK_ERR_HIFI_NNLIB_KER(err, "xa_nn_vec_activation_min_max_8_8 failed"); + CHECK_ERR_HIFI_NNLIB_KER(err, "xa_nn_vec_activation_min_max_asym8_asym8 failed"); return kTfLiteOk; } default: { @@ -168,6 +174,7 @@ TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) { switch (input->type) { case kTfLiteFloat32: { +#if HIFI_VFPU int err; const float* inp_data_ptr; float* out_data_ptr; @@ -180,7 +187,11 @@ TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) { err = xa_nn_vec_relu6_f32_f32(out_data_ptr, inp_data_ptr, flat_size); - CHECK_ERR_HIFI_NNLIB_KER(err, "xa_nn_vec_relu1_f32_f32 failed"); + CHECK_ERR_HIFI_NNLIB_KER(err, "xa_nn_vec_relu6_f32_f32 failed"); +#else + Relu6Float(GetTensorShape(input), GetTensorData(input), + GetTensorShape(output), GetTensorData(output)); +#endif /* HIFI_VFPU */ return kTfLiteOk; } case kTfLiteInt8: { @@ -209,7 +220,7 @@ TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) { err = xa_nn_vec_activation_min_max_asym8_asym8(out_data_ptr, inp_data_ptr, zero, six, flat_size); - CHECK_ERR_HIFI_NNLIB_KER(err, "xa_nn_vec_activation_min_max_8_8 failed"); + CHECK_ERR_HIFI_NNLIB_KER(err, "xa_nn_vec_activation_min_max_asym8_asym8 failed"); return kTfLiteOk; } default: { diff --git a/tensorflow/lite/micro/kernels/xtensa_hifi/add.cc b/tensorflow/lite/micro/kernels/xtensa_hifi/add.cc new file mode 100644 index 00000000000..d2bdcdd1192 --- /dev/null +++ b/tensorflow/lite/micro/kernels/xtensa_hifi/add.cc @@ -0,0 +1,279 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/lite/kernels/internal/reference/add.h" + +#include "tensorflow/lite/c/builtin_op_data.h" +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/quantization_util.h" +#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h" +#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" +#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/kernels/op_macros.h" +#include "tensorflow/lite/micro/memory_helpers.h" +#include "tensorflow/lite/micro/kernels/xtensa_hifi/xtensa_tf_micro_common.h" + +namespace tflite { +namespace ops { +namespace micro { +namespace add { + +constexpr int kInputTensor1 = 0; +constexpr int kInputTensor2 = 1; +constexpr int kOutputTensor = 0; + +struct OpData { + bool requires_broadcast; + + // These fields are used in both the general 8-bit -> 8bit quantized path, + // and the special 16-bit -> 16bit quantized path + int input1_shift; + int input2_shift; + int32 output_activation_min; + int32 output_activation_max; + + // These fields are used only in the general 8-bit -> 8bit quantized path + int32 input1_multiplier; + int32 input2_multiplier; + int32 output_multiplier; + int output_shift; + int left_shift; + int32 input1_offset; + int32 input2_offset; + int32 output_offset; +}; + +TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params, + const TfLiteTensor* input1, + const TfLiteTensor* input2, TfLiteTensor* output, + OpData* data) { + data->requires_broadcast = !HaveSameShapes(input1, input2); + + if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) { + // 8bit -> 8bit general quantized path, with general rescalings + data->input1_offset = -input1->params.zero_point; + data->input2_offset = -input2->params.zero_point; + data->output_offset = output->params.zero_point; + data->left_shift = 20; + const double twice_max_input_scale = + 2 * static_cast( + std::max(input1->params.scale, input2->params.scale)); + const double real_input1_multiplier = + static_cast(input1->params.scale) / twice_max_input_scale; + const double real_input2_multiplier = + static_cast(input2->params.scale) / twice_max_input_scale; + const double real_output_multiplier = + twice_max_input_scale / + ((1 << data->left_shift) * static_cast(output->params.scale)); + + QuantizeMultiplierSmallerThanOneExp( + real_input1_multiplier, &data->input1_multiplier, &data->input1_shift); + + QuantizeMultiplierSmallerThanOneExp( + real_input2_multiplier, &data->input2_multiplier, &data->input2_shift); + + QuantizeMultiplierSmallerThanOneExp( + real_output_multiplier, &data->output_multiplier, &data->output_shift); + + TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized( + context, params->activation, output, &data->output_activation_min, + &data->output_activation_max)); + } + + return kTfLiteOk; +} + +TfLiteStatus EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params, + const OpData* data, const TfLiteTensor* input1, + const TfLiteTensor* input2, TfLiteTensor* output) { + float output_activation_min, output_activation_max; + CalculateActivationRange(params->activation, &output_activation_min, + &output_activation_max); + tflite::ArithmeticParams op_params; + SetActivationParams(output_activation_min, output_activation_max, &op_params); +#define TF_LITE_ADD(opname) \ + reference_ops::opname(op_params, 
GetTensorShape(input1), \ + GetTensorData(input1), GetTensorShape(input2), \ + GetTensorData(input2), GetTensorShape(output), \ + GetTensorData(output)) + if (data->requires_broadcast) { + TF_LITE_ADD(BroadcastAdd4DSlow); + } else { +#if HIFI_VFPU + int err; + const RuntimeShape& input1_shape = GetTensorShape(input1); + const RuntimeShape& input2_shape = GetTensorShape(input2); + const RuntimeShape& output_shape = GetTensorShape(output); + const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape); + + err = xa_nn_elm_add_f32xf32_f32(GetTensorData(output), + GetTensorData(input1), + GetTensorData(input2), + flat_size); + + CHECK_ERR_HIFI_NNLIB_KER(err, "xa_nn_elm_add_f32xf32_f32 failed"); + + err = xa_nn_vec_activation_min_max_f32_f32(GetTensorData(output), + GetTensorData(output), + output_activation_min, + output_activation_max, + flat_size); + + CHECK_ERR_HIFI_NNLIB_KER(err, "xa_nn_vec_activation_min_max_f32_f32 failed"); +#else + TF_LITE_ADD(Add); +#endif /* HIFI_VFPU */ + } +#undef TF_LITE_ADD + return kTfLiteOk; +} + +TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node, + TfLiteAddParams* params, const OpData* data, + const TfLiteTensor* input1, + const TfLiteTensor* input2, + TfLiteTensor* output) { + if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) { + tflite::ArithmeticParams op_params; + op_params.left_shift = data->left_shift; + op_params.input1_offset = data->input1_offset; + op_params.input1_multiplier = data->input1_multiplier; + op_params.input1_shift = data->input1_shift; + op_params.input2_offset = data->input2_offset; + op_params.input2_multiplier = data->input2_multiplier; + op_params.input2_shift = data->input2_shift; + op_params.output_offset = data->output_offset; + op_params.output_multiplier = data->output_multiplier; + op_params.output_shift = data->output_shift; + SetActivationParams(data->output_activation_min, + data->output_activation_max, &op_params); + bool need_broadcast = reference_ops::ProcessBroadcastShapes( + GetTensorShape(input1), GetTensorShape(input2), &op_params); +#define TF_LITE_ADD(type, opname, dtype) \ + type::opname(op_params, GetTensorShape(input1), \ + GetTensorData(input1), GetTensorShape(input2), \ + GetTensorData(input2), GetTensorShape(output), \ + GetTensorData(output)); + if (output->type == kTfLiteInt8) { + if (need_broadcast) { + TF_LITE_ADD(reference_integer_ops, BroadcastAdd4DSlow, int8_t); + } else { + TF_LITE_ADD(reference_integer_ops, Add, int8_t); + } + } else { + if (need_broadcast) { + TF_LITE_ADD(reference_ops, BroadcastAdd4DSlow, uint8_t); + } else { + int err; + const RuntimeShape& input1_shape = GetTensorShape(input1); + const RuntimeShape& input2_shape = GetTensorShape(input2); + const RuntimeShape& output_shape = GetTensorShape(output); + const int flat_size = MatchingElementsSize(input1_shape, input2_shape, output_shape); + + err = xa_nn_elm_add_asym8xasym8_asym8(GetTensorData(output), + op_params.output_offset, + op_params.output_shift, + op_params.output_multiplier, + op_params.quantized_activation_min, + op_params.quantized_activation_max, + GetTensorData(input1) , + op_params.input1_offset, + op_params.input1_shift, + op_params.input1_multiplier, + GetTensorData(input2), + op_params.input2_offset, + op_params.input2_shift, + op_params.input2_multiplier, + op_params.left_shift, + flat_size); + + CHECK_ERR_HIFI_NNLIB_KER(err, "xa_nn_elm_add_asym8xasym8_asym8 failed"); + } + } +#undef TF_LITE_ADD + } + + return kTfLiteOk; +} + +void* Init(TfLiteContext* 
context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + void* data = nullptr; + if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) == + kTfLiteError) { + return nullptr; + } + return data; +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); + const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + OpData* data = static_cast(node->user_data); + auto* params = reinterpret_cast(node->builtin_data); + + TF_LITE_ENSURE_STATUS( + CalculateOpData(context, params, input1, input2, output, data)); + + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast(node->builtin_data); + + TFLITE_DCHECK(node->user_data != nullptr); + const OpData* data = static_cast(node->user_data); + + const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); + const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + if (output->type == kTfLiteFloat32) { + TF_LITE_ENSURE_OK(context, EvalAdd(context, node, params, data, input1, + input2, output)); + } else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) { + TF_LITE_ENSURE_OK(context, EvalAddQuantized(context, node, params, data, + input1, input2, output)); + } else { + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(output->type), output->type); + return kTfLiteError; + } + + return kTfLiteOk; +} + +} // namespace add + +TfLiteRegistration Register_ADD() { + return {/*init=*/add::Init, + /*free=*/nullptr, + /*prepare=*/add::Prepare, + /*invoke=*/add::Eval, + /*profiling_string=*/nullptr, + /*builtin_code=*/0, + /*custom_name=*/nullptr, + /*version=*/0}; +} + +} // namespace micro +} // namespace ops +} // namespace tflite diff --git a/tensorflow/lite/micro/kernels/xtensa_hifi/conv.cc b/tensorflow/lite/micro/kernels/xtensa_hifi/conv.cc index 5f91282c7e1..c576fb8aef5 100755 --- a/tensorflow/lite/micro/kernels/xtensa_hifi/conv.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifi/conv.cc @@ -1,24 +1,24 @@ -/****************************************************************************** - * Copyright (C) 2019 Cadence Design Systems, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to use this Software with Cadence processor cores only and - * not with any other processors and platforms, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- ******************************************************************************/ +/******************************************************************************* +* Copyright (c) 2019-2020 Cadence Design Systems, Inc. +* +* Permission is hereby granted, free of charge, to any person obtaining +* a copy of this software and associated documentation files (the +* "Software"), to use this Software with Cadence processor cores only and +* not with any other processors and platforms, subject to +* the following conditions: +* +* The above copyright notice and this permission notice shall be included +* in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +******************************************************************************/ /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); @@ -44,7 +44,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/padding.h" -#include "xtensa_tf_micro_common.h" +#include "tensorflow/lite/micro/kernels/xtensa_hifi/xtensa_tf_micro_common.h" namespace tflite { namespace ops { @@ -55,7 +55,6 @@ constexpr int kInputTensor = 0; constexpr int kFilterTensor = 1; constexpr int kBiasTensor = 2; constexpr int kOutputTensor = 0; -constexpr int kMaxChannels = 256; // Conv is quantized along dimension 0: // https://www.tensorflow.org/lite/performance/quantization_spec @@ -71,9 +70,8 @@ struct OpData { int output_shift; // Per channel output multiplier and shift. - // (b/141139247): Allocate these dynamically when possible. - int32_t per_channel_output_multiplier[kMaxChannels]; - int32_t per_channel_output_shift[kMaxChannels]; + int32_t* per_channel_output_multiplier; + int32_t* per_channel_output_shift; // The range of the fused activation layer. For example for kNone and // uint8_t these would be 0 and 255. 
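The hunk above replaces the fixed int32_t[kMaxChannels] arrays with plain pointers, so per-channel quantization storage now scales with the filter's actual channel count instead of being capped at 256 entries. The Prepare() hunk further down performs the allocation from the persistent arena; the sketch below condenses that pattern for reference. The helper name is illustrative only and does not exist in this patch; the AllocatePersistentBuffer signature is the one used throughout these files.

// Illustrative sketch, not part of the patch: per-channel quantization
// parameters sized from the filter's quantized dimension.
TfLiteStatus AllocatePerChannelParams(TfLiteContext* context,
                                      const TfLiteTensor* filter,
                                      OpData* data) {
  // Conv filters are quantized along dimension 0, so that dimension gives
  // the number of output channels the arrays must hold.
  const int num_channels = filter->dims->data[kConvQuantizedDimension];
  TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer(
      context, num_channels * sizeof(int32_t),
      reinterpret_cast<void**>(&data->per_channel_output_multiplier)));
  TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer(
      context, num_channels * sizeof(int32_t),
      reinterpret_cast<void**>(&data->per_channel_output_shift)));
  return kTfLiteOk;
}

Because the buffers come from the persistent arena during Prepare(), they live for the lifetime of the interpreter and can be reused on every invocation without any per-Eval allocation.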
@@ -94,10 +92,10 @@ inline PaddingType RuntimePaddingType(TfLitePadding padding) { } TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, - TfLiteConvParams* params, int width, int height, - int filter_width, int filter_height, int out_width, - int out_height, const TfLiteType data_type, - OpData* data) { + const TfLiteConvParams* params, int width, + int height, int filter_width, int filter_height, + int out_width, int out_height, + const TfLiteType data_type, OpData* data) { bool has_bias = node->inputs->size == 3; // Check number of inputs/outputs TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2); @@ -131,21 +129,81 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, return kTfLiteOk; } +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + void* data = nullptr; + if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) == + kTfLiteError) { + return nullptr; + } + return data; +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + OpData* data = static_cast<OpData*>(node->user_data); + const auto params = static_cast<const TfLiteConvParams*>(node->builtin_data); + + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + const TfLiteTensor* input = GetInput(context, node, kInputTensor); + const TfLiteTensor* filter = GetInput(context, node, kFilterTensor); + + int input_width = input->dims->data[2]; + int input_height = input->dims->data[1]; + int filter_width = filter->dims->data[2]; + int filter_height = filter->dims->data[1]; + int output_width = output->dims->data[2]; + int output_height = output->dims->data[1]; + + // Dynamically allocate per-channel quantization parameters. + const int num_channels = filter->dims->data[kConvQuantizedDimension]; + TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer( + context, num_channels * sizeof(int32_t), + reinterpret_cast<void**>(&data->per_channel_output_multiplier))); + TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer( + context, num_channels * sizeof(int32_t), + reinterpret_cast<void**>(&data->per_channel_output_shift))); + + // All per-channel quantized tensors need valid zero point and scale arrays.
+ if (input->type == kTfLiteInt8) { + TF_LITE_ENSURE_EQ(context, filter->quantization.type, + kTfLiteAffineQuantization); + + const auto* affine_quantization = + static_cast(filter->quantization.params); + TF_LITE_ENSURE(context, affine_quantization); + TF_LITE_ENSURE(context, affine_quantization->scale); + TF_LITE_ENSURE(context, affine_quantization->zero_point); + + TF_LITE_ENSURE(context, + affine_quantization->scale->size == 1 || + affine_quantization->scale->size == + filter->dims->data[kConvQuantizedDimension]); + TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size, + affine_quantization->zero_point->size); + } + + return CalculateOpData(context, node, params, input_width, input_height, + filter_width, filter_height, output_width, + output_height, input->type, data); +} // namespace conv + TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, - TfLiteConvParams* params, OpData* data, - const TfLiteTensor* input, - const TfLiteTensor* filter, const TfLiteTensor* bias, - TfLiteTensor* im2col, TfLiteTensor* hwcn_weights, - TfLiteTensor* output) { + TfLiteConvParams* params, const OpData& data, + const TfLiteTensor* input, const TfLiteTensor* filter, + const TfLiteTensor* bias, TfLiteTensor* im2col, + TfLiteTensor* hwcn_weights, TfLiteTensor* output) { const int32_t input_offset = -input->params.zero_point; const int32_t filter_offset = -filter->params.zero_point; const int32_t output_offset = output->params.zero_point; if ((params->dilation_width_factor == 1) && (params->dilation_height_factor == 1)) { - const uint8 *input_data, *filter_data; + const uint8_t *input_data, *filter_data; const int32_t* bias_data; - uint8* output_data; + uint8_t* output_data; const RuntimeShape& input_shape = GetTensorShape(input); const RuntimeShape& filter_shape = GetTensorShape(filter); const RuntimeShape& output_shape = GetTensorShape(output); @@ -158,14 +216,12 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, const int stride_width = params->stride_width; const int stride_height = params->stride_height; - const int dilation_width_factor = 1; - const int dilation_height_factor = 1; - const int pad_width = data->padding.width; - const int pad_height = data->padding.height; - const int32 output_activation_min = data->output_activation_min; - const int32 output_activation_max = data->output_activation_max; - const int32 output_multiplier = data->output_multiplier; - const int output_shift = -data->output_shift; + const int pad_width = data.padding.width; + const int pad_height = data.padding.height; + const int32 output_activation_min = data.output_activation_min; + const int32 output_activation_max = data.output_activation_max; + const int32 output_multiplier = data.output_multiplier; + const int output_shift = -data.output_shift; TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); @@ -186,13 +242,14 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, const int filter_depth = filter_shape.Dims(3); int err, output_data_format = 0; - void* p_scratch; - uint8 *p_filter, *p_out_scratch; + uint8_t* p_scratch; + uint8_t *p_filter; // Calculate filter_depth_padded as next near multiple of 4 int filter_depth_padded = (filter_depth + 3) & (~3); int out_length = output_height * output_width * output_depth; + int filter_size_padded = filter_height * filter_width * filter_depth_padded; int required_scratch, input_precision = PREC_ASYM8; - int h, w, 
c; + int h, c; required_scratch = xa_nn_conv2d_std_getsize( input_height, input_depth, filter_height, filter_width, stride_height, @@ -200,30 +257,20 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, if (required_scratch <= 0) { TF_LITE_KERNEL_LOG(context, - "conv2d_std_asym8: xa_nn_conv2d_std_getsize failed"); + "conv2d_std_asym8: xa_nn_conv2d_std_getsize failed"); return kTfLiteError; } ALLOCATE_XTENSA_NNLIB_SCRATCH_MEM; p_scratch = xtensa_nnlib_scratch_buf; - p_filter = (uint8*)p_scratch; - p_out_scratch = - (p_filter + - ALIGNED_SIZE((sizeof(uint8_t) * filter_height * filter_width * - filter_depth_padded * output_depth), - 8)); - required_scratch += - ALIGNED_SIZE((sizeof(uint8_t) * filter_height * filter_width * - filter_depth_padded * output_depth), - 8); - p_scratch = - (uint8*)(p_out_scratch + ALIGNED_SIZE(sizeof(uint8_t) * out_length, 8)); - required_scratch += ALIGNED_SIZE(sizeof(uint8_t) * out_length, 8); + p_filter = p_scratch; + required_scratch += ALIGNED_SIZE((sizeof(uint8_t) * filter_size_padded * output_depth), 8); + p_scratch += ALIGNED_SIZE(sizeof(uint8_t) * filter_size_padded * output_depth, 8); if (required_scratch > (int)XTENSA_NNLIB_MAX_SCRATCH_SIZE) { TF_LITE_KERNEL_LOG(context, - "conv2d_std_asym8: insufficient scratch memory"); + "conv2d_std_asym8: insufficient scratch memory"); return kTfLiteError; } @@ -231,18 +278,17 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, for (h = 0; h < filter_height * filter_width * output_depth; h++) { for (c = 0; c < filter_depth; c++) { p_filter[h * filter_depth_padded + c] = - filter_data[h * filter_depth + c]; + filter_data[h * filter_depth + c]; } for (c = input_depth; c < filter_depth_padded; c++) { p_filter[h * filter_depth_padded + c] = - -filter_offset; // filter_depth[h*input_depth + c]; + -filter_offset; // filter_depth[h*input_depth + c]; } } for (int batch = 0; batch < batches; ++batch) { - uint8* p_out_temp; - p_out_temp = (uint8*)&p_out_scratch[0]; - p_out_temp = (uint8*)ALIGN_PTR(p_out_temp, 8); + uint8_t* p_out_temp; + p_out_temp = &output_data[batch * out_length]; err = xa_nn_conv2d_std_asym8xasym8( p_out_temp, @@ -252,24 +298,26 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, filter_width, output_depth, stride_width, stride_height, pad_width, pad_height, output_height, output_width, input_offset, filter_offset, output_multiplier, output_shift, output_offset, output_data_format, - p_scratch); + static_cast(p_scratch)); CHECK_ERR_HIFI_NNLIB_KER( err, "conv2d_std_asym8: xa_nn_conv2d_std_asym8xasym8 failed"); - for (int i = 0; i < out_length; i++) { - uint8* p_temp; - p_temp = &output_data[batch * out_length]; + err = xa_nn_vec_activation_min_max_asym8_asym8(p_out_temp, + p_out_temp, + output_activation_min, + output_activation_max, + out_length); - ACTIVATION_MIN_MAX_ASYM8(p_temp[i], p_out_temp[i], - output_activation_min, output_activation_max) - } + CHECK_ERR_HIFI_NNLIB_KER( + err, "xa_nn_vec_activation_min_max_asym8_asym8 failed"); } } else { + // TODO(b/154032858): Investigate removing extra copies. 
ConvParams op_params; op_params.padding_type = RuntimePaddingType(params->padding); - op_params.padding_values.width = data->padding.width; - op_params.padding_values.height = data->padding.height; + op_params.padding_values.width = data.padding.width; + op_params.padding_values.height = data.padding.height; op_params.stride_width = params->stride_width; op_params.stride_height = params->stride_height; op_params.dilation_width_factor = params->dilation_width_factor; @@ -277,10 +325,10 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, op_params.input_offset = input_offset; op_params.weights_offset = filter_offset; op_params.output_offset = output_offset; - op_params.output_multiplier = data->output_multiplier; - op_params.output_shift = -data->output_shift; - op_params.quantized_activation_min = data->output_activation_min; - op_params.quantized_activation_max = data->output_activation_max; + op_params.output_multiplier = data.output_multiplier; + op_params.output_shift = -data.output_shift; + op_params.quantized_activation_min = data.output_activation_min; + op_params.quantized_activation_max = data.output_activation_max; reference_ops::Conv(op_params, GetTensorShape(input), GetTensorData(input), GetTensorShape(filter), GetTensorData(filter), GetTensorShape(bias), @@ -292,11 +340,12 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, } void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, - TfLiteConvParams* params, OpData* data, + TfLiteConvParams* params, const OpData& data, const TfLiteTensor* input, const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output, TfLiteTensor* im2col) { + // TODO(b/154032858): Investigate removing extra copies. ConvParams op_params; op_params.input_offset = -input->params.zero_point; op_params.output_offset = output->params.zero_point; @@ -304,14 +353,14 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, op_params.stride_width = params->stride_width; op_params.dilation_height_factor = params->dilation_height_factor; op_params.dilation_width_factor = params->dilation_width_factor; - op_params.padding_values.height = data->padding.height; - op_params.padding_values.width = data->padding.width; - op_params.quantized_activation_min = data->output_activation_min; - op_params.quantized_activation_max = data->output_activation_max; + op_params.padding_values.height = data.padding.height; + op_params.padding_values.width = data.padding.width; + op_params.quantized_activation_min = data.output_activation_min; + op_params.quantized_activation_max = data.output_activation_max; reference_integer_ops::ConvPerChannel( - op_params, data->per_channel_output_multiplier, - data->per_channel_output_shift, GetTensorShape(input), + op_params, data.per_channel_output_multiplier, + data.per_channel_output_shift, GetTensorShape(input), GetTensorData(input), GetTensorShape(filter), GetTensorData(filter), GetTensorShape(bias), GetTensorData(bias), GetTensorShape(output), @@ -319,7 +368,7 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, } TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, - TfLiteConvParams* params, OpData* data, + TfLiteConvParams* params, const OpData& data, const TfLiteTensor* input, const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* im2col, TfLiteTensor* hwcn_weights, TfLiteTensor* output) { @@ -327,6 +376,7 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, 
CalculateActivationRange(params->activation, &output_activation_min, &output_activation_max); +#if HIFI_VFPU if ((params->dilation_width_factor == 1) && (params->dilation_height_factor == 1)) { const float *input_data, *filter_data; @@ -344,10 +394,8 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, const int stride_width = params->stride_width; const int stride_height = params->stride_height; - const int dilation_width_factor = 1; - const int dilation_height_factor = 1; - const int pad_width = data->padding.width; - const int pad_height = data->padding.height; + const int pad_width = data.padding.width; + const int pad_height = data.padding.height; TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); @@ -366,13 +414,14 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, const int output_width = output_shape.Dims(2); const int filter_depth = filter_shape.Dims(3); int err, output_data_format = 0; - void* p_scratch; - float *p_filter, *p_out_scratch; + uint8_t* p_scratch; + float *p_filter; // Calculate filter_depth_padded as next near multiple of 2 int filter_depth_padded = (filter_depth + 1) & (~1); int out_length = output_height * output_width * output_depth; + int filter_size_padded = filter_height * filter_width * filter_depth_padded; int required_scratch, input_precision = PREC_F32; - int h, w, c; + int h, c; required_scratch = xa_nn_conv2d_std_getsize( input_height, input_depth, filter_height, filter_width, stride_height, @@ -380,30 +429,20 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, if (required_scratch <= 0) { TF_LITE_KERNEL_LOG(context, - "conv2d_std_f32: xa_nn_conv2d_std_getsize failed"); + "conv2d_std_f32: xa_nn_conv2d_std_getsize failed"); return kTfLiteError; } ALLOCATE_XTENSA_NNLIB_SCRATCH_MEM; p_scratch = xtensa_nnlib_scratch_buf; - p_filter = (float*)p_scratch; - p_out_scratch = - (float*)((uint8_t*)p_filter + - ALIGNED_SIZE((sizeof(float) * filter_height * filter_width * - filter_depth_padded * output_depth), - 8)); - required_scratch += - ALIGNED_SIZE((sizeof(float) * filter_height * filter_width * - filter_depth_padded * output_depth), - 8); - p_scratch = (float*)((uint8_t*)p_out_scratch + - ALIGNED_SIZE(sizeof(float) * out_length, 8)); - required_scratch += ALIGNED_SIZE(sizeof(float) * out_length, 8); + p_filter = reinterpret_cast(p_scratch); + p_scratch += ALIGNED_SIZE((sizeof(float) * filter_size_padded * output_depth), 8); + required_scratch += ALIGNED_SIZE((sizeof(float) * filter_size_padded * output_depth), 8); if (required_scratch > (int)XTENSA_NNLIB_MAX_SCRATCH_SIZE) { TF_LITE_KERNEL_LOG(context, - "conv2d_std_f32: insufficient scratch memory"); + "conv2d_std_f32: insufficient scratch memory"); return kTfLiteError; } @@ -411,7 +450,7 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, for (h = 0; h < filter_height * filter_width * output_depth; h++) { for (c = 0; c < filter_depth; c++) { p_filter[h * filter_depth_padded + c] = - filter_data[h * filter_depth + c]; + filter_data[h * filter_depth + c]; } for (c = input_depth; c < filter_depth_padded; c++) { p_filter[h * filter_depth_padded + c] = 0; @@ -420,8 +459,7 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, for (int batch = 0; batch < batches; ++batch) { float* p_out_temp; - p_out_temp = (float*)&p_out_scratch[0]; - p_out_temp = (float*)ALIGN_PTR(p_out_temp, 8); + p_out_temp = &output_data[batch * out_length]; err = 
xa_nn_conv2d_std_f32( p_out_temp, @@ -429,23 +467,29 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, p_filter, bias_data, input_height, input_width, input_depth, filter_height, filter_width, output_depth, stride_width, stride_height, pad_width, pad_height, output_height, output_width, - output_data_format, p_scratch); + output_data_format, static_cast(p_scratch)); CHECK_ERR_HIFI_NNLIB_KER( err, "conv2d_std_f32: xa_nn_conv2d_std_f32xf32 failed"); - for (int i = 0; i < out_length; i++) { - float* p_temp; - p_temp = &output_data[batch * out_length]; - ACTIVATION_MIN_MAX(float, p_temp[i], p_out_temp[i], - output_activation_min, output_activation_max) - } + err = xa_nn_vec_activation_min_max_f32_f32(p_out_temp, + p_out_temp, + output_activation_min, + output_activation_max, + out_length); + + CHECK_ERR_HIFI_NNLIB_KER( + err, "xa_nn_vec_activation_min_max_f32_f32 failed"); } - } else { + } + else +#endif /* HIFI_VFPU */ + { + // TODO(b/154032858): Investigate removing extra copies. ConvParams op_params; op_params.padding_type = RuntimePaddingType(params->padding); - op_params.padding_values.width = data->padding.width; - op_params.padding_values.height = data->padding.height; + op_params.padding_values.width = data.padding.width; + op_params.padding_values.height = data.padding.height; op_params.stride_width = params->stride_width; op_params.stride_height = params->stride_height; op_params.dilation_width_factor = params->dilation_width_factor; @@ -471,50 +515,20 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* filter = GetInput(context, node, kFilterTensor); const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor); - int input_width = input->dims->data[2]; - int input_height = input->dims->data[1]; - int filter_width = filter->dims->data[2]; - int filter_height = filter->dims->data[1]; - int output_width = output->dims->data[2]; - int output_height = output->dims->data[1]; - - OpData data; - - // All per-channel quantized tensors need valid zero point and scale arrays. - if (input->type == kTfLiteInt8) { - TF_LITE_ENSURE_EQ(context, filter->quantization.type, - kTfLiteAffineQuantization); - - const auto* affine_quantization = - reinterpret_cast( - filter->quantization.params); - TF_LITE_ENSURE(context, affine_quantization); - TF_LITE_ENSURE(context, affine_quantization->scale); - TF_LITE_ENSURE(context, affine_quantization->zero_point); - - TF_LITE_ENSURE(context, - affine_quantization->scale->size == 1 || - affine_quantization->scale->size == - filter->dims->data[kConvQuantizedDimension]); - TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size, - affine_quantization->zero_point->size); - } - - TF_LITE_ENSURE_STATUS(CalculateOpData( - context, node, params, input_width, input_height, filter_width, - filter_height, output_width, output_height, input->type, &data)); + TFLITE_DCHECK(node->user_data != nullptr); + const OpData& data = *(static_cast(node->user_data)); switch (input->type) { // Already know in/out types are same. 
case kTfLiteFloat32: - EvalFloat(context, node, params, &data, input, filter, bias, nullptr, + EvalFloat(context, node, params, data, input, filter, bias, nullptr, nullptr, output); break; case kTfLiteInt8: - EvalQuantizedPerChannel(context, node, params, &data, input, filter, bias, + EvalQuantizedPerChannel(context, node, params, data, input, filter, bias, output, nullptr); break; case kTfLiteUInt8: - EvalQuantized(context, node, params, &data, input, filter, bias, nullptr, + EvalQuantized(context, node, params, data, input, filter, bias, nullptr, nullptr, output); break; default: @@ -528,9 +542,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace conv TfLiteRegistration Register_CONV_2D() { - return {/*init=*/nullptr, + return {/*init=*/conv::Init, /*free=*/nullptr, - /*prepare=*/nullptr, + /*prepare=*/conv::Prepare, /*invoke=*/conv::Eval, /*profiling_string=*/nullptr, /*builtin_code=*/0, diff --git a/tensorflow/lite/micro/kernels/xtensa_hifi/depthwise_conv.cc b/tensorflow/lite/micro/kernels/xtensa_hifi/depthwise_conv.cc index e01a5916fca..fe3aaf5ed0c 100755 --- a/tensorflow/lite/micro/kernels/xtensa_hifi/depthwise_conv.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifi/depthwise_conv.cc @@ -1,24 +1,24 @@ -/****************************************************************************** - * Copyright (C) 2019 Cadence Design Systems, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to use this Software with Cadence processor cores only and - * not with any other processors and platforms, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - ******************************************************************************/ +/******************************************************************************* +* Copyright (c) 2019-2020 Cadence Design Systems, Inc. +* +* Permission is hereby granted, free of charge, to any person obtaining +* a copy of this software and associated documentation files (the +* "Software"), to use this Software with Cadence processor cores only and +* not with any other processors and platforms, subject to +* the following conditions: +* +* The above copyright notice and this permission notice shall be included +* in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+******************************************************************************/ /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); @@ -45,7 +45,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/padding.h" -#include "xtensa_tf_micro_common.h" +#include "tensorflow/lite/micro/kernels/xtensa_hifi/xtensa_tf_micro_common.h" namespace tflite { namespace ops { @@ -57,8 +57,6 @@ constexpr int kInputTensor = 0; constexpr int kFilterTensor = 1; constexpr int kBiasTensor = 2; constexpr int kOutputTensor = 0; -// Per channel quantization is not needed for any model on xtensa. -constexpr int kMaxChannels = 256; // Depthwise conv is quantized along dimension 3: // https://www.tensorflow.org/lite/performance/quantization_spec @@ -72,10 +70,8 @@ struct OpData { int output_shift; // Per channel output multiplier and shift. - // (b/141139247): Allocate these dynamically when possible. - int32_t per_channel_output_multiplier[kMaxChannels]; - int32_t per_channel_output_shift[kMaxChannels]; - + int32_t* per_channel_output_multiplier; + int32_t* per_channel_output_shift; // The range of the fused activation layer. For example for kNone and // uint8_t these would be 0 and 255. int32_t output_activation_min; @@ -107,26 +103,88 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, TfLiteTensor* output = GetOutput(context, node, kOutputTensor); int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension]; - TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams( + return tflite::PopulateConvolutionQuantizationParams( context, input, filter, bias, output, params->activation, &data->output_multiplier, &data->output_shift, &data->output_activation_min, &data->output_activation_max, data->per_channel_output_multiplier, - reinterpret_cast(data->per_channel_output_shift), num_channels)); + reinterpret_cast(data->per_channel_output_shift), num_channels); } return kTfLiteOk; } } // namespace +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + void* data = nullptr; + if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) == + kTfLiteError) { + return nullptr; + } + return data; +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + auto* params = + reinterpret_cast(node->builtin_data); + OpData* data = static_cast(node->user_data); + + const TfLiteTensor* input = GetInput(context, node, kInputTensor); + const TfLiteTensor* filter = GetInput(context, node, kFilterTensor); + + const TfLiteType data_type = input->type; + int width = SizeOfDimension(input, 2); + int height = SizeOfDimension(input, 1); + int filter_width = SizeOfDimension(filter, 2); + int filter_height = SizeOfDimension(filter, 1); + + // Per channel quantization is only needed for int8 inference. For other + // quantized types, only a single scale and zero point is needed. + const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension]; + // Dynimically allocate per-channel quantization parameters. 
+ TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer( + context, num_channels * sizeof(int32_t), + reinterpret_cast(&data->per_channel_output_multiplier))); + TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer( + context, num_channels * sizeof(int32_t), + reinterpret_cast(&data->per_channel_output_shift))); + + // All per-channel quantized tensors need valid zero point and scale arrays. + if (input->type == kTfLiteInt8) { + TF_LITE_ENSURE_EQ(context, filter->quantization.type, + kTfLiteAffineQuantization); + + const auto* affine_quantization = + reinterpret_cast( + filter->quantization.params); + TF_LITE_ENSURE(context, affine_quantization); + TF_LITE_ENSURE(context, affine_quantization->scale); + TF_LITE_ENSURE(context, affine_quantization->zero_point); + TF_LITE_ENSURE( + context, affine_quantization->scale->size == 1 || + affine_quantization->scale->size == + filter->dims->data[kDepthwiseConvQuantizedDimension]); + TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size, + affine_quantization->zero_point->size); + } + + return CalculateOpData(context, node, params, width, height, filter_width, + filter_height, data_type, data); +} + TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, - TfLiteDepthwiseConvParams* params, OpData* data, + TfLiteDepthwiseConvParams* params, const OpData* data, const TfLiteTensor* input, const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output) { float output_activation_min, output_activation_max; CalculateActivationRange(params->activation, &output_activation_min, &output_activation_max); +#if HIFI_VFPU if ((params->dilation_width_factor == 1) && (params->dilation_height_factor == 1)) { const float *input_data, *filter_data, *bias_data; @@ -143,10 +201,6 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, const int stride_width = params->stride_width; const int stride_height = params->stride_height; - const int dilation_width_factor = 1; - const int dilation_height_factor = 1; - // const int dilation_width_factor = params->dilation_width_factor;; - // const int dilation_height_factor = params->dilation_height_factor; const int pad_width = data->padding.width; const int pad_height = data->padding.height; const int depth_multiplier = params->depth_multiplier; @@ -168,7 +222,7 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); int32_t err, input_data_format = 0, output_data_format = 0; - void* p_scratch; + uint8_t* p_scratch; float* p_filter; int filter_depth_padded, filter_size_padded, required_scratch; int input_precision = PREC_F32; @@ -194,18 +248,17 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, required_scratch += ALIGNED_SIZE(sizeof(float) * filter_size_padded, 8); if (required_scratch > (int)XTENSA_NNLIB_MAX_SCRATCH_SIZE) { TF_LITE_KERNEL_LOG(context, - "DepthwiseConvFloat: insufficient scratch memory"); + "DepthwiseConvFloat: insufficient scratch memory"); return kTfLiteError; } - p_filter = (float*)p_scratch; - p_scratch = (void*)((uint8_t*)p_filter + - ALIGNED_SIZE(sizeof(float) * filter_size_padded, 8)); + p_filter = reinterpret_cast(p_scratch); + p_scratch += ALIGNED_SIZE(sizeof(float) * filter_size_padded, 8); for (h = 0; h < filter_height * filter_width; h++) { for (c = 0; c < filter_depth; c++) { p_filter[h * filter_depth_padded + c] = - filter_data[h * filter_depth + c]; + filter_data[h * filter_depth + c]; } for (c = filter_depth; c < filter_depth_padded; c++) { p_filter[h * 
filter_depth_padded + c] = 0; @@ -220,37 +273,23 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, input_height, input_width, input_depth, filter_height, filter_width, depth_multiplier, stride_width, stride_height, pad_width, pad_height, output_height, output_width, input_data_format, output_data_format, - p_scratch); + static_cast(p_scratch)); CHECK_ERR_HIFI_NNLIB_KER( err, "DepthwiseConvFloat: xa_nn_conv2d_depthwise_f32 failed"); } - // pre loop for activation_min_max to handle alignment int out_length = batches * output_height * output_width * output_depth; - uint32 p_unalign_val = (uint32)output_data, p_align_val; - p_align_val = (p_unalign_val + 7) & (~7); + err = xa_nn_vec_activation_min_max_f32_f32(output_data, output_data, + output_activation_min, + output_activation_max, out_length); - int pre_loop_count = p_align_val - p_unalign_val; - pre_loop_count = MIN(pre_loop_count, out_length); - - for (i = 0; i < pre_loop_count; i++) { - ACTIVATION_MIN_MAX(float, output_data[i], output_data[i], - output_activation_min, output_activation_max) - } - - out_length = out_length - pre_loop_count; - - if (out_length) { - err = xa_nn_vec_activation_min_max_f32_f32( - &output_data[i], &output_data[i], output_activation_min, - output_activation_max, out_length); - - CHECK_ERR_HIFI_NNLIB_KER( - err, - "DepthwiseConvFloat: xa_nn_vec_activation_min_max_f32_f32 failed"); - } - } else { + CHECK_ERR_HIFI_NNLIB_KER(err, + "DepthwiseConvFloat: xa_nn_vec_activation_min_max_f32_f32 failed"); + } + else +#endif /* HIFI_VFPU */ + { tflite::DepthwiseParams op_params; // Padding type is ignored, but still set. op_params.padding_type = PaddingType::kSame; @@ -267,15 +306,15 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, tflite::reference_ops::DepthwiseConv( op_params, GetTensorShape(input), GetTensorData(input), GetTensorShape(filter), GetTensorData(filter), - GetTensorShape(bias), GetTensorData(bias), - GetTensorShape(output), GetTensorData(output)); + GetTensorShape(bias), GetTensorData(bias), GetTensorShape(output), + GetTensorData(output)); } return kTfLiteOk; } void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, - TfLiteDepthwiseConvParams* params, OpData* data, - const TfLiteTensor* input, + TfLiteDepthwiseConvParams* params, + const OpData* data, const TfLiteTensor* input, const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output) { DepthwiseParams op_params; @@ -290,7 +329,7 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, op_params.input_offset = -input->params.zero_point; op_params.weights_offset = 0; op_params.output_offset = output->params.zero_point; - // (b/130439627): Use calculated value for clamping. + // TODO(b/130439627): Use calculated value for clamping. 
op_params.quantized_activation_min = std::numeric_limits::min(); op_params.quantized_activation_max = std::numeric_limits::max(); @@ -304,19 +343,18 @@ void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node, } TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, - TfLiteDepthwiseConvParams* params, OpData* data, - const TfLiteTensor* input, - const TfLiteTensor* filter, const TfLiteTensor* bias, - TfLiteTensor* output) { + TfLiteDepthwiseConvParams* params, const OpData* data, + const TfLiteTensor* input, const TfLiteTensor* filter, + const TfLiteTensor* bias, TfLiteTensor* output) { const int32_t input_offset = -input->params.zero_point; const int32_t filter_offset = -filter->params.zero_point; const int32_t output_offset = output->params.zero_point; if ((params->dilation_width_factor == 1) && (params->dilation_height_factor == 1)) { - const uint8 *input_data, *filter_data; + const uint8_t *input_data, *filter_data; const int32_t* bias_data; - uint8* output_data; + uint8_t* output_data; const RuntimeShape& input_shape = GetTensorShape(input); const RuntimeShape& filter_shape = GetTensorShape(filter); const RuntimeShape& output_shape = GetTensorShape(output); @@ -329,10 +367,6 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, const int stride_width = params->stride_width; const int stride_height = params->stride_height; - const int dilation_width_factor = 1; - const int dilation_height_factor = 1; - // const int dilation_width_factor = params->dilation_width_factor; - // const int dilation_height_factor = params->dilation_height_factor; const int pad_width = data->padding.width; const int pad_height = data->padding.height; const int depth_multiplier = params->depth_multiplier; @@ -360,11 +394,11 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); int32_t err, i, input_data_format = 0, output_data_format = 0; - void* p_scratch; - uint8* p_filter; + uint8_t* p_scratch; + uint8_t* p_filter; int filter_depth_padded, filter_size_padded, required_scratch; int input_precision = PREC_ASYM8; - int h, c; + int h; ALLOCATE_XTENSA_NNLIB_SCRATCH_MEM; p_scratch = xtensa_nnlib_scratch_buf; @@ -386,22 +420,17 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, if (required_scratch > (int)XTENSA_NNLIB_MAX_SCRATCH_SIZE) { TF_LITE_KERNEL_LOG(context, - "DepthwiseConvAsym8: insufficient scratch memory"); + "DepthwiseConvAsym8: insufficient scratch memory"); return kTfLiteError; } - p_filter = (uint8*)p_scratch; - p_scratch = (void*)(p_filter + - ALIGNED_SIZE(sizeof(uint8_t) * filter_size_padded, 8)); + p_filter = p_scratch; + p_scratch += ALIGNED_SIZE(sizeof(uint8_t) * filter_size_padded, 8); + int pad_value = filter_depth_padded - filter_depth; for (h = 0; h < filter_height * filter_width; h++) { - for (c = 0; c < filter_depth; c++) { - p_filter[h * filter_depth_padded + c] = - filter_data[h * filter_depth + c]; - } - for (c = filter_depth; c < filter_depth_padded; c++) { - p_filter[h * filter_depth_padded + c] = -filter_offset; - } + memcpy(&p_filter[h*filter_depth_padded], &filter_data[h*filter_depth], filter_depth); + memset(&p_filter[h*filter_depth_padded + filter_depth], -filter_offset, pad_value); } for (i = 0; i < batches; i++) { @@ -413,37 +442,24 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, depth_multiplier, stride_width, stride_height, pad_width, pad_height, output_height, output_width, input_offset, filter_offset, 
output_multiplier, output_shift, output_offset, input_data_format, - output_data_format, p_scratch); + output_data_format, static_cast(p_scratch)); CHECK_ERR_HIFI_NNLIB_KER( err, "DepthwiseConvAsym8: xa_nn_conv2d_depthwise_asym8xasym8 failed"); } - // pre loop for activation_min_max to handle alignment int out_length = batches * output_height * output_width * output_depth; - uint32 p_unalign_val = (uint32)output_data, p_align_val; - p_align_val = (p_unalign_val + 7) & (~7); + err = xa_nn_vec_activation_min_max_asym8_asym8(output_data, + output_data, + output_activation_min, + output_activation_max, + out_length); - int pre_loop_count = p_align_val - p_unalign_val; - pre_loop_count = MIN(pre_loop_count, out_length); + CHECK_ERR_HIFI_NNLIB_KER( + err, + "DepthwiseConvAsym8: xa_nn_vec_activation_min_max_asym8_asym8 " + "failed"); - for (i = 0; i < pre_loop_count; i++) { - ACTIVATION_MIN_MAX_ASYM8(output_data[i], output_data[i], - output_activation_min, output_activation_max) - } - - out_length = out_length - pre_loop_count; - - if (out_length > 0) { - err = xa_nn_vec_activation_min_max_asym8_asym8( - &output_data[i], &output_data[i], output_activation_min, - output_activation_max, out_length); - - CHECK_ERR_HIFI_NNLIB_KER( - err, - "DepthwiseConvAsym8: xa_nn_vec_activation_min_max_asym8_asym8 " - "failed"); - } } else { tflite::DepthwiseParams op_params; // Padding type is ignored, but still set. @@ -474,8 +490,12 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, } TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + auto* params = reinterpret_cast(node->builtin_data); + const OpData& data = *(static_cast(node->user_data)); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); const TfLiteTensor* input = GetInput(context, node, kInputTensor); @@ -483,38 +503,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* bias = (NumInputs(node) == 3) ? GetInput(context, node, kBiasTensor) : nullptr; - const TfLiteType data_type = input->type; - int width = SizeOfDimension(input, 2); - int height = SizeOfDimension(input, 1); - int filter_width = SizeOfDimension(filter, 2); - int filter_height = SizeOfDimension(filter, 1); - - OpData data; - - // All per-channel quantized tensors need valid zero point and scale arrays. - if (input->type == kTfLiteInt8) { - TF_LITE_ENSURE_EQ(context, filter->quantization.type, - kTfLiteAffineQuantization); - - const auto* affine_quantization = - reinterpret_cast( - filter->quantization.params); - TF_LITE_ENSURE(context, affine_quantization); - TF_LITE_ENSURE(context, affine_quantization->scale); - TF_LITE_ENSURE(context, affine_quantization->zero_point); - TF_LITE_ENSURE( - context, affine_quantization->scale->size == 1 || - affine_quantization->scale->size == - filter->dims->data[kDepthwiseConvQuantizedDimension]); - TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size, - affine_quantization->zero_point->size); - } - - TF_LITE_ENSURE_STATUS(CalculateOpData(context, node, params, width, height, - filter_width, filter_height, data_type, - &data)); - - // (aselle): Consider whether float conv and quantized conv should be + // TODO(aselle): Consider whether float conv and quantized conv should be // separate ops to avoid dispatch overhead here. switch (input->type) { // Already know in/out types are same. 
case kTfLiteFloat32: @@ -538,9 +527,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace depthwise_conv TfLiteRegistration Register_DEPTHWISE_CONV_2D() { - return {/*init=*/nullptr, + return {/*init=*/depthwise_conv::Init, /*free=*/nullptr, - /*prepare=*/nullptr, + /*prepare=*/depthwise_conv::Prepare, /*invoke=*/depthwise_conv::Eval, /*profiling_string=*/nullptr, /*builtin_code=*/0, diff --git a/tensorflow/lite/micro/kernels/xtensa_hifi/floor.cc b/tensorflow/lite/micro/kernels/xtensa_hifi/floor.cc index 0e597465260..44aac921c87 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifi/floor.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifi/floor.cc @@ -1,24 +1,24 @@ -/****************************************************************************** - * Copyright (C) 2019 Cadence Design Systems, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to use this Software with Cadence processor cores only and - * not with any other processors and platforms, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - ******************************************************************************/ +/******************************************************************************* +* Copyright (c) 2019-2020 Cadence Design Systems, Inc. +* +* Permission is hereby granted, free of charge, to any person obtaining +* a copy of this software and associated documentation files (the +* "Software"), to use this Software with Cadence processor cores only and +* not with any other processors and platforms, subject to +* the following conditions: +* +* The above copyright notice and this permission notice shall be included +* in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +******************************************************************************/ /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); @@ -39,7 +39,7 @@ limitations under the License. 
#include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" -#include "xtensa_tf_micro_common.h" +#include "tensorflow/lite/micro/kernels/xtensa_hifi/xtensa_tf_micro_common.h" namespace tflite { namespace ops { @@ -53,6 +53,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* input = GetInput(context, node, kInputTensor); TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); +#if HIFI_VFPU int err; const float* inp_data_ptr; float* out_data_ptr; @@ -66,6 +67,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { err = xa_nn_elm_floor_f32_f32(out_data_ptr, inp_data_ptr, flat_size); CHECK_ERR_HIFI_NNLIB_KER(err, "xa_nn_elm_floor_f32_f32 failed"); +#else + reference_ops::Floor(GetTensorShape(input), GetTensorData(input), + GetTensorShape(output), GetTensorData(output)); +#endif /* HIFI_VFPU */ return kTfLiteOk; } } // namespace floor diff --git a/tensorflow/lite/micro/kernels/xtensa_hifi/fully_connected.cc b/tensorflow/lite/micro/kernels/xtensa_hifi/fully_connected.cc index 74d148200cb..b070aad2bf5 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifi/fully_connected.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifi/fully_connected.cc @@ -1,24 +1,24 @@ -/****************************************************************************** - * Copyright (C) 2019 Cadence Design Systems, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to use this Software with Cadence processor cores only and - * not with any other processors and platforms, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - ******************************************************************************/ +/******************************************************************************* +* Copyright (c) 2019-2020 Cadence Design Systems, Inc. +* +* Permission is hereby granted, free of charge, to any person obtaining +* a copy of this software and associated documentation files (the +* "Software"), to use this Software with Cadence processor cores only and +* not with any other processors and platforms, subject to +* the following conditions: +* +* The above copyright notice and this permission notice shall be included +* in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +******************************************************************************/ /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); @@ -43,7 +43,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" -#include "xtensa_tf_micro_common.h" +#include "tensorflow/lite/micro/kernels/xtensa_hifi/xtensa_tf_micro_common.h" namespace tflite { namespace ops { @@ -70,7 +70,7 @@ constexpr int kBiasTensor = 2; constexpr int kOutputTensor = 0; TfLiteStatus CalculateOpData(TfLiteContext* context, - TfLiteFullyConnectedParams* params, + TfLiteFusedActivation activation, TfLiteType data_type, const TfLiteTensor* input, const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output, @@ -84,7 +84,7 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, QuantizeMultiplier(real_multiplier, &data->output_multiplier, &exponent); data->output_shift = -exponent; TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized( - context, params->activation, output, &data->output_activation_min, + context, activation, output, &data->output_activation_min, &data->output_activation_max)); } return status; @@ -92,20 +92,50 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, } // namespace +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + void* data = nullptr; + if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) == + kTfLiteError) { + return nullptr; + } + return data; +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TFLITE_DCHECK(node->user_data != nullptr); + TFLITE_DCHECK(node->builtin_data != nullptr); + + OpData* data = static_cast(node->user_data); + const auto params = + static_cast(node->builtin_data); + + const TfLiteTensor* input = GetInput(context, node, kInputTensor); + const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor); + const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + TF_LITE_ENSURE_EQ(context, input->type, output->type); + TF_LITE_ENSURE_MSG(context, input->type == filter->type, + "Hybrid models are not supported on TFLite Micro."); + + return CalculateOpData(context, params->activation, input->type, input, + filter, bias, output, data); +} + TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node, - TfLiteFullyConnectedParams* params, OpData* data, - const TfLiteTensor* input, + const OpData& data, const TfLiteTensor* input, const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output) { - FullyConnectedParams op_params; + tflite::FullyConnectedParams op_params; op_params.input_offset = -input->params.zero_point; op_params.weights_offset = -filter->params.zero_point; op_params.output_offset = output->params.zero_point; - op_params.output_multiplier = data->output_multiplier; - // (b/138810107): Figure out whether output shift should be inverted - op_params.output_shift = -data->output_shift; - 
op_params.quantized_activation_min = data->output_activation_min; - op_params.quantized_activation_max = data->output_activation_max; + op_params.output_multiplier = data.output_multiplier; + // TODO(b/138810107): Figure out whether output shift should be inverted + op_params.output_shift = -data.output_shift; + op_params.quantized_activation_min = data.output_activation_min; + op_params.quantized_activation_max = data.output_activation_max; reference_integer_ops::FullyConnected( op_params, GetTensorShape(input), GetTensorData(input), @@ -116,8 +146,7 @@ TfLiteStatus EvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node, } TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, - TfLiteFullyConnectedParams* params, OpData* data, - const TfLiteTensor* input, + const OpData& data, const TfLiteTensor* input, const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output) { const int32_t input_offset = -input->params.zero_point; @@ -128,11 +157,11 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, op_params.input_offset = input_offset; op_params.weights_offset = filter_offset; op_params.output_offset = output_offset; - op_params.output_multiplier = data->output_multiplier; + op_params.output_multiplier = data.output_multiplier; // Legacy ops used mixed left and right shifts. Now all are +ve-means-left. - op_params.output_shift = -data->output_shift; - op_params.quantized_activation_min = data->output_activation_min; - op_params.quantized_activation_max = data->output_activation_max; + op_params.output_shift = -data.output_shift; + op_params.quantized_activation_min = data.output_activation_min; + op_params.quantized_activation_max = data.output_activation_max; #define TF_LITE_FULLY_CONNECTED(output_data_type) \ reference_ops::FullyConnected( \ @@ -162,11 +191,14 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, CHECK_ERR_HIFI_NNLIB_KER( ret, "xa_nn_fully_connected_asym8xasym8_asym8 failed"); } - for (int i = 0; i < batches * out_depth; i++) { - ACTIVATION_MIN_MAX_ASYM8(p_out[i], p_out[i], - data->output_activation_min, - data->output_activation_max) - } + ret = xa_nn_vec_activation_min_max_asym8_asym8(p_out, + p_out, + data.output_activation_min, + data.output_activation_max, + batches*out_depth); + + CHECK_ERR_HIFI_NNLIB_KER( + ret, "xa_nn_vec_activation_min_max_asym8_asym8 failed"); break; } case kTfLiteInt16: @@ -182,15 +214,16 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, } TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, - TfLiteFullyConnectedParams* params, OpData* data, + TfLiteFusedActivation activation, const TfLiteTensor* input, const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output) { float output_activation_min, output_activation_max; - CalculateActivationRange(params->activation, &output_activation_min, + CalculateActivationRange(activation, &output_activation_min, &output_activation_max); tflite::FullyConnectedParams op_params; op_params.float_activation_min = output_activation_min; op_params.float_activation_max = output_activation_max; +#if HIFI_VFPU int ret, b, weight_depth, out_depth, batches; weight_depth = GetTensorShape(filter).Dims(GetTensorShape(filter).DimensionsCount() - 1); @@ -208,43 +241,52 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, CHECK_ERR_HIFI_NNLIB_KER(ret, "xa_nn_fully_connected_f32 failed."); } float* p_out = GetTensorData(output); - for (int i = 0; i < batches * out_depth; i++) { - 
ACTIVATION_MIN_MAX(float, p_out[i], p_out[i], output_activation_min, - output_activation_max) - } + ret = xa_nn_vec_activation_min_max_f32_f32(p_out, + p_out, + output_activation_min, + output_activation_max, + batches*out_depth + ); + CHECK_ERR_HIFI_NNLIB_KER( + ret, "xa_nn_vec_activation_min_max_f32_f32 failed"); +#else + tflite::reference_ops::FullyConnected( + op_params, GetTensorShape(input), GetTensorData(input), + GetTensorShape(filter), GetTensorData(filter), + GetTensorShape(bias), GetTensorData(bias), GetTensorShape(output), + GetTensorData(output)); +#endif /* HIFI_VFPU */ return kTfLiteOk; } TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - auto* params = - reinterpret_cast(node->builtin_data); + TFLITE_DCHECK(node->builtin_data != nullptr); + const auto* params = + static_cast(node->builtin_data); const TfLiteTensor* input = GetInput(context, node, kInputTensor); const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor); const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); - TfLiteType data_type = input->type; - OpData local_data_object; - OpData* data = &local_data_object; - TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, data_type, input, - filter, bias, output, data)); + TFLITE_DCHECK(node->user_data != nullptr); + const OpData& data = *(static_cast(node->user_data)); - switch (filter->type) { // Already know in/out types are same. + // Checks in Prepare ensure input, output and filter types are all the same. + switch (input->type) { case kTfLiteFloat32: - return EvalFloat(context, node, params, data, input, filter, bias, + return EvalFloat(context, node, params->activation, input, filter, bias, output); case kTfLiteInt8: - return EvalQuantizedInt8(context, node, params, data, input, filter, bias, + return EvalQuantizedInt8(context, node, data, input, filter, bias, output); case kTfLiteUInt8: - return EvalQuantized(context, node, params, data, input, filter, bias, - output); + return EvalQuantized(context, node, data, input, filter, bias, output); default: TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", - TfLiteTypeGetName(filter->type), filter->type); + TfLiteTypeGetName(input->type), input->type); return kTfLiteError; } return kTfLiteOk; @@ -253,9 +295,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace fully_connected TfLiteRegistration Register_FULLY_CONNECTED() { - return {/*init=*/nullptr, + return {/*init=*/fully_connected::Init, /*free=*/nullptr, - /*prepare=*/nullptr, + /*prepare=*/fully_connected::Prepare, /*invoke=*/fully_connected::Eval, /*profiling_string=*/nullptr, /*builtin_code=*/0, diff --git a/tensorflow/lite/micro/kernels/xtensa_hifi/logistic.cc b/tensorflow/lite/micro/kernels/xtensa_hifi/logistic.cc index aee48a571c1..764bc88ceb1 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifi/logistic.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifi/logistic.cc @@ -1,24 +1,24 @@ -/****************************************************************************** - * Copyright (C) 2019 Cadence Design Systems, Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to use this Software with Cadence processor cores only and - * not with any other processors and platforms, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - ******************************************************************************/ +/******************************************************************************* +* Copyright (c) 2019-2020 Cadence Design Systems, Inc. +* +* Permission is hereby granted, free of charge, to any person obtaining +* a copy of this software and associated documentation files (the +* "Software"), to use this Software with Cadence processor cores only and +* not with any other processors and platforms, subject to +* the following conditions: +* +* The above copyright notice and this permission notice shall be included +* in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +******************************************************************************/ /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); @@ -34,32 +34,68 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/lite/kernels/internal/reference/logistic.h" +#include "tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h" #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/internal/common.h" #include "tensorflow/lite/kernels/internal/quantization_util.h" +#include "tensorflow/lite/kernels/internal/reference/logistic.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/op_macros.h" -#include "xtensa_tf_micro_common.h" +#include "tensorflow/lite/micro/kernels/xtensa_hifi/xtensa_tf_micro_common.h" namespace tflite { namespace ops { namespace micro { namespace activations { - +namespace { constexpr int kInputTensor = 0; constexpr int kOutputTensor = 0; -TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { +struct OpData { + int32_t input_zero_point; + int32_t input_range_radius; + int32_t input_multiplier; + int input_left_shift; +}; + +TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node, + OpData* data) { const TfLiteTensor* input = GetInput(context, node, kInputTensor); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE_EQ(context, input->type, output->type); + if (input->type == kTfLiteInt8) { + TF_LITE_ENSURE_EQ(context, output->params.zero_point, + std::numeric_limits::min()); + + static constexpr int kInputIntegerBits = 4; + const double input_real_multiplier = + static_cast(input->params.scale) * + static_cast(1 << (31 - kInputIntegerBits)); + + const double q = std::frexp(input_real_multiplier, &data->input_left_shift); + data->input_multiplier = static_cast(TfLiteRound(q * (1ll << 31))); + + data->input_range_radius = + CalculateInputRadius(kInputIntegerBits, data->input_left_shift, 31); + } + return kTfLiteOk; +} +} // namespace + +TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + OpData data; + CalculateArithmeticOpData(context, node, &data); + if (input->type == kTfLiteFloat32) { switch (output->type) { case kTfLiteFloat32: { +#if HIFI_VFPU int err; const float* inp_data_ptr; float* out_data_ptr; @@ -73,6 +109,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { err = xa_nn_vec_sigmoid_f32_f32(out_data_ptr, inp_data_ptr, flat_size); CHECK_ERR_HIFI_NNLIB_KER(err, "xa_nn_vec_sigmoid_f32_f32 failed"); +#else + reference_ops::Logistic( + GetTensorShape(input), GetTensorData(input), + GetTensorShape(output), GetTensorData(output)); +#endif /* HIFI_VFPU */ return kTfLiteOk; } default: @@ -84,11 +125,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } else if (input->type == kTfLiteInt8) { switch (output->type) { case kTfLiteInt8: { - reference_ops::Logistic( - GetTensorShape(input), GetTensorData(input), - input->params.scale, input->params.zero_point, - GetTensorShape(output), GetTensorData(output), - output->params.scale, output->params.zero_point); + reference_integer_ops::Logistic( + input->params.zero_point, data.input_range_radius, + data.input_multiplier, data.input_left_shift, + NumElements(input->dims), GetTensorData(input), + GetTensorData(output)); return kTfLiteOk; } default: @@ -98,7 +139,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return 
kTfLiteError; } } else { - // (b/141211002): Also support other data types once we have supported + // TODO(b/141211002): Also support other data types once we have supported // temporary tensors in TFLM. TF_LITE_KERNEL_LOG(context, "Input %s, output %s not supported.", TfLiteTypeGetName(input->type), @@ -114,7 +155,7 @@ TfLiteRegistration Register_LOGISTIC() { return {/*init=*/nullptr, /*free=*/nullptr, /*prepare=*/nullptr, - /*invoke=*/activations::Eval, + /*invoke=*/activations::LogisticEval, /*profiling_string=*/nullptr, /*builtin_code=*/0, /*custom_name=*/nullptr, diff --git a/tensorflow/lite/micro/kernels/xtensa_hifi/mul.cc b/tensorflow/lite/micro/kernels/xtensa_hifi/mul.cc new file mode 100644 index 00000000000..1fb0af60a2e --- /dev/null +++ b/tensorflow/lite/micro/kernels/xtensa_hifi/mul.cc @@ -0,0 +1,235 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/kernels/internal/reference/mul.h" + +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/internal/quantization_util.h" +#include "tensorflow/lite/kernels/internal/reference/integer_ops/mul.h" +#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h" +#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/micro/memory_helpers.h" +#include "tensorflow/lite/micro/kernels/xtensa_hifi/xtensa_tf_micro_common.h" + +namespace tflite { +namespace ops { +namespace micro { +namespace mul { + +constexpr int kInput1Tensor = 0; +constexpr int kInput2Tensor = 1; +constexpr int kOutputTensor = 0; + +struct OpData { + int32_t output_activation_min; + int32_t output_activation_max; + + int32_t output_multiplier; + int output_shift; +}; + +TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node, + TfLiteMulParams* params, OpData* data) { + const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor); + const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type); + + if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) { + TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized( + context, params->activation, output, &data->output_activation_min, + &data->output_activation_max)); + + double real_multiplier = static_cast(input1->params.scale) * + static_cast(input2->params.scale) / + static_cast(output->params.scale); + QuantizeMultiplier(real_multiplier, &data->output_multiplier, + &data->output_shift); + } + + return kTfLiteOk; +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor); + const TfLiteTensor* input2 = 
GetInput(context, node, kInput2Tensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + if (output->dims->size == 0) { + return AllocateOutputDimensionsFromInput(context, input1, input2, output); + } + + return kTfLiteOk; +} + +TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, + TfLiteMulParams* params, OpData* data, + const TfLiteTensor* input1, const TfLiteTensor* input2, + TfLiteTensor* output) { + if (output->type == kTfLiteInt8 || output->type == kTfLiteUInt8) { + tflite::ArithmeticParams op_params; + SetActivationParams(data->output_activation_min, + data->output_activation_max, &op_params); + op_params.input1_offset = -input1->params.zero_point; + op_params.input2_offset = -input2->params.zero_point; + op_params.output_offset = output->params.zero_point; + op_params.output_multiplier = data->output_multiplier; + op_params.output_shift = data->output_shift; + bool need_broadcast = reference_ops::ProcessBroadcastShapes( + GetTensorShape(input1), GetTensorShape(input2), &op_params); + +#define TF_LITE_MUL(type, opname, dtype) \ + type::opname(op_params, GetTensorShape(input1), \ + GetTensorData(input1), GetTensorShape(input2), \ + GetTensorData(input2), GetTensorShape(output), \ + GetTensorData(output)); + + if (output->type == kTfLiteInt8) { + if (need_broadcast) { + TF_LITE_MUL(reference_integer_ops, BroadcastMul4DSlow, int8_t); + } else { + TF_LITE_MUL(reference_integer_ops, Mul, int8_t); + } + } else if (output->type == kTfLiteUInt8) { + if (need_broadcast) { + TF_LITE_MUL(reference_ops, BroadcastMul4DSlow, uint8_t); + } else { + int err; + const RuntimeShape& input1_shape = GetTensorShape(input1); + const RuntimeShape& input2_shape = GetTensorShape(input2); + const RuntimeShape& output_shape = GetTensorShape(output); + const int flat_size = MatchingElementsSize(input1_shape, input2_shape, + output_shape); + + err = xa_nn_elm_mul_asym8xasym8_asym8(GetTensorData(output), + op_params.output_offset, + op_params.output_shift, + op_params.output_multiplier, + op_params.quantized_activation_min, + op_params.quantized_activation_max, + GetTensorData(input1) , + op_params.input1_offset, + GetTensorData(input2) , + op_params.input2_offset, + flat_size); + + CHECK_ERR_HIFI_NNLIB_KER(err, "xa_nn_elm_mul_asym8xasym8_asym8 failed"); + } + } +#undef TF_LITE_MUL + } + return kTfLiteOk; +} + +TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, + TfLiteMulParams* params, OpData* data, + const TfLiteTensor* input1, const TfLiteTensor* input2, + TfLiteTensor* output) { + float output_activation_min, output_activation_max; + CalculateActivationRange(params->activation, &output_activation_min, + &output_activation_max); + tflite::ArithmeticParams op_params; + SetActivationParams(output_activation_min, output_activation_max, &op_params); + + bool need_broadcast = reference_ops::ProcessBroadcastShapes( + GetTensorShape(input1), GetTensorShape(input2), &op_params); +#define TF_LITE_MUL(opname) \ + reference_ops::opname(op_params, GetTensorShape(input1), \ + GetTensorData(input1), GetTensorShape(input2), \ + GetTensorData(input2), GetTensorShape(output), \ + GetTensorData(output)); + + if (need_broadcast) { + TF_LITE_MUL(BroadcastMul4DSlow); + } else { +#if HIFI_VFPU + int err; + const RuntimeShape& input1_shape = GetTensorShape(input1); + const RuntimeShape& input2_shape = GetTensorShape(input2); + const RuntimeShape& output_shape = GetTensorShape(output); + const int flat_size = MatchingElementsSize(input1_shape, input2_shape, + output_shape); + + err 
= xa_nn_elm_mul_f32xf32_f32(GetTensorData(output), + GetTensorData(input1), + GetTensorData(input2), + flat_size); + + CHECK_ERR_HIFI_NNLIB_KER(err, "xa_nn_elm_mul_f32xf32_f32 failed"); + + err = xa_nn_vec_activation_min_max_f32_f32(GetTensorData(output), + GetTensorData(output), + output_activation_min, + output_activation_max, + flat_size); + + CHECK_ERR_HIFI_NNLIB_KER(err, "xa_nn_vec_activation_min_max_f32_f32 failed"); +#else + TF_LITE_MUL(Mul); +#endif /* HIFI_VFPU */ + } +#undef TF_LITE_MUL + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast(node->builtin_data); + OpData data; + + const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor); + const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + TF_LITE_ENSURE_STATUS(CalculateOpData(context, node, params, &data)); + + switch (input1->type) { + case kTfLiteUInt8: + case kTfLiteInt8: + TF_LITE_ENSURE_OK(context, EvalQuantized(context, node, params, + &data, input1, input2, output)); + break; + case kTfLiteFloat32: + TF_LITE_ENSURE_OK(context, EvalFloat(context, node, params, + &data, input1, input2, output)); + break; + default: + TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.", + TfLiteTypeGetName(input1->type), input1->type); + return kTfLiteError; + } + + return kTfLiteOk; +} +} // namespace mul + +TfLiteRegistration Register_MUL() { + return {/*init=*/nullptr, + /*free=*/nullptr, + /*prepare=*/nullptr, + /*invoke=*/mul::Eval, + /*profiling_string=*/nullptr, + /*builtin_code=*/0, + /*custom_name=*/nullptr, + /*version=*/0}; +} + +} // namespace micro +} // namespace ops +} // namespace tflite diff --git a/tensorflow/lite/micro/kernels/xtensa_hifi/pooling.cc b/tensorflow/lite/micro/kernels/xtensa_hifi/pooling.cc index 162f3e069f9..0e6f0d0ab30 100755 --- a/tensorflow/lite/micro/kernels/xtensa_hifi/pooling.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifi/pooling.cc @@ -1,24 +1,24 @@ -/****************************************************************************** - * Copyright (C) 2019 Cadence Design Systems, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to use this Software with Cadence processor cores only and - * not with any other processors and platforms, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - ******************************************************************************/ +/******************************************************************************* +* Copyright (c) 2019-2020 Cadence Design Systems, Inc. 
+* +* Permission is hereby granted, free of charge, to any person obtaining +* a copy of this software and associated documentation files (the +* "Software"), to use this Software with Cadence processor cores only and +* not with any other processors and platforms, subject to +* the following conditions: +* +* The above copyright notice and this permission notice shall be included +* in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +******************************************************************************/ /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); @@ -40,7 +40,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/padding.h" -#include "xtensa_tf_micro_common.h" +#include "tensorflow/lite/micro/kernels/xtensa_hifi/xtensa_tf_micro_common.h" namespace tflite { namespace ops { @@ -83,6 +83,7 @@ TfLiteStatus AverageEvalFloat(TfLiteContext* context, const TfLiteNode* node, CalculateActivationRange(params->activation, &activation_min, &activation_max); +#if HIFI_VFPU const int stride_height = params->stride_height; const int stride_width = params->stride_width; const int pad_width = data->padding.width; @@ -168,6 +169,20 @@ TfLiteStatus AverageEvalFloat(TfLiteContext* context, const TfLiteNode* node, CHECK_ERR_HIFI_NNLIB_KER( err, "AveragepoolFloat: xa_nn_vec_activation_min_max_f32_f32 failed"); } +#else + PoolParams op_params; + op_params.stride_height = params->stride_height; + op_params.stride_width = params->stride_width; + op_params.filter_height = params->filter_height; + op_params.filter_width = params->filter_width; + op_params.padding_values.height = data->padding.height; + op_params.padding_values.width = data->padding.width; + op_params.float_activation_min = activation_min; + op_params.float_activation_max = activation_max; + reference_ops::AveragePool( + op_params, GetTensorShape(input), GetTensorData(input), + GetTensorShape(output), GetTensorData(output)); +#endif /* HIFI_VFPU */ return kTfLiteOk; } @@ -177,7 +192,6 @@ TfLiteStatus AverageEvalQuantized(TfLiteContext* context, const OpData* data, const TfLiteTensor* input, TfLiteTensor* output) { TFLITE_DCHECK(input->type == kTfLiteUInt8 || input->type == kTfLiteInt8); - int32_t activation_min, activation_max; (void)CalculateActivationRangeQuantized(context, params->activation, output, &activation_min, &activation_max); @@ -295,6 +309,7 @@ TfLiteStatus MaxEvalFloat(TfLiteContext* context, TfLiteNode* node, CalculateActivationRange(params->activation, &activation_min, &activation_max); +#if HIFI_VFPU const int stride_height = params->stride_height; const int stride_width = params->stride_width; const int pad_width = data->padding.width; @@ -378,6 +393,20 @@ TfLiteStatus MaxEvalFloat(TfLiteContext* context, TfLiteNode* node, CHECK_ERR_HIFI_NNLIB_KER( err, "MaxpoolFloat: xa_nn_vec_activation_min_max_f32_f32 failed"); } +#else + 
tflite::PoolParams op_params; + op_params.stride_height = params->stride_height; + op_params.stride_width = params->stride_width; + op_params.filter_height = params->filter_height; + op_params.filter_width = params->filter_width; + op_params.padding_values.height = data->padding.height; + op_params.padding_values.width = data->padding.width; + op_params.float_activation_min = activation_min; + op_params.float_activation_max = activation_max; + reference_ops::MaxPool(op_params, GetTensorShape(input), + GetTensorData(input), GetTensorShape(output), + GetTensorData(output)); +#endif /* HIFI_VFPU */ return kTfLiteOk; } @@ -491,7 +520,6 @@ TfLiteStatus MaxEvalQuantized(TfLiteContext* context, TfLiteNode* node, } return kTfLiteOk; } - } // namespace @@ -504,7 +532,7 @@ TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, &data)); - // Inputs and outputs share the same type, guarenteed by the converter. + // Inputs and outputs share the same type, guaranteed by the converter. switch (input->type) { case kTfLiteFloat32: AverageEvalFloat(context, node, params, &data, input, output); diff --git a/tensorflow/lite/micro/kernels/xtensa_hifi/softmax.cc b/tensorflow/lite/micro/kernels/xtensa_hifi/softmax.cc index de949a631cf..e4fa19671c2 100755 --- a/tensorflow/lite/micro/kernels/xtensa_hifi/softmax.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifi/softmax.cc @@ -1,24 +1,24 @@ -/****************************************************************************** - * Copyright (C) 2019 Cadence Design Systems, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to use this Software with Cadence processor cores only and - * not with any other processors and platforms, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - ******************************************************************************/ +/******************************************************************************* +* Copyright (c) 2019-2020 Cadence Design Systems, Inc. +* +* Permission is hereby granted, free of charge, to any person obtaining +* a copy of this software and associated documentation files (the +* "Software"), to use this Software with Cadence processor cores only and +* not with any other processors and platforms, subject to +* the following conditions: +* +* The above copyright notice and this permission notice shall be included +* in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +******************************************************************************/ /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); @@ -43,7 +43,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/op_macros.h" -#include "xtensa_tf_micro_common.h" +#include "tensorflow/lite/micro/kernels/xtensa_hifi/xtensa_tf_micro_common.h" namespace tflite { namespace ops { namespace micro { @@ -105,6 +105,7 @@ TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) { // Takes a tensor and performs softmax along the last dimension. TfLiteStatus SoftmaxFloat(TfLiteContext* context, const TfLiteTensor* input, TfLiteTensor* output, const SoftmaxParams& op_data) { +#if HIFI_VFPU const RuntimeShape& input_shape = GetTensorShape(input); const float* input_data = GetTensorData(input); const RuntimeShape& output_shape = GetTensorShape(output); @@ -133,6 +134,11 @@ TfLiteStatus SoftmaxFloat(TfLiteContext* context, const TfLiteTensor* input, xa_nn_vec_softmax_f32_f32(&output_data[i * depth], p_scratch, depth); CHECK_ERR_HIFI_NNLIB_KER(err, "xa_nn_vec_softmax_f32_f32 failed"); } +#else + tflite::reference_ops::Softmax( + op_data, GetTensorShape(input), GetTensorData(input), + GetTensorShape(output), GetTensorData(output)); +#endif /* HIFI_VFPU */ return kTfLiteOk; } diff --git a/tensorflow/lite/micro/kernels/xtensa_hifi/svdf.cc b/tensorflow/lite/micro/kernels/xtensa_hifi/svdf.cc index a85f796693e..514f17678be 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifi/svdf.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifi/svdf.cc @@ -1,24 +1,23 @@ -/****************************************************************************** - * Copyright (C) 2019 Cadence Design Systems, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to use this Software with Cadence processor cores only and - * not with any other processors and platforms, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - ******************************************************************************/ - +/******************************************************************************* +* Copyright (c) 2019-2020 Cadence Design Systems, Inc. 
+* +* Permission is hereby granted, free of charge, to any person obtaining +* a copy of this software and associated documentation files (the +* "Software"), to use this Software with Cadence processor cores only and +* not with any other processors and platforms, subject to +* the following conditions: +* +* The above copyright notice and this permission notice shall be included +* in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +******************************************************************************/ /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); @@ -45,7 +44,7 @@ limitations under the License. #include "tensorflow/lite/kernels/op_macros.h" #include "tensorflow/lite/micro/kernels/activation_utils.h" #include "tensorflow/lite/micro/micro_utils.h" -#include "xtensa_tf_micro_common.h" +#include "tensorflow/lite/micro/kernels/xtensa_hifi/xtensa_tf_micro_common.h" namespace tflite { namespace ops { @@ -53,10 +52,6 @@ namespace micro { namespace svdf { namespace { -// These constants represent constants specific to the hotword "OK G" model. -// They exist until (b/132070898) is fixed. -constexpr int kScratchTensorMaxSize = 64; - struct OpData { int32 effective_scale_1_a; int32 effective_scale_2_a; @@ -64,6 +59,8 @@ struct OpData { // shift value - typically between [-32, 32]. int effective_scale_1_b; int effective_scale_2_b; + int scratch_tensor_index; + int scratch_output_tensor_index; }; /** @@ -84,6 +81,7 @@ static inline TfLiteStatus ApplyTimeWeightsBiasAndActivation( float* const __restrict__ state_ptr, float* const __restrict__ scratch_ptr, float* const __restrict__ output_ptr) { // Compute matmul(activation_state, weights_time). +#if HIFI_VFPU float* scratch_bias = scratch_ptr; if (bias_ptr) { const float* bias_data = bias_ptr; @@ -111,6 +109,51 @@ static inline TfLiteStatus ApplyTimeWeightsBiasAndActivation( weights_time_vec += memory_size * rank; } } +#else + for (int b = 0; b < batch_size; ++b) { + // Perform batched vector dot product: + float* scratch_ptr_batch = scratch_ptr + b * num_filters; + const float* vector1_ptr = weights_time_ptr; + const float* vector2_ptr = state_ptr + b * memory_size * num_filters; + for (int i = 0; i < num_filters; ++i) { + *scratch_ptr_batch = 0.f; + for (int j = 0; j < memory_size; ++j) { + *scratch_ptr_batch += *vector1_ptr++ * *vector2_ptr++; + } + scratch_ptr_batch++; + } + } + + // Initialize output with bias if provided. + if (bias_ptr) { + // VectorBatchVectorAssign + for (int i = 0; i < batch_size; ++i) { + float* output_data = output_ptr + i * num_units; + const float* bias_data = bias_ptr; + for (int j = 0; j < num_units; ++j) { + *output_data++ = *bias_data++; + } + } + } else { + float* output_data = output_ptr; + for (int i = 0; i < batch_size * num_units; ++i) { + *output_data++ = 0.0f; + } + } + + // Reduction sum. 
+ for (int b = 0; b < batch_size; ++b) { + float* output_ptr_batch = output_ptr + b * num_units; + float* scratch_ptr_batch = scratch_ptr + b * num_filters; + + // Reduction sum vector + for (int i = 0; i < num_units; ++i) { + for (int j = 0; j < rank; j++) { + output_ptr_batch[i] += *scratch_ptr_batch++; + } + } + } +#endif /* HIFI_VFPU */ // Apply activation. for (int b = 0; b < batch_size; ++b) { @@ -127,7 +170,8 @@ inline TfLiteStatus EvalFloatSVDF( TfLiteContext* context, TfLiteNode* node, const TfLiteTensor* input, const TfLiteTensor* weights_feature, const TfLiteTensor* weights_time, const TfLiteTensor* bias, const TfLiteSVDFParams* params, - TfLiteTensor* activation_state, TfLiteTensor* output) { + int scratch_tensor_index, TfLiteTensor* activation_state, + TfLiteTensor* output) { const int rank = params->rank; const int batch_size = input->dims->data[0]; const int input_size = input->dims->data[1]; @@ -142,10 +186,11 @@ inline TfLiteStatus EvalFloatSVDF( float* state_ptr = GetTensorData(activation_state); - // TODO(b/132070898): Move this temp variable to the new scratch buffer API - // when ready. - float scratch_tensor[kScratchTensorMaxSize]; - float* scratch_ptr = scratch_tensor; + TFLITE_DCHECK(context != nullptr); + TFLITE_DCHECK(context->GetScratchBuffer != nullptr); + + float* scratch_ptr = static_cast( + context->GetScratchBuffer(context, scratch_tensor_index)); float* output_ptr = GetTensorData(output); @@ -174,6 +219,7 @@ inline TfLiteStatus EvalFloatSVDF( float* result = &state_ptr[memory_size - 1]; float* result_in_batch = result; +#if HIFI_VFPU float* out_scratch = scratch_ptr; float* bias_scratch = output_ptr; for (int i = 0; i < num_units; i++) bias_scratch[i] = 0.0f; @@ -195,6 +241,20 @@ inline TfLiteStatus EvalFloatSVDF( result_in_batch += memory_size; } } +#else + for (int i = 0; i < batch_size; ++i) { + const float* matrix_ptr = matrix; + for (int j = 0; j < num_filters; ++j) { + float dot_prod = 0.0f; + const float* vector_in_batch = vector + i * input_size; + for (int k = 0; k < input_size; ++k) { + dot_prod += *matrix_ptr++ * *vector_in_batch++; + } + *result_in_batch = dot_prod; + result_in_batch += memory_size; + } + } +#endif /* HIFI_VFPU */ } return ApplyTimeWeightsBiasAndActivation( @@ -203,13 +263,15 @@ inline TfLiteStatus EvalFloatSVDF( output_ptr); } -void EvalIntegerSVDF( - TfLiteContext* context, TfLiteNode* node, const TfLiteTensor* input_tensor, +void EvalIntegerSVDF(TfLiteContext* context, TfLiteNode* node, + const TfLiteTensor* input_tensor, const TfLiteTensor* weights_feature_tensor, - const TfLiteTensor* weights_time_tensor, const TfLiteTensor* bias_tensor, - const TfLiteSVDFParams* params, TfLiteTensor* activation_state_tensor, - TfLiteTensor* output_tensor, int32_t scale_1_a, int scale_1_b, - int32_t scale_2_a, int scale_2_b, int32_t input_zp, int32_t output_zp) { + const TfLiteTensor* weights_time_tensor, + const TfLiteTensor* bias_tensor, + const TfLiteSVDFParams* params, + TfLiteTensor* activation_state_tensor, + TfLiteTensor* output_tensor, const OpData& data, + int32_t input_zp, int32_t output_zp) { const int n_rank = params->rank; const int n_batch = input_tensor->dims->data[0]; const int n_input = input_tensor->dims->data[1]; @@ -217,10 +279,13 @@ void EvalIntegerSVDF( const int n_unit = n_filter / n_rank; const int n_memory = weights_time_tensor->dims->data[1]; - // TODO(b/132070898): Move these temp variables to the new scratch buffer API - // when ready. 
- int32_t scratch_tensor[kScratchTensorMaxSize]; - int32_t scratch_output_tensor[kScratchTensorMaxSize]; + TFLITE_DCHECK(context != nullptr); + TFLITE_DCHECK(context->GetScratchBuffer != nullptr); + + int32_t* scratch_tensor = static_cast( + context->GetScratchBuffer(context, data.scratch_tensor_index)); + int32_t* scratch_output_tensor = static_cast( + context->GetScratchBuffer(context, data.scratch_output_tensor_index)); // Shift states. int16_t* const state_ptr = GetTensorData(activation_state_tensor); @@ -254,8 +319,8 @@ void EvalIntegerSVDF( for (int c = 0; c < n_input; c++) { dot_prod += *matrix_ptr++ * (*vector_in_batch++ - input_zp); } - dot_prod = - MultiplyByQuantizedMultiplier(dot_prod, scale_1_a, scale_1_b); + dot_prod = MultiplyByQuantizedMultiplier( + dot_prod, data.effective_scale_1_a, data.effective_scale_1_b); dot_prod = std::min(std::max(output_min, dot_prod), output_max); // This assumes state is symmetrically quantized. Otherwise last bit of // state should be initialized to its zero point and accumulate the @@ -328,7 +393,8 @@ void EvalIntegerSVDF( const int32_t output_min = std::numeric_limits::min(); for (int i = 0; i < n_batch * n_unit; ++i) { int32_t x1 = scratch_output_tensor[i]; - int32_t x2 = MultiplyByQuantizedMultiplier(x1, scale_2_a, scale_2_b); + int32_t x2 = MultiplyByQuantizedMultiplier(x1, data.effective_scale_2_a, + data.effective_scale_2_b); int32_t x3 = x2 + output_zp; int32_t x4 = std::min(std::max(output_min, x3), output_max); GetTensorData(output_tensor)[i] = static_cast(x4); @@ -349,8 +415,20 @@ constexpr int kInputActivationStateTensor = 4; // Output tensor. constexpr int kOutputTensor = 0; +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr); + void* data = nullptr; + if (context->AllocatePersistentBuffer(context, sizeof(OpData), &data) == + kTfLiteError) { + return nullptr; + } + return data; +} + TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { - const auto* params = reinterpret_cast(node->builtin_data); + TFLITE_DCHECK(node->builtin_data != nullptr); + + const auto* params = static_cast(node->builtin_data); // Validate Tensor Inputs (dtype depends on quantization): // [0] = Input, {2, batch_size, input_size} @@ -359,7 +437,6 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { // [3] = Bias (optional), {1, num_units} // [4] = Activation State (variable), // {2, batch_size, memory_size * num_filters} - const TfLiteTensor* input = GetInput(context, node, kInputTensor); const TfLiteTensor* weights_feature = GetInput(context, node, kWeightsFeatureTensor); @@ -378,8 +455,6 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { const int num_units = num_filters / rank; const int memory_size = weights_time->dims->data[1]; - const bool is_full_integer = input->type == kTfLiteInt8; - // Validate Input Tensor: TF_LITE_ENSURE(context, input->type == kTfLiteFloat32 || input->type == kTfLiteInt8); @@ -403,7 +478,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_EQ(context, weights_time->dims->data[1], memory_size); // Validate Optional Bias Input Tensor: - if (bias) { + if (bias != nullptr) { TF_LITE_ENSURE_EQ(context, bias->dims->data[0], num_units); } @@ -413,51 +488,74 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_EQ(context, activation_state->dims->data[1], memory_size * num_filters); - if (is_full_integer) { TF_LITE_ENSURE_EQ(context, node->inputs->size, 
5); + if (input->type == kTfLiteInt8) { TF_LITE_ENSURE_EQ(context, weights_feature->type, kTfLiteInt8); TF_LITE_ENSURE_EQ(context, weights_time->type, kTfLiteInt16); - - if (bias) { + TF_LITE_ENSURE_EQ(context, activation_state->type, kTfLiteInt16); + if (bias != nullptr) { TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteInt32); } - TF_LITE_ENSURE_EQ(context, activation_state->type, kTfLiteInt16); - - // Validate Scratch Tensors: - // [0] = (shared - see float block below for usage) - // [1] = Output Temp, int8_t, {2, num_units, batch_size} - // TODO(b/132070898): Scratch values are used as stack variables in - // EvalIntegerSVDF(). - - // Validate output tensor: TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8); - } else { - TF_LITE_ENSURE_EQ(context, node->inputs->size, 5); - // Validate Input Tensor dtypes: + const auto* input_params = + reinterpret_cast(input->quantization.params); + const auto* weights_feature_params = + static_cast( + weights_feature->quantization.params); + const auto* state_params = static_cast( + activation_state->quantization.params); + const auto* weight_time_params = + static_cast( + weights_time->quantization.params); + const auto* output_params = static_cast( + output->quantization.params); + const double effective_scale_1 = static_cast( + input_params->scale->data[0] * weights_feature_params->scale->data[0] / + state_params->scale->data[0]); + const double effective_scale_2 = static_cast( + state_params->scale->data[0] * weight_time_params->scale->data[0] / + output_params->scale->data[0]); + + TFLITE_DCHECK(node->user_data != nullptr); + OpData* data = static_cast(node->user_data); + + QuantizeMultiplier(effective_scale_1, &(data->effective_scale_1_a), + &(data->effective_scale_1_b)); + QuantizeMultiplier(effective_scale_2, &(data->effective_scale_2_a), + &(data->effective_scale_2_b)); + + TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr); + + const TfLiteStatus scratch_status = context->RequestScratchBufferInArena( + context, batch_size * num_filters * sizeof(int32_t), + &(data->scratch_tensor_index)); + TF_LITE_ENSURE_OK(context, scratch_status); + + const TfLiteStatus scratch_output_status = + context->RequestScratchBufferInArena( + context, batch_size * num_units * sizeof(int32_t), + &(data->scratch_output_tensor_index)); + TF_LITE_ENSURE_OK(context, scratch_output_status); + } else { TF_LITE_ENSURE_EQ(context, weights_feature->type, kTfLiteFloat32); TF_LITE_ENSURE_EQ(context, weights_time->type, kTfLiteFloat32); TF_LITE_ENSURE_EQ(context, activation_state->type, kTfLiteFloat32); - - if (bias) { + if (bias != nullptr) { TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteFloat32); } - - // Validate shared Scratch Tensor: - // [0] = Holds dot-product of time-forward calculations in - // ApplyTimeWeightsBiasAndActivation(): - // float/int32, {2, batch_size, num_filters} - // TODO(b/132070898): Scratch values are used as stack variables in - // EvalIntegerSVDF(). - - // Full-float SVDF only uses the one shared scratch tensor (see above for - // usage). - // TODO(b/132070898): Use input tensor as variable until scratch tensor - // allocation has been implemented. 
- // TF_LITE_ENSURE_EQ(context, node->temporaries->size, 1); TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteFloat32); + + TFLITE_DCHECK(node->user_data != nullptr); + OpData* data = static_cast(node->user_data); + + TFLITE_DCHECK(context->RequestScratchBufferInArena != nullptr); + const TfLiteStatus scratch_status = context->RequestScratchBufferInArena( + context, batch_size * num_filters * sizeof(float), + &(data->scratch_tensor_index)); + TF_LITE_ENSURE_OK(context, scratch_status); } return kTfLiteOk; @@ -476,56 +574,23 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { GetVariableInput(context, node, kInputActivationStateTensor); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); - const bool is_full_integer = input->type == kTfLiteInt8; + TFLITE_DCHECK(node->user_data != nullptr); + const OpData& data = *(static_cast(node->user_data)); switch (weights_feature->type) { case kTfLiteFloat32: { - // TODO(b/132070898): Use input tensor as variable until scratch tensor - // allocation has been implemented. - // TfLiteTensor* scratch = GetTemporary(context, node, /*index=*/0); return EvalFloatSVDF(context, node, input, weights_feature, weights_time, - bias, params, activation_state, output); + bias, params, data.scratch_tensor_index, activation_state, output); break; } case kTfLiteInt8: { - if (is_full_integer) { - // TODO(b/132070898): Store these values in ::Prepare() instead of - // ::Eval(): - // Calculate effective scales. - OpData op_data; - auto* input_params = reinterpret_cast( - input->quantization.params); - auto* weights_feature_params = - reinterpret_cast( - weights_feature->quantization.params); - auto* state_params = reinterpret_cast( - activation_state->quantization.params); - auto* weight_time_params = reinterpret_cast( - weights_time->quantization.params); - auto* output_params = reinterpret_cast( - output->quantization.params); - const double effective_scale_1 = - static_cast(input_params->scale->data[0] * - weights_feature_params->scale->data[0] / - state_params->scale->data[0]); - const double effective_scale_2 = static_cast( - state_params->scale->data[0] * weight_time_params->scale->data[0] / - output_params->scale->data[0]); - QuantizeMultiplier(effective_scale_1, &op_data.effective_scale_1_a, - &op_data.effective_scale_1_b); - QuantizeMultiplier(effective_scale_2, &op_data.effective_scale_2_a, - &op_data.effective_scale_2_b); - TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActRelu); - EvalIntegerSVDF( - context, node, input, weights_feature, weights_time, bias, params, - activation_state, output, op_data.effective_scale_1_a, - op_data.effective_scale_1_b, op_data.effective_scale_2_a, - op_data.effective_scale_2_b, input->params.zero_point, - output->params.zero_point); + + EvalIntegerSVDF(context, node, input, weights_feature, weights_time, bias, + params, activation_state, output, data, + input->params.zero_point, output->params.zero_point); return kTfLiteOk; - } break; } @@ -540,7 +605,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace svdf TfLiteRegistration Register_SVDF() { - return {/*init=*/nullptr, + return {/*init=*/svdf::Init, /*free=*/nullptr, /*prepare=*/svdf::Prepare, /*invoke=*/svdf::Eval, diff --git a/tensorflow/lite/micro/tools/make/ext_libs/xtensa_hifi_nn_library.inc b/tensorflow/lite/micro/tools/make/ext_libs/xtensa_hifi_nn_library.inc index bd79d9cacca..7e8fe2b26f1 100644 --- a/tensorflow/lite/micro/tools/make/ext_libs/xtensa_hifi_nn_library.inc +++ 
b/tensorflow/lite/micro/tools/make/ext_libs/xtensa_hifi_nn_library.inc @@ -4,6 +4,8 @@ ifneq ($(filter xtensa_hifi, $(ALL_TAGS)),) ifneq (,$(filter hifi4%, $(TARGET_ARCH))) + NNLIB = xa_nnlib_hifi4 + CCFLAGS += -DNNLIB_V2 \ -DXTENSA_NNLIB_MAX_SCRATCH_SIZE=70*1024 @@ -11,56 +13,60 @@ ifneq ($(filter xtensa_hifi, $(ALL_TAGS)),) -DXTENSA_NNLIB_MAX_SCRATCH_SIZE=70*1024 MICROLITE_CC_SRCS += \ - $(XTENSA_PATH)/xa_nnlib/algo/kernels/activations/hifi4/xa_nn_activations_f32_f32.c \ - $(XTENSA_PATH)/xa_nnlib/algo/kernels/activations/hifi4/xa_nn_activations_asym8_asym8.c \ - $(XTENSA_PATH)/xa_nnlib/algo/kernels/activations/hifi4/xa_nn_activations_32_16.c \ - $(XTENSA_PATH)/xa_nnlib/algo/kernels/activations/hifi4/xa_nn_activations_32_8.c \ - $(XTENSA_PATH)/xa_nnlib/algo/kernels/activations/hifi4/xa_nn_softmax_asym8_asym8.c \ - $(XTENSA_PATH)/xa_nnlib/algo/kernels/basic/hifi4/xa_nn_floor_f32.c \ - $(XTENSA_PATH)/xa_nnlib/algo/kernels/cnn/hifi4/xa_nn_conv2d_std_circ_buf.c \ - $(XTENSA_PATH)/xa_nnlib/algo/kernels/cnn/hifi4/xa_nn_conv2d_std_asym8xasym8.c \ - $(XTENSA_PATH)/xa_nnlib/algo/kernels/cnn/hifi4/xa_nn_conv2d_std_f32.c \ - $(XTENSA_PATH)/xa_nnlib/algo/kernels/cnn/hifi4/xa_nn_matXvec_asym8xasym8_asym8_circ.c \ - $(XTENSA_PATH)/xa_nnlib/algo/kernels/cnn/hifi4/xa_nn_matXvec_f32_circ.c \ - $(XTENSA_PATH)/xa_nnlib/algo/kernels/cnn/hifi4/xa_nn_conv2d_depthwise.c \ - $(XTENSA_PATH)/xa_nnlib/algo/kernels/cnn/hifi4/xa_nn_conv2d_depthwise_f32.c \ - $(XTENSA_PATH)/xa_nnlib/algo/kernels/cnn/hifi4/xa_nn_conv2d_depthwise_asym8xasym8.c \ - $(XTENSA_PATH)/xa_nnlib/algo/kernels/cnn/hifi4/xa_nn_circ_buf.c \ - $(XTENSA_PATH)/xa_nnlib/algo/kernels/fc/hifi4/xa_nn_fully_connected.c \ - $(XTENSA_PATH)/xa_nnlib/algo/kernels/matXvec/hifi4/xa_nn_matXvec_f32.c \ - $(XTENSA_PATH)/xa_nnlib/algo/kernels/matXvec/hifi4/xa_nn_matXvec_16x16.c \ - $(XTENSA_PATH)/xa_nnlib/algo/kernels/matXvec/hifi4/xa_nn_matXvec_8x16.c \ - $(XTENSA_PATH)/xa_nnlib/algo/kernels/matXvec/hifi4/xa_nn_matXvec_8x8.c \ - $(XTENSA_PATH)/xa_nnlib/algo/kernels/matXvec/hifi4/xa_nn_matXvec_asym8xasym8.c \ - $(XTENSA_PATH)/xa_nnlib/algo/kernels/pool/hifi4/xa_nn_avgpool.c \ - $(XTENSA_PATH)/xa_nnlib/algo/kernels/pool/hifi4/xa_nn_avgpool_f32.c \ - $(XTENSA_PATH)/xa_nnlib/algo/kernels/pool/hifi4/xa_nn_avgpool_asym8.c \ - $(XTENSA_PATH)/xa_nnlib/algo/kernels/pool/hifi4/xa_nn_maxpool.c \ - $(XTENSA_PATH)/xa_nnlib/algo/kernels/pool/hifi4/xa_nn_maxpool_f32.c \ - $(XTENSA_PATH)/xa_nnlib/algo/kernels/pool/hifi4/xa_nn_maxpool_asym8.c \ - $(XTENSA_PATH)/xa_nnlib/algo/kernels/pool/hifi4/xa_nn_avgpool_f32_nhwc.c \ - $(XTENSA_PATH)/xa_nnlib/algo/kernels/pool/hifi4/xa_nn_avgpool_asym8_nhwc.c \ - $(XTENSA_PATH)/xa_nnlib/algo/kernels/pool/hifi4/xa_nn_maxpool_f32_nhwc.c \ - $(XTENSA_PATH)/xa_nnlib/algo/kernels/pool/hifi4/xa_nn_maxpool_asym8_nhwc.c \ - $(XTENSA_PATH)/xa_nnlib/algo/kernels/pool/hifi4/xa_nn_inv_256_tbl.c \ - $(XTENSA_PATH)/xa_nnlib/algo/ndsp/hifi4/src/vec_sigmoidf_hifi4.c \ - $(XTENSA_PATH)/xa_nnlib/algo/ndsp/hifi4/src/vec_tanhf_hifi4.c \ - $(XTENSA_PATH)/xa_nnlib/algo/ndsp/hifi4/src/vec_reluf_hifi4.c \ - $(XTENSA_PATH)/xa_nnlib/algo/ndsp/hifi4/src/vec_softmaxf_hifi4.c \ - $(XTENSA_PATH)/xa_nnlib/algo/ndsp/hifi4/src/vec_alognf_hifi4.c \ - $(XTENSA_PATH)/xa_nnlib/algo/ndsp/hifi4/src/scl_sigmoidf_hifi4.c \ - $(XTENSA_PATH)/xa_nnlib/algo/ndsp/hifi4/src/scl_tanhf_hifi4.c \ - $(XTENSA_PATH)/xa_nnlib/algo/ndsp/hifi4/src/expf_tbl.c \ - $(XTENSA_PATH)/xa_nnlib/algo/ndsp/hifi4/src/pow2f_tbl.c \ - $(XTENSA_PATH)/xa_nnlib/algo/ndsp/hifi4/src/inff_tbl.c \ - 
$(XTENSA_PATH)/xa_nnlib/algo/ndsp/hifi4/src/tanhf_tbl.c \ - $(XTENSA_PATH)/xa_nnlib/algo/ndsp/hifi4/src/nanf_tbl.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/kernels/activations/hifi4/xa_nn_activations_f32_f32.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/kernels/activations/hifi4/xa_nn_activations_asym8_asym8.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/kernels/activations/hifi4/xa_nn_activations_32_16.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/kernels/activations/hifi4/xa_nn_activations_32_8.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/kernels/activations/hifi4/xa_nn_softmax_asym8_asym8.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/kernels/basic/hifi4/xa_nn_floor_f32.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/kernels/basic/hifi4/xa_nn_elm_add_f32.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/kernels/basic/hifi4/xa_nn_elm_add_quant8.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/kernels/basic/hifi4/xa_nn_elm_mul_f32.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/kernels/basic/hifi4/xa_nn_elm_mul_quant8.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/kernels/cnn/hifi4/xa_nn_conv2d_std_circ_buf.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/kernels/cnn/hifi4/xa_nn_conv2d_std_asym8xasym8.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/kernels/cnn/hifi4/xa_nn_conv2d_std_f32.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/kernels/cnn/hifi4/xa_nn_matXvec_asym8xasym8_asym8_circ.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/kernels/cnn/hifi4/xa_nn_matXvec_f32_circ.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/kernels/cnn/hifi4/xa_nn_conv2d_depthwise.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/kernels/cnn/hifi4/xa_nn_conv2d_depthwise_f32.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/kernels/cnn/hifi4/xa_nn_conv2d_depthwise_asym8xasym8.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/kernels/cnn/hifi4/xa_nn_circ_buf.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/kernels/fc/hifi4/xa_nn_fully_connected.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/kernels/matXvec/hifi4/xa_nn_matXvec_f32.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/kernels/matXvec/hifi4/xa_nn_matXvec_16x16.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/kernels/matXvec/hifi4/xa_nn_matXvec_8x16.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/kernels/matXvec/hifi4/xa_nn_matXvec_8x8.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/kernels/matXvec/hifi4/xa_nn_matXvec_asym8xasym8.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/kernels/pool/hifi4/xa_nn_avgpool.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/kernels/pool/hifi4/xa_nn_avgpool_f32.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/kernels/pool/hifi4/xa_nn_avgpool_asym8.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/kernels/pool/hifi4/xa_nn_maxpool.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/kernels/pool/hifi4/xa_nn_maxpool_f32.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/kernels/pool/hifi4/xa_nn_maxpool_asym8.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/kernels/pool/hifi4/xa_nn_avgpool_f32_nhwc.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/kernels/pool/hifi4/xa_nn_avgpool_asym8_nhwc.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/kernels/pool/hifi4/xa_nn_maxpool_f32_nhwc.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/kernels/pool/hifi4/xa_nn_maxpool_asym8_nhwc.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/kernels/pool/hifi4/xa_nn_inv_256_tbl.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/ndsp/hifi4/src/vec_sigmoidf_hifi4.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/ndsp/hifi4/src/vec_tanhf_hifi4.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/ndsp/hifi4/src/vec_reluf_hifi4.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/ndsp/hifi4/src/vec_softmaxf_hifi4.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/ndsp/hifi4/src/vec_alognf_hifi4.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/ndsp/hifi4/src/scl_sigmoidf_hifi4.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/ndsp/hifi4/src/scl_tanhf_hifi4.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/ndsp/hifi4/src/expf_tbl.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/ndsp/hifi4/src/pow2f_tbl.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/ndsp/hifi4/src/inff_tbl.c \ 
+ $(XTENSA_PATH)/$(NNLIB)/algo/ndsp/hifi4/src/tanhf_tbl.c \ + $(XTENSA_PATH)/$(NNLIB)/algo/ndsp/hifi4/src/nanf_tbl.c \ - INCLUDES += -I$(XTENSA_PATH)/xa_nnlib/algo/kernels/ \ - -I$(XTENSA_PATH)/xa_nnlib/include/nnlib/ \ - -I$(XTENSA_PATH)/xa_nnlib/include/ \ - -I$(XTENSA_PATH)/xa_nnlib/algo/common/include/ \ - -I$(XTENSA_PATH)/xa_nnlib/algo/ndsp/hifi4/include/ \ + INCLUDES += -I$(XTENSA_PATH)/$(NNLIB)/algo/kernels/ \ + -I$(XTENSA_PATH)/$(NNLIB)/include/nnlib/ \ + -I$(XTENSA_PATH)/$(NNLIB)/include/ \ + -I$(XTENSA_PATH)/$(NNLIB)/algo/common/include/ \ + -I$(XTENSA_PATH)/$(NNLIB)/algo/ndsp/hifi4/include/ \ endif diff --git a/tensorflow/lite/micro/tools/make/targets/xtensa_hifi_makefile.inc b/tensorflow/lite/micro/tools/make/targets/xtensa_hifi_makefile.inc index aa7d8cfb1c3..539f0b87ee8 100644 --- a/tensorflow/lite/micro/tools/make/targets/xtensa_hifi_makefile.inc +++ b/tensorflow/lite/micro/tools/make/targets/xtensa_hifi_makefile.inc @@ -5,7 +5,7 @@ ifeq ($(TARGET), xtensa_hifi) TARGET_ARCH := hifi3_bd5 -$(eval $(call add_third_party_download,$(XTENSA_HIFI4_URL),$(XTENSA_HIFI4_MD5),xa_nnlib,)) +$(eval $(call add_third_party_download,$(XTENSA_HIFI4_URL),$(XTENSA_HIFI4_MD5),xa_nnlib_hifi4,)) PLATFORM_ARGS = \ -mno-mul16 \ diff --git a/tensorflow/lite/micro/tools/make/third_party_downloads.inc b/tensorflow/lite/micro/tools/make/third_party_downloads.inc index 8590ace9fda..174dcbf4515 100644 --- a/tensorflow/lite/micro/tools/make/third_party_downloads.inc +++ b/tensorflow/lite/micro/tools/make/third_party_downloads.inc @@ -80,8 +80,8 @@ EMBARC_MLI_PRE_COMPILED_MD5 := "a95ff9e0370434484f14e7e4114327f6" ZEPHYR_URL := "https://github.com/antmicro/zephyr/archive/55e36b9.zip" ZEPHYR_MD5 := "755622eb4812fde918a6382b65d50c3b" -XTENSA_HIFI4_URL :="https://github.com/foss-xtensa/nnlib-hifi4/raw/master/archive/xa_nnlib_04_07.zip" -XTENSA_HIFI4_MD5 :="f234764928f9a42901df33a27e118c8b" +XTENSA_HIFI4_URL :="https://github.com/foss-xtensa/nnlib-hifi4/raw/master/archive/xa_nnlib_06_27.zip" +XTENSA_HIFI4_MD5 :="45fdc1209a8da62ab568aa6040f7eabf" ETHOSU_URL := "https://git.mlplatform.org/ml/ethos-u/ethos-u-core-driver.git/snapshot/ethos-u-core-driver-bcb5aaa99756f1b5c1295b079ebdd60996bc75a5.tar.gz" ETHOSU_MD5 := "d2073c8d88fc167fd5c46b5dcda58ea1" From 254477b747870f2ff1f826b0d550f7982cb886fc Mon Sep 17 00:00:00 2001 From: Denisa Roberts Date: Mon, 13 Jul 2020 18:05:52 -0400 Subject: [PATCH 0481/2522] Add error message fix --- tensorflow/python/ops/linalg_grad.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/python/ops/linalg_grad.py b/tensorflow/python/ops/linalg_grad.py index 53708dde221..40f03491e6d 100644 --- a/tensorflow/python/ops/linalg_grad.py +++ b/tensorflow/python/ops/linalg_grad.py @@ -495,7 +495,7 @@ def _QrGrad(op, dq, dr): raise NotImplementedError("QrGrad not implemented with dynamic shapes.") if (r.shape.dims[-2].value > r.shape.dims[-1].value and q.shape.dims[-2].value == q.shape.dims[-1].value): - raise NotImplementedError("QrGrad not implemented when ncols > nrows " + raise NotImplementedError("QrGrad not implemented when nrows > ncols " "and full_matrices is true.") def _TriangularSolve(x, r): @@ -506,7 +506,6 @@ def _QrGrad(op, dq, dr): def _QrGradSquareAndDeepMatrices(q, r, dq, dr): """Gradient for matrix orders num_rows >= num_cols - and full_matrices is false. 
""" qdq = math_ops.matmul(q, dq, adjoint_a=True) From bc9a9a35bcf59ca37ba86fa95b1710d9712a3aa8 Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Wed, 15 Jul 2020 05:38:16 -0700 Subject: [PATCH 0482/2522] Integrate LLVM at https://github.com/llvm/llvm-project/commit/f782d9c7002e PiperOrigin-RevId: 321343246 Change-Id: I0815ae123e5492ac6f960cd1cfb214610ab384eb --- tensorflow/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index a1c48e2675f..b2298949af8 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -710,8 +710,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "84a1bc7f2c0c7bd5f18a4ecaf91e27644aa94190" - LLVM_SHA256 = "ae5a0e4bdca845ab4b0af588a38818d4f1005ae391d5dcef800615ec7ebbc370" + LLVM_COMMIT = "f782d9c7002edaaf56c06a6cc1775f8f67713a29" + LLVM_SHA256 = "78604619434a7a33fa7087fe9090de38be56ae866e41e7b4e897ea62d0a3c623" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From b7cbbb454d0402fd33217202f95783c981dc9b15 Mon Sep 17 00:00:00 2001 From: Tom Hennigan Date: Wed, 15 Jul 2020 06:12:51 -0700 Subject: [PATCH 0483/2522] Fix typo: BufferFromPyal -> BufferFromPyval. PiperOrigin-RevId: 321346825 Change-Id: Ica5e622513464f42c2e44d65bd63efe9a97d585e --- tensorflow/compiler/xla/python/py_client.cc | 2 +- tensorflow/compiler/xla/python/py_client.h | 2 +- tensorflow/compiler/xla/python/xla.cc | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/python/py_client.cc b/tensorflow/compiler/xla/python/py_client.cc index 20f2b782fdb..f0273d5ed4b 100644 --- a/tensorflow/compiler/xla/python/py_client.cc +++ b/tensorflow/compiler/xla/python/py_client.cc @@ -83,7 +83,7 @@ PyClient::GetDefaultDeviceAssignment1D(int num_replicas) { return result; } -StatusOr> PyClient::BufferFromPyal( +StatusOr> PyClient::BufferFromPyval( const pybind11::object& argument, Device* device, bool force_copy, PjRtBuffer::HostBufferSemantics host_buffer_semantics) { if (device == nullptr) { diff --git a/tensorflow/compiler/xla/python/py_client.h b/tensorflow/compiler/xla/python/py_client.h index be61bd74419..d33f3dadd7d 100644 --- a/tensorflow/compiler/xla/python/py_client.h +++ b/tensorflow/compiler/xla/python/py_client.h @@ -120,7 +120,7 @@ class PyClient : public std::enable_shared_from_this { return pjrt_client_->client()->CreateHostToDeviceChannelHandle(); } - StatusOr> BufferFromPyal( + StatusOr> BufferFromPyval( const pybind11::object& argument, Device* device, bool force_copy, PjRtBuffer::HostBufferSemantics host_buffer_semantics); diff --git a/tensorflow/compiler/xla/python/xla.cc b/tensorflow/compiler/xla/python/xla.cc index ed9b80775d8..9590c5d57c3 100644 --- a/tensorflow/compiler/xla/python/xla.cc +++ b/tensorflow/compiler/xla/python/xla.cc @@ -539,7 +539,7 @@ PYBIND11_MODULE(xla_extension, m) { &PyClient::CreateDeviceToHostChannelHandle) .def("create_host_to_device_channel_handle", &PyClient::CreateHostToDeviceChannelHandle) - .def("buffer_from_pyval", &PyClient::BufferFromPyal, py::arg("argument"), + .def("buffer_from_pyval", &PyClient::BufferFromPyval, py::arg("argument"), py::arg("device") = nullptr, py::arg("force_copy") = false, py::arg("host_buffer_semantics") = 
PjRtBuffer::HostBufferSemantics::kZeroCopy)
From ecdbb645a80332fcf500eea07d247c8ca556d3fc Mon Sep 17 00:00:00 2001
From: Deven Desai
Date: Thu, 9 Jul 2020 20:07:12 +0000
Subject: [PATCH 0484/2522] Prepping for switch to ROCm 3.5+
Starting with ROCm 3.5 the underlying compiler used by hipcc will change from HCC to hip-clang. There will be a corresponding change in the HIP Runtime as well. This commit is part of a series which is intended to make the transition to ROCm 3.5+ easier.
ROCm 3.5+ (more specifically the hip-clang compiler) assumes a default value of 256 (for max threads per block) for GPU kernels, for cases where that value is not explicitly specified via the __launch_bounds__ attribute. If such a kernel (which has no __launch_bounds__ attribute specified) is called at runtime with a threads_per_block value greater than 256, it is possible to run into undetermined behaviour. This will be changed to become a runtime error instead (in a forthcoming ROCm release).
This change adds the `__launch_bounds__(1024)` attribute to some GPU kernels which have the possibility of being launched with a threads_per_block value > 256.
--- .../core/kernels/reduction_gpu_kernels.cu.h | 29 +++++------ .../core/kernels/training_ops_gpu.cu.cc | 48 +++++++++---------- 2 files changed, 36 insertions(+), 41 deletions(-)
diff --git a/tensorflow/core/kernels/reduction_gpu_kernels.cu.h b/tensorflow/core/kernels/reduction_gpu_kernels.cu.h index c043c6a8e33..103d01b6c58 100644
--- a/tensorflow/core/kernels/reduction_gpu_kernels.cu.h
+++ b/tensorflow/core/kernels/reduction_gpu_kernels.cu.h
@@ -147,7 +147,7 @@ struct Or { // each block does a grid strided loop and reduces its values locally // the case of one block is used for low latency small reductions to scalars template -__global__ void BlockReduceKernel( +__global__ __launch_bounds__(1024) void BlockReduceKernel( T in, OUT_T out, int num_elems, Op op, typename std::iterator_traits::value_type initVal) { const int bid = blockIdx.x;
@@ -184,7 +184,7 @@ __global__ void BlockReduceKernel( // maps a warp to each row template -__global__ void RowReduceKernel( +__global__ __launch_bounds__(1024) void RowReduceKernel( T in, OUT_T out, int num_rows, int num_cols, Op op, typename std::iterator_traits::value_type initVal) { typedef typename std::iterator_traits::value_type value_type;
@@ -252,7 +252,7 @@ struct storage_type> { // Works only if there are <= 16 columns // each warps sums over multiple rows at once template -__global__ void ColumnReduceMax16ColumnsKernel( +__global__ __launch_bounds__(1024) void ColumnReduceMax16ColumnsKernel( T in, OUT_T out, int num_rows, int num_cols, Op op, typename std::iterator_traits::value_type initVal) { typedef typename std::iterator_traits::value_type value_type;
@@ -322,7 +322,7 @@ __global__ void ColumnReduceMax16ColumnsKernel( // Maps each block to a column range TF_RED_WARPSIZE wide template -__global__ void ColumnReduceKernel( +__global__ __launch_bounds__(1024) void ColumnReduceKernel( T in, OUT_T out, int num_rows, int num_cols, Op op, typename std::iterator_traits::value_type initVal) { typedef typename std::iterator_traits::value_type value_type;
@@ -388,7 +388,7 @@ __global__ void ColumnReduceKernel( // segments cannot cross warp boundaries (mainly used for reducing the segments // that come from the Max16Columns column reduction kernel) template -__global__ void CleanupSegments( +__global__ __launch_bounds__(1024) void CleanupSegments( T partial_sums, OUT_T out, int num_rows, int num_cols, int segment_size, Op
op, typename std::iterator_traits::value_type initVal) { typedef typename std::iterator_traits::value_type value_type; @@ -412,8 +412,8 @@ __global__ void CleanupSegments( // assigns one thread to a column template -__global__ void ColumnReduceSimpleKernel(T in, OUT_T out, int num_planes, - int num_rows, int num_cols, Op op) { +__global__ __launch_bounds__(1024) void ColumnReduceSimpleKernel( + T in, OUT_T out, int num_planes, int num_rows, int num_cols, Op op) { typedef typename std::iterator_traits::value_type value_type; const int gid = threadIdx.x + blockIdx.x * blockDim.x; const int elems_per_plane = num_rows * num_cols; @@ -479,11 +479,9 @@ __device__ __inline__ T ComputeSum(IN_T in_, const int plane, } template -__global__ void ColumnReduceInToTempKernel(void* __restrict__ temp, - int temp_in_offset, - int temp_out_offset, IN_T in, - int num_planes, int num_rows, - int num_cols, Op op) { +__global__ __launch_bounds__(1024) void ColumnReduceInToTempKernel( + void* __restrict__ temp, int temp_in_offset, int temp_out_offset, IN_T in, + int num_planes, int num_rows, int num_cols, Op op) { typedef typename std::iterator_traits::value_type value_type; value_type* t = (value_type*)temp; @@ -510,10 +508,9 @@ __global__ void ColumnReduceInToTempKernel(void* __restrict__ temp, } template -__global__ void ColumnReduceTempToOutKernel(void* __restrict__ temp, - int temp_in_offset, T in, OUT_T out, - int num_planes, int num_rows, - int num_cols, Op op) { +__global__ __launch_bounds__(1024) void ColumnReduceTempToOutKernel( + void* __restrict__ temp, int temp_in_offset, T in, OUT_T out, + int num_planes, int num_rows, int num_cols, Op op) { typedef typename std::iterator_traits::value_type value_type; value_type* t = (value_type*)temp; const int tid = threadIdx.x; diff --git a/tensorflow/core/kernels/training_ops_gpu.cu.cc b/tensorflow/core/kernels/training_ops_gpu.cu.cc index 92496e63e1a..bbd22bae859 100644 --- a/tensorflow/core/kernels/training_ops_gpu.cu.cc +++ b/tensorflow/core/kernels/training_ops_gpu.cu.cc @@ -28,12 +28,11 @@ typedef Eigen::GpuDevice GPUDevice; namespace functor { template -__global__ void ApplyAdamKernel(int32 data_dim, T* var, T* m, T* v, - const T* const beta1_power_, - const T* const beta2_power_, const T* const lr_, - const T* const beta1_, const T* const beta2_, - const T* const epsilon_, const T* grad, - bool use_nesterov) { +__global__ __launch_bounds__(1024) void ApplyAdamKernel( + int32 data_dim, T* var, T* m, T* v, const T* const beta1_power_, + const T* const beta2_power_, const T* const lr_, const T* const beta1_, + const T* const beta2_, const T* const epsilon_, const T* grad, + bool use_nesterov) { eigen_assert(blockDim.y == 1); eigen_assert(blockDim.z == 1); eigen_assert(gridDim.y == 1); @@ -68,7 +67,7 @@ __global__ void ApplyAdamKernel(int32 data_dim, T* var, T* m, T* v, } template -__global__ void SparseApplyKerasMomentumKernel( +__global__ __launch_bounds__(1024) void SparseApplyKerasMomentumKernel( T* var, T* accum, const T* lr, const T* grad, const Tindex* indices, const T* momentum, bool use_nesterov, Tindex param_rows, Tindex updates_size, Tindex indices_size) { @@ -186,9 +185,11 @@ __device__ std::complex impl_rsqrt(std::complex x) { } template -__global__ void ApplyAdagradKernel(GpuLaunchConfig cfg, T* var, T* accum, - const T* lr, const T* grad, - bool update_slots) { +__global__ __launch_bounds__(1024) void ApplyAdagradKernel(GpuLaunchConfig cfg, + T* var, T* accum, + const T* lr, + const T* grad, + bool update_slots) { GPU_1D_KERNEL_LOOP(i, 
cfg.virtual_thread_count) { if (update_slots) accum[i] += grad[i] * grad[i]; var[i] -= lr[0] * grad[i] * impl_rsqrt(accum[i]);
@@ -196,9 +197,9 @@ __global__ void ApplyAdagradKernel(GpuLaunchConfig cfg, T* var, T* accum, } template -__global__ void ApplyAdagradV2Kernel(GpuLaunchConfig cfg, T* var, T* accum, - const T* lr, const T* epsilon, - const T* grad, bool update_slots) { +__global__ __launch_bounds__(1024) void ApplyAdagradV2Kernel( + GpuLaunchConfig cfg, T* var, T* accum, const T* lr, const T* epsilon, + const T* grad, bool update_slots) { GPU_1D_KERNEL_LOOP(i, cfg.virtual_thread_count) { if (update_slots) accum[i] += grad[i] * grad[i]; T update = grad[i] / (impl_sqrt(accum[i]) + epsilon[0]);
@@ -207,10 +208,9 @@ __global__ void ApplyAdagradV2Kernel(GpuLaunchConfig cfg, T* var, T* accum, } template -__global__ void ApplyAdadeltaKernel(GpuLaunchConfig cfg, T* var, T* accum, - T* accum_update, const T* plr, - const T* prho, const T* peps, - const T* grad) { +__global__ __launch_bounds__(1024) void ApplyAdadeltaKernel( + GpuLaunchConfig cfg, T* var, T* accum, T* accum_update, const T* plr, + const T* prho, const T* peps, const T* grad) { T rho = prho[0]; T eps = peps[0]; T lr = plr[0];
@@ -224,10 +224,9 @@ __global__ void ApplyAdadeltaKernel(GpuLaunchConfig cfg, T* var, T* accum, } template -__global__ void ApplyRMSPropKernel(GpuLaunchConfig cfg, T* var, T* ms, T* mom, - const T* plr, const T* prho, - const T* pmomentum, const T* peps, - const T* grad) { +__global__ __launch_bounds__(1024) void ApplyRMSPropKernel( + GpuLaunchConfig cfg, T* var, T* ms, T* mom, const T* plr, const T* prho, + const T* pmomentum, const T* peps, const T* grad) { T rho = prho[0]; T eps = peps[0]; T lr = plr[0];
@@ -240,10 +239,9 @@ __global__ void ApplyRMSPropKernel(GpuLaunchConfig cfg, T* var, T* ms, T* mom, } template -__global__ void ApplyCenteredRMSPropKernel(GpuLaunchConfig cfg, T* var, T* mg, - T* ms, T* mom, const T* plr, - const T* prho, const T* pmomentum, - const T* peps, const T* grad) { +__global__ __launch_bounds__(1024) void ApplyCenteredRMSPropKernel( + GpuLaunchConfig cfg, T* var, T* mg, T* ms, T* mom, const T* plr, + const T* prho, const T* pmomentum, const T* peps, const T* grad) { T rho = prho[0]; T eps = peps[0]; T lr = plr[0];
From bdbfaaf389111fdd9332a643315e2d4e1a9ae361 Mon Sep 17 00:00:00 2001
From: Deven Desai
Date: Wed, 24 Jun 2020 14:57:33 +0000
Subject: [PATCH 0485/2522] Prepping for switch to ROCm 3.5+
Starting with ROCm 3.5 the underlying compiler used by hipcc will change from HCC to hip-clang. There will be a corresponding change in the HIP Runtime as well. This commit is part of a series which is intended to make the transition to ROCm 3.5+ easier.
ROCm 3.5+ (more specifically the hip-clang compiler) seems to be picky about `__launch_bounds__` attribute values that are lower than the threads_per_warp value (64 on the ROCm platform). This change is to accommodate that pickiness.
The macro TENSORFLOW_COMPILER_IS_HIP_CLANG is only true when compiling TF with ROCm 3.5 and higher. The macro is a temporary construct to aid with the transition. Once the transition is complete, it will be removed and the code updated appropriately.
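As an editorial illustration only (not part of either ROCm patch): a minimal HIP/CUDA C++ sketch of the two rules described above. The kernel and helper names (ScaleKernel, LaunchScale) are hypothetical, and plain launch syntax is used instead of TF's launch helpers; only the `__launch_bounds__(1024)` attribute, the TENSORFLOW_COMPILER_IS_HIP_CLANG guard, and the 64-thread ROCm warp size come from the commit text and the surrounding diffs.

    #include <algorithm>

    // Declare the real upper bound explicitly so hip-clang does not assume a
    // 256-thread-per-block limit for this kernel.
    template <typename T>
    __global__ __launch_bounds__(1024) void ScaleKernel(T* data, int n, T factor) {
      int i = blockIdx.x * blockDim.x + threadIdx.x;
      if (i < n) data[i] *= factor;
    }

    // Host-side launch helper: never let the block size drop below the ROCm
    // warp size (64), mirroring the scan_ops_gpu.h change below.
    void LaunchScale(float* data, int n, float factor) {
      int block_size = std::max(1, std::min(n, 1024));
    #if TENSORFLOW_COMPILER_IS_HIP_CLANG
      constexpr int kRocmThreadsPerWarp = 64;
      block_size = std::max(block_size, kRocmThreadsPerWarp);
    #endif
      int num_blocks = (n + block_size - 1) / block_size;
      ScaleKernel<float><<<num_blocks, block_size>>>(data, n, factor);
    }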
--- tensorflow/core/kernels/scan_ops_gpu.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/core/kernels/scan_ops_gpu.h b/tensorflow/core/kernels/scan_ops_gpu.h index aca2a8985de..d5ceca92a80 100644 --- a/tensorflow/core/kernels/scan_ops_gpu.h +++ b/tensorflow/core/kernels/scan_ops_gpu.h @@ -248,6 +248,10 @@ void LaunchScan(const GPUDevice& d, typename TTypes::ConstTensor in, int num_blocks = dimx * dimz; int ideal_block_size = dimy / items_per_thread; +#if TENSORFLOW_COMPILER_IS_HIP_CLANG + const int rocm_threads_per_warp = 64; + ideal_block_size = std::max(ideal_block_size, rocm_threads_per_warp); +#endif // There seems to be a bug when the type is not float and block_size 1024. // Launch on the smallest power of 2 block size that we can. From 0951baf109f24b66226def8c056971d60629ee7c Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Wed, 15 Jul 2020 07:23:56 -0700 Subject: [PATCH 0486/2522] Integrate LLVM at https://github.com/llvm/llvm-project/commit/c11c78a1bd0b PiperOrigin-RevId: 321355968 Change-Id: Ie6b0731c01e5eb1956ab4823e287e8fc9170078f --- tensorflow/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index b2298949af8..946e56b5b1e 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -710,8 +710,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "f782d9c7002edaaf56c06a6cc1775f8f67713a29" - LLVM_SHA256 = "78604619434a7a33fa7087fe9090de38be56ae866e41e7b4e897ea62d0a3c623" + LLVM_COMMIT = "c11c78a1bd0b3275bf845604aae3c94e97acceed" + LLVM_SHA256 = "f4ec2918bbde384121152d72b46b8c7094aed08fa61a04803ff6c7b3c18448c1" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From ebcd2928f5b0356338bacee1a52bed09d675ad55 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 Jul 2020 07:45:32 -0700 Subject: [PATCH 0487/2522] Add TPUUpdateEmbeddingEnqueueOpInputsPass to MLIR bridge pipeline. PiperOrigin-RevId: 321358813 Change-Id: I4778190d67b6eccf4bc3acd6c9888ef078221f2d --- tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc b/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc index 1963931b497..35ffabb9131 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc @@ -87,6 +87,7 @@ void CreateTPUBridgePipeline(OpPassManager &pm) { // changed constants out of tf_device.Launch. func_pm.addPass(TFDevice::CreateDecomposeResourceOpsPass()); func_pm.addPass(CreateTPUHostComputationExpansionPass()); + pm.addNestedPass(CreateTPUUpdateEmbeddingEnqueueOpInputsPass()); pm.addPass(CreateTPUExtractHeadTailOutsideCompilationPass()); // Run another shape inference pass because resource decomposition might have // created new partial types. 
From 16dcd3474774170e45a0e3302001b67c421d0b9e Mon Sep 17 00:00:00 2001 From: tg-at-google Date: Wed, 15 Jul 2020 11:17:35 -0400 Subject: [PATCH 0488/2522] Update bcast.h --- tensorflow/core/util/bcast.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/util/bcast.h b/tensorflow/core/util/bcast.h index 4337b0dc4fe..0a2c68d3f82 100644 --- a/tensorflow/core/util/bcast.h +++ b/tensorflow/core/util/bcast.h @@ -176,7 +176,7 @@ BCastList::BCastList(const BCastList::Vec (&x)[N], // 1-extend and align all vectors. for (int i = 0; i < N; ++i) { - if (copy_i_size < largest_rank) { + if (copy[i].size() < largest_rank) { copy[i].resize(largest_rank, 1); } } From c4b16f1c258cfd6320bd92eb60de8be0899daf14 Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Wed, 15 Jul 2020 15:35:45 +0000 Subject: [PATCH 0489/2522] restore list_ops_test.py --- .../python/kernel_tests/list_ops_test.py | 132 +++++++++--------- 1 file changed, 66 insertions(+), 66 deletions(-) diff --git a/tensorflow/python/kernel_tests/list_ops_test.py b/tensorflow/python/kernel_tests/list_ops_test.py index ce20cf489e6..53ebdd3ab88 100644 --- a/tensorflow/python/kernel_tests/list_ops_test.py +++ b/tensorflow/python/kernel_tests/list_ops_test.py @@ -78,8 +78,8 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): l = list_ops.empty_tensor_list( element_dtype=dtypes.float32, element_shape=[], max_num_elements=1) l = list_ops.tensor_list_push_back(l, constant_op.constant(1.0)) - with self.assertRaisesRegex(errors.InvalidArgumentError, - "Tried to push item into a full list"): + with self.assertRaisesRegexp(errors.InvalidArgumentError, + "Tried to push item into a full list"): l = list_ops.tensor_list_push_back(l, 2.) self.evaluate(l) @@ -91,8 +91,8 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): element_dtype=dtypes.float32, element_shape=[], max_num_elements=max_num_elements) - with self.assertRaisesRegex(errors.InvalidArgumentError, - "Trying to pop from an empty list"): + with self.assertRaisesRegexp(errors.InvalidArgumentError, + "Trying to pop from an empty list"): l = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32) self.evaluate(l) @@ -115,7 +115,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testPopUninitializedTensorWithInvalidElementShapeFails(self): l = list_ops.tensor_list_reserve( element_dtype=dtypes.float32, element_shape=None, num_elements=3) - with self.assertRaisesRegex( + with self.assertRaisesRegexp( errors.InvalidArgumentError, "Trying to read an uninitialized tensor but " "element_shape is not fully defined"): @@ -124,7 +124,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): l = list_ops.tensor_list_reserve( element_dtype=dtypes.float32, element_shape=[None, 2], num_elements=3) - with self.assertRaisesRegex( + with self.assertRaisesRegexp( errors.InvalidArgumentError, r"Incompatible shapes during merge: \[1,3\] vs. \[\?,2\]"): _, e = gen_list_ops.tensor_list_pop_back( @@ -191,8 +191,8 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): # Should raise an error when the element tensors do not all have the same # shape. - with self.assertRaisesRegex(errors.InvalidArgumentError, - "Incompatible ranks during merge: 0 vs. 1"): + with self.assertRaisesRegexp(errors.InvalidArgumentError, + "Incompatible ranks during merge: 0 vs. 
1"): l = list_ops.tensor_list_push_back(l, constant_op.constant([3.0, 4.0])) t = list_ops.tensor_list_stack(l, element_dtype=dtypes.float32) self.evaluate(t) @@ -213,7 +213,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): # Should raise an error when the element tensors do not all have the same # shape. - with self.assertRaisesRegex( + with self.assertRaisesRegexp( errors.InvalidArgumentError, r"Incompatible shapes during merge: \[1\] vs. \[2\]"): l = list_ops.tensor_list_push_back(l, constant_op.constant([2.0, 3.0])) @@ -234,8 +234,8 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): # Should not be able to stack empty lists with partially defined # element_shape. - with self.assertRaisesRegex(errors.InvalidArgumentError, - "non-fully-defined"): + with self.assertRaisesRegexp(errors.InvalidArgumentError, + "non-fully-defined"): l = list_ops.empty_tensor_list( element_dtype=dtypes.float32, element_shape=[None, 2], @@ -244,8 +244,8 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): self.evaluate(t) # Should not be able to stack empty lists with undefined element_shape. - with self.assertRaisesRegex(errors.InvalidArgumentError, - "non-fully-defined"): + with self.assertRaisesRegexp(errors.InvalidArgumentError, + "non-fully-defined"): l = list_ops.empty_tensor_list( element_dtype=dtypes.float32, element_shape=None, @@ -285,10 +285,10 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testStackReservedListWithNoElementsAndPartialElementShapeFails(self): l = list_ops.tensor_list_reserve( element_dtype=dtypes.float32, element_shape=None, num_elements=3) - with self.assertRaisesRegex( - errors.InvalidArgumentError, "Tried to stack list which only contains " - "uninitialized tensors and has a " - "non-fully-defined element_shape: "): + with self.assertRaisesRegexp(errors.InvalidArgumentError, + "Tried to stack list which only contains " + "uninitialized tensors and has a " + "non-fully-defined element_shape: "): t = list_ops.tensor_list_stack(l, element_dtype=dtypes.float32) self.evaluate(t) @@ -341,8 +341,8 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): # Should raise an error when the requested tensors do not all have the same # shape. - with self.assertRaisesRegex(errors.InvalidArgumentError, - "Incompatible ranks during merge: 0 vs. 1"): + with self.assertRaisesRegexp(errors.InvalidArgumentError, + "Incompatible ranks during merge: 0 vs. 1"): t = list_ops.tensor_list_gather(l, [0, 2], element_dtype=dtypes.float32) self.evaluate(t) @@ -366,7 +366,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): # Should raise an error when the requested tensors do not all have the same # shape. - with self.assertRaisesRegex( + with self.assertRaisesRegexp( errors.InvalidArgumentError, r"Incompatible shapes during merge: \[1\] vs. \[2\]"): t = list_ops.tensor_list_gather(l, [0, 2], element_dtype=dtypes.float32) @@ -387,8 +387,8 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): # Should not be able to gather from empty lists with partially defined # element_shape. 
- with self.assertRaisesRegex(errors.InvalidArgumentError, - "non-fully-defined"): + with self.assertRaisesRegexp(errors.InvalidArgumentError, + "non-fully-defined"): l = list_ops.empty_tensor_list( element_dtype=dtypes.float32, element_shape=[None, 2], @@ -398,8 +398,8 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): # Should not be able to gather from empty lists with undefined # element_shape. - with self.assertRaisesRegex(errors.InvalidArgumentError, - "non-fully-defined"): + with self.assertRaisesRegexp(errors.InvalidArgumentError, + "non-fully-defined"): l = list_ops.empty_tensor_list( element_dtype=dtypes.float32, element_shape=None, @@ -455,7 +455,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testGatherReservedListWithNoElementsAndPartialElementShapeFails(self): l = list_ops.tensor_list_reserve( element_dtype=dtypes.float32, element_shape=None, num_elements=3) - with self.assertRaisesRegex( + with self.assertRaisesRegexp( errors.InvalidArgumentError, "Tried to gather uninitialized tensors from a" " list with non-fully-defined element_shape"): @@ -485,7 +485,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testScatterFailsWhenIndexLargerThanNumElements(self): c0 = constant_op.constant([1.0, 2.0]) - with self.assertRaisesRegex( + with self.assertRaisesRegexp( errors.InvalidArgumentError, "TensorListScatter: Trying to scatter at index 3 in list with size 3"): l = gen_list_ops.tensor_list_scatter_v2( @@ -494,7 +494,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testScatterFailsWithInvalidNumElements(self): c0 = constant_op.constant([1.0, 2.0]) - with self.assertRaisesRegex( + with self.assertRaisesRegexp( errors.InvalidArgumentError, "TensorListScatter expects num_elements >= -1, found: -2"): l = gen_list_ops.tensor_list_scatter_v2( @@ -503,7 +503,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testScatterWithInvalidRowsInInputTensorFails(self): c0 = constant_op.constant([1.0, 2.0]) - with self.assertRaisesRegex( + with self.assertRaisesRegexp( errors.InvalidArgumentError, "Invalid number of rows in input tensor. 
Expected: 3 Actual: 2"): l = list_ops.tensor_list_scatter(c0, [1, 0, 2], []) @@ -511,7 +511,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testScatterWithNegativeIndicesFails(self): c0 = constant_op.constant([1.0, 2.0]) - with self.assertRaisesRegex( + with self.assertRaisesRegexp( errors.InvalidArgumentError, "Indices in TensorListScatter must all be non-negative."): l = list_ops.tensor_list_scatter(c0, [-1, -2], element_shape=[]) @@ -658,7 +658,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testGetUninitializedTensorWithInvalidElementShapeFails(self): l = list_ops.tensor_list_reserve( element_dtype=dtypes.float32, element_shape=None, num_elements=3) - with self.assertRaisesRegex( + with self.assertRaisesRegexp( errors.InvalidArgumentError, "Trying to read an uninitialized tensor but " "element_shape is not fully defined"): @@ -676,7 +676,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): error_type = errors.InvalidArgumentError else: error_type = ValueError - with self.assertRaisesRegex(error_type, r"shapes"): + with self.assertRaisesRegexp(error_type, r"shapes"): e0 = gen_list_ops.tensor_list_get_item( l, 0, element_dtype=dtypes.float32, element_shape=[1, 3]) self.evaluate(e0) @@ -699,7 +699,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testSetOnEmptyListWithMaxNumElementsFails(self): l = list_ops.empty_tensor_list( element_dtype=dtypes.float32, element_shape=[], max_num_elements=3) - with self.assertRaisesRegex( + with self.assertRaisesRegexp( errors.InvalidArgumentError, "Trying to modify element 0 in a list with 0 elements."): l = list_ops.tensor_list_set_item(l, 0, 1.) @@ -882,8 +882,8 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): with ops.device("/job:ps"): l_ps = array_ops.identity(l) l_ps = list_ops.tensor_list_push_back(l_ps, 2.) - with self.assertRaisesRegex(errors.InvalidArgumentError, - "Tried to push item into a full list"): + with self.assertRaisesRegexp(errors.InvalidArgumentError, + "Tried to push item into a full list"): with ops.device("/job:worker"): l_worker = array_ops.identity(l_ps) l_worker = list_ops.tensor_list_push_back(l_worker, 3.0) @@ -943,8 +943,8 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): # at graph building time. 
l = list_ops.tensor_list_set_item(l, 0, ph) l_0 = list_ops.tensor_list_get_item(l, 0, element_dtype=dtypes.float32) - with self.assertRaisesRegex(errors.InvalidArgumentError, - "incompatible shape"): + with self.assertRaisesRegexp(errors.InvalidArgumentError, + "incompatible shape"): sess.run(l_0, {ph: [3.0]}) def testResourceVariableScatterGather(self): @@ -1021,7 +1021,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): "element shapes are not identical at index 0") else: expected_error = (ValueError, "Shapes must be equal rank") - with self.assertRaisesRegex(*expected_error): + with self.assertRaisesRegexp(*expected_error): l_batch_of_vec_tls = array_ops.stack( [list_ops.tensor_list_from_tensor([[1.0]], element_shape=[1])] * 2) self.evaluate( @@ -1033,7 +1033,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): r"input_b\[0\].dtype != element_dtype.") else: expected_error = (ValueError, "input_b.type != element_dtype") - with self.assertRaisesRegex(*expected_error): + with self.assertRaisesRegexp(*expected_error): l_batch_of_int_tls = array_ops.stack( [list_ops.tensor_list_from_tensor([1], element_shape=[])] * 2) self.evaluate( @@ -1073,8 +1073,8 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): with self.assertRaises((errors.InvalidArgumentError, ValueError)): self.evaluate(list_ops.tensor_list_push_back_batch(l_batch, [])) - with self.assertRaisesRegex(errors.InvalidArgumentError, - "incompatible shape to a list at index 0"): + with self.assertRaisesRegexp(errors.InvalidArgumentError, + "incompatible shape to a list at index 0"): self.evaluate( list_ops.tensor_list_push_back_batch(l_batch, [[3.0], [4.0]])) @@ -1082,7 +1082,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): expected_error = (errors.InvalidArgumentError, "Invalid data type") else: expected_error = (ValueError, "wrong element dtype") - with self.assertRaisesRegex(*expected_error): + with self.assertRaisesRegexp(*expected_error): self.evaluate(list_ops.tensor_list_push_back_batch(l_batch, [3, 4])) def testZerosLike(self): @@ -1246,7 +1246,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): element_shape=[], element_dtype=dtypes.float32, num_elements=2) l2 = list_ops.tensor_list_reserve( element_shape=[], element_dtype=dtypes.float32, num_elements=3) - with self.assertRaisesRegex( + with self.assertRaisesRegexp( errors.InvalidArgumentError, "Trying to add two lists of tensors with different lengths"): l = math_ops.add_n([l1, l2]) @@ -1268,7 +1268,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): element_dtype=dtypes.float32, num_elements=3) l = math_ops.add_n([l1, l2]) - with self.assertRaisesRegex( + with self.assertRaisesRegexp( errors.InvalidArgumentError, "Trying to add two lists of tensors with incompatible element shapes" ): @@ -1314,7 +1314,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): element_dtype=dtypes.float32, element_shape=None) l = list_ops.tensor_list_push_back(l, [[0., 1.]]) l = list_ops.tensor_list_push_back(l, [[2.], [4.]]) - with self.assertRaisesRegex( + with self.assertRaisesRegexp( errors.InvalidArgumentError, r"Incompatible shapes during merge: " r"\[2\] vs. 
\[1\]"): t = list_ops.tensor_list_concat(l, element_dtype=dtypes.float32) @@ -1333,7 +1333,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testConcatEmptyListWithUnknownElementShapeFails(self): l = list_ops.empty_tensor_list( element_dtype=dtypes.float32, element_shape=None) - with self.assertRaisesRegex( + with self.assertRaisesRegexp( errors.InvalidArgumentError, "All except the first dimension must be fully" " defined when concating an empty tensor list"): @@ -1343,7 +1343,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testConcatEmptyListWithPartiallyDefinedElementShapeFails(self): l = list_ops.empty_tensor_list( element_dtype=dtypes.float32, element_shape=[2, None]) - with self.assertRaisesRegex( + with self.assertRaisesRegexp( errors.InvalidArgumentError, "All except the first dimension must be fully" " defined when concating an empty tensor list"): @@ -1354,7 +1354,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): l = list_ops.empty_tensor_list( element_dtype=dtypes.float32, element_shape=tensor_shape.TensorShape([])) - with self.assertRaisesRegex( + with self.assertRaisesRegexp( errors.InvalidArgumentError, "Concat requires elements to be at least vectors, " "found scalars instead"): @@ -1365,14 +1365,14 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): l = list_ops.empty_tensor_list( element_dtype=dtypes.float32, element_shape=None) l1 = list_ops.tensor_list_push_back(l, 1.) - with self.assertRaisesRegex( + with self.assertRaisesRegexp( errors.InvalidArgumentError, "Concat saw a scalar shape at index 0" " but requires at least vectors"): t = list_ops.tensor_list_concat(l1, element_dtype=dtypes.float32) self.evaluate(t) l1 = list_ops.tensor_list_push_back(l, [1.]) l1 = list_ops.tensor_list_push_back(l1, 2.) 
- with self.assertRaisesRegex( + with self.assertRaisesRegexp( errors.InvalidArgumentError, "Concat saw a scalar shape at index 1" " but requires at least vectors"): t = list_ops.tensor_list_concat(l1, element_dtype=dtypes.float32) @@ -1420,7 +1420,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testConcatWithUninitializedTensorsFailsIfNoElementShape(self): l = list_ops.tensor_list_reserve( element_dtype=dtypes.float32, element_shape=None, num_elements=3) - with self.assertRaisesRegex( + with self.assertRaisesRegexp( errors.InvalidArgumentError, r"Trying to concat list with only uninitialized tensors " r"but element_shape_except_first_dim_ is not fully defined"): @@ -1430,7 +1430,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testConcatWithUninitializedTensorsFailsIfNoInputLengths(self): l = list_ops.tensor_list_reserve( element_dtype=dtypes.float32, element_shape=[None, 3], num_elements=3) - with self.assertRaisesRegex( + with self.assertRaisesRegexp( errors.InvalidArgumentError, r"List contains uninitialized tensor at index 0" r" but leading_dims has only 0 elements."): @@ -1467,7 +1467,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): with self.cached_session(): tensor = array_ops.placeholder(dtype=dtypes.float32) l = list_ops.tensor_list_split(tensor, element_shape=None, lengths=[1]) - with self.assertRaisesRegex( + with self.assertRaisesRegexp( errors.InvalidArgumentError, r"Tensor must be at least a vector, but saw shape: \[\]"): l.eval({tensor: 1}) @@ -1479,24 +1479,24 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): l = list_ops.tensor_list_split([1., 2.], element_shape=None, lengths=lengths) - with self.assertRaisesRegex( + with self.assertRaisesRegexp( errors.InvalidArgumentError, r"Expected lengths to be a vector, received shape: \[\]"): l.eval({lengths: 1}) def testSplitWithInvalidLengthsFails(self): - with self.assertRaisesRegex(errors.InvalidArgumentError, - r"Invalid value in lengths: -1"): + with self.assertRaisesRegexp(errors.InvalidArgumentError, + r"Invalid value in lengths: -1"): l = list_ops.tensor_list_split([1., 2.], element_shape=None, lengths=[1, -1]) self.evaluate(l) - with self.assertRaisesRegex( + with self.assertRaisesRegexp( errors.InvalidArgumentError, r"Attempting to slice \[0, 3\] from tensor with length 2"): l = list_ops.tensor_list_split([1., 2.], element_shape=None, lengths=[3]) self.evaluate(l) - with self.assertRaisesRegex( + with self.assertRaisesRegexp( errors.InvalidArgumentError, r"Unused values in tensor. 
Length of tensor: 2 Values used: 1"): l = list_ops.tensor_list_split([1., 2.], element_shape=None, lengths=[1]) @@ -1504,11 +1504,11 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): @test_util.run_deprecated_v1 def testSkipEagerSplitWithScalarElementShapeFails(self): - with self.assertRaisesRegex(ValueError, - r"Shapes must be equal rank, but are 1 and 0"): + with self.assertRaisesRegexp(ValueError, + r"Shapes must be equal rank, but are 1 and 0"): l = list_ops.tensor_list_split([1., 2.], element_shape=[], lengths=[1, 1]) with self.cached_session(): - with self.assertRaisesRegex( + with self.assertRaisesRegexp( errors.InvalidArgumentError, r"TensorListSplit requires element_shape to be at least of rank 1, " r"but saw: \[\]"): @@ -1520,7 +1520,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testEagerOnlySplitWithScalarElementShapeFails(self): if context.executing_eagerly(): - with self.assertRaisesRegex( + with self.assertRaisesRegexp( errors.InvalidArgumentError, r"TensorListSplit requires element_shape to be at least of rank 1, " r"but saw: \[\]"): @@ -1528,14 +1528,14 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): @test_util.run_deprecated_v1 def testSkipEagerSplitWithIncompatibleTensorShapeAndElementShapeFails(self): - with self.assertRaisesRegex(ValueError, - r"Shapes must be equal rank, but are 2 and 1"): + with self.assertRaisesRegexp(ValueError, + r"Shapes must be equal rank, but are 2 and 1"): l = list_ops.tensor_list_split([[1.], [2.]], element_shape=[1], lengths=[1, 1]) with self.cached_session(): - with self.assertRaisesRegex( + with self.assertRaisesRegexp( errors.InvalidArgumentError, r"tensor shape \[2,1\] is not compatible with element_shape \[1\]"): element_shape = array_ops.placeholder(dtype=dtypes.int32) @@ -1546,7 +1546,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def testEagerOnlySplitWithIncompatibleTensorShapeAndElementShapeFails(self): if context.executing_eagerly(): - with self.assertRaisesRegex( + with self.assertRaisesRegexp( errors.InvalidArgumentError, r"tensor shape \[2,1\] is not compatible with element_shape \[1\]"): list_ops.tensor_list_split([[1.], [2.]], @@ -1576,7 +1576,7 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): [1., 2.]) def testResizeWithInvalidSizeFails(self): - with self.assertRaisesRegex( + with self.assertRaisesRegexp( errors.InvalidArgumentError, "TensorListSlice expects size to be non-negative"): l = list_ops.tensor_list_from_tensor([1., 2., 3.], element_shape=[]) From c4f258a8e111d2a039c644174fb16d4cf3f4c7d1 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Wed, 15 Jul 2020 18:21:50 +0700 Subject: [PATCH 0490/2522] Add get executor --- .../filesystem/plugins/s3/s3_filesystem.cc | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc index ac873f5e9bf..414d5de80d4 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc @@ -29,6 +29,9 @@ limitations under the License. 
constexpr char kS3ClientAllocationTag[] = "S3ClientAllocation"; constexpr int64_t kS3TimeoutMsec = 300000; // 5 min +constexpr char kExecutorTag[] = "TransferManagerExecutorAllocation"; +constexpr int kExecutorPoolSize = 25; + static void* plugin_memory_allocate(size_t size) { return calloc(1, size); } static void plugin_memory_free(void* ptr) { free(ptr); }
@@ -167,6 +170,18 @@ static void GetS3Client(TF_Filesystem* filesystem) { } } +static void GetExecutor(TF_Filesystem* filesystem) { + auto s3_file = + static_cast(filesystem->plugin_filesystem); + absl::MutexLock l(&s3_file->initialization_lock); + + if (s3_file->executor.get() == nullptr) { + s3_file->executor = + Aws::MakeShared( + kExecutorTag, kExecutorPoolSize); + } +} + static void ShutdownClient(Aws::S3::S3Client* s3_client) { if (s3_client != nullptr) { delete s3_client;
From 77787199e48764a439558e9d0b7368d96e730c2e Mon Sep 17 00:00:00 2001
From: Dmitry Volodin
Date: Wed, 15 Jul 2020 18:58:06 +0300
Subject: [PATCH 0491/2522] Fix indent in efficientnet.py
--- tensorflow/python/keras/applications/efficientnet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tensorflow/python/keras/applications/efficientnet.py b/tensorflow/python/keras/applications/efficientnet.py index 34c4cfa9992..bc31dbf5a13 100644
--- a/tensorflow/python/keras/applications/efficientnet.py
+++ b/tensorflow/python/keras/applications/efficientnet.py
@@ -16,7 +16,7 @@ # pylint: disable=missing-docstring """EfficientNet models for Keras. - Reference: +Reference: - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks]( https://arxiv.org/abs/1905.11946) (ICML 2019) """
From 5f1b79f00f04cf26292a5a2d75eb8360ecaee090 Mon Sep 17 00:00:00 2001
From: Bixia Zheng
Date: Wed, 15 Jul 2020 08:51:15 -0700
Subject: [PATCH 0492/2522] [TF:TRT] Avoid null pointer accesses in TRTOptimizationPass::PrintDebugInfo.
Cluster::GetDeviceSet may return a null pointer. Skip printing the DeviceSet when the routine returns nullptr. Move the printing of DeviceSet closer to the other information for the cluster and before the information for the grappler item.
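As an editorial illustration only (not part of this patch): a minimal, self-contained C++ sketch of the guard pattern the change applies. The Cluster and DeviceSet types below are simplified stand-ins, not the real grappler declarations; the point is only that the pointer returned by GetDeviceSet() is checked before being dereferenced.

    #include <iostream>
    #include <string>
    #include <vector>

    struct Device { std::string name; };
    struct DeviceSet { std::vector<Device> devices; };

    struct Cluster {
      const DeviceSet* device_set = nullptr;  // may legitimately stay null
      const DeviceSet* GetDeviceSet() const { return device_set; }
    };

    void PrintDevices(const Cluster& cluster) {
      // Skip printing entirely when no device set was provided.
      if (const DeviceSet* set = cluster.GetDeviceSet()) {
        for (const Device& dev : set->devices) std::cout << dev.name << "\n";
      }
    }

    int main() {
      Cluster cluster;        // no device set attached
      PrintDevices(cluster);  // safe: prints nothing instead of crashing
      return 0;
    }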
PiperOrigin-RevId: 321368504 Change-Id: Ic59c9f79fe759a40558fbf2377818e3d8999d752 --- .../tf2tensorrt/convert/trt_optimization_pass.cc | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/tf2tensorrt/convert/trt_optimization_pass.cc b/tensorflow/compiler/tf2tensorrt/convert/trt_optimization_pass.cc index 1cf98d135cb..4d6f8fa1b31 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/trt_optimization_pass.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/trt_optimization_pass.cc @@ -86,6 +86,7 @@ void TRTOptimizationPass::PrintDebugInfo(grappler::Cluster* cluster, string offset2 = StrCat(offset, offset); string offset3 = StrCat(offset2, offset); string offset4 = StrCat(offset2, offset2); + if (cluster) { LOG(INFO) << offset << "type = " << cluster->type(); LOG(INFO) << offset << "num warmup steps = " << cluster->NumWarmupSteps(); @@ -132,7 +133,15 @@ void TRTOptimizationPass::PrintDebugInfo(grappler::Cluster* cluster, } } } + + if (cluster->GetDeviceSet()) { + for (const auto dev : cluster->GetDeviceSet()->devices()) { + LOG(INFO) << "Device name= " << dev->name() << "Pased name= " + << DeviceNameUtils::ParsedNameToString(dev->parsed_name()); + } + } } + LOG(INFO) << "item: " << item.id; if (!item.feed.empty()) { LOG(INFO) << offset << "Feeds :"; @@ -171,13 +180,6 @@ void TRTOptimizationPass::PrintDebugInfo(grappler::Cluster* cluster, } else { LOG(INFO) << offset << "No keep ops"; } - for (const auto dev : cluster->GetDeviceSet()->devices()) { - const auto& pname = dev->parsed_name(); - LOG(INFO) << "Device name= " << dev->name() - << " parsedname job= " << pname.job << " id= " << pname.id - << " has_id: " << pname.has_id << " has_job: " << pname.has_job - << "has_type: " << pname.has_type << " type =" << pname.type; - } } Status TRTOptimizationPass::Optimize(grappler::Cluster* cluster, From 49369a6652621d797643a2fbd440b812ced620ef Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Wed, 15 Jul 2020 08:52:47 -0700 Subject: [PATCH 0493/2522] Internal cleanup: consolidate and simplify tests. 
PiperOrigin-RevId: 321368769 Change-Id: I857172b5df41fa1e81bc1e1739c715c071b1b85e --- tensorflow/python/autograph/impl/BUILD | 20 +-- .../python/autograph/impl/api_py3_test.py | 84 ------------- tensorflow/python/autograph/impl/api_test.py | 115 ++++++++++++------ .../windows/cpu/pip/build_tf_windows.sh | 1 + 4 files changed, 81 insertions(+), 139 deletions(-) delete mode 100644 tensorflow/python/autograph/impl/api_py3_test.py diff --git a/tensorflow/python/autograph/impl/BUILD b/tensorflow/python/autograph/impl/BUILD index a21b0df2ce8..866314f5b76 100644 --- a/tensorflow/python/autograph/impl/BUILD +++ b/tensorflow/python/autograph/impl/BUILD @@ -40,6 +40,8 @@ py_library( tf_py_test( name = "api_test", srcs = ["api_test.py"], + python_version = "PY3", + srcs_version = "PY3", deps = [ ":impl", "//tensorflow/python:client_testlib", @@ -49,24 +51,6 @@ tf_py_test( ], ) -py_test( - name = "api_py3_test", - srcs = ["api_py3_test.py"], - python_version = "PY3", - srcs_version = "PY3", - tags = [ - "no_oss_py2", - "no_pip", - "nopip", - ], - deps = [ - ":impl", - "//tensorflow/python:client_testlib", - "//tensorflow/python:constant_op", - "//tensorflow/python/autograph/core", - ], -) - tf_py_test( name = "conversion_test", srcs = ["conversion_test.py"], diff --git a/tensorflow/python/autograph/impl/api_py3_test.py b/tensorflow/python/autograph/impl/api_py3_test.py deleted file mode 100644 index c460e478008..00000000000 --- a/tensorflow/python/autograph/impl/api_py3_test.py +++ /dev/null @@ -1,84 +0,0 @@ -# python3 -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for api module.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections -import os - -from tensorflow.python.autograph.core import converter -from tensorflow.python.autograph.impl import api -from tensorflow.python.framework import constant_op -from tensorflow.python.platform import test - -DEFAULT_RECURSIVE = converter.ConversionOptions(recursive=True) - - -class ApiTest(test.TestCase): - - def test_converted_call_kwonly_args(self): - - def test_fn(*, a): - return a - - x = api.converted_call( - test_fn, (), {'a': constant_op.constant(-1)}, options=DEFAULT_RECURSIVE) - self.assertEqual(-1, self.evaluate(x)) - - def test_super_with_no_arg(self): - test_case_self = self - - class TestBase: - - def plus_three(self, x): - return x + 3 - - class TestSubclass(TestBase): - - def plus_three(self, x): - test_case_self.fail('This should never be called.') - - def no_arg(self, x): - return super().plus_three(x) - - tc = api.converted_call(TestSubclass, (), None, options=DEFAULT_RECURSIVE) - - self.assertEqual(5, tc.no_arg(2)) - - def test_converted_call_avoids_triggering_operators(self): - - test_self = self - - class Pair(collections.namedtuple('Pair', ['a', 'b'])): - - def __call__(self): - return self.a + self.b - - def __eq__(self, other): - test_self.fail('Triggered operator') - - p = Pair(constant_op.constant(1), constant_op.constant(2)) - - x = api.converted_call(p, (), {}, options=DEFAULT_RECURSIVE) - self.assertIsNotNone(self.evaluate(x), 3) - - -if __name__ == '__main__': - os.environ['AUTOGRAPH_STRICT_CONVERSION'] = '1' - test.main() diff --git a/tensorflow/python/autograph/impl/api_test.py b/tensorflow/python/autograph/impl/api_test.py index 5b885af43ac..ad7e8e9fb37 100644 --- a/tensorflow/python/autograph/impl/api_test.py +++ b/tensorflow/python/autograph/impl/api_test.py @@ -33,7 +33,6 @@ import types import numpy as np import six -from tensorflow.python.autograph import utils from tensorflow.python.autograph.core import ag_ctx from tensorflow.python.autograph.core import converter from tensorflow.python.autograph.core import converter_testing @@ -47,15 +46,15 @@ from tensorflow.python.data.ops import dataset_ops from tensorflow.python.eager import def_function from tensorflow.python.eager import function from tensorflow.python.framework import constant_op +from tensorflow.python.framework import ops from tensorflow.python.framework import test_util -from tensorflow.python.ops import gen_math_ops +from tensorflow.python.ops import math_ops from tensorflow.python.ops import variables from tensorflow.python.platform import test from tensorflow.python.util import function_utils from tensorflow.python.util import tf_decorator from tensorflow.python.util import tf_inspect -tf = utils.fake_tf() global_n = 2 @@ -90,6 +89,52 @@ class ApiTest(test.TestCase): self.assertEmpty( tuple(o for o in objects_after if isinstance(o, TestResource))) + def test_converted_call_kwonly_args(self): + + def test_fn(*, a): + return a + + x = api.converted_call( + test_fn, (), {'a': constant_op.constant(-1)}, options=DEFAULT_RECURSIVE) + self.assertEqual(-1, self.evaluate(x)) + + def test_super_with_no_arg(self): + test_case_self = self + + class TestBase: + + def plus_three(self, x): + return x + 3 + + class TestSubclass(TestBase): + + def plus_three(self, x): + test_case_self.fail('This should never be called.') + + def no_arg(self, x): + 
return super().plus_three(x) + + tc = api.converted_call(TestSubclass, (), None, options=DEFAULT_RECURSIVE) + + self.assertEqual(5, tc.no_arg(2)) + + def test_converted_call_avoids_triggering_operators(self): + + test_self = self + + class Pair(collections.namedtuple('Pair', ['a', 'b'])): + + def __call__(self): + return self.a + self.b + + def __eq__(self, other): + test_self.fail('Triggered operator') + + p = Pair(constant_op.constant(1), constant_op.constant(2)) + + x = api.converted_call(p, (), {}, options=DEFAULT_RECURSIVE) + self.assertIsNotNone(self.evaluate(x), 3) + @test_util.run_deprecated_v1 def test_decorator_recursive(self): @@ -102,16 +147,15 @@ class ApiTest(test.TestCase): @api.convert(recursive=True) def test_method(self, x, s, a): - while tf.reduce_sum(x) > s: + while math_ops.reduce_sum(x) > s: x //= self.called_member(a) return x tc = TestClass() - with self.cached_session() as sess: - x = tc.test_method( - constant_op.constant([2, 4]), constant_op.constant(1), - constant_op.constant(-2)) - self.assertListEqual([0, 1], self.evaluate(x).tolist()) + x = tc.test_method( + constant_op.constant([2, 4]), constant_op.constant(1), + constant_op.constant(-2)) + self.assertListEqual([0, 1], self.evaluate(x).tolist()) @test_util.run_deprecated_v1 def test_decorator_not_recursive(self): @@ -119,20 +163,19 @@ class ApiTest(test.TestCase): class TestClass(object): def called_member(self, a): - return tf.negative(a) + return math_ops.negative(a) @api.convert(recursive=False) def test_method(self, x, s, a): - while tf.reduce_sum(x) > s: + while math_ops.reduce_sum(x) > s: x //= self.called_member(a) return x tc = TestClass() - with self.cached_session() as sess: - x = tc.test_method( - constant_op.constant([2, 4]), constant_op.constant(1), - constant_op.constant(-2)) - self.assertListEqual([0, 1], self.evaluate(x).tolist()) + x = tc.test_method( + constant_op.constant([2, 4]), constant_op.constant(1), + constant_op.constant(-2)) + self.assertListEqual([0, 1], self.evaluate(x).tolist()) @test_util.run_deprecated_v1 def test_convert_then_do_not_convert(self): @@ -141,11 +184,11 @@ class ApiTest(test.TestCase): @api.do_not_convert def called_member(self, a): - return tf.negative(a) + return math_ops.negative(a) @api.convert(recursive=True) def test_method(self, x, s, a): - while tf.reduce_sum(x) > s: + while math_ops.reduce_sum(x) > s: x //= self.called_member(a) return x @@ -168,16 +211,15 @@ class ApiTest(test.TestCase): @api.convert(recursive=True) def test_method(self, x, s, a): - while tf.reduce_sum(x) > s: + while math_ops.reduce_sum(x) > s: x //= self.called_member(a) return x tc = TestClass() - with self.cached_session() as sess: - x = tc.test_method( - constant_op.constant([2, 4]), constant_op.constant(1), - constant_op.constant(-2)) - self.assertListEqual([0, 1], self.evaluate(x).tolist()) + x = tc.test_method( + constant_op.constant([2, 4]), constant_op.constant(1), + constant_op.constant(-2)) + self.assertListEqual([0, 1], self.evaluate(x).tolist()) def test_decorator_preserves_argspec(self): @@ -234,7 +276,7 @@ class ApiTest(test.TestCase): @api.convert(recursive=True) def test_method(self, x, s, a): - while tf.reduce_sum(x) > s: + while math_ops.reduce_sum(x) > s: x //= api.converted_call( self.called_member, (a,), None, options=DEFAULT_RECURSIVE) return x @@ -644,7 +686,7 @@ class ApiTest(test.TestCase): opts = converter.ConversionOptions( user_requested=True, optional_features=None) - x = api.converted_call(gen_math_ops.add, (1, 1), None, options=opts) + x = 
api.converted_call(math_ops.add, (1, 1), None, options=opts) self.assertAllEqual(self.evaluate(x), 2) @@ -685,7 +727,7 @@ class ApiTest(test.TestCase): class TestClass(collections.namedtuple('TestNamedtuple', ('a', 'b'))): def test_method(self, x): - while tf.reduce_sum(x) > self.a: + while math_ops.reduce_sum(x) > self.a: x //= self.b return x @@ -713,7 +755,7 @@ class ApiTest(test.TestCase): class TestClass(collections.namedtuple('TestNamedtuple', ('a', 'b'))): def test_method(self, x): - while tf.reduce_sum(x) > self.a: + while math_ops.reduce_sum(x) > self.a: x //= self.b return x @@ -786,7 +828,7 @@ class ApiTest(test.TestCase): def f(): return dataset_ops.Dataset.range(-3, 3).map(other_fn) - # Dataset iteration only works inside tf. + # Dataset iteration only works inside math_ops. @def_function.function def graph_fn(): ds = api.converted_call(f, (), None, options=DEFAULT_RECURSIVE) @@ -912,13 +954,13 @@ class ApiTest(test.TestCase): def test_to_graph_basic(self): def test_fn(x, s): - while tf.reduce_sum(x) > s: + while math_ops.reduce_sum(x) > s: x //= 2 return x compiled_fn = api.to_graph(test_fn) - with tf.Graph().as_default(): + with ops.Graph().as_default(): x = compiled_fn(constant_op.constant((4, 8)), 4) self.assertAllEqual(self.evaluate(x), (1, 2)) @@ -928,15 +970,14 @@ class ApiTest(test.TestCase): foo = 4 def test_fn(x, s=foo): - while tf.reduce_sum(x) > s: + while math_ops.reduce_sum(x) > s: x //= 2 return x compiled_fn = api.to_graph(test_fn) - with self.cached_session() as sess: - x = compiled_fn(constant_op.constant([4, 8])) - self.assertListEqual([1, 2], self.evaluate(x).tolist()) + x = compiled_fn(constant_op.constant([4, 8])) + self.assertListEqual([1, 2], self.evaluate(x).tolist()) def test_to_graph_with_globals(self): @@ -1056,7 +1097,7 @@ class ApiTest(test.TestCase): def test_to_code_basic(self): def test_fn(x, s): - while tf.reduce_sum(x) > s: + while math_ops.reduce_sum(x) > s: x /= 2 return x @@ -1067,7 +1108,7 @@ class ApiTest(test.TestCase): @def_function.function def test_fn(x, s): - while tf.reduce_sum(x) > s: + while math_ops.reduce_sum(x) > s: x /= 2 return x diff --git a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh index 0152e9decc7..4e5d769cf6b 100644 --- a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh +++ b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh @@ -24,6 +24,7 @@ # - Msys2 # - Anaconda3 # * Bazel windows executable copied as "bazel.exe" and included in PATH. +# change # All commands shall pass, and all should be visible. set -x From 5c21f176b94e05da7ca04c76aafb0dd1185f5e0d Mon Sep 17 00:00:00 2001 From: Dmitry Volodin Date: Wed, 15 Jul 2020 19:09:56 +0300 Subject: [PATCH 0494/2522] Fix indent in densenet.py --- tensorflow/python/keras/applications/densenet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/keras/applications/densenet.py b/tensorflow/python/keras/applications/densenet.py index eae03305d37..1302598f8e9 100644 --- a/tensorflow/python/keras/applications/densenet.py +++ b/tensorflow/python/keras/applications/densenet.py @@ -15,7 +15,7 @@ # pylint: disable=invalid-name """DenseNet models for Keras. 
- Reference: +Reference: - [Densely Connected Convolutional Networks]( https://arxiv.org/abs/1608.06993) (CVPR 2017) """ From dada5c989e865898f3aa968c5715eda335caca40 Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Wed, 15 Jul 2020 09:02:28 -0700 Subject: [PATCH 0495/2522] [MLIR] Extend LowerStaticTensorListPass to handle WhileRegion PiperOrigin-RevId: 321370545 Change-Id: Ieb8de21a584df9770d8806fccfa811c85c2d76ee --- .../lite/tests/lower-static-tensor-list.mlir | 39 ++++++ .../transforms/lower_static_tensor_list.cc | 117 +++++++++++------- 2 files changed, 113 insertions(+), 43 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/tests/lower-static-tensor-list.mlir b/tensorflow/compiler/mlir/lite/tests/lower-static-tensor-list.mlir index 1a61bc3f517..1ebe912284b 100644 --- a/tensorflow/compiler/mlir/lite/tests/lower-static-tensor-list.mlir +++ b/tensorflow/compiler/mlir/lite/tests/lower-static-tensor-list.mlir @@ -277,6 +277,45 @@ func @tensorlistWhileCond(%arg0: tensor, %arg1: tensor) -> ten // CHECK: return %[[RESULT]] : tensor } +// CHECK-LABEL: func @tensorlistWhileRegion +func @tensorlistWhileRegion(%arg0: tensor<2x3xf32>) -> tensor<*xf32> { + %cst = constant dense<3> : tensor<1xi32> + %cst_0 = constant dense<0> : tensor + %cst_1 = constant dense<-1> : tensor + %0 = "tf.TensorListFromTensor"(%arg0, %cst) : (tensor<2x3xf32>, tensor<1xi32>) -> tensor>> + // CHECK: "tf.WhileRegion" + %1:2 = "tf.WhileRegion"(%cst_0, %0) ({ + ^bb0(%carg0: tensor, %carg1: tensor): + %cst_2 = constant dense<2> : tensor + %1 = "tf.Less"(%carg0, %cst_2) : (tensor, tensor) -> tensor + "tf.Yield"(%1) : (tensor) -> () + + // verify condition types + // CHECK: ^bb0(%[[CARG0:.*]]: tensor, %[[CARG1:.*]]: tensor<*xf32>): + // CHECK: %[[COND:.*]] = "tf.Less"(%[[CARG0]], {{.*}}) : (tensor, tensor) -> tensor + // CHECK: "tf.Yield"(%[[COND]]) : (tensor) -> () + + }, + { + ^bb0(%barg0: tensor, %barg1: tensor): + %1 = "tf.TensorListLength"(%barg1) : (tensor) -> tensor + "tf.Yield"(%1, %barg1) : (tensor, tensor) -> () + + // verify body types + // CHECK: ^bb0(%[[BARG0:.*]]: tensor, %[[BARG1:.*]]: tensor<*xf32>): + // CHECK-NOT: tensor + // CHECK: %[[LEN:.*]] = "tf.Gather" + // CHECK-NOT: tensor + // CHECK: "tf.Yield"(%[[LEN]], %[[BARG1]]) : (tensor, tensor<*xf32>) -> () + + }) {is_stateless = false} : (tensor, tensor>>) -> (tensor, tensor>>) + // make sure the variant types in input/output have been updated + // CHECK: {is_stateless = false} : (tensor, tensor<2x3xf32>) -> (tensor, tensor<*xf32>) + %2 = "tf.TensorListStack"(%1#1, %cst_1) : (tensor>>, tensor) -> tensor<*xf32> + // CHECK: return %0#1 : tensor<*xf32> + return %2 : tensor<*xf32> +} + func @tensorlistResize(%arg0: tensor<3x10xf32>, %arg1: tensor<1xi32>, %arg2: tensor) -> tensor { %0 = "tf.TensorListFromTensor"(%arg0, %arg1) : (tensor<3x10xf32>, tensor<1xi32>) -> tensor>> %1 = "tf.TensorListResize"(%0, %arg2) : (tensor>>, tensor) -> tensor>> diff --git a/tensorflow/compiler/mlir/lite/transforms/lower_static_tensor_list.cc b/tensorflow/compiler/mlir/lite/transforms/lower_static_tensor_list.cc index c76a6cfafab..439c44dc77e 100644 --- a/tensorflow/compiler/mlir/lite/transforms/lower_static_tensor_list.cc +++ b/tensorflow/compiler/mlir/lite/transforms/lower_static_tensor_list.cc @@ -17,7 +17,7 @@ limitations under the License. // converting Tensorlist operations in TensorFlow dialect into operations that // can be legalized to TensorFlow Lite dialect with simple replacements. 
The // newly created operations are in the TensorFlow dialect if the operation can -// be represented using a TensorFlow op. Otherwise, TensorFlow Lite dialect op +// be represented using a TensorFlow op. Otherwise, TensorFlow Lite dialect op // is used. #include @@ -738,9 +738,17 @@ struct ConvertIdentity : public OpConversionPattern { } }; +// Returns an unranked tensor type with an element of the same type as `value` +// if `type` is a tensor of variant. Otherwise, returns `type` unmodified. +Type VariantToUnrankedTensorType(Type type, Value value) { + if (getElementTypeOrSelf(type).isa()) + return UnrankedTensorType::get(getElementTypeOrSelf(value.getType())); + return type; +} + // Changes the function type of `cond_func` and `body_func` for the given While // op. -static LogicalResult UpdateFunctionTypes(TF::WhileOp op) { +LogicalResult UpdateFunctionTypes(TF::WhileOp op) { auto module = op.getParentOfType(); auto *context = module.getContext(); @@ -756,30 +764,18 @@ static LogicalResult UpdateFunctionTypes(TF::WhileOp op) { // tensor type if it's a variant type. SmallVector updated_argument_types; updated_argument_types.reserve(num_inputs); - for (int i = 0; i < num_inputs; ++i) { - Type arg_type = func_type.getInput(i); - if (getElementTypeOrSelf(arg_type).isa()) { - arg_type = UnrankedTensorType::get( - getElementTypeOrSelf(op.getOperand(i).getType())); - } - updated_argument_types.push_back(arg_type); - } + for (auto it : llvm::zip(func_type.getInputs(), op.getOperands())) + updated_argument_types.push_back( + VariantToUnrankedTensorType(std::get<0>(it), std::get<1>(it))); - // For each result type in function's results, change it to unranked tensor - // type if it's a variant type. + // Change all DT_VARIANT result types in function results to unranked tensor + // type with element type derived from the corresponding input operand. This + // is correct because while body's inputs and results have the same type. SmallVector updated_result_types; updated_result_types.reserve(num_results); - for (int i = 0; i < num_results; ++i) { - Type result_type = func_type.getResult(i); - if (getElementTypeOrSelf(result_type).isa()) { - // Here update the variant type with the unranked tensor type derived - // from the corresponding input operand. This is correct because while - // body's inputs and results have the same type. - result_type = UnrankedTensorType::get( - getElementTypeOrSelf(op.getOperand(i).getType())); - } - updated_result_types.push_back(result_type); - } + for (auto it : llvm::zip(func_type.getResults(), op.getOperands())) + updated_result_types.push_back( + VariantToUnrankedTensorType(std::get<0>(it), std::get<1>(it))); // Change `func`'s argument type to `unranked_argument_types`. If it // return types contain a `DT_VARIANT`, change it to the unranked type @@ -788,10 +784,9 @@ static LogicalResult UpdateFunctionTypes(TF::WhileOp op) { context)); // Change the argument type for the first block. 
- Block &body_first_bb = func.front(); - for (int i = 0; i < body_first_bb.getNumArguments(); ++i) { - body_first_bb.getArgument(i).setType(updated_argument_types[i]); - } + llvm::for_each(func.getArguments(), [&](BlockArgument &arg) { + arg.setType(updated_argument_types[arg.getArgNumber()]); + }); } return success(); } @@ -804,25 +799,60 @@ struct ConvertWhile : public OpConversionPattern { ConversionPatternRewriter &rewriter) const override { llvm::SmallVector result_types; result_types.reserve(op.getNumOperands()); - for (int i = 0, e = operands.size(); i != e; ++i) { - Type result_ty = op.getResult(i).getType(); + // Change all DT_VARIANT result types to unranked tensor type. + for (auto it : llvm::zip(op.getResultTypes(), operands)) + result_types.push_back( + VariantToUnrankedTensorType(std::get<0>(it), std::get<1>(it))); - // If we notice the result type is a DT_VARIANT, we change the - // corresponding result type to unranked tensor type. - if (getElementTypeOrSelf(result_ty).isa()) { - Type element_ty = getElementTypeOrSelf(operands[i].getType()); - result_ty = UnrankedTensorType::get(element_ty); + // Create a new while op with new operands and updated result types. + auto converted = rewriter.create(op.getLoc(), result_types, + operands, op.getAttrs()); + converted.removeAttr("T"); + UpdateFunctionTypes(converted); + + rewriter.replaceOp(op, converted.getResults()); + return success(); + } +}; + +struct ConvertWhileRegion : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult matchAndRewrite( + TF::WhileRegionOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override { + llvm::SmallVector result_types; + result_types.reserve(op.getNumOperands()); + // Change all DT_VARIANT result types to unranked tensor type. + for (auto it : llvm::zip(op.getResultTypes(), operands)) + result_types.push_back( + VariantToUnrankedTensorType(std::get<0>(it), std::get<1>(it))); + + // Create a new while op with new operands and updated result types. + auto converted = rewriter.create( + op.getLoc(), result_types, operands, op.getAttrs()); + + // Inline the regions from the old while into the new one, and apply + // signature conversion to inlined region. + for (auto it : llvm::zip(op.getRegions(), converted.getRegions())) { + Region &old_region = *std::get<0>(it); + Region &new_region = *std::get<1>(it); + + Block &entry = old_region.front(); + // Build signature conversion for the region. + TypeConverter::SignatureConversion signature_conversion(operands.size()); + for (auto it : llvm::zip(entry.getArguments(), operands)) { + BlockArgument arg = std::get<0>(it); + signature_conversion.addInputs( + arg.getArgNumber(), + VariantToUnrankedTensorType(arg.getType(), std::get<1>(it))); } - result_types.push_back(result_ty); + + rewriter.inlineRegionBefore(old_region, new_region, new_region.end()); + rewriter.applySignatureConversion(&new_region, signature_conversion); } - // Clone original while op with new operands and updated result types. 
- auto cloned = rewriter.create(op.getLoc(), result_types, - operands, op.getAttrs()); - cloned.removeAttr("T"); - UpdateFunctionTypes(cloned); - - rewriter.replaceOp(op, cloned.getResults()); + rewriter.replaceOp(op, converted.getResults()); return success(); } }; @@ -871,7 +901,8 @@ LogicalResult LowerStaticTensorListPass::RewriteFunction( ConvertTensorListGetItem, ConvertTensorListLength, ConvertTensorListPushBack, ConvertTensorListReserve, ConvertTensorListSetItem, ConvertTensorListStack, - ConvertTensorListResize, ConvertWhile>(context); + ConvertTensorListResize, ConvertWhile, ConvertWhileRegion>( + context); return applyPartialConversion(func, target, patterns); } From de473e40fc1388b367fb7a86db7cd13a5b536331 Mon Sep 17 00:00:00 2001 From: Andy Ly Date: Wed, 15 Jul 2020 09:02:53 -0700 Subject: [PATCH 0496/2522] Expand inference pass to refine types even where the shape can't Previously the shape inference pass would not attempt to refine the type unless their is a refinement of the shape. This is insufficient for shaped types where the element type has a shape. Having resource subtypes propagated is necessary for certain op decompositions, like tf.VariableShape. This will also allow more compile time information about resources for resources passed across functions. PiperOrigin-RevId: 321370649 Change-Id: Iaacce31605f04b556a3c1b3cc050fa52670c9d66 --- .../tensorflow/tests/shape_inference.mlir | 12 +++++++ .../tensorflow/transforms/shape_inference.cc | 33 +++++++++++++------ 2 files changed, 35 insertions(+), 10 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/shape_inference.mlir b/tensorflow/compiler/mlir/tensorflow/tests/shape_inference.mlir index 4193edf8cc6..7d2f630869a 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/shape_inference.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/shape_inference.mlir @@ -499,4 +499,16 @@ func @multiple_blocks_one_return(%arg0: tensor) -> tensor<*xf32> { %outputs_2 = "tf.TensorSliceDataset"(%outputs_0) {device = "", output_shapes = [#tf.shape<>]} : (tensor<*xf32>) -> tensor return } + + // Test resource result subtypes are propagated to call op results. + // CHECK-LABEL: func @pcall_resource_result + func @pcall_resource_result(%arg0: tensor<*x!tf.resource>>) { + // CHECK: "tf.StatefulPartitionedCall" + // CHECK-SAME: (tensor<*x!tf.resource>>) -> tensor<*x!tf.resource>> + %0 = "tf.StatefulPartitionedCall"(%arg0) {config = "", config_proto = "", executor_type = "", f = @pcall_resource_result_func} : (tensor<*x!tf.resource>>) -> tensor<*x!tf.resource> + return + } + func @pcall_resource_result_func(%arg0: tensor<*x!tf.resource>>) -> tensor<*x!tf.resource>> { + return %arg0 : tensor<*x!tf.resource>> + } } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc index f9c81634ae5..d2e497a1dec 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc @@ -210,6 +210,21 @@ bool CanBeRefined(Type type) { shape_type.getElementType().isa()); } +// Returns whether `original_type` type can be refined with +// `potential_refined_type` type. 
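+// For example, a tensor<*x!tf.resource> value can be refined with
+// tensor<*x!tf.resource<tensor<f32>>>: neither type is ranked, but the
+// latter carries a resource subtype (this is the case exercised by the
+// @pcall_resource_result test above).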
+bool CanRefineTypeWith(Type original_type, Type potential_refined_type) { + if (!CanBeRefined(original_type)) return false; + + auto shape_type = potential_refined_type.dyn_cast(); + if (!shape_type) return false; + if (shape_type.hasRank()) return true; + + auto element_type_with_subtype = + shape_type.getElementType().dyn_cast(); + return element_type_with_subtype && + !element_type_with_subtype.GetSubtypes().empty(); +} + // Infers the shape from a (Stateful)PartionedCall operation by looking up the // called function and propagating the return type. bool InferShapeForCall(Operation* op) { @@ -224,20 +239,18 @@ bool InferShapeForCall(Operation* op) { // Map each of the results of the call to the returned type of the // function. for (auto result : zip(op->getResults(), func.getType().getResults())) { - if (std::get<0>(result).getType() == std::get<1>(result)) continue; - // Skip already statically shaped results. - if (!CanBeRefined(std::get<0>(result).getType())) continue; - - auto shaped_type = std::get<0>(result).getType().cast(); - auto new_type = std::get<1>(result).dyn_cast(); - if (!new_type) continue; + auto call_op_result = std::get<0>(result); + auto func_result_type = std::get<1>(result); + if (call_op_result.getType() == func_result_type) continue; + if (!CanRefineTypeWith(call_op_result.getType(), func_result_type)) + continue; // Inserts a cast back to the original type if any user is not in the // TF dialect. - AddCastBackForUnsupportedNonTFUses(op, std::get<0>(result), - op->getDialect(), shaped_type); + AddCastBackForUnsupportedNonTFUses(op, call_op_result, op->getDialect(), + call_op_result.getType()); // Finally we inferred the shape and replace the type for this result. - std::get<0>(result).setType(new_type); + call_op_result.setType(func_result_type); changed = true; } return changed; From c9db66374c611722683e27520ed29c66e1540359 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Wed, 15 Jul 2020 09:05:30 -0700 Subject: [PATCH 0497/2522] Increase number of shards to prevent flaky timeouts PiperOrigin-RevId: 321371211 Change-Id: I3508bd5ffc367edec5bad81c19f2ae4219841c47 --- tensorflow/python/kernel_tests/random/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/kernel_tests/random/BUILD b/tensorflow/python/kernel_tests/random/BUILD index f7d031154e3..06360fc2095 100644 --- a/tensorflow/python/kernel_tests/random/BUILD +++ b/tensorflow/python/kernel_tests/random/BUILD @@ -119,7 +119,7 @@ cuda_py_test( name = "stateless_random_ops_test", size = "medium", srcs = ["stateless_random_ops_test.py"], - shard_count = 5, + shard_count = 10, tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", From 5a3e6e1050e1b4398bea98e047ea41a8dd5cb298 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 Jul 2020 09:10:27 -0700 Subject: [PATCH 0498/2522] Fix model zoo documentation link PiperOrigin-RevId: 321372183 Change-Id: I30e04212794580a7da716d8b971622b7ef5a62a0 --- .../lite/tools/evaluation/tasks/coco_object_detection/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/lite/tools/evaluation/tasks/coco_object_detection/README.md b/tensorflow/lite/tools/evaluation/tasks/coco_object_detection/README.md index 590c15cc133..faac6f7fedf 100644 --- a/tensorflow/lite/tools/evaluation/tasks/coco_object_detection/README.md +++ b/tensorflow/lite/tools/evaluation/tasks/coco_object_detection/README.md @@ -135,7 +135,7 @@ above, and you we still need an output labels file. 
To compute mAP in a consistent and interpretable way, we utilize the same 2014 COCO 'minival' dataset that is mentioned in the -[Tensorflow detection model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md). +[Tensorflow detection model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf1_detection_zoo.md). The links to download the components of the validation set are: From be262cc42cb45d6f3bd70f0a580bccc1ac5fd18c Mon Sep 17 00:00:00 2001 From: bubblebooy Date: Wed, 15 Jul 2020 11:35:38 -0500 Subject: [PATCH 0499/2522] mask len then should not be greater then inputs len. inputs len is either 2 or 3 --- tensorflow/python/keras/layers/dense_attention.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/keras/layers/dense_attention.py b/tensorflow/python/keras/layers/dense_attention.py index dd639e49b18..d9a087ccb38 100644 --- a/tensorflow/python/keras/layers/dense_attention.py +++ b/tensorflow/python/keras/layers/dense_attention.py @@ -192,7 +192,7 @@ class BaseDenseAttention(Layer): raise ValueError( '{} layer mask must be a list, ' 'namely [query_mask, value_mask].'.format(class_name)) - if len(mask) < 2 or len(mask) > 3: + if len(mask) < 2 or len(mask) > len(inputs): raise ValueError( '{} layer mask must be a list of length 2, namely [query_mask, ' 'value_mask]. Given length: {}'.format(class_name, len(mask))) From b4cb31ff3d2910aa5c9c7fe7825c527fb91ac000 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Wed, 15 Jul 2020 09:34:09 -0700 Subject: [PATCH 0500/2522] Enable previously disabled test which no longer flakes PiperOrigin-RevId: 321376476 Change-Id: I2a02ccd79e7a495004137b080dd7f15ae9032b39 --- tensorflow/python/keras/distribute/BUILD | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/python/keras/distribute/BUILD b/tensorflow/python/keras/distribute/BUILD index b089b9ad16b..83c25562323 100644 --- a/tensorflow/python/keras/distribute/BUILD +++ b/tensorflow/python/keras/distribute/BUILD @@ -412,7 +412,6 @@ distribute_py_test( tags = [ "multi_and_single_gpu", "no_windows_gpu", - "notap", # TODO(b/161144880): flaky "notsan", ], deps = [ From 6c0f93a2ddc8221b2d6ba87c43804b476a8e1fd2 Mon Sep 17 00:00:00 2001 From: Andy Ly Date: Wed, 15 Jul 2020 10:06:27 -0700 Subject: [PATCH 0501/2522] Export tf.func attributes as AttrValue.NameAttrList. Add support for exporting #tf.func<@name, {}> attributes as AttrValue.NameAttrList when converting TF MLIR to GraphDef. On import #tf.func may be introduced, modeling AttrValue.NameAttrList (func) attributes in TensorFlow. This updates the export path so round-tripping from Graph -> TF MLIR -> Graph will preserve such attributes properly. 
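As a concrete illustration, taken from the new func_attr.mlir test below rather
than from any additional change: an attribute such as

  _f = #tf.func<@callee, {attr2 = true, attr3 = 8.0 : f32}>

on a NoOp should now export to an AttrValue whose func field records the
exported name of @callee and whose nested attr map contains attr2 (b: true)
and attr3 (f: 8).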
PiperOrigin-RevId: 321382977 Change-Id: Ica0aa2eede960e76b69074e0a8fc7f9306dc6a0c --- .../tests/mlir2graphdef/func_attr.mlir | 40 +++++++++++++++++++ .../mlir/tensorflow/utils/export_utils.cc | 29 ++++++++++---- 2 files changed, 62 insertions(+), 7 deletions(-) create mode 100644 tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/func_attr.mlir diff --git a/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/func_attr.mlir b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/func_attr.mlir new file mode 100644 index 00000000000..fadb62c44b8 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/func_attr.mlir @@ -0,0 +1,40 @@ +// RUN: tf-mlir-translate -mlir-to-graphdef %s | tf-mlir-translate -graphdef-to-mlir | tf-mlir-translate -mlir-to-graphdef | FileCheck %s + +// Tests #tf.func attributes are exported as AttrValue.NameAttrList attributes +// with its attr field populated with nested attributes. + +module attributes {tf.versions = {bad_consumers = [], min_consumer = 12 : i32, producer = 458 : i32}} { + func @main() { + tf_executor.graph { + %control = tf_executor.island wraps "tf.NoOp"() {_f = #tf.func<@callee, {attr2 = true, attr3 = 8.0 : f32}>} : () -> () + tf_executor.fetch + } + return + } + func @callee() { + tf_executor.graph { + tf_executor.fetch + } + return + } +} + +// CHECK: op: "NoOp" +// CHECK-NEXT: attr +// CHECK-NEXT: key: "_f" +// CHECK-NEXT: value +// CHECK-NEXT: func +// CHECK-NEXT: name: [[FUNC_NAME:".*"]] +// CHECK-NEXT: attr +// CHECK-NEXT: key: "attr2" +// CHECK-NEXT: value +// CHECK-NEXT: b: true +// CHECK: attr +// CHECK-NEXT: key: "attr3" +// CHECK-NEXT: value +// CHECK-NEXT: f: 8 + +// CHECK: library +// CHECK-NEXT: function +// CHECK-NEXT: signature +// CHECK-NEXT: name: [[FUNC_NAME]] diff --git a/tensorflow/compiler/mlir/tensorflow/utils/export_utils.cc b/tensorflow/compiler/mlir/tensorflow/utils/export_utils.cc index 852bc72d7de..7e018966396 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/export_utils.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/export_utils.cc @@ -121,6 +121,20 @@ Status ConvertAttribute(const mlir::TF::ShapeAttr& attr, AttrValue* value) { return Status::OK(); } +Status ConvertAttribute(const mlir::FlatSymbolRefAttr& attr, AttrValue* value) { + value->mutable_func()->set_name(attr.getValue().str()); + return Status::OK(); +} + +Status ConvertAttribute(const mlir::TF::FuncAttr& attr, AttrValue* value) { + TF_RETURN_IF_ERROR( + ConvertAttribute(attr.GetName().cast(), value)); + TF_RETURN_IF_ERROR(ConvertAttributes(attr.GetAttrs().getValue(), + /*attrs_to_ignore=*/{}, + value->mutable_func()->mutable_attr())); + return Status::OK(); +} + Status ConvertAttribute(const mlir::StringAttr& attr, AttrValue* value) { absl::string_view attr_value(attr.getValue().data(), attr.getValue().size()); switch (mangling_util::GetMangledKind(attr_value)) { @@ -160,11 +174,6 @@ Status ConvertAttribute(const mlir::UnitAttr& attr, AttrValue* value) { return Status::OK(); } -Status ConvertAttribute(const mlir::FlatSymbolRefAttr& attr, AttrValue* value) { - value->mutable_func()->set_name(std::string(attr.getValue())); - return Status::OK(); -} - Status ConvertAttribute(const mlir::ArrayAttr& attr, AttrValue* value) { auto* list = value->mutable_list(); for (mlir::Attribute a : attr.getValue()) { @@ -372,8 +381,8 @@ Status ConvertAttributes( AttrValue value; switch (attr.getKind()) { case mlir::StandardAttributes::SymbolRef: { - auto func_attr = attr.cast(); - 
value.mutable_func()->set_name(std::string(func_attr.getValue())); + TF_RETURN_IF_ERROR( + ConvertAttribute(attr.cast(), &value)); func_call_attrs[string(name)] = value; continue; } @@ -415,6 +424,12 @@ Status ConvertAttributes( TF_RETURN_IF_ERROR( ConvertAttribute(attr.cast(), &value)); break; + case static_cast(mlir::TF::AttrKind::FUNC): { + TF_RETURN_IF_ERROR( + ConvertAttribute(attr.cast(), &value)); + func_call_attrs[string(name)] = value; + continue; + } // AffineMap kind is not implemented. case mlir::StandardAttributes::AffineMap: return errors::Unimplemented("AffineMap attribute (needed for '", From 791f4f2b773208b21ece71e1d37204a91720346d Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Wed, 15 Jul 2020 10:23:45 -0700 Subject: [PATCH 0502/2522] Bump the references to github.com/google/ruy to commit 388ffd28ba00ffb9aacbe538225165c02ea33ee3. PiperOrigin-RevId: 321386605 Change-Id: Icf91c0c46fa4260c0910e927ade04eb142d1c61a --- .../lite/micro/tools/make/third_party_downloads.inc | 4 ++-- tensorflow/lite/tools/make/Makefile | 4 ++++ tensorflow/lite/tools/make/download_dependencies.sh | 4 ++-- third_party/ruy/workspace.bzl | 8 ++++---- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/tensorflow/lite/micro/tools/make/third_party_downloads.inc b/tensorflow/lite/micro/tools/make/third_party_downloads.inc index 83be806455d..8590ace9fda 100644 --- a/tensorflow/lite/micro/tools/make/third_party_downloads.inc +++ b/tensorflow/lite/micro/tools/make/third_party_downloads.inc @@ -56,8 +56,8 @@ SIFIVE_FE310_LIB_MD5 := "06ee24c4956f8e21670ab3395861fe64" KISSFFT_URL="https://github.com/mborgerding/kissfft/archive/v130.zip" KISSFFT_MD5="438ba1fef5783cc5f5f201395cc477ca" -RUY_URL="https://github.com/google/ruy/archive/388ffd28ba00ffb9aacbe538225165c02ea33ee3.zip" -RUY_MD5="de57b7bfa37b044c7cb2233dc19f64bb" +RUY_URL="https://github.com/google/ruy/archive/34ea9f4993955fa1ff4eb58e504421806b7f2e8f.zip" +RUY_MD5="18613212e9c01aba85c7d19010b194a9" CIFAR10_DATASET_URL="https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz" CIFAR10_DATASET_MD5="c32a1d4ab5d03f1284b67883e8d87530" diff --git a/tensorflow/lite/tools/make/Makefile b/tensorflow/lite/tools/make/Makefile index 7d55370818c..f8b67fbbe7d 100644 --- a/tensorflow/lite/tools/make/Makefile +++ b/tensorflow/lite/tools/make/Makefile @@ -221,6 +221,10 @@ else CORE_CC_EXCLUDE_SRCS += tensorflow/lite/minimal_logging_ios.cc endif +# Temporary fix for ruy compilation error. +# TODO(b/158800055): Remove this hack once the ruy version is correctly bumped. +CORE_CC_EXCLUDE_SRCS += tensorflow/lite/tools/make/downloads/ruy/ruy/prepare_packed_matrices.cc + # Filter out all the excluded files. 
TF_LITE_CC_SRCS := $(filter-out $(CORE_CC_EXCLUDE_SRCS), $(CORE_CC_ALL_SRCS)) diff --git a/tensorflow/lite/tools/make/download_dependencies.sh b/tensorflow/lite/tools/make/download_dependencies.sh index e22b33e1188..0ab8307b07a 100755 --- a/tensorflow/lite/tools/make/download_dependencies.sh +++ b/tensorflow/lite/tools/make/download_dependencies.sh @@ -37,8 +37,8 @@ EIGEN_URL="$(grep -o 'https.*gitlab.com/libeigen/eigen/-/archive/.*tar\.gz' "${B EIGEN_SHA="$(eval echo $(grep '# SHARED_EIGEN_SHA' "${BZL_FILE_PATH}" | grep -o '\".*\"'))" GEMMLOWP_URL="$(grep -o 'https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" GEMMLOWP_SHA="$(eval echo $(grep '# SHARED_GEMMLOWP_SHA' "${BZL_FILE_PATH}" | grep -o '\".*\"'))" -RUY_URL="https://github.com/google/ruy/archive/388ffd28ba00ffb9aacbe538225165c02ea33ee3.zip" -RUY_SHA="89b8b56b4e1db894e75a0abed8f69757b37c23dde6e64bfb186656197771138a" +RUY_URL="https://github.com/google/ruy/archive/34ea9f4993955fa1ff4eb58e504421806b7f2e8f.zip" +RUY_SHA="8fd4adeeff4f29796bf7cdda64806ec0495a2435361569f02afe3fe33406f07c" GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz" GOOGLETEST_SHA="58a6f4277ca2bc8565222b3bbd58a177609e9c488e8a72649359ba51450db7d8" ABSL_URL="$(grep -o 'https://github.com/abseil/abseil-cpp/.*tar.gz' "${BZL_FILE_PATH}" | head -n1)" diff --git a/third_party/ruy/workspace.bzl b/third_party/ruy/workspace.bzl index b487125a31d..ee0faec6eff 100644 --- a/third_party/ruy/workspace.bzl +++ b/third_party/ruy/workspace.bzl @@ -5,11 +5,11 @@ load("//third_party:repo.bzl", "third_party_http_archive") def repo(): third_party_http_archive( name = "ruy", - sha256 = "89b8b56b4e1db894e75a0abed8f69757b37c23dde6e64bfb186656197771138a", - strip_prefix = "ruy-388ffd28ba00ffb9aacbe538225165c02ea33ee3", + sha256 = "8fd4adeeff4f29796bf7cdda64806ec0495a2435361569f02afe3fe33406f07c", + strip_prefix = "ruy-34ea9f4993955fa1ff4eb58e504421806b7f2e8f", urls = [ - "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/ruy/archive/388ffd28ba00ffb9aacbe538225165c02ea33ee3.zip", - "https://github.com/google/ruy/archive/388ffd28ba00ffb9aacbe538225165c02ea33ee3.zip", + "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/ruy/archive/34ea9f4993955fa1ff4eb58e504421806b7f2e8f.zip", + "https://github.com/google/ruy/archive/34ea9f4993955fa1ff4eb58e504421806b7f2e8f.zip", ], build_file = "//third_party/ruy:BUILD", ) From 11a1a56ebb4ff7f1342df45a65ac035fce30f1eb Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Thu, 16 Jul 2020 00:32:00 +0700 Subject: [PATCH 0503/2522] Pass `s3_file` instead of `filesystem` --- .../c/experimental/filesystem/plugins/s3/s3_filesystem.cc | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc index 414d5de80d4..313e73153ae 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc @@ -136,9 +136,7 @@ static Aws::Client::ClientConfiguration& GetDefaultClientConfig() { return cfg; }; -static void GetS3Client(TF_Filesystem* filesystem) { - auto s3_file = - static_cast(filesystem->plugin_filesystem); +static void GetS3Client(tf_s3_filesystem::S3File* s3_file) { absl::MutexLock l(&s3_file->initialization_lock); if (s3_file->s3_client.get() == nullptr) { @@ -170,9 +168,7 @@ static void 
GetS3Client(TF_Filesystem* filesystem) { } } -static void GetExecutor(TF_Filesystem* filesystem) { - auto s3_file = - static_cast(filesystem->plugin_filesystem); +static void GetExecutor(tf_s3_filesystem::S3File* s3_file) { absl::MutexLock l(&s3_file->initialization_lock); if (s3_file->executor.get() == nullptr) { From e7dca4ea0e9f7744a0afd2f2d73cc14ad60bde64 Mon Sep 17 00:00:00 2001 From: Marissa Ikonomidis Date: Wed, 15 Jul 2020 10:26:26 -0700 Subject: [PATCH 0504/2522] Internal Build Change PiperOrigin-RevId: 321387151 Change-Id: I49b2bbc4d78d510c27a226db150599b6fafca6ed --- tensorflow/python/BUILD | 14 ++++++++-- tensorflow/tensorflow.bzl | 27 ++++++++----------- .../tools/pip_package/pip_smoke_test.py | 1 - 3 files changed, 23 insertions(+), 19 deletions(-) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index f4898897435..d68bb928233 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -3,7 +3,7 @@ # ":platform" - Low-level and platform-specific Python code. load("//tensorflow:tensorflow.bzl", "py_strict_library") -load("//tensorflow:tensorflow.bzl", "cc_header_only_library", "if_mlir", "if_not_windows", "if_tpu", "if_xla_available", "py_test", "py_tests", "tf_cc_shared_object", "tf_cc_test", "tf_cuda_library", "tf_gen_op_wrapper_py") +load("//tensorflow:tensorflow.bzl", "cc_header_only_library", "if_mlir", "if_not_windows", "if_tpu", "if_xla_available", "py_test", "py_tests", "tf_cc_shared_object", "tf_cc_test", "tf_cuda_library", "tf_enable_mlir_bridge", "tf_gen_op_wrapper_py") # buildifier: disable=same-origin-load load("//tensorflow:tensorflow.bzl", "tf_monitoring_python_deps") @@ -5670,6 +5670,12 @@ cc_library( ], ) +py_library( + name = "global_test_configuration", + deps = if_mlir(["//tensorflow/compiler/mlir/tensorflow:mlir_roundtrip_pass_registration"]) + + tf_enable_mlir_bridge(), +) + py_library( name = "util", srcs = glob( @@ -5693,6 +5699,10 @@ py_library( ], deps = [ ":_pywrap_tf32_execution", + # global_test_configuration is added here because all major tests depend on this + # library. It isn't possible to add these test dependencies via tensorflow.bzl's + # py_test because not all tensorflow tests use tensorflow.bzl's py_test. + ":global_test_configuration", ":tf_decorator", ":tf_export", ":tf_stack", @@ -5702,7 +5712,7 @@ py_library( "@six_archive//:six", "@wrapt", "//tensorflow/tools/compatibility:all_renames_v2", - ] + if_mlir(["//tensorflow/compiler/mlir/tensorflow:mlir_roundtrip_pass_registration"]), + ], ) tf_py_test( diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index e5c29d26f27..6139fc9372a 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -2145,12 +2145,6 @@ def pywrap_tensorflow_macro( # This macro is for running python tests against system installed pip package # on Windows. # -# This macro can also enable testing with the experimental mlir bridge when -# enable_mlir_bridge is true. When it is enabled tests are run both with and without -# the mlir bridge. Support for enabling the mlir bridge is added here because -# it allows all tensorflow tests to be configured to be run with and without the -# mlir bridge. -# # py_test is built as an executable python zip file on Windows, which contains all # dependencies of the target. 
Because of the C++ extensions, it would be very # inefficient if the py_test zips all runfiles, plus we don't need them when running @@ -2168,19 +2162,12 @@ def py_test(deps = [], data = [], kernels = [], **kwargs): # Python version placeholder if kwargs.get("python_version", None) == "PY3": kwargs["tags"] = kwargs.get("tags", []) + ["no_oss_py2"] - deps = deps.to_list() if type(deps) == "depset" else deps native.py_test( # TODO(jlebar): Ideally we'd use tcmalloc here., deps = select({ - "//conditions:default": deps, - clean_dep("//tensorflow:no_tensorflow_py_deps"): [], - }) + - select({ - str(Label("//tensorflow:enable_mlir_bridge")): [ - "//tensorflow/python:is_mlir_bridge_test_true", - ], - "//conditions:default": [], - }), + "//conditions:default": deps, + clean_dep("//tensorflow:no_tensorflow_py_deps"): [], + }), data = data + select({ "//conditions:default": kernels, clean_dep("//tensorflow:no_tensorflow_py_deps"): ["//tensorflow/tools/pip_package:win_pip_package_marker"], @@ -2921,6 +2908,14 @@ def if_mlir(if_true, if_false = []): "//conditions:default": if_false, }) +def tf_enable_mlir_bridge(): + return select({ + str(Label("//tensorflow:enable_mlir_bridge")): [ + "//tensorflow/python:is_mlir_bridge_test_true", + ], + "//conditions:default": [], + }) + def if_tpu(if_true, if_false = []): """Shorthand for select()ing whether to build for TPUs.""" return select({ diff --git a/tensorflow/tools/pip_package/pip_smoke_test.py b/tensorflow/tools/pip_package/pip_smoke_test.py index d06cee1e038..40d2cff56b4 100644 --- a/tensorflow/tools/pip_package/pip_smoke_test.py +++ b/tensorflow/tools/pip_package/pip_smoke_test.py @@ -75,7 +75,6 @@ PYTHON_TARGETS, PY_TEST_QUERY_EXPRESSION = BuildPyTestDependencies() DEPENDENCY_DENYLIST = [ "//tensorflow/python:extra_py_tests_deps", "//tensorflow/cc/saved_model:saved_model_half_plus_two", - "//tensorflow:enable_mlir_bridge", "//tensorflow:no_tensorflow_py_deps", "//tensorflow/tools/pip_package:win_pip_package_marker", "//tensorflow/python:test_ops_2", From c936a4f5b39289851cad664bb9cb9c1a6e3a00db Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Wed, 15 Jul 2020 22:29:20 +0700 Subject: [PATCH 0505/2522] Add transfer manager --- .../filesystem/plugins/s3/s3_filesystem.cc | 25 ++++++++++++++++++- .../filesystem/plugins/s3/s3_filesystem.h | 9 +++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc index 313e73153ae..2f3403f10cb 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc @@ -32,6 +32,9 @@ constexpr int64_t kS3TimeoutMsec = 300000; // 5 min constexpr char kExecutorTag[] = "TransferManagerExecutorAllocation"; constexpr int kExecutorPoolSize = 25; +constexpr uint64_t kS3MultiPartUploadChunkSize = 50 * 1024 * 1024; // 50 MB +constexpr uint64_t kS3MultiPartDownloadChunkSize = 50 * 1024 * 1024; // 50 MB + static void* plugin_memory_allocate(size_t size) { return calloc(1, size); } static void plugin_memory_free(void* ptr) { free(ptr); } @@ -216,7 +219,27 @@ namespace tf_s3_filesystem { S3File::S3File() : s3_client(nullptr, ShutdownClient), executor(nullptr), - initialization_lock() {} + transfer_managers(), + multi_part_chunk_sizes(), + use_multi_part_download(true), + initialization_lock() { + uint64_t temp_value; + multi_part_chunk_sizes[Aws::Transfer::TransferDirection::UPLOAD] = + 
absl::SimpleAtoi(getenv("S3_MULTI_PART_UPLOAD_CHUNK_SIZE"), &temp_value) + ? temp_value + : kS3MultiPartUploadChunkSize; + multi_part_chunk_sizes[Aws::Transfer::TransferDirection::DOWNLOAD] = + absl::SimpleAtoi(getenv("S3_MULTI_PART_DOWNLOAD_CHUNK_SIZE"), &temp_value) + ? temp_value + : kS3MultiPartDownloadChunkSize; + use_multi_part_download = + absl::SimpleAtoi(getenv("S3_DISABLE_MULTI_PART_DOWNLOAD"), &temp_value) + ? (temp_value != 1) + : use_multi_part_download; + transfer_managers.emplace(Aws::Transfer::TransferDirection::UPLOAD, nullptr); + transfer_managers.emplace(Aws::Transfer::TransferDirection::DOWNLOAD, + nullptr); +} void Init(TF_Filesystem* filesystem, TF_Status* status) { filesystem->plugin_filesystem = new S3File(); TF_SetStatus(status, TF_OK, ""); diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h index 9086b5d00f4..42cf3134b47 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h @@ -17,8 +17,10 @@ limitations under the License. #include #include +#include #include #include +#include #include "absl/synchronization/mutex.h" #include "tensorflow/c/experimental/filesystem/filesystem_interface.h" @@ -28,6 +30,13 @@ namespace tf_s3_filesystem { typedef struct S3File { std::shared_ptr s3_client; std::shared_ptr executor; + // We need 2 `TransferManager`, for multipart upload/download. + Aws::Map> + transfer_managers; + // Sizes to split objects during multipart upload/download. + Aws::Map multi_part_chunk_sizes; + bool use_multi_part_download; absl::Mutex initialization_lock; S3File(); } S3File; From a1b6ec5f2a9259f8e1319251b767f4e999f496c7 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Wed, 15 Jul 2020 22:59:48 +0700 Subject: [PATCH 0506/2522] Add get transfer manager --- .../filesystem/plugins/s3/s3_filesystem.cc | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc index 2f3403f10cb..938fe322a39 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc @@ -181,6 +181,25 @@ static void GetExecutor(tf_s3_filesystem::S3File* s3_file) { } } +static void GetTransferManager( + const Aws::Transfer::TransferDirection& direction, + tf_s3_filesystem::S3File* s3_file) { + absl::MutexLock l(&s3_file->initialization_lock); + + if (s3_file->transfer_managers[direction].get() == nullptr) { + GetS3Client(s3_file); + GetExecutor(s3_file); + Aws::Transfer::TransferManagerConfiguration config(s3_file->executor.get()); + config.s3Client = s3_file->s3_client; + config.bufferSize = s3_file->multi_part_chunk_sizes[direction]; + // must be larger than pool size * multi part chunk size + config.transferBufferMaxHeapSize = + (kExecutorPoolSize + 1) * s3_file->multi_part_chunk_sizes[direction]; + s3_file->transfer_managers[direction] = + Aws::Transfer::TransferManager::Create(config); + } +} + static void ShutdownClient(Aws::S3::S3Client* s3_client) { if (s3_client != nullptr) { delete s3_client; From 3822d5f11482a5421e6cfa7fafb967da3de04ca3 Mon Sep 17 00:00:00 2001 From: Andy Ly Date: Wed, 15 Jul 2020 10:37:46 -0700 Subject: [PATCH 0507/2522] Move TF_FusedBatchNormV2Op and TF_FusedBatchNormV3Op to tf_ops.td. 
These ops are not based on the autogenerated form from the TensorFlow op registry. PiperOrigin-RevId: 321390079 Change-Id: I9cc5baad159fee7e97813cf560839c1b61189a58 --- .../mlir/tensorflow/ir/tf_generated_ops.td | 57 ------------------- .../compiler/mlir/tensorflow/ir/tf_ops.td | 57 +++++++++++++++++++ 2 files changed, 57 insertions(+), 57 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td index 0ef650487a8..3509ebfba4b 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td @@ -3540,63 +3540,6 @@ The size of 1D Tensors matches the dimension C of the 4D Tensors. }]; } -class TF_FusedBatchNormOpBase : TF_Op { - let summary = "Batch normalization."; - - let description = [{ -Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW". -The size of 1D Tensors matches the dimension C of the 4D Tensors. - }]; - - let arguments = (ins - TensorOf<[BF16, F16, F32]>:$x, - F32Tensor:$scale, - F32Tensor:$offset, - F32Tensor:$mean, - F32Tensor:$variance, - - DefaultValuedAttr:$epsilon, - DefaultValuedAttr:$exponential_avg_factor, - DefaultValuedAttr:$data_format, - DefaultValuedAttr:$is_training - ); - - TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; - TF_DerivedOperandTypeAttr U = TF_DerivedOperandTypeAttr<1>; - - let extraClassDeclaration = [{ - // TF_FoldOperandsTransposeInterface: - SmallVector GetLayoutDependentArgs() { return {0}; } - SmallVector GetLayoutDependentResults() { return {0}; } - LogicalResult FoldOperandsPermutation(ArrayRef permutation); - - // TF_LayoutSensitiveInterface: - StringRef GetOptimalLayout(const RuntimeDevices& devices); - LogicalResult UpdateDataFormat(StringRef data_format); - }]; -} - -def TF_FusedBatchNormV2Op : TF_FusedBatchNormOpBase<"FusedBatchNormV2"> { - let results = (outs - TensorOf<[BF16, F16, F32]>:$y, - F32Tensor:$batch_mean, - F32Tensor:$batch_variance, - F32Tensor:$reserve_space_1, - F32Tensor:$reserve_space_2 - ); -} - -def TF_FusedBatchNormV3Op : TF_FusedBatchNormOpBase<"FusedBatchNormV3"> { - let results = (outs - TensorOf<[BF16, F16, F32]>:$y, - F32Tensor:$batch_mean, - F32Tensor:$batch_variance, - F32Tensor:$reserve_space_1, - F32Tensor:$reserve_space_2, - F32Tensor:$reserve_space_3 - ); -} - def TF_GatherOp : TF_Op<"Gather", [NoSideEffect]> { let summary = "Gather slices from `params` according to `indices`."; diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td index 7c6e6c672ae..1fe301696a7 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td @@ -1196,4 +1196,61 @@ def TF_TPUPartitionedCallOp : TF_Op<"TPUPartitionedCall", [CallOpInterface]> { let verifier = [{ return VerifyPartitionedCall(*this); }]; } +class TF_FusedBatchNormOpBase : TF_Op { + let summary = "Batch normalization."; + + let description = [{ +Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW". +The size of 1D Tensors matches the dimension C of the 4D Tensors. 
+ }]; + + let arguments = (ins + TensorOf<[BF16, F16, F32]>:$x, + F32Tensor:$scale, + F32Tensor:$offset, + F32Tensor:$mean, + F32Tensor:$variance, + + DefaultValuedAttr:$epsilon, + DefaultValuedAttr:$exponential_avg_factor, + DefaultValuedAttr:$data_format, + DefaultValuedAttr:$is_training + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; + TF_DerivedOperandTypeAttr U = TF_DerivedOperandTypeAttr<1>; + + let extraClassDeclaration = [{ + // TF_FoldOperandsTransposeInterface: + SmallVector GetLayoutDependentArgs() { return {0}; } + SmallVector GetLayoutDependentResults() { return {0}; } + LogicalResult FoldOperandsPermutation(ArrayRef permutation); + + // TF_LayoutSensitiveInterface: + StringRef GetOptimalLayout(const RuntimeDevices& devices); + LogicalResult UpdateDataFormat(StringRef data_format); + }]; +} + +def TF_FusedBatchNormV2Op : TF_FusedBatchNormOpBase<"FusedBatchNormV2"> { + let results = (outs + TensorOf<[BF16, F16, F32]>:$y, + F32Tensor:$batch_mean, + F32Tensor:$batch_variance, + F32Tensor:$reserve_space_1, + F32Tensor:$reserve_space_2 + ); +} + +def TF_FusedBatchNormV3Op : TF_FusedBatchNormOpBase<"FusedBatchNormV3"> { + let results = (outs + TensorOf<[BF16, F16, F32]>:$y, + F32Tensor:$batch_mean, + F32Tensor:$batch_variance, + F32Tensor:$reserve_space_1, + F32Tensor:$reserve_space_2, + F32Tensor:$reserve_space_3 + ); +} + #endif // TF_OPS From dff1ceaabefa21525f590f19b868b87f9218ba90 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Thu, 16 Jul 2020 00:44:50 +0700 Subject: [PATCH 0508/2522] Add TF_SetStatusFromAWSError --- .../filesystem/plugins/s3/s3_filesystem.cc | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc index 938fe322a39..6b9be12e5a3 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc @@ -38,6 +38,25 @@ constexpr uint64_t kS3MultiPartDownloadChunkSize = 50 * 1024 * 1024; // 50 MB static void* plugin_memory_allocate(size_t size) { return calloc(1, size); } static void plugin_memory_free(void* ptr) { free(ptr); } +static inline void TF_SetStatusFromAWSError( + const Aws::Client::AWSError& error, TF_Status* status) { + switch (error.GetResponseCode()) { + case Aws::Http::HttpResponseCode::FORBIDDEN: + TF_SetStatus(status, TF_FAILED_PRECONDITION, + "AWS Credentials have not been set properly. 
" + "Unable to access the specified S3 location"); + break; + case Aws::Http::HttpResponseCode::REQUESTED_RANGE_NOT_SATISFIABLE: + TF_SetStatus(status, TF_OUT_OF_RANGE, "Read less bytes than requested"); + break; + default: + TF_SetStatus( + status, TF_UNKNOWN, + (error.GetExceptionName() + ": " + error.GetMessage()).c_str()); + break; + } +} + static void ParseS3Path(const Aws::String& fname, bool object_empty_ok, Aws::String* bucket, Aws::String* object, TF_Status* status) { From 09b0d550ac1a0f9cc30d0f4e1fff3e420e882bd8 Mon Sep 17 00:00:00 2001 From: Vo Van Nghia Date: Thu, 16 Jul 2020 00:45:35 +0700 Subject: [PATCH 0509/2522] Add new random access file --- .../filesystem/plugins/s3/s3_filesystem.cc | 71 ++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-) diff --git a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc index 6b9be12e5a3..f23274e6191 100644 --- a/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.cc @@ -15,17 +15,20 @@ limitations under the License. #include "tensorflow/c/experimental/filesystem/plugins/s3/s3_filesystem.h" #include +#include #include #include #include "absl/strings/ascii.h" #include "absl/strings/numbers.h" +#include "absl/strings/str_cat.h" #include "tensorflow/c/experimental/filesystem/filesystem_interface.h" #include "tensorflow/c/experimental/filesystem/plugins/s3/aws_crypto.h" #include "tensorflow/c/tf_status.h" // Implementation of a filesystem for S3 environments. // This filesystem will support `s3://` URI schemes. +constexpr char kS3FileSystemAllocationTag[] = "S3FileSystemAllocation"; constexpr char kS3ClientAllocationTag[] = "S3ClientAllocation"; constexpr int64_t kS3TimeoutMsec = 300000; // 5 min @@ -230,8 +233,58 @@ static void ShutdownClient(Aws::S3::S3Client* s3_client) { // SECTION 1. 
Implementation for `TF_RandomAccessFile` // ---------------------------------------------------------------------------- namespace tf_random_access_file { +typedef struct S3File { + Aws::String bucket; + Aws::String object; + std::shared_ptr s3_client; + std::shared_ptr transfer_manager; + bool use_multi_part_download; +} S3File; -// TODO(vnvo2409): Implement later +void Cleanup(TF_RandomAccessFile* file) { + auto s3_file = static_cast(file->plugin_file); + delete s3_file; +} + +static int64_t ReadS3Client(S3File* s3_file, uint64_t offset, size_t n, + char* buffer, TF_Status* status) { + Aws::S3::Model::GetObjectRequest get_object_request; + get_object_request.WithBucket(s3_file->bucket).WithKey(s3_file->bucket); + Aws::String bytes = + absl::StrCat("bytes=", offset, "-", offset + n - 1).c_str(); + get_object_request.SetRange(bytes); + get_object_request.SetResponseStreamFactory( + []() { return Aws::New(kS3FileSystemAllocationTag); }); + + auto get_object_outcome = s3_file->s3_client->GetObject(get_object_request); + if (!get_object_outcome.IsSuccess()) + TF_SetStatusFromAWSError(get_object_outcome.GetError(), status); + else + TF_SetStatus(status, TF_OK, ""); + if (TF_GetCode(status) != TF_OK && TF_GetCode(status) != TF_OUT_OF_RANGE) + return -1; + + int64_t read = get_object_outcome.GetResult().GetContentLength(); + if (read < n) + TF_SetStatus(status, TF_OUT_OF_RANGE, "Read less bytes than requested"); + get_object_outcome.GetResult().GetBody().read(buffer, read); + return read; +} + +static int64_t ReadS3TransferManager(S3File* s3_file, uint64_t offset, size_t n, + char* buffer, TF_Status* status) { + // TODO(vnvo2409): Implement this function. + return -1; +} + +int64_t Read(const TF_RandomAccessFile* file, uint64_t offset, size_t n, + char* buffer, TF_Status* status) { + auto s3_file = static_cast(file->plugin_file); + if (s3_file->use_multi_part_download) + return ReadS3TransferManager(s3_file, offset, n, buffer, status); + else + return ReadS3Client(s3_file, offset, n, buffer, status); +} } // namespace tf_random_access_file @@ -288,6 +341,22 @@ void Cleanup(TF_Filesystem* filesystem) { delete s3_file; } +void NewRandomAccessFile(const TF_Filesystem* filesystem, const char* path, + TF_RandomAccessFile* file, TF_Status* status) { + Aws::String bucket, object; + ParseS3Path(path, false, &bucket, &object, status); + if (TF_GetCode(status) != TF_OK) return; + + auto s3_file = static_cast(filesystem->plugin_filesystem); + GetS3Client(s3_file); + GetTransferManager(Aws::Transfer::TransferDirection::DOWNLOAD, s3_file); + file->plugin_file = new tf_random_access_file::S3File( + {bucket, object, s3_file->s3_client, + s3_file->transfer_managers[Aws::Transfer::TransferDirection::DOWNLOAD], + s3_file->use_multi_part_download}); + TF_SetStatus(status, TF_OK, ""); +} + // TODO(vnvo2409): Implement later } // namespace tf_s3_filesystem From 37e9ec4b3b849f9c13c82ce468386c4d11577293 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 Jul 2020 10:51:07 -0700 Subject: [PATCH 0510/2522] Remove a div from an inner loop. The div is the single most costly operation in this function, amounting to ~10% of its runtime. 
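A minimal sketch of the hoist, assuming the division lives inside depthOffset()
(consistent with the hunks below); Mapper, PackBefore and PackAfter are made-up
names for illustration only, not part of the change:

```
#include <cstdint>

// Hypothetical stand-in for the Eigen sub-mapper; the modulo is the costly
// operation that used to run on every inner-loop iteration.
struct Mapper {
  int64_t offset;
  int64_t patch_depth;
  int64_t depthOffset() const { return offset % patch_depth; }
};

// Before: depthOffset() (and its division) is recomputed per column.
int64_t PackBefore(const Mapper& rhs, int64_t cols) {
  int64_t acc = 0;
  for (int64_t col = 0; col < cols; ++col) acc += rhs.depthOffset();
  return acc;
}

// After: the loop-invariant value is computed once, outside the loop.
int64_t PackAfter(const Mapper& rhs, int64_t cols) {
  const int64_t rhs_depth_offset = rhs.depthOffset();
  int64_t acc = 0;
  for (int64_t col = 0; col < cols; ++col) acc += rhs_depth_offset;
  return acc;
}
```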
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 10% saving is consistent with the benchmark results: ``` name old time/op new time/op delta BM_PackRhs_float_32_64x64_IC32_FC64_5x5_VALID_s1x1_is1x1_B256x56 [patch: 5x5 D32; num_patches=3600 patch_size=800 num_inputs=32 padding=1 ] 3.06µs ±10% 3.02µs ± 9% ~ (p=0.661 n=10+9) BM_PackRhs_float_32_64x64_IC32_FC64_5x5_SAME_s1x1_is1x1_B256x56 [patch: 5x5 D32; num_patches=4096 patch_size=800 num_inputs=32 padding=2 ] 3.61µs ±12% 3.25µs ± 4% -9.95% (p=0.000 n=10+9) BM_PackRhs_float_32_64x64_IC32_FC64_5x5_VALID_s2x2_is1x1_B256x56 [patch: 5x5 D32; num_patches=900 patch_size=800 num_inputs=32 padding=1 ] 3.65µs ±12% 3.46µs ± 7% -5.09% (p=0.028 n=10+9) BM_PackRhs_float_32_64x64_IC32_FC64_5x5_SAME_s2x2_is1x1_B256x56 [patch: 5x5 D32; num_patches=1024 patch_size=800 num_inputs=32 padding=2 ] 4.03µs ±12% 3.64µs ± 3% -9.80% (p=0.000 n=10+8) BM_PackRhs_float_32_64x64_IC30_FC64_5x5_SAME_s1x1_is1x1_B256x56 [patch: 5x5 D30; num_patches=4096 patch_size=750 num_inputs=34 padding=2 ] 4.10µs ±14% 3.71µs ± 4% -9.68% (p=0.001 n=10+9) BM_PackRhs_float_32_64x64_IC30_FC64_5x5_VALID_s1x1_is1x1_B256x56 [patch: 5x5 D30; num_patches=3600 patch_size=750 num_inputs=34 padding=1 ] 3.36µs ± 3% 3.33µs ± 4% ~ (p=0.356 n=9+10) BM_PackRhs_float_32_64x64_IC30_FC64_5x5_SAME_s2x2_is1x1_B256x56 [patch: 5x5 D30; num_patches=1024 patch_size=750 num_inputs=34 padding=2 ] 4.12µs ± 7% 4.03µs ± 5% ~ (p=0.278 n=9+10) BM_PackRhs_float_32_64x64_IC30_FC64_5x5_VALID_s2x2_is1x1_B256x56 [patch: 5x5 D30; num_patches=900 patch_size=750 num_inputs=34 padding=1 ] 3.78µs ±10% 3.63µs ± 3% -4.14% (p=0.008 n=9+8) BM_PackRhs_float_32_256x256_IC4_FC16_8x8_SAME_s1x1_is1x1_B256x56 [patch: 8x8 D4; num_patches=65536 patch_size=256 num_inputs=16 padding=2 ] 5.80µs ±11% 5.40µs ± 3% -6.84% (p=0.001 n=10+9) BM_PackRhs_float_32_256x256_IC4_FC16_8x8_VALID_s1x1_is1x1_B256x56 [patch: 8x8 D4; num_patches=62001 patch_size=256 num_inputs=16 padding=1 ] 4.87µs ±10% 4.73µs ± 4% ~ (p=0.190 n=10+10) BM_PackRhs_float_32_256x256_IC4_FC16_8x8_SAME_s2x4_is1x1_B256x56 [patch: 8x8 D4; num_patches=8192 patch_size=256 num_inputs=16 padding=2 ] 6.04µs ± 1% 6.06µs ± 5% ~ (p=0.762 n=8+10) BM_PackRhs_float_32_256x256_IC4_FC16_8x8_VALID_s2x4_is1x1_B256x56 [patch: 8x8 D4; num_patches=7875 patch_size=256 num_inputs=16 padding=1 ] 5.29µs ± 2% 5.38µs ± 6% ~ (p=0.400 n=9+10) BM_PackRhs_float_32_64x64_IC4_FC16_3x3_SAME_s1x1_is1x1_B36x432 [patch: 3x3 D4; num_patches=4096 patch_size=36 num_inputs=256 padding=2 ] 13.2µs ± 4% 11.8µs ± 6% -10.43% (p=0.000 n=9+8) BM_PackRhs_float_32_64x64_IC4_FC16_3x3_VALID_s1x1_is1x1_B36x432 [patch: 3x3 D4; num_patches=3844 patch_size=36 num_inputs=256 padding=1 ] 10.9µs ±10% 10.9µs ±13% ~ (p=0.780 n=9+10) BM_PackRhs_float_32_64x64_IC4_FC16_3x3_SAME_s2x2_is1x1_B36x432 [patch: 3x3 D4; num_patches=1024 patch_size=36 num_inputs=256 padding=2 ] 11.3µs ± 3% 10.9µs ±16% ~ (p=0.052 n=10+10) BM_PackRhs_float_32_64x64_IC4_FC16_3x3_VALID_s2x2_is1x1_B36x432 [patch: 3x3 D4; num_patches=961 patch_size=36 num_inputs=256 padding=1 ] 9.18µs ± 4% 9.21µs ±14% ~ (p=0.481 n=10+10) BM_PackRhs_float_32_32x32_IC96_FC96_5x5_SAME_s1x1_is2x2_B272x240 [patch: 5x5 D96; num_patches=3969 patch_size=2400 num_inputs=42 padding=2] 16.5µs ± 0% 17.0µs ±17% ~ (p=0.931 n=9+9) BM_PackRhs_float_32_32x32_IC96_FC96_5x5_VALID_s1x1_is2x2_B272x240 [patch: 5x5 D96; num_patches=3481 patch_size=2400 num_inputs=42 padding=1] 16.5µs ± 1% 16.7µs ± 8% ~ (p=0.423 n=8+9) 
BM_PackRhs_qint8_32_64x64_IC32_FC64_5x5_SAME_s1x1_is1x1_B256x56 [patch: 5x5 D32; num_patches=4096 patch_size=800 num_inputs=128 padding=2] 3.83µs ±11% 3.61µs ± 1% -5.85% (p=0.000 n=10+8) BM_PackLhs_float_128_FC1024_3x3_B256x56 [filter: count=1024 dims=3x3; input: depth=128; num_filers=113 ] 8.07µs ± 2% 8.07µs ± 0% ~ (p=1.000 n=9+9) BM_PackLhs_float_128_FC1024_3x3_B56x256 [filter: count=1024 dims=3x3; input: depth=128; num_filers=113 ] 10.0µs ± 3% 10.0µs ± 4% ~ (p=0.796 n=10+10) BM_PackLhs_float_30_FC64_3x3_B256x56 [filter: count=64 dims=3x3; input: depth=30; num_filers=7767 ] 1.25µs ± 2% 1.26µs ± 4% ~ (p=0.447 n=9+10) BM_PackLhs_float_50_FC64_3x3_B56x256 [filter: count=64 dims=3x3; input: depth=50; num_filers=4660 ] 4.11µs ± 2% 4.09µs ± 2% ~ (p=0.780 n=10+9) ``` PiperOrigin-RevId: 321393418 Change-Id: I501c54635e2bc7e8f506f80fca70a860050905ce --- tensorflow/core/kernels/eigen_spatial_convolutions.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/kernels/eigen_spatial_convolutions.h b/tensorflow/core/kernels/eigen_spatial_convolutions.h index c163eb887d7..ac02d3bb5cd 100644 --- a/tensorflow/core/kernels/eigen_spatial_convolutions.h +++ b/tensorflow/core/kernels/eigen_spatial_convolutions.h @@ -180,6 +180,7 @@ struct gemm_pack_colmajor_block< const StorageIndex start_col = rhs.colOffset(); const StorageIndex max_col = rhs.maxCol(peeled_k); + const StorageIndex rhs_depth_offset = rhs.depthOffset(); for (StorageIndex col = 0; col < cols; ++col) { SubMapper lm = rhs.getLinearMapper(0, col); @@ -199,7 +200,7 @@ struct gemm_pack_colmajor_block< if (!has_padding || (!pad_col && !lm.padAnyRow(start_row, max_row - 1))) { const StorageIndex start_depth = - (c == start_col) ? rhs.depthOffset() : 0; + (c == start_col) ? rhs_depth_offset : 0; const StorageIndex max_depth = std::min(start_depth + (peeled_k - k), @@ -286,7 +287,7 @@ struct gemm_pack_colmajor_block< eigen_assert(k <= peeled_k); const StorageIndex start_depth = - ((c == start_col) && (r == start_row)) ? rhs.depthOffset() : 0; + ((c == start_col) && (r == start_row)) ? rhs_depth_offset : 0; const StorageIndex max_depth = rhs.maxDepth(peeled_k - k, start_depth); @@ -359,6 +360,7 @@ struct gemm_pack_colmajor_block< const StorageIndex start_col = rhs.colOffset(); const StorageIndex max_col = rhs.maxCol(peeled_k); + const StorageIndex rhs_depth_offset = rhs.depthOffset(); // Original input column and row after applying all non-standard strides and // dilations. Computed by padOrSkip{Row,Col}. @@ -380,7 +382,7 @@ struct gemm_pack_colmajor_block< eigen_assert(k <= peeled_k); const StorageIndex start_depth = - ((c == start_col) && (r == start_row)) ? rhs.depthOffset() : 0; + ((c == start_col) && (r == start_row)) ? 
rhs_depth_offset : 0; const StorageIndex max_depth = rhs.maxDepth(peeled_k - k, start_depth); From dfb9a633cf303cb6f7182145b3297aa5a64c19ec Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Wed, 15 Jul 2020 10:54:36 -0700 Subject: [PATCH 0511/2522] Introduce TpuCompilationCache create function registration PiperOrigin-RevId: 321394258 Change-Id: I61aa4c39762b078ae4349f4b456a258ac1d13bde --- tensorflow/core/tpu/kernels/BUILD | 18 +++++- .../kernels/tpu_compilation_cache_factory.cc | 55 +++++++++++++++++++ .../kernels/tpu_compilation_cache_factory.h | 33 +++++++++++ .../kernels/tpu_compilation_cache_interface.h | 1 - .../core/tpu/kernels/tpu_configuration_ops.cc | 29 +++++++++- .../core/tpu/kernels/tpu_configuration_ops.h | 4 ++ tensorflow/core/tpu/tpu_config_c_api.h | 4 +- 7 files changed, 139 insertions(+), 5 deletions(-) create mode 100644 tensorflow/core/tpu/kernels/tpu_compilation_cache_factory.cc create mode 100644 tensorflow/core/tpu/kernels/tpu_compilation_cache_factory.h diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index 6ff0fb1df73..89a36ed9ae4 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -79,7 +79,10 @@ tf_kernel_library( srcs = ["tpu_configuration_ops.cc"], hdrs = ["tpu_configuration_ops.h"], deps = [ + ":tpu_compilation_cache_factory", + ":tpu_compilation_cache_interface", ":tpu_mesh_state_interface", + ":tpu_op_consts", "//tensorflow/c:tf_status", "//tensorflow/c:tf_status_helper", "//tensorflow/compiler/xla:util", @@ -133,6 +136,20 @@ tf_proto_library_cc( ], ) +cc_library( + name = "tpu_compilation_cache_factory", + srcs = ["tpu_compilation_cache_factory.cc"], + hdrs = ["tpu_compilation_cache_factory.h"], + deps = [ + ":tpu_compilation_cache_external", + ":tpu_compilation_cache_interface", + ":tpu_op_consts", + "//tensorflow/core:framework", + "//tensorflow/core/platform:status", + "//tensorflow/core/platform:types", + ], +) + cc_library( name = "tpu_compilation_cache_key", srcs = [], @@ -323,7 +340,6 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", - "//tensorflow/core/distributed_runtime/rpc:grpc_call", "//tensorflow/core/platform:casts", # buildcleaner: keep "//tensorflow/core/profiler/lib:traceme", "@com_google_absl//absl/base:core_headers", diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_factory.cc b/tensorflow/core/tpu/kernels/tpu_compilation_cache_factory.cc new file mode 100644 index 00000000000..86469ae7ebb --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_factory.cc @@ -0,0 +1,55 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_factory.h" + +#include "tensorflow/core/framework/resource_mgr.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_external.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h" +#include "tensorflow/core/tpu/kernels/tpu_op_consts.h" + +namespace tensorflow { +namespace tpu { +namespace { + +TpuCompilationCacheInterface* CreateCompilationCacheExternal() { + // NOTE: Change the 1 << 33 value to change the compilation cache size. + // TODO(frankchn): Make this configurable. + return new TpuCompilationCacheExternal(int64{1} << 33); // 8 GB +} + +// Using a pointer here to fulfill the trivially destructible requirement for +// static variables. +static std::function* + compilation_cache_creation_fn = + new std::function( + CreateCompilationCacheExternal); + +} // namespace + +std::function GetCompilationCacheCreateFn() { + return *compilation_cache_creation_fn; +} + +void SetCompilationCacheCreateFn( + std::function fn) { + delete compilation_cache_creation_fn; + compilation_cache_creation_fn = + new std::function(fn); +} + +} // namespace tpu +} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_factory.h b/tensorflow/core/tpu/kernels/tpu_compilation_cache_factory.h new file mode 100644 index 00000000000..4710f916c48 --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_factory.h @@ -0,0 +1,33 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILATION_CACHE_FACTORY_H_ +#define TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILATION_CACHE_FACTORY_H_ + +#include + +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h" + +namespace tensorflow { +namespace tpu { + +std::function GetCompilationCacheCreateFn(); + +void SetCompilationCacheCreateFn( + std::function fn); + +} // namespace tpu +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILATION_CACHE_FACTORY_H_ diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h b/tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h index 9726d5b78b9..cde6467b7af 100644 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h @@ -25,7 +25,6 @@ limitations under the License. 
#include "absl/synchronization/mutex.h" #include "tensorflow/compiler/tf2xla/host_compute_metadata.pb.h" #include "tensorflow/compiler/xla/util.h" -#include "tensorflow/core/distributed_runtime/rpc/grpc_call.h" #include "tensorflow/core/framework/resource_mgr.h" #include "tensorflow/core/lib/core/refcount.h" #include "tensorflow/core/lib/core/threadpool.h" diff --git a/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc b/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc index 065a7f77dd6..13efdc46e10 100644 --- a/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc +++ b/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc @@ -23,7 +23,10 @@ limitations under the License. #include "tensorflow/core/framework/tensor.pb.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/platform/refcount.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_factory.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h" #include "tensorflow/core/tpu/kernels/tpu_mesh_state_interface.h" +#include "tensorflow/core/tpu/kernels/tpu_op_consts.h" #include "tensorflow/core/tpu/tpu_api.h" #include "tensorflow/core/tpu/tpu_config_c_api.h" #include "tensorflow/core/tpu/tpu_configuration.h" @@ -67,6 +70,16 @@ Status DeleteIfExists(ResourceMgr* resource_manager, } // namespace +Status CreateTpuCompilationCache( + ResourceMgr* rmgr, tpu::TpuCompilationCacheInterface** compilation_cache) { + return rmgr->LookupOrCreate( + rmgr->default_container(), tpu::kCompilationCacheResourceName, + compilation_cache, [&](tpu::TpuCompilationCacheInterface** new_cache) { + *new_cache = tpu::GetCompilationCacheCreateFn()(); + return Status::OK(); + }); +} + void ConfigureDistributedTpuOp::Compute(OpKernelContext* ctx) { VLOG(1) << "ConfigureDistributedTpuOp"; XLA_SCOPED_LOGGING_TIMER("ConfigureDistributedTpuOp"); @@ -98,9 +111,15 @@ void ConfigureDistributedTpuOp::Compute(OpKernelContext* ctx) { OP_REQUIRES_OK(ctx, DeleteIfExists( rmgr, tpu::kTpuMeshStateInterfaceResourceName)); + // Create the subgraph compilation cache and put it in the local resource + // manager. + tpu::TpuCompilationCacheInterface* compilation_cache; + OP_REQUIRES_OK(ctx, CreateTpuCompilationCache(rmgr, &compilation_cache)); + core::ScopedUnref compilation_cache_ref(compilation_cache); + tpu::ConfigApiFn()->ConfigureDistributedTpuOp_DoWorkFn( num_devices_per_host.size(), num_devices_per_host.data(), - &host_config_output_size, &host_config_output, status); + compilation_cache, &host_config_output_size, &host_config_output, status); auto* tpu_mesh = tpu::TpuMeshStateInterface::Create(); OP_REQUIRES_OK( @@ -230,6 +249,14 @@ void InitializeHostForDistributedTpuOp::Compute(OpKernelContext* ctx) { mesh_state_interface)); } + if (enable_whole_mesh_compilations_) { + // If this is a whole mesh compilation mode, create the compilation cache, + // if missing. + tpu::TpuCompilationCacheInterface* compilation_cache; + OP_REQUIRES_OK(ctx, CreateTpuCompilationCache(rmgr, &compilation_cache)); + compilation_cache->Unref(); + } + tpu::ConfigApiFn()->InitializeHostForDistributedTpuOp_DoWorkFn( tpu_host_config.size(), tpu_host_config.data(), enable_whole_mesh_compilations_, &device_id_output_size, diff --git a/tensorflow/core/tpu/kernels/tpu_configuration_ops.h b/tensorflow/core/tpu/kernels/tpu_configuration_ops.h index f75a47e5aaf..d0bf5809842 100644 --- a/tensorflow/core/tpu/kernels/tpu_configuration_ops.h +++ b/tensorflow/core/tpu/kernels/tpu_configuration_ops.h @@ -16,9 +16,13 @@ limitations under the License. 
#define TENSORFLOW_CORE_TPU_KERNELS_TPU_CONFIGURATION_OPS_H_ #include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_interface.h" namespace tensorflow { +Status CreateTpuCompilationCache( + ResourceMgr* rmgr, tpu::TpuCompilationCacheInterface** compilation_cache); + // The ConfigureDistributedTpu op is used to start an TPUDriver from // TensorFlow. It should be run on a TPU_SYSTEM device and returns the // connection host:port for the CompilationCacheServer. The diff --git a/tensorflow/core/tpu/tpu_config_c_api.h b/tensorflow/core/tpu/tpu_config_c_api.h index a96cbf38f64..21649050bf7 100644 --- a/tensorflow/core/tpu/tpu_config_c_api.h +++ b/tensorflow/core/tpu/tpu_config_c_api.h @@ -35,8 +35,8 @@ extern "C" { TFTPU_CAPI_EXPORT void ConfigureDistributedTpuOp_DoWork( const size_t num_cores_per_host_size, const int32_t* num_cores_per_host, - size_t* host_config_output_size, char** host_config_output, - TF_Status* status); + void* tpu_compilation_cache_interface, size_t* host_config_output_size, + char** host_config_output, TF_Status* status); TFTPU_CAPI_EXPORT void WaitForDistributedTpuOp_DoWork( const size_t num_hosts, const size_t num_cores_per_host, From 001d585562ccfbb7a529021b6e0c6b5601812337 Mon Sep 17 00:00:00 2001 From: Robert Suderman Date: Wed, 15 Jul 2020 10:56:59 -0700 Subject: [PATCH 0512/2522] Add an optimization that converts some Gathers to Slices. Some Gathers can be represented as slices. This lowering transforms these gathers into slices. PiperOrigin-RevId: 321394868 Change-Id: I905a235e951bf1034a31cc89a86126e830e15495 --- tensorflow/compiler/mlir/hlo/BUILD | 2 + .../Dialect/mhlo/transforms/rewriters.h | 3 + .../Dialect/mhlo/transforms/optimize_mhlo.cc | 187 ++++++++++++++++++ .../mhlo/transforms/optimize_mhlo_pass.cc | 49 +++++ .../compiler/mlir/hlo/tests/optimize-hlo.mlir | 64 ++++++ 5 files changed, 305 insertions(+) create mode 100644 tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/optimize_mhlo.cc create mode 100644 tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/optimize_mhlo_pass.cc create mode 100644 tensorflow/compiler/mlir/hlo/tests/optimize-hlo.mlir diff --git a/tensorflow/compiler/mlir/hlo/BUILD b/tensorflow/compiler/mlir/hlo/BUILD index 5cbf305bb4d..bc6393fa3f3 100644 --- a/tensorflow/compiler/mlir/hlo/BUILD +++ b/tensorflow/compiler/mlir/hlo/BUILD @@ -610,6 +610,7 @@ cc_library( "lib/Dialect/mhlo/transforms/generated_lower_complex.inc", "lib/Dialect/mhlo/transforms/lower_complex.cc", "lib/Dialect/mhlo/transforms/lower_general_dot.cc", + "lib/Dialect/mhlo/transforms/optimize_mhlo.cc", ], hdrs = [ "include/mlir-hlo/Dialect/mhlo/transforms/passes.h", @@ -681,6 +682,7 @@ cc_library( "lib/Dialect/mhlo/transforms/chlo_legalize_to_hlo_pass.cc", "lib/Dialect/mhlo/transforms/lhlo_legalize_to_llvm_pass.cc", "lib/Dialect/mhlo/transforms/materialize_broadcasts_pass.cc", + "lib/Dialect/mhlo/transforms/optimize_mhlo_pass.cc", "lib/Dialect/mhlo/transforms/test_infer_shaped_type_pass.cc", "lib/Dialect/mhlo/transforms/unfuse_batch_norm_pass.cc", ], diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h index cb9a85a658a..f3f4405ffa6 100644 --- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h +++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h @@ -38,6 +38,9 @@ void 
PopulateGeneralDotOpLoweringPatterns(OwningRewritePatternList *patterns, void PopulateComplexLoweringPatterns(MLIRContext *context, OwningRewritePatternList *patterns); +void PopulateOptimizeMHLOPatterns(MLIRContext *context, + OwningRewritePatternList *patterns); + void PopulateMhloToStdPatterns(OwningRewritePatternList *patterns, MLIRContext *ctx); diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/optimize_mhlo.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/optimize_mhlo.cc new file mode 100644 index 00000000000..dfed951e19f --- /dev/null +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/optimize_mhlo.cc @@ -0,0 +1,187 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// This file provides optional optimization patterns for mhlo, canonocalizing +// operations to equivalent but potentially more efficient operations. + +#include +#include +#include +#include + +#include "llvm/ADT/STLExtras.h" +#include "mlir/IR/Attributes.h" // from @llvm-project +#include "mlir/IR/MLIRContext.h" // from @llvm-project +#include "mlir/IR/Operation.h" // from @llvm-project +#include "mlir/IR/PatternMatch.h" // from @llvm-project +#include "mlir/IR/TypeUtilities.h" // from @llvm-project +#include "mlir/IR/Types.h" // from @llvm-project +#include "mlir/Pass/Pass.h" // from @llvm-project +#include "mlir/Pass/PassRegistry.h" // from @llvm-project +#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" +#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/passes.h" +#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/hlo_utils.h" + +using mlir::OwningRewritePatternList; + +namespace mlir { +namespace mhlo { +namespace { + +// Returns 1D 64-bit dense elements attribute with the given values. +static DenseIntElementsAttr GetI64ElementsAttr(ArrayRef values, + Builder* builder) { + RankedTensorType ty = RankedTensorType::get( + {static_cast(values.size())}, builder->getIntegerType(64)); + return DenseIntElementsAttr::get(ty, values); +} + +//===----------------------------------------------------------------------===// +// GatherOp +//===----------------------------------------------------------------------===// + +class GatherIsSlice : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + LogicalResult matchAndRewrite(GatherOp gather, + PatternRewriter& rewriter) const override { + auto dimension_numbers = gather.dimension_numbers(); + + // Inputs need to be ranked to lower. 
+ if (!gather.operand().getType().cast().hasRank() || + !gather.operand().getType().cast().hasStaticShape() || + !gather.start_indices().getType().cast().hasRank() || + !gather.start_indices().getType().cast().hasStaticShape()) { + return failure(); + } + + if (dimension_numbers.index_vector_dim().getValue().getSExtValue() != 0) { + return failure(); + } + + // TODO(suderman): Handle start index map != {0}. + if (!dimension_numbers.start_index_map() || + dimension_numbers.start_index_map().getType().getRank() != 1 || + dimension_numbers.start_index_map().getType().getDimSize(0) != 1 || + dimension_numbers.start_index_map() + .getValue({0}) + .cast() + .getValue() != 0) { + return failure(); + } + + auto result_ty = gather.getResult().getType().dyn_cast(); + + // Requires a ranked output. + if (!result_ty) { + return failure(); + } + if (dimension_numbers.offset_dims().getType().getNumElements() != + result_ty.getRank()) { + return failure(); + } + for (auto it : llvm::enumerate(dimension_numbers.offset_dims())) { + if (it.index() != it.value()) { + return failure(); + } + } + + // Verify the gather slice sizes are correct. + if (gather.slice_sizes().getNumElements() != + gather.operand().getType().cast().getRank()) { + return failure(); + } + + // Validate the slice sizes are correct. + if (gather.slice_sizes().getType().cast().getNumElements() < + result_ty.getShape().size() + 1) { + return failure(); + } + + for (auto it : llvm::enumerate(result_ty.getShape())) { + if (gather.slice_sizes() + .getValue(it.index() + 1) + .cast() + .getValue() != it.value()) { + return failure(); + } + } + + auto gather_start_indices = gather.start_indices(); + auto gather_start_indices_ty = + gather_start_indices.getType().cast(); + + llvm::SmallVector slice_start_indices; + + if (gather_start_indices_ty.getRank() == 0) { + slice_start_indices.push_back(gather_start_indices); + } else if (gather_start_indices_ty.getRank() == 1) { + for (int i = 0; i < gather_start_indices_ty.getDimSize(0); i++) { + auto start = GetI64ElementsAttr({i}, &rewriter); + auto limit = GetI64ElementsAttr({i + 1}, &rewriter); + auto stride = GetI64ElementsAttr({1}, &rewriter); + auto indicesSlice = rewriter.create( + gather.getLoc(), gather_start_indices, start, limit, stride); + auto reshaped = rewriter.create( + gather.getLoc(), + RankedTensorType::get( + {}, indicesSlice.getType().cast().getElementType()), + indicesSlice); + slice_start_indices.push_back(reshaped); + } + } else { + return failure(); + } + + auto sliceSizes = gather.slice_sizes(); + auto sliceSizesTy = sliceSizes.getType(); + if (sliceSizesTy.getRank() != 1) { + return failure(); + } + + // Start indices have implicit zeros when not specified. This is because + // Gather occurs similar to slicing where full slices are inferred. Add any + // missing zeros as necessary. 
+ auto zero = rewriter.create( + gather.getLoc(), rewriter.getZeroAttr(RankedTensorType::get( + {}, gather_start_indices_ty.getElementType()))); + while (slice_start_indices.size() < sliceSizesTy.getDimSize(0)) { + slice_start_indices.push_back(zero); + } + + SmallVector sliceShape; + for (auto shapeValue : gather.slice_sizes().getIntValues()) { + sliceShape.push_back(shapeValue.getSExtValue()); + } + + auto sliceTy = + RankedTensorType::get(sliceShape, result_ty.getElementType()); + auto slice = rewriter.create( + gather.getLoc(), sliceTy, gather.operand(), slice_start_indices, + gather.slice_sizes()); + + rewriter.replaceOpWithNewOp(gather, gather.getType(), slice); + + return success(); + } +}; + +} // end anonymous namespace + +void PopulateOptimizeMHLOPatterns(MLIRContext* context, + OwningRewritePatternList* patterns) { + patterns->insert(context); +} +} // end namespace mhlo +} // end namespace mlir diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/optimize_mhlo_pass.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/optimize_mhlo_pass.cc new file mode 100644 index 00000000000..3d1f29e0ca6 --- /dev/null +++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/optimize_mhlo_pass.cc @@ -0,0 +1,49 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project +#include "mlir/IR/MLIRContext.h" // from @llvm-project +#include "mlir/IR/Operation.h" // from @llvm-project +#include "mlir/IR/PatternMatch.h" // from @llvm-project +#include "mlir/Pass/Pass.h" // from @llvm-project +#include "mlir/Transforms/DialectConversion.h" // from @llvm-project +#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h" +#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h" + +using mlir::FunctionPass; +using mlir::PassRegistration; +using mlir::PassWrapper; + +namespace { +class OptimizeMhlo : public PassWrapper { + public: + explicit OptimizeMhlo() : PassWrapper() {} + + /// Performs the lowering to MHLO dialect. + void runOnFunction() override; +}; +} // end anonymous namespace + +// Lowers the complex operations that can be represented using other operations. +void OptimizeMhlo::runOnFunction() { + // Add lowering patterns to the list. 
+ mlir::OwningRewritePatternList patterns; + mlir::mhlo::PopulateOptimizeMHLOPatterns(&getContext(), &patterns); + + applyPatternsAndFoldGreedily(getFunction(), patterns); +} + +static PassRegistration pass("mhlo-test-optimize", + "Run optional HLO optimizations."); diff --git a/tensorflow/compiler/mlir/hlo/tests/optimize-hlo.mlir b/tensorflow/compiler/mlir/hlo/tests/optimize-hlo.mlir new file mode 100644 index 00000000000..c20de0b2a9f --- /dev/null +++ b/tensorflow/compiler/mlir/hlo/tests/optimize-hlo.mlir @@ -0,0 +1,64 @@ +// RUN: mlir-hlo-opt %s -pass-pipeline='func(mhlo-test-optimize)' | FileCheck %s + +// CHECK-LABEL: @gather_is_slice_no_rank +func @gather_is_slice_no_rank(%arg0: tensor<2x1x2xi32>, %arg1: tensor) -> tensor<1x2xi32> { + // CHECK: [[CST:%.+]] = mhlo.constant dense<0> : tensor + // CHECK: [[SLICE:%.+]] = "mhlo.dynamic-slice"(%arg0, %arg1, [[CST]], [[CST]]) {slice_sizes = dense<[1, 1, 2]> : tensor<3xi64>} + // CHECK: [[RESHAPE:%.+]] = "mhlo.reshape"([[SLICE]]) + %res = "mhlo.gather"(%arg0, %arg1) { + dimension_numbers = { + collapsed_slice_dims = dense<0> : tensor<1xi64>, + index_vector_dim = 0 : i64, + offset_dims = dense<[0, 1]> : tensor<2xi64>, + start_index_map = dense<0> : tensor<1xi64> + }, + slice_sizes = dense<[1, 1, 2]> : tensor<3xi64> + } : (tensor<2x1x2xi32>, tensor) -> tensor<1x2xi32> + + // CHECK: return [[RESHAPE]] + return %res : tensor<1x2xi32> +} + +// CHECK-LABEL: @gather_is_slice +func @gather_is_slice(%arg0: tensor<2x1x2xi32>, %arg1: tensor<1xi64>) -> tensor<1x2xi32> { + // CHECK: [[CST:%.+]] = mhlo.constant dense<0> : tensor + // CHECK: [[RESHAPE:%.+]] = "mhlo.reshape"(%arg1) + // CHECK: [[SLICE:%.+]] = "mhlo.dynamic-slice"(%arg0, [[RESHAPE]], [[CST]], [[CST]]) {slice_sizes = dense<[1, 1, 2]> : tensor<3xi64>} + // CHECK: [[RES:%.+]] = "mhlo.reshape"([[SLICE]]) + + %res = "mhlo.gather"(%arg0, %arg1) { + dimension_numbers = { + collapsed_slice_dims = dense<0> : tensor<1xi64>, + index_vector_dim = 0 : i64, + offset_dims = dense<[0, 1]> : tensor<2xi64>, + start_index_map = dense<0> : tensor<1xi64> + }, + slice_sizes = dense<[1, 1, 2]> : tensor<3xi64> + } : (tensor<2x1x2xi32>, tensor<1xi64>) -> tensor<1x2xi32> + + // CHECK: return [[RES]] + return %res : tensor<1x2xi32> +} + +// CHECK-LABEL: @gather_is_slice_multiple_start_indices +func @gather_is_slice_multiple_start_indices(%arg0: tensor<2x1x2xi32>, %arg1: tensor<2xi64>) -> tensor<1x2xi32> { + // CHECK-DAG: [[CST:%.+]] = mhlo.constant dense<0> + // CHECK-DAG: [[SLICE1:%.+]] = "mhlo.slice"(%arg1) {limit_indices = dense<1> : tensor<1xi64>, start_indices = dense<0> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} + // CHECK-DAG: [[RESHAPE1:%.+]] = "mhlo.reshape"([[SLICE1]]) + // CHECK-DAG: [[SLICE2:%.+]] = "mhlo.slice"(%arg1) {limit_indices = dense<2> : tensor<1xi64>, start_indices = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} + // CHECK-DAG: [[RESHAPE2:%.+]] = "mhlo.reshape"([[SLICE2]]) + // CHECK-DAG: [[DSLICE:%.+]] = "mhlo.dynamic-slice"(%arg0, [[RESHAPE1]], [[RESHAPE2]], [[CST]]) {slice_sizes = dense<[1, 1, 2]> : tensor<3xi64>} + // CHECK-DAG: [[RES:%.+]] = "mhlo.reshape"([[DSLICE]]) + %res = "mhlo.gather"(%arg0, %arg1) { + dimension_numbers = { + collapsed_slice_dims = dense<0> : tensor<1xi64>, + index_vector_dim = 0 : i64, + offset_dims = dense<[0, 1]> : tensor<2xi64>, + start_index_map = dense<0> : tensor<1xi64> + }, + slice_sizes = dense<[1, 1, 2]> : tensor<3xi64> + } : (tensor<2x1x2xi32>, tensor<2xi64>) -> tensor<1x2xi32> + + // CHECK: return [[RES]] + return %res : 
tensor<1x2xi32> +} From 0dcb8001adb8565cc35559d90b269201a7005c71 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 Jul 2020 11:15:00 -0700 Subject: [PATCH 0513/2522] Fix whitespace in Swift module PiperOrigin-RevId: 321399202 Change-Id: I0fe62c9c101104809e7452fe1ff206cc3614ad02 --- tensorflow/lite/experimental/swift/Sources/Interpreter.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/lite/experimental/swift/Sources/Interpreter.swift b/tensorflow/lite/experimental/swift/Sources/Interpreter.swift index d15a0ad9b5d..60cdc698e88 100644 --- a/tensorflow/lite/experimental/swift/Sources/Interpreter.swift +++ b/tensorflow/lite/experimental/swift/Sources/Interpreter.swift @@ -338,7 +338,7 @@ extension String { /// - arguments: A C pointer to a `va_list` of arguments to substitute into `cFormat`. init?(cFormat: UnsafePointer, arguments: CVaListPointer) { #if os(Linux) - let length = Int(vsnprintf(nil, 0, cFormat, arguments) + 1) // null terminator + let length = Int(vsnprintf(nil, 0, cFormat, arguments) + 1) // null terminator guard length > 0 else { return nil } let buffer = UnsafeMutablePointer.allocate(capacity: length) defer { From 9edc5097b9fd997ec95da3d66f2bd6ec53bdf5e1 Mon Sep 17 00:00:00 2001 From: Karim Nosir Date: Wed, 15 Jul 2020 11:15:12 -0700 Subject: [PATCH 0514/2522] Enable Reduce min/max quantization for post training quantization. Also, enable zip tests Fixes #39601 PiperOrigin-RevId: 321399245 Change-Id: Ieae5cbf7b916241a4a15635e3a467e630fdf7595 --- tensorflow/lite/kernels/reduce.cc | 3 ++- tensorflow/lite/testing/op_tests/reduce.py | 8 ++++++-- tensorflow/lite/tools/optimize/operator_property.cc | 7 +++++++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/tensorflow/lite/kernels/reduce.cc b/tensorflow/lite/kernels/reduce.cc index 6107b01cd46..20065883f9d 100644 --- a/tensorflow/lite/kernels/reduce.cc +++ b/tensorflow/lite/kernels/reduce.cc @@ -516,7 +516,8 @@ TfLiteStatus EvalLogic(TfLiteContext* context, TfLiteNode* node, ResizeTempAxis(context, op_context, resolved_axis)); TF_LITE_ENSURE_OK(context, ResizeOutputTensor(context, op_context)); } - if (op_context->input->type == kTfLiteUInt8) { + if (op_context->input->type == kTfLiteUInt8 || + op_context->input->type == kTfLiteInt8) { TF_LITE_ENSURE_EQ(context, op_context->input->params.scale, op_context->output->params.scale); TF_LITE_ENSURE_EQ(context, op_context->input->params.zero_point, diff --git a/tensorflow/lite/testing/op_tests/reduce.py b/tensorflow/lite/testing/op_tests/reduce.py index 259dcad68f3..72324010f4b 100644 --- a/tensorflow/lite/testing/op_tests/reduce.py +++ b/tensorflow/lite/testing/op_tests/reduce.py @@ -249,13 +249,17 @@ def make_reduce_prod_tests(options): @register_make_test_function() def make_reduce_max_tests(options): """Make a set of tests to do max.""" - return make_reduce_tests(tf.reduce_max)(options) + return make_reduce_tests( + tf.reduce_max, allow_fully_quantize=True, min_value=-1, max_value=1)( + options) @register_make_test_function() def make_reduce_min_tests(options): """Make a set of tests to do min.""" - return make_reduce_tests(tf.reduce_min)(options) + return make_reduce_tests( + tf.reduce_min, allow_fully_quantize=True, min_value=-1, max_value=1)( + options) @register_make_test_function() diff --git a/tensorflow/lite/tools/optimize/operator_property.cc b/tensorflow/lite/tools/optimize/operator_property.cc index f2cb98ef31a..e105c0f2d64 100644 --- a/tensorflow/lite/tools/optimize/operator_property.cc +++ 
b/tensorflow/lite/tools/optimize/operator_property.cc @@ -975,6 +975,13 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index, property.restrict_same_input_output_scale = true; property.version = 2; break; + case BuiltinOperator_REDUCE_MAX: + case BuiltinOperator_REDUCE_MIN: + property.inputs = {{0, {}}}; + property.outputs = {{0, {}}}; + property.restrict_same_input_output_scale = true; + property.version = 2; + break; default: // No quantized implementation exists for this operation. property.quantizable = false; From ee781437e8672914e5c14a006a515ffcf607527b Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Wed, 15 Jul 2020 11:16:07 -0700 Subject: [PATCH 0515/2522] Verify filter dimensions sanity in Conv2DBackpropInput PiperOrigin-RevId: 321399463 Change-Id: Ic196445c1cb120168fff16acc44e1e5a080c5c5c --- tensorflow/core/kernels/conv_grad_input_ops.cc | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tensorflow/core/kernels/conv_grad_input_ops.cc b/tensorflow/core/kernels/conv_grad_input_ops.cc index 2dd63d1f4d0..d9743a1dc57 100644 --- a/tensorflow/core/kernels/conv_grad_input_ops.cc +++ b/tensorflow/core/kernels/conv_grad_input_ops.cc @@ -561,6 +561,16 @@ class Conv2DCustomBackpropInputOp : public OpKernel { /*dilations=*/{1, 1, 1, 1}, strides_, padding_, explicit_paddings_, data_format_, &dims)); + OP_REQUIRES(context, dims.in_depth == filter.shape().dim_size(2), + errors::InvalidArgument("Computed input depth ", dims.in_depth, + " doesn't match filter input depth ", + filter.shape().dim_size(2))); + OP_REQUIRES( + context, dims.out_depth == filter.shape().dim_size(3), + errors::InvalidArgument("Computed output depth ", dims.out_depth, + " doesn't match filter output depth ", + filter.shape().dim_size(3))); + Tensor* in_backprop = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, input_shape, &in_backprop)); From 73a621c1424c9b1c0b50dc28a2d05c4f8e4d5677 Mon Sep 17 00:00:00 2001 From: Sidong-Wei Date: Wed, 15 Jul 2020 14:32:18 -0400 Subject: [PATCH 0516/2522] Sort header files alphabetically --- tensorflow/core/platform/default/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/platform/default/BUILD b/tensorflow/core/platform/default/BUILD index 9226c042bd3..4ae7ddbc539 100644 --- a/tensorflow/core/platform/default/BUILD +++ b/tensorflow/core/platform/default/BUILD @@ -268,9 +268,9 @@ cc_library( "//tensorflow/core/platform:init_main.h", "//tensorflow/core/platform:mem.h", "//tensorflow/core/platform:numa.h", - "//tensorflow/core/platform:snappy.h", "//tensorflow/core/platform:profile_utils/cpu_utils.h", "//tensorflow/core/platform:profile_utils/i_cpu_utils_helper.h", + "//tensorflow/core/platform:snappy.h", ], copts = tf_copts(), defines = ["TF_USE_SNAPPY"] + select({ From 4c7d80b96a9541471afcf784d6816f44e66efaab Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 Jul 2020 11:17:18 -0700 Subject: [PATCH 0517/2522] Fully qualifying uses of tensorflow::int64. 
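As a minimal sketch (signatures abbreviated, not taken verbatim from any one
file in this change), the mechanical shape of the edit is:

  #include "tensorflow/core/platform/types.h"

  // Before: runtime sources pulled the alias into file scope.
  using tensorflow::int64;
  void EigenMatMulF32(float* out, int64 m, int64 n, int64 k);

  // After: the using-declaration is dropped and every use is fully qualified.
  void EigenMatMulF32(float* out, tensorflow::int64 m, tensorflow::int64 n,
                      tensorflow::int64 k);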
PiperOrigin-RevId: 321399703 Change-Id: I42732ead99e062444fa5c507f9fce10f1ace765c --- .../xla/service/cpu/runtime_conv2d.cc | 36 ++++++---- .../xla/service/cpu/runtime_key_value_sort.cc | 43 +++++------- .../xla/service/cpu/runtime_matmul.cc | 56 ++++++++------- .../cpu/runtime_single_threaded_conv2d.cc | 36 ++++++---- .../cpu/runtime_single_threaded_matmul.cc | 69 ++++++++++--------- 5 files changed, 127 insertions(+), 113 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/runtime_conv2d.cc b/tensorflow/compiler/xla/service/cpu/runtime_conv2d.cc index 84cb41a8f17..eac0371b76d 100644 --- a/tensorflow/compiler/xla/service/cpu/runtime_conv2d.cc +++ b/tensorflow/compiler/xla/service/cpu/runtime_conv2d.cc @@ -23,16 +23,18 @@ limitations under the License. #include "tensorflow/core/platform/dynamic_annotations.h" #include "tensorflow/core/platform/types.h" -using tensorflow::int64; - TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenConvF32( const void* run_options_ptr, float* out, float* lhs, float* rhs, - int64 input_batch, int64 input_rows, int64 input_cols, int64 input_channels, - int64 kernel_rows, int64 kernel_cols, int64 kernel_channels, - int64 kernel_filters, int64 output_rows, int64 output_cols, - int64 row_stride, int64 col_stride, int64 padding_top, int64 padding_bottom, - int64 padding_left, int64 padding_right, int64 lhs_row_dilation, - int64 lhs_col_dilation, int64 rhs_row_dilation, int64 rhs_col_dilation) { + tensorflow::int64 input_batch, tensorflow::int64 input_rows, + tensorflow::int64 input_cols, tensorflow::int64 input_channels, + tensorflow::int64 kernel_rows, tensorflow::int64 kernel_cols, + tensorflow::int64 kernel_channels, tensorflow::int64 kernel_filters, + tensorflow::int64 output_rows, tensorflow::int64 output_cols, + tensorflow::int64 row_stride, tensorflow::int64 col_stride, + tensorflow::int64 padding_top, tensorflow::int64 padding_bottom, + tensorflow::int64 padding_left, tensorflow::int64 padding_right, + tensorflow::int64 lhs_row_dilation, tensorflow::int64 lhs_col_dilation, + tensorflow::int64 rhs_row_dilation, tensorflow::int64 rhs_col_dilation) { const xla::ExecutableRunOptions* run_options = static_cast(run_options_ptr); XLA_LIGHTWEIGHT_CHECK(run_options->intra_op_thread_pool() != nullptr); @@ -46,13 +48,17 @@ TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenConvF32( TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenConvF16( const void* run_options_ptr, Eigen::half* out, Eigen::half* lhs, - Eigen::half* rhs, int64 input_batch, int64 input_rows, int64 input_cols, - int64 input_channels, int64 kernel_rows, int64 kernel_cols, - int64 kernel_channels, int64 kernel_filters, int64 output_rows, - int64 output_cols, int64 row_stride, int64 col_stride, int64 padding_top, - int64 padding_bottom, int64 padding_left, int64 padding_right, - int64 lhs_row_dilation, int64 lhs_col_dilation, int64 rhs_row_dilation, - int64 rhs_col_dilation) { + Eigen::half* rhs, tensorflow::int64 input_batch, + tensorflow::int64 input_rows, tensorflow::int64 input_cols, + tensorflow::int64 input_channels, tensorflow::int64 kernel_rows, + tensorflow::int64 kernel_cols, tensorflow::int64 kernel_channels, + tensorflow::int64 kernel_filters, tensorflow::int64 output_rows, + tensorflow::int64 output_cols, tensorflow::int64 row_stride, + tensorflow::int64 col_stride, tensorflow::int64 padding_top, + tensorflow::int64 padding_bottom, tensorflow::int64 padding_left, + tensorflow::int64 padding_right, tensorflow::int64 lhs_row_dilation, + tensorflow::int64 
lhs_col_dilation, tensorflow::int64 rhs_row_dilation, + tensorflow::int64 rhs_col_dilation) { const xla::ExecutableRunOptions* run_options = static_cast(run_options_ptr); XLA_LIGHTWEIGHT_CHECK(run_options->intra_op_thread_pool() != nullptr); diff --git a/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.cc b/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.cc index 0d4e7055ddb..2cee58162fc 100644 --- a/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.cc +++ b/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.cc @@ -25,21 +25,16 @@ limitations under the License. #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/types.h" -namespace { -using tensorflow::int32; -using tensorflow::int64; -} // namespace - TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSort( - int64 a, int64 b, int64 c, char** values, int32 values_count, - int32* values_primitive_type_size_in_bytes, bool is_stable, - char* run_options, int64* prof_counters, + tensorflow::int64 a, tensorflow::int64 b, tensorflow::int64 c, char** values, tensorflow::int32 values_count, + tensorflow::int32* values_primitive_type_size_in_bytes, bool is_stable, + char* run_options, tensorflow::int64* prof_counters, void (*less_than)(char*, char*, char**, char**, tensorflow::int64*)) { // 'values' and 'values_primitive_type_size_in_bytes' are managed by the JIT // code, so msan can't tell they are initialized. TF_ANNOTATE_MEMORY_IS_INITIALIZED(values, values_count * sizeof(char*)); TF_ANNOTATE_MEMORY_IS_INITIALIZED(values_primitive_type_size_in_bytes, - values_count * sizeof(int32)); + values_count * sizeof(tensorflow::int32)); // High-level idea of the iteration/sorting logic: // Conceptually we have a 3-dimensional shape [a, b, c]. b corresponds to the @@ -50,16 +45,16 @@ TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSort( // 'base_offset' value which points to the first element in that row, and add // i * c for accessing the 'i'-th element in that row. - int64 sort_dimension_elements = b; - int64 num_iteration_elements = a * c; - int64 sort_dimension_offset = c; + tensorflow::int64 sort_dimension_elements = b; + tensorflow::int64 num_iteration_elements = a * c; + tensorflow::int64 sort_dimension_offset = c; - std::unique_ptr indices(new int64[sort_dimension_elements]); + std::unique_ptr indices(new tensorflow::int64[sort_dimension_elements]); std::unique_ptr comparison_values(new char*[2 * values_count]); std::iota(indices.get(), indices.get() + sort_dimension_elements, 0); std::unique_ptr reordered_values( new std::string[sort_dimension_elements]); - for (int64 index = 0; index < num_iteration_elements; ++index) { + for (tensorflow::int64 index = 0; index < num_iteration_elements; ++index) { // If the sort should be stable, we have to reinitialize indices to iota to // guarantee that we still keep the relative order in case of ties. if (is_stable && index > 0) { @@ -71,14 +66,14 @@ TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSort( // calculating the base offset, we need to multiply the index into the 'a' // dimension with 'b' * 'c'. // 'index' / 'c' * 'c' * 'b' = ('index' - 'index' % 'c') * 'b'. 
- int64 base_offset = + tensorflow::int64 base_offset = index % sort_dimension_offset + (index - index % sort_dimension_offset) * sort_dimension_elements; - auto compare_function = [&](int64 a, int64 b) -> bool { - for (int32 i = 0; i < values_count; ++i) { - int64 memory_index_lhs = (base_offset + a * sort_dimension_offset) * + auto compare_function = [&](tensorflow::int64 a, tensorflow::int64 b) -> bool { + for (tensorflow::int32 i = 0; i < values_count; ++i) { + tensorflow::int64 memory_index_lhs = (base_offset + a * sort_dimension_offset) * values_primitive_type_size_in_bytes[i]; - int64 memory_index_rhs = (base_offset + b * sort_dimension_offset) * + tensorflow::int64 memory_index_rhs = (base_offset + b * sort_dimension_offset) * values_primitive_type_size_in_bytes[i]; comparison_values[i * 2] = values[i] + memory_index_lhs; comparison_values[i * 2 + 1] = values[i] + memory_index_rhs; @@ -97,9 +92,9 @@ TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSort( } // Reorder the values according to the order defined by 'indices'. - for (int32 idx = 0; idx < values_count; ++idx) { - for (int64 i = 0; i < sort_dimension_elements; ++i) { - int64 memory_index = + for (tensorflow::int32 idx = 0; idx < values_count; ++idx) { + for (tensorflow::int64 i = 0; i < sort_dimension_elements; ++i) { + tensorflow::int64 memory_index = (base_offset + indices[i] * sort_dimension_offset) * values_primitive_type_size_in_bytes[idx]; @@ -107,8 +102,8 @@ TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSort( std::string(values[idx] + memory_index, values_primitive_type_size_in_bytes[idx]); } - for (int64 i = 0; i < sort_dimension_elements; ++i) { - int64 memory_index = (base_offset + i * sort_dimension_offset) * + for (tensorflow::int64 i = 0; i < sort_dimension_elements; ++i) { + tensorflow::int64 memory_index = (base_offset + i * sort_dimension_offset) * values_primitive_type_size_in_bytes[idx]; memcpy(values[idx] + memory_index, reordered_values[i].c_str(), values_primitive_type_size_in_bytes[idx]); diff --git a/tensorflow/compiler/xla/service/cpu/runtime_matmul.cc b/tensorflow/compiler/xla/service/cpu/runtime_matmul.cc index 35db15fed2c..7e19b383d6f 100644 --- a/tensorflow/compiler/xla/service/cpu/runtime_matmul.cc +++ b/tensorflow/compiler/xla/service/cpu/runtime_matmul.cc @@ -27,9 +27,6 @@ limitations under the License. 
#include "tensorflow/core/kernels/eigen_contraction_kernel.h" #endif -using tensorflow::int32; -using tensorflow::int64; - namespace { bool Is16BytesAligned(void* ptr) { @@ -37,19 +34,20 @@ bool Is16BytesAligned(void* ptr) { } template -void MatMul(const void* run_options_ptr, T* out, T* lhs, T* rhs, int64 m, - int64 n, int64 k, int32 transpose_lhs, int32 transpose_rhs) { +void MatMul(const void* run_options_ptr, T* out, T* lhs, T* rhs, + tensorflow::int64 m, tensorflow::int64 n, tensorflow::int64 k, + tensorflow::int32 transpose_lhs, tensorflow::int32 transpose_rhs) { const xla::ExecutableRunOptions* run_options = static_cast(run_options_ptr); - int64 lhs_rows = m; - int64 lhs_cols = k; + tensorflow::int64 lhs_rows = m; + tensorflow::int64 lhs_cols = k; if (transpose_lhs) { std::swap(lhs_rows, lhs_cols); } - int64 rhs_rows = k; - int64 rhs_cols = n; + tensorflow::int64 rhs_rows = k; + tensorflow::int64 rhs_cols = n; if (transpose_rhs) { std::swap(rhs_rows, rhs_cols); } @@ -75,8 +73,9 @@ void MatMul(const void* run_options_ptr, T* out, T* lhs, T* rhs, int64 m, template void MatMulDispatch(const void* run_options_ptr, T* out, T* lhs, T* rhs, - int64 m, int64 n, int64 k, int32 transpose_lhs, - int32 transpose_rhs) { + tensorflow::int64 m, tensorflow::int64 n, + tensorflow::int64 k, tensorflow::int32 transpose_lhs, + tensorflow::int32 transpose_rhs) { bool all_buffers_16b_aligned = Is16BytesAligned(out) && Is16BytesAligned(lhs) && Is16BytesAligned(rhs); @@ -94,45 +93,52 @@ void MatMulDispatch(const void* run_options_ptr, T* out, T* lhs, T* rhs, TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenMatMulF16( const void* run_options_ptr, Eigen::half* out, Eigen::half* lhs, - Eigen::half* rhs, int64 m, int64 n, int64 k, int32 transpose_lhs, - int32 transpose_rhs) { + Eigen::half* rhs, tensorflow::int64 m, tensorflow::int64 n, + tensorflow::int64 k, tensorflow::int32 transpose_lhs, + tensorflow::int32 transpose_rhs) { MatMulDispatch(run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs); } TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenMatMulF32( - const void* run_options_ptr, float* out, float* lhs, float* rhs, int64 m, - int64 n, int64 k, int32 transpose_lhs, int32 transpose_rhs) { + const void* run_options_ptr, float* out, float* lhs, float* rhs, + tensorflow::int64 m, tensorflow::int64 n, tensorflow::int64 k, + tensorflow::int32 transpose_lhs, tensorflow::int32 transpose_rhs) { MatMulDispatch(run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs); } TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenMatMulF64( - const void* run_options_ptr, double* out, double* lhs, double* rhs, int64 m, - int64 n, int64 k, int32 transpose_lhs, int32 transpose_rhs) { + const void* run_options_ptr, double* out, double* lhs, double* rhs, + tensorflow::int64 m, tensorflow::int64 n, tensorflow::int64 k, + tensorflow::int32 transpose_lhs, tensorflow::int32 transpose_rhs) { MatMulDispatch(run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs); } TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenMatMulC64( const void* run_options_ptr, std::complex* out, - std::complex* lhs, std::complex* rhs, int64 m, int64 n, - int64 k, int32 transpose_lhs, int32 transpose_rhs) { + std::complex* lhs, std::complex* rhs, tensorflow::int64 m, + tensorflow::int64 n, tensorflow::int64 k, tensorflow::int32 transpose_lhs, + tensorflow::int32 transpose_rhs) { MatMulDispatch>(run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs); } 
TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenMatMulC128( const void* run_options_ptr, std::complex* out, - std::complex* lhs, std::complex* rhs, int64 m, int64 n, - int64 k, int32 transpose_lhs, int32 transpose_rhs) { + std::complex* lhs, std::complex* rhs, tensorflow::int64 m, + tensorflow::int64 n, tensorflow::int64 k, tensorflow::int32 transpose_lhs, + tensorflow::int32 transpose_rhs) { MatMulDispatch>(run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs); } TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenMatMulS32( - const void* run_options_ptr, int32* out, int32* lhs, int32* rhs, int64 m, - int64 n, int64 k, int32 transpose_lhs, int32 transpose_rhs) { - MatMulDispatch(run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs, - transpose_rhs); + const void* run_options_ptr, tensorflow::int32* out, tensorflow::int32* lhs, + tensorflow::int32* rhs, tensorflow::int64 m, tensorflow::int64 n, + tensorflow::int64 k, tensorflow::int32 transpose_lhs, + tensorflow::int32 transpose_rhs) { + MatMulDispatch(run_options_ptr, out, lhs, rhs, m, n, k, + transpose_lhs, transpose_rhs); } diff --git a/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.cc b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.cc index 5afccc6a86e..360ce57e808 100644 --- a/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.cc +++ b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.cc @@ -19,18 +19,20 @@ limitations under the License. #include "tensorflow/core/platform/dynamic_annotations.h" #include "tensorflow/core/platform/types.h" -using tensorflow::int64; - TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenSingleThreadedConvF16( const void* run_options_ptr, Eigen::half* out, Eigen::half* lhs, - Eigen::half* rhs, int64 input_batch, int64 input_rows, int64 input_cols, - int64 input_channels, int64 kernel_rows, int64 kernel_cols, - int64 kernel_channels, int64 kernel_filters, int64 output_rows, - int64 output_cols, int64 row_stride, int64 col_stride, int64 padding_top, - int64 padding_bottom, int64 padding_left, int64 padding_right, - int64 lhs_row_dilation, int64 lhs_col_dilation, int64 rhs_row_dilation, - int64 rhs_col_dilation) { + Eigen::half* rhs, tensorflow::int64 input_batch, + tensorflow::int64 input_rows, tensorflow::int64 input_cols, + tensorflow::int64 input_channels, tensorflow::int64 kernel_rows, + tensorflow::int64 kernel_cols, tensorflow::int64 kernel_channels, + tensorflow::int64 kernel_filters, tensorflow::int64 output_rows, + tensorflow::int64 output_cols, tensorflow::int64 row_stride, + tensorflow::int64 col_stride, tensorflow::int64 padding_top, + tensorflow::int64 padding_bottom, tensorflow::int64 padding_left, + tensorflow::int64 padding_right, tensorflow::int64 lhs_row_dilation, + tensorflow::int64 lhs_col_dilation, tensorflow::int64 rhs_row_dilation, + tensorflow::int64 rhs_col_dilation) { tensorflow::xla::EigenConvImpl( Eigen::DefaultDevice(), out, lhs, rhs, input_batch, input_rows, input_cols, input_channels, kernel_rows, kernel_cols, kernel_channels, @@ -42,12 +44,16 @@ __xla_cpu_runtime_EigenSingleThreadedConvF16( TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenSingleThreadedConvF32( const void* run_options_ptr, float* out, float* lhs, float* rhs, - int64 input_batch, int64 input_rows, int64 input_cols, int64 input_channels, - int64 kernel_rows, int64 kernel_cols, int64 kernel_channels, - int64 kernel_filters, int64 output_rows, int64 output_cols, - int64 row_stride, 
int64 col_stride, int64 padding_top, int64 padding_bottom, - int64 padding_left, int64 padding_right, int64 lhs_row_dilation, - int64 lhs_col_dilation, int64 rhs_row_dilation, int64 rhs_col_dilation) { + tensorflow::int64 input_batch, tensorflow::int64 input_rows, + tensorflow::int64 input_cols, tensorflow::int64 input_channels, + tensorflow::int64 kernel_rows, tensorflow::int64 kernel_cols, + tensorflow::int64 kernel_channels, tensorflow::int64 kernel_filters, + tensorflow::int64 output_rows, tensorflow::int64 output_cols, + tensorflow::int64 row_stride, tensorflow::int64 col_stride, + tensorflow::int64 padding_top, tensorflow::int64 padding_bottom, + tensorflow::int64 padding_left, tensorflow::int64 padding_right, + tensorflow::int64 lhs_row_dilation, tensorflow::int64 lhs_col_dilation, + tensorflow::int64 rhs_row_dilation, tensorflow::int64 rhs_col_dilation) { tensorflow::xla::EigenConvImpl( Eigen::DefaultDevice(), out, lhs, rhs, input_batch, input_rows, input_cols, input_channels, kernel_rows, kernel_cols, kernel_channels, diff --git a/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.cc b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.cc index c7601f939c7..a8112c1106b 100644 --- a/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.cc +++ b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.cc @@ -23,9 +23,6 @@ limitations under the License. #include "tensorflow/core/kernels/eigen_contraction_kernel.h" #endif -using tensorflow::int32; -using tensorflow::int64; - namespace { bool Is16BytesAligned(void* ptr) { @@ -33,16 +30,17 @@ bool Is16BytesAligned(void* ptr) { } template -void MatMul(const void* run_options_ptr, T* out, T* lhs, T* rhs, int64 m, - int64 n, int64 k, int32 transpose_lhs, int32 transpose_rhs) { - int64 lhs_rows = m; - int64 lhs_cols = k; +void MatMul(const void* run_options_ptr, T* out, T* lhs, T* rhs, + tensorflow::int64 m, tensorflow::int64 n, tensorflow::int64 k, + tensorflow::int32 transpose_lhs, tensorflow::int32 transpose_rhs) { + tensorflow::int64 lhs_rows = m; + tensorflow::int64 lhs_cols = k; if (transpose_lhs) { std::swap(lhs_rows, lhs_cols); } - int64 rhs_rows = k; - int64 rhs_cols = n; + tensorflow::int64 rhs_rows = k; + tensorflow::int64 rhs_cols = n; if (transpose_rhs) { std::swap(rhs_rows, rhs_cols); } @@ -67,8 +65,10 @@ void MatMul(const void* run_options_ptr, T* out, T* lhs, T* rhs, int64 m, template void SingleThreadedMatMulDispatch(const void* run_options_ptr, T* out, T* lhs, - T* rhs, int64 m, int64 n, int64 k, - int32 transpose_lhs, int32 transpose_rhs) { + T* rhs, tensorflow::int64 m, + tensorflow::int64 n, tensorflow::int64 k, + tensorflow::int32 transpose_lhs, + tensorflow::int32 transpose_rhs) { bool all_buffers_16b_aligned = Is16BytesAligned(out) && Is16BytesAligned(lhs) && Is16BytesAligned(rhs); @@ -86,28 +86,27 @@ void SingleThreadedMatMulDispatch(const void* run_options_ptr, T* out, T* lhs, TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenSingleThreadedMatMulF16( const void* run_options_ptr, Eigen::half* out, Eigen::half* lhs, - Eigen::half* rhs, int64 m, int64 n, int64 k, int32 transpose_lhs, - int32 transpose_rhs) { + Eigen::half* rhs, tensorflow::int64 m, tensorflow::int64 n, + tensorflow::int64 k, tensorflow::int32 transpose_lhs, + tensorflow::int32 transpose_rhs) { SingleThreadedMatMulDispatch(run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs); } TF_ATTRIBUTE_NO_SANITIZE_MEMORY void -__xla_cpu_runtime_EigenSingleThreadedMatMulF32(const 
void* run_options_ptr, - float* out, float* lhs, - float* rhs, int64 m, int64 n, - int64 k, int32 transpose_lhs, - int32 transpose_rhs) { +__xla_cpu_runtime_EigenSingleThreadedMatMulF32( + const void* run_options_ptr, float* out, float* lhs, float* rhs, + tensorflow::int64 m, tensorflow::int64 n, tensorflow::int64 k, + tensorflow::int32 transpose_lhs, tensorflow::int32 transpose_rhs) { SingleThreadedMatMulDispatch(run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs); } TF_ATTRIBUTE_NO_SANITIZE_MEMORY void -__xla_cpu_runtime_EigenSingleThreadedMatMulF64(const void* run_options_ptr, - double* out, double* lhs, - double* rhs, int64 m, int64 n, - int64 k, int32 transpose_lhs, - int32 transpose_rhs) { +__xla_cpu_runtime_EigenSingleThreadedMatMulF64( + const void* run_options_ptr, double* out, double* lhs, double* rhs, + tensorflow::int64 m, tensorflow::int64 n, tensorflow::int64 k, + tensorflow::int32 transpose_lhs, tensorflow::int32 transpose_rhs) { SingleThreadedMatMulDispatch(run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs); } @@ -115,8 +114,9 @@ __xla_cpu_runtime_EigenSingleThreadedMatMulF64(const void* run_options_ptr, TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenSingleThreadedMatMulC64( const void* run_options_ptr, std::complex* out, - std::complex* lhs, std::complex* rhs, int64 m, int64 n, - int64 k, int32 transpose_lhs, int32 transpose_rhs) { + std::complex* lhs, std::complex* rhs, tensorflow::int64 m, + tensorflow::int64 n, tensorflow::int64 k, tensorflow::int32 transpose_lhs, + tensorflow::int32 transpose_rhs) { SingleThreadedMatMulDispatch>( run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs); } @@ -124,18 +124,19 @@ __xla_cpu_runtime_EigenSingleThreadedMatMulC64( TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenSingleThreadedMatMulC128( const void* run_options_ptr, std::complex* out, - std::complex* lhs, std::complex* rhs, int64 m, int64 n, - int64 k, int32 transpose_lhs, int32 transpose_rhs) { + std::complex* lhs, std::complex* rhs, tensorflow::int64 m, + tensorflow::int64 n, tensorflow::int64 k, tensorflow::int32 transpose_lhs, + tensorflow::int32 transpose_rhs) { SingleThreadedMatMulDispatch>( run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs); } TF_ATTRIBUTE_NO_SANITIZE_MEMORY void -__xla_cpu_runtime_EigenSingleThreadedMatMulS32(const void* run_options_ptr, - int32* out, int32* lhs, - int32* rhs, int64 m, int64 n, - int64 k, int32 transpose_lhs, - int32 transpose_rhs) { - SingleThreadedMatMulDispatch(run_options_ptr, out, lhs, rhs, m, n, k, - transpose_lhs, transpose_rhs); +__xla_cpu_runtime_EigenSingleThreadedMatMulS32( + const void* run_options_ptr, tensorflow::int32* out, tensorflow::int32* lhs, + tensorflow::int32* rhs, tensorflow::int64 m, tensorflow::int64 n, + tensorflow::int64 k, tensorflow::int32 transpose_lhs, + tensorflow::int32 transpose_rhs) { + SingleThreadedMatMulDispatch( + run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs); } From ac988f3bb88e23139659dff8ddf95457d82c2f3e Mon Sep 17 00:00:00 2001 From: Michael Banfield Date: Wed, 15 Jul 2020 11:24:33 -0700 Subject: [PATCH 0518/2522] Return a iterator from summary_iterator instead of using a generator, to allow reusing the iterator after end of file. 
PiperOrigin-RevId: 321401390 Change-Id: I2d08d6312cead7f97fb572360a631f8c8754d418 --- tensorflow/python/BUILD | 1 + tensorflow/python/summary/summary_iterator.py | 38 ++++++++++-- .../python/summary/summary_iterator_test.py | 61 +++++++++++++++++++ 3 files changed, 94 insertions(+), 6 deletions(-) create mode 100644 tensorflow/python/summary/summary_iterator_test.py diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index d68bb928233..42fafc5d9cc 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -7275,6 +7275,7 @@ py_tests( size = "small", srcs = [ "summary/plugin_asset_test.py", + "summary/summary_iterator_test.py", "summary/summary_test.py", "summary/writer/writer_test.py", ], diff --git a/tensorflow/python/summary/summary_iterator.py b/tensorflow/python/summary/summary_iterator.py index 5840a7a124e..35c6fa03039 100644 --- a/tensorflow/python/summary/summary_iterator.py +++ b/tensorflow/python/summary/summary_iterator.py @@ -24,10 +24,26 @@ from tensorflow.python.lib.io import tf_record from tensorflow.python.util.tf_export import tf_export +class _SummaryIterator(object): + """Yields `Event` protocol buffers from a given path.""" + + def __init__(self, path): + self._tf_record_iterator = tf_record.tf_record_iterator(path) + + def __iter__(self): + return self + + def __next__(self): + r = next(self._tf_record_iterator) + return event_pb2.Event.FromString(r) + + next = __next__ + + @tf_export(v1=['train.summary_iterator']) def summary_iterator(path): # pylint: disable=line-too-long - """An iterator for reading `Event` protocol buffers from an event file. + """Returns a iterator for reading `Event` protocol buffers from an event file. You can use this function to read events written to an event file. It returns a Python iterator that yields `Event` protocol buffers. @@ -51,6 +67,18 @@ def summary_iterator(path): if v.tag == 'loss': print(v.simple_value) ``` + Example: Continuously check for new summary values. + + ```python + summaries = tf.compat.v1.train.summary_iterator(path to events file) + while True: + for e in summaries: + for v in e.summary.value: + if v.tag == 'loss': + print(v.simple_value) + # Wait for a bit before checking the file for any new events + time.sleep(wait time) + ``` See the protocol buffer definitions of [Event](https://www.tensorflow.org/code/tensorflow/core/util/event.proto) @@ -61,9 +89,7 @@ def summary_iterator(path): Args: path: The path to an event file created by a `SummaryWriter`. - Yields: - `Event` protocol buffers. + Returns: + A iterator that yields `Event` protocol buffers """ - # pylint: enable=line-too-long - for r in tf_record.tf_record_iterator(path): - yield event_pb2.Event.FromString(r) + return _SummaryIterator(path) diff --git a/tensorflow/python/summary/summary_iterator_test.py b/tensorflow/python/summary/summary_iterator_test.py new file mode 100644 index 00000000000..d41d8d4c775 --- /dev/null +++ b/tensorflow/python/summary/summary_iterator_test.py @@ -0,0 +1,61 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for tensorflow.python.summary.summary_iterator.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import glob +import os.path + +from tensorflow.core.util import event_pb2 +from tensorflow.python.framework import test_util +from tensorflow.python.platform import test +from tensorflow.python.summary import summary_iterator +from tensorflow.python.summary.writer import writer + + +class SummaryIteratorTestCase(test.TestCase): + + @test_util.run_deprecated_v1 + def testSummaryIteratorEventsAddedAfterEndOfFile(self): + test_dir = os.path.join(self.get_temp_dir(), "events") + with writer.FileWriter(test_dir) as w: + session_log_start = event_pb2.SessionLog.START + w.add_session_log(event_pb2.SessionLog(status=session_log_start), 1) + w.flush() + path = glob.glob(os.path.join(test_dir, "event*"))[0] + rr = summary_iterator.summary_iterator(path) + # The first event should list the file_version. + ev = next(rr) + self.assertEqual("brain.Event:2", ev.file_version) + # The next event should be the START message. + ev = next(rr) + self.assertEqual(1, ev.step) + self.assertEqual(session_log_start, ev.session_log.status) + # Reached EOF. + self.assertRaises(StopIteration, lambda: next(rr)) + w.add_session_log(event_pb2.SessionLog(status=session_log_start), 2) + w.flush() + # The new event is read, after previously seeing EOF. + ev = next(rr) + self.assertEqual(2, ev.step) + self.assertEqual(session_log_start, ev.session_log.status) + # Get EOF again. + self.assertRaises(StopIteration, lambda: next(rr)) + +if __name__ == "__main__": + test.main() From 2d3ab484dd84e473adbaf76320a539e1120e9ece Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 Jul 2020 11:28:11 -0700 Subject: [PATCH 0519/2522] Modify type-specific mappings to be explicit about long/long long rather than relying on a specific definition meaning int64. PiperOrigin-RevId: 321402137 Change-Id: I8f3e753725551bdba4eb0b2855e8515a3a7828b4 --- tensorflow/core/framework/types.h | 55 ++++++++++++++++++- tensorflow/core/kernels/sparse_cross_op.cc | 4 +- .../core/platform/default/strong_hash.h | 6 +- tensorflow/core/platform/strong_hash.h | 2 +- .../core/profiler/utils/xplane_builder.h | 20 ++++++- 5 files changed, 78 insertions(+), 9 deletions(-) diff --git a/tensorflow/core/framework/types.h b/tensorflow/core/framework/types.h index fe52f8b2b59..2b5f41be0de 100644 --- a/tensorflow/core/framework/types.h +++ b/tensorflow/core/framework/types.h @@ -395,8 +395,6 @@ MATCH_TYPE_AND_ENUM(int8, DT_INT8); MATCH_TYPE_AND_ENUM(tstring, DT_STRING); MATCH_TYPE_AND_ENUM(complex64, DT_COMPLEX64); MATCH_TYPE_AND_ENUM(complex128, DT_COMPLEX128); -MATCH_TYPE_AND_ENUM(int64, DT_INT64); -MATCH_TYPE_AND_ENUM(uint64, DT_UINT64); MATCH_TYPE_AND_ENUM(bool, DT_BOOL); MATCH_TYPE_AND_ENUM(qint8, DT_QINT8); MATCH_TYPE_AND_ENUM(quint8, DT_QUINT8); @@ -408,6 +406,59 @@ MATCH_TYPE_AND_ENUM(Eigen::half, DT_HALF); MATCH_TYPE_AND_ENUM(ResourceHandle, DT_RESOURCE); MATCH_TYPE_AND_ENUM(Variant, DT_VARIANT); +template <> +struct DataTypeToEnum { + static DataType v() { return value; } + static DataType ref() { return MakeRefType(value); } + static constexpr DataType value = sizeof(long) == 4 ? 
DT_INT32 : DT_INT64; +}; +template <> +struct IsValidDataType { + static constexpr bool value = true; +}; +template <> +struct EnumToDataType { + typedef tensorflow::int64 Type; +}; + +template <> +struct DataTypeToEnum { + static DataType v() { return value; } + static DataType ref() { return MakeRefType(value); } + static constexpr DataType value = + sizeof(unsigned long) == 4 ? DT_UINT32 : DT_UINT64; +}; +template <> +struct IsValidDataType { + static constexpr bool value = true; +}; +template <> +struct EnumToDataType { + typedef tensorflow::uint64 Type; +}; + +template <> +struct DataTypeToEnum { + static DataType v() { return DT_INT64; } + static DataType ref() { return MakeRefType(DT_INT64); } + static constexpr DataType value = DT_INT64; +}; +template <> +struct IsValidDataType { + static constexpr bool value = true; +}; + +template <> +struct DataTypeToEnum { + static DataType v() { return DT_UINT64; } + static DataType ref() { return MakeRefType(DT_UINT64); } + static constexpr DataType value = DT_UINT64; +}; +template <> +struct IsValidDataType { + static constexpr bool value = true; +}; + #undef MATCH_TYPE_AND_ENUM // All types not specialized are marked invalid. diff --git a/tensorflow/core/kernels/sparse_cross_op.cc b/tensorflow/core/kernels/sparse_cross_op.cc index 9a80aad5d04..583235b4a30 100644 --- a/tensorflow/core/kernels/sparse_cross_op.cc +++ b/tensorflow/core/kernels/sparse_cross_op.cc @@ -101,7 +101,7 @@ class KeyedSparseTensorColumn : public ColumnInterface { private: const Tensor& values_; - uint64 key_[2]; + tensorflow::uint64 key_[2]; std::vector feature_counts_; std::vector feature_start_indices_; }; @@ -201,7 +201,7 @@ class KeyedDenseTensorColumn : public ColumnInterface { private: const Tensor& tensor_; - uint64 key_[2]; + tensorflow::uint64 key_[2]; }; // InternalType is int64 only when using HashCrosser. diff --git a/tensorflow/core/platform/default/strong_hash.h b/tensorflow/core/platform/default/strong_hash.h index f04f1b7b6ae..e7c8047235c 100644 --- a/tensorflow/core/platform/default/strong_hash.h +++ b/tensorflow/core/platform/default/strong_hash.h @@ -21,8 +21,10 @@ limitations under the License. 
namespace tensorflow { -inline uint64 StrongKeyedHash(const uint64 (&key)[2], const string& s) { - return highwayhash::StringHasher()(key, s); +inline uint64 StrongKeyedHash(const tensorflow::uint64 (&key)[2], + const string& s) { + return highwayhash::StringHasher()( + {key[0], key[1]}, s); } } // namespace tensorflow diff --git a/tensorflow/core/platform/strong_hash.h b/tensorflow/core/platform/strong_hash.h index cbd267f90ed..987df5da59d 100644 --- a/tensorflow/core/platform/strong_hash.h +++ b/tensorflow/core/platform/strong_hash.h @@ -32,7 +32,7 @@ namespace tensorflow { // string input = "input string"; // uint64 hash_value = StrongKeyedHash(key, input); // -uint64 StrongKeyedHash(const uint64 (&)[2], const string&); +uint64 StrongKeyedHash(const tensorflow::uint64 (&)[2], const string&); } // namespace tensorflow diff --git a/tensorflow/core/profiler/utils/xplane_builder.h b/tensorflow/core/profiler/utils/xplane_builder.h index d5a4d443e21..01d38f0aa11 100644 --- a/tensorflow/core/profiler/utils/xplane_builder.h +++ b/tensorflow/core/profiler/utils/xplane_builder.h @@ -44,13 +44,29 @@ class XStatsBuilder { void AddStatValue(const XStatMetadata& metadata, uint32 value) { AddStat(metadata)->set_uint64_value(value); } - void AddStatValue(const XStatMetadata& metadata, uint64 value) { + void AddStatValue(const XStatMetadata& metadata, + unsigned long value) { // NOLINT + if constexpr (sizeof(unsigned long) == 8) { // NOLINT + AddStat(metadata)->set_uint64_value(value); + } else { + AddStat(metadata)->set_uint32_value(value); + } + } + void AddStatValue(const XStatMetadata& metadata, + unsigned long long value) { // NOLINT AddStat(metadata)->set_uint64_value(value); } void AddStatValue(const XStatMetadata& metadata, int32 value) { AddStat(metadata)->set_int64_value(value); } - void AddStatValue(const XStatMetadata& metadata, int64 value) { + void AddStatValue(const XStatMetadata& metadata, long value) { // NOLINT + if constexpr (sizeof(long) == 8) { // NOLINT + AddStat(metadata)->set_int64_value(value); + } else { + AddStat(metadata)->set_int32_value(value); + } + } + void AddStatValue(const XStatMetadata& metadata, long long value) { // NOLINT AddStat(metadata)->set_int64_value(value); } void AddStatValue(const XStatMetadata& metadata, double value) { From aae848f286d387e40194b2a863e23228f22a1169 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 Jul 2020 11:32:32 -0700 Subject: [PATCH 0520/2522] Internal code change. 
PiperOrigin-RevId: 321403145 Change-Id: I4dfc15175dc02bc13beeee2f1de93fa29e7869b5 --- tensorflow/core/common_runtime/BUILD | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/common_runtime/BUILD b/tensorflow/core/common_runtime/BUILD index e8f1dd1c5b5..bb50aca07e4 100644 --- a/tensorflow/core/common_runtime/BUILD +++ b/tensorflow/core/common_runtime/BUILD @@ -42,11 +42,13 @@ load( "mkl_deps", ) +default_package_visibility = [ + "//tensorflow:internal", + "//tensorflow_models:__subpackages__", +] + package( - default_visibility = [ - "//tensorflow:internal", - "//tensorflow_models:__subpackages__", - ], + default_visibility = default_package_visibility, licenses = ["notice"], # Apache 2.0 ) @@ -757,6 +759,10 @@ cc_library( "shape_refiner.h", ], copts = tf_copts(), + visibility = default_package_visibility + [ + "//platforms/performance/autograppler:__subpackages__", + "//platforms/performance/tf_sim:__subpackages__", + ], deps = [ ":device", ":device_factory", @@ -959,6 +965,10 @@ cc_library( srcs = ["lower_functional_ops.cc"], hdrs = ["lower_functional_ops.h"], copts = tf_copts(), + visibility = default_package_visibility + [ + "//platforms/performance/autograppler:__subpackages__", + "//platforms/performance/tf_sim:__subpackages__", + ], deps = [ ":function_utils", ":inline_function_utils", From e7e6de2e925848f5d1a95a7d576ca3128625b23e Mon Sep 17 00:00:00 2001 From: Blake Hechtman Date: Wed, 15 Jul 2020 12:06:41 -0700 Subject: [PATCH 0521/2522] [XLA] Propagate operand layouts from larger concat operands instead of smaller concat operands. PiperOrigin-RevId: 321410489 Change-Id: Ic0f4d60fe49fd76609ead8fc30fcf076b1f0a04b --- .../compiler/xla/service/layout_assignment.cc | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc index 3c48668e742..bea0f1fb93c 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.cc +++ b/tensorflow/compiler/xla/service/layout_assignment.cc @@ -1357,6 +1357,20 @@ Status LayoutAssignment::PropagateOperandConstraint( // Propagate layouts between operands of the same instruction. This is a // constraint on non-layout-changing instructions. if (!instruction_can_change_layout_func_(user)) { + // Only propgate the layout of the largest concatenate operand. + if (user->opcode() == HloOpcode::kConcatenate) { + for (int64 operand_no = 0; operand_no < user->operand_count(); + ++operand_no) { + const HloInstruction* sibling = user->operand(operand_no); + if (sibling == operand) { + continue; + } + if (sibling->shape().dimensions(user->concatenate_dimension()) > + operand->shape().dimensions(user->concatenate_dimension())) { + return Status::OK(); + } + } + } // Make sure all siblings have the same layout as the operand. for (int64 operand_no = 0; operand_no < user->operand_count(); ++operand_no) { From f29ace3cc878b96becc9ab18808273a541e7384d Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 15 Jul 2020 12:13:26 -0700 Subject: [PATCH 0522/2522] [XLA:CPU] Register collective-permute IR Otherwise any use of collective-permute as an argument to another operation crashes. 
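For reference, the failure mode is that the emitter never registered an output address for the
collective-permute, so any consumer of its result had nothing to look up. A minimal sketch of the
fixed handler, assuming the usual XLA:CPU emitter plumbing (the Cast<HloCollectivePermuteInstruction>
and GetEmittedValueFor names are assumptions; the actual one-line fix is in the ir_emitter.cc hunk
below):

    // Sketch only, not the full handler body.
    Status IrEmitter::HandleCollectivePermute(HloInstruction* crs) {
      auto* instr = Cast<HloCollectivePermuteInstruction>(crs);
      // Register the result buffer first, so a later op that consumes the
      // collective-permute result (e.g. the copy added to the test) can
      // resolve it via GetEmittedValueFor(instr).
      TF_RETURN_IF_ERROR(EmitTargetAddressForOp(instr));
      // ... emit the collective-permute runtime call as before ...
      return Status::OK();
    }
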
PiperOrigin-RevId: 321411964 Change-Id: I5375762204a83536451f6ec472ae91712722fee2 --- tensorflow/compiler/xla/service/cpu/ir_emitter.cc | 1 + tensorflow/compiler/xla/tests/collective_ops_test.cc | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index 16d92f622d2..278e6479e48 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -1397,6 +1397,7 @@ Status IrEmitter::HandleAllToAll(HloInstruction* instruction) { Status IrEmitter::HandleCollectivePermute(HloInstruction* crs) { auto* instr = Cast(crs); + TF_RETURN_IF_ERROR(EmitTargetAddressForOp(instr)); std::string source_target_pairs = absl::StrJoin( instr->source_target_pairs(), ",", absl::PairFormatter("=")); llvm::Value* source_target_pairs_v = diff --git a/tensorflow/compiler/xla/tests/collective_ops_test.cc b/tensorflow/compiler/xla/tests/collective_ops_test.cc index 7459b3d3f1f..ed5fabb663e 100644 --- a/tensorflow/compiler/xla/tests/collective_ops_test.cc +++ b/tensorflow/compiler/xla/tests/collective_ops_test.cc @@ -568,7 +568,8 @@ XLA_TEST_F(CollectiveOpsTest, CollectivePermute_Simple) { ten = u32[] constant(10) sum = u32[] add(replica, ten) p = u32[2] broadcast(sum), dimensions={} - ROOT permute = u32[2] collective-permute(p), source_target_pairs={{1,0}, {0,1}, {2,2}} + permute = u32[2] collective-permute(p), source_target_pairs={{1,0}, {0,1}, {2,2}} + ROOT copy = u32[2] copy(permute) } )"; const int64 kNumReplicas = 4; From 344c570221846e2ac205862083317f3e01fa63b0 Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Wed, 15 Jul 2020 19:22:46 +0000 Subject: [PATCH 0523/2522] update TensorMapDeviceCopy --- tensorflow/core/kernels/map_kernels.h | 1 + tensorflow/core/kernels/tensor_map.cc | 16 +++++++++------- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/tensorflow/core/kernels/map_kernels.h b/tensorflow/core/kernels/map_kernels.h index 00a6a654b54..9f18e4242d5 100644 --- a/tensorflow/core/kernels/map_kernels.h +++ b/tensorflow/core/kernels/map_kernels.h @@ -211,6 +211,7 @@ class TensorMapReplace : public OpKernel { DataType element_dtype_; }; + } // namespace tensorflow #endif // TENSORFLOW_CORE_KERNELS_MAP_KERNELS_H_ diff --git a/tensorflow/core/kernels/tensor_map.cc b/tensorflow/core/kernels/tensor_map.cc index bcb2abebe01..4f694a37b17 100644 --- a/tensorflow/core/kernels/tensor_map.cc +++ b/tensorflow/core/kernels/tensor_map.cc @@ -33,10 +33,10 @@ void TensorMap::Encode(VariantTensorData* data) const { Tensor k = map_it->first; Tensor v = map_it->second; // TODO: k should also not be DT_RESOURCE or DT_VARIANT - if(k.dtype() != DT_INVALID && v.dtype() != DT_INVALID) { - *data->add_tensors() = k; - *data->add_tensors() = v; - } + CHECK_NE(k.dtype(), DT_INVALID); + CHECK_NE(v.dtype(), DT_INVALID); + *data->add_tensors() = k; + *data->add_tensors() = v; map_it++; } string metadata; @@ -56,9 +56,11 @@ static Status TensorMapDeviceCopy( to->element_shape = from.element_shape; to->element_dtype = from.element_dtype; for (const std::pair& p : from.tensors()) { - if (p.first.dtype() != DT_INVALID && p.second.dtype() != DT_INVALID) { - to->tensors().emplace(p.first, p.second); - } + TensorKey to_key(p.first.dtype()); + Tensor to_val(p.second.dtype()); + copy(p.first, &to_key); + copy(p.second, &to_val); + to->tensors().emplace(to_key, to_val); } return Status::OK(); } From 6c95280f3a61fc896613aa4c49eb357bd9b9957d Mon Sep 17 00:00:00 2001 
From: Russell Power Date: Wed, 15 Jul 2020 12:19:52 -0700 Subject: [PATCH 0524/2522] Restore registration for variable merger pass. PiperOrigin-RevId: 321413258 Change-Id: Ie250a93a2e45a9685c5ce480c9a59ce09eff23a9 --- .../core/tpu/graph_rewrite/tpu_rewrite_pass_registration.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/core/tpu/graph_rewrite/tpu_rewrite_pass_registration.cc b/tensorflow/core/tpu/graph_rewrite/tpu_rewrite_pass_registration.cc index 1de4f50da10..ef1831464e2 100644 --- a/tensorflow/core/tpu/graph_rewrite/tpu_rewrite_pass_registration.cc +++ b/tensorflow/core/tpu/graph_rewrite/tpu_rewrite_pass_registration.cc @@ -30,6 +30,8 @@ REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 34, EncapsulateTPUComputationsPass); REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 39, ExtractOutsideCompilationPass); +REGISTER_OPTIMIZATION(OptimizationPassRegistry::POST_REWRITE_FOR_EXEC, 0, + VariableMergerPass); } // namespace } // namespace tensorflow From 9d4b10b477d5fd8179017cd7f0484bcbb84a5c75 Mon Sep 17 00:00:00 2001 From: Brian Zhao Date: Wed, 15 Jul 2020 12:40:00 -0700 Subject: [PATCH 0525/2522] Adding device_mgr header to core:core_cpu, since it is used on the Session interface. PiperOrigin-RevId: 321417432 Change-Id: I39312ed35c31164f03ca7440b8a6e5a23fb9918c --- tensorflow/core/common_runtime/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/core/common_runtime/BUILD b/tensorflow/core/common_runtime/BUILD index bb50aca07e4..b8ccb0d9039 100644 --- a/tensorflow/core/common_runtime/BUILD +++ b/tensorflow/core/common_runtime/BUILD @@ -58,6 +58,7 @@ tf_cuda_library( "composite_device.h", "device.h", "device_factory.h", + "device_mgr.h", "function.h", "function_optimization_registry.h", "gradients.h", From f18240b8761942524fb5c8049169f2e99b9440a5 Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Wed, 15 Jul 2020 19:58:45 +0000 Subject: [PATCH 0526/2522] minor fix --- tensorflow/core/kernels/map_kernels.h | 5 +---- tensorflow/python/kernel_tests/map_ops_test.py | 10 ---------- 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/tensorflow/core/kernels/map_kernels.h b/tensorflow/core/kernels/map_kernels.h index 9f18e4242d5..1ab6fbd2323 100644 --- a/tensorflow/core/kernels/map_kernels.h +++ b/tensorflow/core/kernels/map_kernels.h @@ -167,9 +167,6 @@ class TensorMapErase : public OpKernel { OP_REQUIRES_OK(c, GetInputMap(c, 0, &m)); const TensorKey& key = c->input(1); - OP_REQUIRES(c, !m->tensors().empty(), - errors::InvalidArgument("Trying to erase from an empty map.")); - OP_REQUIRES(c, m->tensors().find(key) != m->tensors().end(), errors::InvalidArgument("Trying to erase non-existent item.")); @@ -204,7 +201,7 @@ class TensorMapReplace : public OpKernel { TensorMap* output_map = nullptr; OP_REQUIRES_OK(c, ForwardInputOrCreateNewMap(c, 0, 0, *m, &output_map)); - output_map->replace(key,value); + output_map->replace(key, value); } private: diff --git a/tensorflow/python/kernel_tests/map_ops_test.py b/tensorflow/python/kernel_tests/map_ops_test.py index a0bfd104a9c..09df2cca134 100644 --- a/tensorflow/python/kernel_tests/map_ops_test.py +++ b/tensorflow/python/kernel_tests/map_ops_test.py @@ -105,16 +105,6 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): s = map_ops.tensor_map_size(m) self.assertAllEqual(s, 0) self.assertAllClose(e, v) - - def testTensorMapEraseFromEmptyMapFails(self): - m = map_ops.empty_tensor_map() - k = constant_op.constant(1.0) - v = constant_op.constant(2.0) - - with 
self.assertRaisesRegex(errors.InvalidArgumentError, - "Trying to erase from an empty map."): - m, e = map_ops.tensor_map_erase(m, k) - self.evaluate(e) def testTensorMapEraseMissingKeyFails(self): m = map_ops.empty_tensor_map() From ed60f21980a1b9ebd85d4913a731c8b05b843cea Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Wed, 15 Jul 2020 13:09:29 -0700 Subject: [PATCH 0527/2522] Update release configs in .bazelrc. PiperOrigin-RevId: 321423561 Change-Id: Ifd3343552ec225b2aa3dbca7861c40722b811a91 --- .bazelrc | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/.bazelrc b/.bazelrc index f23fdb7b7e8..9ef1f8e9c04 100644 --- a/.bazelrc +++ b/.bazelrc @@ -543,20 +543,26 @@ try-import %workspace%/.bazelrc.user # Here are bazelrc configs for release builds build:release_common --config=opt build:release_common --config=v2 +build:release_common --action_env TF_CONFIGURE_IOS="0" build:release_cpu_linux --config=release_common -build:release_cpu_linux --action_env=TF_NEED_CUDA=0 -build:release_cpu_linux --action_env=CC_OPT_FLAGS="-mavx" +build:release_cpu_linux --config=avx_linux # We use the same toolchain for CPU/GPU packages. # Did not add this to the defaults in case this changes. build:release_cpu_linux --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain build:release_gpu_common --config=release_common -build:release_gpu_common --action_env=TF_NEED_CUDA=1 -build:release_gpu_common --action_env=TF_CUDA_VERSION=10.1 -build:release_gpu_common --action_env=TF_CUDNN_VERSION=7 +build:release_gpu_common --config=cuda +build:release_gpu_common --config=tensorrt +build:release_gpu_common --action_env CUDA_TOOLKIT_PATH="/usr/local/cuda-10.1" +build:release_gpu_common --action_env=TF_CUDA_VERSION="10" +build:release_gpu_common --action_env=TF_CUDNN_VERSION="7" build:release_gpu_common --action_env=TF_CUDA_COMPUTE_CAPABILITIES="sm_35,sm_37,sm_52,sm_60,sm_61,compute_70" build:release_gpu_common --action_env=TENSORRT_INSTALL_PATH="/usr/local/tensorrt" +build:release_gpu_common --action_env=LD_LIBRARY_PATH="/usr/local/tensorrt/lib" +build:release_gpu_common --action_env=GCC_HOST_COMPILER_PATH="/usr/bin/gcc-5" + build:release_gpu_linux --config=release_gpu_common +build:release_gpu_linux --config=avx_linux build:release_gpu_linux --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain From b4c9d56fff8c1747bad6811599b8e8a9b793d3fe Mon Sep 17 00:00:00 2001 From: Brian Zhao Date: Wed, 15 Jul 2020 13:16:40 -0700 Subject: [PATCH 0528/2522] Disabling test failing under ASAN. PiperOrigin-RevId: 321424899 Change-Id: I37833366cf33c8ebf8319c708c9983f7c88feb35 --- tensorflow/python/keras/layers/preprocessing/BUILD | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/python/keras/layers/preprocessing/BUILD b/tensorflow/python/keras/layers/preprocessing/BUILD index 9adf97d1fa5..adf10787f1e 100644 --- a/tensorflow/python/keras/layers/preprocessing/BUILD +++ b/tensorflow/python/keras/layers/preprocessing/BUILD @@ -541,6 +541,9 @@ tf_py_test( srcs = ["text_vectorization_test.py"], python_version = "PY3", shard_count = 4, + tags = [ + "noasan", #TODO(b/161376526): Enable when bug fix lands. + ], deps = [ ":preprocessing_test_utils", ":text_vectorization", From ad38b201b37aa3d502441101235d98d1fb27e28f Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Wed, 15 Jul 2020 13:23:10 -0700 Subject: [PATCH 0529/2522] Expand visibility of tensorflow/core/framework:tensor_testutil. 
This is to prepare for the removal of tensorflow/core:tensor_testutil alias. PiperOrigin-RevId: 321426229 Change-Id: I525e618cf217d608c5a76bc310118e33b9b3009d --- tensorflow/core/framework/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/framework/BUILD b/tensorflow/core/framework/BUILD index 3aa0681c8ff..df454f4c876 100644 --- a/tensorflow/core/framework/BUILD +++ b/tensorflow/core/framework/BUILD @@ -510,7 +510,7 @@ cc_library( srcs = ["tensor_testutil.cc"], hdrs = ["tensor_testutil.h"], copts = tf_copts(), - visibility = ["//tensorflow/core:__subpackages__"], + visibility = ["//tensorflow:internal"], deps = [ "//tensorflow/core:framework", "//tensorflow/core:lib", From 600b4f145b9a3fcc2d6295fa10c4dce766b6c136 Mon Sep 17 00:00:00 2001 From: Ayush Dubey Date: Wed, 15 Jul 2020 13:27:14 -0700 Subject: [PATCH 0530/2522] Share ownership of CollectiveContext with kernels. BaseCollectiveExecutor creates a CollectiveContext and passes a pointer to each collective kernel implementation. The CollectiveContext is deleted in the done callback. However, for some kernels like the NCCL reducer, it is possible that the CollectiveContext is accessed after the NCCL CUDA kernel is enqueued on the GPU stream. This creates a race between the access and destruction. This change changes CollectiveContext from a raw pointer to a shared pointer, essentially sharing ownership of this object with the kernel. Thus, even if the done callback runs first, the kernel can still safely access the context. Resolves #41113. PiperOrigin-RevId: 321426944 Change-Id: I9f12fe403bf2cc0939006dbde38ec2985d75cfcd --- .../base_collective_executor.cc | 8 +++--- .../hierarchical_tree_broadcaster.cc | 2 +- .../hierarchical_tree_broadcaster.h | 5 ++-- .../hierarchical_tree_broadcaster_test.cc | 8 +++--- tensorflow/core/common_runtime/ring_alg.cc | 4 ++- tensorflow/core/common_runtime/ring_alg.h | 5 ++-- .../core/common_runtime/ring_gatherer_test.cc | 8 +++--- .../core/common_runtime/ring_reducer_test.cc | 8 +++--- ...lective_param_resolver_distributed_test.cc | 3 ++- tensorflow/core/framework/collective.h | 3 ++- tensorflow/core/kernels/collective_nccl.cc | 3 ++- tensorflow/core/kernels/collective_nccl.h | 5 ++-- .../core/kernels/collective_nccl_test.cc | 26 +++++++++---------- 13 files changed, 47 insertions(+), 41 deletions(-) diff --git a/tensorflow/core/common_runtime/base_collective_executor.cc b/tensorflow/core/common_runtime/base_collective_executor.cc index 1dfe2eed426..5d5100e7f2e 100644 --- a/tensorflow/core/common_runtime/base_collective_executor.cc +++ b/tensorflow/core/common_runtime/base_collective_executor.cc @@ -271,13 +271,12 @@ void BaseCollectiveExecutor::ExecuteAsync(OpKernelContext* ctx, DCHECK_EQ(nullptr, col_impl); return; } - CollectiveContext* col_ctx = - new CollectiveContext(this, dev_mgr_, ctx, CtxParams(ctx), col_params, - exec_key, step_id_, input, output); + auto col_ctx = std::make_shared( + this, dev_mgr_, ctx, CtxParams(ctx), col_params, exec_key, step_id_, + input, output); status = col_impl->InitializeCollectiveContext(col_ctx); if (!status.ok()) { done_safe(status); - delete col_ctx; delete col_impl; return; } @@ -293,7 +292,6 @@ void BaseCollectiveExecutor::ExecuteAsync(OpKernelContext* ctx, profiler::TraceMeLevel::kInfo); col_impl->Run([col_impl, col_ctx, done_safe](const Status& s) { done_safe(s); - delete col_ctx; delete col_impl; }); }); diff --git a/tensorflow/core/common_runtime/hierarchical_tree_broadcaster.cc 
b/tensorflow/core/common_runtime/hierarchical_tree_broadcaster.cc index d4cb79e3c05..decf8b2ccb5 100644 --- a/tensorflow/core/common_runtime/hierarchical_tree_broadcaster.cc +++ b/tensorflow/core/common_runtime/hierarchical_tree_broadcaster.cc @@ -186,7 +186,7 @@ Status HierarchicalTreeBroadcaster::InitializeCollectiveParams( } Status HierarchicalTreeBroadcaster::InitializeCollectiveContext( - CollectiveContext* col_ctx) { + std::shared_ptr col_ctx) { CHECK(col_ctx->dev_mgr); col_ctx_ = col_ctx; col_params_ = &col_ctx->col_params; diff --git a/tensorflow/core/common_runtime/hierarchical_tree_broadcaster.h b/tensorflow/core/common_runtime/hierarchical_tree_broadcaster.h index 38954e7dfaf..40ee3f82d48 100644 --- a/tensorflow/core/common_runtime/hierarchical_tree_broadcaster.h +++ b/tensorflow/core/common_runtime/hierarchical_tree_broadcaster.h @@ -39,7 +39,8 @@ class HierarchicalTreeBroadcaster : public CollectiveImplementationInterface { // Initializes members of CollectiveContext not yet initialized, i.e. device // and device_locality. Also saves the CollectiveContext in this object. - Status InitializeCollectiveContext(CollectiveContext* col_ctx) override; + Status InitializeCollectiveContext( + std::shared_ptr col_ctx) override; // No-op for hierarchical tree broadcaster. Status InitializeCollectiveGroupRuntimeDetails( @@ -80,7 +81,7 @@ class HierarchicalTreeBroadcaster : public CollectiveImplementationInterface { // Executes the hierarchical broadcast defined by this op. void RunTree(); - CollectiveContext* col_ctx_; // Not owned + std::shared_ptr col_ctx_; const CollectiveParams* col_params_; // Not owned StatusCallback done_; Status status_; diff --git a/tensorflow/core/common_runtime/hierarchical_tree_broadcaster_test.cc b/tensorflow/core/common_runtime/hierarchical_tree_broadcaster_test.cc index 2006947258c..333a70adc27 100644 --- a/tensorflow/core/common_runtime/hierarchical_tree_broadcaster_test.cc +++ b/tensorflow/core/common_runtime/hierarchical_tree_broadcaster_test.cc @@ -670,10 +670,10 @@ class HierarchicalTreeBroadcasterTest : public ::testing::Test { string exec_key = strings::StrCat(col_params_.instance.instance_key, ":0:0"); HierarchicalTreeBroadcaster broadcaster; - CollectiveContext col_ctx(parent_->col_exec_, parent_->dev_mgr_.get(), - &ctx, &op_params, col_params_, exec_key, - kStepId, input_tensor_ptr, output_tensor_ptr); - TF_CHECK_OK(broadcaster.InitializeCollectiveContext(&col_ctx)); + auto col_ctx = std::make_shared( + parent_->col_exec_, parent_->dev_mgr_.get(), &ctx, &op_params, + col_params_, exec_key, kStepId, input_tensor_ptr, output_tensor_ptr); + TF_CHECK_OK(broadcaster.InitializeCollectiveContext(col_ctx)); // Run the broadcast. broadcaster.Run([this](Status s) { status_ = s; }); diff --git a/tensorflow/core/common_runtime/ring_alg.cc b/tensorflow/core/common_runtime/ring_alg.cc index 3a1a84a376d..753f6ba982e 100644 --- a/tensorflow/core/common_runtime/ring_alg.cc +++ b/tensorflow/core/common_runtime/ring_alg.cc @@ -15,6 +15,7 @@ limitations under the License. 
#include "tensorflow/core/common_runtime/ring_alg.h" #include + #include #include #include @@ -240,7 +241,8 @@ Status RingAlg::InitializeCollectiveParams(CollectiveParams* col_params) { return Status::OK(); } -Status RingAlg::InitializeCollectiveContext(CollectiveContext* col_ctx) { +Status RingAlg::InitializeCollectiveContext( + std::shared_ptr col_ctx) { DCHECK(col_ctx->dev_mgr); col_ctx_ = col_ctx; col_params_ = &col_ctx->col_params; diff --git a/tensorflow/core/common_runtime/ring_alg.h b/tensorflow/core/common_runtime/ring_alg.h index c2da62c86d7..3ccb07f6d5c 100644 --- a/tensorflow/core/common_runtime/ring_alg.h +++ b/tensorflow/core/common_runtime/ring_alg.h @@ -39,7 +39,8 @@ class RingAlg : public CollectiveImplementationInterface { // Initializes members of CollectiveContext not yet initialized, i.e. device // and device_locality. Also saves the CollectiveContext in this object. - Status InitializeCollectiveContext(CollectiveContext* col_ctx) override; + Status InitializeCollectiveContext( + std::shared_ptr col_ctx) override; // No-op for ring alg. Status InitializeCollectiveGroupRuntimeDetails( @@ -108,7 +109,7 @@ class RingAlg : public CollectiveImplementationInterface { const CollectiveType type_; const string name_; - CollectiveContext* col_ctx_; // Not owned + std::shared_ptr col_ctx_; const CollectiveParams* col_params_; // Not owned StatusCallback done_; int group_size_; diff --git a/tensorflow/core/common_runtime/ring_gatherer_test.cc b/tensorflow/core/common_runtime/ring_gatherer_test.cc index 3af4890e3d3..124965b6c6a 100644 --- a/tensorflow/core/common_runtime/ring_gatherer_test.cc +++ b/tensorflow/core/common_runtime/ring_gatherer_test.cc @@ -477,10 +477,10 @@ class RingGathererTest : public ::testing::Test { string exec_key = strings::StrCat(col_params_.instance.instance_key, ":0:0"); RingGatherer gatherer; - CollectiveContext col_ctx(parent_->col_exec_, parent_->dev_mgr_.get(), - &ctx, &op_params, col_params_, exec_key, - kStepId, &input_tensor_, output_tensor_ptr); - TF_CHECK_OK(gatherer.InitializeCollectiveContext(&col_ctx)); + auto col_ctx = std::make_shared( + parent_->col_exec_, parent_->dev_mgr_.get(), &ctx, &op_params, + col_params_, exec_key, kStepId, &input_tensor_, output_tensor_ptr); + TF_CHECK_OK(gatherer.InitializeCollectiveContext(col_ctx)); // Run the all-gather. gatherer.Run([this](Status s) { status_ = s; }); diff --git a/tensorflow/core/common_runtime/ring_reducer_test.cc b/tensorflow/core/common_runtime/ring_reducer_test.cc index 318d6e91afb..678153c3603 100644 --- a/tensorflow/core/common_runtime/ring_reducer_test.cc +++ b/tensorflow/core/common_runtime/ring_reducer_test.cc @@ -507,10 +507,10 @@ class RingReducerTest : public ::testing::Test { string exec_key = strings::StrCat(col_params_.instance.instance_key, ":0:0"); RingReducer reducer; - CollectiveContext col_ctx(parent_->col_exec_, parent_->dev_mgr_.get(), - &ctx, &op_params, col_params_, exec_key, - kStepId, &tensor_, &tensor_); - TF_CHECK_OK(reducer.InitializeCollectiveContext(&col_ctx)); + auto col_ctx = std::make_shared( + parent_->col_exec_, parent_->dev_mgr_.get(), &ctx, &op_params, + col_params_, exec_key, kStepId, &tensor_, &tensor_); + TF_CHECK_OK(reducer.InitializeCollectiveContext(col_ctx)); // Run the all-reduce. 
reducer.Run([this](Status s) { status_ = s; }); diff --git a/tensorflow/core/distributed_runtime/collective_param_resolver_distributed_test.cc b/tensorflow/core/distributed_runtime/collective_param_resolver_distributed_test.cc index 13e61e55ee0..130a48e80d2 100644 --- a/tensorflow/core/distributed_runtime/collective_param_resolver_distributed_test.cc +++ b/tensorflow/core/distributed_runtime/collective_param_resolver_distributed_test.cc @@ -327,7 +327,8 @@ class MockNcclReducer : public CollectiveImplementationInterface { Status InitializeCollectiveParams(CollectiveParams*) override { return Status::OK(); } - Status InitializeCollectiveContext(CollectiveContext*) override { + Status InitializeCollectiveContext( + std::shared_ptr) override { return Status::OK(); } Status InitializeCollectiveGroupRuntimeDetails( diff --git a/tensorflow/core/framework/collective.h b/tensorflow/core/framework/collective.h index 3726fde9809..24507b901a7 100644 --- a/tensorflow/core/framework/collective.h +++ b/tensorflow/core/framework/collective.h @@ -399,7 +399,8 @@ class CollectiveImplementationInterface { // Called from CollectiveExecutor right before calling Run(). The // CollectiveContext passed in must outlive the CollectiveImplementation // object. - virtual Status InitializeCollectiveContext(CollectiveContext* col_ctx) = 0; + virtual Status InitializeCollectiveContext( + std::shared_ptr col_ctx) = 0; // Performs collective implementation specific group initialization. The // intention is to do group-specific initialization of runtime details for the diff --git a/tensorflow/core/kernels/collective_nccl.cc b/tensorflow/core/kernels/collective_nccl.cc index 013e06cc374..74ad24abfaa 100644 --- a/tensorflow/core/kernels/collective_nccl.cc +++ b/tensorflow/core/kernels/collective_nccl.cc @@ -58,7 +58,8 @@ Status NcclBase::InitializeCollectiveParams(CollectiveParams* col_params) { return Status::OK(); } -Status NcclBase::InitializeCollectiveContext(CollectiveContext* col_ctx) { +Status NcclBase::InitializeCollectiveContext( + std::shared_ptr col_ctx) { col_ctx_ = col_ctx; col_params_ = &col_ctx->col_params; return collective_util::InitializeDeviceAndLocality( diff --git a/tensorflow/core/kernels/collective_nccl.h b/tensorflow/core/kernels/collective_nccl.h index 5ef0d61aee5..b076272b6a5 100644 --- a/tensorflow/core/kernels/collective_nccl.h +++ b/tensorflow/core/kernels/collective_nccl.h @@ -29,7 +29,8 @@ class NcclBase : public CollectiveImplementationInterface { Status InitializeCollectiveParams(CollectiveParams* col_params) override; // Initializes the device objects and device localities. - Status InitializeCollectiveContext(CollectiveContext* col_ctx) override; + Status InitializeCollectiveContext( + std::shared_ptr col_ctx) override; // Initialize nccl communicator key. 
Status InitializeCollectiveGroupRuntimeDetails( @@ -40,7 +41,7 @@ class NcclBase : public CollectiveImplementationInterface { const CollectiveType type_; const string name_; - CollectiveContext* col_ctx_; // Not owned + std::shared_ptr col_ctx_; const CollectiveParams* col_params_; // Not owned }; diff --git a/tensorflow/core/kernels/collective_nccl_test.cc b/tensorflow/core/kernels/collective_nccl_test.cc index 8f3a958149b..ce4aca1cdcc 100644 --- a/tensorflow/core/kernels/collective_nccl_test.cc +++ b/tensorflow/core/kernels/collective_nccl_test.cc @@ -314,11 +314,11 @@ class NcclTestBase : public ::testing::Test { string exec_key = strings::StrCat(col_params_.instance.instance_key, ":0:0"); NcclReducer reducer; - CollectiveContext col_ctx(parent_->col_exec_, parent_->dev_mgr_.get(), - /*OpKernelContext=*/&ctx, &op_params, - col_params_, exec_key, kStepId, - /*input=*/&input_, /*output=*/&input_); - TF_CHECK_OK(reducer.InitializeCollectiveContext(&col_ctx)); + auto col_ctx = std::make_shared( + parent_->col_exec_, parent_->dev_mgr_.get(), + /*OpKernelContext=*/&ctx, &op_params, col_params_, exec_key, kStepId, + /*input=*/&input_, /*output=*/&input_); + TF_CHECK_OK(reducer.InitializeCollectiveContext(col_ctx)); Notification note; reducer.Run([this, ¬e](Status s) { status_ = s; @@ -344,12 +344,12 @@ class NcclTestBase : public ::testing::Test { string exec_key = strings::StrCat(col_params_.instance.instance_key, ":0:0"); NcclBroadcaster broadcaster; - CollectiveContext col_ctx( + auto col_ctx = std::make_shared( parent_->col_exec_, parent_->dev_mgr_.get(), /*OpKernelContext=*/&ctx, &op_params, col_params_, exec_key, kStepId, /*input=*/col_params_.is_source ? &input_ : nullptr, /*output=*/&input_); - TF_CHECK_OK(broadcaster.InitializeCollectiveContext(&col_ctx)); + TF_CHECK_OK(broadcaster.InitializeCollectiveContext(col_ctx)); Notification note; broadcaster.Run([this, ¬e](Status s) { status_ = s; @@ -383,12 +383,12 @@ class NcclTestBase : public ::testing::Test { string exec_key = strings::StrCat(col_params_.instance.instance_key, ":0:0"); NcclGatherer gatherer; - CollectiveContext col_ctx(parent_->col_exec_, parent_->dev_mgr_.get(), - /*OpKernelContext=*/&ctx, &op_params, - col_params_, exec_key, kStepId, - /*input=*/&input_, - /*output=*/&output_); - TF_CHECK_OK(gatherer.InitializeCollectiveContext(&col_ctx)); + auto col_ctx = std::make_shared( + parent_->col_exec_, parent_->dev_mgr_.get(), + /*OpKernelContext=*/&ctx, &op_params, col_params_, exec_key, kStepId, + /*input=*/&input_, + /*output=*/&output_); + TF_CHECK_OK(gatherer.InitializeCollectiveContext(col_ctx)); Notification note; gatherer.Run([this, ¬e](Status s) { status_ = s; From cdd1357a0b912a69a1754b3a266d3a2ce0a0d29e Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Wed, 15 Jul 2020 13:29:34 -0700 Subject: [PATCH 0531/2522] Changed GPUOperation interface. AddToQueue changed to non virtual method. New virtual methods BindArguments and GetGridSize. Using default Tuning method for majority of ops. 
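Roughly, the intent is that the base class now owns the dispatch flow and concrete ops only override
the two new hooks. A minimal sketch of the assumed flow (the member names grid_size_ and
work_group_size_ and the UpdateParams() call are taken from the diff below; the exact base-class
bodies are not shown there and are assumptions):

    // Concrete ops override only these two hooks:
    //   absl::Status BindArguments() override;  // set op-specific kernel args
    //   int3 GetGridSize() const override;      // compute the dispatch grid
    //
    // UpdateParams() (called once after Compile, see inference_context.cc and
    // cl_test.cc below) presumably runs BindArguments() and caches the grid:
    //   RETURN_IF_ERROR(BindArguments());
    //   grid_size_ = GetGridSize();
    //
    // which lets the now non-virtual AddToQueue be shared by all ops:
    absl::Status GPUOperation::AddToQueue(CLCommandQueue* queue) {
      RETURN_IF_ERROR(args_.Bind(kernel_.kernel()));
      return queue->DispatchImplicit(kernel_, grid_size_, work_group_size_);
    }
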
PiperOrigin-RevId: 321427369 Change-Id: I7186945a1f9e744c9ea6ec0c8d29612622845c77 --- .../delegates/gpu/cl/inference_context.cc | 8 +++++ .../lite/delegates/gpu/cl/inference_context.h | 1 + .../lite/delegates/gpu/cl/kernels/cl_test.cc | 1 + .../delegates/gpu/cl/kernels/concat_xy.cc | 14 +------- .../lite/delegates/gpu/cl/kernels/concat_xy.h | 8 ++--- .../lite/delegates/gpu/cl/kernels/concat_z.cc | 14 +------- .../lite/delegates/gpu/cl/kernels/concat_z.h | 8 ++--- .../lite/delegates/gpu/cl/kernels/conv_3d.cc | 21 ++++-------- .../lite/delegates/gpu/cl/kernels/conv_3d.h | 6 ++-- .../gpu/cl/kernels/conv_buffer_1x1.cc | 19 ++++------- .../gpu/cl/kernels/conv_buffer_1x1.h | 6 ++-- .../gpu/cl/kernels/conv_constants.cc | 14 +------- .../delegates/gpu/cl/kernels/conv_constants.h | 7 ++-- .../delegates/gpu/cl/kernels/conv_powervr.cc | 16 +++------- .../delegates/gpu/cl/kernels/conv_powervr.h | 6 ++-- .../delegates/gpu/cl/kernels/conv_texture.cc | 12 ++----- .../delegates/gpu/cl/kernels/conv_texture.h | 7 ++-- .../gpu/cl/kernels/conv_weights_converter.cc | 14 +------- .../gpu/cl/kernels/conv_weights_converter.h | 8 ++--- .../gpu/cl/kernels/convolution_transposed.cc | 14 ++------ .../gpu/cl/kernels/convolution_transposed.h | 7 ++-- .../cl/kernels/convolution_transposed_3d.cc | 16 +++------- .../cl/kernels/convolution_transposed_3d.h | 7 ++-- .../cl/kernels/convolution_transposed_3x3.cc | 10 +----- .../cl/kernels/convolution_transposed_3x3.h | 9 +++--- .../convolution_transposed_3x3_thin.cc | 15 +-------- .../kernels/convolution_transposed_3x3_thin.h | 8 ++--- .../cl/kernels/convolution_transposed_4x4.cc | 9 +----- .../cl/kernels/convolution_transposed_4x4.h | 9 +++--- .../cl/kernels/convolution_transposed_thin.cc | 14 +------- .../cl/kernels/convolution_transposed_thin.h | 8 ++--- .../gpu/cl/kernels/depthwise_conv.cc | 13 +------- .../delegates/gpu/cl/kernels/depthwise_conv.h | 8 ++--- .../gpu/cl/kernels/depthwise_conv_3x3.cc | 11 ++----- .../gpu/cl/kernels/depthwise_conv_3x3.h | 7 ++-- .../gpu/cl/kernels/fully_connected.cc | 12 +++---- .../gpu/cl/kernels/fully_connected.h | 7 ++-- .../delegates/gpu/cl/kernels/gpu_operation.cc | 17 ++-------- .../delegates/gpu/cl/kernels/gpu_operation.h | 32 +++++++++++++------ .../lite/delegates/gpu/cl/kernels/lstm.cc | 12 +------ .../lite/delegates/gpu/cl/kernels/lstm.h | 8 ++--- .../delegates/gpu/cl/kernels/max_unpooling.cc | 13 +------- .../delegates/gpu/cl/kernels/max_unpooling.h | 7 ++-- .../lite/delegates/gpu/cl/kernels/mean.cc | 8 +---- .../lite/delegates/gpu/cl/kernels/mean.h | 10 +++--- .../lite/delegates/gpu/cl/kernels/padding.cc | 13 +------- .../lite/delegates/gpu/cl/kernels/padding.h | 7 ++-- .../lite/delegates/gpu/cl/kernels/pooling.cc | 13 +------- .../lite/delegates/gpu/cl/kernels/pooling.h | 7 ++-- .../lite/delegates/gpu/cl/kernels/reshape.cc | 13 +------- .../lite/delegates/gpu/cl/kernels/reshape.h | 8 ++--- .../delegates/gpu/cl/kernels/reshapex4.cc | 13 +------- .../lite/delegates/gpu/cl/kernels/reshapex4.h | 8 ++--- .../lite/delegates/gpu/cl/kernels/resize.cc | 26 ++------------- .../lite/delegates/gpu/cl/kernels/resize.h | 16 +++------- .../lite/delegates/gpu/cl/kernels/softmax.cc | 13 +------- .../lite/delegates/gpu/cl/kernels/softmax.h | 8 ++--- .../delegates/gpu/cl/kernels/softmax1x1.cc | 10 +++--- .../delegates/gpu/cl/kernels/softmax1x1.h | 7 ++-- .../gpu/cl/kernels/space_to_depth.cc | 13 +------- .../delegates/gpu/cl/kernels/space_to_depth.h | 7 ++-- .../delegates/gpu/cl/kernels/strided_slice.cc | 13 +------- 
.../delegates/gpu/cl/kernels/strided_slice.h | 8 ++--- .../delegates/gpu/cl/kernels/transpose.cc | 13 +------- .../lite/delegates/gpu/cl/kernels/transpose.h | 7 ++-- .../lite/delegates/gpu/cl/kernels/winograd.cc | 25 ++++----------- .../lite/delegates/gpu/cl/kernels/winograd.h | 12 +++---- 67 files changed, 186 insertions(+), 555 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/inference_context.cc b/tensorflow/lite/delegates/gpu/cl/inference_context.cc index 7ff10f16fe1..9e57dd175bc 100644 --- a/tensorflow/lite/delegates/gpu/cl/inference_context.cc +++ b/tensorflow/lite/delegates/gpu/cl/inference_context.cc @@ -197,6 +197,7 @@ absl::Status InferenceContext::InitFromGraph( RETURN_IF_ERROR(AllocateMemory(env->device(), creation_context.context)); BindMemoryToOperations(); RETURN_IF_ERROR(Compile(creation_context)); + RETURN_IF_ERROR(UpdateParams()); TuningParameters tuning_parameters; tuning_parameters.queue = env->profiling_queue(); @@ -554,6 +555,13 @@ absl::Status InferenceContext::Tune(const TuningParameters& tuning_parameters) { return absl::OkStatus(); } +absl::Status InferenceContext::UpdateParams() { + for (auto& node : nodes_) { + RETURN_IF_ERROR(node.operations[0]->UpdateParams()); + } + return absl::OkStatus(); +} + absl::Status InferenceContext::AddToQueue(CLCommandQueue* queue) { if (need_manual_release_) { if (prev_enqueue_start_point_.is_valid()) { diff --git a/tensorflow/lite/delegates/gpu/cl/inference_context.h b/tensorflow/lite/delegates/gpu/cl/inference_context.h index 75365258e41..3f05026b795 100644 --- a/tensorflow/lite/delegates/gpu/cl/inference_context.h +++ b/tensorflow/lite/delegates/gpu/cl/inference_context.h @@ -114,6 +114,7 @@ class InferenceContext { void BindMemoryToOperations(); absl::Status Compile(const CreationContext& creation_context); absl::Status Tune(const TuningParameters& tuning_parameters); + absl::Status UpdateParams(); // performance hacks bool need_flush_ = false; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/cl_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/cl_test.cc index deb0ebf67c4..f864a731446 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/cl_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/cl_test.cc @@ -56,6 +56,7 @@ absl::Status ExecuteGPUOperation(const std::vector& src_cpu, } RETURN_IF_ERROR(operation->Compile(creation_context)); + RETURN_IF_ERROR(operation->UpdateParams()); RETURN_IF_ERROR(operation->AddToQueue(creation_context.queue)); RETURN_IF_ERROR(creation_context.queue->WaitForCompletion()); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/concat_xy.cc b/tensorflow/lite/delegates/gpu/cl/kernels/concat_xy.cc index 0a84d8a95b1..9feb3ace50e 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/concat_xy.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/concat_xy.cc @@ -154,9 +154,7 @@ absl::Status ConcatXY::BindArguments() { RETURN_IF_ERROR( args_.SetObjectRef("src_tensor_" + std::to_string(i), src_[i])); } - RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); - RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); - return args_.Bind(kernel_.kernel()); + return args_.SetObjectRef("dst_tensor", dst_[0]); } int3 ConcatXY::GetGridSize() const { @@ -166,16 +164,6 @@ int3 ConcatXY::GetGridSize() const { return int3(grid_x, grid_y, grid_z); } -absl::Status ConcatXY::Tune(const TuningParameters& params) { - RETURN_IF_ERROR(BindArguments()); - return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_); -} - -absl::Status ConcatXY::AddToQueue(CLCommandQueue* queue) { - 
RETURN_IF_ERROR(BindArguments()); - return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_); -} - ConcatXY CreateConcatXY(const OperationDef& definition, const ConcatAttributes& attr, int tensors_count) { return ConcatXY(definition, attr, tensors_count); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/concat_xy.h b/tensorflow/lite/delegates/gpu/cl/kernels/concat_xy.h index a82ffb22709..011d8fb191f 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/concat_xy.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/concat_xy.h @@ -31,10 +31,9 @@ class ConcatXY : public GPUOperation { ConcatXY(const OperationDef& definition, const ConcatAttributes& attr, int tensors_count) : GPUOperation(definition), attr_(attr), tensors_count_(tensors_count) {} - absl::Status AddToQueue(CLCommandQueue* queue) override; - absl::Status Tune(const TuningParameters& params) override; - absl::Status Compile(const CreationContext& creation_context) override; + absl::Status BindArguments() override; + int3 GetGridSize() const override; // Move only ConcatXY(ConcatXY&& operation); @@ -43,9 +42,6 @@ class ConcatXY : public GPUOperation { ConcatXY& operator=(const ConcatXY&) = delete; private: - absl::Status BindArguments(); - int3 GetGridSize() const; - ConcatAttributes attr_; int tensors_count_; }; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.cc b/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.cc index 93bc7b4a9dc..7878919d6a0 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.cc @@ -176,9 +176,7 @@ absl::Status ConcatZ::BindArguments() { RETURN_IF_ERROR( args_.SetObjectRef("src_tensor_" + std::to_string(i), src_[i])); } - RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); - RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); - return args_.Bind(kernel_.kernel()); + return args_.SetObjectRef("dst_tensor", dst_[0]); } int3 ConcatZ::GetGridSize() const { @@ -188,16 +186,6 @@ int3 ConcatZ::GetGridSize() const { return int3(grid_x, grid_y, grid_z); } -absl::Status ConcatZ::Tune(const TuningParameters& params) { - RETURN_IF_ERROR(BindArguments()); - return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_); -} - -absl::Status ConcatZ::AddToQueue(CLCommandQueue* queue) { - RETURN_IF_ERROR(BindArguments()); - return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_); -} - ConcatZ CreateConcatZ(const OperationDef& definition, const std::vector& channels) { return ConcatZ(definition, channels); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.h b/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.h index 6595432677c..496b9437706 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/concat_z.h @@ -32,10 +32,9 @@ class ConcatZ : public GPUOperation { public: ConcatZ(const OperationDef& definition, const std::vector& channels) : GPUOperation(definition), channels_(channels) {} - absl::Status AddToQueue(CLCommandQueue* queue) override; - absl::Status Tune(const TuningParameters& params) override; - absl::Status Compile(const CreationContext& creation_context) override; + absl::Status BindArguments() override; + int3 GetGridSize() const override; // Move only ConcatZ(ConcatZ&& kernel); @@ -44,9 +43,6 @@ class ConcatZ : public GPUOperation { ConcatZ& operator=(const ConcatZ&) = delete; private: - absl::Status BindArguments(); - int3 GetGridSize() const; - std::vector channels_; }; diff --git 
a/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.cc index 1d9eaef38c1..788b56c73cc 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.cc @@ -67,6 +67,7 @@ absl::Status Conv3D::Compile(const CreationContext& creation_context) { definition_.IsBatchSupported() && stride_.x != 1; std::string code = GenerateConv3D(definition_, stride_correction, conv_params_, &args_); + work_group_size_ = conv_params_.work_group_size; std::string element_wise_code; RETURN_IF_ERROR( MergeOperations(linked_operations_, &args_, &element_wise_code)); @@ -105,11 +106,8 @@ absl::Status Conv3D::BindArguments() { RETURN_IF_ERROR(args_.SetInt("kernel_size_z", kernel_size_.z)); RETURN_IF_ERROR(args_.SetInt("dilation_z", dilation_.z)); } - RETURN_IF_ERROR(args_.SetInt( - "grid_size_s", - DivideRoundUp(dst_[0]->Slices(), conv_params_.block_size.w))); - RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); - return args_.Bind(kernel_.kernel()); + return args_.SetInt("grid_size_s", DivideRoundUp(dst_[0]->Slices(), + conv_params_.block_size.w)); } int3 Conv3D::GetGridSize() const { @@ -142,19 +140,14 @@ absl::Status Conv3D::Tune(const TuningParameters& params) { if (conv_params_.work_group_launch_order[0] == 0 && conv_params_.work_group_launch_order[1] == 1 && conv_params_.work_group_launch_order[2] == 2) { - RETURN_IF_ERROR(BindArguments()); - return GetBestWorkGroupConv(params, kernel_, GetGridSize(), - &conv_params_.work_group_size); + RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); + RETURN_IF_ERROR(GetBestWorkGroupConv(params, kernel_, grid_size_, + &conv_params_.work_group_size)); + work_group_size_ = conv_params_.work_group_size; } return absl::OkStatus(); } -absl::Status Conv3D::AddToQueue(CLCommandQueue* queue) { - RETURN_IF_ERROR(BindArguments()); - return queue->DispatchImplicit(kernel_, GetGridSize(), - conv_params_.work_group_size); -} - namespace { std::string GenerateUploadByThreads(const std::string& local_ptr_name, const std::string& global_ptr_name, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.h index 501aa0578ed..720f1edb22e 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_3d.h @@ -39,9 +39,10 @@ namespace cl { class Conv3D : public GPUOperation { public: Conv3D() = default; - absl::Status AddToQueue(CLCommandQueue* queue) override; absl::Status Tune(const TuningParameters& params) override; absl::Status Compile(const CreationContext& creation_context) override; + absl::Status BindArguments() override; + int3 GetGridSize() const override; // Move only Conv3D(Conv3D&& operation); @@ -105,9 +106,6 @@ class Conv3D : public GPUOperation { int dst_slices, bool x_kernel_is_1, bool y_kernel_is_1, bool z_kernel_is_1) const; - absl::Status BindArguments(); - int3 GetGridSize() const; - int3 stride_; int3 padding_; int3 kernel_size_; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc index 6fab26ac5da..90071554d0f 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.cc @@ -293,6 +293,7 @@ ConvBuffer1x1& ConvBuffer1x1::operator=(ConvBuffer1x1&& operation) { absl::Status ConvBuffer1x1::Compile(const CreationContext& creation_context) { std::string code = GenerateConvBuffer1x1(definition_, 
conv_params_, &args_); + work_group_size_ = conv_params_.work_group_size; std::string element_wise_code; RETURN_IF_ERROR( MergeOperations(linked_operations_, &args_, &element_wise_code)); @@ -310,9 +311,7 @@ absl::Status ConvBuffer1x1::BindArguments() { RETURN_IF_ERROR(args_.SetObjectRef("weights", src_[1])); } RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); - RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); - RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); - return args_.Bind(kernel_.kernel()); + return args_.SetObjectRef("dst_tensor", dst_[0]); } int3 ConvBuffer1x1::GetGridSize() const { @@ -328,15 +327,11 @@ int3 ConvBuffer1x1::GetGridSize() const { } absl::Status ConvBuffer1x1::Tune(const TuningParameters& params) { - RETURN_IF_ERROR(BindArguments()); - return GetBestWorkGroupConv(params, kernel_, GetGridSize(), - &conv_params_.work_group_size); -} - -absl::Status ConvBuffer1x1::AddToQueue(CLCommandQueue* queue) { - RETURN_IF_ERROR(BindArguments()); - return queue->DispatchImplicit(kernel_, GetGridSize(), - conv_params_.work_group_size); + RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); + RETURN_IF_ERROR(GetBestWorkGroupConv(params, kernel_, grid_size_, + &conv_params_.work_group_size)); + work_group_size_ = conv_params_.work_group_size; + return absl::OkStatus(); } bool IsConvBuffer1x1Supported(const OperationDef& definition, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.h index 1be023fae82..9f549d33e71 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_buffer_1x1.h @@ -47,9 +47,10 @@ class ConvBuffer1x1 : public GPUOperation { ConvBuffer1x1(const ConvBuffer1x1&) = delete; ConvBuffer1x1& operator=(const ConvBuffer1x1&) = delete; - absl::Status AddToQueue(CLCommandQueue* queue) override; absl::Status Tune(const TuningParameters& params) override; absl::Status Compile(const CreationContext& creation_context) override; + absl::Status BindArguments() override; + int3 GetGridSize() const override; ConvWeightsDescription GetConvWeightsDescription() const { ConvWeightsDescription desc; @@ -106,9 +107,6 @@ class ConvBuffer1x1 : public GPUOperation { absl::Status UploadBiases(const tflite::gpu::Tensor& biases, CLContext* context); - absl::Status BindArguments(); - int3 GetGridSize() const; - ConvParams conv_params_; }; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc index e2d0e821b5e..83c4300c219 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.cc @@ -251,9 +251,7 @@ absl::Status ConvConstants::BindArguments() { RETURN_IF_ERROR(args_.SetInt("padding_x", padding_.x * src_[0]->Batch())); RETURN_IF_ERROR(args_.SetInt("padding_y", padding_.y)); RETURN_IF_ERROR(args_.SetInt("dilation_x", dilation_.x * src_[0]->Batch())); - RETURN_IF_ERROR(args_.SetInt("dilation_y", dilation_.y)); - RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); - return args_.Bind(kernel_.kernel()); + return args_.SetInt("dilation_y", dilation_.y); } int3 ConvConstants::GetGridSize() const { @@ -262,16 +260,6 @@ int3 ConvConstants::GetGridSize() const { return int3(grid_x, grid_y, 1); } -absl::Status ConvConstants::Tune(const TuningParameters& params) { - RETURN_IF_ERROR(BindArguments()); - return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_); 
-} - -absl::Status ConvConstants::AddToQueue(CLCommandQueue* queue) { - RETURN_IF_ERROR(BindArguments()); - return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_); -} - bool IsConvConstantsSupported(const CLDevice& device, const OperationDef& definition, const Convolution2DAttributes& attr) { diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.h index f3f0025bf91..d434af0a337 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_constants.h @@ -35,10 +35,10 @@ namespace cl { class ConvConstants : public GPUOperation { public: ConvConstants() = default; - absl::Status AddToQueue(CLCommandQueue* queue) override; - absl::Status Tune(const TuningParameters& params) override; absl::Status Compile(const CreationContext& creation_context) override; + absl::Status BindArguments() override; + int3 GetGridSize() const override; // Move only ConvConstants(ConvConstants&& kernel); @@ -68,9 +68,6 @@ class ConvConstants : public GPUOperation { void RearrangeWeightsData(const tflite::gpu::Tensor& weights, absl::Span dst); - absl::Status BindArguments(); - int3 GetGridSize() const; - int2 kernel_size_; int2 stride_; int2 padding_; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc index 551f5f33ff8..76ae58a0c55 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc @@ -184,6 +184,7 @@ absl::Status ConvPowerVR::Compile(const CreationContext& creation_context) { definition_.IsBatchSupported() && stride_padding_.x != 1; std::string code = GenerateConv(*creation_context.device, definition_, stride_correction, conv_params_, &args_); + work_group_size_ = conv_params_.work_group_size; std::string element_wise_code; RETURN_IF_ERROR( MergeOperations(linked_operations_, &args_, &element_wise_code)); @@ -226,8 +227,6 @@ absl::Status ConvPowerVR::BindArguments() { conv_params_.block_size.x); RETURN_IF_ERROR(args_.SetInt("task_size_x", grid_x)); } - RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); - RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); return absl::OkStatus(); } @@ -272,19 +271,14 @@ absl::Status ConvPowerVR::Tune(const TuningParameters& params) { if (conv_params_.work_group_launch_order[0] == 0 && conv_params_.work_group_launch_order[1] == 1 && conv_params_.work_group_launch_order[2] == 2) { - RETURN_IF_ERROR(BindArguments()); - return GetBestWorkGroupConv(params, kernel_, GetGridSize(), - &conv_params_.work_group_size); + RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); + RETURN_IF_ERROR(GetBestWorkGroupConv(params, kernel_, grid_size_, + &conv_params_.work_group_size)); + work_group_size_ = conv_params_.work_group_size; } return absl::OkStatus(); } -absl::Status ConvPowerVR::AddToQueue(CLCommandQueue* queue) { - RETURN_IF_ERROR(BindArguments()); - return queue->DispatchImplicit(kernel_, GetGridSize(), - conv_params_.work_group_size); -} - std::string GenerateConv(const CLDevice& device, const OperationDef& op_def, bool stride_correction, const ConvPowerVR::ConvParams& conv_params, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h index 07bcf2c1f86..8ef8bc6fbde 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.h @@ -41,9 +41,10 @@ 
namespace cl { class ConvPowerVR : public GPUOperation { public: ConvPowerVR() = default; - absl::Status AddToQueue(CLCommandQueue* queue) override; absl::Status Tune(const TuningParameters& params) override; absl::Status Compile(const CreationContext& creation_context) override; + absl::Status BindArguments() override; + int3 GetGridSize() const override; ConvWeightsDescription GetConvWeightsDescription() const { ConvWeightsDescription desc; @@ -205,9 +206,6 @@ class ConvPowerVR : public GPUOperation { bool different_weights_for_height, const BHWC* dst_shape = nullptr) const; - absl::Status BindArguments(); - int3 GetGridSize() const; - int4 stride_padding_; int4 kernel_dilation_; ConvParams conv_params_; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc index d81c7e83b83..a31674de2fd 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc @@ -420,8 +420,6 @@ absl::Status ConvTexture::BindArguments() { RETURN_IF_ERROR(args_.SetInt("stride_y", stride_.y)); RETURN_IF_ERROR(args_.SetInt("padding_x", padding_.x * src_[0]->Batch())); RETURN_IF_ERROR(args_.SetInt("padding_y", padding_.y)); - RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); - RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); return absl::OkStatus(); } @@ -434,14 +432,8 @@ int3 ConvTexture::GetGridSize() const { } absl::Status ConvTexture::Tune(const TuningParameters& params) { - RETURN_IF_ERROR(BindArguments()); - return GetBestWorkGroupConv(params, kernel_, GetGridSize(), - &work_group_size_); -} - -absl::Status ConvTexture::AddToQueue(CLCommandQueue* queue) { - RETURN_IF_ERROR(BindArguments()); - return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_); + RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); + return GetBestWorkGroupConv(params, kernel_, grid_size_, &work_group_size_); } absl::Status CreateConvTexture(const CreationContext& creation_context, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.h index c21d5b1deaa..80a328e4eef 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.h @@ -42,10 +42,10 @@ namespace cl { class ConvTexture : public GPUOperation { public: ConvTexture() = default; - absl::Status AddToQueue(CLCommandQueue* queue) override; absl::Status Tune(const TuningParameters& params) override; - absl::Status Compile(const CreationContext& creation_context) override; + absl::Status BindArguments() override; + int3 GetGridSize() const override; // Move only ConvTexture(ConvTexture&& operation); @@ -89,9 +89,6 @@ class ConvTexture : public GPUOperation { absl::Span dst_0, absl::Span dst_1, absl::Span dst_2, absl::Span dst_3); - absl::Status BindArguments(); - int3 GetGridSize() const; - int2 kernel_size_; int2 stride_; int2 padding_; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.cc index 063b20edd8a..ce973115266 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.cc @@ -127,9 +127,7 @@ absl::Status ConverterToConvWeights::BindArguments() { RETURN_IF_ERROR(args_.SetFloat("mask_x", mask.x)); RETURN_IF_ERROR(args_.SetFloat("mask_y", mask.y)); RETURN_IF_ERROR(args_.SetFloat("mask_z", mask.z)); - 
RETURN_IF_ERROR(args_.SetFloat("mask_w", mask.w)); - RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); - return args_.Bind(kernel_.kernel()); + return args_.SetFloat("mask_w", mask.w); } int3 ConverterToConvWeights::GetGridSize() const { @@ -140,16 +138,6 @@ int3 ConverterToConvWeights::GetGridSize() const { return int3(grid_x, grid_y, grid_z); } -absl::Status ConverterToConvWeights::Tune(const TuningParameters& params) { - RETURN_IF_ERROR(BindArguments()); - return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_); -} - -absl::Status ConverterToConvWeights::AddToQueue(CLCommandQueue* queue) { - RETURN_IF_ERROR(BindArguments()); - return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_); -} - ConverterToConvWeights CreateConverterToConvWeights( const OperationDef& definition, const ConvWeightsDescription& conv_weights_desc) { diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.h b/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.h index 3bf17fac939..d8d84b8f5b5 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_weights_converter.h @@ -32,10 +32,9 @@ class ConverterToConvWeights : public GPUOperation { ConverterToConvWeights(const OperationDef& definition, const ConvWeightsDescription& conv_weights_desc) : GPUOperation(definition), conv_weights_desc_(conv_weights_desc) {} - absl::Status AddToQueue(CLCommandQueue* queue) override; - absl::Status Tune(const TuningParameters& params) override; - absl::Status Compile(const CreationContext& creation_context) override; + absl::Status BindArguments() override; + int3 GetGridSize() const override; // Move only ConverterToConvWeights(ConverterToConvWeights&& operation); @@ -44,9 +43,6 @@ class ConverterToConvWeights : public GPUOperation { ConverterToConvWeights& operator=(const ConverterToConvWeights&) = delete; private: - absl::Status BindArguments(); - int3 GetGridSize() const; - ConvWeightsDescription conv_weights_desc_; }; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc index 85456fc5140..dc146c48895 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.cc @@ -362,9 +362,7 @@ absl::Status ConvolutionTransposed::BindArguments() { RETURN_IF_ERROR(args_.SetInt("padding_x", padding_.x)); RETURN_IF_ERROR(args_.SetInt("padding_y", padding_.y)); RETURN_IF_ERROR(args_.SetInt("kernel_size_x", kernel_size_.x)); - RETURN_IF_ERROR(args_.SetInt("kernel_size_y", kernel_size_.y)); - RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); - return args_.Bind(kernel_.kernel()); + return args_.SetInt("kernel_size_y", kernel_size_.y); } int3 ConvolutionTransposed::GetGridSize() const { @@ -377,14 +375,8 @@ int3 ConvolutionTransposed::GetGridSize() const { } absl::Status ConvolutionTransposed::Tune(const TuningParameters& params) { - RETURN_IF_ERROR(BindArguments()); - return GetBestWorkGroupConv(params, kernel_, GetGridSize(), - &work_group_size_); -} - -absl::Status ConvolutionTransposed::AddToQueue(CLCommandQueue* queue) { - RETURN_IF_ERROR(BindArguments()); - return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_); + RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); + return GetBestWorkGroupConv(params, kernel_, grid_size_, &work_group_size_); } absl::Status CreateConvolutionTransposed( 
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.h index cf70799f5d4..fc53884bcc7 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed.h @@ -38,10 +38,10 @@ namespace cl { class ConvolutionTransposed : public GPUOperation { public: ConvolutionTransposed() = default; - absl::Status AddToQueue(CLCommandQueue* queue) override; absl::Status Tune(const TuningParameters& params) override; - absl::Status Compile(const CreationContext& creation_context) override; + absl::Status BindArguments() override; + int3 GetGridSize() const override; // Move only ConvolutionTransposed(ConvolutionTransposed&& operation); @@ -65,9 +65,6 @@ class ConvolutionTransposed : public GPUOperation { void RearrangeWeightsData(const tflite::gpu::Tensor& weights, absl::Span dst); - absl::Status BindArguments(); - int3 GetGridSize() const; - bool weights_are_buffer_; int2 kernel_size_; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.cc index 53f24cb7a29..409f7e3716b 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.cc @@ -399,10 +399,8 @@ absl::Status ConvolutionTransposed3D::BindArguments() { RETURN_IF_ERROR(args_.SetInt("kernel_size_x", kernel_size_.x)); RETURN_IF_ERROR(args_.SetInt("kernel_size_y", kernel_size_.y)); RETURN_IF_ERROR(args_.SetInt("kernel_size_z", kernel_size_.z)); - RETURN_IF_ERROR(args_.SetInt( - "grid_size_s", DivideRoundUp(dst_[0]->Slices(), block_size_.w))); - RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); - return args_.Bind(kernel_.kernel()); + return args_.SetInt("grid_size_s", + DivideRoundUp(dst_[0]->Slices(), block_size_.w)); } int3 ConvolutionTransposed3D::GetGridSize() const { @@ -417,14 +415,8 @@ int3 ConvolutionTransposed3D::GetGridSize() const { } absl::Status ConvolutionTransposed3D::Tune(const TuningParameters& params) { - RETURN_IF_ERROR(BindArguments()); - return GetBestWorkGroupConv(params, kernel_, GetGridSize(), - &work_group_size_); -} - -absl::Status ConvolutionTransposed3D::AddToQueue(CLCommandQueue* queue) { - RETURN_IF_ERROR(BindArguments()); - return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_); + RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); + return GetBestWorkGroupConv(params, kernel_, grid_size_, &work_group_size_); } absl::Status CreateConvolutionTransposed3D( diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.h index 4b76e617e08..09f7e700967 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3d.h @@ -38,10 +38,10 @@ namespace cl { class ConvolutionTransposed3D : public GPUOperation { public: ConvolutionTransposed3D() = default; - absl::Status AddToQueue(CLCommandQueue* queue) override; absl::Status Tune(const TuningParameters& params) override; - absl::Status Compile(const CreationContext& creation_context) override; + absl::Status BindArguments() override; + int3 GetGridSize() const override; // Move only ConvolutionTransposed3D(ConvolutionTransposed3D&& operation); @@ -65,9 +65,6 @@ class ConvolutionTransposed3D : public GPUOperation { void 
RearrangeWeightsData(const tflite::gpu::Tensor& weights, absl::Span dst); - absl::Status BindArguments(); - int3 GetGridSize() const; - bool weights_are_buffer_; int3 kernel_size_; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.cc index 0da4ca67a4f..9446f0f7e3d 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.cc @@ -333,9 +333,7 @@ absl::Status ConvolutionTransposed3x3::BindArguments() { const int padding_y = padding_.y >= 1 ? (padding_.y - 1) / 2 : (padding_.y - 2) / 2; RETURN_IF_ERROR(args_.SetInt("padding_x", padding_x * src_[0]->Batch())); - RETURN_IF_ERROR(args_.SetInt("padding_y", padding_y)); - RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); - return args_.Bind(kernel_.kernel()); + return args_.SetInt("padding_y", padding_y); } int3 ConvolutionTransposed3x3::GetGridSize() const { @@ -349,12 +347,6 @@ int3 ConvolutionTransposed3x3::GetGridSize() const { return int3(wg[work_group_launch_order_[0]] * work_group_size_.x, wg[work_group_launch_order_[1]] * work_group_size_.y, wg[work_group_launch_order_[2]] * work_group_size_.z); - return int3(grid_x, grid_y, grid_z); -} - -absl::Status ConvolutionTransposed3x3::AddToQueue(CLCommandQueue* queue) { - RETURN_IF_ERROR(BindArguments()); - return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_); } bool IsConvolutionTransposed3x3Supported( diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.h index 3792acd174e..0dc42a7563d 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3.h @@ -37,8 +37,12 @@ namespace cl { class ConvolutionTransposed3x3 : public GPUOperation { public: ConvolutionTransposed3x3() = default; - absl::Status AddToQueue(CLCommandQueue* queue) override; + absl::Status Tune(const TuningParameters& params) override { + return absl::OkStatus(); + } absl::Status Compile(const CreationContext& creation_context) override; + absl::Status BindArguments() override; + int3 GetGridSize() const override; // Move only ConvolutionTransposed3x3(ConvolutionTransposed3x3&& operation); @@ -68,9 +72,6 @@ class ConvolutionTransposed3x3 : public GPUOperation { void RearrangeWeightsData(const tflite::gpu::Tensor& weights, absl::Span dst); - absl::Status BindArguments(); - int3 GetGridSize() const; - int2 padding_; int3 work_group_launch_order_; WeightsUploadType weights_upload_type_; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.cc index 934c7198fa8..56a21cb7239 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.cc @@ -207,9 +207,7 @@ absl::Status ConvolutionTransposed3x3Thin::Compile( absl::Status ConvolutionTransposed3x3Thin::BindArguments() { RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); - RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); - RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); - return args_.Bind(kernel_.kernel()); + return args_.SetObjectRef("dst_tensor", dst_[0]); } int3 ConvolutionTransposed3x3Thin::GetGridSize() const { @@ 
-219,17 +217,6 @@ int3 ConvolutionTransposed3x3Thin::GetGridSize() const { return int3(grid_x, grid_y, grid_z); } -absl::Status ConvolutionTransposed3x3Thin::Tune( - const TuningParameters& params) { - RETURN_IF_ERROR(BindArguments()); - return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_); -} - -absl::Status ConvolutionTransposed3x3Thin::AddToQueue(CLCommandQueue* queue) { - RETURN_IF_ERROR(BindArguments()); - return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_); -} - bool IsConvolutionTransposed3x3ThinSupported( const CLDevice& device, const ConvolutionTransposedAttributes& attr) { return attr.weights.shape.o <= 8 && attr.weights.shape.w == 3 && diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.h index 2e272835818..282f1b3b476 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_3x3_thin.h @@ -37,10 +37,9 @@ namespace cl { class ConvolutionTransposed3x3Thin : public GPUOperation { public: ConvolutionTransposed3x3Thin() = default; - absl::Status AddToQueue(CLCommandQueue* queue) override; - absl::Status Tune(const TuningParameters& params) override; - absl::Status Compile(const CreationContext& creation_context) override; + absl::Status BindArguments() override; + int3 GetGridSize() const override; // Move only ConvolutionTransposed3x3Thin(ConvolutionTransposed3x3Thin&& operation); @@ -67,9 +66,6 @@ class ConvolutionTransposed3x3Thin : public GPUOperation { void RearrangeWeightsData(const tflite::gpu::Tensor& weights, absl::Span dst); - absl::Status BindArguments(); - int3 GetGridSize() const; - int src_channels_; int dst_channels_; }; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.cc index 6c81457cd8c..d7660fca097 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.cc @@ -318,9 +318,7 @@ absl::Status ConvolutionTransposed4x4::Compile( absl::Status ConvolutionTransposed4x4::BindArguments() { RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); - RETURN_IF_ERROR(args_.SetInt("filter_offset", 4 * 16 * src_[0]->Slices())); - RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); - return args_.Bind(kernel_.kernel()); + return args_.SetInt("filter_offset", 4 * 16 * src_[0]->Slices()); } int3 ConvolutionTransposed4x4::GetGridSize() const { @@ -330,11 +328,6 @@ int3 ConvolutionTransposed4x4::GetGridSize() const { return int3(grid_x, grid_y, grid_z); } -absl::Status ConvolutionTransposed4x4::AddToQueue(CLCommandQueue* queue) { - RETURN_IF_ERROR(BindArguments()); - return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_); -} - bool IsConvolutionTransposed4x4Supported( const CLDevice& device, const OperationDef& definition, const ConvolutionTransposedAttributes& attr) { diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.h index 1cf3b836d24..982937486dc 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_4x4.h @@ -37,8 +37,12 @@ namespace cl { class 
ConvolutionTransposed4x4 : public GPUOperation { public: ConvolutionTransposed4x4() = default; - absl::Status AddToQueue(CLCommandQueue* queue) override; + absl::Status Tune(const TuningParameters& params) override { + return absl::OkStatus(); + } absl::Status Compile(const CreationContext& creation_context) override; + absl::Status BindArguments() override; + int3 GetGridSize() const override; // Move only ConvolutionTransposed4x4(ConvolutionTransposed4x4&& operation); @@ -68,9 +72,6 @@ class ConvolutionTransposed4x4 : public GPUOperation { void RearrangeWeightsData(const tflite::gpu::Tensor& weights, absl::Span dst); - absl::Status BindArguments(); - int3 GetGridSize() const; - WeightsUploadType weights_upload_type_; }; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.cc b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.cc index 90b1a4c1da5..5b31c98cb02 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.cc @@ -183,9 +183,7 @@ absl::Status ConvolutionTransposedThin::Compile( absl::Status ConvolutionTransposedThin::BindArguments() { RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); - RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); - RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); - return args_.Bind(kernel_.kernel()); + return args_.SetObjectRef("dst_tensor", dst_[0]); } int3 ConvolutionTransposedThin::GetGridSize() const { @@ -195,16 +193,6 @@ int3 ConvolutionTransposedThin::GetGridSize() const { return int3(grid_x, grid_y, grid_z); } -absl::Status ConvolutionTransposedThin::Tune(const TuningParameters& params) { - RETURN_IF_ERROR(BindArguments()); - return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_); -} - -absl::Status ConvolutionTransposedThin::AddToQueue(CLCommandQueue* queue) { - RETURN_IF_ERROR(BindArguments()); - return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_); -} - bool IsConvolutionTransposedThinSupported( const CLDevice& device, const ConvolutionTransposedAttributes& attr) { return attr.weights.shape.o <= 4 && attr.weights.shape.w == attr.stride.w && diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.h b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.h index bb06202739d..90a1b026369 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/convolution_transposed_thin.h @@ -37,10 +37,9 @@ namespace cl { class ConvolutionTransposedThin : public GPUOperation { public: ConvolutionTransposedThin() = default; - absl::Status AddToQueue(CLCommandQueue* queue) override; - absl::Status Tune(const TuningParameters& params) override; - absl::Status Compile(const CreationContext& creation_context) override; + absl::Status BindArguments() override; + int3 GetGridSize() const override; // Move only ConvolutionTransposedThin(ConvolutionTransposedThin&& operation); @@ -65,9 +64,6 @@ class ConvolutionTransposedThin : public GPUOperation { void RearrangeWeightsData(const tflite::gpu::Tensor& weights, absl::Span dst); - absl::Status BindArguments(); - int3 GetGridSize() const; - int2 kernel_size_; int src_channels_; int dst_channels_; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.cc b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.cc index 82658d62f10..7d6bee6877b 100644 --- 
a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.cc @@ -306,8 +306,7 @@ absl::Status DepthwiseConvolution::BindArguments() { if (!IsSpecializedCase(channel_multiplier_)) { RETURN_IF_ERROR(args_.SetInt("ch_multiplier", channel_multiplier_)); } - RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); - return args_.Bind(kernel_.kernel()); + return absl::OkStatus(); } int3 DepthwiseConvolution::GetGridSize() const { @@ -317,16 +316,6 @@ int3 DepthwiseConvolution::GetGridSize() const { return int3(grid_x, grid_y, grid_z); } -absl::Status DepthwiseConvolution::Tune(const TuningParameters& params) { - RETURN_IF_ERROR(BindArguments()); - return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_); -} - -absl::Status DepthwiseConvolution::AddToQueue(CLCommandQueue* queue) { - RETURN_IF_ERROR(BindArguments()); - return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_); -} - absl::Status CreateDepthwiseConvolution( const CreationContext& creation_context, const OperationDef& definition, const DepthwiseConvolution2DAttributes& attr, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.h b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.h index 6433e8d0a3b..51cf68aaf9e 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.h @@ -38,10 +38,9 @@ namespace cl { class DepthwiseConvolution : public GPUOperation { public: DepthwiseConvolution() = default; - absl::Status AddToQueue(CLCommandQueue* queue) override; - absl::Status Tune(const TuningParameters& params) override; - absl::Status Compile(const CreationContext& creation_context) override; + absl::Status BindArguments() override; + int3 GetGridSize() const override; // Move only DepthwiseConvolution(DepthwiseConvolution&& operation); @@ -81,9 +80,6 @@ class DepthwiseConvolution : public GPUOperation { void RearrangeWeightsData(const tflite::gpu::Tensor& weights, absl::Span dst); - absl::Status BindArguments(); - int3 GetGridSize() const; - bool weights_are_buffer_; int4 kernel_size_; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc index 0494038e5b9..97afea4fcd4 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc @@ -303,9 +303,7 @@ absl::Status DepthwiseConv3x3::Compile( absl::Status DepthwiseConv3x3::BindArguments() { RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); - RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); - RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); - return args_.Bind(kernel_.kernel()); + return args_.SetObjectRef("dst_tensor", dst_[0]); } int3 DepthwiseConv3x3::GetGridSize() const { @@ -319,15 +317,10 @@ absl::Status DepthwiseConv3x3::Tune(const TuningParameters& params) { if (local_mem_uploads_) { return absl::OkStatus(); } - RETURN_IF_ERROR(BindArguments()); + RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_); } -absl::Status DepthwiseConv3x3::AddToQueue(CLCommandQueue* queue) { - RETURN_IF_ERROR(BindArguments()); - return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_); -} - bool IsDepthwiseConv3x3Supported(const DepthwiseConvolution2DAttributes& attr) { return attr.weights.shape.o == 1 && attr.dilations.w == 1 && 
attr.dilations.h == 1 && attr.weights.shape.w == 3 && diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.h b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.h index fd1dca4ca98..ce5b2d82981 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.h @@ -38,10 +38,10 @@ namespace cl { class DepthwiseConv3x3 : public GPUOperation { public: DepthwiseConv3x3() = default; - absl::Status AddToQueue(CLCommandQueue* queue) override; absl::Status Tune(const TuningParameters& params) override; - absl::Status Compile(const CreationContext& creation_context) override; + absl::Status BindArguments() override; + int3 GetGridSize() const override; // Move only DepthwiseConv3x3(DepthwiseConv3x3&& operation); @@ -66,9 +66,6 @@ class DepthwiseConv3x3 : public GPUOperation { const tflite::gpu::Tensor& weights, const tflite::gpu::Tensor& biases, absl::Span dst); - absl::Status BindArguments(); - int3 GetGridSize() const; - bool weights_are_buffer_; bool local_mem_uploads_; }; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc index 1685d4f505f..944af0a2280 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.cc @@ -129,13 +129,13 @@ absl::Status FullyConnected::Compile(const CreationContext& creation_context) { return absl::OkStatus(); } -absl::Status FullyConnected::AddToQueue(CLCommandQueue* queue) { +absl::Status FullyConnected::BindArguments() { RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); - RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); - RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); - RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); - return queue->DispatchImplicit(kernel_, {dst_[0]->Slices(), 1, 1}, - work_group_size_); + return args_.SetObjectRef("dst_tensor", dst_[0]); +} + +int3 FullyConnected::GetGridSize() const { + return int3(dst_[0]->Slices(), 1, 1); } absl::Status CreateFullyConnected(const CreationContext& creation_context, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h index 2adff4fb685..138db001332 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/fully_connected.h @@ -37,8 +37,11 @@ namespace cl { class FullyConnected : public GPUOperation { public: FullyConnected() = default; - absl::Status AddToQueue(CLCommandQueue* queue) override; - + absl::Status Tune(const TuningParameters& params) override { + return absl::OkStatus(); + } + absl::Status BindArguments() override; + int3 GetGridSize() const override; absl::Status Compile(const CreationContext& creation_context) override; // Move only diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc index 2310ee5fb98..d0d1f88c9e6 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc @@ -125,6 +125,7 @@ GPUOperation::GPUOperation(GPUOperation&& operation) args_(std::move(operation.args_)), kernel_(std::move(operation.kernel_)), work_group_size_(operation.work_group_size_), + grid_size_(operation.grid_size_), linked_operations_(std::move(operation.linked_operations_)) {} GPUOperation& GPUOperation::operator=(GPUOperation&& 
operation) { @@ -135,6 +136,7 @@ GPUOperation& GPUOperation::operator=(GPUOperation&& operation) { args_ = std::move(operation.args_); kernel_ = std::move(operation.kernel_); std::swap(work_group_size_, operation.work_group_size_); + std::swap(grid_size_, operation.grid_size_); linked_operations_ = std::move(operation.linked_operations_); } return *this; @@ -162,10 +164,7 @@ ElementwiseOperation& ElementwiseOperation::operator=( absl::Status ElementwiseOperation::BindArguments() { RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); - RETURN_IF_ERROR(SetArgs("", &args_)); - RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); - RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); - return absl::OkStatus(); + return SetArgs("", &args_); } int3 ElementwiseOperation::GetGridSize() const { @@ -192,16 +191,6 @@ absl::Status ElementwiseOperation::Compile( *creation_context.device, &kernel_); } -absl::Status ElementwiseOperation::AddToQueue(CLCommandQueue* queue) { - RETURN_IF_ERROR(BindArguments()); - return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_); -} - -absl::Status ElementwiseOperation::Tune(const TuningParameters& params) { - RETURN_IF_ERROR(BindArguments()); - return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_); -} - absl::Status MergeOperations( const std::vector& linked_ops, Arguments* merged_args, std::string* merged_code) { diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h index 34d6d8c2141..88d0ff0b46f 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h @@ -24,6 +24,7 @@ limitations under the License. #include "tensorflow/lite/delegates/gpu/cl/cl_context.h" #include "tensorflow/lite/delegates/gpu/cl/cl_device.h" #include "tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h" +#include "tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h" #include "tensorflow/lite/delegates/gpu/cl/precision.h" #include "tensorflow/lite/delegates/gpu/cl/program_cache.h" #include "tensorflow/lite/delegates/gpu/cl/tensor.h" @@ -59,6 +60,9 @@ struct OperationDef { class ElementwiseOperation; +absl::Status SetArguments(const std::vector& linked_ops, + Arguments* args); + // GPUOperation represents some implementation of neural network operation on // GPU. GPUOperation can contain ElementwiseOperation operations, in this case, // ElementwiseOperation still hold necessary data and should be alive. @@ -86,11 +90,22 @@ class GPUOperation { void SetSrc(Tensor* ptr, int index = 0); void SetDst(Tensor* ptr, int index = 0); - virtual absl::Status AddToQueue(CLCommandQueue* queue) { + // should be called after changes of inputs/outputs. 
+ absl::Status UpdateParams() { + RETURN_IF_ERROR(BindArguments()); + RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); + grid_size_ = GetGridSize(); return absl::OkStatus(); } + + absl::Status AddToQueue(CLCommandQueue* queue) { + RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); + return queue->DispatchImplicit(kernel_, grid_size_, work_group_size_); + } + virtual absl::Status Tune(const TuningParameters& params) { - return absl::OkStatus(); + RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); + return GetBestWorkGroup(params, kernel_, grid_size_, &work_group_size_); } virtual absl::Status Compile(const CreationContext& creation_context) { @@ -100,6 +115,9 @@ class GPUOperation { const OperationDef& GetDefinition() const { return definition_; } protected: + virtual absl::Status BindArguments() = 0; + virtual int3 GetGridSize() const = 0; + // Defines operation calculation precision and format of src/dst tensors. OperationDef definition_; std::vector src_; @@ -107,6 +125,7 @@ class GPUOperation { Arguments args_; CLKernel kernel_; int3 work_group_size_ = int3(8, 4, 1); + int3 grid_size_ = int3(0, 0, 0); std::vector linked_operations_; }; @@ -124,10 +143,10 @@ class ElementwiseOperation : public GPUOperation { : GPUOperation(definition) {} virtual ~ElementwiseOperation() {} - absl::Status AddToQueue(CLCommandQueue* queue) override; - absl::Status Tune(const TuningParameters& params) override; absl::Status Compile(const CreationContext& creation_context) override; + absl::Status BindArguments() override; + int3 GetGridSize() const override; // Move only ElementwiseOperation(ElementwiseOperation&& operation); @@ -149,17 +168,12 @@ class ElementwiseOperation : public GPUOperation { protected: bool check_src_channels_size_ = false; std::string code_; - absl::Status BindArguments(); - int3 GetGridSize() const; }; absl::Status MergeOperations( const std::vector& linked_ops, Arguments* merged_args, std::string* merged_code); -absl::Status SetArguments(const std::vector& linked_ops, - Arguments* args); - } // namespace cl } // namespace gpu } // namespace tflite diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/lstm.cc b/tensorflow/lite/delegates/gpu/cl/kernels/lstm.cc index 66d6b3d51cb..ab61fcb0b62 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/lstm.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/lstm.cc @@ -125,7 +125,7 @@ absl::Status LSTM::BindArguments() { RETURN_IF_ERROR(args_.SetObjectRef("prev_state", src_[1])); RETURN_IF_ERROR(args_.SetObjectRef("new_state", dst_[0])); RETURN_IF_ERROR(args_.SetObjectRef("activation", dst_[1])); - return args_.Bind(kernel_.kernel()); + return absl::OkStatus(); } int3 LSTM::GetGridSize() const { @@ -135,16 +135,6 @@ int3 LSTM::GetGridSize() const { return int3(grid_x, grid_y, grid_z); } -absl::Status LSTM::Tune(const TuningParameters& params) { - RETURN_IF_ERROR(BindArguments()); - return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_); -} - -absl::Status LSTM::AddToQueue(CLCommandQueue* queue) { - RETURN_IF_ERROR(BindArguments()); - return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_); -} - LSTM CreateLSTM(const OperationDef& definition) { return LSTM(definition); } } // namespace cl diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/lstm.h b/tensorflow/lite/delegates/gpu/cl/kernels/lstm.h index 5310e19951d..6490f396709 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/lstm.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/lstm.h @@ -28,8 +28,8 @@ namespace cl { class LSTM : public GPUOperation { 
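Note on the gpu_operation.h hunks above: after this refactor the GPUOperation base class owns the dispatch and tuning plumbing. UpdateParams() (to be called whenever inputs or outputs change) runs BindArguments(), re-applies the linked elementwise arguments, and caches GetGridSize() in the new grid_size_ member; AddToQueue() only binds args_ to the kernel and dispatches; the default Tune() reuses the cached grid for the work-group search. A concrete kernel is therefore reduced to overriding BindArguments() and GetGridSize(). The sketch below illustrates that contract under stated assumptions: MyOp, its tensor-argument names, and the status.h include path are illustrative, not taken from this patch.

// Minimal sketch of a kernel written against the refactored GPUOperation.
// Assumption: RETURN_IF_ERROR comes from the delegate's common status header.
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h"

namespace tflite {
namespace gpu {
namespace cl {

class MyOp : public GPUOperation {  // hypothetical kernel, not in this patch
 public:
  explicit MyOp(const OperationDef& definition) : GPUOperation(definition) {}

  // Only per-operation arguments are set here; binding args_ to the CL kernel
  // and re-applying linked-op arguments are handled by the base class.
  absl::Status BindArguments() override {
    RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0]));
    return args_.SetObjectRef("dst_tensor", dst_[0]);
  }

  // The logical grid; GPUOperation::UpdateParams() caches it in grid_size_
  // and reuses it for both DispatchImplicit() and tuning.
  int3 GetGridSize() const override {
    return int3(dst_[0]->Width() * dst_[0]->Batch(), dst_[0]->Height(),
                dst_[0]->Slices());
  }
};

}  // namespace cl
}  // namespace gpu
}  // namespace tflite

// Expected host-side order after this change (sketch):
//   op.Compile(creation_context);  // build the CL kernel
//   op.UpdateParams();             // BindArguments + linked args + grid_size_
//   op.AddToQueue(queue);          // args_.Bind + DispatchImplicit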
public: explicit LSTM(const OperationDef& definition); - absl::Status AddToQueue(CLCommandQueue* queue) override; - absl::Status Tune(const TuningParameters& params) override; + absl::Status BindArguments() override; + int3 GetGridSize() const override; absl::Status Compile(const CreationContext& creation_context) override; // Move only @@ -37,10 +37,6 @@ class LSTM : public GPUOperation { LSTM& operator=(LSTM&& kernel); LSTM(const LSTM&) = delete; LSTM& operator=(const LSTM&) = delete; - - private: - absl::Status BindArguments(); - int3 GetGridSize() const; }; LSTM CreateLSTM(const OperationDef& definition); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.cc b/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.cc index 58ace7229ad..bef4c26b177 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.cc @@ -205,8 +205,7 @@ absl::Status MaxUnpooling::BindArguments() { RETURN_IF_ERROR(args_.SetInt("padding_z", padding_.z)); RETURN_IF_ERROR(args_.SetInt("kernel_size_z", kernel_size_.z)); } - RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); - return args_.Bind(kernel_.kernel()); + return absl::OkStatus(); } int3 MaxUnpooling::GetGridSize() const { @@ -216,16 +215,6 @@ int3 MaxUnpooling::GetGridSize() const { return int3(grid_x, grid_y, grid_z); } -absl::Status MaxUnpooling::Tune(const TuningParameters& params) { - RETURN_IF_ERROR(BindArguments()); - return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_); -} - -absl::Status MaxUnpooling::AddToQueue(CLCommandQueue* queue) { - RETURN_IF_ERROR(BindArguments()); - return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_); -} - MaxUnpooling CreateMaxUnpooling(const OperationDef& definition, const MaxUnpooling2DAttributes& attr) { return MaxUnpooling(definition, attr); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.h b/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.h index dae35e90604..38f47df1527 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.h @@ -31,9 +31,9 @@ class MaxUnpooling : public GPUOperation { const MaxUnpooling2DAttributes& attr); MaxUnpooling(const OperationDef& definition, const MaxUnpooling3DAttributes& attr); - absl::Status AddToQueue(CLCommandQueue* queue) override; - absl::Status Tune(const TuningParameters& params) override; + absl::Status BindArguments() override; + int3 GetGridSize() const override; absl::Status Compile(const CreationContext& creation_context) override; // Move only @@ -43,9 +43,6 @@ class MaxUnpooling : public GPUOperation { MaxUnpooling& operator=(const MaxUnpooling&) = delete; private: - absl::Status BindArguments(); - int3 GetGridSize() const; - int4 stride_; int4 padding_; int4 kernel_size_; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/mean.cc b/tensorflow/lite/delegates/gpu/cl/kernels/mean.cc index 334181b98d5..e3fa023633f 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/mean.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/mean.cc @@ -129,8 +129,7 @@ absl::Status Mean::BindArguments() { const double size_1 = total_size / size_0; RETURN_IF_ERROR(args_.SetFloat("inv_multiplier_1", 1.0 / size_1)); RETURN_IF_ERROR(args_.SetFloat("inv_multiplier_2", 1.0 / size_0)); - RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); - return args_.Bind(kernel_.kernel()); + return absl::OkStatus(); } int3 Mean::GetGridSize() const { @@ -140,11 +139,6 
@@ int3 Mean::GetGridSize() const { return int3(grid_x, grid_y, grid_z); } -absl::Status Mean::AddToQueue(CLCommandQueue* queue) { - RETURN_IF_ERROR(BindArguments()); - return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_); -} - Mean CreateMean(const OperationDef& definition) { return Mean(definition); } } // namespace cl diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/mean.h b/tensorflow/lite/delegates/gpu/cl/kernels/mean.h index 028e0013ed4..0552f167d92 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/mean.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/mean.h @@ -30,8 +30,12 @@ class Mean : public GPUOperation { public: Mean() = default; explicit Mean(const OperationDef& definition) : GPUOperation(definition) {} - absl::Status AddToQueue(CLCommandQueue* queue) override; + absl::Status Tune(const TuningParameters& params) override { + return absl::OkStatus(); + } + absl::Status BindArguments() override; + int3 GetGridSize() const override; absl::Status Compile(const CreationContext& creation_context) override; // Move only @@ -39,10 +43,6 @@ class Mean : public GPUOperation { Mean& operator=(Mean&& operation); Mean(const Mean&) = delete; Mean& operator=(const Mean&) = delete; - - private: - absl::Status BindArguments(); - int3 GetGridSize() const; }; Mean CreateMean(const OperationDef& definition); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/padding.cc b/tensorflow/lite/delegates/gpu/cl/kernels/padding.cc index 8576475462d..ebd2809b97c 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/padding.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/padding.cc @@ -175,8 +175,7 @@ absl::Status Padding::BindArguments() { RETURN_IF_ERROR(args_.SetInt("prepended_y", attributes_.prepended.h)); RETURN_IF_ERROR(args_.SetInt("prepended_z", attributes_.prepended.c)); RETURN_IF_ERROR(args_.SetInt("prepended_w", attributes_.prepended.b)); - RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); - return args_.Bind(kernel_.kernel()); + return absl::OkStatus(); } int3 Padding::GetGridSize() const { @@ -186,16 +185,6 @@ int3 Padding::GetGridSize() const { return int3(grid_x, grid_y, grid_z); } -absl::Status Padding::Tune(const TuningParameters& params) { - RETURN_IF_ERROR(BindArguments()); - return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_); -} - -absl::Status Padding::AddToQueue(CLCommandQueue* queue) { - RETURN_IF_ERROR(BindArguments()); - return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_); -} - Padding CreatePadding(const OperationDef& definition, const PadAttributes& attr) { return Padding(definition, attr); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/padding.h b/tensorflow/lite/delegates/gpu/cl/kernels/padding.h index d87a3a87be3..12a83a4f360 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/padding.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/padding.h @@ -28,9 +28,9 @@ namespace cl { class Padding : public GPUOperation { public: Padding(const OperationDef& definition, const PadAttributes& attr); - absl::Status AddToQueue(CLCommandQueue* queue) override; - absl::Status Tune(const TuningParameters& params) override; + absl::Status BindArguments() override; + int3 GetGridSize() const override; absl::Status Compile(const CreationContext& creation_context) override; // Move only @@ -40,9 +40,6 @@ class Padding : public GPUOperation { Padding& operator=(const Padding&) = delete; private: - absl::Status BindArguments(); - int3 GetGridSize() const; - PadAttributes attributes_; }; diff --git 
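A related pattern recurs in the kernels above (ConvolutionTransposed3x3, ConvolutionTransposed4x4, FullyConnected, Mean): kernels that rely on a predetermined work-group size no longer carry their own AddToQueue()/Tune() boilerplate and instead override the new default Tune() with a no-op. A sketch of that override, mirroring those hunks (the explanatory comment is an interpretation, not patch text):

  // Inside such a kernel's class declaration: skip the work-group search,
  // presumably because work_group_size_ is already fixed when the kernel is
  // created/compiled rather than discovered by tuning.
  absl::Status Tune(const TuningParameters& params) override {
    return absl::OkStatus();
  }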
a/tensorflow/lite/delegates/gpu/cl/kernels/pooling.cc b/tensorflow/lite/delegates/gpu/cl/kernels/pooling.cc index 966c655b975..6ba49e335e0 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/pooling.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/pooling.cc @@ -399,8 +399,7 @@ absl::Status Pooling::BindArguments() { if (output_indices_) { RETURN_IF_ERROR(args_.SetObjectRef("dst_indices", dst_[1])); } - RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); - return args_.Bind(kernel_.kernel()); + return absl::OkStatus(); } int3 Pooling::GetGridSize() const { @@ -410,16 +409,6 @@ int3 Pooling::GetGridSize() const { return int3(grid_x, grid_y, grid_z); } -absl::Status Pooling::Tune(const TuningParameters& params) { - RETURN_IF_ERROR(BindArguments()); - return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_); -} - -absl::Status Pooling::AddToQueue(CLCommandQueue* queue) { - RETURN_IF_ERROR(BindArguments()); - return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_); -} - Pooling CreatePooling(const OperationDef& definition, const Pooling2DAttributes& attr) { return Pooling(definition, attr); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/pooling.h b/tensorflow/lite/delegates/gpu/cl/kernels/pooling.h index 67d290eccb3..c0199d6de71 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/pooling.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/pooling.h @@ -31,9 +31,9 @@ class Pooling : public GPUOperation { public: Pooling(const OperationDef& definition, const Pooling2DAttributes& attr); Pooling(const OperationDef& definition, const Pooling3DAttributes& attr); - absl::Status AddToQueue(CLCommandQueue* queue) override; - absl::Status Tune(const TuningParameters& params) override; + absl::Status BindArguments() override; + int3 GetGridSize() const override; absl::Status Compile(const CreationContext& creation_context) override; // Move only @@ -43,9 +43,6 @@ class Pooling : public GPUOperation { Pooling& operator=(const Pooling&) = delete; private: - absl::Status BindArguments(); - int3 GetGridSize() const; - int4 stride_; int4 padding_; int4 kernel_size_; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/reshape.cc b/tensorflow/lite/delegates/gpu/cl/kernels/reshape.cc index 4cc5b1278f8..a2e1092b387 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/reshape.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/reshape.cc @@ -114,8 +114,7 @@ absl::Status Reshape::Compile(const CreationContext& creation_context) { absl::Status Reshape::BindArguments() { RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); - RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); - return args_.Bind(kernel_.kernel()); + return absl::OkStatus(); } int3 Reshape::GetGridSize() const { @@ -125,16 +124,6 @@ int3 Reshape::GetGridSize() const { return int3(grid_x, grid_y, grid_z); } -absl::Status Reshape::Tune(const TuningParameters& params) { - RETURN_IF_ERROR(BindArguments()); - return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_); -} - -absl::Status Reshape::AddToQueue(CLCommandQueue* queue) { - RETURN_IF_ERROR(BindArguments()); - return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_); -} - Reshape CreateReshape(const OperationDef& definition) { return Reshape(definition); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/reshape.h b/tensorflow/lite/delegates/gpu/cl/kernels/reshape.h index 8d95bbc86bc..571a225d02d 100644 --- 
a/tensorflow/lite/delegates/gpu/cl/kernels/reshape.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/reshape.h @@ -28,9 +28,9 @@ namespace cl { class Reshape : public GPUOperation { public: explicit Reshape(const OperationDef& definition) : GPUOperation(definition) {} - absl::Status AddToQueue(CLCommandQueue* queue) override; - absl::Status Tune(const TuningParameters& params) override; + absl::Status BindArguments() override; + int3 GetGridSize() const override; absl::Status Compile(const CreationContext& creation_context) override; // Move only @@ -38,10 +38,6 @@ class Reshape : public GPUOperation { Reshape& operator=(Reshape&& operation); Reshape(const Reshape&) = delete; Reshape& operator=(const Reshape&) = delete; - - private: - absl::Status BindArguments(); - int3 GetGridSize() const; }; Reshape CreateReshape(const OperationDef& definition); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.cc b/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.cc index e4c47b70a2c..1036dd8ef4e 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.cc @@ -99,8 +99,7 @@ absl::Status Reshapex4::Compile(const CreationContext& creation_context) { absl::Status Reshapex4::BindArguments() { RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); - RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); - return args_.Bind(kernel_.kernel()); + return absl::OkStatus(); } int3 Reshapex4::GetGridSize() const { @@ -110,16 +109,6 @@ int3 Reshapex4::GetGridSize() const { return int3(grid_x, grid_y, grid_z); } -absl::Status Reshapex4::Tune(const TuningParameters& params) { - RETURN_IF_ERROR(BindArguments()); - return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_); -} - -absl::Status Reshapex4::AddToQueue(CLCommandQueue* queue) { - RETURN_IF_ERROR(BindArguments()); - return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_); -} - Reshapex4 CreateReshapex4(const OperationDef& definition) { return Reshapex4(definition); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.h b/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.h index f7c98ab63f6..040b5b82e70 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/reshapex4.h @@ -30,9 +30,9 @@ class Reshapex4 : public GPUOperation { public: explicit Reshapex4(const OperationDef& definition) : GPUOperation(definition) {} - absl::Status AddToQueue(CLCommandQueue* queue) override; - absl::Status Tune(const TuningParameters& params) override; + absl::Status BindArguments() override; + int3 GetGridSize() const override; absl::Status Compile(const CreationContext& creation_context) override; // Move only @@ -40,10 +40,6 @@ class Reshapex4 : public GPUOperation { Reshapex4& operator=(Reshapex4&& operation); Reshapex4(const Reshapex4&) = delete; Reshapex4& operator=(const Reshapex4&) = delete; - - private: - absl::Status BindArguments(); - int3 GetGridSize() const; }; // More optimized, but require src_channels % 4 == 0 and dst_channels % 4 == 0 diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/resize.cc b/tensorflow/lite/delegates/gpu/cl/kernels/resize.cc index a47fff96d85..33bb3b8f4cb 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/resize.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/resize.cc @@ -227,8 +227,7 @@ absl::Status Resize::BindArguments() { RETURN_IF_ERROR(args_.SetFloat( "scale_factor_y", 
CalculateResizeScale(src_[0]->Height(), dst_[0]->Height(), attr_))); - RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); - return args_.Bind(kernel_.kernel()); + return absl::OkStatus(); } int3 Resize::GetGridSize() const { @@ -238,16 +237,6 @@ int3 Resize::GetGridSize() const { return int3(grid_x, grid_y, grid_z); } -absl::Status Resize::AddToQueue(CLCommandQueue* queue) { - RETURN_IF_ERROR(BindArguments()); - return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_); -} - -absl::Status Resize::Tune(const TuningParameters& params) { - RETURN_IF_ERROR(BindArguments()); - return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_); -} - Resize CreateResize(const OperationDef& definition, const Resize2DAttributes& attr) { return Resize(definition, attr); @@ -292,8 +281,7 @@ absl::Status Resize3D::BindArguments() { RETURN_IF_ERROR(args_.SetFloat( "scale_factor_z", CalculateResizeScale(src_[0]->Depth(), dst_[0]->Depth(), attr_))); - RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); - return args_.Bind(kernel_.kernel()); + return absl::OkStatus(); } int3 Resize3D::GetGridSize() const { @@ -303,16 +291,6 @@ int3 Resize3D::GetGridSize() const { return int3(grid_x, grid_y, grid_z); } -absl::Status Resize3D::AddToQueue(CLCommandQueue* queue) { - RETURN_IF_ERROR(BindArguments()); - return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_); -} - -absl::Status Resize3D::Tune(const TuningParameters& params) { - RETURN_IF_ERROR(BindArguments()); - return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_); -} - Resize3D CreateResize3D(const OperationDef& definition, const Resize3DAttributes& attr) { return Resize3D(definition, attr); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/resize.h b/tensorflow/lite/delegates/gpu/cl/kernels/resize.h index 10fb414214b..899c85b7758 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/resize.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/resize.h @@ -27,9 +27,8 @@ namespace cl { class Resize : public GPUOperation { public: - absl::Status AddToQueue(CLCommandQueue* queue) override; - absl::Status Tune(const TuningParameters& params) override; - + absl::Status BindArguments() override; + int3 GetGridSize() const override; absl::Status Compile(const CreationContext& creation_context) override; // Move only @@ -45,9 +44,6 @@ class Resize : public GPUOperation { Resize(const OperationDef& definition, const Resize2DAttributes& attr) : GPUOperation(definition), attr_(attr) {} - absl::Status BindArguments(); - int3 GetGridSize() const; - Resize2DAttributes attr_; }; @@ -56,9 +52,8 @@ Resize CreateResize(const OperationDef& definition, class Resize3D : public GPUOperation { public: - absl::Status AddToQueue(CLCommandQueue* queue) override; - absl::Status Tune(const TuningParameters& params) override; - + absl::Status BindArguments() override; + int3 GetGridSize() const override; absl::Status Compile(const CreationContext& creation_context) override; // Move only @@ -74,9 +69,6 @@ class Resize3D : public GPUOperation { Resize3D(const OperationDef& definition, const Resize3DAttributes& attr) : GPUOperation(definition), attr_(attr) {} - absl::Status BindArguments(); - int3 GetGridSize() const; - Resize3DAttributes attr_; }; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/softmax.cc b/tensorflow/lite/delegates/gpu/cl/kernels/softmax.cc index ea8671bac68..edc720dbdb8 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/softmax.cc +++ 
b/tensorflow/lite/delegates/gpu/cl/kernels/softmax.cc @@ -91,8 +91,7 @@ absl::Status Softmax::Compile(const CreationContext& creation_context) { absl::Status Softmax::BindArguments() { RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); - RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); - return args_.Bind(kernel_.kernel()); + return absl::OkStatus(); } int3 Softmax::GetGridSize() const { @@ -102,16 +101,6 @@ int3 Softmax::GetGridSize() const { return int3(grid_x, grid_y, grid_z); } -absl::Status Softmax::Tune(const TuningParameters& params) { - RETURN_IF_ERROR(BindArguments()); - return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_); -} - -absl::Status Softmax::AddToQueue(CLCommandQueue* queue) { - RETURN_IF_ERROR(BindArguments()); - return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_); -} - Softmax CreateSoftmax(const OperationDef& definition) { return Softmax(definition); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/softmax.h b/tensorflow/lite/delegates/gpu/cl/kernels/softmax.h index 5f974ef7e6d..eac06caa767 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/softmax.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/softmax.h @@ -30,9 +30,9 @@ class Softmax : public GPUOperation { public: Softmax() = default; explicit Softmax(const OperationDef& definition) : GPUOperation(definition) {} - absl::Status AddToQueue(CLCommandQueue* queue) override; - absl::Status Tune(const TuningParameters& params) override; + absl::Status BindArguments() override; + int3 GetGridSize() const override; absl::Status Compile(const CreationContext& creation_context) override; // Move only @@ -42,10 +42,6 @@ class Softmax : public GPUOperation { Softmax& operator=(const Softmax&) = delete; friend Softmax CreateSoftmax(); - - private: - absl::Status BindArguments(); - int3 GetGridSize() const; }; Softmax CreateSoftmax(const OperationDef& definition); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.cc b/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.cc index 28ebd8a2b13..33dd2857262 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.cc @@ -112,6 +112,7 @@ Softmax1x1& Softmax1x1::operator=(Softmax1x1&& kernel) { absl::Status Softmax1x1::Compile(const CreationContext& creation_context) { std::string code = GetSoftmaxKernelCode(definition_, &args_); std::string element_wise_code; + work_group_size_ = int3(32, 1, 1); RETURN_IF_ERROR( MergeOperations(linked_operations_, &args_, &element_wise_code)); RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(), @@ -122,7 +123,7 @@ absl::Status Softmax1x1::Compile(const CreationContext& creation_context) { *creation_context.device, &kernel_); } -absl::Status Softmax1x1::AddToQueue(CLCommandQueue* queue) { +absl::Status Softmax1x1::BindArguments() { RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); float4 mask = GetMaskForLastPlane(src_[0]->Channels()); @@ -132,12 +133,11 @@ absl::Status Softmax1x1::AddToQueue(CLCommandQueue* queue) { RETURN_IF_ERROR(args_.SetFloat("mask_w", mask.w)); RETURN_IF_ERROR( args_.SetInt("slices_x32", DivideRoundUp(src_[0]->Slices(), 32))); - RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); - RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); - return queue->DispatchImplicit(kernel_, {32, dst_[0]->Batch(), 1}, - {32, 1, 1}); + 
return absl::OkStatus(); } +int3 Softmax1x1::GetGridSize() const { return int3(32, dst_[0]->Batch(), 1); } + Softmax1x1 CreateSoftmax1x1(const OperationDef& definition) { return Softmax1x1(definition); } diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.h b/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.h index d5ae037a695..f749a7b3db6 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/softmax1x1.h @@ -30,8 +30,11 @@ class Softmax1x1 : public GPUOperation { Softmax1x1() = default; explicit Softmax1x1(const OperationDef& definition) : GPUOperation(definition) {} - absl::Status AddToQueue(CLCommandQueue* queue) override; - + absl::Status Tune(const TuningParameters& params) override { + return absl::OkStatus(); + } + absl::Status BindArguments() override; + int3 GetGridSize() const override; absl::Status Compile(const CreationContext& creation_context) override; // Move only diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.cc b/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.cc index 6b5cc9f484e..37c3e092995 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.cc @@ -106,8 +106,7 @@ absl::Status SpaceToDepth::BindArguments() { RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); RETURN_IF_ERROR(args_.SetInt("block_size", attr_.block_size)); - RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); - return args_.Bind(kernel_.kernel()); + return absl::OkStatus(); } int3 SpaceToDepth::GetGridSize() const { @@ -117,16 +116,6 @@ int3 SpaceToDepth::GetGridSize() const { return int3(grid_x, grid_y, grid_z); } -absl::Status SpaceToDepth::Tune(const TuningParameters& params) { - RETURN_IF_ERROR(BindArguments()); - return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_); -} - -absl::Status SpaceToDepth::AddToQueue(CLCommandQueue* queue) { - RETURN_IF_ERROR(BindArguments()); - return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_); -} - SpaceToDepth CreateSpaceToDepth(const OperationDef& op_def, const SpaceToDepthAttributes& attr) { return SpaceToDepth(op_def, attr); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.h b/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.h index 62689200643..99a0ca0c55c 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/space_to_depth.h @@ -30,8 +30,8 @@ class SpaceToDepth : public GPUOperation { public: SpaceToDepth(const OperationDef& op_def, const SpaceToDepthAttributes& attr) : GPUOperation(op_def), attr_(attr) {} - absl::Status AddToQueue(CLCommandQueue* queue) override; - absl::Status Tune(const TuningParameters& params) override; + absl::Status BindArguments() override; + int3 GetGridSize() const override; absl::Status Compile(const CreationContext& creation_context) override; SpaceToDepth(SpaceToDepth&& operation); @@ -40,9 +40,6 @@ class SpaceToDepth : public GPUOperation { SpaceToDepth& operator=(const SpaceToDepth&) = delete; private: - absl::Status BindArguments(); - int3 GetGridSize() const; - SpaceToDepthAttributes attr_; }; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.cc b/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.cc index 904e7fc08ce..443c4a403c1 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.cc +++ 
b/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.cc @@ -185,8 +185,7 @@ absl::Status StridedSlice::BindArguments() { RETURN_IF_ERROR(args_.SetInt("stride_y", attributes_.strides.h)); RETURN_IF_ERROR(args_.SetInt("stride_z", attributes_.strides.c)); RETURN_IF_ERROR(args_.SetInt("stride_b", attributes_.strides.b)); - RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); - return args_.Bind(kernel_.kernel()); + return absl::OkStatus(); } int3 StridedSlice::GetGridSize() const { @@ -196,16 +195,6 @@ int3 StridedSlice::GetGridSize() const { return int3(grid_x, grid_y, grid_z); } -absl::Status StridedSlice::Tune(const TuningParameters& params) { - RETURN_IF_ERROR(BindArguments()); - return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_); -} - -absl::Status StridedSlice::AddToQueue(CLCommandQueue* queue) { - RETURN_IF_ERROR(BindArguments()); - return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_); -} - StridedSlice CreateStridedSlice(const OperationDef& definition, const SliceAttributes& attr) { return StridedSlice(definition, attr); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.h b/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.h index 3d88bd9e96b..40005db7b21 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/strided_slice.h @@ -27,9 +27,8 @@ namespace cl { class StridedSlice : public GPUOperation { public: StridedSlice(const OperationDef& definition, const SliceAttributes& attr); - absl::Status AddToQueue(CLCommandQueue* queue) override; - absl::Status Tune(const TuningParameters& params) override; - + absl::Status BindArguments() override; + int3 GetGridSize() const override; absl::Status Compile(const CreationContext& creation_context) override; // Move only @@ -39,9 +38,6 @@ class StridedSlice : public GPUOperation { StridedSlice& operator=(const StridedSlice&) = delete; private: - absl::Status BindArguments(); - int3 GetGridSize() const; - SliceAttributes attributes_; }; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/transpose.cc b/tensorflow/lite/delegates/gpu/cl/kernels/transpose.cc index bd5df56f6ad..eb62e1e35f7 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/transpose.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/transpose.cc @@ -130,8 +130,7 @@ absl::Status Transpose::Compile(const CreationContext& creation_context) { absl::Status Transpose::BindArguments() { RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", src_[0])); RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); - RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); - return args_.Bind(kernel_.kernel()); + return absl::OkStatus(); } int3 Transpose::GetGridSize() const { @@ -141,16 +140,6 @@ int3 Transpose::GetGridSize() const { return int3(grid_x, grid_y, grid_z); } -absl::Status Transpose::Tune(const TuningParameters& params) { - RETURN_IF_ERROR(BindArguments()); - return GetBestWorkGroup(params, kernel_, GetGridSize(), &work_group_size_); -} - -absl::Status Transpose::AddToQueue(CLCommandQueue* queue) { - RETURN_IF_ERROR(BindArguments()); - return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_); -} - Transpose CreateTranspose(const OperationDef& definition, const TransposeAttributes& attr) { return Transpose(definition, attr); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/transpose.h b/tensorflow/lite/delegates/gpu/cl/kernels/transpose.h index 2c32fc439d9..36976d57ea6 100644 --- 
a/tensorflow/lite/delegates/gpu/cl/kernels/transpose.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/transpose.h @@ -28,8 +28,8 @@ class Transpose : public GPUOperation { public: Transpose(const OperationDef& definition, const TransposeAttributes& attr) : GPUOperation(definition), attr_(attr) {} - absl::Status AddToQueue(CLCommandQueue* queue) override; - absl::Status Tune(const TuningParameters& params) override; + absl::Status BindArguments() override; + int3 GetGridSize() const override; absl::Status Compile(const CreationContext& creation_context) override; // Move only @@ -39,9 +39,6 @@ class Transpose : public GPUOperation { Transpose& operator=(const Transpose&) = delete; private: - absl::Status BindArguments(); - int3 GetGridSize() const; - TransposeAttributes attr_; }; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc index a0f923861fa..d64b61a6a8e 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc @@ -403,8 +403,7 @@ absl::Status Winograd4x4To36::BindArguments() { RETURN_IF_ERROR(args_.SetInt("padding_y", -padding_.prepended.h)); RETURN_IF_ERROR(args_.SetInt("tiles_total", tiles_total)); RETURN_IF_ERROR(args_.SetInt("tiles_x", tiles_x)); - RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); - return args_.Bind(kernel_.kernel()); + return absl::OkStatus(); } int3 Winograd4x4To36::GetGridSize() const { @@ -417,9 +416,8 @@ int3 Winograd4x4To36::GetGridSize() const { absl::Status Winograd4x4To36::Tune(const TuningParameters& params) { switch (params.tuning_type) { case TuningType::EXHAUSTIVE: - RETURN_IF_ERROR(BindArguments()); - return GetBestWorkGroup(params, kernel_, GetGridSize(), - &work_group_size_); + RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); + return GetBestWorkGroup(params, kernel_, grid_size_, &work_group_size_); case TuningType::FAST: default: work_group_size_ = SelectBestWorkGroup(); @@ -427,11 +425,6 @@ absl::Status Winograd4x4To36::Tune(const TuningParameters& params) { } } -absl::Status Winograd4x4To36::AddToQueue(CLCommandQueue* queue) { - RETURN_IF_ERROR(BindArguments()); - return queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_); -} - absl::Status CreateWinograd4x4To36(const CreationContext& creation_context, const OperationDef& definition, const Padding2D& padding, @@ -506,8 +499,7 @@ absl::Status Winograd36To4x4::BindArguments() { RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", dst_[0])); const int tiles_x = DivideRoundUp(dst_[0]->Width(), 4); RETURN_IF_ERROR(args_.SetInt("tiles_x", tiles_x)); - RETURN_IF_ERROR(SetArguments(linked_operations_, &args_)); - return args_.Bind(kernel_.kernel()); + return absl::OkStatus(); } int3 Winograd36To4x4::GetGridSize() const { @@ -522,9 +514,8 @@ int3 Winograd36To4x4::GetGridSize() const { absl::Status Winograd36To4x4::Tune(const TuningParameters& params) { switch (params.tuning_type) { case TuningType::EXHAUSTIVE: - RETURN_IF_ERROR(BindArguments()); - return GetBestWorkGroup(params, kernel_, GetGridSize(), - &work_group_size_); + RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); + return GetBestWorkGroup(params, kernel_, grid_size_, &work_group_size_); case TuningType::FAST: default: work_group_size_ = SelectBestWorkGroup(); @@ -532,10 +523,6 @@ absl::Status Winograd36To4x4::Tune(const TuningParameters& params) { } } -absl::Status Winograd36To4x4::AddToQueue(CLCommandQueue* queue) { - RETURN_IF_ERROR(BindArguments()); - return 
queue->DispatchImplicit(kernel_, GetGridSize(), work_group_size_); -} absl::Status CreateWinograd36To4x4( const CreationContext& creation_context, const OperationDef& definition, diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.h b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.h index 3f57342201b..7fe0fc071ca 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.h @@ -38,7 +38,8 @@ class Winograd4x4To36 : public GPUOperation { : GPUOperation(definition), padding_(padding) { work_group_size_ = int3(128, 1, 1); } - absl::Status AddToQueue(CLCommandQueue* queue) override; + absl::Status BindArguments() override; + int3 GetGridSize() const override; absl::Status Tune(const TuningParameters& params) override; absl::Status Compile(const CreationContext& creation_context) override; @@ -58,9 +59,6 @@ class Winograd4x4To36 : public GPUOperation { // Must be called after kernel compilation int3 SelectBestWorkGroup(); - absl::Status BindArguments(); - int3 GetGridSize() const; - Padding2D padding_; }; @@ -76,7 +74,8 @@ class Winograd36To4x4 : public GPUOperation { : GPUOperation(definition) { work_group_size_ = int3(128, 1, 1); } - absl::Status AddToQueue(CLCommandQueue* queue) override; + absl::Status BindArguments() override; + int3 GetGridSize() const override; absl::Status Tune(const TuningParameters& params) override; absl::Status Compile(const CreationContext& creation_context) override; @@ -96,9 +95,6 @@ class Winograd36To4x4 : public GPUOperation { // Must be called after kernel compilation int3 SelectBestWorkGroup(); - - absl::Status BindArguments(); - int3 GetGridSize() const; }; absl::Status CreateWinograd36To4x4( From 31119371a8fa5879428ef7661d4ce410b91e6d30 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Wed, 15 Jul 2020 13:30:02 -0700 Subject: [PATCH 0532/2522] Test nightly_release config. PiperOrigin-RevId: 321427460 Change-Id: I1c4f7cad71942289ab2be4e60e86ca27fab6f3c4 --- .../ubuntu_16/gpu_py36_full/nightly_release.sh | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nightly_release.sh index 87b2e52d88a..c6fb6d469b1 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nightly_release.sh @@ -25,23 +25,11 @@ install_bazelisk python2.7 tensorflow/tools/ci_build/update_version.py --nightly # Run configure. 
-export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=1 -export TF_CUDA_VERSION=10 -export TF_CUDNN_VERSION=7 -export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 -export TF_NEED_TENSORRT=1 -export TENSORRT_INSTALL_PATH=/usr/local/tensorrt -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.6) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=opt --config=v2 \ - --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ - tensorflow/tools/pip_package:build_pip_package +bazel build --config=release_gpu_linux tensorflow/tools/pip_package:build_pip_package ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --nightly_flag ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --gpu --nightly_flag From c504fee3471e2f4e2bf257b4894db90b4395a5b5 Mon Sep 17 00:00:00 2001 From: WindQAQ Date: Wed, 15 Jul 2020 13:41:19 -0700 Subject: [PATCH 0533/2522] Change approximate default to False --- tensorflow/python/keras/activations.py | 10 +++++----- tensorflow/python/ops/nn_ops.py | 12 ++++++------ .../api/golden/v2/tensorflow.keras.activations.pbtxt | 2 +- tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt | 2 +- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/tensorflow/python/keras/activations.py b/tensorflow/python/keras/activations.py index 5dba2be1536..fe0bf5977f9 100644 --- a/tensorflow/python/keras/activations.py +++ b/tensorflow/python/keras/activations.py @@ -304,7 +304,7 @@ def relu(x, alpha=0., max_value=None, threshold=0): @keras_export('keras.activations.gelu', v1=[]) @dispatch.add_dispatch_support -def gelu(x, approximate=True): +def gelu(x, approximate=False): """Applies the Gaussian error linear unit (GELU) activation function. Gaussian error linear unit (GELU) computes @@ -317,12 +317,12 @@ def gelu(x, approximate=True): >>> x = tf.constant([-3.0, -1.0, 0.0, 1.0, 3.0], dtype=tf.float32) >>> y = tf.keras.activations.gelu(x) >>> y.numpy() - array([-0.00363752, -0.158808 , 0. , 0.841192 , 2.9963627 ], - dtype=float32) - >>> y = tf.keras.activations.gelu(x, approximate=False) - >>> y.numpy() array([-0.00404951, -0.15865529, 0. , 0.8413447 , 2.9959507 ], dtype=float32) + >>> y = tf.keras.activations.gelu(x, approximate=True) + >>> y.numpy() + array([-0.00363752, -0.15880796, 0. , 0.841192 , 2.9963627 ], + dtype=float32) Arguments: x: Input tensor. diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 158a10e2382..4dc7d5be03f 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -3494,7 +3494,7 @@ def leaky_relu(features, alpha=0.2, name=None): @tf_export("nn.gelu", v1=[]) @dispatch.add_dispatch_support -def gelu(features, approximate=True, name=None): +def gelu(features, approximate=False, name=None): """Compute the Gaussian Error Linear Unit (GELU) activation function. Gaussian error linear unit (GELU) computes @@ -3507,16 +3507,16 @@ def gelu(features, approximate=True, name=None): >>> x = tf.constant([-3.0, -1.0, 0.0, 1.0, 3.0], dtype=tf.float32) >>> y = tf.nn.gelu(x) >>> y.numpy() - array([-0.00363752, -0.158808 , 0. , 0.841192 , 2.9963627 ], - dtype=float32) - >>> y = tf.nn.gelu(x, approximate=False) - >>> y.numpy() array([-0.00404951, -0.15865529, 0. , 0.8413447 , 2.9959507 ], dtype=float32) + >>> y = tf.nn.gelu(x, approximate=True) + >>> y.numpy() + array([-0.00363752, -0.15880796, 0. 
, 0.841192 , 2.9963627 ], + dtype=float32) Args: features: A `Tensor` representing preactivation values. - approximate: An optional `bool`. Defaults to `True`. + approximate: An optional `bool`. Defaults to `False`. Whether to enable approximation. name: A name for the operation (optional). diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.activations.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.activations.pbtxt index 93daa37930c..b7b98a9d0ce 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.activations.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.activations.pbtxt @@ -14,7 +14,7 @@ tf_module { } member_method { name: "gelu" - argspec: "args=[\'x\', \'approximate\'], varargs=None, keywords=None, defaults=[\'True\'], " + argspec: "args=[\'x\', \'approximate\'], varargs=None, keywords=None, defaults=[\'False\'], " } member_method { name: "get" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt index d0c48e2ae85..741ab7fe017 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt @@ -170,7 +170,7 @@ tf_module { } member_method { name: "gelu" - argspec: "args=[\'features\', \'approximate\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], " + argspec: "args=[\'features\', \'approximate\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], " } member_method { name: "in_top_k" From 5a93bb92d4c4472d5d821d5aa2e5b1a8bc4b6b52 Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Wed, 15 Jul 2020 20:42:32 +0000 Subject: [PATCH 0534/2522] BUILD file and summary.cc style --- tensorflow/c/kernels/BUILD | 17 +++-------------- tensorflow/c/kernels/ops/summary.cc | 5 ++--- 2 files changed, 5 insertions(+), 17 deletions(-) diff --git a/tensorflow/c/kernels/BUILD b/tensorflow/c/kernels/BUILD index e8354a8941d..c5c652ab5d7 100644 --- a/tensorflow/c/kernels/BUILD +++ b/tensorflow/c/kernels/BUILD @@ -5,11 +5,6 @@ load( "tf_kernel_library", ) -load( - "//tensorflow/core/platform:rules_cc.bzl", - "cc_library" -) - package( default_visibility = ["//visibility:public"], licenses = ["notice"], # Apache 2.0 @@ -86,15 +81,9 @@ tf_cc_test( cc_library( name = "tensor_shape_utils", - srcs = [ - "tensor_shape_utils.cc", - ], - hdrs = [ - "tensor_shape_utils.h", - ], - deps = [ - "//tensorflow/c:tf_tensor", - ], + srcs = ["tensor_shape_utils.cc",], + hdrs = ["tensor_shape_utils.h",], + deps = [ "//tensorflow/c:tf_tensor",], visibility = ["//visibility:public"], ) diff --git a/tensorflow/c/kernels/ops/summary.cc b/tensorflow/c/kernels/ops/summary.cc index 9cacda36adf..98b8b743fa1 100644 --- a/tensorflow/c/kernels/ops/summary.cc +++ b/tensorflow/c/kernels/ops/summary.cc @@ -24,9 +24,8 @@ static void scalar_summary_shape_inference_fn(TF_ShapeInferenceContext* ctx, // Make shape handle a scalar value (empty shape) TF_ShapeInferenceContextSetOutput(ctx, 0, result, status); if (TF_GetCode(status) != TF_OK) { - std::ostringstream err; - err << "Error in setting output shape inference"; - TF_SetStatus(status, TF_INVALID_ARGUMENT, err.str().c_str()); + TF_SetStatus(status, TF_INVALID_ARGUMENT, + "Error in setting output shape inference"); } TF_DeleteShapeHandle(result); } From 39add97b3a36be8bfc77fb0b65f31bd3c8b3411f Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 15 Jul 2020 13:36:50 -0700 Subject: [PATCH 0535/2522] Add a test to verify that the TF Lite C API headers can build, link, and run successfully when compiled as C code. PiperOrigin-RevId: 321428801 Change-Id: I0cc6fc82006e27a22c73450b9a0cb86d5c50a3f6 --- tensorflow/lite/c/BUILD | 15 +++ tensorflow/lite/c/c_test.c | 143 ++++++++++++++++++++++++++++ tensorflow/lite/tools/make/Makefile | 1 + 3 files changed, 159 insertions(+) create mode 100644 tensorflow/lite/c/c_test.c diff --git a/tensorflow/lite/c/BUILD b/tensorflow/lite/c/BUILD index 1aa043b7c0c..366b43336b9 100644 --- a/tensorflow/lite/c/BUILD +++ b/tensorflow/lite/c/BUILD @@ -158,3 +158,18 @@ cc_test( "@com_google_googletest//:gtest", ], ) + +cc_test( + name = "c_test", + size = "small", + srcs = ["c_test.c"], + copts = tflite_copts(), + data = [ + "//tensorflow/lite:testdata/add.bin", + ], + deps = [ + ":c_api", + ":c_api_experimental", + ":common", + ], +) diff --git a/tensorflow/lite/c/c_test.c b/tensorflow/lite/c/c_test.c new file mode 100644 index 00000000000..1c550b9a195 --- /dev/null +++ b/tensorflow/lite/c/c_test.c @@ -0,0 +1,143 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/lite/c/c_api.h" +#include "tensorflow/lite/c/c_api_experimental.h" +#include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/c/builtin_op_data.h" + +// This file exists just to verify that the above header files above can build, +// link, and run as "C" code. + +#ifdef __cplusplus +#error "This file should be compiled as C code, not as C++." +#endif + +#include +#include +#include + +static void CheckFailed(const char *expression, const char *filename, + int line_number) { + fprintf(stderr, "ERROR: CHECK failed: %s:%d: %s\n", filename, line_number, + expression); + fflush(stderr); + abort(); +} + +// We use an extra level of macro indirection here to ensure that the +// macro arguments get evaluated, so that in a call to CHECK(foo), +// the call to STRINGIZE(condition) in the definition of the CHECK +// macro results in the string "foo" rather than the string "condition". +#define STRINGIZE(expression) STRINGIZE2(expression) +#define STRINGIZE2(expression) #expression + +// Like assert(), but not dependent on NDEBUG. +#define CHECK(condition) \ + ((condition) ? (void)0 \ + : CheckFailed(STRINGIZE(condition), __FILE__, __LINE__)) +#define ASSERT_EQ(expected, actual) CHECK((expected) == (actual)) +#define ASSERT_NE(expected, actual) CHECK((expected) != (actual)) +#define ASSERT_STREQ(expected, actual) \ + ASSERT_EQ(0, strcmp((expected), (actual))) + +// Test the TfLiteVersion function. 
+static void TestVersion(void) { + const char *version = TfLiteVersion(); + printf("Version = %s\n", version); + CHECK(version[0] != '\0'); +} + +static void TestSmokeTest(void) { + TfLiteModel* model = + TfLiteModelCreateFromFile("third_party/tensorflow/lite/testdata/add.bin"); + ASSERT_NE(model, NULL); + + TfLiteInterpreterOptions* options = TfLiteInterpreterOptionsCreate(); + ASSERT_NE(options, NULL); + TfLiteInterpreterOptionsSetNumThreads(options, 2); + + TfLiteInterpreter* interpreter = TfLiteInterpreterCreate(model, options); + ASSERT_NE(interpreter, NULL); + + // The options/model can be deleted immediately after interpreter creation. + TfLiteInterpreterOptionsDelete(options); + TfLiteModelDelete(model); + + ASSERT_EQ(TfLiteInterpreterAllocateTensors(interpreter), kTfLiteOk); + ASSERT_EQ(TfLiteInterpreterGetInputTensorCount(interpreter), 1); + ASSERT_EQ(TfLiteInterpreterGetOutputTensorCount(interpreter), 1); + + int input_dims[1] = {2}; + ASSERT_EQ(TfLiteInterpreterResizeInputTensor( + interpreter, 0, input_dims, 1), + kTfLiteOk); + ASSERT_EQ(TfLiteInterpreterAllocateTensors(interpreter), kTfLiteOk); + + TfLiteTensor* input_tensor = TfLiteInterpreterGetInputTensor(interpreter, 0); + ASSERT_NE(input_tensor, NULL); + ASSERT_EQ(TfLiteTensorType(input_tensor), kTfLiteFloat32); + ASSERT_EQ(TfLiteTensorNumDims(input_tensor), 1); + ASSERT_EQ(TfLiteTensorDim(input_tensor, 0), 2); + ASSERT_EQ(TfLiteTensorByteSize(input_tensor), sizeof(float) * 2); + ASSERT_NE(TfLiteTensorData(input_tensor), NULL); + ASSERT_STREQ(TfLiteTensorName(input_tensor), "input"); + + TfLiteQuantizationParams input_params = + TfLiteTensorQuantizationParams(input_tensor); + ASSERT_EQ(input_params.scale, 0.f); + ASSERT_EQ(input_params.zero_point, 0); + + float input[2] = {1.f, 3.f}; + ASSERT_EQ(TfLiteTensorCopyFromBuffer(input_tensor, input, + 2 * sizeof(float)), + kTfLiteOk); + + ASSERT_EQ(TfLiteInterpreterInvoke(interpreter), kTfLiteOk); + + const TfLiteTensor* output_tensor = + TfLiteInterpreterGetOutputTensor(interpreter, 0); + ASSERT_NE(output_tensor, NULL); + ASSERT_EQ(TfLiteTensorType(output_tensor), kTfLiteFloat32); + ASSERT_EQ(TfLiteTensorNumDims(output_tensor), 1); + ASSERT_EQ(TfLiteTensorDim(output_tensor, 0), 2); + ASSERT_EQ(TfLiteTensorByteSize(output_tensor), sizeof(float) * 2); + ASSERT_NE(TfLiteTensorData(output_tensor), NULL); + ASSERT_STREQ(TfLiteTensorName(output_tensor), "output"); + + TfLiteQuantizationParams output_params = + TfLiteTensorQuantizationParams(output_tensor); + ASSERT_EQ(output_params.scale, 0.f); + ASSERT_EQ(output_params.zero_point, 0); + + float output[2]; + ASSERT_EQ(TfLiteTensorCopyToBuffer(output_tensor, output, + 2 * sizeof(float)), + kTfLiteOk); + ASSERT_EQ(output[0], 3.f); + ASSERT_EQ(output[1], 9.f); + + TfLiteInterpreterDelete(interpreter); +} + +static void RunTests(void) { + TestVersion(); + TestSmokeTest(); +} + +int main(void) { + RunTests(); + return 0; +} diff --git a/tensorflow/lite/tools/make/Makefile b/tensorflow/lite/tools/make/Makefile index f8b67fbbe7d..cb1714161c4 100644 --- a/tensorflow/lite/tools/make/Makefile +++ b/tensorflow/lite/tools/make/Makefile @@ -148,6 +148,7 @@ endif CORE_CC_ALL_SRCS := $(sort $(CORE_CC_ALL_SRCS)) CORE_CC_EXCLUDE_SRCS := \ $(wildcard tensorflow/lite/*test.cc) \ +$(wildcard tensorflow/lite/*/*test.c) \ $(wildcard tensorflow/lite/*/*test.cc) \ $(wildcard tensorflow/lite/*/*/benchmark.cc) \ $(wildcard tensorflow/lite/*/*/example*.cc) \ From 0568e58089d44d502944c513bcd0d2cbd726650c Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: 
Wed, 15 Jul 2020 13:40:38 -0700 Subject: [PATCH 0536/2522] Relax assert_next op version checking. We no longer specify the transformation version. Instead of assert_next(["BatchV2"]), we now use assert_next(["Batch"]). This avoids unit test churn when updating op versions. PiperOrigin-RevId: 321429583 Change-Id: I700207510f3911420500bb3a15a05c27942c11bf --- tensorflow/core/kernels/data/experimental/BUILD | 1 + .../data/experimental/assert_next_dataset_op.cc | 16 +++++++++++----- .../kernel_tests/assert_next_test.py | 17 +++++++++++++---- .../kernel_tests/auto_shard_dataset_test.py | 2 +- .../optimization/inject_prefetch_test.py | 6 +++--- .../optimization/map_vectorization_test.py | 8 ++++---- .../optimization/noop_elimination_test.py | 2 +- .../python/data/experimental/ops/testing.py | 8 ++++---- 8 files changed, 38 insertions(+), 22 deletions(-) diff --git a/tensorflow/core/kernels/data/experimental/BUILD b/tensorflow/core/kernels/data/experimental/BUILD index 8457cfa6145..35446bdfbea 100644 --- a/tensorflow/core/kernels/data/experimental/BUILD +++ b/tensorflow/core/kernels/data/experimental/BUILD @@ -32,6 +32,7 @@ tf_kernel_library( deps = [ "//tensorflow/core:experimental_dataset_ops_op_lib", "//tensorflow/core:framework", + "//tensorflow/core:regexp_internal", "//tensorflow/core/kernels/data:name_utils", ], ) diff --git a/tensorflow/core/kernels/data/experimental/assert_next_dataset_op.cc b/tensorflow/core/kernels/data/experimental/assert_next_dataset_op.cc index 0fe35ed4b15..adda54a0cd9 100644 --- a/tensorflow/core/kernels/data/experimental/assert_next_dataset_op.cc +++ b/tensorflow/core/kernels/data/experimental/assert_next_dataset_op.cc @@ -19,6 +19,7 @@ limitations under the License. #include "tensorflow/core/framework/partial_tensor_shape.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/kernels/data/name_utils.h" +#include "tensorflow/core/platform/regexp.h" namespace tensorflow { namespace data { @@ -96,11 +97,16 @@ class AssertNextDatasetOp::Dataset : public DatasetBase { } int n = tokens.size(); for (size_t i = 0; i < dataset()->transformations_.size(); ++i) { - if (dataset()->transformations_[i] != tokens[n - 2 - i]) { - return errors::InvalidArgument( - "Asserted ", dataset()->transformations_[i], - " transformation at offset ", i, " but encountered ", - tokens[n - 2 - i], " transformation instead."); + std::string transformation_escaped = + RE2::QuoteMeta(dataset()->transformations_[i]); + std::string version_suffix = "(V\\d+)?"; + std::string expected_re = + absl::StrCat(transformation_escaped, version_suffix); + if (!RE2::FullMatch(tokens[n - 2 - i], expected_re)) { + return errors::InvalidArgument("Asserted transformation matching ", + expected_re, " at offset ", i, + " but encountered ", tokens[n - 2 - i], + " transformation instead."); } } return dataset()->input_->MakeIterator(ctx, this, prefix(), &input_impl_); diff --git a/tensorflow/python/data/experimental/kernel_tests/assert_next_test.py b/tensorflow/python/data/experimental/kernel_tests/assert_next_test.py index 37d0f1586a4..17419c9b299 100644 --- a/tensorflow/python/data/experimental/kernel_tests/assert_next_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/assert_next_test.py @@ -38,6 +38,17 @@ class AssertNextTest(test_base.DatasetTestBase, parameterized.TestCase): dataset = dataset.with_options(options) self.assertDatasetProduces(dataset, expected_output=[0]) + @combinations.generate(test_base.default_test_combinations()) + def testIgnoreVersionSuffix(self): + # The 
`batch` transformation creates a "BatchV2" dataset, but we should + # still match that with "Batch". + dataset = dataset_ops.Dataset.from_tensors(0).apply( + testing.assert_next(["Map", "Batch"])).map(lambda x: x).batch(1) + options = dataset_ops.Options() + options.experimental_optimization.apply_default_optimizations = False + dataset = dataset.with_options(options) + self.assertDatasetProduces(dataset, expected_output=[[0]]) + @combinations.generate(test_base.default_test_combinations()) def testAssertNextInvalid(self): dataset = dataset_ops.Dataset.from_tensors(0).apply( @@ -47,10 +58,8 @@ class AssertNextTest(test_base.DatasetTestBase, parameterized.TestCase): dataset = dataset.with_options(options) self.assertDatasetProduces( dataset, - expected_error=( - errors.InvalidArgumentError, - "Asserted Whoops transformation at offset 0 but encountered " - "Map transformation instead.")) + expected_error=(errors.InvalidArgumentError, + "Asserted transformation matching Whoops")) @combinations.generate(test_base.default_test_combinations()) def testAssertNextShort(self): diff --git a/tensorflow/python/data/experimental/kernel_tests/auto_shard_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/auto_shard_dataset_test.py index 8271dbada7a..cd3432f28b5 100644 --- a/tensorflow/python/data/experimental/kernel_tests/auto_shard_dataset_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/auto_shard_dataset_test.py @@ -391,7 +391,7 @@ class AutoShardDatasetTest(reader_dataset_ops_test_base.TFRecordDatasetTestBase, # Tests that Rebatch is a passthrough op. dataset = dataset_ops.Dataset.list_files(self.test_filenames, shuffle=False) dataset = dataset.apply( - testing.assert_next(["Shard", "FlatMap", "BatchV2", "Rebatch"])) + testing.assert_next(["Shard", "FlatMap", "Batch", "Rebatch"])) dataset = dataset.flat_map(core_readers.TFRecordDataset) dataset = dataset.batch(5) dataset = distribute._RebatchDataset(dataset, num_replicas=1) diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/inject_prefetch_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/inject_prefetch_test.py index 4e908ead618..a4d5e9db785 100644 --- a/tensorflow/python/data/experimental/kernel_tests/optimization/inject_prefetch_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/optimization/inject_prefetch_test.py @@ -37,7 +37,7 @@ class InjectPrefetchTest(test_base.DatasetTestBase, parameterized.TestCase): def testParallelMap(self): dataset = dataset_ops.Dataset.range(100) dataset = dataset.apply( - testing.assert_next(["ParallelMapV2", "Prefetch", "FiniteTake"])) + testing.assert_next(["ParallelMap", "Prefetch", "FiniteTake"])) dataset = dataset.map( lambda x: x + 1, num_parallel_calls=dataset_ops.AUTOTUNE) dataset = dataset.take(50) @@ -61,7 +61,7 @@ class InjectPrefetchTest(test_base.DatasetTestBase, parameterized.TestCase): def testParallelInterleave(self): dataset = dataset_ops.Dataset.range(100) dataset = dataset.apply( - testing.assert_next(["ParallelInterleaveV4", "Prefetch", "FiniteTake"])) + testing.assert_next(["ParallelInterleave", "Prefetch", "FiniteTake"])) dataset = dataset.interleave( lambda x: dataset_ops.Dataset.from_tensors(x + 1), num_parallel_calls=dataset_ops.AUTOTUNE) @@ -74,7 +74,7 @@ class InjectPrefetchTest(test_base.DatasetTestBase, parameterized.TestCase): dataset = dataset_ops.Dataset.range(100) dataset = dataset.apply( testing.assert_next([ - "ParallelMapV2", "Prefetch", "ParallelInterleaveV4", "Prefetch", + "ParallelMap", 
"Prefetch", "ParallelInterleave", "Prefetch", "MapAndBatch", "Prefetch", "FiniteTake" ])) dataset = dataset.map( diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py index 080a03c76dd..3876408697f 100644 --- a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py @@ -223,7 +223,7 @@ class MapVectorizationTest(test_base.DatasetTestBase, parameterized.TestCase): """ map_node_name = "Map" if num_parallel_calls is not None: - map_node_name = "ParallelMapV2" + map_node_name = "ParallelMap" def _make_dataset(node_names): dataset = base_dataset.apply(testing.assert_next(node_names)) @@ -235,11 +235,11 @@ class MapVectorizationTest(test_base.DatasetTestBase, parameterized.TestCase): dataset = dataset.with_options(options) return dataset - unoptimized = _make_dataset([map_node_name, "BatchV2"]) + unoptimized = _make_dataset([map_node_name, "Batch"]) # Note that because of the `ChooseDataset` fork, we can't use `assert_next` # to verify the optimization result. - optimized = _make_dataset(["ChooseFastestBranch"] if expect_optimized else - [map_node_name, "BatchV2"]) + optimized = _make_dataset(["ChooseFastestBranch"] + if expect_optimized else [map_node_name, "Batch"]) optimized = self._enable_map_vectorization(optimized) return unoptimized, optimized diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/noop_elimination_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/noop_elimination_test.py index f564fac4f1b..e8fdf5f2e24 100644 --- a/tensorflow/python/data/experimental/kernel_tests/optimization/noop_elimination_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/optimization/noop_elimination_test.py @@ -50,7 +50,7 @@ def _test_combinations(): ds = ds.map(lambda x: (x, x), num_parallel_calls=2) # Not eliminated return ds.map(lambda x, y: (x, y)) # Eliminated - parallel_map_name = "ParallelMapV2" + parallel_map_name = "ParallelMap" cases = [ ("Skip0", lambda ds: ds.skip(0), None), diff --git a/tensorflow/python/data/experimental/ops/testing.py b/tensorflow/python/data/experimental/ops/testing.py index ec5b4810b23..327bce225e4 100644 --- a/tensorflow/python/data/experimental/ops/testing.py +++ b/tensorflow/python/data/experimental/ops/testing.py @@ -23,11 +23,13 @@ from tensorflow.python.framework import ops from tensorflow.python.ops import gen_experimental_dataset_ops -# TODO(jsimsa): Support RE matching for both individual transformation (e.g. to -# account for indexing) and transformation sequence. def assert_next(transformations): """A transformation that asserts which transformations happen next. + Transformations should be referred to by their base name, not including + version suffix. For example, use "Batch" instead of "BatchV2". "Batch" will + match any of "Batch", "BatchV1", "BatchV2", etc. + Args: transformations: A `tf.string` vector `tf.Tensor` identifying the transformations that are expected to happen next. 
@@ -119,5 +121,3 @@ class _SleepDataset(dataset_ops.UnaryUnchangedStructureDataset): self._sleep_microseconds, **self._flat_structure) super(_SleepDataset, self).__init__(input_dataset, variant_tensor) - - From e42e9de4b7c9302010e64c5557f17f378d6eb134 Mon Sep 17 00:00:00 2001 From: Sachin Joglekar Date: Wed, 15 Jul 2020 13:41:18 -0700 Subject: [PATCH 0537/2522] Set the dynamic tensor flag on Hexagon delegate if required PiperOrigin-RevId: 321429702 Change-Id: I8c98e1f09c394a1bb362b441b6ff1ea368f4e985 --- tensorflow/lite/delegates/hexagon/hexagon_delegate.cc | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tensorflow/lite/delegates/hexagon/hexagon_delegate.cc b/tensorflow/lite/delegates/hexagon/hexagon_delegate.cc index 83437df8cd6..ef3162739a9 100644 --- a/tensorflow/lite/delegates/hexagon/hexagon_delegate.cc +++ b/tensorflow/lite/delegates/hexagon/hexagon_delegate.cc @@ -83,9 +83,13 @@ class HexagonDelegate : public SimpleDelegateInterface { TfLiteDelegate* TfLiteHexagonDelegateCreate( const TfLiteHexagonDelegateOptions* options) { - // return tflite::CreateDelegate(options); - return tflite::TfLiteDelegateFactory::CreateSimpleDelegate( - std::make_unique(options)); + auto* initialized_delegate = + tflite::TfLiteDelegateFactory::CreateSimpleDelegate( + std::make_unique(options)); + if (options->enable_dynamic_batch_size) { + initialized_delegate->flags |= kTfLiteDelegateFlagsAllowDynamicTensors; + } + return initialized_delegate; } TfLiteHexagonDelegateOptions TfLiteHexagonDelegateOptionsDefault() { From 7b0a2473467e922f7a3b00076d7c3878e7a132ae Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Wed, 15 Jul 2020 20:57:34 +0000 Subject: [PATCH 0538/2522] added Scalar Shape to TF_ShapeInfererence functions --- tensorflow/c/ops.cc | 6 ++++++ tensorflow/c/ops.h | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/tensorflow/c/ops.cc b/tensorflow/c/ops.cc index 118385ed72c..b1a69922e5d 100644 --- a/tensorflow/c/ops.cc +++ b/tensorflow/c/ops.cc @@ -104,6 +104,12 @@ TF_ShapeHandle* TF_NewShapeHandle() { return reinterpret_cast(new ShapeHandle); } +TF_ShapeHandle* TF_ShapeInferenceContextScalar(TF_ShapeInferenceContext* ctx) { + auto* handle = new ShapeHandle; + *handle = reinterpret_cast(ctx)->Scalar(); + return reinterpret_cast(handle); +} + TF_ShapeHandle* TF_ShapeInferenceContextVectorFromSize( TF_ShapeInferenceContext* ctx, size_t size) { auto* handle = new ShapeHandle; diff --git a/tensorflow/c/ops.h b/tensorflow/c/ops.h index 14868e40260..91cad73101f 100644 --- a/tensorflow/c/ops.h +++ b/tensorflow/c/ops.h @@ -280,6 +280,11 @@ extern void TF_ShapeInferenceContextSetOutput(TF_ShapeInferenceContext* ctx, int i, TF_ShapeHandle* handle, TF_Status* status); +// Returns a newly-allocated scalar shape handle. The returned handle should +// be freed with TF_DeleteShapeHandle. +TF_CAPI_EXPORT extern TF_ShapeHandle* TF_ShapeInferenceContextScalar( + TF_ShapeInferenceContext* ctx); + // Returns a newly-allocate shape handle representing a vector of the given // size. The returned handle should be freed with TF_DeleteShapeHandle. TF_CAPI_EXPORT extern TF_ShapeHandle* TF_ShapeInferenceContextVectorFromSize( From 806a053eb5b54575480bd22609a4286ca929941e Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 15 Jul 2020 13:49:59 -0700 Subject: [PATCH 0539/2522] Internal change PiperOrigin-RevId: 321431476 Change-Id: I9907a93b99cd08a05699096e9314c34cbd55601f --- tensorflow/core/kernels/BUILD | 1 - tensorflow/core/kernels/random_binomial_op.cc | 4 + tensorflow/core/kernels/random_op.cc | 2 +- tensorflow/core/kernels/random_op_cpu.h | 13 +- tensorflow/core/kernels/random_op_test.cc | 38 ++--- tensorflow/core/lib/random/BUILD | 1 - .../core/lib/random/random_distributions.h | 139 ++++-------------- .../kernel_tests/random/random_ops_test.py | 12 +- 8 files changed, 60 insertions(+), 150 deletions(-) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index baaf4c129fe..b4129e05f91 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -2424,7 +2424,6 @@ cc_library( deps = [ "//tensorflow/core:lib", "//tensorflow/core:lib_internal", - "//tensorflow/core/framework:numeric_types", ], ) diff --git a/tensorflow/core/kernels/random_binomial_op.cc b/tensorflow/core/kernels/random_binomial_op.cc index 172d04940cd..4647457ff6f 100644 --- a/tensorflow/core/kernels/random_binomial_op.cc +++ b/tensorflow/core/kernels/random_binomial_op.cc @@ -183,6 +183,10 @@ struct RandomBinomialFunctor { // We have B1 * ... * Bk samples per batch member we need. auto DoWork = [num_batches, samples_per_batch, &bcast, &counts, &probs, &gen, &output](int start_output, int limit_output) { + // Vectorized intermediate calculations for uniform rejection sampling. + // We always generate at most 4 samples. + Eigen::array z; + Eigen::array g; const bool should_bcast = bcast.IsBroadcastingRequired(); const auto& counts_batch_indices = bcast.x_batch_indices(); const auto& probs_batch_indices = bcast.y_batch_indices(); diff --git a/tensorflow/core/kernels/random_op.cc b/tensorflow/core/kernels/random_op.cc index 3f55fc84b46..152ab5f7d1e 100644 --- a/tensorflow/core/kernels/random_op.cc +++ b/tensorflow/core/kernels/random_op.cc @@ -344,7 +344,7 @@ class RandomGammaOp : public OpKernel { .HostMemory("shape") \ .TypeConstraint("dtype"), \ PhiloxRandomOp>); \ + random::PhiloxRandom, TYPE>>); \ REGISTER_KERNEL_BUILDER( \ Name("RandomStandardNormal") \ .Device(DEVICE_CPU) \ diff --git a/tensorflow/core/kernels/random_op_cpu.h b/tensorflow/core/kernels/random_op_cpu.h index 232651dc429..eac1faee2e4 100644 --- a/tensorflow/core/kernels/random_op_cpu.h +++ b/tensorflow/core/kernels/random_op_cpu.h @@ -86,13 +86,7 @@ struct FillPhiloxRandomTask { int64 start_group, int64 limit_group, Distribution dist) { const int kGroupSize = Distribution::kResultElementCount; - // Decide skip strides according to different kResultElementCount: - // * `1 = (4 + 3) / 4` for normal Distribution. - // * `1 = (2 + 3) / 4` for double/int64 Distribution. - // * `4 = (16 + 3) / 4` for vectorized float/bfloat16 Distribution. 
- const int skip_strides = - (kGroupSize + gen.kResultElementCount - 1) / gen.kResultElementCount; - gen.Skip(start_group * skip_strides); + gen.Skip(start_group); int64 offset = start_group * kGroupSize; // First fill all the full-size groups @@ -172,8 +166,9 @@ void FillPhiloxRandom::operator()( int64 total_group_count = (size + kGroupSize - 1) / kGroupSize; - const int kGroupCost = kGroupSize * (random::PhiloxRandom::kElementCost + - Distribution::kElementCost); + const int kGroupCost = + random::PhiloxRandom::kResultElementCount * + (random::PhiloxRandom::kElementCost + Distribution::kElementCost); Shard(worker_threads.num_threads, worker_threads.workers, total_group_count, kGroupCost, [&gen, data, size, dist](int64 start_group, int64 limit_group) { diff --git a/tensorflow/core/kernels/random_op_test.cc b/tensorflow/core/kernels/random_op_test.cc index 84a2c2b988a..47d94ad9028 100644 --- a/tensorflow/core/kernels/random_op_test.cc +++ b/tensorflow/core/kernels/random_op_test.cc @@ -37,41 +37,41 @@ Tensor VecShape(int64 v) { } } -Graph* RandomUniform(int64 n, DataType dtype) { +Graph* RandomUniform(int64 n) { Graph* g = new Graph(OpRegistry::Global()); - test::graph::RandomUniform(g, test::graph::Constant(g, VecShape(n)), dtype); + test::graph::RandomUniform(g, test::graph::Constant(g, VecShape(n)), + DT_FLOAT); return g; } -Graph* RandomNormal(int64 n, DataType dtype) { +Graph* RandomNormal(int64 n) { Graph* g = new Graph(OpRegistry::Global()); - test::graph::RandomGaussian(g, test::graph::Constant(g, VecShape(n)), dtype); + test::graph::RandomGaussian(g, test::graph::Constant(g, VecShape(n)), + DT_FLOAT); return g; } -Graph* TruncatedNormal(int64 n, DataType dtype) { +Graph* TruncatedNormal(int64 n) { Graph* g = new Graph(OpRegistry::Global()); - test::graph::TruncatedNormal(g, test::graph::Constant(g, VecShape(n)), dtype); + test::graph::TruncatedNormal(g, test::graph::Constant(g, VecShape(n)), + DT_FLOAT); return g; } -#define BM_RNG(DEVICE, RNG, DTYPE) \ - void BM_##DEVICE##_##RNG##_##DTYPE(int iters, int arg) { \ +#define BM_RNG(DEVICE, RNG) \ + void BM_##DEVICE##_##RNG(int iters, int arg) { \ testing::ItemsProcessed(static_cast(iters) * arg); \ - test::Benchmark(#DEVICE, RNG(arg, DTYPE)).Run(iters); \ + test::Benchmark(#DEVICE, RNG(arg)).Run(iters); \ } \ - BENCHMARK(BM_##DEVICE##_##RNG##_##DTYPE)->Range(1 << 20, 8 << 20); + BENCHMARK(BM_##DEVICE##_##RNG)->Range(1 << 20, 8 << 20); -BM_RNG(cpu, RandomUniform, DT_FLOAT); -BM_RNG(cpu, RandomUniform, DT_BFLOAT16); -BM_RNG(cpu, RandomNormal, DT_FLOAT); -BM_RNG(cpu, TruncatedNormal, DT_FLOAT); +BM_RNG(cpu, RandomUniform); +BM_RNG(cpu, RandomNormal); +BM_RNG(cpu, TruncatedNormal); -#ifdef GOOGLE_CUDA -BM_RNG(gpu, RandomUniform, DT_FLOAT); -BM_RNG(gpu, RandomNormal, DT_FLOAT); -BM_RNG(gpu, TruncatedNormal, DT_FLOAT); -#endif +BM_RNG(gpu, RandomUniform); +BM_RNG(gpu, RandomNormal); +BM_RNG(gpu, TruncatedNormal); Tensor VecAlphas(int64 n) { Tensor alphas(DT_DOUBLE, TensorShape({n})); diff --git a/tensorflow/core/lib/random/BUILD b/tensorflow/core/lib/random/BUILD index 02ead082245..1487a813149 100644 --- a/tensorflow/core/lib/random/BUILD +++ b/tensorflow/core/lib/random/BUILD @@ -40,7 +40,6 @@ cc_library( deps = [ ":exact_uniform_int", ":philox_random", - "//tensorflow/core/framework:numeric_types", "//tensorflow/core/lib/bfloat16", "//tensorflow/core/lib/gtl:array_slice", "//tensorflow/core/platform:logging", diff --git a/tensorflow/core/lib/random/random_distributions.h b/tensorflow/core/lib/random/random_distributions.h index 
fcc269dd15b..386f13347d7 100644 --- a/tensorflow/core/lib/random/random_distributions.h +++ b/tensorflow/core/lib/random/random_distributions.h @@ -18,12 +18,12 @@ limitations under the License. #include -#include #include + +#include #include #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#include "tensorflow/core/framework/numeric_types.h" #include "tensorflow/core/lib/bfloat16/bfloat16.h" #include "tensorflow/core/lib/random/philox_random.h" @@ -32,56 +32,13 @@ namespace random { // Helper function to convert a 16-bit integer to a half between [0..1). PHILOX_DEVICE_INLINE Eigen::half Uint16ToHalf(uint16 x); -// Helper function to convert a 16-bit integer to a bfloat16 between [1..2). -PHILOX_DEVICE_INLINE bfloat16 InternalUint16ToBfloat16(uint16 x); // Helper function to convert a 16-bit integer to a bfloat16 between [0..1). -PHILOX_DEVICE_INLINE bfloat16 Uint16ToBfloat16(uint16 x); -// Helper function to convert a 32-bit integer to a float between [1..2). -PHILOX_DEVICE_INLINE float InternalUint32ToFloat(uint32 x); +PHILOX_DEVICE_INLINE bfloat16 Uint16ToGfloat16(uint16 x); // Helper function to convert a 32-bit integer to a float between [0..1). PHILOX_DEVICE_INLINE float Uint32ToFloat(uint32 x); // Helper function to convert two 32-bit integers to a double between [0..1). PHILOX_DEVICE_INLINE double Uint64ToDouble(uint32 x0, uint32 x1); -// Helper function to format distribution result in vectorization path, -// it creates Eigen::Tensor and reuses packet feature with SIMD. -// This function can only work on CPU -template -PHILOX_DEVICE_INLINE typename Distribution::ResultType VectorizedFormat( - Generator* gen, typename Distribution::FormatFunc functor) { - typename Generator::ResultType sample; - typename Distribution::ResultType result; - const int kResultElementCount = Distribution::kResultElementCount; - const int inner_count = Generator::kResultElementCount; - const int outer_count = kResultElementCount / inner_count; - int offset = 0; - - for (int k = 0; k < outer_count; k++) { - sample = (*gen)(); - for (int i = 0; i < inner_count; i++, offset++) { - result[offset] = (*functor)(sample[i]); - } - } - // Tail processing if any. - // Put the tail condition out of above loop to improve performance: - // it will be executed only once and save time on CPU. - if (offset < kResultElementCount) { - sample = (*gen)(); - for (int i = 0; offset < kResultElementCount; i++, offset++) { - result[offset] = (*functor)(sample[i]); - } - } - - typedef Eigen::TensorMap< - Eigen::Tensor, - Eigen::Aligned> - Tensor; - auto tensor_result = Tensor(&result[0], kResultElementCount); - tensor_result = tensor_result - typename Distribution::ResultElementType(1.0); - return result; -} - // Computes a + b. Requires that the result is representable in the destination // type and that b is not maximal (i.e. b + 1 is not 0). Notably, the addend b // need *not* be representable in that type. (The condition on b excludes the @@ -105,15 +62,13 @@ PHILOX_DEVICE_INLINE Int SignedAdd(Int a, // actual returned sample type. // RealType: the data type of the real numbers that will be returned by the // distribution. This could be either float or double for now. -// IsVec: mark this UniformDistribution can be vectorized or not by SIMD on -// CPU. Note this should always be false on GPU. // This class is meant to be implemented through specialization. The default // is not defined by design. 
-template +template class UniformDistribution; -template -class UniformDistribution { +template +class UniformDistribution { public: // The number of elements that will be returned. static constexpr int kResultElementCount = Generator::kResultElementCount; @@ -136,17 +91,11 @@ class UniformDistribution { } }; -template -class UniformDistribution { +template +class UniformDistribution { public: // The number of elements that will be returned. - // Set the number to be Eigen packet size of type at least, so computations - // can be vectorized using SIMD on CPU. - static constexpr int kVectorLength = std::max( - static_cast(Eigen::internal::packet_traits::size), - Generator::kResultElementCount); - static constexpr int kResultElementCount = - IsVec ? kVectorLength : Generator::kResultElementCount; + static constexpr int kResultElementCount = Generator::kResultElementCount; // Cost of generation of a single element (in cycles). static constexpr int kElementCost = 3; // Indicate that this distribution may take variable number of samples @@ -154,37 +103,23 @@ class UniformDistribution { static constexpr bool kVariableSamplesPerOutput = false; typedef Array ResultType; typedef bfloat16 ResultElementType; - // Helper definition for the format function. - typedef bfloat16 (*FormatFunc)(uint16); PHILOX_DEVICE_INLINE ResultType operator()(Generator* gen) { -#ifdef __CUDA_ARCH__ - static_assert(!IsVec, "Can't vectorize Distribution on GPU"); typename Generator::ResultType sample = (*gen)(); ResultType result; for (int i = 0; i < kResultElementCount; ++i) { - result[i] = Uint16ToBfloat16(sample[i]); + result[i] = Uint16ToGfloat16(sample[i]); } return result; -#else - return VectorizedFormat, - Generator>(gen, InternalUint16ToBfloat16); -#endif // __CUDA_ARCH__ } }; -template -class UniformDistribution { +template +class UniformDistribution { public: // The number of elements that will be returned. - // Set the number to be Eigen packet size of type at least, so computations - // can be vectorized using SIMD on CPU. - static constexpr int kVectorLength = std::max( - static_cast(Eigen::internal::packet_traits::size), - Generator::kResultElementCount); - static constexpr int kResultElementCount = - IsVec ? kVectorLength : Generator::kResultElementCount; + static constexpr int kResultElementCount = Generator::kResultElementCount; // Cost of generation of a single element (in cycles). static constexpr int kElementCost = 3; // Indicate that this distribution may take variable number of samples @@ -192,28 +127,20 @@ class UniformDistribution { static constexpr bool kVariableSamplesPerOutput = false; typedef Array ResultType; typedef float ResultElementType; - // Helper definition for the format function. - typedef float (*FormatFunc)(uint32); PHILOX_DEVICE_INLINE ResultType operator()(Generator* gen) { -#ifdef __CUDA_ARCH__ - static_assert(!IsVec, "Can't vectorize Distribution on GPU"); typename Generator::ResultType sample = (*gen)(); ResultType result; for (int i = 0; i < kResultElementCount; ++i) { result[i] = Uint32ToFloat(sample[i]); } return result; -#else - return VectorizedFormat, - Generator>(gen, InternalUint32ToFloat); -#endif // __CUDA_ARCH__ } }; -template -class UniformDistribution { +template +class UniformDistribution { public: // The number of elements that will be returned. 
static constexpr int kResultElementCount = Generator::kResultElementCount / 2; @@ -236,8 +163,8 @@ class UniformDistribution { } }; -template -class UniformDistribution { +template +class UniformDistribution { public: // The number of elements that will be returned. static constexpr int kResultElementCount = Generator::kResultElementCount; @@ -271,8 +198,8 @@ class UniformDistribution { uint32 range_; }; -template -class UniformDistribution { +template +class UniformDistribution { public: // The number of elements that will be returned. static constexpr int kResultElementCount = Generator::kResultElementCount / 2; @@ -837,9 +764,9 @@ PHILOX_DEVICE_INLINE Eigen::half Uint16ToHalf(uint16 x) { return result - Eigen::half(1.0); } -// Helper function to convert an 16-bit integer to a bfloat16 between [1..2). -// This can create a uniform distribution of values between [1..2). -PHILOX_DEVICE_INLINE bfloat16 InternalUint16ToBfloat16(uint16 x) { +// Helper function to convert an 16-bit integer to a bfloat16 between [0..1). +// This can create a uniform distribution of values between [0..1). +PHILOX_DEVICE_INLINE bfloat16 Uint16ToGfloat16(uint16 x) { // bfloat are formatted as follows (MSB first): // sign(1) exponent(8) mantissa(7) // Conceptually construct the following: @@ -853,20 +780,13 @@ PHILOX_DEVICE_INLINE bfloat16 InternalUint16ToBfloat16(uint16 x) { bfloat16 result; memcpy(&result, &val, sizeof(val)); // The mantissa has an implicit leading 1, so the above code creates a value - // in [1, 2). - return result; -} - -// Helper function to convert an 16-bit integer to a bfloat16 between [0..1). -// This can create a uniform distribution of values between [0..1). -PHILOX_DEVICE_INLINE bfloat16 Uint16ToBfloat16(uint16 x) { - // The minus will not cause a rounding that makes the result 1. + // in [1, 2). The minus will not cause a rounding that makes the result 1. // Instead it will just be close to 1. - return InternalUint16ToBfloat16(x) - bfloat16(1.0); + return result - bfloat16(1.0); } -// Helper function to convert an 32-bit integer to a float between [1..2). -PHILOX_DEVICE_INLINE float InternalUint32ToFloat(uint32 x) { +// Helper function to convert an 32-bit integer to a float between [0..1). +PHILOX_DEVICE_INLINE float Uint32ToFloat(uint32 x) { // IEEE754 floats are formatted as follows (MSB first): // sign(1) exponent(8) mantissa(23) // Conceptually construct the following: @@ -880,12 +800,7 @@ PHILOX_DEVICE_INLINE float InternalUint32ToFloat(uint32 x) { // Assumes that endian-ness is same for float and uint32. float result; memcpy(&result, &val, sizeof(val)); - return result; -} - -// Helper function to convert an 32-bit integer to a float between [0..1). -PHILOX_DEVICE_INLINE float Uint32ToFloat(uint32 x) { - return InternalUint32ToFloat(x) - 1.0f; + return result - 1.0f; } // Helper function to convert two 32-bit integers to a double between [0..1). 
diff --git a/tensorflow/python/kernel_tests/random/random_ops_test.py b/tensorflow/python/kernel_tests/random/random_ops_test.py index 7bbb50db031..c361f79fb1f 100644 --- a/tensorflow/python/kernel_tests/random/random_ops_test.py +++ b/tensorflow/python/kernel_tests/random/random_ops_test.py @@ -276,9 +276,8 @@ class RandomUniformTest(RandomOpTestCommon): def testRange(self): for dt in (dtypes.float16, dtypes.float32, dtypes.float64, dtypes.int32, - dtypes.int64, dtypes.bfloat16): - use_gpu = (dt != dtypes.bfloat16) - sampler = self._Sampler(1000, minv=-2, maxv=8, dtype=dt, use_gpu=use_gpu) + dtypes.int64): + sampler = self._Sampler(1000, minv=-2, maxv=8, dtype=dt, use_gpu=True) x = sampler() self.assertTrue(-2 <= np.min(x)) self.assertTrue(np.max(x) < 8) @@ -364,11 +363,10 @@ class RandomUniformTest(RandomOpTestCommon): @test_util.run_deprecated_v1 def testSeed(self): for dt in (dtypes.float16, dtypes.float32, dtypes.float64, dtypes.int32, - dtypes.int64, dtypes.bfloat16): + dtypes.int64): for seed in [345, 2**100, -2**100]: - use_gpu = (dt != dtypes.bfloat16) - sx = self._Sampler(1000, 0, 17, dtype=dt, use_gpu=use_gpu, seed=seed) - sy = self._Sampler(1000, 0, 17, dtype=dt, use_gpu=use_gpu, seed=seed) + sx = self._Sampler(1000, 0, 17, dtype=dt, use_gpu=True, seed=seed) + sy = self._Sampler(1000, 0, 17, dtype=dt, use_gpu=True, seed=seed) self.assertAllEqual(sx(), sy()) @test_util.run_deprecated_v1 From 822b035fd1382128eb8ac65b9a89c2348b8fb7cf Mon Sep 17 00:00:00 2001 From: Michael Gester Date: Wed, 15 Jul 2020 14:02:00 -0700 Subject: [PATCH 0540/2522] Support SAME padding, non-default data formats and AvgPool3D in AvgPool lowering Also added unit tests and enabled all related functional tests for pooling ops. PiperOrigin-RevId: 321433980 Change-Id: Ia175333d81398deadfcc9e18bb7eeabe8e0c67de --- .../mlir/tensorflow/ir/tf_generated_ops.td | 24 +++ .../compiler/mlir/xla/tests/legalize-tf.mlir | 191 +++++++++++++++--- .../mlir/xla/transforms/legalize_tf.cc | 166 ++++++++------- tensorflow/compiler/tests/BUILD | 4 +- .../compiler/tests/pooling_ops_3d_test.py | 9 - tensorflow/compiler/tests/pooling_ops_test.py | 6 - 6 files changed, 278 insertions(+), 122 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td index 3509ebfba4b..863de28df51 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td @@ -745,6 +745,30 @@ def TF_AvgPool3DGradOp : TF_Op<"AvgPool3DGrad", [NoSideEffect]> { TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<1>; } +def TF_AvgPool3DOp : TF_Op<"AvgPool3D", [NoSideEffect]> { + let summary = "Performs 3D average pooling on the input."; + + let description = [{ +Each entry in `output` is the mean of the corresponding size `ksize` +window in `value`. 
+ }]; + + let arguments = (ins + TF_FpTensor:$value, + + Confined]>:$ksize, + Confined]>:$strides, + TF_AnyStrAttrOf<["SAME", "VALID"]>:$padding, + DefaultValuedAttr, "NDHWC">:$data_format + ); + + let results = (outs + TF_FpTensor:$output + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; +} + def TF_AvgPoolGradOp : TF_Op<"AvgPoolGrad", [NoSideEffect]> { let summary = "Computes gradients of the average pooling function."; diff --git a/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir b/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir index 28a31058e99..7e897c36fe7 100644 --- a/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir +++ b/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir @@ -980,7 +980,7 @@ func @test_sparse_mat_mul(%arg0: tensor<3x4xf32>, %arg1: tensor<4x5xf32>) -> ten // SparseMatMul where one operand needs to be transposed and the other one not. // -// CHECK-LABEL: func @test_sparse_mat_mul_with_transpose +// CHECK-LABEL: @test_sparse_mat_mul_with_transpose // CHECK-SAME: %[[ARG0:.*]]: tensor<3x4xf32> // CHECK-SAME: %[[ARG1:.*]]: tensor<5x4xf32> // CHECK-SAME: -> tensor<3x5xf32> @@ -990,7 +990,6 @@ func @test_sparse_mat_mul(%arg0: tensor<3x4xf32>, %arg1: tensor<4x5xf32>) -> ten // CHECK: %[[RESULT:.*]] = "mhlo.dot"(%[[ARG0]], %[[TRANSPOSE]]) // CHECK-SAME: -> tensor<3x5xf32> // CHECK: return %[[RESULT]] -// CHECK: } func @test_sparse_mat_mul_with_transpose(%arg0: tensor<3x4xf32>, %arg1: tensor<5x4xf32>) -> tensor<3x5xf32> { %0 = "tf.SparseMatMul"(%arg0, %arg1) {a_is_sparse = true, b_is_sparse = false, transpose_a = false, transpose_b = true} : (tensor<3x4xf32>, tensor<5x4xf32>) -> tensor<3x5xf32> return %0: tensor<3x5xf32> @@ -998,7 +997,7 @@ func @test_sparse_mat_mul_with_transpose(%arg0: tensor<3x4xf32>, %arg1: tensor<5 // SparseMatMul where one operand needs to be casted and the other one not. 
// -// CHECK-LABEL: func @test_sparse_mat_mul_with_cast +// CHECK-LABEL: @test_sparse_mat_mul_with_cast // CHECK-SAME: %[[ARG0:.*]]: tensor<3x4xf32> // CHECK-SAME: %[[ARG1:.*]]: tensor<4x5xbf16> // CHECK-SAME: -> tensor<3x5xf32> @@ -1007,7 +1006,6 @@ func @test_sparse_mat_mul_with_transpose(%arg0: tensor<3x4xf32>, %arg1: tensor<5 // CHECK: %[[RESULT:.*]] = "mhlo.dot"(%[[ARG0]], %[[CAST]]) // CHECK-SAME: -> tensor<3x5xf32> // CHECK: return %[[RESULT]] -// CHECK: } func @test_sparse_mat_mul_with_cast(%arg0: tensor<3x4xf32>, %arg1: tensor<4x5xbf16>) -> tensor<3x5xf32> { %0 = "tf.SparseMatMul"(%arg0, %arg1) {a_is_sparse = true, b_is_sparse = false, transpose_a = false, transpose_b = false} : (tensor<3x4xf32>, tensor<4x5xbf16>) -> tensor<3x5xf32> return %0: tensor<3x5xf32> @@ -3868,36 +3866,167 @@ func @random_shuffle_3D(%input: tensor<4x?x16xf32>) -> tensor<4x?x16xf32> { // tf.AvgPool legalization //===----------------------------------------------------------------------===// -// CHECK-LABEL: avgpool_valid_padding -// CHECK-SAME: [[ARG:%.+]]: tensor<2x12x20x7xf16> -func @avgpool_valid_padding(%arg0: tensor<2x12x20x7xf16>) -> tensor<2x3x5x7xf16> { - // CHECK: [[CONV32:%.+]] = "mhlo.convert"(%arg0) : (tensor<2x12x20x7xf16>) -> tensor<2x12x20x7xf32> - // CHECK: [[INIT:%.+]] = mhlo.constant dense<0.000000e+00> : tensor - // CHECK: [[REDUCE:%.+]] = "mhlo.reduce_window"([[CONV32]], [[INIT]]) ( { - // CHECK: ^bb0([[ARG1:%.+]]: tensor, [[ARG2:%.+]]: tensor): - // CHECK: [[ADD:%.+]] = mhlo.add [[ARG1]], [[ARG2]] - // CHECK: "mhlo.return"([[ADD]]) - // CHECK: }) {window_dimensions = dense<[1, 2, 2, 1]> : tensor<4xi64>, window_strides = dense<[1, 4, 4, 1]> : tensor<4xi64>} : (tensor<2x12x20x7xf32>, tensor) -> tensor<2x3x5x7xf32> - // CHECK: [[COUNT:%.+]] = mhlo.constant dense<4.000000e+00> : tensor - // CHECK: [[DIV:%.+]] = chlo.broadcast_divide [[REDUCE]], [[COUNT]] {broadcast_dimensions = dense<> : tensor<0xi64>} : (tensor<2x3x5x7xf32>, tensor) -> tensor<2x3x5x7xf32> - // CHECK: [[CONV16:%.+]] = "mhlo.convert"([[DIV]]) : (tensor<2x3x5x7xf32>) -> tensor<2x3x5x7xf16> - // CHECK: return [[CONV16]] - %0 = "tf.AvgPool"(%arg0) {data_format = "NHWC", ksize = [1, 2, 2, 1], padding = "VALID", strides = [1, 4, 4, 1]} : (tensor<2x12x20x7xf16>) -> tensor<2x3x5x7xf16> +// CHECK-LABEL: @avgpool_valid_padding +// CHECK-SAME: [[ARG:%.+]]: tensor<2x12x21x7xf16> +// CHECK: [[CONV32:%.+]] = "mhlo.convert"(%arg0) : (tensor<2x12x21x7xf16>) -> tensor<2x12x21x7xf32> +// CHECK: [[ZERO:%.+]] = mhlo.constant dense<0.000000e+00> : tensor +// CHECK: [[DIVIDEND:%.+]] = "mhlo.reduce_window"([[CONV32]], [[ZERO]]) ( { +// CHECK: ^bb0([[ARG1:%.+]]: tensor, [[ARG2:%.+]]: tensor): +// CHECK: [[ADD:%.+]] = mhlo.add [[ARG1]], [[ARG2]] +// CHECK: "mhlo.return"([[ADD]]) +// CHECK: }) +// CHECK-SAME: window_dimensions = dense<[1, 2, 2, 1]> +// CHECK-SAME: window_strides = dense<[1, 4, 4, 1]> +// CHECK-SAME: -> tensor<2x3x5x7xf32> +// CHECK: [[COUNT:%.+]] = mhlo.constant dense<4.000000e+00> : tensor +// CHECK: [[DIV_RESULT:%.+]] = chlo.broadcast_divide [[DIVIDEND]], [[COUNT]] +// CHECK-SAME: broadcast_dimensions = dense<> +// CHECK-SAME: -> tensor<2x3x5x7xf32> +// CHECK: [[CONV16:%.+]] = "mhlo.convert"([[DIV_RESULT]]) +// CHECK-SAME: -> tensor<2x3x5x7xf16> +// CHECK: return [[CONV16]] +func @avgpool_valid_padding(%arg0: tensor<2x12x21x7xf16>) -> tensor<2x3x5x7xf16> { + %0 = "tf.AvgPool"(%arg0) {data_format = "NHWC", ksize = [1, 2, 2, 1], padding = "VALID", strides = [1, 4, 4, 1]} : (tensor<2x12x21x7xf16>) -> tensor<2x3x5x7xf16> return %0 : 
tensor<2x3x5x7xf16> } -// CHECK-LABEL: avgpool_same_padding -func @avgpool_same_padding(%arg0: tensor<2x13x25x7xf32>) -> tensor<2x4x7x7xf32> { - // CHECK: tf.AvgPool - %0 = "tf.AvgPool"(%arg0) {data_format = "NHWC", ksize = [1, 2, 3, 1], padding = "SAME", strides = [1, 4, 4, 1]} : (tensor<2x13x25x7xf32>) -> tensor<2x4x7x7xf32> - return %0 : tensor<2x4x7x7xf32> +// CHECK-LABEL: @avgpool_3d_valid_padding +// CHECK-SAME: [[ARG:%.+]]: tensor<2x4x12x21x7xf16> +// CHECK: [[CONV32:%.+]] = "mhlo.convert"(%arg0) : (tensor<2x4x12x21x7xf16>) -> tensor<2x4x12x21x7xf32> +// CHECK: [[ZERO:%.+]] = mhlo.constant dense<0.000000e+00> : tensor +// CHECK: [[DIVIDEND:%.+]] = "mhlo.reduce_window"([[CONV32]], [[ZERO]]) ( { +// CHECK: ^bb0([[ARG1:%.+]]: tensor, [[ARG2:%.+]]: tensor): +// CHECK: [[ADD:%.+]] = mhlo.add [[ARG1]], [[ARG2]] +// CHECK: "mhlo.return"([[ADD]]) +// CHECK: }) +// CHECK-SAME: window_dimensions = dense<[1, 1, 2, 2, 1]> +// CHECK-SAME: window_strides = dense<[1, 1, 4, 4, 1]> +// CHECK-SAME: -> tensor<2x4x3x5x7xf32> +// CHECK: [[COUNT:%.+]] = mhlo.constant dense<4.000000e+00> : tensor +// CHECK: [[DIV_RESULT:%.+]] = chlo.broadcast_divide [[DIVIDEND]], [[COUNT]] +// CHECK-SAME: broadcast_dimensions = dense<> +// CHECK-SAME: -> tensor<2x4x3x5x7xf32> +// CHECK: [[CONV16:%.+]] = "mhlo.convert"([[DIV_RESULT]]) +// CHECK-SAME: -> tensor<2x4x3x5x7xf16> +// CHECK: return [[CONV16]] +func @avgpool_3d_valid_padding(%arg0: tensor<2x4x12x21x7xf16>) -> tensor<2x4x3x5x7xf16> { + %0 = "tf.AvgPool3D"(%arg0) {data_format = "NDHWC", ksize = [1, 1, 2, 2, 1], padding = "VALID", strides = [1, 1, 4, 4, 1]} : (tensor<2x4x12x21x7xf16>) -> tensor<2x4x3x5x7xf16> + return %0 : tensor<2x4x3x5x7xf16> +} + +// CHECK-LABEL: @avgpool_nchw_format +// CHECK-SAME: [[ARG:%.+]]: tensor<2x7x12x21xf16> +// CHECK: [[CONV32:%.+]] = "mhlo.convert"(%arg0) : (tensor<2x7x12x21xf16>) -> tensor<2x7x12x21xf32> +// CHECK: [[ZERO:%.+]] = mhlo.constant dense<0.000000e+00> : tensor +// CHECK: [[DIVIDEND:%.+]] = "mhlo.reduce_window"([[CONV32]], [[ZERO]]) ( { +// CHECK: ^bb0([[ARG1:%.+]]: tensor, [[ARG2:%.+]]: tensor): +// CHECK: [[ADD:%.+]] = mhlo.add [[ARG1]], [[ARG2]] +// CHECK: "mhlo.return"([[ADD]]) +// CHECK: }) +// CHECK-SAME: window_dimensions = dense<[1, 1, 2, 2]> +// CHECK-SAME: window_strides = dense<[1, 1, 4, 4]> +// CHECK-SAME: -> tensor<2x7x3x5xf32> +// CHECK: [[COUNT:%.+]] = mhlo.constant dense<4.000000e+00> : tensor +// CHECK: [[DIV_RESULT:%.+]] = chlo.broadcast_divide [[DIVIDEND]], [[COUNT]] +// CHECK-SAME: broadcast_dimensions = dense<> +// CHECK-SAME: -> tensor<2x7x3x5xf32> +// CHECK: [[CONV16:%.+]] = "mhlo.convert"([[DIV_RESULT]]) +// CHECK-SAME: -> tensor<2x7x3x5xf16> +// CHECK: return [[CONV16]] +func @avgpool_nchw_format(%arg0: tensor<2x7x12x21xf16>) -> tensor<2x7x3x5xf16> { + %0 = "tf.AvgPool"(%arg0) {data_format = "NCHW", ksize = [1, 1, 2, 2], padding = "VALID", strides = [1, 1, 4, 4]} : (tensor<2x7x12x21xf16>) -> tensor<2x7x3x5xf16> + return %0 : tensor<2x7x3x5xf16> +} + +// CHECK-LABEL: @avgpool_3d_ncdhw_format +// CHECK-SAME: [[ARG:%.+]]: tensor<2x7x4x12x21xf16> +// CHECK: [[CONV32:%.+]] = "mhlo.convert"(%arg0) : (tensor<2x7x4x12x21xf16>) -> tensor<2x7x4x12x21xf32> +// CHECK: [[ZERO:%.+]] = mhlo.constant dense<0.000000e+00> : tensor +// CHECK: [[DIVIDEND:%.+]] = "mhlo.reduce_window"([[CONV32]], [[ZERO]]) ( { +// CHECK: ^bb0([[ARG1:%.+]]: tensor, [[ARG2:%.+]]: tensor): +// CHECK: [[ADD:%.+]] = mhlo.add [[ARG1]], [[ARG2]] +// CHECK: "mhlo.return"([[ADD]]) +// CHECK: }) +// CHECK-SAME: window_dimensions = dense<[1, 1, 1, 
2, 2]> +// CHECK-SAME: window_strides = dense<[1, 1, 1, 4, 4]> +// CHECK-SAME: -> tensor<2x7x4x3x5xf32> +// CHECK: [[COUNT:%.+]] = mhlo.constant dense<4.000000e+00> : tensor +// CHECK: [[DIV_RESULT:%.+]] = chlo.broadcast_divide [[DIVIDEND]], [[COUNT]] +// CHECK-SAME: broadcast_dimensions = dense<> +// CHECK-SAME: -> tensor<2x7x4x3x5xf32> +// CHECK: [[CONV16:%.+]] = "mhlo.convert"([[DIV_RESULT]]) +// CHECK-SAME: -> tensor<2x7x4x3x5xf16> +// CHECK: return [[CONV16]] +func @avgpool_3d_ncdhw_format(%arg0: tensor<2x7x4x12x21xf16>) -> tensor<2x7x4x3x5xf16> { + %0 = "tf.AvgPool3D"(%arg0) {data_format = "NCDHW", ksize = [1, 1, 1, 2, 2], padding = "VALID", strides = [1, 1, 1, 4, 4]} : (tensor<2x7x4x12x21xf16>) -> tensor<2x7x4x3x5xf16> + return %0 : tensor<2x7x4x3x5xf16> +} + +// CHECK-LABEL: @avgpool_same_padding( +// CHECK-SAME: %[[ARG0:.*]]: tensor<2x12x21x7xf32>) -> tensor<2x4x6x7xf32> +// CHECK: %[[ZERO:.*]] = mhlo.constant dense<0.000000e+00> : tensor +// CHECK: %[[DIVIDEND:.*]] = "mhlo.reduce_window"(%[[ARG0]], %[[ZERO]]) ( { +// CHECK: ^bb0(%[[ARG1:.*]]: tensor, %[[ARG2:.*]]: tensor): +// CHECK: %[[SUM1:.*]] = mhlo.add %[[ARG1]], %[[ARG2]] : tensor +// CHECK: "mhlo.return"(%[[SUM1]]) : (tensor) -> () +// CHECK: }) +// CHECK-SAME: padding = dense<{{\[\[}}0, 0], [1, 1], [0, 1], [0, 0]]> +// CHECK-SAME: window_dimensions = dense<[1, 5, 2, 1]> +// CHECK-SAME: window_strides = dense<[1, 3, 4, 1]> +// CHECK-SAME: -> tensor<2x4x6x7xf32> +// CHECK: %[[ONES:.*]] = mhlo.constant dense<1.000000e+00> : tensor<2x12x21x7xf32> +// CHECK: %[[DIVISOR:.*]] = "mhlo.reduce_window"(%[[ONES]], %[[ZERO]]) ( { +// CHECK: ^bb0(%[[ARG3:.*]]: tensor, %[[ARG4:.*]]: tensor): +// CHECK: %[[SUM2:.*]] = mhlo.add %[[ARG3]], %[[ARG4]] : tensor +// CHECK: "mhlo.return"(%[[SUM2]]) : (tensor) -> () +// CHECK: }) +// CHECK-SAME: padding = dense<{{\[\[}}0, 0], [1, 1], [0, 1], [0, 0]]> +// CHECK-SAME: window_dimensions = dense<[1, 5, 2, 1]> +// CHECK-SAME: window_strides = dense<[1, 3, 4, 1]> +// CHECK-SAME: -> tensor<2x4x6x7xf32> +// CHECK: %[[RESULT:.*]] = mhlo.divide %[[DIVIDEND]], %[[DIVISOR]] : tensor<2x4x6x7xf32> +// CHECK: return %[[RESULT]] : tensor<2x4x6x7xf32> +// CHECK: } +func @avgpool_same_padding(%arg0: tensor<2x12x21x7xf32>) -> tensor<2x4x6x7xf32> { + %0 = "tf.AvgPool"(%arg0) {data_format = "NHWC", ksize = [1, 5, 2, 1], padding = "SAME", strides = [1, 3, 4, 1]} : (tensor<2x12x21x7xf32>) -> tensor<2x4x6x7xf32> + return %0 : tensor<2x4x6x7xf32> +} + +// CHECK-LABEL: @avgpool_3d_same_padding( +// CHECK-SAME: %[[ARG0:.*]]: tensor<2x4x12x21x7xf32>) -> tensor<2x4x4x6x7xf32> +// CHECK: %[[ZERO:.*]] = mhlo.constant dense<0.000000e+00> : tensor +// CHECK: %[[DIVIDEND:.*]] = "mhlo.reduce_window"(%[[ARG0]], %[[ZERO]]) ( { +// CHECK: ^bb0(%[[ARG1:.*]]: tensor, %[[ARG2:.*]]: tensor): +// CHECK: %[[SUM1:.*]] = mhlo.add %[[ARG1]], %[[ARG2]] : tensor +// CHECK: "mhlo.return"(%[[SUM1]]) : (tensor) -> () +// CHECK: }) +// CHECK-SAME: padding = dense<{{\[\[}}0, 0], [0, 0], [1, 1], [0, 1], [0, 0]]> +// CHECK-SAME: window_dimensions = dense<[1, 1, 5, 2, 1]> +// CHECK-SAME: window_strides = dense<[1, 1, 3, 4, 1]> +// CHECK-SAME: -> tensor<2x4x4x6x7xf32> +// CHECK: %[[ONES:.*]] = mhlo.constant dense<1.000000e+00> : tensor<2x4x12x21x7xf32> +// CHECK: %[[DIVISOR:.*]] = "mhlo.reduce_window"(%[[ONES]], %[[ZERO]]) ( { +// CHECK: ^bb0(%[[ARG3:.*]]: tensor, %[[ARG4:.*]]: tensor): +// CHECK: %[[SUM2:.*]] = mhlo.add %[[ARG3]], %[[ARG4]] : tensor +// CHECK: "mhlo.return"(%[[SUM2]]) : (tensor) -> () +// CHECK: }) +// CHECK-SAME: padding = 
dense<{{\[\[}}0, 0], [0, 0], [1, 1], [0, 1], [0, 0]]> +// CHECK-SAME: window_dimensions = dense<[1, 1, 5, 2, 1]> +// CHECK-SAME: window_strides = dense<[1, 1, 3, 4, 1]> +// CHECK-SAME: -> tensor<2x4x4x6x7xf32> +// CHECK: %[[RESULT:.*]] = mhlo.divide %[[DIVIDEND]], %[[DIVISOR]] +// CHECK: return %[[RESULT]] : tensor<2x4x4x6x7xf32> +// CHECK: } +func @avgpool_3d_same_padding(%arg0: tensor<2x4x12x21x7xf32>) -> tensor<2x4x4x6x7xf32> { + %0 = "tf.AvgPool3D"(%arg0) {data_format = "NDHWC", ksize = [1, 1, 5, 2, 1], padding = "SAME", strides = [1, 1, 3, 4, 1]} : (tensor<2x4x12x21x7xf32>) -> tensor<2x4x4x6x7xf32> + return %0 : tensor<2x4x4x6x7xf32> } //===----------------------------------------------------------------------===// // AvgPoolGrad op legalizations. //===----------------------------------------------------------------------===// -// CHECK-LABEL: func @avgpool_grad_valid_padding( +// CHECK-LABEL: @avgpool_grad_valid_padding( // CHECK-SAME: %[[OUT_GRAD:.*]]: tensor<10x12x16x64xf32>) -> tensor<10x24x32x64xf32> { // CHECK: %[[ZERO:.*]] = mhlo.constant dense<0.000000e+00> : tensor // CHECK: %[[DIVISOR:.*]] = mhlo.constant dense<4.000000e+00> : tensor @@ -3929,7 +4058,7 @@ func @avgpool_grad_valid_padding(%grad: tensor<10x12x16x64xf32>) -> tensor<10x24 return %result : tensor<10x24x32x64xf32> } -// CHECK-LABEL: func @avgpool_3d_grad_valid_padding( +// CHECK-LABEL: @avgpool_3d_grad_valid_padding( // CHECK-SAME: %[[OUT_GRAD:.*]]: tensor<10x8x12x16x64xf32>) -> tensor<10x8x24x32x64xf32> { // CHECK: %[[ZERO:.*]] = mhlo.constant dense<0.000000e+00> : tensor // CHECK: %[[DIVISOR:.*]] = mhlo.constant dense<4.000000e+00> : tensor @@ -3958,7 +4087,7 @@ func @avgpool_3d_grad_valid_padding(%grad: tensor<10x8x12x16x64xf32>) -> tensor< return %result : tensor<10x8x24x32x64xf32> } -// CHECK-LABEL: func @avgpool_grad_same_padding( +// CHECK-LABEL: @avgpool_grad_same_padding( // CHECK-SAME: %[[OUT_GRAD:.*]]: tensor<2x4x7x9xf32>) -> tensor<2x13x25x9xf32> { // CHECK: %[[ZERO:.*]] = mhlo.constant dense<0.000000e+00> : tensor // CHECK: %[[ALL_ONES:.*]] = mhlo.constant dense<1.000000e+00> : tensor<2x13x25x9xf32> @@ -3997,7 +4126,7 @@ func @avgpool_grad_same_padding(%grad: tensor<2x4x7x9xf32>) -> tensor<2x13x25x9x return %result : tensor<2x13x25x9xf32> } -// CHECK-LABEL: func @avgpool_3d_grad_same_padding( +// CHECK-LABEL: @avgpool_3d_grad_same_padding( // CHECK-SAME: %[[OUT_GRAD:.*]]: tensor<2x8x4x7x9xf32>) -> tensor<2x8x13x25x9xf32> { // CHECK: %[[ZERO:.*]] = mhlo.constant dense<0.000000e+00> : tensor // CHECK: %[[ALL_ONES:.*]] = mhlo.constant dense<1.000000e+00> : tensor<2x8x13x25x9xf32> @@ -4035,7 +4164,7 @@ func @avgpool_3d_grad_same_padding(%grad: tensor<2x8x4x7x9xf32>) -> tensor<2x8x1 return %result : tensor<2x8x13x25x9xf32> } -// CHECK-LABEL: func @avgpool_grad_nchw_format( +// CHECK-LABEL: @avgpool_grad_nchw_format( // CHECK-SAME: %[[OUT_GRAD:.*]]: tensor<2x9x4x7xf32>) -> tensor<2x9x13x25xf32> { // CHECK: %[[ZERO:.*]] = mhlo.constant dense<0.000000e+00> : tensor // CHECK: %[[ALL_ONES:.*]] = mhlo.constant dense<1.000000e+00> : tensor<2x9x13x25xf32> @@ -4074,7 +4203,7 @@ func @avgpool_grad_nchw_format(%grad: tensor<2x9x4x7xf32>) -> tensor<2x9x13x25xf return %result : tensor<2x9x13x25xf32> } -// CHECK-LABEL: func @avgpool_3d_grad_ncdwh_format( +// CHECK-LABEL: @avgpool_3d_grad_ncdwh_format( // CHECK-SAME: %[[OUT_GRAD:.*]]: tensor<2x9x8x4x7xf32>) -> tensor<2x9x8x13x25xf32> { // CHECK: %[[ZERO:.*]] = mhlo.constant dense<0.000000e+00> : tensor // CHECK: %[[ALL_ONES:.*]] = mhlo.constant dense<1.000000e+00> : 
tensor<2x9x8x13x25xf32> @@ -4112,7 +4241,7 @@ func @avgpool_3d_grad_ncdwh_format(%grad: tensor<2x9x8x4x7xf32>) -> tensor<2x9x8 return %result : tensor<2x9x8x13x25xf32> } -// CHECK-LABEL: func @avgpool_grad_bf16( +// CHECK-LABEL: @avgpool_grad_bf16( // CHECK-SAME: %[[OUT_GRAD:.*]]: tensor<10x12x16x64xbf16>) -> tensor<10x24x32x64xbf16> { // CHECK: %[[ZERO:.*]] = mhlo.constant dense<0.000000e+00> : tensor // CHECK: %[[DIVISOR:.*]] = mhlo.constant dense<4.000000e+00> : tensor diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc index af176dc1324..4549386ce16 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc @@ -1770,30 +1770,93 @@ static DenseIntElementsAttr GetReduceWindowPaddingAsAttr( flatten_paddings); } +// Helper function for dividing each entry of `pooled` by the count of its +// corresponding window, i.e., the number of non-padding entries of the window +// which an `AvgPool` operation performed on an `input_shape`-tensor would map +// to this entry, depending on `ksize` and `strides`. This function is used for +// `AvgPool` and `AvgPoolGrad` legalizations. +// `zero` is passed as a parameter because it can be reused from caller level. +// `pooled` must have `RankedTensorType`. +template +Operation *AvgPoolDivideByCount( + Value pooled, const SmallVector &input_shape, + const SmallVector &ksize, + const SmallVector &strides, OpTy op, Value zero, + PatternRewriter &rewriter) { + Location loc = op.getLoc(); + RankedTensorType pooled_type = + pooled.getType().template cast(); + Type element_type = pooled_type.getElementType(); + Operation *result = nullptr; + RankedTensorType orig_input_type = + RankedTensorType::get(input_shape, element_type); + + if (op.padding() == "VALID") { + // All window counts are equal here because we don't have padding + // (each entry of `pooled` corresponds to a window that consists of + // original input entries only). + int64_t window_count = std::accumulate(ksize.begin(), ksize.end(), 1, + std::multiplies()); + // Divide `pooled` by window counts. + Value divisor = + GetScalarConstOfType(element_type, loc, window_count, &rewriter); + auto scalar_broadcast_dims = GetI64ElementsAttr({}, &rewriter); + result = rewriter.create( + loc, pooled_type, pooled, divisor, scalar_broadcast_dims); + } else { + assert(op.padding() == "SAME"); + // For SAME padding, only original entries that contributed to a window + // are counted for the average of this window, not padded entries. + + // Build all-ones tensor of same shape as the original input. + ElementsAttr splat = hlo::getSplat(&rewriter, orig_input_type, 1); + auto all_ones_tensor = rewriter.create(loc, splat); + + // Get padding for the input. + DenseIntElementsAttr input_padding_attr = + GetReduceWindowPaddingAsAttr( + input_shape, op.ksize(), op.strides(), op.padding(), &rewriter); + + // Count the 1's in each window, using the same padding as for the input, + // which gives us the window counts by which `pooled` needs to be divided. 
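+      // Worked example (hypothetical sizes, for illustration only): along one
+      // spatial axis with input extent 5, ksize 3, stride 2 and SAME padding
+      // ([1, 1]), the three output windows cover 2, 3 and 2 non-padding
+      // entries, so the divisor built below varies per position ([2, 3, 2]
+      // along that axis) rather than being the single constant that the
+      // VALID branch above divides by.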
+ auto divisor = rewriter.create( + loc, pooled_type, + /*operand=*/all_ones_tensor, + /*init_value=*/zero, + /*window_dimensions=*/GetI64ElementsAttr(op.ksize()), + /*window_strides=*/GetI64ElementsAttr(op.strides()), + /*base_dilations=*/DenseIntElementsAttr(), + /*window_dilations=*/DenseIntElementsAttr(), + /*padding=*/input_padding_attr); + BuildReduceBody(element_type, &divisor.body(), &rewriter); + + // Divide `pooled` by window counts. + result = rewriter.create(loc, pooled_type, pooled, divisor); + } + return result; +} + // Converts AvgPool op to HLO ReduceWindow op by setting appropriate window // dimensions with add as the reduction function. The reduction result is // then divided by the number of elements in the window. -class ConvertAvgPoolOp : public OpRewritePattern { +template +class ConvertAvgPoolOp : public OpRewritePattern { public: - using OpRewritePattern::OpRewritePattern; + using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(TF::AvgPoolOp op, + LogicalResult matchAndRewrite(OpTy op, PatternRewriter &rewriter) const override { - auto input_type = op.value().getType().dyn_cast(); + auto input_type = + op.value().getType().template dyn_cast(); if (!input_type) return failure(); - // TODO(b/147217034): support other data formats. - if (!IsDefaultDataFormat(op.data_format())) return failure(); - // TODO(b/147217034): support "SAME" padding. - if (op.padding() != "VALID") return failure(); - // We will do accumulation first; use a larger bitwidth if suitable. Type input_element_type = input_type.getElementType(); Type sum_element_type = GetSumAccumulationType(input_element_type); Type result_type; // The result type for reduction and division with the proper element type. - if (auto ranked_type = op.getType().dyn_cast()) + if (auto ranked_type = op.getType().template dyn_cast()) result_type = RankedTensorType::get(ranked_type.getShape(), sum_element_type); else @@ -1809,9 +1872,9 @@ class ConvertAvgPoolOp : public OpRewritePattern { // Create the tf.ReduceWindow op. Value init = GetScalarConstOfType(sum_element_type, op.getLoc(), 0, &rewriter); - DenseIntElementsAttr paddings_attr = - GetReduceWindowPaddingAsAttr<4>(input_type.getShape(), op.ksize(), - op.strides(), op.padding(), &rewriter); + DenseIntElementsAttr paddings_attr = GetReduceWindowPaddingAsAttr( + input_type.getShape(), op.ksize(), op.strides(), op.padding(), + &rewriter); auto reduce = rewriter.create( op.getLoc(), result_type, input_value, init, GetI64ElementsAttr(op.ksize()), GetI64ElementsAttr(op.strides()), @@ -1821,19 +1884,17 @@ class ConvertAvgPoolOp : public OpRewritePattern { // Count the number of elements in the window. The following calculation // is only valid for no paddings. - SmallVector ksize; + SmallVector input_shape( + llvm::to_vector(input_type.getShape())); + SmallVector ksize, strides; GetI64ArrayAttrValues(op.ksize(), &ksize); - int64_t count = std::accumulate(ksize.begin(), ksize.end(), 1, - std::multiplies()); + GetI64ArrayAttrValues(op.strides(), &strides); - // Divide by the number of elements in the window. - Value divisor = - GetScalarConstOfType(sum_element_type, op.getLoc(), count, &rewriter); - auto scalar_broadcast_dims = GetI64ElementsAttr({}, &rewriter); - Value result = rewriter.create( - op.getLoc(), result_type, reduce, divisor, scalar_broadcast_dims); + Operation *result_op = AvgPoolDivideByCount( + reduce.getResult(), input_shape, ksize, strides, op, init, rewriter); // Convert back if we enlarged the element type's bitwidth. 
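+    // (The widening to the accumulation type, e.g. f32 for an f16 input, is
+    // an accuracy measure: summing many window elements directly in f16 would
+    // lose precision, so the divided result is converted back down here.)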
+ Value result = result_op->getOpResult(0); if (input_element_type != sum_element_type) result = rewriter.create(op.getLoc(), result, input_element_type); @@ -1843,6 +1904,9 @@ class ConvertAvgPoolOp : public OpRewritePattern { } }; +using ConvertAvgPool2DOp = ConvertAvgPoolOp; +using ConvertAvgPool3DOp = ConvertAvgPoolOp; + // `AvgPoolGradOp` is converted to the following operations: // 1. Divide each entry of the output gradient (the gradient for the previous // layer in backpropagation order) by the count of the corresponding window @@ -1916,59 +1980,13 @@ class ConvertAvgPoolGradOp : public OpRewritePattern { auto orig_input_shape_values = orig_input_shape_attr.getValues(); DimVector orig_input_shape(orig_input_shape_values.begin(), orig_input_shape_values.end()); - RankedTensorType orig_input_type = - RankedTensorType::get(orig_input_shape, element_type); DimVector ksize, strides; GetI64ArrayAttrValues(op.ksize(), &ksize); GetI64ArrayAttrValues(op.strides(), &strides); Value zero = GetScalarConstOfType(element_type, loc, 0, &rewriter); - Operation *out_grad_divided = nullptr; - if (op.padding() == "VALID") { - // All window counts are equal here because we don't have padding - // (each entry of `out_grad` corresponds to a window that consists of - // original input entries only). - int64_t window_count = std::accumulate(ksize.begin(), ksize.end(), 1, - std::multiplies()); - // Divide `out_grad` by window counts. - Value divisor = - GetScalarConstOfType(element_type, loc, window_count, &rewriter); - auto scalar_broadcast_dims = GetI64ElementsAttr({}, &rewriter); - out_grad_divided = rewriter.create( - loc, out_grad_type, out_grad, divisor, scalar_broadcast_dims); - } else { - assert(op.padding() == "SAME"); - // For SAME padding, only original entries that contributed to a window - // are counted for the average of this window, not padded entries. - - // Build all-ones tensor of same shape as the original input. - ElementsAttr splat = hlo::getSplat(&rewriter, orig_input_type, 1); - auto all_ones_tensor = rewriter.create(loc, splat); - - // Get the same padding as for the original input. - DenseIntElementsAttr orig_padding_attr = - GetReduceWindowPaddingAsAttr(orig_input_shape, op.ksize(), - op.strides(), op.padding(), - &rewriter); - - // Count the 1's in each window, using the same padding as for the - // original input, which gives us the window counts by which `out_grad` - // needs to be divided. - auto window_counts = rewriter.create( - loc, out_grad_type, - /*operand=*/all_ones_tensor, - /*init_value=*/zero, - /*window_dimensions=*/GetI64ElementsAttr(op.ksize()), - /*window_strides=*/GetI64ElementsAttr(op.strides()), - /*base_dilations=*/DenseIntElementsAttr(), - /*window_dilations=*/DenseIntElementsAttr(), - /*padding=*/orig_padding_attr); - BuildReduceBody(element_type, &window_counts.body(), &rewriter); - - // Divide `out_grad` by window counts. - out_grad_divided = rewriter.create(loc, out_grad_type, - out_grad, window_counts); - } + auto out_grad_divided = AvgPoolDivideByCount( + out_grad, orig_input_shape, ksize, strides, op, zero, rewriter); // Get same padding as for original input. 
PaddingArray orig_padding = GetReduceWindowPaddingAsArray( @@ -5506,10 +5524,10 @@ LogicalResult legalizeTF(Operation *op, bool allow_partial_conversion, ConvertFusedBatchNormGradV3Op, ConvertFusedBatchNormV2Op, ConvertFusedBatchNormV3Op, ConvertInfeedDequeueTupleOp, ConvertInplaceUpdateOp, ConvertLinSpaceOp, ConvertMaxOp, ConvertMinOp, - ConvertAvgPoolOp, ConvertAvgPool2DGradOp, ConvertAvgPool3DGradOp, - ConvertMaxPool2DOp, ConvertMaxPool3DOp, ConvertMaxPool2DGradOp, - ConvertMaxPool3DGradOp, ConvertMeanOp, ConvertOneHotOp, - ConvertOutfeedEnqueueTupleOp, ConvertProdOp, ConvertQrOp, + ConvertAvgPool2DOp, ConvertAvgPool3DOp, ConvertAvgPool2DGradOp, + ConvertAvgPool3DGradOp, ConvertMaxPool2DOp, ConvertMaxPool3DOp, + ConvertMaxPool2DGradOp, ConvertMaxPool3DGradOp, ConvertMeanOp, + ConvertOneHotOp, ConvertOutfeedEnqueueTupleOp, ConvertProdOp, ConvertQrOp, ConvertDynamicRangeOp, ConvertRangeOp, ConvertSelectV2Op, ConvertSigmoidOp, ConvertShapeOp, ConvertSizeOp, ConvertSoftmaxOp, diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index 0b5a6c147dc..1dd61c235a8 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -936,7 +936,7 @@ tf_xla_py_test( srcs = ["pooling_ops_test.py"], enable_mlir_bridge = True, python_version = "PY3", - shard_count = 10, + shard_count = 20, tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip ], @@ -956,7 +956,7 @@ tf_xla_py_test( srcs = ["pooling_ops_3d_test.py"], enable_mlir_bridge = True, python_version = "PY3", - shard_count = 10, + shard_count = 20, tags = [ "no_pip", # TODO(b/149738646): fix pip install so these tests run on kokoro pip ], diff --git a/tensorflow/compiler/tests/pooling_ops_3d_test.py b/tensorflow/compiler/tests/pooling_ops_3d_test.py index a833daa39be..9eda74b55a9 100644 --- a/tensorflow/compiler/tests/pooling_ops_3d_test.py +++ b/tensorflow/compiler/tests/pooling_ops_3d_test.py @@ -75,9 +75,6 @@ class Pooling3DTest(xla_test.XLATestCase): actual = vals.flatten() self.assertAllClose(expected, actual) - @test_util.disable_mlir_bridge("TODO(b/159812644): AvgPool TF to HLO lowering" - " doesn't support all paddings and data " - "formats") def testAvgPool3dValidPadding(self): expected_output = [20.5, 21.5, 22.5] self._VerifyValues( @@ -88,9 +85,6 @@ class Pooling3DTest(xla_test.XLATestCase): padding="VALID", expected=expected_output) - @test_util.disable_mlir_bridge("TODO(b/159812644): AvgPool TF to HLO lowering" - " doesn't support all paddings and data " - "formats") def testAvgPool3dSamePadding(self): expected_output = [20.5, 21.5, 22.5, 26.5, 27.5, 28.5] self._VerifyValues( @@ -101,9 +95,6 @@ class Pooling3DTest(xla_test.XLATestCase): padding="SAME", expected=expected_output) - @test_util.disable_mlir_bridge("TODO(b/159812644): AvgPool TF to HLO lowering" - " doesn't support all paddings and data " - "formats") def testAvgPool3dSamePaddingDifferentStrides(self): expected_output = [1.5, 4.5, 7.5, 17.5, 20.5, 23.5, 33.5, 36.5, 39.5] self._VerifyValues( diff --git a/tensorflow/compiler/tests/pooling_ops_test.py b/tensorflow/compiler/tests/pooling_ops_test.py index 293e1010b08..d9393387c0d 100644 --- a/tensorflow/compiler/tests/pooling_ops_test.py +++ b/tensorflow/compiler/tests/pooling_ops_test.py @@ -268,9 +268,6 @@ class PoolingTest(xla_test.XLATestCase): expected=[1, 3, 9, 11]) # Average pooling - @test_util.disable_mlir_bridge("TODO(b/159812644): AvgPool TF to HLO lowering" - " doesn't support all paddings and data " - "formats") def 
testAvgPoolValidPadding(self): expected_output = [7, 8, 9] self._VerifyValues( @@ -281,9 +278,6 @@ class PoolingTest(xla_test.XLATestCase): padding="VALID", expected=expected_output) - @test_util.disable_mlir_bridge("TODO(b/159812644): AvgPool TF to HLO lowering" - " doesn't support all paddings and data " - "formats") def testAvgPoolSamePadding(self): expected_output = [7., 8., 9., 11.5, 12.5, 13.5] self._VerifyValues( From 5a85ea8b4316984b9c6581d81faa002a34f5b8af Mon Sep 17 00:00:00 2001 From: WindQAQ Date: Wed, 15 Jul 2020 14:08:43 -0700 Subject: [PATCH 0541/2522] Update tests --- tensorflow/python/keras/activations_test.py | 6 +++--- tensorflow/python/ops/nn_test.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/keras/activations_test.py b/tensorflow/python/keras/activations_test.py index 4a6bb7ce271..53edb01b85e 100644 --- a/tensorflow/python/keras/activations_test.py +++ b/tensorflow/python/keras/activations_test.py @@ -171,7 +171,7 @@ class KerasActivationsTest(test.TestCase, parameterized.TestCase): self.assertAllClose(result, expected, rtol=1e-05) def test_gelu(self): - def gelu(x, approximate=True): + def gelu(x, approximate=False): if approximate: return 0.5 * x * (1.0 + np.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * np.power(x, 3)))) @@ -185,10 +185,10 @@ class KerasActivationsTest(test.TestCase, parameterized.TestCase): expected = gelu(test_values) self.assertAllClose(result, expected, rtol=1e-05) - f = backend.function([x], [activations.gelu(x, False)]) + f = backend.function([x], [activations.gelu(x, True)]) test_values = np.random.random((2, 5)) result = f([test_values])[0] - expected = gelu(test_values, False) + expected = gelu(test_values, True) self.assertAllClose(result, expected, rtol=1e-05) def test_elu(self): diff --git a/tensorflow/python/ops/nn_test.py b/tensorflow/python/ops/nn_test.py index 009fa9f804e..5d1ab65aa90 100644 --- a/tensorflow/python/ops/nn_test.py +++ b/tensorflow/python/ops/nn_test.py @@ -1063,7 +1063,7 @@ class GeluTest(test_lib.TestCase): def test(self): - def gelu(x, approximate=True): + def gelu(x, approximate=False): if approximate: return 0.5 * x * (1.0 + np.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * np.power(x, 3)))) @@ -1077,8 +1077,8 @@ class GeluTest(test_lib.TestCase): z = self.evaluate(nn_ops.gelu(x)) self.assertAllClose(y, z) - y = gelu(x, False) - z = self.evaluate(nn_ops.gelu(x, False)) + y = gelu(x, True) + z = self.evaluate(nn_ops.gelu(x, True)) self.assertAllClose(y, z) From 745ee69ccceecdaa114f6557b617001397f0a726 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Wed, 15 Jul 2020 14:02:07 -0700 Subject: [PATCH 0542/2522] Added batch support for DepthwiseConv3x3. 
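The activations/nn test updates above make the exact formulation the reference default and exercise the tanh approximation explicitly. For reference, a minimal standalone C++ sketch (not part of any patch in this series; function names are illustrative only) of the two formulas being compared:

    #include <cmath>
    #include <cstdio>

    // Exact GELU: x * Phi(x), where Phi is the standard normal CDF.
    double GeluExact(double x) {
      return 0.5 * x * (1.0 + std::erf(x / std::sqrt(2.0)));
    }

    // Tanh approximation, i.e. what approximate=True selects in the tests.
    double GeluTanh(double x) {
      const double kPi = 3.14159265358979323846;
      return 0.5 * x *
             (1.0 + std::tanh(std::sqrt(2.0 / kPi) * (x + 0.044715 * x * x * x)));
    }

    int main() {
      for (double x = -3.0; x <= 3.0; x += 1.0) {
        // The two columns agree to within roughly 1e-3 over this range.
        std::printf("x=% .1f  exact=% .6f  tanh=% .6f\n", x, GeluExact(x), GeluTanh(x));
      }
      return 0;
    }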
PiperOrigin-RevId: 321434005 Change-Id: I1ecc3c500a4a23ac40d662819a7a78ea2bd22bed --- .../gpu/cl/kernels/depthwise_conv_3x3.cc | 15 +++++++++++---- .../gpu/cl/selectors/dw_convolution_selector.cc | 7 +++---- tensorflow/lite/delegates/gpu/cl/tensor_type.cc | 16 +++++++++++++++- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc index 97afea4fcd4..9e58ce78cf0 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv_3x3.cc @@ -49,7 +49,15 @@ std::string GenerateDepthwiseConvCode(const OperationDef& op_def, } c += "__kernel void main_function(\n"; c += "$0) {\n"; - c += " int X = get_global_id(0) * 2;\n"; + if (op_def.dst_tensors[0].HasAxis(Axis::BATCH)) { + c += " int linear_id = get_global_id(0);\n"; + c += " int X = (linear_id / args.dst_tensor.Batch()) * 2;\n"; + c += " int B = linear_id % args.dst_tensor.Batch();\n"; + c += " args.dst_tensor.SetBatchRef(B);\n"; + c += " args.src_tensor.SetBatchRef(B);\n"; + } else { + c += " int X = get_global_id(0) * 2;\n"; + } c += " int Y = get_global_id(1) * 2;\n"; c += " int S = get_global_id(2);\n"; c += " ACCUM_FLT4 r0 = (ACCUM_FLT4)(0.0f);\n"; @@ -224,8 +232,7 @@ std::string GenerateDepthwiseConvCode(const OperationDef& op_def, c += " r3 += TO_ACCUM_TYPE(" + bias + ");\n"; if (local_mem_uploads) { c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() " - "|| " - "S >= args.dst_tensor.Slices()) { \n"; + "|| S >= args.dst_tensor.Slices()) { \n"; c += " return; \n"; c += " } \n"; } @@ -307,7 +314,7 @@ absl::Status DepthwiseConv3x3::BindArguments() { } int3 DepthwiseConv3x3::GetGridSize() const { - const int grid_x = DivideRoundUp(dst_[0]->Width(), 2); + const int grid_x = DivideRoundUp(dst_[0]->Width(), 2) * dst_[0]->Batch(); const int grid_y = DivideRoundUp(dst_[0]->Height(), 2); const int grid_z = dst_[0]->Slices(); return int3(grid_x, grid_y, grid_z); diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/dw_convolution_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/dw_convolution_selector.cc index 9ae87c6ba07..54ff45d182a 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/dw_convolution_selector.cc +++ b/tensorflow/lite/delegates/gpu/cl/selectors/dw_convolution_selector.cc @@ -30,7 +30,7 @@ absl::Status SelectDWConvolutionAdreno( const DepthwiseConvolution2DAttributes& attr, const CreationContext& creation_context, const OperationDef& op_def, std::unique_ptr* ptr) { - if (!op_def.IsBatchSupported() && IsDepthwiseConv3x3Supported(attr)) { + if (IsDepthwiseConv3x3Supported(attr)) { DepthwiseConv3x3 dw_conv; RETURN_IF_ERROR( CreateDepthwiseConv3x3(creation_context, op_def, attr, &dw_conv)); @@ -48,7 +48,7 @@ absl::Status SelectDWConvolutionPowerVR( const DepthwiseConvolution2DAttributes& attr, const CreationContext& creation_context, const OperationDef& op_def, std::unique_ptr* ptr) { - if (!op_def.IsBatchSupported() && IsDepthwiseConv3x3Supported(attr)) { + if (IsDepthwiseConv3x3Supported(attr)) { DepthwiseConv3x3 dw_conv; RETURN_IF_ERROR( CreateDepthwiseConv3x3(creation_context, op_def, attr, &dw_conv)); @@ -71,8 +71,7 @@ absl::Status SelectDWConvolutionMali( storage_type == TensorStorageType::IMAGE_BUFFER; MaliInfo mali_info = creation_context.device->GetInfo().mali_info; if (IsDepthwiseConv3x3Supported(attr) && !mali_info.IsMidgard() && - !buffer_type && !op_def.IsBatchSupported() && - 
op_def.precision != CalculationsPrecision::F32) { + !buffer_type && op_def.precision != CalculationsPrecision::F32) { DepthwiseConv3x3 dw_conv; RETURN_IF_ERROR( CreateDepthwiseConv3x3(creation_context, op_def, attr, &dw_conv)); diff --git a/tensorflow/lite/delegates/gpu/cl/tensor_type.cc b/tensorflow/lite/delegates/gpu/cl/tensor_type.cc index 9070cadfb85..d8455648907 100644 --- a/tensorflow/lite/delegates/gpu/cl/tensor_type.cc +++ b/tensorflow/lite/delegates/gpu/cl/tensor_type.cc @@ -395,7 +395,21 @@ absl::Status TensorDescriptor::PerformGetWHOffsetSelector( "GetWHOffset require two arguments(X and Y coordinates), but ", args.size(), " was passed")); } - *result = absl::StrCat(args[1], " * ", GetWidth(), " + ", args[0]); + if (HasAxis(Axis::BATCH) && !IsBatchedWidth()) { + auto it = state_vars_.find("batch_id"); + std::string batch_id; + if (it == state_vars_.end()) { + return absl::NotFoundError( + "Not found batch_id. Should be setted up by SetBatchRef(). method"); + } else { + batch_id = it->second; + } + *result = absl::StrCat("((", args[1], ") * ", GetWidth(), " + (", args[0], + ")) * batch + (", batch_id, ")"); + } else { + *result = + absl::StrCat("(", args[1], ") * ", GetWidth(), " + (", args[0], ")"); + } return absl::OkStatus(); } From 7745d79cd6e24892af3a21e8caea0fdbdb008bad Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Wed, 15 Jul 2020 14:02:55 -0700 Subject: [PATCH 0543/2522] Convolution improvements for NVidia. PiperOrigin-RevId: 321434207 Change-Id: I48d8b4adb601b2b94f4365d9b177515de0576e12 --- .../delegates/gpu/cl/kernels/conv_powervr.cc | 20 ++++++++++++++++++- .../gpu/cl/selectors/convolution_selector.cc | 11 ++++++---- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc index 76ae58a0c55..a3ad9a4eb39 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_powervr.cc @@ -714,7 +714,7 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( conv_params.work_group_launch_order = int3(1, 0, 2); conv_params.fixed_work_group_size = true; } - conv_params.block_size = int3(1, 1, 4); + conv_params.block_size = int3(2, 1, 4); conv_params.src_depth_loop_size = 1; conv_params.weights_upload_type = WeightsUploadType::LOCAL_MEM_BY_THREADS; if (dst_depth % 4 == 0 || dst_depth >= 8) { @@ -724,6 +724,24 @@ ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams( } else { conv_params.block_size.z = dst_depth; } + if (dst_shape) { + int task_size = dst_shape->w * dst_shape->b * dst_shape->h * dst_depth; + float task_size_per_cu = + static_cast(task_size) / device.GetInfo().compute_units_count; + int block_size = conv_params.block_size.x * conv_params.block_size.y * + conv_params.block_size.z; + float threads_per_cu = task_size_per_cu / block_size; + float warps_per_cu = threads_per_cu / 32 /*warp_size*/; + if (warps_per_cu < 8.0f) { + conv_params.block_size.x = 1; + } + if (warps_per_cu < 4.0f && conv_params.block_size.z >= 4) { + conv_params.block_size.z /= 2; + } + if (warps_per_cu < 2.0f && conv_params.block_size.z >= 2) { + conv_params.block_size.z /= 2; + } + } if (src_depth % 2 == 0) { conv_params.src_depth_loop_size = 2; } diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.cc index 3841c415301..3e2531c02b3 100644 --- a/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.cc +++ 
b/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.cc @@ -73,6 +73,7 @@ absl::Status SelectConvolutionDynamicWeightsAdreno( } absl::Status SelectConvolutionNVidia(const Convolution2DAttributes& attr, + const BHWC& dst_shape, const CreationContext& creation_context, const OperationDef& op_def, std::unique_ptr* ptr) { @@ -82,7 +83,8 @@ absl::Status SelectConvolutionNVidia(const Convolution2DAttributes& attr, *ptr = absl::make_unique(std::move(conv)); } else { ConvPowerVR conv; - RETURN_IF_ERROR(CreateConvPowerVR(creation_context, op_def, attr, &conv)); + RETURN_IF_ERROR( + CreateConvPowerVR(creation_context, op_def, attr, &conv, &dst_shape)); *ptr = absl::make_unique(std::move(conv)); } return absl::OkStatus(); @@ -174,7 +176,8 @@ absl::Status SelectConvolution(const Convolution2DAttributes& attr, case Vendor::AMD: return SelectConvolutionPowerVR(attr, creation_context, op_def, ptr); case Vendor::NVIDIA: - return SelectConvolutionNVidia(attr, creation_context, op_def, ptr); + return SelectConvolutionNVidia(attr, dst_shape, creation_context, op_def, + ptr); case Vendor::MALI: return SelectConvolutionMali(attr, dst_shape, creation_context, op_def, ptr); @@ -197,8 +200,8 @@ absl::Status SelectConvolutionForWinograd( case Vendor::INTEL: case Vendor::NVIDIA: { ConvPowerVR conv; - RETURN_IF_ERROR( - CreateConvPowerVRWino4x4To6x6(creation_context, op_def, attr, &conv)); + RETURN_IF_ERROR(CreateConvPowerVRWino4x4To6x6(creation_context, op_def, + attr, &conv, &dst_shape)); *ptr = absl::make_unique(std::move(conv)); return absl::OkStatus(); } From b9a65d13537196e2d04af9c00b1cde56efab736e Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Wed, 15 Jul 2020 21:22:18 +0000 Subject: [PATCH 0544/2522] fixed style errors: --- tensorflow/c/kernels/summary_op.cc | 7 +++---- tensorflow/c/kernels/tensor_shape_utils.cc | 5 ++--- tensorflow/c/kernels/tensor_shape_utils.h | 4 ++-- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/tensorflow/c/kernels/summary_op.cc b/tensorflow/c/kernels/summary_op.cc index 7500c3046e1..9ebda7188cb 100644 --- a/tensorflow/c/kernels/summary_op.cc +++ b/tensorflow/c/kernels/summary_op.cc @@ -56,6 +56,8 @@ static void ScalarSummaryOp_Delete(void* kernel) { // Helper functions for compute method bool IsSameSize(TF_Tensor* tensor1, TF_Tensor* tensor2); +// Returns a string representation of a single tag or empty string if there +// are multiple tags static tensorflow::string SingleTag(TF_Tensor* tags); template @@ -72,12 +74,9 @@ static void ScalarSummaryOp_Compute(void* kernel, TF_OpKernelContext* ctx) { << tensorflow::ShapeDebugString(params.values) << SingleTag(params.tags); TF_SetStatus(params.status, TF_INVALID_ARGUMENT, err.str().c_str()); - } - if (TF_GetCode(params.status) != TF_OK){ TF_OpKernelContext_Failure(ctx, params.status); return; } - // Convert tags and values tensor to array to access elements by index tensorflow::Summary s; auto tags_array = static_cast( @@ -148,7 +147,7 @@ void RegisterScalarSummaryOpKernel() { } // A dummy static variable initialized by a lambda whose side-effect is to -// register the bitcast kernel. +// register the ScalarSummary kernel. 
TF_ATTRIBUTE_UNUSED static bool IsScalarSummaryOpKernelRegistered = []() { if (SHOULD_REGISTER_OP_KERNEL("ScalarSummary")) { RegisterScalarSummaryOpKernel(); diff --git a/tensorflow/c/kernels/tensor_shape_utils.cc b/tensorflow/c/kernels/tensor_shape_utils.cc index b3cba8cb99f..6ca138584b7 100644 --- a/tensorflow/c/kernels/tensor_shape_utils.cc +++ b/tensorflow/c/kernels/tensor_shape_utils.cc @@ -18,20 +18,19 @@ limitations under the License. #include #include "tensorflow/c/tf_tensor.h" -#include "tensorflow/core/platform/str_util.h" #include "tensorflow/core/platform/strcat.h" #include "tensorflow/core/platform/logging.h" namespace tensorflow { std::string ShapeDebugString(TF_Tensor* tensor) { - // A TF_Tensor cannot have an unknown rank + // A TF_Tensor cannot have an unknown rank. CHECK_GE(TF_NumDims(tensor), 0); tensorflow::string s = "["; for (int i = 0; i < TF_NumDims(tensor); ++i) { if (i > 0) tensorflow::strings::StrAppend(&s, ","); int64_t dim = TF_Dim(tensor, i); - // A TF_Tensor cannot have an unknown dimension + // A TF_Tensor cannot have an unknown dimension. CHECK_GE(dim, 0); tensorflow::strings::StrAppend(&s, dim); } diff --git a/tensorflow/c/kernels/tensor_shape_utils.h b/tensorflow/c/kernels/tensor_shape_utils.h index a62f460998b..7b48a8939ae 100644 --- a/tensorflow/c/kernels/tensor_shape_utils.h +++ b/tensorflow/c/kernels/tensor_shape_utils.h @@ -27,9 +27,9 @@ namespace tensorflow { // The following are utils for the shape of a TF_Tensor type. // These functions may later be subsumed by the methods for a -// TF_TensorShape type +// TF_TensorShape type. -// Returns a string representation of the TF_Tensor +// Returns a string representation of the TF_Tensor shape. std::string ShapeDebugString(TF_Tensor* tensor); } // namespace tensorflow From 23ebf04dbf7d11f7989bd85085844a9ffecd3c2f Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Wed, 15 Jul 2020 21:26:21 +0000 Subject: [PATCH 0545/2522] updated paths to upstream master for cleaner merge --- tensorflow/c/kernels.cc | 25 ------ tensorflow/c/kernels.h | 11 --- tensorflow/c/kernels_test.cc | 166 ++++++++--------------------------- 3 files changed, 36 insertions(+), 166 deletions(-) diff --git a/tensorflow/c/kernels.cc b/tensorflow/c/kernels.cc index 7aef824cc69..8fa50711a8d 100644 --- a/tensorflow/c/kernels.cc +++ b/tensorflow/c/kernels.cc @@ -25,7 +25,6 @@ limitations under the License. #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/platform/types.h" -#include "tensorflow/core/framework/tensor_shape.h" // This file forms the basis of a stable ABI for third-party kernel // implementations. 
It is crucial that changes to this file are made cautiously @@ -273,27 +272,3 @@ TF_Tensor* TF_AllocateOutput(TF_OpKernelContext* context, int index, } return tf_tensor; } - -TF_Tensor* TF_AllocateTemp(TF_OpKernelContext* context, TF_DataType dtype, - int64_t* dims, int num_dims, TF_Status* status){ - auto* cc_ctx = reinterpret_cast<::tensorflow::OpKernelContext*>(context); - TF_SetStatus(status, TF_OK, ""); - tensorflow::TensorShape shape; - for(int i = 0; i < num_dims; ++i){ - shape.AddDim(dims[i]); - } - tensorflow::Status s; - tensorflow::Tensor tensor; - TF_Tensor* tf_tensor; - s = cc_ctx->allocate_temp(static_cast(dtype), shape, &tensor); - if (!s.ok()){ - ::tensorflow::Set_TF_Status_from_Status(status, s); - return nullptr; - } - tf_tensor = TF_TensorFromTensor(tensor, &s); - if (!s.ok()){ - ::tensorflow::Set_TF_Status_from_Status(status, s); - return nullptr; - } - return tf_tensor; -} diff --git a/tensorflow/c/kernels.h b/tensorflow/c/kernels.h index e1397402dae..1428f7ab928 100644 --- a/tensorflow/c/kernels.h +++ b/tensorflow/c/kernels.h @@ -194,17 +194,6 @@ TF_CAPI_EXPORT TF_Tensor* TF_AllocateOutput(TF_OpKernelContext* context, int64_t* dims, int num_dims, size_t len, TF_Status* status); -// Allocates a temporary Tensor of the specified type and shape. Devices -// such as GPUs that enqueue Ops for lazy execution may retain references -// to the temporary tensors after the Op's Compute method has run. - -// num_dims must equal the size of array dims -TF_CAPI_EXPORT extern TF_Tensor* TF_AllocateTemp(TF_OpKernelContext* context, - TF_DataType dtype, - int64_t* dims, int num_dims, - TF_Status* status); - - #ifdef __cplusplus } /* end extern "C" */ #endif diff --git a/tensorflow/c/kernels_test.cc b/tensorflow/c/kernels_test.cc index 738c1e12c80..423302741de 100644 --- a/tensorflow/c/kernels_test.cc +++ b/tensorflow/c/kernels_test.cc @@ -360,17 +360,6 @@ class DeviceKernelOpTest : public OpsTestBase { #endif }; -// Helper function for tests that validates that the tensor has -// shape and type corresponding to dims and dtype. 
-void validate_tensor(TF_Tensor* tensor, int64_t* dims, int64_t num_dims, - TF_DataType dtype); - -// Helper function for tests that copies data of length -// tensor_size_bytes from values to tensor -template -void set_tensor_data(TF_Tensor* tensor, T* values, size_t tensor_size_bytes, - TF_OpKernelContext* ctx); - REGISTER_OP("AllocateOutputOp1").Output("output1: float"); TEST_F(DeviceKernelOpTest, TestAllocateOutputSizeOne) { @@ -382,11 +371,22 @@ TEST_F(DeviceKernelOpTest, TestAllocateOutputSizeOne) { TF_Tensor* output = TF_AllocateOutput( /*context=*/ctx, /*index=*/0, /*dtype=*/TF_FLOAT, /*dims=*/&dim, /*num_dims=*/1, /*len=*/tensor_size_bytes, s); - validate_tensor(output, &dim, 1, TF_FLOAT); - + EXPECT_EQ(TF_OK, TF_GetCode(s)); + EXPECT_EQ(TF_FLOAT, TF_TensorType(output)); + EXPECT_EQ(1, TF_NumDims(output)); + EXPECT_EQ(1, TF_Dim(output, 0)); + // Set output to 3 - float values[1] = {3.0f}; - set_tensor_data(output, values, tensor_size_bytes, ctx); + float* data = reinterpret_cast(TF_TensorData(output)); + float value = 3.0f; +#if GOOGLE_CUDA + OpKernelContext* cc_ctx = reinterpret_cast(ctx); + cc_ctx->eigen_gpu_device().memcpyHostToDevice(data, &value, + tensor_size_bytes); +#else + *data = value; +#endif + TF_DeleteStatus(s); TF_DeleteTensor(output); }; @@ -409,8 +409,12 @@ TEST_F(DeviceKernelOpTest, TestAllocateEmptyOutput) { TF_Tensor* output = TF_AllocateOutput( /*context=*/ctx, /*index=*/0, /*dtype=*/TF_FLOAT, /*dims=*/&dim, /*num_dims=*/1, /*len=*/0, s); + EXPECT_EQ(TF_OK, TF_GetCode(s)); - validate_tensor(output, &dim, 1, TF_FLOAT); + EXPECT_EQ(TF_FLOAT, TF_TensorType(output)); + EXPECT_EQ(1, TF_NumDims(output)); + EXPECT_EQ(0, TF_Dim(output, 0)); + TF_DeleteStatus(s); TF_DeleteTensor(output); }; @@ -430,16 +434,27 @@ TEST_F(DeviceKernelOpTest, TestAllocateOutputSize2x3) { TF_Status* s = TF_NewStatus(); // Allocate 2x3 output int64_t dim[2] = {2, 3}; - size_t tensor_size_bytes = TF_DataTypeSize(TF_FLOAT) * 6; + size_t tensor_size_bytes = 6 * TF_DataTypeSize(TF_FLOAT); TF_Tensor* output = TF_AllocateOutput( /*context=*/ctx, /*index=*/0, /*dtype=*/TF_FLOAT, /*dims=*/dim, /*num_dims=*/2, /*len=*/tensor_size_bytes, s); EXPECT_EQ(TF_OK, TF_GetCode(s)); - validate_tensor(output, dim, 2, TF_FLOAT); + EXPECT_EQ(TF_FLOAT, TF_TensorType(output)); + EXPECT_EQ(2, TF_NumDims(output)); + EXPECT_EQ(2, TF_Dim(output, 0)); + EXPECT_EQ(3, TF_Dim(output, 1)); // Set output to [1 2 3 4 5 6] - float values[6] = {1, 2, 3, 4, 5, 6}; - set_tensor_data(output, values, tensor_size_bytes, ctx); + void* data = TF_TensorData(output); + float value[6] = {1, 2, 3, 4, 5, 6}; +#if GOOGLE_CUDA + OpKernelContext* cc_ctx = reinterpret_cast(ctx); + cc_ctx->eigen_gpu_device().memcpyHostToDevice(data, value, + tensor_size_bytes); +#else + memcpy(data, value, tensor_size_bytes); +#endif + TF_DeleteStatus(s); TF_DeleteTensor(output); }; @@ -451,113 +466,4 @@ TEST_F(DeviceKernelOpTest, TestAllocateOutputSize2x3) { EXPECT_EQ("Tensor", output->DebugString(100)); } - -REGISTER_OP("AllocateTempOp1").Output("output1: float"); - -TEST_F(DeviceKernelOpTest, TestAllocateTempSizeOne) { - auto my_compute_func = [](void* kernel, TF_OpKernelContext* ctx) { - // Allocate output - TF_Status* s = TF_NewStatus(); - int64_t dim = 1; - TF_Tensor* output = TF_AllocateTemp( - /*context=*/ctx, /*dtype=*/TF_FLOAT, /*dims=*/&dim, - /*num_dims=*/1, s); - size_t tensor_size_bytes = TF_DataTypeSize(TF_FLOAT); - EXPECT_EQ(TF_OK, TF_GetCode(s)); - validate_tensor(output, &dim, 1, TF_FLOAT); - - // Set output to 3 - float values[1] = {3.0f}; - 
set_tensor_data(output, values, tensor_size_bytes, ctx); - TF_SetOutput(ctx, 0, output, s); - TF_DeleteStatus(s); - TF_DeleteTensor(output); - }; - - SetupOp("AllocateTempOp1", "AllocateTemp1", my_compute_func); - - TF_ASSERT_OK(RunOpKernel()); - Tensor* output = GetOutput(0); - EXPECT_EQ("Tensor", - output->DebugString(100)); -} - -REGISTER_OP("AllocateTempOp0").Output("output1: float"); - -TEST_F(DeviceKernelOpTest, TestAllocateTempEmpty) { - auto my_compute_func = [](void* kernel, TF_OpKernelContext* ctx) { - TF_Status* s = TF_NewStatus(); - // Allocate empty output - int64_t dim = 0; - TF_Tensor* output = TF_AllocateTemp( - /*context=*/ctx, /*dtype=*/TF_FLOAT, /*dims=*/&dim, - /*num_dims=*/1, s); - EXPECT_EQ(TF_OK, TF_GetCode(s)); - validate_tensor(output, &dim, 1, TF_FLOAT); - TF_SetOutput(ctx, 0, output, s); - TF_DeleteStatus(s); - TF_DeleteTensor(output); - }; - - SetupOp("AllocateTempOp0", "AllocateTemp0", my_compute_func); - - TF_ASSERT_OK(RunOpKernel()); - Tensor* output = GetOutput(0); - EXPECT_EQ("Tensor", - output->DebugString(100)); -} - -REGISTER_OP("AllocateTempOp2x3").Output("output1: float"); - -TEST_F(DeviceKernelOpTest, TestAllocateTempSize2x3) { - auto my_compute_func = [](void* kernel, TF_OpKernelContext* ctx) { - TF_Status* s = TF_NewStatus(); - size_t tensor_size_bytes = 6 * TF_DataTypeSize(TF_FLOAT); - // Allocate 2x3 output - int64_t dim[2] = {2, 3}; - TF_Tensor* output = TF_AllocateTemp( - /*context=*/ctx, /*dtype=*/TF_FLOAT, /*dims=*/dim, - /*num_dims=*/2, s); - EXPECT_EQ(TF_OK, TF_GetCode(s)); - validate_tensor(output, dim, 2, TF_FLOAT); - - // Set output to [1 2 3 4 5 6] - void* data = TF_TensorData(output); - float values[6] = {1, 2, 3, 4, 5, 6}; - set_tensor_data(output, values, tensor_size_bytes, ctx); - TF_SetOutput(ctx, 0, output, s); - TF_DeleteStatus(s); - TF_DeleteTensor(output); - }; - - SetupOp("AllocateTempOp2x3", "AllocateTempOp2x3", my_compute_func); - - TF_ASSERT_OK(RunOpKernel()); - Tensor* output = GetOutput(0); - EXPECT_EQ("Tensor", - output->DebugString(100)); -} - -void validate_tensor(TF_Tensor* tensor, int64_t* dims, int64_t num_dims, - TF_DataType dtype){ - EXPECT_EQ(TF_FLOAT, TF_TensorType(tensor)); - EXPECT_EQ(num_dims, TF_NumDims(tensor)); - for(int i = 0; i < num_dims; ++i){ - EXPECT_EQ(dims[i], TF_Dim(tensor, i)); - } -} - -template -void set_tensor_data(TF_Tensor* tensor, T* values, size_t tensor_size_bytes, - TF_OpKernelContext* ctx){ - T* data = reinterpret_cast(TF_TensorData(tensor)); -#if GOOGLE_CUDA - OpKernelContext* cc_ctx = reinterpret_cast(ctx); - cc_ctx->eigen_gpu_device().memcpyHostToDevice(data, values, - tensor_size_bytes); -#else - memcpy(data, values, tensor_size_bytes); -#endif -} - -} // namespace tensorflow \ No newline at end of file +} // namespace tensorflow From 0a92de8932eceeca244ecb9658d65865f8301fbe Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Wed, 15 Jul 2020 14:26:56 -0700 Subject: [PATCH 0546/2522] Prune unneeded visibility rules PiperOrigin-RevId: 321439117 Change-Id: I9249eef99b74f20ab239f2a6c0babe8b01b1b47c --- tensorflow/core/kernels/BUILD | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index b4129e05f91..eb1e658a744 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -70,12 +70,8 @@ package( package_group( name = "friends", packages = [ - "//learning/brain/contrib/...", - "//learning/brain/research/...", - "//learning/faster_training/...", "//tensorflow/...", "//tensorflow_text/...", - 
"//third_party/car/...", ], ) From 116df300f9f544a703d3ca32afc45a9c3caf0179 Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Wed, 15 Jul 2020 14:33:54 -0700 Subject: [PATCH 0547/2522] Adding filesystem_interface methods for transactions --- .../filesystem/filesystem_interface.h | 133 ++++++++++++++++++ 1 file changed, 133 insertions(+) diff --git a/tensorflow/c/experimental/filesystem/filesystem_interface.h b/tensorflow/c/experimental/filesystem/filesystem_interface.h index 5463eb35088..1e62a948c6d 100644 --- a/tensorflow/c/experimental/filesystem/filesystem_interface.h +++ b/tensorflow/c/experimental/filesystem/filesystem_interface.h @@ -78,6 +78,11 @@ typedef struct TF_Filesystem { void* plugin_filesystem; } TF_Filesystem; +typedef struct TF_TransactionToken{ + void* token; + TF_Filesystem* owner; +} TF_TransactionToken; + /// SECTION 2. Function tables for functionality provided by plugins /// ---------------------------------------------------------------------------- /// @@ -679,6 +684,134 @@ typedef struct TF_FilesystemOps { /// /// DEFAULT IMPLEMENTATION: No op. void (*flush_caches)(const TF_Filesystem* filesystem); + + /// Starts a new transaction. + /// + /// An opaque transaction token is returned in `token`. Ownership of the token is + /// in filesystem. Token will be freed in `end_transaction` call and any + /// access to token after that is invalid. + /// + /// In case of error, plugins must set `status` to a value different than + /// `TF_OK`, free memory allocated for `token` and return -1. + /// + /// The allocation and freeing of memory must happen via the functions sent to + /// core TensorFlow upon registration (see the `TF_FilesystemPluginInfo` + /// structure in Section 4). + /// + /// Plugins: + /// * Must set `status` to `TF_OK` if transaction successfuly started. + /// * Must set `status` to `TF_FAILED_PRECONDITION` if multiple transactions + /// are not supported + /// * Might use any other error value for `status` to signal other errors. + int (*start_transaction)(const TF_Filesystem* filesystem, + TF_TransactionToken** token, TF_Status* status); + + /// Ends transaction and free the `token`. Any access to token after + /// that will be invalid. + /// + /// In case of error, plugins must set `status` to a value different than + /// `TF_OK`, free memory allocated for `token` and return -1. + /// + /// The allocation and freeing of memory must happen via the functions sent to + /// core TensorFlow upon registration (see the `TF_FilesystemPluginInfo` + /// structure in Section 4). + /// + /// Plugins: + /// * Must set `status` to `TF_OK` if transaction successfuly finalized. + /// * Must set `status` to `TF_NOT_FOUND` if token is invalid/not found + /// * Might use any other error value for `status` to signal other errors. + + int (*end_transaction)(const TF_Filesystem* filesystem, + TF_TransactionToken* token, TF_Status* status); + + /// Adds file/directory in the `path` to transaction in `token`. It is a valid + /// operation to add a path that doesn't exist yet to a transaction. + /// + /// In case of error, plugins must set `status` to a value different than + /// `TF_OK`, free memory allocated for `token` and return -1. + /// + /// The allocation and freeing of memory must happen via the functions sent to + /// core TensorFlow upon registration (see the `TF_FilesystemPluginInfo` + /// structure in Section 4). + /// + /// Plugins: + /// * Must set `status` to `TF_OK` if path added to transaction successful. 
+ /// * Must set `status` to `TF_NOT_FOUND` if `token` is invalid. + /// * Must set `status` to `TF_FAILED_PRECONDITION` if file/directory is in + /// another transaction and multiple transactions are not supported + /// * Might use any other error value for `status` to signal other errors. + int (*add_to_transaction)(const TF_Filesystem* filesystem, const char* path, + TF_TransactionToken* token, TF_Status* status); + + /// Returns transaction token for file/directory in the `path`. Note that path + /// may not exist yet but still might be part of a transaction. + /// + /// Transaction token is returned in `token`. Ownership of the token is in + /// filesystem. Token will be freed in `end_transaction` call and any access + /// to token after that is invalid. + /// + /// In case of error, plugins must set `status` to a value different than + /// `TF_OK`, free memory allocated for `token` and return -1. + /// + /// The allocation and freeing of memory must happen via the functions sent to + /// core TensorFlow upon registration (see the `TF_FilesystemPluginInfo` + /// structure in Section 4). + /// + /// Plugins: + /// * Must set `status` to `TF_OK` if a transaction for path is found + /// * Must set `status` to `TF_NOT_FOUND` if `path` is not part of any + /// transaction + /// * Must set `status` to `TF_FAILED_PRECONDITION` if `path` is + /// not in this filesystem. + /// * Might use any other error value for `status` to signal other errors. + int (*get_transaction_for_path)(const TF_Filesystem* filesystem, + const char* path, TF_TransactionToken** token, + TF_Status* status); + + /// Returns transaction token for `path` if it is part of a transaction else + /// starts a new transaction and adds `path` to that transaction + /// + /// Transaction token is returned in `token`. Ownership of the token is in + /// filesystem. Token will be freed in `end_transaction` call and any access + /// to token after that is invalid. + /// + /// In case of error, plugins must set `status` to a value different than + /// `TF_OK`, free memory allocated for `token` and return -1. + /// + /// The allocation and freeing of memory must happen via the functions sent to + /// core TensorFlow upon registration (see the `TF_FilesystemPluginInfo` + /// structure in Section 4). + /// + /// Plugins: + /// * Must set `status` to `TF_OK` if transaction found or successfuly + /// started. + /// * Must set `status` to `TF_NOT_FOUND` if `path` doesn't point to this + /// filesystem + /// * Must set `status` to `TF_FAILED_PRECONDITION` if file/directory is + /// not in any transaction and multiple transactions are not supported. + /// * Might use any other error value for `status` to signal other errors. + int (*get_or_start_transaction_for_path)(const TF_Filesystem* filesystem, + const char* path, + TF_TransactionToken** token, + TF_Status* status); + + /// Decodes transaction token in `token` to human readable format for + /// debugging. + /// + /// A new `char*` buffer must be allocated by this method. Core TensorFlow + /// manages the lifetime of the buffer after the call. Thus, all callers of + /// this method must take ownership of the returned pointer. + /// + /// Plugins must not return `nullptr`. Returning empty strings is allowed. + /// + /// The allocation and freeing of memory must happen via the functions sent to + /// core TensorFlow upon registration (see the `TF_FilesystemPluginInfo` + /// structure in Section 4). + /// + /// DEFAULT IMPLEMENTATION: Dump token and owner address. 
+ char* (*decode_transaction_token)(const TF_Filesystem* filesystem, + const TF_TransactionToken* token); + } TF_FilesystemOps; // LINT.ThenChange(:filesystem_ops_version) From 3dc50547109fb2aa183b8c7de42111c1188f53b7 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Wed, 15 Jul 2020 14:40:18 -0700 Subject: [PATCH 0548/2522] Test nightly_release CPU config. PiperOrigin-RevId: 321441962 Change-Id: Ib76ba9a064ffeb8168975c7ff09d7fe9baf922d8 --- .../release/ubuntu_16/cpu_py36_full/nightly_release.sh | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/nightly_release.sh index 2b770867099..bd686959209 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/nightly_release.sh @@ -27,18 +27,11 @@ install_bazelisk python2.7 tensorflow/tools/ci_build/update_version.py --nightly # Run configure. -export TF_NEED_GCP=1 -export TF_NEED_HDFS=1 -export TF_NEED_S3=1 -export TF_NEED_CUDA=0 -export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.6) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=opt --config=v2 \ - --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ - tensorflow/tools/pip_package:build_pip_package +bazel build --config=release_cpu_linux tensorflow/tools/pip_package:build_pip_package ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --cpu --nightly_flag From 2f48b664cdd3956d3454a5be9e5d4a7c7654e428 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 Jul 2020 14:42:03 -0700 Subject: [PATCH 0549/2522] Remove C++17-only if-constexpr. Both paths will compile and the optimizer will remove the invalid path. 
PiperOrigin-RevId: 321442313 Change-Id: I3f2544a2f6598e5dc1f4f7b62458ad907e861d75 --- tensorflow/core/profiler/utils/xplane_builder.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/profiler/utils/xplane_builder.h b/tensorflow/core/profiler/utils/xplane_builder.h index 01d38f0aa11..10b3727876f 100644 --- a/tensorflow/core/profiler/utils/xplane_builder.h +++ b/tensorflow/core/profiler/utils/xplane_builder.h @@ -45,8 +45,8 @@ class XStatsBuilder { AddStat(metadata)->set_uint64_value(value); } void AddStatValue(const XStatMetadata& metadata, - unsigned long value) { // NOLINT - if constexpr (sizeof(unsigned long) == 8) { // NOLINT + unsigned long value) { // NOLINT + if (sizeof(unsigned long) == 8) { // NOLINT AddStat(metadata)->set_uint64_value(value); } else { AddStat(metadata)->set_uint32_value(value); @@ -60,7 +60,7 @@ class XStatsBuilder { AddStat(metadata)->set_int64_value(value); } void AddStatValue(const XStatMetadata& metadata, long value) { // NOLINT - if constexpr (sizeof(long) == 8) { // NOLINT + if (sizeof(long) == 8) { // NOLINT AddStat(metadata)->set_int64_value(value); } else { AddStat(metadata)->set_int32_value(value); From 3be6cd533ba59ad8edcfd35af4d6fadae01891f6 Mon Sep 17 00:00:00 2001 From: Gaurav Jain Date: Wed, 15 Jul 2020 15:05:08 -0700 Subject: [PATCH 0550/2522] Handle int64 paddings in grappler optimization PiperOrigin-RevId: 321447207 Change-Id: Ie17be34ccc959471d668454fbd21a5bc1132e4ca --- .../grappler/optimizers/constant_folding.cc | 15 ++- .../optimizers/constant_folding_test.cc | 105 ++++++++++-------- 2 files changed, 66 insertions(+), 54 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index bcb8ad37d6c..ce4e101e419 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -2331,11 +2331,16 @@ Status ConstantFolding::SimplifyPad(const GraphProperties& properties, if (GetTensorFromConstNode(node->input(1), &paddings)) { // The node is replaceable iff all values in paddings are 0. bool replaceable = true; - // The operation requires it to be int32 value so we don't check for - // 1nt64. 
- const auto flatten = paddings.flat(); - for (int j = 0; replaceable && j < flatten.size(); ++j) { - replaceable &= flatten(j) == 0; + if (paddings.dtype() == DT_INT32) { + const auto flatten = paddings.flat(); + for (int j = 0; replaceable && j < flatten.size(); ++j) { + replaceable &= flatten(j) == 0; + } + } else { + const auto flatten = paddings.flat(); + for (int j = 0; replaceable && j < flatten.size(); ++j) { + replaceable &= flatten(j) == 0; + } } if (replaceable) { ReplaceOperationWithIdentity(0, properties, node, optimized_graph); diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index 87cf18548b6..59e236d2454 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -220,6 +220,60 @@ class ConstantFoldingTest : public GrapplerTest { auto expected = EvaluateNodes(item.graph, fetch, {{"x", value}}); test::ExpectTensorEqual(expected[0], actual[0]); } + + template + void PaddingWithZeroSize() { + tensorflow::Scope scope = tensorflow::Scope::NewRootScope(); + + auto in1 = ops::Variable(scope.WithOpName("in1"), {4, 6}, DT_INT32); + auto in2 = ops::Variable(scope.WithOpName("in2"), {2, 2}, DT_INT32); + auto paddings1 = + ops::Const(scope.WithOpName("paddings1"), {0, 0, 0, 0}, {2, 2}); + auto paddings2 = + ops::Const(scope.WithOpName("paddings2"), {1, 1, 2, 2}, {2, 2}); + auto c1 = ops::Const(scope.WithOpName("c1"), 1); + auto c2 = ops::Const(scope.WithOpName("c2"), 1); + + ops::PadV2 p1(scope.WithOpName("p1"), in1, paddings1, c1); + ops::PadV2 p2(scope.WithOpName("p2"), in2, paddings2, c2); + + ops::Add out(scope.WithOpName("out"), p1, p2); + + GrapplerItem item; + item.fetch = {"out"}; + TF_CHECK_OK(scope.ToGraphDef(&item.graph)); + + ConstantFolding optimizer(/*cpu_device=*/nullptr); + GraphDef got; + Status status = optimizer.Optimize(/*cluster=*/nullptr, item, &got); + TF_EXPECT_OK(status); + + GraphDef want; + AddNode("in1", "VariableV2", {}, {}, &want); + AddNode("in2", "VariableV2", {}, {}, &want); + AddNode("paddings1", "Const", {}, {}, &want); + AddNode("paddings2", "Const", {}, {}, &want); + AddNode("c1", "Const", {}, {}, &want); + AddNode("c2", "Const", {}, {}, &want); + AddNode( + "p1", "Identity", + {"in1", AsControlDependency("paddings1"), AsControlDependency("c1")}, + {}, &want); + AddNode("p2", "PadV2", {"in2", "paddings2", "c2"}, {}, &want); + AddNode("out", "Add", {"p1", "p2"}, {}, &want); + + CompareGraphs(want, got); + + auto in1_t = GenerateRandomTensor(TensorShape({4, 6})); + auto in2_t = GenerateRandomTensor(TensorShape({2, 2})); + auto tensors_expected = + EvaluateNodes(item.graph, item.fetch, {{"in1", in1_t}, {"in2", in2_t}}); + EXPECT_EQ(1, tensors_expected.size()); + auto tensors = + EvaluateNodes(got, item.fetch, {{"in1", in1_t}, {"in2", in2_t}}); + EXPECT_EQ(1, tensors.size()); + test::ExpectTensorEqual(tensors_expected[0], tensors[0]); + } }; TEST_F(ConstantFoldingTest, SimpleFolding) { @@ -2617,55 +2671,8 @@ TEST_F(ConstantFoldingTest, MergeConcat_PartialFolding) { } TEST_F(ConstantFoldingTest, PaddingWithZeroSize) { - tensorflow::Scope scope = tensorflow::Scope::NewRootScope(); - - auto in1 = ops::Variable(scope.WithOpName("in1"), {4, 6}, DT_INT32); - auto in2 = ops::Variable(scope.WithOpName("in2"), {2, 2}, DT_INT32); - auto paddings1 = - ops::Const(scope.WithOpName("paddings1"), {0, 0, 0, 0}, {2, 2}); - auto paddings2 = - ops::Const(scope.WithOpName("paddings2"), {1, 1, 2, 2}, {2, 
2}); - auto c1 = ops::Const(scope.WithOpName("c1"), 1); - auto c2 = ops::Const(scope.WithOpName("c2"), 1); - - ops::PadV2 p1(scope.WithOpName("p1"), in1, paddings1, c1); - ops::PadV2 p2(scope.WithOpName("p2"), in2, paddings2, c2); - - ops::Add out(scope.WithOpName("out"), p1, p2); - - GrapplerItem item; - item.fetch = {"out"}; - TF_CHECK_OK(scope.ToGraphDef(&item.graph)); - - ConstantFolding optimizer(/*cpu_device=*/nullptr); - GraphDef got; - Status status = optimizer.Optimize(/*cluster=*/nullptr, item, &got); - TF_EXPECT_OK(status); - - GraphDef want; - AddNode("in1", "VariableV2", {}, {}, &want); - AddNode("in2", "VariableV2", {}, {}, &want); - AddNode("paddings1", "Const", {}, {}, &want); - AddNode("paddings2", "Const", {}, {}, &want); - AddNode("c1", "Const", {}, {}, &want); - AddNode("c2", "Const", {}, {}, &want); - AddNode("p1", "Identity", - {"in1", AsControlDependency("paddings1"), AsControlDependency("c1")}, - {}, &want); - AddNode("p2", "PadV2", {"in2", "paddings2", "c2"}, {}, &want); - AddNode("out", "Add", {"p1", "p2"}, {}, &want); - - CompareGraphs(want, got); - - auto in1_t = GenerateRandomTensor(TensorShape({4, 6})); - auto in2_t = GenerateRandomTensor(TensorShape({2, 2})); - auto tensors_expected = - EvaluateNodes(item.graph, item.fetch, {{"in1", in1_t}, {"in2", in2_t}}); - EXPECT_EQ(1, tensors_expected.size()); - auto tensors = - EvaluateNodes(got, item.fetch, {{"in1", in1_t}, {"in2", in2_t}}); - EXPECT_EQ(1, tensors.size()); - test::ExpectTensorEqual(tensors_expected[0], tensors[0]); + PaddingWithZeroSize(); + PaddingWithZeroSize(); } TEST_F(ConstantFoldingTest, SqueezeWithAllDimensionsGreaterThanOne) { From c3330f33d1e4ecaefd4f7b0a53d6018b5246665b Mon Sep 17 00:00:00 2001 From: Wenhao Jia Date: Wed, 15 Jul 2020 15:10:13 -0700 Subject: [PATCH 0551/2522] Add a TpuExecutable class. 
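As a usage sketch only (not part of this change): given an XLA_TpuProgram and run options produced by the TPU compilation and runtime paths, the new class is expected to be driven roughly as follows. The wrapper function and its plumbing are hypothetical; only the TpuExecutable constructor and the ExecuteAsyncOnStream signature come from this patch.

#include "tensorflow/stream_executor/tpu/tpu_executable.h"

// Sketch: run a compiled TPU program once. `core_program`, `hlo_module`,
// `run_options`, and `arguments` are assumed to be produced elsewhere.
xla::StatusOr<xla::ExecutionOutput> RunOnTpu(
    const XLA_TpuProgram* core_program,
    std::unique_ptr<xla::HloModule> hlo_module,
    const xla::ServiceExecutableRunOptions* run_options,
    std::vector<xla::ExecutionInput> arguments) {
  // No host command handler in this sketch (the constructor defaults it).
  xla::tpu::TpuExecutable executable(core_program, std::move(hlo_module));
  // Allocates output buffers (or reuses donated input buffers) and enqueues
  // the program on run_options->stream().
  return executable.ExecuteAsyncOnStream(run_options, std::move(arguments),
                                         /*hlo_execution_profile=*/nullptr);
}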
PiperOrigin-RevId: 321448211 Change-Id: Ib24a519b70c166a0546a923d15784f4e051162ac --- tensorflow/core/tpu/tpu_library_init_fns.inc | 3 + tensorflow/stream_executor/tpu/BUILD | 32 ++ .../stream_executor/tpu/tpu_executable.cc | 298 ++++++++++++++++++ .../stream_executor/tpu/tpu_executable.h | 99 ++++++ .../stream_executor/tpu/tpu_executor_c_api.h | 8 +- 5 files changed, 439 insertions(+), 1 deletion(-) create mode 100644 tensorflow/stream_executor/tpu/tpu_executable.cc create mode 100644 tensorflow/stream_executor/tpu/tpu_executable.h diff --git a/tensorflow/core/tpu/tpu_library_init_fns.inc b/tensorflow/core/tpu/tpu_library_init_fns.inc index 3f084241df3..4b0cbada649 100644 --- a/tensorflow/core/tpu/tpu_library_init_fns.inc +++ b/tensorflow/core/tpu/tpu_library_init_fns.inc @@ -144,6 +144,9 @@ tensorflow::Status SetExecutorStructFn(void* library_handle) { TFTPU_SET_FN(executor_fn, TpuTransferManager_GetByteSizeRequirement); TFTPU_SET_FN(executor_fn, TpuTransferManager_WriteSingleTupleIndexTable); + TFTPU_SET_FN(executor_fn, HardwareLayout_HostShapeToDeviceShape); + TFTPU_SET_FN(executor_fn, HardwareLayout_ShapeSize); + TFTPU_SET_FN(executor_fn, TpuComputationPlacer_New); TFTPU_SET_FN(executor_fn, TpuComputationPlacer_Free); diff --git a/tensorflow/stream_executor/tpu/BUILD b/tensorflow/stream_executor/tpu/BUILD index 41b9d35bed7..111a39c84bb 100644 --- a/tensorflow/stream_executor/tpu/BUILD +++ b/tensorflow/stream_executor/tpu/BUILD @@ -200,6 +200,38 @@ cc_library( alwayslink = True, ) +cc_library( + name = "tpu_executable", + srcs = ["tpu_executable.cc"], + hdrs = ["tpu_executable.h"], + deps = [ + ":c_api_conversions", + ":proto_helper", + ":status_helper", + ":tpu_executor_base", + ":tpu_executor_c_api_hdrs", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:types", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:xla_data_proto_cc", + "//tensorflow/compiler/xla/service:executable", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_execution_profile", + "//tensorflow/compiler/xla/service:transfer_manager", + "//tensorflow/core/tpu:tpu_api", + "//tensorflow/core/tpu/kernels:tpu_compile_c_api_hdrs", + "//tensorflow/core/tpu/kernels:tpu_execute_c_api_hdrs", + "//tensorflow/core/tpu/kernels:tpu_program_c_api_hdrs", + "//tensorflow/stream_executor", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/types:optional", + "@com_google_absl//absl/types:span", + ], +) + cc_library( name = "tpu_platform_interface", srcs = ["tpu_platform_interface.cc"], diff --git a/tensorflow/stream_executor/tpu/tpu_executable.cc b/tensorflow/stream_executor/tpu/tpu_executable.cc new file mode 100644 index 00000000000..b8656bc97fd --- /dev/null +++ b/tensorflow/stream_executor/tpu/tpu_executable.cc @@ -0,0 +1,298 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/stream_executor/tpu/tpu_executable.h" + +#include +#include +#include +#include + +#include "absl/algorithm/container.h" +#include "tensorflow/compiler/xla/service/transfer_manager.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/compiler/xla/util.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/tpu/kernels/tpu_execute_c_api.h" +#include "tensorflow/core/tpu/tpu_api.h" +#include "tensorflow/stream_executor/tpu/c_api_conversions.h" +#include "tensorflow/stream_executor/tpu/proto_helper.h" +#include "tensorflow/stream_executor/tpu/status_helper.h" +#include "tensorflow/stream_executor/tpu/tpu_executor_c_api.h" +#include "tensorflow/stream_executor/tpu/tpu_platform.h" + +namespace xla { +namespace tpu { +namespace { + +using HostCommandHandler = TpuExecutable::HostCommandHandler; + +// Write the tuple index buffers (arrays of pointers). +static Status PopulateResultTupleBuffers(const ShapedBuffer& result, + se::Stream* stream, + se::Stream* transfer_stream) { + TF_ASSIGN_OR_RETURN(auto transfer_manager, TransferManager::GetForPlatform( + stream->parent()->platform())); + if (transfer_manager->CanShapedBufferBeAccessedNow(stream->parent(), + result)) { + TF_RETURN_IF_ERROR(transfer_manager->WriteTupleIndexTablesAsync( + transfer_stream ? transfer_stream : stream, result)); + if (transfer_stream && transfer_stream != stream) { + stream->ThenWaitFor(transfer_stream); + } + return Status::OK(); + } else { + return transfer_manager->WriteTupleIndexTablesAsync(stream, result); + } +} + +xla::Shape HostShapeToDeviceShape(const xla::Shape& host_shape) { + XLA_Shape c_host_shape; + XLA_Shape c_device_shape; + TpuConversions::XlaShapeToCShape(host_shape, &c_host_shape); + tensorflow::tpu::ExecutorApiFn()->HardwareLayout_HostShapeToDeviceShapeFn( + &c_host_shape, &c_device_shape); + xla::Shape device_shape = TpuConversions::CShapeToXlaShape(&c_device_shape); + TpuConversions::CShapeCleanup(&c_host_shape); + TpuConversions::CShapeCleanup(&c_device_shape); + return device_shape; +} + +int64 ShapeSize(const xla::Shape& shape) { + XLA_Shape c_shape; + TpuConversions::XlaShapeToCShape(shape, &c_shape); + int64 size = + tensorflow::tpu::ExecutorApiFn()->HardwareLayout_ShapeSizeFn(&c_shape); + TpuConversions::CShapeCleanup(&c_shape); + return size; +} + +} // namespace + +TpuExecutable::TpuExecutable(const XLA_TpuProgram* core_program, + std::unique_ptr hlo_module, + HostCommandHandler host_command_handler) + : Executable(std::move(hlo_module), /*hlo_profile_printer_data=*/nullptr, + /*hlo_profile_index_map=*/nullptr), + core_program_(core_program), + host_command_handler_(std::move(host_command_handler)) {} + +StatusOr TpuExecutable::AllocateOutputMemoryWithInputReuse( + const Shape& host_shape, const HloInputOutputAliasConfig& alias_config, + se::DeviceMemoryAllocator* allocator, + std::vector* arguments, se::Stream* stream, + se::Stream* transfer_stream) { + auto stream_exec = stream->parent(); + auto device_ordinal = stream_exec->device_ordinal(); + VLOG(3) << "AllocateOutputMemoryWithInputReuse, device = " << device_ordinal + << " host_shape = " << ShapeUtil::HumanStringWithLayout(host_shape); + Shape device_shape = HostShapeToDeviceShape(host_shape); + + if (VLOG_IS_ON(3)) { + VLOG(3) << "AllocateOutputMemoryWithInputReuse, device = " << device_ordinal + << " host_shape = " << 
ShapeUtil::HumanStringWithLayout(host_shape); + if (!Shape::Equal().MinorToMajorOnlyInLayout()(host_shape, device_shape)) { + VLOG(3) << "Rewrote host_shape to device_shape: " + << ShapeUtil::HumanStringWithLayout(host_shape) << " -> " + << ShapeUtil::HumanStringWithLayout(device_shape); + } + } + + ExecutionOutput result(host_shape, std::move(device_shape), allocator, + device_ordinal); + // Iterate through and allocate a buffer for each shape index, checking for + // possible input buffer reuse. + int64 reused_buffer_bytes = 0; + int64 total_result_buffer_bytes = 0; + for (auto& pair : result.MutableResult()->buffers()) { + const ShapeIndex& result_index = pair.first; + se::DeviceMemoryBase& result_buffer = pair.second; + int64 allocation_bytes = ShapeSize(ShapeUtil::GetSubshape( + result.Result().on_device_shape(), result_index)); + total_result_buffer_bytes += allocation_bytes; + + // Return an InternalError if result_index is invalid. This avoids failing + // the CHECK when calling GetAliasedParameter + if (!ShapeUtil::IndexIsValid(alias_config.shape(), result_index)) { + return InternalError("result_index is invalid: %s", + result_index.ToString()); + } + + absl::optional alias = + alias_config.GetAliasedParameter(result_index); + if (alias) { + TF_RET_CHECK(alias->parameter_number < arguments->size()); + ExecutionInput& input = (*arguments)[alias->parameter_number]; + MaybeOwningDeviceMemory* device_memory = + input.MutableBuffer(alias->parameter_index); + if (auto owning = device_memory->Release()) { + // If the caller passes the ownership of the device memory, reuse it + // as the output buffer. It is up to the caller whether or not to + // donate a buffer; the aliasing information describes which buffers + // may alias, not buffers that must alias. + se::DeviceMemoryBase device_memory_base = owning->Release(); + *device_memory = device_memory_base; + result_buffer = device_memory_base; + reused_buffer_bytes += allocation_bytes; + // The caller is giving us the input buffer, but in case of error of the + // execute call, we should not be releasing it as it contains valid data + // (for example, it is a parameter which the user wants us to alias, in + // a gradient update computation). So we store the index into the result + // in the aliased vactor, which will be fed to the ExecutionOutput, + // which will be using the indices to drop the addresses from its own + // ScopedShapedBuffer result, if the ExecutionOutput is not committed. + result.AddAliasedIndex(result_index); + } + } + + // We need to allocate a new output buffer for two cases: + // - There is no alias between this output and any input. + // - There is an alias, but the xla doesn't own the input memory so it can't + // donate buffer to the computation. + if (result_buffer.is_null()) { + const Shape& on_device_shape = result.Result().on_device_shape(); + const Shape& on_device_subshape = + ShapeUtil::GetSubshape(on_device_shape, result_index); + TF_ASSIGN_OR_RETURN( + auto allocated_buffer, + allocator->Allocate(device_ordinal, allocation_bytes, + /*retry_on_failure=*/true, + on_device_subshape.layout().memory_space())); + // Store the allocated buffer in our ScopedShapedBuffer, which takes + // ownership. 
+ result_buffer = allocated_buffer.Release(); + } + TF_RET_CHECK(allocation_bytes == 0 || result_buffer != nullptr); + } + + VLOG(1) << "Reused " << reused_buffer_bytes + << " parameter buffers (total result buffer size: " + << total_result_buffer_bytes << ")"; + + TF_RETURN_IF_ERROR( + PopulateResultTupleBuffers(result.Result(), stream, transfer_stream)); + return std::move(result); +} + +StatusOr TpuExecutable::ExecuteAsyncOnStream( + const ServiceExecutableRunOptions* run_options, + std::vector arguments, + HloExecutionProfile* /*hlo_execution_profile*/) { + std::vector memory_bases; + memory_bases.reserve(arguments.size()); + for (auto& argument : arguments) { + memory_bases.push_back(argument.Buffer({}).AsDeviceMemoryBase()); + } + se::Stream* stream = run_options->stream(); + + CHECK_NE(run_options->allocator(), nullptr); + const Shape& shape = + hlo_module_ == nullptr ? ShapeUtil::MakeNil() : result_shape(); + const HloInputOutputAliasConfig& alias_config = + hlo_module_ == nullptr ? HloInputOutputAliasConfig() + : hlo_module_->input_output_alias_config(); + TF_ASSIGN_OR_RETURN( + ExecutionOutput result, + AllocateOutputMemoryWithInputReuse( + shape, alias_config, run_options->allocator(), &arguments, stream, + run_options->run_options().host_to_device_stream())); + + MarkToBeReleasedArguments(absl::MakeSpan(arguments), result); + + // Address of the buffer in TPU memory that is being speculated. + absl::optional cross_program_prefetch_addr; + if (hlo_module_) { + for (const auto& [parameter, index] : + hlo_module_->CrossProgramPrefetches()) { + CHECK_LT(parameter, arguments.size()); + // Ensure the cross program prefetched buffer doesn't alias with any + // program outputs. If the input and output aliased, the buffer could be + // invalidated during program execution and the program could read stale + // data instead of fresh data. + auto it = arguments[parameter].MutableBuffers()->find({index}); + CHECK(it != arguments[parameter].MutableBuffers()->end()); + if (absl::c_none_of(result.Result().buffers(), [&](auto index_addr_pair) { + return index_addr_pair.second.IsSameAs( + it->second.AsDeviceMemoryBase()); + })) { + // Supports only one cross-program prefetch address. 
+ cross_program_prefetch_addr = it->second.AsDeviceMemoryBase(); + } + } + } + + TF_RETURN_IF_ERROR(LoadProgramAndEnqueueToStream( + *run_options, memory_bases, result.Result().root_buffer(), + cross_program_prefetch_addr)); + return std::move(result); +} + +Status TpuExecutable::LoadProgramAndEnqueueToStream( + const ServiceExecutableRunOptions& run_options, + absl::Span arguments, + se::DeviceMemoryBase result, + absl::optional cross_program_prefetch_addr) { + SE_DeviceMemoryBase* arguments_bases = nullptr; + if (!arguments.empty()) { + arguments_bases = new SE_DeviceMemoryBase[arguments.size()]; + for (int i = 0; i < arguments.size(); i++) { + arguments_bases[i] = + SE_DeviceMemoryBase{const_cast(arguments[i].opaque()), + arguments[i].size(), arguments[i].payload()}; + } + } + + SE_DeviceMemoryBase result_base{result.opaque(), result.size(), + result.payload()}; + SE_DeviceMemoryBase prefetch_base; + if (cross_program_prefetch_addr.has_value()) { + prefetch_base = SE_DeviceMemoryBase{cross_program_prefetch_addr->opaque(), + cross_program_prefetch_addr->size(), + cross_program_prefetch_addr->payload()}; + } + int32 rng_seed = run_options.run_options().rng_seed(); + + XLA_DeviceAssignment c_dev_assign{/*bytes=*/nullptr, /*size=*/0}; + auto dev_assign = run_options.run_options().device_assignment(); + stream_executor::tpu::SerializedProto dev_assign_serialized; + if (dev_assign != nullptr) { + xla::DeviceAssignmentProto dev_assign_proto; + TF_RETURN_IF_ERROR(dev_assign->Serialize(&dev_assign_proto)); + dev_assign_serialized = + stream_executor::tpu::SerializeProto(dev_assign_proto); + c_dev_assign.bytes = dev_assign_serialized.bytes; + c_dev_assign.size = dev_assign_serialized.size; + } + + auto stream = + tensorflow::TpuPlatform::GetRegisteredPlatform()->stream_map()->at( + run_options.run_options().stream()->implementation()); + StatusHelper status; + + TpuExecutable_LoadProgramAndEnqueueToStream( + core_program_, arguments_bases, arguments.size(), &result_base, + (cross_program_prefetch_addr.has_value() ? &prefetch_base : nullptr), + rng_seed, &c_dev_assign, stream, status.c_status); + + if (dev_assign != nullptr) { + stream_executor::tpu::SerializedProto_Free(dev_assign_serialized); + } + delete[] arguments_bases; + return status.status(); +} + +} // namespace tpu +} // namespace xla diff --git a/tensorflow/stream_executor/tpu/tpu_executable.h b/tensorflow/stream_executor/tpu/tpu_executable.h new file mode 100644 index 00000000000..74ee0e0379e --- /dev/null +++ b/tensorflow/stream_executor/tpu/tpu_executable.h @@ -0,0 +1,99 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_EXECUTABLE_H_ +#define TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_EXECUTABLE_H_ + +#include +#include + +#include "absl/types/optional.h" +#include "absl/types/span.h" +#include "tensorflow/compiler/xla/service/executable.h" +#include "tensorflow/compiler/xla/service/hlo_execution_profile.h" +#include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/service_executable_run_options.h" +#include "tensorflow/compiler/xla/shape.h" +#include "tensorflow/compiler/xla/status.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/core/tpu/kernels/tpu_program_c_api.h" +#include "tensorflow/stream_executor/device_memory.h" +#include "tensorflow/stream_executor/device_memory_allocator.h" +#include "tensorflow/stream_executor/stream_executor.h" + +namespace xla { +namespace tpu { + +// An executable capable of being fed to a TPU device via TpuExecutor. +class TpuExecutable : public Executable { + public: + using HostCommandHandler = std::function; + + // Constructs an executable that holds a non-owning reference to an + // XLA_TpuProgram. + TpuExecutable(const XLA_TpuProgram* core_program, + std::unique_ptr hlo_module, + HostCommandHandler host_command_handler = nullptr); + + ~TpuExecutable() override = default; + + StatusOr ExecuteAsyncOnStream( + const ServiceExecutableRunOptions* run_options, + std::vector arguments, + HloExecutionProfile* hlo_execution_profile) override; + + const XLA_TpuProgram* core_program() const { return core_program_; } + + // Same as AllocateOutputMemory, except that input buffers can be reused + // as output buffers. See UserBufferAlias class comment for more details on + // the buffer reuse. + // + // `alias_config` indicates which input and output buffers can be aliased. + // + // `arguments` are ExecutionInput containing the input parameters. Currently + // only a single input parameter (typically a tuple) is supported on TPU. For + // each element in the shape tree, if the element holds the ownership of the + // memory, it is considered donated and XLA will potentially reuse it as + // output buffers. + // + // The optional 'transfer_stream' parameter enables transfers (for tuple + // tables) to be performed on a separate stream to 'stream'. 
+ static StatusOr AllocateOutputMemoryWithInputReuse( + const Shape& host_shape, const HloInputOutputAliasConfig& alias_config, + se::DeviceMemoryAllocator* allocator, + std::vector* arguments, se::Stream* stream, + se::Stream* transfer_stream = nullptr); + + private: + Status LoadProgramAndEnqueueToStream( + const ServiceExecutableRunOptions& run_options, + absl::Span arguments, + stream_executor::DeviceMemoryBase result, + absl::optional + cross_program_prefetch_addr); + + const XLA_TpuProgram* const core_program_; + + const HostCommandHandler host_command_handler_; + + TF_DISALLOW_COPY_AND_ASSIGN(TpuExecutable); +}; + +} // namespace tpu +} // namespace xla + +#endif // TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_EXECUTABLE_H_ diff --git a/tensorflow/stream_executor/tpu/tpu_executor_c_api.h b/tensorflow/stream_executor/tpu/tpu_executor_c_api.h index 5911d651b66..6962ce930bf 100644 --- a/tensorflow/stream_executor/tpu/tpu_executor_c_api.h +++ b/tensorflow/stream_executor/tpu/tpu_executor_c_api.h @@ -282,7 +282,6 @@ void TpuTransferManager_TransferLiteralFromDevice( XLA_TransferManager* manager, SE_Stream* stream, XLA_ShapedBuffer* device_buffer, XLA_Literal* literal, XLA_StatusCallbackFn callback, void* ctx); - int64_t TpuTransferManager_GetByteSizeRequirement(XLA_TransferManager* manager, XLA_Shape* shape); void TpuTransferManager_WriteSingleTupleIndexTable( @@ -290,6 +289,10 @@ void TpuTransferManager_WriteSingleTupleIndexTable( SE_DeviceMemoryBase* elements, size_t elements_len, XLA_Shape* shape, SE_DeviceMemoryBase* region, SE_Status* status); +void HardwareLayout_HostShapeToDeviceShape(XLA_Shape* host_shape, + XLA_Shape* device_shape); +int64_t HardwareLayout_ShapeSize(XLA_Shape* shape); + XLA_ComputationPlacer* TpuComputationPlacer_New(); void TpuComputationPlacer_Free(XLA_ComputationPlacer* placer); @@ -379,6 +382,9 @@ struct TfTpu_ExecutorApiFn { TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_GetByteSizeRequirement); TFTPU_ADD_FN_IN_STRUCT(TpuTransferManager_WriteSingleTupleIndexTable); + TFTPU_ADD_FN_IN_STRUCT(HardwareLayout_HostShapeToDeviceShape); + TFTPU_ADD_FN_IN_STRUCT(HardwareLayout_ShapeSize); + TFTPU_ADD_FN_IN_STRUCT(TpuComputationPlacer_New); TFTPU_ADD_FN_IN_STRUCT(TpuComputationPlacer_Free); }; From 0f26eee0730f276715bf4463495322d619444130 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Wed, 15 Jul 2020 15:10:25 -0700 Subject: [PATCH 0552/2522] Added method to get storage types that support automatic zero clamping. 
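As an illustrative sketch only: a kernel author could use the new query to prefer storage types with hardware zero clamping and fall back to the existing GetSupportedStorages() otherwise. The selection policy below is hypothetical; only the two Environment methods are from this change.

// Sketch (inside namespace tflite::gpu::cl): prefer a storage type whose
// out-of-bounds reads in the H/W dimensions are clamped to zero by hardware.
TensorStorageType ChooseStorageType(const Environment& env) {
  const auto zero_clamp = env.GetSupportedStoragesWithHWZeroClampSupport();
  if (!zero_clamp.empty()) return zero_clamp.front();
  // Fall back to any supported storage type, defaulting to plain buffers.
  const auto any = env.GetSupportedStorages();
  return any.empty() ? TensorStorageType::BUFFER : any.front();
}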
PiperOrigin-RevId: 321448239 Change-Id: Ic6ff2bfea8694e5f6aed1398b082b5d95f723a24 --- tensorflow/lite/delegates/gpu/cl/environment.cc | 13 +++++++++++++ tensorflow/lite/delegates/gpu/cl/environment.h | 4 ++++ 2 files changed, 17 insertions(+) diff --git a/tensorflow/lite/delegates/gpu/cl/environment.cc b/tensorflow/lite/delegates/gpu/cl/environment.cc index 6b6ab84f148..c8b0b56978c 100644 --- a/tensorflow/lite/delegates/gpu/cl/environment.cc +++ b/tensorflow/lite/delegates/gpu/cl/environment.cc @@ -198,6 +198,19 @@ std::vector Environment::GetSupportedStorages() const { return storage_types; } +std::vector +Environment::GetSupportedStoragesWithHWZeroClampSupport() const { + std::vector storage_types; + for (auto storage_type : + {TensorStorageType::TEXTURE_2D, TensorStorageType::TEXTURE_ARRAY, + TensorStorageType::TEXTURE_3D}) { + if (IsSupported(storage_type)) { + storage_types.push_back(storage_type); + } + } + return storage_types; +} + bool Environment::IsSupported(TensorStorageType storage_type) const { switch (storage_type) { case TensorStorageType::TEXTURE_2D: diff --git a/tensorflow/lite/delegates/gpu/cl/environment.h b/tensorflow/lite/delegates/gpu/cl/environment.h index b40d22d3dd6..640f2d8cac3 100644 --- a/tensorflow/lite/delegates/gpu/cl/environment.h +++ b/tensorflow/lite/delegates/gpu/cl/environment.h @@ -55,6 +55,10 @@ class Environment { std::vector GetSupportedPrecisions() const; bool IsSupported(CalculationsPrecision precision) const; std::vector GetSupportedStorages() const; + // returns storage types that support zero clamping when reading OOB in HW + // (Height/Width) dimensions. + std::vector GetSupportedStoragesWithHWZeroClampSupport() + const; bool IsSupported(TensorStorageType storage_type) const; absl::Status Init(); From ae85ce56b77da3534428415996f5832caa24a248 Mon Sep 17 00:00:00 2001 From: Katherine Tian Date: Wed, 15 Jul 2020 22:29:01 +0000 Subject: [PATCH 0553/2522] update tests --- tensorflow/python/kernel_tests/map_ops_test.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tensorflow/python/kernel_tests/map_ops_test.py b/tensorflow/python/kernel_tests/map_ops_test.py index 09df2cca134..e9355af27ba 100644 --- a/tensorflow/python/kernel_tests/map_ops_test.py +++ b/tensorflow/python/kernel_tests/map_ops_test.py @@ -105,6 +105,16 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase): s = map_ops.tensor_map_size(m) self.assertAllEqual(s, 0) self.assertAllClose(e, v) + + def testTensorMapEraseFromEmptyMapFails(self): + m = map_ops.empty_tensor_map() + k = constant_op.constant(1.0) + v = constant_op.constant(2.0) + + with self.assertRaisesRegex(errors.InvalidArgumentError, + "Trying to erase non-existent item."): + m, e = map_ops.tensor_map_erase(m, k) + self.evaluate(e) def testTensorMapEraseMissingKeyFails(self): m = map_ops.empty_tensor_map() From ad98d28ec2a8cf32de790d173c6ed9f6305fd251 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 15 Jul 2020 15:48:16 -0700 Subject: [PATCH 0554/2522] Integrate LLVM at https://github.com/llvm/llvm-project/commit/f233b92f92a6 PiperOrigin-RevId: 321454533 Change-Id: Icb4ebf5eb2889fbe494d521657dd20a66a243bb4 --- .../mlir/hlo/tests/chlo_infer_shape_type_methods.mlir | 2 +- .../mlir/hlo/tests/chlo_legalize_to_hlo_broadcasts.mlir | 6 +++--- .../compiler/mlir/xla/tests/legalize-tf-BatchMatMulV2.mlir | 6 +++--- .../mlir/xla/tests/legalize-tf-binary-elementwise.mlir | 6 +++--- tensorflow/workspace.bzl | 4 ++-- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tensorflow/compiler/mlir/hlo/tests/chlo_infer_shape_type_methods.mlir b/tensorflow/compiler/mlir/hlo/tests/chlo_infer_shape_type_methods.mlir index 65074325563..99aab532688 100644 --- a/tensorflow/compiler/mlir/hlo/tests/chlo_infer_shape_type_methods.mlir +++ b/tensorflow/compiler/mlir/hlo/tests/chlo_infer_shape_type_methods.mlir @@ -8,7 +8,7 @@ func @broadcast_add(%arg0: tensor, %arg1: tensor) -> tensor<1xindex> { // CHECK-DAG: %[[ARG0_S:.+]] = shape.shape_of %[[ARG0]] // CHECK-DAG: %[[ARG1_S:.+]] = shape.shape_of %[[ARG1]] - // CHECK-DAG: %[[BCAST_S:.+]] = "shape.broadcast"(%[[ARG0_S]], %[[ARG1_S]]) + // CHECK-DAG: %[[BCAST_S:.+]] = shape.broadcast %[[ARG0_S]], %[[ARG1_S]] // CHECK: %[[EXTENTS:.+]] = shape.to_extent_tensor %[[BCAST_S]] // CHECK: return %[[EXTENTS]] %0 = chlo.broadcast_add %arg0, %arg1 : (tensor, tensor) -> tensor diff --git a/tensorflow/compiler/mlir/hlo/tests/chlo_legalize_to_hlo_broadcasts.mlir b/tensorflow/compiler/mlir/hlo/tests/chlo_legalize_to_hlo_broadcasts.mlir index 2c0e2d7f170..20ad579c9cf 100644 --- a/tensorflow/compiler/mlir/hlo/tests/chlo_legalize_to_hlo_broadcasts.mlir +++ b/tensorflow/compiler/mlir/hlo/tests/chlo_legalize_to_hlo_broadcasts.mlir @@ -18,7 +18,7 @@ func @dynamicBroadcast(%arg0: tensor, %arg1: tensor) -> tensor : tensor<1xi64>} // CHECK-DAG: %[[ARG1_B:.+]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG1]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} @@ -39,7 +39,7 @@ func @dynamicBroadcastComplex(%arg0: tensor, %arg1: tensor) -> t // CHECK-DAG: %[[ARG1_S:.+]] = shape.shape_of %[[ARG1]] // CHECK-NEXT: %[[WITNESS:.+]] = shape.cstr_broadcastable %[[ARG0_S]], %[[ARG1_S]] // CHECK-NEXT: %[[FINAL_RESULT:.+]] = shape.assuming %[[WITNESS]] - // CHECK-NEXT: %[[RESULT_S:.+]] = "shape.broadcast"(%[[ARG0_S]], %[[ARG1_S]]) + // CHECK-NEXT: %[[RESULT_S:.+]] = shape.broadcast %[[ARG0_S]], %[[ARG1_S]] // CHECK-NEXT: %[[RESULT_EXTENTS:.+]] = shape.to_extent_tensor %[[RESULT_S]] // CHECK-DAG: %[[ARG0_B:.+]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG0]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor, tensor<2xindex>) -> tensor // CHECK-DAG: %[[ARG1_B:.+]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG1]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} : (tensor, tensor<2xindex>) -> tensor @@ -60,7 +60,7 @@ func @dynamicBroadcastCompare(%arg0: tensor, %arg1: tensor) -> t // CHECK-DAG: %[[ARG1_S:.+]] = shape.shape_of %[[ARG1]] // CHECK: %[[WITNESS:.+]] = shape.cstr_broadcastable %[[ARG0_S]], %[[ARG1_S]] // CHECK: %[[FINAL_RESULT:.+]] = shape.assuming %[[WITNESS]] - // CHECK: %[[RESULT_S:.+]] = "shape.broadcast"(%[[ARG0_S]], %[[ARG1_S]]) + // CHECK: %[[RESULT_S:.+]] = shape.broadcast %[[ARG0_S]], %[[ARG1_S]] // CHECK: %[[RESULT_EXTENTS:.+]] = shape.to_extent_tensor %[[RESULT_S]] // CHECK-DAG: %[[ARG0_B:.+]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG0]], %[[RESULT_EXTENTS]]) 
{broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor, tensor<2xindex>) -> tensor // CHECK-DAG: %[[ARG1_B:.+]] = "mhlo.dynamic_broadcast_in_dim"(%[[ARG1]], %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} : (tensor, tensor<2xindex>) -> tensor diff --git a/tensorflow/compiler/mlir/xla/tests/legalize-tf-BatchMatMulV2.mlir b/tensorflow/compiler/mlir/xla/tests/legalize-tf-BatchMatMulV2.mlir index de03921f091..ddfc02af7c4 100644 --- a/tensorflow/compiler/mlir/xla/tests/legalize-tf-BatchMatMulV2.mlir +++ b/tensorflow/compiler/mlir/xla/tests/legalize-tf-BatchMatMulV2.mlir @@ -12,11 +12,11 @@ func @batchmatmulv2_basic(%arg0: tensor<1x4x2xf32>, %arg1: tensor<3x2x4xf32>) -> // CHECK: [[CM2:%.*]] = constant -2 : i32 // CHECK: [[LHSHEAD:%.*]], [[LHSTAIL:%.*]] = "shape.split_at"([[LHSSHAPE]], [[CM2]]) : (!shape.shape, i32) -> (!shape.shape, !shape.shape) // CHECK: [[RHSHEAD:%.*]], [[RHSTAIL:%.*]] = "shape.split_at"([[RHSSHAPE]], [[CM2]]) : (!shape.shape, i32) -> (!shape.shape, !shape.shape) -// CHECK: [[BCASTHEAD:%.*]] = "shape.broadcast"([[LHSHEAD]], [[RHSHEAD]]) : (!shape.shape, !shape.shape) -> !shape.shape -// CHECK: [[LHSBCASTSHAPE:%.*]] = "shape.concat"([[BCASTHEAD]], [[LHSTAIL]]) : (!shape.shape, !shape.shape) -> !shape.shape +// CHECK: [[BCASTHEAD:%.*]] = shape.broadcast [[LHSHEAD]], [[RHSHEAD]] +// CHECK: [[LHSBCASTSHAPE:%.*]] = shape.concat [[BCASTHEAD]], [[LHSTAIL]] // CHECK: [[LHSSHAPEEXTENTS:%.*]] = shape.to_extent_tensor [[LHSBCASTSHAPE]] : tensor<3xindex> // CHECK: [[LHSBCAST:%.*]] = "mhlo.dynamic_broadcast_in_dim"([[LHS]], [[LHSSHAPEEXTENTS]]) {broadcast_dimensions = dense<[0, 1, 2]> : tensor<3xi64>} : (tensor<1x4x2xf32>, tensor<3xindex>) -> tensor<3x4x2xf32> -// CHECK: [[RHSBCASTSHAPE:%.*]] = "shape.concat"([[BCASTHEAD]], [[RHSTAIL]]) : (!shape.shape, !shape.shape) -> !shape.shape +// CHECK: [[RHSBCASTSHAPE:%.*]] = shape.concat [[BCASTHEAD]], [[RHSTAIL]] // CHECK: [[RHSSHAPEEXTENTS:%.*]] = shape.to_extent_tensor [[RHSBCASTSHAPE]] : tensor<3xindex> // CHECK: [[RHSBCAST:%.*]] = "mhlo.dynamic_broadcast_in_dim"([[RHS]], [[RHSSHAPEEXTENTS]]) {broadcast_dimensions = dense<[0, 1, 2]> : tensor<3xi64>} : (tensor<3x2x4xf32>, tensor<3xindex>) -> tensor<3x2x4xf32> // CHECK: [[RESULT:%.*]] = "mhlo.dot_general"([[LHSBCAST]], [[RHSBCAST]]) {dot_dimension_numbers = {lhs_batching_dimensions = dense<0> : tensor<1xi64>, lhs_contracting_dimensions = dense<2> : tensor<1xi64>, rhs_batching_dimensions = dense<0> : tensor<1xi64>, rhs_contracting_dimensions = dense<1> : tensor<1xi64>}} : (tensor<3x4x2xf32>, tensor<3x2x4xf32>) -> tensor<3x4x4xf32> diff --git a/tensorflow/compiler/mlir/xla/tests/legalize-tf-binary-elementwise.mlir b/tensorflow/compiler/mlir/xla/tests/legalize-tf-binary-elementwise.mlir index 45c90d26ab4..fd9c14c7c0f 100644 --- a/tensorflow/compiler/mlir/xla/tests/legalize-tf-binary-elementwise.mlir +++ b/tensorflow/compiler/mlir/xla/tests/legalize-tf-binary-elementwise.mlir @@ -48,7 +48,7 @@ func @add_dynamic(%arg0: tensor, %arg1: tensor) -> tensor : tensor<1xi64>} // CHECK-NEXT: %[[RHS_BCAST:.+]] = "mhlo.dynamic_broadcast_in_dim"(%arg1, %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<[0, 1]> : tensor<2xi64>} @@ -201,7 +201,7 @@ func @equal_dynamic(%arg0: tensor, %arg1: tensor<1xi32>) -> tensor // NOT-CHECK-NEXT: %[[WITNESS:.+]] = shape.cstr_broadcastable %[[LHS_SHAPE]], %[[RHS_SHAPE]] // NOT-CHECK-NEXT: shape.assuming %[[WITNESS]] -> (tensor) { // NOT-CHECK-DAG: %[[LHS_SHAPE1:.+]] = shape.shape_of %arg0 - // NOT-CHECK-NEXT: %[[RESULT_SHAPE:.+]] = 
"shape.broadcast"(%[[LHS_SHAPE1]], %[[RHS_SHAPE]]) + // NOT-CHECK-NEXT: %[[RESULT_SHAPE:.+]] = shape.broadcast %[[LHS_SHAPE1]], %[[RHS_SHAPE]] // NOT-CHECK-NEXT: %[[RESULT_EXTENTS:.+]] = shape.to_extent_tensor %[[RESULT_SHAPE]] // NOT-CHECK-DAG: %[[LHS_BCAST:.+]] = "mhlo.dynamic_broadcast_in_dim"(%arg0, %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} // NOT-CHECK-DAG: %[[RHS_BCAST:.+]] = "mhlo.dynamic_broadcast_in_dim"(%arg1, %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} @@ -290,7 +290,7 @@ func @greater_dynamic(%arg0: tensor, %arg1: tensor) -> tensor : tensor<1xi64>} // CHECK-DAG: %[[RHS_BCAST:.+]] = "mhlo.dynamic_broadcast_in_dim"(%arg1, %[[RESULT_EXTENTS]]) {broadcast_dimensions = dense<0> : tensor<1xi64>} diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 946e56b5b1e..eec97c99111 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -710,8 +710,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. - LLVM_COMMIT = "c11c78a1bd0b3275bf845604aae3c94e97acceed" - LLVM_SHA256 = "f4ec2918bbde384121152d72b46b8c7094aed08fa61a04803ff6c7b3c18448c1" + LLVM_COMMIT = "f233b92f92a669f9f2cc6d08d57ca4931dd61b78" + LLVM_SHA256 = "020aff0a2cb5246e152d274a59006d81eb8338549b03c0e2e7fd1b630736fd53" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From 9d94482224acde044692d74107339a29f862cbac Mon Sep 17 00:00:00 2001 From: Advait Jain Date: Wed, 15 Jul 2020 16:20:40 -0700 Subject: [PATCH 0555/2522] Change some getters to not be inline. This enables some memmory savings without having a meaningful impact on the performance. Tested latency w/ benchmark_model on Pixel 4 using MobileNet + MobileBERT, no observed latency change. PiperOrigin-RevId: 321459641 Change-Id: I774adfbe60afe06984793912d37f8f016574702e --- tensorflow/lite/kernels/kernel_util.cc | 26 ++++++++++++++ tensorflow/lite/kernels/kernel_util.h | 48 +++++++++++--------------- 2 files changed, 47 insertions(+), 27 deletions(-) diff --git a/tensorflow/lite/kernels/kernel_util.cc b/tensorflow/lite/kernels/kernel_util.cc index 164aec3f224..11d8cb67dd2 100644 --- a/tensorflow/lite/kernels/kernel_util.cc +++ b/tensorflow/lite/kernels/kernel_util.cc @@ -28,6 +28,32 @@ limitations under the License. namespace tflite { +const TfLiteTensor* GetInput(const TfLiteContext* context, + const TfLiteNode* node, int index) { + return &context->tensors[node->inputs->data[index]]; +} + +TfLiteTensor* GetVariableInput(TfLiteContext* context, const TfLiteNode* node, + int index) { + TfLiteTensor* tensor = &context->tensors[node->inputs->data[index]]; + return (tensor->is_variable) ? 
tensor : nullptr; +} + +TfLiteTensor* GetOutput(TfLiteContext* context, const TfLiteNode* node, + int index) { + return &context->tensors[node->outputs->data[index]]; +} + +const TfLiteTensor* GetOptionalInputTensor(const TfLiteContext* context, + const TfLiteNode* node, int index) { + const bool use_tensor = index < node->inputs->size && + node->inputs->data[index] != kTfLiteOptionalTensor; + if (use_tensor) { + return &context->tensors[node->inputs->data[index]]; + } + return nullptr; +} + // Per-axis TfLiteStatus PopulateConvolutionQuantizationParams( TfLiteContext* context, const TfLiteTensor* input, diff --git a/tensorflow/lite/kernels/kernel_util.h b/tensorflow/lite/kernels/kernel_util.h index 4660631dded..0d6aa8fc790 100644 --- a/tensorflow/lite/kernels/kernel_util.h +++ b/tensorflow/lite/kernels/kernel_util.h @@ -24,26 +24,31 @@ limitations under the License. namespace tflite { +// A fair number of functions in this header have historically been inline. +// It is ok to change functions to not be inline if the latency with +// benchmark_model for MobileNet + MobileBERT is unaffected. If such a change is +// made, move the newly non-inlined function declarations to the top of this +// header file. +const TfLiteTensor* GetInput(const TfLiteContext* context, + const TfLiteNode* node, int index); + +// Note: You must check if result is not null: +// TfLiteTensor* my_tensor = GetVariableInput(context, node, kMyTensorIdx); +// TF_LITE_ENSURE(context, my_tensor != nullptr); +TfLiteTensor* GetVariableInput(TfLiteContext* context, const TfLiteNode* node, + int index); + +TfLiteTensor* GetOutput(TfLiteContext* context, const TfLiteNode* node, + int index); + +const TfLiteTensor* GetOptionalInputTensor(const TfLiteContext* context, + const TfLiteNode* node, int index); + inline int NumDimensions(const TfLiteTensor* t) { return t->dims->size; } inline int SizeOfDimension(const TfLiteTensor* t, int dim) { return t->dims->data[dim]; } -inline const TfLiteTensor* GetInput(const TfLiteContext* context, - const TfLiteNode* node, int index) { - return &context->tensors[node->inputs->data[index]]; -} -// Note: You must check if result is not null: -// TfLiteTensor* my_tensor = GetVariableInput(context, node, kMyTensorIdx); -// TF_LITE_ENSURE(context, my_tensor != nullptr); -inline TfLiteTensor* GetVariableInput(TfLiteContext* context, - const TfLiteNode* node, int index) { - TfLiteTensor* tensor = &context->tensors[node->inputs->data[index]]; - return (tensor->is_variable) ? tensor : nullptr; -} -inline TfLiteTensor* GetOutput(TfLiteContext* context, const TfLiteNode* node, - int index) { - return &context->tensors[node->outputs->data[index]]; -} + #ifndef TF_LITE_STATIC_MEMORY inline TfLiteTensor* GetTemporary(TfLiteContext* context, const TfLiteNode* node, int index) { @@ -72,17 +77,6 @@ inline int64_t NumElements(const TfLiteTensor* t) { return NumElements(t->dims); } -inline const TfLiteTensor* GetOptionalInputTensor(const TfLiteContext* context, - const TfLiteNode* node, - int index) { - const bool use_tensor = index < node->inputs->size && - node->inputs->data[index] != kTfLiteOptionalTensor; - if (use_tensor) { - return &context->tensors[node->inputs->data[index]]; - } - return nullptr; -} - // Determines whether tensor is constant. 
// TODO(b/138199592): Introduce new query which checks for constant OR // persistent-read-only, which would be useful for most tensor kernels that From 9ee1f7058ecddf555991e4e86400b46482b8952e Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Wed, 15 Jul 2020 16:31:33 -0700 Subject: [PATCH 0556/2522] Introducing WrappedFileSystem for transactions --- tensorflow/core/platform/file_system.h | 197 +++++++++++++++++++++++++ 1 file changed, 197 insertions(+) diff --git a/tensorflow/core/platform/file_system.h b/tensorflow/core/platform/file_system.h index 640d3b3c027..7ebacc2652b 100644 --- a/tensorflow/core/platform/file_system.h +++ b/tensorflow/core/platform/file_system.h @@ -44,6 +44,12 @@ class RandomAccessFile; class ReadOnlyMemoryRegion; class WritableFile; +class FileSystem; +struct TransactionToken { + FileSystem* owner; + void* token; +}; + /// A generic interface for accessing a file system. Implementations /// of custom filesystem adapters must implement this interface, /// RandomAccessFile, WritableFile, and ReadOnlyMemoryRegion classes. @@ -345,6 +351,197 @@ class FileSystem { virtual ~FileSystem() = default; }; +/// A Wrapper class for Transactional FileSystem support. +/// This provides means to make use of the transactions with minimal code change +/// Any operations that are done through this interface will be through the +/// transaction created at the time of construction of this instance. +/// See FileSystem documentation for method descriptions. +/// This class simply forwards all calls to wrapped filesystem either with given +/// transaction token or with token used in its construction. This allows doing +/// transactional filesystem access with minimal code change. +/// TODO(sami): remove override and extra argument comments when PR changing +/// FileSystem signatures are in. +class WrappedFileSystem : public FileSystem { + public: + virtual tensorflow::Status NewRandomAccessFile( + const string& fname, std::unique_ptr* result, + TransactionToken* token = nullptr) /* override */ { + return fs_->NewRandomAccessFile(fname, + result /* , (token ? token : token_) */); + } + + virtual tensorflow::Status NewWritableFile( + const string& fname, std::unique_ptr* result, + TransactionToken* token = nullptr) /* override */ { + return fs_->NewWritableFile(fname, result /* , (token ? token : token_) */); + } + + virtual tensorflow::Status NewAppendableFile( + const string& fname, std::unique_ptr* result, + TransactionToken* token = nullptr) /* override */ { + return fs_->NewAppendableFile(fname, + result /* , (token ? token : token_) */); + } + + virtual tensorflow::Status NewReadOnlyMemoryRegionFromFile( + const string& fname, std::unique_ptr* result, + TransactionToken* token = nullptr) /* override */ { + return fs_->NewReadOnlyMemoryRegionFromFile( + fname, result /* , (token ? token : token_) */); + } + + virtual tensorflow::Status FileExists( + const string& fname, TransactionToken* token = nullptr) /* override */ { + return fs_->FileExists(fname /* , (token ? token : token_) */); + } + + virtual bool FilesExist(const std::vector& files, + std::vector* status, + TransactionToken* token = nullptr) /* override */ { + return fs_->FilesExist(files, status /* , (token ? token : token_) */); + } + + virtual tensorflow::Status GetChildren( + const string& dir, std::vector* result, + TransactionToken* token = nullptr) /* override */ { + return fs_->GetChildren(dir, result /* , (token ? 
token : token_) */); + } + + virtual tensorflow::Status GetMatchingPaths( + const string& pattern, std::vector* results, + TransactionToken* token = nullptr) /* override */ { + return fs_->GetMatchingPaths(pattern, + results /* , (token ? token : token_) */); + } + + virtual bool Match(const std::string& filename, const std::string& pattern, + TransactionToken* token = nullptr) /* override */ { + return fs_->Match(filename, pattern /* , (token ? token : token_) */); + } + + virtual tensorflow::Status Stat( + const string& fname, FileStatistics* stat, + TransactionToken* token = nullptr) /* override */ { + return fs_->Stat(fname, stat /* , (token ? token : token_) */); + } + + virtual tensorflow::Status DeleteFile( + const string& fname, TransactionToken* token = nullptr) /* override */ { + return fs_->DeleteFile(fname /* , (token ? token : token_) */); + } + + virtual tensorflow::Status CreateDir( + const string& dirname, TransactionToken* token = nullptr) /* override */ { + return fs_->CreateDir(dirname /* , (token ? token : token_) */); + } + + virtual tensorflow::Status RecursivelyCreateDir( + const string& dirname, TransactionToken* token = nullptr) /* override */ { + return fs_->RecursivelyCreateDir(dirname /* , (token ? token : token_) */); + } + + virtual tensorflow::Status DeleteDir( + const string& dirname, TransactionToken* token = nullptr) /* override */ { + return fs_->DeleteDir(dirname /* , (token ? token : token_) */); + } + + virtual tensorflow::Status DeleteRecursively( + const string& dirname, int64* undeleted_files, int64* undeleted_dirs, + TransactionToken* token = nullptr) /* override */ { + return fs_->DeleteRecursively( + dirname, undeleted_files, + undeleted_dirs /*, (token ? token : token_) */); + } + + virtual tensorflow::Status GetFileSize( + const string& fname, uint64* file_size, + TransactionToken* token = nullptr) /* override */ { + return fs_->GetFileSize(fname, file_size /* , (token ? token : token_) */); + } + + virtual tensorflow::Status RenameFile( + const string& src, const string& target, + TransactionToken* token = nullptr) /* override */ { + return fs_->RenameFile(src, target /* , (token ? token : token_) */); + } + + virtual tensorflow::Status CopyFile( + const string& src, const string& target, + TransactionToken* token = nullptr) /* override */ { + return fs_->CopyFile(src, target /* , (token ? token : token_) */); + } + + virtual std::string TranslateName(const std::string& name) const + /* override */ { + return fs_->TranslateName(name); + } + + virtual tensorflow::Status IsDirectory( + const string& fname, TransactionToken* token = nullptr) /* override */ { + return fs_->IsDirectory(fname /* , (token ? token : token_) */); + } + + virtual Status HasAtomicMove(const string& path, + bool* has_atomic_move) /* override */ { + return fs_->HasAtomicMove(path, has_atomic_move); + } + + virtual void FlushCaches(TransactionToken* token = nullptr) /* override */ { + return fs_->FlushCaches(/* (token ? token : token_) */); + } + + virtual char Separator() const /* override */ { return fs_->Separator(); } + + virtual StringPiece Basename(StringPiece path) const /* override */ { + return fs_->Basename(path); + } + + virtual tensorflow::Status StartTransaction( + TransactionToken** token) /* override */ { + /* return fs_->StartTransaction(token); */ + return Status::OK(); + } + + virtual tensorflow::Status AddToTransaction( + const string& path, TransactionToken* token) /* override */ { + /* return fs_->AddToTransaction(path, (token ? 
token : token_) ); */ + return Status::OK(); + } + + virtual tensorflow::Status EndTransaction( + TransactionToken* token) /* override */ { + /* return fs_->EndTransaction(token); */ + return Status::OK(); + } + + virtual tensorflow::Status GetTransactionForPath( + const string& path, TransactionToken** token) /* override */ { + /* return fs_->GetTransactionForPath(path, token); */ + return Status::OK(); + } + + virtual tensorflow::Status GetTokenOrStartTransaction( + const string& path, TransactionToken** token) /* override */ { + /* return fs_->GetTokenOrStartTransaction(path, token); */ + return Status::OK(); + } + + virtual string DecodeTransaction( + const TransactionToken* token) /* override */ { + return ""; + /*return fs_->DecodeTransaction((token ? token : token_)); */ + } + + WrappedFileSystem(FileSystem* file_system, TransactionToken* token) + : fs_(file_system), token_(token) {} + + virtual ~WrappedFileSystem() = default; + + private: + FileSystem* fs_; + TransactionToken* token_; +}; + /// A file abstraction for randomly reading the contents of a file. class RandomAccessFile { public: From 843987549843c7113efccf998c65fb004006a116 Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Wed, 15 Jul 2020 23:40:50 +0000 Subject: [PATCH 0557/2522] added test for TF_ShapeInferenceContextScalar --- tensorflow/c/ops_test.cc | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tensorflow/c/ops_test.cc b/tensorflow/c/ops_test.cc index 482413f966c..63105b1ad00 100644 --- a/tensorflow/c/ops_test.cc +++ b/tensorflow/c/ops_test.cc @@ -316,5 +316,15 @@ TEST(OpsTest, ShapeInferenceSubshape) { TF_DeleteShapeHandle(handle); } +TEST(OpsTest, ShapeInferenceScalarShape) { + NodeDef def; + shape_inference::InferenceContext c(0, def, MakeOpDef(0, 0), + {S({})}, {}, {}, {}); + TF_ShapeHandle* TF_scalar_shape = TF_ShapeInferenceContextScalar(C_CTX(&c)); + shape_inference::ShapeHandle* scalar_shape = + reinterpret_cast(TF_scalar_shape); + ASSERT_EQ("[]", c.DebugString(*scalar_shape)); +} + } // namespace } // namespace tensorflow From 381c6e98d2b0429abe66ee53a48a03dd5d868447 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 15 Jul 2020 16:31:29 -0700 Subject: [PATCH 0558/2522] Use internal linkage for classes defined in compute_task.mm. 
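The pattern applied below is standard C++: definitions that are only needed inside one translation unit go into an unnamed namespace, which gives them internal linkage and keeps them out of the exported symbol set. A minimal, generic sketch (the type name is hypothetical):

namespace {
// Only visible inside this .cc/.mm file; cannot collide with a type of the
// same name defined in another translation unit.
struct Helper {
  int id;
};
}  // namespace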
PiperOrigin-RevId: 321461539
Change-Id: Ie6b9755897088b5363c62d9ab8e8d93ca89af9a8
---
 .../lite/delegates/gpu/metal/compute_task.mm  | 36 +++++++++++--------
 1 file changed, 21 insertions(+), 15 deletions(-)

diff --git a/tensorflow/lite/delegates/gpu/metal/compute_task.mm b/tensorflow/lite/delegates/gpu/metal/compute_task.mm
index 88be8676651..7bfbb55feff 100644
--- a/tensorflow/lite/delegates/gpu/metal/compute_task.mm
+++ b/tensorflow/lite/delegates/gpu/metal/compute_task.mm
@@ -39,22 +39,28 @@ using ::tflite::gpu::metal::UniformsFunction;
 using ::tflite::gpu::uint3;
 using ::tflite::gpu::ValueId;
 
-@implementation TFLComputeTask {
-  struct InputBuffer {
-    ValueId uid;
-    id<MTLBuffer> metalHandle;
-  };
-  struct OutputBuffer {
-    ValueId uid;
-    id<MTLBuffer> metalHandle;
-    OutputDimensions dimensionsFunction;
-    std::vector<ValueId> alias;
-  };
-  struct UniformBuffer {
-    std::vector<uint8_t> data;
-    UniformsFunction dataFunction;
-  };
+namespace {
+struct InputBuffer {
+  ValueId uid;
+  id<MTLBuffer> metalHandle;
+};
+
+struct OutputBuffer {
+  ValueId uid;
+  id<MTLBuffer> metalHandle;
+  OutputDimensions dimensionsFunction;
+  std::vector<ValueId> alias;
+};
+
+struct UniformBuffer {
+  std::vector<uint8_t> data;
+  UniformsFunction dataFunction;
+};
+
+}  // namespace
+
+@implementation TFLComputeTask {
   id<MTLComputePipelineState> _program;
   std::vector<InputBuffer> _inputBuffers;
   std::vector<OutputBuffer> _outputBuffers;

From c710a5f32724c4b0c01d98ba789d60c8d7305c9f Mon Sep 17 00:00:00 2001
From: Yash Katariya
Date: Wed, 15 Jul 2020 16:39:31 -0700
Subject: [PATCH 0559/2522] Remove H1 heading from the docstring.

PiperOrigin-RevId: 321463037
Change-Id: Idfb7213b7d25973c7a723e22fac7e597fb5196b8
---
 tensorflow/python/types/core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/types/core.py b/tensorflow/python/types/core.py
index dcda7b68271..bec5aecaba0 100644
--- a/tensorflow/python/types/core.py
+++ b/tensorflow/python/types/core.py
@@ -78,7 +78,7 @@ doc_typealias.document(
   This definition may be used in user code. Additional types may be added in the future as more input types are supported.
- # Example: + Example: ``` def foo(x: TensorLike): From 2e79e5c34953819027585d959d7a7b2b9aa14129 Mon Sep 17 00:00:00 2001 From: Daniel Nguyen Date: Wed, 15 Jul 2020 23:47:40 +0000 Subject: [PATCH 0560/2522] initialized pointers for params struct and added CHECK for SerializeToTString --- tensorflow/c/kernels/summary_op.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/c/kernels/summary_op.cc b/tensorflow/c/kernels/summary_op.cc index 9ebda7188cb..1bd14eaf9c9 100644 --- a/tensorflow/c/kernels/summary_op.cc +++ b/tensorflow/c/kernels/summary_op.cc @@ -30,7 +30,9 @@ namespace { TF_Tensor* tags; TF_Tensor* values; TF_Status* status; - Params(TF_OpKernelContext* ctx) { + Params(TF_OpKernelContext* ctx) : tags(nullptr), + values(nullptr), + status(nullptr) { status = TF_NewStatus(); TF_GetInput(ctx, 0, &tags, status); if (TF_GetCode(status) == TF_OK) { @@ -99,7 +101,7 @@ static void ScalarSummaryOp_Compute(void* kernel, TF_OpKernelContext* ctx) { } tensorflow::tstring* output_tstring = reinterpret_cast( TF_TensorData(summary_tensor)); - SerializeToTString(s, output_tstring); + CHECK(SerializeToTString(s, output_tstring)); TF_DeleteTensor(summary_tensor); } From 14418df4b18bfb38dd3e7bf2a888d3d0aeef91ff Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Wed, 15 Jul 2020 23:49:10 +0000 Subject: [PATCH 0561/2522] Update tensorflow/c/experimental/filesystem/filesystem_interface.h --- tensorflow/c/experimental/filesystem/filesystem_interface.h | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/c/experimental/filesystem/filesystem_interface.h b/tensorflow/c/experimental/filesystem/filesystem_interface.h index 1e62a948c6d..72265a47592 100644 --- a/tensorflow/c/experimental/filesystem/filesystem_interface.h +++ b/tensorflow/c/experimental/filesystem/filesystem_interface.h @@ -720,7 +720,6 @@ typedef struct TF_FilesystemOps { /// * Must set `status` to `TF_OK` if transaction successfuly finalized. /// * Must set `status` to `TF_NOT_FOUND` if token is invalid/not found /// * Might use any other error value for `status` to signal other errors. - int (*end_transaction)(const TF_Filesystem* filesystem, TF_TransactionToken* token, TF_Status* status); From 864be79fe44254717c39dfaafbedc445fcdfaef6 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Wed, 15 Jul 2020 23:49:17 +0000 Subject: [PATCH 0562/2522] Update tensorflow/c/experimental/filesystem/filesystem_interface.h --- tensorflow/c/experimental/filesystem/filesystem_interface.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/c/experimental/filesystem/filesystem_interface.h b/tensorflow/c/experimental/filesystem/filesystem_interface.h index 72265a47592..f85d92451a8 100644 --- a/tensorflow/c/experimental/filesystem/filesystem_interface.h +++ b/tensorflow/c/experimental/filesystem/filesystem_interface.h @@ -701,7 +701,7 @@ typedef struct TF_FilesystemOps { /// Plugins: /// * Must set `status` to `TF_OK` if transaction successfuly started. /// * Must set `status` to `TF_FAILED_PRECONDITION` if multiple transactions - /// are not supported + /// are not supported /// * Might use any other error value for `status` to signal other errors. 
int (*start_transaction)(const TF_Filesystem* filesystem, TF_TransactionToken** token, TF_Status* status); From 17520c056c5d729fdfc1de78eb32aa1bc8358772 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Wed, 15 Jul 2020 23:49:25 +0000 Subject: [PATCH 0563/2522] Update tensorflow/c/experimental/filesystem/filesystem_interface.h --- tensorflow/c/experimental/filesystem/filesystem_interface.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/c/experimental/filesystem/filesystem_interface.h b/tensorflow/c/experimental/filesystem/filesystem_interface.h index f85d92451a8..e5ff87ea2aa 100644 --- a/tensorflow/c/experimental/filesystem/filesystem_interface.h +++ b/tensorflow/c/experimental/filesystem/filesystem_interface.h @@ -783,7 +783,7 @@ typedef struct TF_FilesystemOps { /// /// Plugins: /// * Must set `status` to `TF_OK` if transaction found or successfuly - /// started. + /// started. /// * Must set `status` to `TF_NOT_FOUND` if `path` doesn't point to this /// filesystem /// * Must set `status` to `TF_FAILED_PRECONDITION` if file/directory is From 967f9e5b54fb02780d46f184c5181a8f16473c68 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Wed, 15 Jul 2020 23:49:31 +0000 Subject: [PATCH 0564/2522] Update tensorflow/c/experimental/filesystem/filesystem_interface.h --- tensorflow/c/experimental/filesystem/filesystem_interface.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/c/experimental/filesystem/filesystem_interface.h b/tensorflow/c/experimental/filesystem/filesystem_interface.h index e5ff87ea2aa..cb05494d168 100644 --- a/tensorflow/c/experimental/filesystem/filesystem_interface.h +++ b/tensorflow/c/experimental/filesystem/filesystem_interface.h @@ -759,7 +759,7 @@ typedef struct TF_FilesystemOps { /// Plugins: /// * Must set `status` to `TF_OK` if a transaction for path is found /// * Must set `status` to `TF_NOT_FOUND` if `path` is not part of any - /// transaction + /// transaction /// * Must set `status` to `TF_FAILED_PRECONDITION` if `path` is /// not in this filesystem. /// * Might use any other error value for `status` to signal other errors. From 093491a562cba2a89d4d5b9d79bc40fd2c077581 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Wed, 15 Jul 2020 16:54:54 -0700 Subject: [PATCH 0565/2522] Revert test script changes. PiperOrigin-RevId: 321465711 Change-Id: I922ffe1a11ad658bd87bd99ec310a6a0a9cade6e --- .../ubuntu_16/cpu_py36_full/nightly_release.sh | 9 ++++++++- .../ubuntu_16/gpu_py36_full/nightly_release.sh | 14 +++++++++++++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/nightly_release.sh index bd686959209..2b770867099 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/cpu_py36_full/nightly_release.sh @@ -27,11 +27,18 @@ install_bazelisk python2.7 tensorflow/tools/ci_build/update_version.py --nightly # Run configure. 
+export TF_NEED_GCP=1 +export TF_NEED_HDFS=1 +export TF_NEED_S3=1 +export TF_NEED_CUDA=0 +export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.6) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=release_cpu_linux tensorflow/tools/pip_package:build_pip_package +bazel build --config=opt --config=v2 \ + --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ + tensorflow/tools/pip_package:build_pip_package ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --cpu --nightly_flag diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nightly_release.sh index c6fb6d469b1..87b2e52d88a 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nightly_release.sh @@ -25,11 +25,23 @@ install_bazelisk python2.7 tensorflow/tools/ci_build/update_version.py --nightly # Run configure. +export TF_NEED_GCP=1 +export TF_NEED_HDFS=1 +export TF_NEED_S3=1 +export TF_NEED_CUDA=1 +export TF_CUDA_VERSION=10 +export TF_CUDNN_VERSION=7 +export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 +export TF_NEED_TENSORRT=1 +export TENSORRT_INSTALL_PATH=/usr/local/tensorrt +export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.6) yes "" | "$PYTHON_BIN_PATH" configure.py # Build the pip package -bazel build --config=release_gpu_linux tensorflow/tools/pip_package:build_pip_package +bazel build --config=opt --config=v2 \ + --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain \ + tensorflow/tools/pip_package:build_pip_package ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --nightly_flag ./bazel-bin/tensorflow/tools/pip_package/build_pip_package pip_pkg --gpu --nightly_flag From 0d38f3e81c0cd5a05738a59a66dd6eb2b1a4cd35 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Wed, 15 Jul 2020 16:55:57 -0700 Subject: [PATCH 0566/2522] Splitting up tf_ops Given the switch intended for SOT, just split these alphabetically. 
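For C++ users of the dialect the intent is that nothing changes: tf_ops.h keeps pulling in the generated, sharded op declarations. A hedged sketch of that aggregation (header paths taken from the file list below; the exact contents of tf_ops.h may differ):

// Ops tf.A*..tf.M* are generated into tf_ops_a_m.*, tf.N*..tf.Z* into
// tf_ops_n_z.*, and everything not matching either regex into
// tf_remaining_ops.*.
#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.h"
#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.h"
#include "tensorflow/compiler/mlir/tensorflow/ir/tf_remaining_ops.h"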
PiperOrigin-RevId: 321465891 Change-Id: Ie261af7b2bd4a5a2dd80fc3865ed3159f0be155d --- tensorflow/compiler/mlir/tensorflow/BUILD | 157 +- .../compiler/mlir/tensorflow/ir/tf_op_base.td | 2 +- .../mlir/tensorflow/ir/tf_op_interfaces.cc | 20 + .../compiler/mlir/tensorflow/ir/tf_ops.cc | 4481 +---------------- .../compiler/mlir/tensorflow/ir/tf_ops.h | 14 +- .../compiler/mlir/tensorflow/ir/tf_ops_a_m.cc | 1807 +++++++ .../compiler/mlir/tensorflow/ir/tf_ops_a_m.h | 61 + .../mlir/tensorflow/ir/tf_ops_helpers.inc | 580 +++ .../compiler/mlir/tensorflow/ir/tf_ops_n_z.cc | 2270 +++++++++ .../compiler/mlir/tensorflow/ir/tf_ops_n_z.h | 51 + .../mlir/tensorflow/ir/tf_remaining_ops.cc | 87 + .../mlir/tensorflow/ir/tf_remaining_ops.h | 50 + 12 files changed, 5077 insertions(+), 4503 deletions(-) create mode 100644 tensorflow/compiler/mlir/tensorflow/ir/tf_op_interfaces.cc create mode 100644 tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc create mode 100644 tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.h create mode 100644 tensorflow/compiler/mlir/tensorflow/ir/tf_ops_helpers.inc create mode 100644 tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc create mode 100644 tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.h create mode 100644 tensorflow/compiler/mlir/tensorflow/ir/tf_remaining_ops.cc create mode 100644 tensorflow/compiler/mlir/tensorflow/ir/tf_remaining_ops.h diff --git a/tensorflow/compiler/mlir/tensorflow/BUILD b/tensorflow/compiler/mlir/tensorflow/BUILD index 14d7faecdca..c5e2b089c0d 100644 --- a/tensorflow/compiler/mlir/tensorflow/BUILD +++ b/tensorflow/compiler/mlir/tensorflow/BUILD @@ -88,6 +88,7 @@ gentbl( cc_library( name = "tensorflow_op_interfaces", srcs = [ + "ir/tf_op_interfaces.cc", "ir/tf_op_interfaces.cc.inc", "ir/tf_op_interfaces.h.inc", "ir/tf_verifiers.cc", @@ -105,15 +106,67 @@ cc_library( ) gentbl( - name = "tensorflow_ops_inc_gen", + name = "tensorflow_all_ops_inc_gen", tbl_outs = [ ( "-gen-op-decls", - "ir/tf_ops.h.inc", + "ir/tf_all_ops.h.inc", ), ( "-gen-op-defs", - "ir/tf_ops.cc.inc", + "ir/tf_all_ops.cc.inc", + ), + ], + tblgen = "@llvm-project//mlir:mlir-tblgen", + td_file = "ir/tf_ops.td", + td_srcs = [ + ":tensorflow_ops_td_files", + ], +) + +# We only shard tf_op on name for build performance reasons. 
+tf_ops_category_list = [ + { + "name": "ops_a_m", + "include": "tf.[A-M].*$$", + }, + { + "name": "ops_n_z", + "include": "tf.[N-Z].*$$", + }, +] + +[[ + gentbl( + name = "tensorflow_" + target["name"] + "_inc_gen", + tbl_outs = [ + ( + "-gen-op-decls -op-include-regex='" + target["include"] + "'", + "ir/tf_" + target["name"] + ".h.inc", + ), + ( + "-gen-op-defs -op-include-regex='" + target["include"] + "'", + "ir/tf_" + target["name"] + ".cc.inc", + ), + ], + tblgen = "@llvm-project//mlir:mlir-tblgen", + td_file = "ir/tf_ops.td", + td_srcs = [ + ":tensorflow_ops_td_files", + ], + ), +] for target in tf_ops_category_list] + +gentbl( + name = "tensorflow_remaining_ops_inc_gen", + tbl_outs = [ + ( + "-gen-op-decls -op-exclude-regex='" + "|".join([target["include"] for target in tf_ops_category_list]) + "' ", + "ir/tf_remaining_ops.h.inc", + ), + ( + "-gen-op-defs -op-exclude-regex='" + "|".join([target["include"] for target in tf_ops_category_list]) + "' ", + "ir/tf_remaining_ops.cc.inc", ), ], tblgen = "@llvm-project//mlir:mlir-tblgen", @@ -179,7 +232,7 @@ gentbl( name = "tensorflow_device_ops_inc_gen", tbl_outs = [ ( - "-gen-op-decls", + "-gen-op-decls ", "ir/tf_device.h.inc", ), ( @@ -284,24 +337,67 @@ cc_library( ], ) +[[ + cc_library( + name = "tensorflow_" + target["name"], + srcs = [ + "ir/tf_ops.h", + "ir/tf_remaining_ops.h", + "ir/tf_" + target["name"] + ".cc", + "ir/tf_" + target["name"] + ".cc.inc", + ] + ["ir/tf_" + target["name"] + ".h" for target in tf_ops_category_list], + hdrs = [ + ], + textual_hdrs = [ + "ir/tf_all_ops.h.inc", + "ir/tf_ops_helpers.inc", + "ir/tf_remaining_ops.h.inc", + ] + ["ir/tf_" + target["name"] + ".h.inc" for target in tf_ops_category_list], + deps = [ + ":tensorflow_attributes", + ":tensorflow_canonicalize_inc_gen", + ":tensorflow_op_interfaces", + ":tensorflow_op_interfaces_inc_gen", + ":tensorflow_side_effects", + ":tensorflow_structs", + ":tensorflow_traits", + ":tensorflow_types", + "@llvm-project//llvm:Support", + "@llvm-project//mlir:DerivedAttributeOpInterface", + "@llvm-project//mlir:Dialect", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:InferTypeOpInterface", + "@llvm-project//mlir:LoopLikeInterface", + "@llvm-project//mlir:Parser", + "@llvm-project//mlir:SideEffects", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:Support", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + ] + [":tensorflow_" + target["name"] + "_inc_gen"], + ), +] for target in tf_ops_category_list] + cc_library( - name = "tensorflow_ops", + name = "tensorflow_remaining_ops", srcs = [ - "ir/tf_ops.cc", - "ir/tf_ops.cc.inc", "ir/tf_ops.h", - ], + "ir/tf_remaining_ops.h", + "ir/tf_remaining_ops.cc", + ] + ["ir/tf_" + target["name"] + ".h" for target in tf_ops_category_list], hdrs = [ ], textual_hdrs = [ - "ir/tf_ops.h.inc", - ], + "ir/tf_all_ops.h.inc", + "ir/tf_ops_helpers.inc", + "ir/tf_remaining_ops.h.inc", + ] + ["ir/tf_" + target["name"] + ".h.inc" for target in tf_ops_category_list], deps = [ ":tensorflow_attributes", ":tensorflow_canonicalize_inc_gen", ":tensorflow_op_interfaces", ":tensorflow_op_interfaces_inc_gen", - ":tensorflow_ops_inc_gen", + ":tensorflow_remaining_ops_inc_gen", ":tensorflow_side_effects", ":tensorflow_structs", ":tensorflow_traits", @@ -321,6 +417,43 @@ cc_library( ], ) +cc_library( + name = "tensorflow_ops", + srcs = [ + "ir/tf_ops.cc", + "ir/tf_ops.h", + ], + textual_hdrs = [ + "ir/tf_all_ops.h.inc", + "ir/tf_remaining_ops.h", + ] + ["ir/tf_" + target["name"] + ".h" for target in tf_ops_category_list], + deps = 
[ + ":tensorflow_all_ops_inc_gen", + ":tensorflow_remaining_ops_inc_gen", + ":tensorflow_attributes", + ":tensorflow_canonicalize_inc_gen", + ":tensorflow_op_interfaces", + ":tensorflow_op_interfaces_inc_gen", + ":tensorflow_side_effects", + ":tensorflow_structs", + ":tensorflow_traits", + ":tensorflow_types", + ":tensorflow_remaining_ops", + "@llvm-project//llvm:Support", + "@llvm-project//mlir:DerivedAttributeOpInterface", + "@llvm-project//mlir:Dialect", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:InferTypeOpInterface", + "@llvm-project//mlir:LoopLikeInterface", + "@llvm-project//mlir:Parser", + "@llvm-project//mlir:SideEffects", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:Support", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + ] + [":tensorflow_" + target["name"] for target in tf_ops_category_list], +) + cc_library( name = "tensorflow_structs", srcs = [ @@ -393,12 +526,14 @@ cc_library( includes = ["include"], deps = [ ":error_util", + ":tensorflow_all_ops_inc_gen", ":tensorflow_attributes", ":tensorflow_canonicalize_inc_gen", ":tensorflow_device_ops_inc_gen", ":tensorflow_executor_inc_gen", ":tensorflow_op_interfaces", ":tensorflow_ops", + ":tensorflow_side_effects", ":tensorflow_structs", ":tensorflow_traits", ":tensorflow_types", diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td index 17424b54fc2..aaaf9c2fc5c 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_op_base.td @@ -332,7 +332,7 @@ class TF_DerivedOperandTypeListAttr : DerivedAttr< // This returns a list of shapes so it is used for variadic operands that // can have different shapes. class TF_DerivedOperandShapeListAttr : DerivedAttr< - "mlir::TF::OperandShapeRange", + "::mlir::TF::OperandShapeRange", "auto values = getODSOperands(" # idx # ");\n" "return {mlir::TF::OperandShapeIterator(values.begin()), " "mlir::TF::OperandShapeIterator(values.end())};", diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_op_interfaces.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_op_interfaces.cc new file mode 100644 index 00000000000..3e99f1e162b --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_op_interfaces.cc @@ -0,0 +1,20 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_op_interfaces.h" + +namespace mlir::TF { +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_op_interfaces.cc.inc" +} // namespace mlir::TF diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc index 98bc6b3089a..61935153c18 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc @@ -69,4483 +69,6 @@ limitations under the License. 
namespace mlir { namespace TF { -// Propagates underscore and device attributes from src to dst. -// TODO(b/158769932): This should be a general feature instead post some policy -// discussion. -static void PropagateAttributes(Operation *src, Operation *dst) { - auto device = mlir::Identifier::get("device", src->getContext()); - for (auto named_attr : src->getAttrs()) { - if (*named_attr.first.begin() == '_' || named_attr.first == device) - dst->setAttr(named_attr.first, named_attr.second); - } -} - -//===----------------------------------------------------------------------===// -// TF op helper functions -//===----------------------------------------------------------------------===// - -// Returns the RankedTensorType for the given operand. TensorFlow constant ops -// may have non-static shape because the shape is not propagated during constant -// folding. If the defining op for the given operand is a constant op, this -// routine uses the constant op's attribute to get the actual shape. -static RankedTensorType GetRankedTensorTypeForOperand(Value operand) { - DenseElementsAttr attr; - if (matchPattern(operand, m_Constant(&attr))) { - return attr.getType().dyn_cast(); - } - return operand.getType().dyn_cast(); -} - -// Returns true if the given `value` is of ranked float tensor type with the -// given `rank`. -static inline bool IsOfRankedFloatTensorType(RankedTensorType type, int rank) { - return type && type.getRank() == rank && - type.getElementType().isa(); -} - -// Returns true if the given `value` has the specified rank or has unranked -// type. -static inline bool IsOfRankOrUnranked(Value value, int64_t rank) { - RankedTensorType type = GetRankedTensorTypeForOperand(value); - return !type || type.getRank() == rank; -} - -// Returns true if the given `value` has at least the specified rank or has -// unranked type. -static inline bool HasRankAtLeast(Value value, int64_t rank) { - RankedTensorType type = GetRankedTensorTypeForOperand(value); - return !type || type.getRank() >= rank; -} - -// Returns true if the given `value` has at most the specified rank or has -// unranked type. -static inline bool HasRankAtMost(Value value, int64_t rank) { - RankedTensorType type = GetRankedTensorTypeForOperand(value); - return !type || type.getRank() <= rank; -} - -static bool IsUnknownDimOrRank(int64_t dim_or_rank) { - return dim_or_rank == -1; -} - -// Returns the tf.Equal/tf.NotEqual result type given `x` and `y` and inputs. If -// `incompatible_shape_error` is true, reports error if `x` and `y` has -// incompatible shapes. Otherwise, returns a tensor type with unknown rank. -static Type DeduceEqualCmpOpType(Builder *builder, Location loc, Value x, - Value y, BoolAttr incompatible_shape_error) { - auto result_type = - OpTrait::util::getBroadcastedType(x.getType(), y.getType()); - if (!result_type) { - if (incompatible_shape_error.getValue()) { - mlir::emitError(loc, "non-broadcastable operands"); - } else { - return UnrankedTensorType::get(builder->getI1Type()); - } - } - - auto ranked_type = result_type.dyn_cast(); - if (!ranked_type) return UnrankedTensorType::get(builder->getI1Type()); - - return RankedTensorType::get(ranked_type.getShape(), builder->getI1Type()); -} - -// Returns dimension index for the given TensorFlow axis that supports negative -// indexing. -static int64_t GetDimForAxis(int64_t axis, int64_t rank) { - return axis >= 0 ? axis : axis + rank; -} - -// Infers output type for reduction ops such as SumOp, MaxOp etc. 
-// TODO(b/e667204a): Move this logic to shape inference once it supports custom -// inference functions. -static Type InferReductionOpType(Value input, Value reduction_indices, - BoolAttr keep_dims, Builder *builder) { - Type input_ty = input.getType(); - Type element_ty = getElementTypeOrSelf(input_ty); - - // Output type is unranked if input type is not ranked. - auto ranked_ty = input_ty.dyn_cast(); - if (!ranked_ty) return UnrankedTensorType::get(element_ty); - int64_t rank = ranked_ty.getRank(); - - DenseIntElementsAttr indices; - if (!matchPattern(reduction_indices, m_Constant(&indices))) { - // Output type is unranked if reduction indices are not constant and reduced - // dimensions are not kept. - if (!keep_dims.getValue()) return UnrankedTensorType::get(element_ty); - - // Otherwise, output type has same rank as the input. - return RankedTensorType::get(SmallVector(rank, -1), element_ty); - } - - int64_t num_reduce_dim = 0; - llvm::SmallVector is_reduce_dim(rank, false); - for (const APInt &index : indices.getValues()) { - int64_t dim = GetDimForAxis(index.getSExtValue(), rank); - // Invalid input. - if (dim < 0 || dim >= rank) return UnrankedTensorType::get(element_ty); - - if (!is_reduce_dim[dim]) { - is_reduce_dim[dim] = true; - num_reduce_dim++; - } - } - - ArrayRef shape = ranked_ty.getShape(); - SmallVector out_shape; - out_shape.reserve(rank - (keep_dims.getValue() ? 0 : num_reduce_dim)); - for (int64_t i = 0; i < rank; ++i) { - if (!is_reduce_dim[i]) - out_shape.push_back(shape[i]); - else if (keep_dims.getValue()) - out_shape.push_back(1); - } - return RankedTensorType::get(out_shape, element_ty); -} - -// Verifies that the given types are cast compatible. If not, emits appropriate -// error for the given op. If mask_one_dim is set to true, then the types are -// allowed to have one mismatching dimension. Masking one of the dimensions is -// useful for ops like Concat that requires all ranked inputs to have the same -// rank and match dimension sizes for all but one of the dimensions. -static LogicalResult VerifyTypesCompatibility( - Operation::operand_type_range types, bool mask_one_dim, Operation *op) { - constexpr int64_t kUninitialized = -1; - int64_t common_rank = kUninitialized; - llvm::SmallVector common_dims; - int64_t dim_to_mask = kUninitialized; - - // Initialize common_rank with rank of the first ranked type and verify that - // following ranked types have the same rank. - // Similarly, initialize each of the dimensions with the first type that has - // the dimension size available and verify that all following types have the - // same size for the dimension. However, if mask_one_dim is true, note down - // the dimension index on the first mismatch and ignore dimension at that - // index in following types. 
- for (Type ty : types) { - RankedTensorType ranked_ty = ty.dyn_cast(); - if (!ranked_ty) continue; - - int64_t rank = ranked_ty.getRank(); - if (common_rank == kUninitialized) { - common_rank = rank; - common_dims.resize(common_rank, kUninitialized); - } else if (common_rank != rank) { - return op->emitError() - << "operand type " << ranked_ty - << " is not compatible with preceding operands; expected rank: " - << common_rank; - } - - for (int64_t i = 0, e = common_rank; i != e; i++) { - if (i == dim_to_mask) continue; - - int64_t dim = ranked_ty.getDimSize(i); - if (dim == kUninitialized) continue; - - int64_t &common_dim = common_dims[i]; - if (common_dim == kUninitialized) { - common_dim = dim; - } else if (common_dim != dim) { - // If mask_one_dim is true, do not emit an error if this is the only - // dimension with mismatches. Note down the dimension to mask it from - // the following types. - if (mask_one_dim && dim_to_mask == kUninitialized) { - dim_to_mask = i; - continue; - } - - return op->emitError() << "operand type " << ranked_ty - << " is not compatible with preceding operands; " - "expected dimension at index " - << i << ": " << common_dim; - } - } - } - return success(); -} - -// This is a helper for the Select to SelectV2 canonicalization. The `data` rank -// refers to the rank of `t`/`e` (these two inputs have equal rank; this is -// checked in the verifier). -// -// In most cases, the predicate for Select can be used directly as the predicate -// for SelectV2. However, there is one case that varies, which is when the -// predicate is a tensor and the data is multidimensional. In this case, Select -// op semantics dictate that the predicate tensor length must match the size of -// the first data dimension. This varies from normal broadcasting semantics -// (which are used in SelectV2), so we must reshape the tensor in this case to -// be compatible. -static Value ReshapeSelectPredIfNecessary(OpBuilder *builder, Location loc, - Value cond, int data_rank) { - auto cond_tensor = cond.getType().cast(); - // Reshape is only needed in the case that the cond rank is 1 (i.e. it is - // a vector) AND t/e rank is > 1. - if (cond_tensor.getRank() != 1 || data_rank <= 1) { - // No reshape necessary. Leave cond as it is. - return cond; - } - - // This is the case where a reshape is needed. We want to construct the - // shape [x,1,...1], where x is the value in the pred tensor and the - // length of the shape is equal to data_rank. - SmallVector shape(data_rank, 1); - shape[0] = cond_tensor.getShape().front(); - auto new_shape_type = - RankedTensorType::get({data_rank}, builder->getIntegerType(64)); - auto shape_attr = DenseIntElementsAttr::get(new_shape_type, shape); - auto new_shape = builder->create(loc, shape_attr); - return builder->create(loc, cond, new_shape); -} - -//===----------------------------------------------------------------------===// -// Helper functions detect device capabilities from RuntimeDevices. -//===----------------------------------------------------------------------===// - -namespace { -using DeviceNameUtils = ::tensorflow::DeviceNameUtils; -using ParsedName = ::tensorflow::DeviceNameUtils::ParsedName; - -bool IsGpuDevice(const DeviceNameUtils::ParsedName &device) { - return device.type == ::tensorflow::DEVICE_GPU; -} - -} // namespace - -// Returns true if at least one GPU device is available at runtime. 
-bool CanUseGpuDevice(const RuntimeDevices &devices) { - return llvm::any_of(devices.device_names(), IsGpuDevice); -} - -// Returns true if all of the GPUs available at runtime support TensorCores -// (NVIDIA compute capability >= 7.0). -bool CanUseTensorCores(const RuntimeDevices &devices) { - auto has_tensor_cores = [&](const DeviceNameUtils::ParsedName &device) { - auto md = devices.GetGpuDeviceMetadata(device); - return md ? md->cc_major().getInt() >= 7 : false; - }; - return llvm::all_of( - llvm::make_filter_range(devices.device_names(), IsGpuDevice), - has_tensor_cores); -} - -// Returns true if operation does not have explicit device placement that would -// prevent it from running on GPU device. -bool CanUseGpuDevice(Operation *op) { - auto device_attr = op->getAttrOfType("device"); - if (!device_attr || device_attr.getValue().empty()) return true; - - DeviceNameUtils::ParsedName device; - if (!DeviceNameUtils::ParseFullName(device_attr.getValue().str(), &device)) - return false; - - // We can't use GPU if operation explicitly placed on non-GPU device. - return !device.has_type || device.type == ::tensorflow::DEVICE_GPU; -} - -//===----------------------------------------------------------------------===// -// TF op helper functions to work with layout transformation. -//===----------------------------------------------------------------------===// - -SmallVector ReversePermutation(ArrayRef permutation) { - SmallVector reverse(permutation.size()); - for (size_t i = 0; i < permutation.size(); ++i) { - reverse[permutation[i]] = i; - } - return reverse; -} - -SmallVector GetDataFormatPermutation(StringRef from, StringRef to) { - if (from == "NHWC" && to == "NCHW") { - return {0, 3, 1, 2}; - } else if (from == "NCHW" && to == "NHWC") { - return {0, 2, 3, 1}; - } else { - return {}; - } -} - -// Shuffle elements in the `attr` according to the permutation. Optional -// `inner_size` allows to shuffle array attributes created from rank 2 tensors -// on outer dimension only. -ArrayAttr ShuffleArrayAttr(ArrayAttr attr, ArrayRef permutation, - int inner_size = 1) { - if (attr.size() == 0) return attr; - - assert(attr.size() % inner_size == 0); - assert(attr.size() / inner_size == permutation.size()); - - SmallVector values{attr.begin(), attr.end()}; - SmallVector shuffled(values.size()); - - for (size_t i = 0; i < permutation.size(); ++i) { - for (size_t j = 0; j < inner_size; ++j) { - shuffled[i * inner_size + j] = values[permutation[i] * inner_size + j]; - } - } - - return ArrayAttr::get(shuffled, attr.getContext()); -} - -// Shuffle ranked tensor dimensions according to the permutation. 
-Type ShuffleRankedTensorType(Type type, ArrayRef permutation) { - if (auto ranked_type = type.dyn_cast()) { - ArrayRef shape = ranked_type.getShape(); - assert(permutation.size() == shape.size()); - - SmallVector new_shape(permutation.size()); - for (size_t i = 0; i < permutation.size(); ++i) - new_shape[i] = shape[permutation[i]]; - - return RankedTensorType::get(new_shape, ranked_type.getElementType()); - } - - return type; -} - -static bool AreCancellablePermutations(DenseIntElementsAttr perm0, - DenseIntElementsAttr perm1) { - if (perm0.getNumElements() == 0 || perm1.getNumElements() == 0) return false; - if (perm0.getNumElements() != perm1.getNumElements()) return false; - - SmallVector perm0_values; - for (const auto &value : perm0.getIntValues()) - perm0_values.push_back(value.getSExtValue()); - - SmallVector perm1_values; - for (const auto &value : perm1.getIntValues()) - perm1_values.push_back(value.getSExtValue()); - - for (int i = 0; i < perm0_values.size(); ++i) { - if (perm0_values[perm1_values[i]] != i) return false; - } - - return true; -} - -// Default implementation of `LayoutSensitiveInterface::UpdateDataFormat` for -// layout sensitive operations that do not have any additional layout dependent -// attributes besides `data_format` string. -template -LogicalResult UpdateDataFormat(StringRef data_format, Op *op) { - auto perm = GetDataFormatPermutation(op->data_format(), data_format); - if (perm.empty()) return failure(); - - // Update data format attribute. - op->setAttr("data_format", StringAttr::get(data_format, op->getContext())); - - // Update types for all layout sensitive results. - auto layout_sensitive = cast(op->getOperation()); - for (unsigned idx : layout_sensitive.GetLayoutDependentResults()) { - OpResult result = op->getOperation()->getResult(idx); - result.setType(ShuffleRankedTensorType(result.getType(), perm)); - } - - return success(); -} - -// Default implementation for folding operand transpose into the operation. -// See `FoldOperandsTransposeInterface::FoldOperandsPermutation`. -template -LogicalResult FoldOperandsPermutation( - ArrayRef permutation, Op *op, - ArrayRef> shuffle_attrs = {}) { - MLIRContext *context = op->template getParentOfType().getContext(); - - // We only support NHWC <-> NCHW permutations. - static constexpr std::array kNchwToNhwc = {0, 2, 3, 1}; - static constexpr std::array kNhwcToNchw = {0, 3, 1, 2}; - - // Operation data format after folding `permutation`. - StringRef target_data_format = [&]() -> StringRef { - if (op->data_format() == "NHWC" && permutation.equals(kNchwToNhwc)) { - return "NCHW"; // cancel NCHW->NHWC operand permutation - } else if (op->data_format() == "NCHW" && permutation.equals(kNhwcToNchw)) { - return "NHWC"; // cancel NHWC->NCHW operand permutation - } else { - return ""; - } - }(); - if (target_data_format.empty()) return failure(); - - // To fold operand `permutation` into the `op` we need shuffle all layout - // dependent attributes and types with a reverse permutation, and change - // operation data format to `target_data_format`. - // - // Example: - // %1 = SomeOp(...) {data_format = NHWC} - // %2 = Transpose(%1) {permutation = NHWC->NCHW} - // %3 = Op(%2) {data_format = NCHW} - // - // To bypass %2 we have to change data format to shuffle data format from NCHW - // to NHWC, which is the reverse of operand permutation (function argument). 
- auto reverse_permutation = - GetDataFormatPermutation(op->data_format(), target_data_format); - if (reverse_permutation.empty()) return failure(); - - op->setAttr("data_format", StringAttr::get(target_data_format, context)); - - for (auto pair : shuffle_attrs) { - StringRef attr_name = pair.first; - ArrayAttr attr_value = pair.second; - op->setAttr(attr_name, ShuffleArrayAttr(attr_value, reverse_permutation)); - } - - auto fold = cast(op->getOperation()); - for (unsigned idx : fold.GetLayoutDependentResults()) { - OpResult result = op->getOperation()->getResult(idx); - result.setType( - ShuffleRankedTensorType(result.getType(), reverse_permutation)); - } - - return success(); -} - -//===----------------------------------------------------------------------===// -// Rewrite Pattern for removing trivial Arithmetic op. -//===----------------------------------------------------------------------===// - -namespace { -// Fold Arithmetic Op if one of the operands is a constant known to be an -// Identity (e.g. X+0, X*1, etc...). For commutative operations fold if -// known identity value is either lhs or rhs. -template < - typename OpT, - typename std::enable_if::value>::type * = nullptr> -OpFoldResult IdentityArithmeticOpFolder(OpT arithmetic_op, - ArrayRef operands) { - auto lhs_type = arithmetic_op.x().getType().template cast(); - auto rhs_type = arithmetic_op.y().getType().template cast(); - auto result_type = - arithmetic_op.getResult().getType().template cast(); - - // We can fold arithmetic operation only of we can prove that we will not - // accidentally hide a broadcasting error. - auto is_valid_broadcasting = [](ShapedType operand_ty, ShapedType identity_ty, - ShapedType result_ty) -> bool { - // Scalar identity is broadcastable to any operand shape, we only need to - // check that operand has the same shape as a result. - bool scalar_identity = identity_ty.hasRank() && identity_ty.getRank() == 0; - if (scalar_identity) return operand_ty == result_ty; - - // If identity is not a scalar, we must verify that all shapes are equal - // and statically known. - // - // TODO(ezhulenev): Fold if identity shape is statically know to be - // broadcastable to the operand shape. - return operand_ty == result_ty && identity_ty == result_ty && - result_ty.hasStaticShape(); - }; - - // Check that we have a constant operand on one side (candidate for identity). - const bool is_commutative = - (std::is_same::value || std::is_same::value); - auto lhs_attr = operands[0].dyn_cast_or_null(); - auto rhs_attr = operands[1].dyn_cast_or_null(); - if (!rhs_attr && !(is_commutative && lhs_attr)) return {}; - - // Mul and Div ops have identity value one while AddV2 and SubOp have identity - // value zero. - const int identity = - (std::is_same::value || std::is_same::value || - std::is_same::value) - ? 1 - : 0; - - Type element_ty = lhs_type.getElementType(); - Attribute identity_attr; - if (auto ty = element_ty.template dyn_cast()) { - identity_attr = FloatAttr::get(ty, static_cast(identity)); - } else if (auto ty = element_ty.template dyn_cast()) { - identity_attr = IntegerAttr::get(ty, static_cast(identity)); - } else { - return {}; - } - - // Fold: Op(Operand, Identity) -> Operand. - if (rhs_attr && is_valid_broadcasting(lhs_type, rhs_type, result_type)) { - if (rhs_attr.isSplat() && rhs_attr.getSplatValue() == identity_attr) - return arithmetic_op.x(); - } - - // Fold: Op(Identity, Operand) -> Operand for commutative operations. 
- if (lhs_attr && is_commutative && - is_valid_broadcasting(rhs_type, lhs_type, result_type)) { - if (lhs_attr.isSplat() && lhs_attr.getSplatValue() == identity_attr) - return arithmetic_op.y(); - } - - return {}; -} -} // namespace - -namespace { -#include "tensorflow/compiler/mlir/tensorflow/transforms/generated_canonicalize.inc" -} // namespace - -//===----------------------------------------------------------------------===// -// AddOp -//===----------------------------------------------------------------------===// - -void AddOp::getCanonicalizationPatterns(OwningRewritePatternList &results, - MLIRContext *context) { - results.insert(context); -} - -//===----------------------------------------------------------------------===// -// AddNOp -//===----------------------------------------------------------------------===// - -OpFoldResult AddNOp::fold(ArrayRef operands) { - if (operands.size() == 1) return *inputs().begin(); - return {}; -} - -//===----------------------------------------------------------------------===// -// AddV2Op -//===----------------------------------------------------------------------===// - -void AddV2Op::getCanonicalizationPatterns(OwningRewritePatternList &results, - MLIRContext *context) { - results.insert(context); -} - -OpFoldResult AddV2Op::fold(ArrayRef operands) { - return IdentityArithmeticOpFolder(*this, operands); -} - -//===----------------------------------------------------------------------===// -// AllOp -//===----------------------------------------------------------------------===// - -// Verifies an reduction op's `input` and reduction `dims`. -static LogicalResult VerifyReductionInputAndDims(Value input, Value dims, - Location loc) { - auto dims_type = dims.getType().dyn_cast(); - if (!dims_type) return success(); - if (dims_type.getRank() > 1) - return emitError(loc, "dimensions can only be 0D or 1D tensor"); - - auto input_type = input.getType().dyn_cast(); - if (!input_type) return success(); - int64_t rank = input_type.getRank(); - - DenseIntElementsAttr dims_attr; - if (!matchPattern(dims, m_Constant(&dims_attr))) return success(); - for (const auto &dim_pair : llvm::enumerate(dims_attr)) { - int64_t cur_dim = dim_pair.value().getSExtValue(); - if (cur_dim < -rank || cur_dim >= rank) - return emitError(loc) - << dim_pair.index() << "-th dimension should be in the range of [-" - << rank << ", " << rank << ")"; - } - - return success(); -} - -static LogicalResult Verify(AllOp op) { - return VerifyReductionInputAndDims(op.input(), op.reduction_indices(), - op.getLoc()); -} - -//===----------------------------------------------------------------------===// -// AnyOp -//===----------------------------------------------------------------------===// - -static LogicalResult Verify(AnyOp op) { - return VerifyReductionInputAndDims(op.input(), op.reduction_indices(), - op.getLoc()); -} - -//===----------------------------------------------------------------------===// -// AssertOp -//===----------------------------------------------------------------------===// - -namespace { - -// Removes Assert with constant true predicate. 
-struct AssertWithTrue : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(AssertOp op, - PatternRewriter &rewriter) const override { - ElementsAttr cst; - if (matchPattern(op.condition(), m_Constant(&cst))) { - if (cst.getValue({}).getValue()) { - rewriter.eraseOp(op); - return success(); - } - } - return failure(); - } -}; -} // namespace - -void AssertOp::getCanonicalizationPatterns(OwningRewritePatternList &results, - MLIRContext *context) { - results.insert(context); -} - -//===----------------------------------------------------------------------===// -// BatchMatMulOp -//===----------------------------------------------------------------------===// - -void BatchMatMulOp::getCanonicalizationPatterns( - OwningRewritePatternList &results, MLIRContext *context) { - results.insert(context); -} - -//===----------------------------------------------------------------------===// -// BatchMatMulV2Op -//===----------------------------------------------------------------------===// - -static LogicalResult Verify(BatchMatMulV2Op op) { - if (!HasRankAtLeast(op.x(), 2)) { - return op.emitOpError("requires lhs operand to have rank at least two"); - } - if (!HasRankAtLeast(op.y(), 2)) { - return op.emitOpError("requires rhs operand to have rank at least two"); - } - return success(); -} - -void BatchMatMulV2Op::getCanonicalizationPatterns( - OwningRewritePatternList &results, MLIRContext *context) { - results.insert(context); -} - -//===----------------------------------------------------------------------===// -// BatchToSpaceOp -//===----------------------------------------------------------------------===// - -static LogicalResult Verify(BatchToSpaceOp op) { - // Op already has a constraint that block_size >= 2. 
- int64_t block_size = op.block_size().getSExtValue(); - - llvm::SmallVector input_shape(4, ShapedType::kDynamicSize); - auto input_type = op.input().getType().cast(); - if (input_type.hasRank()) { - if (input_type.getRank() != 4) - return op.emitOpError() - << "requires input to be a 4D tensor, but got " << input_type; - - int64_t input_batch = input_type.getDimSize(0); - if (input_batch != ShapedType::kDynamicSize && - input_batch % (block_size * block_size) != 0) { - return op.emitOpError() - << "requires input batch (dimension 0) to be evenly divisible " - "by (block_size * block_size), but got input batch " - << input_batch << " and block_size " << block_size; - } - - input_shape.assign(input_type.getShape().begin(), - input_type.getShape().end()); - } - - auto crops_type = op.crops().getType().cast(); - if (crops_type.hasRank()) { - if (crops_type.getRank() != 2) - return op.emitOpError() - << "requires crops to be a 2D tensor, but got " << crops_type; - - auto dim_of_size = [&](int64_t dim, int64_t size) { - if (crops_type.isDynamicDim(dim)) return true; - return crops_type.getDimSize(dim) == size; - }; - if (!dim_of_size(0, 2) || !dim_of_size(1, 2)) - return op.emitOpError() - << "requires crops to be a tensor<2x2>, but got " << crops_type; - } - - DenseIntElementsAttr crops_attr; - // Crops are defined as [[crop_top, crop_bottom], [crop_left, crop_right]], - // and flattened as [crop_top, crop_bottom, crop_left, crop_right] - llvm::SmallVector crops_values; - if (matchPattern(op.crops(), m_Constant(&crops_attr))) { - assert(crops_attr.getNumElements() == 4 && - "tf.BatchToSpace crops must have 4 elements"); - - auto crops_range = crops_attr.getIntValues(); - for (const auto &crops_value : crops_range) { - int64_t crops_value_int = crops_value.getSExtValue(); - if (crops_value_int < 0) - return op.emitOpError() - << "requires all crop values to be nonnegative, but got " - << crops_attr; - - crops_values.push_back(crops_value_int); - } - } - - auto output_type = op.output().getType().cast(); - if (output_type.hasRank()) { - if (output_type.getRank() != 4) - return op.emitOpError() - << "requires output to be a 4D tensor, but got " << output_type; - - auto static_dims = [](int64_t dim_a, int64_t dim_b) { - return dim_a != ShapedType::kDynamicSize && - dim_b != ShapedType::kDynamicSize; - }; - - auto output_shape = output_type.getShape(); - - // output batch = input batch / (block_size * block_size). - int64_t input_batch = input_shape[0]; - int64_t output_batch = output_shape[0]; - if (static_dims(input_batch, output_batch) && - (output_batch * block_size * block_size) != input_batch) - return op.emitOpError() - << "requires output batch (dimension 0) to be equal to input " - "batch (dimension 0) / (block_size * block_size), but got " - "output batch " - << output_batch << ", input batch " << input_batch - << ", and block_size " << block_size; - - auto check_spatial_dim = [&](int64_t spatial_dim_index, - llvm::StringRef dim_name, - llvm::StringRef crop_a_name, - llvm::StringRef crop_b_name) -> LogicalResult { - int64_t input_dim = input_shape[spatial_dim_index]; - int64_t output_dim = output_shape[spatial_dim_index]; - if (!static_dims(input_dim, output_dim)) return success(); - - int64_t input_dim_pad = input_dim * block_size; - // If crops are unknown, the maximum output spatial dim size is input - // spatial dim size * block_size, as crops can be minimum 0. 
- if (crops_values.empty() && output_dim > input_dim * block_size) - return op.emitOpError() - << "requires output " << dim_name << " (dimension " - << spatial_dim_index << ") to be less than or equal to input " - << dim_name << " (dimension " << spatial_dim_index - << ") * block_size, but got output " << dim_name << " " - << output_dim << ", input " << dim_name << " " << input_dim - << ", and block_size " << block_size; - - if (!crops_values.empty()) { - // output spatial dim = input spatial dim * block_size - crops. - int64_t crop_a = crops_values[2 * (spatial_dim_index - 1)]; - int64_t crop_b = crops_values[2 * (spatial_dim_index - 1) + 1]; - if (output_dim != input_dim_pad - crop_a - crop_b) - return op.emitOpError() - << "requires output " << dim_name << " (dimension " - << spatial_dim_index << ") to be equal to input " << dim_name - << " (dimension " << spatial_dim_index << ") * block_size - " - << crop_a_name << " - " << crop_b_name << ", but got output " - << dim_name << " " << output_dim << ", input " << dim_name - << " " << input_dim << ", " << crop_a_name << " " << crop_a - << ", " << crop_b_name << " " << crop_b << ", and block_size " - << block_size; - } - - return success(); - }; - - if (failed(check_spatial_dim(1, "height", "crop_top", "crop_bottom")) || - failed(check_spatial_dim(2, "width", "crop_left", "crop_right"))) - return failure(); - - int64_t input_depth = input_shape[3]; - int64_t output_depth = output_shape[3]; - if (static_dims(input_depth, output_depth) && output_depth != input_depth) - return op.emitOpError() - << "requires output depth (dimension 3) to be equal to input " - "depth (dimension 3), but got output depth " - << output_depth << " and input depth " << input_depth; - } - - return success(); -} - -void BatchToSpaceOp::getCanonicalizationPatterns( - OwningRewritePatternList &results, MLIRContext *context) { - results.insert(context); -} - -//===----------------------------------------------------------------------===// -// BiasAddOp -//===----------------------------------------------------------------------===// - -// Verifies that, -// * the value and bias operands have valid ranks or are unranked. -// * Channel dimension of the value operand and length of bias matches if they -// are not unknown. -// -static LogicalResult Verify(BiasAddOp op) { - StringRef format = op.data_format(); - if (format == "NHWC") { - if (!HasRankAtLeast(op.value(), 2)) - return op.emitOpError( - "requires value operand to have rank at least two with `NHWC` data " - "format"); - } else { - // Op definition requires data_format to be either NHWC or NCHW. - DCHECK_EQ(format.str(), "NCHW"); - if (!HasRankAtLeast(op.value(), 3)) - return op.emitOpError( - "requires value operand to have rank at least three with `NCHW` data " - "format"); - } - - if (!IsOfRankOrUnranked(op.bias(), 1)) - return op.emitOpError("requires bias operand to have rank exactly one"); - - RankedTensorType value_ty = op.value().getType().dyn_cast(); - RankedTensorType bias_ty = op.bias().getType().dyn_cast(); - if (!bias_ty || !value_ty) return success(); - - // TODO(hinsu): Leverage tensor_format.h utility in TensorFlow to compute - // dimension indices based on format. - int64_t feature_dim_idx = format == "NHWC" ? 
value_ty.getRank() - 1 : 1; - int64_t feature_dim = value_ty.getDimSize(feature_dim_idx); - int64_t bias_len = bias_ty.getDimSize(0); - if (feature_dim != -1 && bias_len != -1 && feature_dim != bias_len) { - return op.emitOpError() - << "requires channel dimension and feature dimension to match; " - "found " - << feature_dim << " and " << bias_len << ", respectively"; - } - return success(); -} - -//===----------------------------------------------------------------------===// -// BiasAddGradOp -//===----------------------------------------------------------------------===// - -// Verifies that, -// * the out_backprop operands have valid ranks or are unranked. -// -static LogicalResult Verify(BiasAddGradOp op) { - StringRef format = op.data_format(); - if (format == "NHWC") { - if (!HasRankAtLeast(op.out_backprop(), 2)) - return op.emitOpError( - "requires out_backprop operand to have rank at least two with `NHWC` " - "data format"); - } else { - // Op definition requires data_format to be either NHWC or NCHW. - DCHECK_EQ(format.str(), "NCHW"); - if (!HasRankAtLeast(op.out_backprop(), 3)) - return op.emitOpError( - "requires out_backprop operand to have rank at least three with " - "`NCHW` data format"); - } - - return success(); -} - -//===----------------------------------------------------------------------===// -// BiasAddV1Op -//===----------------------------------------------------------------------===// - -void BiasAddV1Op::getCanonicalizationPatterns(OwningRewritePatternList &results, - MLIRContext *context) { - results.insert(context); -} - -//===----------------------------------------------------------------------===// -// BitcastOp -//===----------------------------------------------------------------------===// - -void BitcastOp::getCanonicalizationPatterns(OwningRewritePatternList &results, - MLIRContext *context) { - results.insert(context); -} - -//===----------------------------------------------------------------------===// -// BroadcastToOp -//===----------------------------------------------------------------------===// - -static LogicalResult Verify(BroadcastToOp op) { - // TODO(antiagainst): check that - // * The 'shape' input is an 1-D int tensor. - // * Each dimension pair of the source and target shapes are either equal - // or one of them is one. - return success(); -} - -//===----------------------------------------------------------------------===// -// CaseOp -//===----------------------------------------------------------------------===// - -class FoldConstantCaseOp : public OpRewritePattern { - public: - explicit FoldConstantCaseOp(MLIRContext *context) - : OpRewritePattern(context) {} - LogicalResult matchAndRewrite(TF::CaseOp op, - PatternRewriter &rewriter) const override; -}; - -LogicalResult FoldConstantCaseOp::matchAndRewrite( - TF::CaseOp op, PatternRewriter &rewriter) const { - // Extract the constant cond value. - DenseIntElementsAttr branch; - if (!matchPattern(op.branch_index(), m_Constant(&branch))) return failure(); - - // Only attempt to fold scalar valued case statements. - // TODO(jpienaar): This can be removed if CaseOp's verifier covers it. - if (!branch.getType().cast().getShape().empty()) - return failure(); - - int index = *branch.getValues().begin(); - // TODO(jpienaar): This can be removed if CaseOp's verifier covers it. 
- if (index >= op.branches().size()) return failure(); - - auto func = op.branches()[index].cast(); - auto empty = rewriter.getStringAttr(""); - auto call_op = rewriter.create( - op.getLoc(), op.getResultTypes(), op.getOperands().drop_front(), func, - /*config=*/empty, /*config_proto=*/empty, /*executor_type=*/empty); - PropagateAttributes(op.getOperation(), call_op); - rewriter.replaceOp(op, call_op.getResults()); - return success(); -} - -void CaseOp::getCanonicalizationPatterns(OwningRewritePatternList &results, - MLIRContext *context) { - results.insert(context); -} - -//===----------------------------------------------------------------------===// -// CastOp -//===----------------------------------------------------------------------===// - -OpFoldResult CastOp::fold(ArrayRef operands) { - // Cast with the same type is a no-op. - Value operand = getOperand(); - if (getType() == operand.getType()) return operand; - return {}; -} - -//===----------------------------------------------------------------------===// -// ConcatOp and ConcatV2Op -//===----------------------------------------------------------------------===// - -template ::value>::type * = nullptr> -static LogicalResult Verify(OpT op) { - // TODO(hinsu): Convert variadic length attributes to derived attributes. - Operation::operand_range values = op.values(); - - int axis_idx = std::is_same() ? 0 : 1; - Value axis = *op.getODSOperands(axis_idx).begin(); - if (!HasRankAtMost(axis, 1)) { - return op.emitOpError( - "requires axis to be of scalar type (or vector type for older " - "versions)"); - } - - return VerifyTypesCompatibility(values, - /*mask_one_dim=*/true, op.getOperation()); -} - -void ConcatOp::getCanonicalizationPatterns(OwningRewritePatternList &results, - MLIRContext *context) { - results.insert(context); -} - -//===----------------------------------------------------------------------===// -// ConcatOffsetOp -//===----------------------------------------------------------------------===// - -static LogicalResult Verify(ConcatOffsetOp op) { - if (op.N() < 2) - return op.emitOpError() << "requires N to be at least 2, got " << op.N(); - - if (op.shape().size() != op.offset().size()) - return op.emitOpError() - << "requires sizes of shapes and offsets to be the same, got sizes " - << op.shape().size() << " and " << op.offset().size(); - - auto ranked_dim = op.concat_dim().getType().dyn_cast(); - if (ranked_dim && ranked_dim.getRank() != 0) - return op.emitOpError() - << "requires concat_dim to be a scalar, got tensor of rank " - << ranked_dim.getRank(); - - int64_t num_dims = -1; - for (auto shape_offset_idx : - llvm::enumerate(llvm::zip(op.shape(), op.offset()))) { - Value shape = std::get<0>(shape_offset_idx.value()); - Value offset = std::get<1>(shape_offset_idx.value()); - const size_t idx = shape_offset_idx.index(); - - if (failed(verifyCompatibleShape(shape.getType(), offset.getType()))) - return op.emitOpError() << "requires operand and result " << idx - << " to have compatible shapes"; - - auto ranked_shape = shape.getType().dyn_cast(); - if (!ranked_shape) continue; - - if (ranked_shape.getRank() != 1) - return op.emitOpError() << "requires shape tensor operand " << idx - << " to be of rank 1, got tensor of rank " - << ranked_shape.getRank(); - - if (!ranked_shape.hasStaticShape()) continue; - - int64_t ranked_shape_dim = ranked_shape.getDimSize(0); - if (num_dims == -1) - num_dims = ranked_shape_dim; - else if (ranked_shape_dim != num_dims) - return op.emitOpError() - << "requires shape tensor (rank 1) 
operand " << idx - << " to be of length " << num_dims - << ", got tensor (rank 1) of length " << ranked_shape_dim; - } - - return success(); -} - -LogicalResult ConcatOffsetOp::fold(ArrayRef operands, - SmallVectorImpl &results) { - // ConcatOffset must have its first operand be concat_dim and at least two - // shape tensors in variadic shapes operand. - if (operands.size() < 3) return failure(); - - // Check concat_dim is a scalar. - auto concat_dim_attr = operands[0].dyn_cast_or_null(); - if (!concat_dim_attr || concat_dim_attr.getType().getRank() != 0) - return failure(); - - llvm::SmallVector shapes; - shapes.reserve(operands.size() - 1); - for (Attribute shape : llvm::drop_begin(operands, 1)) - if (auto shape_attr = shape.dyn_cast_or_null()) - shapes.push_back(shape_attr); - else - return failure(); - - // Check all shapes are vectors of the same length. - if (shapes.front().getType().getRank() != 1) return success(); - const int64_t num_dims = shapes.front().getNumElements(); - for (DenseIntElementsAttr shape : llvm::drop_begin(shapes, 1)) - if (shape.getType().getRank() != 1 || shape.getNumElements() != num_dims) - return failure(); - - // Check concat_dim is within [-num_dims, num_dims). - int32_t concat_dim = (*concat_dim_attr.getValues().begin()); - if (concat_dim < 0) concat_dim += num_dims; - if (concat_dim >= num_dims || concat_dim < 0) return failure(); - - // Check all elements besides at concat_dim match across all shape tensors. - SmallVector shape0; - shape0.reserve(num_dims); - for (int32_t dim : shapes.front().getValues()) shape0.push_back(dim); - - for (DenseIntElementsAttr shape : llvm::drop_begin(shapes, 1)) { - for (auto dims_and_idx : llvm::enumerate(llvm::zip(shape0, shape))) { - if (dims_and_idx.index() == concat_dim) continue; - - if (std::get<0>(dims_and_idx.value()) != - std::get<1>(dims_and_idx.value()).getSExtValue()) - return failure(); - } - } - - // Compute an exclusive cumulative sum of elements at concat_dim. - results.reserve(shapes.size()); - SmallVector cumulative_sum(num_dims, 0); - RankedTensorType offset_type = - RankedTensorType::get({num_dims}, IntegerType::get(32, getContext())); - for (DenseIntElementsAttr shape : shapes) { - results.push_back(DenseIntElementsAttr::get(offset_type, cumulative_sum)); - cumulative_sum[concat_dim] += shape.getValue(concat_dim); - } - - return success(); -} - -//===----------------------------------------------------------------------===// -// ConjOp -//===----------------------------------------------------------------------===// - -void ConjOp::getCanonicalizationPatterns(OwningRewritePatternList &results, - MLIRContext *context) { - results.insert(context); -} - -//===----------------------------------------------------------------------===// -// ConstOp -//===----------------------------------------------------------------------===// - -OpFoldResult ConstOp::fold(ArrayRef operands) { - assert(operands.empty() && "constant has no operands"); - - // Return the held attribute value. - return value(); -} - -// Builds a constant op with the specified attribute `value`. The result -// op's type is deduced from `value`; if `value` is of scalar type, -// wraps it up with a tensor type of empty shape. -// TODO(jpienaar): This one differs from the autogenerated one as it takes an -// attribute but always creates an ElementsAttr internally. 
-void ConstOp::build(OpBuilder &builder, OperationState &result, - Attribute value) { - ShapedType type; - if (auto elem_attr = value.dyn_cast()) { - return ConstOp::build(builder, result, elem_attr); - } else if (value.isa()) { - // All TensorFlow types must be tensor types. In the build() method, - // we want to provide more flexibility by allowing attributes of scalar - // types. But we need to wrap it up with ElementsAttr to construct - // valid TensorFlow constants. - type = RankedTensorType::get(/*shape=*/{}, value.getType()); - return ConstOp::build(builder, result, DenseElementsAttr::get(type, value)); - } - // TODO(jpienaar): support other TensorFlow specific types. - llvm_unreachable("unsupported attribute type for building tf.Const"); -} - -void ConstOp::build(OpBuilder &builder, OperationState &result, Type type, - Attribute value) { - // Handle the case where the type and value are already tensors. - if (type.isa() && value.isa()) { - result.addTypes(type); - result.addAttribute("value", value); - return; - } - - // Otherwise, default to the attribute builder. - ConstOp::build(builder, result, value); - assert(type == result.types[0] && "type mismatch in construction"); -} - -LogicalResult ConstOp::inferReturnTypes( - MLIRContext *context, Optional location, ValueRange operands, - DictionaryAttr attributes, RegionRange regions, - SmallVectorImpl &inferredReturnTypes) { - auto value = attributes.get("value"); - if (!value) return emitOptionalError(location, "missing attribute 'value'"); - if (auto elem_attr = value.dyn_cast()) { - inferredReturnTypes.assign({elem_attr.getType()}); - return success(); - } - return emitOptionalError(location, - "attribute 'value' failed to satisfy constraint: " - "constant vector/tensor"); -} - -//===----------------------------------------------------------------------===// -// Conv2DOp and Conv3DOp -//===----------------------------------------------------------------------===// - -template -static LogicalResult VerifyConvOpAttributes(OpT op, int num_dims) { - if (!IsOfRankOrUnranked(op.getResult(), num_dims)) - return op.emitOpError() - << "requires result to be " << num_dims << "D tensor"; - - auto is_not_positive = [](Attribute val) { - return val.cast().getValue().getSExtValue() <= 0; - }; - - int64_t strides_size = op.strides().size(); - if (strides_size != num_dims) - return op.emitOpError() << "requires strides attribute length to be " - << num_dims << "; actual length " << strides_size; - if (llvm::any_of(op.strides().getValue(), is_not_positive)) - return op.emitOpError("requires positive strides"); - - int64_t dilations_size = op.strides().size(); - if (op.dilations().size() != num_dims) - return op.emitOpError() << "requires dilations attribute length to be " - << num_dims << "; actual length " << dilations_size; - if (llvm::any_of(op.dilations().getValue(), is_not_positive)) - return op.emitOpError("requires positive dilations"); - - return success(); -} - -// Verifies that, -// * Ranks of operands and result are valid -// * Number of input channels is divisible by the number of filter input -// channels -// * Length of explicit_paddings attribute is valid and has non negative -// elements -// * strides and dilations attributes have positive elements -template ::value>::type * = nullptr> -static LogicalResult Verify(OpT op) { - int num_spatial_dims = std::is_same() ? 
2 : 3; - int num_dims = 2 + num_spatial_dims; - - if (!IsOfRankOrUnranked(op.input(), num_dims) || - !IsOfRankOrUnranked(op.filter(), num_dims)) - return op.emitOpError() - << "requires operands to be " << num_dims << "D tensor"; - - // EXPLICIT padding mode and the associated attribute is limited to Conv2D. - // So, fetch attribute by string instead of the op.explicit_paddings() - // attribute getter. - if (op.padding() == "EXPLICIT") { - auto paddings = op.template getAttrOfType("explicit_paddings"); - if (!paddings) - return op.emitOpError() << "requires attribute 'explicit_paddings' with " - "'EXPLICIT' padding mode"; - - int64_t paddings_size = paddings.size(); - int64_t expected_size = 2 * num_dims; - - if (paddings_size != expected_size) - return op.emitOpError() - << "requires explicit_paddings attribute length to be " - << expected_size << "; actual length " << paddings_size; - - auto is_negative = [](Attribute val) { - return val.cast().getValue().getSExtValue() < 0; - }; - if (llvm::any_of(paddings.getValue(), is_negative)) - return op.emitOpError("requires non negative explicit paddings"); - } - - LogicalResult verify_result = VerifyConvOpAttributes(op, num_dims); - if (failed(verify_result)) { - return verify_result; - } - - int64_t input_channels = -1; - if (auto ty = op.input().getType().template dyn_cast()) { - std::string data_format = op.data_format().str(); - tensorflow::TensorFormat format; - auto is_valid = FormatFromString(data_format, &format); - DCHECK(is_valid) << data_format; - int idx = tensorflow::GetTensorFeatureDimIndex(num_dims, format); - input_channels = ty.getDimSize(idx); - } - - int64_t filter_channels = -1; - if (auto ty = op.filter().getType().template dyn_cast()) { - int idx = tensorflow::GetFilterTensorInputChannelsDimIndex( - num_dims, tensorflow::FORMAT_HWIO); - filter_channels = ty.getDimSize(idx); - } - - if (input_channels != -1 && filter_channels != -1 && - input_channels % filter_channels != 0) - return op.emitOpError() - << "requires the number of input channels to be divisible by the " - "number of filter input channels; found " - << input_channels << " and " << filter_channels << ", respectively"; - - return success(); -} - -LogicalResult Conv2DOp::UpdateDataFormat(StringRef data_format) { - auto perm = GetDataFormatPermutation(this->data_format(), data_format); - if (perm.empty()) return failure(); - - // Update data_format attribute and result types. - if (failed(::mlir::TF::UpdateDataFormat(data_format, this))) return failure(); - - // Update convolution attributes. - setAttr("dilations", ShuffleArrayAttr(dilations(), perm)); - setAttr("strides", ShuffleArrayAttr(strides(), perm)); - setAttr("explicit_paddings", ShuffleArrayAttr(explicit_paddings(), perm, 2)); - - return success(); -} - -StringRef Conv2DOp::GetOptimalLayout(const RuntimeDevices &devices) { - // Keep current data format if no GPUs are available or if explicit placement - // does not allow to use GPU for this operation. - if (!CanUseGpuDevice(devices) || !CanUseGpuDevice(getOperation())) - return data_format(); - - // Input must be a tensor. - auto input_ty = input().getType().dyn_cast(); - if (!input_ty) return data_format(); - - // For f16 data type on devices with Tensor Cores support NHWC data format - // is up to ~2x faster. 
- const bool is_f16 = input_ty.getElementType().isF16(); - if (is_f16 && CanUseTensorCores(devices)) return "NHWC"; - - // For f32/f16 data type decision depends on the filter size in spatial - // dimensions, for other data types we keep current data format. - if (!input_ty.getElementType().isF32() && !input_ty.getElementType().isF16()) - return data_format(); - - // Keep current data format if filter rank is unknown or not equal to 4. - auto filter_ty = filter().getType().dyn_cast(); - if (!filter_ty || filter_ty.getRank() != 4) return data_format(); - - const int64_t d0 = filter_ty.getDimSize(0); - const int64_t d1 = filter_ty.getDimSize(1); - - auto all_ones = [](ArrayAttr arr) -> bool { - return llvm::all_of(arr, [](Attribute attr) -> bool { - return attr.cast().getInt() == 1; - }); - }; - - // Convolutions with 1x1 filter and with strides and dilations all ones, can - // be computed as a GEMM in NHWC data format, and can be up to ~2x times - // faster than convolution in NCHW. - const bool one_by_one = d0 == 1 && d1 == 1; - const bool trivial_strides = all_ones(strides()); - const bool trivial_dilations = all_ones(dilations()); - - // TODO(ezhulenev): This might lead to excessive transposes in the final IR, - // if the ratio of 1x1 convolutions to regular convolutions is close to 1:1. - // Also FusedBatchNorm in training mode prefers NCHW data format. Check if all - // users can efficiently use NHWC data format? - if (one_by_one && trivial_strides && trivial_dilations) { - return "NHWC"; - } - - // If filter spatial dimensions are unknown or not 1x1 we prefer NCHW, because - // it's the fastest option on NVIDIA GPUs with cuDNN library support. - return "NCHW"; -} - -//===----------------------------------------------------------------------===// -// Conv2dBackpropFilterOp -//===----------------------------------------------------------------------===// - -LogicalResult Conv2DBackpropFilterOp::UpdateDataFormat(StringRef data_format) { - StringRef src_data_format = this->data_format(); - - auto perm = GetDataFormatPermutation(src_data_format, data_format); - if (perm.empty()) return failure(); - - // Update data_format attribute and result types. - if (failed(::mlir::TF::UpdateDataFormat(data_format, this))) return failure(); - - // Update convolution attributes. - setAttr("dilations", ShuffleArrayAttr(dilations(), perm)); - setAttr("strides", ShuffleArrayAttr(strides(), perm)); - setAttr("explicit_paddings", ShuffleArrayAttr(explicit_paddings(), perm, 2)); - - // Permute filter sizes operand. - OpBuilder builder(getOperation()); - auto filter_sizes_permuted = builder.create( - getLoc(), filter_sizes(), StringAttr::get(src_data_format, getContext()), - StringAttr::get(data_format, getContext())); - setOperand(1, filter_sizes_permuted); - - return success(); -} - -StringRef Conv2DBackpropFilterOp::GetOptimalLayout( - const RuntimeDevices &devices) { - // Keep current data format if no GPUs are available or if explicit placement - // does not allow to use GPU for this operation. - if (!CanUseGpuDevice(devices) || !CanUseGpuDevice(getOperation())) - return data_format(); - - // Input must be a tensor. - auto input_ty = input().getType().dyn_cast(); - if (!input_ty) return data_format(); - - // For f16 data type on devices with Tensor Cores support NHWC data format - // is up to ~2x faster. - const bool is_f16 = input_ty.getElementType().isF16(); - if (is_f16 && CanUseTensorCores(devices)) return "NHWC"; - - // Otherwise always use "NCHW". 
- return "NCHW"; -} - -//===----------------------------------------------------------------------===// -// Conv2DBackpropInputOp -//===----------------------------------------------------------------------===// - -static LogicalResult Verify(Conv2DBackpropInputOp op) { - int num_spatial_dims = 2; - int num_dims = 2 + num_spatial_dims; - - if (!IsOfRankOrUnranked(op.out_backprop(), num_dims) || - !IsOfRankOrUnranked(op.filter(), num_dims)) - return op.emitOpError() - << "requires operands to be " << num_dims << "D tensor"; - - LogicalResult verify_result = VerifyConvOpAttributes(op, num_dims); - if (failed(verify_result)) { - return verify_result; - } - - return success(); -} - -LogicalResult Conv2DBackpropInputOp::UpdateDataFormat(StringRef data_format) { - StringRef src_data_format = this->data_format(); - - auto perm = GetDataFormatPermutation(src_data_format, data_format); - if (perm.empty()) return failure(); - - // Update data_format attribute and result types. - if (failed(::mlir::TF::UpdateDataFormat(data_format, this))) return failure(); - - // Update convolution attributes. - setAttr("dilations", ShuffleArrayAttr(dilations(), perm)); - setAttr("strides", ShuffleArrayAttr(strides(), perm)); - setAttr("explicit_paddings", ShuffleArrayAttr(explicit_paddings(), perm, 2)); - - // Permute input sizes operand. - OpBuilder builder(getOperation()); - auto input_sizes_permuted = builder.create( - getLoc(), input_sizes(), StringAttr::get(src_data_format, getContext()), - StringAttr::get(data_format, getContext())); - setOperand(0, input_sizes_permuted); - - return success(); -} - -StringRef Conv2DBackpropInputOp::GetOptimalLayout( - const RuntimeDevices &devices) { - // Keep current data format if no GPUs are available or if explicit placement - // does not allow to use GPU for this operation. - if (!CanUseGpuDevice(devices) || !CanUseGpuDevice(getOperation())) - return data_format(); - - // Filter must be a tensor. - auto filter_ty = filter().getType().dyn_cast(); - if (!filter_ty) return data_format(); - - // For f16 data type on devices with Tensor Cores support NHWC data format - // is up to ~2x faster. - const bool is_f16 = filter_ty.getElementType().isF16(); - if (is_f16 && CanUseTensorCores(devices)) return "NHWC"; - - // Otherwise always use "NCHW". 
- return "NCHW"; -} - -//===----------------------------------------------------------------------===// -// DataFormatVecPermuteOp -//===----------------------------------------------------------------------===// - -static LogicalResult Verify(DataFormatVecPermuteOp op) { - auto input_ty = op.x().getType().dyn_cast(); - if (!input_ty) return success(); - - int rank = input_ty.getRank(); - if (rank != 1 && rank != 2) - return op.emitOpError("requires input of rank 1 or 2"); - - if (rank == 1) { - int64_t dim0 = input_ty.getDimSize(0); - if (dim0 != ShapedType::kDynamicSize && dim0 != 4 && dim0 != 2) - return op.emitOpError("requires 1D input of size 4 or size 2"); - } - - if (rank == 2) { - int64_t dim0 = input_ty.getDimSize(0); - if (dim0 != ShapedType::kDynamicSize && dim0 != 4) - return op.emitOpError( - "requires first dimensions of 2D input to be of size 4"); - - int64_t dim1 = input_ty.getDimSize(1); - if (dim1 != ShapedType::kDynamicSize && dim1 != 2) - return op.emitOpError( - "requires second dimensions of 2D input to be of size 2"); - } - - return success(); -} - -//===----------------------------------------------------------------------===// -// DivOp -//===----------------------------------------------------------------------===// - -void DivOp::getCanonicalizationPatterns(OwningRewritePatternList &results, - MLIRContext *context) { - results.insert(context); -} - -OpFoldResult DivOp::fold(ArrayRef operands) { - return IdentityArithmeticOpFolder(*this, operands); -} - -//===----------------------------------------------------------------------===// -// DynamicStitchOp -//===----------------------------------------------------------------------===// - -static LogicalResult Verify(DynamicStitchOp op) { - if (op.N() < 1) return op.emitOpError("requires attribute N with value >= 1"); - - if (RankedTensorType out_ty = op.getType().dyn_cast()) { - if (out_ty.getRank() == 0) { - return op.emitOpError("requires non scalar output"); - } - } - - llvm::SmallDenseSet index_values; - bool all_indices_const = true; - int32_t max_index = -1; - llvm::Optional> inferred_item_shape; - for (auto it : llvm::zip(op.indices(), op.data())) { - Value index = std::get<0>(it); - - DenseIntElementsAttr index_attr; - if (matchPattern(index, m_Constant(&index_attr))) { - for (int32_t index : index_attr.getValues()) { - if (index < 0) - return op.emitOpError() - << "requires non-negative index values; found " << index; - max_index = std::max(index, max_index); - index_values.insert(index); - } - } else { - all_indices_const = false; - } - - Value data = std::get<1>(it); - RankedTensorType index_ty = index.getType().dyn_cast(); - RankedTensorType data_ty = data.getType().dyn_cast(); - if (!index_ty || !data_ty) continue; - - int64_t index_rank = index_ty.getRank(); - ArrayRef data_shape = data_ty.getShape(); - ArrayRef index_shape = index_ty.getShape(); - if (failed(mlir::verifyCompatibleShape(index_shape, - data_shape.take_front(index_rank)))) - return op.emitOpError() << "requires shape of data with type " << data_ty - << " to have prefix matching with shape of the " - "corresponding index type " - << index_ty; - - ArrayRef item_shape = data_shape.drop_front(index_rank); - if (!inferred_item_shape) { - inferred_item_shape = llvm::to_vector<4>(item_shape); - continue; - } - - if (failed(mlir::verifyCompatibleShape(item_shape, *inferred_item_shape))) - return op.emitOpError() << "has inconsistent shaped data and index " - "pairs; inferred item shapes [" - << llvm::makeArrayRef(*inferred_item_shape) - << "] 
and [" << item_shape << "] don't match"; - for (int i = 0, e = item_shape.size(); i < e; ++i) { - int64_t &inferred_dim = (*inferred_item_shape)[i]; - int64_t dim = item_shape[i]; - if (ShapedType::isDynamic(inferred_dim)) inferred_dim = dim; - } - } - - // If all indices are constants, then verify that they cover all indices in - // the range [0, max_index] and the output type is legal. - if (all_indices_const) { - for (int32_t i = 0; i <= max_index; i++) { - if (!index_values.count(i)) - return op.emitOpError() << "missing index " << i; - } - - if (inferred_item_shape) { - SmallVector expected_shape; - expected_shape.push_back(max_index + 1); - expected_shape.append(inferred_item_shape->begin(), - inferred_item_shape->end()); - - auto out_ty = op.getType().cast(); - auto expected_out_ty = - RankedTensorType::get(expected_shape, out_ty.getElementType()); - - if (!AreCastCompatible({out_ty, expected_out_ty})) { - return op.emitOpError() << "has invalid output type; should be " - "compatible with inferred type " - << expected_out_ty; - } - } - } - - return success(); -} - -//===----------------------------------------------------------------------===// -// EinsumOp -//===----------------------------------------------------------------------===// - -// Verifies that, -// * Arity of the op is at most two. -// -// TODO(hinsu): Verify einsum equation attribute. -static LogicalResult Verify(EinsumOp op) { - if (op.N() > 2) { - return op.emitOpError("supports at most two operands"); - } - return success(); -} - -//===----------------------------------------------------------------------===// -// EmptyOp -//===----------------------------------------------------------------------===// - -OpFoldResult EmptyOp::fold(ArrayRef operands) { - assert(operands.size() == 1 && "empty op has one operand"); - - Attribute attr = operands.front(); - if (!attr) return {}; - - auto int_attr = attr.cast(); - SmallVector out_shape; - for (const auto val : int_attr.getValues()) { - out_shape.push_back(val); - } - - auto type = getResult().getType().cast(); - auto etype = type.getElementType(); - - // We can not fold if the result is not static. - if (!type.hasStaticShape()) return {}; - - if (auto float_type = etype.dyn_cast()) { - auto out_type = RankedTensorType::get(out_shape, float_type); - return DenseElementsAttr::get(out_type, - {APFloat(float_type.getFloatSemantics())}); - } - - if (auto int_type = etype.dyn_cast()) { - auto out_type = RankedTensorType::get(out_shape, etype); - APInt val(int_type.getWidth(), 0, int_type.getSignedness()); - return DenseElementsAttr::get(out_type, val); - } - - return {}; -} - -//===----------------------------------------------------------------------===// -// EmptyTensorListOp -//===----------------------------------------------------------------------===// - -static LogicalResult Verify(EmptyTensorListOp op) { - if (!IsOfRankOrUnranked(op.element_shape(), 0) && - !IsOfRankOrUnranked(op.element_shape(), 1)) { - return op.emitOpError("requires element_shape operand to be 0D/1D tensor"); - } - - if (!IsOfRankOrUnranked(op.max_num_elements(), 0)) { - return op.emitOpError("requires max_num_elements operand to be 0D tensor"); - } - return success(); -} - -//===----------------------------------------------------------------------===// -// EqualOp -//===----------------------------------------------------------------------===// - -static LogicalResult Verify(EqualOp op) { - // If we allow inputs to have incompatible type, then nothing to do. 
- if (!op.incompatible_shape_error()) return success(); - - // Otherwise, check inputs are broadcastable. - return mlir::OpTrait::impl::verifyCompatibleOperandBroadcast( - op.getOperation()); -} - -void EqualOp::build(OpBuilder &builder, OperationState &result, Value x, - Value y, BoolAttr incompatible_shape_error) { - auto result_type = DeduceEqualCmpOpType(&builder, result.location, x, y, - incompatible_shape_error); - return build(builder, result, result_type, x, y, incompatible_shape_error); -} - -//===----------------------------------------------------------------------===// -// ExpandDimsOp -//===----------------------------------------------------------------------===// - -Type InferExpandDimsOpType(Value input, Value dim) { - Type element_ty = input.getType().cast().getElementType(); - auto unranked_ty = UnrankedTensorType::get(element_ty); - - auto input_ty = input.getType().dyn_cast(); - if (!input_ty) return unranked_ty; - - DenseIntElementsAttr dim_attr; - if (!matchPattern(dim, m_Constant(&dim_attr)) || - dim_attr.getNumElements() != 1) - return unranked_ty; - int64_t dim_val = (*dim_attr.begin()).getSExtValue(); - int64_t input_rank = input_ty.getRank(); - - if (dim_val < -input_rank - 1 || dim_val > input_rank + 1) return unranked_ty; - if (dim_val < 0) dim_val += input_rank + 1; - - SmallVector shape = llvm::to_vector<4>(input_ty.getShape()); - shape.insert(shape.begin() + dim_val, 1); - return RankedTensorType::get(shape, element_ty); -} - -void ExpandDimsOp::build(OpBuilder &builder, OperationState &result, - Value input, Value dim) { - return build(builder, result, InferExpandDimsOpType(input, dim), input, dim); -} - -//===----------------------------------------------------------------------===// -// FakeQuantWithMinMaxArgsOp -//===----------------------------------------------------------------------===// -static LogicalResult Verify(FakeQuantWithMinMaxArgsOp op) { - // TODO(fengliuai): moving the following to an utility method. - const llvm::fltSemantics &semantics = op.min().getSemantics(); - float rmin, rmax; - if (&semantics == &APFloat::IEEEsingle()) { - rmin = op.min().convertToFloat(); - rmax = op.max().convertToFloat(); - } else { - rmin = op.min().convertToDouble(); - rmax = op.max().convertToDouble(); - } - // Range boundaries must be valid. 
- if (rmin >= rmax) { - return op.emitOpError("range is invalid: [" + Twine(std::to_string(rmin)) + - "," + Twine(std::to_string(rmax)) + "]"); - } - int64_t num_bits = op.num_bits().getSExtValue(); - if (num_bits < 2 || num_bits > 16) { - return op.emitOpError( - "requires num_bits to be between 2 and 16, inclusive"); - } - return success(); -} - -//===----------------------------------------------------------------------===// -// FakeQuantWithMinMaxVarsOp -//===----------------------------------------------------------------------===// -static LogicalResult Verify(FakeQuantWithMinMaxVarsOp op) { - auto min = GetRankedTensorTypeForOperand(op.min()); - if (min && !IsOfRankedFloatTensorType(min, 0)) - return op.emitOpError("requires min to be a 0d float tensor"); - - auto max = GetRankedTensorTypeForOperand(op.max()); - if (max && !IsOfRankedFloatTensorType(max, 0)) - return op.emitOpError("requires max to be a 0d float tensor"); - - int64_t num_bits = op.num_bits().getSExtValue(); - if (num_bits < 2 || num_bits > 16) { - return op.emitOpError( - "requires num_bits to be between 2 and 16, inclusive"); - } - return success(); -} - -//===----------------------------------------------------------------------===// -// FakeQuantWithMinMaxVarsPerChannelOp -//===----------------------------------------------------------------------===// -static LogicalResult Verify(FakeQuantWithMinMaxVarsPerChannelOp op) { - auto min = GetRankedTensorTypeForOperand(op.min()); - if (min && !IsOfRankedFloatTensorType(min, 1)) - return op.emitOpError("requires min to be a 1d float tensor"); - - auto max = GetRankedTensorTypeForOperand(op.max()); - if (max && !IsOfRankedFloatTensorType(max, 1)) - return op.emitOpError("requires max to be a 1d float tensor"); - - Value inputs = op.inputs(); - if (!HasRankAtLeast(inputs, 1)) - return op.emitError("requires inputs to be at least 1d float tensor"); - - int64_t num_bits = op.num_bits().getSExtValue(); - if (num_bits < 2 || num_bits > 16) { - return op.emitOpError( - "requires num_bits to be between 2 and 16, inclusive"); - } - - auto inputs_type = inputs.getType().dyn_cast(); - if (!inputs_type) return success(); - int depth = inputs_type.getDimSize(inputs_type.getRank() - 1); - if ((min && min.getDimSize(0) != depth) || - (max && max.getDimSize(0) != depth)) { - return op.emitOpError( - "requires min and max to have same size as last dimension of inputs"); - } - - return success(); -} - -//===----------------------------------------------------------------------===// -// FillOp -//===----------------------------------------------------------------------===// - -static LogicalResult Verify(FillOp op) { - if (!IsOfRankOrUnranked(op.dims(), 1)) - return op.emitOpError() << "requires dims to be a 1D tensor"; - if (!IsOfRankOrUnranked(op.value(), 0)) - return op.emitOpError() << "requires value to be a scalar"; - - return success(); -} - -static ShapedType InferFillOpType(Value dims, Value value) { - Type etype = value.getType().cast().getElementType(); - - DenseIntElementsAttr dims_attr; - if (!matchPattern(dims, m_Constant(&dims_attr))) { - return UnrankedTensorType::get(etype); - } - - llvm::SmallVector shape; - shape.reserve(dims_attr.getNumElements()); - for (const APInt dim : dims_attr.getValues()) { - shape.push_back(dim.getSExtValue()); - } - return RankedTensorType::get(shape, etype); -} - -void FillOp::build(OpBuilder &builder, OperationState &result, Value dims, - Value value) { - FillOp::build(builder, result, InferFillOpType(dims, value), dims, value); -} - 
-OpFoldResult FillOp::fold(ArrayRef operands) { - assert(operands.size() == 2 && "fill op has two operand"); - - auto type = getType().cast(); - // DenseElementsAttr that is used in this folder only supports int and float - // types. - // TODO(hinsu): Handle complex types once there is a attribute kind for - // complex. - if (!type.getElementType().isIntOrFloat()) return {}; - - auto value = operands[1].dyn_cast_or_null(); - if (!value) return {}; - - if (type.hasStaticShape()) - return DenseElementsAttr::get(type, value.getValue({})); - - auto dims = operands[0].dyn_cast_or_null(); - if (!dims) return {}; - - llvm::SmallVector shape; - shape.reserve(dims.getNumElements()); - for (const APInt dim : dims.getValues()) { - shape.push_back(dim.getSExtValue()); - } - type = RankedTensorType::get(shape, type.getElementType()); - - return DenseElementsAttr::get(type, value.getValue({})); -} - -//===----------------------------------------------------------------------===// -// FusedBatchNormGradOp -//===----------------------------------------------------------------------===// - -// TODO(b/150954845): Add benchmarks to verify that layout preference didn't -// change in the latest GPU generations. - -LogicalResult FusedBatchNormGradV3Op::UpdateDataFormat(StringRef data_format) { - return ::mlir::TF::UpdateDataFormat(data_format, this); -} - -StringRef FusedBatchNormGradV3Op::GetOptimalLayout( - const RuntimeDevices &devices) { - // Keep current data format if no GPUs are available or if explicit placement - // does not allow to use GPU for this operation. - if (!CanUseGpuDevice(devices) || !CanUseGpuDevice(getOperation())) - return data_format(); - - // For f16 data type on devices with Tensor Cores support NHWC data format - // is up to ~2x faster. - auto x_ty = x().getType().cast(); - const bool is_f16 = x_ty.getElementType().isF16(); - if (is_f16 && CanUseTensorCores(devices)) return "NHWC"; - - // For all other data types prefer NCHW. 
- return "NCHW"; -} - -//===----------------------------------------------------------------------===// -// FusedBatchNormOp -//===----------------------------------------------------------------------===// - -static LogicalResult Verify(FusedBatchNormOp op) { - auto x = GetRankedTensorTypeForOperand(op.x()); - if (x && !IsOfRankedFloatTensorType(x, 4)) - return op.emitOpError("requires x to be a 4D float tensor"); - - auto scale = GetRankedTensorTypeForOperand(op.scale()); - if (scale && !IsOfRankedFloatTensorType(scale, 1)) - return op.emitOpError("requires scale to be a 1D float tensor"); - - auto offset = GetRankedTensorTypeForOperand(op.offset()); - if (offset && !IsOfRankedFloatTensorType(offset, 1)) - return op.emitOpError("requires offset to be a 1D float tensor"); - - auto mean = GetRankedTensorTypeForOperand(op.mean()); - if (mean && !IsOfRankedFloatTensorType(mean, 1)) - return op.emitOpError("requires mean to be a 1D float tensor"); - - auto variance = GetRankedTensorTypeForOperand(op.variance()); - if (variance && !IsOfRankedFloatTensorType(variance, 1)) - return op.emitOpError("requires variance to be a 1D float tensor"); - - // TODO(antiagainst): check attributes - - return success(); -} - -//===----------------------------------------------------------------------===// -// FusedBatchNormV2Op / FusedBatchNormV3Op -//===----------------------------------------------------------------------===// - -template -static LogicalResult InferenceFoldOperandsPermutation( - ArrayRef permutation, Op *op) { - // FusedBatchNorm in training mode is a layout sentitive operation, and should - // have already assigned an optimal data format. - if (op->is_training()) return failure(); - return ::mlir::TF::FoldOperandsPermutation(permutation, op); -} - -template -static StringRef GetOptimalLayout(const RuntimeDevices &devices, Op *op) { - // In inference mode FusedBatchNorm is not sensitive to data layout. - if (!op->is_training()) return op->data_format(); - - // Keep current data format if no GPUs are available or if explicit placement - // does not allow to use GPU for this operation. - if (!CanUseGpuDevice(devices) || !CanUseGpuDevice(op->getOperation())) - return op->data_format(); - - // For f16 data type on devices with Tensor Cores support NHWC data format - // is up to ~2x faster. - auto x_ty = op->x().getType().template cast(); - const bool is_f16 = x_ty.getElementType().isF16(); - if (is_f16 && CanUseTensorCores(devices)) return "NHWC"; - - // For all other data types prefer NCHW. 
- return "NCHW"; -} - -LogicalResult FusedBatchNormV2Op::FoldOperandsPermutation( - ArrayRef permutation) { - return ::mlir::TF::InferenceFoldOperandsPermutation(permutation, this); -} - -LogicalResult FusedBatchNormV2Op::UpdateDataFormat(StringRef data_format) { - return ::mlir::TF::UpdateDataFormat(data_format, this); -} - -StringRef FusedBatchNormV2Op::GetOptimalLayout(const RuntimeDevices &devices) { - return ::mlir::TF::GetOptimalLayout(devices, this); -} - -LogicalResult FusedBatchNormV3Op::FoldOperandsPermutation( - ArrayRef permutation) { - return ::mlir::TF::InferenceFoldOperandsPermutation(permutation, this); -} - -LogicalResult FusedBatchNormV3Op::UpdateDataFormat(StringRef data_format) { - return ::mlir::TF::UpdateDataFormat(data_format, this); -} - -StringRef FusedBatchNormV3Op::GetOptimalLayout(const RuntimeDevices &devices) { - return ::mlir::TF::GetOptimalLayout(devices, this); -} - -//===----------------------------------------------------------------------===// -// GatherV2Op -//===----------------------------------------------------------------------===// - -static LogicalResult Verify(GatherV2Op op) { - int64_t batch_dims = op.batch_dims().getSExtValue(); - if (auto ty = op.indices().getType().dyn_cast()) { - int64_t rank = ty.getRank(); - if (batch_dims > rank || batch_dims < -rank) - return op.emitOpError() - << "batch_dims (" << batch_dims << ") must be in range [" << -rank - << ", " << rank + 1 << ")"; - if (batch_dims < 0) batch_dims += rank; - } - - if (!HasRankAtMost(op.axis(), 1)) - return op.emitOpError("requires axis to have rank at most 1"); - - DenseIntElementsAttr axis_attr; - if (matchPattern(op.axis(), m_Constant(&axis_attr))) { - int64_t axis = (*axis_attr.begin()).getSExtValue(); - if (auto ty = op.params().getType().dyn_cast()) { - int64_t rank = ty.getRank(); - if (axis >= rank || axis < -rank) - return op.emitOpError() << "axis (" << axis << ") must be in range [" - << -rank << ", " << rank << ")"; - if (axis < 0) axis += rank; - } - - if (batch_dims >= 0 && axis >= 0 && axis < batch_dims) { - return op.emitOpError() << "requires axis (" << axis - << ") to be greater than or equal to batch_dims (" - << batch_dims << ")"; - } - } - return success(); -} - -//===----------------------------------------------------------------------===// -// IfOp -//===----------------------------------------------------------------------===// - -static LogicalResult Verify(IfOp op) { - auto module = op.getParentOfType(); - auto then_fn = module.lookupSymbol(op.then_branch()); - if (!then_fn) - return op.emitOpError("then_branch refers to an undefined function : ") - << op.then_branch(); - auto else_fn = module.lookupSymbol(op.else_branch()); - if (!else_fn) - return op.emitOpError("else_branch refers to an undefined function : ") - << op.else_branch(); - auto then_fn_type = then_fn.getType(); - auto else_fn_type = else_fn.getType(); - - // Non-conditional operands starting with the second operand are passed to - // branches and should be pair-wise compatible with branches' inputs. 
- unsigned expected_num_inputs = op.getNumOperands() - 1; - if (then_fn_type.getNumInputs() != expected_num_inputs || - else_fn_type.getNumInputs() != expected_num_inputs) - return op.emitError("branches should have " + Twine(expected_num_inputs) + - " inputs"); - - for (unsigned i = 0; i < expected_num_inputs; ++i) { - auto operand_type = op.getOperand(i + 1).getType().cast(); - auto then_input_type = then_fn_type.getInput(i).cast(); - if (!AreCastCompatible({operand_type, then_input_type})) - return op.emitError( - llvm::formatv("then branch input type {0} is incompatible with " - "operand type {1} at index {2}", - then_input_type, operand_type, i)); - - auto else_input_type = else_fn_type.getInput(i).cast(); - if (!AreCastCompatible({operand_type, else_input_type})) - return op.emitError( - llvm::formatv("else branch input type {0} is incompatible with " - "operand type {1} at index {2}", - else_input_type, operand_type, i)); - - // If branches have incompatible input types that means that no tensor can - // serve as input to both the functions. Hence, the op is invalid. - if (!AreCastCompatible({then_input_type, else_input_type})) - return op.emitError(llvm::formatv( - "branches inputs have incompatible types {0} and {1} at index {2}", - then_input_type, else_input_type, i)); - } - - // Branches' results should be pair-wise compatible with the op results. - unsigned expected_num_results = op.getNumResults(); - if (then_fn_type.getNumResults() != expected_num_results || - else_fn_type.getNumResults() != expected_num_results) - return op.emitError("branches should have " + Twine(expected_num_results) + - " results"); - - for (unsigned i = 0; i < expected_num_results; ++i) { - auto result_type = op.getResult(i).getType().cast(); - auto then_result_type = then_fn_type.getResult(i).cast(); - if (!AreCastCompatible({then_result_type, result_type})) - return op.emitError( - llvm::formatv("then branch result type {0} is incompatible with op " - "result type {1} at index {2}", - then_result_type, result_type, i)); - - auto else_result_type = else_fn_type.getResult(i).cast(); - if (!AreCastCompatible({else_result_type, result_type})) - return op.emitError( - llvm::formatv("else branch result type {0} is incompatible with op " - "result type {1} at index {2}", - else_result_type, result_type, i)); - } - return success(); -} - -//===----------------------------------------------------------------------===// -// IfRegionOp -//===----------------------------------------------------------------------===// - -LogicalResult VerifyRegionResults(Operation *op, Region ®ion, - StringRef region_name) { - auto op_name = op->getName().getStringRef(); - // verify that op outputs match yield inputs - YieldOp yield = cast(region.front().getTerminator()); - unsigned expected_num_results = op->getNumResults(); - if (yield.getNumOperands() != expected_num_results) - return op->emitOpError() - << region_name + " should have same number (" << expected_num_results - << ") of results as " << op_name << " but has " - << yield.getNumOperands() << " results"; - - for (int idx : llvm::seq(0, expected_num_results)) { - auto op_result_type = op->getResult(idx).getType().cast(); - auto region_result_type = - yield.getOperand(idx).getType().cast(); - if (!AreCastCompatible({region_result_type, op_result_type})) - return op->emitError(llvm::formatv( - "{0} result type {1} is incompatible with {2} " - "result type {3} at index {4}", - region_name, region_result_type, op_name, op_result_type, idx)); - } - return success(); -} - 
-static LogicalResult Verify(IfRegionOp op) { - if (failed(VerifyRegionResults(op, op.then_branch(), "then"))) - return failure(); - if (failed(VerifyRegionResults(op, op.else_branch(), "else"))) - return failure(); - return success(); -} - -//===----------------------------------------------------------------------===// -// InvertOp -//===----------------------------------------------------------------------===// - -void InvertOp::getCanonicalizationPatterns(OwningRewritePatternList &results, - MLIRContext *context) { - results.insert(context); -} - -//===----------------------------------------------------------------------===// -// InvertPermutationOp -//===----------------------------------------------------------------------===// - -// Verifies that the input is 1D. -static LogicalResult Verify(InvertPermutationOp op) { - auto x_type = op.x().getType().cast(); - if (!x_type.hasRank()) return success(); - if (x_type.getShape().size() != 1) - return op.emitOpError() << "requires input x to be 1-dimensional"; - - return success(); -} - -//===----------------------------------------------------------------------===// -// LeakyReluOp -//===----------------------------------------------------------------------===// - -OpFoldResult LeakyReluOp::fold(ArrayRef operands) { - assert(operands.size() == 1 && "leaky relu has one operand"); - - // leaky_relu(x, alpha: 1) -> x - if (alpha().convertToFloat() == 1.0f) return getOperand(); - - auto calculate = [&](FloatAttr arg) { - APFloat val = arg.getValue(); - if (val.isNegative()) val = alpha() * val; - return FloatAttr::get(arg.getType(), val); - }; - - if (auto arg = operands[0].dyn_cast_or_null()) { - return calculate(arg); - } else if (auto arg = operands[0].dyn_cast_or_null()) { - if (auto elementAttr = arg.getSplatValue().dyn_cast()) - return DenseElementsAttr::get(arg.getType(), calculate(elementAttr)); - } - return {}; -} - -//===----------------------------------------------------------------------===// -// LogOp -//===----------------------------------------------------------------------===// - -void LogOp::getCanonicalizationPatterns(OwningRewritePatternList &results, - MLIRContext *context) { - results.insert(context); -} - -//===----------------------------------------------------------------------===// -// ReadVariableOp -//===----------------------------------------------------------------------===// - -void ReadVariableOp::getCanonicalizationPatterns( - OwningRewritePatternList &results, MLIRContext *context) { - results.insert(context); -} - -//===----------------------------------------------------------------------===// -// VarIsInitializedOp -//===----------------------------------------------------------------------===// - -namespace { - -/// Erase VarIsInitializedOp operations with no uses. This op has side effect on -/// resources (read-only), but can still be deleted if it has zero uses. -struct EraseDeadVarIsInitializedOp - : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(VarIsInitializedOp op, - PatternRewriter &rewriter) const override { - if (!op.use_empty()) return failure(); - rewriter.eraseOp(op); - return success(); - } -}; -} // end anonymous namespace. 
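// Example of the erasure pattern above: a minimal sketch assuming the usual
// tf.VarIsInitializedOp textual form (the resource type is made up).
//
//   %is_init = "tf.VarIsInitializedOp"(%resource)
//       : (tensor<*x!tf.resource<tensor<f32>>>) -> tensor<i1>
//
// If %is_init has no uses, canonicalization erases the op: it only reads the
// resource, so removing it cannot change observable behavior.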
- -void VarIsInitializedOp::getCanonicalizationPatterns( - OwningRewritePatternList &patterns, MLIRContext *context) { - patterns.insert(context); -} - -//===----------------------------------------------------------------------===// -// LogicalNotOp -//===----------------------------------------------------------------------===// - -void LogicalNotOp::getCanonicalizationPatterns( - OwningRewritePatternList &results, MLIRContext *context) { - results.insert(context); -} - -//===----------------------------------------------------------------------===// -// MatrixBandPartOp -//===----------------------------------------------------------------------===// - -static LogicalResult Verify(MatrixBandPartOp op) { - if (!HasRankAtLeast(op.input(), 2)) { - return op.emitOpError() - << "requires `input` to have rank of at least 2, but found " - << op.input().getType(); - } - if (!IsOfRankOrUnranked(op.num_lower(), 0)) { - return op.emitOpError() - << "requires `num_lower` to have 0 dimensions, but found " - << op.num_lower().getType(); - } - if (!IsOfRankOrUnranked(op.num_upper(), 0)) { - return op.emitOpError() - << "requires `num_upper` to have 0 dimensions, but found " - << op.num_upper().getType(); - } - return success(); -} - -//===----------------------------------------------------------------------===// -// MaxOp -//===----------------------------------------------------------------------===// - -void MaxOp::build(OpBuilder &builder, OperationState &result, Value input, - Value reduction_indices, BoolAttr keep_dims) { - Type out_ty = - InferReductionOpType(input, reduction_indices, keep_dims, &builder); - build(builder, result, out_ty, input, reduction_indices, keep_dims); -} - -//===----------------------------------------------------------------------===// -// MaxPoolOp -//===----------------------------------------------------------------------===// - -LogicalResult MaxPoolOp::FoldOperandsPermutation( - ArrayRef permutation) { - return ::mlir::TF::FoldOperandsPermutation( - permutation, this, {{"strides", strides()}, {"ksize", ksize()}}); -} - -//===----------------------------------------------------------------------===// -// MaxPoolGradOp -//===----------------------------------------------------------------------===// - -static LogicalResult Verify(MaxPoolGradOp op) { - if (!IsOfRankOrUnranked(op.orig_input(), 4)) { - return op.emitOpError() << "requires orig_input to be rank 4"; - } - if (!IsOfRankOrUnranked(op.orig_output(), 4)) { - return op.emitOpError() << "requires orig_output to be rank 4"; - } - if (!IsOfRankOrUnranked(op.grad(), 4)) { - return op.emitOpError() << "requires grad to be rank 4"; - } - return success(); -} - -//===----------------------------------------------------------------------===// -// MeanOp -//===----------------------------------------------------------------------===// - -LogicalResult MeanOp::FoldOperandsPermutation(ArrayRef permutation) { - // Reduction indices must be defined by a constant operation. - auto reduction_op = - dyn_cast_or_null(reduction_indices().getDefiningOp()); - if (!reduction_op) return failure(); - - auto reductions_value = reduction_op.value().dyn_cast(); - if (!reductions_value) return failure(); - - // Prepare new reduction indices according to operand permutation. - SmallVector shuffled_reduction; - llvm::transform(reductions_value.getIntValues(), - std::back_inserter(shuffled_reduction), - [&](APInt idx) { return permutation[idx.getSExtValue()]; }); - - // Add constant operation with a new reduction indices. 
- OpBuilder builder(getOperation()); - auto type = mlir::RankedTensorType::get(shuffled_reduction.size(), - builder.getIntegerType(32)); - auto values = mlir::DenseIntElementsAttr::get(type, shuffled_reduction); - auto shuffled_reduction_op = builder.create(getLoc(), values); - - // Use new reduction indices. - setOperand(1, shuffled_reduction_op); - - return success(); -} - -//===----------------------------------------------------------------------===// -// MulOp -//===----------------------------------------------------------------------===// - -OpFoldResult MulOp::fold(ArrayRef operands) { - return IdentityArithmeticOpFolder(*this, operands); -} - -//===----------------------------------------------------------------------===// -// NegOp -//===----------------------------------------------------------------------===// - -void NegOp::getCanonicalizationPatterns(OwningRewritePatternList &results, - MLIRContext *context) { - results.insert(context); -} - -//===----------------------------------------------------------------------===// -// NotEqualOp -//===----------------------------------------------------------------------===// - -static LogicalResult Verify(NotEqualOp op) { - // If we allow inputs to have incompatible type, then nothing to do. - if (!op.incompatible_shape_error()) return success(); - - // Otherwise, check inputs are broadcastable. - return mlir::OpTrait::impl::verifyCompatibleOperandBroadcast( - op.getOperation()); -} - -void NotEqualOp::build(OpBuilder &builder, OperationState &result, Value x, - Value y, BoolAttr incompatible_shape_error) { - auto result_type = DeduceEqualCmpOpType(&builder, result.location, x, y, - incompatible_shape_error); - return build(builder, result, result_type, x, y, incompatible_shape_error); -} - -//===----------------------------------------------------------------------===// -// OneHotOp -//===----------------------------------------------------------------------===// - -static LogicalResult Verify(OneHotOp op) { - int64_t axis = op.axis().getSExtValue(); - - auto indices_ty = op.indices().getType().dyn_cast(); - if (indices_ty && - !(axis == -1 || (axis >= 0 && axis <= indices_ty.getShape().size()))) { - return op.emitOpError() - << "expected axis (" << axis << ") to be -1 or between [0, " - << indices_ty.getShape().size() << "]"; - } - - if (axis < -1) { - return op.emitOpError() << "expected axis (" << axis - << ") to be -1 or between [0, rank(indices()))"; - } - - if (!IsOfRankOrUnranked(op.depth(), 0)) { - return op.emitOpError() << "requires depth to be a scalar"; - } - if (!IsOfRankOrUnranked(op.on_value(), 0)) { - return op.emitOpError() << "requires on_value to be a scalar"; - } - if (!IsOfRankOrUnranked(op.off_value(), 0)) { - return op.emitOpError() << "requires off_value to be a scalar"; - } - - DenseIntElementsAttr depth_attr; - if (matchPattern(op.depth(), m_Constant(&depth_attr))) { - if (depth_attr.getType().getRank() != 0) - return op.emitOpError() << "requires depth to be a scalar"; - int64_t depth = depth_attr.getValue({}).getSExtValue(); - if (depth < 0) { - return op.emitOpError() << "depth must be non-negative, got: " << depth; - } - } - - return success(); -} - -static TensorType InferOneHotOpType(Value indices, Value depth, Value on_value, - Value off_value, IntegerAttr axis) { - int64_t axis_val = axis.getInt(); - Type element_ty = on_value.getType().cast().getElementType(); - auto unranked_ty = UnrankedTensorType::get(element_ty); - if (axis_val < -1) return unranked_ty; - - auto indices_ty = 
indices.getType().dyn_cast(); - if (!indices_ty) return unranked_ty; - - auto shape = llvm::to_vector<2>(indices_ty.getShape()); - if (axis_val == -1) axis_val = shape.size(); - - int64_t depth_val = ShapedType::kDynamicSize; - DenseIntElementsAttr depth_attr; - if (matchPattern(depth, m_Constant(&depth_attr)) && - depth_attr.getNumElements() == 1) - depth_val = (*depth_attr.begin()).getSExtValue(); - shape.insert(shape.begin() + axis_val, depth_val); - return RankedTensorType::get(shape, element_ty); -} - -void OneHotOp::build(OpBuilder &builder, OperationState &result, Value indices, - Value depth, Value on_value, Value off_value, - IntegerAttr axis) { - build(builder, result, - InferOneHotOpType(indices, depth, on_value, off_value, axis), indices, - depth, on_value, off_value, axis); -} - -//===----------------------------------------------------------------------===// -// PackOp -//===----------------------------------------------------------------------===// - -static LogicalResult Verify(PackOp op) { - // TODO(hinsu): Convert variadic length attributes to derived attributes. - Operation::operand_range values = op.values(); - - if (failed(VerifyTypesCompatibility(values, - /*mask_one_dim=*/false, - op.getOperation()))) { - return failure(); - } - - int64_t inputs_rank = -1; - for (Value value : values) { - if (auto ty = value.getType().dyn_cast()) { - // Exit early as input types are verified to be compatible so all ranked - // tensors have the same rank. - inputs_rank = ty.getRank(); - break; - } - } - if (inputs_rank == -1) return success(); - - // The values can be packed along any of the dimensions between 0 and - // inputs rank, inclusive. Also, as the negative axis values wrap around so - // the axis value range is [-(R+1), R+1). - int64_t range_begin = -inputs_rank - 1; // Inclusive - int64_t range_end = inputs_rank + 1; // Exclusive - int64_t axis = op.axis().getSExtValue(); - if (axis < range_begin || axis >= range_end) { - return op.emitError() << "attribute 'axis' should be within range [" - << range_begin << ", " << range_end - << "); actual value: " << axis; - } - - return success(); -} - -//===----------------------------------------------------------------------===// -// PadOp -//===----------------------------------------------------------------------===// - -LogicalResult PadOp::FoldOperandsPermutation(ArrayRef permutation) { - // Paddings must be defined by a constant operation. - auto paddings_op = dyn_cast_or_null(paddings().getDefiningOp()); - if (!paddings_op) return failure(); - - auto paddings_value = paddings_op.value().dyn_cast(); - if (!paddings_value || - paddings_value.getNumElements() != permutation.size() * 2) - return failure(); - - SmallVector shuffled_paddings(paddings_value.getNumElements()); - for (auto index_pair : llvm::enumerate(paddings_value.getIntValues())) { - size_t outer_idx = index_pair.index() / 2; - size_t inner_idx = index_pair.index() % 2; - - shuffled_paddings[permutation[outer_idx] * 2 + inner_idx] = - index_pair.value().getSExtValue(); - } - - // Add constant operation with a new paddings. - OpBuilder builder(getOperation()); - auto type = mlir::RankedTensorType::get(paddings_value.getType().getShape(), - builder.getIntegerType(32)); - auto values = mlir::DenseIntElementsAttr::get(type, shuffled_paddings); - auto shuffled_paddings_op = builder.create(getLoc(), values); - - // Use new paddings. - setOperand(1, shuffled_paddings_op); - - // Change the result type. 
- getResult().setType(ShuffleRankedTensorType(getResult().getType(), - ReversePermutation(permutation))); - - return success(); -} - -//===----------------------------------------------------------------------===// -// ParseExampleV2Op -//===----------------------------------------------------------------------===// - -static LogicalResult Verify(ParseExampleV2Op op) { - // NOTE(mrry): This validates properties of an op that would previously be - // validated by the TensorFlow OpDef type checker. In addition to these - // checks, the shape inference function for ParseExampleV2 validates the - // consistency of the argument and result types. - - // Validate dense variadic input and output lengths. - // NOTE(mrry): The Tdense attr is derived from dense_defaults, so we - // do not need to validate dense_defaults. - auto dense_types_count = - std::distance(op.Tdense().begin(), op.Tdense().end()); - auto dense_values_count = - std::distance(op.dense_values().begin(), op.dense_values().end()); - if (dense_values_count != dense_types_count) { - return op.emitError() << "output 'dense_values' should have same length " - << "as attribute 'Tdense'"; - } - - // Validate sparse variadic output lengths. - // NOTE(mrry): The sparse_types attr is derived from sparse_values, so we - // do not need to validate sparse_values. - auto sparse_types_count = - std::distance(op.sparse_types().begin(), op.sparse_types().end()); - if (op.num_sparse() != sparse_types_count) { - return op.emitError() << "attribute 'num_sparse' should be the same as " - << "the length of attribute 'sparse_types'"; - } - if (op.sparse_indices().size() != sparse_types_count) { - return op.emitError() << "output 'sparse_indices' should have same length " - << "as attribute 'sparse_types'"; - } - if (op.sparse_shapes().size() != sparse_types_count) { - return op.emitError() << "output 'sparse_shapes' should have same length " - << "as attribute 'sparse_types'"; - } - - // Validate ragged variadic output lengths. 
- auto ragged_value_types_count = std::distance(op.ragged_value_types().begin(), - op.ragged_value_types().end()); - auto ragged_split_types_count = std::distance(op.ragged_split_types().begin(), - op.ragged_split_types().end()); - if (ragged_value_types_count != ragged_split_types_count) { - return op.emitError() << "attribute 'ragged_value_types' should have same " - << "length as attribute 'ragged_split_types'"; - } - - return success(); -} - -//===----------------------------------------------------------------------===// -// PartitionedCallOp -//===----------------------------------------------------------------------===// - -template -static LogicalResult VerifyPartitionedCall(OpClass op) { - auto module = op.template getParentOfType(); - SymbolRefAttr func = op.getAttr("f").template cast(); - - auto function = - dyn_cast_or_null(SymbolTable::lookupSymbolIn(module, func)); - - if (!function) { - return op.emitError("'f' attribute refers to an undefined function: ") - << func; - } - - FunctionType function_ty = function.getType(); - int func_arg_count = function_ty.getNumInputs(); - int arg_count = op.args().size(); - - if (arg_count != func_arg_count) { - return op.emitError() << "argument count mismatch: 'args' has " << arg_count - << " arguments, but '" << func << "' expects " - << func_arg_count; - } - - return success(); -} - -//===----------------------------------------------------------------------===// -// PowOp -//===----------------------------------------------------------------------===// - -OpFoldResult PowOp::fold(ArrayRef operands) { - auto constant_y = operands[1].dyn_cast_or_null(); - if (constant_y && constant_y.isSplat()) { - APFloat y_value = constant_y.getSplatValue(); - auto output_type = getType().cast(); - if (y_value.isZero() && output_type.hasStaticShape()) { - return DenseElementsAttr::get( - output_type, - FloatAttr::get(output_type.getElementType(), /*value=*/1.0)); - } - if (y_value.isExactlyValue(1.0)) { - return x(); - } - } - return {}; -} - -//===----------------------------------------------------------------------===// -// QrOp -//===----------------------------------------------------------------------===// - -// Verifies that, -// -// * Input type, if ranked, must have at least 2 dimensions and at most -// INT32_MAX dimensions. 
-// -static LogicalResult Verify(QrOp op) { - auto ttype = op.input().getType().cast(); - if (!ttype.hasRank()) return success(); - if (!HasRankAtLeast(op.input(), 2)) - return op.emitOpError( - "requires ranked input tensor to be of rank 2 or more"); - if (!HasRankAtMost(op.input(), std::numeric_limits::max())) - return op.emitOpError( - "requires ranked input tensor to be of rank INT32_MAX or less"); - - return success(); -} - -//===----------------------------------------------------------------------===// -// ReciprocalOp -//===----------------------------------------------------------------------===// - -void ReciprocalOp::getCanonicalizationPatterns( - OwningRewritePatternList &results, MLIRContext *context) { - results.insert(context); -} - -//===----------------------------------------------------------------------===// -// RandomUniformOp -//===----------------------------------------------------------------------===// - -static LogicalResult Verify(RandomUniformOp op) { - if (!IsOfRankOrUnranked(op.shape(), 1)) - return op.emitOpError("shape must be 1D tensor"); - return success(); -} - -//===----------------------------------------------------------------------===// -// RangeOp -//===----------------------------------------------------------------------===// - -void RangeOp::build(OpBuilder &builder, OperationState &result, Value start, - Value limit, Value delta) { - assert(start.getType() == limit.getType()); - assert(start.getType() == delta.getType()); - DenseIntElementsAttr start_val; - DenseIntElementsAttr limit_val; - DenseIntElementsAttr delta_val; - if (matchPattern(start, m_Constant(&start_val)) && - matchPattern(limit, m_Constant(&limit_val)) && - matchPattern(delta, m_Constant(&delta_val))) { - auto size = llvm::APIntOps::RoundingSDiv( - *limit_val.begin() - *start_val.begin(), *delta_val.begin(), - llvm::APInt::Rounding::DOWN); - return RangeOp::build( - builder, result, - RankedTensorType::get( - size.getSExtValue(), - start.getType().cast().getElementType()), - start, limit, delta); - } - return RangeOp::build( - builder, result, - RankedTensorType::get( - {-1}, start.getType().cast().getElementType()), - start, limit, delta); -} -//===----------------------------------------------------------------------===// -// RankOp -//===----------------------------------------------------------------------===// - -void RankOp::build(OpBuilder &builder, OperationState &result, Value input) { - return RankOp::build(builder, result, - RankedTensorType::get({}, builder.getIntegerType(32)), - input); -} - -// This will create a constant value for RankOp of a ranked tensor. 
-OpFoldResult RankOp::fold(ArrayRef operands) { - auto type = input().getType(); - auto ranked_type = type.dyn_cast(); - if (!ranked_type) return {}; - - auto output_type = getType().cast(); - int32_t rank = ranked_type.getRank(); - return DenseIntElementsAttr::get(output_type, rank); -} - -//===----------------------------------------------------------------------===// -// RealDivOp -//===----------------------------------------------------------------------===// - -void RealDivOp::getCanonicalizationPatterns(OwningRewritePatternList &results, - MLIRContext *context) { - results.insert(context); -} - -OpFoldResult RealDivOp::fold(ArrayRef operands) { - return IdentityArithmeticOpFolder(*this, operands); -} - -//===----------------------------------------------------------------------===// -// ReshapeOp -//===----------------------------------------------------------------------===// - -// TODO(b/128020684): Verify the output type. -static LogicalResult Verify(ReshapeOp op) { - auto shape_type = op.shape().getType().cast(); - if (!shape_type.hasRank()) return success(); - if (shape_type.getRank() != 1) - return op.emitOpError("shape must be 1D tensor"); - auto rank_by_shape = shape_type.getShape()[0]; - auto type_of_tensor = op.tensor().getType().cast(); - // No compile time verification for unknown sized shape. - if (rank_by_shape == -1 || !type_of_tensor.hasStaticShape()) return success(); - int64_t num_by_tensor = type_of_tensor.getNumElements(); - - auto out_ty = op.getType().dyn_cast(); - if (out_ty && out_ty.hasStaticShape()) { - int64_t num_output_elements = out_ty.getNumElements(); - if (num_by_tensor != num_output_elements) - return op.emitOpError() - << "number of output elements (" << num_output_elements - << ") does not match expected number of elements (" - << num_by_tensor << ")"; - } - - // Check values if constant shape. No compiling time verification for - // non-constant shape. - auto *shape_op = op.shape().getDefiningOp(); - if (!shape_op) return success(); - Attribute shape_cst; - if (!matchPattern(shape_op, m_Constant(&shape_cst))) return success(); - auto shape_cst_attr = shape_cst.dyn_cast(); - if (!shape_cst_attr) return op.emitOpError("shape must be a valid tensor"); - - if (auto opaque_attr = shape_cst_attr.dyn_cast()) { - opaque_attr.decode(shape_cst_attr); - } - - // We know the shape is a 1-D Tensor, then let us get the number of - // elements it implies. - unsigned num_by_shape = 1; - unsigned unknown_dim_count = 0; - for (int i = 0, e = rank_by_shape; i != e; ++i) { - auto num = shape_cst_attr.getValue(i).getInt(); - // The dimension size value can be -1, and that the real size needs to - // be computed so that the total size remains constant. At most one - // component of shape can be -1. - if (num == -1) { - if (++unknown_dim_count > 1) { - return op.emitOpError("more than one component of shape are -1"); - } - } else { - num_by_shape *= num; - } - } - // If there is one component of shape is -1, the dimension should be - // computed so that the total size remains constant. - if (unknown_dim_count == 1) { - if (num_by_tensor % num_by_shape != 0) - return op.emitOpError( - "one component of shape is -1 but couldn't infer the dimension"); - return success(); - } - // If the elements by the tensor and implies by the shape don't match, - // fail this static check. 
- if (num_by_tensor != num_by_shape) { - return op.emitOpError( - "mismatch in tensor elements and shape implied elements"); - } - return success(); -} - -void ReshapeOp::build(OpBuilder &builder, OperationState &result, Value tensor, - Value shape) { - auto ttype = tensor.getType().cast(); - auto etype = ttype.getElementType(); - - auto unranked = [&builder, etype, &result, shape, tensor]() { - return ReshapeOp::build(builder, result, UnrankedTensorType::get(etype), - tensor, shape); - }; - - // If tensor is unranked then we have no info about output of shape. - if (!ttype.hasRank()) return unranked(); - - DenseIntElementsAttr attr_shape; - if (matchPattern(shape, m_Constant(&attr_shape))) { - llvm::SmallVector const_shape; - const_shape.reserve(attr_shape.getNumElements()); - - // Detect if reshape output shape is folded. - bool flatten = false; - int unknown_index = -1; - // The product of constant shape argument excluding unknown dimension. - int64_t product_cshape = 1; - for (auto e : llvm::enumerate(attr_shape)) { - int64_t val = e.value().getSExtValue(); - if (IsUnknownDimOrRank(val)) { - if (flatten) { - mlir::emitError(result.location) - << "only one unknown dimension allowed"; - return; - } - flatten = true; - unknown_index = e.index(); - } else { - product_cshape *= val; - } - const_shape.push_back(val); - } - - // Compute the value of the unknown dimension. - if (flatten) { - // Compute number of elements in tensor shape. - auto tshape = ttype.getShape(); - int64_t product_tshape = std::accumulate(tshape.begin(), tshape.end(), 1, - std::multiplies()); - // Set the unknown dimension such that total number of elements remain - // constant. - // Note: The case where the ratio is not integral, and so the total size - // of reshape not constant, is checked in verify function. - const_shape[unknown_index] = product_tshape / product_cshape; - } - return ReshapeOp::build(builder, result, - RankedTensorType::get(const_shape, etype), tensor, - shape); - } - return unranked(); -} - -void ReshapeOp::getCanonicalizationPatterns(OwningRewritePatternList &results, - MLIRContext *context) { - results.insert(context); -} - -OpFoldResult ReshapeOp::fold(ArrayRef operands) { - Value tensor = this->tensor(); - Value shape = this->shape(); - - // Fold reshape if operand and result types are the same and all dimensions - // are statically known (no-op reshape). - // TODO(ezhulenev): Add the same folding for BroadcastToOp. - auto result_ty = getType().dyn_cast(); - if (result_ty && result_ty.hasStaticShape() && - result_ty == tensor.getType()) { - return tensor; - } - - // Fold reshape if the shape is computed from the input tensor: - // - // %shape = tf.Shape(%arg) // [? x ...] - // %dim0 = tf.StridedSlice(%shape, 0, 1, 1) // get unknown dim value - // %new_shape = tf.Pack(dim0, ...) { axis = 0 } // [? x ...] - // %reshape = tf.Reshape(%arg, %new_shape) // this is no-op - // - // Where `...` are some statically known dimensions. In this case reshape is - // a no-op and can be replaced by %arg (assuming `...` are equal). - auto pack_op = dyn_cast_or_null(shape.getDefiningOp()); - if (!pack_op || pack_op.values().size() < 2) return {}; - - // Dimensions packed along axis = 0 (pack scalars into vector). - if (pack_op.axis().getSExtValue() != 0) return {}; - - // First packed value is defined by a strided slice operation. - auto slice_op = - dyn_cast_or_null(pack_op.values()[0].getDefiningOp()); - if (!slice_op) return {}; - - // Input to the slice op is defined by shape operation. 
- auto shape_op = dyn_cast_or_null(slice_op.input().getDefiningOp()); - if (!shape_op || shape_op.input() != tensor) return {}; - - // All masks are `0` except `shrink_axis_mask` which is equal to `1` (slicing - // scalar value from input vector). - if (slice_op.begin_mask().getSExtValue() != 0 || - slice_op.ellipsis_mask().getSExtValue() != 0 || - slice_op.end_mask().getSExtValue() != 0 || - slice_op.new_axis_mask().getSExtValue() != 0 || - slice_op.shrink_axis_mask().getSExtValue() != 1) - return {}; - - // Returns a value if the `value` is defined by a ConstOp with a single - // integer element in it and has an expected rank. - auto get_value = [](Value value, int expected_rank) -> Optional { - auto const_op = dyn_cast_or_null(value.getDefiningOp()); - if (!const_op) return None; - - auto value_attr = const_op.value().dyn_cast(); - if (!value_attr || value_attr.getNumElements() != 1) return None; - - auto value_ty = value_attr.getType(); - if (!value_ty.hasRank() || value_ty.getRank() != expected_rank) return None; - - auto splat = value_attr.getSplatValue(); - return splat.getValue().getSExtValue(); - }; - - // All other packed values are scalar constants. - SmallVector packed_dims; - packed_dims.reserve(pack_op.values().size() - 1); - for (Value operand : llvm::drop_begin(pack_op.values(), 1)) { - if (auto dim = get_value(operand, /*expected_rank=*/0)) { - packed_dims.push_back(*dim); - } else { - return {}; - } - } - - // Slice exactly the first shape dimension: - // begin = [0] end = [1], strides = [1] - auto begin = get_value(slice_op.begin(), /*expected_rank=*/1); - auto end = get_value(slice_op.end(), /*expected_rank=*/1); - auto strides = get_value(slice_op.strides(), /*expected_rank=*/1); - if (!begin.hasValue() || !end.hasValue() || !strides.hasValue() || - *begin != 0 || *end != 1 || *strides != 1) - return {}; - - // First tensor dimension is dynamic. - auto arg_ty = tensor.getType().dyn_cast(); - if (!arg_ty || arg_ty.getNumDynamicDims() != 1 || !arg_ty.isDynamicDim(0)) - return {}; - - // Argument tensor rank is equal to the number of packed dimensions. - if (arg_ty.getRank() != pack_op.values().size()) return {}; - - // All other dimensions are statically known and equal to packed dims. - auto arg_dims = llvm::drop_begin(arg_ty.getShape(), 1); - if (!std::equal(arg_dims.begin(), arg_dims.end(), packed_dims.begin())) - return {}; - - return tensor; -} - -//===----------------------------------------------------------------------===// -// SelectOp -//===----------------------------------------------------------------------===// - -void SelectOp::getCanonicalizationPatterns(OwningRewritePatternList &results, - MLIRContext *context) { - results.insert(context); -} - -// Verifies a few extra requirements on SelectOp: -// (1) `then` and `else` must have same shape -// (2) At least one of the following must be true: -// (a) `cond` has the same rank as `then` and `else` -// (b) `cond` is a scalar -// (c) `cond` is a vector AND `then` and `else` are non-scalar with their -// first dimension equal to `cond`. -static LogicalResult Verify(SelectOp op) { - auto then_tensor = op.t().getType().cast(); - auto else_tensor = op.e().getType().cast(); - // Check (1). - if (!AreCastCompatible({then_tensor, else_tensor})) - return op.emitOpError() << "requires t and e have compatible shapes"; - - // Get data rank (if exists). - int data_rank; - // If data is unranked or data_rank is 0, this will remain -2. Otherwise - // refers to first dimension of then and/or else. 
- int data_first_dim = -2; - bool then_has_rank = then_tensor.hasRank(); - bool else_has_rank = else_tensor.hasRank(); - if (then_has_rank && else_has_rank) { - data_rank = then_tensor.getRank(); - if (then_tensor.getRank() > 0) - data_first_dim = then_tensor.getShape().front(); - if (else_tensor.getRank() > 0) - data_first_dim = std::max( - static_cast(else_tensor.getShape().front()), data_first_dim); - } else if (then_has_rank) { - data_rank = then_tensor.getRank(); - if (then_tensor.getRank() > 0) - data_first_dim = then_tensor.getShape().front(); - } else if (else_has_rank) { - data_rank = else_tensor.getRank(); - if (else_tensor.getRank() > 0) - data_first_dim = else_tensor.getShape().front(); - } else { - // Neither has a rank. - return success(); - } - - auto cond_tensor = op.condition().getType().dyn_cast(); - if (!cond_tensor) return success(); - auto cond_rank = cond_tensor.getRank(); - // Check (2a) and (2b). - if (cond_rank == 0 || cond_rank == data_rank) return success(); - // Check (2c). - if (cond_rank == 1) { - auto cond_shape = cond_tensor.getShape().front(); - if (data_rank == 0) { - return op.emitOpError() - << "requires that t and e are nonscalar when pred is a vector"; - } - // We know `data` tensor has a rank of at least 1. - if (data_first_dim != -1 && cond_shape != -1 && - data_first_dim != cond_shape) { - return op.emitOpError() << "requires that, when pred is a vector, the " - "shape matches the first dimension of t and e"; - } - return success(); - } - // None of (2a,b,c) were true; fail. - return op.emitOpError() << "requires that pred is a scalar OR has the same " - "rank as t and e OR is a vector"; -} - -//===----------------------------------------------------------------------===// -// SelectV2Op -//===----------------------------------------------------------------------===// - -static Type InferSelectV2OpType(Value condition, Value e, Value t) { - Type element_ty = e.getType().cast().getElementType(); - auto unranked_ty = UnrankedTensorType::get(element_ty); - - Type broadcasted_ty = - OpTrait::util::getBroadcastedType(e.getType(), t.getType()); - if (!broadcasted_ty) return unranked_ty; - - auto cond_ranked_ty = condition.getType().dyn_cast(); - auto broadcasted_ranked_ty = broadcasted_ty.dyn_cast(); - if (!cond_ranked_ty || !broadcasted_ranked_ty) return unranked_ty; - - // Explicitly get broadcasted output type as element types of condition may - // not be same as the broadcated type's element type. - SmallVector result_shape; - if (!OpTrait::util::getBroadcastedShape(cond_ranked_ty.getShape(), - broadcasted_ranked_ty.getShape(), - result_shape)) - return unranked_ty; - return RankedTensorType::get(result_shape, element_ty); -} - -void SelectV2Op::build(OpBuilder &builder, OperationState &result, - Value condition, Value e, Value t) { - build(builder, result, InferSelectV2OpType(condition, e, t), condition, e, t); -} - -//===----------------------------------------------------------------------===// -// ShapeOp -//===----------------------------------------------------------------------===// - -namespace { -// Validates Shape/ShapeN/VariableShape operand and associated result types. -LogicalResult VerifyShapeOperandAndResult(Operation *op, Type operand_type, - Type result_type, - int variadic_idx = -1) { - std::string variadic_idx_str = - variadic_idx < 0 ? 
"" : llvm::formatv(" #{0}", variadic_idx).str(); - - auto result_ranked_type = result_type.dyn_cast(); - if (!result_ranked_type) return success(); - if (result_ranked_type.getShape().size() != 1) - return op->emitOpError("requires 1D type for result") << variadic_idx_str; - - auto operand_ranked_type = operand_type.dyn_cast_or_null(); - if (operand_ranked_type) { - // The operand is a ranked tensor. - if (result_ranked_type.hasStaticShape() && - !operand_ranked_type.getShape().empty() && - result_ranked_type.getDimSize(0) != - operand_ranked_type.getShape().size()) - return op->emitOpError("requires dimension size of result") - << variadic_idx_str << " to match rank of operand" - << variadic_idx_str; - } else if (result_ranked_type.hasStaticShape()) { - // The operand is an unranked tensor, print a warning if the result - // is static. - // Note: We do not handle this situation as an error, this would be too - // restrictive due to incompleteness of shape inference at this point. - op->emitWarning("has static shape result") - << variadic_idx_str << " for unranked operand" << variadic_idx_str; - } - - Type element_type = result_ranked_type.getElementType(); - if (!element_type.isSignlessInteger(32) && - !element_type.isSignlessInteger(64)) - return op->emitOpError("requires int32 or int64 return type for result") - << variadic_idx_str; - - return success(); -} -} // anonymous namespace - -static LogicalResult Verify(ShapeOp op) { - return VerifyShapeOperandAndResult(op, op.input().getType(), op.getType()); -} - -// Converts shape of the given type to attribute if it is of ranked tensor type. -// Returned attribute has integer elements of the given width. -static Attribute ConvertShapeToAttr(Type input_ty, int out_width) { - auto ranked_ty = input_ty.dyn_cast(); - if (!ranked_ty || !ranked_ty.hasStaticShape()) return {}; - - auto shape = ranked_ty.getShape(); - int rank = shape.size(); - - SmallVector dimensions; - dimensions.reserve(rank); - for (int i = 0; i < rank; ++i) - dimensions.push_back(APInt(out_width, shape[i])); - - auto result_type = RankedTensorType::get( - {rank}, IntegerType::get(out_width, input_ty.getContext())); - return DenseElementsAttr::get(result_type, dimensions); -} - -OpFoldResult ShapeOp::fold(ArrayRef operands) { - int width = - getType().cast().getElementType().getIntOrFloatBitWidth(); - return ConvertShapeToAttr(getOperand().getType(), width); -} - -void ShapeOp::build(OpBuilder &builder, OperationState &result, Value input, - BoolAttr use32Bit) { - auto rankedTensorType = input.getType().dyn_cast(); - int64_t rank = rankedTensorType ? rankedTensorType.getRank() : -1; - auto out_type = use32Bit.getValue() ? 
builder.getIntegerType(32) - : builder.getIntegerType(64); - return ShapeOp::build(builder, result, - RankedTensorType::get({rank}, out_type), input); -} - -//===----------------------------------------------------------------------===// -// ShapeNOp -//===----------------------------------------------------------------------===// - -static LogicalResult Verify(ShapeNOp op) { - const size_t num_tensors = op.N(); - - if (op.getNumOperands() != num_tensors) - return op.emitOpError() << "requires " << num_tensors << " operand(s), got " - << op.getNumOperands() << " operand(s)"; - - if (op.getNumResults() != num_tensors) - return op.emitOpError() << "requires " << num_tensors << " result(s), got " - << op.getNumResults() << " result(s)"; - - for (auto i : llvm::seq(0, num_tensors)) { - auto verification = VerifyShapeOperandAndResult( - op, op.getOperand(i).getType(), op.getResult(i).getType(), i); - if (failed(verification)) return verification; - } - - return success(); -} - -LogicalResult ShapeNOp::fold(ArrayRef operands, - SmallVectorImpl &results) { - if (getNumOperands() == 0) return success(); - int width = - getType(0).cast().getElementType().getIntOrFloatBitWidth(); - - for (Type input_ty : getOperandTypes()) { - OpFoldResult result = ConvertShapeToAttr(input_ty, width); - if (!result) return failure(); - - results.push_back(result); - } - return success(); -} - -// TODO(hinsu): Add canonicalization pattern for ShapeN ops that don't have all -// static input shapes. Replacing output values corresponding to static input -// types may enable optimizations in users of the values. - -//===----------------------------------------------------------------------===// -// SizeOp -//===----------------------------------------------------------------------===// - -// Verifies that, -// -// * Input type, if is a ranked tensor, has at most INT32_MAX dimensions. -// -static LogicalResult Verify(SizeOp op) { - if (!HasRankAtMost(op.input(), std::numeric_limits::max())) - return op.emitOpError( - "requires ranked input tensor to be of rank INT32_MAX or less"); - - return success(); -} - -//===----------------------------------------------------------------------===// -// SliceOp -//===----------------------------------------------------------------------===// - -// Verifies that: -// -// - operands begin and size are 1D with the same number of elements. -// - if the input is a ranked tensor, the rank of the input equals the number -// of elements in operands begin and size. 
-// - if begin are constants, that -// 0 <= begin[i] <= begin[i] + size[i] <= input_ty.getShape()[i] -// - if begins aren't constant but the input is a ranked tensor, that -// size[i] <= input_ty.getShape()[i] -// -static LogicalResult Verify(SliceOp op) { - RankedTensorType begin_ty = GetRankedTensorTypeForOperand(op.begin()); - if (begin_ty && begin_ty.getRank() != 1) { - return op.emitOpError() << "requires begin operand to be 1D tensor"; - } - - RankedTensorType size_ty = GetRankedTensorTypeForOperand(op.size()); - if (size_ty && size_ty.getRank() != 1) { - return op.emitOpError() << "requires size operand to be 1D tensor"; - } - - if (!begin_ty || !size_ty || !begin_ty.hasStaticShape() || - !size_ty.hasStaticShape()) - return success(); - - if (begin_ty.getNumElements() != size_ty.getNumElements()) { - return op.emitOpError() << "requires begin and size operands to have the" - " same number of elements"; - } - - auto input_ty = op.input().getType().dyn_cast(); - if (input_ty && begin_ty.getNumElements() != input_ty.getRank()) { - return op.emitOpError() << "requires number of elements in begin and size" - "are equal to input rank"; - } - - DenseIntElementsAttr begin_indices; - if (matchPattern(op.begin(), m_Constant(&begin_indices))) { - DenseIntElementsAttr slice_sizes; - bool constant_slice_sizes = - matchPattern(op.size(), m_Constant(&slice_sizes)); - int dim = 0; - for (const APInt &raw_begin_index : begin_indices.getValues()) { - int64_t begin_index = raw_begin_index.getSExtValue(); - int64_t input_size = input_ty ? input_ty.getShape()[dim] : -1; - int64_t slice_size = constant_slice_sizes - ? slice_sizes.getValue(dim).getSExtValue() - : 0; - if (slice_size == -1 && input_size != -1) { - slice_size = input_size - begin_index; - } - if (begin_index < 0 || - (input_size != -1 && begin_index + slice_size > input_size)) { - return op.emitOpError() - << "requires 0 <= begin[i] <= begin[i] + size[i] <= Di"; - } - ++dim; - } - } else if (input_ty) { - // If the inputs are ranked, we can do a few more sanity checks. - DenseIntElementsAttr slice_sizes; - if (matchPattern(op.size(), m_Constant(&slice_sizes))) { - auto input_shape = input_ty.getShape(); - for (int64_t i = 0; i < input_ty.getRank(); ++i) { - int64_t slice_size = slice_sizes.getValue(i).getInt(); - int64_t input_size = input_shape[i]; - if (slice_size != -1 && input_size != -1 && slice_size > input_size) { - return op.emitOpError() << "requires size[i] <= Di, even if begin[i] " - "is unknown at compile time"; - } - } - } - } - - return success(); -} - -//===----------------------------------------------------------------------===// -// SoftmaxOp -//===----------------------------------------------------------------------===// - -static LogicalResult Verify(SoftmaxOp op) { - if (!HasRankAtLeast(op.logits(), 1)) { - return op.emitOpError("requires operand to have rank at least 1"); - } - return success(); -} - -//===----------------------------------------------------------------------===// -// SoftmaxCrossEntropyWithLogitsOp -//===----------------------------------------------------------------------===// - -// Verifies that, -// -// * Input types are broadcast compatible and the broadcasted type has rank two. 
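The bound check in the Slice verifier above, 0 <= begin[i] <= begin[i] + size[i] <= Di with size[i] == -1 meaning "through the end of the dimension", can likewise be exercised on plain integers. The helper below is a sketch with assumed names and -1 conventions, not the MLIR verifier itself.

#include <cstdint>

// Checks one dimension of a tf.Slice: `dim` is the input extent (-1 when
// unknown), `begin` the constant start index, and `size` the requested length
// (-1 means "everything from begin through the end of the dimension").
bool SliceDimInBounds(int64_t dim, int64_t begin, int64_t size) {
  if (begin < 0) return false;
  if (size == -1) {
    if (dim == -1) return true;  // nothing more can be checked statically
    size = dim - begin;          // slice runs to the end of the dimension
  }
  if (size < 0) return false;
  return dim == -1 || begin + size <= dim;
}

For instance, SliceDimInBounds(10, 3, 4) holds, while SliceDimInBounds(10, 8, 4) fails because the slice would run past the end of the dimension.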
-// -static LogicalResult Verify(SoftmaxCrossEntropyWithLogitsOp op) { - auto broadcasted_ty = OpTrait::util::getBroadcastedType( - op.features().getType(), op.labels().getType()) - .dyn_cast_or_null(); - if (!broadcasted_ty || - (broadcasted_ty.hasRank() && broadcasted_ty.getRank() != 2)) - return op.emitOpError( - "requires features and labels to be broadcast compatible to rank two"); - - return success(); -} - -//===----------------------------------------------------------------------===// -// SparseSoftmaxCrossEntropyWithLogitsOp -//===----------------------------------------------------------------------===// - -static LogicalResult Verify(SparseSoftmaxCrossEntropyWithLogitsOp op) { - if (!IsOfRankOrUnranked(op.features(), 2)) { - return op.emitOpError("requires features operand of rank two"); - } - if (!IsOfRankOrUnranked(op.labels(), 1)) { - return op.emitOpError("requires labels operand of rank one"); - } - auto features_ty = op.features().getType().dyn_cast(); - auto labels_ty = op.labels().getType().dyn_cast(); - if (features_ty && labels_ty) { - int64_t features_batches = features_ty.getDimSize(0); - int64_t labels_batches = labels_ty.getDimSize(0); - if (!ShapedType::isDynamic(features_batches) && - !ShapedType::isDynamic(labels_batches) && - features_batches != labels_batches) - return op.emitOpError( - "requires features and labels with matching first dimension"); - } - return success(); -} - -//===----------------------------------------------------------------------===// -// SplitOp -//===----------------------------------------------------------------------===// - -// Verifies the input and split dimension operands for tf.Split/tf.SplitV. -// Writes the split dimension's index (adjusted with input rank) via `dim_index` -// if it's a constant. -template -LogicalResult VerifySplitInputAndSplitDim(Op op, Optional *dim_index) { - *dim_index = llvm::None; - - Value split_dim = op.split_dim(); - if (auto split_dim_type = split_dim.getType().dyn_cast()) - if (split_dim_type.getRank() != 0) - return op.emitOpError( - "split dimension should be an integer scalar tensor"); - - // We can perform further verification if the input tensor to be split has - // known rank and the split dimension tensor is a constant. 
- - auto input_type = op.value().getType().template dyn_cast(); - if (!input_type) return success(); - - int64_t input_rank = input_type.getRank(); - if (input_rank == 0) - return op.emitOpError("cannot split scalar input tensor"); - - DenseIntElementsAttr split_dim_attr; - if (!matchPattern(split_dim, m_Constant(&split_dim_attr))) return success(); - - int64_t index = (*split_dim_attr.begin()).getSExtValue(); - - if (index + input_rank < 0 || index >= input_rank) { - return op.emitOpError("split dimension must be in range [-") - << input_rank << ", " << input_rank << ")"; - } - - if (index < 0) index += input_rank; - *dim_index = index; - - return success(); -} - -static LogicalResult Verify(SplitOp op) { - Optional dim_index; - if (failed(VerifySplitInputAndSplitDim(op, &dim_index))) return failure(); - if (!dim_index) return success(); - - int64_t input_dim_size = - op.value().getType().cast().getDimSize(*dim_index); - if (input_dim_size == ShapedType::kDynamicSize) return success(); - - if (input_dim_size % op.getNumResults() != 0) - return op.emitOpError("dimension #") - << *dim_index << " not divisible by the number of result tensors"; - - return success(); -} - -//===----------------------------------------------------------------------===// -// SplitVOp -//===----------------------------------------------------------------------===// - -static LogicalResult Verify(SplitVOp op) { - auto split_sizes_type = - op.size_splits().getType().dyn_cast(); - if (!split_sizes_type) return success(); - - if (split_sizes_type.getRank() != 1 || - split_sizes_type.getDimSize(0) != op.getNumResults()) - return op.emitOpError("split sizes should be a 1D tensor of ") - << op.getNumResults() << " elements"; - - Optional dim_index = 0; - if (failed(VerifySplitInputAndSplitDim(op, &dim_index))) return failure(); - if (!dim_index) return success(); - - int64_t input_dim_size = - op.value().getType().cast().getDimSize(*dim_index); - if (input_dim_size == ShapedType::kDynamicSize) return success(); - - // If split sizes come from a constant, they must sum to the dimension size - // along split_dim, and we can have no more than one dynamic dimension. - DenseIntElementsAttr split_sizes_attr; - if (!matchPattern(op.size_splits(), m_Constant(&split_sizes_attr))) - return success(); - - int64_t total_dim_size = 0; // Total dimension size assigned to splits - llvm::Optional dynamic_dim_index; - - SmallVector split_sizes; - split_sizes.reserve( - split_sizes_attr.getType().cast().getNumElements()); - - for (auto dim : llvm::enumerate(split_sizes_attr)) { - int64_t dim_val = dim.value().getSExtValue(); - split_sizes.push_back(dim_val); - if (dim_val == ShapedType::kDynamicSize) { - // We cannot have more than one dynamic dimension. 
- if (dynamic_dim_index) - return op.emitOpError( - "cannot have more than one dynamic dimension in split sizes"); - dynamic_dim_index = dim.index(); - } else { - total_dim_size += dim_val; - } - } - - if (!dynamic_dim_index && total_dim_size != input_dim_size) - return op.emitOpError( - "split sizes must sum up to the dimension size along split " - "dimension, found ") - << total_dim_size << " vs " << input_dim_size; - - if (dynamic_dim_index && total_dim_size > input_dim_size) - return op.emitOpError( - "split sizes must sum up to be less than or equal to the " - "dimension size along split dimension, found ") - << total_dim_size << " vs " << input_dim_size; - - return success(); -} - -//===----------------------------------------------------------------------===// -// SquareOp -//===----------------------------------------------------------------------===// - -void SquareOp::getCanonicalizationPatterns(OwningRewritePatternList &results, - MLIRContext *context) { - results.insert(context); -} - -//===----------------------------------------------------------------------===// -// SubOp -//===----------------------------------------------------------------------===// - -void SubOp::getCanonicalizationPatterns(OwningRewritePatternList &results, - MLIRContext *context) { - results.insert(context); -} - -OpFoldResult SubOp::fold(ArrayRef operands) { - return IdentityArithmeticOpFolder(*this, operands); -} - -//===----------------------------------------------------------------------===// -// SumOp -//===----------------------------------------------------------------------===// - -void SumOp::build(OpBuilder &builder, OperationState &result, Value input, - Value reduction_indices, BoolAttr keep_dims) { - Type out_ty = - InferReductionOpType(input, reduction_indices, keep_dims, &builder); - build(builder, result, out_ty, input, reduction_indices, keep_dims); -} - -//===----------------------------------------------------------------------===// -// StridedSliceOp -//===----------------------------------------------------------------------===// - -// TODO(b/154160827): Add a canonicalization pattern from tf.StridedSliceOp to -// tf.SliceOp if both of the following are true: -// - All strides have a known value equal to 1 -// - No masks are set (or masks can be applied by transforming the inputs to -// Slice) - -// Verifies that, -// -// - begin, end and strides operands are 1D and they have the same number of -// elements. Here, the number of elements should be less than 32 to support -// 32-bit mask attributes. -// - None of the strides values are zero. -// - Ellipsis mask can have at most one bit set. - -template -static LogicalResult VerifyStridedSliceBase(OpTy op) { - // Expected size for operands begin, end and strides vector operands. - int64_t expected_size = -1; - - for (Value val : {op.begin(), op.end(), op.strides()}) { - auto operand_ty = val.getType().dyn_cast(); - if (!operand_ty || !operand_ty.hasStaticShape()) { - // TensorFlow constant ops may have non-static shape because the shape is - // not propagated during constant folding. If the defining op for this - // operand is a constant op, use the constant op's attribute to get the - // actual shape. 
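The tf.SplitV constraint verified earlier in this hunk (size_splits may contain at most one -1, and the known entries must sum exactly to the split dimension, or to no more than it when a -1 is present) condenses into a small standalone check. The -1 sentinel stands in for ShapedType::kDynamicSize; the helper is illustrative only.

#include <cstdint>
#include <vector>

// Sketch of the tf.SplitV rule: at most one entry of size_splits may be -1
// (dynamic); without a dynamic entry the sizes must sum exactly to the split
// dimension, with one they must not exceed it.
bool SplitSizesAreValid(const std::vector<int64_t>& size_splits,
                        int64_t split_dim_size) {
  int64_t known_total = 0;
  int dynamic_entries = 0;
  for (int64_t size : size_splits) {
    if (size == -1) {
      if (++dynamic_entries > 1) return false;  // only one -1 allowed
    } else {
      known_total += size;
    }
  }
  if (dynamic_entries == 0) return known_total == split_dim_size;
  return known_total <= split_dim_size;
}

Splitting a dimension of 10 as {3, -1, 4} is accepted (the -1 absorbs the remaining 3 elements), while {3, 4, 5} is rejected because the sizes sum to 12.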
- DenseIntElementsAttr attr; - if (!matchPattern(val, m_Constant(&attr))) continue; - operand_ty = attr.getType(); - } - - if (operand_ty.getRank() != 1) - return op.emitOpError() - << "requires begin, end and strides to be 1D tensors"; - - int64_t length = operand_ty.getDimSize(0); - if (length == -1) continue; - - if (expected_size == -1) { - // This op uses 32-bit masks. - if (length >= 32) - return op.emitOpError( - "requires begin, end and strides operands with less than 32 " - "elements"); - - expected_size = length; - } else if (length != expected_size) { - return op.emitOpError() << "requires begin, end and strides to have the " - "same number of elements"; - } - } - - // If strides are constants, verify that none of the element is zero. - DenseIntElementsAttr strides; - if (matchPattern(op.strides(), m_Constant(&strides))) { - if (llvm::is_contained(strides.getValues(), 0)) - return op.emitOpError("requires non-zero strides"); - } - - // Use bit compares to ensure ellipsis_mask is 0 or a power of 2, i.e. there - // exists only no more than one ellipsis. - uint32_t ellipsis_mask = op.ellipsis_mask().getZExtValue(); - if (ellipsis_mask != 0 && !llvm::isPowerOf2_32(ellipsis_mask)) - return op.emitOpError("cannot have multiple ellipses"); - - return success(); -} - -// Clamps the given `val`: returns `low` if `val` is less than `low`; returns -// `high` if `high` is less than `val`; otherwise returns `val`. -template -constexpr const T &Clamp(const T &val, const T &low, const T &high) { - assert(!(high < low)); - return (val < low) ? low : (high < val) ? high : val; -} - -// Checks if the `index` bit of `val` is set. -template -constexpr bool IsSet(const T &val, unsigned index) { - return (val & (1 << index)) != 0; -} - -// Sets the `index` bit of `val`. -template -constexpr void Set(T &val, unsigned index) { - val |= (1 << index); -} - -// Unset the `index` bit of `val`. -template -constexpr void Unset(T &val, unsigned index) { - val &= ~(1 << index); -} - -// Copy the `src_index` bit of `src` to `dst_index` bit of `dst`. -template -constexpr void CopyBit(const T &src, unsigned src_index, T &dst, - unsigned dst_index) { - if (IsSet(src, src_index)) - Set(dst, dst_index); - else - Unset(dst, dst_index); -} - -// The sparse spec of strided slice does not correspond to the number of -// dimensions. For example, sparse spec for foo[..., 3:10] for foo of shape (2, -// 4, 8) would have dims = 2. -struct SparseSliceSpec { - int64_t dims; - int32_t begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask; - const ArrayRef &begin; - const ArrayRef &end; - const ArrayRef &strides; -}; - -// The dense spec of strided slice is the canonicalized version of sparse spec. -// The number of dimensions of dense spec correspond to the number of dimensions -// in operand tensor. -struct DenseSliceSpec { - int64_t dims; - int32_t begin_mask, end_mask, shrink_axis_mask; - SmallVectorImpl &begin; - SmallVectorImpl &end; - SmallVectorImpl &strides; -}; - -// Make a sparse spec into a dense index spec. -// The sparse spec does not correspond to the number of dimensions -// Make a dense spec that corresponds to the number of dimensions -// -// For example suppose foo[...,3:, 2] on foo.shape=(2,2,3,4) then -// we need to produce the missing begin_mask, end_mask for the first two -// dimensions i.e. foo[:, :, 3:, 2]. 
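The foo[..., 3:, 2] example in the comment above can be worked through with a standalone sketch of the ellipsis expansion: every sparse entry other than the ellipsis itself and any new_axis entries consumes one operand dimension, and the ellipsis stands for whatever is left. The function and test below are an illustrative reconstruction, not the helper being moved by this patch.

#include <cassert>
#include <cstdint>

// Number of operand dimensions an ellipsis expands to: every sparse entry
// other than the ellipsis itself and any new_axis entries consumes exactly
// one operand dimension, and the ellipsis soaks up the rest.
int64_t EllipsisSpan(int64_t num_sparse_entries, int64_t operand_rank,
                     int64_t num_new_axis_entries) {
  int64_t consuming_entries =
      num_sparse_entries - 1 /*the ellipsis*/ - num_new_axis_entries;
  int64_t span = operand_rank - consuming_entries;
  return span < 0 ? 0 : span;
}

int main() {
  // foo[..., 3:, 2] on a rank-4 operand: three sparse entries, no new_axis.
  // The ellipsis stands for the two leading dimensions, i.e. foo[:, :, 3:, 2].
  assert(EllipsisSpan(/*num_sparse_entries=*/3, /*operand_rank=*/4,
                      /*num_new_axis_entries=*/0) == 2);
  return 0;
}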
-static void BuildDenseSliceSpec(const SparseSliceSpec &sparse, - DenseSliceSpec *dense) { - // Build expanded dense begin, end, strides, begin_mask, end_mask, and - // shrink_axis_mask. - dense->begin.resize(dense->dims); - dense->end.resize(dense->dims); - dense->strides.resize(dense->dims); - dense->begin_mask = 0; - dense->end_mask = 0; - dense->shrink_axis_mask = 0; - - // Count number of new_axis after ellipsis. This helps in calculating the - // number of dimensions ellipsis represents in the sparse spec. - bool ellipsis_seen = false; - int num_new_axis_after_ellipsis = 0; - for (int sparse_index = 0; sparse_index < sparse.dims; ++sparse_index) { - if (ellipsis_seen && IsSet(sparse.new_axis_mask, sparse_index)) - num_new_axis_after_ellipsis++; - if (IsSet(sparse.ellipsis_mask, sparse_index)) ellipsis_seen = true; - } - - int dense_index = 0; - for (int sparse_index = 0; sparse_index < sparse.dims; ++sparse_index) { - if (IsSet(sparse.new_axis_mask, sparse_index)) continue; - if (IsSet(sparse.ellipsis_mask, sparse_index)) { - auto next_index = std::min(dense->dims - (sparse.dims - sparse_index) + - 1 + num_new_axis_after_ellipsis, - dense->dims); - // Expand ellipsis into the appropriate dense indices. From current index - // until next_index, all dimensions would have begin and end masks set and - // stride 1, i.e., get all elements in those dimensions. - for (; dense_index < next_index; ++dense_index) { - dense->begin[dense_index] = dense->end[dense_index] = 0; - dense->strides[dense_index] = 1; - Set(dense->begin_mask, dense_index); - Set(dense->end_mask, dense_index); - } - continue; - } - assert(dense_index < dense->dims); - // Copy over the sparse indices to dense indices if ellipsis_mask and - // new_axis_mask are not set. - dense->begin[dense_index] = sparse.begin[sparse_index]; - dense->end[dense_index] = sparse.end[sparse_index]; - dense->strides[dense_index] = sparse.strides[sparse_index]; - CopyBit(sparse.begin_mask, sparse_index, dense->begin_mask, dense_index); - CopyBit(sparse.end_mask, sparse_index, dense->end_mask, dense_index); - CopyBit(sparse.shrink_axis_mask, sparse_index, dense->shrink_axis_mask, - dense_index); - dense_index++; - } -} - -// For the given `input_shape`, calculates the sliced shape using the given -// `begin`, `end`, and `stride` ranges and `begin_mask`, `end_mask`, and -// `shrink_axis_mask` masks. Updates the result back to `input_shape`. If -// `shrink_axis_mask` is not zero, this function will not drop the corresponding -// dimensions in `input_shape`; it will turn them into 1s. At the same time, -// canonicalizes `begin`, `end`, and `strides. The calculation follows -// tf.StridedSlice op semantics. -static void CalculateSlicedShapeFromDenseIndices( - MutableArrayRef input_shape, int32_t begin_mask, int32_t end_mask, - int32_t shrink_axis_mask, MutableArrayRef begin, - MutableArrayRef end, MutableArrayRef stride) { - assert(input_shape.size() <= 32); // Only 32-bit masks are supported. - - // Make sure ranges' ranks are consistent with the input. 
- assert(input_shape.size() == begin.size()); - assert(input_shape.size() == end.size()); - assert(input_shape.size() == stride.size()); - - for (int i = 0, e = input_shape.size(); i < e; ++i) { - if (ShapedType::isDynamic(input_shape[i])) continue; - - int64_t dim_i = input_shape[i]; - int64_t begin_i = begin[i]; - int64_t end_i = end[i]; - int64_t stride_i = stride[i]; - - // [0]: mask for begin, [1]: mask for end - int64_t masks[] = {begin_mask & (1 << i), end_mask & (1 << i)}; - // [0]: bound for begin, [1]: bound for end - int64_t bounds[] = {stride_i > 0 ? 0 : -1, - stride_i > 0 ? dim_i : dim_i - 1}; - - // Canonicalizes the given range `point` (begin/end) according to the - // current dimension. `c` means case: 0 for begin, 1 for end. - auto canonicalize = [&](int64_t point, int c) { - if (masks[c]) return stride_i > 0 ? bounds[c] : bounds[(c + 1) & 1]; - - // Add dim as offset to negative range point. - point = point < 0 ? dim_i + point : point; - return Clamp(point, bounds[0], bounds[1]); - }; - - begin_i = canonicalize(begin_i, 0); - end_i = canonicalize(end_i, 1); - - int64_t interval_len = end_i - begin_i; - int64_t size_i = 0; - // If internal length is zero or has different sign from stride, it's a - // degenerated case: we are slicing nothing. Otherwise, calculate the sliced - // size. - if (interval_len != 0 && (interval_len < 0) == (stride_i < 0)) - size_i = (interval_len / stride_i) + (interval_len % stride_i != 0); - - begin[i] = begin_i; - if (IsSet(shrink_axis_mask, i)) { - // Shrink this dimension. It means we only take the element at begin_i. - input_shape[i] = 1; - end[i] = begin_i + 1; - stride[i] = 1; - } else { - input_shape[i] = size_i; - end[i] = end_i; - stride[i] = stride_i; - } - } -} - -// For the given `input_shape`, calculates the sliced shape using the given -// `sparse_begin`, `sparse_end`, and `sparse_strides` ranges and `begin_mask`, -// `end_mask`, `ellipsis_mask` , `new_axis_mask` and `shrink_axis_mask` masks. -// Updates the result back to `input_shape`. -static void CalculateSlicedShapeFromSparseIndices( - MutableArrayRef input_shape, ArrayRef sparse_begin, - ArrayRef sparse_end, ArrayRef sparse_strides, - int32_t begin_mask, int32_t end_mask, int32_t ellipsis_mask, - int32_t new_axis_mask, int32_t shrink_axis_mask, - SmallVectorImpl *begin, SmallVectorImpl *end, - SmallVectorImpl *stride) { - int64_t num_sparse_indices = sparse_begin.size(); - SparseSliceSpec sparse = {num_sparse_indices, begin_mask, end_mask, - ellipsis_mask, new_axis_mask, shrink_axis_mask, - sparse_begin, sparse_end, sparse_strides}; - - // If no ellipsis_mask exists then an implicit ellipsis_mask at the end is - // inserted. This handles cases where foo[2:4] (foo.shape() = [4, 8]) yields - // a tensor of shape [2, 8], i.e., foo[2:4] is same as foo[2:4, ...]. 
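The per-dimension computation above can also be exercised in isolation. The sketch below follows the same steps on plain integers: apply the begin/end masks, shift negative indices, clamp to the bounds implied by the stride direction, and derive the sliced extent from the stride; all names are illustrative assumptions rather than the patched code.

#include <algorithm>
#include <cassert>
#include <cstdint>

// Extent of one dimension of size `dim` after slicing with `begin:end:stride`.
// `begin_masked` / `end_masked` model the begin_mask / end_mask bit for this
// dimension; a masked index means "from the start" / "to the end" in the
// direction of the stride.
int64_t SlicedDimSize(int64_t dim, int64_t begin, int64_t end, int64_t stride,
                      bool begin_masked, bool end_masked) {
  assert(stride != 0 && dim >= 0);
  const int64_t low = stride > 0 ? 0 : -1;          // lowest reachable index
  const int64_t high = stride > 0 ? dim : dim - 1;  // highest reachable bound

  auto canonicalize = [&](int64_t point, bool masked, bool is_end) -> int64_t {
    if (masked) return (stride > 0) == is_end ? high : low;
    if (point < 0) point += dim;  // negative indices count from the back
    return std::clamp(point, low, high);
  };

  begin = canonicalize(begin, begin_masked, /*is_end=*/false);
  end = canonicalize(end, end_masked, /*is_end=*/true);

  int64_t interval = end - begin;
  // An empty interval, or one pointing against the stride, selects nothing.
  if (interval == 0 || (interval < 0) != (stride < 0)) return 0;
  return interval / stride + (interval % stride != 0);
}

int main() {
  assert(SlicedDimSize(10, 2, 8, 2, false, false) == 3);    // indices 2, 4, 6
  assert(SlicedDimSize(10, 0, 0, 1, true, true) == 10);     // full dimension
  assert(SlicedDimSize(10, -1, 0, -1, false, true) == 10);  // reversed
  return 0;
}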
- if (sparse.ellipsis_mask == 0) { - Set(sparse.ellipsis_mask, sparse.dims); - sparse.dims++; - } - - int64_t dims = input_shape.size(); - DenseSliceSpec dense = {dims, - /*begin_mask = */ 0, - /*end_mask = */ 0, - /*shrink_axis_mask = */ 0, - *begin, - *end, - *stride}; - - BuildDenseSliceSpec(sparse, &dense); - CalculateSlicedShapeFromDenseIndices(input_shape, dense.begin_mask, - dense.end_mask, dense.shrink_axis_mask, - *begin, *end, *stride); -} - -bool StridedSliceOp::GetSlicedBoundRanges( - SmallVectorImpl *slice_begin, SmallVectorImpl *slice_end, - SmallVectorImpl *slice_stride) { - // TODO(hinsu): Support lowering for ops with dynamic begin and end values - // when it is possible to derive indices based on mask attributes. - DenseIntElementsAttr sparse_begin_attr, sparse_end_attr, sparse_strides_attr; - if (!matchPattern(begin(), m_Constant(&sparse_begin_attr)) || - !matchPattern(end(), m_Constant(&sparse_end_attr)) || - !matchPattern(strides(), m_Constant(&sparse_strides_attr))) - return false; - - auto input_ty = this->input().getType().dyn_cast(); - if (!input_ty || !input_ty.hasStaticShape()) return false; - auto input_shape = llvm::to_vector<4>(input_ty.getShape()); - - SmallVector sparse_begin, sparse_end, sparse_strides; - - for (const APInt &index : sparse_begin_attr) - sparse_begin.push_back(index.getSExtValue()); - for (const APInt &index : sparse_end_attr) - sparse_end.push_back(index.getSExtValue()); - for (const APInt &stride : sparse_strides_attr) - sparse_strides.push_back(stride.getSExtValue()); - - CalculateSlicedShapeFromSparseIndices( - input_shape, sparse_begin, sparse_end, sparse_strides, - begin_mask().getZExtValue(), end_mask().getZExtValue(), - ellipsis_mask().getZExtValue(), new_axis_mask().getZExtValue(), - shrink_axis_mask().getZExtValue(), slice_begin, slice_end, slice_stride); - return true; -} - -//===----------------------------------------------------------------------===// -// StridedSliceGradOp -//===----------------------------------------------------------------------===// - -static LogicalResult Verify(StridedSliceGradOp op) { - auto shape_type = op.shape().getType().dyn_cast(); - if (shape_type && shape_type.getRank() != 1) - return op.emitOpError("'shape' operand must be 1D tensor, but got ") - << shape_type.getRank() << "D tensor"; - - if (failed(VerifyStridedSliceBase(op))) return failure(); - - // TODO(antiagainst): verify the gradient op.dy()'s shape is consistent with - // the sliced type from StridedSlice. 
- - return success(); -} - -bool StridedSliceGradOp::GetSlicedShapeAndBoundRanges( - SmallVectorImpl *input_shape, - SmallVectorImpl *slice_begin, SmallVectorImpl *slice_end, - SmallVectorImpl *slice_stride) { - DenseIntElementsAttr shape_attr; - DenseIntElementsAttr sparse_begin_attr, sparse_end_attr, sparse_strides_attr; - if (!matchPattern(shape(), m_Constant(&shape_attr)) || - !matchPattern(begin(), m_Constant(&sparse_begin_attr)) || - !matchPattern(end(), m_Constant(&sparse_end_attr)) || - !matchPattern(strides(), m_Constant(&sparse_strides_attr))) - return false; - - int rank = std::distance(shape_attr.begin(), shape_attr.end()); - - input_shape->clear(); - input_shape->reserve(rank); - for (const APInt &dim : shape_attr) - input_shape->push_back(dim.getSExtValue()); - - SmallVector sparse_begin, sparse_end, sparse_strides; - - for (const APInt &index : sparse_begin_attr) - sparse_begin.push_back(index.getSExtValue()); - for (const APInt &index : sparse_end_attr) - sparse_end.push_back(index.getSExtValue()); - for (const APInt &stride : sparse_strides_attr) - sparse_strides.push_back(stride.getSExtValue()); - - CalculateSlicedShapeFromSparseIndices( - *input_shape, sparse_begin, sparse_end, sparse_strides, - begin_mask().getZExtValue(), end_mask().getZExtValue(), - ellipsis_mask().getZExtValue(), new_axis_mask().getZExtValue(), - shrink_axis_mask().getZExtValue(), slice_begin, slice_end, slice_stride); - return true; -} - -//===----------------------------------------------------------------------===// -// TensorListReserveOp -//===----------------------------------------------------------------------===// - -static LogicalResult Verify(TensorListReserveOp op) { - if (!IsOfRankOrUnranked(op.element_shape(), 0) && - !IsOfRankOrUnranked(op.element_shape(), 1)) { - return op.emitOpError("requires element_shape operand to be 0D/1D tensor"); - } - - if (!IsOfRankOrUnranked(op.num_elements(), 0)) { - return op.emitOpError("requires num_elements operand to be 0D tensor"); - } - return success(); -} - -//===----------------------------------------------------------------------===// -// TensorListElementShapeOp -//===----------------------------------------------------------------------===// - -OpFoldResult TensorListElementShapeOp::fold(ArrayRef operands) { - int width = - getType().cast().getElementType().getIntOrFloatBitWidth(); - auto variant_type = - getElementTypeOrSelf(getOperand().getType()).cast(); - if (variant_type.getSubtypes().empty()) return {}; - return ConvertShapeToAttr(variant_type.getSubtypes()[0], width); -} - -//===----------------------------------------------------------------------===// -// TensorListStackOp -//===----------------------------------------------------------------------===// - -static LogicalResult Verify(TensorListStackOp op) { - if (!IsOfRankOrUnranked(op.element_shape(), 0) && - !IsOfRankOrUnranked(op.element_shape(), 1)) { - return op.emitOpError("requires element_shape operand to be 0D/1D tensor"); - } - return success(); -} - -//===----------------------------------------------------------------------===// -// TensorScatterUpdateOp -//===----------------------------------------------------------------------===// - -static LogicalResult Verify(TensorScatterUpdateOp op) { - if (!HasRankAtLeast(op.tensor(), 1)) - return op.emitOpError( - "requires tensor operand to have at least 1 dimension"); - if (!HasRankAtLeast(op.indices(), 1)) - return op.emitOpError( - "requires indices operand to have at least 1 dimension"); - if (!HasRankAtLeast(op.updates(), 
1)) - return op.emitOpError( - "requires updates operand to have at least 1 dimension"); - - auto tensor_ty = op.tensor().getType().dyn_cast(); - auto indices_ty = op.indices().getType().dyn_cast(); - if (!tensor_ty || !indices_ty) return success(); - - int64_t num_index_dims = indices_ty.getShape().back(); - if (ShapedType::isDynamic(num_index_dims)) return success(); - - if (num_index_dims > tensor_ty.getRank()) - return op.emitOpError( - "requires tensor operand with rank greater than or equal to the " - "indices operand's last dimensions"); - return success(); -} - -//===----------------------------------------------------------------------===// -// TopKV2Op -//===----------------------------------------------------------------------===// - -static LogicalResult Verify(TopKV2Op op) { - if (!HasRankAtLeast(op.input(), 1)) - return op.emitOpError( - "requires input operand to have at least 1 dimension"); - - if (!IsOfRankOrUnranked(op.k(), 0)) - return op.emitOpError("requires k operand to be 0D tensor"); - - return success(); -} - -//===----------------------------------------------------------------------===// -// ToBoolOp -//===----------------------------------------------------------------------===// - -namespace { -// If the input to ToBoolOp is a `tensor`, then the ToBoolOp is an identity -// function and can be removed. -class ToBoolOfZeroDBoolTensor : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(ToBoolOp op, - PatternRewriter &rewriter) const override { - if (auto type = op.getOperand().getType().dyn_cast()) { - if (type.getRank() == 0 && type.getElementType().isInteger(1)) { - rewriter.replaceOp(op, op.getOperand()); - return success(); - } - } - return failure(); - } -}; -} // namespace - -void ToBoolOp::getCanonicalizationPatterns(OwningRewritePatternList &results, - MLIRContext *context) { - results.insert(context); -} - -//===----------------------------------------------------------------------===// -// TransposeOp -//===----------------------------------------------------------------------===// - -static LogicalResult Verify(TransposeOp op) { - // TODO(hinsu): Verify using a custom verifier that, - // * Transpose permutation is 1-D of size equal to the rank of the first - // input, if the shapes are partially known. Requires use of a more - // restrictive type than TF_Tensor. - // * Result shape dimensions are possible based on the input shape. - return success(); -} - -// TODO(jpienaar): perm could be optional too. -void TransposeOp::build(OpBuilder &builder, OperationState &result, Value x, - Value perm) { - auto x_type = x.getType().cast(); - // If value is unranked, then so is results. - if (!x_type.hasRank()) - return TransposeOp::build(builder, result, - UnrankedTensorType::get(x_type.getElementType()), - x, perm); - - // TODO(jpienaar): Handle unknown perm case. - - // TODO(jpienaar): Extract utility function. 
- auto etype = x_type.cast().getElementType(); - DenseIntElementsAttr attr_shape; - if (matchPattern(perm, m_Constant(&attr_shape))) { - llvm::SmallVector const_shape; - if (attr_shape.isSplat()) { - const_shape.assign( - attr_shape.getNumElements(), - x_type.getDimSize((*attr_shape.begin()).getSExtValue())); - } else { - const_shape.reserve(attr_shape.getNumElements()); - for (const auto &dim : attr_shape) - const_shape.push_back(x_type.getDimSize(dim.getSExtValue())); - } - return TransposeOp::build( - builder, result, RankedTensorType::get(const_shape, etype), x, perm); - } - return TransposeOp::build(builder, result, UnrankedTensorType::get(etype), x, - perm); -} - -namespace { - -OpFoldResult FoldIdentityTranspose(TransposeOp op) { - auto const_perm = dyn_cast_or_null(op.perm().getDefiningOp()); - if (!const_perm) return {}; - - auto const_value = const_perm.value(); - const auto elements = const_value.getValues(); - - for (auto it : llvm::enumerate(elements)) { - if (it.index() != it.value()) return {}; - } - - // TODO(jpienaar): Remove if/when we handle this more generally. - if (op.getType() != op.x().getType()) { - // If the types don't match then only fold if all the operands are in the TF - // dialect. - for (auto user : op.getOperation()->getUsers()) - if (user->getDialect() != op.getDialect()) return {}; - } - - return op.x(); -} - -OpFoldResult FoldCancellableTranspose(TransposeOp op) { - // Operand is a TransposeOp. - auto transpose = dyn_cast_or_null(op.x().getDefiningOp()); - if (!transpose) return {}; - - // Permutations defined by constant operations. - auto perm0 = dyn_cast_or_null(op.perm().getDefiningOp()); - auto perm1 = dyn_cast_or_null(transpose.perm().getDefiningOp()); - if (!perm0 || !perm1) return {}; - - // With permutation indices that cancel each other - auto perm0_value = perm0.value().cast(); - auto perm1_value = perm1.value().cast(); - if (!AreCancellablePermutations(perm0_value, perm1_value)) return {}; - - return transpose.x(); -} - -} // namespace - -OpFoldResult TransposeOp::fold(ArrayRef operands) { - if (auto folded = FoldIdentityTranspose(*this)) return folded; - if (auto folded = FoldCancellableTranspose(*this)) return folded; - return {}; -} - -//===----------------------------------------------------------------------===// -// TruncateDivOp -//===----------------------------------------------------------------------===// - -void TruncateDivOp::getCanonicalizationPatterns( - OwningRewritePatternList &results, MLIRContext *context) { - results.insert(context); -} - -//===----------------------------------------------------------------------===// -// UnpackOp -//===----------------------------------------------------------------------===// - -static LogicalResult Verify(UnpackOp op) { - auto value_type = op.value().getType().dyn_cast(); - if (!value_type) return success(); - - int64_t value_rank = value_type.getRank(); - int64_t axis = op.axis().getSExtValue(); - if (axis < -value_rank || axis >= value_rank) - return op.emitOpError("axis attribute must be in the range of [-") - << value_rank << ", " << value_rank << ')'; - - axis = GetDimForAxis(axis, value_rank); - int64_t dim_size = value_type.getDimSize(axis); - if (ShapedType::isDynamic(dim_size)) return success(); - - if (dim_size != op.getNumResults()) - return op.emitOpError("result count must be equal to ") << dim_size; - - return success(); -} - -//===----------------------------------------------------------------------===// -// Unsorted segment reduction ops 
-//===----------------------------------------------------------------------===// - -template -static LogicalResult VerifyUnsortedSegmentReduction(Op op) { - if (!HasRankAtMost(op.num_segments(), 0)) - return op.emitOpError("number of segments should be a 0-D tensor"); - - auto data_type = op.data().getType().template dyn_cast(); - auto segment_ids_type = - op.segment_ids().getType().template dyn_cast(); - if (data_type && segment_ids_type) { - if (data_type.getRank() < segment_ids_type.getRank()) - return op.emitOpError( - "requires segment ids rank to be less than or equal to data's rank"); - - int index = 0; - for (auto shape_pair : - llvm::zip_first(segment_ids_type.getShape(), data_type.getShape())) { - int64_t segment_id_dim = std::get<0>(shape_pair); - int64_t data_dim = std::get<1>(shape_pair); - if (!ShapedType::isDynamic(segment_id_dim) && - !ShapedType::isDynamic(data_dim) && segment_id_dim != data_dim) - return op.emitOpError( - "requires segment ids shape to be a prefix of data shape, " - "but dimension #") - << index << " differs: " << segment_id_dim << " vs. " - << data_dim; - ++index; - } - } - - DenseIntElementsAttr num_segments_attr; - if (matchPattern(op.num_segments(), m_Constant(&num_segments_attr))) { - int64_t num_segments = (*num_segments_attr.begin()).getSExtValue(); - if (num_segments < 0) - return op.emitOpError("num of segments cannot be negative"); - } - - return success(); -} - -//===----------------------------------------------------------------------===// -// VariableShapeOp -//===----------------------------------------------------------------------===// - -static LogicalResult Verify(VariableShapeOp op) { - auto input_type = op.input().getType().cast(); - if (input_type.hasStaticShape() && input_type.getNumElements() != 1) - return op.emitOpError("requires input to have one resource"); - - auto resource_type = input_type.getElementType().cast(); - auto subtypes = resource_type.getSubtypes(); - switch (subtypes.size()) { - case 1: - return VerifyShapeOperandAndResult( - op, resource_type.getSubtypes().front(), op.getType()); - case 0: - return VerifyShapeOperandAndResult(op, Type(), op.getType()); - default: - return op.emitOpError( - "requires resource input type to have at most 1 subtype"); - } -} - -OpFoldResult VariableShapeOp::fold(ArrayRef operands) { - int width = - getType().cast().getElementType().getIntOrFloatBitWidth(); - auto resource_type = - getElementTypeOrSelf(getOperand().getType()).cast(); - if (resource_type.getSubtypes().empty()) return {}; - return ConvertShapeToAttr(resource_type.getSubtypes()[0], width); -} - -//===----------------------------------------------------------------------===// -// WhileOp -//===----------------------------------------------------------------------===// - -static LogicalResult Verify(WhileOp op) { - auto module = op.getParentOfType(); - auto cond_fn = module.lookupSymbol(op.cond()); - auto body_fn = module.lookupSymbol(op.body()); - if (!cond_fn) { - return op.emitOpError("cond refers to an undefined function : ") - << op.cond(); - } - if (!body_fn) { - return op.emitOpError("body refers to an undefined function : ") - << op.body(); - } - - auto cond_fn_type = cond_fn.getType(); - auto body_fn_type = body_fn.getType(); - - // Verify that the cond function has exactly one result. 
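The prefix rule in the unsorted segment reduction verifier above (the segment_ids shape must match the leading dimensions of the data shape, with unknown sizes treated as compatible) reduces to a short standalone check; -1 models a dynamic dimension and the helper name is hypothetical.

#include <cstddef>
#include <cstdint>
#include <vector>

// Returns true if `prefix` (e.g. the segment_ids shape) is compatible with
// the leading dimensions of `shape` (e.g. the data shape); a -1 entry is an
// unknown size and matches anything.
bool IsCompatiblePrefix(const std::vector<int64_t>& prefix,
                        const std::vector<int64_t>& shape) {
  if (prefix.size() > shape.size()) return false;
  for (std::size_t i = 0; i < prefix.size(); ++i) {
    if (prefix[i] != -1 && shape[i] != -1 && prefix[i] != shape[i])
      return false;
  }
  return true;
}

For instance, segment_ids of shape {8, -1} is compatible with data of shape {8, 16, 4}, while {9, 16} is not.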
- if (cond_fn_type.getNumResults() != 1) - return op.emitOpError("requires cond function to have exactly one result"); - - SmallVector operands(op.getOperandTypes()); - - // Collect all the type lists for the op so that different pairs of type lists - // can be compared for the compatibility. - constexpr int kNumTypeLists = 5; - const std::array>, kNumTypeLists> - type_lists = {{ - {"operand", operands}, - {"body function result", body_fn_type.getResults()}, - {"result", op.getResultTypes()}, - {"cond function input", cond_fn_type.getInputs()}, - {"body function input", body_fn_type.getInputs()}, - }}; - - // A pair of type lists should be cast compatible with each other if one is - // converted to the another for a function call or assignment or there is a - // common source of inputs for both. Therefore, the While op requires the - // following pairs of type lists to be cast compatible for the tensor_cast - // operation: - // - // * Operands and cond inputs to call the cond function before the - // first iteration. - // * Operands and body inputs to call the body function for the first - // iteration if the cond functions returns True or equivalent result. - // * Operands and results to assign cond function arguments to op results if - // the cond function returns False or equivalent result. - // * All three pairs using cond inputs, body inputs and results as operand is - // a common source for all three. - // * Body result and cond inputs to call the cond function for the subsequent - // iterations. Similarly, Body result should be compatible with body inputs - // and op results. - // - // Note that the operands and body results need not be compatible as they are - // never converted from one to the another nor there is a common source - // tensors. Compatibility requirement is not transitive. - - for (int i = 0; i < kNumTypeLists; ++i) { - // Skip the first pair as the While op operands and body function results - // does not need to be compatible with each other. - for (int j = std::max(2, i + 1); j < kNumTypeLists; ++j) { - auto &a = type_lists[i]; - auto &b = type_lists[j]; - - int a_size = a.second.size(); - if (a_size != b.second.size()) - return op.emitOpError( - llvm::formatv("requires the number of {0}s to be equal to the " - "number of {1}s. Found {2} and {3}, respectively", - a.first, b.first, a_size, b.second.size())); - - for (int idx = 0; idx < a_size; ++idx) { - auto a_type = a.second[idx]; - auto b_type = b.second[idx]; - - if (!AreCastCompatible({a_type, b_type})) - return op.emitError(llvm::formatv( - "{0} type {1} is incompatible with {2} type {3} at index {4}", - a.first, a_type, b.first, b_type, idx)); - } - } - } - return success(); -} - -//===----------------------------------------------------------------------===// -// WhileRegionOp -//===----------------------------------------------------------------------===// -static LogicalResult Verify(WhileRegionOp op) { - // Verify that the condition generates a single tensor result. - YieldOp yield = cast(op.cond().front().getTerminator()); - if (yield.getNumOperands() != 1) - return op.emitOpError() - << "condition should have a single tensor result"; - - auto cond_type = yield.getOperand(0).getType().dyn_cast(); - if (!cond_type || !cond_type.getShape().equals({}) || - !cond_type.getElementType().isInteger(/*width=*/1)) - return op.emitOpError() - << "condition should have a single tensor result"; - - // The body result types should match while op result types. 
- if (failed(VerifyRegionResults(op, op.body(), "body"))) return failure(); - - // Both condition and body should have same number and type of operands as - // the WhileRegion inputs. - const int num_inputs = op.getNumOperands(); - auto block_inputs_match_op_inputs = [&](Region ®ion, - StringRef name) -> LogicalResult { - Block &block = region.front(); - if (block.getNumArguments() != num_inputs) - return op.emitOpError() - << name << " should have same number of inputs (" << num_inputs - << ") as " << WhileRegionOp::getOperationName() << " but has " - << block.getNumArguments() << " inputs"; - - for (auto types_idx : llvm::enumerate( - llvm::zip(op.getOperandTypes(), block.getArgumentTypes()))) { - auto op_input_type = std::get<0>(types_idx.value()); - auto block_input_type = std::get<1>(types_idx.value()); - if (!AreCastCompatible({block_input_type, op_input_type})) - return op.emitOpError(llvm::formatv( - "{0} input type {1} is incompatible with {2} " - "input type {3} at index {4}", - name, block_input_type, WhileRegionOp::getOperationName(), - op_input_type, types_idx.index())); - } - return success(); - }; - - if (failed(block_inputs_match_op_inputs(op.cond(), "condition")) || - failed(block_inputs_match_op_inputs(op.body(), "body"))) - return failure(); - - return success(); -} - -//===----------------------------------------------------------------------===// -// WhileRegionOp LoopLikeOpInterface -//===----------------------------------------------------------------------===// - -Region &WhileRegionOp::getLoopBody() { return body(); } - -bool WhileRegionOp::isDefinedOutsideOfLoop(Value value) { - // If the Op defining the value exists and the defining op is outside the - // scope of this WhileRegion, then we can infer that its defined outside. - // The defining Op is outside the scope of this WhileRegion if this - // WhileRegionOp is not an ancestor of the defining op in the parent chain. - Operation *def_op = value.getDefiningOp(); - return def_op && !getOperation()->isAncestor(def_op); -} - -LogicalResult WhileRegionOp::moveOutOfLoop( - llvm::ArrayRef ops) { - // Move the hoisted value to just before the while. - Operation *while_op = this->getOperation(); - for (auto op : ops) op->moveBefore(while_op); - return success(); -} - -//===----------------------------------------------------------------------===// -// WhileRegionOp canonicalization -//===----------------------------------------------------------------------===// -namespace { -// Eliminate values that pass through the WhileRegionOp body. -struct WhileRegionEliminatePassThrough - : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(WhileRegionOp while_op, - PatternRewriter &rewriter) const override { - // Replace values that simply passthrough the body with extern values. The - // block arguments of body and while match and so the corresponding cond - // argument can be easily found. - int old_num_operands = while_op.getNumOperands(); - int new_num_operands = old_num_operands; - auto &body_block = while_op.body().front(); - auto &cond_block = while_op.cond().front(); - auto &yield = *body_block.getTerminator(); - - // Bit mask indicating which operands will be removed. 
- SmallVector removed_operand(old_num_operands, false); - - for (int op_idx : llvm::seq(0, old_num_operands)) { - auto body_arg = body_block.getArgument(op_idx); - if (body_arg == yield.getOperand(op_idx)) { - // Replace the use of the passthrough value with the while operand - // in the body and condition regions, as well as the while output (if - // type match) - // TODO(jurahul): Use PatternRewriter API for IR modification. - auto value = while_op.getOperand(op_idx); - if (body_arg.getType() == value.getType()) - body_arg.replaceAllUsesWith(value); - - auto cond_arg = cond_block.getArgument(op_idx); - if (cond_arg.getType() == value.getType()) - cond_arg.replaceAllUsesWith(value); - - auto result = while_op.getResult(op_idx); - if (result.getType() == value.getType()) - result.replaceAllUsesWith(value); - } - - // Now check if the operand is unused in both regions as well as the - // result. If so, mark it for removal. - if (body_block.getArgument(op_idx).use_empty() && - cond_block.getArgument(op_idx).use_empty() && - while_op.getResult(op_idx).use_empty()) { - removed_operand[op_idx] = true; - new_num_operands--; - } - } - - if (new_num_operands == old_num_operands) return failure(); - - // Compress the operands, region arguments, and outputs. - SmallVector new_while_operands; - SmallVector new_result_types; - new_while_operands.reserve(new_num_operands); - new_result_types.reserve(new_num_operands); - - // Build new operands and result type. - int next_idx = 0; - for (int op_idx : llvm::seq(0, old_num_operands)) { - if (removed_operand[op_idx]) continue; - new_while_operands.push_back(while_op.getOperand(op_idx)); - new_result_types.push_back(while_op.getResult(op_idx).getType()); - next_idx++; - } - - // Create the new while operation. - auto new_while_op = - rewriter.create(while_op.getLoc(), new_result_types, - new_while_operands, while_op.getAttrs()); - - // Move region bodies to the new while. - rewriter.inlineRegionBefore(while_op.cond(), new_while_op.cond(), - new_while_op.cond().end()); - rewriter.inlineRegionBefore(while_op.body(), new_while_op.body(), - new_while_op.body().end()); - - auto &new_cond_block = new_while_op.cond().front(); - auto &new_body_block = new_while_op.body().front(); - auto &new_yield = *new_body_block.getTerminator(); - - // Build a vector of new results. Also patch up the region bodies and yield. 
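Stripped of the rewriter plumbing, the pass-through elimination above makes one decision per loop operand: after pass-through block arguments have been forwarded to their users, any operand that is no longer used by the condition region, the body region, or the corresponding result can be dropped. The struct and helper below are hypothetical stand-ins that only illustrate that decision.

#include <cstddef>
#include <vector>

// Per-operand facts gathered from the two loop regions.
struct LoopOperand {
  bool body_arg_used;  // body block argument i still has uses
  bool cond_arg_used;  // cond block argument i still has uses
  bool result_used;    // loop result i still has uses
};

// After pass-through block arguments have been replaced with the corresponding
// outer values, any operand that is dead in both regions and whose result is
// unused can be removed from the loop signature.
std::vector<bool> OperandsToRemove(const std::vector<LoopOperand>& operands) {
  std::vector<bool> remove(operands.size(), false);
  for (std::size_t i = 0; i < operands.size(); ++i) {
    const LoopOperand& o = operands[i];
    remove[i] = !o.body_arg_used && !o.cond_arg_used && !o.result_used;
  }
  return remove;
}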
- SmallVector new_results; - next_idx = 0; - for (int op_idx : llvm::seq(0, old_num_operands)) { - if (removed_operand[op_idx]) { - new_cond_block.eraseArgument(next_idx); - new_body_block.eraseArgument(next_idx); - new_yield.eraseOperand(next_idx); - new_results.push_back(nullptr); - } else { - new_results.push_back(new_while_op.getResult(next_idx++)); - } - } - - rewriter.replaceOp(while_op, new_results); - return success(); - } -}; - -} // anonymous namespace - -void WhileRegionOp::getCanonicalizationPatterns( - OwningRewritePatternList &results, MLIRContext *context) { - results.insert(context); -} - -//===----------------------------------------------------------------------===// -// XdivyOp -//===----------------------------------------------------------------------===// - -void XdivyOp::getCanonicalizationPatterns(OwningRewritePatternList &results, - MLIRContext *context) { - results.insert(context); -} - -//===----------------------------------------------------------------------===// -// TableGen'd op method definitions -//===----------------------------------------------------------------------===// - -#define GET_OP_CLASSES -#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc.inc" - //===----------------------------------------------------------------------===// // TF Dialect Interfaces //===----------------------------------------------------------------------===// @@ -4601,8 +124,6 @@ struct TFInlinerInterface : public DialectInlinerInterface { // TF Dialect //===----------------------------------------------------------------------===// -#include "tensorflow/compiler/mlir/tensorflow/ir/tf_op_interfaces.cc.inc" - std::vector *TensorFlowDialect::additional_operation_hooks_ = new std::vector(); @@ -4611,7 +132,7 @@ TensorFlowDialect::TensorFlowDialect(MLIRContext *context) : Dialect(/*name=*/"tf", context) { addOperations< #define GET_OP_LIST -#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc.inc" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_all_ops.cc.inc" >(); addTypes< #define HANDLE_TF_TYPE(tftype, enumerant, name) tftype##Type, diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h index f37b71575f6..d06dce81e09 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h @@ -35,6 +35,9 @@ limitations under the License. #include "mlir/Interfaces/SideEffectInterfaces.h" // from @llvm-project #include "tensorflow/compiler/mlir/tensorflow/ir/tf_attributes.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_op_interfaces.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_remaining_ops.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_structs.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_traits.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" @@ -112,17 +115,6 @@ class TensorFlowDialect : public Dialect { static std::vector *additional_operation_hooks_; }; -// TODO(b/131258166): TensorFlow's mutex.h defines a `mutex_lock` macro, whose -// purpose is to catch bug on `tensorflow::mutex_lock`. We don't use -// `tensorflow::mutex_lock` here but we have ops (`tf.MutexLock` and -// `tf.ConsumeMutexLock`) with getter methods named as `mutex_lock()`. Need to -// undefine here to avoid expanding the getter symbol as macro when including -// both mutex.h and this header file. 
-#undef mutex_lock - -#define GET_OP_CLASSES -#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h.inc" - } // namespace TF } // namespace mlir diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc new file mode 100644 index 00000000000..af7a16ba127 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc @@ -0,0 +1,1807 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Sequence.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/FormatVariadic.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project +#include "mlir/Dialect/Traits.h" // from @llvm-project +#include "mlir/IR/Attributes.h" // from @llvm-project +#include "mlir/IR/Builders.h" // from @llvm-project +#include "mlir/IR/Diagnostics.h" // from @llvm-project +#include "mlir/IR/DialectImplementation.h" // from @llvm-project +#include "mlir/IR/Function.h" // from @llvm-project +#include "mlir/IR/Identifier.h" // from @llvm-project +#include "mlir/IR/Location.h" // from @llvm-project +#include "mlir/IR/MLIRContext.h" // from @llvm-project +#include "mlir/IR/Matchers.h" // from @llvm-project +#include "mlir/IR/OpDefinition.h" // from @llvm-project +#include "mlir/IR/OpImplementation.h" // from @llvm-project +#include "mlir/IR/PatternMatch.h" // from @llvm-project +#include "mlir/IR/StandardTypes.h" // from @llvm-project +#include "mlir/IR/TypeUtilities.h" // from @llvm-project +#include "mlir/IR/Types.h" // from @llvm-project +#include "mlir/IR/Value.h" // from @llvm-project +#include "mlir/Parser.h" // from @llvm-project +#include "mlir/Support/LLVM.h" // from @llvm-project +#include "mlir/Support/LogicalResult.h" // from @llvm-project +#include "mlir/Transforms/InliningUtils.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_attributes.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_side_effects.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_structs.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/util/tensor_format.h" + +namespace mlir { +namespace TF { + +namespace { +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops_helpers.inc" +#include 
"tensorflow/compiler/mlir/tensorflow/transforms/generated_canonicalize.inc" +} // namespace + +//===----------------------------------------------------------------------===// +// AddOp +//===----------------------------------------------------------------------===// + +void AddOp::getCanonicalizationPatterns(OwningRewritePatternList &results, + MLIRContext *context) { + results.insert(context); +} + +//===----------------------------------------------------------------------===// +// AddNOp +//===----------------------------------------------------------------------===// + +OpFoldResult AddNOp::fold(ArrayRef operands) { + if (operands.size() == 1) return *inputs().begin(); + return {}; +} + +//===----------------------------------------------------------------------===// +// AddV2Op +//===----------------------------------------------------------------------===// + +void AddV2Op::getCanonicalizationPatterns(OwningRewritePatternList &results, + MLIRContext *context) { + results.insert(context); +} + +OpFoldResult AddV2Op::fold(ArrayRef operands) { + return IdentityArithmeticOpFolder(*this, operands); +} + +//===----------------------------------------------------------------------===// +// AllOp +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(AllOp op) { + return VerifyReductionInputAndDims(op.input(), op.reduction_indices(), + op.getLoc()); +} + +//===----------------------------------------------------------------------===// +// AnyOp +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(AnyOp op) { + return VerifyReductionInputAndDims(op.input(), op.reduction_indices(), + op.getLoc()); +} + +//===----------------------------------------------------------------------===// +// AssertOp +//===----------------------------------------------------------------------===// + +namespace { + +// Removes Assert with constant true predicate. 
+struct AssertWithTrue : public OpRewritePattern<AssertOp> {
+  using OpRewritePattern<AssertOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(AssertOp op,
+                                PatternRewriter &rewriter) const override {
+    ElementsAttr cst;
+    if (matchPattern(op.condition(), m_Constant(&cst))) {
+      if (cst.getValue<BoolAttr>({}).getValue()) {
+        rewriter.eraseOp(op);
+        return success();
+      }
+    }
+    return failure();
+  }
+};
+}  // namespace
+
+void AssertOp::getCanonicalizationPatterns(OwningRewritePatternList &results,
+                                           MLIRContext *context) {
+  results.insert<AssertWithTrue>(context);
+}
+
+//===----------------------------------------------------------------------===//
+// BatchMatMulOp
+//===----------------------------------------------------------------------===//
+
+void BatchMatMulOp::getCanonicalizationPatterns(
+    OwningRewritePatternList &results, MLIRContext *context) {
+  results.insert(context);
+}
+
+//===----------------------------------------------------------------------===//
+// BatchMatMulV2Op
+//===----------------------------------------------------------------------===//
+
+static LogicalResult Verify(BatchMatMulV2Op op) {
+  if (!HasRankAtLeast(op.x(), 2)) {
+    return op.emitOpError("requires lhs operand to have rank at least two");
+  }
+  if (!HasRankAtLeast(op.y(), 2)) {
+    return op.emitOpError("requires rhs operand to have rank at least two");
+  }
+  return success();
+}
+
+void BatchMatMulV2Op::getCanonicalizationPatterns(
+    OwningRewritePatternList &results, MLIRContext *context) {
+  results.insert(context);
+}
+
+//===----------------------------------------------------------------------===//
+// BatchToSpaceOp
+//===----------------------------------------------------------------------===//
+
+static LogicalResult Verify(BatchToSpaceOp op) {
+  // Op already has a constraint that block_size >= 2.
+ int64_t block_size = op.block_size().getSExtValue(); + + llvm::SmallVector input_shape(4, ShapedType::kDynamicSize); + auto input_type = op.input().getType().cast(); + if (input_type.hasRank()) { + if (input_type.getRank() != 4) + return op.emitOpError() + << "requires input to be a 4D tensor, but got " << input_type; + + int64_t input_batch = input_type.getDimSize(0); + if (input_batch != ShapedType::kDynamicSize && + input_batch % (block_size * block_size) != 0) { + return op.emitOpError() + << "requires input batch (dimension 0) to be evenly divisible " + "by (block_size * block_size), but got input batch " + << input_batch << " and block_size " << block_size; + } + + input_shape.assign(input_type.getShape().begin(), + input_type.getShape().end()); + } + + auto crops_type = op.crops().getType().cast(); + if (crops_type.hasRank()) { + if (crops_type.getRank() != 2) + return op.emitOpError() + << "requires crops to be a 2D tensor, but got " << crops_type; + + auto dim_of_size = [&](int64_t dim, int64_t size) { + if (crops_type.isDynamicDim(dim)) return true; + return crops_type.getDimSize(dim) == size; + }; + if (!dim_of_size(0, 2) || !dim_of_size(1, 2)) + return op.emitOpError() + << "requires crops to be a tensor<2x2>, but got " << crops_type; + } + + DenseIntElementsAttr crops_attr; + // Crops are defined as [[crop_top, crop_bottom], [crop_left, crop_right]], + // and flattened as [crop_top, crop_bottom, crop_left, crop_right] + llvm::SmallVector crops_values; + if (matchPattern(op.crops(), m_Constant(&crops_attr))) { + assert(crops_attr.getNumElements() == 4 && + "tf.BatchToSpace crops must have 4 elements"); + + auto crops_range = crops_attr.getIntValues(); + for (const auto &crops_value : crops_range) { + int64_t crops_value_int = crops_value.getSExtValue(); + if (crops_value_int < 0) + return op.emitOpError() + << "requires all crop values to be nonnegative, but got " + << crops_attr; + + crops_values.push_back(crops_value_int); + } + } + + auto output_type = op.output().getType().cast(); + if (output_type.hasRank()) { + if (output_type.getRank() != 4) + return op.emitOpError() + << "requires output to be a 4D tensor, but got " << output_type; + + auto static_dims = [](int64_t dim_a, int64_t dim_b) { + return dim_a != ShapedType::kDynamicSize && + dim_b != ShapedType::kDynamicSize; + }; + + auto output_shape = output_type.getShape(); + + // output batch = input batch / (block_size * block_size). + int64_t input_batch = input_shape[0]; + int64_t output_batch = output_shape[0]; + if (static_dims(input_batch, output_batch) && + (output_batch * block_size * block_size) != input_batch) + return op.emitOpError() + << "requires output batch (dimension 0) to be equal to input " + "batch (dimension 0) / (block_size * block_size), but got " + "output batch " + << output_batch << ", input batch " << input_batch + << ", and block_size " << block_size; + + auto check_spatial_dim = [&](int64_t spatial_dim_index, + llvm::StringRef dim_name, + llvm::StringRef crop_a_name, + llvm::StringRef crop_b_name) -> LogicalResult { + int64_t input_dim = input_shape[spatial_dim_index]; + int64_t output_dim = output_shape[spatial_dim_index]; + if (!static_dims(input_dim, output_dim)) return success(); + + int64_t input_dim_pad = input_dim * block_size; + // If crops are unknown, the maximum output spatial dim size is input + // spatial dim size * block_size, as crops can be minimum 0. 
+ if (crops_values.empty() && output_dim > input_dim * block_size) + return op.emitOpError() + << "requires output " << dim_name << " (dimension " + << spatial_dim_index << ") to be less than or equal to input " + << dim_name << " (dimension " << spatial_dim_index + << ") * block_size, but got output " << dim_name << " " + << output_dim << ", input " << dim_name << " " << input_dim + << ", and block_size " << block_size; + + if (!crops_values.empty()) { + // output spatial dim = input spatial dim * block_size - crops. + int64_t crop_a = crops_values[2 * (spatial_dim_index - 1)]; + int64_t crop_b = crops_values[2 * (spatial_dim_index - 1) + 1]; + if (output_dim != input_dim_pad - crop_a - crop_b) + return op.emitOpError() + << "requires output " << dim_name << " (dimension " + << spatial_dim_index << ") to be equal to input " << dim_name + << " (dimension " << spatial_dim_index << ") * block_size - " + << crop_a_name << " - " << crop_b_name << ", but got output " + << dim_name << " " << output_dim << ", input " << dim_name + << " " << input_dim << ", " << crop_a_name << " " << crop_a + << ", " << crop_b_name << " " << crop_b << ", and block_size " + << block_size; + } + + return success(); + }; + + if (failed(check_spatial_dim(1, "height", "crop_top", "crop_bottom")) || + failed(check_spatial_dim(2, "width", "crop_left", "crop_right"))) + return failure(); + + int64_t input_depth = input_shape[3]; + int64_t output_depth = output_shape[3]; + if (static_dims(input_depth, output_depth) && output_depth != input_depth) + return op.emitOpError() + << "requires output depth (dimension 3) to be equal to input " + "depth (dimension 3), but got output depth " + << output_depth << " and input depth " << input_depth; + } + + return success(); +} + +void BatchToSpaceOp::getCanonicalizationPatterns( + OwningRewritePatternList &results, MLIRContext *context) { + results.insert(context); +} + +//===----------------------------------------------------------------------===// +// BiasAddOp +//===----------------------------------------------------------------------===// + +// Verifies that, +// * the value and bias operands have valid ranks or are unranked. +// * Channel dimension of the value operand and length of bias matches if they +// are not unknown. +// +static LogicalResult Verify(BiasAddOp op) { + StringRef format = op.data_format(); + if (format == "NHWC") { + if (!HasRankAtLeast(op.value(), 2)) + return op.emitOpError( + "requires value operand to have rank at least two with `NHWC` data " + "format"); + } else { + // Op definition requires data_format to be either NHWC or NCHW. + DCHECK_EQ(format.str(), "NCHW"); + if (!HasRankAtLeast(op.value(), 3)) + return op.emitOpError( + "requires value operand to have rank at least three with `NCHW` data " + "format"); + } + + if (!IsOfRankOrUnranked(op.bias(), 1)) + return op.emitOpError("requires bias operand to have rank exactly one"); + + RankedTensorType value_ty = op.value().getType().dyn_cast(); + RankedTensorType bias_ty = op.bias().getType().dyn_cast(); + if (!bias_ty || !value_ty) return success(); + + // TODO(hinsu): Leverage tensor_format.h utility in TensorFlow to compute + // dimension indices based on format. + int64_t feature_dim_idx = format == "NHWC" ? 
value_ty.getRank() - 1 : 1; + int64_t feature_dim = value_ty.getDimSize(feature_dim_idx); + int64_t bias_len = bias_ty.getDimSize(0); + if (feature_dim != -1 && bias_len != -1 && feature_dim != bias_len) { + return op.emitOpError() + << "requires channel dimension and feature dimension to match; " + "found " + << feature_dim << " and " << bias_len << ", respectively"; + } + return success(); +} + +//===----------------------------------------------------------------------===// +// BiasAddGradOp +//===----------------------------------------------------------------------===// + +// Verifies that, +// * the out_backprop operands have valid ranks or are unranked. +// +static LogicalResult Verify(BiasAddGradOp op) { + StringRef format = op.data_format(); + if (format == "NHWC") { + if (!HasRankAtLeast(op.out_backprop(), 2)) + return op.emitOpError( + "requires out_backprop operand to have rank at least two with `NHWC` " + "data format"); + } else { + // Op definition requires data_format to be either NHWC or NCHW. + DCHECK_EQ(format.str(), "NCHW"); + if (!HasRankAtLeast(op.out_backprop(), 3)) + return op.emitOpError( + "requires out_backprop operand to have rank at least three with " + "`NCHW` data format"); + } + + return success(); +} + +//===----------------------------------------------------------------------===// +// BiasAddV1Op +//===----------------------------------------------------------------------===// + +void BiasAddV1Op::getCanonicalizationPatterns(OwningRewritePatternList &results, + MLIRContext *context) { + results.insert(context); +} + +//===----------------------------------------------------------------------===// +// BitcastOp +//===----------------------------------------------------------------------===// + +void BitcastOp::getCanonicalizationPatterns(OwningRewritePatternList &results, + MLIRContext *context) { + results.insert(context); +} + +//===----------------------------------------------------------------------===// +// BroadcastToOp +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(BroadcastToOp op) { + // TODO(antiagainst): check that + // * The 'shape' input is an 1-D int tensor. + // * Each dimension pair of the source and target shapes are either equal + // or one of them is one. + return success(); +} + +//===----------------------------------------------------------------------===// +// CaseOp +//===----------------------------------------------------------------------===// + +class FoldConstantCaseOp : public OpRewritePattern { + public: + explicit FoldConstantCaseOp(MLIRContext *context) + : OpRewritePattern(context) {} + LogicalResult matchAndRewrite(TF::CaseOp op, + PatternRewriter &rewriter) const override; +}; + +LogicalResult FoldConstantCaseOp::matchAndRewrite( + TF::CaseOp op, PatternRewriter &rewriter) const { + // Extract the constant cond value. + DenseIntElementsAttr branch; + if (!matchPattern(op.branch_index(), m_Constant(&branch))) return failure(); + + // Only attempt to fold scalar valued case statements. + // TODO(jpienaar): This can be removed if CaseOp's verifier covers it. + if (!branch.getType().cast().getShape().empty()) + return failure(); + + int index = *branch.getValues().begin(); + // TODO(jpienaar): This can be removed if CaseOp's verifier covers it. 
+ if (index >= op.branches().size()) return failure(); + + auto func = op.branches()[index].cast(); + auto empty = rewriter.getStringAttr(""); + auto call_op = rewriter.create( + op.getLoc(), op.getResultTypes(), op.getOperands().drop_front(), func, + /*config=*/empty, /*config_proto=*/empty, /*executor_type=*/empty); + PropagateAttributes(op.getOperation(), call_op); + rewriter.replaceOp(op, call_op.getResults()); + return success(); +} + +void CaseOp::getCanonicalizationPatterns(OwningRewritePatternList &results, + MLIRContext *context) { + results.insert(context); +} + +//===----------------------------------------------------------------------===// +// CastOp +//===----------------------------------------------------------------------===// + +OpFoldResult CastOp::fold(ArrayRef operands) { + // Cast with the same type is a no-op. + Value operand = getOperand(); + if (getType() == operand.getType()) return operand; + return {}; +} + +//===----------------------------------------------------------------------===// +// ConcatOp and ConcatV2Op +//===----------------------------------------------------------------------===// + +template ::value>::type * = nullptr> +static LogicalResult Verify(OpT op) { + // TODO(hinsu): Convert variadic length attributes to derived attributes. + Operation::operand_range values = op.values(); + + int axis_idx = std::is_same() ? 0 : 1; + Value axis = *op.getODSOperands(axis_idx).begin(); + if (!HasRankAtMost(axis, 1)) { + return op.emitOpError( + "requires axis to be of scalar type (or vector type for older " + "versions)"); + } + + return VerifyTypesCompatibility(values, + /*mask_one_dim=*/true, op.getOperation()); +} + +void ConcatOp::getCanonicalizationPatterns(OwningRewritePatternList &results, + MLIRContext *context) { + results.insert(context); +} + +//===----------------------------------------------------------------------===// +// ConcatOffsetOp +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(ConcatOffsetOp op) { + if (op.N() < 2) + return op.emitOpError() << "requires N to be at least 2, got " << op.N(); + + if (op.shape().size() != op.offset().size()) + return op.emitOpError() + << "requires sizes of shapes and offsets to be the same, got sizes " + << op.shape().size() << " and " << op.offset().size(); + + auto ranked_dim = op.concat_dim().getType().dyn_cast(); + if (ranked_dim && ranked_dim.getRank() != 0) + return op.emitOpError() + << "requires concat_dim to be a scalar, got tensor of rank " + << ranked_dim.getRank(); + + int64_t num_dims = -1; + for (auto shape_offset_idx : + llvm::enumerate(llvm::zip(op.shape(), op.offset()))) { + Value shape = std::get<0>(shape_offset_idx.value()); + Value offset = std::get<1>(shape_offset_idx.value()); + const size_t idx = shape_offset_idx.index(); + + if (failed(verifyCompatibleShape(shape.getType(), offset.getType()))) + return op.emitOpError() << "requires operand and result " << idx + << " to have compatible shapes"; + + auto ranked_shape = shape.getType().dyn_cast(); + if (!ranked_shape) continue; + + if (ranked_shape.getRank() != 1) + return op.emitOpError() << "requires shape tensor operand " << idx + << " to be of rank 1, got tensor of rank " + << ranked_shape.getRank(); + + if (!ranked_shape.hasStaticShape()) continue; + + int64_t ranked_shape_dim = ranked_shape.getDimSize(0); + if (num_dims == -1) + num_dims = ranked_shape_dim; + else if (ranked_shape_dim != num_dims) + return op.emitOpError() + << "requires shape tensor (rank 1) 
operand " << idx + << " to be of length " << num_dims + << ", got tensor (rank 1) of length " << ranked_shape_dim; + } + + return success(); +} + +LogicalResult ConcatOffsetOp::fold(ArrayRef operands, + SmallVectorImpl &results) { + // ConcatOffset must have its first operand be concat_dim and at least two + // shape tensors in variadic shapes operand. + if (operands.size() < 3) return failure(); + + // Check concat_dim is a scalar. + auto concat_dim_attr = operands[0].dyn_cast_or_null(); + if (!concat_dim_attr || concat_dim_attr.getType().getRank() != 0) + return failure(); + + llvm::SmallVector shapes; + shapes.reserve(operands.size() - 1); + for (Attribute shape : llvm::drop_begin(operands, 1)) + if (auto shape_attr = shape.dyn_cast_or_null()) + shapes.push_back(shape_attr); + else + return failure(); + + // Check all shapes are vectors of the same length. + if (shapes.front().getType().getRank() != 1) return success(); + const int64_t num_dims = shapes.front().getNumElements(); + for (DenseIntElementsAttr shape : llvm::drop_begin(shapes, 1)) + if (shape.getType().getRank() != 1 || shape.getNumElements() != num_dims) + return failure(); + + // Check concat_dim is within [-num_dims, num_dims). + int32_t concat_dim = (*concat_dim_attr.getValues().begin()); + if (concat_dim < 0) concat_dim += num_dims; + if (concat_dim >= num_dims || concat_dim < 0) return failure(); + + // Check all elements besides at concat_dim match across all shape tensors. + SmallVector shape0; + shape0.reserve(num_dims); + for (int32_t dim : shapes.front().getValues()) shape0.push_back(dim); + + for (DenseIntElementsAttr shape : llvm::drop_begin(shapes, 1)) { + for (auto dims_and_idx : llvm::enumerate(llvm::zip(shape0, shape))) { + if (dims_and_idx.index() == concat_dim) continue; + + if (std::get<0>(dims_and_idx.value()) != + std::get<1>(dims_and_idx.value()).getSExtValue()) + return failure(); + } + } + + // Compute an exclusive cumulative sum of elements at concat_dim. + results.reserve(shapes.size()); + SmallVector cumulative_sum(num_dims, 0); + RankedTensorType offset_type = + RankedTensorType::get({num_dims}, IntegerType::get(32, getContext())); + for (DenseIntElementsAttr shape : shapes) { + results.push_back(DenseIntElementsAttr::get(offset_type, cumulative_sum)); + cumulative_sum[concat_dim] += shape.getValue(concat_dim); + } + + return success(); +} + +//===----------------------------------------------------------------------===// +// ConjOp +//===----------------------------------------------------------------------===// + +void ConjOp::getCanonicalizationPatterns(OwningRewritePatternList &results, + MLIRContext *context) { + results.insert(context); +} + +//===----------------------------------------------------------------------===// +// ConstOp +//===----------------------------------------------------------------------===// + +OpFoldResult ConstOp::fold(ArrayRef operands) { + assert(operands.empty() && "constant has no operands"); + + // Return the held attribute value. + return value(); +} + +// Builds a constant op with the specified attribute `value`. The result +// op's type is deduced from `value`; if `value` is of scalar type, +// wraps it up with a tensor type of empty shape. +// TODO(jpienaar): This one differs from the autogenerated one as it takes an +// attribute but always creates an ElementsAttr internally. 
+void ConstOp::build(OpBuilder &builder, OperationState &result, + Attribute value) { + ShapedType type; + if (auto elem_attr = value.dyn_cast()) { + return ConstOp::build(builder, result, elem_attr); + } else if (value.isa()) { + // All TensorFlow types must be tensor types. In the build() method, + // we want to provide more flexibility by allowing attributes of scalar + // types. But we need to wrap it up with ElementsAttr to construct + // valid TensorFlow constants. + type = RankedTensorType::get(/*shape=*/{}, value.getType()); + return ConstOp::build(builder, result, DenseElementsAttr::get(type, value)); + } + // TODO(jpienaar): support other TensorFlow specific types. + llvm_unreachable("unsupported attribute type for building tf.Const"); +} + +void ConstOp::build(OpBuilder &builder, OperationState &result, Type type, + Attribute value) { + // Handle the case where the type and value are already tensors. + if (type.isa() && value.isa()) { + result.addTypes(type); + result.addAttribute("value", value); + return; + } + + // Otherwise, default to the attribute builder. + ConstOp::build(builder, result, value); + assert(type == result.types[0] && "type mismatch in construction"); +} + +LogicalResult ConstOp::inferReturnTypes( + MLIRContext *context, Optional location, ValueRange operands, + DictionaryAttr attributes, RegionRange regions, + SmallVectorImpl &inferredReturnTypes) { + auto value = attributes.get("value"); + if (!value) return emitOptionalError(location, "missing attribute 'value'"); + if (auto elem_attr = value.dyn_cast()) { + inferredReturnTypes.assign({elem_attr.getType()}); + return success(); + } + return emitOptionalError(location, + "attribute 'value' failed to satisfy constraint: " + "constant vector/tensor"); +} + +//===----------------------------------------------------------------------===// +// Conv2DOp and Conv3DOp +//===----------------------------------------------------------------------===// + +template +static LogicalResult VerifyConvOpAttributes(OpT op, int num_dims) { + if (!IsOfRankOrUnranked(op.getResult(), num_dims)) + return op.emitOpError() + << "requires result to be " << num_dims << "D tensor"; + + auto is_not_positive = [](Attribute val) { + return val.cast().getValue().getSExtValue() <= 0; + }; + + int64_t strides_size = op.strides().size(); + if (strides_size != num_dims) + return op.emitOpError() << "requires strides attribute length to be " + << num_dims << "; actual length " << strides_size; + if (llvm::any_of(op.strides().getValue(), is_not_positive)) + return op.emitOpError("requires positive strides"); + + int64_t dilations_size = op.strides().size(); + if (op.dilations().size() != num_dims) + return op.emitOpError() << "requires dilations attribute length to be " + << num_dims << "; actual length " << dilations_size; + if (llvm::any_of(op.dilations().getValue(), is_not_positive)) + return op.emitOpError("requires positive dilations"); + + return success(); +} + +// Verifies that, +// * Ranks of operands and result are valid +// * Number of input channels is divisible by the number of filter input +// channels +// * Length of explicit_paddings attribute is valid and has non negative +// elements +// * strides and dilations attributes have positive elements +template ::value>::type * = nullptr> +static LogicalResult Verify(OpT op) { + int num_spatial_dims = std::is_same() ? 
2 : 3; + int num_dims = 2 + num_spatial_dims; + + if (!IsOfRankOrUnranked(op.input(), num_dims) || + !IsOfRankOrUnranked(op.filter(), num_dims)) + return op.emitOpError() + << "requires operands to be " << num_dims << "D tensor"; + + // EXPLICIT padding mode and the associated attribute is limited to Conv2D. + // So, fetch attribute by string instead of the op.explicit_paddings() + // attribute getter. + if (op.padding() == "EXPLICIT") { + auto paddings = op.template getAttrOfType("explicit_paddings"); + if (!paddings) + return op.emitOpError() << "requires attribute 'explicit_paddings' with " + "'EXPLICIT' padding mode"; + + int64_t paddings_size = paddings.size(); + int64_t expected_size = 2 * num_dims; + + if (paddings_size != expected_size) + return op.emitOpError() + << "requires explicit_paddings attribute length to be " + << expected_size << "; actual length " << paddings_size; + + auto is_negative = [](Attribute val) { + return val.cast().getValue().getSExtValue() < 0; + }; + if (llvm::any_of(paddings.getValue(), is_negative)) + return op.emitOpError("requires non negative explicit paddings"); + } + + LogicalResult verify_result = VerifyConvOpAttributes(op, num_dims); + if (failed(verify_result)) { + return verify_result; + } + + int64_t input_channels = -1; + if (auto ty = op.input().getType().template dyn_cast()) { + std::string data_format = op.data_format().str(); + tensorflow::TensorFormat format; + auto is_valid = FormatFromString(data_format, &format); + DCHECK(is_valid) << data_format; + int idx = tensorflow::GetTensorFeatureDimIndex(num_dims, format); + input_channels = ty.getDimSize(idx); + } + + int64_t filter_channels = -1; + if (auto ty = op.filter().getType().template dyn_cast()) { + int idx = tensorflow::GetFilterTensorInputChannelsDimIndex( + num_dims, tensorflow::FORMAT_HWIO); + filter_channels = ty.getDimSize(idx); + } + + if (input_channels != -1 && filter_channels != -1 && + input_channels % filter_channels != 0) + return op.emitOpError() + << "requires the number of input channels to be divisible by the " + "number of filter input channels; found " + << input_channels << " and " << filter_channels << ", respectively"; + + return success(); +} + +LogicalResult Conv2DOp::UpdateDataFormat(StringRef data_format) { + auto perm = GetDataFormatPermutation(this->data_format(), data_format); + if (perm.empty()) return failure(); + + // Update data_format attribute and result types. + if (failed(::mlir::TF::UpdateDataFormat(data_format, this))) return failure(); + + // Update convolution attributes. + setAttr("dilations", ShuffleArrayAttr(dilations(), perm)); + setAttr("strides", ShuffleArrayAttr(strides(), perm)); + setAttr("explicit_paddings", ShuffleArrayAttr(explicit_paddings(), perm, 2)); + + return success(); +} + +StringRef Conv2DOp::GetOptimalLayout(const RuntimeDevices &devices) { + // Keep current data format if no GPUs are available or if explicit placement + // does not allow to use GPU for this operation. + if (!CanUseGpuDevice(devices) || !CanUseGpuDevice(getOperation())) + return data_format(); + + // Input must be a tensor. + auto input_ty = input().getType().dyn_cast(); + if (!input_ty) return data_format(); + + // For f16 data type on devices with Tensor Cores support NHWC data format + // is up to ~2x faster. 
+ const bool is_f16 = input_ty.getElementType().isF16(); + if (is_f16 && CanUseTensorCores(devices)) return "NHWC"; + + // For f32/f16 data type decision depends on the filter size in spatial + // dimensions, for other data types we keep current data format. + if (!input_ty.getElementType().isF32() && !input_ty.getElementType().isF16()) + return data_format(); + + // Keep current data format if filter rank is unknown or not equal to 4. + auto filter_ty = filter().getType().dyn_cast(); + if (!filter_ty || filter_ty.getRank() != 4) return data_format(); + + const int64_t d0 = filter_ty.getDimSize(0); + const int64_t d1 = filter_ty.getDimSize(1); + + auto all_ones = [](ArrayAttr arr) -> bool { + return llvm::all_of(arr, [](Attribute attr) -> bool { + return attr.cast().getInt() == 1; + }); + }; + + // Convolutions with 1x1 filter and with strides and dilations all ones, can + // be computed as a GEMM in NHWC data format, and can be up to ~2x times + // faster than convolution in NCHW. + const bool one_by_one = d0 == 1 && d1 == 1; + const bool trivial_strides = all_ones(strides()); + const bool trivial_dilations = all_ones(dilations()); + + // TODO(ezhulenev): This might lead to excessive transposes in the final IR, + // if the ratio of 1x1 convolutions to regular convolutions is close to 1:1. + // Also FusedBatchNorm in training mode prefers NCHW data format. Check if all + // users can efficiently use NHWC data format? + if (one_by_one && trivial_strides && trivial_dilations) { + return "NHWC"; + } + + // If filter spatial dimensions are unknown or not 1x1 we prefer NCHW, because + // it's the fastest option on NVIDIA GPUs with cuDNN library support. + return "NCHW"; +} + +//===----------------------------------------------------------------------===// +// Conv2dBackpropFilterOp +//===----------------------------------------------------------------------===// + +LogicalResult Conv2DBackpropFilterOp::UpdateDataFormat(StringRef data_format) { + StringRef src_data_format = this->data_format(); + + auto perm = GetDataFormatPermutation(src_data_format, data_format); + if (perm.empty()) return failure(); + + // Update data_format attribute and result types. + if (failed(::mlir::TF::UpdateDataFormat(data_format, this))) return failure(); + + // Update convolution attributes. + setAttr("dilations", ShuffleArrayAttr(dilations(), perm)); + setAttr("strides", ShuffleArrayAttr(strides(), perm)); + setAttr("explicit_paddings", ShuffleArrayAttr(explicit_paddings(), perm, 2)); + + // Permute filter sizes operand. + OpBuilder builder(getOperation()); + auto filter_sizes_permuted = builder.create( + getLoc(), filter_sizes(), StringAttr::get(src_data_format, getContext()), + StringAttr::get(data_format, getContext())); + setOperand(1, filter_sizes_permuted); + + return success(); +} + +StringRef Conv2DBackpropFilterOp::GetOptimalLayout( + const RuntimeDevices &devices) { + // Keep current data format if no GPUs are available or if explicit placement + // does not allow to use GPU for this operation. + if (!CanUseGpuDevice(devices) || !CanUseGpuDevice(getOperation())) + return data_format(); + + // Input must be a tensor. + auto input_ty = input().getType().dyn_cast(); + if (!input_ty) return data_format(); + + // For f16 data type on devices with Tensor Cores support NHWC data format + // is up to ~2x faster. + const bool is_f16 = input_ty.getElementType().isF16(); + if (is_f16 && CanUseTensorCores(devices)) return "NHWC"; + + // Otherwise always use "NCHW". 
+ return "NCHW"; +} + +//===----------------------------------------------------------------------===// +// Conv2DBackpropInputOp +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(Conv2DBackpropInputOp op) { + int num_spatial_dims = 2; + int num_dims = 2 + num_spatial_dims; + + if (!IsOfRankOrUnranked(op.out_backprop(), num_dims) || + !IsOfRankOrUnranked(op.filter(), num_dims)) + return op.emitOpError() + << "requires operands to be " << num_dims << "D tensor"; + + LogicalResult verify_result = VerifyConvOpAttributes(op, num_dims); + if (failed(verify_result)) { + return verify_result; + } + + return success(); +} + +LogicalResult Conv2DBackpropInputOp::UpdateDataFormat(StringRef data_format) { + StringRef src_data_format = this->data_format(); + + auto perm = GetDataFormatPermutation(src_data_format, data_format); + if (perm.empty()) return failure(); + + // Update data_format attribute and result types. + if (failed(::mlir::TF::UpdateDataFormat(data_format, this))) return failure(); + + // Update convolution attributes. + setAttr("dilations", ShuffleArrayAttr(dilations(), perm)); + setAttr("strides", ShuffleArrayAttr(strides(), perm)); + setAttr("explicit_paddings", ShuffleArrayAttr(explicit_paddings(), perm, 2)); + + // Permute input sizes operand. + OpBuilder builder(getOperation()); + auto input_sizes_permuted = builder.create( + getLoc(), input_sizes(), StringAttr::get(src_data_format, getContext()), + StringAttr::get(data_format, getContext())); + setOperand(0, input_sizes_permuted); + + return success(); +} + +StringRef Conv2DBackpropInputOp::GetOptimalLayout( + const RuntimeDevices &devices) { + // Keep current data format if no GPUs are available or if explicit placement + // does not allow to use GPU for this operation. + if (!CanUseGpuDevice(devices) || !CanUseGpuDevice(getOperation())) + return data_format(); + + // Filter must be a tensor. + auto filter_ty = filter().getType().dyn_cast(); + if (!filter_ty) return data_format(); + + // For f16 data type on devices with Tensor Cores support NHWC data format + // is up to ~2x faster. + const bool is_f16 = filter_ty.getElementType().isF16(); + if (is_f16 && CanUseTensorCores(devices)) return "NHWC"; + + // Otherwise always use "NCHW". 
+ return "NCHW"; +} + +//===----------------------------------------------------------------------===// +// DataFormatVecPermuteOp +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(DataFormatVecPermuteOp op) { + auto input_ty = op.x().getType().dyn_cast(); + if (!input_ty) return success(); + + int rank = input_ty.getRank(); + if (rank != 1 && rank != 2) + return op.emitOpError("requires input of rank 1 or 2"); + + if (rank == 1) { + int64_t dim0 = input_ty.getDimSize(0); + if (dim0 != ShapedType::kDynamicSize && dim0 != 4 && dim0 != 2) + return op.emitOpError("requires 1D input of size 4 or size 2"); + } + + if (rank == 2) { + int64_t dim0 = input_ty.getDimSize(0); + if (dim0 != ShapedType::kDynamicSize && dim0 != 4) + return op.emitOpError( + "requires first dimensions of 2D input to be of size 4"); + + int64_t dim1 = input_ty.getDimSize(1); + if (dim1 != ShapedType::kDynamicSize && dim1 != 2) + return op.emitOpError( + "requires second dimensions of 2D input to be of size 2"); + } + + return success(); +} + +//===----------------------------------------------------------------------===// +// DivOp +//===----------------------------------------------------------------------===// + +void DivOp::getCanonicalizationPatterns(OwningRewritePatternList &results, + MLIRContext *context) { + results.insert(context); +} + +OpFoldResult DivOp::fold(ArrayRef operands) { + return IdentityArithmeticOpFolder(*this, operands); +} + +//===----------------------------------------------------------------------===// +// DynamicStitchOp +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(DynamicStitchOp op) { + if (op.N() < 1) return op.emitOpError("requires attribute N with value >= 1"); + + if (RankedTensorType out_ty = op.getType().dyn_cast()) { + if (out_ty.getRank() == 0) { + return op.emitOpError("requires non scalar output"); + } + } + + llvm::SmallDenseSet index_values; + bool all_indices_const = true; + int32_t max_index = -1; + llvm::Optional> inferred_item_shape; + for (auto it : llvm::zip(op.indices(), op.data())) { + Value index = std::get<0>(it); + + DenseIntElementsAttr index_attr; + if (matchPattern(index, m_Constant(&index_attr))) { + for (int32_t index : index_attr.getValues()) { + if (index < 0) + return op.emitOpError() + << "requires non-negative index values; found " << index; + max_index = std::max(index, max_index); + index_values.insert(index); + } + } else { + all_indices_const = false; + } + + Value data = std::get<1>(it); + RankedTensorType index_ty = index.getType().dyn_cast(); + RankedTensorType data_ty = data.getType().dyn_cast(); + if (!index_ty || !data_ty) continue; + + int64_t index_rank = index_ty.getRank(); + ArrayRef data_shape = data_ty.getShape(); + ArrayRef index_shape = index_ty.getShape(); + if (failed(mlir::verifyCompatibleShape(index_shape, + data_shape.take_front(index_rank)))) + return op.emitOpError() << "requires shape of data with type " << data_ty + << " to have prefix matching with shape of the " + "corresponding index type " + << index_ty; + + ArrayRef item_shape = data_shape.drop_front(index_rank); + if (!inferred_item_shape) { + inferred_item_shape = llvm::to_vector<4>(item_shape); + continue; + } + + if (failed(mlir::verifyCompatibleShape(item_shape, *inferred_item_shape))) + return op.emitOpError() << "has inconsistent shaped data and index " + "pairs; inferred item shapes [" + << llvm::makeArrayRef(*inferred_item_shape) + << "] 
and [" << item_shape << "] don't match"; + for (int i = 0, e = item_shape.size(); i < e; ++i) { + int64_t &inferred_dim = (*inferred_item_shape)[i]; + int64_t dim = item_shape[i]; + if (ShapedType::isDynamic(inferred_dim)) inferred_dim = dim; + } + } + + // If all indices are constants, then verify that they cover all indices in + // the range [0, max_index] and the output type is legal. + if (all_indices_const) { + for (int32_t i = 0; i <= max_index; i++) { + if (!index_values.count(i)) + return op.emitOpError() << "missing index " << i; + } + + if (inferred_item_shape) { + SmallVector expected_shape; + expected_shape.push_back(max_index + 1); + expected_shape.append(inferred_item_shape->begin(), + inferred_item_shape->end()); + + auto out_ty = op.getType().cast(); + auto expected_out_ty = + RankedTensorType::get(expected_shape, out_ty.getElementType()); + + if (!AreCastCompatible({out_ty, expected_out_ty})) { + return op.emitOpError() << "has invalid output type; should be " + "compatible with inferred type " + << expected_out_ty; + } + } + } + + return success(); +} + +//===----------------------------------------------------------------------===// +// EinsumOp +//===----------------------------------------------------------------------===// + +// Verifies that, +// * Arity of the op is at most two. +// +// TODO(hinsu): Verify einsum equation attribute. +static LogicalResult Verify(EinsumOp op) { + if (op.N() > 2) { + return op.emitOpError("supports at most two operands"); + } + return success(); +} + +//===----------------------------------------------------------------------===// +// EmptyOp +//===----------------------------------------------------------------------===// + +OpFoldResult EmptyOp::fold(ArrayRef operands) { + assert(operands.size() == 1 && "empty op has one operand"); + + Attribute attr = operands.front(); + if (!attr) return {}; + + auto int_attr = attr.cast(); + SmallVector out_shape; + for (const auto val : int_attr.getValues()) { + out_shape.push_back(val); + } + + auto type = getResult().getType().cast(); + auto etype = type.getElementType(); + + // We can not fold if the result is not static. + if (!type.hasStaticShape()) return {}; + + if (auto float_type = etype.dyn_cast()) { + auto out_type = RankedTensorType::get(out_shape, float_type); + return DenseElementsAttr::get(out_type, + {APFloat(float_type.getFloatSemantics())}); + } + + if (auto int_type = etype.dyn_cast()) { + auto out_type = RankedTensorType::get(out_shape, etype); + APInt val(int_type.getWidth(), 0, int_type.getSignedness()); + return DenseElementsAttr::get(out_type, val); + } + + return {}; +} + +//===----------------------------------------------------------------------===// +// EmptyTensorListOp +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(EmptyTensorListOp op) { + if (!IsOfRankOrUnranked(op.element_shape(), 0) && + !IsOfRankOrUnranked(op.element_shape(), 1)) { + return op.emitOpError("requires element_shape operand to be 0D/1D tensor"); + } + + if (!IsOfRankOrUnranked(op.max_num_elements(), 0)) { + return op.emitOpError("requires max_num_elements operand to be 0D tensor"); + } + return success(); +} + +//===----------------------------------------------------------------------===// +// EqualOp +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(EqualOp op) { + // If we allow inputs to have incompatible type, then nothing to do. 
+ if (!op.incompatible_shape_error()) return success(); + + // Otherwise, check inputs are broadcastable. + return mlir::OpTrait::impl::verifyCompatibleOperandBroadcast( + op.getOperation()); +} + +void EqualOp::build(OpBuilder &builder, OperationState &result, Value x, + Value y, BoolAttr incompatible_shape_error) { + auto result_type = DeduceEqualCmpOpType(&builder, result.location, x, y, + incompatible_shape_error); + return build(builder, result, result_type, x, y, incompatible_shape_error); +} + +//===----------------------------------------------------------------------===// +// ExpandDimsOp +//===----------------------------------------------------------------------===// + +Type InferExpandDimsOpType(Value input, Value dim) { + Type element_ty = input.getType().cast().getElementType(); + auto unranked_ty = UnrankedTensorType::get(element_ty); + + auto input_ty = input.getType().dyn_cast(); + if (!input_ty) return unranked_ty; + + DenseIntElementsAttr dim_attr; + if (!matchPattern(dim, m_Constant(&dim_attr)) || + dim_attr.getNumElements() != 1) + return unranked_ty; + int64_t dim_val = (*dim_attr.begin()).getSExtValue(); + int64_t input_rank = input_ty.getRank(); + + if (dim_val < -input_rank - 1 || dim_val > input_rank + 1) return unranked_ty; + if (dim_val < 0) dim_val += input_rank + 1; + + SmallVector shape = llvm::to_vector<4>(input_ty.getShape()); + shape.insert(shape.begin() + dim_val, 1); + return RankedTensorType::get(shape, element_ty); +} + +void ExpandDimsOp::build(OpBuilder &builder, OperationState &result, + Value input, Value dim) { + return build(builder, result, InferExpandDimsOpType(input, dim), input, dim); +} + +//===----------------------------------------------------------------------===// +// FakeQuantWithMinMaxArgsOp +//===----------------------------------------------------------------------===// +static LogicalResult Verify(FakeQuantWithMinMaxArgsOp op) { + // TODO(fengliuai): moving the following to an utility method. + const llvm::fltSemantics &semantics = op.min().getSemantics(); + float rmin, rmax; + if (&semantics == &APFloat::IEEEsingle()) { + rmin = op.min().convertToFloat(); + rmax = op.max().convertToFloat(); + } else { + rmin = op.min().convertToDouble(); + rmax = op.max().convertToDouble(); + } + // Range boundaries must be valid. 
+ if (rmin >= rmax) { + return op.emitOpError("range is invalid: [" + Twine(std::to_string(rmin)) + + "," + Twine(std::to_string(rmax)) + "]"); + } + int64_t num_bits = op.num_bits().getSExtValue(); + if (num_bits < 2 || num_bits > 16) { + return op.emitOpError( + "requires num_bits to be between 2 and 16, inclusive"); + } + return success(); +} + +//===----------------------------------------------------------------------===// +// FakeQuantWithMinMaxVarsOp +//===----------------------------------------------------------------------===// +static LogicalResult Verify(FakeQuantWithMinMaxVarsOp op) { + auto min = GetRankedTensorTypeForOperand(op.min()); + if (min && !IsOfRankedFloatTensorType(min, 0)) + return op.emitOpError("requires min to be a 0d float tensor"); + + auto max = GetRankedTensorTypeForOperand(op.max()); + if (max && !IsOfRankedFloatTensorType(max, 0)) + return op.emitOpError("requires max to be a 0d float tensor"); + + int64_t num_bits = op.num_bits().getSExtValue(); + if (num_bits < 2 || num_bits > 16) { + return op.emitOpError( + "requires num_bits to be between 2 and 16, inclusive"); + } + return success(); +} + +//===----------------------------------------------------------------------===// +// FakeQuantWithMinMaxVarsPerChannelOp +//===----------------------------------------------------------------------===// +static LogicalResult Verify(FakeQuantWithMinMaxVarsPerChannelOp op) { + auto min = GetRankedTensorTypeForOperand(op.min()); + if (min && !IsOfRankedFloatTensorType(min, 1)) + return op.emitOpError("requires min to be a 1d float tensor"); + + auto max = GetRankedTensorTypeForOperand(op.max()); + if (max && !IsOfRankedFloatTensorType(max, 1)) + return op.emitOpError("requires max to be a 1d float tensor"); + + Value inputs = op.inputs(); + if (!HasRankAtLeast(inputs, 1)) + return op.emitError("requires inputs to be at least 1d float tensor"); + + int64_t num_bits = op.num_bits().getSExtValue(); + if (num_bits < 2 || num_bits > 16) { + return op.emitOpError( + "requires num_bits to be between 2 and 16, inclusive"); + } + + auto inputs_type = inputs.getType().dyn_cast(); + if (!inputs_type) return success(); + int depth = inputs_type.getDimSize(inputs_type.getRank() - 1); + if ((min && min.getDimSize(0) != depth) || + (max && max.getDimSize(0) != depth)) { + return op.emitOpError( + "requires min and max to have same size as last dimension of inputs"); + } + + return success(); +} + +//===----------------------------------------------------------------------===// +// FillOp +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(FillOp op) { + if (!IsOfRankOrUnranked(op.dims(), 1)) + return op.emitOpError() << "requires dims to be a 1D tensor"; + if (!IsOfRankOrUnranked(op.value(), 0)) + return op.emitOpError() << "requires value to be a scalar"; + + return success(); +} + +static ShapedType InferFillOpType(Value dims, Value value) { + Type etype = value.getType().cast().getElementType(); + + DenseIntElementsAttr dims_attr; + if (!matchPattern(dims, m_Constant(&dims_attr))) { + return UnrankedTensorType::get(etype); + } + + llvm::SmallVector shape; + shape.reserve(dims_attr.getNumElements()); + for (const APInt dim : dims_attr.getValues()) { + shape.push_back(dim.getSExtValue()); + } + return RankedTensorType::get(shape, etype); +} + +void FillOp::build(OpBuilder &builder, OperationState &result, Value dims, + Value value) { + FillOp::build(builder, result, InferFillOpType(dims, value), dims, value); +} + 
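+// Example (illustrative sketch, not part of the original change): with a
+// constant `dims` operand holding [2, 3] and an f32 scalar `value`,
+// InferFillOpType above returns tensor<2x3xf32>, so
+//   FillOp::build(builder, state, dims, value);
+// creates a tf.Fill whose result type is tensor<2x3xf32>. If `dims` is not a
+// constant, the inferred type falls back to the unranked tensor<*xf32>.
+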
+OpFoldResult FillOp::fold(ArrayRef operands) { + assert(operands.size() == 2 && "fill op has two operand"); + + auto type = getType().cast(); + // DenseElementsAttr that is used in this folder only supports int and float + // types. + // TODO(hinsu): Handle complex types once there is a attribute kind for + // complex. + if (!type.getElementType().isIntOrFloat()) return {}; + + auto value = operands[1].dyn_cast_or_null(); + if (!value) return {}; + + if (type.hasStaticShape()) + return DenseElementsAttr::get(type, value.getValue({})); + + auto dims = operands[0].dyn_cast_or_null(); + if (!dims) return {}; + + llvm::SmallVector shape; + shape.reserve(dims.getNumElements()); + for (const APInt dim : dims.getValues()) { + shape.push_back(dim.getSExtValue()); + } + type = RankedTensorType::get(shape, type.getElementType()); + + return DenseElementsAttr::get(type, value.getValue({})); +} + +//===----------------------------------------------------------------------===// +// FusedBatchNormGradOp +//===----------------------------------------------------------------------===// + +// TODO(b/150954845): Add benchmarks to verify that layout preference didn't +// change in the latest GPU generations. + +LogicalResult FusedBatchNormGradV3Op::UpdateDataFormat(StringRef data_format) { + return ::mlir::TF::UpdateDataFormat(data_format, this); +} + +StringRef FusedBatchNormGradV3Op::GetOptimalLayout( + const RuntimeDevices &devices) { + // Keep current data format if no GPUs are available or if explicit placement + // does not allow to use GPU for this operation. + if (!CanUseGpuDevice(devices) || !CanUseGpuDevice(getOperation())) + return data_format(); + + // For f16 data type on devices with Tensor Cores support NHWC data format + // is up to ~2x faster. + auto x_ty = x().getType().cast(); + const bool is_f16 = x_ty.getElementType().isF16(); + if (is_f16 && CanUseTensorCores(devices)) return "NHWC"; + + // For all other data types prefer NCHW. 
+  return "NCHW";
+}
+
+//===----------------------------------------------------------------------===//
+// FusedBatchNormOp
+//===----------------------------------------------------------------------===//
+
+static LogicalResult Verify(FusedBatchNormOp op) {
+  auto x = GetRankedTensorTypeForOperand(op.x());
+  if (x && !IsOfRankedFloatTensorType(x, 4))
+    return op.emitOpError("requires x to be a 4D float tensor");
+
+  auto scale = GetRankedTensorTypeForOperand(op.scale());
+  if (scale && !IsOfRankedFloatTensorType(scale, 1))
+    return op.emitOpError("requires scale to be a 1D float tensor");
+
+  auto offset = GetRankedTensorTypeForOperand(op.offset());
+  if (offset && !IsOfRankedFloatTensorType(offset, 1))
+    return op.emitOpError("requires offset to be a 1D float tensor");
+
+  auto mean = GetRankedTensorTypeForOperand(op.mean());
+  if (mean && !IsOfRankedFloatTensorType(mean, 1))
+    return op.emitOpError("requires mean to be a 1D float tensor");
+
+  auto variance = GetRankedTensorTypeForOperand(op.variance());
+  if (variance && !IsOfRankedFloatTensorType(variance, 1))
+    return op.emitOpError("requires variance to be a 1D float tensor");
+
+  // TODO(antiagainst): check attributes
+
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// FusedBatchNormV2Op / FusedBatchNormV3Op
+//===----------------------------------------------------------------------===//
+
+template <class Op>
+static LogicalResult InferenceFoldOperandsPermutation(
+    ArrayRef<int64_t> permutation, Op *op) {
+  // FusedBatchNorm in training mode is a layout sensitive operation, and
+  // should have already been assigned an optimal data format.
+  if (op->is_training()) return failure();
+  return ::mlir::TF::FoldOperandsPermutation(permutation, op);
+}
+
+template <class Op>
+static StringRef GetOptimalLayout(const RuntimeDevices &devices, Op *op) {
+  // In inference mode FusedBatchNorm is not sensitive to data layout.
+  if (!op->is_training()) return op->data_format();
+
+  // Keep current data format if no GPUs are available or if explicit placement
+  // does not allow to use GPU for this operation.
+  if (!CanUseGpuDevice(devices) || !CanUseGpuDevice(op->getOperation()))
+    return op->data_format();
+
+  // For f16 data type on devices with Tensor Cores support NHWC data format
+  // is up to ~2x faster.
+  auto x_ty = op->x().getType().template cast<TensorType>();
+  const bool is_f16 = x_ty.getElementType().isF16();
+  if (is_f16 && CanUseTensorCores(devices)) return "NHWC";
+
+  // For all other data types prefer NCHW.
+ return "NCHW"; +} + +LogicalResult FusedBatchNormV2Op::FoldOperandsPermutation( + ArrayRef permutation) { + return ::mlir::TF::InferenceFoldOperandsPermutation(permutation, this); +} + +LogicalResult FusedBatchNormV2Op::UpdateDataFormat(StringRef data_format) { + return ::mlir::TF::UpdateDataFormat(data_format, this); +} + +StringRef FusedBatchNormV2Op::GetOptimalLayout(const RuntimeDevices &devices) { + return ::mlir::TF::GetOptimalLayout(devices, this); +} + +LogicalResult FusedBatchNormV3Op::FoldOperandsPermutation( + ArrayRef permutation) { + return ::mlir::TF::InferenceFoldOperandsPermutation(permutation, this); +} + +LogicalResult FusedBatchNormV3Op::UpdateDataFormat(StringRef data_format) { + return ::mlir::TF::UpdateDataFormat(data_format, this); +} + +StringRef FusedBatchNormV3Op::GetOptimalLayout(const RuntimeDevices &devices) { + return ::mlir::TF::GetOptimalLayout(devices, this); +} + +//===----------------------------------------------------------------------===// +// GatherV2Op +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(GatherV2Op op) { + int64_t batch_dims = op.batch_dims().getSExtValue(); + if (auto ty = op.indices().getType().dyn_cast()) { + int64_t rank = ty.getRank(); + if (batch_dims > rank || batch_dims < -rank) + return op.emitOpError() + << "batch_dims (" << batch_dims << ") must be in range [" << -rank + << ", " << rank + 1 << ")"; + if (batch_dims < 0) batch_dims += rank; + } + + if (!HasRankAtMost(op.axis(), 1)) + return op.emitOpError("requires axis to have rank at most 1"); + + DenseIntElementsAttr axis_attr; + if (matchPattern(op.axis(), m_Constant(&axis_attr))) { + int64_t axis = (*axis_attr.begin()).getSExtValue(); + if (auto ty = op.params().getType().dyn_cast()) { + int64_t rank = ty.getRank(); + if (axis >= rank || axis < -rank) + return op.emitOpError() << "axis (" << axis << ") must be in range [" + << -rank << ", " << rank << ")"; + if (axis < 0) axis += rank; + } + + if (batch_dims >= 0 && axis >= 0 && axis < batch_dims) { + return op.emitOpError() << "requires axis (" << axis + << ") to be greater than or equal to batch_dims (" + << batch_dims << ")"; + } + } + return success(); +} + +//===----------------------------------------------------------------------===// +// IfOp +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(IfOp op) { + auto module = op.getParentOfType(); + auto then_fn = module.lookupSymbol(op.then_branch()); + if (!then_fn) + return op.emitOpError("then_branch refers to an undefined function : ") + << op.then_branch(); + auto else_fn = module.lookupSymbol(op.else_branch()); + if (!else_fn) + return op.emitOpError("else_branch refers to an undefined function : ") + << op.else_branch(); + auto then_fn_type = then_fn.getType(); + auto else_fn_type = else_fn.getType(); + + // Non-conditional operands starting with the second operand are passed to + // branches and should be pair-wise compatible with branches' inputs. 
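+  // For example, a tf.If with four operands (the condition plus three value
+  // operands) requires both then_branch and else_branch to accept exactly
+  // three arguments.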
+ unsigned expected_num_inputs = op.getNumOperands() - 1; + if (then_fn_type.getNumInputs() != expected_num_inputs || + else_fn_type.getNumInputs() != expected_num_inputs) + return op.emitError("branches should have " + Twine(expected_num_inputs) + + " inputs"); + + for (unsigned i = 0; i < expected_num_inputs; ++i) { + auto operand_type = op.getOperand(i + 1).getType().cast(); + auto then_input_type = then_fn_type.getInput(i).cast(); + if (!AreCastCompatible({operand_type, then_input_type})) + return op.emitError( + llvm::formatv("then branch input type {0} is incompatible with " + "operand type {1} at index {2}", + then_input_type, operand_type, i)); + + auto else_input_type = else_fn_type.getInput(i).cast(); + if (!AreCastCompatible({operand_type, else_input_type})) + return op.emitError( + llvm::formatv("else branch input type {0} is incompatible with " + "operand type {1} at index {2}", + else_input_type, operand_type, i)); + + // If branches have incompatible input types that means that no tensor can + // serve as input to both the functions. Hence, the op is invalid. + if (!AreCastCompatible({then_input_type, else_input_type})) + return op.emitError(llvm::formatv( + "branches inputs have incompatible types {0} and {1} at index {2}", + then_input_type, else_input_type, i)); + } + + // Branches' results should be pair-wise compatible with the op results. + unsigned expected_num_results = op.getNumResults(); + if (then_fn_type.getNumResults() != expected_num_results || + else_fn_type.getNumResults() != expected_num_results) + return op.emitError("branches should have " + Twine(expected_num_results) + + " results"); + + for (unsigned i = 0; i < expected_num_results; ++i) { + auto result_type = op.getResult(i).getType().cast(); + auto then_result_type = then_fn_type.getResult(i).cast(); + if (!AreCastCompatible({then_result_type, result_type})) + return op.emitError( + llvm::formatv("then branch result type {0} is incompatible with op " + "result type {1} at index {2}", + then_result_type, result_type, i)); + + auto else_result_type = else_fn_type.getResult(i).cast(); + if (!AreCastCompatible({else_result_type, result_type})) + return op.emitError( + llvm::formatv("else branch result type {0} is incompatible with op " + "result type {1} at index {2}", + else_result_type, result_type, i)); + } + return success(); +} + +//===----------------------------------------------------------------------===// +// IfRegionOp +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(IfRegionOp op) { + if (failed(VerifyRegionResults(op, op.then_branch(), "then"))) + return failure(); + if (failed(VerifyRegionResults(op, op.else_branch(), "else"))) + return failure(); + return success(); +} + +//===----------------------------------------------------------------------===// +// InvertOp +//===----------------------------------------------------------------------===// + +void InvertOp::getCanonicalizationPatterns(OwningRewritePatternList &results, + MLIRContext *context) { + results.insert(context); +} + +//===----------------------------------------------------------------------===// +// InvertPermutationOp +//===----------------------------------------------------------------------===// + +// Verifies that the input is 1D. 
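+// For example, an input of type tensor<3xi32> (holding a permutation such as
+// [2, 0, 1]) is accepted, while a tensor<2x2xi32> input is rejected.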
+static LogicalResult Verify(InvertPermutationOp op) { + auto x_type = op.x().getType().cast(); + if (!x_type.hasRank()) return success(); + if (x_type.getShape().size() != 1) + return op.emitOpError() << "requires input x to be 1-dimensional"; + + return success(); +} + +//===----------------------------------------------------------------------===// +// LeakyReluOp +//===----------------------------------------------------------------------===// + +OpFoldResult LeakyReluOp::fold(ArrayRef operands) { + assert(operands.size() == 1 && "leaky relu has one operand"); + + // leaky_relu(x, alpha: 1) -> x + if (alpha().convertToFloat() == 1.0f) return getOperand(); + + auto calculate = [&](FloatAttr arg) { + APFloat val = arg.getValue(); + if (val.isNegative()) val = alpha() * val; + return FloatAttr::get(arg.getType(), val); + }; + + if (auto arg = operands[0].dyn_cast_or_null()) { + return calculate(arg); + } else if (auto arg = operands[0].dyn_cast_or_null()) { + if (auto elementAttr = arg.getSplatValue().dyn_cast()) + return DenseElementsAttr::get(arg.getType(), calculate(elementAttr)); + } + return {}; +} + +//===----------------------------------------------------------------------===// +// LogOp +//===----------------------------------------------------------------------===// + +void LogOp::getCanonicalizationPatterns(OwningRewritePatternList &results, + MLIRContext *context) { + results.insert(context); +} + +//===----------------------------------------------------------------------===// +// LogicalNotOp +//===----------------------------------------------------------------------===// + +void LogicalNotOp::getCanonicalizationPatterns( + OwningRewritePatternList &results, MLIRContext *context) { + results.insert(context); +} + +//===----------------------------------------------------------------------===// +// MatrixBandPartOp +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(MatrixBandPartOp op) { + if (!HasRankAtLeast(op.input(), 2)) { + return op.emitOpError() + << "requires `input` to have rank of at least 2, but found " + << op.input().getType(); + } + if (!IsOfRankOrUnranked(op.num_lower(), 0)) { + return op.emitOpError() + << "requires `num_lower` to have 0 dimensions, but found " + << op.num_lower().getType(); + } + if (!IsOfRankOrUnranked(op.num_upper(), 0)) { + return op.emitOpError() + << "requires `num_upper` to have 0 dimensions, but found " + << op.num_upper().getType(); + } + return success(); +} + +//===----------------------------------------------------------------------===// +// MaxOp +//===----------------------------------------------------------------------===// + +void MaxOp::build(OpBuilder &builder, OperationState &result, Value input, + Value reduction_indices, BoolAttr keep_dims) { + Type out_ty = + InferReductionOpType(input, reduction_indices, keep_dims, &builder); + build(builder, result, out_ty, input, reduction_indices, keep_dims); +} + +//===----------------------------------------------------------------------===// +// MaxPoolOp +//===----------------------------------------------------------------------===// + +LogicalResult MaxPoolOp::FoldOperandsPermutation( + ArrayRef permutation) { + return ::mlir::TF::FoldOperandsPermutation( + permutation, this, {{"strides", strides()}, {"ksize", ksize()}}); +} + +//===----------------------------------------------------------------------===// +// MaxPoolGradOp +//===----------------------------------------------------------------------===// + +static 
LogicalResult Verify(MaxPoolGradOp op) { + if (!IsOfRankOrUnranked(op.orig_input(), 4)) { + return op.emitOpError() << "requires orig_input to be rank 4"; + } + if (!IsOfRankOrUnranked(op.orig_output(), 4)) { + return op.emitOpError() << "requires orig_output to be rank 4"; + } + if (!IsOfRankOrUnranked(op.grad(), 4)) { + return op.emitOpError() << "requires grad to be rank 4"; + } + return success(); +} + +//===----------------------------------------------------------------------===// +// MeanOp +//===----------------------------------------------------------------------===// + +LogicalResult MeanOp::FoldOperandsPermutation(ArrayRef permutation) { + // Reduction indices must be defined by a constant operation. + auto reduction_op = + dyn_cast_or_null(reduction_indices().getDefiningOp()); + if (!reduction_op) return failure(); + + auto reductions_value = reduction_op.value().dyn_cast(); + if (!reductions_value) return failure(); + + // Prepare new reduction indices according to operand permutation. + SmallVector shuffled_reduction; + llvm::transform(reductions_value.getIntValues(), + std::back_inserter(shuffled_reduction), + [&](APInt idx) { return permutation[idx.getSExtValue()]; }); + + // Add constant operation with a new reduction indices. + OpBuilder builder(getOperation()); + auto type = mlir::RankedTensorType::get(shuffled_reduction.size(), + builder.getIntegerType(32)); + auto values = mlir::DenseIntElementsAttr::get(type, shuffled_reduction); + auto shuffled_reduction_op = builder.create(getLoc(), values); + + // Use new reduction indices. + setOperand(1, shuffled_reduction_op); + + return success(); +} + +//===----------------------------------------------------------------------===// +// MulOp +//===----------------------------------------------------------------------===// + +OpFoldResult MulOp::fold(ArrayRef operands) { + return IdentityArithmeticOpFolder(*this, operands); +} + +//===----------------------------------------------------------------------===// +// TableGen'd op method definitions +//===----------------------------------------------------------------------===// + +#define GET_OP_CLASSES +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc.inc" + +} // namespace TF +} // namespace mlir diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.h new file mode 100644 index 00000000000..b2b78da8993 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.h @@ -0,0 +1,61 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_MLIR_TENSORFLOW_IR_TF_OPS_A_M_H_ +#define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_IR_TF_OPS_A_M_H_ + +#include "mlir/Dialect/Traits.h" // from @llvm-project +#include "mlir/IR/Attributes.h" // from @llvm-project +#include "mlir/IR/Builders.h" // from @llvm-project +#include "mlir/IR/Dialect.h" // from @llvm-project +#include "mlir/IR/Matchers.h" // from @llvm-project +#include "mlir/IR/Module.h" // from @llvm-project +#include "mlir/IR/OpImplementation.h" // from @llvm-project +#include "mlir/IR/StandardTypes.h" // from @llvm-project +#include "mlir/IR/TypeUtilities.h" // from @llvm-project +#include "mlir/Interfaces/CallInterfaces.h" // from @llvm-project +#include "mlir/Interfaces/DerivedAttributeOpInterface.h" // from @llvm-project +#include "mlir/Interfaces/InferTypeOpInterface.h" // from @llvm-project +#include "mlir/Interfaces/LoopLikeInterface.h" // from @llvm-project +#include "mlir/Interfaces/SideEffectInterfaces.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_attributes.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_op_interfaces.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_structs.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_traits.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_verifiers.h" + +namespace mlir { +namespace TF { + +class YieldOp; + +// TODO(b/131258166): TensorFlow's mutex.h defines a `mutex_lock` macro, whose +// purpose is to catch bug on `tensorflow::mutex_lock`. We don't use +// `tensorflow::mutex_lock` here but we have ops (`tf.MutexLock` and +// `tf.ConsumeMutexLock`) with getter methods named as `mutex_lock()`. Need to +// undefine here to avoid expanding the getter symbol as macro when including +// both mutex.h and this header file. +#undef mutex_lock + +#define GET_OP_FWD_DEFINES +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_all_ops.h.inc" +#define GET_OP_CLASSES +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.h.inc" + +} // namespace TF +} // namespace mlir + +#endif // TENSORFLOW_COMPILER_MLIR_TENSORFLOW_IR_TF_OPS_A_M_H_ diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_helpers.inc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_helpers.inc new file mode 100644 index 00000000000..cea2aa17d46 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_helpers.inc @@ -0,0 +1,580 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// This is a simple include file used to simplify the splitting of the +// tf_ops.cc file. The helpers in here should be refactored and moved to +// tf_verifiers or tf_ops. +// TODO(jpienaar): Remove this file post refactoring. + +// Propagates underscore and device attributes from src to dst. 
+// TODO(b/158769932): This should be a general feature instead post some policy +// discussion. +static void PropagateAttributes(Operation *src, Operation *dst) { + auto device = mlir::Identifier::get("device", src->getContext()); + for (auto named_attr : src->getAttrs()) { + if (*named_attr.first.begin() == '_' || named_attr.first == device) + dst->setAttr(named_attr.first, named_attr.second); + } +} + +//===----------------------------------------------------------------------===// +// TF op helper functions +//===----------------------------------------------------------------------===// + +// Returns the RankedTensorType for the given operand. TensorFlow constant ops +// may have non-static shape because the shape is not propagated during constant +// folding. If the defining op for the given operand is a constant op, this +// routine uses the constant op's attribute to get the actual shape. +static RankedTensorType GetRankedTensorTypeForOperand(Value operand) { + DenseElementsAttr attr; + if (matchPattern(operand, m_Constant(&attr))) { + return attr.getType().dyn_cast(); + } + return operand.getType().dyn_cast(); +} + +// Returns true if the given `value` is of ranked float tensor type with the +// given `rank`. +static inline bool IsOfRankedFloatTensorType(RankedTensorType type, int rank) { + return type && type.getRank() == rank && + type.getElementType().isa(); +} + +// Returns true if the given `value` has the specified rank or has unranked +// type. +static inline bool IsOfRankOrUnranked(Value value, int64_t rank) { + RankedTensorType type = GetRankedTensorTypeForOperand(value); + return !type || type.getRank() == rank; +} + +// Returns true if the given `value` has at least the specified rank or has +// unranked type. +static inline bool HasRankAtLeast(Value value, int64_t rank) { + RankedTensorType type = GetRankedTensorTypeForOperand(value); + return !type || type.getRank() >= rank; +} + +// Returns true if the given `value` has at most the specified rank or has +// unranked type. +static inline bool HasRankAtMost(Value value, int64_t rank) { + RankedTensorType type = GetRankedTensorTypeForOperand(value); + return !type || type.getRank() <= rank; +} + +static bool IsUnknownDimOrRank(int64_t dim_or_rank) { + return dim_or_rank == -1; +} + +// Returns the tf.Equal/tf.NotEqual result type given `x` and `y` and inputs. If +// `incompatible_shape_error` is true, reports error if `x` and `y` has +// incompatible shapes. Otherwise, returns a tensor type with unknown rank. +static Type DeduceEqualCmpOpType(Builder *builder, Location loc, Value x, + Value y, BoolAttr incompatible_shape_error) { + auto result_type = + OpTrait::util::getBroadcastedType(x.getType(), y.getType()); + if (!result_type) { + if (incompatible_shape_error.getValue()) { + mlir::emitError(loc, "non-broadcastable operands"); + } else { + return UnrankedTensorType::get(builder->getI1Type()); + } + } + + auto ranked_type = result_type.dyn_cast(); + if (!ranked_type) return UnrankedTensorType::get(builder->getI1Type()); + + return RankedTensorType::get(ranked_type.getShape(), builder->getI1Type()); +} + +// Returns dimension index for the given TensorFlow axis that supports negative +// indexing. +static int64_t GetDimForAxis(int64_t axis, int64_t rank) { + return axis >= 0 ? axis : axis + rank; +} + +// Infers output type for reduction ops such as SumOp, MaxOp etc. +// TODO(b/e667204a): Move this logic to shape inference once it supports custom +// inference functions. 
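+// For example, with an input of type tensor<4x8x16xf32> and constant reduction
+// indices [1], the inferred type is tensor<4x16xf32> (or tensor<4x1x16xf32>
+// with keep_dims = true); non-constant indices without keep_dims give an
+// unranked result type.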
+static Type InferReductionOpType(Value input, Value reduction_indices, + BoolAttr keep_dims, Builder *builder) { + Type input_ty = input.getType(); + Type element_ty = getElementTypeOrSelf(input_ty); + + // Output type is unranked if input type is not ranked. + auto ranked_ty = input_ty.dyn_cast(); + if (!ranked_ty) return UnrankedTensorType::get(element_ty); + int64_t rank = ranked_ty.getRank(); + + DenseIntElementsAttr indices; + if (!matchPattern(reduction_indices, m_Constant(&indices))) { + // Output type is unranked if reduction indices are not constant and reduced + // dimensions are not kept. + if (!keep_dims.getValue()) return UnrankedTensorType::get(element_ty); + + // Otherwise, output type has same rank as the input. + return RankedTensorType::get(SmallVector(rank, -1), element_ty); + } + + int64_t num_reduce_dim = 0; + llvm::SmallVector is_reduce_dim(rank, false); + for (const APInt &index : indices.getValues()) { + int64_t dim = GetDimForAxis(index.getSExtValue(), rank); + // Invalid input. + if (dim < 0 || dim >= rank) return UnrankedTensorType::get(element_ty); + + if (!is_reduce_dim[dim]) { + is_reduce_dim[dim] = true; + num_reduce_dim++; + } + } + + ArrayRef shape = ranked_ty.getShape(); + SmallVector out_shape; + out_shape.reserve(rank - (keep_dims.getValue() ? 0 : num_reduce_dim)); + for (int64_t i = 0; i < rank; ++i) { + if (!is_reduce_dim[i]) + out_shape.push_back(shape[i]); + else if (keep_dims.getValue()) + out_shape.push_back(1); + } + return RankedTensorType::get(out_shape, element_ty); +} + +// Verifies that the given types are cast compatible. If not, emits appropriate +// error for the given op. If mask_one_dim is set to true, then the types are +// allowed to have one mismatching dimension. Masking one of the dimensions is +// useful for ops like Concat that requires all ranked inputs to have the same +// rank and match dimension sizes for all but one of the dimensions. +static LogicalResult VerifyTypesCompatibility( + Operation::operand_type_range types, bool mask_one_dim, Operation *op) { + constexpr int64_t kUninitialized = -1; + int64_t common_rank = kUninitialized; + llvm::SmallVector common_dims; + int64_t dim_to_mask = kUninitialized; + + // Initialize common_rank with rank of the first ranked type and verify that + // following ranked types have the same rank. + // Similarly, initialize each of the dimensions with the first type that has + // the dimension size available and verify that all following types have the + // same size for the dimension. However, if mask_one_dim is true, note down + // the dimension index on the first mismatch and ignore dimension at that + // index in following types. + for (Type ty : types) { + RankedTensorType ranked_ty = ty.dyn_cast(); + if (!ranked_ty) continue; + + int64_t rank = ranked_ty.getRank(); + if (common_rank == kUninitialized) { + common_rank = rank; + common_dims.resize(common_rank, kUninitialized); + } else if (common_rank != rank) { + return op->emitError() + << "operand type " << ranked_ty + << " is not compatible with preceding operands; expected rank: " + << common_rank; + } + + for (int64_t i = 0, e = common_rank; i != e; i++) { + if (i == dim_to_mask) continue; + + int64_t dim = ranked_ty.getDimSize(i); + if (dim == kUninitialized) continue; + + int64_t &common_dim = common_dims[i]; + if (common_dim == kUninitialized) { + common_dim = dim; + } else if (common_dim != dim) { + // If mask_one_dim is true, do not emit an error if this is the only + // dimension with mismatches. 
Note down the dimension to mask it from + // the following types. + if (mask_one_dim && dim_to_mask == kUninitialized) { + dim_to_mask = i; + continue; + } + + return op->emitError() << "operand type " << ranked_ty + << " is not compatible with preceding operands; " + "expected dimension at index " + << i << ": " << common_dim; + } + } + } + return success(); +} + +// This is a helper for the Select to SelectV2 canonicalization. The `data` rank +// refers to the rank of `t`/`e` (these two inputs have equal rank; this is +// checked in the verifier). +// +// In most cases, the predicate for Select can be used directly as the predicate +// for SelectV2. However, there is one case that varies, which is when the +// predicate is a tensor and the data is multidimensional. In this case, Select +// op semantics dictate that the predicate tensor length must match the size of +// the first data dimension. This varies from normal broadcasting semantics +// (which are used in SelectV2), so we must reshape the tensor in this case to +// be compatible. +static Value ReshapeSelectPredIfNecessary(OpBuilder *builder, Location loc, + Value cond, int data_rank) { + auto cond_tensor = cond.getType().cast(); + // Reshape is only needed in the case that the cond rank is 1 (i.e. it is + // a vector) AND t/e rank is > 1. + if (cond_tensor.getRank() != 1 || data_rank <= 1) { + // No reshape necessary. Leave cond as it is. + return cond; + } + + // This is the case where a reshape is needed. We want to construct the + // shape [x,1,...1], where x is the value in the pred tensor and the + // length of the shape is equal to data_rank. + SmallVector shape(data_rank, 1); + shape[0] = cond_tensor.getShape().front(); + auto new_shape_type = + RankedTensorType::get({data_rank}, builder->getIntegerType(64)); + auto shape_attr = DenseIntElementsAttr::get(new_shape_type, shape); + auto new_shape = builder->create(loc, shape_attr); + return builder->create(loc, cond, new_shape); +} + +//===----------------------------------------------------------------------===// +// Helper functions detect device capabilities from RuntimeDevices. +//===----------------------------------------------------------------------===// + +namespace { +using DeviceNameUtils = ::tensorflow::DeviceNameUtils; +using ParsedName = ::tensorflow::DeviceNameUtils::ParsedName; + +bool IsGpuDevice(const DeviceNameUtils::ParsedName &device) { + return device.type == ::tensorflow::DEVICE_GPU; +} + +} // namespace + +// Returns true if at least one GPU device is available at runtime. +bool CanUseGpuDevice(const RuntimeDevices &devices) { + return llvm::any_of(devices.device_names(), IsGpuDevice); +} + +// Returns true if all of the GPUs available at runtime support TensorCores +// (NVIDIA compute capability >= 7.0). +bool CanUseTensorCores(const RuntimeDevices &devices) { + auto has_tensor_cores = [&](const DeviceNameUtils::ParsedName &device) { + auto md = devices.GetGpuDeviceMetadata(device); + return md ? md->cc_major().getInt() >= 7 : false; + }; + return llvm::all_of( + llvm::make_filter_range(devices.device_names(), IsGpuDevice), + has_tensor_cores); +} + +// Returns true if operation does not have explicit device placement that would +// prevent it from running on GPU device. 
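+// For example, an op with no `device` attribute, or with an explicit placement
+// such as "/device:GPU:1", can still run on a GPU, while a placement such as
+// "/job:worker/replica:0/task:0/device:CPU:0" prevents it.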
+bool CanUseGpuDevice(Operation *op) { + auto device_attr = op->getAttrOfType("device"); + if (!device_attr || device_attr.getValue().empty()) return true; + + DeviceNameUtils::ParsedName device; + if (!DeviceNameUtils::ParseFullName(device_attr.getValue().str(), &device)) + return false; + + // We can't use GPU if operation explicitly placed on non-GPU device. + return !device.has_type || device.type == ::tensorflow::DEVICE_GPU; +} + +//===----------------------------------------------------------------------===// +// TF op helper functions to work with layout transformation. +//===----------------------------------------------------------------------===// + +SmallVector ReversePermutation(ArrayRef permutation) { + SmallVector reverse(permutation.size()); + for (size_t i = 0; i < permutation.size(); ++i) { + reverse[permutation[i]] = i; + } + return reverse; +} + +SmallVector GetDataFormatPermutation(StringRef from, StringRef to) { + if (from == "NHWC" && to == "NCHW") { + return {0, 3, 1, 2}; + } else if (from == "NCHW" && to == "NHWC") { + return {0, 2, 3, 1}; + } else { + return {}; + } +} + +// Shuffle elements in the `attr` according to the permutation. Optional +// `inner_size` allows to shuffle array attributes created from rank 2 tensors +// on outer dimension only. +ArrayAttr ShuffleArrayAttr(ArrayAttr attr, ArrayRef permutation, + int inner_size = 1) { + if (attr.size() == 0) return attr; + + assert(attr.size() % inner_size == 0); + assert(attr.size() / inner_size == permutation.size()); + + SmallVector values{attr.begin(), attr.end()}; + SmallVector shuffled(values.size()); + + for (size_t i = 0; i < permutation.size(); ++i) { + for (size_t j = 0; j < inner_size; ++j) { + shuffled[i * inner_size + j] = values[permutation[i] * inner_size + j]; + } + } + + return ArrayAttr::get(shuffled, attr.getContext()); +} + +// Shuffle ranked tensor dimensions according to the permutation. +Type ShuffleRankedTensorType(Type type, ArrayRef permutation) { + if (auto ranked_type = type.dyn_cast()) { + ArrayRef shape = ranked_type.getShape(); + assert(permutation.size() == shape.size()); + + SmallVector new_shape(permutation.size()); + for (size_t i = 0; i < permutation.size(); ++i) + new_shape[i] = shape[permutation[i]]; + + return RankedTensorType::get(new_shape, ranked_type.getElementType()); + } + + return type; +} + +static bool AreCancellablePermutations(DenseIntElementsAttr perm0, + DenseIntElementsAttr perm1) { + if (perm0.getNumElements() == 0 || perm1.getNumElements() == 0) return false; + if (perm0.getNumElements() != perm1.getNumElements()) return false; + + SmallVector perm0_values; + for (const auto &value : perm0.getIntValues()) + perm0_values.push_back(value.getSExtValue()); + + SmallVector perm1_values; + for (const auto &value : perm1.getIntValues()) + perm1_values.push_back(value.getSExtValue()); + + for (int i = 0; i < perm0_values.size(); ++i) { + if (perm0_values[perm1_values[i]] != i) return false; + } + + return true; +} + +// Default implementation of `LayoutSensitiveInterface::UpdateDataFormat` for +// layout sensitive operations that do not have any additional layout dependent +// attributes besides `data_format` string. +template +LogicalResult UpdateDataFormat(StringRef data_format, Op *op) { + auto perm = GetDataFormatPermutation(op->data_format(), data_format); + if (perm.empty()) return failure(); + + // Update data format attribute. + op->setAttr("data_format", StringAttr::get(data_format, op->getContext())); + + // Update types for all layout sensitive results. 
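+  // For example, when switching from NHWC to NCHW the permutation is
+  // {0, 3, 1, 2}, so a result of type tensor<1x32x32x8xf32> is rewritten to
+  // tensor<1x8x32x32xf32>.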
+ auto layout_sensitive = cast(op->getOperation()); + for (unsigned idx : layout_sensitive.GetLayoutDependentResults()) { + OpResult result = op->getOperation()->getResult(idx); + result.setType(ShuffleRankedTensorType(result.getType(), perm)); + } + + return success(); +} + +// Default implementation for folding operand transpose into the operation. +// See `FoldOperandsTransposeInterface::FoldOperandsPermutation`. +template +LogicalResult FoldOperandsPermutation( + ArrayRef permutation, Op *op, + ArrayRef> shuffle_attrs = {}) { + MLIRContext *context = op->template getParentOfType().getContext(); + + // We only support NHWC <-> NCHW permutations. + static constexpr std::array kNchwToNhwc = {0, 2, 3, 1}; + static constexpr std::array kNhwcToNchw = {0, 3, 1, 2}; + + // Operation data format after folding `permutation`. + StringRef target_data_format = [&]() -> StringRef { + if (op->data_format() == "NHWC" && permutation.equals(kNchwToNhwc)) { + return "NCHW"; // cancel NCHW->NHWC operand permutation + } else if (op->data_format() == "NCHW" && permutation.equals(kNhwcToNchw)) { + return "NHWC"; // cancel NHWC->NCHW operand permutation + } else { + return ""; + } + }(); + if (target_data_format.empty()) return failure(); + + // To fold operand `permutation` into the `op` we need shuffle all layout + // dependent attributes and types with a reverse permutation, and change + // operation data format to `target_data_format`. + // + // Example: + // %1 = SomeOp(...) {data_format = NHWC} + // %2 = Transpose(%1) {permutation = NHWC->NCHW} + // %3 = Op(%2) {data_format = NCHW} + // + // To bypass %2 we have to change data format to shuffle data format from NCHW + // to NHWC, which is the reverse of operand permutation (function argument). + auto reverse_permutation = + GetDataFormatPermutation(op->data_format(), target_data_format); + if (reverse_permutation.empty()) return failure(); + + op->setAttr("data_format", StringAttr::get(target_data_format, context)); + + for (auto pair : shuffle_attrs) { + StringRef attr_name = pair.first; + ArrayAttr attr_value = pair.second; + op->setAttr(attr_name, ShuffleArrayAttr(attr_value, reverse_permutation)); + } + + auto fold = cast(op->getOperation()); + for (unsigned idx : fold.GetLayoutDependentResults()) { + OpResult result = op->getOperation()->getResult(idx); + result.setType( + ShuffleRankedTensorType(result.getType(), reverse_permutation)); + } + + return success(); +} + +//===----------------------------------------------------------------------===// +// Rewrite Pattern for removing trivial Arithmetic op. +//===----------------------------------------------------------------------===// + +namespace { +// Fold Arithmetic Op if one of the operands is a constant known to be an +// Identity (e.g. X+0, X*1, etc...). For commutative operations fold if +// known identity value is either lhs or rhs. +template < + typename OpT, + typename std::enable_if::value>::type * = nullptr> +OpFoldResult IdentityArithmeticOpFolder(OpT arithmetic_op, + ArrayRef operands) { + auto lhs_type = arithmetic_op.x().getType().template cast(); + auto rhs_type = arithmetic_op.y().getType().template cast(); + auto result_type = + arithmetic_op.getResult().getType().template cast(); + + // We can fold arithmetic operation only of we can prove that we will not + // accidentally hide a broadcasting error. 
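+  // For example, folding %x * %ones with %x : tensor<1x2xf32> and %ones a
+  // splat of 1.0 : tensor<2x2xf32> would change the result type from
+  // tensor<2x2xf32> to tensor<1x2xf32>, so such a fold must be rejected.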
+ auto is_valid_broadcasting = [](ShapedType operand_ty, ShapedType identity_ty, + ShapedType result_ty) -> bool { + // Scalar identity is broadcastable to any operand shape, we only need to + // check that operand has the same shape as a result. + bool scalar_identity = identity_ty.hasRank() && identity_ty.getRank() == 0; + if (scalar_identity) return operand_ty == result_ty; + + // If identity is not a scalar, we must verify that all shapes are equal + // and statically known. + // + // TODO(ezhulenev): Fold if identity shape is statically know to be + // broadcastable to the operand shape. + return operand_ty == result_ty && identity_ty == result_ty && + result_ty.hasStaticShape(); + }; + + // Check that we have a constant operand on one side (candidate for identity). + const bool is_commutative = + (std::is_same::value || std::is_same::value); + auto lhs_attr = operands[0].dyn_cast_or_null(); + auto rhs_attr = operands[1].dyn_cast_or_null(); + if (!rhs_attr && !(is_commutative && lhs_attr)) return {}; + + // Mul and Div ops have identity value one while AddV2 and SubOp have identity + // value zero. + const int identity = + (std::is_same::value || std::is_same::value || + std::is_same::value) + ? 1 + : 0; + + Type element_ty = lhs_type.getElementType(); + Attribute identity_attr; + if (auto ty = element_ty.template dyn_cast()) { + identity_attr = FloatAttr::get(ty, static_cast(identity)); + } else if (auto ty = element_ty.template dyn_cast()) { + identity_attr = IntegerAttr::get(ty, static_cast(identity)); + } else { + return {}; + } + + // Fold: Op(Operand, Identity) -> Operand. + if (rhs_attr && is_valid_broadcasting(lhs_type, rhs_type, result_type)) { + if (rhs_attr.isSplat() && rhs_attr.getSplatValue() == identity_attr) + return arithmetic_op.x(); + } + + // Fold: Op(Identity, Operand) -> Operand for commutative operations. + if (lhs_attr && is_commutative && + is_valid_broadcasting(rhs_type, lhs_type, result_type)) { + if (lhs_attr.isSplat() && lhs_attr.getSplatValue() == identity_attr) + return arithmetic_op.y(); + } + + return {}; +} +} // namespace + +// Verifies an reduction op's `input` and reduction `dims`. 
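+// For example, for a rank-3 `input` every value in `dims` must lie in the
+// range [-3, 3), and `dims` itself must be a 0-D or 1-D tensor.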
+static LogicalResult VerifyReductionInputAndDims(Value input, Value dims, + Location loc) { + auto dims_type = dims.getType().dyn_cast(); + if (!dims_type) return success(); + if (dims_type.getRank() > 1) + return emitError(loc, "dimensions can only be 0D or 1D tensor"); + + auto input_type = input.getType().dyn_cast(); + if (!input_type) return success(); + int64_t rank = input_type.getRank(); + + DenseIntElementsAttr dims_attr; + if (!matchPattern(dims, m_Constant(&dims_attr))) return success(); + for (const auto &dim_pair : llvm::enumerate(dims_attr)) { + int64_t cur_dim = dim_pair.value().getSExtValue(); + if (cur_dim < -rank || cur_dim >= rank) + return emitError(loc) + << dim_pair.index() << "-th dimension should be in the range of [-" + << rank << ", " << rank << ")"; + } + + return success(); +} + +LogicalResult VerifyRegionResults(Operation *op, Region ®ion, + StringRef region_name) { + auto op_name = op->getName().getStringRef(); + // verify that op outputs match yield inputs + YieldOp yield = cast(region.front().getTerminator()); + unsigned expected_num_results = op->getNumResults(); + if (yield.getNumOperands() != expected_num_results) + return op->emitOpError() + << region_name + " should have same number (" << expected_num_results + << ") of results as " << op_name << " but has " + << yield.getNumOperands() << " results"; + + for (int idx : llvm::seq(0, expected_num_results)) { + auto op_result_type = op->getResult(idx).getType().cast(); + auto region_result_type = + yield.getOperand(idx).getType().cast(); + if (!AreCastCompatible({region_result_type, op_result_type})) + return op->emitError(llvm::formatv( + "{0} result type {1} is incompatible with {2} " + "result type {3} at index {4}", + region_name, region_result_type, op_name, op_result_type, idx)); + } + return success(); +} diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc new file mode 100644 index 00000000000..c5c729a600e --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc @@ -0,0 +1,2270 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Sequence.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/FormatVariadic.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project +#include "mlir/Dialect/Traits.h" // from @llvm-project +#include "mlir/IR/Attributes.h" // from @llvm-project +#include "mlir/IR/Builders.h" // from @llvm-project +#include "mlir/IR/Diagnostics.h" // from @llvm-project +#include "mlir/IR/DialectImplementation.h" // from @llvm-project +#include "mlir/IR/Function.h" // from @llvm-project +#include "mlir/IR/Identifier.h" // from @llvm-project +#include "mlir/IR/Location.h" // from @llvm-project +#include "mlir/IR/MLIRContext.h" // from @llvm-project +#include "mlir/IR/Matchers.h" // from @llvm-project +#include "mlir/IR/OpDefinition.h" // from @llvm-project +#include "mlir/IR/OpImplementation.h" // from @llvm-project +#include "mlir/IR/PatternMatch.h" // from @llvm-project +#include "mlir/IR/StandardTypes.h" // from @llvm-project +#include "mlir/IR/TypeUtilities.h" // from @llvm-project +#include "mlir/IR/Types.h" // from @llvm-project +#include "mlir/IR/Value.h" // from @llvm-project +#include "mlir/Parser.h" // from @llvm-project +#include "mlir/Support/LLVM.h" // from @llvm-project +#include "mlir/Support/LogicalResult.h" // from @llvm-project +#include "mlir/Transforms/InliningUtils.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_attributes.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_side_effects.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_structs.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/util/tensor_format.h" + +namespace mlir { +namespace TF { + +namespace { +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops_helpers.inc" +#include "tensorflow/compiler/mlir/tensorflow/transforms/generated_canonicalize.inc" +} // namespace + +//===----------------------------------------------------------------------===// +// NegOp +//===----------------------------------------------------------------------===// + +void NegOp::getCanonicalizationPatterns(OwningRewritePatternList &results, + MLIRContext *context) { + results.insert(context); +} + +//===----------------------------------------------------------------------===// +// NotEqualOp +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(NotEqualOp op) { + // If we allow inputs to have incompatible type, then nothing to do. + if (!op.incompatible_shape_error()) return success(); + + // Otherwise, check inputs are broadcastable. 
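+  // For example, operands of type tensor<2x1xf32> and tensor<2x3xf32> are
+  // broadcast compatible, while tensor<2xf32> and tensor<3xf32> are not.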
+ return mlir::OpTrait::impl::verifyCompatibleOperandBroadcast( + op.getOperation()); +} + +void NotEqualOp::build(OpBuilder &builder, OperationState &result, Value x, + Value y, BoolAttr incompatible_shape_error) { + auto result_type = DeduceEqualCmpOpType(&builder, result.location, x, y, + incompatible_shape_error); + return build(builder, result, result_type, x, y, incompatible_shape_error); +} + +//===----------------------------------------------------------------------===// +// OneHotOp +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(OneHotOp op) { + int64_t axis = op.axis().getSExtValue(); + + auto indices_ty = op.indices().getType().dyn_cast(); + if (indices_ty && + !(axis == -1 || (axis >= 0 && axis <= indices_ty.getShape().size()))) { + return op.emitOpError() + << "expected axis (" << axis << ") to be -1 or between [0, " + << indices_ty.getShape().size() << "]"; + } + + if (axis < -1) { + return op.emitOpError() << "expected axis (" << axis + << ") to be -1 or between [0, rank(indices()))"; + } + + if (!IsOfRankOrUnranked(op.depth(), 0)) { + return op.emitOpError() << "requires depth to be a scalar"; + } + if (!IsOfRankOrUnranked(op.on_value(), 0)) { + return op.emitOpError() << "requires on_value to be a scalar"; + } + if (!IsOfRankOrUnranked(op.off_value(), 0)) { + return op.emitOpError() << "requires off_value to be a scalar"; + } + + DenseIntElementsAttr depth_attr; + if (matchPattern(op.depth(), m_Constant(&depth_attr))) { + if (depth_attr.getType().getRank() != 0) + return op.emitOpError() << "requires depth to be a scalar"; + int64_t depth = depth_attr.getValue({}).getSExtValue(); + if (depth < 0) { + return op.emitOpError() << "depth must be non-negative, got: " << depth; + } + } + + return success(); +} + +static TensorType InferOneHotOpType(Value indices, Value depth, Value on_value, + Value off_value, IntegerAttr axis) { + int64_t axis_val = axis.getInt(); + Type element_ty = on_value.getType().cast().getElementType(); + auto unranked_ty = UnrankedTensorType::get(element_ty); + if (axis_val < -1) return unranked_ty; + + auto indices_ty = indices.getType().dyn_cast(); + if (!indices_ty) return unranked_ty; + + auto shape = llvm::to_vector<2>(indices_ty.getShape()); + if (axis_val == -1) axis_val = shape.size(); + + int64_t depth_val = ShapedType::kDynamicSize; + DenseIntElementsAttr depth_attr; + if (matchPattern(depth, m_Constant(&depth_attr)) && + depth_attr.getNumElements() == 1) + depth_val = (*depth_attr.begin()).getSExtValue(); + shape.insert(shape.begin() + axis_val, depth_val); + return RankedTensorType::get(shape, element_ty); +} + +void OneHotOp::build(OpBuilder &builder, OperationState &result, Value indices, + Value depth, Value on_value, Value off_value, + IntegerAttr axis) { + build(builder, result, + InferOneHotOpType(indices, depth, on_value, off_value, axis), indices, + depth, on_value, off_value, axis); +} + +//===----------------------------------------------------------------------===// +// PackOp +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(PackOp op) { + // TODO(hinsu): Convert variadic length attributes to derived attributes. 
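+  // For example, packing three tensor<2x3xf32> values along axis 0 produces a
+  // tensor<3x2x3xf32>; for rank-2 inputs the axis attribute must lie in the
+  // range [-3, 3).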
+ Operation::operand_range values = op.values(); + + if (failed(VerifyTypesCompatibility(values, + /*mask_one_dim=*/false, + op.getOperation()))) { + return failure(); + } + + int64_t inputs_rank = -1; + for (Value value : values) { + if (auto ty = value.getType().dyn_cast()) { + // Exit early as input types are verified to be compatible so all ranked + // tensors have the same rank. + inputs_rank = ty.getRank(); + break; + } + } + if (inputs_rank == -1) return success(); + + // The values can be packed along any of the dimensions between 0 and + // inputs rank, inclusive. Also, as the negative axis values wrap around so + // the axis value range is [-(R+1), R+1). + int64_t range_begin = -inputs_rank - 1; // Inclusive + int64_t range_end = inputs_rank + 1; // Exclusive + int64_t axis = op.axis().getSExtValue(); + if (axis < range_begin || axis >= range_end) { + return op.emitError() << "attribute 'axis' should be within range [" + << range_begin << ", " << range_end + << "); actual value: " << axis; + } + + return success(); +} + +//===----------------------------------------------------------------------===// +// PadOp +//===----------------------------------------------------------------------===// + +LogicalResult PadOp::FoldOperandsPermutation(ArrayRef permutation) { + // Paddings must be defined by a constant operation. + auto paddings_op = dyn_cast_or_null(paddings().getDefiningOp()); + if (!paddings_op) return failure(); + + auto paddings_value = paddings_op.value().dyn_cast(); + if (!paddings_value || + paddings_value.getNumElements() != permutation.size() * 2) + return failure(); + + SmallVector shuffled_paddings(paddings_value.getNumElements()); + for (auto index_pair : llvm::enumerate(paddings_value.getIntValues())) { + size_t outer_idx = index_pair.index() / 2; + size_t inner_idx = index_pair.index() % 2; + + shuffled_paddings[permutation[outer_idx] * 2 + inner_idx] = + index_pair.value().getSExtValue(); + } + + // Add constant operation with a new paddings. + OpBuilder builder(getOperation()); + auto type = mlir::RankedTensorType::get(paddings_value.getType().getShape(), + builder.getIntegerType(32)); + auto values = mlir::DenseIntElementsAttr::get(type, shuffled_paddings); + auto shuffled_paddings_op = builder.create(getLoc(), values); + + // Use new paddings. + setOperand(1, shuffled_paddings_op); + + // Change the result type. + getResult().setType(ShuffleRankedTensorType(getResult().getType(), + ReversePermutation(permutation))); + + return success(); +} + +//===----------------------------------------------------------------------===// +// ParseExampleV2Op +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(ParseExampleV2Op op) { + // NOTE(mrry): This validates properties of an op that would previously be + // validated by the TensorFlow OpDef type checker. In addition to these + // checks, the shape inference function for ParseExampleV2 validates the + // consistency of the argument and result types. + + // Validate dense variadic input and output lengths. + // NOTE(mrry): The Tdense attr is derived from dense_defaults, so we + // do not need to validate dense_defaults. 
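+  // For example, if the Tdense attribute holds two types, the op must produce
+  // exactly two dense_values results.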
+ auto dense_types_count = + std::distance(op.Tdense().begin(), op.Tdense().end()); + auto dense_values_count = + std::distance(op.dense_values().begin(), op.dense_values().end()); + if (dense_values_count != dense_types_count) { + return op.emitError() << "output 'dense_values' should have same length " + << "as attribute 'Tdense'"; + } + + // Validate sparse variadic output lengths. + // NOTE(mrry): The sparse_types attr is derived from sparse_values, so we + // do not need to validate sparse_values. + auto sparse_types_count = + std::distance(op.sparse_types().begin(), op.sparse_types().end()); + if (op.num_sparse() != sparse_types_count) { + return op.emitError() << "attribute 'num_sparse' should be the same as " + << "the length of attribute 'sparse_types'"; + } + if (op.sparse_indices().size() != sparse_types_count) { + return op.emitError() << "output 'sparse_indices' should have same length " + << "as attribute 'sparse_types'"; + } + if (op.sparse_shapes().size() != sparse_types_count) { + return op.emitError() << "output 'sparse_shapes' should have same length " + << "as attribute 'sparse_types'"; + } + + // Validate ragged variadic output lengths. + auto ragged_value_types_count = std::distance(op.ragged_value_types().begin(), + op.ragged_value_types().end()); + auto ragged_split_types_count = std::distance(op.ragged_split_types().begin(), + op.ragged_split_types().end()); + if (ragged_value_types_count != ragged_split_types_count) { + return op.emitError() << "attribute 'ragged_value_types' should have same " + << "length as attribute 'ragged_split_types'"; + } + + return success(); +} + +//===----------------------------------------------------------------------===// +// PartitionedCallOp +//===----------------------------------------------------------------------===// + +template +static LogicalResult VerifyPartitionedCall(OpClass op) { + auto module = op.template getParentOfType(); + SymbolRefAttr func = op.getAttr("f").template cast(); + + auto function = + dyn_cast_or_null(SymbolTable::lookupSymbolIn(module, func)); + + if (!function) { + return op.emitError("'f' attribute refers to an undefined function: ") + << func; + } + + FunctionType function_ty = function.getType(); + int func_arg_count = function_ty.getNumInputs(); + int arg_count = op.args().size(); + + if (arg_count != func_arg_count) { + return op.emitError() << "argument count mismatch: 'args' has " << arg_count + << " arguments, but '" << func << "' expects " + << func_arg_count; + } + + return success(); +} + +//===----------------------------------------------------------------------===// +// PowOp +//===----------------------------------------------------------------------===// + +OpFoldResult PowOp::fold(ArrayRef operands) { + auto constant_y = operands[1].dyn_cast_or_null(); + if (constant_y && constant_y.isSplat()) { + APFloat y_value = constant_y.getSplatValue(); + auto output_type = getType().cast(); + if (y_value.isZero() && output_type.hasStaticShape()) { + return DenseElementsAttr::get( + output_type, + FloatAttr::get(output_type.getElementType(), /*value=*/1.0)); + } + if (y_value.isExactlyValue(1.0)) { + return x(); + } + } + return {}; +} + +//===----------------------------------------------------------------------===// +// QrOp +//===----------------------------------------------------------------------===// + +// Verifies that, +// +// * Input type, if ranked, must have at least 2 dimensions and at most +// INT32_MAX dimensions. 
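+// * For example, tensor<3x4xf32> and tensor<5x3x4xf32> inputs are accepted,
+//   while a tensor<4xf32> input is rejected.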
+// +static LogicalResult Verify(QrOp op) { + auto ttype = op.input().getType().cast(); + if (!ttype.hasRank()) return success(); + if (!HasRankAtLeast(op.input(), 2)) + return op.emitOpError( + "requires ranked input tensor to be of rank 2 or more"); + if (!HasRankAtMost(op.input(), std::numeric_limits::max())) + return op.emitOpError( + "requires ranked input tensor to be of rank INT32_MAX or less"); + + return success(); +} + +//===----------------------------------------------------------------------===// +// ReadVariableOp +//===----------------------------------------------------------------------===// + +void ReadVariableOp::getCanonicalizationPatterns( + OwningRewritePatternList &results, MLIRContext *context) { + results.insert(context); +} + +//===----------------------------------------------------------------------===// +// ReciprocalOp +//===----------------------------------------------------------------------===// + +void ReciprocalOp::getCanonicalizationPatterns( + OwningRewritePatternList &results, MLIRContext *context) { + results.insert(context); +} + +//===----------------------------------------------------------------------===// +// RandomUniformOp +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(RandomUniformOp op) { + if (!IsOfRankOrUnranked(op.shape(), 1)) + return op.emitOpError("shape must be 1D tensor"); + return success(); +} + +//===----------------------------------------------------------------------===// +// RangeOp +//===----------------------------------------------------------------------===// + +void RangeOp::build(OpBuilder &builder, OperationState &result, Value start, + Value limit, Value delta) { + assert(start.getType() == limit.getType()); + assert(start.getType() == delta.getType()); + DenseIntElementsAttr start_val; + DenseIntElementsAttr limit_val; + DenseIntElementsAttr delta_val; + if (matchPattern(start, m_Constant(&start_val)) && + matchPattern(limit, m_Constant(&limit_val)) && + matchPattern(delta, m_Constant(&delta_val))) { + auto size = llvm::APIntOps::RoundingSDiv( + *limit_val.begin() - *start_val.begin(), *delta_val.begin(), + llvm::APInt::Rounding::DOWN); + return RangeOp::build( + builder, result, + RankedTensorType::get( + size.getSExtValue(), + start.getType().cast().getElementType()), + start, limit, delta); + } + return RangeOp::build( + builder, result, + RankedTensorType::get( + {-1}, start.getType().cast().getElementType()), + start, limit, delta); +} +//===----------------------------------------------------------------------===// +// RankOp +//===----------------------------------------------------------------------===// + +void RankOp::build(OpBuilder &builder, OperationState &result, Value input) { + return RankOp::build(builder, result, + RankedTensorType::get({}, builder.getIntegerType(32)), + input); +} + +// This will create a constant value for RankOp of a ranked tensor. 
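+// For example, tf.Rank of a tensor<2x3x4xf32> operand folds to the i32
+// constant 3; unranked operands are left unfolded.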
+OpFoldResult RankOp::fold(ArrayRef operands) { + auto type = input().getType(); + auto ranked_type = type.dyn_cast(); + if (!ranked_type) return {}; + + auto output_type = getType().cast(); + int32_t rank = ranked_type.getRank(); + return DenseIntElementsAttr::get(output_type, rank); +} + +//===----------------------------------------------------------------------===// +// RealDivOp +//===----------------------------------------------------------------------===// + +void RealDivOp::getCanonicalizationPatterns(OwningRewritePatternList &results, + MLIRContext *context) { + results.insert(context); +} + +OpFoldResult RealDivOp::fold(ArrayRef operands) { + return IdentityArithmeticOpFolder(*this, operands); +} + +//===----------------------------------------------------------------------===// +// ReshapeOp +//===----------------------------------------------------------------------===// + +// TODO(b/128020684): Verify the output type. +static LogicalResult Verify(ReshapeOp op) { + auto shape_type = op.shape().getType().cast(); + if (!shape_type.hasRank()) return success(); + if (shape_type.getRank() != 1) + return op.emitOpError("shape must be 1D tensor"); + auto rank_by_shape = shape_type.getShape()[0]; + auto type_of_tensor = op.tensor().getType().cast(); + // No compile time verification for unknown sized shape. + if (rank_by_shape == -1 || !type_of_tensor.hasStaticShape()) return success(); + int64_t num_by_tensor = type_of_tensor.getNumElements(); + + auto out_ty = op.getType().dyn_cast(); + if (out_ty && out_ty.hasStaticShape()) { + int64_t num_output_elements = out_ty.getNumElements(); + if (num_by_tensor != num_output_elements) + return op.emitOpError() + << "number of output elements (" << num_output_elements + << ") does not match expected number of elements (" + << num_by_tensor << ")"; + } + + // Check values if constant shape. No compiling time verification for + // non-constant shape. + auto *shape_op = op.shape().getDefiningOp(); + if (!shape_op) return success(); + Attribute shape_cst; + if (!matchPattern(shape_op, m_Constant(&shape_cst))) return success(); + auto shape_cst_attr = shape_cst.dyn_cast(); + if (!shape_cst_attr) return op.emitOpError("shape must be a valid tensor"); + + if (auto opaque_attr = shape_cst_attr.dyn_cast()) { + opaque_attr.decode(shape_cst_attr); + } + + // We know the shape is a 1-D Tensor, then let us get the number of + // elements it implies. + unsigned num_by_shape = 1; + unsigned unknown_dim_count = 0; + for (int i = 0, e = rank_by_shape; i != e; ++i) { + auto num = shape_cst_attr.getValue(i).getInt(); + // The dimension size value can be -1, and that the real size needs to + // be computed so that the total size remains constant. At most one + // component of shape can be -1. + if (num == -1) { + if (++unknown_dim_count > 1) { + return op.emitOpError("more than one component of shape are -1"); + } + } else { + num_by_shape *= num; + } + } + // If there is one component of shape is -1, the dimension should be + // computed so that the total size remains constant. + if (unknown_dim_count == 1) { + if (num_by_tensor % num_by_shape != 0) + return op.emitOpError( + "one component of shape is -1 but couldn't infer the dimension"); + return success(); + } + // If the elements by the tensor and implies by the shape don't match, + // fail this static check. 
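+  // For example, a 6-element tensor cannot be reshaped with the constant shape
+  // [4, 2] (8 elements), while the shape [-1, 2] infers the unknown dimension
+  // as 3.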
+ if (num_by_tensor != num_by_shape) { + return op.emitOpError( + "mismatch in tensor elements and shape implied elements"); + } + return success(); +} + +void ReshapeOp::build(OpBuilder &builder, OperationState &result, Value tensor, + Value shape) { + auto ttype = tensor.getType().cast(); + auto etype = ttype.getElementType(); + + auto unranked = [&builder, etype, &result, shape, tensor]() { + return ReshapeOp::build(builder, result, UnrankedTensorType::get(etype), + tensor, shape); + }; + + // If tensor is unranked then we have no info about output of shape. + if (!ttype.hasRank()) return unranked(); + + DenseIntElementsAttr attr_shape; + if (matchPattern(shape, m_Constant(&attr_shape))) { + llvm::SmallVector const_shape; + const_shape.reserve(attr_shape.getNumElements()); + + // Detect if reshape output shape is folded. + bool flatten = false; + int unknown_index = -1; + // The product of constant shape argument excluding unknown dimension. + int64_t product_cshape = 1; + for (auto e : llvm::enumerate(attr_shape)) { + int64_t val = e.value().getSExtValue(); + if (IsUnknownDimOrRank(val)) { + if (flatten) { + mlir::emitError(result.location) + << "only one unknown dimension allowed"; + return; + } + flatten = true; + unknown_index = e.index(); + } else { + product_cshape *= val; + } + const_shape.push_back(val); + } + + // Compute the value of the unknown dimension. + if (flatten) { + // Compute number of elements in tensor shape. + auto tshape = ttype.getShape(); + int64_t product_tshape = std::accumulate(tshape.begin(), tshape.end(), 1, + std::multiplies()); + // Set the unknown dimension such that total number of elements remain + // constant. + // Note: The case where the ratio is not integral, and so the total size + // of reshape not constant, is checked in verify function. + const_shape[unknown_index] = product_tshape / product_cshape; + } + return ReshapeOp::build(builder, result, + RankedTensorType::get(const_shape, etype), tensor, + shape); + } + return unranked(); +} + +void ReshapeOp::getCanonicalizationPatterns(OwningRewritePatternList &results, + MLIRContext *context) { + results.insert(context); +} + +OpFoldResult ReshapeOp::fold(ArrayRef operands) { + Value tensor = this->tensor(); + Value shape = this->shape(); + + // Fold reshape if operand and result types are the same and all dimensions + // are statically known (no-op reshape). + // TODO(ezhulenev): Add the same folding for BroadcastToOp. + auto result_ty = getType().dyn_cast(); + if (result_ty && result_ty.hasStaticShape() && + result_ty == tensor.getType()) { + return tensor; + } + + // Fold reshape if the shape is computed from the input tensor: + // + // %shape = tf.Shape(%arg) // [? x ...] + // %dim0 = tf.StridedSlice(%shape, 0, 1, 1) // get unknown dim value + // %new_shape = tf.Pack(dim0, ...) { axis = 0 } // [? x ...] + // %reshape = tf.Reshape(%arg, %new_shape) // this is no-op + // + // Where `...` are some statically known dimensions. In this case reshape is + // a no-op and can be replaced by %arg (assuming `...` are equal). + auto pack_op = dyn_cast_or_null(shape.getDefiningOp()); + if (!pack_op || pack_op.values().size() < 2) return {}; + + // Dimensions packed along axis = 0 (pack scalars into vector). + if (pack_op.axis().getSExtValue() != 0) return {}; + + // First packed value is defined by a strided slice operation. + auto slice_op = + dyn_cast_or_null(pack_op.values()[0].getDefiningOp()); + if (!slice_op) return {}; + + // Input to the slice op is defined by shape operation. 
+ auto shape_op = dyn_cast_or_null(slice_op.input().getDefiningOp()); + if (!shape_op || shape_op.input() != tensor) return {}; + + // All masks are `0` except `shrink_axis_mask` which is equal to `1` (slicing + // scalar value from input vector). + if (slice_op.begin_mask().getSExtValue() != 0 || + slice_op.ellipsis_mask().getSExtValue() != 0 || + slice_op.end_mask().getSExtValue() != 0 || + slice_op.new_axis_mask().getSExtValue() != 0 || + slice_op.shrink_axis_mask().getSExtValue() != 1) + return {}; + + // Returns a value if the `value` is defined by a ConstOp with a single + // integer element in it and has an expected rank. + auto get_value = [](Value value, int expected_rank) -> Optional { + auto const_op = dyn_cast_or_null(value.getDefiningOp()); + if (!const_op) return None; + + auto value_attr = const_op.value().dyn_cast(); + if (!value_attr || value_attr.getNumElements() != 1) return None; + + auto value_ty = value_attr.getType(); + if (!value_ty.hasRank() || value_ty.getRank() != expected_rank) return None; + + auto splat = value_attr.getSplatValue(); + return splat.getValue().getSExtValue(); + }; + + // All other packed values are scalar constants. + SmallVector packed_dims; + packed_dims.reserve(pack_op.values().size() - 1); + for (Value operand : llvm::drop_begin(pack_op.values(), 1)) { + if (auto dim = get_value(operand, /*expected_rank=*/0)) { + packed_dims.push_back(*dim); + } else { + return {}; + } + } + + // Slice exactly the first shape dimension: + // begin = [0] end = [1], strides = [1] + auto begin = get_value(slice_op.begin(), /*expected_rank=*/1); + auto end = get_value(slice_op.end(), /*expected_rank=*/1); + auto strides = get_value(slice_op.strides(), /*expected_rank=*/1); + if (!begin.hasValue() || !end.hasValue() || !strides.hasValue() || + *begin != 0 || *end != 1 || *strides != 1) + return {}; + + // First tensor dimension is dynamic. + auto arg_ty = tensor.getType().dyn_cast(); + if (!arg_ty || arg_ty.getNumDynamicDims() != 1 || !arg_ty.isDynamicDim(0)) + return {}; + + // Argument tensor rank is equal to the number of packed dimensions. + if (arg_ty.getRank() != pack_op.values().size()) return {}; + + // All other dimensions are statically known and equal to packed dims. + auto arg_dims = llvm::drop_begin(arg_ty.getShape(), 1); + if (!std::equal(arg_dims.begin(), arg_dims.end(), packed_dims.begin())) + return {}; + + return tensor; +} + +//===----------------------------------------------------------------------===// +// SelectOp +//===----------------------------------------------------------------------===// + +void SelectOp::getCanonicalizationPatterns(OwningRewritePatternList &results, + MLIRContext *context) { + results.insert(context); +} + +// Verifies a few extra requirements on SelectOp: +// (1) `then` and `else` must have same shape +// (2) At least one of the following must be true: +// (a) `cond` has the same rank as `then` and `else` +// (b) `cond` is a scalar +// (c) `cond` is a vector AND `then` and `else` are non-scalar with their +// first dimension equal to `cond`. +static LogicalResult Verify(SelectOp op) { + auto then_tensor = op.t().getType().cast(); + auto else_tensor = op.e().getType().cast(); + // Check (1). + if (!AreCastCompatible({then_tensor, else_tensor})) + return op.emitOpError() << "requires t and e have compatible shapes"; + + // Get data rank (if exists). + int data_rank; + // If data is unranked or data_rank is 0, this will remain -2. Otherwise + // refers to first dimension of then and/or else. 
+ int data_first_dim = -2; + bool then_has_rank = then_tensor.hasRank(); + bool else_has_rank = else_tensor.hasRank(); + if (then_has_rank && else_has_rank) { + data_rank = then_tensor.getRank(); + if (then_tensor.getRank() > 0) + data_first_dim = then_tensor.getShape().front(); + if (else_tensor.getRank() > 0) + data_first_dim = std::max( + static_cast(else_tensor.getShape().front()), data_first_dim); + } else if (then_has_rank) { + data_rank = then_tensor.getRank(); + if (then_tensor.getRank() > 0) + data_first_dim = then_tensor.getShape().front(); + } else if (else_has_rank) { + data_rank = else_tensor.getRank(); + if (else_tensor.getRank() > 0) + data_first_dim = else_tensor.getShape().front(); + } else { + // Neither has a rank. + return success(); + } + + auto cond_tensor = op.condition().getType().dyn_cast(); + if (!cond_tensor) return success(); + auto cond_rank = cond_tensor.getRank(); + // Check (2a) and (2b). + if (cond_rank == 0 || cond_rank == data_rank) return success(); + // Check (2c). + if (cond_rank == 1) { + auto cond_shape = cond_tensor.getShape().front(); + if (data_rank == 0) { + return op.emitOpError() + << "requires that t and e are nonscalar when pred is a vector"; + } + // We know `data` tensor has a rank of at least 1. + if (data_first_dim != -1 && cond_shape != -1 && + data_first_dim != cond_shape) { + return op.emitOpError() << "requires that, when pred is a vector, the " + "shape matches the first dimension of t and e"; + } + return success(); + } + // None of (2a,b,c) were true; fail. + return op.emitOpError() << "requires that pred is a scalar OR has the same " + "rank as t and e OR is a vector"; +} + +//===----------------------------------------------------------------------===// +// SelectV2Op +//===----------------------------------------------------------------------===// + +static Type InferSelectV2OpType(Value condition, Value e, Value t) { + Type element_ty = e.getType().cast().getElementType(); + auto unranked_ty = UnrankedTensorType::get(element_ty); + + Type broadcasted_ty = + OpTrait::util::getBroadcastedType(e.getType(), t.getType()); + if (!broadcasted_ty) return unranked_ty; + + auto cond_ranked_ty = condition.getType().dyn_cast(); + auto broadcasted_ranked_ty = broadcasted_ty.dyn_cast(); + if (!cond_ranked_ty || !broadcasted_ranked_ty) return unranked_ty; + + // Explicitly get broadcasted output type as element types of condition may + // not be same as the broadcated type's element type. + SmallVector result_shape; + if (!OpTrait::util::getBroadcastedShape(cond_ranked_ty.getShape(), + broadcasted_ranked_ty.getShape(), + result_shape)) + return unranked_ty; + return RankedTensorType::get(result_shape, element_ty); +} + +void SelectV2Op::build(OpBuilder &builder, OperationState &result, + Value condition, Value e, Value t) { + build(builder, result, InferSelectV2OpType(condition, e, t), condition, e, t); +} + +//===----------------------------------------------------------------------===// +// ShapeOp +//===----------------------------------------------------------------------===// + +namespace { +// Validates Shape/ShapeN/VariableShape operand and associated result types. +LogicalResult VerifyShapeOperandAndResult(Operation *op, Type operand_type, + Type result_type, + int variadic_idx = -1) { + std::string variadic_idx_str = + variadic_idx < 0 ? 
"" : llvm::formatv(" #{0}", variadic_idx).str(); + + auto result_ranked_type = result_type.dyn_cast(); + if (!result_ranked_type) return success(); + if (result_ranked_type.getShape().size() != 1) + return op->emitOpError("requires 1D type for result") << variadic_idx_str; + + auto operand_ranked_type = operand_type.dyn_cast_or_null(); + if (operand_ranked_type) { + // The operand is a ranked tensor. + if (result_ranked_type.hasStaticShape() && + !operand_ranked_type.getShape().empty() && + result_ranked_type.getDimSize(0) != + operand_ranked_type.getShape().size()) + return op->emitOpError("requires dimension size of result") + << variadic_idx_str << " to match rank of operand" + << variadic_idx_str; + } else if (result_ranked_type.hasStaticShape()) { + // The operand is an unranked tensor, print a warning if the result + // is static. + // Note: We do not handle this situation as an error, this would be too + // restrictive due to incompleteness of shape inference at this point. + op->emitWarning("has static shape result") + << variadic_idx_str << " for unranked operand" << variadic_idx_str; + } + + Type element_type = result_ranked_type.getElementType(); + if (!element_type.isSignlessInteger(32) && + !element_type.isSignlessInteger(64)) + return op->emitOpError("requires int32 or int64 return type for result") + << variadic_idx_str; + + return success(); +} +} // anonymous namespace + +static LogicalResult Verify(ShapeOp op) { + return VerifyShapeOperandAndResult(op, op.input().getType(), op.getType()); +} + +// Converts shape of the given type to attribute if it is of ranked tensor type. +// Returned attribute has integer elements of the given width. +static Attribute ConvertShapeToAttr(Type input_ty, int out_width) { + auto ranked_ty = input_ty.dyn_cast(); + if (!ranked_ty || !ranked_ty.hasStaticShape()) return {}; + + auto shape = ranked_ty.getShape(); + int rank = shape.size(); + + SmallVector dimensions; + dimensions.reserve(rank); + for (int i = 0; i < rank; ++i) + dimensions.push_back(APInt(out_width, shape[i])); + + auto result_type = RankedTensorType::get( + {rank}, IntegerType::get(out_width, input_ty.getContext())); + return DenseElementsAttr::get(result_type, dimensions); +} + +OpFoldResult ShapeOp::fold(ArrayRef operands) { + int width = + getType().cast().getElementType().getIntOrFloatBitWidth(); + return ConvertShapeToAttr(getOperand().getType(), width); +} + +void ShapeOp::build(OpBuilder &builder, OperationState &result, Value input, + BoolAttr use32Bit) { + auto rankedTensorType = input.getType().dyn_cast(); + int64_t rank = rankedTensorType ? rankedTensorType.getRank() : -1; + auto out_type = use32Bit.getValue() ? 
builder.getIntegerType(32) + : builder.getIntegerType(64); + return ShapeOp::build(builder, result, + RankedTensorType::get({rank}, out_type), input); +} + +//===----------------------------------------------------------------------===// +// ShapeNOp +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(ShapeNOp op) { + const size_t num_tensors = op.N(); + + if (op.getNumOperands() != num_tensors) + return op.emitOpError() << "requires " << num_tensors << " operand(s), got " + << op.getNumOperands() << " operand(s)"; + + if (op.getNumResults() != num_tensors) + return op.emitOpError() << "requires " << num_tensors << " result(s), got " + << op.getNumResults() << " result(s)"; + + for (auto i : llvm::seq(0, num_tensors)) { + auto verification = VerifyShapeOperandAndResult( + op, op.getOperand(i).getType(), op.getResult(i).getType(), i); + if (failed(verification)) return verification; + } + + return success(); +} + +LogicalResult ShapeNOp::fold(ArrayRef operands, + SmallVectorImpl &results) { + if (getNumOperands() == 0) return success(); + int width = + getType(0).cast().getElementType().getIntOrFloatBitWidth(); + + for (Type input_ty : getOperandTypes()) { + OpFoldResult result = ConvertShapeToAttr(input_ty, width); + if (!result) return failure(); + + results.push_back(result); + } + return success(); +} + +// TODO(hinsu): Add canonicalization pattern for ShapeN ops that don't have all +// static input shapes. Replacing output values corresponding to static input +// types may enable optimizations in users of the values. + +//===----------------------------------------------------------------------===// +// SizeOp +//===----------------------------------------------------------------------===// + +// Verifies that, +// +// * Input type, if is a ranked tensor, has at most INT32_MAX dimensions. +// +static LogicalResult Verify(SizeOp op) { + if (!HasRankAtMost(op.input(), std::numeric_limits::max())) + return op.emitOpError( + "requires ranked input tensor to be of rank INT32_MAX or less"); + + return success(); +} + +//===----------------------------------------------------------------------===// +// SliceOp +//===----------------------------------------------------------------------===// + +// Verifies that: +// +// - operands begin and size are 1D with the same number of elements. +// - if the input is a ranked tensor, the rank of the input equals the number +// of elements in operands begin and size. 
+// - if begin are constants, that +// 0 <= begin[i] <= begin[i] + size[i] <= input_ty.getShape()[i] +// - if begins aren't constant but the input is a ranked tensor, that +// size[i] <= input_ty.getShape()[i] +// +static LogicalResult Verify(SliceOp op) { + RankedTensorType begin_ty = GetRankedTensorTypeForOperand(op.begin()); + if (begin_ty && begin_ty.getRank() != 1) { + return op.emitOpError() << "requires begin operand to be 1D tensor"; + } + + RankedTensorType size_ty = GetRankedTensorTypeForOperand(op.size()); + if (size_ty && size_ty.getRank() != 1) { + return op.emitOpError() << "requires size operand to be 1D tensor"; + } + + if (!begin_ty || !size_ty || !begin_ty.hasStaticShape() || + !size_ty.hasStaticShape()) + return success(); + + if (begin_ty.getNumElements() != size_ty.getNumElements()) { + return op.emitOpError() << "requires begin and size operands to have the" + " same number of elements"; + } + + auto input_ty = op.input().getType().dyn_cast(); + if (input_ty && begin_ty.getNumElements() != input_ty.getRank()) { + return op.emitOpError() << "requires number of elements in begin and size" + "are equal to input rank"; + } + + DenseIntElementsAttr begin_indices; + if (matchPattern(op.begin(), m_Constant(&begin_indices))) { + DenseIntElementsAttr slice_sizes; + bool constant_slice_sizes = + matchPattern(op.size(), m_Constant(&slice_sizes)); + int dim = 0; + for (const APInt &raw_begin_index : begin_indices.getValues()) { + int64_t begin_index = raw_begin_index.getSExtValue(); + int64_t input_size = input_ty ? input_ty.getShape()[dim] : -1; + int64_t slice_size = constant_slice_sizes + ? slice_sizes.getValue(dim).getSExtValue() + : 0; + if (slice_size == -1 && input_size != -1) { + slice_size = input_size - begin_index; + } + if (begin_index < 0 || + (input_size != -1 && begin_index + slice_size > input_size)) { + return op.emitOpError() + << "requires 0 <= begin[i] <= begin[i] + size[i] <= Di"; + } + ++dim; + } + } else if (input_ty) { + // If the inputs are ranked, we can do a few more sanity checks. + DenseIntElementsAttr slice_sizes; + if (matchPattern(op.size(), m_Constant(&slice_sizes))) { + auto input_shape = input_ty.getShape(); + for (int64_t i = 0; i < input_ty.getRank(); ++i) { + int64_t slice_size = slice_sizes.getValue(i).getInt(); + int64_t input_size = input_shape[i]; + if (slice_size != -1 && input_size != -1 && slice_size > input_size) { + return op.emitOpError() << "requires size[i] <= Di, even if begin[i] " + "is unknown at compile time"; + } + } + } + } + + return success(); +} + +//===----------------------------------------------------------------------===// +// SoftmaxOp +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(SoftmaxOp op) { + if (!HasRankAtLeast(op.logits(), 1)) { + return op.emitOpError("requires operand to have rank at least 1"); + } + return success(); +} + +//===----------------------------------------------------------------------===// +// SoftmaxCrossEntropyWithLogitsOp +//===----------------------------------------------------------------------===// + +// Verifies that, +// +// * Input types are broadcast compatible and the broadcasted type has rank two. 
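+// For instance (illustrative shapes only): features of type tensor<2x10xf32>
+// and labels of type tensor<10xf32> have the broadcasted type
+// tensor<2x10xf32>, which has rank two, so the check below succeeds.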
+//
+static LogicalResult Verify(SoftmaxCrossEntropyWithLogitsOp op) {
+  auto broadcasted_ty = OpTrait::util::getBroadcastedType(
+                            op.features().getType(), op.labels().getType())
+                            .dyn_cast_or_null<ShapedType>();
+  if (!broadcasted_ty ||
+      (broadcasted_ty.hasRank() && broadcasted_ty.getRank() != 2))
+    return op.emitOpError(
+        "requires features and labels to be broadcast compatible to rank two");
+
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// SparseSoftmaxCrossEntropyWithLogitsOp
+//===----------------------------------------------------------------------===//
+
+static LogicalResult Verify(SparseSoftmaxCrossEntropyWithLogitsOp op) {
+  if (!IsOfRankOrUnranked(op.features(), 2)) {
+    return op.emitOpError("requires features operand of rank two");
+  }
+  if (!IsOfRankOrUnranked(op.labels(), 1)) {
+    return op.emitOpError("requires labels operand of rank one");
+  }
+  auto features_ty = op.features().getType().dyn_cast<RankedTensorType>();
+  auto labels_ty = op.labels().getType().dyn_cast<RankedTensorType>();
+  if (features_ty && labels_ty) {
+    int64_t features_batches = features_ty.getDimSize(0);
+    int64_t labels_batches = labels_ty.getDimSize(0);
+    if (!ShapedType::isDynamic(features_batches) &&
+        !ShapedType::isDynamic(labels_batches) &&
+        features_batches != labels_batches)
+      return op.emitOpError(
+          "requires features and labels with matching first dimension");
+  }
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// SplitOp
+//===----------------------------------------------------------------------===//
+
+// Verifies the input and split dimension operands for tf.Split/tf.SplitV.
+// Writes the split dimension's index (adjusted with the input rank) via
+// `dim_index` if it's a constant.
+template <class Op>
+LogicalResult VerifySplitInputAndSplitDim(Op op, Optional<int64_t> *dim_index) {
+  *dim_index = llvm::None;
+
+  Value split_dim = op.split_dim();
+  if (auto split_dim_type = split_dim.getType().dyn_cast<RankedTensorType>())
+    if (split_dim_type.getRank() != 0)
+      return op.emitOpError(
+          "split dimension should be an integer scalar tensor");
+
+  // We can perform further verification if the input tensor to be split has
+  // known rank and the split dimension tensor is a constant.
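+  // For example (illustrative values): with a rank-3 input, a constant
+  // split_dim must lie in [-3, 3); a split_dim of -1 is adjusted below to the
+  // last dimension, index 2.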
+ + auto input_type = op.value().getType().template dyn_cast(); + if (!input_type) return success(); + + int64_t input_rank = input_type.getRank(); + if (input_rank == 0) + return op.emitOpError("cannot split scalar input tensor"); + + DenseIntElementsAttr split_dim_attr; + if (!matchPattern(split_dim, m_Constant(&split_dim_attr))) return success(); + + int64_t index = (*split_dim_attr.begin()).getSExtValue(); + + if (index + input_rank < 0 || index >= input_rank) { + return op.emitOpError("split dimension must be in range [-") + << input_rank << ", " << input_rank << ")"; + } + + if (index < 0) index += input_rank; + *dim_index = index; + + return success(); +} + +static LogicalResult Verify(SplitOp op) { + Optional dim_index; + if (failed(VerifySplitInputAndSplitDim(op, &dim_index))) return failure(); + if (!dim_index) return success(); + + int64_t input_dim_size = + op.value().getType().cast().getDimSize(*dim_index); + if (input_dim_size == ShapedType::kDynamicSize) return success(); + + if (input_dim_size % op.getNumResults() != 0) + return op.emitOpError("dimension #") + << *dim_index << " not divisible by the number of result tensors"; + + return success(); +} + +//===----------------------------------------------------------------------===// +// SplitVOp +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(SplitVOp op) { + auto split_sizes_type = + op.size_splits().getType().dyn_cast(); + if (!split_sizes_type) return success(); + + if (split_sizes_type.getRank() != 1 || + split_sizes_type.getDimSize(0) != op.getNumResults()) + return op.emitOpError("split sizes should be a 1D tensor of ") + << op.getNumResults() << " elements"; + + Optional dim_index = 0; + if (failed(VerifySplitInputAndSplitDim(op, &dim_index))) return failure(); + if (!dim_index) return success(); + + int64_t input_dim_size = + op.value().getType().cast().getDimSize(*dim_index); + if (input_dim_size == ShapedType::kDynamicSize) return success(); + + // If split sizes come from a constant, they must sum to the dimension size + // along split_dim, and we can have no more than one dynamic dimension. + DenseIntElementsAttr split_sizes_attr; + if (!matchPattern(op.size_splits(), m_Constant(&split_sizes_attr))) + return success(); + + int64_t total_dim_size = 0; // Total dimension size assigned to splits + llvm::Optional dynamic_dim_index; + + SmallVector split_sizes; + split_sizes.reserve( + split_sizes_attr.getType().cast().getNumElements()); + + for (auto dim : llvm::enumerate(split_sizes_attr)) { + int64_t dim_val = dim.value().getSExtValue(); + split_sizes.push_back(dim_val); + if (dim_val == ShapedType::kDynamicSize) { + // We cannot have more than one dynamic dimension. 
+ if (dynamic_dim_index) + return op.emitOpError( + "cannot have more than one dynamic dimension in split sizes"); + dynamic_dim_index = dim.index(); + } else { + total_dim_size += dim_val; + } + } + + if (!dynamic_dim_index && total_dim_size != input_dim_size) + return op.emitOpError( + "split sizes must sum up to the dimension size along split " + "dimension, found ") + << total_dim_size << " vs " << input_dim_size; + + if (dynamic_dim_index && total_dim_size > input_dim_size) + return op.emitOpError( + "split sizes must sum up to be less than or equal to the " + "dimension size along split dimension, found ") + << total_dim_size << " vs " << input_dim_size; + + return success(); +} + +//===----------------------------------------------------------------------===// +// SquareOp +//===----------------------------------------------------------------------===// + +void SquareOp::getCanonicalizationPatterns(OwningRewritePatternList &results, + MLIRContext *context) { + results.insert(context); +} + +//===----------------------------------------------------------------------===// +// SubOp +//===----------------------------------------------------------------------===// + +void SubOp::getCanonicalizationPatterns(OwningRewritePatternList &results, + MLIRContext *context) { + results.insert(context); +} + +OpFoldResult SubOp::fold(ArrayRef operands) { + return IdentityArithmeticOpFolder(*this, operands); +} + +//===----------------------------------------------------------------------===// +// SumOp +//===----------------------------------------------------------------------===// + +void SumOp::build(OpBuilder &builder, OperationState &result, Value input, + Value reduction_indices, BoolAttr keep_dims) { + Type out_ty = + InferReductionOpType(input, reduction_indices, keep_dims, &builder); + build(builder, result, out_ty, input, reduction_indices, keep_dims); +} + +//===----------------------------------------------------------------------===// +// StridedSliceOp +//===----------------------------------------------------------------------===// + +// TODO(b/154160827): Add a canonicalization pattern from tf.StridedSliceOp to +// tf.SliceOp if both of the following are true: +// - All strides have a known value equal to 1 +// - No masks are set (or masks can be applied by transforming the inputs to +// Slice) + +// Verifies that, +// +// - begin, end and strides operands are 1D and they have the same number of +// elements. Here, the number of elements should be less than 32 to support +// 32-bit mask attributes. +// - None of the strides values are zero. +// - Ellipsis mask can have at most one bit set. + +template +static LogicalResult VerifyStridedSliceBase(OpTy op) { + // Expected size for operands begin, end and strides vector operands. + int64_t expected_size = -1; + + for (Value val : {op.begin(), op.end(), op.strides()}) { + auto operand_ty = val.getType().dyn_cast(); + if (!operand_ty || !operand_ty.hasStaticShape()) { + // TensorFlow constant ops may have non-static shape because the shape is + // not propagated during constant folding. If the defining op for this + // operand is a constant op, use the constant op's attribute to get the + // actual shape. 
+ DenseIntElementsAttr attr; + if (!matchPattern(val, m_Constant(&attr))) continue; + operand_ty = attr.getType(); + } + + if (operand_ty.getRank() != 1) + return op.emitOpError() + << "requires begin, end and strides to be 1D tensors"; + + int64_t length = operand_ty.getDimSize(0); + if (length == -1) continue; + + if (expected_size == -1) { + // This op uses 32-bit masks. + if (length >= 32) + return op.emitOpError( + "requires begin, end and strides operands with less than 32 " + "elements"); + + expected_size = length; + } else if (length != expected_size) { + return op.emitOpError() << "requires begin, end and strides to have the " + "same number of elements"; + } + } + + // If strides are constants, verify that none of the element is zero. + DenseIntElementsAttr strides; + if (matchPattern(op.strides(), m_Constant(&strides))) { + if (llvm::is_contained(strides.getValues(), 0)) + return op.emitOpError("requires non-zero strides"); + } + + // Use bit compares to ensure ellipsis_mask is 0 or a power of 2, i.e. there + // exists only no more than one ellipsis. + uint32_t ellipsis_mask = op.ellipsis_mask().getZExtValue(); + if (ellipsis_mask != 0 && !llvm::isPowerOf2_32(ellipsis_mask)) + return op.emitOpError("cannot have multiple ellipses"); + + return success(); +} + +// Clamps the given `val`: returns `low` if `val` is less than `low`; returns +// `high` if `high` is less than `val`; otherwise returns `val`. +template +constexpr const T &Clamp(const T &val, const T &low, const T &high) { + assert(!(high < low)); + return (val < low) ? low : (high < val) ? high : val; +} + +// Checks if the `index` bit of `val` is set. +template +constexpr bool IsSet(const T &val, unsigned index) { + return (val & (1 << index)) != 0; +} + +// Sets the `index` bit of `val`. +template +constexpr void Set(T &val, unsigned index) { + val |= (1 << index); +} + +// Unset the `index` bit of `val`. +template +constexpr void Unset(T &val, unsigned index) { + val &= ~(1 << index); +} + +// Copy the `src_index` bit of `src` to `dst_index` bit of `dst`. +template +constexpr void CopyBit(const T &src, unsigned src_index, T &dst, + unsigned dst_index) { + if (IsSet(src, src_index)) + Set(dst, dst_index); + else + Unset(dst, dst_index); +} + +// The sparse spec of strided slice does not correspond to the number of +// dimensions. For example, sparse spec for foo[..., 3:10] for foo of shape (2, +// 4, 8) would have dims = 2. +struct SparseSliceSpec { + int64_t dims; + int32_t begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask; + const ArrayRef &begin; + const ArrayRef &end; + const ArrayRef &strides; +}; + +// The dense spec of strided slice is the canonicalized version of sparse spec. +// The number of dimensions of dense spec correspond to the number of dimensions +// in operand tensor. +struct DenseSliceSpec { + int64_t dims; + int32_t begin_mask, end_mask, shrink_axis_mask; + SmallVectorImpl &begin; + SmallVectorImpl &end; + SmallVectorImpl &strides; +}; + +// Make a sparse spec into a dense index spec. +// The sparse spec does not correspond to the number of dimensions +// Make a dense spec that corresponds to the number of dimensions +// +// For example suppose foo[...,3:, 2] on foo.shape=(2,2,3,4) then +// we need to produce the missing begin_mask, end_mask for the first two +// dimensions i.e. foo[:, :, 3:, 2]. 
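+// For that example (a walk-through of the expansion below, with the masks
+// spelled out for illustration): the dense spec has dims = 4; dimensions 0 and
+// 1 come from the ellipsis and get begin = end = 0 and stride = 1 with their
+// begin_mask and end_mask bits set; dimension 2 keeps begin = 3 with its
+// end_mask bit copied from the sparse spec; dimension 3 keeps begin = 2 with
+// its shrink_axis_mask bit set.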
+static void BuildDenseSliceSpec(const SparseSliceSpec &sparse, + DenseSliceSpec *dense) { + // Build expanded dense begin, end, strides, begin_mask, end_mask, and + // shrink_axis_mask. + dense->begin.resize(dense->dims); + dense->end.resize(dense->dims); + dense->strides.resize(dense->dims); + dense->begin_mask = 0; + dense->end_mask = 0; + dense->shrink_axis_mask = 0; + + // Count number of new_axis after ellipsis. This helps in calculating the + // number of dimensions ellipsis represents in the sparse spec. + bool ellipsis_seen = false; + int num_new_axis_after_ellipsis = 0; + for (int sparse_index = 0; sparse_index < sparse.dims; ++sparse_index) { + if (ellipsis_seen && IsSet(sparse.new_axis_mask, sparse_index)) + num_new_axis_after_ellipsis++; + if (IsSet(sparse.ellipsis_mask, sparse_index)) ellipsis_seen = true; + } + + int dense_index = 0; + for (int sparse_index = 0; sparse_index < sparse.dims; ++sparse_index) { + if (IsSet(sparse.new_axis_mask, sparse_index)) continue; + if (IsSet(sparse.ellipsis_mask, sparse_index)) { + auto next_index = std::min(dense->dims - (sparse.dims - sparse_index) + + 1 + num_new_axis_after_ellipsis, + dense->dims); + // Expand ellipsis into the appropriate dense indices. From current index + // until next_index, all dimensions would have begin and end masks set and + // stride 1, i.e., get all elements in those dimensions. + for (; dense_index < next_index; ++dense_index) { + dense->begin[dense_index] = dense->end[dense_index] = 0; + dense->strides[dense_index] = 1; + Set(dense->begin_mask, dense_index); + Set(dense->end_mask, dense_index); + } + continue; + } + assert(dense_index < dense->dims); + // Copy over the sparse indices to dense indices if ellipsis_mask and + // new_axis_mask are not set. + dense->begin[dense_index] = sparse.begin[sparse_index]; + dense->end[dense_index] = sparse.end[sparse_index]; + dense->strides[dense_index] = sparse.strides[sparse_index]; + CopyBit(sparse.begin_mask, sparse_index, dense->begin_mask, dense_index); + CopyBit(sparse.end_mask, sparse_index, dense->end_mask, dense_index); + CopyBit(sparse.shrink_axis_mask, sparse_index, dense->shrink_axis_mask, + dense_index); + dense_index++; + } +} + +// For the given `input_shape`, calculates the sliced shape using the given +// `begin`, `end`, and `stride` ranges and `begin_mask`, `end_mask`, and +// `shrink_axis_mask` masks. Updates the result back to `input_shape`. If +// `shrink_axis_mask` is not zero, this function will not drop the corresponding +// dimensions in `input_shape`; it will turn them into 1s. At the same time, +// canonicalizes `begin`, `end`, and `strides. The calculation follows +// tf.StridedSlice op semantics. +static void CalculateSlicedShapeFromDenseIndices( + MutableArrayRef input_shape, int32_t begin_mask, int32_t end_mask, + int32_t shrink_axis_mask, MutableArrayRef begin, + MutableArrayRef end, MutableArrayRef stride) { + assert(input_shape.size() <= 32); // Only 32-bit masks are supported. + + // Make sure ranges' ranks are consistent with the input. 
+ assert(input_shape.size() == begin.size()); + assert(input_shape.size() == end.size()); + assert(input_shape.size() == stride.size()); + + for (int i = 0, e = input_shape.size(); i < e; ++i) { + if (ShapedType::isDynamic(input_shape[i])) continue; + + int64_t dim_i = input_shape[i]; + int64_t begin_i = begin[i]; + int64_t end_i = end[i]; + int64_t stride_i = stride[i]; + + // [0]: mask for begin, [1]: mask for end + int64_t masks[] = {begin_mask & (1 << i), end_mask & (1 << i)}; + // [0]: bound for begin, [1]: bound for end + int64_t bounds[] = {stride_i > 0 ? 0 : -1, + stride_i > 0 ? dim_i : dim_i - 1}; + + // Canonicalizes the given range `point` (begin/end) according to the + // current dimension. `c` means case: 0 for begin, 1 for end. + auto canonicalize = [&](int64_t point, int c) { + if (masks[c]) return stride_i > 0 ? bounds[c] : bounds[(c + 1) & 1]; + + // Add dim as offset to negative range point. + point = point < 0 ? dim_i + point : point; + return Clamp(point, bounds[0], bounds[1]); + }; + + begin_i = canonicalize(begin_i, 0); + end_i = canonicalize(end_i, 1); + + int64_t interval_len = end_i - begin_i; + int64_t size_i = 0; + // If internal length is zero or has different sign from stride, it's a + // degenerated case: we are slicing nothing. Otherwise, calculate the sliced + // size. + if (interval_len != 0 && (interval_len < 0) == (stride_i < 0)) + size_i = (interval_len / stride_i) + (interval_len % stride_i != 0); + + begin[i] = begin_i; + if (IsSet(shrink_axis_mask, i)) { + // Shrink this dimension. It means we only take the element at begin_i. + input_shape[i] = 1; + end[i] = begin_i + 1; + stride[i] = 1; + } else { + input_shape[i] = size_i; + end[i] = end_i; + stride[i] = stride_i; + } + } +} + +// For the given `input_shape`, calculates the sliced shape using the given +// `sparse_begin`, `sparse_end`, and `sparse_strides` ranges and `begin_mask`, +// `end_mask`, `ellipsis_mask` , `new_axis_mask` and `shrink_axis_mask` masks. +// Updates the result back to `input_shape`. +static void CalculateSlicedShapeFromSparseIndices( + MutableArrayRef input_shape, ArrayRef sparse_begin, + ArrayRef sparse_end, ArrayRef sparse_strides, + int32_t begin_mask, int32_t end_mask, int32_t ellipsis_mask, + int32_t new_axis_mask, int32_t shrink_axis_mask, + SmallVectorImpl *begin, SmallVectorImpl *end, + SmallVectorImpl *stride) { + int64_t num_sparse_indices = sparse_begin.size(); + SparseSliceSpec sparse = {num_sparse_indices, begin_mask, end_mask, + ellipsis_mask, new_axis_mask, shrink_axis_mask, + sparse_begin, sparse_end, sparse_strides}; + + // If no ellipsis_mask exists then an implicit ellipsis_mask at the end is + // inserted. This handles cases where foo[2:4] (foo.shape() = [4, 8]) yields + // a tensor of shape [2, 8], i.e., foo[2:4] is same as foo[2:4, ...]. 
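+  // For the foo[2:4] case above, the sparse spec starts with dims = 1; the
+  // implicit ellipsis bit is then set at index 1 and dims becomes 2, so
+  // BuildDenseSliceSpec expands it to take the trailing dimension of [4, 8]
+  // in full.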
+ if (sparse.ellipsis_mask == 0) { + Set(sparse.ellipsis_mask, sparse.dims); + sparse.dims++; + } + + int64_t dims = input_shape.size(); + DenseSliceSpec dense = {dims, + /*begin_mask = */ 0, + /*end_mask = */ 0, + /*shrink_axis_mask = */ 0, + *begin, + *end, + *stride}; + + BuildDenseSliceSpec(sparse, &dense); + CalculateSlicedShapeFromDenseIndices(input_shape, dense.begin_mask, + dense.end_mask, dense.shrink_axis_mask, + *begin, *end, *stride); +} + +bool StridedSliceOp::GetSlicedBoundRanges( + SmallVectorImpl *slice_begin, SmallVectorImpl *slice_end, + SmallVectorImpl *slice_stride) { + // TODO(hinsu): Support lowering for ops with dynamic begin and end values + // when it is possible to derive indices based on mask attributes. + DenseIntElementsAttr sparse_begin_attr, sparse_end_attr, sparse_strides_attr; + if (!matchPattern(begin(), m_Constant(&sparse_begin_attr)) || + !matchPattern(end(), m_Constant(&sparse_end_attr)) || + !matchPattern(strides(), m_Constant(&sparse_strides_attr))) + return false; + + auto input_ty = this->input().getType().dyn_cast(); + if (!input_ty || !input_ty.hasStaticShape()) return false; + auto input_shape = llvm::to_vector<4>(input_ty.getShape()); + + SmallVector sparse_begin, sparse_end, sparse_strides; + + for (const APInt &index : sparse_begin_attr) + sparse_begin.push_back(index.getSExtValue()); + for (const APInt &index : sparse_end_attr) + sparse_end.push_back(index.getSExtValue()); + for (const APInt &stride : sparse_strides_attr) + sparse_strides.push_back(stride.getSExtValue()); + + CalculateSlicedShapeFromSparseIndices( + input_shape, sparse_begin, sparse_end, sparse_strides, + begin_mask().getZExtValue(), end_mask().getZExtValue(), + ellipsis_mask().getZExtValue(), new_axis_mask().getZExtValue(), + shrink_axis_mask().getZExtValue(), slice_begin, slice_end, slice_stride); + return true; +} + +//===----------------------------------------------------------------------===// +// StridedSliceGradOp +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(StridedSliceGradOp op) { + auto shape_type = op.shape().getType().dyn_cast(); + if (shape_type && shape_type.getRank() != 1) + return op.emitOpError("'shape' operand must be 1D tensor, but got ") + << shape_type.getRank() << "D tensor"; + + if (failed(VerifyStridedSliceBase(op))) return failure(); + + // TODO(antiagainst): verify the gradient op.dy()'s shape is consistent with + // the sliced type from StridedSlice. 
+ + return success(); +} + +bool StridedSliceGradOp::GetSlicedShapeAndBoundRanges( + SmallVectorImpl *input_shape, + SmallVectorImpl *slice_begin, SmallVectorImpl *slice_end, + SmallVectorImpl *slice_stride) { + DenseIntElementsAttr shape_attr; + DenseIntElementsAttr sparse_begin_attr, sparse_end_attr, sparse_strides_attr; + if (!matchPattern(shape(), m_Constant(&shape_attr)) || + !matchPattern(begin(), m_Constant(&sparse_begin_attr)) || + !matchPattern(end(), m_Constant(&sparse_end_attr)) || + !matchPattern(strides(), m_Constant(&sparse_strides_attr))) + return false; + + int rank = std::distance(shape_attr.begin(), shape_attr.end()); + + input_shape->clear(); + input_shape->reserve(rank); + for (const APInt &dim : shape_attr) + input_shape->push_back(dim.getSExtValue()); + + SmallVector sparse_begin, sparse_end, sparse_strides; + + for (const APInt &index : sparse_begin_attr) + sparse_begin.push_back(index.getSExtValue()); + for (const APInt &index : sparse_end_attr) + sparse_end.push_back(index.getSExtValue()); + for (const APInt &stride : sparse_strides_attr) + sparse_strides.push_back(stride.getSExtValue()); + + CalculateSlicedShapeFromSparseIndices( + *input_shape, sparse_begin, sparse_end, sparse_strides, + begin_mask().getZExtValue(), end_mask().getZExtValue(), + ellipsis_mask().getZExtValue(), new_axis_mask().getZExtValue(), + shrink_axis_mask().getZExtValue(), slice_begin, slice_end, slice_stride); + return true; +} + +//===----------------------------------------------------------------------===// +// TensorListReserveOp +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(TensorListReserveOp op) { + if (!IsOfRankOrUnranked(op.element_shape(), 0) && + !IsOfRankOrUnranked(op.element_shape(), 1)) { + return op.emitOpError("requires element_shape operand to be 0D/1D tensor"); + } + + if (!IsOfRankOrUnranked(op.num_elements(), 0)) { + return op.emitOpError("requires num_elements operand to be 0D tensor"); + } + return success(); +} + +//===----------------------------------------------------------------------===// +// TensorListElementShapeOp +//===----------------------------------------------------------------------===// + +OpFoldResult TensorListElementShapeOp::fold(ArrayRef operands) { + int width = + getType().cast().getElementType().getIntOrFloatBitWidth(); + auto variant_type = + getElementTypeOrSelf(getOperand().getType()).cast(); + if (variant_type.getSubtypes().empty()) return {}; + return ConvertShapeToAttr(variant_type.getSubtypes()[0], width); +} + +//===----------------------------------------------------------------------===// +// TensorListStackOp +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(TensorListStackOp op) { + if (!IsOfRankOrUnranked(op.element_shape(), 0) && + !IsOfRankOrUnranked(op.element_shape(), 1)) { + return op.emitOpError("requires element_shape operand to be 0D/1D tensor"); + } + return success(); +} + +//===----------------------------------------------------------------------===// +// TensorScatterUpdateOp +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(TensorScatterUpdateOp op) { + if (!HasRankAtLeast(op.tensor(), 1)) + return op.emitOpError( + "requires tensor operand to have at least 1 dimension"); + if (!HasRankAtLeast(op.indices(), 1)) + return op.emitOpError( + "requires indices operand to have at least 1 dimension"); + if (!HasRankAtLeast(op.updates(), 
1)) + return op.emitOpError( + "requires updates operand to have at least 1 dimension"); + + auto tensor_ty = op.tensor().getType().dyn_cast(); + auto indices_ty = op.indices().getType().dyn_cast(); + if (!tensor_ty || !indices_ty) return success(); + + int64_t num_index_dims = indices_ty.getShape().back(); + if (ShapedType::isDynamic(num_index_dims)) return success(); + + if (num_index_dims > tensor_ty.getRank()) + return op.emitOpError( + "requires tensor operand with rank greater than or equal to the " + "indices operand's last dimensions"); + return success(); +} + +//===----------------------------------------------------------------------===// +// TopKV2Op +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(TopKV2Op op) { + if (!HasRankAtLeast(op.input(), 1)) + return op.emitOpError( + "requires input operand to have at least 1 dimension"); + + if (!IsOfRankOrUnranked(op.k(), 0)) + return op.emitOpError("requires k operand to be 0D tensor"); + + return success(); +} + +//===----------------------------------------------------------------------===// +// ToBoolOp +//===----------------------------------------------------------------------===// + +namespace { +// If the input to ToBoolOp is a `tensor`, then the ToBoolOp is an identity +// function and can be removed. +class ToBoolOfZeroDBoolTensor : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + LogicalResult matchAndRewrite(ToBoolOp op, + PatternRewriter &rewriter) const override { + if (auto type = op.getOperand().getType().dyn_cast()) { + if (type.getRank() == 0 && type.getElementType().isInteger(1)) { + rewriter.replaceOp(op, op.getOperand()); + return success(); + } + } + return failure(); + } +}; +} // namespace + +void ToBoolOp::getCanonicalizationPatterns(OwningRewritePatternList &results, + MLIRContext *context) { + results.insert(context); +} + +//===----------------------------------------------------------------------===// +// TransposeOp +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(TransposeOp op) { + // TODO(hinsu): Verify using a custom verifier that, + // * Transpose permutation is 1-D of size equal to the rank of the first + // input, if the shapes are partially known. Requires use of a more + // restrictive type than TF_Tensor. + // * Result shape dimensions are possible based on the input shape. + return success(); +} + +// TODO(jpienaar): perm could be optional too. +void TransposeOp::build(OpBuilder &builder, OperationState &result, Value x, + Value perm) { + auto x_type = x.getType().cast(); + // If value is unranked, then so is results. + if (!x_type.hasRank()) + return TransposeOp::build(builder, result, + UnrankedTensorType::get(x_type.getElementType()), + x, perm); + + // TODO(jpienaar): Handle unknown perm case. + + // TODO(jpienaar): Extract utility function. 
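+  // Shape inference example (illustrative types only): with x of type
+  // tensor<2x3x4xf32> and a constant perm = [2, 0, 1], const_shape below
+  // becomes [4, 2, 3] and the inferred result type is tensor<4x2x3xf32>.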
+ auto etype = x_type.cast().getElementType(); + DenseIntElementsAttr attr_shape; + if (matchPattern(perm, m_Constant(&attr_shape))) { + llvm::SmallVector const_shape; + if (attr_shape.isSplat()) { + const_shape.assign( + attr_shape.getNumElements(), + x_type.getDimSize((*attr_shape.begin()).getSExtValue())); + } else { + const_shape.reserve(attr_shape.getNumElements()); + for (const auto &dim : attr_shape) + const_shape.push_back(x_type.getDimSize(dim.getSExtValue())); + } + return TransposeOp::build( + builder, result, RankedTensorType::get(const_shape, etype), x, perm); + } + return TransposeOp::build(builder, result, UnrankedTensorType::get(etype), x, + perm); +} + +namespace { + +OpFoldResult FoldIdentityTranspose(TransposeOp op) { + auto const_perm = dyn_cast_or_null(op.perm().getDefiningOp()); + if (!const_perm) return {}; + + auto const_value = const_perm.value(); + const auto elements = const_value.getValues(); + + for (auto it : llvm::enumerate(elements)) { + if (it.index() != it.value()) return {}; + } + + // TODO(jpienaar): Remove if/when we handle this more generally. + if (op.getType() != op.x().getType()) { + // If the types don't match then only fold if all the operands are in the TF + // dialect. + for (auto user : op.getOperation()->getUsers()) + if (user->getDialect() != op.getDialect()) return {}; + } + + return op.x(); +} + +OpFoldResult FoldCancellableTranspose(TransposeOp op) { + // Operand is a TransposeOp. + auto transpose = dyn_cast_or_null(op.x().getDefiningOp()); + if (!transpose) return {}; + + // Permutations defined by constant operations. + auto perm0 = dyn_cast_or_null(op.perm().getDefiningOp()); + auto perm1 = dyn_cast_or_null(transpose.perm().getDefiningOp()); + if (!perm0 || !perm1) return {}; + + // With permutation indices that cancel each other + auto perm0_value = perm0.value().cast(); + auto perm1_value = perm1.value().cast(); + if (!AreCancellablePermutations(perm0_value, perm1_value)) return {}; + + return transpose.x(); +} + +} // namespace + +OpFoldResult TransposeOp::fold(ArrayRef operands) { + if (auto folded = FoldIdentityTranspose(*this)) return folded; + if (auto folded = FoldCancellableTranspose(*this)) return folded; + return {}; +} + +//===----------------------------------------------------------------------===// +// TruncateDivOp +//===----------------------------------------------------------------------===// + +void TruncateDivOp::getCanonicalizationPatterns( + OwningRewritePatternList &results, MLIRContext *context) { + results.insert(context); +} + +//===----------------------------------------------------------------------===// +// UnpackOp +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(UnpackOp op) { + auto value_type = op.value().getType().dyn_cast(); + if (!value_type) return success(); + + int64_t value_rank = value_type.getRank(); + int64_t axis = op.axis().getSExtValue(); + if (axis < -value_rank || axis >= value_rank) + return op.emitOpError("axis attribute must be in the range of [-") + << value_rank << ", " << value_rank << ')'; + + axis = GetDimForAxis(axis, value_rank); + int64_t dim_size = value_type.getDimSize(axis); + if (ShapedType::isDynamic(dim_size)) return success(); + + if (dim_size != op.getNumResults()) + return op.emitOpError("result count must be equal to ") << dim_size; + + return success(); +} + +//===----------------------------------------------------------------------===// +// Unsorted segment reduction ops 
+//===----------------------------------------------------------------------===//
+
+template <class Op>
+static LogicalResult VerifyUnsortedSegmentReduction(Op op) {
+  if (!HasRankAtMost(op.num_segments(), 0))
+    return op.emitOpError("number of segments should be a 0-D tensor");
+
+  auto data_type = op.data().getType().template dyn_cast<RankedTensorType>();
+  auto segment_ids_type =
+      op.segment_ids().getType().template dyn_cast<RankedTensorType>();
+  if (data_type && segment_ids_type) {
+    if (data_type.getRank() < segment_ids_type.getRank())
+      return op.emitOpError(
+          "requires segment ids rank to be less than or equal to data's rank");
+
+    int index = 0;
+    for (auto shape_pair :
+         llvm::zip_first(segment_ids_type.getShape(), data_type.getShape())) {
+      int64_t segment_id_dim = std::get<0>(shape_pair);
+      int64_t data_dim = std::get<1>(shape_pair);
+      if (!ShapedType::isDynamic(segment_id_dim) &&
+          !ShapedType::isDynamic(data_dim) && segment_id_dim != data_dim)
+        return op.emitOpError(
+                   "requires segment ids shape to be a prefix of data shape, "
+                   "but dimension #")
+               << index << " differs: " << segment_id_dim << " vs. "
+               << data_dim;
+      ++index;
+    }
+  }
+
+  DenseIntElementsAttr num_segments_attr;
+  if (matchPattern(op.num_segments(), m_Constant(&num_segments_attr))) {
+    int64_t num_segments = (*num_segments_attr.begin()).getSExtValue();
+    if (num_segments < 0)
+      return op.emitOpError("num of segments cannot be negative");
+  }
+
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// VarIsInitializedOp
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+/// Erase VarIsInitializedOp operations with no uses. This op has a side effect
+/// on resources (it only reads them), but it can still be deleted if it has
+/// zero uses.
+struct EraseDeadVarIsInitializedOp
+    : public OpRewritePattern<VarIsInitializedOp> {
+  using OpRewritePattern<VarIsInitializedOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(VarIsInitializedOp op,
+                                PatternRewriter &rewriter) const override {
+    if (!op.use_empty()) return failure();
+    rewriter.eraseOp(op);
+    return success();
+  }
+};
+}  // end anonymous namespace.
+ +void VarIsInitializedOp::getCanonicalizationPatterns( + OwningRewritePatternList &patterns, MLIRContext *context) { + patterns.insert(context); +} + +//===----------------------------------------------------------------------===// +// VariableShapeOp +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(VariableShapeOp op) { + auto input_type = op.input().getType().cast(); + if (input_type.hasStaticShape() && input_type.getNumElements() != 1) + return op.emitOpError("requires input to have one resource"); + + auto resource_type = input_type.getElementType().cast(); + auto subtypes = resource_type.getSubtypes(); + switch (subtypes.size()) { + case 1: + return VerifyShapeOperandAndResult( + op, resource_type.getSubtypes().front(), op.getType()); + case 0: + return VerifyShapeOperandAndResult(op, Type(), op.getType()); + default: + return op.emitOpError( + "requires resource input type to have at most 1 subtype"); + } +} + +OpFoldResult VariableShapeOp::fold(ArrayRef operands) { + int width = + getType().cast().getElementType().getIntOrFloatBitWidth(); + auto resource_type = + getElementTypeOrSelf(getOperand().getType()).cast(); + if (resource_type.getSubtypes().empty()) return {}; + return ConvertShapeToAttr(resource_type.getSubtypes()[0], width); +} + +//===----------------------------------------------------------------------===// +// WhileOp +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(WhileOp op) { + auto module = op.getParentOfType(); + auto cond_fn = module.lookupSymbol(op.cond()); + auto body_fn = module.lookupSymbol(op.body()); + if (!cond_fn) { + return op.emitOpError("cond refers to an undefined function : ") + << op.cond(); + } + if (!body_fn) { + return op.emitOpError("body refers to an undefined function : ") + << op.body(); + } + + auto cond_fn_type = cond_fn.getType(); + auto body_fn_type = body_fn.getType(); + + // Verify that the cond function has exactly one result. + if (cond_fn_type.getNumResults() != 1) + return op.emitOpError("requires cond function to have exactly one result"); + + SmallVector operands(op.getOperandTypes()); + + // Collect all the type lists for the op so that different pairs of type lists + // can be compared for the compatibility. + constexpr int kNumTypeLists = 5; + const std::array>, kNumTypeLists> + type_lists = {{ + {"operand", operands}, + {"body function result", body_fn_type.getResults()}, + {"result", op.getResultTypes()}, + {"cond function input", cond_fn_type.getInputs()}, + {"body function input", body_fn_type.getInputs()}, + }}; + + // A pair of type lists should be cast compatible with each other if one is + // converted to the another for a function call or assignment or there is a + // common source of inputs for both. Therefore, the While op requires the + // following pairs of type lists to be cast compatible for the tensor_cast + // operation: + // + // * Operands and cond inputs to call the cond function before the + // first iteration. + // * Operands and body inputs to call the body function for the first + // iteration if the cond functions returns True or equivalent result. + // * Operands and results to assign cond function arguments to op results if + // the cond function returns False or equivalent result. + // * All three pairs using cond inputs, body inputs and results as operand is + // a common source for all three. 
+  // * Body results and cond inputs to call the cond function for the
+  //   subsequent iterations. Similarly, body results should be compatible
+  //   with body inputs and op results.
+  //
+  // Note that the operands and body results need not be compatible as they are
+  // never converted from one to the other, nor is there a common source of
+  // tensors. The compatibility requirement is not transitive.
+
+  for (int i = 0; i < kNumTypeLists; ++i) {
+    // Skip the first pair as the While op operands and body function results
+    // do not need to be compatible with each other.
+    for (int j = std::max(2, i + 1); j < kNumTypeLists; ++j) {
+      auto &a = type_lists[i];
+      auto &b = type_lists[j];
+
+      int a_size = a.second.size();
+      if (a_size != b.second.size())
+        return op.emitOpError(
+            llvm::formatv("requires the number of {0}s to be equal to the "
+                          "number of {1}s. Found {2} and {3}, respectively",
+                          a.first, b.first, a_size, b.second.size()));
+
+      for (int idx = 0; idx < a_size; ++idx) {
+        auto a_type = a.second[idx];
+        auto b_type = b.second[idx];
+
+        if (!AreCastCompatible({a_type, b_type}))
+          return op.emitError(llvm::formatv(
+              "{0} type {1} is incompatible with {2} type {3} at index {4}",
+              a.first, a_type, b.first, b_type, idx));
+      }
+    }
+  }
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// WhileRegionOp
+//===----------------------------------------------------------------------===//
+static LogicalResult Verify(WhileRegionOp op) {
+  // Verify that the condition generates a single tensor<i1> result.
+  YieldOp yield = cast<YieldOp>(op.cond().front().getTerminator());
+  if (yield.getNumOperands() != 1)
+    return op.emitOpError()
+           << "condition should have a single tensor<i1> result";
+
+  auto cond_type = yield.getOperand(0).getType().dyn_cast<RankedTensorType>();
+  if (!cond_type || !cond_type.getShape().equals({}) ||
+      !cond_type.getElementType().isInteger(/*width=*/1))
+    return op.emitOpError()
+           << "condition should have a single tensor<i1> result";
+
+  // The body result types should match the while op result types.
+  if (failed(VerifyRegionResults(op, op.body(), "body"))) return failure();
+
+  // Both condition and body should have the same number and types of operands
+  // as the WhileRegion inputs.
+ const int num_inputs = op.getNumOperands(); + auto block_inputs_match_op_inputs = [&](Region ®ion, + StringRef name) -> LogicalResult { + Block &block = region.front(); + if (block.getNumArguments() != num_inputs) + return op.emitOpError() + << name << " should have same number of inputs (" << num_inputs + << ") as " << WhileRegionOp::getOperationName() << " but has " + << block.getNumArguments() << " inputs"; + + for (auto types_idx : llvm::enumerate( + llvm::zip(op.getOperandTypes(), block.getArgumentTypes()))) { + auto op_input_type = std::get<0>(types_idx.value()); + auto block_input_type = std::get<1>(types_idx.value()); + if (!AreCastCompatible({block_input_type, op_input_type})) + return op.emitOpError(llvm::formatv( + "{0} input type {1} is incompatible with {2} " + "input type {3} at index {4}", + name, block_input_type, WhileRegionOp::getOperationName(), + op_input_type, types_idx.index())); + } + return success(); + }; + + if (failed(block_inputs_match_op_inputs(op.cond(), "condition")) || + failed(block_inputs_match_op_inputs(op.body(), "body"))) + return failure(); + + return success(); +} + +//===----------------------------------------------------------------------===// +// WhileRegionOp LoopLikeOpInterface +//===----------------------------------------------------------------------===// + +Region &WhileRegionOp::getLoopBody() { return body(); } + +bool WhileRegionOp::isDefinedOutsideOfLoop(Value value) { + // If the Op defining the value exists and the defining op is outside the + // scope of this WhileRegion, then we can infer that its defined outside. + // The defining Op is outside the scope of this WhileRegion if this + // WhileRegionOp is not an ancestor of the defining op in the parent chain. + Operation *def_op = value.getDefiningOp(); + return def_op && !getOperation()->isAncestor(def_op); +} + +LogicalResult WhileRegionOp::moveOutOfLoop( + llvm::ArrayRef ops) { + // Move the hoisted value to just before the while. + Operation *while_op = this->getOperation(); + for (auto op : ops) op->moveBefore(while_op); + return success(); +} + +//===----------------------------------------------------------------------===// +// WhileRegionOp canonicalization +//===----------------------------------------------------------------------===// +namespace { +// Eliminate values that pass through the WhileRegionOp body. +struct WhileRegionEliminatePassThrough + : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(WhileRegionOp while_op, + PatternRewriter &rewriter) const override { + // Replace values that simply passthrough the body with extern values. The + // block arguments of body and while match and so the corresponding cond + // argument can be easily found. + int old_num_operands = while_op.getNumOperands(); + int new_num_operands = old_num_operands; + auto &body_block = while_op.body().front(); + auto &cond_block = while_op.cond().front(); + auto &yield = *body_block.getTerminator(); + + // Bit mask indicating which operands will be removed. + SmallVector removed_operand(old_num_operands, false); + + for (int op_idx : llvm::seq(0, old_num_operands)) { + auto body_arg = body_block.getArgument(op_idx); + if (body_arg == yield.getOperand(op_idx)) { + // Replace the use of the passthrough value with the while operand + // in the body and condition regions, as well as the while output (if + // type match) + // TODO(jurahul): Use PatternRewriter API for IR modification. 
+        auto value = while_op.getOperand(op_idx);
+        if (body_arg.getType() == value.getType())
+          body_arg.replaceAllUsesWith(value);
+
+        auto cond_arg = cond_block.getArgument(op_idx);
+        if (cond_arg.getType() == value.getType())
+          cond_arg.replaceAllUsesWith(value);
+
+        auto result = while_op.getResult(op_idx);
+        if (result.getType() == value.getType())
+          result.replaceAllUsesWith(value);
+      }
+
+      // Now check if the operand is unused in both regions as well as the
+      // result. If so, mark it for removal.
+      if (body_block.getArgument(op_idx).use_empty() &&
+          cond_block.getArgument(op_idx).use_empty() &&
+          while_op.getResult(op_idx).use_empty()) {
+        removed_operand[op_idx] = true;
+        new_num_operands--;
+      }
+    }
+
+    if (new_num_operands == old_num_operands) return failure();
+
+    // Compress the operands, region arguments, and outputs.
+    SmallVector<Value, 4> new_while_operands;
+    SmallVector<Type, 4> new_result_types;
+    new_while_operands.reserve(new_num_operands);
+    new_result_types.reserve(new_num_operands);
+
+    // Build the new operands and result types.
+    int next_idx = 0;
+    for (int op_idx : llvm::seq<int>(0, old_num_operands)) {
+      if (removed_operand[op_idx]) continue;
+      new_while_operands.push_back(while_op.getOperand(op_idx));
+      new_result_types.push_back(while_op.getResult(op_idx).getType());
+      next_idx++;
+    }
+
+    // Create the new while operation.
+    auto new_while_op =
+        rewriter.create<WhileRegionOp>(while_op.getLoc(), new_result_types,
+                                       new_while_operands, while_op.getAttrs());
+
+    // Move the region bodies to the new while.
+    rewriter.inlineRegionBefore(while_op.cond(), new_while_op.cond(),
+                                new_while_op.cond().end());
+    rewriter.inlineRegionBefore(while_op.body(), new_while_op.body(),
+                                new_while_op.body().end());
+
+    auto &new_cond_block = new_while_op.cond().front();
+    auto &new_body_block = new_while_op.body().front();
+    auto &new_yield = *new_body_block.getTerminator();
+
+    // Build a vector of new results. Also patch up the region bodies and yield.
+    SmallVector<Value, 4> new_results;
+    next_idx = 0;
+    for (int op_idx : llvm::seq<int>(0, old_num_operands)) {
+      if (removed_operand[op_idx]) {
+        new_cond_block.eraseArgument(next_idx);
+        new_body_block.eraseArgument(next_idx);
+        new_yield.eraseOperand(next_idx);
+        new_results.push_back(nullptr);
+      } else {
+        new_results.push_back(new_while_op.getResult(next_idx++));
+      }
+    }
+
+    rewriter.replaceOp(while_op, new_results);
+    return success();
+  }
+};
+
+}  // anonymous namespace
+
+void WhileRegionOp::getCanonicalizationPatterns(
+    OwningRewritePatternList &results, MLIRContext *context) {
+  results.insert<WhileRegionEliminatePassThrough>(context);
+}
+
+//===----------------------------------------------------------------------===//
+// XdivyOp
+//===----------------------------------------------------------------------===//
+
+void XdivyOp::getCanonicalizationPatterns(OwningRewritePatternList &results,
+                                          MLIRContext *context) {
+  results.insert(context);
+}
+
+//===----------------------------------------------------------------------===//
+// TableGen'd op method definitions
+//===----------------------------------------------------------------------===//
+
+#define GET_OP_CLASSES
+#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc.inc"
+
+}  // namespace TF
+}  // namespace mlir
diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.h
new file mode 100644
index 00000000000..b6e9222a370
--- /dev/null
+++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.h
@@ -0,0 +1,51 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_MLIR_TENSORFLOW_IR_TF_OPS_N_Z_H_ +#define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_IR_TF_OPS_N_Z_H_ + +#include "mlir/Dialect/Traits.h" // from @llvm-project +#include "mlir/IR/Attributes.h" // from @llvm-project +#include "mlir/IR/Builders.h" // from @llvm-project +#include "mlir/IR/Dialect.h" // from @llvm-project +#include "mlir/IR/Matchers.h" // from @llvm-project +#include "mlir/IR/Module.h" // from @llvm-project +#include "mlir/IR/OpImplementation.h" // from @llvm-project +#include "mlir/IR/StandardTypes.h" // from @llvm-project +#include "mlir/IR/TypeUtilities.h" // from @llvm-project +#include "mlir/Interfaces/CallInterfaces.h" // from @llvm-project +#include "mlir/Interfaces/DerivedAttributeOpInterface.h" // from @llvm-project +#include "mlir/Interfaces/InferTypeOpInterface.h" // from @llvm-project +#include "mlir/Interfaces/LoopLikeInterface.h" // from @llvm-project +#include "mlir/Interfaces/SideEffectInterfaces.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_attributes.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_op_interfaces.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_structs.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_traits.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_verifiers.h" + +namespace mlir { +namespace TF { + +#define GET_OP_FWD_DEFINES +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_all_ops.h.inc" +#define GET_OP_CLASSES +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.h.inc" + +} // namespace TF +} // namespace mlir + +#endif // TENSORFLOW_COMPILER_MLIR_TENSORFLOW_IR_TF_OPS_N_Z_H_ diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_remaining_ops.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_remaining_ops.cc new file mode 100644 index 00000000000..e87cc494a4a --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_remaining_ops.cc @@ -0,0 +1,87 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_remaining_ops.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Sequence.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/FormatVariadic.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project +#include "mlir/Dialect/Traits.h" // from @llvm-project +#include "mlir/IR/Attributes.h" // from @llvm-project +#include "mlir/IR/Builders.h" // from @llvm-project +#include "mlir/IR/Diagnostics.h" // from @llvm-project +#include "mlir/IR/DialectImplementation.h" // from @llvm-project +#include "mlir/IR/Function.h" // from @llvm-project +#include "mlir/IR/Identifier.h" // from @llvm-project +#include "mlir/IR/Location.h" // from @llvm-project +#include "mlir/IR/MLIRContext.h" // from @llvm-project +#include "mlir/IR/Matchers.h" // from @llvm-project +#include "mlir/IR/OpDefinition.h" // from @llvm-project +#include "mlir/IR/OpImplementation.h" // from @llvm-project +#include "mlir/IR/PatternMatch.h" // from @llvm-project +#include "mlir/IR/StandardTypes.h" // from @llvm-project +#include "mlir/IR/TypeUtilities.h" // from @llvm-project +#include "mlir/IR/Types.h" // from @llvm-project +#include "mlir/IR/Value.h" // from @llvm-project +#include "mlir/Parser.h" // from @llvm-project +#include "mlir/Support/LLVM.h" // from @llvm-project +#include "mlir/Support/LogicalResult.h" // from @llvm-project +#include "mlir/Transforms/InliningUtils.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_attributes.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_op_interfaces.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_side_effects.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_structs.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/util/tensor_format.h" + +namespace mlir { +namespace TF { + +namespace { +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops_helpers.inc" +#include "tensorflow/compiler/mlir/tensorflow/transforms/generated_canonicalize.inc" +} // namespace + +//===----------------------------------------------------------------------===// +// TableGen'd op method definitions +//===----------------------------------------------------------------------===// + +#define GET_OP_CLASSES +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_remaining_ops.cc.inc" + +} // namespace TF +} // namespace mlir diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_remaining_ops.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_remaining_ops.h new file mode 100644 index 00000000000..8586515edee --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_remaining_ops.h @@ -0,0 +1,50 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_MLIR_TENSORFLOW_IR_TF_REMAINING_OPS_H_ +#define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_IR_TF_REMAINING_OPS_H_ + +#include "mlir/Dialect/Traits.h" // from @llvm-project +#include "mlir/IR/Attributes.h" // from @llvm-project +#include "mlir/IR/Builders.h" // from @llvm-project +#include "mlir/IR/Matchers.h" // from @llvm-project +#include "mlir/IR/Module.h" // from @llvm-project +#include "mlir/IR/OpImplementation.h" // from @llvm-project +#include "mlir/IR/StandardTypes.h" // from @llvm-project +#include "mlir/IR/TypeUtilities.h" // from @llvm-project +#include "mlir/Interfaces/CallInterfaces.h" // from @llvm-project +#include "mlir/Interfaces/DerivedAttributeOpInterface.h" // from @llvm-project +#include "mlir/Interfaces/InferTypeOpInterface.h" // from @llvm-project +#include "mlir/Interfaces/LoopLikeInterface.h" // from @llvm-project +#include "mlir/Interfaces/SideEffectInterfaces.h" // from @llvm-project +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_attributes.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_op_interfaces.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_structs.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_traits.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_verifiers.h" + +namespace mlir { +namespace TF { + +#define GET_OP_FWD_DEFINES +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_all_ops.h.inc" +#define GET_OP_CLASSES +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_remaining_ops.h.inc" + +} // namespace TF +} // namespace mlir + +#endif // TENSORFLOW_COMPILER_MLIR_TENSORFLOW_IR_TF_REMAINING_OPS_H_ From f09611a4b86c65f1ab8843cd88a4077f64a1a67e Mon Sep 17 00:00:00 2001 From: Peng Wang Date: Wed, 15 Jul 2020 17:17:38 -0700 Subject: [PATCH 0567/2522] Exports tf-numpy symbols under `tf.experimental.numpy`. Golden files for `tf.experimental.numpy` are put under tensorflow/third_party/py/numpy/tf_numpy_api instead of the usual golden file location. 
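For illustration only (not part of the patch), the exported module can be used roughly as follows in a TF build that contains this change, assuming eager execution (the TF 2.x default); `onp` is plain NumPy, used only for comparison, mirroring the new integration test below:

    import numpy as onp
    import tensorflow as tf

    tnp = tf.experimental.numpy    # module exported by this change

    a, b = 0.1, 0.2
    result = tnp.add(a, b)         # a tnp.ndarray wrapping an immutable tf.Tensor
    assert onp.allclose(onp.add(a, b), result)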
PiperOrigin-RevId: 321469615 Change-Id: I3f20ce057e16db1438c853162474fc8fd19c2e2a --- RELEASE.md | 1 + tensorflow/python/__init__.py | 1 + .../ops/numpy_ops/integration_test/BUILD | 10 + .../integration_test/public_symbol_test.py | 38 ++ tensorflow/python/ops/numpy_ops/np_export.py | 14 +- .../tools/api/generator/api_init_files.bzl | 2 + .../golden/v2/tensorflow.experimental.pbtxt | 4 + tensorflow/tools/api/tests/BUILD | 1 + .../tools/api/tests/api_compatibility_test.py | 42 ++- third_party/py/numpy/tf_numpy_api/BUILD | 11 + .../tensorflow.experimental.numpy.bool_.pbtxt | 222 +++++++++++ ...orflow.experimental.numpy.complex128.pbtxt | 222 +++++++++++ ...sorflow.experimental.numpy.complex64.pbtxt | 222 +++++++++++ ...nsorflow.experimental.numpy.complex_.pbtxt | 222 +++++++++++ ...ensorflow.experimental.numpy.float16.pbtxt | 222 +++++++++++ ...ensorflow.experimental.numpy.float32.pbtxt | 222 +++++++++++ ...ensorflow.experimental.numpy.float64.pbtxt | 234 ++++++++++++ ...tensorflow.experimental.numpy.float_.pbtxt | 234 ++++++++++++ ...ensorflow.experimental.numpy.inexact.pbtxt | 222 +++++++++++ .../tensorflow.experimental.numpy.int16.pbtxt | 230 ++++++++++++ .../tensorflow.experimental.numpy.int32.pbtxt | 230 ++++++++++++ .../tensorflow.experimental.numpy.int64.pbtxt | 230 ++++++++++++ .../tensorflow.experimental.numpy.int8.pbtxt | 230 ++++++++++++ .../tensorflow.experimental.numpy.int_.pbtxt | 230 ++++++++++++ ...nsorflow.experimental.numpy.ndarray.pbtxt} | 2 +- ...ensorflow.experimental.numpy.object_.pbtxt | 222 +++++++++++ ...xt => tensorflow.experimental.numpy.pbtxt} | 8 +- ...ensorflow.experimental.numpy.random.pbtxt} | 2 +- ...ensorflow.experimental.numpy.string_.pbtxt | 339 +++++++++++++++++ ...tensorflow.experimental.numpy.uint16.pbtxt | 230 ++++++++++++ ...tensorflow.experimental.numpy.uint32.pbtxt | 230 ++++++++++++ ...tensorflow.experimental.numpy.uint64.pbtxt | 230 ++++++++++++ .../tensorflow.experimental.numpy.uint8.pbtxt | 230 ++++++++++++ ...nsorflow.experimental.numpy.unicode_.pbtxt | 354 ++++++++++++++++++ 34 files changed, 5115 insertions(+), 28 deletions(-) create mode 100644 tensorflow/python/ops/numpy_ops/integration_test/BUILD create mode 100644 tensorflow/python/ops/numpy_ops/integration_test/public_symbol_test.py create mode 100644 third_party/py/numpy/tf_numpy_api/BUILD create mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.bool_.pbtxt create mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.complex128.pbtxt create mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.complex64.pbtxt create mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.complex_.pbtxt create mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.float16.pbtxt create mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.float32.pbtxt create mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.float64.pbtxt create mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.float_.pbtxt create mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.inexact.pbtxt create mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int16.pbtxt create mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int32.pbtxt create mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int64.pbtxt create mode 100644 
third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int8.pbtxt
 create mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int_.pbtxt
 rename third_party/py/numpy/tf_numpy_api/{numpy_ops.ndarray.pbtxt => tensorflow.experimental.numpy.ndarray.pbtxt} (97%)
 create mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.object_.pbtxt
 rename third_party/py/numpy/tf_numpy_api/{numpy_ops.pbtxt => tensorflow.experimental.numpy.pbtxt} (98%)
 rename third_party/py/numpy/tf_numpy_api/{numpy_ops.random.pbtxt => tensorflow.experimental.numpy.random.pbtxt} (94%)
 create mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.string_.pbtxt
 create mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.uint16.pbtxt
 create mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.uint32.pbtxt
 create mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.uint64.pbtxt
 create mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.uint8.pbtxt
 create mode 100644 third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.unicode_.pbtxt

diff --git a/RELEASE.md b/RELEASE.md
index 150c7077349..d68eca00f44 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -21,6 +21,7 @@
 *
 *
+* A new module named `tf.experimental.numpy` is added. It is a NumPy-compatible API for writing TF programs. This module provides the class `ndarray`, which mimics the `ndarray` class in NumPy and wraps an immutable `tf.Tensor` under the hood. A subset of NumPy functions (e.g. `numpy.add`) is provided, and their interoperation with TF facilities is seamless in most cases. See tensorflow/python/ops/numpy_ops/README.md for details of what is supported and how it differs from NumPy.
 
 ## Bug Fixes and Other Changes
 
diff --git a/tensorflow/python/__init__.py b/tensorflow/python/__init__.py
index 698a0d120c1..b5acf23ba79 100644
--- a/tensorflow/python/__init__.py
+++ b/tensorflow/python/__init__.py
@@ -56,6 +56,7 @@ from tensorflow.python.ops import image_ops as image
 from tensorflow.python.ops import manip_ops as manip
 from tensorflow.python.ops import metrics
 from tensorflow.python.ops import nn
+from tensorflow.python.ops import numpy_ops
 from tensorflow.python.ops import ragged
 from tensorflow.python.ops import sets
 from tensorflow.python.ops import stateful_random_ops
diff --git a/tensorflow/python/ops/numpy_ops/integration_test/BUILD b/tensorflow/python/ops/numpy_ops/integration_test/BUILD
new file mode 100644
index 00000000000..05162a4e26a
--- /dev/null
+++ b/tensorflow/python/ops/numpy_ops/integration_test/BUILD
@@ -0,0 +1,10 @@
+licenses(["notice"])
+
+py_test(
+    name = "public_symbol_test",
+    srcs = ["public_symbol_test.py"],
+    python_version = "PY3",
+    deps = [
+        "//tensorflow:tensorflow_py",
+    ],
+)
diff --git a/tensorflow/python/ops/numpy_ops/integration_test/public_symbol_test.py b/tensorflow/python/ops/numpy_ops/integration_test/public_symbol_test.py
new file mode 100644
index 00000000000..f0c41cd21e5
--- /dev/null
+++ b/tensorflow/python/ops/numpy_ops/integration_test/public_symbol_test.py
@@ -0,0 +1,38 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests using module `tf.experimental.numpy` via an alias.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as onp +import tensorflow as tf + + +np = tf.experimental.numpy + + +class PublicSymbolTest(tf.test.TestCase): + + def testSimple(self): + a = 0.1 + b = 0.2 + self.assertAllClose(onp.add(a, b), np.add(a, b)) + + +if __name__ == "__main__": + tf.compat.v1.enable_eager_execution() + tf.test.main() diff --git a/tensorflow/python/ops/numpy_ops/np_export.py b/tensorflow/python/ops/numpy_ops/np_export.py index b431db54d58..7a6424cbc77 100644 --- a/tensorflow/python/ops/numpy_ops/np_export.py +++ b/tensorflow/python/ops/numpy_ops/np_export.py @@ -18,23 +18,17 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.util import tf_export + def public_name(np_fun_name): return "experimental.numpy." + np_fun_name def np_export(np_fun_name): - # TODO(wangpeng): Remove the following two lines and do actual exporting using - # this: - # return tf_export.tf_export(public_name(np_fun_name), v1=[]) - del np_fun_name - return lambda f: f + return tf_export.tf_export(public_name(np_fun_name), v1=[]) def np_export_constant(module_name, name, value): - # TODO(wangpeng): Remove the following two lines and do actual exporting using - # this: - # np_export(name).export_constant(module_name, name) - del module_name - del name + np_export(name).export_constant(module_name, name) return value diff --git a/tensorflow/python/tools/api/generator/api_init_files.bzl b/tensorflow/python/tools/api/generator/api_init_files.bzl index 6f1a826c315..d2770e92b2e 100644 --- a/tensorflow/python/tools/api/generator/api_init_files.bzl +++ b/tensorflow/python/tools/api/generator/api_init_files.bzl @@ -26,6 +26,8 @@ TENSORFLOW_API_INIT_FILES = [ "dtypes/__init__.py", "errors/__init__.py", "experimental/__init__.py", + "experimental/numpy/__init__.py", + "experimental/numpy/random/__init__.py", "experimental/tensorrt/__init__.py", "experimental/dlpack/__init__.py", "feature_column/__init__.py", diff --git a/tensorflow/tools/api/golden/v2/tensorflow.experimental.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.experimental.pbtxt index 95e06075952..58384846276 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.experimental.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.experimental.pbtxt @@ -8,6 +8,10 @@ tf_module { name: "dlpack" mtype: "" } + member { + name: "numpy" + mtype: "" + } member { name: "tensorrt" mtype: "" diff --git a/tensorflow/tools/api/tests/BUILD b/tensorflow/tools/api/tests/BUILD index 8a2880bcb64..8ad25045941 100644 --- a/tensorflow/tools/api/tests/BUILD +++ b/tensorflow/tools/api/tests/BUILD @@ -24,6 +24,7 @@ py_test( "//tensorflow/tools/api/golden:api_golden_v2", "//tensorflow/tools/api/tests:API_UPDATE_WARNING.txt", "//tensorflow/tools/api/tests:README.txt", + "//third_party/py/numpy/tf_numpy_api:api_golden", ], python_version = "PY3", 
srcs_version = "PY2AND3", diff --git a/tensorflow/tools/api/tests/api_compatibility_test.py b/tensorflow/tools/api/tests/api_compatibility_test.py index 67957899214..aeae307be35 100644 --- a/tensorflow/tools/api/tests/api_compatibility_test.py +++ b/tensorflow/tools/api/tests/api_compatibility_test.py @@ -132,6 +132,13 @@ def _KeyToFilePath(key, api_version): six.ensure_str(key)) api_folder = ( _API_GOLDEN_FOLDER_V2 if api_version == 2 else _API_GOLDEN_FOLDER_V1) + if key.startswith('tensorflow.experimental.numpy'): + # Jumps up one more level in order to let Copybara find the + # 'tensorflow/third_party' string to replace + api_folder = os.path.join( + api_folder, '..', '..', '..', '..', '../third_party', + 'py', 'numpy', 'tf_numpy_api') + api_folder = os.path.normpath(api_folder) return os.path.join(api_folder, '%s.pbtxt' % case_insensitive_key) @@ -199,6 +206,12 @@ def _FilterGoldenProtoDict(golden_proto_dict, omit_golden_symbols_map): return filtered_proto_dict +def _GetTFNumpyGoldenPattern(api_version): + return os.path.join(resource_loader.get_root_dir_with_all_resources(), + _KeyToFilePath('tensorflow.experimental.numpy*', + api_version)) + + class ApiCompatibilityTest(test.TestCase): def __init__(self, *args, **kwargs): @@ -336,7 +349,7 @@ class ApiCompatibilityTest(test.TestCase): def _checkBackwardsCompatibility(self, root, - golden_file_pattern, + golden_file_patterns, api_version, additional_private_map=None, omit_golden_symbols_map=None): @@ -349,6 +362,9 @@ class ApiCompatibilityTest(test.TestCase): public_api_visitor.private_map['tf'].append('enable_v2_behavior') public_api_visitor.do_not_descend_map['tf.GPUOptions'] = ['Experimental'] + # Do not descend into `iinfo` because np.iinfo's signature is different + # between internal and OSS. + public_api_visitor.do_not_descend_map['tf.experimental.numpy'] = ['iinfo'] if FLAGS.only_test_core_api: public_api_visitor.do_not_descend_map['tf'].extend(_NON_CORE_PACKAGES) if additional_private_map: @@ -358,7 +374,7 @@ class ApiCompatibilityTest(test.TestCase): proto_dict = visitor.GetProtos() # Read all golden files. - golden_file_list = file_io.get_matching_files(golden_file_pattern) + golden_file_list = file_io.get_matching_files(golden_file_patterns) if FLAGS.only_test_core_api: golden_file_list = _FilterNonCoreGoldenFiles(golden_file_list) @@ -388,9 +404,10 @@ class ApiCompatibilityTest(test.TestCase): api_version = 1 if hasattr(tf, '_major_api_version') and tf._major_api_version == 2: api_version = 2 - golden_file_pattern = os.path.join( - resource_loader.get_root_dir_with_all_resources(), - _KeyToFilePath('*', api_version)) + golden_file_patterns = [ + os.path.join(resource_loader.get_root_dir_with_all_resources(), + _KeyToFilePath('*', api_version)), + _GetTFNumpyGoldenPattern(api_version)] omit_golden_symbols_map = {} if (api_version == 2 and FLAGS.only_test_core_api and not _TENSORBOARD_AVAILABLE): @@ -401,7 +418,7 @@ class ApiCompatibilityTest(test.TestCase): self._checkBackwardsCompatibility( tf, - golden_file_pattern, + golden_file_patterns, api_version, # Skip compat.v1 and compat.v2 since they are validated # in separate tests. 
@@ -413,12 +430,12 @@ class ApiCompatibilityTest(test.TestCase): def testAPIBackwardsCompatibilityV1(self): api_version = 1 - golden_file_pattern = os.path.join( + golden_file_patterns = os.path.join( resource_loader.get_root_dir_with_all_resources(), _KeyToFilePath('*', api_version)) self._checkBackwardsCompatibility( tf.compat.v1, - golden_file_pattern, + golden_file_patterns, api_version, additional_private_map={ 'tf': ['pywrap_tensorflow'], @@ -428,9 +445,10 @@ class ApiCompatibilityTest(test.TestCase): def testAPIBackwardsCompatibilityV2(self): api_version = 2 - golden_file_pattern = os.path.join( - resource_loader.get_root_dir_with_all_resources(), - _KeyToFilePath('*', api_version)) + golden_file_patterns = [ + os.path.join(resource_loader.get_root_dir_with_all_resources(), + _KeyToFilePath('*', api_version)), + _GetTFNumpyGoldenPattern(api_version)] omit_golden_symbols_map = {} if FLAGS.only_test_core_api and not _TENSORBOARD_AVAILABLE: # In TF 2.0 these summary symbols are imported from TensorBoard. @@ -439,7 +457,7 @@ class ApiCompatibilityTest(test.TestCase): ] self._checkBackwardsCompatibility( tf.compat.v2, - golden_file_pattern, + golden_file_patterns, api_version, additional_private_map={'tf.compat': ['v1', 'v2']}, omit_golden_symbols_map=omit_golden_symbols_map) diff --git a/third_party/py/numpy/tf_numpy_api/BUILD b/third_party/py/numpy/tf_numpy_api/BUILD new file mode 100644 index 00000000000..fc95ad62e40 --- /dev/null +++ b/third_party/py/numpy/tf_numpy_api/BUILD @@ -0,0 +1,11 @@ +# TensorFlow API backwards compatibility test goldens for tf.experimental.numpy. + +package( + default_visibility = ["//visibility:public"], + licenses = ["notice"], +) + +filegroup( + name = "api_golden", + srcs = glob(["*.pbtxt"]), +) diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.bool_.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.bool_.pbtxt new file mode 100644 index 00000000000..cfca8fc9ab8 --- /dev/null +++ b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.bool_.pbtxt @@ -0,0 +1,222 @@ +path: "tensorflow.experimental.numpy.bool_" +tf_class { + is_instance: "" + member { + name: "T" + mtype: "" + } + member { + name: "base" + mtype: "" + } + member { + name: "data" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "flags" + mtype: "" + } + member { + name: "flat" + mtype: "" + } + member { + name: "imag" + mtype: "" + } + member { + name: "itemsize" + mtype: "" + } + member { + name: "nbytes" + mtype: "" + } + member { + name: "ndim" + mtype: "" + } + member { + name: "real" + mtype: "" + } + member { + name: "shape" + mtype: "" + } + member { + name: "size" + mtype: "" + } + member { + name: "strides" + mtype: "" + } + member_method { + name: "__init__" + } + member_method { + name: "all" + } + member_method { + name: "any" + } + member_method { + name: "argmax" + } + member_method { + name: "argmin" + } + member_method { + name: "argsort" + } + member_method { + name: "astype" + } + member_method { + name: "byteswap" + } + member_method { + name: "choose" + } + member_method { + name: "clip" + } + member_method { + name: "compress" + } + member_method { + name: "conj" + } + member_method { + name: "conjugate" + } + member_method { + name: "copy" + } + member_method { + name: "cumprod" + } + member_method { + name: "cumsum" + } + member_method { + name: "diagonal" + } + member_method { + name: "dump" + } + member_method { + name: "dumps" + } + member_method { + name: "fill" + } + 
member_method { + name: "flatten" + } + member_method { + name: "getfield" + } + member_method { + name: "item" + } + member_method { + name: "itemset" + } + member_method { + name: "max" + } + member_method { + name: "mean" + } + member_method { + name: "min" + } + member_method { + name: "newbyteorder" + } + member_method { + name: "nonzero" + } + member_method { + name: "prod" + } + member_method { + name: "ptp" + } + member_method { + name: "put" + } + member_method { + name: "ravel" + } + member_method { + name: "repeat" + } + member_method { + name: "reshape" + } + member_method { + name: "resize" + } + member_method { + name: "round" + } + member_method { + name: "searchsorted" + } + member_method { + name: "setfield" + } + member_method { + name: "setflags" + } + member_method { + name: "sort" + } + member_method { + name: "squeeze" + } + member_method { + name: "std" + } + member_method { + name: "sum" + } + member_method { + name: "swapaxes" + } + member_method { + name: "take" + } + member_method { + name: "tobytes" + } + member_method { + name: "tofile" + } + member_method { + name: "tolist" + } + member_method { + name: "tostring" + } + member_method { + name: "trace" + } + member_method { + name: "transpose" + } + member_method { + name: "var" + } + member_method { + name: "view" + } +} diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.complex128.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.complex128.pbtxt new file mode 100644 index 00000000000..65ba7b24e02 --- /dev/null +++ b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.complex128.pbtxt @@ -0,0 +1,222 @@ +path: "tensorflow.experimental.numpy.complex128" +tf_class { + is_instance: "" + member { + name: "T" + mtype: "" + } + member { + name: "base" + mtype: "" + } + member { + name: "data" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "flags" + mtype: "" + } + member { + name: "flat" + mtype: "" + } + member { + name: "imag" + mtype: "" + } + member { + name: "itemsize" + mtype: "" + } + member { + name: "nbytes" + mtype: "" + } + member { + name: "ndim" + mtype: "" + } + member { + name: "real" + mtype: "" + } + member { + name: "shape" + mtype: "" + } + member { + name: "size" + mtype: "" + } + member { + name: "strides" + mtype: "" + } + member_method { + name: "__init__" + } + member_method { + name: "all" + } + member_method { + name: "any" + } + member_method { + name: "argmax" + } + member_method { + name: "argmin" + } + member_method { + name: "argsort" + } + member_method { + name: "astype" + } + member_method { + name: "byteswap" + } + member_method { + name: "choose" + } + member_method { + name: "clip" + } + member_method { + name: "compress" + } + member_method { + name: "conj" + } + member_method { + name: "conjugate" + } + member_method { + name: "copy" + } + member_method { + name: "cumprod" + } + member_method { + name: "cumsum" + } + member_method { + name: "diagonal" + } + member_method { + name: "dump" + } + member_method { + name: "dumps" + } + member_method { + name: "fill" + } + member_method { + name: "flatten" + } + member_method { + name: "getfield" + } + member_method { + name: "item" + } + member_method { + name: "itemset" + } + member_method { + name: "max" + } + member_method { + name: "mean" + } + member_method { + name: "min" + } + member_method { + name: "newbyteorder" + } + member_method { + name: "nonzero" + } + member_method { + name: "prod" + } + member_method { + name: "ptp" + } + 
member_method { + name: "put" + } + member_method { + name: "ravel" + } + member_method { + name: "repeat" + } + member_method { + name: "reshape" + } + member_method { + name: "resize" + } + member_method { + name: "round" + } + member_method { + name: "searchsorted" + } + member_method { + name: "setfield" + } + member_method { + name: "setflags" + } + member_method { + name: "sort" + } + member_method { + name: "squeeze" + } + member_method { + name: "std" + } + member_method { + name: "sum" + } + member_method { + name: "swapaxes" + } + member_method { + name: "take" + } + member_method { + name: "tobytes" + } + member_method { + name: "tofile" + } + member_method { + name: "tolist" + } + member_method { + name: "tostring" + } + member_method { + name: "trace" + } + member_method { + name: "transpose" + } + member_method { + name: "var" + } + member_method { + name: "view" + } +} diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.complex64.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.complex64.pbtxt new file mode 100644 index 00000000000..f685e1a218c --- /dev/null +++ b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.complex64.pbtxt @@ -0,0 +1,222 @@ +path: "tensorflow.experimental.numpy.complex64" +tf_class { + is_instance: "" + member { + name: "T" + mtype: "" + } + member { + name: "base" + mtype: "" + } + member { + name: "data" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "flags" + mtype: "" + } + member { + name: "flat" + mtype: "" + } + member { + name: "imag" + mtype: "" + } + member { + name: "itemsize" + mtype: "" + } + member { + name: "nbytes" + mtype: "" + } + member { + name: "ndim" + mtype: "" + } + member { + name: "real" + mtype: "" + } + member { + name: "shape" + mtype: "" + } + member { + name: "size" + mtype: "" + } + member { + name: "strides" + mtype: "" + } + member_method { + name: "__init__" + } + member_method { + name: "all" + } + member_method { + name: "any" + } + member_method { + name: "argmax" + } + member_method { + name: "argmin" + } + member_method { + name: "argsort" + } + member_method { + name: "astype" + } + member_method { + name: "byteswap" + } + member_method { + name: "choose" + } + member_method { + name: "clip" + } + member_method { + name: "compress" + } + member_method { + name: "conj" + } + member_method { + name: "conjugate" + } + member_method { + name: "copy" + } + member_method { + name: "cumprod" + } + member_method { + name: "cumsum" + } + member_method { + name: "diagonal" + } + member_method { + name: "dump" + } + member_method { + name: "dumps" + } + member_method { + name: "fill" + } + member_method { + name: "flatten" + } + member_method { + name: "getfield" + } + member_method { + name: "item" + } + member_method { + name: "itemset" + } + member_method { + name: "max" + } + member_method { + name: "mean" + } + member_method { + name: "min" + } + member_method { + name: "newbyteorder" + } + member_method { + name: "nonzero" + } + member_method { + name: "prod" + } + member_method { + name: "ptp" + } + member_method { + name: "put" + } + member_method { + name: "ravel" + } + member_method { + name: "repeat" + } + member_method { + name: "reshape" + } + member_method { + name: "resize" + } + member_method { + name: "round" + } + member_method { + name: "searchsorted" + } + member_method { + name: "setfield" + } + member_method { + name: "setflags" + } + member_method { + name: "sort" + } + member_method { + name: "squeeze" + } + 
member_method { + name: "std" + } + member_method { + name: "sum" + } + member_method { + name: "swapaxes" + } + member_method { + name: "take" + } + member_method { + name: "tobytes" + } + member_method { + name: "tofile" + } + member_method { + name: "tolist" + } + member_method { + name: "tostring" + } + member_method { + name: "trace" + } + member_method { + name: "transpose" + } + member_method { + name: "var" + } + member_method { + name: "view" + } +} diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.complex_.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.complex_.pbtxt new file mode 100644 index 00000000000..99b2b699b45 --- /dev/null +++ b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.complex_.pbtxt @@ -0,0 +1,222 @@ +path: "tensorflow.experimental.numpy.complex_" +tf_class { + is_instance: "" + member { + name: "T" + mtype: "" + } + member { + name: "base" + mtype: "" + } + member { + name: "data" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "flags" + mtype: "" + } + member { + name: "flat" + mtype: "" + } + member { + name: "imag" + mtype: "" + } + member { + name: "itemsize" + mtype: "" + } + member { + name: "nbytes" + mtype: "" + } + member { + name: "ndim" + mtype: "" + } + member { + name: "real" + mtype: "" + } + member { + name: "shape" + mtype: "" + } + member { + name: "size" + mtype: "" + } + member { + name: "strides" + mtype: "" + } + member_method { + name: "__init__" + } + member_method { + name: "all" + } + member_method { + name: "any" + } + member_method { + name: "argmax" + } + member_method { + name: "argmin" + } + member_method { + name: "argsort" + } + member_method { + name: "astype" + } + member_method { + name: "byteswap" + } + member_method { + name: "choose" + } + member_method { + name: "clip" + } + member_method { + name: "compress" + } + member_method { + name: "conj" + } + member_method { + name: "conjugate" + } + member_method { + name: "copy" + } + member_method { + name: "cumprod" + } + member_method { + name: "cumsum" + } + member_method { + name: "diagonal" + } + member_method { + name: "dump" + } + member_method { + name: "dumps" + } + member_method { + name: "fill" + } + member_method { + name: "flatten" + } + member_method { + name: "getfield" + } + member_method { + name: "item" + } + member_method { + name: "itemset" + } + member_method { + name: "max" + } + member_method { + name: "mean" + } + member_method { + name: "min" + } + member_method { + name: "newbyteorder" + } + member_method { + name: "nonzero" + } + member_method { + name: "prod" + } + member_method { + name: "ptp" + } + member_method { + name: "put" + } + member_method { + name: "ravel" + } + member_method { + name: "repeat" + } + member_method { + name: "reshape" + } + member_method { + name: "resize" + } + member_method { + name: "round" + } + member_method { + name: "searchsorted" + } + member_method { + name: "setfield" + } + member_method { + name: "setflags" + } + member_method { + name: "sort" + } + member_method { + name: "squeeze" + } + member_method { + name: "std" + } + member_method { + name: "sum" + } + member_method { + name: "swapaxes" + } + member_method { + name: "take" + } + member_method { + name: "tobytes" + } + member_method { + name: "tofile" + } + member_method { + name: "tolist" + } + member_method { + name: "tostring" + } + member_method { + name: "trace" + } + member_method { + name: "transpose" + } + member_method { + name: "var" + } + 
member_method { + name: "view" + } +} diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.float16.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.float16.pbtxt new file mode 100644 index 00000000000..92075608841 --- /dev/null +++ b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.float16.pbtxt @@ -0,0 +1,222 @@ +path: "tensorflow.experimental.numpy.float16" +tf_class { + is_instance: "" + member { + name: "T" + mtype: "" + } + member { + name: "base" + mtype: "" + } + member { + name: "data" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "flags" + mtype: "" + } + member { + name: "flat" + mtype: "" + } + member { + name: "imag" + mtype: "" + } + member { + name: "itemsize" + mtype: "" + } + member { + name: "nbytes" + mtype: "" + } + member { + name: "ndim" + mtype: "" + } + member { + name: "real" + mtype: "" + } + member { + name: "shape" + mtype: "" + } + member { + name: "size" + mtype: "" + } + member { + name: "strides" + mtype: "" + } + member_method { + name: "__init__" + } + member_method { + name: "all" + } + member_method { + name: "any" + } + member_method { + name: "argmax" + } + member_method { + name: "argmin" + } + member_method { + name: "argsort" + } + member_method { + name: "astype" + } + member_method { + name: "byteswap" + } + member_method { + name: "choose" + } + member_method { + name: "clip" + } + member_method { + name: "compress" + } + member_method { + name: "conj" + } + member_method { + name: "conjugate" + } + member_method { + name: "copy" + } + member_method { + name: "cumprod" + } + member_method { + name: "cumsum" + } + member_method { + name: "diagonal" + } + member_method { + name: "dump" + } + member_method { + name: "dumps" + } + member_method { + name: "fill" + } + member_method { + name: "flatten" + } + member_method { + name: "getfield" + } + member_method { + name: "item" + } + member_method { + name: "itemset" + } + member_method { + name: "max" + } + member_method { + name: "mean" + } + member_method { + name: "min" + } + member_method { + name: "newbyteorder" + } + member_method { + name: "nonzero" + } + member_method { + name: "prod" + } + member_method { + name: "ptp" + } + member_method { + name: "put" + } + member_method { + name: "ravel" + } + member_method { + name: "repeat" + } + member_method { + name: "reshape" + } + member_method { + name: "resize" + } + member_method { + name: "round" + } + member_method { + name: "searchsorted" + } + member_method { + name: "setfield" + } + member_method { + name: "setflags" + } + member_method { + name: "sort" + } + member_method { + name: "squeeze" + } + member_method { + name: "std" + } + member_method { + name: "sum" + } + member_method { + name: "swapaxes" + } + member_method { + name: "take" + } + member_method { + name: "tobytes" + } + member_method { + name: "tofile" + } + member_method { + name: "tolist" + } + member_method { + name: "tostring" + } + member_method { + name: "trace" + } + member_method { + name: "transpose" + } + member_method { + name: "var" + } + member_method { + name: "view" + } +} diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.float32.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.float32.pbtxt new file mode 100644 index 00000000000..8c6c2a7eeac --- /dev/null +++ b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.float32.pbtxt @@ -0,0 +1,222 @@ +path: "tensorflow.experimental.numpy.float32" +tf_class { + 
is_instance: "" + member { + name: "T" + mtype: "" + } + member { + name: "base" + mtype: "" + } + member { + name: "data" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "flags" + mtype: "" + } + member { + name: "flat" + mtype: "" + } + member { + name: "imag" + mtype: "" + } + member { + name: "itemsize" + mtype: "" + } + member { + name: "nbytes" + mtype: "" + } + member { + name: "ndim" + mtype: "" + } + member { + name: "real" + mtype: "" + } + member { + name: "shape" + mtype: "" + } + member { + name: "size" + mtype: "" + } + member { + name: "strides" + mtype: "" + } + member_method { + name: "__init__" + } + member_method { + name: "all" + } + member_method { + name: "any" + } + member_method { + name: "argmax" + } + member_method { + name: "argmin" + } + member_method { + name: "argsort" + } + member_method { + name: "astype" + } + member_method { + name: "byteswap" + } + member_method { + name: "choose" + } + member_method { + name: "clip" + } + member_method { + name: "compress" + } + member_method { + name: "conj" + } + member_method { + name: "conjugate" + } + member_method { + name: "copy" + } + member_method { + name: "cumprod" + } + member_method { + name: "cumsum" + } + member_method { + name: "diagonal" + } + member_method { + name: "dump" + } + member_method { + name: "dumps" + } + member_method { + name: "fill" + } + member_method { + name: "flatten" + } + member_method { + name: "getfield" + } + member_method { + name: "item" + } + member_method { + name: "itemset" + } + member_method { + name: "max" + } + member_method { + name: "mean" + } + member_method { + name: "min" + } + member_method { + name: "newbyteorder" + } + member_method { + name: "nonzero" + } + member_method { + name: "prod" + } + member_method { + name: "ptp" + } + member_method { + name: "put" + } + member_method { + name: "ravel" + } + member_method { + name: "repeat" + } + member_method { + name: "reshape" + } + member_method { + name: "resize" + } + member_method { + name: "round" + } + member_method { + name: "searchsorted" + } + member_method { + name: "setfield" + } + member_method { + name: "setflags" + } + member_method { + name: "sort" + } + member_method { + name: "squeeze" + } + member_method { + name: "std" + } + member_method { + name: "sum" + } + member_method { + name: "swapaxes" + } + member_method { + name: "take" + } + member_method { + name: "tobytes" + } + member_method { + name: "tofile" + } + member_method { + name: "tolist" + } + member_method { + name: "tostring" + } + member_method { + name: "trace" + } + member_method { + name: "transpose" + } + member_method { + name: "var" + } + member_method { + name: "view" + } +} diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.float64.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.float64.pbtxt new file mode 100644 index 00000000000..44e6fa28648 --- /dev/null +++ b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.float64.pbtxt @@ -0,0 +1,234 @@ +path: "tensorflow.experimental.numpy.float64" +tf_class { + is_instance: "" + member { + name: "T" + mtype: "" + } + member { + name: "base" + mtype: "" + } + member { + name: "data" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "flags" + mtype: "" + } + member { + name: "flat" + mtype: "" + } + member { + name: "imag" + mtype: "" + } + member { + name: "itemsize" + mtype: "" + } + member { + name: "nbytes" + mtype: "" + } + member { + name: "ndim" + mtype: "" + } 
+ member { + name: "real" + mtype: "" + } + member { + name: "shape" + mtype: "" + } + member { + name: "size" + mtype: "" + } + member { + name: "strides" + mtype: "" + } + member_method { + name: "__init__" + } + member_method { + name: "all" + } + member_method { + name: "any" + } + member_method { + name: "argmax" + } + member_method { + name: "argmin" + } + member_method { + name: "argsort" + } + member_method { + name: "as_integer_ratio" + } + member_method { + name: "astype" + } + member_method { + name: "byteswap" + } + member_method { + name: "choose" + } + member_method { + name: "clip" + } + member_method { + name: "compress" + } + member_method { + name: "conj" + } + member_method { + name: "conjugate" + } + member_method { + name: "copy" + } + member_method { + name: "cumprod" + } + member_method { + name: "cumsum" + } + member_method { + name: "diagonal" + } + member_method { + name: "dump" + } + member_method { + name: "dumps" + } + member_method { + name: "fill" + } + member_method { + name: "flatten" + } + member_method { + name: "fromhex" + } + member_method { + name: "getfield" + } + member_method { + name: "hex" + } + member_method { + name: "is_integer" + } + member_method { + name: "item" + } + member_method { + name: "itemset" + } + member_method { + name: "max" + } + member_method { + name: "mean" + } + member_method { + name: "min" + } + member_method { + name: "newbyteorder" + } + member_method { + name: "nonzero" + } + member_method { + name: "prod" + } + member_method { + name: "ptp" + } + member_method { + name: "put" + } + member_method { + name: "ravel" + } + member_method { + name: "repeat" + } + member_method { + name: "reshape" + } + member_method { + name: "resize" + } + member_method { + name: "round" + } + member_method { + name: "searchsorted" + } + member_method { + name: "setfield" + } + member_method { + name: "setflags" + } + member_method { + name: "sort" + } + member_method { + name: "squeeze" + } + member_method { + name: "std" + } + member_method { + name: "sum" + } + member_method { + name: "swapaxes" + } + member_method { + name: "take" + } + member_method { + name: "tobytes" + } + member_method { + name: "tofile" + } + member_method { + name: "tolist" + } + member_method { + name: "tostring" + } + member_method { + name: "trace" + } + member_method { + name: "transpose" + } + member_method { + name: "var" + } + member_method { + name: "view" + } +} diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.float_.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.float_.pbtxt new file mode 100644 index 00000000000..781e79962ea --- /dev/null +++ b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.float_.pbtxt @@ -0,0 +1,234 @@ +path: "tensorflow.experimental.numpy.float_" +tf_class { + is_instance: "" + member { + name: "T" + mtype: "" + } + member { + name: "base" + mtype: "" + } + member { + name: "data" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "flags" + mtype: "" + } + member { + name: "flat" + mtype: "" + } + member { + name: "imag" + mtype: "" + } + member { + name: "itemsize" + mtype: "" + } + member { + name: "nbytes" + mtype: "" + } + member { + name: "ndim" + mtype: "" + } + member { + name: "real" + mtype: "" + } + member { + name: "shape" + mtype: "" + } + member { + name: "size" + mtype: "" + } + member { + name: "strides" + mtype: "" + } + member_method { + name: "__init__" + } + member_method { + name: "all" + } + member_method { + name: 
"any" + } + member_method { + name: "argmax" + } + member_method { + name: "argmin" + } + member_method { + name: "argsort" + } + member_method { + name: "as_integer_ratio" + } + member_method { + name: "astype" + } + member_method { + name: "byteswap" + } + member_method { + name: "choose" + } + member_method { + name: "clip" + } + member_method { + name: "compress" + } + member_method { + name: "conj" + } + member_method { + name: "conjugate" + } + member_method { + name: "copy" + } + member_method { + name: "cumprod" + } + member_method { + name: "cumsum" + } + member_method { + name: "diagonal" + } + member_method { + name: "dump" + } + member_method { + name: "dumps" + } + member_method { + name: "fill" + } + member_method { + name: "flatten" + } + member_method { + name: "fromhex" + } + member_method { + name: "getfield" + } + member_method { + name: "hex" + } + member_method { + name: "is_integer" + } + member_method { + name: "item" + } + member_method { + name: "itemset" + } + member_method { + name: "max" + } + member_method { + name: "mean" + } + member_method { + name: "min" + } + member_method { + name: "newbyteorder" + } + member_method { + name: "nonzero" + } + member_method { + name: "prod" + } + member_method { + name: "ptp" + } + member_method { + name: "put" + } + member_method { + name: "ravel" + } + member_method { + name: "repeat" + } + member_method { + name: "reshape" + } + member_method { + name: "resize" + } + member_method { + name: "round" + } + member_method { + name: "searchsorted" + } + member_method { + name: "setfield" + } + member_method { + name: "setflags" + } + member_method { + name: "sort" + } + member_method { + name: "squeeze" + } + member_method { + name: "std" + } + member_method { + name: "sum" + } + member_method { + name: "swapaxes" + } + member_method { + name: "take" + } + member_method { + name: "tobytes" + } + member_method { + name: "tofile" + } + member_method { + name: "tolist" + } + member_method { + name: "tostring" + } + member_method { + name: "trace" + } + member_method { + name: "transpose" + } + member_method { + name: "var" + } + member_method { + name: "view" + } +} diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.inexact.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.inexact.pbtxt new file mode 100644 index 00000000000..3aa95703b61 --- /dev/null +++ b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.inexact.pbtxt @@ -0,0 +1,222 @@ +path: "tensorflow.experimental.numpy.inexact" +tf_class { + is_instance: "" + member { + name: "T" + mtype: "" + } + member { + name: "base" + mtype: "" + } + member { + name: "data" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "flags" + mtype: "" + } + member { + name: "flat" + mtype: "" + } + member { + name: "imag" + mtype: "" + } + member { + name: "itemsize" + mtype: "" + } + member { + name: "nbytes" + mtype: "" + } + member { + name: "ndim" + mtype: "" + } + member { + name: "real" + mtype: "" + } + member { + name: "shape" + mtype: "" + } + member { + name: "size" + mtype: "" + } + member { + name: "strides" + mtype: "" + } + member_method { + name: "__init__" + } + member_method { + name: "all" + } + member_method { + name: "any" + } + member_method { + name: "argmax" + } + member_method { + name: "argmin" + } + member_method { + name: "argsort" + } + member_method { + name: "astype" + } + member_method { + name: "byteswap" + } + member_method { + name: "choose" + } + member_method { + name: 
"clip" + } + member_method { + name: "compress" + } + member_method { + name: "conj" + } + member_method { + name: "conjugate" + } + member_method { + name: "copy" + } + member_method { + name: "cumprod" + } + member_method { + name: "cumsum" + } + member_method { + name: "diagonal" + } + member_method { + name: "dump" + } + member_method { + name: "dumps" + } + member_method { + name: "fill" + } + member_method { + name: "flatten" + } + member_method { + name: "getfield" + } + member_method { + name: "item" + } + member_method { + name: "itemset" + } + member_method { + name: "max" + } + member_method { + name: "mean" + } + member_method { + name: "min" + } + member_method { + name: "newbyteorder" + } + member_method { + name: "nonzero" + } + member_method { + name: "prod" + } + member_method { + name: "ptp" + } + member_method { + name: "put" + } + member_method { + name: "ravel" + } + member_method { + name: "repeat" + } + member_method { + name: "reshape" + } + member_method { + name: "resize" + } + member_method { + name: "round" + } + member_method { + name: "searchsorted" + } + member_method { + name: "setfield" + } + member_method { + name: "setflags" + } + member_method { + name: "sort" + } + member_method { + name: "squeeze" + } + member_method { + name: "std" + } + member_method { + name: "sum" + } + member_method { + name: "swapaxes" + } + member_method { + name: "take" + } + member_method { + name: "tobytes" + } + member_method { + name: "tofile" + } + member_method { + name: "tolist" + } + member_method { + name: "tostring" + } + member_method { + name: "trace" + } + member_method { + name: "transpose" + } + member_method { + name: "var" + } + member_method { + name: "view" + } +} diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int16.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int16.pbtxt new file mode 100644 index 00000000000..d972ab938c9 --- /dev/null +++ b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int16.pbtxt @@ -0,0 +1,230 @@ +path: "tensorflow.experimental.numpy.int16" +tf_class { + is_instance: "" + member { + name: "T" + mtype: "" + } + member { + name: "base" + mtype: "" + } + member { + name: "data" + mtype: "" + } + member { + name: "denominator" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "flags" + mtype: "" + } + member { + name: "flat" + mtype: "" + } + member { + name: "imag" + mtype: "" + } + member { + name: "itemsize" + mtype: "" + } + member { + name: "nbytes" + mtype: "" + } + member { + name: "ndim" + mtype: "" + } + member { + name: "numerator" + mtype: "" + } + member { + name: "real" + mtype: "" + } + member { + name: "shape" + mtype: "" + } + member { + name: "size" + mtype: "" + } + member { + name: "strides" + mtype: "" + } + member_method { + name: "__init__" + } + member_method { + name: "all" + } + member_method { + name: "any" + } + member_method { + name: "argmax" + } + member_method { + name: "argmin" + } + member_method { + name: "argsort" + } + member_method { + name: "astype" + } + member_method { + name: "byteswap" + } + member_method { + name: "choose" + } + member_method { + name: "clip" + } + member_method { + name: "compress" + } + member_method { + name: "conj" + } + member_method { + name: "conjugate" + } + member_method { + name: "copy" + } + member_method { + name: "cumprod" + } + member_method { + name: "cumsum" + } + member_method { + name: "diagonal" + } + member_method { + name: "dump" + } + member_method { + name: 
"dumps" + } + member_method { + name: "fill" + } + member_method { + name: "flatten" + } + member_method { + name: "getfield" + } + member_method { + name: "item" + } + member_method { + name: "itemset" + } + member_method { + name: "max" + } + member_method { + name: "mean" + } + member_method { + name: "min" + } + member_method { + name: "newbyteorder" + } + member_method { + name: "nonzero" + } + member_method { + name: "prod" + } + member_method { + name: "ptp" + } + member_method { + name: "put" + } + member_method { + name: "ravel" + } + member_method { + name: "repeat" + } + member_method { + name: "reshape" + } + member_method { + name: "resize" + } + member_method { + name: "round" + } + member_method { + name: "searchsorted" + } + member_method { + name: "setfield" + } + member_method { + name: "setflags" + } + member_method { + name: "sort" + } + member_method { + name: "squeeze" + } + member_method { + name: "std" + } + member_method { + name: "sum" + } + member_method { + name: "swapaxes" + } + member_method { + name: "take" + } + member_method { + name: "tobytes" + } + member_method { + name: "tofile" + } + member_method { + name: "tolist" + } + member_method { + name: "tostring" + } + member_method { + name: "trace" + } + member_method { + name: "transpose" + } + member_method { + name: "var" + } + member_method { + name: "view" + } +} diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int32.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int32.pbtxt new file mode 100644 index 00000000000..b5f3ce60b0b --- /dev/null +++ b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int32.pbtxt @@ -0,0 +1,230 @@ +path: "tensorflow.experimental.numpy.int32" +tf_class { + is_instance: "" + member { + name: "T" + mtype: "" + } + member { + name: "base" + mtype: "" + } + member { + name: "data" + mtype: "" + } + member { + name: "denominator" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "flags" + mtype: "" + } + member { + name: "flat" + mtype: "" + } + member { + name: "imag" + mtype: "" + } + member { + name: "itemsize" + mtype: "" + } + member { + name: "nbytes" + mtype: "" + } + member { + name: "ndim" + mtype: "" + } + member { + name: "numerator" + mtype: "" + } + member { + name: "real" + mtype: "" + } + member { + name: "shape" + mtype: "" + } + member { + name: "size" + mtype: "" + } + member { + name: "strides" + mtype: "" + } + member_method { + name: "__init__" + } + member_method { + name: "all" + } + member_method { + name: "any" + } + member_method { + name: "argmax" + } + member_method { + name: "argmin" + } + member_method { + name: "argsort" + } + member_method { + name: "astype" + } + member_method { + name: "byteswap" + } + member_method { + name: "choose" + } + member_method { + name: "clip" + } + member_method { + name: "compress" + } + member_method { + name: "conj" + } + member_method { + name: "conjugate" + } + member_method { + name: "copy" + } + member_method { + name: "cumprod" + } + member_method { + name: "cumsum" + } + member_method { + name: "diagonal" + } + member_method { + name: "dump" + } + member_method { + name: "dumps" + } + member_method { + name: "fill" + } + member_method { + name: "flatten" + } + member_method { + name: "getfield" + } + member_method { + name: "item" + } + member_method { + name: "itemset" + } + member_method { + name: "max" + } + member_method { + name: "mean" + } + member_method { + name: "min" + } + member_method { + name: 
"newbyteorder" + } + member_method { + name: "nonzero" + } + member_method { + name: "prod" + } + member_method { + name: "ptp" + } + member_method { + name: "put" + } + member_method { + name: "ravel" + } + member_method { + name: "repeat" + } + member_method { + name: "reshape" + } + member_method { + name: "resize" + } + member_method { + name: "round" + } + member_method { + name: "searchsorted" + } + member_method { + name: "setfield" + } + member_method { + name: "setflags" + } + member_method { + name: "sort" + } + member_method { + name: "squeeze" + } + member_method { + name: "std" + } + member_method { + name: "sum" + } + member_method { + name: "swapaxes" + } + member_method { + name: "take" + } + member_method { + name: "tobytes" + } + member_method { + name: "tofile" + } + member_method { + name: "tolist" + } + member_method { + name: "tostring" + } + member_method { + name: "trace" + } + member_method { + name: "transpose" + } + member_method { + name: "var" + } + member_method { + name: "view" + } +} diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int64.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int64.pbtxt new file mode 100644 index 00000000000..a6e6661ebd5 --- /dev/null +++ b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int64.pbtxt @@ -0,0 +1,230 @@ +path: "tensorflow.experimental.numpy.int64" +tf_class { + is_instance: "" + member { + name: "T" + mtype: "" + } + member { + name: "base" + mtype: "" + } + member { + name: "data" + mtype: "" + } + member { + name: "denominator" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "flags" + mtype: "" + } + member { + name: "flat" + mtype: "" + } + member { + name: "imag" + mtype: "" + } + member { + name: "itemsize" + mtype: "" + } + member { + name: "nbytes" + mtype: "" + } + member { + name: "ndim" + mtype: "" + } + member { + name: "numerator" + mtype: "" + } + member { + name: "real" + mtype: "" + } + member { + name: "shape" + mtype: "" + } + member { + name: "size" + mtype: "" + } + member { + name: "strides" + mtype: "" + } + member_method { + name: "__init__" + } + member_method { + name: "all" + } + member_method { + name: "any" + } + member_method { + name: "argmax" + } + member_method { + name: "argmin" + } + member_method { + name: "argsort" + } + member_method { + name: "astype" + } + member_method { + name: "byteswap" + } + member_method { + name: "choose" + } + member_method { + name: "clip" + } + member_method { + name: "compress" + } + member_method { + name: "conj" + } + member_method { + name: "conjugate" + } + member_method { + name: "copy" + } + member_method { + name: "cumprod" + } + member_method { + name: "cumsum" + } + member_method { + name: "diagonal" + } + member_method { + name: "dump" + } + member_method { + name: "dumps" + } + member_method { + name: "fill" + } + member_method { + name: "flatten" + } + member_method { + name: "getfield" + } + member_method { + name: "item" + } + member_method { + name: "itemset" + } + member_method { + name: "max" + } + member_method { + name: "mean" + } + member_method { + name: "min" + } + member_method { + name: "newbyteorder" + } + member_method { + name: "nonzero" + } + member_method { + name: "prod" + } + member_method { + name: "ptp" + } + member_method { + name: "put" + } + member_method { + name: "ravel" + } + member_method { + name: "repeat" + } + member_method { + name: "reshape" + } + member_method { + name: "resize" + } + member_method { + name: 
"round" + } + member_method { + name: "searchsorted" + } + member_method { + name: "setfield" + } + member_method { + name: "setflags" + } + member_method { + name: "sort" + } + member_method { + name: "squeeze" + } + member_method { + name: "std" + } + member_method { + name: "sum" + } + member_method { + name: "swapaxes" + } + member_method { + name: "take" + } + member_method { + name: "tobytes" + } + member_method { + name: "tofile" + } + member_method { + name: "tolist" + } + member_method { + name: "tostring" + } + member_method { + name: "trace" + } + member_method { + name: "transpose" + } + member_method { + name: "var" + } + member_method { + name: "view" + } +} diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int8.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int8.pbtxt new file mode 100644 index 00000000000..52b7787ddfc --- /dev/null +++ b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int8.pbtxt @@ -0,0 +1,230 @@ +path: "tensorflow.experimental.numpy.int8" +tf_class { + is_instance: "" + member { + name: "T" + mtype: "" + } + member { + name: "base" + mtype: "" + } + member { + name: "data" + mtype: "" + } + member { + name: "denominator" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "flags" + mtype: "" + } + member { + name: "flat" + mtype: "" + } + member { + name: "imag" + mtype: "" + } + member { + name: "itemsize" + mtype: "" + } + member { + name: "nbytes" + mtype: "" + } + member { + name: "ndim" + mtype: "" + } + member { + name: "numerator" + mtype: "" + } + member { + name: "real" + mtype: "" + } + member { + name: "shape" + mtype: "" + } + member { + name: "size" + mtype: "" + } + member { + name: "strides" + mtype: "" + } + member_method { + name: "__init__" + } + member_method { + name: "all" + } + member_method { + name: "any" + } + member_method { + name: "argmax" + } + member_method { + name: "argmin" + } + member_method { + name: "argsort" + } + member_method { + name: "astype" + } + member_method { + name: "byteswap" + } + member_method { + name: "choose" + } + member_method { + name: "clip" + } + member_method { + name: "compress" + } + member_method { + name: "conj" + } + member_method { + name: "conjugate" + } + member_method { + name: "copy" + } + member_method { + name: "cumprod" + } + member_method { + name: "cumsum" + } + member_method { + name: "diagonal" + } + member_method { + name: "dump" + } + member_method { + name: "dumps" + } + member_method { + name: "fill" + } + member_method { + name: "flatten" + } + member_method { + name: "getfield" + } + member_method { + name: "item" + } + member_method { + name: "itemset" + } + member_method { + name: "max" + } + member_method { + name: "mean" + } + member_method { + name: "min" + } + member_method { + name: "newbyteorder" + } + member_method { + name: "nonzero" + } + member_method { + name: "prod" + } + member_method { + name: "ptp" + } + member_method { + name: "put" + } + member_method { + name: "ravel" + } + member_method { + name: "repeat" + } + member_method { + name: "reshape" + } + member_method { + name: "resize" + } + member_method { + name: "round" + } + member_method { + name: "searchsorted" + } + member_method { + name: "setfield" + } + member_method { + name: "setflags" + } + member_method { + name: "sort" + } + member_method { + name: "squeeze" + } + member_method { + name: "std" + } + member_method { + name: "sum" + } + member_method { + name: "swapaxes" + } + member_method { + name: 
"take" + } + member_method { + name: "tobytes" + } + member_method { + name: "tofile" + } + member_method { + name: "tolist" + } + member_method { + name: "tostring" + } + member_method { + name: "trace" + } + member_method { + name: "transpose" + } + member_method { + name: "var" + } + member_method { + name: "view" + } +} diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int_.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int_.pbtxt new file mode 100644 index 00000000000..089c50dcf51 --- /dev/null +++ b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.int_.pbtxt @@ -0,0 +1,230 @@ +path: "tensorflow.experimental.numpy.int_" +tf_class { + is_instance: "" + member { + name: "T" + mtype: "" + } + member { + name: "base" + mtype: "" + } + member { + name: "data" + mtype: "" + } + member { + name: "denominator" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "flags" + mtype: "" + } + member { + name: "flat" + mtype: "" + } + member { + name: "imag" + mtype: "" + } + member { + name: "itemsize" + mtype: "" + } + member { + name: "nbytes" + mtype: "" + } + member { + name: "ndim" + mtype: "" + } + member { + name: "numerator" + mtype: "" + } + member { + name: "real" + mtype: "" + } + member { + name: "shape" + mtype: "" + } + member { + name: "size" + mtype: "" + } + member { + name: "strides" + mtype: "" + } + member_method { + name: "__init__" + } + member_method { + name: "all" + } + member_method { + name: "any" + } + member_method { + name: "argmax" + } + member_method { + name: "argmin" + } + member_method { + name: "argsort" + } + member_method { + name: "astype" + } + member_method { + name: "byteswap" + } + member_method { + name: "choose" + } + member_method { + name: "clip" + } + member_method { + name: "compress" + } + member_method { + name: "conj" + } + member_method { + name: "conjugate" + } + member_method { + name: "copy" + } + member_method { + name: "cumprod" + } + member_method { + name: "cumsum" + } + member_method { + name: "diagonal" + } + member_method { + name: "dump" + } + member_method { + name: "dumps" + } + member_method { + name: "fill" + } + member_method { + name: "flatten" + } + member_method { + name: "getfield" + } + member_method { + name: "item" + } + member_method { + name: "itemset" + } + member_method { + name: "max" + } + member_method { + name: "mean" + } + member_method { + name: "min" + } + member_method { + name: "newbyteorder" + } + member_method { + name: "nonzero" + } + member_method { + name: "prod" + } + member_method { + name: "ptp" + } + member_method { + name: "put" + } + member_method { + name: "ravel" + } + member_method { + name: "repeat" + } + member_method { + name: "reshape" + } + member_method { + name: "resize" + } + member_method { + name: "round" + } + member_method { + name: "searchsorted" + } + member_method { + name: "setfield" + } + member_method { + name: "setflags" + } + member_method { + name: "sort" + } + member_method { + name: "squeeze" + } + member_method { + name: "std" + } + member_method { + name: "sum" + } + member_method { + name: "swapaxes" + } + member_method { + name: "take" + } + member_method { + name: "tobytes" + } + member_method { + name: "tofile" + } + member_method { + name: "tolist" + } + member_method { + name: "tostring" + } + member_method { + name: "trace" + } + member_method { + name: "transpose" + } + member_method { + name: "var" + } + member_method { + name: "view" + } +} diff --git 
a/third_party/py/numpy/tf_numpy_api/numpy_ops.ndarray.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.ndarray.pbtxt similarity index 97% rename from third_party/py/numpy/tf_numpy_api/numpy_ops.ndarray.pbtxt rename to third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.ndarray.pbtxt index 8492a30d81b..f54ecbdbf47 100644 --- a/third_party/py/numpy/tf_numpy_api/numpy_ops.ndarray.pbtxt +++ b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.ndarray.pbtxt @@ -1,4 +1,4 @@ -path: "numpy_ops.ndarray" +path: "tensorflow.experimental.numpy.ndarray" tf_class { is_instance: "" is_instance: "" diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.object_.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.object_.pbtxt new file mode 100644 index 00000000000..1fe27281e15 --- /dev/null +++ b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.object_.pbtxt @@ -0,0 +1,222 @@ +path: "tensorflow.experimental.numpy.object_" +tf_class { + is_instance: "" + member { + name: "T" + mtype: "" + } + member { + name: "base" + mtype: "" + } + member { + name: "data" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "flags" + mtype: "" + } + member { + name: "flat" + mtype: "" + } + member { + name: "imag" + mtype: "" + } + member { + name: "itemsize" + mtype: "" + } + member { + name: "nbytes" + mtype: "" + } + member { + name: "ndim" + mtype: "" + } + member { + name: "real" + mtype: "" + } + member { + name: "shape" + mtype: "" + } + member { + name: "size" + mtype: "" + } + member { + name: "strides" + mtype: "" + } + member_method { + name: "__init__" + } + member_method { + name: "all" + } + member_method { + name: "any" + } + member_method { + name: "argmax" + } + member_method { + name: "argmin" + } + member_method { + name: "argsort" + } + member_method { + name: "astype" + } + member_method { + name: "byteswap" + } + member_method { + name: "choose" + } + member_method { + name: "clip" + } + member_method { + name: "compress" + } + member_method { + name: "conj" + } + member_method { + name: "conjugate" + } + member_method { + name: "copy" + } + member_method { + name: "cumprod" + } + member_method { + name: "cumsum" + } + member_method { + name: "diagonal" + } + member_method { + name: "dump" + } + member_method { + name: "dumps" + } + member_method { + name: "fill" + } + member_method { + name: "flatten" + } + member_method { + name: "getfield" + } + member_method { + name: "item" + } + member_method { + name: "itemset" + } + member_method { + name: "max" + } + member_method { + name: "mean" + } + member_method { + name: "min" + } + member_method { + name: "newbyteorder" + } + member_method { + name: "nonzero" + } + member_method { + name: "prod" + } + member_method { + name: "ptp" + } + member_method { + name: "put" + } + member_method { + name: "ravel" + } + member_method { + name: "repeat" + } + member_method { + name: "reshape" + } + member_method { + name: "resize" + } + member_method { + name: "round" + } + member_method { + name: "searchsorted" + } + member_method { + name: "setfield" + } + member_method { + name: "setflags" + } + member_method { + name: "sort" + } + member_method { + name: "squeeze" + } + member_method { + name: "std" + } + member_method { + name: "sum" + } + member_method { + name: "swapaxes" + } + member_method { + name: "take" + } + member_method { + name: "tobytes" + } + member_method { + name: "tofile" + } + member_method { + name: "tolist" + } 
+ member_method { + name: "tostring" + } + member_method { + name: "trace" + } + member_method { + name: "transpose" + } + member_method { + name: "var" + } + member_method { + name: "view" + } +} diff --git a/third_party/py/numpy/tf_numpy_api/numpy_ops.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.pbtxt similarity index 98% rename from third_party/py/numpy/tf_numpy_api/numpy_ops.pbtxt rename to third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.pbtxt index 30913665f14..cb1e28ea1db 100644 --- a/third_party/py/numpy/tf_numpy_api/numpy_ops.pbtxt +++ b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.pbtxt @@ -1,4 +1,4 @@ -path: "numpy_ops" +path: "tensorflow.experimental.numpy" tf_module { member { name: "bool_" @@ -474,7 +474,7 @@ tf_module { } member_method { name: "imag" - argspec: "args=[\'a\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'val\'], varargs=None, keywords=None, defaults=None" } member_method { name: "inner" @@ -522,7 +522,7 @@ tf_module { } member_method { name: "isscalar" - argspec: "args=[\'a\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'num\'], varargs=None, keywords=None, defaults=None" } member_method { name: "issubdtype" @@ -682,7 +682,7 @@ tf_module { } member_method { name: "pad" - argspec: "args=[\'ary\', \'pad_width\', \'mode\', \'constant_values\'], varargs=None, keywords=None, defaults=[\'0\'], " + argspec: "args=[\'array\', \'pad_width\', \'mode\'], varargs=None, keywords=kwargs, defaults=None" } member_method { name: "polyval" diff --git a/third_party/py/numpy/tf_numpy_api/numpy_ops.random.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.random.pbtxt similarity index 94% rename from third_party/py/numpy/tf_numpy_api/numpy_ops.random.pbtxt rename to third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.random.pbtxt index 3e6bb720e8c..ad8e752a7ec 100644 --- a/third_party/py/numpy/tf_numpy_api/numpy_ops.random.pbtxt +++ b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.random.pbtxt @@ -1,4 +1,4 @@ -path: "numpy_ops.random" +path: "tensorflow.experimental.numpy.random" tf_module { member_method { name: "rand" diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.string_.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.string_.pbtxt new file mode 100644 index 00000000000..3297a0bd2a0 --- /dev/null +++ b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.string_.pbtxt @@ -0,0 +1,339 @@ +path: "tensorflow.experimental.numpy.string_" +tf_class { + is_instance: "" + member { + name: "T" + mtype: "" + } + member { + name: "base" + mtype: "" + } + member { + name: "data" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "flags" + mtype: "" + } + member { + name: "flat" + mtype: "" + } + member { + name: "imag" + mtype: "" + } + member { + name: "itemsize" + mtype: "" + } + member { + name: "nbytes" + mtype: "" + } + member { + name: "ndim" + mtype: "" + } + member { + name: "real" + mtype: "" + } + member { + name: "shape" + mtype: "" + } + member { + name: "size" + mtype: "" + } + member { + name: "strides" + mtype: "" + } + member_method { + name: "__init__" + } + member_method { + name: "all" + } + member_method { + name: "any" + } + member_method { + name: "argmax" + } + member_method { + name: "argmin" + } + member_method { + name: "argsort" + } + member_method { + name: "astype" + } + member_method { + name: "byteswap" + } + 
member_method { + name: "capitalize" + } + member_method { + name: "center" + } + member_method { + name: "choose" + } + member_method { + name: "clip" + } + member_method { + name: "compress" + } + member_method { + name: "conj" + } + member_method { + name: "conjugate" + } + member_method { + name: "copy" + } + member_method { + name: "count" + } + member_method { + name: "cumprod" + } + member_method { + name: "cumsum" + } + member_method { + name: "decode" + } + member_method { + name: "diagonal" + } + member_method { + name: "dump" + } + member_method { + name: "dumps" + } + member_method { + name: "endswith" + } + member_method { + name: "expandtabs" + } + member_method { + name: "fill" + } + member_method { + name: "find" + } + member_method { + name: "flatten" + } + member_method { + name: "fromhex" + } + member_method { + name: "getfield" + } + member_method { + name: "hex" + } + member_method { + name: "index" + } + member_method { + name: "isalnum" + } + member_method { + name: "isalpha" + } + member_method { + name: "isdigit" + } + member_method { + name: "islower" + } + member_method { + name: "isspace" + } + member_method { + name: "istitle" + } + member_method { + name: "isupper" + } + member_method { + name: "item" + } + member_method { + name: "itemset" + } + member_method { + name: "join" + } + member_method { + name: "ljust" + } + member_method { + name: "lower" + } + member_method { + name: "lstrip" + } + member_method { + name: "maketrans" + } + member_method { + name: "max" + } + member_method { + name: "mean" + } + member_method { + name: "min" + } + member_method { + name: "newbyteorder" + } + member_method { + name: "nonzero" + } + member_method { + name: "partition" + } + member_method { + name: "prod" + } + member_method { + name: "ptp" + } + member_method { + name: "put" + } + member_method { + name: "ravel" + } + member_method { + name: "repeat" + } + member_method { + name: "replace" + } + member_method { + name: "reshape" + } + member_method { + name: "resize" + } + member_method { + name: "rfind" + } + member_method { + name: "rindex" + } + member_method { + name: "rjust" + } + member_method { + name: "round" + } + member_method { + name: "rpartition" + } + member_method { + name: "rsplit" + } + member_method { + name: "rstrip" + } + member_method { + name: "searchsorted" + } + member_method { + name: "setfield" + } + member_method { + name: "setflags" + } + member_method { + name: "sort" + } + member_method { + name: "split" + } + member_method { + name: "splitlines" + } + member_method { + name: "squeeze" + } + member_method { + name: "startswith" + } + member_method { + name: "std" + } + member_method { + name: "strip" + } + member_method { + name: "sum" + } + member_method { + name: "swapaxes" + } + member_method { + name: "swapcase" + } + member_method { + name: "take" + } + member_method { + name: "title" + } + member_method { + name: "tobytes" + } + member_method { + name: "tofile" + } + member_method { + name: "tolist" + } + member_method { + name: "tostring" + } + member_method { + name: "trace" + } + member_method { + name: "translate" + } + member_method { + name: "transpose" + } + member_method { + name: "upper" + } + member_method { + name: "var" + } + member_method { + name: "view" + } + member_method { + name: "zfill" + } +} diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.uint16.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.uint16.pbtxt new file mode 100644 index 00000000000..e590dac994d 
--- /dev/null +++ b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.uint16.pbtxt @@ -0,0 +1,230 @@ +path: "tensorflow.experimental.numpy.uint16" +tf_class { + is_instance: "" + member { + name: "T" + mtype: "" + } + member { + name: "base" + mtype: "" + } + member { + name: "data" + mtype: "" + } + member { + name: "denominator" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "flags" + mtype: "" + } + member { + name: "flat" + mtype: "" + } + member { + name: "imag" + mtype: "" + } + member { + name: "itemsize" + mtype: "" + } + member { + name: "nbytes" + mtype: "" + } + member { + name: "ndim" + mtype: "" + } + member { + name: "numerator" + mtype: "" + } + member { + name: "real" + mtype: "" + } + member { + name: "shape" + mtype: "" + } + member { + name: "size" + mtype: "" + } + member { + name: "strides" + mtype: "" + } + member_method { + name: "__init__" + } + member_method { + name: "all" + } + member_method { + name: "any" + } + member_method { + name: "argmax" + } + member_method { + name: "argmin" + } + member_method { + name: "argsort" + } + member_method { + name: "astype" + } + member_method { + name: "byteswap" + } + member_method { + name: "choose" + } + member_method { + name: "clip" + } + member_method { + name: "compress" + } + member_method { + name: "conj" + } + member_method { + name: "conjugate" + } + member_method { + name: "copy" + } + member_method { + name: "cumprod" + } + member_method { + name: "cumsum" + } + member_method { + name: "diagonal" + } + member_method { + name: "dump" + } + member_method { + name: "dumps" + } + member_method { + name: "fill" + } + member_method { + name: "flatten" + } + member_method { + name: "getfield" + } + member_method { + name: "item" + } + member_method { + name: "itemset" + } + member_method { + name: "max" + } + member_method { + name: "mean" + } + member_method { + name: "min" + } + member_method { + name: "newbyteorder" + } + member_method { + name: "nonzero" + } + member_method { + name: "prod" + } + member_method { + name: "ptp" + } + member_method { + name: "put" + } + member_method { + name: "ravel" + } + member_method { + name: "repeat" + } + member_method { + name: "reshape" + } + member_method { + name: "resize" + } + member_method { + name: "round" + } + member_method { + name: "searchsorted" + } + member_method { + name: "setfield" + } + member_method { + name: "setflags" + } + member_method { + name: "sort" + } + member_method { + name: "squeeze" + } + member_method { + name: "std" + } + member_method { + name: "sum" + } + member_method { + name: "swapaxes" + } + member_method { + name: "take" + } + member_method { + name: "tobytes" + } + member_method { + name: "tofile" + } + member_method { + name: "tolist" + } + member_method { + name: "tostring" + } + member_method { + name: "trace" + } + member_method { + name: "transpose" + } + member_method { + name: "var" + } + member_method { + name: "view" + } +} diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.uint32.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.uint32.pbtxt new file mode 100644 index 00000000000..35b9a962aef --- /dev/null +++ b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.uint32.pbtxt @@ -0,0 +1,230 @@ +path: "tensorflow.experimental.numpy.uint32" +tf_class { + is_instance: "" + member { + name: "T" + mtype: "" + } + member { + name: "base" + mtype: "" + } + member { + name: "data" + mtype: "" + } + member { + name: "denominator" + 
mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "flags" + mtype: "" + } + member { + name: "flat" + mtype: "" + } + member { + name: "imag" + mtype: "" + } + member { + name: "itemsize" + mtype: "" + } + member { + name: "nbytes" + mtype: "" + } + member { + name: "ndim" + mtype: "" + } + member { + name: "numerator" + mtype: "" + } + member { + name: "real" + mtype: "" + } + member { + name: "shape" + mtype: "" + } + member { + name: "size" + mtype: "" + } + member { + name: "strides" + mtype: "" + } + member_method { + name: "__init__" + } + member_method { + name: "all" + } + member_method { + name: "any" + } + member_method { + name: "argmax" + } + member_method { + name: "argmin" + } + member_method { + name: "argsort" + } + member_method { + name: "astype" + } + member_method { + name: "byteswap" + } + member_method { + name: "choose" + } + member_method { + name: "clip" + } + member_method { + name: "compress" + } + member_method { + name: "conj" + } + member_method { + name: "conjugate" + } + member_method { + name: "copy" + } + member_method { + name: "cumprod" + } + member_method { + name: "cumsum" + } + member_method { + name: "diagonal" + } + member_method { + name: "dump" + } + member_method { + name: "dumps" + } + member_method { + name: "fill" + } + member_method { + name: "flatten" + } + member_method { + name: "getfield" + } + member_method { + name: "item" + } + member_method { + name: "itemset" + } + member_method { + name: "max" + } + member_method { + name: "mean" + } + member_method { + name: "min" + } + member_method { + name: "newbyteorder" + } + member_method { + name: "nonzero" + } + member_method { + name: "prod" + } + member_method { + name: "ptp" + } + member_method { + name: "put" + } + member_method { + name: "ravel" + } + member_method { + name: "repeat" + } + member_method { + name: "reshape" + } + member_method { + name: "resize" + } + member_method { + name: "round" + } + member_method { + name: "searchsorted" + } + member_method { + name: "setfield" + } + member_method { + name: "setflags" + } + member_method { + name: "sort" + } + member_method { + name: "squeeze" + } + member_method { + name: "std" + } + member_method { + name: "sum" + } + member_method { + name: "swapaxes" + } + member_method { + name: "take" + } + member_method { + name: "tobytes" + } + member_method { + name: "tofile" + } + member_method { + name: "tolist" + } + member_method { + name: "tostring" + } + member_method { + name: "trace" + } + member_method { + name: "transpose" + } + member_method { + name: "var" + } + member_method { + name: "view" + } +} diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.uint64.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.uint64.pbtxt new file mode 100644 index 00000000000..867e55c9712 --- /dev/null +++ b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.uint64.pbtxt @@ -0,0 +1,230 @@ +path: "tensorflow.experimental.numpy.uint64" +tf_class { + is_instance: "" + member { + name: "T" + mtype: "" + } + member { + name: "base" + mtype: "" + } + member { + name: "data" + mtype: "" + } + member { + name: "denominator" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "flags" + mtype: "" + } + member { + name: "flat" + mtype: "" + } + member { + name: "imag" + mtype: "" + } + member { + name: "itemsize" + mtype: "" + } + member { + name: "nbytes" + mtype: "" + } + member { + name: "ndim" + mtype: "" + } + member { + name: "numerator" + 
mtype: "" + } + member { + name: "real" + mtype: "" + } + member { + name: "shape" + mtype: "" + } + member { + name: "size" + mtype: "" + } + member { + name: "strides" + mtype: "" + } + member_method { + name: "__init__" + } + member_method { + name: "all" + } + member_method { + name: "any" + } + member_method { + name: "argmax" + } + member_method { + name: "argmin" + } + member_method { + name: "argsort" + } + member_method { + name: "astype" + } + member_method { + name: "byteswap" + } + member_method { + name: "choose" + } + member_method { + name: "clip" + } + member_method { + name: "compress" + } + member_method { + name: "conj" + } + member_method { + name: "conjugate" + } + member_method { + name: "copy" + } + member_method { + name: "cumprod" + } + member_method { + name: "cumsum" + } + member_method { + name: "diagonal" + } + member_method { + name: "dump" + } + member_method { + name: "dumps" + } + member_method { + name: "fill" + } + member_method { + name: "flatten" + } + member_method { + name: "getfield" + } + member_method { + name: "item" + } + member_method { + name: "itemset" + } + member_method { + name: "max" + } + member_method { + name: "mean" + } + member_method { + name: "min" + } + member_method { + name: "newbyteorder" + } + member_method { + name: "nonzero" + } + member_method { + name: "prod" + } + member_method { + name: "ptp" + } + member_method { + name: "put" + } + member_method { + name: "ravel" + } + member_method { + name: "repeat" + } + member_method { + name: "reshape" + } + member_method { + name: "resize" + } + member_method { + name: "round" + } + member_method { + name: "searchsorted" + } + member_method { + name: "setfield" + } + member_method { + name: "setflags" + } + member_method { + name: "sort" + } + member_method { + name: "squeeze" + } + member_method { + name: "std" + } + member_method { + name: "sum" + } + member_method { + name: "swapaxes" + } + member_method { + name: "take" + } + member_method { + name: "tobytes" + } + member_method { + name: "tofile" + } + member_method { + name: "tolist" + } + member_method { + name: "tostring" + } + member_method { + name: "trace" + } + member_method { + name: "transpose" + } + member_method { + name: "var" + } + member_method { + name: "view" + } +} diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.uint8.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.uint8.pbtxt new file mode 100644 index 00000000000..24d3ed0458a --- /dev/null +++ b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.uint8.pbtxt @@ -0,0 +1,230 @@ +path: "tensorflow.experimental.numpy.uint8" +tf_class { + is_instance: "" + member { + name: "T" + mtype: "" + } + member { + name: "base" + mtype: "" + } + member { + name: "data" + mtype: "" + } + member { + name: "denominator" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "flags" + mtype: "" + } + member { + name: "flat" + mtype: "" + } + member { + name: "imag" + mtype: "" + } + member { + name: "itemsize" + mtype: "" + } + member { + name: "nbytes" + mtype: "" + } + member { + name: "ndim" + mtype: "" + } + member { + name: "numerator" + mtype: "" + } + member { + name: "real" + mtype: "" + } + member { + name: "shape" + mtype: "" + } + member { + name: "size" + mtype: "" + } + member { + name: "strides" + mtype: "" + } + member_method { + name: "__init__" + } + member_method { + name: "all" + } + member_method { + name: "any" + } + member_method { + name: "argmax" + } + member_method 
{ + name: "argmin" + } + member_method { + name: "argsort" + } + member_method { + name: "astype" + } + member_method { + name: "byteswap" + } + member_method { + name: "choose" + } + member_method { + name: "clip" + } + member_method { + name: "compress" + } + member_method { + name: "conj" + } + member_method { + name: "conjugate" + } + member_method { + name: "copy" + } + member_method { + name: "cumprod" + } + member_method { + name: "cumsum" + } + member_method { + name: "diagonal" + } + member_method { + name: "dump" + } + member_method { + name: "dumps" + } + member_method { + name: "fill" + } + member_method { + name: "flatten" + } + member_method { + name: "getfield" + } + member_method { + name: "item" + } + member_method { + name: "itemset" + } + member_method { + name: "max" + } + member_method { + name: "mean" + } + member_method { + name: "min" + } + member_method { + name: "newbyteorder" + } + member_method { + name: "nonzero" + } + member_method { + name: "prod" + } + member_method { + name: "ptp" + } + member_method { + name: "put" + } + member_method { + name: "ravel" + } + member_method { + name: "repeat" + } + member_method { + name: "reshape" + } + member_method { + name: "resize" + } + member_method { + name: "round" + } + member_method { + name: "searchsorted" + } + member_method { + name: "setfield" + } + member_method { + name: "setflags" + } + member_method { + name: "sort" + } + member_method { + name: "squeeze" + } + member_method { + name: "std" + } + member_method { + name: "sum" + } + member_method { + name: "swapaxes" + } + member_method { + name: "take" + } + member_method { + name: "tobytes" + } + member_method { + name: "tofile" + } + member_method { + name: "tolist" + } + member_method { + name: "tostring" + } + member_method { + name: "trace" + } + member_method { + name: "transpose" + } + member_method { + name: "var" + } + member_method { + name: "view" + } +} diff --git a/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.unicode_.pbtxt b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.unicode_.pbtxt new file mode 100644 index 00000000000..7cfc7fb56cd --- /dev/null +++ b/third_party/py/numpy/tf_numpy_api/tensorflow.experimental.numpy.unicode_.pbtxt @@ -0,0 +1,354 @@ +path: "tensorflow.experimental.numpy.unicode_" +tf_class { + is_instance: "" + member { + name: "T" + mtype: "" + } + member { + name: "base" + mtype: "" + } + member { + name: "data" + mtype: "" + } + member { + name: "dtype" + mtype: "" + } + member { + name: "flags" + mtype: "" + } + member { + name: "flat" + mtype: "" + } + member { + name: "imag" + mtype: "" + } + member { + name: "itemsize" + mtype: "" + } + member { + name: "nbytes" + mtype: "" + } + member { + name: "ndim" + mtype: "" + } + member { + name: "real" + mtype: "" + } + member { + name: "shape" + mtype: "" + } + member { + name: "size" + mtype: "" + } + member { + name: "strides" + mtype: "" + } + member_method { + name: "__init__" + } + member_method { + name: "all" + } + member_method { + name: "any" + } + member_method { + name: "argmax" + } + member_method { + name: "argmin" + } + member_method { + name: "argsort" + } + member_method { + name: "astype" + } + member_method { + name: "byteswap" + } + member_method { + name: "capitalize" + } + member_method { + name: "casefold" + } + member_method { + name: "center" + } + member_method { + name: "choose" + } + member_method { + name: "clip" + } + member_method { + name: "compress" + } + member_method { + name: "conj" + } + member_method { 
+ name: "conjugate" + } + member_method { + name: "copy" + } + member_method { + name: "count" + } + member_method { + name: "cumprod" + } + member_method { + name: "cumsum" + } + member_method { + name: "diagonal" + } + member_method { + name: "dump" + } + member_method { + name: "dumps" + } + member_method { + name: "encode" + } + member_method { + name: "endswith" + } + member_method { + name: "expandtabs" + } + member_method { + name: "fill" + } + member_method { + name: "find" + } + member_method { + name: "flatten" + } + member_method { + name: "format" + } + member_method { + name: "format_map" + } + member_method { + name: "getfield" + } + member_method { + name: "index" + } + member_method { + name: "isalnum" + } + member_method { + name: "isalpha" + } + member_method { + name: "isdecimal" + } + member_method { + name: "isdigit" + } + member_method { + name: "isidentifier" + } + member_method { + name: "islower" + } + member_method { + name: "isnumeric" + } + member_method { + name: "isprintable" + } + member_method { + name: "isspace" + } + member_method { + name: "istitle" + } + member_method { + name: "isupper" + } + member_method { + name: "item" + } + member_method { + name: "itemset" + } + member_method { + name: "join" + } + member_method { + name: "ljust" + } + member_method { + name: "lower" + } + member_method { + name: "lstrip" + } + member_method { + name: "maketrans" + } + member_method { + name: "max" + } + member_method { + name: "mean" + } + member_method { + name: "min" + } + member_method { + name: "newbyteorder" + } + member_method { + name: "nonzero" + } + member_method { + name: "partition" + } + member_method { + name: "prod" + } + member_method { + name: "ptp" + } + member_method { + name: "put" + } + member_method { + name: "ravel" + } + member_method { + name: "repeat" + } + member_method { + name: "replace" + } + member_method { + name: "reshape" + } + member_method { + name: "resize" + } + member_method { + name: "rfind" + } + member_method { + name: "rindex" + } + member_method { + name: "rjust" + } + member_method { + name: "round" + } + member_method { + name: "rpartition" + } + member_method { + name: "rsplit" + } + member_method { + name: "rstrip" + } + member_method { + name: "searchsorted" + } + member_method { + name: "setfield" + } + member_method { + name: "setflags" + } + member_method { + name: "sort" + } + member_method { + name: "split" + } + member_method { + name: "splitlines" + } + member_method { + name: "squeeze" + } + member_method { + name: "startswith" + } + member_method { + name: "std" + } + member_method { + name: "strip" + } + member_method { + name: "sum" + } + member_method { + name: "swapaxes" + } + member_method { + name: "swapcase" + } + member_method { + name: "take" + } + member_method { + name: "title" + } + member_method { + name: "tobytes" + } + member_method { + name: "tofile" + } + member_method { + name: "tolist" + } + member_method { + name: "tostring" + } + member_method { + name: "trace" + } + member_method { + name: "translate" + } + member_method { + name: "transpose" + } + member_method { + name: "upper" + } + member_method { + name: "var" + } + member_method { + name: "view" + } + member_method { + name: "zfill" + } +} From 51901d95c3197fd9a0c98f31c9a984cac01056d1 Mon Sep 17 00:00:00 2001 From: Gaurav Jain Date: Wed, 15 Jul 2020 17:18:28 -0700 Subject: [PATCH 0568/2522] Fix tf.pad gradient to respect paddings dtype PiperOrigin-RevId: 321469718 Change-Id: I0585e76ced5026f637157e588241bb4d9a103d74 --- 
tensorflow/python/kernel_tests/pad_op_test.py | 35 +++++++++++++------ tensorflow/python/ops/array_grad.py | 2 +- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/tensorflow/python/kernel_tests/pad_op_test.py b/tensorflow/python/kernel_tests/pad_op_test.py index 06f4780421d..eff99f993b3 100644 --- a/tensorflow/python/kernel_tests/pad_op_test.py +++ b/tensorflow/python/kernel_tests/pad_op_test.py @@ -93,11 +93,16 @@ class PadOpTest(test.TestCase): self.assertAllEqual(np_val, out) self.assertShapeEqual(np_val, tf_val) - def _testGradient(self, x, a, mode, constant_values): + def _testGradient(self, + x, + a, + mode, + constant_values, + paddings_dtype=dtypes.int32): with self.cached_session(use_gpu=True): inx = ops.convert_to_tensor(x) xs = list(x.shape) - ina = ops.convert_to_tensor(a) + ina = ops.convert_to_tensor(a, paddings_dtype) y = array_ops.pad(inx, ina, mode=mode, constant_values=constant_values) # Expected y's shape to be: ys = list(np.array(x.shape) + np.sum(np.array(a), axis=1)) @@ -202,24 +207,34 @@ class PadOpTest(test.TestCase): array_ops.pad(x, [[1, 0], [2, 1]], mode="weird").eval() def testPaddingTypes(self): - paddings = [[1, 0], [2, 3], [0, 2]] - inputs = np.random.randint(-100, 100, (4, 4, 3)).astype(np.float32) + paddings = [[1, 0], [2, 0]] + inputs = np.random.rand(2, 5).astype(np.float32) for mode in ("CONSTANT", "REFLECT", "SYMMETRIC", "reflect", "symmetric", "constant"): - for padding_dtype in [dtypes.int32, dtypes.int64]: + for paddings_dtype in [dtypes.int32, dtypes.int64]: np_val = self._npPad(inputs, paddings, mode=mode, constant_values=0) - with self.cached_session(use_gpu=True): - tf_val = array_ops.pad(inputs, - constant_op.constant(paddings, padding_dtype), - mode=mode, - constant_values=0) + with test_util.use_gpu(): + tf_val = array_ops.pad( + inputs, + constant_op.constant(paddings, paddings_dtype), + mode=mode, + constant_values=0) out = self.evaluate(tf_val) self.assertAllEqual(np_val, out) self.assertShapeEqual(np_val, tf_val) + if mode.upper() != "REFLECT": + with ops.Graph().as_default(): + self._testGradient( + inputs, + paddings, + mode=mode, + constant_values=0, + paddings_dtype=paddings_dtype) + def testIntTypes(self): # TODO(touts): Figure out why the padding tests do not work on GPU # for int types and rank > 2. diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py index c9a634230e0..5576ce5e538 100644 --- a/tensorflow/python/ops/array_grad.py +++ b/tensorflow/python/ops/array_grad.py @@ -847,7 +847,7 @@ def _PadGrad(op, grad): array_ops.stack([array_ops.rank(x), 1])) # Make it a 1-D tensor. begin = array_ops.reshape(pad_before, [-1]) - sizes = array_ops.shape(x) + sizes = array_ops.shape(x, out_type=begin.dtype) x_grad = array_ops.slice(grad, begin, sizes) if len(op.inputs) == 3: return x_grad, None, None From d707965f7363c5b1777b738f7925b914277afdcc Mon Sep 17 00:00:00 2001 From: Jing Pu Date: Wed, 15 Jul 2020 17:19:16 -0700 Subject: [PATCH 0569/2522] Fix an assertion failure in the tf.Reshape op folder. Previously, "getNumDynamicDims" asserts when "arg_ty" has a dynamic rank. 
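For the tf.pad gradient change in PATCH 0568 above, the following minimal Python sketch (not part of the patch; values and names are illustrative) shows the user-level case being fixed: int64 paddings passed to tf.pad, then a gradient taken through it. Before the array_grad.py change, _PadGrad built `begin` from the (possibly int64) paddings but computed `sizes` with an int32 shape, so the backward tf.slice received mixed index dtypes; with the change, `sizes` is computed with `out_type=begin.dtype`.

    import tensorflow as tf

    x = tf.constant([[1.0, 2.0], [3.0, 4.0]])
    # int64 paddings: the case the fixed gradient must handle.
    paddings = tf.constant([[1, 0], [2, 0]], dtype=tf.int64)

    with tf.GradientTape() as tape:
        tape.watch(x)
        y = tf.pad(x, paddings, mode="CONSTANT", constant_values=0)
        loss = tf.reduce_sum(y)

    # Padding only rearranges values, so the gradient w.r.t. x is all ones.
    grad = tape.gradient(loss, x)
    print(grad)
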
PiperOrigin-RevId: 321469821 Change-Id: I66824676f55525f23a2268dbdc7c65e0cba46349 --- tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc | 3 ++- .../compiler/mlir/tensorflow/tests/canonicalize.mlir | 10 ++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc index c5c729a600e..0d9b2610492 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc @@ -695,7 +695,8 @@ OpFoldResult ReshapeOp::fold(ArrayRef operands) { // First tensor dimension is dynamic. auto arg_ty = tensor.getType().dyn_cast(); - if (!arg_ty || arg_ty.getNumDynamicDims() != 1 || !arg_ty.isDynamicDim(0)) + if (!arg_ty || !arg_ty.hasRank() || arg_ty.getNumDynamicDims() != 1 || + !arg_ty.isDynamicDim(0)) return {}; // Argument tensor rank is equal to the number of packed dimensions. diff --git a/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir b/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir index 514db1f4f08..c67725fbccf 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir @@ -386,7 +386,7 @@ func @testReshapeNoOp(%arg0: tensor<2x4xf32>, %arg1: tensor<2xi32>) -> tensor<2x } // CHECK-LABEL: func @testReshapeNoOpShapeComputation -func @testReshapeNoOpShapeComputation(%arg0: tensor, %arg1: tensor) -> (tensor, tensor, tensor, tensor, tensor, tensor) { +func @testReshapeNoOpShapeComputation(%arg0: tensor, %arg1: tensor, %arg2: tensor<*xf32>) -> (tensor, tensor, tensor, tensor, tensor, tensor, tensor<*xf32>) { // Test dimensions sizes. %d1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor %d2 = "tf.Const"() {value = dense<2> : tensor} : () -> tensor @@ -447,8 +447,14 @@ func @testReshapeNoOpShapeComputation(%arg0: tensor, %arg1: tensor, tensor<3xi32>) -> tensor + // Make sure a dynamic ranked shape doesn't crash the "canonicalize" pass + %23 = "tf.Shape"(%arg2) : (tensor<*xf32>) -> tensor<*xi32> + %24 = "tf.StridedSlice"(%23, %0, %1, %1) {shrink_axis_mask = 1 : i64} : (tensor<*xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<*xi32> + %25 = "tf.Pack"(%24, %d1) {axis = 0 : i64} : (tensor<*xi32>, tensor) -> tensor<*xi32> + %26 = "tf.Reshape"(%arg2, %25) : (tensor<*xf32>, tensor<*xi32>) -> tensor<*xf32> + // CHECK: return %arg0, %arg1, %[[RESHAPE0]], %[[RESHAPE1]], %[[RESHAPE2]], %[[RESHAPE3]] - return %6, %10, %13, %16, %19, %22 : tensor, tensor, tensor, tensor, tensor, tensor + return %6, %10, %13, %16, %19, %22, %26 : tensor, tensor, tensor, tensor, tensor, tensor, tensor<*xf32> } // CHECK-LABEL: testSelectScalarPred From 3d1dd32fc4bbd8d8120f755adf417c1094dec420 Mon Sep 17 00:00:00 2001 From: Karim Nosir Date: Wed, 15 Jul 2020 17:47:38 -0700 Subject: [PATCH 0570/2522] Remove hardware interface. 
PiperOrigin-RevId: 321473847 Change-Id: I726bfddfc46810ff517a1bbc610c7645b665c215 --- tensorflow/compiler/mlir/lite/BUILD | 2 - .../mlir/lite/experimental/estimators/BUILD | 3 - .../estimators/arithmetic_count_util.h | 20 +- .../experimental/estimators/cpu_estimators.h | 149 ----- .../lite/experimental/estimators/estimator.h | 51 -- .../experimental/estimators/gpu_estimators.h | 543 ------------------ .../experimental/tfl_hardware_interfaces.td | 76 --- .../mlir/lite/ir/tfl_op_interfaces.td | 1 - tensorflow/compiler/mlir/lite/ir/tfl_ops.h | 5 - tensorflow/compiler/mlir/lite/ir/tfl_ops.td | 107 ++-- 10 files changed, 49 insertions(+), 908 deletions(-) delete mode 100644 tensorflow/compiler/mlir/lite/experimental/estimators/cpu_estimators.h delete mode 100644 tensorflow/compiler/mlir/lite/experimental/estimators/estimator.h delete mode 100644 tensorflow/compiler/mlir/lite/experimental/estimators/gpu_estimators.h delete mode 100644 tensorflow/compiler/mlir/lite/experimental/tfl_hardware_interfaces.td diff --git a/tensorflow/compiler/mlir/lite/BUILD b/tensorflow/compiler/mlir/lite/BUILD index ab523e9cb8d..a6b5327c15d 100644 --- a/tensorflow/compiler/mlir/lite/BUILD +++ b/tensorflow/compiler/mlir/lite/BUILD @@ -25,7 +25,6 @@ package_group( filegroup( name = "tensorflow_lite_ops_td_files", srcs = [ - "experimental/tfl_hardware_interfaces.td", "ir/tfl_op_interfaces.td", "ir/tfl_ops.td", "//tensorflow/compiler/mlir/lite/quantization:quantization_td_files", @@ -482,7 +481,6 @@ gentbl( td_srcs = [ "@llvm-project//mlir:include/mlir/Interfaces/LoopLikeInterface.td", "//tensorflow/compiler/mlir/lite/quantization:quantization_td_files", - "experimental/tfl_hardware_interfaces.td", "ir/tfl_op_interfaces.td", ], ) diff --git a/tensorflow/compiler/mlir/lite/experimental/estimators/BUILD b/tensorflow/compiler/mlir/lite/experimental/estimators/BUILD index 04d5d3db918..373c95f6bf5 100644 --- a/tensorflow/compiler/mlir/lite/experimental/estimators/BUILD +++ b/tensorflow/compiler/mlir/lite/experimental/estimators/BUILD @@ -8,9 +8,6 @@ package( cc_library( name = "cost_estimators", textual_hdrs = [ - "estimator.h", - "cpu_estimators.h", - "gpu_estimators.h", "hardware.h", "arithmetic_count_util.h", ], diff --git a/tensorflow/compiler/mlir/lite/experimental/estimators/arithmetic_count_util.h b/tensorflow/compiler/mlir/lite/experimental/estimators/arithmetic_count_util.h index 2ca49e4e1e5..782714f5955 100644 --- a/tensorflow/compiler/mlir/lite/experimental/estimators/arithmetic_count_util.h +++ b/tensorflow/compiler/mlir/lite/experimental/estimators/arithmetic_count_util.h @@ -15,13 +15,17 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_MLIR_LITE_EXPERIMENTAL_ESTIMATORS_ARITHMETIC_COUNT_UTIL_H_ #define TENSORFLOW_COMPILER_MLIR_LITE_EXPERIMENTAL_ESTIMATORS_ARITHMETIC_COUNT_UTIL_H_ +#include "mlir/IR/Operation.h" // from @llvm-project +#include "mlir/IR/StandardTypes.h" // from @llvm-project + // For add/mul/div/sub and other broadcastable ops. 
class ArithmeticCountUtilHelper { public: static bool GetArithmeticCountForBroadcastableOp(mlir::Operation* op, int64_t* count) { auto output = op->getResult(0); - auto output_type = output.getType().dyn_cast_or_null(); + auto output_type = + output.getType().dyn_cast_or_null(); if (!output_type || !output_type.hasStaticShape()) return false; *count = output_type.getNumElements(); @@ -31,7 +35,8 @@ class ArithmeticCountUtilHelper { static bool GetInputTensorTotalSize(mlir::Operation* op, int64_t* count) { int64_t total_count = 0; for (auto input : op->getOperands()) { - auto input_type = input.getType().dyn_cast_or_null(); + auto input_type = + input.getType().dyn_cast_or_null(); if (!input_type || !input_type.hasStaticShape()) { return false; } @@ -43,14 +48,16 @@ class ArithmeticCountUtilHelper { // For conv2d/depthwise_conv/fully_connected ops. // This algorithm actually comes from TOCO tooling_util.cc - static bool GetArithmeticCountForConvAndFullyconnectedOp(Operation* op, + static bool GetArithmeticCountForConvAndFullyconnectedOp(mlir::Operation* op, int64_t* count) { auto weight = op->getOperand(1); - auto weight_type = weight.getType().dyn_cast_or_null(); + auto weight_type = + weight.getType().dyn_cast_or_null(); if (weight_type == nullptr || !weight_type.hasStaticShape()) return false; auto output = op->getResult(0); - auto output_type = output.getType().dyn_cast_or_null(); + auto output_type = + output.getType().dyn_cast_or_null(); if (output_type == nullptr || !output_type.hasStaticShape()) return false; int64_t cols = 1; @@ -63,7 +70,8 @@ class ArithmeticCountUtilHelper { auto bias = op->getOperand(2); if (bias) { - auto bias_type = bias.getType().dyn_cast_or_null(); + auto bias_type = + bias.getType().dyn_cast_or_null(); if (bias_type && bias_type.hasStaticShape()) { *count += bias_type.getNumElements(); } diff --git a/tensorflow/compiler/mlir/lite/experimental/estimators/cpu_estimators.h b/tensorflow/compiler/mlir/lite/experimental/estimators/cpu_estimators.h deleted file mode 100644 index b47c08c7cb4..00000000000 --- a/tensorflow/compiler/mlir/lite/experimental/estimators/cpu_estimators.h +++ /dev/null @@ -1,149 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_COMPILER_MLIR_LITE_EXPERIMENTAL_ESTIMATORS_CPU_ESTIMATORS_H_ -#define TENSORFLOW_COMPILER_MLIR_LITE_EXPERIMENTAL_ESTIMATORS_CPU_ESTIMATORS_H_ - -// CPU -constexpr float kCPUArithmeticUnitCost = 1.0; - -// This basically assumes pure load/store. This is just fake data. -constexpr float kCPUCopyUnitCost = 0.5; -constexpr float kCPUDefaultCost = 3.0f; - -// Default values. 
-constexpr float kCPUDefaultFixedValuedCost = 10000.0; - -// tfl.add -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - int64_t count; - if (ArithmeticCountUtilHelper::GetArithmeticCountForBroadcastableOp(op, - &count)) - return kCPUArithmeticUnitCost * count; - return kCPUDefaultFixedValuedCost; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.concatenation -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - int64_t count; - if (ArithmeticCountUtilHelper::GetInputTensorTotalSize(op, &count)) - return kCPUCopyUnitCost * count; - return kCPUDefaultFixedValuedCost; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.conv_2d -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - int64_t arithmetic_count; - if (ArithmeticCountUtilHelper::GetArithmeticCountForConvAndFullyconnectedOp( - op, &arithmetic_count)) { - return arithmetic_count * kCPUArithmeticUnitCost; - } - return kCPUDefaultFixedValuedCost; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.depthwise_conv_2d -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - int64_t arithmetic_count; - if (ArithmeticCountUtilHelper::GetArithmeticCountForConvAndFullyconnectedOp( - op, &arithmetic_count)) { - return arithmetic_count * kCPUArithmeticUnitCost; - } - return kCPUDefaultFixedValuedCost; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.fully_connected -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - int64_t arithmetic_count; - if (ArithmeticCountUtilHelper::GetArithmeticCountForConvAndFullyconnectedOp( - op, &arithmetic_count)) { - return arithmetic_count * kCPUArithmeticUnitCost; - } - return kCPUDefaultFixedValuedCost; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.mul -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - int64_t count; - if (ArithmeticCountUtilHelper::GetArithmeticCountForBroadcastableOp(op, - &count)) - return kCPUArithmeticUnitCost * count; - return kCPUDefaultFixedValuedCost; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.pack -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - int64_t count; - if (ArithmeticCountUtilHelper::GetInputTensorTotalSize(op, &count)) - return kCPUCopyUnitCost * count; - return kCPUDefaultFixedValuedCost; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.reshape -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - int64_t count; - if (ArithmeticCountUtilHelper::GetInputTensorTotalSize(op, &count)) - return kCPUCopyUnitCost * count; - return kCPUDefaultFixedValuedCost; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -#endif // TENSORFLOW_COMPILER_MLIR_LITE_EXPERIMENTAL_ESTIMATORS_CPU_ESTIMATORS_H_ diff --git a/tensorflow/compiler/mlir/lite/experimental/estimators/estimator.h b/tensorflow/compiler/mlir/lite/experimental/estimators/estimator.h deleted file mode 100644 index c4a509945fa..00000000000 --- a/tensorflow/compiler/mlir/lite/experimental/estimators/estimator.h +++ /dev/null @@ -1,51 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. 
All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_MLIR_LITE_EXPERIMENTAL_ESTIMATORS_ESTIMATOR_H_ -#define TENSORFLOW_COMPILER_MLIR_LITE_EXPERIMENTAL_ESTIMATORS_ESTIMATOR_H_ - -#include "llvm/Support/raw_ostream.h" -#include "mlir/IR/Operation.h" // from @llvm-project -#include "tensorflow/compiler/mlir/lite/experimental/estimators/hardware.h" -#include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h.inc" - -template -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - llvm::errs() << "No defined cost function for op: " - << op->getName().getStringRef().str(); - return 0.0; - } - - static bool IsSupported(mlir::Operation* op) { - llvm::errs() << "No defined support for op: " - << op->getName().getStringRef().str(); - return false; - } -}; - -// All ops on CPU are supported. -// TODO(karimnosseir): Only allow TFL ops in the "TFL_OP" param. -template -class TFLiteCostEstimator { - public: - // TODO(karimnosseir): Update and use table based method and lookup - // cost from a loadable table ? - static double GetCost(mlir::Operation* op) { return 0.0; } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -#endif // TENSORFLOW_COMPILER_MLIR_LITE_EXPERIMENTAL_ESTIMATORS_ESTIMATOR_H_ diff --git a/tensorflow/compiler/mlir/lite/experimental/estimators/gpu_estimators.h b/tensorflow/compiler/mlir/lite/experimental/estimators/gpu_estimators.h deleted file mode 100644 index 45e8707ef44..00000000000 --- a/tensorflow/compiler/mlir/lite/experimental/estimators/gpu_estimators.h +++ /dev/null @@ -1,543 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_COMPILER_MLIR_LITE_EXPERIMENTAL_ESTIMATORS_GPU_ESTIMATORS_H_ -#define TENSORFLOW_COMPILER_MLIR_LITE_EXPERIMENTAL_ESTIMATORS_GPU_ESTIMATORS_H_ - -// GPU -constexpr float kGPUArithmeticUnitCost = 0.2; - -// The copy can be non-consectutive copy. This is just fake data. -constexpr float kGPUCopyUnitCost = 0.2; -constexpr float kGPUDefaultCost = 1.0f; - -// Default values. 
-constexpr float kGPUDefaultFixedValuedCost = 10000.0; - -// tfl.abs -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - llvm::errs() << "No defined cost function for op: " - << op->getName().getStringRef().str(); - return 0.0; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.add -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - int64_t count; - if (ArithmeticCountUtilHelper::GetArithmeticCountForBroadcastableOp(op, - &count)) - return kGPUArithmeticUnitCost * count; - return kGPUDefaultFixedValuedCost; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.average_pool_2d -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - llvm::errs() << "No defined cost function for op: " - << op->getName().getStringRef().str(); - return 0.0; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.concatenation -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - int64_t count; - if (ArithmeticCountUtilHelper::GetInputTensorTotalSize(op, &count)) - return kGPUCopyUnitCost * count; - return kGPUDefaultFixedValuedCost; - } - - // TODO(renjieliu): We probably need to check for dynamic weights. - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.conv_2d -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - int64_t arithmetic_count; - if (ArithmeticCountUtilHelper::GetArithmeticCountForConvAndFullyconnectedOp( - op, &arithmetic_count)) { - return arithmetic_count * kGPUArithmeticUnitCost; - } - return kGPUDefaultFixedValuedCost; - } - - // TODO(renjieliu): We probably need to check for dynamic weights. 
- static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.cos -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - llvm::errs() << "No defined cost function for op: " - << op->getName().getStringRef().str(); - return 0.0; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.depthwise_conv_2d -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - int64_t arithmetic_count; - if (ArithmeticCountUtilHelper::GetArithmeticCountForConvAndFullyconnectedOp( - op, &arithmetic_count)) { - return arithmetic_count * kGPUArithmeticUnitCost; - } - return kGPUDefaultFixedValuedCost; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.div -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - llvm::errs() << "No defined cost function for op: " - << op->getName().getStringRef().str(); - return 0.0; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.exp -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - llvm::errs() << "No defined cost function for op: " - << op->getName().getStringRef().str(); - return 0.0; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.fully_connected -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - int64_t arithmetic_count; - if (ArithmeticCountUtilHelper::GetArithmeticCountForConvAndFullyconnectedOp( - op, &arithmetic_count)) { - return arithmetic_count * kGPUArithmeticUnitCost; - } - return kGPUDefaultFixedValuedCost; - } - - // TODO(renjieliu): we need to check for dynamic weights. 
- static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.hard_swish -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - llvm::errs() << "No defined cost function for op: " - << op->getName().getStringRef().str(); - return 0.0; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.log -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - llvm::errs() << "No defined cost function for op: " - << op->getName().getStringRef().str(); - return 0.0; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.logistic -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - llvm::errs() << "No defined cost function for op: " - << op->getName().getStringRef().str(); - return 0.0; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.max_pool_2d -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - llvm::errs() << "No defined cost function for op: " - << op->getName().getStringRef().str(); - return 0.0; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.mirror_pad -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - llvm::errs() << "No defined cost function for op: " - << op->getName().getStringRef().str(); - return 0.0; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.maximum -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - llvm::errs() << "No defined cost function for op: " - << op->getName().getStringRef().str(); - return 0.0; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.custom -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - llvm::errs() << "No defined cost function for op: " - << op->getName().getStringRef().str(); - return 0.0; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.mean -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - llvm::errs() << "No defined cost function for op: " - << op->getName().getStringRef().str(); - return 0.0; - } - - // TODO(renjieiu): check for constraints. 
- static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.minimum -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - llvm::errs() << "No defined cost function for op: " - << op->getName().getStringRef().str(); - return 0.0; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.mul -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - int64_t count; - if (ArithmeticCountUtilHelper::GetArithmeticCountForBroadcastableOp(op, - &count)) - return kGPUArithmeticUnitCost * count; - return kGPUDefaultFixedValuedCost; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.pad -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - llvm::errs() << "No defined cost function for op: " - << op->getName().getStringRef().str(); - return 0.0; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.pow -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - llvm::errs() << "No defined cost function for op: " - << op->getName().getStringRef().str(); - return 0.0; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.prelu -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - llvm::errs() << "No defined cost function for op: " - << op->getName().getStringRef().str(); - return 0.0; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.relu -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - llvm::errs() << "No defined cost function for op: " - << op->getName().getStringRef().str(); - return 0.0; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.relu6 -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - llvm::errs() << "No defined cost function for op: " - << op->getName().getStringRef().str(); - return 0.0; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.reshape -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - int64_t count; - if (ArithmeticCountUtilHelper::GetInputTensorTotalSize(op, &count)) - return kGPUCopyUnitCost * count; - return kGPUDefaultFixedValuedCost; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.rsqrt -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - llvm::errs() << "No defined cost function for op: " - << op->getName().getStringRef().str(); - return 0.0; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.sin -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - llvm::errs() << "No defined cost function for op: " - << op->getName().getStringRef().str(); - return 0.0; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.slice -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - llvm::errs() << "No defined cost function for op: " - << op->getName().getStringRef().str(); - return 0.0; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.softmax -template <> -class TFLiteCostEstimator { - public: - static double 
GetCost(mlir::Operation* op) { - llvm::errs() << "No defined cost function for op: " - << op->getName().getStringRef().str(); - return 0.0; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.space_to_depth -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - llvm::errs() << "No defined cost function for op: " - << op->getName().getStringRef().str(); - return 0.0; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.sqrt -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - llvm::errs() << "No defined cost function for op: " - << op->getName().getStringRef().str(); - return 0.0; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.square -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - llvm::errs() << "No defined cost function for op: " - << op->getName().getStringRef().str(); - return 0.0; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.squared_difference -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - llvm::errs() << "No defined cost function for op: " - << op->getName().getStringRef().str(); - return 0.0; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.strided_slice -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - llvm::errs() << "No defined cost function for op: " - << op->getName().getStringRef().str(); - return 0.0; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.tanh -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - llvm::errs() << "No defined cost function for op: " - << op->getName().getStringRef().str(); - return 0.0; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.transpose -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - llvm::errs() << "No defined cost function for op: " - << op->getName().getStringRef().str(); - return 0.0; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -// tfl.transpose_conv -template <> -class TFLiteCostEstimator { - public: - static double GetCost(mlir::Operation* op) { - llvm::errs() << "No defined cost function for op: " - << op->getName().getStringRef().str(); - return 0.0; - } - - static bool IsSupported(mlir::Operation* op) { return true; } -}; - -#endif // TENSORFLOW_COMPILER_MLIR_LITE_EXPERIMENTAL_ESTIMATORS_GPU_ESTIMATORS_H_ - diff --git a/tensorflow/compiler/mlir/lite/experimental/tfl_hardware_interfaces.td b/tensorflow/compiler/mlir/lite/experimental/tfl_hardware_interfaces.td deleted file mode 100644 index 5c3ec6c206c..00000000000 --- a/tensorflow/compiler/mlir/lite/experimental/tfl_hardware_interfaces.td +++ /dev/null @@ -1,76 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// WARNING: This Interface is experimental, DO NOT USE. - -// This is the Target Hardware operation interfacea definition file -// for TensorFlow Lite. - -#ifndef TFL_TARGET_HARDWARE_OP_INTERFACES -#define TFL_TARGET_HARDWARE_OP_INTERFACES - -def TFL_CpuTargetOp : OpInterface<"CpuOpTargetInterface"> { - let description = [{ - Interface for ops to run on CPU. - }]; - - let methods = [ - InterfaceMethod< - [{Returns the cost of running this op on CPU.}], - // TODO(karimnosseir): Change to return Cost object instead. - "double", "GetOpCost", (ins "mlir::Operation*":$op_to_check), [{ - // TODO(karimnosseir): Consider changing to another way that doesn't - // rely on template param name. - return TFL::TFLiteCostEstimator::GetCost(op_to_check); - }] - >, - InterfaceMethod< - [{Returns whether this op can be run on CPU.}], - "bool", "IsSupported", (ins "mlir::Operation*":$op_to_check), [{ - // TODO(karimnosseir): Consider changing to another way that doesn't - // rely on template param name. - return TFL::TFLiteCostEstimator::IsSupported(op_to_check); - }] - >, - ]; -} - -def TFL_GpuTargetOp : OpInterface<"GpuOpTargetInterface"> { - let description = [{ - Interface for ops to run on GPU. - }]; - - let methods = [ - InterfaceMethod< - [{Returns the cost of running this op on GPU.}], - // TODO(karimnosseir): Change to return Cost object instead. - "double", "GetOpCost", (ins "Operation*":$op_to_check), [{ - // TODO(karimnosseir): Consider changing to another way that doesn't - // rely on template param name. - return TFL::TFLiteCostEstimator::GetCost(op_to_check); - }] - >, - InterfaceMethod< - [{Returns whether this op can be run on GPU.}], - "bool", "IsSupported", (ins "Operation*":$op_to_check), [{ - // TODO(karimnosseir): Consider changing to another way that doesn't - // rely on template param name. - return TFL::TFLiteCostEstimator::IsSupported(op_to_check); - }] - >, - ]; -} - -#endif // TFL_TARGET_HARDWARE_OP_INTERFACES diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_op_interfaces.td b/tensorflow/compiler/mlir/lite/ir/tfl_op_interfaces.td index becc2f7ab85..e14178d6f6d 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_op_interfaces.td +++ b/tensorflow/compiler/mlir/lite/ir/tfl_op_interfaces.td @@ -19,7 +19,6 @@ limitations under the License. #define TFL_OP_INTERFACES include "mlir/IR/OpBase.td" -include "tensorflow/compiler/mlir/lite/experimental/tfl_hardware_interfaces.td" //===----------------------------------------------------------------------===// // TFL op interface for stateful operands. 
diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.h b/tensorflow/compiler/mlir/lite/ir/tfl_ops.h index 5f619503e56..caed0bb3ad9 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.h +++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.h @@ -48,14 +48,9 @@ class TensorFlowLiteDialect : public Dialect { Location loc) override; }; -#include "tensorflow/compiler/mlir/lite/experimental/estimators/estimator.h" #include "tensorflow/compiler/mlir/lite/ir/tfl_ops_interface.h.inc" #define GET_OP_CLASSES #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h.inc" -// Include all specializes estimators below this line -#include "tensorflow/compiler/mlir/lite/experimental/estimators/arithmetic_count_util.h" -#include "tensorflow/compiler/mlir/lite/experimental/estimators/cpu_estimators.h" -#include "tensorflow/compiler/mlir/lite/experimental/estimators/gpu_estimators.h" } // end namespace TFL } // end namespace mlir diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td index f462eee5622..04e143541c3 100644 --- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.td +++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.td @@ -410,10 +410,7 @@ def TFL_ComparisonBinaryBuilder : OpBuilder< class TFL_Op traits = []> : Op, - // All TFL ops are supported on CPU. - DeclareOpInterfaceMethods - ])> { + [DeclareOpInterfaceMethods])> { // FlatBuffer generation specific information. // ------------------------------------------- // When generating the FlatBuffer output some operations have @@ -435,8 +432,7 @@ class TFL_Op traits = []> : class TFL_ConvOp : TFL_Op, - AffineQuantizedOpInterface, AffineOpCoefficient, - TFL_GpuTargetOp, TFL_SparseOp]> { + AffineQuantizedOpInterface, AffineOpCoefficient, TFL_SparseOp]> { let summary = opSummary # " operator"; let description = [{ @@ -473,8 +469,7 @@ def TFL_AbsOp : TFL_Op<"abs", [ NoSideEffect, SameOperandsAndResultShape, SameOperandsAndResultType, - NoQuantizableResult, - TFL_GpuTargetOp]> { + NoQuantizableResult]> { let summary = "Absolute value operator"; let description = [{ @@ -495,8 +490,7 @@ def TFL_AddOp : TFL_Op<"add", [ CPred<"TFL::VerifyAddOpShapeConstraints(llvm::cast($_op))">>, ResultsBroadcastableShape, NoSideEffect, - Commutative, - TFL_GpuTargetOp]> { + Commutative]> { let summary = "Addition operator"; let description = [{ @@ -573,7 +567,6 @@ def TFL_TransposeConvOp: TFL_Op<"transpose_conv", [ TFL_TCresVTEtIsSameAsOp<0, 2>>, AccumulatorUniformScale<3, 1, 2>, AffineQuantizedOpInterface, AffineOpCoefficient<0, 2>, - TFL_GpuTargetOp, TFL_SparseOp]> { let summary = "Transpose convolution operator"; @@ -612,8 +605,7 @@ def TFL_TransposeConvOp: TFL_Op<"transpose_conv", [ def TFL_AveragePool2DOp: TFL_Op<"average_pool_2d", [NoSideEffect, - SameOperandsAndResultsScale, - TFL_GpuTargetOp]> { + SameOperandsAndResultsScale]> { let summary = "Average_pool_2d operator"; let description = [{ @@ -713,8 +705,7 @@ def TFL_ConcatenationOp : TFL_Op<"concatenation", NoSideEffect, PredOpTrait<"values and output must have same element type", TFL_TCresVTEtIsSameAsOp<0, 0>>, - SameOperandsAndResultsScale, - TFL_GpuTargetOp + SameOperandsAndResultsScale ]> { let summary = "Concatenation operator"; @@ -861,8 +852,7 @@ def TFL_CosOp: TFL_Op<"cos", [ NoSideEffect, SameOperandsAndResultShape, SameOperandsAndResultType, - NoQuantizableResult, - TFL_GpuTargetOp]> { + NoQuantizableResult]> { let summary = "Cosine operator"; let description = [{ @@ -916,8 +906,7 @@ def TFL_FullyConnectedOp : TFL_Op<"fully_connected", [ NoSideEffect, 
AccumulatorUniformScale<2, 0, 1>, AffineQuantizedOpInterface, AffineOpCoefficient<-1, 1>, - TFL_SparseOp, - TFL_GpuTargetOp]> { + TFL_SparseOp]> { let summary = "Fully connected op"; let arguments = (ins @@ -1360,8 +1349,7 @@ def TFL_DivOp : TFL_Op<"div", [ TFL_OperandsHaveSameShapesOrBroadcastableShape<[0, 1], 5>, ResultsBroadcastableShape, NoSideEffect, - NoQuantizableResult, - TFL_GpuTargetOp]> { + NoQuantizableResult]> { let summary = "Division operator"; let description = [{ @@ -1449,8 +1437,7 @@ def TFL_EqualOp: TFL_Op<"equal", [ } def TFL_ExpOp: TFL_Op<"exp", [NoSideEffect, - SameOperandsAndResultType, - TFL_GpuTargetOp]> { + SameOperandsAndResultType]> { let summary = "Natural exponentiation operator"; let description = [{ @@ -1659,8 +1646,7 @@ def TFL_HardSwishOp: TFL_Op<"hard_swish", [ NoSideEffect, SameOperandsAndResultShape, PredOpTrait<"input and output must have same element type", - TFL_TCresVTEtIsSameAsOp<0, 0>>, - TFL_GpuTargetOp]> { + TFL_TCresVTEtIsSameAsOp<0, 0>>]> { let summary = "Hardswish activation function."; let description = [{ Computes hard-swish activation function @@ -1812,8 +1798,7 @@ def TFL_LogisticOp: TFL_Op<"logistic", [ PredOpTrait<"x and y must have same element type", TFL_TCresVTEtIsSameAsOp<0, 0>>, SameOperandsAndResultShape, - FixedOutputRangeInterface, - TFL_GpuTargetOp]> { + FixedOutputRangeInterface]> { let summary = "Logistic operator"; let description = [{ @@ -1841,8 +1826,7 @@ def TFL_LogOp: TFL_Op<"log", [ NoSideEffect, SameOperandsAndResultShape, SameOperandsAndResultType, - NoQuantizableResult, - TFL_GpuTargetOp]> { + NoQuantizableResult]> { let summary = "Natural logarithm operator"; let description = [{ @@ -1908,8 +1892,7 @@ def TFL_MaxPool2DOp : TFL_Op<"max_pool_2d", [ TFL_TCresVTEtIsSameAsOp<0, 0>>, NoSideEffect, MaxPoolOperandAndResultConstraints, - SameOperandsAndResultsScale, - TFL_GpuTargetOp]> { + SameOperandsAndResultsScale]> { let summary = "Max Pool 2D op"; let description = [{ @@ -1941,8 +1924,7 @@ def TFL_MaximumOp : TFL_Op<"maximum", [ NoSideEffect, TFL_OperandsHaveSameShapesOrBroadcastableShape<[0, 1], 5>, Commutative, - SameOperandsAndResultsScale, - TFL_GpuTargetOp]> { + SameOperandsAndResultsScale]> { let summary = "Max operator"; let description = [{ Element-wise max operation. @@ -1965,8 +1947,7 @@ def TFL_MaximumOp : TFL_Op<"maximum", [ def TFL_MeanOp : TFL_Op<"mean", [ PredOpTrait<"input and output must have same element type", TFL_TCresVTEtIsSameAsOp<0, 0>>, - NoSideEffect, - TFL_GpuTargetOp]> { + NoSideEffect]> { let summary = "Mean operator"; let description = [{ @@ -2044,8 +2025,7 @@ def TFL_SliceOp : TFL_Op<"slice", [ SameOperandsAndResultsScale, TFL_OperandHasRankAtMost<0, 4>, TFL_OperandHasRankAtMost<1, 1>, - TFL_OperandHasRankAtMost<2, 1>, - TFL_GpuTargetOp]> { + TFL_OperandHasRankAtMost<2, 1>]> { let summary = "Return a slice from 'input'."; let description = [{ @@ -2176,8 +2156,7 @@ def TFL_MinimumOp : TFL_Op<"minimum", [ NoSideEffect, TFL_OperandsHaveSameShapesOrBroadcastableShape<[0, 1], 5>, Commutative, - SameOperandsAndResultsScale, - TFL_GpuTargetOp]> { + SameOperandsAndResultsScale]> { let summary = "Min operator"; let description = [{ Element-wise min operation. 
@@ -2203,8 +2182,7 @@ def TFL_MulOp : TFL_Op<"mul", [ Commutative, BinaryOpSameElementTypeConstraint, TFL_RuntimePredOpTrait<"Operands do not have valid shapes", - CPred<"TFL::VerifyMulOpShapeConstraints(llvm::cast($_op))">>, - TFL_GpuTargetOp]> { + CPred<"TFL::VerifyMulOpShapeConstraints(llvm::cast($_op))">>]> { let summary = "Multiplication operator"; let description = [{ @@ -2310,8 +2288,7 @@ def TFL_PadOp : TFL_Op<"pad", [ TFL_OperandRankEquals1DimOfOperand<0, 1>, PredOpTrait<"the first dim size of the padding argument must be at most 4", Or<[TFL_OperandIsUnrankedPred<1>, - TFL_OperandDimIsAtMost<1, 0, 4>]>>, - TFL_GpuTargetOp]> { + TFL_OperandDimIsAtMost<1, 0, 4>]>>]> { let summary = "Padding operator"; let description = [{ @@ -2404,8 +2381,7 @@ def TFL_PowOp : TFL_Op<"pow", [ ResultsBroadcastableShape, NoSideEffect, TFL_OperandsHaveSameShapesOrBroadcastableShape<[0, 1], 4>, - NoQuantizableResult, - TFL_GpuTargetOp]> { + NoQuantizableResult]> { let summary = "Power operator"; let description = [{ @@ -2428,7 +2404,6 @@ def TFL_PowOp : TFL_Op<"pow", [ def TFL_PReluOp : TFL_Op<"prelu", [ NoSideEffect, ResultsBroadcastableShape, - TFL_GpuTargetOp, TFL_OperandsHaveSameShapesOrBroadcastableShape<[0, 1], 4>, BinaryOpSameElementTypeConstraint, PredOpTrait<"input and output must have the same element type", @@ -2470,8 +2445,7 @@ def TFL_ReluOp: TFL_Op<"relu", [ TFL_TCresVTEtIsSameAsOp<0, 0>>, NoSideEffect, SameOperandsAndResultShape, - SameOperandsAndResultsScale, - TFL_GpuTargetOp]> { + SameOperandsAndResultsScale]> { let summary = "Relu operator"; let description = [{ @@ -2500,8 +2474,7 @@ def TFL_Relu6Op: TFL_Op<"relu6", [ TFL_TCresVTEtIsSameAsOp<0, 0>>, NoSideEffect, SameOperandsAndResultShape, - SameOperandsAndResultsScale, - TFL_GpuTargetOp]> { + SameOperandsAndResultsScale]> { let summary = "Relu6 operator"; let description = [{ @@ -2555,7 +2528,7 @@ def TFL_Relu1Op: TFL_Op<"relu_n1_to_1", [ } def TFL_ReshapeOp: TFL_Op<"reshape", [ - NoSideEffect, SameOperandsAndResultsScale, TFL_GpuTargetOp]> { + NoSideEffect, SameOperandsAndResultsScale]> { let summary = "Reshape operator"; let description = [{ @@ -2610,8 +2583,7 @@ slice `i`, with the first `seq_lengths[i]` slices along dimension def TFL_RsqrtOp: TFL_Op<"rsqrt", [NoSideEffect, SameOperandsAndResultType, SameOperandsAndResultShape, - NoQuantizableResult, - TFL_GpuTargetOp]> { + NoQuantizableResult]> { let summary = "Reciprocal of square root operator"; let description = [{ @@ -2777,8 +2749,7 @@ def TFL_SinOp: TFL_Op<"sin", [ NoSideEffect, SameOperandsAndResultShape, SameOperandsAndResultType, - NoQuantizableResult, - TFL_GpuTargetOp]> { + NoQuantizableResult]> { let summary = "Sine operator"; let description = [{ @@ -2798,8 +2769,7 @@ def TFL_SoftmaxOp : TFL_Op<"softmax", [ TFL_TCresVTEtIsSameAsOp<0, 0>>, TFL_OperandHasRankRange<0, 1, 4>, SameOperandsAndResultShape, - FixedOutputRangeInterface, - TFL_GpuTargetOp]> { + FixedOutputRangeInterface]> { let summary = "Softmax operator"; let description = [{ @@ -2834,8 +2804,7 @@ def TFL_SqrtOp: TFL_Op<"sqrt", [ NoSideEffect, SameOperandsAndResultShape, SameOperandsAndResultType, - NoQuantizableResult, - TFL_GpuTargetOp]> { + NoQuantizableResult]> { let summary = "Square root operator"; let description = [{ @@ -2853,8 +2822,7 @@ def TFL_SquareOp: TFL_Op<"square", [ NoSideEffect, SameOperandsAndResultShape, SameOperandsAndResultType, - NoQuantizableResult, - TFL_GpuTargetOp]> { + NoQuantizableResult]> { let summary = "Square operator"; let description = [{ @@ -2907,8 +2875,7 @@ def 
TFL_SquaredDifferenceOp : TFL_Op<"squared_difference", [ SameOperandsAndResultElementType, ResultsBroadcastableShape, NoSideEffect, - NoQuantizableResult, - TFL_GpuTargetOp]> { + NoQuantizableResult]> { let summary = "Squared difference operator"; let description = [{ @@ -2933,8 +2900,7 @@ def TFL_TanhOp: TFL_Op<"tanh", [ SameOperandsAndResultShape, PredOpTrait<"input and output must have same element type", TFL_TCresVTEtIsSameAsOp<0, 0>>, - FixedOutputRangeInterface, - TFL_GpuTargetOp]> { + FixedOutputRangeInterface]> { let summary = "Hyperbolic tangent operator"; let description = [{ @@ -3035,8 +3001,7 @@ def TFL_TransposeOp : TFL_Op<"transpose", [ TFL_OperandHasRank<1, 1>, PredOpTrait<"input and output must have same element type", TFL_TCresVTEtIsSameAsOp<0, 0>>, - SameOperandsAndResultsScale, - TFL_GpuTargetOp]> { + SameOperandsAndResultsScale]> { let summary = "Transpose operator"; let description = [{ @@ -3170,8 +3135,7 @@ def TFL_SpaceToDepthOp: TFL_Op<"space_to_depth", [ SameOperandsAndResultsScale, PredOpTrait<"input and output must have same element type", TFL_TCresVTEtIsSameAsOp<0, 0>>, - TFL_OperandHasRankAtMost<0, 4>, - TFL_GpuTargetOp + TFL_OperandHasRankAtMost<0, 4> ]> { let summary = "SpaceToDepth operator"; @@ -3383,8 +3347,7 @@ def TFL_StridedSliceOp: TFL_Op<"strided_slice", [ TFL_OperandHasRankAtMost<0, 5>, TFL_OperandHasRank<1, 1>, TFL_OperandHasRank<2, 1>, - TFL_OperandHasRank<3, 1>, - TFL_GpuTargetOp + TFL_OperandHasRank<3, 1> ]> { let summary = "StridedSlice Op"; @@ -3434,7 +3397,7 @@ def TFL_CastOp : TFL_Op<"cast", [ } def TFL_MirrorPadOp: TFL_Op<"mirror_pad", [ - NoSideEffect, TFL_OperandHasRank<1, 2>, TFL_GpuTargetOp]> { + NoSideEffect, TFL_OperandHasRank<1, 2>]> { let summary = "MirrorPad Operator. Pads a tensor with mirrored values."; let description = [{ From 1a290148dcc1996be4fe08dcecfe750a481c6bf4 Mon Sep 17 00:00:00 2001 From: "902449@58880@bigcat_chen@ASIC" Date: Thu, 16 Jul 2020 08:57:33 +0800 Subject: [PATCH 0571/2522] TFLM:replace Himax WE1 EVB micro speech example animation with external link --- .../micro/examples/micro_speech/README.md | 2 +- .../images/animation_on_himax_we1_evb.gif | Bin 1063997 -> 0 bytes 2 files changed, 1 insertion(+), 1 deletion(-) delete mode 100644 tensorflow/lite/micro/examples/micro_speech/images/animation_on_himax_we1_evb.gif diff --git a/tensorflow/lite/micro/examples/micro_speech/README.md b/tensorflow/lite/micro/examples/micro_speech/README.md index 0ee367bd854..5b291a4d6cf 100644 --- a/tensorflow/lite/micro/examples/micro_speech/README.md +++ b/tensorflow/lite/micro/examples/micro_speech/README.md @@ -660,7 +660,7 @@ Following the Steps to run micro speech example at HIMAX WE1 EVB platform. After these steps, press reset button on the HIMAX WE1 EVB, you will see application output in the serial terminal and lighting LED. 
-![Animation on Himax WE1 EVB](images/animation_on_himax_we1_evb.gif)
+![Animation on Himax WE1 EVB](https://github.com/HimaxWiseEyePlus/bsp_tflu/tree/master/HIMAX_WE1_EVB_user_guide/images/tflm_example_micro_speech_int8_led.gif)

 ## Run on macOS

diff --git a/tensorflow/lite/micro/examples/micro_speech/images/animation_on_himax_we1_evb.gif b/tensorflow/lite/micro/examples/micro_speech/images/animation_on_himax_we1_evb.gif
deleted file mode 100644
index 5897c43d5a20c735f8f6fd086720fce083ffb30c..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 1063997
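The hunk above replaces the in-tree animation with an externally hosted one in the micro_speech README's HIMAX WE1 EVB instructions. For readers following those instructions, TFLM examples are built through the project Makefile at tensorflow/lite/micro/tools/make/Makefile; a minimal sketch of the invocation is shown below, where the `TARGET=himax_we1_evb` value and the `micro_speech_bin` goal are assumptions for illustration and are not taken from this patch:

    # Hypothetical build command following the TFLM make convention;
    # TARGET name and binary goal are assumed, not part of this patch.
    make -f tensorflow/lite/micro/tools/make/Makefile TARGET=himax_we1_evb micro_speech_bin

After the build, the README's remaining steps (flashing the board and pressing the reset button) apply unchanged.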
zc+IUhZ0!-wFWV&ue#BC7@Ny)yR`YR{=H;N%^cD$>k3w0qq4ov}ljGzlB2l9!|jogs;qIkw+$Pmtk)bf#zoc|hHJUV@%16IC6vHj?(S}GZYL;i zr}P7VZo^5~6o8NYpsbIoqjtdhQFXjuu)gbu%rXvLqi85oVD~7nDWbKTHRuElS_EKS z71%Nb1r={BxTPZi5%Qi+RDttnCF%QvlB}M&Tl2i`}R;llLN^!z3BJjl4 z8;>?;7T|{qZ)A2ke+D94T9qdV8 zx?t^-IZp}quOAD%;hn$T?GN6GuoQVe&Lvi<5N4-+GGX}!-9-uC&&# zZT)`G3G-O_Axr`Pf@zfsiUX)$RmX5uQhzxeEsG~4va9Q(vAA$SLYu8NQcS!U)8`T! zn;!dR++I|MEt~$rd${1Neju%5voBJ%v_tZq7MT5Y0W!>6ro291iC-{ zGM<+~oB;IwQr0AENC6~9WS!|~yvf;d<8%(Ao{exvD|SdLRG@w{Fk7L?uIqKL(x43| z@eGi>N|e6+$&s|r5k5+AZY2D8!{2F6!iqN$aSQVE-{fvc^4_ztD_}d%lMQa5p&%vn z3?{c{0;*ZVbrC2Dz#qy;j#chyT7bW!(B)2(>^bB~#fldqY$V4RR?*7pj9NdfY(3+< zUN|#r6B8O`N0n}gWW=s4E4lm0{$DWQkb_j~IGJkqwDfs}g#WD(li|+4bmX8fCRKq& zfX&H(&UYpLS~~$@>XM`x8#N?m-Bvz=VT@-MTx(GQByv1c5YKG3HL$4rKLF>%lnbej zN*t$60ZoZYnpUYR_@Ub8S%rR7Qq?nJ#;(@8zhljTHbDtwSg`BEX!Uf)N2xIs)%OK zp+06uSBr>@G-?unoHrDkECRyuuQjYX{wDpt_eh23Dg2~csUbSa;H1U$j{Uy>r6nDy z8jOfm;6^1_V=Y#i-Yyv0)$^hc>En5zj%F^@8Z*tW=GuqG~OVEe;s|R z<&gGS#dQVM%#hCh7kzC8ovNT>RaotB5@YCQMNesE&)b`N`73|ySaCFOhwqtlu}5wO z?#t*|;IvKey?3%x2`-NPe=D{^lCuacFJ%*20ULfjcyk%*8Ngri@u7SRI{DAdVSwzw z$Au|2x{q9($3NLdi8OueuIcK{q6{YuTAn2$M+WC9P^l-WBW0AR@q0%Tmp+MNz*0Rw z&b@jKRaCI9gH1(A$rh!^brpsEgeTvJ>^GDmT|MS!Zd&#YS<}u8RaM^zOwqo)>s`|` zebeosW$H-4=B@-0u_~TE{1h=z2_Hn1x4`_v=cuhPIZbnJon1wof*$hh;q+;@F+v3p$@y+&yTCmGlyQl!TOj3|8R=7xC}eYtSte1`1>d0;sc! znFah$M&_io%c)gtU38s}K+3?!PtE~!h8cGLUP7b>AF0@@*3m2ckn4Q@Z`P^5yWH~8 znAV%`-fSj&Tb)T<@Tn_kXIC6Ri2_D2Fp#9lB9D8Jb{c1Cwfr=$WT` zo>WF9yJNCo!ju9Nm;G!0SA3PI?68Q|%F+KEW3;8cqwaCtUW@vR!7KU|TeZ#S?|9O&Z z=CkvJ_l~s4@t$KL3e+|fCWLD4teBO2w$Ll%My!EigA=%NMcmaOqkK+asfjLc>g!bz zKOYI3TKQy9FSSCaBTR#CNI3RSPOou^1LoCDkF?uIHRgGLlV+Zxne`sJBZbjqW{akd z(<5y4U2}nD;jx)wwFt%LE>^_&5gFDNfk_csS$00nGqOt^aT$Vil0qDZMc^%*4s$SS z`6Yb$MK|w+cZdUdnaoO9HmBwU+%_}~tIT`vc&o_B|IWmz>Rk^`{Jl^8d-9*a<9ik_ z;;&7y;mXhjjAB!Bhp7yO;3M`DWw(mQQRUHAKy?>#@nsQz`*^?l1{ItPH&LpwA=3Wd z+|Yeioxw6$#diNv+O&|b zm&dbQhFgvlFDeo&c#-~RrYC&y+;rv(ohj4AT0Ep1VaCVqKPgYbx+HG&rK(PLli|eA zy#^g~uXTL>EkgC#YImSy*!6z&V3`ZYP%l$9YE{70&k$x>O5N-Fm@7u)e0tMtE6GY; z<{D(=A`-twN1k6%Uc~ChH$amClTv8ini*xx=0`31*PK81SD*#;hEm9Nnuq7mscZln zXjPI$3rMN+r_bNlj-}XS?1fQOHcy4OD}s+*Sy#G2ptGqZn~$a?QdNX{m%2Od?>e^k z(gwMDdG>>2T9qI9%KhdqC;rYqU;O|sjBjxna(L3xAP=xV)p+zKPyLXU{;m>wwfjdU ztYDUH;Gev3dxsyc?_@qB%kWblw_!>UYAZ~CJ@C9iqL$K2%1~`={AU&6Jb94rk1^